From 7c7e3d31e7856a8260a254f8c71db416f7f9f5a1 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Fri, 5 Nov 2021 16:23:29 -0700 Subject: bpf: Introduce helper bpf_find_vma In some profiler use cases, it is necessary to map an address to the backing file, e.g., a shared library. bpf_find_vma helper provides a flexible way to achieve this. bpf_find_vma maps an address of a task to the vma (vm_area_struct) for this address, and feed the vma to an callback BPF function. The callback function is necessary here, as we need to ensure mmap_sem is unlocked. It is necessary to lock mmap_sem for find_vma. To lock and unlock mmap_sem safely when irqs are disable, we use the same mechanism as stackmap with build_id. Specifically, when irqs are disabled, the unlocked is postponed in an irq_work. Refactor stackmap.c so that the irq_work is shared among bpf_find_vma and stackmap helpers. Signed-off-by: Song Liu Signed-off-by: Alexei Starovoitov Tested-by: Hengqi Chen Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20211105232330.1936330-2-songliubraving@fb.com --- include/uapi/linux/bpf.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index ba5af15e25f5..509eee5f0393 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4938,6 +4938,25 @@ union bpf_attr { * **-ENOENT** if symbol is not found. * * **-EPERM** if caller does not have permission to obtain kernel address. + * + * long bpf_find_vma(struct task_struct *task, u64 addr, void *callback_fn, void *callback_ctx, u64 flags) + * Description + * Find vma of *task* that contains *addr*, call *callback_fn* + * function with *task*, *vma*, and *callback_ctx*. + * The *callback_fn* should be a static function and + * the *callback_ctx* should be a pointer to the stack. + * The *flags* is used to control certain aspects of the helper. + * Currently, the *flags* must be 0. + * + * The expected callback signature is + * + * long (\*callback_fn)(struct task_struct \*task, struct vm_area_struct \*vma, void \*callback_ctx); + * + * Return + * 0 on success. + * **-ENOENT** if *task->mm* is NULL, or no vma contains *addr*. + * **-EBUSY** if failed to try lock mmap_lock. + * **-EINVAL** for invalid **flags**. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5120,6 +5139,7 @@ union bpf_attr { FN(trace_vprintk), \ FN(skc_to_unix_sock), \ FN(kallsyms_lookup_name), \ + FN(find_vma), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.2.3 From f89315650ba34ec6c91a8bded72796980bee2a4d Mon Sep 17 00:00:00 2001 From: Mark Pashmfouroush Date: Wed, 10 Nov 2021 11:10:15 +0000 Subject: bpf: Add ingress_ifindex to bpf_sk_lookup It may be helpful to have access to the ifindex during bpf socket lookup. An example may be to scope certain socket lookup logic to specific interfaces, i.e. an interface may be made exempt from custom lookup code. Add the ifindex of the arriving connection to the bpf_sk_lookup API. Signed-off-by: Mark Pashmfouroush Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20211110111016.5670-2-markpash@cloudflare.com --- include/linux/filter.h | 7 +++++-- include/uapi/linux/bpf.h | 1 + net/core/filter.c | 7 +++++++ net/ipv4/inet_hashtables.c | 8 ++++---- net/ipv4/udp.c | 8 ++++---- net/ipv6/inet6_hashtables.c | 8 ++++---- net/ipv6/udp.c | 8 ++++---- tools/include/uapi/linux/bpf.h | 1 + 8 files changed, 30 insertions(+), 18 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 24b7ed2677af..b6a216eb217a 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1374,6 +1374,7 @@ struct bpf_sk_lookup_kern { const struct in6_addr *daddr; } v6; struct sock *selected_sk; + u32 ingress_ifindex; bool no_reuseport; }; @@ -1436,7 +1437,7 @@ extern struct static_key_false bpf_sk_lookup_enabled; static inline bool bpf_sk_lookup_run_v4(struct net *net, int protocol, const __be32 saddr, const __be16 sport, const __be32 daddr, const u16 dport, - struct sock **psk) + const int ifindex, struct sock **psk) { struct bpf_prog_array *run_array; struct sock *selected_sk = NULL; @@ -1452,6 +1453,7 @@ static inline bool bpf_sk_lookup_run_v4(struct net *net, int protocol, .v4.daddr = daddr, .sport = sport, .dport = dport, + .ingress_ifindex = ifindex, }; u32 act; @@ -1474,7 +1476,7 @@ static inline bool bpf_sk_lookup_run_v6(struct net *net, int protocol, const __be16 sport, const struct in6_addr *daddr, const u16 dport, - struct sock **psk) + const int ifindex, struct sock **psk) { struct bpf_prog_array *run_array; struct sock *selected_sk = NULL; @@ -1490,6 +1492,7 @@ static inline bool bpf_sk_lookup_run_v6(struct net *net, int protocol, .v6.daddr = daddr, .sport = sport, .dport = dport, + .ingress_ifindex = ifindex, }; u32 act; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 509eee5f0393..6297eafdc40f 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -6316,6 +6316,7 @@ struct bpf_sk_lookup { __u32 local_ip4; /* Network byte order */ __u32 local_ip6[4]; /* Network byte order */ __u32 local_port; /* Host byte order */ + __u32 ingress_ifindex; /* The arriving interface. Determined by inet_iif. */ }; /* diff --git a/net/core/filter.c b/net/core/filter.c index 8e8d3b49c297..315a58466fc9 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -10491,6 +10491,7 @@ static bool sk_lookup_is_valid_access(int off, int size, case bpf_ctx_range_till(struct bpf_sk_lookup, local_ip6[0], local_ip6[3]): case bpf_ctx_range(struct bpf_sk_lookup, remote_port): case bpf_ctx_range(struct bpf_sk_lookup, local_port): + case bpf_ctx_range(struct bpf_sk_lookup, ingress_ifindex): bpf_ctx_record_field_size(info, sizeof(__u32)); return bpf_ctx_narrow_access_ok(off, size, sizeof(__u32)); @@ -10580,6 +10581,12 @@ static u32 sk_lookup_convert_ctx_access(enum bpf_access_type type, bpf_target_off(struct bpf_sk_lookup_kern, dport, 2, target_size)); break; + + case offsetof(struct bpf_sk_lookup, ingress_ifindex): + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, + bpf_target_off(struct bpf_sk_lookup_kern, + ingress_ifindex, 4, target_size)); + break; } return insn - insn_buf; diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 75737267746f..30ab717ff1b8 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -307,7 +307,7 @@ static inline struct sock *inet_lookup_run_bpf(struct net *net, struct inet_hashinfo *hashinfo, struct sk_buff *skb, int doff, __be32 saddr, __be16 sport, - __be32 daddr, u16 hnum) + __be32 daddr, u16 hnum, const int dif) { struct sock *sk, *reuse_sk; bool no_reuseport; @@ -315,8 +315,8 @@ static inline struct sock *inet_lookup_run_bpf(struct net *net, if (hashinfo != &tcp_hashinfo) return NULL; /* only TCP is supported */ - no_reuseport = bpf_sk_lookup_run_v4(net, IPPROTO_TCP, - saddr, sport, daddr, hnum, &sk); + no_reuseport = bpf_sk_lookup_run_v4(net, IPPROTO_TCP, saddr, sport, + daddr, hnum, dif, &sk); if (no_reuseport || IS_ERR_OR_NULL(sk)) return sk; @@ -340,7 +340,7 @@ struct sock *__inet_lookup_listener(struct net *net, /* Lookup redirect from BPF */ if (static_branch_unlikely(&bpf_sk_lookup_enabled)) { result = inet_lookup_run_bpf(net, hashinfo, skb, doff, - saddr, sport, daddr, hnum); + saddr, sport, daddr, hnum, dif); if (result) goto done; } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 2fffcf2b54f3..5fceee3de65d 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -460,7 +460,7 @@ static struct sock *udp4_lookup_run_bpf(struct net *net, struct udp_table *udptable, struct sk_buff *skb, __be32 saddr, __be16 sport, - __be32 daddr, u16 hnum) + __be32 daddr, u16 hnum, const int dif) { struct sock *sk, *reuse_sk; bool no_reuseport; @@ -468,8 +468,8 @@ static struct sock *udp4_lookup_run_bpf(struct net *net, if (udptable != &udp_table) return NULL; /* only UDP is supported */ - no_reuseport = bpf_sk_lookup_run_v4(net, IPPROTO_UDP, - saddr, sport, daddr, hnum, &sk); + no_reuseport = bpf_sk_lookup_run_v4(net, IPPROTO_UDP, saddr, sport, + daddr, hnum, dif, &sk); if (no_reuseport || IS_ERR_OR_NULL(sk)) return sk; @@ -505,7 +505,7 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, /* Lookup redirect from BPF */ if (static_branch_unlikely(&bpf_sk_lookup_enabled)) { sk = udp4_lookup_run_bpf(net, udptable, skb, - saddr, sport, daddr, hnum); + saddr, sport, daddr, hnum, dif); if (sk) { result = sk; goto done; diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 67c9114835c8..4514444e96c8 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -165,7 +165,7 @@ static inline struct sock *inet6_lookup_run_bpf(struct net *net, const struct in6_addr *saddr, const __be16 sport, const struct in6_addr *daddr, - const u16 hnum) + const u16 hnum, const int dif) { struct sock *sk, *reuse_sk; bool no_reuseport; @@ -173,8 +173,8 @@ static inline struct sock *inet6_lookup_run_bpf(struct net *net, if (hashinfo != &tcp_hashinfo) return NULL; /* only TCP is supported */ - no_reuseport = bpf_sk_lookup_run_v6(net, IPPROTO_TCP, - saddr, sport, daddr, hnum, &sk); + no_reuseport = bpf_sk_lookup_run_v6(net, IPPROTO_TCP, saddr, sport, + daddr, hnum, dif, &sk); if (no_reuseport || IS_ERR_OR_NULL(sk)) return sk; @@ -198,7 +198,7 @@ struct sock *inet6_lookup_listener(struct net *net, /* Lookup redirect from BPF */ if (static_branch_unlikely(&bpf_sk_lookup_enabled)) { result = inet6_lookup_run_bpf(net, hashinfo, skb, doff, - saddr, sport, daddr, hnum); + saddr, sport, daddr, hnum, dif); if (result) goto done; } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 12c12619ee35..ea4ea525f94a 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -195,7 +195,7 @@ static inline struct sock *udp6_lookup_run_bpf(struct net *net, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, - u16 hnum) + u16 hnum, const int dif) { struct sock *sk, *reuse_sk; bool no_reuseport; @@ -203,8 +203,8 @@ static inline struct sock *udp6_lookup_run_bpf(struct net *net, if (udptable != &udp_table) return NULL; /* only UDP is supported */ - no_reuseport = bpf_sk_lookup_run_v6(net, IPPROTO_UDP, - saddr, sport, daddr, hnum, &sk); + no_reuseport = bpf_sk_lookup_run_v6(net, IPPROTO_UDP, saddr, sport, + daddr, hnum, dif, &sk); if (no_reuseport || IS_ERR_OR_NULL(sk)) return sk; @@ -240,7 +240,7 @@ struct sock *__udp6_lib_lookup(struct net *net, /* Lookup redirect from BPF */ if (static_branch_unlikely(&bpf_sk_lookup_enabled)) { sk = udp6_lookup_run_bpf(net, udptable, skb, - saddr, sport, daddr, hnum); + saddr, sport, daddr, hnum, dif); if (sk) { result = sk; goto done; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 509eee5f0393..6297eafdc40f 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -6316,6 +6316,7 @@ struct bpf_sk_lookup { __u32 local_ip4; /* Network byte order */ __u32 local_ip6[4]; /* Network byte order */ __u32 local_port; /* Host byte order */ + __u32 ingress_ifindex; /* The arriving interface. Determined by inet_iif. */ }; /* -- cgit v1.2.3 From 8c42d2fa4eeab6c37a0b1b1aa7a2715248ef4f34 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 11 Nov 2021 17:26:09 -0800 Subject: bpf: Support BTF_KIND_TYPE_TAG for btf_type_tag attributes LLVM patches ([1] for clang, [2] and [3] for BPF backend) added support for btf_type_tag attributes. This patch added support for the kernel. The main motivation for btf_type_tag is to bring kernel annotations __user, __rcu etc. to btf. With such information available in btf, bpf verifier can detect mis-usages and reject the program. For example, for __user tagged pointer, developers can then use proper helper like bpf_probe_read_user() etc. to read the data. BTF_KIND_TYPE_TAG may also useful for other tracing facility where instead of to require user to specify kernel/user address type, the kernel can detect it by itself with btf. [1] https://reviews.llvm.org/D111199 [2] https://reviews.llvm.org/D113222 [3] https://reviews.llvm.org/D113496 Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211112012609.1505032-1-yhs@fb.com --- include/uapi/linux/btf.h | 3 ++- kernel/bpf/btf.c | 14 +++++++++++++- tools/include/uapi/linux/btf.h | 3 ++- 3 files changed, 17 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/btf.h b/include/uapi/linux/btf.h index deb12f755f0f..b0d8fea1951d 100644 --- a/include/uapi/linux/btf.h +++ b/include/uapi/linux/btf.h @@ -43,7 +43,7 @@ struct btf_type { * "size" tells the size of the type it is describing. * * "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT, - * FUNC, FUNC_PROTO, VAR and DECL_TAG. + * FUNC, FUNC_PROTO, VAR, DECL_TAG and TYPE_TAG. * "type" is a type_id referring to another type. */ union { @@ -75,6 +75,7 @@ enum { BTF_KIND_DATASEC = 15, /* Section */ BTF_KIND_FLOAT = 16, /* Floating point */ BTF_KIND_DECL_TAG = 17, /* Decl Tag */ + BTF_KIND_TYPE_TAG = 18, /* Type Tag */ NR_BTF_KINDS, BTF_KIND_MAX = NR_BTF_KINDS - 1, diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index cdb0fba65600..1dd9ba82da1e 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -282,6 +282,7 @@ static const char * const btf_kind_str[NR_BTF_KINDS] = { [BTF_KIND_DATASEC] = "DATASEC", [BTF_KIND_FLOAT] = "FLOAT", [BTF_KIND_DECL_TAG] = "DECL_TAG", + [BTF_KIND_TYPE_TAG] = "TYPE_TAG", }; const char *btf_type_str(const struct btf_type *t) @@ -418,6 +419,7 @@ static bool btf_type_is_modifier(const struct btf_type *t) case BTF_KIND_VOLATILE: case BTF_KIND_CONST: case BTF_KIND_RESTRICT: + case BTF_KIND_TYPE_TAG: return true; } @@ -1737,6 +1739,7 @@ __btf_resolve_size(const struct btf *btf, const struct btf_type *type, case BTF_KIND_VOLATILE: case BTF_KIND_CONST: case BTF_KIND_RESTRICT: + case BTF_KIND_TYPE_TAG: id = type->type; type = btf_type_by_id(btf, type->type); break; @@ -2345,6 +2348,8 @@ static int btf_ref_type_check_meta(struct btf_verifier_env *env, const struct btf_type *t, u32 meta_left) { + const char *value; + if (btf_type_vlen(t)) { btf_verifier_log_type(env, t, "vlen != 0"); return -EINVAL; @@ -2360,7 +2365,7 @@ static int btf_ref_type_check_meta(struct btf_verifier_env *env, return -EINVAL; } - /* typedef type must have a valid name, and other ref types, + /* typedef/type_tag type must have a valid name, and other ref types, * volatile, const, restrict, should have a null name. */ if (BTF_INFO_KIND(t->info) == BTF_KIND_TYPEDEF) { @@ -2369,6 +2374,12 @@ static int btf_ref_type_check_meta(struct btf_verifier_env *env, btf_verifier_log_type(env, t, "Invalid name"); return -EINVAL; } + } else if (BTF_INFO_KIND(t->info) == BTF_KIND_TYPE_TAG) { + value = btf_name_by_offset(env->btf, t->name_off); + if (!value || !value[0]) { + btf_verifier_log_type(env, t, "Invalid name"); + return -EINVAL; + } } else { if (t->name_off) { btf_verifier_log_type(env, t, "Invalid name"); @@ -4059,6 +4070,7 @@ static const struct btf_kind_operations * const kind_ops[NR_BTF_KINDS] = { [BTF_KIND_DATASEC] = &datasec_ops, [BTF_KIND_FLOAT] = &float_ops, [BTF_KIND_DECL_TAG] = &decl_tag_ops, + [BTF_KIND_TYPE_TAG] = &modifier_ops, }; static s32 btf_check_meta(struct btf_verifier_env *env, diff --git a/tools/include/uapi/linux/btf.h b/tools/include/uapi/linux/btf.h index deb12f755f0f..b0d8fea1951d 100644 --- a/tools/include/uapi/linux/btf.h +++ b/tools/include/uapi/linux/btf.h @@ -43,7 +43,7 @@ struct btf_type { * "size" tells the size of the type it is describing. * * "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT, - * FUNC, FUNC_PROTO, VAR and DECL_TAG. + * FUNC, FUNC_PROTO, VAR, DECL_TAG and TYPE_TAG. * "type" is a type_id referring to another type. */ union { @@ -75,6 +75,7 @@ enum { BTF_KIND_DATASEC = 15, /* Section */ BTF_KIND_FLOAT = 16, /* Floating point */ BTF_KIND_DECL_TAG = 17, /* Decl Tag */ + BTF_KIND_TYPE_TAG = 18, /* Type Tag */ NR_BTF_KINDS, BTF_KIND_MAX = NR_BTF_KINDS - 1, -- cgit v1.2.3 From ef9f18a9e3a04126cf017216193166abe03dedef Mon Sep 17 00:00:00 2001 From: Dillon Min Date: Tue, 19 Oct 2021 09:43:21 +0100 Subject: media: v4l2-ctrls: Add RGB color effects control Add V4L2_COLORFX_SET_RGB color effects control, V4L2_CID_COLORFX_RGB for RGB color setting. with two mirror changes: - change 0xFFFFFF to 0xffffff - fix comments 2^24 to 2^24 - 1 [hverkuil: dropped spaces around + with V4L2_CID_BASE for consistency] Signed-off-by: Dillon Min Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- Documentation/userspace-api/media/v4l/control.rst | 9 +++++++++ drivers/media/v4l2-core/v4l2-ctrls-defs.c | 6 ++++-- include/uapi/linux/v4l2-controls.h | 4 +++- 3 files changed, 16 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/Documentation/userspace-api/media/v4l/control.rst b/Documentation/userspace-api/media/v4l/control.rst index f8d0b923da20..3eec65174260 100644 --- a/Documentation/userspace-api/media/v4l/control.rst +++ b/Documentation/userspace-api/media/v4l/control.rst @@ -242,8 +242,17 @@ Control IDs * - ``V4L2_COLORFX_SET_CBCR`` - The Cb and Cr chroma components are replaced by fixed coefficients determined by ``V4L2_CID_COLORFX_CBCR`` control. + * - ``V4L2_COLORFX_SET_RGB`` + - The RGB components are replaced by the fixed RGB components determined + by ``V4L2_CID_COLORFX_RGB`` control. +``V4L2_CID_COLORFX_RGB`` ``(integer)`` + Determines the Red, Green, and Blue coefficients for + ``V4L2_COLORFX_SET_RGB`` color effect. + Bits [7:0] of the supplied 32 bit value are interpreted as Blue component, + bits [15:8] as Green component, bits [23:16] as Red component, and + bits [31:24] must be zero. ``V4L2_CID_COLORFX_CBCR`` ``(integer)`` Determines the Cb and Cr coefficients for ``V4L2_COLORFX_SET_CBCR`` diff --git a/drivers/media/v4l2-core/v4l2-ctrls-defs.c b/drivers/media/v4l2-core/v4l2-ctrls-defs.c index 0cb6c0f18b39..431f7ec17557 100644 --- a/drivers/media/v4l2-core/v4l2-ctrls-defs.c +++ b/drivers/media/v4l2-core/v4l2-ctrls-defs.c @@ -785,6 +785,7 @@ const char *v4l2_ctrl_get_name(u32 id) case V4L2_CID_MIN_BUFFERS_FOR_OUTPUT: return "Min Number of Output Buffers"; case V4L2_CID_ALPHA_COMPONENT: return "Alpha Component"; case V4L2_CID_COLORFX_CBCR: return "Color Effects, CbCr"; + case V4L2_CID_COLORFX_RGB: return "Color Effects, RGB"; /* * Codec controls @@ -1394,11 +1395,12 @@ void v4l2_ctrl_fill(u32 id, const char **name, enum v4l2_ctrl_type *type, *min = *max = *step = *def = 0; break; case V4L2_CID_BG_COLOR: + case V4L2_CID_COLORFX_RGB: *type = V4L2_CTRL_TYPE_INTEGER; *step = 1; *min = 0; - /* Max is calculated as RGB888 that is 2^24 */ - *max = 0xFFFFFF; + /* Max is calculated as RGB888 that is 2^24 - 1 */ + *max = 0xffffff; break; case V4L2_CID_COLORFX_CBCR: *type = V4L2_CTRL_TYPE_INTEGER; diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h index 5fea5feb0412..c9eea93a43a9 100644 --- a/include/uapi/linux/v4l2-controls.h +++ b/include/uapi/linux/v4l2-controls.h @@ -128,6 +128,7 @@ enum v4l2_colorfx { V4L2_COLORFX_SOLARIZATION = 13, V4L2_COLORFX_ANTIQUE = 14, V4L2_COLORFX_SET_CBCR = 15, + V4L2_COLORFX_SET_RGB = 16, }; #define V4L2_CID_AUTOBRIGHTNESS (V4L2_CID_BASE+32) #define V4L2_CID_BAND_STOP_FILTER (V4L2_CID_BASE+33) @@ -145,9 +146,10 @@ enum v4l2_colorfx { #define V4L2_CID_ALPHA_COMPONENT (V4L2_CID_BASE+41) #define V4L2_CID_COLORFX_CBCR (V4L2_CID_BASE+42) +#define V4L2_CID_COLORFX_RGB (V4L2_CID_BASE+43) /* last CID + 1 */ -#define V4L2_CID_LASTP1 (V4L2_CID_BASE+43) +#define V4L2_CID_LASTP1 (V4L2_CID_BASE+44) /* USER-class private control IDs */ -- cgit v1.2.3 From 13cae4a104d2b7205696229ba85d34cc035f8c84 Mon Sep 17 00:00:00 2001 From: Matt Johnston Date: Mon, 15 Nov 2021 10:49:21 +0800 Subject: i2c: core: Allow 255 byte transfers for SMBus 3.x SMBus 3.0 increased the maximum block transfer size from 32 bytes to 255 bytes. We increase the size of struct i2c_smbus_data's block[] member. i2c_smbus_xfer() and i2c_smbus_xfer_emulated() now support 255 byte block operations, other block functions remain limited to 32 bytes for compatibility with existing callers. We allow adapters to indicate support for the larger size with I2C_FUNC_SMBUS_V3_BLOCK. Most emulated drivers should be able to use 255 byte blocks by replacing I2C_SMBUS_BLOCK_MAX with I2C_SMBUS_V3_BLOCK_MAX though some will have hardware limitations that need testing. Signed-off-by: Matt Johnston Signed-off-by: David S. Miller --- drivers/i2c/i2c-core-smbus.c | 20 +++++++++++++------- include/linux/i2c.h | 13 +++++++++++++ include/uapi/linux/i2c.h | 5 ++++- 3 files changed, 30 insertions(+), 8 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/i2c/i2c-core-smbus.c b/drivers/i2c/i2c-core-smbus.c index e5b2d1465e7e..743415584aba 100644 --- a/drivers/i2c/i2c-core-smbus.c +++ b/drivers/i2c/i2c-core-smbus.c @@ -303,7 +303,8 @@ static void i2c_smbus_try_get_dmabuf(struct i2c_msg *msg, u8 init_val) bool is_read = msg->flags & I2C_M_RD; unsigned char *dma_buf; - dma_buf = kzalloc(I2C_SMBUS_BLOCK_MAX + (is_read ? 2 : 3), GFP_KERNEL); + dma_buf = kzalloc(I2C_SMBUS_V3_BLOCK_MAX + (is_read ? 2 : 3), + GFP_KERNEL); if (!dma_buf) return; @@ -329,9 +330,10 @@ static s32 i2c_smbus_xfer_emulated(struct i2c_adapter *adapter, u16 addr, * initialize most things with sane defaults, to keep the code below * somewhat simpler. */ - unsigned char msgbuf0[I2C_SMBUS_BLOCK_MAX+3]; - unsigned char msgbuf1[I2C_SMBUS_BLOCK_MAX+2]; + unsigned char msgbuf0[I2C_SMBUS_V3_BLOCK_MAX+3]; + unsigned char msgbuf1[I2C_SMBUS_V3_BLOCK_MAX+2]; int nmsgs = read_write == I2C_SMBUS_READ ? 2 : 1; + u16 block_max; u8 partial_pec = 0; int status; struct i2c_msg msg[2] = { @@ -350,6 +352,10 @@ static s32 i2c_smbus_xfer_emulated(struct i2c_adapter *adapter, u16 addr, bool wants_pec = ((flags & I2C_CLIENT_PEC) && size != I2C_SMBUS_QUICK && size != I2C_SMBUS_I2C_BLOCK_DATA); + /* Drivers must opt in to 255 byte max block size */ + block_max = i2c_check_functionality(adapter, I2C_FUNC_SMBUS_V3_BLOCK) + ? I2C_SMBUS_V3_BLOCK_MAX : I2C_SMBUS_BLOCK_MAX; + msgbuf0[0] = command; switch (size) { case I2C_SMBUS_QUICK: @@ -399,7 +405,7 @@ static s32 i2c_smbus_xfer_emulated(struct i2c_adapter *adapter, u16 addr, i2c_smbus_try_get_dmabuf(&msg[1], 0); } else { msg[0].len = data->block[0] + 2; - if (msg[0].len > I2C_SMBUS_BLOCK_MAX + 2) { + if (msg[0].len > block_max + 2) { dev_err(&adapter->dev, "Invalid block write size %d\n", data->block[0]); @@ -413,7 +419,7 @@ static s32 i2c_smbus_xfer_emulated(struct i2c_adapter *adapter, u16 addr, case I2C_SMBUS_BLOCK_PROC_CALL: nmsgs = 2; /* Another special case */ read_write = I2C_SMBUS_READ; - if (data->block[0] > I2C_SMBUS_BLOCK_MAX) { + if (data->block[0] > block_max) { dev_err(&adapter->dev, "Invalid block write size %d\n", data->block[0]); @@ -430,7 +436,7 @@ static s32 i2c_smbus_xfer_emulated(struct i2c_adapter *adapter, u16 addr, i2c_smbus_try_get_dmabuf(&msg[1], 0); break; case I2C_SMBUS_I2C_BLOCK_DATA: - if (data->block[0] > I2C_SMBUS_BLOCK_MAX) { + if (data->block[0] > block_max) { dev_err(&adapter->dev, "Invalid block %s size %d\n", read_write == I2C_SMBUS_READ ? "read" : "write", data->block[0]); @@ -498,7 +504,7 @@ static s32 i2c_smbus_xfer_emulated(struct i2c_adapter *adapter, u16 addr, break; case I2C_SMBUS_BLOCK_DATA: case I2C_SMBUS_BLOCK_PROC_CALL: - if (msg[1].buf[0] > I2C_SMBUS_BLOCK_MAX) { + if (msg[1].buf[0] > block_max) { dev_err(&adapter->dev, "Invalid block size returned: %d\n", msg[1].buf[0]); diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 16119ac1aa97..353d6b4e7a53 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -52,6 +52,19 @@ typedef int (*i2c_slave_cb_t)(struct i2c_client *client, struct module; struct property_entry; +/* SMBus 3.0 extends the maximum block read/write size to 255 (from 32). + * The larger size is only supported by some drivers, indicated by + * the I2C_FUNC_SMBUS_V3_BLOCK functionality bit. + */ +#define I2C_SMBUS_V3_BLOCK_MAX 255 /* As specified in SMBus 3.0 standard */ + +/* Note compatibility definition in uapi header with 32 byte block */ +union i2c_smbus_data { + __u8 byte; + __u16 word; + __u8 block[I2C_SMBUS_V3_BLOCK_MAX + 1]; /* block[0] is used for length */ +}; + #if IS_ENABLED(CONFIG_I2C) /* Return the Frequency mode string based on the bus frequency */ const char *i2c_freq_mode_string(u32 bus_freq_hz); diff --git a/include/uapi/linux/i2c.h b/include/uapi/linux/i2c.h index 92326ebde350..7b7d90b50cf0 100644 --- a/include/uapi/linux/i2c.h +++ b/include/uapi/linux/i2c.h @@ -108,6 +108,7 @@ struct i2c_msg { #define I2C_FUNC_SMBUS_READ_I2C_BLOCK 0x04000000 /* I2C-like block xfer */ #define I2C_FUNC_SMBUS_WRITE_I2C_BLOCK 0x08000000 /* w/ 1-byte reg. addr. */ #define I2C_FUNC_SMBUS_HOST_NOTIFY 0x10000000 /* SMBus 2.0 or later */ +#define I2C_FUNC_SMBUS_V3_BLOCK 0x20000000 /* Device supports 255 byte block */ #define I2C_FUNC_SMBUS_BYTE (I2C_FUNC_SMBUS_READ_BYTE | \ I2C_FUNC_SMBUS_WRITE_BYTE) @@ -137,13 +138,15 @@ struct i2c_msg { /* * Data for SMBus Messages */ -#define I2C_SMBUS_BLOCK_MAX 32 /* As specified in SMBus standard */ +#define I2C_SMBUS_BLOCK_MAX 32 /* As specified in SMBus 2.0 standard */ +#ifndef __KERNEL__ union i2c_smbus_data { __u8 byte; __u16 word; __u8 block[I2C_SMBUS_BLOCK_MAX + 2]; /* block[0] is used for length */ /* and one more for user-space compatibility */ }; +#endif /* i2c_smbus_xfer read or write markers */ #define I2C_SMBUS_READ 1 -- cgit v1.2.3 From 84a107e68b34217eff536e81a6a6f419ee0d0f7e Mon Sep 17 00:00:00 2001 From: Matt Johnston Date: Mon, 15 Nov 2021 10:49:22 +0800 Subject: i2c: dev: Handle 255 byte blocks for i2c ioctl I2C_SMBUS is limited to 32 bytes due to compatibility with the 32 byte i2c_smbus_data.block I2C_RDWR allows larger transfers if sufficient sized buffers are passed. Signed-off-by: Matt Johnston Signed-off-by: David S. Miller --- drivers/i2c/i2c-dev.c | 93 +++++++++++++++++++++++++++++++++++++------- include/uapi/linux/i2c-dev.h | 2 + include/uapi/linux/i2c.h | 2 + 3 files changed, 83 insertions(+), 14 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/i2c/i2c-dev.c b/drivers/i2c/i2c-dev.c index bce0e8bb7852..5ee9118c0407 100644 --- a/drivers/i2c/i2c-dev.c +++ b/drivers/i2c/i2c-dev.c @@ -46,6 +46,24 @@ struct i2c_dev { struct cdev cdev; }; +/* The userspace union i2c_smbus_data for I2C_SMBUS ioctl is limited + * to 32 bytes (I2C_SMBUS_BLOCK_MAX) for compatibility. + */ +union compat_i2c_smbus_data { + __u8 byte; + __u16 word; + __u8 block[I2C_SMBUS_BLOCK_MAX + 2]; /* block[0] is used for length */ + /* and one more for user-space compatibility */ +}; + +/* Must match i2c-dev.h definition with compat .data member */ +struct i2c_smbus_ioctl_data { + __u8 read_write; + __u8 command; + __u32 size; + union compat_i2c_smbus_data __user *data; +}; + #define I2C_MINORS (MINORMASK + 1) static LIST_HEAD(i2c_dev_list); static DEFINE_SPINLOCK(i2c_dev_list_lock); @@ -235,14 +253,17 @@ static int i2cdev_check_addr(struct i2c_adapter *adapter, unsigned int addr) static noinline int i2cdev_ioctl_rdwr(struct i2c_client *client, unsigned nmsgs, struct i2c_msg *msgs) { - u8 __user **data_ptrs; + u8 __user **data_ptrs = NULL; + u16 *orig_lens = NULL; int i, res; + res = -ENOMEM; data_ptrs = kmalloc_array(nmsgs, sizeof(u8 __user *), GFP_KERNEL); - if (data_ptrs == NULL) { - kfree(msgs); - return -ENOMEM; - } + if (data_ptrs == NULL) + goto out; + orig_lens = kmalloc_array(nmsgs, sizeof(u16), GFP_KERNEL); + if (orig_lens == NULL) + goto out; res = 0; for (i = 0; i < nmsgs; i++) { @@ -253,12 +274,30 @@ static noinline int i2cdev_ioctl_rdwr(struct i2c_client *client, } data_ptrs[i] = (u8 __user *)msgs[i].buf; - msgs[i].buf = memdup_user(data_ptrs[i], msgs[i].len); + msgs[i].buf = NULL; + if (msgs[i].len < 1) { + /* Sanity check */ + res = -EINVAL; + break; + + } + /* Allocate a larger buffer to accommodate possible 255 byte + * blocks. Read results will be dropped later + * if they are too large for the original length. + */ + orig_lens[i] = msgs[i].len; + msgs[i].buf = kmalloc(msgs[i].len + I2C_SMBUS_V3_BLOCK_MAX, + GFP_USER | __GFP_NOWARN); if (IS_ERR(msgs[i].buf)) { res = PTR_ERR(msgs[i].buf); break; } - /* memdup_user allocates with GFP_KERNEL, so DMA is ok */ + if (copy_from_user(msgs[i].buf, data_ptrs[i], msgs[i].len)) { + kfree(msgs[i].buf); + res = -EFAULT; + break; + } + /* Buffer from kmalloc, so DMA is ok */ msgs[i].flags |= I2C_M_DMA_SAFE; /* @@ -274,7 +313,7 @@ static noinline int i2cdev_ioctl_rdwr(struct i2c_client *client, */ if (msgs[i].flags & I2C_M_RECV_LEN) { if (!(msgs[i].flags & I2C_M_RD) || - msgs[i].len < 1 || msgs[i].buf[0] < 1 || + msgs[i].buf[0] < 1 || msgs[i].len < msgs[i].buf[0] + I2C_SMBUS_BLOCK_MAX) { i++; @@ -297,12 +336,16 @@ static noinline int i2cdev_ioctl_rdwr(struct i2c_client *client, res = i2c_transfer(client->adapter, msgs, nmsgs); while (i-- > 0) { if (res >= 0 && (msgs[i].flags & I2C_M_RD)) { - if (copy_to_user(data_ptrs[i], msgs[i].buf, - msgs[i].len)) + if (orig_lens[i] < msgs[i].len) + res = -EINVAL; + else if (copy_to_user(data_ptrs[i], msgs[i].buf, + msgs[i].len)) res = -EFAULT; } kfree(msgs[i].buf); } +out: + kfree(orig_lens); kfree(data_ptrs); kfree(msgs); return res; @@ -310,7 +353,7 @@ static noinline int i2cdev_ioctl_rdwr(struct i2c_client *client, static noinline int i2cdev_ioctl_smbus(struct i2c_client *client, u8 read_write, u8 command, u32 size, - union i2c_smbus_data __user *data) + union compat_i2c_smbus_data __user *data) { union i2c_smbus_data temp = {}; int datasize, res; @@ -371,6 +414,16 @@ static noinline int i2cdev_ioctl_smbus(struct i2c_client *client, if (copy_from_user(&temp, data, datasize)) return -EFAULT; } + if ((size == I2C_SMBUS_BLOCK_PROC_CALL || + size == I2C_SMBUS_I2C_BLOCK_DATA || + size == I2C_SMBUS_BLOCK_DATA) && + read_write == I2C_SMBUS_WRITE && + temp.block[0] > I2C_SMBUS_BLOCK_MAX) { + /* Don't accept writes larger than the buffer size */ + dev_dbg(&client->adapter->dev, "block write is too large"); + return -EINVAL; + + } if (size == I2C_SMBUS_I2C_BLOCK_BROKEN) { /* Convert old I2C block commands to the new convention. This preserves binary compatibility. */ @@ -380,9 +433,21 @@ static noinline int i2cdev_ioctl_smbus(struct i2c_client *client, } res = i2c_smbus_xfer(client->adapter, client->addr, client->flags, read_write, command, size, &temp); - if (!res && ((size == I2C_SMBUS_PROC_CALL) || - (size == I2C_SMBUS_BLOCK_PROC_CALL) || - (read_write == I2C_SMBUS_READ))) { + if (res) + return res; + if ((size == I2C_SMBUS_BLOCK_PROC_CALL || + size == I2C_SMBUS_I2C_BLOCK_DATA || + size == I2C_SMBUS_BLOCK_DATA) && + read_write == I2C_SMBUS_READ && + temp.block[0] > I2C_SMBUS_BLOCK_MAX) { + /* Don't accept reads larger than the buffer size */ + dev_dbg(&client->adapter->dev, "block read is too large"); + return -EINVAL; + + } + if ((size == I2C_SMBUS_PROC_CALL) || + (size == I2C_SMBUS_BLOCK_PROC_CALL) || + (read_write == I2C_SMBUS_READ)) { if (copy_to_user(data, &temp, datasize)) return -EFAULT; } diff --git a/include/uapi/linux/i2c-dev.h b/include/uapi/linux/i2c-dev.h index 1c4cec4ddd84..46ce31d42f7d 100644 --- a/include/uapi/linux/i2c-dev.h +++ b/include/uapi/linux/i2c-dev.h @@ -39,12 +39,14 @@ /* This is the structure as used in the I2C_SMBUS ioctl call */ +#ifndef __KERNEL__ struct i2c_smbus_ioctl_data { __u8 read_write; __u8 command; __u32 size; union i2c_smbus_data __user *data; }; +#endif /* This is the structure as used in the I2C_RDWR ioctl call */ struct i2c_rdwr_ioctl_data { diff --git a/include/uapi/linux/i2c.h b/include/uapi/linux/i2c.h index 7b7d90b50cf0..c3534ab1ae53 100644 --- a/include/uapi/linux/i2c.h +++ b/include/uapi/linux/i2c.h @@ -109,6 +109,8 @@ struct i2c_msg { #define I2C_FUNC_SMBUS_WRITE_I2C_BLOCK 0x08000000 /* w/ 1-byte reg. addr. */ #define I2C_FUNC_SMBUS_HOST_NOTIFY 0x10000000 /* SMBus 2.0 or later */ #define I2C_FUNC_SMBUS_V3_BLOCK 0x20000000 /* Device supports 255 byte block */ + /* Note that I2C_SMBUS ioctl only */ + /* supports a 32 byte block */ #define I2C_FUNC_SMBUS_BYTE (I2C_FUNC_SMBUS_READ_BYTE | \ I2C_FUNC_SMBUS_WRITE_BYTE) -- cgit v1.2.3 From 2f6a470d6545841cf1891b87e360d3998ef024c8 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 15 Nov 2021 07:49:46 -0800 Subject: Revert "Merge branch 'mctp-i2c-driver'" This reverts commit 71812af7234f30362b43ccff33f93890ae4c0655, reversing changes made to cc0be1ad686fb29a4d127948486f40b17fb34b50. Wolfram Sang says: Please revert. Besides the driver in net, it modifies the I2C core code. This has not been acked by the I2C maintainer (in this case me). So, please don't pull this in via the net tree. The question raised here (extending SMBus calls to 255 byte) is complicated because we need ABI backwards compatibility. Link: https://lore.kernel.org/all/YZJ9H4eM%2FM7OXVN0@shikoro/ Signed-off-by: Jakub Kicinski --- Documentation/devicetree/bindings/i2c/i2c.txt | 4 - .../bindings/net/mctp-i2c-controller.yaml | 92 -- drivers/i2c/busses/i2c-aspeed.c | 5 +- drivers/i2c/busses/i2c-npcm7xx.c | 3 +- drivers/i2c/i2c-core-smbus.c | 20 +- drivers/i2c/i2c-dev.c | 93 +- drivers/net/mctp/Kconfig | 12 - drivers/net/mctp/Makefile | 1 - drivers/net/mctp/mctp-i2c.c | 982 --------------------- include/linux/i2c.h | 13 - include/uapi/linux/i2c-dev.h | 2 - include/uapi/linux/i2c.h | 7 +- 12 files changed, 25 insertions(+), 1209 deletions(-) delete mode 100644 Documentation/devicetree/bindings/net/mctp-i2c-controller.yaml delete mode 100644 drivers/net/mctp/mctp-i2c.c (limited to 'include/uapi/linux') diff --git a/Documentation/devicetree/bindings/i2c/i2c.txt b/Documentation/devicetree/bindings/i2c/i2c.txt index fc3dd7ec0445..b864916e087f 100644 --- a/Documentation/devicetree/bindings/i2c/i2c.txt +++ b/Documentation/devicetree/bindings/i2c/i2c.txt @@ -95,10 +95,6 @@ wants to support one of the below features, it should adapt these bindings. - smbus-alert states that the optional SMBus-Alert feature apply to this bus. -- mctp-controller - indicates that the system is accessible via this bus as an endpoint for - MCTP over I2C transport. - Required properties (per child device) -------------------------------------- diff --git a/Documentation/devicetree/bindings/net/mctp-i2c-controller.yaml b/Documentation/devicetree/bindings/net/mctp-i2c-controller.yaml deleted file mode 100644 index afd11c9422fa..000000000000 --- a/Documentation/devicetree/bindings/net/mctp-i2c-controller.yaml +++ /dev/null @@ -1,92 +0,0 @@ -# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) -%YAML 1.2 ---- -$id: http://devicetree.org/schemas/net/mctp-i2c-controller.yaml# -$schema: http://devicetree.org/meta-schemas/core.yaml# - -title: MCTP I2C transport binding - -maintainers: - - Matt Johnston - -description: | - An mctp-i2c-controller defines a local MCTP endpoint on an I2C controller. - MCTP I2C is specified by DMTF DSP0237. - - An mctp-i2c-controller must be attached to an I2C adapter which supports - slave functionality. I2C busses (either directly or as subordinate mux - busses) are attached to the mctp-i2c-controller with a 'mctp-controller' - property on each used bus. Each mctp-controller I2C bus will be presented - to the host system as a separate MCTP I2C instance. - -properties: - compatible: - const: mctp-i2c-controller - - reg: - minimum: 0x40000000 - maximum: 0x4000007f - description: | - 7 bit I2C address of the local endpoint. - I2C_OWN_SLAVE_ADDRESS (1<<30) flag must be set. - -additionalProperties: false - -required: - - compatible - - reg - -examples: - - | - // Basic case of a single I2C bus - #include - - i2c { - #address-cells = <1>; - #size-cells = <0>; - mctp-controller; - - mctp@30 { - compatible = "mctp-i2c-controller"; - reg = <(0x30 | I2C_OWN_SLAVE_ADDRESS)>; - }; - }; - - - | - // Mux topology with multiple MCTP-handling busses under - // a single mctp-i2c-controller. - // i2c1 and i2c6 can have MCTP devices, i2c5 does not. - #include - - i2c1: i2c { - #address-cells = <1>; - #size-cells = <0>; - mctp-controller; - - mctp@50 { - compatible = "mctp-i2c-controller"; - reg = <(0x50 | I2C_OWN_SLAVE_ADDRESS)>; - }; - }; - - i2c-mux { - #address-cells = <1>; - #size-cells = <0>; - i2c-parent = <&i2c1>; - - i2c5: i2c@0 { - #address-cells = <1>; - #size-cells = <0>; - reg = <0>; - eeprom@33 { - reg = <0x33>; - }; - }; - - i2c6: i2c@1 { - #address-cells = <1>; - #size-cells = <0>; - reg = <1>; - mctp-controller; - }; - }; diff --git a/drivers/i2c/busses/i2c-aspeed.c b/drivers/i2c/busses/i2c-aspeed.c index 7395f3702fae..67e8b97c0c95 100644 --- a/drivers/i2c/busses/i2c-aspeed.c +++ b/drivers/i2c/busses/i2c-aspeed.c @@ -533,7 +533,7 @@ static u32 aspeed_i2c_master_irq(struct aspeed_i2c_bus *bus, u32 irq_status) msg->buf[bus->buf_index++] = recv_byte; if (msg->flags & I2C_M_RECV_LEN) { - if (unlikely(recv_byte > I2C_SMBUS_V3_BLOCK_MAX)) { + if (unlikely(recv_byte > I2C_SMBUS_BLOCK_MAX)) { bus->cmd_err = -EPROTO; aspeed_i2c_do_stop(bus); goto out_no_complete; @@ -718,8 +718,7 @@ static int aspeed_i2c_master_xfer(struct i2c_adapter *adap, static u32 aspeed_i2c_functionality(struct i2c_adapter *adap) { - return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL | - I2C_FUNC_SMBUS_BLOCK_DATA | I2C_FUNC_SMBUS_V3_BLOCK; + return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL | I2C_FUNC_SMBUS_BLOCK_DATA; } #if IS_ENABLED(CONFIG_I2C_SLAVE) diff --git a/drivers/i2c/busses/i2c-npcm7xx.c b/drivers/i2c/busses/i2c-npcm7xx.c index 6d60f65add85..2ad166355ec9 100644 --- a/drivers/i2c/busses/i2c-npcm7xx.c +++ b/drivers/i2c/busses/i2c-npcm7xx.c @@ -1399,7 +1399,7 @@ static void npcm_i2c_irq_master_handler_read(struct npcm_i2c *bus) if (bus->read_block_use) { /* first byte in block protocol is the size: */ data = npcm_i2c_rd_byte(bus); - data = clamp_val(data, 1, I2C_SMBUS_V3_BLOCK_MAX); + data = clamp_val(data, 1, I2C_SMBUS_BLOCK_MAX); bus->rd_size = data + block_extra_bytes_size; bus->rd_buf[bus->rd_ind++] = data; @@ -2187,7 +2187,6 @@ static u32 npcm_i2c_functionality(struct i2c_adapter *adap) I2C_FUNC_SMBUS_EMUL | I2C_FUNC_SMBUS_BLOCK_DATA | I2C_FUNC_SMBUS_PEC | - I2C_FUNC_SMBUS_V3_BLOCK | I2C_FUNC_SLAVE; } diff --git a/drivers/i2c/i2c-core-smbus.c b/drivers/i2c/i2c-core-smbus.c index 743415584aba..e5b2d1465e7e 100644 --- a/drivers/i2c/i2c-core-smbus.c +++ b/drivers/i2c/i2c-core-smbus.c @@ -303,8 +303,7 @@ static void i2c_smbus_try_get_dmabuf(struct i2c_msg *msg, u8 init_val) bool is_read = msg->flags & I2C_M_RD; unsigned char *dma_buf; - dma_buf = kzalloc(I2C_SMBUS_V3_BLOCK_MAX + (is_read ? 2 : 3), - GFP_KERNEL); + dma_buf = kzalloc(I2C_SMBUS_BLOCK_MAX + (is_read ? 2 : 3), GFP_KERNEL); if (!dma_buf) return; @@ -330,10 +329,9 @@ static s32 i2c_smbus_xfer_emulated(struct i2c_adapter *adapter, u16 addr, * initialize most things with sane defaults, to keep the code below * somewhat simpler. */ - unsigned char msgbuf0[I2C_SMBUS_V3_BLOCK_MAX+3]; - unsigned char msgbuf1[I2C_SMBUS_V3_BLOCK_MAX+2]; + unsigned char msgbuf0[I2C_SMBUS_BLOCK_MAX+3]; + unsigned char msgbuf1[I2C_SMBUS_BLOCK_MAX+2]; int nmsgs = read_write == I2C_SMBUS_READ ? 2 : 1; - u16 block_max; u8 partial_pec = 0; int status; struct i2c_msg msg[2] = { @@ -352,10 +350,6 @@ static s32 i2c_smbus_xfer_emulated(struct i2c_adapter *adapter, u16 addr, bool wants_pec = ((flags & I2C_CLIENT_PEC) && size != I2C_SMBUS_QUICK && size != I2C_SMBUS_I2C_BLOCK_DATA); - /* Drivers must opt in to 255 byte max block size */ - block_max = i2c_check_functionality(adapter, I2C_FUNC_SMBUS_V3_BLOCK) - ? I2C_SMBUS_V3_BLOCK_MAX : I2C_SMBUS_BLOCK_MAX; - msgbuf0[0] = command; switch (size) { case I2C_SMBUS_QUICK: @@ -405,7 +399,7 @@ static s32 i2c_smbus_xfer_emulated(struct i2c_adapter *adapter, u16 addr, i2c_smbus_try_get_dmabuf(&msg[1], 0); } else { msg[0].len = data->block[0] + 2; - if (msg[0].len > block_max + 2) { + if (msg[0].len > I2C_SMBUS_BLOCK_MAX + 2) { dev_err(&adapter->dev, "Invalid block write size %d\n", data->block[0]); @@ -419,7 +413,7 @@ static s32 i2c_smbus_xfer_emulated(struct i2c_adapter *adapter, u16 addr, case I2C_SMBUS_BLOCK_PROC_CALL: nmsgs = 2; /* Another special case */ read_write = I2C_SMBUS_READ; - if (data->block[0] > block_max) { + if (data->block[0] > I2C_SMBUS_BLOCK_MAX) { dev_err(&adapter->dev, "Invalid block write size %d\n", data->block[0]); @@ -436,7 +430,7 @@ static s32 i2c_smbus_xfer_emulated(struct i2c_adapter *adapter, u16 addr, i2c_smbus_try_get_dmabuf(&msg[1], 0); break; case I2C_SMBUS_I2C_BLOCK_DATA: - if (data->block[0] > block_max) { + if (data->block[0] > I2C_SMBUS_BLOCK_MAX) { dev_err(&adapter->dev, "Invalid block %s size %d\n", read_write == I2C_SMBUS_READ ? "read" : "write", data->block[0]); @@ -504,7 +498,7 @@ static s32 i2c_smbus_xfer_emulated(struct i2c_adapter *adapter, u16 addr, break; case I2C_SMBUS_BLOCK_DATA: case I2C_SMBUS_BLOCK_PROC_CALL: - if (msg[1].buf[0] > block_max) { + if (msg[1].buf[0] > I2C_SMBUS_BLOCK_MAX) { dev_err(&adapter->dev, "Invalid block size returned: %d\n", msg[1].buf[0]); diff --git a/drivers/i2c/i2c-dev.c b/drivers/i2c/i2c-dev.c index 5ee9118c0407..bce0e8bb7852 100644 --- a/drivers/i2c/i2c-dev.c +++ b/drivers/i2c/i2c-dev.c @@ -46,24 +46,6 @@ struct i2c_dev { struct cdev cdev; }; -/* The userspace union i2c_smbus_data for I2C_SMBUS ioctl is limited - * to 32 bytes (I2C_SMBUS_BLOCK_MAX) for compatibility. - */ -union compat_i2c_smbus_data { - __u8 byte; - __u16 word; - __u8 block[I2C_SMBUS_BLOCK_MAX + 2]; /* block[0] is used for length */ - /* and one more for user-space compatibility */ -}; - -/* Must match i2c-dev.h definition with compat .data member */ -struct i2c_smbus_ioctl_data { - __u8 read_write; - __u8 command; - __u32 size; - union compat_i2c_smbus_data __user *data; -}; - #define I2C_MINORS (MINORMASK + 1) static LIST_HEAD(i2c_dev_list); static DEFINE_SPINLOCK(i2c_dev_list_lock); @@ -253,17 +235,14 @@ static int i2cdev_check_addr(struct i2c_adapter *adapter, unsigned int addr) static noinline int i2cdev_ioctl_rdwr(struct i2c_client *client, unsigned nmsgs, struct i2c_msg *msgs) { - u8 __user **data_ptrs = NULL; - u16 *orig_lens = NULL; + u8 __user **data_ptrs; int i, res; - res = -ENOMEM; data_ptrs = kmalloc_array(nmsgs, sizeof(u8 __user *), GFP_KERNEL); - if (data_ptrs == NULL) - goto out; - orig_lens = kmalloc_array(nmsgs, sizeof(u16), GFP_KERNEL); - if (orig_lens == NULL) - goto out; + if (data_ptrs == NULL) { + kfree(msgs); + return -ENOMEM; + } res = 0; for (i = 0; i < nmsgs; i++) { @@ -274,30 +253,12 @@ static noinline int i2cdev_ioctl_rdwr(struct i2c_client *client, } data_ptrs[i] = (u8 __user *)msgs[i].buf; - msgs[i].buf = NULL; - if (msgs[i].len < 1) { - /* Sanity check */ - res = -EINVAL; - break; - - } - /* Allocate a larger buffer to accommodate possible 255 byte - * blocks. Read results will be dropped later - * if they are too large for the original length. - */ - orig_lens[i] = msgs[i].len; - msgs[i].buf = kmalloc(msgs[i].len + I2C_SMBUS_V3_BLOCK_MAX, - GFP_USER | __GFP_NOWARN); + msgs[i].buf = memdup_user(data_ptrs[i], msgs[i].len); if (IS_ERR(msgs[i].buf)) { res = PTR_ERR(msgs[i].buf); break; } - if (copy_from_user(msgs[i].buf, data_ptrs[i], msgs[i].len)) { - kfree(msgs[i].buf); - res = -EFAULT; - break; - } - /* Buffer from kmalloc, so DMA is ok */ + /* memdup_user allocates with GFP_KERNEL, so DMA is ok */ msgs[i].flags |= I2C_M_DMA_SAFE; /* @@ -313,7 +274,7 @@ static noinline int i2cdev_ioctl_rdwr(struct i2c_client *client, */ if (msgs[i].flags & I2C_M_RECV_LEN) { if (!(msgs[i].flags & I2C_M_RD) || - msgs[i].buf[0] < 1 || + msgs[i].len < 1 || msgs[i].buf[0] < 1 || msgs[i].len < msgs[i].buf[0] + I2C_SMBUS_BLOCK_MAX) { i++; @@ -336,16 +297,12 @@ static noinline int i2cdev_ioctl_rdwr(struct i2c_client *client, res = i2c_transfer(client->adapter, msgs, nmsgs); while (i-- > 0) { if (res >= 0 && (msgs[i].flags & I2C_M_RD)) { - if (orig_lens[i] < msgs[i].len) - res = -EINVAL; - else if (copy_to_user(data_ptrs[i], msgs[i].buf, - msgs[i].len)) + if (copy_to_user(data_ptrs[i], msgs[i].buf, + msgs[i].len)) res = -EFAULT; } kfree(msgs[i].buf); } -out: - kfree(orig_lens); kfree(data_ptrs); kfree(msgs); return res; @@ -353,7 +310,7 @@ out: static noinline int i2cdev_ioctl_smbus(struct i2c_client *client, u8 read_write, u8 command, u32 size, - union compat_i2c_smbus_data __user *data) + union i2c_smbus_data __user *data) { union i2c_smbus_data temp = {}; int datasize, res; @@ -414,16 +371,6 @@ static noinline int i2cdev_ioctl_smbus(struct i2c_client *client, if (copy_from_user(&temp, data, datasize)) return -EFAULT; } - if ((size == I2C_SMBUS_BLOCK_PROC_CALL || - size == I2C_SMBUS_I2C_BLOCK_DATA || - size == I2C_SMBUS_BLOCK_DATA) && - read_write == I2C_SMBUS_WRITE && - temp.block[0] > I2C_SMBUS_BLOCK_MAX) { - /* Don't accept writes larger than the buffer size */ - dev_dbg(&client->adapter->dev, "block write is too large"); - return -EINVAL; - - } if (size == I2C_SMBUS_I2C_BLOCK_BROKEN) { /* Convert old I2C block commands to the new convention. This preserves binary compatibility. */ @@ -433,21 +380,9 @@ static noinline int i2cdev_ioctl_smbus(struct i2c_client *client, } res = i2c_smbus_xfer(client->adapter, client->addr, client->flags, read_write, command, size, &temp); - if (res) - return res; - if ((size == I2C_SMBUS_BLOCK_PROC_CALL || - size == I2C_SMBUS_I2C_BLOCK_DATA || - size == I2C_SMBUS_BLOCK_DATA) && - read_write == I2C_SMBUS_READ && - temp.block[0] > I2C_SMBUS_BLOCK_MAX) { - /* Don't accept reads larger than the buffer size */ - dev_dbg(&client->adapter->dev, "block read is too large"); - return -EINVAL; - - } - if ((size == I2C_SMBUS_PROC_CALL) || - (size == I2C_SMBUS_BLOCK_PROC_CALL) || - (read_write == I2C_SMBUS_READ)) { + if (!res && ((size == I2C_SMBUS_PROC_CALL) || + (size == I2C_SMBUS_BLOCK_PROC_CALL) || + (read_write == I2C_SMBUS_READ))) { if (copy_to_user(data, &temp, datasize)) return -EFAULT; } diff --git a/drivers/net/mctp/Kconfig b/drivers/net/mctp/Kconfig index b758b29c2ddf..d8f966cedc89 100644 --- a/drivers/net/mctp/Kconfig +++ b/drivers/net/mctp/Kconfig @@ -3,18 +3,6 @@ if MCTP menu "MCTP Device Drivers" -config MCTP_TRANSPORT_I2C - tristate "MCTP SMBus/I2C transport" - # i2c-mux is optional, but we must build as a module if i2c-mux is a module - depends on I2C_MUX || !I2C_MUX - depends on I2C - depends on I2C_SLAVE - select MCTP_FLOWS - help - Provides a driver to access MCTP devices over SMBus/I2C transport, - from DMTF specification DSP0237. A MCTP protocol network device is - created for each I2C bus that has been assigned a mctp-i2c device. - endmenu endif diff --git a/drivers/net/mctp/Makefile b/drivers/net/mctp/Makefile index 73dc411986a6..e69de29bb2d1 100644 --- a/drivers/net/mctp/Makefile +++ b/drivers/net/mctp/Makefile @@ -1 +0,0 @@ -obj-$(CONFIG_MCTP_TRANSPORT_I2C) += mctp-i2c.o diff --git a/drivers/net/mctp/mctp-i2c.c b/drivers/net/mctp/mctp-i2c.c deleted file mode 100644 index ed213b4765a1..000000000000 --- a/drivers/net/mctp/mctp-i2c.c +++ /dev/null @@ -1,982 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Management Controller Transport Protocol (MCTP) - * - * Copyright (c) 2021 Code Construct - * Copyright (c) 2021 Google - */ - -#include -#include -#include -#include -#include -#include -#include - -/* SMBus 3.0 allows 255 data bytes (plus PEC), but the - * first byte is taken for source slave address. - */ -#define MCTP_I2C_MAXBLOCK 255 -#define MCTP_I2C_MAXMTU (MCTP_I2C_MAXBLOCK - 1) -#define MCTP_I2C_MINMTU (64 + 4) -/* Allow space for address, command, byte_count, databytes, PEC */ -#define MCTP_I2C_RXBUFSZ (3 + MCTP_I2C_MAXBLOCK + 1) -#define MCTP_I2C_MINLEN 8 -#define MCTP_I2C_COMMANDCODE 0x0f -#define MCTP_I2C_TX_WORK_LEN 100 -// sufficient for 64kB at min mtu -#define MCTP_I2C_TX_QUEUE_LEN 1100 - -#define MCTP_I2C_OF_PROP "mctp-controller" - -enum { - MCTP_I2C_FLOW_STATE_NEW = 0, - MCTP_I2C_FLOW_STATE_ACTIVE, -}; - -static struct { - /* lock protects clients and also prevents adding/removing adapters - * during mctp_i2c_client probe/remove. - */ - struct mutex lock; - // list of struct mctp_i2c_client - struct list_head clients; -} mi_driver_state; - -struct mctp_i2c_client; - -// The netdev structure. One of these per I2C adapter. -struct mctp_i2c_dev { - struct net_device *ndev; - struct i2c_adapter *adapter; - struct mctp_i2c_client *client; - struct list_head list; // for mctp_i2c_client.devs - - size_t pos; - u8 buffer[MCTP_I2C_RXBUFSZ]; - - struct task_struct *tx_thread; - wait_queue_head_t tx_wq; - struct sk_buff_head tx_queue; - - // a fake entry in our tx queue to perform an unlock operation - struct sk_buff unlock_marker; - - spinlock_t flow_lock; // protects i2c_lock_count and release_count - int i2c_lock_count; - int release_count; -}; - -/* The i2c client structure. One per hardware i2c bus at the top of the - * mux tree, shared by multiple netdevs - */ -struct mctp_i2c_client { - struct i2c_client *client; - u8 lladdr; - - struct mctp_i2c_dev *sel; - struct list_head devs; - spinlock_t curr_lock; // protects sel - - struct list_head list; // for mi_driver_state.clients -}; - -// Header on the wire -struct mctp_i2c_hdr { - u8 dest_slave; - u8 command; - u8 byte_count; - u8 source_slave; -}; - -static int mctp_i2c_recv(struct mctp_i2c_dev *midev); -static int mctp_i2c_slave_cb(struct i2c_client *client, - enum i2c_slave_event event, u8 *val); - -static struct i2c_adapter *mux_root_adapter(struct i2c_adapter *adap) -{ -#if IS_ENABLED(CONFIG_I2C_MUX) - return i2c_root_adapter(&adap->dev); -#else - /* In non-mux config all i2c adapters are root adapters */ - return adap; -#endif -} - -static ssize_t mctp_current_mux_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct mctp_i2c_client *mcli = i2c_get_clientdata(to_i2c_client(dev)); - struct net_device *ndev = NULL; - unsigned long flags; - ssize_t l; - - spin_lock_irqsave(&mcli->curr_lock, flags); - if (mcli->sel) { - ndev = mcli->sel->ndev; - dev_hold(ndev); - } - spin_unlock_irqrestore(&mcli->curr_lock, flags); - l = scnprintf(buf, PAGE_SIZE, "%s\n", ndev ? ndev->name : "(none)"); - if (ndev) - dev_put(ndev); - return l; -} -static DEVICE_ATTR_RO(mctp_current_mux); - -/* Creates a new i2c slave device attached to the root adapter. - * Sets up the slave callback. - * Must be called with a client on a root adapter. - */ -static struct mctp_i2c_client *mctp_i2c_new_client(struct i2c_client *client) -{ - struct mctp_i2c_client *mcli = NULL; - struct i2c_adapter *root = NULL; - int rc; - - if (client->flags & I2C_CLIENT_TEN) { - dev_err(&client->dev, "%s failed, MCTP requires a 7-bit I2C address, addr=0x%x", - __func__, client->addr); - rc = -EINVAL; - goto err; - } - - root = mux_root_adapter(client->adapter); - if (!root) { - dev_err(&client->dev, "%s failed to find root adapter\n", __func__); - rc = -ENOENT; - goto err; - } - if (root != client->adapter) { - dev_err(&client->dev, - "A mctp-i2c-controller client cannot be placed on an I2C mux adapter.\n" - " It should be placed on the mux tree root adapter\n" - " then set mctp-controller property on adapters to attach\n"); - rc = -EINVAL; - goto err; - } - - mcli = kzalloc(sizeof(*mcli), GFP_KERNEL); - if (!mcli) { - rc = -ENOMEM; - goto err; - } - spin_lock_init(&mcli->curr_lock); - INIT_LIST_HEAD(&mcli->devs); - INIT_LIST_HEAD(&mcli->list); - mcli->lladdr = client->addr & 0xff; - mcli->client = client; - i2c_set_clientdata(client, mcli); - - rc = i2c_slave_register(mcli->client, mctp_i2c_slave_cb); - if (rc) { - dev_err(&client->dev, "%s i2c register failed %d\n", __func__, rc); - mcli->client = NULL; - i2c_set_clientdata(client, NULL); - goto err; - } - - rc = device_create_file(&client->dev, &dev_attr_mctp_current_mux); - if (rc) { - dev_err(&client->dev, "%s adding sysfs \"%s\" failed %d\n", __func__, - dev_attr_mctp_current_mux.attr.name, rc); - // continue anyway - } - - return mcli; -err: - if (mcli) { - if (mcli->client) { - device_remove_file(&mcli->client->dev, &dev_attr_mctp_current_mux); - i2c_unregister_device(mcli->client); - } - kfree(mcli); - } - return ERR_PTR(rc); -} - -static void mctp_i2c_free_client(struct mctp_i2c_client *mcli) -{ - int rc; - - WARN_ON(!mutex_is_locked(&mi_driver_state.lock)); - WARN_ON(!list_empty(&mcli->devs)); - WARN_ON(mcli->sel); // sanity check, no locking - - device_remove_file(&mcli->client->dev, &dev_attr_mctp_current_mux); - rc = i2c_slave_unregister(mcli->client); - // leak if it fails, we can't propagate errors upwards - if (rc) - dev_err(&mcli->client->dev, "%s i2c unregister failed %d\n", __func__, rc); - else - kfree(mcli); -} - -/* Switch the mctp i2c device to receive responses. - * Call with curr_lock held - */ -static void __mctp_i2c_device_select(struct mctp_i2c_client *mcli, - struct mctp_i2c_dev *midev) -{ - assert_spin_locked(&mcli->curr_lock); - if (midev) - dev_hold(midev->ndev); - if (mcli->sel) - dev_put(mcli->sel->ndev); - mcli->sel = midev; -} - -// Switch the mctp i2c device to receive responses -static void mctp_i2c_device_select(struct mctp_i2c_client *mcli, - struct mctp_i2c_dev *midev) -{ - unsigned long flags; - - spin_lock_irqsave(&mcli->curr_lock, flags); - __mctp_i2c_device_select(mcli, midev); - spin_unlock_irqrestore(&mcli->curr_lock, flags); -} - -static int mctp_i2c_slave_cb(struct i2c_client *client, - enum i2c_slave_event event, u8 *val) -{ - struct mctp_i2c_client *mcli = i2c_get_clientdata(client); - struct mctp_i2c_dev *midev = NULL; - unsigned long flags; - int rc = 0; - - spin_lock_irqsave(&mcli->curr_lock, flags); - midev = mcli->sel; - if (midev) - dev_hold(midev->ndev); - spin_unlock_irqrestore(&mcli->curr_lock, flags); - - if (!midev) - return 0; - - switch (event) { - case I2C_SLAVE_WRITE_RECEIVED: - if (midev->pos < MCTP_I2C_RXBUFSZ) { - midev->buffer[midev->pos] = *val; - midev->pos++; - } else { - midev->ndev->stats.rx_over_errors++; - } - - break; - case I2C_SLAVE_WRITE_REQUESTED: - /* dest_slave as first byte */ - midev->buffer[0] = mcli->lladdr << 1; - midev->pos = 1; - break; - case I2C_SLAVE_STOP: - rc = mctp_i2c_recv(midev); - break; - default: - break; - } - - dev_put(midev->ndev); - return rc; -} - -// Processes incoming data that has been accumulated by the slave cb -static int mctp_i2c_recv(struct mctp_i2c_dev *midev) -{ - struct net_device *ndev = midev->ndev; - struct mctp_i2c_hdr *hdr; - struct mctp_skb_cb *cb; - struct sk_buff *skb; - u8 pec, calc_pec; - size_t recvlen; - - /* + 1 for the PEC */ - if (midev->pos < MCTP_I2C_MINLEN + 1) { - ndev->stats.rx_length_errors++; - return -EINVAL; - } - recvlen = midev->pos - 1; - - hdr = (void *)midev->buffer; - if (hdr->command != MCTP_I2C_COMMANDCODE) { - ndev->stats.rx_dropped++; - return -EINVAL; - } - - pec = midev->buffer[midev->pos - 1]; - calc_pec = i2c_smbus_pec(0, midev->buffer, recvlen); - if (pec != calc_pec) { - ndev->stats.rx_crc_errors++; - return -EINVAL; - } - - skb = netdev_alloc_skb(ndev, recvlen); - if (!skb) { - ndev->stats.rx_dropped++; - return -ENOMEM; - } - - skb->protocol = htons(ETH_P_MCTP); - skb_put_data(skb, midev->buffer, recvlen); - skb_reset_mac_header(skb); - skb_pull(skb, sizeof(struct mctp_i2c_hdr)); - skb_reset_network_header(skb); - - cb = __mctp_cb(skb); - cb->halen = 1; - cb->haddr[0] = hdr->source_slave; - - if (netif_rx(skb) == NET_RX_SUCCESS) { - ndev->stats.rx_packets++; - ndev->stats.rx_bytes += skb->len; - } else { - ndev->stats.rx_dropped++; - } - return 0; -} - -enum mctp_i2c_flow_state { - MCTP_I2C_TX_FLOW_INVALID, - MCTP_I2C_TX_FLOW_NONE, - MCTP_I2C_TX_FLOW_NEW, - MCTP_I2C_TX_FLOW_EXISTING, -}; - -static enum mctp_i2c_flow_state -mctp_i2c_get_tx_flow_state(struct mctp_i2c_dev *midev, struct sk_buff *skb) -{ - enum mctp_i2c_flow_state state; - struct mctp_sk_key *key; - struct mctp_flow *flow; - unsigned long flags; - - flow = skb_ext_find(skb, SKB_EXT_MCTP); - if (!flow) - return MCTP_I2C_TX_FLOW_NONE; - - key = flow->key; - if (!key) - return MCTP_I2C_TX_FLOW_NONE; - - spin_lock_irqsave(&key->lock, flags); - /* if the key is present but invalid, we're unlikely to be able - * to handle the flow at all; just drop now - */ - if (!key->valid) { - state = MCTP_I2C_TX_FLOW_INVALID; - - } else if (key->dev_flow_state == MCTP_I2C_FLOW_STATE_NEW) { - key->dev_flow_state = MCTP_I2C_FLOW_STATE_ACTIVE; - state = MCTP_I2C_TX_FLOW_NEW; - } else { - state = MCTP_I2C_TX_FLOW_EXISTING; - } - - spin_unlock_irqrestore(&key->lock, flags); - - return state; -} - -/* We're not contending with ourselves here; we only need to exclude other - * i2c clients from using the bus. refcounts are simply to prevent - * recursive locking. - */ -static void mctp_i2c_lock_nest(struct mctp_i2c_dev *midev) -{ - unsigned long flags; - bool lock; - - spin_lock_irqsave(&midev->flow_lock, flags); - lock = midev->i2c_lock_count == 0; - midev->i2c_lock_count++; - spin_unlock_irqrestore(&midev->flow_lock, flags); - - if (lock) - i2c_lock_bus(midev->adapter, I2C_LOCK_SEGMENT); -} - -static void mctp_i2c_unlock_nest(struct mctp_i2c_dev *midev) -{ - unsigned long flags; - bool unlock; - - spin_lock_irqsave(&midev->flow_lock, flags); - if (!WARN_ONCE(midev->i2c_lock_count == 0, "lock count underflow!")) - midev->i2c_lock_count--; - unlock = midev->i2c_lock_count == 0; - spin_unlock_irqrestore(&midev->flow_lock, flags); - - if (unlock) - i2c_unlock_bus(midev->adapter, I2C_LOCK_SEGMENT); -} - -static void mctp_i2c_xmit(struct mctp_i2c_dev *midev, struct sk_buff *skb) -{ - struct net_device_stats *stats = &midev->ndev->stats; - enum mctp_i2c_flow_state fs; - union i2c_smbus_data *data; - struct mctp_i2c_hdr *hdr; - unsigned int len; - u16 daddr; - int rc; - - fs = mctp_i2c_get_tx_flow_state(midev, skb); - - len = skb->len; - hdr = (void *)skb_mac_header(skb); - data = (void *)&hdr->byte_count; - daddr = hdr->dest_slave >> 1; - - switch (fs) { - case MCTP_I2C_TX_FLOW_NONE: - /* no flow: full lock & unlock */ - mctp_i2c_lock_nest(midev); - mctp_i2c_device_select(midev->client, midev); - rc = __i2c_smbus_xfer(midev->adapter, daddr, I2C_CLIENT_PEC, - I2C_SMBUS_WRITE, hdr->command, - I2C_SMBUS_BLOCK_DATA, data); - mctp_i2c_unlock_nest(midev); - break; - - case MCTP_I2C_TX_FLOW_NEW: - /* new flow: lock, tx, but don't unlock; that will happen - * on flow release - */ - mctp_i2c_lock_nest(midev); - mctp_i2c_device_select(midev->client, midev); - fallthrough; - - case MCTP_I2C_TX_FLOW_EXISTING: - /* existing flow: we already have the lock; just tx */ - rc = __i2c_smbus_xfer(midev->adapter, daddr, I2C_CLIENT_PEC, - I2C_SMBUS_WRITE, hdr->command, - I2C_SMBUS_BLOCK_DATA, data); - break; - - case MCTP_I2C_TX_FLOW_INVALID: - return; - } - - if (rc) { - dev_warn_ratelimited(&midev->adapter->dev, - "%s i2c_smbus_xfer failed %d", __func__, rc); - stats->tx_errors++; - } else { - stats->tx_bytes += len; - stats->tx_packets++; - } -} - -static void mctp_i2c_flow_release(struct mctp_i2c_dev *midev) -{ - unsigned long flags; - bool unlock; - - spin_lock_irqsave(&midev->flow_lock, flags); - if (midev->release_count > midev->i2c_lock_count) { - WARN_ONCE(1, "release count overflow"); - midev->release_count = midev->i2c_lock_count; - } - - midev->i2c_lock_count -= midev->release_count; - unlock = midev->i2c_lock_count == 0 && midev->release_count > 0; - midev->release_count = 0; - spin_unlock_irqrestore(&midev->flow_lock, flags); - - if (unlock) - i2c_unlock_bus(midev->adapter, I2C_LOCK_SEGMENT); -} - -static int mctp_i2c_header_create(struct sk_buff *skb, struct net_device *dev, - unsigned short type, const void *daddr, - const void *saddr, unsigned int len) -{ - struct mctp_i2c_hdr *hdr; - struct mctp_hdr *mhdr; - u8 lldst, llsrc; - - lldst = *((u8 *)daddr); - llsrc = *((u8 *)saddr); - - skb_push(skb, sizeof(struct mctp_i2c_hdr)); - skb_reset_mac_header(skb); - hdr = (void *)skb_mac_header(skb); - mhdr = mctp_hdr(skb); - hdr->dest_slave = (lldst << 1) & 0xff; - hdr->command = MCTP_I2C_COMMANDCODE; - hdr->byte_count = len + 1; - if (hdr->byte_count > MCTP_I2C_MAXBLOCK) - return -EMSGSIZE; - hdr->source_slave = ((llsrc << 1) & 0xff) | 0x01; - mhdr->ver = 0x01; - - return 0; -} - -static int mctp_i2c_tx_thread(void *data) -{ - struct mctp_i2c_dev *midev = data; - struct sk_buff *skb; - unsigned long flags; - - for (;;) { - if (kthread_should_stop()) - break; - - spin_lock_irqsave(&midev->tx_queue.lock, flags); - skb = __skb_dequeue(&midev->tx_queue); - if (netif_queue_stopped(midev->ndev)) - netif_wake_queue(midev->ndev); - spin_unlock_irqrestore(&midev->tx_queue.lock, flags); - - if (skb == &midev->unlock_marker) { - mctp_i2c_flow_release(midev); - - } else if (skb) { - mctp_i2c_xmit(midev, skb); - kfree_skb(skb); - - } else { - wait_event(midev->tx_wq, - !skb_queue_empty(&midev->tx_queue) || - kthread_should_stop()); - } - } - - return 0; -} - -static netdev_tx_t mctp_i2c_start_xmit(struct sk_buff *skb, - struct net_device *dev) -{ - struct mctp_i2c_dev *midev = netdev_priv(dev); - unsigned long flags; - - spin_lock_irqsave(&midev->tx_queue.lock, flags); - if (skb_queue_len(&midev->tx_queue) >= MCTP_I2C_TX_WORK_LEN) { - netif_stop_queue(dev); - spin_unlock_irqrestore(&midev->tx_queue.lock, flags); - netdev_err(dev, "BUG! Tx Ring full when queue awake!\n"); - return NETDEV_TX_BUSY; - } - - __skb_queue_tail(&midev->tx_queue, skb); - if (skb_queue_len(&midev->tx_queue) == MCTP_I2C_TX_WORK_LEN) - netif_stop_queue(dev); - spin_unlock_irqrestore(&midev->tx_queue.lock, flags); - - wake_up(&midev->tx_wq); - return NETDEV_TX_OK; -} - -static void mctp_i2c_release_flow(struct mctp_dev *mdev, - struct mctp_sk_key *key) - -{ - struct mctp_i2c_dev *midev = netdev_priv(mdev->dev); - unsigned long flags; - - spin_lock_irqsave(&midev->flow_lock, flags); - midev->release_count++; - spin_unlock_irqrestore(&midev->flow_lock, flags); - - /* Ensure we have a release operation queued, through the fake - * marker skb - */ - spin_lock(&midev->tx_queue.lock); - if (!midev->unlock_marker.next) - __skb_queue_tail(&midev->tx_queue, &midev->unlock_marker); - spin_unlock(&midev->tx_queue.lock); - - wake_up(&midev->tx_wq); -} - -static const struct net_device_ops mctp_i2c_ops = { - .ndo_start_xmit = mctp_i2c_start_xmit, -}; - -static const struct header_ops mctp_i2c_headops = { - .create = mctp_i2c_header_create, -}; - -static const struct mctp_netdev_ops mctp_i2c_mctp_ops = { - .release_flow = mctp_i2c_release_flow, -}; - -static void mctp_i2c_net_setup(struct net_device *dev) -{ - dev->type = ARPHRD_MCTP; - - dev->mtu = MCTP_I2C_MAXMTU; - dev->min_mtu = MCTP_I2C_MINMTU; - dev->max_mtu = MCTP_I2C_MAXMTU; - dev->tx_queue_len = MCTP_I2C_TX_QUEUE_LEN; - - dev->hard_header_len = sizeof(struct mctp_i2c_hdr); - dev->addr_len = 1; - - dev->netdev_ops = &mctp_i2c_ops; - dev->header_ops = &mctp_i2c_headops; - dev->needs_free_netdev = true; -} - -static int mctp_i2c_add_netdev(struct mctp_i2c_client *mcli, - struct i2c_adapter *adap) -{ - unsigned long flags; - struct mctp_i2c_dev *midev = NULL; - struct net_device *ndev = NULL; - struct i2c_adapter *root; - char namebuf[30]; - int rc; - - root = mux_root_adapter(adap); - if (root != mcli->client->adapter) { - dev_err(&mcli->client->dev, - "I2C adapter %s is not a child bus of %s", - mcli->client->adapter->name, root->name); - return -EINVAL; - } - - WARN_ON(!mutex_is_locked(&mi_driver_state.lock)); - snprintf(namebuf, sizeof(namebuf), "mctpi2c%d", adap->nr); - ndev = alloc_netdev(sizeof(*midev), namebuf, NET_NAME_ENUM, mctp_i2c_net_setup); - if (!ndev) { - dev_err(&mcli->client->dev, "%s alloc netdev failed\n", __func__); - rc = -ENOMEM; - goto err; - } - dev_net_set(ndev, current->nsproxy->net_ns); - SET_NETDEV_DEV(ndev, &adap->dev); - ndev->dev_addr = &mcli->lladdr; - - midev = netdev_priv(ndev); - skb_queue_head_init(&midev->tx_queue); - INIT_LIST_HEAD(&midev->list); - midev->adapter = adap; - midev->client = mcli; - spin_lock_init(&midev->flow_lock); - midev->i2c_lock_count = 0; - midev->release_count = 0; - /* Hold references */ - get_device(&midev->adapter->dev); - get_device(&midev->client->client->dev); - midev->ndev = ndev; - init_waitqueue_head(&midev->tx_wq); - midev->tx_thread = kthread_create(mctp_i2c_tx_thread, midev, - "%s/tx", namebuf); - if (IS_ERR_OR_NULL(midev->tx_thread)) { - rc = -ENOMEM; - goto err_free; - } - - rc = mctp_register_netdev(ndev, &mctp_i2c_mctp_ops); - if (rc) { - dev_err(&mcli->client->dev, - "%s register netdev \"%s\" failed %d\n", __func__, - ndev->name, rc); - goto err_stop_kthread; - } - spin_lock_irqsave(&mcli->curr_lock, flags); - list_add(&midev->list, &mcli->devs); - // Select a device by default - if (!mcli->sel) - __mctp_i2c_device_select(mcli, midev); - spin_unlock_irqrestore(&mcli->curr_lock, flags); - - wake_up_process(midev->tx_thread); - - return 0; - -err_stop_kthread: - kthread_stop(midev->tx_thread); - -err_free: - free_netdev(ndev); - -err: - return rc; -} - -// Removes and unregisters a mctp-i2c netdev -static void mctp_i2c_free_netdev(struct mctp_i2c_dev *midev) -{ - struct mctp_i2c_client *mcli = midev->client; - unsigned long flags; - - netif_stop_queue(midev->ndev); - kthread_stop(midev->tx_thread); - skb_queue_purge(&midev->tx_queue); - - /* Release references, used only for TX which has stopped */ - put_device(&midev->adapter->dev); - put_device(&mcli->client->dev); - - /* Remove it from the parent mcli */ - spin_lock_irqsave(&mcli->curr_lock, flags); - list_del(&midev->list); - if (mcli->sel == midev) { - struct mctp_i2c_dev *first; - - first = list_first_entry_or_null(&mcli->devs, struct mctp_i2c_dev, list); - __mctp_i2c_device_select(mcli, first); - } - spin_unlock_irqrestore(&mcli->curr_lock, flags); - - /* Remove netdev. mctp_i2c_slave_cb() takes a dev_hold() so removing - * it now is safe. unregister_netdev() frees ndev and midev. - */ - mctp_unregister_netdev(midev->ndev); -} - -// Removes any netdev for adap. mcli is the parent root i2c client -static void mctp_i2c_remove_netdev(struct mctp_i2c_client *mcli, - struct i2c_adapter *adap) -{ - unsigned long flags; - struct mctp_i2c_dev *midev = NULL, *m = NULL; - - WARN_ON(!mutex_is_locked(&mi_driver_state.lock)); - spin_lock_irqsave(&mcli->curr_lock, flags); - // list size is limited by number of MCTP netdevs on a single hardware bus - list_for_each_entry(m, &mcli->devs, list) - if (m->adapter == adap) { - midev = m; - break; - } - spin_unlock_irqrestore(&mcli->curr_lock, flags); - - if (midev) - mctp_i2c_free_netdev(midev); -} - -/* Determines whether a device is an i2c adapter. - * Optionally returns the root i2c_adapter - */ -static struct i2c_adapter *mctp_i2c_get_adapter(struct device *dev, - struct i2c_adapter **ret_root) -{ - struct i2c_adapter *root, *adap; - - if (dev->type != &i2c_adapter_type) - return NULL; - adap = to_i2c_adapter(dev); - root = mux_root_adapter(adap); - WARN_ONCE(!root, "%s failed to find root adapter for %s\n", - __func__, dev_name(dev)); - if (!root) - return NULL; - if (ret_root) - *ret_root = root; - return adap; -} - -/* Determines whether a device is an i2c adapter with the "mctp-controller" - * devicetree property set. If adap is not an OF node, returns match_no_of - */ -static bool mctp_i2c_adapter_match(struct i2c_adapter *adap, bool match_no_of) -{ - if (!adap->dev.of_node) - return match_no_of; - return of_property_read_bool(adap->dev.of_node, MCTP_I2C_OF_PROP); -} - -/* Called for each existing i2c device (adapter or client) when a - * new mctp-i2c client is probed. - */ -static int mctp_i2c_client_try_attach(struct device *dev, void *data) -{ - struct i2c_adapter *adap = NULL, *root = NULL; - struct mctp_i2c_client *mcli = data; - - adap = mctp_i2c_get_adapter(dev, &root); - if (!adap) - return 0; - if (mcli->client->adapter != root) - return 0; - // Must either have mctp-controller property on the adapter, or - // be a root adapter if it's non-devicetree - if (!mctp_i2c_adapter_match(adap, adap == root)) - return 0; - - return mctp_i2c_add_netdev(mcli, adap); -} - -static void mctp_i2c_notify_add(struct device *dev) -{ - struct mctp_i2c_client *mcli = NULL, *m = NULL; - struct i2c_adapter *root = NULL, *adap = NULL; - int rc; - - adap = mctp_i2c_get_adapter(dev, &root); - if (!adap) - return; - // Check for mctp-controller property on the adapter - if (!mctp_i2c_adapter_match(adap, false)) - return; - - /* Find an existing mcli for adap's root */ - mutex_lock(&mi_driver_state.lock); - list_for_each_entry(m, &mi_driver_state.clients, list) { - if (m->client->adapter == root) { - mcli = m; - break; - } - } - - if (mcli) { - rc = mctp_i2c_add_netdev(mcli, adap); - if (rc) - dev_warn(dev, "%s Failed adding mctp-i2c device", - __func__); - } - mutex_unlock(&mi_driver_state.lock); -} - -static void mctp_i2c_notify_del(struct device *dev) -{ - struct i2c_adapter *root = NULL, *adap = NULL; - struct mctp_i2c_client *mcli = NULL; - - adap = mctp_i2c_get_adapter(dev, &root); - if (!adap) - return; - - mutex_lock(&mi_driver_state.lock); - list_for_each_entry(mcli, &mi_driver_state.clients, list) { - if (mcli->client->adapter == root) { - mctp_i2c_remove_netdev(mcli, adap); - break; - } - } - mutex_unlock(&mi_driver_state.lock); -} - -static int mctp_i2c_probe(struct i2c_client *client) -{ - struct mctp_i2c_client *mcli = NULL; - int rc; - - /* Check for >32 byte block support required for MCTP */ - if (!i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_V3_BLOCK)) { - dev_err(&client->dev, - "%s failed, I2C bus driver does not support 255 byte block transfer\n", - __func__); - return -EOPNOTSUPP; - } - - mutex_lock(&mi_driver_state.lock); - mcli = mctp_i2c_new_client(client); - if (IS_ERR(mcli)) { - rc = PTR_ERR(mcli); - mcli = NULL; - goto out; - } else { - list_add(&mcli->list, &mi_driver_state.clients); - } - - // Add a netdev for adapters that have a 'mctp-controller' property - i2c_for_each_dev(mcli, mctp_i2c_client_try_attach); - rc = 0; -out: - mutex_unlock(&mi_driver_state.lock); - return rc; -} - -static int mctp_i2c_remove(struct i2c_client *client) -{ - struct mctp_i2c_client *mcli = i2c_get_clientdata(client); - struct mctp_i2c_dev *midev = NULL, *tmp = NULL; - - mutex_lock(&mi_driver_state.lock); - list_del(&mcli->list); - // Remove all child adapter netdevs - list_for_each_entry_safe(midev, tmp, &mcli->devs, list) - mctp_i2c_free_netdev(midev); - - mctp_i2c_free_client(mcli); - mutex_unlock(&mi_driver_state.lock); - // Callers ignore return code - return 0; -} - -/* We look for a 'mctp-controller' property on I2C busses as they are - * added/deleted, creating/removing netdevs as required. - */ -static int mctp_i2c_notifier_call(struct notifier_block *nb, - unsigned long action, void *data) -{ - struct device *dev = data; - - switch (action) { - case BUS_NOTIFY_ADD_DEVICE: - mctp_i2c_notify_add(dev); - break; - case BUS_NOTIFY_DEL_DEVICE: - mctp_i2c_notify_del(dev); - break; - } - return NOTIFY_DONE; -} - -static struct notifier_block mctp_i2c_notifier = { - .notifier_call = mctp_i2c_notifier_call, -}; - -static const struct i2c_device_id mctp_i2c_id[] = { - { "mctp-i2c", 0 }, - {}, -}; -MODULE_DEVICE_TABLE(i2c, mctp_i2c_id); - -static const struct of_device_id mctp_i2c_of_match[] = { - { .compatible = "mctp-i2c-controller" }, - {}, -}; -MODULE_DEVICE_TABLE(of, mctp_i2c_of_match); - -static struct i2c_driver mctp_i2c_driver = { - .driver = { - .name = "mctp-i2c", - .of_match_table = mctp_i2c_of_match, - }, - .probe_new = mctp_i2c_probe, - .remove = mctp_i2c_remove, - .id_table = mctp_i2c_id, -}; - -static __init int mctp_i2c_init(void) -{ - int rc; - - INIT_LIST_HEAD(&mi_driver_state.clients); - mutex_init(&mi_driver_state.lock); - pr_info("MCTP SMBus/I2C transport driver\n"); - rc = i2c_add_driver(&mctp_i2c_driver); - if (rc) - return rc; - rc = bus_register_notifier(&i2c_bus_type, &mctp_i2c_notifier); - if (rc) { - i2c_del_driver(&mctp_i2c_driver); - return rc; - } - return 0; -} - -static __exit void mctp_i2c_exit(void) -{ - int rc; - - rc = bus_unregister_notifier(&i2c_bus_type, &mctp_i2c_notifier); - if (rc) - pr_warn("%s Could not unregister notifier, %d", __func__, rc); - i2c_del_driver(&mctp_i2c_driver); -} - -module_init(mctp_i2c_init); -module_exit(mctp_i2c_exit); - -MODULE_DESCRIPTION("MCTP SMBus/I2C device"); -MODULE_LICENSE("GPL v2"); -MODULE_AUTHOR("Matt Johnston "); diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 353d6b4e7a53..16119ac1aa97 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -52,19 +52,6 @@ typedef int (*i2c_slave_cb_t)(struct i2c_client *client, struct module; struct property_entry; -/* SMBus 3.0 extends the maximum block read/write size to 255 (from 32). - * The larger size is only supported by some drivers, indicated by - * the I2C_FUNC_SMBUS_V3_BLOCK functionality bit. - */ -#define I2C_SMBUS_V3_BLOCK_MAX 255 /* As specified in SMBus 3.0 standard */ - -/* Note compatibility definition in uapi header with 32 byte block */ -union i2c_smbus_data { - __u8 byte; - __u16 word; - __u8 block[I2C_SMBUS_V3_BLOCK_MAX + 1]; /* block[0] is used for length */ -}; - #if IS_ENABLED(CONFIG_I2C) /* Return the Frequency mode string based on the bus frequency */ const char *i2c_freq_mode_string(u32 bus_freq_hz); diff --git a/include/uapi/linux/i2c-dev.h b/include/uapi/linux/i2c-dev.h index 46ce31d42f7d..1c4cec4ddd84 100644 --- a/include/uapi/linux/i2c-dev.h +++ b/include/uapi/linux/i2c-dev.h @@ -39,14 +39,12 @@ /* This is the structure as used in the I2C_SMBUS ioctl call */ -#ifndef __KERNEL__ struct i2c_smbus_ioctl_data { __u8 read_write; __u8 command; __u32 size; union i2c_smbus_data __user *data; }; -#endif /* This is the structure as used in the I2C_RDWR ioctl call */ struct i2c_rdwr_ioctl_data { diff --git a/include/uapi/linux/i2c.h b/include/uapi/linux/i2c.h index c3534ab1ae53..92326ebde350 100644 --- a/include/uapi/linux/i2c.h +++ b/include/uapi/linux/i2c.h @@ -108,9 +108,6 @@ struct i2c_msg { #define I2C_FUNC_SMBUS_READ_I2C_BLOCK 0x04000000 /* I2C-like block xfer */ #define I2C_FUNC_SMBUS_WRITE_I2C_BLOCK 0x08000000 /* w/ 1-byte reg. addr. */ #define I2C_FUNC_SMBUS_HOST_NOTIFY 0x10000000 /* SMBus 2.0 or later */ -#define I2C_FUNC_SMBUS_V3_BLOCK 0x20000000 /* Device supports 255 byte block */ - /* Note that I2C_SMBUS ioctl only */ - /* supports a 32 byte block */ #define I2C_FUNC_SMBUS_BYTE (I2C_FUNC_SMBUS_READ_BYTE | \ I2C_FUNC_SMBUS_WRITE_BYTE) @@ -140,15 +137,13 @@ struct i2c_msg { /* * Data for SMBus Messages */ -#define I2C_SMBUS_BLOCK_MAX 32 /* As specified in SMBus 2.0 standard */ -#ifndef __KERNEL__ +#define I2C_SMBUS_BLOCK_MAX 32 /* As specified in SMBus standard */ union i2c_smbus_data { __u8 byte; __u16 word; __u8 block[I2C_SMBUS_BLOCK_MAX + 2]; /* block[0] is used for length */ /* and one more for user-space compatibility */ }; -#endif /* i2c_smbus_xfer read or write markers */ #define I2C_SMBUS_READ 1 -- cgit v1.2.3 From ebf7f6f0a6cdcc17a3da52b81e4b3a98c4005028 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Fri, 5 Nov 2021 09:30:00 +0800 Subject: bpf: Change value of MAX_TAIL_CALL_CNT from 32 to 33 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the current code, the actual max tail call count is 33 which is greater than MAX_TAIL_CALL_CNT (defined as 32). The actual limit is not consistent with the meaning of MAX_TAIL_CALL_CNT and thus confusing at first glance. We can see the historical evolution from commit 04fd61ab36ec ("bpf: allow bpf programs to tail-call other bpf programs") and commit f9dabe016b63 ("bpf: Undo off-by-one in interpreter tail call count limit"). In order to avoid changing existing behavior, the actual limit is 33 now, this is reasonable. After commit 874be05f525e ("bpf, tests: Add tail call test suite"), we can see there exists failed testcase. On all archs when CONFIG_BPF_JIT_ALWAYS_ON is not set: # echo 0 > /proc/sys/net/core/bpf_jit_enable # modprobe test_bpf # dmesg | grep -w FAIL Tail call error path, max count reached jited:0 ret 34 != 33 FAIL On some archs: # echo 1 > /proc/sys/net/core/bpf_jit_enable # modprobe test_bpf # dmesg | grep -w FAIL Tail call error path, max count reached jited:1 ret 34 != 33 FAIL Although the above failed testcase has been fixed in commit 18935a72eb25 ("bpf/tests: Fix error in tail call limit tests"), it would still be good to change the value of MAX_TAIL_CALL_CNT from 32 to 33 to make the code more readable. The 32-bit x86 JIT was using a limit of 32, just fix the wrong comments and limit to 33 tail calls as the constant MAX_TAIL_CALL_CNT updated. For the mips64 JIT, use "ori" instead of "addiu" as suggested by Johan Almbladh. For the riscv JIT, use RV_REG_TCC directly to save one register move as suggested by Björn Töpel. For the other implementations, no function changes, it does not change the current limit 33, the new value of MAX_TAIL_CALL_CNT can reflect the actual max tail call count, the related tail call testcases in test_bpf module and selftests can work well for the interpreter and the JIT. Here are the test results on x86_64: # uname -m x86_64 # echo 0 > /proc/sys/net/core/bpf_jit_enable # modprobe test_bpf test_suite=test_tail_calls # dmesg | tail -1 test_bpf: test_tail_calls: Summary: 8 PASSED, 0 FAILED, [0/8 JIT'ed] # rmmod test_bpf # echo 1 > /proc/sys/net/core/bpf_jit_enable # modprobe test_bpf test_suite=test_tail_calls # dmesg | tail -1 test_bpf: test_tail_calls: Summary: 8 PASSED, 0 FAILED, [8/8 JIT'ed] # rmmod test_bpf # ./test_progs -t tailcalls #142 tailcalls:OK Summary: 1/11 PASSED, 0 SKIPPED, 0 FAILED Signed-off-by: Tiezhu Yang Signed-off-by: Daniel Borkmann Tested-by: Johan Almbladh Tested-by: Ilya Leoshkevich Acked-by: Björn Töpel Acked-by: Johan Almbladh Acked-by: Ilya Leoshkevich Link: https://lore.kernel.org/bpf/1636075800-3264-1-git-send-email-yangtiezhu@loongson.cn --- arch/arm/net/bpf_jit_32.c | 5 +++-- arch/arm64/net/bpf_jit_comp.c | 5 +++-- arch/mips/net/bpf_jit_comp32.c | 3 +-- arch/mips/net/bpf_jit_comp64.c | 2 +- arch/powerpc/net/bpf_jit_comp32.c | 4 ++-- arch/powerpc/net/bpf_jit_comp64.c | 4 ++-- arch/riscv/net/bpf_jit_comp32.c | 6 ++---- arch/riscv/net/bpf_jit_comp64.c | 7 +++---- arch/s390/net/bpf_jit_comp.c | 6 +++--- arch/sparc/net/bpf_jit_comp_64.c | 2 +- arch/x86/net/bpf_jit_comp.c | 10 +++++----- arch/x86/net/bpf_jit_comp32.c | 4 ++-- include/linux/bpf.h | 2 +- include/uapi/linux/bpf.h | 2 +- kernel/bpf/core.c | 3 ++- lib/test_bpf.c | 4 ++-- tools/include/uapi/linux/bpf.h | 2 +- 17 files changed, 35 insertions(+), 36 deletions(-) (limited to 'include/uapi/linux') diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index eeb6dc0ecf46..e59b41e9ab0c 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -1199,7 +1199,8 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) /* tmp2[0] = array, tmp2[1] = index */ - /* if (tail_call_cnt > MAX_TAIL_CALL_CNT) + /* + * if (tail_call_cnt >= MAX_TAIL_CALL_CNT) * goto out; * tail_call_cnt++; */ @@ -1208,7 +1209,7 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) tc = arm_bpf_get_reg64(tcc, tmp, ctx); emit(ARM_CMP_I(tc[0], hi), ctx); _emit(ARM_COND_EQ, ARM_CMP_I(tc[1], lo), ctx); - _emit(ARM_COND_HI, ARM_B(jmp_offset), ctx); + _emit(ARM_COND_CS, ARM_B(jmp_offset), ctx); emit(ARM_ADDS_I(tc[1], tc[1], 1), ctx); emit(ARM_ADC_I(tc[0], tc[0], 0), ctx); arm_bpf_put_reg64(tcc, tmp, ctx); diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 86c9dc0681cc..07c12c42b751 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -287,13 +287,14 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) emit(A64_CMP(0, r3, tmp), ctx); emit(A64_B_(A64_COND_CS, jmp_offset), ctx); - /* if (tail_call_cnt > MAX_TAIL_CALL_CNT) + /* + * if (tail_call_cnt >= MAX_TAIL_CALL_CNT) * goto out; * tail_call_cnt++; */ emit_a64_mov_i64(tmp, MAX_TAIL_CALL_CNT, ctx); emit(A64_CMP(1, tcc, tmp), ctx); - emit(A64_B_(A64_COND_HI, jmp_offset), ctx); + emit(A64_B_(A64_COND_CS, jmp_offset), ctx); emit(A64_ADD_I(1, tcc, tcc, 1), ctx); /* prog = array->ptrs[index]; diff --git a/arch/mips/net/bpf_jit_comp32.c b/arch/mips/net/bpf_jit_comp32.c index bd996ede12f8..044b11b65bca 100644 --- a/arch/mips/net/bpf_jit_comp32.c +++ b/arch/mips/net/bpf_jit_comp32.c @@ -1381,8 +1381,7 @@ void build_prologue(struct jit_context *ctx) * 16-byte area in the parent's stack frame. On a tail call, the * calling function jumps into the prologue after these instructions. */ - emit(ctx, ori, MIPS_R_T9, MIPS_R_ZERO, - min(MAX_TAIL_CALL_CNT + 1, 0xffff)); + emit(ctx, ori, MIPS_R_T9, MIPS_R_ZERO, min(MAX_TAIL_CALL_CNT, 0xffff)); emit(ctx, sw, MIPS_R_T9, 0, MIPS_R_SP); /* diff --git a/arch/mips/net/bpf_jit_comp64.c b/arch/mips/net/bpf_jit_comp64.c index 815ade724227..6475828ffb36 100644 --- a/arch/mips/net/bpf_jit_comp64.c +++ b/arch/mips/net/bpf_jit_comp64.c @@ -552,7 +552,7 @@ void build_prologue(struct jit_context *ctx) * On a tail call, the calling function jumps into the prologue * after this instruction. */ - emit(ctx, addiu, tc, MIPS_R_ZERO, min(MAX_TAIL_CALL_CNT + 1, 0xffff)); + emit(ctx, ori, tc, MIPS_R_ZERO, min(MAX_TAIL_CALL_CNT, 0xffff)); /* === Entry-point for tail calls === */ diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c index 0da31d41d413..8a4faa05f9e4 100644 --- a/arch/powerpc/net/bpf_jit_comp32.c +++ b/arch/powerpc/net/bpf_jit_comp32.c @@ -221,13 +221,13 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o PPC_BCC(COND_GE, out); /* - * if (tail_call_cnt > MAX_TAIL_CALL_CNT) + * if (tail_call_cnt >= MAX_TAIL_CALL_CNT) * goto out; */ EMIT(PPC_RAW_CMPLWI(_R0, MAX_TAIL_CALL_CNT)); /* tail_call_cnt++; */ EMIT(PPC_RAW_ADDIC(_R0, _R0, 1)); - PPC_BCC(COND_GT, out); + PPC_BCC(COND_GE, out); /* prog = array->ptrs[index]; */ EMIT(PPC_RAW_RLWINM(_R3, b2p_index, 2, 0, 29)); diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 8b5157ccfeba..8571aafcc9e1 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -228,12 +228,12 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o PPC_BCC(COND_GE, out); /* - * if (tail_call_cnt > MAX_TAIL_CALL_CNT) + * if (tail_call_cnt >= MAX_TAIL_CALL_CNT) * goto out; */ PPC_BPF_LL(b2p[TMP_REG_1], 1, bpf_jit_stack_tailcallcnt(ctx)); EMIT(PPC_RAW_CMPLWI(b2p[TMP_REG_1], MAX_TAIL_CALL_CNT)); - PPC_BCC(COND_GT, out); + PPC_BCC(COND_GE, out); /* * tail_call_cnt++; diff --git a/arch/riscv/net/bpf_jit_comp32.c b/arch/riscv/net/bpf_jit_comp32.c index e6497424cbf6..529a83b85c1c 100644 --- a/arch/riscv/net/bpf_jit_comp32.c +++ b/arch/riscv/net/bpf_jit_comp32.c @@ -799,11 +799,10 @@ static int emit_bpf_tail_call(int insn, struct rv_jit_context *ctx) emit_bcc(BPF_JGE, lo(idx_reg), RV_REG_T1, off, ctx); /* - * temp_tcc = tcc - 1; - * if (tcc < 0) + * if (--tcc < 0) * goto out; */ - emit(rv_addi(RV_REG_T1, RV_REG_TCC, -1), ctx); + emit(rv_addi(RV_REG_TCC, RV_REG_TCC, -1), ctx); off = ninsns_rvoff(tc_ninsn - (ctx->ninsns - start_insn)); emit_bcc(BPF_JSLT, RV_REG_TCC, RV_REG_ZERO, off, ctx); @@ -829,7 +828,6 @@ static int emit_bpf_tail_call(int insn, struct rv_jit_context *ctx) if (is_12b_check(off, insn)) return -1; emit(rv_lw(RV_REG_T0, off, RV_REG_T0), ctx); - emit(rv_addi(RV_REG_TCC, RV_REG_T1, 0), ctx); /* Epilogue jumps to *(t0 + 4). */ __build_epilogue(true, ctx); return 0; diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c index f2a779c7e225..603630b6f3c5 100644 --- a/arch/riscv/net/bpf_jit_comp64.c +++ b/arch/riscv/net/bpf_jit_comp64.c @@ -327,12 +327,12 @@ static int emit_bpf_tail_call(int insn, struct rv_jit_context *ctx) off = ninsns_rvoff(tc_ninsn - (ctx->ninsns - start_insn)); emit_branch(BPF_JGE, RV_REG_A2, RV_REG_T1, off, ctx); - /* if (TCC-- < 0) + /* if (--TCC < 0) * goto out; */ - emit_addi(RV_REG_T1, tcc, -1, ctx); + emit_addi(RV_REG_TCC, tcc, -1, ctx); off = ninsns_rvoff(tc_ninsn - (ctx->ninsns - start_insn)); - emit_branch(BPF_JSLT, tcc, RV_REG_ZERO, off, ctx); + emit_branch(BPF_JSLT, RV_REG_TCC, RV_REG_ZERO, off, ctx); /* prog = array->ptrs[index]; * if (!prog) @@ -352,7 +352,6 @@ static int emit_bpf_tail_call(int insn, struct rv_jit_context *ctx) if (is_12b_check(off, insn)) return -1; emit_ld(RV_REG_T3, off, RV_REG_T2, ctx); - emit_mv(RV_REG_TCC, RV_REG_T1, ctx); __build_epilogue(true, ctx); return 0; } diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 233cc9bcd652..9ff2bd83aad7 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -1369,7 +1369,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, jit->prg); /* - * if (tail_call_cnt++ > MAX_TAIL_CALL_CNT) + * if (tail_call_cnt++ >= MAX_TAIL_CALL_CNT) * goto out; */ @@ -1381,9 +1381,9 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, EMIT4_IMM(0xa7080000, REG_W0, 1); /* laal %w1,%w0,off(%r15) */ EMIT6_DISP_LH(0xeb000000, 0x00fa, REG_W1, REG_W0, REG_15, off); - /* clij %w1,MAX_TAIL_CALL_CNT,0x2,out */ + /* clij %w1,MAX_TAIL_CALL_CNT-1,0x2,out */ patch_2_clij = jit->prg; - EMIT6_PCREL_RIEC(0xec000000, 0x007f, REG_W1, MAX_TAIL_CALL_CNT, + EMIT6_PCREL_RIEC(0xec000000, 0x007f, REG_W1, MAX_TAIL_CALL_CNT - 1, 2, jit->prg); /* diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c index 9a2f20cbd48b..0bfe1c72a0c9 100644 --- a/arch/sparc/net/bpf_jit_comp_64.c +++ b/arch/sparc/net/bpf_jit_comp_64.c @@ -867,7 +867,7 @@ static void emit_tail_call(struct jit_ctx *ctx) emit(LD32 | IMMED | RS1(SP) | S13(off) | RD(tmp), ctx); emit_cmpi(tmp, MAX_TAIL_CALL_CNT, ctx); #define OFFSET2 13 - emit_branch(BGU, ctx->idx, ctx->idx + OFFSET2, ctx); + emit_branch(BGEU, ctx->idx, ctx->idx + OFFSET2, ctx); emit_nop(ctx); emit_alu_K(ADD, tmp, 1, ctx); diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 726700fabca6..631847907786 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -412,7 +412,7 @@ static void emit_indirect_jump(u8 **pprog, int reg, u8 *ip) * ... bpf_tail_call(void *ctx, struct bpf_array *array, u64 index) ... * if (index >= array->map.max_entries) * goto out; - * if (++tail_call_cnt > MAX_TAIL_CALL_CNT) + * if (tail_call_cnt++ >= MAX_TAIL_CALL_CNT) * goto out; * prog = array->ptrs[index]; * if (prog == NULL) @@ -446,14 +446,14 @@ static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used, EMIT2(X86_JBE, offset); /* jbe out */ /* - * if (tail_call_cnt > MAX_TAIL_CALL_CNT) + * if (tail_call_cnt++ >= MAX_TAIL_CALL_CNT) * goto out; */ EMIT2_off32(0x8B, 0x85, tcc_off); /* mov eax, dword ptr [rbp - tcc_off] */ EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */ offset = ctx->tail_call_indirect_label - (prog + 2 - start); - EMIT2(X86_JA, offset); /* ja out */ + EMIT2(X86_JAE, offset); /* jae out */ EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */ EMIT2_off32(0x89, 0x85, tcc_off); /* mov dword ptr [rbp - tcc_off], eax */ @@ -504,14 +504,14 @@ static void emit_bpf_tail_call_direct(struct bpf_jit_poke_descriptor *poke, int offset; /* - * if (tail_call_cnt > MAX_TAIL_CALL_CNT) + * if (tail_call_cnt++ >= MAX_TAIL_CALL_CNT) * goto out; */ EMIT2_off32(0x8B, 0x85, tcc_off); /* mov eax, dword ptr [rbp - tcc_off] */ EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */ offset = ctx->tail_call_direct_label - (prog + 2 - start); - EMIT2(X86_JA, offset); /* ja out */ + EMIT2(X86_JAE, offset); /* jae out */ EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */ EMIT2_off32(0x89, 0x85, tcc_off); /* mov dword ptr [rbp - tcc_off], eax */ diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c index da9b7cfa4632..429a89c5468b 100644 --- a/arch/x86/net/bpf_jit_comp32.c +++ b/arch/x86/net/bpf_jit_comp32.c @@ -1323,7 +1323,7 @@ static void emit_bpf_tail_call(u8 **pprog, u8 *ip) EMIT2(IA32_JBE, jmp_label(jmp_label1, 2)); /* - * if (tail_call_cnt > MAX_TAIL_CALL_CNT) + * if (tail_call_cnt++ >= MAX_TAIL_CALL_CNT) * goto out; */ lo = (u32)MAX_TAIL_CALL_CNT; @@ -1337,7 +1337,7 @@ static void emit_bpf_tail_call(u8 **pprog, u8 *ip) /* cmp ecx,lo */ EMIT3(0x83, add_1reg(0xF8, IA32_ECX), lo); - /* ja out */ + /* jae out */ EMIT2(IA32_JAE, jmp_label(jmp_label1, 2)); /* add eax,0x1 */ diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 56098c866704..cc7a0c36e7df 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1081,7 +1081,7 @@ struct bpf_array { }; #define BPF_COMPLEXITY_LIMIT_INSNS 1000000 /* yes. 1M insns */ -#define MAX_TAIL_CALL_CNT 32 +#define MAX_TAIL_CALL_CNT 33 #define BPF_F_ACCESS_MASK (BPF_F_RDONLY | \ BPF_F_RDONLY_PROG | \ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 6297eafdc40f..a69e4b04ffeb 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1744,7 +1744,7 @@ union bpf_attr { * if the maximum number of tail calls has been reached for this * chain of programs. This limit is defined in the kernel by the * macro **MAX_TAIL_CALL_CNT** (not accessible to user space), - * which is currently set to 32. + * which is currently set to 33. * Return * 0 on success, or a negative error in case of failure. * diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 2405e39d800f..b52dc845ecea 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1574,7 +1574,8 @@ select_insn: if (unlikely(index >= array->map.max_entries)) goto out; - if (unlikely(tail_call_cnt > MAX_TAIL_CALL_CNT)) + + if (unlikely(tail_call_cnt >= MAX_TAIL_CALL_CNT)) goto out; tail_call_cnt++; diff --git a/lib/test_bpf.c b/lib/test_bpf.c index adae39567264..0c5cb2d6436a 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -14683,7 +14683,7 @@ static struct tail_call_test tail_call_tests[] = { BPF_EXIT_INSN(), }, .flags = FLAG_NEED_STATE | FLAG_RESULT_IN_STATE, - .result = (MAX_TAIL_CALL_CNT + 1 + 1) * MAX_TESTRUNS, + .result = (MAX_TAIL_CALL_CNT + 1) * MAX_TESTRUNS, }, { "Tail call count preserved across function calls", @@ -14705,7 +14705,7 @@ static struct tail_call_test tail_call_tests[] = { }, .stack_depth = 8, .flags = FLAG_NEED_STATE | FLAG_RESULT_IN_STATE, - .result = (MAX_TAIL_CALL_CNT + 1 + 1) * MAX_TESTRUNS, + .result = (MAX_TAIL_CALL_CNT + 1) * MAX_TESTRUNS, }, { "Tail call error path, NULL target", diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 6297eafdc40f..a69e4b04ffeb 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1744,7 +1744,7 @@ union bpf_attr { * if the maximum number of tail calls has been reached for this * chain of programs. This limit is defined in the kernel by the * macro **MAX_TAIL_CALL_CNT** (not accessible to user space), - * which is currently set to 32. + * which is currently set to 33. * Return * 0 on success, or a negative error in case of failure. * -- cgit v1.2.3 From b5f57384805a34f497edb8b04d694a8a1b3d81d4 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Fri, 10 Sep 2021 15:18:20 -0400 Subject: drm/amdkfd: Add sysfs bitfields and enums to uAPI These bits are de-facto part of the uAPI, so declare them in a uAPI header. The corresponding bit-fields and enums in user mode are defined in https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface/blob/master/include/hsakmttypes.h HSA_CAP_... -> HSA_CAPABILITY HSA_MEM_HEAP_TYPE_... -> HSA_HEAPTYPE HSA_MEM_FLAGS_... -> HSA_MEMORYPROPERTY HSA_CACHE_TYPE_... -> HsaCacheType HSA_IOLINK_TYPE_... -> HSA_IOLINKTYPE HSA_IOLINK_FLAGS_... -> HSA_LINKPROPERTY Signed-off-by: Felix Kuehling Reviewed-by: Jonathan Kim Signed-off-by: Alex Deucher --- MAINTAINERS | 1 + drivers/gpu/drm/amd/amdkfd/kfd_topology.h | 46 +------------ include/uapi/linux/kfd_sysfs.h | 108 ++++++++++++++++++++++++++++++ 3 files changed, 110 insertions(+), 45 deletions(-) create mode 100644 include/uapi/linux/kfd_sysfs.h (limited to 'include/uapi/linux') diff --git a/MAINTAINERS b/MAINTAINERS index 7a2345ce8521..f05a19b22505 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -966,6 +966,7 @@ F: drivers/gpu/drm/amd/include/kgd_kfd_interface.h F: drivers/gpu/drm/amd/include/v9_structs.h F: drivers/gpu/drm/amd/include/vi_structs.h F: include/uapi/linux/kfd_ioctl.h +F: include/uapi/linux/kfd_sysfs.h AMD SPI DRIVER M: Sanjay R Mehta diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h index a8db017c9b8e..f0cc59d2fd5d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h @@ -25,38 +25,11 @@ #include #include +#include #include "kfd_crat.h" #define KFD_TOPOLOGY_PUBLIC_NAME_SIZE 32 -#define HSA_CAP_HOT_PLUGGABLE 0x00000001 -#define HSA_CAP_ATS_PRESENT 0x00000002 -#define HSA_CAP_SHARED_WITH_GRAPHICS 0x00000004 -#define HSA_CAP_QUEUE_SIZE_POW2 0x00000008 -#define HSA_CAP_QUEUE_SIZE_32BIT 0x00000010 -#define HSA_CAP_QUEUE_IDLE_EVENT 0x00000020 -#define HSA_CAP_VA_LIMIT 0x00000040 -#define HSA_CAP_WATCH_POINTS_SUPPORTED 0x00000080 -#define HSA_CAP_WATCH_POINTS_TOTALBITS_MASK 0x00000f00 -#define HSA_CAP_WATCH_POINTS_TOTALBITS_SHIFT 8 -#define HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK 0x00003000 -#define HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT 12 - -#define HSA_CAP_DOORBELL_TYPE_PRE_1_0 0x0 -#define HSA_CAP_DOORBELL_TYPE_1_0 0x1 -#define HSA_CAP_DOORBELL_TYPE_2_0 0x2 -#define HSA_CAP_AQL_QUEUE_DOUBLE_MAP 0x00004000 - -#define HSA_CAP_RESERVED_WAS_SRAM_EDCSUPPORTED 0x00080000 /* Old buggy user mode depends on this being 0 */ -#define HSA_CAP_MEM_EDCSUPPORTED 0x00100000 -#define HSA_CAP_RASEVENTNOTIFY 0x00200000 -#define HSA_CAP_ASIC_REVISION_MASK 0x03c00000 -#define HSA_CAP_ASIC_REVISION_SHIFT 22 -#define HSA_CAP_SRAM_EDCSUPPORTED 0x04000000 -#define HSA_CAP_SVMAPI_SUPPORTED 0x08000000 -#define HSA_CAP_FLAGS_COHERENTHOSTACCESS 0x10000000 -#define HSA_CAP_RESERVED 0xe00f8000 - struct kfd_node_properties { uint64_t hive_id; uint32_t cpu_cores_count; @@ -93,17 +66,6 @@ struct kfd_node_properties { char name[KFD_TOPOLOGY_PUBLIC_NAME_SIZE]; }; -#define HSA_MEM_HEAP_TYPE_SYSTEM 0 -#define HSA_MEM_HEAP_TYPE_FB_PUBLIC 1 -#define HSA_MEM_HEAP_TYPE_FB_PRIVATE 2 -#define HSA_MEM_HEAP_TYPE_GPU_GDS 3 -#define HSA_MEM_HEAP_TYPE_GPU_LDS 4 -#define HSA_MEM_HEAP_TYPE_GPU_SCRATCH 5 - -#define HSA_MEM_FLAGS_HOT_PLUGGABLE 0x00000001 -#define HSA_MEM_FLAGS_NON_VOLATILE 0x00000002 -#define HSA_MEM_FLAGS_RESERVED 0xfffffffc - struct kfd_mem_properties { struct list_head list; uint32_t heap_type; @@ -116,12 +78,6 @@ struct kfd_mem_properties { struct attribute attr; }; -#define HSA_CACHE_TYPE_DATA 0x00000001 -#define HSA_CACHE_TYPE_INSTRUCTION 0x00000002 -#define HSA_CACHE_TYPE_CPU 0x00000004 -#define HSA_CACHE_TYPE_HSACU 0x00000008 -#define HSA_CACHE_TYPE_RESERVED 0xfffffff0 - struct kfd_cache_properties { struct list_head list; uint32_t processor_id_low; diff --git a/include/uapi/linux/kfd_sysfs.h b/include/uapi/linux/kfd_sysfs.h new file mode 100644 index 000000000000..e1fb78b4bf09 --- /dev/null +++ b/include/uapi/linux/kfd_sysfs.h @@ -0,0 +1,108 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT WITH Linux-syscall-note */ +/* + * Copyright 2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef KFD_SYSFS_H_INCLUDED +#define KFD_SYSFS_H_INCLUDED + +/* Capability bits in node properties */ +#define HSA_CAP_HOT_PLUGGABLE 0x00000001 +#define HSA_CAP_ATS_PRESENT 0x00000002 +#define HSA_CAP_SHARED_WITH_GRAPHICS 0x00000004 +#define HSA_CAP_QUEUE_SIZE_POW2 0x00000008 +#define HSA_CAP_QUEUE_SIZE_32BIT 0x00000010 +#define HSA_CAP_QUEUE_IDLE_EVENT 0x00000020 +#define HSA_CAP_VA_LIMIT 0x00000040 +#define HSA_CAP_WATCH_POINTS_SUPPORTED 0x00000080 +#define HSA_CAP_WATCH_POINTS_TOTALBITS_MASK 0x00000f00 +#define HSA_CAP_WATCH_POINTS_TOTALBITS_SHIFT 8 +#define HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK 0x00003000 +#define HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT 12 + +#define HSA_CAP_DOORBELL_TYPE_PRE_1_0 0x0 +#define HSA_CAP_DOORBELL_TYPE_1_0 0x1 +#define HSA_CAP_DOORBELL_TYPE_2_0 0x2 +#define HSA_CAP_AQL_QUEUE_DOUBLE_MAP 0x00004000 + +/* Old buggy user mode depends on this being 0 */ +#define HSA_CAP_RESERVED_WAS_SRAM_EDCSUPPORTED 0x00080000 + +#define HSA_CAP_MEM_EDCSUPPORTED 0x00100000 +#define HSA_CAP_RASEVENTNOTIFY 0x00200000 +#define HSA_CAP_ASIC_REVISION_MASK 0x03c00000 +#define HSA_CAP_ASIC_REVISION_SHIFT 22 +#define HSA_CAP_SRAM_EDCSUPPORTED 0x04000000 +#define HSA_CAP_SVMAPI_SUPPORTED 0x08000000 +#define HSA_CAP_FLAGS_COHERENTHOSTACCESS 0x10000000 +#define HSA_CAP_RESERVED 0xe00f8000 + +/* Heap types in memory properties */ +#define HSA_MEM_HEAP_TYPE_SYSTEM 0 +#define HSA_MEM_HEAP_TYPE_FB_PUBLIC 1 +#define HSA_MEM_HEAP_TYPE_FB_PRIVATE 2 +#define HSA_MEM_HEAP_TYPE_GPU_GDS 3 +#define HSA_MEM_HEAP_TYPE_GPU_LDS 4 +#define HSA_MEM_HEAP_TYPE_GPU_SCRATCH 5 + +/* Flag bits in memory properties */ +#define HSA_MEM_FLAGS_HOT_PLUGGABLE 0x00000001 +#define HSA_MEM_FLAGS_NON_VOLATILE 0x00000002 +#define HSA_MEM_FLAGS_RESERVED 0xfffffffc + +/* Cache types in cache properties */ +#define HSA_CACHE_TYPE_DATA 0x00000001 +#define HSA_CACHE_TYPE_INSTRUCTION 0x00000002 +#define HSA_CACHE_TYPE_CPU 0x00000004 +#define HSA_CACHE_TYPE_HSACU 0x00000008 +#define HSA_CACHE_TYPE_RESERVED 0xfffffff0 + +/* Link types in IO link properties (matches CRAT link types) */ +#define HSA_IOLINK_TYPE_UNDEFINED 0 +#define HSA_IOLINK_TYPE_HYPERTRANSPORT 1 +#define HSA_IOLINK_TYPE_PCIEXPRESS 2 +#define HSA_IOLINK_TYPE_AMBA 3 +#define HSA_IOLINK_TYPE_MIPI 4 +#define HSA_IOLINK_TYPE_QPI_1_1 5 +#define HSA_IOLINK_TYPE_RESERVED1 6 +#define HSA_IOLINK_TYPE_RESERVED2 7 +#define HSA_IOLINK_TYPE_RAPID_IO 8 +#define HSA_IOLINK_TYPE_INFINIBAND 9 +#define HSA_IOLINK_TYPE_RESERVED3 10 +#define HSA_IOLINK_TYPE_XGMI 11 +#define HSA_IOLINK_TYPE_XGOP 12 +#define HSA_IOLINK_TYPE_GZ 13 +#define HSA_IOLINK_TYPE_ETHERNET_RDMA 14 +#define HSA_IOLINK_TYPE_RDMA_OTHER 15 +#define HSA_IOLINK_TYPE_OTHER 16 + +/* Flag bits in IO link properties (matches CRAT flags, excluding the + * bi-directional flag, which is not offially part of the CRAT spec, and + * only used internally in KFD) + */ +#define HSA_IOLINK_FLAGS_ENABLED (1 << 0) +#define HSA_IOLINK_FLAGS_NON_COHERENT (1 << 1) +#define HSA_IOLINK_FLAGS_NO_ATOMICS_32_BIT (1 << 2) +#define HSA_IOLINK_FLAGS_NO_ATOMICS_64_BIT (1 << 3) +#define HSA_IOLINK_FLAGS_NO_PEER_TO_PEER_DMA (1 << 4) +#define HSA_IOLINK_FLAGS_RESERVED 0xffffffe0 + +#endif -- cgit v1.2.3 From bc2dfc02836b1133d1bf4d22aa13d48ac98eabef Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Sat, 23 Oct 2021 11:10:50 +0200 Subject: cfg80211: implement APIs for dedicated radar detection HW If a dedicated (off-channel) radar detection hardware (chain) is available in the hardware/driver, allow this to be used by calling the NL80211_CMD_RADAR_DETECT command with a new flag attribute requesting off-channel radar detection is used. Offchannel CAC (channel availability check) avoids the CAC downtime when switching to a radar channel or when turning on the AP. Drivers advertise support for this using the new feature flag NL80211_EXT_FEATURE_RADAR_OFFCHAN. Tested-by: Evelyn Tsai Signed-off-by: Lorenzo Bianconi Link: https://lore.kernel.org/r/7468e291ef5d05d692c1738d25b8f778d8ea5c3f.1634979655.git.lorenzo@kernel.org Link: https://lore.kernel.org/r/1e60e60fef00e14401adae81c3d49f3e5f307537.1634979655.git.lorenzo@kernel.org Link: https://lore.kernel.org/r/85fa50f57fc3adb2934c8d9ca0be30394de6b7e8.1634979655.git.lorenzo@kernel.org Link: https://lore.kernel.org/r/4b6c08671ad59aae0ac46fc94c02f31b1610eb72.1634979655.git.lorenzo@kernel.org Link: https://lore.kernel.org/r/241849ccaf2c228873c6f8495bf87b19159ba458.1634979655.git.lorenzo@kernel.org [remove offchan_mutex, fix cfg80211_stop_offchan_radar_detection(), remove gfp_t argument, fix documentation, fix tracing] Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 25 ++++++++++ include/uapi/linux/nl80211.h | 13 +++++ net/wireless/core.c | 3 ++ net/wireless/core.h | 13 +++++ net/wireless/mlme.c | 113 +++++++++++++++++++++++++++++++++++++++++++ net/wireless/nl80211.c | 17 ++++--- net/wireless/rdev-ops.h | 17 +++++++ net/wireless/trace.h | 19 ++++++++ 8 files changed, 214 insertions(+), 6 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 423f97b982ff..db8866d42a4b 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -4072,6 +4072,15 @@ struct mgmt_frame_regs { * @set_fils_aad: Set FILS AAD data to the AP driver so that the driver can use * those to decrypt (Re)Association Request and encrypt (Re)Association * Response frame. + * + * @set_radar_offchan: Configure dedicated offchannel chain available for + * radar/CAC detection on some hw. This chain can't be used to transmit + * or receive frames and it is bounded to a running wdev. + * Offchannel radar/CAC detection allows to avoid the CAC downtime + * switching to a different channel during CAC detection on the selected + * radar channel. + * The caller is expected to set chandef pointer to NULL in order to + * disable offchannel CAC/radar detection. */ struct cfg80211_ops { int (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow); @@ -4404,6 +4413,8 @@ struct cfg80211_ops { struct cfg80211_color_change_settings *params); int (*set_fils_aad)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_fils_aad *fils_aad); + int (*set_radar_offchan)(struct wiphy *wiphy, + struct cfg80211_chan_def *chandef); }; /* @@ -7633,6 +7644,20 @@ void cfg80211_cac_event(struct net_device *netdev, const struct cfg80211_chan_def *chandef, enum nl80211_radar_event event, gfp_t gfp); +/** + * cfg80211_offchan_cac_event - Channel Availability Check (CAC) offchan event + * @wiphy: the wiphy + * @chandef: chandef for the current channel + * @event: type of event + * + * This function is called when a Channel Availability Check (CAC) is finished, + * started or aborted by a offchannel dedicated chain. + * + * Note that this acquires the wiphy lock. + */ +void cfg80211_offchan_cac_event(struct wiphy *wiphy, + const struct cfg80211_chan_def *chandef, + enum nl80211_radar_event event); /** * cfg80211_gtk_rekey_notify - notify userspace about driver rekeying diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 61cab81e920d..3e734826792f 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2639,6 +2639,13 @@ enum nl80211_commands { * Mandatory parameter for the transmitting interface to enable MBSSID. * Optional for the non-transmitting interfaces. * + * @NL80211_ATTR_RADAR_OFFCHAN: Configure dedicated offchannel chain available for + * radar/CAC detection on some hw. This chain can't be used to transmit + * or receive frames and it is bounded to a running wdev. + * Offchannel radar/CAC detection allows to avoid the CAC downtime + * switching on a different channel during CAC detection on the selected + * radar channel. + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -3145,6 +3152,8 @@ enum nl80211_attrs { NL80211_ATTR_MBSSID_CONFIG, NL80211_ATTR_MBSSID_ELEMS, + NL80211_ATTR_RADAR_OFFCHAN, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -6051,6 +6060,9 @@ enum nl80211_feature_flags { * frames. Userspace has to share FILS AAD details to the driver by using * @NL80211_CMD_SET_FILS_AAD. * + * @NL80211_EXT_FEATURE_RADAR_OFFCHAN: Device supports offchannel radar/CAC + * detection. + * * @NUM_NL80211_EXT_FEATURES: number of extended features. * @MAX_NL80211_EXT_FEATURES: highest extended feature index. */ @@ -6117,6 +6129,7 @@ enum nl80211_ext_feature_index { NL80211_EXT_FEATURE_PROT_RANGE_NEGO_AND_MEASURE, NL80211_EXT_FEATURE_BSS_COLOR, NL80211_EXT_FEATURE_FILS_CRYPTO_OFFLOAD, + NL80211_EXT_FEATURE_RADAR_OFFCHAN, /* add new features before the definition below */ NUM_NL80211_EXT_FEATURES, diff --git a/net/wireless/core.c b/net/wireless/core.c index eb297e1015e0..39b2d4ae581d 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -545,6 +545,7 @@ use_default_name: INIT_WORK(&rdev->rfkill_block, cfg80211_rfkill_block_work); INIT_WORK(&rdev->conn_work, cfg80211_conn_work); INIT_WORK(&rdev->event_work, cfg80211_event_work); + INIT_DELAYED_WORK(&rdev->offchan_cac_work, cfg80211_offchan_cac_work); init_waitqueue_head(&rdev->dev_wait); @@ -1207,6 +1208,8 @@ void __cfg80211_leave(struct cfg80211_registered_device *rdev, cfg80211_pmsr_wdev_down(wdev); + cfg80211_stop_offchan_radar_detection(wdev); + switch (wdev->iftype) { case NL80211_IFTYPE_ADHOC: __cfg80211_leave_ibss(rdev, dev, true); diff --git a/net/wireless/core.h b/net/wireless/core.h index 1720abf36f92..612d460dcde0 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -84,6 +84,10 @@ struct cfg80211_registered_device { struct delayed_work dfs_update_channels_wk; + struct wireless_dev *offchan_radar_wdev; + struct cfg80211_chan_def offchan_radar_chandef; + struct delayed_work offchan_cac_work; + /* netlink port which started critical protocol (0 means not started) */ u32 crit_proto_nlportid; @@ -491,6 +495,15 @@ cfg80211_chandef_dfs_cac_time(struct wiphy *wiphy, void cfg80211_sched_dfs_chan_update(struct cfg80211_registered_device *rdev); +int +cfg80211_start_offchan_radar_detection(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev, + struct cfg80211_chan_def *chandef); + +void cfg80211_stop_offchan_radar_detection(struct wireless_dev *wdev); + +void cfg80211_offchan_cac_work(struct work_struct *work); + bool cfg80211_any_wiphy_oper_chan(struct wiphy *wiphy, struct ieee80211_channel *chan); diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 783acd2c4211..46f2ec4d50d7 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -970,3 +970,116 @@ void cfg80211_cac_event(struct net_device *netdev, nl80211_radar_notify(rdev, chandef, event, netdev, gfp); } EXPORT_SYMBOL(cfg80211_cac_event); + +void cfg80211_offchan_cac_work(struct work_struct *work) +{ + struct delayed_work *delayed_work = to_delayed_work(work); + struct cfg80211_registered_device *rdev; + + rdev = container_of(delayed_work, struct cfg80211_registered_device, + offchan_cac_work); + cfg80211_offchan_cac_event(&rdev->wiphy, &rdev->offchan_radar_chandef, + NL80211_RADAR_CAC_FINISHED); +} + +static void +__cfg80211_offchan_cac_event(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev, + const struct cfg80211_chan_def *chandef, + enum nl80211_radar_event event) +{ + struct wiphy *wiphy = &rdev->wiphy; + struct net_device *netdev; + + lockdep_assert_wiphy(&rdev->wiphy); + + if (event != NL80211_RADAR_CAC_STARTED && !rdev->offchan_radar_wdev) + return; + + switch (event) { + case NL80211_RADAR_CAC_FINISHED: + cfg80211_set_dfs_state(wiphy, chandef, NL80211_DFS_AVAILABLE); + memcpy(&rdev->cac_done_chandef, chandef, sizeof(*chandef)); + queue_work(cfg80211_wq, &rdev->propagate_cac_done_wk); + cfg80211_sched_dfs_chan_update(rdev); + wdev = rdev->offchan_radar_wdev; + rdev->offchan_radar_wdev = NULL; + break; + case NL80211_RADAR_CAC_ABORTED: + cancel_delayed_work(&rdev->offchan_cac_work); + wdev = rdev->offchan_radar_wdev; + rdev->offchan_radar_wdev = NULL; + break; + case NL80211_RADAR_CAC_STARTED: + WARN_ON(!wdev); + rdev->offchan_radar_wdev = wdev; + break; + default: + return; + } + + netdev = wdev ? wdev->netdev : NULL; + nl80211_radar_notify(rdev, chandef, event, netdev, GFP_KERNEL); +} + +void cfg80211_offchan_cac_event(struct wiphy *wiphy, + const struct cfg80211_chan_def *chandef, + enum nl80211_radar_event event) +{ + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); + + wiphy_lock(wiphy); + __cfg80211_offchan_cac_event(rdev, NULL, chandef, event); + wiphy_unlock(wiphy); +} +EXPORT_SYMBOL(cfg80211_offchan_cac_event); + +int +cfg80211_start_offchan_radar_detection(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev, + struct cfg80211_chan_def *chandef) +{ + unsigned int cac_time_ms; + int err; + + lockdep_assert_wiphy(&rdev->wiphy); + + if (!wiphy_ext_feature_isset(&rdev->wiphy, + NL80211_EXT_FEATURE_RADAR_OFFCHAN)) + return -EOPNOTSUPP; + + if (rdev->offchan_radar_wdev) + return -EBUSY; + + err = rdev_set_radar_offchan(rdev, chandef); + if (err) + return err; + + cac_time_ms = cfg80211_chandef_dfs_cac_time(&rdev->wiphy, chandef); + if (!cac_time_ms) + cac_time_ms = IEEE80211_DFS_MIN_CAC_TIME_MS; + + rdev->offchan_radar_chandef = *chandef; + __cfg80211_offchan_cac_event(rdev, wdev, chandef, + NL80211_RADAR_CAC_STARTED); + queue_delayed_work(cfg80211_wq, &rdev->offchan_cac_work, + msecs_to_jiffies(cac_time_ms)); + + return 0; +} + +void cfg80211_stop_offchan_radar_detection(struct wireless_dev *wdev) +{ + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); + + lockdep_assert_wiphy(wiphy); + + if (wdev != rdev->offchan_radar_wdev) + return; + + rdev_set_radar_offchan(rdev, NULL); + + __cfg80211_offchan_cac_event(rdev, NULL, NULL, + NL80211_RADAR_CAC_ABORTED); +} diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index a27b3b5fa210..83a1ba96e172 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -776,6 +776,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_MBSSID_CONFIG] = NLA_POLICY_NESTED(nl80211_mbssid_config_policy), [NL80211_ATTR_MBSSID_ELEMS] = { .type = NLA_NESTED }, + [NL80211_ATTR_RADAR_OFFCHAN] = { .type = NLA_FLAG }, }; /* policy for the key attributes */ @@ -9284,12 +9285,6 @@ static int nl80211_start_radar_detection(struct sk_buff *skb, if (err) return err; - if (netif_carrier_ok(dev)) - return -EBUSY; - - if (wdev->cac_started) - return -EBUSY; - err = cfg80211_chandef_dfs_required(wiphy, &chandef, wdev->iftype); if (err < 0) return err; @@ -9300,6 +9295,16 @@ static int nl80211_start_radar_detection(struct sk_buff *skb, if (!cfg80211_chandef_dfs_usable(wiphy, &chandef)) return -EINVAL; + if (nla_get_flag(info->attrs[NL80211_ATTR_RADAR_OFFCHAN])) + return cfg80211_start_offchan_radar_detection(rdev, wdev, + &chandef); + + if (netif_carrier_ok(dev)) + return -EBUSY; + + if (wdev->cac_started) + return -EBUSY; + /* CAC start is offloaded to HW and can't be started manually */ if (wiphy_ext_feature_isset(wiphy, NL80211_EXT_FEATURE_DFS_OFFLOAD)) return -EOPNOTSUPP; diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index cc1efec4b27b..8672b3ef99e4 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -1395,4 +1395,21 @@ rdev_set_fils_aad(struct cfg80211_registered_device *rdev, return ret; } +static inline int +rdev_set_radar_offchan(struct cfg80211_registered_device *rdev, + struct cfg80211_chan_def *chandef) +{ + struct wiphy *wiphy = &rdev->wiphy; + int ret; + + if (!rdev->ops->set_radar_offchan) + return -EOPNOTSUPP; + + trace_rdev_set_radar_offchan(wiphy, chandef); + ret = rdev->ops->set_radar_offchan(wiphy, chandef); + trace_rdev_return_int(wiphy, ret); + + return ret; +} + #endif /* __CFG80211_RDEV_OPS */ diff --git a/net/wireless/trace.h b/net/wireless/trace.h index ad6c16a06bcb..0b27eaa14a18 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -3674,6 +3674,25 @@ TRACE_EVENT(cfg80211_bss_color_notify, __entry->color_bitmap) ); +TRACE_EVENT(rdev_set_radar_offchan, + TP_PROTO(struct wiphy *wiphy, struct cfg80211_chan_def *chandef), + + TP_ARGS(wiphy, chandef), + + TP_STRUCT__entry( + WIPHY_ENTRY + CHAN_DEF_ENTRY + ), + + TP_fast_assign( + WIPHY_ASSIGN; + CHAN_DEF_ASSIGN(chandef) + ), + + TP_printk(WIPHY_PR_FMT ", " CHAN_DEF_PR_FMT, + WIPHY_PR_ARG, CHAN_DEF_PR_ARG) +); + #endif /* !__RDEV_OPS_TRACE || TRACE_HEADER_MULTI_READ */ #undef TRACE_INCLUDE_PATH -- cgit v1.2.3 From b88dbe38dca82425a273d126785866af39ba0770 Mon Sep 17 00:00:00 2001 From: Andrzej Pietrasiewicz Date: Tue, 16 Nov 2021 14:38:35 +0000 Subject: media: uapi: Add VP9 stateless decoder controls Add the VP9 stateless decoder controls plus the documentation that goes with it. Signed-off-by: Boris Brezillon Co-developed-by: Ezequiel Garcia Signed-off-by: Ezequiel Garcia Signed-off-by: Adrian Ratiu Signed-off-by: Andrzej Pietrasiewicz Co-developed-by: Daniel Almeida Signed-off-by: Daniel Almeida Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- Documentation/userspace-api/media/v4l/biblio.rst | 10 + .../media/v4l/ext-ctrls-codec-stateless.rst | 573 +++++++++++++++++++++ .../userspace-api/media/v4l/pixfmt-compressed.rst | 15 + .../userspace-api/media/v4l/vidioc-g-ext-ctrls.rst | 8 + .../userspace-api/media/v4l/vidioc-queryctrl.rst | 12 + .../userspace-api/media/videodev2.h.rst.exceptions | 2 + drivers/media/v4l2-core/v4l2-ctrls-core.c | 180 +++++++ drivers/media/v4l2-core/v4l2-ctrls-defs.c | 8 + drivers/media/v4l2-core/v4l2-ioctl.c | 1 + include/media/v4l2-ctrls.h | 4 + include/uapi/linux/v4l2-controls.h | 284 ++++++++++ include/uapi/linux/videodev2.h | 6 + 12 files changed, 1103 insertions(+) (limited to 'include/uapi/linux') diff --git a/Documentation/userspace-api/media/v4l/biblio.rst b/Documentation/userspace-api/media/v4l/biblio.rst index 7b8e6738ff9e..9cd18c153d19 100644 --- a/Documentation/userspace-api/media/v4l/biblio.rst +++ b/Documentation/userspace-api/media/v4l/biblio.rst @@ -417,3 +417,13 @@ VP8 :title: RFC 6386: "VP8 Data Format and Decoding Guide" :author: J. Bankoski et al. + +.. _vp9: + +VP9 +=== + + +:title: VP9 Bitstream & Decoding Process Specification + +:author: Adrian Grange (Google), Peter de Rivaz (Argon Design), Jonathan Hunt (Argon Design) diff --git a/Documentation/userspace-api/media/v4l/ext-ctrls-codec-stateless.rst b/Documentation/userspace-api/media/v4l/ext-ctrls-codec-stateless.rst index 72f5e85b4f34..cc080c4257d0 100644 --- a/Documentation/userspace-api/media/v4l/ext-ctrls-codec-stateless.rst +++ b/Documentation/userspace-api/media/v4l/ext-ctrls-codec-stateless.rst @@ -1458,3 +1458,576 @@ FWHT Flags .. raw:: latex \normalsize + +.. _v4l2-codec-stateless-vp9: + +``V4L2_CID_STATELESS_VP9_COMPRESSED_HDR (struct)`` + Stores VP9 probabilities updates as parsed from the current compressed frame + header. A value of zero in an array element means no update of the relevant + probability. Motion vector-related updates contain a new value or zero. All + other updates contain values translated with inv_map_table[] (see 6.3.5 in + :ref:`vp9`). + +.. c:type:: v4l2_ctrl_vp9_compressed_hdr + +.. tabularcolumns:: |p{1cm}|p{4.8cm}|p{11.4cm}| + +.. cssclass:: longtable + +.. flat-table:: struct v4l2_ctrl_vp9_compressed_hdr + :header-rows: 0 + :stub-columns: 0 + :widths: 1 1 2 + + * - __u8 + - ``tx_mode`` + - Specifies the TX mode. See :ref:`TX Mode ` for more details. + * - __u8 + - ``tx8[2][1]`` + - TX 8x8 probabilities delta. + * - __u8 + - ``tx16[2][2]`` + - TX 16x16 probabilities delta. + * - __u8 + - ``tx32[2][3]`` + - TX 32x32 probabilities delta. + * - __u8 + - ``coef[4][2][2][6][6][3]`` + - Coefficient probabilities delta. + * - __u8 + - ``skip[3]`` + - Skip probabilities delta. + * - __u8 + - ``inter_mode[7][3]`` + - Inter prediction mode probabilities delta. + * - __u8 + - ``interp_filter[4][2]`` + - Interpolation filter probabilities delta. + * - __u8 + - ``is_inter[4]`` + - Is inter-block probabilities delta. + * - __u8 + - ``comp_mode[5]`` + - Compound prediction mode probabilities delta. + * - __u8 + - ``single_ref[5][2]`` + - Single reference probabilities delta. + * - __u8 + - ``comp_ref[5]`` + - Compound reference probabilities delta. + * - __u8 + - ``y_mode[4][9]`` + - Y prediction mode probabilities delta. + * - __u8 + - ``uv_mode[10][9]`` + - UV prediction mode probabilities delta. + * - __u8 + - ``partition[16][3]`` + - Partition probabilities delta. + * - __u8 + - ``mv.joint[3]`` + - Motion vector joint probabilities delta. + * - __u8 + - ``mv.sign[2]`` + - Motion vector sign probabilities delta. + * - __u8 + - ``mv.classes[2][10]`` + - Motion vector class probabilities delta. + * - __u8 + - ``mv.class0_bit[2]`` + - Motion vector class0 bit probabilities delta. + * - __u8 + - ``mv.bits[2][10]`` + - Motion vector bits probabilities delta. + * - __u8 + - ``mv.class0_fr[2][2][3]`` + - Motion vector class0 fractional bit probabilities delta. + * - __u8 + - ``mv.fr[2][3]`` + - Motion vector fractional bit probabilities delta. + * - __u8 + - ``mv.class0_hp[2]`` + - Motion vector class0 high precision fractional bit probabilities delta. + * - __u8 + - ``mv.hp[2]`` + - Motion vector high precision fractional bit probabilities delta. + +.. _vp9_tx_mode: + +``TX Mode`` + +.. tabularcolumns:: |p{6.5cm}|p{0.5cm}|p{10.3cm}| + +.. flat-table:: + :header-rows: 0 + :stub-columns: 0 + :widths: 1 1 2 + + * - ``V4L2_VP9_TX_MODE_ONLY_4X4`` + - 0 + - Transform size is 4x4. + * - ``V4L2_VP9_TX_MODE_ALLOW_8X8`` + - 1 + - Transform size can be up to 8x8. + * - ``V4L2_VP9_TX_MODE_ALLOW_16X16`` + - 2 + - Transform size can be up to 16x16. + * - ``V4L2_VP9_TX_MODE_ALLOW_32X32`` + - 3 + - transform size can be up to 32x32. + * - ``V4L2_VP9_TX_MODE_SELECT`` + - 4 + - Bitstream contains the transform size for each block. + +See section '7.3.1 Tx mode semantics' of the :ref:`vp9` specification for more details. + +``V4L2_CID_STATELESS_VP9_FRAME (struct)`` + Specifies the frame parameters for the associated VP9 frame decode request. + This includes the necessary parameters for configuring a stateless hardware + decoding pipeline for VP9. The bitstream parameters are defined according + to :ref:`vp9`. + +.. c:type:: v4l2_ctrl_vp9_frame + +.. raw:: latex + + \small + +.. tabularcolumns:: |p{4.7cm}|p{5.5cm}|p{7.1cm}| + +.. cssclass:: longtable + +.. flat-table:: struct v4l2_ctrl_vp9_frame + :header-rows: 0 + :stub-columns: 0 + :widths: 1 1 2 + + * - struct :c:type:`v4l2_vp9_loop_filter` + - ``lf`` + - Loop filter parameters. See struct :c:type:`v4l2_vp9_loop_filter` for more details. + * - struct :c:type:`v4l2_vp9_quantization` + - ``quant`` + - Quantization parameters. See :c:type:`v4l2_vp9_quantization` for more details. + * - struct :c:type:`v4l2_vp9_segmentation` + - ``seg`` + - Segmentation parameters. See :c:type:`v4l2_vp9_segmentation` for more details. + * - __u32 + - ``flags`` + - Combination of V4L2_VP9_FRAME_FLAG_* flags. See :ref:`Frame Flags`. + * - __u16 + - ``compressed_header_size`` + - Compressed header size in bytes. + * - __u16 + - ``uncompressed_header_size`` + - Uncompressed header size in bytes. + * - __u16 + - ``frame_width_minus_1`` + - Add 1 to get the frame width expressed in pixels. See section 7.2.3 in :ref:`vp9`. + * - __u16 + - ``frame_height_minus_1`` + - Add 1 to get the frame height expressed in pixels. See section 7.2.3 in :ref:`vp9`. + * - __u16 + - ``render_width_minus_1`` + - Add 1 to get the expected render width expressed in pixels. This is + not used during the decoding process but might be used by HW scalers to + prepare a frame that's ready for scanout. See section 7.2.4 in :ref:`vp9`. + * - __u16 + - render_height_minus_1 + - Add 1 to get the expected render height expressed in pixels. This is + not used during the decoding process but might be used by HW scalers to + prepare a frame that's ready for scanout. See section 7.2.4 in :ref:`vp9`. + * - __u64 + - ``last_frame_ts`` + - "last" reference buffer timestamp. + The timestamp refers to the ``timestamp`` field in + struct :c:type:`v4l2_buffer`. Use the :c:func:`v4l2_timeval_to_ns()` + function to convert the struct :c:type:`timeval` in struct + :c:type:`v4l2_buffer` to a __u64. + * - __u64 + - ``golden_frame_ts`` + - "golden" reference buffer timestamp. + The timestamp refers to the ``timestamp`` field in + struct :c:type:`v4l2_buffer`. Use the :c:func:`v4l2_timeval_to_ns()` + function to convert the struct :c:type:`timeval` in struct + :c:type:`v4l2_buffer` to a __u64. + * - __u64 + - ``alt_frame_ts`` + - "alt" reference buffer timestamp. + The timestamp refers to the ``timestamp`` field in + struct :c:type:`v4l2_buffer`. Use the :c:func:`v4l2_timeval_to_ns()` + function to convert the struct :c:type:`timeval` in struct + :c:type:`v4l2_buffer` to a __u64. + * - __u8 + - ``ref_frame_sign_bias`` + - a bitfield specifying whether the sign bias is set for a given + reference frame. See :ref:`Reference Frame Sign Bias` + for more details. + * - __u8 + - ``reset_frame_context`` + - specifies whether the frame context should be reset to default values. See + :ref:`Reset Frame Context` for more details. + * - __u8 + - ``frame_context_idx`` + - Frame context that should be used/updated. + * - __u8 + - ``profile`` + - VP9 profile. Can be 0, 1, 2 or 3. + * - __u8 + - ``bit_depth`` + - Component depth in bits. Can be 8, 10 or 12. Note that not all profiles + support 10 and/or 12 bits depths. + * - __u8 + - ``interpolation_filter`` + - Specifies the filter selection used for performing inter prediction. See + :ref:`Interpolation Filter` for more details. + * - __u8 + - ``tile_cols_log2`` + - Specifies the base 2 logarithm of the width of each tile (where the + width is measured in units of 8x8 blocks). Shall be less than or equal + to 6. + * - __u8 + - ``tile_rows_log2`` + - Specifies the base 2 logarithm of the height of each tile (where the + height is measured in units of 8x8 blocks). + * - __u8 + - ``reference_mode`` + - Specifies the type of inter prediction to be used. See + :ref:`Reference Mode` for more details. + * - __u8 + - ``reserved[7]`` + - Applications and drivers must set this to zero. + +.. raw:: latex + + \normalsize + +.. _vp9_frame_flags: + +``Frame Flags`` + +.. tabularcolumns:: |p{10.0cm}|p{1.2cm}|p{6.1cm}| + +.. flat-table:: + :header-rows: 0 + :stub-columns: 0 + :widths: 1 1 2 + + * - ``V4L2_VP9_FRAME_FLAG_KEY_FRAME`` + - 0x001 + - The frame is a key frame. + * - ``V4L2_VP9_FRAME_FLAG_SHOW_FRAME`` + - 0x002 + - The frame should be displayed. + * - ``V4L2_VP9_FRAME_FLAG_ERROR_RESILIENT`` + - 0x004 + - The decoding should be error resilient. + * - ``V4L2_VP9_FRAME_FLAG_INTRA_ONLY`` + - 0x008 + - The frame does not reference other frames. + * - ``V4L2_VP9_FRAME_FLAG_ALLOW_HIGH_PREC_MV`` + - 0x010 + - The frame can use high precision motion vectors. + * - ``V4L2_VP9_FRAME_FLAG_REFRESH_FRAME_CTX`` + - 0x020 + - Frame context should be updated after decoding. + * - ``V4L2_VP9_FRAME_FLAG_PARALLEL_DEC_MODE`` + - 0x040 + - Parallel decoding is used. + * - ``V4L2_VP9_FRAME_FLAG_X_SUBSAMPLING`` + - 0x080 + - Vertical subsampling is enabled. + * - ``V4L2_VP9_FRAME_FLAG_Y_SUBSAMPLING`` + - 0x100 + - Horizontal subsampling is enabled. + * - ``V4L2_VP9_FRAME_FLAG_COLOR_RANGE_FULL_SWING`` + - 0x200 + - The full UV range is used. + +.. _vp9_ref_frame_sign_bias: + +``Reference Frame Sign Bias`` + +.. tabularcolumns:: |p{7.0cm}|p{1.2cm}|p{9.1cm}| + +.. flat-table:: + :header-rows: 0 + :stub-columns: 0 + :widths: 1 1 2 + + * - ``V4L2_VP9_SIGN_BIAS_LAST`` + - 0x1 + - Sign bias is set for the last reference frame. + * - ``V4L2_VP9_SIGN_BIAS_GOLDEN`` + - 0x2 + - Sign bias is set for the golden reference frame. + * - ``V4L2_VP9_SIGN_BIAS_ALT`` + - 0x2 + - Sign bias is set for the alt reference frame. + +.. _vp9_reset_frame_context: + +``Reset Frame Context`` + +.. tabularcolumns:: |p{7.0cm}|p{1.2cm}|p{9.1cm}| + +.. flat-table:: + :header-rows: 0 + :stub-columns: 0 + :widths: 1 1 2 + + * - ``V4L2_VP9_RESET_FRAME_CTX_NONE`` + - 0 + - Do not reset any frame context. + * - ``V4L2_VP9_RESET_FRAME_CTX_SPEC`` + - 1 + - Reset the frame context pointed to by + :c:type:`v4l2_ctrl_vp9_frame`.frame_context_idx. + * - ``V4L2_VP9_RESET_FRAME_CTX_ALL`` + - 2 + - Reset all frame contexts. + +See section '7.2 Uncompressed header semantics' of the :ref:`vp9` specification +for more details. + +.. _vp9_interpolation_filter: + +``Interpolation Filter`` + +.. tabularcolumns:: |p{9.0cm}|p{1.2cm}|p{7.1cm}| + +.. flat-table:: + :header-rows: 0 + :stub-columns: 0 + :widths: 1 1 2 + + * - ``V4L2_VP9_INTERP_FILTER_EIGHTTAP`` + - 0 + - Eight tap filter. + * - ``V4L2_VP9_INTERP_FILTER_EIGHTTAP_SMOOTH`` + - 1 + - Eight tap smooth filter. + * - ``V4L2_VP9_INTERP_FILTER_EIGHTTAP_SHARP`` + - 2 + - Eeight tap sharp filter. + * - ``V4L2_VP9_INTERP_FILTER_BILINEAR`` + - 3 + - Bilinear filter. + * - ``V4L2_VP9_INTERP_FILTER_SWITCHABLE`` + - 4 + - Filter selection is signaled at the block level. + +See section '7.2.7 Interpolation filter semantics' of the :ref:`vp9` specification +for more details. + +.. _vp9_reference_mode: + +``Reference Mode`` + +.. tabularcolumns:: |p{9.6cm}|p{0.5cm}|p{7.2cm}| + +.. flat-table:: + :header-rows: 0 + :stub-columns: 0 + :widths: 1 1 2 + + * - ``V4L2_VP9_REFERENCE_MODE_SINGLE_REFERENCE`` + - 0 + - Indicates that all the inter blocks use only a single reference frame + to generate motion compensated prediction. + * - ``V4L2_VP9_REFERENCE_MODE_COMPOUND_REFERENCE`` + - 1 + - Requires all the inter blocks to use compound mode. Single reference + frame prediction is not allowed. + * - ``V4L2_VP9_REFERENCE_MODE_SELECT`` + - 2 + - Allows each individual inter block to select between single and + compound prediction modes. + +See section '7.3.6 Frame reference mode semantics' of the :ref:`vp9` specification for more details. + +.. c:type:: v4l2_vp9_segmentation + +Encodes the quantization parameters. See section '7.2.10 Segmentation +params syntax' of the :ref:`vp9` specification for more details. + +.. tabularcolumns:: |p{0.8cm}|p{5cm}|p{11.4cm}| + +.. cssclass:: longtable + +.. flat-table:: struct v4l2_vp9_segmentation + :header-rows: 0 + :stub-columns: 0 + :widths: 1 1 2 + + * - __u8 + - ``feature_data[8][4]`` + - Data attached to each feature. Data entry is only valid if the feature + is enabled. The array shall be indexed with segment number as the first dimension + (0..7) and one of V4L2_VP9_SEG_* as the second dimension. + See :ref:`Segment Feature IDs`. + * - __u8 + - ``feature_enabled[8]`` + - Bitmask defining which features are enabled in each segment. The value for each + segment is a combination of V4L2_VP9_SEGMENT_FEATURE_ENABLED(id) values where id is + one of V4L2_VP9_SEG_*. See :ref:`Segment Feature IDs`. + * - __u8 + - ``tree_probs[7]`` + - Specifies the probability values to be used when decoding a Segment-ID. + See '5.15. Segmentation map' section of :ref:`vp9` for more details. + * - __u8 + - ``pred_probs[3]`` + - Specifies the probability values to be used when decoding a + Predicted-Segment-ID. See '6.4.14. Get segment id syntax' + section of :ref:`vp9` for more details. + * - __u8 + - ``flags`` + - Combination of V4L2_VP9_SEGMENTATION_FLAG_* flags. See + :ref:`Segmentation Flags`. + * - __u8 + - ``reserved[5]`` + - Applications and drivers must set this to zero. + +.. _vp9_segment_feature: + +``Segment feature IDs`` + +.. tabularcolumns:: |p{6.0cm}|p{1cm}|p{10.3cm}| + +.. flat-table:: + :header-rows: 0 + :stub-columns: 0 + :widths: 1 1 2 + + * - ``V4L2_VP9_SEG_LVL_ALT_Q`` + - 0 + - Quantizer segment feature. + * - ``V4L2_VP9_SEG_LVL_ALT_L`` + - 1 + - Loop filter segment feature. + * - ``V4L2_VP9_SEG_LVL_REF_FRAME`` + - 2 + - Reference frame segment feature. + * - ``V4L2_VP9_SEG_LVL_SKIP`` + - 3 + - Skip segment feature. + * - ``V4L2_VP9_SEG_LVL_MAX`` + - 4 + - Number of segment features. + +.. _vp9_segmentation_flags: + +``Segmentation Flags`` + +.. tabularcolumns:: |p{10.6cm}|p{0.8cm}|p{5.9cm}| + +.. flat-table:: + :header-rows: 0 + :stub-columns: 0 + :widths: 1 1 2 + + * - ``V4L2_VP9_SEGMENTATION_FLAG_ENABLED`` + - 0x01 + - Indicates that this frame makes use of the segmentation tool. + * - ``V4L2_VP9_SEGMENTATION_FLAG_UPDATE_MAP`` + - 0x02 + - Indicates that the segmentation map should be updated during the + decoding of this frame. + * - ``V4L2_VP9_SEGMENTATION_FLAG_TEMPORAL_UPDATE`` + - 0x04 + - Indicates that the updates to the segmentation map are coded + relative to the existing segmentation map. + * - ``V4L2_VP9_SEGMENTATION_FLAG_UPDATE_DATA`` + - 0x08 + - Indicates that new parameters are about to be specified for each + segment. + * - ``V4L2_VP9_SEGMENTATION_FLAG_ABS_OR_DELTA_UPDATE`` + - 0x10 + - Indicates that the segmentation parameters represent the actual values + to be used. + +.. c:type:: v4l2_vp9_quantization + +Encodes the quantization parameters. See section '7.2.9 Quantization params +syntax' of the VP9 specification for more details. + +.. tabularcolumns:: |p{0.8cm}|p{4cm}|p{12.4cm}| + +.. cssclass:: longtable + +.. flat-table:: struct v4l2_vp9_quantization + :header-rows: 0 + :stub-columns: 0 + :widths: 1 1 2 + + * - __u8 + - ``base_q_idx`` + - Indicates the base frame qindex. + * - __s8 + - ``delta_q_y_dc`` + - Indicates the Y DC quantizer relative to base_q_idx. + * - __s8 + - ``delta_q_uv_dc`` + - Indicates the UV DC quantizer relative to base_q_idx. + * - __s8 + - ``delta_q_uv_ac`` + - Indicates the UV AC quantizer relative to base_q_idx. + * - __u8 + - ``reserved[4]`` + - Applications and drivers must set this to zero. + +.. c:type:: v4l2_vp9_loop_filter + +This structure contains all loop filter related parameters. See sections +'7.2.8 Loop filter semantics' of the :ref:`vp9` specification for more details. + +.. tabularcolumns:: |p{0.8cm}|p{4cm}|p{12.4cm}| + +.. cssclass:: longtable + +.. flat-table:: struct v4l2_vp9_loop_filter + :header-rows: 0 + :stub-columns: 0 + :widths: 1 1 2 + + * - __s8 + - ``ref_deltas[4]`` + - Contains the adjustment needed for the filter level based on the chosen + reference frame. + * - __s8 + - ``mode_deltas[2]`` + - Contains the adjustment needed for the filter level based on the chosen + mode. + * - __u8 + - ``level`` + - Indicates the loop filter strength. + * - __u8 + - ``sharpness`` + - Indicates the sharpness level. + * - __u8 + - ``flags`` + - Combination of V4L2_VP9_LOOP_FILTER_FLAG_* flags. + See :ref:`Loop Filter Flags `. + * - __u8 + - ``reserved[7]`` + - Applications and drivers must set this to zero. + + +.. _vp9_loop_filter_flags: + +``Loop Filter Flags`` + +.. tabularcolumns:: |p{9.6cm}|p{0.5cm}|p{7.2cm}| + +.. flat-table:: + :header-rows: 0 + :stub-columns: 0 + :widths: 1 1 2 + + * - ``V4L2_VP9_LOOP_FILTER_FLAG_DELTA_ENABLED`` + - 0x1 + - When set, the filter level depends on the mode and reference frame used + to predict a block. + * - ``V4L2_VP9_LOOP_FILTER_FLAG_DELTA_UPDATE`` + - 0x2 + - When set, the bitstream contains additional syntax elements that + specify which mode and reference frame deltas are to be updated. diff --git a/Documentation/userspace-api/media/v4l/pixfmt-compressed.rst b/Documentation/userspace-api/media/v4l/pixfmt-compressed.rst index 0ede39907ee2..967fc803ef94 100644 --- a/Documentation/userspace-api/media/v4l/pixfmt-compressed.rst +++ b/Documentation/userspace-api/media/v4l/pixfmt-compressed.rst @@ -172,6 +172,21 @@ Compressed Formats - VP9 compressed video frame. The encoder generates one compressed frame per buffer, and the decoder requires one compressed frame per buffer. + * .. _V4L2-PIX-FMT-VP9-FRAME: + + - ``V4L2_PIX_FMT_VP9_FRAME`` + - 'VP9F' + - VP9 parsed frame, including the frame header, as extracted from the container. + This format is adapted for stateless video decoders that implement a + VP9 pipeline with the :ref:`stateless_decoder`. + Metadata associated with the frame to decode is required to be passed + through the ``V4L2_CID_STATELESS_VP9_FRAME`` and + the ``V4L2_CID_STATELESS_VP9_COMPRESSED_HDR`` controls. + See the :ref:`associated Codec Control IDs `. + Exactly one output and one capture buffer must be provided for use with + this pixel format. The output buffer must contain the appropriate number + of macroblocks to decode a full corresponding frame to the matching + capture buffer. * .. _V4L2-PIX-FMT-HEVC: - ``V4L2_PIX_FMT_HEVC`` diff --git a/Documentation/userspace-api/media/v4l/vidioc-g-ext-ctrls.rst b/Documentation/userspace-api/media/v4l/vidioc-g-ext-ctrls.rst index fdde0ae6d521..29971a45a2d4 100644 --- a/Documentation/userspace-api/media/v4l/vidioc-g-ext-ctrls.rst +++ b/Documentation/userspace-api/media/v4l/vidioc-g-ext-ctrls.rst @@ -233,6 +233,14 @@ still cause this situation. - ``p_mpeg2_quantisation`` - A pointer to a struct :c:type:`v4l2_ctrl_mpeg2_quantisation`. Valid if this control is of type ``V4L2_CTRL_TYPE_MPEG2_QUANTISATION``. + * - struct :c:type:`v4l2_ctrl_vp9_compressed_hdr` * + - ``p_vp9_compressed_hdr_probs`` + - A pointer to a struct :c:type:`v4l2_ctrl_vp9_compressed_hdr`. Valid if this + control is of type ``V4L2_CTRL_TYPE_VP9_COMPRESSED_HDR``. + * - struct :c:type:`v4l2_ctrl_vp9_frame` * + - ``p_vp9_frame`` + - A pointer to a struct :c:type:`v4l2_ctrl_vp9_frame`. Valid if this + control is of type ``V4L2_CTRL_TYPE_VP9_FRAME``. * - struct :c:type:`v4l2_ctrl_hdr10_cll_info` * - ``p_hdr10_cll`` - A pointer to a struct :c:type:`v4l2_ctrl_hdr10_cll_info`. Valid if this control is diff --git a/Documentation/userspace-api/media/v4l/vidioc-queryctrl.rst b/Documentation/userspace-api/media/v4l/vidioc-queryctrl.rst index 2f491c17dd5d..88f630252d98 100644 --- a/Documentation/userspace-api/media/v4l/vidioc-queryctrl.rst +++ b/Documentation/userspace-api/media/v4l/vidioc-queryctrl.rst @@ -513,6 +513,18 @@ See also the examples in :ref:`control`. - n/a - A struct :c:type:`v4l2_ctrl_hevc_decode_params`, containing HEVC decoding parameters for stateless video decoders. + * - ``V4L2_CTRL_TYPE_VP9_COMPRESSED_HDR`` + - n/a + - n/a + - n/a + - A struct :c:type:`v4l2_ctrl_vp9_compressed_hdr`, containing VP9 + probabilities updates for stateless video decoders. + * - ``V4L2_CTRL_TYPE_VP9_FRAME`` + - n/a + - n/a + - n/a + - A struct :c:type:`v4l2_ctrl_vp9_frame`, containing VP9 + frame decode parameters for stateless video decoders. .. raw:: latex diff --git a/Documentation/userspace-api/media/videodev2.h.rst.exceptions b/Documentation/userspace-api/media/videodev2.h.rst.exceptions index eb0b1cd37abd..9cbb7a0c354a 100644 --- a/Documentation/userspace-api/media/videodev2.h.rst.exceptions +++ b/Documentation/userspace-api/media/videodev2.h.rst.exceptions @@ -149,6 +149,8 @@ replace symbol V4L2_CTRL_TYPE_HEVC_SLICE_PARAMS :c:type:`v4l2_ctrl_type` replace symbol V4L2_CTRL_TYPE_AREA :c:type:`v4l2_ctrl_type` replace symbol V4L2_CTRL_TYPE_FWHT_PARAMS :c:type:`v4l2_ctrl_type` replace symbol V4L2_CTRL_TYPE_VP8_FRAME :c:type:`v4l2_ctrl_type` +replace symbol V4L2_CTRL_TYPE_VP9_COMPRESSED_HDR :c:type:`v4l2_ctrl_type` +replace symbol V4L2_CTRL_TYPE_VP9_FRAME :c:type:`v4l2_ctrl_type` replace symbol V4L2_CTRL_TYPE_HDR10_CLL_INFO :c:type:`v4l2_ctrl_type` replace symbol V4L2_CTRL_TYPE_HDR10_MASTERING_DISPLAY :c:type:`v4l2_ctrl_type` diff --git a/drivers/media/v4l2-core/v4l2-ctrls-core.c b/drivers/media/v4l2-core/v4l2-ctrls-core.c index 70adfc1b9c81..54abe5245dcc 100644 --- a/drivers/media/v4l2-core/v4l2-ctrls-core.c +++ b/drivers/media/v4l2-core/v4l2-ctrls-core.c @@ -283,6 +283,12 @@ static void std_log(const struct v4l2_ctrl *ctrl) case V4L2_CTRL_TYPE_MPEG2_PICTURE: pr_cont("MPEG2_PICTURE"); break; + case V4L2_CTRL_TYPE_VP9_COMPRESSED_HDR: + pr_cont("VP9_COMPRESSED_HDR"); + break; + case V4L2_CTRL_TYPE_VP9_FRAME: + pr_cont("VP9_FRAME"); + break; default: pr_cont("unknown type %d", ctrl->type); break; @@ -317,6 +323,168 @@ static void std_log(const struct v4l2_ctrl *ctrl) #define zero_reserved(s) \ memset(&(s).reserved, 0, sizeof((s).reserved)) +static int +validate_vp9_lf_params(struct v4l2_vp9_loop_filter *lf) +{ + unsigned int i; + + if (lf->flags & ~(V4L2_VP9_LOOP_FILTER_FLAG_DELTA_ENABLED | + V4L2_VP9_LOOP_FILTER_FLAG_DELTA_UPDATE)) + return -EINVAL; + + /* That all values are in the accepted range. */ + if (lf->level > GENMASK(5, 0)) + return -EINVAL; + + if (lf->sharpness > GENMASK(2, 0)) + return -EINVAL; + + for (i = 0; i < ARRAY_SIZE(lf->ref_deltas); i++) + if (lf->ref_deltas[i] < -63 || lf->ref_deltas[i] > 63) + return -EINVAL; + + for (i = 0; i < ARRAY_SIZE(lf->mode_deltas); i++) + if (lf->mode_deltas[i] < -63 || lf->mode_deltas[i] > 63) + return -EINVAL; + + zero_reserved(*lf); + return 0; +} + +static int +validate_vp9_quant_params(struct v4l2_vp9_quantization *quant) +{ + if (quant->delta_q_y_dc < -15 || quant->delta_q_y_dc > 15 || + quant->delta_q_uv_dc < -15 || quant->delta_q_uv_dc > 15 || + quant->delta_q_uv_ac < -15 || quant->delta_q_uv_ac > 15) + return -EINVAL; + + zero_reserved(*quant); + return 0; +} + +static int +validate_vp9_seg_params(struct v4l2_vp9_segmentation *seg) +{ + unsigned int i, j; + + if (seg->flags & ~(V4L2_VP9_SEGMENTATION_FLAG_ENABLED | + V4L2_VP9_SEGMENTATION_FLAG_UPDATE_MAP | + V4L2_VP9_SEGMENTATION_FLAG_TEMPORAL_UPDATE | + V4L2_VP9_SEGMENTATION_FLAG_UPDATE_DATA | + V4L2_VP9_SEGMENTATION_FLAG_ABS_OR_DELTA_UPDATE)) + return -EINVAL; + + for (i = 0; i < ARRAY_SIZE(seg->feature_enabled); i++) { + if (seg->feature_enabled[i] & + ~V4L2_VP9_SEGMENT_FEATURE_ENABLED_MASK) + return -EINVAL; + } + + for (i = 0; i < ARRAY_SIZE(seg->feature_data); i++) { + const int range[] = { 255, 63, 3, 0 }; + + for (j = 0; j < ARRAY_SIZE(seg->feature_data[j]); j++) { + if (seg->feature_data[i][j] < -range[j] || + seg->feature_data[i][j] > range[j]) + return -EINVAL; + } + } + + zero_reserved(*seg); + return 0; +} + +static int +validate_vp9_compressed_hdr(struct v4l2_ctrl_vp9_compressed_hdr *hdr) +{ + if (hdr->tx_mode > V4L2_VP9_TX_MODE_SELECT) + return -EINVAL; + + return 0; +} + +static int +validate_vp9_frame(struct v4l2_ctrl_vp9_frame *frame) +{ + int ret; + + /* Make sure we're not passed invalid flags. */ + if (frame->flags & ~(V4L2_VP9_FRAME_FLAG_KEY_FRAME | + V4L2_VP9_FRAME_FLAG_SHOW_FRAME | + V4L2_VP9_FRAME_FLAG_ERROR_RESILIENT | + V4L2_VP9_FRAME_FLAG_INTRA_ONLY | + V4L2_VP9_FRAME_FLAG_ALLOW_HIGH_PREC_MV | + V4L2_VP9_FRAME_FLAG_REFRESH_FRAME_CTX | + V4L2_VP9_FRAME_FLAG_PARALLEL_DEC_MODE | + V4L2_VP9_FRAME_FLAG_X_SUBSAMPLING | + V4L2_VP9_FRAME_FLAG_Y_SUBSAMPLING | + V4L2_VP9_FRAME_FLAG_COLOR_RANGE_FULL_SWING)) + return -EINVAL; + + if (frame->flags & V4L2_VP9_FRAME_FLAG_ERROR_RESILIENT && + frame->flags & V4L2_VP9_FRAME_FLAG_REFRESH_FRAME_CTX) + return -EINVAL; + + if (frame->profile > V4L2_VP9_PROFILE_MAX) + return -EINVAL; + + if (frame->reset_frame_context > V4L2_VP9_RESET_FRAME_CTX_ALL) + return -EINVAL; + + if (frame->frame_context_idx >= V4L2_VP9_NUM_FRAME_CTX) + return -EINVAL; + + /* + * Profiles 0 and 1 only support 8-bit depth, profiles 2 and 3 only 10 + * and 12 bit depths. + */ + if ((frame->profile < 2 && frame->bit_depth != 8) || + (frame->profile >= 2 && + (frame->bit_depth != 10 && frame->bit_depth != 12))) + return -EINVAL; + + /* Profile 0 and 2 only accept YUV 4:2:0. */ + if ((frame->profile == 0 || frame->profile == 2) && + (!(frame->flags & V4L2_VP9_FRAME_FLAG_X_SUBSAMPLING) || + !(frame->flags & V4L2_VP9_FRAME_FLAG_Y_SUBSAMPLING))) + return -EINVAL; + + /* Profile 1 and 3 only accept YUV 4:2:2, 4:4:0 and 4:4:4. */ + if ((frame->profile == 1 || frame->profile == 3) && + ((frame->flags & V4L2_VP9_FRAME_FLAG_X_SUBSAMPLING) && + (frame->flags & V4L2_VP9_FRAME_FLAG_Y_SUBSAMPLING))) + return -EINVAL; + + if (frame->interpolation_filter > V4L2_VP9_INTERP_FILTER_SWITCHABLE) + return -EINVAL; + + /* + * According to the spec, tile_cols_log2 shall be less than or equal + * to 6. + */ + if (frame->tile_cols_log2 > 6) + return -EINVAL; + + if (frame->reference_mode > V4L2_VP9_REFERENCE_MODE_SELECT) + return -EINVAL; + + ret = validate_vp9_lf_params(&frame->lf); + if (ret) + return ret; + + ret = validate_vp9_quant_params(&frame->quant); + if (ret) + return ret; + + ret = validate_vp9_seg_params(&frame->seg); + if (ret) + return ret; + + zero_reserved(*frame); + return 0; +} + /* * Compound controls validation requires setting unused fields/flags to zero * in order to properly detect unchanged controls with std_equal's memcmp. @@ -690,6 +858,12 @@ static int std_validate_compound(const struct v4l2_ctrl *ctrl, u32 idx, case V4L2_CTRL_TYPE_HEVC_SCALING_MATRIX: break; + case V4L2_CTRL_TYPE_VP9_COMPRESSED_HDR: + return validate_vp9_compressed_hdr(p); + + case V4L2_CTRL_TYPE_VP9_FRAME: + return validate_vp9_frame(p); + case V4L2_CTRL_TYPE_AREA: area = p; if (!area->width || !area->height) @@ -1255,6 +1429,12 @@ static struct v4l2_ctrl *v4l2_ctrl_new(struct v4l2_ctrl_handler *hdl, case V4L2_CTRL_TYPE_HDR10_MASTERING_DISPLAY: elem_size = sizeof(struct v4l2_ctrl_hdr10_mastering_display); break; + case V4L2_CTRL_TYPE_VP9_COMPRESSED_HDR: + elem_size = sizeof(struct v4l2_ctrl_vp9_compressed_hdr); + break; + case V4L2_CTRL_TYPE_VP9_FRAME: + elem_size = sizeof(struct v4l2_ctrl_vp9_frame); + break; case V4L2_CTRL_TYPE_AREA: elem_size = sizeof(struct v4l2_area); break; diff --git a/drivers/media/v4l2-core/v4l2-ctrls-defs.c b/drivers/media/v4l2-core/v4l2-ctrls-defs.c index 431f7ec17557..54ca4e6b820b 100644 --- a/drivers/media/v4l2-core/v4l2-ctrls-defs.c +++ b/drivers/media/v4l2-core/v4l2-ctrls-defs.c @@ -1178,6 +1178,8 @@ const char *v4l2_ctrl_get_name(u32 id) case V4L2_CID_STATELESS_MPEG2_SEQUENCE: return "MPEG-2 Sequence Header"; case V4L2_CID_STATELESS_MPEG2_PICTURE: return "MPEG-2 Picture Header"; case V4L2_CID_STATELESS_MPEG2_QUANTISATION: return "MPEG-2 Quantisation Matrices"; + case V4L2_CID_STATELESS_VP9_COMPRESSED_HDR: return "VP9 Probabilities Updates"; + case V4L2_CID_STATELESS_VP9_FRAME: return "VP9 Frame Decode Parameters"; /* Colorimetry controls */ /* Keep the order of the 'case's the same as in v4l2-controls.h! */ @@ -1506,6 +1508,12 @@ void v4l2_ctrl_fill(u32 id, const char **name, enum v4l2_ctrl_type *type, case V4L2_CID_MPEG_VIDEO_HEVC_DECODE_PARAMS: *type = V4L2_CTRL_TYPE_HEVC_DECODE_PARAMS; break; + case V4L2_CID_STATELESS_VP9_COMPRESSED_HDR: + *type = V4L2_CTRL_TYPE_VP9_COMPRESSED_HDR; + break; + case V4L2_CID_STATELESS_VP9_FRAME: + *type = V4L2_CTRL_TYPE_VP9_FRAME; + break; case V4L2_CID_UNIT_CELL_SIZE: *type = V4L2_CTRL_TYPE_AREA; *flags |= V4L2_CTRL_FLAG_READ_ONLY; diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c b/drivers/media/v4l2-core/v4l2-ioctl.c index 69b74d0e8a90..9ac557b8e146 100644 --- a/drivers/media/v4l2-core/v4l2-ioctl.c +++ b/drivers/media/v4l2-core/v4l2-ioctl.c @@ -1413,6 +1413,7 @@ static void v4l_fill_fmtdesc(struct v4l2_fmtdesc *fmt) case V4L2_PIX_FMT_VP8: descr = "VP8"; break; case V4L2_PIX_FMT_VP8_FRAME: descr = "VP8 Frame"; break; case V4L2_PIX_FMT_VP9: descr = "VP9"; break; + case V4L2_PIX_FMT_VP9_FRAME: descr = "VP9 Frame"; break; case V4L2_PIX_FMT_HEVC: descr = "HEVC"; break; /* aka H.265 */ case V4L2_PIX_FMT_HEVC_SLICE: descr = "HEVC Parsed Slice Data"; break; case V4L2_PIX_FMT_FWHT: descr = "FWHT"; break; /* used in vicodec */ diff --git a/include/media/v4l2-ctrls.h b/include/media/v4l2-ctrls.h index 575b59fbac77..b3ce438f1329 100644 --- a/include/media/v4l2-ctrls.h +++ b/include/media/v4l2-ctrls.h @@ -50,6 +50,8 @@ struct video_device; * @p_h264_decode_params: Pointer to a struct v4l2_ctrl_h264_decode_params. * @p_h264_pred_weights: Pointer to a struct v4l2_ctrl_h264_pred_weights. * @p_vp8_frame: Pointer to a VP8 frame params structure. + * @p_vp9_compressed_hdr_probs: Pointer to a VP9 frame compressed header probs structure. + * @p_vp9_frame: Pointer to a VP9 frame params structure. * @p_hevc_sps: Pointer to an HEVC sequence parameter set structure. * @p_hevc_pps: Pointer to an HEVC picture parameter set structure. * @p_hevc_slice_params: Pointer to an HEVC slice parameters structure. @@ -80,6 +82,8 @@ union v4l2_ctrl_ptr { struct v4l2_ctrl_hevc_sps *p_hevc_sps; struct v4l2_ctrl_hevc_pps *p_hevc_pps; struct v4l2_ctrl_hevc_slice_params *p_hevc_slice_params; + struct v4l2_ctrl_vp9_compressed_hdr *p_vp9_compressed_hdr_probs; + struct v4l2_ctrl_vp9_frame *p_vp9_frame; struct v4l2_ctrl_hdr10_cll_info *p_hdr10_cll; struct v4l2_ctrl_hdr10_mastering_display *p_hdr10_mastering; struct v4l2_area *p_area; diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h index c9eea93a43a9..c8e0f84d204d 100644 --- a/include/uapi/linux/v4l2-controls.h +++ b/include/uapi/linux/v4l2-controls.h @@ -2018,6 +2018,290 @@ struct v4l2_ctrl_hdr10_mastering_display { __u32 min_display_mastering_luminance; }; +/* Stateless VP9 controls */ + +#define V4L2_VP9_LOOP_FILTER_FLAG_DELTA_ENABLED 0x1 +#define V4L2_VP9_LOOP_FILTER_FLAG_DELTA_UPDATE 0x2 + +/** + * struct v4l2_vp9_loop_filter - VP9 loop filter parameters + * + * @ref_deltas: contains the adjustment needed for the filter level based on the + * chosen reference frame. If this syntax element is not present in the bitstream, + * users should pass its last value. + * @mode_deltas: contains the adjustment needed for the filter level based on the + * chosen mode. If this syntax element is not present in the bitstream, users should + * pass its last value. + * @level: indicates the loop filter strength. + * @sharpness: indicates the sharpness level. + * @flags: combination of V4L2_VP9_LOOP_FILTER_FLAG_{} flags. + * @reserved: padding field. Should be zeroed by applications. + * + * This structure contains all loop filter related parameters. See sections + * '7.2.8 Loop filter semantics' of the VP9 specification for more details. + */ +struct v4l2_vp9_loop_filter { + __s8 ref_deltas[4]; + __s8 mode_deltas[2]; + __u8 level; + __u8 sharpness; + __u8 flags; + __u8 reserved[7]; +}; + +/** + * struct v4l2_vp9_quantization - VP9 quantization parameters + * + * @base_q_idx: indicates the base frame qindex. + * @delta_q_y_dc: indicates the Y DC quantizer relative to base_q_idx. + * @delta_q_uv_dc: indicates the UV DC quantizer relative to base_q_idx. + * @delta_q_uv_ac: indicates the UV AC quantizer relative to base_q_idx. + * @reserved: padding field. Should be zeroed by applications. + * + * Encodes the quantization parameters. See section '7.2.9 Quantization params + * syntax' of the VP9 specification for more details. + */ +struct v4l2_vp9_quantization { + __u8 base_q_idx; + __s8 delta_q_y_dc; + __s8 delta_q_uv_dc; + __s8 delta_q_uv_ac; + __u8 reserved[4]; +}; + +#define V4L2_VP9_SEGMENTATION_FLAG_ENABLED 0x01 +#define V4L2_VP9_SEGMENTATION_FLAG_UPDATE_MAP 0x02 +#define V4L2_VP9_SEGMENTATION_FLAG_TEMPORAL_UPDATE 0x04 +#define V4L2_VP9_SEGMENTATION_FLAG_UPDATE_DATA 0x08 +#define V4L2_VP9_SEGMENTATION_FLAG_ABS_OR_DELTA_UPDATE 0x10 + +#define V4L2_VP9_SEG_LVL_ALT_Q 0 +#define V4L2_VP9_SEG_LVL_ALT_L 1 +#define V4L2_VP9_SEG_LVL_REF_FRAME 2 +#define V4L2_VP9_SEG_LVL_SKIP 3 +#define V4L2_VP9_SEG_LVL_MAX 4 + +#define V4L2_VP9_SEGMENT_FEATURE_ENABLED(id) (1 << (id)) +#define V4L2_VP9_SEGMENT_FEATURE_ENABLED_MASK 0xf + +/** + * struct v4l2_vp9_segmentation - VP9 segmentation parameters + * + * @feature_data: data attached to each feature. Data entry is only valid if + * the feature is enabled. The array shall be indexed with segment number as + * the first dimension (0..7) and one of V4L2_VP9_SEG_{} as the second dimension. + * @feature_enabled: bitmask defining which features are enabled in each segment. + * The value for each segment is a combination of V4L2_VP9_SEGMENT_FEATURE_ENABLED(id) + * values where id is one of V4L2_VP9_SEG_LVL_{}. + * @tree_probs: specifies the probability values to be used when decoding a + * Segment-ID. See '5.15. Segmentation map' section of the VP9 specification + * for more details. + * @pred_probs: specifies the probability values to be used when decoding a + * Predicted-Segment-ID. See '6.4.14. Get segment id syntax' section of :ref:`vp9` + * for more details. + * @flags: combination of V4L2_VP9_SEGMENTATION_FLAG_{} flags. + * @reserved: padding field. Should be zeroed by applications. + * + * Encodes the quantization parameters. See section '7.2.10 Segmentation params syntax' of + * the VP9 specification for more details. + */ +struct v4l2_vp9_segmentation { + __s16 feature_data[8][4]; + __u8 feature_enabled[8]; + __u8 tree_probs[7]; + __u8 pred_probs[3]; + __u8 flags; + __u8 reserved[5]; +}; + +#define V4L2_VP9_FRAME_FLAG_KEY_FRAME 0x001 +#define V4L2_VP9_FRAME_FLAG_SHOW_FRAME 0x002 +#define V4L2_VP9_FRAME_FLAG_ERROR_RESILIENT 0x004 +#define V4L2_VP9_FRAME_FLAG_INTRA_ONLY 0x008 +#define V4L2_VP9_FRAME_FLAG_ALLOW_HIGH_PREC_MV 0x010 +#define V4L2_VP9_FRAME_FLAG_REFRESH_FRAME_CTX 0x020 +#define V4L2_VP9_FRAME_FLAG_PARALLEL_DEC_MODE 0x040 +#define V4L2_VP9_FRAME_FLAG_X_SUBSAMPLING 0x080 +#define V4L2_VP9_FRAME_FLAG_Y_SUBSAMPLING 0x100 +#define V4L2_VP9_FRAME_FLAG_COLOR_RANGE_FULL_SWING 0x200 + +#define V4L2_VP9_SIGN_BIAS_LAST 0x1 +#define V4L2_VP9_SIGN_BIAS_GOLDEN 0x2 +#define V4L2_VP9_SIGN_BIAS_ALT 0x4 + +#define V4L2_VP9_RESET_FRAME_CTX_NONE 0 +#define V4L2_VP9_RESET_FRAME_CTX_SPEC 1 +#define V4L2_VP9_RESET_FRAME_CTX_ALL 2 + +#define V4L2_VP9_INTERP_FILTER_EIGHTTAP 0 +#define V4L2_VP9_INTERP_FILTER_EIGHTTAP_SMOOTH 1 +#define V4L2_VP9_INTERP_FILTER_EIGHTTAP_SHARP 2 +#define V4L2_VP9_INTERP_FILTER_BILINEAR 3 +#define V4L2_VP9_INTERP_FILTER_SWITCHABLE 4 + +#define V4L2_VP9_REFERENCE_MODE_SINGLE_REFERENCE 0 +#define V4L2_VP9_REFERENCE_MODE_COMPOUND_REFERENCE 1 +#define V4L2_VP9_REFERENCE_MODE_SELECT 2 + +#define V4L2_VP9_PROFILE_MAX 3 + +#define V4L2_CID_STATELESS_VP9_FRAME (V4L2_CID_CODEC_STATELESS_BASE + 300) +/** + * struct v4l2_ctrl_vp9_frame - VP9 frame decoding control + * + * @lf: loop filter parameters. See &v4l2_vp9_loop_filter for more details. + * @quant: quantization parameters. See &v4l2_vp9_quantization for more details. + * @seg: segmentation parameters. See &v4l2_vp9_segmentation for more details. + * @flags: combination of V4L2_VP9_FRAME_FLAG_{} flags. + * @compressed_header_size: compressed header size in bytes. + * @uncompressed_header_size: uncompressed header size in bytes. + * @frame_width_minus_1: add 1 to it and you'll get the frame width expressed in pixels. + * @frame_height_minus_1: add 1 to it and you'll get the frame height expressed in pixels. + * @render_width_minus_1: add 1 to it and you'll get the expected render width expressed in + * pixels. This is not used during the decoding process but might be used by HW scalers + * to prepare a frame that's ready for scanout. + * @render_height_minus_1: add 1 to it and you'll get the expected render height expressed in + * pixels. This is not used during the decoding process but might be used by HW scalers + * to prepare a frame that's ready for scanout. + * @last_frame_ts: "last" reference buffer timestamp. + * The timestamp refers to the timestamp field in struct v4l2_buffer. + * Use v4l2_timeval_to_ns() to convert the struct timeval to a __u64. + * @golden_frame_ts: "golden" reference buffer timestamp. + * The timestamp refers to the timestamp field in struct v4l2_buffer. + * Use v4l2_timeval_to_ns() to convert the struct timeval to a __u64. + * @alt_frame_ts: "alt" reference buffer timestamp. + * The timestamp refers to the timestamp field in struct v4l2_buffer. + * Use v4l2_timeval_to_ns() to convert the struct timeval to a __u64. + * @ref_frame_sign_bias: a bitfield specifying whether the sign bias is set for a given + * reference frame. Either of V4L2_VP9_SIGN_BIAS_{}. + * @reset_frame_context: specifies whether the frame context should be reset to default values. + * Either of V4L2_VP9_RESET_FRAME_CTX_{}. + * @frame_context_idx: frame context that should be used/updated. + * @profile: VP9 profile. Can be 0, 1, 2 or 3. + * @bit_depth: bits per components. Can be 8, 10 or 12. Note that not all profiles support + * 10 and/or 12 bits depths. + * @interpolation_filter: specifies the filter selection used for performing inter prediction. + * Set to one of V4L2_VP9_INTERP_FILTER_{}. + * @tile_cols_log2: specifies the base 2 logarithm of the width of each tile (where the width + * is measured in units of 8x8 blocks). Shall be less than or equal to 6. + * @tile_rows_log2: specifies the base 2 logarithm of the height of each tile (where the height + * is measured in units of 8x8 blocks). + * @reference_mode: specifies the type of inter prediction to be used. + * Set to one of V4L2_VP9_REFERENCE_MODE_{}. + * @reserved: padding field. Should be zeroed by applications. + */ +struct v4l2_ctrl_vp9_frame { + struct v4l2_vp9_loop_filter lf; + struct v4l2_vp9_quantization quant; + struct v4l2_vp9_segmentation seg; + __u32 flags; + __u16 compressed_header_size; + __u16 uncompressed_header_size; + __u16 frame_width_minus_1; + __u16 frame_height_minus_1; + __u16 render_width_minus_1; + __u16 render_height_minus_1; + __u64 last_frame_ts; + __u64 golden_frame_ts; + __u64 alt_frame_ts; + __u8 ref_frame_sign_bias; + __u8 reset_frame_context; + __u8 frame_context_idx; + __u8 profile; + __u8 bit_depth; + __u8 interpolation_filter; + __u8 tile_cols_log2; + __u8 tile_rows_log2; + __u8 reference_mode; + __u8 reserved[7]; +}; + +#define V4L2_VP9_NUM_FRAME_CTX 4 + +/** + * struct v4l2_vp9_mv_probs - VP9 Motion vector probability updates + * @joint: motion vector joint probability updates. + * @sign: motion vector sign probability updates. + * @classes: motion vector class probability updates. + * @class0_bit: motion vector class0 bit probability updates. + * @bits: motion vector bits probability updates. + * @class0_fr: motion vector class0 fractional bit probability updates. + * @fr: motion vector fractional bit probability updates. + * @class0_hp: motion vector class0 high precision fractional bit probability updates. + * @hp: motion vector high precision fractional bit probability updates. + * + * This structure contains new values of motion vector probabilities. + * A value of zero in an array element means there is no update of the relevant probability. + * See `struct v4l2_vp9_prob_updates` for details. + */ +struct v4l2_vp9_mv_probs { + __u8 joint[3]; + __u8 sign[2]; + __u8 classes[2][10]; + __u8 class0_bit[2]; + __u8 bits[2][10]; + __u8 class0_fr[2][2][3]; + __u8 fr[2][3]; + __u8 class0_hp[2]; + __u8 hp[2]; +}; + +#define V4L2_CID_STATELESS_VP9_COMPRESSED_HDR (V4L2_CID_CODEC_STATELESS_BASE + 301) + +#define V4L2_VP9_TX_MODE_ONLY_4X4 0 +#define V4L2_VP9_TX_MODE_ALLOW_8X8 1 +#define V4L2_VP9_TX_MODE_ALLOW_16X16 2 +#define V4L2_VP9_TX_MODE_ALLOW_32X32 3 +#define V4L2_VP9_TX_MODE_SELECT 4 + +/** + * struct v4l2_ctrl_vp9_compressed_hdr - VP9 probability updates control + * @tx_mode: specifies the TX mode. Set to one of V4L2_VP9_TX_MODE_{}. + * @tx8: TX 8x8 probability updates. + * @tx16: TX 16x16 probability updates. + * @tx32: TX 32x32 probability updates. + * @coef: coefficient probability updates. + * @skip: skip probability updates. + * @inter_mode: inter mode probability updates. + * @interp_filter: interpolation filter probability updates. + * @is_inter: is inter-block probability updates. + * @comp_mode: compound prediction mode probability updates. + * @single_ref: single ref probability updates. + * @comp_ref: compound ref probability updates. + * @y_mode: Y prediction mode probability updates. + * @uv_mode: UV prediction mode probability updates. + * @partition: partition probability updates. + * @mv: motion vector probability updates. + * + * This structure holds the probabilities update as parsed in the compressed + * header (Spec 6.3). These values represent the value of probability update after + * being translated with inv_map_table[] (see 6.3.5). A value of zero in an array element + * means that there is no update of the relevant probability. + * + * This control is optional and needs to be used when dealing with the hardware which is + * not capable of parsing the compressed header itself. Only drivers which need it will + * implement it. + */ +struct v4l2_ctrl_vp9_compressed_hdr { + __u8 tx_mode; + __u8 tx8[2][1]; + __u8 tx16[2][2]; + __u8 tx32[2][3]; + __u8 coef[4][2][2][6][6][3]; + __u8 skip[3]; + __u8 inter_mode[7][3]; + __u8 interp_filter[4][2]; + __u8 is_inter[4]; + __u8 comp_mode[5]; + __u8 single_ref[5][2]; + __u8 comp_ref[5]; + __u8 y_mode[4][9]; + __u8 uv_mode[10][9]; + __u8 partition[16][3]; + + struct v4l2_vp9_mv_probs mv; +}; + /* MPEG-compression definitions kept for backwards compatibility */ #ifndef __KERNEL__ #define V4L2_CTRL_CLASS_MPEG V4L2_CTRL_CLASS_CODEC diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index f118fe7a9f58..df8b9c486ba1 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -703,6 +703,7 @@ struct v4l2_pix_format { #define V4L2_PIX_FMT_VP8 v4l2_fourcc('V', 'P', '8', '0') /* VP8 */ #define V4L2_PIX_FMT_VP8_FRAME v4l2_fourcc('V', 'P', '8', 'F') /* VP8 parsed frame */ #define V4L2_PIX_FMT_VP9 v4l2_fourcc('V', 'P', '9', '0') /* VP9 */ +#define V4L2_PIX_FMT_VP9_FRAME v4l2_fourcc('V', 'P', '9', 'F') /* VP9 parsed frame */ #define V4L2_PIX_FMT_HEVC v4l2_fourcc('H', 'E', 'V', 'C') /* HEVC aka H.265 */ #define V4L2_PIX_FMT_FWHT v4l2_fourcc('F', 'W', 'H', 'T') /* Fast Walsh Hadamard Transform (vicodec) */ #define V4L2_PIX_FMT_FWHT_STATELESS v4l2_fourcc('S', 'F', 'W', 'H') /* Stateless FWHT (vicodec) */ @@ -1759,6 +1760,8 @@ struct v4l2_ext_control { struct v4l2_ctrl_mpeg2_sequence __user *p_mpeg2_sequence; struct v4l2_ctrl_mpeg2_picture __user *p_mpeg2_picture; struct v4l2_ctrl_mpeg2_quantisation __user *p_mpeg2_quantisation; + struct v4l2_ctrl_vp9_compressed_hdr __user *p_vp9_compressed_hdr_probs; + struct v4l2_ctrl_vp9_frame __user *p_vp9_frame; void __user *ptr; }; } __attribute__ ((packed)); @@ -1823,6 +1826,9 @@ enum v4l2_ctrl_type { V4L2_CTRL_TYPE_MPEG2_QUANTISATION = 0x0250, V4L2_CTRL_TYPE_MPEG2_SEQUENCE = 0x0251, V4L2_CTRL_TYPE_MPEG2_PICTURE = 0x0252, + + V4L2_CTRL_TYPE_VP9_COMPRESSED_HDR = 0x0260, + V4L2_CTRL_TYPE_VP9_FRAME = 0x0261, }; /* Used in the VIDIOC_QUERYCTRL ioctl for querying controls */ -- cgit v1.2.3 From 448f413a8bdc727d25d9a786ccbdb974fb85d973 Mon Sep 17 00:00:00 2001 From: Hao Chen Date: Thu, 18 Nov 2021 20:12:40 +0800 Subject: ethtool: add support to set/get tx copybreak buf size via ethtool Add support for ethtool to set/get tx copybreak buf size. Signed-off-by: Hao Chen Signed-off-by: Guangbin Huang Signed-off-by: David S. Miller --- include/uapi/linux/ethtool.h | 1 + net/ethtool/common.c | 1 + net/ethtool/ioctl.c | 1 + 3 files changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index a2223b685451..7bc4b8def12c 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -231,6 +231,7 @@ enum tunable_id { ETHTOOL_RX_COPYBREAK, ETHTOOL_TX_COPYBREAK, ETHTOOL_PFC_PREVENTION_TOUT, /* timeout in msecs */ + ETHTOOL_TX_COPYBREAK_BUF_SIZE, /* * Add your fresh new tunable attribute above and remember to update * tunable_strings[] in net/ethtool/common.c diff --git a/net/ethtool/common.c b/net/ethtool/common.c index c63e0739dc6a..0c5210015911 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -89,6 +89,7 @@ tunable_strings[__ETHTOOL_TUNABLE_COUNT][ETH_GSTRING_LEN] = { [ETHTOOL_RX_COPYBREAK] = "rx-copybreak", [ETHTOOL_TX_COPYBREAK] = "tx-copybreak", [ETHTOOL_PFC_PREVENTION_TOUT] = "pfc-prevention-tout", + [ETHTOOL_TX_COPYBREAK_BUF_SIZE] = "tx-copybreak-buf-size", }; const char diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 65e9bc1058b5..5c5f170a9851 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -2396,6 +2396,7 @@ static int ethtool_tunable_valid(const struct ethtool_tunable *tuna) switch (tuna->id) { case ETHTOOL_RX_COPYBREAK: case ETHTOOL_TX_COPYBREAK: + case ETHTOOL_TX_COPYBREAK_BUF_SIZE: if (tuna->len != sizeof(u32) || tuna->type_id != ETHTOOL_TUNABLE_U32) return -EINVAL; -- cgit v1.2.3 From 0b70c256eba8448b072d25c95ee65e59da8970de Mon Sep 17 00:00:00 2001 From: Hao Chen Date: Thu, 18 Nov 2021 20:12:42 +0800 Subject: ethtool: add support to set/get rx buf len via ethtool Add support to set rx buf len via ethtool -G parameter and get rx buf len via ethtool -g parameter. Signed-off-by: Hao Chen Signed-off-by: Guangbin Huang Signed-off-by: David S. Miller --- Documentation/networking/ethtool-netlink.rst | 10 ++++++---- include/linux/ethtool.h | 18 ++++++++++++++++++ include/uapi/linux/ethtool_netlink.h | 1 + net/ethtool/netlink.h | 2 +- net/ethtool/rings.c | 23 +++++++++++++++++++++-- 5 files changed, 47 insertions(+), 7 deletions(-) (limited to 'include/uapi/linux') diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index 7b598c7e3912..9d98e0511249 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -849,7 +849,7 @@ Request contents: Kernel response contents: - ==================================== ====== ========================== + ==================================== ====== =========================== ``ETHTOOL_A_RINGS_HEADER`` nested reply header ``ETHTOOL_A_RINGS_RX_MAX`` u32 max size of RX ring ``ETHTOOL_A_RINGS_RX_MINI_MAX`` u32 max size of RX mini ring @@ -859,7 +859,8 @@ Kernel response contents: ``ETHTOOL_A_RINGS_RX_MINI`` u32 size of RX mini ring ``ETHTOOL_A_RINGS_RX_JUMBO`` u32 size of RX jumbo ring ``ETHTOOL_A_RINGS_TX`` u32 size of TX ring - ==================================== ====== ========================== + ``ETHTOOL_A_RINGS_RX_BUF_LEN`` u32 size of buffers on the ring + ==================================== ====== =========================== RINGS_SET @@ -869,13 +870,14 @@ Sets ring sizes like ``ETHTOOL_SRINGPARAM`` ioctl request. Request contents: - ==================================== ====== ========================== + ==================================== ====== =========================== ``ETHTOOL_A_RINGS_HEADER`` nested reply header ``ETHTOOL_A_RINGS_RX`` u32 size of RX ring ``ETHTOOL_A_RINGS_RX_MINI`` u32 size of RX mini ring ``ETHTOOL_A_RINGS_RX_JUMBO`` u32 size of RX jumbo ring ``ETHTOOL_A_RINGS_TX`` u32 size of TX ring - ==================================== ====== ========================== + ``ETHTOOL_A_RINGS_RX_BUF_LEN`` u32 size of buffers on the ring + ==================================== ====== =========================== Kernel checks that requested ring sizes do not exceed limits reported by driver. Driver may impose additional constraints and may not suspport all diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 845a0ffc16ee..0b252b82988b 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -67,6 +67,22 @@ enum { ETH_RSS_HASH_FUNCS_COUNT }; +/** + * struct kernel_ethtool_ringparam - RX/TX ring configuration + * @rx_buf_len: Current length of buffers on the rx ring. + */ +struct kernel_ethtool_ringparam { + u32 rx_buf_len; +}; + +/** + * enum ethtool_supported_ring_param - indicator caps for setting ring params + * @ETHTOOL_RING_USE_RX_BUF_LEN: capture for setting rx_buf_len + */ +enum ethtool_supported_ring_param { + ETHTOOL_RING_USE_RX_BUF_LEN = BIT(0), +}; + #define __ETH_RSS_HASH_BIT(bit) ((u32)1 << (bit)) #define __ETH_RSS_HASH(name) __ETH_RSS_HASH_BIT(ETH_RSS_HASH_##name##_BIT) @@ -432,6 +448,7 @@ struct ethtool_module_power_mode_params { * @cap_link_lanes_supported: indicates if the driver supports lanes * parameter. * @supported_coalesce_params: supported types of interrupt coalescing. + * @supported_ring_params: supported ring params. * @get_drvinfo: Report driver/device information. Should only set the * @driver, @version, @fw_version and @bus_info fields. If not * implemented, the @driver and @bus_info fields will be filled in @@ -613,6 +630,7 @@ struct ethtool_module_power_mode_params { struct ethtool_ops { u32 cap_link_lanes_supported:1; u32 supported_coalesce_params; + u32 supported_ring_params; void (*get_drvinfo)(struct net_device *, struct ethtool_drvinfo *); int (*get_regs_len)(struct net_device *); void (*get_regs)(struct net_device *, struct ethtool_regs *, void *); diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 999777d32dcf..cca6e474a085 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -329,6 +329,7 @@ enum { ETHTOOL_A_RINGS_RX_MINI, /* u32 */ ETHTOOL_A_RINGS_RX_JUMBO, /* u32 */ ETHTOOL_A_RINGS_TX, /* u32 */ + ETHTOOL_A_RINGS_RX_BUF_LEN, /* u32 */ /* add new constants above here */ __ETHTOOL_A_RINGS_CNT, diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index 836ee7157848..490598e5eedd 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -356,7 +356,7 @@ extern const struct nla_policy ethnl_features_set_policy[ETHTOOL_A_FEATURES_WANT extern const struct nla_policy ethnl_privflags_get_policy[ETHTOOL_A_PRIVFLAGS_HEADER + 1]; extern const struct nla_policy ethnl_privflags_set_policy[ETHTOOL_A_PRIVFLAGS_FLAGS + 1]; extern const struct nla_policy ethnl_rings_get_policy[ETHTOOL_A_RINGS_HEADER + 1]; -extern const struct nla_policy ethnl_rings_set_policy[ETHTOOL_A_RINGS_TX + 1]; +extern const struct nla_policy ethnl_rings_set_policy[ETHTOOL_A_RINGS_RX_BUF_LEN + 1]; extern const struct nla_policy ethnl_channels_get_policy[ETHTOOL_A_CHANNELS_HEADER + 1]; extern const struct nla_policy ethnl_channels_set_policy[ETHTOOL_A_CHANNELS_COMBINED_COUNT + 1]; extern const struct nla_policy ethnl_coalesce_get_policy[ETHTOOL_A_COALESCE_HEADER + 1]; diff --git a/net/ethtool/rings.c b/net/ethtool/rings.c index 4e097812a967..bd8e9a7530eb 100644 --- a/net/ethtool/rings.c +++ b/net/ethtool/rings.c @@ -10,6 +10,7 @@ struct rings_req_info { struct rings_reply_data { struct ethnl_reply_data base; struct ethtool_ringparam ringparam; + struct kernel_ethtool_ringparam kernel_ringparam; }; #define RINGS_REPDATA(__reply_base) \ @@ -49,7 +50,8 @@ static int rings_reply_size(const struct ethnl_req_info *req_base, nla_total_size(sizeof(u32)) + /* _RINGS_RX */ nla_total_size(sizeof(u32)) + /* _RINGS_RX_MINI */ nla_total_size(sizeof(u32)) + /* _RINGS_RX_JUMBO */ - nla_total_size(sizeof(u32)); /* _RINGS_TX */ + nla_total_size(sizeof(u32)) + /* _RINGS_TX */ + nla_total_size(sizeof(u32)); /* _RINGS_RX_BUF_LEN */ } static int rings_fill_reply(struct sk_buff *skb, @@ -57,6 +59,7 @@ static int rings_fill_reply(struct sk_buff *skb, const struct ethnl_reply_data *reply_base) { const struct rings_reply_data *data = RINGS_REPDATA(reply_base); + const struct kernel_ethtool_ringparam *kernel_ringparam = &data->kernel_ringparam; const struct ethtool_ringparam *ringparam = &data->ringparam; if ((ringparam->rx_max_pending && @@ -78,7 +81,10 @@ static int rings_fill_reply(struct sk_buff *skb, (nla_put_u32(skb, ETHTOOL_A_RINGS_TX_MAX, ringparam->tx_max_pending) || nla_put_u32(skb, ETHTOOL_A_RINGS_TX, - ringparam->tx_pending)))) + ringparam->tx_pending))) || + (kernel_ringparam->rx_buf_len && + (nla_put_u32(skb, ETHTOOL_A_RINGS_RX_BUF_LEN, + kernel_ringparam->rx_buf_len)))) return -EMSGSIZE; return 0; @@ -105,10 +111,12 @@ const struct nla_policy ethnl_rings_set_policy[] = { [ETHTOOL_A_RINGS_RX_MINI] = { .type = NLA_U32 }, [ETHTOOL_A_RINGS_RX_JUMBO] = { .type = NLA_U32 }, [ETHTOOL_A_RINGS_TX] = { .type = NLA_U32 }, + [ETHTOOL_A_RINGS_RX_BUF_LEN] = NLA_POLICY_MIN(NLA_U32, 1), }; int ethnl_set_rings(struct sk_buff *skb, struct genl_info *info) { + struct kernel_ethtool_ringparam kernel_ringparam = {}; struct ethtool_ringparam ringparam = {}; struct ethnl_req_info req_info = {}; struct nlattr **tb = info->attrs; @@ -142,6 +150,8 @@ int ethnl_set_rings(struct sk_buff *skb, struct genl_info *info) ethnl_update_u32(&ringparam.rx_jumbo_pending, tb[ETHTOOL_A_RINGS_RX_JUMBO], &mod); ethnl_update_u32(&ringparam.tx_pending, tb[ETHTOOL_A_RINGS_TX], &mod); + ethnl_update_u32(&kernel_ringparam.rx_buf_len, + tb[ETHTOOL_A_RINGS_RX_BUF_LEN], &mod); ret = 0; if (!mod) goto out_ops; @@ -164,6 +174,15 @@ int ethnl_set_rings(struct sk_buff *skb, struct genl_info *info) goto out_ops; } + if (kernel_ringparam.rx_buf_len != 0 && + !(ops->supported_ring_params & ETHTOOL_RING_USE_RX_BUF_LEN)) { + ret = -EOPNOTSUPP; + NL_SET_ERR_MSG_ATTR(info->extack, + tb[ETHTOOL_A_RINGS_RX_BUF_LEN], + "setting rx buf len not supported"); + goto out_ops; + } + ret = dev->ethtool_ops->set_ringparam(dev, &ringparam); if (ret < 0) goto out_ops; -- cgit v1.2.3 From a0c2ccd9b5ad0a9e838158404e041b5a8ff762dd Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Tue, 23 Nov 2021 15:50:46 +0800 Subject: mctp: Add MCTP-over-serial transport binding This change adds a MCTP Serial transport binding, as defined by DMTF specificiation DSP0253 - "MCTP Serial Transport Binding". This is implemented as a new serial line discipline, and can be attached to arbitrary tty devices. From the Kconfig description: This driver provides an MCTP-over-serial interface, through a serial line-discipline, as defined by DMTF specification "DSP0253 - MCTP Serial Transport Binding". By attaching the ldisc to a serial device, we get a new net device to transport MCTP packets. This allows communication with external MCTP endpoints which use serial as their transport. It can also be used as an easy way to provide MCTP connectivity between virtual machines, by forwarding data between simple virtual serial devices. Say y here if you need to connect to MCTP endpoints over serial. To compile as a module, use m; the module will be called mctp-serial. Once the N_MCTP line discipline is set [using ioctl(TCIOSETD)], we get a new netdev suitable for MCTP communication. The 'mctp' utility[1] provides a simple wrapper for this ioctl, using 'link serial ': # mctp link serial /dev/ttyS0 & # mctp link dev lo index 1 address 0x00:00:00:00:00:00 net 1 mtu 65536 up dev mctpserial0 index 5 address 0x(no-addr) net 1 mtu 68 down [1]: https://github.com/CodeConstruct/mctp Signed-off-by: Jeremy Kerr Signed-off-by: David S. Miller --- drivers/net/mctp/Kconfig | 18 ++ drivers/net/mctp/Makefile | 1 + drivers/net/mctp/mctp-serial.c | 510 +++++++++++++++++++++++++++++++++++++++++ include/uapi/linux/tty.h | 1 + 4 files changed, 530 insertions(+) create mode 100644 drivers/net/mctp/mctp-serial.c (limited to 'include/uapi/linux') diff --git a/drivers/net/mctp/Kconfig b/drivers/net/mctp/Kconfig index d8f966cedc89..2929471395ae 100644 --- a/drivers/net/mctp/Kconfig +++ b/drivers/net/mctp/Kconfig @@ -3,6 +3,24 @@ if MCTP menu "MCTP Device Drivers" +config MCTP_SERIAL + tristate "MCTP serial transport" + depends on TTY + select CRC_CCITT + help + This driver provides an MCTP-over-serial interface, through a + serial line-discipline, as defined by DMTF specification "DSP0253 - + MCTP Serial Transport Binding". By attaching the ldisc to a serial + device, we get a new net device to transport MCTP packets. + + This allows communication with external MCTP endpoints which use + serial as their transport. It can also be used as an easy way to + provide MCTP connectivity between virtual machines, by forwarding + data between simple virtual serial devices. + + Say y here if you need to connect to MCTP endpoints over serial. To + compile as a module, use m; the module will be called mctp-serial. + endmenu endif diff --git a/drivers/net/mctp/Makefile b/drivers/net/mctp/Makefile index e69de29bb2d1..d32622613ce4 100644 --- a/drivers/net/mctp/Makefile +++ b/drivers/net/mctp/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_MCTP_SERIAL) += mctp-serial.o diff --git a/drivers/net/mctp/mctp-serial.c b/drivers/net/mctp/mctp-serial.c new file mode 100644 index 000000000000..9ac0e187f36e --- /dev/null +++ b/drivers/net/mctp/mctp-serial.c @@ -0,0 +1,510 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Management Component Transport Protocol (MCTP) - serial transport + * binding. This driver is an implementation of the DMTF specificiation + * "DSP0253 - Management Component Transport Protocol (MCTP) Serial Transport + * Binding", available at: + * + * https://www.dmtf.org/sites/default/files/standards/documents/DSP0253_1.0.0.pdf + * + * This driver provides DSP0253-type MCTP-over-serial transport using a Linux + * tty device, by setting the N_MCTP line discipline on the tty. + * + * Copyright (c) 2021 Code Construct + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#define MCTP_SERIAL_MTU 68 /* base mtu (64) + mctp header */ +#define MCTP_SERIAL_FRAME_MTU (MCTP_SERIAL_MTU + 6) /* + serial framing */ + +#define MCTP_SERIAL_VERSION 0x1 /* DSP0253 defines a single version: 1 */ + +#define BUFSIZE MCTP_SERIAL_FRAME_MTU + +#define BYTE_FRAME 0x7e +#define BYTE_ESC 0x7d + +static DEFINE_IDA(mctp_serial_ida); + +enum mctp_serial_state { + STATE_IDLE, + STATE_START, + STATE_HEADER, + STATE_DATA, + STATE_ESCAPE, + STATE_TRAILER, + STATE_DONE, + STATE_ERR, +}; + +struct mctp_serial { + struct net_device *netdev; + struct tty_struct *tty; + + int idx; + + /* protects our rx & tx state machines; held during both paths */ + spinlock_t lock; + + struct work_struct tx_work; + enum mctp_serial_state txstate, rxstate; + u16 txfcs, rxfcs, rxfcs_rcvd; + unsigned int txlen, rxlen; + unsigned int txpos, rxpos; + unsigned char txbuf[BUFSIZE], + rxbuf[BUFSIZE]; +}; + +static bool needs_escape(unsigned char c) +{ + return c == BYTE_ESC || c == BYTE_FRAME; +} + +static int next_chunk_len(struct mctp_serial *dev) +{ + int i; + + /* either we have no bytes to send ... */ + if (dev->txpos == dev->txlen) + return 0; + + /* ... or the next byte to send is an escaped byte; requiring a + * single-byte chunk... + */ + if (needs_escape(dev->txbuf[dev->txpos])) + return 1; + + /* ... or we have one or more bytes up to the next escape - this chunk + * will be those non-escaped bytes, and does not include the escaped + * byte. + */ + for (i = 1; i + dev->txpos + 1 < dev->txlen; i++) { + if (needs_escape(dev->txbuf[dev->txpos + i + 1])) + break; + } + + return i; +} + +static int write_chunk(struct mctp_serial *dev, unsigned char *buf, int len) +{ + return dev->tty->ops->write(dev->tty, buf, len); +} + +static void mctp_serial_tx_work(struct work_struct *work) +{ + struct mctp_serial *dev = container_of(work, struct mctp_serial, + tx_work); + unsigned char c, buf[3]; + unsigned long flags; + int len, txlen; + + spin_lock_irqsave(&dev->lock, flags); + + /* txstate represents the next thing to send */ + switch (dev->txstate) { + case STATE_START: + dev->txpos = 0; + fallthrough; + case STATE_HEADER: + buf[0] = BYTE_FRAME; + buf[1] = MCTP_SERIAL_VERSION; + buf[2] = dev->txlen; + + if (!dev->txpos) + dev->txfcs = crc_ccitt(0, buf + 1, 2); + + txlen = write_chunk(dev, buf + dev->txpos, 3 - dev->txpos); + if (txlen <= 0) { + dev->txstate = STATE_ERR; + } else { + dev->txpos += txlen; + if (dev->txpos == 3) { + dev->txstate = STATE_DATA; + dev->txpos = 0; + } + } + break; + + case STATE_ESCAPE: + buf[0] = dev->txbuf[dev->txpos] & ~0x20; + txlen = write_chunk(dev, buf, 1); + if (txlen <= 0) { + dev->txstate = STATE_ERR; + } else { + dev->txpos += txlen; + if (dev->txpos == dev->txlen) { + dev->txstate = STATE_TRAILER; + dev->txpos = 0; + } + } + + break; + + case STATE_DATA: + len = next_chunk_len(dev); + if (len) { + c = dev->txbuf[dev->txpos]; + if (len == 1 && needs_escape(c)) { + buf[0] = BYTE_ESC; + buf[1] = c & ~0x20; + dev->txfcs = crc_ccitt_byte(dev->txfcs, c); + txlen = write_chunk(dev, buf, 2); + if (txlen == 2) + dev->txpos++; + else if (txlen == 1) + dev->txstate = STATE_ESCAPE; + else + dev->txstate = STATE_ERR; + } else { + txlen = write_chunk(dev, + dev->txbuf + dev->txpos, + len); + if (txlen <= 0) { + dev->txstate = STATE_ERR; + } else { + dev->txfcs = crc_ccitt(dev->txfcs, + dev->txbuf + + dev->txpos, + txlen); + dev->txpos += txlen; + } + } + if (dev->txstate == STATE_DATA && + dev->txpos == dev->txlen) { + dev->txstate = STATE_TRAILER; + dev->txpos = 0; + } + break; + } + dev->txstate = STATE_TRAILER; + dev->txpos = 0; + fallthrough; + + case STATE_TRAILER: + buf[0] = dev->txfcs >> 8; + buf[1] = dev->txfcs & 0xff; + buf[2] = BYTE_FRAME; + txlen = write_chunk(dev, buf + dev->txpos, 3 - dev->txpos); + if (txlen <= 0) { + dev->txstate = STATE_ERR; + } else { + dev->txpos += txlen; + if (dev->txpos == 3) { + dev->txstate = STATE_DONE; + dev->txpos = 0; + } + } + break; + default: + netdev_err_once(dev->netdev, "invalid tx state %d\n", + dev->txstate); + } + + if (dev->txstate == STATE_DONE) { + dev->netdev->stats.tx_packets++; + dev->netdev->stats.tx_bytes += dev->txlen; + dev->txlen = 0; + dev->txpos = 0; + clear_bit(TTY_DO_WRITE_WAKEUP, &dev->tty->flags); + dev->txstate = STATE_IDLE; + spin_unlock_irqrestore(&dev->lock, flags); + + netif_wake_queue(dev->netdev); + } else { + spin_unlock_irqrestore(&dev->lock, flags); + } +} + +static netdev_tx_t mctp_serial_tx(struct sk_buff *skb, struct net_device *ndev) +{ + struct mctp_serial *dev = netdev_priv(ndev); + unsigned long flags; + + WARN_ON(dev->txstate != STATE_IDLE); + + if (skb->len > MCTP_SERIAL_MTU) { + dev->netdev->stats.tx_dropped++; + goto out; + } + + spin_lock_irqsave(&dev->lock, flags); + netif_stop_queue(dev->netdev); + skb_copy_bits(skb, 0, dev->txbuf, skb->len); + dev->txpos = 0; + dev->txlen = skb->len; + dev->txstate = STATE_START; + spin_unlock_irqrestore(&dev->lock, flags); + + set_bit(TTY_DO_WRITE_WAKEUP, &dev->tty->flags); + schedule_work(&dev->tx_work); + +out: + kfree_skb(skb); + return NETDEV_TX_OK; +} + +static void mctp_serial_tty_write_wakeup(struct tty_struct *tty) +{ + struct mctp_serial *dev = tty->disc_data; + + schedule_work(&dev->tx_work); +} + +static void mctp_serial_rx(struct mctp_serial *dev) +{ + struct mctp_skb_cb *cb; + struct sk_buff *skb; + + if (dev->rxfcs != dev->rxfcs_rcvd) { + dev->netdev->stats.rx_dropped++; + dev->netdev->stats.rx_crc_errors++; + return; + } + + skb = netdev_alloc_skb(dev->netdev, dev->rxlen); + if (!skb) { + dev->netdev->stats.rx_dropped++; + return; + } + + skb->protocol = htons(ETH_P_MCTP); + skb_put_data(skb, dev->rxbuf, dev->rxlen); + skb_reset_network_header(skb); + + cb = __mctp_cb(skb); + cb->halen = 0; + + netif_rx_ni(skb); + dev->netdev->stats.rx_packets++; + dev->netdev->stats.rx_bytes += dev->rxlen; +} + +static void mctp_serial_push_header(struct mctp_serial *dev, unsigned char c) +{ + switch (dev->rxpos) { + case 0: + if (c == BYTE_FRAME) + dev->rxpos++; + else + dev->rxstate = STATE_ERR; + break; + case 1: + if (c == MCTP_SERIAL_VERSION) { + dev->rxpos++; + dev->rxfcs = crc_ccitt_byte(0, c); + } else { + dev->rxstate = STATE_ERR; + } + break; + case 2: + if (c > MCTP_SERIAL_FRAME_MTU) { + dev->rxstate = STATE_ERR; + } else { + dev->rxlen = c; + dev->rxpos = 0; + dev->rxstate = STATE_DATA; + dev->rxfcs = crc_ccitt_byte(dev->rxfcs, c); + } + break; + } +} + +static void mctp_serial_push_trailer(struct mctp_serial *dev, unsigned char c) +{ + switch (dev->rxpos) { + case 0: + dev->rxfcs_rcvd = c << 8; + dev->rxpos++; + break; + case 1: + dev->rxfcs_rcvd |= c; + dev->rxpos++; + break; + case 2: + if (c != BYTE_FRAME) { + dev->rxstate = STATE_ERR; + } else { + mctp_serial_rx(dev); + dev->rxlen = 0; + dev->rxpos = 0; + dev->rxstate = STATE_IDLE; + } + break; + } +} + +static void mctp_serial_push(struct mctp_serial *dev, unsigned char c) +{ + switch (dev->rxstate) { + case STATE_IDLE: + dev->rxstate = STATE_HEADER; + fallthrough; + case STATE_HEADER: + mctp_serial_push_header(dev, c); + break; + + case STATE_ESCAPE: + c |= 0x20; + fallthrough; + case STATE_DATA: + if (dev->rxstate != STATE_ESCAPE && c == BYTE_ESC) { + dev->rxstate = STATE_ESCAPE; + } else { + dev->rxfcs = crc_ccitt_byte(dev->rxfcs, c); + dev->rxbuf[dev->rxpos] = c; + dev->rxpos++; + dev->rxstate = STATE_DATA; + if (dev->rxpos == dev->rxlen) { + dev->rxpos = 0; + dev->rxstate = STATE_TRAILER; + } + } + break; + + case STATE_TRAILER: + mctp_serial_push_trailer(dev, c); + break; + + case STATE_ERR: + if (c == BYTE_FRAME) + dev->rxstate = STATE_IDLE; + break; + + default: + netdev_err_once(dev->netdev, "invalid rx state %d\n", + dev->rxstate); + } +} + +static void mctp_serial_tty_receive_buf(struct tty_struct *tty, + const unsigned char *c, + const char *f, int len) +{ + struct mctp_serial *dev = tty->disc_data; + int i; + + if (!netif_running(dev->netdev)) + return; + + /* we don't (currently) use the flag bytes, just data. */ + for (i = 0; i < len; i++) + mctp_serial_push(dev, c[i]); +} + +static const struct net_device_ops mctp_serial_netdev_ops = { + .ndo_start_xmit = mctp_serial_tx, +}; + +static void mctp_serial_setup(struct net_device *ndev) +{ + ndev->type = ARPHRD_MCTP; + ndev->mtu = MCTP_SERIAL_MTU; + ndev->hard_header_len = 0; + ndev->addr_len = 0; + ndev->tx_queue_len = DEFAULT_TX_QUEUE_LEN; + ndev->flags = IFF_NOARP; + ndev->netdev_ops = &mctp_serial_netdev_ops; + ndev->needs_free_netdev = true; +} + +static int mctp_serial_open(struct tty_struct *tty) +{ + struct mctp_serial *dev; + struct net_device *ndev; + char name[32]; + int idx, rc; + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + if (!tty->ops->write) + return -EOPNOTSUPP; + + if (tty->disc_data) + return -EEXIST; + + idx = ida_alloc(&mctp_serial_ida, GFP_KERNEL); + if (idx < 0) + return idx; + + snprintf(name, sizeof(name), "mctpserial%d", idx); + ndev = alloc_netdev(sizeof(*dev), name, NET_NAME_ENUM, + mctp_serial_setup); + if (!ndev) { + rc = -ENOMEM; + goto free_ida; + } + + dev = netdev_priv(ndev); + dev->idx = idx; + dev->tty = tty; + dev->netdev = ndev; + dev->txstate = STATE_IDLE; + dev->rxstate = STATE_IDLE; + spin_lock_init(&dev->lock); + INIT_WORK(&dev->tx_work, mctp_serial_tx_work); + + rc = register_netdev(ndev); + if (rc) + goto free_netdev; + + tty->receive_room = 64 * 1024; + tty->disc_data = dev; + + return 0; + +free_netdev: + free_netdev(ndev); + +free_ida: + ida_free(&mctp_serial_ida, idx); + return rc; +} + +static void mctp_serial_close(struct tty_struct *tty) +{ + struct mctp_serial *dev = tty->disc_data; + int idx = dev->idx; + + unregister_netdev(dev->netdev); + ida_free(&mctp_serial_ida, idx); +} + +static struct tty_ldisc_ops mctp_ldisc = { + .owner = THIS_MODULE, + .num = N_MCTP, + .name = "mctp", + .open = mctp_serial_open, + .close = mctp_serial_close, + .receive_buf = mctp_serial_tty_receive_buf, + .write_wakeup = mctp_serial_tty_write_wakeup, +}; + +static int __init mctp_serial_init(void) +{ + return tty_register_ldisc(&mctp_ldisc); +} + +static void __exit mctp_serial_exit(void) +{ + tty_unregister_ldisc(&mctp_ldisc); +} + +module_init(mctp_serial_init); +module_exit(mctp_serial_exit); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Jeremy Kerr "); +MODULE_DESCRIPTION("MCTP Serial transport"); diff --git a/include/uapi/linux/tty.h b/include/uapi/linux/tty.h index 376cccf397be..a58deb3061eb 100644 --- a/include/uapi/linux/tty.h +++ b/include/uapi/linux/tty.h @@ -38,5 +38,6 @@ #define N_NCI 25 /* NFC NCI UART */ #define N_SPEAKUP 26 /* Speakup communication with synths */ #define N_NULL 27 /* Null ldisc used for error handling */ +#define N_MCTP 28 /* MCTP-over-serial */ #endif /* _UAPI_LINUX_TTY_H */ -- cgit v1.2.3 From 04c76b41ca974b508522831441dd7e5b1b59cbb0 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 10 Nov 2021 15:49:32 +0000 Subject: io_uring: add option to skip CQE posting Emitting a CQE is expensive from the kernel perspective. Often, it's also not convenient for the userspace, spends some cycles on processing and just complicates the logic. A similar problems goes for linked requests, where we post an CQE for each request in the link. Introduce a new flags, IOSQE_CQE_SKIP_SUCCESS, trying to help with it. When set and a request completed successfully, it won't generate a CQE. When fails, it produces an CQE, but all following linked requests will be CQE-less, regardless whether they have IOSQE_CQE_SKIP_SUCCESS or not. The notion of "fail" is the same as for link failing-cancellation, where it's opcode dependent, and _usually_ result >= 0 is a success, but not always. Linked timeouts are a bit special. When the requests it's linked to was not attempted to be executed, e.g. failing linked requests, it follows the description above. Otherwise, whether a linked timeout will post a completion or not solely depends on IOSQE_CQE_SKIP_SUCCESS of that linked timeout request. Linked timeout never "fail" during execution, so for them it's unconditional. It's expected for users to not really care about the result of it but rely solely on the result of the master request. Another reason for such a treatment is that it's racy, and the timeout callback may be running awhile the master request posts its completion. use case 1: If one doesn't care about results of some requests, e.g. normal timeouts, just set IOSQE_CQE_SKIP_SUCCESS. Error result will still be posted and need to be handled. use case 2: Set IOSQE_CQE_SKIP_SUCCESS for all requests of a link but the last, and it'll post a completion only for the last one if everything goes right, otherwise there will be one only one CQE for the first failed request. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/0220fbe06f7cf99e6fc71b4297bb1cb6c0e89c2c.1636559119.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- fs/io_uring.c | 42 +++++++++++++++++++++++++++++++++--------- include/uapi/linux/io_uring.h | 4 ++++ 2 files changed, 37 insertions(+), 9 deletions(-) (limited to 'include/uapi/linux') diff --git a/fs/io_uring.c b/fs/io_uring.c index 2a89928ac1ba..7d3589e3a277 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -106,7 +106,8 @@ #define IORING_MAX_REG_BUFFERS (1U << 14) #define SQE_COMMON_FLAGS (IOSQE_FIXED_FILE | IOSQE_IO_LINK | \ - IOSQE_IO_HARDLINK | IOSQE_ASYNC) + IOSQE_IO_HARDLINK | IOSQE_ASYNC | \ + IOSQE_CQE_SKIP_SUCCESS) #define SQE_VALID_FLAGS (SQE_COMMON_FLAGS|IOSQE_BUFFER_SELECT|IOSQE_IO_DRAIN) @@ -721,6 +722,7 @@ enum { REQ_F_HARDLINK_BIT = IOSQE_IO_HARDLINK_BIT, REQ_F_FORCE_ASYNC_BIT = IOSQE_ASYNC_BIT, REQ_F_BUFFER_SELECT_BIT = IOSQE_BUFFER_SELECT_BIT, + REQ_F_CQE_SKIP_BIT = IOSQE_CQE_SKIP_SUCCESS_BIT, /* first byte is taken by user flags, shift it to not overlap */ REQ_F_FAIL_BIT = 8, @@ -737,6 +739,7 @@ enum { REQ_F_REFCOUNT_BIT, REQ_F_ARM_LTIMEOUT_BIT, REQ_F_ASYNC_DATA_BIT, + REQ_F_SKIP_LINK_CQES_BIT, /* keep async read/write and isreg together and in order */ REQ_F_SUPPORT_NOWAIT_BIT, REQ_F_ISREG_BIT, @@ -758,6 +761,8 @@ enum { REQ_F_FORCE_ASYNC = BIT(REQ_F_FORCE_ASYNC_BIT), /* IOSQE_BUFFER_SELECT */ REQ_F_BUFFER_SELECT = BIT(REQ_F_BUFFER_SELECT_BIT), + /* IOSQE_CQE_SKIP_SUCCESS */ + REQ_F_CQE_SKIP = BIT(REQ_F_CQE_SKIP_BIT), /* fail rest of links */ REQ_F_FAIL = BIT(REQ_F_FAIL_BIT), @@ -791,6 +796,8 @@ enum { REQ_F_ARM_LTIMEOUT = BIT(REQ_F_ARM_LTIMEOUT_BIT), /* ->async_data allocated */ REQ_F_ASYNC_DATA = BIT(REQ_F_ASYNC_DATA_BIT), + /* don't post CQEs while failing linked requests */ + REQ_F_SKIP_LINK_CQES = BIT(REQ_F_SKIP_LINK_CQES_BIT), }; struct async_poll { @@ -1301,6 +1308,10 @@ static inline bool req_has_async_data(struct io_kiocb *req) static inline void req_set_fail(struct io_kiocb *req) { req->flags |= REQ_F_FAIL; + if (req->flags & REQ_F_CQE_SKIP) { + req->flags &= ~REQ_F_CQE_SKIP; + req->flags |= REQ_F_SKIP_LINK_CQES; + } } static inline void req_fail_link_node(struct io_kiocb *req, int res) @@ -1843,7 +1854,8 @@ static inline bool __io_fill_cqe(struct io_ring_ctx *ctx, u64 user_data, static noinline void io_fill_cqe_req(struct io_kiocb *req, s32 res, u32 cflags) { - __io_fill_cqe(req->ctx, req->user_data, res, cflags); + if (!(req->flags & REQ_F_CQE_SKIP)) + __io_fill_cqe(req->ctx, req->user_data, res, cflags); } static noinline bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data, @@ -1859,7 +1871,8 @@ static void io_req_complete_post(struct io_kiocb *req, s32 res, struct io_ring_ctx *ctx = req->ctx; spin_lock(&ctx->completion_lock); - __io_fill_cqe(ctx, req->user_data, res, cflags); + if (!(req->flags & REQ_F_CQE_SKIP)) + __io_fill_cqe(ctx, req->user_data, res, cflags); /* * If we're the last reference to this request, add to our locked * free_list cache. @@ -2067,6 +2080,7 @@ static bool io_kill_linked_timeout(struct io_kiocb *req) link->timeout.head = NULL; if (hrtimer_try_to_cancel(&io->timer) != -1) { list_del(&link->timeout.list); + /* leave REQ_F_CQE_SKIP to io_fill_cqe_req */ io_fill_cqe_req(link, -ECANCELED, 0); io_put_req_deferred(link); return true; @@ -2079,6 +2093,7 @@ static void io_fail_links(struct io_kiocb *req) __must_hold(&req->ctx->completion_lock) { struct io_kiocb *nxt, *link = req->link; + bool ignore_cqes = req->flags & REQ_F_SKIP_LINK_CQES; req->link = NULL; while (link) { @@ -2091,7 +2106,10 @@ static void io_fail_links(struct io_kiocb *req) link->link = NULL; trace_io_uring_fail_link(req, link); - io_fill_cqe_req(link, res, 0); + if (!ignore_cqes) { + link->flags &= ~REQ_F_CQE_SKIP; + io_fill_cqe_req(link, res, 0); + } io_put_req_deferred(link); link = nxt; } @@ -2108,6 +2126,7 @@ static bool io_disarm_next(struct io_kiocb *req) req->flags &= ~REQ_F_ARM_LTIMEOUT; if (link && link->opcode == IORING_OP_LINK_TIMEOUT) { io_remove_next_linked(req); + /* leave REQ_F_CQE_SKIP to io_fill_cqe_req */ io_fill_cqe_req(link, -ECANCELED, 0); io_put_req_deferred(link); posted = true; @@ -2372,8 +2391,9 @@ static void __io_submit_flush_completions(struct io_ring_ctx *ctx) struct io_kiocb *req = container_of(node, struct io_kiocb, comp_list); - __io_fill_cqe(ctx, req->user_data, req->result, - req->cflags); + if (!(req->flags & REQ_F_CQE_SKIP)) + __io_fill_cqe(ctx, req->user_data, req->result, + req->cflags); } io_commit_cqring(ctx); spin_unlock(&ctx->completion_lock); @@ -2503,12 +2523,14 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin) prev = start; wq_list_for_each_resume(pos, prev) { struct io_kiocb *req = container_of(pos, struct io_kiocb, comp_list); + u32 cflags; /* order with io_complete_rw_iopoll(), e.g. ->result updates */ if (!smp_load_acquire(&req->iopoll_completed)) break; - __io_fill_cqe(ctx, req->user_data, req->result, - io_put_rw_kbuf(req)); + cflags = io_put_rw_kbuf(req); + if (!(req->flags & REQ_F_CQE_SKIP)) + __io_fill_cqe(ctx, req->user_data, req->result, cflags); nr_events++; } @@ -5832,6 +5854,8 @@ static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe flags = READ_ONCE(sqe->len); if (flags & ~IORING_POLL_ADD_MULTI) return -EINVAL; + if ((flags & IORING_POLL_ADD_MULTI) && (req->flags & REQ_F_CQE_SKIP)) + return -EINVAL; io_req_set_refcount(req); poll->events = io_poll_parse_events(sqe, flags); @@ -10442,7 +10466,7 @@ static __cold int io_uring_create(unsigned entries, struct io_uring_params *p, IORING_FEAT_CUR_PERSONALITY | IORING_FEAT_FAST_POLL | IORING_FEAT_POLL_32BITS | IORING_FEAT_SQPOLL_NONFIXED | IORING_FEAT_EXT_ARG | IORING_FEAT_NATIVE_WORKERS | - IORING_FEAT_RSRC_TAGS; + IORING_FEAT_RSRC_TAGS | IORING_FEAT_CQE_SKIP; if (copy_to_user(params, p, sizeof(*p))) { ret = -EFAULT; diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index c45b5e9a9387..787f491f0d2a 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -70,6 +70,7 @@ enum { IOSQE_IO_HARDLINK_BIT, IOSQE_ASYNC_BIT, IOSQE_BUFFER_SELECT_BIT, + IOSQE_CQE_SKIP_SUCCESS_BIT, }; /* @@ -87,6 +88,8 @@ enum { #define IOSQE_ASYNC (1U << IOSQE_ASYNC_BIT) /* select buffer from sqe->buf_group */ #define IOSQE_BUFFER_SELECT (1U << IOSQE_BUFFER_SELECT_BIT) +/* don't post CQE if request succeeded */ +#define IOSQE_CQE_SKIP_SUCCESS (1U << IOSQE_CQE_SKIP_SUCCESS_BIT) /* * io_uring_setup() flags @@ -289,6 +292,7 @@ struct io_uring_params { #define IORING_FEAT_EXT_ARG (1U << 8) #define IORING_FEAT_NATIVE_WORKERS (1U << 9) #define IORING_FEAT_RSRC_TAGS (1U << 10) +#define IORING_FEAT_CQE_SKIP (1U << 11) /* * io_uring_register(2) opcodes and arguments -- cgit v1.2.3 From 53db28933e952a8536b002ba8b8c9443ccc0e939 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 25 Nov 2021 14:05:18 +0100 Subject: fuse: extend init flags FUSE_INIT flags are close to running out, so add another 32bits worth of space. Add FUSE_INIT_EXT flag to the old flags field in fuse_init_in. If this flag is set, then fuse_init_in is extended by 48bytes, in which a flags_hi field is allocated to contain the high 32bits of the flags. A flags_hi field is also added to fuse_init_out, allocated out of the remaining unused fields. Known userspace implementations of the fuse protocol have been checked to accept the extended FUSE_INIT request, but this might cause problems with other implementations. If that happens to be the case, the protocol negotiation will have to be extended with an extra initialization request roundtrip. Signed-off-by: Miklos Szeredi --- fs/fuse/inode.c | 60 ++++++++++++++++++++++++++--------------------- include/uapi/linux/fuse.h | 16 +++++++++++-- 2 files changed, 47 insertions(+), 29 deletions(-) (limited to 'include/uapi/linux') diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 8b89e3ba7df3..5a1dad8c1f92 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -1109,72 +1109,74 @@ static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args, process_init_limits(fc, arg); if (arg->minor >= 6) { + u64 flags = arg->flags | (u64) arg->flags2 << 32; + ra_pages = arg->max_readahead / PAGE_SIZE; - if (arg->flags & FUSE_ASYNC_READ) + if (flags & FUSE_ASYNC_READ) fc->async_read = 1; - if (!(arg->flags & FUSE_POSIX_LOCKS)) + if (!(flags & FUSE_POSIX_LOCKS)) fc->no_lock = 1; if (arg->minor >= 17) { - if (!(arg->flags & FUSE_FLOCK_LOCKS)) + if (!(flags & FUSE_FLOCK_LOCKS)) fc->no_flock = 1; } else { - if (!(arg->flags & FUSE_POSIX_LOCKS)) + if (!(flags & FUSE_POSIX_LOCKS)) fc->no_flock = 1; } - if (arg->flags & FUSE_ATOMIC_O_TRUNC) + if (flags & FUSE_ATOMIC_O_TRUNC) fc->atomic_o_trunc = 1; if (arg->minor >= 9) { /* LOOKUP has dependency on proto version */ - if (arg->flags & FUSE_EXPORT_SUPPORT) + if (flags & FUSE_EXPORT_SUPPORT) fc->export_support = 1; } - if (arg->flags & FUSE_BIG_WRITES) + if (flags & FUSE_BIG_WRITES) fc->big_writes = 1; - if (arg->flags & FUSE_DONT_MASK) + if (flags & FUSE_DONT_MASK) fc->dont_mask = 1; - if (arg->flags & FUSE_AUTO_INVAL_DATA) + if (flags & FUSE_AUTO_INVAL_DATA) fc->auto_inval_data = 1; - else if (arg->flags & FUSE_EXPLICIT_INVAL_DATA) + else if (flags & FUSE_EXPLICIT_INVAL_DATA) fc->explicit_inval_data = 1; - if (arg->flags & FUSE_DO_READDIRPLUS) { + if (flags & FUSE_DO_READDIRPLUS) { fc->do_readdirplus = 1; - if (arg->flags & FUSE_READDIRPLUS_AUTO) + if (flags & FUSE_READDIRPLUS_AUTO) fc->readdirplus_auto = 1; } - if (arg->flags & FUSE_ASYNC_DIO) + if (flags & FUSE_ASYNC_DIO) fc->async_dio = 1; - if (arg->flags & FUSE_WRITEBACK_CACHE) + if (flags & FUSE_WRITEBACK_CACHE) fc->writeback_cache = 1; - if (arg->flags & FUSE_PARALLEL_DIROPS) + if (flags & FUSE_PARALLEL_DIROPS) fc->parallel_dirops = 1; - if (arg->flags & FUSE_HANDLE_KILLPRIV) + if (flags & FUSE_HANDLE_KILLPRIV) fc->handle_killpriv = 1; if (arg->time_gran && arg->time_gran <= 1000000000) fm->sb->s_time_gran = arg->time_gran; - if ((arg->flags & FUSE_POSIX_ACL)) { + if ((flags & FUSE_POSIX_ACL)) { fc->default_permissions = 1; fc->posix_acl = 1; fm->sb->s_xattr = fuse_acl_xattr_handlers; } - if (arg->flags & FUSE_CACHE_SYMLINKS) + if (flags & FUSE_CACHE_SYMLINKS) fc->cache_symlinks = 1; - if (arg->flags & FUSE_ABORT_ERROR) + if (flags & FUSE_ABORT_ERROR) fc->abort_err = 1; - if (arg->flags & FUSE_MAX_PAGES) { + if (flags & FUSE_MAX_PAGES) { fc->max_pages = min_t(unsigned int, fc->max_pages_limit, max_t(unsigned int, arg->max_pages, 1)); } if (IS_ENABLED(CONFIG_FUSE_DAX) && - arg->flags & FUSE_MAP_ALIGNMENT && + flags & FUSE_MAP_ALIGNMENT && !fuse_dax_check_alignment(fc, arg->map_alignment)) { ok = false; } - if (arg->flags & FUSE_HANDLE_KILLPRIV_V2) { + if (flags & FUSE_HANDLE_KILLPRIV_V2) { fc->handle_killpriv_v2 = 1; fm->sb->s_flags |= SB_NOSEC; } - if (arg->flags & FUSE_SETXATTR_EXT) + if (flags & FUSE_SETXATTR_EXT) fc->setxattr_ext = 1; } else { ra_pages = fc->max_read / PAGE_SIZE; @@ -1203,13 +1205,14 @@ static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args, void fuse_send_init(struct fuse_mount *fm) { struct fuse_init_args *ia; + u64 flags; ia = kzalloc(sizeof(*ia), GFP_KERNEL | __GFP_NOFAIL); ia->in.major = FUSE_KERNEL_VERSION; ia->in.minor = FUSE_KERNEL_MINOR_VERSION; ia->in.max_readahead = fm->sb->s_bdi->ra_pages * PAGE_SIZE; - ia->in.flags |= + flags = FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC | FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK | FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ | @@ -1219,13 +1222,16 @@ void fuse_send_init(struct fuse_mount *fm) FUSE_PARALLEL_DIROPS | FUSE_HANDLE_KILLPRIV | FUSE_POSIX_ACL | FUSE_ABORT_ERROR | FUSE_MAX_PAGES | FUSE_CACHE_SYMLINKS | FUSE_NO_OPENDIR_SUPPORT | FUSE_EXPLICIT_INVAL_DATA | - FUSE_HANDLE_KILLPRIV_V2 | FUSE_SETXATTR_EXT; + FUSE_HANDLE_KILLPRIV_V2 | FUSE_SETXATTR_EXT | FUSE_INIT_EXT; #ifdef CONFIG_FUSE_DAX if (fm->fc->dax) - ia->in.flags |= FUSE_MAP_ALIGNMENT; + flags |= FUSE_MAP_ALIGNMENT; #endif if (fm->fc->auto_submounts) - ia->in.flags |= FUSE_SUBMOUNTS; + flags |= FUSE_SUBMOUNTS; + + ia->in.flags = flags; + ia->in.flags2 = flags >> 32; ia->args.opcode = FUSE_INIT; ia->args.in_numargs = 1; diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index a1dc3ee1d17c..980f3998c11b 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -187,6 +187,10 @@ * * 7.35 * - add FOPEN_NOFLUSH + * + * 7.36 + * - extend fuse_init_in with reserved fields, add FUSE_INIT_EXT init flag + * - add flags2 to fuse_init_in and fuse_init_out */ #ifndef _LINUX_FUSE_H @@ -222,7 +226,7 @@ #define FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 35 +#define FUSE_KERNEL_MINOR_VERSION 36 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 @@ -341,6 +345,8 @@ struct fuse_file_lock { * write/truncate sgid is killed only if file has group * execute permission. (Same as Linux VFS behavior). * FUSE_SETXATTR_EXT: Server supports extended struct fuse_setxattr_in + * FUSE_INIT_EXT: extended fuse_init_in request + * FUSE_INIT_RESERVED: reserved, do not use */ #define FUSE_ASYNC_READ (1 << 0) #define FUSE_POSIX_LOCKS (1 << 1) @@ -372,6 +378,9 @@ struct fuse_file_lock { #define FUSE_SUBMOUNTS (1 << 27) #define FUSE_HANDLE_KILLPRIV_V2 (1 << 28) #define FUSE_SETXATTR_EXT (1 << 29) +#define FUSE_INIT_EXT (1 << 30) +#define FUSE_INIT_RESERVED (1 << 31) +/* bits 32..63 get shifted down 32 bits into the flags2 field */ /** * CUSE INIT request/reply flags @@ -741,6 +750,8 @@ struct fuse_init_in { uint32_t minor; uint32_t max_readahead; uint32_t flags; + uint32_t flags2; + uint32_t unused[11]; }; #define FUSE_COMPAT_INIT_OUT_SIZE 8 @@ -757,7 +768,8 @@ struct fuse_init_out { uint32_t time_gran; uint16_t max_pages; uint16_t map_alignment; - uint32_t unused[8]; + uint32_t flags2; + uint32_t unused[7]; }; #define CUSE_INIT_INFO_MAX 4096 -- cgit v1.2.3 From 3e2b6fdbdc9ab5a02d9d5676a005f30780b97553 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Thu, 11 Nov 2021 09:32:49 -0500 Subject: fuse: send security context of inode on file When a new inode is created, send its security context to server along with creation request (FUSE_CREAT, FUSE_MKNOD, FUSE_MKDIR and FUSE_SYMLINK). This gives server an opportunity to create new file and set security context (possibly atomically). In all the configurations it might not be possible to set context atomically. Like nfs and ceph, use security_dentry_init_security() to dermine security context of inode and send it with create, mkdir, mknod, and symlink requests. Following is the information sent to server. fuse_sectx_header, fuse_secctx, xattr_name, security_context - struct fuse_secctx_header This contains total number of security contexts being sent and total size of all the security contexts (including size of fuse_secctx_header). - struct fuse_secctx This contains size of security context which follows this structure. There is one fuse_secctx instance per security context. - xattr name string This string represents name of xattr which should be used while setting security context. - security context This is the actual security context whose size is specified in fuse_secctx struct. Also add the FUSE_SECURITY_CTX flag for the `flags` field of the fuse_init_out struct. When this flag is set the kernel will append the security context for a newly created inode to the request (create, mkdir, mknod, and symlink). The server is responsible for ensuring that the inode appears atomically (preferrably) with the requested security context. For example, If the server is using SELinux and backed by a "real" linux file system that supports extended attributes it can write the security context value to /proc/thread-self/attr/fscreate before making the syscall to create the inode. This patch is based on patch from Chirantan Ekbote Signed-off-by: Vivek Goyal Signed-off-by: Miklos Szeredi --- fs/fuse/dir.c | 91 +++++++++++++++++++++++++++++++++++++++++++++++ fs/fuse/fuse_i.h | 3 ++ fs/fuse/inode.c | 5 ++- include/uapi/linux/fuse.h | 34 ++++++++++++++++-- 4 files changed, 130 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 0654bfedcbb0..656e921f3506 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -17,6 +17,9 @@ #include #include #include +#include +#include +#include static void fuse_advise_use_readdirplus(struct inode *dir) { @@ -456,6 +459,62 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, return ERR_PTR(err); } +static int get_security_context(struct dentry *entry, umode_t mode, + void **security_ctx, u32 *security_ctxlen) +{ + struct fuse_secctx *fctx; + struct fuse_secctx_header *header; + void *ctx = NULL, *ptr; + u32 ctxlen, total_len = sizeof(*header); + int err, nr_ctx = 0; + const char *name; + size_t namelen; + + err = security_dentry_init_security(entry, mode, &entry->d_name, + &name, &ctx, &ctxlen); + if (err) { + if (err != -EOPNOTSUPP) + goto out_err; + /* No LSM is supporting this security hook. Ignore error */ + ctxlen = 0; + ctx = NULL; + } + + if (ctxlen) { + nr_ctx = 1; + namelen = strlen(name) + 1; + err = -EIO; + if (WARN_ON(namelen > XATTR_NAME_MAX + 1 || ctxlen > S32_MAX)) + goto out_err; + total_len += FUSE_REC_ALIGN(sizeof(*fctx) + namelen + ctxlen); + } + + err = -ENOMEM; + header = ptr = kzalloc(total_len, GFP_KERNEL); + if (!ptr) + goto out_err; + + header->nr_secctx = nr_ctx; + header->size = total_len; + ptr += sizeof(*header); + if (nr_ctx) { + fctx = ptr; + fctx->size = ctxlen; + ptr += sizeof(*fctx); + + strcpy(ptr, name); + ptr += namelen; + + memcpy(ptr, ctx, ctxlen); + } + *security_ctxlen = total_len; + *security_ctx = header; + err = 0; +out_err: + kfree(ctx); + return err; +} + /* * Atomic create+open operation * @@ -476,6 +535,8 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, struct fuse_entry_out outentry; struct fuse_inode *fi; struct fuse_file *ff; + void *security_ctx = NULL; + u32 security_ctxlen; /* Userspace expects S_IFREG in create mode */ BUG_ON((mode & S_IFMT) != S_IFREG); @@ -517,7 +578,20 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, args.out_args[0].value = &outentry; args.out_args[1].size = sizeof(outopen); args.out_args[1].value = &outopen; + + if (fm->fc->init_security) { + err = get_security_context(entry, mode, &security_ctx, + &security_ctxlen); + if (err) + goto out_put_forget_req; + + args.in_numargs = 3; + args.in_args[2].size = security_ctxlen; + args.in_args[2].value = security_ctx; + } + err = fuse_simple_request(fm, &args); + kfree(security_ctx); if (err) goto out_free_ff; @@ -620,6 +694,8 @@ static int create_new_entry(struct fuse_mount *fm, struct fuse_args *args, struct dentry *d; int err; struct fuse_forget_link *forget; + void *security_ctx = NULL; + u32 security_ctxlen; if (fuse_is_bad(dir)) return -EIO; @@ -633,7 +709,22 @@ static int create_new_entry(struct fuse_mount *fm, struct fuse_args *args, args->out_numargs = 1; args->out_args[0].size = sizeof(outarg); args->out_args[0].value = &outarg; + + if (fm->fc->init_security && args->opcode != FUSE_LINK) { + err = get_security_context(entry, mode, &security_ctx, + &security_ctxlen); + if (err) + goto out_put_forget_req; + + BUG_ON(args->in_numargs != 2); + + args->in_numargs = 3; + args->in_args[2].size = security_ctxlen; + args->in_args[2].value = security_ctx; + } + err = fuse_simple_request(fm, args); + kfree(security_ctx); if (err) goto out_put_forget_req; diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 198637b41e19..c1a8b313e6ed 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -765,6 +765,9 @@ struct fuse_conn { /* Propagate syncfs() to server */ unsigned int sync_fs:1; + /* Initialize security xattrs when creating a new inode */ + unsigned int init_security:1; + /** The number of requests waiting for completion */ atomic_t num_waiting; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 5a1dad8c1f92..63ab45427de5 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -1178,6 +1178,8 @@ static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args, } if (flags & FUSE_SETXATTR_EXT) fc->setxattr_ext = 1; + if (flags & FUSE_SECURITY_CTX) + fc->init_security = 1; } else { ra_pages = fc->max_read / PAGE_SIZE; fc->no_lock = 1; @@ -1222,7 +1224,8 @@ void fuse_send_init(struct fuse_mount *fm) FUSE_PARALLEL_DIROPS | FUSE_HANDLE_KILLPRIV | FUSE_POSIX_ACL | FUSE_ABORT_ERROR | FUSE_MAX_PAGES | FUSE_CACHE_SYMLINKS | FUSE_NO_OPENDIR_SUPPORT | FUSE_EXPLICIT_INVAL_DATA | - FUSE_HANDLE_KILLPRIV_V2 | FUSE_SETXATTR_EXT | FUSE_INIT_EXT; + FUSE_HANDLE_KILLPRIV_V2 | FUSE_SETXATTR_EXT | FUSE_INIT_EXT | + FUSE_SECURITY_CTX; #ifdef CONFIG_FUSE_DAX if (fm->fc->dax) flags |= FUSE_MAP_ALIGNMENT; diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index 980f3998c11b..3f0ea63fec08 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -191,6 +191,8 @@ * 7.36 * - extend fuse_init_in with reserved fields, add FUSE_INIT_EXT init flag * - add flags2 to fuse_init_in and fuse_init_out + * - add FUSE_SECURITY_CTX init flag + * - add security context to create, mkdir, symlink, and mknod requests */ #ifndef _LINUX_FUSE_H @@ -347,6 +349,8 @@ struct fuse_file_lock { * FUSE_SETXATTR_EXT: Server supports extended struct fuse_setxattr_in * FUSE_INIT_EXT: extended fuse_init_in request * FUSE_INIT_RESERVED: reserved, do not use + * FUSE_SECURITY_CTX: add security context to create, mkdir, symlink, and + * mknod */ #define FUSE_ASYNC_READ (1 << 0) #define FUSE_POSIX_LOCKS (1 << 1) @@ -381,6 +385,7 @@ struct fuse_file_lock { #define FUSE_INIT_EXT (1 << 30) #define FUSE_INIT_RESERVED (1 << 31) /* bits 32..63 get shifted down 32 bits into the flags2 field */ +#define FUSE_SECURITY_CTX (1ULL << 32) /** * CUSE INIT request/reply flags @@ -877,9 +882,12 @@ struct fuse_dirent { char name[]; }; -#define FUSE_NAME_OFFSET offsetof(struct fuse_dirent, name) -#define FUSE_DIRENT_ALIGN(x) \ +/* Align variable length records to 64bit boundary */ +#define FUSE_REC_ALIGN(x) \ (((x) + sizeof(uint64_t) - 1) & ~(sizeof(uint64_t) - 1)) + +#define FUSE_NAME_OFFSET offsetof(struct fuse_dirent, name) +#define FUSE_DIRENT_ALIGN(x) FUSE_REC_ALIGN(x) #define FUSE_DIRENT_SIZE(d) \ FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET + (d)->namelen) @@ -996,4 +1004,26 @@ struct fuse_syncfs_in { uint64_t padding; }; +/* + * For each security context, send fuse_secctx with size of security context + * fuse_secctx will be followed by security context name and this in turn + * will be followed by actual context label. + * fuse_secctx, name, context + */ +struct fuse_secctx { + uint32_t size; + uint32_t padding; +}; + +/* + * Contains the information about how many fuse_secctx structures are being + * sent and what's the total size of all security contexts (including + * size of fuse_secctx_header). + * + */ +struct fuse_secctx_header { + uint32_t size; + uint32_t nr_secctx; +}; + #endif /* _LINUX_FUSE_H */ -- cgit v1.2.3 From aeeecb889165617a841e939117f9a8095d0e7d80 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Sun, 28 Nov 2021 14:01:02 +0800 Subject: net: snmp: add statistics for tcp small queue check Once tcp small queue check failed in tcp_small_queue_check(), the throughput of tcp will be limited, and it's hard to distinguish whether it is out of tcp congestion control. Add statistics of LINUX_MIB_TCPSMALLQUEUEFAILURE for this scene. Signed-off-by: Menglong Dong Signed-off-by: David S. Miller --- include/uapi/linux/snmp.h | 1 + net/ipv4/proc.c | 1 + net/ipv4/tcp_output.c | 5 ++++- 3 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index 904909d020e2..e32ec6932e82 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -292,6 +292,7 @@ enum LINUX_MIB_TCPDSACKIGNOREDDUBIOUS, /* TCPDSACKIgnoredDubious */ LINUX_MIB_TCPMIGRATEREQSUCCESS, /* TCPMigrateReqSuccess */ LINUX_MIB_TCPMIGRATEREQFAILURE, /* TCPMigrateReqFailure */ + LINUX_MIB_TCPSMALLQUEUEFAILURE, /* TCPSmallQueueFailure */ __LINUX_MIB_MAX }; diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index f30273afb539..43b7a77cd6b4 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -297,6 +297,7 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPDSACKIgnoredDubious", LINUX_MIB_TCPDSACKIGNOREDDUBIOUS), SNMP_MIB_ITEM("TCPMigrateReqSuccess", LINUX_MIB_TCPMIGRATEREQSUCCESS), SNMP_MIB_ITEM("TCPMigrateReqFailure", LINUX_MIB_TCPMIGRATEREQFAILURE), + SNMP_MIB_ITEM("TCPSmallQueueFailure", LINUX_MIB_TCPSMALLQUEUEFAILURE), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 5079832af5c1..c4ab6c8f0c77 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2524,8 +2524,11 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb, * test again the condition. */ smp_mb__after_atomic(); - if (refcount_read(&sk->sk_wmem_alloc) > limit) + if (refcount_read(&sk->sk_wmem_alloc) > limit) { + NET_INC_STATS(sock_net(sk), + LINUX_MIB_TCPSMALLQUEUEFAILURE); return true; + } } return false; } -- cgit v1.2.3 From 5944b5abd8646e8c6ac6af2b55f87dede1dae898 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Tue, 30 Nov 2021 12:29:47 +0800 Subject: Bonding: add arp_missed_max option Currently, we use hard code number to verify if we are in the arp_interval timeslice. But some user may want to reduce/extend the verify timeslice. With the similar team option 'missed_max' the uers could change that number based on their own environment. Acked-by: Jay Vosburgh Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller --- Documentation/networking/bonding.rst | 11 +++++++++++ drivers/net/bonding/bond_main.c | 17 +++++++++-------- drivers/net/bonding/bond_netlink.c | 15 +++++++++++++++ drivers/net/bonding/bond_options.c | 28 ++++++++++++++++++++++++++++ drivers/net/bonding/bond_procfs.c | 2 ++ drivers/net/bonding/bond_sysfs.c | 13 +++++++++++++ include/net/bond_options.h | 1 + include/net/bonding.h | 1 + include/uapi/linux/if_link.h | 1 + tools/include/uapi/linux/if_link.h | 1 + 10 files changed, 82 insertions(+), 8 deletions(-) (limited to 'include/uapi/linux') diff --git a/Documentation/networking/bonding.rst b/Documentation/networking/bonding.rst index 31cfd7d674a6..e5a07cb48f68 100644 --- a/Documentation/networking/bonding.rst +++ b/Documentation/networking/bonding.rst @@ -421,6 +421,17 @@ arp_all_targets consider the slave up only when all of the arp_ip_targets are reachable +arp_missed_max + + Specifies the number of arp_interval monitor checks that must + fail in order for an interface to be marked down by the ARP monitor. + + In order to provide orderly failover semantics, backup interfaces + are permitted an extra monitor check (i.e., they must fail + arp_missed_max + 1 times before being marked down). + + The default value is 2, and the allowable range is 1 - 255. + downdelay Specifies the time, in milliseconds, to wait before disabling diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index cf73eacdda91..f3aa49b97f81 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -3129,8 +3129,8 @@ static void bond_loadbalance_arp_mon(struct bonding *bond) * when the source ip is 0, so don't take the link down * if we don't know our ip yet */ - if (!bond_time_in_interval(bond, trans_start, 2) || - !bond_time_in_interval(bond, slave->last_rx, 2)) { + if (!bond_time_in_interval(bond, trans_start, bond->params.missed_max) || + !bond_time_in_interval(bond, slave->last_rx, bond->params.missed_max)) { bond_propose_link_state(slave, BOND_LINK_DOWN); slave_state_changed = 1; @@ -3224,7 +3224,7 @@ static int bond_ab_arp_inspect(struct bonding *bond) /* Backup slave is down if: * - No current_arp_slave AND - * - more than 3*delta since last receive AND + * - more than (missed_max+1)*delta since last receive AND * - the bond has an IP address * * Note: a non-null current_arp_slave indicates @@ -3236,20 +3236,20 @@ static int bond_ab_arp_inspect(struct bonding *bond) */ if (!bond_is_active_slave(slave) && !rcu_access_pointer(bond->current_arp_slave) && - !bond_time_in_interval(bond, last_rx, 3)) { + !bond_time_in_interval(bond, last_rx, bond->params.missed_max + 1)) { bond_propose_link_state(slave, BOND_LINK_DOWN); commit++; } /* Active slave is down if: - * - more than 2*delta since transmitting OR - * - (more than 2*delta since receive AND + * - more than missed_max*delta since transmitting OR + * - (more than missed_max*delta since receive AND * the bond has an IP address) */ trans_start = dev_trans_start(slave->dev); if (bond_is_active_slave(slave) && - (!bond_time_in_interval(bond, trans_start, 2) || - !bond_time_in_interval(bond, last_rx, 2))) { + (!bond_time_in_interval(bond, trans_start, bond->params.missed_max) || + !bond_time_in_interval(bond, last_rx, bond->params.missed_max))) { bond_propose_link_state(slave, BOND_LINK_DOWN); commit++; } @@ -5822,6 +5822,7 @@ static int bond_check_params(struct bond_params *params) params->arp_interval = arp_interval; params->arp_validate = arp_validate_value; params->arp_all_targets = arp_all_targets_value; + params->missed_max = 2; params->updelay = updelay; params->downdelay = downdelay; params->peer_notif_delay = 0; diff --git a/drivers/net/bonding/bond_netlink.c b/drivers/net/bonding/bond_netlink.c index 5d54e11d18fa..1007bf6d385d 100644 --- a/drivers/net/bonding/bond_netlink.c +++ b/drivers/net/bonding/bond_netlink.c @@ -110,6 +110,7 @@ static const struct nla_policy bond_policy[IFLA_BOND_MAX + 1] = { .len = ETH_ALEN }, [IFLA_BOND_TLB_DYNAMIC_LB] = { .type = NLA_U8 }, [IFLA_BOND_PEER_NOTIF_DELAY] = { .type = NLA_U32 }, + [IFLA_BOND_MISSED_MAX] = { .type = NLA_U8 }, }; static const struct nla_policy bond_slave_policy[IFLA_BOND_SLAVE_MAX + 1] = { @@ -453,6 +454,15 @@ static int bond_changelink(struct net_device *bond_dev, struct nlattr *tb[], return err; } + if (data[IFLA_BOND_MISSED_MAX]) { + int missed_max = nla_get_u8(data[IFLA_BOND_MISSED_MAX]); + + bond_opt_initval(&newval, missed_max); + err = __bond_opt_set(bond, BOND_OPT_MISSED_MAX, &newval); + if (err) + return err; + } + return 0; } @@ -515,6 +525,7 @@ static size_t bond_get_size(const struct net_device *bond_dev) nla_total_size(ETH_ALEN) + /* IFLA_BOND_AD_ACTOR_SYSTEM */ nla_total_size(sizeof(u8)) + /* IFLA_BOND_TLB_DYNAMIC_LB */ nla_total_size(sizeof(u32)) + /* IFLA_BOND_PEER_NOTIF_DELAY */ + nla_total_size(sizeof(u8)) + /* IFLA_BOND_MISSED_MAX */ 0; } @@ -650,6 +661,10 @@ static int bond_fill_info(struct sk_buff *skb, bond->params.tlb_dynamic_lb)) goto nla_put_failure; + if (nla_put_u8(skb, IFLA_BOND_MISSED_MAX, + bond->params.missed_max)) + goto nla_put_failure; + if (BOND_MODE(bond) == BOND_MODE_8023AD) { struct ad_info info; diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index a8fde3bc458f..0f48921c4f15 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -78,6 +78,8 @@ static int bond_option_ad_actor_system_set(struct bonding *bond, const struct bond_opt_value *newval); static int bond_option_ad_user_port_key_set(struct bonding *bond, const struct bond_opt_value *newval); +static int bond_option_missed_max_set(struct bonding *bond, + const struct bond_opt_value *newval); static const struct bond_opt_value bond_mode_tbl[] = { @@ -213,6 +215,13 @@ static const struct bond_opt_value bond_ad_user_port_key_tbl[] = { { NULL, -1, 0}, }; +static const struct bond_opt_value bond_missed_max_tbl[] = { + { "minval", 1, BOND_VALFLAG_MIN}, + { "maxval", 255, BOND_VALFLAG_MAX}, + { "default", 2, BOND_VALFLAG_DEFAULT}, + { NULL, -1, 0}, +}; + static const struct bond_option bond_opts[BOND_OPT_LAST] = { [BOND_OPT_MODE] = { .id = BOND_OPT_MODE, @@ -270,6 +279,15 @@ static const struct bond_option bond_opts[BOND_OPT_LAST] = { .values = bond_intmax_tbl, .set = bond_option_arp_interval_set }, + [BOND_OPT_MISSED_MAX] = { + .id = BOND_OPT_MISSED_MAX, + .name = "arp_missed_max", + .desc = "Maximum number of missed ARP interval", + .unsuppmodes = BIT(BOND_MODE_8023AD) | BIT(BOND_MODE_TLB) | + BIT(BOND_MODE_ALB), + .values = bond_missed_max_tbl, + .set = bond_option_missed_max_set + }, [BOND_OPT_ARP_TARGETS] = { .id = BOND_OPT_ARP_TARGETS, .name = "arp_ip_target", @@ -1186,6 +1204,16 @@ static int bond_option_arp_all_targets_set(struct bonding *bond, return 0; } +static int bond_option_missed_max_set(struct bonding *bond, + const struct bond_opt_value *newval) +{ + netdev_dbg(bond->dev, "Setting missed max to %s (%llu)\n", + newval->string, newval->value); + bond->params.missed_max = newval->value; + + return 0; +} + static int bond_option_primary_set(struct bonding *bond, const struct bond_opt_value *newval) { diff --git a/drivers/net/bonding/bond_procfs.c b/drivers/net/bonding/bond_procfs.c index f3e3bfd72556..2ec11af5f0cc 100644 --- a/drivers/net/bonding/bond_procfs.c +++ b/drivers/net/bonding/bond_procfs.c @@ -115,6 +115,8 @@ static void bond_info_show_master(struct seq_file *seq) seq_printf(seq, "ARP Polling Interval (ms): %d\n", bond->params.arp_interval); + seq_printf(seq, "ARP Missed Max: %u\n", + bond->params.missed_max); seq_printf(seq, "ARP IP target/s (n.n.n.n form):"); diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c index c48b77167fab..9b5a5df23d21 100644 --- a/drivers/net/bonding/bond_sysfs.c +++ b/drivers/net/bonding/bond_sysfs.c @@ -303,6 +303,18 @@ static ssize_t bonding_show_arp_targets(struct device *d, static DEVICE_ATTR(arp_ip_target, 0644, bonding_show_arp_targets, bonding_sysfs_store_option); +/* Show the arp missed max. */ +static ssize_t bonding_show_missed_max(struct device *d, + struct device_attribute *attr, + char *buf) +{ + struct bonding *bond = to_bond(d); + + return sprintf(buf, "%u\n", bond->params.missed_max); +} +static DEVICE_ATTR(arp_missed_max, 0644, + bonding_show_missed_max, bonding_sysfs_store_option); + /* Show the up and down delays. */ static ssize_t bonding_show_downdelay(struct device *d, struct device_attribute *attr, @@ -779,6 +791,7 @@ static struct attribute *per_bond_attrs[] = { &dev_attr_ad_actor_sys_prio.attr, &dev_attr_ad_actor_system.attr, &dev_attr_ad_user_port_key.attr, + &dev_attr_arp_missed_max.attr, NULL, }; diff --git a/include/net/bond_options.h b/include/net/bond_options.h index e64833a674eb..dd75c071f67e 100644 --- a/include/net/bond_options.h +++ b/include/net/bond_options.h @@ -65,6 +65,7 @@ enum { BOND_OPT_NUM_PEER_NOTIF_ALIAS, BOND_OPT_PEER_NOTIF_DELAY, BOND_OPT_LACP_ACTIVE, + BOND_OPT_MISSED_MAX, BOND_OPT_LAST }; diff --git a/include/net/bonding.h b/include/net/bonding.h index 15e083e18f75..f6ae3a4baea4 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -121,6 +121,7 @@ struct bond_params { int xmit_policy; int miimon; u8 num_peer_notif; + u8 missed_max; int arp_interval; int arp_validate; int arp_all_targets; diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index eebd3894fe89..4ac53b30b6dc 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -858,6 +858,7 @@ enum { IFLA_BOND_TLB_DYNAMIC_LB, IFLA_BOND_PEER_NOTIF_DELAY, IFLA_BOND_AD_LACP_ACTIVE, + IFLA_BOND_MISSED_MAX, __IFLA_BOND_MAX, }; diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h index eebd3894fe89..4ac53b30b6dc 100644 --- a/tools/include/uapi/linux/if_link.h +++ b/tools/include/uapi/linux/if_link.h @@ -858,6 +858,7 @@ enum { IFLA_BOND_TLB_DYNAMIC_LB, IFLA_BOND_PEER_NOTIF_DELAY, IFLA_BOND_AD_LACP_ACTIVE, + IFLA_BOND_MISSED_MAX, __IFLA_BOND_MAX, }; -- cgit v1.2.3 From e6f2dd0f80674e9d5960337b3e9c2a242441b326 Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Mon, 29 Nov 2021 19:06:19 -0800 Subject: bpf: Add bpf_loop helper This patch adds the kernel-side and API changes for a new helper function, bpf_loop: long bpf_loop(u32 nr_loops, void *callback_fn, void *callback_ctx, u64 flags); where long (*callback_fn)(u32 index, void *ctx); bpf_loop invokes the "callback_fn" **nr_loops** times or until the callback_fn returns 1. The callback_fn can only return 0 or 1, and this is enforced by the verifier. The callback_fn index is zero-indexed. A few things to please note: ~ The "u64 flags" parameter is currently unused but is included in case a future use case for it arises. ~ In the kernel-side implementation of bpf_loop (kernel/bpf/bpf_iter.c), bpf_callback_t is used as the callback function cast. ~ A program can have nested bpf_loop calls but the program must still adhere to the verifier constraint of its stack depth (the stack depth cannot exceed MAX_BPF_STACK)) ~ Recursive callback_fns do not pass the verifier, due to the call stack for these being too deep. ~ The next patch will include the tests and benchmark Signed-off-by: Joanne Koong Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211130030622.4131246-2-joannekoong@fb.com --- include/linux/bpf.h | 1 + include/uapi/linux/bpf.h | 25 ++++++++++++ kernel/bpf/bpf_iter.c | 35 +++++++++++++++++ kernel/bpf/helpers.c | 2 + kernel/bpf/verifier.c | 88 ++++++++++++++++++++++++++---------------- tools/include/uapi/linux/bpf.h | 25 ++++++++++++ 6 files changed, 142 insertions(+), 34 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index cc7a0c36e7df..cad0829710be 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2164,6 +2164,7 @@ extern const struct bpf_func_proto bpf_sk_setsockopt_proto; extern const struct bpf_func_proto bpf_sk_getsockopt_proto; extern const struct bpf_func_proto bpf_kallsyms_lookup_name_proto; extern const struct bpf_func_proto bpf_find_vma_proto; +extern const struct bpf_func_proto bpf_loop_proto; const struct bpf_func_proto *tracing_prog_func_proto( enum bpf_func_id func_id, const struct bpf_prog *prog); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index a69e4b04ffeb..211b43afd0fb 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4957,6 +4957,30 @@ union bpf_attr { * **-ENOENT** if *task->mm* is NULL, or no vma contains *addr*. * **-EBUSY** if failed to try lock mmap_lock. * **-EINVAL** for invalid **flags**. + * + * long bpf_loop(u32 nr_loops, void *callback_fn, void *callback_ctx, u64 flags) + * Description + * For **nr_loops**, call **callback_fn** function + * with **callback_ctx** as the context parameter. + * The **callback_fn** should be a static function and + * the **callback_ctx** should be a pointer to the stack. + * The **flags** is used to control certain aspects of the helper. + * Currently, the **flags** must be 0. Currently, nr_loops is + * limited to 1 << 23 (~8 million) loops. + * + * long (\*callback_fn)(u32 index, void \*ctx); + * + * where **index** is the current index in the loop. The index + * is zero-indexed. + * + * If **callback_fn** returns 0, the helper will continue to the next + * loop. If return value is 1, the helper will skip the rest of + * the loops and return. Other return values are not used now, + * and will be rejected by the verifier. + * + * Return + * The number of loops performed, **-EINVAL** for invalid **flags**, + * **-E2BIG** if **nr_loops** exceeds the maximum number of loops. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5140,6 +5164,7 @@ union bpf_attr { FN(skc_to_unix_sock), \ FN(kallsyms_lookup_name), \ FN(find_vma), \ + FN(loop), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c index b2ee45064e06..b7aef5b3416d 100644 --- a/kernel/bpf/bpf_iter.c +++ b/kernel/bpf/bpf_iter.c @@ -714,3 +714,38 @@ const struct bpf_func_proto bpf_for_each_map_elem_proto = { .arg3_type = ARG_PTR_TO_STACK_OR_NULL, .arg4_type = ARG_ANYTHING, }; + +/* maximum number of loops */ +#define MAX_LOOPS BIT(23) + +BPF_CALL_4(bpf_loop, u32, nr_loops, void *, callback_fn, void *, callback_ctx, + u64, flags) +{ + bpf_callback_t callback = (bpf_callback_t)callback_fn; + u64 ret; + u32 i; + + if (flags) + return -EINVAL; + if (nr_loops > MAX_LOOPS) + return -E2BIG; + + for (i = 0; i < nr_loops; i++) { + ret = callback((u64)i, (u64)(long)callback_ctx, 0, 0, 0); + /* return value: 0 - continue, 1 - stop and return */ + if (ret) + return i + 1; + } + + return i; +} + +const struct bpf_func_proto bpf_loop_proto = { + .func = bpf_loop, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_ANYTHING, + .arg2_type = ARG_PTR_TO_FUNC, + .arg3_type = ARG_PTR_TO_STACK_OR_NULL, + .arg4_type = ARG_ANYTHING, +}; diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 1ffd469c217f..52188004a9c3 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1378,6 +1378,8 @@ bpf_base_func_proto(enum bpf_func_id func_id) return &bpf_ringbuf_query_proto; case BPF_FUNC_for_each_map_elem: return &bpf_for_each_map_elem_proto; + case BPF_FUNC_loop: + return &bpf_loop_proto; default: break; } diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 0763cca139a7..d7678d8a925c 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -6085,6 +6085,27 @@ static int set_map_elem_callback_state(struct bpf_verifier_env *env, return 0; } +static int set_loop_callback_state(struct bpf_verifier_env *env, + struct bpf_func_state *caller, + struct bpf_func_state *callee, + int insn_idx) +{ + /* bpf_loop(u32 nr_loops, void *callback_fn, void *callback_ctx, + * u64 flags); + * callback_fn(u32 index, void *callback_ctx); + */ + callee->regs[BPF_REG_1].type = SCALAR_VALUE; + callee->regs[BPF_REG_2] = caller->regs[BPF_REG_3]; + + /* unused */ + __mark_reg_not_init(env, &callee->regs[BPF_REG_3]); + __mark_reg_not_init(env, &callee->regs[BPF_REG_4]); + __mark_reg_not_init(env, &callee->regs[BPF_REG_5]); + + callee->in_callback_fn = true; + return 0; +} + static int set_timer_callback_state(struct bpf_verifier_env *env, struct bpf_func_state *caller, struct bpf_func_state *callee, @@ -6458,13 +6479,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn return err; } - if (func_id == BPF_FUNC_tail_call) { - err = check_reference_leak(env); - if (err) { - verbose(env, "tail_call would lead to reference leak\n"); - return err; - } - } else if (is_release_function(func_id)) { + if (is_release_function(func_id)) { err = release_reference(env, meta.ref_obj_id); if (err) { verbose(env, "func %s#%d reference has not been acquired before\n", @@ -6475,42 +6490,47 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn regs = cur_regs(env); - /* check that flags argument in get_local_storage(map, flags) is 0, - * this is required because get_local_storage() can't return an error. - */ - if (func_id == BPF_FUNC_get_local_storage && - !register_is_null(®s[BPF_REG_2])) { - verbose(env, "get_local_storage() doesn't support non-zero flags\n"); - return -EINVAL; - } - - if (func_id == BPF_FUNC_for_each_map_elem) { + switch (func_id) { + case BPF_FUNC_tail_call: + err = check_reference_leak(env); + if (err) { + verbose(env, "tail_call would lead to reference leak\n"); + return err; + } + break; + case BPF_FUNC_get_local_storage: + /* check that flags argument in get_local_storage(map, flags) is 0, + * this is required because get_local_storage() can't return an error. + */ + if (!register_is_null(®s[BPF_REG_2])) { + verbose(env, "get_local_storage() doesn't support non-zero flags\n"); + return -EINVAL; + } + break; + case BPF_FUNC_for_each_map_elem: err = __check_func_call(env, insn, insn_idx_p, meta.subprogno, set_map_elem_callback_state); - if (err < 0) - return -EINVAL; - } - - if (func_id == BPF_FUNC_timer_set_callback) { + break; + case BPF_FUNC_timer_set_callback: err = __check_func_call(env, insn, insn_idx_p, meta.subprogno, set_timer_callback_state); - if (err < 0) - return -EINVAL; - } - - if (func_id == BPF_FUNC_find_vma) { + break; + case BPF_FUNC_find_vma: err = __check_func_call(env, insn, insn_idx_p, meta.subprogno, set_find_vma_callback_state); - if (err < 0) - return -EINVAL; - } - - if (func_id == BPF_FUNC_snprintf) { + break; + case BPF_FUNC_snprintf: err = check_bpf_snprintf_call(env, regs); - if (err < 0) - return err; + break; + case BPF_FUNC_loop: + err = __check_func_call(env, insn, insn_idx_p, meta.subprogno, + set_loop_callback_state); + break; } + if (err) + return err; + /* reset caller saved regs */ for (i = 0; i < CALLER_SAVED_REGS; i++) { mark_reg_not_init(env, regs, caller_saved[i]); diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index a69e4b04ffeb..211b43afd0fb 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -4957,6 +4957,30 @@ union bpf_attr { * **-ENOENT** if *task->mm* is NULL, or no vma contains *addr*. * **-EBUSY** if failed to try lock mmap_lock. * **-EINVAL** for invalid **flags**. + * + * long bpf_loop(u32 nr_loops, void *callback_fn, void *callback_ctx, u64 flags) + * Description + * For **nr_loops**, call **callback_fn** function + * with **callback_ctx** as the context parameter. + * The **callback_fn** should be a static function and + * the **callback_ctx** should be a pointer to the stack. + * The **flags** is used to control certain aspects of the helper. + * Currently, the **flags** must be 0. Currently, nr_loops is + * limited to 1 << 23 (~8 million) loops. + * + * long (\*callback_fn)(u32 index, void \*ctx); + * + * where **index** is the current index in the loop. The index + * is zero-indexed. + * + * If **callback_fn** returns 0, the helper will continue to the next + * loop. If return value is 1, the helper will skip the rest of + * the loops and return. Other return values are not used now, + * and will be rejected by the verifier. + * + * Return + * The number of loops performed, **-EINVAL** for invalid **flags**, + * **-E2BIG** if **nr_loops** exceeds the maximum number of loops. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5140,6 +5164,7 @@ union bpf_attr { FN(skc_to_unix_sock), \ FN(kallsyms_lookup_name), \ FN(find_vma), \ + FN(loop), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.2.3 From ce8299b6f76f28326fedce2b4da90888bd97eab2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 30 Nov 2021 19:32:46 -0800 Subject: Revert "net: snmp: add statistics for tcp small queue check" This reverts commit aeeecb889165617a841e939117f9a8095d0e7d80. The new SNMP variable (TCPSmallQueueFailure) can be incremented for good reasons, even on a 100Gbit single TCP_STREAM flow. If we really wanted to ease driver debugging [1], this would require something more sophisticated. [1] Usually, if a driver is delaying TX completions too much, this can lead to stalls in TCP output. Various work arounds have been used in the past, like skb_orphan() in ndo_start_xmit(). Signed-off-by: Eric Dumazet Cc: Menglong Dong Link: https://lore.kernel.org/r/20211201033246.2826224-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/snmp.h | 1 - net/ipv4/proc.c | 1 - net/ipv4/tcp_output.c | 5 +---- 3 files changed, 1 insertion(+), 6 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index e32ec6932e82..904909d020e2 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -292,7 +292,6 @@ enum LINUX_MIB_TCPDSACKIGNOREDDUBIOUS, /* TCPDSACKIgnoredDubious */ LINUX_MIB_TCPMIGRATEREQSUCCESS, /* TCPMigrateReqSuccess */ LINUX_MIB_TCPMIGRATEREQFAILURE, /* TCPMigrateReqFailure */ - LINUX_MIB_TCPSMALLQUEUEFAILURE, /* TCPSmallQueueFailure */ __LINUX_MIB_MAX }; diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 43b7a77cd6b4..f30273afb539 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -297,7 +297,6 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPDSACKIgnoredDubious", LINUX_MIB_TCPDSACKIGNOREDDUBIOUS), SNMP_MIB_ITEM("TCPMigrateReqSuccess", LINUX_MIB_TCPMIGRATEREQSUCCESS), SNMP_MIB_ITEM("TCPMigrateReqFailure", LINUX_MIB_TCPMIGRATEREQFAILURE), - SNMP_MIB_ITEM("TCPSmallQueueFailure", LINUX_MIB_TCPSMALLQUEUEFAILURE), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index c4ab6c8f0c77..5079832af5c1 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2524,11 +2524,8 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb, * test again the condition. */ smp_mb__after_atomic(); - if (refcount_read(&sk->sk_wmem_alloc) > limit) { - NET_INC_STATS(sock_net(sk), - LINUX_MIB_TCPSMALLQUEUEFAILURE); + if (refcount_read(&sk->sk_wmem_alloc) > limit) return true; - } } return false; } -- cgit v1.2.3 From 46334a0cd21bed70d6f1ddef1464f75a0ebe1774 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 1 Dec 2021 10:10:27 -0800 Subject: bpf: Define enum bpf_core_relo_kind as uapi. enum bpf_core_relo_kind is generated by llvm and processed by libbpf. It's a de-facto uapi. With CO-RE in the kernel the bpf_core_relo_kind values become uapi de-jure. Also rename them with BPF_CORE_ prefix to distinguish from conflicting names in bpf_core_read.h. The enums bpf_field_info_kind, bpf_type_id_kind, bpf_type_info_kind, bpf_enum_value_kind are passing different values from bpf program into llvm. Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211201181040.23337-5-alexei.starovoitov@gmail.com --- include/uapi/linux/bpf.h | 19 ++++++++++ tools/include/uapi/linux/bpf.h | 19 ++++++++++ tools/lib/bpf/libbpf.c | 2 +- tools/lib/bpf/relo_core.c | 84 +++++++++++++++++++++--------------------- tools/lib/bpf/relo_core.h | 18 +-------- 5 files changed, 82 insertions(+), 60 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 211b43afd0fb..9e66b1880020 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -6374,4 +6374,23 @@ enum { BTF_F_ZERO = (1ULL << 3), }; +/* bpf_core_relo_kind encodes which aspect of captured field/type/enum value + * has to be adjusted by relocations. It is emitted by llvm and passed to + * libbpf and later to the kernel. + */ +enum bpf_core_relo_kind { + BPF_CORE_FIELD_BYTE_OFFSET = 0, /* field byte offset */ + BPF_CORE_FIELD_BYTE_SIZE = 1, /* field size in bytes */ + BPF_CORE_FIELD_EXISTS = 2, /* field existence in target kernel */ + BPF_CORE_FIELD_SIGNED = 3, /* field signedness (0 - unsigned, 1 - signed) */ + BPF_CORE_FIELD_LSHIFT_U64 = 4, /* bitfield-specific left bitshift */ + BPF_CORE_FIELD_RSHIFT_U64 = 5, /* bitfield-specific right bitshift */ + BPF_CORE_TYPE_ID_LOCAL = 6, /* type ID in local BPF object */ + BPF_CORE_TYPE_ID_TARGET = 7, /* type ID in target kernel */ + BPF_CORE_TYPE_EXISTS = 8, /* type existence in target kernel */ + BPF_CORE_TYPE_SIZE = 9, /* type size in bytes */ + BPF_CORE_ENUMVAL_EXISTS = 10, /* enum value existence in target kernel */ + BPF_CORE_ENUMVAL_VALUE = 11, /* enum value integer value */ +}; + #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 211b43afd0fb..9e66b1880020 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -6374,4 +6374,23 @@ enum { BTF_F_ZERO = (1ULL << 3), }; +/* bpf_core_relo_kind encodes which aspect of captured field/type/enum value + * has to be adjusted by relocations. It is emitted by llvm and passed to + * libbpf and later to the kernel. + */ +enum bpf_core_relo_kind { + BPF_CORE_FIELD_BYTE_OFFSET = 0, /* field byte offset */ + BPF_CORE_FIELD_BYTE_SIZE = 1, /* field size in bytes */ + BPF_CORE_FIELD_EXISTS = 2, /* field existence in target kernel */ + BPF_CORE_FIELD_SIGNED = 3, /* field signedness (0 - unsigned, 1 - signed) */ + BPF_CORE_FIELD_LSHIFT_U64 = 4, /* bitfield-specific left bitshift */ + BPF_CORE_FIELD_RSHIFT_U64 = 5, /* bitfield-specific right bitshift */ + BPF_CORE_TYPE_ID_LOCAL = 6, /* type ID in local BPF object */ + BPF_CORE_TYPE_ID_TARGET = 7, /* type ID in target kernel */ + BPF_CORE_TYPE_EXISTS = 8, /* type existence in target kernel */ + BPF_CORE_TYPE_SIZE = 9, /* type size in bytes */ + BPF_CORE_ENUMVAL_EXISTS = 10, /* enum value existence in target kernel */ + BPF_CORE_ENUMVAL_VALUE = 11, /* enum value integer value */ +}; + #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 5a2f5a6ae2f9..9eaf2d9820e6 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -5523,7 +5523,7 @@ static int bpf_core_apply_relo(struct bpf_program *prog, return -ENOTSUP; } - if (relo->kind != BPF_TYPE_ID_LOCAL && + if (relo->kind != BPF_CORE_TYPE_ID_LOCAL && !hashmap__find(cand_cache, type_key, (void **)&cands)) { cands = bpf_core_find_cands(prog->obj, local_btf, local_id); if (IS_ERR(cands)) { diff --git a/tools/lib/bpf/relo_core.c b/tools/lib/bpf/relo_core.c index 56dbe6d16664..d194fb9306ed 100644 --- a/tools/lib/bpf/relo_core.c +++ b/tools/lib/bpf/relo_core.c @@ -113,18 +113,18 @@ static bool is_flex_arr(const struct btf *btf, static const char *core_relo_kind_str(enum bpf_core_relo_kind kind) { switch (kind) { - case BPF_FIELD_BYTE_OFFSET: return "byte_off"; - case BPF_FIELD_BYTE_SIZE: return "byte_sz"; - case BPF_FIELD_EXISTS: return "field_exists"; - case BPF_FIELD_SIGNED: return "signed"; - case BPF_FIELD_LSHIFT_U64: return "lshift_u64"; - case BPF_FIELD_RSHIFT_U64: return "rshift_u64"; - case BPF_TYPE_ID_LOCAL: return "local_type_id"; - case BPF_TYPE_ID_TARGET: return "target_type_id"; - case BPF_TYPE_EXISTS: return "type_exists"; - case BPF_TYPE_SIZE: return "type_size"; - case BPF_ENUMVAL_EXISTS: return "enumval_exists"; - case BPF_ENUMVAL_VALUE: return "enumval_value"; + case BPF_CORE_FIELD_BYTE_OFFSET: return "byte_off"; + case BPF_CORE_FIELD_BYTE_SIZE: return "byte_sz"; + case BPF_CORE_FIELD_EXISTS: return "field_exists"; + case BPF_CORE_FIELD_SIGNED: return "signed"; + case BPF_CORE_FIELD_LSHIFT_U64: return "lshift_u64"; + case BPF_CORE_FIELD_RSHIFT_U64: return "rshift_u64"; + case BPF_CORE_TYPE_ID_LOCAL: return "local_type_id"; + case BPF_CORE_TYPE_ID_TARGET: return "target_type_id"; + case BPF_CORE_TYPE_EXISTS: return "type_exists"; + case BPF_CORE_TYPE_SIZE: return "type_size"; + case BPF_CORE_ENUMVAL_EXISTS: return "enumval_exists"; + case BPF_CORE_ENUMVAL_VALUE: return "enumval_value"; default: return "unknown"; } } @@ -132,12 +132,12 @@ static const char *core_relo_kind_str(enum bpf_core_relo_kind kind) static bool core_relo_is_field_based(enum bpf_core_relo_kind kind) { switch (kind) { - case BPF_FIELD_BYTE_OFFSET: - case BPF_FIELD_BYTE_SIZE: - case BPF_FIELD_EXISTS: - case BPF_FIELD_SIGNED: - case BPF_FIELD_LSHIFT_U64: - case BPF_FIELD_RSHIFT_U64: + case BPF_CORE_FIELD_BYTE_OFFSET: + case BPF_CORE_FIELD_BYTE_SIZE: + case BPF_CORE_FIELD_EXISTS: + case BPF_CORE_FIELD_SIGNED: + case BPF_CORE_FIELD_LSHIFT_U64: + case BPF_CORE_FIELD_RSHIFT_U64: return true; default: return false; @@ -147,10 +147,10 @@ static bool core_relo_is_field_based(enum bpf_core_relo_kind kind) static bool core_relo_is_type_based(enum bpf_core_relo_kind kind) { switch (kind) { - case BPF_TYPE_ID_LOCAL: - case BPF_TYPE_ID_TARGET: - case BPF_TYPE_EXISTS: - case BPF_TYPE_SIZE: + case BPF_CORE_TYPE_ID_LOCAL: + case BPF_CORE_TYPE_ID_TARGET: + case BPF_CORE_TYPE_EXISTS: + case BPF_CORE_TYPE_SIZE: return true; default: return false; @@ -160,8 +160,8 @@ static bool core_relo_is_type_based(enum bpf_core_relo_kind kind) static bool core_relo_is_enumval_based(enum bpf_core_relo_kind kind) { switch (kind) { - case BPF_ENUMVAL_EXISTS: - case BPF_ENUMVAL_VALUE: + case BPF_CORE_ENUMVAL_EXISTS: + case BPF_CORE_ENUMVAL_VALUE: return true; default: return false; @@ -624,7 +624,7 @@ static int bpf_core_calc_field_relo(const char *prog_name, *field_sz = 0; - if (relo->kind == BPF_FIELD_EXISTS) { + if (relo->kind == BPF_CORE_FIELD_EXISTS) { *val = spec ? 1 : 0; return 0; } @@ -637,7 +637,7 @@ static int bpf_core_calc_field_relo(const char *prog_name, /* a[n] accessor needs special handling */ if (!acc->name) { - if (relo->kind == BPF_FIELD_BYTE_OFFSET) { + if (relo->kind == BPF_CORE_FIELD_BYTE_OFFSET) { *val = spec->bit_offset / 8; /* remember field size for load/store mem size */ sz = btf__resolve_size(spec->btf, acc->type_id); @@ -645,7 +645,7 @@ static int bpf_core_calc_field_relo(const char *prog_name, return -EINVAL; *field_sz = sz; *type_id = acc->type_id; - } else if (relo->kind == BPF_FIELD_BYTE_SIZE) { + } else if (relo->kind == BPF_CORE_FIELD_BYTE_SIZE) { sz = btf__resolve_size(spec->btf, acc->type_id); if (sz < 0) return -EINVAL; @@ -697,36 +697,36 @@ static int bpf_core_calc_field_relo(const char *prog_name, *validate = !bitfield; switch (relo->kind) { - case BPF_FIELD_BYTE_OFFSET: + case BPF_CORE_FIELD_BYTE_OFFSET: *val = byte_off; if (!bitfield) { *field_sz = byte_sz; *type_id = field_type_id; } break; - case BPF_FIELD_BYTE_SIZE: + case BPF_CORE_FIELD_BYTE_SIZE: *val = byte_sz; break; - case BPF_FIELD_SIGNED: + case BPF_CORE_FIELD_SIGNED: /* enums will be assumed unsigned */ *val = btf_is_enum(mt) || (btf_int_encoding(mt) & BTF_INT_SIGNED); if (validate) *validate = true; /* signedness is never ambiguous */ break; - case BPF_FIELD_LSHIFT_U64: + case BPF_CORE_FIELD_LSHIFT_U64: #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ *val = 64 - (bit_off + bit_sz - byte_off * 8); #else *val = (8 - byte_sz) * 8 + (bit_off - byte_off * 8); #endif break; - case BPF_FIELD_RSHIFT_U64: + case BPF_CORE_FIELD_RSHIFT_U64: *val = 64 - bit_sz; if (validate) *validate = true; /* right shift is never ambiguous */ break; - case BPF_FIELD_EXISTS: + case BPF_CORE_FIELD_EXISTS: default: return -EOPNOTSUPP; } @@ -747,20 +747,20 @@ static int bpf_core_calc_type_relo(const struct bpf_core_relo *relo, } switch (relo->kind) { - case BPF_TYPE_ID_TARGET: + case BPF_CORE_TYPE_ID_TARGET: *val = spec->root_type_id; break; - case BPF_TYPE_EXISTS: + case BPF_CORE_TYPE_EXISTS: *val = 1; break; - case BPF_TYPE_SIZE: + case BPF_CORE_TYPE_SIZE: sz = btf__resolve_size(spec->btf, spec->root_type_id); if (sz < 0) return -EINVAL; *val = sz; break; - case BPF_TYPE_ID_LOCAL: - /* BPF_TYPE_ID_LOCAL is handled specially and shouldn't get here */ + case BPF_CORE_TYPE_ID_LOCAL: + /* BPF_CORE_TYPE_ID_LOCAL is handled specially and shouldn't get here */ default: return -EOPNOTSUPP; } @@ -776,10 +776,10 @@ static int bpf_core_calc_enumval_relo(const struct bpf_core_relo *relo, const struct btf_enum *e; switch (relo->kind) { - case BPF_ENUMVAL_EXISTS: + case BPF_CORE_ENUMVAL_EXISTS: *val = spec ? 1 : 0; break; - case BPF_ENUMVAL_VALUE: + case BPF_CORE_ENUMVAL_VALUE: if (!spec) return -EUCLEAN; /* request instruction poisoning */ t = btf_type_by_id(spec->btf, spec->spec[0].type_id); @@ -1236,7 +1236,7 @@ int bpf_core_apply_relo_insn(const char *prog_name, struct bpf_insn *insn, libbpf_print(LIBBPF_DEBUG, "\n"); /* TYPE_ID_LOCAL relo is special and doesn't need candidate search */ - if (relo->kind == BPF_TYPE_ID_LOCAL) { + if (relo->kind == BPF_CORE_TYPE_ID_LOCAL) { targ_res.validate = true; targ_res.poison = false; targ_res.orig_val = local_spec.root_type_id; @@ -1302,7 +1302,7 @@ int bpf_core_apply_relo_insn(const char *prog_name, struct bpf_insn *insn, } /* - * For BPF_FIELD_EXISTS relo or when used BPF program has field + * For BPF_CORE_FIELD_EXISTS relo or when used BPF program has field * existence checks or kernel version/config checks, it's expected * that we might not find any candidates. In this case, if field * wasn't found in any candidate, the list of candidates shouldn't diff --git a/tools/lib/bpf/relo_core.h b/tools/lib/bpf/relo_core.h index 3b9f8f18346c..3d0b86e7f439 100644 --- a/tools/lib/bpf/relo_core.h +++ b/tools/lib/bpf/relo_core.h @@ -4,23 +4,7 @@ #ifndef __RELO_CORE_H #define __RELO_CORE_H -/* bpf_core_relo_kind encodes which aspect of captured field/type/enum value - * has to be adjusted by relocations. - */ -enum bpf_core_relo_kind { - BPF_FIELD_BYTE_OFFSET = 0, /* field byte offset */ - BPF_FIELD_BYTE_SIZE = 1, /* field size in bytes */ - BPF_FIELD_EXISTS = 2, /* field existence in target kernel */ - BPF_FIELD_SIGNED = 3, /* field signedness (0 - unsigned, 1 - signed) */ - BPF_FIELD_LSHIFT_U64 = 4, /* bitfield-specific left bitshift */ - BPF_FIELD_RSHIFT_U64 = 5, /* bitfield-specific right bitshift */ - BPF_TYPE_ID_LOCAL = 6, /* type ID in local BPF object */ - BPF_TYPE_ID_TARGET = 7, /* type ID in target kernel */ - BPF_TYPE_EXISTS = 8, /* type existence in target kernel */ - BPF_TYPE_SIZE = 9, /* type size in bytes */ - BPF_ENUMVAL_EXISTS = 10, /* enum value existence in target kernel */ - BPF_ENUMVAL_VALUE = 11, /* enum value integer value */ -}; +#include /* The minimum bpf_core_relo checked by the loader * -- cgit v1.2.3 From fbd94c7afcf99c9f3b1ba1168657ecc428eb2c8d Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 1 Dec 2021 10:10:28 -0800 Subject: bpf: Pass a set of bpf_core_relo-s to prog_load command. struct bpf_core_relo is generated by llvm and processed by libbpf. It's a de-facto uapi. With CO-RE in the kernel the struct bpf_core_relo becomes uapi de-jure. Add an ability to pass a set of 'struct bpf_core_relo' to prog_load command and let the kernel perform CO-RE relocations. Note the struct bpf_line_info and struct bpf_func_info have the same layout when passed from LLVM to libbpf and from libbpf to the kernel except "insn_off" fields means "byte offset" when LLVM generates it. Then libbpf converts it to "insn index" to pass to the kernel. The struct bpf_core_relo's "insn_off" field is always "byte offset". Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211201181040.23337-6-alexei.starovoitov@gmail.com --- include/linux/bpf.h | 8 +++++ include/uapi/linux/bpf.h | 59 +++++++++++++++++++++++++++++++- kernel/bpf/btf.c | 6 ++++ kernel/bpf/syscall.c | 2 +- kernel/bpf/verifier.c | 76 ++++++++++++++++++++++++++++++++++++++++++ tools/include/uapi/linux/bpf.h | 59 +++++++++++++++++++++++++++++++- tools/lib/bpf/relo_core.h | 53 ----------------------------- 7 files changed, 207 insertions(+), 56 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index cad0829710be..8bbf08fbab66 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1732,6 +1732,14 @@ bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog); const struct btf_func_model * bpf_jit_find_kfunc_model(const struct bpf_prog *prog, const struct bpf_insn *insn); +struct bpf_core_ctx { + struct bpf_verifier_log *log; + const struct btf *btf; +}; + +int bpf_core_apply(struct bpf_core_ctx *ctx, const struct bpf_core_relo *relo, + int relo_idx, void *insn); + #else /* !CONFIG_BPF_SYSCALL */ static inline struct bpf_prog *bpf_prog_get(u32 ufd) { diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 9e66b1880020..c26871263f1f 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1342,8 +1342,10 @@ union bpf_attr { /* or valid module BTF object fd or 0 to attach to vmlinux */ __u32 attach_btf_obj_fd; }; - __u32 :32; /* pad */ + __u32 core_relo_cnt; /* number of bpf_core_relo */ __aligned_u64 fd_array; /* array of FDs */ + __aligned_u64 core_relos; + __u32 core_relo_rec_size; /* sizeof(struct bpf_core_relo) */ }; struct { /* anonymous struct used by BPF_OBJ_* commands */ @@ -6393,4 +6395,59 @@ enum bpf_core_relo_kind { BPF_CORE_ENUMVAL_VALUE = 11, /* enum value integer value */ }; +/* + * "struct bpf_core_relo" is used to pass relocation data form LLVM to libbpf + * and from libbpf to the kernel. + * + * CO-RE relocation captures the following data: + * - insn_off - instruction offset (in bytes) within a BPF program that needs + * its insn->imm field to be relocated with actual field info; + * - type_id - BTF type ID of the "root" (containing) entity of a relocatable + * type or field; + * - access_str_off - offset into corresponding .BTF string section. String + * interpretation depends on specific relocation kind: + * - for field-based relocations, string encodes an accessed field using + * a sequence of field and array indices, separated by colon (:). It's + * conceptually very close to LLVM's getelementptr ([0]) instruction's + * arguments for identifying offset to a field. + * - for type-based relocations, strings is expected to be just "0"; + * - for enum value-based relocations, string contains an index of enum + * value within its enum type; + * - kind - one of enum bpf_core_relo_kind; + * + * Example: + * struct sample { + * int a; + * struct { + * int b[10]; + * }; + * }; + * + * struct sample *s = ...; + * int *x = &s->a; // encoded as "0:0" (a is field #0) + * int *y = &s->b[5]; // encoded as "0:1:0:5" (anon struct is field #1, + * // b is field #0 inside anon struct, accessing elem #5) + * int *z = &s[10]->b; // encoded as "10:1" (ptr is used as an array) + * + * type_id for all relocs in this example will capture BTF type id of + * `struct sample`. + * + * Such relocation is emitted when using __builtin_preserve_access_index() + * Clang built-in, passing expression that captures field address, e.g.: + * + * bpf_probe_read(&dst, sizeof(dst), + * __builtin_preserve_access_index(&src->a.b.c)); + * + * In this case Clang will emit field relocation recording necessary data to + * be able to find offset of embedded `a.b.c` field within `src` struct. + * + * [0] https://llvm.org/docs/LangRef.html#getelementptr-instruction + */ +struct bpf_core_relo { + __u32 insn_off; + __u32 type_id; + __u32 access_str_off; + enum bpf_core_relo_kind kind; +}; + #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index c79595aad55b..0d070461e2b8 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -6439,3 +6439,9 @@ size_t bpf_core_essential_name_len(const char *name) } return n; } + +int bpf_core_apply(struct bpf_core_ctx *ctx, const struct bpf_core_relo *relo, + int relo_idx, void *insn) +{ + return -EOPNOTSUPP; +} diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 47089d1d67a4..b3ada4085f85 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2184,7 +2184,7 @@ static bool is_perfmon_prog_type(enum bpf_prog_type prog_type) } /* last field in 'union bpf_attr' used by this command */ -#define BPF_PROG_LOAD_LAST_FIELD fd_array +#define BPF_PROG_LOAD_LAST_FIELD core_relo_rec_size static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr) { diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 6c9c0d9a04a0..6522ffdea487 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -10273,6 +10273,78 @@ err_free: return err; } +#define MIN_CORE_RELO_SIZE sizeof(struct bpf_core_relo) +#define MAX_CORE_RELO_SIZE MAX_FUNCINFO_REC_SIZE + +static int check_core_relo(struct bpf_verifier_env *env, + const union bpf_attr *attr, + bpfptr_t uattr) +{ + u32 i, nr_core_relo, ncopy, expected_size, rec_size; + struct bpf_core_relo core_relo = {}; + struct bpf_prog *prog = env->prog; + const struct btf *btf = prog->aux->btf; + struct bpf_core_ctx ctx = { + .log = &env->log, + .btf = btf, + }; + bpfptr_t u_core_relo; + int err; + + nr_core_relo = attr->core_relo_cnt; + if (!nr_core_relo) + return 0; + if (nr_core_relo > INT_MAX / sizeof(struct bpf_core_relo)) + return -EINVAL; + + rec_size = attr->core_relo_rec_size; + if (rec_size < MIN_CORE_RELO_SIZE || + rec_size > MAX_CORE_RELO_SIZE || + rec_size % sizeof(u32)) + return -EINVAL; + + u_core_relo = make_bpfptr(attr->core_relos, uattr.is_kernel); + expected_size = sizeof(struct bpf_core_relo); + ncopy = min_t(u32, expected_size, rec_size); + + /* Unlike func_info and line_info, copy and apply each CO-RE + * relocation record one at a time. + */ + for (i = 0; i < nr_core_relo; i++) { + /* future proofing when sizeof(bpf_core_relo) changes */ + err = bpf_check_uarg_tail_zero(u_core_relo, expected_size, rec_size); + if (err) { + if (err == -E2BIG) { + verbose(env, "nonzero tailing record in core_relo"); + if (copy_to_bpfptr_offset(uattr, + offsetof(union bpf_attr, core_relo_rec_size), + &expected_size, sizeof(expected_size))) + err = -EFAULT; + } + break; + } + + if (copy_from_bpfptr(&core_relo, u_core_relo, ncopy)) { + err = -EFAULT; + break; + } + + if (core_relo.insn_off % 8 || core_relo.insn_off / 8 >= prog->len) { + verbose(env, "Invalid core_relo[%u].insn_off:%u prog->len:%u\n", + i, core_relo.insn_off, prog->len); + err = -EINVAL; + break; + } + + err = bpf_core_apply(&ctx, &core_relo, i, + &prog->insnsi[core_relo.insn_off / 8]); + if (err) + break; + bpfptr_add(&u_core_relo, rec_size); + } + return err; +} + static int check_btf_info(struct bpf_verifier_env *env, const union bpf_attr *attr, bpfptr_t uattr) @@ -10303,6 +10375,10 @@ static int check_btf_info(struct bpf_verifier_env *env, if (err) return err; + err = check_core_relo(env, attr, uattr); + if (err) + return err; + return 0; } diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 9e66b1880020..c26871263f1f 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1342,8 +1342,10 @@ union bpf_attr { /* or valid module BTF object fd or 0 to attach to vmlinux */ __u32 attach_btf_obj_fd; }; - __u32 :32; /* pad */ + __u32 core_relo_cnt; /* number of bpf_core_relo */ __aligned_u64 fd_array; /* array of FDs */ + __aligned_u64 core_relos; + __u32 core_relo_rec_size; /* sizeof(struct bpf_core_relo) */ }; struct { /* anonymous struct used by BPF_OBJ_* commands */ @@ -6393,4 +6395,59 @@ enum bpf_core_relo_kind { BPF_CORE_ENUMVAL_VALUE = 11, /* enum value integer value */ }; +/* + * "struct bpf_core_relo" is used to pass relocation data form LLVM to libbpf + * and from libbpf to the kernel. + * + * CO-RE relocation captures the following data: + * - insn_off - instruction offset (in bytes) within a BPF program that needs + * its insn->imm field to be relocated with actual field info; + * - type_id - BTF type ID of the "root" (containing) entity of a relocatable + * type or field; + * - access_str_off - offset into corresponding .BTF string section. String + * interpretation depends on specific relocation kind: + * - for field-based relocations, string encodes an accessed field using + * a sequence of field and array indices, separated by colon (:). It's + * conceptually very close to LLVM's getelementptr ([0]) instruction's + * arguments for identifying offset to a field. + * - for type-based relocations, strings is expected to be just "0"; + * - for enum value-based relocations, string contains an index of enum + * value within its enum type; + * - kind - one of enum bpf_core_relo_kind; + * + * Example: + * struct sample { + * int a; + * struct { + * int b[10]; + * }; + * }; + * + * struct sample *s = ...; + * int *x = &s->a; // encoded as "0:0" (a is field #0) + * int *y = &s->b[5]; // encoded as "0:1:0:5" (anon struct is field #1, + * // b is field #0 inside anon struct, accessing elem #5) + * int *z = &s[10]->b; // encoded as "10:1" (ptr is used as an array) + * + * type_id for all relocs in this example will capture BTF type id of + * `struct sample`. + * + * Such relocation is emitted when using __builtin_preserve_access_index() + * Clang built-in, passing expression that captures field address, e.g.: + * + * bpf_probe_read(&dst, sizeof(dst), + * __builtin_preserve_access_index(&src->a.b.c)); + * + * In this case Clang will emit field relocation recording necessary data to + * be able to find offset of embedded `a.b.c` field within `src` struct. + * + * [0] https://llvm.org/docs/LangRef.html#getelementptr-instruction + */ +struct bpf_core_relo { + __u32 insn_off; + __u32 type_id; + __u32 access_str_off; + enum bpf_core_relo_kind kind; +}; + #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/tools/lib/bpf/relo_core.h b/tools/lib/bpf/relo_core.h index 3d0b86e7f439..f410691cc4e5 100644 --- a/tools/lib/bpf/relo_core.h +++ b/tools/lib/bpf/relo_core.h @@ -6,59 +6,6 @@ #include -/* The minimum bpf_core_relo checked by the loader - * - * CO-RE relocation captures the following data: - * - insn_off - instruction offset (in bytes) within a BPF program that needs - * its insn->imm field to be relocated with actual field info; - * - type_id - BTF type ID of the "root" (containing) entity of a relocatable - * type or field; - * - access_str_off - offset into corresponding .BTF string section. String - * interpretation depends on specific relocation kind: - * - for field-based relocations, string encodes an accessed field using - * a sequence of field and array indices, separated by colon (:). It's - * conceptually very close to LLVM's getelementptr ([0]) instruction's - * arguments for identifying offset to a field. - * - for type-based relocations, strings is expected to be just "0"; - * - for enum value-based relocations, string contains an index of enum - * value within its enum type; - * - * Example to provide a better feel. - * - * struct sample { - * int a; - * struct { - * int b[10]; - * }; - * }; - * - * struct sample *s = ...; - * int x = &s->a; // encoded as "0:0" (a is field #0) - * int y = &s->b[5]; // encoded as "0:1:0:5" (anon struct is field #1, - * // b is field #0 inside anon struct, accessing elem #5) - * int z = &s[10]->b; // encoded as "10:1" (ptr is used as an array) - * - * type_id for all relocs in this example will capture BTF type id of - * `struct sample`. - * - * Such relocation is emitted when using __builtin_preserve_access_index() - * Clang built-in, passing expression that captures field address, e.g.: - * - * bpf_probe_read(&dst, sizeof(dst), - * __builtin_preserve_access_index(&src->a.b.c)); - * - * In this case Clang will emit field relocation recording necessary data to - * be able to find offset of embedded `a.b.c` field within `src` struct. - * - * [0] https://llvm.org/docs/LangRef.html#getelementptr-instruction - */ -struct bpf_core_relo { - __u32 insn_off; - __u32 type_id; - __u32 access_str_off; - enum bpf_core_relo_kind kind; -}; - struct bpf_core_cand { const struct btf *btf; const struct btf_type *t; -- cgit v1.2.3 From 063ebb19d962b45a1b505748d464bd12b5074797 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Wed, 1 Dec 2021 17:33:21 +0000 Subject: iommu/virtio: Add definitions for VIRTIO_IOMMU_F_BYPASS_CONFIG Add definitions for the VIRTIO_IOMMU_F_BYPASS_CONFIG, which supersedes VIRTIO_IOMMU_F_BYPASS. Reviewed-by: Kevin Tian Signed-off-by: Jean-Philippe Brucker Reviewed-by: Eric Auger Link: https://lore.kernel.org/r/20211201173323.1045819-2-jean-philippe@linaro.org Signed-off-by: Joerg Roedel --- include/uapi/linux/virtio_iommu.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_iommu.h b/include/uapi/linux/virtio_iommu.h index 237e36a280cb..1ff357f0d72e 100644 --- a/include/uapi/linux/virtio_iommu.h +++ b/include/uapi/linux/virtio_iommu.h @@ -16,6 +16,7 @@ #define VIRTIO_IOMMU_F_BYPASS 3 #define VIRTIO_IOMMU_F_PROBE 4 #define VIRTIO_IOMMU_F_MMIO 5 +#define VIRTIO_IOMMU_F_BYPASS_CONFIG 6 struct virtio_iommu_range_64 { __le64 start; @@ -36,6 +37,8 @@ struct virtio_iommu_config { struct virtio_iommu_range_32 domain_range; /* Probe buffer size */ __le32 probe_size; + __u8 bypass; + __u8 reserved[3]; }; /* Request types */ @@ -66,11 +69,14 @@ struct virtio_iommu_req_tail { __u8 reserved[3]; }; +#define VIRTIO_IOMMU_ATTACH_F_BYPASS (1 << 0) + struct virtio_iommu_req_attach { struct virtio_iommu_req_head head; __le32 domain; __le32 endpoint; - __u8 reserved[8]; + __le32 flags; + __u8 reserved[4]; struct virtio_iommu_req_tail tail; }; -- cgit v1.2.3 From c5fb19937455095573a19ddcbff32e993ed10e35 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Fri, 10 Dec 2021 22:16:49 +0800 Subject: bpf: Add bpf_strncmp helper The helper compares two strings: one string is a null-terminated read-only string, and another string has const max storage size but doesn't need to be null-terminated. It can be used to compare file name in tracing or LSM program. Signed-off-by: Hou Tao Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20211210141652.877186-2-houtao1@huawei.com --- include/linux/bpf.h | 1 + include/uapi/linux/bpf.h | 11 +++++++++++ kernel/bpf/helpers.c | 16 ++++++++++++++++ tools/include/uapi/linux/bpf.h | 11 +++++++++++ 4 files changed, 39 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 0ceb54c6342f..7a40022e3d00 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2163,6 +2163,7 @@ extern const struct bpf_func_proto bpf_sk_getsockopt_proto; extern const struct bpf_func_proto bpf_kallsyms_lookup_name_proto; extern const struct bpf_func_proto bpf_find_vma_proto; extern const struct bpf_func_proto bpf_loop_proto; +extern const struct bpf_func_proto bpf_strncmp_proto; const struct bpf_func_proto *tracing_prog_func_proto( enum bpf_func_id func_id, const struct bpf_prog *prog); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index c26871263f1f..2820c77e4846 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4983,6 +4983,16 @@ union bpf_attr { * Return * The number of loops performed, **-EINVAL** for invalid **flags**, * **-E2BIG** if **nr_loops** exceeds the maximum number of loops. + * + * long bpf_strncmp(const char *s1, u32 s1_sz, const char *s2) + * Description + * Do strncmp() between **s1** and **s2**. **s1** doesn't need + * to be null-terminated and **s1_sz** is the maximum storage + * size of **s1**. **s2** must be a read-only string. + * Return + * An integer less than, equal to, or greater than zero + * if the first **s1_sz** bytes of **s1** is found to be + * less than, to match, or be greater than **s2**. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5167,6 +5177,7 @@ union bpf_attr { FN(kallsyms_lookup_name), \ FN(find_vma), \ FN(loop), \ + FN(strncmp), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 19fecfeaa9c2..8babae03d30a 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -565,6 +565,20 @@ const struct bpf_func_proto bpf_strtoul_proto = { }; #endif +BPF_CALL_3(bpf_strncmp, const char *, s1, u32, s1_sz, const char *, s2) +{ + return strncmp(s1, s2, s1_sz); +} + +const struct bpf_func_proto bpf_strncmp_proto = { + .func = bpf_strncmp, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_MEM, + .arg2_type = ARG_CONST_SIZE, + .arg3_type = ARG_PTR_TO_CONST_STR, +}; + BPF_CALL_4(bpf_get_ns_current_pid_tgid, u64, dev, u64, ino, struct bpf_pidns_info *, nsdata, u32, size) { @@ -1378,6 +1392,8 @@ bpf_base_func_proto(enum bpf_func_id func_id) return &bpf_for_each_map_elem_proto; case BPF_FUNC_loop: return &bpf_loop_proto; + case BPF_FUNC_strncmp: + return &bpf_strncmp_proto; default: break; } diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index c26871263f1f..2820c77e4846 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -4983,6 +4983,16 @@ union bpf_attr { * Return * The number of loops performed, **-EINVAL** for invalid **flags**, * **-E2BIG** if **nr_loops** exceeds the maximum number of loops. + * + * long bpf_strncmp(const char *s1, u32 s1_sz, const char *s2) + * Description + * Do strncmp() between **s1** and **s2**. **s1** doesn't need + * to be null-terminated and **s1_sz** is the maximum storage + * size of **s1**. **s2** must be a read-only string. + * Return + * An integer less than, equal to, or greater than zero + * if the first **s1_sz** bytes of **s1** is found to be + * less than, to match, or be greater than **s2**. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5167,6 +5177,7 @@ union bpf_attr { FN(kallsyms_lookup_name), \ FN(find_vma), \ FN(loop), \ + FN(strncmp), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.2.3 From f92c1e183604c20ce00eb889315fdaa8f2d9e509 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 8 Dec 2021 20:32:44 +0100 Subject: bpf: Add get_func_[arg|ret|arg_cnt] helpers Adding following helpers for tracing programs: Get n-th argument of the traced function: long bpf_get_func_arg(void *ctx, u32 n, u64 *value) Get return value of the traced function: long bpf_get_func_ret(void *ctx, u64 *value) Get arguments count of the traced function: long bpf_get_func_arg_cnt(void *ctx) The trampoline now stores number of arguments on ctx-8 address, so it's easy to verify argument index and find return value argument's position. Moving function ip address on the trampoline stack behind the number of functions arguments, so it's now stored on ctx-16 address if it's needed. All helpers above are inlined by verifier. Also bit unrelated small change - using newly added function bpf_prog_has_trampoline in check_get_func_ip. Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20211208193245.172141-5-jolsa@kernel.org --- arch/x86/net/bpf_jit_comp.c | 15 +++++++- include/linux/bpf.h | 5 +++ include/uapi/linux/bpf.h | 28 +++++++++++++++ kernel/bpf/trampoline.c | 8 +++++ kernel/bpf/verifier.c | 77 +++++++++++++++++++++++++++++++++++++++--- kernel/trace/bpf_trace.c | 55 +++++++++++++++++++++++++++++- tools/include/uapi/linux/bpf.h | 28 +++++++++++++++ 7 files changed, 209 insertions(+), 7 deletions(-) (limited to 'include/uapi/linux') diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 10fab8cb3fb5..4bbcded07415 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1941,7 +1941,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i void *orig_call) { int ret, i, nr_args = m->nr_args; - int regs_off, ip_off, stack_size = nr_args * 8; + int regs_off, ip_off, args_off, stack_size = nr_args * 8; struct bpf_tramp_progs *fentry = &tprogs[BPF_TRAMP_FENTRY]; struct bpf_tramp_progs *fexit = &tprogs[BPF_TRAMP_FEXIT]; struct bpf_tramp_progs *fmod_ret = &tprogs[BPF_TRAMP_MODIFY_RETURN]; @@ -1968,6 +1968,8 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i * [ ... ] * RBP - regs_off [ reg_arg1 ] program's ctx pointer * + * RBP - args_off [ args count ] always + * * RBP - ip_off [ traced function ] BPF_TRAMP_F_IP_ARG flag */ @@ -1978,6 +1980,10 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i regs_off = stack_size; + /* args count */ + stack_size += 8; + args_off = stack_size; + if (flags & BPF_TRAMP_F_IP_ARG) stack_size += 8; /* room for IP address argument */ @@ -1996,6 +2002,13 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i EMIT4(0x48, 0x83, 0xEC, stack_size); /* sub rsp, stack_size */ EMIT1(0x53); /* push rbx */ + /* Store number of arguments of the traced function: + * mov rax, nr_args + * mov QWORD PTR [rbp - args_off], rax + */ + emit_mov_imm64(&prog, BPF_REG_0, 0, (u32) nr_args); + emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -args_off); + if (flags & BPF_TRAMP_F_IP_ARG) { /* Store IP address of the traced function: * mov rax, QWORD PTR [rbp + 8] diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 7a40022e3d00..965fffaf0308 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -777,6 +777,7 @@ void bpf_ksym_add(struct bpf_ksym *ksym); void bpf_ksym_del(struct bpf_ksym *ksym); int bpf_jit_charge_modmem(u32 pages); void bpf_jit_uncharge_modmem(u32 pages); +bool bpf_prog_has_trampoline(const struct bpf_prog *prog); #else static inline int bpf_trampoline_link_prog(struct bpf_prog *prog, struct bpf_trampoline *tr) @@ -805,6 +806,10 @@ static inline bool is_bpf_image_address(unsigned long address) { return false; } +static inline bool bpf_prog_has_trampoline(const struct bpf_prog *prog) +{ + return false; +} #endif struct bpf_func_info_aux { diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 2820c77e4846..b0383d371b9a 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4993,6 +4993,31 @@ union bpf_attr { * An integer less than, equal to, or greater than zero * if the first **s1_sz** bytes of **s1** is found to be * less than, to match, or be greater than **s2**. + * + * long bpf_get_func_arg(void *ctx, u32 n, u64 *value) + * Description + * Get **n**-th argument (zero based) of the traced function (for tracing programs) + * returned in **value**. + * + * Return + * 0 on success. + * **-EINVAL** if n >= arguments count of traced function. + * + * long bpf_get_func_ret(void *ctx, u64 *value) + * Description + * Get return value of the traced function (for tracing programs) + * in **value**. + * + * Return + * 0 on success. + * **-EOPNOTSUPP** for tracing programs other than BPF_TRACE_FEXIT or BPF_MODIFY_RETURN. + * + * long bpf_get_func_arg_cnt(void *ctx) + * Description + * Get number of arguments of the traced function (for tracing programs). + * + * Return + * The number of arguments of the traced function. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5178,6 +5203,9 @@ union bpf_attr { FN(find_vma), \ FN(loop), \ FN(strncmp), \ + FN(get_func_arg), \ + FN(get_func_ret), \ + FN(get_func_arg_cnt), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index e98de5e73ba5..4b6974a195c1 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -27,6 +27,14 @@ static struct hlist_head trampoline_table[TRAMPOLINE_TABLE_SIZE]; /* serializes access to trampoline_table */ static DEFINE_MUTEX(trampoline_mutex); +bool bpf_prog_has_trampoline(const struct bpf_prog *prog) +{ + enum bpf_attach_type eatype = prog->expected_attach_type; + + return eatype == BPF_TRACE_FENTRY || eatype == BPF_TRACE_FEXIT || + eatype == BPF_MODIFY_RETURN; +} + void *bpf_jit_alloc_exec_page(void) { void *image; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index b39de3ae50f5..d74e8a99412e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -6395,13 +6395,11 @@ static int check_bpf_snprintf_call(struct bpf_verifier_env *env, static int check_get_func_ip(struct bpf_verifier_env *env) { - enum bpf_attach_type eatype = env->prog->expected_attach_type; enum bpf_prog_type type = resolve_prog_type(env->prog); int func_id = BPF_FUNC_get_func_ip; if (type == BPF_PROG_TYPE_TRACING) { - if (eatype != BPF_TRACE_FENTRY && eatype != BPF_TRACE_FEXIT && - eatype != BPF_MODIFY_RETURN) { + if (!bpf_prog_has_trampoline(env->prog)) { verbose(env, "func %s#%d supported only for fentry/fexit/fmod_ret programs\n", func_id_name(func_id), func_id); return -ENOTSUPP; @@ -12997,6 +12995,7 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, static int do_misc_fixups(struct bpf_verifier_env *env) { struct bpf_prog *prog = env->prog; + enum bpf_attach_type eatype = prog->expected_attach_type; bool expect_blinding = bpf_jit_blinding_enabled(prog); enum bpf_prog_type prog_type = resolve_prog_type(prog); struct bpf_insn *insn = prog->insnsi; @@ -13367,11 +13366,79 @@ patch_map_ops_generic: continue; } + /* Implement bpf_get_func_arg inline. */ + if (prog_type == BPF_PROG_TYPE_TRACING && + insn->imm == BPF_FUNC_get_func_arg) { + /* Load nr_args from ctx - 8 */ + insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8); + insn_buf[1] = BPF_JMP32_REG(BPF_JGE, BPF_REG_2, BPF_REG_0, 6); + insn_buf[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 3); + insn_buf[3] = BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_1); + insn_buf[4] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 0); + insn_buf[5] = BPF_STX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0); + insn_buf[6] = BPF_MOV64_IMM(BPF_REG_0, 0); + insn_buf[7] = BPF_JMP_A(1); + insn_buf[8] = BPF_MOV64_IMM(BPF_REG_0, -EINVAL); + cnt = 9; + + new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); + if (!new_prog) + return -ENOMEM; + + delta += cnt - 1; + env->prog = prog = new_prog; + insn = new_prog->insnsi + i + delta; + continue; + } + + /* Implement bpf_get_func_ret inline. */ + if (prog_type == BPF_PROG_TYPE_TRACING && + insn->imm == BPF_FUNC_get_func_ret) { + if (eatype == BPF_TRACE_FEXIT || + eatype == BPF_MODIFY_RETURN) { + /* Load nr_args from ctx - 8 */ + insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8); + insn_buf[1] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 3); + insn_buf[2] = BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1); + insn_buf[3] = BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0); + insn_buf[4] = BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, 0); + insn_buf[5] = BPF_MOV64_IMM(BPF_REG_0, 0); + cnt = 6; + } else { + insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, -EOPNOTSUPP); + cnt = 1; + } + + new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); + if (!new_prog) + return -ENOMEM; + + delta += cnt - 1; + env->prog = prog = new_prog; + insn = new_prog->insnsi + i + delta; + continue; + } + + /* Implement get_func_arg_cnt inline. */ + if (prog_type == BPF_PROG_TYPE_TRACING && + insn->imm == BPF_FUNC_get_func_arg_cnt) { + /* Load nr_args from ctx - 8 */ + insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8); + + new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, 1); + if (!new_prog) + return -ENOMEM; + + env->prog = prog = new_prog; + insn = new_prog->insnsi + i + delta; + continue; + } + /* Implement bpf_get_func_ip inline. */ if (prog_type == BPF_PROG_TYPE_TRACING && insn->imm == BPF_FUNC_get_func_ip) { - /* Load IP address from ctx - 8 */ - insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8); + /* Load IP address from ctx - 16 */ + insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -16); new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, 1); if (!new_prog) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 623dd0684429..cea2ca6df949 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1012,7 +1012,7 @@ const struct bpf_func_proto bpf_snprintf_btf_proto = { BPF_CALL_1(bpf_get_func_ip_tracing, void *, ctx) { /* This helper call is inlined by verifier. */ - return ((u64 *)ctx)[-1]; + return ((u64 *)ctx)[-2]; } static const struct bpf_func_proto bpf_get_func_ip_proto_tracing = { @@ -1091,6 +1091,53 @@ static const struct bpf_func_proto bpf_get_branch_snapshot_proto = { .arg2_type = ARG_CONST_SIZE_OR_ZERO, }; +BPF_CALL_3(get_func_arg, void *, ctx, u32, n, u64 *, value) +{ + /* This helper call is inlined by verifier. */ + u64 nr_args = ((u64 *)ctx)[-1]; + + if ((u64) n >= nr_args) + return -EINVAL; + *value = ((u64 *)ctx)[n]; + return 0; +} + +static const struct bpf_func_proto bpf_get_func_arg_proto = { + .func = get_func_arg, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_PTR_TO_LONG, +}; + +BPF_CALL_2(get_func_ret, void *, ctx, u64 *, value) +{ + /* This helper call is inlined by verifier. */ + u64 nr_args = ((u64 *)ctx)[-1]; + + *value = ((u64 *)ctx)[nr_args]; + return 0; +} + +static const struct bpf_func_proto bpf_get_func_ret_proto = { + .func = get_func_ret, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_PTR_TO_LONG, +}; + +BPF_CALL_1(get_func_arg_cnt, void *, ctx) +{ + /* This helper call is inlined by verifier. */ + return ((u64 *)ctx)[-1]; +} + +static const struct bpf_func_proto bpf_get_func_arg_cnt_proto = { + .func = get_func_arg_cnt, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, +}; + static const struct bpf_func_proto * bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { @@ -1629,6 +1676,12 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) NULL; case BPF_FUNC_d_path: return &bpf_d_path_proto; + case BPF_FUNC_get_func_arg: + return bpf_prog_has_trampoline(prog) ? &bpf_get_func_arg_proto : NULL; + case BPF_FUNC_get_func_ret: + return bpf_prog_has_trampoline(prog) ? &bpf_get_func_ret_proto : NULL; + case BPF_FUNC_get_func_arg_cnt: + return bpf_prog_has_trampoline(prog) ? &bpf_get_func_arg_cnt_proto : NULL; default: fn = raw_tp_prog_func_proto(func_id, prog); if (!fn && prog->expected_attach_type == BPF_TRACE_ITER) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 2820c77e4846..b0383d371b9a 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -4993,6 +4993,31 @@ union bpf_attr { * An integer less than, equal to, or greater than zero * if the first **s1_sz** bytes of **s1** is found to be * less than, to match, or be greater than **s2**. + * + * long bpf_get_func_arg(void *ctx, u32 n, u64 *value) + * Description + * Get **n**-th argument (zero based) of the traced function (for tracing programs) + * returned in **value**. + * + * Return + * 0 on success. + * **-EINVAL** if n >= arguments count of traced function. + * + * long bpf_get_func_ret(void *ctx, u64 *value) + * Description + * Get return value of the traced function (for tracing programs) + * in **value**. + * + * Return + * 0 on success. + * **-EOPNOTSUPP** for tracing programs other than BPF_TRACE_FEXIT or BPF_MODIFY_RETURN. + * + * long bpf_get_func_arg_cnt(void *ctx) + * Description + * Get number of arguments of the traced function (for tracing programs). + * + * Return + * The number of arguments of the traced function. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5178,6 +5203,9 @@ union bpf_attr { FN(find_vma), \ FN(loop), \ FN(strncmp), \ + FN(get_func_arg), \ + FN(get_func_ret), \ + FN(get_func_arg_cnt), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.2.3 From 98046f7486db723ec8bb99a950a4fa5f5be55cd1 Mon Sep 17 00:00:00 2001 From: Jeffle Xu Date: Thu, 25 Nov 2021 15:05:26 +0800 Subject: fuse: support per inode DAX in fuse protocol Expand the fuse protocol to support per inode DAX. FUSE_HAS_INODE_DAX flag is added indicating if fuse server/client supporting per inode DAX. It can be conveyed in both FUSE_INIT request and reply. FUSE_ATTR_DAX flag is added indicating if DAX shall be enabled for corresponding file. It is conveyed in FUSE_LOOKUP reply. Signed-off-by: Jeffle Xu Reviewed-by: Vivek Goyal Signed-off-by: Miklos Szeredi --- include/uapi/linux/fuse.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index 3f0ea63fec08..d6ccee961891 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -193,6 +193,7 @@ * - add flags2 to fuse_init_in and fuse_init_out * - add FUSE_SECURITY_CTX init flag * - add security context to create, mkdir, symlink, and mknod requests + * - add FUSE_HAS_INODE_DAX, FUSE_ATTR_DAX */ #ifndef _LINUX_FUSE_H @@ -351,6 +352,7 @@ struct fuse_file_lock { * FUSE_INIT_RESERVED: reserved, do not use * FUSE_SECURITY_CTX: add security context to create, mkdir, symlink, and * mknod + * FUSE_HAS_INODE_DAX: use per inode DAX */ #define FUSE_ASYNC_READ (1 << 0) #define FUSE_POSIX_LOCKS (1 << 1) @@ -386,6 +388,7 @@ struct fuse_file_lock { #define FUSE_INIT_RESERVED (1 << 31) /* bits 32..63 get shifted down 32 bits into the flags2 field */ #define FUSE_SECURITY_CTX (1ULL << 32) +#define FUSE_HAS_INODE_DAX (1ULL << 33) /** * CUSE INIT request/reply flags @@ -468,8 +471,10 @@ struct fuse_file_lock { * fuse_attr flags * * FUSE_ATTR_SUBMOUNT: Object is a submount root + * FUSE_ATTR_DAX: Enable DAX for this file in per inode DAX mode */ #define FUSE_ATTR_SUBMOUNT (1 << 0) +#define FUSE_ATTR_DAX (1 << 1) /** * Open flags -- cgit v1.2.3 From 9c9211a3fc7aa41b2952765b62000443b3bb6f23 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Fri, 10 Dec 2021 16:59:58 +0800 Subject: net_tstamp: add new flag HWTSTAMP_FLAG_BONDED_PHC_INDEX Since commit 94dd016ae538 ("bond: pass get_ts_info and SIOC[SG]HWTSTAMP ioctl to active device") the user could get bond active interface's PHC index directly. But when there is a failover, the bond active interface will change, thus the PHC index is also changed. This may break the user's program if they did not update the PHC timely. This patch adds a new hwtstamp_config flag HWTSTAMP_FLAG_BONDED_PHC_INDEX. When the user wants to get the bond active interface's PHC, they need to add this flag and be aware the PHC index may be changed. With the new flag. All flag checks in current drivers are removed. Only the checking in net_hwtstamp_validate() is kept. Suggested-by: Jakub Kicinski Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller --- drivers/net/dsa/hirschmann/hellcreek_hwtstamp.c | 4 ---- drivers/net/dsa/mv88e6xxx/hwtstamp.c | 4 ---- drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 3 --- drivers/net/ethernet/aquantia/atlantic/aq_main.c | 3 --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 5 ----- drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c | 3 --- drivers/net/ethernet/broadcom/tg3.c | 3 --- drivers/net/ethernet/cadence/macb_ptp.c | 4 ---- drivers/net/ethernet/cavium/liquidio/lio_main.c | 3 --- drivers/net/ethernet/cavium/liquidio/lio_vf_main.c | 3 --- drivers/net/ethernet/cavium/octeon/octeon_mgmt.c | 3 --- drivers/net/ethernet/cavium/thunder/nicvf_main.c | 4 ---- drivers/net/ethernet/engleder/tsnep_ptp.c | 3 --- drivers/net/ethernet/freescale/fec_ptp.c | 4 ---- drivers/net/ethernet/freescale/gianfar.c | 4 ---- drivers/net/ethernet/intel/e1000e/netdev.c | 4 ---- drivers/net/ethernet/intel/i40e/i40e_ptp.c | 4 ---- drivers/net/ethernet/intel/ice/ice_ptp.c | 4 ---- drivers/net/ethernet/intel/igb/igb_ptp.c | 4 ---- drivers/net/ethernet/intel/igc/igc_ptp.c | 4 ---- drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c | 4 ---- drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 3 --- drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c | 4 ---- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 4 ---- drivers/net/ethernet/microchip/lan743x_ptp.c | 6 ------ drivers/net/ethernet/mscc/ocelot.c | 4 ---- drivers/net/ethernet/neterion/vxge/vxge-main.c | 4 ---- drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c | 3 --- drivers/net/ethernet/qlogic/qede/qede_ptp.c | 5 ----- drivers/net/ethernet/renesas/ravb_main.c | 4 ---- drivers/net/ethernet/sfc/ptp.c | 3 --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 4 ---- drivers/net/ethernet/ti/cpsw_priv.c | 4 ---- drivers/net/ethernet/ti/netcp_ethss.c | 4 ---- drivers/net/ethernet/xscale/ixp4xx_eth.c | 3 --- drivers/net/phy/dp83640.c | 3 --- drivers/net/phy/mscc/mscc_ptp.c | 3 --- drivers/ptp/ptp_ines.c | 4 ---- include/uapi/linux/net_tstamp.h | 16 +++++++++++++++- net/core/dev_ioctl.c | 2 +- 40 files changed, 16 insertions(+), 144 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/net/dsa/hirschmann/hellcreek_hwtstamp.c b/drivers/net/dsa/hirschmann/hellcreek_hwtstamp.c index 40b41c794dfa..b3bc948d6145 100644 --- a/drivers/net/dsa/hirschmann/hellcreek_hwtstamp.c +++ b/drivers/net/dsa/hirschmann/hellcreek_hwtstamp.c @@ -52,10 +52,6 @@ static int hellcreek_set_hwtstamp_config(struct hellcreek *hellcreek, int port, */ clear_bit_unlock(HELLCREEK_HWTSTAMP_ENABLED, &ps->state); - /* Reserved for future extensions */ - if (config->flags) - return -EINVAL; - switch (config->tx_type) { case HWTSTAMP_TX_ON: tx_tstamp_enable = true; diff --git a/drivers/net/dsa/mv88e6xxx/hwtstamp.c b/drivers/net/dsa/mv88e6xxx/hwtstamp.c index 8f74ffc7a279..389f8a6ec0ab 100644 --- a/drivers/net/dsa/mv88e6xxx/hwtstamp.c +++ b/drivers/net/dsa/mv88e6xxx/hwtstamp.c @@ -100,10 +100,6 @@ static int mv88e6xxx_set_hwtstamp_config(struct mv88e6xxx_chip *chip, int port, */ clear_bit_unlock(MV88E6XXX_HWTSTAMP_ENABLED, &ps->state); - /* reserved for future extensions */ - if (config->flags) - return -EINVAL; - switch (config->tx_type) { case HWTSTAMP_TX_OFF: tstamp_enable = false; diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index 30d24d19f40d..492ac383f16d 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -1508,9 +1508,6 @@ static int xgbe_set_hwtstamp_settings(struct xgbe_prv_data *pdata, if (copy_from_user(&config, ifreq->ifr_data, sizeof(config))) return -EFAULT; - if (config.flags) - return -EINVAL; - mac_tscr = 0; switch (config.tx_type) { diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_main.c b/drivers/net/ethernet/aquantia/atlantic/aq_main.c index e22935ce9573..e65ce7199dac 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_main.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_main.c @@ -231,9 +231,6 @@ static void aq_ndev_set_multicast_settings(struct net_device *ndev) static int aq_ndev_config_hwtstamp(struct aq_nic_s *aq_nic, struct hwtstamp_config *config) { - if (config->flags) - return -EINVAL; - switch (config->tx_type) { case HWTSTAMP_TX_OFF: case HWTSTAMP_TX_ON: diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index aec666e97683..651bc1d7a57a 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -15356,11 +15356,6 @@ static int bnx2x_hwtstamp_ioctl(struct bnx2x *bp, struct ifreq *ifr) DP(BNX2X_MSG_PTP, "Requested tx_type: %d, requested rx_filters = %d\n", config.tx_type, config.rx_filter); - if (config.flags) { - BNX2X_ERR("config.flags is reserved for future use\n"); - return -EINVAL; - } - bp->hwtstamp_ioctl_called = true; bp->tx_type = config.tx_type; bp->rx_filter = config.rx_filter; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c index 8388be119f9a..48520967746f 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c @@ -417,9 +417,6 @@ int bnxt_hwtstamp_set(struct net_device *dev, struct ifreq *ifr) if (copy_from_user(&stmpconf, ifr->ifr_data, sizeof(stmpconf))) return -EFAULT; - if (stmpconf.flags) - return -EINVAL; - if (stmpconf.tx_type != HWTSTAMP_TX_ON && stmpconf.tx_type != HWTSTAMP_TX_OFF) return -ERANGE; diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 283f3c1f1195..c28f8cc00d1c 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -13806,9 +13806,6 @@ static int tg3_hwtstamp_set(struct net_device *dev, struct ifreq *ifr) if (copy_from_user(&stmpconf, ifr->ifr_data, sizeof(stmpconf))) return -EFAULT; - if (stmpconf.flags) - return -EINVAL; - if (stmpconf.tx_type != HWTSTAMP_TX_ON && stmpconf.tx_type != HWTSTAMP_TX_OFF) return -ERANGE; diff --git a/drivers/net/ethernet/cadence/macb_ptp.c b/drivers/net/ethernet/cadence/macb_ptp.c index 095c5a2144a7..fb6b27f46b15 100644 --- a/drivers/net/ethernet/cadence/macb_ptp.c +++ b/drivers/net/ethernet/cadence/macb_ptp.c @@ -464,10 +464,6 @@ int gem_set_hwtst(struct net_device *dev, struct ifreq *ifr, int cmd) sizeof(*tstamp_config))) return -EFAULT; - /* reserved for future extensions */ - if (tstamp_config->flags) - return -EINVAL; - switch (tstamp_config->tx_type) { case HWTSTAMP_TX_OFF: break; diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index 12eee2bc7f5c..ba28aa444e5a 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -2114,9 +2114,6 @@ static int hwtstamp_ioctl(struct net_device *netdev, struct ifreq *ifr) if (copy_from_user(&conf, ifr->ifr_data, sizeof(conf))) return -EFAULT; - if (conf.flags) - return -EINVAL; - switch (conf.tx_type) { case HWTSTAMP_TX_ON: case HWTSTAMP_TX_OFF: diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c index c607756b731f..568f211d91cc 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c @@ -1254,9 +1254,6 @@ static int hwtstamp_ioctl(struct net_device *netdev, struct ifreq *ifr) if (copy_from_user(&conf, ifr->ifr_data, sizeof(conf))) return -EFAULT; - if (conf.flags) - return -EINVAL; - switch (conf.tx_type) { case HWTSTAMP_TX_ON: case HWTSTAMP_TX_OFF: diff --git a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c index 4b4ffdd1044d..103591dcea1c 100644 --- a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c +++ b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c @@ -702,9 +702,6 @@ static int octeon_mgmt_ioctl_hwtstamp(struct net_device *netdev, if (copy_from_user(&config, rq->ifr_data, sizeof(config))) return -EFAULT; - if (config.flags) /* reserved for future extensions */ - return -EINVAL; - /* Check the status of hardware for tiemstamps */ if (OCTEON_IS_MODEL(OCTEON_CN6XXX)) { /* Get the current state of the PTP clock */ diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index bb45d5df2856..63191692f624 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -1917,10 +1917,6 @@ static int nicvf_config_hwtstamp(struct net_device *netdev, struct ifreq *ifr) if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) return -EFAULT; - /* reserved for future extensions */ - if (config.flags) - return -EINVAL; - switch (config.tx_type) { case HWTSTAMP_TX_OFF: case HWTSTAMP_TX_ON: diff --git a/drivers/net/ethernet/engleder/tsnep_ptp.c b/drivers/net/ethernet/engleder/tsnep_ptp.c index 4bfb4d8508f5..eaad453d487e 100644 --- a/drivers/net/ethernet/engleder/tsnep_ptp.c +++ b/drivers/net/ethernet/engleder/tsnep_ptp.c @@ -31,9 +31,6 @@ int tsnep_ptp_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) return -EFAULT; - if (config.flags) - return -EINVAL; - switch (config.tx_type) { case HWTSTAMP_TX_OFF: case HWTSTAMP_TX_ON: diff --git a/drivers/net/ethernet/freescale/fec_ptp.c b/drivers/net/ethernet/freescale/fec_ptp.c index d71eac7e1924..af99017a5453 100644 --- a/drivers/net/ethernet/freescale/fec_ptp.c +++ b/drivers/net/ethernet/freescale/fec_ptp.c @@ -473,10 +473,6 @@ int fec_ptp_set(struct net_device *ndev, struct ifreq *ifr) if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) return -EFAULT; - /* reserved for future extensions */ - if (config.flags) - return -EINVAL; - switch (config.tx_type) { case HWTSTAMP_TX_OFF: fep->hwts_tx_en = 0; diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index acab58fd3db3..206b7a35eaf5 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -2076,10 +2076,6 @@ static int gfar_hwtstamp_set(struct net_device *netdev, struct ifreq *ifr) if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) return -EFAULT; - /* reserved for future extensions */ - if (config.flags) - return -EINVAL; - switch (config.tx_type) { case HWTSTAMP_TX_OFF: priv->hwts_tx_en = 0; diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 44e2dc8328a2..635a95927e93 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -3614,10 +3614,6 @@ static int e1000e_config_hwtstamp(struct e1000_adapter *adapter, if (!(adapter->flags & FLAG_HAS_HW_TIMESTAMP)) return -EINVAL; - /* flags reserved for future extensions - must be zero */ - if (config->flags) - return -EINVAL; - switch (config->tx_type) { case HWTSTAMP_TX_OFF: tsync_tx_ctl = 0; diff --git a/drivers/net/ethernet/intel/i40e/i40e_ptp.c b/drivers/net/ethernet/intel/i40e/i40e_ptp.c index 09b1d5aed1c9..61e5789d78db 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ptp.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ptp.c @@ -1205,10 +1205,6 @@ static int i40e_ptp_set_timestamp_mode(struct i40e_pf *pf, INIT_WORK(&pf->ptp_extts0_work, i40e_ptp_extts0_work); - /* Reserved for future extensions. */ - if (config->flags) - return -EINVAL; - switch (config->tx_type) { case HWTSTAMP_TX_OFF: pf->ptp_tx = false; diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index bf7247c6f58e..dfc7c830acf6 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -1205,10 +1205,6 @@ int ice_ptp_get_ts_config(struct ice_pf *pf, struct ifreq *ifr) static int ice_ptp_set_timestamp_mode(struct ice_pf *pf, struct hwtstamp_config *config) { - /* Reserved for future extensions. */ - if (config->flags) - return -EINVAL; - switch (config->tx_type) { case HWTSTAMP_TX_OFF: ice_set_tx_tstamp(pf, false); diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c index 0011b15e678c..0ac4cc5eaa2d 100644 --- a/drivers/net/ethernet/intel/igb/igb_ptp.c +++ b/drivers/net/ethernet/intel/igb/igb_ptp.c @@ -1015,10 +1015,6 @@ static int igb_ptp_set_timestamp_mode(struct igb_adapter *adapter, bool is_l2 = false; u32 regval; - /* reserved for future extensions */ - if (config->flags) - return -EINVAL; - switch (config->tx_type) { case HWTSTAMP_TX_OFF: tsync_tx_ctl = 0; diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c index 30568e3544cd..71813fa8f928 100644 --- a/drivers/net/ethernet/intel/igc/igc_ptp.c +++ b/drivers/net/ethernet/intel/igc/igc_ptp.c @@ -560,10 +560,6 @@ static void igc_ptp_enable_tx_timestamp(struct igc_adapter *adapter) static int igc_ptp_set_timestamp_mode(struct igc_adapter *adapter, struct hwtstamp_config *config) { - /* reserved for future extensions */ - if (config->flags) - return -EINVAL; - switch (config->tx_type) { case HWTSTAMP_TX_OFF: igc_ptp_disable_tx_timestamp(adapter); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c index 23ddfd79fc8b..336426a67ac1 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c @@ -992,10 +992,6 @@ static int ixgbe_ptp_set_timestamp_mode(struct ixgbe_adapter *adapter, bool is_l2 = false; u32 regval; - /* reserved for future extensions */ - if (config->flags) - return -EINVAL; - switch (config->tx_type) { case HWTSTAMP_TX_OFF: tsync_tx_ctl = 0; diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 8e5820d12362..b1cce4425296 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -5142,9 +5142,6 @@ static int mvpp2_set_ts_config(struct mvpp2_port *port, struct ifreq *ifr) if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) return -EFAULT; - if (config.flags) - return -EINVAL; - if (config.tx_type != HWTSTAMP_TX_OFF && config.tx_type != HWTSTAMP_TX_ON) return -ERANGE; diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index 1333edf1c361..6080ebd9bd94 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -2002,10 +2002,6 @@ int otx2_config_hwtstamp(struct net_device *netdev, struct ifreq *ifr) if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) return -EFAULT; - /* reserved for future extensions */ - if (config.flags) - return -EINVAL; - switch (config.tx_type) { case HWTSTAMP_TX_OFF: otx2_config_hw_tx_tstamp(pfvf, false); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index f1c10f2bda78..ad1e4caf48bf 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -2427,10 +2427,6 @@ static int mlx4_en_hwtstamp_set(struct net_device *dev, struct ifreq *ifr) if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) return -EFAULT; - /* reserved for future extensions */ - if (config.flags) - return -EINVAL; - /* device doesn't support time stamping */ if (!(mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS)) return -EINVAL; diff --git a/drivers/net/ethernet/microchip/lan743x_ptp.c b/drivers/net/ethernet/microchip/lan743x_ptp.c index 9380e396f648..8b7a8d879083 100644 --- a/drivers/net/ethernet/microchip/lan743x_ptp.c +++ b/drivers/net/ethernet/microchip/lan743x_ptp.c @@ -1305,12 +1305,6 @@ int lan743x_ptp_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) return -EFAULT; - if (config.flags) { - netif_warn(adapter, drv, adapter->netdev, - "ignoring hwtstamp_config.flags == 0x%08X, expected 0\n", - config.flags); - } - switch (config.tx_type) { case HWTSTAMP_TX_OFF: for (index = 0; index < LAN743X_MAX_TX_CHANNELS; diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 876a7ecf86eb..9b42187a026a 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -1617,10 +1617,6 @@ int ocelot_hwstamp_set(struct ocelot *ocelot, int port, struct ifreq *ifr) if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg))) return -EFAULT; - /* reserved for future extensions */ - if (cfg.flags) - return -EINVAL; - /* Tx type sanity check */ switch (cfg.tx_type) { case HWTSTAMP_TX_ON: diff --git a/drivers/net/ethernet/neterion/vxge/vxge-main.c b/drivers/net/ethernet/neterion/vxge/vxge-main.c index 1969009a91e7..2c2e9e56ed4e 100644 --- a/drivers/net/ethernet/neterion/vxge/vxge-main.c +++ b/drivers/net/ethernet/neterion/vxge/vxge-main.c @@ -3159,10 +3159,6 @@ static int vxge_hwtstamp_set(struct vxgedev *vdev, void __user *data) if (copy_from_user(&config, data, sizeof(config))) return -EFAULT; - /* reserved for future extensions */ - if (config.flags) - return -EINVAL; - /* Transmit HW Timestamp not supported */ switch (config.tx_type) { case HWTSTAMP_TX_OFF: diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c index 71d234291fc5..1dc40c537281 100644 --- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c +++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c @@ -210,9 +210,6 @@ static int hwtstamp_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg))) return -EFAULT; - if (cfg.flags) /* reserved for future extensions */ - return -EINVAL; - /* Get ieee1588's dev information */ pdev = adapter->ptp_pdev; diff --git a/drivers/net/ethernet/qlogic/qede/qede_ptp.c b/drivers/net/ethernet/qlogic/qede/qede_ptp.c index 8c28fabb0ff6..39176e765767 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ptp.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ptp.c @@ -304,11 +304,6 @@ int qede_ptp_hw_ts(struct qede_dev *edev, struct ifreq *ifr) "HWTSTAMP IOCTL: Requested tx_type = %d, requested rx_filters = %d\n", config.tx_type, config.rx_filter); - if (config.flags) { - DP_ERR(edev, "config.flags is reserved for future use\n"); - return -EINVAL; - } - ptp->hw_ts_ioctl_called = 1; ptp->tx_type = config.tx_type; ptp->rx_filter = config.rx_filter; diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index ce09bd45527e..b215cde68e10 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -2221,10 +2221,6 @@ static int ravb_hwtstamp_set(struct net_device *ndev, struct ifreq *req) if (copy_from_user(&config, req->ifr_data, sizeof(config))) return -EFAULT; - /* Reserved for future extensions */ - if (config.flags) - return -EINVAL; - switch (config.tx_type) { case HWTSTAMP_TX_OFF: tstamp_tx_ctrl = 0; diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c index 797e51802ccb..f0ef515e2ade 100644 --- a/drivers/net/ethernet/sfc/ptp.c +++ b/drivers/net/ethernet/sfc/ptp.c @@ -1765,9 +1765,6 @@ static int efx_ptp_ts_init(struct efx_nic *efx, struct hwtstamp_config *init) { int rc; - if (init->flags) - return -EINVAL; - if ((init->tx_type != HWTSTAMP_TX_OFF) && (init->tx_type != HWTSTAMP_TX_ON)) return -ERANGE; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 7e3e1bc0f61d..c26ac288f981 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -638,10 +638,6 @@ static int stmmac_hwtstamp_set(struct net_device *dev, struct ifreq *ifr) netdev_dbg(priv->dev, "%s config flags:0x%x, tx_type:0x%x, rx_filter:0x%x\n", __func__, config.flags, config.tx_type, config.rx_filter); - /* reserved for future extensions */ - if (config.flags) - return -EINVAL; - if (config.tx_type != HWTSTAMP_TX_OFF && config.tx_type != HWTSTAMP_TX_ON) return -ERANGE; diff --git a/drivers/net/ethernet/ti/cpsw_priv.c b/drivers/net/ethernet/ti/cpsw_priv.c index c99dd9735087..8624a044776f 100644 --- a/drivers/net/ethernet/ti/cpsw_priv.c +++ b/drivers/net/ethernet/ti/cpsw_priv.c @@ -626,10 +626,6 @@ static int cpsw_hwtstamp_set(struct net_device *dev, struct ifreq *ifr) if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg))) return -EFAULT; - /* reserved for future extensions */ - if (cfg.flags) - return -EINVAL; - if (cfg.tx_type != HWTSTAMP_TX_OFF && cfg.tx_type != HWTSTAMP_TX_ON) return -ERANGE; diff --git a/drivers/net/ethernet/ti/netcp_ethss.c b/drivers/net/ethernet/ti/netcp_ethss.c index 33c1592d5381..751fb0bc65c5 100644 --- a/drivers/net/ethernet/ti/netcp_ethss.c +++ b/drivers/net/ethernet/ti/netcp_ethss.c @@ -2654,10 +2654,6 @@ static int gbe_hwtstamp_set(struct gbe_intf *gbe_intf, struct ifreq *ifr) if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg))) return -EFAULT; - /* reserved for future extensions */ - if (cfg.flags) - return -EINVAL; - switch (cfg.tx_type) { case HWTSTAMP_TX_OFF: gbe_dev->tx_ts_enabled = 0; diff --git a/drivers/net/ethernet/xscale/ixp4xx_eth.c b/drivers/net/ethernet/xscale/ixp4xx_eth.c index 65fdad1107fc..df77a22d1b81 100644 --- a/drivers/net/ethernet/xscale/ixp4xx_eth.c +++ b/drivers/net/ethernet/xscale/ixp4xx_eth.c @@ -382,9 +382,6 @@ static int hwtstamp_set(struct net_device *netdev, struct ifreq *ifr) if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg))) return -EFAULT; - if (cfg.flags) /* reserved for future extensions */ - return -EINVAL; - ret = ixp46x_ptp_find(&port->timesync_regs, &port->phc_index); if (ret) return ret; diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c index 705c16675b80..c2d1a85ec559 100644 --- a/drivers/net/phy/dp83640.c +++ b/drivers/net/phy/dp83640.c @@ -1235,9 +1235,6 @@ static int dp83640_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr) if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg))) return -EFAULT; - if (cfg.flags) /* reserved for future extensions */ - return -EINVAL; - if (cfg.tx_type < 0 || cfg.tx_type > HWTSTAMP_TX_ONESTEP_SYNC) return -ERANGE; diff --git a/drivers/net/phy/mscc/mscc_ptp.c b/drivers/net/phy/mscc/mscc_ptp.c index edb951695b13..34f829845d06 100644 --- a/drivers/net/phy/mscc/mscc_ptp.c +++ b/drivers/net/phy/mscc/mscc_ptp.c @@ -1057,9 +1057,6 @@ static int vsc85xx_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr) if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg))) return -EFAULT; - if (cfg.flags) - return -EINVAL; - switch (cfg.tx_type) { case HWTSTAMP_TX_ONESTEP_SYNC: one_step = true; diff --git a/drivers/ptp/ptp_ines.c b/drivers/ptp/ptp_ines.c index 6c7c2843ba0b..61f47fb9d997 100644 --- a/drivers/ptp/ptp_ines.c +++ b/drivers/ptp/ptp_ines.c @@ -338,10 +338,6 @@ static int ines_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr) if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg))) return -EFAULT; - /* reserved for future extensions */ - if (cfg.flags) - return -EINVAL; - switch (cfg.tx_type) { case HWTSTAMP_TX_OFF: ts_stat_tx = 0; diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h index fcc61c73a666..e258e52cfd1f 100644 --- a/include/uapi/linux/net_tstamp.h +++ b/include/uapi/linux/net_tstamp.h @@ -62,7 +62,7 @@ struct so_timestamping { /** * struct hwtstamp_config - %SIOCGHWTSTAMP and %SIOCSHWTSTAMP parameter * - * @flags: no flags defined right now, must be zero for %SIOCSHWTSTAMP + * @flags: one of HWTSTAMP_FLAG_* * @tx_type: one of HWTSTAMP_TX_* * @rx_filter: one of HWTSTAMP_FILTER_* * @@ -78,6 +78,20 @@ struct hwtstamp_config { int rx_filter; }; +/* possible values for hwtstamp_config->flags */ +enum hwtstamp_flags { + /* + * With this flag, the user could get bond active interface's + * PHC index. Note this PHC index is not stable as when there + * is a failover, the bond active interface will be changed, so + * will be the PHC index. + */ + HWTSTAMP_FLAG_BONDED_PHC_INDEX = (1<<0), + + HWTSTAMP_FLAG_LAST = HWTSTAMP_FLAG_BONDED_PHC_INDEX, + HWTSTAMP_FLAG_MASK = (HWTSTAMP_FLAG_LAST - 1) | HWTSTAMP_FLAG_LAST +}; + /* possible values for hwtstamp_config->tx_type */ enum hwtstamp_tx_types { /* diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index 1d309a666932..1b807d119da5 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -192,7 +192,7 @@ static int net_hwtstamp_validate(struct ifreq *ifr) if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg))) return -EFAULT; - if (cfg.flags) /* reserved for future extensions */ + if (cfg.flags & ~HWTSTAMP_FLAG_MASK) return -EINVAL; tx_type = cfg.tx_type; -- cgit v1.2.3 From d61fd650e9d206a71fda789f02a1ced4b19944c4 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Mon, 29 Nov 2021 22:15:29 +0200 Subject: fanotify: introduce group flag FAN_REPORT_TARGET_FID FAN_REPORT_FID is ambiguous in that it reports the fid of the child for some events and the fid of the parent for create/delete/move events. The new FAN_REPORT_TARGET_FID flag is an implicit request to report the fid of the target object of the operation (a.k.a the child inode) also in create/delete/move events in addition to the fid of the parent and the name of the child. To reduce the test matrix for uninteresting use cases, the new FAN_REPORT_TARGET_FID flag requires both FAN_REPORT_NAME and FAN_REPORT_FID. The convenience macro FAN_REPORT_DFID_NAME_TARGET combines FAN_REPORT_TARGET_FID with all the required flags. Link: https://lore.kernel.org/r/20211129201537.1932819-4-amir73il@gmail.com Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- fs/notify/fanotify/fanotify.c | 48 ++++++++++++++++++++++++++++---------- fs/notify/fanotify/fanotify_user.c | 11 ++++++++- include/linux/fanotify.h | 2 +- include/uapi/linux/fanotify.h | 4 ++++ 4 files changed, 51 insertions(+), 14 deletions(-) (limited to 'include/uapi/linux') diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 652fe84cb8ac..85e542b164c8 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -458,17 +458,41 @@ out_err: } /* - * The inode to use as identifier when reporting fid depends on the event. - * Report the modified directory inode on dirent modification events. - * Report the "victim" inode otherwise. + * FAN_REPORT_FID is ambiguous in that it reports the fid of the child for + * some events and the fid of the parent for create/delete/move events. + * + * With the FAN_REPORT_TARGET_FID flag, the fid of the child is reported + * also in create/delete/move events in addition to the fid of the parent + * and the name of the child. + */ +static inline bool fanotify_report_child_fid(unsigned int fid_mode, u32 mask) +{ + if (mask & ALL_FSNOTIFY_DIRENT_EVENTS) + return (fid_mode & FAN_REPORT_TARGET_FID); + + return (fid_mode & FAN_REPORT_FID) && !(mask & FAN_ONDIR); +} + +/* + * The inode to use as identifier when reporting fid depends on the event + * and the group flags. + * + * With the group flag FAN_REPORT_TARGET_FID, always report the child fid. + * + * Without the group flag FAN_REPORT_TARGET_FID, report the modified directory + * fid on dirent events and the child fid otherwise. + * * For example: - * FS_ATTRIB reports the child inode even if reported on a watched parent. - * FS_CREATE reports the modified dir inode and not the created inode. + * FS_ATTRIB reports the child fid even if reported on a watched parent. + * FS_CREATE reports the modified dir fid without FAN_REPORT_TARGET_FID. + * and reports the created child fid with FAN_REPORT_TARGET_FID. */ static struct inode *fanotify_fid_inode(u32 event_mask, const void *data, - int data_type, struct inode *dir) + int data_type, struct inode *dir, + unsigned int fid_mode) { - if (event_mask & ALL_FSNOTIFY_DIRENT_EVENTS) + if ((event_mask & ALL_FSNOTIFY_DIRENT_EVENTS) && + !(fid_mode & FAN_REPORT_TARGET_FID)) return dir; return fsnotify_data_inode(data, data_type); @@ -647,10 +671,11 @@ static struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group, { struct fanotify_event *event = NULL; gfp_t gfp = GFP_KERNEL_ACCOUNT; - struct inode *id = fanotify_fid_inode(mask, data, data_type, dir); + unsigned int fid_mode = FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS); + struct inode *id = fanotify_fid_inode(mask, data, data_type, dir, + fid_mode); struct inode *dirid = fanotify_dfid_inode(mask, data, data_type, dir); const struct path *path = fsnotify_data_path(data, data_type); - unsigned int fid_mode = FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS); struct mem_cgroup *old_memcg; struct inode *child = NULL; bool name_event = false; @@ -660,11 +685,10 @@ static struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group, if ((fid_mode & FAN_REPORT_DIR_FID) && dirid) { /* - * With both flags FAN_REPORT_DIR_FID and FAN_REPORT_FID, we - * report the child fid for events reported on a non-dir child + * For certain events and group flags, report the child fid * in addition to reporting the parent fid and maybe child name. */ - if ((fid_mode & FAN_REPORT_FID) && id != dirid && !ondir) + if (fanotify_report_child_fid(fid_mode, mask) && id != dirid) child = id; id = dirid; diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index ab24c37f1fdf..00bbc29712bb 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -1275,6 +1275,15 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) if ((fid_mode & FAN_REPORT_NAME) && !(fid_mode & FAN_REPORT_DIR_FID)) return -EINVAL; + /* + * FAN_REPORT_TARGET_FID requires FAN_REPORT_NAME and FAN_REPORT_FID + * and is used as an indication to report both dir and child fid on all + * dirent events. + */ + if ((fid_mode & FAN_REPORT_TARGET_FID) && + (!(fid_mode & FAN_REPORT_NAME) || !(fid_mode & FAN_REPORT_FID))) + return -EINVAL; + f_flags = O_RDWR | FMODE_NONOTIFY; if (flags & FAN_CLOEXEC) f_flags |= O_CLOEXEC; @@ -1667,7 +1676,7 @@ static int __init fanotify_user_setup(void) FANOTIFY_DEFAULT_MAX_USER_MARKS); BUILD_BUG_ON(FANOTIFY_INIT_FLAGS & FANOTIFY_INTERNAL_GROUP_FLAGS); - BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 11); + BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 12); BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 9); fanotify_mark_cache = KMEM_CACHE(fsnotify_mark, diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index 616af2ea20f3..376e050e6f38 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -25,7 +25,7 @@ extern struct ctl_table fanotify_table[]; /* for sysctl */ #define FANOTIFY_CLASS_BITS (FAN_CLASS_NOTIF | FANOTIFY_PERM_CLASSES) -#define FANOTIFY_FID_BITS (FAN_REPORT_FID | FAN_REPORT_DFID_NAME) +#define FANOTIFY_FID_BITS (FAN_REPORT_DFID_NAME_TARGET) #define FANOTIFY_INFO_MODES (FANOTIFY_FID_BITS | FAN_REPORT_PIDFD) diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h index bd1932c2074d..60f73639a896 100644 --- a/include/uapi/linux/fanotify.h +++ b/include/uapi/linux/fanotify.h @@ -57,9 +57,13 @@ #define FAN_REPORT_FID 0x00000200 /* Report unique file id */ #define FAN_REPORT_DIR_FID 0x00000400 /* Report unique directory id */ #define FAN_REPORT_NAME 0x00000800 /* Report events with name */ +#define FAN_REPORT_TARGET_FID 0x00001000 /* Report dirent target id */ /* Convenience macro - FAN_REPORT_NAME requires FAN_REPORT_DIR_FID */ #define FAN_REPORT_DFID_NAME (FAN_REPORT_DIR_FID | FAN_REPORT_NAME) +/* Convenience macro - FAN_REPORT_TARGET_FID requires all other FID flags */ +#define FAN_REPORT_DFID_NAME_TARGET (FAN_REPORT_DFID_NAME | \ + FAN_REPORT_FID | FAN_REPORT_TARGET_FID) /* Deprecated - do not use this in programs and do not add new flags here! */ #define FAN_ALL_INIT_FLAGS (FAN_CLOEXEC | FAN_NONBLOCK | \ -- cgit v1.2.3 From 3982534ba5ce45e890b2f5ef5e7372c1accd14c7 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Mon, 29 Nov 2021 22:15:34 +0200 Subject: fanotify: record old and new parent and name in FAN_RENAME event In the special case of FAN_RENAME event, we record both the old and new parent and name. Link: https://lore.kernel.org/r/20211129201537.1932819-9-amir73il@gmail.com Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- fs/notify/fanotify/fanotify.c | 42 ++++++++++++++++++++++++++++++++++++++---- include/uapi/linux/fanotify.h | 2 ++ 2 files changed, 40 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 5f184b2d6ea7..db81eab90544 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -592,21 +592,28 @@ static struct fanotify_event *fanotify_alloc_name_event(struct inode *dir, __kernel_fsid_t *fsid, const struct qstr *name, struct inode *child, + struct dentry *moved, unsigned int *hash, gfp_t gfp) { struct fanotify_name_event *fne; struct fanotify_info *info; struct fanotify_fh *dfh, *ffh; + struct inode *dir2 = moved ? d_inode(moved->d_parent) : NULL; + const struct qstr *name2 = moved ? &moved->d_name : NULL; unsigned int dir_fh_len = fanotify_encode_fh_len(dir); + unsigned int dir2_fh_len = fanotify_encode_fh_len(dir2); unsigned int child_fh_len = fanotify_encode_fh_len(child); unsigned long name_len = name ? name->len : 0; + unsigned long name2_len = name2 ? name2->len : 0; unsigned int len, size; /* Reserve terminating null byte even for empty name */ - size = sizeof(*fne) + name_len + 1; + size = sizeof(*fne) + name_len + name2_len + 2; if (dir_fh_len) size += FANOTIFY_FH_HDR_LEN + dir_fh_len; + if (dir2_fh_len) + size += FANOTIFY_FH_HDR_LEN + dir2_fh_len; if (child_fh_len) size += FANOTIFY_FH_HDR_LEN + child_fh_len; fne = kmalloc(size, gfp); @@ -623,6 +630,11 @@ static struct fanotify_event *fanotify_alloc_name_event(struct inode *dir, len = fanotify_encode_fh(dfh, dir, dir_fh_len, hash, 0); fanotify_info_set_dir_fh(info, len); } + if (dir2_fh_len) { + dfh = fanotify_info_dir2_fh(info); + len = fanotify_encode_fh(dfh, dir2, dir2_fh_len, hash, 0); + fanotify_info_set_dir2_fh(info, len); + } if (child_fh_len) { ffh = fanotify_info_file_fh(info); len = fanotify_encode_fh(ffh, child, child_fh_len, hash, 0); @@ -632,11 +644,22 @@ static struct fanotify_event *fanotify_alloc_name_event(struct inode *dir, fanotify_info_copy_name(info, name); *hash ^= full_name_hash((void *)name_len, name->name, name_len); } + if (name2_len) { + fanotify_info_copy_name2(info, name2); + *hash ^= full_name_hash((void *)name2_len, name2->name, + name2_len); + } pr_debug("%s: size=%u dir_fh_len=%u child_fh_len=%u name_len=%u name='%.*s'\n", __func__, size, dir_fh_len, child_fh_len, info->name_len, info->name_len, fanotify_info_name(info)); + if (dir2_fh_len) { + pr_debug("%s: dir2_fh_len=%u name2_len=%u name2='%.*s'\n", + __func__, dir2_fh_len, info->name2_len, + info->name2_len, fanotify_info_name2(info)); + } + return &fne->fae; } @@ -692,6 +715,7 @@ static struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group, struct inode *dirid = fanotify_dfid_inode(mask, data, data_type, dir); const struct path *path = fsnotify_data_path(data, data_type); struct mem_cgroup *old_memcg; + struct dentry *moved = NULL; struct inode *child = NULL; bool name_event = false; unsigned int hash = 0; @@ -727,6 +751,15 @@ static struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group, } else if ((mask & ALL_FSNOTIFY_DIRENT_EVENTS) || !ondir) { name_event = true; } + + /* + * In the special case of FAN_RENAME event, we record both + * old and new parent+name. + * 'dirid' and 'file_name' are the old parent+name and + * 'moved' has the new parent+name. + */ + if (mask & FAN_RENAME) + moved = fsnotify_data_dentry(data, data_type); } /* @@ -748,9 +781,9 @@ static struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group, } else if (fanotify_is_error_event(mask)) { event = fanotify_alloc_error_event(group, fsid, data, data_type, &hash); - } else if (name_event && (file_name || child)) { - event = fanotify_alloc_name_event(id, fsid, file_name, child, - &hash, gfp); + } else if (name_event && (file_name || moved || child)) { + event = fanotify_alloc_name_event(dirid, fsid, file_name, child, + moved, &hash, gfp); } else if (fid_mode) { event = fanotify_alloc_fid_event(id, fsid, &hash, gfp); } else { @@ -860,6 +893,7 @@ static int fanotify_handle_event(struct fsnotify_group *group, u32 mask, BUILD_BUG_ON(FAN_OPEN_EXEC != FS_OPEN_EXEC); BUILD_BUG_ON(FAN_OPEN_EXEC_PERM != FS_OPEN_EXEC_PERM); BUILD_BUG_ON(FAN_FS_ERROR != FS_ERROR); + BUILD_BUG_ON(FAN_RENAME != FS_RENAME); BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 20); diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h index 60f73639a896..9d0e2dc5767b 100644 --- a/include/uapi/linux/fanotify.h +++ b/include/uapi/linux/fanotify.h @@ -28,6 +28,8 @@ #define FAN_EVENT_ON_CHILD 0x08000000 /* Interested in child events */ +#define FAN_RENAME 0x10000000 /* File was renamed */ + #define FAN_ONDIR 0x40000000 /* Event occurred against dir */ /* helper events */ -- cgit v1.2.3 From 7326e382c21e9c23c89c88369afdc90b82a14da8 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Mon, 29 Nov 2021 22:15:36 +0200 Subject: fanotify: report old and/or new parent+name in FAN_RENAME event In the special case of FAN_RENAME event, we report old or new or both old and new parent+name. A single info record will be reported if either the old or new dir is watched and two records will be reported if both old and new dir (or their filesystem) are watched. The old and new parent+name are reported using new info record types FAN_EVENT_INFO_TYPE_{OLD,NEW}_DFID_NAME, so if a single info record is reported, it is clear to the application, to which dir entry the fid+name info is referring to. Link: https://lore.kernel.org/r/20211129201537.1932819-11-amir73il@gmail.com Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- fs/notify/fanotify/fanotify.c | 7 +++++ fs/notify/fanotify/fanotify.h | 18 +++++++++++++ fs/notify/fanotify/fanotify_user.c | 52 ++++++++++++++++++++++++++++++++++---- include/uapi/linux/fanotify.h | 6 +++++ 4 files changed, 78 insertions(+), 5 deletions(-) (limited to 'include/uapi/linux') diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 14bc0f12cc9f..0da305b6f3e2 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -153,6 +153,13 @@ static bool fanotify_should_merge(struct fanotify_event *old, if ((old->mask & FS_ISDIR) != (new->mask & FS_ISDIR)) return false; + /* + * FAN_RENAME event is reported with special info record types, + * so we cannot merge it with other events. + */ + if ((old->mask & FAN_RENAME) != (new->mask & FAN_RENAME)) + return false; + switch (old->type) { case FANOTIFY_EVENT_TYPE_PATH: return fanotify_path_equal(fanotify_event_path(old), diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h index 8fa3bc0effd4..a3d5b751cac5 100644 --- a/fs/notify/fanotify/fanotify.h +++ b/fs/notify/fanotify/fanotify.h @@ -373,6 +373,13 @@ static inline int fanotify_event_dir_fh_len(struct fanotify_event *event) return info ? fanotify_info_dir_fh_len(info) : 0; } +static inline int fanotify_event_dir2_fh_len(struct fanotify_event *event) +{ + struct fanotify_info *info = fanotify_event_info(event); + + return info ? fanotify_info_dir2_fh_len(info) : 0; +} + static inline bool fanotify_event_has_object_fh(struct fanotify_event *event) { /* For error events, even zeroed fh are reported. */ @@ -386,6 +393,17 @@ static inline bool fanotify_event_has_dir_fh(struct fanotify_event *event) return fanotify_event_dir_fh_len(event) > 0; } +static inline bool fanotify_event_has_dir2_fh(struct fanotify_event *event) +{ + return fanotify_event_dir2_fh_len(event) > 0; +} + +static inline bool fanotify_event_has_any_dir_fh(struct fanotify_event *event) +{ + return fanotify_event_has_dir_fh(event) || + fanotify_event_has_dir2_fh(event); +} + struct fanotify_path_event { struct fanotify_event fae; struct path path; diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index e4a11f56782d..eb2a0251b318 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -129,12 +129,29 @@ static int fanotify_fid_info_len(int fh_len, int name_len) FANOTIFY_EVENT_ALIGN); } +/* FAN_RENAME may have one or two dir+name info records */ +static int fanotify_dir_name_info_len(struct fanotify_event *event) +{ + struct fanotify_info *info = fanotify_event_info(event); + int dir_fh_len = fanotify_event_dir_fh_len(event); + int dir2_fh_len = fanotify_event_dir2_fh_len(event); + int info_len = 0; + + if (dir_fh_len) + info_len += fanotify_fid_info_len(dir_fh_len, + info->name_len); + if (dir2_fh_len) + info_len += fanotify_fid_info_len(dir2_fh_len, + info->name2_len); + + return info_len; +} + static size_t fanotify_event_len(unsigned int info_mode, struct fanotify_event *event) { size_t event_len = FAN_EVENT_METADATA_LEN; struct fanotify_info *info; - int dir_fh_len; int fh_len; int dot_len = 0; @@ -146,9 +163,8 @@ static size_t fanotify_event_len(unsigned int info_mode, info = fanotify_event_info(event); - if (fanotify_event_has_dir_fh(event)) { - dir_fh_len = fanotify_event_dir_fh_len(event); - event_len += fanotify_fid_info_len(dir_fh_len, info->name_len); + if (fanotify_event_has_any_dir_fh(event)) { + event_len += fanotify_dir_name_info_len(event); } else if ((info_mode & FAN_REPORT_NAME) && (event->mask & FAN_ONDIR)) { /* @@ -379,6 +395,8 @@ static int copy_fid_info_to_user(__kernel_fsid_t *fsid, struct fanotify_fh *fh, return -EFAULT; break; case FAN_EVENT_INFO_TYPE_DFID_NAME: + case FAN_EVENT_INFO_TYPE_OLD_DFID_NAME: + case FAN_EVENT_INFO_TYPE_NEW_DFID_NAME: if (WARN_ON_ONCE(!name || !name_len)) return -EFAULT; break; @@ -478,11 +496,19 @@ static int copy_info_records_to_user(struct fanotify_event *event, unsigned int pidfd_mode = info_mode & FAN_REPORT_PIDFD; /* - * Event info records order is as follows: dir fid + name, child fid. + * Event info records order is as follows: + * 1. dir fid + name + * 2. (optional) new dir fid + new name + * 3. (optional) child fid */ if (fanotify_event_has_dir_fh(event)) { info_type = info->name_len ? FAN_EVENT_INFO_TYPE_DFID_NAME : FAN_EVENT_INFO_TYPE_DFID; + + /* FAN_RENAME uses special info types */ + if (event->mask & FAN_RENAME) + info_type = FAN_EVENT_INFO_TYPE_OLD_DFID_NAME; + ret = copy_fid_info_to_user(fanotify_event_fsid(event), fanotify_info_dir_fh(info), info_type, @@ -496,6 +522,22 @@ static int copy_info_records_to_user(struct fanotify_event *event, total_bytes += ret; } + /* New dir fid+name may be reported in addition to old dir fid+name */ + if (fanotify_event_has_dir2_fh(event)) { + info_type = FAN_EVENT_INFO_TYPE_NEW_DFID_NAME; + ret = copy_fid_info_to_user(fanotify_event_fsid(event), + fanotify_info_dir2_fh(info), + info_type, + fanotify_info_name2(info), + info->name2_len, buf, count); + if (ret < 0) + return ret; + + buf += ret; + count -= ret; + total_bytes += ret; + } + if (fanotify_event_has_object_fh(event)) { const char *dot = NULL; int dot_len = 0; diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h index 9d0e2dc5767b..e8ac38cc2fd6 100644 --- a/include/uapi/linux/fanotify.h +++ b/include/uapi/linux/fanotify.h @@ -134,6 +134,12 @@ struct fanotify_event_metadata { #define FAN_EVENT_INFO_TYPE_PIDFD 4 #define FAN_EVENT_INFO_TYPE_ERROR 5 +/* Special info types for FAN_RENAME */ +#define FAN_EVENT_INFO_TYPE_OLD_DFID_NAME 10 +/* Reserved for FAN_EVENT_INFO_TYPE_OLD_DFID 11 */ +#define FAN_EVENT_INFO_TYPE_NEW_DFID_NAME 12 +/* Reserved for FAN_EVENT_INFO_TYPE_NEW_DFID 13 */ + /* Variable length info record following event metadata */ struct fanotify_event_info_header { __u8 info_type; -- cgit v1.2.3 From 7adc576512110ef32b0424a727ee1d04359fc205 Mon Sep 17 00:00:00 2001 From: Baowen Zheng Date: Fri, 17 Dec 2021 19:16:23 +0100 Subject: flow_offload: add skip_hw and skip_sw to control if offload the action We add skip_hw and skip_sw for user to control if offload the action to hardware. We also add in_hw_count for user to indicate if the action is offloaded to any hardware. Signed-off-by: Baowen Zheng Signed-off-by: Simon Horman Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/act_api.h | 1 + include/uapi/linux/pkt_cls.h | 9 +++-- net/sched/act_api.c | 83 ++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 84 insertions(+), 9 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/net/act_api.h b/include/net/act_api.h index b418bb0e44e0..15c6a881817d 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -44,6 +44,7 @@ struct tc_action { u8 hw_stats; u8 used_hw_stats; bool used_hw_stats_valid; + u32 in_hw_count; }; #define tcf_index common.tcfa_index #define tcf_refcnt common.tcfa_refcnt diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 6836ccb9c45d..ee38b35c3f57 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -19,13 +19,16 @@ enum { TCA_ACT_FLAGS, TCA_ACT_HW_STATS, TCA_ACT_USED_HW_STATS, + TCA_ACT_IN_HW_COUNT, __TCA_ACT_MAX }; /* See other TCA_ACT_FLAGS_ * flags in include/net/act_api.h. */ -#define TCA_ACT_FLAGS_NO_PERCPU_STATS 1 /* Don't use percpu allocator for - * actions stats. - */ +#define TCA_ACT_FLAGS_NO_PERCPU_STATS (1 << 0) /* Don't use percpu allocator for + * actions stats. + */ +#define TCA_ACT_FLAGS_SKIP_HW (1 << 1) /* don't offload action to HW */ +#define TCA_ACT_FLAGS_SKIP_SW (1 << 2) /* don't use action in SW */ /* tca HW stats type * When user does not pass the attribute, he does not care. diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 5c21401b0555..d446e89ececc 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -131,6 +131,12 @@ static void free_tcf(struct tc_action *p) kfree(p); } +static void offload_action_hw_count_set(struct tc_action *act, + u32 hw_count) +{ + act->in_hw_count = hw_count; +} + static unsigned int tcf_offload_act_num_actions_single(struct tc_action *act) { if (is_tcf_pedit(act)) @@ -139,6 +145,29 @@ static unsigned int tcf_offload_act_num_actions_single(struct tc_action *act) return 1; } +static bool tc_act_skip_hw(u32 flags) +{ + return (flags & TCA_ACT_FLAGS_SKIP_HW) ? true : false; +} + +static bool tc_act_skip_sw(u32 flags) +{ + return (flags & TCA_ACT_FLAGS_SKIP_SW) ? true : false; +} + +static bool tc_act_in_hw(struct tc_action *act) +{ + return !!act->in_hw_count; +} + +/* SKIP_HW and SKIP_SW are mutually exclusive flags. */ +static bool tc_act_flags_valid(u32 flags) +{ + flags &= TCA_ACT_FLAGS_SKIP_HW | TCA_ACT_FLAGS_SKIP_SW; + + return flags ^ (TCA_ACT_FLAGS_SKIP_HW | TCA_ACT_FLAGS_SKIP_SW); +} + static int offload_action_init(struct flow_offload_action *fl_action, struct tc_action *act, enum offload_act_command cmd, @@ -155,6 +184,7 @@ static int offload_action_init(struct flow_offload_action *fl_action, } static int tcf_action_offload_cmd(struct flow_offload_action *fl_act, + u32 *hw_count, struct netlink_ext_ack *extack) { int err; @@ -164,6 +194,9 @@ static int tcf_action_offload_cmd(struct flow_offload_action *fl_act, if (err < 0) return err; + if (hw_count) + *hw_count = err; + return 0; } @@ -171,12 +204,17 @@ static int tcf_action_offload_cmd(struct flow_offload_action *fl_act, static int tcf_action_offload_add(struct tc_action *action, struct netlink_ext_ack *extack) { + bool skip_sw = tc_act_skip_sw(action->tcfa_flags); struct tc_action *actions[TCA_ACT_MAX_PRIO] = { [0] = action, }; struct flow_offload_action *fl_action; + u32 in_hw_count = 0; int num, err = 0; + if (tc_act_skip_hw(action->tcfa_flags)) + return 0; + num = tcf_offload_act_num_actions_single(action); fl_action = offload_action_alloc(num); if (!fl_action) @@ -193,7 +231,13 @@ static int tcf_action_offload_add(struct tc_action *action, goto fl_err; } - err = tcf_action_offload_cmd(fl_action, extack); + err = tcf_action_offload_cmd(fl_action, &in_hw_count, extack); + if (!err) + offload_action_hw_count_set(action, in_hw_count); + + if (skip_sw && !tc_act_in_hw(action)) + err = -EINVAL; + tc_cleanup_offload_action(&fl_action->action); fl_err: @@ -205,13 +249,24 @@ fl_err: static int tcf_action_offload_del(struct tc_action *action) { struct flow_offload_action fl_act = {}; + u32 in_hw_count = 0; int err = 0; + if (!tc_act_in_hw(action)) + return 0; + err = offload_action_init(&fl_act, action, FLOW_ACT_DESTROY, NULL); if (err) return err; - return tcf_action_offload_cmd(&fl_act, NULL); + err = tcf_action_offload_cmd(&fl_act, &in_hw_count, NULL); + if (err) + return err; + + if (action->in_hw_count != in_hw_count) + return -EINVAL; + + return 0; } static void tcf_action_cleanup(struct tc_action *p) @@ -821,6 +876,9 @@ restart_act_graph: jmp_prgcnt -= 1; continue; } + + if (tc_act_skip_sw(a->tcfa_flags)) + continue; repeat: ret = a->ops->act(skb, a, res); if (ret == TC_ACT_REPEAT) @@ -926,6 +984,9 @@ tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref) a->tcfa_flags, a->tcfa_flags)) goto nla_put_failure; + if (nla_put_u32(skb, TCA_ACT_IN_HW_COUNT, a->in_hw_count)) + goto nla_put_failure; + nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; @@ -1005,7 +1066,9 @@ static const struct nla_policy tcf_action_policy[TCA_ACT_MAX + 1] = { [TCA_ACT_COOKIE] = { .type = NLA_BINARY, .len = TC_COOKIE_MAX_SIZE }, [TCA_ACT_OPTIONS] = { .type = NLA_NESTED }, - [TCA_ACT_FLAGS] = NLA_POLICY_BITFIELD32(TCA_ACT_FLAGS_NO_PERCPU_STATS), + [TCA_ACT_FLAGS] = NLA_POLICY_BITFIELD32(TCA_ACT_FLAGS_NO_PERCPU_STATS | + TCA_ACT_FLAGS_SKIP_HW | + TCA_ACT_FLAGS_SKIP_SW), [TCA_ACT_HW_STATS] = NLA_POLICY_BITFIELD32(TCA_ACT_HW_STATS_ANY), }; @@ -1118,8 +1181,13 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, } } hw_stats = tcf_action_hw_stats_get(tb[TCA_ACT_HW_STATS]); - if (tb[TCA_ACT_FLAGS]) + if (tb[TCA_ACT_FLAGS]) { userflags = nla_get_bitfield32(tb[TCA_ACT_FLAGS]); + if (!tc_act_flags_valid(userflags.value)) { + err = -EINVAL; + goto err_out; + } + } err = a_o->init(net, tb[TCA_ACT_OPTIONS], est, &a, tp, userflags.value | flags, extack); @@ -1194,8 +1262,11 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla, sz += tcf_action_fill_size(act); /* Start from index 0 */ actions[i - 1] = act; - if (!tc_act_bind(flags)) - tcf_action_offload_add(act, extack); + if (!tc_act_bind(flags)) { + err = tcf_action_offload_add(act, extack); + if (tc_act_skip_sw(act->tcfa_flags) && err) + goto err; + } } /* We have to commit them all together, because if any error happened in -- cgit v1.2.3 From 28f350a67d291575492057c92ceb8518ecbace95 Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Mon, 29 Nov 2021 15:32:41 +0200 Subject: cfg80211: Fix order of enum nl80211_band_iftype_attr documentation And fix the comment. Signed-off-by: Ilan Peer Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/iwlwifi.20211129152938.4ef43aff0c5d.I96dcb743bcd4f387ba4cfaa61987aeb642ad762b@changeid Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 3e734826792f..bf69ecbc1c24 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -3747,13 +3747,12 @@ enum nl80211_mpath_info { * capabilities IE * @NL80211_BAND_IFTYPE_ATTR_HE_CAP_PPE: HE PPE thresholds information as * defined in HE capabilities IE - * @NL80211_BAND_IFTYPE_ATTR_MAX: highest band HE capability attribute currently - * defined * @NL80211_BAND_IFTYPE_ATTR_HE_6GHZ_CAPA: HE 6GHz band capabilities (__le16), * given for all 6 GHz band channels * @NL80211_BAND_IFTYPE_ATTR_VENDOR_ELEMS: vendor element capabilities that are * advertised on this band/for this iftype (binary) * @__NL80211_BAND_IFTYPE_ATTR_AFTER_LAST: internal use + * @NL80211_BAND_IFTYPE_ATTR_MAX: highest band attribute currently defined */ enum nl80211_band_iftype_attr { __NL80211_BAND_IFTYPE_ATTR_INVALID, -- cgit v1.2.3 From a083ee8a4e03348fb90a4b24cbe957b3252c7b04 Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Mon, 29 Nov 2021 15:32:34 +0200 Subject: cfg80211: Add support for notifying association comeback Thought the underline driver MLME can handle association temporal rejection with comeback, it is still useful to notify this to user space, as user space might want to handle the temporal rejection differently. For example, in case the comeback time is too long, user space can deauthenticate immediately and try to associate with a different AP. Signed-off-by: Ilan Peer Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/iwlwifi.20211129152938.2467809e8cb3.I45574185b582666bc78eef0c29a4c36b478e5382@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 12 ++++++++++++ include/uapi/linux/nl80211.h | 7 +++++++ net/wireless/nl80211.c | 38 ++++++++++++++++++++++++++++++++++++++ net/wireless/trace.h | 17 +++++++++++++++++ 4 files changed, 74 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index a887086cb103..c9a9073d4c2a 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -8287,6 +8287,18 @@ bool cfg80211_iftype_allowed(struct wiphy *wiphy, enum nl80211_iftype iftype, bool is_4addr, u8 check_swif); +/** + * cfg80211_assoc_comeback - notification of association that was + * temporarly rejected with a comeback + * @netdev: network device + * @bss: the bss entry with which association is in progress. + * @timeout: timeout interval value TUs. + * + * this function may sleep. the caller must hold the corresponding wdev's mutex. + */ +void cfg80211_assoc_comeback(struct net_device *netdev, + struct cfg80211_bss *bss, u32 timeout); + /* Logging, debugging and troubleshooting/diagnostic helpers. */ /* wiphy_printk helpers, similar to dev_printk */ diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index bf69ecbc1c24..a8e20d25252b 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1232,6 +1232,11 @@ * &NL80211_ATTR_FILS_NONCES - for FILS Nonces * (STA Nonce 16 bytes followed by AP Nonce 16 bytes) * + * @NL80211_CMD_ASSOC_COMEBACK: notification about an association + * temporal rejection with comeback. The event includes %NL80211_ATTR_MAC + * to describe the BSSID address of the AP and %NL80211_ATTR_TIMEOUT to + * specify the timeout value. + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -1474,6 +1479,8 @@ enum nl80211_commands { NL80211_CMD_SET_FILS_AAD, + NL80211_CMD_ASSOC_COMEBACK, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index bfa5d7428a3f..71da0d506502 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -17064,6 +17064,44 @@ static void nl80211_send_remain_on_chan_event( nlmsg_free(msg); } +void cfg80211_assoc_comeback(struct net_device *netdev, + struct cfg80211_bss *bss, u32 timeout) +{ + struct wireless_dev *wdev = netdev->ieee80211_ptr; + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); + struct sk_buff *msg; + void *hdr; + + trace_cfg80211_assoc_comeback(wdev, bss->bssid, timeout); + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return; + + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_ASSOC_COMEBACK); + if (!hdr) { + nlmsg_free(msg); + return; + } + + if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || + nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) || + nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, bss->bssid) || + nla_put_u32(msg, NL80211_ATTR_TIMEOUT, timeout)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + + genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0, + NL80211_MCGRP_MLME, GFP_KERNEL); + return; + + nla_put_failure: + nlmsg_free(msg); +} +EXPORT_SYMBOL(cfg80211_assoc_comeback); + void cfg80211_ready_on_channel(struct wireless_dev *wdev, u64 cookie, struct ieee80211_channel *chan, unsigned int duration, gfp_t gfp) diff --git a/net/wireless/trace.h b/net/wireless/trace.h index e854d52db1a6..01710f0c8a03 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -3696,6 +3696,23 @@ TRACE_EVENT(rdev_set_radar_offchan, WIPHY_PR_ARG, CHAN_DEF_PR_ARG) ); +TRACE_EVENT(cfg80211_assoc_comeback, + TP_PROTO(struct wireless_dev *wdev, const u8 *bssid, u32 timeout), + TP_ARGS(wdev, bssid, timeout), + TP_STRUCT__entry( + WDEV_ENTRY + MAC_ENTRY(bssid) + __field(u32, timeout) + ), + TP_fast_assign( + WDEV_ASSIGN; + MAC_ASSIGN(bssid, bssid); + __entry->timeout = timeout; + ), + TP_printk(WDEV_PR_FMT ", " MAC_PR_FMT ", timeout: %u TUs", + WDEV_PR_ARG, MAC_PR_ARG(bssid), __entry->timeout) +); + #endif /* !__RDEV_OPS_TRACE || TRACE_HEADER_MULTI_READ */ #undef TRACE_INCLUDE_PATH -- cgit v1.2.3 From a95bfb876fa87e2d0fa718ee61a8030ddf162d2b Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Mon, 29 Nov 2021 14:11:24 +0100 Subject: cfg80211: rename offchannel_chain structs to background_chain to avoid confusion with ETSI standard ETSI standard defines "Offchannel CAC" as: "Off-Channel CAC is performed by a number of non-continuous checks spread over a period in time. This period, which is required to determine the presence of radar signals, is defined as the Off-Channel CAC Time.. Minimum Off-Channel CAC Time 6 minutes and Maximum Off-Channel CAC Time 4 hours..". mac80211 implementation refers to a dedicated hw chain used for continuous radar monitoring. Rename offchannel_* references to background_* in order to avoid confusion with ETSI standard. Signed-off-by: Lorenzo Bianconi Link: https://lore.kernel.org/r/4204cc1d648d76b44557981713231e030a3bd991.1638190762.git.lorenzo@kernel.org Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 20 +++++----- include/net/mac80211.h | 10 ++--- include/uapi/linux/nl80211.h | 14 +++---- net/mac80211/cfg.c | 10 ++--- net/wireless/chan.c | 6 +-- net/wireless/core.c | 13 ++++--- net/wireless/core.h | 20 +++++----- net/wireless/mlme.c | 89 ++++++++++++++++++++++---------------------- net/wireless/nl80211.c | 8 ++-- net/wireless/rdev-ops.h | 10 ++--- net/wireless/trace.h | 2 +- 11 files changed, 102 insertions(+), 100 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index c9a9073d4c2a..e29d904eccff 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -4073,14 +4073,14 @@ struct mgmt_frame_regs { * those to decrypt (Re)Association Request and encrypt (Re)Association * Response frame. * - * @set_radar_offchan: Configure dedicated offchannel chain available for + * @set_radar_background: Configure dedicated offchannel chain available for * radar/CAC detection on some hw. This chain can't be used to transmit * or receive frames and it is bounded to a running wdev. - * Offchannel radar/CAC detection allows to avoid the CAC downtime + * Background radar/CAC detection allows to avoid the CAC downtime * switching to a different channel during CAC detection on the selected * radar channel. * The caller is expected to set chandef pointer to NULL in order to - * disable offchannel CAC/radar detection. + * disable background CAC/radar detection. */ struct cfg80211_ops { int (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow); @@ -4413,8 +4413,8 @@ struct cfg80211_ops { struct cfg80211_color_change_settings *params); int (*set_fils_aad)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_fils_aad *fils_aad); - int (*set_radar_offchan)(struct wiphy *wiphy, - struct cfg80211_chan_def *chandef); + int (*set_radar_background)(struct wiphy *wiphy, + struct cfg80211_chan_def *chandef); }; /* @@ -7626,9 +7626,9 @@ cfg80211_radar_event(struct wiphy *wiphy, } static inline void -cfg80211_offchan_radar_event(struct wiphy *wiphy, - struct cfg80211_chan_def *chandef, - gfp_t gfp) +cfg80211_background_radar_event(struct wiphy *wiphy, + struct cfg80211_chan_def *chandef, + gfp_t gfp) { __cfg80211_radar_event(wiphy, chandef, true, gfp); } @@ -7663,13 +7663,13 @@ void cfg80211_cac_event(struct net_device *netdev, enum nl80211_radar_event event, gfp_t gfp); /** - * cfg80211_offchan_cac_abort - Channel Availability Check offchan abort event + * cfg80211_background_cac_abort - Channel Availability Check offchan abort event * @wiphy: the wiphy * * This function is called by the driver when a Channel Availability Check * (CAC) is aborted by a offchannel dedicated chain. */ -void cfg80211_offchan_cac_abort(struct wiphy *wiphy); +void cfg80211_background_cac_abort(struct wiphy *wiphy); /** * cfg80211_gtk_rekey_notify - notify userspace about driver rekeying diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 28e66cdae3ec..8907338d52b5 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -3939,14 +3939,14 @@ struct ieee80211_prep_tx_info { * twt structure. * @twt_teardown_request: Update the hw with TWT teardown request received * from the peer. - * @set_radar_offchan: Configure dedicated offchannel chain available for + * @set_radar_background: Configure dedicated offchannel chain available for * radar/CAC detection on some hw. This chain can't be used to transmit * or receive frames and it is bounded to a running wdev. - * Offchannel radar/CAC detection allows to avoid the CAC downtime + * Background radar/CAC detection allows to avoid the CAC downtime * switching to a different channel during CAC detection on the selected * radar channel. * The caller is expected to set chandef pointer to NULL in order to - * disable offchannel CAC/radar detection. + * disable background CAC/radar detection. * @net_fill_forward_path: Called from .ndo_fill_forward_path in order to * resolve a path for hardware flow offloading */ @@ -4277,8 +4277,8 @@ struct ieee80211_ops { struct ieee80211_twt_setup *twt); void (*twt_teardown_request)(struct ieee80211_hw *hw, struct ieee80211_sta *sta, u8 flowid); - int (*set_radar_offchan)(struct ieee80211_hw *hw, - struct cfg80211_chan_def *chandef); + int (*set_radar_background)(struct ieee80211_hw *hw, + struct cfg80211_chan_def *chandef); int (*net_fill_forward_path)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_sta *sta, diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index a8e20d25252b..5ce20fb44f24 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2646,10 +2646,10 @@ enum nl80211_commands { * Mandatory parameter for the transmitting interface to enable MBSSID. * Optional for the non-transmitting interfaces. * - * @NL80211_ATTR_RADAR_OFFCHAN: Configure dedicated offchannel chain available for - * radar/CAC detection on some hw. This chain can't be used to transmit - * or receive frames and it is bounded to a running wdev. - * Offchannel radar/CAC detection allows to avoid the CAC downtime + * @NL80211_ATTR_RADAR_BACKGROUND: Configure dedicated offchannel chain + * available for radar/CAC detection on some hw. This chain can't be used + * to transmit or receive frames and it is bounded to a running wdev. + * Background radar/CAC detection allows to avoid the CAC downtime * switching on a different channel during CAC detection on the selected * radar channel. * @@ -3159,7 +3159,7 @@ enum nl80211_attrs { NL80211_ATTR_MBSSID_CONFIG, NL80211_ATTR_MBSSID_ELEMS, - NL80211_ATTR_RADAR_OFFCHAN, + NL80211_ATTR_RADAR_BACKGROUND, /* add attributes here, update the policy in nl80211.c */ @@ -6066,7 +6066,7 @@ enum nl80211_feature_flags { * frames. Userspace has to share FILS AAD details to the driver by using * @NL80211_CMD_SET_FILS_AAD. * - * @NL80211_EXT_FEATURE_RADAR_OFFCHAN: Device supports offchannel radar/CAC + * @NL80211_EXT_FEATURE_RADAR_BACKGROUND: Device supports background radar/CAC * detection. * * @NUM_NL80211_EXT_FEATURES: number of extended features. @@ -6135,7 +6135,7 @@ enum nl80211_ext_feature_index { NL80211_EXT_FEATURE_PROT_RANGE_NEGO_AND_MEASURE, NL80211_EXT_FEATURE_BSS_COLOR, NL80211_EXT_FEATURE_FILS_CRYPTO_OFFLOAD, - NL80211_EXT_FEATURE_RADAR_OFFCHAN, + NL80211_EXT_FEATURE_RADAR_BACKGROUND, /* add new features before the definition below */ NUM_NL80211_EXT_FEATURES, diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 8f15d9f30aac..dc3746d5cdc1 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -4414,15 +4414,15 @@ out: } static int -ieee80211_set_radar_offchan(struct wiphy *wiphy, - struct cfg80211_chan_def *chandef) +ieee80211_set_radar_background(struct wiphy *wiphy, + struct cfg80211_chan_def *chandef) { struct ieee80211_local *local = wiphy_priv(wiphy); - if (!local->ops->set_radar_offchan) + if (!local->ops->set_radar_background) return -EOPNOTSUPP; - return local->ops->set_radar_offchan(&local->hw, chandef); + return local->ops->set_radar_background(&local->hw, chandef); } const struct cfg80211_ops mac80211_config_ops = { @@ -4529,5 +4529,5 @@ const struct cfg80211_ops mac80211_config_ops = { .reset_tid_config = ieee80211_reset_tid_config, .set_sar_specs = ieee80211_set_sar_specs, .color_change = ieee80211_color_change, - .set_radar_offchan = ieee80211_set_radar_offchan, + .set_radar_background = ieee80211_set_radar_background, }; diff --git a/net/wireless/chan.c b/net/wireless/chan.c index ac2e5677524a..eb822052d344 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -718,13 +718,13 @@ static bool cfg80211_offchan_chain_is_active(struct cfg80211_registered_device *rdev, struct ieee80211_channel *channel) { - if (!rdev->offchan_radar_wdev) + if (!rdev->background_radar_wdev) return false; - if (!cfg80211_chandef_valid(&rdev->offchan_radar_chandef)) + if (!cfg80211_chandef_valid(&rdev->background_radar_chandef)) return false; - return cfg80211_is_sub_chan(&rdev->offchan_radar_chandef, channel); + return cfg80211_is_sub_chan(&rdev->background_radar_chandef, channel); } bool cfg80211_any_wiphy_oper_chan(struct wiphy *wiphy, diff --git a/net/wireless/core.c b/net/wireless/core.c index c4ea903f8184..132c575c5540 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -545,9 +545,10 @@ use_default_name: INIT_WORK(&rdev->rfkill_block, cfg80211_rfkill_block_work); INIT_WORK(&rdev->conn_work, cfg80211_conn_work); INIT_WORK(&rdev->event_work, cfg80211_event_work); - INIT_WORK(&rdev->offchan_cac_abort_wk, cfg80211_offchan_cac_abort_wk); - INIT_DELAYED_WORK(&rdev->offchan_cac_done_wk, - cfg80211_offchan_cac_done_wk); + INIT_WORK(&rdev->background_cac_abort_wk, + cfg80211_background_cac_abort_wk); + INIT_DELAYED_WORK(&rdev->background_cac_done_wk, + cfg80211_background_cac_done_wk); init_waitqueue_head(&rdev->dev_wait); @@ -1057,13 +1058,13 @@ void wiphy_unregister(struct wiphy *wiphy) cancel_work_sync(&rdev->conn_work); flush_work(&rdev->event_work); cancel_delayed_work_sync(&rdev->dfs_update_channels_wk); - cancel_delayed_work_sync(&rdev->offchan_cac_done_wk); + cancel_delayed_work_sync(&rdev->background_cac_done_wk); flush_work(&rdev->destroy_work); flush_work(&rdev->sched_scan_stop_wk); flush_work(&rdev->propagate_radar_detect_wk); flush_work(&rdev->propagate_cac_done_wk); flush_work(&rdev->mgmt_registrations_update_wk); - flush_work(&rdev->offchan_cac_abort_wk); + flush_work(&rdev->background_cac_abort_wk); #ifdef CONFIG_PM if (rdev->wiphy.wowlan_config && rdev->ops->set_wakeup) @@ -1212,7 +1213,7 @@ void __cfg80211_leave(struct cfg80211_registered_device *rdev, cfg80211_pmsr_wdev_down(wdev); - cfg80211_stop_offchan_radar_detection(wdev); + cfg80211_stop_background_radar_detection(wdev); switch (wdev->iftype) { case NL80211_IFTYPE_ADHOC: diff --git a/net/wireless/core.h b/net/wireless/core.h index fb8d9006d838..3a7dbd63d8c6 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -84,10 +84,10 @@ struct cfg80211_registered_device { struct delayed_work dfs_update_channels_wk; - struct wireless_dev *offchan_radar_wdev; - struct cfg80211_chan_def offchan_radar_chandef; - struct delayed_work offchan_cac_done_wk; - struct work_struct offchan_cac_abort_wk; + struct wireless_dev *background_radar_wdev; + struct cfg80211_chan_def background_radar_chandef; + struct delayed_work background_cac_done_wk; + struct work_struct background_cac_abort_wk; /* netlink port which started critical protocol (0 means not started) */ u32 crit_proto_nlportid; @@ -497,15 +497,15 @@ cfg80211_chandef_dfs_cac_time(struct wiphy *wiphy, void cfg80211_sched_dfs_chan_update(struct cfg80211_registered_device *rdev); int -cfg80211_start_offchan_radar_detection(struct cfg80211_registered_device *rdev, - struct wireless_dev *wdev, - struct cfg80211_chan_def *chandef); +cfg80211_start_background_radar_detection(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev, + struct cfg80211_chan_def *chandef); -void cfg80211_stop_offchan_radar_detection(struct wireless_dev *wdev); +void cfg80211_stop_background_radar_detection(struct wireless_dev *wdev); -void cfg80211_offchan_cac_done_wk(struct work_struct *work); +void cfg80211_background_cac_done_wk(struct work_struct *work); -void cfg80211_offchan_cac_abort_wk(struct work_struct *work); +void cfg80211_background_cac_abort_wk(struct work_struct *work); bool cfg80211_any_wiphy_oper_chan(struct wiphy *wiphy, struct ieee80211_channel *chan); diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index e970076e1098..c8155a483ec2 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -920,7 +920,7 @@ void __cfg80211_radar_event(struct wiphy *wiphy, cfg80211_set_dfs_state(wiphy, chandef, NL80211_DFS_UNAVAILABLE); if (offchan) - queue_work(cfg80211_wq, &rdev->offchan_cac_abort_wk); + queue_work(cfg80211_wq, &rdev->background_cac_abort_wk); cfg80211_sched_dfs_chan_update(rdev); @@ -975,10 +975,10 @@ void cfg80211_cac_event(struct net_device *netdev, EXPORT_SYMBOL(cfg80211_cac_event); static void -__cfg80211_offchan_cac_event(struct cfg80211_registered_device *rdev, - struct wireless_dev *wdev, - const struct cfg80211_chan_def *chandef, - enum nl80211_radar_event event) +__cfg80211_background_cac_event(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev, + const struct cfg80211_chan_def *chandef, + enum nl80211_radar_event event) { struct wiphy *wiphy = &rdev->wiphy; struct net_device *netdev; @@ -988,7 +988,7 @@ __cfg80211_offchan_cac_event(struct cfg80211_registered_device *rdev, if (!cfg80211_chandef_valid(chandef)) return; - if (!rdev->offchan_radar_wdev) + if (!rdev->background_radar_wdev) return; switch (event) { @@ -997,12 +997,12 @@ __cfg80211_offchan_cac_event(struct cfg80211_registered_device *rdev, memcpy(&rdev->cac_done_chandef, chandef, sizeof(*chandef)); queue_work(cfg80211_wq, &rdev->propagate_cac_done_wk); cfg80211_sched_dfs_chan_update(rdev); - wdev = rdev->offchan_radar_wdev; + wdev = rdev->background_radar_wdev; break; case NL80211_RADAR_CAC_ABORTED: - if (!cancel_delayed_work(&rdev->offchan_cac_done_wk)) + if (!cancel_delayed_work(&rdev->background_cac_done_wk)) return; - wdev = rdev->offchan_radar_wdev; + wdev = rdev->background_radar_wdev; break; case NL80211_RADAR_CAC_STARTED: break; @@ -1015,49 +1015,49 @@ __cfg80211_offchan_cac_event(struct cfg80211_registered_device *rdev, } static void -cfg80211_offchan_cac_event(struct cfg80211_registered_device *rdev, - const struct cfg80211_chan_def *chandef, - enum nl80211_radar_event event) +cfg80211_background_cac_event(struct cfg80211_registered_device *rdev, + const struct cfg80211_chan_def *chandef, + enum nl80211_radar_event event) { wiphy_lock(&rdev->wiphy); - __cfg80211_offchan_cac_event(rdev, rdev->offchan_radar_wdev, - chandef, event); + __cfg80211_background_cac_event(rdev, rdev->background_radar_wdev, + chandef, event); wiphy_unlock(&rdev->wiphy); } -void cfg80211_offchan_cac_done_wk(struct work_struct *work) +void cfg80211_background_cac_done_wk(struct work_struct *work) { struct delayed_work *delayed_work = to_delayed_work(work); struct cfg80211_registered_device *rdev; rdev = container_of(delayed_work, struct cfg80211_registered_device, - offchan_cac_done_wk); - cfg80211_offchan_cac_event(rdev, &rdev->offchan_radar_chandef, - NL80211_RADAR_CAC_FINISHED); + background_cac_done_wk); + cfg80211_background_cac_event(rdev, &rdev->background_radar_chandef, + NL80211_RADAR_CAC_FINISHED); } -void cfg80211_offchan_cac_abort_wk(struct work_struct *work) +void cfg80211_background_cac_abort_wk(struct work_struct *work) { struct cfg80211_registered_device *rdev; rdev = container_of(work, struct cfg80211_registered_device, - offchan_cac_abort_wk); - cfg80211_offchan_cac_event(rdev, &rdev->offchan_radar_chandef, - NL80211_RADAR_CAC_ABORTED); + background_cac_abort_wk); + cfg80211_background_cac_event(rdev, &rdev->background_radar_chandef, + NL80211_RADAR_CAC_ABORTED); } -void cfg80211_offchan_cac_abort(struct wiphy *wiphy) +void cfg80211_background_cac_abort(struct wiphy *wiphy) { struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); - queue_work(cfg80211_wq, &rdev->offchan_cac_abort_wk); + queue_work(cfg80211_wq, &rdev->background_cac_abort_wk); } -EXPORT_SYMBOL(cfg80211_offchan_cac_abort); +EXPORT_SYMBOL(cfg80211_background_cac_abort); int -cfg80211_start_offchan_radar_detection(struct cfg80211_registered_device *rdev, - struct wireless_dev *wdev, - struct cfg80211_chan_def *chandef) +cfg80211_start_background_radar_detection(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev, + struct cfg80211_chan_def *chandef) { unsigned int cac_time_ms; int err; @@ -1065,19 +1065,19 @@ cfg80211_start_offchan_radar_detection(struct cfg80211_registered_device *rdev, lockdep_assert_wiphy(&rdev->wiphy); if (!wiphy_ext_feature_isset(&rdev->wiphy, - NL80211_EXT_FEATURE_RADAR_OFFCHAN)) + NL80211_EXT_FEATURE_RADAR_BACKGROUND)) return -EOPNOTSUPP; /* Offchannel chain already locked by another wdev */ - if (rdev->offchan_radar_wdev && rdev->offchan_radar_wdev != wdev) + if (rdev->background_radar_wdev && rdev->background_radar_wdev != wdev) return -EBUSY; /* CAC already in progress on the offchannel chain */ - if (rdev->offchan_radar_wdev == wdev && - delayed_work_pending(&rdev->offchan_cac_done_wk)) + if (rdev->background_radar_wdev == wdev && + delayed_work_pending(&rdev->background_cac_done_wk)) return -EBUSY; - err = rdev_set_radar_offchan(rdev, chandef); + err = rdev_set_radar_background(rdev, chandef); if (err) return err; @@ -1085,30 +1085,31 @@ cfg80211_start_offchan_radar_detection(struct cfg80211_registered_device *rdev, if (!cac_time_ms) cac_time_ms = IEEE80211_DFS_MIN_CAC_TIME_MS; - rdev->offchan_radar_chandef = *chandef; - rdev->offchan_radar_wdev = wdev; /* Get offchain ownership */ + rdev->background_radar_chandef = *chandef; + rdev->background_radar_wdev = wdev; /* Get offchain ownership */ - __cfg80211_offchan_cac_event(rdev, wdev, chandef, - NL80211_RADAR_CAC_STARTED); - queue_delayed_work(cfg80211_wq, &rdev->offchan_cac_done_wk, + __cfg80211_background_cac_event(rdev, wdev, chandef, + NL80211_RADAR_CAC_STARTED); + queue_delayed_work(cfg80211_wq, &rdev->background_cac_done_wk, msecs_to_jiffies(cac_time_ms)); return 0; } -void cfg80211_stop_offchan_radar_detection(struct wireless_dev *wdev) +void cfg80211_stop_background_radar_detection(struct wireless_dev *wdev) { struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); lockdep_assert_wiphy(wiphy); - if (wdev != rdev->offchan_radar_wdev) + if (wdev != rdev->background_radar_wdev) return; - rdev_set_radar_offchan(rdev, NULL); - rdev->offchan_radar_wdev = NULL; /* Release offchain ownership */ + rdev_set_radar_background(rdev, NULL); + rdev->background_radar_wdev = NULL; /* Release offchain ownership */ - __cfg80211_offchan_cac_event(rdev, wdev, &rdev->offchan_radar_chandef, - NL80211_RADAR_CAC_ABORTED); + __cfg80211_background_cac_event(rdev, wdev, + &rdev->background_radar_chandef, + NL80211_RADAR_CAC_ABORTED); } diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 71da0d506502..6ee21049b028 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -776,7 +776,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_MBSSID_CONFIG] = NLA_POLICY_NESTED(nl80211_mbssid_config_policy), [NL80211_ATTR_MBSSID_ELEMS] = { .type = NLA_NESTED }, - [NL80211_ATTR_RADAR_OFFCHAN] = { .type = NLA_FLAG }, + [NL80211_ATTR_RADAR_BACKGROUND] = { .type = NLA_FLAG }, }; /* policy for the key attributes */ @@ -9305,9 +9305,9 @@ static int nl80211_start_radar_detection(struct sk_buff *skb, goto unlock; } - if (nla_get_flag(info->attrs[NL80211_ATTR_RADAR_OFFCHAN])) { - err = cfg80211_start_offchan_radar_detection(rdev, wdev, - &chandef); + if (nla_get_flag(info->attrs[NL80211_ATTR_RADAR_BACKGROUND])) { + err = cfg80211_start_background_radar_detection(rdev, wdev, + &chandef); goto unlock; } diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index 8672b3ef99e4..439bcf52369c 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -1396,17 +1396,17 @@ rdev_set_fils_aad(struct cfg80211_registered_device *rdev, } static inline int -rdev_set_radar_offchan(struct cfg80211_registered_device *rdev, - struct cfg80211_chan_def *chandef) +rdev_set_radar_background(struct cfg80211_registered_device *rdev, + struct cfg80211_chan_def *chandef) { struct wiphy *wiphy = &rdev->wiphy; int ret; - if (!rdev->ops->set_radar_offchan) + if (!rdev->ops->set_radar_background) return -EOPNOTSUPP; - trace_rdev_set_radar_offchan(wiphy, chandef); - ret = rdev->ops->set_radar_offchan(wiphy, chandef); + trace_rdev_set_radar_background(wiphy, chandef); + ret = rdev->ops->set_radar_background(wiphy, chandef); trace_rdev_return_int(wiphy, ret); return ret; diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 01710f0c8a03..228079d7690a 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -3677,7 +3677,7 @@ TRACE_EVENT(cfg80211_bss_color_notify, __entry->color_bitmap) ); -TRACE_EVENT(rdev_set_radar_offchan, +TRACE_EVENT(rdev_set_radar_background, TP_PROTO(struct wiphy *wiphy, struct cfg80211_chan_def *chandef), TP_ARGS(wiphy, chandef), -- cgit v1.2.3 From 47301a74bbfa80cef876e646a8c5fec03c20fc8d Mon Sep 17 00:00:00 2001 From: Veerendranath Jakkam Date: Fri, 26 Nov 2021 12:55:18 +0530 Subject: nl80211: Add support to set AP settings flags with single attribute In previous method each AP settings flag is represented by a top-level flag attribute and conversion to enum cfg80211_ap_settings_flags had to be done before sending them to driver. This commit is to make it easier to define new AP settings flags and sending them to driver. This commit also deprecate sending of %NL80211_ATTR_EXTERNAL_AUTH_SUPPORT in %NL80211_CMD_START_AP. But to maintain backwards compatibility checks for %NL80211_ATTR_EXTERNAL_AUTH_SUPPORT in %NL80211_CMD_START_AP when %NL80211_ATTR_AP_SETTINGS_FLAGS not present in %NL80211_CMD_START_AP. Signed-off-by: Veerendranath Jakkam Link: https://lore.kernel.org/r/1637911519-21306-1-git-send-email-vjakkam@codeaurora.org Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 11 ----------- include/uapi/linux/nl80211.h | 20 +++++++++++++++++++- net/wireless/nl80211.c | 8 ++++++-- 3 files changed, 25 insertions(+), 14 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index b59baf9dfd67..d19e48f9fdc6 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1187,17 +1187,6 @@ struct cfg80211_unsol_bcast_probe_resp { const u8 *tmpl; }; -/** - * enum cfg80211_ap_settings_flags - AP settings flags - * - * Used by cfg80211_ap_settings - * - * @AP_SETTINGS_EXTERNAL_AUTH_SUPPORT: AP supports external authentication - */ -enum cfg80211_ap_settings_flags { - AP_SETTINGS_EXTERNAL_AUTH_SUPPORT = BIT(0), -}; - /** * struct cfg80211_ap_settings - AP configuration * diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 5ce20fb44f24..ab461b2be157 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2477,7 +2477,9 @@ enum nl80211_commands { * space supports external authentication. This attribute shall be used * with %NL80211_CMD_CONNECT and %NL80211_CMD_START_AP request. The driver * may offload authentication processing to user space if this capability - * is indicated in the respective requests from the user space. + * is indicated in the respective requests from the user space. (This flag + * attribute deprecated for %NL80211_CMD_START_AP, use + * %NL80211_ATTR_AP_SETTINGS_FLAGS) * * @NL80211_ATTR_NSS: Station's New/updated RX_NSS value notified using this * u8 attribute. This is used with %NL80211_CMD_STA_OPMODE_CHANGED. @@ -2653,6 +2655,10 @@ enum nl80211_commands { * switching on a different channel during CAC detection on the selected * radar channel. * + * @NL80211_ATTR_AP_SETTINGS_FLAGS: u32 attribute contains ap settings flags, + * enumerated in &enum nl80211_ap_settings_flags. This attribute shall be + * used with %NL80211_CMD_START_AP request. + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -3161,6 +3167,8 @@ enum nl80211_attrs { NL80211_ATTR_RADAR_BACKGROUND, + NL80211_ATTR_AP_SETTINGS_FLAGS, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -7481,4 +7489,14 @@ enum nl80211_mbssid_config_attributes { NL80211_MBSSID_CONFIG_ATTR_MAX = __NL80211_MBSSID_CONFIG_ATTR_LAST - 1, }; +/** + * enum nl80211_ap_settings_flags - AP settings flags + * + * @NL80211_AP_SETTINGS_EXTERNAL_AUTH_SUPPORT: AP supports external + * authentication. + */ +enum nl80211_ap_settings_flags { + NL80211_AP_SETTINGS_EXTERNAL_AUTH_SUPPORT = 1 << 0, +}; + #endif /* __LINUX_NL80211_H */ diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 6ee21049b028..578bff9c378b 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -777,6 +777,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { NLA_POLICY_NESTED(nl80211_mbssid_config_policy), [NL80211_ATTR_MBSSID_ELEMS] = { .type = NLA_NESTED }, [NL80211_ATTR_RADAR_BACKGROUND] = { .type = NLA_FLAG }, + [NL80211_ATTR_AP_SETTINGS_FLAGS] = { .type = NLA_U32 }, }; /* policy for the key attributes */ @@ -5714,8 +5715,11 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) nl80211_calculate_ap_params(params); - if (info->attrs[NL80211_ATTR_EXTERNAL_AUTH_SUPPORT]) - params->flags |= AP_SETTINGS_EXTERNAL_AUTH_SUPPORT; + if (info->attrs[NL80211_ATTR_AP_SETTINGS_FLAGS]) + params->flags = nla_get_u32( + info->attrs[NL80211_ATTR_AP_SETTINGS_FLAGS]); + else if (info->attrs[NL80211_ATTR_EXTERNAL_AUTH_SUPPORT]) + params->flags |= NL80211_AP_SETTINGS_EXTERNAL_AUTH_SUPPORT; wdev_lock(wdev); err = rdev_start_ap(rdev, dev, params); -- cgit v1.2.3 From 87c1aec15dee8bdb245aabbd181f9f9e1a4770ae Mon Sep 17 00:00:00 2001 From: Veerendranath Jakkam Date: Fri, 26 Nov 2021 12:55:19 +0530 Subject: nl80211: Add support to offload SA Query procedures for AP SME device Add a flag attribute to use in ap settings to indicate userspace supports offloading of SA Query procedures to driver. Also add AP SME device feature flag to advertise that the SA Query procedures offloaded to driver when userspace indicates support for offloading of SA Query procedures. Driver handles SA Query procedures in driver's SME it self and skip sending SA Query request or response frames to userspace when userspace indicates support for SA Query procedures offload. But if userspace doesn't advertise support for SA Query procedures offload driver shall not offload SA Query procedures handling. Also userspace with SA Query procedures offload capability shall skip SA Query specific validations when driver indicates support for handling SA Query procedures. Signed-off-by: Veerendranath Jakkam Link: https://lore.kernel.org/r/1637911519-21306-2-git-send-email-vjakkam@codeaurora.org Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index ab461b2be157..d997252de986 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -5760,13 +5760,15 @@ enum nl80211_tdls_operation { NL80211_TDLS_DISABLE_LINK, }; -/* +/** * enum nl80211_ap_sme_features - device-integrated AP features - * Reserved for future use, no bits are defined in - * NL80211_ATTR_DEVICE_AP_SME yet. + * @NL80211_AP_SME_SA_QUERY_OFFLOAD: SA Query procedures offloaded to driver + * when user space indicates support for SA Query procedures offload during + * "start ap" with %NL80211_AP_SETTINGS_SA_QUERY_OFFLOAD_SUPPORT. + */ enum nl80211_ap_sme_features { + NL80211_AP_SME_SA_QUERY_OFFLOAD = 1 << 0, }; - */ /** * enum nl80211_feature_flags - device/driver features @@ -7494,9 +7496,15 @@ enum nl80211_mbssid_config_attributes { * * @NL80211_AP_SETTINGS_EXTERNAL_AUTH_SUPPORT: AP supports external * authentication. + * @NL80211_AP_SETTINGS_SA_QUERY_OFFLOAD_SUPPORT: Userspace supports SA Query + * procedures offload to driver. If driver advertises + * %NL80211_AP_SME_SA_QUERY_OFFLOAD in AP SME features, userspace shall + * ignore SA Query procedures and validations when this flag is set by + * userspace. */ enum nl80211_ap_settings_flags { NL80211_AP_SETTINGS_EXTERNAL_AUTH_SUPPORT = 1 << 0, + NL80211_AP_SETTINGS_SA_QUERY_OFFLOAD_SUPPORT = 1 << 1, }; #endif /* __LINUX_NL80211_H */ -- cgit v1.2.3 From d9a8297e873eda9d47aa5895ca47dc12eca0b198 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 1 Dec 2021 11:09:25 +0100 Subject: nl82011: clarify interface combinations wrt. channels Thinking about MLD (Draft 802.11be) now, it's clear that we'll have new limitations where different stations (or links) must be on _different_ channels (or in different frequency ranges even.) Clarify that the current limit of multiple channels is a maximum and the same one is OK. Signed-off-by: Johannes Berg Link: https://lore.kernel.org/r/20211201110924.2816d91b6862.I2d997abd525574529f88e941d90aeb640dbb1abf@changeid Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index d997252de986..f1a9d6594df2 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -5578,7 +5578,7 @@ enum nl80211_iface_limit_attrs { * => allows 8 of AP/GO that can have BI gcd >= min gcd * * numbers = [ #{STA} <= 2 ], channels = 2, max = 2 - * => allows two STAs on different channels + * => allows two STAs on the same or on different channels * * numbers = [ #{STA} <= 1, #{P2P-client,P2P-GO} <= 3 ], max = 4 * => allows a STA plus three P2P interfaces -- cgit v1.2.3 From ed98ea2128b6fd83bce13716edf8f5fe6c47f574 Mon Sep 17 00:00:00 2001 From: Xiu Jianfeng Date: Fri, 17 Dec 2021 09:01:52 +0800 Subject: audit: replace zero-length array with flexible-array member Zero-length arrays are deprecated and should be replaced with flexible-array members. Link: https://github.com/KSPP/linux/issues/78 Signed-off-by: Xiu Jianfeng Signed-off-by: Paul Moore --- include/uapi/linux/audit.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h index 9176a095fefc..8eda133ca4c1 100644 --- a/include/uapi/linux/audit.h +++ b/include/uapi/linux/audit.h @@ -514,7 +514,7 @@ struct audit_rule_data { __u32 values[AUDIT_MAX_FIELDS]; __u32 fieldflags[AUDIT_MAX_FIELDS]; __u32 buflen; /* total length of string fields */ - char buf[0]; /* string fields buffer */ + char buf[]; /* string fields buffer */ }; #endif /* _UAPI_LINUX_AUDIT_H_ */ -- cgit v1.2.3 From dbcefdeb2a58039f4c81d0361056fbdd9be906a1 Mon Sep 17 00:00:00 2001 From: Matt Johnston Date: Mon, 20 Dec 2021 10:31:04 +0800 Subject: mctp: emit RTM_NEWADDR and RTM_DELADDR Userspace can receive notification of MCTP address changes via RTNLGRP_MCTP_IFADDR rtnetlink multicast group. Signed-off-by: Matt Johnston Link: https://lore.kernel.org/r/20211220023104.1965509-1-matt@codeconstruct.com.au Signed-off-by: Jakub Kicinski --- include/uapi/linux/rtnetlink.h | 2 ++ net/mctp/device.c | 53 ++++++++++++++++++++++++++++++++++++++---- 2 files changed, 50 insertions(+), 5 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 5888492a5257..93d934cc4613 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -754,6 +754,8 @@ enum rtnetlink_groups { #define RTNLGRP_NEXTHOP RTNLGRP_NEXTHOP RTNLGRP_BRVLAN, #define RTNLGRP_BRVLAN RTNLGRP_BRVLAN + RTNLGRP_MCTP_IFADDR, +#define RTNLGRP_MCTP_IFADDR RTNLGRP_MCTP_IFADDR __RTNLGRP_MAX }; #define RTNLGRP_MAX (__RTNLGRP_MAX - 1) diff --git a/net/mctp/device.c b/net/mctp/device.c index 8799ee77e7b7..ef2755f82f87 100644 --- a/net/mctp/device.c +++ b/net/mctp/device.c @@ -35,14 +35,24 @@ struct mctp_dev *mctp_dev_get_rtnl(const struct net_device *dev) return rtnl_dereference(dev->mctp_ptr); } -static int mctp_fill_addrinfo(struct sk_buff *skb, struct netlink_callback *cb, - struct mctp_dev *mdev, mctp_eid_t eid) +static int mctp_addrinfo_size(void) +{ + return NLMSG_ALIGN(sizeof(struct ifaddrmsg)) + + nla_total_size(1) // IFA_LOCAL + + nla_total_size(1) // IFA_ADDRESS + ; +} + +/* flag should be NLM_F_MULTI for dump calls */ +static int mctp_fill_addrinfo(struct sk_buff *skb, + struct mctp_dev *mdev, mctp_eid_t eid, + int msg_type, u32 portid, u32 seq, int flag) { struct ifaddrmsg *hdr; struct nlmsghdr *nlh; - nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, - RTM_NEWADDR, sizeof(*hdr), NLM_F_MULTI); + nlh = nlmsg_put(skb, portid, seq, + msg_type, sizeof(*hdr), flag); if (!nlh) return -EMSGSIZE; @@ -72,10 +82,14 @@ static int mctp_dump_dev_addrinfo(struct mctp_dev *mdev, struct sk_buff *skb, struct netlink_callback *cb) { struct mctp_dump_cb *mcb = (void *)cb->ctx; + u32 portid, seq; int rc = 0; + portid = NETLINK_CB(cb->skb).portid; + seq = cb->nlh->nlmsg_seq; for (; mcb->a_idx < mdev->num_addrs; mcb->a_idx++) { - rc = mctp_fill_addrinfo(skb, cb, mdev, mdev->addrs[mcb->a_idx]); + rc = mctp_fill_addrinfo(skb, mdev, mdev->addrs[mcb->a_idx], + RTM_NEWADDR, portid, seq, NLM_F_MULTI); if (rc < 0) break; } @@ -127,6 +141,32 @@ out: return skb->len; } +static void mctp_addr_notify(struct mctp_dev *mdev, mctp_eid_t eid, int msg_type, + struct sk_buff *req_skb, struct nlmsghdr *req_nlh) +{ + u32 portid = NETLINK_CB(req_skb).portid; + struct net *net = dev_net(mdev->dev); + struct sk_buff *skb; + int rc = -ENOBUFS; + + skb = nlmsg_new(mctp_addrinfo_size(), GFP_KERNEL); + if (!skb) + goto out; + + rc = mctp_fill_addrinfo(skb, mdev, eid, msg_type, + portid, req_nlh->nlmsg_seq, 0); + if (rc < 0) { + WARN_ON_ONCE(rc == -EMSGSIZE); + goto out; + } + + rtnl_notify(skb, net, portid, RTNLGRP_MCTP_IFADDR, req_nlh, GFP_KERNEL); + return; +out: + kfree_skb(skb); + rtnl_set_sk_err(net, RTNLGRP_MCTP_IFADDR, rc); +} + static const struct nla_policy ifa_mctp_policy[IFA_MAX + 1] = { [IFA_ADDRESS] = { .type = NLA_U8 }, [IFA_LOCAL] = { .type = NLA_U8 }, @@ -189,6 +229,7 @@ static int mctp_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, kfree(tmp_addrs); + mctp_addr_notify(mdev, addr->s_addr, RTM_NEWADDR, skb, nlh); mctp_route_add_local(mdev, addr->s_addr); return 0; @@ -244,6 +285,8 @@ static int mctp_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, mdev->num_addrs--; spin_unlock_irqrestore(&mdev->addrs_lock, flags); + mctp_addr_notify(mdev, addr->s_addr, RTM_DELADDR, skb, nlh); + return 0; } -- cgit v1.2.3 From 80b3485f7d7bdb2468f2a9d6a346a1132d248309 Mon Sep 17 00:00:00 2001 From: "David E. Box" Date: Tue, 7 Dec 2021 17:50:10 -0800 Subject: PCI: Add #defines for accessing PCIe DVSEC fields Add #defines for accessing Vendor ID, Revision, Length, and ID offsets in the Designated Vendor Specific Extended Capability (DVSEC). Defined in PCIe r5.0, sec 7.9.6. Reviewed-by: Rafael J. Wysocki Acked-by: Bjorn Helgaas Signed-off-by: David E. Box Link: https://lore.kernel.org/r/20211208015015.891275-2-david.e.box@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/pci_regs.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index ff6ccbc6efe9..318f3f1f9e92 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -1086,7 +1086,11 @@ /* Designated Vendor-Specific (DVSEC, PCI_EXT_CAP_ID_DVSEC) */ #define PCI_DVSEC_HEADER1 0x4 /* Designated Vendor-Specific Header1 */ +#define PCI_DVSEC_HEADER1_VID(x) ((x) & 0xffff) +#define PCI_DVSEC_HEADER1_REV(x) (((x) >> 16) & 0xf) +#define PCI_DVSEC_HEADER1_LEN(x) (((x) >> 20) & 0xfff) #define PCI_DVSEC_HEADER2 0x8 /* Designated Vendor-Specific Header2 */ +#define PCI_DVSEC_HEADER2_ID(x) ((x) & 0xffff) /* Data Link Feature */ #define PCI_DLF_CAP 0x04 /* Capabilities Register */ -- cgit v1.2.3 From e6911affa416dc4e0c0b3f04cbe6b02ce13277f1 Mon Sep 17 00:00:00 2001 From: Xu Jia Date: Wed, 22 Dec 2021 17:06:58 +0800 Subject: xfrm: Add support for SM3 secure hash This patch allows IPsec to use SM3 HMAC authentication algorithm. Signed-off-by: Xu Jia Signed-off-by: Steffen Klassert --- include/uapi/linux/pfkeyv2.h | 1 + net/xfrm/xfrm_algo.c | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pfkeyv2.h b/include/uapi/linux/pfkeyv2.h index d65b11785260..798ba9ffd48c 100644 --- a/include/uapi/linux/pfkeyv2.h +++ b/include/uapi/linux/pfkeyv2.h @@ -309,6 +309,7 @@ struct sadb_x_filter { #define SADB_X_AALG_SHA2_512HMAC 7 #define SADB_X_AALG_RIPEMD160HMAC 8 #define SADB_X_AALG_AES_XCBC_MAC 9 +#define SADB_X_AALG_SM3_256HMAC 10 #define SADB_X_AALG_NULL 251 /* kame */ #define SADB_AALG_MAX 251 diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c index 4dae3ab8d030..00b5444a4d86 100644 --- a/net/xfrm/xfrm_algo.c +++ b/net/xfrm/xfrm_algo.c @@ -341,6 +341,26 @@ static struct xfrm_algo_desc aalg_list[] = { .pfkey_supported = 0, }, +{ + .name = "hmac(sm3)", + .compat = "sm3", + + .uinfo = { + .auth = { + .icv_truncbits = 256, + .icv_fullbits = 256, + } + }, + + .pfkey_supported = 1, + + .desc = { + .sadb_alg_id = SADB_X_AALG_SM3_256HMAC, + .sadb_alg_ivlen = 0, + .sadb_alg_minbits = 256, + .sadb_alg_maxbits = 256 + } +}, }; static struct xfrm_algo_desc ealg_list[] = { -- cgit v1.2.3 From 23b6a6df94c6ce434e7947cfad14b1640fb9f794 Mon Sep 17 00:00:00 2001 From: Xu Jia Date: Wed, 22 Dec 2021 17:06:59 +0800 Subject: xfrm: Add support for SM4 symmetric cipher algorithm This patch adds SM4 encryption algorithm entry to ealg_list. Signed-off-by: Xu Jia Signed-off-by: Steffen Klassert --- include/uapi/linux/pfkeyv2.h | 1 + net/xfrm/xfrm_algo.c | 21 +++++++++++++++++++++ 2 files changed, 22 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pfkeyv2.h b/include/uapi/linux/pfkeyv2.h index 798ba9ffd48c..8abae1f6749c 100644 --- a/include/uapi/linux/pfkeyv2.h +++ b/include/uapi/linux/pfkeyv2.h @@ -330,6 +330,7 @@ struct sadb_x_filter { #define SADB_X_EALG_AES_GCM_ICV16 20 #define SADB_X_EALG_CAMELLIACBC 22 #define SADB_X_EALG_NULL_AES_GMAC 23 +#define SADB_X_EALG_SM4CBC 24 #define SADB_EALG_MAX 253 /* last EALG */ /* private allocations should use 249-255 (RFC2407) */ #define SADB_X_EALG_SERPENTCBC 252 /* draft-ietf-ipsec-ciph-aes-cbc-00 */ diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c index 00b5444a4d86..094734fbec96 100644 --- a/net/xfrm/xfrm_algo.c +++ b/net/xfrm/xfrm_algo.c @@ -572,6 +572,27 @@ static struct xfrm_algo_desc ealg_list[] = { .sadb_alg_maxbits = 288 } }, +{ + .name = "cbc(sm4)", + .compat = "sm4", + + .uinfo = { + .encr = { + .geniv = "echainiv", + .blockbits = 128, + .defkeybits = 128, + } + }, + + .pfkey_supported = 1, + + .desc = { + .sadb_alg_id = SADB_X_EALG_SM4CBC, + .sadb_alg_ivlen = 16, + .sadb_alg_minbits = 128, + .sadb_alg_maxbits = 256 + } +}, }; static struct xfrm_algo_desc calg_list[] = { -- cgit v1.2.3 From 4e484b3e969b52effd95c17f7a86f39208b2ccf4 Mon Sep 17 00:00:00 2001 From: Antony Antony Date: Wed, 22 Dec 2021 14:11:18 +0100 Subject: xfrm: rate limit SA mapping change message to user space Kernel generates mapping change message, XFRM_MSG_MAPPING, when a source port chage is detected on a input state with UDP encapsulation set. Kernel generates a message for each IPsec packet with new source port. For a high speed flow per packet mapping change message can be excessive, and can overload the user space listener. Introduce rate limiting for XFRM_MSG_MAPPING message to the user space. The rate limiting is configurable via netlink, when adding a new SA or updating it. Use the new attribute XFRMA_MTIMER_THRESH in seconds. v1->v2 change: update xfrm_sa_len() v2->v3 changes: use u32 insted unsigned long to reduce size of struct xfrm_state fix xfrm_ompat size Reported-by: kernel test robot accept XFRM_MSG_MAPPING only when XFRMA_ENCAP is present Co-developed-by: Thomas Egerer Signed-off-by: Thomas Egerer Signed-off-by: Antony Antony Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 5 +++++ include/uapi/linux/xfrm.h | 1 + net/xfrm/xfrm_compat.c | 6 ++++-- net/xfrm/xfrm_state.c | 23 ++++++++++++++++++++++- net/xfrm/xfrm_user.c | 18 +++++++++++++++++- 5 files changed, 49 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 2308210793a0..2589e4c0501b 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -200,6 +200,11 @@ struct xfrm_state { struct xfrm_algo_aead *aead; const char *geniv; + /* mapping change rate limiting */ + __be16 new_mapping_sport; + u32 new_mapping; /* seconds */ + u32 mapping_maxage; /* seconds for input SA */ + /* Data for encapsulator */ struct xfrm_encap_tmpl *encap; struct sock __rcu *encap_sk; diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h index eda0426ec4c2..4e29d7851890 100644 --- a/include/uapi/linux/xfrm.h +++ b/include/uapi/linux/xfrm.h @@ -313,6 +313,7 @@ enum xfrm_attr_type_t { XFRMA_SET_MARK, /* __u32 */ XFRMA_SET_MARK_MASK, /* __u32 */ XFRMA_IF_ID, /* __u32 */ + XFRMA_MTIMER_THRESH, /* __u32 in seconds for input SA */ __XFRMA_MAX #define XFRMA_OUTPUT_MARK XFRMA_SET_MARK /* Compatibility */ diff --git a/net/xfrm/xfrm_compat.c b/net/xfrm/xfrm_compat.c index 2bf269390163..a0f62fa02e06 100644 --- a/net/xfrm/xfrm_compat.c +++ b/net/xfrm/xfrm_compat.c @@ -127,6 +127,7 @@ static const struct nla_policy compat_policy[XFRMA_MAX+1] = { [XFRMA_SET_MARK] = { .type = NLA_U32 }, [XFRMA_SET_MARK_MASK] = { .type = NLA_U32 }, [XFRMA_IF_ID] = { .type = NLA_U32 }, + [XFRMA_MTIMER_THRESH] = { .type = NLA_U32 }, }; static struct nlmsghdr *xfrm_nlmsg_put_compat(struct sk_buff *skb, @@ -274,9 +275,10 @@ static int xfrm_xlate64_attr(struct sk_buff *dst, const struct nlattr *src) case XFRMA_SET_MARK: case XFRMA_SET_MARK_MASK: case XFRMA_IF_ID: + case XFRMA_MTIMER_THRESH: return xfrm_nla_cpy(dst, src, nla_len(src)); default: - BUILD_BUG_ON(XFRMA_MAX != XFRMA_IF_ID); + BUILD_BUG_ON(XFRMA_MAX != XFRMA_MTIMER_THRESH); pr_warn_once("unsupported nla_type %d\n", src->nla_type); return -EOPNOTSUPP; } @@ -431,7 +433,7 @@ static int xfrm_xlate32_attr(void *dst, const struct nlattr *nla, int err; if (type > XFRMA_MAX) { - BUILD_BUG_ON(XFRMA_MAX != XFRMA_IF_ID); + BUILD_BUG_ON(XFRMA_MAX != XFRMA_MTIMER_THRESH); NL_SET_ERR_MSG(extack, "Bad attribute"); return -EOPNOTSUPP; } diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index a2f4001221d1..78d51399a0f4 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1593,6 +1593,9 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, x->km.seq = orig->km.seq; x->replay = orig->replay; x->preplay = orig->preplay; + x->mapping_maxage = orig->mapping_maxage; + x->new_mapping = 0; + x->new_mapping_sport = 0; return x; @@ -2242,7 +2245,7 @@ int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol) } EXPORT_SYMBOL(km_query); -int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport) +static int __km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport) { int err = -EINVAL; struct xfrm_mgr *km; @@ -2257,6 +2260,24 @@ int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport) rcu_read_unlock(); return err; } + +int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport) +{ + int ret = 0; + + if (x->mapping_maxage) { + if ((jiffies / HZ - x->new_mapping) > x->mapping_maxage || + x->new_mapping_sport != sport) { + x->new_mapping_sport = sport; + x->new_mapping = jiffies / HZ; + ret = __km_new_mapping(x, ipaddr, sport); + } + } else { + ret = __km_new_mapping(x, ipaddr, sport); + } + + return ret; +} EXPORT_SYMBOL(km_new_mapping); void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 portid) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 7c36cc1f3d79..130240680655 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -282,6 +282,10 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, err = 0; + if (attrs[XFRMA_MTIMER_THRESH]) + if (!attrs[XFRMA_ENCAP]) + err = -EINVAL; + out: return err; } @@ -521,6 +525,7 @@ static void xfrm_update_ae_params(struct xfrm_state *x, struct nlattr **attrs, struct nlattr *lt = attrs[XFRMA_LTIME_VAL]; struct nlattr *et = attrs[XFRMA_ETIMER_THRESH]; struct nlattr *rt = attrs[XFRMA_REPLAY_THRESH]; + struct nlattr *mt = attrs[XFRMA_MTIMER_THRESH]; if (re) { struct xfrm_replay_state_esn *replay_esn; @@ -552,6 +557,9 @@ static void xfrm_update_ae_params(struct xfrm_state *x, struct nlattr **attrs, if (rt) x->replay_maxdiff = nla_get_u32(rt); + + if (mt) + x->mapping_maxage = nla_get_u32(mt); } static void xfrm_smark_init(struct nlattr **attrs, struct xfrm_mark *m) @@ -1024,8 +1032,13 @@ static int copy_to_user_state_extra(struct xfrm_state *x, if (ret) goto out; } - if (x->security) + if (x->security) { ret = copy_sec_ctx(x->security, skb); + if (ret) + goto out; + } + if (x->mapping_maxage) + ret = nla_put_u32(skb, XFRMA_MTIMER_THRESH, x->mapping_maxage); out: return ret; } @@ -3069,6 +3082,9 @@ static inline unsigned int xfrm_sa_len(struct xfrm_state *x) /* Must count x->lastused as it may become non-zero behind our back. */ l += nla_total_size_64bit(sizeof(u64)); + if (x->mapping_maxage) + l += nla_total_size(sizeof(x->mapping_maxage)); + return l; } -- cgit v1.2.3 From 1bb412d46ca9df90a3fe4b704f5385f03621455d Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 29 Dec 2021 16:09:37 +0800 Subject: net_tstamp: define new flag HWTSTAMP_FLAG_BONDED_PHC_INDEX As we defined the new hwtstamp_config flag HWTSTAMP_FLAG_BONDED_PHC_INDEX as enum, it's not easy for userspace program to check if the flag is supported when build. Let's define the new flag so user space could build it on old kernel with ifdef check. Fixes: 9c9211a3fc7a ("net_tstamp: add new flag HWTSTAMP_FLAG_BONDED_PHC_INDEX") Signed-off-by: Hangbin Liu Acked-by: Richard Cochran Signed-off-by: Jakub Kicinski --- include/uapi/linux/net_tstamp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h index e258e52cfd1f..55501e5e7ac8 100644 --- a/include/uapi/linux/net_tstamp.h +++ b/include/uapi/linux/net_tstamp.h @@ -87,6 +87,7 @@ enum hwtstamp_flags { * will be the PHC index. */ HWTSTAMP_FLAG_BONDED_PHC_INDEX = (1<<0), +#define HWTSTAMP_FLAG_BONDED_PHC_INDEX HWTSTAMP_FLAG_BONDED_PHC_INDEX HWTSTAMP_FLAG_LAST = HWTSTAMP_FLAG_BONDED_PHC_INDEX, HWTSTAMP_FLAG_MASK = (HWTSTAMP_FLAG_LAST - 1) | HWTSTAMP_FLAG_LAST -- cgit v1.2.3 From 79d39fc503b43b566feae5bc9a57dfcffdf41bd1 Mon Sep 17 00:00:00 2001 From: Tony Lu Date: Tue, 28 Dec 2021 21:06:10 +0800 Subject: net/smc: Add netlink net namespace support This adds net namespace ID to diag of linkgroup, helps us to distinguish different namespaces, and net_cookie is unique in the whole system. Signed-off-by: Tony Lu Signed-off-by: David S. Miller --- include/uapi/linux/smc.h | 2 ++ include/uapi/linux/smc_diag.h | 11 ++++++----- net/smc/smc_core.c | 3 +++ net/smc/smc_diag.c | 16 +++++++++------- 4 files changed, 20 insertions(+), 12 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/smc.h b/include/uapi/linux/smc.h index 20f33b27787f..6c2874fd2c00 100644 --- a/include/uapi/linux/smc.h +++ b/include/uapi/linux/smc.h @@ -119,6 +119,8 @@ enum { SMC_NLA_LGR_R_CONNS_NUM, /* u32 */ SMC_NLA_LGR_R_V2_COMMON, /* nest */ SMC_NLA_LGR_R_V2, /* nest */ + SMC_NLA_LGR_R_NET_COOKIE, /* u64 */ + SMC_NLA_LGR_R_PAD, /* flag */ __SMC_NLA_LGR_R_MAX, SMC_NLA_LGR_R_MAX = __SMC_NLA_LGR_R_MAX - 1 }; diff --git a/include/uapi/linux/smc_diag.h b/include/uapi/linux/smc_diag.h index 8cb3a6fef553..c7008d87f1a4 100644 --- a/include/uapi/linux/smc_diag.h +++ b/include/uapi/linux/smc_diag.h @@ -84,11 +84,12 @@ struct smc_diag_conninfo { /* SMC_DIAG_LINKINFO */ struct smc_diag_linkinfo { - __u8 link_id; /* link identifier */ - __u8 ibname[IB_DEVICE_NAME_MAX]; /* name of the RDMA device */ - __u8 ibport; /* RDMA device port number */ - __u8 gid[40]; /* local GID */ - __u8 peer_gid[40]; /* peer GID */ + __u8 link_id; /* link identifier */ + __u8 ibname[IB_DEVICE_NAME_MAX]; /* name of the RDMA device */ + __u8 ibport; /* RDMA device port number */ + __u8 gid[40]; /* local GID */ + __u8 peer_gid[40]; /* peer GID */ + __aligned_u64 net_cookie; /* RDMA device net namespace */ }; struct smc_diag_lgrinfo { diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index b53cdecf621d..e47538451612 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -348,6 +348,9 @@ static int smc_nl_fill_lgr(struct smc_link_group *lgr, goto errattr; if (nla_put_u8(skb, SMC_NLA_LGR_R_VLAN_ID, lgr->vlan_id)) goto errattr; + if (nla_put_u64_64bit(skb, SMC_NLA_LGR_R_NET_COOKIE, + lgr->net->net_cookie, SMC_NLA_LGR_R_PAD)) + goto errattr; memcpy(smc_target, lgr->pnet_id, SMC_MAX_PNETID_LEN); smc_target[SMC_MAX_PNETID_LEN] = 0; if (nla_put_string(skb, SMC_NLA_LGR_R_PNETID, smc_target)) diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c index c952986a6aca..7c8dad28c18d 100644 --- a/net/smc/smc_diag.c +++ b/net/smc/smc_diag.c @@ -145,19 +145,21 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb, if (smc->conn.lgr && !smc->conn.lgr->is_smcd && (req->diag_ext & (1 << (SMC_DIAG_LGRINFO - 1))) && !list_empty(&smc->conn.lgr->list)) { + struct smc_link *link = smc->conn.lnk; + struct net *net = read_pnet(&link->smcibdev->ibdev->coredev.rdma_net); + struct smc_diag_lgrinfo linfo = { .role = smc->conn.lgr->role, - .lnk[0].ibport = smc->conn.lnk->ibport, - .lnk[0].link_id = smc->conn.lnk->link_id, + .lnk[0].ibport = link->ibport, + .lnk[0].link_id = link->link_id, + .lnk[0].net_cookie = net->net_cookie, }; memcpy(linfo.lnk[0].ibname, smc->conn.lgr->lnk[0].smcibdev->ibdev->name, - sizeof(smc->conn.lnk->smcibdev->ibdev->name)); - smc_gid_be16_convert(linfo.lnk[0].gid, - smc->conn.lnk->gid); - smc_gid_be16_convert(linfo.lnk[0].peer_gid, - smc->conn.lnk->peer_gid); + sizeof(link->smcibdev->ibdev->name)); + smc_gid_be16_convert(linfo.lnk[0].gid, link->gid); + smc_gid_be16_convert(linfo.lnk[0].peer_gid, link->peer_gid); if (nla_put(skb, SMC_DIAG_LGRINFO, sizeof(linfo), &linfo) < 0) goto errout; -- cgit v1.2.3 From ccae4a19c9140a34a0c5f0658812496dd8bbdeaf Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 25 Oct 2021 17:31:54 +0100 Subject: btrfs: remove no longer needed logic for replaying directory deletes Now that we log only dir index keys when logging a directory, we no longer need to deal with dir item keys in the log replay code for replaying directory deletes. This is also true for the case when we replay a log tree created by a kernel that still logs dir items. So remove the remaining code of the replay of directory deletes algorithm that deals with dir item keys. Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 158 ++++++++++++++++++---------------------- include/uapi/linux/btrfs_tree.h | 4 +- 2 files changed, 72 insertions(+), 90 deletions(-) (limited to 'include/uapi/linux') diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 06defcd559a0..3da8452f682c 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -2203,7 +2203,7 @@ static noinline int replay_one_dir_item(struct btrfs_trans_handle *trans, */ static noinline int find_dir_range(struct btrfs_root *root, struct btrfs_path *path, - u64 dirid, int key_type, + u64 dirid, u64 *start_ret, u64 *end_ret) { struct btrfs_key key; @@ -2216,7 +2216,7 @@ static noinline int find_dir_range(struct btrfs_root *root, return 1; key.objectid = dirid; - key.type = key_type; + key.type = BTRFS_DIR_LOG_INDEX_KEY; key.offset = *start_ret; ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); @@ -2230,7 +2230,7 @@ static noinline int find_dir_range(struct btrfs_root *root, if (ret != 0) btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); - if (key.type != key_type || key.objectid != dirid) { + if (key.type != BTRFS_DIR_LOG_INDEX_KEY || key.objectid != dirid) { ret = 1; goto next; } @@ -2257,7 +2257,7 @@ next: btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); - if (key.type != key_type || key.objectid != dirid) { + if (key.type != BTRFS_DIR_LOG_INDEX_KEY || key.objectid != dirid) { ret = 1; goto out; } @@ -2288,95 +2288,82 @@ static noinline int check_item_in_log(struct btrfs_trans_handle *trans, int ret; struct extent_buffer *eb; int slot; - u32 item_size; struct btrfs_dir_item *di; - struct btrfs_dir_item *log_di; int name_len; - unsigned long ptr; - unsigned long ptr_end; char *name; - struct inode *inode; + struct inode *inode = NULL; struct btrfs_key location; -again: + /* + * Currenly we only log dir index keys. Even if we replay a log created + * by an older kernel that logged both dir index and dir item keys, all + * we need to do is process the dir index keys, we (and our caller) can + * safely ignore dir item keys (key type BTRFS_DIR_ITEM_KEY). + */ + ASSERT(dir_key->type == BTRFS_DIR_INDEX_KEY); + eb = path->nodes[0]; slot = path->slots[0]; - item_size = btrfs_item_size_nr(eb, slot); - ptr = btrfs_item_ptr_offset(eb, slot); - ptr_end = ptr + item_size; - while (ptr < ptr_end) { - di = (struct btrfs_dir_item *)ptr; - name_len = btrfs_dir_name_len(eb, di); - name = kmalloc(name_len, GFP_NOFS); - if (!name) { - ret = -ENOMEM; - goto out; - } - read_extent_buffer(eb, name, (unsigned long)(di + 1), - name_len); - log_di = NULL; - if (log && dir_key->type == BTRFS_DIR_ITEM_KEY) { - log_di = btrfs_lookup_dir_item(trans, log, log_path, - dir_key->objectid, - name, name_len, 0); - } else if (log && dir_key->type == BTRFS_DIR_INDEX_KEY) { - log_di = btrfs_lookup_dir_index_item(trans, log, - log_path, - dir_key->objectid, - dir_key->offset, - name, name_len, 0); - } - if (!log_di) { - btrfs_dir_item_key_to_cpu(eb, di, &location); - btrfs_release_path(path); - btrfs_release_path(log_path); - inode = read_one_inode(root, location.objectid); - if (!inode) { - kfree(name); - return -EIO; - } + di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item); + name_len = btrfs_dir_name_len(eb, di); + name = kmalloc(name_len, GFP_NOFS); + if (!name) { + ret = -ENOMEM; + goto out; + } - ret = link_to_fixup_dir(trans, root, - path, location.objectid); - if (ret) { - kfree(name); - iput(inode); - goto out; - } + read_extent_buffer(eb, name, (unsigned long)(di + 1), name_len); - inc_nlink(inode); - ret = btrfs_unlink_inode(trans, BTRFS_I(dir), - BTRFS_I(inode), name, name_len); - if (!ret) - ret = btrfs_run_delayed_items(trans); - kfree(name); - iput(inode); - if (ret) - goto out; + if (log) { + struct btrfs_dir_item *log_di; - /* there might still be more names under this key - * check and repeat if required - */ - ret = btrfs_search_slot(NULL, root, dir_key, path, - 0, 0); - if (ret == 0) - goto again; + log_di = btrfs_lookup_dir_index_item(trans, log, log_path, + dir_key->objectid, + dir_key->offset, + name, name_len, 0); + if (IS_ERR(log_di)) { + ret = PTR_ERR(log_di); + goto out; + } else if (log_di) { + /* The dentry exists in the log, we have nothing to do. */ ret = 0; goto out; - } else if (IS_ERR(log_di)) { - kfree(name); - return PTR_ERR(log_di); } - btrfs_release_path(log_path); - kfree(name); + } - ptr = (unsigned long)(di + 1); - ptr += name_len; + btrfs_dir_item_key_to_cpu(eb, di, &location); + btrfs_release_path(path); + btrfs_release_path(log_path); + inode = read_one_inode(root, location.objectid); + if (!inode) { + ret = -EIO; + goto out; } - ret = 0; + + ret = link_to_fixup_dir(trans, root, path, location.objectid); + if (ret) + goto out; + + inc_nlink(inode); + ret = btrfs_unlink_inode(trans, BTRFS_I(dir), BTRFS_I(inode), name, + name_len); + if (ret) + goto out; + + ret = btrfs_run_delayed_items(trans); + if (ret) + goto out; + + /* + * Unlike dir item keys, dir index keys can only have one name (entry) in + * them, as there are no key collisions since each key has a unique offset + * (an index number), so we're done. + */ out: btrfs_release_path(path); btrfs_release_path(log_path); + kfree(name); + iput(inode); return ret; } @@ -2496,7 +2483,6 @@ static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans, { u64 range_start; u64 range_end; - int key_type = BTRFS_DIR_LOG_ITEM_KEY; int ret = 0; struct btrfs_key dir_key; struct btrfs_key found_key; @@ -2504,7 +2490,7 @@ static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans, struct inode *dir; dir_key.objectid = dirid; - dir_key.type = BTRFS_DIR_ITEM_KEY; + dir_key.type = BTRFS_DIR_INDEX_KEY; log_path = btrfs_alloc_path(); if (!log_path) return -ENOMEM; @@ -2518,14 +2504,14 @@ static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans, btrfs_free_path(log_path); return 0; } -again: + range_start = 0; range_end = 0; while (1) { if (del_all) range_end = (u64)-1; else { - ret = find_dir_range(log, path, dirid, key_type, + ret = find_dir_range(log, path, dirid, &range_start, &range_end); if (ret < 0) goto out; @@ -2552,8 +2538,10 @@ again: btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]); if (found_key.objectid != dirid || - found_key.type != dir_key.type) - goto next_type; + found_key.type != dir_key.type) { + ret = 0; + goto out; + } if (found_key.offset > range_end) break; @@ -2572,15 +2560,7 @@ again: break; range_start = range_end + 1; } - -next_type: ret = 0; - if (key_type == BTRFS_DIR_LOG_ITEM_KEY) { - key_type = BTRFS_DIR_LOG_INDEX_KEY; - dir_key.type = BTRFS_DIR_INDEX_KEY; - btrfs_release_path(path); - goto again; - } out: btrfs_release_path(path); btrfs_free_path(log_path); diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h index e1c4c732aaba..5416f1f1a77a 100644 --- a/include/uapi/linux/btrfs_tree.h +++ b/include/uapi/linux/btrfs_tree.h @@ -146,7 +146,9 @@ /* * dir items are the name -> inode pointers in a directory. There is one - * for every name in a directory. + * for every name in a directory. BTRFS_DIR_LOG_ITEM_KEY is no longer used + * but it's still defined here for documentation purposes and to help avoid + * having its numerical value reused in the future. */ #define BTRFS_DIR_LOG_ITEM_KEY 60 #define BTRFS_DIR_LOG_INDEX_KEY 72 -- cgit v1.2.3 From 5bc03b28ec24670e67c453ecb2bfbb9053b86838 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 20 Dec 2021 11:51:47 +0100 Subject: nl80211: clarify comment for mesh PLINK_BLOCKED state When a mesh link is in blocked state, it is very useful to still allow auth requests from the peer to re-establish it. When a remote node is power cycled, the peer state can easily end up in blocked state if multiple auth attempts are performed. Since this can lead to several minutes of downtime, we should accept auth attempts of the peer after it has come back. Signed-off-by: Felix Fietkau Link: https://lore.kernel.org/r/20211220105147.88625-1-nbd@nbd.name Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index f1a9d6594df2..195a238a322e 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -5623,7 +5623,7 @@ enum nl80211_if_combination_attrs { * @NL80211_PLINK_ESTAB: mesh peer link is established * @NL80211_PLINK_HOLDING: mesh peer link is being closed or cancelled * @NL80211_PLINK_BLOCKED: all frames transmitted from this mesh - * plink are discarded + * plink are discarded, except for authentication frames * @NUM_NL80211_PLINK_STATES: number of peer link states * @MAX_NL80211_PLINK_STATES: highest numerical value of plink states */ -- cgit v1.2.3 From 383f0993fc77152b0773c85ed69d6734baf9cb48 Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Tue, 14 Dec 2021 01:02:26 +0900 Subject: can: netlink: report the CAN controller mode supported flags Currently, the CAN netlink interface provides no easy ways to check the capabilities of a given controller. The only method from the command line is to try each CAN_CTRLMODE_* individually to check whether the netlink interface returns an -EOPNOTSUPP error or not (alternatively, one may find it easier to directly check the source code of the driver instead...) This patch introduces a method for the user to check both the supported and the static capabilities. The proposed method introduces a new IFLA nest: IFLA_CAN_CTRLMODE_EXT which extends the current IFLA_CAN_CTRLMODE. This is done to guaranty a full forward and backward compatibility between the kernel and the user land applications. The IFLA_CAN_CTRLMODE_EXT nest contains one single entry: IFLA_CAN_CTRLMODE_SUPPORTED. Because this entry is only used in one direction: kernel to userland, no new struct nla_policy are introduced. Below table explains how IFLA_CAN_CTRLMODE_SUPPORTED (hereafter: "supported") and can_ctrlmode::flags (hereafter: "flags") allow us to identify both the supported and the static capabilities, when masked with any of the CAN_CTRLMODE_* bit flags: supported & flags & Controller capabilities CAN_CTRLMODE_* CAN_CTRLMODE_* ----------------------------------------------------------------------- false false Feature not supported (always disabled) false true Static feature (always enabled) true false Feature supported but disabled true true Feature supported and enabled Link: https://lore.kernel.org/all/20211213160226.56219-5-mailhol.vincent@wanadoo.fr Signed-off-by: Vincent Mailhol Signed-off-by: Marc Kleine-Budde --- drivers/net/can/dev/netlink.c | 31 ++++++++++++++++++++++++++++++- include/uapi/linux/can/netlink.h | 13 +++++++++++++ 2 files changed, 43 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/drivers/net/can/dev/netlink.c b/drivers/net/can/dev/netlink.c index 26c336808be5..7633d98e3912 100644 --- a/drivers/net/can/dev/netlink.c +++ b/drivers/net/can/dev/netlink.c @@ -21,6 +21,7 @@ static const struct nla_policy can_policy[IFLA_CAN_MAX + 1] = { [IFLA_CAN_DATA_BITTIMING_CONST] = { .len = sizeof(struct can_bittiming_const) }, [IFLA_CAN_TERMINATION] = { .type = NLA_U16 }, [IFLA_CAN_TDC] = { .type = NLA_NESTED }, + [IFLA_CAN_CTRLMODE_EXT] = { .type = NLA_NESTED }, }; static const struct nla_policy can_tdc_policy[IFLA_CAN_TDC_MAX + 1] = { @@ -383,6 +384,12 @@ static size_t can_tdc_get_size(const struct net_device *dev) return size; } +static size_t can_ctrlmode_ext_get_size(void) +{ + return nla_total_size(0) + /* nest IFLA_CAN_CTRLMODE_EXT */ + nla_total_size(sizeof(u32)); /* IFLA_CAN_CTRLMODE_SUPPORTED */ +} + static size_t can_get_size(const struct net_device *dev) { struct can_priv *priv = netdev_priv(dev); @@ -415,6 +422,7 @@ static size_t can_get_size(const struct net_device *dev) priv->data_bitrate_const_cnt); size += sizeof(priv->bitrate_max); /* IFLA_CAN_BITRATE_MAX */ size += can_tdc_get_size(dev); /* IFLA_CAN_TDC */ + size += can_ctrlmode_ext_get_size(); /* IFLA_CAN_CTRLMODE_EXT */ return size; } @@ -472,6 +480,25 @@ err_cancel: return -EMSGSIZE; } +static int can_ctrlmode_ext_fill_info(struct sk_buff *skb, + const struct can_priv *priv) +{ + struct nlattr *nest; + + nest = nla_nest_start(skb, IFLA_CAN_CTRLMODE_EXT); + if (!nest) + return -EMSGSIZE; + + if (nla_put_u32(skb, IFLA_CAN_CTRLMODE_SUPPORTED, + priv->ctrlmode_supported)) { + nla_nest_cancel(skb, nest); + return -EMSGSIZE; + } + + nla_nest_end(skb, nest); + return 0; +} + static int can_fill_info(struct sk_buff *skb, const struct net_device *dev) { struct can_priv *priv = netdev_priv(dev); @@ -531,7 +558,9 @@ static int can_fill_info(struct sk_buff *skb, const struct net_device *dev) sizeof(priv->bitrate_max), &priv->bitrate_max)) || - (can_tdc_fill_info(skb, dev)) + can_tdc_fill_info(skb, dev) || + + can_ctrlmode_ext_fill_info(skb, priv) ) return -EMSGSIZE; diff --git a/include/uapi/linux/can/netlink.h b/include/uapi/linux/can/netlink.h index 75b85c60efb2..02ec32d69474 100644 --- a/include/uapi/linux/can/netlink.h +++ b/include/uapi/linux/can/netlink.h @@ -137,6 +137,7 @@ enum { IFLA_CAN_DATA_BITRATE_CONST, IFLA_CAN_BITRATE_MAX, IFLA_CAN_TDC, + IFLA_CAN_CTRLMODE_EXT, /* add new constants above here */ __IFLA_CAN_MAX, @@ -166,6 +167,18 @@ enum { IFLA_CAN_TDC_MAX = __IFLA_CAN_TDC - 1 }; +/* + * IFLA_CAN_CTRLMODE_EXT nest: controller mode extended parameters + */ +enum { + IFLA_CAN_CTRLMODE_UNSPEC, + IFLA_CAN_CTRLMODE_SUPPORTED, /* u32 */ + + /* add new constants above here */ + __IFLA_CAN_CTRLMODE, + IFLA_CAN_CTRLMODE_MAX = __IFLA_CAN_CTRLMODE - 1 +}; + /* u16 termination range: 1..65535 Ohms */ #define CAN_TERMINATION_DISABLED 0 -- cgit v1.2.3 From eac1b93c14d645ef147b049ace0d5230df755548 Mon Sep 17 00:00:00 2001 From: Coco Li Date: Wed, 5 Jan 2022 02:48:38 -0800 Subject: gro: add ability to control gro max packet size Eric Dumazet suggested to allow users to modify max GRO packet size. We have seen GRO being disabled by users of appliances (such as wifi access points) because of claimed bufferbloat issues, or some work arounds in sch_cake, to split GRO/GSO packets. Instead of disabling GRO completely, one can chose to limit the maximum packet size of GRO packets, depending on their latency constraints. This patch adds a per device gro_max_size attribute that can be changed with ip link command. ip link set dev eth0 gro_max_size 16000 Suggested-by: Eric Dumazet Signed-off-by: Coco Li Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 11 +++++++++++ include/uapi/linux/if_link.h | 1 + net/core/dev.c | 1 + net/core/gro.c | 6 +++++- net/core/rtnetlink.c | 22 ++++++++++++++++++++++ tools/include/uapi/linux/if_link.h | 1 + 6 files changed, 41 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 6f99c8f51b60..3213c7227b59 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1942,6 +1942,8 @@ enum netdev_ml_priv_type { * dev->addr_list_lock. * @unlink_list: As netif_addr_lock() can be called recursively, * keep a list of interfaces to be deleted. + * @gro_max_size: Maximum size of aggregated packet in generic + * receive offload (GRO) * * @dev_addr_shadow: Copy of @dev_addr to catch direct writes. * @linkwatch_dev_tracker: refcount tracker used by linkwatch. @@ -2131,6 +2133,8 @@ struct net_device { struct bpf_prog __rcu *xdp_prog; unsigned long gro_flush_timeout; int napi_defer_hard_irqs; +#define GRO_MAX_SIZE 65536 + unsigned int gro_max_size; rx_handler_func_t __rcu *rx_handler; void __rcu *rx_handler_data; @@ -4806,6 +4810,13 @@ static inline void netif_set_gso_max_segs(struct net_device *dev, WRITE_ONCE(dev->gso_max_segs, segs); } +static inline void netif_set_gro_max_size(struct net_device *dev, + unsigned int size) +{ + /* This pairs with the READ_ONCE() in skb_gro_receive() */ + WRITE_ONCE(dev->gro_max_size, size); +} + static inline void skb_gso_error_unwind(struct sk_buff *skb, __be16 protocol, int pulled_hlen, u16 mac_offset, int mac_len) diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 4ac53b30b6dc..6218f93f5c1a 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -347,6 +347,7 @@ enum { */ IFLA_PARENT_DEV_NAME, IFLA_PARENT_DEV_BUS_NAME, + IFLA_GRO_MAX_SIZE, __IFLA_MAX }; diff --git a/net/core/dev.c b/net/core/dev.c index 6c8b226b5f2f..83a4089990a0 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -10180,6 +10180,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, dev->gso_max_size = GSO_MAX_SIZE; dev->gso_max_segs = GSO_MAX_SEGS; + dev->gro_max_size = GRO_MAX_SIZE; dev->upper_level = 1; dev->lower_level = 1; #ifdef CONFIG_LOCKDEP diff --git a/net/core/gro.c b/net/core/gro.c index 8ec8b44596da..a11b286d1495 100644 --- a/net/core/gro.c +++ b/net/core/gro.c @@ -132,10 +132,14 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb) unsigned int headlen = skb_headlen(skb); unsigned int len = skb_gro_len(skb); unsigned int delta_truesize; + unsigned int gro_max_size; unsigned int new_truesize; struct sk_buff *lp; - if (unlikely(p->len + len >= 65536 || NAPI_GRO_CB(skb)->flush)) + /* pairs with WRITE_ONCE() in netif_set_gro_max_size() */ + gro_max_size = READ_ONCE(p->dev->gro_max_size); + + if (unlikely(p->len + len >= gro_max_size || NAPI_GRO_CB(skb)->flush)) return -E2BIG; lp = NAPI_GRO_CB(p)->last; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index d6eba554b137..e476403231f0 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1026,6 +1026,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + nla_total_size(4) /* IFLA_NUM_RX_QUEUES */ + nla_total_size(4) /* IFLA_GSO_MAX_SEGS */ + nla_total_size(4) /* IFLA_GSO_MAX_SIZE */ + + nla_total_size(4) /* IFLA_GRO_MAX_SIZE */ + nla_total_size(1) /* IFLA_OPERSTATE */ + nla_total_size(1) /* IFLA_LINKMODE */ + nla_total_size(4) /* IFLA_CARRIER_CHANGES */ @@ -1728,6 +1729,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, nla_put_u32(skb, IFLA_NUM_TX_QUEUES, dev->num_tx_queues) || nla_put_u32(skb, IFLA_GSO_MAX_SEGS, dev->gso_max_segs) || nla_put_u32(skb, IFLA_GSO_MAX_SIZE, dev->gso_max_size) || + nla_put_u32(skb, IFLA_GRO_MAX_SIZE, dev->gro_max_size) || #ifdef CONFIG_RPS nla_put_u32(skb, IFLA_NUM_RX_QUEUES, dev->num_rx_queues) || #endif @@ -1880,6 +1882,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_PROTO_DOWN_REASON] = { .type = NLA_NESTED }, [IFLA_NEW_IFINDEX] = NLA_POLICY_MIN(NLA_S32, 1), [IFLA_PARENT_DEV_NAME] = { .type = NLA_NUL_STRING }, + [IFLA_GRO_MAX_SIZE] = { .type = NLA_U32 }, }; static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { @@ -2299,6 +2302,14 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[], } } + if (tb[IFLA_GRO_MAX_SIZE]) { + u32 gro_max_size = nla_get_u32(tb[IFLA_GRO_MAX_SIZE]); + + if (gro_max_size > GRO_MAX_SIZE) { + NL_SET_ERR_MSG(extack, "too big gro_max_size"); + return -EINVAL; + } + } return 0; } @@ -2772,6 +2783,15 @@ static int do_setlink(const struct sk_buff *skb, } } + if (tb[IFLA_GRO_MAX_SIZE]) { + u32 gro_max_size = nla_get_u32(tb[IFLA_GRO_MAX_SIZE]); + + if (dev->gro_max_size ^ gro_max_size) { + netif_set_gro_max_size(dev, gro_max_size); + status |= DO_SETLINK_MODIFIED; + } + } + if (tb[IFLA_OPERSTATE]) set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE])); @@ -3222,6 +3242,8 @@ struct net_device *rtnl_create_link(struct net *net, const char *ifname, netif_set_gso_max_size(dev, nla_get_u32(tb[IFLA_GSO_MAX_SIZE])); if (tb[IFLA_GSO_MAX_SEGS]) netif_set_gso_max_segs(dev, nla_get_u32(tb[IFLA_GSO_MAX_SEGS])); + if (tb[IFLA_GRO_MAX_SIZE]) + netif_set_gro_max_size(dev, nla_get_u32(tb[IFLA_GRO_MAX_SIZE])); return dev; } diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h index 4ac53b30b6dc..6218f93f5c1a 100644 --- a/tools/include/uapi/linux/if_link.h +++ b/tools/include/uapi/linux/if_link.h @@ -347,6 +347,7 @@ enum { */ IFLA_PARENT_DEV_NAME, IFLA_PARENT_DEV_BUS_NAME, + IFLA_GRO_MAX_SIZE, __IFLA_MAX }; -- cgit v1.2.3