From e500d497c16c29304907f5de8bc79a8d4904c3e9 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 27 Nov 2024 19:37:56 -0800 Subject: usb: gadget: functionfs: fix spellos Fix typos in documentation as reported by codespell. Fixes: f0175ab51993 ("usb: gadget: f_fs: OS descriptors support") Fixes: ddf8abd25994 ("USB: f_fs: the FunctionFS driver") Signed-off-by: Randy Dunlap Cc: Michal Nazarewicz Cc: Andrzej Pietrasiewicz Cc: Greg Kroah-Hartman Cc: linux-usb@vger.kernel.org Link: https://lore.kernel.org/r/20241128033756.373517-1-rdunlap@infradead.org Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/usb/functionfs.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/usb/functionfs.h b/include/uapi/linux/usb/functionfs.h index 2ebdba111a8f..beef1752e36e 100644 --- a/include/uapi/linux/usb/functionfs.h +++ b/include/uapi/linux/usb/functionfs.h @@ -206,7 +206,7 @@ struct usb_ffs_dmabuf_transfer_req { * +-----+-----------------+------+--------------------------+ * | off | name | type | description | * +-----+-----------------+------+--------------------------+ - * | 0 | inteface | U8 | related interface number | + * | 0 | interface | U8 | related interface number | * +-----+-----------------+------+--------------------------+ * | 1 | dwLength | U32 | length of the descriptor | * +-----+-----------------+------+--------------------------+ @@ -224,7 +224,7 @@ struct usb_ffs_dmabuf_transfer_req { * +-----+-----------------+------+--------------------------+ * | off | name | type | description | * +-----+-----------------+------+--------------------------+ - * | 0 | inteface | U8 | related interface number | + * | 0 | interface | U8 | related interface number | * +-----+-----------------+------+--------------------------+ * | 1 | dwLength | U32 | length of the descriptor | * +-----+-----------------+------+--------------------------+ @@ -237,7 +237,7 @@ struct usb_ffs_dmabuf_transfer_req { * | 11 | ExtProp[] | | list of ext. prop. d. | * +-----+-----------------+------+--------------------------+ * - * ExtCompat[] is an array of valid Extended Compatiblity descriptors + * ExtCompat[] is an array of valid Extended Compatibility descriptors * which have the following format: * * +-----+-----------------------+------+-------------------------------------+ @@ -295,7 +295,7 @@ struct usb_functionfs_strings_head { * | 16 | stringtab | StringTab[lang_count] | table of strings per lang | * * For each language there is one stringtab entry (ie. there are lang_count - * stringtab entires). Each StringTab has following format: + * stringtab entries). Each StringTab has following format: * * | off | name | type | description | * |-----+---------+-------------------+------------------------------------| -- cgit v1.2.3 From 64e844505bc08cde3f346f193cbbbab0096fef54 Mon Sep 17 00:00:00 2001 From: Christian Hopps Date: Thu, 14 Nov 2024 02:06:59 -0500 Subject: include: uapi: protocol number and packet structs for AGGFRAG in ESP Add the RFC assigned IP protocol number for AGGFRAG. Add the on-wire basic and congestion-control IP-TFS packet headers. Signed-off-by: Christian Hopps Tested-by: Antony Antony Signed-off-by: Steffen Klassert --- include/uapi/linux/in.h | 2 ++ include/uapi/linux/ip.h | 16 ++++++++++++++++ 2 files changed, 18 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h index 5d32d53508d9..ced0fc3c3aa5 100644 --- a/include/uapi/linux/in.h +++ b/include/uapi/linux/in.h @@ -79,6 +79,8 @@ enum { #define IPPROTO_MPLS IPPROTO_MPLS IPPROTO_ETHERNET = 143, /* Ethernet-within-IPv6 Encapsulation */ #define IPPROTO_ETHERNET IPPROTO_ETHERNET + IPPROTO_AGGFRAG = 144, /* AGGFRAG in ESP (RFC 9347) */ +#define IPPROTO_AGGFRAG IPPROTO_AGGFRAG IPPROTO_RAW = 255, /* Raw IP packets */ #define IPPROTO_RAW IPPROTO_RAW IPPROTO_SMC = 256, /* Shared Memory Communications */ diff --git a/include/uapi/linux/ip.h b/include/uapi/linux/ip.h index 283dec7e3645..5bd7ce934d74 100644 --- a/include/uapi/linux/ip.h +++ b/include/uapi/linux/ip.h @@ -137,6 +137,22 @@ struct ip_beet_phdr { __u8 reserved; }; +struct ip_iptfs_hdr { + __u8 subtype; /* 0*: basic, 1: CC */ + __u8 flags; + __be16 block_offset; +}; + +struct ip_iptfs_cc_hdr { + __u8 subtype; /* 0: basic, 1*: CC */ + __u8 flags; + __be16 block_offset; + __be32 loss_rate; + __be64 rtt_adelay_xdelay; + __be32 tval; + __be32 techo; +}; + /* index values for the variables in ipv4_devconf */ enum { -- cgit v1.2.3 From f69eb4f65c58f5a081dbafb76011dad73757420c Mon Sep 17 00:00:00 2001 From: Christian Hopps Date: Thu, 14 Nov 2024 02:07:00 -0500 Subject: xfrm: netlink: add config (netlink) options Add netlink options for configuring IP-TFS SAs. Signed-off-by: Christian Hopps Tested-by: Antony Antony Signed-off-by: Steffen Klassert --- include/uapi/linux/xfrm.h | 9 +++++++- net/xfrm/xfrm_compat.c | 10 +++++++-- net/xfrm/xfrm_user.c | 52 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 68 insertions(+), 3 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h index d73a97e3030a..a23495c0e0a1 100644 --- a/include/uapi/linux/xfrm.h +++ b/include/uapi/linux/xfrm.h @@ -158,7 +158,8 @@ enum { #define XFRM_MODE_ROUTEOPTIMIZATION 2 #define XFRM_MODE_IN_TRIGGER 3 #define XFRM_MODE_BEET 4 -#define XFRM_MODE_MAX 5 +#define XFRM_MODE_IPTFS 5 +#define XFRM_MODE_MAX 6 /* Netlink configuration messages. */ enum { @@ -323,6 +324,12 @@ enum xfrm_attr_type_t { XFRMA_SA_DIR, /* __u8 */ XFRMA_NAT_KEEPALIVE_INTERVAL, /* __u32 in seconds for NAT keepalive */ XFRMA_SA_PCPU, /* __u32 */ + XFRMA_IPTFS_DROP_TIME, /* __u32 in: usec to wait for next seq */ + XFRMA_IPTFS_REORDER_WINDOW, /* __u16 in: reorder window size (pkts) */ + XFRMA_IPTFS_DONT_FRAG, /* out: don't use fragmentation */ + XFRMA_IPTFS_INIT_DELAY, /* __u32 out: initial packet wait delay (usec) */ + XFRMA_IPTFS_MAX_QSIZE, /* __u32 out: max ingress queue size (octets) */ + XFRMA_IPTFS_PKT_SIZE, /* __u32 out: size of outer packet, 0 for PMTU */ __XFRMA_MAX #define XFRMA_OUTPUT_MARK XFRMA_SET_MARK /* Compatibility */ diff --git a/net/xfrm/xfrm_compat.c b/net/xfrm/xfrm_compat.c index 5b9ee63e30b6..b8d2e6930041 100644 --- a/net/xfrm/xfrm_compat.c +++ b/net/xfrm/xfrm_compat.c @@ -284,9 +284,15 @@ static int xfrm_xlate64_attr(struct sk_buff *dst, const struct nlattr *src) case XFRMA_SA_DIR: case XFRMA_NAT_KEEPALIVE_INTERVAL: case XFRMA_SA_PCPU: + case XFRMA_IPTFS_DROP_TIME: + case XFRMA_IPTFS_REORDER_WINDOW: + case XFRMA_IPTFS_DONT_FRAG: + case XFRMA_IPTFS_INIT_DELAY: + case XFRMA_IPTFS_MAX_QSIZE: + case XFRMA_IPTFS_PKT_SIZE: return xfrm_nla_cpy(dst, src, nla_len(src)); default: - BUILD_BUG_ON(XFRMA_MAX != XFRMA_SA_PCPU); + BUILD_BUG_ON(XFRMA_MAX != XFRMA_IPTFS_PKT_SIZE); pr_warn_once("unsupported nla_type %d\n", src->nla_type); return -EOPNOTSUPP; } @@ -441,7 +447,7 @@ static int xfrm_xlate32_attr(void *dst, const struct nlattr *nla, int err; if (type > XFRMA_MAX) { - BUILD_BUG_ON(XFRMA_MAX != XFRMA_SA_PCPU); + BUILD_BUG_ON(XFRMA_MAX != XFRMA_IPTFS_PKT_SIZE); NL_SET_ERR_MSG(extack, "Bad attribute"); return -EOPNOTSUPP; } diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index b2876e09328b..749ec56101ac 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -301,6 +301,16 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, NL_SET_ERR_MSG(extack, "TFC padding can only be used in tunnel mode"); goto out; } + if ((attrs[XFRMA_IPTFS_DROP_TIME] || + attrs[XFRMA_IPTFS_REORDER_WINDOW] || + attrs[XFRMA_IPTFS_DONT_FRAG] || + attrs[XFRMA_IPTFS_INIT_DELAY] || + attrs[XFRMA_IPTFS_MAX_QSIZE] || + attrs[XFRMA_IPTFS_PKT_SIZE]) && + p->mode != XFRM_MODE_IPTFS) { + NL_SET_ERR_MSG(extack, "IP-TFS options can only be used in IP-TFS mode"); + goto out; + } break; case IPPROTO_COMP: @@ -421,6 +431,18 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, goto out; } + if (attrs[XFRMA_IPTFS_DROP_TIME]) { + NL_SET_ERR_MSG(extack, "IP-TFS drop time should not be set for output SA"); + err = -EINVAL; + goto out; + } + + if (attrs[XFRMA_IPTFS_REORDER_WINDOW]) { + NL_SET_ERR_MSG(extack, "IP-TFS reorder window should not be set for output SA"); + err = -EINVAL; + goto out; + } + if (attrs[XFRMA_REPLAY_VAL]) { struct xfrm_replay_state *replay; @@ -458,6 +480,30 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, } } + + if (attrs[XFRMA_IPTFS_DONT_FRAG]) { + NL_SET_ERR_MSG(extack, "IP-TFS don't fragment should not be set for input SA"); + err = -EINVAL; + goto out; + } + + if (attrs[XFRMA_IPTFS_INIT_DELAY]) { + NL_SET_ERR_MSG(extack, "IP-TFS initial delay should not be set for input SA"); + err = -EINVAL; + goto out; + } + + if (attrs[XFRMA_IPTFS_MAX_QSIZE]) { + NL_SET_ERR_MSG(extack, "IP-TFS max queue size should not be set for input SA"); + err = -EINVAL; + goto out; + } + + if (attrs[XFRMA_IPTFS_PKT_SIZE]) { + NL_SET_ERR_MSG(extack, "IP-TFS packet size should not be set for input SA"); + err = -EINVAL; + goto out; + } } if (!sa_dir && attrs[XFRMA_SA_PCPU]) { @@ -3220,6 +3266,12 @@ const struct nla_policy xfrma_policy[XFRMA_MAX+1] = { [XFRMA_SA_DIR] = NLA_POLICY_RANGE(NLA_U8, XFRM_SA_DIR_IN, XFRM_SA_DIR_OUT), [XFRMA_NAT_KEEPALIVE_INTERVAL] = { .type = NLA_U32 }, [XFRMA_SA_PCPU] = { .type = NLA_U32 }, + [XFRMA_IPTFS_DROP_TIME] = { .type = NLA_U32 }, + [XFRMA_IPTFS_REORDER_WINDOW] = { .type = NLA_U16 }, + [XFRMA_IPTFS_DONT_FRAG] = { .type = NLA_FLAG }, + [XFRMA_IPTFS_INIT_DELAY] = { .type = NLA_U32 }, + [XFRMA_IPTFS_MAX_QSIZE] = { .type = NLA_U32 }, + [XFRMA_IPTFS_PKT_SIZE] = { .type = NLA_U32 }, }; EXPORT_SYMBOL_GPL(xfrma_policy); -- cgit v1.2.3 From d1716d5a44c37e5743bf6ea4e5cdbdab37727f27 Mon Sep 17 00:00:00 2001 From: Christian Hopps Date: Thu, 14 Nov 2024 02:07:02 -0500 Subject: xfrm: add generic iptfs defines and functionality Define `XFRM_MODE_IPTFS` and `IPSEC_MODE_IPTFS` constants, and add these to switch case and conditionals adjacent with the existing TUNNEL modes. Signed-off-by: Christian Hopps Tested-by: Antony Antony Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 1 + include/uapi/linux/ipsec.h | 3 ++- include/uapi/linux/snmp.h | 2 ++ net/ipv4/esp4.c | 3 ++- net/ipv6/esp6.c | 3 ++- net/netfilter/nft_xfrm.c | 3 ++- net/xfrm/xfrm_device.c | 1 + net/xfrm/xfrm_output.c | 4 ++++ net/xfrm/xfrm_policy.c | 8 ++++++-- net/xfrm/xfrm_proc.c | 2 ++ net/xfrm/xfrm_state.c | 12 ++++++++++++ net/xfrm/xfrm_user.c | 12 ++++++++++++ 12 files changed, 48 insertions(+), 6 deletions(-) (limited to 'include/uapi') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 1ebc09cde627..4b0677e48190 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -38,6 +38,7 @@ #define XFRM_PROTO_COMP 108 #define XFRM_PROTO_IPIP 4 #define XFRM_PROTO_IPV6 41 +#define XFRM_PROTO_IPTFS IPPROTO_AGGFRAG #define XFRM_PROTO_ROUTING IPPROTO_ROUTING #define XFRM_PROTO_DSTOPTS IPPROTO_DSTOPTS diff --git a/include/uapi/linux/ipsec.h b/include/uapi/linux/ipsec.h index 50d8ee1791e2..696b790f4346 100644 --- a/include/uapi/linux/ipsec.h +++ b/include/uapi/linux/ipsec.h @@ -14,7 +14,8 @@ enum { IPSEC_MODE_ANY = 0, /* We do not support this for SA */ IPSEC_MODE_TRANSPORT = 1, IPSEC_MODE_TUNNEL = 2, - IPSEC_MODE_BEET = 3 + IPSEC_MODE_BEET = 3, + IPSEC_MODE_IPTFS = 4 }; enum { diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index adf5fd78dd50..5a2553511190 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -339,6 +339,8 @@ enum LINUX_MIB_XFRMACQUIREERROR, /* XfrmAcquireError */ LINUX_MIB_XFRMOUTSTATEDIRERROR, /* XfrmOutStateDirError */ LINUX_MIB_XFRMINSTATEDIRERROR, /* XfrmInStateDirError */ + LINUX_MIB_XFRMINIPTFSERROR, /* XfrmInIptfsError */ + LINUX_MIB_XFRMOUTNOQSPACE, /* XfrmOutNoQueueSpace */ __LINUX_MIB_XFRMMAX }; diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index f3281312eb5e..b0fbf804bbba 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -816,7 +816,8 @@ int esp_input_done2(struct sk_buff *skb, int err) } skb_pull_rcsum(skb, hlen); - if (x->props.mode == XFRM_MODE_TUNNEL) + if (x->props.mode == XFRM_MODE_TUNNEL || + x->props.mode == XFRM_MODE_IPTFS) skb_reset_transport_header(skb); else skb_set_transport_header(skb, -ihl); diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index b2400c226a32..5f3d0cc1555a 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -859,7 +859,8 @@ int esp6_input_done2(struct sk_buff *skb, int err) skb_postpull_rcsum(skb, skb_network_header(skb), skb_network_header_len(skb)); skb_pull_rcsum(skb, hlen); - if (x->props.mode == XFRM_MODE_TUNNEL) + if (x->props.mode == XFRM_MODE_TUNNEL || + x->props.mode == XFRM_MODE_IPTFS) skb_reset_transport_header(skb); else skb_set_transport_header(skb, -hdr_len); diff --git a/net/netfilter/nft_xfrm.c b/net/netfilter/nft_xfrm.c index 8a07b46cc8fb..3210cfc966ab 100644 --- a/net/netfilter/nft_xfrm.c +++ b/net/netfilter/nft_xfrm.c @@ -112,7 +112,8 @@ static bool xfrm_state_addr_ok(enum nft_xfrm_keys k, u8 family, u8 mode) return true; } - return mode == XFRM_MODE_BEET || mode == XFRM_MODE_TUNNEL; + return mode == XFRM_MODE_BEET || mode == XFRM_MODE_TUNNEL || + mode == XFRM_MODE_IPTFS; } static void nft_xfrm_state_get_key(const struct nft_xfrm *priv, diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index 1fe1b07d879d..d1fa94e52cea 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -69,6 +69,7 @@ static void __xfrm_mode_beet_prep(struct xfrm_state *x, struct sk_buff *skb, static void xfrm_outer_mode_prep(struct xfrm_state *x, struct sk_buff *skb) { switch (x->outer_mode.encap) { + case XFRM_MODE_IPTFS: case XFRM_MODE_TUNNEL: if (x->outer_mode.family == AF_INET) return __xfrm_mode_tunnel_prep(x, skb, diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index ef81359e4038..b5025cf6136e 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -677,6 +677,10 @@ static void xfrm_get_inner_ipproto(struct sk_buff *skb, struct xfrm_state *x) return; } + if (x->outer_mode.encap == XFRM_MODE_IPTFS) { + xo->inner_ipproto = IPPROTO_AGGFRAG; + return; + } /* non-Tunnel Mode */ if (!skb->encapsulation) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index c04014ee623f..9e510021ee91 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2497,6 +2497,7 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, const struct flowi *fl, struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i]; if (tmpl->mode == XFRM_MODE_TUNNEL || + tmpl->mode == XFRM_MODE_IPTFS || tmpl->mode == XFRM_MODE_BEET) { remote = &tmpl->id.daddr; local = &tmpl->saddr; @@ -3294,7 +3295,8 @@ no_transform: ok: xfrm_pols_put(pols, drop_pols); if (dst && dst->xfrm && - dst->xfrm->props.mode == XFRM_MODE_TUNNEL) + (dst->xfrm->props.mode == XFRM_MODE_TUNNEL || + dst->xfrm->props.mode == XFRM_MODE_IPTFS)) dst->flags |= DST_XFRM_TUNNEL; return dst; @@ -4523,6 +4525,7 @@ static int migrate_tmpl_match(const struct xfrm_migrate *m, const struct xfrm_tm switch (t->mode) { case XFRM_MODE_TUNNEL: case XFRM_MODE_BEET: + case XFRM_MODE_IPTFS: if (xfrm_addr_equal(&t->id.daddr, &m->old_daddr, m->old_family) && xfrm_addr_equal(&t->saddr, &m->old_saddr, @@ -4565,7 +4568,8 @@ static int xfrm_policy_migrate(struct xfrm_policy *pol, continue; n++; if (pol->xfrm_vec[i].mode != XFRM_MODE_TUNNEL && - pol->xfrm_vec[i].mode != XFRM_MODE_BEET) + pol->xfrm_vec[i].mode != XFRM_MODE_BEET && + pol->xfrm_vec[i].mode != XFRM_MODE_IPTFS) continue; /* update endpoints */ memcpy(&pol->xfrm_vec[i].id.daddr, &mp->new_daddr, diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c index eeb984be03a7..8e07dd614b0b 100644 --- a/net/xfrm/xfrm_proc.c +++ b/net/xfrm/xfrm_proc.c @@ -43,6 +43,8 @@ static const struct snmp_mib xfrm_mib_list[] = { SNMP_MIB_ITEM("XfrmAcquireError", LINUX_MIB_XFRMACQUIREERROR), SNMP_MIB_ITEM("XfrmOutStateDirError", LINUX_MIB_XFRMOUTSTATEDIRERROR), SNMP_MIB_ITEM("XfrmInStateDirError", LINUX_MIB_XFRMINSTATEDIRERROR), + SNMP_MIB_ITEM("XfrmInIptfsError", LINUX_MIB_XFRMINIPTFSERROR), + SNMP_MIB_ITEM("XfrmOutNoQueueSpace", LINUX_MIB_XFRMOUTNOQSPACE), SNMP_MIB_SENTINEL }; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index cf68ba891729..34067cb8a479 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -467,6 +467,11 @@ static const struct xfrm_mode xfrm4_mode_map[XFRM_MODE_MAX] = { .flags = XFRM_MODE_FLAG_TUNNEL, .family = AF_INET, }, + [XFRM_MODE_IPTFS] = { + .encap = XFRM_MODE_IPTFS, + .flags = XFRM_MODE_FLAG_TUNNEL, + .family = AF_INET, + }, }; static const struct xfrm_mode xfrm6_mode_map[XFRM_MODE_MAX] = { @@ -488,6 +493,11 @@ static const struct xfrm_mode xfrm6_mode_map[XFRM_MODE_MAX] = { .flags = XFRM_MODE_FLAG_TUNNEL, .family = AF_INET6, }, + [XFRM_MODE_IPTFS] = { + .encap = XFRM_MODE_IPTFS, + .flags = XFRM_MODE_FLAG_TUNNEL, + .family = AF_INET6, + }, }; static const struct xfrm_mode *xfrm_get_mode(unsigned int encap, int family) @@ -2334,6 +2344,7 @@ static int __xfrm6_state_sort_cmp(const void *p) #endif case XFRM_MODE_TUNNEL: case XFRM_MODE_BEET: + case XFRM_MODE_IPTFS: return 4; } return 5; @@ -2360,6 +2371,7 @@ static int __xfrm6_tmpl_sort_cmp(const void *p) #endif case XFRM_MODE_TUNNEL: case XFRM_MODE_BEET: + case XFRM_MODE_IPTFS: return 3; } return 4; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 71b452fff8db..08c6d6f0179f 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -383,6 +383,16 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, case XFRM_MODE_ROUTEOPTIMIZATION: case XFRM_MODE_BEET: break; + case XFRM_MODE_IPTFS: + if (p->id.proto != IPPROTO_ESP) { + NL_SET_ERR_MSG(extack, "IP-TFS mode only supported with ESP"); + goto out; + } + if (sa_dir == 0) { + NL_SET_ERR_MSG(extack, "IP-TFS mode requires in or out direction attribute"); + goto out; + } + break; default: NL_SET_ERR_MSG(extack, "Unsupported mode"); @@ -2014,6 +2024,8 @@ static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family, return -EINVAL; } break; + case XFRM_MODE_IPTFS: + break; default: if (ut[i].family != prev_family) { NL_SET_ERR_MSG(extack, "Mode in template doesn't support a family change"); -- cgit v1.2.3 From 49922401c2190713c5cc03902dc68c3ecd3f13e8 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Wed, 4 Dec 2024 07:55:47 -0800 Subject: ethtool: separate definitions that are gonna be generated Reshuffle definitions that are gonna be generated into ethtool_netlink_generated.h and match ynl spec order. This should make it easier to compare the output of the ynl-gen-c to the existing uapi header. No functional changes. Things that are still remaining to be manually defined: - ETHTOOL_FLAG_ALL - probably no good way to add to spec? - some of the cable test bits (not sure whether it's possible to move to spec) - some of the stats definitions (no way currently to move to spec) Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20241204155549.641348-7-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- MAINTAINERS | 2 +- include/uapi/linux/ethtool_netlink.h | 893 +----------------------- include/uapi/linux/ethtool_netlink_generated.h | 899 +++++++++++++++++++++++++ 3 files changed, 901 insertions(+), 893 deletions(-) create mode 100644 include/uapi/linux/ethtool_netlink_generated.h (limited to 'include/uapi') diff --git a/MAINTAINERS b/MAINTAINERS index 686109008d8e..79756f2100e0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16280,7 +16280,7 @@ F: include/linux/inetdevice.h F: include/linux/netdev* F: include/linux/platform_data/wiznet.h F: include/uapi/linux/cn_proc.h -F: include/uapi/linux/ethtool_netlink.h +F: include/uapi/linux/ethtool_netlink* F: include/uapi/linux/if_* F: include/uapi/linux/net_shaper.h F: include/uapi/linux/netdev* diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 283305f6b063..9c909ce733a5 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -10,545 +10,12 @@ #define _UAPI_LINUX_ETHTOOL_NETLINK_H_ #include - -/* message types - userspace to kernel */ -enum { - ETHTOOL_MSG_USER_NONE, - ETHTOOL_MSG_STRSET_GET, - ETHTOOL_MSG_LINKINFO_GET, - ETHTOOL_MSG_LINKINFO_SET, - ETHTOOL_MSG_LINKMODES_GET, - ETHTOOL_MSG_LINKMODES_SET, - ETHTOOL_MSG_LINKSTATE_GET, - ETHTOOL_MSG_DEBUG_GET, - ETHTOOL_MSG_DEBUG_SET, - ETHTOOL_MSG_WOL_GET, - ETHTOOL_MSG_WOL_SET, - ETHTOOL_MSG_FEATURES_GET, - ETHTOOL_MSG_FEATURES_SET, - ETHTOOL_MSG_PRIVFLAGS_GET, - ETHTOOL_MSG_PRIVFLAGS_SET, - ETHTOOL_MSG_RINGS_GET, - ETHTOOL_MSG_RINGS_SET, - ETHTOOL_MSG_CHANNELS_GET, - ETHTOOL_MSG_CHANNELS_SET, - ETHTOOL_MSG_COALESCE_GET, - ETHTOOL_MSG_COALESCE_SET, - ETHTOOL_MSG_PAUSE_GET, - ETHTOOL_MSG_PAUSE_SET, - ETHTOOL_MSG_EEE_GET, - ETHTOOL_MSG_EEE_SET, - ETHTOOL_MSG_TSINFO_GET, - ETHTOOL_MSG_CABLE_TEST_ACT, - ETHTOOL_MSG_CABLE_TEST_TDR_ACT, - ETHTOOL_MSG_TUNNEL_INFO_GET, - ETHTOOL_MSG_FEC_GET, - ETHTOOL_MSG_FEC_SET, - ETHTOOL_MSG_MODULE_EEPROM_GET, - ETHTOOL_MSG_STATS_GET, - ETHTOOL_MSG_PHC_VCLOCKS_GET, - ETHTOOL_MSG_MODULE_GET, - ETHTOOL_MSG_MODULE_SET, - ETHTOOL_MSG_PSE_GET, - ETHTOOL_MSG_PSE_SET, - ETHTOOL_MSG_RSS_GET, - ETHTOOL_MSG_PLCA_GET_CFG, - ETHTOOL_MSG_PLCA_SET_CFG, - ETHTOOL_MSG_PLCA_GET_STATUS, - ETHTOOL_MSG_MM_GET, - ETHTOOL_MSG_MM_SET, - ETHTOOL_MSG_MODULE_FW_FLASH_ACT, - ETHTOOL_MSG_PHY_GET, - - /* add new constants above here */ - __ETHTOOL_MSG_USER_CNT, - ETHTOOL_MSG_USER_MAX = __ETHTOOL_MSG_USER_CNT - 1 -}; - -/* message types - kernel to userspace */ -enum { - ETHTOOL_MSG_KERNEL_NONE, - ETHTOOL_MSG_STRSET_GET_REPLY, - ETHTOOL_MSG_LINKINFO_GET_REPLY, - ETHTOOL_MSG_LINKINFO_NTF, - ETHTOOL_MSG_LINKMODES_GET_REPLY, - ETHTOOL_MSG_LINKMODES_NTF, - ETHTOOL_MSG_LINKSTATE_GET_REPLY, - ETHTOOL_MSG_DEBUG_GET_REPLY, - ETHTOOL_MSG_DEBUG_NTF, - ETHTOOL_MSG_WOL_GET_REPLY, - ETHTOOL_MSG_WOL_NTF, - ETHTOOL_MSG_FEATURES_GET_REPLY, - ETHTOOL_MSG_FEATURES_SET_REPLY, - ETHTOOL_MSG_FEATURES_NTF, - ETHTOOL_MSG_PRIVFLAGS_GET_REPLY, - ETHTOOL_MSG_PRIVFLAGS_NTF, - ETHTOOL_MSG_RINGS_GET_REPLY, - ETHTOOL_MSG_RINGS_NTF, - ETHTOOL_MSG_CHANNELS_GET_REPLY, - ETHTOOL_MSG_CHANNELS_NTF, - ETHTOOL_MSG_COALESCE_GET_REPLY, - ETHTOOL_MSG_COALESCE_NTF, - ETHTOOL_MSG_PAUSE_GET_REPLY, - ETHTOOL_MSG_PAUSE_NTF, - ETHTOOL_MSG_EEE_GET_REPLY, - ETHTOOL_MSG_EEE_NTF, - ETHTOOL_MSG_TSINFO_GET_REPLY, - ETHTOOL_MSG_CABLE_TEST_NTF, - ETHTOOL_MSG_CABLE_TEST_TDR_NTF, - ETHTOOL_MSG_TUNNEL_INFO_GET_REPLY, - ETHTOOL_MSG_FEC_GET_REPLY, - ETHTOOL_MSG_FEC_NTF, - ETHTOOL_MSG_MODULE_EEPROM_GET_REPLY, - ETHTOOL_MSG_STATS_GET_REPLY, - ETHTOOL_MSG_PHC_VCLOCKS_GET_REPLY, - ETHTOOL_MSG_MODULE_GET_REPLY, - ETHTOOL_MSG_MODULE_NTF, - ETHTOOL_MSG_PSE_GET_REPLY, - ETHTOOL_MSG_RSS_GET_REPLY, - ETHTOOL_MSG_PLCA_GET_CFG_REPLY, - ETHTOOL_MSG_PLCA_GET_STATUS_REPLY, - ETHTOOL_MSG_PLCA_NTF, - ETHTOOL_MSG_MM_GET_REPLY, - ETHTOOL_MSG_MM_NTF, - ETHTOOL_MSG_MODULE_FW_FLASH_NTF, - ETHTOOL_MSG_PHY_GET_REPLY, - ETHTOOL_MSG_PHY_NTF, - - /* add new constants above here */ - __ETHTOOL_MSG_KERNEL_CNT, - ETHTOOL_MSG_KERNEL_MAX = __ETHTOOL_MSG_KERNEL_CNT - 1 -}; - -/* request header */ - -enum ethtool_header_flags { - ETHTOOL_FLAG_COMPACT_BITSETS = 1 << 0, /* use compact bitsets in reply */ - ETHTOOL_FLAG_OMIT_REPLY = 1 << 1, /* provide optional reply for SET or ACT requests */ - ETHTOOL_FLAG_STATS = 1 << 2, /* request statistics, if supported by the driver */ -}; +#include #define ETHTOOL_FLAG_ALL (ETHTOOL_FLAG_COMPACT_BITSETS | \ ETHTOOL_FLAG_OMIT_REPLY | \ ETHTOOL_FLAG_STATS) -enum { - ETHTOOL_A_HEADER_UNSPEC, - ETHTOOL_A_HEADER_DEV_INDEX, /* u32 */ - ETHTOOL_A_HEADER_DEV_NAME, /* string */ - ETHTOOL_A_HEADER_FLAGS, /* u32 - ETHTOOL_FLAG_* */ - ETHTOOL_A_HEADER_PHY_INDEX, /* u32 */ - - /* add new constants above here */ - __ETHTOOL_A_HEADER_CNT, - ETHTOOL_A_HEADER_MAX = __ETHTOOL_A_HEADER_CNT - 1 -}; - -/* bit sets */ - -enum { - ETHTOOL_A_BITSET_BIT_UNSPEC, - ETHTOOL_A_BITSET_BIT_INDEX, /* u32 */ - ETHTOOL_A_BITSET_BIT_NAME, /* string */ - ETHTOOL_A_BITSET_BIT_VALUE, /* flag */ - - /* add new constants above here */ - __ETHTOOL_A_BITSET_BIT_CNT, - ETHTOOL_A_BITSET_BIT_MAX = __ETHTOOL_A_BITSET_BIT_CNT - 1 -}; - -enum { - ETHTOOL_A_BITSET_BITS_UNSPEC, - ETHTOOL_A_BITSET_BITS_BIT, /* nest - _A_BITSET_BIT_* */ - - /* add new constants above here */ - __ETHTOOL_A_BITSET_BITS_CNT, - ETHTOOL_A_BITSET_BITS_MAX = __ETHTOOL_A_BITSET_BITS_CNT - 1 -}; - -enum { - ETHTOOL_A_BITSET_UNSPEC, - ETHTOOL_A_BITSET_NOMASK, /* flag */ - ETHTOOL_A_BITSET_SIZE, /* u32 */ - ETHTOOL_A_BITSET_BITS, /* nest - _A_BITSET_BITS_* */ - ETHTOOL_A_BITSET_VALUE, /* binary */ - ETHTOOL_A_BITSET_MASK, /* binary */ - - /* add new constants above here */ - __ETHTOOL_A_BITSET_CNT, - ETHTOOL_A_BITSET_MAX = __ETHTOOL_A_BITSET_CNT - 1 -}; - -/* string sets */ - -enum { - ETHTOOL_A_STRING_UNSPEC, - ETHTOOL_A_STRING_INDEX, /* u32 */ - ETHTOOL_A_STRING_VALUE, /* string */ - - /* add new constants above here */ - __ETHTOOL_A_STRING_CNT, - ETHTOOL_A_STRING_MAX = __ETHTOOL_A_STRING_CNT - 1 -}; - -enum { - ETHTOOL_A_STRINGS_UNSPEC, - ETHTOOL_A_STRINGS_STRING, /* nest - _A_STRINGS_* */ - - /* add new constants above here */ - __ETHTOOL_A_STRINGS_CNT, - ETHTOOL_A_STRINGS_MAX = __ETHTOOL_A_STRINGS_CNT - 1 -}; - -enum { - ETHTOOL_A_STRINGSET_UNSPEC, - ETHTOOL_A_STRINGSET_ID, /* u32 */ - ETHTOOL_A_STRINGSET_COUNT, /* u32 */ - ETHTOOL_A_STRINGSET_STRINGS, /* nest - _A_STRINGS_* */ - - /* add new constants above here */ - __ETHTOOL_A_STRINGSET_CNT, - ETHTOOL_A_STRINGSET_MAX = __ETHTOOL_A_STRINGSET_CNT - 1 -}; - -enum { - ETHTOOL_A_STRINGSETS_UNSPEC, - ETHTOOL_A_STRINGSETS_STRINGSET, /* nest - _A_STRINGSET_* */ - - /* add new constants above here */ - __ETHTOOL_A_STRINGSETS_CNT, - ETHTOOL_A_STRINGSETS_MAX = __ETHTOOL_A_STRINGSETS_CNT - 1 -}; - -/* STRSET */ - -enum { - ETHTOOL_A_STRSET_UNSPEC, - ETHTOOL_A_STRSET_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_STRSET_STRINGSETS, /* nest - _A_STRINGSETS_* */ - ETHTOOL_A_STRSET_COUNTS_ONLY, /* flag */ - - /* add new constants above here */ - __ETHTOOL_A_STRSET_CNT, - ETHTOOL_A_STRSET_MAX = __ETHTOOL_A_STRSET_CNT - 1 -}; - -/* LINKINFO */ - -enum { - ETHTOOL_A_LINKINFO_UNSPEC, - ETHTOOL_A_LINKINFO_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_LINKINFO_PORT, /* u8 */ - ETHTOOL_A_LINKINFO_PHYADDR, /* u8 */ - ETHTOOL_A_LINKINFO_TP_MDIX, /* u8 */ - ETHTOOL_A_LINKINFO_TP_MDIX_CTRL, /* u8 */ - ETHTOOL_A_LINKINFO_TRANSCEIVER, /* u8 */ - - /* add new constants above here */ - __ETHTOOL_A_LINKINFO_CNT, - ETHTOOL_A_LINKINFO_MAX = __ETHTOOL_A_LINKINFO_CNT - 1 -}; - -/* LINKMODES */ - -enum { - ETHTOOL_A_LINKMODES_UNSPEC, - ETHTOOL_A_LINKMODES_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_LINKMODES_AUTONEG, /* u8 */ - ETHTOOL_A_LINKMODES_OURS, /* bitset */ - ETHTOOL_A_LINKMODES_PEER, /* bitset */ - ETHTOOL_A_LINKMODES_SPEED, /* u32 */ - ETHTOOL_A_LINKMODES_DUPLEX, /* u8 */ - ETHTOOL_A_LINKMODES_MASTER_SLAVE_CFG, /* u8 */ - ETHTOOL_A_LINKMODES_MASTER_SLAVE_STATE, /* u8 */ - ETHTOOL_A_LINKMODES_LANES, /* u32 */ - ETHTOOL_A_LINKMODES_RATE_MATCHING, /* u8 */ - - /* add new constants above here */ - __ETHTOOL_A_LINKMODES_CNT, - ETHTOOL_A_LINKMODES_MAX = __ETHTOOL_A_LINKMODES_CNT - 1 -}; - -/* LINKSTATE */ - -enum { - ETHTOOL_A_LINKSTATE_UNSPEC, - ETHTOOL_A_LINKSTATE_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_LINKSTATE_LINK, /* u8 */ - ETHTOOL_A_LINKSTATE_SQI, /* u32 */ - ETHTOOL_A_LINKSTATE_SQI_MAX, /* u32 */ - ETHTOOL_A_LINKSTATE_EXT_STATE, /* u8 */ - ETHTOOL_A_LINKSTATE_EXT_SUBSTATE, /* u8 */ - ETHTOOL_A_LINKSTATE_EXT_DOWN_CNT, /* u32 */ - - /* add new constants above here */ - __ETHTOOL_A_LINKSTATE_CNT, - ETHTOOL_A_LINKSTATE_MAX = __ETHTOOL_A_LINKSTATE_CNT - 1 -}; - -/* DEBUG */ - -enum { - ETHTOOL_A_DEBUG_UNSPEC, - ETHTOOL_A_DEBUG_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_DEBUG_MSGMASK, /* bitset */ - - /* add new constants above here */ - __ETHTOOL_A_DEBUG_CNT, - ETHTOOL_A_DEBUG_MAX = __ETHTOOL_A_DEBUG_CNT - 1 -}; - -/* WOL */ - -enum { - ETHTOOL_A_WOL_UNSPEC, - ETHTOOL_A_WOL_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_WOL_MODES, /* bitset */ - ETHTOOL_A_WOL_SOPASS, /* binary */ - - /* add new constants above here */ - __ETHTOOL_A_WOL_CNT, - ETHTOOL_A_WOL_MAX = __ETHTOOL_A_WOL_CNT - 1 -}; - -/* FEATURES */ - -enum { - ETHTOOL_A_FEATURES_UNSPEC, - ETHTOOL_A_FEATURES_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_FEATURES_HW, /* bitset */ - ETHTOOL_A_FEATURES_WANTED, /* bitset */ - ETHTOOL_A_FEATURES_ACTIVE, /* bitset */ - ETHTOOL_A_FEATURES_NOCHANGE, /* bitset */ - - /* add new constants above here */ - __ETHTOOL_A_FEATURES_CNT, - ETHTOOL_A_FEATURES_MAX = __ETHTOOL_A_FEATURES_CNT - 1 -}; - -/* PRIVFLAGS */ - -enum { - ETHTOOL_A_PRIVFLAGS_UNSPEC, - ETHTOOL_A_PRIVFLAGS_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_PRIVFLAGS_FLAGS, /* bitset */ - - /* add new constants above here */ - __ETHTOOL_A_PRIVFLAGS_CNT, - ETHTOOL_A_PRIVFLAGS_MAX = __ETHTOOL_A_PRIVFLAGS_CNT - 1 -}; - -/* RINGS */ - -enum { - ETHTOOL_TCP_DATA_SPLIT_UNKNOWN = 0, - ETHTOOL_TCP_DATA_SPLIT_DISABLED, - ETHTOOL_TCP_DATA_SPLIT_ENABLED, -}; - -enum { - ETHTOOL_A_RINGS_UNSPEC, - ETHTOOL_A_RINGS_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_RINGS_RX_MAX, /* u32 */ - ETHTOOL_A_RINGS_RX_MINI_MAX, /* u32 */ - ETHTOOL_A_RINGS_RX_JUMBO_MAX, /* u32 */ - ETHTOOL_A_RINGS_TX_MAX, /* u32 */ - ETHTOOL_A_RINGS_RX, /* u32 */ - ETHTOOL_A_RINGS_RX_MINI, /* u32 */ - ETHTOOL_A_RINGS_RX_JUMBO, /* u32 */ - ETHTOOL_A_RINGS_TX, /* u32 */ - ETHTOOL_A_RINGS_RX_BUF_LEN, /* u32 */ - ETHTOOL_A_RINGS_TCP_DATA_SPLIT, /* u8 */ - ETHTOOL_A_RINGS_CQE_SIZE, /* u32 */ - ETHTOOL_A_RINGS_TX_PUSH, /* u8 */ - ETHTOOL_A_RINGS_RX_PUSH, /* u8 */ - ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN, /* u32 */ - ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN_MAX, /* u32 */ - - /* add new constants above here */ - __ETHTOOL_A_RINGS_CNT, - ETHTOOL_A_RINGS_MAX = (__ETHTOOL_A_RINGS_CNT - 1) -}; - -/* CHANNELS */ - -enum { - ETHTOOL_A_CHANNELS_UNSPEC, - ETHTOOL_A_CHANNELS_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_CHANNELS_RX_MAX, /* u32 */ - ETHTOOL_A_CHANNELS_TX_MAX, /* u32 */ - ETHTOOL_A_CHANNELS_OTHER_MAX, /* u32 */ - ETHTOOL_A_CHANNELS_COMBINED_MAX, /* u32 */ - ETHTOOL_A_CHANNELS_RX_COUNT, /* u32 */ - ETHTOOL_A_CHANNELS_TX_COUNT, /* u32 */ - ETHTOOL_A_CHANNELS_OTHER_COUNT, /* u32 */ - ETHTOOL_A_CHANNELS_COMBINED_COUNT, /* u32 */ - - /* add new constants above here */ - __ETHTOOL_A_CHANNELS_CNT, - ETHTOOL_A_CHANNELS_MAX = (__ETHTOOL_A_CHANNELS_CNT - 1) -}; - -/* COALESCE */ - -enum { - ETHTOOL_A_COALESCE_UNSPEC, - ETHTOOL_A_COALESCE_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_COALESCE_RX_USECS, /* u32 */ - ETHTOOL_A_COALESCE_RX_MAX_FRAMES, /* u32 */ - ETHTOOL_A_COALESCE_RX_USECS_IRQ, /* u32 */ - ETHTOOL_A_COALESCE_RX_MAX_FRAMES_IRQ, /* u32 */ - ETHTOOL_A_COALESCE_TX_USECS, /* u32 */ - ETHTOOL_A_COALESCE_TX_MAX_FRAMES, /* u32 */ - ETHTOOL_A_COALESCE_TX_USECS_IRQ, /* u32 */ - ETHTOOL_A_COALESCE_TX_MAX_FRAMES_IRQ, /* u32 */ - ETHTOOL_A_COALESCE_STATS_BLOCK_USECS, /* u32 */ - ETHTOOL_A_COALESCE_USE_ADAPTIVE_RX, /* u8 */ - ETHTOOL_A_COALESCE_USE_ADAPTIVE_TX, /* u8 */ - ETHTOOL_A_COALESCE_PKT_RATE_LOW, /* u32 */ - ETHTOOL_A_COALESCE_RX_USECS_LOW, /* u32 */ - ETHTOOL_A_COALESCE_RX_MAX_FRAMES_LOW, /* u32 */ - ETHTOOL_A_COALESCE_TX_USECS_LOW, /* u32 */ - ETHTOOL_A_COALESCE_TX_MAX_FRAMES_LOW, /* u32 */ - ETHTOOL_A_COALESCE_PKT_RATE_HIGH, /* u32 */ - ETHTOOL_A_COALESCE_RX_USECS_HIGH, /* u32 */ - ETHTOOL_A_COALESCE_RX_MAX_FRAMES_HIGH, /* u32 */ - ETHTOOL_A_COALESCE_TX_USECS_HIGH, /* u32 */ - ETHTOOL_A_COALESCE_TX_MAX_FRAMES_HIGH, /* u32 */ - ETHTOOL_A_COALESCE_RATE_SAMPLE_INTERVAL, /* u32 */ - ETHTOOL_A_COALESCE_USE_CQE_MODE_TX, /* u8 */ - ETHTOOL_A_COALESCE_USE_CQE_MODE_RX, /* u8 */ - ETHTOOL_A_COALESCE_TX_AGGR_MAX_BYTES, /* u32 */ - ETHTOOL_A_COALESCE_TX_AGGR_MAX_FRAMES, /* u32 */ - ETHTOOL_A_COALESCE_TX_AGGR_TIME_USECS, /* u32 */ - /* nest - _A_PROFILE_IRQ_MODERATION */ - ETHTOOL_A_COALESCE_RX_PROFILE, - /* nest - _A_PROFILE_IRQ_MODERATION */ - ETHTOOL_A_COALESCE_TX_PROFILE, - - /* add new constants above here */ - __ETHTOOL_A_COALESCE_CNT, - ETHTOOL_A_COALESCE_MAX = (__ETHTOOL_A_COALESCE_CNT - 1) -}; - -enum { - ETHTOOL_A_PROFILE_UNSPEC, - /* nest, _A_IRQ_MODERATION_* */ - ETHTOOL_A_PROFILE_IRQ_MODERATION, - __ETHTOOL_A_PROFILE_CNT, - ETHTOOL_A_PROFILE_MAX = (__ETHTOOL_A_PROFILE_CNT - 1) -}; - -enum { - ETHTOOL_A_IRQ_MODERATION_UNSPEC, - ETHTOOL_A_IRQ_MODERATION_USEC, /* u32 */ - ETHTOOL_A_IRQ_MODERATION_PKTS, /* u32 */ - ETHTOOL_A_IRQ_MODERATION_COMPS, /* u32 */ - - __ETHTOOL_A_IRQ_MODERATION_CNT, - ETHTOOL_A_IRQ_MODERATION_MAX = (__ETHTOOL_A_IRQ_MODERATION_CNT - 1) -}; - -/* PAUSE */ - -enum { - ETHTOOL_A_PAUSE_UNSPEC, - ETHTOOL_A_PAUSE_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_PAUSE_AUTONEG, /* u8 */ - ETHTOOL_A_PAUSE_RX, /* u8 */ - ETHTOOL_A_PAUSE_TX, /* u8 */ - ETHTOOL_A_PAUSE_STATS, /* nest - _PAUSE_STAT_* */ - ETHTOOL_A_PAUSE_STATS_SRC, /* u32 */ - - /* add new constants above here */ - __ETHTOOL_A_PAUSE_CNT, - ETHTOOL_A_PAUSE_MAX = (__ETHTOOL_A_PAUSE_CNT - 1) -}; - -enum { - ETHTOOL_A_PAUSE_STAT_UNSPEC, - ETHTOOL_A_PAUSE_STAT_PAD, - - ETHTOOL_A_PAUSE_STAT_TX_FRAMES, - ETHTOOL_A_PAUSE_STAT_RX_FRAMES, - - /* add new constants above here - * adjust ETHTOOL_PAUSE_STAT_CNT if adding non-stats! - */ - __ETHTOOL_A_PAUSE_STAT_CNT, - ETHTOOL_A_PAUSE_STAT_MAX = (__ETHTOOL_A_PAUSE_STAT_CNT - 1) -}; - -/* EEE */ - -enum { - ETHTOOL_A_EEE_UNSPEC, - ETHTOOL_A_EEE_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_EEE_MODES_OURS, /* bitset */ - ETHTOOL_A_EEE_MODES_PEER, /* bitset */ - ETHTOOL_A_EEE_ACTIVE, /* u8 */ - ETHTOOL_A_EEE_ENABLED, /* u8 */ - ETHTOOL_A_EEE_TX_LPI_ENABLED, /* u8 */ - ETHTOOL_A_EEE_TX_LPI_TIMER, /* u32 */ - - /* add new constants above here */ - __ETHTOOL_A_EEE_CNT, - ETHTOOL_A_EEE_MAX = (__ETHTOOL_A_EEE_CNT - 1) -}; - -/* TSINFO */ - -enum { - ETHTOOL_A_TSINFO_UNSPEC, - ETHTOOL_A_TSINFO_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_TSINFO_TIMESTAMPING, /* bitset */ - ETHTOOL_A_TSINFO_TX_TYPES, /* bitset */ - ETHTOOL_A_TSINFO_RX_FILTERS, /* bitset */ - ETHTOOL_A_TSINFO_PHC_INDEX, /* u32 */ - ETHTOOL_A_TSINFO_STATS, /* nest - _A_TSINFO_STAT */ - - /* add new constants above here */ - __ETHTOOL_A_TSINFO_CNT, - ETHTOOL_A_TSINFO_MAX = (__ETHTOOL_A_TSINFO_CNT - 1) -}; - -enum { - ETHTOOL_A_TS_STAT_UNSPEC, - - ETHTOOL_A_TS_STAT_TX_PKTS, /* uint */ - ETHTOOL_A_TS_STAT_TX_LOST, /* uint */ - ETHTOOL_A_TS_STAT_TX_ERR, /* uint */ - - /* add new constants above here */ - __ETHTOOL_A_TS_STAT_CNT, - ETHTOOL_A_TS_STAT_MAX = (__ETHTOOL_A_TS_STAT_CNT - 1) - -}; - -/* PHC VCLOCKS */ - -enum { - ETHTOOL_A_PHC_VCLOCKS_UNSPEC, - ETHTOOL_A_PHC_VCLOCKS_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_PHC_VCLOCKS_NUM, /* u32 */ - ETHTOOL_A_PHC_VCLOCKS_INDEX, /* array, s32 */ - - /* add new constants above here */ - __ETHTOOL_A_PHC_VCLOCKS_CNT, - ETHTOOL_A_PHC_VCLOCKS_MAX = (__ETHTOOL_A_PHC_VCLOCKS_CNT - 1) -}; - -/* CABLE TEST */ - -enum { - ETHTOOL_A_CABLE_TEST_UNSPEC, - ETHTOOL_A_CABLE_TEST_HEADER, /* nest - _A_HEADER_* */ - - /* add new constants above here */ - __ETHTOOL_A_CABLE_TEST_CNT, - ETHTOOL_A_CABLE_TEST_MAX = __ETHTOOL_A_CABLE_TEST_CNT - 1 -}; - /* CABLE TEST NOTIFY */ enum { ETHTOOL_A_CABLE_RESULT_CODE_UNSPEC, @@ -582,74 +49,12 @@ enum { ETHTOOL_A_CABLE_INF_SRC_ALCD, }; -enum { - ETHTOOL_A_CABLE_RESULT_UNSPEC, - ETHTOOL_A_CABLE_RESULT_PAIR, /* u8 ETHTOOL_A_CABLE_PAIR_ */ - ETHTOOL_A_CABLE_RESULT_CODE, /* u8 ETHTOOL_A_CABLE_RESULT_CODE_ */ - ETHTOOL_A_CABLE_RESULT_SRC, /* u32 ETHTOOL_A_CABLE_INF_SRC_ */ - - __ETHTOOL_A_CABLE_RESULT_CNT, - ETHTOOL_A_CABLE_RESULT_MAX = (__ETHTOOL_A_CABLE_RESULT_CNT - 1) -}; - -enum { - ETHTOOL_A_CABLE_FAULT_LENGTH_UNSPEC, - ETHTOOL_A_CABLE_FAULT_LENGTH_PAIR, /* u8 ETHTOOL_A_CABLE_PAIR_ */ - ETHTOOL_A_CABLE_FAULT_LENGTH_CM, /* u32 */ - ETHTOOL_A_CABLE_FAULT_LENGTH_SRC, /* u32 ETHTOOL_A_CABLE_INF_SRC_ */ - - __ETHTOOL_A_CABLE_FAULT_LENGTH_CNT, - ETHTOOL_A_CABLE_FAULT_LENGTH_MAX = (__ETHTOOL_A_CABLE_FAULT_LENGTH_CNT - 1) -}; - enum { ETHTOOL_A_CABLE_TEST_NTF_STATUS_UNSPEC, ETHTOOL_A_CABLE_TEST_NTF_STATUS_STARTED, ETHTOOL_A_CABLE_TEST_NTF_STATUS_COMPLETED }; -enum { - ETHTOOL_A_CABLE_NEST_UNSPEC, - ETHTOOL_A_CABLE_NEST_RESULT, /* nest - ETHTOOL_A_CABLE_RESULT_ */ - ETHTOOL_A_CABLE_NEST_FAULT_LENGTH, /* nest - ETHTOOL_A_CABLE_FAULT_LENGTH_ */ - __ETHTOOL_A_CABLE_NEST_CNT, - ETHTOOL_A_CABLE_NEST_MAX = (__ETHTOOL_A_CABLE_NEST_CNT - 1) -}; - -enum { - ETHTOOL_A_CABLE_TEST_NTF_UNSPEC, - ETHTOOL_A_CABLE_TEST_NTF_HEADER, /* nest - ETHTOOL_A_HEADER_* */ - ETHTOOL_A_CABLE_TEST_NTF_STATUS, /* u8 - _STARTED/_COMPLETE */ - ETHTOOL_A_CABLE_TEST_NTF_NEST, /* nest - of results: */ - - __ETHTOOL_A_CABLE_TEST_NTF_CNT, - ETHTOOL_A_CABLE_TEST_NTF_MAX = (__ETHTOOL_A_CABLE_TEST_NTF_CNT - 1) -}; - -/* CABLE TEST TDR */ - -enum { - ETHTOOL_A_CABLE_TEST_TDR_CFG_UNSPEC, - ETHTOOL_A_CABLE_TEST_TDR_CFG_FIRST, /* u32 */ - ETHTOOL_A_CABLE_TEST_TDR_CFG_LAST, /* u32 */ - ETHTOOL_A_CABLE_TEST_TDR_CFG_STEP, /* u32 */ - ETHTOOL_A_CABLE_TEST_TDR_CFG_PAIR, /* u8 */ - - /* add new constants above here */ - __ETHTOOL_A_CABLE_TEST_TDR_CFG_CNT, - ETHTOOL_A_CABLE_TEST_TDR_CFG_MAX = __ETHTOOL_A_CABLE_TEST_TDR_CFG_CNT - 1 -}; - -enum { - ETHTOOL_A_CABLE_TEST_TDR_UNSPEC, - ETHTOOL_A_CABLE_TEST_TDR_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_CABLE_TEST_TDR_CFG, /* nest - *_TDR_CFG_* */ - - /* add new constants above here */ - __ETHTOOL_A_CABLE_TEST_TDR_CNT, - ETHTOOL_A_CABLE_TEST_TDR_MAX = __ETHTOOL_A_CABLE_TEST_TDR_CNT - 1 -}; - /* CABLE TEST TDR NOTIFY */ enum { @@ -689,132 +94,6 @@ enum { ETHTOOL_A_CABLE_TDR_NEST_MAX = (__ETHTOOL_A_CABLE_TDR_NEST_CNT - 1) }; -enum { - ETHTOOL_A_CABLE_TEST_TDR_NTF_UNSPEC, - ETHTOOL_A_CABLE_TEST_TDR_NTF_HEADER, /* nest - ETHTOOL_A_HEADER_* */ - ETHTOOL_A_CABLE_TEST_TDR_NTF_STATUS, /* u8 - _STARTED/_COMPLETE */ - ETHTOOL_A_CABLE_TEST_TDR_NTF_NEST, /* nest - of results: */ - - /* add new constants above here */ - __ETHTOOL_A_CABLE_TEST_TDR_NTF_CNT, - ETHTOOL_A_CABLE_TEST_TDR_NTF_MAX = __ETHTOOL_A_CABLE_TEST_TDR_NTF_CNT - 1 -}; - -/* TUNNEL INFO */ - -enum { - ETHTOOL_UDP_TUNNEL_TYPE_VXLAN, - ETHTOOL_UDP_TUNNEL_TYPE_GENEVE, - ETHTOOL_UDP_TUNNEL_TYPE_VXLAN_GPE, - - __ETHTOOL_UDP_TUNNEL_TYPE_CNT -}; - -enum { - ETHTOOL_A_TUNNEL_UDP_ENTRY_UNSPEC, - - ETHTOOL_A_TUNNEL_UDP_ENTRY_PORT, /* be16 */ - ETHTOOL_A_TUNNEL_UDP_ENTRY_TYPE, /* u32 */ - - /* add new constants above here */ - __ETHTOOL_A_TUNNEL_UDP_ENTRY_CNT, - ETHTOOL_A_TUNNEL_UDP_ENTRY_MAX = (__ETHTOOL_A_TUNNEL_UDP_ENTRY_CNT - 1) -}; - -enum { - ETHTOOL_A_TUNNEL_UDP_TABLE_UNSPEC, - - ETHTOOL_A_TUNNEL_UDP_TABLE_SIZE, /* u32 */ - ETHTOOL_A_TUNNEL_UDP_TABLE_TYPES, /* bitset */ - ETHTOOL_A_TUNNEL_UDP_TABLE_ENTRY, /* nest - _UDP_ENTRY_* */ - - /* add new constants above here */ - __ETHTOOL_A_TUNNEL_UDP_TABLE_CNT, - ETHTOOL_A_TUNNEL_UDP_TABLE_MAX = (__ETHTOOL_A_TUNNEL_UDP_TABLE_CNT - 1) -}; - -enum { - ETHTOOL_A_TUNNEL_UDP_UNSPEC, - - ETHTOOL_A_TUNNEL_UDP_TABLE, /* nest - _UDP_TABLE_* */ - - /* add new constants above here */ - __ETHTOOL_A_TUNNEL_UDP_CNT, - ETHTOOL_A_TUNNEL_UDP_MAX = (__ETHTOOL_A_TUNNEL_UDP_CNT - 1) -}; - -enum { - ETHTOOL_A_TUNNEL_INFO_UNSPEC, - ETHTOOL_A_TUNNEL_INFO_HEADER, /* nest - _A_HEADER_* */ - - ETHTOOL_A_TUNNEL_INFO_UDP_PORTS, /* nest - _UDP_TABLE */ - - /* add new constants above here */ - __ETHTOOL_A_TUNNEL_INFO_CNT, - ETHTOOL_A_TUNNEL_INFO_MAX = (__ETHTOOL_A_TUNNEL_INFO_CNT - 1) -}; - -/* FEC */ - -enum { - ETHTOOL_A_FEC_UNSPEC, - ETHTOOL_A_FEC_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_FEC_MODES, /* bitset */ - ETHTOOL_A_FEC_AUTO, /* u8 */ - ETHTOOL_A_FEC_ACTIVE, /* u32 */ - ETHTOOL_A_FEC_STATS, /* nest - _A_FEC_STAT */ - - __ETHTOOL_A_FEC_CNT, - ETHTOOL_A_FEC_MAX = (__ETHTOOL_A_FEC_CNT - 1) -}; - -enum { - ETHTOOL_A_FEC_STAT_UNSPEC, - ETHTOOL_A_FEC_STAT_PAD, - - ETHTOOL_A_FEC_STAT_CORRECTED, /* array, u64 */ - ETHTOOL_A_FEC_STAT_UNCORR, /* array, u64 */ - ETHTOOL_A_FEC_STAT_CORR_BITS, /* array, u64 */ - - /* add new constants above here */ - __ETHTOOL_A_FEC_STAT_CNT, - ETHTOOL_A_FEC_STAT_MAX = (__ETHTOOL_A_FEC_STAT_CNT - 1) -}; - -/* MODULE EEPROM */ - -enum { - ETHTOOL_A_MODULE_EEPROM_UNSPEC, - ETHTOOL_A_MODULE_EEPROM_HEADER, /* nest - _A_HEADER_* */ - - ETHTOOL_A_MODULE_EEPROM_OFFSET, /* u32 */ - ETHTOOL_A_MODULE_EEPROM_LENGTH, /* u32 */ - ETHTOOL_A_MODULE_EEPROM_PAGE, /* u8 */ - ETHTOOL_A_MODULE_EEPROM_BANK, /* u8 */ - ETHTOOL_A_MODULE_EEPROM_I2C_ADDRESS, /* u8 */ - ETHTOOL_A_MODULE_EEPROM_DATA, /* binary */ - - __ETHTOOL_A_MODULE_EEPROM_CNT, - ETHTOOL_A_MODULE_EEPROM_MAX = (__ETHTOOL_A_MODULE_EEPROM_CNT - 1) -}; - -/* STATS */ - -enum { - ETHTOOL_A_STATS_UNSPEC, - ETHTOOL_A_STATS_PAD, - ETHTOOL_A_STATS_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_STATS_GROUPS, /* bitset */ - - ETHTOOL_A_STATS_GRP, /* nest - _A_STATS_GRP_* */ - - ETHTOOL_A_STATS_SRC, /* u32 */ - - /* add new constants above here */ - __ETHTOOL_A_STATS_CNT, - ETHTOOL_A_STATS_MAX = (__ETHTOOL_A_STATS_CNT - 1) -}; - enum { ETHTOOL_STATS_ETH_PHY, ETHTOOL_STATS_ETH_MAC, @@ -825,27 +104,6 @@ enum { __ETHTOOL_STATS_CNT }; -enum { - ETHTOOL_A_STATS_GRP_UNSPEC, - ETHTOOL_A_STATS_GRP_PAD, - - ETHTOOL_A_STATS_GRP_ID, /* u32 */ - ETHTOOL_A_STATS_GRP_SS_ID, /* u32 */ - - ETHTOOL_A_STATS_GRP_STAT, /* nest */ - - ETHTOOL_A_STATS_GRP_HIST_RX, /* nest */ - ETHTOOL_A_STATS_GRP_HIST_TX, /* nest */ - - ETHTOOL_A_STATS_GRP_HIST_BKT_LOW, /* u32 */ - ETHTOOL_A_STATS_GRP_HIST_BKT_HI, /* u32 */ - ETHTOOL_A_STATS_GRP_HIST_VAL, /* u64 */ - - /* add new constants above here */ - __ETHTOOL_A_STATS_GRP_CNT, - ETHTOOL_A_STATS_GRP_MAX = (__ETHTOOL_A_STATS_GRP_CNT - 1) -}; - enum { /* 30.3.2.1.5 aSymbolErrorDuringCarrier */ ETHTOOL_A_STATS_ETH_PHY_5_SYM_ERR, @@ -935,155 +193,6 @@ enum { ETHTOOL_A_STATS_RMON_MAX = (__ETHTOOL_A_STATS_RMON_CNT - 1) }; -/* MODULE */ - -enum { - ETHTOOL_A_MODULE_UNSPEC, - ETHTOOL_A_MODULE_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_MODULE_POWER_MODE_POLICY, /* u8 */ - ETHTOOL_A_MODULE_POWER_MODE, /* u8 */ - - /* add new constants above here */ - __ETHTOOL_A_MODULE_CNT, - ETHTOOL_A_MODULE_MAX = (__ETHTOOL_A_MODULE_CNT - 1) -}; - -/* Power Sourcing Equipment */ -enum { - ETHTOOL_A_C33_PSE_PW_LIMIT_UNSPEC, - ETHTOOL_A_C33_PSE_PW_LIMIT_MIN, /* u32 */ - ETHTOOL_A_C33_PSE_PW_LIMIT_MAX, /* u32 */ -}; - -enum { - ETHTOOL_A_PSE_UNSPEC, - ETHTOOL_A_PSE_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_PODL_PSE_ADMIN_STATE, /* u32 */ - ETHTOOL_A_PODL_PSE_ADMIN_CONTROL, /* u32 */ - ETHTOOL_A_PODL_PSE_PW_D_STATUS, /* u32 */ - ETHTOOL_A_C33_PSE_ADMIN_STATE, /* u32 */ - ETHTOOL_A_C33_PSE_ADMIN_CONTROL, /* u32 */ - ETHTOOL_A_C33_PSE_PW_D_STATUS, /* u32 */ - ETHTOOL_A_C33_PSE_PW_CLASS, /* u32 */ - ETHTOOL_A_C33_PSE_ACTUAL_PW, /* u32 */ - ETHTOOL_A_C33_PSE_EXT_STATE, /* u32 */ - ETHTOOL_A_C33_PSE_EXT_SUBSTATE, /* u32 */ - ETHTOOL_A_C33_PSE_AVAIL_PW_LIMIT, /* u32 */ - ETHTOOL_A_C33_PSE_PW_LIMIT_RANGES, /* nest - _C33_PSE_PW_LIMIT_* */ - - /* add new constants above here */ - __ETHTOOL_A_PSE_CNT, - ETHTOOL_A_PSE_MAX = (__ETHTOOL_A_PSE_CNT - 1) -}; - -enum { - ETHTOOL_A_RSS_UNSPEC, - ETHTOOL_A_RSS_HEADER, - ETHTOOL_A_RSS_CONTEXT, /* u32 */ - ETHTOOL_A_RSS_HFUNC, /* u32 */ - ETHTOOL_A_RSS_INDIR, /* binary */ - ETHTOOL_A_RSS_HKEY, /* binary */ - ETHTOOL_A_RSS_INPUT_XFRM, /* u32 */ - ETHTOOL_A_RSS_START_CONTEXT, /* u32 */ - - __ETHTOOL_A_RSS_CNT, - ETHTOOL_A_RSS_MAX = (__ETHTOOL_A_RSS_CNT - 1), -}; - -/* PLCA */ - -enum { - ETHTOOL_A_PLCA_UNSPEC, - ETHTOOL_A_PLCA_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_PLCA_VERSION, /* u16 */ - ETHTOOL_A_PLCA_ENABLED, /* u8 */ - ETHTOOL_A_PLCA_STATUS, /* u8 */ - ETHTOOL_A_PLCA_NODE_CNT, /* u32 */ - ETHTOOL_A_PLCA_NODE_ID, /* u32 */ - ETHTOOL_A_PLCA_TO_TMR, /* u32 */ - ETHTOOL_A_PLCA_BURST_CNT, /* u32 */ - ETHTOOL_A_PLCA_BURST_TMR, /* u32 */ - - /* add new constants above here */ - __ETHTOOL_A_PLCA_CNT, - ETHTOOL_A_PLCA_MAX = (__ETHTOOL_A_PLCA_CNT - 1) -}; - -/* MAC Merge (802.3) */ - -enum { - ETHTOOL_A_MM_STAT_UNSPEC, - ETHTOOL_A_MM_STAT_PAD, - - /* aMACMergeFrameAssErrorCount */ - ETHTOOL_A_MM_STAT_REASSEMBLY_ERRORS, /* u64 */ - /* aMACMergeFrameSmdErrorCount */ - ETHTOOL_A_MM_STAT_SMD_ERRORS, /* u64 */ - /* aMACMergeFrameAssOkCount */ - ETHTOOL_A_MM_STAT_REASSEMBLY_OK, /* u64 */ - /* aMACMergeFragCountRx */ - ETHTOOL_A_MM_STAT_RX_FRAG_COUNT, /* u64 */ - /* aMACMergeFragCountTx */ - ETHTOOL_A_MM_STAT_TX_FRAG_COUNT, /* u64 */ - /* aMACMergeHoldCount */ - ETHTOOL_A_MM_STAT_HOLD_COUNT, /* u64 */ - - /* add new constants above here */ - __ETHTOOL_A_MM_STAT_CNT, - ETHTOOL_A_MM_STAT_MAX = (__ETHTOOL_A_MM_STAT_CNT - 1) -}; - -enum { - ETHTOOL_A_MM_UNSPEC, - ETHTOOL_A_MM_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_MM_PMAC_ENABLED, /* u8 */ - ETHTOOL_A_MM_TX_ENABLED, /* u8 */ - ETHTOOL_A_MM_TX_ACTIVE, /* u8 */ - ETHTOOL_A_MM_TX_MIN_FRAG_SIZE, /* u32 */ - ETHTOOL_A_MM_RX_MIN_FRAG_SIZE, /* u32 */ - ETHTOOL_A_MM_VERIFY_ENABLED, /* u8 */ - ETHTOOL_A_MM_VERIFY_STATUS, /* u8 */ - ETHTOOL_A_MM_VERIFY_TIME, /* u32 */ - ETHTOOL_A_MM_MAX_VERIFY_TIME, /* u32 */ - ETHTOOL_A_MM_STATS, /* nest - _A_MM_STAT_* */ - - /* add new constants above here */ - __ETHTOOL_A_MM_CNT, - ETHTOOL_A_MM_MAX = (__ETHTOOL_A_MM_CNT - 1) -}; - -/* MODULE_FW_FLASH */ - -enum { - ETHTOOL_A_MODULE_FW_FLASH_UNSPEC, - ETHTOOL_A_MODULE_FW_FLASH_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_MODULE_FW_FLASH_FILE_NAME, /* string */ - ETHTOOL_A_MODULE_FW_FLASH_PASSWORD, /* u32 */ - ETHTOOL_A_MODULE_FW_FLASH_STATUS, /* u32 */ - ETHTOOL_A_MODULE_FW_FLASH_STATUS_MSG, /* string */ - ETHTOOL_A_MODULE_FW_FLASH_DONE, /* uint */ - ETHTOOL_A_MODULE_FW_FLASH_TOTAL, /* uint */ - - /* add new constants above here */ - __ETHTOOL_A_MODULE_FW_FLASH_CNT, - ETHTOOL_A_MODULE_FW_FLASH_MAX = (__ETHTOOL_A_MODULE_FW_FLASH_CNT - 1) -}; - -enum { - ETHTOOL_A_PHY_UNSPEC, - ETHTOOL_A_PHY_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_PHY_INDEX, /* u32 */ - ETHTOOL_A_PHY_DRVNAME, /* string */ - ETHTOOL_A_PHY_NAME, /* string */ - ETHTOOL_A_PHY_UPSTREAM_TYPE, /* u32 */ - ETHTOOL_A_PHY_UPSTREAM_INDEX, /* u32 */ - ETHTOOL_A_PHY_UPSTREAM_SFP_NAME, /* string */ - ETHTOOL_A_PHY_DOWNSTREAM_SFP_NAME, /* string */ - - /* add new constants above here */ - __ETHTOOL_A_PHY_CNT, - ETHTOOL_A_PHY_MAX = (__ETHTOOL_A_PHY_CNT - 1) -}; /* generic netlink info */ #define ETHTOOL_GENL_NAME "ethtool" diff --git a/include/uapi/linux/ethtool_netlink_generated.h b/include/uapi/linux/ethtool_netlink_generated.h new file mode 100644 index 000000000000..4b4bf17d1a88 --- /dev/null +++ b/include/uapi/linux/ethtool_netlink_generated.h @@ -0,0 +1,899 @@ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ +#ifndef _UAPI_LINUX_ETHTOOL_NETLINK_GENERATED_H +#define _UAPI_LINUX_ETHTOOL_NETLINK_GENERATED_H + +/* TUNNEL INFO */ + +enum { + ETHTOOL_UDP_TUNNEL_TYPE_VXLAN, + ETHTOOL_UDP_TUNNEL_TYPE_GENEVE, + ETHTOOL_UDP_TUNNEL_TYPE_VXLAN_GPE, + + __ETHTOOL_UDP_TUNNEL_TYPE_CNT +}; + +/* request header */ + +enum ethtool_header_flags { + ETHTOOL_FLAG_COMPACT_BITSETS = 1 << 0, /* use compact bitsets in reply */ + ETHTOOL_FLAG_OMIT_REPLY = 1 << 1, /* provide optional reply for SET or ACT requests */ + ETHTOOL_FLAG_STATS = 1 << 2, /* request statistics, if supported by the driver */ +}; + +enum { + ETHTOOL_TCP_DATA_SPLIT_UNKNOWN = 0, + ETHTOOL_TCP_DATA_SPLIT_DISABLED, + ETHTOOL_TCP_DATA_SPLIT_ENABLED, +}; + +enum { + ETHTOOL_A_HEADER_UNSPEC, + ETHTOOL_A_HEADER_DEV_INDEX, /* u32 */ + ETHTOOL_A_HEADER_DEV_NAME, /* string */ + ETHTOOL_A_HEADER_FLAGS, /* u32 - ETHTOOL_FLAG_* */ + ETHTOOL_A_HEADER_PHY_INDEX, /* u32 */ + + /* add new constants above here */ + __ETHTOOL_A_HEADER_CNT, + ETHTOOL_A_HEADER_MAX = __ETHTOOL_A_HEADER_CNT - 1 +}; + +/* bit sets */ + +enum { + ETHTOOL_A_BITSET_BIT_UNSPEC, + ETHTOOL_A_BITSET_BIT_INDEX, /* u32 */ + ETHTOOL_A_BITSET_BIT_NAME, /* string */ + ETHTOOL_A_BITSET_BIT_VALUE, /* flag */ + + /* add new constants above here */ + __ETHTOOL_A_BITSET_BIT_CNT, + ETHTOOL_A_BITSET_BIT_MAX = __ETHTOOL_A_BITSET_BIT_CNT - 1 +}; + +enum { + ETHTOOL_A_BITSET_BITS_UNSPEC, + ETHTOOL_A_BITSET_BITS_BIT, /* nest - _A_BITSET_BIT_* */ + + /* add new constants above here */ + __ETHTOOL_A_BITSET_BITS_CNT, + ETHTOOL_A_BITSET_BITS_MAX = __ETHTOOL_A_BITSET_BITS_CNT - 1 +}; + +enum { + ETHTOOL_A_BITSET_UNSPEC, + ETHTOOL_A_BITSET_NOMASK, /* flag */ + ETHTOOL_A_BITSET_SIZE, /* u32 */ + ETHTOOL_A_BITSET_BITS, /* nest - _A_BITSET_BITS_* */ + ETHTOOL_A_BITSET_VALUE, /* binary */ + ETHTOOL_A_BITSET_MASK, /* binary */ + + /* add new constants above here */ + __ETHTOOL_A_BITSET_CNT, + ETHTOOL_A_BITSET_MAX = __ETHTOOL_A_BITSET_CNT - 1 +}; + +/* string sets */ + +enum { + ETHTOOL_A_STRING_UNSPEC, + ETHTOOL_A_STRING_INDEX, /* u32 */ + ETHTOOL_A_STRING_VALUE, /* string */ + + /* add new constants above here */ + __ETHTOOL_A_STRING_CNT, + ETHTOOL_A_STRING_MAX = __ETHTOOL_A_STRING_CNT - 1 +}; + +enum { + ETHTOOL_A_STRINGS_UNSPEC, + ETHTOOL_A_STRINGS_STRING, /* nest - _A_STRINGS_* */ + + /* add new constants above here */ + __ETHTOOL_A_STRINGS_CNT, + ETHTOOL_A_STRINGS_MAX = __ETHTOOL_A_STRINGS_CNT - 1 +}; + +enum { + ETHTOOL_A_STRINGSET_UNSPEC, + ETHTOOL_A_STRINGSET_ID, /* u32 */ + ETHTOOL_A_STRINGSET_COUNT, /* u32 */ + ETHTOOL_A_STRINGSET_STRINGS, /* nest - _A_STRINGS_* */ + + /* add new constants above here */ + __ETHTOOL_A_STRINGSET_CNT, + ETHTOOL_A_STRINGSET_MAX = __ETHTOOL_A_STRINGSET_CNT - 1 +}; + +enum { + ETHTOOL_A_STRINGSETS_UNSPEC, + ETHTOOL_A_STRINGSETS_STRINGSET, /* nest - _A_STRINGSET_* */ + + /* add new constants above here */ + __ETHTOOL_A_STRINGSETS_CNT, + ETHTOOL_A_STRINGSETS_MAX = __ETHTOOL_A_STRINGSETS_CNT - 1 +}; + +/* STRSET */ + +enum { + ETHTOOL_A_STRSET_UNSPEC, + ETHTOOL_A_STRSET_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_STRSET_STRINGSETS, /* nest - _A_STRINGSETS_* */ + ETHTOOL_A_STRSET_COUNTS_ONLY, /* flag */ + + /* add new constants above here */ + __ETHTOOL_A_STRSET_CNT, + ETHTOOL_A_STRSET_MAX = __ETHTOOL_A_STRSET_CNT - 1 +}; + +/* PRIVFLAGS */ + +enum { + ETHTOOL_A_PRIVFLAGS_UNSPEC, + ETHTOOL_A_PRIVFLAGS_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_PRIVFLAGS_FLAGS, /* bitset */ + + /* add new constants above here */ + __ETHTOOL_A_PRIVFLAGS_CNT, + ETHTOOL_A_PRIVFLAGS_MAX = __ETHTOOL_A_PRIVFLAGS_CNT - 1 +}; + +/* RINGS */ + +enum { + ETHTOOL_A_RINGS_UNSPEC, + ETHTOOL_A_RINGS_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_RINGS_RX_MAX, /* u32 */ + ETHTOOL_A_RINGS_RX_MINI_MAX, /* u32 */ + ETHTOOL_A_RINGS_RX_JUMBO_MAX, /* u32 */ + ETHTOOL_A_RINGS_TX_MAX, /* u32 */ + ETHTOOL_A_RINGS_RX, /* u32 */ + ETHTOOL_A_RINGS_RX_MINI, /* u32 */ + ETHTOOL_A_RINGS_RX_JUMBO, /* u32 */ + ETHTOOL_A_RINGS_TX, /* u32 */ + ETHTOOL_A_RINGS_RX_BUF_LEN, /* u32 */ + ETHTOOL_A_RINGS_TCP_DATA_SPLIT, /* u8 */ + ETHTOOL_A_RINGS_CQE_SIZE, /* u32 */ + ETHTOOL_A_RINGS_TX_PUSH, /* u8 */ + ETHTOOL_A_RINGS_RX_PUSH, /* u8 */ + ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN, /* u32 */ + ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN_MAX, /* u32 */ + + /* add new constants above here */ + __ETHTOOL_A_RINGS_CNT, + ETHTOOL_A_RINGS_MAX = (__ETHTOOL_A_RINGS_CNT - 1) +}; + +/* MAC Merge (802.3) */ + +enum { + ETHTOOL_A_MM_STAT_UNSPEC, + ETHTOOL_A_MM_STAT_PAD, + + /* aMACMergeFrameAssErrorCount */ + ETHTOOL_A_MM_STAT_REASSEMBLY_ERRORS, /* u64 */ + /* aMACMergeFrameSmdErrorCount */ + ETHTOOL_A_MM_STAT_SMD_ERRORS, /* u64 */ + /* aMACMergeFrameAssOkCount */ + ETHTOOL_A_MM_STAT_REASSEMBLY_OK, /* u64 */ + /* aMACMergeFragCountRx */ + ETHTOOL_A_MM_STAT_RX_FRAG_COUNT, /* u64 */ + /* aMACMergeFragCountTx */ + ETHTOOL_A_MM_STAT_TX_FRAG_COUNT, /* u64 */ + /* aMACMergeHoldCount */ + ETHTOOL_A_MM_STAT_HOLD_COUNT, /* u64 */ + + /* add new constants above here */ + __ETHTOOL_A_MM_STAT_CNT, + ETHTOOL_A_MM_STAT_MAX = (__ETHTOOL_A_MM_STAT_CNT - 1) +}; + +enum { + ETHTOOL_A_MM_UNSPEC, + ETHTOOL_A_MM_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_MM_PMAC_ENABLED, /* u8 */ + ETHTOOL_A_MM_TX_ENABLED, /* u8 */ + ETHTOOL_A_MM_TX_ACTIVE, /* u8 */ + ETHTOOL_A_MM_TX_MIN_FRAG_SIZE, /* u32 */ + ETHTOOL_A_MM_RX_MIN_FRAG_SIZE, /* u32 */ + ETHTOOL_A_MM_VERIFY_ENABLED, /* u8 */ + ETHTOOL_A_MM_VERIFY_STATUS, /* u8 */ + ETHTOOL_A_MM_VERIFY_TIME, /* u32 */ + ETHTOOL_A_MM_MAX_VERIFY_TIME, /* u32 */ + ETHTOOL_A_MM_STATS, /* nest - _A_MM_STAT_* */ + + /* add new constants above here */ + __ETHTOOL_A_MM_CNT, + ETHTOOL_A_MM_MAX = (__ETHTOOL_A_MM_CNT - 1) +}; + +/* LINKINFO */ + +enum { + ETHTOOL_A_LINKINFO_UNSPEC, + ETHTOOL_A_LINKINFO_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_LINKINFO_PORT, /* u8 */ + ETHTOOL_A_LINKINFO_PHYADDR, /* u8 */ + ETHTOOL_A_LINKINFO_TP_MDIX, /* u8 */ + ETHTOOL_A_LINKINFO_TP_MDIX_CTRL, /* u8 */ + ETHTOOL_A_LINKINFO_TRANSCEIVER, /* u8 */ + + /* add new constants above here */ + __ETHTOOL_A_LINKINFO_CNT, + ETHTOOL_A_LINKINFO_MAX = __ETHTOOL_A_LINKINFO_CNT - 1 +}; + +/* LINKMODES */ + +enum { + ETHTOOL_A_LINKMODES_UNSPEC, + ETHTOOL_A_LINKMODES_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_LINKMODES_AUTONEG, /* u8 */ + ETHTOOL_A_LINKMODES_OURS, /* bitset */ + ETHTOOL_A_LINKMODES_PEER, /* bitset */ + ETHTOOL_A_LINKMODES_SPEED, /* u32 */ + ETHTOOL_A_LINKMODES_DUPLEX, /* u8 */ + ETHTOOL_A_LINKMODES_MASTER_SLAVE_CFG, /* u8 */ + ETHTOOL_A_LINKMODES_MASTER_SLAVE_STATE, /* u8 */ + ETHTOOL_A_LINKMODES_LANES, /* u32 */ + ETHTOOL_A_LINKMODES_RATE_MATCHING, /* u8 */ + + /* add new constants above here */ + __ETHTOOL_A_LINKMODES_CNT, + ETHTOOL_A_LINKMODES_MAX = __ETHTOOL_A_LINKMODES_CNT - 1 +}; + +/* LINKSTATE */ + +enum { + ETHTOOL_A_LINKSTATE_UNSPEC, + ETHTOOL_A_LINKSTATE_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_LINKSTATE_LINK, /* u8 */ + ETHTOOL_A_LINKSTATE_SQI, /* u32 */ + ETHTOOL_A_LINKSTATE_SQI_MAX, /* u32 */ + ETHTOOL_A_LINKSTATE_EXT_STATE, /* u8 */ + ETHTOOL_A_LINKSTATE_EXT_SUBSTATE, /* u8 */ + ETHTOOL_A_LINKSTATE_EXT_DOWN_CNT, /* u32 */ + + /* add new constants above here */ + __ETHTOOL_A_LINKSTATE_CNT, + ETHTOOL_A_LINKSTATE_MAX = __ETHTOOL_A_LINKSTATE_CNT - 1 +}; + +/* DEBUG */ + +enum { + ETHTOOL_A_DEBUG_UNSPEC, + ETHTOOL_A_DEBUG_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_DEBUG_MSGMASK, /* bitset */ + + /* add new constants above here */ + __ETHTOOL_A_DEBUG_CNT, + ETHTOOL_A_DEBUG_MAX = __ETHTOOL_A_DEBUG_CNT - 1 +}; + +/* WOL */ + +enum { + ETHTOOL_A_WOL_UNSPEC, + ETHTOOL_A_WOL_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_WOL_MODES, /* bitset */ + ETHTOOL_A_WOL_SOPASS, /* binary */ + + /* add new constants above here */ + __ETHTOOL_A_WOL_CNT, + ETHTOOL_A_WOL_MAX = __ETHTOOL_A_WOL_CNT - 1 +}; + +/* FEATURES */ + +enum { + ETHTOOL_A_FEATURES_UNSPEC, + ETHTOOL_A_FEATURES_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_FEATURES_HW, /* bitset */ + ETHTOOL_A_FEATURES_WANTED, /* bitset */ + ETHTOOL_A_FEATURES_ACTIVE, /* bitset */ + ETHTOOL_A_FEATURES_NOCHANGE, /* bitset */ + + /* add new constants above here */ + __ETHTOOL_A_FEATURES_CNT, + ETHTOOL_A_FEATURES_MAX = __ETHTOOL_A_FEATURES_CNT - 1 +}; + +/* CHANNELS */ + +enum { + ETHTOOL_A_CHANNELS_UNSPEC, + ETHTOOL_A_CHANNELS_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_CHANNELS_RX_MAX, /* u32 */ + ETHTOOL_A_CHANNELS_TX_MAX, /* u32 */ + ETHTOOL_A_CHANNELS_OTHER_MAX, /* u32 */ + ETHTOOL_A_CHANNELS_COMBINED_MAX, /* u32 */ + ETHTOOL_A_CHANNELS_RX_COUNT, /* u32 */ + ETHTOOL_A_CHANNELS_TX_COUNT, /* u32 */ + ETHTOOL_A_CHANNELS_OTHER_COUNT, /* u32 */ + ETHTOOL_A_CHANNELS_COMBINED_COUNT, /* u32 */ + + /* add new constants above here */ + __ETHTOOL_A_CHANNELS_CNT, + ETHTOOL_A_CHANNELS_MAX = (__ETHTOOL_A_CHANNELS_CNT - 1) +}; + +enum { + ETHTOOL_A_IRQ_MODERATION_UNSPEC, + ETHTOOL_A_IRQ_MODERATION_USEC, /* u32 */ + ETHTOOL_A_IRQ_MODERATION_PKTS, /* u32 */ + ETHTOOL_A_IRQ_MODERATION_COMPS, /* u32 */ + + __ETHTOOL_A_IRQ_MODERATION_CNT, + ETHTOOL_A_IRQ_MODERATION_MAX = (__ETHTOOL_A_IRQ_MODERATION_CNT - 1) +}; + +enum { + ETHTOOL_A_PROFILE_UNSPEC, + /* nest, _A_IRQ_MODERATION_* */ + ETHTOOL_A_PROFILE_IRQ_MODERATION, + __ETHTOOL_A_PROFILE_CNT, + ETHTOOL_A_PROFILE_MAX = (__ETHTOOL_A_PROFILE_CNT - 1) +}; + +/* COALESCE */ + +enum { + ETHTOOL_A_COALESCE_UNSPEC, + ETHTOOL_A_COALESCE_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_COALESCE_RX_USECS, /* u32 */ + ETHTOOL_A_COALESCE_RX_MAX_FRAMES, /* u32 */ + ETHTOOL_A_COALESCE_RX_USECS_IRQ, /* u32 */ + ETHTOOL_A_COALESCE_RX_MAX_FRAMES_IRQ, /* u32 */ + ETHTOOL_A_COALESCE_TX_USECS, /* u32 */ + ETHTOOL_A_COALESCE_TX_MAX_FRAMES, /* u32 */ + ETHTOOL_A_COALESCE_TX_USECS_IRQ, /* u32 */ + ETHTOOL_A_COALESCE_TX_MAX_FRAMES_IRQ, /* u32 */ + ETHTOOL_A_COALESCE_STATS_BLOCK_USECS, /* u32 */ + ETHTOOL_A_COALESCE_USE_ADAPTIVE_RX, /* u8 */ + ETHTOOL_A_COALESCE_USE_ADAPTIVE_TX, /* u8 */ + ETHTOOL_A_COALESCE_PKT_RATE_LOW, /* u32 */ + ETHTOOL_A_COALESCE_RX_USECS_LOW, /* u32 */ + ETHTOOL_A_COALESCE_RX_MAX_FRAMES_LOW, /* u32 */ + ETHTOOL_A_COALESCE_TX_USECS_LOW, /* u32 */ + ETHTOOL_A_COALESCE_TX_MAX_FRAMES_LOW, /* u32 */ + ETHTOOL_A_COALESCE_PKT_RATE_HIGH, /* u32 */ + ETHTOOL_A_COALESCE_RX_USECS_HIGH, /* u32 */ + ETHTOOL_A_COALESCE_RX_MAX_FRAMES_HIGH, /* u32 */ + ETHTOOL_A_COALESCE_TX_USECS_HIGH, /* u32 */ + ETHTOOL_A_COALESCE_TX_MAX_FRAMES_HIGH, /* u32 */ + ETHTOOL_A_COALESCE_RATE_SAMPLE_INTERVAL, /* u32 */ + ETHTOOL_A_COALESCE_USE_CQE_MODE_TX, /* u8 */ + ETHTOOL_A_COALESCE_USE_CQE_MODE_RX, /* u8 */ + ETHTOOL_A_COALESCE_TX_AGGR_MAX_BYTES, /* u32 */ + ETHTOOL_A_COALESCE_TX_AGGR_MAX_FRAMES, /* u32 */ + ETHTOOL_A_COALESCE_TX_AGGR_TIME_USECS, /* u32 */ + /* nest - _A_PROFILE_IRQ_MODERATION */ + ETHTOOL_A_COALESCE_RX_PROFILE, + /* nest - _A_PROFILE_IRQ_MODERATION */ + ETHTOOL_A_COALESCE_TX_PROFILE, + + /* add new constants above here */ + __ETHTOOL_A_COALESCE_CNT, + ETHTOOL_A_COALESCE_MAX = (__ETHTOOL_A_COALESCE_CNT - 1) +}; + +/* PAUSE */ + +enum { + ETHTOOL_A_PAUSE_STAT_UNSPEC, + ETHTOOL_A_PAUSE_STAT_PAD, + + ETHTOOL_A_PAUSE_STAT_TX_FRAMES, + ETHTOOL_A_PAUSE_STAT_RX_FRAMES, + + /* add new constants above here + * adjust ETHTOOL_PAUSE_STAT_CNT if adding non-stats! + */ + __ETHTOOL_A_PAUSE_STAT_CNT, + ETHTOOL_A_PAUSE_STAT_MAX = (__ETHTOOL_A_PAUSE_STAT_CNT - 1) +}; + +enum { + ETHTOOL_A_PAUSE_UNSPEC, + ETHTOOL_A_PAUSE_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_PAUSE_AUTONEG, /* u8 */ + ETHTOOL_A_PAUSE_RX, /* u8 */ + ETHTOOL_A_PAUSE_TX, /* u8 */ + ETHTOOL_A_PAUSE_STATS, /* nest - _PAUSE_STAT_* */ + ETHTOOL_A_PAUSE_STATS_SRC, /* u32 */ + + /* add new constants above here */ + __ETHTOOL_A_PAUSE_CNT, + ETHTOOL_A_PAUSE_MAX = (__ETHTOOL_A_PAUSE_CNT - 1) +}; + +/* EEE */ + +enum { + ETHTOOL_A_EEE_UNSPEC, + ETHTOOL_A_EEE_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_EEE_MODES_OURS, /* bitset */ + ETHTOOL_A_EEE_MODES_PEER, /* bitset */ + ETHTOOL_A_EEE_ACTIVE, /* u8 */ + ETHTOOL_A_EEE_ENABLED, /* u8 */ + ETHTOOL_A_EEE_TX_LPI_ENABLED, /* u8 */ + ETHTOOL_A_EEE_TX_LPI_TIMER, /* u32 */ + + /* add new constants above here */ + __ETHTOOL_A_EEE_CNT, + ETHTOOL_A_EEE_MAX = (__ETHTOOL_A_EEE_CNT - 1) +}; + +/* TSINFO */ + +enum { + ETHTOOL_A_TS_STAT_UNSPEC, + + ETHTOOL_A_TS_STAT_TX_PKTS, /* uint */ + ETHTOOL_A_TS_STAT_TX_LOST, /* uint */ + ETHTOOL_A_TS_STAT_TX_ERR, /* uint */ + + /* add new constants above here */ + __ETHTOOL_A_TS_STAT_CNT, + ETHTOOL_A_TS_STAT_MAX = (__ETHTOOL_A_TS_STAT_CNT - 1) + +}; + +enum { + ETHTOOL_A_TSINFO_UNSPEC, + ETHTOOL_A_TSINFO_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_TSINFO_TIMESTAMPING, /* bitset */ + ETHTOOL_A_TSINFO_TX_TYPES, /* bitset */ + ETHTOOL_A_TSINFO_RX_FILTERS, /* bitset */ + ETHTOOL_A_TSINFO_PHC_INDEX, /* u32 */ + ETHTOOL_A_TSINFO_STATS, /* nest - _A_TSINFO_STAT */ + + /* add new constants above here */ + __ETHTOOL_A_TSINFO_CNT, + ETHTOOL_A_TSINFO_MAX = (__ETHTOOL_A_TSINFO_CNT - 1) +}; + +enum { + ETHTOOL_A_CABLE_RESULT_UNSPEC, + ETHTOOL_A_CABLE_RESULT_PAIR, /* u8 ETHTOOL_A_CABLE_PAIR_ */ + ETHTOOL_A_CABLE_RESULT_CODE, /* u8 ETHTOOL_A_CABLE_RESULT_CODE_ */ + ETHTOOL_A_CABLE_RESULT_SRC, /* u32 ETHTOOL_A_CABLE_INF_SRC_ */ + + __ETHTOOL_A_CABLE_RESULT_CNT, + ETHTOOL_A_CABLE_RESULT_MAX = (__ETHTOOL_A_CABLE_RESULT_CNT - 1) +}; + +enum { + ETHTOOL_A_CABLE_FAULT_LENGTH_UNSPEC, + ETHTOOL_A_CABLE_FAULT_LENGTH_PAIR, /* u8 ETHTOOL_A_CABLE_PAIR_ */ + ETHTOOL_A_CABLE_FAULT_LENGTH_CM, /* u32 */ + ETHTOOL_A_CABLE_FAULT_LENGTH_SRC, /* u32 ETHTOOL_A_CABLE_INF_SRC_ */ + + __ETHTOOL_A_CABLE_FAULT_LENGTH_CNT, + ETHTOOL_A_CABLE_FAULT_LENGTH_MAX = (__ETHTOOL_A_CABLE_FAULT_LENGTH_CNT - 1) +}; + +enum { + ETHTOOL_A_CABLE_NEST_UNSPEC, + ETHTOOL_A_CABLE_NEST_RESULT, /* nest - ETHTOOL_A_CABLE_RESULT_ */ + ETHTOOL_A_CABLE_NEST_FAULT_LENGTH, /* nest - ETHTOOL_A_CABLE_FAULT_LENGTH_ */ + __ETHTOOL_A_CABLE_NEST_CNT, + ETHTOOL_A_CABLE_NEST_MAX = (__ETHTOOL_A_CABLE_NEST_CNT - 1) +}; + +/* CABLE TEST */ + +enum { + ETHTOOL_A_CABLE_TEST_UNSPEC, + ETHTOOL_A_CABLE_TEST_HEADER, /* nest - _A_HEADER_* */ + + /* add new constants above here */ + __ETHTOOL_A_CABLE_TEST_CNT, + ETHTOOL_A_CABLE_TEST_MAX = __ETHTOOL_A_CABLE_TEST_CNT - 1 +}; + +enum { + ETHTOOL_A_CABLE_TEST_NTF_UNSPEC, + ETHTOOL_A_CABLE_TEST_NTF_HEADER, /* nest - ETHTOOL_A_HEADER_* */ + ETHTOOL_A_CABLE_TEST_NTF_STATUS, /* u8 - _STARTED/_COMPLETE */ + ETHTOOL_A_CABLE_TEST_NTF_NEST, /* nest - of results: */ + + __ETHTOOL_A_CABLE_TEST_NTF_CNT, + ETHTOOL_A_CABLE_TEST_NTF_MAX = (__ETHTOOL_A_CABLE_TEST_NTF_CNT - 1) +}; + +/* CABLE TEST TDR */ + +enum { + ETHTOOL_A_CABLE_TEST_TDR_CFG_UNSPEC, + ETHTOOL_A_CABLE_TEST_TDR_CFG_FIRST, /* u32 */ + ETHTOOL_A_CABLE_TEST_TDR_CFG_LAST, /* u32 */ + ETHTOOL_A_CABLE_TEST_TDR_CFG_STEP, /* u32 */ + ETHTOOL_A_CABLE_TEST_TDR_CFG_PAIR, /* u8 */ + + /* add new constants above here */ + __ETHTOOL_A_CABLE_TEST_TDR_CFG_CNT, + ETHTOOL_A_CABLE_TEST_TDR_CFG_MAX = __ETHTOOL_A_CABLE_TEST_TDR_CFG_CNT - 1 +}; + +enum { + ETHTOOL_A_CABLE_TEST_TDR_NTF_UNSPEC, + ETHTOOL_A_CABLE_TEST_TDR_NTF_HEADER, /* nest - ETHTOOL_A_HEADER_* */ + ETHTOOL_A_CABLE_TEST_TDR_NTF_STATUS, /* u8 - _STARTED/_COMPLETE */ + ETHTOOL_A_CABLE_TEST_TDR_NTF_NEST, /* nest - of results: */ + + /* add new constants above here */ + __ETHTOOL_A_CABLE_TEST_TDR_NTF_CNT, + ETHTOOL_A_CABLE_TEST_TDR_NTF_MAX = __ETHTOOL_A_CABLE_TEST_TDR_NTF_CNT - 1 +}; + +enum { + ETHTOOL_A_CABLE_TEST_TDR_UNSPEC, + ETHTOOL_A_CABLE_TEST_TDR_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_CABLE_TEST_TDR_CFG, /* nest - *_TDR_CFG_* */ + + /* add new constants above here */ + __ETHTOOL_A_CABLE_TEST_TDR_CNT, + ETHTOOL_A_CABLE_TEST_TDR_MAX = __ETHTOOL_A_CABLE_TEST_TDR_CNT - 1 +}; + +enum { + ETHTOOL_A_TUNNEL_UDP_ENTRY_UNSPEC, + + ETHTOOL_A_TUNNEL_UDP_ENTRY_PORT, /* be16 */ + ETHTOOL_A_TUNNEL_UDP_ENTRY_TYPE, /* u32 */ + + /* add new constants above here */ + __ETHTOOL_A_TUNNEL_UDP_ENTRY_CNT, + ETHTOOL_A_TUNNEL_UDP_ENTRY_MAX = (__ETHTOOL_A_TUNNEL_UDP_ENTRY_CNT - 1) +}; + +enum { + ETHTOOL_A_TUNNEL_UDP_TABLE_UNSPEC, + + ETHTOOL_A_TUNNEL_UDP_TABLE_SIZE, /* u32 */ + ETHTOOL_A_TUNNEL_UDP_TABLE_TYPES, /* bitset */ + ETHTOOL_A_TUNNEL_UDP_TABLE_ENTRY, /* nest - _UDP_ENTRY_* */ + + /* add new constants above here */ + __ETHTOOL_A_TUNNEL_UDP_TABLE_CNT, + ETHTOOL_A_TUNNEL_UDP_TABLE_MAX = (__ETHTOOL_A_TUNNEL_UDP_TABLE_CNT - 1) +}; + +enum { + ETHTOOL_A_TUNNEL_UDP_UNSPEC, + + ETHTOOL_A_TUNNEL_UDP_TABLE, /* nest - _UDP_TABLE_* */ + + /* add new constants above here */ + __ETHTOOL_A_TUNNEL_UDP_CNT, + ETHTOOL_A_TUNNEL_UDP_MAX = (__ETHTOOL_A_TUNNEL_UDP_CNT - 1) +}; + +enum { + ETHTOOL_A_TUNNEL_INFO_UNSPEC, + ETHTOOL_A_TUNNEL_INFO_HEADER, /* nest - _A_HEADER_* */ + + ETHTOOL_A_TUNNEL_INFO_UDP_PORTS, /* nest - _UDP_TABLE */ + + /* add new constants above here */ + __ETHTOOL_A_TUNNEL_INFO_CNT, + ETHTOOL_A_TUNNEL_INFO_MAX = (__ETHTOOL_A_TUNNEL_INFO_CNT - 1) +}; + +/* FEC */ + +enum { + ETHTOOL_A_FEC_STAT_UNSPEC, + ETHTOOL_A_FEC_STAT_PAD, + + ETHTOOL_A_FEC_STAT_CORRECTED, /* array, u64 */ + ETHTOOL_A_FEC_STAT_UNCORR, /* array, u64 */ + ETHTOOL_A_FEC_STAT_CORR_BITS, /* array, u64 */ + + /* add new constants above here */ + __ETHTOOL_A_FEC_STAT_CNT, + ETHTOOL_A_FEC_STAT_MAX = (__ETHTOOL_A_FEC_STAT_CNT - 1) +}; + +enum { + ETHTOOL_A_FEC_UNSPEC, + ETHTOOL_A_FEC_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_FEC_MODES, /* bitset */ + ETHTOOL_A_FEC_AUTO, /* u8 */ + ETHTOOL_A_FEC_ACTIVE, /* u32 */ + ETHTOOL_A_FEC_STATS, /* nest - _A_FEC_STAT */ + + __ETHTOOL_A_FEC_CNT, + ETHTOOL_A_FEC_MAX = (__ETHTOOL_A_FEC_CNT - 1) +}; + +/* MODULE EEPROM */ + +enum { + ETHTOOL_A_MODULE_EEPROM_UNSPEC, + ETHTOOL_A_MODULE_EEPROM_HEADER, /* nest - _A_HEADER_* */ + + ETHTOOL_A_MODULE_EEPROM_OFFSET, /* u32 */ + ETHTOOL_A_MODULE_EEPROM_LENGTH, /* u32 */ + ETHTOOL_A_MODULE_EEPROM_PAGE, /* u8 */ + ETHTOOL_A_MODULE_EEPROM_BANK, /* u8 */ + ETHTOOL_A_MODULE_EEPROM_I2C_ADDRESS, /* u8 */ + ETHTOOL_A_MODULE_EEPROM_DATA, /* binary */ + + __ETHTOOL_A_MODULE_EEPROM_CNT, + ETHTOOL_A_MODULE_EEPROM_MAX = (__ETHTOOL_A_MODULE_EEPROM_CNT - 1) +}; + + +enum { + ETHTOOL_A_STATS_GRP_UNSPEC, + ETHTOOL_A_STATS_GRP_PAD, + + ETHTOOL_A_STATS_GRP_ID, /* u32 */ + ETHTOOL_A_STATS_GRP_SS_ID, /* u32 */ + + ETHTOOL_A_STATS_GRP_STAT, /* nest */ + + ETHTOOL_A_STATS_GRP_HIST_RX, /* nest */ + ETHTOOL_A_STATS_GRP_HIST_TX, /* nest */ + + ETHTOOL_A_STATS_GRP_HIST_BKT_LOW, /* u32 */ + ETHTOOL_A_STATS_GRP_HIST_BKT_HI, /* u32 */ + ETHTOOL_A_STATS_GRP_HIST_VAL, /* u64 */ + + /* add new constants above here */ + __ETHTOOL_A_STATS_GRP_CNT, + ETHTOOL_A_STATS_GRP_MAX = (__ETHTOOL_A_STATS_GRP_CNT - 1) +}; + +/* STATS */ + +enum { + ETHTOOL_A_STATS_UNSPEC, + ETHTOOL_A_STATS_PAD, + ETHTOOL_A_STATS_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_STATS_GROUPS, /* bitset */ + + ETHTOOL_A_STATS_GRP, /* nest - _A_STATS_GRP_* */ + + ETHTOOL_A_STATS_SRC, /* u32 */ + + /* add new constants above here */ + __ETHTOOL_A_STATS_CNT, + ETHTOOL_A_STATS_MAX = (__ETHTOOL_A_STATS_CNT - 1) +}; + +/* PHC VCLOCKS */ + +enum { + ETHTOOL_A_PHC_VCLOCKS_UNSPEC, + ETHTOOL_A_PHC_VCLOCKS_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_PHC_VCLOCKS_NUM, /* u32 */ + ETHTOOL_A_PHC_VCLOCKS_INDEX, /* array, s32 */ + + /* add new constants above here */ + __ETHTOOL_A_PHC_VCLOCKS_CNT, + ETHTOOL_A_PHC_VCLOCKS_MAX = (__ETHTOOL_A_PHC_VCLOCKS_CNT - 1) +}; + +/* MODULE */ + +enum { + ETHTOOL_A_MODULE_UNSPEC, + ETHTOOL_A_MODULE_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_MODULE_POWER_MODE_POLICY, /* u8 */ + ETHTOOL_A_MODULE_POWER_MODE, /* u8 */ + + /* add new constants above here */ + __ETHTOOL_A_MODULE_CNT, + ETHTOOL_A_MODULE_MAX = (__ETHTOOL_A_MODULE_CNT - 1) +}; + +/* Power Sourcing Equipment */ +enum { + ETHTOOL_A_C33_PSE_PW_LIMIT_UNSPEC, + ETHTOOL_A_C33_PSE_PW_LIMIT_MIN, /* u32 */ + ETHTOOL_A_C33_PSE_PW_LIMIT_MAX, /* u32 */ +}; + +enum { + ETHTOOL_A_PSE_UNSPEC, + ETHTOOL_A_PSE_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_PODL_PSE_ADMIN_STATE, /* u32 */ + ETHTOOL_A_PODL_PSE_ADMIN_CONTROL, /* u32 */ + ETHTOOL_A_PODL_PSE_PW_D_STATUS, /* u32 */ + ETHTOOL_A_C33_PSE_ADMIN_STATE, /* u32 */ + ETHTOOL_A_C33_PSE_ADMIN_CONTROL, /* u32 */ + ETHTOOL_A_C33_PSE_PW_D_STATUS, /* u32 */ + ETHTOOL_A_C33_PSE_PW_CLASS, /* u32 */ + ETHTOOL_A_C33_PSE_ACTUAL_PW, /* u32 */ + ETHTOOL_A_C33_PSE_EXT_STATE, /* u32 */ + ETHTOOL_A_C33_PSE_EXT_SUBSTATE, /* u32 */ + ETHTOOL_A_C33_PSE_AVAIL_PW_LIMIT, /* u32 */ + ETHTOOL_A_C33_PSE_PW_LIMIT_RANGES, /* nest - _C33_PSE_PW_LIMIT_* */ + + /* add new constants above here */ + __ETHTOOL_A_PSE_CNT, + ETHTOOL_A_PSE_MAX = (__ETHTOOL_A_PSE_CNT - 1) +}; + +enum { + ETHTOOL_A_RSS_UNSPEC, + ETHTOOL_A_RSS_HEADER, + ETHTOOL_A_RSS_CONTEXT, /* u32 */ + ETHTOOL_A_RSS_HFUNC, /* u32 */ + ETHTOOL_A_RSS_INDIR, /* binary */ + ETHTOOL_A_RSS_HKEY, /* binary */ + ETHTOOL_A_RSS_INPUT_XFRM, /* u32 */ + ETHTOOL_A_RSS_START_CONTEXT, /* u32 */ + + __ETHTOOL_A_RSS_CNT, + ETHTOOL_A_RSS_MAX = (__ETHTOOL_A_RSS_CNT - 1), +}; + +/* PLCA */ + +enum { + ETHTOOL_A_PLCA_UNSPEC, + ETHTOOL_A_PLCA_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_PLCA_VERSION, /* u16 */ + ETHTOOL_A_PLCA_ENABLED, /* u8 */ + ETHTOOL_A_PLCA_STATUS, /* u8 */ + ETHTOOL_A_PLCA_NODE_CNT, /* u32 */ + ETHTOOL_A_PLCA_NODE_ID, /* u32 */ + ETHTOOL_A_PLCA_TO_TMR, /* u32 */ + ETHTOOL_A_PLCA_BURST_CNT, /* u32 */ + ETHTOOL_A_PLCA_BURST_TMR, /* u32 */ + + /* add new constants above here */ + __ETHTOOL_A_PLCA_CNT, + ETHTOOL_A_PLCA_MAX = (__ETHTOOL_A_PLCA_CNT - 1) +}; + +/* MODULE_FW_FLASH */ + +enum { + ETHTOOL_A_MODULE_FW_FLASH_UNSPEC, + ETHTOOL_A_MODULE_FW_FLASH_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_MODULE_FW_FLASH_FILE_NAME, /* string */ + ETHTOOL_A_MODULE_FW_FLASH_PASSWORD, /* u32 */ + ETHTOOL_A_MODULE_FW_FLASH_STATUS, /* u32 */ + ETHTOOL_A_MODULE_FW_FLASH_STATUS_MSG, /* string */ + ETHTOOL_A_MODULE_FW_FLASH_DONE, /* uint */ + ETHTOOL_A_MODULE_FW_FLASH_TOTAL, /* uint */ + + /* add new constants above here */ + __ETHTOOL_A_MODULE_FW_FLASH_CNT, + ETHTOOL_A_MODULE_FW_FLASH_MAX = (__ETHTOOL_A_MODULE_FW_FLASH_CNT - 1) +}; + +enum { + ETHTOOL_A_PHY_UNSPEC, + ETHTOOL_A_PHY_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_PHY_INDEX, /* u32 */ + ETHTOOL_A_PHY_DRVNAME, /* string */ + ETHTOOL_A_PHY_NAME, /* string */ + ETHTOOL_A_PHY_UPSTREAM_TYPE, /* u32 */ + ETHTOOL_A_PHY_UPSTREAM_INDEX, /* u32 */ + ETHTOOL_A_PHY_UPSTREAM_SFP_NAME, /* string */ + ETHTOOL_A_PHY_DOWNSTREAM_SFP_NAME, /* string */ + + /* add new constants above here */ + __ETHTOOL_A_PHY_CNT, + ETHTOOL_A_PHY_MAX = (__ETHTOOL_A_PHY_CNT - 1) +}; + +/* message types - userspace to kernel */ +enum { + ETHTOOL_MSG_USER_NONE, + ETHTOOL_MSG_STRSET_GET, + ETHTOOL_MSG_LINKINFO_GET, + ETHTOOL_MSG_LINKINFO_SET, + ETHTOOL_MSG_LINKMODES_GET, + ETHTOOL_MSG_LINKMODES_SET, + ETHTOOL_MSG_LINKSTATE_GET, + ETHTOOL_MSG_DEBUG_GET, + ETHTOOL_MSG_DEBUG_SET, + ETHTOOL_MSG_WOL_GET, + ETHTOOL_MSG_WOL_SET, + ETHTOOL_MSG_FEATURES_GET, + ETHTOOL_MSG_FEATURES_SET, + ETHTOOL_MSG_PRIVFLAGS_GET, + ETHTOOL_MSG_PRIVFLAGS_SET, + ETHTOOL_MSG_RINGS_GET, + ETHTOOL_MSG_RINGS_SET, + ETHTOOL_MSG_CHANNELS_GET, + ETHTOOL_MSG_CHANNELS_SET, + ETHTOOL_MSG_COALESCE_GET, + ETHTOOL_MSG_COALESCE_SET, + ETHTOOL_MSG_PAUSE_GET, + ETHTOOL_MSG_PAUSE_SET, + ETHTOOL_MSG_EEE_GET, + ETHTOOL_MSG_EEE_SET, + ETHTOOL_MSG_TSINFO_GET, + ETHTOOL_MSG_CABLE_TEST_ACT, + ETHTOOL_MSG_CABLE_TEST_TDR_ACT, + ETHTOOL_MSG_TUNNEL_INFO_GET, + ETHTOOL_MSG_FEC_GET, + ETHTOOL_MSG_FEC_SET, + ETHTOOL_MSG_MODULE_EEPROM_GET, + ETHTOOL_MSG_STATS_GET, + ETHTOOL_MSG_PHC_VCLOCKS_GET, + ETHTOOL_MSG_MODULE_GET, + ETHTOOL_MSG_MODULE_SET, + ETHTOOL_MSG_PSE_GET, + ETHTOOL_MSG_PSE_SET, + ETHTOOL_MSG_RSS_GET, + ETHTOOL_MSG_PLCA_GET_CFG, + ETHTOOL_MSG_PLCA_SET_CFG, + ETHTOOL_MSG_PLCA_GET_STATUS, + ETHTOOL_MSG_MM_GET, + ETHTOOL_MSG_MM_SET, + ETHTOOL_MSG_MODULE_FW_FLASH_ACT, + ETHTOOL_MSG_PHY_GET, + + /* add new constants above here */ + __ETHTOOL_MSG_USER_CNT, + ETHTOOL_MSG_USER_MAX = __ETHTOOL_MSG_USER_CNT - 1 +}; + +/* message types - kernel to userspace */ +enum { + ETHTOOL_MSG_KERNEL_NONE, + ETHTOOL_MSG_STRSET_GET_REPLY, + ETHTOOL_MSG_LINKINFO_GET_REPLY, + ETHTOOL_MSG_LINKINFO_NTF, + ETHTOOL_MSG_LINKMODES_GET_REPLY, + ETHTOOL_MSG_LINKMODES_NTF, + ETHTOOL_MSG_LINKSTATE_GET_REPLY, + ETHTOOL_MSG_DEBUG_GET_REPLY, + ETHTOOL_MSG_DEBUG_NTF, + ETHTOOL_MSG_WOL_GET_REPLY, + ETHTOOL_MSG_WOL_NTF, + ETHTOOL_MSG_FEATURES_GET_REPLY, + ETHTOOL_MSG_FEATURES_SET_REPLY, + ETHTOOL_MSG_FEATURES_NTF, + ETHTOOL_MSG_PRIVFLAGS_GET_REPLY, + ETHTOOL_MSG_PRIVFLAGS_NTF, + ETHTOOL_MSG_RINGS_GET_REPLY, + ETHTOOL_MSG_RINGS_NTF, + ETHTOOL_MSG_CHANNELS_GET_REPLY, + ETHTOOL_MSG_CHANNELS_NTF, + ETHTOOL_MSG_COALESCE_GET_REPLY, + ETHTOOL_MSG_COALESCE_NTF, + ETHTOOL_MSG_PAUSE_GET_REPLY, + ETHTOOL_MSG_PAUSE_NTF, + ETHTOOL_MSG_EEE_GET_REPLY, + ETHTOOL_MSG_EEE_NTF, + ETHTOOL_MSG_TSINFO_GET_REPLY, + ETHTOOL_MSG_CABLE_TEST_NTF, + ETHTOOL_MSG_CABLE_TEST_TDR_NTF, + ETHTOOL_MSG_TUNNEL_INFO_GET_REPLY, + ETHTOOL_MSG_FEC_GET_REPLY, + ETHTOOL_MSG_FEC_NTF, + ETHTOOL_MSG_MODULE_EEPROM_GET_REPLY, + ETHTOOL_MSG_STATS_GET_REPLY, + ETHTOOL_MSG_PHC_VCLOCKS_GET_REPLY, + ETHTOOL_MSG_MODULE_GET_REPLY, + ETHTOOL_MSG_MODULE_NTF, + ETHTOOL_MSG_PSE_GET_REPLY, + ETHTOOL_MSG_RSS_GET_REPLY, + ETHTOOL_MSG_PLCA_GET_CFG_REPLY, + ETHTOOL_MSG_PLCA_GET_STATUS_REPLY, + ETHTOOL_MSG_PLCA_NTF, + ETHTOOL_MSG_MM_GET_REPLY, + ETHTOOL_MSG_MM_NTF, + ETHTOOL_MSG_MODULE_FW_FLASH_NTF, + ETHTOOL_MSG_PHY_GET_REPLY, + ETHTOOL_MSG_PHY_NTF, + + /* add new constants above here */ + __ETHTOOL_MSG_KERNEL_CNT, + ETHTOOL_MSG_KERNEL_MAX = __ETHTOOL_MSG_KERNEL_CNT - 1 +}; + +#endif /* _UAPI_LINUX_ETHTOOL_NETLINK_GENERATED_H */ -- cgit v1.2.3 From dd7cde36de15b071b5f9163d21d7c9142089b424 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Wed, 4 Dec 2024 07:55:48 -0800 Subject: ethtool: remove the comments that are not gonna be generated Cleanup the header manually to make it easier to review the changes that ynl generator brings in. No functional changes. Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20241204155549.641348-8-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- include/uapi/linux/ethtool_netlink_generated.h | 678 ++++++++++--------------- 1 file changed, 274 insertions(+), 404 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/ethtool_netlink_generated.h b/include/uapi/linux/ethtool_netlink_generated.h index 4b4bf17d1a88..35a24d490efe 100644 --- a/include/uapi/linux/ethtool_netlink_generated.h +++ b/include/uapi/linux/ethtool_netlink_generated.h @@ -2,8 +2,6 @@ #ifndef _UAPI_LINUX_ETHTOOL_NETLINK_GENERATED_H #define _UAPI_LINUX_ETHTOOL_NETLINK_GENERATED_H -/* TUNNEL INFO */ - enum { ETHTOOL_UDP_TUNNEL_TYPE_VXLAN, ETHTOOL_UDP_TUNNEL_TYPE_GENEVE, @@ -12,8 +10,6 @@ enum { __ETHTOOL_UDP_TUNNEL_TYPE_CNT }; -/* request header */ - enum ethtool_header_flags { ETHTOOL_FLAG_COMPACT_BITSETS = 1 << 0, /* use compact bitsets in reply */ ETHTOOL_FLAG_OMIT_REPLY = 1 << 1, /* provide optional reply for SET or ACT requests */ @@ -28,303 +24,250 @@ enum { enum { ETHTOOL_A_HEADER_UNSPEC, - ETHTOOL_A_HEADER_DEV_INDEX, /* u32 */ - ETHTOOL_A_HEADER_DEV_NAME, /* string */ - ETHTOOL_A_HEADER_FLAGS, /* u32 - ETHTOOL_FLAG_* */ - ETHTOOL_A_HEADER_PHY_INDEX, /* u32 */ + ETHTOOL_A_HEADER_DEV_INDEX, + ETHTOOL_A_HEADER_DEV_NAME, + ETHTOOL_A_HEADER_FLAGS, + ETHTOOL_A_HEADER_PHY_INDEX, - /* add new constants above here */ __ETHTOOL_A_HEADER_CNT, ETHTOOL_A_HEADER_MAX = __ETHTOOL_A_HEADER_CNT - 1 }; -/* bit sets */ - enum { ETHTOOL_A_BITSET_BIT_UNSPEC, - ETHTOOL_A_BITSET_BIT_INDEX, /* u32 */ - ETHTOOL_A_BITSET_BIT_NAME, /* string */ - ETHTOOL_A_BITSET_BIT_VALUE, /* flag */ + ETHTOOL_A_BITSET_BIT_INDEX, + ETHTOOL_A_BITSET_BIT_NAME, + ETHTOOL_A_BITSET_BIT_VALUE, - /* add new constants above here */ __ETHTOOL_A_BITSET_BIT_CNT, ETHTOOL_A_BITSET_BIT_MAX = __ETHTOOL_A_BITSET_BIT_CNT - 1 }; enum { ETHTOOL_A_BITSET_BITS_UNSPEC, - ETHTOOL_A_BITSET_BITS_BIT, /* nest - _A_BITSET_BIT_* */ + ETHTOOL_A_BITSET_BITS_BIT, - /* add new constants above here */ __ETHTOOL_A_BITSET_BITS_CNT, ETHTOOL_A_BITSET_BITS_MAX = __ETHTOOL_A_BITSET_BITS_CNT - 1 }; enum { ETHTOOL_A_BITSET_UNSPEC, - ETHTOOL_A_BITSET_NOMASK, /* flag */ - ETHTOOL_A_BITSET_SIZE, /* u32 */ - ETHTOOL_A_BITSET_BITS, /* nest - _A_BITSET_BITS_* */ - ETHTOOL_A_BITSET_VALUE, /* binary */ - ETHTOOL_A_BITSET_MASK, /* binary */ + ETHTOOL_A_BITSET_NOMASK, + ETHTOOL_A_BITSET_SIZE, + ETHTOOL_A_BITSET_BITS, + ETHTOOL_A_BITSET_VALUE, + ETHTOOL_A_BITSET_MASK, - /* add new constants above here */ __ETHTOOL_A_BITSET_CNT, ETHTOOL_A_BITSET_MAX = __ETHTOOL_A_BITSET_CNT - 1 }; -/* string sets */ - enum { ETHTOOL_A_STRING_UNSPEC, - ETHTOOL_A_STRING_INDEX, /* u32 */ - ETHTOOL_A_STRING_VALUE, /* string */ + ETHTOOL_A_STRING_INDEX, + ETHTOOL_A_STRING_VALUE, - /* add new constants above here */ __ETHTOOL_A_STRING_CNT, ETHTOOL_A_STRING_MAX = __ETHTOOL_A_STRING_CNT - 1 }; enum { ETHTOOL_A_STRINGS_UNSPEC, - ETHTOOL_A_STRINGS_STRING, /* nest - _A_STRINGS_* */ + ETHTOOL_A_STRINGS_STRING, - /* add new constants above here */ __ETHTOOL_A_STRINGS_CNT, ETHTOOL_A_STRINGS_MAX = __ETHTOOL_A_STRINGS_CNT - 1 }; enum { ETHTOOL_A_STRINGSET_UNSPEC, - ETHTOOL_A_STRINGSET_ID, /* u32 */ - ETHTOOL_A_STRINGSET_COUNT, /* u32 */ - ETHTOOL_A_STRINGSET_STRINGS, /* nest - _A_STRINGS_* */ + ETHTOOL_A_STRINGSET_ID, + ETHTOOL_A_STRINGSET_COUNT, + ETHTOOL_A_STRINGSET_STRINGS, - /* add new constants above here */ __ETHTOOL_A_STRINGSET_CNT, ETHTOOL_A_STRINGSET_MAX = __ETHTOOL_A_STRINGSET_CNT - 1 }; enum { ETHTOOL_A_STRINGSETS_UNSPEC, - ETHTOOL_A_STRINGSETS_STRINGSET, /* nest - _A_STRINGSET_* */ + ETHTOOL_A_STRINGSETS_STRINGSET, - /* add new constants above here */ __ETHTOOL_A_STRINGSETS_CNT, ETHTOOL_A_STRINGSETS_MAX = __ETHTOOL_A_STRINGSETS_CNT - 1 }; -/* STRSET */ - enum { ETHTOOL_A_STRSET_UNSPEC, - ETHTOOL_A_STRSET_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_STRSET_STRINGSETS, /* nest - _A_STRINGSETS_* */ - ETHTOOL_A_STRSET_COUNTS_ONLY, /* flag */ + ETHTOOL_A_STRSET_HEADER, + ETHTOOL_A_STRSET_STRINGSETS, + ETHTOOL_A_STRSET_COUNTS_ONLY, - /* add new constants above here */ __ETHTOOL_A_STRSET_CNT, ETHTOOL_A_STRSET_MAX = __ETHTOOL_A_STRSET_CNT - 1 }; -/* PRIVFLAGS */ - enum { ETHTOOL_A_PRIVFLAGS_UNSPEC, - ETHTOOL_A_PRIVFLAGS_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_PRIVFLAGS_FLAGS, /* bitset */ + ETHTOOL_A_PRIVFLAGS_HEADER, + ETHTOOL_A_PRIVFLAGS_FLAGS, - /* add new constants above here */ __ETHTOOL_A_PRIVFLAGS_CNT, ETHTOOL_A_PRIVFLAGS_MAX = __ETHTOOL_A_PRIVFLAGS_CNT - 1 }; -/* RINGS */ - enum { ETHTOOL_A_RINGS_UNSPEC, - ETHTOOL_A_RINGS_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_RINGS_RX_MAX, /* u32 */ - ETHTOOL_A_RINGS_RX_MINI_MAX, /* u32 */ - ETHTOOL_A_RINGS_RX_JUMBO_MAX, /* u32 */ - ETHTOOL_A_RINGS_TX_MAX, /* u32 */ - ETHTOOL_A_RINGS_RX, /* u32 */ - ETHTOOL_A_RINGS_RX_MINI, /* u32 */ - ETHTOOL_A_RINGS_RX_JUMBO, /* u32 */ - ETHTOOL_A_RINGS_TX, /* u32 */ - ETHTOOL_A_RINGS_RX_BUF_LEN, /* u32 */ - ETHTOOL_A_RINGS_TCP_DATA_SPLIT, /* u8 */ - ETHTOOL_A_RINGS_CQE_SIZE, /* u32 */ - ETHTOOL_A_RINGS_TX_PUSH, /* u8 */ - ETHTOOL_A_RINGS_RX_PUSH, /* u8 */ - ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN, /* u32 */ - ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN_MAX, /* u32 */ - - /* add new constants above here */ + ETHTOOL_A_RINGS_HEADER, + ETHTOOL_A_RINGS_RX_MAX, + ETHTOOL_A_RINGS_RX_MINI_MAX, + ETHTOOL_A_RINGS_RX_JUMBO_MAX, + ETHTOOL_A_RINGS_TX_MAX, + ETHTOOL_A_RINGS_RX, + ETHTOOL_A_RINGS_RX_MINI, + ETHTOOL_A_RINGS_RX_JUMBO, + ETHTOOL_A_RINGS_TX, + ETHTOOL_A_RINGS_RX_BUF_LEN, + ETHTOOL_A_RINGS_TCP_DATA_SPLIT, + ETHTOOL_A_RINGS_CQE_SIZE, + ETHTOOL_A_RINGS_TX_PUSH, + ETHTOOL_A_RINGS_RX_PUSH, + ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN, + ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN_MAX, + __ETHTOOL_A_RINGS_CNT, ETHTOOL_A_RINGS_MAX = (__ETHTOOL_A_RINGS_CNT - 1) }; -/* MAC Merge (802.3) */ - enum { ETHTOOL_A_MM_STAT_UNSPEC, ETHTOOL_A_MM_STAT_PAD, + ETHTOOL_A_MM_STAT_REASSEMBLY_ERRORS, + ETHTOOL_A_MM_STAT_SMD_ERRORS, + ETHTOOL_A_MM_STAT_REASSEMBLY_OK, + ETHTOOL_A_MM_STAT_RX_FRAG_COUNT, + ETHTOOL_A_MM_STAT_TX_FRAG_COUNT, + ETHTOOL_A_MM_STAT_HOLD_COUNT, - /* aMACMergeFrameAssErrorCount */ - ETHTOOL_A_MM_STAT_REASSEMBLY_ERRORS, /* u64 */ - /* aMACMergeFrameSmdErrorCount */ - ETHTOOL_A_MM_STAT_SMD_ERRORS, /* u64 */ - /* aMACMergeFrameAssOkCount */ - ETHTOOL_A_MM_STAT_REASSEMBLY_OK, /* u64 */ - /* aMACMergeFragCountRx */ - ETHTOOL_A_MM_STAT_RX_FRAG_COUNT, /* u64 */ - /* aMACMergeFragCountTx */ - ETHTOOL_A_MM_STAT_TX_FRAG_COUNT, /* u64 */ - /* aMACMergeHoldCount */ - ETHTOOL_A_MM_STAT_HOLD_COUNT, /* u64 */ - - /* add new constants above here */ __ETHTOOL_A_MM_STAT_CNT, ETHTOOL_A_MM_STAT_MAX = (__ETHTOOL_A_MM_STAT_CNT - 1) }; enum { ETHTOOL_A_MM_UNSPEC, - ETHTOOL_A_MM_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_MM_PMAC_ENABLED, /* u8 */ - ETHTOOL_A_MM_TX_ENABLED, /* u8 */ - ETHTOOL_A_MM_TX_ACTIVE, /* u8 */ - ETHTOOL_A_MM_TX_MIN_FRAG_SIZE, /* u32 */ - ETHTOOL_A_MM_RX_MIN_FRAG_SIZE, /* u32 */ - ETHTOOL_A_MM_VERIFY_ENABLED, /* u8 */ - ETHTOOL_A_MM_VERIFY_STATUS, /* u8 */ - ETHTOOL_A_MM_VERIFY_TIME, /* u32 */ - ETHTOOL_A_MM_MAX_VERIFY_TIME, /* u32 */ - ETHTOOL_A_MM_STATS, /* nest - _A_MM_STAT_* */ - - /* add new constants above here */ + ETHTOOL_A_MM_HEADER, + ETHTOOL_A_MM_PMAC_ENABLED, + ETHTOOL_A_MM_TX_ENABLED, + ETHTOOL_A_MM_TX_ACTIVE, + ETHTOOL_A_MM_TX_MIN_FRAG_SIZE, + ETHTOOL_A_MM_RX_MIN_FRAG_SIZE, + ETHTOOL_A_MM_VERIFY_ENABLED, + ETHTOOL_A_MM_VERIFY_STATUS, + ETHTOOL_A_MM_VERIFY_TIME, + ETHTOOL_A_MM_MAX_VERIFY_TIME, + ETHTOOL_A_MM_STATS, + __ETHTOOL_A_MM_CNT, ETHTOOL_A_MM_MAX = (__ETHTOOL_A_MM_CNT - 1) }; -/* LINKINFO */ - enum { ETHTOOL_A_LINKINFO_UNSPEC, - ETHTOOL_A_LINKINFO_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_LINKINFO_PORT, /* u8 */ - ETHTOOL_A_LINKINFO_PHYADDR, /* u8 */ - ETHTOOL_A_LINKINFO_TP_MDIX, /* u8 */ - ETHTOOL_A_LINKINFO_TP_MDIX_CTRL, /* u8 */ - ETHTOOL_A_LINKINFO_TRANSCEIVER, /* u8 */ - - /* add new constants above here */ + ETHTOOL_A_LINKINFO_HEADER, + ETHTOOL_A_LINKINFO_PORT, + ETHTOOL_A_LINKINFO_PHYADDR, + ETHTOOL_A_LINKINFO_TP_MDIX, + ETHTOOL_A_LINKINFO_TP_MDIX_CTRL, + ETHTOOL_A_LINKINFO_TRANSCEIVER, + __ETHTOOL_A_LINKINFO_CNT, ETHTOOL_A_LINKINFO_MAX = __ETHTOOL_A_LINKINFO_CNT - 1 }; -/* LINKMODES */ - enum { ETHTOOL_A_LINKMODES_UNSPEC, - ETHTOOL_A_LINKMODES_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_LINKMODES_AUTONEG, /* u8 */ - ETHTOOL_A_LINKMODES_OURS, /* bitset */ - ETHTOOL_A_LINKMODES_PEER, /* bitset */ - ETHTOOL_A_LINKMODES_SPEED, /* u32 */ - ETHTOOL_A_LINKMODES_DUPLEX, /* u8 */ - ETHTOOL_A_LINKMODES_MASTER_SLAVE_CFG, /* u8 */ - ETHTOOL_A_LINKMODES_MASTER_SLAVE_STATE, /* u8 */ - ETHTOOL_A_LINKMODES_LANES, /* u32 */ - ETHTOOL_A_LINKMODES_RATE_MATCHING, /* u8 */ - - /* add new constants above here */ + ETHTOOL_A_LINKMODES_HEADER, + ETHTOOL_A_LINKMODES_AUTONEG, + ETHTOOL_A_LINKMODES_OURS, + ETHTOOL_A_LINKMODES_PEER, + ETHTOOL_A_LINKMODES_SPEED, + ETHTOOL_A_LINKMODES_DUPLEX, + ETHTOOL_A_LINKMODES_MASTER_SLAVE_CFG, + ETHTOOL_A_LINKMODES_MASTER_SLAVE_STATE, + ETHTOOL_A_LINKMODES_LANES, + ETHTOOL_A_LINKMODES_RATE_MATCHING, + __ETHTOOL_A_LINKMODES_CNT, ETHTOOL_A_LINKMODES_MAX = __ETHTOOL_A_LINKMODES_CNT - 1 }; -/* LINKSTATE */ - enum { ETHTOOL_A_LINKSTATE_UNSPEC, - ETHTOOL_A_LINKSTATE_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_LINKSTATE_LINK, /* u8 */ - ETHTOOL_A_LINKSTATE_SQI, /* u32 */ - ETHTOOL_A_LINKSTATE_SQI_MAX, /* u32 */ - ETHTOOL_A_LINKSTATE_EXT_STATE, /* u8 */ - ETHTOOL_A_LINKSTATE_EXT_SUBSTATE, /* u8 */ - ETHTOOL_A_LINKSTATE_EXT_DOWN_CNT, /* u32 */ - - /* add new constants above here */ + ETHTOOL_A_LINKSTATE_HEADER, + ETHTOOL_A_LINKSTATE_LINK, + ETHTOOL_A_LINKSTATE_SQI, + ETHTOOL_A_LINKSTATE_SQI_MAX, + ETHTOOL_A_LINKSTATE_EXT_STATE, + ETHTOOL_A_LINKSTATE_EXT_SUBSTATE, + ETHTOOL_A_LINKSTATE_EXT_DOWN_CNT, + __ETHTOOL_A_LINKSTATE_CNT, ETHTOOL_A_LINKSTATE_MAX = __ETHTOOL_A_LINKSTATE_CNT - 1 }; -/* DEBUG */ - enum { ETHTOOL_A_DEBUG_UNSPEC, - ETHTOOL_A_DEBUG_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_DEBUG_MSGMASK, /* bitset */ + ETHTOOL_A_DEBUG_HEADER, + ETHTOOL_A_DEBUG_MSGMASK, - /* add new constants above here */ __ETHTOOL_A_DEBUG_CNT, ETHTOOL_A_DEBUG_MAX = __ETHTOOL_A_DEBUG_CNT - 1 }; -/* WOL */ - enum { ETHTOOL_A_WOL_UNSPEC, - ETHTOOL_A_WOL_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_WOL_MODES, /* bitset */ - ETHTOOL_A_WOL_SOPASS, /* binary */ + ETHTOOL_A_WOL_HEADER, + ETHTOOL_A_WOL_MODES, + ETHTOOL_A_WOL_SOPASS, - /* add new constants above here */ __ETHTOOL_A_WOL_CNT, ETHTOOL_A_WOL_MAX = __ETHTOOL_A_WOL_CNT - 1 }; -/* FEATURES */ - enum { ETHTOOL_A_FEATURES_UNSPEC, - ETHTOOL_A_FEATURES_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_FEATURES_HW, /* bitset */ - ETHTOOL_A_FEATURES_WANTED, /* bitset */ - ETHTOOL_A_FEATURES_ACTIVE, /* bitset */ - ETHTOOL_A_FEATURES_NOCHANGE, /* bitset */ + ETHTOOL_A_FEATURES_HEADER, + ETHTOOL_A_FEATURES_HW, + ETHTOOL_A_FEATURES_WANTED, + ETHTOOL_A_FEATURES_ACTIVE, + ETHTOOL_A_FEATURES_NOCHANGE, - /* add new constants above here */ __ETHTOOL_A_FEATURES_CNT, ETHTOOL_A_FEATURES_MAX = __ETHTOOL_A_FEATURES_CNT - 1 }; -/* CHANNELS */ - enum { ETHTOOL_A_CHANNELS_UNSPEC, - ETHTOOL_A_CHANNELS_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_CHANNELS_RX_MAX, /* u32 */ - ETHTOOL_A_CHANNELS_TX_MAX, /* u32 */ - ETHTOOL_A_CHANNELS_OTHER_MAX, /* u32 */ - ETHTOOL_A_CHANNELS_COMBINED_MAX, /* u32 */ - ETHTOOL_A_CHANNELS_RX_COUNT, /* u32 */ - ETHTOOL_A_CHANNELS_TX_COUNT, /* u32 */ - ETHTOOL_A_CHANNELS_OTHER_COUNT, /* u32 */ - ETHTOOL_A_CHANNELS_COMBINED_COUNT, /* u32 */ - - /* add new constants above here */ + ETHTOOL_A_CHANNELS_HEADER, + ETHTOOL_A_CHANNELS_RX_MAX, + ETHTOOL_A_CHANNELS_TX_MAX, + ETHTOOL_A_CHANNELS_OTHER_MAX, + ETHTOOL_A_CHANNELS_COMBINED_MAX, + ETHTOOL_A_CHANNELS_RX_COUNT, + ETHTOOL_A_CHANNELS_TX_COUNT, + ETHTOOL_A_CHANNELS_OTHER_COUNT, + ETHTOOL_A_CHANNELS_COMBINED_COUNT, + __ETHTOOL_A_CHANNELS_CNT, ETHTOOL_A_CHANNELS_MAX = (__ETHTOOL_A_CHANNELS_CNT - 1) }; enum { ETHTOOL_A_IRQ_MODERATION_UNSPEC, - ETHTOOL_A_IRQ_MODERATION_USEC, /* u32 */ - ETHTOOL_A_IRQ_MODERATION_PKTS, /* u32 */ - ETHTOOL_A_IRQ_MODERATION_COMPS, /* u32 */ + ETHTOOL_A_IRQ_MODERATION_USEC, + ETHTOOL_A_IRQ_MODERATION_PKTS, + ETHTOOL_A_IRQ_MODERATION_COMPS, __ETHTOOL_A_IRQ_MODERATION_CNT, ETHTOOL_A_IRQ_MODERATION_MAX = (__ETHTOOL_A_IRQ_MODERATION_CNT - 1) @@ -332,111 +275,91 @@ enum { enum { ETHTOOL_A_PROFILE_UNSPEC, - /* nest, _A_IRQ_MODERATION_* */ ETHTOOL_A_PROFILE_IRQ_MODERATION, __ETHTOOL_A_PROFILE_CNT, ETHTOOL_A_PROFILE_MAX = (__ETHTOOL_A_PROFILE_CNT - 1) }; -/* COALESCE */ - enum { ETHTOOL_A_COALESCE_UNSPEC, - ETHTOOL_A_COALESCE_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_COALESCE_RX_USECS, /* u32 */ - ETHTOOL_A_COALESCE_RX_MAX_FRAMES, /* u32 */ - ETHTOOL_A_COALESCE_RX_USECS_IRQ, /* u32 */ - ETHTOOL_A_COALESCE_RX_MAX_FRAMES_IRQ, /* u32 */ - ETHTOOL_A_COALESCE_TX_USECS, /* u32 */ - ETHTOOL_A_COALESCE_TX_MAX_FRAMES, /* u32 */ - ETHTOOL_A_COALESCE_TX_USECS_IRQ, /* u32 */ - ETHTOOL_A_COALESCE_TX_MAX_FRAMES_IRQ, /* u32 */ - ETHTOOL_A_COALESCE_STATS_BLOCK_USECS, /* u32 */ - ETHTOOL_A_COALESCE_USE_ADAPTIVE_RX, /* u8 */ - ETHTOOL_A_COALESCE_USE_ADAPTIVE_TX, /* u8 */ - ETHTOOL_A_COALESCE_PKT_RATE_LOW, /* u32 */ - ETHTOOL_A_COALESCE_RX_USECS_LOW, /* u32 */ - ETHTOOL_A_COALESCE_RX_MAX_FRAMES_LOW, /* u32 */ - ETHTOOL_A_COALESCE_TX_USECS_LOW, /* u32 */ - ETHTOOL_A_COALESCE_TX_MAX_FRAMES_LOW, /* u32 */ - ETHTOOL_A_COALESCE_PKT_RATE_HIGH, /* u32 */ - ETHTOOL_A_COALESCE_RX_USECS_HIGH, /* u32 */ - ETHTOOL_A_COALESCE_RX_MAX_FRAMES_HIGH, /* u32 */ - ETHTOOL_A_COALESCE_TX_USECS_HIGH, /* u32 */ - ETHTOOL_A_COALESCE_TX_MAX_FRAMES_HIGH, /* u32 */ - ETHTOOL_A_COALESCE_RATE_SAMPLE_INTERVAL, /* u32 */ - ETHTOOL_A_COALESCE_USE_CQE_MODE_TX, /* u8 */ - ETHTOOL_A_COALESCE_USE_CQE_MODE_RX, /* u8 */ - ETHTOOL_A_COALESCE_TX_AGGR_MAX_BYTES, /* u32 */ - ETHTOOL_A_COALESCE_TX_AGGR_MAX_FRAMES, /* u32 */ - ETHTOOL_A_COALESCE_TX_AGGR_TIME_USECS, /* u32 */ - /* nest - _A_PROFILE_IRQ_MODERATION */ + ETHTOOL_A_COALESCE_HEADER, + ETHTOOL_A_COALESCE_RX_USECS, + ETHTOOL_A_COALESCE_RX_MAX_FRAMES, + ETHTOOL_A_COALESCE_RX_USECS_IRQ, + ETHTOOL_A_COALESCE_RX_MAX_FRAMES_IRQ, + ETHTOOL_A_COALESCE_TX_USECS, + ETHTOOL_A_COALESCE_TX_MAX_FRAMES, + ETHTOOL_A_COALESCE_TX_USECS_IRQ, + ETHTOOL_A_COALESCE_TX_MAX_FRAMES_IRQ, + ETHTOOL_A_COALESCE_STATS_BLOCK_USECS, + ETHTOOL_A_COALESCE_USE_ADAPTIVE_RX, + ETHTOOL_A_COALESCE_USE_ADAPTIVE_TX, + ETHTOOL_A_COALESCE_PKT_RATE_LOW, + ETHTOOL_A_COALESCE_RX_USECS_LOW, + ETHTOOL_A_COALESCE_RX_MAX_FRAMES_LOW, + ETHTOOL_A_COALESCE_TX_USECS_LOW, + ETHTOOL_A_COALESCE_TX_MAX_FRAMES_LOW, + ETHTOOL_A_COALESCE_PKT_RATE_HIGH, + ETHTOOL_A_COALESCE_RX_USECS_HIGH, + ETHTOOL_A_COALESCE_RX_MAX_FRAMES_HIGH, + ETHTOOL_A_COALESCE_TX_USECS_HIGH, + ETHTOOL_A_COALESCE_TX_MAX_FRAMES_HIGH, + ETHTOOL_A_COALESCE_RATE_SAMPLE_INTERVAL, + ETHTOOL_A_COALESCE_USE_CQE_MODE_TX, + ETHTOOL_A_COALESCE_USE_CQE_MODE_RX, + ETHTOOL_A_COALESCE_TX_AGGR_MAX_BYTES, + ETHTOOL_A_COALESCE_TX_AGGR_MAX_FRAMES, + ETHTOOL_A_COALESCE_TX_AGGR_TIME_USECS, ETHTOOL_A_COALESCE_RX_PROFILE, - /* nest - _A_PROFILE_IRQ_MODERATION */ ETHTOOL_A_COALESCE_TX_PROFILE, - /* add new constants above here */ __ETHTOOL_A_COALESCE_CNT, ETHTOOL_A_COALESCE_MAX = (__ETHTOOL_A_COALESCE_CNT - 1) }; -/* PAUSE */ - enum { ETHTOOL_A_PAUSE_STAT_UNSPEC, ETHTOOL_A_PAUSE_STAT_PAD, - ETHTOOL_A_PAUSE_STAT_TX_FRAMES, ETHTOOL_A_PAUSE_STAT_RX_FRAMES, - /* add new constants above here - * adjust ETHTOOL_PAUSE_STAT_CNT if adding non-stats! - */ __ETHTOOL_A_PAUSE_STAT_CNT, ETHTOOL_A_PAUSE_STAT_MAX = (__ETHTOOL_A_PAUSE_STAT_CNT - 1) }; enum { ETHTOOL_A_PAUSE_UNSPEC, - ETHTOOL_A_PAUSE_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_PAUSE_AUTONEG, /* u8 */ - ETHTOOL_A_PAUSE_RX, /* u8 */ - ETHTOOL_A_PAUSE_TX, /* u8 */ - ETHTOOL_A_PAUSE_STATS, /* nest - _PAUSE_STAT_* */ - ETHTOOL_A_PAUSE_STATS_SRC, /* u32 */ - - /* add new constants above here */ + ETHTOOL_A_PAUSE_HEADER, + ETHTOOL_A_PAUSE_AUTONEG, + ETHTOOL_A_PAUSE_RX, + ETHTOOL_A_PAUSE_TX, + ETHTOOL_A_PAUSE_STATS, + ETHTOOL_A_PAUSE_STATS_SRC, + __ETHTOOL_A_PAUSE_CNT, ETHTOOL_A_PAUSE_MAX = (__ETHTOOL_A_PAUSE_CNT - 1) }; -/* EEE */ - enum { ETHTOOL_A_EEE_UNSPEC, - ETHTOOL_A_EEE_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_EEE_MODES_OURS, /* bitset */ - ETHTOOL_A_EEE_MODES_PEER, /* bitset */ - ETHTOOL_A_EEE_ACTIVE, /* u8 */ - ETHTOOL_A_EEE_ENABLED, /* u8 */ - ETHTOOL_A_EEE_TX_LPI_ENABLED, /* u8 */ - ETHTOOL_A_EEE_TX_LPI_TIMER, /* u32 */ - - /* add new constants above here */ + ETHTOOL_A_EEE_HEADER, + ETHTOOL_A_EEE_MODES_OURS, + ETHTOOL_A_EEE_MODES_PEER, + ETHTOOL_A_EEE_ACTIVE, + ETHTOOL_A_EEE_ENABLED, + ETHTOOL_A_EEE_TX_LPI_ENABLED, + ETHTOOL_A_EEE_TX_LPI_TIMER, + __ETHTOOL_A_EEE_CNT, ETHTOOL_A_EEE_MAX = (__ETHTOOL_A_EEE_CNT - 1) }; -/* TSINFO */ - enum { ETHTOOL_A_TS_STAT_UNSPEC, + ETHTOOL_A_TS_STAT_TX_PKTS, + ETHTOOL_A_TS_STAT_TX_LOST, + ETHTOOL_A_TS_STAT_TX_ERR, - ETHTOOL_A_TS_STAT_TX_PKTS, /* uint */ - ETHTOOL_A_TS_STAT_TX_LOST, /* uint */ - ETHTOOL_A_TS_STAT_TX_ERR, /* uint */ - - /* add new constants above here */ __ETHTOOL_A_TS_STAT_CNT, ETHTOOL_A_TS_STAT_MAX = (__ETHTOOL_A_TS_STAT_CNT - 1) @@ -444,23 +367,22 @@ enum { enum { ETHTOOL_A_TSINFO_UNSPEC, - ETHTOOL_A_TSINFO_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_TSINFO_TIMESTAMPING, /* bitset */ - ETHTOOL_A_TSINFO_TX_TYPES, /* bitset */ - ETHTOOL_A_TSINFO_RX_FILTERS, /* bitset */ - ETHTOOL_A_TSINFO_PHC_INDEX, /* u32 */ - ETHTOOL_A_TSINFO_STATS, /* nest - _A_TSINFO_STAT */ - - /* add new constants above here */ + ETHTOOL_A_TSINFO_HEADER, + ETHTOOL_A_TSINFO_TIMESTAMPING, + ETHTOOL_A_TSINFO_TX_TYPES, + ETHTOOL_A_TSINFO_RX_FILTERS, + ETHTOOL_A_TSINFO_PHC_INDEX, + ETHTOOL_A_TSINFO_STATS, + __ETHTOOL_A_TSINFO_CNT, ETHTOOL_A_TSINFO_MAX = (__ETHTOOL_A_TSINFO_CNT - 1) }; enum { ETHTOOL_A_CABLE_RESULT_UNSPEC, - ETHTOOL_A_CABLE_RESULT_PAIR, /* u8 ETHTOOL_A_CABLE_PAIR_ */ - ETHTOOL_A_CABLE_RESULT_CODE, /* u8 ETHTOOL_A_CABLE_RESULT_CODE_ */ - ETHTOOL_A_CABLE_RESULT_SRC, /* u32 ETHTOOL_A_CABLE_INF_SRC_ */ + ETHTOOL_A_CABLE_RESULT_PAIR, + ETHTOOL_A_CABLE_RESULT_CODE, + ETHTOOL_A_CABLE_RESULT_SRC, __ETHTOOL_A_CABLE_RESULT_CNT, ETHTOOL_A_CABLE_RESULT_MAX = (__ETHTOOL_A_CABLE_RESULT_CNT - 1) @@ -468,9 +390,9 @@ enum { enum { ETHTOOL_A_CABLE_FAULT_LENGTH_UNSPEC, - ETHTOOL_A_CABLE_FAULT_LENGTH_PAIR, /* u8 ETHTOOL_A_CABLE_PAIR_ */ - ETHTOOL_A_CABLE_FAULT_LENGTH_CM, /* u32 */ - ETHTOOL_A_CABLE_FAULT_LENGTH_SRC, /* u32 ETHTOOL_A_CABLE_INF_SRC_ */ + ETHTOOL_A_CABLE_FAULT_LENGTH_PAIR, + ETHTOOL_A_CABLE_FAULT_LENGTH_CM, + ETHTOOL_A_CABLE_FAULT_LENGTH_SRC, __ETHTOOL_A_CABLE_FAULT_LENGTH_CNT, ETHTOOL_A_CABLE_FAULT_LENGTH_MAX = (__ETHTOOL_A_CABLE_FAULT_LENGTH_CNT - 1) @@ -478,245 +400,204 @@ enum { enum { ETHTOOL_A_CABLE_NEST_UNSPEC, - ETHTOOL_A_CABLE_NEST_RESULT, /* nest - ETHTOOL_A_CABLE_RESULT_ */ - ETHTOOL_A_CABLE_NEST_FAULT_LENGTH, /* nest - ETHTOOL_A_CABLE_FAULT_LENGTH_ */ + ETHTOOL_A_CABLE_NEST_RESULT, + ETHTOOL_A_CABLE_NEST_FAULT_LENGTH, + __ETHTOOL_A_CABLE_NEST_CNT, ETHTOOL_A_CABLE_NEST_MAX = (__ETHTOOL_A_CABLE_NEST_CNT - 1) }; -/* CABLE TEST */ - enum { ETHTOOL_A_CABLE_TEST_UNSPEC, - ETHTOOL_A_CABLE_TEST_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_CABLE_TEST_HEADER, - /* add new constants above here */ __ETHTOOL_A_CABLE_TEST_CNT, ETHTOOL_A_CABLE_TEST_MAX = __ETHTOOL_A_CABLE_TEST_CNT - 1 }; enum { ETHTOOL_A_CABLE_TEST_NTF_UNSPEC, - ETHTOOL_A_CABLE_TEST_NTF_HEADER, /* nest - ETHTOOL_A_HEADER_* */ - ETHTOOL_A_CABLE_TEST_NTF_STATUS, /* u8 - _STARTED/_COMPLETE */ - ETHTOOL_A_CABLE_TEST_NTF_NEST, /* nest - of results: */ + ETHTOOL_A_CABLE_TEST_NTF_HEADER, + ETHTOOL_A_CABLE_TEST_NTF_STATUS, + ETHTOOL_A_CABLE_TEST_NTF_NEST, __ETHTOOL_A_CABLE_TEST_NTF_CNT, ETHTOOL_A_CABLE_TEST_NTF_MAX = (__ETHTOOL_A_CABLE_TEST_NTF_CNT - 1) }; -/* CABLE TEST TDR */ - enum { ETHTOOL_A_CABLE_TEST_TDR_CFG_UNSPEC, - ETHTOOL_A_CABLE_TEST_TDR_CFG_FIRST, /* u32 */ - ETHTOOL_A_CABLE_TEST_TDR_CFG_LAST, /* u32 */ - ETHTOOL_A_CABLE_TEST_TDR_CFG_STEP, /* u32 */ - ETHTOOL_A_CABLE_TEST_TDR_CFG_PAIR, /* u8 */ + ETHTOOL_A_CABLE_TEST_TDR_CFG_FIRST, + ETHTOOL_A_CABLE_TEST_TDR_CFG_LAST, + ETHTOOL_A_CABLE_TEST_TDR_CFG_STEP, + ETHTOOL_A_CABLE_TEST_TDR_CFG_PAIR, - /* add new constants above here */ __ETHTOOL_A_CABLE_TEST_TDR_CFG_CNT, ETHTOOL_A_CABLE_TEST_TDR_CFG_MAX = __ETHTOOL_A_CABLE_TEST_TDR_CFG_CNT - 1 }; enum { ETHTOOL_A_CABLE_TEST_TDR_NTF_UNSPEC, - ETHTOOL_A_CABLE_TEST_TDR_NTF_HEADER, /* nest - ETHTOOL_A_HEADER_* */ - ETHTOOL_A_CABLE_TEST_TDR_NTF_STATUS, /* u8 - _STARTED/_COMPLETE */ - ETHTOOL_A_CABLE_TEST_TDR_NTF_NEST, /* nest - of results: */ + ETHTOOL_A_CABLE_TEST_TDR_NTF_HEADER, + ETHTOOL_A_CABLE_TEST_TDR_NTF_STATUS, + ETHTOOL_A_CABLE_TEST_TDR_NTF_NEST, - /* add new constants above here */ __ETHTOOL_A_CABLE_TEST_TDR_NTF_CNT, ETHTOOL_A_CABLE_TEST_TDR_NTF_MAX = __ETHTOOL_A_CABLE_TEST_TDR_NTF_CNT - 1 }; enum { ETHTOOL_A_CABLE_TEST_TDR_UNSPEC, - ETHTOOL_A_CABLE_TEST_TDR_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_CABLE_TEST_TDR_CFG, /* nest - *_TDR_CFG_* */ + ETHTOOL_A_CABLE_TEST_TDR_HEADER, + ETHTOOL_A_CABLE_TEST_TDR_CFG, - /* add new constants above here */ __ETHTOOL_A_CABLE_TEST_TDR_CNT, ETHTOOL_A_CABLE_TEST_TDR_MAX = __ETHTOOL_A_CABLE_TEST_TDR_CNT - 1 }; enum { ETHTOOL_A_TUNNEL_UDP_ENTRY_UNSPEC, + ETHTOOL_A_TUNNEL_UDP_ENTRY_PORT, + ETHTOOL_A_TUNNEL_UDP_ENTRY_TYPE, - ETHTOOL_A_TUNNEL_UDP_ENTRY_PORT, /* be16 */ - ETHTOOL_A_TUNNEL_UDP_ENTRY_TYPE, /* u32 */ - - /* add new constants above here */ __ETHTOOL_A_TUNNEL_UDP_ENTRY_CNT, ETHTOOL_A_TUNNEL_UDP_ENTRY_MAX = (__ETHTOOL_A_TUNNEL_UDP_ENTRY_CNT - 1) }; enum { ETHTOOL_A_TUNNEL_UDP_TABLE_UNSPEC, + ETHTOOL_A_TUNNEL_UDP_TABLE_SIZE, + ETHTOOL_A_TUNNEL_UDP_TABLE_TYPES, + ETHTOOL_A_TUNNEL_UDP_TABLE_ENTRY, - ETHTOOL_A_TUNNEL_UDP_TABLE_SIZE, /* u32 */ - ETHTOOL_A_TUNNEL_UDP_TABLE_TYPES, /* bitset */ - ETHTOOL_A_TUNNEL_UDP_TABLE_ENTRY, /* nest - _UDP_ENTRY_* */ - - /* add new constants above here */ __ETHTOOL_A_TUNNEL_UDP_TABLE_CNT, ETHTOOL_A_TUNNEL_UDP_TABLE_MAX = (__ETHTOOL_A_TUNNEL_UDP_TABLE_CNT - 1) }; enum { ETHTOOL_A_TUNNEL_UDP_UNSPEC, + ETHTOOL_A_TUNNEL_UDP_TABLE, - ETHTOOL_A_TUNNEL_UDP_TABLE, /* nest - _UDP_TABLE_* */ - - /* add new constants above here */ __ETHTOOL_A_TUNNEL_UDP_CNT, ETHTOOL_A_TUNNEL_UDP_MAX = (__ETHTOOL_A_TUNNEL_UDP_CNT - 1) }; enum { ETHTOOL_A_TUNNEL_INFO_UNSPEC, - ETHTOOL_A_TUNNEL_INFO_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_TUNNEL_INFO_HEADER, + ETHTOOL_A_TUNNEL_INFO_UDP_PORTS, - ETHTOOL_A_TUNNEL_INFO_UDP_PORTS, /* nest - _UDP_TABLE */ - - /* add new constants above here */ __ETHTOOL_A_TUNNEL_INFO_CNT, ETHTOOL_A_TUNNEL_INFO_MAX = (__ETHTOOL_A_TUNNEL_INFO_CNT - 1) }; -/* FEC */ - enum { ETHTOOL_A_FEC_STAT_UNSPEC, ETHTOOL_A_FEC_STAT_PAD, + ETHTOOL_A_FEC_STAT_CORRECTED, + ETHTOOL_A_FEC_STAT_UNCORR, + ETHTOOL_A_FEC_STAT_CORR_BITS, - ETHTOOL_A_FEC_STAT_CORRECTED, /* array, u64 */ - ETHTOOL_A_FEC_STAT_UNCORR, /* array, u64 */ - ETHTOOL_A_FEC_STAT_CORR_BITS, /* array, u64 */ - - /* add new constants above here */ __ETHTOOL_A_FEC_STAT_CNT, ETHTOOL_A_FEC_STAT_MAX = (__ETHTOOL_A_FEC_STAT_CNT - 1) }; enum { ETHTOOL_A_FEC_UNSPEC, - ETHTOOL_A_FEC_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_FEC_MODES, /* bitset */ - ETHTOOL_A_FEC_AUTO, /* u8 */ - ETHTOOL_A_FEC_ACTIVE, /* u32 */ - ETHTOOL_A_FEC_STATS, /* nest - _A_FEC_STAT */ + ETHTOOL_A_FEC_HEADER, + ETHTOOL_A_FEC_MODES, + ETHTOOL_A_FEC_AUTO, + ETHTOOL_A_FEC_ACTIVE, + ETHTOOL_A_FEC_STATS, __ETHTOOL_A_FEC_CNT, ETHTOOL_A_FEC_MAX = (__ETHTOOL_A_FEC_CNT - 1) }; -/* MODULE EEPROM */ - enum { ETHTOOL_A_MODULE_EEPROM_UNSPEC, - ETHTOOL_A_MODULE_EEPROM_HEADER, /* nest - _A_HEADER_* */ - - ETHTOOL_A_MODULE_EEPROM_OFFSET, /* u32 */ - ETHTOOL_A_MODULE_EEPROM_LENGTH, /* u32 */ - ETHTOOL_A_MODULE_EEPROM_PAGE, /* u8 */ - ETHTOOL_A_MODULE_EEPROM_BANK, /* u8 */ - ETHTOOL_A_MODULE_EEPROM_I2C_ADDRESS, /* u8 */ - ETHTOOL_A_MODULE_EEPROM_DATA, /* binary */ + ETHTOOL_A_MODULE_EEPROM_HEADER, + ETHTOOL_A_MODULE_EEPROM_OFFSET, + ETHTOOL_A_MODULE_EEPROM_LENGTH, + ETHTOOL_A_MODULE_EEPROM_PAGE, + ETHTOOL_A_MODULE_EEPROM_BANK, + ETHTOOL_A_MODULE_EEPROM_I2C_ADDRESS, + ETHTOOL_A_MODULE_EEPROM_DATA, __ETHTOOL_A_MODULE_EEPROM_CNT, ETHTOOL_A_MODULE_EEPROM_MAX = (__ETHTOOL_A_MODULE_EEPROM_CNT - 1) }; - enum { ETHTOOL_A_STATS_GRP_UNSPEC, ETHTOOL_A_STATS_GRP_PAD, + ETHTOOL_A_STATS_GRP_ID, + ETHTOOL_A_STATS_GRP_SS_ID, + ETHTOOL_A_STATS_GRP_STAT, + ETHTOOL_A_STATS_GRP_HIST_RX, + ETHTOOL_A_STATS_GRP_HIST_TX, + ETHTOOL_A_STATS_GRP_HIST_BKT_LOW, + ETHTOOL_A_STATS_GRP_HIST_BKT_HI, + ETHTOOL_A_STATS_GRP_HIST_VAL, - ETHTOOL_A_STATS_GRP_ID, /* u32 */ - ETHTOOL_A_STATS_GRP_SS_ID, /* u32 */ - - ETHTOOL_A_STATS_GRP_STAT, /* nest */ - - ETHTOOL_A_STATS_GRP_HIST_RX, /* nest */ - ETHTOOL_A_STATS_GRP_HIST_TX, /* nest */ - - ETHTOOL_A_STATS_GRP_HIST_BKT_LOW, /* u32 */ - ETHTOOL_A_STATS_GRP_HIST_BKT_HI, /* u32 */ - ETHTOOL_A_STATS_GRP_HIST_VAL, /* u64 */ - - /* add new constants above here */ __ETHTOOL_A_STATS_GRP_CNT, ETHTOOL_A_STATS_GRP_MAX = (__ETHTOOL_A_STATS_GRP_CNT - 1) }; -/* STATS */ - enum { ETHTOOL_A_STATS_UNSPEC, ETHTOOL_A_STATS_PAD, - ETHTOOL_A_STATS_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_STATS_GROUPS, /* bitset */ + ETHTOOL_A_STATS_HEADER, + ETHTOOL_A_STATS_GROUPS, + ETHTOOL_A_STATS_GRP, + ETHTOOL_A_STATS_SRC, - ETHTOOL_A_STATS_GRP, /* nest - _A_STATS_GRP_* */ - - ETHTOOL_A_STATS_SRC, /* u32 */ - - /* add new constants above here */ __ETHTOOL_A_STATS_CNT, ETHTOOL_A_STATS_MAX = (__ETHTOOL_A_STATS_CNT - 1) }; -/* PHC VCLOCKS */ - enum { ETHTOOL_A_PHC_VCLOCKS_UNSPEC, - ETHTOOL_A_PHC_VCLOCKS_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_PHC_VCLOCKS_NUM, /* u32 */ - ETHTOOL_A_PHC_VCLOCKS_INDEX, /* array, s32 */ + ETHTOOL_A_PHC_VCLOCKS_HEADER, + ETHTOOL_A_PHC_VCLOCKS_NUM, + ETHTOOL_A_PHC_VCLOCKS_INDEX, - /* add new constants above here */ __ETHTOOL_A_PHC_VCLOCKS_CNT, ETHTOOL_A_PHC_VCLOCKS_MAX = (__ETHTOOL_A_PHC_VCLOCKS_CNT - 1) }; -/* MODULE */ - enum { ETHTOOL_A_MODULE_UNSPEC, - ETHTOOL_A_MODULE_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_MODULE_POWER_MODE_POLICY, /* u8 */ - ETHTOOL_A_MODULE_POWER_MODE, /* u8 */ + ETHTOOL_A_MODULE_HEADER, + ETHTOOL_A_MODULE_POWER_MODE_POLICY, + ETHTOOL_A_MODULE_POWER_MODE, - /* add new constants above here */ __ETHTOOL_A_MODULE_CNT, ETHTOOL_A_MODULE_MAX = (__ETHTOOL_A_MODULE_CNT - 1) }; -/* Power Sourcing Equipment */ enum { ETHTOOL_A_C33_PSE_PW_LIMIT_UNSPEC, - ETHTOOL_A_C33_PSE_PW_LIMIT_MIN, /* u32 */ - ETHTOOL_A_C33_PSE_PW_LIMIT_MAX, /* u32 */ + ETHTOOL_A_C33_PSE_PW_LIMIT_MIN, + ETHTOOL_A_C33_PSE_PW_LIMIT_MAX, }; enum { ETHTOOL_A_PSE_UNSPEC, - ETHTOOL_A_PSE_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_PODL_PSE_ADMIN_STATE, /* u32 */ - ETHTOOL_A_PODL_PSE_ADMIN_CONTROL, /* u32 */ - ETHTOOL_A_PODL_PSE_PW_D_STATUS, /* u32 */ - ETHTOOL_A_C33_PSE_ADMIN_STATE, /* u32 */ - ETHTOOL_A_C33_PSE_ADMIN_CONTROL, /* u32 */ - ETHTOOL_A_C33_PSE_PW_D_STATUS, /* u32 */ - ETHTOOL_A_C33_PSE_PW_CLASS, /* u32 */ - ETHTOOL_A_C33_PSE_ACTUAL_PW, /* u32 */ - ETHTOOL_A_C33_PSE_EXT_STATE, /* u32 */ - ETHTOOL_A_C33_PSE_EXT_SUBSTATE, /* u32 */ - ETHTOOL_A_C33_PSE_AVAIL_PW_LIMIT, /* u32 */ - ETHTOOL_A_C33_PSE_PW_LIMIT_RANGES, /* nest - _C33_PSE_PW_LIMIT_* */ - - /* add new constants above here */ + ETHTOOL_A_PSE_HEADER, + ETHTOOL_A_PODL_PSE_ADMIN_STATE, + ETHTOOL_A_PODL_PSE_ADMIN_CONTROL, + ETHTOOL_A_PODL_PSE_PW_D_STATUS, + ETHTOOL_A_C33_PSE_ADMIN_STATE, + ETHTOOL_A_C33_PSE_ADMIN_CONTROL, + ETHTOOL_A_C33_PSE_PW_D_STATUS, + ETHTOOL_A_C33_PSE_PW_CLASS, + ETHTOOL_A_C33_PSE_ACTUAL_PW, + ETHTOOL_A_C33_PSE_EXT_STATE, + ETHTOOL_A_C33_PSE_EXT_SUBSTATE, + ETHTOOL_A_C33_PSE_AVAIL_PW_LIMIT, + ETHTOOL_A_C33_PSE_PW_LIMIT_RANGES, + __ETHTOOL_A_PSE_CNT, ETHTOOL_A_PSE_MAX = (__ETHTOOL_A_PSE_CNT - 1) }; @@ -724,70 +605,62 @@ enum { enum { ETHTOOL_A_RSS_UNSPEC, ETHTOOL_A_RSS_HEADER, - ETHTOOL_A_RSS_CONTEXT, /* u32 */ - ETHTOOL_A_RSS_HFUNC, /* u32 */ - ETHTOOL_A_RSS_INDIR, /* binary */ - ETHTOOL_A_RSS_HKEY, /* binary */ - ETHTOOL_A_RSS_INPUT_XFRM, /* u32 */ - ETHTOOL_A_RSS_START_CONTEXT, /* u32 */ + ETHTOOL_A_RSS_CONTEXT, + ETHTOOL_A_RSS_HFUNC, + ETHTOOL_A_RSS_INDIR, + ETHTOOL_A_RSS_HKEY, + ETHTOOL_A_RSS_INPUT_XFRM, + ETHTOOL_A_RSS_START_CONTEXT, __ETHTOOL_A_RSS_CNT, ETHTOOL_A_RSS_MAX = (__ETHTOOL_A_RSS_CNT - 1), }; -/* PLCA */ - enum { ETHTOOL_A_PLCA_UNSPEC, - ETHTOOL_A_PLCA_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_PLCA_VERSION, /* u16 */ - ETHTOOL_A_PLCA_ENABLED, /* u8 */ - ETHTOOL_A_PLCA_STATUS, /* u8 */ - ETHTOOL_A_PLCA_NODE_CNT, /* u32 */ - ETHTOOL_A_PLCA_NODE_ID, /* u32 */ - ETHTOOL_A_PLCA_TO_TMR, /* u32 */ - ETHTOOL_A_PLCA_BURST_CNT, /* u32 */ - ETHTOOL_A_PLCA_BURST_TMR, /* u32 */ - - /* add new constants above here */ + ETHTOOL_A_PLCA_HEADER, + ETHTOOL_A_PLCA_VERSION, + ETHTOOL_A_PLCA_ENABLED, + ETHTOOL_A_PLCA_STATUS, + ETHTOOL_A_PLCA_NODE_CNT, + ETHTOOL_A_PLCA_NODE_ID, + ETHTOOL_A_PLCA_TO_TMR, + ETHTOOL_A_PLCA_BURST_CNT, + ETHTOOL_A_PLCA_BURST_TMR, + __ETHTOOL_A_PLCA_CNT, ETHTOOL_A_PLCA_MAX = (__ETHTOOL_A_PLCA_CNT - 1) }; -/* MODULE_FW_FLASH */ - enum { ETHTOOL_A_MODULE_FW_FLASH_UNSPEC, - ETHTOOL_A_MODULE_FW_FLASH_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_MODULE_FW_FLASH_FILE_NAME, /* string */ - ETHTOOL_A_MODULE_FW_FLASH_PASSWORD, /* u32 */ - ETHTOOL_A_MODULE_FW_FLASH_STATUS, /* u32 */ - ETHTOOL_A_MODULE_FW_FLASH_STATUS_MSG, /* string */ - ETHTOOL_A_MODULE_FW_FLASH_DONE, /* uint */ - ETHTOOL_A_MODULE_FW_FLASH_TOTAL, /* uint */ - - /* add new constants above here */ + ETHTOOL_A_MODULE_FW_FLASH_HEADER, + ETHTOOL_A_MODULE_FW_FLASH_FILE_NAME, + ETHTOOL_A_MODULE_FW_FLASH_PASSWORD, + ETHTOOL_A_MODULE_FW_FLASH_STATUS, + ETHTOOL_A_MODULE_FW_FLASH_STATUS_MSG, + ETHTOOL_A_MODULE_FW_FLASH_DONE, + ETHTOOL_A_MODULE_FW_FLASH_TOTAL, + __ETHTOOL_A_MODULE_FW_FLASH_CNT, ETHTOOL_A_MODULE_FW_FLASH_MAX = (__ETHTOOL_A_MODULE_FW_FLASH_CNT - 1) }; enum { ETHTOOL_A_PHY_UNSPEC, - ETHTOOL_A_PHY_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_PHY_INDEX, /* u32 */ - ETHTOOL_A_PHY_DRVNAME, /* string */ - ETHTOOL_A_PHY_NAME, /* string */ - ETHTOOL_A_PHY_UPSTREAM_TYPE, /* u32 */ - ETHTOOL_A_PHY_UPSTREAM_INDEX, /* u32 */ - ETHTOOL_A_PHY_UPSTREAM_SFP_NAME, /* string */ - ETHTOOL_A_PHY_DOWNSTREAM_SFP_NAME, /* string */ - - /* add new constants above here */ + ETHTOOL_A_PHY_HEADER, + ETHTOOL_A_PHY_INDEX, + ETHTOOL_A_PHY_DRVNAME, + ETHTOOL_A_PHY_NAME, + ETHTOOL_A_PHY_UPSTREAM_TYPE, + ETHTOOL_A_PHY_UPSTREAM_INDEX, + ETHTOOL_A_PHY_UPSTREAM_SFP_NAME, + ETHTOOL_A_PHY_DOWNSTREAM_SFP_NAME, + __ETHTOOL_A_PHY_CNT, ETHTOOL_A_PHY_MAX = (__ETHTOOL_A_PHY_CNT - 1) }; -/* message types - userspace to kernel */ enum { ETHTOOL_MSG_USER_NONE, ETHTOOL_MSG_STRSET_GET, @@ -836,12 +709,10 @@ enum { ETHTOOL_MSG_MODULE_FW_FLASH_ACT, ETHTOOL_MSG_PHY_GET, - /* add new constants above here */ __ETHTOOL_MSG_USER_CNT, ETHTOOL_MSG_USER_MAX = __ETHTOOL_MSG_USER_CNT - 1 }; -/* message types - kernel to userspace */ enum { ETHTOOL_MSG_KERNEL_NONE, ETHTOOL_MSG_STRSET_GET_REPLY, @@ -891,7 +762,6 @@ enum { ETHTOOL_MSG_PHY_GET_REPLY, ETHTOOL_MSG_PHY_NTF, - /* add new constants above here */ __ETHTOOL_MSG_KERNEL_CNT, ETHTOOL_MSG_KERNEL_MAX = __ETHTOOL_MSG_KERNEL_CNT - 1 }; -- cgit v1.2.3 From 8d0580c6ebdd27879c83483f53bc71e2e470f6fe Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Wed, 4 Dec 2024 07:55:49 -0800 Subject: ethtool: regenerate uapi header from the spec No functional changes. Mostly the following formatting: - extra docs - extra enums - XXX_MAX = __XXX_CNT - 1 -> XXX_MAX = (__XXX_CNT - 1) - newlines Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20241204155549.641348-9-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- include/uapi/linux/ethtool_netlink_generated.h | 89 ++++++++++++++++---------- 1 file changed, 56 insertions(+), 33 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/ethtool_netlink_generated.h b/include/uapi/linux/ethtool_netlink_generated.h index 35a24d490efe..b58f352fe4f2 100644 --- a/include/uapi/linux/ethtool_netlink_generated.h +++ b/include/uapi/linux/ethtool_netlink_generated.h @@ -1,23 +1,43 @@ /* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ +/* Do not edit directly, auto-generated from: */ +/* Documentation/netlink/specs/ethtool.yaml */ +/* YNL-GEN uapi header */ + #ifndef _UAPI_LINUX_ETHTOOL_NETLINK_GENERATED_H #define _UAPI_LINUX_ETHTOOL_NETLINK_GENERATED_H +#define ETHTOOL_FAMILY_NAME "ethtool" +#define ETHTOOL_FAMILY_VERSION 1 + enum { ETHTOOL_UDP_TUNNEL_TYPE_VXLAN, ETHTOOL_UDP_TUNNEL_TYPE_GENEVE, ETHTOOL_UDP_TUNNEL_TYPE_VXLAN_GPE, - __ETHTOOL_UDP_TUNNEL_TYPE_CNT + /* private: */ + __ETHTOOL_UDP_TUNNEL_TYPE_CNT, + ETHTOOL_UDP_TUNNEL_TYPE_MAX = (__ETHTOOL_UDP_TUNNEL_TYPE_CNT - 1) }; +/** + * enum ethtool_header_flags - common ethtool header flags + * @ETHTOOL_FLAG_COMPACT_BITSETS: use compact bitsets in reply + * @ETHTOOL_FLAG_OMIT_REPLY: provide optional reply for SET or ACT requests + * @ETHTOOL_FLAG_STATS: request statistics, if supported by the driver + */ enum ethtool_header_flags { - ETHTOOL_FLAG_COMPACT_BITSETS = 1 << 0, /* use compact bitsets in reply */ - ETHTOOL_FLAG_OMIT_REPLY = 1 << 1, /* provide optional reply for SET or ACT requests */ - ETHTOOL_FLAG_STATS = 1 << 2, /* request statistics, if supported by the driver */ + ETHTOOL_FLAG_COMPACT_BITSETS = 1, + ETHTOOL_FLAG_OMIT_REPLY = 2, + ETHTOOL_FLAG_STATS = 4, }; enum { - ETHTOOL_TCP_DATA_SPLIT_UNKNOWN = 0, + ETHTOOL_PHY_UPSTREAM_TYPE_MAC, + ETHTOOL_PHY_UPSTREAM_TYPE_PHY, +}; + +enum ethtool_tcp_data_split { + ETHTOOL_TCP_DATA_SPLIT_UNKNOWN, ETHTOOL_TCP_DATA_SPLIT_DISABLED, ETHTOOL_TCP_DATA_SPLIT_ENABLED, }; @@ -30,7 +50,7 @@ enum { ETHTOOL_A_HEADER_PHY_INDEX, __ETHTOOL_A_HEADER_CNT, - ETHTOOL_A_HEADER_MAX = __ETHTOOL_A_HEADER_CNT - 1 + ETHTOOL_A_HEADER_MAX = (__ETHTOOL_A_HEADER_CNT - 1) }; enum { @@ -40,7 +60,7 @@ enum { ETHTOOL_A_BITSET_BIT_VALUE, __ETHTOOL_A_BITSET_BIT_CNT, - ETHTOOL_A_BITSET_BIT_MAX = __ETHTOOL_A_BITSET_BIT_CNT - 1 + ETHTOOL_A_BITSET_BIT_MAX = (__ETHTOOL_A_BITSET_BIT_CNT - 1) }; enum { @@ -48,7 +68,7 @@ enum { ETHTOOL_A_BITSET_BITS_BIT, __ETHTOOL_A_BITSET_BITS_CNT, - ETHTOOL_A_BITSET_BITS_MAX = __ETHTOOL_A_BITSET_BITS_CNT - 1 + ETHTOOL_A_BITSET_BITS_MAX = (__ETHTOOL_A_BITSET_BITS_CNT - 1) }; enum { @@ -60,7 +80,7 @@ enum { ETHTOOL_A_BITSET_MASK, __ETHTOOL_A_BITSET_CNT, - ETHTOOL_A_BITSET_MAX = __ETHTOOL_A_BITSET_CNT - 1 + ETHTOOL_A_BITSET_MAX = (__ETHTOOL_A_BITSET_CNT - 1) }; enum { @@ -69,7 +89,7 @@ enum { ETHTOOL_A_STRING_VALUE, __ETHTOOL_A_STRING_CNT, - ETHTOOL_A_STRING_MAX = __ETHTOOL_A_STRING_CNT - 1 + ETHTOOL_A_STRING_MAX = (__ETHTOOL_A_STRING_CNT - 1) }; enum { @@ -77,7 +97,7 @@ enum { ETHTOOL_A_STRINGS_STRING, __ETHTOOL_A_STRINGS_CNT, - ETHTOOL_A_STRINGS_MAX = __ETHTOOL_A_STRINGS_CNT - 1 + ETHTOOL_A_STRINGS_MAX = (__ETHTOOL_A_STRINGS_CNT - 1) }; enum { @@ -87,7 +107,7 @@ enum { ETHTOOL_A_STRINGSET_STRINGS, __ETHTOOL_A_STRINGSET_CNT, - ETHTOOL_A_STRINGSET_MAX = __ETHTOOL_A_STRINGSET_CNT - 1 + ETHTOOL_A_STRINGSET_MAX = (__ETHTOOL_A_STRINGSET_CNT - 1) }; enum { @@ -95,7 +115,7 @@ enum { ETHTOOL_A_STRINGSETS_STRINGSET, __ETHTOOL_A_STRINGSETS_CNT, - ETHTOOL_A_STRINGSETS_MAX = __ETHTOOL_A_STRINGSETS_CNT - 1 + ETHTOOL_A_STRINGSETS_MAX = (__ETHTOOL_A_STRINGSETS_CNT - 1) }; enum { @@ -105,7 +125,7 @@ enum { ETHTOOL_A_STRSET_COUNTS_ONLY, __ETHTOOL_A_STRSET_CNT, - ETHTOOL_A_STRSET_MAX = __ETHTOOL_A_STRSET_CNT - 1 + ETHTOOL_A_STRSET_MAX = (__ETHTOOL_A_STRSET_CNT - 1) }; enum { @@ -114,7 +134,7 @@ enum { ETHTOOL_A_PRIVFLAGS_FLAGS, __ETHTOOL_A_PRIVFLAGS_CNT, - ETHTOOL_A_PRIVFLAGS_MAX = __ETHTOOL_A_PRIVFLAGS_CNT - 1 + ETHTOOL_A_PRIVFLAGS_MAX = (__ETHTOOL_A_PRIVFLAGS_CNT - 1) }; enum { @@ -182,7 +202,7 @@ enum { ETHTOOL_A_LINKINFO_TRANSCEIVER, __ETHTOOL_A_LINKINFO_CNT, - ETHTOOL_A_LINKINFO_MAX = __ETHTOOL_A_LINKINFO_CNT - 1 + ETHTOOL_A_LINKINFO_MAX = (__ETHTOOL_A_LINKINFO_CNT - 1) }; enum { @@ -199,7 +219,7 @@ enum { ETHTOOL_A_LINKMODES_RATE_MATCHING, __ETHTOOL_A_LINKMODES_CNT, - ETHTOOL_A_LINKMODES_MAX = __ETHTOOL_A_LINKMODES_CNT - 1 + ETHTOOL_A_LINKMODES_MAX = (__ETHTOOL_A_LINKMODES_CNT - 1) }; enum { @@ -213,7 +233,7 @@ enum { ETHTOOL_A_LINKSTATE_EXT_DOWN_CNT, __ETHTOOL_A_LINKSTATE_CNT, - ETHTOOL_A_LINKSTATE_MAX = __ETHTOOL_A_LINKSTATE_CNT - 1 + ETHTOOL_A_LINKSTATE_MAX = (__ETHTOOL_A_LINKSTATE_CNT - 1) }; enum { @@ -222,7 +242,7 @@ enum { ETHTOOL_A_DEBUG_MSGMASK, __ETHTOOL_A_DEBUG_CNT, - ETHTOOL_A_DEBUG_MAX = __ETHTOOL_A_DEBUG_CNT - 1 + ETHTOOL_A_DEBUG_MAX = (__ETHTOOL_A_DEBUG_CNT - 1) }; enum { @@ -232,7 +252,7 @@ enum { ETHTOOL_A_WOL_SOPASS, __ETHTOOL_A_WOL_CNT, - ETHTOOL_A_WOL_MAX = __ETHTOOL_A_WOL_CNT - 1 + ETHTOOL_A_WOL_MAX = (__ETHTOOL_A_WOL_CNT - 1) }; enum { @@ -244,7 +264,7 @@ enum { ETHTOOL_A_FEATURES_NOCHANGE, __ETHTOOL_A_FEATURES_CNT, - ETHTOOL_A_FEATURES_MAX = __ETHTOOL_A_FEATURES_CNT - 1 + ETHTOOL_A_FEATURES_MAX = (__ETHTOOL_A_FEATURES_CNT - 1) }; enum { @@ -276,6 +296,7 @@ enum { enum { ETHTOOL_A_PROFILE_UNSPEC, ETHTOOL_A_PROFILE_IRQ_MODERATION, + __ETHTOOL_A_PROFILE_CNT, ETHTOOL_A_PROFILE_MAX = (__ETHTOOL_A_PROFILE_CNT - 1) }; @@ -362,7 +383,6 @@ enum { __ETHTOOL_A_TS_STAT_CNT, ETHTOOL_A_TS_STAT_MAX = (__ETHTOOL_A_TS_STAT_CNT - 1) - }; enum { @@ -412,7 +432,7 @@ enum { ETHTOOL_A_CABLE_TEST_HEADER, __ETHTOOL_A_CABLE_TEST_CNT, - ETHTOOL_A_CABLE_TEST_MAX = __ETHTOOL_A_CABLE_TEST_CNT - 1 + ETHTOOL_A_CABLE_TEST_MAX = (__ETHTOOL_A_CABLE_TEST_CNT - 1) }; enum { @@ -433,7 +453,7 @@ enum { ETHTOOL_A_CABLE_TEST_TDR_CFG_PAIR, __ETHTOOL_A_CABLE_TEST_TDR_CFG_CNT, - ETHTOOL_A_CABLE_TEST_TDR_CFG_MAX = __ETHTOOL_A_CABLE_TEST_TDR_CFG_CNT - 1 + ETHTOOL_A_CABLE_TEST_TDR_CFG_MAX = (__ETHTOOL_A_CABLE_TEST_TDR_CFG_CNT - 1) }; enum { @@ -443,7 +463,7 @@ enum { ETHTOOL_A_CABLE_TEST_TDR_NTF_NEST, __ETHTOOL_A_CABLE_TEST_TDR_NTF_CNT, - ETHTOOL_A_CABLE_TEST_TDR_NTF_MAX = __ETHTOOL_A_CABLE_TEST_TDR_NTF_CNT - 1 + ETHTOOL_A_CABLE_TEST_TDR_NTF_MAX = (__ETHTOOL_A_CABLE_TEST_TDR_NTF_CNT - 1) }; enum { @@ -452,7 +472,7 @@ enum { ETHTOOL_A_CABLE_TEST_TDR_CFG, __ETHTOOL_A_CABLE_TEST_TDR_CNT, - ETHTOOL_A_CABLE_TEST_TDR_MAX = __ETHTOOL_A_CABLE_TEST_TDR_CNT - 1 + ETHTOOL_A_CABLE_TEST_TDR_MAX = (__ETHTOOL_A_CABLE_TEST_TDR_CNT - 1) }; enum { @@ -580,6 +600,9 @@ enum { ETHTOOL_A_C33_PSE_PW_LIMIT_UNSPEC, ETHTOOL_A_C33_PSE_PW_LIMIT_MIN, ETHTOOL_A_C33_PSE_PW_LIMIT_MAX, + + __ETHTOOL_A_C33_PSE_PW_LIMIT_CNT, + __ETHTOOL_A_C33_PSE_PW_LIMIT_MAX = (__ETHTOOL_A_C33_PSE_PW_LIMIT_CNT - 1) }; enum { @@ -613,7 +636,7 @@ enum { ETHTOOL_A_RSS_START_CONTEXT, __ETHTOOL_A_RSS_CNT, - ETHTOOL_A_RSS_MAX = (__ETHTOOL_A_RSS_CNT - 1), + ETHTOOL_A_RSS_MAX = (__ETHTOOL_A_RSS_CNT - 1) }; enum { @@ -662,8 +685,8 @@ enum { }; enum { - ETHTOOL_MSG_USER_NONE, - ETHTOOL_MSG_STRSET_GET, + ETHTOOL_MSG_USER_NONE = 0, + ETHTOOL_MSG_STRSET_GET = 1, ETHTOOL_MSG_LINKINFO_GET, ETHTOOL_MSG_LINKINFO_SET, ETHTOOL_MSG_LINKMODES_GET, @@ -710,12 +733,12 @@ enum { ETHTOOL_MSG_PHY_GET, __ETHTOOL_MSG_USER_CNT, - ETHTOOL_MSG_USER_MAX = __ETHTOOL_MSG_USER_CNT - 1 + ETHTOOL_MSG_USER_MAX = (__ETHTOOL_MSG_USER_CNT - 1) }; enum { - ETHTOOL_MSG_KERNEL_NONE, - ETHTOOL_MSG_STRSET_GET_REPLY, + ETHTOOL_MSG_KERNEL_NONE = 0, + ETHTOOL_MSG_STRSET_GET_REPLY = 1, ETHTOOL_MSG_LINKINFO_GET_REPLY, ETHTOOL_MSG_LINKINFO_NTF, ETHTOOL_MSG_LINKMODES_GET_REPLY, @@ -763,7 +786,7 @@ enum { ETHTOOL_MSG_PHY_NTF, __ETHTOOL_MSG_KERNEL_CNT, - ETHTOOL_MSG_KERNEL_MAX = __ETHTOOL_MSG_KERNEL_CNT - 1 + ETHTOOL_MSG_KERNEL_MAX = (__ETHTOOL_MSG_KERNEL_CNT - 1) }; #endif /* _UAPI_LINUX_ETHTOOL_NETLINK_GENERATED_H */ -- cgit v1.2.3 From ebe559609d7829b52c6642b581860760984faf9d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 15 Nov 2024 10:30:14 -0500 Subject: fs: get rid of __FMODE_NONOTIFY kludge All it takes to get rid of the __FMODE_NONOTIFY kludge is switching fanotify from anon_inode_getfd() to anon_inode_getfile_fmode() and adding a dentry_open_nonotify() helper to be used by fanotify on the other path. That's it - no more weird shit in OPEN_FMODE(), etc. Signed-off-by: Al Viro Link: https://lore.kernel.org/linux-fsdevel/20241113043003.GH3387508@ZenIV/ Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara Link: https://patch.msgid.link/d1231137e7b661a382459e79a764259509a4115d.1731684329.git.josef@toxicpanda.com --- fs/fcntl.c | 4 ++-- fs/notify/fanotify/fanotify_user.c | 25 ++++++++++++++++--------- fs/open.c | 23 +++++++++++++++++++---- include/linux/fs.h | 6 +++--- include/uapi/asm-generic/fcntl.h | 1 - 5 files changed, 40 insertions(+), 19 deletions(-) (limited to 'include/uapi') diff --git a/fs/fcntl.c b/fs/fcntl.c index 49884fa3c81d..5598e4d57422 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -1158,10 +1158,10 @@ static int __init fcntl_init(void) * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY * is defined as O_NONBLOCK on some platforms and not on others. */ - BUILD_BUG_ON(21 - 1 /* for O_RDONLY being 0 */ != + BUILD_BUG_ON(20 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32( (VALID_OPEN_FLAGS & ~(O_NONBLOCK | O_NDELAY)) | - __FMODE_EXEC | __FMODE_NONOTIFY)); + __FMODE_EXEC)); fasync_cache = kmem_cache_create("fasync_cache", sizeof(struct fasync_struct), 0, diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 2d85c71717d6..919ff59cb802 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -100,8 +100,7 @@ static void __init fanotify_sysctls_init(void) * * Internal and external open flags are stored together in field f_flags of * struct file. Only external open flags shall be allowed in event_f_flags. - * Internal flags like FMODE_NONOTIFY, FMODE_EXEC, FMODE_NOCMTIME shall be - * excluded. + * Internal flags like FMODE_EXEC shall be excluded. */ #define FANOTIFY_INIT_ALL_EVENT_F_BITS ( \ O_ACCMODE | O_APPEND | O_NONBLOCK | \ @@ -258,12 +257,11 @@ static int create_fd(struct fsnotify_group *group, const struct path *path, return client_fd; /* - * we need a new file handle for the userspace program so it can read even if it was - * originally opened O_WRONLY. + * We provide an fd for the userspace program, so it could access the + * file without generating fanotify events itself. */ - new_file = dentry_open(path, - group->fanotify_data.f_flags | __FMODE_NONOTIFY, - current_cred()); + new_file = dentry_open_nonotify(path, group->fanotify_data.f_flags, + current_cred()); if (IS_ERR(new_file)) { put_unused_fd(client_fd); client_fd = PTR_ERR(new_file); @@ -1409,6 +1407,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) unsigned int fid_mode = flags & FANOTIFY_FID_BITS; unsigned int class = flags & FANOTIFY_CLASS_BITS; unsigned int internal_flags = 0; + struct file *file; pr_debug("%s: flags=%x event_f_flags=%x\n", __func__, flags, event_f_flags); @@ -1477,7 +1476,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) (!(fid_mode & FAN_REPORT_NAME) || !(fid_mode & FAN_REPORT_FID))) return -EINVAL; - f_flags = O_RDWR | __FMODE_NONOTIFY; + f_flags = O_RDWR; if (flags & FAN_CLOEXEC) f_flags |= O_CLOEXEC; if (flags & FAN_NONBLOCK) @@ -1555,10 +1554,18 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) goto out_destroy_group; } - fd = anon_inode_getfd("[fanotify]", &fanotify_fops, group, f_flags); + fd = get_unused_fd_flags(f_flags); if (fd < 0) goto out_destroy_group; + file = anon_inode_getfile_fmode("[fanotify]", &fanotify_fops, group, + f_flags, FMODE_NONOTIFY); + if (IS_ERR(file)) { + fd = PTR_ERR(file); + put_unused_fd(fd); + goto out_destroy_group; + } + fd_install(fd, file); return fd; out_destroy_group: diff --git a/fs/open.c b/fs/open.c index e6911101fe71..c3490286092e 100644 --- a/fs/open.c +++ b/fs/open.c @@ -1105,6 +1105,23 @@ struct file *dentry_open(const struct path *path, int flags, } EXPORT_SYMBOL(dentry_open); +struct file *dentry_open_nonotify(const struct path *path, int flags, + const struct cred *cred) +{ + struct file *f = alloc_empty_file(flags, cred); + if (!IS_ERR(f)) { + int error; + + f->f_mode |= FMODE_NONOTIFY; + error = vfs_open(path, f); + if (error) { + fput(f); + f = ERR_PTR(error); + } + } + return f; +} + /** * dentry_create - Create and open a file * @path: path to create @@ -1202,7 +1219,7 @@ inline struct open_how build_open_how(int flags, umode_t mode) inline int build_open_flags(const struct open_how *how, struct open_flags *op) { u64 flags = how->flags; - u64 strip = __FMODE_NONOTIFY | O_CLOEXEC; + u64 strip = O_CLOEXEC; int lookup_flags = 0; int acc_mode = ACC_MODE(flags); @@ -1210,9 +1227,7 @@ inline int build_open_flags(const struct open_how *how, struct open_flags *op) "struct open_flags doesn't yet handle flags > 32 bits"); /* - * Strip flags that either shouldn't be set by userspace like - * FMODE_NONOTIFY or that aren't relevant in determining struct - * open_flags like O_CLOEXEC. + * Strip flags that aren't relevant in determining struct open_flags. */ flags &= ~strip; diff --git a/include/linux/fs.h b/include/linux/fs.h index 7e29433c5ecc..93c2b720271e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2751,6 +2751,8 @@ static inline struct file *file_open_root_mnt(struct vfsmount *mnt, } struct file *dentry_open(const struct path *path, int flags, const struct cred *creds); +struct file *dentry_open_nonotify(const struct path *path, int flags, + const struct cred *cred); struct file *dentry_create(const struct path *path, int flags, umode_t mode, const struct cred *cred); struct path *backing_file_user_path(struct file *f); @@ -3707,11 +3709,9 @@ struct ctl_table; int __init list_bdev_fs_names(char *buf, size_t size); #define __FMODE_EXEC ((__force int) FMODE_EXEC) -#define __FMODE_NONOTIFY ((__force int) FMODE_NONOTIFY) #define ACC_MODE(x) ("\004\002\006\006"[(x)&O_ACCMODE]) -#define OPEN_FMODE(flag) ((__force fmode_t)(((flag + 1) & O_ACCMODE) | \ - (flag & __FMODE_NONOTIFY))) +#define OPEN_FMODE(flag) ((__force fmode_t)((flag + 1) & O_ACCMODE)) static inline bool is_sxid(umode_t mode) { diff --git a/include/uapi/asm-generic/fcntl.h b/include/uapi/asm-generic/fcntl.h index 80f37a0d40d7..613475285643 100644 --- a/include/uapi/asm-generic/fcntl.h +++ b/include/uapi/asm-generic/fcntl.h @@ -6,7 +6,6 @@ /* * FMODE_EXEC is 0x20 - * FMODE_NONOTIFY is 0x4000000 * These cannot be used by userspace O_* until internal and external open * flags are split. * -Eric Paris -- cgit v1.2.3 From 6c11379b104e3718135fd7fc37bb254b41e4cf65 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 5 Dec 2024 16:40:57 +0100 Subject: vxlan: Add an attribute to make VXLAN header validation configurable The set of bits that the VXLAN netdevice currently considers reserved is defined by the features enabled at the netdevice construction. In order to make this configurable, add an attribute, IFLA_VXLAN_RESERVED_BITS. The payload is a pair of big-endian u32's covering the VXLAN header. This is validated against the set of flags used by the various enabled VXLAN features, and attempts to override bits used by an enabled feature are bounced. Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Link: https://patch.msgid.link/c657275e5ceed301e62c69fe8e559e32909442e2.1733412063.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/vxlan/vxlan_core.c | 53 ++++++++++++++++++++++++++++++++++++------ include/uapi/linux/if_link.h | 1 + 2 files changed, 47 insertions(+), 7 deletions(-) (limited to 'include/uapi') diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index ff5684a2103a..43cf672b7b9f 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -3428,6 +3428,7 @@ static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = { [IFLA_VXLAN_VNIFILTER] = { .type = NLA_U8 }, [IFLA_VXLAN_LOCALBYPASS] = NLA_POLICY_MAX(NLA_U8, 1), [IFLA_VXLAN_LABEL_POLICY] = NLA_POLICY_MAX(NLA_U32, VXLAN_LABEL_MAX), + [IFLA_VXLAN_RESERVED_BITS] = NLA_POLICY_EXACT_LEN(sizeof(struct vxlanhdr)), }; static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[], @@ -4315,13 +4316,44 @@ static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[], used_bits.vx_flags |= VXLAN_GPE_USED_BITS; } - /* For backwards compatibility, only allow reserved fields to be - * used by VXLAN extensions if explicitly requested. - */ - conf->reserved_bits = (struct vxlanhdr) { - .vx_flags = ~used_bits.vx_flags, - .vx_vni = ~used_bits.vx_vni, - }; + if (data[IFLA_VXLAN_RESERVED_BITS]) { + struct vxlanhdr reserved_bits; + + if (changelink) { + NL_SET_ERR_MSG_ATTR(extack, + data[IFLA_VXLAN_RESERVED_BITS], + "Cannot change reserved_bits"); + return -EOPNOTSUPP; + } + + nla_memcpy(&reserved_bits, data[IFLA_VXLAN_RESERVED_BITS], + sizeof(reserved_bits)); + if (used_bits.vx_flags & reserved_bits.vx_flags || + used_bits.vx_vni & reserved_bits.vx_vni) { + __be64 ub_be64, rb_be64; + + memcpy(&ub_be64, &used_bits, sizeof(ub_be64)); + memcpy(&rb_be64, &reserved_bits, sizeof(rb_be64)); + + NL_SET_ERR_MSG_ATTR_FMT(extack, + data[IFLA_VXLAN_RESERVED_BITS], + "Used bits %#018llx cannot overlap reserved bits %#018llx", + be64_to_cpu(ub_be64), + be64_to_cpu(rb_be64)); + return -EINVAL; + } + + conf->reserved_bits = reserved_bits; + } else { + /* For backwards compatibility, only allow reserved fields to be + * used by VXLAN extensions if explicitly requested. + */ + conf->reserved_bits = (struct vxlanhdr) { + .vx_flags = ~used_bits.vx_flags, + .vx_vni = ~used_bits.vx_vni, + }; + } + if (data[IFLA_VXLAN_REMCSUM_NOPARTIAL]) { err = vxlan_nl2flag(conf, data, IFLA_VXLAN_REMCSUM_NOPARTIAL, VXLAN_F_REMCSUM_NOPARTIAL, changelink, @@ -4506,6 +4538,8 @@ static size_t vxlan_get_size(const struct net_device *dev) nla_total_size(0) + /* IFLA_VXLAN_GPE */ nla_total_size(0) + /* IFLA_VXLAN_REMCSUM_NOPARTIAL */ nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_VNIFILTER */ + /* IFLA_VXLAN_RESERVED_BITS */ + nla_total_size(sizeof(struct vxlanhdr)) + 0; } @@ -4608,6 +4642,11 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev) !!(vxlan->cfg.flags & VXLAN_F_VNIFILTER))) goto nla_put_failure; + if (nla_put(skb, IFLA_VXLAN_RESERVED_BITS, + sizeof(vxlan->cfg.reserved_bits), + &vxlan->cfg.reserved_bits)) + goto nla_put_failure; + return 0; nla_put_failure: diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 2575e0cd9b48..77730c340c8f 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -1394,6 +1394,7 @@ enum { IFLA_VXLAN_VNIFILTER, /* only applicable with COLLECT_METADATA mode */ IFLA_VXLAN_LOCALBYPASS, IFLA_VXLAN_LABEL_POLICY, /* IPv6 flow label policy; ifla_vxlan_label_policy */ + IFLA_VXLAN_RESERVED_BITS, __IFLA_VXLAN_MAX }; #define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1) -- cgit v1.2.3 From 0a076036b631f086a6bce93a45eaa216f234f121 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Fri, 15 Nov 2024 10:30:19 -0500 Subject: fanotify: reserve event bit of deprecated FAN_DIR_MODIFY Avoid reusing it, because we would like to reserve it for future FAN_PATH_MODIFY pre-content event. Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara Link: https://patch.msgid.link/632d9f80428e2e7a6b6a8ccc2925d87c92bbb518.1731684329.git.josef@toxicpanda.com --- include/linux/fsnotify_backend.h | 1 + include/uapi/linux/fanotify.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 9c105244815d..c38762b62bf1 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -55,6 +55,7 @@ #define FS_OPEN_PERM 0x00010000 /* open event in an permission hook */ #define FS_ACCESS_PERM 0x00020000 /* access event in a permissions hook */ #define FS_OPEN_EXEC_PERM 0x00040000 /* open/exec event in a permission hook */ +/* #define FS_DIR_MODIFY 0x00080000 */ /* Deprecated (reserved) */ /* * Set on inode mark that cares about things that happen to its children. diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h index 34f221d3a1b9..79072b6894f2 100644 --- a/include/uapi/linux/fanotify.h +++ b/include/uapi/linux/fanotify.h @@ -25,6 +25,7 @@ #define FAN_OPEN_PERM 0x00010000 /* File open in perm check */ #define FAN_ACCESS_PERM 0x00020000 /* File accessed in perm check */ #define FAN_OPEN_EXEC_PERM 0x00040000 /* File open/exec in perm check */ +/* #define FAN_DIR_MODIFY 0x00080000 */ /* Deprecated (reserved) */ #define FAN_EVENT_ON_CHILD 0x08000000 /* Interested in child events */ -- cgit v1.2.3 From 4f8afa33817a6420398d1c177c6e220a05081f51 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Fri, 15 Nov 2024 10:30:23 -0500 Subject: fanotify: introduce FAN_PRE_ACCESS permission event Similar to FAN_ACCESS_PERM permission event, but it is only allowed with class FAN_CLASS_PRE_CONTENT and only allowed on regular files and dirs. Unlike FAN_ACCESS_PERM, it is safe to write to the file being accessed in the context of the event handler. This pre-content event is meant to be used by hierarchical storage managers that want to fill the content of files on first read access. Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara Link: https://patch.msgid.link/b80986f8d5b860acea2c9a73c0acd93587be5fe4.1731684329.git.josef@toxicpanda.com --- fs/notify/fanotify/fanotify.c | 3 ++- fs/notify/fanotify/fanotify_user.c | 35 ++++++++++++++++++++++++++++++----- include/linux/fanotify.h | 14 ++++++++++---- include/uapi/linux/fanotify.h | 2 ++ 4 files changed, 44 insertions(+), 10 deletions(-) (limited to 'include/uapi') diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 2e6ba94ec405..da6c3c1c7edf 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -916,8 +916,9 @@ static int fanotify_handle_event(struct fsnotify_group *group, u32 mask, BUILD_BUG_ON(FAN_OPEN_EXEC_PERM != FS_OPEN_EXEC_PERM); BUILD_BUG_ON(FAN_FS_ERROR != FS_ERROR); BUILD_BUG_ON(FAN_RENAME != FS_RENAME); + BUILD_BUG_ON(FAN_PRE_ACCESS != FS_PRE_ACCESS); - BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 21); + BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 22); mask = fanotify_group_event_mask(group, iter_info, &match_mask, mask, data, data_type, dir); diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 456cc3e92c88..08e4d8659ef5 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -1287,7 +1287,7 @@ static int fanotify_group_init_error_pool(struct fsnotify_group *group) } static int fanotify_may_update_existing_mark(struct fsnotify_mark *fsn_mark, - unsigned int fan_flags) + __u32 mask, unsigned int fan_flags) { /* * Non evictable mark cannot be downgraded to evictable mark. @@ -1314,6 +1314,11 @@ static int fanotify_may_update_existing_mark(struct fsnotify_mark *fsn_mark, fsn_mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY) return -EEXIST; + /* For now pre-content events are not generated for directories */ + mask |= fsn_mark->mask; + if (mask & FANOTIFY_PRE_CONTENT_EVENTS && mask & FAN_ONDIR) + return -EEXIST; + return 0; } @@ -1340,7 +1345,7 @@ static int fanotify_add_mark(struct fsnotify_group *group, /* * Check if requested mark flags conflict with an existing mark flags. */ - ret = fanotify_may_update_existing_mark(fsn_mark, fan_flags); + ret = fanotify_may_update_existing_mark(fsn_mark, mask, fan_flags); if (ret) goto out; @@ -1640,11 +1645,23 @@ static int fanotify_events_supported(struct fsnotify_group *group, unsigned int flags) { unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS; + bool is_dir = d_is_dir(path->dentry); /* Strict validation of events in non-dir inode mask with v5.17+ APIs */ bool strict_dir_events = FAN_GROUP_FLAG(group, FAN_REPORT_TARGET_FID) || (mask & FAN_RENAME) || (flags & FAN_MARK_IGNORE); + /* + * Filesystems need to opt-into pre-content evnets (a.k.a HSM) + * and they are only supported on regular files and directories. + */ + if (mask & FANOTIFY_PRE_CONTENT_EVENTS) { + if (!(path->mnt->mnt_sb->s_iflags & SB_I_ALLOW_HSM)) + return -EOPNOTSUPP; + if (!is_dir && !d_is_reg(path->dentry)) + return -EINVAL; + } + /* * Some filesystems such as 'proc' acquire unusual locks when opening * files. For them fanotify permission events have high chances of @@ -1677,7 +1694,7 @@ static int fanotify_events_supported(struct fsnotify_group *group, * but because we always allowed it, error only when using new APIs. */ if (strict_dir_events && mark_type == FAN_MARK_INODE && - !d_is_dir(path->dentry) && (mask & FANOTIFY_DIRONLY_EVENT_BITS)) + !is_dir && (mask & FANOTIFY_DIRONLY_EVENT_BITS)) return -ENOTDIR; return 0; @@ -1778,10 +1795,14 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, return -EPERM; /* - * Permission events require minimum priority FAN_CLASS_CONTENT. + * Permission events are not allowed for FAN_CLASS_NOTIF. + * Pre-content permission events are not allowed for FAN_CLASS_CONTENT. */ if (mask & FANOTIFY_PERM_EVENTS && - group->priority < FSNOTIFY_PRIO_CONTENT) + group->priority == FSNOTIFY_PRIO_NORMAL) + return -EINVAL; + else if (mask & FANOTIFY_PRE_CONTENT_EVENTS && + group->priority == FSNOTIFY_PRIO_CONTENT) return -EINVAL; if (mask & FAN_FS_ERROR && @@ -1816,6 +1837,10 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, if (mask & FAN_RENAME && !(fid_mode & FAN_REPORT_NAME)) return -EINVAL; + /* Pre-content events are not currently generated for directories. */ + if (mask & FANOTIFY_PRE_CONTENT_EVENTS && mask & FAN_ONDIR) + return -EINVAL; + if (mark_cmd == FAN_MARK_FLUSH) { if (mark_type == FAN_MARK_MOUNT) fsnotify_clear_vfsmount_marks_by_group(group); diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index 89ff45bd6f01..c747af064d2c 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -89,6 +89,16 @@ #define FANOTIFY_DIRENT_EVENTS (FAN_MOVE | FAN_CREATE | FAN_DELETE | \ FAN_RENAME) +/* Content events can be used to inspect file content */ +#define FANOTIFY_CONTENT_PERM_EVENTS (FAN_OPEN_PERM | FAN_OPEN_EXEC_PERM | \ + FAN_ACCESS_PERM) +/* Pre-content events can be used to fill file content */ +#define FANOTIFY_PRE_CONTENT_EVENTS (FAN_PRE_ACCESS) + +/* Events that require a permission response from user */ +#define FANOTIFY_PERM_EVENTS (FANOTIFY_CONTENT_PERM_EVENTS | \ + FANOTIFY_PRE_CONTENT_EVENTS) + /* Events that can be reported with event->fd */ #define FANOTIFY_FD_EVENTS (FANOTIFY_PATH_EVENTS | FANOTIFY_PERM_EVENTS) @@ -104,10 +114,6 @@ FANOTIFY_INODE_EVENTS | \ FANOTIFY_ERROR_EVENTS) -/* Events that require a permission response from user */ -#define FANOTIFY_PERM_EVENTS (FAN_OPEN_PERM | FAN_ACCESS_PERM | \ - FAN_OPEN_EXEC_PERM) - /* Extra flags that may be reported with event or control handling of events */ #define FANOTIFY_EVENT_FLAGS (FAN_EVENT_ON_CHILD | FAN_ONDIR) diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h index 79072b6894f2..7596168c80eb 100644 --- a/include/uapi/linux/fanotify.h +++ b/include/uapi/linux/fanotify.h @@ -27,6 +27,8 @@ #define FAN_OPEN_EXEC_PERM 0x00040000 /* File open/exec in perm check */ /* #define FAN_DIR_MODIFY 0x00080000 */ /* Deprecated (reserved) */ +#define FAN_PRE_ACCESS 0x00100000 /* Pre-content access hook */ + #define FAN_EVENT_ON_CHILD 0x08000000 /* Interested in child events */ #define FAN_RENAME 0x10000000 /* File was renamed */ -- cgit v1.2.3 From 870499bc1d4dc04cba1f63dd5e7bc02b983e2458 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Fri, 15 Nov 2024 10:30:24 -0500 Subject: fanotify: report file range info with pre-content events With group class FAN_CLASS_PRE_CONTENT, report offset and length info along with FAN_PRE_ACCESS pre-content events. This information is meant to be used by hierarchical storage managers that want to fill partial content of files on first access to range. Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara Link: https://patch.msgid.link/b90a9e6c809dd3cad5684da90f23ea93ec6ce8c8.1731684329.git.josef@toxicpanda.com --- fs/notify/fanotify/fanotify.h | 8 ++++++++ fs/notify/fanotify/fanotify_user.c | 38 ++++++++++++++++++++++++++++++++++++++ include/uapi/linux/fanotify.h | 8 ++++++++ 3 files changed, 54 insertions(+) (limited to 'include/uapi') diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h index 93598b7d5952..7f06355afa1f 100644 --- a/fs/notify/fanotify/fanotify.h +++ b/fs/notify/fanotify/fanotify.h @@ -448,6 +448,14 @@ static inline bool fanotify_is_perm_event(u32 mask) mask & FANOTIFY_PERM_EVENTS; } +static inline bool fanotify_event_has_access_range(struct fanotify_event *event) +{ + if (!(event->mask & FANOTIFY_PRE_CONTENT_EVENTS)) + return false; + + return FANOTIFY_PERM(event)->ppos; +} + static inline struct fanotify_event *FANOTIFY_E(struct fsnotify_event *fse) { return container_of(fse, struct fanotify_event, fse); diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 08e4d8659ef5..6ef3cc7de5e4 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -121,6 +121,8 @@ struct kmem_cache *fanotify_perm_event_cachep __ro_after_init; sizeof(struct fanotify_event_info_pidfd) #define FANOTIFY_ERROR_INFO_LEN \ (sizeof(struct fanotify_event_info_error)) +#define FANOTIFY_RANGE_INFO_LEN \ + (sizeof(struct fanotify_event_info_range)) static int fanotify_fid_info_len(int fh_len, int name_len) { @@ -180,6 +182,9 @@ static size_t fanotify_event_len(unsigned int info_mode, if (info_mode & FAN_REPORT_PIDFD) event_len += FANOTIFY_PIDFD_INFO_LEN; + if (fanotify_event_has_access_range(event)) + event_len += FANOTIFY_RANGE_INFO_LEN; + return event_len; } @@ -516,6 +521,30 @@ static int copy_pidfd_info_to_user(int pidfd, return info_len; } +static size_t copy_range_info_to_user(struct fanotify_event *event, + char __user *buf, int count) +{ + struct fanotify_perm_event *pevent = FANOTIFY_PERM(event); + struct fanotify_event_info_range info = { }; + size_t info_len = FANOTIFY_RANGE_INFO_LEN; + + if (WARN_ON_ONCE(info_len > count)) + return -EFAULT; + + if (WARN_ON_ONCE(!pevent->ppos)) + return -EINVAL; + + info.hdr.info_type = FAN_EVENT_INFO_TYPE_RANGE; + info.hdr.len = info_len; + info.offset = *(pevent->ppos); + info.count = pevent->count; + + if (copy_to_user(buf, &info, info_len)) + return -EFAULT; + + return info_len; +} + static int copy_info_records_to_user(struct fanotify_event *event, struct fanotify_info *info, unsigned int info_mode, int pidfd, @@ -637,6 +666,15 @@ static int copy_info_records_to_user(struct fanotify_event *event, total_bytes += ret; } + if (fanotify_event_has_access_range(event)) { + ret = copy_range_info_to_user(event, buf, count); + if (ret < 0) + return ret; + buf += ret; + count -= ret; + total_bytes += ret; + } + return total_bytes; } diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h index 7596168c80eb..0636a9c85dd0 100644 --- a/include/uapi/linux/fanotify.h +++ b/include/uapi/linux/fanotify.h @@ -146,6 +146,7 @@ struct fanotify_event_metadata { #define FAN_EVENT_INFO_TYPE_DFID 3 #define FAN_EVENT_INFO_TYPE_PIDFD 4 #define FAN_EVENT_INFO_TYPE_ERROR 5 +#define FAN_EVENT_INFO_TYPE_RANGE 6 /* Special info types for FAN_RENAME */ #define FAN_EVENT_INFO_TYPE_OLD_DFID_NAME 10 @@ -192,6 +193,13 @@ struct fanotify_event_info_error { __u32 error_count; }; +struct fanotify_event_info_range { + struct fanotify_event_info_header hdr; + __u32 pad; + __u64 offset; + __u64 count; +}; + /* * User space may need to record additional information about its decision. * The extra information type records what kind of information is included. -- cgit v1.2.3 From b4b2ff4f61ded819bfa22e50fdec7693f51cbbee Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Fri, 15 Nov 2024 10:30:25 -0500 Subject: fanotify: allow to set errno in FAN_DENY permission response With FAN_DENY response, user trying to perform the filesystem operation gets an error with errno set to EPERM. It is useful for hierarchical storage management (HSM) service to be able to deny access for reasons more diverse than EPERM, for example EAGAIN, if HSM could retry the operation later. Allow fanotify groups with priority FAN_CLASSS_PRE_CONTENT to responsd to permission events with the response value FAN_DENY_ERRNO(errno), instead of FAN_DENY to return a custom error. Limit custom error values to errors expected on read(2)/write(2) and open(2) of regular files. This list could be extended in the future. Userspace can test for legitimate values of FAN_DENY_ERRNO(errno) by writing a response to an fanotify group fd with a value of FAN_NOFD in the fd field of the response. The change in fanotify_response is backward compatible, because errno is written in the high 8 bits of the 32bit response field and old kernels reject respose value with high bits set. Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara Link: https://patch.msgid.link/1e5fb6af84b69ca96b5c849fa5f10bdf4d1dc414.1731684329.git.josef@toxicpanda.com --- fs/notify/fanotify/fanotify.c | 17 +++++++++++++---- fs/notify/fanotify/fanotify.h | 5 +++++ fs/notify/fanotify/fanotify_user.c | 29 +++++++++++++++++++++++++++-- include/linux/fanotify.h | 4 +++- include/uapi/linux/fanotify.h | 7 +++++++ 5 files changed, 55 insertions(+), 7 deletions(-) (limited to 'include/uapi') diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index da6c3c1c7edf..95646f7c46ca 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -223,7 +223,7 @@ static int fanotify_get_response(struct fsnotify_group *group, struct fanotify_perm_event *event, struct fsnotify_iter_info *iter_info) { - int ret; + int ret, errno; pr_debug("%s: group=%p event=%p\n", __func__, group, event); @@ -262,14 +262,23 @@ static int fanotify_get_response(struct fsnotify_group *group, ret = 0; break; case FAN_DENY: + /* Check custom errno from pre-content events */ + errno = fanotify_get_response_errno(event->response); + if (errno) { + ret = -errno; + break; + } + fallthrough; default: ret = -EPERM; } /* Check if the response should be audited */ - if (event->response & FAN_AUDIT) - audit_fanotify(event->response & ~FAN_AUDIT, - &event->audit_rule); + if (event->response & FAN_AUDIT) { + u32 response = event->response & + (FANOTIFY_RESPONSE_ACCESS | FANOTIFY_RESPONSE_FLAGS); + audit_fanotify(response & ~FAN_AUDIT, &event->audit_rule); + } pr_debug("%s: group=%p event=%p about to return ret=%d\n", __func__, group, event, ret); diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h index 7f06355afa1f..c12cbc270539 100644 --- a/fs/notify/fanotify/fanotify.h +++ b/fs/notify/fanotify/fanotify.h @@ -528,3 +528,8 @@ static inline unsigned int fanotify_mark_user_flags(struct fsnotify_mark *mark) return mflags; } + +static inline u32 fanotify_get_response_errno(int res) +{ + return (res >> FAN_ERRNO_SHIFT) & FAN_ERRNO_MASK; +} diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 6ef3cc7de5e4..19435cd2c41f 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -327,11 +327,12 @@ static int process_access_response(struct fsnotify_group *group, struct fanotify_perm_event *event; int fd = response_struct->fd; u32 response = response_struct->response; + int errno = fanotify_get_response_errno(response); int ret = info_len; struct fanotify_response_info_audit_rule friar; - pr_debug("%s: group=%p fd=%d response=%u buf=%p size=%zu\n", __func__, - group, fd, response, info, info_len); + pr_debug("%s: group=%p fd=%d response=%x errno=%d buf=%p size=%zu\n", + __func__, group, fd, response, errno, info, info_len); /* * make sure the response is valid, if invalid we do nothing and either * userspace can send a valid response or we will clean it up after the @@ -342,7 +343,31 @@ static int process_access_response(struct fsnotify_group *group, switch (response & FANOTIFY_RESPONSE_ACCESS) { case FAN_ALLOW: + if (errno) + return -EINVAL; + break; case FAN_DENY: + /* Custom errno is supported only for pre-content groups */ + if (errno && group->priority != FSNOTIFY_PRIO_PRE_CONTENT) + return -EINVAL; + + /* + * Limit errno to values expected on open(2)/read(2)/write(2) + * of regular files. + */ + switch (errno) { + case 0: + case EIO: + case EPERM: + case EBUSY: + case ETXTBSY: + case EAGAIN: + case ENOSPC: + case EDQUOT: + break; + default: + return -EINVAL; + } break; default: return -EINVAL; diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index c747af064d2c..78f660ebc318 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -132,7 +132,9 @@ /* These masks check for invalid bits in permission responses. */ #define FANOTIFY_RESPONSE_ACCESS (FAN_ALLOW | FAN_DENY) #define FANOTIFY_RESPONSE_FLAGS (FAN_AUDIT | FAN_INFO) -#define FANOTIFY_RESPONSE_VALID_MASK (FANOTIFY_RESPONSE_ACCESS | FANOTIFY_RESPONSE_FLAGS) +#define FANOTIFY_RESPONSE_VALID_MASK \ + (FANOTIFY_RESPONSE_ACCESS | FANOTIFY_RESPONSE_FLAGS | \ + (FAN_ERRNO_MASK << FAN_ERRNO_SHIFT)) /* Do not use these old uapi constants internally */ #undef FAN_ALL_CLASS_BITS diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h index 0636a9c85dd0..bd8167979707 100644 --- a/include/uapi/linux/fanotify.h +++ b/include/uapi/linux/fanotify.h @@ -235,6 +235,13 @@ struct fanotify_response_info_audit_rule { /* Legit userspace responses to a _PERM event */ #define FAN_ALLOW 0x01 #define FAN_DENY 0x02 +/* errno other than EPERM can specified in upper byte of deny response */ +#define FAN_ERRNO_BITS 8 +#define FAN_ERRNO_SHIFT (32 - FAN_ERRNO_BITS) +#define FAN_ERRNO_MASK ((1 << FAN_ERRNO_BITS) - 1) +#define FAN_DENY_ERRNO(err) \ + (FAN_DENY | ((((__u32)(err)) & FAN_ERRNO_MASK) << FAN_ERRNO_SHIFT)) + #define FAN_AUDIT 0x10 /* Bitmask to create audit record for result */ #define FAN_INFO 0x20 /* Bitmask to indicate additional information */ -- cgit v1.2.3 From f4425e3ab2f796d442a44f31262eade9b6427ff7 Mon Sep 17 00:00:00 2001 From: Shengjiu Wang Date: Thu, 12 Dec 2024 15:45:04 +0800 Subject: ALSA: compress: Add output rate and output format support Add 'pcm_format' for struct snd_codec, add 'pcm_formats' for struct snd_codec_desc, these are used for accelerator usage. Current accelerator example is sample rate converter (SRC). Define struct snd_codec_desc_src for descript minmum and maxmum sample rates. And add 'src_d' in union snd_codec_options structure. These are mainly used for capbility query. Signed-off-by: Jaroslav Kysela Signed-off-by: Shengjiu Wang Acked-by: Jaroslav Kysela Acked-by: Vinod Koul Link: https://patch.msgid.link/20241212074509.3445859-2-shengjiu.wang@nxp.com Signed-off-by: Mark Brown --- include/uapi/sound/compress_params.h | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/sound/compress_params.h b/include/uapi/sound/compress_params.h index ddc77322d571..bc7648a30746 100644 --- a/include/uapi/sound/compress_params.h +++ b/include/uapi/sound/compress_params.h @@ -334,6 +334,14 @@ union snd_codec_options { struct snd_dec_wma wma_d; struct snd_dec_alac alac_d; struct snd_dec_ape ape_d; + struct { + __u32 out_sample_rate; + } src_d; +} __attribute__((packed, aligned(4))); + +struct snd_codec_desc_src { + __u32 out_sample_rate_min; + __u32 out_sample_rate_max; } __attribute__((packed, aligned(4))); /** struct snd_codec_desc - description of codec capabilities @@ -347,6 +355,9 @@ union snd_codec_options { * @modes: Supported modes. See SND_AUDIOMODE defines * @formats: Supported formats. See SND_AUDIOSTREAMFORMAT defines * @min_buffer: Minimum buffer size handled by codec implementation + * @pcm_formats: Output (for decoders) or input (for encoders) + * PCM formats (required to accel mode, 0 for other modes) + * @u_space: union space (for codec dependent data) * @reserved: reserved for future use * * This structure provides a scalar value for profiles, modes and stream @@ -370,7 +381,12 @@ struct snd_codec_desc { __u32 modes; __u32 formats; __u32 min_buffer; - __u32 reserved[15]; + __u32 pcm_formats; + union { + __u32 u_space[6]; + struct snd_codec_desc_src src; + } __attribute__((packed, aligned(4))); + __u32 reserved[8]; } __attribute__((packed, aligned(4))); /** struct snd_codec @@ -395,6 +411,8 @@ struct snd_codec_desc { * @align: Block alignment in bytes of an audio sample. * Only required for PCM or IEC formats. * @options: encoder-specific settings + * @pcm_format: Output (for decoders) or input (for encoders) + * PCM formats (required to accel mode, 0 for other modes) * @reserved: reserved for future use */ @@ -411,7 +429,8 @@ struct snd_codec { __u32 format; __u32 align; union snd_codec_options options; - __u32 reserved[3]; + __u32 pcm_format; + __u32 reserved[2]; } __attribute__((packed, aligned(4))); #endif -- cgit v1.2.3 From 70a667d70cce338ab8552dd762ae114a5ab96500 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Fri, 13 Dec 2024 12:11:22 +0200 Subject: ASoC: SOF: Add support for pause supported tokens from topology New tokens are added to topology: 1202: SOF_TKN_STREAM_PLAYBACK_PAUSE_SUPPORTED 1203: SOF_TKN_STREAM_CAPTURE_PAUSE_SUPPORTED The new tokens are used to advertise support for PAUSE/RESUME operation on a PCM device depending on firmware product, use case, pipeline topology. The snd_sof_pcm_stream.pause_supported is updated to reflect the advertised value for the PCM device. If the token does not exist then the pause_supported is set to false. Note: it is up to the platform code to use this flag to decide to advertise the PAUSE support for user space or not. Signed-off-by: Peter Ujfalusi Reviewed-by: Pierre-Louis Bossart Reviewed-by: Bard Liao Reviewed-by: Kai Vehmanen Reviewed-by: Liam Girdwood Link: https://patch.msgid.link/20241213101123.27318-2-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown --- include/uapi/sound/sof/tokens.h | 2 ++ sound/soc/sof/sof-audio.h | 1 + sound/soc/sof/topology.c | 4 ++++ 3 files changed, 7 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/sound/sof/tokens.h b/include/uapi/sound/sof/tokens.h index 0a246bc218d3..c28c766270de 100644 --- a/include/uapi/sound/sof/tokens.h +++ b/include/uapi/sound/sof/tokens.h @@ -153,6 +153,8 @@ /* Stream */ #define SOF_TKN_STREAM_PLAYBACK_COMPATIBLE_D0I3 1200 #define SOF_TKN_STREAM_CAPTURE_COMPATIBLE_D0I3 1201 +#define SOF_TKN_STREAM_PLAYBACK_PAUSE_SUPPORTED 1202 +#define SOF_TKN_STREAM_CAPTURE_PAUSE_SUPPORTED 1203 /* Led control for mute switches */ #define SOF_TKN_MUTE_LED_USE 1300 diff --git a/sound/soc/sof/sof-audio.h b/sound/soc/sof/sof-audio.h index 01b819dd8498..62f3c11a9216 100644 --- a/sound/soc/sof/sof-audio.h +++ b/sound/soc/sof/sof-audio.h @@ -332,6 +332,7 @@ struct snd_sof_pcm_stream { struct work_struct period_elapsed_work; struct snd_soc_dapm_widget_list *list; /* list of connected DAPM widgets */ bool d0i3_compatible; /* DSP can be in D0I3 when this pcm is opened */ + bool pause_supported; /* PCM device supports PAUSE operation */ unsigned int dsp_max_burst_size_in_ms; /* The maximum size of the host DMA burst in ms */ /* * flag to indicate that the DSP pipelines should be kept diff --git a/sound/soc/sof/topology.c b/sound/soc/sof/topology.c index b3fca5fd87d6..688cc7ac1714 100644 --- a/sound/soc/sof/topology.c +++ b/sound/soc/sof/topology.c @@ -407,6 +407,10 @@ static const struct sof_topology_token stream_tokens[] = { offsetof(struct snd_sof_pcm, stream[0].d0i3_compatible)}, {SOF_TKN_STREAM_CAPTURE_COMPATIBLE_D0I3, SND_SOC_TPLG_TUPLE_TYPE_BOOL, get_token_u16, offsetof(struct snd_sof_pcm, stream[1].d0i3_compatible)}, + {SOF_TKN_STREAM_PLAYBACK_PAUSE_SUPPORTED, SND_SOC_TPLG_TUPLE_TYPE_BOOL, get_token_u16, + offsetof(struct snd_sof_pcm, stream[0].pause_supported)}, + {SOF_TKN_STREAM_CAPTURE_PAUSE_SUPPORTED, SND_SOC_TPLG_TUPLE_TYPE_BOOL, get_token_u16, + offsetof(struct snd_sof_pcm, stream[1].pause_supported)}, }; /* Leds */ -- cgit v1.2.3 From 4d3ae294f900fb7232fb6c890dbd3176b8a5f121 Mon Sep 17 00:00:00 2001 From: Anton Protopopov Date: Fri, 13 Dec 2024 13:09:31 +0000 Subject: bpf: Add fd_array_cnt attribute for prog_load The fd_array attribute of the BPF_PROG_LOAD syscall may contain a set of file descriptors: maps or btfs. This field was introduced as a sparse array. Introduce a new attribute, fd_array_cnt, which, if present, indicates that the fd_array is a continuous array of the corresponding length. If fd_array_cnt is non-zero, then every map in the fd_array will be bound to the program, as if it was used by the program. This functionality is similar to the BPF_PROG_BIND_MAP syscall, but such maps can be used by the verifier during the program load. Signed-off-by: Anton Protopopov Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20241213130934.1087929-5-aspsk@isovalent.com --- include/uapi/linux/bpf.h | 10 ++++ kernel/bpf/syscall.c | 2 +- kernel/bpf/verifier.c | 106 +++++++++++++++++++++++++++++++++++------ tools/include/uapi/linux/bpf.h | 10 ++++ 4 files changed, 112 insertions(+), 16 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 4162afc6b5d0..2acf9b336371 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1573,6 +1573,16 @@ union bpf_attr { * If provided, prog_flags should have BPF_F_TOKEN_FD flag set. */ __s32 prog_token_fd; + /* The fd_array_cnt can be used to pass the length of the + * fd_array array. In this case all the [map] file descriptors + * passed in this array will be bound to the program, even if + * the maps are not referenced directly. The functionality is + * similar to the BPF_PROG_BIND_MAP syscall, but maps can be + * used by the verifier during the program load. If provided, + * then the fd_array[0,...,fd_array_cnt-1] is expected to be + * continuous. + */ + __u32 fd_array_cnt; }; struct { /* anonymous struct used by BPF_OBJ_* commands */ diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 5684e8ce132d..4e88797fdbeb 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2730,7 +2730,7 @@ static bool is_perfmon_prog_type(enum bpf_prog_type prog_type) } /* last field in 'union bpf_attr' used by this command */ -#define BPF_PROG_LOAD_LAST_FIELD prog_token_fd +#define BPF_PROG_LOAD_LAST_FIELD fd_array_cnt static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) { diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 296765ffbdc5..c0e772996c52 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -19505,22 +19505,10 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env, return 0; } -/* Add map behind fd to used maps list, if it's not already there, and return - * its index. - * Returns <0 on error, or >= 0 index, on success. - */ -static int add_used_map_from_fd(struct bpf_verifier_env *env, int fd) +static int __add_used_map(struct bpf_verifier_env *env, struct bpf_map *map) { - CLASS(fd, f)(fd); - struct bpf_map *map; int i, err; - map = __bpf_map_get(f); - if (IS_ERR(map)) { - verbose(env, "fd %d is not pointing to valid bpf_map\n", fd); - return PTR_ERR(map); - } - /* check whether we recorded this map already */ for (i = 0; i < env->used_map_cnt; i++) if (env->used_maps[i] == map) @@ -19551,6 +19539,24 @@ static int add_used_map_from_fd(struct bpf_verifier_env *env, int fd) return env->used_map_cnt - 1; } +/* Add map behind fd to used maps list, if it's not already there, and return + * its index. + * Returns <0 on error, or >= 0 index, on success. + */ +static int add_used_map(struct bpf_verifier_env *env, int fd) +{ + struct bpf_map *map; + CLASS(fd, f)(fd); + + map = __bpf_map_get(f); + if (IS_ERR(map)) { + verbose(env, "fd %d is not pointing to valid bpf_map\n", fd); + return PTR_ERR(map); + } + + return __add_used_map(env, map); +} + /* find and rewrite pseudo imm in ld_imm64 instructions: * * 1. if it accesses map FD, replace it with actual map pointer. @@ -19642,7 +19648,7 @@ static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env) break; } - map_idx = add_used_map_from_fd(env, fd); + map_idx = add_used_map(env, fd); if (map_idx < 0) return map_idx; map = env->used_maps[map_idx]; @@ -22850,6 +22856,73 @@ struct btf *bpf_get_btf_vmlinux(void) return btf_vmlinux; } +/* + * The add_fd_from_fd_array() is executed only if fd_array_cnt is non-zero. In + * this case expect that every file descriptor in the array is either a map or + * a BTF. Everything else is considered to be trash. + */ +static int add_fd_from_fd_array(struct bpf_verifier_env *env, int fd) +{ + struct bpf_map *map; + struct btf *btf; + CLASS(fd, f)(fd); + int err; + + map = __bpf_map_get(f); + if (!IS_ERR(map)) { + err = __add_used_map(env, map); + if (err < 0) + return err; + return 0; + } + + btf = __btf_get_by_fd(f); + if (!IS_ERR(btf)) { + err = __add_used_btf(env, btf); + if (err < 0) + return err; + return 0; + } + + verbose(env, "fd %d is not pointing to valid bpf_map or btf\n", fd); + return PTR_ERR(map); +} + +static int process_fd_array(struct bpf_verifier_env *env, union bpf_attr *attr, bpfptr_t uattr) +{ + size_t size = sizeof(int); + int ret; + int fd; + u32 i; + + env->fd_array = make_bpfptr(attr->fd_array, uattr.is_kernel); + + /* + * The only difference between old (no fd_array_cnt is given) and new + * APIs is that in the latter case the fd_array is expected to be + * continuous and is scanned for map fds right away + */ + if (!attr->fd_array_cnt) + return 0; + + /* Check for integer overflow */ + if (attr->fd_array_cnt >= (U32_MAX / size)) { + verbose(env, "fd_array_cnt is too big (%u)\n", attr->fd_array_cnt); + return -EINVAL; + } + + for (i = 0; i < attr->fd_array_cnt; i++) { + if (copy_from_bpfptr_offset(&fd, env->fd_array, i * size, size)) + return -EFAULT; + + ret = add_fd_from_fd_array(env, fd); + if (ret) + return ret; + } + + return 0; +} + int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u32 uattr_size) { u64 start_time = ktime_get_ns(); @@ -22881,7 +22954,6 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3 env->insn_aux_data[i].orig_idx = i; env->prog = *prog; env->ops = bpf_verifier_ops[env->prog->type]; - env->fd_array = make_bpfptr(attr->fd_array, uattr.is_kernel); env->allow_ptr_leaks = bpf_allow_ptr_leaks(env->prog->aux->token); env->allow_uninit_stack = bpf_allow_uninit_stack(env->prog->aux->token); @@ -22904,6 +22976,10 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3 if (ret) goto err_unlock; + ret = process_fd_array(env, attr, uattr); + if (ret) + goto skip_full_check; + mark_verifier_state_clean(env); if (IS_ERR(btf_vmlinux)) { diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 4162afc6b5d0..2acf9b336371 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1573,6 +1573,16 @@ union bpf_attr { * If provided, prog_flags should have BPF_F_TOKEN_FD flag set. */ __s32 prog_token_fd; + /* The fd_array_cnt can be used to pass the length of the + * fd_array array. In this case all the [map] file descriptors + * passed in this array will be bound to the program, even if + * the maps are not referenced directly. The functionality is + * similar to the BPF_PROG_BIND_MAP syscall, but maps can be + * used by the verifier during the program load. If provided, + * then the fd_array[0,...,fd_array_cnt-1] is expected to be + * continuous. + */ + __u32 fd_array_cnt; }; struct { /* anonymous struct used by BPF_OBJ_* commands */ -- cgit v1.2.3 From 2c2b61d2138f472e50b5531ec0cb4a1485837e21 Mon Sep 17 00:00:00 2001 From: Yuyang Huang Date: Wed, 11 Dec 2024 17:22:41 +0900 Subject: netlink: add IGMP/MLD join/leave notifications MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change introduces netlink notifications for multicast address changes. The following features are included: * Addition and deletion of multicast addresses are reported using RTM_NEWMULTICAST and RTM_DELMULTICAST messages with AF_INET and AF_INET6. * Two new notification groups: RTNLGRP_IPV4_MCADDR and RTNLGRP_IPV6_MCADDR are introduced for receiving these events. This change allows user space applications (e.g., ip monitor) to efficiently track multicast group memberships by listening for netlink events. Previously, applications relied on inefficient polling of procfs, introducing delays. With netlink notifications, applications receive realtime updates on multicast group membership changes, enabling more precise metrics collection and system monitoring.  This change also unlocks the potential for implementing a wide range of sophisticated multicast related features in user space by allowing applications to combine kernel provided multicast address information with user space data and communicate decisions back to the kernel for more fine grained control. This mechanism can be used for various purposes, including multicast filtering, IGMP/MLD offload, and IGMP/MLD snooping. Cc: Maciej Ć»enczykowski Cc: Lorenzo Colitti Co-developed-by: Patrick Ruddy Signed-off-by: Patrick Ruddy Link: https://lore.kernel.org/r/20180906091056.21109-1-pruddy@vyatta.att-mail.com Signed-off-by: Yuyang Huang Signed-off-by: David S. Miller --- include/linux/igmp.h | 2 ++ include/net/addrconf.h | 21 ++++++++++++++ include/uapi/linux/rtnetlink.h | 10 ++++++- net/ipv4/igmp.c | 64 ++++++++++++++++++++++++++++++++++++++++++ net/ipv6/addrconf.c | 29 ++++++------------- net/ipv6/mcast.c | 39 +++++++++++++++++++++++++ 6 files changed, 144 insertions(+), 21 deletions(-) (limited to 'include/uapi') diff --git a/include/linux/igmp.h b/include/linux/igmp.h index 5171231f70a8..073b30a9b850 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -87,6 +87,8 @@ struct ip_mc_list { char loaded; unsigned char gsquery; /* check source marks? */ unsigned char crcount; + unsigned long mca_cstamp; + unsigned long mca_tstamp; struct rcu_head rcu; }; diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 363dd63babe7..58337898fa21 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -88,6 +88,23 @@ struct ifa6_config { u16 scope; }; +enum addr_type_t { + UNICAST_ADDR, + MULTICAST_ADDR, + ANYCAST_ADDR, +}; + +struct inet6_fill_args { + u32 portid; + u32 seq; + int event; + unsigned int flags; + int netnsid; + int ifindex; + enum addr_type_t type; + bool force_rt_scope_universe; +}; + int addrconf_init(void); void addrconf_cleanup(void); @@ -525,4 +542,8 @@ int if6_proc_init(void); void if6_proc_exit(void); #endif +int inet6_fill_ifmcaddr(struct sk_buff *skb, + const struct ifmcaddr6 *ifmca, + struct inet6_fill_args *args); + #endif diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index db7254d52d93..eccc0e7dcb7d 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -93,7 +93,11 @@ enum { RTM_NEWPREFIX = 52, #define RTM_NEWPREFIX RTM_NEWPREFIX - RTM_GETMULTICAST = 58, + RTM_NEWMULTICAST = 56, +#define RTM_NEWMULTICAST RTM_NEWMULTICAST + RTM_DELMULTICAST, +#define RTM_DELMULTICAST RTM_DELMULTICAST + RTM_GETMULTICAST, #define RTM_GETMULTICAST RTM_GETMULTICAST RTM_GETANYCAST = 62, @@ -774,6 +778,10 @@ enum rtnetlink_groups { #define RTNLGRP_TUNNEL RTNLGRP_TUNNEL RTNLGRP_STATS, #define RTNLGRP_STATS RTNLGRP_STATS + RTNLGRP_IPV4_MCADDR, +#define RTNLGRP_IPV4_MCADDR RTNLGRP_IPV4_MCADDR + RTNLGRP_IPV6_MCADDR, +#define RTNLGRP_IPV6_MCADDR RTNLGRP_IPV6_MCADDR __RTNLGRP_MAX }; #define RTNLGRP_MAX (__RTNLGRP_MAX - 1) diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 6a238398acc9..8a370ef37d3f 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -88,6 +88,8 @@ #include #include +#include +#include #include #include #include @@ -1430,6 +1432,63 @@ static void ip_mc_hash_remove(struct in_device *in_dev, *mc_hash = im->next_hash; } +static int inet_fill_ifmcaddr(struct sk_buff *skb, struct net_device *dev, + const struct ip_mc_list *im, int event) +{ + struct ifa_cacheinfo ci; + struct ifaddrmsg *ifm; + struct nlmsghdr *nlh; + + nlh = nlmsg_put(skb, 0, 0, event, sizeof(struct ifaddrmsg), 0); + if (!nlh) + return -EMSGSIZE; + + ifm = nlmsg_data(nlh); + ifm->ifa_family = AF_INET; + ifm->ifa_prefixlen = 32; + ifm->ifa_flags = IFA_F_PERMANENT; + ifm->ifa_scope = RT_SCOPE_UNIVERSE; + ifm->ifa_index = dev->ifindex; + + ci.cstamp = (READ_ONCE(im->mca_cstamp) - INITIAL_JIFFIES) * 100UL / HZ; + ci.tstamp = ci.cstamp; + ci.ifa_prefered = INFINITY_LIFE_TIME; + ci.ifa_valid = INFINITY_LIFE_TIME; + + if (nla_put_in_addr(skb, IFA_MULTICAST, im->multiaddr) < 0 || + nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci) < 0) { + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; + } + + nlmsg_end(skb, nlh); + return 0; +} + +static void inet_ifmcaddr_notify(struct net_device *dev, + const struct ip_mc_list *im, int event) +{ + struct net *net = dev_net(dev); + struct sk_buff *skb; + int err = -ENOMEM; + + skb = nlmsg_new(NLMSG_ALIGN(sizeof(struct ifaddrmsg)) + + nla_total_size(sizeof(__be32)), GFP_ATOMIC); + if (!skb) + goto error; + + err = inet_fill_ifmcaddr(skb, dev, im, event); + if (err < 0) { + WARN_ON_ONCE(err == -EMSGSIZE); + nlmsg_free(skb); + goto error; + } + + rtnl_notify(skb, net, 0, RTNLGRP_IPV4_MCADDR, NULL, GFP_ATOMIC); + return; +error: + rtnl_set_sk_err(net, RTNLGRP_IPV4_MCADDR, err); +} /* * A socket has joined a multicast group on device dev. @@ -1473,6 +1532,8 @@ static void ____ip_mc_inc_group(struct in_device *in_dev, __be32 addr, im->interface = in_dev; in_dev_hold(in_dev); im->multiaddr = addr; + im->mca_cstamp = jiffies; + im->mca_tstamp = im->mca_cstamp; /* initial mode is (EX, empty) */ im->sfmode = mode; im->sfcount[mode] = 1; @@ -1492,6 +1553,7 @@ static void ____ip_mc_inc_group(struct in_device *in_dev, __be32 addr, igmpv3_del_delrec(in_dev, im); #endif igmp_group_added(im); + inet_ifmcaddr_notify(in_dev->dev, im, RTM_NEWMULTICAST); if (!in_dev->dead) ip_rt_multicast_event(in_dev); out: @@ -1705,6 +1767,8 @@ void __ip_mc_dec_group(struct in_device *in_dev, __be32 addr, gfp_t gfp) *ip = i->next_rcu; in_dev->mc_count--; __igmp_group_dropped(i, gfp); + inet_ifmcaddr_notify(in_dev->dev, i, + RTM_DELMULTICAST); ip_mc_clear_src(i); if (!in_dev->dead) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 0e765466d7f7..2e2684886953 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -5127,22 +5127,6 @@ static inline int inet6_ifaddr_msgsize(void) + nla_total_size(4) /* IFA_RT_PRIORITY */; } -enum addr_type_t { - UNICAST_ADDR, - MULTICAST_ADDR, - ANYCAST_ADDR, -}; - -struct inet6_fill_args { - u32 portid; - u32 seq; - int event; - unsigned int flags; - int netnsid; - int ifindex; - enum addr_type_t type; -}; - static int inet6_fill_ifaddr(struct sk_buff *skb, const struct inet6_ifaddr *ifa, struct inet6_fill_args *args) @@ -5221,15 +5205,16 @@ error: return -EMSGSIZE; } -static int inet6_fill_ifmcaddr(struct sk_buff *skb, - const struct ifmcaddr6 *ifmca, - struct inet6_fill_args *args) +int inet6_fill_ifmcaddr(struct sk_buff *skb, + const struct ifmcaddr6 *ifmca, + struct inet6_fill_args *args) { int ifindex = ifmca->idev->dev->ifindex; u8 scope = RT_SCOPE_UNIVERSE; struct nlmsghdr *nlh; - if (ipv6_addr_scope(&ifmca->mca_addr) & IFA_SITE) + if (!args->force_rt_scope_universe && + ipv6_addr_scope(&ifmca->mca_addr) & IFA_SITE) scope = RT_SCOPE_SITE; nlh = nlmsg_put(skb, args->portid, args->seq, args->event, @@ -5254,6 +5239,7 @@ static int inet6_fill_ifmcaddr(struct sk_buff *skb, nlmsg_end(skb, nlh); return 0; } +EXPORT_SYMBOL(inet6_fill_ifmcaddr); static int inet6_fill_ifacaddr(struct sk_buff *skb, const struct ifacaddr6 *ifaca, @@ -5418,6 +5404,7 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, .flags = NLM_F_MULTI, .netnsid = -1, .type = type, + .force_rt_scope_universe = false, }; struct { unsigned long ifindex; @@ -5546,6 +5533,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh, .event = RTM_NEWADDR, .flags = 0, .netnsid = -1, + .force_rt_scope_universe = false, }; struct ifaddrmsg *ifm; struct nlattr *tb[IFA_MAX+1]; @@ -5617,6 +5605,7 @@ static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa) .event = event, .flags = 0, .netnsid = -1, + .force_rt_scope_universe = false, }; int err = -ENOBUFS; diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 5ca8692d565d..587831c148de 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -33,8 +33,10 @@ #include #include #include +#include #include #include +#include #include #include #include @@ -47,6 +49,7 @@ #include #include +#include #include #include @@ -901,6 +904,39 @@ static struct ifmcaddr6 *mca_alloc(struct inet6_dev *idev, return mc; } +static void inet6_ifmcaddr_notify(struct net_device *dev, + const struct ifmcaddr6 *ifmca, int event) +{ + struct inet6_fill_args fillargs = { + .portid = 0, + .seq = 0, + .event = event, + .flags = 0, + .netnsid = -1, + .force_rt_scope_universe = true, + }; + struct net *net = dev_net(dev); + struct sk_buff *skb; + int err = -ENOMEM; + + skb = nlmsg_new(NLMSG_ALIGN(sizeof(struct ifaddrmsg)) + + nla_total_size(16), GFP_ATOMIC); + if (!skb) + goto error; + + err = inet6_fill_ifmcaddr(skb, ifmca, &fillargs); + if (err < 0) { + WARN_ON_ONCE(err == -EMSGSIZE); + nlmsg_free(skb); + goto error; + } + + rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MCADDR, NULL, GFP_ATOMIC); + return; +error: + rtnl_set_sk_err(net, RTNLGRP_IPV6_MCADDR, err); +} + /* * device multicast group inc (add if not found) */ @@ -948,6 +984,7 @@ static int __ipv6_dev_mc_inc(struct net_device *dev, mld_del_delrec(idev, mc); igmp6_group_added(mc); + inet6_ifmcaddr_notify(dev, mc, RTM_NEWMULTICAST); mutex_unlock(&idev->mc_lock); ma_put(mc); return 0; @@ -977,6 +1014,8 @@ int __ipv6_dev_mc_dec(struct inet6_dev *idev, const struct in6_addr *addr) *map = ma->next; igmp6_group_dropped(ma); + inet6_ifmcaddr_notify(idev->dev, ma, + RTM_DELMULTICAST); ip6_mc_clear_src(ma); mutex_unlock(&idev->mc_lock); -- cgit v1.2.3 From 510128b30f2db1600172e9aaec44f66db3c16e15 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Thu, 12 Dec 2024 16:36:06 +0100 Subject: tls: add counters for rekey This introduces 5 counters to keep track of key updates: Tls{Rx,Tx}Rekey{Ok,Error} and TlsRxRekeyReceived. Suggested-by: Jakub Kicinski Signed-off-by: Sabrina Dubroca Signed-off-by: David S. Miller --- include/uapi/linux/snmp.h | 5 +++++ net/tls/tls_main.c | 27 ++++++++++++++++++++++----- net/tls/tls_proc.c | 5 +++++ net/tls/tls_sw.c | 6 ++++-- 4 files changed, 36 insertions(+), 7 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index adf5fd78dd50..51da2e00112d 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -358,6 +358,11 @@ enum LINUX_MIB_TLSRXDEVICERESYNC, /* TlsRxDeviceResync */ LINUX_MIB_TLSDECRYPTRETRY, /* TlsDecryptRetry */ LINUX_MIB_TLSRXNOPADVIOL, /* TlsRxNoPadViolation */ + LINUX_MIB_TLSRXREKEYOK, /* TlsRxRekeyOk */ + LINUX_MIB_TLSRXREKEYERROR, /* TlsRxRekeyError */ + LINUX_MIB_TLSTXREKEYOK, /* TlsTxRekeyOk */ + LINUX_MIB_TLSTXREKEYERROR, /* TlsTxRekeyError */ + LINUX_MIB_TLSRXREKEYRECEIVED, /* TlsRxRekeyReceived */ __LINUX_MIB_TLSMAX }; diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 68b5735dafc1..9ee5a83c5b40 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -640,8 +640,11 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval, /* Currently we only support setting crypto info more * than one time for TLS 1.3 */ - if (crypto_info->version != TLS_1_3_VERSION) + if (crypto_info->version != TLS_1_3_VERSION) { + TLS_INC_STATS(sock_net(sk), tx ? LINUX_MIB_TLSTXREKEYERROR + : LINUX_MIB_TLSRXREKEYERROR); return -EBUSY; + } update = true; old_crypto_info = crypto_info; @@ -696,8 +699,13 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval, update ? crypto_info : NULL); if (rc) goto err_crypto_info; - TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSTXSW); - TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSCURRTXSW); + + if (update) { + TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSTXREKEYOK); + } else { + TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSTXSW); + TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSCURRTXSW); + } conf = TLS_SW; } } else { @@ -711,8 +719,13 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval, update ? crypto_info : NULL); if (rc) goto err_crypto_info; - TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSRXSW); - TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSCURRRXSW); + + if (update) { + TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSRXREKEYOK); + } else { + TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSRXSW); + TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSCURRRXSW); + } conf = TLS_SW; } if (!update) @@ -735,6 +748,10 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval, return 0; err_crypto_info: + if (update) { + TLS_INC_STATS(sock_net(sk), tx ? LINUX_MIB_TLSTXREKEYERROR + : LINUX_MIB_TLSRXREKEYERROR); + } memzero_explicit(crypto_ctx, sizeof(*crypto_ctx)); return rc; } diff --git a/net/tls/tls_proc.c b/net/tls/tls_proc.c index 68982728f620..367666aa07b8 100644 --- a/net/tls/tls_proc.c +++ b/net/tls/tls_proc.c @@ -22,6 +22,11 @@ static const struct snmp_mib tls_mib_list[] = { SNMP_MIB_ITEM("TlsRxDeviceResync", LINUX_MIB_TLSRXDEVICERESYNC), SNMP_MIB_ITEM("TlsDecryptRetry", LINUX_MIB_TLSDECRYPTRETRY), SNMP_MIB_ITEM("TlsRxNoPadViolation", LINUX_MIB_TLSRXNOPADVIOL), + SNMP_MIB_ITEM("TlsRxRekeyOk", LINUX_MIB_TLSRXREKEYOK), + SNMP_MIB_ITEM("TlsRxRekeyError", LINUX_MIB_TLSRXREKEYERROR), + SNMP_MIB_ITEM("TlsTxRekeyOk", LINUX_MIB_TLSTXREKEYOK), + SNMP_MIB_ITEM("TlsTxRekeyError", LINUX_MIB_TLSTXREKEYERROR), + SNMP_MIB_ITEM("TlsRxRekeyReceived", LINUX_MIB_TLSRXREKEYRECEIVED), SNMP_MIB_SENTINEL }; diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 9e5aff5bab98..47550d485819 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1724,7 +1724,8 @@ tls_decrypt_device(struct sock *sk, struct msghdr *msg, return 1; } -static int tls_check_pending_rekey(struct tls_context *ctx, struct sk_buff *skb) +static int tls_check_pending_rekey(struct sock *sk, struct tls_context *ctx, + struct sk_buff *skb) { const struct strp_msg *rxm = strp_msg(skb); const struct tls_msg *tlm = tls_msg(skb); @@ -1747,6 +1748,7 @@ static int tls_check_pending_rekey(struct tls_context *ctx, struct sk_buff *skb) struct tls_sw_context_rx *rx_ctx = ctx->priv_ctx_rx; WRITE_ONCE(rx_ctx->key_update_pending, true); + TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSRXREKEYRECEIVED); } return 0; @@ -1771,7 +1773,7 @@ static int tls_rx_one_record(struct sock *sk, struct msghdr *msg, rxm->full_len -= prot->overhead_size; tls_advance_record_sn(sk, prot, &tls_ctx->rx); - return tls_check_pending_rekey(tls_ctx, darg->skb); + return tls_check_pending_rekey(sk, tls_ctx, darg->skb); } int decrypt_skb(struct sock *sk, struct scatterlist *sgout) -- cgit v1.2.3 From 35f7cad1743e04bf2944a2aadb6b6a42adc57bca Mon Sep 17 00:00:00 2001 From: Kory Maincent Date: Thu, 12 Dec 2024 18:06:43 +0100 Subject: net: Add the possibility to support a selected hwtstamp in netdevice Introduce the description of a hwtstamp provider, mainly defined with a the hwtstamp source and the phydev pointer. Add a hwtstamp provider description within the netdev structure to allow saving the hwtstamp we want to use. This prepares for future support of an ethtool netlink command to select the desired hwtstamp provider. By default, the old API that does not support hwtstamp selectability is used, meaning the hwtstamp provider pointer is unset. Signed-off-by: Kory Maincent Signed-off-by: David S. Miller --- drivers/net/phy/phy_device.c | 10 ++++++++ include/linux/net_tstamp.h | 29 +++++++++++++++++++++++ include/linux/netdevice.h | 4 ++++ include/uapi/linux/net_tstamp.h | 11 +++++++++ net/core/dev_ioctl.c | 41 ++++++++++++++++++++++++++++++-- net/core/timestamping.c | 52 +++++++++++++++++++++++++++++++++++++---- 6 files changed, 140 insertions(+), 7 deletions(-) (limited to 'include/uapi') diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index b26bb33cd1d4..1a908af4175b 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -1998,6 +1999,15 @@ void phy_detach(struct phy_device *phydev) phy_suspend(phydev); if (dev) { + struct hwtstamp_provider *hwprov; + + hwprov = rtnl_dereference(dev->hwprov); + /* Disable timestamp if it is the one selected */ + if (hwprov && hwprov->phydev == phydev) { + rcu_assign_pointer(dev->hwprov, NULL); + kfree_rcu(hwprov, rcu_head); + } + phydev->attached_dev->phydev = NULL; phydev->attached_dev = NULL; phy_link_topo_del_phy(dev, phydev); diff --git a/include/linux/net_tstamp.h b/include/linux/net_tstamp.h index 662074b08c94..ff0758e88ea1 100644 --- a/include/linux/net_tstamp.h +++ b/include/linux/net_tstamp.h @@ -19,6 +19,33 @@ enum hwtstamp_source { HWTSTAMP_SOURCE_PHYLIB, }; +/** + * struct hwtstamp_provider_desc - hwtstamp provider description + * + * @index: index of the hwtstamp provider. + * @qualifier: hwtstamp provider qualifier. + */ +struct hwtstamp_provider_desc { + int index; + enum hwtstamp_provider_qualifier qualifier; +}; + +/** + * struct hwtstamp_provider - hwtstamp provider object + * + * @rcu_head: RCU callback used to free the struct. + * @source: source of the hwtstamp provider. + * @phydev: pointer of the phydev source in case a PTP coming from phylib + * @desc: hwtstamp provider description. + */ + +struct hwtstamp_provider { + struct rcu_head rcu_head; + enum hwtstamp_source source; + struct phy_device *phydev; + struct hwtstamp_provider_desc desc; +}; + /** * struct kernel_hwtstamp_config - Kernel copy of struct hwtstamp_config * @@ -31,6 +58,7 @@ enum hwtstamp_source { * copied the ioctl request back to user space * @source: indication whether timestamps should come from the netdev or from * an attached phylib PHY + * @qualifier: qualifier of the hwtstamp provider * * Prefer using this structure for in-kernel processing of hardware * timestamping configuration, over the inextensible struct hwtstamp_config @@ -43,6 +71,7 @@ struct kernel_hwtstamp_config { struct ifreq *ifr; bool copied_to_user; enum hwtstamp_source source; + enum hwtstamp_provider_qualifier qualifier; }; static inline void hwtstamp_config_to_kernel(struct kernel_hwtstamp_config *kernel_cfg, diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d917949bba03..2593019ad5b1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -82,6 +82,7 @@ struct xdp_metadata_ops; struct xdp_md; struct ethtool_netdev_state; struct phy_link_topology; +struct hwtstamp_provider; typedef u32 xdp_features_t; @@ -2045,6 +2046,7 @@ enum netdev_reg_state { * * @neighbours: List heads pointing to this device's neighbours' * dev_list, one per address-family. + * @hwprov: Tracks which PTP performs hardware packet time stamping. * * FIXME: cleanup struct net_device such that network protocol info * moves out. @@ -2457,6 +2459,8 @@ struct net_device { struct hlist_head neighbours[NEIGH_NR_TABLES]; + struct hwtstamp_provider __rcu *hwprov; + u8 priv[] ____cacheline_aligned __counted_by(priv_len); } ____cacheline_aligned; diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h index 858339d1c1c4..55b0ab51096c 100644 --- a/include/uapi/linux/net_tstamp.h +++ b/include/uapi/linux/net_tstamp.h @@ -13,6 +13,17 @@ #include #include /* for SO_TIMESTAMPING */ +/* + * Possible type of hwtstamp provider. Mainly "precise" the default one + * is for IEEE 1588 quality and "approx" is for NICs DMA point. + */ +enum hwtstamp_provider_qualifier { + HWTSTAMP_PROVIDER_QUALIFIER_PRECISE, + HWTSTAMP_PROVIDER_QUALIFIER_APPROX, + + HWTSTAMP_PROVIDER_QUALIFIER_CNT, +}; + /* SO_TIMESTAMPING flags */ enum { SOF_TIMESTAMPING_TX_HARDWARE = (1<<0), diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index 1f09930fca26..087a57b7e4fa 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -269,6 +270,21 @@ static int dev_eth_ioctl(struct net_device *dev, int dev_get_hwtstamp_phylib(struct net_device *dev, struct kernel_hwtstamp_config *cfg) { + struct hwtstamp_provider *hwprov; + + hwprov = rtnl_dereference(dev->hwprov); + if (hwprov) { + cfg->qualifier = hwprov->desc.qualifier; + if (hwprov->source == HWTSTAMP_SOURCE_PHYLIB && + hwprov->phydev) + return phy_hwtstamp_get(hwprov->phydev, cfg); + + if (hwprov->source == HWTSTAMP_SOURCE_NETDEV) + return dev->netdev_ops->ndo_hwtstamp_get(dev, cfg); + + return -EOPNOTSUPP; + } + if (phy_is_default_hwtstamp(dev->phydev)) return phy_hwtstamp_get(dev->phydev, cfg); @@ -324,11 +340,32 @@ int dev_set_hwtstamp_phylib(struct net_device *dev, struct netlink_ext_ack *extack) { const struct net_device_ops *ops = dev->netdev_ops; - bool phy_ts = phy_is_default_hwtstamp(dev->phydev); struct kernel_hwtstamp_config old_cfg = {}; + struct hwtstamp_provider *hwprov; + struct phy_device *phydev; bool changed = false; + bool phy_ts; int err; + hwprov = rtnl_dereference(dev->hwprov); + if (hwprov) { + if (hwprov->source == HWTSTAMP_SOURCE_PHYLIB && + hwprov->phydev) { + phy_ts = true; + phydev = hwprov->phydev; + } else if (hwprov->source == HWTSTAMP_SOURCE_NETDEV) { + phy_ts = false; + } else { + return -EOPNOTSUPP; + } + + cfg->qualifier = hwprov->desc.qualifier; + } else { + phy_ts = phy_is_default_hwtstamp(dev->phydev); + if (phy_ts) + phydev = dev->phydev; + } + cfg->source = phy_ts ? HWTSTAMP_SOURCE_PHYLIB : HWTSTAMP_SOURCE_NETDEV; if (phy_ts && dev->see_all_hwtstamp_requests) { @@ -350,7 +387,7 @@ int dev_set_hwtstamp_phylib(struct net_device *dev, changed = kernel_hwtstamp_config_changed(&old_cfg, cfg); if (phy_ts) { - err = phy_hwtstamp_set(dev->phydev, cfg, extack); + err = phy_hwtstamp_set(phydev, cfg, extack); if (err) { if (changed) ops->ndo_hwtstamp_set(dev, &old_cfg, NULL); diff --git a/net/core/timestamping.c b/net/core/timestamping.c index 3717fb152ecc..a50a7ef49ae8 100644 --- a/net/core/timestamping.c +++ b/net/core/timestamping.c @@ -9,6 +9,7 @@ #include #include #include +#include static unsigned int classify(const struct sk_buff *skb) { @@ -21,19 +22,39 @@ static unsigned int classify(const struct sk_buff *skb) void skb_clone_tx_timestamp(struct sk_buff *skb) { + struct hwtstamp_provider *hwprov; struct mii_timestamper *mii_ts; + struct phy_device *phydev; struct sk_buff *clone; unsigned int type; - if (!skb->sk || !skb->dev || - !phy_is_default_hwtstamp(skb->dev->phydev)) + if (!skb->sk || !skb->dev) return; + rcu_read_lock(); + hwprov = rcu_dereference(skb->dev->hwprov); + if (hwprov) { + if (hwprov->source != HWTSTAMP_SOURCE_PHYLIB || + !hwprov->phydev) { + rcu_read_unlock(); + return; + } + + phydev = hwprov->phydev; + } else { + phydev = skb->dev->phydev; + if (!phy_is_default_hwtstamp(phydev)) { + rcu_read_unlock(); + return; + } + } + rcu_read_unlock(); + type = classify(skb); if (type == PTP_CLASS_NONE) return; - mii_ts = skb->dev->phydev->mii_ts; + mii_ts = phydev->mii_ts; if (likely(mii_ts->txtstamp)) { clone = skb_clone_sk(skb); if (!clone) @@ -45,12 +66,33 @@ EXPORT_SYMBOL_GPL(skb_clone_tx_timestamp); bool skb_defer_rx_timestamp(struct sk_buff *skb) { + struct hwtstamp_provider *hwprov; struct mii_timestamper *mii_ts; + struct phy_device *phydev; unsigned int type; - if (!skb->dev || !phy_is_default_hwtstamp(skb->dev->phydev)) + if (!skb->dev) return false; + rcu_read_lock(); + hwprov = rcu_dereference(skb->dev->hwprov); + if (hwprov) { + if (hwprov->source != HWTSTAMP_SOURCE_PHYLIB || + !hwprov->phydev) { + rcu_read_unlock(); + return false; + } + + phydev = hwprov->phydev; + } else { + phydev = skb->dev->phydev; + if (!phy_is_default_hwtstamp(phydev)) { + rcu_read_unlock(); + return false; + } + } + rcu_read_unlock(); + if (skb_headroom(skb) < ETH_HLEN) return false; @@ -63,7 +105,7 @@ bool skb_defer_rx_timestamp(struct sk_buff *skb) if (type == PTP_CLASS_NONE) return false; - mii_ts = skb->dev->phydev->mii_ts; + mii_ts = phydev->mii_ts; if (likely(mii_ts->rxtstamp)) return mii_ts->rxtstamp(mii_ts, skb, type); -- cgit v1.2.3 From b9e3f7dc9ed95daeb83cfa45b821cacaa01aa906 Mon Sep 17 00:00:00 2001 From: Kory Maincent Date: Thu, 12 Dec 2024 18:06:44 +0100 Subject: net: ethtool: tsinfo: Enhance tsinfo to support several hwtstamp by net topology Either the MAC or the PHY can provide hwtstamp, so we should be able to read the tsinfo for any hwtstamp provider. Enhance 'get' command to retrieve tsinfo of hwtstamp providers within a network topology. Add support for a specific dump command to retrieve all hwtstamp providers within the network topology, with added functionality for filtered dump to target a single interface. Signed-off-by: Kory Maincent Signed-off-by: David S. Miller --- Documentation/netlink/specs/ethtool.yaml | 20 ++ Documentation/networking/ethtool-netlink.rst | 7 +- include/linux/ethtool.h | 4 + include/uapi/linux/ethtool_netlink_generated.h | 10 + net/ethtool/common.c | 141 +++++++++- net/ethtool/common.h | 13 + net/ethtool/netlink.c | 6 +- net/ethtool/netlink.h | 5 +- net/ethtool/ts.h | 20 ++ net/ethtool/tsinfo.c | 358 ++++++++++++++++++++++++- 10 files changed, 563 insertions(+), 21 deletions(-) create mode 100644 net/ethtool/ts.h (limited to 'include/uapi') diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml index c7634e957d9c..4082816e5f3d 100644 --- a/Documentation/netlink/specs/ethtool.yaml +++ b/Documentation/netlink/specs/ethtool.yaml @@ -836,6 +836,20 @@ attribute-sets: - name: tx-err type: uint + - + name: ts-hwtstamp-provider + attr-cnt-name: __ethtool-a-ts-hwtstamp-provider-cnt + attributes: + - + name: unspec + type: unused + value: 0 + - + name: index + type: u32 + - + name: qualifier + type: u32 - name: tsinfo attr-cnt-name: __ethtool-a-tsinfo-cnt @@ -867,6 +881,10 @@ attribute-sets: name: stats type: nest nested-attributes: ts-stat + - + name: hwtstamp-provider + type: nest + nested-attributes: ts-hwtstamp-provider - name: cable-result attr-cnt-name: __ethtool-a-cable-result-cnt @@ -1912,6 +1930,7 @@ operations: request: attributes: - header + - hwtstamp-provider reply: attributes: - header @@ -1920,6 +1939,7 @@ operations: - rx-filters - phc-index - stats + - hwtstamp-provider dump: *tsinfo-get-op - name: cable-test-act diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index b25926071ece..c585e2f0ddfa 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -1245,9 +1245,10 @@ Gets timestamping information like ``ETHTOOL_GET_TS_INFO`` ioctl request. Request contents: - ===================================== ====== ========================== - ``ETHTOOL_A_TSINFO_HEADER`` nested request header - ===================================== ====== ========================== + ======================================== ====== ============================ + ``ETHTOOL_A_TSINFO_HEADER`` nested request header + ``ETHTOOL_A_TSINFO_HWTSTAMP_PROVIDER`` nested PTP hw clock provider + ======================================== ====== ============================ Kernel response contents: diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index e217c6321ed0..f711bfd75c4d 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -711,6 +711,7 @@ struct ethtool_rxfh_param { * @cmd: command number = %ETHTOOL_GET_TS_INFO * @so_timestamping: bit mask of the sum of the supported SO_TIMESTAMPING flags * @phc_index: device index of the associated PHC, or -1 if there is none + * @phc_qualifier: qualifier of the associated PHC * @tx_types: bit mask of the supported hwtstamp_tx_types enumeration values * @rx_filters: bit mask of the supported hwtstamp_rx_filters enumeration values */ @@ -718,6 +719,7 @@ struct kernel_ethtool_ts_info { u32 cmd; u32 so_timestamping; int phc_index; + enum hwtstamp_provider_qualifier phc_qualifier; enum hwtstamp_tx_types tx_types; enum hwtstamp_rx_filters rx_filters; }; @@ -749,6 +751,7 @@ struct kernel_ethtool_ts_info { * @rss_context argument to @create_rxfh_context and friends. * @supported_coalesce_params: supported types of interrupt coalescing. * @supported_ring_params: supported ring params. + * @supported_hwtstamp_qualifiers: bitfield of supported hwtstamp qualifier. * @get_drvinfo: Report driver/device information. Modern drivers no * longer have to implement this callback. Most fields are * correctly filled in by the core using system information, or @@ -966,6 +969,7 @@ struct ethtool_ops { u32 rxfh_max_num_contexts; u32 supported_coalesce_params; u32 supported_ring_params; + u32 supported_hwtstamp_qualifiers; void (*get_drvinfo)(struct net_device *, struct ethtool_drvinfo *); int (*get_regs_len)(struct net_device *); void (*get_regs)(struct net_device *, struct ethtool_regs *, void *); diff --git a/include/uapi/linux/ethtool_netlink_generated.h b/include/uapi/linux/ethtool_netlink_generated.h index b58f352fe4f2..df289dde0f61 100644 --- a/include/uapi/linux/ethtool_netlink_generated.h +++ b/include/uapi/linux/ethtool_netlink_generated.h @@ -385,6 +385,15 @@ enum { ETHTOOL_A_TS_STAT_MAX = (__ETHTOOL_A_TS_STAT_CNT - 1) }; +enum { + ETHTOOL_A_TS_HWTSTAMP_PROVIDER_UNSPEC, + ETHTOOL_A_TS_HWTSTAMP_PROVIDER_INDEX, + ETHTOOL_A_TS_HWTSTAMP_PROVIDER_QUALIFIER, + + __ETHTOOL_A_TS_HWTSTAMP_PROVIDER_CNT, + ETHTOOL_A_TS_HWTSTAMP_PROVIDER_MAX = (__ETHTOOL_A_TS_HWTSTAMP_PROVIDER_CNT - 1) +}; + enum { ETHTOOL_A_TSINFO_UNSPEC, ETHTOOL_A_TSINFO_HEADER, @@ -393,6 +402,7 @@ enum { ETHTOOL_A_TSINFO_RX_FILTERS, ETHTOOL_A_TSINFO_PHC_INDEX, ETHTOOL_A_TSINFO_STATS, + ETHTOOL_A_TSINFO_HWTSTAMP_PROVIDER, __ETHTOOL_A_TSINFO_CNT, ETHTOOL_A_TSINFO_MAX = (__ETHTOOL_A_TSINFO_CNT - 1) diff --git a/net/ethtool/common.c b/net/ethtool/common.c index 05ce4f8080b3..666db40bcfda 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -5,9 +5,12 @@ #include #include #include +#include #include "netlink.h" #include "common.h" +#include "../core/dev.h" + const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] = { [NETIF_F_SG_BIT] = "tx-scatter-gather", @@ -763,20 +766,91 @@ int ethtool_check_ops(const struct ethtool_ops *ops) return 0; } -int __ethtool_get_ts_info(struct net_device *dev, struct kernel_ethtool_ts_info *info) +static void ethtool_init_tsinfo(struct kernel_ethtool_ts_info *info) { - const struct ethtool_ops *ops = dev->ethtool_ops; - struct phy_device *phydev = dev->phydev; - int err = 0; - memset(info, 0, sizeof(*info)); info->cmd = ETHTOOL_GET_TS_INFO; info->phc_index = -1; +} + +int ethtool_net_get_ts_info_by_phc(struct net_device *dev, + struct kernel_ethtool_ts_info *info, + struct hwtstamp_provider_desc *hwprov_desc) +{ + const struct ethtool_ops *ops = dev->ethtool_ops; + int err; + + if (!ops->get_ts_info) + return -ENODEV; + + /* Does ptp comes from netdev */ + ethtool_init_tsinfo(info); + info->phc_qualifier = hwprov_desc->qualifier; + err = ops->get_ts_info(dev, info); + if (err) + return err; + + if (info->phc_index == hwprov_desc->index && + net_support_hwtstamp_qualifier(dev, hwprov_desc->qualifier)) + return 0; + + return -ENODEV; +} + +int +ethtool_phy_get_ts_info_by_phc(struct net_device *dev, + struct kernel_ethtool_ts_info *info, + struct hwtstamp_provider_desc *hwprov_desc) +{ + int err; + + /* Only precise qualifier is supported in phydev */ + if (hwprov_desc->qualifier != HWTSTAMP_PROVIDER_QUALIFIER_PRECISE) + return -ENODEV; + + /* Look in the phy topology */ + if (dev->link_topo) { + struct phy_device_node *pdn; + unsigned long phy_index; + + xa_for_each(&dev->link_topo->phys, phy_index, pdn) { + if (!phy_has_tsinfo(pdn->phy)) + continue; + + ethtool_init_tsinfo(info); + err = phy_ts_info(pdn->phy, info); + if (err) + return err; + + if (info->phc_index == hwprov_desc->index) + return 0; + } + return -ENODEV; + } + + /* Look on the dev->phydev */ + if (phy_has_tsinfo(dev->phydev)) { + ethtool_init_tsinfo(info); + err = phy_ts_info(dev->phydev, info); + if (err) + return err; + + if (info->phc_index == hwprov_desc->index) + return 0; + } + + return -ENODEV; +} + +int ethtool_get_ts_info_by_phc(struct net_device *dev, + struct kernel_ethtool_ts_info *info, + struct hwtstamp_provider_desc *hwprov_desc) +{ + int err; - if (phy_is_default_hwtstamp(phydev) && phy_has_tsinfo(phydev)) - err = phy_ts_info(phydev, info); - else if (ops->get_ts_info) - err = ops->get_ts_info(dev, info); + err = ethtool_net_get_ts_info_by_phc(dev, info, hwprov_desc); + if (err == -ENODEV) + err = ethtool_phy_get_ts_info_by_phc(dev, info, hwprov_desc); info->so_timestamping |= SOF_TIMESTAMPING_RX_SOFTWARE | SOF_TIMESTAMPING_SOFTWARE; @@ -784,6 +858,55 @@ int __ethtool_get_ts_info(struct net_device *dev, struct kernel_ethtool_ts_info return err; } +int __ethtool_get_ts_info(struct net_device *dev, + struct kernel_ethtool_ts_info *info) +{ + struct hwtstamp_provider *hwprov; + + hwprov = rtnl_dereference(dev->hwprov); + /* No provider specified, use default behavior */ + if (!hwprov) { + const struct ethtool_ops *ops = dev->ethtool_ops; + struct phy_device *phydev = dev->phydev; + int err = 0; + + ethtool_init_tsinfo(info); + if (phy_is_default_hwtstamp(phydev) && + phy_has_tsinfo(phydev)) + err = phy_ts_info(phydev, info); + else if (ops->get_ts_info) + err = ops->get_ts_info(dev, info); + + info->so_timestamping |= SOF_TIMESTAMPING_RX_SOFTWARE | + SOF_TIMESTAMPING_SOFTWARE; + + return err; + } + + return ethtool_get_ts_info_by_phc(dev, info, &hwprov->desc); +} + +bool net_support_hwtstamp_qualifier(struct net_device *dev, + enum hwtstamp_provider_qualifier qualifier) +{ + const struct ethtool_ops *ops = dev->ethtool_ops; + + if (!ops) + return false; + + /* Return true with precise qualifier and with NIC without + * qualifier description to not break the old behavior. + */ + if (!ops->supported_hwtstamp_qualifiers && + qualifier == HWTSTAMP_PROVIDER_QUALIFIER_PRECISE) + return true; + + if (ops->supported_hwtstamp_qualifiers & BIT(qualifier)) + return true; + + return false; +} + int ethtool_get_phc_vclocks(struct net_device *dev, int **vclock_index) { struct kernel_ethtool_ts_info info = { }; diff --git a/net/ethtool/common.h b/net/ethtool/common.h index 4a2de3ce7354..f5119204c8ff 100644 --- a/net/ethtool/common.h +++ b/net/ethtool/common.h @@ -21,6 +21,7 @@ struct link_mode_info { }; struct genl_info; +struct hwtstamp_provider_desc; extern const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]; @@ -49,6 +50,18 @@ int ethtool_check_max_channel(struct net_device *dev, struct genl_info *info); int ethtool_check_rss_ctx_busy(struct net_device *dev, u32 rss_context); int __ethtool_get_ts_info(struct net_device *dev, struct kernel_ethtool_ts_info *info); +int ethtool_get_ts_info_by_phc(struct net_device *dev, + struct kernel_ethtool_ts_info *info, + struct hwtstamp_provider_desc *hwprov_desc); +int ethtool_net_get_ts_info_by_phc(struct net_device *dev, + struct kernel_ethtool_ts_info *info, + struct hwtstamp_provider_desc *hwprov_desc); +int +ethtool_phy_get_ts_info_by_phc(struct net_device *dev, + struct kernel_ethtool_ts_info *info, + struct hwtstamp_provider_desc *hwprov_desc); +bool net_support_hwtstamp_qualifier(struct net_device *dev, + enum hwtstamp_provider_qualifier qualifier); extern const struct ethtool_phy_ops *ethtool_phy_ops; extern const struct ethtool_pse_ops *ethtool_pse_ops; diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index e3f0ef6b851b..6ae1d91f36e7 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -1074,9 +1074,9 @@ static const struct genl_ops ethtool_genl_ops[] = { { .cmd = ETHTOOL_MSG_TSINFO_GET, .doit = ethnl_default_doit, - .start = ethnl_default_start, - .dumpit = ethnl_default_dumpit, - .done = ethnl_default_done, + .start = ethnl_tsinfo_start, + .dumpit = ethnl_tsinfo_dumpit, + .done = ethnl_tsinfo_done, .policy = ethnl_tsinfo_get_policy, .maxattr = ARRAY_SIZE(ethnl_tsinfo_get_policy) - 1, }, diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index 203b08eb6c6f..960cda13e4fc 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -464,7 +464,7 @@ extern const struct nla_policy ethnl_pause_get_policy[ETHTOOL_A_PAUSE_STATS_SRC extern const struct nla_policy ethnl_pause_set_policy[ETHTOOL_A_PAUSE_TX + 1]; extern const struct nla_policy ethnl_eee_get_policy[ETHTOOL_A_EEE_HEADER + 1]; extern const struct nla_policy ethnl_eee_set_policy[ETHTOOL_A_EEE_TX_LPI_TIMER + 1]; -extern const struct nla_policy ethnl_tsinfo_get_policy[ETHTOOL_A_TSINFO_HEADER + 1]; +extern const struct nla_policy ethnl_tsinfo_get_policy[ETHTOOL_A_TSINFO_MAX + 1]; extern const struct nla_policy ethnl_cable_test_act_policy[ETHTOOL_A_CABLE_TEST_HEADER + 1]; extern const struct nla_policy ethnl_cable_test_tdr_act_policy[ETHTOOL_A_CABLE_TEST_TDR_CFG + 1]; extern const struct nla_policy ethnl_tunnel_info_get_policy[ETHTOOL_A_TUNNEL_INFO_HEADER + 1]; @@ -499,6 +499,9 @@ int ethnl_phy_start(struct netlink_callback *cb); int ethnl_phy_doit(struct sk_buff *skb, struct genl_info *info); int ethnl_phy_dumpit(struct sk_buff *skb, struct netlink_callback *cb); int ethnl_phy_done(struct netlink_callback *cb); +int ethnl_tsinfo_start(struct netlink_callback *cb); +int ethnl_tsinfo_dumpit(struct sk_buff *skb, struct netlink_callback *cb); +int ethnl_tsinfo_done(struct netlink_callback *cb); extern const char stats_std_names[__ETHTOOL_STATS_CNT][ETH_GSTRING_LEN]; extern const char stats_eth_phy_names[__ETHTOOL_A_STATS_ETH_PHY_CNT][ETH_GSTRING_LEN]; diff --git a/net/ethtool/ts.h b/net/ethtool/ts.h new file mode 100644 index 000000000000..d901a879a671 --- /dev/null +++ b/net/ethtool/ts.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef _NET_ETHTOOL_TS_H +#define _NET_ETHTOOL_TS_H + +#include "netlink.h" + +static const struct nla_policy +ethnl_ts_hwtst_prov_policy[ETHTOOL_A_TS_HWTSTAMP_PROVIDER_MAX + 1] = { + [ETHTOOL_A_TS_HWTSTAMP_PROVIDER_INDEX] = { .type = NLA_U32 }, + [ETHTOOL_A_TS_HWTSTAMP_PROVIDER_QUALIFIER] = + NLA_POLICY_MAX(NLA_U32, HWTSTAMP_PROVIDER_QUALIFIER_CNT - 1) +}; + +int ts_parse_hwtst_provider(const struct nlattr *nest, + struct hwtstamp_provider_desc *hwprov_desc, + struct netlink_ext_ack *extack, + bool *mod); + +#endif /* _NET_ETHTOOL_TS_H */ diff --git a/net/ethtool/tsinfo.c b/net/ethtool/tsinfo.c index 03d12d6f79ca..7e495a41aeec 100644 --- a/net/ethtool/tsinfo.c +++ b/net/ethtool/tsinfo.c @@ -1,13 +1,18 @@ // SPDX-License-Identifier: GPL-2.0-only #include +#include +#include +#include #include "netlink.h" #include "common.h" #include "bitset.h" +#include "ts.h" struct tsinfo_req_info { struct ethnl_req_info base; + struct hwtstamp_provider_desc hwprov_desc; }; struct tsinfo_reply_data { @@ -16,34 +21,96 @@ struct tsinfo_reply_data { struct ethtool_ts_stats stats; }; +#define TSINFO_REQINFO(__req_base) \ + container_of(__req_base, struct tsinfo_req_info, base) + #define TSINFO_REPDATA(__reply_base) \ container_of(__reply_base, struct tsinfo_reply_data, base) #define ETHTOOL_TS_STAT_CNT \ (__ETHTOOL_A_TS_STAT_CNT - (ETHTOOL_A_TS_STAT_UNSPEC + 1)) -const struct nla_policy ethnl_tsinfo_get_policy[] = { +const struct nla_policy ethnl_tsinfo_get_policy[ETHTOOL_A_TSINFO_MAX + 1] = { [ETHTOOL_A_TSINFO_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy_stats), + [ETHTOOL_A_TSINFO_HWTSTAMP_PROVIDER] = + NLA_POLICY_NESTED(ethnl_ts_hwtst_prov_policy), }; +int ts_parse_hwtst_provider(const struct nlattr *nest, + struct hwtstamp_provider_desc *hwprov_desc, + struct netlink_ext_ack *extack, + bool *mod) +{ + struct nlattr *tb[ARRAY_SIZE(ethnl_ts_hwtst_prov_policy)]; + int ret; + + ret = nla_parse_nested(tb, + ARRAY_SIZE(ethnl_ts_hwtst_prov_policy) - 1, + nest, + ethnl_ts_hwtst_prov_policy, extack); + if (ret < 0) + return ret; + + if (NL_REQ_ATTR_CHECK(extack, nest, tb, + ETHTOOL_A_TS_HWTSTAMP_PROVIDER_INDEX) || + NL_REQ_ATTR_CHECK(extack, nest, tb, + ETHTOOL_A_TS_HWTSTAMP_PROVIDER_QUALIFIER)) + return -EINVAL; + + ethnl_update_u32(&hwprov_desc->index, + tb[ETHTOOL_A_TS_HWTSTAMP_PROVIDER_INDEX], + mod); + ethnl_update_u32(&hwprov_desc->qualifier, + tb[ETHTOOL_A_TS_HWTSTAMP_PROVIDER_QUALIFIER], + mod); + + return 0; +} + +static int +tsinfo_parse_request(struct ethnl_req_info *req_base, struct nlattr **tb, + struct netlink_ext_ack *extack) +{ + struct tsinfo_req_info *req = TSINFO_REQINFO(req_base); + bool mod = false; + + req->hwprov_desc.index = -1; + + if (!tb[ETHTOOL_A_TSINFO_HWTSTAMP_PROVIDER]) + return 0; + + return ts_parse_hwtst_provider(tb[ETHTOOL_A_TSINFO_HWTSTAMP_PROVIDER], + &req->hwprov_desc, extack, &mod); +} + static int tsinfo_prepare_data(const struct ethnl_req_info *req_base, struct ethnl_reply_data *reply_base, const struct genl_info *info) { struct tsinfo_reply_data *data = TSINFO_REPDATA(reply_base); + struct tsinfo_req_info *req = TSINFO_REQINFO(req_base); struct net_device *dev = reply_base->dev; int ret; ret = ethnl_ops_begin(dev); if (ret < 0) return ret; + + if (req->hwprov_desc.index != -1) { + ret = ethtool_get_ts_info_by_phc(dev, &data->ts_info, + &req->hwprov_desc); + ethnl_ops_complete(dev); + return ret; + } + if (req_base->flags & ETHTOOL_FLAG_STATS) { ethtool_stats_init((u64 *)&data->stats, sizeof(data->stats) / sizeof(u64)); if (dev->ethtool_ops->get_ts_stats) dev->ethtool_ops->get_ts_stats(dev, &data->stats); } + ret = __ethtool_get_ts_info(dev, &data->ts_info); ethnl_ops_complete(dev); @@ -87,8 +154,11 @@ static int tsinfo_reply_size(const struct ethnl_req_info *req_base, return ret; len += ret; /* _TSINFO_RX_FILTERS */ } - if (ts_info->phc_index >= 0) + if (ts_info->phc_index >= 0) { len += nla_total_size(sizeof(u32)); /* _TSINFO_PHC_INDEX */ + /* _TSINFO_HWTSTAMP_PROVIDER */ + len += nla_total_size(0) + 2 * nla_total_size(sizeof(u32)); + } if (req_base->flags & ETHTOOL_FLAG_STATS) len += nla_total_size(0) + /* _TSINFO_STATS */ nla_total_size_64bit(sizeof(u64)) * ETHTOOL_TS_STAT_CNT; @@ -163,9 +233,29 @@ static int tsinfo_fill_reply(struct sk_buff *skb, if (ret < 0) return ret; } - if (ts_info->phc_index >= 0 && - nla_put_u32(skb, ETHTOOL_A_TSINFO_PHC_INDEX, ts_info->phc_index)) - return -EMSGSIZE; + if (ts_info->phc_index >= 0) { + struct nlattr *nest; + + ret = nla_put_u32(skb, ETHTOOL_A_TSINFO_PHC_INDEX, + ts_info->phc_index); + if (ret) + return -EMSGSIZE; + + nest = nla_nest_start(skb, ETHTOOL_A_TSINFO_HWTSTAMP_PROVIDER); + if (!nest) + return -EMSGSIZE; + + if (nla_put_u32(skb, ETHTOOL_A_TS_HWTSTAMP_PROVIDER_INDEX, + ts_info->phc_index) || + nla_put_u32(skb, + ETHTOOL_A_TS_HWTSTAMP_PROVIDER_QUALIFIER, + ts_info->phc_qualifier)) { + nla_nest_cancel(skb, nest); + return -EMSGSIZE; + } + + nla_nest_end(skb, nest); + } if (req_base->flags & ETHTOOL_FLAG_STATS && tsinfo_put_stats(skb, &data->stats)) return -EMSGSIZE; @@ -173,6 +263,263 @@ static int tsinfo_fill_reply(struct sk_buff *skb, return 0; } +struct ethnl_tsinfo_dump_ctx { + struct tsinfo_req_info *req_info; + struct tsinfo_reply_data *reply_data; + unsigned long pos_ifindex; + bool netdev_dump_done; + unsigned long pos_phyindex; + enum hwtstamp_provider_qualifier pos_phcqualifier; +}; + +static void *ethnl_tsinfo_prepare_dump(struct sk_buff *skb, + struct net_device *dev, + struct tsinfo_reply_data *reply_data, + struct netlink_callback *cb) +{ + struct ethnl_tsinfo_dump_ctx *ctx = (void *)cb->ctx; + void *ehdr = NULL; + + ehdr = ethnl_dump_put(skb, cb, + ETHTOOL_MSG_TSINFO_GET_REPLY); + if (!ehdr) + return ERR_PTR(-EMSGSIZE); + + reply_data = ctx->reply_data; + memset(reply_data, 0, sizeof(*reply_data)); + reply_data->base.dev = dev; + memset(&reply_data->ts_info, 0, sizeof(reply_data->ts_info)); + + return ehdr; +} + +static int ethnl_tsinfo_end_dump(struct sk_buff *skb, + struct net_device *dev, + struct tsinfo_req_info *req_info, + struct tsinfo_reply_data *reply_data, + void *ehdr) +{ + int ret; + + reply_data->ts_info.so_timestamping |= SOF_TIMESTAMPING_RX_SOFTWARE | + SOF_TIMESTAMPING_SOFTWARE; + + ret = ethnl_fill_reply_header(skb, dev, ETHTOOL_A_TSINFO_HEADER); + if (ret < 0) + return ret; + + ret = tsinfo_fill_reply(skb, &req_info->base, &reply_data->base); + if (ret < 0) + return ret; + + reply_data->base.dev = NULL; + genlmsg_end(skb, ehdr); + + return ret; +} + +static int ethnl_tsinfo_dump_one_phydev(struct sk_buff *skb, + struct net_device *dev, + struct phy_device *phydev, + struct netlink_callback *cb) +{ + struct ethnl_tsinfo_dump_ctx *ctx = (void *)cb->ctx; + struct tsinfo_reply_data *reply_data; + struct tsinfo_req_info *req_info; + void *ehdr = NULL; + int ret = 0; + + if (!phy_has_tsinfo(phydev)) + return -EOPNOTSUPP; + + reply_data = ctx->reply_data; + req_info = ctx->req_info; + ehdr = ethnl_tsinfo_prepare_dump(skb, dev, reply_data, cb); + if (IS_ERR(ehdr)) + return PTR_ERR(ehdr); + + ret = phy_ts_info(phydev, &reply_data->ts_info); + if (ret < 0) + goto err; + + ret = ethnl_tsinfo_end_dump(skb, dev, req_info, reply_data, ehdr); + if (ret < 0) + goto err; + + return ret; +err: + genlmsg_cancel(skb, ehdr); + return ret; +} + +static int ethnl_tsinfo_dump_one_netdev(struct sk_buff *skb, + struct net_device *dev, + struct netlink_callback *cb) +{ + struct ethnl_tsinfo_dump_ctx *ctx = (void *)cb->ctx; + const struct ethtool_ops *ops = dev->ethtool_ops; + struct tsinfo_reply_data *reply_data; + struct tsinfo_req_info *req_info; + void *ehdr = NULL; + int ret = 0; + + if (!ops->get_ts_info) + return -EOPNOTSUPP; + + reply_data = ctx->reply_data; + req_info = ctx->req_info; + for (; ctx->pos_phcqualifier < HWTSTAMP_PROVIDER_QUALIFIER_CNT; + ctx->pos_phcqualifier++) { + if (!net_support_hwtstamp_qualifier(dev, + ctx->pos_phcqualifier)) + continue; + + ehdr = ethnl_tsinfo_prepare_dump(skb, dev, reply_data, cb); + if (IS_ERR(ehdr)) { + ret = PTR_ERR(ehdr); + goto err; + } + + reply_data->ts_info.phc_qualifier = ctx->pos_phcqualifier; + ret = ops->get_ts_info(dev, &reply_data->ts_info); + if (ret < 0) + goto err; + + ret = ethnl_tsinfo_end_dump(skb, dev, req_info, reply_data, + ehdr); + if (ret < 0) + goto err; + } + + return ret; + +err: + genlmsg_cancel(skb, ehdr); + return ret; +} + +static int ethnl_tsinfo_dump_one_net_topo(struct sk_buff *skb, + struct net_device *dev, + struct netlink_callback *cb) +{ + struct ethnl_tsinfo_dump_ctx *ctx = (void *)cb->ctx; + struct phy_device_node *pdn; + int ret = 0; + + if (!ctx->netdev_dump_done) { + ret = ethnl_tsinfo_dump_one_netdev(skb, dev, cb); + if (ret < 0 && ret != -EOPNOTSUPP) + return ret; + ctx->netdev_dump_done = true; + } + + if (!dev->link_topo) { + if (phy_has_tsinfo(dev->phydev)) { + ret = ethnl_tsinfo_dump_one_phydev(skb, dev, + dev->phydev, cb); + if (ret < 0 && ret != -EOPNOTSUPP) + return ret; + } + + return 0; + } + + xa_for_each_start(&dev->link_topo->phys, ctx->pos_phyindex, pdn, + ctx->pos_phyindex) { + if (phy_has_tsinfo(pdn->phy)) { + ret = ethnl_tsinfo_dump_one_phydev(skb, dev, + pdn->phy, cb); + if (ret < 0 && ret != -EOPNOTSUPP) + return ret; + } + } + + return ret; +} + +int ethnl_tsinfo_dumpit(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct ethnl_tsinfo_dump_ctx *ctx = (void *)cb->ctx; + struct net *net = sock_net(skb->sk); + struct net_device *dev; + int ret = 0; + + rtnl_lock(); + if (ctx->req_info->base.dev) { + ret = ethnl_tsinfo_dump_one_net_topo(skb, + ctx->req_info->base.dev, + cb); + } else { + for_each_netdev_dump(net, dev, ctx->pos_ifindex) { + ret = ethnl_tsinfo_dump_one_net_topo(skb, dev, cb); + if (ret < 0 && ret != -EOPNOTSUPP) + break; + ctx->pos_phyindex = 0; + ctx->netdev_dump_done = false; + ctx->pos_phcqualifier = HWTSTAMP_PROVIDER_QUALIFIER_PRECISE; + } + } + rtnl_unlock(); + + return ret; +} + +int ethnl_tsinfo_start(struct netlink_callback *cb) +{ + const struct genl_dumpit_info *info = genl_dumpit_info(cb); + struct ethnl_tsinfo_dump_ctx *ctx = (void *)cb->ctx; + struct nlattr **tb = info->info.attrs; + struct tsinfo_reply_data *reply_data; + struct tsinfo_req_info *req_info; + int ret; + + BUILD_BUG_ON(sizeof(*ctx) > sizeof(cb->ctx)); + + req_info = kzalloc(sizeof(*req_info), GFP_KERNEL); + if (!req_info) + return -ENOMEM; + reply_data = kzalloc(sizeof(*reply_data), GFP_KERNEL); + if (!reply_data) { + ret = -ENOMEM; + goto free_req_info; + } + + ret = ethnl_parse_header_dev_get(&req_info->base, + tb[ETHTOOL_A_TSINFO_HEADER], + sock_net(cb->skb->sk), cb->extack, + false); + if (ret < 0) + goto free_reply_data; + + ctx->req_info = req_info; + ctx->reply_data = reply_data; + ctx->pos_ifindex = 0; + ctx->pos_phyindex = 0; + ctx->netdev_dump_done = false; + ctx->pos_phcqualifier = HWTSTAMP_PROVIDER_QUALIFIER_PRECISE; + + return 0; + +free_reply_data: + kfree(reply_data); +free_req_info: + kfree(req_info); + + return ret; +} + +int ethnl_tsinfo_done(struct netlink_callback *cb) +{ + struct ethnl_tsinfo_dump_ctx *ctx = (void *)cb->ctx; + struct tsinfo_req_info *req_info = ctx->req_info; + + ethnl_parse_header_dev_put(&req_info->base); + kfree(ctx->reply_data); + kfree(ctx->req_info); + + return 0; +} + const struct ethnl_request_ops ethnl_tsinfo_request_ops = { .request_cmd = ETHTOOL_MSG_TSINFO_GET, .reply_cmd = ETHTOOL_MSG_TSINFO_GET_REPLY, @@ -180,6 +527,7 @@ const struct ethnl_request_ops ethnl_tsinfo_request_ops = { .req_info_size = sizeof(struct tsinfo_req_info), .reply_data_size = sizeof(struct tsinfo_reply_data), + .parse_request = tsinfo_parse_request, .prepare_data = tsinfo_prepare_data, .reply_size = tsinfo_reply_size, .fill_reply = tsinfo_fill_reply, -- cgit v1.2.3 From 6e9e2eed4f39d52edf5fd006409d211facf49f6b Mon Sep 17 00:00:00 2001 From: Kory Maincent Date: Thu, 12 Dec 2024 18:06:45 +0100 Subject: net: ethtool: Add support for tsconfig command to get/set hwtstamp config Introduce support for ETHTOOL_MSG_TSCONFIG_GET/SET ethtool netlink socket to read and configure hwtstamp configuration of a PHC provider. Note that simultaneous hwtstamp isn't supported; configuring a new one disables the previous setting. Signed-off-by: Kory Maincent Signed-off-by: David S. Miller --- Documentation/netlink/specs/ethtool.yaml | 56 ++++ Documentation/networking/ethtool-netlink.rst | 75 +++++ Documentation/networking/timestamping.rst | 38 ++- include/uapi/linux/ethtool_netlink_generated.h | 16 + net/ethtool/Makefile | 2 +- net/ethtool/common.c | 27 +- net/ethtool/common.h | 2 +- net/ethtool/netlink.c | 18 + net/ethtool/netlink.h | 3 + net/ethtool/tsconfig.c | 444 +++++++++++++++++++++++++ 10 files changed, 655 insertions(+), 26 deletions(-) create mode 100644 net/ethtool/tsconfig.c (limited to 'include/uapi') diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml index 4082816e5f3d..60f85fbf4156 100644 --- a/Documentation/netlink/specs/ethtool.yaml +++ b/Documentation/netlink/specs/ethtool.yaml @@ -1489,6 +1489,33 @@ attribute-sets: - name: downstream-sfp-name type: string + - + name: tsconfig + attr-cnt-name: __ethtool-a-tsconfig-cnt + attributes: + - + name: unspec + type: unused + value: 0 + - + name: header + type: nest + nested-attributes: header + - + name: hwtstamp-provider + type: nest + nested-attributes: ts-hwtstamp-provider + - + name: tx-types + type: nest + nested-attributes: bitset + - + name: rx-filters + type: nest + nested-attributes: bitset + - + name: hwtstamp-flags + type: u32 operations: enum-model: directional @@ -2314,3 +2341,32 @@ operations: name: phy-ntf doc: Notification for change in PHY devices. notify: phy-get + - + name: tsconfig-get + doc: Get hwtstamp config. + + attribute-set: tsconfig + + do: &tsconfig-get-op + request: + attributes: + - header + reply: + attributes: &tsconfig + - header + - hwtstamp-provider + - tx-types + - rx-filters + - hwtstamp-flags + dump: *tsconfig-get-op + - + name: tsconfig-set + doc: Set hwtstamp config. + + attribute-set: tsconfig + + do: + request: + attributes: *tsconfig + reply: + attributes: *tsconfig diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index c585e2f0ddfa..a7ba6368a4d5 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -237,6 +237,8 @@ Userspace to kernel: ``ETHTOOL_MSG_MM_SET`` set MAC merge layer parameters ``ETHTOOL_MSG_MODULE_FW_FLASH_ACT`` flash transceiver module firmware ``ETHTOOL_MSG_PHY_GET`` get Ethernet PHY information + ``ETHTOOL_MSG_TSCONFIG_GET`` get hw timestamping configuration + ``ETHTOOL_MSG_TSCONFIG_SET`` set hw timestamping configuration ===================================== ================================= Kernel to userspace: @@ -286,6 +288,8 @@ Kernel to userspace: ``ETHTOOL_MSG_MODULE_FW_FLASH_NTF`` transceiver module flash updates ``ETHTOOL_MSG_PHY_GET_REPLY`` Ethernet PHY information ``ETHTOOL_MSG_PHY_NTF`` Ethernet PHY information change + ``ETHTOOL_MSG_TSCONFIG_GET_REPLY`` hw timestamping configuration + ``ETHTOOL_MSG_TSCONFIG_SET_REPLY`` new hw timestamping configuration ======================================== ================================= ``GET`` requests are sent by userspace applications to retrieve device @@ -2244,6 +2248,75 @@ Kernel response contents: When ``ETHTOOL_A_PHY_UPSTREAM_TYPE`` is PHY_UPSTREAM_PHY, the PHY's parent is another PHY. +TSCONFIG_GET +============ + +Retrieves the information about the current hardware timestamping source and +configuration. + +It is similar to the deprecated ``SIOCGHWTSTAMP`` ioctl request. + +Request contents: + + ==================================== ====== ========================== + ``ETHTOOL_A_TSCONFIG_HEADER`` nested request header + ==================================== ====== ========================== + +Kernel response contents: + + ======================================== ====== ============================ + ``ETHTOOL_A_TSCONFIG_HEADER`` nested request header + ``ETHTOOL_A_TSCONFIG_HWTSTAMP_PROVIDER`` nested PTP hw clock provider + ``ETHTOOL_A_TSCONFIG_TX_TYPES`` bitset hwtstamp Tx type + ``ETHTOOL_A_TSCONFIG_RX_FILTERS`` bitset hwtstamp Rx filter + ``ETHTOOL_A_TSCONFIG_HWTSTAMP_FLAGS`` u32 hwtstamp flags + ======================================== ====== ============================ + +When set the ``ETHTOOL_A_TSCONFIG_HWTSTAMP_PROVIDER`` attribute identifies the +source of the hw timestamping provider. It is composed by +``ETHTOOL_A_TS_HWTSTAMP_PROVIDER_INDEX`` attribute which describe the index of +the PTP device and ``ETHTOOL_A_TS_HWTSTAMP_PROVIDER_QUALIFIER`` which describe +the qualifier of the timestamp. + +When set the ``ETHTOOL_A_TSCONFIG_TX_TYPES``, ``ETHTOOL_A_TSCONFIG_RX_FILTERS`` +and the ``ETHTOOL_A_TSCONFIG_HWTSTAMP_FLAGS`` attributes identify the Tx +type, the Rx filter and the flags configured for the current hw timestamping +provider. The attributes are propagated to the driver through the following +structure: + +.. kernel-doc:: include/linux/net_tstamp.h + :identifiers: kernel_hwtstamp_config + +TSCONFIG_SET +============ + +Set the information about the current hardware timestamping source and +configuration. + +It is similar to the deprecated ``SIOCSHWTSTAMP`` ioctl request. + +Request contents: + + ======================================== ====== ============================ + ``ETHTOOL_A_TSCONFIG_HEADER`` nested request header + ``ETHTOOL_A_TSCONFIG_HWTSTAMP_PROVIDER`` nested PTP hw clock provider + ``ETHTOOL_A_TSCONFIG_TX_TYPES`` bitset hwtstamp Tx type + ``ETHTOOL_A_TSCONFIG_RX_FILTERS`` bitset hwtstamp Rx filter + ``ETHTOOL_A_TSCONFIG_HWTSTAMP_FLAGS`` u32 hwtstamp flags + ======================================== ====== ============================ + +Kernel response contents: + + ======================================== ====== ============================ + ``ETHTOOL_A_TSCONFIG_HEADER`` nested request header + ``ETHTOOL_A_TSCONFIG_HWTSTAMP_PROVIDER`` nested PTP hw clock provider + ``ETHTOOL_A_TSCONFIG_TX_TYPES`` bitset hwtstamp Tx type + ``ETHTOOL_A_TSCONFIG_RX_FILTERS`` bitset hwtstamp Rx filter + ``ETHTOOL_A_TSCONFIG_HWTSTAMP_FLAGS`` u32 hwtstamp flags + ======================================== ====== ============================ + +For a description of each attribute, see ``TSCONFIG_GET``. + Request translation =================== @@ -2352,4 +2425,6 @@ are netlink only. n/a ``ETHTOOL_MSG_MM_SET`` n/a ``ETHTOOL_MSG_MODULE_FW_FLASH_ACT`` n/a ``ETHTOOL_MSG_PHY_GET`` + ``SIOCGHWTSTAMP`` ``ETHTOOL_MSG_TSCONFIG_GET`` + ``SIOCSHWTSTAMP`` ``ETHTOOL_MSG_TSCONFIG_SET`` =================================== ===================================== diff --git a/Documentation/networking/timestamping.rst b/Documentation/networking/timestamping.rst index b37bfbfc7d79..61ef9da10e28 100644 --- a/Documentation/networking/timestamping.rst +++ b/Documentation/networking/timestamping.rst @@ -525,8 +525,8 @@ implicitly defined. ts[0] holds a software timestamp if set, ts[1] is again deprecated and ts[2] holds a hardware timestamp if set. -3. Hardware Timestamping configuration: SIOCSHWTSTAMP and SIOCGHWTSTAMP -======================================================================= +3. Hardware Timestamping configuration: ETHTOOL_MSG_TSCONFIG_SET/GET +==================================================================== Hardware time stamping must also be initialized for each device driver that is expected to do hardware time stamping. The parameter is defined in @@ -539,12 +539,14 @@ include/uapi/linux/net_tstamp.h as:: }; Desired behavior is passed into the kernel and to a specific device by -calling ioctl(SIOCSHWTSTAMP) with a pointer to a struct ifreq whose -ifr_data points to a struct hwtstamp_config. The tx_type and -rx_filter are hints to the driver what it is expected to do. If -the requested fine-grained filtering for incoming packets is not -supported, the driver may time stamp more than just the requested types -of packets. +calling the tsconfig netlink socket ``ETHTOOL_MSG_TSCONFIG_SET``. +The ``ETHTOOL_A_TSCONFIG_TX_TYPES``, ``ETHTOOL_A_TSCONFIG_RX_FILTERS`` and +``ETHTOOL_A_TSCONFIG_HWTSTAMP_FLAGS`` netlink attributes are then used to set +the struct hwtstamp_config accordingly. + +The ``ETHTOOL_A_TSCONFIG_HWTSTAMP_PROVIDER`` netlink nested attribute is used +to select the source of the hardware time stamping. It is composed of an index +for the device source and a qualifier for the type of time stamping. Drivers are free to use a more permissive configuration than the requested configuration. It is expected that drivers should only implement directly the @@ -563,9 +565,16 @@ Only a processes with admin rights may change the configuration. User space is responsible to ensure that multiple processes don't interfere with each other and that the settings are reset. -Any process can read the actual configuration by passing this -structure to ioctl(SIOCGHWTSTAMP) in the same way. However, this has -not been implemented in all drivers. +Any process can read the actual configuration by requesting tsconfig netlink +socket ``ETHTOOL_MSG_TSCONFIG_GET``. + +The legacy configuration is the use of the ioctl(SIOCSHWTSTAMP) with a pointer +to a struct ifreq whose ifr_data points to a struct hwtstamp_config. +The tx_type and rx_filter are hints to the driver what it is expected to do. +If the requested fine-grained filtering for incoming packets is not +supported, the driver may time stamp more than just the requested types +of packets. ioctl(SIOCGHWTSTAMP) is used in the same way as the +ioctl(SIOCSHWTSTAMP). However, this has not been implemented in all drivers. :: @@ -610,9 +619,10 @@ not been implemented in all drivers. -------------------------------------------------------- A driver which supports hardware time stamping must support the -SIOCSHWTSTAMP ioctl and update the supplied struct hwtstamp_config with -the actual values as described in the section on SIOCSHWTSTAMP. It -should also support SIOCGHWTSTAMP. +ndo_hwtstamp_set NDO or the legacy SIOCSHWTSTAMP ioctl and update the +supplied struct hwtstamp_config with the actual values as described in +the section on SIOCSHWTSTAMP. It should also support ndo_hwtstamp_get or +the legacy SIOCGHWTSTAMP. Time stamps for received packets must be stored in the skb. To get a pointer to the shared time stamp structure of the skb call skb_hwtstamps(). Then diff --git a/include/uapi/linux/ethtool_netlink_generated.h b/include/uapi/linux/ethtool_netlink_generated.h index df289dde0f61..43993a2d68e5 100644 --- a/include/uapi/linux/ethtool_netlink_generated.h +++ b/include/uapi/linux/ethtool_netlink_generated.h @@ -694,6 +694,18 @@ enum { ETHTOOL_A_PHY_MAX = (__ETHTOOL_A_PHY_CNT - 1) }; +enum { + ETHTOOL_A_TSCONFIG_UNSPEC, + ETHTOOL_A_TSCONFIG_HEADER, + ETHTOOL_A_TSCONFIG_HWTSTAMP_PROVIDER, + ETHTOOL_A_TSCONFIG_TX_TYPES, + ETHTOOL_A_TSCONFIG_RX_FILTERS, + ETHTOOL_A_TSCONFIG_HWTSTAMP_FLAGS, + + __ETHTOOL_A_TSCONFIG_CNT, + ETHTOOL_A_TSCONFIG_MAX = (__ETHTOOL_A_TSCONFIG_CNT - 1) +}; + enum { ETHTOOL_MSG_USER_NONE = 0, ETHTOOL_MSG_STRSET_GET = 1, @@ -741,6 +753,8 @@ enum { ETHTOOL_MSG_MM_SET, ETHTOOL_MSG_MODULE_FW_FLASH_ACT, ETHTOOL_MSG_PHY_GET, + ETHTOOL_MSG_TSCONFIG_GET, + ETHTOOL_MSG_TSCONFIG_SET, __ETHTOOL_MSG_USER_CNT, ETHTOOL_MSG_USER_MAX = (__ETHTOOL_MSG_USER_CNT - 1) @@ -794,6 +808,8 @@ enum { ETHTOOL_MSG_MODULE_FW_FLASH_NTF, ETHTOOL_MSG_PHY_GET_REPLY, ETHTOOL_MSG_PHY_NTF, + ETHTOOL_MSG_TSCONFIG_GET_REPLY, + ETHTOOL_MSG_TSCONFIG_SET_REPLY, __ETHTOOL_MSG_KERNEL_CNT, ETHTOOL_MSG_KERNEL_MAX = (__ETHTOOL_MSG_KERNEL_CNT - 1) diff --git a/net/ethtool/Makefile b/net/ethtool/Makefile index 9b540644ba31..a1490c4afe6b 100644 --- a/net/ethtool/Makefile +++ b/net/ethtool/Makefile @@ -9,4 +9,4 @@ ethtool_nl-y := netlink.o bitset.o strset.o linkinfo.o linkmodes.o rss.o \ channels.o coalesce.o pause.o eee.o tsinfo.o cabletest.o \ tunnels.o fec.o eeprom.o stats.o phc_vclocks.o mm.o \ module.o cmis_fw_update.o cmis_cdb.o pse-pd.o plca.o mm.o \ - phy.o + phy.o tsconfig.o diff --git a/net/ethtool/common.c b/net/ethtool/common.c index 666db40bcfda..02f941f667dd 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -797,7 +797,7 @@ int ethtool_net_get_ts_info_by_phc(struct net_device *dev, return -ENODEV; } -int +struct phy_device * ethtool_phy_get_ts_info_by_phc(struct net_device *dev, struct kernel_ethtool_ts_info *info, struct hwtstamp_provider_desc *hwprov_desc) @@ -806,7 +806,7 @@ ethtool_phy_get_ts_info_by_phc(struct net_device *dev, /* Only precise qualifier is supported in phydev */ if (hwprov_desc->qualifier != HWTSTAMP_PROVIDER_QUALIFIER_PRECISE) - return -ENODEV; + return ERR_PTR(-ENODEV); /* Look in the phy topology */ if (dev->link_topo) { @@ -820,12 +820,12 @@ ethtool_phy_get_ts_info_by_phc(struct net_device *dev, ethtool_init_tsinfo(info); err = phy_ts_info(pdn->phy, info); if (err) - return err; + return ERR_PTR(err); if (info->phc_index == hwprov_desc->index) - return 0; + return pdn->phy; } - return -ENODEV; + return ERR_PTR(-ENODEV); } /* Look on the dev->phydev */ @@ -833,13 +833,13 @@ ethtool_phy_get_ts_info_by_phc(struct net_device *dev, ethtool_init_tsinfo(info); err = phy_ts_info(dev->phydev, info); if (err) - return err; + return ERR_PTR(err); if (info->phc_index == hwprov_desc->index) - return 0; + return dev->phydev; } - return -ENODEV; + return ERR_PTR(-ENODEV); } int ethtool_get_ts_info_by_phc(struct net_device *dev, @@ -849,8 +849,15 @@ int ethtool_get_ts_info_by_phc(struct net_device *dev, int err; err = ethtool_net_get_ts_info_by_phc(dev, info, hwprov_desc); - if (err == -ENODEV) - err = ethtool_phy_get_ts_info_by_phc(dev, info, hwprov_desc); + if (err == -ENODEV) { + struct phy_device *phy; + + phy = ethtool_phy_get_ts_info_by_phc(dev, info, hwprov_desc); + if (IS_ERR(phy)) + err = PTR_ERR(phy); + else + err = 0; + } info->so_timestamping |= SOF_TIMESTAMPING_RX_SOFTWARE | SOF_TIMESTAMPING_SOFTWARE; diff --git a/net/ethtool/common.h b/net/ethtool/common.h index f5119204c8ff..850eadde4bfc 100644 --- a/net/ethtool/common.h +++ b/net/ethtool/common.h @@ -56,7 +56,7 @@ int ethtool_get_ts_info_by_phc(struct net_device *dev, int ethtool_net_get_ts_info_by_phc(struct net_device *dev, struct kernel_ethtool_ts_info *info, struct hwtstamp_provider_desc *hwprov_desc); -int +struct phy_device * ethtool_phy_get_ts_info_by_phc(struct net_device *dev, struct kernel_ethtool_ts_info *info, struct hwtstamp_provider_desc *hwprov_desc); diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index 6ae1d91f36e7..849c98e637c6 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -394,6 +394,8 @@ ethnl_default_requests[__ETHTOOL_MSG_USER_CNT] = { [ETHTOOL_MSG_PLCA_GET_STATUS] = ðnl_plca_status_request_ops, [ETHTOOL_MSG_MM_GET] = ðnl_mm_request_ops, [ETHTOOL_MSG_MM_SET] = ðnl_mm_request_ops, + [ETHTOOL_MSG_TSCONFIG_GET] = ðnl_tsconfig_request_ops, + [ETHTOOL_MSG_TSCONFIG_SET] = ðnl_tsconfig_request_ops, }; static struct ethnl_dump_ctx *ethnl_dump_context(struct netlink_callback *cb) @@ -1243,6 +1245,22 @@ static const struct genl_ops ethtool_genl_ops[] = { .policy = ethnl_phy_get_policy, .maxattr = ARRAY_SIZE(ethnl_phy_get_policy) - 1, }, + { + .cmd = ETHTOOL_MSG_TSCONFIG_GET, + .doit = ethnl_default_doit, + .start = ethnl_default_start, + .dumpit = ethnl_default_dumpit, + .done = ethnl_default_done, + .policy = ethnl_tsconfig_get_policy, + .maxattr = ARRAY_SIZE(ethnl_tsconfig_get_policy) - 1, + }, + { + .cmd = ETHTOOL_MSG_TSCONFIG_SET, + .flags = GENL_UNS_ADMIN_PERM, + .doit = ethnl_default_set_doit, + .policy = ethnl_tsconfig_set_policy, + .maxattr = ARRAY_SIZE(ethnl_tsconfig_set_policy) - 1, + }, }; static const struct genl_multicast_group ethtool_nl_mcgrps[] = { diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index 960cda13e4fc..0a09298fff92 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -435,6 +435,7 @@ extern const struct ethnl_request_ops ethnl_plca_cfg_request_ops; extern const struct ethnl_request_ops ethnl_plca_status_request_ops; extern const struct ethnl_request_ops ethnl_mm_request_ops; extern const struct ethnl_request_ops ethnl_phy_request_ops; +extern const struct ethnl_request_ops ethnl_tsconfig_request_ops; extern const struct nla_policy ethnl_header_policy[ETHTOOL_A_HEADER_FLAGS + 1]; extern const struct nla_policy ethnl_header_policy_stats[ETHTOOL_A_HEADER_FLAGS + 1]; @@ -485,6 +486,8 @@ extern const struct nla_policy ethnl_mm_get_policy[ETHTOOL_A_MM_HEADER + 1]; extern const struct nla_policy ethnl_mm_set_policy[ETHTOOL_A_MM_MAX + 1]; extern const struct nla_policy ethnl_module_fw_flash_act_policy[ETHTOOL_A_MODULE_FW_FLASH_PASSWORD + 1]; extern const struct nla_policy ethnl_phy_get_policy[ETHTOOL_A_PHY_HEADER + 1]; +extern const struct nla_policy ethnl_tsconfig_get_policy[ETHTOOL_A_TSCONFIG_HEADER + 1]; +extern const struct nla_policy ethnl_tsconfig_set_policy[ETHTOOL_A_TSCONFIG_MAX + 1]; int ethnl_set_features(struct sk_buff *skb, struct genl_info *info); int ethnl_act_cable_test(struct sk_buff *skb, struct genl_info *info); diff --git a/net/ethtool/tsconfig.c b/net/ethtool/tsconfig.c new file mode 100644 index 000000000000..9188e088fb2f --- /dev/null +++ b/net/ethtool/tsconfig.c @@ -0,0 +1,444 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include +#include + +#include "netlink.h" +#include "common.h" +#include "bitset.h" +#include "../core/dev.h" +#include "ts.h" + +struct tsconfig_req_info { + struct ethnl_req_info base; +}; + +struct tsconfig_reply_data { + struct ethnl_reply_data base; + struct hwtstamp_provider_desc hwprov_desc; + struct { + u32 tx_type; + u32 rx_filter; + u32 flags; + } hwtst_config; +}; + +#define TSCONFIG_REPDATA(__reply_base) \ + container_of(__reply_base, struct tsconfig_reply_data, base) + +const struct nla_policy ethnl_tsconfig_get_policy[ETHTOOL_A_TSCONFIG_HEADER + 1] = { + [ETHTOOL_A_TSCONFIG_HEADER] = + NLA_POLICY_NESTED(ethnl_header_policy), +}; + +static int tsconfig_prepare_data(const struct ethnl_req_info *req_base, + struct ethnl_reply_data *reply_base, + const struct genl_info *info) +{ + struct tsconfig_reply_data *data = TSCONFIG_REPDATA(reply_base); + struct hwtstamp_provider *hwprov = NULL; + struct net_device *dev = reply_base->dev; + struct kernel_hwtstamp_config cfg = {}; + int ret; + + if (!dev->netdev_ops->ndo_hwtstamp_get) + return -EOPNOTSUPP; + + ret = ethnl_ops_begin(dev); + if (ret < 0) + return ret; + + ret = dev_get_hwtstamp_phylib(dev, &cfg); + if (ret) + goto out; + + data->hwtst_config.tx_type = BIT(cfg.tx_type); + data->hwtst_config.rx_filter = BIT(cfg.rx_filter); + data->hwtst_config.flags = BIT(cfg.flags); + + data->hwprov_desc.index = -1; + hwprov = rtnl_dereference(dev->hwprov); + if (hwprov) { + data->hwprov_desc.index = hwprov->desc.index; + data->hwprov_desc.qualifier = hwprov->desc.qualifier; + } else { + struct kernel_ethtool_ts_info ts_info = {}; + + ts_info.phc_index = -1; + ret = __ethtool_get_ts_info(dev, &ts_info); + if (ret) + goto out; + + if (ts_info.phc_index == -1) + return -ENODEV; + + data->hwprov_desc.index = ts_info.phc_index; + data->hwprov_desc.qualifier = ts_info.phc_qualifier; + } + +out: + ethnl_ops_complete(dev); + return ret; +} + +static int tsconfig_reply_size(const struct ethnl_req_info *req_base, + const struct ethnl_reply_data *reply_base) +{ + const struct tsconfig_reply_data *data = TSCONFIG_REPDATA(reply_base); + bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS; + int len = 0; + int ret; + + BUILD_BUG_ON(__HWTSTAMP_TX_CNT > 32); + BUILD_BUG_ON(__HWTSTAMP_FILTER_CNT > 32); + + if (data->hwtst_config.flags) + /* _TSCONFIG_HWTSTAMP_FLAGS */ + len += nla_total_size(sizeof(u32)); + + if (data->hwtst_config.tx_type) { + ret = ethnl_bitset32_size(&data->hwtst_config.tx_type, + NULL, __HWTSTAMP_TX_CNT, + ts_tx_type_names, compact); + if (ret < 0) + return ret; + len += ret; /* _TSCONFIG_TX_TYPES */ + } + if (data->hwtst_config.rx_filter) { + ret = ethnl_bitset32_size(&data->hwtst_config.rx_filter, + NULL, __HWTSTAMP_FILTER_CNT, + ts_rx_filter_names, compact); + if (ret < 0) + return ret; + len += ret; /* _TSCONFIG_RX_FILTERS */ + } + + if (data->hwprov_desc.index >= 0) + /* _TSCONFIG_HWTSTAMP_PROVIDER */ + len += nla_total_size(0) + + 2 * nla_total_size(sizeof(u32)); + + return len; +} + +static int tsconfig_fill_reply(struct sk_buff *skb, + const struct ethnl_req_info *req_base, + const struct ethnl_reply_data *reply_base) +{ + const struct tsconfig_reply_data *data = TSCONFIG_REPDATA(reply_base); + bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS; + int ret; + + if (data->hwtst_config.flags) { + ret = nla_put_u32(skb, ETHTOOL_A_TSCONFIG_HWTSTAMP_FLAGS, + data->hwtst_config.flags); + if (ret < 0) + return ret; + } + + if (data->hwtst_config.tx_type) { + ret = ethnl_put_bitset32(skb, ETHTOOL_A_TSCONFIG_TX_TYPES, + &data->hwtst_config.tx_type, NULL, + __HWTSTAMP_TX_CNT, + ts_tx_type_names, compact); + if (ret < 0) + return ret; + } + + if (data->hwtst_config.rx_filter) { + ret = ethnl_put_bitset32(skb, ETHTOOL_A_TSCONFIG_RX_FILTERS, + &data->hwtst_config.rx_filter, + NULL, __HWTSTAMP_FILTER_CNT, + ts_rx_filter_names, compact); + if (ret < 0) + return ret; + } + + if (data->hwprov_desc.index >= 0) { + struct nlattr *nest; + + nest = nla_nest_start(skb, ETHTOOL_A_TSCONFIG_HWTSTAMP_PROVIDER); + if (!nest) + return -EMSGSIZE; + + if (nla_put_u32(skb, ETHTOOL_A_TS_HWTSTAMP_PROVIDER_INDEX, + data->hwprov_desc.index) || + nla_put_u32(skb, + ETHTOOL_A_TS_HWTSTAMP_PROVIDER_QUALIFIER, + data->hwprov_desc.qualifier)) { + nla_nest_cancel(skb, nest); + return -EMSGSIZE; + } + + nla_nest_end(skb, nest); + } + return 0; +} + +/* TSCONFIG_SET */ +const struct nla_policy ethnl_tsconfig_set_policy[ETHTOOL_A_TSCONFIG_MAX + 1] = { + [ETHTOOL_A_TSCONFIG_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy), + [ETHTOOL_A_TSCONFIG_HWTSTAMP_PROVIDER] = + NLA_POLICY_NESTED(ethnl_ts_hwtst_prov_policy), + [ETHTOOL_A_TSCONFIG_HWTSTAMP_FLAGS] = { .type = NLA_U32 }, + [ETHTOOL_A_TSCONFIG_RX_FILTERS] = { .type = NLA_NESTED }, + [ETHTOOL_A_TSCONFIG_TX_TYPES] = { .type = NLA_NESTED }, +}; + +static int tsconfig_send_reply(struct net_device *dev, struct genl_info *info) +{ + struct tsconfig_reply_data *reply_data; + struct tsconfig_req_info *req_info; + struct sk_buff *rskb; + void *reply_payload; + int reply_len = 0; + int ret; + + req_info = kzalloc(sizeof(*req_info), GFP_KERNEL); + if (!req_info) + return -ENOMEM; + reply_data = kmalloc(sizeof(*reply_data), GFP_KERNEL); + if (!reply_data) { + kfree(req_info); + return -ENOMEM; + } + + ASSERT_RTNL(); + reply_data->base.dev = dev; + ret = tsconfig_prepare_data(&req_info->base, &reply_data->base, info); + if (ret < 0) + goto err_cleanup; + + ret = tsconfig_reply_size(&req_info->base, &reply_data->base); + if (ret < 0) + goto err_cleanup; + + reply_len = ret + ethnl_reply_header_size(); + rskb = ethnl_reply_init(reply_len, dev, ETHTOOL_MSG_TSCONFIG_SET_REPLY, + ETHTOOL_A_TSCONFIG_HEADER, info, &reply_payload); + if (!rskb) + goto err_cleanup; + + ret = tsconfig_fill_reply(rskb, &req_info->base, &reply_data->base); + if (ret < 0) + goto err_cleanup; + + genlmsg_end(rskb, reply_payload); + ret = genlmsg_reply(rskb, info); + +err_cleanup: + kfree(reply_data); + kfree(req_info); + return ret; +} + +static int ethnl_set_tsconfig_validate(struct ethnl_req_info *req_base, + struct genl_info *info) +{ + const struct net_device_ops *ops = req_base->dev->netdev_ops; + + if (!ops->ndo_hwtstamp_set || !ops->ndo_hwtstamp_get) + return -EOPNOTSUPP; + + return 1; +} + +static struct hwtstamp_provider * +tsconfig_set_hwprov_from_desc(struct net_device *dev, + struct genl_info *info, + struct hwtstamp_provider_desc *hwprov_desc) +{ + struct kernel_ethtool_ts_info ts_info; + struct hwtstamp_provider *hwprov; + struct nlattr **tb = info->attrs; + struct phy_device *phy = NULL; + enum hwtstamp_source source; + int ret; + + ret = ethtool_net_get_ts_info_by_phc(dev, &ts_info, hwprov_desc); + if (!ret) { + /* Found */ + source = HWTSTAMP_SOURCE_NETDEV; + } else { + phy = ethtool_phy_get_ts_info_by_phc(dev, &ts_info, hwprov_desc); + if (IS_ERR(phy)) { + if (PTR_ERR(phy) == -ENODEV) + NL_SET_ERR_MSG_ATTR(info->extack, + tb[ETHTOOL_A_TSCONFIG_HWTSTAMP_PROVIDER], + "phc not in this net device topology"); + return ERR_CAST(phy); + } + + source = HWTSTAMP_SOURCE_PHYLIB; + } + + hwprov = kzalloc(sizeof(*hwprov), GFP_KERNEL); + if (!hwprov) + return ERR_PTR(-ENOMEM); + + hwprov->desc.index = hwprov_desc->index; + hwprov->desc.qualifier = hwprov_desc->qualifier; + hwprov->source = source; + hwprov->phydev = phy; + + return hwprov; +} + +static int ethnl_set_tsconfig(struct ethnl_req_info *req_base, + struct genl_info *info) +{ + struct kernel_hwtstamp_config hwtst_config = {0}; + bool hwprov_mod = false, config_mod = false; + struct hwtstamp_provider *hwprov = NULL; + struct net_device *dev = req_base->dev; + struct nlattr **tb = info->attrs; + int ret; + + BUILD_BUG_ON(__HWTSTAMP_TX_CNT >= 32); + BUILD_BUG_ON(__HWTSTAMP_FILTER_CNT >= 32); + + if (!netif_device_present(dev)) + return -ENODEV; + + if (tb[ETHTOOL_A_TSCONFIG_HWTSTAMP_PROVIDER]) { + struct hwtstamp_provider_desc __hwprov_desc = {.index = -1}; + struct hwtstamp_provider *__hwprov; + + __hwprov = rtnl_dereference(dev->hwprov); + if (__hwprov) { + __hwprov_desc.index = __hwprov->desc.index; + __hwprov_desc.qualifier = __hwprov->desc.qualifier; + } + + ret = ts_parse_hwtst_provider(tb[ETHTOOL_A_TSCONFIG_HWTSTAMP_PROVIDER], + &__hwprov_desc, info->extack, + &hwprov_mod); + if (ret < 0) + return ret; + + if (hwprov_mod) { + hwprov = tsconfig_set_hwprov_from_desc(dev, info, + &__hwprov_desc); + if (IS_ERR(hwprov)) + return PTR_ERR(hwprov); + } + } + + /* Get current hwtstamp config if we are not changing the + * hwtstamp source. It will be zeroed in the other case. + */ + if (!hwprov_mod) { + ret = dev_get_hwtstamp_phylib(dev, &hwtst_config); + if (ret < 0 && ret != -EOPNOTSUPP) + goto err_free_hwprov; + } + + /* Get the hwtstamp config from netlink */ + if (tb[ETHTOOL_A_TSCONFIG_TX_TYPES]) { + u32 req_tx_type; + + req_tx_type = BIT(hwtst_config.tx_type); + ret = ethnl_update_bitset32(&req_tx_type, + __HWTSTAMP_TX_CNT, + tb[ETHTOOL_A_TSCONFIG_TX_TYPES], + ts_tx_type_names, info->extack, + &config_mod); + if (ret < 0) + goto err_free_hwprov; + + /* Select only one tx type at a time */ + if (ffs(req_tx_type) != fls(req_tx_type)) { + ret = -EINVAL; + goto err_free_hwprov; + } + + hwtst_config.tx_type = ffs(req_tx_type) - 1; + } + + if (tb[ETHTOOL_A_TSCONFIG_RX_FILTERS]) { + u32 req_rx_filter; + + req_rx_filter = BIT(hwtst_config.rx_filter); + ret = ethnl_update_bitset32(&req_rx_filter, + __HWTSTAMP_FILTER_CNT, + tb[ETHTOOL_A_TSCONFIG_RX_FILTERS], + ts_rx_filter_names, info->extack, + &config_mod); + if (ret < 0) + goto err_free_hwprov; + + /* Select only one rx filter at a time */ + if (ffs(req_rx_filter) != fls(req_rx_filter)) { + ret = -EINVAL; + goto err_free_hwprov; + } + + hwtst_config.rx_filter = ffs(req_rx_filter) - 1; + } + + if (tb[ETHTOOL_A_TSCONFIG_HWTSTAMP_FLAGS]) { + ethnl_update_u32(&hwtst_config.flags, + tb[ETHTOOL_A_TSCONFIG_HWTSTAMP_FLAGS], + &config_mod); + } + + ret = net_hwtstamp_validate(&hwtst_config); + if (ret) + goto err_free_hwprov; + + if (hwprov_mod) { + struct kernel_hwtstamp_config zero_config = {0}; + struct hwtstamp_provider *__hwprov; + + /* Disable current time stamping if we try to enable + * another one + */ + ret = dev_set_hwtstamp_phylib(dev, &zero_config, info->extack); + if (ret < 0) + goto err_free_hwprov; + + /* Change the selected hwtstamp source */ + __hwprov = rcu_replace_pointer_rtnl(dev->hwprov, hwprov); + if (__hwprov) + kfree_rcu(__hwprov, rcu_head); + } + + if (config_mod) { + ret = dev_set_hwtstamp_phylib(dev, &hwtst_config, + info->extack); + if (ret < 0) + return ret; + } + + if (hwprov_mod || config_mod) { + ret = tsconfig_send_reply(dev, info); + if (ret && ret != -EOPNOTSUPP) { + NL_SET_ERR_MSG(info->extack, + "error while reading the new configuration set"); + return ret; + } + } + + /* tsconfig has no notification */ + return 0; + +err_free_hwprov: + kfree(hwprov); + + return ret; +} + +const struct ethnl_request_ops ethnl_tsconfig_request_ops = { + .request_cmd = ETHTOOL_MSG_TSCONFIG_GET, + .reply_cmd = ETHTOOL_MSG_TSCONFIG_GET_REPLY, + .hdr_attr = ETHTOOL_A_TSCONFIG_HEADER, + .req_info_size = sizeof(struct tsconfig_req_info), + .reply_data_size = sizeof(struct tsconfig_reply_data), + + .prepare_data = tsconfig_prepare_data, + .reply_size = tsconfig_reply_size, + .fill_reply = tsconfig_fill_reply, + + .set_validate = ethnl_set_tsconfig_validate, + .set = ethnl_set_tsconfig, +}; -- cgit v1.2.3 From 65c8c78cc74d5bcbc43f1f785a004796a2d78360 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Thu, 12 Dec 2024 21:13:10 +0100 Subject: thermal/thresholds: Fix uapi header macros leading to a compilation error The macros giving the direction of the crossing thresholds use the BIT macro which is not exported to the userspace. Consequently when an userspace program includes the header, it fails to compile. Replace the macros by their litteral to allow the compilation of userspace program using this header. Fixes: 445936f9e258 ("thermal: core: Add user thresholds support") Signed-off-by: Daniel Lezcano Link: https://patch.msgid.link/20241212201311.4143196-1-daniel.lezcano@linaro.org [ rjw: Add Fixes: ] Signed-off-by: Rafael J. Wysocki --- include/uapi/linux/thermal.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/thermal.h b/include/uapi/linux/thermal.h index ba8604bdf206..349718c271eb 100644 --- a/include/uapi/linux/thermal.h +++ b/include/uapi/linux/thermal.h @@ -3,8 +3,8 @@ #define _UAPI_LINUX_THERMAL_H #define THERMAL_NAME_LENGTH 20 -#define THERMAL_THRESHOLD_WAY_UP BIT(0) -#define THERMAL_THRESHOLD_WAY_DOWN BIT(1) +#define THERMAL_THRESHOLD_WAY_UP 0x1 +#define THERMAL_THRESHOLD_WAY_DOWN 0x2 enum thermal_device_mode { THERMAL_DEVICE_DISABLED = 0, -- cgit v1.2.3 From 5637797add2af632a5d037044ab1b0b35643902e Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Thu, 12 Dec 2024 14:49:03 -0800 Subject: drm/xe/oa/uapi: Expose an unblock after N reports OA property Expose an "unblock after N reports" OA property, to allow userspace threads to be woken up less frequently. Co-developed-by: Umesh Nerlige Ramappa Signed-off-by: Umesh Nerlige Ramappa Signed-off-by: Ashutosh Dixit Reviewed-by: Jonathan Cavitt Reviewed-by: Umesh Nerlige Ramappa Link: https://patchwork.freedesktop.org/patch/msgid/20241212224903.1853862-1-ashutosh.dixit@intel.com --- drivers/gpu/drm/xe/xe_oa.c | 30 ++++++++++++++++++++++++++---- drivers/gpu/drm/xe/xe_oa_types.h | 3 +++ drivers/gpu/drm/xe/xe_query.c | 3 ++- include/uapi/drm/xe_drm.h | 7 +++++++ 4 files changed, 38 insertions(+), 5 deletions(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index ec88b18e9baa..56bf375a9d4b 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -97,6 +97,7 @@ struct xe_oa_open_param { int num_syncs; struct xe_sync_entry *syncs; size_t oa_buffer_size; + int wait_num_reports; }; struct xe_oa_config_bo { @@ -241,11 +242,10 @@ static void oa_timestamp_clear(struct xe_oa_stream *stream, u32 *report) static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream) { u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo); + u32 tail, hw_tail, partial_report_size, available; int report_size = stream->oa_buffer.format->size; - u32 tail, hw_tail; unsigned long flags; bool pollin; - u32 partial_report_size; spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); @@ -289,8 +289,8 @@ static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream) stream->oa_buffer.tail = tail; - pollin = xe_oa_circ_diff(stream, stream->oa_buffer.tail, - stream->oa_buffer.head) >= report_size; + available = xe_oa_circ_diff(stream, stream->oa_buffer.tail, stream->oa_buffer.head); + pollin = available >= stream->wait_num_reports * report_size; spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); @@ -1285,6 +1285,17 @@ static int xe_oa_set_prop_oa_buffer_size(struct xe_oa *oa, u64 value, return 0; } +static int xe_oa_set_prop_wait_num_reports(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ + if (!value) { + drm_dbg(&oa->xe->drm, "wait_num_reports %llu\n", value); + return -EINVAL; + } + param->wait_num_reports = value; + return 0; +} + static int xe_oa_set_prop_ret_inval(struct xe_oa *oa, u64 value, struct xe_oa_open_param *param) { @@ -1306,6 +1317,7 @@ static const xe_oa_set_property_fn xe_oa_set_property_funcs_open[] = { [DRM_XE_OA_PROPERTY_NUM_SYNCS] = xe_oa_set_prop_num_syncs, [DRM_XE_OA_PROPERTY_SYNCS] = xe_oa_set_prop_syncs_user, [DRM_XE_OA_PROPERTY_OA_BUFFER_SIZE] = xe_oa_set_prop_oa_buffer_size, + [DRM_XE_OA_PROPERTY_WAIT_NUM_REPORTS] = xe_oa_set_prop_wait_num_reports, }; static const xe_oa_set_property_fn xe_oa_set_property_funcs_config[] = { @@ -1321,6 +1333,7 @@ static const xe_oa_set_property_fn xe_oa_set_property_funcs_config[] = { [DRM_XE_OA_PROPERTY_NUM_SYNCS] = xe_oa_set_prop_num_syncs, [DRM_XE_OA_PROPERTY_SYNCS] = xe_oa_set_prop_syncs_user, [DRM_XE_OA_PROPERTY_OA_BUFFER_SIZE] = xe_oa_set_prop_ret_inval, + [DRM_XE_OA_PROPERTY_WAIT_NUM_REPORTS] = xe_oa_set_prop_ret_inval, }; static int xe_oa_user_ext_set_property(struct xe_oa *oa, enum xe_oa_user_extn_from from, @@ -1797,6 +1810,7 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream, stream->periodic = param->period_exponent > 0; stream->period_exponent = param->period_exponent; stream->no_preempt = param->no_preempt; + stream->wait_num_reports = param->wait_num_reports; stream->xef = xe_file_get(param->xef); stream->num_syncs = param->num_syncs; @@ -2156,6 +2170,14 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f if (!param.oa_buffer_size) param.oa_buffer_size = DEFAULT_XE_OA_BUFFER_SIZE; + if (!param.wait_num_reports) + param.wait_num_reports = 1; + if (param.wait_num_reports > param.oa_buffer_size / f->size) { + drm_dbg(&oa->xe->drm, "wait_num_reports %d\n", param.wait_num_reports); + ret = -EINVAL; + goto err_exec_q; + } + ret = xe_oa_parse_syncs(oa, ¶m); if (ret) goto err_exec_q; diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h index df7793915628..2dcd3b9562e9 100644 --- a/drivers/gpu/drm/xe/xe_oa_types.h +++ b/drivers/gpu/drm/xe/xe_oa_types.h @@ -218,6 +218,9 @@ struct xe_oa_stream { /** @pollin: Whether there is data available to read */ bool pollin; + /** @wait_num_reports: Number of reports to wait for before signalling pollin */ + int wait_num_reports; + /** @periodic: Whether periodic sampling is currently enabled */ bool periodic; diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index d2a816f71bf2..c059639613f7 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -672,7 +672,8 @@ static int query_oa_units(struct xe_device *xe, du->oa_unit_type = u->type; du->oa_timestamp_freq = xe_oa_timestamp_frequency(gt); du->capabilities = DRM_XE_OA_CAPS_BASE | DRM_XE_OA_CAPS_SYNCS | - DRM_XE_OA_CAPS_OA_BUFFER_SIZE; + DRM_XE_OA_CAPS_OA_BUFFER_SIZE | + DRM_XE_OA_CAPS_WAIT_NUM_REPORTS; j = 0; for_each_hw_engine(hwe, gt, hwe_id) { diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 0383b52cbd86..f62689ca861a 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1487,6 +1487,7 @@ struct drm_xe_oa_unit { #define DRM_XE_OA_CAPS_BASE (1 << 0) #define DRM_XE_OA_CAPS_SYNCS (1 << 1) #define DRM_XE_OA_CAPS_OA_BUFFER_SIZE (1 << 2) +#define DRM_XE_OA_CAPS_WAIT_NUM_REPORTS (1 << 3) /** @oa_timestamp_freq: OA timestamp freq */ __u64 oa_timestamp_freq; @@ -1660,6 +1661,12 @@ enum drm_xe_oa_property_id { * buffer is allocated by default. */ DRM_XE_OA_PROPERTY_OA_BUFFER_SIZE, + + /** + * @DRM_XE_OA_PROPERTY_WAIT_NUM_REPORTS: Number of reports to wait + * for before unblocking poll or read + */ + DRM_XE_OA_PROPERTY_WAIT_NUM_REPORTS, }; /** -- cgit v1.2.3 From e45469e594b255ef8d750ed5576698743450d2ac Mon Sep 17 00:00:00 2001 From: Anna Emese Nyiri Date: Fri, 13 Dec 2024 09:44:57 +0100 Subject: sock: Introduce SO_RCVPRIORITY socket option Add new socket option, SO_RCVPRIORITY, to include SO_PRIORITY in the ancillary data returned by recvmsg(). This is analogous to the existing support for SO_RCVMARK, as implemented in commit 6fd1d51cfa253 ("net: SO_RCVMARK socket option for SO_MARK with recvmsg()"). Reviewed-by: Willem de Bruijn Suggested-by: Ferenc Fejes Signed-off-by: Anna Emese Nyiri Link: https://patch.msgid.link/20241213084457.45120-5-annaemesenyiri@gmail.com Signed-off-by: Jakub Kicinski --- arch/alpha/include/uapi/asm/socket.h | 2 ++ arch/mips/include/uapi/asm/socket.h | 2 ++ arch/parisc/include/uapi/asm/socket.h | 2 ++ arch/sparc/include/uapi/asm/socket.h | 2 ++ include/net/sock.h | 4 +++- include/uapi/asm-generic/socket.h | 2 ++ net/core/sock.c | 8 ++++++++ net/socket.c | 11 +++++++++++ tools/include/uapi/asm-generic/socket.h | 2 ++ 9 files changed, 34 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h index 302507bf9b5d..3df5f2dd4c0f 100644 --- a/arch/alpha/include/uapi/asm/socket.h +++ b/arch/alpha/include/uapi/asm/socket.h @@ -148,6 +148,8 @@ #define SCM_TS_OPT_ID 81 +#define SO_RCVPRIORITY 82 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h index d118d4731580..22fa8f19924a 100644 --- a/arch/mips/include/uapi/asm/socket.h +++ b/arch/mips/include/uapi/asm/socket.h @@ -159,6 +159,8 @@ #define SCM_TS_OPT_ID 81 +#define SO_RCVPRIORITY 82 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h index d268d69bfcd2..aa9cd4b951fe 100644 --- a/arch/parisc/include/uapi/asm/socket.h +++ b/arch/parisc/include/uapi/asm/socket.h @@ -140,6 +140,8 @@ #define SCM_TS_OPT_ID 0x404C +#define SO_RCVPRIORITY 0x404D + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h index 113cd9f353e3..5b464a568664 100644 --- a/arch/sparc/include/uapi/asm/socket.h +++ b/arch/sparc/include/uapi/asm/socket.h @@ -141,6 +141,8 @@ #define SCM_TS_OPT_ID 0x005a +#define SO_RCVPRIORITY 0x005b + #if !defined(__KERNEL__) diff --git a/include/net/sock.h b/include/net/sock.h index 316a34d6c48b..d4bdd3286e03 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -953,6 +953,7 @@ enum sock_flags { SOCK_XDP, /* XDP is attached */ SOCK_TSTAMP_NEW, /* Indicates 64 bit timestamps always */ SOCK_RCVMARK, /* Receive SO_MARK ancillary data with packet */ + SOCK_RCVPRIORITY, /* Receive SO_PRIORITY ancillary data with packet */ }; #define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE)) @@ -2660,7 +2661,8 @@ static inline void sock_recv_cmsgs(struct msghdr *msg, struct sock *sk, { #define FLAGS_RECV_CMSGS ((1UL << SOCK_RXQ_OVFL) | \ (1UL << SOCK_RCVTSTAMP) | \ - (1UL << SOCK_RCVMARK)) + (1UL << SOCK_RCVMARK) |\ + (1UL << SOCK_RCVPRIORITY)) #define TSFLAGS_ANY (SOF_TIMESTAMPING_SOFTWARE | \ SOF_TIMESTAMPING_RAW_HARDWARE) diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h index deacfd6dd197..aa5016ff3d91 100644 --- a/include/uapi/asm-generic/socket.h +++ b/include/uapi/asm-generic/socket.h @@ -143,6 +143,8 @@ #define SCM_TS_OPT_ID 81 +#define SO_RCVPRIORITY 82 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__)) diff --git a/net/core/sock.c b/net/core/sock.c index a3d9941c1d32..e7bcc8952248 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1519,6 +1519,10 @@ set_sndbuf: sock_valbool_flag(sk, SOCK_RCVMARK, valbool); break; + case SO_RCVPRIORITY: + sock_valbool_flag(sk, SOCK_RCVPRIORITY, valbool); + break; + case SO_RXQ_OVFL: sock_valbool_flag(sk, SOCK_RXQ_OVFL, valbool); break; @@ -1947,6 +1951,10 @@ int sk_getsockopt(struct sock *sk, int level, int optname, v.val = sock_flag(sk, SOCK_RCVMARK); break; + case SO_RCVPRIORITY: + v.val = sock_flag(sk, SOCK_RCVPRIORITY); + break; + case SO_RXQ_OVFL: v.val = sock_flag(sk, SOCK_RXQ_OVFL); break; diff --git a/net/socket.c b/net/socket.c index 9a117248f18f..16402b8be5a7 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1008,12 +1008,23 @@ static void sock_recv_mark(struct msghdr *msg, struct sock *sk, } } +static void sock_recv_priority(struct msghdr *msg, struct sock *sk, + struct sk_buff *skb) +{ + if (sock_flag(sk, SOCK_RCVPRIORITY) && skb) { + __u32 priority = skb->priority; + + put_cmsg(msg, SOL_SOCKET, SO_PRIORITY, sizeof(__u32), &priority); + } +} + void __sock_recv_cmsgs(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) { sock_recv_timestamp(msg, sk, skb); sock_recv_drops(msg, sk, skb); sock_recv_mark(msg, sk, skb); + sock_recv_priority(msg, sk, skb); } EXPORT_SYMBOL_GPL(__sock_recv_cmsgs); diff --git a/tools/include/uapi/asm-generic/socket.h b/tools/include/uapi/asm-generic/socket.h index 281df9139d2b..ffff554a5230 100644 --- a/tools/include/uapi/asm-generic/socket.h +++ b/tools/include/uapi/asm-generic/socket.h @@ -126,6 +126,8 @@ #define SCM_TS_OPT_ID 78 +#define SO_RCVPRIORITY 79 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__)) -- cgit v1.2.3 From 915d2f0718a42ee0b334be34cc53664a865a5928 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 27 Nov 2024 16:55:32 -0800 Subject: KVM: Move KVM_REG_SIZE() definition to common uAPI header Define KVM_REG_SIZE() in the common kvm.h header, and delete the arm64 and RISC-V versions. As evidenced by the surrounding definitions, all aspects of the register size encoding are generic, i.e. RISC-V should have moved arm64's definition to common code instead of copy+pasting. Acked-by: Anup Patel Reviewed-by: Andrew Jones Reviewed-by: Muhammad Usama Anjum Link: https://lore.kernel.org/r/20241128005547.4077116-2-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/arm64/include/uapi/asm/kvm.h | 3 --- arch/riscv/include/uapi/asm/kvm.h | 3 --- include/uapi/linux/kvm.h | 4 ++++ 3 files changed, 4 insertions(+), 6 deletions(-) (limited to 'include/uapi') diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 66736ff04011..568bf858f319 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -43,9 +43,6 @@ #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 #define KVM_DIRTY_LOG_PAGE_OFFSET 64 -#define KVM_REG_SIZE(id) \ - (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) - struct kvm_regs { struct user_pt_regs regs; /* sp = sp_el0 */ diff --git a/arch/riscv/include/uapi/asm/kvm.h b/arch/riscv/include/uapi/asm/kvm.h index 3482c9a73d1b..9f60d6185077 100644 --- a/arch/riscv/include/uapi/asm/kvm.h +++ b/arch/riscv/include/uapi/asm/kvm.h @@ -211,9 +211,6 @@ struct kvm_riscv_sbi_sta { #define KVM_RISCV_TIMER_STATE_OFF 0 #define KVM_RISCV_TIMER_STATE_ON 1 -#define KVM_REG_SIZE(id) \ - (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) - /* If you need to interpret the index values, here is the key: */ #define KVM_REG_RISCV_TYPE_MASK 0x00000000FF000000 #define KVM_REG_RISCV_TYPE_SHIFT 24 diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 502ea63b5d2e..343de0a51797 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1070,6 +1070,10 @@ struct kvm_dirty_tlb { #define KVM_REG_SIZE_SHIFT 52 #define KVM_REG_SIZE_MASK 0x00f0000000000000ULL + +#define KVM_REG_SIZE(id) \ + (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) + #define KVM_REG_SIZE_U8 0x0000000000000000ULL #define KVM_REG_SIZE_U16 0x0010000000000000ULL #define KVM_REG_SIZE_U32 0x0020000000000000ULL -- cgit v1.2.3 From af5366bea2cb9dfb5da2880e1dff544f87505300 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 27 Nov 2024 17:33:42 -0800 Subject: KVM: x86: Drop the now unused KVM_X86_DISABLE_VALID_EXITS Drop the KVM_X86_DISABLE_VALID_EXITS definition, as it is misleading, and unused in KVM *because* it is misleading. The set of exits that can be disabled is dynamic, i.e. userspace (and KVM) must check KVM's actual capabilities. Suggested-by: Xiaoyao Li Link: https://lore.kernel.org/r/20241128013424.4096668-16-seanjc@google.com Signed-off-by: Sean Christopherson --- include/uapi/linux/kvm.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 343de0a51797..45e6d8fca9b9 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -617,10 +617,6 @@ struct kvm_ioeventfd { #define KVM_X86_DISABLE_EXITS_HLT (1 << 1) #define KVM_X86_DISABLE_EXITS_PAUSE (1 << 2) #define KVM_X86_DISABLE_EXITS_CSTATE (1 << 3) -#define KVM_X86_DISABLE_VALID_EXITS (KVM_X86_DISABLE_EXITS_MWAIT | \ - KVM_X86_DISABLE_EXITS_HLT | \ - KVM_X86_DISABLE_EXITS_PAUSE | \ - KVM_X86_DISABLE_EXITS_CSTATE) /* for KVM_ENABLE_CAP */ struct kvm_enable_cap { -- cgit v1.2.3 From e10c5cbd1c912c06d887c8a1980ac57e21d87b6f Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Mon, 16 Dec 2024 20:18:41 +0100 Subject: PCI: Update code comment on PCI_EXP_LNKCAP_SLS for PCIe r3.0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Niklas notes that the code comment on the PCI_EXP_LNKCAP_SLS macro is outdated as it reflects the meaning of the field prior to PCIe r3.0. Update it to avoid confusion. Closes: https://lore.kernel.org/r/70829798889c6d779ca0f6cd3260a765780d1369.camel@kernel.org Link: https://lore.kernel.org/r/6152bd17cbe0876365d5f4624fc317529f4bbc85.1734376438.git.lukas@wunner.de Reported-by: Niklas Schnelle Signed-off-by: Lukas Wunner Signed-off-by: Krzysztof WilczyƄski Reviewed-by: Ilpo JĂ€rvinen Reviewed-by: Niklas Schnelle --- include/uapi/linux/pci_regs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index 1601c7ed5fab..02d0ba2999e0 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -533,7 +533,7 @@ #define PCI_EXP_DEVSTA_TRPND 0x0020 /* Transactions Pending */ #define PCI_CAP_EXP_RC_ENDPOINT_SIZEOF_V1 12 /* v1 endpoints without link end here */ #define PCI_EXP_LNKCAP 0x0c /* Link Capabilities */ -#define PCI_EXP_LNKCAP_SLS 0x0000000f /* Supported Link Speeds */ +#define PCI_EXP_LNKCAP_SLS 0x0000000f /* Max Link Speed (prior to PCIe r3.0: Supported Link Speeds) */ #define PCI_EXP_LNKCAP_SLS_2_5GB 0x00000001 /* LNKCAP2 SLS Vector bit 0 */ #define PCI_EXP_LNKCAP_SLS_5_0GB 0x00000002 /* LNKCAP2 SLS Vector bit 1 */ #define PCI_EXP_LNKCAP_SLS_8_0GB 0x00000003 /* LNKCAP2 SLS Vector bit 2 */ -- cgit v1.2.3 From a5874fde3c0884a33ed4145101052318c5e17c74 Mon Sep 17 00:00:00 2001 From: MickaĂ«l SalaĂŒn Date: Thu, 12 Dec 2024 18:42:16 +0100 Subject: exec: Add a new AT_EXECVE_CHECK flag to execveat(2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a new AT_EXECVE_CHECK flag to execveat(2) to check if a file would be allowed for execution. The main use case is for script interpreters and dynamic linkers to check execution permission according to the kernel's security policy. Another use case is to add context to access logs e.g., which script (instead of interpreter) accessed a file. As any executable code, scripts could also use this check [1]. This is different from faccessat(2) + X_OK which only checks a subset of access rights (i.e. inode permission and mount options for regular files), but not the full context (e.g. all LSM access checks). The main use case for access(2) is for SUID processes to (partially) check access on behalf of their caller. The main use case for execveat(2) + AT_EXECVE_CHECK is to check if a script execution would be allowed, according to all the different restrictions in place. Because the use of AT_EXECVE_CHECK follows the exact kernel semantic as for a real execution, user space gets the same error codes. An interesting point of using execveat(2) instead of openat2(2) is that it decouples the check from the enforcement. Indeed, the security check can be logged (e.g. with audit) without blocking an execution environment not yet ready to enforce a strict security policy. LSMs can control or log execution requests with security_bprm_creds_for_exec(). However, to enforce a consistent and complete access control (e.g. on binary's dependencies) LSMs should restrict file executability, or measure executed files, with security_file_open() by checking file->f_flags & __FMODE_EXEC. Because AT_EXECVE_CHECK is dedicated to user space interpreters, it doesn't make sense for the kernel to parse the checked files, look for interpreters known to the kernel (e.g. ELF, shebang), and return ENOEXEC if the format is unknown. Because of that, security_bprm_check() is never called when AT_EXECVE_CHECK is used. It should be noted that script interpreters cannot directly use execveat(2) (without this new AT_EXECVE_CHECK flag) because this could lead to unexpected behaviors e.g., `python script.sh` could lead to Bash being executed to interpret the script. Unlike the kernel, script interpreters may just interpret the shebang as a simple comment, which should not change for backward compatibility reasons. Because scripts or libraries files might not currently have the executable permission set, or because we might want specific users to be allowed to run arbitrary scripts, the following patch provides a dynamic configuration mechanism with the SECBIT_EXEC_RESTRICT_FILE and SECBIT_EXEC_DENY_INTERACTIVE securebits. This is a redesign of the CLIP OS 4's O_MAYEXEC: https://github.com/clipos-archive/src_platform_clip-patches/blob/f5cb330d6b684752e403b4e41b39f7004d88e561/1901_open_mayexec.patch This patch has been used for more than a decade with customized script interpreters. Some examples can be found here: https://github.com/clipos-archive/clipos4_portage-overlay/search?q=O_MAYEXEC Cc: Al Viro Cc: Christian Brauner Cc: Kees Cook Acked-by: Paul Moore Reviewed-by: Serge Hallyn Reviewed-by: Jeff Xu Tested-by: Jeff Xu Link: https://docs.python.org/3/library/io.html#io.open_code [1] Signed-off-by: MickaĂ«l SalaĂŒn Link: https://lore.kernel.org/r/20241212174223.389435-2-mic@digikod.net Signed-off-by: Kees Cook --- Documentation/userspace-api/check_exec.rst | 37 ++++++++++++++++++++++++++++++ Documentation/userspace-api/index.rst | 1 + fs/exec.c | 20 ++++++++++++++-- include/linux/binfmts.h | 7 +++++- include/uapi/linux/fcntl.h | 4 ++++ security/security.c | 10 ++++++++ 6 files changed, 76 insertions(+), 3 deletions(-) create mode 100644 Documentation/userspace-api/check_exec.rst (limited to 'include/uapi') diff --git a/Documentation/userspace-api/check_exec.rst b/Documentation/userspace-api/check_exec.rst new file mode 100644 index 000000000000..393dd7ca19c4 --- /dev/null +++ b/Documentation/userspace-api/check_exec.rst @@ -0,0 +1,37 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. Copyright © 2024 Microsoft Corporation + +=================== +Executability check +=================== + +AT_EXECVE_CHECK +=============== + +Passing the ``AT_EXECVE_CHECK`` flag to :manpage:`execveat(2)` only performs a +check on a regular file and returns 0 if execution of this file would be +allowed, ignoring the file format and then the related interpreter dependencies +(e.g. ELF libraries, script's shebang). + +Programs should always perform this check to apply kernel-level checks against +files that are not directly executed by the kernel but passed to a user space +interpreter instead. All files that contain executable code, from the point of +view of the interpreter, should be checked. However the result of this check +should only be enforced according to ``SECBIT_EXEC_RESTRICT_FILE`` or +``SECBIT_EXEC_DENY_INTERACTIVE.``. + +The main purpose of this flag is to improve the security and consistency of an +execution environment to ensure that direct file execution (e.g. +``./script.sh``) and indirect file execution (e.g. ``sh script.sh``) lead to +the same result. For instance, this can be used to check if a file is +trustworthy according to the caller's environment. + +In a secure environment, libraries and any executable dependencies should also +be checked. For instance, dynamic linking should make sure that all libraries +are allowed for execution to avoid trivial bypass (e.g. using ``LD_PRELOAD``). +For such secure execution environment to make sense, only trusted code should +be executable, which also requires integrity guarantees. + +To avoid race conditions leading to time-of-check to time-of-use issues, +``AT_EXECVE_CHECK`` should be used with ``AT_EMPTY_PATH`` to check against a +file descriptor instead of a path. diff --git a/Documentation/userspace-api/index.rst b/Documentation/userspace-api/index.rst index 274cc7546efc..6272bcf11296 100644 --- a/Documentation/userspace-api/index.rst +++ b/Documentation/userspace-api/index.rst @@ -35,6 +35,7 @@ Security-related interfaces mfd_noexec spec_ctrl tee + check_exec Devices and I/O =============== diff --git a/fs/exec.c b/fs/exec.c index 98cb7ba9983c..e3f461096e84 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -892,7 +892,8 @@ static struct file *do_open_execat(int fd, struct filename *name, int flags) .lookup_flags = LOOKUP_FOLLOW, }; - if ((flags & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) != 0) + if ((flags & + ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH | AT_EXECVE_CHECK)) != 0) return ERR_PTR(-EINVAL); if (flags & AT_SYMLINK_NOFOLLOW) open_exec_flags.lookup_flags &= ~LOOKUP_FOLLOW; @@ -1541,6 +1542,21 @@ static struct linux_binprm *alloc_bprm(int fd, struct filename *filename, int fl } bprm->interp = bprm->filename; + /* + * At this point, security_file_open() has already been called (with + * __FMODE_EXEC) and access control checks for AT_EXECVE_CHECK will + * stop just after the security_bprm_creds_for_exec() call in + * bprm_execve(). Indeed, the kernel should not try to parse the + * content of the file with exec_binprm() nor change the calling + * thread, which means that the following security functions will not + * be called: + * - security_bprm_check() + * - security_bprm_creds_from_file() + * - security_bprm_committing_creds() + * - security_bprm_committed_creds() + */ + bprm->is_check = !!(flags & AT_EXECVE_CHECK); + retval = bprm_mm_init(bprm); if (!retval) return bprm; @@ -1836,7 +1852,7 @@ static int bprm_execve(struct linux_binprm *bprm) /* Set the unchanging part of bprm->cred */ retval = security_bprm_creds_for_exec(bprm); - if (retval) + if (retval || bprm->is_check) goto out; retval = exec_binprm(bprm); diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index e6c00e860951..8ff0eb3644a1 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -42,7 +42,12 @@ struct linux_binprm { * Set when errors can no longer be returned to the * original userspace. */ - point_of_no_return:1; + point_of_no_return:1, + /* + * Set by user space to check executability according to the + * caller's environment. + */ + is_check:1; struct file *executable; /* Executable to pass to the interpreter */ struct file *interpreter; struct file *file; diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h index 6e6907e63bfc..a15ac2fa4b20 100644 --- a/include/uapi/linux/fcntl.h +++ b/include/uapi/linux/fcntl.h @@ -155,4 +155,8 @@ #define AT_HANDLE_MNT_ID_UNIQUE 0x001 /* Return the u64 unique mount ID. */ #define AT_HANDLE_CONNECTABLE 0x002 /* Request a connectable file handle */ +/* Flags for execveat2(2). */ +#define AT_EXECVE_CHECK 0x10000 /* Only perform a check if execution + would be allowed. */ + #endif /* _UAPI_LINUX_FCNTL_H */ diff --git a/security/security.c b/security/security.c index 09664e09fec9..dae7e903947f 100644 --- a/security/security.c +++ b/security/security.c @@ -1248,6 +1248,12 @@ int security_vm_enough_memory_mm(struct mm_struct *mm, long pages) * to 1 if AT_SECURE should be set to request libc enable secure mode. @bprm * contains the linux_binprm structure. * + * If execveat(2) is called with the AT_EXECVE_CHECK flag, bprm->is_check is + * set. The result must be the same as without this flag even if the execution + * will never really happen and @bprm will always be dropped. + * + * This hook must not change current->cred, only @bprm->cred. + * * Return: Returns 0 if the hook is successful and permission is granted. */ int security_bprm_creds_for_exec(struct linux_binprm *bprm) @@ -3098,6 +3104,10 @@ int security_file_receive(struct file *file) * Save open-time permission checking state for later use upon file_permission, * and recheck access if anything has changed since inode_permission. * + * We can check if a file is opened for execution (e.g. execve(2) call), either + * directly or indirectly (e.g. ELF's ld.so) by checking file->f_flags & + * __FMODE_EXEC . + * * Return: Returns 0 if permission is granted. */ int security_file_open(struct file *file) -- cgit v1.2.3 From a0623b2a1d595341971c189b90a6b06f42cd209d Mon Sep 17 00:00:00 2001 From: MickaĂ«l SalaĂŒn Date: Thu, 12 Dec 2024 18:42:17 +0100 Subject: security: Add EXEC_RESTRICT_FILE and EXEC_DENY_INTERACTIVE securebits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The new SECBIT_EXEC_RESTRICT_FILE, SECBIT_EXEC_DENY_INTERACTIVE, and their *_LOCKED counterparts are designed to be set by processes setting up an execution environment, such as a user session, a container, or a security sandbox. Unlike other securebits, these ones can be set by unprivileged processes. Like seccomp filters or Landlock domains, the securebits are inherited across processes. When SECBIT_EXEC_RESTRICT_FILE is set, programs interpreting code should control executable resources according to execveat(2) + AT_EXECVE_CHECK (see previous commit). When SECBIT_EXEC_DENY_INTERACTIVE is set, a process should deny execution of user interactive commands (which excludes executable regular files). Being able to configure each of these securebits enables system administrators or owner of image containers to gradually validate the related changes and to identify potential issues (e.g. with interpreter or audit logs). It should be noted that unlike other security bits, the SECBIT_EXEC_RESTRICT_FILE and SECBIT_EXEC_DENY_INTERACTIVE bits are dedicated to user space willing to restrict itself. Because of that, they only make sense in the context of a trusted environment (e.g. sandbox, container, user session, full system) where the process changing its behavior (according to these bits) and all its parent processes are trusted. Otherwise, any parent process could just execute its own malicious code (interpreting a script or not), or even enforce a seccomp filter to mask these bits. Such a secure environment can be achieved with an appropriate access control (e.g. mount's noexec option, file access rights, LSM policy) and an enlighten ld.so checking that libraries are allowed for execution e.g., to protect against illegitimate use of LD_PRELOAD. Ptrace restrictions according to these securebits would not make sense because of the processes' trust assumption. Scripts may need some changes to deal with untrusted data (e.g. stdin, environment variables), but that is outside the scope of the kernel. See chromeOS's documentation about script execution control and the related threat model: https://www.chromium.org/chromium-os/developer-library/guides/security/noexec-shell-scripts/ Cc: Al Viro Cc: Andy Lutomirski Cc: Christian Brauner Cc: Kees Cook Cc: Paul Moore Reviewed-by: Serge Hallyn Reviewed-by: Jeff Xu Tested-by: Jeff Xu Signed-off-by: MickaĂ«l SalaĂŒn Link: https://lore.kernel.org/r/20241212174223.389435-3-mic@digikod.net Signed-off-by: Kees Cook --- Documentation/userspace-api/check_exec.rst | 107 +++++++++++++++++++++++++++++ include/uapi/linux/securebits.h | 24 ++++++- security/commoncap.c | 29 ++++++-- 3 files changed, 153 insertions(+), 7 deletions(-) (limited to 'include/uapi') diff --git a/Documentation/userspace-api/check_exec.rst b/Documentation/userspace-api/check_exec.rst index 393dd7ca19c4..05dfe3b56f71 100644 --- a/Documentation/userspace-api/check_exec.rst +++ b/Documentation/userspace-api/check_exec.rst @@ -5,6 +5,31 @@ Executability check =================== +The ``AT_EXECVE_CHECK`` :manpage:`execveat(2)` flag, and the +``SECBIT_EXEC_RESTRICT_FILE`` and ``SECBIT_EXEC_DENY_INTERACTIVE`` securebits +are intended for script interpreters and dynamic linkers to enforce a +consistent execution security policy handled by the kernel. See the +`samples/check-exec/inc.c`_ example. + +Whether an interpreter should check these securebits or not depends on the +security risk of running malicious scripts with respect to the execution +environment, and whether the kernel can check if a script is trustworthy or +not. For instance, Python scripts running on a server can use arbitrary +syscalls and access arbitrary files. Such interpreters should then be +enlighten to use these securebits and let users define their security policy. +However, a JavaScript engine running in a web browser should already be +sandboxed and then should not be able to harm the user's environment. + +Script interpreters or dynamic linkers built for tailored execution environments +(e.g. hardened Linux distributions or hermetic container images) could use +``AT_EXECVE_CHECK`` without checking the related securebits if backward +compatibility is handled by something else (e.g. atomic update ensuring that +all legitimate libraries are allowed to be executed). It is then recommended +for script interpreters and dynamic linkers to check the securebits at run time +by default, but also to provide the ability for custom builds to behave like if +``SECBIT_EXEC_RESTRICT_FILE`` or ``SECBIT_EXEC_DENY_INTERACTIVE`` were always +set to 1 (i.e. always enforce restrictions). + AT_EXECVE_CHECK =============== @@ -35,3 +60,85 @@ be executable, which also requires integrity guarantees. To avoid race conditions leading to time-of-check to time-of-use issues, ``AT_EXECVE_CHECK`` should be used with ``AT_EMPTY_PATH`` to check against a file descriptor instead of a path. + +SECBIT_EXEC_RESTRICT_FILE and SECBIT_EXEC_DENY_INTERACTIVE +========================================================== + +When ``SECBIT_EXEC_RESTRICT_FILE`` is set, a process should only interpret or +execute a file if a call to :manpage:`execveat(2)` with the related file +descriptor and the ``AT_EXECVE_CHECK`` flag succeed. + +This secure bit may be set by user session managers, service managers, +container runtimes, sandboxer tools... Except for test environments, the +related ``SECBIT_EXEC_RESTRICT_FILE_LOCKED`` bit should also be set. + +Programs should only enforce consistent restrictions according to the +securebits but without relying on any other user-controlled configuration. +Indeed, the use case for these securebits is to only trust executable code +vetted by the system configuration (through the kernel), so we should be +careful to not let untrusted users control this configuration. + +However, script interpreters may still use user configuration such as +environment variables as long as it is not a way to disable the securebits +checks. For instance, the ``PATH`` and ``LD_PRELOAD`` variables can be set by +a script's caller. Changing these variables may lead to unintended code +executions, but only from vetted executable programs, which is OK. For this to +make sense, the system should provide a consistent security policy to avoid +arbitrary code execution e.g., by enforcing a write xor execute policy. + +When ``SECBIT_EXEC_DENY_INTERACTIVE`` is set, a process should never interpret +interactive user commands (e.g. scripts). However, if such commands are passed +through a file descriptor (e.g. stdin), its content should be interpreted if a +call to :manpage:`execveat(2)` with the related file descriptor and the +``AT_EXECVE_CHECK`` flag succeed. + +For instance, script interpreters called with a script snippet as argument +should always deny such execution if ``SECBIT_EXEC_DENY_INTERACTIVE`` is set. + +This secure bit may be set by user session managers, service managers, +container runtimes, sandboxer tools... Except for test environments, the +related ``SECBIT_EXEC_DENY_INTERACTIVE_LOCKED`` bit should also be set. + +Here is the expected behavior for a script interpreter according to combination +of any exec securebits: + +1. ``SECBIT_EXEC_RESTRICT_FILE=0`` and ``SECBIT_EXEC_DENY_INTERACTIVE=0`` + + Always interpret scripts, and allow arbitrary user commands (default). + + No threat, everyone and everything is trusted, but we can get ahead of + potential issues thanks to the call to :manpage:`execveat(2)` with + ``AT_EXECVE_CHECK`` which should always be performed but ignored by the + script interpreter. Indeed, this check is still important to enable systems + administrators to verify requests (e.g. with audit) and prepare for + migration to a secure mode. + +2. ``SECBIT_EXEC_RESTRICT_FILE=1`` and ``SECBIT_EXEC_DENY_INTERACTIVE=0`` + + Deny script interpretation if they are not executable, but allow + arbitrary user commands. + + The threat is (potential) malicious scripts run by trusted (and not fooled) + users. That can protect against unintended script executions (e.g. ``sh + /tmp/*.sh``). This makes sense for (semi-restricted) user sessions. + +3. ``SECBIT_EXEC_RESTRICT_FILE=0`` and ``SECBIT_EXEC_DENY_INTERACTIVE=1`` + + Always interpret scripts, but deny arbitrary user commands. + + This use case may be useful for secure services (i.e. without interactive + user session) where scripts' integrity is verified (e.g. with IMA/EVM or + dm-verity/IPE) but where access rights might not be ready yet. Indeed, + arbitrary interactive commands would be much more difficult to check. + +4. ``SECBIT_EXEC_RESTRICT_FILE=1`` and ``SECBIT_EXEC_DENY_INTERACTIVE=1`` + + Deny script interpretation if they are not executable, and also deny + any arbitrary user commands. + + The threat is malicious scripts run by untrusted users (but trusted code). + This makes sense for system services that may only execute trusted scripts. + +.. Links +.. _samples/check-exec/inc.c: + https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/samples/check-exec/inc.c diff --git a/include/uapi/linux/securebits.h b/include/uapi/linux/securebits.h index d6d98877ff1a..3fba30dbd68b 100644 --- a/include/uapi/linux/securebits.h +++ b/include/uapi/linux/securebits.h @@ -52,10 +52,32 @@ #define SECBIT_NO_CAP_AMBIENT_RAISE_LOCKED \ (issecure_mask(SECURE_NO_CAP_AMBIENT_RAISE_LOCKED)) +/* See Documentation/userspace-api/check_exec.rst */ +#define SECURE_EXEC_RESTRICT_FILE 8 +#define SECURE_EXEC_RESTRICT_FILE_LOCKED 9 /* make bit-8 immutable */ + +#define SECBIT_EXEC_RESTRICT_FILE (issecure_mask(SECURE_EXEC_RESTRICT_FILE)) +#define SECBIT_EXEC_RESTRICT_FILE_LOCKED \ + (issecure_mask(SECURE_EXEC_RESTRICT_FILE_LOCKED)) + +/* See Documentation/userspace-api/check_exec.rst */ +#define SECURE_EXEC_DENY_INTERACTIVE 10 +#define SECURE_EXEC_DENY_INTERACTIVE_LOCKED 11 /* make bit-10 immutable */ + +#define SECBIT_EXEC_DENY_INTERACTIVE \ + (issecure_mask(SECURE_EXEC_DENY_INTERACTIVE)) +#define SECBIT_EXEC_DENY_INTERACTIVE_LOCKED \ + (issecure_mask(SECURE_EXEC_DENY_INTERACTIVE_LOCKED)) + #define SECURE_ALL_BITS (issecure_mask(SECURE_NOROOT) | \ issecure_mask(SECURE_NO_SETUID_FIXUP) | \ issecure_mask(SECURE_KEEP_CAPS) | \ - issecure_mask(SECURE_NO_CAP_AMBIENT_RAISE)) + issecure_mask(SECURE_NO_CAP_AMBIENT_RAISE) | \ + issecure_mask(SECURE_EXEC_RESTRICT_FILE) | \ + issecure_mask(SECURE_EXEC_DENY_INTERACTIVE)) #define SECURE_ALL_LOCKS (SECURE_ALL_BITS << 1) +#define SECURE_ALL_UNPRIVILEGED (issecure_mask(SECURE_EXEC_RESTRICT_FILE) | \ + issecure_mask(SECURE_EXEC_DENY_INTERACTIVE)) + #endif /* _UAPI_LINUX_SECUREBITS_H */ diff --git a/security/commoncap.c b/security/commoncap.c index cefad323a0b1..52ea01acb453 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -1302,21 +1302,38 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, & (old->securebits ^ arg2)) /*[1]*/ || ((old->securebits & SECURE_ALL_LOCKS & ~arg2)) /*[2]*/ || (arg2 & ~(SECURE_ALL_LOCKS | SECURE_ALL_BITS)) /*[3]*/ - || (cap_capable(current_cred(), - current_cred()->user_ns, - CAP_SETPCAP, - CAP_OPT_NONE) != 0) /*[4]*/ /* * [1] no changing of bits that are locked * [2] no unlocking of locks * [3] no setting of unsupported bits - * [4] doing anything requires privilege (go read about - * the "sendmail capabilities bug") */ ) /* cannot change a locked bit */ return -EPERM; + /* + * Doing anything requires privilege (go read about the + * "sendmail capabilities bug"), except for unprivileged bits. + * Indeed, the SECURE_ALL_UNPRIVILEGED bits are not + * restrictions enforced by the kernel but by user space on + * itself. + */ + if (cap_capable(current_cred(), current_cred()->user_ns, + CAP_SETPCAP, CAP_OPT_NONE) != 0) { + const unsigned long unpriv_and_locks = + SECURE_ALL_UNPRIVILEGED | + SECURE_ALL_UNPRIVILEGED << 1; + const unsigned long changed = old->securebits ^ arg2; + + /* For legacy reason, denies non-change. */ + if (!changed) + return -EPERM; + + /* Denies privileged changes. */ + if (changed & ~unpriv_and_locks) + return -EPERM; + } + new = prepare_creds(); if (!new) return -ENOMEM; -- cgit v1.2.3 From 95b3cdafd7cb74414070893445a9b731793f7b55 Mon Sep 17 00:00:00 2001 From: Mimi Zohar Date: Thu, 12 Dec 2024 18:42:23 +0100 Subject: ima: instantiate the bprm_creds_for_exec() hook MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Like direct file execution (e.g. ./script.sh), indirect file execution (e.g. sh script.sh) needs to be measured and appraised. Instantiate the new security_bprm_creds_for_exec() hook to measure and verify the indirect file's integrity. Unlike direct file execution, indirect file execution is optionally enforced by the interpreter. Differentiate kernel and userspace enforced integrity audit messages. Co-developed-by: Roberto Sassu Signed-off-by: Roberto Sassu Signed-off-by: Mimi Zohar Tested-by: Stefan Berger Reviewed-by: MickaĂ«l SalaĂŒn Signed-off-by: MickaĂ«l SalaĂŒn Link: https://lore.kernel.org/r/20241212174223.389435-9-mic@digikod.net Signed-off-by: Kees Cook --- include/uapi/linux/audit.h | 1 + security/integrity/ima/ima_appraise.c | 27 +++++++++++++++++++++++++-- security/integrity/ima/ima_main.c | 29 +++++++++++++++++++++++++++++ 3 files changed, 55 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h index 75e21a135483..d9a069b4a775 100644 --- a/include/uapi/linux/audit.h +++ b/include/uapi/linux/audit.h @@ -161,6 +161,7 @@ #define AUDIT_INTEGRITY_RULE 1805 /* policy rule */ #define AUDIT_INTEGRITY_EVM_XATTR 1806 /* New EVM-covered xattr */ #define AUDIT_INTEGRITY_POLICY_RULE 1807 /* IMA policy rules */ +#define AUDIT_INTEGRITY_USERSPACE 1808 /* Userspace enforced data integrity */ #define AUDIT_KERNEL 2000 /* Asynchronous audit record. NOT A REQUEST. */ diff --git a/security/integrity/ima/ima_appraise.c b/security/integrity/ima/ima_appraise.c index 884a3533f7af..f435eff4667f 100644 --- a/security/integrity/ima/ima_appraise.c +++ b/security/integrity/ima/ima_appraise.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -469,6 +470,17 @@ int ima_check_blacklist(struct ima_iint_cache *iint, return rc; } +static bool is_bprm_creds_for_exec(enum ima_hooks func, struct file *file) +{ + struct linux_binprm *bprm; + + if (func == BPRM_CHECK) { + bprm = container_of(&file, struct linux_binprm, file); + return bprm->is_check; + } + return false; +} + /* * ima_appraise_measurement - appraise file measurement * @@ -483,6 +495,7 @@ int ima_appraise_measurement(enum ima_hooks func, struct ima_iint_cache *iint, int xattr_len, const struct modsig *modsig) { static const char op[] = "appraise_data"; + int audit_msgno = AUDIT_INTEGRITY_DATA; const char *cause = "unknown"; struct dentry *dentry = file_dentry(file); struct inode *inode = d_backing_inode(dentry); @@ -494,6 +507,16 @@ int ima_appraise_measurement(enum ima_hooks func, struct ima_iint_cache *iint, if (!(inode->i_opflags & IOP_XATTR) && !try_modsig) return INTEGRITY_UNKNOWN; + /* + * Unlike any of the other LSM hooks where the kernel enforces file + * integrity, enforcing file integrity for the bprm_creds_for_exec() + * LSM hook with the AT_EXECVE_CHECK flag is left up to the discretion + * of the script interpreter(userspace). Differentiate kernel and + * userspace enforced integrity audit messages. + */ + if (is_bprm_creds_for_exec(func, file)) + audit_msgno = AUDIT_INTEGRITY_USERSPACE; + /* If reading the xattr failed and there's no modsig, error out. */ if (rc <= 0 && !try_modsig) { if (rc && rc != -ENODATA) @@ -569,7 +592,7 @@ out: (iint->flags & IMA_FAIL_UNVERIFIABLE_SIGS))) { status = INTEGRITY_FAIL; cause = "unverifiable-signature"; - integrity_audit_msg(AUDIT_INTEGRITY_DATA, inode, filename, + integrity_audit_msg(audit_msgno, inode, filename, op, cause, rc, 0); } else if (status != INTEGRITY_PASS) { /* Fix mode, but don't replace file signatures. */ @@ -589,7 +612,7 @@ out: status = INTEGRITY_PASS; } - integrity_audit_msg(AUDIT_INTEGRITY_DATA, inode, filename, + integrity_audit_msg(audit_msgno, inode, filename, op, cause, rc, 0); } else { ima_cache_flags(iint, func); diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c index 9b87556b03a7..9f9897a7c217 100644 --- a/security/integrity/ima/ima_main.c +++ b/security/integrity/ima/ima_main.c @@ -554,6 +554,34 @@ static int ima_bprm_check(struct linux_binprm *bprm) MAY_EXEC, CREDS_CHECK); } +/** + * ima_bprm_creds_for_exec - collect/store/appraise measurement. + * @bprm: contains the linux_binprm structure + * + * Based on the IMA policy and the execveat(2) AT_EXECVE_CHECK flag, measure + * and appraise the integrity of a file to be executed by script interpreters. + * Unlike any of the other LSM hooks where the kernel enforces file integrity, + * enforcing file integrity is left up to the discretion of the script + * interpreter (userspace). + * + * On success return 0. On integrity appraisal error, assuming the file + * is in policy and IMA-appraisal is in enforcing mode, return -EACCES. + */ +static int ima_bprm_creds_for_exec(struct linux_binprm *bprm) +{ + /* + * As security_bprm_check() is called multiple times, both + * the script and the shebang interpreter are measured, appraised, + * and audited. Limit usage of this LSM hook to just measuring, + * appraising, and auditing the indirect script execution + * (e.g. ./sh example.sh). + */ + if (!bprm->is_check) + return 0; + + return ima_bprm_check(bprm); +} + /** * ima_file_check - based on policy, collect/store measurement. * @file: pointer to the file to be measured @@ -1174,6 +1202,7 @@ static int __init init_ima(void) static struct security_hook_list ima_hooks[] __ro_after_init = { LSM_HOOK_INIT(bprm_check_security, ima_bprm_check), + LSM_HOOK_INIT(bprm_creds_for_exec, ima_bprm_creds_for_exec), LSM_HOOK_INIT(file_post_open, ima_file_check), LSM_HOOK_INIT(inode_post_create_tmpfile, ima_post_create_tmpfile), LSM_HOOK_INIT(file_release, ima_file_free), -- cgit v1.2.3 From d1d761b3012e99d55d288d435384be606302cb2c Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 16 Dec 2024 19:11:53 +0200 Subject: net: fib_rules: Add flow label selector attributes Add new FIB rule attributes which will allow user space to match on the IPv6 flow label with a mask. Temporarily set the type of the attributes to 'NLA_REJECT' while support is being added in the IPv6 code. Reviewed-by: Petr Machata Signed-off-by: Ido Schimmel Reviewed-by: Guillaume Nault Signed-off-by: Paolo Abeni --- include/uapi/linux/fib_rules.h | 2 ++ net/core/fib_rules.c | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/fib_rules.h b/include/uapi/linux/fib_rules.h index a6924dd3aff1..00e9890ca3c0 100644 --- a/include/uapi/linux/fib_rules.h +++ b/include/uapi/linux/fib_rules.h @@ -68,6 +68,8 @@ enum { FRA_SPORT_RANGE, /* sport */ FRA_DPORT_RANGE, /* dport */ FRA_DSCP, /* dscp */ + FRA_FLOWLABEL, /* flowlabel */ + FRA_FLOWLABEL_MASK, /* flowlabel mask */ __FRA_MAX }; diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 34185d138c95..153b14aade42 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -770,6 +770,8 @@ static const struct nla_policy fib_rule_policy[FRA_MAX + 1] = { [FRA_SPORT_RANGE] = { .len = sizeof(struct fib_rule_port_range) }, [FRA_DPORT_RANGE] = { .len = sizeof(struct fib_rule_port_range) }, [FRA_DSCP] = NLA_POLICY_MAX(NLA_U8, INET_DSCP_MASK >> 2), + [FRA_FLOWLABEL] = { .type = NLA_REJECT }, + [FRA_FLOWLABEL_MASK] = { .type = NLA_REJECT }, }; int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, -- cgit v1.2.3 From ba4138032ae3b5b8e2b68d2f2647cdc0817b05a6 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 16 Dec 2024 19:11:58 +0200 Subject: ipv6: Add flow label to route get requests The default IPv6 multipath hash policy takes the flow label into account when calculating a multipath hash and previous patches added a flow label selector to IPv6 FIB rules. Allow user space to specify a flow label in route get requests by adding a new netlink attribute and using its value to populate the "flowlabel" field in the IPv6 flow info structure prior to a route lookup. Deny the attribute in RTM_{NEW,DEL}ROUTE requests by checking for it in rtm_to_fib6_config() and returning an error if present. A subsequent patch will use this capability to test the new flow label selector in IPv6 FIB rules. Reviewed-by: Petr Machata Signed-off-by: Ido Schimmel Reviewed-by: Guillaume Nault Signed-off-by: Paolo Abeni --- include/uapi/linux/rtnetlink.h | 1 + net/ipv6/route.c | 20 +++++++++++++++++++- 2 files changed, 20 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index eccc0e7dcb7d..5ee94c511a28 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -393,6 +393,7 @@ enum rtattr_type_t { RTA_SPORT, RTA_DPORT, RTA_NH_ID, + RTA_FLOWLABEL, __RTA_MAX }; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 67ff16c04718..78362822b907 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -5005,6 +5005,7 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = { [RTA_SPORT] = { .type = NLA_U16 }, [RTA_DPORT] = { .type = NLA_U16 }, [RTA_NH_ID] = { .type = NLA_U32 }, + [RTA_FLOWLABEL] = { .type = NLA_BE32 }, }; static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -5030,6 +5031,12 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, goto errout; } + if (tb[RTA_FLOWLABEL]) { + NL_SET_ERR_MSG_ATTR(extack, tb[RTA_FLOWLABEL], + "Flow label cannot be specified for this operation"); + goto errout; + } + *cfg = (struct fib6_config){ .fc_table = rtm->rtm_table, .fc_dst_len = rtm->rtm_dst_len, @@ -6013,6 +6020,13 @@ static int inet6_rtm_valid_getroute_req(struct sk_buff *skb, return -EINVAL; } + if (tb[RTA_FLOWLABEL] && + (nla_get_be32(tb[RTA_FLOWLABEL]) & ~IPV6_FLOWLABEL_MASK)) { + NL_SET_ERR_MSG_ATTR(extack, tb[RTA_FLOWLABEL], + "Invalid flow label"); + return -EINVAL; + } + for (i = 0; i <= RTA_MAX; i++) { if (!tb[i]) continue; @@ -6027,6 +6041,7 @@ static int inet6_rtm_valid_getroute_req(struct sk_buff *skb, case RTA_SPORT: case RTA_DPORT: case RTA_IP_PROTO: + case RTA_FLOWLABEL: break; default: NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in get route request"); @@ -6049,6 +6064,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct sk_buff *skb; struct rtmsg *rtm; struct flowi6 fl6 = {}; + __be32 flowlabel; bool fibmatch; err = inet6_rtm_valid_getroute_req(in_skb, nlh, tb, extack); @@ -6057,7 +6073,6 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, err = -EINVAL; rtm = nlmsg_data(nlh); - fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0); fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH); if (tb[RTA_SRC]) { @@ -6103,6 +6118,9 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, goto errout; } + flowlabel = nla_get_be32_default(tb[RTA_FLOWLABEL], 0); + fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, flowlabel); + if (iif) { struct net_device *dev; int flags = 0; -- cgit v1.2.3 From 724c6ce38bbaeb4b3f109b0e066d6c0ecd15446c Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Thu, 19 Dec 2024 14:57:34 +0100 Subject: stddef: make __struct_group() UAPI C++-friendly For the most part of the C++ history, it couldn't have type declarations inside anonymous unions for different reasons. At the same time, __struct_group() relies on the latters, so when the @TAG argument is not empty, C++ code doesn't want to build (even under `extern "C"`): ../linux/include/uapi/linux/pkt_cls.h:25:24: error: 'struct tc_u32_sel::::tc_u32_sel_hdr,' invalid; an anonymous union may only have public non-static data members [-fpermissive] The safest way to fix this without trying to switch standards (which is impossible in UAPI anyway) etc., is to disable tag declaration for that language. This won't break anything since for now it's not buildable at all. Use a separate definition for __struct_group() when __cplusplus is defined to mitigate the error, including the version from tools/. Fixes: 50d7bd38c3aa ("stddef: Introduce struct_group() helper macro") Reported-by: Christopher Ferris Closes: https://lore.kernel.org/linux-hardening/Z1HZpe3WE5As8UAz@google.com Suggested-by: Kees Cook # __struct_group_tag() Signed-off-by: Alexander Lobakin Reviewed-by: Gustavo A. R. Silva Link: https://lore.kernel.org/r/20241219135734.2130002-1-aleksander.lobakin@intel.com Signed-off-by: Kees Cook --- include/uapi/linux/stddef.h | 13 ++++++++++--- tools/include/uapi/linux/stddef.h | 15 +++++++++++---- 2 files changed, 21 insertions(+), 7 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/stddef.h b/include/uapi/linux/stddef.h index 58154117d9b0..a6fce46aeb37 100644 --- a/include/uapi/linux/stddef.h +++ b/include/uapi/linux/stddef.h @@ -8,6 +8,13 @@ #define __always_inline inline #endif +/* Not all C++ standards support type declarations inside an anonymous union */ +#ifndef __cplusplus +#define __struct_group_tag(TAG) TAG +#else +#define __struct_group_tag(TAG) +#endif + /** * __struct_group() - Create a mirrored named and anonyomous struct * @@ -20,13 +27,13 @@ * and size: one anonymous and one named. The former's members can be used * normally without sub-struct naming, and the latter can be used to * reason about the start, end, and size of the group of struct members. - * The named struct can also be explicitly tagged for layer reuse, as well - * as both having struct attributes appended. + * The named struct can also be explicitly tagged for layer reuse (C only), + * as well as both having struct attributes appended. */ #define __struct_group(TAG, NAME, ATTRS, MEMBERS...) \ union { \ struct { MEMBERS } ATTRS; \ - struct TAG { MEMBERS } ATTRS NAME; \ + struct __struct_group_tag(TAG) { MEMBERS } ATTRS NAME; \ } ATTRS #ifdef __cplusplus diff --git a/tools/include/uapi/linux/stddef.h b/tools/include/uapi/linux/stddef.h index bb6ea517efb5..c53cde425406 100644 --- a/tools/include/uapi/linux/stddef.h +++ b/tools/include/uapi/linux/stddef.h @@ -8,6 +8,13 @@ #define __always_inline __inline__ #endif +/* Not all C++ standards support type declarations inside an anonymous union */ +#ifndef __cplusplus +#define __struct_group_tag(TAG) TAG +#else +#define __struct_group_tag(TAG) +#endif + /** * __struct_group() - Create a mirrored named and anonyomous struct * @@ -20,14 +27,14 @@ * and size: one anonymous and one named. The former's members can be used * normally without sub-struct naming, and the latter can be used to * reason about the start, end, and size of the group of struct members. - * The named struct can also be explicitly tagged for layer reuse, as well - * as both having struct attributes appended. + * The named struct can also be explicitly tagged for layer reuse (C only), + * as well as both having struct attributes appended. */ #define __struct_group(TAG, NAME, ATTRS, MEMBERS...) \ union { \ struct { MEMBERS } ATTRS; \ - struct TAG { MEMBERS } ATTRS NAME; \ - } + struct __struct_group_tag(TAG) { MEMBERS } ATTRS NAME; \ + } ATTRS /** * __DECLARE_FLEX_ARRAY() - Declare a flexible array usable in a union -- cgit v1.2.3 From 135ec43eb29c68ed26e2d10f221d43f7d9139a8f Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 20 Nov 2024 17:13:52 -0800 Subject: fiemap: use kernel-doc includes in fiemap docbook Add some kernel-doc notation to structs in fiemap header files then pull that into Documentation/filesystems/fiemap.rst instead of duplicating the header file structs in fiemap.rst. This helps to future-proof fiemap.rst against struct changes. Add missing flags documentation from header files into fiemap.rst for FIEMAP_FLAG_CACHE and FIEMAP_EXTENT_SHARED. Signed-off-by: Randy Dunlap Link: https://lore.kernel.org/r/20241121011352.201907-1-rdunlap@infradead.org Cc: Christoph Hellwig Cc: Alexander Viro Cc: Christian Brauner Cc: Jan Kara Cc: Jonathan Corbet Cc: linux-doc@vger.kernel.org Cc: Matthew Wilcox Signed-off-by: Christian Brauner --- Documentation/filesystems/fiemap.rst | 49 +++++++++++------------------------- include/linux/fiemap.h | 16 ++++++++---- include/uapi/linux/fiemap.h | 47 +++++++++++++++++++++++----------- 3 files changed, 59 insertions(+), 53 deletions(-) (limited to 'include/uapi') diff --git a/Documentation/filesystems/fiemap.rst b/Documentation/filesystems/fiemap.rst index 93fc96f760aa..23b3ed229e49 100644 --- a/Documentation/filesystems/fiemap.rst +++ b/Documentation/filesystems/fiemap.rst @@ -12,21 +12,10 @@ returns a list of extents. Request Basics -------------- -A fiemap request is encoded within struct fiemap:: - - struct fiemap { - __u64 fm_start; /* logical offset (inclusive) at - * which to start mapping (in) */ - __u64 fm_length; /* logical length of mapping which - * userspace cares about (in) */ - __u32 fm_flags; /* FIEMAP_FLAG_* flags for request (in/out) */ - __u32 fm_mapped_extents; /* number of extents that were - * mapped (out) */ - __u32 fm_extent_count; /* size of fm_extents array (in) */ - __u32 fm_reserved; - struct fiemap_extent fm_extents[0]; /* array of mapped extents (out) */ - }; +A fiemap request is encoded within struct fiemap: +.. kernel-doc:: include/uapi/linux/fiemap.h + :identifiers: fiemap fm_start, and fm_length specify the logical range within the file which the process would like mappings for. Extents returned mirror @@ -60,6 +49,8 @@ FIEMAP_FLAG_XATTR If this flag is set, the extents returned will describe the inodes extended attribute lookup tree, instead of its data tree. +FIEMAP_FLAG_CACHE + This flag requests caching of the extents. Extent Mapping -------------- @@ -77,18 +68,10 @@ complete the requested range and will not have the FIEMAP_EXTENT_LAST flag set (see the next section on extent flags). Each extent is described by a single fiemap_extent structure as -returned in fm_extents:: - - struct fiemap_extent { - __u64 fe_logical; /* logical offset in bytes for the start of - * the extent */ - __u64 fe_physical; /* physical offset in bytes for the start - * of the extent */ - __u64 fe_length; /* length in bytes for the extent */ - __u64 fe_reserved64[2]; - __u32 fe_flags; /* FIEMAP_EXTENT_* flags for this extent */ - __u32 fe_reserved[3]; - }; +returned in fm_extents: + +.. kernel-doc:: include/uapi/linux/fiemap.h + :identifiers: fiemap_extent All offsets and lengths are in bytes and mirror those on disk. It is valid for an extents logical offset to start before the request or its logical @@ -175,6 +158,8 @@ FIEMAP_EXTENT_MERGED userspace would be highly inefficient, the kernel will try to merge most adjacent blocks into 'extents'. +FIEMAP_EXTENT_SHARED + This flag is set to request that space be shared with other files. VFS -> File System Implementation --------------------------------- @@ -191,14 +176,10 @@ each discovered extent:: u64 len); ->fiemap is passed struct fiemap_extent_info which describes the -fiemap request:: - - struct fiemap_extent_info { - unsigned int fi_flags; /* Flags as passed from user */ - unsigned int fi_extents_mapped; /* Number of mapped extents */ - unsigned int fi_extents_max; /* Size of fiemap_extent array */ - struct fiemap_extent *fi_extents_start; /* Start of fiemap_extent array */ - }; +fiemap request: + +.. kernel-doc:: include/linux/fiemap.h + :identifiers: fiemap_extent_info It is intended that the file system should not need to access any of this structure directly. Filesystem handlers should be tolerant to signals and return diff --git a/include/linux/fiemap.h b/include/linux/fiemap.h index c50882f19235..966092ffa89a 100644 --- a/include/linux/fiemap.h +++ b/include/linux/fiemap.h @@ -5,12 +5,18 @@ #include #include +/** + * struct fiemap_extent_info - fiemap request to a filesystem + * @fi_flags: Flags as passed from user + * @fi_extents_mapped: Number of mapped extents + * @fi_extents_max: Size of fiemap_extent array + * @fi_extents_start: Start of fiemap_extent array + */ struct fiemap_extent_info { - unsigned int fi_flags; /* Flags as passed from user */ - unsigned int fi_extents_mapped; /* Number of mapped extents */ - unsigned int fi_extents_max; /* Size of fiemap_extent array */ - struct fiemap_extent __user *fi_extents_start; /* Start of - fiemap_extent array */ + unsigned int fi_flags; + unsigned int fi_extents_mapped; + unsigned int fi_extents_max; + struct fiemap_extent __user *fi_extents_start; }; int fiemap_prep(struct inode *inode, struct fiemap_extent_info *fieinfo, diff --git a/include/uapi/linux/fiemap.h b/include/uapi/linux/fiemap.h index 24ca0c00cae3..9d9e8ae32b41 100644 --- a/include/uapi/linux/fiemap.h +++ b/include/uapi/linux/fiemap.h @@ -14,37 +14,56 @@ #include +/** + * struct fiemap_extent - description of one fiemap extent + * @fe_logical: byte offset of the extent in the file + * @fe_physical: byte offset of extent on disk + * @fe_length: length in bytes for this extent + * @fe_flags: FIEMAP_EXTENT_* flags for this extent + */ struct fiemap_extent { - __u64 fe_logical; /* logical offset in bytes for the start of - * the extent from the beginning of the file */ - __u64 fe_physical; /* physical offset in bytes for the start - * of the extent from the beginning of the disk */ - __u64 fe_length; /* length in bytes for this extent */ + __u64 fe_logical; + __u64 fe_physical; + __u64 fe_length; + /* private: */ __u64 fe_reserved64[2]; - __u32 fe_flags; /* FIEMAP_EXTENT_* flags for this extent */ + /* public: */ + __u32 fe_flags; + /* private: */ __u32 fe_reserved[3]; }; +/** + * struct fiemap - file extent mappings + * @fm_start: byte offset (inclusive) at which to start mapping (in) + * @fm_length: logical length of mapping which userspace wants (in) + * @fm_flags: FIEMAP_FLAG_* flags for request (in/out) + * @fm_mapped_extents: number of extents that were mapped (out) + * @fm_extent_count: size of fm_extents array (in) + * @fm_extents: array of mapped extents (out) + */ struct fiemap { - __u64 fm_start; /* logical offset (inclusive) at - * which to start mapping (in) */ - __u64 fm_length; /* logical length of mapping which - * userspace wants (in) */ - __u32 fm_flags; /* FIEMAP_FLAG_* flags for request (in/out) */ - __u32 fm_mapped_extents;/* number of extents that were mapped (out) */ - __u32 fm_extent_count; /* size of fm_extents array (in) */ + __u64 fm_start; + __u64 fm_length; + __u32 fm_flags; + __u32 fm_mapped_extents; + __u32 fm_extent_count; + /* private: */ __u32 fm_reserved; - struct fiemap_extent fm_extents[]; /* array of mapped extents (out) */ + /* public: */ + struct fiemap_extent fm_extents[]; }; #define FIEMAP_MAX_OFFSET (~0ULL) +/* flags used in fm_flags: */ #define FIEMAP_FLAG_SYNC 0x00000001 /* sync file data before map */ #define FIEMAP_FLAG_XATTR 0x00000002 /* map extended attribute tree */ #define FIEMAP_FLAG_CACHE 0x00000004 /* request caching of the extents */ #define FIEMAP_FLAGS_COMPAT (FIEMAP_FLAG_SYNC | FIEMAP_FLAG_XATTR) +/* flags used in fe_flags: */ #define FIEMAP_EXTENT_LAST 0x00000001 /* Last extent in file. */ #define FIEMAP_EXTENT_UNKNOWN 0x00000002 /* Data location unknown. */ #define FIEMAP_EXTENT_DELALLOC 0x00000004 /* Location still pending. -- cgit v1.2.3 From 10783d0ba0d7731ec81d88c54f83cf0ff89d1c2a Mon Sep 17 00:00:00 2001 From: Anuj Gupta Date: Thu, 28 Nov 2024 16:52:34 +0530 Subject: fs, iov_iter: define meta io descriptor Add flags to describe checks for integrity meta buffer. Also, introduce a new 'uio_meta' structure that upper layer can use to pass the meta/integrity information. Signed-off-by: Kanchan Joshi Signed-off-by: Anuj Gupta Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20241128112240.8867-5-anuj20.g@samsung.com Signed-off-by: Jens Axboe --- include/linux/uio.h | 9 +++++++++ include/uapi/linux/fs.h | 9 +++++++++ 2 files changed, 18 insertions(+) (limited to 'include/uapi') diff --git a/include/linux/uio.h b/include/linux/uio.h index 853f9de5aa05..8ada84e85447 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -82,6 +82,15 @@ struct iov_iter { }; }; +typedef __u16 uio_meta_flags_t; + +struct uio_meta { + uio_meta_flags_t flags; + u16 app_tag; + u64 seed; + struct iov_iter iter; +}; + static inline const struct iovec *iter_iov(const struct iov_iter *iter) { if (iter->iter_type == ITER_UBUF) diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index 753971770733..9070ef19f0a3 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -40,6 +40,15 @@ #define BLOCK_SIZE_BITS 10 #define BLOCK_SIZE (1< Date: Thu, 28 Nov 2024 16:52:36 +0530 Subject: io_uring: introduce attributes for read/write and PI support Add the ability to pass additional attributes along with read/write. Application can prepare attibute specific information and pass its address using the SQE field: __u64 attr_ptr; Along with setting a mask indicating attributes being passed: __u64 attr_type_mask; Overall 64 attributes are allowed and currently one attribute 'IORING_RW_ATTR_FLAG_PI' is supported. With PI attribute, userspace can pass following information: - flags: integrity check flags IO_INTEGRITY_CHK_{GUARD/APPTAG/REFTAG} - len: length of PI/metadata buffer - addr: address of metadata buffer - seed: seed value for reftag remapping - app_tag: application defined 16b value Process this information to prepare uio_meta_descriptor and pass it down using kiocb->private. PI attribute is supported only for direct IO. Signed-off-by: Anuj Gupta Signed-off-by: Kanchan Joshi Link: https://lore.kernel.org/r/20241128112240.8867-7-anuj20.g@samsung.com Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 16 +++++++++ io_uring/io_uring.c | 2 ++ io_uring/rw.c | 83 +++++++++++++++++++++++++++++++++++++++++-- io_uring/rw.h | 14 +++++++- 4 files changed, 112 insertions(+), 3 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index aac9a4f8fa9a..38f0d6b10eaf 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -98,6 +98,10 @@ struct io_uring_sqe { __u64 addr3; __u64 __pad2[1]; }; + struct { + __u64 attr_ptr; /* pointer to attribute information */ + __u64 attr_type_mask; /* bit mask of attributes */ + }; __u64 optval; /* * If the ring is initialized with IORING_SETUP_SQE128, then @@ -107,6 +111,18 @@ struct io_uring_sqe { }; }; +/* sqe->attr_type_mask flags */ +#define IORING_RW_ATTR_FLAG_PI (1U << 0) +/* PI attribute information */ +struct io_uring_attr_pi { + __u16 flags; + __u16 app_tag; + __u32 len; + __u64 addr; + __u64 seed; + __u64 rsvd; +}; + /* * If sqe->file_index is set to this for opcodes that instantiate a new * direct descriptor (like openat/openat2/accept), then io_uring will allocate diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 8dd3fa9be993..2e6891aadecc 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -3876,6 +3876,8 @@ static int __init io_uring_init(void) BUILD_BUG_SQE_ELEM(46, __u16, __pad3[0]); BUILD_BUG_SQE_ELEM(48, __u64, addr3); BUILD_BUG_SQE_ELEM_SIZE(48, 0, cmd); + BUILD_BUG_SQE_ELEM(48, __u64, attr_ptr); + BUILD_BUG_SQE_ELEM(56, __u64, attr_type_mask); BUILD_BUG_SQE_ELEM(56, __u64, __pad2); BUILD_BUG_ON(sizeof(struct io_uring_files_update) != diff --git a/io_uring/rw.c b/io_uring/rw.c index 0bcb83e4ce3c..04e4467ab0ee 100644 --- a/io_uring/rw.c +++ b/io_uring/rw.c @@ -257,11 +257,53 @@ static int io_prep_rw_setup(struct io_kiocb *req, int ddir, bool do_import) return 0; } +static inline void io_meta_save_state(struct io_async_rw *io) +{ + io->meta_state.seed = io->meta.seed; + iov_iter_save_state(&io->meta.iter, &io->meta_state.iter_meta); +} + +static inline void io_meta_restore(struct io_async_rw *io, struct kiocb *kiocb) +{ + if (kiocb->ki_flags & IOCB_HAS_METADATA) { + io->meta.seed = io->meta_state.seed; + iov_iter_restore(&io->meta.iter, &io->meta_state.iter_meta); + } +} + +static int io_prep_rw_pi(struct io_kiocb *req, struct io_rw *rw, int ddir, + u64 attr_ptr, u64 attr_type_mask) +{ + struct io_uring_attr_pi pi_attr; + struct io_async_rw *io; + int ret; + + if (copy_from_user(&pi_attr, u64_to_user_ptr(attr_ptr), + sizeof(pi_attr))) + return -EFAULT; + + if (pi_attr.rsvd) + return -EINVAL; + + io = req->async_data; + io->meta.flags = pi_attr.flags; + io->meta.app_tag = pi_attr.app_tag; + io->meta.seed = pi_attr.seed; + ret = import_ubuf(ddir, u64_to_user_ptr(pi_attr.addr), + pi_attr.len, &io->meta.iter); + if (unlikely(ret < 0)) + return ret; + rw->kiocb.ki_flags |= IOCB_HAS_METADATA; + io_meta_save_state(io); + return ret; +} + static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe, int ddir, bool do_import) { struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw); unsigned ioprio; + u64 attr_type_mask; int ret; rw->kiocb.ki_pos = READ_ONCE(sqe->off); @@ -279,11 +321,28 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe, rw->kiocb.ki_ioprio = get_current_ioprio(); } rw->kiocb.dio_complete = NULL; + rw->kiocb.ki_flags = 0; rw->addr = READ_ONCE(sqe->addr); rw->len = READ_ONCE(sqe->len); rw->flags = READ_ONCE(sqe->rw_flags); - return io_prep_rw_setup(req, ddir, do_import); + ret = io_prep_rw_setup(req, ddir, do_import); + + if (unlikely(ret)) + return ret; + + attr_type_mask = READ_ONCE(sqe->attr_type_mask); + if (attr_type_mask) { + u64 attr_ptr; + + /* only PI attribute is supported currently */ + if (attr_type_mask != IORING_RW_ATTR_FLAG_PI) + return -EINVAL; + + attr_ptr = READ_ONCE(sqe->attr_ptr); + ret = io_prep_rw_pi(req, rw, ddir, attr_ptr, attr_type_mask); + } + return ret; } int io_prep_read(struct io_kiocb *req, const struct io_uring_sqe *sqe) @@ -409,7 +468,9 @@ static inline loff_t *io_kiocb_update_pos(struct io_kiocb *req) static void io_resubmit_prep(struct io_kiocb *req) { struct io_async_rw *io = req->async_data; + struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw); + io_meta_restore(io, &rw->kiocb); iov_iter_restore(&io->iter, &io->iter_state); } @@ -744,6 +805,10 @@ static bool io_rw_should_retry(struct io_kiocb *req) if (kiocb->ki_flags & (IOCB_DIRECT | IOCB_HIPRI)) return false; + /* never retry for meta io */ + if (kiocb->ki_flags & IOCB_HAS_METADATA) + return false; + /* * just use poll if we can, and don't attempt if the fs doesn't * support callback based unlocks @@ -794,7 +859,7 @@ static int io_rw_init_file(struct io_kiocb *req, fmode_t mode, int rw_type) if (!(req->flags & REQ_F_FIXED_FILE)) req->flags |= io_file_get_flags(file); - kiocb->ki_flags = file->f_iocb_flags; + kiocb->ki_flags |= file->f_iocb_flags; ret = kiocb_set_rw_flags(kiocb, rw->flags, rw_type); if (unlikely(ret)) return ret; @@ -828,6 +893,18 @@ static int io_rw_init_file(struct io_kiocb *req, fmode_t mode, int rw_type) kiocb->ki_complete = io_complete_rw; } + if (kiocb->ki_flags & IOCB_HAS_METADATA) { + struct io_async_rw *io = req->async_data; + + /* + * We have a union of meta fields with wpq used for buffered-io + * in io_async_rw, so fail it here. + */ + if (!(req->file->f_flags & O_DIRECT)) + return -EOPNOTSUPP; + kiocb->private = &io->meta; + } + return 0; } @@ -902,6 +979,7 @@ static int __io_read(struct io_kiocb *req, unsigned int issue_flags) * manually if we need to. */ iov_iter_restore(&io->iter, &io->iter_state); + io_meta_restore(io, kiocb); do { /* @@ -1125,6 +1203,7 @@ done: } else { ret_eagain: iov_iter_restore(&io->iter, &io->iter_state); + io_meta_restore(io, kiocb); if (kiocb->ki_flags & IOCB_WRITE) io_req_end_write(req); return -EAGAIN; diff --git a/io_uring/rw.h b/io_uring/rw.h index 3f432dc75441..2d7656bd268d 100644 --- a/io_uring/rw.h +++ b/io_uring/rw.h @@ -2,6 +2,11 @@ #include +struct io_meta_state { + u32 seed; + struct iov_iter_state iter_meta; +}; + struct io_async_rw { size_t bytes_done; struct iov_iter iter; @@ -9,7 +14,14 @@ struct io_async_rw { struct iovec fast_iov; struct iovec *free_iovec; int free_iov_nr; - struct wait_page_queue wpq; + /* wpq is for buffered io, while meta fields are used with direct io */ + union { + struct wait_page_queue wpq; + struct { + struct uio_meta meta; + struct io_meta_state meta_state; + }; + }; }; int io_prep_read_fixed(struct io_kiocb *req, const struct io_uring_sqe *sqe); -- cgit v1.2.3 From 6b830c6a023ff6e8fe05dbe47a9e5cd276df09ee Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Sat, 21 Dec 2024 12:09:14 +0100 Subject: netlink: specs: mptcp: add missing 'server-side' attr This attribute is added with the 'created' and 'established' events, but the documentation didn't mention it. The documentation in the UAPI header has been auto-generated by: ./tools/net/ynl/ynl-regen.sh Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20241221-net-mptcp-netlink-specs-pm-doc-fixes-v2-1-e54f2db3f844@kernel.org Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/mptcp_pm.yaml | 6 ++++-- include/uapi/linux/mptcp_pm.h | 11 ++++++----- 2 files changed, 10 insertions(+), 7 deletions(-) (limited to 'include/uapi') diff --git a/Documentation/netlink/specs/mptcp_pm.yaml b/Documentation/netlink/specs/mptcp_pm.yaml index dc190bf838fe..fc0603f51665 100644 --- a/Documentation/netlink/specs/mptcp_pm.yaml +++ b/Documentation/netlink/specs/mptcp_pm.yaml @@ -23,7 +23,8 @@ definitions: - name: created doc: - token, family, saddr4 | saddr6, daddr4 | daddr6, sport, dport + token, family, saddr4 | saddr6, daddr4 | daddr6, sport, dport, + server-side A new MPTCP connection has been created. It is the good time to allocate memory and send ADD_ADDR if needed. Depending on the traffic-patterns it can take a long time until the @@ -31,7 +32,8 @@ definitions: - name: established doc: - token, family, saddr4 | saddr6, daddr4 | daddr6, sport, dport + token, family, saddr4 | saddr6, daddr4 | daddr6, sport, dport, + server-side A MPTCP connection is established (can start new subflows). - name: closed diff --git a/include/uapi/linux/mptcp_pm.h b/include/uapi/linux/mptcp_pm.h index 50589e5dd6a3..b34fd95b6f84 100644 --- a/include/uapi/linux/mptcp_pm.h +++ b/include/uapi/linux/mptcp_pm.h @@ -13,12 +13,13 @@ * enum mptcp_event_type * @MPTCP_EVENT_UNSPEC: unused event * @MPTCP_EVENT_CREATED: token, family, saddr4 | saddr6, daddr4 | daddr6, - * sport, dport A new MPTCP connection has been created. It is the good time - * to allocate memory and send ADD_ADDR if needed. Depending on the - * traffic-patterns it can take a long time until the MPTCP_EVENT_ESTABLISHED - * is sent. + * sport, dport, server-side A new MPTCP connection has been created. It is + * the good time to allocate memory and send ADD_ADDR if needed. Depending on + * the traffic-patterns it can take a long time until the + * MPTCP_EVENT_ESTABLISHED is sent. * @MPTCP_EVENT_ESTABLISHED: token, family, saddr4 | saddr6, daddr4 | daddr6, - * sport, dport A MPTCP connection is established (can start new subflows). + * sport, dport, server-side A MPTCP connection is established (can start new + * subflows). * @MPTCP_EVENT_CLOSED: token A MPTCP connection has stopped. * @MPTCP_EVENT_ANNOUNCED: token, rem_id, family, daddr4 | daddr6 [, dport] A * new address has been announced by the peer. -- cgit v1.2.3 From bea87657b5ee8e6f18af2833ee4b88212ef52d28 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Sat, 21 Dec 2024 12:09:15 +0100 Subject: netlink: specs: mptcp: clearly mention attributes The rendered version of the MPTCP events [1] looked strange, because the whole content of the 'doc' was displayed in the same block. It was then not clear that the first words, not even ended by a period, were the attributes that are defined when such events are emitted. These attributes have now been moved to the end, prefixed by 'Attributes:' and ended with a period. Note that '>-' has been added after 'doc:' to allow ':' in the text below. The documentation in the UAPI header has been auto-generated by: ./tools/net/ynl/ynl-regen.sh Link: https://docs.kernel.org/networking/netlink_spec/mptcp_pm.html#event-type [1] Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20241221-net-mptcp-netlink-specs-pm-doc-fixes-v2-2-e54f2db3f844@kernel.org Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/mptcp_pm.yaml | 50 ++++++++++++++--------------- include/uapi/linux/mptcp_pm.h | 53 ++++++++++++++++--------------- 2 files changed, 52 insertions(+), 51 deletions(-) (limited to 'include/uapi') diff --git a/Documentation/netlink/specs/mptcp_pm.yaml b/Documentation/netlink/specs/mptcp_pm.yaml index fc0603f51665..59087a230565 100644 --- a/Documentation/netlink/specs/mptcp_pm.yaml +++ b/Documentation/netlink/specs/mptcp_pm.yaml @@ -22,67 +22,67 @@ definitions: doc: unused event - name: created - doc: - token, family, saddr4 | saddr6, daddr4 | daddr6, sport, dport, - server-side + doc: >- A new MPTCP connection has been created. It is the good time to allocate memory and send ADD_ADDR if needed. Depending on the traffic-patterns it can take a long time until the MPTCP_EVENT_ESTABLISHED is sent. + Attributes: token, family, saddr4 | saddr6, daddr4 | daddr6, sport, + dport, server-side. - name: established - doc: - token, family, saddr4 | saddr6, daddr4 | daddr6, sport, dport, - server-side + doc: >- A MPTCP connection is established (can start new subflows). + Attributes: token, family, saddr4 | saddr6, daddr4 | daddr6, sport, + dport, server-side. - name: closed - doc: - token + doc: >- A MPTCP connection has stopped. + Attribute: token. - name: announced value: 6 - doc: - token, rem_id, family, daddr4 | daddr6 [, dport] + doc: >- A new address has been announced by the peer. + Attributes: token, rem_id, family, daddr4 | daddr6 [, dport]. - name: removed - doc: - token, rem_id + doc: >- An address has been lost by the peer. + Attributes: token, rem_id. - name: sub-established value: 10 - doc: - token, family, loc_id, rem_id, saddr4 | saddr6, daddr4 | daddr6, sport, - dport, backup, if_idx [, error] + doc: >- A new subflow has been established. 'error' should not be set. + Attributes: token, family, loc_id, rem_id, saddr4 | saddr6, daddr4 | + daddr6, sport, dport, backup, if_idx [, error]. - name: sub-closed - doc: - token, family, loc_id, rem_id, saddr4 | saddr6, daddr4 | daddr6, sport, - dport, backup, if_idx [, error] + doc: >- A subflow has been closed. An error (copy of sk_err) could be set if an error has been detected for this subflow. + Attributes: token, family, loc_id, rem_id, saddr4 | saddr6, daddr4 | + daddr6, sport, dport, backup, if_idx [, error]. - name: sub-priority value: 13 - doc: - token, family, loc_id, rem_id, saddr4 | saddr6, daddr4 | daddr6, sport, - dport, backup, if_idx [, error] + doc: >- The priority of a subflow has changed. 'error' should not be set. + Attributes: token, family, loc_id, rem_id, saddr4 | saddr6, daddr4 | + daddr6, sport, dport, backup, if_idx [, error]. - name: listener-created value: 15 - doc: - family, sport, saddr4 | saddr6 + doc: >- A new PM listener is created. + Attributes: family, sport, saddr4 | saddr6. - name: listener-closed - doc: - family, sport, saddr4 | saddr6 + doc: >- A PM listener is closed. + Attributes: family, sport, saddr4 | saddr6. attribute-sets: - diff --git a/include/uapi/linux/mptcp_pm.h b/include/uapi/linux/mptcp_pm.h index b34fd95b6f84..84fa8a21dfd0 100644 --- a/include/uapi/linux/mptcp_pm.h +++ b/include/uapi/linux/mptcp_pm.h @@ -12,32 +12,33 @@ /** * enum mptcp_event_type * @MPTCP_EVENT_UNSPEC: unused event - * @MPTCP_EVENT_CREATED: token, family, saddr4 | saddr6, daddr4 | daddr6, - * sport, dport, server-side A new MPTCP connection has been created. It is - * the good time to allocate memory and send ADD_ADDR if needed. Depending on - * the traffic-patterns it can take a long time until the - * MPTCP_EVENT_ESTABLISHED is sent. - * @MPTCP_EVENT_ESTABLISHED: token, family, saddr4 | saddr6, daddr4 | daddr6, - * sport, dport, server-side A MPTCP connection is established (can start new - * subflows). - * @MPTCP_EVENT_CLOSED: token A MPTCP connection has stopped. - * @MPTCP_EVENT_ANNOUNCED: token, rem_id, family, daddr4 | daddr6 [, dport] A - * new address has been announced by the peer. - * @MPTCP_EVENT_REMOVED: token, rem_id An address has been lost by the peer. - * @MPTCP_EVENT_SUB_ESTABLISHED: token, family, loc_id, rem_id, saddr4 | - * saddr6, daddr4 | daddr6, sport, dport, backup, if_idx [, error] A new - * subflow has been established. 'error' should not be set. - * @MPTCP_EVENT_SUB_CLOSED: token, family, loc_id, rem_id, saddr4 | saddr6, - * daddr4 | daddr6, sport, dport, backup, if_idx [, error] A subflow has been - * closed. An error (copy of sk_err) could be set if an error has been - * detected for this subflow. - * @MPTCP_EVENT_SUB_PRIORITY: token, family, loc_id, rem_id, saddr4 | saddr6, - * daddr4 | daddr6, sport, dport, backup, if_idx [, error] The priority of a - * subflow has changed. 'error' should not be set. - * @MPTCP_EVENT_LISTENER_CREATED: family, sport, saddr4 | saddr6 A new PM - * listener is created. - * @MPTCP_EVENT_LISTENER_CLOSED: family, sport, saddr4 | saddr6 A PM listener - * is closed. + * @MPTCP_EVENT_CREATED: A new MPTCP connection has been created. It is the + * good time to allocate memory and send ADD_ADDR if needed. Depending on the + * traffic-patterns it can take a long time until the MPTCP_EVENT_ESTABLISHED + * is sent. Attributes: token, family, saddr4 | saddr6, daddr4 | daddr6, + * sport, dport, server-side. + * @MPTCP_EVENT_ESTABLISHED: A MPTCP connection is established (can start new + * subflows). Attributes: token, family, saddr4 | saddr6, daddr4 | daddr6, + * sport, dport, server-side. + * @MPTCP_EVENT_CLOSED: A MPTCP connection has stopped. Attribute: token. + * @MPTCP_EVENT_ANNOUNCED: A new address has been announced by the peer. + * Attributes: token, rem_id, family, daddr4 | daddr6 [, dport]. + * @MPTCP_EVENT_REMOVED: An address has been lost by the peer. Attributes: + * token, rem_id. + * @MPTCP_EVENT_SUB_ESTABLISHED: A new subflow has been established. 'error' + * should not be set. Attributes: token, family, loc_id, rem_id, saddr4 | + * saddr6, daddr4 | daddr6, sport, dport, backup, if_idx [, error]. + * @MPTCP_EVENT_SUB_CLOSED: A subflow has been closed. An error (copy of + * sk_err) could be set if an error has been detected for this subflow. + * Attributes: token, family, loc_id, rem_id, saddr4 | saddr6, daddr4 | + * daddr6, sport, dport, backup, if_idx [, error]. + * @MPTCP_EVENT_SUB_PRIORITY: The priority of a subflow has changed. 'error' + * should not be set. Attributes: token, family, loc_id, rem_id, saddr4 | + * saddr6, daddr4 | daddr6, sport, dport, backup, if_idx [, error]. + * @MPTCP_EVENT_LISTENER_CREATED: A new PM listener is created. Attributes: + * family, sport, saddr4 | saddr6. + * @MPTCP_EVENT_LISTENER_CLOSED: A PM listener is closed. Attributes: family, + * sport, saddr4 | saddr6. */ enum mptcp_event_type { MPTCP_EVENT_UNSPEC, -- cgit v1.2.3 From 7a637e5e27a68fd52327a80136d5d0184c43888f Mon Sep 17 00:00:00 2001 From: Danylo Piliaiev Date: Tue, 3 Dec 2024 10:59:20 +0100 Subject: drm/msm: Expose uche trap base via uapi This adds MSM_PARAM_UCHE_TRAP_BASE that will be used by Mesa implementation for VK_KHR_shader_clock and GL_ARB_shader_clock. Signed-off-by: Danylo Piliaiev Patchwork: https://patchwork.freedesktop.org/patch/627036/ Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/a4xx_gpu.c | 6 ++++-- drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 10 ++++++---- drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 12 +++++++----- drivers/gpu/drm/msm/adreno/adreno_gpu.c | 3 +++ drivers/gpu/drm/msm/adreno/adreno_gpu.h | 2 ++ include/uapi/drm/msm_drm.h | 1 + 6 files changed, 23 insertions(+), 11 deletions(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c index 50c490b492f0..f1b18a6663f7 100644 --- a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c @@ -251,8 +251,8 @@ static int a4xx_hw_init(struct msm_gpu *gpu) gpu_write(gpu, REG_A4XX_UCHE_CACHE_WAYS_VFD, 0x07); /* Disable L2 bypass to avoid UCHE out of bounds errors */ - gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_LO, 0xffff0000); - gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_HI, 0xffff0000); + gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_LO, lower_32_bits(adreno_gpu->uche_trap_base)); + gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_HI, upper_32_bits(adreno_gpu->uche_trap_base)); gpu_write(gpu, REG_A4XX_CP_DEBUG, (1 << 25) | (adreno_is_a420(adreno_gpu) ? (1 << 29) : 0)); @@ -693,6 +693,8 @@ struct msm_gpu *a4xx_gpu_init(struct drm_device *dev) if (ret) goto fail; + adreno_gpu->uche_trap_base = 0xffff0000ffff0000ull; + if (!gpu->aspace) { /* TODO we think it is possible to configure the GPU to * restrict access to VRAM carveout. But the required diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c index ee89db72e36e..caf2c0a7a29f 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c @@ -750,10 +750,10 @@ static int a5xx_hw_init(struct msm_gpu *gpu) gpu_write(gpu, REG_A5XX_UCHE_CACHE_WAYS, 0x02); /* Disable L2 bypass in the UCHE */ - gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_LO, 0xFFFF0000); - gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_HI, 0x0001FFFF); - gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_LO, 0xFFFF0000); - gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_HI, 0x0001FFFF); + gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_LO, lower_32_bits(adreno_gpu->uche_trap_base)); + gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_HI, upper_32_bits(adreno_gpu->uche_trap_base)); + gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_LO, lower_32_bits(adreno_gpu->uche_trap_base)); + gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_HI, upper_32_bits(adreno_gpu->uche_trap_base)); /* Set the GMEM VA range (0 to gpu->gmem) */ gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_LO, 0x00100000); @@ -1805,5 +1805,7 @@ struct msm_gpu *a5xx_gpu_init(struct drm_device *dev) adreno_gpu->ubwc_config.macrotile_mode = 0; adreno_gpu->ubwc_config.ubwc_swizzle = 0x7; + adreno_gpu->uche_trap_base = 0x0001ffffffff0000ull; + return gpu; } diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index 019610341df1..0ae29a7c8a4d 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -1123,12 +1123,12 @@ static int hw_init(struct msm_gpu *gpu) /* Disable L2 bypass in the UCHE */ if (adreno_is_a7xx(adreno_gpu)) { - gpu_write64(gpu, REG_A6XX_UCHE_TRAP_BASE, 0x0001fffffffff000llu); - gpu_write64(gpu, REG_A6XX_UCHE_WRITE_THRU_BASE, 0x0001fffffffff000llu); + gpu_write64(gpu, REG_A6XX_UCHE_TRAP_BASE, adreno_gpu->uche_trap_base); + gpu_write64(gpu, REG_A6XX_UCHE_WRITE_THRU_BASE, adreno_gpu->uche_trap_base); } else { - gpu_write64(gpu, REG_A6XX_UCHE_WRITE_RANGE_MAX, 0x0001ffffffffffc0llu); - gpu_write64(gpu, REG_A6XX_UCHE_TRAP_BASE, 0x0001fffffffff000llu); - gpu_write64(gpu, REG_A6XX_UCHE_WRITE_THRU_BASE, 0x0001fffffffff000llu); + gpu_write64(gpu, REG_A6XX_UCHE_WRITE_RANGE_MAX, adreno_gpu->uche_trap_base + 0xfc0); + gpu_write64(gpu, REG_A6XX_UCHE_TRAP_BASE, adreno_gpu->uche_trap_base); + gpu_write64(gpu, REG_A6XX_UCHE_WRITE_THRU_BASE, adreno_gpu->uche_trap_base); } if (!(adreno_is_a650_family(adreno_gpu) || @@ -2533,6 +2533,8 @@ struct msm_gpu *a6xx_gpu_init(struct drm_device *dev) } } + adreno_gpu->uche_trap_base = 0x1fffffffff000ull; + if (gpu->aspace) msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu, a6xx_fault_handler); diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index 75f5367e73ca..c7454b6cfb7f 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -385,6 +385,9 @@ int adreno_get_param(struct msm_gpu *gpu, struct msm_file_private *ctx, case MSM_PARAM_MACROTILE_MODE: *value = adreno_gpu->ubwc_config.macrotile_mode; return 0; + case MSM_PARAM_UCHE_TRAP_BASE: + *value = adreno_gpu->uche_trap_base; + return 0; default: DBG("%s: invalid param: %u", gpu->name, param); return -EINVAL; diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h index f5d6087376f5..dcf454629ce0 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h @@ -253,6 +253,8 @@ struct adreno_gpu { bool gmu_is_wrapper; bool has_ray_tracing; + + u64 uche_trap_base; }; #define to_adreno_gpu(x) container_of(x, struct adreno_gpu, base) diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h index b916aab80dde..2342cb90857e 100644 --- a/include/uapi/drm/msm_drm.h +++ b/include/uapi/drm/msm_drm.h @@ -90,6 +90,7 @@ struct drm_msm_timespec { #define MSM_PARAM_RAYTRACING 0x11 /* RO */ #define MSM_PARAM_UBWC_SWIZZLE 0x12 /* RO */ #define MSM_PARAM_MACROTILE_MODE 0x13 /* RO */ +#define MSM_PARAM_UCHE_TRAP_BASE 0x14 /* RO */ /* For backwards compat. The original support for preemption was based on * a single ring per priority level so # of priority levels equals the # -- cgit v1.2.3 From af6505e5745b9f3a670de405b08b73573343c15c Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 20 Dec 2024 08:47:45 -0700 Subject: fs: add RWF_DONTCACHE iocb and FOP_DONTCACHE file_operations flag If a file system supports uncached buffered IO, it may set FOP_DONTCACHE and enable support for RWF_DONTCACHE. If RWF_DONTCACHE is attempted without the file system supporting it, it'll get errored with -EOPNOTSUPP. Signed-off-by: Jens Axboe Link: https://lore.kernel.org/r/20241220154831.1086649-8-axboe@kernel.dk Signed-off-by: Christian Brauner --- include/linux/fs.h | 14 +++++++++++++- include/uapi/linux/fs.h | 6 +++++- 2 files changed, 18 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/linux/fs.h b/include/linux/fs.h index 7e29433c5ecc..6a838b5479a6 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -322,6 +322,7 @@ struct readahead_control; #define IOCB_NOWAIT (__force int) RWF_NOWAIT #define IOCB_APPEND (__force int) RWF_APPEND #define IOCB_ATOMIC (__force int) RWF_ATOMIC +#define IOCB_DONTCACHE (__force int) RWF_DONTCACHE /* non-RWF related bits - start at 16 */ #define IOCB_EVENTFD (1 << 16) @@ -356,7 +357,8 @@ struct readahead_control; { IOCB_SYNC, "SYNC" }, \ { IOCB_NOWAIT, "NOWAIT" }, \ { IOCB_APPEND, "APPEND" }, \ - { IOCB_ATOMIC, "ATOMIC"}, \ + { IOCB_ATOMIC, "ATOMIC" }, \ + { IOCB_DONTCACHE, "DONTCACHE" }, \ { IOCB_EVENTFD, "EVENTFD"}, \ { IOCB_DIRECT, "DIRECT" }, \ { IOCB_WRITE, "WRITE" }, \ @@ -2127,6 +2129,8 @@ struct file_operations { #define FOP_UNSIGNED_OFFSET ((__force fop_flags_t)(1 << 5)) /* Supports asynchronous lock callbacks */ #define FOP_ASYNC_LOCK ((__force fop_flags_t)(1 << 6)) +/* File system supports uncached read/write buffered IO */ +#define FOP_DONTCACHE ((__force fop_flags_t)(1 << 7)) /* Wrap a directory iterator that needs exclusive inode access */ int wrap_directory_iterator(struct file *, struct dir_context *, @@ -3614,6 +3618,14 @@ static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags, if (!(ki->ki_filp->f_mode & FMODE_CAN_ATOMIC_WRITE)) return -EOPNOTSUPP; } + if (flags & RWF_DONTCACHE) { + /* file system must support it */ + if (!(ki->ki_filp->f_op->fop_flags & FOP_DONTCACHE)) + return -EOPNOTSUPP; + /* DAX mappings not supported */ + if (IS_DAX(ki->ki_filp->f_mapping->host)) + return -EOPNOTSUPP; + } kiocb_flags |= (__force int) (flags & RWF_SUPPORTED); if (flags & RWF_SYNC) kiocb_flags |= IOCB_DSYNC; diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index 753971770733..56a4f93a08f4 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -332,9 +332,13 @@ typedef int __bitwise __kernel_rwf_t; /* Atomic Write */ #define RWF_ATOMIC ((__force __kernel_rwf_t)0x00000040) +/* buffered IO that drops the cache after reading or writing data */ +#define RWF_DONTCACHE ((__force __kernel_rwf_t)0x00000080) + /* mask of flags supported by the kernel */ #define RWF_SUPPORTED (RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_NOWAIT |\ - RWF_APPEND | RWF_NOAPPEND | RWF_ATOMIC) + RWF_APPEND | RWF_NOAPPEND | RWF_ATOMIC |\ + RWF_DONTCACHE) #define PROCFS_IOCTL_MAGIC 'f' -- cgit v1.2.3 From b9ed315d3c4c0c294a4348edb6874d489bac47fa Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 21 Dec 2024 00:46:56 +0100 Subject: netkit: Allow for configuring needed_{head,tail}room Allow the user to configure needed_{head,tail}room for both netkit devices. The idea is similar to 163e529200af ("veth: implement ndo_set_rx_headroom") with the difference that the two parameters can be specified upon device creation. By default the current behavior stays as is which is needed_{head,tail}room is 0. In case of Cilium, for example, the netkit devices are not enslaved into a bridge or openvswitch device (rather, BPF-based redirection is used out of tcx), and as such these parameters are not propagated into the Pod's netns via peer device. Given Cilium can run in vxlan/geneve tunneling mode (needed_headroom) and/or be used in combination with WireGuard (needed_{head,tail}room), allow the Cilium CNI plugin to specify these two upon netkit device creation. Signed-off-by: Daniel Borkmann Reviewed-by: Jakub Kicinski Acked-by: Nikolay Aleksandrov Link: https://lore.kernel.org/bpf/20241220234658.490686-1-daniel@iogearbox.net --- drivers/net/netkit.c | 66 +++++++++++++++++++++++++------------- include/uapi/linux/if_link.h | 2 ++ tools/include/uapi/linux/if_link.h | 2 ++ 3 files changed, 47 insertions(+), 23 deletions(-) (limited to 'include/uapi') diff --git a/drivers/net/netkit.c b/drivers/net/netkit.c index c1d881dc6409..1e1b00756be7 100644 --- a/drivers/net/netkit.c +++ b/drivers/net/netkit.c @@ -338,6 +338,7 @@ static int netkit_new_link(struct net *peer_net, struct net_device *dev, enum netkit_scrub scrub_peer = NETKIT_SCRUB_DEFAULT; enum netkit_mode mode = NETKIT_L3; unsigned char ifname_assign_type; + u16 headroom = 0, tailroom = 0; struct ifinfomsg *ifmp = NULL; struct net_device *peer; char ifname[IFNAMSIZ]; @@ -371,6 +372,10 @@ static int netkit_new_link(struct net *peer_net, struct net_device *dev, if (err < 0) return err; } + if (data[IFLA_NETKIT_HEADROOM]) + headroom = nla_get_u16(data[IFLA_NETKIT_HEADROOM]); + if (data[IFLA_NETKIT_TAILROOM]) + tailroom = nla_get_u16(data[IFLA_NETKIT_TAILROOM]); } if (ifmp && tbp[IFLA_IFNAME]) { @@ -390,6 +395,14 @@ static int netkit_new_link(struct net *peer_net, struct net_device *dev, return PTR_ERR(peer); netif_inherit_tso_max(peer, dev); + if (headroom) { + peer->needed_headroom = headroom; + dev->needed_headroom = headroom; + } + if (tailroom) { + peer->needed_tailroom = tailroom; + dev->needed_tailroom = tailroom; + } if (mode == NETKIT_L2 && !(ifmp && tbp[IFLA_ADDRESS])) eth_hw_addr_random(peer); @@ -401,6 +414,7 @@ static int netkit_new_link(struct net *peer_net, struct net_device *dev, nk->policy = policy_peer; nk->scrub = scrub_peer; nk->mode = mode; + nk->headroom = headroom; bpf_mprog_bundle_init(&nk->bundle); err = register_netdevice(peer); @@ -426,6 +440,7 @@ static int netkit_new_link(struct net *peer_net, struct net_device *dev, nk->policy = policy_prim; nk->scrub = scrub_prim; nk->mode = mode; + nk->headroom = headroom; bpf_mprog_bundle_init(&nk->bundle); err = register_netdevice(dev); @@ -850,7 +865,18 @@ static int netkit_change_link(struct net_device *dev, struct nlattr *tb[], struct net_device *peer = rtnl_dereference(nk->peer); enum netkit_action policy; struct nlattr *attr; - int err; + int err, i; + static const struct { + u32 attr; + char *name; + } fixed_params[] = { + { IFLA_NETKIT_MODE, "operating mode" }, + { IFLA_NETKIT_SCRUB, "scrubbing" }, + { IFLA_NETKIT_PEER_SCRUB, "peer scrubbing" }, + { IFLA_NETKIT_PEER_INFO, "peer info" }, + { IFLA_NETKIT_HEADROOM, "headroom" }, + { IFLA_NETKIT_TAILROOM, "tailroom" }, + }; if (!nk->primary) { NL_SET_ERR_MSG(extack, @@ -858,28 +884,14 @@ static int netkit_change_link(struct net_device *dev, struct nlattr *tb[], return -EACCES; } - if (data[IFLA_NETKIT_MODE]) { - NL_SET_ERR_MSG_ATTR(extack, data[IFLA_NETKIT_MODE], - "netkit link operating mode cannot be changed after device creation"); - return -EACCES; - } - - if (data[IFLA_NETKIT_SCRUB]) { - NL_SET_ERR_MSG_ATTR(extack, data[IFLA_NETKIT_SCRUB], - "netkit scrubbing cannot be changed after device creation"); - return -EACCES; - } - - if (data[IFLA_NETKIT_PEER_SCRUB]) { - NL_SET_ERR_MSG_ATTR(extack, data[IFLA_NETKIT_PEER_SCRUB], - "netkit scrubbing cannot be changed after device creation"); - return -EACCES; - } - - if (data[IFLA_NETKIT_PEER_INFO]) { - NL_SET_ERR_MSG_ATTR(extack, data[IFLA_NETKIT_PEER_INFO], - "netkit peer info cannot be changed after device creation"); - return -EINVAL; + for (i = 0; i < ARRAY_SIZE(fixed_params); i++) { + attr = data[fixed_params[i].attr]; + if (attr) { + NL_SET_ERR_MSG_ATTR_FMT(extack, attr, + "netkit link %s cannot be changed after device creation", + fixed_params[i].name); + return -EACCES; + } } if (data[IFLA_NETKIT_POLICY]) { @@ -914,6 +926,8 @@ static size_t netkit_get_size(const struct net_device *dev) nla_total_size(sizeof(u32)) + /* IFLA_NETKIT_PEER_SCRUB */ nla_total_size(sizeof(u32)) + /* IFLA_NETKIT_MODE */ nla_total_size(sizeof(u8)) + /* IFLA_NETKIT_PRIMARY */ + nla_total_size(sizeof(u16)) + /* IFLA_NETKIT_HEADROOM */ + nla_total_size(sizeof(u16)) + /* IFLA_NETKIT_TAILROOM */ 0; } @@ -930,6 +944,10 @@ static int netkit_fill_info(struct sk_buff *skb, const struct net_device *dev) return -EMSGSIZE; if (nla_put_u32(skb, IFLA_NETKIT_SCRUB, nk->scrub)) return -EMSGSIZE; + if (nla_put_u16(skb, IFLA_NETKIT_HEADROOM, dev->needed_headroom)) + return -EMSGSIZE; + if (nla_put_u16(skb, IFLA_NETKIT_TAILROOM, dev->needed_tailroom)) + return -EMSGSIZE; if (peer) { nk = netkit_priv(peer); @@ -947,6 +965,8 @@ static const struct nla_policy netkit_policy[IFLA_NETKIT_MAX + 1] = { [IFLA_NETKIT_MODE] = NLA_POLICY_MAX(NLA_U32, NETKIT_L3), [IFLA_NETKIT_POLICY] = { .type = NLA_U32 }, [IFLA_NETKIT_PEER_POLICY] = { .type = NLA_U32 }, + [IFLA_NETKIT_HEADROOM] = { .type = NLA_U16 }, + [IFLA_NETKIT_TAILROOM] = { .type = NLA_U16 }, [IFLA_NETKIT_SCRUB] = NLA_POLICY_MAX(NLA_U32, NETKIT_SCRUB_DEFAULT), [IFLA_NETKIT_PEER_SCRUB] = NLA_POLICY_MAX(NLA_U32, NETKIT_SCRUB_DEFAULT), [IFLA_NETKIT_PRIMARY] = { .type = NLA_REJECT, diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 2575e0cd9b48..2fa2c265dcba 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -1315,6 +1315,8 @@ enum { IFLA_NETKIT_MODE, IFLA_NETKIT_SCRUB, IFLA_NETKIT_PEER_SCRUB, + IFLA_NETKIT_HEADROOM, + IFLA_NETKIT_TAILROOM, __IFLA_NETKIT_MAX, }; #define IFLA_NETKIT_MAX (__IFLA_NETKIT_MAX - 1) diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h index 8516c1ccd57a..7e46ca4cd31b 100644 --- a/tools/include/uapi/linux/if_link.h +++ b/tools/include/uapi/linux/if_link.h @@ -1315,6 +1315,8 @@ enum { IFLA_NETKIT_MODE, IFLA_NETKIT_SCRUB, IFLA_NETKIT_PEER_SCRUB, + IFLA_NETKIT_HEADROOM, + IFLA_NETKIT_TAILROOM, __IFLA_NETKIT_MAX, }; #define IFLA_NETKIT_MAX (__IFLA_NETKIT_MAX - 1) -- cgit v1.2.3 From b989531e0bd7704dc837e37f8d16bbc921e6cf6c Mon Sep 17 00:00:00 2001 From: Yongji Xie Date: Tue, 19 Nov 2024 15:42:38 +0800 Subject: vduse: relicense under GPL-2.0 OR BSD-3-Clause Dual-license the vduse kernel header file to dual GPL-2.0 OR BSD-3-Clause license to make it possible to ship it with DPDK (under BSD-3-Clause) for older distros. Signed-off-by: Yongji Xie Message-Id: <20241119074238.38299-1-xieyongji@bytedance.com> Signed-off-by: Michael S. Tsirkin --- include/uapi/linux/vduse.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/vduse.h b/include/uapi/linux/vduse.h index 11bd48c72c6c..68a627d04afa 100644 --- a/include/uapi/linux/vduse.h +++ b/include/uapi/linux/vduse.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ #ifndef _UAPI_VDUSE_H_ #define _UAPI_VDUSE_H_ -- cgit v1.2.3 From 86b525bed2758878e788c9fb6b8fb281fd61bdb0 Mon Sep 17 00:00:00 2001 From: Rodolfo Giometti Date: Fri, 8 Nov 2024 08:31:12 +0100 Subject: drivers pps: add PPS generators support Sometimes one needs to be able not only to catch PPS signals but to produce them also. For example, running a distributed simulation, which requires computers' clock to be synchronized very tightly. This patch adds PPS generators class in order to have a well-defined interface for these devices. Signed-off-by: Rodolfo Giometti Link: https://lore.kernel.org/r/20241108073115.759039-2-giometti@enneenne.com Signed-off-by: Greg Kroah-Hartman --- Documentation/userspace-api/ioctl/ioctl-number.rst | 1 + MAINTAINERS | 1 + drivers/pps/Makefile | 3 +- drivers/pps/generators/Kconfig | 13 +- drivers/pps/generators/Makefile | 3 + drivers/pps/generators/pps_gen.c | 344 +++++++++++++++++++++ drivers/pps/generators/sysfs.c | 75 +++++ include/linux/pps_gen_kernel.h | 78 +++++ include/uapi/linux/pps_gen.h | 37 +++ 9 files changed, 553 insertions(+), 2 deletions(-) create mode 100644 drivers/pps/generators/pps_gen.c create mode 100644 drivers/pps/generators/sysfs.c create mode 100644 include/linux/pps_gen_kernel.h create mode 100644 include/uapi/linux/pps_gen.h (limited to 'include/uapi') diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst b/Documentation/userspace-api/ioctl/ioctl-number.rst index 243f1f1b554a..f7cb871cb714 100644 --- a/Documentation/userspace-api/ioctl/ioctl-number.rst +++ b/Documentation/userspace-api/ioctl/ioctl-number.rst @@ -283,6 +283,7 @@ Code Seq# Include File Comments 'p' 80-9F linux/ppdev.h user-space parport 'p' A1-A5 linux/pps.h LinuxPPS +'p' B1-B3 linux/pps-gen.h LinuxPPS 'q' 00-1F linux/serio.h 'q' 80-FF linux/telephony.h Internet PhoneJACK, Internet LineJACK diff --git a/MAINTAINERS b/MAINTAINERS index 124b8574aa76..530eca844bd4 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -18719,6 +18719,7 @@ F: Documentation/devicetree/bindings/pps/pps-gpio.yaml F: Documentation/driver-api/pps.rst F: drivers/pps/ F: include/linux/pps*.h +F: include/uapi/linux/pps-gen.h F: include/uapi/linux/pps.h PRESSURE STALL INFORMATION (PSI) diff --git a/drivers/pps/Makefile b/drivers/pps/Makefile index ceaf65cc1f1d..0aea394d4e4d 100644 --- a/drivers/pps/Makefile +++ b/drivers/pps/Makefile @@ -6,6 +6,7 @@ pps_core-y := pps.o kapi.o sysfs.o pps_core-$(CONFIG_NTP_PPS) += kc.o obj-$(CONFIG_PPS) := pps_core.o -obj-y += clients/ generators/ +obj-y += clients/ +obj-$(CONFIG_PPS_GENERATOR) += generators/ ccflags-$(CONFIG_PPS_DEBUG) := -DDEBUG diff --git a/drivers/pps/generators/Kconfig b/drivers/pps/generators/Kconfig index d615e640fcad..b34e483eff21 100644 --- a/drivers/pps/generators/Kconfig +++ b/drivers/pps/generators/Kconfig @@ -3,7 +3,16 @@ # PPS generators configuration # -comment "PPS generators support" +menuconfig PPS_GENERATOR + tristate "PPS generators support" + help + PPS generators are special hardware which are able to produce PPS + (Pulse Per Second) signals. + + To compile this driver as a module, choose M here: the module + will be called pps_gen_core. + +if PPS_GENERATOR config PPS_GENERATOR_PARPORT tristate "Parallel port PPS signal generator" @@ -12,3 +21,5 @@ config PPS_GENERATOR_PARPORT If you say yes here you get support for a PPS signal generator which utilizes STROBE pin of a parallel port to send PPS signals. It uses parport abstraction layer and hrtimers to precisely control the signal. + +endif # PPS_GENERATOR diff --git a/drivers/pps/generators/Makefile b/drivers/pps/generators/Makefile index 2589fd0f2481..034a78edfa26 100644 --- a/drivers/pps/generators/Makefile +++ b/drivers/pps/generators/Makefile @@ -3,6 +3,9 @@ # Makefile for PPS generators. # +pps_gen_core-y := pps_gen.o sysfs.o +obj-$(CONFIG_PPS_GENERATOR) := pps_gen_core.o + obj-$(CONFIG_PPS_GENERATOR_PARPORT) += pps_gen_parport.o ccflags-$(CONFIG_PPS_DEBUG) := -DDEBUG diff --git a/drivers/pps/generators/pps_gen.c b/drivers/pps/generators/pps_gen.c new file mode 100644 index 000000000000..ca592f1736f4 --- /dev/null +++ b/drivers/pps/generators/pps_gen.c @@ -0,0 +1,344 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * PPS generators core file + * + * Copyright (C) 2024 Rodolfo Giometti + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Local variables + */ + +static dev_t pps_gen_devt; +static struct class *pps_gen_class; + +static DEFINE_IDA(pps_gen_ida); + +/* + * Char device methods + */ + +static __poll_t pps_gen_cdev_poll(struct file *file, poll_table *wait) +{ + struct pps_gen_device *pps_gen = file->private_data; + + poll_wait(file, &pps_gen->queue, wait); + return EPOLLIN | EPOLLRDNORM; +} + +static int pps_gen_cdev_fasync(int fd, struct file *file, int on) +{ + struct pps_gen_device *pps_gen = file->private_data; + + return fasync_helper(fd, file, on, &pps_gen->async_queue); +} + +static long pps_gen_cdev_ioctl(struct file *file, + unsigned int cmd, unsigned long arg) +{ + struct pps_gen_device *pps_gen = file->private_data; + void __user *uarg = (void __user *) arg; + unsigned int __user *uiuarg = (unsigned int __user *) arg; + unsigned int status; + int ret; + + switch (cmd) { + case PPS_GEN_SETENABLE: + dev_dbg(pps_gen->dev, "PPS_GEN_SETENABLE\n"); + + ret = get_user(status, uiuarg); + if (ret) + return -EFAULT; + + ret = pps_gen->info.enable(pps_gen, status); + if (ret) + return ret; + pps_gen->enabled = status; + + break; + + case PPS_GEN_USESYSTEMCLOCK: + dev_dbg(pps_gen->dev, "PPS_GEN_USESYSTEMCLOCK\n"); + + ret = put_user(pps_gen->info.use_system_clock, uiuarg); + if (ret) + return -EFAULT; + + break; + + case PPS_GEN_FETCHEVENT: { + struct pps_gen_event info; + unsigned int ev = pps_gen->last_ev; + + dev_dbg(pps_gen->dev, "PPS_GEN_FETCHEVENT\n"); + + ret = wait_event_interruptible(pps_gen->queue, + ev != pps_gen->last_ev); + if (ret == -ERESTARTSYS) { + dev_dbg(pps_gen->dev, "pending signal caught\n"); + return -EINTR; + } + + spin_lock_irq(&pps_gen->lock); + info.sequence = pps_gen->sequence; + info.event = pps_gen->event; + spin_unlock_irq(&pps_gen->lock); + + ret = copy_to_user(uarg, &info, sizeof(struct pps_gen_event)); + if (ret) + return -EFAULT; + + break; + } + default: + return -ENOTTY; + } + + return 0; +} + +static int pps_gen_cdev_open(struct inode *inode, struct file *file) +{ + struct pps_gen_device *pps_gen = container_of(inode->i_cdev, + struct pps_gen_device, cdev); + + get_device(pps_gen->dev); + file->private_data = pps_gen; + return 0; +} + +static int pps_gen_cdev_release(struct inode *inode, struct file *file) +{ + struct pps_gen_device *pps_gen = file->private_data; + + put_device(pps_gen->dev); + return 0; +} + +/* + * Char device stuff + */ + +static const struct file_operations pps_gen_cdev_fops = { + .owner = THIS_MODULE, + .poll = pps_gen_cdev_poll, + .fasync = pps_gen_cdev_fasync, + .unlocked_ioctl = pps_gen_cdev_ioctl, + .open = pps_gen_cdev_open, + .release = pps_gen_cdev_release, +}; + +static void pps_gen_device_destruct(struct device *dev) +{ + struct pps_gen_device *pps_gen = dev_get_drvdata(dev); + + cdev_del(&pps_gen->cdev); + + pr_debug("deallocating pps-gen%d\n", pps_gen->id); + ida_free(&pps_gen_ida, pps_gen->id); + + kfree(dev); + kfree(pps_gen); +} + +static int pps_gen_register_cdev(struct pps_gen_device *pps_gen) +{ + int err; + dev_t devt; + + err = ida_alloc_max(&pps_gen_ida, PPS_GEN_MAX_SOURCES - 1, GFP_KERNEL); + if (err < 0) { + if (err == -ENOSPC) { + pr_err("too many PPS sources in the system\n"); + err = -EBUSY; + } + return err; + } + pps_gen->id = err; + + devt = MKDEV(MAJOR(pps_gen_devt), pps_gen->id); + + cdev_init(&pps_gen->cdev, &pps_gen_cdev_fops); + pps_gen->cdev.owner = pps_gen->info.owner; + + err = cdev_add(&pps_gen->cdev, devt, 1); + if (err) { + pr_err("failed to add char device %d:%d\n", + MAJOR(pps_gen_devt), pps_gen->id); + goto free_ida; + } + pps_gen->dev = device_create(pps_gen_class, pps_gen->info.parent, devt, + pps_gen, "pps-gen%d", pps_gen->id); + if (IS_ERR(pps_gen->dev)) { + err = PTR_ERR(pps_gen->dev); + goto del_cdev; + } + pps_gen->dev->release = pps_gen_device_destruct; + dev_set_drvdata(pps_gen->dev, pps_gen); + + pr_debug("generator got cdev (%d:%d)\n", + MAJOR(pps_gen_devt), pps_gen->id); + + return 0; + +del_cdev: + cdev_del(&pps_gen->cdev); +free_ida: + ida_free(&pps_gen_ida, pps_gen->id); + return err; +} + +static void pps_gen_unregister_cdev(struct pps_gen_device *pps_gen) +{ + pr_debug("unregistering pps-gen%d\n", pps_gen->id); + device_destroy(pps_gen_class, pps_gen->dev->devt); +} + +/* + * Exported functions + */ + +/** + * pps_gen_register_source() - add a PPS generator in the system + * @info: the PPS generator info struct + * + * This function is used to register a new PPS generator in the system. + * When it returns successfully the new generator is up and running, and + * it can be managed by the userspace. + * + * Return: the PPS generator device in case of success, and ERR_PTR(errno) + * otherwise. + */ +struct pps_gen_device *pps_gen_register_source(struct pps_gen_source_info *info) +{ + struct pps_gen_device *pps_gen; + int err; + + pps_gen = kzalloc(sizeof(struct pps_gen_device), GFP_KERNEL); + if (pps_gen == NULL) { + err = -ENOMEM; + goto pps_gen_register_source_exit; + } + pps_gen->info = *info; + pps_gen->enabled = false; + + init_waitqueue_head(&pps_gen->queue); + spin_lock_init(&pps_gen->lock); + + /* Create the char device */ + err = pps_gen_register_cdev(pps_gen); + if (err < 0) { + pr_err(" unable to create char device\n"); + goto kfree_pps_gen; + } + + return pps_gen; + +kfree_pps_gen: + kfree(pps_gen); + +pps_gen_register_source_exit: + pr_err("unable to register generator\n"); + + return ERR_PTR(err); +} +EXPORT_SYMBOL(pps_gen_register_source); + +/** + * pps_gen_unregister_source() - remove a PPS generator from the system + * @pps_gen: the PPS generator device to be removed + * + * This function is used to deregister a PPS generator from the system. When + * called, it disables the generator so no pulses are generated anymore. + */ +void pps_gen_unregister_source(struct pps_gen_device *pps_gen) +{ + pps_gen_unregister_cdev(pps_gen); +} +EXPORT_SYMBOL(pps_gen_unregister_source); + +/* pps_gen_event - register a PPS generator event into the system + * @pps: the PPS generator device + * @event: the event type + * @data: userdef pointer + * + * This function is used by each PPS generator in order to register a new + * PPS event into the system (it's usually called inside an IRQ handler). + */ +void pps_gen_event(struct pps_gen_device *pps_gen, + unsigned int event, void *data) +{ + unsigned long flags; + + dev_dbg(pps_gen->dev, "PPS generator event %u\n", event); + + spin_lock_irqsave(&pps_gen->lock, flags); + + pps_gen->event = event; + pps_gen->sequence++; + + pps_gen->last_ev++; + wake_up_interruptible_all(&pps_gen->queue); + kill_fasync(&pps_gen->async_queue, SIGIO, POLL_IN); + + spin_unlock_irqrestore(&pps_gen->lock, flags); +} +EXPORT_SYMBOL(pps_gen_event); + +/* + * Module stuff + */ + +static void __exit pps_gen_exit(void) +{ + class_destroy(pps_gen_class); + unregister_chrdev_region(pps_gen_devt, PPS_GEN_MAX_SOURCES); +} + +static int __init pps_gen_init(void) +{ + int err; + + pps_gen_class = class_create("pps-gen"); + if (IS_ERR(pps_gen_class)) { + pr_err("failed to allocate class\n"); + return PTR_ERR(pps_gen_class); + } + pps_gen_class->dev_groups = pps_gen_groups; + + err = alloc_chrdev_region(&pps_gen_devt, 0, + PPS_GEN_MAX_SOURCES, "pps-gen"); + if (err < 0) { + pr_err("failed to allocate char device region\n"); + goto remove_class; + } + + return 0; + +remove_class: + class_destroy(pps_gen_class); + return err; +} + +subsys_initcall(pps_gen_init); +module_exit(pps_gen_exit); + +MODULE_AUTHOR("Rodolfo Giometti "); +MODULE_DESCRIPTION("LinuxPPS generators support"); +MODULE_LICENSE("GPL"); diff --git a/drivers/pps/generators/sysfs.c b/drivers/pps/generators/sysfs.c new file mode 100644 index 000000000000..faf8b1c6d202 --- /dev/null +++ b/drivers/pps/generators/sysfs.c @@ -0,0 +1,75 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * PPS generators sysfs support + * + * Copyright (C) 2024 Rodolfo Giometti + */ + +#include +#include +#include +#include + +/* + * Attribute functions + */ + +static ssize_t system_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct pps_gen_device *pps_gen = dev_get_drvdata(dev); + + return sysfs_emit(buf, "%d\n", pps_gen->info.use_system_clock); +} +static DEVICE_ATTR_RO(system); + +static ssize_t time_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct pps_gen_device *pps_gen = dev_get_drvdata(dev); + struct timespec64 time; + int ret; + + ret = pps_gen->info.get_time(pps_gen, &time); + if (ret) + return ret; + + return sysfs_emit(buf, "%llu %09lu\n", time.tv_sec, time.tv_nsec); +} +static DEVICE_ATTR_RO(time); + +static ssize_t enable_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct pps_gen_device *pps_gen = dev_get_drvdata(dev); + bool status; + int ret; + + ret = kstrtobool(buf, &status); + if (ret) + return ret; + + ret = pps_gen->info.enable(pps_gen, status); + if (ret) + return ret; + pps_gen->enabled = status; + + return count; +} +static DEVICE_ATTR_WO(enable); + +static struct attribute *pps_gen_attrs[] = { + &dev_attr_enable.attr, + &dev_attr_time.attr, + &dev_attr_system.attr, + NULL, +}; + +static const struct attribute_group pps_gen_group = { + .attrs = pps_gen_attrs, +}; + +const struct attribute_group *pps_gen_groups[] = { + &pps_gen_group, + NULL, +}; diff --git a/include/linux/pps_gen_kernel.h b/include/linux/pps_gen_kernel.h new file mode 100644 index 000000000000..022ea0ac4440 --- /dev/null +++ b/include/linux/pps_gen_kernel.h @@ -0,0 +1,78 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * PPS generator API kernel header + * + * Copyright (C) 2024 Rodolfo Giometti + */ + +#ifndef LINUX_PPS_GEN_KERNEL_H +#define LINUX_PPS_GEN_KERNEL_H + +#include +#include +#include + +/* + * Global defines + */ + +#define PPS_GEN_MAX_SOURCES 16 /* should be enough... */ + +struct pps_gen_device; + +/** + * struct pps_gen_source_info - the specific PPS generator info + * @use_system_clock: true, if the system clock is used to generate pulses + * @get_time: query the time stored into the generator clock + * @enable: enable/disable the PPS pulses generation + * + * This is the main generator struct where all needed information must be + * placed before calling the pps_gen_register_source(). + */ +struct pps_gen_source_info { + bool use_system_clock; + + int (*get_time)(struct pps_gen_device *pps_gen, + struct timespec64 *time); + int (*enable)(struct pps_gen_device *pps_gen, bool enable); + +/* private: internal use only */ + struct module *owner; + struct device *parent; /* for device_create */ +}; + +/* The main struct */ +struct pps_gen_device { + struct pps_gen_source_info info; /* PSS generator info */ + bool enabled; /* PSS generator status */ + + unsigned int event; + unsigned int sequence; + + unsigned int last_ev; /* last PPS event id */ + wait_queue_head_t queue; /* PPS event queue */ + + unsigned int id; /* PPS generator unique ID */ + struct cdev cdev; + struct device *dev; + struct fasync_struct *async_queue; /* fasync method */ + spinlock_t lock; +}; + +/* + * Global variables + */ + +extern const struct attribute_group *pps_gen_groups[]; + +/* + * Exported functions + */ + +extern struct pps_gen_device *pps_gen_register_source( + struct pps_gen_source_info *info); +extern void pps_gen_unregister_source(struct pps_gen_device *pps_gen); +extern void pps_gen_event(struct pps_gen_device *pps_gen, + unsigned int event, void *data); + +#endif /* LINUX_PPS_GEN_KERNEL_H */ diff --git a/include/uapi/linux/pps_gen.h b/include/uapi/linux/pps_gen.h new file mode 100644 index 000000000000..60a5d0fcfa68 --- /dev/null +++ b/include/uapi/linux/pps_gen.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * PPS generator API header + * + * Copyright (C) 2024 Rodolfo Giometti + */ + +#ifndef _PPS_GEN_H_ +#define _PPS_GEN_H_ + +#include +#include + +/** + * struct pps_gen_event - the PPS generator events + * @event: the event type + * @sequence: the event sequence number + * + * Userspace can get the last PPS generator event by using the + * ioctl(pps_gen, PPS_GEN_FETCHEVENT, ...) syscall. + * The sequence field can be used to save the last event ID, while in the + * event field is stored the last event type. Currently known event is: + * + * PPS_GEN_EVENT_MISSEDPULSE : last pulse was not generated + */ +struct pps_gen_event { + unsigned int event; + unsigned int sequence; +}; + +#define PPS_GEN_EVENT_MISSEDPULSE 1 + +#define PPS_GEN_SETENABLE _IOW('p', 0xb1, unsigned int *) +#define PPS_GEN_USESYSTEMCLOCK _IOR('p', 0xb2, unsigned int *) +#define PPS_GEN_FETCHEVENT _IOR('p', 0xb3, struct pps_gen_event *) + +#endif /* _PPS_GEN_H_ */ -- cgit v1.2.3 From d75abf2f9f2e584633bd1072267a2ff25deb422b Mon Sep 17 00:00:00 2001 From: Elizabeth Figura Date: Fri, 13 Dec 2024 13:34:42 -0600 Subject: ntsync: Return the fd from NTSYNC_IOC_CREATE_SEM. Simplify the user API a bit by returning the fd as return value from the ioctl instead of through the argument pointer. Signed-off-by: Elizabeth Figura Link: https://lore.kernel.org/r/20241213193511.457338-2-zfigura@codeweavers.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/ntsync.c | 7 ++----- include/uapi/linux/ntsync.h | 3 +-- 2 files changed, 3 insertions(+), 7 deletions(-) (limited to 'include/uapi') diff --git a/drivers/misc/ntsync.c b/drivers/misc/ntsync.c index 4954553b7baa..2e7f698268c1 100644 --- a/drivers/misc/ntsync.c +++ b/drivers/misc/ntsync.c @@ -165,7 +165,6 @@ static int ntsync_obj_get_fd(struct ntsync_obj *obj) static int ntsync_create_sem(struct ntsync_device *dev, void __user *argp) { - struct ntsync_sem_args __user *user_args = argp; struct ntsync_sem_args args; struct ntsync_obj *sem; int fd; @@ -182,12 +181,10 @@ static int ntsync_create_sem(struct ntsync_device *dev, void __user *argp) sem->u.sem.count = args.count; sem->u.sem.max = args.max; fd = ntsync_obj_get_fd(sem); - if (fd < 0) { + if (fd < 0) kfree(sem); - return fd; - } - return put_user(fd, &user_args->sem); + return fd; } static int ntsync_char_open(struct inode *inode, struct file *file) diff --git a/include/uapi/linux/ntsync.h b/include/uapi/linux/ntsync.h index dcfa38fdc93c..27d8cb3dd5b7 100644 --- a/include/uapi/linux/ntsync.h +++ b/include/uapi/linux/ntsync.h @@ -11,12 +11,11 @@ #include struct ntsync_sem_args { - __u32 sem; __u32 count; __u32 max; }; -#define NTSYNC_IOC_CREATE_SEM _IOWR('N', 0x80, struct ntsync_sem_args) +#define NTSYNC_IOC_CREATE_SEM _IOW ('N', 0x80, struct ntsync_sem_args) #define NTSYNC_IOC_SEM_POST _IOWR('N', 0x81, __u32) -- cgit v1.2.3 From 5ec43d6b0328a2383525a2b90306c9077480e0bf Mon Sep 17 00:00:00 2001 From: Elizabeth Figura Date: Fri, 13 Dec 2024 13:34:43 -0600 Subject: ntsync: Rename NTSYNC_IOC_SEM_POST to NTSYNC_IOC_SEM_RELEASE. Use the more common "release" terminology, which is also the term used by NT, instead of "post" (which is used by POSIX). Signed-off-by: Elizabeth Figura Link: https://lore.kernel.org/r/20241213193511.457338-3-zfigura@codeweavers.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/ntsync.c | 10 +++++----- include/uapi/linux/ntsync.h | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/uapi') diff --git a/drivers/misc/ntsync.c b/drivers/misc/ntsync.c index 2e7f698268c1..cb3a3bd97ba0 100644 --- a/drivers/misc/ntsync.c +++ b/drivers/misc/ntsync.c @@ -57,7 +57,7 @@ struct ntsync_device { * Actually change the semaphore state, returning -EOVERFLOW if it is made * invalid. */ -static int post_sem_state(struct ntsync_obj *sem, __u32 count) +static int release_sem_state(struct ntsync_obj *sem, __u32 count) { __u32 sum; @@ -71,7 +71,7 @@ static int post_sem_state(struct ntsync_obj *sem, __u32 count) return 0; } -static int ntsync_sem_post(struct ntsync_obj *sem, void __user *argp) +static int ntsync_sem_release(struct ntsync_obj *sem, void __user *argp) { __u32 __user *user_args = argp; __u32 prev_count; @@ -87,7 +87,7 @@ static int ntsync_sem_post(struct ntsync_obj *sem, void __user *argp) spin_lock(&sem->lock); prev_count = sem->u.sem.count; - ret = post_sem_state(sem, args); + ret = release_sem_state(sem, args); spin_unlock(&sem->lock); @@ -114,8 +114,8 @@ static long ntsync_obj_ioctl(struct file *file, unsigned int cmd, void __user *argp = (void __user *)parm; switch (cmd) { - case NTSYNC_IOC_SEM_POST: - return ntsync_sem_post(obj, argp); + case NTSYNC_IOC_SEM_RELEASE: + return ntsync_sem_release(obj, argp); default: return -ENOIOCTLCMD; } diff --git a/include/uapi/linux/ntsync.h b/include/uapi/linux/ntsync.h index 27d8cb3dd5b7..9af9d8125553 100644 --- a/include/uapi/linux/ntsync.h +++ b/include/uapi/linux/ntsync.h @@ -17,6 +17,6 @@ struct ntsync_sem_args { #define NTSYNC_IOC_CREATE_SEM _IOW ('N', 0x80, struct ntsync_sem_args) -#define NTSYNC_IOC_SEM_POST _IOWR('N', 0x81, __u32) +#define NTSYNC_IOC_SEM_RELEASE _IOWR('N', 0x81, __u32) #endif -- cgit v1.2.3 From b4a7b5fe3f5149fa35278807e0dc13ddb093f4b8 Mon Sep 17 00:00:00 2001 From: Elizabeth Figura Date: Fri, 13 Dec 2024 13:34:44 -0600 Subject: ntsync: Introduce NTSYNC_IOC_WAIT_ANY. This corresponds to part of the functionality of the NT syscall NtWaitForMultipleObjects(). Specifically, it implements the behaviour where the third argument (wait_any) is TRUE, and it does not handle alertable waits. Those features have been split out into separate patches to ease review. This patch therefore implements the wait/wake infrastructure which comprises the core of ntsync's functionality. NTSYNC_IOC_WAIT_ANY is a vectored wait function similar to poll(). Unlike poll(), it "consumes" objects when they are signaled. For semaphores, this means decreasing one from the internal counter. At most one object can be consumed by this function. This wait/wake model is fundamentally different from that used anywhere else in the kernel, and for that reason ntsync does not use any existing infrastructure, such as futexes, kernel mutexes or semaphores, or wait_event(). Up to 64 objects can be waited on at once. As soon as one is signaled, the object with the lowest index is consumed, and that index is returned via the "index" field. A timeout is supported. The timeout is passed as a u64 nanosecond value, which represents absolute time measured against either the MONOTONIC or REALTIME clock (controlled by the flags argument). If U64_MAX is passed, the ioctl waits indefinitely. This ioctl validates that all objects belong to the relevant device. This is not necessary for any technical reason related to NTSYNC_IOC_WAIT_ANY, but will be necessary for NTSYNC_IOC_WAIT_ALL introduced in the following patch. Some padding fields are added for alignment and for fields which will be added in future patches (split out to ease review). Signed-off-by: Elizabeth Figura Link: https://lore.kernel.org/r/20241213193511.457338-4-zfigura@codeweavers.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/ntsync.c | 247 +++++++++++++++++++++++++++++++++++++++++++- include/uapi/linux/ntsync.h | 14 +++ 2 files changed, 260 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/drivers/misc/ntsync.c b/drivers/misc/ntsync.c index cb3a3bd97ba0..900cc5ce5761 100644 --- a/drivers/misc/ntsync.c +++ b/drivers/misc/ntsync.c @@ -6,11 +6,16 @@ */ #include +#include #include #include +#include +#include #include #include #include +#include +#include #include #include #include @@ -30,6 +35,8 @@ enum ntsync_type { * * Both rely on struct file for reference counting. Individual * ntsync_obj objects take a reference to the device when created. + * Wait operations take a reference to each object being waited on for + * the duration of the wait. */ struct ntsync_obj { @@ -47,12 +54,55 @@ struct ntsync_obj { __u32 max; } sem; } u; + + struct list_head any_waiters; +}; + +struct ntsync_q_entry { + struct list_head node; + struct ntsync_q *q; + struct ntsync_obj *obj; + __u32 index; +}; + +struct ntsync_q { + struct task_struct *task; + + /* + * Protected via atomic_try_cmpxchg(). Only the thread that wins the + * compare-and-swap may actually change object states and wake this + * task. + */ + atomic_t signaled; + + __u32 count; + struct ntsync_q_entry entries[]; }; struct ntsync_device { struct file *file; }; +static void try_wake_any_sem(struct ntsync_obj *sem) +{ + struct ntsync_q_entry *entry; + + lockdep_assert_held(&sem->lock); + + list_for_each_entry(entry, &sem->any_waiters, node) { + struct ntsync_q *q = entry->q; + int signaled = -1; + + if (!sem->u.sem.count) + break; + + if (atomic_try_cmpxchg(&q->signaled, &signaled, entry->index)) { + sem->u.sem.count--; + wake_up_process(q->task); + } + } +} + /* * Actually change the semaphore state, returning -EOVERFLOW if it is made * invalid. @@ -87,7 +137,9 @@ static int ntsync_sem_release(struct ntsync_obj *sem, void __user *argp) spin_lock(&sem->lock); prev_count = sem->u.sem.count; - ret = release_sem_state(sem, args); + ret = post_sem_state(sem, args); + if (!ret) + try_wake_any_sem(sem); spin_unlock(&sem->lock); @@ -140,6 +192,7 @@ static struct ntsync_obj *ntsync_alloc_obj(struct ntsync_device *dev, obj->dev = dev; get_file(dev->file); spin_lock_init(&obj->lock); + INIT_LIST_HEAD(&obj->any_waiters); return obj; } @@ -187,6 +240,196 @@ static int ntsync_create_sem(struct ntsync_device *dev, void __user *argp) return fd; } +static struct ntsync_obj *get_obj(struct ntsync_device *dev, int fd) +{ + struct file *file = fget(fd); + struct ntsync_obj *obj; + + if (!file) + return NULL; + + if (file->f_op != &ntsync_obj_fops) { + fput(file); + return NULL; + } + + obj = file->private_data; + if (obj->dev != dev) { + fput(file); + return NULL; + } + + return obj; +} + +static void put_obj(struct ntsync_obj *obj) +{ + fput(obj->file); +} + +static int ntsync_schedule(const struct ntsync_q *q, const struct ntsync_wait_args *args) +{ + ktime_t timeout = ns_to_ktime(args->timeout); + clockid_t clock = CLOCK_MONOTONIC; + ktime_t *timeout_ptr; + int ret = 0; + + timeout_ptr = (args->timeout == U64_MAX ? NULL : &timeout); + + if (args->flags & NTSYNC_WAIT_REALTIME) + clock = CLOCK_REALTIME; + + do { + if (signal_pending(current)) { + ret = -ERESTARTSYS; + break; + } + + set_current_state(TASK_INTERRUPTIBLE); + if (atomic_read(&q->signaled) != -1) { + ret = 0; + break; + } + ret = schedule_hrtimeout_range_clock(timeout_ptr, 0, HRTIMER_MODE_ABS, clock); + } while (ret < 0); + __set_current_state(TASK_RUNNING); + + return ret; +} + +/* + * Allocate and initialize the ntsync_q structure, but do not queue us yet. + */ +static int setup_wait(struct ntsync_device *dev, + const struct ntsync_wait_args *args, + struct ntsync_q **ret_q) +{ + const __u32 count = args->count; + int fds[NTSYNC_MAX_WAIT_COUNT]; + struct ntsync_q *q; + __u32 i, j; + + if (args->pad[0] || args->pad[1] || args->pad[2] || (args->flags & ~NTSYNC_WAIT_REALTIME)) + return -EINVAL; + + if (args->count > NTSYNC_MAX_WAIT_COUNT) + return -EINVAL; + + if (copy_from_user(fds, u64_to_user_ptr(args->objs), + array_size(count, sizeof(*fds)))) + return -EFAULT; + + q = kmalloc(struct_size(q, entries, count), GFP_KERNEL); + if (!q) + return -ENOMEM; + q->task = current; + atomic_set(&q->signaled, -1); + q->count = count; + + for (i = 0; i < count; i++) { + struct ntsync_q_entry *entry = &q->entries[i]; + struct ntsync_obj *obj = get_obj(dev, fds[i]); + + if (!obj) + goto err; + + entry->obj = obj; + entry->q = q; + entry->index = i; + } + + *ret_q = q; + return 0; + +err: + for (j = 0; j < i; j++) + put_obj(q->entries[j].obj); + kfree(q); + return -EINVAL; +} + +static void try_wake_any_obj(struct ntsync_obj *obj) +{ + switch (obj->type) { + case NTSYNC_TYPE_SEM: + try_wake_any_sem(obj); + break; + } +} + +static int ntsync_wait_any(struct ntsync_device *dev, void __user *argp) +{ + struct ntsync_wait_args args; + struct ntsync_q *q; + int signaled; + __u32 i; + int ret; + + if (copy_from_user(&args, argp, sizeof(args))) + return -EFAULT; + + ret = setup_wait(dev, &args, &q); + if (ret < 0) + return ret; + + /* queue ourselves */ + + for (i = 0; i < args.count; i++) { + struct ntsync_q_entry *entry = &q->entries[i]; + struct ntsync_obj *obj = entry->obj; + + spin_lock(&obj->lock); + list_add_tail(&entry->node, &obj->any_waiters); + spin_unlock(&obj->lock); + } + + /* check if we are already signaled */ + + for (i = 0; i < args.count; i++) { + struct ntsync_obj *obj = q->entries[i].obj; + + if (atomic_read(&q->signaled) != -1) + break; + + spin_lock(&obj->lock); + try_wake_any_obj(obj); + spin_unlock(&obj->lock); + } + + /* sleep */ + + ret = ntsync_schedule(q, &args); + + /* and finally, unqueue */ + + for (i = 0; i < args.count; i++) { + struct ntsync_q_entry *entry = &q->entries[i]; + struct ntsync_obj *obj = entry->obj; + + spin_lock(&obj->lock); + list_del(&entry->node); + spin_unlock(&obj->lock); + + put_obj(obj); + } + + signaled = atomic_read(&q->signaled); + if (signaled != -1) { + struct ntsync_wait_args __user *user_args = argp; + + /* even if we caught a signal, we need to communicate success */ + ret = 0; + + if (put_user(signaled, &user_args->index)) + ret = -EFAULT; + } else if (!ret) { + ret = -ETIMEDOUT; + } + + kfree(q); + return ret; +} + static int ntsync_char_open(struct inode *inode, struct file *file) { struct ntsync_device *dev; @@ -218,6 +461,8 @@ static long ntsync_char_ioctl(struct file *file, unsigned int cmd, switch (cmd) { case NTSYNC_IOC_CREATE_SEM: return ntsync_create_sem(dev, argp); + case NTSYNC_IOC_WAIT_ANY: + return ntsync_wait_any(dev, argp); default: return -ENOIOCTLCMD; } diff --git a/include/uapi/linux/ntsync.h b/include/uapi/linux/ntsync.h index 9af9d8125553..40ffdc41d5bb 100644 --- a/include/uapi/linux/ntsync.h +++ b/include/uapi/linux/ntsync.h @@ -15,7 +15,21 @@ struct ntsync_sem_args { __u32 max; }; +#define NTSYNC_WAIT_REALTIME 0x1 + +struct ntsync_wait_args { + __u64 timeout; + __u64 objs; + __u32 count; + __u32 index; + __u32 flags; + __u32 pad[3]; +}; + +#define NTSYNC_MAX_WAIT_COUNT 64 + #define NTSYNC_IOC_CREATE_SEM _IOW ('N', 0x80, struct ntsync_sem_args) +#define NTSYNC_IOC_WAIT_ANY _IOWR('N', 0x82, struct ntsync_wait_args) #define NTSYNC_IOC_SEM_RELEASE _IOWR('N', 0x81, __u32) -- cgit v1.2.3 From cdbb997822806cc1071619b67aef485bb2b921b1 Mon Sep 17 00:00:00 2001 From: Elizabeth Figura Date: Fri, 13 Dec 2024 13:34:45 -0600 Subject: ntsync: Introduce NTSYNC_IOC_WAIT_ALL. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is similar to NTSYNC_IOC_WAIT_ANY, but waits until all of the objects are simultaneously signaled, and then acquires all of them as a single atomic operation. Because acquisition of multiple objects is atomic, some complex locking is required. We cannot simply spin-lock multiple objects simultaneously, as that may disable preĂ«mption for a problematically long time. Instead, modifying any object which may be involved in a wait-all operation takes a device-wide sleeping mutex, "wait_all_lock", instead of the normal object spinlock. Because wait-for-all is a rare operation, in order to optimize wait-for-any, this lock is only taken when necessary. "all_hint" is used to mark objects which are involved in a wait-for-all operation, and if an object is not, only its spinlock is taken. The locking scheme used here was written by Peter Zijlstra. Signed-off-by: Elizabeth Figura Link: https://lore.kernel.org/r/20241213193511.457338-5-zfigura@codeweavers.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/ntsync.c | 336 ++++++++++++++++++++++++++++++++++++++++++-- include/uapi/linux/ntsync.h | 1 + 2 files changed, 323 insertions(+), 14 deletions(-) (limited to 'include/uapi') diff --git a/drivers/misc/ntsync.c b/drivers/misc/ntsync.c index 900cc5ce5761..c8beb5504bcc 100644 --- a/drivers/misc/ntsync.c +++ b/drivers/misc/ntsync.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -41,6 +42,7 @@ enum ntsync_type { struct ntsync_obj { spinlock_t lock; + int dev_locked; enum ntsync_type type; @@ -55,7 +57,30 @@ struct ntsync_obj { } sem; } u; + /* + * any_waiters is protected by the object lock, but all_waiters is + * protected by the device wait_all_lock. + */ struct list_head any_waiters; + struct list_head all_waiters; + + /* + * Hint describing how many tasks are queued on this object in a + * wait-all operation. + * + * Any time we do a wake, we may need to wake "all" waiters as well as + * "any" waiters. In order to atomically wake "all" waiters, we must + * lock all of the objects, and that means grabbing the wait_all_lock + * below (and, due to lock ordering rules, before locking this object). + * However, wait-all is a rare operation, and grabbing the wait-all + * lock for every wake would create unnecessary contention. + * Therefore we first check whether all_hint is zero, and, if it is, + * we skip trying to wake "all" waiters. + * + * Since wait requests must originate from user-space threads, we're + * limited here by PID_MAX_LIMIT, so there's no risk of overflow. + */ + atomic_t all_hint; }; struct ntsync_q_entry { @@ -75,19 +100,198 @@ struct ntsync_q { */ atomic_t signaled; + bool all; __u32 count; struct ntsync_q_entry entries[]; }; struct ntsync_device { + /* + * Wait-all operations must atomically grab all objects, and be totally + * ordered with respect to each other and wait-any operations. + * If one thread is trying to acquire several objects, another thread + * cannot touch the object at the same time. + * + * This device-wide lock is used to serialize wait-for-all + * operations, and operations on an object that is involved in a + * wait-for-all. + */ + struct mutex wait_all_lock; + struct file *file; }; +/* + * Single objects are locked using obj->lock. + * + * Multiple objects are 'locked' while holding dev->wait_all_lock. + * In this case however, individual objects are not locked by holding + * obj->lock, but by setting obj->dev_locked. + * + * This means that in order to lock a single object, the sequence is slightly + * more complicated than usual. Specifically it needs to check obj->dev_locked + * after acquiring obj->lock, if set, it needs to drop the lock and acquire + * dev->wait_all_lock in order to serialize against the multi-object operation. + */ + +static void dev_lock_obj(struct ntsync_device *dev, struct ntsync_obj *obj) +{ + lockdep_assert_held(&dev->wait_all_lock); + lockdep_assert(obj->dev == dev); + spin_lock(&obj->lock); + /* + * By setting obj->dev_locked inside obj->lock, it is ensured that + * anyone holding obj->lock must see the value. + */ + obj->dev_locked = 1; + spin_unlock(&obj->lock); +} + +static void dev_unlock_obj(struct ntsync_device *dev, struct ntsync_obj *obj) +{ + lockdep_assert_held(&dev->wait_all_lock); + lockdep_assert(obj->dev == dev); + spin_lock(&obj->lock); + obj->dev_locked = 0; + spin_unlock(&obj->lock); +} + +static void obj_lock(struct ntsync_obj *obj) +{ + struct ntsync_device *dev = obj->dev; + + for (;;) { + spin_lock(&obj->lock); + if (likely(!obj->dev_locked)) + break; + + spin_unlock(&obj->lock); + mutex_lock(&dev->wait_all_lock); + spin_lock(&obj->lock); + /* + * obj->dev_locked should be set and released under the same + * wait_all_lock section, since we now own this lock, it should + * be clear. + */ + lockdep_assert(!obj->dev_locked); + spin_unlock(&obj->lock); + mutex_unlock(&dev->wait_all_lock); + } +} + +static void obj_unlock(struct ntsync_obj *obj) +{ + spin_unlock(&obj->lock); +} + +static bool ntsync_lock_obj(struct ntsync_device *dev, struct ntsync_obj *obj) +{ + bool all; + + obj_lock(obj); + all = atomic_read(&obj->all_hint); + if (unlikely(all)) { + obj_unlock(obj); + mutex_lock(&dev->wait_all_lock); + dev_lock_obj(dev, obj); + } + + return all; +} + +static void ntsync_unlock_obj(struct ntsync_device *dev, struct ntsync_obj *obj, bool all) +{ + if (all) { + dev_unlock_obj(dev, obj); + mutex_unlock(&dev->wait_all_lock); + } else { + obj_unlock(obj); + } +} + +#define ntsync_assert_held(obj) \ + lockdep_assert((lockdep_is_held(&(obj)->lock) != LOCK_STATE_NOT_HELD) || \ + ((lockdep_is_held(&(obj)->dev->wait_all_lock) != LOCK_STATE_NOT_HELD) && \ + (obj)->dev_locked)) + +static bool is_signaled(struct ntsync_obj *obj) +{ + ntsync_assert_held(obj); + + switch (obj->type) { + case NTSYNC_TYPE_SEM: + return !!obj->u.sem.count; + } + + WARN(1, "bad object type %#x\n", obj->type); + return false; +} + +/* + * "locked_obj" is an optional pointer to an object which is already locked and + * should not be locked again. This is necessary so that changing an object's + * state and waking it can be a single atomic operation. + */ +static void try_wake_all(struct ntsync_device *dev, struct ntsync_q *q, + struct ntsync_obj *locked_obj) +{ + __u32 count = q->count; + bool can_wake = true; + int signaled = -1; + __u32 i; + + lockdep_assert_held(&dev->wait_all_lock); + if (locked_obj) + lockdep_assert(locked_obj->dev_locked); + + for (i = 0; i < count; i++) { + if (q->entries[i].obj != locked_obj) + dev_lock_obj(dev, q->entries[i].obj); + } + + for (i = 0; i < count; i++) { + if (!is_signaled(q->entries[i].obj)) { + can_wake = false; + break; + } + } + + if (can_wake && atomic_try_cmpxchg(&q->signaled, &signaled, 0)) { + for (i = 0; i < count; i++) { + struct ntsync_obj *obj = q->entries[i].obj; + + switch (obj->type) { + case NTSYNC_TYPE_SEM: + obj->u.sem.count--; + break; + } + } + wake_up_process(q->task); + } + + for (i = 0; i < count; i++) { + if (q->entries[i].obj != locked_obj) + dev_unlock_obj(dev, q->entries[i].obj); + } +} + +static void try_wake_all_obj(struct ntsync_device *dev, struct ntsync_obj *obj) +{ + struct ntsync_q_entry *entry; + + lockdep_assert_held(&dev->wait_all_lock); + lockdep_assert(obj->dev_locked); + + list_for_each_entry(entry, &obj->all_waiters, node) + try_wake_all(dev, entry->q, obj); +} + static void try_wake_any_sem(struct ntsync_obj *sem) { struct ntsync_q_entry *entry; - lockdep_assert_held(&sem->lock); + ntsync_assert_held(sem); + lockdep_assert(sem->type == NTSYNC_TYPE_SEM); list_for_each_entry(entry, &sem->any_waiters, node) { struct ntsync_q *q = entry->q; @@ -111,7 +315,7 @@ static int release_sem_state(struct ntsync_obj *sem, __u32 count) { __u32 sum; - lockdep_assert_held(&sem->lock); + ntsync_assert_held(sem); if (check_add_overflow(sem->u.sem.count, count, &sum) || sum > sem->u.sem.max) @@ -123,9 +327,11 @@ static int release_sem_state(struct ntsync_obj *sem, __u32 count) static int ntsync_sem_release(struct ntsync_obj *sem, void __user *argp) { + struct ntsync_device *dev = sem->dev; __u32 __user *user_args = argp; __u32 prev_count; __u32 args; + bool all; int ret; if (copy_from_user(&args, argp, sizeof(args))) @@ -134,14 +340,17 @@ static int ntsync_sem_release(struct ntsync_obj *sem, void __user *argp) if (sem->type != NTSYNC_TYPE_SEM) return -EINVAL; - spin_lock(&sem->lock); + all = ntsync_lock_obj(dev, sem); prev_count = sem->u.sem.count; - ret = post_sem_state(sem, args); - if (!ret) + ret = release_sem_state(sem, args); + if (!ret) { + if (all) + try_wake_all_obj(dev, sem); try_wake_any_sem(sem); + } - spin_unlock(&sem->lock); + ntsync_unlock_obj(dev, sem, all); if (!ret && put_user(prev_count, user_args)) ret = -EFAULT; @@ -193,6 +402,8 @@ static struct ntsync_obj *ntsync_alloc_obj(struct ntsync_device *dev, get_file(dev->file); spin_lock_init(&obj->lock); INIT_LIST_HEAD(&obj->any_waiters); + INIT_LIST_HEAD(&obj->all_waiters); + atomic_set(&obj->all_hint, 0); return obj; } @@ -301,7 +512,7 @@ static int ntsync_schedule(const struct ntsync_q *q, const struct ntsync_wait_ar * Allocate and initialize the ntsync_q structure, but do not queue us yet. */ static int setup_wait(struct ntsync_device *dev, - const struct ntsync_wait_args *args, + const struct ntsync_wait_args *args, bool all, struct ntsync_q **ret_q) { const __u32 count = args->count; @@ -324,6 +535,7 @@ static int setup_wait(struct ntsync_device *dev, return -ENOMEM; q->task = current; atomic_set(&q->signaled, -1); + q->all = all; q->count = count; for (i = 0; i < count; i++) { @@ -333,6 +545,16 @@ static int setup_wait(struct ntsync_device *dev, if (!obj) goto err; + if (all) { + /* Check that the objects are all distinct. */ + for (j = 0; j < i; j++) { + if (obj == q->entries[j].obj) { + put_obj(obj); + goto err; + } + } + } + entry->obj = obj; entry->q = q; entry->index = i; @@ -362,13 +584,14 @@ static int ntsync_wait_any(struct ntsync_device *dev, void __user *argp) struct ntsync_wait_args args; struct ntsync_q *q; int signaled; + bool all; __u32 i; int ret; if (copy_from_user(&args, argp, sizeof(args))) return -EFAULT; - ret = setup_wait(dev, &args, &q); + ret = setup_wait(dev, &args, false, &q); if (ret < 0) return ret; @@ -378,9 +601,9 @@ static int ntsync_wait_any(struct ntsync_device *dev, void __user *argp) struct ntsync_q_entry *entry = &q->entries[i]; struct ntsync_obj *obj = entry->obj; - spin_lock(&obj->lock); + all = ntsync_lock_obj(dev, obj); list_add_tail(&entry->node, &obj->any_waiters); - spin_unlock(&obj->lock); + ntsync_unlock_obj(dev, obj, all); } /* check if we are already signaled */ @@ -391,9 +614,9 @@ static int ntsync_wait_any(struct ntsync_device *dev, void __user *argp) if (atomic_read(&q->signaled) != -1) break; - spin_lock(&obj->lock); + all = ntsync_lock_obj(dev, obj); try_wake_any_obj(obj); - spin_unlock(&obj->lock); + ntsync_unlock_obj(dev, obj, all); } /* sleep */ @@ -406,13 +629,94 @@ static int ntsync_wait_any(struct ntsync_device *dev, void __user *argp) struct ntsync_q_entry *entry = &q->entries[i]; struct ntsync_obj *obj = entry->obj; - spin_lock(&obj->lock); + all = ntsync_lock_obj(dev, obj); list_del(&entry->node); - spin_unlock(&obj->lock); + ntsync_unlock_obj(dev, obj, all); + + put_obj(obj); + } + + signaled = atomic_read(&q->signaled); + if (signaled != -1) { + struct ntsync_wait_args __user *user_args = argp; + + /* even if we caught a signal, we need to communicate success */ + ret = 0; + + if (put_user(signaled, &user_args->index)) + ret = -EFAULT; + } else if (!ret) { + ret = -ETIMEDOUT; + } + + kfree(q); + return ret; +} + +static int ntsync_wait_all(struct ntsync_device *dev, void __user *argp) +{ + struct ntsync_wait_args args; + struct ntsync_q *q; + int signaled; + __u32 i; + int ret; + + if (copy_from_user(&args, argp, sizeof(args))) + return -EFAULT; + + ret = setup_wait(dev, &args, true, &q); + if (ret < 0) + return ret; + + /* queue ourselves */ + + mutex_lock(&dev->wait_all_lock); + + for (i = 0; i < args.count; i++) { + struct ntsync_q_entry *entry = &q->entries[i]; + struct ntsync_obj *obj = entry->obj; + + atomic_inc(&obj->all_hint); + + /* + * obj->all_waiters is protected by dev->wait_all_lock rather + * than obj->lock, so there is no need to acquire obj->lock + * here. + */ + list_add_tail(&entry->node, &obj->all_waiters); + } + + /* check if we are already signaled */ + + try_wake_all(dev, q, NULL); + + mutex_unlock(&dev->wait_all_lock); + + /* sleep */ + + ret = ntsync_schedule(q, &args); + + /* and finally, unqueue */ + + mutex_lock(&dev->wait_all_lock); + + for (i = 0; i < args.count; i++) { + struct ntsync_q_entry *entry = &q->entries[i]; + struct ntsync_obj *obj = entry->obj; + + /* + * obj->all_waiters is protected by dev->wait_all_lock rather + * than obj->lock, so there is no need to acquire it here. + */ + list_del(&entry->node); + + atomic_dec(&obj->all_hint); put_obj(obj); } + mutex_unlock(&dev->wait_all_lock); + signaled = atomic_read(&q->signaled); if (signaled != -1) { struct ntsync_wait_args __user *user_args = argp; @@ -438,6 +742,8 @@ static int ntsync_char_open(struct inode *inode, struct file *file) if (!dev) return -ENOMEM; + mutex_init(&dev->wait_all_lock); + file->private_data = dev; dev->file = file; return nonseekable_open(inode, file); @@ -461,6 +767,8 @@ static long ntsync_char_ioctl(struct file *file, unsigned int cmd, switch (cmd) { case NTSYNC_IOC_CREATE_SEM: return ntsync_create_sem(dev, argp); + case NTSYNC_IOC_WAIT_ALL: + return ntsync_wait_all(dev, argp); case NTSYNC_IOC_WAIT_ANY: return ntsync_wait_any(dev, argp); default: diff --git a/include/uapi/linux/ntsync.h b/include/uapi/linux/ntsync.h index 40ffdc41d5bb..d64420ffbcd7 100644 --- a/include/uapi/linux/ntsync.h +++ b/include/uapi/linux/ntsync.h @@ -30,6 +30,7 @@ struct ntsync_wait_args { #define NTSYNC_IOC_CREATE_SEM _IOW ('N', 0x80, struct ntsync_sem_args) #define NTSYNC_IOC_WAIT_ANY _IOWR('N', 0x82, struct ntsync_wait_args) +#define NTSYNC_IOC_WAIT_ALL _IOWR('N', 0x83, struct ntsync_wait_args) #define NTSYNC_IOC_SEM_RELEASE _IOWR('N', 0x81, __u32) -- cgit v1.2.3 From 5bc2479a3585be35ea32e431bc82282bbf5a5d5b Mon Sep 17 00:00:00 2001 From: Elizabeth Figura Date: Fri, 13 Dec 2024 13:34:46 -0600 Subject: ntsync: Introduce NTSYNC_IOC_CREATE_MUTEX. This corresponds to the NT syscall NtCreateMutant(). An NT mutex is recursive, with a 32-bit recursion counter. When acquired via NtWaitForMultipleObjects(), the recursion counter is incremented by one. The OS records the thread which acquired it. The OS records the thread which acquired it. However, in order to keep this driver self-contained, the owning thread ID is managed by user-space, and passed as a parameter to all relevant ioctls. The initial owner and recursion count, if any, are specified when the mutex is created. Signed-off-by: Elizabeth Figura Link: https://lore.kernel.org/r/20241213193511.457338-6-zfigura@codeweavers.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/ntsync.c | 74 +++++++++++++++++++++++++++++++++++++++++++-- include/uapi/linux/ntsync.h | 9 +++++- 2 files changed, 79 insertions(+), 4 deletions(-) (limited to 'include/uapi') diff --git a/drivers/misc/ntsync.c b/drivers/misc/ntsync.c index c8beb5504bcc..a2826dbff2f4 100644 --- a/drivers/misc/ntsync.c +++ b/drivers/misc/ntsync.c @@ -25,6 +25,7 @@ enum ntsync_type { NTSYNC_TYPE_SEM, + NTSYNC_TYPE_MUTEX, }; /* @@ -55,6 +56,10 @@ struct ntsync_obj { __u32 count; __u32 max; } sem; + struct { + __u32 count; + pid_t owner; + } mutex; } u; /* @@ -92,6 +97,7 @@ struct ntsync_q_entry { struct ntsync_q { struct task_struct *task; + __u32 owner; /* * Protected via atomic_try_cmpxchg(). Only the thread that wins the @@ -214,13 +220,17 @@ static void ntsync_unlock_obj(struct ntsync_device *dev, struct ntsync_obj *obj, ((lockdep_is_held(&(obj)->dev->wait_all_lock) != LOCK_STATE_NOT_HELD) && \ (obj)->dev_locked)) -static bool is_signaled(struct ntsync_obj *obj) +static bool is_signaled(struct ntsync_obj *obj, __u32 owner) { ntsync_assert_held(obj); switch (obj->type) { case NTSYNC_TYPE_SEM: return !!obj->u.sem.count; + case NTSYNC_TYPE_MUTEX: + if (obj->u.mutex.owner && obj->u.mutex.owner != owner) + return false; + return obj->u.mutex.count < UINT_MAX; } WARN(1, "bad object type %#x\n", obj->type); @@ -250,7 +260,7 @@ static void try_wake_all(struct ntsync_device *dev, struct ntsync_q *q, } for (i = 0; i < count; i++) { - if (!is_signaled(q->entries[i].obj)) { + if (!is_signaled(q->entries[i].obj, q->owner)) { can_wake = false; break; } @@ -264,6 +274,10 @@ static void try_wake_all(struct ntsync_device *dev, struct ntsync_q *q, case NTSYNC_TYPE_SEM: obj->u.sem.count--; break; + case NTSYNC_TYPE_MUTEX: + obj->u.mutex.count++; + obj->u.mutex.owner = q->owner; + break; } } wake_up_process(q->task); @@ -307,6 +321,30 @@ static void try_wake_any_sem(struct ntsync_obj *sem) } } +static void try_wake_any_mutex(struct ntsync_obj *mutex) +{ + struct ntsync_q_entry *entry; + + ntsync_assert_held(mutex); + lockdep_assert(mutex->type == NTSYNC_TYPE_MUTEX); + + list_for_each_entry(entry, &mutex->any_waiters, node) { + struct ntsync_q *q = entry->q; + int signaled = -1; + + if (mutex->u.mutex.count == UINT_MAX) + break; + if (mutex->u.mutex.owner && mutex->u.mutex.owner != q->owner) + continue; + + if (atomic_try_cmpxchg(&q->signaled, &signaled, entry->index)) { + mutex->u.mutex.count++; + mutex->u.mutex.owner = q->owner; + wake_up_process(q->task); + } + } +} + /* * Actually change the semaphore state, returning -EOVERFLOW if it is made * invalid. @@ -451,6 +489,30 @@ static int ntsync_create_sem(struct ntsync_device *dev, void __user *argp) return fd; } +static int ntsync_create_mutex(struct ntsync_device *dev, void __user *argp) +{ + struct ntsync_mutex_args args; + struct ntsync_obj *mutex; + int fd; + + if (copy_from_user(&args, argp, sizeof(args))) + return -EFAULT; + + if (!args.owner != !args.count) + return -EINVAL; + + mutex = ntsync_alloc_obj(dev, NTSYNC_TYPE_MUTEX); + if (!mutex) + return -ENOMEM; + mutex->u.mutex.count = args.count; + mutex->u.mutex.owner = args.owner; + fd = ntsync_obj_get_fd(mutex); + if (fd < 0) + kfree(mutex); + + return fd; +} + static struct ntsync_obj *get_obj(struct ntsync_device *dev, int fd) { struct file *file = fget(fd); @@ -520,7 +582,7 @@ static int setup_wait(struct ntsync_device *dev, struct ntsync_q *q; __u32 i, j; - if (args->pad[0] || args->pad[1] || args->pad[2] || (args->flags & ~NTSYNC_WAIT_REALTIME)) + if (args->pad[0] || args->pad[1] || (args->flags & ~NTSYNC_WAIT_REALTIME)) return -EINVAL; if (args->count > NTSYNC_MAX_WAIT_COUNT) @@ -534,6 +596,7 @@ static int setup_wait(struct ntsync_device *dev, if (!q) return -ENOMEM; q->task = current; + q->owner = args->owner; atomic_set(&q->signaled, -1); q->all = all; q->count = count; @@ -576,6 +639,9 @@ static void try_wake_any_obj(struct ntsync_obj *obj) case NTSYNC_TYPE_SEM: try_wake_any_sem(obj); break; + case NTSYNC_TYPE_MUTEX: + try_wake_any_mutex(obj); + break; } } @@ -765,6 +831,8 @@ static long ntsync_char_ioctl(struct file *file, unsigned int cmd, void __user *argp = (void __user *)parm; switch (cmd) { + case NTSYNC_IOC_CREATE_MUTEX: + return ntsync_create_mutex(dev, argp); case NTSYNC_IOC_CREATE_SEM: return ntsync_create_sem(dev, argp); case NTSYNC_IOC_WAIT_ALL: diff --git a/include/uapi/linux/ntsync.h b/include/uapi/linux/ntsync.h index d64420ffbcd7..bb7fb94f5856 100644 --- a/include/uapi/linux/ntsync.h +++ b/include/uapi/linux/ntsync.h @@ -15,6 +15,11 @@ struct ntsync_sem_args { __u32 max; }; +struct ntsync_mutex_args { + __u32 owner; + __u32 count; +}; + #define NTSYNC_WAIT_REALTIME 0x1 struct ntsync_wait_args { @@ -23,7 +28,8 @@ struct ntsync_wait_args { __u32 count; __u32 index; __u32 flags; - __u32 pad[3]; + __u32 owner; + __u32 pad[2]; }; #define NTSYNC_MAX_WAIT_COUNT 64 @@ -31,6 +37,7 @@ struct ntsync_wait_args { #define NTSYNC_IOC_CREATE_SEM _IOW ('N', 0x80, struct ntsync_sem_args) #define NTSYNC_IOC_WAIT_ANY _IOWR('N', 0x82, struct ntsync_wait_args) #define NTSYNC_IOC_WAIT_ALL _IOWR('N', 0x83, struct ntsync_wait_args) +#define NTSYNC_IOC_CREATE_MUTEX _IOW ('N', 0x84, struct ntsync_mutex_args) #define NTSYNC_IOC_SEM_RELEASE _IOWR('N', 0x81, __u32) -- cgit v1.2.3 From 31ca7bb8e8539b454414a2c4f8ad643c939cb0cf Mon Sep 17 00:00:00 2001 From: Elizabeth Figura Date: Fri, 13 Dec 2024 13:34:47 -0600 Subject: ntsync: Introduce NTSYNC_IOC_MUTEX_UNLOCK. This corresponds to the NT syscall NtReleaseMutant(). This syscall decrements the mutex's recursion count by one, and returns the previous value. If the mutex is not owned by the current task, the function instead fails and returns -EPERM. Signed-off-by: Elizabeth Figura Link: https://lore.kernel.org/r/20241213193511.457338-7-zfigura@codeweavers.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/ntsync.c | 53 +++++++++++++++++++++++++++++++++++++++++++++ include/uapi/linux/ntsync.h | 1 + 2 files changed, 54 insertions(+) (limited to 'include/uapi') diff --git a/drivers/misc/ntsync.c b/drivers/misc/ntsync.c index a2826dbff2f4..33e26240d9e7 100644 --- a/drivers/misc/ntsync.c +++ b/drivers/misc/ntsync.c @@ -396,6 +396,57 @@ static int ntsync_sem_release(struct ntsync_obj *sem, void __user *argp) return ret; } +/* + * Actually change the mutex state, returning -EPERM if not the owner. + */ +static int unlock_mutex_state(struct ntsync_obj *mutex, + const struct ntsync_mutex_args *args) +{ + ntsync_assert_held(mutex); + + if (mutex->u.mutex.owner != args->owner) + return -EPERM; + + if (!--mutex->u.mutex.count) + mutex->u.mutex.owner = 0; + return 0; +} + +static int ntsync_mutex_unlock(struct ntsync_obj *mutex, void __user *argp) +{ + struct ntsync_mutex_args __user *user_args = argp; + struct ntsync_device *dev = mutex->dev; + struct ntsync_mutex_args args; + __u32 prev_count; + bool all; + int ret; + + if (copy_from_user(&args, argp, sizeof(args))) + return -EFAULT; + if (!args.owner) + return -EINVAL; + + if (mutex->type != NTSYNC_TYPE_MUTEX) + return -EINVAL; + + all = ntsync_lock_obj(dev, mutex); + + prev_count = mutex->u.mutex.count; + ret = unlock_mutex_state(mutex, &args); + if (!ret) { + if (all) + try_wake_all_obj(dev, mutex); + try_wake_any_mutex(mutex); + } + + ntsync_unlock_obj(dev, mutex, all); + + if (!ret && put_user(prev_count, &user_args->count)) + ret = -EFAULT; + + return ret; +} + static int ntsync_obj_release(struct inode *inode, struct file *file) { struct ntsync_obj *obj = file->private_data; @@ -415,6 +466,8 @@ static long ntsync_obj_ioctl(struct file *file, unsigned int cmd, switch (cmd) { case NTSYNC_IOC_SEM_RELEASE: return ntsync_sem_release(obj, argp); + case NTSYNC_IOC_MUTEX_UNLOCK: + return ntsync_mutex_unlock(obj, argp); default: return -ENOIOCTLCMD; } diff --git a/include/uapi/linux/ntsync.h b/include/uapi/linux/ntsync.h index bb7fb94f5856..9186304b253c 100644 --- a/include/uapi/linux/ntsync.h +++ b/include/uapi/linux/ntsync.h @@ -40,5 +40,6 @@ struct ntsync_wait_args { #define NTSYNC_IOC_CREATE_MUTEX _IOW ('N', 0x84, struct ntsync_mutex_args) #define NTSYNC_IOC_SEM_RELEASE _IOWR('N', 0x81, __u32) +#define NTSYNC_IOC_MUTEX_UNLOCK _IOWR('N', 0x85, struct ntsync_mutex_args) #endif -- cgit v1.2.3 From ecc2ee361466e4a22e95b1c27d90f8cbd4f22e93 Mon Sep 17 00:00:00 2001 From: Elizabeth Figura Date: Fri, 13 Dec 2024 13:34:48 -0600 Subject: ntsync: Introduce NTSYNC_IOC_MUTEX_KILL. This does not correspond to any NT syscall. Rather, when a thread dies, it should be called by the NT emulator for each mutex, with the TID of the dying thread. NT mutexes are robust (in the pthread sense). When an NT thread dies, any mutexes it owned are immediately released. Acquisition of those mutexes by other threads will return a special value indicating that the mutex was abandoned, like EOWNERDEAD returned from pthread_mutex_lock(), and EOWNERDEAD is indeed used here for that purpose. Signed-off-by: Elizabeth Figura Link: https://lore.kernel.org/r/20241213193511.457338-8-zfigura@codeweavers.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/ntsync.c | 61 +++++++++++++++++++++++++++++++++++++++++++-- include/uapi/linux/ntsync.h | 1 + 2 files changed, 60 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/drivers/misc/ntsync.c b/drivers/misc/ntsync.c index 33e26240d9e7..03768ac25425 100644 --- a/drivers/misc/ntsync.c +++ b/drivers/misc/ntsync.c @@ -59,6 +59,7 @@ struct ntsync_obj { struct { __u32 count; pid_t owner; + bool ownerdead; } mutex; } u; @@ -107,6 +108,7 @@ struct ntsync_q { atomic_t signaled; bool all; + bool ownerdead; __u32 count; struct ntsync_q_entry entries[]; }; @@ -275,6 +277,9 @@ static void try_wake_all(struct ntsync_device *dev, struct ntsync_q *q, obj->u.sem.count--; break; case NTSYNC_TYPE_MUTEX: + if (obj->u.mutex.ownerdead) + q->ownerdead = true; + obj->u.mutex.ownerdead = false; obj->u.mutex.count++; obj->u.mutex.owner = q->owner; break; @@ -338,6 +343,9 @@ static void try_wake_any_mutex(struct ntsync_obj *mutex) continue; if (atomic_try_cmpxchg(&q->signaled, &signaled, entry->index)) { + if (mutex->u.mutex.ownerdead) + q->ownerdead = true; + mutex->u.mutex.ownerdead = false; mutex->u.mutex.count++; mutex->u.mutex.owner = q->owner; wake_up_process(q->task); @@ -447,6 +455,52 @@ static int ntsync_mutex_unlock(struct ntsync_obj *mutex, void __user *argp) return ret; } +/* + * Actually change the mutex state to mark its owner as dead, + * returning -EPERM if not the owner. + */ +static int kill_mutex_state(struct ntsync_obj *mutex, __u32 owner) +{ + ntsync_assert_held(mutex); + + if (mutex->u.mutex.owner != owner) + return -EPERM; + + mutex->u.mutex.ownerdead = true; + mutex->u.mutex.owner = 0; + mutex->u.mutex.count = 0; + return 0; +} + +static int ntsync_mutex_kill(struct ntsync_obj *mutex, void __user *argp) +{ + struct ntsync_device *dev = mutex->dev; + __u32 owner; + bool all; + int ret; + + if (get_user(owner, (__u32 __user *)argp)) + return -EFAULT; + if (!owner) + return -EINVAL; + + if (mutex->type != NTSYNC_TYPE_MUTEX) + return -EINVAL; + + all = ntsync_lock_obj(dev, mutex); + + ret = kill_mutex_state(mutex, owner); + if (!ret) { + if (all) + try_wake_all_obj(dev, mutex); + try_wake_any_mutex(mutex); + } + + ntsync_unlock_obj(dev, mutex, all); + + return ret; +} + static int ntsync_obj_release(struct inode *inode, struct file *file) { struct ntsync_obj *obj = file->private_data; @@ -468,6 +522,8 @@ static long ntsync_obj_ioctl(struct file *file, unsigned int cmd, return ntsync_sem_release(obj, argp); case NTSYNC_IOC_MUTEX_UNLOCK: return ntsync_mutex_unlock(obj, argp); + case NTSYNC_IOC_MUTEX_KILL: + return ntsync_mutex_kill(obj, argp); default: return -ENOIOCTLCMD; } @@ -652,6 +708,7 @@ static int setup_wait(struct ntsync_device *dev, q->owner = args->owner; atomic_set(&q->signaled, -1); q->all = all; + q->ownerdead = false; q->count = count; for (i = 0; i < count; i++) { @@ -760,7 +817,7 @@ static int ntsync_wait_any(struct ntsync_device *dev, void __user *argp) struct ntsync_wait_args __user *user_args = argp; /* even if we caught a signal, we need to communicate success */ - ret = 0; + ret = q->ownerdead ? -EOWNERDEAD : 0; if (put_user(signaled, &user_args->index)) ret = -EFAULT; @@ -841,7 +898,7 @@ static int ntsync_wait_all(struct ntsync_device *dev, void __user *argp) struct ntsync_wait_args __user *user_args = argp; /* even if we caught a signal, we need to communicate success */ - ret = 0; + ret = q->ownerdead ? -EOWNERDEAD : 0; if (put_user(signaled, &user_args->index)) ret = -EFAULT; diff --git a/include/uapi/linux/ntsync.h b/include/uapi/linux/ntsync.h index 9186304b253c..633958d90be3 100644 --- a/include/uapi/linux/ntsync.h +++ b/include/uapi/linux/ntsync.h @@ -41,5 +41,6 @@ struct ntsync_wait_args { #define NTSYNC_IOC_SEM_RELEASE _IOWR('N', 0x81, __u32) #define NTSYNC_IOC_MUTEX_UNLOCK _IOWR('N', 0x85, struct ntsync_mutex_args) +#define NTSYNC_IOC_MUTEX_KILL _IOW ('N', 0x86, __u32) #endif -- cgit v1.2.3 From 4c7404b9c2b572b42dc63bfde5e862290dab53b5 Mon Sep 17 00:00:00 2001 From: Elizabeth Figura Date: Fri, 13 Dec 2024 13:34:49 -0600 Subject: ntsync: Introduce NTSYNC_IOC_CREATE_EVENT. This correspond to the NT syscall NtCreateEvent(). An NT event holds a single bit of state denoting whether it is signaled or unsignaled. There are two types of events: manual-reset and automatic-reset. When an automatic-reset event is acquired via a wait function, its state is reset to unsignaled. Manual-reset events are not affected by wait functions. Whether the event is manual-reset, and its initial state, are specified at creation time. Signed-off-by: Elizabeth Figura Link: https://lore.kernel.org/r/20241213193511.457338-9-zfigura@codeweavers.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/ntsync.c | 59 +++++++++++++++++++++++++++++++++++++++++++++ include/uapi/linux/ntsync.h | 6 +++++ 2 files changed, 65 insertions(+) (limited to 'include/uapi') diff --git a/drivers/misc/ntsync.c b/drivers/misc/ntsync.c index 03768ac25425..3e8827b6f480 100644 --- a/drivers/misc/ntsync.c +++ b/drivers/misc/ntsync.c @@ -26,6 +26,7 @@ enum ntsync_type { NTSYNC_TYPE_SEM, NTSYNC_TYPE_MUTEX, + NTSYNC_TYPE_EVENT, }; /* @@ -61,6 +62,10 @@ struct ntsync_obj { pid_t owner; bool ownerdead; } mutex; + struct { + bool manual; + bool signaled; + } event; } u; /* @@ -233,6 +238,8 @@ static bool is_signaled(struct ntsync_obj *obj, __u32 owner) if (obj->u.mutex.owner && obj->u.mutex.owner != owner) return false; return obj->u.mutex.count < UINT_MAX; + case NTSYNC_TYPE_EVENT: + return obj->u.event.signaled; } WARN(1, "bad object type %#x\n", obj->type); @@ -283,6 +290,10 @@ static void try_wake_all(struct ntsync_device *dev, struct ntsync_q *q, obj->u.mutex.count++; obj->u.mutex.owner = q->owner; break; + case NTSYNC_TYPE_EVENT: + if (!obj->u.event.manual) + obj->u.event.signaled = false; + break; } } wake_up_process(q->task); @@ -353,6 +364,28 @@ static void try_wake_any_mutex(struct ntsync_obj *mutex) } } +static void try_wake_any_event(struct ntsync_obj *event) +{ + struct ntsync_q_entry *entry; + + ntsync_assert_held(event); + lockdep_assert(event->type == NTSYNC_TYPE_EVENT); + + list_for_each_entry(entry, &event->any_waiters, node) { + struct ntsync_q *q = entry->q; + int signaled = -1; + + if (!event->u.event.signaled) + break; + + if (atomic_try_cmpxchg(&q->signaled, &signaled, entry->index)) { + if (!event->u.event.manual) + event->u.event.signaled = false; + wake_up_process(q->task); + } + } +} + /* * Actually change the semaphore state, returning -EOVERFLOW if it is made * invalid. @@ -622,6 +655,27 @@ static int ntsync_create_mutex(struct ntsync_device *dev, void __user *argp) return fd; } +static int ntsync_create_event(struct ntsync_device *dev, void __user *argp) +{ + struct ntsync_event_args args; + struct ntsync_obj *event; + int fd; + + if (copy_from_user(&args, argp, sizeof(args))) + return -EFAULT; + + event = ntsync_alloc_obj(dev, NTSYNC_TYPE_EVENT); + if (!event) + return -ENOMEM; + event->u.event.manual = args.manual; + event->u.event.signaled = args.signaled; + fd = ntsync_obj_get_fd(event); + if (fd < 0) + kfree(event); + + return fd; +} + static struct ntsync_obj *get_obj(struct ntsync_device *dev, int fd) { struct file *file = fget(fd); @@ -752,6 +806,9 @@ static void try_wake_any_obj(struct ntsync_obj *obj) case NTSYNC_TYPE_MUTEX: try_wake_any_mutex(obj); break; + case NTSYNC_TYPE_EVENT: + try_wake_any_event(obj); + break; } } @@ -941,6 +998,8 @@ static long ntsync_char_ioctl(struct file *file, unsigned int cmd, void __user *argp = (void __user *)parm; switch (cmd) { + case NTSYNC_IOC_CREATE_EVENT: + return ntsync_create_event(dev, argp); case NTSYNC_IOC_CREATE_MUTEX: return ntsync_create_mutex(dev, argp); case NTSYNC_IOC_CREATE_SEM: diff --git a/include/uapi/linux/ntsync.h b/include/uapi/linux/ntsync.h index 633958d90be3..e08aa02f4dcc 100644 --- a/include/uapi/linux/ntsync.h +++ b/include/uapi/linux/ntsync.h @@ -20,6 +20,11 @@ struct ntsync_mutex_args { __u32 count; }; +struct ntsync_event_args { + __u32 manual; + __u32 signaled; +}; + #define NTSYNC_WAIT_REALTIME 0x1 struct ntsync_wait_args { @@ -38,6 +43,7 @@ struct ntsync_wait_args { #define NTSYNC_IOC_WAIT_ANY _IOWR('N', 0x82, struct ntsync_wait_args) #define NTSYNC_IOC_WAIT_ALL _IOWR('N', 0x83, struct ntsync_wait_args) #define NTSYNC_IOC_CREATE_MUTEX _IOW ('N', 0x84, struct ntsync_mutex_args) +#define NTSYNC_IOC_CREATE_EVENT _IOW ('N', 0x87, struct ntsync_event_args) #define NTSYNC_IOC_SEM_RELEASE _IOWR('N', 0x81, __u32) #define NTSYNC_IOC_MUTEX_UNLOCK _IOWR('N', 0x85, struct ntsync_mutex_args) -- cgit v1.2.3 From 2dcba6fc15a442b1cf1df8cdd0da12f20db38c43 Mon Sep 17 00:00:00 2001 From: Elizabeth Figura Date: Fri, 13 Dec 2024 13:34:50 -0600 Subject: ntsync: Introduce NTSYNC_IOC_EVENT_SET. This corresponds to the NT syscall NtSetEvent(). This sets the event to the signaled state, and returns its previous state. Signed-off-by: Elizabeth Figura Link: https://lore.kernel.org/r/20241213193511.457338-10-zfigura@codeweavers.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/ntsync.c | 27 +++++++++++++++++++++++++++ include/uapi/linux/ntsync.h | 1 + 2 files changed, 28 insertions(+) (limited to 'include/uapi') diff --git a/drivers/misc/ntsync.c b/drivers/misc/ntsync.c index 3e8827b6f480..0a87f8ad5993 100644 --- a/drivers/misc/ntsync.c +++ b/drivers/misc/ntsync.c @@ -534,6 +534,31 @@ static int ntsync_mutex_kill(struct ntsync_obj *mutex, void __user *argp) return ret; } +static int ntsync_event_set(struct ntsync_obj *event, void __user *argp) +{ + struct ntsync_device *dev = event->dev; + __u32 prev_state; + bool all; + + if (event->type != NTSYNC_TYPE_EVENT) + return -EINVAL; + + all = ntsync_lock_obj(dev, event); + + prev_state = event->u.event.signaled; + event->u.event.signaled = true; + if (all) + try_wake_all_obj(dev, event); + try_wake_any_event(event); + + ntsync_unlock_obj(dev, event, all); + + if (put_user(prev_state, (__u32 __user *)argp)) + return -EFAULT; + + return 0; +} + static int ntsync_obj_release(struct inode *inode, struct file *file) { struct ntsync_obj *obj = file->private_data; @@ -557,6 +582,8 @@ static long ntsync_obj_ioctl(struct file *file, unsigned int cmd, return ntsync_mutex_unlock(obj, argp); case NTSYNC_IOC_MUTEX_KILL: return ntsync_mutex_kill(obj, argp); + case NTSYNC_IOC_EVENT_SET: + return ntsync_event_set(obj, argp); default: return -ENOIOCTLCMD; } diff --git a/include/uapi/linux/ntsync.h b/include/uapi/linux/ntsync.h index e08aa02f4dcc..db2512f6e3f4 100644 --- a/include/uapi/linux/ntsync.h +++ b/include/uapi/linux/ntsync.h @@ -48,5 +48,6 @@ struct ntsync_wait_args { #define NTSYNC_IOC_SEM_RELEASE _IOWR('N', 0x81, __u32) #define NTSYNC_IOC_MUTEX_UNLOCK _IOWR('N', 0x85, struct ntsync_mutex_args) #define NTSYNC_IOC_MUTEX_KILL _IOW ('N', 0x86, __u32) +#define NTSYNC_IOC_EVENT_SET _IOR ('N', 0x88, __u32) #endif -- cgit v1.2.3 From bbb9797514b20c316314b78a439f87fd16b7c509 Mon Sep 17 00:00:00 2001 From: Elizabeth Figura Date: Fri, 13 Dec 2024 13:34:51 -0600 Subject: ntsync: Introduce NTSYNC_IOC_EVENT_RESET. This corresponds to the NT syscall NtResetEvent(). This sets the event to the unsignaled state, and returns its previous state. Signed-off-by: Elizabeth Figura Acked-by: Peter Zijlstra (Intel) Acked-by: Arnd Bergmann Link: https://lore.kernel.org/r/20241213193511.457338-11-zfigura@codeweavers.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/ntsync.c | 24 ++++++++++++++++++++++++ include/uapi/linux/ntsync.h | 1 + 2 files changed, 25 insertions(+) (limited to 'include/uapi') diff --git a/drivers/misc/ntsync.c b/drivers/misc/ntsync.c index 0a87f8ad5993..b31443aa9692 100644 --- a/drivers/misc/ntsync.c +++ b/drivers/misc/ntsync.c @@ -559,6 +559,28 @@ static int ntsync_event_set(struct ntsync_obj *event, void __user *argp) return 0; } +static int ntsync_event_reset(struct ntsync_obj *event, void __user *argp) +{ + struct ntsync_device *dev = event->dev; + __u32 prev_state; + bool all; + + if (event->type != NTSYNC_TYPE_EVENT) + return -EINVAL; + + all = ntsync_lock_obj(dev, event); + + prev_state = event->u.event.signaled; + event->u.event.signaled = false; + + ntsync_unlock_obj(dev, event, all); + + if (put_user(prev_state, (__u32 __user *)argp)) + return -EFAULT; + + return 0; +} + static int ntsync_obj_release(struct inode *inode, struct file *file) { struct ntsync_obj *obj = file->private_data; @@ -584,6 +606,8 @@ static long ntsync_obj_ioctl(struct file *file, unsigned int cmd, return ntsync_mutex_kill(obj, argp); case NTSYNC_IOC_EVENT_SET: return ntsync_event_set(obj, argp); + case NTSYNC_IOC_EVENT_RESET: + return ntsync_event_reset(obj, argp); default: return -ENOIOCTLCMD; } diff --git a/include/uapi/linux/ntsync.h b/include/uapi/linux/ntsync.h index db2512f6e3f4..d74c4e4f93d8 100644 --- a/include/uapi/linux/ntsync.h +++ b/include/uapi/linux/ntsync.h @@ -49,5 +49,6 @@ struct ntsync_wait_args { #define NTSYNC_IOC_MUTEX_UNLOCK _IOWR('N', 0x85, struct ntsync_mutex_args) #define NTSYNC_IOC_MUTEX_KILL _IOW ('N', 0x86, __u32) #define NTSYNC_IOC_EVENT_SET _IOR ('N', 0x88, __u32) +#define NTSYNC_IOC_EVENT_RESET _IOR ('N', 0x89, __u32) #endif -- cgit v1.2.3 From 12b29d3008e6bd5118af716ae2971fe6823b117a Mon Sep 17 00:00:00 2001 From: Elizabeth Figura Date: Fri, 13 Dec 2024 13:34:52 -0600 Subject: ntsync: Introduce NTSYNC_IOC_EVENT_PULSE. This corresponds to the NT syscall NtPulseEvent(). This wakes up any waiters as if the event had been set, but does not set the event, instead resetting it if it had been signalled. Thus, for a manual-reset event, all waiters are woken, whereas for an auto-reset event, at most one waiter is woken. Signed-off-by: Elizabeth Figura Acked-by: Peter Zijlstra (Intel) Acked-by: Arnd Bergmann Link: https://lore.kernel.org/r/20241213193511.457338-12-zfigura@codeweavers.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/ntsync.c | 8 ++++++-- include/uapi/linux/ntsync.h | 1 + 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/drivers/misc/ntsync.c b/drivers/misc/ntsync.c index b31443aa9692..141ebe8a7d4f 100644 --- a/drivers/misc/ntsync.c +++ b/drivers/misc/ntsync.c @@ -534,7 +534,7 @@ static int ntsync_mutex_kill(struct ntsync_obj *mutex, void __user *argp) return ret; } -static int ntsync_event_set(struct ntsync_obj *event, void __user *argp) +static int ntsync_event_set(struct ntsync_obj *event, void __user *argp, bool pulse) { struct ntsync_device *dev = event->dev; __u32 prev_state; @@ -550,6 +550,8 @@ static int ntsync_event_set(struct ntsync_obj *event, void __user *argp) if (all) try_wake_all_obj(dev, event); try_wake_any_event(event); + if (pulse) + event->u.event.signaled = false; ntsync_unlock_obj(dev, event, all); @@ -605,9 +607,11 @@ static long ntsync_obj_ioctl(struct file *file, unsigned int cmd, case NTSYNC_IOC_MUTEX_KILL: return ntsync_mutex_kill(obj, argp); case NTSYNC_IOC_EVENT_SET: - return ntsync_event_set(obj, argp); + return ntsync_event_set(obj, argp, false); case NTSYNC_IOC_EVENT_RESET: return ntsync_event_reset(obj, argp); + case NTSYNC_IOC_EVENT_PULSE: + return ntsync_event_set(obj, argp, true); default: return -ENOIOCTLCMD; } diff --git a/include/uapi/linux/ntsync.h b/include/uapi/linux/ntsync.h index d74c4e4f93d8..9eab7666b8b9 100644 --- a/include/uapi/linux/ntsync.h +++ b/include/uapi/linux/ntsync.h @@ -50,5 +50,6 @@ struct ntsync_wait_args { #define NTSYNC_IOC_MUTEX_KILL _IOW ('N', 0x86, __u32) #define NTSYNC_IOC_EVENT_SET _IOR ('N', 0x88, __u32) #define NTSYNC_IOC_EVENT_RESET _IOR ('N', 0x89, __u32) +#define NTSYNC_IOC_EVENT_PULSE _IOR ('N', 0x8a, __u32) #endif -- cgit v1.2.3 From a948f4177c3cce5f27648696da95c62c0253a099 Mon Sep 17 00:00:00 2001 From: Elizabeth Figura Date: Fri, 13 Dec 2024 13:34:53 -0600 Subject: ntsync: Introduce NTSYNC_IOC_SEM_READ. This corresponds to the NT syscall NtQuerySemaphore(). This returns the current count and maximum count of the semaphore. Signed-off-by: Elizabeth Figura Link: https://lore.kernel.org/r/20241213193511.457338-13-zfigura@codeweavers.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/ntsync.c | 24 ++++++++++++++++++++++++ include/uapi/linux/ntsync.h | 1 + 2 files changed, 25 insertions(+) (limited to 'include/uapi') diff --git a/drivers/misc/ntsync.c b/drivers/misc/ntsync.c index 141ebe8a7d4f..b2043412c5cd 100644 --- a/drivers/misc/ntsync.c +++ b/drivers/misc/ntsync.c @@ -583,6 +583,28 @@ static int ntsync_event_reset(struct ntsync_obj *event, void __user *argp) return 0; } +static int ntsync_sem_read(struct ntsync_obj *sem, void __user *argp) +{ + struct ntsync_sem_args __user *user_args = argp; + struct ntsync_device *dev = sem->dev; + struct ntsync_sem_args args; + bool all; + + if (sem->type != NTSYNC_TYPE_SEM) + return -EINVAL; + + all = ntsync_lock_obj(dev, sem); + + args.count = sem->u.sem.count; + args.max = sem->u.sem.max; + + ntsync_unlock_obj(dev, sem, all); + + if (copy_to_user(user_args, &args, sizeof(args))) + return -EFAULT; + return 0; +} + static int ntsync_obj_release(struct inode *inode, struct file *file) { struct ntsync_obj *obj = file->private_data; @@ -602,6 +624,8 @@ static long ntsync_obj_ioctl(struct file *file, unsigned int cmd, switch (cmd) { case NTSYNC_IOC_SEM_RELEASE: return ntsync_sem_release(obj, argp); + case NTSYNC_IOC_SEM_READ: + return ntsync_sem_read(obj, argp); case NTSYNC_IOC_MUTEX_UNLOCK: return ntsync_mutex_unlock(obj, argp); case NTSYNC_IOC_MUTEX_KILL: diff --git a/include/uapi/linux/ntsync.h b/include/uapi/linux/ntsync.h index 9eab7666b8b9..e36b4cfb7d34 100644 --- a/include/uapi/linux/ntsync.h +++ b/include/uapi/linux/ntsync.h @@ -51,5 +51,6 @@ struct ntsync_wait_args { #define NTSYNC_IOC_EVENT_SET _IOR ('N', 0x88, __u32) #define NTSYNC_IOC_EVENT_RESET _IOR ('N', 0x89, __u32) #define NTSYNC_IOC_EVENT_PULSE _IOR ('N', 0x8a, __u32) +#define NTSYNC_IOC_SEM_READ _IOR ('N', 0x8b, struct ntsync_sem_args) #endif -- cgit v1.2.3 From 0b3c31449d28c83c0b8a8a7be569aa30d70b7764 Mon Sep 17 00:00:00 2001 From: Elizabeth Figura Date: Fri, 13 Dec 2024 13:34:54 -0600 Subject: ntsync: Introduce NTSYNC_IOC_MUTEX_READ. This corresponds to the NT syscall NtQueryMutant(). This returns the recursion count, owner, and abandoned state of the mutex. Signed-off-by: Elizabeth Figura Link: https://lore.kernel.org/r/20241213193511.457338-14-zfigura@codeweavers.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/ntsync.c | 26 ++++++++++++++++++++++++++ include/uapi/linux/ntsync.h | 1 + 2 files changed, 27 insertions(+) (limited to 'include/uapi') diff --git a/drivers/misc/ntsync.c b/drivers/misc/ntsync.c index b2043412c5cd..0047b13b6ebd 100644 --- a/drivers/misc/ntsync.c +++ b/drivers/misc/ntsync.c @@ -605,6 +605,30 @@ static int ntsync_sem_read(struct ntsync_obj *sem, void __user *argp) return 0; } +static int ntsync_mutex_read(struct ntsync_obj *mutex, void __user *argp) +{ + struct ntsync_mutex_args __user *user_args = argp; + struct ntsync_device *dev = mutex->dev; + struct ntsync_mutex_args args; + bool all; + int ret; + + if (mutex->type != NTSYNC_TYPE_MUTEX) + return -EINVAL; + + all = ntsync_lock_obj(dev, mutex); + + args.count = mutex->u.mutex.count; + args.owner = mutex->u.mutex.owner; + ret = mutex->u.mutex.ownerdead ? -EOWNERDEAD : 0; + + ntsync_unlock_obj(dev, mutex, all); + + if (copy_to_user(user_args, &args, sizeof(args))) + return -EFAULT; + return ret; +} + static int ntsync_obj_release(struct inode *inode, struct file *file) { struct ntsync_obj *obj = file->private_data; @@ -630,6 +654,8 @@ static long ntsync_obj_ioctl(struct file *file, unsigned int cmd, return ntsync_mutex_unlock(obj, argp); case NTSYNC_IOC_MUTEX_KILL: return ntsync_mutex_kill(obj, argp); + case NTSYNC_IOC_MUTEX_READ: + return ntsync_mutex_read(obj, argp); case NTSYNC_IOC_EVENT_SET: return ntsync_event_set(obj, argp, false); case NTSYNC_IOC_EVENT_RESET: diff --git a/include/uapi/linux/ntsync.h b/include/uapi/linux/ntsync.h index e36b4cfb7d34..207646e63ef3 100644 --- a/include/uapi/linux/ntsync.h +++ b/include/uapi/linux/ntsync.h @@ -52,5 +52,6 @@ struct ntsync_wait_args { #define NTSYNC_IOC_EVENT_RESET _IOR ('N', 0x89, __u32) #define NTSYNC_IOC_EVENT_PULSE _IOR ('N', 0x8a, __u32) #define NTSYNC_IOC_SEM_READ _IOR ('N', 0x8b, struct ntsync_sem_args) +#define NTSYNC_IOC_MUTEX_READ _IOR ('N', 0x8c, struct ntsync_mutex_args) #endif -- cgit v1.2.3 From e864071a630cfbbd55251e7b45461003f4f79877 Mon Sep 17 00:00:00 2001 From: Elizabeth Figura Date: Fri, 13 Dec 2024 13:34:55 -0600 Subject: ntsync: Introduce NTSYNC_IOC_EVENT_READ. This corresponds to the NT syscall NtQueryEvent(). This returns the signaled state of the event and whether it is manual-reset. Signed-off-by: Elizabeth Figura Link: https://lore.kernel.org/r/20241213193511.457338-15-zfigura@codeweavers.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/ntsync.c | 24 ++++++++++++++++++++++++ include/uapi/linux/ntsync.h | 1 + 2 files changed, 25 insertions(+) (limited to 'include/uapi') diff --git a/drivers/misc/ntsync.c b/drivers/misc/ntsync.c index 0047b13b6ebd..78dc405bb759 100644 --- a/drivers/misc/ntsync.c +++ b/drivers/misc/ntsync.c @@ -629,6 +629,28 @@ static int ntsync_mutex_read(struct ntsync_obj *mutex, void __user *argp) return ret; } +static int ntsync_event_read(struct ntsync_obj *event, void __user *argp) +{ + struct ntsync_event_args __user *user_args = argp; + struct ntsync_device *dev = event->dev; + struct ntsync_event_args args; + bool all; + + if (event->type != NTSYNC_TYPE_EVENT) + return -EINVAL; + + all = ntsync_lock_obj(dev, event); + + args.manual = event->u.event.manual; + args.signaled = event->u.event.signaled; + + ntsync_unlock_obj(dev, event, all); + + if (copy_to_user(user_args, &args, sizeof(args))) + return -EFAULT; + return 0; +} + static int ntsync_obj_release(struct inode *inode, struct file *file) { struct ntsync_obj *obj = file->private_data; @@ -662,6 +684,8 @@ static long ntsync_obj_ioctl(struct file *file, unsigned int cmd, return ntsync_event_reset(obj, argp); case NTSYNC_IOC_EVENT_PULSE: return ntsync_event_set(obj, argp, true); + case NTSYNC_IOC_EVENT_READ: + return ntsync_event_read(obj, argp); default: return -ENOIOCTLCMD; } diff --git a/include/uapi/linux/ntsync.h b/include/uapi/linux/ntsync.h index 207646e63ef3..b9d208a8c00f 100644 --- a/include/uapi/linux/ntsync.h +++ b/include/uapi/linux/ntsync.h @@ -53,5 +53,6 @@ struct ntsync_wait_args { #define NTSYNC_IOC_EVENT_PULSE _IOR ('N', 0x8a, __u32) #define NTSYNC_IOC_SEM_READ _IOR ('N', 0x8b, struct ntsync_sem_args) #define NTSYNC_IOC_MUTEX_READ _IOR ('N', 0x8c, struct ntsync_mutex_args) +#define NTSYNC_IOC_EVENT_READ _IOR ('N', 0x8d, struct ntsync_event_args) #endif -- cgit v1.2.3 From a138179a59d47690b069fbded1be12f47648ef07 Mon Sep 17 00:00:00 2001 From: Elizabeth Figura Date: Fri, 13 Dec 2024 13:34:56 -0600 Subject: ntsync: Introduce alertable waits. NT waits can optionally be made "alertable". This is a special channel for thread wakeup that is mildly similar to SIGIO. A thread has an internal single bit of "alerted" state, and if a thread is alerted while an alertable wait, the wait will return a special value, consume the "alerted" state, and will not consume any of its objects. Alerts are implemented using events; the user-space NT emulator is expected to create an internal ntsync event for each thread and pass that event to wait functions. Signed-off-by: Elizabeth Figura Acked-by: Peter Zijlstra (Intel) Acked-by: Arnd Bergmann Link: https://lore.kernel.org/r/20241213193511.457338-16-zfigura@codeweavers.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/ntsync.c | 70 +++++++++++++++++++++++++++++++++++++++------ include/uapi/linux/ntsync.h | 3 +- 2 files changed, 63 insertions(+), 10 deletions(-) (limited to 'include/uapi') diff --git a/drivers/misc/ntsync.c b/drivers/misc/ntsync.c index 78dc405bb759..457ff28b789f 100644 --- a/drivers/misc/ntsync.c +++ b/drivers/misc/ntsync.c @@ -869,22 +869,29 @@ static int setup_wait(struct ntsync_device *dev, const struct ntsync_wait_args *args, bool all, struct ntsync_q **ret_q) { + int fds[NTSYNC_MAX_WAIT_COUNT + 1]; const __u32 count = args->count; - int fds[NTSYNC_MAX_WAIT_COUNT]; struct ntsync_q *q; + __u32 total_count; __u32 i, j; - if (args->pad[0] || args->pad[1] || (args->flags & ~NTSYNC_WAIT_REALTIME)) + if (args->pad || (args->flags & ~NTSYNC_WAIT_REALTIME)) return -EINVAL; if (args->count > NTSYNC_MAX_WAIT_COUNT) return -EINVAL; + total_count = count; + if (args->alert) + total_count++; + if (copy_from_user(fds, u64_to_user_ptr(args->objs), array_size(count, sizeof(*fds)))) return -EFAULT; + if (args->alert) + fds[count] = args->alert; - q = kmalloc(struct_size(q, entries, count), GFP_KERNEL); + q = kmalloc(struct_size(q, entries, total_count), GFP_KERNEL); if (!q) return -ENOMEM; q->task = current; @@ -894,7 +901,7 @@ static int setup_wait(struct ntsync_device *dev, q->ownerdead = false; q->count = count; - for (i = 0; i < count; i++) { + for (i = 0; i < total_count; i++) { struct ntsync_q_entry *entry = &q->entries[i]; struct ntsync_obj *obj = get_obj(dev, fds[i]); @@ -944,10 +951,10 @@ static void try_wake_any_obj(struct ntsync_obj *obj) static int ntsync_wait_any(struct ntsync_device *dev, void __user *argp) { struct ntsync_wait_args args; + __u32 i, total_count; struct ntsync_q *q; int signaled; bool all; - __u32 i; int ret; if (copy_from_user(&args, argp, sizeof(args))) @@ -957,9 +964,13 @@ static int ntsync_wait_any(struct ntsync_device *dev, void __user *argp) if (ret < 0) return ret; + total_count = args.count; + if (args.alert) + total_count++; + /* queue ourselves */ - for (i = 0; i < args.count; i++) { + for (i = 0; i < total_count; i++) { struct ntsync_q_entry *entry = &q->entries[i]; struct ntsync_obj *obj = entry->obj; @@ -968,9 +979,15 @@ static int ntsync_wait_any(struct ntsync_device *dev, void __user *argp) ntsync_unlock_obj(dev, obj, all); } - /* check if we are already signaled */ + /* + * Check if we are already signaled. + * + * Note that the API requires that normal objects are checked before + * the alert event. Hence we queue the alert event last, and check + * objects in order. + */ - for (i = 0; i < args.count; i++) { + for (i = 0; i < total_count; i++) { struct ntsync_obj *obj = q->entries[i].obj; if (atomic_read(&q->signaled) != -1) @@ -987,7 +1004,7 @@ static int ntsync_wait_any(struct ntsync_device *dev, void __user *argp) /* and finally, unqueue */ - for (i = 0; i < args.count; i++) { + for (i = 0; i < total_count; i++) { struct ntsync_q_entry *entry = &q->entries[i]; struct ntsync_obj *obj = entry->obj; @@ -1047,6 +1064,14 @@ static int ntsync_wait_all(struct ntsync_device *dev, void __user *argp) */ list_add_tail(&entry->node, &obj->all_waiters); } + if (args.alert) { + struct ntsync_q_entry *entry = &q->entries[args.count]; + struct ntsync_obj *obj = entry->obj; + + dev_lock_obj(dev, obj); + list_add_tail(&entry->node, &obj->any_waiters); + dev_unlock_obj(dev, obj); + } /* check if we are already signaled */ @@ -1054,6 +1079,21 @@ static int ntsync_wait_all(struct ntsync_device *dev, void __user *argp) mutex_unlock(&dev->wait_all_lock); + /* + * Check if the alert event is signaled, making sure to do so only + * after checking if the other objects are signaled. + */ + + if (args.alert) { + struct ntsync_obj *obj = q->entries[args.count].obj; + + if (atomic_read(&q->signaled) == -1) { + bool all = ntsync_lock_obj(dev, obj); + try_wake_any_obj(obj); + ntsync_unlock_obj(dev, obj, all); + } + } + /* sleep */ ret = ntsync_schedule(q, &args); @@ -1079,6 +1119,18 @@ static int ntsync_wait_all(struct ntsync_device *dev, void __user *argp) mutex_unlock(&dev->wait_all_lock); + if (args.alert) { + struct ntsync_q_entry *entry = &q->entries[args.count]; + struct ntsync_obj *obj = entry->obj; + bool all; + + all = ntsync_lock_obj(dev, obj); + list_del(&entry->node); + ntsync_unlock_obj(dev, obj, all); + + put_obj(obj); + } + signaled = atomic_read(&q->signaled); if (signaled != -1) { struct ntsync_wait_args __user *user_args = argp; diff --git a/include/uapi/linux/ntsync.h b/include/uapi/linux/ntsync.h index b9d208a8c00f..6d06793512b1 100644 --- a/include/uapi/linux/ntsync.h +++ b/include/uapi/linux/ntsync.h @@ -34,7 +34,8 @@ struct ntsync_wait_args { __u32 index; __u32 flags; __u32 owner; - __u32 pad[2]; + __u32 alert; + __u32 pad; }; #define NTSYNC_MAX_WAIT_COUNT 64 -- cgit v1.2.3 From 33d97a07b3ae6fa713919de4e1864ca04fff8f80 Mon Sep 17 00:00:00 2001 From: Yuyang Huang Date: Tue, 7 Jan 2025 20:43:55 +0900 Subject: netlink: add IPv6 anycast join/leave notifications MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change introduces a mechanism for notifying userspace applications about changes to IPv6 anycast addresses via netlink. It includes: * Addition and deletion of IPv6 anycast addresses are reported using RTM_NEWANYCAST and RTM_DELANYCAST. * A new netlink group (RTNLGRP_IPV6_ACADDR) for subscribing to these notifications. This enables user space applications(e.g. ip monitor) to efficiently track anycast addresses through netlink messages, improving metrics collection and system monitoring. It also unlocks the potential for advanced anycast management in user space, such as hardware offload control and fine grained network control. Cc: Maciej Ć»enczykowski Cc: Lorenzo Colitti Signed-off-by: Yuyang Huang Reviewed-by: David Ahern Link: https://patch.msgid.link/20250107114355.1766086-1-yuyanghuang@google.com Signed-off-by: Paolo Abeni --- include/net/addrconf.h | 3 +++ include/uapi/linux/rtnetlink.h | 8 +++++++- net/ipv6/addrconf.c | 6 +++--- net/ipv6/anycast.c | 35 +++++++++++++++++++++++++++++++++++ 4 files changed, 48 insertions(+), 4 deletions(-) (limited to 'include/uapi') diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 58337898fa21..f8f91b2038ea 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -546,4 +546,7 @@ int inet6_fill_ifmcaddr(struct sk_buff *skb, const struct ifmcaddr6 *ifmca, struct inet6_fill_args *args); +int inet6_fill_ifacaddr(struct sk_buff *skb, + const struct ifacaddr6 *ifaca, + struct inet6_fill_args *args); #endif diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 5ee94c511a28..66c3903d29cf 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -100,7 +100,11 @@ enum { RTM_GETMULTICAST, #define RTM_GETMULTICAST RTM_GETMULTICAST - RTM_GETANYCAST = 62, + RTM_NEWANYCAST = 60, +#define RTM_NEWANYCAST RTM_NEWANYCAST + RTM_DELANYCAST, +#define RTM_DELANYCAST RTM_DELANYCAST + RTM_GETANYCAST, #define RTM_GETANYCAST RTM_GETANYCAST RTM_NEWNEIGHTBL = 64, @@ -783,6 +787,8 @@ enum rtnetlink_groups { #define RTNLGRP_IPV4_MCADDR RTNLGRP_IPV4_MCADDR RTNLGRP_IPV6_MCADDR, #define RTNLGRP_IPV6_MCADDR RTNLGRP_IPV6_MCADDR + RTNLGRP_IPV6_ACADDR, +#define RTNLGRP_IPV6_ACADDR RTNLGRP_IPV6_ACADDR __RTNLGRP_MAX }; #define RTNLGRP_MAX (__RTNLGRP_MAX - 1) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 4da409bc4577..c3729382be3b 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -5240,9 +5240,9 @@ int inet6_fill_ifmcaddr(struct sk_buff *skb, return 0; } -static int inet6_fill_ifacaddr(struct sk_buff *skb, - const struct ifacaddr6 *ifaca, - struct inet6_fill_args *args) +int inet6_fill_ifacaddr(struct sk_buff *skb, + const struct ifacaddr6 *ifaca, + struct inet6_fill_args *args) { struct net_device *dev = fib6_info_nh_dev(ifaca->aca_rt); int ifindex = dev ? dev->ifindex : 1; diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index 562cace50ca9..21e01695b48c 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -278,6 +278,37 @@ static struct ifacaddr6 *aca_alloc(struct fib6_info *f6i, return aca; } +static void inet6_ifacaddr_notify(struct net_device *dev, + const struct ifacaddr6 *ifaca, int event) +{ + struct inet6_fill_args fillargs = { + .event = event, + .netnsid = -1, + }; + struct net *net = dev_net(dev); + struct sk_buff *skb; + int err = -ENOMEM; + + skb = nlmsg_new(NLMSG_ALIGN(sizeof(struct ifaddrmsg)) + + nla_total_size(sizeof(struct in6_addr)) + + nla_total_size(sizeof(struct ifa_cacheinfo)), + GFP_KERNEL); + if (!skb) + goto error; + + err = inet6_fill_ifacaddr(skb, ifaca, &fillargs); + if (err < 0) { + pr_err("Failed to fill in anycast addresses (err %d)\n", err); + nlmsg_free(skb); + goto error; + } + + rtnl_notify(skb, net, 0, RTNLGRP_IPV6_ACADDR, NULL, GFP_KERNEL); + return; +error: + rtnl_set_sk_err(net, RTNLGRP_IPV6_ACADDR, err); +} + /* * device anycast group inc (add if not found) */ @@ -333,6 +364,8 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr) addrconf_join_solict(idev->dev, &aca->aca_addr); + inet6_ifacaddr_notify(idev->dev, aca, RTM_NEWANYCAST); + aca_put(aca); return 0; out: @@ -375,6 +408,8 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr) ip6_del_rt(dev_net(idev->dev), aca->aca_rt, false); + inet6_ifacaddr_notify(idev->dev, aca, RTM_DELANYCAST); + aca_put(aca); return 0; } -- cgit v1.2.3 From 601731fc7c6111bbca49ce3c9499c2e4d426079d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 15 Nov 2024 14:46:09 +0100 Subject: netfilter: conntrack: add conntrack event timestamp Nadia Pinaeva writes: I am working on a tool that allows collecting network performance metrics by using conntrack events. Start time of a conntrack entry is used to evaluate seen_reply latency, therefore the sooner it is timestamped, the better the precision is. In particular, when using this tool to compare the performance of the same feature implemented using iptables/nftables/OVS it is crucial to have the entry timestamped earlier to see any difference. At this time, conntrack events can only get timestamped at recv time in userspace, so there can be some delay between the event being generated and the userspace process consuming the message. There is sys/net/netfilter/nf_conntrack_timestamp, which adds a 64bit timestamp (ns resolution) that records start and stop times, but its not suited for this either, start time is the 'hashtable insertion time', not 'conntrack allocation time'. There is concern that moving the start-time moment to conntrack allocation will add overhead in case of flooding, where conntrack entries are allocated and released right away without getting inserted into the hashtable. Also, even if this was changed it would not with events other than new (start time) and destroy (stop time). Pablo suggested to add new CTA_TIMESTAMP_EVENT, this adds this feature. The timestamp is recorded in case both events are requested and the sys/net/netfilter/nf_conntrack_timestamp toggle is enabled. Reported-by: Nadia Pinaeva Suggested-by: Pablo Neira Ayuso Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_ecache.h | 12 +++++++++++ include/uapi/linux/netfilter/nfnetlink_conntrack.h | 1 + net/netfilter/nf_conntrack_ecache.c | 23 ++++++++++++++++++++ net/netfilter/nf_conntrack_netlink.c | 25 ++++++++++++++++++++++ 4 files changed, 61 insertions(+) (limited to 'include/uapi') diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h index 0c1dac318e02..8dcf7c371ee9 100644 --- a/include/net/netfilter/nf_conntrack_ecache.h +++ b/include/net/netfilter/nf_conntrack_ecache.h @@ -12,6 +12,7 @@ #include #include #include +#include enum nf_ct_ecache_state { NFCT_ECACHE_DESTROY_FAIL, /* tried but failed to send destroy event */ @@ -20,6 +21,9 @@ enum nf_ct_ecache_state { struct nf_conntrack_ecache { unsigned long cache; /* bitops want long */ +#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP + local64_t timestamp; /* event timestamp, in nanoseconds */ +#endif u16 ctmask; /* bitmask of ct events to be delivered */ u16 expmask; /* bitmask of expect events to be delivered */ u32 missed; /* missed events */ @@ -108,6 +112,14 @@ nf_conntrack_event_cache(enum ip_conntrack_events event, struct nf_conn *ct) if (e == NULL) return; +#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP + /* renew only if this is the first cached event, so that the + * timestamp reflects the first, not the last, generated event. + */ + if (local64_read(&e->timestamp) && READ_ONCE(e->cache) == 0) + local64_set(&e->timestamp, ktime_get_real_ns()); +#endif + set_bit(event, &e->cache); #endif } diff --git a/include/uapi/linux/netfilter/nfnetlink_conntrack.h b/include/uapi/linux/netfilter/nfnetlink_conntrack.h index c2ac7269acf7..43233af75b9d 100644 --- a/include/uapi/linux/netfilter/nfnetlink_conntrack.h +++ b/include/uapi/linux/netfilter/nfnetlink_conntrack.h @@ -57,6 +57,7 @@ enum ctattr_type { CTA_SYNPROXY, CTA_FILTER, CTA_STATUS_MASK, + CTA_TIMESTAMP_EVENT, __CTA_MAX }; #define CTA_MAX (__CTA_MAX - 1) diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index 69948e1d6974..af68c64acaab 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -162,6 +162,14 @@ static int __nf_conntrack_eventmask_report(struct nf_conntrack_ecache *e, return ret; } +static void nf_ct_ecache_tstamp_refresh(struct nf_conntrack_ecache *e) +{ +#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP + if (local64_read(&e->timestamp)) + local64_set(&e->timestamp, ktime_get_real_ns()); +#endif +} + int nf_conntrack_eventmask_report(unsigned int events, struct nf_conn *ct, u32 portid, int report) { @@ -186,6 +194,8 @@ int nf_conntrack_eventmask_report(unsigned int events, struct nf_conn *ct, /* This is a resent of a destroy event? If so, skip missed */ missed = e->portid ? 0 : e->missed; + nf_ct_ecache_tstamp_refresh(e); + ret = __nf_conntrack_eventmask_report(e, events, missed, &item); if (unlikely(ret < 0 && (events & (1 << IPCT_DESTROY)))) { /* This is a destroy event that has been triggered by a process, @@ -297,6 +307,18 @@ void nf_conntrack_ecache_work(struct net *net, enum nf_ct_ecache_state state) } } +static void nf_ct_ecache_tstamp_new(const struct nf_conn *ct, struct nf_conntrack_ecache *e) +{ +#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP + u64 ts = 0; + + if (nf_ct_ext_exist(ct, NF_CT_EXT_TSTAMP)) + ts = ktime_get_real_ns(); + + local64_set(&e->timestamp, ts); +#endif +} + bool nf_ct_ecache_ext_add(struct nf_conn *ct, u16 ctmask, u16 expmask, gfp_t gfp) { struct net *net = nf_ct_net(ct); @@ -326,6 +348,7 @@ bool nf_ct_ecache_ext_add(struct nf_conn *ct, u16 ctmask, u16 expmask, gfp_t gfp e = nf_ct_ext_add(ct, NF_CT_EXT_ECACHE, gfp); if (e) { + nf_ct_ecache_tstamp_new(ct, e); e->ctmask = ctmask; e->expmask = expmask; } diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 36168f8b6efa..2277b744eb2c 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -383,6 +383,23 @@ nla_put_failure: #endif #ifdef CONFIG_NF_CONNTRACK_EVENTS +static int +ctnetlink_dump_event_timestamp(struct sk_buff *skb, const struct nf_conn *ct) +{ +#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP + const struct nf_conntrack_ecache *e = nf_ct_ecache_find(ct); + + if (e) { + u64 ts = local64_read(&e->timestamp); + + if (ts) + return nla_put_be64(skb, CTA_TIMESTAMP_EVENT, + cpu_to_be64(ts), CTA_TIMESTAMP_PAD); + } +#endif + return 0; +} + static inline int ctnetlink_label_size(const struct nf_conn *ct) { struct nf_conn_labels *labels = nf_ct_labels_find(ct); @@ -717,6 +734,9 @@ static size_t ctnetlink_nlmsg_size(const struct nf_conn *ct) #endif + ctnetlink_proto_size(ct) + ctnetlink_label_size(ct) +#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP + + nla_total_size(sizeof(u64)) /* CTA_TIMESTAMP_EVENT */ +#endif ; } @@ -838,6 +858,10 @@ ctnetlink_conntrack_event(unsigned int events, const struct nf_ct_event *item) if (ctnetlink_dump_mark(skb, ct, events & (1 << IPCT_MARK))) goto nla_put_failure; #endif + + if (ctnetlink_dump_event_timestamp(skb, ct)) + goto nla_put_failure; + nlmsg_end(skb, nlh); err = nfnetlink_send(skb, net, item->portid, group, item->report, GFP_ATOMIC); @@ -1557,6 +1581,7 @@ static const struct nla_policy ct_nla_policy[CTA_MAX+1] = { .len = NF_CT_LABELS_MAX_SIZE }, [CTA_FILTER] = { .type = NLA_NESTED }, [CTA_STATUS_MASK] = { .type = NLA_U32 }, + [CTA_TIMESTAMP_EVENT] = { .type = NLA_REJECT }, }; static int ctnetlink_flush_iterate(struct nf_conn *ct, void *data) -- cgit v1.2.3 From 8fc7e23a9bd851e6997d3ea2ea1c3475b91862d3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 9 Jan 2025 09:31:01 +0100 Subject: fs: reformat the statx definition The comments after the declaration are becoming rather unreadable with long enough comments. Move them into lines of their own. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20250109083109.1441561-2-hch@lst.de Reviewed-by: John Garry Reviewed-by: Jan Kara Reviewed-by: Darrick J. Wong Signed-off-by: Christian Brauner --- include/uapi/linux/stat.h | 95 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 72 insertions(+), 23 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/stat.h b/include/uapi/linux/stat.h index 887a25286441..8b35d7d511a2 100644 --- a/include/uapi/linux/stat.h +++ b/include/uapi/linux/stat.h @@ -98,43 +98,92 @@ struct statx_timestamp { */ struct statx { /* 0x00 */ - __u32 stx_mask; /* What results were written [uncond] */ - __u32 stx_blksize; /* Preferred general I/O size [uncond] */ - __u64 stx_attributes; /* Flags conveying information about the file [uncond] */ + /* What results were written [uncond] */ + __u32 stx_mask; + + /* Preferred general I/O size [uncond] */ + __u32 stx_blksize; + + /* Flags conveying information about the file [uncond] */ + __u64 stx_attributes; + /* 0x10 */ - __u32 stx_nlink; /* Number of hard links */ - __u32 stx_uid; /* User ID of owner */ - __u32 stx_gid; /* Group ID of owner */ - __u16 stx_mode; /* File mode */ + /* Number of hard links */ + __u32 stx_nlink; + + /* User ID of owner */ + __u32 stx_uid; + + /* Group ID of owner */ + __u32 stx_gid; + + /* File mode */ + __u16 stx_mode; __u16 __spare0[1]; + /* 0x20 */ - __u64 stx_ino; /* Inode number */ - __u64 stx_size; /* File size */ - __u64 stx_blocks; /* Number of 512-byte blocks allocated */ - __u64 stx_attributes_mask; /* Mask to show what's supported in stx_attributes */ + /* Inode number */ + __u64 stx_ino; + + /* File size */ + __u64 stx_size; + + /* Number of 512-byte blocks allocated */ + __u64 stx_blocks; + + /* Mask to show what's supported in stx_attributes */ + __u64 stx_attributes_mask; + /* 0x40 */ - struct statx_timestamp stx_atime; /* Last access time */ - struct statx_timestamp stx_btime; /* File creation time */ - struct statx_timestamp stx_ctime; /* Last attribute change time */ - struct statx_timestamp stx_mtime; /* Last data modification time */ + /* Last access time */ + struct statx_timestamp stx_atime; + + /* File creation time */ + struct statx_timestamp stx_btime; + + /* Last attribute change time */ + struct statx_timestamp stx_ctime; + + /* Last data modification time */ + struct statx_timestamp stx_mtime; + /* 0x80 */ - __u32 stx_rdev_major; /* Device ID of special file [if bdev/cdev] */ + /* Device ID of special file [if bdev/cdev] */ + __u32 stx_rdev_major; __u32 stx_rdev_minor; - __u32 stx_dev_major; /* ID of device containing file [uncond] */ + + /* ID of device containing file [uncond] */ + __u32 stx_dev_major; __u32 stx_dev_minor; + /* 0x90 */ __u64 stx_mnt_id; - __u32 stx_dio_mem_align; /* Memory buffer alignment for direct I/O */ - __u32 stx_dio_offset_align; /* File offset alignment for direct I/O */ + + /* Memory buffer alignment for direct I/O */ + __u32 stx_dio_mem_align; + + /* File offset alignment for direct I/O */ + __u32 stx_dio_offset_align; + /* 0xa0 */ - __u64 stx_subvol; /* Subvolume identifier */ - __u32 stx_atomic_write_unit_min; /* Min atomic write unit in bytes */ - __u32 stx_atomic_write_unit_max; /* Max atomic write unit in bytes */ + /* Subvolume identifier */ + __u64 stx_subvol; + + /* Min atomic write unit in bytes */ + __u32 stx_atomic_write_unit_min; + + /* Max atomic write unit in bytes */ + __u32 stx_atomic_write_unit_max; + /* 0xb0 */ - __u32 stx_atomic_write_segments_max; /* Max atomic write segment count */ + /* Max atomic write segment count */ + __u32 stx_atomic_write_segments_max; + __u32 __spare1[1]; + /* 0xb8 */ __u64 __spare3[9]; /* Spare space for future expansion */ + /* 0x100 */ }; -- cgit v1.2.3 From 7ed6cbe0f8caa6ee38a2dc8f1b925acb904cc01f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 9 Jan 2025 09:31:02 +0100 Subject: fs: add STATX_DIO_READ_ALIGN Add a separate dio read align field, as many out of place write file systems can easily do reads aligned to the device sector size, but require bigger alignment for writes. This is usually papered over by falling back to buffered I/O for smaller writes and doing read-modify-write cycles, but performance for this sucks, so applications benefit from knowing the actual write alignment. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20250109083109.1441561-3-hch@lst.de Reviewed-by: John Garry Reviewed-by: Jan Kara Reviewed-by: Darrick J. Wong Signed-off-by: Christian Brauner --- fs/stat.c | 1 + include/linux/stat.h | 1 + include/uapi/linux/stat.h | 4 +++- 3 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/fs/stat.c b/fs/stat.c index 0870e969a8a0..2c0e111a098a 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -725,6 +725,7 @@ cp_statx(const struct kstat *stat, struct statx __user *buffer) tmp.stx_mnt_id = stat->mnt_id; tmp.stx_dio_mem_align = stat->dio_mem_align; tmp.stx_dio_offset_align = stat->dio_offset_align; + tmp.stx_dio_read_offset_align = stat->dio_read_offset_align; tmp.stx_subvol = stat->subvol; tmp.stx_atomic_write_unit_min = stat->atomic_write_unit_min; tmp.stx_atomic_write_unit_max = stat->atomic_write_unit_max; diff --git a/include/linux/stat.h b/include/linux/stat.h index 3d900c86981c..9d8382e23a9c 100644 --- a/include/linux/stat.h +++ b/include/linux/stat.h @@ -52,6 +52,7 @@ struct kstat { u64 mnt_id; u32 dio_mem_align; u32 dio_offset_align; + u32 dio_read_offset_align; u64 change_cookie; u64 subvol; u32 atomic_write_unit_min; diff --git a/include/uapi/linux/stat.h b/include/uapi/linux/stat.h index 8b35d7d511a2..f78ee3670dd5 100644 --- a/include/uapi/linux/stat.h +++ b/include/uapi/linux/stat.h @@ -179,7 +179,8 @@ struct statx { /* Max atomic write segment count */ __u32 stx_atomic_write_segments_max; - __u32 __spare1[1]; + /* File offset alignment for direct I/O reads */ + __u32 stx_dio_read_offset_align; /* 0xb8 */ __u64 __spare3[9]; /* Spare space for future expansion */ @@ -213,6 +214,7 @@ struct statx { #define STATX_MNT_ID_UNIQUE 0x00004000U /* Want/got extended stx_mount_id */ #define STATX_SUBVOL 0x00008000U /* Want/got stx_subvol */ #define STATX_WRITE_ATOMIC 0x00010000U /* Want/got atomic_write_* fields */ +#define STATX_DIO_READ_ALIGN 0x00020000U /* Want/got dio read alignment info */ #define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */ -- cgit v1.2.3 From 94d57442e56d2ad2ca20d096040b8ae6f216a921 Mon Sep 17 00:00:00 2001 From: Anuj Gupta Date: Thu, 5 Dec 2024 11:51:09 +0530 Subject: io_uring: expose read/write attribute capability After commit 9a213d3b80c0, we can pass additional attributes along with read/write. However, userspace doesn't know that. Add a new feature flag IORING_FEAT_RW_ATTR, to notify the userspace that the kernel has this ability. Signed-off-by: Anuj Gupta Reviewed-by: Li Zetao Reviewed-by: Martin K. Petersen Tested-by: Martin K. Petersen Reviewed-by: Pavel Begunkov Link: https://lore.kernel.org/r/20241205062109.1788-1-anuj20.g@samsung.com Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 1 + io_uring/io_uring.c | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 38f0d6b10eaf..e11c82638527 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -577,6 +577,7 @@ struct io_uring_params { #define IORING_FEAT_REG_REG_RING (1U << 13) #define IORING_FEAT_RECVSEND_BUNDLE (1U << 14) #define IORING_FEAT_MIN_TIMEOUT (1U << 15) +#define IORING_FEAT_RW_ATTR (1U << 16) /* * io_uring_register(2) opcodes and arguments diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 92ba2fdcd087..af03e9973b58 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -3713,7 +3713,8 @@ static __cold int io_uring_create(unsigned entries, struct io_uring_params *p, IORING_FEAT_EXT_ARG | IORING_FEAT_NATIVE_WORKERS | IORING_FEAT_RSRC_TAGS | IORING_FEAT_CQE_SKIP | IORING_FEAT_LINKED_FILE | IORING_FEAT_REG_REG_RING | - IORING_FEAT_RECVSEND_BUNDLE | IORING_FEAT_MIN_TIMEOUT; + IORING_FEAT_RECVSEND_BUNDLE | IORING_FEAT_MIN_TIMEOUT | + IORING_FEAT_RW_ATTR; if (copy_to_user(params, p, sizeof(*p))) { ret = -EFAULT; -- cgit v1.2.3 From bdf46443f350dd5d226fd528a5a5954ff762f591 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 10 Jan 2025 16:59:34 +0100 Subject: ALSA: rawmidi: Expose the tied device number in info ioctl The UMP legacy rawmidi is derived from the UMP rawmidi, but currently there is no way to know which device is involved in other side. This patch extends the rawmidi info ioctl to show the tied device number. As default it stores -1, indicating that no tied device. Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20250110155943.31578-2-tiwai@suse.de --- Documentation/sound/designs/midi-2.0.rst | 5 +++++ include/sound/rawmidi.h | 1 + include/uapi/sound/asound.h | 5 ++++- sound/core/rawmidi.c | 2 ++ sound/core/ump.c | 3 +++ 5 files changed, 15 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/Documentation/sound/designs/midi-2.0.rst b/Documentation/sound/designs/midi-2.0.rst index 086487ca7ab1..d525bc2805f7 100644 --- a/Documentation/sound/designs/midi-2.0.rst +++ b/Documentation/sound/designs/midi-2.0.rst @@ -293,6 +293,11 @@ Rawmidi API Extensions status 0x05). When UMP core receives such a message, it updates the UMP EP info and the corresponding sequencer clients as well. +* The legacy rawmidi device number is found in the new `tied_device` + field of the rawmidi info. + On the other hand, the UMP rawmidi device number is found in + `tied_device` field of the legacy rawmidi info, too. + Control API Extensions ====================== diff --git a/include/sound/rawmidi.h b/include/sound/rawmidi.h index f31cabf0158c..7f1fec786b72 100644 --- a/include/sound/rawmidi.h +++ b/include/sound/rawmidi.h @@ -118,6 +118,7 @@ struct snd_rawmidi { struct list_head list; unsigned int device; /* device number */ unsigned int info_flags; /* SNDRV_RAWMIDI_INFO_XXXX */ + unsigned int tied_device; char id[64]; char name[80]; diff --git a/include/uapi/sound/asound.h b/include/uapi/sound/asound.h index 4cd513215bcd..1fcff031b5e3 100644 --- a/include/uapi/sound/asound.h +++ b/include/uapi/sound/asound.h @@ -729,6 +729,8 @@ enum { #define SNDRV_RAWMIDI_INFO_DUPLEX 0x00000004 #define SNDRV_RAWMIDI_INFO_UMP 0x00000008 +#define SNDRV_RAWMIDI_DEVICE_UNKNOWN -1 + struct snd_rawmidi_info { unsigned int device; /* RO/WR (control): device number */ unsigned int subdevice; /* RO/WR (control): subdevice number */ @@ -740,7 +742,8 @@ struct snd_rawmidi_info { unsigned char subname[32]; /* name of active or selected subdevice */ unsigned int subdevices_count; unsigned int subdevices_avail; - unsigned char reserved[64]; /* reserved for future use */ + int tied_device; /* R: tied rawmidi device (UMP/legacy) */ + unsigned char reserved[60]; /* reserved for future use */ }; #define SNDRV_RAWMIDI_MODE_FRAMING_MASK (7<<0) diff --git a/sound/core/rawmidi.c b/sound/core/rawmidi.c index 348ce1b7725e..858878fe487a 100644 --- a/sound/core/rawmidi.c +++ b/sound/core/rawmidi.c @@ -635,6 +635,7 @@ static int snd_rawmidi_info(struct snd_rawmidi_substream *substream, info->subdevices_count = substream->pstr->substream_count; info->subdevices_avail = (substream->pstr->substream_count - substream->pstr->substream_opened); + info->tied_device = rmidi->tied_device; return 0; } @@ -1834,6 +1835,7 @@ int snd_rawmidi_init(struct snd_rawmidi *rmidi, INIT_LIST_HEAD(&rmidi->streams[SNDRV_RAWMIDI_STREAM_INPUT].substreams); INIT_LIST_HEAD(&rmidi->streams[SNDRV_RAWMIDI_STREAM_OUTPUT].substreams); rmidi->info_flags = info_flags; + rmidi->tied_device = SNDRV_RAWMIDI_DEVICE_UNKNOWN; if (id != NULL) strscpy(rmidi->id, id, sizeof(rmidi->id)); diff --git a/sound/core/ump.c b/sound/core/ump.c index 9198bff4768c..0bfab84eaafb 100644 --- a/sound/core/ump.c +++ b/sound/core/ump.c @@ -1314,6 +1314,9 @@ int snd_ump_attach_legacy_rawmidi(struct snd_ump_endpoint *ump, ump->legacy_rmidi = rmidi; update_legacy_names(ump); + rmidi->tied_device = ump->core.device; + ump->core.tied_device = rmidi->device; + ump_dbg(ump, "Created a legacy rawmidi #%d (%s)\n", device, id); return 0; } -- cgit v1.2.3 From b8fefed73a952a33521ad416fb39683abd87454b Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 10 Jan 2025 16:59:35 +0100 Subject: ALSA: rawmidi: Show substream activity in info ioctl The UMP legacy rawmidi may turn on/off the substream dynamically depending on the UMP Function Block information. So far, there was no direct way to know whether the substream is disabled (inactive) or not; at most one can take a look at the substream name string or try to open and get -ENODEV. This patch extends the rawmidi info ioctl to show the current inactive state of the given substream. When the selected substream is inactive, info flags field contains the new bit flag SNDRV_RAWMIDI_INFO_STREAM_INACTIVE. Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20250110155943.31578-3-tiwai@suse.de --- Documentation/sound/designs/midi-2.0.rst | 6 ++++++ include/sound/rawmidi.h | 1 + include/uapi/sound/asound.h | 1 + sound/core/rawmidi.c | 2 ++ sound/core/ump.c | 9 +++++---- 5 files changed, 15 insertions(+), 4 deletions(-) (limited to 'include/uapi') diff --git a/Documentation/sound/designs/midi-2.0.rst b/Documentation/sound/designs/midi-2.0.rst index d525bc2805f7..d6c17a47c832 100644 --- a/Documentation/sound/designs/midi-2.0.rst +++ b/Documentation/sound/designs/midi-2.0.rst @@ -298,6 +298,12 @@ Rawmidi API Extensions On the other hand, the UMP rawmidi device number is found in `tied_device` field of the legacy rawmidi info, too. +* Each substream of the legacy rawmidi may be enabled / disabled + dynamically depending on the UMP FB state. + When the selected substream is inactive, it's indicated by the bit + 0x10 (`SNDRV_RAWMIDI_INFO_STREAM_INACTIVE`) in the `flags` field of + the legacy rawmidi info. + Control API Extensions ====================== diff --git a/include/sound/rawmidi.h b/include/sound/rawmidi.h index 7f1fec786b72..6f2e95298fc7 100644 --- a/include/sound/rawmidi.h +++ b/include/sound/rawmidi.h @@ -89,6 +89,7 @@ struct snd_rawmidi_substream { unsigned int framing; /* whether to frame input data */ unsigned int clock_type; /* clock source to use for input framing */ int use_count; /* use counter (for output) */ + bool inactive; /* inactive substream (for UMP legacy) */ size_t bytes; spinlock_t lock; struct snd_rawmidi *rmidi; diff --git a/include/uapi/sound/asound.h b/include/uapi/sound/asound.h index 1fcff031b5e3..9f3b32602ac1 100644 --- a/include/uapi/sound/asound.h +++ b/include/uapi/sound/asound.h @@ -728,6 +728,7 @@ enum { #define SNDRV_RAWMIDI_INFO_INPUT 0x00000002 #define SNDRV_RAWMIDI_INFO_DUPLEX 0x00000004 #define SNDRV_RAWMIDI_INFO_UMP 0x00000008 +#define SNDRV_RAWMIDI_INFO_STREAM_INACTIVE 0x00000010 #define SNDRV_RAWMIDI_DEVICE_UNKNOWN -1 diff --git a/sound/core/rawmidi.c b/sound/core/rawmidi.c index 858878fe487a..8a681bff4f7f 100644 --- a/sound/core/rawmidi.c +++ b/sound/core/rawmidi.c @@ -629,6 +629,8 @@ static int snd_rawmidi_info(struct snd_rawmidi_substream *substream, info->subdevice = substream->number; info->stream = substream->stream; info->flags = rmidi->info_flags; + if (substream->inactive) + info->flags |= SNDRV_RAWMIDI_INFO_STREAM_INACTIVE; strcpy(info->id, rmidi->id); strcpy(info->name, rmidi->name); strcpy(info->subname, substream->name); diff --git a/sound/core/ump.c b/sound/core/ump.c index 0bfab84eaafb..d6cd11be8750 100644 --- a/sound/core/ump.c +++ b/sound/core/ump.c @@ -1250,8 +1250,8 @@ static int fill_legacy_mapping(struct snd_ump_endpoint *ump) return num; } -static void fill_substream_names(struct snd_ump_endpoint *ump, - struct snd_rawmidi *rmidi, int dir) +static void update_legacy_substreams(struct snd_ump_endpoint *ump, + struct snd_rawmidi *rmidi, int dir) { struct snd_rawmidi_substream *s; const char *name; @@ -1265,6 +1265,7 @@ static void fill_substream_names(struct snd_ump_endpoint *ump, scnprintf(s->name, sizeof(s->name), "Group %d (%.16s)%s", idx + 1, name, ump->groups[idx].active ? "" : " [Inactive]"); + s->inactive = !ump->groups[idx].active; } } @@ -1272,8 +1273,8 @@ static void update_legacy_names(struct snd_ump_endpoint *ump) { struct snd_rawmidi *rmidi = ump->legacy_rmidi; - fill_substream_names(ump, rmidi, SNDRV_RAWMIDI_STREAM_INPUT); - fill_substream_names(ump, rmidi, SNDRV_RAWMIDI_STREAM_OUTPUT); + update_legacy_substreams(ump, rmidi, SNDRV_RAWMIDI_STREAM_INPUT); + update_legacy_substreams(ump, rmidi, SNDRV_RAWMIDI_STREAM_OUTPUT); } int snd_ump_attach_legacy_rawmidi(struct snd_ump_endpoint *ump, -- cgit v1.2.3 From 7bb49d2e8b52adace88249d9dece41e36af3bba0 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 10 Jan 2025 16:59:36 +0100 Subject: ALSA: rawmidi: Bump protocol version to 2.0.5 Bump the protocol version to 2.0.5, as we extended the rawmidi ABI for the new tied_device info and the substream inactive flag. Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20250110155943.31578-4-tiwai@suse.de --- include/uapi/sound/asound.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/sound/asound.h b/include/uapi/sound/asound.h index 9f3b32602ac1..000b36c0e2b9 100644 --- a/include/uapi/sound/asound.h +++ b/include/uapi/sound/asound.h @@ -716,7 +716,7 @@ enum { * Raw MIDI section - /dev/snd/midi?? */ -#define SNDRV_RAWMIDI_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 4) +#define SNDRV_RAWMIDI_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 5) enum { SNDRV_RAWMIDI_STREAM_OUTPUT = 0, -- cgit v1.2.3 From 3ab4a3199c782eae9b683b952f3ec5a766e4a096 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 10 Jan 2025 16:59:41 +0100 Subject: ALSA: seq: Notify UMP EP and FB changes So far we notify the sequencer client and port changes upon UMP FB changes, but those aren't really corresponding to the UMP updates. e.g. when a FB info gets updated, it's not notified but done only when some of sequencer port attribute is changed. This is no ideal behavior. This patch adds the two new sequencer event types for notifying the UMP EP and FB changes via the announce port. The new event takes snd_seq_ev_ump_notify type data, which is compatible with snd_seq_addr (where the port number is replaced with the block number). The events are sent when the EP and FB info gets updated explicitly via ioctl, or the backend UMP receives the corresponding UMP messages. The sequencer protocol version is bumped to 1.0.5 along with it. Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20250110155943.31578-9-tiwai@suse.de --- Documentation/sound/designs/midi-2.0.rst | 7 ++++++ include/sound/ump.h | 1 + include/uapi/sound/asequencer.h | 12 +++++++++- sound/core/seq/seq_clientmgr.c | 10 ++++++++ sound/core/seq/seq_system.h | 10 ++++++++ sound/core/seq/seq_ump_client.c | 40 ++++++++++++++++++++++++++++++-- sound/core/ump.c | 23 +++++++++++++++--- 7 files changed, 97 insertions(+), 6 deletions(-) (limited to 'include/uapi') diff --git a/Documentation/sound/designs/midi-2.0.rst b/Documentation/sound/designs/midi-2.0.rst index d6c17a47c832..71a343c93fe7 100644 --- a/Documentation/sound/designs/midi-2.0.rst +++ b/Documentation/sound/designs/midi-2.0.rst @@ -388,6 +388,13 @@ Sequencer API Extensions announcement to the ALSA sequencer system port, similarly like the normal port change notification. +* There are two extended event types for notifying the UMP Endpoint and + Function Block changes via the system announcement port: + type 68 (`SNDRV_SEQ_EVENT_UMP_EP_CHANGE`) and type 69 + (`SNDRV_SEQ_EVENT_UMP_BLOCK_CHANGE`). They take the new type, + `snd_seq_ev_ump_notify` in the payload, indicating the client number + and the FB number that are changed. + MIDI2 USB Gadget Function Driver ================================ diff --git a/include/sound/ump.h b/include/sound/ump.h index 532c2c3ea28e..73f97f88e2ed 100644 --- a/include/sound/ump.h +++ b/include/sound/ump.h @@ -83,6 +83,7 @@ struct snd_ump_ops { struct snd_seq_ump_ops { void (*input_receive)(struct snd_ump_endpoint *ump, const u32 *data, int words); + int (*notify_ep_change)(struct snd_ump_endpoint *ump); int (*notify_fb_change)(struct snd_ump_endpoint *ump, struct snd_ump_block *fb); int (*switch_protocol)(struct snd_ump_endpoint *ump); diff --git a/include/uapi/sound/asequencer.h b/include/uapi/sound/asequencer.h index bc30c1f2a109..a5c41f771e05 100644 --- a/include/uapi/sound/asequencer.h +++ b/include/uapi/sound/asequencer.h @@ -10,7 +10,7 @@ #include /** version of the sequencer */ -#define SNDRV_SEQ_VERSION SNDRV_PROTOCOL_VERSION(1, 0, 4) +#define SNDRV_SEQ_VERSION SNDRV_PROTOCOL_VERSION(1, 0, 5) /** * definition of sequencer event types @@ -92,6 +92,9 @@ #define SNDRV_SEQ_EVENT_PORT_SUBSCRIBED 66 /* ports connected */ #define SNDRV_SEQ_EVENT_PORT_UNSUBSCRIBED 67 /* ports disconnected */ +#define SNDRV_SEQ_EVENT_UMP_EP_CHANGE 68 /* UMP EP info has changed */ +#define SNDRV_SEQ_EVENT_UMP_BLOCK_CHANGE 69 /* UMP block info has changed */ + /* 70-89: synthesizer events - obsoleted */ /** user-defined events with fixed length @@ -253,6 +256,12 @@ struct snd_seq_ev_quote { struct snd_seq_event *event; /* quoted event */ } __packed; + /* UMP info change notify */ +struct snd_seq_ev_ump_notify { + unsigned char client; /**< Client number */ + unsigned char block; /**< Block number (optional) */ +}; + union snd_seq_event_data { /* event data... */ struct snd_seq_ev_note note; struct snd_seq_ev_ctrl control; @@ -265,6 +274,7 @@ union snd_seq_event_data { /* event data... */ struct snd_seq_connect connect; struct snd_seq_result result; struct snd_seq_ev_quote quote; + struct snd_seq_ev_ump_notify ump_notify; }; /* sequencer event */ diff --git a/sound/core/seq/seq_clientmgr.c b/sound/core/seq/seq_clientmgr.c index 3d27f777f29e..75783d69e708 100644 --- a/sound/core/seq/seq_clientmgr.c +++ b/sound/core/seq/seq_clientmgr.c @@ -2230,6 +2230,16 @@ static int snd_seq_ioctl_client_ump_info(struct snd_seq_client *caller, error: mutex_unlock(&cptr->ioctl_mutex); snd_seq_client_unlock(cptr); + if (!err && cmd == SNDRV_SEQ_IOCTL_SET_CLIENT_UMP_INFO) { + if (type == SNDRV_SEQ_CLIENT_UMP_INFO_ENDPOINT) + snd_seq_system_ump_notify(client, 0, + SNDRV_SEQ_EVENT_UMP_EP_CHANGE, + false); + else + snd_seq_system_ump_notify(client, type - 1, + SNDRV_SEQ_EVENT_UMP_BLOCK_CHANGE, + false); + } return err; } #endif diff --git a/sound/core/seq/seq_system.h b/sound/core/seq/seq_system.h index a118f7252b62..62e513f74871 100644 --- a/sound/core/seq/seq_system.h +++ b/sound/core/seq/seq_system.h @@ -16,6 +16,16 @@ void snd_seq_system_broadcast(int client, int port, int type, bool atomic); #define notify_event(client, port, type) \ snd_seq_system_broadcast(client, port, type, false) +/* notify UMP EP/FB change event */ +static inline void snd_seq_system_ump_notify(int client, int block, int type, + bool atomic) +{ + /* reuse the existing snd_seq_system_broadcast(): + * struct snd_seq_ev_ump_notify is compatible with struct snd_seq_addr + */ + snd_seq_system_broadcast(client, block, type, atomic); +} + #define snd_seq_system_client_ev_client_start(client) notify_event(client, 0, SNDRV_SEQ_EVENT_CLIENT_START) #define snd_seq_system_client_ev_client_exit(client) notify_event(client, 0, SNDRV_SEQ_EVENT_CLIENT_EXIT) #define snd_seq_system_client_ev_client_change(client) notify_event(client, 0, SNDRV_SEQ_EVENT_CLIENT_CHANGE) diff --git a/sound/core/seq/seq_ump_client.c b/sound/core/seq/seq_ump_client.c index 27c4dd9940ff..1255351b59ce 100644 --- a/sound/core/seq/seq_ump_client.c +++ b/sound/core/seq/seq_ump_client.c @@ -388,6 +388,33 @@ static void handle_group_notify(struct work_struct *work) setup_client_group_filter(client); } +/* UMP EP change notification */ +static int seq_ump_notify_ep_change(struct snd_ump_endpoint *ump) +{ + struct seq_ump_client *client = ump->seq_client; + struct snd_seq_client *cptr; + int client_id; + + if (!client) + return -ENODEV; + client_id = client->seq_client; + cptr = snd_seq_kernel_client_get(client_id); + if (!cptr) + return -ENODEV; + + snd_seq_system_ump_notify(client_id, 0, SNDRV_SEQ_EVENT_UMP_EP_CHANGE, + true); + + /* update sequencer client name if needed */ + if (*ump->core.name && strcmp(ump->core.name, cptr->name)) { + strscpy(cptr->name, ump->core.name, sizeof(cptr->name)); + snd_seq_system_client_ev_client_change(client_id); + } + + snd_seq_kernel_client_put(cptr); + return 0; +} + /* UMP FB change notification */ static int seq_ump_notify_fb_change(struct snd_ump_endpoint *ump, struct snd_ump_block *fb) @@ -397,20 +424,29 @@ static int seq_ump_notify_fb_change(struct snd_ump_endpoint *ump, if (!client) return -ENODEV; schedule_work(&client->group_notify_work); + snd_seq_system_ump_notify(client->seq_client, fb->info.block_id, + SNDRV_SEQ_EVENT_UMP_BLOCK_CHANGE, + true); return 0; } /* UMP protocol change notification; just update the midi_version field */ static int seq_ump_switch_protocol(struct snd_ump_endpoint *ump) { - if (!ump->seq_client) + struct seq_ump_client *client = ump->seq_client; + + if (!client) return -ENODEV; - setup_client_midi_version(ump->seq_client); + setup_client_midi_version(client); + snd_seq_system_ump_notify(client->seq_client, 0, + SNDRV_SEQ_EVENT_UMP_EP_CHANGE, + true); return 0; } static const struct snd_seq_ump_ops seq_ump_ops = { .input_receive = seq_ump_input_receive, + .notify_ep_change = seq_ump_notify_ep_change, .notify_fb_change = seq_ump_notify_fb_change, .switch_protocol = seq_ump_switch_protocol, }; diff --git a/sound/core/ump.c b/sound/core/ump.c index c80a0a8b7ad0..ff3cc2386ece 100644 --- a/sound/core/ump.c +++ b/sound/core/ump.c @@ -695,6 +695,15 @@ static void choose_default_protocol(struct snd_ump_endpoint *ump) ump->info.protocol |= SNDRV_UMP_EP_INFO_PROTO_MIDI1; } +/* notify the EP info/name change to sequencer */ +static void seq_notify_ep_change(struct snd_ump_endpoint *ump) +{ +#if IS_ENABLED(CONFIG_SND_SEQUENCER) + if (ump->parsed && ump->seq_ops && ump->seq_ops->notify_ep_change) + ump->seq_ops->notify_ep_change(ump); +#endif +} + /* handle EP info stream message; update the UMP attributes */ static int ump_handle_ep_info_msg(struct snd_ump_endpoint *ump, const union snd_ump_stream_msg *buf) @@ -719,6 +728,7 @@ static int ump_handle_ep_info_msg(struct snd_ump_endpoint *ump, ump->info.protocol &= ump->info.protocol_caps; choose_default_protocol(ump); + seq_notify_ep_change(ump); return 1; /* finished */ } @@ -741,6 +751,7 @@ static int ump_handle_device_info_msg(struct snd_ump_endpoint *ump, ump->info.family_id, ump->info.model_id, ump->info.sw_revision); + seq_notify_ep_change(ump); return 1; /* finished */ } @@ -762,6 +773,7 @@ static int ump_handle_ep_name_msg(struct snd_ump_endpoint *ump, if (ret && ump->parsed) { ump_set_rawmidi_name(ump); ump_legacy_set_rawmidi_name(ump); + seq_notify_ep_change(ump); } return ret; @@ -771,9 +783,14 @@ static int ump_handle_ep_name_msg(struct snd_ump_endpoint *ump, static int ump_handle_product_id_msg(struct snd_ump_endpoint *ump, const union snd_ump_stream_msg *buf) { - return ump_append_string(ump, ump->info.product_id, - sizeof(ump->info.product_id), - buf->raw, 2); + int ret; + + ret = ump_append_string(ump, ump->info.product_id, + sizeof(ump->info.product_id), + buf->raw, 2); + if (ret) + seq_notify_ep_change(ump); + return ret; } /* notify the protocol change to sequencer */ -- cgit v1.2.3 From 658eb5ab916ddc92f294dbce8e3d449470be9f86 Mon Sep 17 00:00:00 2001 From: Wang Yaxin Date: Tue, 3 Dec 2024 16:48:48 +0800 Subject: delayacct: add delay max to record delay peak Introduce the use cases of delay max, which can help quickly detect potential abnormal delays in the system and record the types and specific details of delay spikes. Problem ======== Delay accounting can track the average delay of processes to show system workload. However, when a process experiences a significant delay, maybe a delay spike, which adversely affects performance, getdelays can only display the average system delay over a period of time. Yet, average delay is unhelpful for diagnosing delay peak. It is not even possible to determine which type of delay has spiked, as this information might be masked by the average delay. Solution ========= the 'delay max' can display delay peak since the system's startup, which can record potential abnormal delays over time, including the type of delay and the maximum delay. This is helpful for quickly identifying crash caused by delay. Use case ========= bash# ./getdelays -d -p 244 print delayacct stats ON PID 244 CPU count real total virtual total delay total delay average delay max 68 192000000 213676651 705643 0.010ms 0.306381ms IO count delay total delay average delay max 0 0 0.000ms 0.000000ms SWAP count delay total delay average delay max 0 0 0.000ms 0.000000ms RECLAIM count delay total delay average delay max 0 0 0.000ms 0.000000ms THRASHING count delay total delay average delay max 0 0 0.000ms 0.000000ms COMPACT count delay total delay average delay max 0 0 0.000ms 0.000000ms WPCOPY count delay total delay average delay max 235 15648284 0.067ms 0.263842ms IRQ count delay total delay average delay max 0 0 0.000ms 0.000000ms [wang.yaxin@zte.com.cn: update docs and fix some spelling errors] Link: https://lkml.kernel.org/r/20241213192700771XKZ8H30OtHSeziGqRVMs0@zte.com.cn Link: https://lkml.kernel.org/r/20241203164848805CS62CQPQWG9GLdQj2_BxS@zte.com.cn Co-developed-by: Wang Yong Signed-off-by: Wang Yong Co-developed-by: xu xin Signed-off-by: xu xin Co-developed-by: Wang Yaxin Signed-off-by: Wang Yaxin Signed-off-by: Kun Jiang Cc: Balbir Singh Cc: David Hildenbrand Cc: Fan Yu Cc: Peilin He Cc: tuqiang Cc: Yang Yang Cc: ye xingchen Cc: Yunkai Zhang Signed-off-by: Andrew Morton --- Documentation/accounting/delay-accounting.rst | 42 +++++++++---------- include/linux/delayacct.h | 7 ++++ include/linux/sched.h | 3 ++ include/uapi/linux/taskstats.h | 9 ++++ kernel/delayacct.c | 37 ++++++++++++----- kernel/sched/stats.h | 5 ++- tools/accounting/getdelays.c | 59 +++++++++++++++------------ 7 files changed, 105 insertions(+), 57 deletions(-) (limited to 'include/uapi') diff --git a/Documentation/accounting/delay-accounting.rst b/Documentation/accounting/delay-accounting.rst index f61c01fc376e..8a0277428ccf 100644 --- a/Documentation/accounting/delay-accounting.rst +++ b/Documentation/accounting/delay-accounting.rst @@ -100,29 +100,29 @@ Get delays, since system boot, for pid 10:: # ./getdelays -d -p 10 (output similar to next case) -Get sum of delays, since system boot, for all pids with tgid 5:: +Get sum and peak of delays, since system boot, for all pids with tgid 242:: - # ./getdelays -d -t 5 + bash-4.4# ./getdelays -d -t 242 print delayacct stats ON - TGID 5 - - - CPU count real total virtual total delay total delay average - 8 7000000 6872122 3382277 0.423ms - IO count delay total delay average - 0 0 0.000ms - SWAP count delay total delay average - 0 0 0.000ms - RECLAIM count delay total delay average - 0 0 0.000ms - THRASHING count delay total delay average - 0 0 0.000ms - COMPACT count delay total delay average - 0 0 0.000ms - WPCOPY count delay total delay average - 0 0 0.000ms - IRQ count delay total delay average - 0 0 0.000ms + TGID 242 + + + CPU count real total virtual total delay total delay average delay max + 239 296000000 307724885 1127792 0.005ms 0.238382ms + IO count delay total delay average delay max + 0 0 0.000ms 0.000000ms + SWAP count delay total delay average delay max + 0 0 0.000ms 0.000000ms + RECLAIM count delay total delay average delay max + 0 0 0.000ms 0.000000ms + THRASHING count delay total delay average delay max + 0 0 0.000ms 0.000000ms + COMPACT count delay total delay average delay max + 0 0 0.000ms 0.000000ms + WPCOPY count delay total delay average delay max + 230 19100476 0.083ms 0.383822ms + IRQ count delay total delay average delay max + 0 0 0.000ms 0.000000ms Get IO accounting for pid 1, it works only with -p:: diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h index 6639f48dac36..56fbfa2c2ac5 100644 --- a/include/linux/delayacct.h +++ b/include/linux/delayacct.h @@ -29,25 +29,32 @@ struct task_delay_info { * XXX_delay contains the accumulated delay time in nanoseconds. */ u64 blkio_start; + u64 blkio_delay_max; u64 blkio_delay; /* wait for sync block io completion */ u64 swapin_start; + u64 swapin_delay_max; u64 swapin_delay; /* wait for swapin */ u32 blkio_count; /* total count of the number of sync block */ /* io operations performed */ u32 swapin_count; /* total count of swapin */ u64 freepages_start; + u64 freepages_delay_max; u64 freepages_delay; /* wait for memory reclaim */ u64 thrashing_start; + u64 thrashing_delay_max; u64 thrashing_delay; /* wait for thrashing page */ u64 compact_start; + u64 compact_delay_max; u64 compact_delay; /* wait for memory compact */ u64 wpcopy_start; + u64 wpcopy_delay_max; u64 wpcopy_delay; /* wait for write-protect copy */ + u64 irq_delay_max; u64 irq_delay; /* wait for IRQ/SOFTIRQ */ u32 freepages_count; /* total count of memory reclaim */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 64934e0830af..a0ae3923b41d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -398,6 +398,9 @@ struct sched_info { /* Time spent waiting on a runqueue: */ unsigned long long run_delay; + /* Max time spent waiting on a runqueue: */ + unsigned long long max_run_delay; + /* Timestamps: */ /* When did we last run on a CPU? */ diff --git a/include/uapi/linux/taskstats.h b/include/uapi/linux/taskstats.h index b50b2eb257a0..e0d1c6fc9f3b 100644 --- a/include/uapi/linux/taskstats.h +++ b/include/uapi/linux/taskstats.h @@ -72,6 +72,7 @@ struct taskstats { */ __u64 cpu_count __attribute__((aligned(8))); __u64 cpu_delay_total; + __u64 cpu_delay_max; /* Following four fields atomically updated using task->delays->lock */ @@ -80,10 +81,12 @@ struct taskstats { */ __u64 blkio_count; __u64 blkio_delay_total; + __u64 blkio_delay_max; /* Delay waiting for page fault I/O (swap in only) */ __u64 swapin_count; __u64 swapin_delay_total; + __u64 swapin_delay_max; /* cpu "wall-clock" running time * On some architectures, value will adjust for cpu time stolen @@ -166,10 +169,12 @@ struct taskstats { /* Delay waiting for memory reclaim */ __u64 freepages_count; __u64 freepages_delay_total; + __u64 freepages_delay_max; /* Delay waiting for thrashing page */ __u64 thrashing_count; __u64 thrashing_delay_total; + __u64 thrashing_delay_max; /* v10: 64-bit btime to avoid overflow */ __u64 ac_btime64; /* 64-bit begin time */ @@ -177,6 +182,7 @@ struct taskstats { /* v11: Delay waiting for memory compact */ __u64 compact_count; __u64 compact_delay_total; + __u64 compact_delay_max; /* v12 begin */ __u32 ac_tgid; /* thread group ID */ @@ -198,10 +204,13 @@ struct taskstats { /* v13: Delay waiting for write-protect copy */ __u64 wpcopy_count; __u64 wpcopy_delay_total; + __u64 wpcopy_delay_max; /* v14: Delay waiting for IRQ/SOFTIRQ */ __u64 irq_count; __u64 irq_delay_total; + __u64 irq_delay_max; + /* v15: add Delay max */ }; diff --git a/kernel/delayacct.c b/kernel/delayacct.c index dead51de8eb5..23212a0c88e4 100644 --- a/kernel/delayacct.c +++ b/kernel/delayacct.c @@ -93,9 +93,9 @@ void __delayacct_tsk_init(struct task_struct *tsk) /* * Finish delay accounting for a statistic using its timestamps (@start), - * accumalator (@total) and @count + * accumulator (@total) and @count */ -static void delayacct_end(raw_spinlock_t *lock, u64 *start, u64 *total, u32 *count) +static void delayacct_end(raw_spinlock_t *lock, u64 *start, u64 *total, u32 *count, u64 *max) { s64 ns = local_clock() - *start; unsigned long flags; @@ -104,6 +104,8 @@ static void delayacct_end(raw_spinlock_t *lock, u64 *start, u64 *total, u32 *cou raw_spin_lock_irqsave(lock, flags); *total += ns; (*count)++; + if (ns > *max) + *max = ns; raw_spin_unlock_irqrestore(lock, flags); } } @@ -122,7 +124,8 @@ void __delayacct_blkio_end(struct task_struct *p) delayacct_end(&p->delays->lock, &p->delays->blkio_start, &p->delays->blkio_delay, - &p->delays->blkio_count); + &p->delays->blkio_count, + &p->delays->blkio_delay_max); } int delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) @@ -153,10 +156,11 @@ int delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) d->cpu_count += t1; + d->cpu_delay_max = tsk->sched_info.max_run_delay; tmp = (s64)d->cpu_delay_total + t2; d->cpu_delay_total = (tmp < (s64)d->cpu_delay_total) ? 0 : tmp; - tmp = (s64)d->cpu_run_virtual_total + t3; + d->cpu_run_virtual_total = (tmp < (s64)d->cpu_run_virtual_total) ? 0 : tmp; @@ -164,20 +168,26 @@ int delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) return 0; /* zero XXX_total, non-zero XXX_count implies XXX stat overflowed */ - raw_spin_lock_irqsave(&tsk->delays->lock, flags); + d->blkio_delay_max = tsk->delays->blkio_delay_max; tmp = d->blkio_delay_total + tsk->delays->blkio_delay; d->blkio_delay_total = (tmp < d->blkio_delay_total) ? 0 : tmp; + d->swapin_delay_max = tsk->delays->swapin_delay_max; tmp = d->swapin_delay_total + tsk->delays->swapin_delay; d->swapin_delay_total = (tmp < d->swapin_delay_total) ? 0 : tmp; + d->freepages_delay_max = tsk->delays->freepages_delay_max; tmp = d->freepages_delay_total + tsk->delays->freepages_delay; d->freepages_delay_total = (tmp < d->freepages_delay_total) ? 0 : tmp; + d->thrashing_delay_max = tsk->delays->thrashing_delay_max; tmp = d->thrashing_delay_total + tsk->delays->thrashing_delay; d->thrashing_delay_total = (tmp < d->thrashing_delay_total) ? 0 : tmp; + d->compact_delay_max = tsk->delays->compact_delay_max; tmp = d->compact_delay_total + tsk->delays->compact_delay; d->compact_delay_total = (tmp < d->compact_delay_total) ? 0 : tmp; + d->wpcopy_delay_max = tsk->delays->wpcopy_delay_max; tmp = d->wpcopy_delay_total + tsk->delays->wpcopy_delay; d->wpcopy_delay_total = (tmp < d->wpcopy_delay_total) ? 0 : tmp; + d->irq_delay_max = tsk->delays->irq_delay_max; tmp = d->irq_delay_total + tsk->delays->irq_delay; d->irq_delay_total = (tmp < d->irq_delay_total) ? 0 : tmp; d->blkio_count += tsk->delays->blkio_count; @@ -213,7 +223,8 @@ void __delayacct_freepages_end(void) delayacct_end(¤t->delays->lock, ¤t->delays->freepages_start, ¤t->delays->freepages_delay, - ¤t->delays->freepages_count); + ¤t->delays->freepages_count, + ¤t->delays->freepages_delay_max); } void __delayacct_thrashing_start(bool *in_thrashing) @@ -235,7 +246,8 @@ void __delayacct_thrashing_end(bool *in_thrashing) delayacct_end(¤t->delays->lock, ¤t->delays->thrashing_start, ¤t->delays->thrashing_delay, - ¤t->delays->thrashing_count); + ¤t->delays->thrashing_count, + ¤t->delays->thrashing_delay_max); } void __delayacct_swapin_start(void) @@ -248,7 +260,8 @@ void __delayacct_swapin_end(void) delayacct_end(¤t->delays->lock, ¤t->delays->swapin_start, ¤t->delays->swapin_delay, - ¤t->delays->swapin_count); + ¤t->delays->swapin_count, + ¤t->delays->swapin_delay_max); } void __delayacct_compact_start(void) @@ -261,7 +274,8 @@ void __delayacct_compact_end(void) delayacct_end(¤t->delays->lock, ¤t->delays->compact_start, ¤t->delays->compact_delay, - ¤t->delays->compact_count); + ¤t->delays->compact_count, + ¤t->delays->compact_delay_max); } void __delayacct_wpcopy_start(void) @@ -274,7 +288,8 @@ void __delayacct_wpcopy_end(void) delayacct_end(¤t->delays->lock, ¤t->delays->wpcopy_start, ¤t->delays->wpcopy_delay, - ¤t->delays->wpcopy_count); + ¤t->delays->wpcopy_count, + ¤t->delays->wpcopy_delay_max); } void __delayacct_irq(struct task_struct *task, u32 delta) @@ -284,6 +299,8 @@ void __delayacct_irq(struct task_struct *task, u32 delta) raw_spin_lock_irqsave(&task->delays->lock, flags); task->delays->irq_delay += delta; task->delays->irq_count++; + if (delta > task->delays->irq_delay_max) + task->delays->irq_delay_max = delta; raw_spin_unlock_irqrestore(&task->delays->lock, flags); } diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h index 8ee0add5a48a..ed72435aef51 100644 --- a/kernel/sched/stats.h +++ b/kernel/sched/stats.h @@ -244,7 +244,8 @@ static inline void sched_info_dequeue(struct rq *rq, struct task_struct *t) delta = rq_clock(rq) - t->sched_info.last_queued; t->sched_info.last_queued = 0; t->sched_info.run_delay += delta; - + if (delta > t->sched_info.max_run_delay) + t->sched_info.max_run_delay = delta; rq_sched_info_dequeue(rq, delta); } @@ -266,6 +267,8 @@ static void sched_info_arrive(struct rq *rq, struct task_struct *t) t->sched_info.run_delay += delta; t->sched_info.last_arrival = now; t->sched_info.pcount++; + if (delta > t->sched_info.max_run_delay) + t->sched_info.max_run_delay = delta; rq_sched_info_arrive(rq, delta); } diff --git a/tools/accounting/getdelays.c b/tools/accounting/getdelays.c index 1334214546d7..e570bcad185d 100644 --- a/tools/accounting/getdelays.c +++ b/tools/accounting/getdelays.c @@ -192,60 +192,69 @@ static int get_family_id(int sd) } #define average_ms(t, c) (t / 1000000ULL / (c ? c : 1)) +#define delay_max_ms(t) (t / 1000000ULL) static void print_delayacct(struct taskstats *t) { - printf("\n\nCPU %15s%15s%15s%15s%15s\n" - " %15llu%15llu%15llu%15llu%15.3fms\n" - "IO %15s%15s%15s\n" - " %15llu%15llu%15.3fms\n" - "SWAP %15s%15s%15s\n" - " %15llu%15llu%15.3fms\n" - "RECLAIM %12s%15s%15s\n" - " %15llu%15llu%15.3fms\n" - "THRASHING%12s%15s%15s\n" - " %15llu%15llu%15.3fms\n" - "COMPACT %12s%15s%15s\n" - " %15llu%15llu%15.3fms\n" - "WPCOPY %12s%15s%15s\n" - " %15llu%15llu%15.3fms\n" - "IRQ %15s%15s%15s\n" - " %15llu%15llu%15.3fms\n", + printf("\n\nCPU %15s%15s%15s%15s%15s%15s\n" + " %15llu%15llu%15llu%15llu%15.3fms%13.6fms\n" + "IO %15s%15s%15s%15s\n" + " %15llu%15llu%15.3fms%13.6fms\n" + "SWAP %15s%15s%15s%15s\n" + " %15llu%15llu%15.3fms%13.6fms\n" + "RECLAIM %12s%15s%15s%15s\n" + " %15llu%15llu%15.3fms%13.6fms\n" + "THRASHING%12s%15s%15s%15s\n" + " %15llu%15llu%15.3fms%13.6fms\n" + "COMPACT %12s%15s%15s%15s\n" + " %15llu%15llu%15.3fms%13.6fms\n" + "WPCOPY %12s%15s%15s%15s\n" + " %15llu%15llu%15.3fms%13.6fms\n" + "IRQ %15s%15s%15s%15s\n" + " %15llu%15llu%15.3fms%13.6fms\n", "count", "real total", "virtual total", - "delay total", "delay average", + "delay total", "delay average", "delay max", (unsigned long long)t->cpu_count, (unsigned long long)t->cpu_run_real_total, (unsigned long long)t->cpu_run_virtual_total, (unsigned long long)t->cpu_delay_total, average_ms((double)t->cpu_delay_total, t->cpu_count), - "count", "delay total", "delay average", + delay_max_ms((double)t->cpu_delay_max), + "count", "delay total", "delay average", "delay max", (unsigned long long)t->blkio_count, (unsigned long long)t->blkio_delay_total, average_ms((double)t->blkio_delay_total, t->blkio_count), - "count", "delay total", "delay average", + delay_max_ms((double)t->blkio_delay_max), + "count", "delay total", "delay average", "delay max", (unsigned long long)t->swapin_count, (unsigned long long)t->swapin_delay_total, average_ms((double)t->swapin_delay_total, t->swapin_count), - "count", "delay total", "delay average", + delay_max_ms((double)t->swapin_delay_max), + "count", "delay total", "delay average", "delay max", (unsigned long long)t->freepages_count, (unsigned long long)t->freepages_delay_total, average_ms((double)t->freepages_delay_total, t->freepages_count), - "count", "delay total", "delay average", + delay_max_ms((double)t->freepages_delay_max), + "count", "delay total", "delay average", "delay max", (unsigned long long)t->thrashing_count, (unsigned long long)t->thrashing_delay_total, average_ms((double)t->thrashing_delay_total, t->thrashing_count), - "count", "delay total", "delay average", + delay_max_ms((double)t->thrashing_delay_max), + "count", "delay total", "delay average", "delay max", (unsigned long long)t->compact_count, (unsigned long long)t->compact_delay_total, average_ms((double)t->compact_delay_total, t->compact_count), - "count", "delay total", "delay average", + delay_max_ms((double)t->compact_delay_max), + "count", "delay total", "delay average", "delay max", (unsigned long long)t->wpcopy_count, (unsigned long long)t->wpcopy_delay_total, average_ms((double)t->wpcopy_delay_total, t->wpcopy_count), - "count", "delay total", "delay average", + delay_max_ms((double)t->wpcopy_delay_max), + "count", "delay total", "delay average", "delay max", (unsigned long long)t->irq_count, (unsigned long long)t->irq_delay_total, - average_ms((double)t->irq_delay_total, t->irq_count)); + average_ms((double)t->irq_delay_total, t->irq_count), + delay_max_ms((double)t->irq_delay_max)); } static void task_context_switch_counts(struct taskstats *t) -- cgit v1.2.3 From f65c64f311ee2f1ddc1eb395ed8b20e6b9d14e85 Mon Sep 17 00:00:00 2001 From: Wang Yaxin Date: Fri, 20 Dec 2024 17:31:05 +0800 Subject: delayacct: add delay min to record delay peak Delay accounting can now calculate the average delay of processes, detect the overall system load, and also record the 'delay max' to identify potential abnormal delays. However, 'delay min' can help us identify another useful delay peak. By comparing the difference between 'delay max' and 'delay min', we can understand the optimization space for latency, providing a reference for the optimization of latency performance. Use case ========= bash-4.4# ./getdelays -d -t 242 print delayacct stats ON TGID 242 CPU count real total virtual total delay total delay average delay max delay min 39 156000000 156576579 2111069 0.054ms 0.212296ms 0.031307ms IO count delay total delay average delay max delay min 0 0 0.000ms 0.000000ms 0.000000ms SWAP count delay total delay average delay max delay min 0 0 0.000ms 0.000000ms 0.000000ms RECLAIM count delay total delay average delay max delay min 0 0 0.000ms 0.000000ms 0.000000ms THRASHING count delay total delay average delay max delay min 0 0 0.000ms 0.000000ms 0.000000ms COMPACT count delay total delay average delay max delay min 0 0 0.000ms 0.000000ms 0.000000ms WPCOPY count delay total delay average delay max delay min 156 11215873 0.072ms 0.207403ms 0.033913ms IRQ count delay total delay average delay max delay min 0 0 0.000ms 0.000000ms 0.000000ms Link: https://lkml.kernel.org/r/20241220173105906EOdsPhzjMLYNJJBqgz1ga@zte.com.cn Co-developed-by: Wang Yong Signed-off-by: Wang Yong Co-developed-by: xu xin Signed-off-by: xu xin Signed-off-by: Wang Yaxin Co-developed-by: Kun Jiang Signed-off-by: Kun Jiang Cc: Balbir Singh Cc: David Hildenbrand Cc: Fan Yu Cc: Peilin He Cc: tuqiang Cc: ye xingchen Cc: Yunkai Zhang Signed-off-by: Andrew Morton --- Documentation/accounting/delay-accounting.rst | 32 ++++++++++---------- include/linux/delayacct.h | 7 +++++ include/linux/sched.h | 3 ++ include/uapi/linux/taskstats.h | 8 +++++ kernel/delayacct.c | 32 +++++++++++++++----- kernel/sched/stats.h | 4 +++ tools/accounting/getdelays.c | 42 ++++++++++++++++----------- 7 files changed, 88 insertions(+), 40 deletions(-) (limited to 'include/uapi') diff --git a/Documentation/accounting/delay-accounting.rst b/Documentation/accounting/delay-accounting.rst index 8a0277428ccf..210c194d4a7b 100644 --- a/Documentation/accounting/delay-accounting.rst +++ b/Documentation/accounting/delay-accounting.rst @@ -107,22 +107,22 @@ Get sum and peak of delays, since system boot, for all pids with tgid 242:: TGID 242 - CPU count real total virtual total delay total delay average delay max - 239 296000000 307724885 1127792 0.005ms 0.238382ms - IO count delay total delay average delay max - 0 0 0.000ms 0.000000ms - SWAP count delay total delay average delay max - 0 0 0.000ms 0.000000ms - RECLAIM count delay total delay average delay max - 0 0 0.000ms 0.000000ms - THRASHING count delay total delay average delay max - 0 0 0.000ms 0.000000ms - COMPACT count delay total delay average delay max - 0 0 0.000ms 0.000000ms - WPCOPY count delay total delay average delay max - 230 19100476 0.083ms 0.383822ms - IRQ count delay total delay average delay max - 0 0 0.000ms 0.000000ms + CPU count real total virtual total delay total delay average delay max delay min + 39 156000000 156576579 2111069 0.054ms 0.212296ms 0.031307ms + IO count delay total delay average delay max delay min + 0 0 0.000ms 0.000000ms 0.000000ms + SWAP count delay total delay average delay max delay min + 0 0 0.000ms 0.000000ms 0.000000ms + RECLAIM count delay total delay average delay max delay min + 0 0 0.000ms 0.000000ms 0.000000ms + THRASHING count delay total delay average delay max delay min + 0 0 0.000ms 0.000000ms 0.000000ms + COMPACT count delay total delay average delay max delay min + 0 0 0.000ms 0.000000ms 0.000000ms + WPCOPY count delay total delay average delay max delay min + 156 11215873 0.072ms 0.207403ms 0.033913ms + IRQ count delay total delay average delay max delay min + 0 0 0.000ms 0.000000ms 0.000000ms Get IO accounting for pid 1, it works only with -p:: diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h index 56fbfa2c2ac5..800dcc360db2 100644 --- a/include/linux/delayacct.h +++ b/include/linux/delayacct.h @@ -30,9 +30,11 @@ struct task_delay_info { */ u64 blkio_start; u64 blkio_delay_max; + u64 blkio_delay_min; u64 blkio_delay; /* wait for sync block io completion */ u64 swapin_start; u64 swapin_delay_max; + u64 swapin_delay_min; u64 swapin_delay; /* wait for swapin */ u32 blkio_count; /* total count of the number of sync block */ /* io operations performed */ @@ -40,21 +42,26 @@ struct task_delay_info { u64 freepages_start; u64 freepages_delay_max; + u64 freepages_delay_min; u64 freepages_delay; /* wait for memory reclaim */ u64 thrashing_start; u64 thrashing_delay_max; + u64 thrashing_delay_min; u64 thrashing_delay; /* wait for thrashing page */ u64 compact_start; u64 compact_delay_max; + u64 compact_delay_min; u64 compact_delay; /* wait for memory compact */ u64 wpcopy_start; u64 wpcopy_delay_max; + u64 wpcopy_delay_min; u64 wpcopy_delay; /* wait for write-protect copy */ u64 irq_delay_max; + u64 irq_delay_min; u64 irq_delay; /* wait for IRQ/SOFTIRQ */ u32 freepages_count; /* total count of memory reclaim */ diff --git a/include/linux/sched.h b/include/linux/sched.h index a0ae3923b41d..155012467b21 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -401,6 +401,9 @@ struct sched_info { /* Max time spent waiting on a runqueue: */ unsigned long long max_run_delay; + /* Min time spent waiting on a runqueue: */ + unsigned long long min_run_delay; + /* Timestamps: */ /* When did we last run on a CPU? */ diff --git a/include/uapi/linux/taskstats.h b/include/uapi/linux/taskstats.h index e0d1c6fc9f3b..934e20ef7f79 100644 --- a/include/uapi/linux/taskstats.h +++ b/include/uapi/linux/taskstats.h @@ -73,6 +73,7 @@ struct taskstats { __u64 cpu_count __attribute__((aligned(8))); __u64 cpu_delay_total; __u64 cpu_delay_max; + __u64 cpu_delay_min; /* Following four fields atomically updated using task->delays->lock */ @@ -82,11 +83,13 @@ struct taskstats { __u64 blkio_count; __u64 blkio_delay_total; __u64 blkio_delay_max; + __u64 blkio_delay_min; /* Delay waiting for page fault I/O (swap in only) */ __u64 swapin_count; __u64 swapin_delay_total; __u64 swapin_delay_max; + __u64 swapin_delay_min; /* cpu "wall-clock" running time * On some architectures, value will adjust for cpu time stolen @@ -170,11 +173,13 @@ struct taskstats { __u64 freepages_count; __u64 freepages_delay_total; __u64 freepages_delay_max; + __u64 freepages_delay_min; /* Delay waiting for thrashing page */ __u64 thrashing_count; __u64 thrashing_delay_total; __u64 thrashing_delay_max; + __u64 thrashing_delay_min; /* v10: 64-bit btime to avoid overflow */ __u64 ac_btime64; /* 64-bit begin time */ @@ -183,6 +188,7 @@ struct taskstats { __u64 compact_count; __u64 compact_delay_total; __u64 compact_delay_max; + __u64 compact_delay_min; /* v12 begin */ __u32 ac_tgid; /* thread group ID */ @@ -205,11 +211,13 @@ struct taskstats { __u64 wpcopy_count; __u64 wpcopy_delay_total; __u64 wpcopy_delay_max; + __u64 wpcopy_delay_min; /* v14: Delay waiting for IRQ/SOFTIRQ */ __u64 irq_count; __u64 irq_delay_total; __u64 irq_delay_max; + __u64 irq_delay_min; /* v15: add Delay max */ }; diff --git a/kernel/delayacct.c b/kernel/delayacct.c index 23212a0c88e4..b238eb8c6573 100644 --- a/kernel/delayacct.c +++ b/kernel/delayacct.c @@ -95,7 +95,7 @@ void __delayacct_tsk_init(struct task_struct *tsk) * Finish delay accounting for a statistic using its timestamps (@start), * accumulator (@total) and @count */ -static void delayacct_end(raw_spinlock_t *lock, u64 *start, u64 *total, u32 *count, u64 *max) +static void delayacct_end(raw_spinlock_t *lock, u64 *start, u64 *total, u32 *count, u64 *max, u64 *min) { s64 ns = local_clock() - *start; unsigned long flags; @@ -106,6 +106,8 @@ static void delayacct_end(raw_spinlock_t *lock, u64 *start, u64 *total, u32 *cou (*count)++; if (ns > *max) *max = ns; + if (*min == 0 || ns < *min) + *min = ns; raw_spin_unlock_irqrestore(lock, flags); } } @@ -125,7 +127,8 @@ void __delayacct_blkio_end(struct task_struct *p) &p->delays->blkio_start, &p->delays->blkio_delay, &p->delays->blkio_count, - &p->delays->blkio_delay_max); + &p->delays->blkio_delay_max, + &p->delays->blkio_delay_min); } int delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) @@ -157,6 +160,7 @@ int delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) d->cpu_count += t1; d->cpu_delay_max = tsk->sched_info.max_run_delay; + d->cpu_delay_min = tsk->sched_info.min_run_delay; tmp = (s64)d->cpu_delay_total + t2; d->cpu_delay_total = (tmp < (s64)d->cpu_delay_total) ? 0 : tmp; tmp = (s64)d->cpu_run_virtual_total + t3; @@ -170,24 +174,31 @@ int delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) /* zero XXX_total, non-zero XXX_count implies XXX stat overflowed */ raw_spin_lock_irqsave(&tsk->delays->lock, flags); d->blkio_delay_max = tsk->delays->blkio_delay_max; + d->blkio_delay_min = tsk->delays->blkio_delay_min; tmp = d->blkio_delay_total + tsk->delays->blkio_delay; d->blkio_delay_total = (tmp < d->blkio_delay_total) ? 0 : tmp; d->swapin_delay_max = tsk->delays->swapin_delay_max; + d->swapin_delay_min = tsk->delays->swapin_delay_min; tmp = d->swapin_delay_total + tsk->delays->swapin_delay; d->swapin_delay_total = (tmp < d->swapin_delay_total) ? 0 : tmp; d->freepages_delay_max = tsk->delays->freepages_delay_max; + d->freepages_delay_min = tsk->delays->freepages_delay_min; tmp = d->freepages_delay_total + tsk->delays->freepages_delay; d->freepages_delay_total = (tmp < d->freepages_delay_total) ? 0 : tmp; d->thrashing_delay_max = tsk->delays->thrashing_delay_max; + d->thrashing_delay_min = tsk->delays->thrashing_delay_min; tmp = d->thrashing_delay_total + tsk->delays->thrashing_delay; d->thrashing_delay_total = (tmp < d->thrashing_delay_total) ? 0 : tmp; d->compact_delay_max = tsk->delays->compact_delay_max; + d->compact_delay_min = tsk->delays->compact_delay_min; tmp = d->compact_delay_total + tsk->delays->compact_delay; d->compact_delay_total = (tmp < d->compact_delay_total) ? 0 : tmp; d->wpcopy_delay_max = tsk->delays->wpcopy_delay_max; + d->wpcopy_delay_min = tsk->delays->wpcopy_delay_min; tmp = d->wpcopy_delay_total + tsk->delays->wpcopy_delay; d->wpcopy_delay_total = (tmp < d->wpcopy_delay_total) ? 0 : tmp; d->irq_delay_max = tsk->delays->irq_delay_max; + d->irq_delay_min = tsk->delays->irq_delay_min; tmp = d->irq_delay_total + tsk->delays->irq_delay; d->irq_delay_total = (tmp < d->irq_delay_total) ? 0 : tmp; d->blkio_count += tsk->delays->blkio_count; @@ -224,7 +235,8 @@ void __delayacct_freepages_end(void) ¤t->delays->freepages_start, ¤t->delays->freepages_delay, ¤t->delays->freepages_count, - ¤t->delays->freepages_delay_max); + ¤t->delays->freepages_delay_max, + ¤t->delays->freepages_delay_min); } void __delayacct_thrashing_start(bool *in_thrashing) @@ -247,7 +259,8 @@ void __delayacct_thrashing_end(bool *in_thrashing) ¤t->delays->thrashing_start, ¤t->delays->thrashing_delay, ¤t->delays->thrashing_count, - ¤t->delays->thrashing_delay_max); + ¤t->delays->thrashing_delay_max, + ¤t->delays->thrashing_delay_min); } void __delayacct_swapin_start(void) @@ -261,7 +274,8 @@ void __delayacct_swapin_end(void) ¤t->delays->swapin_start, ¤t->delays->swapin_delay, ¤t->delays->swapin_count, - ¤t->delays->swapin_delay_max); + ¤t->delays->swapin_delay_max, + ¤t->delays->swapin_delay_min); } void __delayacct_compact_start(void) @@ -275,7 +289,8 @@ void __delayacct_compact_end(void) ¤t->delays->compact_start, ¤t->delays->compact_delay, ¤t->delays->compact_count, - ¤t->delays->compact_delay_max); + ¤t->delays->compact_delay_max, + ¤t->delays->compact_delay_min); } void __delayacct_wpcopy_start(void) @@ -289,7 +304,8 @@ void __delayacct_wpcopy_end(void) ¤t->delays->wpcopy_start, ¤t->delays->wpcopy_delay, ¤t->delays->wpcopy_count, - ¤t->delays->wpcopy_delay_max); + ¤t->delays->wpcopy_delay_max, + ¤t->delays->wpcopy_delay_min); } void __delayacct_irq(struct task_struct *task, u32 delta) @@ -301,6 +317,8 @@ void __delayacct_irq(struct task_struct *task, u32 delta) task->delays->irq_count++; if (delta > task->delays->irq_delay_max) task->delays->irq_delay_max = delta; + if (delta && (!task->delays->irq_delay_min || delta < task->delays->irq_delay_min)) + task->delays->irq_delay_min = delta; raw_spin_unlock_irqrestore(&task->delays->lock, flags); } diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h index ed72435aef51..693537b908a1 100644 --- a/kernel/sched/stats.h +++ b/kernel/sched/stats.h @@ -246,6 +246,8 @@ static inline void sched_info_dequeue(struct rq *rq, struct task_struct *t) t->sched_info.run_delay += delta; if (delta > t->sched_info.max_run_delay) t->sched_info.max_run_delay = delta; + if (delta && (!t->sched_info.min_run_delay || delta < t->sched_info.min_run_delay)) + t->sched_info.min_run_delay = delta; rq_sched_info_dequeue(rq, delta); } @@ -269,6 +271,8 @@ static void sched_info_arrive(struct rq *rq, struct task_struct *t) t->sched_info.pcount++; if (delta > t->sched_info.max_run_delay) t->sched_info.max_run_delay = delta; + if (delta && (!t->sched_info.min_run_delay || delta < t->sched_info.min_run_delay)) + t->sched_info.min_run_delay = delta; rq_sched_info_arrive(rq, delta); } diff --git a/tools/accounting/getdelays.c b/tools/accounting/getdelays.c index e570bcad185d..100ad3dc091a 100644 --- a/tools/accounting/getdelays.c +++ b/tools/accounting/getdelays.c @@ -192,7 +192,7 @@ static int get_family_id(int sd) } #define average_ms(t, c) (t / 1000000ULL / (c ? c : 1)) -#define delay_max_ms(t) (t / 1000000ULL) +#define delay_ms(t) (t / 1000000ULL) static void print_delayacct(struct taskstats *t) { @@ -213,48 +213,56 @@ static void print_delayacct(struct taskstats *t) "IRQ %15s%15s%15s%15s\n" " %15llu%15llu%15.3fms%13.6fms\n", "count", "real total", "virtual total", - "delay total", "delay average", "delay max", + "delay total", "delay average", "delay max", "delay min", (unsigned long long)t->cpu_count, (unsigned long long)t->cpu_run_real_total, (unsigned long long)t->cpu_run_virtual_total, (unsigned long long)t->cpu_delay_total, average_ms((double)t->cpu_delay_total, t->cpu_count), - delay_max_ms((double)t->cpu_delay_max), - "count", "delay total", "delay average", "delay max", + delay_ms((double)t->cpu_delay_max), + delay_ms((double)t->cpu_delay_min), + "count", "delay total", "delay average", "delay max", "delay min", (unsigned long long)t->blkio_count, (unsigned long long)t->blkio_delay_total, average_ms((double)t->blkio_delay_total, t->blkio_count), - delay_max_ms((double)t->blkio_delay_max), - "count", "delay total", "delay average", "delay max", + delay_ms((double)t->blkio_delay_max), + delay_ms((double)t->blkio_delay_min), + "count", "delay total", "delay average", "delay max", "delay min", (unsigned long long)t->swapin_count, (unsigned long long)t->swapin_delay_total, average_ms((double)t->swapin_delay_total, t->swapin_count), - delay_max_ms((double)t->swapin_delay_max), - "count", "delay total", "delay average", "delay max", + delay_ms((double)t->swapin_delay_max), + delay_ms((double)t->swapin_delay_min), + "count", "delay total", "delay average", "delay max", "delay min", (unsigned long long)t->freepages_count, (unsigned long long)t->freepages_delay_total, average_ms((double)t->freepages_delay_total, t->freepages_count), - delay_max_ms((double)t->freepages_delay_max), - "count", "delay total", "delay average", "delay max", + delay_ms((double)t->freepages_delay_max), + delay_ms((double)t->freepages_delay_min), + "count", "delay total", "delay average", "delay max", "delay min", (unsigned long long)t->thrashing_count, (unsigned long long)t->thrashing_delay_total, average_ms((double)t->thrashing_delay_total, t->thrashing_count), - delay_max_ms((double)t->thrashing_delay_max), - "count", "delay total", "delay average", "delay max", + delay_ms((double)t->thrashing_delay_max), + delay_ms((double)t->thrashing_delay_min), + "count", "delay total", "delay average", "delay max", "delay min", (unsigned long long)t->compact_count, (unsigned long long)t->compact_delay_total, average_ms((double)t->compact_delay_total, t->compact_count), - delay_max_ms((double)t->compact_delay_max), - "count", "delay total", "delay average", "delay max", + delay_ms((double)t->compact_delay_max), + delay_ms((double)t->compact_delay_min), + "count", "delay total", "delay average", "delay max", "delay min", (unsigned long long)t->wpcopy_count, (unsigned long long)t->wpcopy_delay_total, average_ms((double)t->wpcopy_delay_total, t->wpcopy_count), - delay_max_ms((double)t->wpcopy_delay_max), - "count", "delay total", "delay average", "delay max", + delay_ms((double)t->wpcopy_delay_max), + delay_ms((double)t->wpcopy_delay_min), + "count", "delay total", "delay average", "delay max", "delay min", (unsigned long long)t->irq_count, (unsigned long long)t->irq_delay_total, average_ms((double)t->irq_delay_total, t->irq_count), - delay_max_ms((double)t->irq_delay_max)); + delay_ms((double)t->irq_delay_max), + delay_ms((double)t->irq_delay_min)); } static void task_context_switch_counts(struct taskstats *t) -- cgit v1.2.3 From f6d2e5abf154da59ccb3bcac23438f2230c8948a Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Wed, 1 Jan 2025 07:05:35 +0200 Subject: wifi: nl80211: permit userspace to pass supported selectors Currently the SAE_H2E selector already exists, which needs to be implemented by the SME. As new such selectors might be added in the future, add a feature to permit userspace to report a selector as supported. If not given, the kernel should assume that userspace does support SAE_H2E. Signed-off-by: Benjamin Berg Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250101070249.fe67b871cc39.Ieb98390328927e998e612345a58b6dbc00b0e3a2@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 12 ++++++++++++ include/uapi/linux/nl80211.h | 9 +++++++++ net/wireless/nl80211.c | 32 ++++++++++++++++++++++++++++++++ 3 files changed, 53 insertions(+) (limited to 'include/uapi') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 7790af534b7f..8a38d66be2c9 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -3023,6 +3023,10 @@ static inline const u8 *ieee80211_bss_get_ie(struct cfg80211_bss *bss, u8 id) * * @bss: The BSS to authenticate with, the callee must obtain a reference * to it if it needs to keep it. + * @supported_selectors: List of selectors that should be assumed to be + * supported by the station. + * SAE_H2E must be assumed supported if set to %NULL. + * @supported_selectors_len: Length of supported_selectors in octets. * @auth_type: Authentication type (algorithm) * @ie: Extra IEs to add to Authentication frame or %NULL * @ie_len: Length of ie buffer in octets @@ -3045,6 +3049,8 @@ struct cfg80211_auth_request { struct cfg80211_bss *bss; const u8 *ie; size_t ie_len; + const u8 *supported_selectors; + u8 supported_selectors_len; enum nl80211_auth_type auth_type; const u8 *key; u8 key_len; @@ -3124,6 +3130,10 @@ enum cfg80211_assoc_req_flags { * included in the Current AP address field of the Reassociation Request * frame. * @flags: See &enum cfg80211_assoc_req_flags + * @supported_selectors: supported selectors in IEEE 802.11 format + * (or %NULL for no change). + * If %NULL, then support for SAE_H2E should be assumed. + * @supported_selectors_len: Length of supported_selectors in octets. * @ht_capa: HT Capabilities over-rides. Values set in ht_capa_mask * will be used in ht_capa. Un-supported values will be ignored. * @ht_capa_mask: The bits of ht_capa which are to be used. @@ -3150,6 +3160,8 @@ struct cfg80211_assoc_request { struct cfg80211_crypto_settings crypto; bool use_mfp; u32 flags; + const u8 *supported_selectors; + u8 supported_selectors_len; struct ieee80211_ht_cap ht_capa; struct ieee80211_ht_cap ht_capa_mask; struct ieee80211_vht_cap vht_capa, vht_capa_mask; diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 6d11437596b9..ee1cf19803ea 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2871,6 +2871,12 @@ enum nl80211_commands { * @NL80211_ATTR_VIF_RADIO_MASK: Bitmask of allowed radios (u32). * A value of 0 means all radios. * + * @NL80211_ATTR_SUPPORTED_SELECTORS: supported selectors, array of + * supported selectors as defined by IEEE 802.11 7.3.2.2 but without the + * length restriction (at most %NL80211_MAX_SUPP_SELECTORS). + * This can be used to provide a list of selectors that are implemented + * by the supplicant. If not given, support for SAE_H2E is assumed. + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -3421,6 +3427,8 @@ enum nl80211_attrs { NL80211_ATTR_VIF_RADIO_MASK, + NL80211_ATTR_SUPPORTED_SELECTORS, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -3465,6 +3473,7 @@ enum nl80211_attrs { #define NL80211_WIPHY_NAME_MAXLEN 64 #define NL80211_MAX_SUPP_RATES 32 +#define NL80211_MAX_SUPP_SELECTORS 128 #define NL80211_MAX_SUPP_HT_RATES 77 #define NL80211_MAX_SUPP_REG_RULES 128 #define NL80211_TKIP_DATA_OFFSET_ENCR_KEY 0 diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 8789d8b73f0f..39c114265db8 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -294,6 +294,21 @@ static int validate_he_capa(const struct nlattr *attr, return 0; } +static int validate_supported_selectors(const struct nlattr *attr, + struct netlink_ext_ack *extack) +{ + const u8 *supported_selectors = nla_data(attr); + u8 supported_selectors_len = nla_len(attr); + + /* The top bit must not be set as it is not part of the selector */ + for (int i = 0; i < supported_selectors_len; i++) { + if (supported_selectors[i] & 0x80) + return -EINVAL; + } + + return 0; +} + /* policy for the attributes */ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR]; @@ -830,6 +845,9 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_MLO_TTLM_ULINK] = NLA_POLICY_EXACT_LEN(sizeof(u16) * 8), [NL80211_ATTR_ASSOC_SPP_AMSDU] = { .type = NLA_FLAG }, [NL80211_ATTR_VIF_RADIO_MASK] = { .type = NLA_U32 }, + [NL80211_ATTR_SUPPORTED_SELECTORS] = + NLA_POLICY_VALIDATE_FN(NLA_BINARY, validate_supported_selectors, + NL80211_MAX_SUPP_SELECTORS), }; /* policy for the key attributes */ @@ -10858,6 +10876,13 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info) req.ie_len = nla_len(info->attrs[NL80211_ATTR_IE]); } + if (info->attrs[NL80211_ATTR_SUPPORTED_SELECTORS]) { + req.supported_selectors = + nla_data(info->attrs[NL80211_ATTR_SUPPORTED_SELECTORS]); + req.supported_selectors_len = + nla_len(info->attrs[NL80211_ATTR_SUPPORTED_SELECTORS]); + } + auth_type = nla_get_u32(info->attrs[NL80211_ATTR_AUTH_TYPE]); if (!nl80211_valid_auth_type(rdev, auth_type, NL80211_CMD_AUTHENTICATE)) return -EINVAL; @@ -11140,6 +11165,13 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_PREV_BSSID]) req.prev_bssid = nla_data(info->attrs[NL80211_ATTR_PREV_BSSID]); + if (info->attrs[NL80211_ATTR_SUPPORTED_SELECTORS]) { + req.supported_selectors = + nla_data(info->attrs[NL80211_ATTR_SUPPORTED_SELECTORS]); + req.supported_selectors_len = + nla_len(info->attrs[NL80211_ATTR_SUPPORTED_SELECTORS]); + } + if (nla_get_flag(info->attrs[NL80211_ATTR_DISABLE_HT])) req.flags |= ASSOC_REQ_DISABLE_HT; -- cgit v1.2.3 From 65c1c041798484da54cbad5fb5833b81694c43cf Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Thu, 2 Jan 2025 16:19:55 +0200 Subject: wifi: cfg80211: Add support for dynamic addition/removal of links Add support for requesting dynamic addition/removal of links to the current MLO association. Signed-off-by: Ilan Peer Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250102161730.cef23352f2a2.I79c849974c494cb1cbf9e1b22a5d2d37395ff5ac@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 45 ++++++++++++++++++++++++ include/uapi/linux/nl80211.h | 10 ++++++ net/wireless/core.h | 4 +++ net/wireless/mlme.c | 77 ++++++++++++++++++++++++++++++++++++++++ net/wireless/nl80211.c | 84 ++++++++++++++++++++++++++++++++++++++++++++ net/wireless/nl80211.h | 3 ++ net/wireless/rdev-ops.h | 19 ++++++++++ net/wireless/trace.h | 44 +++++++++++++++++++++++ 8 files changed, 286 insertions(+) (limited to 'include/uapi') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 8a38d66be2c9..d4b4cabac029 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -4596,6 +4596,15 @@ struct mgmt_frame_regs { * @set_ttlm: set the TID to link mapping. * @get_radio_mask: get bitmask of radios in use. * (invoked with the wiphy mutex held) + * @assoc_ml_reconf: Request a non-AP MLO connection to perform ML + * reconfiguration, i.e., add and/or remove links to/from the + * association using ML reconfiguration action frames. Successfully added + * links will be added to the set of valid links. Successfully removed + * links will be removed from the set of valid links. The driver must + * indicate removed links by calling cfg80211_links_removed() and added + * links by calling cfg80211_mlo_reconf_add_done(). When calling + * cfg80211_mlo_reconf_add_done() the bss pointer must be given for each + * link for which MLO reconfiguration 'add' operation was requested. */ struct cfg80211_ops { int (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow); @@ -4959,6 +4968,9 @@ struct cfg80211_ops { int (*set_ttlm)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_ttlm_params *params); u32 (*get_radio_mask)(struct wiphy *wiphy, struct net_device *dev); + int (*assoc_ml_reconf)(struct wiphy *wiphy, struct net_device *dev, + struct cfg80211_assoc_link *add_links, + u16 rem_links); }; /* @@ -9716,6 +9728,39 @@ static inline int cfg80211_color_change_notify(struct net_device *dev, */ void cfg80211_links_removed(struct net_device *dev, u16 link_mask); +/** + * struct cfg80211_mlo_reconf_done_data - MLO reconfiguration data + * @buf: MLO Reconfiguration Response frame (header + body) + * @len: length of the frame data + * @added_links: BIT mask of links successfully added to the association + * @links: per-link information indexed by link ID + * @links.bss: the BSS that MLO reconfiguration was requested for, ownership of + * the pointer moves to cfg80211 in the call to + * cfg80211_mlo_reconf_add_done(). + * + * The BSS pointer must be set for each link for which 'add' operation was + * requested in the assoc_ml_reconf callback. + */ +struct cfg80211_mlo_reconf_done_data { + const u8 *buf; + size_t len; + u16 added_links; + struct { + struct cfg80211_bss *bss; + } links[IEEE80211_MLD_MAX_NUM_LINKS]; +}; + +/** + * cfg80211_mlo_reconf_add_done - Notify about MLO reconfiguration result + * @dev: network device. + * @data: MLO reconfiguration done data, &struct cfg80211_mlo_reconf_done_data + * + * Inform cfg80211 and the userspace that processing of ML reconfiguration + * request to add links to the association is done. + */ +void cfg80211_mlo_reconf_add_done(struct net_device *dev, + struct cfg80211_mlo_reconf_done_data *data); + /** * cfg80211_schedule_channels_check - schedule regulatory check if needed * @wdev: the wireless device to check diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index ee1cf19803ea..f900d7cc42bc 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1329,6 +1329,9 @@ * %NL80211_ATTR_MLO_TTLM_ULINK attributes are used to specify the * TID to Link mapping for downlink/uplink traffic. * + * @NL80211_CMD_ASSOC_MLO_RECONF: For a non-AP MLD station, request to + * add/remove links to/from the association. + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -1586,6 +1589,8 @@ enum nl80211_commands { NL80211_CMD_SET_TID_TO_LINK_MAPPING, + NL80211_CMD_ASSOC_MLO_RECONF, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -2877,6 +2882,9 @@ enum nl80211_commands { * This can be used to provide a list of selectors that are implemented * by the supplicant. If not given, support for SAE_H2E is assumed. * + * @NL80211_ATTR_MLO_RECONF_REM_LINKS: (u16) A bitmask of the links requested + * to be removed from the MLO association. + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -3429,6 +3437,8 @@ enum nl80211_attrs { NL80211_ATTR_SUPPORTED_SELECTORS, + NL80211_ATTR_MLO_RECONF_REM_LINKS, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/net/wireless/core.h b/net/wireless/core.h index 4c45f994a8c0..826299f3d781 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -567,6 +567,10 @@ int cfg80211_remove_virtual_intf(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev); void cfg80211_wdev_release_link_bsses(struct wireless_dev *wdev, u16 link_mask); +int cfg80211_assoc_ml_reconf(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct cfg80211_assoc_link *links, + u16 rem_links); /** * struct cfg80211_colocated_ap - colocated AP information * diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 5c09bf4cdc2e..e10f2b3b4b7f 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -1294,3 +1294,80 @@ void cfg80211_stop_background_radar_detection(struct wireless_dev *wdev) &rdev->background_radar_chandef, NL80211_RADAR_CAC_ABORTED); } + +int cfg80211_assoc_ml_reconf(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct cfg80211_assoc_link *links, + u16 rem_links) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + int err; + + lockdep_assert_wiphy(wdev->wiphy); + + err = rdev_assoc_ml_reconf(rdev, dev, links, rem_links); + if (!err) { + int link_id; + + for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; + link_id++) { + if (!links[link_id].bss) + continue; + + cfg80211_ref_bss(&rdev->wiphy, links[link_id].bss); + cfg80211_hold_bss(bss_from_pub(links[link_id].bss)); + } + } + + return err; +} + +void cfg80211_mlo_reconf_add_done(struct net_device *dev, + struct cfg80211_mlo_reconf_done_data *data) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct wiphy *wiphy = wdev->wiphy; + int link_id; + + lockdep_assert_wiphy(wiphy); + + trace_cfg80211_mlo_reconf_add_done(dev, data->added_links, + data->buf, data->len); + + if (WARN_ON(!wdev->valid_links)) + return; + + if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION && + wdev->iftype != NL80211_IFTYPE_P2P_CLIENT)) + return; + + /* validate that a BSS is given for each added link */ + for (link_id = 0; link_id < ARRAY_SIZE(data->links); link_id++) { + struct cfg80211_bss *bss = data->links[link_id].bss; + + if (!(data->added_links & BIT(link_id))) + continue; + + if (WARN_ON(!bss)) + return; + } + + for (link_id = 0; link_id < ARRAY_SIZE(data->links); link_id++) { + struct cfg80211_bss *bss = data->links[link_id].bss; + + if (!bss) + continue; + + if (data->added_links & BIT(link_id)) { + wdev->links[link_id].client.current_bss = + bss_from_pub(bss); + } else { + cfg80211_unhold_bss(bss_from_pub(bss)); + cfg80211_put_bss(wiphy, bss); + } + } + + wdev->valid_links |= data->added_links; + nl80211_mlo_reconf_add_done(dev, data); +} +EXPORT_SYMBOL(cfg80211_mlo_reconf_add_done); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 5bf9a27e2c68..199fa3da31e9 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -848,6 +848,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_SUPPORTED_SELECTORS] = NLA_POLICY_VALIDATE_FN(NLA_BINARY, validate_supported_selectors, NL80211_MAX_SUPP_SELECTORS), + [NL80211_ATTR_MLO_RECONF_REM_LINKS] = { .type = NLA_U16 }, }; /* policy for the key attributes */ @@ -16476,6 +16477,66 @@ nl80211_set_ttlm(struct sk_buff *skb, struct genl_info *info) return rdev_set_ttlm(rdev, dev, ¶ms); } +static int nl80211_assoc_ml_reconf(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct net_device *dev = info->user_ptr[1]; + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct cfg80211_assoc_link links[IEEE80211_MLD_MAX_NUM_LINKS] = {}; + unsigned int link_id; + u16 add_links, rem_links; + int err; + + if (!wdev->valid_links) + return -EINVAL; + + if (dev->ieee80211_ptr->conn_owner_nlportid && + dev->ieee80211_ptr->conn_owner_nlportid != info->snd_portid) + return -EPERM; + + if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION && + dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT) + return -EOPNOTSUPP; + + add_links = 0; + if (info->attrs[NL80211_ATTR_MLO_LINKS]) { + err = nl80211_process_links(rdev, links, NULL, 0, info); + if (err) + return err; + + for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; + link_id++) { + if (!links[link_id].bss) + continue; + add_links |= BIT(link_id); + } + } + + if (info->attrs[NL80211_ATTR_MLO_RECONF_REM_LINKS]) + rem_links = + nla_get_u16(info->attrs[NL80211_ATTR_MLO_RECONF_REM_LINKS]); + else + rem_links = 0; + + /* Validate that existing links are not added, removed links are valid + * and don't allow adding and removing the same links + */ + if ((add_links & rem_links) || !(add_links | rem_links) || + (wdev->valid_links & add_links) || + ((wdev->valid_links & rem_links) != rem_links)) { + err = -EINVAL; + goto out; + } + + err = cfg80211_assoc_ml_reconf(rdev, dev, links, rem_links); + +out: + for (link_id = 0; link_id < ARRAY_SIZE(links); link_id++) + cfg80211_put_bss(&rdev->wiphy, links[link_id].bss); + + return err; +} + #define NL80211_FLAG_NEED_WIPHY 0x01 #define NL80211_FLAG_NEED_NETDEV 0x02 #define NL80211_FLAG_NEED_RTNL 0x04 @@ -17668,6 +17729,12 @@ static const struct genl_small_ops nl80211_small_ops[] = { .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, + { + .cmd = NL80211_CMD_ASSOC_MLO_RECONF, + .doit = nl80211_assoc_ml_reconf, + .flags = GENL_UNS_ADMIN_PERM, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), + }, }; static struct genl_family nl80211_fam __ro_after_init = { @@ -18564,6 +18631,23 @@ void cfg80211_links_removed(struct net_device *dev, u16 link_mask) } EXPORT_SYMBOL(cfg80211_links_removed); +void nl80211_mlo_reconf_add_done(struct net_device *dev, + struct cfg80211_mlo_reconf_done_data *data) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); + struct nl80211_mlme_event event = { + .cmd = NL80211_CMD_ASSOC_MLO_RECONF, + .buf = data->buf, + .buf_len = data->len, + .uapsd_queues = -1, + }; + + nl80211_send_mlme_event(rdev, dev, &event, GFP_KERNEL); +} +EXPORT_SYMBOL(nl80211_mlo_reconf_add_done); + void nl80211_send_ibss_bssid(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *bssid, gfp_t gfp) diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index ffaab9a92e5b..5e25782af1e0 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -124,4 +124,7 @@ void cfg80211_free_coalesce(struct cfg80211_coalesce *coalesce); /* peer measurement */ int nl80211_pmsr_start(struct sk_buff *skb, struct genl_info *info); +void nl80211_mlo_reconf_add_done(struct net_device *dev, + struct cfg80211_mlo_reconf_done_data *data); + #endif /* __NET_WIRELESS_NL80211_H */ diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index 8f2aa7e76c0a..2393a25577ad 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -1547,4 +1547,23 @@ rdev_get_radio_mask(struct cfg80211_registered_device *rdev, return rdev->ops->get_radio_mask(wiphy, dev); } + +static inline int +rdev_assoc_ml_reconf(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct cfg80211_assoc_link *add_links, + u16 rem_links) +{ + struct wiphy *wiphy = &rdev->wiphy; + int ret = -EOPNOTSUPP; + + trace_rdev_assoc_ml_reconf(wiphy, dev, add_links, rem_links); + if (rdev->ops->assoc_ml_reconf) + ret = rdev->ops->assoc_ml_reconf(wiphy, dev, add_links, + rem_links); + trace_rdev_return_int(wiphy, ret); + + return ret; +} + #endif /* __CFG80211_RDEV_OPS */ diff --git a/net/wireless/trace.h b/net/wireless/trace.h index a57210c8087c..627217e25c2f 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -4104,6 +4104,50 @@ TRACE_EVENT(cfg80211_links_removed, __entry->link_mask) ); +TRACE_EVENT(cfg80211_mlo_reconf_add_done, + TP_PROTO(struct net_device *netdev, u16 link_mask, + const u8 *buf, size_t len), + TP_ARGS(netdev, link_mask, buf, len), + TP_STRUCT__entry( + NETDEV_ENTRY + __field(u16, link_mask) + __dynamic_array(u8, buf, len) + ), + TP_fast_assign( + NETDEV_ASSIGN; + __entry->link_mask = link_mask; + memcpy(__get_dynamic_array(buf), buf, len); + ), + TP_printk(NETDEV_PR_FMT ", link_mask:0x%x", + NETDEV_PR_ARG, __entry->link_mask) +); + +TRACE_EVENT(rdev_assoc_ml_reconf, + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, + struct cfg80211_assoc_link *add_links, + u16 rem_links), + TP_ARGS(wiphy, netdev, add_links, rem_links), + TP_STRUCT__entry( + WIPHY_ENTRY + NETDEV_ENTRY + __field(u16, add_links) + __field(u16, rem_links) + ), + TP_fast_assign( + WIPHY_ASSIGN; + NETDEV_ASSIGN; + u32 i; + + __entry->add_links = 0; + __entry->rem_links = rem_links; + for (i = 0; add_links && i < IEEE80211_MLD_MAX_NUM_LINKS; i++) + if (add_links[i].bss) + __entry->add_links |= BIT(i); + ), + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", add_links=0x%x, rem_links=0x%x", + WIPHY_PR_ARG, NETDEV_PR_ARG, + __entry->add_links, __entry->rem_links) +); #endif /* !__RDEV_OPS_TRACE || TRACE_HEADER_MULTI_READ */ #undef TRACE_INCLUDE_PATH -- cgit v1.2.3 From 904c277342936b75ae55999d87abacd4c1ab1fd3 Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Thu, 2 Jan 2025 16:19:59 +0200 Subject: wifi: cfg80211: Add support for controlling EPCS Add support for configuring Emergency Preparedness Communication Services (EPCS) for station mode. Signed-off-by: Ilan Peer Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250102161730.ea54ac94445c.I11d750188bc0871e13e86146a3b5cc048d853e69@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 10 ++++++++ include/uapi/linux/nl80211.h | 9 +++++++ net/wireless/nl80211.c | 60 ++++++++++++++++++++++++++++++++++++++++++++ net/wireless/rdev-ops.h | 15 +++++++++++ net/wireless/trace.h | 34 +++++++++++++++++++++++++ 5 files changed, 128 insertions(+) (limited to 'include/uapi') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index d4b4cabac029..363d7dd2255a 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -4594,6 +4594,7 @@ struct mgmt_frame_regs { * * @set_hw_timestamp: Enable/disable HW timestamping of TM/FTM frames. * @set_ttlm: set the TID to link mapping. + * @set_epcs: Enable/Disable EPCS for station mode. * @get_radio_mask: get bitmask of radios in use. * (invoked with the wiphy mutex held) * @assoc_ml_reconf: Request a non-AP MLO connection to perform ML @@ -4971,6 +4972,8 @@ struct cfg80211_ops { int (*assoc_ml_reconf)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_assoc_link *add_links, u16 rem_links); + int (*set_epcs)(struct wiphy *wiphy, struct net_device *dev, + bool val); }; /* @@ -9771,6 +9774,13 @@ void cfg80211_mlo_reconf_add_done(struct net_device *dev, */ void cfg80211_schedule_channels_check(struct wireless_dev *wdev); +/** + * cfg80211_epcs_changed - Notify about a change in EPCS state + * @netdev: the wireless device whose EPCS state changed + * @enabled: set to true if EPCS was enabled, otherwise set to false. + */ +void cfg80211_epcs_changed(struct net_device *netdev, bool enabled); + #ifdef CONFIG_CFG80211_DEBUGFS /** * wiphy_locked_debugfs_read - do a locked read in debugfs diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index f900d7cc42bc..f6c1b181c886 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1332,6 +1332,10 @@ * @NL80211_CMD_ASSOC_MLO_RECONF: For a non-AP MLD station, request to * add/remove links to/from the association. * + * @NL80211_CMD_EPCS_CFG: EPCS configuration for a station. Used by userland to + * control EPCS configuration. Used to notify userland on the current state + * of EPCS. + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -1590,6 +1594,7 @@ enum nl80211_commands { NL80211_CMD_SET_TID_TO_LINK_MAPPING, NL80211_CMD_ASSOC_MLO_RECONF, + NL80211_CMD_EPCS_CFG, /* add new commands above here */ @@ -2885,6 +2890,9 @@ enum nl80211_commands { * @NL80211_ATTR_MLO_RECONF_REM_LINKS: (u16) A bitmask of the links requested * to be removed from the MLO association. * + * @NL80211_ATTR_EPCS: Flag attribute indicating that EPCS is enabled for a + * station interface. + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -3438,6 +3446,7 @@ enum nl80211_attrs { NL80211_ATTR_SUPPORTED_SELECTORS, NL80211_ATTR_MLO_RECONF_REM_LINKS, + NL80211_ATTR_EPCS, /* add attributes here, update the policy in nl80211.c */ diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 199fa3da31e9..351b863623af 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -849,6 +849,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { NLA_POLICY_VALIDATE_FN(NLA_BINARY, validate_supported_selectors, NL80211_MAX_SUPP_SELECTORS), [NL80211_ATTR_MLO_RECONF_REM_LINKS] = { .type = NLA_U16 }, + [NL80211_ATTR_EPCS] = { .type = NLA_FLAG }, }; /* policy for the key attributes */ @@ -16537,6 +16538,26 @@ out: return err; } +static int +nl80211_epcs_cfg(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct net_device *dev = info->user_ptr[1]; + struct wireless_dev *wdev = dev->ieee80211_ptr; + bool val; + + if (wdev->iftype != NL80211_IFTYPE_STATION && + wdev->iftype != NL80211_IFTYPE_P2P_CLIENT) + return -EOPNOTSUPP; + + if (!wdev->connected) + return -ENOLINK; + + val = nla_get_flag(info->attrs[NL80211_ATTR_EPCS]); + + return rdev_set_epcs(rdev, dev, val); +} + #define NL80211_FLAG_NEED_WIPHY 0x01 #define NL80211_FLAG_NEED_NETDEV 0x02 #define NL80211_FLAG_NEED_RTNL 0x04 @@ -17735,6 +17756,12 @@ static const struct genl_small_ops nl80211_small_ops[] = { .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, + { + .cmd = NL80211_CMD_EPCS_CFG, + .doit = nl80211_epcs_cfg, + .flags = GENL_UNS_ADMIN_PERM, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), + }, }; static struct genl_family nl80211_fam __ro_after_init = { @@ -20480,6 +20507,39 @@ void cfg80211_schedule_channels_check(struct wireless_dev *wdev) } EXPORT_SYMBOL(cfg80211_schedule_channels_check); +void cfg80211_epcs_changed(struct net_device *netdev, bool enabled) +{ + struct wireless_dev *wdev = netdev->ieee80211_ptr; + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); + struct sk_buff *msg; + void *hdr; + + trace_cfg80211_epcs_changed(wdev, enabled); + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return; + + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_EPCS_CFG); + if (!hdr) { + nlmsg_free(msg); + return; + } + + if (enabled && nla_put_flag(msg, NL80211_ATTR_EPCS)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0, + NL80211_MCGRP_MLME, GFP_KERNEL); + return; + + nla_put_failure: + nlmsg_free(msg); +} +EXPORT_SYMBOL(cfg80211_epcs_changed); + /* initialisation/exit functions */ int __init nl80211_init(void) diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index 2393a25577ad..759da1623342 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -1566,4 +1566,19 @@ rdev_assoc_ml_reconf(struct cfg80211_registered_device *rdev, return ret; } +static inline int +rdev_set_epcs(struct cfg80211_registered_device *rdev, + struct net_device *dev, bool val) +{ + struct wiphy *wiphy = &rdev->wiphy; + int ret = -EOPNOTSUPP; + + trace_rdev_set_epcs(wiphy, dev, val); + if (rdev->ops->set_epcs) + ret = rdev->ops->set_epcs(wiphy, dev, val); + trace_rdev_return_int(wiphy, ret); + + return ret; +} + #endif /* __CFG80211_RDEV_OPS */ diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 627217e25c2f..4f0abd5d49df 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -3049,6 +3049,24 @@ TRACE_EVENT(rdev_set_ttlm, WIPHY_PR_ARG, NETDEV_PR_ARG) ); +TRACE_EVENT(rdev_set_epcs, + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, + bool val), + TP_ARGS(wiphy, netdev, val), + TP_STRUCT__entry( + WIPHY_ENTRY + NETDEV_ENTRY + __field(bool, val) + ), + TP_fast_assign( + WIPHY_ASSIGN; + NETDEV_ASSIGN; + __entry->val = val; + ), + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", config=%u", + WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->val) +); + /************************************************************* * cfg80211 exported functions traces * *************************************************************/ @@ -4148,6 +4166,22 @@ TRACE_EVENT(rdev_assoc_ml_reconf, WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->add_links, __entry->rem_links) ); + +TRACE_EVENT(cfg80211_epcs_changed, + TP_PROTO(struct wireless_dev *wdev, bool enabled), + TP_ARGS(wdev, enabled), + TP_STRUCT__entry( + WDEV_ENTRY + __field(u32, enabled) + ), + TP_fast_assign( + WDEV_ASSIGN; + __entry->enabled = enabled; + ), + TP_printk(WDEV_PR_FMT ", enabled=%u", + WDEV_PR_ARG, __entry->enabled) +); + #endif /* !__RDEV_OPS_TRACE || TRACE_HEADER_MULTI_READ */ #undef TRACE_INCLUDE_PATH -- cgit v1.2.3 From 127186cfb184eaccdfe948e6da66940cfa03efc5 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Thu, 2 Jan 2025 19:28:41 +0800 Subject: md: reintroduce md-linear THe md-linear is removed by commit 849d18e27be9 ("md: Remove deprecated CONFIG_MD_LINEAR") because it has been marked as deprecated for a long time. However, md-linear is used widely for underlying disks with different size, sadly we didn't know this until now, and it's true useful to create partitions and assemble multiple raid and then append one to the other. People have to use dm-linear in this case now, however, they will prefer to minimize the number of involved modules. Fixes: 849d18e27be9 ("md: Remove deprecated CONFIG_MD_LINEAR") Cc: stable@vger.kernel.org Signed-off-by: Yu Kuai Acked-by: Coly Li Acked-by: Mike Snitzer Link: https://lore.kernel.org/r/20250102112841.1227111-1-yukuai1@huaweicloud.com Signed-off-by: Song Liu --- drivers/md/Kconfig | 13 ++ drivers/md/Makefile | 2 + drivers/md/md-autodetect.c | 8 +- drivers/md/md-linear.c | 354 +++++++++++++++++++++++++++++++++++++++++ drivers/md/md.c | 2 +- include/uapi/linux/raid/md_p.h | 2 +- include/uapi/linux/raid/md_u.h | 2 + 7 files changed, 379 insertions(+), 4 deletions(-) create mode 100644 drivers/md/md-linear.c (limited to 'include/uapi') diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 1e9db8e4acdf..0b1870a09e1f 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -61,6 +61,19 @@ config MD_BITMAP_FILE various kernel APIs and can only work with files on a file system not actually sitting on the MD device. +config MD_LINEAR + tristate "Linear (append) mode" + depends on BLK_DEV_MD + help + If you say Y here, then your multiple devices driver will be able to + use the so-called linear mode, i.e. it will combine the hard disk + partitions by simply appending one to the other. + + To compile this as a module, choose M here: the module + will be called linear. + + If unsure, say Y. + config MD_RAID0 tristate "RAID-0 (striping) mode" depends on BLK_DEV_MD diff --git a/drivers/md/Makefile b/drivers/md/Makefile index 476a214e4bdc..87bdfc9fe14c 100644 --- a/drivers/md/Makefile +++ b/drivers/md/Makefile @@ -29,12 +29,14 @@ dm-zoned-y += dm-zoned-target.o dm-zoned-metadata.o dm-zoned-reclaim.o md-mod-y += md.o md-bitmap.o raid456-y += raid5.o raid5-cache.o raid5-ppl.o +linear-y += md-linear.o # Note: link order is important. All raid personalities # and must come before md.o, as they each initialise # themselves, and md.o may use the personalities when it # auto-initialised. +obj-$(CONFIG_MD_LINEAR) += linear.o obj-$(CONFIG_MD_RAID0) += raid0.o obj-$(CONFIG_MD_RAID1) += raid1.o obj-$(CONFIG_MD_RAID10) += raid10.o diff --git a/drivers/md/md-autodetect.c b/drivers/md/md-autodetect.c index b2a00f213c2c..4b80165afd23 100644 --- a/drivers/md/md-autodetect.c +++ b/drivers/md/md-autodetect.c @@ -49,6 +49,7 @@ static int md_setup_ents __initdata; * instead of just one. -- KTK * 18May2000: Added support for persistent-superblock arrays: * md=n,0,factor,fault,device-list uses RAID0 for device n + * md=n,-1,factor,fault,device-list uses LINEAR for device n * md=n,device-list reads a RAID superblock from the devices * elements in device-list are read by name_to_kdev_t so can be * a hex number or something like /dev/hda1 /dev/sdb @@ -87,7 +88,7 @@ static int __init md_setup(char *str) md_setup_ents++; switch (get_option(&str, &level)) { /* RAID level */ case 2: /* could be 0 or -1.. */ - if (level == 0) { + if (level == 0 || level == LEVEL_LINEAR) { if (get_option(&str, &factor) != 2 || /* Chunk Size */ get_option(&str, &fault) != 2) { printk(KERN_WARNING "md: Too few arguments supplied to md=.\n"); @@ -95,7 +96,10 @@ static int __init md_setup(char *str) } md_setup_args[ent].level = level; md_setup_args[ent].chunk = 1 << (factor+12); - pername = "raid0"; + if (level == LEVEL_LINEAR) + pername = "linear"; + else + pername = "raid0"; break; } fallthrough; diff --git a/drivers/md/md-linear.c b/drivers/md/md-linear.c new file mode 100644 index 000000000000..53bc3fda9edb --- /dev/null +++ b/drivers/md/md-linear.c @@ -0,0 +1,354 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * linear.c : Multiple Devices driver for Linux Copyright (C) 1994-96 Marc + * ZYNGIER or + */ + +#include +#include +#include +#include +#include +#include +#include "md.h" + +struct dev_info { + struct md_rdev *rdev; + sector_t end_sector; +}; + +struct linear_conf { + struct rcu_head rcu; + sector_t array_sectors; + /* a copy of mddev->raid_disks */ + int raid_disks; + struct dev_info disks[] __counted_by(raid_disks); +}; + +/* + * find which device holds a particular offset + */ +static inline struct dev_info *which_dev(struct mddev *mddev, sector_t sector) +{ + int lo, mid, hi; + struct linear_conf *conf; + + lo = 0; + hi = mddev->raid_disks - 1; + conf = mddev->private; + + /* + * Binary Search + */ + + while (hi > lo) { + + mid = (hi + lo) / 2; + if (sector < conf->disks[mid].end_sector) + hi = mid; + else + lo = mid + 1; + } + + return conf->disks + lo; +} + +static sector_t linear_size(struct mddev *mddev, sector_t sectors, int raid_disks) +{ + struct linear_conf *conf; + sector_t array_sectors; + + conf = mddev->private; + WARN_ONCE(sectors || raid_disks, + "%s does not support generic reshape\n", __func__); + array_sectors = conf->array_sectors; + + return array_sectors; +} + +static int linear_set_limits(struct mddev *mddev) +{ + struct queue_limits lim; + int err; + + md_init_stacking_limits(&lim); + lim.max_hw_sectors = mddev->chunk_sectors; + lim.max_write_zeroes_sectors = mddev->chunk_sectors; + lim.io_min = mddev->chunk_sectors << 9; + err = mddev_stack_rdev_limits(mddev, &lim, MDDEV_STACK_INTEGRITY); + if (err) { + queue_limits_cancel_update(mddev->gendisk->queue); + return err; + } + + return queue_limits_set(mddev->gendisk->queue, &lim); +} + +static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks) +{ + struct linear_conf *conf; + struct md_rdev *rdev; + int ret = -EINVAL; + int cnt; + int i; + + conf = kzalloc(struct_size(conf, disks, raid_disks), GFP_KERNEL); + if (!conf) + return ERR_PTR(-ENOMEM); + + /* + * conf->raid_disks is copy of mddev->raid_disks. The reason to + * keep a copy of mddev->raid_disks in struct linear_conf is, + * mddev->raid_disks may not be consistent with pointers number of + * conf->disks[] when it is updated in linear_add() and used to + * iterate old conf->disks[] earray in linear_congested(). + * Here conf->raid_disks is always consitent with number of + * pointers in conf->disks[] array, and mddev->private is updated + * with rcu_assign_pointer() in linear_addr(), such race can be + * avoided. + */ + conf->raid_disks = raid_disks; + + cnt = 0; + conf->array_sectors = 0; + + rdev_for_each(rdev, mddev) { + int j = rdev->raid_disk; + struct dev_info *disk = conf->disks + j; + sector_t sectors; + + if (j < 0 || j >= raid_disks || disk->rdev) { + pr_warn("md/linear:%s: disk numbering problem. Aborting!\n", + mdname(mddev)); + goto out; + } + + disk->rdev = rdev; + if (mddev->chunk_sectors) { + sectors = rdev->sectors; + sector_div(sectors, mddev->chunk_sectors); + rdev->sectors = sectors * mddev->chunk_sectors; + } + + conf->array_sectors += rdev->sectors; + cnt++; + } + if (cnt != raid_disks) { + pr_warn("md/linear:%s: not enough drives present. Aborting!\n", + mdname(mddev)); + goto out; + } + + /* + * Here we calculate the device offsets. + */ + conf->disks[0].end_sector = conf->disks[0].rdev->sectors; + + for (i = 1; i < raid_disks; i++) + conf->disks[i].end_sector = + conf->disks[i-1].end_sector + + conf->disks[i].rdev->sectors; + + if (!mddev_is_dm(mddev)) { + ret = linear_set_limits(mddev); + if (ret) + goto out; + } + + return conf; + +out: + kfree(conf); + return ERR_PTR(ret); +} + +static int linear_run(struct mddev *mddev) +{ + struct linear_conf *conf; + int ret; + + if (md_check_no_bitmap(mddev)) + return -EINVAL; + + conf = linear_conf(mddev, mddev->raid_disks); + if (IS_ERR(conf)) + return PTR_ERR(conf); + + mddev->private = conf; + md_set_array_sectors(mddev, linear_size(mddev, 0, 0)); + + ret = md_integrity_register(mddev); + if (ret) { + kfree(conf); + mddev->private = NULL; + } + return ret; +} + +static int linear_add(struct mddev *mddev, struct md_rdev *rdev) +{ + /* Adding a drive to a linear array allows the array to grow. + * It is permitted if the new drive has a matching superblock + * already on it, with raid_disk equal to raid_disks. + * It is achieved by creating a new linear_private_data structure + * and swapping it in in-place of the current one. + * The current one is never freed until the array is stopped. + * This avoids races. + */ + struct linear_conf *newconf, *oldconf; + + if (rdev->saved_raid_disk != mddev->raid_disks) + return -EINVAL; + + rdev->raid_disk = rdev->saved_raid_disk; + rdev->saved_raid_disk = -1; + + newconf = linear_conf(mddev, mddev->raid_disks + 1); + if (!newconf) + return -ENOMEM; + + /* newconf->raid_disks already keeps a copy of * the increased + * value of mddev->raid_disks, WARN_ONCE() is just used to make + * sure of this. It is possible that oldconf is still referenced + * in linear_congested(), therefore kfree_rcu() is used to free + * oldconf until no one uses it anymore. + */ + oldconf = rcu_dereference_protected(mddev->private, + lockdep_is_held(&mddev->reconfig_mutex)); + mddev->raid_disks++; + WARN_ONCE(mddev->raid_disks != newconf->raid_disks, + "copied raid_disks doesn't match mddev->raid_disks"); + rcu_assign_pointer(mddev->private, newconf); + md_set_array_sectors(mddev, linear_size(mddev, 0, 0)); + set_capacity_and_notify(mddev->gendisk, mddev->array_sectors); + kfree_rcu(oldconf, rcu); + return 0; +} + +static void linear_free(struct mddev *mddev, void *priv) +{ + struct linear_conf *conf = priv; + + kfree(conf); +} + +static bool linear_make_request(struct mddev *mddev, struct bio *bio) +{ + struct dev_info *tmp_dev; + sector_t start_sector, end_sector, data_offset; + sector_t bio_sector = bio->bi_iter.bi_sector; + + if (unlikely(bio->bi_opf & REQ_PREFLUSH) + && md_flush_request(mddev, bio)) + return true; + + tmp_dev = which_dev(mddev, bio_sector); + start_sector = tmp_dev->end_sector - tmp_dev->rdev->sectors; + end_sector = tmp_dev->end_sector; + data_offset = tmp_dev->rdev->data_offset; + + if (unlikely(bio_sector >= end_sector || + bio_sector < start_sector)) + goto out_of_bounds; + + if (unlikely(is_rdev_broken(tmp_dev->rdev))) { + md_error(mddev, tmp_dev->rdev); + bio_io_error(bio); + return true; + } + + if (unlikely(bio_end_sector(bio) > end_sector)) { + /* This bio crosses a device boundary, so we have to split it */ + struct bio *split = bio_split(bio, end_sector - bio_sector, + GFP_NOIO, &mddev->bio_set); + + if (IS_ERR(split)) { + bio->bi_status = errno_to_blk_status(PTR_ERR(split)); + bio_endio(bio); + return true; + } + + bio_chain(split, bio); + submit_bio_noacct(bio); + bio = split; + } + + md_account_bio(mddev, &bio); + bio_set_dev(bio, tmp_dev->rdev->bdev); + bio->bi_iter.bi_sector = bio->bi_iter.bi_sector - + start_sector + data_offset; + + if (unlikely((bio_op(bio) == REQ_OP_DISCARD) && + !bdev_max_discard_sectors(bio->bi_bdev))) { + /* Just ignore it */ + bio_endio(bio); + } else { + if (mddev->gendisk) + trace_block_bio_remap(bio, disk_devt(mddev->gendisk), + bio_sector); + mddev_check_write_zeroes(mddev, bio); + submit_bio_noacct(bio); + } + return true; + +out_of_bounds: + pr_err("md/linear:%s: make_request: Sector %llu out of bounds on dev %pg: %llu sectors, offset %llu\n", + mdname(mddev), + (unsigned long long)bio->bi_iter.bi_sector, + tmp_dev->rdev->bdev, + (unsigned long long)tmp_dev->rdev->sectors, + (unsigned long long)start_sector); + bio_io_error(bio); + return true; +} + +static void linear_status(struct seq_file *seq, struct mddev *mddev) +{ + seq_printf(seq, " %dk rounding", mddev->chunk_sectors / 2); +} + +static void linear_error(struct mddev *mddev, struct md_rdev *rdev) +{ + if (!test_and_set_bit(MD_BROKEN, &mddev->flags)) { + char *md_name = mdname(mddev); + + pr_crit("md/linear%s: Disk failure on %pg detected, failing array.\n", + md_name, rdev->bdev); + } +} + +static void linear_quiesce(struct mddev *mddev, int state) +{ +} + +static struct md_personality linear_personality = { + .name = "linear", + .level = LEVEL_LINEAR, + .owner = THIS_MODULE, + .make_request = linear_make_request, + .run = linear_run, + .free = linear_free, + .status = linear_status, + .hot_add_disk = linear_add, + .size = linear_size, + .quiesce = linear_quiesce, + .error_handler = linear_error, +}; + +static int __init linear_init(void) +{ + return register_md_personality(&linear_personality); +} + +static void linear_exit(void) +{ + unregister_md_personality(&linear_personality); +} + +module_init(linear_init); +module_exit(linear_exit); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Linear device concatenation personality for MD (deprecated)"); +MODULE_ALIAS("md-personality-1"); /* LINEAR - deprecated*/ +MODULE_ALIAS("md-linear"); +MODULE_ALIAS("md-level--1"); diff --git a/drivers/md/md.c b/drivers/md/md.c index aebe12b0ee27..3dd013f82e26 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -8124,7 +8124,7 @@ void md_error(struct mddev *mddev, struct md_rdev *rdev) return; mddev->pers->error_handler(mddev, rdev); - if (mddev->pers->level == 0) + if (mddev->pers->level == 0 || mddev->pers->level == LEVEL_LINEAR) return; if (mddev->degraded && !test_bit(MD_BROKEN, &mddev->flags)) diff --git a/include/uapi/linux/raid/md_p.h b/include/uapi/linux/raid/md_p.h index 5a43c23f53bf..ff47b6f0ba0f 100644 --- a/include/uapi/linux/raid/md_p.h +++ b/include/uapi/linux/raid/md_p.h @@ -233,7 +233,7 @@ struct mdp_superblock_1 { char set_name[32]; /* set and interpreted by user-space */ __le64 ctime; /* lo 40 bits are seconds, top 24 are microseconds or 0*/ - __le32 level; /* 0,1,4,5 */ + __le32 level; /* 0,1,4,5, -1 (linear) */ __le32 layout; /* only for raid5 and raid10 currently */ __le64 size; /* used size of component devices, in 512byte sectors */ diff --git a/include/uapi/linux/raid/md_u.h b/include/uapi/linux/raid/md_u.h index 7be89a4906e7..a893010735fb 100644 --- a/include/uapi/linux/raid/md_u.h +++ b/include/uapi/linux/raid/md_u.h @@ -103,6 +103,8 @@ typedef struct mdu_array_info_s { } mdu_array_info_t; +#define LEVEL_LINEAR (-1) + /* we need a value for 'no level specified' and 0 * means 'raid0', so we need something else. This is * for internal use only -- cgit v1.2.3 From 6167c0b6e8d7ddb6b3e5efffcac34a85f7872997 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 10 Jan 2025 07:05:13 +0100 Subject: net: ethtool: add support for structured PHY statistics Introduce a new way to report PHY statistics in a structured and standardized format using the netlink API. This new method does not replace the old driver-specific stats, which can still be accessed with `ethtool -S `. The structured stats are available with `ethtool -S --all-groups`. This new method makes it easier to diagnose problems by organizing stats in a consistent and documented way. Signed-off-by: Jakub Kicinski Signed-off-by: Oleksij Rempel Signed-off-by: Paolo Abeni --- Documentation/networking/ethtool-netlink.rst | 1 + include/uapi/linux/ethtool.h | 2 ++ include/uapi/linux/ethtool_netlink.h | 14 ++++++++++ net/ethtool/netlink.h | 1 + net/ethtool/stats.c | 39 +++++++++++++++++++++++++++- net/ethtool/strset.c | 5 ++++ 6 files changed, 61 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index a7ba6368a4d5..da846f1d998e 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -1616,6 +1616,7 @@ the ``ETHTOOL_A_STATS_GROUPS`` bitset. Currently defined values are: ETHTOOL_STATS_ETH_PHY eth-phy Basic IEEE 802.3 PHY statistics (30.3.2.1.*) ETHTOOL_STATS_ETH_CTRL eth-ctrl Basic IEEE 802.3 MAC Ctrl statistics (30.3.3.*) ETHTOOL_STATS_RMON rmon RMON (RFC 2819) statistics + ETHTOOL_STATS_PHY phy Additional PHY statistics, not defined by IEEE ====================== ======== =============================================== Each group should have a corresponding ``ETHTOOL_A_STATS_GRP`` in the reply. diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 7e1b3820f91f..d1089b88efc7 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -681,6 +681,7 @@ enum ethtool_link_ext_substate_module { * @ETH_SS_STATS_ETH_MAC: names of IEEE 802.3 MAC statistics * @ETH_SS_STATS_ETH_CTRL: names of IEEE 802.3 MAC Control statistics * @ETH_SS_STATS_RMON: names of RMON statistics + * @ETH_SS_STATS_PHY: names of PHY(dev) statistics * * @ETH_SS_COUNT: number of defined string sets */ @@ -706,6 +707,7 @@ enum ethtool_stringset { ETH_SS_STATS_ETH_MAC, ETH_SS_STATS_ETH_CTRL, ETH_SS_STATS_RMON, + ETH_SS_STATS_PHY, /* add new constants above here */ ETH_SS_COUNT diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 9c909ce733a5..9ff72cfb2e98 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -99,6 +99,7 @@ enum { ETHTOOL_STATS_ETH_MAC, ETHTOOL_STATS_ETH_CTRL, ETHTOOL_STATS_RMON, + ETHTOOL_STATS_PHY, /* add new constants above here */ __ETHTOOL_STATS_CNT @@ -193,6 +194,19 @@ enum { ETHTOOL_A_STATS_RMON_MAX = (__ETHTOOL_A_STATS_RMON_CNT - 1) }; +enum { + /* Basic packet counters if PHY has separate counters from the MAC */ + ETHTOOL_A_STATS_PHY_RX_PKTS, + ETHTOOL_A_STATS_PHY_RX_BYTES, + ETHTOOL_A_STATS_PHY_RX_ERRORS, + ETHTOOL_A_STATS_PHY_TX_PKTS, + ETHTOOL_A_STATS_PHY_TX_BYTES, + ETHTOOL_A_STATS_PHY_TX_ERRORS, + + /* add new constants above here */ + __ETHTOOL_A_STATS_PHY_CNT, + ETHTOOL_A_STATS_PHY_MAX = (__ETHTOOL_A_STATS_PHY_CNT - 1) +}; /* generic netlink info */ #define ETHTOOL_GENL_NAME "ethtool" diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index 0a09298fff92..1ce0a3de1430 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -511,5 +511,6 @@ extern const char stats_eth_phy_names[__ETHTOOL_A_STATS_ETH_PHY_CNT][ETH_GSTRING extern const char stats_eth_mac_names[__ETHTOOL_A_STATS_ETH_MAC_CNT][ETH_GSTRING_LEN]; extern const char stats_eth_ctrl_names[__ETHTOOL_A_STATS_ETH_CTRL_CNT][ETH_GSTRING_LEN]; extern const char stats_rmon_names[__ETHTOOL_A_STATS_RMON_CNT][ETH_GSTRING_LEN]; +extern const char stats_phy_names[__ETHTOOL_A_STATS_PHY_CNT][ETH_GSTRING_LEN]; #endif /* _NET_ETHTOOL_NETLINK_H */ diff --git a/net/ethtool/stats.c b/net/ethtool/stats.c index f4d822c225db..038a2558f052 100644 --- a/net/ethtool/stats.c +++ b/net/ethtool/stats.c @@ -36,6 +36,7 @@ const char stats_std_names[__ETHTOOL_STATS_CNT][ETH_GSTRING_LEN] = { [ETHTOOL_STATS_ETH_MAC] = "eth-mac", [ETHTOOL_STATS_ETH_CTRL] = "eth-ctrl", [ETHTOOL_STATS_RMON] = "rmon", + [ETHTOOL_STATS_PHY] = "phydev", }; const char stats_eth_phy_names[__ETHTOOL_A_STATS_ETH_PHY_CNT][ETH_GSTRING_LEN] = { @@ -80,6 +81,15 @@ const char stats_rmon_names[__ETHTOOL_A_STATS_RMON_CNT][ETH_GSTRING_LEN] = { [ETHTOOL_A_STATS_RMON_JABBER] = "etherStatsJabbers", }; +const char stats_phy_names[__ETHTOOL_A_STATS_PHY_CNT][ETH_GSTRING_LEN] = { + [ETHTOOL_A_STATS_PHY_RX_PKTS] = "RxFrames", + [ETHTOOL_A_STATS_PHY_RX_BYTES] = "RxOctets", + [ETHTOOL_A_STATS_PHY_RX_ERRORS] = "RxErrors", + [ETHTOOL_A_STATS_PHY_TX_PKTS] = "TxFrames", + [ETHTOOL_A_STATS_PHY_TX_BYTES] = "TxOctets", + [ETHTOOL_A_STATS_PHY_TX_ERRORS] = "TxErrors", +}; + const struct nla_policy ethnl_stats_get_policy[ETHTOOL_A_STATS_SRC + 1] = { [ETHTOOL_A_STATS_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy), @@ -156,7 +166,8 @@ static int stats_prepare_data(const struct ethnl_req_info *req_base, data->ctrl_stats.src = src; data->rmon_stats.src = src; - if (test_bit(ETHTOOL_STATS_ETH_PHY, req_info->stat_mask) && + if ((test_bit(ETHTOOL_STATS_PHY, req_info->stat_mask) || + test_bit(ETHTOOL_STATS_ETH_PHY, req_info->stat_mask)) && src == ETHTOOL_MAC_STATS_SRC_AGGREGATE) { if (phydev) phy_ethtool_get_phy_stats(phydev, &data->phy_stats, @@ -212,6 +223,10 @@ static int stats_reply_size(const struct ethnl_req_info *req_base, nla_total_size(4)) * /* _A_STATS_GRP_HIST_BKT_HI */ ETHTOOL_RMON_HIST_MAX * 2; } + if (test_bit(ETHTOOL_STATS_PHY, req_info->stat_mask)) { + n_stats += sizeof(struct ethtool_phy_stats) / sizeof(u64); + n_grps++; + } len += n_grps * (nla_total_size(0) + /* _A_STATS_GRP */ nla_total_size(4) + /* _A_STATS_GRP_ID */ @@ -265,6 +280,25 @@ static int stats_put_phy_stats(struct sk_buff *skb, return 0; } +static int stats_put_phydev_stats(struct sk_buff *skb, + const struct stats_reply_data *data) +{ + if (stat_put(skb, ETHTOOL_A_STATS_PHY_RX_PKTS, + data->phydev_stats.rx_packets) || + stat_put(skb, ETHTOOL_A_STATS_PHY_RX_BYTES, + data->phydev_stats.rx_bytes) || + stat_put(skb, ETHTOOL_A_STATS_PHY_RX_ERRORS, + data->phydev_stats.rx_errors) || + stat_put(skb, ETHTOOL_A_STATS_PHY_TX_PKTS, + data->phydev_stats.tx_packets) || + stat_put(skb, ETHTOOL_A_STATS_PHY_TX_BYTES, + data->phydev_stats.tx_bytes) || + stat_put(skb, ETHTOOL_A_STATS_PHY_TX_ERRORS, + data->phydev_stats.tx_errors)) + return -EMSGSIZE; + return 0; +} + static int stats_put_mac_stats(struct sk_buff *skb, const struct stats_reply_data *data) { @@ -441,6 +475,9 @@ static int stats_fill_reply(struct sk_buff *skb, if (!ret && test_bit(ETHTOOL_STATS_RMON, req_info->stat_mask)) ret = stats_put_stats(skb, data, ETHTOOL_STATS_RMON, ETH_SS_STATS_RMON, stats_put_rmon_stats); + if (!ret && test_bit(ETHTOOL_STATS_PHY, req_info->stat_mask)) + ret = stats_put_stats(skb, data, ETHTOOL_STATS_PHY, + ETH_SS_STATS_PHY, stats_put_phydev_stats); return ret; } diff --git a/net/ethtool/strset.c b/net/ethtool/strset.c index b3382b3cf325..818cf01f0911 100644 --- a/net/ethtool/strset.c +++ b/net/ethtool/strset.c @@ -105,6 +105,11 @@ static const struct strset_info info_template[] = { .count = __ETHTOOL_A_STATS_RMON_CNT, .strings = stats_rmon_names, }, + [ETH_SS_STATS_PHY] = { + .per_dev = false, + .count = __ETHTOOL_A_STATS_PHY_CNT, + .strings = stats_phy_names, + }, }; struct strset_req_info { -- cgit v1.2.3 From 384669921779806105c56751abff41fa0127f93a Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 14 Jan 2025 11:47:01 +0100 Subject: ALSA: rawmidi: Make tied_device=0 as default / unknown In the original change, rawmidi_info.tied_device showed -1 for the unknown or untied device. But this would require the user-space to check the protocol version and judge the value conditionally, which is rather error-prone. Instead, set the tied_device = 0 to be default as unknown, and indicate the real device with the offset 1, for achieving more backward compatibility. Suggested-by: Jaroslav Kysela Link: https://patch.msgid.link/20250114104711.19197-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- include/sound/rawmidi.h | 9 +++++++++ include/uapi/sound/asound.h | 2 +- sound/core/rawmidi.c | 1 - sound/core/ump.c | 3 +-- 4 files changed, 11 insertions(+), 4 deletions(-) (limited to 'include/uapi') diff --git a/include/sound/rawmidi.h b/include/sound/rawmidi.h index 6f2e95298fc7..6916f7133597 100644 --- a/include/sound/rawmidi.h +++ b/include/sound/rawmidi.h @@ -191,4 +191,13 @@ long snd_rawmidi_kernel_read(struct snd_rawmidi_substream *substream, long snd_rawmidi_kernel_write(struct snd_rawmidi_substream *substream, const unsigned char *buf, long count); +/* set up the tied devices */ +static inline void snd_rawmidi_tie_devices(struct snd_rawmidi *r1, + struct snd_rawmidi *r2) +{ + /* tied_device field keeps the device+1 (so that 0 being unknown) */ + r1->tied_device = r2->device + 1; + r2->tied_device = r1->device + 1; +} + #endif /* __SOUND_RAWMIDI_H */ diff --git a/include/uapi/sound/asound.h b/include/uapi/sound/asound.h index 000b36c0e2b9..5a049eeaecce 100644 --- a/include/uapi/sound/asound.h +++ b/include/uapi/sound/asound.h @@ -730,7 +730,7 @@ enum { #define SNDRV_RAWMIDI_INFO_UMP 0x00000008 #define SNDRV_RAWMIDI_INFO_STREAM_INACTIVE 0x00000010 -#define SNDRV_RAWMIDI_DEVICE_UNKNOWN -1 +#define SNDRV_RAWMIDI_DEVICE_UNKNOWN 0 struct snd_rawmidi_info { unsigned int device; /* RO/WR (control): device number */ diff --git a/sound/core/rawmidi.c b/sound/core/rawmidi.c index 8a681bff4f7f..70a958ac1112 100644 --- a/sound/core/rawmidi.c +++ b/sound/core/rawmidi.c @@ -1837,7 +1837,6 @@ int snd_rawmidi_init(struct snd_rawmidi *rmidi, INIT_LIST_HEAD(&rmidi->streams[SNDRV_RAWMIDI_STREAM_INPUT].substreams); INIT_LIST_HEAD(&rmidi->streams[SNDRV_RAWMIDI_STREAM_OUTPUT].substreams); rmidi->info_flags = info_flags; - rmidi->tied_device = SNDRV_RAWMIDI_DEVICE_UNKNOWN; if (id != NULL) strscpy(rmidi->id, id, sizeof(rmidi->id)); diff --git a/sound/core/ump.c b/sound/core/ump.c index ff3cc2386ece..8d8681a42ca5 100644 --- a/sound/core/ump.c +++ b/sound/core/ump.c @@ -1382,8 +1382,7 @@ int snd_ump_attach_legacy_rawmidi(struct snd_ump_endpoint *ump, ump_legacy_set_rawmidi_name(ump); update_legacy_names(ump); - rmidi->tied_device = ump->core.device; - ump->core.tied_device = rmidi->device; + snd_rawmidi_tie_devices(rmidi, &ump->core); ump_dbg(ump, "Created a legacy rawmidi #%d (%s)\n", device, id); return 0; -- cgit v1.2.3 From d16b34479064fcb8dbb66a72308b5034be819161 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 13 Jan 2025 13:55:58 +0000 Subject: tcp: add LINUX_MIB_PAWS_OLD_ACK SNMP counter Prior patch in the series added TCP_RFC7323_PAWS_ACK drop reason. This patch adds the corresponding SNMP counter, for folks using nstat instead of tracing for TCP diagnostics. nstat -az | grep PAWSOldAck Suggested-by: Neal Cardwell Signed-off-by: Eric Dumazet Reviewed-by: Neal Cardwell Reviewed-by: Jason Xing Tested-by: Neal Cardwell Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250113135558.3180360-4-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/dropreason-core.h | 1 + include/uapi/linux/snmp.h | 1 + net/ipv4/proc.c | 1 + net/ipv4/tcp_input.c | 7 ++++--- 4 files changed, 7 insertions(+), 3 deletions(-) (limited to 'include/uapi') diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h index 28555109f9bd..ed864934e20b 100644 --- a/include/net/dropreason-core.h +++ b/include/net/dropreason-core.h @@ -262,6 +262,7 @@ enum skb_drop_reason { SKB_DROP_REASON_TCP_RFC7323_PAWS, /** * @SKB_DROP_REASON_TCP_RFC7323_PAWS_ACK: PAWS check, old ACK packet. + * Corresponds to LINUX_MIB_PAWS_OLD_ACK. */ SKB_DROP_REASON_TCP_RFC7323_PAWS_ACK, /** @SKB_DROP_REASON_TCP_OLD_SEQUENCE: Old SEQ field (duplicate packet) */ diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index 2e75674e7d4f..848c7784e684 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -186,6 +186,7 @@ enum LINUX_MIB_TIMEWAITKILLED, /* TimeWaitKilled */ LINUX_MIB_PAWSACTIVEREJECTED, /* PAWSActiveRejected */ LINUX_MIB_PAWSESTABREJECTED, /* PAWSEstabRejected */ + LINUX_MIB_PAWS_OLD_ACK, /* PAWSOldAck */ LINUX_MIB_DELAYEDACKS, /* DelayedACKs */ LINUX_MIB_DELAYEDACKLOCKED, /* DelayedACKLocked */ LINUX_MIB_DELAYEDACKLOST, /* DelayedACKLost */ diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 40053a02bae1..affd21a0f572 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -189,6 +189,7 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TWKilled", LINUX_MIB_TIMEWAITKILLED), SNMP_MIB_ITEM("PAWSActive", LINUX_MIB_PAWSACTIVEREJECTED), SNMP_MIB_ITEM("PAWSEstab", LINUX_MIB_PAWSESTABREJECTED), + SNMP_MIB_ITEM("PAWSOldAck", LINUX_MIB_PAWS_OLD_ACK), SNMP_MIB_ITEM("DelayedACKs", LINUX_MIB_DELAYEDACKS), SNMP_MIB_ITEM("DelayedACKLocked", LINUX_MIB_DELAYEDACKLOCKED), SNMP_MIB_ITEM("DelayedACKLost", LINUX_MIB_DELAYEDACKLOST), diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index dc0e88bcc535..eb82e01da911 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5969,12 +5969,13 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, if (unlikely(th->syn)) goto syn_challenge; - /* Old ACK are common, do not change PAWSESTABREJECTED + /* Old ACK are common, increment PAWS_OLD_ACK * and do not send a dupack. */ - if (reason == SKB_DROP_REASON_TCP_RFC7323_PAWS_ACK) + if (reason == SKB_DROP_REASON_TCP_RFC7323_PAWS_ACK) { + NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWS_OLD_ACK); goto discard; - + } NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED); if (!tcp_oow_rate_limited(sock_net(sk), skb, LINUX_MIB_TCPACKSKIPPEDPAWS, -- cgit v1.2.3 From e5321ae10e1323359a5067a26dfe98b5f44cc5e6 Mon Sep 17 00:00:00 2001 From: Ilpo JĂ€rvinen Date: Tue, 14 Jan 2025 19:08:38 +0200 Subject: PCI: Store number of supported End-End TLP Prefixes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit eetlp_prefix_path in the struct pci_dev tells if End-End TLP Prefixes are supported by the path or not, and the value is only calculated if CONFIG_PCI_PASID is set. The Max End-End TLP Prefixes field in the Device Capabilities Register 2 also tells how many (1-4) End-End TLP Prefixes are supported (PCIe r6.2 sec 7.5.3.15). The number of supported End-End Prefixes is useful for reading correct number of DWORDs from TLP Prefix Log register in AER capability (PCIe r6.2 sec 7.8.4.12). Replace eetlp_prefix_path with eetlp_prefix_max and determine the number of supported End-End Prefixes regardless of CONFIG_PCI_PASID so that an upcoming commit generalizing TLP Prefix Log register reading does not have to read extra DWORDs for End-End Prefixes that never will be there. The value stored into eetlp_prefix_max is directly derived from device's Max End-End TLP Prefixes and does not consider limitations imposed by bridges or the Root Port beyond supported/not supported flags. This is intentional for two reasons: 1) PCIe r6.2 spec sections 2.2.10.4 & 6.2.4.4 indicate that a TLP is malformed only if the number of prefixes exceed the number of Max End-End TLP Prefixes, which seems to be the case even if the device could never receive that many prefixes due to smaller maximum imposed by a bridge or the Root Port. If TLP parsing is later added, this distinction is significant in interpreting what is logged by the TLP Prefix Log registers and the value matching to the Malformed TLP threshold is going to be more useful. 2) TLP Prefix handling happens autonomously on a low layer and the value in eetlp_prefix_max is not programmed anywhere by the kernel (i.e., there is no limiter OS can control to prevent sending more than N TLP Prefixes). Link: https://lore.kernel.org/r/20250114170840.1633-7-ilpo.jarvinen@linux.intel.com Signed-off-by: Ilpo JĂ€rvinen Signed-off-by: Bjorn Helgaas Reviewed-by: Jonathan Cameron Reviewed-by: Yazen Ghannam --- drivers/pci/ats.c | 2 +- drivers/pci/probe.c | 14 +++++++++----- include/linux/pci.h | 2 +- include/uapi/linux/pci_regs.h | 1 + 4 files changed, 12 insertions(+), 7 deletions(-) (limited to 'include/uapi') diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c index 6afff1f1b143..c6b266c772c8 100644 --- a/drivers/pci/ats.c +++ b/drivers/pci/ats.c @@ -410,7 +410,7 @@ int pci_enable_pasid(struct pci_dev *pdev, int features) if (WARN_ON(pdev->pasid_enabled)) return -EBUSY; - if (!pdev->eetlp_prefix_path && !pdev->pasid_no_tlp) + if (!pdev->eetlp_prefix_max && !pdev->pasid_no_tlp) return -EINVAL; if (!pasid) diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 2e81ab0f5a25..381c22e3ccdb 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -2251,8 +2251,8 @@ static void pci_configure_relaxed_ordering(struct pci_dev *dev) static void pci_configure_eetlp_prefix(struct pci_dev *dev) { -#ifdef CONFIG_PCI_PASID struct pci_dev *bridge; + unsigned int eetlp_max; int pcie_type; u32 cap; @@ -2264,15 +2264,19 @@ static void pci_configure_eetlp_prefix(struct pci_dev *dev) return; pcie_type = pci_pcie_type(dev); + + eetlp_max = FIELD_GET(PCI_EXP_DEVCAP2_EE_PREFIX_MAX, cap); + /* 00b means 4 */ + eetlp_max = eetlp_max ?: 4; + if (pcie_type == PCI_EXP_TYPE_ROOT_PORT || pcie_type == PCI_EXP_TYPE_RC_END) - dev->eetlp_prefix_path = 1; + dev->eetlp_prefix_max = eetlp_max; else { bridge = pci_upstream_bridge(dev); - if (bridge && bridge->eetlp_prefix_path) - dev->eetlp_prefix_path = 1; + if (bridge && bridge->eetlp_prefix_max) + dev->eetlp_prefix_max = eetlp_max; } -#endif } static void pci_configure_serr(struct pci_dev *dev) diff --git a/include/linux/pci.h b/include/linux/pci.h index db9b47ce3eef..21be5a1edf1a 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -407,7 +407,7 @@ struct pci_dev { supported from root to here */ #endif unsigned int pasid_no_tlp:1; /* PASID works without TLP Prefix */ - unsigned int eetlp_prefix_path:1; /* End-to-End TLP Prefix */ + unsigned int eetlp_prefix_max:3; /* Max # of End-End TLP Prefixes, 0=not supported */ pci_channel_state_t error_state; /* Current connectivity state */ struct device dev; /* Generic device interface */ diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index 1601c7ed5fab..14a6306c4ce1 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -665,6 +665,7 @@ #define PCI_EXP_DEVCAP2_OBFF_MSG 0x00040000 /* New message signaling */ #define PCI_EXP_DEVCAP2_OBFF_WAKE 0x00080000 /* Re-use WAKE# for OBFF */ #define PCI_EXP_DEVCAP2_EE_PREFIX 0x00200000 /* End-End TLP Prefix */ +#define PCI_EXP_DEVCAP2_EE_PREFIX_MAX 0x00c00000 /* Max End-End TLP Prefixes */ #define PCI_EXP_DEVCTL2 0x28 /* Device Control 2 */ #define PCI_EXP_DEVCTL2_COMP_TIMEOUT 0x000f /* Completion Timeout Value */ #define PCI_EXP_DEVCTL2_COMP_TMOUT_DIS 0x0010 /* Completion Timeout Disable */ -- cgit v1.2.3 From 1bebc7869c99d466f819dd2cffaef0edf7d7a035 Mon Sep 17 00:00:00 2001 From: Illia Ostapyshyn Date: Thu, 14 Nov 2024 18:39:29 +0100 Subject: Input: allocate keycode for phone linking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The F11 key on the new Lenovo Thinkpad T14 Gen 5, T16 Gen 3, and P14s Gen 5 laptops includes a symbol showing a smartphone and a laptop chained together. According to the user manual, it starts the Microsoft Phone Link software used to connect to Android/iOS devices and relay messages/calls or sync data. As there are no suitable keycodes for this action, introduce a new one. Signed-off-by: Illia Ostapyshyn Acked-by: Dmitry Torokhov Link: https://lore.kernel.org/r/20241114173930.44983-2-illia@yshyn.com Reviewed-by: Ilpo JĂ€rvinen Signed-off-by: Ilpo JĂ€rvinen --- include/uapi/linux/input-event-codes.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/input-event-codes.h b/include/uapi/linux/input-event-codes.h index a4206723f503..5a199f3d4a26 100644 --- a/include/uapi/linux/input-event-codes.h +++ b/include/uapi/linux/input-event-codes.h @@ -519,6 +519,7 @@ #define KEY_NOTIFICATION_CENTER 0x1bc /* Show/hide the notification center */ #define KEY_PICKUP_PHONE 0x1bd /* Answer incoming call */ #define KEY_HANGUP_PHONE 0x1be /* Decline incoming call */ +#define KEY_LINK_PHONE 0x1bf /* AL Phone Syncing */ #define KEY_DEL_EOL 0x1c0 #define KEY_DEL_EOS 0x1c1 -- cgit v1.2.3 From eec8359f0797ef87c6ef6cbed6de08b02073b833 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Tue, 14 Jan 2025 14:28:44 +0000 Subject: net: ethtool: add support for configuring hds-thresh The hds-thresh option configures the threshold value of the header-data-split. If a received packet size is larger than this threshold value, a packet will be split into header and payload. The header indicates TCP and UDP header, but it depends on driver spec. The bnxt_en driver supports HDS(Header-Data-Split) configuration at FW level, affecting TCP and UDP too. So, If hds-thresh is set, it affects UDP and TCP packets. Example: # ethtool -G hds-thresh # ethtool -G enp14s0f0np0 tcp-data-split on hds-thresh 256 # ethtool -g enp14s0f0np0 Ring parameters for enp14s0f0np0: Pre-set maximums: ... HDS thresh: 1023 Current hardware settings: ... TCP data split: on HDS thresh: 256 The default/min/max values are not defined in the ethtool so the drivers should define themself. The 0 value means that all TCP/UDP packets' header and payload will be split. Tested-by: Stanislav Fomichev Signed-off-by: Taehee Yoo Link: https://patch.msgid.link/20250114142852.3364986-3-ap420073@gmail.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/ethtool.yaml | 8 ++++++++ Documentation/networking/ethtool-netlink.rst | 10 +++++++++ include/linux/ethtool.h | 9 +++++++++ include/uapi/linux/ethtool_netlink_generated.h | 2 ++ net/ethtool/netlink.h | 2 +- net/ethtool/rings.c | 28 +++++++++++++++++++++++--- 6 files changed, 55 insertions(+), 4 deletions(-) (limited to 'include/uapi') diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml index 60f85fbf4156..66be04013048 100644 --- a/Documentation/netlink/specs/ethtool.yaml +++ b/Documentation/netlink/specs/ethtool.yaml @@ -332,6 +332,12 @@ attribute-sets: - name: tx-push-buf-len-max type: u32 + - + name: hds-thresh + type: u32 + - + name: hds-thresh-max + type: u32 - name: mm-stat @@ -1777,6 +1783,8 @@ operations: - rx-push - tx-push-buf-len - tx-push-buf-len-max + - hds-thresh + - hds-thresh-max dump: *ring-get-op - name: rings-set diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index da846f1d998e..f70c0249860c 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -899,6 +899,10 @@ Kernel response contents: ``ETHTOOL_A_RINGS_RX_PUSH`` u8 flag of RX Push mode ``ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN`` u32 size of TX push buffer ``ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN_MAX`` u32 max size of TX push buffer + ``ETHTOOL_A_RINGS_HDS_THRESH`` u32 threshold of + header / data split + ``ETHTOOL_A_RINGS_HDS_THRESH_MAX`` u32 max threshold of + header / data split ======================================= ====== =========================== ``ETHTOOL_A_RINGS_TCP_DATA_SPLIT`` indicates whether the device is usable with @@ -941,10 +945,12 @@ Request contents: ``ETHTOOL_A_RINGS_RX_JUMBO`` u32 size of RX jumbo ring ``ETHTOOL_A_RINGS_TX`` u32 size of TX ring ``ETHTOOL_A_RINGS_RX_BUF_LEN`` u32 size of buffers on the ring + ``ETHTOOL_A_RINGS_TCP_DATA_SPLIT`` u8 TCP header / data split ``ETHTOOL_A_RINGS_CQE_SIZE`` u32 Size of TX/RX CQE ``ETHTOOL_A_RINGS_TX_PUSH`` u8 flag of TX Push mode ``ETHTOOL_A_RINGS_RX_PUSH`` u8 flag of RX Push mode ``ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN`` u32 size of TX push buffer + ``ETHTOOL_A_RINGS_HDS_THRESH`` u32 threshold of header / data split ==================================== ====== =========================== Kernel checks that requested ring sizes do not exceed limits reported by @@ -961,6 +967,10 @@ A bigger CQE can have more receive buffer pointers, and in turn the NIC can transfer a bigger frame from wire. Based on the NIC hardware, the overall completion queue size can be adjusted in the driver if CQE size is modified. +``ETHTOOL_A_RINGS_HDS_THRESH`` specifies the threshold value of +header / data split feature. If a received packet size is larger than this +threshold value, header and data will be split. + CHANNELS_GET ============ diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index d79bd201c1c8..e4136b0df892 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -78,6 +78,9 @@ enum { * @cqe_size: Size of TX/RX completion queue event * @tx_push_buf_len: Size of TX push buffer * @tx_push_buf_max_len: Maximum allowed size of TX push buffer + * @hds_thresh: Packet size threshold for header data split (HDS) + * @hds_thresh_max: Maximum supported setting for @hds_threshold + * */ struct kernel_ethtool_ringparam { u32 rx_buf_len; @@ -87,6 +90,8 @@ struct kernel_ethtool_ringparam { u32 cqe_size; u32 tx_push_buf_len; u32 tx_push_buf_max_len; + u32 hds_thresh; + u32 hds_thresh_max; }; /** @@ -97,6 +102,7 @@ struct kernel_ethtool_ringparam { * @ETHTOOL_RING_USE_RX_PUSH: capture for setting rx_push * @ETHTOOL_RING_USE_TX_PUSH_BUF_LEN: capture for setting tx_push_buf_len * @ETHTOOL_RING_USE_TCP_DATA_SPLIT: capture for setting tcp_data_split + * @ETHTOOL_RING_USE_HDS_THRS: capture for setting header-data-split-thresh */ enum ethtool_supported_ring_param { ETHTOOL_RING_USE_RX_BUF_LEN = BIT(0), @@ -105,6 +111,7 @@ enum ethtool_supported_ring_param { ETHTOOL_RING_USE_RX_PUSH = BIT(3), ETHTOOL_RING_USE_TX_PUSH_BUF_LEN = BIT(4), ETHTOOL_RING_USE_TCP_DATA_SPLIT = BIT(5), + ETHTOOL_RING_USE_HDS_THRS = BIT(6), }; #define __ETH_RSS_HASH_BIT(bit) ((u32)1 << (bit)) @@ -1157,6 +1164,7 @@ int ethtool_virtdev_set_link_ksettings(struct net_device *dev, * @rss_ctx: XArray of custom RSS contexts * @rss_lock: Protects entries in @rss_ctx. May be taken from * within RTNL. + * @hds_thresh: HDS Threshold value. * @hds_config: HDS value from userspace. * @wol_enabled: Wake-on-LAN is enabled * @module_fw_flash_in_progress: Module firmware flashing is in progress. @@ -1164,6 +1172,7 @@ int ethtool_virtdev_set_link_ksettings(struct net_device *dev, struct ethtool_netdev_state { struct xarray rss_ctx; struct mutex rss_lock; + u32 hds_thresh; u8 hds_config; unsigned wol_enabled:1; unsigned module_fw_flash_in_progress:1; diff --git a/include/uapi/linux/ethtool_netlink_generated.h b/include/uapi/linux/ethtool_netlink_generated.h index 43993a2d68e5..2e17ff348f89 100644 --- a/include/uapi/linux/ethtool_netlink_generated.h +++ b/include/uapi/linux/ethtool_netlink_generated.h @@ -155,6 +155,8 @@ enum { ETHTOOL_A_RINGS_RX_PUSH, ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN, ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN_MAX, + ETHTOOL_A_RINGS_HDS_THRESH, + ETHTOOL_A_RINGS_HDS_THRESH_MAX, __ETHTOOL_A_RINGS_CNT, ETHTOOL_A_RINGS_MAX = (__ETHTOOL_A_RINGS_CNT - 1) diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index 1ce0a3de1430..ff69ca0715de 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -456,7 +456,7 @@ extern const struct nla_policy ethnl_features_set_policy[ETHTOOL_A_FEATURES_WANT extern const struct nla_policy ethnl_privflags_get_policy[ETHTOOL_A_PRIVFLAGS_HEADER + 1]; extern const struct nla_policy ethnl_privflags_set_policy[ETHTOOL_A_PRIVFLAGS_FLAGS + 1]; extern const struct nla_policy ethnl_rings_get_policy[ETHTOOL_A_RINGS_HEADER + 1]; -extern const struct nla_policy ethnl_rings_set_policy[ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN_MAX + 1]; +extern const struct nla_policy ethnl_rings_set_policy[ETHTOOL_A_RINGS_HDS_THRESH_MAX + 1]; extern const struct nla_policy ethnl_channels_get_policy[ETHTOOL_A_CHANNELS_HEADER + 1]; extern const struct nla_policy ethnl_channels_set_policy[ETHTOOL_A_CHANNELS_COMBINED_COUNT + 1]; extern const struct nla_policy ethnl_coalesce_get_policy[ETHTOOL_A_COALESCE_HEADER + 1]; diff --git a/net/ethtool/rings.c b/net/ethtool/rings.c index b2a2586b241f..a381913a19f0 100644 --- a/net/ethtool/rings.c +++ b/net/ethtool/rings.c @@ -61,7 +61,9 @@ static int rings_reply_size(const struct ethnl_req_info *req_base, nla_total_size(sizeof(u8)) + /* _RINGS_TX_PUSH */ nla_total_size(sizeof(u8))) + /* _RINGS_RX_PUSH */ nla_total_size(sizeof(u32)) + /* _RINGS_TX_PUSH_BUF_LEN */ - nla_total_size(sizeof(u32)); /* _RINGS_TX_PUSH_BUF_LEN_MAX */ + nla_total_size(sizeof(u32)) + /* _RINGS_TX_PUSH_BUF_LEN_MAX */ + nla_total_size(sizeof(u32)) + /* _RINGS_HDS_THRESH */ + nla_total_size(sizeof(u32)); /* _RINGS_HDS_THRESH_MAX*/ } static int rings_fill_reply(struct sk_buff *skb, @@ -108,7 +110,12 @@ static int rings_fill_reply(struct sk_buff *skb, (nla_put_u32(skb, ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN_MAX, kr->tx_push_buf_max_len) || nla_put_u32(skb, ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN, - kr->tx_push_buf_len)))) + kr->tx_push_buf_len))) || + ((supported_ring_params & ETHTOOL_RING_USE_HDS_THRS) && + (nla_put_u32(skb, ETHTOOL_A_RINGS_HDS_THRESH, + kr->hds_thresh) || + nla_put_u32(skb, ETHTOOL_A_RINGS_HDS_THRESH_MAX, + kr->hds_thresh_max)))) return -EMSGSIZE; return 0; @@ -130,6 +137,7 @@ const struct nla_policy ethnl_rings_set_policy[] = { [ETHTOOL_A_RINGS_TX_PUSH] = NLA_POLICY_MAX(NLA_U8, 1), [ETHTOOL_A_RINGS_RX_PUSH] = NLA_POLICY_MAX(NLA_U8, 1), [ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN] = { .type = NLA_U32 }, + [ETHTOOL_A_RINGS_HDS_THRESH] = { .type = NLA_U32 }, }; static int @@ -155,6 +163,14 @@ ethnl_set_rings_validate(struct ethnl_req_info *req_info, return -EOPNOTSUPP; } + if (tb[ETHTOOL_A_RINGS_HDS_THRESH] && + !(ops->supported_ring_params & ETHTOOL_RING_USE_HDS_THRS)) { + NL_SET_ERR_MSG_ATTR(info->extack, + tb[ETHTOOL_A_RINGS_HDS_THRESH], + "setting hds-thresh is not supported"); + return -EOPNOTSUPP; + } + if (tb[ETHTOOL_A_RINGS_CQE_SIZE] && !(ops->supported_ring_params & ETHTOOL_RING_USE_CQE_SIZE)) { NL_SET_ERR_MSG_ATTR(info->extack, @@ -223,6 +239,8 @@ ethnl_set_rings(struct ethnl_req_info *req_info, struct genl_info *info) tb[ETHTOOL_A_RINGS_RX_PUSH], &mod); ethnl_update_u32(&kernel_ringparam.tx_push_buf_len, tb[ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN], &mod); + ethnl_update_u32(&kernel_ringparam.hds_thresh, + tb[ETHTOOL_A_RINGS_HDS_THRESH], &mod); if (!mod) return 0; @@ -243,6 +261,8 @@ ethnl_set_rings(struct ethnl_req_info *req_info, struct genl_info *info) err_attr = tb[ETHTOOL_A_RINGS_RX_JUMBO]; else if (ringparam.tx_pending > ringparam.tx_max_pending) err_attr = tb[ETHTOOL_A_RINGS_TX]; + else if (kernel_ringparam.hds_thresh > kernel_ringparam.hds_thresh_max) + err_attr = tb[ETHTOOL_A_RINGS_HDS_THRESH]; else err_attr = NULL; if (err_attr) { @@ -261,8 +281,10 @@ ethnl_set_rings(struct ethnl_req_info *req_info, struct genl_info *info) ret = dev->ethtool_ops->set_ringparam(dev, &ringparam, &kernel_ringparam, info->extack); - if (!ret) + if (!ret) { dev->ethtool->hds_config = kernel_ringparam.tcp_data_split; + dev->ethtool->hds_thresh = kernel_ringparam.hds_thresh; + } return ret < 0 ? ret : 1; } -- cgit v1.2.3 From ad41ddeeac216417a52fbc1060577f3098f4e90e Mon Sep 17 00:00:00 2001 From: Ilpo JĂ€rvinen Date: Tue, 14 Jan 2025 19:08:39 +0200 Subject: PCI: Add TLP Prefix reading to pcie_read_tlp_log() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pcie_read_tlp_log() handles only 4 Header Log DWORDs but TLP Prefix Log (PCIe r6.1 secs 7.8.4.12 & 7.9.14.13) may also be present. Generalize pcie_read_tlp_log() and struct pcie_tlp_log to also handle TLP Prefix Log. The relevant registers are formatted identically in AER and DPC Capability, but has these variations: a) The offsets of TLP Prefix Log registers vary. b) DPC RP PIO TLP Prefix Log register can be < 4 DWORDs. c) AER TLP Prefix Log Present (PCIe r6.1 sec 7.8.4.7) can indicate Prefix Log is not present. Therefore callers must pass the offset of the TLP Prefix Log register and the entire length to pcie_read_tlp_log() to be able to read the correct number of TLP Prefix DWORDs from the correct offset. Link: https://lore.kernel.org/r/20250114170840.1633-8-ilpo.jarvinen@linux.intel.com Signed-off-by: Ilpo JĂ€rvinen [bhelgaas: squash ternary fix from https://lore.kernel.org/r/20250116172019.88116-1-colin.i.king@gmail.com] Signed-off-by: Bjorn Helgaas Reviewed-by: Jonathan Cameron --- drivers/pci/pci.h | 5 ++++- drivers/pci/pcie/aer.c | 5 ++++- drivers/pci/pcie/dpc.c | 13 +++++------ drivers/pci/pcie/tlp.c | 52 ++++++++++++++++++++++++++++++++++++++----- include/linux/aer.h | 1 + include/uapi/linux/pci_regs.h | 10 +++++---- 6 files changed, 68 insertions(+), 18 deletions(-) (limited to 'include/uapi') diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 55fcf3bac4f7..7797b2544d00 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -550,7 +550,9 @@ struct aer_err_info { int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info); void aer_print_error(struct pci_dev *dev, struct aer_err_info *info); -int pcie_read_tlp_log(struct pci_dev *dev, int where, struct pcie_tlp_log *log); +int pcie_read_tlp_log(struct pci_dev *dev, int where, int where2, + unsigned int tlp_len, struct pcie_tlp_log *log); +unsigned int aer_tlp_log_len(struct pci_dev *dev, u32 aercc); #endif /* CONFIG_PCIEAER */ #ifdef CONFIG_PCIEPORTBUS @@ -569,6 +571,7 @@ void pci_dpc_init(struct pci_dev *pdev); void dpc_process_error(struct pci_dev *pdev); pci_ers_result_t dpc_reset_link(struct pci_dev *pdev); bool pci_dpc_recovered(struct pci_dev *pdev); +unsigned int dpc_tlp_log_len(struct pci_dev *dev); #else static inline void pci_save_dpc_state(struct pci_dev *dev) { } static inline void pci_restore_dpc_state(struct pci_dev *dev) { } diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c index 80c5ba8d8296..656dbf1ac45b 100644 --- a/drivers/pci/pcie/aer.c +++ b/drivers/pci/pcie/aer.c @@ -1248,7 +1248,10 @@ int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info) if (info->status & AER_LOG_TLP_MASKS) { info->tlp_header_valid = 1; - pcie_read_tlp_log(dev, aer + PCI_ERR_HEADER_LOG, &info->tlp); + pcie_read_tlp_log(dev, aer + PCI_ERR_HEADER_LOG, + aer + PCI_ERR_PREFIX_LOG, + aer_tlp_log_len(dev, aercc), + &info->tlp); } } diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c index 0674d8c89bfa..0aa20bc58697 100644 --- a/drivers/pci/pcie/dpc.c +++ b/drivers/pci/pcie/dpc.c @@ -190,7 +190,7 @@ out: static void dpc_process_rp_pio_error(struct pci_dev *pdev) { u16 cap = pdev->dpc_cap, dpc_status, first_error; - u32 status, mask, sev, syserr, exc, log, prefix; + u32 status, mask, sev, syserr, exc, log; struct pcie_tlp_log tlp_log; int i; @@ -217,20 +217,19 @@ static void dpc_process_rp_pio_error(struct pci_dev *pdev) if (pdev->dpc_rp_log_size < PCIE_STD_NUM_TLP_HEADERLOG) goto clear_status; - pcie_read_tlp_log(pdev, cap + PCI_EXP_DPC_RP_PIO_HEADER_LOG, &tlp_log); + pcie_read_tlp_log(pdev, cap + PCI_EXP_DPC_RP_PIO_HEADER_LOG, + cap + PCI_EXP_DPC_RP_PIO_TLPPREFIX_LOG, + dpc_tlp_log_len(pdev), &tlp_log); pci_err(pdev, "TLP Header: %#010x %#010x %#010x %#010x\n", tlp_log.dw[0], tlp_log.dw[1], tlp_log.dw[2], tlp_log.dw[3]); + for (i = 0; i < pdev->dpc_rp_log_size - PCIE_STD_NUM_TLP_HEADERLOG - 1; i++) + pci_err(pdev, "TLP Prefix Header: dw%d, %#010x\n", i, tlp_log.prefix[i]); if (pdev->dpc_rp_log_size < PCIE_STD_NUM_TLP_HEADERLOG + 1) goto clear_status; pci_read_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_IMPSPEC_LOG, &log); pci_err(pdev, "RP PIO ImpSpec Log %#010x\n", log); - for (i = 0; i < pdev->dpc_rp_log_size - PCIE_STD_NUM_TLP_HEADERLOG - 1; i++) { - pci_read_config_dword(pdev, - cap + PCI_EXP_DPC_RP_PIO_TLPPREFIX_LOG + i * 4, &prefix); - pci_err(pdev, "TLP Prefix Header: dw%d, %#010x\n", i, prefix); - } clear_status: pci_write_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_STATUS, status); } diff --git a/drivers/pci/pcie/tlp.c b/drivers/pci/pcie/tlp.c index d7ad99f466b9..0071ea8903ed 100644 --- a/drivers/pci/pcie/tlp.c +++ b/drivers/pci/pcie/tlp.c @@ -11,26 +11,68 @@ #include "../pci.h" +/** + * aer_tlp_log_len - Calculate AER Capability TLP Header/Prefix Log length + * @dev: PCIe device + * @aercc: AER Capabilities and Control register value + * + * Return: TLP Header/Prefix Log length + */ +unsigned int aer_tlp_log_len(struct pci_dev *dev, u32 aercc) +{ + return PCIE_STD_NUM_TLP_HEADERLOG + + ((aercc & PCI_ERR_CAP_PREFIX_LOG_PRESENT) ? + dev->eetlp_prefix_max : 0); +} + +#ifdef CONFIG_PCIE_DPC +/** + * dpc_tlp_log_len - Calculate DPC RP PIO TLP Header/Prefix Log length + * @dev: PCIe device + * + * Return: TLP Header/Prefix Log length + */ +unsigned int dpc_tlp_log_len(struct pci_dev *dev) +{ + /* Remove ImpSpec Log register from the count */ + if (dev->dpc_rp_log_size >= PCIE_STD_NUM_TLP_HEADERLOG + 1) + return dev->dpc_rp_log_size - 1; + + return dev->dpc_rp_log_size; +} +#endif + /** * pcie_read_tlp_log - read TLP Header Log * @dev: PCIe device * @where: PCI Config offset of TLP Header Log + * @where2: PCI Config offset of TLP Prefix Log + * @tlp_len: TLP Log length (Header Log + TLP Prefix Log in DWORDs) * @log: TLP Log structure to fill * * Fill @log from TLP Header Log registers, e.g., AER or DPC. * * Return: 0 on success and filled TLP Log structure, <0 on error. */ -int pcie_read_tlp_log(struct pci_dev *dev, int where, - struct pcie_tlp_log *log) +int pcie_read_tlp_log(struct pci_dev *dev, int where, int where2, + unsigned int tlp_len, struct pcie_tlp_log *log) { unsigned int i; - int ret; + int off, ret; + u32 *to; memset(log, 0, sizeof(*log)); - for (i = 0; i < PCIE_STD_NUM_TLP_HEADERLOG; i++) { - ret = pci_read_config_dword(dev, where + i * 4, &log->dw[i]); + for (i = 0; i < tlp_len; i++) { + if (i < PCIE_STD_NUM_TLP_HEADERLOG) { + off = where + i * 4; + to = &log->dw[i]; + } else { + off = where2 + (i - PCIE_STD_NUM_TLP_HEADERLOG) * 4; + to = &log->prefix[i - PCIE_STD_NUM_TLP_HEADERLOG]; + } + + ret = pci_read_config_dword(dev, off, to); if (ret) return pcibios_err_to_errno(ret); } diff --git a/include/linux/aer.h b/include/linux/aer.h index 4ef6515c3205..947b63091902 100644 --- a/include/linux/aer.h +++ b/include/linux/aer.h @@ -27,6 +27,7 @@ struct pci_dev; struct pcie_tlp_log { u32 dw[PCIE_STD_NUM_TLP_HEADERLOG]; + u32 prefix[PCIE_STD_MAX_TLP_PREFIXLOG]; }; struct aer_capability_regs { diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index 14a6306c4ce1..82866ac0bda7 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -790,10 +790,11 @@ /* Same bits as above */ #define PCI_ERR_CAP 0x18 /* Advanced Error Capabilities & Ctrl*/ #define PCI_ERR_CAP_FEP(x) ((x) & 0x1f) /* First Error Pointer */ -#define PCI_ERR_CAP_ECRC_GENC 0x00000020 /* ECRC Generation Capable */ -#define PCI_ERR_CAP_ECRC_GENE 0x00000040 /* ECRC Generation Enable */ -#define PCI_ERR_CAP_ECRC_CHKC 0x00000080 /* ECRC Check Capable */ -#define PCI_ERR_CAP_ECRC_CHKE 0x00000100 /* ECRC Check Enable */ +#define PCI_ERR_CAP_ECRC_GENC 0x00000020 /* ECRC Generation Capable */ +#define PCI_ERR_CAP_ECRC_GENE 0x00000040 /* ECRC Generation Enable */ +#define PCI_ERR_CAP_ECRC_CHKC 0x00000080 /* ECRC Check Capable */ +#define PCI_ERR_CAP_ECRC_CHKE 0x00000100 /* ECRC Check Enable */ +#define PCI_ERR_CAP_PREFIX_LOG_PRESENT 0x00000800 /* TLP Prefix Log Present */ #define PCI_ERR_HEADER_LOG 0x1c /* Header Log Register (16 bytes) */ #define PCI_ERR_ROOT_COMMAND 0x2c /* Root Error Command */ #define PCI_ERR_ROOT_CMD_COR_EN 0x00000001 /* Correctable Err Reporting Enable */ @@ -809,6 +810,7 @@ #define PCI_ERR_ROOT_FATAL_RCV 0x00000040 /* Fatal Received */ #define PCI_ERR_ROOT_AER_IRQ 0xf8000000 /* Advanced Error Interrupt Message Number */ #define PCI_ERR_ROOT_ERR_SRC 0x34 /* Error Source Identification */ +#define PCI_ERR_PREFIX_LOG 0x38 /* TLP Prefix LOG Register (up to 16 bytes) */ /* Virtual Channel */ #define PCI_VC_PORT_CAP1 0x04 -- cgit v1.2.3 From 3ba0262a8fed29efe28e3ce3162d1794a58aa94f Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Wed, 15 Jan 2025 20:42:27 +0000 Subject: net: mdio: add definition for clock stop capable bit Add a definition for the clock stop capable bit in the PCS MMD. This bit indicates whether the MAC is able to stop the transmit xMII clock while it is signalling LPI. Reviewed-by: Andrew Lunn Signed-off-by: Russell King (Oracle) Reviewed-by: Jacob Keller Link: https://patch.msgid.link/E1tYADb-0014PV-6T@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- include/uapi/linux/mdio.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/mdio.h b/include/uapi/linux/mdio.h index f0d3f268240d..6975f182b22c 100644 --- a/include/uapi/linux/mdio.h +++ b/include/uapi/linux/mdio.h @@ -125,6 +125,7 @@ #define MDIO_STAT1_LPOWERABLE 0x0002 /* Low-power ability */ #define MDIO_STAT1_LSTATUS BMSR_LSTATUS #define MDIO_STAT1_FAULT 0x0080 /* Fault */ +#define MDIO_PCS_STAT1_CLKSTOP_CAP 0x0040 #define MDIO_AN_STAT1_LPABLE 0x0001 /* Link partner AN ability */ #define MDIO_AN_STAT1_ABLE BMSR_ANEGCAPABLE #define MDIO_AN_STAT1_RFAULT BMSR_RFAULT -- cgit v1.2.3 From 3194e36488e2dae05f9a822c73eaa367e47b2e3d Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 16 Jan 2025 17:02:56 +0000 Subject: dm-table: atomic writes support Support stacking atomic write limits for DM devices. All the pre-existing code in blk_stack_atomic_writes_limits() already takes care of finding the aggregrate limits from the bottom devices. Feature flag DM_TARGET_ATOMIC_WRITES is introduced so that atomic writes can be enabled on personalities selectively. This is to ensure that atomic writes are only enabled when verified to be working properly (for a specific personality). In addition, it just may not make sense to enable atomic writes on some personalities (so this flag also helps there). Signed-off-by: John Garry Reviewed-by: Mike Snitzer Signed-off-by: Mikulas Patocka --- drivers/md/dm-table.c | 29 +++++++++++++++++++++++++++++ include/linux/device-mapper.h | 3 +++ include/uapi/linux/dm-ioctl.h | 4 ++-- 3 files changed, 34 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index bd8b796ae683..0ef5203387b2 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1806,6 +1806,32 @@ static bool dm_table_supports_secure_erase(struct dm_table *t) return true; } +static int device_not_atomic_write_capable(struct dm_target *ti, + struct dm_dev *dev, sector_t start, + sector_t len, void *data) +{ + return !bdev_can_atomic_write(dev->bdev); +} + +static bool dm_table_supports_atomic_writes(struct dm_table *t) +{ + for (unsigned int i = 0; i < t->num_targets; i++) { + struct dm_target *ti = dm_table_get_target(t, i); + + if (!dm_target_supports_atomic_writes(ti->type)) + return false; + + if (!ti->type->iterate_devices) + return false; + + if (ti->type->iterate_devices(ti, + device_not_atomic_write_capable, NULL)) { + return false; + } + } + return true; +} + int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, struct queue_limits *limits) { @@ -1854,6 +1880,9 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, return r; } + if (dm_table_supports_atomic_writes(t)) + limits->features |= BLK_FEAT_ATOMIC_WRITES; + r = queue_limits_set(q, limits); if (r) return r; diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 8321f65897f3..bcc6d7b69470 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -299,6 +299,9 @@ struct target_type { #define dm_target_supports_mixed_zoned_model(type) (false) #endif +#define DM_TARGET_ATOMIC_WRITES 0x00000400 +#define dm_target_supports_atomic_writes(type) ((type)->features & DM_TARGET_ATOMIC_WRITES) + struct dm_target { struct dm_table *table; struct target_type *type; diff --git a/include/uapi/linux/dm-ioctl.h b/include/uapi/linux/dm-ioctl.h index 1990b5700f69..b08c7378164d 100644 --- a/include/uapi/linux/dm-ioctl.h +++ b/include/uapi/linux/dm-ioctl.h @@ -286,9 +286,9 @@ enum { #define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl) #define DM_VERSION_MAJOR 4 -#define DM_VERSION_MINOR 48 +#define DM_VERSION_MINOR 49 #define DM_VERSION_PATCHLEVEL 0 -#define DM_VERSION_EXTRA "-ioctl (2023-03-01)" +#define DM_VERSION_EXTRA "-ioctl (2025-01-17)" /* Status bits */ #define DM_READONLY_FLAG (1 << 0) /* In/Out */ -- cgit v1.2.3 From 6a128cdf1926b20a94d6af7d7d03b76ba19a4f8b Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 16 Jan 2025 12:46:25 +0200 Subject: net: ethtool: ts: add separate counter for unconfirmed one-step TX timestamps For packets with two-step timestamp requests, the hardware timestamp comes back to the driver through a confirmation mechanism of sorts, which allows the driver to confidently bump the successful "pkts" counter. For one-step PTP, the NIC is supposed to autonomously insert its hardware TX timestamp in the packet headers while simultaneously transmitting it. There may be a confirmation that this was done successfully, or there may not. None of the current drivers which implement ethtool_ops :: get_ts_stats() also support HWTSTAMP_TX_ONESTEP_SYNC or HWTSTAMP_TX_ONESTEP_SYNC, so it is a bit unclear which model to follow. But there are NICs, such as DSA, where there is no transmit confirmation at all. Here, it would be wrong / misleading to increment the successful "pkts" counter, because one-step PTP packets can be dropped on TX just like any other packets. So introduce a special counter which signifies "yes, an attempt was made, but we don't know whether it also exited the port or not". I expect that for one-step PTP packets where a confirmation is available, the "pkts" counter would be bumped. Signed-off-by: Vladimir Oltean Reviewed-by: Jakub Kicinski Link: https://patch.msgid.link/20250116104628.123555-2-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/ethtool.yaml | 3 +++ Documentation/networking/ethtool-netlink.rst | 16 +++++++++++----- include/linux/ethtool.h | 7 +++++++ include/uapi/linux/ethtool_netlink_generated.h | 1 + net/ethtool/tsinfo.c | 2 ++ 5 files changed, 24 insertions(+), 5 deletions(-) (limited to 'include/uapi') diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml index 66be04013048..259cb211a338 100644 --- a/Documentation/netlink/specs/ethtool.yaml +++ b/Documentation/netlink/specs/ethtool.yaml @@ -842,6 +842,9 @@ attribute-sets: - name: tx-err type: uint + - + name: tx-onestep-pkts-unconfirmed + type: uint - name: ts-hwtstamp-provider attr-cnt-name: __ethtool-a-ts-hwtstamp-provider-cnt diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index f70c0249860c..3770a2294509 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -1281,11 +1281,17 @@ would be empty (no bit set). Additional hardware timestamping statistics response contents: - ===================================== ====== =================================== - ``ETHTOOL_A_TS_STAT_TX_PKTS`` uint Packets with Tx HW timestamps - ``ETHTOOL_A_TS_STAT_TX_LOST`` uint Tx HW timestamp not arrived count - ``ETHTOOL_A_TS_STAT_TX_ERR`` uint HW error request Tx timestamp count - ===================================== ====== =================================== + ================================================== ====== ===================== + ``ETHTOOL_A_TS_STAT_TX_PKTS`` uint Packets with Tx + HW timestamps + ``ETHTOOL_A_TS_STAT_TX_LOST`` uint Tx HW timestamp + not arrived count + ``ETHTOOL_A_TS_STAT_TX_ERR`` uint HW error request + Tx timestamp count + ``ETHTOOL_A_TS_STAT_TX_ONESTEP_PKTS_UNCONFIRMED`` uint Packets with one-step + HW TX timestamps with + unconfirmed delivery + ================================================== ====== ===================== CABLE_TEST ========== diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index e4136b0df892..64301ddf2f59 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -559,6 +559,12 @@ struct ethtool_rmon_stats { /** * struct ethtool_ts_stats - HW timestamping statistics * @pkts: Number of packets successfully timestamped by the hardware. + * @onestep_pkts_unconfirmed: Number of PTP packets with one-step TX + * timestamping that were sent, but for which the + * device offers no confirmation whether they made + * it onto the wire and the timestamp was inserted + * in the originTimestamp or correctionField, or + * not. * @lost: Number of hardware timestamping requests where the timestamping * information from the hardware never arrived for submission with * the skb. @@ -571,6 +577,7 @@ struct ethtool_rmon_stats { struct ethtool_ts_stats { struct_group(tx_stats, u64 pkts; + u64 onestep_pkts_unconfirmed; u64 lost; u64 err; ); diff --git a/include/uapi/linux/ethtool_netlink_generated.h b/include/uapi/linux/ethtool_netlink_generated.h index 2e17ff348f89..fe24c3459ac0 100644 --- a/include/uapi/linux/ethtool_netlink_generated.h +++ b/include/uapi/linux/ethtool_netlink_generated.h @@ -382,6 +382,7 @@ enum { ETHTOOL_A_TS_STAT_TX_PKTS, ETHTOOL_A_TS_STAT_TX_LOST, ETHTOOL_A_TS_STAT_TX_ERR, + ETHTOOL_A_TS_STAT_TX_ONESTEP_PKTS_UNCONFIRMED, __ETHTOOL_A_TS_STAT_CNT, ETHTOOL_A_TS_STAT_MAX = (__ETHTOOL_A_TS_STAT_CNT - 1) diff --git a/net/ethtool/tsinfo.c b/net/ethtool/tsinfo.c index 7e495a41aeec..691be6c445b3 100644 --- a/net/ethtool/tsinfo.c +++ b/net/ethtool/tsinfo.c @@ -186,6 +186,8 @@ static int tsinfo_put_stats(struct sk_buff *skb, if (tsinfo_put_stat(skb, stats->tx_stats.pkts, ETHTOOL_A_TS_STAT_TX_PKTS) || + tsinfo_put_stat(skb, stats->tx_stats.onestep_pkts_unconfirmed, + ETHTOOL_A_TS_STAT_TX_ONESTEP_PKTS_UNCONFIRMED) || tsinfo_put_stat(skb, stats->tx_stats.lost, ETHTOOL_A_TS_STAT_TX_LOST) || tsinfo_put_stat(skb, stats->tx_stats.err, -- cgit v1.2.3 From 46757a3e7d50dac923888e7fbe68377736f13c70 Mon Sep 17 00:00:00 2001 From: "Geoffrey D. Bennett" Date: Fri, 17 Jan 2025 04:17:38 +1030 Subject: ALSA: FCP: Add Focusrite Control Protocol driver Add a new kernel driver for the Focusrite Control Protocol (FCP), which is used by Focusrite Scarlett 2nd Gen, 3rd Gen, 4th Gen, Clarett USB, Clarett+, and Vocaster series audio interfaces. This driver provides a user-space control interface via ALSA's hwdep subsystem. Unlike the existing Scarlett2 driver which implements all ALSA controls in kernel space, this new FCP driver takes a different approach by providing a minimal kernel interface that allows a user-space driver to send FCP commands and receive notifications. The only control implemented in kernel space is the Level Meter, since it requires frequent polling of volatile data. While this driver supports all interfaces that the Scarlett2 driver works with, it is initially enabled only for 4th Gen 16i16, 18i16, and 18i20 interfaces that are not supported by the Scarlett2 driver. Signed-off-by: Geoffrey D. Bennett Link: https://patch.msgid.link/597741a9b1198b965561547511d3d345f91cba20.1737048528.git.g@b4.vu Signed-off-by: Takashi Iwai --- MAINTAINERS | 10 +- include/uapi/sound/fcp.h | 120 +++++ include/uapi/sound/tlv.h | 2 + sound/usb/Makefile | 1 + sound/usb/fcp.c | 1134 ++++++++++++++++++++++++++++++++++++++++++++++ sound/usb/fcp.h | 7 + sound/usb/mixer_quirks.c | 7 + 7 files changed, 1277 insertions(+), 4 deletions(-) create mode 100644 include/uapi/sound/fcp.h create mode 100644 sound/usb/fcp.c create mode 100644 sound/usb/fcp.h (limited to 'include/uapi') diff --git a/MAINTAINERS b/MAINTAINERS index baf0eeb9a355..336be70068e9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8951,14 +8951,16 @@ L: linux-input@vger.kernel.org S: Maintained F: drivers/input/joystick/fsia6b.c -FOCUSRITE SCARLETT2 MIXER DRIVER (Scarlett Gen 2+ and Clarett) +FOCUSRITE CONTROL PROTOCOL/SCARLETT2 MIXER DRIVERS (Scarlett Gen 2+, Clarett, and Vocaster) M: Geoffrey D. Bennett L: linux-sound@vger.kernel.org S: Maintained -W: https://github.com/geoffreybennett/scarlett-gen2 -B: https://github.com/geoffreybennett/scarlett-gen2/issues -T: git https://github.com/geoffreybennett/scarlett-gen2.git +W: https://github.com/geoffreybennett/linux-fcp +B: https://github.com/geoffreybennett/linux-fcp/issues +T: git https://github.com/geoffreybennett/linux-fcp.git +F: include/uapi/sound/fcp.h F: include/uapi/sound/scarlett2.h +F: sound/usb/fcp.c F: sound/usb/mixer_scarlett2.c FORCEDETH GIGABIT ETHERNET DRIVER diff --git a/include/uapi/sound/fcp.h b/include/uapi/sound/fcp.h new file mode 100644 index 000000000000..aea428956d8f --- /dev/null +++ b/include/uapi/sound/fcp.h @@ -0,0 +1,120 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Focusrite Control Protocol Driver for ALSA + * + * Copyright (c) 2024-2025 by Geoffrey D. Bennett + */ +/* + * DOC: FCP (Focusrite Control Protocol) User-Space API + * + * This header defines the interface between the FCP kernel driver and + * user-space programs to enable the use of the proprietary features + * available in Focusrite USB audio interfaces. This includes Scarlett + * 2nd Gen, 3rd Gen, 4th Gen, Clarett USB, Clarett+, and Vocaster + * series devices. + * + * The interface is provided via ALSA's hwdep interface. Opening the + * hwdep device requires CAP_SYS_RAWIO privileges as this interface + * provides near-direct access. + * + * For details on the FCP protocol, refer to the kernel scarlett2 + * driver in sound/usb/mixer_scarlett2.c and the fcp-support project + * at https://github.com/geoffreybennett/fcp-support + * + * For examples of using these IOCTLs, see the fcp-server source in + * the fcp-support project. + * + * IOCTL Interface + * -------------- + * FCP_IOCTL_PVERSION: + * Returns the protocol version supported by the driver. + * + * FCP_IOCTL_INIT: + * Initialises the protocol and synchronises sequence numbers + * between the driver and device. Must be called at least once + * before sending commands. Can be safely called again at any time. + * + * FCP_IOCTL_CMD: + * Sends an FCP command to the device and returns the response. + * Requires prior initialisation via FCP_IOCTL_INIT. + * + * FCP_IOCTL_SET_METER_MAP: + * Configures the Level Meter control's mapping between device + * meters and control channels. Requires FCP_IOCTL_INIT to have been + * called first. The map size and number of slots cannot be changed + * after initial configuration, although the map itself can be + * updated. Once configured, the Level Meter remains functional even + * after the hwdep device is closed. + * + * FCP_IOCTL_SET_METER_LABELS: + * Set the labels for the Level Meter control. Requires + * FCP_IOCTL_SET_METER_MAP to have been called first. labels[] + * should contain a sequence of null-terminated labels corresponding + * to the control's channels. + */ +#ifndef __UAPI_SOUND_FCP_H +#define __UAPI_SOUND_FCP_H + +#include +#include + +#define FCP_HWDEP_MAJOR 2 +#define FCP_HWDEP_MINOR 0 +#define FCP_HWDEP_SUBMINOR 0 + +#define FCP_HWDEP_VERSION \ + ((FCP_HWDEP_MAJOR << 16) | \ + (FCP_HWDEP_MINOR << 8) | \ + FCP_HWDEP_SUBMINOR) + +#define FCP_HWDEP_VERSION_MAJOR(v) (((v) >> 16) & 0xFF) +#define FCP_HWDEP_VERSION_MINOR(v) (((v) >> 8) & 0xFF) +#define FCP_HWDEP_VERSION_SUBMINOR(v) ((v) & 0xFF) + +/* Get protocol version */ +#define FCP_IOCTL_PVERSION _IOR('S', 0x60, int) + +/* Start the protocol */ + +/* Step 0 and step 2 responses are variable length and placed in + * resp[] one after the other. + */ +struct fcp_init { + __u16 step0_resp_size; + __u16 step2_resp_size; + __u32 init1_opcode; + __u32 init2_opcode; + __u8 resp[]; +} __attribute__((packed)); + +#define FCP_IOCTL_INIT _IOWR('S', 0x64, struct fcp_init) + +/* Perform a command */ + +/* The request data is placed in data[] and the response data will + * overwrite it. + */ +struct fcp_cmd { + __u32 opcode; + __u16 req_size; + __u16 resp_size; + __u8 data[]; +} __attribute__((packed)); +#define FCP_IOCTL_CMD _IOWR('S', 0x65, struct fcp_cmd) + +/* Set the meter map */ +struct fcp_meter_map { + __u16 map_size; + __u16 meter_slots; + __s16 map[]; +} __attribute__((packed)); +#define FCP_IOCTL_SET_METER_MAP _IOW('S', 0x66, struct fcp_meter_map) + +/* Set the meter labels */ +struct fcp_meter_labels { + __u16 labels_size; + char labels[]; +} __attribute__((packed)); +#define FCP_IOCTL_SET_METER_LABELS _IOW('S', 0x67, struct fcp_meter_labels) + +#endif /* __UAPI_SOUND_FCP_H */ diff --git a/include/uapi/sound/tlv.h b/include/uapi/sound/tlv.h index b99a2414b53d..5bb55c80e095 100644 --- a/include/uapi/sound/tlv.h +++ b/include/uapi/sound/tlv.h @@ -18,6 +18,8 @@ #define SNDRV_CTL_TLVT_CHMAP_VAR 0x102 /* channels freely swappable */ #define SNDRV_CTL_TLVT_CHMAP_PAIRED 0x103 /* pair-wise swappable */ +#define SNDRV_CTL_TLVT_FCP_CHANNEL_LABELS 0x110 /* channel labels */ + /* * TLV structure is right behind the struct snd_ctl_tlv: * unsigned int type - see SNDRV_CTL_TLVT_* diff --git a/sound/usb/Makefile b/sound/usb/Makefile index 0532499dbc6d..30bd5348477b 100644 --- a/sound/usb/Makefile +++ b/sound/usb/Makefile @@ -6,6 +6,7 @@ snd-usb-audio-y := card.o \ clock.o \ endpoint.o \ + fcp.o \ format.o \ helper.o \ implicit.o \ diff --git a/sound/usb/fcp.c b/sound/usb/fcp.c new file mode 100644 index 000000000000..655e4f7338ed --- /dev/null +++ b/sound/usb/fcp.c @@ -0,0 +1,1134 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Focusrite Control Protocol Driver for ALSA + * + * Copyright (c) 2024-2025 by Geoffrey D. Bennett + */ +/* + * DOC: Theory of Operation + * + * The Focusrite Control Protocol (FCP) driver provides a minimal + * kernel interface that allows a user-space driver (primarily + * fcp-server) to communicate with Focusrite USB audio interfaces + * using their vendor-specific protocol. This protocol is used by + * Scarlett 2nd Gen, 3rd Gen, 4th Gen, Clarett USB, Clarett+, and + * Vocaster series devices. + * + * Unlike the existing scarlett2 driver which implements all controls + * in kernel space, this driver takes a lighter-weight approach by + * moving most functionality to user space. The only control + * implemented in kernel space is the Level Meter, since it requires + * frequent polling of volatile data. + * + * The driver provides an hwdep interface that allows the user-space + * driver to: + * - Initialise the protocol + * - Send arbitrary FCP commands to the device + * - Receive notifications from the device + * - Configure the Level Meter control + * + * Usage Flow + * ---------- + * 1. Open the hwdep device (requires CAP_SYS_RAWIO) + * 2. Get protocol version using FCP_IOCTL_PVERSION + * 3. Initialise protocol using FCP_IOCTL_INIT + * 4. Send commands using FCP_IOCTL_CMD + * 5. Receive notifications using read() + * 6. Optionally set up the Level Meter control using + * FCP_IOCTL_SET_METER_MAP + * 7. Optionally add labels to the Level Meter control using + * FCP_IOCTL_SET_METER_LABELS + * + * Level Meter + * ----------- + * The Level Meter is implemented as an ALSA control that provides + * real-time level monitoring. When the control is read, the driver + * requests the current meter levels from the device, translates the + * levels using the configured mapping, and returns the result to the + * user. The mapping between device meters and the ALSA control's + * channels is configured with FCP_IOCTL_SET_METER_MAP. + * + * Labels for the Level Meter channels can be set using + * FCP_IOCTL_SET_METER_LABELS and read by applications through the + * control's TLV data. The labels are transferred as a sequence of + * null-terminated strings. + */ + +#include +#include + +#include +#include +#include + +#include + +#include "usbaudio.h" +#include "mixer.h" +#include "helper.h" + +#include "fcp.h" + +/* notify waiting to send to *file */ +struct fcp_notify { + wait_queue_head_t queue; + u32 event; + spinlock_t lock; +}; + +struct fcp_data { + struct usb_mixer_interface *mixer; + + struct mutex mutex; /* serialise access to the device */ + struct completion cmd_done; /* wait for command completion */ + struct file *file; /* hwdep file */ + + struct fcp_notify notify; + + u8 bInterfaceNumber; + u8 bEndpointAddress; + u16 wMaxPacketSize; + u8 bInterval; + + uint16_t step0_resp_size; + uint16_t step2_resp_size; + uint32_t init1_opcode; + uint32_t init2_opcode; + + u8 init; + u16 seq; + + u8 num_meter_slots; + s16 *meter_level_map; + u32 *meter_levels; + struct snd_kcontrol *meter_ctl; + + unsigned int *meter_labels_tlv; + int meter_labels_tlv_size; +}; + +/*** USB Interactions ***/ + +/* FCP Command ACK notification bit */ +#define FCP_NOTIFY_ACK 1 + +/* Vendor-specific USB control requests */ +#define FCP_USB_REQ_STEP0 0 +#define FCP_USB_REQ_CMD_TX 2 +#define FCP_USB_REQ_CMD_RX 3 + +/* Focusrite Control Protocol opcodes that the kernel side needs to + * know about + */ +#define FCP_USB_REBOOT 0x00000003 +#define FCP_USB_GET_METER 0x00001001 +#define FCP_USB_FLASH_ERASE 0x00004002 +#define FCP_USB_FLASH_WRITE 0x00004004 + +#define FCP_USB_METER_LEVELS_GET_MAGIC 1 + +#define FCP_SEGMENT_APP_GOLD 0 + +/* Forward declarations */ +static int fcp_init(struct usb_mixer_interface *mixer, + void *step0_resp, void *step2_resp); + +/* FCP command request/response format */ +struct fcp_usb_packet { + __le32 opcode; + __le16 size; + __le16 seq; + __le32 error; + __le32 pad; + u8 data[]; +}; + +static void fcp_fill_request_header(struct fcp_data *private, + struct fcp_usb_packet *req, + u32 opcode, u16 req_size) +{ + /* sequence must go up by 1 for each request */ + u16 seq = private->seq++; + + req->opcode = cpu_to_le32(opcode); + req->size = cpu_to_le16(req_size); + req->seq = cpu_to_le16(seq); + req->error = 0; + req->pad = 0; +} + +static int fcp_usb_tx(struct usb_device *dev, int interface, + void *buf, u16 size) +{ + return snd_usb_ctl_msg(dev, usb_sndctrlpipe(dev, 0), + FCP_USB_REQ_CMD_TX, + USB_RECIP_INTERFACE | USB_TYPE_CLASS | USB_DIR_OUT, + 0, interface, buf, size); +} + +static int fcp_usb_rx(struct usb_device *dev, int interface, + void *buf, u16 size) +{ + return snd_usb_ctl_msg(dev, usb_rcvctrlpipe(dev, 0), + FCP_USB_REQ_CMD_RX, + USB_RECIP_INTERFACE | USB_TYPE_CLASS | USB_DIR_IN, + 0, interface, buf, size); +} + +/* Send an FCP command and get the response */ +static int fcp_usb(struct usb_mixer_interface *mixer, u32 opcode, + const void *req_data, u16 req_size, + void *resp_data, u16 resp_size) +{ + struct fcp_data *private = mixer->private_data; + struct usb_device *dev = mixer->chip->dev; + struct fcp_usb_packet *req __free(kfree) = NULL; + struct fcp_usb_packet *resp __free(kfree) = NULL; + size_t req_buf_size = struct_size(req, data, req_size); + size_t resp_buf_size = struct_size(resp, data, resp_size); + int retries = 0; + const int max_retries = 5; + int err; + + if (!mixer->urb) + return -ENODEV; + + req = kmalloc(req_buf_size, GFP_KERNEL); + if (!req) + return -ENOMEM; + + resp = kmalloc(resp_buf_size, GFP_KERNEL); + if (!resp) + return -ENOMEM; + + /* build request message */ + fcp_fill_request_header(private, req, opcode, req_size); + if (req_size) + memcpy(req->data, req_data, req_size); + + /* send the request and retry on EPROTO */ +retry: + err = fcp_usb_tx(dev, private->bInterfaceNumber, req, req_buf_size); + if (err == -EPROTO && ++retries <= max_retries) { + msleep(1 << (retries - 1)); + goto retry; + } + + if (err != req_buf_size) { + usb_audio_err(mixer->chip, + "FCP request %08x failed: %d\n", opcode, err); + return -EINVAL; + } + + if (!wait_for_completion_timeout(&private->cmd_done, + msecs_to_jiffies(1000))) { + usb_audio_err(mixer->chip, + "FCP request %08x timed out\n", opcode); + + return -ETIMEDOUT; + } + + /* send a second message to get the response */ + err = fcp_usb_rx(dev, private->bInterfaceNumber, resp, resp_buf_size); + + /* validate the response */ + + if (err < 0) { + + /* ESHUTDOWN and EPROTO are valid responses to a + * reboot request + */ + if (opcode == FCP_USB_REBOOT && + (err == -ESHUTDOWN || err == -EPROTO)) + return 0; + + usb_audio_err(mixer->chip, + "FCP read response %08x failed: %d\n", + opcode, err); + return -EINVAL; + } + + if (err < sizeof(*resp)) { + usb_audio_err(mixer->chip, + "FCP response %08x too short: %d\n", + opcode, err); + return -EINVAL; + } + + if (req->seq != resp->seq) { + usb_audio_err(mixer->chip, + "FCP response %08x seq mismatch %d/%d\n", + opcode, + le16_to_cpu(req->seq), le16_to_cpu(resp->seq)); + return -EINVAL; + } + + if (req->opcode != resp->opcode) { + usb_audio_err(mixer->chip, + "FCP response %08x opcode mismatch %08x\n", + opcode, le16_to_cpu(resp->opcode)); + return -EINVAL; + } + + if (resp->error) { + usb_audio_err(mixer->chip, + "FCP response %08x error %d\n", + opcode, le32_to_cpu(resp->error)); + return -EINVAL; + } + + if (err != resp_buf_size) { + usb_audio_err(mixer->chip, + "FCP response %08x buffer size mismatch %d/%zu\n", + opcode, err, resp_buf_size); + return -EINVAL; + } + + if (resp_size != le16_to_cpu(resp->size)) { + usb_audio_err(mixer->chip, + "FCP response %08x size mismatch %d/%d\n", + opcode, resp_size, le16_to_cpu(resp->size)); + return -EINVAL; + } + + if (resp_data && resp_size > 0) + memcpy(resp_data, resp->data, resp_size); + + return 0; +} + +static int fcp_reinit(struct usb_mixer_interface *mixer) +{ + struct fcp_data *private = mixer->private_data; + void *step0_resp __free(kfree) = NULL; + void *step2_resp __free(kfree) = NULL; + + if (mixer->urb) + return 0; + + step0_resp = kmalloc(private->step0_resp_size, GFP_KERNEL); + if (!step0_resp) + return -ENOMEM; + step2_resp = kmalloc(private->step2_resp_size, GFP_KERNEL); + if (!step2_resp) + return -ENOMEM; + + return fcp_init(mixer, step0_resp, step2_resp); +} + +/*** Control Functions ***/ + +/* helper function to create a new control */ +static int fcp_add_new_ctl(struct usb_mixer_interface *mixer, + const struct snd_kcontrol_new *ncontrol, + int index, int channels, const char *name, + struct snd_kcontrol **kctl_return) +{ + struct snd_kcontrol *kctl; + struct usb_mixer_elem_info *elem; + int err; + + elem = kzalloc(sizeof(*elem), GFP_KERNEL); + if (!elem) + return -ENOMEM; + + /* We set USB_MIXER_BESPOKEN type, so that the core USB mixer code + * ignores them for resume and other operations. + * Also, the head.id field is set to 0, as we don't use this field. + */ + elem->head.mixer = mixer; + elem->control = index; + elem->head.id = 0; + elem->channels = channels; + elem->val_type = USB_MIXER_BESPOKEN; + + kctl = snd_ctl_new1(ncontrol, elem); + if (!kctl) { + kfree(elem); + return -ENOMEM; + } + kctl->private_free = snd_usb_mixer_elem_free; + + strscpy(kctl->id.name, name, sizeof(kctl->id.name)); + + err = snd_usb_mixer_add_control(&elem->head, kctl); + if (err < 0) + return err; + + if (kctl_return) + *kctl_return = kctl; + + return 0; +} + +/*** Level Meter Control ***/ + +static int fcp_meter_ctl_info(struct snd_kcontrol *kctl, + struct snd_ctl_elem_info *uinfo) +{ + struct usb_mixer_elem_info *elem = kctl->private_data; + + uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER; + uinfo->count = elem->channels; + uinfo->value.integer.min = 0; + uinfo->value.integer.max = 4095; + uinfo->value.integer.step = 1; + return 0; +} + +static int fcp_meter_ctl_get(struct snd_kcontrol *kctl, + struct snd_ctl_elem_value *ucontrol) +{ + struct usb_mixer_elem_info *elem = kctl->private_data; + struct usb_mixer_interface *mixer = elem->head.mixer; + struct fcp_data *private = mixer->private_data; + int num_meter_slots, resp_size; + u32 *resp = private->meter_levels; + int i, err = 0; + + struct { + __le16 pad; + __le16 num_meters; + __le32 magic; + } __packed req; + + guard(mutex)(&private->mutex); + + err = fcp_reinit(mixer); + if (err < 0) + return err; + + num_meter_slots = private->num_meter_slots; + resp_size = num_meter_slots * sizeof(u32); + + req.pad = 0; + req.num_meters = cpu_to_le16(num_meter_slots); + req.magic = cpu_to_le32(FCP_USB_METER_LEVELS_GET_MAGIC); + err = fcp_usb(mixer, FCP_USB_GET_METER, + &req, sizeof(req), resp, resp_size); + if (err < 0) + return err; + + /* copy & translate from resp[] using meter_level_map[] */ + for (i = 0; i < elem->channels; i++) { + int idx = private->meter_level_map[i]; + int value = idx < 0 ? 0 : le32_to_cpu(resp[idx]); + + ucontrol->value.integer.value[i] = value; + } + + return 0; +} + +static int fcp_meter_tlv_callback(struct snd_kcontrol *kctl, + int op_flag, unsigned int size, + unsigned int __user *tlv) +{ + struct usb_mixer_elem_info *elem = kctl->private_data; + struct usb_mixer_interface *mixer = elem->head.mixer; + struct fcp_data *private = mixer->private_data; + + guard(mutex)(&private->mutex); + + if (op_flag == SNDRV_CTL_TLV_OP_READ) { + if (private->meter_labels_tlv_size == 0) + return 0; + + if (size > private->meter_labels_tlv_size) + size = private->meter_labels_tlv_size; + + if (copy_to_user(tlv, private->meter_labels_tlv, size)) + return -EFAULT; + + return size; + } + + return -EINVAL; +} + +static const struct snd_kcontrol_new fcp_meter_ctl = { + .iface = SNDRV_CTL_ELEM_IFACE_PCM, + .access = SNDRV_CTL_ELEM_ACCESS_READ | + SNDRV_CTL_ELEM_ACCESS_VOLATILE, + .info = fcp_meter_ctl_info, + .get = fcp_meter_ctl_get, + .tlv = { .c = fcp_meter_tlv_callback }, +}; + +/*** hwdep interface ***/ + +/* FCP initialisation */ +static int fcp_ioctl_init(struct usb_mixer_interface *mixer, + struct fcp_init __user *arg) +{ + struct fcp_init init; + struct usb_device *dev = mixer->chip->dev; + struct fcp_data *private = mixer->private_data; + void *resp __free(kfree) = NULL; + void *step2_resp; + int err, buf_size; + + if (usb_pipe_type_check(dev, usb_sndctrlpipe(dev, 0))) + return -EINVAL; + + /* Get initialisation parameters */ + if (copy_from_user(&init, arg, sizeof(init))) + return -EFAULT; + + /* Validate the response sizes */ + if (init.step0_resp_size < 1 || + init.step0_resp_size > 255 || + init.step2_resp_size < 1 || + init.step2_resp_size > 255) + return -EINVAL; + + /* Allocate response buffer */ + buf_size = init.step0_resp_size + init.step2_resp_size; + + resp = kmalloc(buf_size, GFP_KERNEL); + if (!resp) + return -ENOMEM; + + private->step0_resp_size = init.step0_resp_size; + private->step2_resp_size = init.step2_resp_size; + private->init1_opcode = init.init1_opcode; + private->init2_opcode = init.init2_opcode; + + step2_resp = resp + private->step0_resp_size; + + err = fcp_init(mixer, resp, step2_resp); + if (err < 0) + return err; + + if (copy_to_user(arg->resp, resp, buf_size)) + return -EFAULT; + + return 0; +} + +/* Check that the command is allowed + * Don't permit erasing/writing segment 0 (App_Gold) + */ +static int fcp_validate_cmd(u32 opcode, void *data, u16 size) +{ + if (opcode == FCP_USB_FLASH_ERASE) { + struct { + __le32 segment_num; + __le32 pad; + } __packed *req = data; + + if (size != sizeof(*req)) + return -EINVAL; + + if (le32_to_cpu(req->segment_num) == FCP_SEGMENT_APP_GOLD) + return -EPERM; + + if (req->pad != 0) + return -EINVAL; + + } else if (opcode == FCP_USB_FLASH_WRITE) { + struct { + __le32 segment_num; + __le32 offset; + __le32 pad; + u8 data[]; + } __packed *req = data; + + if (size < sizeof(*req)) + return -EINVAL; + + if (le32_to_cpu(req->segment_num) == FCP_SEGMENT_APP_GOLD) + return -EPERM; + + if (req->pad != 0) + return -EINVAL; + } + + return 0; +} + +/* Execute an FCP command specified by the user */ +static int fcp_ioctl_cmd(struct usb_mixer_interface *mixer, + struct fcp_cmd __user *arg) +{ + struct fcp_cmd cmd; + int err, buf_size; + void *data __free(kfree) = NULL; + + /* get opcode and request/response size */ + if (copy_from_user(&cmd, arg, sizeof(cmd))) + return -EFAULT; + + /* validate request and response sizes */ + if (cmd.req_size > 4096 || cmd.resp_size > 4096) + return -EINVAL; + + /* reinit if needed */ + err = fcp_reinit(mixer); + if (err < 0) + return err; + + /* allocate request/response buffer */ + buf_size = max(cmd.req_size, cmd.resp_size); + + if (buf_size > 0) { + data = kmalloc(buf_size, GFP_KERNEL); + if (!data) + return -ENOMEM; + } + + /* copy request from user */ + if (cmd.req_size > 0) + if (copy_from_user(data, arg->data, cmd.req_size)) + return -EFAULT; + + /* check that the command is allowed */ + err = fcp_validate_cmd(cmd.opcode, data, cmd.req_size); + if (err < 0) + return err; + + /* send request, get response */ + err = fcp_usb(mixer, cmd.opcode, + data, cmd.req_size, data, cmd.resp_size); + if (err < 0) + return err; + + /* copy response to user */ + if (cmd.resp_size > 0) + if (copy_to_user(arg->data, data, cmd.resp_size)) + return -EFAULT; + + return 0; +} + +/* Validate the Level Meter map passed by the user */ +static int validate_meter_map(const s16 *map, int map_size, int meter_slots) +{ + int i; + + for (i = 0; i < map_size; i++) + if (map[i] < -1 || map[i] >= meter_slots) + return -EINVAL; + + return 0; +} + +/* Set the Level Meter map and add the control */ +static int fcp_ioctl_set_meter_map(struct usb_mixer_interface *mixer, + struct fcp_meter_map __user *arg) +{ + struct fcp_meter_map map; + struct fcp_data *private = mixer->private_data; + s16 *tmp_map __free(kfree) = NULL; + int err; + + if (copy_from_user(&map, arg, sizeof(map))) + return -EFAULT; + + /* Don't allow changing the map size or meter slots once set */ + if (private->meter_ctl) { + struct usb_mixer_elem_info *elem = + private->meter_ctl->private_data; + + if (map.map_size != elem->channels || + map.meter_slots != private->num_meter_slots) + return -EINVAL; + } + + /* Validate the map size */ + if (map.map_size < 1 || map.map_size > 255 || + map.meter_slots < 1 || map.meter_slots > 255) + return -EINVAL; + + /* Allocate and copy the map data */ + tmp_map = kmalloc_array(map.map_size, sizeof(s16), GFP_KERNEL); + if (!tmp_map) + return -ENOMEM; + + if (copy_from_user(tmp_map, arg->map, map.map_size * sizeof(s16))) + return -EFAULT; + + err = validate_meter_map(tmp_map, map.map_size, map.meter_slots); + if (err < 0) + return err; + + /* If the control doesn't exist, create it */ + if (!private->meter_ctl) { + s16 *new_map __free(kfree) = NULL; + u32 *meter_levels __free(kfree) = NULL; + + /* Allocate buffer for the map */ + new_map = kmalloc_array(map.map_size, sizeof(s16), GFP_KERNEL); + if (!new_map) + return -ENOMEM; + + /* Allocate buffer for reading meter levels */ + meter_levels = kmalloc_array(map.meter_slots, sizeof(u32), + GFP_KERNEL); + if (!meter_levels) + return -ENOMEM; + + /* Create the Level Meter control */ + err = fcp_add_new_ctl(mixer, &fcp_meter_ctl, 0, map.map_size, + "Level Meter", &private->meter_ctl); + if (err < 0) + return err; + + /* Success; save the pointers in private and don't free them */ + private->meter_level_map = new_map; + private->meter_levels = meter_levels; + private->num_meter_slots = map.meter_slots; + new_map = NULL; + meter_levels = NULL; + } + + /* Install the new map */ + memcpy(private->meter_level_map, tmp_map, map.map_size * sizeof(s16)); + + return 0; +} + +/* Set the Level Meter labels */ +static int fcp_ioctl_set_meter_labels(struct usb_mixer_interface *mixer, + struct fcp_meter_labels __user *arg) +{ + struct fcp_meter_labels labels; + struct fcp_data *private = mixer->private_data; + unsigned int *tlv_data; + unsigned int tlv_size, data_size; + + if (copy_from_user(&labels, arg, sizeof(labels))) + return -EFAULT; + + /* Remove existing labels if size is zero */ + if (!labels.labels_size) { + + /* Clear TLV read/callback bits if labels were present */ + if (private->meter_labels_tlv) { + private->meter_ctl->vd[0].access &= + ~(SNDRV_CTL_ELEM_ACCESS_TLV_READ | + SNDRV_CTL_ELEM_ACCESS_TLV_CALLBACK); + snd_ctl_notify(mixer->chip->card, + SNDRV_CTL_EVENT_MASK_INFO, + &private->meter_ctl->id); + } + + kfree(private->meter_labels_tlv); + private->meter_labels_tlv = NULL; + private->meter_labels_tlv_size = 0; + + return 0; + } + + /* Validate size */ + if (labels.labels_size > 4096) + return -EINVAL; + + /* Calculate padded data size */ + data_size = ALIGN(labels.labels_size, sizeof(unsigned int)); + + /* Calculate total TLV size including header */ + tlv_size = sizeof(unsigned int) * 2 + data_size; + + /* Allocate, set up TLV header, and copy the labels data */ + tlv_data = kzalloc(tlv_size, GFP_KERNEL); + if (!tlv_data) + return -ENOMEM; + tlv_data[0] = SNDRV_CTL_TLVT_FCP_CHANNEL_LABELS; + tlv_data[1] = data_size; + if (copy_from_user(&tlv_data[2], arg->labels, labels.labels_size)) { + kfree(tlv_data); + return -EFAULT; + } + + /* Set TLV read/callback bits if labels weren't present */ + if (!private->meter_labels_tlv) { + private->meter_ctl->vd[0].access |= + SNDRV_CTL_ELEM_ACCESS_TLV_READ | + SNDRV_CTL_ELEM_ACCESS_TLV_CALLBACK; + snd_ctl_notify(mixer->chip->card, + SNDRV_CTL_EVENT_MASK_INFO, + &private->meter_ctl->id); + } + + /* Swap in the new labels */ + kfree(private->meter_labels_tlv); + private->meter_labels_tlv = tlv_data; + private->meter_labels_tlv_size = tlv_size; + + return 0; +} + +static int fcp_hwdep_open(struct snd_hwdep *hw, struct file *file) +{ + struct usb_mixer_interface *mixer = hw->private_data; + struct fcp_data *private = mixer->private_data; + + if (!capable(CAP_SYS_RAWIO)) + return -EPERM; + + private->file = file; + + return 0; +} + +static int fcp_hwdep_ioctl(struct snd_hwdep *hw, struct file *file, + unsigned int cmd, unsigned long arg) +{ + struct usb_mixer_interface *mixer = hw->private_data; + struct fcp_data *private = mixer->private_data; + void __user *argp = (void __user *)arg; + + guard(mutex)(&private->mutex); + + switch (cmd) { + + case FCP_IOCTL_PVERSION: + return put_user(FCP_HWDEP_VERSION, + (int __user *)argp) ? -EFAULT : 0; + break; + + case FCP_IOCTL_INIT: + return fcp_ioctl_init(mixer, argp); + + case FCP_IOCTL_CMD: + if (!private->init) + return -EINVAL; + return fcp_ioctl_cmd(mixer, argp); + + case FCP_IOCTL_SET_METER_MAP: + if (!private->init) + return -EINVAL; + return fcp_ioctl_set_meter_map(mixer, argp); + + case FCP_IOCTL_SET_METER_LABELS: + if (!private->init) + return -EINVAL; + if (!private->meter_ctl) + return -EINVAL; + return fcp_ioctl_set_meter_labels(mixer, argp); + + default: + return -ENOIOCTLCMD; + } + + /* not reached */ +} + +static ssize_t fcp_hwdep_read(struct snd_hwdep *hw, char __user *buf, + ssize_t count, loff_t *offset) +{ + struct usb_mixer_interface *mixer = hw->private_data; + struct fcp_data *private = mixer->private_data; + unsigned long flags; + ssize_t ret = 0; + u32 event; + + if (count < sizeof(event)) + return -EINVAL; + + ret = wait_event_interruptible(private->notify.queue, + private->notify.event); + if (ret) + return ret; + + spin_lock_irqsave(&private->notify.lock, flags); + event = private->notify.event; + private->notify.event = 0; + spin_unlock_irqrestore(&private->notify.lock, flags); + + if (copy_to_user(buf, &event, sizeof(event))) + return -EFAULT; + + return sizeof(event); +} + +static unsigned int fcp_hwdep_poll(struct snd_hwdep *hw, + struct file *file, + poll_table *wait) +{ + struct usb_mixer_interface *mixer = hw->private_data; + struct fcp_data *private = mixer->private_data; + unsigned int mask = 0; + + poll_wait(file, &private->notify.queue, wait); + + if (private->notify.event) + mask |= POLLIN | POLLRDNORM; + + return mask; +} + +static int fcp_hwdep_release(struct snd_hwdep *hw, struct file *file) +{ + struct usb_mixer_interface *mixer = hw->private_data; + struct fcp_data *private = mixer->private_data; + + if (!private) + return 0; + + private->file = NULL; + + return 0; +} + +static int fcp_hwdep_init(struct usb_mixer_interface *mixer) +{ + struct snd_hwdep *hw; + int err; + + err = snd_hwdep_new(mixer->chip->card, "Focusrite Control", 0, &hw); + if (err < 0) + return err; + + hw->private_data = mixer; + hw->exclusive = 1; + hw->ops.open = fcp_hwdep_open; + hw->ops.ioctl = fcp_hwdep_ioctl; + hw->ops.ioctl_compat = fcp_hwdep_ioctl; + hw->ops.read = fcp_hwdep_read; + hw->ops.poll = fcp_hwdep_poll; + hw->ops.release = fcp_hwdep_release; + + return 0; +} + +/*** Cleanup ***/ + +static void fcp_cleanup_urb(struct usb_mixer_interface *mixer) +{ + if (!mixer->urb) + return; + + usb_kill_urb(mixer->urb); + kfree(mixer->urb->transfer_buffer); + usb_free_urb(mixer->urb); + mixer->urb = NULL; +} + +static void fcp_private_free(struct usb_mixer_interface *mixer) +{ + struct fcp_data *private = mixer->private_data; + + fcp_cleanup_urb(mixer); + + kfree(private->meter_level_map); + kfree(private->meter_levels); + kfree(private->meter_labels_tlv); + kfree(private); + mixer->private_data = NULL; +} + +static void fcp_private_suspend(struct usb_mixer_interface *mixer) +{ + fcp_cleanup_urb(mixer); +} + +/*** Callbacks ***/ + +static void fcp_notify(struct urb *urb) +{ + struct usb_mixer_interface *mixer = urb->context; + struct fcp_data *private = mixer->private_data; + int len = urb->actual_length; + int ustatus = urb->status; + u32 data; + + if (ustatus != 0 || len != 8) + goto requeue; + + data = le32_to_cpu(*(__le32 *)urb->transfer_buffer); + + /* Handle command acknowledgement */ + if (data & FCP_NOTIFY_ACK) { + complete(&private->cmd_done); + data &= ~FCP_NOTIFY_ACK; + } + + if (data) { + unsigned long flags; + + spin_lock_irqsave(&private->notify.lock, flags); + private->notify.event |= data; + spin_unlock_irqrestore(&private->notify.lock, flags); + + wake_up_interruptible(&private->notify.queue); + } + +requeue: + if (ustatus != -ENOENT && + ustatus != -ECONNRESET && + ustatus != -ESHUTDOWN) { + urb->dev = mixer->chip->dev; + usb_submit_urb(urb, GFP_ATOMIC); + } else { + complete(&private->cmd_done); + } +} + +/* Submit a URB to receive notifications from the device */ +static int fcp_init_notify(struct usb_mixer_interface *mixer) +{ + struct usb_device *dev = mixer->chip->dev; + struct fcp_data *private = mixer->private_data; + unsigned int pipe = usb_rcvintpipe(dev, private->bEndpointAddress); + void *transfer_buffer; + int err; + + /* Already set up */ + if (mixer->urb) + return 0; + + if (usb_pipe_type_check(dev, pipe)) + return -EINVAL; + + mixer->urb = usb_alloc_urb(0, GFP_KERNEL); + if (!mixer->urb) + return -ENOMEM; + + transfer_buffer = kmalloc(private->wMaxPacketSize, GFP_KERNEL); + if (!transfer_buffer) { + usb_free_urb(mixer->urb); + mixer->urb = NULL; + return -ENOMEM; + } + + usb_fill_int_urb(mixer->urb, dev, pipe, + transfer_buffer, private->wMaxPacketSize, + fcp_notify, mixer, private->bInterval); + + init_completion(&private->cmd_done); + + err = usb_submit_urb(mixer->urb, GFP_KERNEL); + if (err) { + usb_audio_err(mixer->chip, + "%s: usb_submit_urb failed: %d\n", + __func__, err); + kfree(transfer_buffer); + usb_free_urb(mixer->urb); + mixer->urb = NULL; + } + + return err; +} + +/*** Initialisation ***/ + +static int fcp_init(struct usb_mixer_interface *mixer, + void *step0_resp, void *step2_resp) +{ + struct fcp_data *private = mixer->private_data; + struct usb_device *dev = mixer->chip->dev; + int err; + + err = snd_usb_ctl_msg(dev, usb_rcvctrlpipe(dev, 0), + FCP_USB_REQ_STEP0, + USB_RECIP_INTERFACE | USB_TYPE_CLASS | USB_DIR_IN, + 0, private->bInterfaceNumber, + step0_resp, private->step0_resp_size); + if (err < 0) + return err; + + err = fcp_init_notify(mixer); + if (err < 0) + return err; + + private->seq = 0; + private->init = 1; + + err = fcp_usb(mixer, private->init1_opcode, NULL, 0, NULL, 0); + if (err < 0) + return err; + + err = fcp_usb(mixer, private->init2_opcode, + NULL, 0, step2_resp, private->step2_resp_size); + if (err < 0) + return err; + + return 0; +} + +static int fcp_init_private(struct usb_mixer_interface *mixer) +{ + struct fcp_data *private = + kzalloc(sizeof(struct fcp_data), GFP_KERNEL); + + if (!private) + return -ENOMEM; + + mutex_init(&private->mutex); + init_waitqueue_head(&private->notify.queue); + spin_lock_init(&private->notify.lock); + + mixer->private_data = private; + mixer->private_free = fcp_private_free; + mixer->private_suspend = fcp_private_suspend; + + private->mixer = mixer; + + return 0; +} + +/* Look through the interface descriptors for the Focusrite Control + * interface (bInterfaceClass = 255 Vendor Specific Class) and set + * bInterfaceNumber, bEndpointAddress, wMaxPacketSize, and bInterval + * in private + */ +static int fcp_find_fc_interface(struct usb_mixer_interface *mixer) +{ + struct snd_usb_audio *chip = mixer->chip; + struct fcp_data *private = mixer->private_data; + struct usb_host_config *config = chip->dev->actconfig; + int i; + + for (i = 0; i < config->desc.bNumInterfaces; i++) { + struct usb_interface *intf = config->interface[i]; + struct usb_interface_descriptor *desc = + &intf->altsetting[0].desc; + struct usb_endpoint_descriptor *epd; + + if (desc->bInterfaceClass != 255) + continue; + + epd = get_endpoint(intf->altsetting, 0); + private->bInterfaceNumber = desc->bInterfaceNumber; + private->bEndpointAddress = epd->bEndpointAddress & + USB_ENDPOINT_NUMBER_MASK; + private->wMaxPacketSize = le16_to_cpu(epd->wMaxPacketSize); + private->bInterval = epd->bInterval; + return 0; + } + + usb_audio_err(chip, "Focusrite vendor-specific interface not found\n"); + return -EINVAL; +} + +int snd_fcp_init(struct usb_mixer_interface *mixer) +{ + struct snd_usb_audio *chip = mixer->chip; + int err; + + /* only use UAC_VERSION_2 */ + if (!mixer->protocol) + return 0; + + err = fcp_init_private(mixer); + if (err < 0) + return err; + + err = fcp_find_fc_interface(mixer); + if (err < 0) + return err; + + err = fcp_hwdep_init(mixer); + if (err < 0) + return err; + + usb_audio_info(chip, + "Focusrite Control Protocol Driver ready (pid=0x%04x); " + "report any issues to " + "https://github.com/geoffreybennett/fcp-support/issues", + USB_ID_PRODUCT(chip->usb_id)); + + return err; +} diff --git a/sound/usb/fcp.h b/sound/usb/fcp.h new file mode 100644 index 000000000000..d58cc7db75b1 --- /dev/null +++ b/sound/usb/fcp.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __USBAUDIO_FCP_H +#define __USBAUDIO_FCP_H + +int snd_fcp_init(struct usb_mixer_interface *mixer); + +#endif /* __USBAUDIO_FCP_H */ diff --git a/sound/usb/mixer_quirks.c b/sound/usb/mixer_quirks.c index 23fcd680167d..ed6127b0389f 100644 --- a/sound/usb/mixer_quirks.c +++ b/sound/usb/mixer_quirks.c @@ -38,6 +38,7 @@ #include "mixer_us16x08.h" #include "mixer_s1810c.h" #include "helper.h" +#include "fcp.h" struct std_mono_table { unsigned int unitid, control, cmask; @@ -4090,6 +4091,12 @@ int snd_usb_mixer_apply_create_quirk(struct usb_mixer_interface *mixer) err = snd_scarlett2_init(mixer); break; + case USB_ID(0x1235, 0x821b): /* Focusrite Scarlett 16i16 4th Gen */ + case USB_ID(0x1235, 0x821c): /* Focusrite Scarlett 18i16 4th Gen */ + case USB_ID(0x1235, 0x821d): /* Focusrite Scarlett 18i20 4th Gen */ + err = snd_fcp_init(mixer); + break; + case USB_ID(0x041e, 0x323b): /* Creative Sound Blaster E1 */ err = snd_soundblaster_e1_switch_create(mixer); break; -- cgit v1.2.3 From 3156ceb222414456084d964f43ada071206039b8 Mon Sep 17 00:00:00 2001 From: Rickard Andersson Date: Mon, 16 Dec 2024 09:54:19 +0100 Subject: ubi: Expose interface for detailed erase counters Using the ioctl command 'UBI_IOCECNFO' user space can obtain detailed erase counter information of all blocks of a device. Signed-off-by: Rickard Andersson Reviewed-by: Zhihao Cheng Signed-off-by: Richard Weinberger --- include/uapi/mtd/ubi-user.h | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/mtd/ubi-user.h b/include/uapi/mtd/ubi-user.h index e1571603175e..aa872a41ffb9 100644 --- a/include/uapi/mtd/ubi-user.h +++ b/include/uapi/mtd/ubi-user.h @@ -175,6 +175,8 @@ #define UBI_IOCRPEB _IOW(UBI_IOC_MAGIC, 4, __s32) /* Force scrubbing on the specified PEB */ #define UBI_IOCSPEB _IOW(UBI_IOC_MAGIC, 5, __s32) +/* Read detailed device erase counter information */ +#define UBI_IOCECNFO _IOWR(UBI_IOC_MAGIC, 6, struct ubi_ecinfo_req) /* ioctl commands of the UBI control character device */ @@ -412,6 +414,37 @@ struct ubi_rnvol_req { } ents[UBI_MAX_RNVOL]; } __packed; +/** + * struct ubi_ecinfo_req - a data structure used for requesting and receiving + * erase block counter information from a UBI device. + * + * @start: index of first physical erase block to read (in) + * @length: number of erase counters to read (in) + * @read_length: number of erase counters that was actually read (out) + * @padding: reserved for future, not used, has to be zeroed + * @erase_counters: array of erase counter values (out) + * + * This structure is used to retrieve erase counter information for a specified + * range of PEBs on a UBI device. + * Erase counters are read from @start and attempts to read @length number of + * erase counters. + * The retrieved values are stored in the @erase_counters array. It is the + * responsibility of the caller to allocate enough memory for storing @length + * elements in the @erase_counters array. + * If a block is bad or if the erase counter is unknown the corresponding value + * in the array will be set to -1. + * The @read_length field will indicate the number of erase counters actually + * read. Typically @read_length will be limited due to memory or the number of + * PEBs on the UBI device. + */ +struct ubi_ecinfo_req { + __s32 start; + __s32 length; + __s32 read_length; + __s8 padding[16]; + __s32 erase_counters[]; +} __packed; + /** * struct ubi_leb_change_req - a data structure used in atomic LEB change * requests. -- cgit v1.2.3 From d6658d3338f84173fb55c9d6c6cdfa57f879712d Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Sat, 16 Nov 2024 04:20:45 +0100 Subject: misc: pci_endpoint_test: Add consecutive BAR test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a more advanced BAR test that writes all BARs in one go, and then reads them back and verifies that the value matches the BAR number bitwise OR'ed with offset, this allows us to verify: - The BAR number was what we intended to read - The offset was what we intended to read This allows us to detect potential address translation issues on the EP. Reading back the BAR directly after writing will not allow us to detect the case where inbound address translation on the endpoint incorrectly causes multiple BARs to be redirected to the same memory region (within the EP). Link: https://lore.kernel.org/r/20241116032045.2574168-2-cassel@kernel.org Signed-off-by: Niklas Cassel Signed-off-by: Krzysztof WilczyƄski Signed-off-by: Bjorn Helgaas Reviewed-by: Manivannan Sadhasivam --- drivers/misc/pci_endpoint_test.c | 88 ++++++++++++++++++++++++++++++++++++++++ include/uapi/linux/pcitest.h | 1 + tools/pci/pcitest.c | 16 +++++++- tools/pci/pcitest.sh | 1 + 4 files changed, 105 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/drivers/misc/pci_endpoint_test.c b/drivers/misc/pci_endpoint_test.c index 8a31bd4c237d..5c99da952b7a 100644 --- a/drivers/misc/pci_endpoint_test.c +++ b/drivers/misc/pci_endpoint_test.c @@ -325,6 +325,91 @@ static bool pci_endpoint_test_bar(struct pci_endpoint_test *test, return true; } +static u32 bar_test_pattern_with_offset(enum pci_barno barno, int offset) +{ + u32 val; + + /* Keep the BAR pattern in the top byte. */ + val = bar_test_pattern[barno] & 0xff000000; + /* Store the (partial) offset in the remaining bytes. */ + val |= offset & 0x00ffffff; + + return val; +} + +static bool pci_endpoint_test_bars_write_bar(struct pci_endpoint_test *test, + enum pci_barno barno) +{ + struct pci_dev *pdev = test->pdev; + int j, size; + + size = pci_resource_len(pdev, barno); + + if (barno == test->test_reg_bar) + size = 0x4; + + for (j = 0; j < size; j += 4) + writel_relaxed(bar_test_pattern_with_offset(barno, j), + test->bar[barno] + j); + + return true; +} + +static bool pci_endpoint_test_bars_read_bar(struct pci_endpoint_test *test, + enum pci_barno barno) +{ + struct pci_dev *pdev = test->pdev; + struct device *dev = &pdev->dev; + int j, size; + u32 val; + + size = pci_resource_len(pdev, barno); + + if (barno == test->test_reg_bar) + size = 0x4; + + for (j = 0; j < size; j += 4) { + u32 expected = bar_test_pattern_with_offset(barno, j); + + val = readl_relaxed(test->bar[barno] + j); + if (val != expected) { + dev_err(dev, + "BAR%d incorrect data at offset: %#x, got: %#x expected: %#x\n", + barno, j, val, expected); + return false; + } + } + + return true; +} + +static bool pci_endpoint_test_bars(struct pci_endpoint_test *test) +{ + enum pci_barno bar; + bool ret; + + /* Write all BARs in order (without reading). */ + for (bar = 0; bar < PCI_STD_NUM_BARS; bar++) + if (test->bar[bar]) + pci_endpoint_test_bars_write_bar(test, bar); + + /* + * Read all BARs in order (without writing). + * If there is an address translation issue on the EP, writing one BAR + * might have overwritten another BAR. Ensure that this is not the case. + * (Reading back the BAR directly after writing can not detect this.) + */ + for (bar = 0; bar < PCI_STD_NUM_BARS; bar++) { + if (test->bar[bar]) { + ret = pci_endpoint_test_bars_read_bar(test, bar); + if (!ret) + return ret; + } + } + + return true; +} + static bool pci_endpoint_test_intx_irq(struct pci_endpoint_test *test) { u32 val; @@ -771,6 +856,9 @@ static long pci_endpoint_test_ioctl(struct file *file, unsigned int cmd, goto ret; ret = pci_endpoint_test_bar(test, bar); break; + case PCITEST_BARS: + ret = pci_endpoint_test_bars(test); + break; case PCITEST_INTX_IRQ: ret = pci_endpoint_test_intx_irq(test); break; diff --git a/include/uapi/linux/pcitest.h b/include/uapi/linux/pcitest.h index 94b46b043b53..acd261f49866 100644 --- a/include/uapi/linux/pcitest.h +++ b/include/uapi/linux/pcitest.h @@ -20,6 +20,7 @@ #define PCITEST_MSIX _IOW('P', 0x7, int) #define PCITEST_SET_IRQTYPE _IOW('P', 0x8, int) #define PCITEST_GET_IRQTYPE _IO('P', 0x9) +#define PCITEST_BARS _IO('P', 0xa) #define PCITEST_CLEAR_IRQ _IO('P', 0x10) #define PCITEST_FLAGS_USE_DMA 0x00000001 diff --git a/tools/pci/pcitest.c b/tools/pci/pcitest.c index 7b530d838d40..08f355083754 100644 --- a/tools/pci/pcitest.c +++ b/tools/pci/pcitest.c @@ -22,6 +22,7 @@ static char *irq[] = { "LEGACY", "MSI", "MSI-X" }; struct pci_test { char *device; char barnum; + bool consecutive_bar_test; bool legacyirq; unsigned int msinum; unsigned int msixnum; @@ -57,6 +58,15 @@ static int run_test(struct pci_test *test) fprintf(stdout, "%s\n", result[ret]); } + if (test->consecutive_bar_test) { + ret = ioctl(fd, PCITEST_BARS); + fprintf(stdout, "Consecutive BAR test:\t\t"); + if (ret < 0) + fprintf(stdout, "TEST FAILED\n"); + else + fprintf(stdout, "%s\n", result[ret]); + } + if (test->set_irqtype) { ret = ioctl(fd, PCITEST_SET_IRQTYPE, test->irqtype); fprintf(stdout, "SET IRQ TYPE TO %s:\t\t", irq[test->irqtype]); @@ -172,7 +182,7 @@ int main(int argc, char **argv) /* set default endpoint device */ test->device = "/dev/pci-endpoint-test.0"; - while ((c = getopt(argc, argv, "D:b:m:x:i:deIlhrwcs:")) != EOF) + while ((c = getopt(argc, argv, "D:b:Cm:x:i:deIlhrwcs:")) != EOF) switch (c) { case 'D': test->device = optarg; @@ -182,6 +192,9 @@ int main(int argc, char **argv) if (test->barnum < 0 || test->barnum > 5) goto usage; continue; + case 'C': + test->consecutive_bar_test = true; + continue; case 'l': test->legacyirq = true; continue; @@ -230,6 +243,7 @@ usage: "Options:\n" "\t-D PCI endpoint test device {default: /dev/pci-endpoint-test.0}\n" "\t-b BAR test (bar number between 0..5)\n" + "\t-C Consecutive BAR test\n" "\t-m MSI test (msi number between 1..32)\n" "\t-x \tMSI-X test (msix number between 1..2048)\n" "\t-i \tSet IRQ type (0 - Legacy, 1 - MSI, 2 - MSI-X)\n" diff --git a/tools/pci/pcitest.sh b/tools/pci/pcitest.sh index 75ed48ff2990..770f4d6df34b 100644 --- a/tools/pci/pcitest.sh +++ b/tools/pci/pcitest.sh @@ -11,6 +11,7 @@ do pcitest -b $bar bar=`expr $bar + 1` done +pcitest -C echo echo "Interrupt tests" -- cgit v1.2.3 From e721f619e3ec9bae08bf419c3944cf1e6966c821 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Mon, 20 Jan 2025 11:50:51 -0800 Subject: iommufd: Fix struct iommu_hwpt_pgfault init and padding The iommu_hwpt_pgfault is used to report IO page fault data to userspace, but iommufd_fault_fops_read was never zeroing its padding. This leaks the content of the kernel stack memory to userspace. Also, the iommufd uAPI requires explicit padding and use of __aligned_u64 to ensure ABI compatibility's with 32 bit. pahole result, before: struct iommu_hwpt_pgfault { __u32 flags; /* 0 4 */ __u32 dev_id; /* 4 4 */ __u32 pasid; /* 8 4 */ __u32 grpid; /* 12 4 */ __u32 perm; /* 16 4 */ /* XXX 4 bytes hole, try to pack */ __u64 addr; /* 24 8 */ __u32 length; /* 32 4 */ __u32 cookie; /* 36 4 */ /* size: 40, cachelines: 1, members: 8 */ /* sum members: 36, holes: 1, sum holes: 4 */ /* last cacheline: 40 bytes */ }; pahole result, after: struct iommu_hwpt_pgfault { __u32 flags; /* 0 4 */ __u32 dev_id; /* 4 4 */ __u32 pasid; /* 8 4 */ __u32 grpid; /* 12 4 */ __u32 perm; /* 16 4 */ __u32 __reserved; /* 20 4 */ __u64 addr __attribute__((__aligned__(8))); /* 24 8 */ __u32 length; /* 32 4 */ __u32 cookie; /* 36 4 */ /* size: 40, cachelines: 1, members: 9 */ /* forced alignments: 1 */ /* last cacheline: 40 bytes */ } __attribute__((__aligned__(8))); Fixes: c714f15860fc ("iommufd: Add fault and response message definitions") Link: https://patch.msgid.link/r/20250120195051.2450-1-nicolinc@nvidia.com Cc: stable@vger.kernel.org Reviewed-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Reviewed-by: Kevin Tian Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/fault.c | 2 +- include/uapi/linux/iommufd.h | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/drivers/iommu/iommufd/fault.c b/drivers/iommu/iommufd/fault.c index a9160f4443d2..d9a937450e55 100644 --- a/drivers/iommu/iommufd/fault.c +++ b/drivers/iommu/iommufd/fault.c @@ -263,7 +263,7 @@ static ssize_t iommufd_fault_fops_read(struct file *filep, char __user *buf, { size_t fault_size = sizeof(struct iommu_hwpt_pgfault); struct iommufd_fault *fault = filep->private_data; - struct iommu_hwpt_pgfault data; + struct iommu_hwpt_pgfault data = {}; struct iommufd_device *idev; struct iopf_group *group; struct iopf_fault *iopf; diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index 34810f6ae2b5..78747b24bd0f 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -868,6 +868,7 @@ enum iommu_hwpt_pgfault_perm { * @pasid: Process Address Space ID * @grpid: Page Request Group Index * @perm: Combination of enum iommu_hwpt_pgfault_perm + * @__reserved: Must be 0. * @addr: Fault address * @length: a hint of how much data the requestor is expecting to fetch. For * example, if the PRI initiator knows it is going to do a 10MB @@ -883,7 +884,8 @@ struct iommu_hwpt_pgfault { __u32 pasid; __u32 grpid; __u32 perm; - __u64 addr; + __u32 __reserved; + __aligned_u64 addr; __u32 length; __u32 cookie; }; -- cgit v1.2.3 From cee9b4ef42512a6e57562460a15f18a022c84dda Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 9 Dec 2024 16:13:59 -0500 Subject: nfsd: rework NFS4_SHARE_WANT_* flag handling The delstid draft adds new NFS4_SHARE_WANT_TYPE_MASK values that don't fit neatly into the existing WANT_MASK or WHEN_MASK. Add a new NFS4_SHARE_WANT_MOD_MASK value and redefine NFS4_SHARE_WANT_MASK to include it. Also fix the checks in nfsd4_deleg_xgrade_none_ext() to check for the flags instead of equality, since there may be modifier flags in the value. Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfs4state.c | 4 ++-- fs/nfsd/nfs4xdr.c | 2 +- include/uapi/linux/nfs4.h | 7 +++++-- 3 files changed, 8 insertions(+), 5 deletions(-) (limited to 'include/uapi') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index e7b26806797a..780db0992c9f 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -6224,10 +6224,10 @@ static void nfsd4_deleg_xgrade_none_ext(struct nfsd4_open *open, struct nfs4_delegation *dp) { if (deleg_is_write(dp->dl_type)) { - if (open->op_deleg_want == OPEN4_SHARE_ACCESS_WANT_READ_DELEG) { + if (open->op_deleg_want & OPEN4_SHARE_ACCESS_WANT_READ_DELEG) { open->op_delegate_type = OPEN_DELEGATE_NONE_EXT; open->op_why_no_deleg = WND4_NOT_SUPP_DOWNGRADE; - } else if (open->op_deleg_want == OPEN4_SHARE_ACCESS_WANT_WRITE_DELEG) { + } else if (open->op_deleg_want & OPEN4_SHARE_ACCESS_WANT_WRITE_DELEG) { open->op_delegate_type = OPEN_DELEGATE_NONE_EXT; open->op_why_no_deleg = WND4_NOT_SUPP_UPGRADE; } diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 147564a74b46..5407fd0d464c 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1067,7 +1067,7 @@ static __be32 nfsd4_decode_share_access(struct nfsd4_compoundargs *argp, u32 *sh return nfs_ok; if (!argp->minorversion) return nfserr_bad_xdr; - switch (w & NFS4_SHARE_WANT_MASK) { + switch (w & NFS4_SHARE_WANT_TYPE_MASK) { case OPEN4_SHARE_ACCESS_WANT_NO_PREFERENCE: case OPEN4_SHARE_ACCESS_WANT_READ_DELEG: case OPEN4_SHARE_ACCESS_WANT_WRITE_DELEG: diff --git a/include/uapi/linux/nfs4.h b/include/uapi/linux/nfs4.h index caf4db2fcbb9..4273e0249fcb 100644 --- a/include/uapi/linux/nfs4.h +++ b/include/uapi/linux/nfs4.h @@ -58,7 +58,7 @@ #define NFS4_SHARE_DENY_BOTH 0x0003 /* nfs41 */ -#define NFS4_SHARE_WANT_MASK 0xFF00 +#define NFS4_SHARE_WANT_TYPE_MASK 0xFF00 #define NFS4_SHARE_WANT_NO_PREFERENCE 0x0000 #define NFS4_SHARE_WANT_READ_DELEG 0x0100 #define NFS4_SHARE_WANT_WRITE_DELEG 0x0200 @@ -66,13 +66,16 @@ #define NFS4_SHARE_WANT_NO_DELEG 0x0400 #define NFS4_SHARE_WANT_CANCEL 0x0500 -#define NFS4_SHARE_WHEN_MASK 0xF0000 +#define NFS4_SHARE_WHEN_MASK 0xF0000 #define NFS4_SHARE_SIGNAL_DELEG_WHEN_RESRC_AVAIL 0x10000 #define NFS4_SHARE_PUSH_DELEG_WHEN_UNCONTENDED 0x20000 +#define NFS4_SHARE_WANT_MOD_MASK 0xF00000 #define NFS4_SHARE_WANT_DELEG_TIMESTAMPS 0x100000 #define NFS4_SHARE_WANT_OPEN_XOR_DELEGATION 0x200000 +#define NFS4_SHARE_WANT_MASK (NFS4_SHARE_WANT_TYPE_MASK | NFS4_SHARE_WANT_MOD_MASK) + #define NFS4_CDFC4_FORE 0x1 #define NFS4_CDFC4_BACK 0x2 #define NFS4_CDFC4_BOTH 0x3 -- cgit v1.2.3 From 188973e3536a18aebee3af486cd3d2ef82b09f88 Mon Sep 17 00:00:00 2001 From: Dongdong Zhang Date: Mon, 16 Dec 2024 09:35:36 +0800 Subject: PCI: Remove redundant PCI_VSEC_HDR and PCI_VSEC_HDR_LEN_SHIFT Remove duplicate macro PCI_VSEC_HDR and its related macro PCI_VSEC_HDR_LEN_SHIFT from pci_regs.h to avoid redundancy and inconsistencies. Update VFIO PCI code to use PCI_VNDR_HEADER and PCI_VNDR_HEADER_LEN() for consistent naming and functionality. These changes aim to streamline header handling while minimizing impact, given the niche usage of these macros in userspace. Link: https://lore.kernel.org/r/20241216013536.4487-1-zhangdongdong@eswincomputing.com Signed-off-by: Dongdong Zhang Signed-off-by: Bjorn Helgaas Acked-by: Alex Williamson --- drivers/vfio/pci/vfio_pci_config.c | 5 +++-- include/uapi/linux/pci_regs.h | 3 --- 2 files changed, 3 insertions(+), 5 deletions(-) (limited to 'include/uapi') diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c index ea2745c1ac5e..5572fd99b921 100644 --- a/drivers/vfio/pci/vfio_pci_config.c +++ b/drivers/vfio/pci/vfio_pci_config.c @@ -1389,11 +1389,12 @@ static int vfio_ext_cap_len(struct vfio_pci_core_device *vdev, u16 ecap, u16 epo switch (ecap) { case PCI_EXT_CAP_ID_VNDR: - ret = pci_read_config_dword(pdev, epos + PCI_VSEC_HDR, &dword); + ret = pci_read_config_dword(pdev, epos + PCI_VNDR_HEADER, + &dword); if (ret) return pcibios_err_to_errno(ret); - return dword >> PCI_VSEC_HDR_LEN_SHIFT; + return PCI_VNDR_HEADER_LEN(dword); case PCI_EXT_CAP_ID_VC: case PCI_EXT_CAP_ID_VC9: case PCI_EXT_CAP_ID_MFVC: diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index 02d0ba2999e0..c2ccc85c43ed 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -1001,9 +1001,6 @@ #define PCI_ACS_CTRL 0x06 /* ACS Control Register */ #define PCI_ACS_EGRESS_CTL_V 0x08 /* ACS Egress Control Vector */ -#define PCI_VSEC_HDR 4 /* extended cap - vendor-specific */ -#define PCI_VSEC_HDR_LEN_SHIFT 20 /* shift for length field */ - /* SATA capability */ #define PCI_SATA_REGS 4 /* SATA REGs specifier */ #define PCI_SATA_REGS_MASK 0xF /* location - BAR#/inline */ -- cgit v1.2.3 From 24fe962c86f55347385933a1b06ca71b60854690 Mon Sep 17 00:00:00 2001 From: Bernd Schubert Date: Mon, 20 Jan 2025 02:28:59 +0100 Subject: fuse: {io-uring} Handle SQEs - register commands This adds basic support for ring SQEs (with opcode=IORING_OP_URING_CMD). For now only FUSE_IO_URING_CMD_REGISTER is handled to register queue entries. Signed-off-by: Bernd Schubert Reviewed-by: Pavel Begunkov # io_uring Reviewed-by: Luis Henriques Signed-off-by: Miklos Szeredi --- fs/fuse/Kconfig | 12 ++ fs/fuse/Makefile | 1 + fs/fuse/dev_uring.c | 326 ++++++++++++++++++++++++++++++++++++++++++++++ fs/fuse/dev_uring_i.h | 113 ++++++++++++++++ fs/fuse/fuse_i.h | 5 + fs/fuse/inode.c | 10 ++ include/uapi/linux/fuse.h | 76 ++++++++++- 7 files changed, 542 insertions(+), 1 deletion(-) create mode 100644 fs/fuse/dev_uring.c create mode 100644 fs/fuse/dev_uring_i.h (limited to 'include/uapi') diff --git a/fs/fuse/Kconfig b/fs/fuse/Kconfig index 8674dbfbe59d..ca215a3cba3e 100644 --- a/fs/fuse/Kconfig +++ b/fs/fuse/Kconfig @@ -63,3 +63,15 @@ config FUSE_PASSTHROUGH to be performed directly on a backing file. If you want to allow passthrough operations, answer Y. + +config FUSE_IO_URING + bool "FUSE communication over io-uring" + default y + depends on FUSE_FS + depends on IO_URING + help + This allows sending FUSE requests over the io-uring interface and + also adds request core affinity. + + If you want to allow fuse server/client communication through io-uring, + answer Y diff --git a/fs/fuse/Makefile b/fs/fuse/Makefile index 2c372180d631..3f0f312a31c1 100644 --- a/fs/fuse/Makefile +++ b/fs/fuse/Makefile @@ -15,5 +15,6 @@ fuse-y += iomode.o fuse-$(CONFIG_FUSE_DAX) += dax.o fuse-$(CONFIG_FUSE_PASSTHROUGH) += passthrough.o fuse-$(CONFIG_SYSCTL) += sysctl.o +fuse-$(CONFIG_FUSE_IO_URING) += dev_uring.o virtiofs-y := virtio_fs.o diff --git a/fs/fuse/dev_uring.c b/fs/fuse/dev_uring.c new file mode 100644 index 000000000000..42092a234570 --- /dev/null +++ b/fs/fuse/dev_uring.c @@ -0,0 +1,326 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * FUSE: Filesystem in Userspace + * Copyright (c) 2023-2024 DataDirect Networks. + */ + +#include "fuse_i.h" +#include "dev_uring_i.h" +#include "fuse_dev_i.h" + +#include +#include + +static bool __read_mostly enable_uring; +module_param(enable_uring, bool, 0644); +MODULE_PARM_DESC(enable_uring, + "Enable userspace communication through io-uring"); + +#define FUSE_URING_IOV_SEGS 2 /* header and payload */ + + +bool fuse_uring_enabled(void) +{ + return enable_uring; +} + +void fuse_uring_destruct(struct fuse_conn *fc) +{ + struct fuse_ring *ring = fc->ring; + int qid; + + if (!ring) + return; + + for (qid = 0; qid < ring->nr_queues; qid++) { + struct fuse_ring_queue *queue = ring->queues[qid]; + + if (!queue) + continue; + + WARN_ON(!list_empty(&queue->ent_avail_queue)); + WARN_ON(!list_empty(&queue->ent_commit_queue)); + + kfree(queue); + ring->queues[qid] = NULL; + } + + kfree(ring->queues); + kfree(ring); + fc->ring = NULL; +} + +/* + * Basic ring setup for this connection based on the provided configuration + */ +static struct fuse_ring *fuse_uring_create(struct fuse_conn *fc) +{ + struct fuse_ring *ring; + size_t nr_queues = num_possible_cpus(); + struct fuse_ring *res = NULL; + size_t max_payload_size; + + ring = kzalloc(sizeof(*fc->ring), GFP_KERNEL_ACCOUNT); + if (!ring) + return NULL; + + ring->queues = kcalloc(nr_queues, sizeof(struct fuse_ring_queue *), + GFP_KERNEL_ACCOUNT); + if (!ring->queues) + goto out_err; + + max_payload_size = max(FUSE_MIN_READ_BUFFER, fc->max_write); + max_payload_size = max(max_payload_size, fc->max_pages * PAGE_SIZE); + + spin_lock(&fc->lock); + if (fc->ring) { + /* race, another thread created the ring in the meantime */ + spin_unlock(&fc->lock); + res = fc->ring; + goto out_err; + } + + fc->ring = ring; + ring->nr_queues = nr_queues; + ring->fc = fc; + ring->max_payload_sz = max_payload_size; + + spin_unlock(&fc->lock); + return ring; + +out_err: + kfree(ring->queues); + kfree(ring); + return res; +} + +static struct fuse_ring_queue *fuse_uring_create_queue(struct fuse_ring *ring, + int qid) +{ + struct fuse_conn *fc = ring->fc; + struct fuse_ring_queue *queue; + + queue = kzalloc(sizeof(*queue), GFP_KERNEL_ACCOUNT); + if (!queue) + return NULL; + queue->qid = qid; + queue->ring = ring; + spin_lock_init(&queue->lock); + + INIT_LIST_HEAD(&queue->ent_avail_queue); + INIT_LIST_HEAD(&queue->ent_commit_queue); + + spin_lock(&fc->lock); + if (ring->queues[qid]) { + spin_unlock(&fc->lock); + kfree(queue); + return ring->queues[qid]; + } + + /* + * write_once and lock as the caller mostly doesn't take the lock at all + */ + WRITE_ONCE(ring->queues[qid], queue); + spin_unlock(&fc->lock); + + return queue; +} + +/* + * Make a ring entry available for fuse_req assignment + */ +static void fuse_uring_ent_avail(struct fuse_ring_ent *ent, + struct fuse_ring_queue *queue) +{ + WARN_ON_ONCE(!ent->cmd); + list_move(&ent->list, &queue->ent_avail_queue); + ent->state = FRRS_AVAILABLE; +} + +/* + * fuse_uring_req_fetch command handling + */ +static void fuse_uring_do_register(struct fuse_ring_ent *ent, + struct io_uring_cmd *cmd, + unsigned int issue_flags) +{ + struct fuse_ring_queue *queue = ent->queue; + + spin_lock(&queue->lock); + ent->cmd = cmd; + fuse_uring_ent_avail(ent, queue); + spin_unlock(&queue->lock); +} + +/* + * sqe->addr is a ptr to an iovec array, iov[0] has the headers, iov[1] + * the payload + */ +static int fuse_uring_get_iovec_from_sqe(const struct io_uring_sqe *sqe, + struct iovec iov[FUSE_URING_IOV_SEGS]) +{ + struct iovec __user *uiov = u64_to_user_ptr(READ_ONCE(sqe->addr)); + struct iov_iter iter; + ssize_t ret; + + if (sqe->len != FUSE_URING_IOV_SEGS) + return -EINVAL; + + /* + * Direction for buffer access will actually be READ and WRITE, + * using write for the import should include READ access as well. + */ + ret = import_iovec(WRITE, uiov, FUSE_URING_IOV_SEGS, + FUSE_URING_IOV_SEGS, &iov, &iter); + if (ret < 0) + return ret; + + return 0; +} + +static struct fuse_ring_ent * +fuse_uring_create_ring_ent(struct io_uring_cmd *cmd, + struct fuse_ring_queue *queue) +{ + struct fuse_ring *ring = queue->ring; + struct fuse_ring_ent *ent; + size_t payload_size; + struct iovec iov[FUSE_URING_IOV_SEGS]; + int err; + + err = fuse_uring_get_iovec_from_sqe(cmd->sqe, iov); + if (err) { + pr_info_ratelimited("Failed to get iovec from sqe, err=%d\n", + err); + return ERR_PTR(err); + } + + err = -EINVAL; + if (iov[0].iov_len < sizeof(struct fuse_uring_req_header)) { + pr_info_ratelimited("Invalid header len %zu\n", iov[0].iov_len); + return ERR_PTR(err); + } + + payload_size = iov[1].iov_len; + if (payload_size < ring->max_payload_sz) { + pr_info_ratelimited("Invalid req payload len %zu\n", + payload_size); + return ERR_PTR(err); + } + + err = -ENOMEM; + ent = kzalloc(sizeof(*ent), GFP_KERNEL_ACCOUNT); + if (!ent) + return ERR_PTR(err); + + INIT_LIST_HEAD(&ent->list); + + ent->queue = queue; + ent->headers = iov[0].iov_base; + ent->payload = iov[1].iov_base; + + return ent; +} + +/* + * Register header and payload buffer with the kernel and puts the + * entry as "ready to get fuse requests" on the queue + */ +static int fuse_uring_register(struct io_uring_cmd *cmd, + unsigned int issue_flags, struct fuse_conn *fc) +{ + const struct fuse_uring_cmd_req *cmd_req = io_uring_sqe_cmd(cmd->sqe); + struct fuse_ring *ring = fc->ring; + struct fuse_ring_queue *queue; + struct fuse_ring_ent *ent; + int err; + unsigned int qid = READ_ONCE(cmd_req->qid); + + err = -ENOMEM; + if (!ring) { + ring = fuse_uring_create(fc); + if (!ring) + return err; + } + + if (qid >= ring->nr_queues) { + pr_info_ratelimited("fuse: Invalid ring qid %u\n", qid); + return -EINVAL; + } + + queue = ring->queues[qid]; + if (!queue) { + queue = fuse_uring_create_queue(ring, qid); + if (!queue) + return err; + } + + /* + * The created queue above does not need to be destructed in + * case of entry errors below, will be done at ring destruction time. + */ + + ent = fuse_uring_create_ring_ent(cmd, queue); + if (IS_ERR(ent)) + return PTR_ERR(ent); + + fuse_uring_do_register(ent, cmd, issue_flags); + + return 0; +} + +/* + * Entry function from io_uring to handle the given passthrough command + * (op code IORING_OP_URING_CMD) + */ +int __maybe_unused fuse_uring_cmd(struct io_uring_cmd *cmd, + unsigned int issue_flags) +{ + struct fuse_dev *fud; + struct fuse_conn *fc; + u32 cmd_op = cmd->cmd_op; + int err; + + if (!enable_uring) { + pr_info_ratelimited("fuse-io-uring is disabled\n"); + return -EOPNOTSUPP; + } + + /* This extra SQE size holds struct fuse_uring_cmd_req */ + if (!(issue_flags & IO_URING_F_SQE128)) + return -EINVAL; + + fud = fuse_get_dev(cmd->file); + if (!fud) { + pr_info_ratelimited("No fuse device found\n"); + return -ENOTCONN; + } + fc = fud->fc; + + if (fc->aborted) + return -ECONNABORTED; + if (!fc->connected) + return -ENOTCONN; + + /* + * fuse_uring_register() needs the ring to be initialized, + * we need to know the max payload size + */ + if (!fc->initialized) + return -EAGAIN; + + switch (cmd_op) { + case FUSE_IO_URING_CMD_REGISTER: + err = fuse_uring_register(cmd, issue_flags, fc); + if (err) { + pr_info_once("FUSE_IO_URING_CMD_REGISTER failed err=%d\n", + err); + return err; + } + break; + default: + return -EINVAL; + } + + return -EIOCBQUEUED; +} diff --git a/fs/fuse/dev_uring_i.h b/fs/fuse/dev_uring_i.h new file mode 100644 index 000000000000..ae1536355b36 --- /dev/null +++ b/fs/fuse/dev_uring_i.h @@ -0,0 +1,113 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * FUSE: Filesystem in Userspace + * Copyright (c) 2023-2024 DataDirect Networks. + */ + +#ifndef _FS_FUSE_DEV_URING_I_H +#define _FS_FUSE_DEV_URING_I_H + +#include "fuse_i.h" + +#ifdef CONFIG_FUSE_IO_URING + +enum fuse_ring_req_state { + FRRS_INVALID = 0, + + /* The ring entry received from userspace and it is being processed */ + FRRS_COMMIT, + + /* The ring entry is waiting for new fuse requests */ + FRRS_AVAILABLE, + + /* The ring entry is in or on the way to user space */ + FRRS_USERSPACE, +}; + +/** A fuse ring entry, part of the ring queue */ +struct fuse_ring_ent { + /* userspace buffer */ + struct fuse_uring_req_header __user *headers; + void __user *payload; + + /* the ring queue that owns the request */ + struct fuse_ring_queue *queue; + + /* fields below are protected by queue->lock */ + + struct io_uring_cmd *cmd; + + struct list_head list; + + enum fuse_ring_req_state state; + + struct fuse_req *fuse_req; +}; + +struct fuse_ring_queue { + /* + * back pointer to the main fuse uring structure that holds this + * queue + */ + struct fuse_ring *ring; + + /* queue id, corresponds to the cpu core */ + unsigned int qid; + + /* + * queue lock, taken when any value in the queue changes _and_ also + * a ring entry state changes. + */ + spinlock_t lock; + + /* available ring entries (struct fuse_ring_ent) */ + struct list_head ent_avail_queue; + + /* + * entries in the process of being committed or in the process + * to be sent to userspace + */ + struct list_head ent_commit_queue; +}; + +/** + * Describes if uring is for communication and holds alls the data needed + * for uring communication + */ +struct fuse_ring { + /* back pointer */ + struct fuse_conn *fc; + + /* number of ring queues */ + size_t nr_queues; + + /* maximum payload/arg size */ + size_t max_payload_sz; + + struct fuse_ring_queue **queues; +}; + +bool fuse_uring_enabled(void); +void fuse_uring_destruct(struct fuse_conn *fc); +int fuse_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags); + +#else /* CONFIG_FUSE_IO_URING */ + +struct fuse_ring; + +static inline void fuse_uring_create(struct fuse_conn *fc) +{ +} + +static inline void fuse_uring_destruct(struct fuse_conn *fc) +{ +} + +static inline bool fuse_uring_enabled(void) +{ + return false; +} + +#endif /* CONFIG_FUSE_IO_URING */ + +#endif /* _FS_FUSE_DEV_URING_I_H */ diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 5666900bee5e..bce8cc482d64 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -923,6 +923,11 @@ struct fuse_conn { /** IDR for backing files ids */ struct idr backing_files_map; #endif + +#ifdef CONFIG_FUSE_IO_URING + /** uring connection information*/ + struct fuse_ring *ring; +#endif }; /* diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 3ce4f4e81d09..e4f9bbacfc1b 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -7,6 +7,7 @@ */ #include "fuse_i.h" +#include "dev_uring_i.h" #include #include @@ -992,6 +993,8 @@ static void delayed_release(struct rcu_head *p) { struct fuse_conn *fc = container_of(p, struct fuse_conn, rcu); + fuse_uring_destruct(fc); + put_user_ns(fc->user_ns); fc->release(fc); } @@ -1446,6 +1449,13 @@ void fuse_send_init(struct fuse_mount *fm) if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH)) flags |= FUSE_PASSTHROUGH; + /* + * This is just an information flag for fuse server. No need to check + * the reply - server is either sending IORING_OP_URING_CMD or not. + */ + if (fuse_uring_enabled()) + flags |= FUSE_OVER_IO_URING; + ia->in.flags = flags; ia->in.flags2 = flags >> 32; diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index f1e99458e29e..5e0eb41d967e 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -220,6 +220,15 @@ * * 7.41 * - add FUSE_ALLOW_IDMAP + * 7.42 + * - Add FUSE_OVER_IO_URING and all other io-uring related flags and data + * structures: + * - struct fuse_uring_ent_in_out + * - struct fuse_uring_req_header + * - struct fuse_uring_cmd_req + * - FUSE_URING_IN_OUT_HEADER_SZ + * - FUSE_URING_OP_IN_OUT_SZ + * - enum fuse_uring_cmd */ #ifndef _LINUX_FUSE_H @@ -255,7 +264,7 @@ #define FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 41 +#define FUSE_KERNEL_MINOR_VERSION 42 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 @@ -425,6 +434,7 @@ struct fuse_file_lock { * FUSE_HAS_RESEND: kernel supports resending pending requests, and the high bit * of the request ID indicates resend requests * FUSE_ALLOW_IDMAP: allow creation of idmapped mounts + * FUSE_OVER_IO_URING: Indicate that client supports io-uring */ #define FUSE_ASYNC_READ (1 << 0) #define FUSE_POSIX_LOCKS (1 << 1) @@ -471,6 +481,7 @@ struct fuse_file_lock { /* Obsolete alias for FUSE_DIRECT_IO_ALLOW_MMAP */ #define FUSE_DIRECT_IO_RELAX FUSE_DIRECT_IO_ALLOW_MMAP #define FUSE_ALLOW_IDMAP (1ULL << 40) +#define FUSE_OVER_IO_URING (1ULL << 41) /** * CUSE INIT request/reply flags @@ -1206,4 +1217,67 @@ struct fuse_supp_groups { uint32_t groups[]; }; +/** + * Size of the ring buffer header + */ +#define FUSE_URING_IN_OUT_HEADER_SZ 128 +#define FUSE_URING_OP_IN_OUT_SZ 128 + +/* Used as part of the fuse_uring_req_header */ +struct fuse_uring_ent_in_out { + uint64_t flags; + + /* + * commit ID to be used in a reply to a ring request (see also + * struct fuse_uring_cmd_req) + */ + uint64_t commit_id; + + /* size of user payload buffer */ + uint32_t payload_sz; + uint32_t padding; + + uint64_t reserved; +}; + +/** + * Header for all fuse-io-uring requests + */ +struct fuse_uring_req_header { + /* struct fuse_in_header / struct fuse_out_header */ + char in_out[FUSE_URING_IN_OUT_HEADER_SZ]; + + /* per op code header */ + char op_in[FUSE_URING_OP_IN_OUT_SZ]; + + struct fuse_uring_ent_in_out ring_ent_in_out; +}; + +/** + * sqe commands to the kernel + */ +enum fuse_uring_cmd { + FUSE_IO_URING_CMD_INVALID = 0, + + /* register the request buffer and fetch a fuse request */ + FUSE_IO_URING_CMD_REGISTER = 1, + + /* commit fuse request result and fetch next request */ + FUSE_IO_URING_CMD_COMMIT_AND_FETCH = 2, +}; + +/** + * In the 80B command area of the SQE. + */ +struct fuse_uring_cmd_req { + uint64_t flags; + + /* entry identifier for commits */ + uint64_t commit_id; + + /* queue the command is for (queue index) */ + uint16_t qid; + uint8_t padding[6]; +}; + #endif /* _LINUX_FUSE_H */ -- cgit v1.2.3 From 1629ee1078fc2ac38e324adb0e50c8b04ec714a2 Mon Sep 17 00:00:00 2001 From: Shijith Thotton Date: Fri, 3 Jan 2025 21:01:36 +0530 Subject: virtio-pci: define type and header for PCI vendor data Added macro definition for VIRTIO_PCI_CAP_VENDOR_CFG to identify the PCI vendor data type in the virtio_pci_cap structure. Defined a new struct virtio_pci_vndr_data for the vendor data capability header as per the specification. Acked-by: Jason Wang Signed-off-by: Shijith Thotton Message-Id: <20250103153226.1933479-3-sthotton@marvell.com> Signed-off-by: Michael S. Tsirkin --- include/uapi/linux/virtio_pci.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/virtio_pci.h b/include/uapi/linux/virtio_pci.h index 1beb317df1b9..8549d4571257 100644 --- a/include/uapi/linux/virtio_pci.h +++ b/include/uapi/linux/virtio_pci.h @@ -116,6 +116,8 @@ #define VIRTIO_PCI_CAP_PCI_CFG 5 /* Additional shared memory capability */ #define VIRTIO_PCI_CAP_SHARED_MEMORY_CFG 8 +/* PCI vendor data configuration */ +#define VIRTIO_PCI_CAP_VENDOR_CFG 9 /* This is the PCI capability header: */ struct virtio_pci_cap { @@ -130,6 +132,18 @@ struct virtio_pci_cap { __le32 length; /* Length of the structure, in bytes. */ }; +/* This is the PCI vendor data capability header: */ +struct virtio_pci_vndr_data { + __u8 cap_vndr; /* Generic PCI field: PCI_CAP_ID_VNDR */ + __u8 cap_next; /* Generic PCI field: next ptr. */ + __u8 cap_len; /* Generic PCI field: capability length */ + __u8 cfg_type; /* Identifies the structure. */ + __u16 vendor_id; /* Identifies the vendor-specific format. */ + /* For Vendor Definition */ + /* Pads structure to a multiple of 4 bytes */ + /* Reads must not have side effects */ +}; + struct virtio_pci_cap64 { struct virtio_pci_cap cap; __le32 offset_hi; /* Most sig 32 bits of offset */ -- cgit v1.2.3