From e500d497c16c29304907f5de8bc79a8d4904c3e9 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Wed, 27 Nov 2024 19:37:56 -0800
Subject: usb: gadget: functionfs: fix spellos

Fix typos in documentation as reported by codespell.

Fixes: f0175ab51993 ("usb: gadget: f_fs: OS descriptors support")
Fixes: ddf8abd25994 ("USB: f_fs: the FunctionFS driver")
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc: Michal Nazarewicz <mina86@mina86.com>
Cc: Andrzej Pietrasiewicz <andrzejtp2010@gmail.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: linux-usb@vger.kernel.org
Link: https://lore.kernel.org/r/20241128033756.373517-1-rdunlap@infradead.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/uapi/linux/usb/functionfs.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/usb/functionfs.h b/include/uapi/linux/usb/functionfs.h
index 2ebdba111a8f..beef1752e36e 100644
--- a/include/uapi/linux/usb/functionfs.h
+++ b/include/uapi/linux/usb/functionfs.h
@@ -206,7 +206,7 @@ struct usb_ffs_dmabuf_transfer_req {
  * +-----+-----------------+------+--------------------------+
  * | off | name            | type | description              |
  * +-----+-----------------+------+--------------------------+
- * |   0 | inteface        | U8   | related interface number |
+ * |   0 | interface       | U8   | related interface number |
  * +-----+-----------------+------+--------------------------+
  * |   1 | dwLength        | U32  | length of the descriptor |
  * +-----+-----------------+------+--------------------------+
@@ -224,7 +224,7 @@ struct usb_ffs_dmabuf_transfer_req {
  * +-----+-----------------+------+--------------------------+
  * | off | name            | type | description              |
  * +-----+-----------------+------+--------------------------+
- * |   0 | inteface        | U8   | related interface number |
+ * |   0 | interface       | U8   | related interface number |
  * +-----+-----------------+------+--------------------------+
  * |   1 | dwLength        | U32  | length of the descriptor |
  * +-----+-----------------+------+--------------------------+
@@ -237,7 +237,7 @@ struct usb_ffs_dmabuf_transfer_req {
  * |  11 | ExtProp[]       |      | list of ext. prop. d.    |
  * +-----+-----------------+------+--------------------------+
  *
- * ExtCompat[] is an array of valid Extended Compatiblity descriptors
+ * ExtCompat[] is an array of valid Extended Compatibility descriptors
  * which have the following format:
  *
  * +-----+-----------------------+------+-------------------------------------+
@@ -295,7 +295,7 @@ struct usb_functionfs_strings_head {
  * |  16 | stringtab  | StringTab[lang_count] | table of strings per lang  |
  *
  * For each language there is one stringtab entry (ie. there are lang_count
- * stringtab entires).  Each StringTab has following format:
+ * stringtab entries).  Each StringTab has following format:
  *
  * | off | name    | type              | description                        |
  * |-----+---------+-------------------+------------------------------------|
-- 
cgit v1.2.3


From 64e844505bc08cde3f346f193cbbbab0096fef54 Mon Sep 17 00:00:00 2001
From: Christian Hopps <chopps@labn.net>
Date: Thu, 14 Nov 2024 02:06:59 -0500
Subject: include: uapi: protocol number and packet structs for AGGFRAG in ESP

Add the RFC assigned IP protocol number for AGGFRAG.
Add the on-wire basic and congestion-control IP-TFS packet headers.

Signed-off-by: Christian Hopps <chopps@labn.net>
Tested-by: Antony Antony <antony.antony@secunet.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/uapi/linux/in.h |  2 ++
 include/uapi/linux/ip.h | 16 ++++++++++++++++
 2 files changed, 18 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h
index 5d32d53508d9..ced0fc3c3aa5 100644
--- a/include/uapi/linux/in.h
+++ b/include/uapi/linux/in.h
@@ -79,6 +79,8 @@ enum {
 #define IPPROTO_MPLS		IPPROTO_MPLS
   IPPROTO_ETHERNET = 143,	/* Ethernet-within-IPv6 Encapsulation	*/
 #define IPPROTO_ETHERNET	IPPROTO_ETHERNET
+  IPPROTO_AGGFRAG = 144,	/* AGGFRAG in ESP (RFC 9347)		*/
+#define IPPROTO_AGGFRAG		IPPROTO_AGGFRAG
   IPPROTO_RAW = 255,		/* Raw IP packets			*/
 #define IPPROTO_RAW		IPPROTO_RAW
   IPPROTO_SMC = 256,		/* Shared Memory Communications		*/
diff --git a/include/uapi/linux/ip.h b/include/uapi/linux/ip.h
index 283dec7e3645..5bd7ce934d74 100644
--- a/include/uapi/linux/ip.h
+++ b/include/uapi/linux/ip.h
@@ -137,6 +137,22 @@ struct ip_beet_phdr {
 	__u8 reserved;
 };
 
+struct ip_iptfs_hdr {
+	__u8 subtype;		/* 0*: basic, 1: CC */
+	__u8 flags;
+	__be16 block_offset;
+};
+
+struct ip_iptfs_cc_hdr {
+	__u8 subtype;		/* 0: basic, 1*: CC */
+	__u8 flags;
+	__be16 block_offset;
+	__be32 loss_rate;
+	__be64 rtt_adelay_xdelay;
+	__be32 tval;
+	__be32 techo;
+};
+
 /* index values for the variables in ipv4_devconf */
 enum
 {
-- 
cgit v1.2.3


From f69eb4f65c58f5a081dbafb76011dad73757420c Mon Sep 17 00:00:00 2001
From: Christian Hopps <chopps@labn.net>
Date: Thu, 14 Nov 2024 02:07:00 -0500
Subject: xfrm: netlink: add config (netlink) options

Add netlink options for configuring IP-TFS SAs.

Signed-off-by: Christian Hopps <chopps@labn.net>
Tested-by: Antony Antony <antony.antony@secunet.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/uapi/linux/xfrm.h |  9 +++++++-
 net/xfrm/xfrm_compat.c    | 10 +++++++--
 net/xfrm/xfrm_user.c      | 52 +++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 68 insertions(+), 3 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h
index d73a97e3030a..a23495c0e0a1 100644
--- a/include/uapi/linux/xfrm.h
+++ b/include/uapi/linux/xfrm.h
@@ -158,7 +158,8 @@ enum {
 #define XFRM_MODE_ROUTEOPTIMIZATION 2
 #define XFRM_MODE_IN_TRIGGER 3
 #define XFRM_MODE_BEET 4
-#define XFRM_MODE_MAX 5
+#define XFRM_MODE_IPTFS 5
+#define XFRM_MODE_MAX 6
 
 /* Netlink configuration messages.  */
 enum {
@@ -323,6 +324,12 @@ enum xfrm_attr_type_t {
 	XFRMA_SA_DIR,		/* __u8 */
 	XFRMA_NAT_KEEPALIVE_INTERVAL,	/* __u32 in seconds for NAT keepalive */
 	XFRMA_SA_PCPU,		/* __u32 */
+	XFRMA_IPTFS_DROP_TIME,	/* __u32 in: usec to wait for next seq */
+	XFRMA_IPTFS_REORDER_WINDOW, /* __u16 in: reorder window size (pkts) */
+	XFRMA_IPTFS_DONT_FRAG,	/* out: don't use fragmentation */
+	XFRMA_IPTFS_INIT_DELAY,	/* __u32 out: initial packet wait delay (usec) */
+	XFRMA_IPTFS_MAX_QSIZE,	/* __u32 out: max ingress queue size (octets) */
+	XFRMA_IPTFS_PKT_SIZE,	/* __u32 out: size of outer packet, 0 for PMTU */
 	__XFRMA_MAX
 
 #define XFRMA_OUTPUT_MARK XFRMA_SET_MARK	/* Compatibility */
diff --git a/net/xfrm/xfrm_compat.c b/net/xfrm/xfrm_compat.c
index 5b9ee63e30b6..b8d2e6930041 100644
--- a/net/xfrm/xfrm_compat.c
+++ b/net/xfrm/xfrm_compat.c
@@ -284,9 +284,15 @@ static int xfrm_xlate64_attr(struct sk_buff *dst, const struct nlattr *src)
 	case XFRMA_SA_DIR:
 	case XFRMA_NAT_KEEPALIVE_INTERVAL:
 	case XFRMA_SA_PCPU:
+	case XFRMA_IPTFS_DROP_TIME:
+	case XFRMA_IPTFS_REORDER_WINDOW:
+	case XFRMA_IPTFS_DONT_FRAG:
+	case XFRMA_IPTFS_INIT_DELAY:
+	case XFRMA_IPTFS_MAX_QSIZE:
+	case XFRMA_IPTFS_PKT_SIZE:
 		return xfrm_nla_cpy(dst, src, nla_len(src));
 	default:
-		BUILD_BUG_ON(XFRMA_MAX != XFRMA_SA_PCPU);
+		BUILD_BUG_ON(XFRMA_MAX != XFRMA_IPTFS_PKT_SIZE);
 		pr_warn_once("unsupported nla_type %d\n", src->nla_type);
 		return -EOPNOTSUPP;
 	}
@@ -441,7 +447,7 @@ static int xfrm_xlate32_attr(void *dst, const struct nlattr *nla,
 	int err;
 
 	if (type > XFRMA_MAX) {
-		BUILD_BUG_ON(XFRMA_MAX != XFRMA_SA_PCPU);
+		BUILD_BUG_ON(XFRMA_MAX != XFRMA_IPTFS_PKT_SIZE);
 		NL_SET_ERR_MSG(extack, "Bad attribute");
 		return -EOPNOTSUPP;
 	}
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index b2876e09328b..749ec56101ac 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -301,6 +301,16 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
 			NL_SET_ERR_MSG(extack, "TFC padding can only be used in tunnel mode");
 			goto out;
 		}
+		if ((attrs[XFRMA_IPTFS_DROP_TIME] ||
+		     attrs[XFRMA_IPTFS_REORDER_WINDOW] ||
+		     attrs[XFRMA_IPTFS_DONT_FRAG] ||
+		     attrs[XFRMA_IPTFS_INIT_DELAY] ||
+		     attrs[XFRMA_IPTFS_MAX_QSIZE] ||
+		     attrs[XFRMA_IPTFS_PKT_SIZE]) &&
+		    p->mode != XFRM_MODE_IPTFS) {
+			NL_SET_ERR_MSG(extack, "IP-TFS options can only be used in IP-TFS mode");
+			goto out;
+		}
 		break;
 
 	case IPPROTO_COMP:
@@ -421,6 +431,18 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
 			goto out;
 		}
 
+		if (attrs[XFRMA_IPTFS_DROP_TIME]) {
+			NL_SET_ERR_MSG(extack, "IP-TFS drop time should not be set for output SA");
+			err = -EINVAL;
+			goto out;
+		}
+
+		if (attrs[XFRMA_IPTFS_REORDER_WINDOW]) {
+			NL_SET_ERR_MSG(extack, "IP-TFS reorder window should not be set for output SA");
+			err = -EINVAL;
+			goto out;
+		}
+
 		if (attrs[XFRMA_REPLAY_VAL]) {
 			struct xfrm_replay_state *replay;
 
@@ -458,6 +480,30 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
 			}
 
 		}
+
+		if (attrs[XFRMA_IPTFS_DONT_FRAG]) {
+			NL_SET_ERR_MSG(extack, "IP-TFS don't fragment should not be set for input SA");
+			err = -EINVAL;
+			goto out;
+		}
+
+		if (attrs[XFRMA_IPTFS_INIT_DELAY]) {
+			NL_SET_ERR_MSG(extack, "IP-TFS initial delay should not be set for input SA");
+			err = -EINVAL;
+			goto out;
+		}
+
+		if (attrs[XFRMA_IPTFS_MAX_QSIZE]) {
+			NL_SET_ERR_MSG(extack, "IP-TFS max queue size should not be set for input SA");
+			err = -EINVAL;
+			goto out;
+		}
+
+		if (attrs[XFRMA_IPTFS_PKT_SIZE]) {
+			NL_SET_ERR_MSG(extack, "IP-TFS packet size should not be set for input SA");
+			err = -EINVAL;
+			goto out;
+		}
 	}
 
 	if (!sa_dir && attrs[XFRMA_SA_PCPU]) {
@@ -3220,6 +3266,12 @@ const struct nla_policy xfrma_policy[XFRMA_MAX+1] = {
 	[XFRMA_SA_DIR]          = NLA_POLICY_RANGE(NLA_U8, XFRM_SA_DIR_IN, XFRM_SA_DIR_OUT),
 	[XFRMA_NAT_KEEPALIVE_INTERVAL] = { .type = NLA_U32 },
 	[XFRMA_SA_PCPU]		= { .type = NLA_U32 },
+	[XFRMA_IPTFS_DROP_TIME]		= { .type = NLA_U32 },
+	[XFRMA_IPTFS_REORDER_WINDOW]	= { .type = NLA_U16 },
+	[XFRMA_IPTFS_DONT_FRAG]		= { .type = NLA_FLAG },
+	[XFRMA_IPTFS_INIT_DELAY]	= { .type = NLA_U32 },
+	[XFRMA_IPTFS_MAX_QSIZE]		= { .type = NLA_U32 },
+	[XFRMA_IPTFS_PKT_SIZE]	= { .type = NLA_U32 },
 };
 EXPORT_SYMBOL_GPL(xfrma_policy);
 
-- 
cgit v1.2.3


From d1716d5a44c37e5743bf6ea4e5cdbdab37727f27 Mon Sep 17 00:00:00 2001
From: Christian Hopps <chopps@labn.net>
Date: Thu, 14 Nov 2024 02:07:02 -0500
Subject: xfrm: add generic iptfs defines and functionality

Define `XFRM_MODE_IPTFS` and `IPSEC_MODE_IPTFS` constants, and add these to
switch case and conditionals adjacent with the existing TUNNEL modes.

Signed-off-by: Christian Hopps <chopps@labn.net>
Tested-by: Antony Antony <antony.antony@secunet.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/xfrm.h         |  1 +
 include/uapi/linux/ipsec.h |  3 ++-
 include/uapi/linux/snmp.h  |  2 ++
 net/ipv4/esp4.c            |  3 ++-
 net/ipv6/esp6.c            |  3 ++-
 net/netfilter/nft_xfrm.c   |  3 ++-
 net/xfrm/xfrm_device.c     |  1 +
 net/xfrm/xfrm_output.c     |  4 ++++
 net/xfrm/xfrm_policy.c     |  8 ++++++--
 net/xfrm/xfrm_proc.c       |  2 ++
 net/xfrm/xfrm_state.c      | 12 ++++++++++++
 net/xfrm/xfrm_user.c       | 12 ++++++++++++
 12 files changed, 48 insertions(+), 6 deletions(-)

(limited to 'include/uapi')

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 1ebc09cde627..4b0677e48190 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -38,6 +38,7 @@
 #define XFRM_PROTO_COMP		108
 #define XFRM_PROTO_IPIP		4
 #define XFRM_PROTO_IPV6		41
+#define XFRM_PROTO_IPTFS	IPPROTO_AGGFRAG
 #define XFRM_PROTO_ROUTING	IPPROTO_ROUTING
 #define XFRM_PROTO_DSTOPTS	IPPROTO_DSTOPTS
 
diff --git a/include/uapi/linux/ipsec.h b/include/uapi/linux/ipsec.h
index 50d8ee1791e2..696b790f4346 100644
--- a/include/uapi/linux/ipsec.h
+++ b/include/uapi/linux/ipsec.h
@@ -14,7 +14,8 @@ enum {
 	IPSEC_MODE_ANY		= 0,	/* We do not support this for SA */
 	IPSEC_MODE_TRANSPORT	= 1,
 	IPSEC_MODE_TUNNEL	= 2,
-	IPSEC_MODE_BEET         = 3
+	IPSEC_MODE_BEET         = 3,
+	IPSEC_MODE_IPTFS        = 4
 };
 
 enum {
diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h
index adf5fd78dd50..5a2553511190 100644
--- a/include/uapi/linux/snmp.h
+++ b/include/uapi/linux/snmp.h
@@ -339,6 +339,8 @@ enum
 	LINUX_MIB_XFRMACQUIREERROR,		/* XfrmAcquireError */
 	LINUX_MIB_XFRMOUTSTATEDIRERROR,		/* XfrmOutStateDirError */
 	LINUX_MIB_XFRMINSTATEDIRERROR,		/* XfrmInStateDirError */
+	LINUX_MIB_XFRMINIPTFSERROR,		/* XfrmInIptfsError */
+	LINUX_MIB_XFRMOUTNOQSPACE,		/* XfrmOutNoQueueSpace */
 	__LINUX_MIB_XFRMMAX
 };
 
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index f3281312eb5e..b0fbf804bbba 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -816,7 +816,8 @@ int esp_input_done2(struct sk_buff *skb, int err)
 	}
 
 	skb_pull_rcsum(skb, hlen);
-	if (x->props.mode == XFRM_MODE_TUNNEL)
+	if (x->props.mode == XFRM_MODE_TUNNEL ||
+	    x->props.mode == XFRM_MODE_IPTFS)
 		skb_reset_transport_header(skb);
 	else
 		skb_set_transport_header(skb, -ihl);
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index b2400c226a32..5f3d0cc1555a 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -859,7 +859,8 @@ int esp6_input_done2(struct sk_buff *skb, int err)
 	skb_postpull_rcsum(skb, skb_network_header(skb),
 			   skb_network_header_len(skb));
 	skb_pull_rcsum(skb, hlen);
-	if (x->props.mode == XFRM_MODE_TUNNEL)
+	if (x->props.mode == XFRM_MODE_TUNNEL ||
+	    x->props.mode == XFRM_MODE_IPTFS)
 		skb_reset_transport_header(skb);
 	else
 		skb_set_transport_header(skb, -hdr_len);
diff --git a/net/netfilter/nft_xfrm.c b/net/netfilter/nft_xfrm.c
index 8a07b46cc8fb..3210cfc966ab 100644
--- a/net/netfilter/nft_xfrm.c
+++ b/net/netfilter/nft_xfrm.c
@@ -112,7 +112,8 @@ static bool xfrm_state_addr_ok(enum nft_xfrm_keys k, u8 family, u8 mode)
 		return true;
 	}
 
-	return mode == XFRM_MODE_BEET || mode == XFRM_MODE_TUNNEL;
+	return mode == XFRM_MODE_BEET || mode == XFRM_MODE_TUNNEL ||
+	       mode == XFRM_MODE_IPTFS;
 }
 
 static void nft_xfrm_state_get_key(const struct nft_xfrm *priv,
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index 1fe1b07d879d..d1fa94e52cea 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -69,6 +69,7 @@ static void __xfrm_mode_beet_prep(struct xfrm_state *x, struct sk_buff *skb,
 static void xfrm_outer_mode_prep(struct xfrm_state *x, struct sk_buff *skb)
 {
 	switch (x->outer_mode.encap) {
+	case XFRM_MODE_IPTFS:
 	case XFRM_MODE_TUNNEL:
 		if (x->outer_mode.family == AF_INET)
 			return __xfrm_mode_tunnel_prep(x, skb,
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index ef81359e4038..b5025cf6136e 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -677,6 +677,10 @@ static void xfrm_get_inner_ipproto(struct sk_buff *skb, struct xfrm_state *x)
 
 		return;
 	}
+	if (x->outer_mode.encap == XFRM_MODE_IPTFS) {
+		xo->inner_ipproto = IPPROTO_AGGFRAG;
+		return;
+	}
 
 	/* non-Tunnel Mode */
 	if (!skb->encapsulation)
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index c04014ee623f..9e510021ee91 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -2497,6 +2497,7 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, const struct flowi *fl,
 		struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i];
 
 		if (tmpl->mode == XFRM_MODE_TUNNEL ||
+		    tmpl->mode == XFRM_MODE_IPTFS ||
 		    tmpl->mode == XFRM_MODE_BEET) {
 			remote = &tmpl->id.daddr;
 			local = &tmpl->saddr;
@@ -3294,7 +3295,8 @@ no_transform:
 ok:
 	xfrm_pols_put(pols, drop_pols);
 	if (dst && dst->xfrm &&
-	    dst->xfrm->props.mode == XFRM_MODE_TUNNEL)
+	    (dst->xfrm->props.mode == XFRM_MODE_TUNNEL ||
+	     dst->xfrm->props.mode == XFRM_MODE_IPTFS))
 		dst->flags |= DST_XFRM_TUNNEL;
 	return dst;
 
@@ -4523,6 +4525,7 @@ static int migrate_tmpl_match(const struct xfrm_migrate *m, const struct xfrm_tm
 		switch (t->mode) {
 		case XFRM_MODE_TUNNEL:
 		case XFRM_MODE_BEET:
+		case XFRM_MODE_IPTFS:
 			if (xfrm_addr_equal(&t->id.daddr, &m->old_daddr,
 					    m->old_family) &&
 			    xfrm_addr_equal(&t->saddr, &m->old_saddr,
@@ -4565,7 +4568,8 @@ static int xfrm_policy_migrate(struct xfrm_policy *pol,
 				continue;
 			n++;
 			if (pol->xfrm_vec[i].mode != XFRM_MODE_TUNNEL &&
-			    pol->xfrm_vec[i].mode != XFRM_MODE_BEET)
+			    pol->xfrm_vec[i].mode != XFRM_MODE_BEET &&
+			    pol->xfrm_vec[i].mode != XFRM_MODE_IPTFS)
 				continue;
 			/* update endpoints */
 			memcpy(&pol->xfrm_vec[i].id.daddr, &mp->new_daddr,
diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c
index eeb984be03a7..8e07dd614b0b 100644
--- a/net/xfrm/xfrm_proc.c
+++ b/net/xfrm/xfrm_proc.c
@@ -43,6 +43,8 @@ static const struct snmp_mib xfrm_mib_list[] = {
 	SNMP_MIB_ITEM("XfrmAcquireError", LINUX_MIB_XFRMACQUIREERROR),
 	SNMP_MIB_ITEM("XfrmOutStateDirError", LINUX_MIB_XFRMOUTSTATEDIRERROR),
 	SNMP_MIB_ITEM("XfrmInStateDirError", LINUX_MIB_XFRMINSTATEDIRERROR),
+	SNMP_MIB_ITEM("XfrmInIptfsError", LINUX_MIB_XFRMINIPTFSERROR),
+	SNMP_MIB_ITEM("XfrmOutNoQueueSpace", LINUX_MIB_XFRMOUTNOQSPACE),
 	SNMP_MIB_SENTINEL
 };
 
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index cf68ba891729..34067cb8a479 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -467,6 +467,11 @@ static const struct xfrm_mode xfrm4_mode_map[XFRM_MODE_MAX] = {
 		.flags = XFRM_MODE_FLAG_TUNNEL,
 		.family = AF_INET,
 	},
+	[XFRM_MODE_IPTFS] = {
+		.encap = XFRM_MODE_IPTFS,
+		.flags = XFRM_MODE_FLAG_TUNNEL,
+		.family = AF_INET,
+	},
 };
 
 static const struct xfrm_mode xfrm6_mode_map[XFRM_MODE_MAX] = {
@@ -488,6 +493,11 @@ static const struct xfrm_mode xfrm6_mode_map[XFRM_MODE_MAX] = {
 		.flags = XFRM_MODE_FLAG_TUNNEL,
 		.family = AF_INET6,
 	},
+	[XFRM_MODE_IPTFS] = {
+		.encap = XFRM_MODE_IPTFS,
+		.flags = XFRM_MODE_FLAG_TUNNEL,
+		.family = AF_INET6,
+	},
 };
 
 static const struct xfrm_mode *xfrm_get_mode(unsigned int encap, int family)
@@ -2334,6 +2344,7 @@ static int __xfrm6_state_sort_cmp(const void *p)
 #endif
 	case XFRM_MODE_TUNNEL:
 	case XFRM_MODE_BEET:
+	case XFRM_MODE_IPTFS:
 		return 4;
 	}
 	return 5;
@@ -2360,6 +2371,7 @@ static int __xfrm6_tmpl_sort_cmp(const void *p)
 #endif
 	case XFRM_MODE_TUNNEL:
 	case XFRM_MODE_BEET:
+	case XFRM_MODE_IPTFS:
 		return 3;
 	}
 	return 4;
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 71b452fff8db..08c6d6f0179f 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -383,6 +383,16 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
 	case XFRM_MODE_ROUTEOPTIMIZATION:
 	case XFRM_MODE_BEET:
 		break;
+	case XFRM_MODE_IPTFS:
+		if (p->id.proto != IPPROTO_ESP) {
+			NL_SET_ERR_MSG(extack, "IP-TFS mode only supported with ESP");
+			goto out;
+		}
+		if (sa_dir == 0) {
+			NL_SET_ERR_MSG(extack, "IP-TFS mode requires in or out direction attribute");
+			goto out;
+		}
+		break;
 
 	default:
 		NL_SET_ERR_MSG(extack, "Unsupported mode");
@@ -2014,6 +2024,8 @@ static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family,
 				return -EINVAL;
 			}
 			break;
+		case XFRM_MODE_IPTFS:
+			break;
 		default:
 			if (ut[i].family != prev_family) {
 				NL_SET_ERR_MSG(extack, "Mode in template doesn't support a family change");
-- 
cgit v1.2.3


From 49922401c2190713c5cc03902dc68c3ecd3f13e8 Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@fomichev.me>
Date: Wed, 4 Dec 2024 07:55:47 -0800
Subject: ethtool: separate definitions that are gonna be generated

Reshuffle definitions that are gonna be generated into
ethtool_netlink_generated.h and match ynl spec order.
This should make it easier to compare the output of the ynl-gen-c
to the existing uapi header. No functional changes.

Things that are still remaining to be manually defined:
- ETHTOOL_FLAG_ALL - probably no good way to add to spec?
- some of the cable test bits (not sure whether it's possible to move to
  spec)
- some of the stats definitions (no way currently to move to spec)

Signed-off-by: Stanislav Fomichev <sdf@fomichev.me>
Link: https://patch.msgid.link/20241204155549.641348-7-sdf@fomichev.me
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 MAINTAINERS                                    |   2 +-
 include/uapi/linux/ethtool_netlink.h           | 893 +-----------------------
 include/uapi/linux/ethtool_netlink_generated.h | 899 +++++++++++++++++++++++++
 3 files changed, 901 insertions(+), 893 deletions(-)
 create mode 100644 include/uapi/linux/ethtool_netlink_generated.h

(limited to 'include/uapi')

diff --git a/MAINTAINERS b/MAINTAINERS
index 686109008d8e..79756f2100e0 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -16280,7 +16280,7 @@ F:	include/linux/inetdevice.h
 F:	include/linux/netdev*
 F:	include/linux/platform_data/wiznet.h
 F:	include/uapi/linux/cn_proc.h
-F:	include/uapi/linux/ethtool_netlink.h
+F:	include/uapi/linux/ethtool_netlink*
 F:	include/uapi/linux/if_*
 F:	include/uapi/linux/net_shaper.h
 F:	include/uapi/linux/netdev*
diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h
index 283305f6b063..9c909ce733a5 100644
--- a/include/uapi/linux/ethtool_netlink.h
+++ b/include/uapi/linux/ethtool_netlink.h
@@ -10,545 +10,12 @@
 #define _UAPI_LINUX_ETHTOOL_NETLINK_H_
 
 #include <linux/ethtool.h>
-
-/* message types - userspace to kernel */
-enum {
-	ETHTOOL_MSG_USER_NONE,
-	ETHTOOL_MSG_STRSET_GET,
-	ETHTOOL_MSG_LINKINFO_GET,
-	ETHTOOL_MSG_LINKINFO_SET,
-	ETHTOOL_MSG_LINKMODES_GET,
-	ETHTOOL_MSG_LINKMODES_SET,
-	ETHTOOL_MSG_LINKSTATE_GET,
-	ETHTOOL_MSG_DEBUG_GET,
-	ETHTOOL_MSG_DEBUG_SET,
-	ETHTOOL_MSG_WOL_GET,
-	ETHTOOL_MSG_WOL_SET,
-	ETHTOOL_MSG_FEATURES_GET,
-	ETHTOOL_MSG_FEATURES_SET,
-	ETHTOOL_MSG_PRIVFLAGS_GET,
-	ETHTOOL_MSG_PRIVFLAGS_SET,
-	ETHTOOL_MSG_RINGS_GET,
-	ETHTOOL_MSG_RINGS_SET,
-	ETHTOOL_MSG_CHANNELS_GET,
-	ETHTOOL_MSG_CHANNELS_SET,
-	ETHTOOL_MSG_COALESCE_GET,
-	ETHTOOL_MSG_COALESCE_SET,
-	ETHTOOL_MSG_PAUSE_GET,
-	ETHTOOL_MSG_PAUSE_SET,
-	ETHTOOL_MSG_EEE_GET,
-	ETHTOOL_MSG_EEE_SET,
-	ETHTOOL_MSG_TSINFO_GET,
-	ETHTOOL_MSG_CABLE_TEST_ACT,
-	ETHTOOL_MSG_CABLE_TEST_TDR_ACT,
-	ETHTOOL_MSG_TUNNEL_INFO_GET,
-	ETHTOOL_MSG_FEC_GET,
-	ETHTOOL_MSG_FEC_SET,
-	ETHTOOL_MSG_MODULE_EEPROM_GET,
-	ETHTOOL_MSG_STATS_GET,
-	ETHTOOL_MSG_PHC_VCLOCKS_GET,
-	ETHTOOL_MSG_MODULE_GET,
-	ETHTOOL_MSG_MODULE_SET,
-	ETHTOOL_MSG_PSE_GET,
-	ETHTOOL_MSG_PSE_SET,
-	ETHTOOL_MSG_RSS_GET,
-	ETHTOOL_MSG_PLCA_GET_CFG,
-	ETHTOOL_MSG_PLCA_SET_CFG,
-	ETHTOOL_MSG_PLCA_GET_STATUS,
-	ETHTOOL_MSG_MM_GET,
-	ETHTOOL_MSG_MM_SET,
-	ETHTOOL_MSG_MODULE_FW_FLASH_ACT,
-	ETHTOOL_MSG_PHY_GET,
-
-	/* add new constants above here */
-	__ETHTOOL_MSG_USER_CNT,
-	ETHTOOL_MSG_USER_MAX = __ETHTOOL_MSG_USER_CNT - 1
-};
-
-/* message types - kernel to userspace */
-enum {
-	ETHTOOL_MSG_KERNEL_NONE,
-	ETHTOOL_MSG_STRSET_GET_REPLY,
-	ETHTOOL_MSG_LINKINFO_GET_REPLY,
-	ETHTOOL_MSG_LINKINFO_NTF,
-	ETHTOOL_MSG_LINKMODES_GET_REPLY,
-	ETHTOOL_MSG_LINKMODES_NTF,
-	ETHTOOL_MSG_LINKSTATE_GET_REPLY,
-	ETHTOOL_MSG_DEBUG_GET_REPLY,
-	ETHTOOL_MSG_DEBUG_NTF,
-	ETHTOOL_MSG_WOL_GET_REPLY,
-	ETHTOOL_MSG_WOL_NTF,
-	ETHTOOL_MSG_FEATURES_GET_REPLY,
-	ETHTOOL_MSG_FEATURES_SET_REPLY,
-	ETHTOOL_MSG_FEATURES_NTF,
-	ETHTOOL_MSG_PRIVFLAGS_GET_REPLY,
-	ETHTOOL_MSG_PRIVFLAGS_NTF,
-	ETHTOOL_MSG_RINGS_GET_REPLY,
-	ETHTOOL_MSG_RINGS_NTF,
-	ETHTOOL_MSG_CHANNELS_GET_REPLY,
-	ETHTOOL_MSG_CHANNELS_NTF,
-	ETHTOOL_MSG_COALESCE_GET_REPLY,
-	ETHTOOL_MSG_COALESCE_NTF,
-	ETHTOOL_MSG_PAUSE_GET_REPLY,
-	ETHTOOL_MSG_PAUSE_NTF,
-	ETHTOOL_MSG_EEE_GET_REPLY,
-	ETHTOOL_MSG_EEE_NTF,
-	ETHTOOL_MSG_TSINFO_GET_REPLY,
-	ETHTOOL_MSG_CABLE_TEST_NTF,
-	ETHTOOL_MSG_CABLE_TEST_TDR_NTF,
-	ETHTOOL_MSG_TUNNEL_INFO_GET_REPLY,
-	ETHTOOL_MSG_FEC_GET_REPLY,
-	ETHTOOL_MSG_FEC_NTF,
-	ETHTOOL_MSG_MODULE_EEPROM_GET_REPLY,
-	ETHTOOL_MSG_STATS_GET_REPLY,
-	ETHTOOL_MSG_PHC_VCLOCKS_GET_REPLY,
-	ETHTOOL_MSG_MODULE_GET_REPLY,
-	ETHTOOL_MSG_MODULE_NTF,
-	ETHTOOL_MSG_PSE_GET_REPLY,
-	ETHTOOL_MSG_RSS_GET_REPLY,
-	ETHTOOL_MSG_PLCA_GET_CFG_REPLY,
-	ETHTOOL_MSG_PLCA_GET_STATUS_REPLY,
-	ETHTOOL_MSG_PLCA_NTF,
-	ETHTOOL_MSG_MM_GET_REPLY,
-	ETHTOOL_MSG_MM_NTF,
-	ETHTOOL_MSG_MODULE_FW_FLASH_NTF,
-	ETHTOOL_MSG_PHY_GET_REPLY,
-	ETHTOOL_MSG_PHY_NTF,
-
-	/* add new constants above here */
-	__ETHTOOL_MSG_KERNEL_CNT,
-	ETHTOOL_MSG_KERNEL_MAX = __ETHTOOL_MSG_KERNEL_CNT - 1
-};
-
-/* request header */
-
-enum ethtool_header_flags {
-	ETHTOOL_FLAG_COMPACT_BITSETS	= 1 << 0,	/* use compact bitsets in reply */
-	ETHTOOL_FLAG_OMIT_REPLY		= 1 << 1,	/* provide optional reply for SET or ACT requests */
-	ETHTOOL_FLAG_STATS		= 1 << 2,	/* request statistics, if supported by the driver */
-};
+#include <linux/ethtool_netlink_generated.h>
 
 #define ETHTOOL_FLAG_ALL (ETHTOOL_FLAG_COMPACT_BITSETS | \
 			  ETHTOOL_FLAG_OMIT_REPLY | \
 			  ETHTOOL_FLAG_STATS)
 
-enum {
-	ETHTOOL_A_HEADER_UNSPEC,
-	ETHTOOL_A_HEADER_DEV_INDEX,		/* u32 */
-	ETHTOOL_A_HEADER_DEV_NAME,		/* string */
-	ETHTOOL_A_HEADER_FLAGS,			/* u32 - ETHTOOL_FLAG_* */
-	ETHTOOL_A_HEADER_PHY_INDEX,		/* u32 */
-
-	/* add new constants above here */
-	__ETHTOOL_A_HEADER_CNT,
-	ETHTOOL_A_HEADER_MAX = __ETHTOOL_A_HEADER_CNT - 1
-};
-
-/* bit sets */
-
-enum {
-	ETHTOOL_A_BITSET_BIT_UNSPEC,
-	ETHTOOL_A_BITSET_BIT_INDEX,		/* u32 */
-	ETHTOOL_A_BITSET_BIT_NAME,		/* string */
-	ETHTOOL_A_BITSET_BIT_VALUE,		/* flag */
-
-	/* add new constants above here */
-	__ETHTOOL_A_BITSET_BIT_CNT,
-	ETHTOOL_A_BITSET_BIT_MAX = __ETHTOOL_A_BITSET_BIT_CNT - 1
-};
-
-enum {
-	ETHTOOL_A_BITSET_BITS_UNSPEC,
-	ETHTOOL_A_BITSET_BITS_BIT,		/* nest - _A_BITSET_BIT_* */
-
-	/* add new constants above here */
-	__ETHTOOL_A_BITSET_BITS_CNT,
-	ETHTOOL_A_BITSET_BITS_MAX = __ETHTOOL_A_BITSET_BITS_CNT - 1
-};
-
-enum {
-	ETHTOOL_A_BITSET_UNSPEC,
-	ETHTOOL_A_BITSET_NOMASK,		/* flag */
-	ETHTOOL_A_BITSET_SIZE,			/* u32 */
-	ETHTOOL_A_BITSET_BITS,			/* nest - _A_BITSET_BITS_* */
-	ETHTOOL_A_BITSET_VALUE,			/* binary */
-	ETHTOOL_A_BITSET_MASK,			/* binary */
-
-	/* add new constants above here */
-	__ETHTOOL_A_BITSET_CNT,
-	ETHTOOL_A_BITSET_MAX = __ETHTOOL_A_BITSET_CNT - 1
-};
-
-/* string sets */
-
-enum {
-	ETHTOOL_A_STRING_UNSPEC,
-	ETHTOOL_A_STRING_INDEX,			/* u32 */
-	ETHTOOL_A_STRING_VALUE,			/* string */
-
-	/* add new constants above here */
-	__ETHTOOL_A_STRING_CNT,
-	ETHTOOL_A_STRING_MAX = __ETHTOOL_A_STRING_CNT - 1
-};
-
-enum {
-	ETHTOOL_A_STRINGS_UNSPEC,
-	ETHTOOL_A_STRINGS_STRING,		/* nest - _A_STRINGS_* */
-
-	/* add new constants above here */
-	__ETHTOOL_A_STRINGS_CNT,
-	ETHTOOL_A_STRINGS_MAX = __ETHTOOL_A_STRINGS_CNT - 1
-};
-
-enum {
-	ETHTOOL_A_STRINGSET_UNSPEC,
-	ETHTOOL_A_STRINGSET_ID,			/* u32 */
-	ETHTOOL_A_STRINGSET_COUNT,		/* u32 */
-	ETHTOOL_A_STRINGSET_STRINGS,		/* nest - _A_STRINGS_* */
-
-	/* add new constants above here */
-	__ETHTOOL_A_STRINGSET_CNT,
-	ETHTOOL_A_STRINGSET_MAX = __ETHTOOL_A_STRINGSET_CNT - 1
-};
-
-enum {
-	ETHTOOL_A_STRINGSETS_UNSPEC,
-	ETHTOOL_A_STRINGSETS_STRINGSET,		/* nest - _A_STRINGSET_* */
-
-	/* add new constants above here */
-	__ETHTOOL_A_STRINGSETS_CNT,
-	ETHTOOL_A_STRINGSETS_MAX = __ETHTOOL_A_STRINGSETS_CNT - 1
-};
-
-/* STRSET */
-
-enum {
-	ETHTOOL_A_STRSET_UNSPEC,
-	ETHTOOL_A_STRSET_HEADER,		/* nest - _A_HEADER_* */
-	ETHTOOL_A_STRSET_STRINGSETS,		/* nest - _A_STRINGSETS_* */
-	ETHTOOL_A_STRSET_COUNTS_ONLY,		/* flag */
-
-	/* add new constants above here */
-	__ETHTOOL_A_STRSET_CNT,
-	ETHTOOL_A_STRSET_MAX = __ETHTOOL_A_STRSET_CNT - 1
-};
-
-/* LINKINFO */
-
-enum {
-	ETHTOOL_A_LINKINFO_UNSPEC,
-	ETHTOOL_A_LINKINFO_HEADER,		/* nest - _A_HEADER_* */
-	ETHTOOL_A_LINKINFO_PORT,		/* u8 */
-	ETHTOOL_A_LINKINFO_PHYADDR,		/* u8 */
-	ETHTOOL_A_LINKINFO_TP_MDIX,		/* u8 */
-	ETHTOOL_A_LINKINFO_TP_MDIX_CTRL,	/* u8 */
-	ETHTOOL_A_LINKINFO_TRANSCEIVER,		/* u8 */
-
-	/* add new constants above here */
-	__ETHTOOL_A_LINKINFO_CNT,
-	ETHTOOL_A_LINKINFO_MAX = __ETHTOOL_A_LINKINFO_CNT - 1
-};
-
-/* LINKMODES */
-
-enum {
-	ETHTOOL_A_LINKMODES_UNSPEC,
-	ETHTOOL_A_LINKMODES_HEADER,		/* nest - _A_HEADER_* */
-	ETHTOOL_A_LINKMODES_AUTONEG,		/* u8 */
-	ETHTOOL_A_LINKMODES_OURS,		/* bitset */
-	ETHTOOL_A_LINKMODES_PEER,		/* bitset */
-	ETHTOOL_A_LINKMODES_SPEED,		/* u32 */
-	ETHTOOL_A_LINKMODES_DUPLEX,		/* u8 */
-	ETHTOOL_A_LINKMODES_MASTER_SLAVE_CFG,	/* u8 */
-	ETHTOOL_A_LINKMODES_MASTER_SLAVE_STATE,	/* u8 */
-	ETHTOOL_A_LINKMODES_LANES,		/* u32 */
-	ETHTOOL_A_LINKMODES_RATE_MATCHING,	/* u8 */
-
-	/* add new constants above here */
-	__ETHTOOL_A_LINKMODES_CNT,
-	ETHTOOL_A_LINKMODES_MAX = __ETHTOOL_A_LINKMODES_CNT - 1
-};
-
-/* LINKSTATE */
-
-enum {
-	ETHTOOL_A_LINKSTATE_UNSPEC,
-	ETHTOOL_A_LINKSTATE_HEADER,		/* nest - _A_HEADER_* */
-	ETHTOOL_A_LINKSTATE_LINK,		/* u8 */
-	ETHTOOL_A_LINKSTATE_SQI,		/* u32 */
-	ETHTOOL_A_LINKSTATE_SQI_MAX,		/* u32 */
-	ETHTOOL_A_LINKSTATE_EXT_STATE,		/* u8 */
-	ETHTOOL_A_LINKSTATE_EXT_SUBSTATE,	/* u8 */
-	ETHTOOL_A_LINKSTATE_EXT_DOWN_CNT,	/* u32 */
-
-	/* add new constants above here */
-	__ETHTOOL_A_LINKSTATE_CNT,
-	ETHTOOL_A_LINKSTATE_MAX = __ETHTOOL_A_LINKSTATE_CNT - 1
-};
-
-/* DEBUG */
-
-enum {
-	ETHTOOL_A_DEBUG_UNSPEC,
-	ETHTOOL_A_DEBUG_HEADER,			/* nest - _A_HEADER_* */
-	ETHTOOL_A_DEBUG_MSGMASK,		/* bitset */
-
-	/* add new constants above here */
-	__ETHTOOL_A_DEBUG_CNT,
-	ETHTOOL_A_DEBUG_MAX = __ETHTOOL_A_DEBUG_CNT - 1
-};
-
-/* WOL */
-
-enum {
-	ETHTOOL_A_WOL_UNSPEC,
-	ETHTOOL_A_WOL_HEADER,			/* nest - _A_HEADER_* */
-	ETHTOOL_A_WOL_MODES,			/* bitset */
-	ETHTOOL_A_WOL_SOPASS,			/* binary */
-
-	/* add new constants above here */
-	__ETHTOOL_A_WOL_CNT,
-	ETHTOOL_A_WOL_MAX = __ETHTOOL_A_WOL_CNT - 1
-};
-
-/* FEATURES */
-
-enum {
-	ETHTOOL_A_FEATURES_UNSPEC,
-	ETHTOOL_A_FEATURES_HEADER,			/* nest - _A_HEADER_* */
-	ETHTOOL_A_FEATURES_HW,				/* bitset */
-	ETHTOOL_A_FEATURES_WANTED,			/* bitset */
-	ETHTOOL_A_FEATURES_ACTIVE,			/* bitset */
-	ETHTOOL_A_FEATURES_NOCHANGE,			/* bitset */
-
-	/* add new constants above here */
-	__ETHTOOL_A_FEATURES_CNT,
-	ETHTOOL_A_FEATURES_MAX = __ETHTOOL_A_FEATURES_CNT - 1
-};
-
-/* PRIVFLAGS */
-
-enum {
-	ETHTOOL_A_PRIVFLAGS_UNSPEC,
-	ETHTOOL_A_PRIVFLAGS_HEADER,			/* nest - _A_HEADER_* */
-	ETHTOOL_A_PRIVFLAGS_FLAGS,			/* bitset */
-
-	/* add new constants above here */
-	__ETHTOOL_A_PRIVFLAGS_CNT,
-	ETHTOOL_A_PRIVFLAGS_MAX = __ETHTOOL_A_PRIVFLAGS_CNT - 1
-};
-
-/* RINGS */
-
-enum {
-	ETHTOOL_TCP_DATA_SPLIT_UNKNOWN = 0,
-	ETHTOOL_TCP_DATA_SPLIT_DISABLED,
-	ETHTOOL_TCP_DATA_SPLIT_ENABLED,
-};
-
-enum {
-	ETHTOOL_A_RINGS_UNSPEC,
-	ETHTOOL_A_RINGS_HEADER,				/* nest - _A_HEADER_* */
-	ETHTOOL_A_RINGS_RX_MAX,				/* u32 */
-	ETHTOOL_A_RINGS_RX_MINI_MAX,			/* u32 */
-	ETHTOOL_A_RINGS_RX_JUMBO_MAX,			/* u32 */
-	ETHTOOL_A_RINGS_TX_MAX,				/* u32 */
-	ETHTOOL_A_RINGS_RX,				/* u32 */
-	ETHTOOL_A_RINGS_RX_MINI,			/* u32 */
-	ETHTOOL_A_RINGS_RX_JUMBO,			/* u32 */
-	ETHTOOL_A_RINGS_TX,				/* u32 */
-	ETHTOOL_A_RINGS_RX_BUF_LEN,                     /* u32 */
-	ETHTOOL_A_RINGS_TCP_DATA_SPLIT,			/* u8 */
-	ETHTOOL_A_RINGS_CQE_SIZE,			/* u32 */
-	ETHTOOL_A_RINGS_TX_PUSH,			/* u8 */
-	ETHTOOL_A_RINGS_RX_PUSH,			/* u8 */
-	ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN,		/* u32 */
-	ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN_MAX,		/* u32 */
-
-	/* add new constants above here */
-	__ETHTOOL_A_RINGS_CNT,
-	ETHTOOL_A_RINGS_MAX = (__ETHTOOL_A_RINGS_CNT - 1)
-};
-
-/* CHANNELS */
-
-enum {
-	ETHTOOL_A_CHANNELS_UNSPEC,
-	ETHTOOL_A_CHANNELS_HEADER,			/* nest - _A_HEADER_* */
-	ETHTOOL_A_CHANNELS_RX_MAX,			/* u32 */
-	ETHTOOL_A_CHANNELS_TX_MAX,			/* u32 */
-	ETHTOOL_A_CHANNELS_OTHER_MAX,			/* u32 */
-	ETHTOOL_A_CHANNELS_COMBINED_MAX,		/* u32 */
-	ETHTOOL_A_CHANNELS_RX_COUNT,			/* u32 */
-	ETHTOOL_A_CHANNELS_TX_COUNT,			/* u32 */
-	ETHTOOL_A_CHANNELS_OTHER_COUNT,			/* u32 */
-	ETHTOOL_A_CHANNELS_COMBINED_COUNT,		/* u32 */
-
-	/* add new constants above here */
-	__ETHTOOL_A_CHANNELS_CNT,
-	ETHTOOL_A_CHANNELS_MAX = (__ETHTOOL_A_CHANNELS_CNT - 1)
-};
-
-/* COALESCE */
-
-enum {
-	ETHTOOL_A_COALESCE_UNSPEC,
-	ETHTOOL_A_COALESCE_HEADER,			/* nest - _A_HEADER_* */
-	ETHTOOL_A_COALESCE_RX_USECS,			/* u32 */
-	ETHTOOL_A_COALESCE_RX_MAX_FRAMES,		/* u32 */
-	ETHTOOL_A_COALESCE_RX_USECS_IRQ,		/* u32 */
-	ETHTOOL_A_COALESCE_RX_MAX_FRAMES_IRQ,		/* u32 */
-	ETHTOOL_A_COALESCE_TX_USECS,			/* u32 */
-	ETHTOOL_A_COALESCE_TX_MAX_FRAMES,		/* u32 */
-	ETHTOOL_A_COALESCE_TX_USECS_IRQ,		/* u32 */
-	ETHTOOL_A_COALESCE_TX_MAX_FRAMES_IRQ,		/* u32 */
-	ETHTOOL_A_COALESCE_STATS_BLOCK_USECS,		/* u32 */
-	ETHTOOL_A_COALESCE_USE_ADAPTIVE_RX,		/* u8 */
-	ETHTOOL_A_COALESCE_USE_ADAPTIVE_TX,		/* u8 */
-	ETHTOOL_A_COALESCE_PKT_RATE_LOW,		/* u32 */
-	ETHTOOL_A_COALESCE_RX_USECS_LOW,		/* u32 */
-	ETHTOOL_A_COALESCE_RX_MAX_FRAMES_LOW,		/* u32 */
-	ETHTOOL_A_COALESCE_TX_USECS_LOW,		/* u32 */
-	ETHTOOL_A_COALESCE_TX_MAX_FRAMES_LOW,		/* u32 */
-	ETHTOOL_A_COALESCE_PKT_RATE_HIGH,		/* u32 */
-	ETHTOOL_A_COALESCE_RX_USECS_HIGH,		/* u32 */
-	ETHTOOL_A_COALESCE_RX_MAX_FRAMES_HIGH,		/* u32 */
-	ETHTOOL_A_COALESCE_TX_USECS_HIGH,		/* u32 */
-	ETHTOOL_A_COALESCE_TX_MAX_FRAMES_HIGH,		/* u32 */
-	ETHTOOL_A_COALESCE_RATE_SAMPLE_INTERVAL,	/* u32 */
-	ETHTOOL_A_COALESCE_USE_CQE_MODE_TX,		/* u8 */
-	ETHTOOL_A_COALESCE_USE_CQE_MODE_RX,		/* u8 */
-	ETHTOOL_A_COALESCE_TX_AGGR_MAX_BYTES,		/* u32 */
-	ETHTOOL_A_COALESCE_TX_AGGR_MAX_FRAMES,		/* u32 */
-	ETHTOOL_A_COALESCE_TX_AGGR_TIME_USECS,		/* u32 */
-	/* nest - _A_PROFILE_IRQ_MODERATION */
-	ETHTOOL_A_COALESCE_RX_PROFILE,
-	/* nest - _A_PROFILE_IRQ_MODERATION */
-	ETHTOOL_A_COALESCE_TX_PROFILE,
-
-	/* add new constants above here */
-	__ETHTOOL_A_COALESCE_CNT,
-	ETHTOOL_A_COALESCE_MAX = (__ETHTOOL_A_COALESCE_CNT - 1)
-};
-
-enum {
-	ETHTOOL_A_PROFILE_UNSPEC,
-	/* nest, _A_IRQ_MODERATION_* */
-	ETHTOOL_A_PROFILE_IRQ_MODERATION,
-	__ETHTOOL_A_PROFILE_CNT,
-	ETHTOOL_A_PROFILE_MAX = (__ETHTOOL_A_PROFILE_CNT - 1)
-};
-
-enum {
-	ETHTOOL_A_IRQ_MODERATION_UNSPEC,
-	ETHTOOL_A_IRQ_MODERATION_USEC,			/* u32 */
-	ETHTOOL_A_IRQ_MODERATION_PKTS,			/* u32 */
-	ETHTOOL_A_IRQ_MODERATION_COMPS,			/* u32 */
-
-	__ETHTOOL_A_IRQ_MODERATION_CNT,
-	ETHTOOL_A_IRQ_MODERATION_MAX = (__ETHTOOL_A_IRQ_MODERATION_CNT - 1)
-};
-
-/* PAUSE */
-
-enum {
-	ETHTOOL_A_PAUSE_UNSPEC,
-	ETHTOOL_A_PAUSE_HEADER,				/* nest - _A_HEADER_* */
-	ETHTOOL_A_PAUSE_AUTONEG,			/* u8 */
-	ETHTOOL_A_PAUSE_RX,				/* u8 */
-	ETHTOOL_A_PAUSE_TX,				/* u8 */
-	ETHTOOL_A_PAUSE_STATS,				/* nest - _PAUSE_STAT_* */
-	ETHTOOL_A_PAUSE_STATS_SRC,			/* u32 */
-
-	/* add new constants above here */
-	__ETHTOOL_A_PAUSE_CNT,
-	ETHTOOL_A_PAUSE_MAX = (__ETHTOOL_A_PAUSE_CNT - 1)
-};
-
-enum {
-	ETHTOOL_A_PAUSE_STAT_UNSPEC,
-	ETHTOOL_A_PAUSE_STAT_PAD,
-
-	ETHTOOL_A_PAUSE_STAT_TX_FRAMES,
-	ETHTOOL_A_PAUSE_STAT_RX_FRAMES,
-
-	/* add new constants above here
-	 * adjust ETHTOOL_PAUSE_STAT_CNT if adding non-stats!
-	 */
-	__ETHTOOL_A_PAUSE_STAT_CNT,
-	ETHTOOL_A_PAUSE_STAT_MAX = (__ETHTOOL_A_PAUSE_STAT_CNT - 1)
-};
-
-/* EEE */
-
-enum {
-	ETHTOOL_A_EEE_UNSPEC,
-	ETHTOOL_A_EEE_HEADER,				/* nest - _A_HEADER_* */
-	ETHTOOL_A_EEE_MODES_OURS,			/* bitset */
-	ETHTOOL_A_EEE_MODES_PEER,			/* bitset */
-	ETHTOOL_A_EEE_ACTIVE,				/* u8 */
-	ETHTOOL_A_EEE_ENABLED,				/* u8 */
-	ETHTOOL_A_EEE_TX_LPI_ENABLED,			/* u8 */
-	ETHTOOL_A_EEE_TX_LPI_TIMER,			/* u32 */
-
-	/* add new constants above here */
-	__ETHTOOL_A_EEE_CNT,
-	ETHTOOL_A_EEE_MAX = (__ETHTOOL_A_EEE_CNT - 1)
-};
-
-/* TSINFO */
-
-enum {
-	ETHTOOL_A_TSINFO_UNSPEC,
-	ETHTOOL_A_TSINFO_HEADER,			/* nest - _A_HEADER_* */
-	ETHTOOL_A_TSINFO_TIMESTAMPING,			/* bitset */
-	ETHTOOL_A_TSINFO_TX_TYPES,			/* bitset */
-	ETHTOOL_A_TSINFO_RX_FILTERS,			/* bitset */
-	ETHTOOL_A_TSINFO_PHC_INDEX,			/* u32 */
-	ETHTOOL_A_TSINFO_STATS,				/* nest - _A_TSINFO_STAT */
-
-	/* add new constants above here */
-	__ETHTOOL_A_TSINFO_CNT,
-	ETHTOOL_A_TSINFO_MAX = (__ETHTOOL_A_TSINFO_CNT - 1)
-};
-
-enum {
-	ETHTOOL_A_TS_STAT_UNSPEC,
-
-	ETHTOOL_A_TS_STAT_TX_PKTS,			/* uint */
-	ETHTOOL_A_TS_STAT_TX_LOST,			/* uint */
-	ETHTOOL_A_TS_STAT_TX_ERR,			/* uint */
-
-	/* add new constants above here */
-	__ETHTOOL_A_TS_STAT_CNT,
-	ETHTOOL_A_TS_STAT_MAX = (__ETHTOOL_A_TS_STAT_CNT - 1)
-
-};
-
-/* PHC VCLOCKS */
-
-enum {
-	ETHTOOL_A_PHC_VCLOCKS_UNSPEC,
-	ETHTOOL_A_PHC_VCLOCKS_HEADER,			/* nest - _A_HEADER_* */
-	ETHTOOL_A_PHC_VCLOCKS_NUM,			/* u32 */
-	ETHTOOL_A_PHC_VCLOCKS_INDEX,			/* array, s32 */
-
-	/* add new constants above here */
-	__ETHTOOL_A_PHC_VCLOCKS_CNT,
-	ETHTOOL_A_PHC_VCLOCKS_MAX = (__ETHTOOL_A_PHC_VCLOCKS_CNT - 1)
-};
-
-/* CABLE TEST */
-
-enum {
-	ETHTOOL_A_CABLE_TEST_UNSPEC,
-	ETHTOOL_A_CABLE_TEST_HEADER,		/* nest - _A_HEADER_* */
-
-	/* add new constants above here */
-	__ETHTOOL_A_CABLE_TEST_CNT,
-	ETHTOOL_A_CABLE_TEST_MAX = __ETHTOOL_A_CABLE_TEST_CNT - 1
-};
-
 /* CABLE TEST NOTIFY */
 enum {
 	ETHTOOL_A_CABLE_RESULT_CODE_UNSPEC,
@@ -582,74 +49,12 @@ enum {
 	ETHTOOL_A_CABLE_INF_SRC_ALCD,
 };
 
-enum {
-	ETHTOOL_A_CABLE_RESULT_UNSPEC,
-	ETHTOOL_A_CABLE_RESULT_PAIR,		/* u8 ETHTOOL_A_CABLE_PAIR_ */
-	ETHTOOL_A_CABLE_RESULT_CODE,		/* u8 ETHTOOL_A_CABLE_RESULT_CODE_ */
-	ETHTOOL_A_CABLE_RESULT_SRC,		/* u32 ETHTOOL_A_CABLE_INF_SRC_ */
-
-	__ETHTOOL_A_CABLE_RESULT_CNT,
-	ETHTOOL_A_CABLE_RESULT_MAX = (__ETHTOOL_A_CABLE_RESULT_CNT - 1)
-};
-
-enum {
-	ETHTOOL_A_CABLE_FAULT_LENGTH_UNSPEC,
-	ETHTOOL_A_CABLE_FAULT_LENGTH_PAIR,	/* u8 ETHTOOL_A_CABLE_PAIR_ */
-	ETHTOOL_A_CABLE_FAULT_LENGTH_CM,	/* u32 */
-	ETHTOOL_A_CABLE_FAULT_LENGTH_SRC,	/* u32 ETHTOOL_A_CABLE_INF_SRC_ */
-
-	__ETHTOOL_A_CABLE_FAULT_LENGTH_CNT,
-	ETHTOOL_A_CABLE_FAULT_LENGTH_MAX = (__ETHTOOL_A_CABLE_FAULT_LENGTH_CNT - 1)
-};
-
 enum {
 	ETHTOOL_A_CABLE_TEST_NTF_STATUS_UNSPEC,
 	ETHTOOL_A_CABLE_TEST_NTF_STATUS_STARTED,
 	ETHTOOL_A_CABLE_TEST_NTF_STATUS_COMPLETED
 };
 
-enum {
-	ETHTOOL_A_CABLE_NEST_UNSPEC,
-	ETHTOOL_A_CABLE_NEST_RESULT,		/* nest - ETHTOOL_A_CABLE_RESULT_ */
-	ETHTOOL_A_CABLE_NEST_FAULT_LENGTH,	/* nest - ETHTOOL_A_CABLE_FAULT_LENGTH_ */
-	__ETHTOOL_A_CABLE_NEST_CNT,
-	ETHTOOL_A_CABLE_NEST_MAX = (__ETHTOOL_A_CABLE_NEST_CNT - 1)
-};
-
-enum {
-	ETHTOOL_A_CABLE_TEST_NTF_UNSPEC,
-	ETHTOOL_A_CABLE_TEST_NTF_HEADER,	/* nest - ETHTOOL_A_HEADER_* */
-	ETHTOOL_A_CABLE_TEST_NTF_STATUS,	/* u8 - _STARTED/_COMPLETE */
-	ETHTOOL_A_CABLE_TEST_NTF_NEST,		/* nest - of results: */
-
-	__ETHTOOL_A_CABLE_TEST_NTF_CNT,
-	ETHTOOL_A_CABLE_TEST_NTF_MAX = (__ETHTOOL_A_CABLE_TEST_NTF_CNT - 1)
-};
-
-/* CABLE TEST TDR */
-
-enum {
-	ETHTOOL_A_CABLE_TEST_TDR_CFG_UNSPEC,
-	ETHTOOL_A_CABLE_TEST_TDR_CFG_FIRST,		/* u32 */
-	ETHTOOL_A_CABLE_TEST_TDR_CFG_LAST,		/* u32 */
-	ETHTOOL_A_CABLE_TEST_TDR_CFG_STEP,		/* u32 */
-	ETHTOOL_A_CABLE_TEST_TDR_CFG_PAIR,		/* u8 */
-
-	/* add new constants above here */
-	__ETHTOOL_A_CABLE_TEST_TDR_CFG_CNT,
-	ETHTOOL_A_CABLE_TEST_TDR_CFG_MAX = __ETHTOOL_A_CABLE_TEST_TDR_CFG_CNT - 1
-};
-
-enum {
-	ETHTOOL_A_CABLE_TEST_TDR_UNSPEC,
-	ETHTOOL_A_CABLE_TEST_TDR_HEADER,	/* nest - _A_HEADER_* */
-	ETHTOOL_A_CABLE_TEST_TDR_CFG,		/* nest - *_TDR_CFG_* */
-
-	/* add new constants above here */
-	__ETHTOOL_A_CABLE_TEST_TDR_CNT,
-	ETHTOOL_A_CABLE_TEST_TDR_MAX = __ETHTOOL_A_CABLE_TEST_TDR_CNT - 1
-};
-
 /* CABLE TEST TDR NOTIFY */
 
 enum {
@@ -689,132 +94,6 @@ enum {
 	ETHTOOL_A_CABLE_TDR_NEST_MAX = (__ETHTOOL_A_CABLE_TDR_NEST_CNT - 1)
 };
 
-enum {
-	ETHTOOL_A_CABLE_TEST_TDR_NTF_UNSPEC,
-	ETHTOOL_A_CABLE_TEST_TDR_NTF_HEADER,	/* nest - ETHTOOL_A_HEADER_* */
-	ETHTOOL_A_CABLE_TEST_TDR_NTF_STATUS,	/* u8 - _STARTED/_COMPLETE */
-	ETHTOOL_A_CABLE_TEST_TDR_NTF_NEST,	/* nest - of results: */
-
-	/* add new constants above here */
-	__ETHTOOL_A_CABLE_TEST_TDR_NTF_CNT,
-	ETHTOOL_A_CABLE_TEST_TDR_NTF_MAX = __ETHTOOL_A_CABLE_TEST_TDR_NTF_CNT - 1
-};
-
-/* TUNNEL INFO */
-
-enum {
-	ETHTOOL_UDP_TUNNEL_TYPE_VXLAN,
-	ETHTOOL_UDP_TUNNEL_TYPE_GENEVE,
-	ETHTOOL_UDP_TUNNEL_TYPE_VXLAN_GPE,
-
-	__ETHTOOL_UDP_TUNNEL_TYPE_CNT
-};
-
-enum {
-	ETHTOOL_A_TUNNEL_UDP_ENTRY_UNSPEC,
-
-	ETHTOOL_A_TUNNEL_UDP_ENTRY_PORT,		/* be16 */
-	ETHTOOL_A_TUNNEL_UDP_ENTRY_TYPE,		/* u32 */
-
-	/* add new constants above here */
-	__ETHTOOL_A_TUNNEL_UDP_ENTRY_CNT,
-	ETHTOOL_A_TUNNEL_UDP_ENTRY_MAX = (__ETHTOOL_A_TUNNEL_UDP_ENTRY_CNT - 1)
-};
-
-enum {
-	ETHTOOL_A_TUNNEL_UDP_TABLE_UNSPEC,
-
-	ETHTOOL_A_TUNNEL_UDP_TABLE_SIZE,		/* u32 */
-	ETHTOOL_A_TUNNEL_UDP_TABLE_TYPES,		/* bitset */
-	ETHTOOL_A_TUNNEL_UDP_TABLE_ENTRY,		/* nest - _UDP_ENTRY_* */
-
-	/* add new constants above here */
-	__ETHTOOL_A_TUNNEL_UDP_TABLE_CNT,
-	ETHTOOL_A_TUNNEL_UDP_TABLE_MAX = (__ETHTOOL_A_TUNNEL_UDP_TABLE_CNT - 1)
-};
-
-enum {
-	ETHTOOL_A_TUNNEL_UDP_UNSPEC,
-
-	ETHTOOL_A_TUNNEL_UDP_TABLE,			/* nest - _UDP_TABLE_* */
-
-	/* add new constants above here */
-	__ETHTOOL_A_TUNNEL_UDP_CNT,
-	ETHTOOL_A_TUNNEL_UDP_MAX = (__ETHTOOL_A_TUNNEL_UDP_CNT - 1)
-};
-
-enum {
-	ETHTOOL_A_TUNNEL_INFO_UNSPEC,
-	ETHTOOL_A_TUNNEL_INFO_HEADER,			/* nest - _A_HEADER_* */
-
-	ETHTOOL_A_TUNNEL_INFO_UDP_PORTS,		/* nest - _UDP_TABLE */
-
-	/* add new constants above here */
-	__ETHTOOL_A_TUNNEL_INFO_CNT,
-	ETHTOOL_A_TUNNEL_INFO_MAX = (__ETHTOOL_A_TUNNEL_INFO_CNT - 1)
-};
-
-/* FEC */
-
-enum {
-	ETHTOOL_A_FEC_UNSPEC,
-	ETHTOOL_A_FEC_HEADER,				/* nest - _A_HEADER_* */
-	ETHTOOL_A_FEC_MODES,				/* bitset */
-	ETHTOOL_A_FEC_AUTO,				/* u8 */
-	ETHTOOL_A_FEC_ACTIVE,				/* u32 */
-	ETHTOOL_A_FEC_STATS,				/* nest - _A_FEC_STAT */
-
-	__ETHTOOL_A_FEC_CNT,
-	ETHTOOL_A_FEC_MAX = (__ETHTOOL_A_FEC_CNT - 1)
-};
-
-enum {
-	ETHTOOL_A_FEC_STAT_UNSPEC,
-	ETHTOOL_A_FEC_STAT_PAD,
-
-	ETHTOOL_A_FEC_STAT_CORRECTED,			/* array, u64 */
-	ETHTOOL_A_FEC_STAT_UNCORR,			/* array, u64 */
-	ETHTOOL_A_FEC_STAT_CORR_BITS,			/* array, u64 */
-
-	/* add new constants above here */
-	__ETHTOOL_A_FEC_STAT_CNT,
-	ETHTOOL_A_FEC_STAT_MAX = (__ETHTOOL_A_FEC_STAT_CNT - 1)
-};
-
-/* MODULE EEPROM */
-
-enum {
-	ETHTOOL_A_MODULE_EEPROM_UNSPEC,
-	ETHTOOL_A_MODULE_EEPROM_HEADER,			/* nest - _A_HEADER_* */
-
-	ETHTOOL_A_MODULE_EEPROM_OFFSET,			/* u32 */
-	ETHTOOL_A_MODULE_EEPROM_LENGTH,			/* u32 */
-	ETHTOOL_A_MODULE_EEPROM_PAGE,			/* u8 */
-	ETHTOOL_A_MODULE_EEPROM_BANK,			/* u8 */
-	ETHTOOL_A_MODULE_EEPROM_I2C_ADDRESS,		/* u8 */
-	ETHTOOL_A_MODULE_EEPROM_DATA,			/* binary */
-
-	__ETHTOOL_A_MODULE_EEPROM_CNT,
-	ETHTOOL_A_MODULE_EEPROM_MAX = (__ETHTOOL_A_MODULE_EEPROM_CNT - 1)
-};
-
-/* STATS */
-
-enum {
-	ETHTOOL_A_STATS_UNSPEC,
-	ETHTOOL_A_STATS_PAD,
-	ETHTOOL_A_STATS_HEADER,			/* nest - _A_HEADER_* */
-	ETHTOOL_A_STATS_GROUPS,			/* bitset */
-
-	ETHTOOL_A_STATS_GRP,			/* nest - _A_STATS_GRP_* */
-
-	ETHTOOL_A_STATS_SRC,			/* u32 */
-
-	/* add new constants above here */
-	__ETHTOOL_A_STATS_CNT,
-	ETHTOOL_A_STATS_MAX = (__ETHTOOL_A_STATS_CNT - 1)
-};
-
 enum {
 	ETHTOOL_STATS_ETH_PHY,
 	ETHTOOL_STATS_ETH_MAC,
@@ -825,27 +104,6 @@ enum {
 	__ETHTOOL_STATS_CNT
 };
 
-enum {
-	ETHTOOL_A_STATS_GRP_UNSPEC,
-	ETHTOOL_A_STATS_GRP_PAD,
-
-	ETHTOOL_A_STATS_GRP_ID,			/* u32 */
-	ETHTOOL_A_STATS_GRP_SS_ID,		/* u32 */
-
-	ETHTOOL_A_STATS_GRP_STAT,		/* nest */
-
-	ETHTOOL_A_STATS_GRP_HIST_RX,		/* nest */
-	ETHTOOL_A_STATS_GRP_HIST_TX,		/* nest */
-
-	ETHTOOL_A_STATS_GRP_HIST_BKT_LOW,	/* u32 */
-	ETHTOOL_A_STATS_GRP_HIST_BKT_HI,	/* u32 */
-	ETHTOOL_A_STATS_GRP_HIST_VAL,		/* u64 */
-
-	/* add new constants above here */
-	__ETHTOOL_A_STATS_GRP_CNT,
-	ETHTOOL_A_STATS_GRP_MAX = (__ETHTOOL_A_STATS_GRP_CNT - 1)
-};
-
 enum {
 	/* 30.3.2.1.5 aSymbolErrorDuringCarrier */
 	ETHTOOL_A_STATS_ETH_PHY_5_SYM_ERR,
@@ -935,155 +193,6 @@ enum {
 	ETHTOOL_A_STATS_RMON_MAX = (__ETHTOOL_A_STATS_RMON_CNT - 1)
 };
 
-/* MODULE */
-
-enum {
-	ETHTOOL_A_MODULE_UNSPEC,
-	ETHTOOL_A_MODULE_HEADER,		/* nest - _A_HEADER_* */
-	ETHTOOL_A_MODULE_POWER_MODE_POLICY,	/* u8 */
-	ETHTOOL_A_MODULE_POWER_MODE,		/* u8 */
-
-	/* add new constants above here */
-	__ETHTOOL_A_MODULE_CNT,
-	ETHTOOL_A_MODULE_MAX = (__ETHTOOL_A_MODULE_CNT - 1)
-};
-
-/* Power Sourcing Equipment */
-enum {
-	ETHTOOL_A_C33_PSE_PW_LIMIT_UNSPEC,
-	ETHTOOL_A_C33_PSE_PW_LIMIT_MIN,	/* u32 */
-	ETHTOOL_A_C33_PSE_PW_LIMIT_MAX,	/* u32 */
-};
-
-enum {
-	ETHTOOL_A_PSE_UNSPEC,
-	ETHTOOL_A_PSE_HEADER,			/* nest - _A_HEADER_* */
-	ETHTOOL_A_PODL_PSE_ADMIN_STATE,		/* u32 */
-	ETHTOOL_A_PODL_PSE_ADMIN_CONTROL,	/* u32 */
-	ETHTOOL_A_PODL_PSE_PW_D_STATUS,		/* u32 */
-	ETHTOOL_A_C33_PSE_ADMIN_STATE,		/* u32 */
-	ETHTOOL_A_C33_PSE_ADMIN_CONTROL,	/* u32 */
-	ETHTOOL_A_C33_PSE_PW_D_STATUS,		/* u32 */
-	ETHTOOL_A_C33_PSE_PW_CLASS,		/* u32 */
-	ETHTOOL_A_C33_PSE_ACTUAL_PW,		/* u32 */
-	ETHTOOL_A_C33_PSE_EXT_STATE,		/* u32 */
-	ETHTOOL_A_C33_PSE_EXT_SUBSTATE,		/* u32 */
-	ETHTOOL_A_C33_PSE_AVAIL_PW_LIMIT,	/* u32 */
-	ETHTOOL_A_C33_PSE_PW_LIMIT_RANGES,	/* nest - _C33_PSE_PW_LIMIT_* */
-
-	/* add new constants above here */
-	__ETHTOOL_A_PSE_CNT,
-	ETHTOOL_A_PSE_MAX = (__ETHTOOL_A_PSE_CNT - 1)
-};
-
-enum {
-	ETHTOOL_A_RSS_UNSPEC,
-	ETHTOOL_A_RSS_HEADER,
-	ETHTOOL_A_RSS_CONTEXT,		/* u32 */
-	ETHTOOL_A_RSS_HFUNC,		/* u32 */
-	ETHTOOL_A_RSS_INDIR,		/* binary */
-	ETHTOOL_A_RSS_HKEY,		/* binary */
-	ETHTOOL_A_RSS_INPUT_XFRM,	/* u32 */
-	ETHTOOL_A_RSS_START_CONTEXT,	/* u32 */
-
-	__ETHTOOL_A_RSS_CNT,
-	ETHTOOL_A_RSS_MAX = (__ETHTOOL_A_RSS_CNT - 1),
-};
-
-/* PLCA */
-
-enum {
-	ETHTOOL_A_PLCA_UNSPEC,
-	ETHTOOL_A_PLCA_HEADER,			/* nest - _A_HEADER_* */
-	ETHTOOL_A_PLCA_VERSION,			/* u16 */
-	ETHTOOL_A_PLCA_ENABLED,			/* u8  */
-	ETHTOOL_A_PLCA_STATUS,			/* u8  */
-	ETHTOOL_A_PLCA_NODE_CNT,		/* u32 */
-	ETHTOOL_A_PLCA_NODE_ID,			/* u32 */
-	ETHTOOL_A_PLCA_TO_TMR,			/* u32 */
-	ETHTOOL_A_PLCA_BURST_CNT,		/* u32 */
-	ETHTOOL_A_PLCA_BURST_TMR,		/* u32 */
-
-	/* add new constants above here */
-	__ETHTOOL_A_PLCA_CNT,
-	ETHTOOL_A_PLCA_MAX = (__ETHTOOL_A_PLCA_CNT - 1)
-};
-
-/* MAC Merge (802.3) */
-
-enum {
-	ETHTOOL_A_MM_STAT_UNSPEC,
-	ETHTOOL_A_MM_STAT_PAD,
-
-	/* aMACMergeFrameAssErrorCount */
-	ETHTOOL_A_MM_STAT_REASSEMBLY_ERRORS,	/* u64 */
-	/* aMACMergeFrameSmdErrorCount */
-	ETHTOOL_A_MM_STAT_SMD_ERRORS,		/* u64 */
-	/* aMACMergeFrameAssOkCount */
-	ETHTOOL_A_MM_STAT_REASSEMBLY_OK,	/* u64 */
-	/* aMACMergeFragCountRx */
-	ETHTOOL_A_MM_STAT_RX_FRAG_COUNT,	/* u64 */
-	/* aMACMergeFragCountTx */
-	ETHTOOL_A_MM_STAT_TX_FRAG_COUNT,	/* u64 */
-	/* aMACMergeHoldCount */
-	ETHTOOL_A_MM_STAT_HOLD_COUNT,		/* u64 */
-
-	/* add new constants above here */
-	__ETHTOOL_A_MM_STAT_CNT,
-	ETHTOOL_A_MM_STAT_MAX = (__ETHTOOL_A_MM_STAT_CNT - 1)
-};
-
-enum {
-	ETHTOOL_A_MM_UNSPEC,
-	ETHTOOL_A_MM_HEADER,			/* nest - _A_HEADER_* */
-	ETHTOOL_A_MM_PMAC_ENABLED,		/* u8 */
-	ETHTOOL_A_MM_TX_ENABLED,		/* u8 */
-	ETHTOOL_A_MM_TX_ACTIVE,			/* u8 */
-	ETHTOOL_A_MM_TX_MIN_FRAG_SIZE,		/* u32 */
-	ETHTOOL_A_MM_RX_MIN_FRAG_SIZE,		/* u32 */
-	ETHTOOL_A_MM_VERIFY_ENABLED,		/* u8 */
-	ETHTOOL_A_MM_VERIFY_STATUS,		/* u8 */
-	ETHTOOL_A_MM_VERIFY_TIME,		/* u32 */
-	ETHTOOL_A_MM_MAX_VERIFY_TIME,		/* u32 */
-	ETHTOOL_A_MM_STATS,			/* nest - _A_MM_STAT_* */
-
-	/* add new constants above here */
-	__ETHTOOL_A_MM_CNT,
-	ETHTOOL_A_MM_MAX = (__ETHTOOL_A_MM_CNT - 1)
-};
-
-/* MODULE_FW_FLASH */
-
-enum {
-	ETHTOOL_A_MODULE_FW_FLASH_UNSPEC,
-	ETHTOOL_A_MODULE_FW_FLASH_HEADER,		/* nest - _A_HEADER_* */
-	ETHTOOL_A_MODULE_FW_FLASH_FILE_NAME,		/* string */
-	ETHTOOL_A_MODULE_FW_FLASH_PASSWORD,		/* u32 */
-	ETHTOOL_A_MODULE_FW_FLASH_STATUS,		/* u32 */
-	ETHTOOL_A_MODULE_FW_FLASH_STATUS_MSG,		/* string */
-	ETHTOOL_A_MODULE_FW_FLASH_DONE,			/* uint */
-	ETHTOOL_A_MODULE_FW_FLASH_TOTAL,		/* uint */
-
-	/* add new constants above here */
-	__ETHTOOL_A_MODULE_FW_FLASH_CNT,
-	ETHTOOL_A_MODULE_FW_FLASH_MAX = (__ETHTOOL_A_MODULE_FW_FLASH_CNT - 1)
-};
-
-enum {
-	ETHTOOL_A_PHY_UNSPEC,
-	ETHTOOL_A_PHY_HEADER,			/* nest - _A_HEADER_* */
-	ETHTOOL_A_PHY_INDEX,			/* u32 */
-	ETHTOOL_A_PHY_DRVNAME,			/* string */
-	ETHTOOL_A_PHY_NAME,			/* string */
-	ETHTOOL_A_PHY_UPSTREAM_TYPE,		/* u32 */
-	ETHTOOL_A_PHY_UPSTREAM_INDEX,		/* u32 */
-	ETHTOOL_A_PHY_UPSTREAM_SFP_NAME,	/* string */
-	ETHTOOL_A_PHY_DOWNSTREAM_SFP_NAME,	/* string */
-
-	/* add new constants above here */
-	__ETHTOOL_A_PHY_CNT,
-	ETHTOOL_A_PHY_MAX = (__ETHTOOL_A_PHY_CNT - 1)
-};
 
 /* generic netlink info */
 #define ETHTOOL_GENL_NAME "ethtool"
diff --git a/include/uapi/linux/ethtool_netlink_generated.h b/include/uapi/linux/ethtool_netlink_generated.h
new file mode 100644
index 000000000000..4b4bf17d1a88
--- /dev/null
+++ b/include/uapi/linux/ethtool_netlink_generated.h
@@ -0,0 +1,899 @@
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
+#ifndef _UAPI_LINUX_ETHTOOL_NETLINK_GENERATED_H
+#define _UAPI_LINUX_ETHTOOL_NETLINK_GENERATED_H
+
+/* TUNNEL INFO */
+
+enum {
+	ETHTOOL_UDP_TUNNEL_TYPE_VXLAN,
+	ETHTOOL_UDP_TUNNEL_TYPE_GENEVE,
+	ETHTOOL_UDP_TUNNEL_TYPE_VXLAN_GPE,
+
+	__ETHTOOL_UDP_TUNNEL_TYPE_CNT
+};
+
+/* request header */
+
+enum ethtool_header_flags {
+	ETHTOOL_FLAG_COMPACT_BITSETS	= 1 << 0,	/* use compact bitsets in reply */
+	ETHTOOL_FLAG_OMIT_REPLY		= 1 << 1,	/* provide optional reply for SET or ACT requests */
+	ETHTOOL_FLAG_STATS		= 1 << 2,	/* request statistics, if supported by the driver */
+};
+
+enum {
+	ETHTOOL_TCP_DATA_SPLIT_UNKNOWN = 0,
+	ETHTOOL_TCP_DATA_SPLIT_DISABLED,
+	ETHTOOL_TCP_DATA_SPLIT_ENABLED,
+};
+
+enum {
+	ETHTOOL_A_HEADER_UNSPEC,
+	ETHTOOL_A_HEADER_DEV_INDEX,		/* u32 */
+	ETHTOOL_A_HEADER_DEV_NAME,		/* string */
+	ETHTOOL_A_HEADER_FLAGS,			/* u32 - ETHTOOL_FLAG_* */
+	ETHTOOL_A_HEADER_PHY_INDEX,		/* u32 */
+
+	/* add new constants above here */
+	__ETHTOOL_A_HEADER_CNT,
+	ETHTOOL_A_HEADER_MAX = __ETHTOOL_A_HEADER_CNT - 1
+};
+
+/* bit sets */
+
+enum {
+	ETHTOOL_A_BITSET_BIT_UNSPEC,
+	ETHTOOL_A_BITSET_BIT_INDEX,		/* u32 */
+	ETHTOOL_A_BITSET_BIT_NAME,		/* string */
+	ETHTOOL_A_BITSET_BIT_VALUE,		/* flag */
+
+	/* add new constants above here */
+	__ETHTOOL_A_BITSET_BIT_CNT,
+	ETHTOOL_A_BITSET_BIT_MAX = __ETHTOOL_A_BITSET_BIT_CNT - 1
+};
+
+enum {
+	ETHTOOL_A_BITSET_BITS_UNSPEC,
+	ETHTOOL_A_BITSET_BITS_BIT,		/* nest - _A_BITSET_BIT_* */
+
+	/* add new constants above here */
+	__ETHTOOL_A_BITSET_BITS_CNT,
+	ETHTOOL_A_BITSET_BITS_MAX = __ETHTOOL_A_BITSET_BITS_CNT - 1
+};
+
+enum {
+	ETHTOOL_A_BITSET_UNSPEC,
+	ETHTOOL_A_BITSET_NOMASK,		/* flag */
+	ETHTOOL_A_BITSET_SIZE,			/* u32 */
+	ETHTOOL_A_BITSET_BITS,			/* nest - _A_BITSET_BITS_* */
+	ETHTOOL_A_BITSET_VALUE,			/* binary */
+	ETHTOOL_A_BITSET_MASK,			/* binary */
+
+	/* add new constants above here */
+	__ETHTOOL_A_BITSET_CNT,
+	ETHTOOL_A_BITSET_MAX = __ETHTOOL_A_BITSET_CNT - 1
+};
+
+/* string sets */
+
+enum {
+	ETHTOOL_A_STRING_UNSPEC,
+	ETHTOOL_A_STRING_INDEX,			/* u32 */
+	ETHTOOL_A_STRING_VALUE,			/* string */
+
+	/* add new constants above here */
+	__ETHTOOL_A_STRING_CNT,
+	ETHTOOL_A_STRING_MAX = __ETHTOOL_A_STRING_CNT - 1
+};
+
+enum {
+	ETHTOOL_A_STRINGS_UNSPEC,
+	ETHTOOL_A_STRINGS_STRING,		/* nest - _A_STRINGS_* */
+
+	/* add new constants above here */
+	__ETHTOOL_A_STRINGS_CNT,
+	ETHTOOL_A_STRINGS_MAX = __ETHTOOL_A_STRINGS_CNT - 1
+};
+
+enum {
+	ETHTOOL_A_STRINGSET_UNSPEC,
+	ETHTOOL_A_STRINGSET_ID,			/* u32 */
+	ETHTOOL_A_STRINGSET_COUNT,		/* u32 */
+	ETHTOOL_A_STRINGSET_STRINGS,		/* nest - _A_STRINGS_* */
+
+	/* add new constants above here */
+	__ETHTOOL_A_STRINGSET_CNT,
+	ETHTOOL_A_STRINGSET_MAX = __ETHTOOL_A_STRINGSET_CNT - 1
+};
+
+enum {
+	ETHTOOL_A_STRINGSETS_UNSPEC,
+	ETHTOOL_A_STRINGSETS_STRINGSET,		/* nest - _A_STRINGSET_* */
+
+	/* add new constants above here */
+	__ETHTOOL_A_STRINGSETS_CNT,
+	ETHTOOL_A_STRINGSETS_MAX = __ETHTOOL_A_STRINGSETS_CNT - 1
+};
+
+/* STRSET */
+
+enum {
+	ETHTOOL_A_STRSET_UNSPEC,
+	ETHTOOL_A_STRSET_HEADER,		/* nest - _A_HEADER_* */
+	ETHTOOL_A_STRSET_STRINGSETS,		/* nest - _A_STRINGSETS_* */
+	ETHTOOL_A_STRSET_COUNTS_ONLY,		/* flag */
+
+	/* add new constants above here */
+	__ETHTOOL_A_STRSET_CNT,
+	ETHTOOL_A_STRSET_MAX = __ETHTOOL_A_STRSET_CNT - 1
+};
+
+/* PRIVFLAGS */
+
+enum {
+	ETHTOOL_A_PRIVFLAGS_UNSPEC,
+	ETHTOOL_A_PRIVFLAGS_HEADER,			/* nest - _A_HEADER_* */
+	ETHTOOL_A_PRIVFLAGS_FLAGS,			/* bitset */
+
+	/* add new constants above here */
+	__ETHTOOL_A_PRIVFLAGS_CNT,
+	ETHTOOL_A_PRIVFLAGS_MAX = __ETHTOOL_A_PRIVFLAGS_CNT - 1
+};
+
+/* RINGS */
+
+enum {
+	ETHTOOL_A_RINGS_UNSPEC,
+	ETHTOOL_A_RINGS_HEADER,				/* nest - _A_HEADER_* */
+	ETHTOOL_A_RINGS_RX_MAX,				/* u32 */
+	ETHTOOL_A_RINGS_RX_MINI_MAX,			/* u32 */
+	ETHTOOL_A_RINGS_RX_JUMBO_MAX,			/* u32 */
+	ETHTOOL_A_RINGS_TX_MAX,				/* u32 */
+	ETHTOOL_A_RINGS_RX,				/* u32 */
+	ETHTOOL_A_RINGS_RX_MINI,			/* u32 */
+	ETHTOOL_A_RINGS_RX_JUMBO,			/* u32 */
+	ETHTOOL_A_RINGS_TX,				/* u32 */
+	ETHTOOL_A_RINGS_RX_BUF_LEN,                     /* u32 */
+	ETHTOOL_A_RINGS_TCP_DATA_SPLIT,			/* u8 */
+	ETHTOOL_A_RINGS_CQE_SIZE,			/* u32 */
+	ETHTOOL_A_RINGS_TX_PUSH,			/* u8 */
+	ETHTOOL_A_RINGS_RX_PUSH,			/* u8 */
+	ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN,		/* u32 */
+	ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN_MAX,		/* u32 */
+
+	/* add new constants above here */
+	__ETHTOOL_A_RINGS_CNT,
+	ETHTOOL_A_RINGS_MAX = (__ETHTOOL_A_RINGS_CNT - 1)
+};
+
+/* MAC Merge (802.3) */
+
+enum {
+	ETHTOOL_A_MM_STAT_UNSPEC,
+	ETHTOOL_A_MM_STAT_PAD,
+
+	/* aMACMergeFrameAssErrorCount */
+	ETHTOOL_A_MM_STAT_REASSEMBLY_ERRORS,	/* u64 */
+	/* aMACMergeFrameSmdErrorCount */
+	ETHTOOL_A_MM_STAT_SMD_ERRORS,		/* u64 */
+	/* aMACMergeFrameAssOkCount */
+	ETHTOOL_A_MM_STAT_REASSEMBLY_OK,	/* u64 */
+	/* aMACMergeFragCountRx */
+	ETHTOOL_A_MM_STAT_RX_FRAG_COUNT,	/* u64 */
+	/* aMACMergeFragCountTx */
+	ETHTOOL_A_MM_STAT_TX_FRAG_COUNT,	/* u64 */
+	/* aMACMergeHoldCount */
+	ETHTOOL_A_MM_STAT_HOLD_COUNT,		/* u64 */
+
+	/* add new constants above here */
+	__ETHTOOL_A_MM_STAT_CNT,
+	ETHTOOL_A_MM_STAT_MAX = (__ETHTOOL_A_MM_STAT_CNT - 1)
+};
+
+enum {
+	ETHTOOL_A_MM_UNSPEC,
+	ETHTOOL_A_MM_HEADER,			/* nest - _A_HEADER_* */
+	ETHTOOL_A_MM_PMAC_ENABLED,		/* u8 */
+	ETHTOOL_A_MM_TX_ENABLED,		/* u8 */
+	ETHTOOL_A_MM_TX_ACTIVE,			/* u8 */
+	ETHTOOL_A_MM_TX_MIN_FRAG_SIZE,		/* u32 */
+	ETHTOOL_A_MM_RX_MIN_FRAG_SIZE,		/* u32 */
+	ETHTOOL_A_MM_VERIFY_ENABLED,		/* u8 */
+	ETHTOOL_A_MM_VERIFY_STATUS,		/* u8 */
+	ETHTOOL_A_MM_VERIFY_TIME,		/* u32 */
+	ETHTOOL_A_MM_MAX_VERIFY_TIME,		/* u32 */
+	ETHTOOL_A_MM_STATS,			/* nest - _A_MM_STAT_* */
+
+	/* add new constants above here */
+	__ETHTOOL_A_MM_CNT,
+	ETHTOOL_A_MM_MAX = (__ETHTOOL_A_MM_CNT - 1)
+};
+
+/* LINKINFO */
+
+enum {
+	ETHTOOL_A_LINKINFO_UNSPEC,
+	ETHTOOL_A_LINKINFO_HEADER,		/* nest - _A_HEADER_* */
+	ETHTOOL_A_LINKINFO_PORT,		/* u8 */
+	ETHTOOL_A_LINKINFO_PHYADDR,		/* u8 */
+	ETHTOOL_A_LINKINFO_TP_MDIX,		/* u8 */
+	ETHTOOL_A_LINKINFO_TP_MDIX_CTRL,	/* u8 */
+	ETHTOOL_A_LINKINFO_TRANSCEIVER,		/* u8 */
+
+	/* add new constants above here */
+	__ETHTOOL_A_LINKINFO_CNT,
+	ETHTOOL_A_LINKINFO_MAX = __ETHTOOL_A_LINKINFO_CNT - 1
+};
+
+/* LINKMODES */
+
+enum {
+	ETHTOOL_A_LINKMODES_UNSPEC,
+	ETHTOOL_A_LINKMODES_HEADER,		/* nest - _A_HEADER_* */
+	ETHTOOL_A_LINKMODES_AUTONEG,		/* u8 */
+	ETHTOOL_A_LINKMODES_OURS,		/* bitset */
+	ETHTOOL_A_LINKMODES_PEER,		/* bitset */
+	ETHTOOL_A_LINKMODES_SPEED,		/* u32 */
+	ETHTOOL_A_LINKMODES_DUPLEX,		/* u8 */
+	ETHTOOL_A_LINKMODES_MASTER_SLAVE_CFG,	/* u8 */
+	ETHTOOL_A_LINKMODES_MASTER_SLAVE_STATE,	/* u8 */
+	ETHTOOL_A_LINKMODES_LANES,		/* u32 */
+	ETHTOOL_A_LINKMODES_RATE_MATCHING,	/* u8 */
+
+	/* add new constants above here */
+	__ETHTOOL_A_LINKMODES_CNT,
+	ETHTOOL_A_LINKMODES_MAX = __ETHTOOL_A_LINKMODES_CNT - 1
+};
+
+/* LINKSTATE */
+
+enum {
+	ETHTOOL_A_LINKSTATE_UNSPEC,
+	ETHTOOL_A_LINKSTATE_HEADER,		/* nest - _A_HEADER_* */
+	ETHTOOL_A_LINKSTATE_LINK,		/* u8 */
+	ETHTOOL_A_LINKSTATE_SQI,		/* u32 */
+	ETHTOOL_A_LINKSTATE_SQI_MAX,		/* u32 */
+	ETHTOOL_A_LINKSTATE_EXT_STATE,		/* u8 */
+	ETHTOOL_A_LINKSTATE_EXT_SUBSTATE,	/* u8 */
+	ETHTOOL_A_LINKSTATE_EXT_DOWN_CNT,	/* u32 */
+
+	/* add new constants above here */
+	__ETHTOOL_A_LINKSTATE_CNT,
+	ETHTOOL_A_LINKSTATE_MAX = __ETHTOOL_A_LINKSTATE_CNT - 1
+};
+
+/* DEBUG */
+
+enum {
+	ETHTOOL_A_DEBUG_UNSPEC,
+	ETHTOOL_A_DEBUG_HEADER,			/* nest - _A_HEADER_* */
+	ETHTOOL_A_DEBUG_MSGMASK,		/* bitset */
+
+	/* add new constants above here */
+	__ETHTOOL_A_DEBUG_CNT,
+	ETHTOOL_A_DEBUG_MAX = __ETHTOOL_A_DEBUG_CNT - 1
+};
+
+/* WOL */
+
+enum {
+	ETHTOOL_A_WOL_UNSPEC,
+	ETHTOOL_A_WOL_HEADER,			/* nest - _A_HEADER_* */
+	ETHTOOL_A_WOL_MODES,			/* bitset */
+	ETHTOOL_A_WOL_SOPASS,			/* binary */
+
+	/* add new constants above here */
+	__ETHTOOL_A_WOL_CNT,
+	ETHTOOL_A_WOL_MAX = __ETHTOOL_A_WOL_CNT - 1
+};
+
+/* FEATURES */
+
+enum {
+	ETHTOOL_A_FEATURES_UNSPEC,
+	ETHTOOL_A_FEATURES_HEADER,			/* nest - _A_HEADER_* */
+	ETHTOOL_A_FEATURES_HW,				/* bitset */
+	ETHTOOL_A_FEATURES_WANTED,			/* bitset */
+	ETHTOOL_A_FEATURES_ACTIVE,			/* bitset */
+	ETHTOOL_A_FEATURES_NOCHANGE,			/* bitset */
+
+	/* add new constants above here */
+	__ETHTOOL_A_FEATURES_CNT,
+	ETHTOOL_A_FEATURES_MAX = __ETHTOOL_A_FEATURES_CNT - 1
+};
+
+/* CHANNELS */
+
+enum {
+	ETHTOOL_A_CHANNELS_UNSPEC,
+	ETHTOOL_A_CHANNELS_HEADER,			/* nest - _A_HEADER_* */
+	ETHTOOL_A_CHANNELS_RX_MAX,			/* u32 */
+	ETHTOOL_A_CHANNELS_TX_MAX,			/* u32 */
+	ETHTOOL_A_CHANNELS_OTHER_MAX,			/* u32 */
+	ETHTOOL_A_CHANNELS_COMBINED_MAX,		/* u32 */
+	ETHTOOL_A_CHANNELS_RX_COUNT,			/* u32 */
+	ETHTOOL_A_CHANNELS_TX_COUNT,			/* u32 */
+	ETHTOOL_A_CHANNELS_OTHER_COUNT,			/* u32 */
+	ETHTOOL_A_CHANNELS_COMBINED_COUNT,		/* u32 */
+
+	/* add new constants above here */
+	__ETHTOOL_A_CHANNELS_CNT,
+	ETHTOOL_A_CHANNELS_MAX = (__ETHTOOL_A_CHANNELS_CNT - 1)
+};
+
+enum {
+	ETHTOOL_A_IRQ_MODERATION_UNSPEC,
+	ETHTOOL_A_IRQ_MODERATION_USEC,			/* u32 */
+	ETHTOOL_A_IRQ_MODERATION_PKTS,			/* u32 */
+	ETHTOOL_A_IRQ_MODERATION_COMPS,			/* u32 */
+
+	__ETHTOOL_A_IRQ_MODERATION_CNT,
+	ETHTOOL_A_IRQ_MODERATION_MAX = (__ETHTOOL_A_IRQ_MODERATION_CNT - 1)
+};
+
+enum {
+	ETHTOOL_A_PROFILE_UNSPEC,
+	/* nest, _A_IRQ_MODERATION_* */
+	ETHTOOL_A_PROFILE_IRQ_MODERATION,
+	__ETHTOOL_A_PROFILE_CNT,
+	ETHTOOL_A_PROFILE_MAX = (__ETHTOOL_A_PROFILE_CNT - 1)
+};
+
+/* COALESCE */
+
+enum {
+	ETHTOOL_A_COALESCE_UNSPEC,
+	ETHTOOL_A_COALESCE_HEADER,			/* nest - _A_HEADER_* */
+	ETHTOOL_A_COALESCE_RX_USECS,			/* u32 */
+	ETHTOOL_A_COALESCE_RX_MAX_FRAMES,		/* u32 */
+	ETHTOOL_A_COALESCE_RX_USECS_IRQ,		/* u32 */
+	ETHTOOL_A_COALESCE_RX_MAX_FRAMES_IRQ,		/* u32 */
+	ETHTOOL_A_COALESCE_TX_USECS,			/* u32 */
+	ETHTOOL_A_COALESCE_TX_MAX_FRAMES,		/* u32 */
+	ETHTOOL_A_COALESCE_TX_USECS_IRQ,		/* u32 */
+	ETHTOOL_A_COALESCE_TX_MAX_FRAMES_IRQ,		/* u32 */
+	ETHTOOL_A_COALESCE_STATS_BLOCK_USECS,		/* u32 */
+	ETHTOOL_A_COALESCE_USE_ADAPTIVE_RX,		/* u8 */
+	ETHTOOL_A_COALESCE_USE_ADAPTIVE_TX,		/* u8 */
+	ETHTOOL_A_COALESCE_PKT_RATE_LOW,		/* u32 */
+	ETHTOOL_A_COALESCE_RX_USECS_LOW,		/* u32 */
+	ETHTOOL_A_COALESCE_RX_MAX_FRAMES_LOW,		/* u32 */
+	ETHTOOL_A_COALESCE_TX_USECS_LOW,		/* u32 */
+	ETHTOOL_A_COALESCE_TX_MAX_FRAMES_LOW,		/* u32 */
+	ETHTOOL_A_COALESCE_PKT_RATE_HIGH,		/* u32 */
+	ETHTOOL_A_COALESCE_RX_USECS_HIGH,		/* u32 */
+	ETHTOOL_A_COALESCE_RX_MAX_FRAMES_HIGH,		/* u32 */
+	ETHTOOL_A_COALESCE_TX_USECS_HIGH,		/* u32 */
+	ETHTOOL_A_COALESCE_TX_MAX_FRAMES_HIGH,		/* u32 */
+	ETHTOOL_A_COALESCE_RATE_SAMPLE_INTERVAL,	/* u32 */
+	ETHTOOL_A_COALESCE_USE_CQE_MODE_TX,		/* u8 */
+	ETHTOOL_A_COALESCE_USE_CQE_MODE_RX,		/* u8 */
+	ETHTOOL_A_COALESCE_TX_AGGR_MAX_BYTES,		/* u32 */
+	ETHTOOL_A_COALESCE_TX_AGGR_MAX_FRAMES,		/* u32 */
+	ETHTOOL_A_COALESCE_TX_AGGR_TIME_USECS,		/* u32 */
+	/* nest - _A_PROFILE_IRQ_MODERATION */
+	ETHTOOL_A_COALESCE_RX_PROFILE,
+	/* nest - _A_PROFILE_IRQ_MODERATION */
+	ETHTOOL_A_COALESCE_TX_PROFILE,
+
+	/* add new constants above here */
+	__ETHTOOL_A_COALESCE_CNT,
+	ETHTOOL_A_COALESCE_MAX = (__ETHTOOL_A_COALESCE_CNT - 1)
+};
+
+/* PAUSE */
+
+enum {
+	ETHTOOL_A_PAUSE_STAT_UNSPEC,
+	ETHTOOL_A_PAUSE_STAT_PAD,
+
+	ETHTOOL_A_PAUSE_STAT_TX_FRAMES,
+	ETHTOOL_A_PAUSE_STAT_RX_FRAMES,
+
+	/* add new constants above here
+	 * adjust ETHTOOL_PAUSE_STAT_CNT if adding non-stats!
+	 */
+	__ETHTOOL_A_PAUSE_STAT_CNT,
+	ETHTOOL_A_PAUSE_STAT_MAX = (__ETHTOOL_A_PAUSE_STAT_CNT - 1)
+};
+
+enum {
+	ETHTOOL_A_PAUSE_UNSPEC,
+	ETHTOOL_A_PAUSE_HEADER,				/* nest - _A_HEADER_* */
+	ETHTOOL_A_PAUSE_AUTONEG,			/* u8 */
+	ETHTOOL_A_PAUSE_RX,				/* u8 */
+	ETHTOOL_A_PAUSE_TX,				/* u8 */
+	ETHTOOL_A_PAUSE_STATS,				/* nest - _PAUSE_STAT_* */
+	ETHTOOL_A_PAUSE_STATS_SRC,			/* u32 */
+
+	/* add new constants above here */
+	__ETHTOOL_A_PAUSE_CNT,
+	ETHTOOL_A_PAUSE_MAX = (__ETHTOOL_A_PAUSE_CNT - 1)
+};
+
+/* EEE */
+
+enum {
+	ETHTOOL_A_EEE_UNSPEC,
+	ETHTOOL_A_EEE_HEADER,				/* nest - _A_HEADER_* */
+	ETHTOOL_A_EEE_MODES_OURS,			/* bitset */
+	ETHTOOL_A_EEE_MODES_PEER,			/* bitset */
+	ETHTOOL_A_EEE_ACTIVE,				/* u8 */
+	ETHTOOL_A_EEE_ENABLED,				/* u8 */
+	ETHTOOL_A_EEE_TX_LPI_ENABLED,			/* u8 */
+	ETHTOOL_A_EEE_TX_LPI_TIMER,			/* u32 */
+
+	/* add new constants above here */
+	__ETHTOOL_A_EEE_CNT,
+	ETHTOOL_A_EEE_MAX = (__ETHTOOL_A_EEE_CNT - 1)
+};
+
+/* TSINFO */
+
+enum {
+	ETHTOOL_A_TS_STAT_UNSPEC,
+
+	ETHTOOL_A_TS_STAT_TX_PKTS,			/* uint */
+	ETHTOOL_A_TS_STAT_TX_LOST,			/* uint */
+	ETHTOOL_A_TS_STAT_TX_ERR,			/* uint */
+
+	/* add new constants above here */
+	__ETHTOOL_A_TS_STAT_CNT,
+	ETHTOOL_A_TS_STAT_MAX = (__ETHTOOL_A_TS_STAT_CNT - 1)
+
+};
+
+enum {
+	ETHTOOL_A_TSINFO_UNSPEC,
+	ETHTOOL_A_TSINFO_HEADER,			/* nest - _A_HEADER_* */
+	ETHTOOL_A_TSINFO_TIMESTAMPING,			/* bitset */
+	ETHTOOL_A_TSINFO_TX_TYPES,			/* bitset */
+	ETHTOOL_A_TSINFO_RX_FILTERS,			/* bitset */
+	ETHTOOL_A_TSINFO_PHC_INDEX,			/* u32 */
+	ETHTOOL_A_TSINFO_STATS,				/* nest - _A_TSINFO_STAT */
+
+	/* add new constants above here */
+	__ETHTOOL_A_TSINFO_CNT,
+	ETHTOOL_A_TSINFO_MAX = (__ETHTOOL_A_TSINFO_CNT - 1)
+};
+
+enum {
+	ETHTOOL_A_CABLE_RESULT_UNSPEC,
+	ETHTOOL_A_CABLE_RESULT_PAIR,		/* u8 ETHTOOL_A_CABLE_PAIR_ */
+	ETHTOOL_A_CABLE_RESULT_CODE,		/* u8 ETHTOOL_A_CABLE_RESULT_CODE_ */
+	ETHTOOL_A_CABLE_RESULT_SRC,		/* u32 ETHTOOL_A_CABLE_INF_SRC_ */
+
+	__ETHTOOL_A_CABLE_RESULT_CNT,
+	ETHTOOL_A_CABLE_RESULT_MAX = (__ETHTOOL_A_CABLE_RESULT_CNT - 1)
+};
+
+enum {
+	ETHTOOL_A_CABLE_FAULT_LENGTH_UNSPEC,
+	ETHTOOL_A_CABLE_FAULT_LENGTH_PAIR,	/* u8 ETHTOOL_A_CABLE_PAIR_ */
+	ETHTOOL_A_CABLE_FAULT_LENGTH_CM,	/* u32 */
+	ETHTOOL_A_CABLE_FAULT_LENGTH_SRC,	/* u32 ETHTOOL_A_CABLE_INF_SRC_ */
+
+	__ETHTOOL_A_CABLE_FAULT_LENGTH_CNT,
+	ETHTOOL_A_CABLE_FAULT_LENGTH_MAX = (__ETHTOOL_A_CABLE_FAULT_LENGTH_CNT - 1)
+};
+
+enum {
+	ETHTOOL_A_CABLE_NEST_UNSPEC,
+	ETHTOOL_A_CABLE_NEST_RESULT,		/* nest - ETHTOOL_A_CABLE_RESULT_ */
+	ETHTOOL_A_CABLE_NEST_FAULT_LENGTH,	/* nest - ETHTOOL_A_CABLE_FAULT_LENGTH_ */
+	__ETHTOOL_A_CABLE_NEST_CNT,
+	ETHTOOL_A_CABLE_NEST_MAX = (__ETHTOOL_A_CABLE_NEST_CNT - 1)
+};
+
+/* CABLE TEST */
+
+enum {
+	ETHTOOL_A_CABLE_TEST_UNSPEC,
+	ETHTOOL_A_CABLE_TEST_HEADER,		/* nest - _A_HEADER_* */
+
+	/* add new constants above here */
+	__ETHTOOL_A_CABLE_TEST_CNT,
+	ETHTOOL_A_CABLE_TEST_MAX = __ETHTOOL_A_CABLE_TEST_CNT - 1
+};
+
+enum {
+	ETHTOOL_A_CABLE_TEST_NTF_UNSPEC,
+	ETHTOOL_A_CABLE_TEST_NTF_HEADER,	/* nest - ETHTOOL_A_HEADER_* */
+	ETHTOOL_A_CABLE_TEST_NTF_STATUS,	/* u8 - _STARTED/_COMPLETE */
+	ETHTOOL_A_CABLE_TEST_NTF_NEST,		/* nest - of results: */
+
+	__ETHTOOL_A_CABLE_TEST_NTF_CNT,
+	ETHTOOL_A_CABLE_TEST_NTF_MAX = (__ETHTOOL_A_CABLE_TEST_NTF_CNT - 1)
+};
+
+/* CABLE TEST TDR */
+
+enum {
+	ETHTOOL_A_CABLE_TEST_TDR_CFG_UNSPEC,
+	ETHTOOL_A_CABLE_TEST_TDR_CFG_FIRST,		/* u32 */
+	ETHTOOL_A_CABLE_TEST_TDR_CFG_LAST,		/* u32 */
+	ETHTOOL_A_CABLE_TEST_TDR_CFG_STEP,		/* u32 */
+	ETHTOOL_A_CABLE_TEST_TDR_CFG_PAIR,		/* u8 */
+
+	/* add new constants above here */
+	__ETHTOOL_A_CABLE_TEST_TDR_CFG_CNT,
+	ETHTOOL_A_CABLE_TEST_TDR_CFG_MAX = __ETHTOOL_A_CABLE_TEST_TDR_CFG_CNT - 1
+};
+
+enum {
+	ETHTOOL_A_CABLE_TEST_TDR_NTF_UNSPEC,
+	ETHTOOL_A_CABLE_TEST_TDR_NTF_HEADER,	/* nest - ETHTOOL_A_HEADER_* */
+	ETHTOOL_A_CABLE_TEST_TDR_NTF_STATUS,	/* u8 - _STARTED/_COMPLETE */
+	ETHTOOL_A_CABLE_TEST_TDR_NTF_NEST,	/* nest - of results: */
+
+	/* add new constants above here */
+	__ETHTOOL_A_CABLE_TEST_TDR_NTF_CNT,
+	ETHTOOL_A_CABLE_TEST_TDR_NTF_MAX = __ETHTOOL_A_CABLE_TEST_TDR_NTF_CNT - 1
+};
+
+enum {
+	ETHTOOL_A_CABLE_TEST_TDR_UNSPEC,
+	ETHTOOL_A_CABLE_TEST_TDR_HEADER,	/* nest - _A_HEADER_* */
+	ETHTOOL_A_CABLE_TEST_TDR_CFG,		/* nest - *_TDR_CFG_* */
+
+	/* add new constants above here */
+	__ETHTOOL_A_CABLE_TEST_TDR_CNT,
+	ETHTOOL_A_CABLE_TEST_TDR_MAX = __ETHTOOL_A_CABLE_TEST_TDR_CNT - 1
+};
+
+enum {
+	ETHTOOL_A_TUNNEL_UDP_ENTRY_UNSPEC,
+
+	ETHTOOL_A_TUNNEL_UDP_ENTRY_PORT,		/* be16 */
+	ETHTOOL_A_TUNNEL_UDP_ENTRY_TYPE,		/* u32 */
+
+	/* add new constants above here */
+	__ETHTOOL_A_TUNNEL_UDP_ENTRY_CNT,
+	ETHTOOL_A_TUNNEL_UDP_ENTRY_MAX = (__ETHTOOL_A_TUNNEL_UDP_ENTRY_CNT - 1)
+};
+
+enum {
+	ETHTOOL_A_TUNNEL_UDP_TABLE_UNSPEC,
+
+	ETHTOOL_A_TUNNEL_UDP_TABLE_SIZE,		/* u32 */
+	ETHTOOL_A_TUNNEL_UDP_TABLE_TYPES,		/* bitset */
+	ETHTOOL_A_TUNNEL_UDP_TABLE_ENTRY,		/* nest - _UDP_ENTRY_* */
+
+	/* add new constants above here */
+	__ETHTOOL_A_TUNNEL_UDP_TABLE_CNT,
+	ETHTOOL_A_TUNNEL_UDP_TABLE_MAX = (__ETHTOOL_A_TUNNEL_UDP_TABLE_CNT - 1)
+};
+
+enum {
+	ETHTOOL_A_TUNNEL_UDP_UNSPEC,
+
+	ETHTOOL_A_TUNNEL_UDP_TABLE,			/* nest - _UDP_TABLE_* */
+
+	/* add new constants above here */
+	__ETHTOOL_A_TUNNEL_UDP_CNT,
+	ETHTOOL_A_TUNNEL_UDP_MAX = (__ETHTOOL_A_TUNNEL_UDP_CNT - 1)
+};
+
+enum {
+	ETHTOOL_A_TUNNEL_INFO_UNSPEC,
+	ETHTOOL_A_TUNNEL_INFO_HEADER,			/* nest - _A_HEADER_* */
+
+	ETHTOOL_A_TUNNEL_INFO_UDP_PORTS,		/* nest - _UDP_TABLE */
+
+	/* add new constants above here */
+	__ETHTOOL_A_TUNNEL_INFO_CNT,
+	ETHTOOL_A_TUNNEL_INFO_MAX = (__ETHTOOL_A_TUNNEL_INFO_CNT - 1)
+};
+
+/* FEC */
+
+enum {
+	ETHTOOL_A_FEC_STAT_UNSPEC,
+	ETHTOOL_A_FEC_STAT_PAD,
+
+	ETHTOOL_A_FEC_STAT_CORRECTED,			/* array, u64 */
+	ETHTOOL_A_FEC_STAT_UNCORR,			/* array, u64 */
+	ETHTOOL_A_FEC_STAT_CORR_BITS,			/* array, u64 */
+
+	/* add new constants above here */
+	__ETHTOOL_A_FEC_STAT_CNT,
+	ETHTOOL_A_FEC_STAT_MAX = (__ETHTOOL_A_FEC_STAT_CNT - 1)
+};
+
+enum {
+	ETHTOOL_A_FEC_UNSPEC,
+	ETHTOOL_A_FEC_HEADER,				/* nest - _A_HEADER_* */
+	ETHTOOL_A_FEC_MODES,				/* bitset */
+	ETHTOOL_A_FEC_AUTO,				/* u8 */
+	ETHTOOL_A_FEC_ACTIVE,				/* u32 */
+	ETHTOOL_A_FEC_STATS,				/* nest - _A_FEC_STAT */
+
+	__ETHTOOL_A_FEC_CNT,
+	ETHTOOL_A_FEC_MAX = (__ETHTOOL_A_FEC_CNT - 1)
+};
+
+/* MODULE EEPROM */
+
+enum {
+	ETHTOOL_A_MODULE_EEPROM_UNSPEC,
+	ETHTOOL_A_MODULE_EEPROM_HEADER,			/* nest - _A_HEADER_* */
+
+	ETHTOOL_A_MODULE_EEPROM_OFFSET,			/* u32 */
+	ETHTOOL_A_MODULE_EEPROM_LENGTH,			/* u32 */
+	ETHTOOL_A_MODULE_EEPROM_PAGE,			/* u8 */
+	ETHTOOL_A_MODULE_EEPROM_BANK,			/* u8 */
+	ETHTOOL_A_MODULE_EEPROM_I2C_ADDRESS,		/* u8 */
+	ETHTOOL_A_MODULE_EEPROM_DATA,			/* binary */
+
+	__ETHTOOL_A_MODULE_EEPROM_CNT,
+	ETHTOOL_A_MODULE_EEPROM_MAX = (__ETHTOOL_A_MODULE_EEPROM_CNT - 1)
+};
+
+
+enum {
+	ETHTOOL_A_STATS_GRP_UNSPEC,
+	ETHTOOL_A_STATS_GRP_PAD,
+
+	ETHTOOL_A_STATS_GRP_ID,			/* u32 */
+	ETHTOOL_A_STATS_GRP_SS_ID,		/* u32 */
+
+	ETHTOOL_A_STATS_GRP_STAT,		/* nest */
+
+	ETHTOOL_A_STATS_GRP_HIST_RX,		/* nest */
+	ETHTOOL_A_STATS_GRP_HIST_TX,		/* nest */
+
+	ETHTOOL_A_STATS_GRP_HIST_BKT_LOW,	/* u32 */
+	ETHTOOL_A_STATS_GRP_HIST_BKT_HI,	/* u32 */
+	ETHTOOL_A_STATS_GRP_HIST_VAL,		/* u64 */
+
+	/* add new constants above here */
+	__ETHTOOL_A_STATS_GRP_CNT,
+	ETHTOOL_A_STATS_GRP_MAX = (__ETHTOOL_A_STATS_GRP_CNT - 1)
+};
+
+/* STATS */
+
+enum {
+	ETHTOOL_A_STATS_UNSPEC,
+	ETHTOOL_A_STATS_PAD,
+	ETHTOOL_A_STATS_HEADER,			/* nest - _A_HEADER_* */
+	ETHTOOL_A_STATS_GROUPS,			/* bitset */
+
+	ETHTOOL_A_STATS_GRP,			/* nest - _A_STATS_GRP_* */
+
+	ETHTOOL_A_STATS_SRC,			/* u32 */
+
+	/* add new constants above here */
+	__ETHTOOL_A_STATS_CNT,
+	ETHTOOL_A_STATS_MAX = (__ETHTOOL_A_STATS_CNT - 1)
+};
+
+/* PHC VCLOCKS */
+
+enum {
+	ETHTOOL_A_PHC_VCLOCKS_UNSPEC,
+	ETHTOOL_A_PHC_VCLOCKS_HEADER,			/* nest - _A_HEADER_* */
+	ETHTOOL_A_PHC_VCLOCKS_NUM,			/* u32 */
+	ETHTOOL_A_PHC_VCLOCKS_INDEX,			/* array, s32 */
+
+	/* add new constants above here */
+	__ETHTOOL_A_PHC_VCLOCKS_CNT,
+	ETHTOOL_A_PHC_VCLOCKS_MAX = (__ETHTOOL_A_PHC_VCLOCKS_CNT - 1)
+};
+
+/* MODULE */
+
+enum {
+	ETHTOOL_A_MODULE_UNSPEC,
+	ETHTOOL_A_MODULE_HEADER,		/* nest - _A_HEADER_* */
+	ETHTOOL_A_MODULE_POWER_MODE_POLICY,	/* u8 */
+	ETHTOOL_A_MODULE_POWER_MODE,		/* u8 */
+
+	/* add new constants above here */
+	__ETHTOOL_A_MODULE_CNT,
+	ETHTOOL_A_MODULE_MAX = (__ETHTOOL_A_MODULE_CNT - 1)
+};
+
+/* Power Sourcing Equipment */
+enum {
+	ETHTOOL_A_C33_PSE_PW_LIMIT_UNSPEC,
+	ETHTOOL_A_C33_PSE_PW_LIMIT_MIN,	/* u32 */
+	ETHTOOL_A_C33_PSE_PW_LIMIT_MAX,	/* u32 */
+};
+
+enum {
+	ETHTOOL_A_PSE_UNSPEC,
+	ETHTOOL_A_PSE_HEADER,			/* nest - _A_HEADER_* */
+	ETHTOOL_A_PODL_PSE_ADMIN_STATE,		/* u32 */
+	ETHTOOL_A_PODL_PSE_ADMIN_CONTROL,	/* u32 */
+	ETHTOOL_A_PODL_PSE_PW_D_STATUS,		/* u32 */
+	ETHTOOL_A_C33_PSE_ADMIN_STATE,		/* u32 */
+	ETHTOOL_A_C33_PSE_ADMIN_CONTROL,	/* u32 */
+	ETHTOOL_A_C33_PSE_PW_D_STATUS,		/* u32 */
+	ETHTOOL_A_C33_PSE_PW_CLASS,		/* u32 */
+	ETHTOOL_A_C33_PSE_ACTUAL_PW,		/* u32 */
+	ETHTOOL_A_C33_PSE_EXT_STATE,		/* u32 */
+	ETHTOOL_A_C33_PSE_EXT_SUBSTATE,		/* u32 */
+	ETHTOOL_A_C33_PSE_AVAIL_PW_LIMIT,	/* u32 */
+	ETHTOOL_A_C33_PSE_PW_LIMIT_RANGES,	/* nest - _C33_PSE_PW_LIMIT_* */
+
+	/* add new constants above here */
+	__ETHTOOL_A_PSE_CNT,
+	ETHTOOL_A_PSE_MAX = (__ETHTOOL_A_PSE_CNT - 1)
+};
+
+enum {
+	ETHTOOL_A_RSS_UNSPEC,
+	ETHTOOL_A_RSS_HEADER,
+	ETHTOOL_A_RSS_CONTEXT,		/* u32 */
+	ETHTOOL_A_RSS_HFUNC,		/* u32 */
+	ETHTOOL_A_RSS_INDIR,		/* binary */
+	ETHTOOL_A_RSS_HKEY,		/* binary */
+	ETHTOOL_A_RSS_INPUT_XFRM,	/* u32 */
+	ETHTOOL_A_RSS_START_CONTEXT,	/* u32 */
+
+	__ETHTOOL_A_RSS_CNT,
+	ETHTOOL_A_RSS_MAX = (__ETHTOOL_A_RSS_CNT - 1),
+};
+
+/* PLCA */
+
+enum {
+	ETHTOOL_A_PLCA_UNSPEC,
+	ETHTOOL_A_PLCA_HEADER,			/* nest - _A_HEADER_* */
+	ETHTOOL_A_PLCA_VERSION,			/* u16 */
+	ETHTOOL_A_PLCA_ENABLED,			/* u8  */
+	ETHTOOL_A_PLCA_STATUS,			/* u8  */
+	ETHTOOL_A_PLCA_NODE_CNT,		/* u32 */
+	ETHTOOL_A_PLCA_NODE_ID,			/* u32 */
+	ETHTOOL_A_PLCA_TO_TMR,			/* u32 */
+	ETHTOOL_A_PLCA_BURST_CNT,		/* u32 */
+	ETHTOOL_A_PLCA_BURST_TMR,		/* u32 */
+
+	/* add new constants above here */
+	__ETHTOOL_A_PLCA_CNT,
+	ETHTOOL_A_PLCA_MAX = (__ETHTOOL_A_PLCA_CNT - 1)
+};
+
+/* MODULE_FW_FLASH */
+
+enum {
+	ETHTOOL_A_MODULE_FW_FLASH_UNSPEC,
+	ETHTOOL_A_MODULE_FW_FLASH_HEADER,		/* nest - _A_HEADER_* */
+	ETHTOOL_A_MODULE_FW_FLASH_FILE_NAME,		/* string */
+	ETHTOOL_A_MODULE_FW_FLASH_PASSWORD,		/* u32 */
+	ETHTOOL_A_MODULE_FW_FLASH_STATUS,		/* u32 */
+	ETHTOOL_A_MODULE_FW_FLASH_STATUS_MSG,		/* string */
+	ETHTOOL_A_MODULE_FW_FLASH_DONE,			/* uint */
+	ETHTOOL_A_MODULE_FW_FLASH_TOTAL,		/* uint */
+
+	/* add new constants above here */
+	__ETHTOOL_A_MODULE_FW_FLASH_CNT,
+	ETHTOOL_A_MODULE_FW_FLASH_MAX = (__ETHTOOL_A_MODULE_FW_FLASH_CNT - 1)
+};
+
+enum {
+	ETHTOOL_A_PHY_UNSPEC,
+	ETHTOOL_A_PHY_HEADER,			/* nest - _A_HEADER_* */
+	ETHTOOL_A_PHY_INDEX,			/* u32 */
+	ETHTOOL_A_PHY_DRVNAME,			/* string */
+	ETHTOOL_A_PHY_NAME,			/* string */
+	ETHTOOL_A_PHY_UPSTREAM_TYPE,		/* u32 */
+	ETHTOOL_A_PHY_UPSTREAM_INDEX,		/* u32 */
+	ETHTOOL_A_PHY_UPSTREAM_SFP_NAME,	/* string */
+	ETHTOOL_A_PHY_DOWNSTREAM_SFP_NAME,	/* string */
+
+	/* add new constants above here */
+	__ETHTOOL_A_PHY_CNT,
+	ETHTOOL_A_PHY_MAX = (__ETHTOOL_A_PHY_CNT - 1)
+};
+
+/* message types - userspace to kernel */
+enum {
+	ETHTOOL_MSG_USER_NONE,
+	ETHTOOL_MSG_STRSET_GET,
+	ETHTOOL_MSG_LINKINFO_GET,
+	ETHTOOL_MSG_LINKINFO_SET,
+	ETHTOOL_MSG_LINKMODES_GET,
+	ETHTOOL_MSG_LINKMODES_SET,
+	ETHTOOL_MSG_LINKSTATE_GET,
+	ETHTOOL_MSG_DEBUG_GET,
+	ETHTOOL_MSG_DEBUG_SET,
+	ETHTOOL_MSG_WOL_GET,
+	ETHTOOL_MSG_WOL_SET,
+	ETHTOOL_MSG_FEATURES_GET,
+	ETHTOOL_MSG_FEATURES_SET,
+	ETHTOOL_MSG_PRIVFLAGS_GET,
+	ETHTOOL_MSG_PRIVFLAGS_SET,
+	ETHTOOL_MSG_RINGS_GET,
+	ETHTOOL_MSG_RINGS_SET,
+	ETHTOOL_MSG_CHANNELS_GET,
+	ETHTOOL_MSG_CHANNELS_SET,
+	ETHTOOL_MSG_COALESCE_GET,
+	ETHTOOL_MSG_COALESCE_SET,
+	ETHTOOL_MSG_PAUSE_GET,
+	ETHTOOL_MSG_PAUSE_SET,
+	ETHTOOL_MSG_EEE_GET,
+	ETHTOOL_MSG_EEE_SET,
+	ETHTOOL_MSG_TSINFO_GET,
+	ETHTOOL_MSG_CABLE_TEST_ACT,
+	ETHTOOL_MSG_CABLE_TEST_TDR_ACT,
+	ETHTOOL_MSG_TUNNEL_INFO_GET,
+	ETHTOOL_MSG_FEC_GET,
+	ETHTOOL_MSG_FEC_SET,
+	ETHTOOL_MSG_MODULE_EEPROM_GET,
+	ETHTOOL_MSG_STATS_GET,
+	ETHTOOL_MSG_PHC_VCLOCKS_GET,
+	ETHTOOL_MSG_MODULE_GET,
+	ETHTOOL_MSG_MODULE_SET,
+	ETHTOOL_MSG_PSE_GET,
+	ETHTOOL_MSG_PSE_SET,
+	ETHTOOL_MSG_RSS_GET,
+	ETHTOOL_MSG_PLCA_GET_CFG,
+	ETHTOOL_MSG_PLCA_SET_CFG,
+	ETHTOOL_MSG_PLCA_GET_STATUS,
+	ETHTOOL_MSG_MM_GET,
+	ETHTOOL_MSG_MM_SET,
+	ETHTOOL_MSG_MODULE_FW_FLASH_ACT,
+	ETHTOOL_MSG_PHY_GET,
+
+	/* add new constants above here */
+	__ETHTOOL_MSG_USER_CNT,
+	ETHTOOL_MSG_USER_MAX = __ETHTOOL_MSG_USER_CNT - 1
+};
+
+/* message types - kernel to userspace */
+enum {
+	ETHTOOL_MSG_KERNEL_NONE,
+	ETHTOOL_MSG_STRSET_GET_REPLY,
+	ETHTOOL_MSG_LINKINFO_GET_REPLY,
+	ETHTOOL_MSG_LINKINFO_NTF,
+	ETHTOOL_MSG_LINKMODES_GET_REPLY,
+	ETHTOOL_MSG_LINKMODES_NTF,
+	ETHTOOL_MSG_LINKSTATE_GET_REPLY,
+	ETHTOOL_MSG_DEBUG_GET_REPLY,
+	ETHTOOL_MSG_DEBUG_NTF,
+	ETHTOOL_MSG_WOL_GET_REPLY,
+	ETHTOOL_MSG_WOL_NTF,
+	ETHTOOL_MSG_FEATURES_GET_REPLY,
+	ETHTOOL_MSG_FEATURES_SET_REPLY,
+	ETHTOOL_MSG_FEATURES_NTF,
+	ETHTOOL_MSG_PRIVFLAGS_GET_REPLY,
+	ETHTOOL_MSG_PRIVFLAGS_NTF,
+	ETHTOOL_MSG_RINGS_GET_REPLY,
+	ETHTOOL_MSG_RINGS_NTF,
+	ETHTOOL_MSG_CHANNELS_GET_REPLY,
+	ETHTOOL_MSG_CHANNELS_NTF,
+	ETHTOOL_MSG_COALESCE_GET_REPLY,
+	ETHTOOL_MSG_COALESCE_NTF,
+	ETHTOOL_MSG_PAUSE_GET_REPLY,
+	ETHTOOL_MSG_PAUSE_NTF,
+	ETHTOOL_MSG_EEE_GET_REPLY,
+	ETHTOOL_MSG_EEE_NTF,
+	ETHTOOL_MSG_TSINFO_GET_REPLY,
+	ETHTOOL_MSG_CABLE_TEST_NTF,
+	ETHTOOL_MSG_CABLE_TEST_TDR_NTF,
+	ETHTOOL_MSG_TUNNEL_INFO_GET_REPLY,
+	ETHTOOL_MSG_FEC_GET_REPLY,
+	ETHTOOL_MSG_FEC_NTF,
+	ETHTOOL_MSG_MODULE_EEPROM_GET_REPLY,
+	ETHTOOL_MSG_STATS_GET_REPLY,
+	ETHTOOL_MSG_PHC_VCLOCKS_GET_REPLY,
+	ETHTOOL_MSG_MODULE_GET_REPLY,
+	ETHTOOL_MSG_MODULE_NTF,
+	ETHTOOL_MSG_PSE_GET_REPLY,
+	ETHTOOL_MSG_RSS_GET_REPLY,
+	ETHTOOL_MSG_PLCA_GET_CFG_REPLY,
+	ETHTOOL_MSG_PLCA_GET_STATUS_REPLY,
+	ETHTOOL_MSG_PLCA_NTF,
+	ETHTOOL_MSG_MM_GET_REPLY,
+	ETHTOOL_MSG_MM_NTF,
+	ETHTOOL_MSG_MODULE_FW_FLASH_NTF,
+	ETHTOOL_MSG_PHY_GET_REPLY,
+	ETHTOOL_MSG_PHY_NTF,
+
+	/* add new constants above here */
+	__ETHTOOL_MSG_KERNEL_CNT,
+	ETHTOOL_MSG_KERNEL_MAX = __ETHTOOL_MSG_KERNEL_CNT - 1
+};
+
+#endif /* _UAPI_LINUX_ETHTOOL_NETLINK_GENERATED_H */
-- 
cgit v1.2.3


From dd7cde36de15b071b5f9163d21d7c9142089b424 Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@fomichev.me>
Date: Wed, 4 Dec 2024 07:55:48 -0800
Subject: ethtool: remove the comments that are not gonna be generated

Cleanup the header manually to make it easier to review the changes that ynl
generator brings in. No functional changes.

Signed-off-by: Stanislav Fomichev <sdf@fomichev.me>
Link: https://patch.msgid.link/20241204155549.641348-8-sdf@fomichev.me
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/ethtool_netlink_generated.h | 678 ++++++++++---------------
 1 file changed, 274 insertions(+), 404 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/ethtool_netlink_generated.h b/include/uapi/linux/ethtool_netlink_generated.h
index 4b4bf17d1a88..35a24d490efe 100644
--- a/include/uapi/linux/ethtool_netlink_generated.h
+++ b/include/uapi/linux/ethtool_netlink_generated.h
@@ -2,8 +2,6 @@
 #ifndef _UAPI_LINUX_ETHTOOL_NETLINK_GENERATED_H
 #define _UAPI_LINUX_ETHTOOL_NETLINK_GENERATED_H
 
-/* TUNNEL INFO */
-
 enum {
 	ETHTOOL_UDP_TUNNEL_TYPE_VXLAN,
 	ETHTOOL_UDP_TUNNEL_TYPE_GENEVE,
@@ -12,8 +10,6 @@ enum {
 	__ETHTOOL_UDP_TUNNEL_TYPE_CNT
 };
 
-/* request header */
-
 enum ethtool_header_flags {
 	ETHTOOL_FLAG_COMPACT_BITSETS	= 1 << 0,	/* use compact bitsets in reply */
 	ETHTOOL_FLAG_OMIT_REPLY		= 1 << 1,	/* provide optional reply for SET or ACT requests */
@@ -28,303 +24,250 @@ enum {
 
 enum {
 	ETHTOOL_A_HEADER_UNSPEC,
-	ETHTOOL_A_HEADER_DEV_INDEX,		/* u32 */
-	ETHTOOL_A_HEADER_DEV_NAME,		/* string */
-	ETHTOOL_A_HEADER_FLAGS,			/* u32 - ETHTOOL_FLAG_* */
-	ETHTOOL_A_HEADER_PHY_INDEX,		/* u32 */
+	ETHTOOL_A_HEADER_DEV_INDEX,
+	ETHTOOL_A_HEADER_DEV_NAME,
+	ETHTOOL_A_HEADER_FLAGS,
+	ETHTOOL_A_HEADER_PHY_INDEX,
 
-	/* add new constants above here */
 	__ETHTOOL_A_HEADER_CNT,
 	ETHTOOL_A_HEADER_MAX = __ETHTOOL_A_HEADER_CNT - 1
 };
 
-/* bit sets */
-
 enum {
 	ETHTOOL_A_BITSET_BIT_UNSPEC,
-	ETHTOOL_A_BITSET_BIT_INDEX,		/* u32 */
-	ETHTOOL_A_BITSET_BIT_NAME,		/* string */
-	ETHTOOL_A_BITSET_BIT_VALUE,		/* flag */
+	ETHTOOL_A_BITSET_BIT_INDEX,
+	ETHTOOL_A_BITSET_BIT_NAME,
+	ETHTOOL_A_BITSET_BIT_VALUE,
 
-	/* add new constants above here */
 	__ETHTOOL_A_BITSET_BIT_CNT,
 	ETHTOOL_A_BITSET_BIT_MAX = __ETHTOOL_A_BITSET_BIT_CNT - 1
 };
 
 enum {
 	ETHTOOL_A_BITSET_BITS_UNSPEC,
-	ETHTOOL_A_BITSET_BITS_BIT,		/* nest - _A_BITSET_BIT_* */
+	ETHTOOL_A_BITSET_BITS_BIT,
 
-	/* add new constants above here */
 	__ETHTOOL_A_BITSET_BITS_CNT,
 	ETHTOOL_A_BITSET_BITS_MAX = __ETHTOOL_A_BITSET_BITS_CNT - 1
 };
 
 enum {
 	ETHTOOL_A_BITSET_UNSPEC,
-	ETHTOOL_A_BITSET_NOMASK,		/* flag */
-	ETHTOOL_A_BITSET_SIZE,			/* u32 */
-	ETHTOOL_A_BITSET_BITS,			/* nest - _A_BITSET_BITS_* */
-	ETHTOOL_A_BITSET_VALUE,			/* binary */
-	ETHTOOL_A_BITSET_MASK,			/* binary */
+	ETHTOOL_A_BITSET_NOMASK,
+	ETHTOOL_A_BITSET_SIZE,
+	ETHTOOL_A_BITSET_BITS,
+	ETHTOOL_A_BITSET_VALUE,
+	ETHTOOL_A_BITSET_MASK,
 
-	/* add new constants above here */
 	__ETHTOOL_A_BITSET_CNT,
 	ETHTOOL_A_BITSET_MAX = __ETHTOOL_A_BITSET_CNT - 1
 };
 
-/* string sets */
-
 enum {
 	ETHTOOL_A_STRING_UNSPEC,
-	ETHTOOL_A_STRING_INDEX,			/* u32 */
-	ETHTOOL_A_STRING_VALUE,			/* string */
+	ETHTOOL_A_STRING_INDEX,
+	ETHTOOL_A_STRING_VALUE,
 
-	/* add new constants above here */
 	__ETHTOOL_A_STRING_CNT,
 	ETHTOOL_A_STRING_MAX = __ETHTOOL_A_STRING_CNT - 1
 };
 
 enum {
 	ETHTOOL_A_STRINGS_UNSPEC,
-	ETHTOOL_A_STRINGS_STRING,		/* nest - _A_STRINGS_* */
+	ETHTOOL_A_STRINGS_STRING,
 
-	/* add new constants above here */
 	__ETHTOOL_A_STRINGS_CNT,
 	ETHTOOL_A_STRINGS_MAX = __ETHTOOL_A_STRINGS_CNT - 1
 };
 
 enum {
 	ETHTOOL_A_STRINGSET_UNSPEC,
-	ETHTOOL_A_STRINGSET_ID,			/* u32 */
-	ETHTOOL_A_STRINGSET_COUNT,		/* u32 */
-	ETHTOOL_A_STRINGSET_STRINGS,		/* nest - _A_STRINGS_* */
+	ETHTOOL_A_STRINGSET_ID,
+	ETHTOOL_A_STRINGSET_COUNT,
+	ETHTOOL_A_STRINGSET_STRINGS,
 
-	/* add new constants above here */
 	__ETHTOOL_A_STRINGSET_CNT,
 	ETHTOOL_A_STRINGSET_MAX = __ETHTOOL_A_STRINGSET_CNT - 1
 };
 
 enum {
 	ETHTOOL_A_STRINGSETS_UNSPEC,
-	ETHTOOL_A_STRINGSETS_STRINGSET,		/* nest - _A_STRINGSET_* */
+	ETHTOOL_A_STRINGSETS_STRINGSET,
 
-	/* add new constants above here */
 	__ETHTOOL_A_STRINGSETS_CNT,
 	ETHTOOL_A_STRINGSETS_MAX = __ETHTOOL_A_STRINGSETS_CNT - 1
 };
 
-/* STRSET */
-
 enum {
 	ETHTOOL_A_STRSET_UNSPEC,
-	ETHTOOL_A_STRSET_HEADER,		/* nest - _A_HEADER_* */
-	ETHTOOL_A_STRSET_STRINGSETS,		/* nest - _A_STRINGSETS_* */
-	ETHTOOL_A_STRSET_COUNTS_ONLY,		/* flag */
+	ETHTOOL_A_STRSET_HEADER,
+	ETHTOOL_A_STRSET_STRINGSETS,
+	ETHTOOL_A_STRSET_COUNTS_ONLY,
 
-	/* add new constants above here */
 	__ETHTOOL_A_STRSET_CNT,
 	ETHTOOL_A_STRSET_MAX = __ETHTOOL_A_STRSET_CNT - 1
 };
 
-/* PRIVFLAGS */
-
 enum {
 	ETHTOOL_A_PRIVFLAGS_UNSPEC,
-	ETHTOOL_A_PRIVFLAGS_HEADER,			/* nest - _A_HEADER_* */
-	ETHTOOL_A_PRIVFLAGS_FLAGS,			/* bitset */
+	ETHTOOL_A_PRIVFLAGS_HEADER,
+	ETHTOOL_A_PRIVFLAGS_FLAGS,
 
-	/* add new constants above here */
 	__ETHTOOL_A_PRIVFLAGS_CNT,
 	ETHTOOL_A_PRIVFLAGS_MAX = __ETHTOOL_A_PRIVFLAGS_CNT - 1
 };
 
-/* RINGS */
-
 enum {
 	ETHTOOL_A_RINGS_UNSPEC,
-	ETHTOOL_A_RINGS_HEADER,				/* nest - _A_HEADER_* */
-	ETHTOOL_A_RINGS_RX_MAX,				/* u32 */
-	ETHTOOL_A_RINGS_RX_MINI_MAX,			/* u32 */
-	ETHTOOL_A_RINGS_RX_JUMBO_MAX,			/* u32 */
-	ETHTOOL_A_RINGS_TX_MAX,				/* u32 */
-	ETHTOOL_A_RINGS_RX,				/* u32 */
-	ETHTOOL_A_RINGS_RX_MINI,			/* u32 */
-	ETHTOOL_A_RINGS_RX_JUMBO,			/* u32 */
-	ETHTOOL_A_RINGS_TX,				/* u32 */
-	ETHTOOL_A_RINGS_RX_BUF_LEN,                     /* u32 */
-	ETHTOOL_A_RINGS_TCP_DATA_SPLIT,			/* u8 */
-	ETHTOOL_A_RINGS_CQE_SIZE,			/* u32 */
-	ETHTOOL_A_RINGS_TX_PUSH,			/* u8 */
-	ETHTOOL_A_RINGS_RX_PUSH,			/* u8 */
-	ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN,		/* u32 */
-	ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN_MAX,		/* u32 */
-
-	/* add new constants above here */
+	ETHTOOL_A_RINGS_HEADER,
+	ETHTOOL_A_RINGS_RX_MAX,
+	ETHTOOL_A_RINGS_RX_MINI_MAX,
+	ETHTOOL_A_RINGS_RX_JUMBO_MAX,
+	ETHTOOL_A_RINGS_TX_MAX,
+	ETHTOOL_A_RINGS_RX,
+	ETHTOOL_A_RINGS_RX_MINI,
+	ETHTOOL_A_RINGS_RX_JUMBO,
+	ETHTOOL_A_RINGS_TX,
+	ETHTOOL_A_RINGS_RX_BUF_LEN,
+	ETHTOOL_A_RINGS_TCP_DATA_SPLIT,
+	ETHTOOL_A_RINGS_CQE_SIZE,
+	ETHTOOL_A_RINGS_TX_PUSH,
+	ETHTOOL_A_RINGS_RX_PUSH,
+	ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN,
+	ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN_MAX,
+
 	__ETHTOOL_A_RINGS_CNT,
 	ETHTOOL_A_RINGS_MAX = (__ETHTOOL_A_RINGS_CNT - 1)
 };
 
-/* MAC Merge (802.3) */
-
 enum {
 	ETHTOOL_A_MM_STAT_UNSPEC,
 	ETHTOOL_A_MM_STAT_PAD,
+	ETHTOOL_A_MM_STAT_REASSEMBLY_ERRORS,
+	ETHTOOL_A_MM_STAT_SMD_ERRORS,
+	ETHTOOL_A_MM_STAT_REASSEMBLY_OK,
+	ETHTOOL_A_MM_STAT_RX_FRAG_COUNT,
+	ETHTOOL_A_MM_STAT_TX_FRAG_COUNT,
+	ETHTOOL_A_MM_STAT_HOLD_COUNT,
 
-	/* aMACMergeFrameAssErrorCount */
-	ETHTOOL_A_MM_STAT_REASSEMBLY_ERRORS,	/* u64 */
-	/* aMACMergeFrameSmdErrorCount */
-	ETHTOOL_A_MM_STAT_SMD_ERRORS,		/* u64 */
-	/* aMACMergeFrameAssOkCount */
-	ETHTOOL_A_MM_STAT_REASSEMBLY_OK,	/* u64 */
-	/* aMACMergeFragCountRx */
-	ETHTOOL_A_MM_STAT_RX_FRAG_COUNT,	/* u64 */
-	/* aMACMergeFragCountTx */
-	ETHTOOL_A_MM_STAT_TX_FRAG_COUNT,	/* u64 */
-	/* aMACMergeHoldCount */
-	ETHTOOL_A_MM_STAT_HOLD_COUNT,		/* u64 */
-
-	/* add new constants above here */
 	__ETHTOOL_A_MM_STAT_CNT,
 	ETHTOOL_A_MM_STAT_MAX = (__ETHTOOL_A_MM_STAT_CNT - 1)
 };
 
 enum {
 	ETHTOOL_A_MM_UNSPEC,
-	ETHTOOL_A_MM_HEADER,			/* nest - _A_HEADER_* */
-	ETHTOOL_A_MM_PMAC_ENABLED,		/* u8 */
-	ETHTOOL_A_MM_TX_ENABLED,		/* u8 */
-	ETHTOOL_A_MM_TX_ACTIVE,			/* u8 */
-	ETHTOOL_A_MM_TX_MIN_FRAG_SIZE,		/* u32 */
-	ETHTOOL_A_MM_RX_MIN_FRAG_SIZE,		/* u32 */
-	ETHTOOL_A_MM_VERIFY_ENABLED,		/* u8 */
-	ETHTOOL_A_MM_VERIFY_STATUS,		/* u8 */
-	ETHTOOL_A_MM_VERIFY_TIME,		/* u32 */
-	ETHTOOL_A_MM_MAX_VERIFY_TIME,		/* u32 */
-	ETHTOOL_A_MM_STATS,			/* nest - _A_MM_STAT_* */
-
-	/* add new constants above here */
+	ETHTOOL_A_MM_HEADER,
+	ETHTOOL_A_MM_PMAC_ENABLED,
+	ETHTOOL_A_MM_TX_ENABLED,
+	ETHTOOL_A_MM_TX_ACTIVE,
+	ETHTOOL_A_MM_TX_MIN_FRAG_SIZE,
+	ETHTOOL_A_MM_RX_MIN_FRAG_SIZE,
+	ETHTOOL_A_MM_VERIFY_ENABLED,
+	ETHTOOL_A_MM_VERIFY_STATUS,
+	ETHTOOL_A_MM_VERIFY_TIME,
+	ETHTOOL_A_MM_MAX_VERIFY_TIME,
+	ETHTOOL_A_MM_STATS,
+
 	__ETHTOOL_A_MM_CNT,
 	ETHTOOL_A_MM_MAX = (__ETHTOOL_A_MM_CNT - 1)
 };
 
-/* LINKINFO */
-
 enum {
 	ETHTOOL_A_LINKINFO_UNSPEC,
-	ETHTOOL_A_LINKINFO_HEADER,		/* nest - _A_HEADER_* */
-	ETHTOOL_A_LINKINFO_PORT,		/* u8 */
-	ETHTOOL_A_LINKINFO_PHYADDR,		/* u8 */
-	ETHTOOL_A_LINKINFO_TP_MDIX,		/* u8 */
-	ETHTOOL_A_LINKINFO_TP_MDIX_CTRL,	/* u8 */
-	ETHTOOL_A_LINKINFO_TRANSCEIVER,		/* u8 */
-
-	/* add new constants above here */
+	ETHTOOL_A_LINKINFO_HEADER,
+	ETHTOOL_A_LINKINFO_PORT,
+	ETHTOOL_A_LINKINFO_PHYADDR,
+	ETHTOOL_A_LINKINFO_TP_MDIX,
+	ETHTOOL_A_LINKINFO_TP_MDIX_CTRL,
+	ETHTOOL_A_LINKINFO_TRANSCEIVER,
+
 	__ETHTOOL_A_LINKINFO_CNT,
 	ETHTOOL_A_LINKINFO_MAX = __ETHTOOL_A_LINKINFO_CNT - 1
 };
 
-/* LINKMODES */
-
 enum {
 	ETHTOOL_A_LINKMODES_UNSPEC,
-	ETHTOOL_A_LINKMODES_HEADER,		/* nest - _A_HEADER_* */
-	ETHTOOL_A_LINKMODES_AUTONEG,		/* u8 */
-	ETHTOOL_A_LINKMODES_OURS,		/* bitset */
-	ETHTOOL_A_LINKMODES_PEER,		/* bitset */
-	ETHTOOL_A_LINKMODES_SPEED,		/* u32 */
-	ETHTOOL_A_LINKMODES_DUPLEX,		/* u8 */
-	ETHTOOL_A_LINKMODES_MASTER_SLAVE_CFG,	/* u8 */
-	ETHTOOL_A_LINKMODES_MASTER_SLAVE_STATE,	/* u8 */
-	ETHTOOL_A_LINKMODES_LANES,		/* u32 */
-	ETHTOOL_A_LINKMODES_RATE_MATCHING,	/* u8 */
-
-	/* add new constants above here */
+	ETHTOOL_A_LINKMODES_HEADER,
+	ETHTOOL_A_LINKMODES_AUTONEG,
+	ETHTOOL_A_LINKMODES_OURS,
+	ETHTOOL_A_LINKMODES_PEER,
+	ETHTOOL_A_LINKMODES_SPEED,
+	ETHTOOL_A_LINKMODES_DUPLEX,
+	ETHTOOL_A_LINKMODES_MASTER_SLAVE_CFG,
+	ETHTOOL_A_LINKMODES_MASTER_SLAVE_STATE,
+	ETHTOOL_A_LINKMODES_LANES,
+	ETHTOOL_A_LINKMODES_RATE_MATCHING,
+
 	__ETHTOOL_A_LINKMODES_CNT,
 	ETHTOOL_A_LINKMODES_MAX = __ETHTOOL_A_LINKMODES_CNT - 1
 };
 
-/* LINKSTATE */
-
 enum {
 	ETHTOOL_A_LINKSTATE_UNSPEC,
-	ETHTOOL_A_LINKSTATE_HEADER,		/* nest - _A_HEADER_* */
-	ETHTOOL_A_LINKSTATE_LINK,		/* u8 */
-	ETHTOOL_A_LINKSTATE_SQI,		/* u32 */
-	ETHTOOL_A_LINKSTATE_SQI_MAX,		/* u32 */
-	ETHTOOL_A_LINKSTATE_EXT_STATE,		/* u8 */
-	ETHTOOL_A_LINKSTATE_EXT_SUBSTATE,	/* u8 */
-	ETHTOOL_A_LINKSTATE_EXT_DOWN_CNT,	/* u32 */
-
-	/* add new constants above here */
+	ETHTOOL_A_LINKSTATE_HEADER,
+	ETHTOOL_A_LINKSTATE_LINK,
+	ETHTOOL_A_LINKSTATE_SQI,
+	ETHTOOL_A_LINKSTATE_SQI_MAX,
+	ETHTOOL_A_LINKSTATE_EXT_STATE,
+	ETHTOOL_A_LINKSTATE_EXT_SUBSTATE,
+	ETHTOOL_A_LINKSTATE_EXT_DOWN_CNT,
+
 	__ETHTOOL_A_LINKSTATE_CNT,
 	ETHTOOL_A_LINKSTATE_MAX = __ETHTOOL_A_LINKSTATE_CNT - 1
 };
 
-/* DEBUG */
-
 enum {
 	ETHTOOL_A_DEBUG_UNSPEC,
-	ETHTOOL_A_DEBUG_HEADER,			/* nest - _A_HEADER_* */
-	ETHTOOL_A_DEBUG_MSGMASK,		/* bitset */
+	ETHTOOL_A_DEBUG_HEADER,
+	ETHTOOL_A_DEBUG_MSGMASK,
 
-	/* add new constants above here */
 	__ETHTOOL_A_DEBUG_CNT,
 	ETHTOOL_A_DEBUG_MAX = __ETHTOOL_A_DEBUG_CNT - 1
 };
 
-/* WOL */
-
 enum {
 	ETHTOOL_A_WOL_UNSPEC,
-	ETHTOOL_A_WOL_HEADER,			/* nest - _A_HEADER_* */
-	ETHTOOL_A_WOL_MODES,			/* bitset */
-	ETHTOOL_A_WOL_SOPASS,			/* binary */
+	ETHTOOL_A_WOL_HEADER,
+	ETHTOOL_A_WOL_MODES,
+	ETHTOOL_A_WOL_SOPASS,
 
-	/* add new constants above here */
 	__ETHTOOL_A_WOL_CNT,
 	ETHTOOL_A_WOL_MAX = __ETHTOOL_A_WOL_CNT - 1
 };
 
-/* FEATURES */
-
 enum {
 	ETHTOOL_A_FEATURES_UNSPEC,
-	ETHTOOL_A_FEATURES_HEADER,			/* nest - _A_HEADER_* */
-	ETHTOOL_A_FEATURES_HW,				/* bitset */
-	ETHTOOL_A_FEATURES_WANTED,			/* bitset */
-	ETHTOOL_A_FEATURES_ACTIVE,			/* bitset */
-	ETHTOOL_A_FEATURES_NOCHANGE,			/* bitset */
+	ETHTOOL_A_FEATURES_HEADER,
+	ETHTOOL_A_FEATURES_HW,
+	ETHTOOL_A_FEATURES_WANTED,
+	ETHTOOL_A_FEATURES_ACTIVE,
+	ETHTOOL_A_FEATURES_NOCHANGE,
 
-	/* add new constants above here */
 	__ETHTOOL_A_FEATURES_CNT,
 	ETHTOOL_A_FEATURES_MAX = __ETHTOOL_A_FEATURES_CNT - 1
 };
 
-/* CHANNELS */
-
 enum {
 	ETHTOOL_A_CHANNELS_UNSPEC,
-	ETHTOOL_A_CHANNELS_HEADER,			/* nest - _A_HEADER_* */
-	ETHTOOL_A_CHANNELS_RX_MAX,			/* u32 */
-	ETHTOOL_A_CHANNELS_TX_MAX,			/* u32 */
-	ETHTOOL_A_CHANNELS_OTHER_MAX,			/* u32 */
-	ETHTOOL_A_CHANNELS_COMBINED_MAX,		/* u32 */
-	ETHTOOL_A_CHANNELS_RX_COUNT,			/* u32 */
-	ETHTOOL_A_CHANNELS_TX_COUNT,			/* u32 */
-	ETHTOOL_A_CHANNELS_OTHER_COUNT,			/* u32 */
-	ETHTOOL_A_CHANNELS_COMBINED_COUNT,		/* u32 */
-
-	/* add new constants above here */
+	ETHTOOL_A_CHANNELS_HEADER,
+	ETHTOOL_A_CHANNELS_RX_MAX,
+	ETHTOOL_A_CHANNELS_TX_MAX,
+	ETHTOOL_A_CHANNELS_OTHER_MAX,
+	ETHTOOL_A_CHANNELS_COMBINED_MAX,
+	ETHTOOL_A_CHANNELS_RX_COUNT,
+	ETHTOOL_A_CHANNELS_TX_COUNT,
+	ETHTOOL_A_CHANNELS_OTHER_COUNT,
+	ETHTOOL_A_CHANNELS_COMBINED_COUNT,
+
 	__ETHTOOL_A_CHANNELS_CNT,
 	ETHTOOL_A_CHANNELS_MAX = (__ETHTOOL_A_CHANNELS_CNT - 1)
 };
 
 enum {
 	ETHTOOL_A_IRQ_MODERATION_UNSPEC,
-	ETHTOOL_A_IRQ_MODERATION_USEC,			/* u32 */
-	ETHTOOL_A_IRQ_MODERATION_PKTS,			/* u32 */
-	ETHTOOL_A_IRQ_MODERATION_COMPS,			/* u32 */
+	ETHTOOL_A_IRQ_MODERATION_USEC,
+	ETHTOOL_A_IRQ_MODERATION_PKTS,
+	ETHTOOL_A_IRQ_MODERATION_COMPS,
 
 	__ETHTOOL_A_IRQ_MODERATION_CNT,
 	ETHTOOL_A_IRQ_MODERATION_MAX = (__ETHTOOL_A_IRQ_MODERATION_CNT - 1)
@@ -332,111 +275,91 @@ enum {
 
 enum {
 	ETHTOOL_A_PROFILE_UNSPEC,
-	/* nest, _A_IRQ_MODERATION_* */
 	ETHTOOL_A_PROFILE_IRQ_MODERATION,
 	__ETHTOOL_A_PROFILE_CNT,
 	ETHTOOL_A_PROFILE_MAX = (__ETHTOOL_A_PROFILE_CNT - 1)
 };
 
-/* COALESCE */
-
 enum {
 	ETHTOOL_A_COALESCE_UNSPEC,
-	ETHTOOL_A_COALESCE_HEADER,			/* nest - _A_HEADER_* */
-	ETHTOOL_A_COALESCE_RX_USECS,			/* u32 */
-	ETHTOOL_A_COALESCE_RX_MAX_FRAMES,		/* u32 */
-	ETHTOOL_A_COALESCE_RX_USECS_IRQ,		/* u32 */
-	ETHTOOL_A_COALESCE_RX_MAX_FRAMES_IRQ,		/* u32 */
-	ETHTOOL_A_COALESCE_TX_USECS,			/* u32 */
-	ETHTOOL_A_COALESCE_TX_MAX_FRAMES,		/* u32 */
-	ETHTOOL_A_COALESCE_TX_USECS_IRQ,		/* u32 */
-	ETHTOOL_A_COALESCE_TX_MAX_FRAMES_IRQ,		/* u32 */
-	ETHTOOL_A_COALESCE_STATS_BLOCK_USECS,		/* u32 */
-	ETHTOOL_A_COALESCE_USE_ADAPTIVE_RX,		/* u8 */
-	ETHTOOL_A_COALESCE_USE_ADAPTIVE_TX,		/* u8 */
-	ETHTOOL_A_COALESCE_PKT_RATE_LOW,		/* u32 */
-	ETHTOOL_A_COALESCE_RX_USECS_LOW,		/* u32 */
-	ETHTOOL_A_COALESCE_RX_MAX_FRAMES_LOW,		/* u32 */
-	ETHTOOL_A_COALESCE_TX_USECS_LOW,		/* u32 */
-	ETHTOOL_A_COALESCE_TX_MAX_FRAMES_LOW,		/* u32 */
-	ETHTOOL_A_COALESCE_PKT_RATE_HIGH,		/* u32 */
-	ETHTOOL_A_COALESCE_RX_USECS_HIGH,		/* u32 */
-	ETHTOOL_A_COALESCE_RX_MAX_FRAMES_HIGH,		/* u32 */
-	ETHTOOL_A_COALESCE_TX_USECS_HIGH,		/* u32 */
-	ETHTOOL_A_COALESCE_TX_MAX_FRAMES_HIGH,		/* u32 */
-	ETHTOOL_A_COALESCE_RATE_SAMPLE_INTERVAL,	/* u32 */
-	ETHTOOL_A_COALESCE_USE_CQE_MODE_TX,		/* u8 */
-	ETHTOOL_A_COALESCE_USE_CQE_MODE_RX,		/* u8 */
-	ETHTOOL_A_COALESCE_TX_AGGR_MAX_BYTES,		/* u32 */
-	ETHTOOL_A_COALESCE_TX_AGGR_MAX_FRAMES,		/* u32 */
-	ETHTOOL_A_COALESCE_TX_AGGR_TIME_USECS,		/* u32 */
-	/* nest - _A_PROFILE_IRQ_MODERATION */
+	ETHTOOL_A_COALESCE_HEADER,
+	ETHTOOL_A_COALESCE_RX_USECS,
+	ETHTOOL_A_COALESCE_RX_MAX_FRAMES,
+	ETHTOOL_A_COALESCE_RX_USECS_IRQ,
+	ETHTOOL_A_COALESCE_RX_MAX_FRAMES_IRQ,
+	ETHTOOL_A_COALESCE_TX_USECS,
+	ETHTOOL_A_COALESCE_TX_MAX_FRAMES,
+	ETHTOOL_A_COALESCE_TX_USECS_IRQ,
+	ETHTOOL_A_COALESCE_TX_MAX_FRAMES_IRQ,
+	ETHTOOL_A_COALESCE_STATS_BLOCK_USECS,
+	ETHTOOL_A_COALESCE_USE_ADAPTIVE_RX,
+	ETHTOOL_A_COALESCE_USE_ADAPTIVE_TX,
+	ETHTOOL_A_COALESCE_PKT_RATE_LOW,
+	ETHTOOL_A_COALESCE_RX_USECS_LOW,
+	ETHTOOL_A_COALESCE_RX_MAX_FRAMES_LOW,
+	ETHTOOL_A_COALESCE_TX_USECS_LOW,
+	ETHTOOL_A_COALESCE_TX_MAX_FRAMES_LOW,
+	ETHTOOL_A_COALESCE_PKT_RATE_HIGH,
+	ETHTOOL_A_COALESCE_RX_USECS_HIGH,
+	ETHTOOL_A_COALESCE_RX_MAX_FRAMES_HIGH,
+	ETHTOOL_A_COALESCE_TX_USECS_HIGH,
+	ETHTOOL_A_COALESCE_TX_MAX_FRAMES_HIGH,
+	ETHTOOL_A_COALESCE_RATE_SAMPLE_INTERVAL,
+	ETHTOOL_A_COALESCE_USE_CQE_MODE_TX,
+	ETHTOOL_A_COALESCE_USE_CQE_MODE_RX,
+	ETHTOOL_A_COALESCE_TX_AGGR_MAX_BYTES,
+	ETHTOOL_A_COALESCE_TX_AGGR_MAX_FRAMES,
+	ETHTOOL_A_COALESCE_TX_AGGR_TIME_USECS,
 	ETHTOOL_A_COALESCE_RX_PROFILE,
-	/* nest - _A_PROFILE_IRQ_MODERATION */
 	ETHTOOL_A_COALESCE_TX_PROFILE,
 
-	/* add new constants above here */
 	__ETHTOOL_A_COALESCE_CNT,
 	ETHTOOL_A_COALESCE_MAX = (__ETHTOOL_A_COALESCE_CNT - 1)
 };
 
-/* PAUSE */
-
 enum {
 	ETHTOOL_A_PAUSE_STAT_UNSPEC,
 	ETHTOOL_A_PAUSE_STAT_PAD,
-
 	ETHTOOL_A_PAUSE_STAT_TX_FRAMES,
 	ETHTOOL_A_PAUSE_STAT_RX_FRAMES,
 
-	/* add new constants above here
-	 * adjust ETHTOOL_PAUSE_STAT_CNT if adding non-stats!
-	 */
 	__ETHTOOL_A_PAUSE_STAT_CNT,
 	ETHTOOL_A_PAUSE_STAT_MAX = (__ETHTOOL_A_PAUSE_STAT_CNT - 1)
 };
 
 enum {
 	ETHTOOL_A_PAUSE_UNSPEC,
-	ETHTOOL_A_PAUSE_HEADER,				/* nest - _A_HEADER_* */
-	ETHTOOL_A_PAUSE_AUTONEG,			/* u8 */
-	ETHTOOL_A_PAUSE_RX,				/* u8 */
-	ETHTOOL_A_PAUSE_TX,				/* u8 */
-	ETHTOOL_A_PAUSE_STATS,				/* nest - _PAUSE_STAT_* */
-	ETHTOOL_A_PAUSE_STATS_SRC,			/* u32 */
-
-	/* add new constants above here */
+	ETHTOOL_A_PAUSE_HEADER,
+	ETHTOOL_A_PAUSE_AUTONEG,
+	ETHTOOL_A_PAUSE_RX,
+	ETHTOOL_A_PAUSE_TX,
+	ETHTOOL_A_PAUSE_STATS,
+	ETHTOOL_A_PAUSE_STATS_SRC,
+
 	__ETHTOOL_A_PAUSE_CNT,
 	ETHTOOL_A_PAUSE_MAX = (__ETHTOOL_A_PAUSE_CNT - 1)
 };
 
-/* EEE */
-
 enum {
 	ETHTOOL_A_EEE_UNSPEC,
-	ETHTOOL_A_EEE_HEADER,				/* nest - _A_HEADER_* */
-	ETHTOOL_A_EEE_MODES_OURS,			/* bitset */
-	ETHTOOL_A_EEE_MODES_PEER,			/* bitset */
-	ETHTOOL_A_EEE_ACTIVE,				/* u8 */
-	ETHTOOL_A_EEE_ENABLED,				/* u8 */
-	ETHTOOL_A_EEE_TX_LPI_ENABLED,			/* u8 */
-	ETHTOOL_A_EEE_TX_LPI_TIMER,			/* u32 */
-
-	/* add new constants above here */
+	ETHTOOL_A_EEE_HEADER,
+	ETHTOOL_A_EEE_MODES_OURS,
+	ETHTOOL_A_EEE_MODES_PEER,
+	ETHTOOL_A_EEE_ACTIVE,
+	ETHTOOL_A_EEE_ENABLED,
+	ETHTOOL_A_EEE_TX_LPI_ENABLED,
+	ETHTOOL_A_EEE_TX_LPI_TIMER,
+
 	__ETHTOOL_A_EEE_CNT,
 	ETHTOOL_A_EEE_MAX = (__ETHTOOL_A_EEE_CNT - 1)
 };
 
-/* TSINFO */
-
 enum {
 	ETHTOOL_A_TS_STAT_UNSPEC,
+	ETHTOOL_A_TS_STAT_TX_PKTS,
+	ETHTOOL_A_TS_STAT_TX_LOST,
+	ETHTOOL_A_TS_STAT_TX_ERR,
 
-	ETHTOOL_A_TS_STAT_TX_PKTS,			/* uint */
-	ETHTOOL_A_TS_STAT_TX_LOST,			/* uint */
-	ETHTOOL_A_TS_STAT_TX_ERR,			/* uint */
-
-	/* add new constants above here */
 	__ETHTOOL_A_TS_STAT_CNT,
 	ETHTOOL_A_TS_STAT_MAX = (__ETHTOOL_A_TS_STAT_CNT - 1)
 
@@ -444,23 +367,22 @@ enum {
 
 enum {
 	ETHTOOL_A_TSINFO_UNSPEC,
-	ETHTOOL_A_TSINFO_HEADER,			/* nest - _A_HEADER_* */
-	ETHTOOL_A_TSINFO_TIMESTAMPING,			/* bitset */
-	ETHTOOL_A_TSINFO_TX_TYPES,			/* bitset */
-	ETHTOOL_A_TSINFO_RX_FILTERS,			/* bitset */
-	ETHTOOL_A_TSINFO_PHC_INDEX,			/* u32 */
-	ETHTOOL_A_TSINFO_STATS,				/* nest - _A_TSINFO_STAT */
-
-	/* add new constants above here */
+	ETHTOOL_A_TSINFO_HEADER,
+	ETHTOOL_A_TSINFO_TIMESTAMPING,
+	ETHTOOL_A_TSINFO_TX_TYPES,
+	ETHTOOL_A_TSINFO_RX_FILTERS,
+	ETHTOOL_A_TSINFO_PHC_INDEX,
+	ETHTOOL_A_TSINFO_STATS,
+
 	__ETHTOOL_A_TSINFO_CNT,
 	ETHTOOL_A_TSINFO_MAX = (__ETHTOOL_A_TSINFO_CNT - 1)
 };
 
 enum {
 	ETHTOOL_A_CABLE_RESULT_UNSPEC,
-	ETHTOOL_A_CABLE_RESULT_PAIR,		/* u8 ETHTOOL_A_CABLE_PAIR_ */
-	ETHTOOL_A_CABLE_RESULT_CODE,		/* u8 ETHTOOL_A_CABLE_RESULT_CODE_ */
-	ETHTOOL_A_CABLE_RESULT_SRC,		/* u32 ETHTOOL_A_CABLE_INF_SRC_ */
+	ETHTOOL_A_CABLE_RESULT_PAIR,
+	ETHTOOL_A_CABLE_RESULT_CODE,
+	ETHTOOL_A_CABLE_RESULT_SRC,
 
 	__ETHTOOL_A_CABLE_RESULT_CNT,
 	ETHTOOL_A_CABLE_RESULT_MAX = (__ETHTOOL_A_CABLE_RESULT_CNT - 1)
@@ -468,9 +390,9 @@ enum {
 
 enum {
 	ETHTOOL_A_CABLE_FAULT_LENGTH_UNSPEC,
-	ETHTOOL_A_CABLE_FAULT_LENGTH_PAIR,	/* u8 ETHTOOL_A_CABLE_PAIR_ */
-	ETHTOOL_A_CABLE_FAULT_LENGTH_CM,	/* u32 */
-	ETHTOOL_A_CABLE_FAULT_LENGTH_SRC,	/* u32 ETHTOOL_A_CABLE_INF_SRC_ */
+	ETHTOOL_A_CABLE_FAULT_LENGTH_PAIR,
+	ETHTOOL_A_CABLE_FAULT_LENGTH_CM,
+	ETHTOOL_A_CABLE_FAULT_LENGTH_SRC,
 
 	__ETHTOOL_A_CABLE_FAULT_LENGTH_CNT,
 	ETHTOOL_A_CABLE_FAULT_LENGTH_MAX = (__ETHTOOL_A_CABLE_FAULT_LENGTH_CNT - 1)
@@ -478,245 +400,204 @@ enum {
 
 enum {
 	ETHTOOL_A_CABLE_NEST_UNSPEC,
-	ETHTOOL_A_CABLE_NEST_RESULT,		/* nest - ETHTOOL_A_CABLE_RESULT_ */
-	ETHTOOL_A_CABLE_NEST_FAULT_LENGTH,	/* nest - ETHTOOL_A_CABLE_FAULT_LENGTH_ */
+	ETHTOOL_A_CABLE_NEST_RESULT,
+	ETHTOOL_A_CABLE_NEST_FAULT_LENGTH,
+
 	__ETHTOOL_A_CABLE_NEST_CNT,
 	ETHTOOL_A_CABLE_NEST_MAX = (__ETHTOOL_A_CABLE_NEST_CNT - 1)
 };
 
-/* CABLE TEST */
-
 enum {
 	ETHTOOL_A_CABLE_TEST_UNSPEC,
-	ETHTOOL_A_CABLE_TEST_HEADER,		/* nest - _A_HEADER_* */
+	ETHTOOL_A_CABLE_TEST_HEADER,
 
-	/* add new constants above here */
 	__ETHTOOL_A_CABLE_TEST_CNT,
 	ETHTOOL_A_CABLE_TEST_MAX = __ETHTOOL_A_CABLE_TEST_CNT - 1
 };
 
 enum {
 	ETHTOOL_A_CABLE_TEST_NTF_UNSPEC,
-	ETHTOOL_A_CABLE_TEST_NTF_HEADER,	/* nest - ETHTOOL_A_HEADER_* */
-	ETHTOOL_A_CABLE_TEST_NTF_STATUS,	/* u8 - _STARTED/_COMPLETE */
-	ETHTOOL_A_CABLE_TEST_NTF_NEST,		/* nest - of results: */
+	ETHTOOL_A_CABLE_TEST_NTF_HEADER,
+	ETHTOOL_A_CABLE_TEST_NTF_STATUS,
+	ETHTOOL_A_CABLE_TEST_NTF_NEST,
 
 	__ETHTOOL_A_CABLE_TEST_NTF_CNT,
 	ETHTOOL_A_CABLE_TEST_NTF_MAX = (__ETHTOOL_A_CABLE_TEST_NTF_CNT - 1)
 };
 
-/* CABLE TEST TDR */
-
 enum {
 	ETHTOOL_A_CABLE_TEST_TDR_CFG_UNSPEC,
-	ETHTOOL_A_CABLE_TEST_TDR_CFG_FIRST,		/* u32 */
-	ETHTOOL_A_CABLE_TEST_TDR_CFG_LAST,		/* u32 */
-	ETHTOOL_A_CABLE_TEST_TDR_CFG_STEP,		/* u32 */
-	ETHTOOL_A_CABLE_TEST_TDR_CFG_PAIR,		/* u8 */
+	ETHTOOL_A_CABLE_TEST_TDR_CFG_FIRST,
+	ETHTOOL_A_CABLE_TEST_TDR_CFG_LAST,
+	ETHTOOL_A_CABLE_TEST_TDR_CFG_STEP,
+	ETHTOOL_A_CABLE_TEST_TDR_CFG_PAIR,
 
-	/* add new constants above here */
 	__ETHTOOL_A_CABLE_TEST_TDR_CFG_CNT,
 	ETHTOOL_A_CABLE_TEST_TDR_CFG_MAX = __ETHTOOL_A_CABLE_TEST_TDR_CFG_CNT - 1
 };
 
 enum {
 	ETHTOOL_A_CABLE_TEST_TDR_NTF_UNSPEC,
-	ETHTOOL_A_CABLE_TEST_TDR_NTF_HEADER,	/* nest - ETHTOOL_A_HEADER_* */
-	ETHTOOL_A_CABLE_TEST_TDR_NTF_STATUS,	/* u8 - _STARTED/_COMPLETE */
-	ETHTOOL_A_CABLE_TEST_TDR_NTF_NEST,	/* nest - of results: */
+	ETHTOOL_A_CABLE_TEST_TDR_NTF_HEADER,
+	ETHTOOL_A_CABLE_TEST_TDR_NTF_STATUS,
+	ETHTOOL_A_CABLE_TEST_TDR_NTF_NEST,
 
-	/* add new constants above here */
 	__ETHTOOL_A_CABLE_TEST_TDR_NTF_CNT,
 	ETHTOOL_A_CABLE_TEST_TDR_NTF_MAX = __ETHTOOL_A_CABLE_TEST_TDR_NTF_CNT - 1
 };
 
 enum {
 	ETHTOOL_A_CABLE_TEST_TDR_UNSPEC,
-	ETHTOOL_A_CABLE_TEST_TDR_HEADER,	/* nest - _A_HEADER_* */
-	ETHTOOL_A_CABLE_TEST_TDR_CFG,		/* nest - *_TDR_CFG_* */
+	ETHTOOL_A_CABLE_TEST_TDR_HEADER,
+	ETHTOOL_A_CABLE_TEST_TDR_CFG,
 
-	/* add new constants above here */
 	__ETHTOOL_A_CABLE_TEST_TDR_CNT,
 	ETHTOOL_A_CABLE_TEST_TDR_MAX = __ETHTOOL_A_CABLE_TEST_TDR_CNT - 1
 };
 
 enum {
 	ETHTOOL_A_TUNNEL_UDP_ENTRY_UNSPEC,
+	ETHTOOL_A_TUNNEL_UDP_ENTRY_PORT,
+	ETHTOOL_A_TUNNEL_UDP_ENTRY_TYPE,
 
-	ETHTOOL_A_TUNNEL_UDP_ENTRY_PORT,		/* be16 */
-	ETHTOOL_A_TUNNEL_UDP_ENTRY_TYPE,		/* u32 */
-
-	/* add new constants above here */
 	__ETHTOOL_A_TUNNEL_UDP_ENTRY_CNT,
 	ETHTOOL_A_TUNNEL_UDP_ENTRY_MAX = (__ETHTOOL_A_TUNNEL_UDP_ENTRY_CNT - 1)
 };
 
 enum {
 	ETHTOOL_A_TUNNEL_UDP_TABLE_UNSPEC,
+	ETHTOOL_A_TUNNEL_UDP_TABLE_SIZE,
+	ETHTOOL_A_TUNNEL_UDP_TABLE_TYPES,
+	ETHTOOL_A_TUNNEL_UDP_TABLE_ENTRY,
 
-	ETHTOOL_A_TUNNEL_UDP_TABLE_SIZE,		/* u32 */
-	ETHTOOL_A_TUNNEL_UDP_TABLE_TYPES,		/* bitset */
-	ETHTOOL_A_TUNNEL_UDP_TABLE_ENTRY,		/* nest - _UDP_ENTRY_* */
-
-	/* add new constants above here */
 	__ETHTOOL_A_TUNNEL_UDP_TABLE_CNT,
 	ETHTOOL_A_TUNNEL_UDP_TABLE_MAX = (__ETHTOOL_A_TUNNEL_UDP_TABLE_CNT - 1)
 };
 
 enum {
 	ETHTOOL_A_TUNNEL_UDP_UNSPEC,
+	ETHTOOL_A_TUNNEL_UDP_TABLE,
 
-	ETHTOOL_A_TUNNEL_UDP_TABLE,			/* nest - _UDP_TABLE_* */
-
-	/* add new constants above here */
 	__ETHTOOL_A_TUNNEL_UDP_CNT,
 	ETHTOOL_A_TUNNEL_UDP_MAX = (__ETHTOOL_A_TUNNEL_UDP_CNT - 1)
 };
 
 enum {
 	ETHTOOL_A_TUNNEL_INFO_UNSPEC,
-	ETHTOOL_A_TUNNEL_INFO_HEADER,			/* nest - _A_HEADER_* */
+	ETHTOOL_A_TUNNEL_INFO_HEADER,
+	ETHTOOL_A_TUNNEL_INFO_UDP_PORTS,
 
-	ETHTOOL_A_TUNNEL_INFO_UDP_PORTS,		/* nest - _UDP_TABLE */
-
-	/* add new constants above here */
 	__ETHTOOL_A_TUNNEL_INFO_CNT,
 	ETHTOOL_A_TUNNEL_INFO_MAX = (__ETHTOOL_A_TUNNEL_INFO_CNT - 1)
 };
 
-/* FEC */
-
 enum {
 	ETHTOOL_A_FEC_STAT_UNSPEC,
 	ETHTOOL_A_FEC_STAT_PAD,
+	ETHTOOL_A_FEC_STAT_CORRECTED,
+	ETHTOOL_A_FEC_STAT_UNCORR,
+	ETHTOOL_A_FEC_STAT_CORR_BITS,
 
-	ETHTOOL_A_FEC_STAT_CORRECTED,			/* array, u64 */
-	ETHTOOL_A_FEC_STAT_UNCORR,			/* array, u64 */
-	ETHTOOL_A_FEC_STAT_CORR_BITS,			/* array, u64 */
-
-	/* add new constants above here */
 	__ETHTOOL_A_FEC_STAT_CNT,
 	ETHTOOL_A_FEC_STAT_MAX = (__ETHTOOL_A_FEC_STAT_CNT - 1)
 };
 
 enum {
 	ETHTOOL_A_FEC_UNSPEC,
-	ETHTOOL_A_FEC_HEADER,				/* nest - _A_HEADER_* */
-	ETHTOOL_A_FEC_MODES,				/* bitset */
-	ETHTOOL_A_FEC_AUTO,				/* u8 */
-	ETHTOOL_A_FEC_ACTIVE,				/* u32 */
-	ETHTOOL_A_FEC_STATS,				/* nest - _A_FEC_STAT */
+	ETHTOOL_A_FEC_HEADER,
+	ETHTOOL_A_FEC_MODES,
+	ETHTOOL_A_FEC_AUTO,
+	ETHTOOL_A_FEC_ACTIVE,
+	ETHTOOL_A_FEC_STATS,
 
 	__ETHTOOL_A_FEC_CNT,
 	ETHTOOL_A_FEC_MAX = (__ETHTOOL_A_FEC_CNT - 1)
 };
 
-/* MODULE EEPROM */
-
 enum {
 	ETHTOOL_A_MODULE_EEPROM_UNSPEC,
-	ETHTOOL_A_MODULE_EEPROM_HEADER,			/* nest - _A_HEADER_* */
-
-	ETHTOOL_A_MODULE_EEPROM_OFFSET,			/* u32 */
-	ETHTOOL_A_MODULE_EEPROM_LENGTH,			/* u32 */
-	ETHTOOL_A_MODULE_EEPROM_PAGE,			/* u8 */
-	ETHTOOL_A_MODULE_EEPROM_BANK,			/* u8 */
-	ETHTOOL_A_MODULE_EEPROM_I2C_ADDRESS,		/* u8 */
-	ETHTOOL_A_MODULE_EEPROM_DATA,			/* binary */
+	ETHTOOL_A_MODULE_EEPROM_HEADER,
+	ETHTOOL_A_MODULE_EEPROM_OFFSET,
+	ETHTOOL_A_MODULE_EEPROM_LENGTH,
+	ETHTOOL_A_MODULE_EEPROM_PAGE,
+	ETHTOOL_A_MODULE_EEPROM_BANK,
+	ETHTOOL_A_MODULE_EEPROM_I2C_ADDRESS,
+	ETHTOOL_A_MODULE_EEPROM_DATA,
 
 	__ETHTOOL_A_MODULE_EEPROM_CNT,
 	ETHTOOL_A_MODULE_EEPROM_MAX = (__ETHTOOL_A_MODULE_EEPROM_CNT - 1)
 };
 
-
 enum {
 	ETHTOOL_A_STATS_GRP_UNSPEC,
 	ETHTOOL_A_STATS_GRP_PAD,
+	ETHTOOL_A_STATS_GRP_ID,
+	ETHTOOL_A_STATS_GRP_SS_ID,
+	ETHTOOL_A_STATS_GRP_STAT,
+	ETHTOOL_A_STATS_GRP_HIST_RX,
+	ETHTOOL_A_STATS_GRP_HIST_TX,
+	ETHTOOL_A_STATS_GRP_HIST_BKT_LOW,
+	ETHTOOL_A_STATS_GRP_HIST_BKT_HI,
+	ETHTOOL_A_STATS_GRP_HIST_VAL,
 
-	ETHTOOL_A_STATS_GRP_ID,			/* u32 */
-	ETHTOOL_A_STATS_GRP_SS_ID,		/* u32 */
-
-	ETHTOOL_A_STATS_GRP_STAT,		/* nest */
-
-	ETHTOOL_A_STATS_GRP_HIST_RX,		/* nest */
-	ETHTOOL_A_STATS_GRP_HIST_TX,		/* nest */
-
-	ETHTOOL_A_STATS_GRP_HIST_BKT_LOW,	/* u32 */
-	ETHTOOL_A_STATS_GRP_HIST_BKT_HI,	/* u32 */
-	ETHTOOL_A_STATS_GRP_HIST_VAL,		/* u64 */
-
-	/* add new constants above here */
 	__ETHTOOL_A_STATS_GRP_CNT,
 	ETHTOOL_A_STATS_GRP_MAX = (__ETHTOOL_A_STATS_GRP_CNT - 1)
 };
 
-/* STATS */
-
 enum {
 	ETHTOOL_A_STATS_UNSPEC,
 	ETHTOOL_A_STATS_PAD,
-	ETHTOOL_A_STATS_HEADER,			/* nest - _A_HEADER_* */
-	ETHTOOL_A_STATS_GROUPS,			/* bitset */
+	ETHTOOL_A_STATS_HEADER,
+	ETHTOOL_A_STATS_GROUPS,
+	ETHTOOL_A_STATS_GRP,
+	ETHTOOL_A_STATS_SRC,
 
-	ETHTOOL_A_STATS_GRP,			/* nest - _A_STATS_GRP_* */
-
-	ETHTOOL_A_STATS_SRC,			/* u32 */
-
-	/* add new constants above here */
 	__ETHTOOL_A_STATS_CNT,
 	ETHTOOL_A_STATS_MAX = (__ETHTOOL_A_STATS_CNT - 1)
 };
 
-/* PHC VCLOCKS */
-
 enum {
 	ETHTOOL_A_PHC_VCLOCKS_UNSPEC,
-	ETHTOOL_A_PHC_VCLOCKS_HEADER,			/* nest - _A_HEADER_* */
-	ETHTOOL_A_PHC_VCLOCKS_NUM,			/* u32 */
-	ETHTOOL_A_PHC_VCLOCKS_INDEX,			/* array, s32 */
+	ETHTOOL_A_PHC_VCLOCKS_HEADER,
+	ETHTOOL_A_PHC_VCLOCKS_NUM,
+	ETHTOOL_A_PHC_VCLOCKS_INDEX,
 
-	/* add new constants above here */
 	__ETHTOOL_A_PHC_VCLOCKS_CNT,
 	ETHTOOL_A_PHC_VCLOCKS_MAX = (__ETHTOOL_A_PHC_VCLOCKS_CNT - 1)
 };
 
-/* MODULE */
-
 enum {
 	ETHTOOL_A_MODULE_UNSPEC,
-	ETHTOOL_A_MODULE_HEADER,		/* nest - _A_HEADER_* */
-	ETHTOOL_A_MODULE_POWER_MODE_POLICY,	/* u8 */
-	ETHTOOL_A_MODULE_POWER_MODE,		/* u8 */
+	ETHTOOL_A_MODULE_HEADER,
+	ETHTOOL_A_MODULE_POWER_MODE_POLICY,
+	ETHTOOL_A_MODULE_POWER_MODE,
 
-	/* add new constants above here */
 	__ETHTOOL_A_MODULE_CNT,
 	ETHTOOL_A_MODULE_MAX = (__ETHTOOL_A_MODULE_CNT - 1)
 };
 
-/* Power Sourcing Equipment */
 enum {
 	ETHTOOL_A_C33_PSE_PW_LIMIT_UNSPEC,
-	ETHTOOL_A_C33_PSE_PW_LIMIT_MIN,	/* u32 */
-	ETHTOOL_A_C33_PSE_PW_LIMIT_MAX,	/* u32 */
+	ETHTOOL_A_C33_PSE_PW_LIMIT_MIN,
+	ETHTOOL_A_C33_PSE_PW_LIMIT_MAX,
 };
 
 enum {
 	ETHTOOL_A_PSE_UNSPEC,
-	ETHTOOL_A_PSE_HEADER,			/* nest - _A_HEADER_* */
-	ETHTOOL_A_PODL_PSE_ADMIN_STATE,		/* u32 */
-	ETHTOOL_A_PODL_PSE_ADMIN_CONTROL,	/* u32 */
-	ETHTOOL_A_PODL_PSE_PW_D_STATUS,		/* u32 */
-	ETHTOOL_A_C33_PSE_ADMIN_STATE,		/* u32 */
-	ETHTOOL_A_C33_PSE_ADMIN_CONTROL,	/* u32 */
-	ETHTOOL_A_C33_PSE_PW_D_STATUS,		/* u32 */
-	ETHTOOL_A_C33_PSE_PW_CLASS,		/* u32 */
-	ETHTOOL_A_C33_PSE_ACTUAL_PW,		/* u32 */
-	ETHTOOL_A_C33_PSE_EXT_STATE,		/* u32 */
-	ETHTOOL_A_C33_PSE_EXT_SUBSTATE,		/* u32 */
-	ETHTOOL_A_C33_PSE_AVAIL_PW_LIMIT,	/* u32 */
-	ETHTOOL_A_C33_PSE_PW_LIMIT_RANGES,	/* nest - _C33_PSE_PW_LIMIT_* */
-
-	/* add new constants above here */
+	ETHTOOL_A_PSE_HEADER,
+	ETHTOOL_A_PODL_PSE_ADMIN_STATE,
+	ETHTOOL_A_PODL_PSE_ADMIN_CONTROL,
+	ETHTOOL_A_PODL_PSE_PW_D_STATUS,
+	ETHTOOL_A_C33_PSE_ADMIN_STATE,
+	ETHTOOL_A_C33_PSE_ADMIN_CONTROL,
+	ETHTOOL_A_C33_PSE_PW_D_STATUS,
+	ETHTOOL_A_C33_PSE_PW_CLASS,
+	ETHTOOL_A_C33_PSE_ACTUAL_PW,
+	ETHTOOL_A_C33_PSE_EXT_STATE,
+	ETHTOOL_A_C33_PSE_EXT_SUBSTATE,
+	ETHTOOL_A_C33_PSE_AVAIL_PW_LIMIT,
+	ETHTOOL_A_C33_PSE_PW_LIMIT_RANGES,
+
 	__ETHTOOL_A_PSE_CNT,
 	ETHTOOL_A_PSE_MAX = (__ETHTOOL_A_PSE_CNT - 1)
 };
@@ -724,70 +605,62 @@ enum {
 enum {
 	ETHTOOL_A_RSS_UNSPEC,
 	ETHTOOL_A_RSS_HEADER,
-	ETHTOOL_A_RSS_CONTEXT,		/* u32 */
-	ETHTOOL_A_RSS_HFUNC,		/* u32 */
-	ETHTOOL_A_RSS_INDIR,		/* binary */
-	ETHTOOL_A_RSS_HKEY,		/* binary */
-	ETHTOOL_A_RSS_INPUT_XFRM,	/* u32 */
-	ETHTOOL_A_RSS_START_CONTEXT,	/* u32 */
+	ETHTOOL_A_RSS_CONTEXT,
+	ETHTOOL_A_RSS_HFUNC,
+	ETHTOOL_A_RSS_INDIR,
+	ETHTOOL_A_RSS_HKEY,
+	ETHTOOL_A_RSS_INPUT_XFRM,
+	ETHTOOL_A_RSS_START_CONTEXT,
 
 	__ETHTOOL_A_RSS_CNT,
 	ETHTOOL_A_RSS_MAX = (__ETHTOOL_A_RSS_CNT - 1),
 };
 
-/* PLCA */
-
 enum {
 	ETHTOOL_A_PLCA_UNSPEC,
-	ETHTOOL_A_PLCA_HEADER,			/* nest - _A_HEADER_* */
-	ETHTOOL_A_PLCA_VERSION,			/* u16 */
-	ETHTOOL_A_PLCA_ENABLED,			/* u8  */
-	ETHTOOL_A_PLCA_STATUS,			/* u8  */
-	ETHTOOL_A_PLCA_NODE_CNT,		/* u32 */
-	ETHTOOL_A_PLCA_NODE_ID,			/* u32 */
-	ETHTOOL_A_PLCA_TO_TMR,			/* u32 */
-	ETHTOOL_A_PLCA_BURST_CNT,		/* u32 */
-	ETHTOOL_A_PLCA_BURST_TMR,		/* u32 */
-
-	/* add new constants above here */
+	ETHTOOL_A_PLCA_HEADER,
+	ETHTOOL_A_PLCA_VERSION,
+	ETHTOOL_A_PLCA_ENABLED,
+	ETHTOOL_A_PLCA_STATUS,
+	ETHTOOL_A_PLCA_NODE_CNT,
+	ETHTOOL_A_PLCA_NODE_ID,
+	ETHTOOL_A_PLCA_TO_TMR,
+	ETHTOOL_A_PLCA_BURST_CNT,
+	ETHTOOL_A_PLCA_BURST_TMR,
+
 	__ETHTOOL_A_PLCA_CNT,
 	ETHTOOL_A_PLCA_MAX = (__ETHTOOL_A_PLCA_CNT - 1)
 };
 
-/* MODULE_FW_FLASH */
-
 enum {
 	ETHTOOL_A_MODULE_FW_FLASH_UNSPEC,
-	ETHTOOL_A_MODULE_FW_FLASH_HEADER,		/* nest - _A_HEADER_* */
-	ETHTOOL_A_MODULE_FW_FLASH_FILE_NAME,		/* string */
-	ETHTOOL_A_MODULE_FW_FLASH_PASSWORD,		/* u32 */
-	ETHTOOL_A_MODULE_FW_FLASH_STATUS,		/* u32 */
-	ETHTOOL_A_MODULE_FW_FLASH_STATUS_MSG,		/* string */
-	ETHTOOL_A_MODULE_FW_FLASH_DONE,			/* uint */
-	ETHTOOL_A_MODULE_FW_FLASH_TOTAL,		/* uint */
-
-	/* add new constants above here */
+	ETHTOOL_A_MODULE_FW_FLASH_HEADER,
+	ETHTOOL_A_MODULE_FW_FLASH_FILE_NAME,
+	ETHTOOL_A_MODULE_FW_FLASH_PASSWORD,
+	ETHTOOL_A_MODULE_FW_FLASH_STATUS,
+	ETHTOOL_A_MODULE_FW_FLASH_STATUS_MSG,
+	ETHTOOL_A_MODULE_FW_FLASH_DONE,
+	ETHTOOL_A_MODULE_FW_FLASH_TOTAL,
+
 	__ETHTOOL_A_MODULE_FW_FLASH_CNT,
 	ETHTOOL_A_MODULE_FW_FLASH_MAX = (__ETHTOOL_A_MODULE_FW_FLASH_CNT - 1)
 };
 
 enum {
 	ETHTOOL_A_PHY_UNSPEC,
-	ETHTOOL_A_PHY_HEADER,			/* nest - _A_HEADER_* */
-	ETHTOOL_A_PHY_INDEX,			/* u32 */
-	ETHTOOL_A_PHY_DRVNAME,			/* string */
-	ETHTOOL_A_PHY_NAME,			/* string */
-	ETHTOOL_A_PHY_UPSTREAM_TYPE,		/* u32 */
-	ETHTOOL_A_PHY_UPSTREAM_INDEX,		/* u32 */
-	ETHTOOL_A_PHY_UPSTREAM_SFP_NAME,	/* string */
-	ETHTOOL_A_PHY_DOWNSTREAM_SFP_NAME,	/* string */
-
-	/* add new constants above here */
+	ETHTOOL_A_PHY_HEADER,
+	ETHTOOL_A_PHY_INDEX,
+	ETHTOOL_A_PHY_DRVNAME,
+	ETHTOOL_A_PHY_NAME,
+	ETHTOOL_A_PHY_UPSTREAM_TYPE,
+	ETHTOOL_A_PHY_UPSTREAM_INDEX,
+	ETHTOOL_A_PHY_UPSTREAM_SFP_NAME,
+	ETHTOOL_A_PHY_DOWNSTREAM_SFP_NAME,
+
 	__ETHTOOL_A_PHY_CNT,
 	ETHTOOL_A_PHY_MAX = (__ETHTOOL_A_PHY_CNT - 1)
 };
 
-/* message types - userspace to kernel */
 enum {
 	ETHTOOL_MSG_USER_NONE,
 	ETHTOOL_MSG_STRSET_GET,
@@ -836,12 +709,10 @@ enum {
 	ETHTOOL_MSG_MODULE_FW_FLASH_ACT,
 	ETHTOOL_MSG_PHY_GET,
 
-	/* add new constants above here */
 	__ETHTOOL_MSG_USER_CNT,
 	ETHTOOL_MSG_USER_MAX = __ETHTOOL_MSG_USER_CNT - 1
 };
 
-/* message types - kernel to userspace */
 enum {
 	ETHTOOL_MSG_KERNEL_NONE,
 	ETHTOOL_MSG_STRSET_GET_REPLY,
@@ -891,7 +762,6 @@ enum {
 	ETHTOOL_MSG_PHY_GET_REPLY,
 	ETHTOOL_MSG_PHY_NTF,
 
-	/* add new constants above here */
 	__ETHTOOL_MSG_KERNEL_CNT,
 	ETHTOOL_MSG_KERNEL_MAX = __ETHTOOL_MSG_KERNEL_CNT - 1
 };
-- 
cgit v1.2.3


From 8d0580c6ebdd27879c83483f53bc71e2e470f6fe Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@fomichev.me>
Date: Wed, 4 Dec 2024 07:55:49 -0800
Subject: ethtool: regenerate uapi header from the spec

No functional changes. Mostly the following formatting:
- extra docs
- extra enums
- XXX_MAX = __XXX_CNT - 1 -> XXX_MAX = (__XXX_CNT - 1)
- newlines

Signed-off-by: Stanislav Fomichev <sdf@fomichev.me>
Link: https://patch.msgid.link/20241204155549.641348-9-sdf@fomichev.me
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/ethtool_netlink_generated.h | 89 ++++++++++++++++----------
 1 file changed, 56 insertions(+), 33 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/ethtool_netlink_generated.h b/include/uapi/linux/ethtool_netlink_generated.h
index 35a24d490efe..b58f352fe4f2 100644
--- a/include/uapi/linux/ethtool_netlink_generated.h
+++ b/include/uapi/linux/ethtool_netlink_generated.h
@@ -1,23 +1,43 @@
 /* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
+/* Do not edit directly, auto-generated from: */
+/*	Documentation/netlink/specs/ethtool.yaml */
+/* YNL-GEN uapi header */
+
 #ifndef _UAPI_LINUX_ETHTOOL_NETLINK_GENERATED_H
 #define _UAPI_LINUX_ETHTOOL_NETLINK_GENERATED_H
 
+#define ETHTOOL_FAMILY_NAME	"ethtool"
+#define ETHTOOL_FAMILY_VERSION	1
+
 enum {
 	ETHTOOL_UDP_TUNNEL_TYPE_VXLAN,
 	ETHTOOL_UDP_TUNNEL_TYPE_GENEVE,
 	ETHTOOL_UDP_TUNNEL_TYPE_VXLAN_GPE,
 
-	__ETHTOOL_UDP_TUNNEL_TYPE_CNT
+	/* private: */
+	__ETHTOOL_UDP_TUNNEL_TYPE_CNT,
+	ETHTOOL_UDP_TUNNEL_TYPE_MAX = (__ETHTOOL_UDP_TUNNEL_TYPE_CNT - 1)
 };
 
+/**
+ * enum ethtool_header_flags - common ethtool header flags
+ * @ETHTOOL_FLAG_COMPACT_BITSETS: use compact bitsets in reply
+ * @ETHTOOL_FLAG_OMIT_REPLY: provide optional reply for SET or ACT requests
+ * @ETHTOOL_FLAG_STATS: request statistics, if supported by the driver
+ */
 enum ethtool_header_flags {
-	ETHTOOL_FLAG_COMPACT_BITSETS	= 1 << 0,	/* use compact bitsets in reply */
-	ETHTOOL_FLAG_OMIT_REPLY		= 1 << 1,	/* provide optional reply for SET or ACT requests */
-	ETHTOOL_FLAG_STATS		= 1 << 2,	/* request statistics, if supported by the driver */
+	ETHTOOL_FLAG_COMPACT_BITSETS = 1,
+	ETHTOOL_FLAG_OMIT_REPLY = 2,
+	ETHTOOL_FLAG_STATS = 4,
 };
 
 enum {
-	ETHTOOL_TCP_DATA_SPLIT_UNKNOWN = 0,
+	ETHTOOL_PHY_UPSTREAM_TYPE_MAC,
+	ETHTOOL_PHY_UPSTREAM_TYPE_PHY,
+};
+
+enum ethtool_tcp_data_split {
+	ETHTOOL_TCP_DATA_SPLIT_UNKNOWN,
 	ETHTOOL_TCP_DATA_SPLIT_DISABLED,
 	ETHTOOL_TCP_DATA_SPLIT_ENABLED,
 };
@@ -30,7 +50,7 @@ enum {
 	ETHTOOL_A_HEADER_PHY_INDEX,
 
 	__ETHTOOL_A_HEADER_CNT,
-	ETHTOOL_A_HEADER_MAX = __ETHTOOL_A_HEADER_CNT - 1
+	ETHTOOL_A_HEADER_MAX = (__ETHTOOL_A_HEADER_CNT - 1)
 };
 
 enum {
@@ -40,7 +60,7 @@ enum {
 	ETHTOOL_A_BITSET_BIT_VALUE,
 
 	__ETHTOOL_A_BITSET_BIT_CNT,
-	ETHTOOL_A_BITSET_BIT_MAX = __ETHTOOL_A_BITSET_BIT_CNT - 1
+	ETHTOOL_A_BITSET_BIT_MAX = (__ETHTOOL_A_BITSET_BIT_CNT - 1)
 };
 
 enum {
@@ -48,7 +68,7 @@ enum {
 	ETHTOOL_A_BITSET_BITS_BIT,
 
 	__ETHTOOL_A_BITSET_BITS_CNT,
-	ETHTOOL_A_BITSET_BITS_MAX = __ETHTOOL_A_BITSET_BITS_CNT - 1
+	ETHTOOL_A_BITSET_BITS_MAX = (__ETHTOOL_A_BITSET_BITS_CNT - 1)
 };
 
 enum {
@@ -60,7 +80,7 @@ enum {
 	ETHTOOL_A_BITSET_MASK,
 
 	__ETHTOOL_A_BITSET_CNT,
-	ETHTOOL_A_BITSET_MAX = __ETHTOOL_A_BITSET_CNT - 1
+	ETHTOOL_A_BITSET_MAX = (__ETHTOOL_A_BITSET_CNT - 1)
 };
 
 enum {
@@ -69,7 +89,7 @@ enum {
 	ETHTOOL_A_STRING_VALUE,
 
 	__ETHTOOL_A_STRING_CNT,
-	ETHTOOL_A_STRING_MAX = __ETHTOOL_A_STRING_CNT - 1
+	ETHTOOL_A_STRING_MAX = (__ETHTOOL_A_STRING_CNT - 1)
 };
 
 enum {
@@ -77,7 +97,7 @@ enum {
 	ETHTOOL_A_STRINGS_STRING,
 
 	__ETHTOOL_A_STRINGS_CNT,
-	ETHTOOL_A_STRINGS_MAX = __ETHTOOL_A_STRINGS_CNT - 1
+	ETHTOOL_A_STRINGS_MAX = (__ETHTOOL_A_STRINGS_CNT - 1)
 };
 
 enum {
@@ -87,7 +107,7 @@ enum {
 	ETHTOOL_A_STRINGSET_STRINGS,
 
 	__ETHTOOL_A_STRINGSET_CNT,
-	ETHTOOL_A_STRINGSET_MAX = __ETHTOOL_A_STRINGSET_CNT - 1
+	ETHTOOL_A_STRINGSET_MAX = (__ETHTOOL_A_STRINGSET_CNT - 1)
 };
 
 enum {
@@ -95,7 +115,7 @@ enum {
 	ETHTOOL_A_STRINGSETS_STRINGSET,
 
 	__ETHTOOL_A_STRINGSETS_CNT,
-	ETHTOOL_A_STRINGSETS_MAX = __ETHTOOL_A_STRINGSETS_CNT - 1
+	ETHTOOL_A_STRINGSETS_MAX = (__ETHTOOL_A_STRINGSETS_CNT - 1)
 };
 
 enum {
@@ -105,7 +125,7 @@ enum {
 	ETHTOOL_A_STRSET_COUNTS_ONLY,
 
 	__ETHTOOL_A_STRSET_CNT,
-	ETHTOOL_A_STRSET_MAX = __ETHTOOL_A_STRSET_CNT - 1
+	ETHTOOL_A_STRSET_MAX = (__ETHTOOL_A_STRSET_CNT - 1)
 };
 
 enum {
@@ -114,7 +134,7 @@ enum {
 	ETHTOOL_A_PRIVFLAGS_FLAGS,
 
 	__ETHTOOL_A_PRIVFLAGS_CNT,
-	ETHTOOL_A_PRIVFLAGS_MAX = __ETHTOOL_A_PRIVFLAGS_CNT - 1
+	ETHTOOL_A_PRIVFLAGS_MAX = (__ETHTOOL_A_PRIVFLAGS_CNT - 1)
 };
 
 enum {
@@ -182,7 +202,7 @@ enum {
 	ETHTOOL_A_LINKINFO_TRANSCEIVER,
 
 	__ETHTOOL_A_LINKINFO_CNT,
-	ETHTOOL_A_LINKINFO_MAX = __ETHTOOL_A_LINKINFO_CNT - 1
+	ETHTOOL_A_LINKINFO_MAX = (__ETHTOOL_A_LINKINFO_CNT - 1)
 };
 
 enum {
@@ -199,7 +219,7 @@ enum {
 	ETHTOOL_A_LINKMODES_RATE_MATCHING,
 
 	__ETHTOOL_A_LINKMODES_CNT,
-	ETHTOOL_A_LINKMODES_MAX = __ETHTOOL_A_LINKMODES_CNT - 1
+	ETHTOOL_A_LINKMODES_MAX = (__ETHTOOL_A_LINKMODES_CNT - 1)
 };
 
 enum {
@@ -213,7 +233,7 @@ enum {
 	ETHTOOL_A_LINKSTATE_EXT_DOWN_CNT,
 
 	__ETHTOOL_A_LINKSTATE_CNT,
-	ETHTOOL_A_LINKSTATE_MAX = __ETHTOOL_A_LINKSTATE_CNT - 1
+	ETHTOOL_A_LINKSTATE_MAX = (__ETHTOOL_A_LINKSTATE_CNT - 1)
 };
 
 enum {
@@ -222,7 +242,7 @@ enum {
 	ETHTOOL_A_DEBUG_MSGMASK,
 
 	__ETHTOOL_A_DEBUG_CNT,
-	ETHTOOL_A_DEBUG_MAX = __ETHTOOL_A_DEBUG_CNT - 1
+	ETHTOOL_A_DEBUG_MAX = (__ETHTOOL_A_DEBUG_CNT - 1)
 };
 
 enum {
@@ -232,7 +252,7 @@ enum {
 	ETHTOOL_A_WOL_SOPASS,
 
 	__ETHTOOL_A_WOL_CNT,
-	ETHTOOL_A_WOL_MAX = __ETHTOOL_A_WOL_CNT - 1
+	ETHTOOL_A_WOL_MAX = (__ETHTOOL_A_WOL_CNT - 1)
 };
 
 enum {
@@ -244,7 +264,7 @@ enum {
 	ETHTOOL_A_FEATURES_NOCHANGE,
 
 	__ETHTOOL_A_FEATURES_CNT,
-	ETHTOOL_A_FEATURES_MAX = __ETHTOOL_A_FEATURES_CNT - 1
+	ETHTOOL_A_FEATURES_MAX = (__ETHTOOL_A_FEATURES_CNT - 1)
 };
 
 enum {
@@ -276,6 +296,7 @@ enum {
 enum {
 	ETHTOOL_A_PROFILE_UNSPEC,
 	ETHTOOL_A_PROFILE_IRQ_MODERATION,
+
 	__ETHTOOL_A_PROFILE_CNT,
 	ETHTOOL_A_PROFILE_MAX = (__ETHTOOL_A_PROFILE_CNT - 1)
 };
@@ -362,7 +383,6 @@ enum {
 
 	__ETHTOOL_A_TS_STAT_CNT,
 	ETHTOOL_A_TS_STAT_MAX = (__ETHTOOL_A_TS_STAT_CNT - 1)
-
 };
 
 enum {
@@ -412,7 +432,7 @@ enum {
 	ETHTOOL_A_CABLE_TEST_HEADER,
 
 	__ETHTOOL_A_CABLE_TEST_CNT,
-	ETHTOOL_A_CABLE_TEST_MAX = __ETHTOOL_A_CABLE_TEST_CNT - 1
+	ETHTOOL_A_CABLE_TEST_MAX = (__ETHTOOL_A_CABLE_TEST_CNT - 1)
 };
 
 enum {
@@ -433,7 +453,7 @@ enum {
 	ETHTOOL_A_CABLE_TEST_TDR_CFG_PAIR,
 
 	__ETHTOOL_A_CABLE_TEST_TDR_CFG_CNT,
-	ETHTOOL_A_CABLE_TEST_TDR_CFG_MAX = __ETHTOOL_A_CABLE_TEST_TDR_CFG_CNT - 1
+	ETHTOOL_A_CABLE_TEST_TDR_CFG_MAX = (__ETHTOOL_A_CABLE_TEST_TDR_CFG_CNT - 1)
 };
 
 enum {
@@ -443,7 +463,7 @@ enum {
 	ETHTOOL_A_CABLE_TEST_TDR_NTF_NEST,
 
 	__ETHTOOL_A_CABLE_TEST_TDR_NTF_CNT,
-	ETHTOOL_A_CABLE_TEST_TDR_NTF_MAX = __ETHTOOL_A_CABLE_TEST_TDR_NTF_CNT - 1
+	ETHTOOL_A_CABLE_TEST_TDR_NTF_MAX = (__ETHTOOL_A_CABLE_TEST_TDR_NTF_CNT - 1)
 };
 
 enum {
@@ -452,7 +472,7 @@ enum {
 	ETHTOOL_A_CABLE_TEST_TDR_CFG,
 
 	__ETHTOOL_A_CABLE_TEST_TDR_CNT,
-	ETHTOOL_A_CABLE_TEST_TDR_MAX = __ETHTOOL_A_CABLE_TEST_TDR_CNT - 1
+	ETHTOOL_A_CABLE_TEST_TDR_MAX = (__ETHTOOL_A_CABLE_TEST_TDR_CNT - 1)
 };
 
 enum {
@@ -580,6 +600,9 @@ enum {
 	ETHTOOL_A_C33_PSE_PW_LIMIT_UNSPEC,
 	ETHTOOL_A_C33_PSE_PW_LIMIT_MIN,
 	ETHTOOL_A_C33_PSE_PW_LIMIT_MAX,
+
+	__ETHTOOL_A_C33_PSE_PW_LIMIT_CNT,
+	__ETHTOOL_A_C33_PSE_PW_LIMIT_MAX = (__ETHTOOL_A_C33_PSE_PW_LIMIT_CNT - 1)
 };
 
 enum {
@@ -613,7 +636,7 @@ enum {
 	ETHTOOL_A_RSS_START_CONTEXT,
 
 	__ETHTOOL_A_RSS_CNT,
-	ETHTOOL_A_RSS_MAX = (__ETHTOOL_A_RSS_CNT - 1),
+	ETHTOOL_A_RSS_MAX = (__ETHTOOL_A_RSS_CNT - 1)
 };
 
 enum {
@@ -662,8 +685,8 @@ enum {
 };
 
 enum {
-	ETHTOOL_MSG_USER_NONE,
-	ETHTOOL_MSG_STRSET_GET,
+	ETHTOOL_MSG_USER_NONE = 0,
+	ETHTOOL_MSG_STRSET_GET = 1,
 	ETHTOOL_MSG_LINKINFO_GET,
 	ETHTOOL_MSG_LINKINFO_SET,
 	ETHTOOL_MSG_LINKMODES_GET,
@@ -710,12 +733,12 @@ enum {
 	ETHTOOL_MSG_PHY_GET,
 
 	__ETHTOOL_MSG_USER_CNT,
-	ETHTOOL_MSG_USER_MAX = __ETHTOOL_MSG_USER_CNT - 1
+	ETHTOOL_MSG_USER_MAX = (__ETHTOOL_MSG_USER_CNT - 1)
 };
 
 enum {
-	ETHTOOL_MSG_KERNEL_NONE,
-	ETHTOOL_MSG_STRSET_GET_REPLY,
+	ETHTOOL_MSG_KERNEL_NONE = 0,
+	ETHTOOL_MSG_STRSET_GET_REPLY = 1,
 	ETHTOOL_MSG_LINKINFO_GET_REPLY,
 	ETHTOOL_MSG_LINKINFO_NTF,
 	ETHTOOL_MSG_LINKMODES_GET_REPLY,
@@ -763,7 +786,7 @@ enum {
 	ETHTOOL_MSG_PHY_NTF,
 
 	__ETHTOOL_MSG_KERNEL_CNT,
-	ETHTOOL_MSG_KERNEL_MAX = __ETHTOOL_MSG_KERNEL_CNT - 1
+	ETHTOOL_MSG_KERNEL_MAX = (__ETHTOOL_MSG_KERNEL_CNT - 1)
 };
 
 #endif /* _UAPI_LINUX_ETHTOOL_NETLINK_GENERATED_H */
-- 
cgit v1.2.3


From ebe559609d7829b52c6642b581860760984faf9d Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 15 Nov 2024 10:30:14 -0500
Subject: fs: get rid of __FMODE_NONOTIFY kludge

All it takes to get rid of the __FMODE_NONOTIFY kludge is switching
fanotify from anon_inode_getfd() to anon_inode_getfile_fmode() and adding
a dentry_open_nonotify() helper to be used by fanotify on the other path.
That's it - no more weird shit in OPEN_FMODE(), etc.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Link: https://lore.kernel.org/linux-fsdevel/20241113043003.GH3387508@ZenIV/
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Link: https://patch.msgid.link/d1231137e7b661a382459e79a764259509a4115d.1731684329.git.josef@toxicpanda.com
---
 fs/fcntl.c                         |  4 ++--
 fs/notify/fanotify/fanotify_user.c | 25 ++++++++++++++++---------
 fs/open.c                          | 23 +++++++++++++++++++----
 include/linux/fs.h                 |  6 +++---
 include/uapi/asm-generic/fcntl.h   |  1 -
 5 files changed, 40 insertions(+), 19 deletions(-)

(limited to 'include/uapi')

diff --git a/fs/fcntl.c b/fs/fcntl.c
index 49884fa3c81d..5598e4d57422 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -1158,10 +1158,10 @@ static int __init fcntl_init(void)
 	 * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY
 	 * is defined as O_NONBLOCK on some platforms and not on others.
 	 */
-	BUILD_BUG_ON(21 - 1 /* for O_RDONLY being 0 */ !=
+	BUILD_BUG_ON(20 - 1 /* for O_RDONLY being 0 */ !=
 		HWEIGHT32(
 			(VALID_OPEN_FLAGS & ~(O_NONBLOCK | O_NDELAY)) |
-			__FMODE_EXEC | __FMODE_NONOTIFY));
+			__FMODE_EXEC));
 
 	fasync_cache = kmem_cache_create("fasync_cache",
 					 sizeof(struct fasync_struct), 0,
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 2d85c71717d6..919ff59cb802 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -100,8 +100,7 @@ static void __init fanotify_sysctls_init(void)
  *
  * Internal and external open flags are stored together in field f_flags of
  * struct file. Only external open flags shall be allowed in event_f_flags.
- * Internal flags like FMODE_NONOTIFY, FMODE_EXEC, FMODE_NOCMTIME shall be
- * excluded.
+ * Internal flags like FMODE_EXEC shall be excluded.
  */
 #define	FANOTIFY_INIT_ALL_EVENT_F_BITS				( \
 		O_ACCMODE	| O_APPEND	| O_NONBLOCK	| \
@@ -258,12 +257,11 @@ static int create_fd(struct fsnotify_group *group, const struct path *path,
 		return client_fd;
 
 	/*
-	 * we need a new file handle for the userspace program so it can read even if it was
-	 * originally opened O_WRONLY.
+	 * We provide an fd for the userspace program, so it could access the
+	 * file without generating fanotify events itself.
 	 */
-	new_file = dentry_open(path,
-			       group->fanotify_data.f_flags | __FMODE_NONOTIFY,
-			       current_cred());
+	new_file = dentry_open_nonotify(path, group->fanotify_data.f_flags,
+					current_cred());
 	if (IS_ERR(new_file)) {
 		put_unused_fd(client_fd);
 		client_fd = PTR_ERR(new_file);
@@ -1409,6 +1407,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
 	unsigned int fid_mode = flags & FANOTIFY_FID_BITS;
 	unsigned int class = flags & FANOTIFY_CLASS_BITS;
 	unsigned int internal_flags = 0;
+	struct file *file;
 
 	pr_debug("%s: flags=%x event_f_flags=%x\n",
 		 __func__, flags, event_f_flags);
@@ -1477,7 +1476,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
 	    (!(fid_mode & FAN_REPORT_NAME) || !(fid_mode & FAN_REPORT_FID)))
 		return -EINVAL;
 
-	f_flags = O_RDWR | __FMODE_NONOTIFY;
+	f_flags = O_RDWR;
 	if (flags & FAN_CLOEXEC)
 		f_flags |= O_CLOEXEC;
 	if (flags & FAN_NONBLOCK)
@@ -1555,10 +1554,18 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
 			goto out_destroy_group;
 	}
 
-	fd = anon_inode_getfd("[fanotify]", &fanotify_fops, group, f_flags);
+	fd = get_unused_fd_flags(f_flags);
 	if (fd < 0)
 		goto out_destroy_group;
 
+	file = anon_inode_getfile_fmode("[fanotify]", &fanotify_fops, group,
+					f_flags, FMODE_NONOTIFY);
+	if (IS_ERR(file)) {
+		fd = PTR_ERR(file);
+		put_unused_fd(fd);
+		goto out_destroy_group;
+	}
+	fd_install(fd, file);
 	return fd;
 
 out_destroy_group:
diff --git a/fs/open.c b/fs/open.c
index e6911101fe71..c3490286092e 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -1105,6 +1105,23 @@ struct file *dentry_open(const struct path *path, int flags,
 }
 EXPORT_SYMBOL(dentry_open);
 
+struct file *dentry_open_nonotify(const struct path *path, int flags,
+				  const struct cred *cred)
+{
+	struct file *f = alloc_empty_file(flags, cred);
+	if (!IS_ERR(f)) {
+		int error;
+
+		f->f_mode |= FMODE_NONOTIFY;
+		error = vfs_open(path, f);
+		if (error) {
+			fput(f);
+			f = ERR_PTR(error);
+		}
+	}
+	return f;
+}
+
 /**
  * dentry_create - Create and open a file
  * @path: path to create
@@ -1202,7 +1219,7 @@ inline struct open_how build_open_how(int flags, umode_t mode)
 inline int build_open_flags(const struct open_how *how, struct open_flags *op)
 {
 	u64 flags = how->flags;
-	u64 strip = __FMODE_NONOTIFY | O_CLOEXEC;
+	u64 strip = O_CLOEXEC;
 	int lookup_flags = 0;
 	int acc_mode = ACC_MODE(flags);
 
@@ -1210,9 +1227,7 @@ inline int build_open_flags(const struct open_how *how, struct open_flags *op)
 			 "struct open_flags doesn't yet handle flags > 32 bits");
 
 	/*
-	 * Strip flags that either shouldn't be set by userspace like
-	 * FMODE_NONOTIFY or that aren't relevant in determining struct
-	 * open_flags like O_CLOEXEC.
+	 * Strip flags that aren't relevant in determining struct open_flags.
 	 */
 	flags &= ~strip;
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 7e29433c5ecc..93c2b720271e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2751,6 +2751,8 @@ static inline struct file *file_open_root_mnt(struct vfsmount *mnt,
 }
 struct file *dentry_open(const struct path *path, int flags,
 			 const struct cred *creds);
+struct file *dentry_open_nonotify(const struct path *path, int flags,
+				  const struct cred *cred);
 struct file *dentry_create(const struct path *path, int flags, umode_t mode,
 			   const struct cred *cred);
 struct path *backing_file_user_path(struct file *f);
@@ -3707,11 +3709,9 @@ struct ctl_table;
 int __init list_bdev_fs_names(char *buf, size_t size);
 
 #define __FMODE_EXEC		((__force int) FMODE_EXEC)
-#define __FMODE_NONOTIFY	((__force int) FMODE_NONOTIFY)
 
 #define ACC_MODE(x) ("\004\002\006\006"[(x)&O_ACCMODE])
-#define OPEN_FMODE(flag) ((__force fmode_t)(((flag + 1) & O_ACCMODE) | \
-					    (flag & __FMODE_NONOTIFY)))
+#define OPEN_FMODE(flag) ((__force fmode_t)((flag + 1) & O_ACCMODE))
 
 static inline bool is_sxid(umode_t mode)
 {
diff --git a/include/uapi/asm-generic/fcntl.h b/include/uapi/asm-generic/fcntl.h
index 80f37a0d40d7..613475285643 100644
--- a/include/uapi/asm-generic/fcntl.h
+++ b/include/uapi/asm-generic/fcntl.h
@@ -6,7 +6,6 @@
 
 /*
  * FMODE_EXEC is 0x20
- * FMODE_NONOTIFY is 0x4000000
  * These cannot be used by userspace O_* until internal and external open
  * flags are split.
  * -Eric Paris
-- 
cgit v1.2.3


From 6c11379b104e3718135fd7fc37bb254b41e4cf65 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@nvidia.com>
Date: Thu, 5 Dec 2024 16:40:57 +0100
Subject: vxlan: Add an attribute to make VXLAN header validation configurable

The set of bits that the VXLAN netdevice currently considers reserved is
defined by the features enabled at the netdevice construction. In order to
make this configurable, add an attribute, IFLA_VXLAN_RESERVED_BITS. The
payload is a pair of big-endian u32's covering the VXLAN header. This is
validated against the set of flags used by the various enabled VXLAN
features, and attempts to override bits used by an enabled feature are
bounced.

Signed-off-by: Petr Machata <petrm@nvidia.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Nikolay Aleksandrov <razor@blackwall.org>
Link: https://patch.msgid.link/c657275e5ceed301e62c69fe8e559e32909442e2.1733412063.git.petrm@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/vxlan/vxlan_core.c | 53 ++++++++++++++++++++++++++++++++++++------
 include/uapi/linux/if_link.h   |  1 +
 2 files changed, 47 insertions(+), 7 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c
index ff5684a2103a..43cf672b7b9f 100644
--- a/drivers/net/vxlan/vxlan_core.c
+++ b/drivers/net/vxlan/vxlan_core.c
@@ -3428,6 +3428,7 @@ static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {
 	[IFLA_VXLAN_VNIFILTER]	= { .type = NLA_U8 },
 	[IFLA_VXLAN_LOCALBYPASS]	= NLA_POLICY_MAX(NLA_U8, 1),
 	[IFLA_VXLAN_LABEL_POLICY]       = NLA_POLICY_MAX(NLA_U32, VXLAN_LABEL_MAX),
+	[IFLA_VXLAN_RESERVED_BITS] = NLA_POLICY_EXACT_LEN(sizeof(struct vxlanhdr)),
 };
 
 static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[],
@@ -4315,13 +4316,44 @@ static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[],
 		used_bits.vx_flags |= VXLAN_GPE_USED_BITS;
 	}
 
-	/* For backwards compatibility, only allow reserved fields to be
-	 * used by VXLAN extensions if explicitly requested.
-	 */
-	conf->reserved_bits = (struct vxlanhdr) {
-		.vx_flags = ~used_bits.vx_flags,
-		.vx_vni = ~used_bits.vx_vni,
-	};
+	if (data[IFLA_VXLAN_RESERVED_BITS]) {
+		struct vxlanhdr reserved_bits;
+
+		if (changelink) {
+			NL_SET_ERR_MSG_ATTR(extack,
+					    data[IFLA_VXLAN_RESERVED_BITS],
+					    "Cannot change reserved_bits");
+			return -EOPNOTSUPP;
+		}
+
+		nla_memcpy(&reserved_bits, data[IFLA_VXLAN_RESERVED_BITS],
+			   sizeof(reserved_bits));
+		if (used_bits.vx_flags & reserved_bits.vx_flags ||
+		    used_bits.vx_vni & reserved_bits.vx_vni) {
+			__be64 ub_be64, rb_be64;
+
+			memcpy(&ub_be64, &used_bits, sizeof(ub_be64));
+			memcpy(&rb_be64, &reserved_bits, sizeof(rb_be64));
+
+			NL_SET_ERR_MSG_ATTR_FMT(extack,
+						data[IFLA_VXLAN_RESERVED_BITS],
+						"Used bits %#018llx cannot overlap reserved bits %#018llx",
+						be64_to_cpu(ub_be64),
+						be64_to_cpu(rb_be64));
+			return -EINVAL;
+		}
+
+		conf->reserved_bits = reserved_bits;
+	} else {
+		/* For backwards compatibility, only allow reserved fields to be
+		 * used by VXLAN extensions if explicitly requested.
+		 */
+		conf->reserved_bits = (struct vxlanhdr) {
+			.vx_flags = ~used_bits.vx_flags,
+			.vx_vni = ~used_bits.vx_vni,
+		};
+	}
+
 	if (data[IFLA_VXLAN_REMCSUM_NOPARTIAL]) {
 		err = vxlan_nl2flag(conf, data, IFLA_VXLAN_REMCSUM_NOPARTIAL,
 				    VXLAN_F_REMCSUM_NOPARTIAL, changelink,
@@ -4506,6 +4538,8 @@ static size_t vxlan_get_size(const struct net_device *dev)
 		nla_total_size(0) + /* IFLA_VXLAN_GPE */
 		nla_total_size(0) + /* IFLA_VXLAN_REMCSUM_NOPARTIAL */
 		nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_VNIFILTER */
+		/* IFLA_VXLAN_RESERVED_BITS */
+		nla_total_size(sizeof(struct vxlanhdr)) +
 		0;
 }
 
@@ -4608,6 +4642,11 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
 		       !!(vxlan->cfg.flags & VXLAN_F_VNIFILTER)))
 		goto nla_put_failure;
 
+	if (nla_put(skb, IFLA_VXLAN_RESERVED_BITS,
+		    sizeof(vxlan->cfg.reserved_bits),
+		    &vxlan->cfg.reserved_bits))
+		goto nla_put_failure;
+
 	return 0;
 
 nla_put_failure:
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 2575e0cd9b48..77730c340c8f 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -1394,6 +1394,7 @@ enum {
 	IFLA_VXLAN_VNIFILTER, /* only applicable with COLLECT_METADATA mode */
 	IFLA_VXLAN_LOCALBYPASS,
 	IFLA_VXLAN_LABEL_POLICY, /* IPv6 flow label policy; ifla_vxlan_label_policy */
+	IFLA_VXLAN_RESERVED_BITS,
 	__IFLA_VXLAN_MAX
 };
 #define IFLA_VXLAN_MAX	(__IFLA_VXLAN_MAX - 1)
-- 
cgit v1.2.3


From 0a076036b631f086a6bce93a45eaa216f234f121 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Fri, 15 Nov 2024 10:30:19 -0500
Subject: fanotify: reserve event bit of deprecated FAN_DIR_MODIFY

Avoid reusing it, because we would like to reserve it for future
FAN_PATH_MODIFY pre-content event.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Link: https://patch.msgid.link/632d9f80428e2e7a6b6a8ccc2925d87c92bbb518.1731684329.git.josef@toxicpanda.com
---
 include/linux/fsnotify_backend.h | 1 +
 include/uapi/linux/fanotify.h    | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include/uapi')

diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 9c105244815d..c38762b62bf1 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -55,6 +55,7 @@
 #define FS_OPEN_PERM		0x00010000	/* open event in an permission hook */
 #define FS_ACCESS_PERM		0x00020000	/* access event in a permissions hook */
 #define FS_OPEN_EXEC_PERM	0x00040000	/* open/exec event in a permission hook */
+/* #define FS_DIR_MODIFY	0x00080000 */	/* Deprecated (reserved) */
 
 /*
  * Set on inode mark that cares about things that happen to its children.
diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h
index 34f221d3a1b9..79072b6894f2 100644
--- a/include/uapi/linux/fanotify.h
+++ b/include/uapi/linux/fanotify.h
@@ -25,6 +25,7 @@
 #define FAN_OPEN_PERM		0x00010000	/* File open in perm check */
 #define FAN_ACCESS_PERM		0x00020000	/* File accessed in perm check */
 #define FAN_OPEN_EXEC_PERM	0x00040000	/* File open/exec in perm check */
+/* #define FAN_DIR_MODIFY	0x00080000 */	/* Deprecated (reserved) */
 
 #define FAN_EVENT_ON_CHILD	0x08000000	/* Interested in child events */
 
-- 
cgit v1.2.3


From 4f8afa33817a6420398d1c177c6e220a05081f51 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Fri, 15 Nov 2024 10:30:23 -0500
Subject: fanotify: introduce FAN_PRE_ACCESS permission event

Similar to FAN_ACCESS_PERM permission event, but it is only allowed with
class FAN_CLASS_PRE_CONTENT and only allowed on regular files and dirs.

Unlike FAN_ACCESS_PERM, it is safe to write to the file being accessed
in the context of the event handler.

This pre-content event is meant to be used by hierarchical storage
managers that want to fill the content of files on first read access.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Link: https://patch.msgid.link/b80986f8d5b860acea2c9a73c0acd93587be5fe4.1731684329.git.josef@toxicpanda.com
---
 fs/notify/fanotify/fanotify.c      |  3 ++-
 fs/notify/fanotify/fanotify_user.c | 35 ++++++++++++++++++++++++++++++-----
 include/linux/fanotify.h           | 14 ++++++++++----
 include/uapi/linux/fanotify.h      |  2 ++
 4 files changed, 44 insertions(+), 10 deletions(-)

(limited to 'include/uapi')

diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index 2e6ba94ec405..da6c3c1c7edf 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -916,8 +916,9 @@ static int fanotify_handle_event(struct fsnotify_group *group, u32 mask,
 	BUILD_BUG_ON(FAN_OPEN_EXEC_PERM != FS_OPEN_EXEC_PERM);
 	BUILD_BUG_ON(FAN_FS_ERROR != FS_ERROR);
 	BUILD_BUG_ON(FAN_RENAME != FS_RENAME);
+	BUILD_BUG_ON(FAN_PRE_ACCESS != FS_PRE_ACCESS);
 
-	BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 21);
+	BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 22);
 
 	mask = fanotify_group_event_mask(group, iter_info, &match_mask,
 					 mask, data, data_type, dir);
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 456cc3e92c88..08e4d8659ef5 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -1287,7 +1287,7 @@ static int fanotify_group_init_error_pool(struct fsnotify_group *group)
 }
 
 static int fanotify_may_update_existing_mark(struct fsnotify_mark *fsn_mark,
-					      unsigned int fan_flags)
+					     __u32 mask, unsigned int fan_flags)
 {
 	/*
 	 * Non evictable mark cannot be downgraded to evictable mark.
@@ -1314,6 +1314,11 @@ static int fanotify_may_update_existing_mark(struct fsnotify_mark *fsn_mark,
 	    fsn_mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY)
 		return -EEXIST;
 
+	/* For now pre-content events are not generated for directories */
+	mask |= fsn_mark->mask;
+	if (mask & FANOTIFY_PRE_CONTENT_EVENTS && mask & FAN_ONDIR)
+		return -EEXIST;
+
 	return 0;
 }
 
@@ -1340,7 +1345,7 @@ static int fanotify_add_mark(struct fsnotify_group *group,
 	/*
 	 * Check if requested mark flags conflict with an existing mark flags.
 	 */
-	ret = fanotify_may_update_existing_mark(fsn_mark, fan_flags);
+	ret = fanotify_may_update_existing_mark(fsn_mark, mask, fan_flags);
 	if (ret)
 		goto out;
 
@@ -1640,11 +1645,23 @@ static int fanotify_events_supported(struct fsnotify_group *group,
 				     unsigned int flags)
 {
 	unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS;
+	bool is_dir = d_is_dir(path->dentry);
 	/* Strict validation of events in non-dir inode mask with v5.17+ APIs */
 	bool strict_dir_events = FAN_GROUP_FLAG(group, FAN_REPORT_TARGET_FID) ||
 				 (mask & FAN_RENAME) ||
 				 (flags & FAN_MARK_IGNORE);
 
+	/*
+	 * Filesystems need to opt-into pre-content evnets (a.k.a HSM)
+	 * and they are only supported on regular files and directories.
+	 */
+	if (mask & FANOTIFY_PRE_CONTENT_EVENTS) {
+		if (!(path->mnt->mnt_sb->s_iflags & SB_I_ALLOW_HSM))
+			return -EOPNOTSUPP;
+		if (!is_dir && !d_is_reg(path->dentry))
+			return -EINVAL;
+	}
+
 	/*
 	 * Some filesystems such as 'proc' acquire unusual locks when opening
 	 * files. For them fanotify permission events have high chances of
@@ -1677,7 +1694,7 @@ static int fanotify_events_supported(struct fsnotify_group *group,
 	 * but because we always allowed it, error only when using new APIs.
 	 */
 	if (strict_dir_events && mark_type == FAN_MARK_INODE &&
-	    !d_is_dir(path->dentry) && (mask & FANOTIFY_DIRONLY_EVENT_BITS))
+	    !is_dir && (mask & FANOTIFY_DIRONLY_EVENT_BITS))
 		return -ENOTDIR;
 
 	return 0;
@@ -1778,10 +1795,14 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
 		return -EPERM;
 
 	/*
-	 * Permission events require minimum priority FAN_CLASS_CONTENT.
+	 * Permission events are not allowed for FAN_CLASS_NOTIF.
+	 * Pre-content permission events are not allowed for FAN_CLASS_CONTENT.
 	 */
 	if (mask & FANOTIFY_PERM_EVENTS &&
-	    group->priority < FSNOTIFY_PRIO_CONTENT)
+	    group->priority == FSNOTIFY_PRIO_NORMAL)
+		return -EINVAL;
+	else if (mask & FANOTIFY_PRE_CONTENT_EVENTS &&
+		 group->priority == FSNOTIFY_PRIO_CONTENT)
 		return -EINVAL;
 
 	if (mask & FAN_FS_ERROR &&
@@ -1816,6 +1837,10 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
 	if (mask & FAN_RENAME && !(fid_mode & FAN_REPORT_NAME))
 		return -EINVAL;
 
+	/* Pre-content events are not currently generated for directories. */
+	if (mask & FANOTIFY_PRE_CONTENT_EVENTS && mask & FAN_ONDIR)
+		return -EINVAL;
+
 	if (mark_cmd == FAN_MARK_FLUSH) {
 		if (mark_type == FAN_MARK_MOUNT)
 			fsnotify_clear_vfsmount_marks_by_group(group);
diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h
index 89ff45bd6f01..c747af064d2c 100644
--- a/include/linux/fanotify.h
+++ b/include/linux/fanotify.h
@@ -89,6 +89,16 @@
 #define FANOTIFY_DIRENT_EVENTS	(FAN_MOVE | FAN_CREATE | FAN_DELETE | \
 				 FAN_RENAME)
 
+/* Content events can be used to inspect file content */
+#define FANOTIFY_CONTENT_PERM_EVENTS (FAN_OPEN_PERM | FAN_OPEN_EXEC_PERM | \
+				      FAN_ACCESS_PERM)
+/* Pre-content events can be used to fill file content */
+#define FANOTIFY_PRE_CONTENT_EVENTS  (FAN_PRE_ACCESS)
+
+/* Events that require a permission response from user */
+#define FANOTIFY_PERM_EVENTS	(FANOTIFY_CONTENT_PERM_EVENTS | \
+				 FANOTIFY_PRE_CONTENT_EVENTS)
+
 /* Events that can be reported with event->fd */
 #define FANOTIFY_FD_EVENTS (FANOTIFY_PATH_EVENTS | FANOTIFY_PERM_EVENTS)
 
@@ -104,10 +114,6 @@
 				 FANOTIFY_INODE_EVENTS | \
 				 FANOTIFY_ERROR_EVENTS)
 
-/* Events that require a permission response from user */
-#define FANOTIFY_PERM_EVENTS	(FAN_OPEN_PERM | FAN_ACCESS_PERM | \
-				 FAN_OPEN_EXEC_PERM)
-
 /* Extra flags that may be reported with event or control handling of events */
 #define FANOTIFY_EVENT_FLAGS	(FAN_EVENT_ON_CHILD | FAN_ONDIR)
 
diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h
index 79072b6894f2..7596168c80eb 100644
--- a/include/uapi/linux/fanotify.h
+++ b/include/uapi/linux/fanotify.h
@@ -27,6 +27,8 @@
 #define FAN_OPEN_EXEC_PERM	0x00040000	/* File open/exec in perm check */
 /* #define FAN_DIR_MODIFY	0x00080000 */	/* Deprecated (reserved) */
 
+#define FAN_PRE_ACCESS		0x00100000	/* Pre-content access hook */
+
 #define FAN_EVENT_ON_CHILD	0x08000000	/* Interested in child events */
 
 #define FAN_RENAME		0x10000000	/* File was renamed */
-- 
cgit v1.2.3


From 870499bc1d4dc04cba1f63dd5e7bc02b983e2458 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Fri, 15 Nov 2024 10:30:24 -0500
Subject: fanotify: report file range info with pre-content events

With group class FAN_CLASS_PRE_CONTENT, report offset and length info
along with FAN_PRE_ACCESS pre-content events.

This information is meant to be used by hierarchical storage managers
that want to fill partial content of files on first access to range.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Link: https://patch.msgid.link/b90a9e6c809dd3cad5684da90f23ea93ec6ce8c8.1731684329.git.josef@toxicpanda.com
---
 fs/notify/fanotify/fanotify.h      |  8 ++++++++
 fs/notify/fanotify/fanotify_user.c | 38 ++++++++++++++++++++++++++++++++++++++
 include/uapi/linux/fanotify.h      |  8 ++++++++
 3 files changed, 54 insertions(+)

(limited to 'include/uapi')

diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
index 93598b7d5952..7f06355afa1f 100644
--- a/fs/notify/fanotify/fanotify.h
+++ b/fs/notify/fanotify/fanotify.h
@@ -448,6 +448,14 @@ static inline bool fanotify_is_perm_event(u32 mask)
 		mask & FANOTIFY_PERM_EVENTS;
 }
 
+static inline bool fanotify_event_has_access_range(struct fanotify_event *event)
+{
+	if (!(event->mask & FANOTIFY_PRE_CONTENT_EVENTS))
+		return false;
+
+	return FANOTIFY_PERM(event)->ppos;
+}
+
 static inline struct fanotify_event *FANOTIFY_E(struct fsnotify_event *fse)
 {
 	return container_of(fse, struct fanotify_event, fse);
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 08e4d8659ef5..6ef3cc7de5e4 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -121,6 +121,8 @@ struct kmem_cache *fanotify_perm_event_cachep __ro_after_init;
 	sizeof(struct fanotify_event_info_pidfd)
 #define FANOTIFY_ERROR_INFO_LEN \
 	(sizeof(struct fanotify_event_info_error))
+#define FANOTIFY_RANGE_INFO_LEN \
+	(sizeof(struct fanotify_event_info_range))
 
 static int fanotify_fid_info_len(int fh_len, int name_len)
 {
@@ -180,6 +182,9 @@ static size_t fanotify_event_len(unsigned int info_mode,
 	if (info_mode & FAN_REPORT_PIDFD)
 		event_len += FANOTIFY_PIDFD_INFO_LEN;
 
+	if (fanotify_event_has_access_range(event))
+		event_len += FANOTIFY_RANGE_INFO_LEN;
+
 	return event_len;
 }
 
@@ -516,6 +521,30 @@ static int copy_pidfd_info_to_user(int pidfd,
 	return info_len;
 }
 
+static size_t copy_range_info_to_user(struct fanotify_event *event,
+				      char __user *buf, int count)
+{
+	struct fanotify_perm_event *pevent = FANOTIFY_PERM(event);
+	struct fanotify_event_info_range info = { };
+	size_t info_len = FANOTIFY_RANGE_INFO_LEN;
+
+	if (WARN_ON_ONCE(info_len > count))
+		return -EFAULT;
+
+	if (WARN_ON_ONCE(!pevent->ppos))
+		return -EINVAL;
+
+	info.hdr.info_type = FAN_EVENT_INFO_TYPE_RANGE;
+	info.hdr.len = info_len;
+	info.offset = *(pevent->ppos);
+	info.count = pevent->count;
+
+	if (copy_to_user(buf, &info, info_len))
+		return -EFAULT;
+
+	return info_len;
+}
+
 static int copy_info_records_to_user(struct fanotify_event *event,
 				     struct fanotify_info *info,
 				     unsigned int info_mode, int pidfd,
@@ -637,6 +666,15 @@ static int copy_info_records_to_user(struct fanotify_event *event,
 		total_bytes += ret;
 	}
 
+	if (fanotify_event_has_access_range(event)) {
+		ret = copy_range_info_to_user(event, buf, count);
+		if (ret < 0)
+			return ret;
+		buf += ret;
+		count -= ret;
+		total_bytes += ret;
+	}
+
 	return total_bytes;
 }
 
diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h
index 7596168c80eb..0636a9c85dd0 100644
--- a/include/uapi/linux/fanotify.h
+++ b/include/uapi/linux/fanotify.h
@@ -146,6 +146,7 @@ struct fanotify_event_metadata {
 #define FAN_EVENT_INFO_TYPE_DFID	3
 #define FAN_EVENT_INFO_TYPE_PIDFD	4
 #define FAN_EVENT_INFO_TYPE_ERROR	5
+#define FAN_EVENT_INFO_TYPE_RANGE	6
 
 /* Special info types for FAN_RENAME */
 #define FAN_EVENT_INFO_TYPE_OLD_DFID_NAME	10
@@ -192,6 +193,13 @@ struct fanotify_event_info_error {
 	__u32 error_count;
 };
 
+struct fanotify_event_info_range {
+	struct fanotify_event_info_header hdr;
+	__u32 pad;
+	__u64 offset;
+	__u64 count;
+};
+
 /*
  * User space may need to record additional information about its decision.
  * The extra information type records what kind of information is included.
-- 
cgit v1.2.3


From b4b2ff4f61ded819bfa22e50fdec7693f51cbbee Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Fri, 15 Nov 2024 10:30:25 -0500
Subject: fanotify: allow to set errno in FAN_DENY permission response

With FAN_DENY response, user trying to perform the filesystem operation
gets an error with errno set to EPERM.

It is useful for hierarchical storage management (HSM) service to be able
to deny access for reasons more diverse than EPERM, for example EAGAIN,
if HSM could retry the operation later.

Allow fanotify groups with priority FAN_CLASSS_PRE_CONTENT to responsd
to permission events with the response value FAN_DENY_ERRNO(errno),
instead of FAN_DENY to return a custom error.

Limit custom error values to errors expected on read(2)/write(2) and
open(2) of regular files. This list could be extended in the future.
Userspace can test for legitimate values of FAN_DENY_ERRNO(errno) by
writing a response to an fanotify group fd with a value of FAN_NOFD in
the fd field of the response.

The change in fanotify_response is backward compatible, because errno is
written in the high 8 bits of the 32bit response field and old kernels
reject respose value with high bits set.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Link: https://patch.msgid.link/1e5fb6af84b69ca96b5c849fa5f10bdf4d1dc414.1731684329.git.josef@toxicpanda.com
---
 fs/notify/fanotify/fanotify.c      | 17 +++++++++++++----
 fs/notify/fanotify/fanotify.h      |  5 +++++
 fs/notify/fanotify/fanotify_user.c | 29 +++++++++++++++++++++++++++--
 include/linux/fanotify.h           |  4 +++-
 include/uapi/linux/fanotify.h      |  7 +++++++
 5 files changed, 55 insertions(+), 7 deletions(-)

(limited to 'include/uapi')

diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index da6c3c1c7edf..95646f7c46ca 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -223,7 +223,7 @@ static int fanotify_get_response(struct fsnotify_group *group,
 				 struct fanotify_perm_event *event,
 				 struct fsnotify_iter_info *iter_info)
 {
-	int ret;
+	int ret, errno;
 
 	pr_debug("%s: group=%p event=%p\n", __func__, group, event);
 
@@ -262,14 +262,23 @@ static int fanotify_get_response(struct fsnotify_group *group,
 		ret = 0;
 		break;
 	case FAN_DENY:
+		/* Check custom errno from pre-content events */
+		errno = fanotify_get_response_errno(event->response);
+		if (errno) {
+			ret = -errno;
+			break;
+		}
+		fallthrough;
 	default:
 		ret = -EPERM;
 	}
 
 	/* Check if the response should be audited */
-	if (event->response & FAN_AUDIT)
-		audit_fanotify(event->response & ~FAN_AUDIT,
-			       &event->audit_rule);
+	if (event->response & FAN_AUDIT) {
+		u32 response = event->response &
+			(FANOTIFY_RESPONSE_ACCESS | FANOTIFY_RESPONSE_FLAGS);
+		audit_fanotify(response & ~FAN_AUDIT, &event->audit_rule);
+	}
 
 	pr_debug("%s: group=%p event=%p about to return ret=%d\n", __func__,
 		 group, event, ret);
diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
index 7f06355afa1f..c12cbc270539 100644
--- a/fs/notify/fanotify/fanotify.h
+++ b/fs/notify/fanotify/fanotify.h
@@ -528,3 +528,8 @@ static inline unsigned int fanotify_mark_user_flags(struct fsnotify_mark *mark)
 
 	return mflags;
 }
+
+static inline u32 fanotify_get_response_errno(int res)
+{
+	return (res >> FAN_ERRNO_SHIFT) & FAN_ERRNO_MASK;
+}
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 6ef3cc7de5e4..19435cd2c41f 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -327,11 +327,12 @@ static int process_access_response(struct fsnotify_group *group,
 	struct fanotify_perm_event *event;
 	int fd = response_struct->fd;
 	u32 response = response_struct->response;
+	int errno = fanotify_get_response_errno(response);
 	int ret = info_len;
 	struct fanotify_response_info_audit_rule friar;
 
-	pr_debug("%s: group=%p fd=%d response=%u buf=%p size=%zu\n", __func__,
-		 group, fd, response, info, info_len);
+	pr_debug("%s: group=%p fd=%d response=%x errno=%d buf=%p size=%zu\n",
+		 __func__, group, fd, response, errno, info, info_len);
 	/*
 	 * make sure the response is valid, if invalid we do nothing and either
 	 * userspace can send a valid response or we will clean it up after the
@@ -342,7 +343,31 @@ static int process_access_response(struct fsnotify_group *group,
 
 	switch (response & FANOTIFY_RESPONSE_ACCESS) {
 	case FAN_ALLOW:
+		if (errno)
+			return -EINVAL;
+		break;
 	case FAN_DENY:
+		/* Custom errno is supported only for pre-content groups */
+		if (errno && group->priority != FSNOTIFY_PRIO_PRE_CONTENT)
+			return -EINVAL;
+
+		/*
+		 * Limit errno to values expected on open(2)/read(2)/write(2)
+		 * of regular files.
+		 */
+		switch (errno) {
+		case 0:
+		case EIO:
+		case EPERM:
+		case EBUSY:
+		case ETXTBSY:
+		case EAGAIN:
+		case ENOSPC:
+		case EDQUOT:
+			break;
+		default:
+			return -EINVAL;
+		}
 		break;
 	default:
 		return -EINVAL;
diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h
index c747af064d2c..78f660ebc318 100644
--- a/include/linux/fanotify.h
+++ b/include/linux/fanotify.h
@@ -132,7 +132,9 @@
 /* These masks check for invalid bits in permission responses. */
 #define FANOTIFY_RESPONSE_ACCESS (FAN_ALLOW | FAN_DENY)
 #define FANOTIFY_RESPONSE_FLAGS (FAN_AUDIT | FAN_INFO)
-#define FANOTIFY_RESPONSE_VALID_MASK (FANOTIFY_RESPONSE_ACCESS | FANOTIFY_RESPONSE_FLAGS)
+#define FANOTIFY_RESPONSE_VALID_MASK \
+	(FANOTIFY_RESPONSE_ACCESS | FANOTIFY_RESPONSE_FLAGS | \
+	 (FAN_ERRNO_MASK << FAN_ERRNO_SHIFT))
 
 /* Do not use these old uapi constants internally */
 #undef FAN_ALL_CLASS_BITS
diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h
index 0636a9c85dd0..bd8167979707 100644
--- a/include/uapi/linux/fanotify.h
+++ b/include/uapi/linux/fanotify.h
@@ -235,6 +235,13 @@ struct fanotify_response_info_audit_rule {
 /* Legit userspace responses to a _PERM event */
 #define FAN_ALLOW	0x01
 #define FAN_DENY	0x02
+/* errno other than EPERM can specified in upper byte of deny response */
+#define FAN_ERRNO_BITS	8
+#define FAN_ERRNO_SHIFT (32 - FAN_ERRNO_BITS)
+#define FAN_ERRNO_MASK	((1 << FAN_ERRNO_BITS) - 1)
+#define FAN_DENY_ERRNO(err) \
+	(FAN_DENY | ((((__u32)(err)) & FAN_ERRNO_MASK) << FAN_ERRNO_SHIFT))
+
 #define FAN_AUDIT	0x10	/* Bitmask to create audit record for result */
 #define FAN_INFO	0x20	/* Bitmask to indicate additional information */
 
-- 
cgit v1.2.3


From f4425e3ab2f796d442a44f31262eade9b6427ff7 Mon Sep 17 00:00:00 2001
From: Shengjiu Wang <shengjiu.wang@nxp.com>
Date: Thu, 12 Dec 2024 15:45:04 +0800
Subject: ALSA: compress: Add output rate and output format support

Add 'pcm_format' for struct snd_codec, add 'pcm_formats' for
struct snd_codec_desc, these are used for accelerator usage.

Current accelerator example is sample rate converter (SRC).
Define struct snd_codec_desc_src for descript minmum and maxmum
sample rates. And add 'src_d' in union snd_codec_options
structure. These are mainly used for capbility query.

Signed-off-by: Jaroslav Kysela <perex@perex.cz>
Signed-off-by: Shengjiu Wang <shengjiu.wang@nxp.com>
Acked-by: Jaroslav Kysela <perex@perex.cz>
Acked-by: Vinod Koul <vkoul@kernel.org>
Link: https://patch.msgid.link/20241212074509.3445859-2-shengjiu.wang@nxp.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/uapi/sound/compress_params.h | 23 +++++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/sound/compress_params.h b/include/uapi/sound/compress_params.h
index ddc77322d571..bc7648a30746 100644
--- a/include/uapi/sound/compress_params.h
+++ b/include/uapi/sound/compress_params.h
@@ -334,6 +334,14 @@ union snd_codec_options {
 	struct snd_dec_wma wma_d;
 	struct snd_dec_alac alac_d;
 	struct snd_dec_ape ape_d;
+	struct {
+		__u32 out_sample_rate;
+	} src_d;
+} __attribute__((packed, aligned(4)));
+
+struct snd_codec_desc_src {
+	__u32 out_sample_rate_min;
+	__u32 out_sample_rate_max;
 } __attribute__((packed, aligned(4)));
 
 /** struct snd_codec_desc - description of codec capabilities
@@ -347,6 +355,9 @@ union snd_codec_options {
  * @modes: Supported modes. See SND_AUDIOMODE defines
  * @formats: Supported formats. See SND_AUDIOSTREAMFORMAT defines
  * @min_buffer: Minimum buffer size handled by codec implementation
+ * @pcm_formats: Output (for decoders) or input (for encoders)
+ *               PCM formats (required to accel mode, 0 for other modes)
+ * @u_space: union space (for codec dependent data)
  * @reserved: reserved for future use
  *
  * This structure provides a scalar value for profiles, modes and stream
@@ -370,7 +381,12 @@ struct snd_codec_desc {
 	__u32 modes;
 	__u32 formats;
 	__u32 min_buffer;
-	__u32 reserved[15];
+	__u32 pcm_formats;
+	union {
+		__u32 u_space[6];
+		struct snd_codec_desc_src src;
+	} __attribute__((packed, aligned(4)));
+	__u32 reserved[8];
 } __attribute__((packed, aligned(4)));
 
 /** struct snd_codec
@@ -395,6 +411,8 @@ struct snd_codec_desc {
  * @align: Block alignment in bytes of an audio sample.
  *		Only required for PCM or IEC formats.
  * @options: encoder-specific settings
+ * @pcm_format: Output (for decoders) or input (for encoders)
+ *               PCM formats (required to accel mode, 0 for other modes)
  * @reserved: reserved for future use
  */
 
@@ -411,7 +429,8 @@ struct snd_codec {
 	__u32 format;
 	__u32 align;
 	union snd_codec_options options;
-	__u32 reserved[3];
+	__u32 pcm_format;
+	__u32 reserved[2];
 } __attribute__((packed, aligned(4)));
 
 #endif
-- 
cgit v1.2.3


From 70a667d70cce338ab8552dd762ae114a5ab96500 Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@linux.intel.com>
Date: Fri, 13 Dec 2024 12:11:22 +0200
Subject: ASoC: SOF: Add support for pause supported tokens from topology

New tokens are added to topology:
1202: SOF_TKN_STREAM_PLAYBACK_PAUSE_SUPPORTED
1203: SOF_TKN_STREAM_CAPTURE_PAUSE_SUPPORTED

The new tokens are used to advertise support for PAUSE/RESUME operation on
a PCM device depending on firmware product, use case, pipeline topology.

The snd_sof_pcm_stream.pause_supported is updated to reflect the advertised
value for the PCM device.

If the token does not exist then the pause_supported is set to false.

Note: it is up to the platform code to use this flag to decide to advertise
the PAUSE support for user space or not.

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@linux.intel.com>
Reviewed-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Reviewed-by: Bard Liao <yung-chuan.liao@linux.intel.com>
Reviewed-by: Kai Vehmanen <kai.vehmanen@linux.intel.com>
Reviewed-by: Liam Girdwood <liam.r.girdwood@intel.com>
Link: https://patch.msgid.link/20241213101123.27318-2-peter.ujfalusi@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/uapi/sound/sof/tokens.h | 2 ++
 sound/soc/sof/sof-audio.h       | 1 +
 sound/soc/sof/topology.c        | 4 ++++
 3 files changed, 7 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/sound/sof/tokens.h b/include/uapi/sound/sof/tokens.h
index 0a246bc218d3..c28c766270de 100644
--- a/include/uapi/sound/sof/tokens.h
+++ b/include/uapi/sound/sof/tokens.h
@@ -153,6 +153,8 @@
 /* Stream */
 #define SOF_TKN_STREAM_PLAYBACK_COMPATIBLE_D0I3	1200
 #define SOF_TKN_STREAM_CAPTURE_COMPATIBLE_D0I3	1201
+#define SOF_TKN_STREAM_PLAYBACK_PAUSE_SUPPORTED	1202
+#define SOF_TKN_STREAM_CAPTURE_PAUSE_SUPPORTED	1203
 
 /* Led control for mute switches */
 #define SOF_TKN_MUTE_LED_USE			1300
diff --git a/sound/soc/sof/sof-audio.h b/sound/soc/sof/sof-audio.h
index 01b819dd8498..62f3c11a9216 100644
--- a/sound/soc/sof/sof-audio.h
+++ b/sound/soc/sof/sof-audio.h
@@ -332,6 +332,7 @@ struct snd_sof_pcm_stream {
 	struct work_struct period_elapsed_work;
 	struct snd_soc_dapm_widget_list *list; /* list of connected DAPM widgets */
 	bool d0i3_compatible; /* DSP can be in D0I3 when this pcm is opened */
+	bool pause_supported; /* PCM device supports PAUSE operation */
 	unsigned int dsp_max_burst_size_in_ms; /* The maximum size of the host DMA burst in ms */
 	/*
 	 * flag to indicate that the DSP pipelines should be kept
diff --git a/sound/soc/sof/topology.c b/sound/soc/sof/topology.c
index b3fca5fd87d6..688cc7ac1714 100644
--- a/sound/soc/sof/topology.c
+++ b/sound/soc/sof/topology.c
@@ -407,6 +407,10 @@ static const struct sof_topology_token stream_tokens[] = {
 		offsetof(struct snd_sof_pcm, stream[0].d0i3_compatible)},
 	{SOF_TKN_STREAM_CAPTURE_COMPATIBLE_D0I3, SND_SOC_TPLG_TUPLE_TYPE_BOOL, get_token_u16,
 		offsetof(struct snd_sof_pcm, stream[1].d0i3_compatible)},
+	{SOF_TKN_STREAM_PLAYBACK_PAUSE_SUPPORTED, SND_SOC_TPLG_TUPLE_TYPE_BOOL, get_token_u16,
+		offsetof(struct snd_sof_pcm, stream[0].pause_supported)},
+	{SOF_TKN_STREAM_CAPTURE_PAUSE_SUPPORTED, SND_SOC_TPLG_TUPLE_TYPE_BOOL, get_token_u16,
+		offsetof(struct snd_sof_pcm, stream[1].pause_supported)},
 };
 
 /* Leds */
-- 
cgit v1.2.3


From 4d3ae294f900fb7232fb6c890dbd3176b8a5f121 Mon Sep 17 00:00:00 2001
From: Anton Protopopov <aspsk@isovalent.com>
Date: Fri, 13 Dec 2024 13:09:31 +0000
Subject: bpf: Add fd_array_cnt attribute for prog_load

The fd_array attribute of the BPF_PROG_LOAD syscall may contain a set
of file descriptors: maps or btfs. This field was introduced as a
sparse array. Introduce a new attribute, fd_array_cnt, which, if
present, indicates that the fd_array is a continuous array of the
corresponding length.

If fd_array_cnt is non-zero, then every map in the fd_array will be
bound to the program, as if it was used by the program. This
functionality is similar to the BPF_PROG_BIND_MAP syscall, but such
maps can be used by the verifier during the program load.

Signed-off-by: Anton Protopopov <aspsk@isovalent.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20241213130934.1087929-5-aspsk@isovalent.com
---
 include/uapi/linux/bpf.h       |  10 ++++
 kernel/bpf/syscall.c           |   2 +-
 kernel/bpf/verifier.c          | 106 +++++++++++++++++++++++++++++++++++------
 tools/include/uapi/linux/bpf.h |  10 ++++
 4 files changed, 112 insertions(+), 16 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 4162afc6b5d0..2acf9b336371 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1573,6 +1573,16 @@ union bpf_attr {
 		 * If provided, prog_flags should have BPF_F_TOKEN_FD flag set.
 		 */
 		__s32		prog_token_fd;
+		/* The fd_array_cnt can be used to pass the length of the
+		 * fd_array array. In this case all the [map] file descriptors
+		 * passed in this array will be bound to the program, even if
+		 * the maps are not referenced directly. The functionality is
+		 * similar to the BPF_PROG_BIND_MAP syscall, but maps can be
+		 * used by the verifier during the program load. If provided,
+		 * then the fd_array[0,...,fd_array_cnt-1] is expected to be
+		 * continuous.
+		 */
+		__u32		fd_array_cnt;
 	};
 
 	struct { /* anonymous struct used by BPF_OBJ_* commands */
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 5684e8ce132d..4e88797fdbeb 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2730,7 +2730,7 @@ static bool is_perfmon_prog_type(enum bpf_prog_type prog_type)
 }
 
 /* last field in 'union bpf_attr' used by this command */
-#define BPF_PROG_LOAD_LAST_FIELD prog_token_fd
+#define BPF_PROG_LOAD_LAST_FIELD fd_array_cnt
 
 static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
 {
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 296765ffbdc5..c0e772996c52 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -19505,22 +19505,10 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env,
 	return 0;
 }
 
-/* Add map behind fd to used maps list, if it's not already there, and return
- * its index.
- * Returns <0 on error, or >= 0 index, on success.
- */
-static int add_used_map_from_fd(struct bpf_verifier_env *env, int fd)
+static int __add_used_map(struct bpf_verifier_env *env, struct bpf_map *map)
 {
-	CLASS(fd, f)(fd);
-	struct bpf_map *map;
 	int i, err;
 
-	map = __bpf_map_get(f);
-	if (IS_ERR(map)) {
-		verbose(env, "fd %d is not pointing to valid bpf_map\n", fd);
-		return PTR_ERR(map);
-	}
-
 	/* check whether we recorded this map already */
 	for (i = 0; i < env->used_map_cnt; i++)
 		if (env->used_maps[i] == map)
@@ -19551,6 +19539,24 @@ static int add_used_map_from_fd(struct bpf_verifier_env *env, int fd)
 	return env->used_map_cnt - 1;
 }
 
+/* Add map behind fd to used maps list, if it's not already there, and return
+ * its index.
+ * Returns <0 on error, or >= 0 index, on success.
+ */
+static int add_used_map(struct bpf_verifier_env *env, int fd)
+{
+	struct bpf_map *map;
+	CLASS(fd, f)(fd);
+
+	map = __bpf_map_get(f);
+	if (IS_ERR(map)) {
+		verbose(env, "fd %d is not pointing to valid bpf_map\n", fd);
+		return PTR_ERR(map);
+	}
+
+	return __add_used_map(env, map);
+}
+
 /* find and rewrite pseudo imm in ld_imm64 instructions:
  *
  * 1. if it accesses map FD, replace it with actual map pointer.
@@ -19642,7 +19648,7 @@ static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env)
 				break;
 			}
 
-			map_idx = add_used_map_from_fd(env, fd);
+			map_idx = add_used_map(env, fd);
 			if (map_idx < 0)
 				return map_idx;
 			map = env->used_maps[map_idx];
@@ -22850,6 +22856,73 @@ struct btf *bpf_get_btf_vmlinux(void)
 	return btf_vmlinux;
 }
 
+/*
+ * The add_fd_from_fd_array() is executed only if fd_array_cnt is non-zero. In
+ * this case expect that every file descriptor in the array is either a map or
+ * a BTF. Everything else is considered to be trash.
+ */
+static int add_fd_from_fd_array(struct bpf_verifier_env *env, int fd)
+{
+	struct bpf_map *map;
+	struct btf *btf;
+	CLASS(fd, f)(fd);
+	int err;
+
+	map = __bpf_map_get(f);
+	if (!IS_ERR(map)) {
+		err = __add_used_map(env, map);
+		if (err < 0)
+			return err;
+		return 0;
+	}
+
+	btf = __btf_get_by_fd(f);
+	if (!IS_ERR(btf)) {
+		err = __add_used_btf(env, btf);
+		if (err < 0)
+			return err;
+		return 0;
+	}
+
+	verbose(env, "fd %d is not pointing to valid bpf_map or btf\n", fd);
+	return PTR_ERR(map);
+}
+
+static int process_fd_array(struct bpf_verifier_env *env, union bpf_attr *attr, bpfptr_t uattr)
+{
+	size_t size = sizeof(int);
+	int ret;
+	int fd;
+	u32 i;
+
+	env->fd_array = make_bpfptr(attr->fd_array, uattr.is_kernel);
+
+	/*
+	 * The only difference between old (no fd_array_cnt is given) and new
+	 * APIs is that in the latter case the fd_array is expected to be
+	 * continuous and is scanned for map fds right away
+	 */
+	if (!attr->fd_array_cnt)
+		return 0;
+
+	/* Check for integer overflow */
+	if (attr->fd_array_cnt >= (U32_MAX / size)) {
+		verbose(env, "fd_array_cnt is too big (%u)\n", attr->fd_array_cnt);
+		return -EINVAL;
+	}
+
+	for (i = 0; i < attr->fd_array_cnt; i++) {
+		if (copy_from_bpfptr_offset(&fd, env->fd_array, i * size, size))
+			return -EFAULT;
+
+		ret = add_fd_from_fd_array(env, fd);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
 int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u32 uattr_size)
 {
 	u64 start_time = ktime_get_ns();
@@ -22881,7 +22954,6 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3
 		env->insn_aux_data[i].orig_idx = i;
 	env->prog = *prog;
 	env->ops = bpf_verifier_ops[env->prog->type];
-	env->fd_array = make_bpfptr(attr->fd_array, uattr.is_kernel);
 
 	env->allow_ptr_leaks = bpf_allow_ptr_leaks(env->prog->aux->token);
 	env->allow_uninit_stack = bpf_allow_uninit_stack(env->prog->aux->token);
@@ -22904,6 +22976,10 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3
 	if (ret)
 		goto err_unlock;
 
+	ret = process_fd_array(env, attr, uattr);
+	if (ret)
+		goto skip_full_check;
+
 	mark_verifier_state_clean(env);
 
 	if (IS_ERR(btf_vmlinux)) {
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 4162afc6b5d0..2acf9b336371 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1573,6 +1573,16 @@ union bpf_attr {
 		 * If provided, prog_flags should have BPF_F_TOKEN_FD flag set.
 		 */
 		__s32		prog_token_fd;
+		/* The fd_array_cnt can be used to pass the length of the
+		 * fd_array array. In this case all the [map] file descriptors
+		 * passed in this array will be bound to the program, even if
+		 * the maps are not referenced directly. The functionality is
+		 * similar to the BPF_PROG_BIND_MAP syscall, but maps can be
+		 * used by the verifier during the program load. If provided,
+		 * then the fd_array[0,...,fd_array_cnt-1] is expected to be
+		 * continuous.
+		 */
+		__u32		fd_array_cnt;
 	};
 
 	struct { /* anonymous struct used by BPF_OBJ_* commands */
-- 
cgit v1.2.3


From 2c2b61d2138f472e50b5531ec0cb4a1485837e21 Mon Sep 17 00:00:00 2001
From: Yuyang Huang <yuyanghuang@google.com>
Date: Wed, 11 Dec 2024 17:22:41 +0900
Subject: netlink: add IGMP/MLD join/leave notifications
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This change introduces netlink notifications for multicast address
changes. The following features are included:
* Addition and deletion of multicast addresses are reported using
  RTM_NEWMULTICAST and RTM_DELMULTICAST messages with AF_INET and
  AF_INET6.
* Two new notification groups: RTNLGRP_IPV4_MCADDR and
  RTNLGRP_IPV6_MCADDR are introduced for receiving these events.

This change allows user space applications (e.g., ip monitor) to
efficiently track multicast group memberships by listening for netlink
events. Previously, applications relied on inefficient polling of
procfs, introducing delays. With netlink notifications, applications
receive realtime updates on multicast group membership changes,
enabling more precise metrics collection and system monitoring. 

This change also unlocks the potential for implementing a wide range
of sophisticated multicast related features in user space by allowing
applications to combine kernel provided multicast address information
with user space data and communicate decisions back to the kernel for
more fine grained control. This mechanism can be used for various
purposes, including multicast filtering, IGMP/MLD offload, and
IGMP/MLD snooping.

Cc: Maciej Żenczykowski <maze@google.com>
Cc: Lorenzo Colitti <lorenzo@google.com>
Co-developed-by: Patrick Ruddy <pruddy@vyatta.att-mail.com>
Signed-off-by: Patrick Ruddy <pruddy@vyatta.att-mail.com>
Link: https://lore.kernel.org/r/20180906091056.21109-1-pruddy@vyatta.att-mail.com
Signed-off-by: Yuyang Huang <yuyanghuang@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/igmp.h           |  2 ++
 include/net/addrconf.h         | 21 ++++++++++++++
 include/uapi/linux/rtnetlink.h | 10 ++++++-
 net/ipv4/igmp.c                | 64 ++++++++++++++++++++++++++++++++++++++++++
 net/ipv6/addrconf.c            | 29 ++++++-------------
 net/ipv6/mcast.c               | 39 +++++++++++++++++++++++++
 6 files changed, 144 insertions(+), 21 deletions(-)

(limited to 'include/uapi')

diff --git a/include/linux/igmp.h b/include/linux/igmp.h
index 5171231f70a8..073b30a9b850 100644
--- a/include/linux/igmp.h
+++ b/include/linux/igmp.h
@@ -87,6 +87,8 @@ struct ip_mc_list {
 	char			loaded;
 	unsigned char		gsquery;	/* check source marks? */
 	unsigned char		crcount;
+	unsigned long		mca_cstamp;
+	unsigned long		mca_tstamp;
 	struct rcu_head		rcu;
 };
 
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 363dd63babe7..58337898fa21 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -88,6 +88,23 @@ struct ifa6_config {
 	u16			scope;
 };
 
+enum addr_type_t {
+	UNICAST_ADDR,
+	MULTICAST_ADDR,
+	ANYCAST_ADDR,
+};
+
+struct inet6_fill_args {
+	u32 portid;
+	u32 seq;
+	int event;
+	unsigned int flags;
+	int netnsid;
+	int ifindex;
+	enum addr_type_t type;
+	bool force_rt_scope_universe;
+};
+
 int addrconf_init(void);
 void addrconf_cleanup(void);
 
@@ -525,4 +542,8 @@ int if6_proc_init(void);
 void if6_proc_exit(void);
 #endif
 
+int inet6_fill_ifmcaddr(struct sk_buff *skb,
+			const struct ifmcaddr6 *ifmca,
+			struct inet6_fill_args *args);
+
 #endif
diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index db7254d52d93..eccc0e7dcb7d 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -93,7 +93,11 @@ enum {
 	RTM_NEWPREFIX	= 52,
 #define RTM_NEWPREFIX	RTM_NEWPREFIX
 
-	RTM_GETMULTICAST = 58,
+	RTM_NEWMULTICAST = 56,
+#define RTM_NEWMULTICAST RTM_NEWMULTICAST
+	RTM_DELMULTICAST,
+#define RTM_DELMULTICAST RTM_DELMULTICAST
+	RTM_GETMULTICAST,
 #define RTM_GETMULTICAST RTM_GETMULTICAST
 
 	RTM_GETANYCAST	= 62,
@@ -774,6 +778,10 @@ enum rtnetlink_groups {
 #define RTNLGRP_TUNNEL		RTNLGRP_TUNNEL
 	RTNLGRP_STATS,
 #define RTNLGRP_STATS		RTNLGRP_STATS
+	RTNLGRP_IPV4_MCADDR,
+#define RTNLGRP_IPV4_MCADDR	RTNLGRP_IPV4_MCADDR
+	RTNLGRP_IPV6_MCADDR,
+#define RTNLGRP_IPV6_MCADDR	RTNLGRP_IPV6_MCADDR
 	__RTNLGRP_MAX
 };
 #define RTNLGRP_MAX	(__RTNLGRP_MAX - 1)
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 6a238398acc9..8a370ef37d3f 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -88,6 +88,8 @@
 #include <linux/byteorder/generic.h>
 
 #include <net/net_namespace.h>
+#include <net/netlink.h>
+#include <net/addrconf.h>
 #include <net/arp.h>
 #include <net/ip.h>
 #include <net/protocol.h>
@@ -1430,6 +1432,63 @@ static void ip_mc_hash_remove(struct in_device *in_dev,
 	*mc_hash = im->next_hash;
 }
 
+static int inet_fill_ifmcaddr(struct sk_buff *skb, struct net_device *dev,
+			      const struct ip_mc_list *im, int event)
+{
+	struct ifa_cacheinfo ci;
+	struct ifaddrmsg *ifm;
+	struct nlmsghdr *nlh;
+
+	nlh = nlmsg_put(skb, 0, 0, event, sizeof(struct ifaddrmsg), 0);
+	if (!nlh)
+		return -EMSGSIZE;
+
+	ifm = nlmsg_data(nlh);
+	ifm->ifa_family = AF_INET;
+	ifm->ifa_prefixlen = 32;
+	ifm->ifa_flags = IFA_F_PERMANENT;
+	ifm->ifa_scope = RT_SCOPE_UNIVERSE;
+	ifm->ifa_index = dev->ifindex;
+
+	ci.cstamp = (READ_ONCE(im->mca_cstamp) - INITIAL_JIFFIES) * 100UL / HZ;
+	ci.tstamp = ci.cstamp;
+	ci.ifa_prefered = INFINITY_LIFE_TIME;
+	ci.ifa_valid = INFINITY_LIFE_TIME;
+
+	if (nla_put_in_addr(skb, IFA_MULTICAST, im->multiaddr) < 0 ||
+	    nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci) < 0) {
+		nlmsg_cancel(skb, nlh);
+		return -EMSGSIZE;
+	}
+
+	nlmsg_end(skb, nlh);
+	return 0;
+}
+
+static void inet_ifmcaddr_notify(struct net_device *dev,
+				 const struct ip_mc_list *im, int event)
+{
+	struct net *net = dev_net(dev);
+	struct sk_buff *skb;
+	int err = -ENOMEM;
+
+	skb = nlmsg_new(NLMSG_ALIGN(sizeof(struct ifaddrmsg)) +
+			nla_total_size(sizeof(__be32)), GFP_ATOMIC);
+	if (!skb)
+		goto error;
+
+	err = inet_fill_ifmcaddr(skb, dev, im, event);
+	if (err < 0) {
+		WARN_ON_ONCE(err == -EMSGSIZE);
+		nlmsg_free(skb);
+		goto error;
+	}
+
+	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_MCADDR, NULL, GFP_ATOMIC);
+	return;
+error:
+	rtnl_set_sk_err(net, RTNLGRP_IPV4_MCADDR, err);
+}
 
 /*
  *	A socket has joined a multicast group on device dev.
@@ -1473,6 +1532,8 @@ static void ____ip_mc_inc_group(struct in_device *in_dev, __be32 addr,
 	im->interface = in_dev;
 	in_dev_hold(in_dev);
 	im->multiaddr = addr;
+	im->mca_cstamp = jiffies;
+	im->mca_tstamp = im->mca_cstamp;
 	/* initial mode is (EX, empty) */
 	im->sfmode = mode;
 	im->sfcount[mode] = 1;
@@ -1492,6 +1553,7 @@ static void ____ip_mc_inc_group(struct in_device *in_dev, __be32 addr,
 	igmpv3_del_delrec(in_dev, im);
 #endif
 	igmp_group_added(im);
+	inet_ifmcaddr_notify(in_dev->dev, im, RTM_NEWMULTICAST);
 	if (!in_dev->dead)
 		ip_rt_multicast_event(in_dev);
 out:
@@ -1705,6 +1767,8 @@ void __ip_mc_dec_group(struct in_device *in_dev, __be32 addr, gfp_t gfp)
 				*ip = i->next_rcu;
 				in_dev->mc_count--;
 				__igmp_group_dropped(i, gfp);
+				inet_ifmcaddr_notify(in_dev->dev, i,
+						     RTM_DELMULTICAST);
 				ip_mc_clear_src(i);
 
 				if (!in_dev->dead)
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 0e765466d7f7..2e2684886953 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -5127,22 +5127,6 @@ static inline int inet6_ifaddr_msgsize(void)
 	       + nla_total_size(4)  /* IFA_RT_PRIORITY */;
 }
 
-enum addr_type_t {
-	UNICAST_ADDR,
-	MULTICAST_ADDR,
-	ANYCAST_ADDR,
-};
-
-struct inet6_fill_args {
-	u32 portid;
-	u32 seq;
-	int event;
-	unsigned int flags;
-	int netnsid;
-	int ifindex;
-	enum addr_type_t type;
-};
-
 static int inet6_fill_ifaddr(struct sk_buff *skb,
 			     const struct inet6_ifaddr *ifa,
 			     struct inet6_fill_args *args)
@@ -5221,15 +5205,16 @@ error:
 	return -EMSGSIZE;
 }
 
-static int inet6_fill_ifmcaddr(struct sk_buff *skb,
-			       const struct ifmcaddr6 *ifmca,
-			       struct inet6_fill_args *args)
+int inet6_fill_ifmcaddr(struct sk_buff *skb,
+			const struct ifmcaddr6 *ifmca,
+			struct inet6_fill_args *args)
 {
 	int ifindex = ifmca->idev->dev->ifindex;
 	u8 scope = RT_SCOPE_UNIVERSE;
 	struct nlmsghdr *nlh;
 
-	if (ipv6_addr_scope(&ifmca->mca_addr) & IFA_SITE)
+	if (!args->force_rt_scope_universe &&
+	    ipv6_addr_scope(&ifmca->mca_addr) & IFA_SITE)
 		scope = RT_SCOPE_SITE;
 
 	nlh = nlmsg_put(skb, args->portid, args->seq, args->event,
@@ -5254,6 +5239,7 @@ static int inet6_fill_ifmcaddr(struct sk_buff *skb,
 	nlmsg_end(skb, nlh);
 	return 0;
 }
+EXPORT_SYMBOL(inet6_fill_ifmcaddr);
 
 static int inet6_fill_ifacaddr(struct sk_buff *skb,
 			       const struct ifacaddr6 *ifaca,
@@ -5418,6 +5404,7 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
 		.flags = NLM_F_MULTI,
 		.netnsid = -1,
 		.type = type,
+		.force_rt_scope_universe = false,
 	};
 	struct {
 		unsigned long ifindex;
@@ -5546,6 +5533,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh,
 		.event = RTM_NEWADDR,
 		.flags = 0,
 		.netnsid = -1,
+		.force_rt_scope_universe = false,
 	};
 	struct ifaddrmsg *ifm;
 	struct nlattr *tb[IFA_MAX+1];
@@ -5617,6 +5605,7 @@ static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa)
 		.event = event,
 		.flags = 0,
 		.netnsid = -1,
+		.force_rt_scope_universe = false,
 	};
 	int err = -ENOBUFS;
 
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 5ca8692d565d..587831c148de 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -33,8 +33,10 @@
 #include <linux/in.h>
 #include <linux/in6.h>
 #include <linux/netdevice.h>
+#include <linux/if_addr.h>
 #include <linux/if_arp.h>
 #include <linux/route.h>
+#include <linux/rtnetlink.h>
 #include <linux/init.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
@@ -47,6 +49,7 @@
 #include <linux/netfilter_ipv6.h>
 
 #include <net/net_namespace.h>
+#include <net/netlink.h>
 #include <net/sock.h>
 #include <net/snmp.h>
 
@@ -901,6 +904,39 @@ static struct ifmcaddr6 *mca_alloc(struct inet6_dev *idev,
 	return mc;
 }
 
+static void inet6_ifmcaddr_notify(struct net_device *dev,
+				  const struct ifmcaddr6 *ifmca, int event)
+{
+	struct inet6_fill_args fillargs = {
+		.portid = 0,
+		.seq = 0,
+		.event = event,
+		.flags = 0,
+		.netnsid = -1,
+		.force_rt_scope_universe = true,
+	};
+	struct net *net = dev_net(dev);
+	struct sk_buff *skb;
+	int err = -ENOMEM;
+
+	skb = nlmsg_new(NLMSG_ALIGN(sizeof(struct ifaddrmsg)) +
+			nla_total_size(16), GFP_ATOMIC);
+	if (!skb)
+		goto error;
+
+	err = inet6_fill_ifmcaddr(skb, ifmca, &fillargs);
+	if (err < 0) {
+		WARN_ON_ONCE(err == -EMSGSIZE);
+		nlmsg_free(skb);
+		goto error;
+	}
+
+	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MCADDR, NULL, GFP_ATOMIC);
+	return;
+error:
+	rtnl_set_sk_err(net, RTNLGRP_IPV6_MCADDR, err);
+}
+
 /*
  *	device multicast group inc (add if not found)
  */
@@ -948,6 +984,7 @@ static int __ipv6_dev_mc_inc(struct net_device *dev,
 
 	mld_del_delrec(idev, mc);
 	igmp6_group_added(mc);
+	inet6_ifmcaddr_notify(dev, mc, RTM_NEWMULTICAST);
 	mutex_unlock(&idev->mc_lock);
 	ma_put(mc);
 	return 0;
@@ -977,6 +1014,8 @@ int __ipv6_dev_mc_dec(struct inet6_dev *idev, const struct in6_addr *addr)
 				*map = ma->next;
 
 				igmp6_group_dropped(ma);
+				inet6_ifmcaddr_notify(idev->dev, ma,
+						      RTM_DELMULTICAST);
 				ip6_mc_clear_src(ma);
 				mutex_unlock(&idev->mc_lock);
 
-- 
cgit v1.2.3


From 510128b30f2db1600172e9aaec44f66db3c16e15 Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Thu, 12 Dec 2024 16:36:06 +0100
Subject: tls: add counters for rekey

This introduces 5 counters to keep track of key updates:
Tls{Rx,Tx}Rekey{Ok,Error} and TlsRxRekeyReceived.

Suggested-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/snmp.h |  5 +++++
 net/tls/tls_main.c        | 27 ++++++++++++++++++++++-----
 net/tls/tls_proc.c        |  5 +++++
 net/tls/tls_sw.c          |  6 ++++--
 4 files changed, 36 insertions(+), 7 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h
index adf5fd78dd50..51da2e00112d 100644
--- a/include/uapi/linux/snmp.h
+++ b/include/uapi/linux/snmp.h
@@ -358,6 +358,11 @@ enum
 	LINUX_MIB_TLSRXDEVICERESYNC,		/* TlsRxDeviceResync */
 	LINUX_MIB_TLSDECRYPTRETRY,		/* TlsDecryptRetry */
 	LINUX_MIB_TLSRXNOPADVIOL,		/* TlsRxNoPadViolation */
+	LINUX_MIB_TLSRXREKEYOK,			/* TlsRxRekeyOk */
+	LINUX_MIB_TLSRXREKEYERROR,		/* TlsRxRekeyError */
+	LINUX_MIB_TLSTXREKEYOK,			/* TlsTxRekeyOk */
+	LINUX_MIB_TLSTXREKEYERROR,		/* TlsTxRekeyError */
+	LINUX_MIB_TLSRXREKEYRECEIVED,		/* TlsRxRekeyReceived */
 	__LINUX_MIB_TLSMAX
 };
 
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index 68b5735dafc1..9ee5a83c5b40 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -640,8 +640,11 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval,
 		/* Currently we only support setting crypto info more
 		 * than one time for TLS 1.3
 		 */
-		if (crypto_info->version != TLS_1_3_VERSION)
+		if (crypto_info->version != TLS_1_3_VERSION) {
+			TLS_INC_STATS(sock_net(sk), tx ? LINUX_MIB_TLSTXREKEYERROR
+						       : LINUX_MIB_TLSRXREKEYERROR);
 			return -EBUSY;
+		}
 
 		update = true;
 		old_crypto_info = crypto_info;
@@ -696,8 +699,13 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval,
 						update ? crypto_info : NULL);
 			if (rc)
 				goto err_crypto_info;
-			TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSTXSW);
-			TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSCURRTXSW);
+
+			if (update) {
+				TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSTXREKEYOK);
+			} else {
+				TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSTXSW);
+				TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSCURRTXSW);
+			}
 			conf = TLS_SW;
 		}
 	} else {
@@ -711,8 +719,13 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval,
 						update ? crypto_info : NULL);
 			if (rc)
 				goto err_crypto_info;
-			TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSRXSW);
-			TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSCURRRXSW);
+
+			if (update) {
+				TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSRXREKEYOK);
+			} else {
+				TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSRXSW);
+				TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSCURRRXSW);
+			}
 			conf = TLS_SW;
 		}
 		if (!update)
@@ -735,6 +748,10 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval,
 	return 0;
 
 err_crypto_info:
+	if (update) {
+		TLS_INC_STATS(sock_net(sk), tx ? LINUX_MIB_TLSTXREKEYERROR
+					       : LINUX_MIB_TLSRXREKEYERROR);
+	}
 	memzero_explicit(crypto_ctx, sizeof(*crypto_ctx));
 	return rc;
 }
diff --git a/net/tls/tls_proc.c b/net/tls/tls_proc.c
index 68982728f620..367666aa07b8 100644
--- a/net/tls/tls_proc.c
+++ b/net/tls/tls_proc.c
@@ -22,6 +22,11 @@ static const struct snmp_mib tls_mib_list[] = {
 	SNMP_MIB_ITEM("TlsRxDeviceResync", LINUX_MIB_TLSRXDEVICERESYNC),
 	SNMP_MIB_ITEM("TlsDecryptRetry", LINUX_MIB_TLSDECRYPTRETRY),
 	SNMP_MIB_ITEM("TlsRxNoPadViolation", LINUX_MIB_TLSRXNOPADVIOL),
+	SNMP_MIB_ITEM("TlsRxRekeyOk", LINUX_MIB_TLSRXREKEYOK),
+	SNMP_MIB_ITEM("TlsRxRekeyError", LINUX_MIB_TLSRXREKEYERROR),
+	SNMP_MIB_ITEM("TlsTxRekeyOk", LINUX_MIB_TLSTXREKEYOK),
+	SNMP_MIB_ITEM("TlsTxRekeyError", LINUX_MIB_TLSTXREKEYERROR),
+	SNMP_MIB_ITEM("TlsRxRekeyReceived", LINUX_MIB_TLSRXREKEYRECEIVED),
 	SNMP_MIB_SENTINEL
 };
 
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 9e5aff5bab98..47550d485819 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -1724,7 +1724,8 @@ tls_decrypt_device(struct sock *sk, struct msghdr *msg,
 	return 1;
 }
 
-static int tls_check_pending_rekey(struct tls_context *ctx, struct sk_buff *skb)
+static int tls_check_pending_rekey(struct sock *sk, struct tls_context *ctx,
+				   struct sk_buff *skb)
 {
 	const struct strp_msg *rxm = strp_msg(skb);
 	const struct tls_msg *tlm = tls_msg(skb);
@@ -1747,6 +1748,7 @@ static int tls_check_pending_rekey(struct tls_context *ctx, struct sk_buff *skb)
 		struct tls_sw_context_rx *rx_ctx = ctx->priv_ctx_rx;
 
 		WRITE_ONCE(rx_ctx->key_update_pending, true);
+		TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSRXREKEYRECEIVED);
 	}
 
 	return 0;
@@ -1771,7 +1773,7 @@ static int tls_rx_one_record(struct sock *sk, struct msghdr *msg,
 	rxm->full_len -= prot->overhead_size;
 	tls_advance_record_sn(sk, prot, &tls_ctx->rx);
 
-	return tls_check_pending_rekey(tls_ctx, darg->skb);
+	return tls_check_pending_rekey(sk, tls_ctx, darg->skb);
 }
 
 int decrypt_skb(struct sock *sk, struct scatterlist *sgout)
-- 
cgit v1.2.3


From 35f7cad1743e04bf2944a2aadb6b6a42adc57bca Mon Sep 17 00:00:00 2001
From: Kory Maincent <kory.maincent@bootlin.com>
Date: Thu, 12 Dec 2024 18:06:43 +0100
Subject: net: Add the possibility to support a selected hwtstamp in netdevice

Introduce the description of a hwtstamp provider, mainly defined with a
the hwtstamp source and the phydev pointer.

Add a hwtstamp provider description within the netdev structure to
allow saving the hwtstamp we want to use. This prepares for future
support of an ethtool netlink command to select the desired hwtstamp
provider. By default, the old API that does not support hwtstamp
selectability is used, meaning the hwtstamp provider pointer is unset.

Signed-off-by: Kory Maincent <kory.maincent@bootlin.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy_device.c    | 10 ++++++++
 include/linux/net_tstamp.h      | 29 +++++++++++++++++++++++
 include/linux/netdevice.h       |  4 ++++
 include/uapi/linux/net_tstamp.h | 11 +++++++++
 net/core/dev_ioctl.c            | 41 ++++++++++++++++++++++++++++++--
 net/core/timestamping.c         | 52 +++++++++++++++++++++++++++++++++++++----
 6 files changed, 140 insertions(+), 7 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index b26bb33cd1d4..1a908af4175b 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -32,6 +32,7 @@
 #include <linux/phy_link_topology.h>
 #include <linux/pse-pd/pse.h>
 #include <linux/property.h>
+#include <linux/ptp_clock_kernel.h>
 #include <linux/rtnetlink.h>
 #include <linux/sfp.h>
 #include <linux/skbuff.h>
@@ -1998,6 +1999,15 @@ void phy_detach(struct phy_device *phydev)
 
 	phy_suspend(phydev);
 	if (dev) {
+		struct hwtstamp_provider *hwprov;
+
+		hwprov = rtnl_dereference(dev->hwprov);
+		/* Disable timestamp if it is the one selected */
+		if (hwprov && hwprov->phydev == phydev) {
+			rcu_assign_pointer(dev->hwprov, NULL);
+			kfree_rcu(hwprov, rcu_head);
+		}
+
 		phydev->attached_dev->phydev = NULL;
 		phydev->attached_dev = NULL;
 		phy_link_topo_del_phy(dev, phydev);
diff --git a/include/linux/net_tstamp.h b/include/linux/net_tstamp.h
index 662074b08c94..ff0758e88ea1 100644
--- a/include/linux/net_tstamp.h
+++ b/include/linux/net_tstamp.h
@@ -19,6 +19,33 @@ enum hwtstamp_source {
 	HWTSTAMP_SOURCE_PHYLIB,
 };
 
+/**
+ * struct hwtstamp_provider_desc - hwtstamp provider description
+ *
+ * @index: index of the hwtstamp provider.
+ * @qualifier: hwtstamp provider qualifier.
+ */
+struct hwtstamp_provider_desc {
+	int index;
+	enum hwtstamp_provider_qualifier qualifier;
+};
+
+/**
+ * struct hwtstamp_provider - hwtstamp provider object
+ *
+ * @rcu_head: RCU callback used to free the struct.
+ * @source: source of the hwtstamp provider.
+ * @phydev: pointer of the phydev source in case a PTP coming from phylib
+ * @desc: hwtstamp provider description.
+ */
+
+struct hwtstamp_provider {
+	struct rcu_head rcu_head;
+	enum hwtstamp_source source;
+	struct phy_device *phydev;
+	struct hwtstamp_provider_desc desc;
+};
+
 /**
  * struct kernel_hwtstamp_config - Kernel copy of struct hwtstamp_config
  *
@@ -31,6 +58,7 @@ enum hwtstamp_source {
  *	copied the ioctl request back to user space
  * @source: indication whether timestamps should come from the netdev or from
  *	an attached phylib PHY
+ * @qualifier: qualifier of the hwtstamp provider
  *
  * Prefer using this structure for in-kernel processing of hardware
  * timestamping configuration, over the inextensible struct hwtstamp_config
@@ -43,6 +71,7 @@ struct kernel_hwtstamp_config {
 	struct ifreq *ifr;
 	bool copied_to_user;
 	enum hwtstamp_source source;
+	enum hwtstamp_provider_qualifier qualifier;
 };
 
 static inline void hwtstamp_config_to_kernel(struct kernel_hwtstamp_config *kernel_cfg,
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index d917949bba03..2593019ad5b1 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -82,6 +82,7 @@ struct xdp_metadata_ops;
 struct xdp_md;
 struct ethtool_netdev_state;
 struct phy_link_topology;
+struct hwtstamp_provider;
 
 typedef u32 xdp_features_t;
 
@@ -2045,6 +2046,7 @@ enum netdev_reg_state {
  *
  *	@neighbours:	List heads pointing to this device's neighbours'
  *			dev_list, one per address-family.
+ *	@hwprov: Tracks which PTP performs hardware packet time stamping.
  *
  *	FIXME: cleanup struct net_device such that network protocol info
  *	moves out.
@@ -2457,6 +2459,8 @@ struct net_device {
 
 	struct hlist_head neighbours[NEIGH_NR_TABLES];
 
+	struct hwtstamp_provider __rcu	*hwprov;
+
 	u8			priv[] ____cacheline_aligned
 				       __counted_by(priv_len);
 } ____cacheline_aligned;
diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h
index 858339d1c1c4..55b0ab51096c 100644
--- a/include/uapi/linux/net_tstamp.h
+++ b/include/uapi/linux/net_tstamp.h
@@ -13,6 +13,17 @@
 #include <linux/types.h>
 #include <linux/socket.h>   /* for SO_TIMESTAMPING */
 
+/*
+ * Possible type of hwtstamp provider. Mainly "precise" the default one
+ * is for IEEE 1588 quality and "approx" is for NICs DMA point.
+ */
+enum hwtstamp_provider_qualifier {
+	HWTSTAMP_PROVIDER_QUALIFIER_PRECISE,
+	HWTSTAMP_PROVIDER_QUALIFIER_APPROX,
+
+	HWTSTAMP_PROVIDER_QUALIFIER_CNT,
+};
+
 /* SO_TIMESTAMPING flags */
 enum {
 	SOF_TIMESTAMPING_TX_HARDWARE = (1<<0),
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index 1f09930fca26..087a57b7e4fa 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -6,6 +6,7 @@
 #include <linux/rtnetlink.h>
 #include <linux/net_tstamp.h>
 #include <linux/phylib_stubs.h>
+#include <linux/ptp_clock_kernel.h>
 #include <linux/wireless.h>
 #include <linux/if_bridge.h>
 #include <net/dsa_stubs.h>
@@ -269,6 +270,21 @@ static int dev_eth_ioctl(struct net_device *dev,
 int dev_get_hwtstamp_phylib(struct net_device *dev,
 			    struct kernel_hwtstamp_config *cfg)
 {
+	struct hwtstamp_provider *hwprov;
+
+	hwprov = rtnl_dereference(dev->hwprov);
+	if (hwprov) {
+		cfg->qualifier = hwprov->desc.qualifier;
+		if (hwprov->source == HWTSTAMP_SOURCE_PHYLIB &&
+		    hwprov->phydev)
+			return phy_hwtstamp_get(hwprov->phydev, cfg);
+
+		if (hwprov->source == HWTSTAMP_SOURCE_NETDEV)
+			return dev->netdev_ops->ndo_hwtstamp_get(dev, cfg);
+
+		return -EOPNOTSUPP;
+	}
+
 	if (phy_is_default_hwtstamp(dev->phydev))
 		return phy_hwtstamp_get(dev->phydev, cfg);
 
@@ -324,11 +340,32 @@ int dev_set_hwtstamp_phylib(struct net_device *dev,
 			    struct netlink_ext_ack *extack)
 {
 	const struct net_device_ops *ops = dev->netdev_ops;
-	bool phy_ts = phy_is_default_hwtstamp(dev->phydev);
 	struct kernel_hwtstamp_config old_cfg = {};
+	struct hwtstamp_provider *hwprov;
+	struct phy_device *phydev;
 	bool changed = false;
+	bool phy_ts;
 	int err;
 
+	hwprov = rtnl_dereference(dev->hwprov);
+	if (hwprov) {
+		if (hwprov->source == HWTSTAMP_SOURCE_PHYLIB &&
+		    hwprov->phydev) {
+			phy_ts = true;
+			phydev = hwprov->phydev;
+		} else if (hwprov->source == HWTSTAMP_SOURCE_NETDEV) {
+			phy_ts = false;
+		} else {
+			return -EOPNOTSUPP;
+		}
+
+		cfg->qualifier = hwprov->desc.qualifier;
+	} else {
+		phy_ts = phy_is_default_hwtstamp(dev->phydev);
+		if (phy_ts)
+			phydev = dev->phydev;
+	}
+
 	cfg->source = phy_ts ? HWTSTAMP_SOURCE_PHYLIB : HWTSTAMP_SOURCE_NETDEV;
 
 	if (phy_ts && dev->see_all_hwtstamp_requests) {
@@ -350,7 +387,7 @@ int dev_set_hwtstamp_phylib(struct net_device *dev,
 		changed = kernel_hwtstamp_config_changed(&old_cfg, cfg);
 
 	if (phy_ts) {
-		err = phy_hwtstamp_set(dev->phydev, cfg, extack);
+		err = phy_hwtstamp_set(phydev, cfg, extack);
 		if (err) {
 			if (changed)
 				ops->ndo_hwtstamp_set(dev, &old_cfg, NULL);
diff --git a/net/core/timestamping.c b/net/core/timestamping.c
index 3717fb152ecc..a50a7ef49ae8 100644
--- a/net/core/timestamping.c
+++ b/net/core/timestamping.c
@@ -9,6 +9,7 @@
 #include <linux/ptp_classify.h>
 #include <linux/skbuff.h>
 #include <linux/export.h>
+#include <linux/ptp_clock_kernel.h>
 
 static unsigned int classify(const struct sk_buff *skb)
 {
@@ -21,19 +22,39 @@ static unsigned int classify(const struct sk_buff *skb)
 
 void skb_clone_tx_timestamp(struct sk_buff *skb)
 {
+	struct hwtstamp_provider *hwprov;
 	struct mii_timestamper *mii_ts;
+	struct phy_device *phydev;
 	struct sk_buff *clone;
 	unsigned int type;
 
-	if (!skb->sk || !skb->dev ||
-	    !phy_is_default_hwtstamp(skb->dev->phydev))
+	if (!skb->sk || !skb->dev)
 		return;
 
+	rcu_read_lock();
+	hwprov = rcu_dereference(skb->dev->hwprov);
+	if (hwprov) {
+		if (hwprov->source != HWTSTAMP_SOURCE_PHYLIB ||
+		    !hwprov->phydev) {
+			rcu_read_unlock();
+			return;
+		}
+
+		phydev = hwprov->phydev;
+	} else {
+		phydev = skb->dev->phydev;
+		if (!phy_is_default_hwtstamp(phydev)) {
+			rcu_read_unlock();
+			return;
+		}
+	}
+	rcu_read_unlock();
+
 	type = classify(skb);
 	if (type == PTP_CLASS_NONE)
 		return;
 
-	mii_ts = skb->dev->phydev->mii_ts;
+	mii_ts = phydev->mii_ts;
 	if (likely(mii_ts->txtstamp)) {
 		clone = skb_clone_sk(skb);
 		if (!clone)
@@ -45,12 +66,33 @@ EXPORT_SYMBOL_GPL(skb_clone_tx_timestamp);
 
 bool skb_defer_rx_timestamp(struct sk_buff *skb)
 {
+	struct hwtstamp_provider *hwprov;
 	struct mii_timestamper *mii_ts;
+	struct phy_device *phydev;
 	unsigned int type;
 
-	if (!skb->dev || !phy_is_default_hwtstamp(skb->dev->phydev))
+	if (!skb->dev)
 		return false;
 
+	rcu_read_lock();
+	hwprov = rcu_dereference(skb->dev->hwprov);
+	if (hwprov) {
+		if (hwprov->source != HWTSTAMP_SOURCE_PHYLIB ||
+		    !hwprov->phydev) {
+			rcu_read_unlock();
+			return false;
+		}
+
+		phydev = hwprov->phydev;
+	} else {
+		phydev = skb->dev->phydev;
+		if (!phy_is_default_hwtstamp(phydev)) {
+			rcu_read_unlock();
+			return false;
+		}
+	}
+	rcu_read_unlock();
+
 	if (skb_headroom(skb) < ETH_HLEN)
 		return false;
 
@@ -63,7 +105,7 @@ bool skb_defer_rx_timestamp(struct sk_buff *skb)
 	if (type == PTP_CLASS_NONE)
 		return false;
 
-	mii_ts = skb->dev->phydev->mii_ts;
+	mii_ts = phydev->mii_ts;
 	if (likely(mii_ts->rxtstamp))
 		return mii_ts->rxtstamp(mii_ts, skb, type);
 
-- 
cgit v1.2.3


From b9e3f7dc9ed95daeb83cfa45b821cacaa01aa906 Mon Sep 17 00:00:00 2001
From: Kory Maincent <kory.maincent@bootlin.com>
Date: Thu, 12 Dec 2024 18:06:44 +0100
Subject: net: ethtool: tsinfo: Enhance tsinfo to support several hwtstamp by
 net topology

Either the MAC or the PHY can provide hwtstamp, so we should be able to
read the tsinfo for any hwtstamp provider.

Enhance 'get' command to retrieve tsinfo of hwtstamp providers within a
network topology.

Add support for a specific dump command to retrieve all hwtstamp
providers within the network topology, with added functionality for
filtered dump to target a single interface.

Signed-off-by: Kory Maincent <kory.maincent@bootlin.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/netlink/specs/ethtool.yaml       |  20 ++
 Documentation/networking/ethtool-netlink.rst   |   7 +-
 include/linux/ethtool.h                        |   4 +
 include/uapi/linux/ethtool_netlink_generated.h |  10 +
 net/ethtool/common.c                           | 141 +++++++++-
 net/ethtool/common.h                           |  13 +
 net/ethtool/netlink.c                          |   6 +-
 net/ethtool/netlink.h                          |   5 +-
 net/ethtool/ts.h                               |  20 ++
 net/ethtool/tsinfo.c                           | 358 ++++++++++++++++++++++++-
 10 files changed, 563 insertions(+), 21 deletions(-)
 create mode 100644 net/ethtool/ts.h

(limited to 'include/uapi')

diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml
index c7634e957d9c..4082816e5f3d 100644
--- a/Documentation/netlink/specs/ethtool.yaml
+++ b/Documentation/netlink/specs/ethtool.yaml
@@ -836,6 +836,20 @@ attribute-sets:
       -
         name: tx-err
         type: uint
+  -
+    name: ts-hwtstamp-provider
+    attr-cnt-name: __ethtool-a-ts-hwtstamp-provider-cnt
+    attributes:
+      -
+        name: unspec
+        type: unused
+        value: 0
+      -
+        name: index
+        type: u32
+      -
+        name: qualifier
+        type: u32
   -
     name: tsinfo
     attr-cnt-name: __ethtool-a-tsinfo-cnt
@@ -867,6 +881,10 @@ attribute-sets:
         name: stats
         type: nest
         nested-attributes: ts-stat
+      -
+        name: hwtstamp-provider
+        type: nest
+        nested-attributes: ts-hwtstamp-provider
   -
     name: cable-result
     attr-cnt-name: __ethtool-a-cable-result-cnt
@@ -1912,6 +1930,7 @@ operations:
         request:
           attributes:
             - header
+            - hwtstamp-provider
         reply:
           attributes:
             - header
@@ -1920,6 +1939,7 @@ operations:
             - rx-filters
             - phc-index
             - stats
+            - hwtstamp-provider
       dump: *tsinfo-get-op
     -
       name: cable-test-act
diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst
index b25926071ece..c585e2f0ddfa 100644
--- a/Documentation/networking/ethtool-netlink.rst
+++ b/Documentation/networking/ethtool-netlink.rst
@@ -1245,9 +1245,10 @@ Gets timestamping information like ``ETHTOOL_GET_TS_INFO`` ioctl request.
 
 Request contents:
 
-  =====================================  ======  ==========================
-  ``ETHTOOL_A_TSINFO_HEADER``            nested  request header
-  =====================================  ======  ==========================
+  ========================================  ======  ============================
+  ``ETHTOOL_A_TSINFO_HEADER``               nested  request header
+  ``ETHTOOL_A_TSINFO_HWTSTAMP_PROVIDER``    nested  PTP hw clock provider
+  ========================================  ======  ============================
 
 Kernel response contents:
 
diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index e217c6321ed0..f711bfd75c4d 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -711,6 +711,7 @@ struct ethtool_rxfh_param {
  * @cmd: command number = %ETHTOOL_GET_TS_INFO
  * @so_timestamping: bit mask of the sum of the supported SO_TIMESTAMPING flags
  * @phc_index: device index of the associated PHC, or -1 if there is none
+ * @phc_qualifier: qualifier of the associated PHC
  * @tx_types: bit mask of the supported hwtstamp_tx_types enumeration values
  * @rx_filters: bit mask of the supported hwtstamp_rx_filters enumeration values
  */
@@ -718,6 +719,7 @@ struct kernel_ethtool_ts_info {
 	u32 cmd;
 	u32 so_timestamping;
 	int phc_index;
+	enum hwtstamp_provider_qualifier phc_qualifier;
 	enum hwtstamp_tx_types tx_types;
 	enum hwtstamp_rx_filters rx_filters;
 };
@@ -749,6 +751,7 @@ struct kernel_ethtool_ts_info {
  *	@rss_context argument to @create_rxfh_context and friends.
  * @supported_coalesce_params: supported types of interrupt coalescing.
  * @supported_ring_params: supported ring params.
+ * @supported_hwtstamp_qualifiers: bitfield of supported hwtstamp qualifier.
  * @get_drvinfo: Report driver/device information. Modern drivers no
  *	longer have to implement this callback. Most fields are
  *	correctly filled in by the core using system information, or
@@ -966,6 +969,7 @@ struct ethtool_ops {
 	u32	rxfh_max_num_contexts;
 	u32	supported_coalesce_params;
 	u32	supported_ring_params;
+	u32	supported_hwtstamp_qualifiers;
 	void	(*get_drvinfo)(struct net_device *, struct ethtool_drvinfo *);
 	int	(*get_regs_len)(struct net_device *);
 	void	(*get_regs)(struct net_device *, struct ethtool_regs *, void *);
diff --git a/include/uapi/linux/ethtool_netlink_generated.h b/include/uapi/linux/ethtool_netlink_generated.h
index b58f352fe4f2..df289dde0f61 100644
--- a/include/uapi/linux/ethtool_netlink_generated.h
+++ b/include/uapi/linux/ethtool_netlink_generated.h
@@ -385,6 +385,15 @@ enum {
 	ETHTOOL_A_TS_STAT_MAX = (__ETHTOOL_A_TS_STAT_CNT - 1)
 };
 
+enum {
+	ETHTOOL_A_TS_HWTSTAMP_PROVIDER_UNSPEC,
+	ETHTOOL_A_TS_HWTSTAMP_PROVIDER_INDEX,
+	ETHTOOL_A_TS_HWTSTAMP_PROVIDER_QUALIFIER,
+
+	__ETHTOOL_A_TS_HWTSTAMP_PROVIDER_CNT,
+	ETHTOOL_A_TS_HWTSTAMP_PROVIDER_MAX = (__ETHTOOL_A_TS_HWTSTAMP_PROVIDER_CNT - 1)
+};
+
 enum {
 	ETHTOOL_A_TSINFO_UNSPEC,
 	ETHTOOL_A_TSINFO_HEADER,
@@ -393,6 +402,7 @@ enum {
 	ETHTOOL_A_TSINFO_RX_FILTERS,
 	ETHTOOL_A_TSINFO_PHC_INDEX,
 	ETHTOOL_A_TSINFO_STATS,
+	ETHTOOL_A_TSINFO_HWTSTAMP_PROVIDER,
 
 	__ETHTOOL_A_TSINFO_CNT,
 	ETHTOOL_A_TSINFO_MAX = (__ETHTOOL_A_TSINFO_CNT - 1)
diff --git a/net/ethtool/common.c b/net/ethtool/common.c
index 05ce4f8080b3..666db40bcfda 100644
--- a/net/ethtool/common.c
+++ b/net/ethtool/common.c
@@ -5,9 +5,12 @@
 #include <linux/phy.h>
 #include <linux/rtnetlink.h>
 #include <linux/ptp_clock_kernel.h>
+#include <linux/phy_link_topology.h>
 
 #include "netlink.h"
 #include "common.h"
+#include "../core/dev.h"
+
 
 const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] = {
 	[NETIF_F_SG_BIT] =               "tx-scatter-gather",
@@ -763,20 +766,91 @@ int ethtool_check_ops(const struct ethtool_ops *ops)
 	return 0;
 }
 
-int __ethtool_get_ts_info(struct net_device *dev, struct kernel_ethtool_ts_info *info)
+static void ethtool_init_tsinfo(struct kernel_ethtool_ts_info *info)
 {
-	const struct ethtool_ops *ops = dev->ethtool_ops;
-	struct phy_device *phydev = dev->phydev;
-	int err = 0;
-
 	memset(info, 0, sizeof(*info));
 	info->cmd = ETHTOOL_GET_TS_INFO;
 	info->phc_index = -1;
+}
+
+int ethtool_net_get_ts_info_by_phc(struct net_device *dev,
+				   struct kernel_ethtool_ts_info *info,
+				   struct hwtstamp_provider_desc *hwprov_desc)
+{
+	const struct ethtool_ops *ops = dev->ethtool_ops;
+	int err;
+
+	if (!ops->get_ts_info)
+		return -ENODEV;
+
+	/* Does ptp comes from netdev */
+	ethtool_init_tsinfo(info);
+	info->phc_qualifier = hwprov_desc->qualifier;
+	err = ops->get_ts_info(dev, info);
+	if (err)
+		return err;
+
+	if (info->phc_index == hwprov_desc->index &&
+	    net_support_hwtstamp_qualifier(dev, hwprov_desc->qualifier))
+		return 0;
+
+	return -ENODEV;
+}
+
+int
+ethtool_phy_get_ts_info_by_phc(struct net_device *dev,
+			       struct kernel_ethtool_ts_info *info,
+			       struct hwtstamp_provider_desc *hwprov_desc)
+{
+	int err;
+
+	/* Only precise qualifier is supported in phydev */
+	if (hwprov_desc->qualifier != HWTSTAMP_PROVIDER_QUALIFIER_PRECISE)
+		return -ENODEV;
+
+	/* Look in the phy topology */
+	if (dev->link_topo) {
+		struct phy_device_node *pdn;
+		unsigned long phy_index;
+
+		xa_for_each(&dev->link_topo->phys, phy_index, pdn) {
+			if (!phy_has_tsinfo(pdn->phy))
+				continue;
+
+			ethtool_init_tsinfo(info);
+			err = phy_ts_info(pdn->phy, info);
+			if (err)
+				return err;
+
+			if (info->phc_index == hwprov_desc->index)
+				return 0;
+		}
+		return -ENODEV;
+	}
+
+	/* Look on the dev->phydev */
+	if (phy_has_tsinfo(dev->phydev)) {
+		ethtool_init_tsinfo(info);
+		err = phy_ts_info(dev->phydev, info);
+		if (err)
+			return err;
+
+		if (info->phc_index == hwprov_desc->index)
+			return 0;
+	}
+
+	return -ENODEV;
+}
+
+int ethtool_get_ts_info_by_phc(struct net_device *dev,
+			       struct kernel_ethtool_ts_info *info,
+			       struct hwtstamp_provider_desc *hwprov_desc)
+{
+	int err;
 
-	if (phy_is_default_hwtstamp(phydev) && phy_has_tsinfo(phydev))
-		err = phy_ts_info(phydev, info);
-	else if (ops->get_ts_info)
-		err = ops->get_ts_info(dev, info);
+	err = ethtool_net_get_ts_info_by_phc(dev, info, hwprov_desc);
+	if (err == -ENODEV)
+		err = ethtool_phy_get_ts_info_by_phc(dev, info, hwprov_desc);
 
 	info->so_timestamping |= SOF_TIMESTAMPING_RX_SOFTWARE |
 				 SOF_TIMESTAMPING_SOFTWARE;
@@ -784,6 +858,55 @@ int __ethtool_get_ts_info(struct net_device *dev, struct kernel_ethtool_ts_info
 	return err;
 }
 
+int __ethtool_get_ts_info(struct net_device *dev,
+			  struct kernel_ethtool_ts_info *info)
+{
+	struct hwtstamp_provider *hwprov;
+
+	hwprov = rtnl_dereference(dev->hwprov);
+	/* No provider specified, use default behavior */
+	if (!hwprov) {
+		const struct ethtool_ops *ops = dev->ethtool_ops;
+		struct phy_device *phydev = dev->phydev;
+		int err = 0;
+
+		ethtool_init_tsinfo(info);
+		if (phy_is_default_hwtstamp(phydev) &&
+		    phy_has_tsinfo(phydev))
+			err = phy_ts_info(phydev, info);
+		else if (ops->get_ts_info)
+			err = ops->get_ts_info(dev, info);
+
+		info->so_timestamping |= SOF_TIMESTAMPING_RX_SOFTWARE |
+					 SOF_TIMESTAMPING_SOFTWARE;
+
+		return err;
+	}
+
+	return ethtool_get_ts_info_by_phc(dev, info, &hwprov->desc);
+}
+
+bool net_support_hwtstamp_qualifier(struct net_device *dev,
+				    enum hwtstamp_provider_qualifier qualifier)
+{
+	const struct ethtool_ops *ops = dev->ethtool_ops;
+
+	if (!ops)
+		return false;
+
+	/* Return true with precise qualifier and with NIC without
+	 * qualifier description to not break the old behavior.
+	 */
+	if (!ops->supported_hwtstamp_qualifiers &&
+	    qualifier == HWTSTAMP_PROVIDER_QUALIFIER_PRECISE)
+		return true;
+
+	if (ops->supported_hwtstamp_qualifiers & BIT(qualifier))
+		return true;
+
+	return false;
+}
+
 int ethtool_get_phc_vclocks(struct net_device *dev, int **vclock_index)
 {
 	struct kernel_ethtool_ts_info info = { };
diff --git a/net/ethtool/common.h b/net/ethtool/common.h
index 4a2de3ce7354..f5119204c8ff 100644
--- a/net/ethtool/common.h
+++ b/net/ethtool/common.h
@@ -21,6 +21,7 @@ struct link_mode_info {
 };
 
 struct genl_info;
+struct hwtstamp_provider_desc;
 
 extern const char
 netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN];
@@ -49,6 +50,18 @@ int ethtool_check_max_channel(struct net_device *dev,
 			      struct genl_info *info);
 int ethtool_check_rss_ctx_busy(struct net_device *dev, u32 rss_context);
 int __ethtool_get_ts_info(struct net_device *dev, struct kernel_ethtool_ts_info *info);
+int ethtool_get_ts_info_by_phc(struct net_device *dev,
+			       struct kernel_ethtool_ts_info *info,
+			       struct hwtstamp_provider_desc *hwprov_desc);
+int ethtool_net_get_ts_info_by_phc(struct net_device *dev,
+				   struct kernel_ethtool_ts_info *info,
+				   struct hwtstamp_provider_desc *hwprov_desc);
+int
+ethtool_phy_get_ts_info_by_phc(struct net_device *dev,
+			       struct kernel_ethtool_ts_info *info,
+			       struct hwtstamp_provider_desc *hwprov_desc);
+bool net_support_hwtstamp_qualifier(struct net_device *dev,
+				    enum hwtstamp_provider_qualifier qualifier);
 
 extern const struct ethtool_phy_ops *ethtool_phy_ops;
 extern const struct ethtool_pse_ops *ethtool_pse_ops;
diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c
index e3f0ef6b851b..6ae1d91f36e7 100644
--- a/net/ethtool/netlink.c
+++ b/net/ethtool/netlink.c
@@ -1074,9 +1074,9 @@ static const struct genl_ops ethtool_genl_ops[] = {
 	{
 		.cmd	= ETHTOOL_MSG_TSINFO_GET,
 		.doit	= ethnl_default_doit,
-		.start	= ethnl_default_start,
-		.dumpit	= ethnl_default_dumpit,
-		.done	= ethnl_default_done,
+		.start	= ethnl_tsinfo_start,
+		.dumpit	= ethnl_tsinfo_dumpit,
+		.done	= ethnl_tsinfo_done,
 		.policy = ethnl_tsinfo_get_policy,
 		.maxattr = ARRAY_SIZE(ethnl_tsinfo_get_policy) - 1,
 	},
diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h
index 203b08eb6c6f..960cda13e4fc 100644
--- a/net/ethtool/netlink.h
+++ b/net/ethtool/netlink.h
@@ -464,7 +464,7 @@ extern const struct nla_policy ethnl_pause_get_policy[ETHTOOL_A_PAUSE_STATS_SRC
 extern const struct nla_policy ethnl_pause_set_policy[ETHTOOL_A_PAUSE_TX + 1];
 extern const struct nla_policy ethnl_eee_get_policy[ETHTOOL_A_EEE_HEADER + 1];
 extern const struct nla_policy ethnl_eee_set_policy[ETHTOOL_A_EEE_TX_LPI_TIMER + 1];
-extern const struct nla_policy ethnl_tsinfo_get_policy[ETHTOOL_A_TSINFO_HEADER + 1];
+extern const struct nla_policy ethnl_tsinfo_get_policy[ETHTOOL_A_TSINFO_MAX + 1];
 extern const struct nla_policy ethnl_cable_test_act_policy[ETHTOOL_A_CABLE_TEST_HEADER + 1];
 extern const struct nla_policy ethnl_cable_test_tdr_act_policy[ETHTOOL_A_CABLE_TEST_TDR_CFG + 1];
 extern const struct nla_policy ethnl_tunnel_info_get_policy[ETHTOOL_A_TUNNEL_INFO_HEADER + 1];
@@ -499,6 +499,9 @@ int ethnl_phy_start(struct netlink_callback *cb);
 int ethnl_phy_doit(struct sk_buff *skb, struct genl_info *info);
 int ethnl_phy_dumpit(struct sk_buff *skb, struct netlink_callback *cb);
 int ethnl_phy_done(struct netlink_callback *cb);
+int ethnl_tsinfo_start(struct netlink_callback *cb);
+int ethnl_tsinfo_dumpit(struct sk_buff *skb, struct netlink_callback *cb);
+int ethnl_tsinfo_done(struct netlink_callback *cb);
 
 extern const char stats_std_names[__ETHTOOL_STATS_CNT][ETH_GSTRING_LEN];
 extern const char stats_eth_phy_names[__ETHTOOL_A_STATS_ETH_PHY_CNT][ETH_GSTRING_LEN];
diff --git a/net/ethtool/ts.h b/net/ethtool/ts.h
new file mode 100644
index 000000000000..d901a879a671
--- /dev/null
+++ b/net/ethtool/ts.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef _NET_ETHTOOL_TS_H
+#define _NET_ETHTOOL_TS_H
+
+#include "netlink.h"
+
+static const struct nla_policy
+ethnl_ts_hwtst_prov_policy[ETHTOOL_A_TS_HWTSTAMP_PROVIDER_MAX + 1] = {
+	[ETHTOOL_A_TS_HWTSTAMP_PROVIDER_INDEX] = { .type = NLA_U32 },
+	[ETHTOOL_A_TS_HWTSTAMP_PROVIDER_QUALIFIER] =
+		NLA_POLICY_MAX(NLA_U32, HWTSTAMP_PROVIDER_QUALIFIER_CNT - 1)
+};
+
+int ts_parse_hwtst_provider(const struct nlattr *nest,
+			    struct hwtstamp_provider_desc *hwprov_desc,
+			    struct netlink_ext_ack *extack,
+			    bool *mod);
+
+#endif /* _NET_ETHTOOL_TS_H */
diff --git a/net/ethtool/tsinfo.c b/net/ethtool/tsinfo.c
index 03d12d6f79ca..7e495a41aeec 100644
--- a/net/ethtool/tsinfo.c
+++ b/net/ethtool/tsinfo.c
@@ -1,13 +1,18 @@
 // SPDX-License-Identifier: GPL-2.0-only
 
 #include <linux/net_tstamp.h>
+#include <linux/phy.h>
+#include <linux/phy_link_topology.h>
+#include <linux/ptp_clock_kernel.h>
 
 #include "netlink.h"
 #include "common.h"
 #include "bitset.h"
+#include "ts.h"
 
 struct tsinfo_req_info {
 	struct ethnl_req_info		base;
+	struct hwtstamp_provider_desc	hwprov_desc;
 };
 
 struct tsinfo_reply_data {
@@ -16,34 +21,96 @@ struct tsinfo_reply_data {
 	struct ethtool_ts_stats		stats;
 };
 
+#define TSINFO_REQINFO(__req_base) \
+	container_of(__req_base, struct tsinfo_req_info, base)
+
 #define TSINFO_REPDATA(__reply_base) \
 	container_of(__reply_base, struct tsinfo_reply_data, base)
 
 #define ETHTOOL_TS_STAT_CNT \
 	(__ETHTOOL_A_TS_STAT_CNT - (ETHTOOL_A_TS_STAT_UNSPEC + 1))
 
-const struct nla_policy ethnl_tsinfo_get_policy[] = {
+const struct nla_policy ethnl_tsinfo_get_policy[ETHTOOL_A_TSINFO_MAX + 1] = {
 	[ETHTOOL_A_TSINFO_HEADER]		=
 		NLA_POLICY_NESTED(ethnl_header_policy_stats),
+	[ETHTOOL_A_TSINFO_HWTSTAMP_PROVIDER] =
+		NLA_POLICY_NESTED(ethnl_ts_hwtst_prov_policy),
 };
 
+int ts_parse_hwtst_provider(const struct nlattr *nest,
+			    struct hwtstamp_provider_desc *hwprov_desc,
+			    struct netlink_ext_ack *extack,
+			    bool *mod)
+{
+	struct nlattr *tb[ARRAY_SIZE(ethnl_ts_hwtst_prov_policy)];
+	int ret;
+
+	ret = nla_parse_nested(tb,
+			       ARRAY_SIZE(ethnl_ts_hwtst_prov_policy) - 1,
+			       nest,
+			       ethnl_ts_hwtst_prov_policy, extack);
+	if (ret < 0)
+		return ret;
+
+	if (NL_REQ_ATTR_CHECK(extack, nest, tb,
+			      ETHTOOL_A_TS_HWTSTAMP_PROVIDER_INDEX) ||
+	    NL_REQ_ATTR_CHECK(extack, nest, tb,
+			      ETHTOOL_A_TS_HWTSTAMP_PROVIDER_QUALIFIER))
+		return -EINVAL;
+
+	ethnl_update_u32(&hwprov_desc->index,
+			 tb[ETHTOOL_A_TS_HWTSTAMP_PROVIDER_INDEX],
+			 mod);
+	ethnl_update_u32(&hwprov_desc->qualifier,
+			 tb[ETHTOOL_A_TS_HWTSTAMP_PROVIDER_QUALIFIER],
+			 mod);
+
+	return 0;
+}
+
+static int
+tsinfo_parse_request(struct ethnl_req_info *req_base, struct nlattr **tb,
+		     struct netlink_ext_ack *extack)
+{
+	struct tsinfo_req_info *req = TSINFO_REQINFO(req_base);
+	bool mod = false;
+
+	req->hwprov_desc.index = -1;
+
+	if (!tb[ETHTOOL_A_TSINFO_HWTSTAMP_PROVIDER])
+		return 0;
+
+	return ts_parse_hwtst_provider(tb[ETHTOOL_A_TSINFO_HWTSTAMP_PROVIDER],
+				       &req->hwprov_desc, extack, &mod);
+}
+
 static int tsinfo_prepare_data(const struct ethnl_req_info *req_base,
 			       struct ethnl_reply_data *reply_base,
 			       const struct genl_info *info)
 {
 	struct tsinfo_reply_data *data = TSINFO_REPDATA(reply_base);
+	struct tsinfo_req_info *req = TSINFO_REQINFO(req_base);
 	struct net_device *dev = reply_base->dev;
 	int ret;
 
 	ret = ethnl_ops_begin(dev);
 	if (ret < 0)
 		return ret;
+
+	if (req->hwprov_desc.index != -1) {
+		ret = ethtool_get_ts_info_by_phc(dev, &data->ts_info,
+						 &req->hwprov_desc);
+		ethnl_ops_complete(dev);
+		return ret;
+	}
+
 	if (req_base->flags & ETHTOOL_FLAG_STATS) {
 		ethtool_stats_init((u64 *)&data->stats,
 				   sizeof(data->stats) / sizeof(u64));
 		if (dev->ethtool_ops->get_ts_stats)
 			dev->ethtool_ops->get_ts_stats(dev, &data->stats);
 	}
+
 	ret = __ethtool_get_ts_info(dev, &data->ts_info);
 	ethnl_ops_complete(dev);
 
@@ -87,8 +154,11 @@ static int tsinfo_reply_size(const struct ethnl_req_info *req_base,
 			return ret;
 		len += ret;	/* _TSINFO_RX_FILTERS */
 	}
-	if (ts_info->phc_index >= 0)
+	if (ts_info->phc_index >= 0) {
 		len += nla_total_size(sizeof(u32));	/* _TSINFO_PHC_INDEX */
+		/* _TSINFO_HWTSTAMP_PROVIDER */
+		len += nla_total_size(0) + 2 * nla_total_size(sizeof(u32));
+	}
 	if (req_base->flags & ETHTOOL_FLAG_STATS)
 		len += nla_total_size(0) + /* _TSINFO_STATS */
 		       nla_total_size_64bit(sizeof(u64)) * ETHTOOL_TS_STAT_CNT;
@@ -163,9 +233,29 @@ static int tsinfo_fill_reply(struct sk_buff *skb,
 		if (ret < 0)
 			return ret;
 	}
-	if (ts_info->phc_index >= 0 &&
-	    nla_put_u32(skb, ETHTOOL_A_TSINFO_PHC_INDEX, ts_info->phc_index))
-		return -EMSGSIZE;
+	if (ts_info->phc_index >= 0) {
+		struct nlattr *nest;
+
+		ret = nla_put_u32(skb, ETHTOOL_A_TSINFO_PHC_INDEX,
+				  ts_info->phc_index);
+		if (ret)
+			return -EMSGSIZE;
+
+		nest = nla_nest_start(skb, ETHTOOL_A_TSINFO_HWTSTAMP_PROVIDER);
+		if (!nest)
+			return -EMSGSIZE;
+
+		if (nla_put_u32(skb, ETHTOOL_A_TS_HWTSTAMP_PROVIDER_INDEX,
+				ts_info->phc_index) ||
+		    nla_put_u32(skb,
+				ETHTOOL_A_TS_HWTSTAMP_PROVIDER_QUALIFIER,
+				ts_info->phc_qualifier)) {
+			nla_nest_cancel(skb, nest);
+			return -EMSGSIZE;
+		}
+
+		nla_nest_end(skb, nest);
+	}
 	if (req_base->flags & ETHTOOL_FLAG_STATS &&
 	    tsinfo_put_stats(skb, &data->stats))
 		return -EMSGSIZE;
@@ -173,6 +263,263 @@ static int tsinfo_fill_reply(struct sk_buff *skb,
 	return 0;
 }
 
+struct ethnl_tsinfo_dump_ctx {
+	struct tsinfo_req_info		*req_info;
+	struct tsinfo_reply_data	*reply_data;
+	unsigned long			pos_ifindex;
+	bool				netdev_dump_done;
+	unsigned long			pos_phyindex;
+	enum hwtstamp_provider_qualifier pos_phcqualifier;
+};
+
+static void *ethnl_tsinfo_prepare_dump(struct sk_buff *skb,
+				       struct net_device *dev,
+				       struct tsinfo_reply_data *reply_data,
+				       struct netlink_callback *cb)
+{
+	struct ethnl_tsinfo_dump_ctx *ctx = (void *)cb->ctx;
+	void *ehdr = NULL;
+
+	ehdr = ethnl_dump_put(skb, cb,
+			      ETHTOOL_MSG_TSINFO_GET_REPLY);
+	if (!ehdr)
+		return ERR_PTR(-EMSGSIZE);
+
+	reply_data = ctx->reply_data;
+	memset(reply_data, 0, sizeof(*reply_data));
+	reply_data->base.dev = dev;
+	memset(&reply_data->ts_info, 0, sizeof(reply_data->ts_info));
+
+	return ehdr;
+}
+
+static int ethnl_tsinfo_end_dump(struct sk_buff *skb,
+				 struct net_device *dev,
+				 struct tsinfo_req_info *req_info,
+				 struct tsinfo_reply_data *reply_data,
+				 void *ehdr)
+{
+	int ret;
+
+	reply_data->ts_info.so_timestamping |= SOF_TIMESTAMPING_RX_SOFTWARE |
+					       SOF_TIMESTAMPING_SOFTWARE;
+
+	ret = ethnl_fill_reply_header(skb, dev, ETHTOOL_A_TSINFO_HEADER);
+	if (ret < 0)
+		return ret;
+
+	ret = tsinfo_fill_reply(skb, &req_info->base, &reply_data->base);
+	if (ret < 0)
+		return ret;
+
+	reply_data->base.dev = NULL;
+	genlmsg_end(skb, ehdr);
+
+	return ret;
+}
+
+static int ethnl_tsinfo_dump_one_phydev(struct sk_buff *skb,
+					struct net_device *dev,
+					struct phy_device *phydev,
+					struct netlink_callback *cb)
+{
+	struct ethnl_tsinfo_dump_ctx *ctx = (void *)cb->ctx;
+	struct tsinfo_reply_data *reply_data;
+	struct tsinfo_req_info *req_info;
+	void *ehdr = NULL;
+	int ret = 0;
+
+	if (!phy_has_tsinfo(phydev))
+		return -EOPNOTSUPP;
+
+	reply_data = ctx->reply_data;
+	req_info = ctx->req_info;
+	ehdr = ethnl_tsinfo_prepare_dump(skb, dev, reply_data, cb);
+	if (IS_ERR(ehdr))
+		return PTR_ERR(ehdr);
+
+	ret = phy_ts_info(phydev, &reply_data->ts_info);
+	if (ret < 0)
+		goto err;
+
+	ret = ethnl_tsinfo_end_dump(skb, dev, req_info, reply_data, ehdr);
+	if (ret < 0)
+		goto err;
+
+	return ret;
+err:
+	genlmsg_cancel(skb, ehdr);
+	return ret;
+}
+
+static int ethnl_tsinfo_dump_one_netdev(struct sk_buff *skb,
+					struct net_device *dev,
+					struct netlink_callback *cb)
+{
+	struct ethnl_tsinfo_dump_ctx *ctx = (void *)cb->ctx;
+	const struct ethtool_ops *ops = dev->ethtool_ops;
+	struct tsinfo_reply_data *reply_data;
+	struct tsinfo_req_info *req_info;
+	void *ehdr = NULL;
+	int ret = 0;
+
+	if (!ops->get_ts_info)
+		return -EOPNOTSUPP;
+
+	reply_data = ctx->reply_data;
+	req_info = ctx->req_info;
+	for (; ctx->pos_phcqualifier < HWTSTAMP_PROVIDER_QUALIFIER_CNT;
+	     ctx->pos_phcqualifier++) {
+		if (!net_support_hwtstamp_qualifier(dev,
+						    ctx->pos_phcqualifier))
+			continue;
+
+		ehdr = ethnl_tsinfo_prepare_dump(skb, dev, reply_data, cb);
+		if (IS_ERR(ehdr)) {
+			ret = PTR_ERR(ehdr);
+			goto err;
+		}
+
+		reply_data->ts_info.phc_qualifier = ctx->pos_phcqualifier;
+		ret = ops->get_ts_info(dev, &reply_data->ts_info);
+		if (ret < 0)
+			goto err;
+
+		ret = ethnl_tsinfo_end_dump(skb, dev, req_info, reply_data,
+					    ehdr);
+		if (ret < 0)
+			goto err;
+	}
+
+	return ret;
+
+err:
+	genlmsg_cancel(skb, ehdr);
+	return ret;
+}
+
+static int ethnl_tsinfo_dump_one_net_topo(struct sk_buff *skb,
+					  struct net_device *dev,
+					  struct netlink_callback *cb)
+{
+	struct ethnl_tsinfo_dump_ctx *ctx = (void *)cb->ctx;
+	struct phy_device_node *pdn;
+	int ret = 0;
+
+	if (!ctx->netdev_dump_done) {
+		ret = ethnl_tsinfo_dump_one_netdev(skb, dev, cb);
+		if (ret < 0 && ret != -EOPNOTSUPP)
+			return ret;
+		ctx->netdev_dump_done = true;
+	}
+
+	if (!dev->link_topo) {
+		if (phy_has_tsinfo(dev->phydev)) {
+			ret = ethnl_tsinfo_dump_one_phydev(skb, dev,
+							   dev->phydev, cb);
+			if (ret < 0 && ret != -EOPNOTSUPP)
+				return ret;
+		}
+
+		return 0;
+	}
+
+	xa_for_each_start(&dev->link_topo->phys, ctx->pos_phyindex, pdn,
+			  ctx->pos_phyindex) {
+		if (phy_has_tsinfo(pdn->phy)) {
+			ret = ethnl_tsinfo_dump_one_phydev(skb, dev,
+							   pdn->phy, cb);
+			if (ret < 0 && ret != -EOPNOTSUPP)
+				return ret;
+		}
+	}
+
+	return ret;
+}
+
+int ethnl_tsinfo_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct ethnl_tsinfo_dump_ctx *ctx = (void *)cb->ctx;
+	struct net *net = sock_net(skb->sk);
+	struct net_device *dev;
+	int ret = 0;
+
+	rtnl_lock();
+	if (ctx->req_info->base.dev) {
+		ret = ethnl_tsinfo_dump_one_net_topo(skb,
+						     ctx->req_info->base.dev,
+						     cb);
+	} else {
+		for_each_netdev_dump(net, dev, ctx->pos_ifindex) {
+			ret = ethnl_tsinfo_dump_one_net_topo(skb, dev, cb);
+			if (ret < 0 && ret != -EOPNOTSUPP)
+				break;
+			ctx->pos_phyindex = 0;
+			ctx->netdev_dump_done = false;
+			ctx->pos_phcqualifier = HWTSTAMP_PROVIDER_QUALIFIER_PRECISE;
+		}
+	}
+	rtnl_unlock();
+
+	return ret;
+}
+
+int ethnl_tsinfo_start(struct netlink_callback *cb)
+{
+	const struct genl_dumpit_info *info = genl_dumpit_info(cb);
+	struct ethnl_tsinfo_dump_ctx *ctx = (void *)cb->ctx;
+	struct nlattr **tb = info->info.attrs;
+	struct tsinfo_reply_data *reply_data;
+	struct tsinfo_req_info *req_info;
+	int ret;
+
+	BUILD_BUG_ON(sizeof(*ctx) > sizeof(cb->ctx));
+
+	req_info = kzalloc(sizeof(*req_info), GFP_KERNEL);
+	if (!req_info)
+		return -ENOMEM;
+	reply_data = kzalloc(sizeof(*reply_data), GFP_KERNEL);
+	if (!reply_data) {
+		ret = -ENOMEM;
+		goto free_req_info;
+	}
+
+	ret = ethnl_parse_header_dev_get(&req_info->base,
+					 tb[ETHTOOL_A_TSINFO_HEADER],
+					 sock_net(cb->skb->sk), cb->extack,
+					 false);
+	if (ret < 0)
+		goto free_reply_data;
+
+	ctx->req_info = req_info;
+	ctx->reply_data = reply_data;
+	ctx->pos_ifindex = 0;
+	ctx->pos_phyindex = 0;
+	ctx->netdev_dump_done = false;
+	ctx->pos_phcqualifier = HWTSTAMP_PROVIDER_QUALIFIER_PRECISE;
+
+	return 0;
+
+free_reply_data:
+	kfree(reply_data);
+free_req_info:
+	kfree(req_info);
+
+	return ret;
+}
+
+int ethnl_tsinfo_done(struct netlink_callback *cb)
+{
+	struct ethnl_tsinfo_dump_ctx *ctx = (void *)cb->ctx;
+	struct tsinfo_req_info *req_info = ctx->req_info;
+
+	ethnl_parse_header_dev_put(&req_info->base);
+	kfree(ctx->reply_data);
+	kfree(ctx->req_info);
+
+	return 0;
+}
+
 const struct ethnl_request_ops ethnl_tsinfo_request_ops = {
 	.request_cmd		= ETHTOOL_MSG_TSINFO_GET,
 	.reply_cmd		= ETHTOOL_MSG_TSINFO_GET_REPLY,
@@ -180,6 +527,7 @@ const struct ethnl_request_ops ethnl_tsinfo_request_ops = {
 	.req_info_size		= sizeof(struct tsinfo_req_info),
 	.reply_data_size	= sizeof(struct tsinfo_reply_data),
 
+	.parse_request		= tsinfo_parse_request,
 	.prepare_data		= tsinfo_prepare_data,
 	.reply_size		= tsinfo_reply_size,
 	.fill_reply		= tsinfo_fill_reply,
-- 
cgit v1.2.3


From 6e9e2eed4f39d52edf5fd006409d211facf49f6b Mon Sep 17 00:00:00 2001
From: Kory Maincent <kory.maincent@bootlin.com>
Date: Thu, 12 Dec 2024 18:06:45 +0100
Subject: net: ethtool: Add support for tsconfig command to get/set hwtstamp
 config

Introduce support for ETHTOOL_MSG_TSCONFIG_GET/SET ethtool netlink socket
to read and configure hwtstamp configuration of a PHC provider. Note that
simultaneous hwtstamp isn't supported; configuring a new one disables the
previous setting.

Signed-off-by: Kory Maincent <kory.maincent@bootlin.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/netlink/specs/ethtool.yaml       |  56 ++++
 Documentation/networking/ethtool-netlink.rst   |  75 +++++
 Documentation/networking/timestamping.rst      |  38 ++-
 include/uapi/linux/ethtool_netlink_generated.h |  16 +
 net/ethtool/Makefile                           |   2 +-
 net/ethtool/common.c                           |  27 +-
 net/ethtool/common.h                           |   2 +-
 net/ethtool/netlink.c                          |  18 +
 net/ethtool/netlink.h                          |   3 +
 net/ethtool/tsconfig.c                         | 444 +++++++++++++++++++++++++
 10 files changed, 655 insertions(+), 26 deletions(-)
 create mode 100644 net/ethtool/tsconfig.c

(limited to 'include/uapi')

diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml
index 4082816e5f3d..60f85fbf4156 100644
--- a/Documentation/netlink/specs/ethtool.yaml
+++ b/Documentation/netlink/specs/ethtool.yaml
@@ -1489,6 +1489,33 @@ attribute-sets:
       -
         name: downstream-sfp-name
         type: string
+  -
+    name: tsconfig
+    attr-cnt-name: __ethtool-a-tsconfig-cnt
+    attributes:
+      -
+        name: unspec
+        type: unused
+        value: 0
+      -
+        name: header
+        type: nest
+        nested-attributes: header
+      -
+        name: hwtstamp-provider
+        type: nest
+        nested-attributes: ts-hwtstamp-provider
+      -
+        name: tx-types
+        type: nest
+        nested-attributes: bitset
+      -
+        name: rx-filters
+        type: nest
+        nested-attributes: bitset
+      -
+        name: hwtstamp-flags
+        type: u32
 
 operations:
   enum-model: directional
@@ -2314,3 +2341,32 @@ operations:
       name: phy-ntf
       doc: Notification for change in PHY devices.
       notify: phy-get
+    -
+      name: tsconfig-get
+      doc: Get hwtstamp config.
+
+      attribute-set: tsconfig
+
+      do: &tsconfig-get-op
+        request:
+          attributes:
+            - header
+        reply:
+          attributes: &tsconfig
+            - header
+            - hwtstamp-provider
+            - tx-types
+            - rx-filters
+            - hwtstamp-flags
+      dump: *tsconfig-get-op
+    -
+      name: tsconfig-set
+      doc: Set hwtstamp config.
+
+      attribute-set: tsconfig
+
+      do:
+        request:
+          attributes: *tsconfig
+        reply:
+          attributes: *tsconfig
diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst
index c585e2f0ddfa..a7ba6368a4d5 100644
--- a/Documentation/networking/ethtool-netlink.rst
+++ b/Documentation/networking/ethtool-netlink.rst
@@ -237,6 +237,8 @@ Userspace to kernel:
   ``ETHTOOL_MSG_MM_SET``                set MAC merge layer parameters
   ``ETHTOOL_MSG_MODULE_FW_FLASH_ACT``   flash transceiver module firmware
   ``ETHTOOL_MSG_PHY_GET``               get Ethernet PHY information
+  ``ETHTOOL_MSG_TSCONFIG_GET``          get hw timestamping configuration
+  ``ETHTOOL_MSG_TSCONFIG_SET``          set hw timestamping configuration
   ===================================== =================================
 
 Kernel to userspace:
@@ -286,6 +288,8 @@ Kernel to userspace:
   ``ETHTOOL_MSG_MODULE_FW_FLASH_NTF``      transceiver module flash updates
   ``ETHTOOL_MSG_PHY_GET_REPLY``            Ethernet PHY information
   ``ETHTOOL_MSG_PHY_NTF``                  Ethernet PHY information change
+  ``ETHTOOL_MSG_TSCONFIG_GET_REPLY``       hw timestamping configuration
+  ``ETHTOOL_MSG_TSCONFIG_SET_REPLY``       new hw timestamping configuration
   ======================================== =================================
 
 ``GET`` requests are sent by userspace applications to retrieve device
@@ -2244,6 +2248,75 @@ Kernel response contents:
 When ``ETHTOOL_A_PHY_UPSTREAM_TYPE`` is PHY_UPSTREAM_PHY, the PHY's parent is
 another PHY.
 
+TSCONFIG_GET
+============
+
+Retrieves the information about the current hardware timestamping source and
+configuration.
+
+It is similar to the deprecated ``SIOCGHWTSTAMP`` ioctl request.
+
+Request contents:
+
+  ====================================  ======  ==========================
+  ``ETHTOOL_A_TSCONFIG_HEADER``         nested  request header
+  ====================================  ======  ==========================
+
+Kernel response contents:
+
+  ======================================== ======  ============================
+  ``ETHTOOL_A_TSCONFIG_HEADER``            nested  request header
+  ``ETHTOOL_A_TSCONFIG_HWTSTAMP_PROVIDER`` nested  PTP hw clock provider
+  ``ETHTOOL_A_TSCONFIG_TX_TYPES``          bitset  hwtstamp Tx type
+  ``ETHTOOL_A_TSCONFIG_RX_FILTERS``        bitset  hwtstamp Rx filter
+  ``ETHTOOL_A_TSCONFIG_HWTSTAMP_FLAGS``	   u32     hwtstamp flags
+  ======================================== ======  ============================
+
+When set the ``ETHTOOL_A_TSCONFIG_HWTSTAMP_PROVIDER`` attribute identifies the
+source of the hw timestamping provider. It is composed by
+``ETHTOOL_A_TS_HWTSTAMP_PROVIDER_INDEX`` attribute which describe the index of
+the PTP device and ``ETHTOOL_A_TS_HWTSTAMP_PROVIDER_QUALIFIER`` which describe
+the qualifier of the timestamp.
+
+When set the ``ETHTOOL_A_TSCONFIG_TX_TYPES``, ``ETHTOOL_A_TSCONFIG_RX_FILTERS``
+and the ``ETHTOOL_A_TSCONFIG_HWTSTAMP_FLAGS`` attributes identify the Tx
+type, the Rx filter and the flags configured for the current hw timestamping
+provider. The attributes are propagated to the driver through the following
+structure:
+
+.. kernel-doc:: include/linux/net_tstamp.h
+    :identifiers: kernel_hwtstamp_config
+
+TSCONFIG_SET
+============
+
+Set the information about the current hardware timestamping source and
+configuration.
+
+It is similar to the deprecated ``SIOCSHWTSTAMP`` ioctl request.
+
+Request contents:
+
+  ======================================== ======  ============================
+  ``ETHTOOL_A_TSCONFIG_HEADER``            nested  request header
+  ``ETHTOOL_A_TSCONFIG_HWTSTAMP_PROVIDER`` nested  PTP hw clock provider
+  ``ETHTOOL_A_TSCONFIG_TX_TYPES``          bitset  hwtstamp Tx type
+  ``ETHTOOL_A_TSCONFIG_RX_FILTERS``        bitset  hwtstamp Rx filter
+  ``ETHTOOL_A_TSCONFIG_HWTSTAMP_FLAGS``	   u32     hwtstamp flags
+  ======================================== ======  ============================
+
+Kernel response contents:
+
+  ======================================== ======  ============================
+  ``ETHTOOL_A_TSCONFIG_HEADER``            nested  request header
+  ``ETHTOOL_A_TSCONFIG_HWTSTAMP_PROVIDER`` nested  PTP hw clock provider
+  ``ETHTOOL_A_TSCONFIG_TX_TYPES``          bitset  hwtstamp Tx type
+  ``ETHTOOL_A_TSCONFIG_RX_FILTERS``        bitset  hwtstamp Rx filter
+  ``ETHTOOL_A_TSCONFIG_HWTSTAMP_FLAGS``	   u32     hwtstamp flags
+  ======================================== ======  ============================
+
+For a description of each attribute, see ``TSCONFIG_GET``.
+
 Request translation
 ===================
 
@@ -2352,4 +2425,6 @@ are netlink only.
   n/a                                 ``ETHTOOL_MSG_MM_SET``
   n/a                                 ``ETHTOOL_MSG_MODULE_FW_FLASH_ACT``
   n/a                                 ``ETHTOOL_MSG_PHY_GET``
+  ``SIOCGHWTSTAMP``                   ``ETHTOOL_MSG_TSCONFIG_GET``
+  ``SIOCSHWTSTAMP``                   ``ETHTOOL_MSG_TSCONFIG_SET``
   =================================== =====================================
diff --git a/Documentation/networking/timestamping.rst b/Documentation/networking/timestamping.rst
index b37bfbfc7d79..61ef9da10e28 100644
--- a/Documentation/networking/timestamping.rst
+++ b/Documentation/networking/timestamping.rst
@@ -525,8 +525,8 @@ implicitly defined. ts[0] holds a software timestamp if set, ts[1]
 is again deprecated and ts[2] holds a hardware timestamp if set.
 
 
-3. Hardware Timestamping configuration: SIOCSHWTSTAMP and SIOCGHWTSTAMP
-=======================================================================
+3. Hardware Timestamping configuration: ETHTOOL_MSG_TSCONFIG_SET/GET
+====================================================================
 
 Hardware time stamping must also be initialized for each device driver
 that is expected to do hardware time stamping. The parameter is defined in
@@ -539,12 +539,14 @@ include/uapi/linux/net_tstamp.h as::
 	};
 
 Desired behavior is passed into the kernel and to a specific device by
-calling ioctl(SIOCSHWTSTAMP) with a pointer to a struct ifreq whose
-ifr_data points to a struct hwtstamp_config. The tx_type and
-rx_filter are hints to the driver what it is expected to do. If
-the requested fine-grained filtering for incoming packets is not
-supported, the driver may time stamp more than just the requested types
-of packets.
+calling the tsconfig netlink socket ``ETHTOOL_MSG_TSCONFIG_SET``.
+The ``ETHTOOL_A_TSCONFIG_TX_TYPES``, ``ETHTOOL_A_TSCONFIG_RX_FILTERS`` and
+``ETHTOOL_A_TSCONFIG_HWTSTAMP_FLAGS`` netlink attributes are then used to set
+the struct hwtstamp_config accordingly.
+
+The ``ETHTOOL_A_TSCONFIG_HWTSTAMP_PROVIDER`` netlink nested attribute is used
+to select the source of the hardware time stamping. It is composed of an index
+for the device source and a qualifier for the type of time stamping.
 
 Drivers are free to use a more permissive configuration than the requested
 configuration. It is expected that drivers should only implement directly the
@@ -563,9 +565,16 @@ Only a processes with admin rights may change the configuration. User
 space is responsible to ensure that multiple processes don't interfere
 with each other and that the settings are reset.
 
-Any process can read the actual configuration by passing this
-structure to ioctl(SIOCGHWTSTAMP) in the same way.  However, this has
-not been implemented in all drivers.
+Any process can read the actual configuration by requesting tsconfig netlink
+socket ``ETHTOOL_MSG_TSCONFIG_GET``.
+
+The legacy configuration is the use of the ioctl(SIOCSHWTSTAMP) with a pointer
+to a struct ifreq whose ifr_data points to a struct hwtstamp_config.
+The tx_type and rx_filter are hints to the driver what it is expected to do.
+If the requested fine-grained filtering for incoming packets is not
+supported, the driver may time stamp more than just the requested types
+of packets. ioctl(SIOCGHWTSTAMP) is used in the same way as the
+ioctl(SIOCSHWTSTAMP). However, this has not been implemented in all drivers.
 
 ::
 
@@ -610,9 +619,10 @@ not been implemented in all drivers.
 --------------------------------------------------------
 
 A driver which supports hardware time stamping must support the
-SIOCSHWTSTAMP ioctl and update the supplied struct hwtstamp_config with
-the actual values as described in the section on SIOCSHWTSTAMP.  It
-should also support SIOCGHWTSTAMP.
+ndo_hwtstamp_set NDO or the legacy SIOCSHWTSTAMP ioctl and update the
+supplied struct hwtstamp_config with the actual values as described in
+the section on SIOCSHWTSTAMP. It should also support ndo_hwtstamp_get or
+the legacy SIOCGHWTSTAMP.
 
 Time stamps for received packets must be stored in the skb. To get a pointer
 to the shared time stamp structure of the skb call skb_hwtstamps(). Then
diff --git a/include/uapi/linux/ethtool_netlink_generated.h b/include/uapi/linux/ethtool_netlink_generated.h
index df289dde0f61..43993a2d68e5 100644
--- a/include/uapi/linux/ethtool_netlink_generated.h
+++ b/include/uapi/linux/ethtool_netlink_generated.h
@@ -694,6 +694,18 @@ enum {
 	ETHTOOL_A_PHY_MAX = (__ETHTOOL_A_PHY_CNT - 1)
 };
 
+enum {
+	ETHTOOL_A_TSCONFIG_UNSPEC,
+	ETHTOOL_A_TSCONFIG_HEADER,
+	ETHTOOL_A_TSCONFIG_HWTSTAMP_PROVIDER,
+	ETHTOOL_A_TSCONFIG_TX_TYPES,
+	ETHTOOL_A_TSCONFIG_RX_FILTERS,
+	ETHTOOL_A_TSCONFIG_HWTSTAMP_FLAGS,
+
+	__ETHTOOL_A_TSCONFIG_CNT,
+	ETHTOOL_A_TSCONFIG_MAX = (__ETHTOOL_A_TSCONFIG_CNT - 1)
+};
+
 enum {
 	ETHTOOL_MSG_USER_NONE = 0,
 	ETHTOOL_MSG_STRSET_GET = 1,
@@ -741,6 +753,8 @@ enum {
 	ETHTOOL_MSG_MM_SET,
 	ETHTOOL_MSG_MODULE_FW_FLASH_ACT,
 	ETHTOOL_MSG_PHY_GET,
+	ETHTOOL_MSG_TSCONFIG_GET,
+	ETHTOOL_MSG_TSCONFIG_SET,
 
 	__ETHTOOL_MSG_USER_CNT,
 	ETHTOOL_MSG_USER_MAX = (__ETHTOOL_MSG_USER_CNT - 1)
@@ -794,6 +808,8 @@ enum {
 	ETHTOOL_MSG_MODULE_FW_FLASH_NTF,
 	ETHTOOL_MSG_PHY_GET_REPLY,
 	ETHTOOL_MSG_PHY_NTF,
+	ETHTOOL_MSG_TSCONFIG_GET_REPLY,
+	ETHTOOL_MSG_TSCONFIG_SET_REPLY,
 
 	__ETHTOOL_MSG_KERNEL_CNT,
 	ETHTOOL_MSG_KERNEL_MAX = (__ETHTOOL_MSG_KERNEL_CNT - 1)
diff --git a/net/ethtool/Makefile b/net/ethtool/Makefile
index 9b540644ba31..a1490c4afe6b 100644
--- a/net/ethtool/Makefile
+++ b/net/ethtool/Makefile
@@ -9,4 +9,4 @@ ethtool_nl-y	:= netlink.o bitset.o strset.o linkinfo.o linkmodes.o rss.o \
 		   channels.o coalesce.o pause.o eee.o tsinfo.o cabletest.o \
 		   tunnels.o fec.o eeprom.o stats.o phc_vclocks.o mm.o \
 		   module.o cmis_fw_update.o cmis_cdb.o pse-pd.o plca.o mm.o \
-		   phy.o
+		   phy.o tsconfig.o
diff --git a/net/ethtool/common.c b/net/ethtool/common.c
index 666db40bcfda..02f941f667dd 100644
--- a/net/ethtool/common.c
+++ b/net/ethtool/common.c
@@ -797,7 +797,7 @@ int ethtool_net_get_ts_info_by_phc(struct net_device *dev,
 	return -ENODEV;
 }
 
-int
+struct phy_device *
 ethtool_phy_get_ts_info_by_phc(struct net_device *dev,
 			       struct kernel_ethtool_ts_info *info,
 			       struct hwtstamp_provider_desc *hwprov_desc)
@@ -806,7 +806,7 @@ ethtool_phy_get_ts_info_by_phc(struct net_device *dev,
 
 	/* Only precise qualifier is supported in phydev */
 	if (hwprov_desc->qualifier != HWTSTAMP_PROVIDER_QUALIFIER_PRECISE)
-		return -ENODEV;
+		return ERR_PTR(-ENODEV);
 
 	/* Look in the phy topology */
 	if (dev->link_topo) {
@@ -820,12 +820,12 @@ ethtool_phy_get_ts_info_by_phc(struct net_device *dev,
 			ethtool_init_tsinfo(info);
 			err = phy_ts_info(pdn->phy, info);
 			if (err)
-				return err;
+				return ERR_PTR(err);
 
 			if (info->phc_index == hwprov_desc->index)
-				return 0;
+				return pdn->phy;
 		}
-		return -ENODEV;
+		return ERR_PTR(-ENODEV);
 	}
 
 	/* Look on the dev->phydev */
@@ -833,13 +833,13 @@ ethtool_phy_get_ts_info_by_phc(struct net_device *dev,
 		ethtool_init_tsinfo(info);
 		err = phy_ts_info(dev->phydev, info);
 		if (err)
-			return err;
+			return ERR_PTR(err);
 
 		if (info->phc_index == hwprov_desc->index)
-			return 0;
+			return dev->phydev;
 	}
 
-	return -ENODEV;
+	return ERR_PTR(-ENODEV);
 }
 
 int ethtool_get_ts_info_by_phc(struct net_device *dev,
@@ -849,8 +849,15 @@ int ethtool_get_ts_info_by_phc(struct net_device *dev,
 	int err;
 
 	err = ethtool_net_get_ts_info_by_phc(dev, info, hwprov_desc);
-	if (err == -ENODEV)
-		err = ethtool_phy_get_ts_info_by_phc(dev, info, hwprov_desc);
+	if (err == -ENODEV) {
+		struct phy_device *phy;
+
+		phy = ethtool_phy_get_ts_info_by_phc(dev, info, hwprov_desc);
+		if (IS_ERR(phy))
+			err = PTR_ERR(phy);
+		else
+			err = 0;
+	}
 
 	info->so_timestamping |= SOF_TIMESTAMPING_RX_SOFTWARE |
 				 SOF_TIMESTAMPING_SOFTWARE;
diff --git a/net/ethtool/common.h b/net/ethtool/common.h
index f5119204c8ff..850eadde4bfc 100644
--- a/net/ethtool/common.h
+++ b/net/ethtool/common.h
@@ -56,7 +56,7 @@ int ethtool_get_ts_info_by_phc(struct net_device *dev,
 int ethtool_net_get_ts_info_by_phc(struct net_device *dev,
 				   struct kernel_ethtool_ts_info *info,
 				   struct hwtstamp_provider_desc *hwprov_desc);
-int
+struct phy_device *
 ethtool_phy_get_ts_info_by_phc(struct net_device *dev,
 			       struct kernel_ethtool_ts_info *info,
 			       struct hwtstamp_provider_desc *hwprov_desc);
diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c
index 6ae1d91f36e7..849c98e637c6 100644
--- a/net/ethtool/netlink.c
+++ b/net/ethtool/netlink.c
@@ -394,6 +394,8 @@ ethnl_default_requests[__ETHTOOL_MSG_USER_CNT] = {
 	[ETHTOOL_MSG_PLCA_GET_STATUS]	= &ethnl_plca_status_request_ops,
 	[ETHTOOL_MSG_MM_GET]		= &ethnl_mm_request_ops,
 	[ETHTOOL_MSG_MM_SET]		= &ethnl_mm_request_ops,
+	[ETHTOOL_MSG_TSCONFIG_GET]	= &ethnl_tsconfig_request_ops,
+	[ETHTOOL_MSG_TSCONFIG_SET]	= &ethnl_tsconfig_request_ops,
 };
 
 static struct ethnl_dump_ctx *ethnl_dump_context(struct netlink_callback *cb)
@@ -1243,6 +1245,22 @@ static const struct genl_ops ethtool_genl_ops[] = {
 		.policy = ethnl_phy_get_policy,
 		.maxattr = ARRAY_SIZE(ethnl_phy_get_policy) - 1,
 	},
+	{
+		.cmd	= ETHTOOL_MSG_TSCONFIG_GET,
+		.doit	= ethnl_default_doit,
+		.start	= ethnl_default_start,
+		.dumpit	= ethnl_default_dumpit,
+		.done	= ethnl_default_done,
+		.policy = ethnl_tsconfig_get_policy,
+		.maxattr = ARRAY_SIZE(ethnl_tsconfig_get_policy) - 1,
+	},
+	{
+		.cmd	= ETHTOOL_MSG_TSCONFIG_SET,
+		.flags	= GENL_UNS_ADMIN_PERM,
+		.doit	= ethnl_default_set_doit,
+		.policy = ethnl_tsconfig_set_policy,
+		.maxattr = ARRAY_SIZE(ethnl_tsconfig_set_policy) - 1,
+	},
 };
 
 static const struct genl_multicast_group ethtool_nl_mcgrps[] = {
diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h
index 960cda13e4fc..0a09298fff92 100644
--- a/net/ethtool/netlink.h
+++ b/net/ethtool/netlink.h
@@ -435,6 +435,7 @@ extern const struct ethnl_request_ops ethnl_plca_cfg_request_ops;
 extern const struct ethnl_request_ops ethnl_plca_status_request_ops;
 extern const struct ethnl_request_ops ethnl_mm_request_ops;
 extern const struct ethnl_request_ops ethnl_phy_request_ops;
+extern const struct ethnl_request_ops ethnl_tsconfig_request_ops;
 
 extern const struct nla_policy ethnl_header_policy[ETHTOOL_A_HEADER_FLAGS + 1];
 extern const struct nla_policy ethnl_header_policy_stats[ETHTOOL_A_HEADER_FLAGS + 1];
@@ -485,6 +486,8 @@ extern const struct nla_policy ethnl_mm_get_policy[ETHTOOL_A_MM_HEADER + 1];
 extern const struct nla_policy ethnl_mm_set_policy[ETHTOOL_A_MM_MAX + 1];
 extern const struct nla_policy ethnl_module_fw_flash_act_policy[ETHTOOL_A_MODULE_FW_FLASH_PASSWORD + 1];
 extern const struct nla_policy ethnl_phy_get_policy[ETHTOOL_A_PHY_HEADER + 1];
+extern const struct nla_policy ethnl_tsconfig_get_policy[ETHTOOL_A_TSCONFIG_HEADER + 1];
+extern const struct nla_policy ethnl_tsconfig_set_policy[ETHTOOL_A_TSCONFIG_MAX + 1];
 
 int ethnl_set_features(struct sk_buff *skb, struct genl_info *info);
 int ethnl_act_cable_test(struct sk_buff *skb, struct genl_info *info);
diff --git a/net/ethtool/tsconfig.c b/net/ethtool/tsconfig.c
new file mode 100644
index 000000000000..9188e088fb2f
--- /dev/null
+++ b/net/ethtool/tsconfig.c
@@ -0,0 +1,444 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/net_tstamp.h>
+#include <linux/ptp_clock_kernel.h>
+
+#include "netlink.h"
+#include "common.h"
+#include "bitset.h"
+#include "../core/dev.h"
+#include "ts.h"
+
+struct tsconfig_req_info {
+	struct ethnl_req_info base;
+};
+
+struct tsconfig_reply_data {
+	struct ethnl_reply_data		base;
+	struct hwtstamp_provider_desc	hwprov_desc;
+	struct {
+		u32 tx_type;
+		u32 rx_filter;
+		u32 flags;
+	} hwtst_config;
+};
+
+#define TSCONFIG_REPDATA(__reply_base) \
+	container_of(__reply_base, struct tsconfig_reply_data, base)
+
+const struct nla_policy ethnl_tsconfig_get_policy[ETHTOOL_A_TSCONFIG_HEADER + 1] = {
+	[ETHTOOL_A_TSCONFIG_HEADER]		=
+		NLA_POLICY_NESTED(ethnl_header_policy),
+};
+
+static int tsconfig_prepare_data(const struct ethnl_req_info *req_base,
+				 struct ethnl_reply_data *reply_base,
+				 const struct genl_info *info)
+{
+	struct tsconfig_reply_data *data = TSCONFIG_REPDATA(reply_base);
+	struct hwtstamp_provider *hwprov = NULL;
+	struct net_device *dev = reply_base->dev;
+	struct kernel_hwtstamp_config cfg = {};
+	int ret;
+
+	if (!dev->netdev_ops->ndo_hwtstamp_get)
+		return -EOPNOTSUPP;
+
+	ret = ethnl_ops_begin(dev);
+	if (ret < 0)
+		return ret;
+
+	ret = dev_get_hwtstamp_phylib(dev, &cfg);
+	if (ret)
+		goto out;
+
+	data->hwtst_config.tx_type = BIT(cfg.tx_type);
+	data->hwtst_config.rx_filter = BIT(cfg.rx_filter);
+	data->hwtst_config.flags = BIT(cfg.flags);
+
+	data->hwprov_desc.index = -1;
+	hwprov = rtnl_dereference(dev->hwprov);
+	if (hwprov) {
+		data->hwprov_desc.index = hwprov->desc.index;
+		data->hwprov_desc.qualifier = hwprov->desc.qualifier;
+	} else {
+		struct kernel_ethtool_ts_info ts_info = {};
+
+		ts_info.phc_index = -1;
+		ret = __ethtool_get_ts_info(dev, &ts_info);
+		if (ret)
+			goto out;
+
+		if (ts_info.phc_index == -1)
+			return -ENODEV;
+
+		data->hwprov_desc.index = ts_info.phc_index;
+		data->hwprov_desc.qualifier = ts_info.phc_qualifier;
+	}
+
+out:
+	ethnl_ops_complete(dev);
+	return ret;
+}
+
+static int tsconfig_reply_size(const struct ethnl_req_info *req_base,
+			       const struct ethnl_reply_data *reply_base)
+{
+	const struct tsconfig_reply_data *data = TSCONFIG_REPDATA(reply_base);
+	bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS;
+	int len = 0;
+	int ret;
+
+	BUILD_BUG_ON(__HWTSTAMP_TX_CNT > 32);
+	BUILD_BUG_ON(__HWTSTAMP_FILTER_CNT > 32);
+
+	if (data->hwtst_config.flags)
+		/* _TSCONFIG_HWTSTAMP_FLAGS */
+		len += nla_total_size(sizeof(u32));
+
+	if (data->hwtst_config.tx_type) {
+		ret = ethnl_bitset32_size(&data->hwtst_config.tx_type,
+					  NULL, __HWTSTAMP_TX_CNT,
+					  ts_tx_type_names, compact);
+		if (ret < 0)
+			return ret;
+		len += ret;	/* _TSCONFIG_TX_TYPES */
+	}
+	if (data->hwtst_config.rx_filter) {
+		ret = ethnl_bitset32_size(&data->hwtst_config.rx_filter,
+					  NULL, __HWTSTAMP_FILTER_CNT,
+					  ts_rx_filter_names, compact);
+		if (ret < 0)
+			return ret;
+		len += ret;	/* _TSCONFIG_RX_FILTERS */
+	}
+
+	if (data->hwprov_desc.index >= 0)
+		/* _TSCONFIG_HWTSTAMP_PROVIDER */
+		len += nla_total_size(0) +
+		       2 * nla_total_size(sizeof(u32));
+
+	return len;
+}
+
+static int tsconfig_fill_reply(struct sk_buff *skb,
+			       const struct ethnl_req_info *req_base,
+			       const struct ethnl_reply_data *reply_base)
+{
+	const struct tsconfig_reply_data *data = TSCONFIG_REPDATA(reply_base);
+	bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS;
+	int ret;
+
+	if (data->hwtst_config.flags) {
+		ret = nla_put_u32(skb, ETHTOOL_A_TSCONFIG_HWTSTAMP_FLAGS,
+				  data->hwtst_config.flags);
+		if (ret < 0)
+			return ret;
+	}
+
+	if (data->hwtst_config.tx_type) {
+		ret = ethnl_put_bitset32(skb, ETHTOOL_A_TSCONFIG_TX_TYPES,
+					 &data->hwtst_config.tx_type, NULL,
+					 __HWTSTAMP_TX_CNT,
+					 ts_tx_type_names, compact);
+		if (ret < 0)
+			return ret;
+	}
+
+	if (data->hwtst_config.rx_filter) {
+		ret = ethnl_put_bitset32(skb, ETHTOOL_A_TSCONFIG_RX_FILTERS,
+					 &data->hwtst_config.rx_filter,
+					 NULL, __HWTSTAMP_FILTER_CNT,
+					 ts_rx_filter_names, compact);
+		if (ret < 0)
+			return ret;
+	}
+
+	if (data->hwprov_desc.index >= 0) {
+		struct nlattr *nest;
+
+		nest = nla_nest_start(skb, ETHTOOL_A_TSCONFIG_HWTSTAMP_PROVIDER);
+		if (!nest)
+			return -EMSGSIZE;
+
+		if (nla_put_u32(skb, ETHTOOL_A_TS_HWTSTAMP_PROVIDER_INDEX,
+				data->hwprov_desc.index) ||
+		    nla_put_u32(skb,
+				ETHTOOL_A_TS_HWTSTAMP_PROVIDER_QUALIFIER,
+				data->hwprov_desc.qualifier)) {
+			nla_nest_cancel(skb, nest);
+			return -EMSGSIZE;
+		}
+
+		nla_nest_end(skb, nest);
+	}
+	return 0;
+}
+
+/* TSCONFIG_SET */
+const struct nla_policy ethnl_tsconfig_set_policy[ETHTOOL_A_TSCONFIG_MAX + 1] = {
+	[ETHTOOL_A_TSCONFIG_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy),
+	[ETHTOOL_A_TSCONFIG_HWTSTAMP_PROVIDER] =
+		NLA_POLICY_NESTED(ethnl_ts_hwtst_prov_policy),
+	[ETHTOOL_A_TSCONFIG_HWTSTAMP_FLAGS] = { .type = NLA_U32 },
+	[ETHTOOL_A_TSCONFIG_RX_FILTERS] = { .type = NLA_NESTED },
+	[ETHTOOL_A_TSCONFIG_TX_TYPES] = { .type = NLA_NESTED },
+};
+
+static int tsconfig_send_reply(struct net_device *dev, struct genl_info *info)
+{
+	struct tsconfig_reply_data *reply_data;
+	struct tsconfig_req_info *req_info;
+	struct sk_buff *rskb;
+	void *reply_payload;
+	int reply_len = 0;
+	int ret;
+
+	req_info = kzalloc(sizeof(*req_info), GFP_KERNEL);
+	if (!req_info)
+		return -ENOMEM;
+	reply_data = kmalloc(sizeof(*reply_data), GFP_KERNEL);
+	if (!reply_data) {
+		kfree(req_info);
+		return -ENOMEM;
+	}
+
+	ASSERT_RTNL();
+	reply_data->base.dev = dev;
+	ret = tsconfig_prepare_data(&req_info->base, &reply_data->base, info);
+	if (ret < 0)
+		goto err_cleanup;
+
+	ret = tsconfig_reply_size(&req_info->base, &reply_data->base);
+	if (ret < 0)
+		goto err_cleanup;
+
+	reply_len = ret + ethnl_reply_header_size();
+	rskb = ethnl_reply_init(reply_len, dev, ETHTOOL_MSG_TSCONFIG_SET_REPLY,
+				ETHTOOL_A_TSCONFIG_HEADER, info, &reply_payload);
+	if (!rskb)
+		goto err_cleanup;
+
+	ret = tsconfig_fill_reply(rskb, &req_info->base, &reply_data->base);
+	if (ret < 0)
+		goto err_cleanup;
+
+	genlmsg_end(rskb, reply_payload);
+	ret = genlmsg_reply(rskb, info);
+
+err_cleanup:
+	kfree(reply_data);
+	kfree(req_info);
+	return ret;
+}
+
+static int ethnl_set_tsconfig_validate(struct ethnl_req_info *req_base,
+				       struct genl_info *info)
+{
+	const struct net_device_ops *ops = req_base->dev->netdev_ops;
+
+	if (!ops->ndo_hwtstamp_set || !ops->ndo_hwtstamp_get)
+		return -EOPNOTSUPP;
+
+	return 1;
+}
+
+static struct hwtstamp_provider *
+tsconfig_set_hwprov_from_desc(struct net_device *dev,
+			      struct genl_info *info,
+			      struct hwtstamp_provider_desc *hwprov_desc)
+{
+	struct kernel_ethtool_ts_info ts_info;
+	struct hwtstamp_provider *hwprov;
+	struct nlattr **tb = info->attrs;
+	struct phy_device *phy = NULL;
+	enum hwtstamp_source source;
+	int ret;
+
+	ret = ethtool_net_get_ts_info_by_phc(dev, &ts_info, hwprov_desc);
+	if (!ret) {
+		/* Found */
+		source = HWTSTAMP_SOURCE_NETDEV;
+	} else {
+		phy = ethtool_phy_get_ts_info_by_phc(dev, &ts_info, hwprov_desc);
+		if (IS_ERR(phy)) {
+			if (PTR_ERR(phy) == -ENODEV)
+				NL_SET_ERR_MSG_ATTR(info->extack,
+						    tb[ETHTOOL_A_TSCONFIG_HWTSTAMP_PROVIDER],
+						    "phc not in this net device topology");
+			return ERR_CAST(phy);
+		}
+
+		source = HWTSTAMP_SOURCE_PHYLIB;
+	}
+
+	hwprov = kzalloc(sizeof(*hwprov), GFP_KERNEL);
+	if (!hwprov)
+		return ERR_PTR(-ENOMEM);
+
+	hwprov->desc.index = hwprov_desc->index;
+	hwprov->desc.qualifier = hwprov_desc->qualifier;
+	hwprov->source = source;
+	hwprov->phydev = phy;
+
+	return hwprov;
+}
+
+static int ethnl_set_tsconfig(struct ethnl_req_info *req_base,
+			      struct genl_info *info)
+{
+	struct kernel_hwtstamp_config hwtst_config = {0};
+	bool hwprov_mod = false, config_mod = false;
+	struct hwtstamp_provider *hwprov = NULL;
+	struct net_device *dev = req_base->dev;
+	struct nlattr **tb = info->attrs;
+	int ret;
+
+	BUILD_BUG_ON(__HWTSTAMP_TX_CNT >= 32);
+	BUILD_BUG_ON(__HWTSTAMP_FILTER_CNT >= 32);
+
+	if (!netif_device_present(dev))
+		return -ENODEV;
+
+	if (tb[ETHTOOL_A_TSCONFIG_HWTSTAMP_PROVIDER]) {
+		struct hwtstamp_provider_desc __hwprov_desc = {.index = -1};
+		struct hwtstamp_provider *__hwprov;
+
+		__hwprov = rtnl_dereference(dev->hwprov);
+		if (__hwprov) {
+			__hwprov_desc.index = __hwprov->desc.index;
+			__hwprov_desc.qualifier = __hwprov->desc.qualifier;
+		}
+
+		ret = ts_parse_hwtst_provider(tb[ETHTOOL_A_TSCONFIG_HWTSTAMP_PROVIDER],
+					      &__hwprov_desc, info->extack,
+					      &hwprov_mod);
+		if (ret < 0)
+			return ret;
+
+		if (hwprov_mod) {
+			hwprov = tsconfig_set_hwprov_from_desc(dev, info,
+							       &__hwprov_desc);
+			if (IS_ERR(hwprov))
+				return PTR_ERR(hwprov);
+		}
+	}
+
+	/* Get current hwtstamp config if we are not changing the
+	 * hwtstamp source. It will be zeroed in the other case.
+	 */
+	if (!hwprov_mod) {
+		ret = dev_get_hwtstamp_phylib(dev, &hwtst_config);
+		if (ret < 0 && ret != -EOPNOTSUPP)
+			goto err_free_hwprov;
+	}
+
+	/* Get the hwtstamp config from netlink */
+	if (tb[ETHTOOL_A_TSCONFIG_TX_TYPES]) {
+		u32 req_tx_type;
+
+		req_tx_type = BIT(hwtst_config.tx_type);
+		ret = ethnl_update_bitset32(&req_tx_type,
+					    __HWTSTAMP_TX_CNT,
+					    tb[ETHTOOL_A_TSCONFIG_TX_TYPES],
+					    ts_tx_type_names, info->extack,
+					    &config_mod);
+		if (ret < 0)
+			goto err_free_hwprov;
+
+		/* Select only one tx type at a time */
+		if (ffs(req_tx_type) != fls(req_tx_type)) {
+			ret = -EINVAL;
+			goto err_free_hwprov;
+		}
+
+		hwtst_config.tx_type = ffs(req_tx_type) - 1;
+	}
+
+	if (tb[ETHTOOL_A_TSCONFIG_RX_FILTERS]) {
+		u32 req_rx_filter;
+
+		req_rx_filter = BIT(hwtst_config.rx_filter);
+		ret = ethnl_update_bitset32(&req_rx_filter,
+					    __HWTSTAMP_FILTER_CNT,
+					    tb[ETHTOOL_A_TSCONFIG_RX_FILTERS],
+					    ts_rx_filter_names, info->extack,
+					    &config_mod);
+		if (ret < 0)
+			goto err_free_hwprov;
+
+		/* Select only one rx filter at a time */
+		if (ffs(req_rx_filter) != fls(req_rx_filter)) {
+			ret = -EINVAL;
+			goto err_free_hwprov;
+		}
+
+		hwtst_config.rx_filter = ffs(req_rx_filter) - 1;
+	}
+
+	if (tb[ETHTOOL_A_TSCONFIG_HWTSTAMP_FLAGS]) {
+		ethnl_update_u32(&hwtst_config.flags,
+				 tb[ETHTOOL_A_TSCONFIG_HWTSTAMP_FLAGS],
+				 &config_mod);
+	}
+
+	ret = net_hwtstamp_validate(&hwtst_config);
+	if (ret)
+		goto err_free_hwprov;
+
+	if (hwprov_mod) {
+		struct kernel_hwtstamp_config zero_config = {0};
+		struct hwtstamp_provider *__hwprov;
+
+		/* Disable current time stamping if we try to enable
+		 * another one
+		 */
+		ret = dev_set_hwtstamp_phylib(dev, &zero_config, info->extack);
+		if (ret < 0)
+			goto err_free_hwprov;
+
+		/* Change the selected hwtstamp source */
+		__hwprov = rcu_replace_pointer_rtnl(dev->hwprov, hwprov);
+		if (__hwprov)
+			kfree_rcu(__hwprov, rcu_head);
+	}
+
+	if (config_mod) {
+		ret = dev_set_hwtstamp_phylib(dev, &hwtst_config,
+					      info->extack);
+		if (ret < 0)
+			return ret;
+	}
+
+	if (hwprov_mod || config_mod) {
+		ret = tsconfig_send_reply(dev, info);
+		if (ret && ret != -EOPNOTSUPP) {
+			NL_SET_ERR_MSG(info->extack,
+				       "error while reading the new configuration set");
+			return ret;
+		}
+	}
+
+	/* tsconfig has no notification */
+	return 0;
+
+err_free_hwprov:
+	kfree(hwprov);
+
+	return ret;
+}
+
+const struct ethnl_request_ops ethnl_tsconfig_request_ops = {
+	.request_cmd		= ETHTOOL_MSG_TSCONFIG_GET,
+	.reply_cmd		= ETHTOOL_MSG_TSCONFIG_GET_REPLY,
+	.hdr_attr		= ETHTOOL_A_TSCONFIG_HEADER,
+	.req_info_size		= sizeof(struct tsconfig_req_info),
+	.reply_data_size	= sizeof(struct tsconfig_reply_data),
+
+	.prepare_data		= tsconfig_prepare_data,
+	.reply_size		= tsconfig_reply_size,
+	.fill_reply		= tsconfig_fill_reply,
+
+	.set_validate		= ethnl_set_tsconfig_validate,
+	.set			= ethnl_set_tsconfig,
+};
-- 
cgit v1.2.3


From 65c8c78cc74d5bcbc43f1f785a004796a2d78360 Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <daniel.lezcano@linaro.org>
Date: Thu, 12 Dec 2024 21:13:10 +0100
Subject: thermal/thresholds: Fix uapi header macros leading to a compilation
 error

The macros giving the direction of the crossing thresholds use the BIT
macro which is not exported to the userspace. Consequently when an
userspace program includes the header, it fails to compile.

Replace the macros by their litteral to allow the compilation of
userspace program using this header.

Fixes: 445936f9e258 ("thermal: core: Add user thresholds support")
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Link: https://patch.msgid.link/20241212201311.4143196-1-daniel.lezcano@linaro.org
[ rjw: Add Fixes: ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/uapi/linux/thermal.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/thermal.h b/include/uapi/linux/thermal.h
index ba8604bdf206..349718c271eb 100644
--- a/include/uapi/linux/thermal.h
+++ b/include/uapi/linux/thermal.h
@@ -3,8 +3,8 @@
 #define _UAPI_LINUX_THERMAL_H
 
 #define THERMAL_NAME_LENGTH	20
-#define THERMAL_THRESHOLD_WAY_UP	BIT(0)
-#define THERMAL_THRESHOLD_WAY_DOWN	BIT(1)
+#define THERMAL_THRESHOLD_WAY_UP	0x1
+#define THERMAL_THRESHOLD_WAY_DOWN	0x2
 
 enum thermal_device_mode {
 	THERMAL_DEVICE_DISABLED = 0,
-- 
cgit v1.2.3


From 5637797add2af632a5d037044ab1b0b35643902e Mon Sep 17 00:00:00 2001
From: Ashutosh Dixit <ashutosh.dixit@intel.com>
Date: Thu, 12 Dec 2024 14:49:03 -0800
Subject: drm/xe/oa/uapi: Expose an unblock after N reports OA property

Expose an "unblock after N reports" OA property, to allow userspace threads
to be woken up less frequently.

Co-developed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Reviewed-by: Jonathan Cavitt <jonathan.cavitt@intel.com>
Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20241212224903.1853862-1-ashutosh.dixit@intel.com
---
 drivers/gpu/drm/xe/xe_oa.c       | 30 ++++++++++++++++++++++++++----
 drivers/gpu/drm/xe/xe_oa_types.h |  3 +++
 drivers/gpu/drm/xe/xe_query.c    |  3 ++-
 include/uapi/drm/xe_drm.h        |  7 +++++++
 4 files changed, 38 insertions(+), 5 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
index ec88b18e9baa..56bf375a9d4b 100644
--- a/drivers/gpu/drm/xe/xe_oa.c
+++ b/drivers/gpu/drm/xe/xe_oa.c
@@ -97,6 +97,7 @@ struct xe_oa_open_param {
 	int num_syncs;
 	struct xe_sync_entry *syncs;
 	size_t oa_buffer_size;
+	int wait_num_reports;
 };
 
 struct xe_oa_config_bo {
@@ -241,11 +242,10 @@ static void oa_timestamp_clear(struct xe_oa_stream *stream, u32 *report)
 static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream)
 {
 	u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo);
+	u32 tail, hw_tail, partial_report_size, available;
 	int report_size = stream->oa_buffer.format->size;
-	u32 tail, hw_tail;
 	unsigned long flags;
 	bool pollin;
-	u32 partial_report_size;
 
 	spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
 
@@ -289,8 +289,8 @@ static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream)
 
 	stream->oa_buffer.tail = tail;
 
-	pollin = xe_oa_circ_diff(stream, stream->oa_buffer.tail,
-				 stream->oa_buffer.head) >= report_size;
+	available = xe_oa_circ_diff(stream, stream->oa_buffer.tail, stream->oa_buffer.head);
+	pollin = available >= stream->wait_num_reports * report_size;
 
 	spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
 
@@ -1285,6 +1285,17 @@ static int xe_oa_set_prop_oa_buffer_size(struct xe_oa *oa, u64 value,
 	return 0;
 }
 
+static int xe_oa_set_prop_wait_num_reports(struct xe_oa *oa, u64 value,
+					   struct xe_oa_open_param *param)
+{
+	if (!value) {
+		drm_dbg(&oa->xe->drm, "wait_num_reports %llu\n", value);
+		return -EINVAL;
+	}
+	param->wait_num_reports = value;
+	return 0;
+}
+
 static int xe_oa_set_prop_ret_inval(struct xe_oa *oa, u64 value,
 				    struct xe_oa_open_param *param)
 {
@@ -1306,6 +1317,7 @@ static const xe_oa_set_property_fn xe_oa_set_property_funcs_open[] = {
 	[DRM_XE_OA_PROPERTY_NUM_SYNCS] = xe_oa_set_prop_num_syncs,
 	[DRM_XE_OA_PROPERTY_SYNCS] = xe_oa_set_prop_syncs_user,
 	[DRM_XE_OA_PROPERTY_OA_BUFFER_SIZE] = xe_oa_set_prop_oa_buffer_size,
+	[DRM_XE_OA_PROPERTY_WAIT_NUM_REPORTS] = xe_oa_set_prop_wait_num_reports,
 };
 
 static const xe_oa_set_property_fn xe_oa_set_property_funcs_config[] = {
@@ -1321,6 +1333,7 @@ static const xe_oa_set_property_fn xe_oa_set_property_funcs_config[] = {
 	[DRM_XE_OA_PROPERTY_NUM_SYNCS] = xe_oa_set_prop_num_syncs,
 	[DRM_XE_OA_PROPERTY_SYNCS] = xe_oa_set_prop_syncs_user,
 	[DRM_XE_OA_PROPERTY_OA_BUFFER_SIZE] = xe_oa_set_prop_ret_inval,
+	[DRM_XE_OA_PROPERTY_WAIT_NUM_REPORTS] = xe_oa_set_prop_ret_inval,
 };
 
 static int xe_oa_user_ext_set_property(struct xe_oa *oa, enum xe_oa_user_extn_from from,
@@ -1797,6 +1810,7 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream,
 	stream->periodic = param->period_exponent > 0;
 	stream->period_exponent = param->period_exponent;
 	stream->no_preempt = param->no_preempt;
+	stream->wait_num_reports = param->wait_num_reports;
 
 	stream->xef = xe_file_get(param->xef);
 	stream->num_syncs = param->num_syncs;
@@ -2156,6 +2170,14 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f
 	if (!param.oa_buffer_size)
 		param.oa_buffer_size = DEFAULT_XE_OA_BUFFER_SIZE;
 
+	if (!param.wait_num_reports)
+		param.wait_num_reports = 1;
+	if (param.wait_num_reports > param.oa_buffer_size / f->size) {
+		drm_dbg(&oa->xe->drm, "wait_num_reports %d\n", param.wait_num_reports);
+		ret = -EINVAL;
+		goto err_exec_q;
+	}
+
 	ret = xe_oa_parse_syncs(oa, &param);
 	if (ret)
 		goto err_exec_q;
diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h
index df7793915628..2dcd3b9562e9 100644
--- a/drivers/gpu/drm/xe/xe_oa_types.h
+++ b/drivers/gpu/drm/xe/xe_oa_types.h
@@ -218,6 +218,9 @@ struct xe_oa_stream {
 	/** @pollin: Whether there is data available to read */
 	bool pollin;
 
+	/** @wait_num_reports: Number of reports to wait for before signalling pollin */
+	int wait_num_reports;
+
 	/** @periodic: Whether periodic sampling is currently enabled */
 	bool periodic;
 
diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
index d2a816f71bf2..c059639613f7 100644
--- a/drivers/gpu/drm/xe/xe_query.c
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -672,7 +672,8 @@ static int query_oa_units(struct xe_device *xe,
 			du->oa_unit_type = u->type;
 			du->oa_timestamp_freq = xe_oa_timestamp_frequency(gt);
 			du->capabilities = DRM_XE_OA_CAPS_BASE | DRM_XE_OA_CAPS_SYNCS |
-					   DRM_XE_OA_CAPS_OA_BUFFER_SIZE;
+					   DRM_XE_OA_CAPS_OA_BUFFER_SIZE |
+					   DRM_XE_OA_CAPS_WAIT_NUM_REPORTS;
 
 			j = 0;
 			for_each_hw_engine(hwe, gt, hwe_id) {
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 0383b52cbd86..f62689ca861a 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -1487,6 +1487,7 @@ struct drm_xe_oa_unit {
 #define DRM_XE_OA_CAPS_BASE		(1 << 0)
 #define DRM_XE_OA_CAPS_SYNCS		(1 << 1)
 #define DRM_XE_OA_CAPS_OA_BUFFER_SIZE	(1 << 2)
+#define DRM_XE_OA_CAPS_WAIT_NUM_REPORTS	(1 << 3)
 
 	/** @oa_timestamp_freq: OA timestamp freq */
 	__u64 oa_timestamp_freq;
@@ -1660,6 +1661,12 @@ enum drm_xe_oa_property_id {
 	 * buffer is allocated by default.
 	 */
 	DRM_XE_OA_PROPERTY_OA_BUFFER_SIZE,
+
+	/**
+	 * @DRM_XE_OA_PROPERTY_WAIT_NUM_REPORTS: Number of reports to wait
+	 * for before unblocking poll or read
+	 */
+	DRM_XE_OA_PROPERTY_WAIT_NUM_REPORTS,
 };
 
 /**
-- 
cgit v1.2.3


From e45469e594b255ef8d750ed5576698743450d2ac Mon Sep 17 00:00:00 2001
From: Anna Emese Nyiri <annaemesenyiri@gmail.com>
Date: Fri, 13 Dec 2024 09:44:57 +0100
Subject: sock: Introduce SO_RCVPRIORITY socket option

Add new socket option, SO_RCVPRIORITY, to include SO_PRIORITY in the
ancillary data returned by recvmsg().
This is analogous to the existing support for SO_RCVMARK,
as implemented in commit 6fd1d51cfa253 ("net: SO_RCVMARK socket option
for SO_MARK with recvmsg()").

Reviewed-by: Willem de Bruijn <willemb@google.com>
Suggested-by: Ferenc Fejes <fejes@inf.elte.hu>
Signed-off-by: Anna Emese Nyiri <annaemesenyiri@gmail.com>
Link: https://patch.msgid.link/20241213084457.45120-5-annaemesenyiri@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 arch/alpha/include/uapi/asm/socket.h    |  2 ++
 arch/mips/include/uapi/asm/socket.h     |  2 ++
 arch/parisc/include/uapi/asm/socket.h   |  2 ++
 arch/sparc/include/uapi/asm/socket.h    |  2 ++
 include/net/sock.h                      |  4 +++-
 include/uapi/asm-generic/socket.h       |  2 ++
 net/core/sock.c                         |  8 ++++++++
 net/socket.c                            | 11 +++++++++++
 tools/include/uapi/asm-generic/socket.h |  2 ++
 9 files changed, 34 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h
index 302507bf9b5d..3df5f2dd4c0f 100644
--- a/arch/alpha/include/uapi/asm/socket.h
+++ b/arch/alpha/include/uapi/asm/socket.h
@@ -148,6 +148,8 @@
 
 #define SCM_TS_OPT_ID		81
 
+#define SO_RCVPRIORITY		82
+
 #if !defined(__KERNEL__)
 
 #if __BITS_PER_LONG == 64
diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h
index d118d4731580..22fa8f19924a 100644
--- a/arch/mips/include/uapi/asm/socket.h
+++ b/arch/mips/include/uapi/asm/socket.h
@@ -159,6 +159,8 @@
 
 #define SCM_TS_OPT_ID		81
 
+#define SO_RCVPRIORITY		82
+
 #if !defined(__KERNEL__)
 
 #if __BITS_PER_LONG == 64
diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h
index d268d69bfcd2..aa9cd4b951fe 100644
--- a/arch/parisc/include/uapi/asm/socket.h
+++ b/arch/parisc/include/uapi/asm/socket.h
@@ -140,6 +140,8 @@
 
 #define SCM_TS_OPT_ID		0x404C
 
+#define SO_RCVPRIORITY		0x404D
+
 #if !defined(__KERNEL__)
 
 #if __BITS_PER_LONG == 64
diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h
index 113cd9f353e3..5b464a568664 100644
--- a/arch/sparc/include/uapi/asm/socket.h
+++ b/arch/sparc/include/uapi/asm/socket.h
@@ -141,6 +141,8 @@
 
 #define SCM_TS_OPT_ID            0x005a
 
+#define SO_RCVPRIORITY           0x005b
+
 #if !defined(__KERNEL__)
 
 
diff --git a/include/net/sock.h b/include/net/sock.h
index 316a34d6c48b..d4bdd3286e03 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -953,6 +953,7 @@ enum sock_flags {
 	SOCK_XDP, /* XDP is attached */
 	SOCK_TSTAMP_NEW, /* Indicates 64 bit timestamps always */
 	SOCK_RCVMARK, /* Receive SO_MARK  ancillary data with packet */
+	SOCK_RCVPRIORITY, /* Receive SO_PRIORITY ancillary data with packet */
 };
 
 #define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE))
@@ -2660,7 +2661,8 @@ static inline void sock_recv_cmsgs(struct msghdr *msg, struct sock *sk,
 {
 #define FLAGS_RECV_CMSGS ((1UL << SOCK_RXQ_OVFL)			| \
 			   (1UL << SOCK_RCVTSTAMP)			| \
-			   (1UL << SOCK_RCVMARK))
+			   (1UL << SOCK_RCVMARK)			|\
+			   (1UL << SOCK_RCVPRIORITY))
 #define TSFLAGS_ANY	  (SOF_TIMESTAMPING_SOFTWARE			| \
 			   SOF_TIMESTAMPING_RAW_HARDWARE)
 
diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h
index deacfd6dd197..aa5016ff3d91 100644
--- a/include/uapi/asm-generic/socket.h
+++ b/include/uapi/asm-generic/socket.h
@@ -143,6 +143,8 @@
 
 #define SCM_TS_OPT_ID		81
 
+#define SO_RCVPRIORITY		82
+
 #if !defined(__KERNEL__)
 
 #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__))
diff --git a/net/core/sock.c b/net/core/sock.c
index a3d9941c1d32..e7bcc8952248 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1519,6 +1519,10 @@ set_sndbuf:
 		sock_valbool_flag(sk, SOCK_RCVMARK, valbool);
 		break;
 
+	case SO_RCVPRIORITY:
+		sock_valbool_flag(sk, SOCK_RCVPRIORITY, valbool);
+		break;
+
 	case SO_RXQ_OVFL:
 		sock_valbool_flag(sk, SOCK_RXQ_OVFL, valbool);
 		break;
@@ -1947,6 +1951,10 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
 		v.val = sock_flag(sk, SOCK_RCVMARK);
 		break;
 
+	case SO_RCVPRIORITY:
+		v.val = sock_flag(sk, SOCK_RCVPRIORITY);
+		break;
+
 	case SO_RXQ_OVFL:
 		v.val = sock_flag(sk, SOCK_RXQ_OVFL);
 		break;
diff --git a/net/socket.c b/net/socket.c
index 9a117248f18f..16402b8be5a7 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1008,12 +1008,23 @@ static void sock_recv_mark(struct msghdr *msg, struct sock *sk,
 	}
 }
 
+static void sock_recv_priority(struct msghdr *msg, struct sock *sk,
+			       struct sk_buff *skb)
+{
+	if (sock_flag(sk, SOCK_RCVPRIORITY) && skb) {
+		__u32 priority = skb->priority;
+
+		put_cmsg(msg, SOL_SOCKET, SO_PRIORITY, sizeof(__u32), &priority);
+	}
+}
+
 void __sock_recv_cmsgs(struct msghdr *msg, struct sock *sk,
 		       struct sk_buff *skb)
 {
 	sock_recv_timestamp(msg, sk, skb);
 	sock_recv_drops(msg, sk, skb);
 	sock_recv_mark(msg, sk, skb);
+	sock_recv_priority(msg, sk, skb);
 }
 EXPORT_SYMBOL_GPL(__sock_recv_cmsgs);
 
diff --git a/tools/include/uapi/asm-generic/socket.h b/tools/include/uapi/asm-generic/socket.h
index 281df9139d2b..ffff554a5230 100644
--- a/tools/include/uapi/asm-generic/socket.h
+++ b/tools/include/uapi/asm-generic/socket.h
@@ -126,6 +126,8 @@
 
 #define SCM_TS_OPT_ID		78
 
+#define SO_RCVPRIORITY		79
+
 #if !defined(__KERNEL__)
 
 #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__))
-- 
cgit v1.2.3


From 915d2f0718a42ee0b334be34cc53664a865a5928 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Wed, 27 Nov 2024 16:55:32 -0800
Subject: KVM: Move KVM_REG_SIZE() definition to common uAPI header

Define KVM_REG_SIZE() in the common kvm.h header, and delete the arm64 and
RISC-V versions.  As evidenced by the surrounding definitions, all aspects
of the register size encoding are generic, i.e. RISC-V should have moved
arm64's definition to common code instead of copy+pasting.

Acked-by: Anup Patel <anup@brainfault.org>
Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
Reviewed-by: Muhammad Usama Anjum <usama.anjum@collabora.com>
Link: https://lore.kernel.org/r/20241128005547.4077116-2-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 arch/arm64/include/uapi/asm/kvm.h | 3 ---
 arch/riscv/include/uapi/asm/kvm.h | 3 ---
 include/uapi/linux/kvm.h          | 4 ++++
 3 files changed, 4 insertions(+), 6 deletions(-)

(limited to 'include/uapi')

diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index 66736ff04011..568bf858f319 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -43,9 +43,6 @@
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
 #define KVM_DIRTY_LOG_PAGE_OFFSET 64
 
-#define KVM_REG_SIZE(id)						\
-	(1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT))
-
 struct kvm_regs {
 	struct user_pt_regs regs;	/* sp = sp_el0 */
 
diff --git a/arch/riscv/include/uapi/asm/kvm.h b/arch/riscv/include/uapi/asm/kvm.h
index 3482c9a73d1b..9f60d6185077 100644
--- a/arch/riscv/include/uapi/asm/kvm.h
+++ b/arch/riscv/include/uapi/asm/kvm.h
@@ -211,9 +211,6 @@ struct kvm_riscv_sbi_sta {
 #define KVM_RISCV_TIMER_STATE_OFF	0
 #define KVM_RISCV_TIMER_STATE_ON	1
 
-#define KVM_REG_SIZE(id)		\
-	(1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT))
-
 /* If you need to interpret the index values, here is the key: */
 #define KVM_REG_RISCV_TYPE_MASK		0x00000000FF000000
 #define KVM_REG_RISCV_TYPE_SHIFT	24
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 502ea63b5d2e..343de0a51797 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1070,6 +1070,10 @@ struct kvm_dirty_tlb {
 
 #define KVM_REG_SIZE_SHIFT	52
 #define KVM_REG_SIZE_MASK	0x00f0000000000000ULL
+
+#define KVM_REG_SIZE(id)		\
+	(1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT))
+
 #define KVM_REG_SIZE_U8		0x0000000000000000ULL
 #define KVM_REG_SIZE_U16	0x0010000000000000ULL
 #define KVM_REG_SIZE_U32	0x0020000000000000ULL
-- 
cgit v1.2.3


From af5366bea2cb9dfb5da2880e1dff544f87505300 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Wed, 27 Nov 2024 17:33:42 -0800
Subject: KVM: x86: Drop the now unused KVM_X86_DISABLE_VALID_EXITS

Drop the KVM_X86_DISABLE_VALID_EXITS definition, as it is misleading, and
unused in KVM *because* it is misleading.  The set of exits that can be
disabled is dynamic, i.e. userspace (and KVM) must check KVM's actual
capabilities.

Suggested-by: Xiaoyao Li <xiaoyao.li@intel.com>
Link: https://lore.kernel.org/r/20241128013424.4096668-16-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 include/uapi/linux/kvm.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 343de0a51797..45e6d8fca9b9 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -617,10 +617,6 @@ struct kvm_ioeventfd {
 #define KVM_X86_DISABLE_EXITS_HLT            (1 << 1)
 #define KVM_X86_DISABLE_EXITS_PAUSE          (1 << 2)
 #define KVM_X86_DISABLE_EXITS_CSTATE         (1 << 3)
-#define KVM_X86_DISABLE_VALID_EXITS          (KVM_X86_DISABLE_EXITS_MWAIT | \
-                                              KVM_X86_DISABLE_EXITS_HLT | \
-                                              KVM_X86_DISABLE_EXITS_PAUSE | \
-                                              KVM_X86_DISABLE_EXITS_CSTATE)
 
 /* for KVM_ENABLE_CAP */
 struct kvm_enable_cap {
-- 
cgit v1.2.3


From e10c5cbd1c912c06d887c8a1980ac57e21d87b6f Mon Sep 17 00:00:00 2001
From: Lukas Wunner <lukas@wunner.de>
Date: Mon, 16 Dec 2024 20:18:41 +0100
Subject: PCI: Update code comment on PCI_EXP_LNKCAP_SLS for PCIe r3.0
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Niklas notes that the code comment on the PCI_EXP_LNKCAP_SLS macro is
outdated as it reflects the meaning of the field prior to PCIe r3.0.
Update it to avoid confusion.

Closes: https://lore.kernel.org/r/70829798889c6d779ca0f6cd3260a765780d1369.camel@kernel.org
Link: https://lore.kernel.org/r/6152bd17cbe0876365d5f4624fc317529f4bbc85.1734376438.git.lukas@wunner.de
Reported-by: Niklas Schnelle <niks@kernel.org>
Signed-off-by: Lukas Wunner <lukas@wunner.de>
Signed-off-by: Krzysztof Wilczyński <kwilczynski@kernel.org>
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Reviewed-by: Niklas Schnelle <schnelle@linux.ibm.com>
---
 include/uapi/linux/pci_regs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
index 1601c7ed5fab..02d0ba2999e0 100644
--- a/include/uapi/linux/pci_regs.h
+++ b/include/uapi/linux/pci_regs.h
@@ -533,7 +533,7 @@
 #define  PCI_EXP_DEVSTA_TRPND	0x0020	/* Transactions Pending */
 #define PCI_CAP_EXP_RC_ENDPOINT_SIZEOF_V1	12	/* v1 endpoints without link end here */
 #define PCI_EXP_LNKCAP		0x0c	/* Link Capabilities */
-#define  PCI_EXP_LNKCAP_SLS	0x0000000f /* Supported Link Speeds */
+#define  PCI_EXP_LNKCAP_SLS	0x0000000f /* Max Link Speed (prior to PCIe r3.0: Supported Link Speeds) */
 #define  PCI_EXP_LNKCAP_SLS_2_5GB 0x00000001 /* LNKCAP2 SLS Vector bit 0 */
 #define  PCI_EXP_LNKCAP_SLS_5_0GB 0x00000002 /* LNKCAP2 SLS Vector bit 1 */
 #define  PCI_EXP_LNKCAP_SLS_8_0GB 0x00000003 /* LNKCAP2 SLS Vector bit 2 */
-- 
cgit v1.2.3


From a5874fde3c0884a33ed4145101052318c5e17c74 Mon Sep 17 00:00:00 2001
From: Mickaël Salaün <mic@digikod.net>
Date: Thu, 12 Dec 2024 18:42:16 +0100
Subject: exec: Add a new AT_EXECVE_CHECK flag to execveat(2)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a new AT_EXECVE_CHECK flag to execveat(2) to check if a file would
be allowed for execution.  The main use case is for script interpreters
and dynamic linkers to check execution permission according to the
kernel's security policy. Another use case is to add context to access
logs e.g., which script (instead of interpreter) accessed a file.  As
any executable code, scripts could also use this check [1].

This is different from faccessat(2) + X_OK which only checks a subset of
access rights (i.e. inode permission and mount options for regular
files), but not the full context (e.g. all LSM access checks).  The main
use case for access(2) is for SUID processes to (partially) check access
on behalf of their caller.  The main use case for execveat(2) +
AT_EXECVE_CHECK is to check if a script execution would be allowed,
according to all the different restrictions in place.  Because the use
of AT_EXECVE_CHECK follows the exact kernel semantic as for a real
execution, user space gets the same error codes.

An interesting point of using execveat(2) instead of openat2(2) is that
it decouples the check from the enforcement.  Indeed, the security check
can be logged (e.g. with audit) without blocking an execution
environment not yet ready to enforce a strict security policy.

LSMs can control or log execution requests with
security_bprm_creds_for_exec().  However, to enforce a consistent and
complete access control (e.g. on binary's dependencies) LSMs should
restrict file executability, or measure executed files, with
security_file_open() by checking file->f_flags & __FMODE_EXEC.

Because AT_EXECVE_CHECK is dedicated to user space interpreters, it
doesn't make sense for the kernel to parse the checked files, look for
interpreters known to the kernel (e.g. ELF, shebang), and return ENOEXEC
if the format is unknown.  Because of that, security_bprm_check() is
never called when AT_EXECVE_CHECK is used.

It should be noted that script interpreters cannot directly use
execveat(2) (without this new AT_EXECVE_CHECK flag) because this could
lead to unexpected behaviors e.g., `python script.sh` could lead to Bash
being executed to interpret the script.  Unlike the kernel, script
interpreters may just interpret the shebang as a simple comment, which
should not change for backward compatibility reasons.

Because scripts or libraries files might not currently have the
executable permission set, or because we might want specific users to be
allowed to run arbitrary scripts, the following patch provides a dynamic
configuration mechanism with the SECBIT_EXEC_RESTRICT_FILE and
SECBIT_EXEC_DENY_INTERACTIVE securebits.

This is a redesign of the CLIP OS 4's O_MAYEXEC:
https://github.com/clipos-archive/src_platform_clip-patches/blob/f5cb330d6b684752e403b4e41b39f7004d88e561/1901_open_mayexec.patch
This patch has been used for more than a decade with customized script
interpreters.  Some examples can be found here:
https://github.com/clipos-archive/clipos4_portage-overlay/search?q=O_MAYEXEC

Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Kees Cook <keescook@chromium.org>
Acked-by: Paul Moore <paul@paul-moore.com>
Reviewed-by: Serge Hallyn <serge@hallyn.com>
Reviewed-by: Jeff Xu <jeffxu@chromium.org>
Tested-by: Jeff Xu <jeffxu@chromium.org>
Link: https://docs.python.org/3/library/io.html#io.open_code [1]
Signed-off-by: Mickaël Salaün <mic@digikod.net>
Link: https://lore.kernel.org/r/20241212174223.389435-2-mic@digikod.net
Signed-off-by: Kees Cook <kees@kernel.org>
---
 Documentation/userspace-api/check_exec.rst | 37 ++++++++++++++++++++++++++++++
 Documentation/userspace-api/index.rst      |  1 +
 fs/exec.c                                  | 20 ++++++++++++++--
 include/linux/binfmts.h                    |  7 +++++-
 include/uapi/linux/fcntl.h                 |  4 ++++
 security/security.c                        | 10 ++++++++
 6 files changed, 76 insertions(+), 3 deletions(-)
 create mode 100644 Documentation/userspace-api/check_exec.rst

(limited to 'include/uapi')

diff --git a/Documentation/userspace-api/check_exec.rst b/Documentation/userspace-api/check_exec.rst
new file mode 100644
index 000000000000..393dd7ca19c4
--- /dev/null
+++ b/Documentation/userspace-api/check_exec.rst
@@ -0,0 +1,37 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. Copyright © 2024 Microsoft Corporation
+
+===================
+Executability check
+===================
+
+AT_EXECVE_CHECK
+===============
+
+Passing the ``AT_EXECVE_CHECK`` flag to :manpage:`execveat(2)` only performs a
+check on a regular file and returns 0 if execution of this file would be
+allowed, ignoring the file format and then the related interpreter dependencies
+(e.g. ELF libraries, script's shebang).
+
+Programs should always perform this check to apply kernel-level checks against
+files that are not directly executed by the kernel but passed to a user space
+interpreter instead.  All files that contain executable code, from the point of
+view of the interpreter, should be checked.  However the result of this check
+should only be enforced according to ``SECBIT_EXEC_RESTRICT_FILE`` or
+``SECBIT_EXEC_DENY_INTERACTIVE.``.
+
+The main purpose of this flag is to improve the security and consistency of an
+execution environment to ensure that direct file execution (e.g.
+``./script.sh``) and indirect file execution (e.g. ``sh script.sh``) lead to
+the same result.  For instance, this can be used to check if a file is
+trustworthy according to the caller's environment.
+
+In a secure environment, libraries and any executable dependencies should also
+be checked.  For instance, dynamic linking should make sure that all libraries
+are allowed for execution to avoid trivial bypass (e.g. using ``LD_PRELOAD``).
+For such secure execution environment to make sense, only trusted code should
+be executable, which also requires integrity guarantees.
+
+To avoid race conditions leading to time-of-check to time-of-use issues,
+``AT_EXECVE_CHECK`` should be used with ``AT_EMPTY_PATH`` to check against a
+file descriptor instead of a path.
diff --git a/Documentation/userspace-api/index.rst b/Documentation/userspace-api/index.rst
index 274cc7546efc..6272bcf11296 100644
--- a/Documentation/userspace-api/index.rst
+++ b/Documentation/userspace-api/index.rst
@@ -35,6 +35,7 @@ Security-related interfaces
    mfd_noexec
    spec_ctrl
    tee
+   check_exec
 
 Devices and I/O
 ===============
diff --git a/fs/exec.c b/fs/exec.c
index 98cb7ba9983c..e3f461096e84 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -892,7 +892,8 @@ static struct file *do_open_execat(int fd, struct filename *name, int flags)
 		.lookup_flags = LOOKUP_FOLLOW,
 	};
 
-	if ((flags & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) != 0)
+	if ((flags &
+	     ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH | AT_EXECVE_CHECK)) != 0)
 		return ERR_PTR(-EINVAL);
 	if (flags & AT_SYMLINK_NOFOLLOW)
 		open_exec_flags.lookup_flags &= ~LOOKUP_FOLLOW;
@@ -1541,6 +1542,21 @@ static struct linux_binprm *alloc_bprm(int fd, struct filename *filename, int fl
 	}
 	bprm->interp = bprm->filename;
 
+	/*
+	 * At this point, security_file_open() has already been called (with
+	 * __FMODE_EXEC) and access control checks for AT_EXECVE_CHECK will
+	 * stop just after the security_bprm_creds_for_exec() call in
+	 * bprm_execve().  Indeed, the kernel should not try to parse the
+	 * content of the file with exec_binprm() nor change the calling
+	 * thread, which means that the following security functions will not
+	 * be called:
+	 * - security_bprm_check()
+	 * - security_bprm_creds_from_file()
+	 * - security_bprm_committing_creds()
+	 * - security_bprm_committed_creds()
+	 */
+	bprm->is_check = !!(flags & AT_EXECVE_CHECK);
+
 	retval = bprm_mm_init(bprm);
 	if (!retval)
 		return bprm;
@@ -1836,7 +1852,7 @@ static int bprm_execve(struct linux_binprm *bprm)
 
 	/* Set the unchanging part of bprm->cred */
 	retval = security_bprm_creds_for_exec(bprm);
-	if (retval)
+	if (retval || bprm->is_check)
 		goto out;
 
 	retval = exec_binprm(bprm);
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index e6c00e860951..8ff0eb3644a1 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -42,7 +42,12 @@ struct linux_binprm {
 		 * Set when errors can no longer be returned to the
 		 * original userspace.
 		 */
-		point_of_no_return:1;
+		point_of_no_return:1,
+		/*
+		 * Set by user space to check executability according to the
+		 * caller's environment.
+		 */
+		is_check:1;
 	struct file *executable; /* Executable to pass to the interpreter */
 	struct file *interpreter;
 	struct file *file;
diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h
index 6e6907e63bfc..a15ac2fa4b20 100644
--- a/include/uapi/linux/fcntl.h
+++ b/include/uapi/linux/fcntl.h
@@ -155,4 +155,8 @@
 #define AT_HANDLE_MNT_ID_UNIQUE	0x001	/* Return the u64 unique mount ID. */
 #define AT_HANDLE_CONNECTABLE	0x002	/* Request a connectable file handle */
 
+/* Flags for execveat2(2). */
+#define AT_EXECVE_CHECK		0x10000	/* Only perform a check if execution
+					   would be allowed. */
+
 #endif /* _UAPI_LINUX_FCNTL_H */
diff --git a/security/security.c b/security/security.c
index 09664e09fec9..dae7e903947f 100644
--- a/security/security.c
+++ b/security/security.c
@@ -1248,6 +1248,12 @@ int security_vm_enough_memory_mm(struct mm_struct *mm, long pages)
  * to 1 if AT_SECURE should be set to request libc enable secure mode.  @bprm
  * contains the linux_binprm structure.
  *
+ * If execveat(2) is called with the AT_EXECVE_CHECK flag, bprm->is_check is
+ * set.  The result must be the same as without this flag even if the execution
+ * will never really happen and @bprm will always be dropped.
+ *
+ * This hook must not change current->cred, only @bprm->cred.
+ *
  * Return: Returns 0 if the hook is successful and permission is granted.
  */
 int security_bprm_creds_for_exec(struct linux_binprm *bprm)
@@ -3098,6 +3104,10 @@ int security_file_receive(struct file *file)
  * Save open-time permission checking state for later use upon file_permission,
  * and recheck access if anything has changed since inode_permission.
  *
+ * We can check if a file is opened for execution (e.g. execve(2) call), either
+ * directly or indirectly (e.g. ELF's ld.so) by checking file->f_flags &
+ * __FMODE_EXEC .
+ *
  * Return: Returns 0 if permission is granted.
  */
 int security_file_open(struct file *file)
-- 
cgit v1.2.3


From a0623b2a1d595341971c189b90a6b06f42cd209d Mon Sep 17 00:00:00 2001
From: Mickaël Salaün <mic@digikod.net>
Date: Thu, 12 Dec 2024 18:42:17 +0100
Subject: security: Add EXEC_RESTRICT_FILE and EXEC_DENY_INTERACTIVE securebits
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The new SECBIT_EXEC_RESTRICT_FILE, SECBIT_EXEC_DENY_INTERACTIVE, and
their *_LOCKED counterparts are designed to be set by processes setting
up an execution environment, such as a user session, a container, or a
security sandbox.  Unlike other securebits, these ones can be set by
unprivileged processes.  Like seccomp filters or Landlock domains, the
securebits are inherited across processes.

When SECBIT_EXEC_RESTRICT_FILE is set, programs interpreting code should
control executable resources according to execveat(2) + AT_EXECVE_CHECK
(see previous commit).

When SECBIT_EXEC_DENY_INTERACTIVE is set, a process should deny
execution of user interactive commands (which excludes executable
regular files).

Being able to configure each of these securebits enables system
administrators or owner of image containers to gradually validate the
related changes and to identify potential issues (e.g. with interpreter
or audit logs).

It should be noted that unlike other security bits, the
SECBIT_EXEC_RESTRICT_FILE and SECBIT_EXEC_DENY_INTERACTIVE bits are
dedicated to user space willing to restrict itself.  Because of that,
they only make sense in the context of a trusted environment (e.g.
sandbox, container, user session, full system) where the process
changing its behavior (according to these bits) and all its parent
processes are trusted.  Otherwise, any parent process could just execute
its own malicious code (interpreting a script or not), or even enforce a
seccomp filter to mask these bits.

Such a secure environment can be achieved with an appropriate access
control (e.g. mount's noexec option, file access rights, LSM policy) and
an enlighten ld.so checking that libraries are allowed for execution
e.g., to protect against illegitimate use of LD_PRELOAD.

Ptrace restrictions according to these securebits would not make sense
because of the processes' trust assumption.

Scripts may need some changes to deal with untrusted data (e.g. stdin,
environment variables), but that is outside the scope of the kernel.

See chromeOS's documentation about script execution control and the
related threat model:
https://www.chromium.org/chromium-os/developer-library/guides/security/noexec-shell-scripts/

Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Paul Moore <paul@paul-moore.com>
Reviewed-by: Serge Hallyn <serge@hallyn.com>
Reviewed-by: Jeff Xu <jeffxu@chromium.org>
Tested-by: Jeff Xu <jeffxu@chromium.org>
Signed-off-by: Mickaël Salaün <mic@digikod.net>
Link: https://lore.kernel.org/r/20241212174223.389435-3-mic@digikod.net
Signed-off-by: Kees Cook <kees@kernel.org>
---
 Documentation/userspace-api/check_exec.rst | 107 +++++++++++++++++++++++++++++
 include/uapi/linux/securebits.h            |  24 ++++++-
 security/commoncap.c                       |  29 ++++++--
 3 files changed, 153 insertions(+), 7 deletions(-)

(limited to 'include/uapi')

diff --git a/Documentation/userspace-api/check_exec.rst b/Documentation/userspace-api/check_exec.rst
index 393dd7ca19c4..05dfe3b56f71 100644
--- a/Documentation/userspace-api/check_exec.rst
+++ b/Documentation/userspace-api/check_exec.rst
@@ -5,6 +5,31 @@
 Executability check
 ===================
 
+The ``AT_EXECVE_CHECK`` :manpage:`execveat(2)` flag, and the
+``SECBIT_EXEC_RESTRICT_FILE`` and ``SECBIT_EXEC_DENY_INTERACTIVE`` securebits
+are intended for script interpreters and dynamic linkers to enforce a
+consistent execution security policy handled by the kernel.  See the
+`samples/check-exec/inc.c`_ example.
+
+Whether an interpreter should check these securebits or not depends on the
+security risk of running malicious scripts with respect to the execution
+environment, and whether the kernel can check if a script is trustworthy or
+not.  For instance, Python scripts running on a server can use arbitrary
+syscalls and access arbitrary files.  Such interpreters should then be
+enlighten to use these securebits and let users define their security policy.
+However, a JavaScript engine running in a web browser should already be
+sandboxed and then should not be able to harm the user's environment.
+
+Script interpreters or dynamic linkers built for tailored execution environments
+(e.g. hardened Linux distributions or hermetic container images) could use
+``AT_EXECVE_CHECK`` without checking the related securebits if backward
+compatibility is handled by something else (e.g. atomic update ensuring that
+all legitimate libraries are allowed to be executed).  It is then recommended
+for script interpreters and dynamic linkers to check the securebits at run time
+by default, but also to provide the ability for custom builds to behave like if
+``SECBIT_EXEC_RESTRICT_FILE`` or ``SECBIT_EXEC_DENY_INTERACTIVE`` were always
+set to 1 (i.e. always enforce restrictions).
+
 AT_EXECVE_CHECK
 ===============
 
@@ -35,3 +60,85 @@ be executable, which also requires integrity guarantees.
 To avoid race conditions leading to time-of-check to time-of-use issues,
 ``AT_EXECVE_CHECK`` should be used with ``AT_EMPTY_PATH`` to check against a
 file descriptor instead of a path.
+
+SECBIT_EXEC_RESTRICT_FILE and SECBIT_EXEC_DENY_INTERACTIVE
+==========================================================
+
+When ``SECBIT_EXEC_RESTRICT_FILE`` is set, a process should only interpret or
+execute a file if a call to :manpage:`execveat(2)` with the related file
+descriptor and the ``AT_EXECVE_CHECK`` flag succeed.
+
+This secure bit may be set by user session managers, service managers,
+container runtimes, sandboxer tools...  Except for test environments, the
+related ``SECBIT_EXEC_RESTRICT_FILE_LOCKED`` bit should also be set.
+
+Programs should only enforce consistent restrictions according to the
+securebits but without relying on any other user-controlled configuration.
+Indeed, the use case for these securebits is to only trust executable code
+vetted by the system configuration (through the kernel), so we should be
+careful to not let untrusted users control this configuration.
+
+However, script interpreters may still use user configuration such as
+environment variables as long as it is not a way to disable the securebits
+checks.  For instance, the ``PATH`` and ``LD_PRELOAD`` variables can be set by
+a script's caller.  Changing these variables may lead to unintended code
+executions, but only from vetted executable programs, which is OK.  For this to
+make sense, the system should provide a consistent security policy to avoid
+arbitrary code execution e.g., by enforcing a write xor execute policy.
+
+When ``SECBIT_EXEC_DENY_INTERACTIVE`` is set, a process should never interpret
+interactive user commands (e.g. scripts).  However, if such commands are passed
+through a file descriptor (e.g. stdin), its content should be interpreted if a
+call to :manpage:`execveat(2)` with the related file descriptor and the
+``AT_EXECVE_CHECK`` flag succeed.
+
+For instance, script interpreters called with a script snippet as argument
+should always deny such execution if ``SECBIT_EXEC_DENY_INTERACTIVE`` is set.
+
+This secure bit may be set by user session managers, service managers,
+container runtimes, sandboxer tools...  Except for test environments, the
+related ``SECBIT_EXEC_DENY_INTERACTIVE_LOCKED`` bit should also be set.
+
+Here is the expected behavior for a script interpreter according to combination
+of any exec securebits:
+
+1. ``SECBIT_EXEC_RESTRICT_FILE=0`` and ``SECBIT_EXEC_DENY_INTERACTIVE=0``
+
+   Always interpret scripts, and allow arbitrary user commands (default).
+
+   No threat, everyone and everything is trusted, but we can get ahead of
+   potential issues thanks to the call to :manpage:`execveat(2)` with
+   ``AT_EXECVE_CHECK`` which should always be performed but ignored by the
+   script interpreter.  Indeed, this check is still important to enable systems
+   administrators to verify requests (e.g. with audit) and prepare for
+   migration to a secure mode.
+
+2. ``SECBIT_EXEC_RESTRICT_FILE=1`` and ``SECBIT_EXEC_DENY_INTERACTIVE=0``
+
+   Deny script interpretation if they are not executable, but allow
+   arbitrary user commands.
+
+   The threat is (potential) malicious scripts run by trusted (and not fooled)
+   users.  That can protect against unintended script executions (e.g. ``sh
+   /tmp/*.sh``).  This makes sense for (semi-restricted) user sessions.
+
+3. ``SECBIT_EXEC_RESTRICT_FILE=0`` and ``SECBIT_EXEC_DENY_INTERACTIVE=1``
+
+   Always interpret scripts, but deny arbitrary user commands.
+
+   This use case may be useful for secure services (i.e. without interactive
+   user session) where scripts' integrity is verified (e.g.  with IMA/EVM or
+   dm-verity/IPE) but where access rights might not be ready yet.  Indeed,
+   arbitrary interactive commands would be much more difficult to check.
+
+4. ``SECBIT_EXEC_RESTRICT_FILE=1`` and ``SECBIT_EXEC_DENY_INTERACTIVE=1``
+
+   Deny script interpretation if they are not executable, and also deny
+   any arbitrary user commands.
+
+   The threat is malicious scripts run by untrusted users (but trusted code).
+   This makes sense for system services that may only execute trusted scripts.
+
+.. Links
+.. _samples/check-exec/inc.c:
+   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/samples/check-exec/inc.c
diff --git a/include/uapi/linux/securebits.h b/include/uapi/linux/securebits.h
index d6d98877ff1a..3fba30dbd68b 100644
--- a/include/uapi/linux/securebits.h
+++ b/include/uapi/linux/securebits.h
@@ -52,10 +52,32 @@
 #define SECBIT_NO_CAP_AMBIENT_RAISE_LOCKED \
 			(issecure_mask(SECURE_NO_CAP_AMBIENT_RAISE_LOCKED))
 
+/* See Documentation/userspace-api/check_exec.rst */
+#define SECURE_EXEC_RESTRICT_FILE		8
+#define SECURE_EXEC_RESTRICT_FILE_LOCKED	9  /* make bit-8 immutable */
+
+#define SECBIT_EXEC_RESTRICT_FILE (issecure_mask(SECURE_EXEC_RESTRICT_FILE))
+#define SECBIT_EXEC_RESTRICT_FILE_LOCKED \
+			(issecure_mask(SECURE_EXEC_RESTRICT_FILE_LOCKED))
+
+/* See Documentation/userspace-api/check_exec.rst */
+#define SECURE_EXEC_DENY_INTERACTIVE		10
+#define SECURE_EXEC_DENY_INTERACTIVE_LOCKED	11  /* make bit-10 immutable */
+
+#define SECBIT_EXEC_DENY_INTERACTIVE \
+			(issecure_mask(SECURE_EXEC_DENY_INTERACTIVE))
+#define SECBIT_EXEC_DENY_INTERACTIVE_LOCKED \
+			(issecure_mask(SECURE_EXEC_DENY_INTERACTIVE_LOCKED))
+
 #define SECURE_ALL_BITS		(issecure_mask(SECURE_NOROOT) | \
 				 issecure_mask(SECURE_NO_SETUID_FIXUP) | \
 				 issecure_mask(SECURE_KEEP_CAPS) | \
-				 issecure_mask(SECURE_NO_CAP_AMBIENT_RAISE))
+				 issecure_mask(SECURE_NO_CAP_AMBIENT_RAISE) | \
+				 issecure_mask(SECURE_EXEC_RESTRICT_FILE) | \
+				 issecure_mask(SECURE_EXEC_DENY_INTERACTIVE))
 #define SECURE_ALL_LOCKS	(SECURE_ALL_BITS << 1)
 
+#define SECURE_ALL_UNPRIVILEGED (issecure_mask(SECURE_EXEC_RESTRICT_FILE) | \
+				 issecure_mask(SECURE_EXEC_DENY_INTERACTIVE))
+
 #endif /* _UAPI_LINUX_SECUREBITS_H */
diff --git a/security/commoncap.c b/security/commoncap.c
index cefad323a0b1..52ea01acb453 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -1302,21 +1302,38 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3,
 		     & (old->securebits ^ arg2))			/*[1]*/
 		    || ((old->securebits & SECURE_ALL_LOCKS & ~arg2))	/*[2]*/
 		    || (arg2 & ~(SECURE_ALL_LOCKS | SECURE_ALL_BITS))	/*[3]*/
-		    || (cap_capable(current_cred(),
-				    current_cred()->user_ns,
-				    CAP_SETPCAP,
-				    CAP_OPT_NONE) != 0)			/*[4]*/
 			/*
 			 * [1] no changing of bits that are locked
 			 * [2] no unlocking of locks
 			 * [3] no setting of unsupported bits
-			 * [4] doing anything requires privilege (go read about
-			 *     the "sendmail capabilities bug")
 			 */
 		    )
 			/* cannot change a locked bit */
 			return -EPERM;
 
+		/*
+		 * Doing anything requires privilege (go read about the
+		 * "sendmail capabilities bug"), except for unprivileged bits.
+		 * Indeed, the SECURE_ALL_UNPRIVILEGED bits are not
+		 * restrictions enforced by the kernel but by user space on
+		 * itself.
+		 */
+		if (cap_capable(current_cred(), current_cred()->user_ns,
+				CAP_SETPCAP, CAP_OPT_NONE) != 0) {
+			const unsigned long unpriv_and_locks =
+				SECURE_ALL_UNPRIVILEGED |
+				SECURE_ALL_UNPRIVILEGED << 1;
+			const unsigned long changed = old->securebits ^ arg2;
+
+			/* For legacy reason, denies non-change. */
+			if (!changed)
+				return -EPERM;
+
+			/* Denies privileged changes. */
+			if (changed & ~unpriv_and_locks)
+				return -EPERM;
+		}
+
 		new = prepare_creds();
 		if (!new)
 			return -ENOMEM;
-- 
cgit v1.2.3


From 95b3cdafd7cb74414070893445a9b731793f7b55 Mon Sep 17 00:00:00 2001
From: Mimi Zohar <zohar@linux.ibm.com>
Date: Thu, 12 Dec 2024 18:42:23 +0100
Subject: ima: instantiate the bprm_creds_for_exec() hook
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Like direct file execution (e.g. ./script.sh), indirect file execution
(e.g. sh script.sh) needs to be measured and appraised.  Instantiate
the new security_bprm_creds_for_exec() hook to measure and verify the
indirect file's integrity.  Unlike direct file execution, indirect file
execution is optionally enforced by the interpreter.

Differentiate kernel and userspace enforced integrity audit messages.

Co-developed-by: Roberto Sassu <roberto.sassu@huawei.com>
Signed-off-by: Roberto Sassu <roberto.sassu@huawei.com>
Signed-off-by: Mimi Zohar <zohar@linux.ibm.com>
Tested-by: Stefan Berger <stefanb@linux.ibm.com>
Reviewed-by: Mickaël Salaün <mic@digikod.net>
Signed-off-by: Mickaël Salaün <mic@digikod.net>
Link: https://lore.kernel.org/r/20241212174223.389435-9-mic@digikod.net
Signed-off-by: Kees Cook <kees@kernel.org>
---
 include/uapi/linux/audit.h            |  1 +
 security/integrity/ima/ima_appraise.c | 27 +++++++++++++++++++++++++--
 security/integrity/ima/ima_main.c     | 29 +++++++++++++++++++++++++++++
 3 files changed, 55 insertions(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
index 75e21a135483..d9a069b4a775 100644
--- a/include/uapi/linux/audit.h
+++ b/include/uapi/linux/audit.h
@@ -161,6 +161,7 @@
 #define AUDIT_INTEGRITY_RULE	    1805 /* policy rule */
 #define AUDIT_INTEGRITY_EVM_XATTR   1806 /* New EVM-covered xattr */
 #define AUDIT_INTEGRITY_POLICY_RULE 1807 /* IMA policy rules */
+#define AUDIT_INTEGRITY_USERSPACE   1808 /* Userspace enforced data integrity */
 
 #define AUDIT_KERNEL		2000	/* Asynchronous audit record. NOT A REQUEST. */
 
diff --git a/security/integrity/ima/ima_appraise.c b/security/integrity/ima/ima_appraise.c
index 884a3533f7af..f435eff4667f 100644
--- a/security/integrity/ima/ima_appraise.c
+++ b/security/integrity/ima/ima_appraise.c
@@ -8,6 +8,7 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/file.h>
+#include <linux/binfmts.h>
 #include <linux/fs.h>
 #include <linux/xattr.h>
 #include <linux/magic.h>
@@ -469,6 +470,17 @@ int ima_check_blacklist(struct ima_iint_cache *iint,
 	return rc;
 }
 
+static bool is_bprm_creds_for_exec(enum ima_hooks func, struct file *file)
+{
+	struct linux_binprm *bprm;
+
+	if (func == BPRM_CHECK) {
+		bprm = container_of(&file, struct linux_binprm, file);
+		return bprm->is_check;
+	}
+	return false;
+}
+
 /*
  * ima_appraise_measurement - appraise file measurement
  *
@@ -483,6 +495,7 @@ int ima_appraise_measurement(enum ima_hooks func, struct ima_iint_cache *iint,
 			     int xattr_len, const struct modsig *modsig)
 {
 	static const char op[] = "appraise_data";
+	int audit_msgno = AUDIT_INTEGRITY_DATA;
 	const char *cause = "unknown";
 	struct dentry *dentry = file_dentry(file);
 	struct inode *inode = d_backing_inode(dentry);
@@ -494,6 +507,16 @@ int ima_appraise_measurement(enum ima_hooks func, struct ima_iint_cache *iint,
 	if (!(inode->i_opflags & IOP_XATTR) && !try_modsig)
 		return INTEGRITY_UNKNOWN;
 
+	/*
+	 * Unlike any of the other LSM hooks where the kernel enforces file
+	 * integrity, enforcing file integrity for the bprm_creds_for_exec()
+	 * LSM hook with the AT_EXECVE_CHECK flag is left up to the discretion
+	 * of the script interpreter(userspace). Differentiate kernel and
+	 * userspace enforced integrity audit messages.
+	 */
+	if (is_bprm_creds_for_exec(func, file))
+		audit_msgno = AUDIT_INTEGRITY_USERSPACE;
+
 	/* If reading the xattr failed and there's no modsig, error out. */
 	if (rc <= 0 && !try_modsig) {
 		if (rc && rc != -ENODATA)
@@ -569,7 +592,7 @@ out:
 	     (iint->flags & IMA_FAIL_UNVERIFIABLE_SIGS))) {
 		status = INTEGRITY_FAIL;
 		cause = "unverifiable-signature";
-		integrity_audit_msg(AUDIT_INTEGRITY_DATA, inode, filename,
+		integrity_audit_msg(audit_msgno, inode, filename,
 				    op, cause, rc, 0);
 	} else if (status != INTEGRITY_PASS) {
 		/* Fix mode, but don't replace file signatures. */
@@ -589,7 +612,7 @@ out:
 			status = INTEGRITY_PASS;
 		}
 
-		integrity_audit_msg(AUDIT_INTEGRITY_DATA, inode, filename,
+		integrity_audit_msg(audit_msgno, inode, filename,
 				    op, cause, rc, 0);
 	} else {
 		ima_cache_flags(iint, func);
diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index 9b87556b03a7..9f9897a7c217 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -554,6 +554,34 @@ static int ima_bprm_check(struct linux_binprm *bprm)
 				   MAY_EXEC, CREDS_CHECK);
 }
 
+/**
+ * ima_bprm_creds_for_exec - collect/store/appraise measurement.
+ * @bprm: contains the linux_binprm structure
+ *
+ * Based on the IMA policy and the execveat(2) AT_EXECVE_CHECK flag, measure
+ * and appraise the integrity of a file to be executed by script interpreters.
+ * Unlike any of the other LSM hooks where the kernel enforces file integrity,
+ * enforcing file integrity is left up to the discretion of the script
+ * interpreter (userspace).
+ *
+ * On success return 0.  On integrity appraisal error, assuming the file
+ * is in policy and IMA-appraisal is in enforcing mode, return -EACCES.
+ */
+static int ima_bprm_creds_for_exec(struct linux_binprm *bprm)
+{
+	/*
+	 * As security_bprm_check() is called multiple times, both
+	 * the script and the shebang interpreter are measured, appraised,
+	 * and audited. Limit usage of this LSM hook to just measuring,
+	 * appraising, and auditing the indirect script execution
+	 * (e.g. ./sh example.sh).
+	 */
+	if (!bprm->is_check)
+		return 0;
+
+	return ima_bprm_check(bprm);
+}
+
 /**
  * ima_file_check - based on policy, collect/store measurement.
  * @file: pointer to the file to be measured
@@ -1174,6 +1202,7 @@ static int __init init_ima(void)
 
 static struct security_hook_list ima_hooks[] __ro_after_init = {
 	LSM_HOOK_INIT(bprm_check_security, ima_bprm_check),
+	LSM_HOOK_INIT(bprm_creds_for_exec, ima_bprm_creds_for_exec),
 	LSM_HOOK_INIT(file_post_open, ima_file_check),
 	LSM_HOOK_INIT(inode_post_create_tmpfile, ima_post_create_tmpfile),
 	LSM_HOOK_INIT(file_release, ima_file_free),
-- 
cgit v1.2.3


From d1d761b3012e99d55d288d435384be606302cb2c Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Mon, 16 Dec 2024 19:11:53 +0200
Subject: net: fib_rules: Add flow label selector attributes

Add new FIB rule attributes which will allow user space to match on the
IPv6 flow label with a mask. Temporarily set the type of the attributes
to 'NLA_REJECT' while support is being added in the IPv6 code.

Reviewed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Guillaume Nault <gnault@redhat.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/uapi/linux/fib_rules.h | 2 ++
 net/core/fib_rules.c           | 2 ++
 2 files changed, 4 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/fib_rules.h b/include/uapi/linux/fib_rules.h
index a6924dd3aff1..00e9890ca3c0 100644
--- a/include/uapi/linux/fib_rules.h
+++ b/include/uapi/linux/fib_rules.h
@@ -68,6 +68,8 @@ enum {
 	FRA_SPORT_RANGE, /* sport */
 	FRA_DPORT_RANGE, /* dport */
 	FRA_DSCP,	/* dscp */
+	FRA_FLOWLABEL,	/* flowlabel */
+	FRA_FLOWLABEL_MASK,	/* flowlabel mask */
 	__FRA_MAX
 };
 
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 34185d138c95..153b14aade42 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -770,6 +770,8 @@ static const struct nla_policy fib_rule_policy[FRA_MAX + 1] = {
 	[FRA_SPORT_RANGE] = { .len = sizeof(struct fib_rule_port_range) },
 	[FRA_DPORT_RANGE] = { .len = sizeof(struct fib_rule_port_range) },
 	[FRA_DSCP]	= NLA_POLICY_MAX(NLA_U8, INET_DSCP_MASK >> 2),
+	[FRA_FLOWLABEL] = { .type = NLA_REJECT },
+	[FRA_FLOWLABEL_MASK] = { .type = NLA_REJECT },
 };
 
 int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
-- 
cgit v1.2.3


From ba4138032ae3b5b8e2b68d2f2647cdc0817b05a6 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Mon, 16 Dec 2024 19:11:58 +0200
Subject: ipv6: Add flow label to route get requests

The default IPv6 multipath hash policy takes the flow label into account
when calculating a multipath hash and previous patches added a flow
label selector to IPv6 FIB rules.

Allow user space to specify a flow label in route get requests by adding
a new netlink attribute and using its value to populate the "flowlabel"
field in the IPv6 flow info structure prior to a route lookup.

Deny the attribute in RTM_{NEW,DEL}ROUTE requests by checking for it in
rtm_to_fib6_config() and returning an error if present.

A subsequent patch will use this capability to test the new flow label
selector in IPv6 FIB rules.

Reviewed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Guillaume Nault <gnault@redhat.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/uapi/linux/rtnetlink.h |  1 +
 net/ipv6/route.c               | 20 +++++++++++++++++++-
 2 files changed, 20 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index eccc0e7dcb7d..5ee94c511a28 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -393,6 +393,7 @@ enum rtattr_type_t {
 	RTA_SPORT,
 	RTA_DPORT,
 	RTA_NH_ID,
+	RTA_FLOWLABEL,
 	__RTA_MAX
 };
 
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 67ff16c04718..78362822b907 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -5005,6 +5005,7 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
 	[RTA_SPORT]		= { .type = NLA_U16 },
 	[RTA_DPORT]		= { .type = NLA_U16 },
 	[RTA_NH_ID]		= { .type = NLA_U32 },
+	[RTA_FLOWLABEL]		= { .type = NLA_BE32 },
 };
 
 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
@@ -5030,6 +5031,12 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
 		goto errout;
 	}
 
+	if (tb[RTA_FLOWLABEL]) {
+		NL_SET_ERR_MSG_ATTR(extack, tb[RTA_FLOWLABEL],
+				    "Flow label cannot be specified for this operation");
+		goto errout;
+	}
+
 	*cfg = (struct fib6_config){
 		.fc_table = rtm->rtm_table,
 		.fc_dst_len = rtm->rtm_dst_len,
@@ -6013,6 +6020,13 @@ static int inet6_rtm_valid_getroute_req(struct sk_buff *skb,
 		return -EINVAL;
 	}
 
+	if (tb[RTA_FLOWLABEL] &&
+	    (nla_get_be32(tb[RTA_FLOWLABEL]) & ~IPV6_FLOWLABEL_MASK)) {
+		NL_SET_ERR_MSG_ATTR(extack, tb[RTA_FLOWLABEL],
+				    "Invalid flow label");
+		return -EINVAL;
+	}
+
 	for (i = 0; i <= RTA_MAX; i++) {
 		if (!tb[i])
 			continue;
@@ -6027,6 +6041,7 @@ static int inet6_rtm_valid_getroute_req(struct sk_buff *skb,
 		case RTA_SPORT:
 		case RTA_DPORT:
 		case RTA_IP_PROTO:
+		case RTA_FLOWLABEL:
 			break;
 		default:
 			NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in get route request");
@@ -6049,6 +6064,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
 	struct sk_buff *skb;
 	struct rtmsg *rtm;
 	struct flowi6 fl6 = {};
+	__be32 flowlabel;
 	bool fibmatch;
 
 	err = inet6_rtm_valid_getroute_req(in_skb, nlh, tb, extack);
@@ -6057,7 +6073,6 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
 
 	err = -EINVAL;
 	rtm = nlmsg_data(nlh);
-	fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
 	fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
 
 	if (tb[RTA_SRC]) {
@@ -6103,6 +6118,9 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
 			goto errout;
 	}
 
+	flowlabel = nla_get_be32_default(tb[RTA_FLOWLABEL], 0);
+	fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, flowlabel);
+
 	if (iif) {
 		struct net_device *dev;
 		int flags = 0;
-- 
cgit v1.2.3


From 724c6ce38bbaeb4b3f109b0e066d6c0ecd15446c Mon Sep 17 00:00:00 2001
From: Alexander Lobakin <aleksander.lobakin@intel.com>
Date: Thu, 19 Dec 2024 14:57:34 +0100
Subject: stddef: make __struct_group() UAPI C++-friendly

For the most part of the C++ history, it couldn't have type
declarations inside anonymous unions for different reasons. At the
same time, __struct_group() relies on the latters, so when the @TAG
argument is not empty, C++ code doesn't want to build (even under
`extern "C"`):

../linux/include/uapi/linux/pkt_cls.h:25:24: error:
'struct tc_u32_sel::<unnamed union>::tc_u32_sel_hdr,' invalid;
an anonymous union may only have public non-static data members
[-fpermissive]

The safest way to fix this without trying to switch standards (which
is impossible in UAPI anyway) etc., is to disable tag declaration
for that language. This won't break anything since for now it's not
buildable at all.
Use a separate definition for __struct_group() when __cplusplus is
defined to mitigate the error, including the version from tools/.

Fixes: 50d7bd38c3aa ("stddef: Introduce struct_group() helper macro")
Reported-by: Christopher Ferris <cferris@google.com>
Closes: https://lore.kernel.org/linux-hardening/Z1HZpe3WE5As8UAz@google.com
Suggested-by: Kees Cook <kees@kernel.org> # __struct_group_tag()
Signed-off-by: Alexander Lobakin <aleksander.lobakin@intel.com>
Reviewed-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Link: https://lore.kernel.org/r/20241219135734.2130002-1-aleksander.lobakin@intel.com
Signed-off-by: Kees Cook <kees@kernel.org>
---
 include/uapi/linux/stddef.h       | 13 ++++++++++---
 tools/include/uapi/linux/stddef.h | 15 +++++++++++----
 2 files changed, 21 insertions(+), 7 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/stddef.h b/include/uapi/linux/stddef.h
index 58154117d9b0..a6fce46aeb37 100644
--- a/include/uapi/linux/stddef.h
+++ b/include/uapi/linux/stddef.h
@@ -8,6 +8,13 @@
 #define __always_inline inline
 #endif
 
+/* Not all C++ standards support type declarations inside an anonymous union */
+#ifndef __cplusplus
+#define __struct_group_tag(TAG)		TAG
+#else
+#define __struct_group_tag(TAG)
+#endif
+
 /**
  * __struct_group() - Create a mirrored named and anonyomous struct
  *
@@ -20,13 +27,13 @@
  * and size: one anonymous and one named. The former's members can be used
  * normally without sub-struct naming, and the latter can be used to
  * reason about the start, end, and size of the group of struct members.
- * The named struct can also be explicitly tagged for layer reuse, as well
- * as both having struct attributes appended.
+ * The named struct can also be explicitly tagged for layer reuse (C only),
+ * as well as both having struct attributes appended.
  */
 #define __struct_group(TAG, NAME, ATTRS, MEMBERS...) \
 	union { \
 		struct { MEMBERS } ATTRS; \
-		struct TAG { MEMBERS } ATTRS NAME; \
+		struct __struct_group_tag(TAG) { MEMBERS } ATTRS NAME; \
 	} ATTRS
 
 #ifdef __cplusplus
diff --git a/tools/include/uapi/linux/stddef.h b/tools/include/uapi/linux/stddef.h
index bb6ea517efb5..c53cde425406 100644
--- a/tools/include/uapi/linux/stddef.h
+++ b/tools/include/uapi/linux/stddef.h
@@ -8,6 +8,13 @@
 #define __always_inline __inline__
 #endif
 
+/* Not all C++ standards support type declarations inside an anonymous union */
+#ifndef __cplusplus
+#define __struct_group_tag(TAG)		TAG
+#else
+#define __struct_group_tag(TAG)
+#endif
+
 /**
  * __struct_group() - Create a mirrored named and anonyomous struct
  *
@@ -20,14 +27,14 @@
  * and size: one anonymous and one named. The former's members can be used
  * normally without sub-struct naming, and the latter can be used to
  * reason about the start, end, and size of the group of struct members.
- * The named struct can also be explicitly tagged for layer reuse, as well
- * as both having struct attributes appended.
+ * The named struct can also be explicitly tagged for layer reuse (C only),
+ * as well as both having struct attributes appended.
  */
 #define __struct_group(TAG, NAME, ATTRS, MEMBERS...) \
 	union { \
 		struct { MEMBERS } ATTRS; \
-		struct TAG { MEMBERS } ATTRS NAME; \
-	}
+		struct __struct_group_tag(TAG) { MEMBERS } ATTRS NAME; \
+	} ATTRS
 
 /**
  * __DECLARE_FLEX_ARRAY() - Declare a flexible array usable in a union
-- 
cgit v1.2.3


From 135ec43eb29c68ed26e2d10f221d43f7d9139a8f Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Wed, 20 Nov 2024 17:13:52 -0800
Subject: fiemap: use kernel-doc includes in fiemap docbook

Add some kernel-doc notation to structs in fiemap header files
then pull that into Documentation/filesystems/fiemap.rst
instead of duplicating the header file structs in fiemap.rst.
This helps to future-proof fiemap.rst against struct changes.

Add missing flags documentation from header files into fiemap.rst
for FIEMAP_FLAG_CACHE and FIEMAP_EXTENT_SHARED.

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Link: https://lore.kernel.org/r/20241121011352.201907-1-rdunlap@infradead.org
Cc: Christoph Hellwig <hch@lst.de>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Jan Kara <jack@suse.cz>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: linux-doc@vger.kernel.org
Cc: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 Documentation/filesystems/fiemap.rst | 49 +++++++++++-------------------------
 include/linux/fiemap.h               | 16 ++++++++----
 include/uapi/linux/fiemap.h          | 47 +++++++++++++++++++++++-----------
 3 files changed, 59 insertions(+), 53 deletions(-)

(limited to 'include/uapi')

diff --git a/Documentation/filesystems/fiemap.rst b/Documentation/filesystems/fiemap.rst
index 93fc96f760aa..23b3ed229e49 100644
--- a/Documentation/filesystems/fiemap.rst
+++ b/Documentation/filesystems/fiemap.rst
@@ -12,21 +12,10 @@ returns a list of extents.
 Request Basics
 --------------
 
-A fiemap request is encoded within struct fiemap::
-
-  struct fiemap {
-	__u64	fm_start;	 /* logical offset (inclusive) at
-				  * which to start mapping (in) */
-	__u64	fm_length;	 /* logical length of mapping which
-				  * userspace cares about (in) */
-	__u32	fm_flags;	 /* FIEMAP_FLAG_* flags for request (in/out) */
-	__u32	fm_mapped_extents; /* number of extents that were
-				    * mapped (out) */
-	__u32	fm_extent_count; /* size of fm_extents array (in) */
-	__u32	fm_reserved;
-	struct fiemap_extent fm_extents[0]; /* array of mapped extents (out) */
-  };
+A fiemap request is encoded within struct fiemap:
 
+.. kernel-doc:: include/uapi/linux/fiemap.h
+   :identifiers: fiemap
 
 fm_start, and fm_length specify the logical range within the file
 which the process would like mappings for. Extents returned mirror
@@ -60,6 +49,8 @@ FIEMAP_FLAG_XATTR
   If this flag is set, the extents returned will describe the inodes
   extended attribute lookup tree, instead of its data tree.
 
+FIEMAP_FLAG_CACHE
+  This flag requests caching of the extents.
 
 Extent Mapping
 --------------
@@ -77,18 +68,10 @@ complete the requested range and will not have the FIEMAP_EXTENT_LAST
 flag set (see the next section on extent flags).
 
 Each extent is described by a single fiemap_extent structure as
-returned in fm_extents::
-
-    struct fiemap_extent {
-	    __u64	fe_logical;  /* logical offset in bytes for the start of
-				* the extent */
-	    __u64	fe_physical; /* physical offset in bytes for the start
-				* of the extent */
-	    __u64	fe_length;   /* length in bytes for the extent */
-	    __u64	fe_reserved64[2];
-	    __u32	fe_flags;    /* FIEMAP_EXTENT_* flags for this extent */
-	    __u32	fe_reserved[3];
-    };
+returned in fm_extents:
+
+.. kernel-doc:: include/uapi/linux/fiemap.h
+    :identifiers: fiemap_extent
 
 All offsets and lengths are in bytes and mirror those on disk.  It is valid
 for an extents logical offset to start before the request or its logical
@@ -175,6 +158,8 @@ FIEMAP_EXTENT_MERGED
   userspace would be highly inefficient, the kernel will try to merge most
   adjacent blocks into 'extents'.
 
+FIEMAP_EXTENT_SHARED
+  This flag is set to request that space be shared with other files.
 
 VFS -> File System Implementation
 ---------------------------------
@@ -191,14 +176,10 @@ each discovered extent::
                      u64 len);
 
 ->fiemap is passed struct fiemap_extent_info which describes the
-fiemap request::
-
-  struct fiemap_extent_info {
-	unsigned int fi_flags;		/* Flags as passed from user */
-	unsigned int fi_extents_mapped;	/* Number of mapped extents */
-	unsigned int fi_extents_max;	/* Size of fiemap_extent array */
-	struct fiemap_extent *fi_extents_start;	/* Start of fiemap_extent array */
-  };
+fiemap request:
+
+.. kernel-doc:: include/linux/fiemap.h
+    :identifiers: fiemap_extent_info
 
 It is intended that the file system should not need to access any of this
 structure directly. Filesystem handlers should be tolerant to signals and return
diff --git a/include/linux/fiemap.h b/include/linux/fiemap.h
index c50882f19235..966092ffa89a 100644
--- a/include/linux/fiemap.h
+++ b/include/linux/fiemap.h
@@ -5,12 +5,18 @@
 #include <uapi/linux/fiemap.h>
 #include <linux/fs.h>
 
+/**
+ * struct fiemap_extent_info - fiemap request to a filesystem
+ * @fi_flags:		Flags as passed from user
+ * @fi_extents_mapped:	Number of mapped extents
+ * @fi_extents_max:	Size of fiemap_extent array
+ * @fi_extents_start:	Start of fiemap_extent array
+ */
 struct fiemap_extent_info {
-	unsigned int fi_flags;		/* Flags as passed from user */
-	unsigned int fi_extents_mapped;	/* Number of mapped extents */
-	unsigned int fi_extents_max;	/* Size of fiemap_extent array */
-	struct fiemap_extent __user *fi_extents_start; /* Start of
-							fiemap_extent array */
+	unsigned int fi_flags;
+	unsigned int fi_extents_mapped;
+	unsigned int fi_extents_max;
+	struct fiemap_extent __user *fi_extents_start;
 };
 
 int fiemap_prep(struct inode *inode, struct fiemap_extent_info *fieinfo,
diff --git a/include/uapi/linux/fiemap.h b/include/uapi/linux/fiemap.h
index 24ca0c00cae3..9d9e8ae32b41 100644
--- a/include/uapi/linux/fiemap.h
+++ b/include/uapi/linux/fiemap.h
@@ -14,37 +14,56 @@
 
 #include <linux/types.h>
 
+/**
+ * struct fiemap_extent - description of one fiemap extent
+ * @fe_logical: byte offset of the extent in the file
+ * @fe_physical: byte offset of extent on disk
+ * @fe_length: length in bytes for this extent
+ * @fe_flags: FIEMAP_EXTENT_* flags for this extent
+ */
 struct fiemap_extent {
-	__u64 fe_logical;  /* logical offset in bytes for the start of
-			    * the extent from the beginning of the file */
-	__u64 fe_physical; /* physical offset in bytes for the start
-			    * of the extent from the beginning of the disk */
-	__u64 fe_length;   /* length in bytes for this extent */
+	__u64 fe_logical;
+	__u64 fe_physical;
+	__u64 fe_length;
+	/* private: */
 	__u64 fe_reserved64[2];
-	__u32 fe_flags;    /* FIEMAP_EXTENT_* flags for this extent */
+	/* public: */
+	__u32 fe_flags;
+	/* private: */
 	__u32 fe_reserved[3];
 };
 
+/**
+ * struct fiemap - file extent mappings
+ * @fm_start: byte offset (inclusive) at which to start mapping (in)
+ * @fm_length: logical length of mapping which userspace wants (in)
+ * @fm_flags: FIEMAP_FLAG_* flags for request (in/out)
+ * @fm_mapped_extents: number of extents that were mapped (out)
+ * @fm_extent_count: size of fm_extents array (in)
+ * @fm_extents: array of mapped extents (out)
+ */
 struct fiemap {
-	__u64 fm_start;		/* logical offset (inclusive) at
-				 * which to start mapping (in) */
-	__u64 fm_length;	/* logical length of mapping which
-				 * userspace wants (in) */
-	__u32 fm_flags;		/* FIEMAP_FLAG_* flags for request (in/out) */
-	__u32 fm_mapped_extents;/* number of extents that were mapped (out) */
-	__u32 fm_extent_count;  /* size of fm_extents array (in) */
+	__u64 fm_start;
+	__u64 fm_length;
+	__u32 fm_flags;
+	__u32 fm_mapped_extents;
+	__u32 fm_extent_count;
+	/* private: */
 	__u32 fm_reserved;
-	struct fiemap_extent fm_extents[]; /* array of mapped extents (out) */
+	/* public: */
+	struct fiemap_extent fm_extents[];
 };
 
 #define FIEMAP_MAX_OFFSET	(~0ULL)
 
+/* flags used in fm_flags: */
 #define FIEMAP_FLAG_SYNC	0x00000001 /* sync file data before map */
 #define FIEMAP_FLAG_XATTR	0x00000002 /* map extended attribute tree */
 #define FIEMAP_FLAG_CACHE	0x00000004 /* request caching of the extents */
 
 #define FIEMAP_FLAGS_COMPAT	(FIEMAP_FLAG_SYNC | FIEMAP_FLAG_XATTR)
 
+/* flags used in fe_flags: */
 #define FIEMAP_EXTENT_LAST		0x00000001 /* Last extent in file. */
 #define FIEMAP_EXTENT_UNKNOWN		0x00000002 /* Data location unknown. */
 #define FIEMAP_EXTENT_DELALLOC		0x00000004 /* Location still pending.
-- 
cgit v1.2.3


From 10783d0ba0d7731ec81d88c54f83cf0ff89d1c2a Mon Sep 17 00:00:00 2001
From: Anuj Gupta <anuj20.g@samsung.com>
Date: Thu, 28 Nov 2024 16:52:34 +0530
Subject: fs, iov_iter: define meta io descriptor

Add flags to describe checks for integrity meta buffer. Also, introduce
a  new 'uio_meta' structure that upper layer can use to pass the
meta/integrity information.

Signed-off-by: Kanchan Joshi <joshi.k@samsung.com>
Signed-off-by: Anuj Gupta <anuj20.g@samsung.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20241128112240.8867-5-anuj20.g@samsung.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/uio.h     | 9 +++++++++
 include/uapi/linux/fs.h | 9 +++++++++
 2 files changed, 18 insertions(+)

(limited to 'include/uapi')

diff --git a/include/linux/uio.h b/include/linux/uio.h
index 853f9de5aa05..8ada84e85447 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -82,6 +82,15 @@ struct iov_iter {
 	};
 };
 
+typedef __u16 uio_meta_flags_t;
+
+struct uio_meta {
+	uio_meta_flags_t	flags;
+	u16			app_tag;
+	u64			seed;
+	struct iov_iter		iter;
+};
+
 static inline const struct iovec *iter_iov(const struct iov_iter *iter)
 {
 	if (iter->iter_type == ITER_UBUF)
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index 753971770733..9070ef19f0a3 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -40,6 +40,15 @@
 #define BLOCK_SIZE_BITS 10
 #define BLOCK_SIZE (1<<BLOCK_SIZE_BITS)
 
+/* flags for integrity meta */
+#define IO_INTEGRITY_CHK_GUARD		(1U << 0) /* enforce guard check */
+#define IO_INTEGRITY_CHK_REFTAG		(1U << 1) /* enforce ref check */
+#define IO_INTEGRITY_CHK_APPTAG		(1U << 2) /* enforce app check */
+
+#define IO_INTEGRITY_VALID_FLAGS (IO_INTEGRITY_CHK_GUARD | \
+				  IO_INTEGRITY_CHK_REFTAG | \
+				  IO_INTEGRITY_CHK_APPTAG)
+
 #define SEEK_SET	0	/* seek relative to beginning of file */
 #define SEEK_CUR	1	/* seek relative to current file position */
 #define SEEK_END	2	/* seek relative to end of file */
-- 
cgit v1.2.3


From 59a7d12a7fb5ab24efa893e6980a00ffc090c777 Mon Sep 17 00:00:00 2001
From: Anuj Gupta <anuj20.g@samsung.com>
Date: Thu, 28 Nov 2024 16:52:36 +0530
Subject: io_uring: introduce attributes for read/write and PI support

Add the ability to pass additional attributes along with read/write.
Application can prepare attibute specific information and pass its
address using the SQE field:
	__u64	attr_ptr;

Along with setting a mask indicating attributes being passed:
	__u64	attr_type_mask;

Overall 64 attributes are allowed and currently one attribute
'IORING_RW_ATTR_FLAG_PI' is supported.

With PI attribute, userspace can pass following information:
- flags: integrity check flags IO_INTEGRITY_CHK_{GUARD/APPTAG/REFTAG}
- len: length of PI/metadata buffer
- addr: address of metadata buffer
- seed: seed value for reftag remapping
- app_tag: application defined 16b value

Process this information to prepare uio_meta_descriptor and pass it down
using kiocb->private.

PI attribute is supported only for direct IO.

Signed-off-by: Anuj Gupta <anuj20.g@samsung.com>
Signed-off-by: Kanchan Joshi <joshi.k@samsung.com>
Link: https://lore.kernel.org/r/20241128112240.8867-7-anuj20.g@samsung.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/uapi/linux/io_uring.h | 16 +++++++++
 io_uring/io_uring.c           |  2 ++
 io_uring/rw.c                 | 83 +++++++++++++++++++++++++++++++++++++++++--
 io_uring/rw.h                 | 14 +++++++-
 4 files changed, 112 insertions(+), 3 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index aac9a4f8fa9a..38f0d6b10eaf 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -98,6 +98,10 @@ struct io_uring_sqe {
 			__u64	addr3;
 			__u64	__pad2[1];
 		};
+		struct {
+			__u64	attr_ptr; /* pointer to attribute information */
+			__u64	attr_type_mask; /* bit mask of attributes */
+		};
 		__u64	optval;
 		/*
 		 * If the ring is initialized with IORING_SETUP_SQE128, then
@@ -107,6 +111,18 @@ struct io_uring_sqe {
 	};
 };
 
+/* sqe->attr_type_mask flags */
+#define IORING_RW_ATTR_FLAG_PI	(1U << 0)
+/* PI attribute information */
+struct io_uring_attr_pi {
+		__u16	flags;
+		__u16	app_tag;
+		__u32	len;
+		__u64	addr;
+		__u64	seed;
+		__u64	rsvd;
+};
+
 /*
  * If sqe->file_index is set to this for opcodes that instantiate a new
  * direct descriptor (like openat/openat2/accept), then io_uring will allocate
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 8dd3fa9be993..2e6891aadecc 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -3876,6 +3876,8 @@ static int __init io_uring_init(void)
 	BUILD_BUG_SQE_ELEM(46, __u16,  __pad3[0]);
 	BUILD_BUG_SQE_ELEM(48, __u64,  addr3);
 	BUILD_BUG_SQE_ELEM_SIZE(48, 0, cmd);
+	BUILD_BUG_SQE_ELEM(48, __u64, attr_ptr);
+	BUILD_BUG_SQE_ELEM(56, __u64, attr_type_mask);
 	BUILD_BUG_SQE_ELEM(56, __u64,  __pad2);
 
 	BUILD_BUG_ON(sizeof(struct io_uring_files_update) !=
diff --git a/io_uring/rw.c b/io_uring/rw.c
index 0bcb83e4ce3c..04e4467ab0ee 100644
--- a/io_uring/rw.c
+++ b/io_uring/rw.c
@@ -257,11 +257,53 @@ static int io_prep_rw_setup(struct io_kiocb *req, int ddir, bool do_import)
 	return 0;
 }
 
+static inline void io_meta_save_state(struct io_async_rw *io)
+{
+	io->meta_state.seed = io->meta.seed;
+	iov_iter_save_state(&io->meta.iter, &io->meta_state.iter_meta);
+}
+
+static inline void io_meta_restore(struct io_async_rw *io, struct kiocb *kiocb)
+{
+	if (kiocb->ki_flags & IOCB_HAS_METADATA) {
+		io->meta.seed = io->meta_state.seed;
+		iov_iter_restore(&io->meta.iter, &io->meta_state.iter_meta);
+	}
+}
+
+static int io_prep_rw_pi(struct io_kiocb *req, struct io_rw *rw, int ddir,
+			 u64 attr_ptr, u64 attr_type_mask)
+{
+	struct io_uring_attr_pi pi_attr;
+	struct io_async_rw *io;
+	int ret;
+
+	if (copy_from_user(&pi_attr, u64_to_user_ptr(attr_ptr),
+	    sizeof(pi_attr)))
+		return -EFAULT;
+
+	if (pi_attr.rsvd)
+		return -EINVAL;
+
+	io = req->async_data;
+	io->meta.flags = pi_attr.flags;
+	io->meta.app_tag = pi_attr.app_tag;
+	io->meta.seed = pi_attr.seed;
+	ret = import_ubuf(ddir, u64_to_user_ptr(pi_attr.addr),
+			  pi_attr.len, &io->meta.iter);
+	if (unlikely(ret < 0))
+		return ret;
+	rw->kiocb.ki_flags |= IOCB_HAS_METADATA;
+	io_meta_save_state(io);
+	return ret;
+}
+
 static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 		      int ddir, bool do_import)
 {
 	struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
 	unsigned ioprio;
+	u64 attr_type_mask;
 	int ret;
 
 	rw->kiocb.ki_pos = READ_ONCE(sqe->off);
@@ -279,11 +321,28 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 		rw->kiocb.ki_ioprio = get_current_ioprio();
 	}
 	rw->kiocb.dio_complete = NULL;
+	rw->kiocb.ki_flags = 0;
 
 	rw->addr = READ_ONCE(sqe->addr);
 	rw->len = READ_ONCE(sqe->len);
 	rw->flags = READ_ONCE(sqe->rw_flags);
-	return io_prep_rw_setup(req, ddir, do_import);
+	ret = io_prep_rw_setup(req, ddir, do_import);
+
+	if (unlikely(ret))
+		return ret;
+
+	attr_type_mask = READ_ONCE(sqe->attr_type_mask);
+	if (attr_type_mask) {
+		u64 attr_ptr;
+
+		/* only PI attribute is supported currently */
+		if (attr_type_mask != IORING_RW_ATTR_FLAG_PI)
+			return -EINVAL;
+
+		attr_ptr = READ_ONCE(sqe->attr_ptr);
+		ret = io_prep_rw_pi(req, rw, ddir, attr_ptr, attr_type_mask);
+	}
+	return ret;
 }
 
 int io_prep_read(struct io_kiocb *req, const struct io_uring_sqe *sqe)
@@ -409,7 +468,9 @@ static inline loff_t *io_kiocb_update_pos(struct io_kiocb *req)
 static void io_resubmit_prep(struct io_kiocb *req)
 {
 	struct io_async_rw *io = req->async_data;
+	struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
 
+	io_meta_restore(io, &rw->kiocb);
 	iov_iter_restore(&io->iter, &io->iter_state);
 }
 
@@ -744,6 +805,10 @@ static bool io_rw_should_retry(struct io_kiocb *req)
 	if (kiocb->ki_flags & (IOCB_DIRECT | IOCB_HIPRI))
 		return false;
 
+	/* never retry for meta io */
+	if (kiocb->ki_flags & IOCB_HAS_METADATA)
+		return false;
+
 	/*
 	 * just use poll if we can, and don't attempt if the fs doesn't
 	 * support callback based unlocks
@@ -794,7 +859,7 @@ static int io_rw_init_file(struct io_kiocb *req, fmode_t mode, int rw_type)
 	if (!(req->flags & REQ_F_FIXED_FILE))
 		req->flags |= io_file_get_flags(file);
 
-	kiocb->ki_flags = file->f_iocb_flags;
+	kiocb->ki_flags |= file->f_iocb_flags;
 	ret = kiocb_set_rw_flags(kiocb, rw->flags, rw_type);
 	if (unlikely(ret))
 		return ret;
@@ -828,6 +893,18 @@ static int io_rw_init_file(struct io_kiocb *req, fmode_t mode, int rw_type)
 		kiocb->ki_complete = io_complete_rw;
 	}
 
+	if (kiocb->ki_flags & IOCB_HAS_METADATA) {
+		struct io_async_rw *io = req->async_data;
+
+		/*
+		 * We have a union of meta fields with wpq used for buffered-io
+		 * in io_async_rw, so fail it here.
+		 */
+		if (!(req->file->f_flags & O_DIRECT))
+			return -EOPNOTSUPP;
+		kiocb->private = &io->meta;
+	}
+
 	return 0;
 }
 
@@ -902,6 +979,7 @@ static int __io_read(struct io_kiocb *req, unsigned int issue_flags)
 	 * manually if we need to.
 	 */
 	iov_iter_restore(&io->iter, &io->iter_state);
+	io_meta_restore(io, kiocb);
 
 	do {
 		/*
@@ -1125,6 +1203,7 @@ done:
 	} else {
 ret_eagain:
 		iov_iter_restore(&io->iter, &io->iter_state);
+		io_meta_restore(io, kiocb);
 		if (kiocb->ki_flags & IOCB_WRITE)
 			io_req_end_write(req);
 		return -EAGAIN;
diff --git a/io_uring/rw.h b/io_uring/rw.h
index 3f432dc75441..2d7656bd268d 100644
--- a/io_uring/rw.h
+++ b/io_uring/rw.h
@@ -2,6 +2,11 @@
 
 #include <linux/pagemap.h>
 
+struct io_meta_state {
+	u32			seed;
+	struct iov_iter_state	iter_meta;
+};
+
 struct io_async_rw {
 	size_t				bytes_done;
 	struct iov_iter			iter;
@@ -9,7 +14,14 @@ struct io_async_rw {
 	struct iovec			fast_iov;
 	struct iovec			*free_iovec;
 	int				free_iov_nr;
-	struct wait_page_queue		wpq;
+	/* wpq is for buffered io, while meta fields are used with direct io */
+	union {
+		struct wait_page_queue		wpq;
+		struct {
+			struct uio_meta			meta;
+			struct io_meta_state		meta_state;
+		};
+	};
 };
 
 int io_prep_read_fixed(struct io_kiocb *req, const struct io_uring_sqe *sqe);
-- 
cgit v1.2.3


From 6b830c6a023ff6e8fe05dbe47a9e5cd276df09ee Mon Sep 17 00:00:00 2001
From: "Matthieu Baerts (NGI0)" <matttbe@kernel.org>
Date: Sat, 21 Dec 2024 12:09:14 +0100
Subject: netlink: specs: mptcp: add missing 'server-side' attr

This attribute is added with the 'created' and 'established' events, but
the documentation didn't mention it.

The documentation in the UAPI header has been auto-generated by:

  ./tools/net/ynl/ynl-regen.sh

Reviewed-by: Geliang Tang <geliang@kernel.org>
Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Link: https://patch.msgid.link/20241221-net-mptcp-netlink-specs-pm-doc-fixes-v2-1-e54f2db3f844@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/netlink/specs/mptcp_pm.yaml |  6 ++++--
 include/uapi/linux/mptcp_pm.h             | 11 ++++++-----
 2 files changed, 10 insertions(+), 7 deletions(-)

(limited to 'include/uapi')

diff --git a/Documentation/netlink/specs/mptcp_pm.yaml b/Documentation/netlink/specs/mptcp_pm.yaml
index dc190bf838fe..fc0603f51665 100644
--- a/Documentation/netlink/specs/mptcp_pm.yaml
+++ b/Documentation/netlink/specs/mptcp_pm.yaml
@@ -23,7 +23,8 @@ definitions:
      -
       name: created
       doc:
-        token, family, saddr4 | saddr6, daddr4 | daddr6, sport, dport
+        token, family, saddr4 | saddr6, daddr4 | daddr6, sport, dport,
+        server-side
         A new MPTCP connection has been created. It is the good time to
         allocate memory and send ADD_ADDR if needed. Depending on the
         traffic-patterns it can take a long time until the
@@ -31,7 +32,8 @@ definitions:
      -
       name: established
       doc:
-        token, family, saddr4 | saddr6, daddr4 | daddr6, sport, dport
+        token, family, saddr4 | saddr6, daddr4 | daddr6, sport, dport,
+        server-side
         A MPTCP connection is established (can start new subflows).
      -
       name: closed
diff --git a/include/uapi/linux/mptcp_pm.h b/include/uapi/linux/mptcp_pm.h
index 50589e5dd6a3..b34fd95b6f84 100644
--- a/include/uapi/linux/mptcp_pm.h
+++ b/include/uapi/linux/mptcp_pm.h
@@ -13,12 +13,13 @@
  * enum mptcp_event_type
  * @MPTCP_EVENT_UNSPEC: unused event
  * @MPTCP_EVENT_CREATED: token, family, saddr4 | saddr6, daddr4 | daddr6,
- *   sport, dport A new MPTCP connection has been created. It is the good time
- *   to allocate memory and send ADD_ADDR if needed. Depending on the
- *   traffic-patterns it can take a long time until the MPTCP_EVENT_ESTABLISHED
- *   is sent.
+ *   sport, dport, server-side A new MPTCP connection has been created. It is
+ *   the good time to allocate memory and send ADD_ADDR if needed. Depending on
+ *   the traffic-patterns it can take a long time until the
+ *   MPTCP_EVENT_ESTABLISHED is sent.
  * @MPTCP_EVENT_ESTABLISHED: token, family, saddr4 | saddr6, daddr4 | daddr6,
- *   sport, dport A MPTCP connection is established (can start new subflows).
+ *   sport, dport, server-side A MPTCP connection is established (can start new
+ *   subflows).
  * @MPTCP_EVENT_CLOSED: token A MPTCP connection has stopped.
  * @MPTCP_EVENT_ANNOUNCED: token, rem_id, family, daddr4 | daddr6 [, dport] A
  *   new address has been announced by the peer.
-- 
cgit v1.2.3


From bea87657b5ee8e6f18af2833ee4b88212ef52d28 Mon Sep 17 00:00:00 2001
From: "Matthieu Baerts (NGI0)" <matttbe@kernel.org>
Date: Sat, 21 Dec 2024 12:09:15 +0100
Subject: netlink: specs: mptcp: clearly mention attributes

The rendered version of the MPTCP events [1] looked strange, because the
whole content of the 'doc' was displayed in the same block.

It was then not clear that the first words, not even ended by a period,
were the attributes that are defined when such events are emitted. These
attributes have now been moved to the end, prefixed by 'Attributes:' and
ended with a period. Note that '>-' has been added after 'doc:' to allow
':' in the text below.

The documentation in the UAPI header has been auto-generated by:

  ./tools/net/ynl/ynl-regen.sh

Link: https://docs.kernel.org/networking/netlink_spec/mptcp_pm.html#event-type [1]
Reviewed-by: Geliang Tang <geliang@kernel.org>
Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Link: https://patch.msgid.link/20241221-net-mptcp-netlink-specs-pm-doc-fixes-v2-2-e54f2db3f844@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/netlink/specs/mptcp_pm.yaml | 50 ++++++++++++++---------------
 include/uapi/linux/mptcp_pm.h             | 53 ++++++++++++++++---------------
 2 files changed, 52 insertions(+), 51 deletions(-)

(limited to 'include/uapi')

diff --git a/Documentation/netlink/specs/mptcp_pm.yaml b/Documentation/netlink/specs/mptcp_pm.yaml
index fc0603f51665..59087a230565 100644
--- a/Documentation/netlink/specs/mptcp_pm.yaml
+++ b/Documentation/netlink/specs/mptcp_pm.yaml
@@ -22,67 +22,67 @@ definitions:
       doc: unused event
      -
       name: created
-      doc:
-        token, family, saddr4 | saddr6, daddr4 | daddr6, sport, dport,
-        server-side
+      doc: >-
         A new MPTCP connection has been created. It is the good time to
         allocate memory and send ADD_ADDR if needed. Depending on the
         traffic-patterns it can take a long time until the
         MPTCP_EVENT_ESTABLISHED is sent.
+        Attributes: token, family, saddr4 | saddr6, daddr4 | daddr6, sport,
+        dport, server-side.
      -
       name: established
-      doc:
-        token, family, saddr4 | saddr6, daddr4 | daddr6, sport, dport,
-        server-side
+      doc: >-
         A MPTCP connection is established (can start new subflows).
+        Attributes: token, family, saddr4 | saddr6, daddr4 | daddr6, sport,
+        dport, server-side.
      -
       name: closed
-      doc:
-        token
+      doc: >-
         A MPTCP connection has stopped.
+        Attribute: token.
      -
       name: announced
       value: 6
-      doc:
-        token, rem_id, family, daddr4 | daddr6 [, dport]
+      doc: >-
         A new address has been announced by the peer.
+        Attributes: token, rem_id, family, daddr4 | daddr6 [, dport].
      -
       name: removed
-      doc:
-        token, rem_id
+      doc: >-
         An address has been lost by the peer.
+        Attributes: token, rem_id.
      -
       name: sub-established
       value: 10
-      doc:
-        token, family, loc_id, rem_id, saddr4 | saddr6, daddr4 | daddr6, sport,
-        dport, backup, if_idx [, error]
+      doc: >-
         A new subflow has been established. 'error' should not be set.
+        Attributes: token, family, loc_id, rem_id, saddr4 | saddr6, daddr4 |
+        daddr6, sport, dport, backup, if_idx [, error].
      -
       name: sub-closed
-      doc:
-        token, family, loc_id, rem_id, saddr4 | saddr6, daddr4 | daddr6, sport,
-        dport, backup, if_idx [, error]
+      doc: >-
         A subflow has been closed. An error (copy of sk_err) could be set if an
         error has been detected for this subflow.
+        Attributes: token, family, loc_id, rem_id, saddr4 | saddr6, daddr4 |
+        daddr6, sport, dport, backup, if_idx [, error].
      -
       name: sub-priority
       value: 13
-      doc:
-        token, family, loc_id, rem_id, saddr4 | saddr6, daddr4 | daddr6, sport,
-        dport, backup, if_idx [, error]
+      doc: >-
         The priority of a subflow has changed. 'error' should not be set.
+        Attributes: token, family, loc_id, rem_id, saddr4 | saddr6, daddr4 |
+        daddr6, sport, dport, backup, if_idx [, error].
      -
       name: listener-created
       value: 15
-      doc:
-        family, sport, saddr4 | saddr6
+      doc: >-
         A new PM listener is created.
+        Attributes: family, sport, saddr4 | saddr6.
      -
       name: listener-closed
-      doc:
-        family, sport, saddr4 | saddr6
+      doc: >-
         A PM listener is closed.
+        Attributes: family, sport, saddr4 | saddr6.
 
 attribute-sets:
   -
diff --git a/include/uapi/linux/mptcp_pm.h b/include/uapi/linux/mptcp_pm.h
index b34fd95b6f84..84fa8a21dfd0 100644
--- a/include/uapi/linux/mptcp_pm.h
+++ b/include/uapi/linux/mptcp_pm.h
@@ -12,32 +12,33 @@
 /**
  * enum mptcp_event_type
  * @MPTCP_EVENT_UNSPEC: unused event
- * @MPTCP_EVENT_CREATED: token, family, saddr4 | saddr6, daddr4 | daddr6,
- *   sport, dport, server-side A new MPTCP connection has been created. It is
- *   the good time to allocate memory and send ADD_ADDR if needed. Depending on
- *   the traffic-patterns it can take a long time until the
- *   MPTCP_EVENT_ESTABLISHED is sent.
- * @MPTCP_EVENT_ESTABLISHED: token, family, saddr4 | saddr6, daddr4 | daddr6,
- *   sport, dport, server-side A MPTCP connection is established (can start new
- *   subflows).
- * @MPTCP_EVENT_CLOSED: token A MPTCP connection has stopped.
- * @MPTCP_EVENT_ANNOUNCED: token, rem_id, family, daddr4 | daddr6 [, dport] A
- *   new address has been announced by the peer.
- * @MPTCP_EVENT_REMOVED: token, rem_id An address has been lost by the peer.
- * @MPTCP_EVENT_SUB_ESTABLISHED: token, family, loc_id, rem_id, saddr4 |
- *   saddr6, daddr4 | daddr6, sport, dport, backup, if_idx [, error] A new
- *   subflow has been established. 'error' should not be set.
- * @MPTCP_EVENT_SUB_CLOSED: token, family, loc_id, rem_id, saddr4 | saddr6,
- *   daddr4 | daddr6, sport, dport, backup, if_idx [, error] A subflow has been
- *   closed. An error (copy of sk_err) could be set if an error has been
- *   detected for this subflow.
- * @MPTCP_EVENT_SUB_PRIORITY: token, family, loc_id, rem_id, saddr4 | saddr6,
- *   daddr4 | daddr6, sport, dport, backup, if_idx [, error] The priority of a
- *   subflow has changed. 'error' should not be set.
- * @MPTCP_EVENT_LISTENER_CREATED: family, sport, saddr4 | saddr6 A new PM
- *   listener is created.
- * @MPTCP_EVENT_LISTENER_CLOSED: family, sport, saddr4 | saddr6 A PM listener
- *   is closed.
+ * @MPTCP_EVENT_CREATED: A new MPTCP connection has been created. It is the
+ *   good time to allocate memory and send ADD_ADDR if needed. Depending on the
+ *   traffic-patterns it can take a long time until the MPTCP_EVENT_ESTABLISHED
+ *   is sent. Attributes: token, family, saddr4 | saddr6, daddr4 | daddr6,
+ *   sport, dport, server-side.
+ * @MPTCP_EVENT_ESTABLISHED: A MPTCP connection is established (can start new
+ *   subflows). Attributes: token, family, saddr4 | saddr6, daddr4 | daddr6,
+ *   sport, dport, server-side.
+ * @MPTCP_EVENT_CLOSED: A MPTCP connection has stopped. Attribute: token.
+ * @MPTCP_EVENT_ANNOUNCED: A new address has been announced by the peer.
+ *   Attributes: token, rem_id, family, daddr4 | daddr6 [, dport].
+ * @MPTCP_EVENT_REMOVED: An address has been lost by the peer. Attributes:
+ *   token, rem_id.
+ * @MPTCP_EVENT_SUB_ESTABLISHED: A new subflow has been established. 'error'
+ *   should not be set. Attributes: token, family, loc_id, rem_id, saddr4 |
+ *   saddr6, daddr4 | daddr6, sport, dport, backup, if_idx [, error].
+ * @MPTCP_EVENT_SUB_CLOSED: A subflow has been closed. An error (copy of
+ *   sk_err) could be set if an error has been detected for this subflow.
+ *   Attributes: token, family, loc_id, rem_id, saddr4 | saddr6, daddr4 |
+ *   daddr6, sport, dport, backup, if_idx [, error].
+ * @MPTCP_EVENT_SUB_PRIORITY: The priority of a subflow has changed. 'error'
+ *   should not be set. Attributes: token, family, loc_id, rem_id, saddr4 |
+ *   saddr6, daddr4 | daddr6, sport, dport, backup, if_idx [, error].
+ * @MPTCP_EVENT_LISTENER_CREATED: A new PM listener is created. Attributes:
+ *   family, sport, saddr4 | saddr6.
+ * @MPTCP_EVENT_LISTENER_CLOSED: A PM listener is closed. Attributes: family,
+ *   sport, saddr4 | saddr6.
  */
 enum mptcp_event_type {
 	MPTCP_EVENT_UNSPEC,
-- 
cgit v1.2.3


From 7a637e5e27a68fd52327a80136d5d0184c43888f Mon Sep 17 00:00:00 2001
From: Danylo Piliaiev <danylo.piliaiev@gmail.com>
Date: Tue, 3 Dec 2024 10:59:20 +0100
Subject: drm/msm: Expose uche trap base via uapi

This adds MSM_PARAM_UCHE_TRAP_BASE that will be used by Mesa
implementation for VK_KHR_shader_clock and GL_ARB_shader_clock.

Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Patchwork: https://patchwork.freedesktop.org/patch/627036/
Signed-off-by: Rob Clark <robdclark@chromium.org>
---
 drivers/gpu/drm/msm/adreno/a4xx_gpu.c   |  6 ++++--
 drivers/gpu/drm/msm/adreno/a5xx_gpu.c   | 10 ++++++----
 drivers/gpu/drm/msm/adreno/a6xx_gpu.c   | 12 +++++++-----
 drivers/gpu/drm/msm/adreno/adreno_gpu.c |  3 +++
 drivers/gpu/drm/msm/adreno/adreno_gpu.h |  2 ++
 include/uapi/drm/msm_drm.h              |  1 +
 6 files changed, 23 insertions(+), 11 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c
index 50c490b492f0..f1b18a6663f7 100644
--- a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c
@@ -251,8 +251,8 @@ static int a4xx_hw_init(struct msm_gpu *gpu)
 		gpu_write(gpu, REG_A4XX_UCHE_CACHE_WAYS_VFD, 0x07);
 
 	/* Disable L2 bypass to avoid UCHE out of bounds errors */
-	gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_LO, 0xffff0000);
-	gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_HI, 0xffff0000);
+	gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_LO, lower_32_bits(adreno_gpu->uche_trap_base));
+	gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_HI, upper_32_bits(adreno_gpu->uche_trap_base));
 
 	gpu_write(gpu, REG_A4XX_CP_DEBUG, (1 << 25) |
 			(adreno_is_a420(adreno_gpu) ? (1 << 29) : 0));
@@ -693,6 +693,8 @@ struct msm_gpu *a4xx_gpu_init(struct drm_device *dev)
 	if (ret)
 		goto fail;
 
+	adreno_gpu->uche_trap_base = 0xffff0000ffff0000ull;
+
 	if (!gpu->aspace) {
 		/* TODO we think it is possible to configure the GPU to
 		 * restrict access to VRAM carveout.  But the required
diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
index ee89db72e36e..caf2c0a7a29f 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
@@ -750,10 +750,10 @@ static int a5xx_hw_init(struct msm_gpu *gpu)
 	gpu_write(gpu, REG_A5XX_UCHE_CACHE_WAYS, 0x02);
 
 	/* Disable L2 bypass in the UCHE */
-	gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_LO, 0xFFFF0000);
-	gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_HI, 0x0001FFFF);
-	gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_LO, 0xFFFF0000);
-	gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_HI, 0x0001FFFF);
+	gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_LO, lower_32_bits(adreno_gpu->uche_trap_base));
+	gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_HI, upper_32_bits(adreno_gpu->uche_trap_base));
+	gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_LO, lower_32_bits(adreno_gpu->uche_trap_base));
+	gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_HI, upper_32_bits(adreno_gpu->uche_trap_base));
 
 	/* Set the GMEM VA range (0 to gpu->gmem) */
 	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_LO, 0x00100000);
@@ -1805,5 +1805,7 @@ struct msm_gpu *a5xx_gpu_init(struct drm_device *dev)
 	adreno_gpu->ubwc_config.macrotile_mode = 0;
 	adreno_gpu->ubwc_config.ubwc_swizzle = 0x7;
 
+	adreno_gpu->uche_trap_base = 0x0001ffffffff0000ull;
+
 	return gpu;
 }
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index 019610341df1..0ae29a7c8a4d 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -1123,12 +1123,12 @@ static int hw_init(struct msm_gpu *gpu)
 
 	/* Disable L2 bypass in the UCHE */
 	if (adreno_is_a7xx(adreno_gpu)) {
-		gpu_write64(gpu, REG_A6XX_UCHE_TRAP_BASE, 0x0001fffffffff000llu);
-		gpu_write64(gpu, REG_A6XX_UCHE_WRITE_THRU_BASE, 0x0001fffffffff000llu);
+		gpu_write64(gpu, REG_A6XX_UCHE_TRAP_BASE, adreno_gpu->uche_trap_base);
+		gpu_write64(gpu, REG_A6XX_UCHE_WRITE_THRU_BASE, adreno_gpu->uche_trap_base);
 	} else {
-		gpu_write64(gpu, REG_A6XX_UCHE_WRITE_RANGE_MAX, 0x0001ffffffffffc0llu);
-		gpu_write64(gpu, REG_A6XX_UCHE_TRAP_BASE, 0x0001fffffffff000llu);
-		gpu_write64(gpu, REG_A6XX_UCHE_WRITE_THRU_BASE, 0x0001fffffffff000llu);
+		gpu_write64(gpu, REG_A6XX_UCHE_WRITE_RANGE_MAX, adreno_gpu->uche_trap_base + 0xfc0);
+		gpu_write64(gpu, REG_A6XX_UCHE_TRAP_BASE, adreno_gpu->uche_trap_base);
+		gpu_write64(gpu, REG_A6XX_UCHE_WRITE_THRU_BASE, adreno_gpu->uche_trap_base);
 	}
 
 	if (!(adreno_is_a650_family(adreno_gpu) ||
@@ -2533,6 +2533,8 @@ struct msm_gpu *a6xx_gpu_init(struct drm_device *dev)
 		}
 	}
 
+	adreno_gpu->uche_trap_base = 0x1fffffffff000ull;
+
 	if (gpu->aspace)
 		msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu,
 				a6xx_fault_handler);
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index 75f5367e73ca..c7454b6cfb7f 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -385,6 +385,9 @@ int adreno_get_param(struct msm_gpu *gpu, struct msm_file_private *ctx,
 	case MSM_PARAM_MACROTILE_MODE:
 		*value = adreno_gpu->ubwc_config.macrotile_mode;
 		return 0;
+	case MSM_PARAM_UCHE_TRAP_BASE:
+		*value = adreno_gpu->uche_trap_base;
+		return 0;
 	default:
 		DBG("%s: invalid param: %u", gpu->name, param);
 		return -EINVAL;
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
index f5d6087376f5..dcf454629ce0 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
@@ -253,6 +253,8 @@ struct adreno_gpu {
 	bool gmu_is_wrapper;
 
 	bool has_ray_tracing;
+
+	u64 uche_trap_base;
 };
 #define to_adreno_gpu(x) container_of(x, struct adreno_gpu, base)
 
diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h
index b916aab80dde..2342cb90857e 100644
--- a/include/uapi/drm/msm_drm.h
+++ b/include/uapi/drm/msm_drm.h
@@ -90,6 +90,7 @@ struct drm_msm_timespec {
 #define MSM_PARAM_RAYTRACING 0x11 /* RO */
 #define MSM_PARAM_UBWC_SWIZZLE 0x12 /* RO */
 #define MSM_PARAM_MACROTILE_MODE 0x13 /* RO */
+#define MSM_PARAM_UCHE_TRAP_BASE 0x14 /* RO */
 
 /* For backwards compat.  The original support for preemption was based on
  * a single ring per priority level so # of priority levels equals the #
-- 
cgit v1.2.3


From af6505e5745b9f3a670de405b08b73573343c15c Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Fri, 20 Dec 2024 08:47:45 -0700
Subject: fs: add RWF_DONTCACHE iocb and FOP_DONTCACHE file_operations flag

If a file system supports uncached buffered IO, it may set FOP_DONTCACHE
and enable support for RWF_DONTCACHE. If RWF_DONTCACHE is attempted
without the file system supporting it, it'll get errored with -EOPNOTSUPP.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
Link: https://lore.kernel.org/r/20241220154831.1086649-8-axboe@kernel.dk
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/fs.h      | 14 +++++++++++++-
 include/uapi/linux/fs.h |  6 +++++-
 2 files changed, 18 insertions(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 7e29433c5ecc..6a838b5479a6 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -322,6 +322,7 @@ struct readahead_control;
 #define IOCB_NOWAIT		(__force int) RWF_NOWAIT
 #define IOCB_APPEND		(__force int) RWF_APPEND
 #define IOCB_ATOMIC		(__force int) RWF_ATOMIC
+#define IOCB_DONTCACHE		(__force int) RWF_DONTCACHE
 
 /* non-RWF related bits - start at 16 */
 #define IOCB_EVENTFD		(1 << 16)
@@ -356,7 +357,8 @@ struct readahead_control;
 	{ IOCB_SYNC,		"SYNC" }, \
 	{ IOCB_NOWAIT,		"NOWAIT" }, \
 	{ IOCB_APPEND,		"APPEND" }, \
-	{ IOCB_ATOMIC,		"ATOMIC"}, \
+	{ IOCB_ATOMIC,		"ATOMIC" }, \
+	{ IOCB_DONTCACHE,	"DONTCACHE" }, \
 	{ IOCB_EVENTFD,		"EVENTFD"}, \
 	{ IOCB_DIRECT,		"DIRECT" }, \
 	{ IOCB_WRITE,		"WRITE" }, \
@@ -2127,6 +2129,8 @@ struct file_operations {
 #define FOP_UNSIGNED_OFFSET	((__force fop_flags_t)(1 << 5))
 /* Supports asynchronous lock callbacks */
 #define FOP_ASYNC_LOCK		((__force fop_flags_t)(1 << 6))
+/* File system supports uncached read/write buffered IO */
+#define FOP_DONTCACHE		((__force fop_flags_t)(1 << 7))
 
 /* Wrap a directory iterator that needs exclusive inode access */
 int wrap_directory_iterator(struct file *, struct dir_context *,
@@ -3614,6 +3618,14 @@ static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags,
 		if (!(ki->ki_filp->f_mode & FMODE_CAN_ATOMIC_WRITE))
 			return -EOPNOTSUPP;
 	}
+	if (flags & RWF_DONTCACHE) {
+		/* file system must support it */
+		if (!(ki->ki_filp->f_op->fop_flags & FOP_DONTCACHE))
+			return -EOPNOTSUPP;
+		/* DAX mappings not supported */
+		if (IS_DAX(ki->ki_filp->f_mapping->host))
+			return -EOPNOTSUPP;
+	}
 	kiocb_flags |= (__force int) (flags & RWF_SUPPORTED);
 	if (flags & RWF_SYNC)
 		kiocb_flags |= IOCB_DSYNC;
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index 753971770733..56a4f93a08f4 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -332,9 +332,13 @@ typedef int __bitwise __kernel_rwf_t;
 /* Atomic Write */
 #define RWF_ATOMIC	((__force __kernel_rwf_t)0x00000040)
 
+/* buffered IO that drops the cache after reading or writing data */
+#define RWF_DONTCACHE	((__force __kernel_rwf_t)0x00000080)
+
 /* mask of flags supported by the kernel */
 #define RWF_SUPPORTED	(RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_NOWAIT |\
-			 RWF_APPEND | RWF_NOAPPEND | RWF_ATOMIC)
+			 RWF_APPEND | RWF_NOAPPEND | RWF_ATOMIC |\
+			 RWF_DONTCACHE)
 
 #define PROCFS_IOCTL_MAGIC 'f'
 
-- 
cgit v1.2.3


From b9ed315d3c4c0c294a4348edb6874d489bac47fa Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Sat, 21 Dec 2024 00:46:56 +0100
Subject: netkit: Allow for configuring needed_{head,tail}room

Allow the user to configure needed_{head,tail}room for both netkit
devices. The idea is similar to 163e529200af ("veth: implement
ndo_set_rx_headroom") with the difference that the two parameters
can be specified upon device creation. By default the current behavior
stays as is which is needed_{head,tail}room is 0.

In case of Cilium, for example, the netkit devices are not enslaved
into a bridge or openvswitch device (rather, BPF-based redirection
is used out of tcx), and as such these parameters are not propagated
into the Pod's netns via peer device.

Given Cilium can run in vxlan/geneve tunneling mode (needed_headroom)
and/or be used in combination with WireGuard (needed_{head,tail}room),
allow the Cilium CNI plugin to specify these two upon netkit device
creation.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Jakub Kicinski <kuba@kernel.org>
Acked-by: Nikolay Aleksandrov <razor@blackwall.org>
Link: https://lore.kernel.org/bpf/20241220234658.490686-1-daniel@iogearbox.net
---
 drivers/net/netkit.c               | 66 +++++++++++++++++++++++++-------------
 include/uapi/linux/if_link.h       |  2 ++
 tools/include/uapi/linux/if_link.h |  2 ++
 3 files changed, 47 insertions(+), 23 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/net/netkit.c b/drivers/net/netkit.c
index c1d881dc6409..1e1b00756be7 100644
--- a/drivers/net/netkit.c
+++ b/drivers/net/netkit.c
@@ -338,6 +338,7 @@ static int netkit_new_link(struct net *peer_net, struct net_device *dev,
 	enum netkit_scrub scrub_peer = NETKIT_SCRUB_DEFAULT;
 	enum netkit_mode mode = NETKIT_L3;
 	unsigned char ifname_assign_type;
+	u16 headroom = 0, tailroom = 0;
 	struct ifinfomsg *ifmp = NULL;
 	struct net_device *peer;
 	char ifname[IFNAMSIZ];
@@ -371,6 +372,10 @@ static int netkit_new_link(struct net *peer_net, struct net_device *dev,
 			if (err < 0)
 				return err;
 		}
+		if (data[IFLA_NETKIT_HEADROOM])
+			headroom = nla_get_u16(data[IFLA_NETKIT_HEADROOM]);
+		if (data[IFLA_NETKIT_TAILROOM])
+			tailroom = nla_get_u16(data[IFLA_NETKIT_TAILROOM]);
 	}
 
 	if (ifmp && tbp[IFLA_IFNAME]) {
@@ -390,6 +395,14 @@ static int netkit_new_link(struct net *peer_net, struct net_device *dev,
 		return PTR_ERR(peer);
 
 	netif_inherit_tso_max(peer, dev);
+	if (headroom) {
+		peer->needed_headroom = headroom;
+		dev->needed_headroom = headroom;
+	}
+	if (tailroom) {
+		peer->needed_tailroom = tailroom;
+		dev->needed_tailroom = tailroom;
+	}
 
 	if (mode == NETKIT_L2 && !(ifmp && tbp[IFLA_ADDRESS]))
 		eth_hw_addr_random(peer);
@@ -401,6 +414,7 @@ static int netkit_new_link(struct net *peer_net, struct net_device *dev,
 	nk->policy = policy_peer;
 	nk->scrub = scrub_peer;
 	nk->mode = mode;
+	nk->headroom = headroom;
 	bpf_mprog_bundle_init(&nk->bundle);
 
 	err = register_netdevice(peer);
@@ -426,6 +440,7 @@ static int netkit_new_link(struct net *peer_net, struct net_device *dev,
 	nk->policy = policy_prim;
 	nk->scrub = scrub_prim;
 	nk->mode = mode;
+	nk->headroom = headroom;
 	bpf_mprog_bundle_init(&nk->bundle);
 
 	err = register_netdevice(dev);
@@ -850,7 +865,18 @@ static int netkit_change_link(struct net_device *dev, struct nlattr *tb[],
 	struct net_device *peer = rtnl_dereference(nk->peer);
 	enum netkit_action policy;
 	struct nlattr *attr;
-	int err;
+	int err, i;
+	static const struct {
+		u32 attr;
+		char *name;
+	} fixed_params[] = {
+		{ IFLA_NETKIT_MODE,       "operating mode" },
+		{ IFLA_NETKIT_SCRUB,      "scrubbing" },
+		{ IFLA_NETKIT_PEER_SCRUB, "peer scrubbing" },
+		{ IFLA_NETKIT_PEER_INFO,  "peer info" },
+		{ IFLA_NETKIT_HEADROOM,   "headroom" },
+		{ IFLA_NETKIT_TAILROOM,   "tailroom" },
+	};
 
 	if (!nk->primary) {
 		NL_SET_ERR_MSG(extack,
@@ -858,28 +884,14 @@ static int netkit_change_link(struct net_device *dev, struct nlattr *tb[],
 		return -EACCES;
 	}
 
-	if (data[IFLA_NETKIT_MODE]) {
-		NL_SET_ERR_MSG_ATTR(extack, data[IFLA_NETKIT_MODE],
-				    "netkit link operating mode cannot be changed after device creation");
-		return -EACCES;
-	}
-
-	if (data[IFLA_NETKIT_SCRUB]) {
-		NL_SET_ERR_MSG_ATTR(extack, data[IFLA_NETKIT_SCRUB],
-				    "netkit scrubbing cannot be changed after device creation");
-		return -EACCES;
-	}
-
-	if (data[IFLA_NETKIT_PEER_SCRUB]) {
-		NL_SET_ERR_MSG_ATTR(extack, data[IFLA_NETKIT_PEER_SCRUB],
-				    "netkit scrubbing cannot be changed after device creation");
-		return -EACCES;
-	}
-
-	if (data[IFLA_NETKIT_PEER_INFO]) {
-		NL_SET_ERR_MSG_ATTR(extack, data[IFLA_NETKIT_PEER_INFO],
-				    "netkit peer info cannot be changed after device creation");
-		return -EINVAL;
+	for (i = 0; i < ARRAY_SIZE(fixed_params); i++) {
+		attr = data[fixed_params[i].attr];
+		if (attr) {
+			NL_SET_ERR_MSG_ATTR_FMT(extack, attr,
+						"netkit link %s cannot be changed after device creation",
+						fixed_params[i].name);
+			return -EACCES;
+		}
 	}
 
 	if (data[IFLA_NETKIT_POLICY]) {
@@ -914,6 +926,8 @@ static size_t netkit_get_size(const struct net_device *dev)
 	       nla_total_size(sizeof(u32)) + /* IFLA_NETKIT_PEER_SCRUB */
 	       nla_total_size(sizeof(u32)) + /* IFLA_NETKIT_MODE */
 	       nla_total_size(sizeof(u8))  + /* IFLA_NETKIT_PRIMARY */
+	       nla_total_size(sizeof(u16)) + /* IFLA_NETKIT_HEADROOM */
+	       nla_total_size(sizeof(u16)) + /* IFLA_NETKIT_TAILROOM */
 	       0;
 }
 
@@ -930,6 +944,10 @@ static int netkit_fill_info(struct sk_buff *skb, const struct net_device *dev)
 		return -EMSGSIZE;
 	if (nla_put_u32(skb, IFLA_NETKIT_SCRUB, nk->scrub))
 		return -EMSGSIZE;
+	if (nla_put_u16(skb, IFLA_NETKIT_HEADROOM, dev->needed_headroom))
+		return -EMSGSIZE;
+	if (nla_put_u16(skb, IFLA_NETKIT_TAILROOM, dev->needed_tailroom))
+		return -EMSGSIZE;
 
 	if (peer) {
 		nk = netkit_priv(peer);
@@ -947,6 +965,8 @@ static const struct nla_policy netkit_policy[IFLA_NETKIT_MAX + 1] = {
 	[IFLA_NETKIT_MODE]		= NLA_POLICY_MAX(NLA_U32, NETKIT_L3),
 	[IFLA_NETKIT_POLICY]		= { .type = NLA_U32 },
 	[IFLA_NETKIT_PEER_POLICY]	= { .type = NLA_U32 },
+	[IFLA_NETKIT_HEADROOM]		= { .type = NLA_U16 },
+	[IFLA_NETKIT_TAILROOM]		= { .type = NLA_U16 },
 	[IFLA_NETKIT_SCRUB]		= NLA_POLICY_MAX(NLA_U32, NETKIT_SCRUB_DEFAULT),
 	[IFLA_NETKIT_PEER_SCRUB]	= NLA_POLICY_MAX(NLA_U32, NETKIT_SCRUB_DEFAULT),
 	[IFLA_NETKIT_PRIMARY]		= { .type = NLA_REJECT,
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 2575e0cd9b48..2fa2c265dcba 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -1315,6 +1315,8 @@ enum {
 	IFLA_NETKIT_MODE,
 	IFLA_NETKIT_SCRUB,
 	IFLA_NETKIT_PEER_SCRUB,
+	IFLA_NETKIT_HEADROOM,
+	IFLA_NETKIT_TAILROOM,
 	__IFLA_NETKIT_MAX,
 };
 #define IFLA_NETKIT_MAX	(__IFLA_NETKIT_MAX - 1)
diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h
index 8516c1ccd57a..7e46ca4cd31b 100644
--- a/tools/include/uapi/linux/if_link.h
+++ b/tools/include/uapi/linux/if_link.h
@@ -1315,6 +1315,8 @@ enum {
 	IFLA_NETKIT_MODE,
 	IFLA_NETKIT_SCRUB,
 	IFLA_NETKIT_PEER_SCRUB,
+	IFLA_NETKIT_HEADROOM,
+	IFLA_NETKIT_TAILROOM,
 	__IFLA_NETKIT_MAX,
 };
 #define IFLA_NETKIT_MAX	(__IFLA_NETKIT_MAX - 1)
-- 
cgit v1.2.3


From b989531e0bd7704dc837e37f8d16bbc921e6cf6c Mon Sep 17 00:00:00 2001
From: Yongji Xie <xieyongji@bytedance.com>
Date: Tue, 19 Nov 2024 15:42:38 +0800
Subject: vduse: relicense under GPL-2.0 OR BSD-3-Clause

Dual-license the vduse kernel header file to dual
GPL-2.0 OR BSD-3-Clause license to make it possible
to ship it with DPDK (under BSD-3-Clause) for older
distros.

Signed-off-by: Yongji Xie <xieyongji@bytedance.com>
Message-Id: <20241119074238.38299-1-xieyongji@bytedance.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/uapi/linux/vduse.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/vduse.h b/include/uapi/linux/vduse.h
index 11bd48c72c6c..68a627d04afa 100644
--- a/include/uapi/linux/vduse.h
+++ b/include/uapi/linux/vduse.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
 #ifndef _UAPI_VDUSE_H_
 #define _UAPI_VDUSE_H_
 
-- 
cgit v1.2.3


From 86b525bed2758878e788c9fb6b8fb281fd61bdb0 Mon Sep 17 00:00:00 2001
From: Rodolfo Giometti <giometti@enneenne.com>
Date: Fri, 8 Nov 2024 08:31:12 +0100
Subject: drivers pps: add PPS generators support

Sometimes one needs to be able not only to catch PPS signals but to
produce them also. For example, running a distributed simulation,
which requires computers' clock to be synchronized very tightly.

This patch adds PPS generators class in order to have a well-defined
interface for these devices.

Signed-off-by: Rodolfo Giometti <giometti@enneenne.com>
Link: https://lore.kernel.org/r/20241108073115.759039-2-giometti@enneenne.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/userspace-api/ioctl/ioctl-number.rst |   1 +
 MAINTAINERS                                        |   1 +
 drivers/pps/Makefile                               |   3 +-
 drivers/pps/generators/Kconfig                     |  13 +-
 drivers/pps/generators/Makefile                    |   3 +
 drivers/pps/generators/pps_gen.c                   | 344 +++++++++++++++++++++
 drivers/pps/generators/sysfs.c                     |  75 +++++
 include/linux/pps_gen_kernel.h                     |  78 +++++
 include/uapi/linux/pps_gen.h                       |  37 +++
 9 files changed, 553 insertions(+), 2 deletions(-)
 create mode 100644 drivers/pps/generators/pps_gen.c
 create mode 100644 drivers/pps/generators/sysfs.c
 create mode 100644 include/linux/pps_gen_kernel.h
 create mode 100644 include/uapi/linux/pps_gen.h

(limited to 'include/uapi')

diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst b/Documentation/userspace-api/ioctl/ioctl-number.rst
index 243f1f1b554a..f7cb871cb714 100644
--- a/Documentation/userspace-api/ioctl/ioctl-number.rst
+++ b/Documentation/userspace-api/ioctl/ioctl-number.rst
@@ -283,6 +283,7 @@ Code  Seq#    Include File                                           Comments
 'p'   80-9F  linux/ppdev.h                                           user-space parport
                                                                      <mailto:tim@cyberelk.net>
 'p'   A1-A5  linux/pps.h                                             LinuxPPS
+'p'   B1-B3  linux/pps-gen.h                                         LinuxPPS
                                                                      <mailto:giometti@linux.it>
 'q'   00-1F  linux/serio.h
 'q'   80-FF  linux/telephony.h                                       Internet PhoneJACK, Internet LineJACK
diff --git a/MAINTAINERS b/MAINTAINERS
index 124b8574aa76..530eca844bd4 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -18719,6 +18719,7 @@ F:	Documentation/devicetree/bindings/pps/pps-gpio.yaml
 F:	Documentation/driver-api/pps.rst
 F:	drivers/pps/
 F:	include/linux/pps*.h
+F:	include/uapi/linux/pps-gen.h
 F:	include/uapi/linux/pps.h
 
 PRESSURE STALL INFORMATION (PSI)
diff --git a/drivers/pps/Makefile b/drivers/pps/Makefile
index ceaf65cc1f1d..0aea394d4e4d 100644
--- a/drivers/pps/Makefile
+++ b/drivers/pps/Makefile
@@ -6,6 +6,7 @@
 pps_core-y			:= pps.o kapi.o sysfs.o
 pps_core-$(CONFIG_NTP_PPS)	+= kc.o
 obj-$(CONFIG_PPS)		:= pps_core.o
-obj-y				+= clients/ generators/
+obj-y				+= clients/
+obj-$(CONFIG_PPS_GENERATOR)	+= generators/
 
 ccflags-$(CONFIG_PPS_DEBUG) := -DDEBUG
diff --git a/drivers/pps/generators/Kconfig b/drivers/pps/generators/Kconfig
index d615e640fcad..b34e483eff21 100644
--- a/drivers/pps/generators/Kconfig
+++ b/drivers/pps/generators/Kconfig
@@ -3,7 +3,16 @@
 # PPS generators configuration
 #
 
-comment "PPS generators support"
+menuconfig PPS_GENERATOR
+	tristate "PPS generators support"
+	help
+	  PPS generators are special hardware which are able to produce PPS
+	  (Pulse Per Second) signals.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called pps_gen_core.
+
+if PPS_GENERATOR
 
 config PPS_GENERATOR_PARPORT
 	tristate "Parallel port PPS signal generator"
@@ -12,3 +21,5 @@ config PPS_GENERATOR_PARPORT
 	  If you say yes here you get support for a PPS signal generator which
 	  utilizes STROBE pin of a parallel port to send PPS signals. It uses
 	  parport abstraction layer and hrtimers to precisely control the signal.
+
+endif # PPS_GENERATOR
diff --git a/drivers/pps/generators/Makefile b/drivers/pps/generators/Makefile
index 2589fd0f2481..034a78edfa26 100644
--- a/drivers/pps/generators/Makefile
+++ b/drivers/pps/generators/Makefile
@@ -3,6 +3,9 @@
 # Makefile for PPS generators.
 #
 
+pps_gen_core-y			:= pps_gen.o sysfs.o
+obj-$(CONFIG_PPS_GENERATOR)	:= pps_gen_core.o
+
 obj-$(CONFIG_PPS_GENERATOR_PARPORT) += pps_gen_parport.o
 
 ccflags-$(CONFIG_PPS_DEBUG) := -DDEBUG
diff --git a/drivers/pps/generators/pps_gen.c b/drivers/pps/generators/pps_gen.c
new file mode 100644
index 000000000000..ca592f1736f4
--- /dev/null
+++ b/drivers/pps/generators/pps_gen.c
@@ -0,0 +1,344 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * PPS generators core file
+ *
+ * Copyright (C) 2024 Rodolfo Giometti <giometti@enneenne.com>
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/time.h>
+#include <linux/timex.h>
+#include <linux/uaccess.h>
+#include <linux/idr.h>
+#include <linux/cdev.h>
+#include <linux/poll.h>
+#include <linux/fs.h>
+#include <linux/pps_gen_kernel.h>
+#include <linux/slab.h>
+
+/*
+ * Local variables
+ */
+
+static dev_t pps_gen_devt;
+static struct class *pps_gen_class;
+
+static DEFINE_IDA(pps_gen_ida);
+
+/*
+ * Char device methods
+ */
+
+static __poll_t pps_gen_cdev_poll(struct file *file, poll_table *wait)
+{
+	struct pps_gen_device *pps_gen = file->private_data;
+
+	poll_wait(file, &pps_gen->queue, wait);
+	return EPOLLIN | EPOLLRDNORM;
+}
+
+static int pps_gen_cdev_fasync(int fd, struct file *file, int on)
+{
+	struct pps_gen_device *pps_gen = file->private_data;
+
+	return fasync_helper(fd, file, on, &pps_gen->async_queue);
+}
+
+static long pps_gen_cdev_ioctl(struct file *file,
+		unsigned int cmd, unsigned long arg)
+{
+	struct pps_gen_device *pps_gen = file->private_data;
+	void __user *uarg = (void __user *) arg;
+	unsigned int __user *uiuarg = (unsigned int __user *) arg;
+	unsigned int status;
+	int ret;
+
+	switch (cmd) {
+	case PPS_GEN_SETENABLE:
+		dev_dbg(pps_gen->dev, "PPS_GEN_SETENABLE\n");
+
+		ret = get_user(status, uiuarg);
+		if (ret)
+			return -EFAULT;
+
+		ret = pps_gen->info.enable(pps_gen, status);
+		if (ret)
+			return ret;
+		pps_gen->enabled = status;
+
+		break;
+
+	case PPS_GEN_USESYSTEMCLOCK:
+		dev_dbg(pps_gen->dev, "PPS_GEN_USESYSTEMCLOCK\n");
+
+		ret = put_user(pps_gen->info.use_system_clock, uiuarg);
+		if (ret)
+			return -EFAULT;
+
+		break;
+
+	case PPS_GEN_FETCHEVENT: {
+		struct pps_gen_event info;
+		unsigned int ev = pps_gen->last_ev;
+
+		dev_dbg(pps_gen->dev, "PPS_GEN_FETCHEVENT\n");
+
+		ret = wait_event_interruptible(pps_gen->queue,
+				ev != pps_gen->last_ev);
+		if (ret == -ERESTARTSYS) {
+			dev_dbg(pps_gen->dev, "pending signal caught\n");
+			return -EINTR;
+		}
+
+		spin_lock_irq(&pps_gen->lock);
+		info.sequence = pps_gen->sequence;
+		info.event = pps_gen->event;
+		spin_unlock_irq(&pps_gen->lock);
+
+		ret = copy_to_user(uarg, &info, sizeof(struct pps_gen_event));
+		if (ret)
+			return -EFAULT;
+
+		break;
+	}
+	default:
+		return -ENOTTY;
+	}
+
+	return 0;
+}
+
+static int pps_gen_cdev_open(struct inode *inode, struct file *file)
+{
+	struct pps_gen_device *pps_gen = container_of(inode->i_cdev,
+				struct pps_gen_device, cdev);
+
+	get_device(pps_gen->dev);
+	file->private_data = pps_gen;
+	return 0;
+}
+
+static int pps_gen_cdev_release(struct inode *inode, struct file *file)
+{
+	struct pps_gen_device *pps_gen = file->private_data;
+
+	put_device(pps_gen->dev);
+	return 0;
+}
+
+/*
+ * Char device stuff
+ */
+
+static const struct file_operations pps_gen_cdev_fops = {
+	.owner		= THIS_MODULE,
+	.poll	   = pps_gen_cdev_poll,
+	.fasync	 = pps_gen_cdev_fasync,
+	.unlocked_ioctl	= pps_gen_cdev_ioctl,
+	.open		= pps_gen_cdev_open,
+	.release	= pps_gen_cdev_release,
+};
+
+static void pps_gen_device_destruct(struct device *dev)
+{
+	struct pps_gen_device *pps_gen = dev_get_drvdata(dev);
+
+	cdev_del(&pps_gen->cdev);
+
+	pr_debug("deallocating pps-gen%d\n", pps_gen->id);
+	ida_free(&pps_gen_ida, pps_gen->id);
+
+	kfree(dev);
+	kfree(pps_gen);
+}
+
+static int pps_gen_register_cdev(struct pps_gen_device *pps_gen)
+{
+	int err;
+	dev_t devt;
+
+	err = ida_alloc_max(&pps_gen_ida, PPS_GEN_MAX_SOURCES - 1, GFP_KERNEL);
+	if (err < 0) {
+		if (err == -ENOSPC) {
+			pr_err("too many PPS sources in the system\n");
+			err = -EBUSY;
+		}
+		return err;
+	}
+	pps_gen->id = err;
+
+	devt = MKDEV(MAJOR(pps_gen_devt), pps_gen->id);
+
+	cdev_init(&pps_gen->cdev, &pps_gen_cdev_fops);
+	pps_gen->cdev.owner = pps_gen->info.owner;
+
+	err = cdev_add(&pps_gen->cdev, devt, 1);
+	if (err) {
+		pr_err("failed to add char device %d:%d\n",
+				MAJOR(pps_gen_devt), pps_gen->id);
+		goto free_ida;
+	}
+	pps_gen->dev = device_create(pps_gen_class, pps_gen->info.parent, devt,
+					pps_gen, "pps-gen%d", pps_gen->id);
+	if (IS_ERR(pps_gen->dev)) {
+		err = PTR_ERR(pps_gen->dev);
+		goto del_cdev;
+	}
+	pps_gen->dev->release = pps_gen_device_destruct;
+	dev_set_drvdata(pps_gen->dev, pps_gen);
+
+	pr_debug("generator got cdev (%d:%d)\n",
+			MAJOR(pps_gen_devt), pps_gen->id);
+
+	return 0;
+
+del_cdev:
+	cdev_del(&pps_gen->cdev);
+free_ida:
+	ida_free(&pps_gen_ida, pps_gen->id);
+	return err;
+}
+
+static void pps_gen_unregister_cdev(struct pps_gen_device *pps_gen)
+{
+	pr_debug("unregistering pps-gen%d\n", pps_gen->id);
+	device_destroy(pps_gen_class, pps_gen->dev->devt);
+}
+
+/*
+ * Exported functions
+ */
+
+/**
+ * pps_gen_register_source() - add a PPS generator in the system
+ * @info: the PPS generator info struct
+ *
+ * This function is used to register a new PPS generator in the system.
+ * When it returns successfully the new generator is up and running, and
+ * it can be managed by the userspace.
+ *
+ * Return: the PPS generator device in case of success, and ERR_PTR(errno)
+ *	 otherwise.
+ */
+struct pps_gen_device *pps_gen_register_source(struct pps_gen_source_info *info)
+{
+	struct pps_gen_device *pps_gen;
+	int err;
+
+	pps_gen = kzalloc(sizeof(struct pps_gen_device), GFP_KERNEL);
+	if (pps_gen == NULL) {
+		err = -ENOMEM;
+		goto pps_gen_register_source_exit;
+	}
+	pps_gen->info = *info;
+	pps_gen->enabled = false;
+
+	init_waitqueue_head(&pps_gen->queue);
+	spin_lock_init(&pps_gen->lock);
+
+	/* Create the char device */
+	err = pps_gen_register_cdev(pps_gen);
+	if (err < 0) {
+		pr_err(" unable to create char device\n");
+		goto kfree_pps_gen;
+	}
+
+	return pps_gen;
+
+kfree_pps_gen:
+	kfree(pps_gen);
+
+pps_gen_register_source_exit:
+	pr_err("unable to register generator\n");
+
+	return ERR_PTR(err);
+}
+EXPORT_SYMBOL(pps_gen_register_source);
+
+/**
+ * pps_gen_unregister_source() - remove a PPS generator from the system
+ * @pps_gen: the PPS generator device to be removed
+ *
+ * This function is used to deregister a PPS generator from the system. When
+ * called, it disables the generator so no pulses are generated anymore.
+ */
+void pps_gen_unregister_source(struct pps_gen_device *pps_gen)
+{
+	pps_gen_unregister_cdev(pps_gen);
+}
+EXPORT_SYMBOL(pps_gen_unregister_source);
+
+/* pps_gen_event - register a PPS generator event into the system
+ * @pps: the PPS generator device
+ * @event: the event type
+ * @data: userdef pointer
+ *
+ * This function is used by each PPS generator in order to register a new
+ * PPS event into the system (it's usually called inside an IRQ handler).
+ */
+void pps_gen_event(struct pps_gen_device *pps_gen,
+			unsigned int event, void *data)
+{
+	unsigned long flags;
+
+	dev_dbg(pps_gen->dev, "PPS generator event %u\n", event);
+
+	spin_lock_irqsave(&pps_gen->lock, flags);
+
+	pps_gen->event = event;
+	pps_gen->sequence++;
+
+	pps_gen->last_ev++;
+	wake_up_interruptible_all(&pps_gen->queue);
+	kill_fasync(&pps_gen->async_queue, SIGIO, POLL_IN);
+
+	spin_unlock_irqrestore(&pps_gen->lock, flags);
+}
+EXPORT_SYMBOL(pps_gen_event);
+
+/*
+ * Module stuff
+ */
+
+static void __exit pps_gen_exit(void)
+{
+	class_destroy(pps_gen_class);
+	unregister_chrdev_region(pps_gen_devt, PPS_GEN_MAX_SOURCES);
+}
+
+static int __init pps_gen_init(void)
+{
+	int err;
+
+	pps_gen_class = class_create("pps-gen");
+	if (IS_ERR(pps_gen_class)) {
+		pr_err("failed to allocate class\n");
+		return PTR_ERR(pps_gen_class);
+	}
+	pps_gen_class->dev_groups = pps_gen_groups;
+
+	err = alloc_chrdev_region(&pps_gen_devt, 0,
+					PPS_GEN_MAX_SOURCES, "pps-gen");
+	if (err < 0) {
+		pr_err("failed to allocate char device region\n");
+		goto remove_class;
+	}
+
+	return 0;
+
+remove_class:
+	class_destroy(pps_gen_class);
+	return err;
+}
+
+subsys_initcall(pps_gen_init);
+module_exit(pps_gen_exit);
+
+MODULE_AUTHOR("Rodolfo Giometti <giometti@enneenne.com>");
+MODULE_DESCRIPTION("LinuxPPS generators support");
+MODULE_LICENSE("GPL");
diff --git a/drivers/pps/generators/sysfs.c b/drivers/pps/generators/sysfs.c
new file mode 100644
index 000000000000..faf8b1c6d202
--- /dev/null
+++ b/drivers/pps/generators/sysfs.c
@@ -0,0 +1,75 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * PPS generators sysfs support
+ *
+ * Copyright (C) 2024 Rodolfo Giometti <giometti@enneenne.com>
+ */
+
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/pps_gen_kernel.h>
+
+/*
+ * Attribute functions
+ */
+
+static ssize_t system_show(struct device *dev, struct device_attribute *attr,
+				char *buf)
+{
+	struct pps_gen_device *pps_gen = dev_get_drvdata(dev);
+
+	return sysfs_emit(buf, "%d\n", pps_gen->info.use_system_clock);
+}
+static DEVICE_ATTR_RO(system);
+
+static ssize_t time_show(struct device *dev, struct device_attribute *attr,
+				char *buf)
+{
+	struct pps_gen_device *pps_gen = dev_get_drvdata(dev);
+	struct timespec64 time;
+	int ret;
+
+	ret = pps_gen->info.get_time(pps_gen, &time);
+	if (ret)
+		return ret;
+
+	return sysfs_emit(buf, "%llu %09lu\n", time.tv_sec, time.tv_nsec);
+}
+static DEVICE_ATTR_RO(time);
+
+static ssize_t enable_store(struct device *dev, struct device_attribute *attr,
+				const char *buf, size_t count)
+{
+	struct pps_gen_device *pps_gen = dev_get_drvdata(dev);
+	bool status;
+	int ret;
+
+	ret = kstrtobool(buf, &status);
+	if (ret)
+		return ret;
+
+	ret = pps_gen->info.enable(pps_gen, status);
+	if (ret)
+		return ret;
+	pps_gen->enabled = status;
+
+	return count;
+}
+static DEVICE_ATTR_WO(enable);
+
+static struct attribute *pps_gen_attrs[] = {
+	&dev_attr_enable.attr,
+	&dev_attr_time.attr,
+	&dev_attr_system.attr,
+	NULL,
+};
+
+static const struct attribute_group pps_gen_group = {
+	.attrs = pps_gen_attrs,
+};
+
+const struct attribute_group *pps_gen_groups[] = {
+	&pps_gen_group,
+	NULL,
+};
diff --git a/include/linux/pps_gen_kernel.h b/include/linux/pps_gen_kernel.h
new file mode 100644
index 000000000000..022ea0ac4440
--- /dev/null
+++ b/include/linux/pps_gen_kernel.h
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * PPS generator API kernel header
+ *
+ * Copyright (C) 2024 Rodolfo Giometti <giometti@enneenne.com>
+ */
+
+#ifndef LINUX_PPS_GEN_KERNEL_H
+#define LINUX_PPS_GEN_KERNEL_H
+
+#include <linux/pps_gen.h>
+#include <linux/cdev.h>
+#include <linux/device.h>
+
+/*
+ * Global defines
+ */
+
+#define PPS_GEN_MAX_SOURCES	16		/* should be enough... */
+
+struct pps_gen_device;
+
+/**
+ * struct pps_gen_source_info - the specific PPS generator info
+ * @use_system_clock: true, if the system clock is used to generate pulses
+ * @get_time: query the time stored into the generator clock
+ * @enable: enable/disable the PPS pulses generation
+ *
+ * This is the main generator struct where all needed information must be
+ * placed before calling the pps_gen_register_source().
+ */
+struct pps_gen_source_info {
+	bool use_system_clock;
+
+	int (*get_time)(struct pps_gen_device *pps_gen,
+					struct timespec64 *time);
+	int (*enable)(struct pps_gen_device *pps_gen, bool enable);
+
+/* private: internal use only */
+	struct module *owner;
+	struct device *parent;			/* for device_create */
+};
+
+/* The main struct */
+struct pps_gen_device {
+	struct pps_gen_source_info info;	/* PSS generator info */
+	bool enabled;				/* PSS generator status */
+
+	unsigned int event;
+	unsigned int sequence;
+
+	unsigned int last_ev;			/* last PPS event id */
+	wait_queue_head_t queue;		/* PPS event queue */
+
+	unsigned int id;			/* PPS generator unique ID */
+	struct cdev cdev;
+	struct device *dev;
+	struct fasync_struct *async_queue;	/* fasync method */
+	spinlock_t lock;
+};
+
+/*
+ * Global variables
+ */
+
+extern const struct attribute_group *pps_gen_groups[];
+
+/*
+ * Exported functions
+ */
+
+extern struct pps_gen_device *pps_gen_register_source(
+				struct pps_gen_source_info *info);
+extern void pps_gen_unregister_source(struct pps_gen_device *pps_gen);
+extern void pps_gen_event(struct pps_gen_device *pps_gen,
+				unsigned int event, void *data);
+
+#endif /* LINUX_PPS_GEN_KERNEL_H */
diff --git a/include/uapi/linux/pps_gen.h b/include/uapi/linux/pps_gen.h
new file mode 100644
index 000000000000..60a5d0fcfa68
--- /dev/null
+++ b/include/uapi/linux/pps_gen.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * PPS generator API header
+ *
+ * Copyright (C) 2024 Rodolfo Giometti <giometti@enneenne.com>
+ */
+
+#ifndef _PPS_GEN_H_
+#define _PPS_GEN_H_
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+/**
+ * struct pps_gen_event - the PPS generator events
+ * @event: the event type
+ * @sequence: the event sequence number
+ *
+ * Userspace can get the last PPS generator event by using the
+ * ioctl(pps_gen, PPS_GEN_FETCHEVENT, ...) syscall.
+ * The sequence field can be used to save the last event ID, while in the
+ * event field is stored the last event type. Currently known event is:
+ *
+ *     PPS_GEN_EVENT_MISSEDPULSE	: last pulse was not generated
+ */
+struct pps_gen_event {
+	unsigned int event;
+	unsigned int sequence;
+};
+
+#define PPS_GEN_EVENT_MISSEDPULSE	1
+
+#define PPS_GEN_SETENABLE		_IOW('p', 0xb1, unsigned int *)
+#define PPS_GEN_USESYSTEMCLOCK		_IOR('p', 0xb2, unsigned int *)
+#define PPS_GEN_FETCHEVENT		_IOR('p', 0xb3, struct pps_gen_event *)
+
+#endif /* _PPS_GEN_H_ */
-- 
cgit v1.2.3


From d75abf2f9f2e584633bd1072267a2ff25deb422b Mon Sep 17 00:00:00 2001
From: Elizabeth Figura <zfigura@codeweavers.com>
Date: Fri, 13 Dec 2024 13:34:42 -0600
Subject: ntsync: Return the fd from NTSYNC_IOC_CREATE_SEM.

Simplify the user API a bit by returning the fd as return value from the ioctl
instead of through the argument pointer.

Signed-off-by: Elizabeth Figura <zfigura@codeweavers.com>
Link: https://lore.kernel.org/r/20241213193511.457338-2-zfigura@codeweavers.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/ntsync.c       | 7 ++-----
 include/uapi/linux/ntsync.h | 3 +--
 2 files changed, 3 insertions(+), 7 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/misc/ntsync.c b/drivers/misc/ntsync.c
index 4954553b7baa..2e7f698268c1 100644
--- a/drivers/misc/ntsync.c
+++ b/drivers/misc/ntsync.c
@@ -165,7 +165,6 @@ static int ntsync_obj_get_fd(struct ntsync_obj *obj)
 
 static int ntsync_create_sem(struct ntsync_device *dev, void __user *argp)
 {
-	struct ntsync_sem_args __user *user_args = argp;
 	struct ntsync_sem_args args;
 	struct ntsync_obj *sem;
 	int fd;
@@ -182,12 +181,10 @@ static int ntsync_create_sem(struct ntsync_device *dev, void __user *argp)
 	sem->u.sem.count = args.count;
 	sem->u.sem.max = args.max;
 	fd = ntsync_obj_get_fd(sem);
-	if (fd < 0) {
+	if (fd < 0)
 		kfree(sem);
-		return fd;
-	}
 
-	return put_user(fd, &user_args->sem);
+	return fd;
 }
 
 static int ntsync_char_open(struct inode *inode, struct file *file)
diff --git a/include/uapi/linux/ntsync.h b/include/uapi/linux/ntsync.h
index dcfa38fdc93c..27d8cb3dd5b7 100644
--- a/include/uapi/linux/ntsync.h
+++ b/include/uapi/linux/ntsync.h
@@ -11,12 +11,11 @@
 #include <linux/types.h>
 
 struct ntsync_sem_args {
-	__u32 sem;
 	__u32 count;
 	__u32 max;
 };
 
-#define NTSYNC_IOC_CREATE_SEM		_IOWR('N', 0x80, struct ntsync_sem_args)
+#define NTSYNC_IOC_CREATE_SEM		_IOW ('N', 0x80, struct ntsync_sem_args)
 
 #define NTSYNC_IOC_SEM_POST		_IOWR('N', 0x81, __u32)
 
-- 
cgit v1.2.3


From 5ec43d6b0328a2383525a2b90306c9077480e0bf Mon Sep 17 00:00:00 2001
From: Elizabeth Figura <zfigura@codeweavers.com>
Date: Fri, 13 Dec 2024 13:34:43 -0600
Subject: ntsync: Rename NTSYNC_IOC_SEM_POST to NTSYNC_IOC_SEM_RELEASE.

Use the more common "release" terminology, which is also the term used by NT,
instead of "post" (which is used by POSIX).

Signed-off-by: Elizabeth Figura <zfigura@codeweavers.com>
Link: https://lore.kernel.org/r/20241213193511.457338-3-zfigura@codeweavers.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/ntsync.c       | 10 +++++-----
 include/uapi/linux/ntsync.h |  2 +-
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/misc/ntsync.c b/drivers/misc/ntsync.c
index 2e7f698268c1..cb3a3bd97ba0 100644
--- a/drivers/misc/ntsync.c
+++ b/drivers/misc/ntsync.c
@@ -57,7 +57,7 @@ struct ntsync_device {
  * Actually change the semaphore state, returning -EOVERFLOW if it is made
  * invalid.
  */
-static int post_sem_state(struct ntsync_obj *sem, __u32 count)
+static int release_sem_state(struct ntsync_obj *sem, __u32 count)
 {
 	__u32 sum;
 
@@ -71,7 +71,7 @@ static int post_sem_state(struct ntsync_obj *sem, __u32 count)
 	return 0;
 }
 
-static int ntsync_sem_post(struct ntsync_obj *sem, void __user *argp)
+static int ntsync_sem_release(struct ntsync_obj *sem, void __user *argp)
 {
 	__u32 __user *user_args = argp;
 	__u32 prev_count;
@@ -87,7 +87,7 @@ static int ntsync_sem_post(struct ntsync_obj *sem, void __user *argp)
 	spin_lock(&sem->lock);
 
 	prev_count = sem->u.sem.count;
-	ret = post_sem_state(sem, args);
+	ret = release_sem_state(sem, args);
 
 	spin_unlock(&sem->lock);
 
@@ -114,8 +114,8 @@ static long ntsync_obj_ioctl(struct file *file, unsigned int cmd,
 	void __user *argp = (void __user *)parm;
 
 	switch (cmd) {
-	case NTSYNC_IOC_SEM_POST:
-		return ntsync_sem_post(obj, argp);
+	case NTSYNC_IOC_SEM_RELEASE:
+		return ntsync_sem_release(obj, argp);
 	default:
 		return -ENOIOCTLCMD;
 	}
diff --git a/include/uapi/linux/ntsync.h b/include/uapi/linux/ntsync.h
index 27d8cb3dd5b7..9af9d8125553 100644
--- a/include/uapi/linux/ntsync.h
+++ b/include/uapi/linux/ntsync.h
@@ -17,6 +17,6 @@ struct ntsync_sem_args {
 
 #define NTSYNC_IOC_CREATE_SEM		_IOW ('N', 0x80, struct ntsync_sem_args)
 
-#define NTSYNC_IOC_SEM_POST		_IOWR('N', 0x81, __u32)
+#define NTSYNC_IOC_SEM_RELEASE		_IOWR('N', 0x81, __u32)
 
 #endif
-- 
cgit v1.2.3


From b4a7b5fe3f5149fa35278807e0dc13ddb093f4b8 Mon Sep 17 00:00:00 2001
From: Elizabeth Figura <zfigura@codeweavers.com>
Date: Fri, 13 Dec 2024 13:34:44 -0600
Subject: ntsync: Introduce NTSYNC_IOC_WAIT_ANY.

This corresponds to part of the functionality of the NT syscall
NtWaitForMultipleObjects(). Specifically, it implements the behaviour where
the third argument (wait_any) is TRUE, and it does not handle alertable waits.
Those features have been split out into separate patches to ease review.

This patch therefore implements the wait/wake infrastructure which comprises the
core of ntsync's functionality.

NTSYNC_IOC_WAIT_ANY is a vectored wait function similar to poll(). Unlike
poll(), it "consumes" objects when they are signaled. For semaphores, this means
decreasing one from the internal counter. At most one object can be consumed by
this function.

This wait/wake model is fundamentally different from that used anywhere else in
the kernel, and for that reason ntsync does not use any existing infrastructure,
such as futexes, kernel mutexes or semaphores, or wait_event().

Up to 64 objects can be waited on at once. As soon as one is signaled, the
object with the lowest index is consumed, and that index is returned via the
"index" field.

A timeout is supported. The timeout is passed as a u64 nanosecond value, which
represents absolute time measured against either the MONOTONIC or REALTIME clock
(controlled by the flags argument). If U64_MAX is passed, the ioctl waits
indefinitely.

This ioctl validates that all objects belong to the relevant device. This is not
necessary for any technical reason related to NTSYNC_IOC_WAIT_ANY, but will be
necessary for NTSYNC_IOC_WAIT_ALL introduced in the following patch.

Some padding fields are added for alignment and for fields which will be added
in future patches (split out to ease review).

Signed-off-by: Elizabeth Figura <zfigura@codeweavers.com>
Link: https://lore.kernel.org/r/20241213193511.457338-4-zfigura@codeweavers.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/ntsync.c       | 247 +++++++++++++++++++++++++++++++++++++++++++-
 include/uapi/linux/ntsync.h |  14 +++
 2 files changed, 260 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/drivers/misc/ntsync.c b/drivers/misc/ntsync.c
index cb3a3bd97ba0..900cc5ce5761 100644
--- a/drivers/misc/ntsync.c
+++ b/drivers/misc/ntsync.c
@@ -6,11 +6,16 @@
  */
 
 #include <linux/anon_inodes.h>
+#include <linux/atomic.h>
 #include <linux/file.h>
 #include <linux/fs.h>
+#include <linux/hrtimer.h>
+#include <linux/ktime.h>
 #include <linux/miscdevice.h>
 #include <linux/module.h>
 #include <linux/overflow.h>
+#include <linux/sched.h>
+#include <linux/sched/signal.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <uapi/linux/ntsync.h>
@@ -30,6 +35,8 @@ enum ntsync_type {
  *
  * Both rely on struct file for reference counting. Individual
  * ntsync_obj objects take a reference to the device when created.
+ * Wait operations take a reference to each object being waited on for
+ * the duration of the wait.
  */
 
 struct ntsync_obj {
@@ -47,12 +54,55 @@ struct ntsync_obj {
 			__u32 max;
 		} sem;
 	} u;
+
+	struct list_head any_waiters;
+};
+
+struct ntsync_q_entry {
+	struct list_head node;
+	struct ntsync_q *q;
+	struct ntsync_obj *obj;
+	__u32 index;
+};
+
+struct ntsync_q {
+	struct task_struct *task;
+
+	/*
+	 * Protected via atomic_try_cmpxchg(). Only the thread that wins the
+	 * compare-and-swap may actually change object states and wake this
+	 * task.
+	 */
+	atomic_t signaled;
+
+	__u32 count;
+	struct ntsync_q_entry entries[];
 };
 
 struct ntsync_device {
 	struct file *file;
 };
 
+static void try_wake_any_sem(struct ntsync_obj *sem)
+{
+	struct ntsync_q_entry *entry;
+
+	lockdep_assert_held(&sem->lock);
+
+	list_for_each_entry(entry, &sem->any_waiters, node) {
+		struct ntsync_q *q = entry->q;
+		int signaled = -1;
+
+		if (!sem->u.sem.count)
+			break;
+
+		if (atomic_try_cmpxchg(&q->signaled, &signaled, entry->index)) {
+			sem->u.sem.count--;
+			wake_up_process(q->task);
+		}
+	}
+}
+
 /*
  * Actually change the semaphore state, returning -EOVERFLOW if it is made
  * invalid.
@@ -87,7 +137,9 @@ static int ntsync_sem_release(struct ntsync_obj *sem, void __user *argp)
 	spin_lock(&sem->lock);
 
 	prev_count = sem->u.sem.count;
-	ret = release_sem_state(sem, args);
+	ret = post_sem_state(sem, args);
+	if (!ret)
+		try_wake_any_sem(sem);
 
 	spin_unlock(&sem->lock);
 
@@ -140,6 +192,7 @@ static struct ntsync_obj *ntsync_alloc_obj(struct ntsync_device *dev,
 	obj->dev = dev;
 	get_file(dev->file);
 	spin_lock_init(&obj->lock);
+	INIT_LIST_HEAD(&obj->any_waiters);
 
 	return obj;
 }
@@ -187,6 +240,196 @@ static int ntsync_create_sem(struct ntsync_device *dev, void __user *argp)
 	return fd;
 }
 
+static struct ntsync_obj *get_obj(struct ntsync_device *dev, int fd)
+{
+	struct file *file = fget(fd);
+	struct ntsync_obj *obj;
+
+	if (!file)
+		return NULL;
+
+	if (file->f_op != &ntsync_obj_fops) {
+		fput(file);
+		return NULL;
+	}
+
+	obj = file->private_data;
+	if (obj->dev != dev) {
+		fput(file);
+		return NULL;
+	}
+
+	return obj;
+}
+
+static void put_obj(struct ntsync_obj *obj)
+{
+	fput(obj->file);
+}
+
+static int ntsync_schedule(const struct ntsync_q *q, const struct ntsync_wait_args *args)
+{
+	ktime_t timeout = ns_to_ktime(args->timeout);
+	clockid_t clock = CLOCK_MONOTONIC;
+	ktime_t *timeout_ptr;
+	int ret = 0;
+
+	timeout_ptr = (args->timeout == U64_MAX ? NULL : &timeout);
+
+	if (args->flags & NTSYNC_WAIT_REALTIME)
+		clock = CLOCK_REALTIME;
+
+	do {
+		if (signal_pending(current)) {
+			ret = -ERESTARTSYS;
+			break;
+		}
+
+		set_current_state(TASK_INTERRUPTIBLE);
+		if (atomic_read(&q->signaled) != -1) {
+			ret = 0;
+			break;
+		}
+		ret = schedule_hrtimeout_range_clock(timeout_ptr, 0, HRTIMER_MODE_ABS, clock);
+	} while (ret < 0);
+	__set_current_state(TASK_RUNNING);
+
+	return ret;
+}
+
+/*
+ * Allocate and initialize the ntsync_q structure, but do not queue us yet.
+ */
+static int setup_wait(struct ntsync_device *dev,
+		      const struct ntsync_wait_args *args,
+		      struct ntsync_q **ret_q)
+{
+	const __u32 count = args->count;
+	int fds[NTSYNC_MAX_WAIT_COUNT];
+	struct ntsync_q *q;
+	__u32 i, j;
+
+	if (args->pad[0] || args->pad[1] || args->pad[2] || (args->flags & ~NTSYNC_WAIT_REALTIME))
+		return -EINVAL;
+
+	if (args->count > NTSYNC_MAX_WAIT_COUNT)
+		return -EINVAL;
+
+	if (copy_from_user(fds, u64_to_user_ptr(args->objs),
+			   array_size(count, sizeof(*fds))))
+		return -EFAULT;
+
+	q = kmalloc(struct_size(q, entries, count), GFP_KERNEL);
+	if (!q)
+		return -ENOMEM;
+	q->task = current;
+	atomic_set(&q->signaled, -1);
+	q->count = count;
+
+	for (i = 0; i < count; i++) {
+		struct ntsync_q_entry *entry = &q->entries[i];
+		struct ntsync_obj *obj = get_obj(dev, fds[i]);
+
+		if (!obj)
+			goto err;
+
+		entry->obj = obj;
+		entry->q = q;
+		entry->index = i;
+	}
+
+	*ret_q = q;
+	return 0;
+
+err:
+	for (j = 0; j < i; j++)
+		put_obj(q->entries[j].obj);
+	kfree(q);
+	return -EINVAL;
+}
+
+static void try_wake_any_obj(struct ntsync_obj *obj)
+{
+	switch (obj->type) {
+	case NTSYNC_TYPE_SEM:
+		try_wake_any_sem(obj);
+		break;
+	}
+}
+
+static int ntsync_wait_any(struct ntsync_device *dev, void __user *argp)
+{
+	struct ntsync_wait_args args;
+	struct ntsync_q *q;
+	int signaled;
+	__u32 i;
+	int ret;
+
+	if (copy_from_user(&args, argp, sizeof(args)))
+		return -EFAULT;
+
+	ret = setup_wait(dev, &args, &q);
+	if (ret < 0)
+		return ret;
+
+	/* queue ourselves */
+
+	for (i = 0; i < args.count; i++) {
+		struct ntsync_q_entry *entry = &q->entries[i];
+		struct ntsync_obj *obj = entry->obj;
+
+		spin_lock(&obj->lock);
+		list_add_tail(&entry->node, &obj->any_waiters);
+		spin_unlock(&obj->lock);
+	}
+
+	/* check if we are already signaled */
+
+	for (i = 0; i < args.count; i++) {
+		struct ntsync_obj *obj = q->entries[i].obj;
+
+		if (atomic_read(&q->signaled) != -1)
+			break;
+
+		spin_lock(&obj->lock);
+		try_wake_any_obj(obj);
+		spin_unlock(&obj->lock);
+	}
+
+	/* sleep */
+
+	ret = ntsync_schedule(q, &args);
+
+	/* and finally, unqueue */
+
+	for (i = 0; i < args.count; i++) {
+		struct ntsync_q_entry *entry = &q->entries[i];
+		struct ntsync_obj *obj = entry->obj;
+
+		spin_lock(&obj->lock);
+		list_del(&entry->node);
+		spin_unlock(&obj->lock);
+
+		put_obj(obj);
+	}
+
+	signaled = atomic_read(&q->signaled);
+	if (signaled != -1) {
+		struct ntsync_wait_args __user *user_args = argp;
+
+		/* even if we caught a signal, we need to communicate success */
+		ret = 0;
+
+		if (put_user(signaled, &user_args->index))
+			ret = -EFAULT;
+	} else if (!ret) {
+		ret = -ETIMEDOUT;
+	}
+
+	kfree(q);
+	return ret;
+}
+
 static int ntsync_char_open(struct inode *inode, struct file *file)
 {
 	struct ntsync_device *dev;
@@ -218,6 +461,8 @@ static long ntsync_char_ioctl(struct file *file, unsigned int cmd,
 	switch (cmd) {
 	case NTSYNC_IOC_CREATE_SEM:
 		return ntsync_create_sem(dev, argp);
+	case NTSYNC_IOC_WAIT_ANY:
+		return ntsync_wait_any(dev, argp);
 	default:
 		return -ENOIOCTLCMD;
 	}
diff --git a/include/uapi/linux/ntsync.h b/include/uapi/linux/ntsync.h
index 9af9d8125553..40ffdc41d5bb 100644
--- a/include/uapi/linux/ntsync.h
+++ b/include/uapi/linux/ntsync.h
@@ -15,7 +15,21 @@ struct ntsync_sem_args {
 	__u32 max;
 };
 
+#define NTSYNC_WAIT_REALTIME	0x1
+
+struct ntsync_wait_args {
+	__u64 timeout;
+	__u64 objs;
+	__u32 count;
+	__u32 index;
+	__u32 flags;
+	__u32 pad[3];
+};
+
+#define NTSYNC_MAX_WAIT_COUNT 64
+
 #define NTSYNC_IOC_CREATE_SEM		_IOW ('N', 0x80, struct ntsync_sem_args)
+#define NTSYNC_IOC_WAIT_ANY		_IOWR('N', 0x82, struct ntsync_wait_args)
 
 #define NTSYNC_IOC_SEM_RELEASE		_IOWR('N', 0x81, __u32)
 
-- 
cgit v1.2.3


From cdbb997822806cc1071619b67aef485bb2b921b1 Mon Sep 17 00:00:00 2001
From: Elizabeth Figura <zfigura@codeweavers.com>
Date: Fri, 13 Dec 2024 13:34:45 -0600
Subject: ntsync: Introduce NTSYNC_IOC_WAIT_ALL.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This is similar to NTSYNC_IOC_WAIT_ANY, but waits until all of the objects are
simultaneously signaled, and then acquires all of them as a single atomic
operation.

Because acquisition of multiple objects is atomic, some complex locking is
required. We cannot simply spin-lock multiple objects simultaneously, as that
may disable preëmption for a problematically long time.

Instead, modifying any object which may be involved in a wait-all operation takes
a device-wide sleeping mutex, "wait_all_lock", instead of the normal object
spinlock.

Because wait-for-all is a rare operation, in order to optimize wait-for-any,
this lock is only taken when necessary. "all_hint" is used to mark objects which
are involved in a wait-for-all operation, and if an object is not, only its
spinlock is taken.

The locking scheme used here was written by Peter Zijlstra.

Signed-off-by: Elizabeth Figura <zfigura@codeweavers.com>
Link: https://lore.kernel.org/r/20241213193511.457338-5-zfigura@codeweavers.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/ntsync.c       | 336 ++++++++++++++++++++++++++++++++++++++++++--
 include/uapi/linux/ntsync.h |   1 +
 2 files changed, 323 insertions(+), 14 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/misc/ntsync.c b/drivers/misc/ntsync.c
index 900cc5ce5761..c8beb5504bcc 100644
--- a/drivers/misc/ntsync.c
+++ b/drivers/misc/ntsync.c
@@ -13,6 +13,7 @@
 #include <linux/ktime.h>
 #include <linux/miscdevice.h>
 #include <linux/module.h>
+#include <linux/mutex.h>
 #include <linux/overflow.h>
 #include <linux/sched.h>
 #include <linux/sched/signal.h>
@@ -41,6 +42,7 @@ enum ntsync_type {
 
 struct ntsync_obj {
 	spinlock_t lock;
+	int dev_locked;
 
 	enum ntsync_type type;
 
@@ -55,7 +57,30 @@ struct ntsync_obj {
 		} sem;
 	} u;
 
+	/*
+	 * any_waiters is protected by the object lock, but all_waiters is
+	 * protected by the device wait_all_lock.
+	 */
 	struct list_head any_waiters;
+	struct list_head all_waiters;
+
+	/*
+	 * Hint describing how many tasks are queued on this object in a
+	 * wait-all operation.
+	 *
+	 * Any time we do a wake, we may need to wake "all" waiters as well as
+	 * "any" waiters. In order to atomically wake "all" waiters, we must
+	 * lock all of the objects, and that means grabbing the wait_all_lock
+	 * below (and, due to lock ordering rules, before locking this object).
+	 * However, wait-all is a rare operation, and grabbing the wait-all
+	 * lock for every wake would create unnecessary contention.
+	 * Therefore we first check whether all_hint is zero, and, if it is,
+	 * we skip trying to wake "all" waiters.
+	 *
+	 * Since wait requests must originate from user-space threads, we're
+	 * limited here by PID_MAX_LIMIT, so there's no risk of overflow.
+	 */
+	atomic_t all_hint;
 };
 
 struct ntsync_q_entry {
@@ -75,19 +100,198 @@ struct ntsync_q {
 	 */
 	atomic_t signaled;
 
+	bool all;
 	__u32 count;
 	struct ntsync_q_entry entries[];
 };
 
 struct ntsync_device {
+	/*
+	 * Wait-all operations must atomically grab all objects, and be totally
+	 * ordered with respect to each other and wait-any operations.
+	 * If one thread is trying to acquire several objects, another thread
+	 * cannot touch the object at the same time.
+	 *
+	 * This device-wide lock is used to serialize wait-for-all
+	 * operations, and operations on an object that is involved in a
+	 * wait-for-all.
+	 */
+	struct mutex wait_all_lock;
+
 	struct file *file;
 };
 
+/*
+ * Single objects are locked using obj->lock.
+ *
+ * Multiple objects are 'locked' while holding dev->wait_all_lock.
+ * In this case however, individual objects are not locked by holding
+ * obj->lock, but by setting obj->dev_locked.
+ *
+ * This means that in order to lock a single object, the sequence is slightly
+ * more complicated than usual. Specifically it needs to check obj->dev_locked
+ * after acquiring obj->lock, if set, it needs to drop the lock and acquire
+ * dev->wait_all_lock in order to serialize against the multi-object operation.
+ */
+
+static void dev_lock_obj(struct ntsync_device *dev, struct ntsync_obj *obj)
+{
+	lockdep_assert_held(&dev->wait_all_lock);
+	lockdep_assert(obj->dev == dev);
+	spin_lock(&obj->lock);
+	/*
+	 * By setting obj->dev_locked inside obj->lock, it is ensured that
+	 * anyone holding obj->lock must see the value.
+	 */
+	obj->dev_locked = 1;
+	spin_unlock(&obj->lock);
+}
+
+static void dev_unlock_obj(struct ntsync_device *dev, struct ntsync_obj *obj)
+{
+	lockdep_assert_held(&dev->wait_all_lock);
+	lockdep_assert(obj->dev == dev);
+	spin_lock(&obj->lock);
+	obj->dev_locked = 0;
+	spin_unlock(&obj->lock);
+}
+
+static void obj_lock(struct ntsync_obj *obj)
+{
+	struct ntsync_device *dev = obj->dev;
+
+	for (;;) {
+		spin_lock(&obj->lock);
+		if (likely(!obj->dev_locked))
+			break;
+
+		spin_unlock(&obj->lock);
+		mutex_lock(&dev->wait_all_lock);
+		spin_lock(&obj->lock);
+		/*
+		 * obj->dev_locked should be set and released under the same
+		 * wait_all_lock section, since we now own this lock, it should
+		 * be clear.
+		 */
+		lockdep_assert(!obj->dev_locked);
+		spin_unlock(&obj->lock);
+		mutex_unlock(&dev->wait_all_lock);
+	}
+}
+
+static void obj_unlock(struct ntsync_obj *obj)
+{
+	spin_unlock(&obj->lock);
+}
+
+static bool ntsync_lock_obj(struct ntsync_device *dev, struct ntsync_obj *obj)
+{
+	bool all;
+
+	obj_lock(obj);
+	all = atomic_read(&obj->all_hint);
+	if (unlikely(all)) {
+		obj_unlock(obj);
+		mutex_lock(&dev->wait_all_lock);
+		dev_lock_obj(dev, obj);
+	}
+
+	return all;
+}
+
+static void ntsync_unlock_obj(struct ntsync_device *dev, struct ntsync_obj *obj, bool all)
+{
+	if (all) {
+		dev_unlock_obj(dev, obj);
+		mutex_unlock(&dev->wait_all_lock);
+	} else {
+		obj_unlock(obj);
+	}
+}
+
+#define ntsync_assert_held(obj) \
+	lockdep_assert((lockdep_is_held(&(obj)->lock) != LOCK_STATE_NOT_HELD) || \
+		       ((lockdep_is_held(&(obj)->dev->wait_all_lock) != LOCK_STATE_NOT_HELD) && \
+			(obj)->dev_locked))
+
+static bool is_signaled(struct ntsync_obj *obj)
+{
+	ntsync_assert_held(obj);
+
+	switch (obj->type) {
+	case NTSYNC_TYPE_SEM:
+		return !!obj->u.sem.count;
+	}
+
+	WARN(1, "bad object type %#x\n", obj->type);
+	return false;
+}
+
+/*
+ * "locked_obj" is an optional pointer to an object which is already locked and
+ * should not be locked again. This is necessary so that changing an object's
+ * state and waking it can be a single atomic operation.
+ */
+static void try_wake_all(struct ntsync_device *dev, struct ntsync_q *q,
+			 struct ntsync_obj *locked_obj)
+{
+	__u32 count = q->count;
+	bool can_wake = true;
+	int signaled = -1;
+	__u32 i;
+
+	lockdep_assert_held(&dev->wait_all_lock);
+	if (locked_obj)
+		lockdep_assert(locked_obj->dev_locked);
+
+	for (i = 0; i < count; i++) {
+		if (q->entries[i].obj != locked_obj)
+			dev_lock_obj(dev, q->entries[i].obj);
+	}
+
+	for (i = 0; i < count; i++) {
+		if (!is_signaled(q->entries[i].obj)) {
+			can_wake = false;
+			break;
+		}
+	}
+
+	if (can_wake && atomic_try_cmpxchg(&q->signaled, &signaled, 0)) {
+		for (i = 0; i < count; i++) {
+			struct ntsync_obj *obj = q->entries[i].obj;
+
+			switch (obj->type) {
+			case NTSYNC_TYPE_SEM:
+				obj->u.sem.count--;
+				break;
+			}
+		}
+		wake_up_process(q->task);
+	}
+
+	for (i = 0; i < count; i++) {
+		if (q->entries[i].obj != locked_obj)
+			dev_unlock_obj(dev, q->entries[i].obj);
+	}
+}
+
+static void try_wake_all_obj(struct ntsync_device *dev, struct ntsync_obj *obj)
+{
+	struct ntsync_q_entry *entry;
+
+	lockdep_assert_held(&dev->wait_all_lock);
+	lockdep_assert(obj->dev_locked);
+
+	list_for_each_entry(entry, &obj->all_waiters, node)
+		try_wake_all(dev, entry->q, obj);
+}
+
 static void try_wake_any_sem(struct ntsync_obj *sem)
 {
 	struct ntsync_q_entry *entry;
 
-	lockdep_assert_held(&sem->lock);
+	ntsync_assert_held(sem);
+	lockdep_assert(sem->type == NTSYNC_TYPE_SEM);
 
 	list_for_each_entry(entry, &sem->any_waiters, node) {
 		struct ntsync_q *q = entry->q;
@@ -111,7 +315,7 @@ static int release_sem_state(struct ntsync_obj *sem, __u32 count)
 {
 	__u32 sum;
 
-	lockdep_assert_held(&sem->lock);
+	ntsync_assert_held(sem);
 
 	if (check_add_overflow(sem->u.sem.count, count, &sum) ||
 	    sum > sem->u.sem.max)
@@ -123,9 +327,11 @@ static int release_sem_state(struct ntsync_obj *sem, __u32 count)
 
 static int ntsync_sem_release(struct ntsync_obj *sem, void __user *argp)
 {
+	struct ntsync_device *dev = sem->dev;
 	__u32 __user *user_args = argp;
 	__u32 prev_count;
 	__u32 args;
+	bool all;
 	int ret;
 
 	if (copy_from_user(&args, argp, sizeof(args)))
@@ -134,14 +340,17 @@ static int ntsync_sem_release(struct ntsync_obj *sem, void __user *argp)
 	if (sem->type != NTSYNC_TYPE_SEM)
 		return -EINVAL;
 
-	spin_lock(&sem->lock);
+	all = ntsync_lock_obj(dev, sem);
 
 	prev_count = sem->u.sem.count;
-	ret = post_sem_state(sem, args);
-	if (!ret)
+	ret = release_sem_state(sem, args);
+	if (!ret) {
+		if (all)
+			try_wake_all_obj(dev, sem);
 		try_wake_any_sem(sem);
+	}
 
-	spin_unlock(&sem->lock);
+	ntsync_unlock_obj(dev, sem, all);
 
 	if (!ret && put_user(prev_count, user_args))
 		ret = -EFAULT;
@@ -193,6 +402,8 @@ static struct ntsync_obj *ntsync_alloc_obj(struct ntsync_device *dev,
 	get_file(dev->file);
 	spin_lock_init(&obj->lock);
 	INIT_LIST_HEAD(&obj->any_waiters);
+	INIT_LIST_HEAD(&obj->all_waiters);
+	atomic_set(&obj->all_hint, 0);
 
 	return obj;
 }
@@ -301,7 +512,7 @@ static int ntsync_schedule(const struct ntsync_q *q, const struct ntsync_wait_ar
  * Allocate and initialize the ntsync_q structure, but do not queue us yet.
  */
 static int setup_wait(struct ntsync_device *dev,
-		      const struct ntsync_wait_args *args,
+		      const struct ntsync_wait_args *args, bool all,
 		      struct ntsync_q **ret_q)
 {
 	const __u32 count = args->count;
@@ -324,6 +535,7 @@ static int setup_wait(struct ntsync_device *dev,
 		return -ENOMEM;
 	q->task = current;
 	atomic_set(&q->signaled, -1);
+	q->all = all;
 	q->count = count;
 
 	for (i = 0; i < count; i++) {
@@ -333,6 +545,16 @@ static int setup_wait(struct ntsync_device *dev,
 		if (!obj)
 			goto err;
 
+		if (all) {
+			/* Check that the objects are all distinct. */
+			for (j = 0; j < i; j++) {
+				if (obj == q->entries[j].obj) {
+					put_obj(obj);
+					goto err;
+				}
+			}
+		}
+
 		entry->obj = obj;
 		entry->q = q;
 		entry->index = i;
@@ -362,13 +584,14 @@ static int ntsync_wait_any(struct ntsync_device *dev, void __user *argp)
 	struct ntsync_wait_args args;
 	struct ntsync_q *q;
 	int signaled;
+	bool all;
 	__u32 i;
 	int ret;
 
 	if (copy_from_user(&args, argp, sizeof(args)))
 		return -EFAULT;
 
-	ret = setup_wait(dev, &args, &q);
+	ret = setup_wait(dev, &args, false, &q);
 	if (ret < 0)
 		return ret;
 
@@ -378,9 +601,9 @@ static int ntsync_wait_any(struct ntsync_device *dev, void __user *argp)
 		struct ntsync_q_entry *entry = &q->entries[i];
 		struct ntsync_obj *obj = entry->obj;
 
-		spin_lock(&obj->lock);
+		all = ntsync_lock_obj(dev, obj);
 		list_add_tail(&entry->node, &obj->any_waiters);
-		spin_unlock(&obj->lock);
+		ntsync_unlock_obj(dev, obj, all);
 	}
 
 	/* check if we are already signaled */
@@ -391,9 +614,9 @@ static int ntsync_wait_any(struct ntsync_device *dev, void __user *argp)
 		if (atomic_read(&q->signaled) != -1)
 			break;
 
-		spin_lock(&obj->lock);
+		all = ntsync_lock_obj(dev, obj);
 		try_wake_any_obj(obj);
-		spin_unlock(&obj->lock);
+		ntsync_unlock_obj(dev, obj, all);
 	}
 
 	/* sleep */
@@ -406,13 +629,94 @@ static int ntsync_wait_any(struct ntsync_device *dev, void __user *argp)
 		struct ntsync_q_entry *entry = &q->entries[i];
 		struct ntsync_obj *obj = entry->obj;
 
-		spin_lock(&obj->lock);
+		all = ntsync_lock_obj(dev, obj);
 		list_del(&entry->node);
-		spin_unlock(&obj->lock);
+		ntsync_unlock_obj(dev, obj, all);
+
+		put_obj(obj);
+	}
+
+	signaled = atomic_read(&q->signaled);
+	if (signaled != -1) {
+		struct ntsync_wait_args __user *user_args = argp;
+
+		/* even if we caught a signal, we need to communicate success */
+		ret = 0;
+
+		if (put_user(signaled, &user_args->index))
+			ret = -EFAULT;
+	} else if (!ret) {
+		ret = -ETIMEDOUT;
+	}
+
+	kfree(q);
+	return ret;
+}
+
+static int ntsync_wait_all(struct ntsync_device *dev, void __user *argp)
+{
+	struct ntsync_wait_args args;
+	struct ntsync_q *q;
+	int signaled;
+	__u32 i;
+	int ret;
+
+	if (copy_from_user(&args, argp, sizeof(args)))
+		return -EFAULT;
+
+	ret = setup_wait(dev, &args, true, &q);
+	if (ret < 0)
+		return ret;
+
+	/* queue ourselves */
+
+	mutex_lock(&dev->wait_all_lock);
+
+	for (i = 0; i < args.count; i++) {
+		struct ntsync_q_entry *entry = &q->entries[i];
+		struct ntsync_obj *obj = entry->obj;
+
+		atomic_inc(&obj->all_hint);
+
+		/*
+		 * obj->all_waiters is protected by dev->wait_all_lock rather
+		 * than obj->lock, so there is no need to acquire obj->lock
+		 * here.
+		 */
+		list_add_tail(&entry->node, &obj->all_waiters);
+	}
+
+	/* check if we are already signaled */
+
+	try_wake_all(dev, q, NULL);
+
+	mutex_unlock(&dev->wait_all_lock);
+
+	/* sleep */
+
+	ret = ntsync_schedule(q, &args);
+
+	/* and finally, unqueue */
+
+	mutex_lock(&dev->wait_all_lock);
+
+	for (i = 0; i < args.count; i++) {
+		struct ntsync_q_entry *entry = &q->entries[i];
+		struct ntsync_obj *obj = entry->obj;
+
+		/*
+		 * obj->all_waiters is protected by dev->wait_all_lock rather
+		 * than obj->lock, so there is no need to acquire it here.
+		 */
+		list_del(&entry->node);
+
+		atomic_dec(&obj->all_hint);
 
 		put_obj(obj);
 	}
 
+	mutex_unlock(&dev->wait_all_lock);
+
 	signaled = atomic_read(&q->signaled);
 	if (signaled != -1) {
 		struct ntsync_wait_args __user *user_args = argp;
@@ -438,6 +742,8 @@ static int ntsync_char_open(struct inode *inode, struct file *file)
 	if (!dev)
 		return -ENOMEM;
 
+	mutex_init(&dev->wait_all_lock);
+
 	file->private_data = dev;
 	dev->file = file;
 	return nonseekable_open(inode, file);
@@ -461,6 +767,8 @@ static long ntsync_char_ioctl(struct file *file, unsigned int cmd,
 	switch (cmd) {
 	case NTSYNC_IOC_CREATE_SEM:
 		return ntsync_create_sem(dev, argp);
+	case NTSYNC_IOC_WAIT_ALL:
+		return ntsync_wait_all(dev, argp);
 	case NTSYNC_IOC_WAIT_ANY:
 		return ntsync_wait_any(dev, argp);
 	default:
diff --git a/include/uapi/linux/ntsync.h b/include/uapi/linux/ntsync.h
index 40ffdc41d5bb..d64420ffbcd7 100644
--- a/include/uapi/linux/ntsync.h
+++ b/include/uapi/linux/ntsync.h
@@ -30,6 +30,7 @@ struct ntsync_wait_args {
 
 #define NTSYNC_IOC_CREATE_SEM		_IOW ('N', 0x80, struct ntsync_sem_args)
 #define NTSYNC_IOC_WAIT_ANY		_IOWR('N', 0x82, struct ntsync_wait_args)
+#define NTSYNC_IOC_WAIT_ALL		_IOWR('N', 0x83, struct ntsync_wait_args)
 
 #define NTSYNC_IOC_SEM_RELEASE		_IOWR('N', 0x81, __u32)
 
-- 
cgit v1.2.3


From 5bc2479a3585be35ea32e431bc82282bbf5a5d5b Mon Sep 17 00:00:00 2001
From: Elizabeth Figura <zfigura@codeweavers.com>
Date: Fri, 13 Dec 2024 13:34:46 -0600
Subject: ntsync: Introduce NTSYNC_IOC_CREATE_MUTEX.

This corresponds to the NT syscall NtCreateMutant().

An NT mutex is recursive, with a 32-bit recursion counter. When acquired via
NtWaitForMultipleObjects(), the recursion counter is incremented by one. The OS
records the thread which acquired it.

The OS records the thread which acquired it. However, in order to keep this
driver self-contained, the owning thread ID is managed by user-space, and passed
as a parameter to all relevant ioctls.

The initial owner and recursion count, if any, are specified when the mutex is
created.

Signed-off-by: Elizabeth Figura <zfigura@codeweavers.com>
Link: https://lore.kernel.org/r/20241213193511.457338-6-zfigura@codeweavers.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/ntsync.c       | 74 +++++++++++++++++++++++++++++++++++++++++++--
 include/uapi/linux/ntsync.h |  9 +++++-
 2 files changed, 79 insertions(+), 4 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/misc/ntsync.c b/drivers/misc/ntsync.c
index c8beb5504bcc..a2826dbff2f4 100644
--- a/drivers/misc/ntsync.c
+++ b/drivers/misc/ntsync.c
@@ -25,6 +25,7 @@
 
 enum ntsync_type {
 	NTSYNC_TYPE_SEM,
+	NTSYNC_TYPE_MUTEX,
 };
 
 /*
@@ -55,6 +56,10 @@ struct ntsync_obj {
 			__u32 count;
 			__u32 max;
 		} sem;
+		struct {
+			__u32 count;
+			pid_t owner;
+		} mutex;
 	} u;
 
 	/*
@@ -92,6 +97,7 @@ struct ntsync_q_entry {
 
 struct ntsync_q {
 	struct task_struct *task;
+	__u32 owner;
 
 	/*
 	 * Protected via atomic_try_cmpxchg(). Only the thread that wins the
@@ -214,13 +220,17 @@ static void ntsync_unlock_obj(struct ntsync_device *dev, struct ntsync_obj *obj,
 		       ((lockdep_is_held(&(obj)->dev->wait_all_lock) != LOCK_STATE_NOT_HELD) && \
 			(obj)->dev_locked))
 
-static bool is_signaled(struct ntsync_obj *obj)
+static bool is_signaled(struct ntsync_obj *obj, __u32 owner)
 {
 	ntsync_assert_held(obj);
 
 	switch (obj->type) {
 	case NTSYNC_TYPE_SEM:
 		return !!obj->u.sem.count;
+	case NTSYNC_TYPE_MUTEX:
+		if (obj->u.mutex.owner && obj->u.mutex.owner != owner)
+			return false;
+		return obj->u.mutex.count < UINT_MAX;
 	}
 
 	WARN(1, "bad object type %#x\n", obj->type);
@@ -250,7 +260,7 @@ static void try_wake_all(struct ntsync_device *dev, struct ntsync_q *q,
 	}
 
 	for (i = 0; i < count; i++) {
-		if (!is_signaled(q->entries[i].obj)) {
+		if (!is_signaled(q->entries[i].obj, q->owner)) {
 			can_wake = false;
 			break;
 		}
@@ -264,6 +274,10 @@ static void try_wake_all(struct ntsync_device *dev, struct ntsync_q *q,
 			case NTSYNC_TYPE_SEM:
 				obj->u.sem.count--;
 				break;
+			case NTSYNC_TYPE_MUTEX:
+				obj->u.mutex.count++;
+				obj->u.mutex.owner = q->owner;
+				break;
 			}
 		}
 		wake_up_process(q->task);
@@ -307,6 +321,30 @@ static void try_wake_any_sem(struct ntsync_obj *sem)
 	}
 }
 
+static void try_wake_any_mutex(struct ntsync_obj *mutex)
+{
+	struct ntsync_q_entry *entry;
+
+	ntsync_assert_held(mutex);
+	lockdep_assert(mutex->type == NTSYNC_TYPE_MUTEX);
+
+	list_for_each_entry(entry, &mutex->any_waiters, node) {
+		struct ntsync_q *q = entry->q;
+		int signaled = -1;
+
+		if (mutex->u.mutex.count == UINT_MAX)
+			break;
+		if (mutex->u.mutex.owner && mutex->u.mutex.owner != q->owner)
+			continue;
+
+		if (atomic_try_cmpxchg(&q->signaled, &signaled, entry->index)) {
+			mutex->u.mutex.count++;
+			mutex->u.mutex.owner = q->owner;
+			wake_up_process(q->task);
+		}
+	}
+}
+
 /*
  * Actually change the semaphore state, returning -EOVERFLOW if it is made
  * invalid.
@@ -451,6 +489,30 @@ static int ntsync_create_sem(struct ntsync_device *dev, void __user *argp)
 	return fd;
 }
 
+static int ntsync_create_mutex(struct ntsync_device *dev, void __user *argp)
+{
+	struct ntsync_mutex_args args;
+	struct ntsync_obj *mutex;
+	int fd;
+
+	if (copy_from_user(&args, argp, sizeof(args)))
+		return -EFAULT;
+
+	if (!args.owner != !args.count)
+		return -EINVAL;
+
+	mutex = ntsync_alloc_obj(dev, NTSYNC_TYPE_MUTEX);
+	if (!mutex)
+		return -ENOMEM;
+	mutex->u.mutex.count = args.count;
+	mutex->u.mutex.owner = args.owner;
+	fd = ntsync_obj_get_fd(mutex);
+	if (fd < 0)
+		kfree(mutex);
+
+	return fd;
+}
+
 static struct ntsync_obj *get_obj(struct ntsync_device *dev, int fd)
 {
 	struct file *file = fget(fd);
@@ -520,7 +582,7 @@ static int setup_wait(struct ntsync_device *dev,
 	struct ntsync_q *q;
 	__u32 i, j;
 
-	if (args->pad[0] || args->pad[1] || args->pad[2] || (args->flags & ~NTSYNC_WAIT_REALTIME))
+	if (args->pad[0] || args->pad[1] || (args->flags & ~NTSYNC_WAIT_REALTIME))
 		return -EINVAL;
 
 	if (args->count > NTSYNC_MAX_WAIT_COUNT)
@@ -534,6 +596,7 @@ static int setup_wait(struct ntsync_device *dev,
 	if (!q)
 		return -ENOMEM;
 	q->task = current;
+	q->owner = args->owner;
 	atomic_set(&q->signaled, -1);
 	q->all = all;
 	q->count = count;
@@ -576,6 +639,9 @@ static void try_wake_any_obj(struct ntsync_obj *obj)
 	case NTSYNC_TYPE_SEM:
 		try_wake_any_sem(obj);
 		break;
+	case NTSYNC_TYPE_MUTEX:
+		try_wake_any_mutex(obj);
+		break;
 	}
 }
 
@@ -765,6 +831,8 @@ static long ntsync_char_ioctl(struct file *file, unsigned int cmd,
 	void __user *argp = (void __user *)parm;
 
 	switch (cmd) {
+	case NTSYNC_IOC_CREATE_MUTEX:
+		return ntsync_create_mutex(dev, argp);
 	case NTSYNC_IOC_CREATE_SEM:
 		return ntsync_create_sem(dev, argp);
 	case NTSYNC_IOC_WAIT_ALL:
diff --git a/include/uapi/linux/ntsync.h b/include/uapi/linux/ntsync.h
index d64420ffbcd7..bb7fb94f5856 100644
--- a/include/uapi/linux/ntsync.h
+++ b/include/uapi/linux/ntsync.h
@@ -15,6 +15,11 @@ struct ntsync_sem_args {
 	__u32 max;
 };
 
+struct ntsync_mutex_args {
+	__u32 owner;
+	__u32 count;
+};
+
 #define NTSYNC_WAIT_REALTIME	0x1
 
 struct ntsync_wait_args {
@@ -23,7 +28,8 @@ struct ntsync_wait_args {
 	__u32 count;
 	__u32 index;
 	__u32 flags;
-	__u32 pad[3];
+	__u32 owner;
+	__u32 pad[2];
 };
 
 #define NTSYNC_MAX_WAIT_COUNT 64
@@ -31,6 +37,7 @@ struct ntsync_wait_args {
 #define NTSYNC_IOC_CREATE_SEM		_IOW ('N', 0x80, struct ntsync_sem_args)
 #define NTSYNC_IOC_WAIT_ANY		_IOWR('N', 0x82, struct ntsync_wait_args)
 #define NTSYNC_IOC_WAIT_ALL		_IOWR('N', 0x83, struct ntsync_wait_args)
+#define NTSYNC_IOC_CREATE_MUTEX		_IOW ('N', 0x84, struct ntsync_mutex_args)
 
 #define NTSYNC_IOC_SEM_RELEASE		_IOWR('N', 0x81, __u32)
 
-- 
cgit v1.2.3


From 31ca7bb8e8539b454414a2c4f8ad643c939cb0cf Mon Sep 17 00:00:00 2001
From: Elizabeth Figura <zfigura@codeweavers.com>
Date: Fri, 13 Dec 2024 13:34:47 -0600
Subject: ntsync: Introduce NTSYNC_IOC_MUTEX_UNLOCK.

This corresponds to the NT syscall NtReleaseMutant().

This syscall decrements the mutex's recursion count by one, and returns the
previous value. If the mutex is not owned by the current task, the function
instead fails and returns -EPERM.

Signed-off-by: Elizabeth Figura <zfigura@codeweavers.com>
Link: https://lore.kernel.org/r/20241213193511.457338-7-zfigura@codeweavers.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/ntsync.c       | 53 +++++++++++++++++++++++++++++++++++++++++++++
 include/uapi/linux/ntsync.h |  1 +
 2 files changed, 54 insertions(+)

(limited to 'include/uapi')

diff --git a/drivers/misc/ntsync.c b/drivers/misc/ntsync.c
index a2826dbff2f4..33e26240d9e7 100644
--- a/drivers/misc/ntsync.c
+++ b/drivers/misc/ntsync.c
@@ -396,6 +396,57 @@ static int ntsync_sem_release(struct ntsync_obj *sem, void __user *argp)
 	return ret;
 }
 
+/*
+ * Actually change the mutex state, returning -EPERM if not the owner.
+ */
+static int unlock_mutex_state(struct ntsync_obj *mutex,
+			      const struct ntsync_mutex_args *args)
+{
+	ntsync_assert_held(mutex);
+
+	if (mutex->u.mutex.owner != args->owner)
+		return -EPERM;
+
+	if (!--mutex->u.mutex.count)
+		mutex->u.mutex.owner = 0;
+	return 0;
+}
+
+static int ntsync_mutex_unlock(struct ntsync_obj *mutex, void __user *argp)
+{
+	struct ntsync_mutex_args __user *user_args = argp;
+	struct ntsync_device *dev = mutex->dev;
+	struct ntsync_mutex_args args;
+	__u32 prev_count;
+	bool all;
+	int ret;
+
+	if (copy_from_user(&args, argp, sizeof(args)))
+		return -EFAULT;
+	if (!args.owner)
+		return -EINVAL;
+
+	if (mutex->type != NTSYNC_TYPE_MUTEX)
+		return -EINVAL;
+
+	all = ntsync_lock_obj(dev, mutex);
+
+	prev_count = mutex->u.mutex.count;
+	ret = unlock_mutex_state(mutex, &args);
+	if (!ret) {
+		if (all)
+			try_wake_all_obj(dev, mutex);
+		try_wake_any_mutex(mutex);
+	}
+
+	ntsync_unlock_obj(dev, mutex, all);
+
+	if (!ret && put_user(prev_count, &user_args->count))
+		ret = -EFAULT;
+
+	return ret;
+}
+
 static int ntsync_obj_release(struct inode *inode, struct file *file)
 {
 	struct ntsync_obj *obj = file->private_data;
@@ -415,6 +466,8 @@ static long ntsync_obj_ioctl(struct file *file, unsigned int cmd,
 	switch (cmd) {
 	case NTSYNC_IOC_SEM_RELEASE:
 		return ntsync_sem_release(obj, argp);
+	case NTSYNC_IOC_MUTEX_UNLOCK:
+		return ntsync_mutex_unlock(obj, argp);
 	default:
 		return -ENOIOCTLCMD;
 	}
diff --git a/include/uapi/linux/ntsync.h b/include/uapi/linux/ntsync.h
index bb7fb94f5856..9186304b253c 100644
--- a/include/uapi/linux/ntsync.h
+++ b/include/uapi/linux/ntsync.h
@@ -40,5 +40,6 @@ struct ntsync_wait_args {
 #define NTSYNC_IOC_CREATE_MUTEX		_IOW ('N', 0x84, struct ntsync_mutex_args)
 
 #define NTSYNC_IOC_SEM_RELEASE		_IOWR('N', 0x81, __u32)
+#define NTSYNC_IOC_MUTEX_UNLOCK		_IOWR('N', 0x85, struct ntsync_mutex_args)
 
 #endif
-- 
cgit v1.2.3


From ecc2ee361466e4a22e95b1c27d90f8cbd4f22e93 Mon Sep 17 00:00:00 2001
From: Elizabeth Figura <zfigura@codeweavers.com>
Date: Fri, 13 Dec 2024 13:34:48 -0600
Subject: ntsync: Introduce NTSYNC_IOC_MUTEX_KILL.

This does not correspond to any NT syscall. Rather, when a thread dies, it
should be called by the NT emulator for each mutex, with the TID of the dying
thread.

NT mutexes are robust (in the pthread sense). When an NT thread dies, any
mutexes it owned are immediately released. Acquisition of those mutexes by other
threads will return a special value indicating that the mutex was abandoned,
like EOWNERDEAD returned from pthread_mutex_lock(), and EOWNERDEAD is indeed
used here for that purpose.

Signed-off-by: Elizabeth Figura <zfigura@codeweavers.com>
Link: https://lore.kernel.org/r/20241213193511.457338-8-zfigura@codeweavers.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/ntsync.c       | 61 +++++++++++++++++++++++++++++++++++++++++++--
 include/uapi/linux/ntsync.h |  1 +
 2 files changed, 60 insertions(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/misc/ntsync.c b/drivers/misc/ntsync.c
index 33e26240d9e7..03768ac25425 100644
--- a/drivers/misc/ntsync.c
+++ b/drivers/misc/ntsync.c
@@ -59,6 +59,7 @@ struct ntsync_obj {
 		struct {
 			__u32 count;
 			pid_t owner;
+			bool ownerdead;
 		} mutex;
 	} u;
 
@@ -107,6 +108,7 @@ struct ntsync_q {
 	atomic_t signaled;
 
 	bool all;
+	bool ownerdead;
 	__u32 count;
 	struct ntsync_q_entry entries[];
 };
@@ -275,6 +277,9 @@ static void try_wake_all(struct ntsync_device *dev, struct ntsync_q *q,
 				obj->u.sem.count--;
 				break;
 			case NTSYNC_TYPE_MUTEX:
+				if (obj->u.mutex.ownerdead)
+					q->ownerdead = true;
+				obj->u.mutex.ownerdead = false;
 				obj->u.mutex.count++;
 				obj->u.mutex.owner = q->owner;
 				break;
@@ -338,6 +343,9 @@ static void try_wake_any_mutex(struct ntsync_obj *mutex)
 			continue;
 
 		if (atomic_try_cmpxchg(&q->signaled, &signaled, entry->index)) {
+			if (mutex->u.mutex.ownerdead)
+				q->ownerdead = true;
+			mutex->u.mutex.ownerdead = false;
 			mutex->u.mutex.count++;
 			mutex->u.mutex.owner = q->owner;
 			wake_up_process(q->task);
@@ -447,6 +455,52 @@ static int ntsync_mutex_unlock(struct ntsync_obj *mutex, void __user *argp)
 	return ret;
 }
 
+/*
+ * Actually change the mutex state to mark its owner as dead,
+ * returning -EPERM if not the owner.
+ */
+static int kill_mutex_state(struct ntsync_obj *mutex, __u32 owner)
+{
+	ntsync_assert_held(mutex);
+
+	if (mutex->u.mutex.owner != owner)
+		return -EPERM;
+
+	mutex->u.mutex.ownerdead = true;
+	mutex->u.mutex.owner = 0;
+	mutex->u.mutex.count = 0;
+	return 0;
+}
+
+static int ntsync_mutex_kill(struct ntsync_obj *mutex, void __user *argp)
+{
+	struct ntsync_device *dev = mutex->dev;
+	__u32 owner;
+	bool all;
+	int ret;
+
+	if (get_user(owner, (__u32 __user *)argp))
+		return -EFAULT;
+	if (!owner)
+		return -EINVAL;
+
+	if (mutex->type != NTSYNC_TYPE_MUTEX)
+		return -EINVAL;
+
+	all = ntsync_lock_obj(dev, mutex);
+
+	ret = kill_mutex_state(mutex, owner);
+	if (!ret) {
+		if (all)
+			try_wake_all_obj(dev, mutex);
+		try_wake_any_mutex(mutex);
+	}
+
+	ntsync_unlock_obj(dev, mutex, all);
+
+	return ret;
+}
+
 static int ntsync_obj_release(struct inode *inode, struct file *file)
 {
 	struct ntsync_obj *obj = file->private_data;
@@ -468,6 +522,8 @@ static long ntsync_obj_ioctl(struct file *file, unsigned int cmd,
 		return ntsync_sem_release(obj, argp);
 	case NTSYNC_IOC_MUTEX_UNLOCK:
 		return ntsync_mutex_unlock(obj, argp);
+	case NTSYNC_IOC_MUTEX_KILL:
+		return ntsync_mutex_kill(obj, argp);
 	default:
 		return -ENOIOCTLCMD;
 	}
@@ -652,6 +708,7 @@ static int setup_wait(struct ntsync_device *dev,
 	q->owner = args->owner;
 	atomic_set(&q->signaled, -1);
 	q->all = all;
+	q->ownerdead = false;
 	q->count = count;
 
 	for (i = 0; i < count; i++) {
@@ -760,7 +817,7 @@ static int ntsync_wait_any(struct ntsync_device *dev, void __user *argp)
 		struct ntsync_wait_args __user *user_args = argp;
 
 		/* even if we caught a signal, we need to communicate success */
-		ret = 0;
+		ret = q->ownerdead ? -EOWNERDEAD : 0;
 
 		if (put_user(signaled, &user_args->index))
 			ret = -EFAULT;
@@ -841,7 +898,7 @@ static int ntsync_wait_all(struct ntsync_device *dev, void __user *argp)
 		struct ntsync_wait_args __user *user_args = argp;
 
 		/* even if we caught a signal, we need to communicate success */
-		ret = 0;
+		ret = q->ownerdead ? -EOWNERDEAD : 0;
 
 		if (put_user(signaled, &user_args->index))
 			ret = -EFAULT;
diff --git a/include/uapi/linux/ntsync.h b/include/uapi/linux/ntsync.h
index 9186304b253c..633958d90be3 100644
--- a/include/uapi/linux/ntsync.h
+++ b/include/uapi/linux/ntsync.h
@@ -41,5 +41,6 @@ struct ntsync_wait_args {
 
 #define NTSYNC_IOC_SEM_RELEASE		_IOWR('N', 0x81, __u32)
 #define NTSYNC_IOC_MUTEX_UNLOCK		_IOWR('N', 0x85, struct ntsync_mutex_args)
+#define NTSYNC_IOC_MUTEX_KILL		_IOW ('N', 0x86, __u32)
 
 #endif
-- 
cgit v1.2.3


From 4c7404b9c2b572b42dc63bfde5e862290dab53b5 Mon Sep 17 00:00:00 2001
From: Elizabeth Figura <zfigura@codeweavers.com>
Date: Fri, 13 Dec 2024 13:34:49 -0600
Subject: ntsync: Introduce NTSYNC_IOC_CREATE_EVENT.

This correspond to the NT syscall NtCreateEvent().

An NT event holds a single bit of state denoting whether it is signaled or
unsignaled.

There are two types of events: manual-reset and automatic-reset. When an
automatic-reset event is acquired via a wait function, its state is reset to
unsignaled. Manual-reset events are not affected by wait functions.

Whether the event is manual-reset, and its initial state, are specified at
creation time.

Signed-off-by: Elizabeth Figura <zfigura@codeweavers.com>
Link: https://lore.kernel.org/r/20241213193511.457338-9-zfigura@codeweavers.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/ntsync.c       | 59 +++++++++++++++++++++++++++++++++++++++++++++
 include/uapi/linux/ntsync.h |  6 +++++
 2 files changed, 65 insertions(+)

(limited to 'include/uapi')

diff --git a/drivers/misc/ntsync.c b/drivers/misc/ntsync.c
index 03768ac25425..3e8827b6f480 100644
--- a/drivers/misc/ntsync.c
+++ b/drivers/misc/ntsync.c
@@ -26,6 +26,7 @@
 enum ntsync_type {
 	NTSYNC_TYPE_SEM,
 	NTSYNC_TYPE_MUTEX,
+	NTSYNC_TYPE_EVENT,
 };
 
 /*
@@ -61,6 +62,10 @@ struct ntsync_obj {
 			pid_t owner;
 			bool ownerdead;
 		} mutex;
+		struct {
+			bool manual;
+			bool signaled;
+		} event;
 	} u;
 
 	/*
@@ -233,6 +238,8 @@ static bool is_signaled(struct ntsync_obj *obj, __u32 owner)
 		if (obj->u.mutex.owner && obj->u.mutex.owner != owner)
 			return false;
 		return obj->u.mutex.count < UINT_MAX;
+	case NTSYNC_TYPE_EVENT:
+		return obj->u.event.signaled;
 	}
 
 	WARN(1, "bad object type %#x\n", obj->type);
@@ -283,6 +290,10 @@ static void try_wake_all(struct ntsync_device *dev, struct ntsync_q *q,
 				obj->u.mutex.count++;
 				obj->u.mutex.owner = q->owner;
 				break;
+			case NTSYNC_TYPE_EVENT:
+				if (!obj->u.event.manual)
+					obj->u.event.signaled = false;
+				break;
 			}
 		}
 		wake_up_process(q->task);
@@ -353,6 +364,28 @@ static void try_wake_any_mutex(struct ntsync_obj *mutex)
 	}
 }
 
+static void try_wake_any_event(struct ntsync_obj *event)
+{
+	struct ntsync_q_entry *entry;
+
+	ntsync_assert_held(event);
+	lockdep_assert(event->type == NTSYNC_TYPE_EVENT);
+
+	list_for_each_entry(entry, &event->any_waiters, node) {
+		struct ntsync_q *q = entry->q;
+		int signaled = -1;
+
+		if (!event->u.event.signaled)
+			break;
+
+		if (atomic_try_cmpxchg(&q->signaled, &signaled, entry->index)) {
+			if (!event->u.event.manual)
+				event->u.event.signaled = false;
+			wake_up_process(q->task);
+		}
+	}
+}
+
 /*
  * Actually change the semaphore state, returning -EOVERFLOW if it is made
  * invalid.
@@ -622,6 +655,27 @@ static int ntsync_create_mutex(struct ntsync_device *dev, void __user *argp)
 	return fd;
 }
 
+static int ntsync_create_event(struct ntsync_device *dev, void __user *argp)
+{
+	struct ntsync_event_args args;
+	struct ntsync_obj *event;
+	int fd;
+
+	if (copy_from_user(&args, argp, sizeof(args)))
+		return -EFAULT;
+
+	event = ntsync_alloc_obj(dev, NTSYNC_TYPE_EVENT);
+	if (!event)
+		return -ENOMEM;
+	event->u.event.manual = args.manual;
+	event->u.event.signaled = args.signaled;
+	fd = ntsync_obj_get_fd(event);
+	if (fd < 0)
+		kfree(event);
+
+	return fd;
+}
+
 static struct ntsync_obj *get_obj(struct ntsync_device *dev, int fd)
 {
 	struct file *file = fget(fd);
@@ -752,6 +806,9 @@ static void try_wake_any_obj(struct ntsync_obj *obj)
 	case NTSYNC_TYPE_MUTEX:
 		try_wake_any_mutex(obj);
 		break;
+	case NTSYNC_TYPE_EVENT:
+		try_wake_any_event(obj);
+		break;
 	}
 }
 
@@ -941,6 +998,8 @@ static long ntsync_char_ioctl(struct file *file, unsigned int cmd,
 	void __user *argp = (void __user *)parm;
 
 	switch (cmd) {
+	case NTSYNC_IOC_CREATE_EVENT:
+		return ntsync_create_event(dev, argp);
 	case NTSYNC_IOC_CREATE_MUTEX:
 		return ntsync_create_mutex(dev, argp);
 	case NTSYNC_IOC_CREATE_SEM:
diff --git a/include/uapi/linux/ntsync.h b/include/uapi/linux/ntsync.h
index 633958d90be3..e08aa02f4dcc 100644
--- a/include/uapi/linux/ntsync.h
+++ b/include/uapi/linux/ntsync.h
@@ -20,6 +20,11 @@ struct ntsync_mutex_args {
 	__u32 count;
 };
 
+struct ntsync_event_args {
+	__u32 manual;
+	__u32 signaled;
+};
+
 #define NTSYNC_WAIT_REALTIME	0x1
 
 struct ntsync_wait_args {
@@ -38,6 +43,7 @@ struct ntsync_wait_args {
 #define NTSYNC_IOC_WAIT_ANY		_IOWR('N', 0x82, struct ntsync_wait_args)
 #define NTSYNC_IOC_WAIT_ALL		_IOWR('N', 0x83, struct ntsync_wait_args)
 #define NTSYNC_IOC_CREATE_MUTEX		_IOW ('N', 0x84, struct ntsync_mutex_args)
+#define NTSYNC_IOC_CREATE_EVENT		_IOW ('N', 0x87, struct ntsync_event_args)
 
 #define NTSYNC_IOC_SEM_RELEASE		_IOWR('N', 0x81, __u32)
 #define NTSYNC_IOC_MUTEX_UNLOCK		_IOWR('N', 0x85, struct ntsync_mutex_args)
-- 
cgit v1.2.3


From 2dcba6fc15a442b1cf1df8cdd0da12f20db38c43 Mon Sep 17 00:00:00 2001
From: Elizabeth Figura <zfigura@codeweavers.com>
Date: Fri, 13 Dec 2024 13:34:50 -0600
Subject: ntsync: Introduce NTSYNC_IOC_EVENT_SET.

This corresponds to the NT syscall NtSetEvent().

This sets the event to the signaled state, and returns its previous state.

Signed-off-by: Elizabeth Figura <zfigura@codeweavers.com>
Link: https://lore.kernel.org/r/20241213193511.457338-10-zfigura@codeweavers.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/ntsync.c       | 27 +++++++++++++++++++++++++++
 include/uapi/linux/ntsync.h |  1 +
 2 files changed, 28 insertions(+)

(limited to 'include/uapi')

diff --git a/drivers/misc/ntsync.c b/drivers/misc/ntsync.c
index 3e8827b6f480..0a87f8ad5993 100644
--- a/drivers/misc/ntsync.c
+++ b/drivers/misc/ntsync.c
@@ -534,6 +534,31 @@ static int ntsync_mutex_kill(struct ntsync_obj *mutex, void __user *argp)
 	return ret;
 }
 
+static int ntsync_event_set(struct ntsync_obj *event, void __user *argp)
+{
+	struct ntsync_device *dev = event->dev;
+	__u32 prev_state;
+	bool all;
+
+	if (event->type != NTSYNC_TYPE_EVENT)
+		return -EINVAL;
+
+	all = ntsync_lock_obj(dev, event);
+
+	prev_state = event->u.event.signaled;
+	event->u.event.signaled = true;
+	if (all)
+		try_wake_all_obj(dev, event);
+	try_wake_any_event(event);
+
+	ntsync_unlock_obj(dev, event, all);
+
+	if (put_user(prev_state, (__u32 __user *)argp))
+		return -EFAULT;
+
+	return 0;
+}
+
 static int ntsync_obj_release(struct inode *inode, struct file *file)
 {
 	struct ntsync_obj *obj = file->private_data;
@@ -557,6 +582,8 @@ static long ntsync_obj_ioctl(struct file *file, unsigned int cmd,
 		return ntsync_mutex_unlock(obj, argp);
 	case NTSYNC_IOC_MUTEX_KILL:
 		return ntsync_mutex_kill(obj, argp);
+	case NTSYNC_IOC_EVENT_SET:
+		return ntsync_event_set(obj, argp);
 	default:
 		return -ENOIOCTLCMD;
 	}
diff --git a/include/uapi/linux/ntsync.h b/include/uapi/linux/ntsync.h
index e08aa02f4dcc..db2512f6e3f4 100644
--- a/include/uapi/linux/ntsync.h
+++ b/include/uapi/linux/ntsync.h
@@ -48,5 +48,6 @@ struct ntsync_wait_args {
 #define NTSYNC_IOC_SEM_RELEASE		_IOWR('N', 0x81, __u32)
 #define NTSYNC_IOC_MUTEX_UNLOCK		_IOWR('N', 0x85, struct ntsync_mutex_args)
 #define NTSYNC_IOC_MUTEX_KILL		_IOW ('N', 0x86, __u32)
+#define NTSYNC_IOC_EVENT_SET		_IOR ('N', 0x88, __u32)
 
 #endif
-- 
cgit v1.2.3


From bbb9797514b20c316314b78a439f87fd16b7c509 Mon Sep 17 00:00:00 2001
From: Elizabeth Figura <zfigura@codeweavers.com>
Date: Fri, 13 Dec 2024 13:34:51 -0600
Subject: ntsync: Introduce NTSYNC_IOC_EVENT_RESET.

This corresponds to the NT syscall NtResetEvent().

This sets the event to the unsignaled state, and returns its previous state.

Signed-off-by: Elizabeth Figura <zfigura@codeweavers.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Link: https://lore.kernel.org/r/20241213193511.457338-11-zfigura@codeweavers.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/ntsync.c       | 24 ++++++++++++++++++++++++
 include/uapi/linux/ntsync.h |  1 +
 2 files changed, 25 insertions(+)

(limited to 'include/uapi')

diff --git a/drivers/misc/ntsync.c b/drivers/misc/ntsync.c
index 0a87f8ad5993..b31443aa9692 100644
--- a/drivers/misc/ntsync.c
+++ b/drivers/misc/ntsync.c
@@ -559,6 +559,28 @@ static int ntsync_event_set(struct ntsync_obj *event, void __user *argp)
 	return 0;
 }
 
+static int ntsync_event_reset(struct ntsync_obj *event, void __user *argp)
+{
+	struct ntsync_device *dev = event->dev;
+	__u32 prev_state;
+	bool all;
+
+	if (event->type != NTSYNC_TYPE_EVENT)
+		return -EINVAL;
+
+	all = ntsync_lock_obj(dev, event);
+
+	prev_state = event->u.event.signaled;
+	event->u.event.signaled = false;
+
+	ntsync_unlock_obj(dev, event, all);
+
+	if (put_user(prev_state, (__u32 __user *)argp))
+		return -EFAULT;
+
+	return 0;
+}
+
 static int ntsync_obj_release(struct inode *inode, struct file *file)
 {
 	struct ntsync_obj *obj = file->private_data;
@@ -584,6 +606,8 @@ static long ntsync_obj_ioctl(struct file *file, unsigned int cmd,
 		return ntsync_mutex_kill(obj, argp);
 	case NTSYNC_IOC_EVENT_SET:
 		return ntsync_event_set(obj, argp);
+	case NTSYNC_IOC_EVENT_RESET:
+		return ntsync_event_reset(obj, argp);
 	default:
 		return -ENOIOCTLCMD;
 	}
diff --git a/include/uapi/linux/ntsync.h b/include/uapi/linux/ntsync.h
index db2512f6e3f4..d74c4e4f93d8 100644
--- a/include/uapi/linux/ntsync.h
+++ b/include/uapi/linux/ntsync.h
@@ -49,5 +49,6 @@ struct ntsync_wait_args {
 #define NTSYNC_IOC_MUTEX_UNLOCK		_IOWR('N', 0x85, struct ntsync_mutex_args)
 #define NTSYNC_IOC_MUTEX_KILL		_IOW ('N', 0x86, __u32)
 #define NTSYNC_IOC_EVENT_SET		_IOR ('N', 0x88, __u32)
+#define NTSYNC_IOC_EVENT_RESET		_IOR ('N', 0x89, __u32)
 
 #endif
-- 
cgit v1.2.3


From 12b29d3008e6bd5118af716ae2971fe6823b117a Mon Sep 17 00:00:00 2001
From: Elizabeth Figura <zfigura@codeweavers.com>
Date: Fri, 13 Dec 2024 13:34:52 -0600
Subject: ntsync: Introduce NTSYNC_IOC_EVENT_PULSE.

This corresponds to the NT syscall NtPulseEvent().

This wakes up any waiters as if the event had been set, but does not set the
event, instead resetting it if it had been signalled. Thus, for a manual-reset
event, all waiters are woken, whereas for an auto-reset event, at most one
waiter is woken.

Signed-off-by: Elizabeth Figura <zfigura@codeweavers.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Link: https://lore.kernel.org/r/20241213193511.457338-12-zfigura@codeweavers.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/ntsync.c       | 8 ++++++--
 include/uapi/linux/ntsync.h | 1 +
 2 files changed, 7 insertions(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/misc/ntsync.c b/drivers/misc/ntsync.c
index b31443aa9692..141ebe8a7d4f 100644
--- a/drivers/misc/ntsync.c
+++ b/drivers/misc/ntsync.c
@@ -534,7 +534,7 @@ static int ntsync_mutex_kill(struct ntsync_obj *mutex, void __user *argp)
 	return ret;
 }
 
-static int ntsync_event_set(struct ntsync_obj *event, void __user *argp)
+static int ntsync_event_set(struct ntsync_obj *event, void __user *argp, bool pulse)
 {
 	struct ntsync_device *dev = event->dev;
 	__u32 prev_state;
@@ -550,6 +550,8 @@ static int ntsync_event_set(struct ntsync_obj *event, void __user *argp)
 	if (all)
 		try_wake_all_obj(dev, event);
 	try_wake_any_event(event);
+	if (pulse)
+		event->u.event.signaled = false;
 
 	ntsync_unlock_obj(dev, event, all);
 
@@ -605,9 +607,11 @@ static long ntsync_obj_ioctl(struct file *file, unsigned int cmd,
 	case NTSYNC_IOC_MUTEX_KILL:
 		return ntsync_mutex_kill(obj, argp);
 	case NTSYNC_IOC_EVENT_SET:
-		return ntsync_event_set(obj, argp);
+		return ntsync_event_set(obj, argp, false);
 	case NTSYNC_IOC_EVENT_RESET:
 		return ntsync_event_reset(obj, argp);
+	case NTSYNC_IOC_EVENT_PULSE:
+		return ntsync_event_set(obj, argp, true);
 	default:
 		return -ENOIOCTLCMD;
 	}
diff --git a/include/uapi/linux/ntsync.h b/include/uapi/linux/ntsync.h
index d74c4e4f93d8..9eab7666b8b9 100644
--- a/include/uapi/linux/ntsync.h
+++ b/include/uapi/linux/ntsync.h
@@ -50,5 +50,6 @@ struct ntsync_wait_args {
 #define NTSYNC_IOC_MUTEX_KILL		_IOW ('N', 0x86, __u32)
 #define NTSYNC_IOC_EVENT_SET		_IOR ('N', 0x88, __u32)
 #define NTSYNC_IOC_EVENT_RESET		_IOR ('N', 0x89, __u32)
+#define NTSYNC_IOC_EVENT_PULSE		_IOR ('N', 0x8a, __u32)
 
 #endif
-- 
cgit v1.2.3


From a948f4177c3cce5f27648696da95c62c0253a099 Mon Sep 17 00:00:00 2001
From: Elizabeth Figura <zfigura@codeweavers.com>
Date: Fri, 13 Dec 2024 13:34:53 -0600
Subject: ntsync: Introduce NTSYNC_IOC_SEM_READ.

This corresponds to the NT syscall NtQuerySemaphore().

This returns the current count and maximum count of the semaphore.

Signed-off-by: Elizabeth Figura <zfigura@codeweavers.com>
Link: https://lore.kernel.org/r/20241213193511.457338-13-zfigura@codeweavers.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/ntsync.c       | 24 ++++++++++++++++++++++++
 include/uapi/linux/ntsync.h |  1 +
 2 files changed, 25 insertions(+)

(limited to 'include/uapi')

diff --git a/drivers/misc/ntsync.c b/drivers/misc/ntsync.c
index 141ebe8a7d4f..b2043412c5cd 100644
--- a/drivers/misc/ntsync.c
+++ b/drivers/misc/ntsync.c
@@ -583,6 +583,28 @@ static int ntsync_event_reset(struct ntsync_obj *event, void __user *argp)
 	return 0;
 }
 
+static int ntsync_sem_read(struct ntsync_obj *sem, void __user *argp)
+{
+	struct ntsync_sem_args __user *user_args = argp;
+	struct ntsync_device *dev = sem->dev;
+	struct ntsync_sem_args args;
+	bool all;
+
+	if (sem->type != NTSYNC_TYPE_SEM)
+		return -EINVAL;
+
+	all = ntsync_lock_obj(dev, sem);
+
+	args.count = sem->u.sem.count;
+	args.max = sem->u.sem.max;
+
+	ntsync_unlock_obj(dev, sem, all);
+
+	if (copy_to_user(user_args, &args, sizeof(args)))
+		return -EFAULT;
+	return 0;
+}
+
 static int ntsync_obj_release(struct inode *inode, struct file *file)
 {
 	struct ntsync_obj *obj = file->private_data;
@@ -602,6 +624,8 @@ static long ntsync_obj_ioctl(struct file *file, unsigned int cmd,
 	switch (cmd) {
 	case NTSYNC_IOC_SEM_RELEASE:
 		return ntsync_sem_release(obj, argp);
+	case NTSYNC_IOC_SEM_READ:
+		return ntsync_sem_read(obj, argp);
 	case NTSYNC_IOC_MUTEX_UNLOCK:
 		return ntsync_mutex_unlock(obj, argp);
 	case NTSYNC_IOC_MUTEX_KILL:
diff --git a/include/uapi/linux/ntsync.h b/include/uapi/linux/ntsync.h
index 9eab7666b8b9..e36b4cfb7d34 100644
--- a/include/uapi/linux/ntsync.h
+++ b/include/uapi/linux/ntsync.h
@@ -51,5 +51,6 @@ struct ntsync_wait_args {
 #define NTSYNC_IOC_EVENT_SET		_IOR ('N', 0x88, __u32)
 #define NTSYNC_IOC_EVENT_RESET		_IOR ('N', 0x89, __u32)
 #define NTSYNC_IOC_EVENT_PULSE		_IOR ('N', 0x8a, __u32)
+#define NTSYNC_IOC_SEM_READ		_IOR ('N', 0x8b, struct ntsync_sem_args)
 
 #endif
-- 
cgit v1.2.3


From 0b3c31449d28c83c0b8a8a7be569aa30d70b7764 Mon Sep 17 00:00:00 2001
From: Elizabeth Figura <zfigura@codeweavers.com>
Date: Fri, 13 Dec 2024 13:34:54 -0600
Subject: ntsync: Introduce NTSYNC_IOC_MUTEX_READ.

This corresponds to the NT syscall NtQueryMutant().

This returns the recursion count, owner, and abandoned state of the mutex.

Signed-off-by: Elizabeth Figura <zfigura@codeweavers.com>
Link: https://lore.kernel.org/r/20241213193511.457338-14-zfigura@codeweavers.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/ntsync.c       | 26 ++++++++++++++++++++++++++
 include/uapi/linux/ntsync.h |  1 +
 2 files changed, 27 insertions(+)

(limited to 'include/uapi')

diff --git a/drivers/misc/ntsync.c b/drivers/misc/ntsync.c
index b2043412c5cd..0047b13b6ebd 100644
--- a/drivers/misc/ntsync.c
+++ b/drivers/misc/ntsync.c
@@ -605,6 +605,30 @@ static int ntsync_sem_read(struct ntsync_obj *sem, void __user *argp)
 	return 0;
 }
 
+static int ntsync_mutex_read(struct ntsync_obj *mutex, void __user *argp)
+{
+	struct ntsync_mutex_args __user *user_args = argp;
+	struct ntsync_device *dev = mutex->dev;
+	struct ntsync_mutex_args args;
+	bool all;
+	int ret;
+
+	if (mutex->type != NTSYNC_TYPE_MUTEX)
+		return -EINVAL;
+
+	all = ntsync_lock_obj(dev, mutex);
+
+	args.count = mutex->u.mutex.count;
+	args.owner = mutex->u.mutex.owner;
+	ret = mutex->u.mutex.ownerdead ? -EOWNERDEAD : 0;
+
+	ntsync_unlock_obj(dev, mutex, all);
+
+	if (copy_to_user(user_args, &args, sizeof(args)))
+		return -EFAULT;
+	return ret;
+}
+
 static int ntsync_obj_release(struct inode *inode, struct file *file)
 {
 	struct ntsync_obj *obj = file->private_data;
@@ -630,6 +654,8 @@ static long ntsync_obj_ioctl(struct file *file, unsigned int cmd,
 		return ntsync_mutex_unlock(obj, argp);
 	case NTSYNC_IOC_MUTEX_KILL:
 		return ntsync_mutex_kill(obj, argp);
+	case NTSYNC_IOC_MUTEX_READ:
+		return ntsync_mutex_read(obj, argp);
 	case NTSYNC_IOC_EVENT_SET:
 		return ntsync_event_set(obj, argp, false);
 	case NTSYNC_IOC_EVENT_RESET:
diff --git a/include/uapi/linux/ntsync.h b/include/uapi/linux/ntsync.h
index e36b4cfb7d34..207646e63ef3 100644
--- a/include/uapi/linux/ntsync.h
+++ b/include/uapi/linux/ntsync.h
@@ -52,5 +52,6 @@ struct ntsync_wait_args {
 #define NTSYNC_IOC_EVENT_RESET		_IOR ('N', 0x89, __u32)
 #define NTSYNC_IOC_EVENT_PULSE		_IOR ('N', 0x8a, __u32)
 #define NTSYNC_IOC_SEM_READ		_IOR ('N', 0x8b, struct ntsync_sem_args)
+#define NTSYNC_IOC_MUTEX_READ		_IOR ('N', 0x8c, struct ntsync_mutex_args)
 
 #endif
-- 
cgit v1.2.3


From e864071a630cfbbd55251e7b45461003f4f79877 Mon Sep 17 00:00:00 2001
From: Elizabeth Figura <zfigura@codeweavers.com>
Date: Fri, 13 Dec 2024 13:34:55 -0600
Subject: ntsync: Introduce NTSYNC_IOC_EVENT_READ.

This corresponds to the NT syscall NtQueryEvent().

This returns the signaled state of the event and whether it is manual-reset.

Signed-off-by: Elizabeth Figura <zfigura@codeweavers.com>
Link: https://lore.kernel.org/r/20241213193511.457338-15-zfigura@codeweavers.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/ntsync.c       | 24 ++++++++++++++++++++++++
 include/uapi/linux/ntsync.h |  1 +
 2 files changed, 25 insertions(+)

(limited to 'include/uapi')

diff --git a/drivers/misc/ntsync.c b/drivers/misc/ntsync.c
index 0047b13b6ebd..78dc405bb759 100644
--- a/drivers/misc/ntsync.c
+++ b/drivers/misc/ntsync.c
@@ -629,6 +629,28 @@ static int ntsync_mutex_read(struct ntsync_obj *mutex, void __user *argp)
 	return ret;
 }
 
+static int ntsync_event_read(struct ntsync_obj *event, void __user *argp)
+{
+	struct ntsync_event_args __user *user_args = argp;
+	struct ntsync_device *dev = event->dev;
+	struct ntsync_event_args args;
+	bool all;
+
+	if (event->type != NTSYNC_TYPE_EVENT)
+		return -EINVAL;
+
+	all = ntsync_lock_obj(dev, event);
+
+	args.manual = event->u.event.manual;
+	args.signaled = event->u.event.signaled;
+
+	ntsync_unlock_obj(dev, event, all);
+
+	if (copy_to_user(user_args, &args, sizeof(args)))
+		return -EFAULT;
+	return 0;
+}
+
 static int ntsync_obj_release(struct inode *inode, struct file *file)
 {
 	struct ntsync_obj *obj = file->private_data;
@@ -662,6 +684,8 @@ static long ntsync_obj_ioctl(struct file *file, unsigned int cmd,
 		return ntsync_event_reset(obj, argp);
 	case NTSYNC_IOC_EVENT_PULSE:
 		return ntsync_event_set(obj, argp, true);
+	case NTSYNC_IOC_EVENT_READ:
+		return ntsync_event_read(obj, argp);
 	default:
 		return -ENOIOCTLCMD;
 	}
diff --git a/include/uapi/linux/ntsync.h b/include/uapi/linux/ntsync.h
index 207646e63ef3..b9d208a8c00f 100644
--- a/include/uapi/linux/ntsync.h
+++ b/include/uapi/linux/ntsync.h
@@ -53,5 +53,6 @@ struct ntsync_wait_args {
 #define NTSYNC_IOC_EVENT_PULSE		_IOR ('N', 0x8a, __u32)
 #define NTSYNC_IOC_SEM_READ		_IOR ('N', 0x8b, struct ntsync_sem_args)
 #define NTSYNC_IOC_MUTEX_READ		_IOR ('N', 0x8c, struct ntsync_mutex_args)
+#define NTSYNC_IOC_EVENT_READ		_IOR ('N', 0x8d, struct ntsync_event_args)
 
 #endif
-- 
cgit v1.2.3


From a138179a59d47690b069fbded1be12f47648ef07 Mon Sep 17 00:00:00 2001
From: Elizabeth Figura <zfigura@codeweavers.com>
Date: Fri, 13 Dec 2024 13:34:56 -0600
Subject: ntsync: Introduce alertable waits.

NT waits can optionally be made "alertable". This is a special channel for
thread wakeup that is mildly similar to SIGIO. A thread has an internal single
bit of "alerted" state, and if a thread is alerted while an alertable wait, the
wait will return a special value, consume the "alerted" state, and will not
consume any of its objects.

Alerts are implemented using events; the user-space NT emulator is expected to
create an internal ntsync event for each thread and pass that event to wait
functions.

Signed-off-by: Elizabeth Figura <zfigura@codeweavers.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Link: https://lore.kernel.org/r/20241213193511.457338-16-zfigura@codeweavers.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/ntsync.c       | 70 +++++++++++++++++++++++++++++++++++++++------
 include/uapi/linux/ntsync.h |  3 +-
 2 files changed, 63 insertions(+), 10 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/misc/ntsync.c b/drivers/misc/ntsync.c
index 78dc405bb759..457ff28b789f 100644
--- a/drivers/misc/ntsync.c
+++ b/drivers/misc/ntsync.c
@@ -869,22 +869,29 @@ static int setup_wait(struct ntsync_device *dev,
 		      const struct ntsync_wait_args *args, bool all,
 		      struct ntsync_q **ret_q)
 {
+	int fds[NTSYNC_MAX_WAIT_COUNT + 1];
 	const __u32 count = args->count;
-	int fds[NTSYNC_MAX_WAIT_COUNT];
 	struct ntsync_q *q;
+	__u32 total_count;
 	__u32 i, j;
 
-	if (args->pad[0] || args->pad[1] || (args->flags & ~NTSYNC_WAIT_REALTIME))
+	if (args->pad || (args->flags & ~NTSYNC_WAIT_REALTIME))
 		return -EINVAL;
 
 	if (args->count > NTSYNC_MAX_WAIT_COUNT)
 		return -EINVAL;
 
+	total_count = count;
+	if (args->alert)
+		total_count++;
+
 	if (copy_from_user(fds, u64_to_user_ptr(args->objs),
 			   array_size(count, sizeof(*fds))))
 		return -EFAULT;
+	if (args->alert)
+		fds[count] = args->alert;
 
-	q = kmalloc(struct_size(q, entries, count), GFP_KERNEL);
+	q = kmalloc(struct_size(q, entries, total_count), GFP_KERNEL);
 	if (!q)
 		return -ENOMEM;
 	q->task = current;
@@ -894,7 +901,7 @@ static int setup_wait(struct ntsync_device *dev,
 	q->ownerdead = false;
 	q->count = count;
 
-	for (i = 0; i < count; i++) {
+	for (i = 0; i < total_count; i++) {
 		struct ntsync_q_entry *entry = &q->entries[i];
 		struct ntsync_obj *obj = get_obj(dev, fds[i]);
 
@@ -944,10 +951,10 @@ static void try_wake_any_obj(struct ntsync_obj *obj)
 static int ntsync_wait_any(struct ntsync_device *dev, void __user *argp)
 {
 	struct ntsync_wait_args args;
+	__u32 i, total_count;
 	struct ntsync_q *q;
 	int signaled;
 	bool all;
-	__u32 i;
 	int ret;
 
 	if (copy_from_user(&args, argp, sizeof(args)))
@@ -957,9 +964,13 @@ static int ntsync_wait_any(struct ntsync_device *dev, void __user *argp)
 	if (ret < 0)
 		return ret;
 
+	total_count = args.count;
+	if (args.alert)
+		total_count++;
+
 	/* queue ourselves */
 
-	for (i = 0; i < args.count; i++) {
+	for (i = 0; i < total_count; i++) {
 		struct ntsync_q_entry *entry = &q->entries[i];
 		struct ntsync_obj *obj = entry->obj;
 
@@ -968,9 +979,15 @@ static int ntsync_wait_any(struct ntsync_device *dev, void __user *argp)
 		ntsync_unlock_obj(dev, obj, all);
 	}
 
-	/* check if we are already signaled */
+	/*
+	 * Check if we are already signaled.
+	 *
+	 * Note that the API requires that normal objects are checked before
+	 * the alert event. Hence we queue the alert event last, and check
+	 * objects in order.
+	 */
 
-	for (i = 0; i < args.count; i++) {
+	for (i = 0; i < total_count; i++) {
 		struct ntsync_obj *obj = q->entries[i].obj;
 
 		if (atomic_read(&q->signaled) != -1)
@@ -987,7 +1004,7 @@ static int ntsync_wait_any(struct ntsync_device *dev, void __user *argp)
 
 	/* and finally, unqueue */
 
-	for (i = 0; i < args.count; i++) {
+	for (i = 0; i < total_count; i++) {
 		struct ntsync_q_entry *entry = &q->entries[i];
 		struct ntsync_obj *obj = entry->obj;
 
@@ -1047,6 +1064,14 @@ static int ntsync_wait_all(struct ntsync_device *dev, void __user *argp)
 		 */
 		list_add_tail(&entry->node, &obj->all_waiters);
 	}
+	if (args.alert) {
+		struct ntsync_q_entry *entry = &q->entries[args.count];
+		struct ntsync_obj *obj = entry->obj;
+
+		dev_lock_obj(dev, obj);
+		list_add_tail(&entry->node, &obj->any_waiters);
+		dev_unlock_obj(dev, obj);
+	}
 
 	/* check if we are already signaled */
 
@@ -1054,6 +1079,21 @@ static int ntsync_wait_all(struct ntsync_device *dev, void __user *argp)
 
 	mutex_unlock(&dev->wait_all_lock);
 
+	/*
+	 * Check if the alert event is signaled, making sure to do so only
+	 * after checking if the other objects are signaled.
+	 */
+
+	if (args.alert) {
+		struct ntsync_obj *obj = q->entries[args.count].obj;
+
+		if (atomic_read(&q->signaled) == -1) {
+			bool all = ntsync_lock_obj(dev, obj);
+			try_wake_any_obj(obj);
+			ntsync_unlock_obj(dev, obj, all);
+		}
+	}
+
 	/* sleep */
 
 	ret = ntsync_schedule(q, &args);
@@ -1079,6 +1119,18 @@ static int ntsync_wait_all(struct ntsync_device *dev, void __user *argp)
 
 	mutex_unlock(&dev->wait_all_lock);
 
+	if (args.alert) {
+		struct ntsync_q_entry *entry = &q->entries[args.count];
+		struct ntsync_obj *obj = entry->obj;
+		bool all;
+
+		all = ntsync_lock_obj(dev, obj);
+		list_del(&entry->node);
+		ntsync_unlock_obj(dev, obj, all);
+
+		put_obj(obj);
+	}
+
 	signaled = atomic_read(&q->signaled);
 	if (signaled != -1) {
 		struct ntsync_wait_args __user *user_args = argp;
diff --git a/include/uapi/linux/ntsync.h b/include/uapi/linux/ntsync.h
index b9d208a8c00f..6d06793512b1 100644
--- a/include/uapi/linux/ntsync.h
+++ b/include/uapi/linux/ntsync.h
@@ -34,7 +34,8 @@ struct ntsync_wait_args {
 	__u32 index;
 	__u32 flags;
 	__u32 owner;
-	__u32 pad[2];
+	__u32 alert;
+	__u32 pad;
 };
 
 #define NTSYNC_MAX_WAIT_COUNT 64
-- 
cgit v1.2.3


From 33d97a07b3ae6fa713919de4e1864ca04fff8f80 Mon Sep 17 00:00:00 2001
From: Yuyang Huang <yuyanghuang@google.com>
Date: Tue, 7 Jan 2025 20:43:55 +0900
Subject: netlink: add IPv6 anycast join/leave notifications
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This change introduces a mechanism for notifying userspace
applications about changes to IPv6 anycast addresses via netlink. It
includes:

* Addition and deletion of IPv6 anycast addresses are reported using
  RTM_NEWANYCAST and RTM_DELANYCAST.
* A new netlink group (RTNLGRP_IPV6_ACADDR) for subscribing to these
  notifications.

This enables user space applications(e.g. ip monitor) to efficiently
track anycast addresses through netlink messages, improving metrics
collection and system monitoring. It also unlocks the potential for
advanced anycast management in user space, such as hardware offload
control and fine grained network control.

Cc: Maciej Żenczykowski <maze@google.com>
Cc: Lorenzo Colitti <lorenzo@google.com>
Signed-off-by: Yuyang Huang <yuyanghuang@google.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Link: https://patch.msgid.link/20250107114355.1766086-1-yuyanghuang@google.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/net/addrconf.h         |  3 +++
 include/uapi/linux/rtnetlink.h |  8 +++++++-
 net/ipv6/addrconf.c            |  6 +++---
 net/ipv6/anycast.c             | 35 +++++++++++++++++++++++++++++++++++
 4 files changed, 48 insertions(+), 4 deletions(-)

(limited to 'include/uapi')

diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 58337898fa21..f8f91b2038ea 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -546,4 +546,7 @@ int inet6_fill_ifmcaddr(struct sk_buff *skb,
 			const struct ifmcaddr6 *ifmca,
 			struct inet6_fill_args *args);
 
+int inet6_fill_ifacaddr(struct sk_buff *skb,
+			const struct ifacaddr6 *ifaca,
+			struct inet6_fill_args *args);
 #endif
diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index 5ee94c511a28..66c3903d29cf 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -100,7 +100,11 @@ enum {
 	RTM_GETMULTICAST,
 #define RTM_GETMULTICAST RTM_GETMULTICAST
 
-	RTM_GETANYCAST	= 62,
+	RTM_NEWANYCAST	= 60,
+#define RTM_NEWANYCAST RTM_NEWANYCAST
+	RTM_DELANYCAST,
+#define RTM_DELANYCAST RTM_DELANYCAST
+	RTM_GETANYCAST,
 #define RTM_GETANYCAST	RTM_GETANYCAST
 
 	RTM_NEWNEIGHTBL	= 64,
@@ -783,6 +787,8 @@ enum rtnetlink_groups {
 #define RTNLGRP_IPV4_MCADDR	RTNLGRP_IPV4_MCADDR
 	RTNLGRP_IPV6_MCADDR,
 #define RTNLGRP_IPV6_MCADDR	RTNLGRP_IPV6_MCADDR
+	RTNLGRP_IPV6_ACADDR,
+#define RTNLGRP_IPV6_ACADDR	RTNLGRP_IPV6_ACADDR
 	__RTNLGRP_MAX
 };
 #define RTNLGRP_MAX	(__RTNLGRP_MAX - 1)
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 4da409bc4577..c3729382be3b 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -5240,9 +5240,9 @@ int inet6_fill_ifmcaddr(struct sk_buff *skb,
 	return 0;
 }
 
-static int inet6_fill_ifacaddr(struct sk_buff *skb,
-			       const struct ifacaddr6 *ifaca,
-			       struct inet6_fill_args *args)
+int inet6_fill_ifacaddr(struct sk_buff *skb,
+			const struct ifacaddr6 *ifaca,
+			struct inet6_fill_args *args)
 {
 	struct net_device *dev = fib6_info_nh_dev(ifaca->aca_rt);
 	int ifindex = dev ? dev->ifindex : 1;
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 562cace50ca9..21e01695b48c 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -278,6 +278,37 @@ static struct ifacaddr6 *aca_alloc(struct fib6_info *f6i,
 	return aca;
 }
 
+static void inet6_ifacaddr_notify(struct net_device *dev,
+				  const struct ifacaddr6 *ifaca, int event)
+{
+	struct inet6_fill_args fillargs = {
+		.event = event,
+		.netnsid = -1,
+	};
+	struct net *net = dev_net(dev);
+	struct sk_buff *skb;
+	int err = -ENOMEM;
+
+	skb = nlmsg_new(NLMSG_ALIGN(sizeof(struct ifaddrmsg)) +
+			nla_total_size(sizeof(struct in6_addr)) +
+			nla_total_size(sizeof(struct ifa_cacheinfo)),
+			GFP_KERNEL);
+	if (!skb)
+		goto error;
+
+	err = inet6_fill_ifacaddr(skb, ifaca, &fillargs);
+	if (err < 0) {
+		pr_err("Failed to fill in anycast addresses (err %d)\n", err);
+		nlmsg_free(skb);
+		goto error;
+	}
+
+	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_ACADDR, NULL, GFP_KERNEL);
+	return;
+error:
+	rtnl_set_sk_err(net, RTNLGRP_IPV6_ACADDR, err);
+}
+
 /*
  *	device anycast group inc (add if not found)
  */
@@ -333,6 +364,8 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr)
 
 	addrconf_join_solict(idev->dev, &aca->aca_addr);
 
+	inet6_ifacaddr_notify(idev->dev, aca, RTM_NEWANYCAST);
+
 	aca_put(aca);
 	return 0;
 out:
@@ -375,6 +408,8 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr)
 
 	ip6_del_rt(dev_net(idev->dev), aca->aca_rt, false);
 
+	inet6_ifacaddr_notify(idev->dev, aca, RTM_DELANYCAST);
+
 	aca_put(aca);
 	return 0;
 }
-- 
cgit v1.2.3


From 601731fc7c6111bbca49ce3c9499c2e4d426079d Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 15 Nov 2024 14:46:09 +0100
Subject: netfilter: conntrack: add conntrack event timestamp

Nadia Pinaeva writes:
  I am working on a tool that allows collecting network performance
  metrics by using conntrack events.
  Start time of a conntrack entry is used to evaluate seen_reply
  latency, therefore the sooner it is timestamped, the better the
  precision is.
  In particular, when using this tool to compare the performance of the
  same feature implemented using iptables/nftables/OVS it is crucial
  to have the entry timestamped earlier to see any difference.

At this time, conntrack events can only get timestamped at recv time in
userspace, so there can be some delay between the event being generated
and the userspace process consuming the message.

There is sys/net/netfilter/nf_conntrack_timestamp, which adds a
64bit timestamp (ns resolution) that records start and stop times,
but its not suited for this either, start time is the 'hashtable insertion
time', not 'conntrack allocation time'.

There is concern that moving the start-time moment to conntrack
allocation will add overhead in case of flooding, where conntrack
entries are allocated and released right away without getting inserted
into the hashtable.

Also, even if this was changed it would not with events other than
new (start time) and destroy (stop time).

Pablo suggested to add new CTA_TIMESTAMP_EVENT, this adds this feature.
The timestamp is recorded in case both events are requested and the
sys/net/netfilter/nf_conntrack_timestamp toggle is enabled.

Reported-by: Nadia Pinaeva <n.m.pinaeva@gmail.com>
Suggested-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_ecache.h        | 12 +++++++++++
 include/uapi/linux/netfilter/nfnetlink_conntrack.h |  1 +
 net/netfilter/nf_conntrack_ecache.c                | 23 ++++++++++++++++++++
 net/netfilter/nf_conntrack_netlink.c               | 25 ++++++++++++++++++++++
 4 files changed, 61 insertions(+)

(limited to 'include/uapi')

diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h
index 0c1dac318e02..8dcf7c371ee9 100644
--- a/include/net/netfilter/nf_conntrack_ecache.h
+++ b/include/net/netfilter/nf_conntrack_ecache.h
@@ -12,6 +12,7 @@
 #include <linux/netfilter/nf_conntrack_common.h>
 #include <linux/netfilter/nf_conntrack_tuple_common.h>
 #include <net/netfilter/nf_conntrack_extend.h>
+#include <asm/local64.h>
 
 enum nf_ct_ecache_state {
 	NFCT_ECACHE_DESTROY_FAIL,	/* tried but failed to send destroy event */
@@ -20,6 +21,9 @@ enum nf_ct_ecache_state {
 
 struct nf_conntrack_ecache {
 	unsigned long cache;		/* bitops want long */
+#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
+	local64_t timestamp;		/* event timestamp, in nanoseconds */
+#endif
 	u16 ctmask;			/* bitmask of ct events to be delivered */
 	u16 expmask;			/* bitmask of expect events to be delivered */
 	u32 missed;			/* missed events */
@@ -108,6 +112,14 @@ nf_conntrack_event_cache(enum ip_conntrack_events event, struct nf_conn *ct)
 	if (e == NULL)
 		return;
 
+#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
+	/* renew only if this is the first cached event, so that the
+	 * timestamp reflects the first, not the last, generated event.
+	 */
+	if (local64_read(&e->timestamp) && READ_ONCE(e->cache) == 0)
+		local64_set(&e->timestamp, ktime_get_real_ns());
+#endif
+
 	set_bit(event, &e->cache);
 #endif
 }
diff --git a/include/uapi/linux/netfilter/nfnetlink_conntrack.h b/include/uapi/linux/netfilter/nfnetlink_conntrack.h
index c2ac7269acf7..43233af75b9d 100644
--- a/include/uapi/linux/netfilter/nfnetlink_conntrack.h
+++ b/include/uapi/linux/netfilter/nfnetlink_conntrack.h
@@ -57,6 +57,7 @@ enum ctattr_type {
 	CTA_SYNPROXY,
 	CTA_FILTER,
 	CTA_STATUS_MASK,
+	CTA_TIMESTAMP_EVENT,
 	__CTA_MAX
 };
 #define CTA_MAX (__CTA_MAX - 1)
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index 69948e1d6974..af68c64acaab 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -162,6 +162,14 @@ static int __nf_conntrack_eventmask_report(struct nf_conntrack_ecache *e,
 	return ret;
 }
 
+static void nf_ct_ecache_tstamp_refresh(struct nf_conntrack_ecache *e)
+{
+#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
+	if (local64_read(&e->timestamp))
+		local64_set(&e->timestamp, ktime_get_real_ns());
+#endif
+}
+
 int nf_conntrack_eventmask_report(unsigned int events, struct nf_conn *ct,
 				  u32 portid, int report)
 {
@@ -186,6 +194,8 @@ int nf_conntrack_eventmask_report(unsigned int events, struct nf_conn *ct,
 	/* This is a resent of a destroy event? If so, skip missed */
 	missed = e->portid ? 0 : e->missed;
 
+	nf_ct_ecache_tstamp_refresh(e);
+
 	ret = __nf_conntrack_eventmask_report(e, events, missed, &item);
 	if (unlikely(ret < 0 && (events & (1 << IPCT_DESTROY)))) {
 		/* This is a destroy event that has been triggered by a process,
@@ -297,6 +307,18 @@ void nf_conntrack_ecache_work(struct net *net, enum nf_ct_ecache_state state)
 	}
 }
 
+static void nf_ct_ecache_tstamp_new(const struct nf_conn *ct, struct nf_conntrack_ecache *e)
+{
+#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
+	u64 ts = 0;
+
+	if (nf_ct_ext_exist(ct, NF_CT_EXT_TSTAMP))
+		ts = ktime_get_real_ns();
+
+	local64_set(&e->timestamp, ts);
+#endif
+}
+
 bool nf_ct_ecache_ext_add(struct nf_conn *ct, u16 ctmask, u16 expmask, gfp_t gfp)
 {
 	struct net *net = nf_ct_net(ct);
@@ -326,6 +348,7 @@ bool nf_ct_ecache_ext_add(struct nf_conn *ct, u16 ctmask, u16 expmask, gfp_t gfp
 
 	e = nf_ct_ext_add(ct, NF_CT_EXT_ECACHE, gfp);
 	if (e) {
+		nf_ct_ecache_tstamp_new(ct, e);
 		e->ctmask  = ctmask;
 		e->expmask = expmask;
 	}
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 36168f8b6efa..2277b744eb2c 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -383,6 +383,23 @@ nla_put_failure:
 #endif
 
 #ifdef CONFIG_NF_CONNTRACK_EVENTS
+static int
+ctnetlink_dump_event_timestamp(struct sk_buff *skb, const struct nf_conn *ct)
+{
+#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
+	const struct nf_conntrack_ecache *e = nf_ct_ecache_find(ct);
+
+	if (e) {
+		u64 ts = local64_read(&e->timestamp);
+
+		if (ts)
+			return nla_put_be64(skb, CTA_TIMESTAMP_EVENT,
+					    cpu_to_be64(ts), CTA_TIMESTAMP_PAD);
+	}
+#endif
+	return 0;
+}
+
 static inline int ctnetlink_label_size(const struct nf_conn *ct)
 {
 	struct nf_conn_labels *labels = nf_ct_labels_find(ct);
@@ -717,6 +734,9 @@ static size_t ctnetlink_nlmsg_size(const struct nf_conn *ct)
 #endif
 	       + ctnetlink_proto_size(ct)
 	       + ctnetlink_label_size(ct)
+#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
+	       + nla_total_size(sizeof(u64)) /* CTA_TIMESTAMP_EVENT */
+#endif
 	       ;
 }
 
@@ -838,6 +858,10 @@ ctnetlink_conntrack_event(unsigned int events, const struct nf_ct_event *item)
 	if (ctnetlink_dump_mark(skb, ct, events & (1 << IPCT_MARK)))
 		goto nla_put_failure;
 #endif
+
+	if (ctnetlink_dump_event_timestamp(skb, ct))
+		goto nla_put_failure;
+
 	nlmsg_end(skb, nlh);
 	err = nfnetlink_send(skb, net, item->portid, group, item->report,
 			     GFP_ATOMIC);
@@ -1557,6 +1581,7 @@ static const struct nla_policy ct_nla_policy[CTA_MAX+1] = {
 				    .len = NF_CT_LABELS_MAX_SIZE },
 	[CTA_FILTER]		= { .type = NLA_NESTED },
 	[CTA_STATUS_MASK]	= { .type = NLA_U32 },
+	[CTA_TIMESTAMP_EVENT]	= { .type = NLA_REJECT },
 };
 
 static int ctnetlink_flush_iterate(struct nf_conn *ct, void *data)
-- 
cgit v1.2.3


From 8fc7e23a9bd851e6997d3ea2ea1c3475b91862d3 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 9 Jan 2025 09:31:01 +0100
Subject: fs: reformat the statx definition

The comments after the declaration are becoming rather unreadable with
long enough comments.  Move them into lines of their own.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20250109083109.1441561-2-hch@lst.de
Reviewed-by: John Garry <john.g.garry@oracle.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/uapi/linux/stat.h | 95 +++++++++++++++++++++++++++++++++++------------
 1 file changed, 72 insertions(+), 23 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/stat.h b/include/uapi/linux/stat.h
index 887a25286441..8b35d7d511a2 100644
--- a/include/uapi/linux/stat.h
+++ b/include/uapi/linux/stat.h
@@ -98,43 +98,92 @@ struct statx_timestamp {
  */
 struct statx {
 	/* 0x00 */
-	__u32	stx_mask;	/* What results were written [uncond] */
-	__u32	stx_blksize;	/* Preferred general I/O size [uncond] */
-	__u64	stx_attributes;	/* Flags conveying information about the file [uncond] */
+	/* What results were written [uncond] */
+	__u32	stx_mask;
+
+	/* Preferred general I/O size [uncond] */
+	__u32	stx_blksize;
+
+	/* Flags conveying information about the file [uncond] */
+	__u64	stx_attributes;
+
 	/* 0x10 */
-	__u32	stx_nlink;	/* Number of hard links */
-	__u32	stx_uid;	/* User ID of owner */
-	__u32	stx_gid;	/* Group ID of owner */
-	__u16	stx_mode;	/* File mode */
+	/* Number of hard links */
+	__u32	stx_nlink;
+
+	/* User ID of owner */
+	__u32	stx_uid;
+
+	/* Group ID of owner */
+	__u32	stx_gid;
+
+	/* File mode */
+	__u16	stx_mode;
 	__u16	__spare0[1];
+
 	/* 0x20 */
-	__u64	stx_ino;	/* Inode number */
-	__u64	stx_size;	/* File size */
-	__u64	stx_blocks;	/* Number of 512-byte blocks allocated */
-	__u64	stx_attributes_mask; /* Mask to show what's supported in stx_attributes */
+	/* Inode number */
+	__u64	stx_ino;
+
+	/* File size */
+	__u64	stx_size;
+
+	/* Number of 512-byte blocks allocated */
+	__u64	stx_blocks;
+
+	/* Mask to show what's supported in stx_attributes */
+	__u64	stx_attributes_mask;
+
 	/* 0x40 */
-	struct statx_timestamp	stx_atime;	/* Last access time */
-	struct statx_timestamp	stx_btime;	/* File creation time */
-	struct statx_timestamp	stx_ctime;	/* Last attribute change time */
-	struct statx_timestamp	stx_mtime;	/* Last data modification time */
+	/* Last access time */
+	struct statx_timestamp	stx_atime;
+
+	/* File creation time */
+	struct statx_timestamp	stx_btime;
+
+	/* Last attribute change time */
+	struct statx_timestamp	stx_ctime;
+
+	/* Last data modification time */
+	struct statx_timestamp	stx_mtime;
+
 	/* 0x80 */
-	__u32	stx_rdev_major;	/* Device ID of special file [if bdev/cdev] */
+	/* Device ID of special file [if bdev/cdev] */
+	__u32	stx_rdev_major;
 	__u32	stx_rdev_minor;
-	__u32	stx_dev_major;	/* ID of device containing file [uncond] */
+
+	/* ID of device containing file [uncond] */
+	__u32	stx_dev_major;
 	__u32	stx_dev_minor;
+
 	/* 0x90 */
 	__u64	stx_mnt_id;
-	__u32	stx_dio_mem_align;	/* Memory buffer alignment for direct I/O */
-	__u32	stx_dio_offset_align;	/* File offset alignment for direct I/O */
+
+	/* Memory buffer alignment for direct I/O */
+	__u32	stx_dio_mem_align;
+
+	/* File offset alignment for direct I/O */
+	__u32	stx_dio_offset_align;
+
 	/* 0xa0 */
-	__u64	stx_subvol;	/* Subvolume identifier */
-	__u32	stx_atomic_write_unit_min;	/* Min atomic write unit in bytes */
-	__u32	stx_atomic_write_unit_max;	/* Max atomic write unit in bytes */
+	/* Subvolume identifier */
+	__u64	stx_subvol;
+
+	/* Min atomic write unit in bytes */
+	__u32	stx_atomic_write_unit_min;
+
+	/* Max atomic write unit in bytes */
+	__u32	stx_atomic_write_unit_max;
+
 	/* 0xb0 */
-	__u32   stx_atomic_write_segments_max;	/* Max atomic write segment count */
+	/* Max atomic write segment count */
+	__u32   stx_atomic_write_segments_max;
+
 	__u32   __spare1[1];
+
 	/* 0xb8 */
 	__u64	__spare3[9];	/* Spare space for future expansion */
+
 	/* 0x100 */
 };
 
-- 
cgit v1.2.3


From 7ed6cbe0f8caa6ee38a2dc8f1b925acb904cc01f Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 9 Jan 2025 09:31:02 +0100
Subject: fs: add STATX_DIO_READ_ALIGN

Add a separate dio read align field, as many out of place write
file systems can easily do reads aligned to the device sector size,
but require bigger alignment for writes.

This is usually papered over by falling back to buffered I/O for smaller
writes and doing read-modify-write cycles, but performance for this
sucks, so applications benefit from knowing the actual write alignment.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20250109083109.1441561-3-hch@lst.de
Reviewed-by: John Garry <john.g.garry@oracle.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/stat.c                 | 1 +
 include/linux/stat.h      | 1 +
 include/uapi/linux/stat.h | 4 +++-
 3 files changed, 5 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/fs/stat.c b/fs/stat.c
index 0870e969a8a0..2c0e111a098a 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -725,6 +725,7 @@ cp_statx(const struct kstat *stat, struct statx __user *buffer)
 	tmp.stx_mnt_id = stat->mnt_id;
 	tmp.stx_dio_mem_align = stat->dio_mem_align;
 	tmp.stx_dio_offset_align = stat->dio_offset_align;
+	tmp.stx_dio_read_offset_align = stat->dio_read_offset_align;
 	tmp.stx_subvol = stat->subvol;
 	tmp.stx_atomic_write_unit_min = stat->atomic_write_unit_min;
 	tmp.stx_atomic_write_unit_max = stat->atomic_write_unit_max;
diff --git a/include/linux/stat.h b/include/linux/stat.h
index 3d900c86981c..9d8382e23a9c 100644
--- a/include/linux/stat.h
+++ b/include/linux/stat.h
@@ -52,6 +52,7 @@ struct kstat {
 	u64		mnt_id;
 	u32		dio_mem_align;
 	u32		dio_offset_align;
+	u32		dio_read_offset_align;
 	u64		change_cookie;
 	u64		subvol;
 	u32		atomic_write_unit_min;
diff --git a/include/uapi/linux/stat.h b/include/uapi/linux/stat.h
index 8b35d7d511a2..f78ee3670dd5 100644
--- a/include/uapi/linux/stat.h
+++ b/include/uapi/linux/stat.h
@@ -179,7 +179,8 @@ struct statx {
 	/* Max atomic write segment count */
 	__u32   stx_atomic_write_segments_max;
 
-	__u32   __spare1[1];
+	/* File offset alignment for direct I/O reads */
+	__u32	stx_dio_read_offset_align;
 
 	/* 0xb8 */
 	__u64	__spare3[9];	/* Spare space for future expansion */
@@ -213,6 +214,7 @@ struct statx {
 #define STATX_MNT_ID_UNIQUE	0x00004000U	/* Want/got extended stx_mount_id */
 #define STATX_SUBVOL		0x00008000U	/* Want/got stx_subvol */
 #define STATX_WRITE_ATOMIC	0x00010000U	/* Want/got atomic_write_* fields */
+#define STATX_DIO_READ_ALIGN	0x00020000U	/* Want/got dio read alignment info */
 
 #define STATX__RESERVED		0x80000000U	/* Reserved for future struct statx expansion */
 
-- 
cgit v1.2.3


From 94d57442e56d2ad2ca20d096040b8ae6f216a921 Mon Sep 17 00:00:00 2001
From: Anuj Gupta <anuj20.g@samsung.com>
Date: Thu, 5 Dec 2024 11:51:09 +0530
Subject: io_uring: expose read/write attribute capability

After commit 9a213d3b80c0, we can pass additional attributes along with
read/write. However, userspace doesn't know that. Add a new feature flag
IORING_FEAT_RW_ATTR, to notify the userspace that the kernel has this
ability.

Signed-off-by: Anuj Gupta <anuj20.g@samsung.com>
Reviewed-by: Li Zetao <lizetao1@huawei.com>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Tested-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/20241205062109.1788-1-anuj20.g@samsung.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/uapi/linux/io_uring.h | 1 +
 io_uring/io_uring.c           | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 38f0d6b10eaf..e11c82638527 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -577,6 +577,7 @@ struct io_uring_params {
 #define IORING_FEAT_REG_REG_RING	(1U << 13)
 #define IORING_FEAT_RECVSEND_BUNDLE	(1U << 14)
 #define IORING_FEAT_MIN_TIMEOUT		(1U << 15)
+#define IORING_FEAT_RW_ATTR		(1U << 16)
 
 /*
  * io_uring_register(2) opcodes and arguments
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 92ba2fdcd087..af03e9973b58 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -3713,7 +3713,8 @@ static __cold int io_uring_create(unsigned entries, struct io_uring_params *p,
 			IORING_FEAT_EXT_ARG | IORING_FEAT_NATIVE_WORKERS |
 			IORING_FEAT_RSRC_TAGS | IORING_FEAT_CQE_SKIP |
 			IORING_FEAT_LINKED_FILE | IORING_FEAT_REG_REG_RING |
-			IORING_FEAT_RECVSEND_BUNDLE | IORING_FEAT_MIN_TIMEOUT;
+			IORING_FEAT_RECVSEND_BUNDLE | IORING_FEAT_MIN_TIMEOUT |
+			IORING_FEAT_RW_ATTR;
 
 	if (copy_to_user(params, p, sizeof(*p))) {
 		ret = -EFAULT;
-- 
cgit v1.2.3


From bdf46443f350dd5d226fd528a5a5954ff762f591 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Fri, 10 Jan 2025 16:59:34 +0100
Subject: ALSA: rawmidi: Expose the tied device number in info ioctl

The UMP legacy rawmidi is derived from the UMP rawmidi, but currently
there is no way to know which device is involved in other side.

This patch extends the rawmidi info ioctl to show the tied device
number.  As default it stores -1, indicating that no tied device.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Link: https://patch.msgid.link/20250110155943.31578-2-tiwai@suse.de
---
 Documentation/sound/designs/midi-2.0.rst | 5 +++++
 include/sound/rawmidi.h                  | 1 +
 include/uapi/sound/asound.h              | 5 ++++-
 sound/core/rawmidi.c                     | 2 ++
 sound/core/ump.c                         | 3 +++
 5 files changed, 15 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/Documentation/sound/designs/midi-2.0.rst b/Documentation/sound/designs/midi-2.0.rst
index 086487ca7ab1..d525bc2805f7 100644
--- a/Documentation/sound/designs/midi-2.0.rst
+++ b/Documentation/sound/designs/midi-2.0.rst
@@ -293,6 +293,11 @@ Rawmidi API Extensions
   status 0x05).  When UMP core receives such a message, it updates the
   UMP EP info and the corresponding sequencer clients as well.
 
+* The legacy rawmidi device number is found in the new `tied_device`
+  field of the rawmidi info.
+  On the other hand, the UMP rawmidi device number is found in
+  `tied_device` field of the legacy rawmidi info, too.
+
 
 Control API Extensions
 ======================
diff --git a/include/sound/rawmidi.h b/include/sound/rawmidi.h
index f31cabf0158c..7f1fec786b72 100644
--- a/include/sound/rawmidi.h
+++ b/include/sound/rawmidi.h
@@ -118,6 +118,7 @@ struct snd_rawmidi {
 	struct list_head list;
 	unsigned int device;		/* device number */
 	unsigned int info_flags;	/* SNDRV_RAWMIDI_INFO_XXXX */
+	unsigned int tied_device;
 	char id[64];
 	char name[80];
 
diff --git a/include/uapi/sound/asound.h b/include/uapi/sound/asound.h
index 4cd513215bcd..1fcff031b5e3 100644
--- a/include/uapi/sound/asound.h
+++ b/include/uapi/sound/asound.h
@@ -729,6 +729,8 @@ enum {
 #define SNDRV_RAWMIDI_INFO_DUPLEX		0x00000004
 #define SNDRV_RAWMIDI_INFO_UMP			0x00000008
 
+#define SNDRV_RAWMIDI_DEVICE_UNKNOWN		-1
+
 struct snd_rawmidi_info {
 	unsigned int device;		/* RO/WR (control): device number */
 	unsigned int subdevice;		/* RO/WR (control): subdevice number */
@@ -740,7 +742,8 @@ struct snd_rawmidi_info {
 	unsigned char subname[32];	/* name of active or selected subdevice */
 	unsigned int subdevices_count;
 	unsigned int subdevices_avail;
-	unsigned char reserved[64];	/* reserved for future use */
+	int tied_device;		/* R: tied rawmidi device (UMP/legacy) */
+	unsigned char reserved[60];	/* reserved for future use */
 };
 
 #define SNDRV_RAWMIDI_MODE_FRAMING_MASK		(7<<0)
diff --git a/sound/core/rawmidi.c b/sound/core/rawmidi.c
index 348ce1b7725e..858878fe487a 100644
--- a/sound/core/rawmidi.c
+++ b/sound/core/rawmidi.c
@@ -635,6 +635,7 @@ static int snd_rawmidi_info(struct snd_rawmidi_substream *substream,
 	info->subdevices_count = substream->pstr->substream_count;
 	info->subdevices_avail = (substream->pstr->substream_count -
 				  substream->pstr->substream_opened);
+	info->tied_device = rmidi->tied_device;
 	return 0;
 }
 
@@ -1834,6 +1835,7 @@ int snd_rawmidi_init(struct snd_rawmidi *rmidi,
 	INIT_LIST_HEAD(&rmidi->streams[SNDRV_RAWMIDI_STREAM_INPUT].substreams);
 	INIT_LIST_HEAD(&rmidi->streams[SNDRV_RAWMIDI_STREAM_OUTPUT].substreams);
 	rmidi->info_flags = info_flags;
+	rmidi->tied_device = SNDRV_RAWMIDI_DEVICE_UNKNOWN;
 
 	if (id != NULL)
 		strscpy(rmidi->id, id, sizeof(rmidi->id));
diff --git a/sound/core/ump.c b/sound/core/ump.c
index 9198bff4768c..0bfab84eaafb 100644
--- a/sound/core/ump.c
+++ b/sound/core/ump.c
@@ -1314,6 +1314,9 @@ int snd_ump_attach_legacy_rawmidi(struct snd_ump_endpoint *ump,
 	ump->legacy_rmidi = rmidi;
 	update_legacy_names(ump);
 
+	rmidi->tied_device = ump->core.device;
+	ump->core.tied_device = rmidi->device;
+
 	ump_dbg(ump, "Created a legacy rawmidi #%d (%s)\n", device, id);
 	return 0;
 }
-- 
cgit v1.2.3


From b8fefed73a952a33521ad416fb39683abd87454b Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Fri, 10 Jan 2025 16:59:35 +0100
Subject: ALSA: rawmidi: Show substream activity in info ioctl

The UMP legacy rawmidi may turn on/off the substream dynamically
depending on the UMP Function Block information.  So far, there was no
direct way to know whether the substream is disabled (inactive) or
not; at most one can take a look at the substream name string or try
to open and get -ENODEV.

This patch extends the rawmidi info ioctl to show the current inactive
state of the given substream.  When the selected substream is
inactive, info flags field contains the new bit flag
SNDRV_RAWMIDI_INFO_STREAM_INACTIVE.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Link: https://patch.msgid.link/20250110155943.31578-3-tiwai@suse.de
---
 Documentation/sound/designs/midi-2.0.rst | 6 ++++++
 include/sound/rawmidi.h                  | 1 +
 include/uapi/sound/asound.h              | 1 +
 sound/core/rawmidi.c                     | 2 ++
 sound/core/ump.c                         | 9 +++++----
 5 files changed, 15 insertions(+), 4 deletions(-)

(limited to 'include/uapi')

diff --git a/Documentation/sound/designs/midi-2.0.rst b/Documentation/sound/designs/midi-2.0.rst
index d525bc2805f7..d6c17a47c832 100644
--- a/Documentation/sound/designs/midi-2.0.rst
+++ b/Documentation/sound/designs/midi-2.0.rst
@@ -298,6 +298,12 @@ Rawmidi API Extensions
   On the other hand, the UMP rawmidi device number is found in
   `tied_device` field of the legacy rawmidi info, too.
 
+* Each substream of the legacy rawmidi may be enabled / disabled
+  dynamically depending on the UMP FB state.
+  When the selected substream is inactive, it's indicated by the bit
+  0x10 (`SNDRV_RAWMIDI_INFO_STREAM_INACTIVE`) in the `flags` field of
+  the legacy rawmidi info.
+
 
 Control API Extensions
 ======================
diff --git a/include/sound/rawmidi.h b/include/sound/rawmidi.h
index 7f1fec786b72..6f2e95298fc7 100644
--- a/include/sound/rawmidi.h
+++ b/include/sound/rawmidi.h
@@ -89,6 +89,7 @@ struct snd_rawmidi_substream {
 	unsigned int framing;		/* whether to frame input data */
 	unsigned int clock_type;	/* clock source to use for input framing */
 	int use_count;			/* use counter (for output) */
+	bool inactive;			/* inactive substream (for UMP legacy) */
 	size_t bytes;
 	spinlock_t lock;
 	struct snd_rawmidi *rmidi;
diff --git a/include/uapi/sound/asound.h b/include/uapi/sound/asound.h
index 1fcff031b5e3..9f3b32602ac1 100644
--- a/include/uapi/sound/asound.h
+++ b/include/uapi/sound/asound.h
@@ -728,6 +728,7 @@ enum {
 #define SNDRV_RAWMIDI_INFO_INPUT		0x00000002
 #define SNDRV_RAWMIDI_INFO_DUPLEX		0x00000004
 #define SNDRV_RAWMIDI_INFO_UMP			0x00000008
+#define SNDRV_RAWMIDI_INFO_STREAM_INACTIVE	0x00000010
 
 #define SNDRV_RAWMIDI_DEVICE_UNKNOWN		-1
 
diff --git a/sound/core/rawmidi.c b/sound/core/rawmidi.c
index 858878fe487a..8a681bff4f7f 100644
--- a/sound/core/rawmidi.c
+++ b/sound/core/rawmidi.c
@@ -629,6 +629,8 @@ static int snd_rawmidi_info(struct snd_rawmidi_substream *substream,
 	info->subdevice = substream->number;
 	info->stream = substream->stream;
 	info->flags = rmidi->info_flags;
+	if (substream->inactive)
+		info->flags |= SNDRV_RAWMIDI_INFO_STREAM_INACTIVE;
 	strcpy(info->id, rmidi->id);
 	strcpy(info->name, rmidi->name);
 	strcpy(info->subname, substream->name);
diff --git a/sound/core/ump.c b/sound/core/ump.c
index 0bfab84eaafb..d6cd11be8750 100644
--- a/sound/core/ump.c
+++ b/sound/core/ump.c
@@ -1250,8 +1250,8 @@ static int fill_legacy_mapping(struct snd_ump_endpoint *ump)
 	return num;
 }
 
-static void fill_substream_names(struct snd_ump_endpoint *ump,
-				 struct snd_rawmidi *rmidi, int dir)
+static void update_legacy_substreams(struct snd_ump_endpoint *ump,
+				     struct snd_rawmidi *rmidi, int dir)
 {
 	struct snd_rawmidi_substream *s;
 	const char *name;
@@ -1265,6 +1265,7 @@ static void fill_substream_names(struct snd_ump_endpoint *ump,
 		scnprintf(s->name, sizeof(s->name), "Group %d (%.16s)%s",
 			  idx + 1, name,
 			  ump->groups[idx].active ? "" : " [Inactive]");
+		s->inactive = !ump->groups[idx].active;
 	}
 }
 
@@ -1272,8 +1273,8 @@ static void update_legacy_names(struct snd_ump_endpoint *ump)
 {
 	struct snd_rawmidi *rmidi = ump->legacy_rmidi;
 
-	fill_substream_names(ump, rmidi, SNDRV_RAWMIDI_STREAM_INPUT);
-	fill_substream_names(ump, rmidi, SNDRV_RAWMIDI_STREAM_OUTPUT);
+	update_legacy_substreams(ump, rmidi, SNDRV_RAWMIDI_STREAM_INPUT);
+	update_legacy_substreams(ump, rmidi, SNDRV_RAWMIDI_STREAM_OUTPUT);
 }
 
 int snd_ump_attach_legacy_rawmidi(struct snd_ump_endpoint *ump,
-- 
cgit v1.2.3


From 7bb49d2e8b52adace88249d9dece41e36af3bba0 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Fri, 10 Jan 2025 16:59:36 +0100
Subject: ALSA: rawmidi: Bump protocol version to 2.0.5

Bump the protocol version to 2.0.5, as we extended the rawmidi ABI for
the new tied_device info and the substream inactive flag.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Link: https://patch.msgid.link/20250110155943.31578-4-tiwai@suse.de
---
 include/uapi/sound/asound.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/sound/asound.h b/include/uapi/sound/asound.h
index 9f3b32602ac1..000b36c0e2b9 100644
--- a/include/uapi/sound/asound.h
+++ b/include/uapi/sound/asound.h
@@ -716,7 +716,7 @@ enum {
  *  Raw MIDI section - /dev/snd/midi??
  */
 
-#define SNDRV_RAWMIDI_VERSION		SNDRV_PROTOCOL_VERSION(2, 0, 4)
+#define SNDRV_RAWMIDI_VERSION		SNDRV_PROTOCOL_VERSION(2, 0, 5)
 
 enum {
 	SNDRV_RAWMIDI_STREAM_OUTPUT = 0,
-- 
cgit v1.2.3


From 3ab4a3199c782eae9b683b952f3ec5a766e4a096 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Fri, 10 Jan 2025 16:59:41 +0100
Subject: ALSA: seq: Notify UMP EP and FB changes

So far we notify the sequencer client and port changes upon UMP FB
changes, but those aren't really corresponding to the UMP updates.
e.g. when a FB info gets updated, it's not notified but done only when
some of sequencer port attribute is changed.  This is no ideal
behavior.

This patch adds the two new sequencer event types for notifying the
UMP EP and FB changes via the announce port.  The new event takes
snd_seq_ev_ump_notify type data, which is compatible with
snd_seq_addr (where the port number is replaced with the block
number).

The events are sent when the EP and FB info gets updated explicitly
via ioctl, or the backend UMP receives the corresponding UMP
messages.

The sequencer protocol version is bumped to 1.0.5 along with it.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Link: https://patch.msgid.link/20250110155943.31578-9-tiwai@suse.de
---
 Documentation/sound/designs/midi-2.0.rst |  7 ++++++
 include/sound/ump.h                      |  1 +
 include/uapi/sound/asequencer.h          | 12 +++++++++-
 sound/core/seq/seq_clientmgr.c           | 10 ++++++++
 sound/core/seq/seq_system.h              | 10 ++++++++
 sound/core/seq/seq_ump_client.c          | 40 ++++++++++++++++++++++++++++++--
 sound/core/ump.c                         | 23 +++++++++++++++---
 7 files changed, 97 insertions(+), 6 deletions(-)

(limited to 'include/uapi')

diff --git a/Documentation/sound/designs/midi-2.0.rst b/Documentation/sound/designs/midi-2.0.rst
index d6c17a47c832..71a343c93fe7 100644
--- a/Documentation/sound/designs/midi-2.0.rst
+++ b/Documentation/sound/designs/midi-2.0.rst
@@ -388,6 +388,13 @@ Sequencer API Extensions
   announcement to the ALSA sequencer system port, similarly like the
   normal port change notification.
 
+* There are two extended event types for notifying the UMP Endpoint and
+  Function Block changes via the system announcement port:
+  type 68 (`SNDRV_SEQ_EVENT_UMP_EP_CHANGE`) and type 69
+  (`SNDRV_SEQ_EVENT_UMP_BLOCK_CHANGE`). They take the new type,
+  `snd_seq_ev_ump_notify` in the payload, indicating the client number
+  and the FB number that are changed.
+
 
 MIDI2 USB Gadget Function Driver
 ================================
diff --git a/include/sound/ump.h b/include/sound/ump.h
index 532c2c3ea28e..73f97f88e2ed 100644
--- a/include/sound/ump.h
+++ b/include/sound/ump.h
@@ -83,6 +83,7 @@ struct snd_ump_ops {
 struct snd_seq_ump_ops {
 	void (*input_receive)(struct snd_ump_endpoint *ump,
 			      const u32 *data, int words);
+	int (*notify_ep_change)(struct snd_ump_endpoint *ump);
 	int (*notify_fb_change)(struct snd_ump_endpoint *ump,
 				struct snd_ump_block *fb);
 	int (*switch_protocol)(struct snd_ump_endpoint *ump);
diff --git a/include/uapi/sound/asequencer.h b/include/uapi/sound/asequencer.h
index bc30c1f2a109..a5c41f771e05 100644
--- a/include/uapi/sound/asequencer.h
+++ b/include/uapi/sound/asequencer.h
@@ -10,7 +10,7 @@
 #include <sound/asound.h>
 
 /** version of the sequencer */
-#define SNDRV_SEQ_VERSION SNDRV_PROTOCOL_VERSION(1, 0, 4)
+#define SNDRV_SEQ_VERSION SNDRV_PROTOCOL_VERSION(1, 0, 5)
 
 /**
  * definition of sequencer event types
@@ -92,6 +92,9 @@
 #define SNDRV_SEQ_EVENT_PORT_SUBSCRIBED	66	/* ports connected */
 #define SNDRV_SEQ_EVENT_PORT_UNSUBSCRIBED 67	/* ports disconnected */
 
+#define SNDRV_SEQ_EVENT_UMP_EP_CHANGE	68	/* UMP EP info has changed */
+#define SNDRV_SEQ_EVENT_UMP_BLOCK_CHANGE 69	/* UMP block info has changed */
+
 /* 70-89:  synthesizer events - obsoleted */
 
 /** user-defined events with fixed length
@@ -253,6 +256,12 @@ struct snd_seq_ev_quote {
 	struct snd_seq_event *event;		/* quoted event */
 } __packed;
 
+	/* UMP info change notify */
+struct snd_seq_ev_ump_notify {
+	unsigned char client;	/**< Client number */
+	unsigned char block;	/**< Block number (optional) */
+};
+
 union snd_seq_event_data { /* event data... */
 	struct snd_seq_ev_note note;
 	struct snd_seq_ev_ctrl control;
@@ -265,6 +274,7 @@ union snd_seq_event_data { /* event data... */
 	struct snd_seq_connect connect;
 	struct snd_seq_result result;
 	struct snd_seq_ev_quote quote;
+	struct snd_seq_ev_ump_notify ump_notify;
 };
 
 	/* sequencer event */
diff --git a/sound/core/seq/seq_clientmgr.c b/sound/core/seq/seq_clientmgr.c
index 3d27f777f29e..75783d69e708 100644
--- a/sound/core/seq/seq_clientmgr.c
+++ b/sound/core/seq/seq_clientmgr.c
@@ -2230,6 +2230,16 @@ static int snd_seq_ioctl_client_ump_info(struct snd_seq_client *caller,
  error:
 	mutex_unlock(&cptr->ioctl_mutex);
 	snd_seq_client_unlock(cptr);
+	if (!err && cmd == SNDRV_SEQ_IOCTL_SET_CLIENT_UMP_INFO) {
+		if (type == SNDRV_SEQ_CLIENT_UMP_INFO_ENDPOINT)
+			snd_seq_system_ump_notify(client, 0,
+						  SNDRV_SEQ_EVENT_UMP_EP_CHANGE,
+						  false);
+		else
+			snd_seq_system_ump_notify(client, type - 1,
+						  SNDRV_SEQ_EVENT_UMP_BLOCK_CHANGE,
+						  false);
+	}
 	return err;
 }
 #endif
diff --git a/sound/core/seq/seq_system.h b/sound/core/seq/seq_system.h
index a118f7252b62..62e513f74871 100644
--- a/sound/core/seq/seq_system.h
+++ b/sound/core/seq/seq_system.h
@@ -16,6 +16,16 @@ void snd_seq_system_broadcast(int client, int port, int type, bool atomic);
 #define notify_event(client, port, type) \
 	snd_seq_system_broadcast(client, port, type, false)
 
+/* notify UMP EP/FB change event */
+static inline void snd_seq_system_ump_notify(int client, int block, int type,
+					     bool atomic)
+{
+	/* reuse the existing snd_seq_system_broadcast():
+	 * struct snd_seq_ev_ump_notify is compatible with struct snd_seq_addr
+	 */
+	snd_seq_system_broadcast(client, block, type, atomic);
+}
+
 #define snd_seq_system_client_ev_client_start(client) notify_event(client, 0, SNDRV_SEQ_EVENT_CLIENT_START)
 #define snd_seq_system_client_ev_client_exit(client) notify_event(client, 0, SNDRV_SEQ_EVENT_CLIENT_EXIT)
 #define snd_seq_system_client_ev_client_change(client) notify_event(client, 0, SNDRV_SEQ_EVENT_CLIENT_CHANGE)
diff --git a/sound/core/seq/seq_ump_client.c b/sound/core/seq/seq_ump_client.c
index 27c4dd9940ff..1255351b59ce 100644
--- a/sound/core/seq/seq_ump_client.c
+++ b/sound/core/seq/seq_ump_client.c
@@ -388,6 +388,33 @@ static void handle_group_notify(struct work_struct *work)
 	setup_client_group_filter(client);
 }
 
+/* UMP EP change notification */
+static int seq_ump_notify_ep_change(struct snd_ump_endpoint *ump)
+{
+	struct seq_ump_client *client = ump->seq_client;
+	struct snd_seq_client *cptr;
+	int client_id;
+
+	if (!client)
+		return -ENODEV;
+	client_id = client->seq_client;
+	cptr = snd_seq_kernel_client_get(client_id);
+	if (!cptr)
+		return -ENODEV;
+
+	snd_seq_system_ump_notify(client_id, 0, SNDRV_SEQ_EVENT_UMP_EP_CHANGE,
+				  true);
+
+	/* update sequencer client name if needed */
+	if (*ump->core.name && strcmp(ump->core.name, cptr->name)) {
+		strscpy(cptr->name, ump->core.name, sizeof(cptr->name));
+		snd_seq_system_client_ev_client_change(client_id);
+	}
+
+	snd_seq_kernel_client_put(cptr);
+	return 0;
+}
+
 /* UMP FB change notification */
 static int seq_ump_notify_fb_change(struct snd_ump_endpoint *ump,
 				    struct snd_ump_block *fb)
@@ -397,20 +424,29 @@ static int seq_ump_notify_fb_change(struct snd_ump_endpoint *ump,
 	if (!client)
 		return -ENODEV;
 	schedule_work(&client->group_notify_work);
+	snd_seq_system_ump_notify(client->seq_client, fb->info.block_id,
+				  SNDRV_SEQ_EVENT_UMP_BLOCK_CHANGE,
+				  true);
 	return 0;
 }
 
 /* UMP protocol change notification; just update the midi_version field */
 static int seq_ump_switch_protocol(struct snd_ump_endpoint *ump)
 {
-	if (!ump->seq_client)
+	struct seq_ump_client *client = ump->seq_client;
+
+	if (!client)
 		return -ENODEV;
-	setup_client_midi_version(ump->seq_client);
+	setup_client_midi_version(client);
+	snd_seq_system_ump_notify(client->seq_client, 0,
+				  SNDRV_SEQ_EVENT_UMP_EP_CHANGE,
+				  true);
 	return 0;
 }
 
 static const struct snd_seq_ump_ops seq_ump_ops = {
 	.input_receive = seq_ump_input_receive,
+	.notify_ep_change = seq_ump_notify_ep_change,
 	.notify_fb_change = seq_ump_notify_fb_change,
 	.switch_protocol = seq_ump_switch_protocol,
 };
diff --git a/sound/core/ump.c b/sound/core/ump.c
index c80a0a8b7ad0..ff3cc2386ece 100644
--- a/sound/core/ump.c
+++ b/sound/core/ump.c
@@ -695,6 +695,15 @@ static void choose_default_protocol(struct snd_ump_endpoint *ump)
 		ump->info.protocol |= SNDRV_UMP_EP_INFO_PROTO_MIDI1;
 }
 
+/* notify the EP info/name change to sequencer */
+static void seq_notify_ep_change(struct snd_ump_endpoint *ump)
+{
+#if IS_ENABLED(CONFIG_SND_SEQUENCER)
+	if (ump->parsed && ump->seq_ops && ump->seq_ops->notify_ep_change)
+		ump->seq_ops->notify_ep_change(ump);
+#endif
+}
+
 /* handle EP info stream message; update the UMP attributes */
 static int ump_handle_ep_info_msg(struct snd_ump_endpoint *ump,
 				  const union snd_ump_stream_msg *buf)
@@ -719,6 +728,7 @@ static int ump_handle_ep_info_msg(struct snd_ump_endpoint *ump,
 
 	ump->info.protocol &= ump->info.protocol_caps;
 	choose_default_protocol(ump);
+	seq_notify_ep_change(ump);
 
 	return 1; /* finished */
 }
@@ -741,6 +751,7 @@ static int ump_handle_device_info_msg(struct snd_ump_endpoint *ump,
 		ump->info.family_id,
 		ump->info.model_id,
 		ump->info.sw_revision);
+	seq_notify_ep_change(ump);
 	return 1; /* finished */
 }
 
@@ -762,6 +773,7 @@ static int ump_handle_ep_name_msg(struct snd_ump_endpoint *ump,
 	if (ret && ump->parsed) {
 		ump_set_rawmidi_name(ump);
 		ump_legacy_set_rawmidi_name(ump);
+		seq_notify_ep_change(ump);
 	}
 
 	return ret;
@@ -771,9 +783,14 @@ static int ump_handle_ep_name_msg(struct snd_ump_endpoint *ump,
 static int ump_handle_product_id_msg(struct snd_ump_endpoint *ump,
 				     const union snd_ump_stream_msg *buf)
 {
-	return ump_append_string(ump, ump->info.product_id,
-				 sizeof(ump->info.product_id),
-				 buf->raw, 2);
+	int ret;
+
+	ret = ump_append_string(ump, ump->info.product_id,
+				sizeof(ump->info.product_id),
+				buf->raw, 2);
+	if (ret)
+		seq_notify_ep_change(ump);
+	return ret;
 }
 
 /* notify the protocol change to sequencer */
-- 
cgit v1.2.3


From 658eb5ab916ddc92f294dbce8e3d449470be9f86 Mon Sep 17 00:00:00 2001
From: Wang Yaxin <wang.yaxin@zte.com.cn>
Date: Tue, 3 Dec 2024 16:48:48 +0800
Subject: delayacct: add delay max to record delay peak

Introduce the use cases of delay max, which can help quickly detect
potential abnormal delays in the system and record the types and specific
details of delay spikes.

Problem
========
Delay accounting can track the average delay of processes to show
system workload. However, when a process experiences a significant
delay, maybe a delay spike, which adversely affects performance,
getdelays can only display the average system delay over a period
of time. Yet, average delay is unhelpful for diagnosing delay peak.
It is not even possible to determine which type of delay has spiked,
as this information might be masked by the average delay.

Solution
=========
the 'delay max' can display delay peak since the system's startup,
which can record potential abnormal delays over time, including
the type of delay and the maximum delay. This is helpful for
quickly identifying crash caused by delay.

Use case
=========
bash# ./getdelays -d -p 244
print delayacct stats ON
PID     244

CPU             count     real total  virtual total    delay total  delay average      delay max
                   68      192000000      213676651         705643          0.010ms     0.306381ms
IO              count    delay total  delay average      delay max
                    0              0          0.000ms     0.000000ms
SWAP            count    delay total  delay average      delay max
                    0              0          0.000ms     0.000000ms
RECLAIM         count    delay total  delay average      delay max
                    0              0          0.000ms     0.000000ms
THRASHING       count    delay total  delay average      delay max
                    0              0          0.000ms     0.000000ms
COMPACT         count    delay total  delay average      delay max
                    0              0          0.000ms     0.000000ms
WPCOPY          count    delay total  delay average      delay max
                  235       15648284          0.067ms     0.263842ms
IRQ             count    delay total  delay average      delay max
                    0              0          0.000ms     0.000000ms

[wang.yaxin@zte.com.cn: update docs and fix some spelling errors]
  Link: https://lkml.kernel.org/r/20241213192700771XKZ8H30OtHSeziGqRVMs0@zte.com.cn
Link: https://lkml.kernel.org/r/20241203164848805CS62CQPQWG9GLdQj2_BxS@zte.com.cn
Co-developed-by: Wang Yong <wang.yong12@zte.com.cn>
Signed-off-by: Wang Yong <wang.yong12@zte.com.cn>
Co-developed-by: xu xin <xu.xin16@zte.com.cn>
Signed-off-by: xu xin <xu.xin16@zte.com.cn>
Co-developed-by: Wang Yaxin <wang.yaxin@zte.com.cn>
Signed-off-by: Wang Yaxin <wang.yaxin@zte.com.cn>
Signed-off-by: Kun Jiang <jiang.kun2@zte.com.cn>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Fan Yu <fan.yu9@zte.com.cn>
Cc: Peilin He <he.peilin@zte.com.cn>
Cc: tuqiang <tu.qiang35@zte.com.cn>
Cc: Yang Yang <yang.yang29@zte.com.cn>
Cc: ye xingchen <ye.xingchen@zte.com.cn>
Cc: Yunkai Zhang <zhang.yunkai@zte.com.cn>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 Documentation/accounting/delay-accounting.rst | 42 +++++++++----------
 include/linux/delayacct.h                     |  7 ++++
 include/linux/sched.h                         |  3 ++
 include/uapi/linux/taskstats.h                |  9 ++++
 kernel/delayacct.c                            | 37 ++++++++++++-----
 kernel/sched/stats.h                          |  5 ++-
 tools/accounting/getdelays.c                  | 59 +++++++++++++++------------
 7 files changed, 105 insertions(+), 57 deletions(-)

(limited to 'include/uapi')

diff --git a/Documentation/accounting/delay-accounting.rst b/Documentation/accounting/delay-accounting.rst
index f61c01fc376e..8a0277428ccf 100644
--- a/Documentation/accounting/delay-accounting.rst
+++ b/Documentation/accounting/delay-accounting.rst
@@ -100,29 +100,29 @@ Get delays, since system boot, for pid 10::
 	# ./getdelays -d -p 10
 	(output similar to next case)
 
-Get sum of delays, since system boot, for all pids with tgid 5::
+Get sum and peak of delays, since system boot, for all pids with tgid 242::
 
-	# ./getdelays -d -t 5
+	bash-4.4# ./getdelays -d -t 242
 	print delayacct stats ON
-	TGID	5
-
-
-	CPU             count     real total  virtual total    delay total  delay average
-	                    8        7000000        6872122        3382277          0.423ms
-	IO              count    delay total  delay average
-                   0              0          0.000ms
-	SWAP            count    delay total  delay average
-                       0              0          0.000ms
-	RECLAIM         count    delay total  delay average
-                   0              0          0.000ms
-	THRASHING       count    delay total  delay average
-                       0              0          0.000ms
-	COMPACT         count    delay total  delay average
-                       0              0          0.000ms
-	WPCOPY          count    delay total  delay average
-                       0              0          0.000ms
-	IRQ             count    delay total  delay average
-                       0              0          0.000ms
+	TGID    242
+
+
+	CPU         count     real total  virtual total    delay total  delay average      delay max
+	              239      296000000      307724885        1127792          0.005ms     0.238382ms
+	IO          count    delay total  delay average      delay max
+	                0              0          0.000ms     0.000000ms
+	SWAP        count    delay total  delay average      delay max
+	                0              0          0.000ms     0.000000ms
+	RECLAIM     count    delay total  delay average      delay max
+	                0              0          0.000ms     0.000000ms
+	THRASHING   count    delay total  delay average      delay max
+	                0              0          0.000ms     0.000000ms
+	COMPACT     count    delay total  delay average      delay max
+	                0              0          0.000ms     0.000000ms
+	WPCOPY      count    delay total  delay average      delay max
+	              230       19100476          0.083ms     0.383822ms
+	IRQ         count    delay total  delay average      delay max
+	                0              0          0.000ms     0.000000ms
 
 Get IO accounting for pid 1, it works only with -p::
 
diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h
index 6639f48dac36..56fbfa2c2ac5 100644
--- a/include/linux/delayacct.h
+++ b/include/linux/delayacct.h
@@ -29,25 +29,32 @@ struct task_delay_info {
 	 * XXX_delay contains the accumulated delay time in nanoseconds.
 	 */
 	u64 blkio_start;
+	u64 blkio_delay_max;
 	u64 blkio_delay;	/* wait for sync block io completion */
 	u64 swapin_start;
+	u64 swapin_delay_max;
 	u64 swapin_delay;	/* wait for swapin */
 	u32 blkio_count;	/* total count of the number of sync block */
 				/* io operations performed */
 	u32 swapin_count;	/* total count of swapin */
 
 	u64 freepages_start;
+	u64 freepages_delay_max;
 	u64 freepages_delay;	/* wait for memory reclaim */
 
 	u64 thrashing_start;
+	u64 thrashing_delay_max;
 	u64 thrashing_delay;	/* wait for thrashing page */
 
 	u64 compact_start;
+	u64 compact_delay_max;
 	u64 compact_delay;	/* wait for memory compact */
 
 	u64 wpcopy_start;
+	u64 wpcopy_delay_max;
 	u64 wpcopy_delay;	/* wait for write-protect copy */
 
+	u64 irq_delay_max;
 	u64 irq_delay;	/* wait for IRQ/SOFTIRQ */
 
 	u32 freepages_count;	/* total count of memory reclaim */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 64934e0830af..a0ae3923b41d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -398,6 +398,9 @@ struct sched_info {
 	/* Time spent waiting on a runqueue: */
 	unsigned long long		run_delay;
 
+	/* Max time spent waiting on a runqueue: */
+	unsigned long long		max_run_delay;
+
 	/* Timestamps: */
 
 	/* When did we last run on a CPU? */
diff --git a/include/uapi/linux/taskstats.h b/include/uapi/linux/taskstats.h
index b50b2eb257a0..e0d1c6fc9f3b 100644
--- a/include/uapi/linux/taskstats.h
+++ b/include/uapi/linux/taskstats.h
@@ -72,6 +72,7 @@ struct taskstats {
 	 */
 	__u64	cpu_count __attribute__((aligned(8)));
 	__u64	cpu_delay_total;
+	__u64	cpu_delay_max;
 
 	/* Following four fields atomically updated using task->delays->lock */
 
@@ -80,10 +81,12 @@ struct taskstats {
 	 */
 	__u64	blkio_count;
 	__u64	blkio_delay_total;
+	__u64	blkio_delay_max;
 
 	/* Delay waiting for page fault I/O (swap in only) */
 	__u64	swapin_count;
 	__u64	swapin_delay_total;
+	__u64	swapin_delay_max;
 
 	/* cpu "wall-clock" running time
 	 * On some architectures, value will adjust for cpu time stolen
@@ -166,10 +169,12 @@ struct taskstats {
 	/* Delay waiting for memory reclaim */
 	__u64	freepages_count;
 	__u64	freepages_delay_total;
+	__u64	freepages_delay_max;
 
 	/* Delay waiting for thrashing page */
 	__u64	thrashing_count;
 	__u64	thrashing_delay_total;
+	__u64	thrashing_delay_max;
 
 	/* v10: 64-bit btime to avoid overflow */
 	__u64	ac_btime64;		/* 64-bit begin time */
@@ -177,6 +182,7 @@ struct taskstats {
 	/* v11: Delay waiting for memory compact */
 	__u64	compact_count;
 	__u64	compact_delay_total;
+	__u64	compact_delay_max;
 
 	/* v12 begin */
 	__u32   ac_tgid;	/* thread group ID */
@@ -198,10 +204,13 @@ struct taskstats {
 	/* v13: Delay waiting for write-protect copy */
 	__u64    wpcopy_count;
 	__u64    wpcopy_delay_total;
+	__u64    wpcopy_delay_max;
 
 	/* v14: Delay waiting for IRQ/SOFTIRQ */
 	__u64    irq_count;
 	__u64    irq_delay_total;
+	__u64    irq_delay_max;
+	/* v15: add Delay max */
 };
 
 
diff --git a/kernel/delayacct.c b/kernel/delayacct.c
index dead51de8eb5..23212a0c88e4 100644
--- a/kernel/delayacct.c
+++ b/kernel/delayacct.c
@@ -93,9 +93,9 @@ void __delayacct_tsk_init(struct task_struct *tsk)
 
 /*
  * Finish delay accounting for a statistic using its timestamps (@start),
- * accumalator (@total) and @count
+ * accumulator (@total) and @count
  */
-static void delayacct_end(raw_spinlock_t *lock, u64 *start, u64 *total, u32 *count)
+static void delayacct_end(raw_spinlock_t *lock, u64 *start, u64 *total, u32 *count, u64 *max)
 {
 	s64 ns = local_clock() - *start;
 	unsigned long flags;
@@ -104,6 +104,8 @@ static void delayacct_end(raw_spinlock_t *lock, u64 *start, u64 *total, u32 *cou
 		raw_spin_lock_irqsave(lock, flags);
 		*total += ns;
 		(*count)++;
+		if (ns > *max)
+			*max = ns;
 		raw_spin_unlock_irqrestore(lock, flags);
 	}
 }
@@ -122,7 +124,8 @@ void __delayacct_blkio_end(struct task_struct *p)
 	delayacct_end(&p->delays->lock,
 		      &p->delays->blkio_start,
 		      &p->delays->blkio_delay,
-		      &p->delays->blkio_count);
+		      &p->delays->blkio_count,
+		      &p->delays->blkio_delay_max);
 }
 
 int delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
@@ -153,10 +156,11 @@ int delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
 
 	d->cpu_count += t1;
 
+	d->cpu_delay_max = tsk->sched_info.max_run_delay;
 	tmp = (s64)d->cpu_delay_total + t2;
 	d->cpu_delay_total = (tmp < (s64)d->cpu_delay_total) ? 0 : tmp;
-
 	tmp = (s64)d->cpu_run_virtual_total + t3;
+
 	d->cpu_run_virtual_total =
 		(tmp < (s64)d->cpu_run_virtual_total) ?	0 : tmp;
 
@@ -164,20 +168,26 @@ int delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
 		return 0;
 
 	/* zero XXX_total, non-zero XXX_count implies XXX stat overflowed */
-
 	raw_spin_lock_irqsave(&tsk->delays->lock, flags);
+	d->blkio_delay_max = tsk->delays->blkio_delay_max;
 	tmp = d->blkio_delay_total + tsk->delays->blkio_delay;
 	d->blkio_delay_total = (tmp < d->blkio_delay_total) ? 0 : tmp;
+	d->swapin_delay_max = tsk->delays->swapin_delay_max;
 	tmp = d->swapin_delay_total + tsk->delays->swapin_delay;
 	d->swapin_delay_total = (tmp < d->swapin_delay_total) ? 0 : tmp;
+	d->freepages_delay_max = tsk->delays->freepages_delay_max;
 	tmp = d->freepages_delay_total + tsk->delays->freepages_delay;
 	d->freepages_delay_total = (tmp < d->freepages_delay_total) ? 0 : tmp;
+	d->thrashing_delay_max = tsk->delays->thrashing_delay_max;
 	tmp = d->thrashing_delay_total + tsk->delays->thrashing_delay;
 	d->thrashing_delay_total = (tmp < d->thrashing_delay_total) ? 0 : tmp;
+	d->compact_delay_max = tsk->delays->compact_delay_max;
 	tmp = d->compact_delay_total + tsk->delays->compact_delay;
 	d->compact_delay_total = (tmp < d->compact_delay_total) ? 0 : tmp;
+	d->wpcopy_delay_max = tsk->delays->wpcopy_delay_max;
 	tmp = d->wpcopy_delay_total + tsk->delays->wpcopy_delay;
 	d->wpcopy_delay_total = (tmp < d->wpcopy_delay_total) ? 0 : tmp;
+	d->irq_delay_max = tsk->delays->irq_delay_max;
 	tmp = d->irq_delay_total + tsk->delays->irq_delay;
 	d->irq_delay_total = (tmp < d->irq_delay_total) ? 0 : tmp;
 	d->blkio_count += tsk->delays->blkio_count;
@@ -213,7 +223,8 @@ void __delayacct_freepages_end(void)
 	delayacct_end(&current->delays->lock,
 		      &current->delays->freepages_start,
 		      &current->delays->freepages_delay,
-		      &current->delays->freepages_count);
+		      &current->delays->freepages_count,
+		      &current->delays->freepages_delay_max);
 }
 
 void __delayacct_thrashing_start(bool *in_thrashing)
@@ -235,7 +246,8 @@ void __delayacct_thrashing_end(bool *in_thrashing)
 	delayacct_end(&current->delays->lock,
 		      &current->delays->thrashing_start,
 		      &current->delays->thrashing_delay,
-		      &current->delays->thrashing_count);
+		      &current->delays->thrashing_count,
+		      &current->delays->thrashing_delay_max);
 }
 
 void __delayacct_swapin_start(void)
@@ -248,7 +260,8 @@ void __delayacct_swapin_end(void)
 	delayacct_end(&current->delays->lock,
 		      &current->delays->swapin_start,
 		      &current->delays->swapin_delay,
-		      &current->delays->swapin_count);
+		      &current->delays->swapin_count,
+		      &current->delays->swapin_delay_max);
 }
 
 void __delayacct_compact_start(void)
@@ -261,7 +274,8 @@ void __delayacct_compact_end(void)
 	delayacct_end(&current->delays->lock,
 		      &current->delays->compact_start,
 		      &current->delays->compact_delay,
-		      &current->delays->compact_count);
+		      &current->delays->compact_count,
+		      &current->delays->compact_delay_max);
 }
 
 void __delayacct_wpcopy_start(void)
@@ -274,7 +288,8 @@ void __delayacct_wpcopy_end(void)
 	delayacct_end(&current->delays->lock,
 		      &current->delays->wpcopy_start,
 		      &current->delays->wpcopy_delay,
-		      &current->delays->wpcopy_count);
+		      &current->delays->wpcopy_count,
+		      &current->delays->wpcopy_delay_max);
 }
 
 void __delayacct_irq(struct task_struct *task, u32 delta)
@@ -284,6 +299,8 @@ void __delayacct_irq(struct task_struct *task, u32 delta)
 	raw_spin_lock_irqsave(&task->delays->lock, flags);
 	task->delays->irq_delay += delta;
 	task->delays->irq_count++;
+	if (delta > task->delays->irq_delay_max)
+		task->delays->irq_delay_max = delta;
 	raw_spin_unlock_irqrestore(&task->delays->lock, flags);
 }
 
diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h
index 8ee0add5a48a..ed72435aef51 100644
--- a/kernel/sched/stats.h
+++ b/kernel/sched/stats.h
@@ -244,7 +244,8 @@ static inline void sched_info_dequeue(struct rq *rq, struct task_struct *t)
 	delta = rq_clock(rq) - t->sched_info.last_queued;
 	t->sched_info.last_queued = 0;
 	t->sched_info.run_delay += delta;
-
+	if (delta > t->sched_info.max_run_delay)
+		t->sched_info.max_run_delay = delta;
 	rq_sched_info_dequeue(rq, delta);
 }
 
@@ -266,6 +267,8 @@ static void sched_info_arrive(struct rq *rq, struct task_struct *t)
 	t->sched_info.run_delay += delta;
 	t->sched_info.last_arrival = now;
 	t->sched_info.pcount++;
+	if (delta > t->sched_info.max_run_delay)
+		t->sched_info.max_run_delay = delta;
 
 	rq_sched_info_arrive(rq, delta);
 }
diff --git a/tools/accounting/getdelays.c b/tools/accounting/getdelays.c
index 1334214546d7..e570bcad185d 100644
--- a/tools/accounting/getdelays.c
+++ b/tools/accounting/getdelays.c
@@ -192,60 +192,69 @@ static int get_family_id(int sd)
 }
 
 #define average_ms(t, c) (t / 1000000ULL / (c ? c : 1))
+#define delay_max_ms(t) (t / 1000000ULL)
 
 static void print_delayacct(struct taskstats *t)
 {
-	printf("\n\nCPU   %15s%15s%15s%15s%15s\n"
-	       "      %15llu%15llu%15llu%15llu%15.3fms\n"
-	       "IO    %15s%15s%15s\n"
-	       "      %15llu%15llu%15.3fms\n"
-	       "SWAP  %15s%15s%15s\n"
-	       "      %15llu%15llu%15.3fms\n"
-	       "RECLAIM  %12s%15s%15s\n"
-	       "      %15llu%15llu%15.3fms\n"
-	       "THRASHING%12s%15s%15s\n"
-	       "      %15llu%15llu%15.3fms\n"
-	       "COMPACT  %12s%15s%15s\n"
-	       "      %15llu%15llu%15.3fms\n"
-	       "WPCOPY   %12s%15s%15s\n"
-	       "      %15llu%15llu%15.3fms\n"
-	       "IRQ   %15s%15s%15s\n"
-	       "      %15llu%15llu%15.3fms\n",
+	printf("\n\nCPU   %15s%15s%15s%15s%15s%15s\n"
+	       "      %15llu%15llu%15llu%15llu%15.3fms%13.6fms\n"
+	       "IO    %15s%15s%15s%15s\n"
+	       "      %15llu%15llu%15.3fms%13.6fms\n"
+	       "SWAP  %15s%15s%15s%15s\n"
+	       "      %15llu%15llu%15.3fms%13.6fms\n"
+	       "RECLAIM  %12s%15s%15s%15s\n"
+	       "      %15llu%15llu%15.3fms%13.6fms\n"
+	       "THRASHING%12s%15s%15s%15s\n"
+	       "      %15llu%15llu%15.3fms%13.6fms\n"
+	       "COMPACT  %12s%15s%15s%15s\n"
+	       "      %15llu%15llu%15.3fms%13.6fms\n"
+	       "WPCOPY   %12s%15s%15s%15s\n"
+	       "      %15llu%15llu%15.3fms%13.6fms\n"
+	       "IRQ   %15s%15s%15s%15s\n"
+	       "      %15llu%15llu%15.3fms%13.6fms\n",
 	       "count", "real total", "virtual total",
-	       "delay total", "delay average",
+	       "delay total", "delay average", "delay max",
 	       (unsigned long long)t->cpu_count,
 	       (unsigned long long)t->cpu_run_real_total,
 	       (unsigned long long)t->cpu_run_virtual_total,
 	       (unsigned long long)t->cpu_delay_total,
 	       average_ms((double)t->cpu_delay_total, t->cpu_count),
-	       "count", "delay total", "delay average",
+		   delay_max_ms((double)t->cpu_delay_max),
+	       "count", "delay total", "delay average", "delay max",
 	       (unsigned long long)t->blkio_count,
 	       (unsigned long long)t->blkio_delay_total,
 	       average_ms((double)t->blkio_delay_total, t->blkio_count),
-	       "count", "delay total", "delay average",
+		   delay_max_ms((double)t->blkio_delay_max),
+	       "count", "delay total", "delay average", "delay max",
 	       (unsigned long long)t->swapin_count,
 	       (unsigned long long)t->swapin_delay_total,
 	       average_ms((double)t->swapin_delay_total, t->swapin_count),
-	       "count", "delay total", "delay average",
+		   delay_max_ms((double)t->swapin_delay_max),
+	       "count", "delay total", "delay average", "delay max",
 	       (unsigned long long)t->freepages_count,
 	       (unsigned long long)t->freepages_delay_total,
 	       average_ms((double)t->freepages_delay_total, t->freepages_count),
-	       "count", "delay total", "delay average",
+		   delay_max_ms((double)t->freepages_delay_max),
+	       "count", "delay total", "delay average", "delay max",
 	       (unsigned long long)t->thrashing_count,
 	       (unsigned long long)t->thrashing_delay_total,
 	       average_ms((double)t->thrashing_delay_total, t->thrashing_count),
-	       "count", "delay total", "delay average",
+		   delay_max_ms((double)t->thrashing_delay_max),
+	       "count", "delay total", "delay average", "delay max",
 	       (unsigned long long)t->compact_count,
 	       (unsigned long long)t->compact_delay_total,
 	       average_ms((double)t->compact_delay_total, t->compact_count),
-	       "count", "delay total", "delay average",
+		   delay_max_ms((double)t->compact_delay_max),
+	       "count", "delay total", "delay average", "delay max",
 	       (unsigned long long)t->wpcopy_count,
 	       (unsigned long long)t->wpcopy_delay_total,
 	       average_ms((double)t->wpcopy_delay_total, t->wpcopy_count),
-	       "count", "delay total", "delay average",
+		   delay_max_ms((double)t->wpcopy_delay_max),
+	       "count", "delay total", "delay average", "delay max",
 	       (unsigned long long)t->irq_count,
 	       (unsigned long long)t->irq_delay_total,
-	       average_ms((double)t->irq_delay_total, t->irq_count));
+	       average_ms((double)t->irq_delay_total, t->irq_count),
+		   delay_max_ms((double)t->irq_delay_max));
 }
 
 static void task_context_switch_counts(struct taskstats *t)
-- 
cgit v1.2.3


From f65c64f311ee2f1ddc1eb395ed8b20e6b9d14e85 Mon Sep 17 00:00:00 2001
From: Wang Yaxin <wang.yaxin@zte.com.cn>
Date: Fri, 20 Dec 2024 17:31:05 +0800
Subject: delayacct: add delay min to record delay peak

Delay accounting can now calculate the average delay of processes, detect
the overall system load, and also record the 'delay max' to identify
potential abnormal delays.  However, 'delay min' can help us identify
another useful delay peak.  By comparing the difference between 'delay
max' and 'delay min', we can understand the optimization space for
latency, providing a reference for the optimization of latency
performance.

Use case
=========
bash-4.4# ./getdelays -d -t 242
print delayacct stats ON
TGID    242
CPU         count     real total  virtual total    delay total  delay average      delay max      delay min
               39      156000000      156576579        2111069          0.054ms     0.212296ms     0.031307ms
IO          count    delay total  delay average      delay max      delay min
                0              0          0.000ms     0.000000ms     0.000000ms
SWAP        count    delay total  delay average      delay max      delay min
                0              0          0.000ms     0.000000ms     0.000000ms
RECLAIM     count    delay total  delay average      delay max      delay min
                0              0          0.000ms     0.000000ms     0.000000ms
THRASHING   count    delay total  delay average      delay max      delay min
                0              0          0.000ms     0.000000ms     0.000000ms
COMPACT     count    delay total  delay average      delay max      delay min
                0              0          0.000ms     0.000000ms     0.000000ms
WPCOPY      count    delay total  delay average      delay max      delay min
              156       11215873          0.072ms     0.207403ms     0.033913ms
IRQ         count    delay total  delay average      delay max      delay min
                0              0          0.000ms     0.000000ms     0.000000ms

Link: https://lkml.kernel.org/r/20241220173105906EOdsPhzjMLYNJJBqgz1ga@zte.com.cn
Co-developed-by: Wang Yong <wang.yong12@zte.com.cn>
Signed-off-by: Wang Yong <wang.yong12@zte.com.cn>
Co-developed-by: xu xin <xu.xin16@zte.com.cn>
Signed-off-by: xu xin <xu.xin16@zte.com.cn>
Signed-off-by: Wang Yaxin <wang.yaxin@zte.com.cn>
Co-developed-by: Kun Jiang <jiang.kun2@zte.com.cn>
Signed-off-by: Kun Jiang <jiang.kun2@zte.com.cn>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Fan Yu <fan.yu9@zte.com.cn>
Cc: Peilin He <he.peilin@zte.com.cn>
Cc: tuqiang <tu.qiang35@zte.com.cn>
Cc: ye xingchen <ye.xingchen@zte.com.cn>
Cc: Yunkai Zhang <zhang.yunkai@zte.com.cn>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 Documentation/accounting/delay-accounting.rst | 32 ++++++++++----------
 include/linux/delayacct.h                     |  7 +++++
 include/linux/sched.h                         |  3 ++
 include/uapi/linux/taskstats.h                |  8 +++++
 kernel/delayacct.c                            | 32 +++++++++++++++-----
 kernel/sched/stats.h                          |  4 +++
 tools/accounting/getdelays.c                  | 42 ++++++++++++++++-----------
 7 files changed, 88 insertions(+), 40 deletions(-)

(limited to 'include/uapi')

diff --git a/Documentation/accounting/delay-accounting.rst b/Documentation/accounting/delay-accounting.rst
index 8a0277428ccf..210c194d4a7b 100644
--- a/Documentation/accounting/delay-accounting.rst
+++ b/Documentation/accounting/delay-accounting.rst
@@ -107,22 +107,22 @@ Get sum and peak of delays, since system boot, for all pids with tgid 242::
 	TGID    242
 
 
-	CPU         count     real total  virtual total    delay total  delay average      delay max
-	              239      296000000      307724885        1127792          0.005ms     0.238382ms
-	IO          count    delay total  delay average      delay max
-	                0              0          0.000ms     0.000000ms
-	SWAP        count    delay total  delay average      delay max
-	                0              0          0.000ms     0.000000ms
-	RECLAIM     count    delay total  delay average      delay max
-	                0              0          0.000ms     0.000000ms
-	THRASHING   count    delay total  delay average      delay max
-	                0              0          0.000ms     0.000000ms
-	COMPACT     count    delay total  delay average      delay max
-	                0              0          0.000ms     0.000000ms
-	WPCOPY      count    delay total  delay average      delay max
-	              230       19100476          0.083ms     0.383822ms
-	IRQ         count    delay total  delay average      delay max
-	                0              0          0.000ms     0.000000ms
+	CPU         count     real total  virtual total    delay total  delay average      delay max      delay min
+	               39      156000000      156576579        2111069          0.054ms     0.212296ms     0.031307ms
+	IO          count    delay total  delay average      delay max      delay min
+	                0              0          0.000ms     0.000000ms     0.000000ms
+	SWAP        count    delay total  delay average      delay max      delay min
+	                0              0          0.000ms     0.000000ms     0.000000ms
+	RECLAIM     count    delay total  delay average      delay max      delay min
+	                0              0          0.000ms     0.000000ms     0.000000ms
+	THRASHING   count    delay total  delay average      delay max      delay min
+	                0              0          0.000ms     0.000000ms     0.000000ms
+	COMPACT     count    delay total  delay average      delay max      delay min
+	                0              0          0.000ms     0.000000ms     0.000000ms
+	WPCOPY      count    delay total  delay average      delay max      delay min
+	              156       11215873          0.072ms     0.207403ms     0.033913ms
+	IRQ         count    delay total  delay average      delay max      delay min
+	                0              0          0.000ms     0.000000ms     0.000000ms
 
 Get IO accounting for pid 1, it works only with -p::
 
diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h
index 56fbfa2c2ac5..800dcc360db2 100644
--- a/include/linux/delayacct.h
+++ b/include/linux/delayacct.h
@@ -30,9 +30,11 @@ struct task_delay_info {
 	 */
 	u64 blkio_start;
 	u64 blkio_delay_max;
+	u64 blkio_delay_min;
 	u64 blkio_delay;	/* wait for sync block io completion */
 	u64 swapin_start;
 	u64 swapin_delay_max;
+	u64 swapin_delay_min;
 	u64 swapin_delay;	/* wait for swapin */
 	u32 blkio_count;	/* total count of the number of sync block */
 				/* io operations performed */
@@ -40,21 +42,26 @@ struct task_delay_info {
 
 	u64 freepages_start;
 	u64 freepages_delay_max;
+	u64 freepages_delay_min;
 	u64 freepages_delay;	/* wait for memory reclaim */
 
 	u64 thrashing_start;
 	u64 thrashing_delay_max;
+	u64 thrashing_delay_min;
 	u64 thrashing_delay;	/* wait for thrashing page */
 
 	u64 compact_start;
 	u64 compact_delay_max;
+	u64 compact_delay_min;
 	u64 compact_delay;	/* wait for memory compact */
 
 	u64 wpcopy_start;
 	u64 wpcopy_delay_max;
+	u64 wpcopy_delay_min;
 	u64 wpcopy_delay;	/* wait for write-protect copy */
 
 	u64 irq_delay_max;
+	u64 irq_delay_min;
 	u64 irq_delay;	/* wait for IRQ/SOFTIRQ */
 
 	u32 freepages_count;	/* total count of memory reclaim */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index a0ae3923b41d..155012467b21 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -401,6 +401,9 @@ struct sched_info {
 	/* Max time spent waiting on a runqueue: */
 	unsigned long long		max_run_delay;
 
+	/* Min time spent waiting on a runqueue: */
+	unsigned long long		min_run_delay;
+
 	/* Timestamps: */
 
 	/* When did we last run on a CPU? */
diff --git a/include/uapi/linux/taskstats.h b/include/uapi/linux/taskstats.h
index e0d1c6fc9f3b..934e20ef7f79 100644
--- a/include/uapi/linux/taskstats.h
+++ b/include/uapi/linux/taskstats.h
@@ -73,6 +73,7 @@ struct taskstats {
 	__u64	cpu_count __attribute__((aligned(8)));
 	__u64	cpu_delay_total;
 	__u64	cpu_delay_max;
+	__u64	cpu_delay_min;
 
 	/* Following four fields atomically updated using task->delays->lock */
 
@@ -82,11 +83,13 @@ struct taskstats {
 	__u64	blkio_count;
 	__u64	blkio_delay_total;
 	__u64	blkio_delay_max;
+	__u64	blkio_delay_min;
 
 	/* Delay waiting for page fault I/O (swap in only) */
 	__u64	swapin_count;
 	__u64	swapin_delay_total;
 	__u64	swapin_delay_max;
+	__u64	swapin_delay_min;
 
 	/* cpu "wall-clock" running time
 	 * On some architectures, value will adjust for cpu time stolen
@@ -170,11 +173,13 @@ struct taskstats {
 	__u64	freepages_count;
 	__u64	freepages_delay_total;
 	__u64	freepages_delay_max;
+	__u64	freepages_delay_min;
 
 	/* Delay waiting for thrashing page */
 	__u64	thrashing_count;
 	__u64	thrashing_delay_total;
 	__u64	thrashing_delay_max;
+	__u64	thrashing_delay_min;
 
 	/* v10: 64-bit btime to avoid overflow */
 	__u64	ac_btime64;		/* 64-bit begin time */
@@ -183,6 +188,7 @@ struct taskstats {
 	__u64	compact_count;
 	__u64	compact_delay_total;
 	__u64	compact_delay_max;
+	__u64	compact_delay_min;
 
 	/* v12 begin */
 	__u32   ac_tgid;	/* thread group ID */
@@ -205,11 +211,13 @@ struct taskstats {
 	__u64    wpcopy_count;
 	__u64    wpcopy_delay_total;
 	__u64    wpcopy_delay_max;
+	__u64    wpcopy_delay_min;
 
 	/* v14: Delay waiting for IRQ/SOFTIRQ */
 	__u64    irq_count;
 	__u64    irq_delay_total;
 	__u64    irq_delay_max;
+	__u64    irq_delay_min;
 	/* v15: add Delay max */
 };
 
diff --git a/kernel/delayacct.c b/kernel/delayacct.c
index 23212a0c88e4..b238eb8c6573 100644
--- a/kernel/delayacct.c
+++ b/kernel/delayacct.c
@@ -95,7 +95,7 @@ void __delayacct_tsk_init(struct task_struct *tsk)
  * Finish delay accounting for a statistic using its timestamps (@start),
  * accumulator (@total) and @count
  */
-static void delayacct_end(raw_spinlock_t *lock, u64 *start, u64 *total, u32 *count, u64 *max)
+static void delayacct_end(raw_spinlock_t *lock, u64 *start, u64 *total, u32 *count, u64 *max, u64 *min)
 {
 	s64 ns = local_clock() - *start;
 	unsigned long flags;
@@ -106,6 +106,8 @@ static void delayacct_end(raw_spinlock_t *lock, u64 *start, u64 *total, u32 *cou
 		(*count)++;
 		if (ns > *max)
 			*max = ns;
+		if (*min == 0 || ns < *min)
+			*min = ns;
 		raw_spin_unlock_irqrestore(lock, flags);
 	}
 }
@@ -125,7 +127,8 @@ void __delayacct_blkio_end(struct task_struct *p)
 		      &p->delays->blkio_start,
 		      &p->delays->blkio_delay,
 		      &p->delays->blkio_count,
-		      &p->delays->blkio_delay_max);
+		      &p->delays->blkio_delay_max,
+		      &p->delays->blkio_delay_min);
 }
 
 int delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
@@ -157,6 +160,7 @@ int delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
 	d->cpu_count += t1;
 
 	d->cpu_delay_max = tsk->sched_info.max_run_delay;
+	d->cpu_delay_min = tsk->sched_info.min_run_delay;
 	tmp = (s64)d->cpu_delay_total + t2;
 	d->cpu_delay_total = (tmp < (s64)d->cpu_delay_total) ? 0 : tmp;
 	tmp = (s64)d->cpu_run_virtual_total + t3;
@@ -170,24 +174,31 @@ int delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
 	/* zero XXX_total, non-zero XXX_count implies XXX stat overflowed */
 	raw_spin_lock_irqsave(&tsk->delays->lock, flags);
 	d->blkio_delay_max = tsk->delays->blkio_delay_max;
+	d->blkio_delay_min = tsk->delays->blkio_delay_min;
 	tmp = d->blkio_delay_total + tsk->delays->blkio_delay;
 	d->blkio_delay_total = (tmp < d->blkio_delay_total) ? 0 : tmp;
 	d->swapin_delay_max = tsk->delays->swapin_delay_max;
+	d->swapin_delay_min = tsk->delays->swapin_delay_min;
 	tmp = d->swapin_delay_total + tsk->delays->swapin_delay;
 	d->swapin_delay_total = (tmp < d->swapin_delay_total) ? 0 : tmp;
 	d->freepages_delay_max = tsk->delays->freepages_delay_max;
+	d->freepages_delay_min = tsk->delays->freepages_delay_min;
 	tmp = d->freepages_delay_total + tsk->delays->freepages_delay;
 	d->freepages_delay_total = (tmp < d->freepages_delay_total) ? 0 : tmp;
 	d->thrashing_delay_max = tsk->delays->thrashing_delay_max;
+	d->thrashing_delay_min = tsk->delays->thrashing_delay_min;
 	tmp = d->thrashing_delay_total + tsk->delays->thrashing_delay;
 	d->thrashing_delay_total = (tmp < d->thrashing_delay_total) ? 0 : tmp;
 	d->compact_delay_max = tsk->delays->compact_delay_max;
+	d->compact_delay_min = tsk->delays->compact_delay_min;
 	tmp = d->compact_delay_total + tsk->delays->compact_delay;
 	d->compact_delay_total = (tmp < d->compact_delay_total) ? 0 : tmp;
 	d->wpcopy_delay_max = tsk->delays->wpcopy_delay_max;
+	d->wpcopy_delay_min = tsk->delays->wpcopy_delay_min;
 	tmp = d->wpcopy_delay_total + tsk->delays->wpcopy_delay;
 	d->wpcopy_delay_total = (tmp < d->wpcopy_delay_total) ? 0 : tmp;
 	d->irq_delay_max = tsk->delays->irq_delay_max;
+	d->irq_delay_min = tsk->delays->irq_delay_min;
 	tmp = d->irq_delay_total + tsk->delays->irq_delay;
 	d->irq_delay_total = (tmp < d->irq_delay_total) ? 0 : tmp;
 	d->blkio_count += tsk->delays->blkio_count;
@@ -224,7 +235,8 @@ void __delayacct_freepages_end(void)
 		      &current->delays->freepages_start,
 		      &current->delays->freepages_delay,
 		      &current->delays->freepages_count,
-		      &current->delays->freepages_delay_max);
+		      &current->delays->freepages_delay_max,
+		      &current->delays->freepages_delay_min);
 }
 
 void __delayacct_thrashing_start(bool *in_thrashing)
@@ -247,7 +259,8 @@ void __delayacct_thrashing_end(bool *in_thrashing)
 		      &current->delays->thrashing_start,
 		      &current->delays->thrashing_delay,
 		      &current->delays->thrashing_count,
-		      &current->delays->thrashing_delay_max);
+		      &current->delays->thrashing_delay_max,
+		      &current->delays->thrashing_delay_min);
 }
 
 void __delayacct_swapin_start(void)
@@ -261,7 +274,8 @@ void __delayacct_swapin_end(void)
 		      &current->delays->swapin_start,
 		      &current->delays->swapin_delay,
 		      &current->delays->swapin_count,
-		      &current->delays->swapin_delay_max);
+		      &current->delays->swapin_delay_max,
+		      &current->delays->swapin_delay_min);
 }
 
 void __delayacct_compact_start(void)
@@ -275,7 +289,8 @@ void __delayacct_compact_end(void)
 		      &current->delays->compact_start,
 		      &current->delays->compact_delay,
 		      &current->delays->compact_count,
-		      &current->delays->compact_delay_max);
+		      &current->delays->compact_delay_max,
+		      &current->delays->compact_delay_min);
 }
 
 void __delayacct_wpcopy_start(void)
@@ -289,7 +304,8 @@ void __delayacct_wpcopy_end(void)
 		      &current->delays->wpcopy_start,
 		      &current->delays->wpcopy_delay,
 		      &current->delays->wpcopy_count,
-		      &current->delays->wpcopy_delay_max);
+		      &current->delays->wpcopy_delay_max,
+		      &current->delays->wpcopy_delay_min);
 }
 
 void __delayacct_irq(struct task_struct *task, u32 delta)
@@ -301,6 +317,8 @@ void __delayacct_irq(struct task_struct *task, u32 delta)
 	task->delays->irq_count++;
 	if (delta > task->delays->irq_delay_max)
 		task->delays->irq_delay_max = delta;
+	if (delta && (!task->delays->irq_delay_min || delta < task->delays->irq_delay_min))
+		task->delays->irq_delay_min = delta;
 	raw_spin_unlock_irqrestore(&task->delays->lock, flags);
 }
 
diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h
index ed72435aef51..693537b908a1 100644
--- a/kernel/sched/stats.h
+++ b/kernel/sched/stats.h
@@ -246,6 +246,8 @@ static inline void sched_info_dequeue(struct rq *rq, struct task_struct *t)
 	t->sched_info.run_delay += delta;
 	if (delta > t->sched_info.max_run_delay)
 		t->sched_info.max_run_delay = delta;
+	if (delta && (!t->sched_info.min_run_delay || delta < t->sched_info.min_run_delay))
+		t->sched_info.min_run_delay = delta;
 	rq_sched_info_dequeue(rq, delta);
 }
 
@@ -269,6 +271,8 @@ static void sched_info_arrive(struct rq *rq, struct task_struct *t)
 	t->sched_info.pcount++;
 	if (delta > t->sched_info.max_run_delay)
 		t->sched_info.max_run_delay = delta;
+	if (delta && (!t->sched_info.min_run_delay || delta < t->sched_info.min_run_delay))
+		t->sched_info.min_run_delay = delta;
 
 	rq_sched_info_arrive(rq, delta);
 }
diff --git a/tools/accounting/getdelays.c b/tools/accounting/getdelays.c
index e570bcad185d..100ad3dc091a 100644
--- a/tools/accounting/getdelays.c
+++ b/tools/accounting/getdelays.c
@@ -192,7 +192,7 @@ static int get_family_id(int sd)
 }
 
 #define average_ms(t, c) (t / 1000000ULL / (c ? c : 1))
-#define delay_max_ms(t) (t / 1000000ULL)
+#define delay_ms(t) (t / 1000000ULL)
 
 static void print_delayacct(struct taskstats *t)
 {
@@ -213,48 +213,56 @@ static void print_delayacct(struct taskstats *t)
 	       "IRQ   %15s%15s%15s%15s\n"
 	       "      %15llu%15llu%15.3fms%13.6fms\n",
 	       "count", "real total", "virtual total",
-	       "delay total", "delay average", "delay max",
+	       "delay total", "delay average", "delay max", "delay min",
 	       (unsigned long long)t->cpu_count,
 	       (unsigned long long)t->cpu_run_real_total,
 	       (unsigned long long)t->cpu_run_virtual_total,
 	       (unsigned long long)t->cpu_delay_total,
 	       average_ms((double)t->cpu_delay_total, t->cpu_count),
-		   delay_max_ms((double)t->cpu_delay_max),
-	       "count", "delay total", "delay average", "delay max",
+	       delay_ms((double)t->cpu_delay_max),
+	       delay_ms((double)t->cpu_delay_min),
+	       "count", "delay total", "delay average", "delay max", "delay min",
 	       (unsigned long long)t->blkio_count,
 	       (unsigned long long)t->blkio_delay_total,
 	       average_ms((double)t->blkio_delay_total, t->blkio_count),
-		   delay_max_ms((double)t->blkio_delay_max),
-	       "count", "delay total", "delay average", "delay max",
+	       delay_ms((double)t->blkio_delay_max),
+	       delay_ms((double)t->blkio_delay_min),
+	       "count", "delay total", "delay average", "delay max", "delay min",
 	       (unsigned long long)t->swapin_count,
 	       (unsigned long long)t->swapin_delay_total,
 	       average_ms((double)t->swapin_delay_total, t->swapin_count),
-		   delay_max_ms((double)t->swapin_delay_max),
-	       "count", "delay total", "delay average", "delay max",
+	       delay_ms((double)t->swapin_delay_max),
+	       delay_ms((double)t->swapin_delay_min),
+	       "count", "delay total", "delay average", "delay max", "delay min",
 	       (unsigned long long)t->freepages_count,
 	       (unsigned long long)t->freepages_delay_total,
 	       average_ms((double)t->freepages_delay_total, t->freepages_count),
-		   delay_max_ms((double)t->freepages_delay_max),
-	       "count", "delay total", "delay average", "delay max",
+	       delay_ms((double)t->freepages_delay_max),
+	       delay_ms((double)t->freepages_delay_min),
+	       "count", "delay total", "delay average", "delay max", "delay min",
 	       (unsigned long long)t->thrashing_count,
 	       (unsigned long long)t->thrashing_delay_total,
 	       average_ms((double)t->thrashing_delay_total, t->thrashing_count),
-		   delay_max_ms((double)t->thrashing_delay_max),
-	       "count", "delay total", "delay average", "delay max",
+	       delay_ms((double)t->thrashing_delay_max),
+	       delay_ms((double)t->thrashing_delay_min),
+	       "count", "delay total", "delay average", "delay max", "delay min",
 	       (unsigned long long)t->compact_count,
 	       (unsigned long long)t->compact_delay_total,
 	       average_ms((double)t->compact_delay_total, t->compact_count),
-		   delay_max_ms((double)t->compact_delay_max),
-	       "count", "delay total", "delay average", "delay max",
+	       delay_ms((double)t->compact_delay_max),
+	       delay_ms((double)t->compact_delay_min),
+	       "count", "delay total", "delay average", "delay max", "delay min",
 	       (unsigned long long)t->wpcopy_count,
 	       (unsigned long long)t->wpcopy_delay_total,
 	       average_ms((double)t->wpcopy_delay_total, t->wpcopy_count),
-		   delay_max_ms((double)t->wpcopy_delay_max),
-	       "count", "delay total", "delay average", "delay max",
+	       delay_ms((double)t->wpcopy_delay_max),
+	       delay_ms((double)t->wpcopy_delay_min),
+	       "count", "delay total", "delay average", "delay max", "delay min",
 	       (unsigned long long)t->irq_count,
 	       (unsigned long long)t->irq_delay_total,
 	       average_ms((double)t->irq_delay_total, t->irq_count),
-		   delay_max_ms((double)t->irq_delay_max));
+	       delay_ms((double)t->irq_delay_max),
+	       delay_ms((double)t->irq_delay_min));
 }
 
 static void task_context_switch_counts(struct taskstats *t)
-- 
cgit v1.2.3


From f6d2e5abf154da59ccb3bcac23438f2230c8948a Mon Sep 17 00:00:00 2001
From: Benjamin Berg <benjamin.berg@intel.com>
Date: Wed, 1 Jan 2025 07:05:35 +0200
Subject: wifi: nl80211: permit userspace to pass supported selectors

Currently the SAE_H2E selector already exists, which needs to be
implemented by the SME. As new such selectors might be added in the
future, add a feature to permit userspace to report a selector as
supported.

If not given, the kernel should assume that userspace does support
SAE_H2E.

Signed-off-by: Benjamin Berg <benjamin.berg@intel.com>
Reviewed-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Miri Korenblit <miriam.rachel.korenblit@intel.com>
Link: https://patch.msgid.link/20250101070249.fe67b871cc39.Ieb98390328927e998e612345a58b6dbc00b0e3a2@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       | 12 ++++++++++++
 include/uapi/linux/nl80211.h |  9 +++++++++
 net/wireless/nl80211.c       | 32 ++++++++++++++++++++++++++++++++
 3 files changed, 53 insertions(+)

(limited to 'include/uapi')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 7790af534b7f..8a38d66be2c9 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -3023,6 +3023,10 @@ static inline const u8 *ieee80211_bss_get_ie(struct cfg80211_bss *bss, u8 id)
  *
  * @bss: The BSS to authenticate with, the callee must obtain a reference
  *	to it if it needs to keep it.
+ * @supported_selectors: List of selectors that should be assumed to be
+ *	supported by the station.
+ *	SAE_H2E must be assumed supported if set to %NULL.
+ * @supported_selectors_len: Length of supported_selectors in octets.
  * @auth_type: Authentication type (algorithm)
  * @ie: Extra IEs to add to Authentication frame or %NULL
  * @ie_len: Length of ie buffer in octets
@@ -3045,6 +3049,8 @@ struct cfg80211_auth_request {
 	struct cfg80211_bss *bss;
 	const u8 *ie;
 	size_t ie_len;
+	const u8 *supported_selectors;
+	u8 supported_selectors_len;
 	enum nl80211_auth_type auth_type;
 	const u8 *key;
 	u8 key_len;
@@ -3124,6 +3130,10 @@ enum cfg80211_assoc_req_flags {
  *	included in the Current AP address field of the Reassociation Request
  *	frame.
  * @flags:  See &enum cfg80211_assoc_req_flags
+ * @supported_selectors: supported selectors in IEEE 802.11 format
+ *	(or %NULL for no change).
+ *	If %NULL, then support for SAE_H2E should be assumed.
+ * @supported_selectors_len: Length of supported_selectors in octets.
  * @ht_capa:  HT Capabilities over-rides.  Values set in ht_capa_mask
  *	will be used in ht_capa.  Un-supported values will be ignored.
  * @ht_capa_mask:  The bits of ht_capa which are to be used.
@@ -3150,6 +3160,8 @@ struct cfg80211_assoc_request {
 	struct cfg80211_crypto_settings crypto;
 	bool use_mfp;
 	u32 flags;
+	const u8 *supported_selectors;
+	u8 supported_selectors_len;
 	struct ieee80211_ht_cap ht_capa;
 	struct ieee80211_ht_cap ht_capa_mask;
 	struct ieee80211_vht_cap vht_capa, vht_capa_mask;
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 6d11437596b9..ee1cf19803ea 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -2871,6 +2871,12 @@ enum nl80211_commands {
  * @NL80211_ATTR_VIF_RADIO_MASK: Bitmask of allowed radios (u32).
  *	A value of 0 means all radios.
  *
+ * @NL80211_ATTR_SUPPORTED_SELECTORS: supported selectors, array of
+ *	supported selectors as defined by IEEE 802.11 7.3.2.2 but without the
+ *	length restriction (at most %NL80211_MAX_SUPP_SELECTORS).
+ *	This can be used to provide a list of selectors that are implemented
+ *	by the supplicant. If not given, support for SAE_H2E is assumed.
+ *
  * @NUM_NL80211_ATTR: total number of nl80211_attrs available
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
@@ -3421,6 +3427,8 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_VIF_RADIO_MASK,
 
+	NL80211_ATTR_SUPPORTED_SELECTORS,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
@@ -3465,6 +3473,7 @@ enum nl80211_attrs {
 #define NL80211_WIPHY_NAME_MAXLEN		64
 
 #define NL80211_MAX_SUPP_RATES			32
+#define NL80211_MAX_SUPP_SELECTORS		128
 #define NL80211_MAX_SUPP_HT_RATES		77
 #define NL80211_MAX_SUPP_REG_RULES		128
 #define NL80211_TKIP_DATA_OFFSET_ENCR_KEY	0
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 8789d8b73f0f..39c114265db8 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -294,6 +294,21 @@ static int validate_he_capa(const struct nlattr *attr,
 	return 0;
 }
 
+static int validate_supported_selectors(const struct nlattr *attr,
+					struct netlink_ext_ack *extack)
+{
+	const u8 *supported_selectors = nla_data(attr);
+	u8 supported_selectors_len = nla_len(attr);
+
+	/* The top bit must not be set as it is not part of the selector */
+	for (int i = 0; i < supported_selectors_len; i++) {
+		if (supported_selectors[i] & 0x80)
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
 /* policy for the attributes */
 static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR];
 
@@ -830,6 +845,9 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
 	[NL80211_ATTR_MLO_TTLM_ULINK] = NLA_POLICY_EXACT_LEN(sizeof(u16) * 8),
 	[NL80211_ATTR_ASSOC_SPP_AMSDU] = { .type = NLA_FLAG },
 	[NL80211_ATTR_VIF_RADIO_MASK] = { .type = NLA_U32 },
+	[NL80211_ATTR_SUPPORTED_SELECTORS] =
+		NLA_POLICY_VALIDATE_FN(NLA_BINARY, validate_supported_selectors,
+				       NL80211_MAX_SUPP_SELECTORS),
 };
 
 /* policy for the key attributes */
@@ -10858,6 +10876,13 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info)
 		req.ie_len = nla_len(info->attrs[NL80211_ATTR_IE]);
 	}
 
+	if (info->attrs[NL80211_ATTR_SUPPORTED_SELECTORS]) {
+		req.supported_selectors =
+			nla_data(info->attrs[NL80211_ATTR_SUPPORTED_SELECTORS]);
+		req.supported_selectors_len =
+			nla_len(info->attrs[NL80211_ATTR_SUPPORTED_SELECTORS]);
+	}
+
 	auth_type = nla_get_u32(info->attrs[NL80211_ATTR_AUTH_TYPE]);
 	if (!nl80211_valid_auth_type(rdev, auth_type, NL80211_CMD_AUTHENTICATE))
 		return -EINVAL;
@@ -11140,6 +11165,13 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
 	if (info->attrs[NL80211_ATTR_PREV_BSSID])
 		req.prev_bssid = nla_data(info->attrs[NL80211_ATTR_PREV_BSSID]);
 
+	if (info->attrs[NL80211_ATTR_SUPPORTED_SELECTORS]) {
+		req.supported_selectors =
+			nla_data(info->attrs[NL80211_ATTR_SUPPORTED_SELECTORS]);
+		req.supported_selectors_len =
+			nla_len(info->attrs[NL80211_ATTR_SUPPORTED_SELECTORS]);
+	}
+
 	if (nla_get_flag(info->attrs[NL80211_ATTR_DISABLE_HT]))
 		req.flags |= ASSOC_REQ_DISABLE_HT;
 
-- 
cgit v1.2.3


From 65c1c041798484da54cbad5fb5833b81694c43cf Mon Sep 17 00:00:00 2001
From: Ilan Peer <ilan.peer@intel.com>
Date: Thu, 2 Jan 2025 16:19:55 +0200
Subject: wifi: cfg80211: Add support for dynamic addition/removal of links

Add support for requesting dynamic addition/removal of links to the
current MLO association.

Signed-off-by: Ilan Peer <ilan.peer@intel.com>
Signed-off-by: Miri Korenblit <miriam.rachel.korenblit@intel.com>
Link: https://patch.msgid.link/20250102161730.cef23352f2a2.I79c849974c494cb1cbf9e1b22a5d2d37395ff5ac@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       | 45 ++++++++++++++++++++++++
 include/uapi/linux/nl80211.h | 10 ++++++
 net/wireless/core.h          |  4 +++
 net/wireless/mlme.c          | 77 ++++++++++++++++++++++++++++++++++++++++
 net/wireless/nl80211.c       | 84 ++++++++++++++++++++++++++++++++++++++++++++
 net/wireless/nl80211.h       |  3 ++
 net/wireless/rdev-ops.h      | 19 ++++++++++
 net/wireless/trace.h         | 44 +++++++++++++++++++++++
 8 files changed, 286 insertions(+)

(limited to 'include/uapi')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 8a38d66be2c9..d4b4cabac029 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -4596,6 +4596,15 @@ struct mgmt_frame_regs {
  * @set_ttlm: set the TID to link mapping.
  * @get_radio_mask: get bitmask of radios in use.
  *	(invoked with the wiphy mutex held)
+ * @assoc_ml_reconf: Request a non-AP MLO connection to perform ML
+ *	reconfiguration, i.e., add and/or remove links to/from the
+ *	association using ML reconfiguration action frames. Successfully added
+ *	links will be added to the set of valid links. Successfully removed
+ *	links will be removed from the set of valid links. The driver must
+ *	indicate removed links by calling cfg80211_links_removed() and added
+ *	links by calling cfg80211_mlo_reconf_add_done(). When calling
+ *	cfg80211_mlo_reconf_add_done() the bss pointer must be given for each
+ *	link for which MLO reconfiguration 'add' operation was requested.
  */
 struct cfg80211_ops {
 	int	(*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow);
@@ -4959,6 +4968,9 @@ struct cfg80211_ops {
 	int	(*set_ttlm)(struct wiphy *wiphy, struct net_device *dev,
 			    struct cfg80211_ttlm_params *params);
 	u32	(*get_radio_mask)(struct wiphy *wiphy, struct net_device *dev);
+	int     (*assoc_ml_reconf)(struct wiphy *wiphy, struct net_device *dev,
+				   struct cfg80211_assoc_link *add_links,
+				   u16 rem_links);
 };
 
 /*
@@ -9716,6 +9728,39 @@ static inline int cfg80211_color_change_notify(struct net_device *dev,
  */
 void cfg80211_links_removed(struct net_device *dev, u16 link_mask);
 
+/**
+ * struct cfg80211_mlo_reconf_done_data - MLO reconfiguration data
+ * @buf: MLO Reconfiguration Response frame (header + body)
+ * @len: length of the frame data
+ * @added_links: BIT mask of links successfully added to the association
+ * @links: per-link information indexed by link ID
+ * @links.bss: the BSS that MLO reconfiguration was requested for, ownership of
+ *      the pointer moves to cfg80211 in the call to
+ *      cfg80211_mlo_reconf_add_done().
+ *
+ * The BSS pointer must be set for each link for which 'add' operation was
+ * requested in the assoc_ml_reconf callback.
+ */
+struct cfg80211_mlo_reconf_done_data {
+	const u8 *buf;
+	size_t len;
+	u16 added_links;
+	struct {
+		struct cfg80211_bss *bss;
+	} links[IEEE80211_MLD_MAX_NUM_LINKS];
+};
+
+/**
+ * cfg80211_mlo_reconf_add_done - Notify about MLO reconfiguration result
+ * @dev: network device.
+ * @data: MLO reconfiguration done data, &struct cfg80211_mlo_reconf_done_data
+ *
+ * Inform cfg80211 and the userspace that processing of ML reconfiguration
+ * request to add links to the association is done.
+ */
+void cfg80211_mlo_reconf_add_done(struct net_device *dev,
+				  struct cfg80211_mlo_reconf_done_data *data);
+
 /**
  * cfg80211_schedule_channels_check - schedule regulatory check if needed
  * @wdev: the wireless device to check
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index ee1cf19803ea..f900d7cc42bc 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -1329,6 +1329,9 @@
  *      %NL80211_ATTR_MLO_TTLM_ULINK attributes are used to specify the
  *      TID to Link mapping for downlink/uplink traffic.
  *
+ * @NL80211_CMD_ASSOC_MLO_RECONF: For a non-AP MLD station, request to
+ *      add/remove links to/from the association.
+ *
  * @NL80211_CMD_MAX: highest used command number
  * @__NL80211_CMD_AFTER_LAST: internal use
  */
@@ -1586,6 +1589,8 @@ enum nl80211_commands {
 
 	NL80211_CMD_SET_TID_TO_LINK_MAPPING,
 
+	NL80211_CMD_ASSOC_MLO_RECONF,
+
 	/* add new commands above here */
 
 	/* used to define NL80211_CMD_MAX below */
@@ -2877,6 +2882,9 @@ enum nl80211_commands {
  *	This can be used to provide a list of selectors that are implemented
  *	by the supplicant. If not given, support for SAE_H2E is assumed.
  *
+ * @NL80211_ATTR_MLO_RECONF_REM_LINKS: (u16) A bitmask of the links requested
+ *      to be removed from the MLO association.
+ *
  * @NUM_NL80211_ATTR: total number of nl80211_attrs available
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
@@ -3429,6 +3437,8 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_SUPPORTED_SELECTORS,
 
+	NL80211_ATTR_MLO_RECONF_REM_LINKS,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 4c45f994a8c0..826299f3d781 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -567,6 +567,10 @@ int cfg80211_remove_virtual_intf(struct cfg80211_registered_device *rdev,
 				 struct wireless_dev *wdev);
 void cfg80211_wdev_release_link_bsses(struct wireless_dev *wdev, u16 link_mask);
 
+int cfg80211_assoc_ml_reconf(struct cfg80211_registered_device *rdev,
+			     struct net_device *dev,
+			     struct cfg80211_assoc_link *links,
+			     u16 rem_links);
 /**
  * struct cfg80211_colocated_ap - colocated AP information
  *
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index 5c09bf4cdc2e..e10f2b3b4b7f 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -1294,3 +1294,80 @@ void cfg80211_stop_background_radar_detection(struct wireless_dev *wdev)
 					&rdev->background_radar_chandef,
 					NL80211_RADAR_CAC_ABORTED);
 }
+
+int cfg80211_assoc_ml_reconf(struct cfg80211_registered_device *rdev,
+			     struct net_device *dev,
+			     struct cfg80211_assoc_link *links,
+			     u16 rem_links)
+{
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+	int err;
+
+	lockdep_assert_wiphy(wdev->wiphy);
+
+	err = rdev_assoc_ml_reconf(rdev, dev, links, rem_links);
+	if (!err) {
+		int link_id;
+
+		for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS;
+		     link_id++) {
+			if (!links[link_id].bss)
+				continue;
+
+			cfg80211_ref_bss(&rdev->wiphy, links[link_id].bss);
+			cfg80211_hold_bss(bss_from_pub(links[link_id].bss));
+		}
+	}
+
+	return err;
+}
+
+void cfg80211_mlo_reconf_add_done(struct net_device *dev,
+				  struct cfg80211_mlo_reconf_done_data *data)
+{
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+	struct wiphy *wiphy = wdev->wiphy;
+	int link_id;
+
+	lockdep_assert_wiphy(wiphy);
+
+	trace_cfg80211_mlo_reconf_add_done(dev, data->added_links,
+					   data->buf, data->len);
+
+	if (WARN_ON(!wdev->valid_links))
+		return;
+
+	if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION &&
+		    wdev->iftype != NL80211_IFTYPE_P2P_CLIENT))
+		return;
+
+	/* validate that a BSS is given for each added link */
+	for (link_id = 0; link_id < ARRAY_SIZE(data->links); link_id++) {
+		struct cfg80211_bss *bss = data->links[link_id].bss;
+
+		if (!(data->added_links & BIT(link_id)))
+			continue;
+
+		if (WARN_ON(!bss))
+			return;
+	}
+
+	for (link_id = 0; link_id < ARRAY_SIZE(data->links); link_id++) {
+		struct cfg80211_bss *bss = data->links[link_id].bss;
+
+		if (!bss)
+			continue;
+
+		if (data->added_links & BIT(link_id)) {
+			wdev->links[link_id].client.current_bss =
+				bss_from_pub(bss);
+		} else {
+			cfg80211_unhold_bss(bss_from_pub(bss));
+			cfg80211_put_bss(wiphy, bss);
+		}
+	}
+
+	wdev->valid_links |= data->added_links;
+	nl80211_mlo_reconf_add_done(dev, data);
+}
+EXPORT_SYMBOL(cfg80211_mlo_reconf_add_done);
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 5bf9a27e2c68..199fa3da31e9 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -848,6 +848,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
 	[NL80211_ATTR_SUPPORTED_SELECTORS] =
 		NLA_POLICY_VALIDATE_FN(NLA_BINARY, validate_supported_selectors,
 				       NL80211_MAX_SUPP_SELECTORS),
+	[NL80211_ATTR_MLO_RECONF_REM_LINKS] = { .type = NLA_U16 },
 };
 
 /* policy for the key attributes */
@@ -16476,6 +16477,66 @@ nl80211_set_ttlm(struct sk_buff *skb, struct genl_info *info)
 	return rdev_set_ttlm(rdev, dev, &params);
 }
 
+static int nl80211_assoc_ml_reconf(struct sk_buff *skb, struct genl_info *info)
+{
+	struct cfg80211_registered_device *rdev = info->user_ptr[0];
+	struct net_device *dev = info->user_ptr[1];
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+	struct cfg80211_assoc_link links[IEEE80211_MLD_MAX_NUM_LINKS] = {};
+	unsigned int link_id;
+	u16 add_links, rem_links;
+	int err;
+
+	if (!wdev->valid_links)
+		return -EINVAL;
+
+	if (dev->ieee80211_ptr->conn_owner_nlportid &&
+	    dev->ieee80211_ptr->conn_owner_nlportid != info->snd_portid)
+		return -EPERM;
+
+	if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION &&
+	    dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT)
+		return -EOPNOTSUPP;
+
+	add_links = 0;
+	if (info->attrs[NL80211_ATTR_MLO_LINKS]) {
+		err = nl80211_process_links(rdev, links, NULL, 0, info);
+		if (err)
+			return err;
+
+		for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS;
+		     link_id++) {
+			if (!links[link_id].bss)
+				continue;
+			add_links |= BIT(link_id);
+		}
+	}
+
+	if (info->attrs[NL80211_ATTR_MLO_RECONF_REM_LINKS])
+		rem_links =
+			nla_get_u16(info->attrs[NL80211_ATTR_MLO_RECONF_REM_LINKS]);
+	else
+		rem_links = 0;
+
+	/* Validate that existing links are not added, removed links are valid
+	 * and don't allow adding and removing the same links
+	 */
+	if ((add_links & rem_links) || !(add_links | rem_links) ||
+	    (wdev->valid_links & add_links) ||
+	    ((wdev->valid_links & rem_links) != rem_links)) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	err = cfg80211_assoc_ml_reconf(rdev, dev, links, rem_links);
+
+out:
+	for (link_id = 0; link_id < ARRAY_SIZE(links); link_id++)
+		cfg80211_put_bss(&rdev->wiphy, links[link_id].bss);
+
+	return err;
+}
+
 #define NL80211_FLAG_NEED_WIPHY		0x01
 #define NL80211_FLAG_NEED_NETDEV	0x02
 #define NL80211_FLAG_NEED_RTNL		0x04
@@ -17668,6 +17729,12 @@ static const struct genl_small_ops nl80211_small_ops[] = {
 		.flags = GENL_UNS_ADMIN_PERM,
 		.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
 	},
+	{
+		.cmd = NL80211_CMD_ASSOC_MLO_RECONF,
+		.doit = nl80211_assoc_ml_reconf,
+		.flags = GENL_UNS_ADMIN_PERM,
+		.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
+	},
 };
 
 static struct genl_family nl80211_fam __ro_after_init = {
@@ -18564,6 +18631,23 @@ void cfg80211_links_removed(struct net_device *dev, u16 link_mask)
 }
 EXPORT_SYMBOL(cfg80211_links_removed);
 
+void nl80211_mlo_reconf_add_done(struct net_device *dev,
+				 struct cfg80211_mlo_reconf_done_data *data)
+{
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+	struct wiphy *wiphy = wdev->wiphy;
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
+	struct nl80211_mlme_event event = {
+		.cmd = NL80211_CMD_ASSOC_MLO_RECONF,
+		.buf = data->buf,
+		.buf_len = data->len,
+		.uapsd_queues = -1,
+	};
+
+	nl80211_send_mlme_event(rdev, dev, &event, GFP_KERNEL);
+}
+EXPORT_SYMBOL(nl80211_mlo_reconf_add_done);
+
 void nl80211_send_ibss_bssid(struct cfg80211_registered_device *rdev,
 			     struct net_device *netdev, const u8 *bssid,
 			     gfp_t gfp)
diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h
index ffaab9a92e5b..5e25782af1e0 100644
--- a/net/wireless/nl80211.h
+++ b/net/wireless/nl80211.h
@@ -124,4 +124,7 @@ void cfg80211_free_coalesce(struct cfg80211_coalesce *coalesce);
 /* peer measurement */
 int nl80211_pmsr_start(struct sk_buff *skb, struct genl_info *info);
 
+void nl80211_mlo_reconf_add_done(struct net_device *dev,
+				 struct cfg80211_mlo_reconf_done_data *data);
+
 #endif /* __NET_WIRELESS_NL80211_H */
diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h
index 8f2aa7e76c0a..2393a25577ad 100644
--- a/net/wireless/rdev-ops.h
+++ b/net/wireless/rdev-ops.h
@@ -1547,4 +1547,23 @@ rdev_get_radio_mask(struct cfg80211_registered_device *rdev,
 
 	return rdev->ops->get_radio_mask(wiphy, dev);
 }
+
+static inline int
+rdev_assoc_ml_reconf(struct cfg80211_registered_device *rdev,
+		     struct net_device *dev,
+		     struct cfg80211_assoc_link *add_links,
+		     u16 rem_links)
+{
+	struct wiphy *wiphy = &rdev->wiphy;
+	int ret = -EOPNOTSUPP;
+
+	trace_rdev_assoc_ml_reconf(wiphy, dev, add_links, rem_links);
+	if (rdev->ops->assoc_ml_reconf)
+		ret = rdev->ops->assoc_ml_reconf(wiphy, dev, add_links,
+						 rem_links);
+	trace_rdev_return_int(wiphy, ret);
+
+	return ret;
+}
+
 #endif /* __CFG80211_RDEV_OPS */
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index a57210c8087c..627217e25c2f 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -4104,6 +4104,50 @@ TRACE_EVENT(cfg80211_links_removed,
 		  __entry->link_mask)
 );
 
+TRACE_EVENT(cfg80211_mlo_reconf_add_done,
+	TP_PROTO(struct net_device *netdev, u16 link_mask,
+		 const u8 *buf, size_t len),
+	TP_ARGS(netdev, link_mask, buf, len),
+	TP_STRUCT__entry(
+		NETDEV_ENTRY
+		__field(u16, link_mask)
+		__dynamic_array(u8, buf, len)
+	),
+	TP_fast_assign(
+		NETDEV_ASSIGN;
+		__entry->link_mask = link_mask;
+		memcpy(__get_dynamic_array(buf), buf, len);
+	),
+	TP_printk(NETDEV_PR_FMT ", link_mask:0x%x",
+		  NETDEV_PR_ARG, __entry->link_mask)
+);
+
+TRACE_EVENT(rdev_assoc_ml_reconf,
+	TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
+		 struct cfg80211_assoc_link *add_links,
+		 u16 rem_links),
+	TP_ARGS(wiphy, netdev, add_links, rem_links),
+	TP_STRUCT__entry(
+		WIPHY_ENTRY
+		NETDEV_ENTRY
+		__field(u16, add_links)
+		__field(u16, rem_links)
+	),
+	TP_fast_assign(
+		WIPHY_ASSIGN;
+		NETDEV_ASSIGN;
+		u32 i;
+
+		__entry->add_links = 0;
+		__entry->rem_links = rem_links;
+		for (i = 0; add_links && i < IEEE80211_MLD_MAX_NUM_LINKS; i++)
+			if (add_links[i].bss)
+				__entry->add_links |= BIT(i);
+	),
+	TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", add_links=0x%x, rem_links=0x%x",
+		  WIPHY_PR_ARG, NETDEV_PR_ARG,
+		  __entry->add_links, __entry->rem_links)
+);
 #endif /* !__RDEV_OPS_TRACE || TRACE_HEADER_MULTI_READ */
 
 #undef TRACE_INCLUDE_PATH
-- 
cgit v1.2.3


From 904c277342936b75ae55999d87abacd4c1ab1fd3 Mon Sep 17 00:00:00 2001
From: Ilan Peer <ilan.peer@intel.com>
Date: Thu, 2 Jan 2025 16:19:59 +0200
Subject: wifi: cfg80211: Add support for controlling EPCS

Add support for configuring Emergency Preparedness Communication
Services (EPCS) for station mode.

Signed-off-by: Ilan Peer <ilan.peer@intel.com>
Reviewed-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Miri Korenblit <miriam.rachel.korenblit@intel.com>
Link: https://patch.msgid.link/20250102161730.ea54ac94445c.I11d750188bc0871e13e86146a3b5cc048d853e69@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       | 10 ++++++++
 include/uapi/linux/nl80211.h |  9 +++++++
 net/wireless/nl80211.c       | 60 ++++++++++++++++++++++++++++++++++++++++++++
 net/wireless/rdev-ops.h      | 15 +++++++++++
 net/wireless/trace.h         | 34 +++++++++++++++++++++++++
 5 files changed, 128 insertions(+)

(limited to 'include/uapi')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index d4b4cabac029..363d7dd2255a 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -4594,6 +4594,7 @@ struct mgmt_frame_regs {
  *
  * @set_hw_timestamp: Enable/disable HW timestamping of TM/FTM frames.
  * @set_ttlm: set the TID to link mapping.
+ * @set_epcs: Enable/Disable EPCS for station mode.
  * @get_radio_mask: get bitmask of radios in use.
  *	(invoked with the wiphy mutex held)
  * @assoc_ml_reconf: Request a non-AP MLO connection to perform ML
@@ -4971,6 +4972,8 @@ struct cfg80211_ops {
 	int     (*assoc_ml_reconf)(struct wiphy *wiphy, struct net_device *dev,
 				   struct cfg80211_assoc_link *add_links,
 				   u16 rem_links);
+	int	(*set_epcs)(struct wiphy *wiphy, struct net_device *dev,
+			    bool val);
 };
 
 /*
@@ -9771,6 +9774,13 @@ void cfg80211_mlo_reconf_add_done(struct net_device *dev,
  */
 void cfg80211_schedule_channels_check(struct wireless_dev *wdev);
 
+/**
+ * cfg80211_epcs_changed - Notify about a change in EPCS state
+ * @netdev: the wireless device whose EPCS state changed
+ * @enabled: set to true if EPCS was enabled, otherwise set to false.
+ */
+void cfg80211_epcs_changed(struct net_device *netdev, bool enabled);
+
 #ifdef CONFIG_CFG80211_DEBUGFS
 /**
  * wiphy_locked_debugfs_read - do a locked read in debugfs
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index f900d7cc42bc..f6c1b181c886 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -1332,6 +1332,10 @@
  * @NL80211_CMD_ASSOC_MLO_RECONF: For a non-AP MLD station, request to
  *      add/remove links to/from the association.
  *
+ * @NL80211_CMD_EPCS_CFG: EPCS configuration for a station. Used by userland to
+ *	control EPCS configuration. Used to notify userland on the current state
+ *	of EPCS.
+ *
  * @NL80211_CMD_MAX: highest used command number
  * @__NL80211_CMD_AFTER_LAST: internal use
  */
@@ -1590,6 +1594,7 @@ enum nl80211_commands {
 	NL80211_CMD_SET_TID_TO_LINK_MAPPING,
 
 	NL80211_CMD_ASSOC_MLO_RECONF,
+	NL80211_CMD_EPCS_CFG,
 
 	/* add new commands above here */
 
@@ -2885,6 +2890,9 @@ enum nl80211_commands {
  * @NL80211_ATTR_MLO_RECONF_REM_LINKS: (u16) A bitmask of the links requested
  *      to be removed from the MLO association.
  *
+ * @NL80211_ATTR_EPCS: Flag attribute indicating that EPCS is enabled for a
+ *	station interface.
+ *
  * @NUM_NL80211_ATTR: total number of nl80211_attrs available
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
@@ -3438,6 +3446,7 @@ enum nl80211_attrs {
 	NL80211_ATTR_SUPPORTED_SELECTORS,
 
 	NL80211_ATTR_MLO_RECONF_REM_LINKS,
+	NL80211_ATTR_EPCS,
 
 	/* add attributes here, update the policy in nl80211.c */
 
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 199fa3da31e9..351b863623af 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -849,6 +849,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
 		NLA_POLICY_VALIDATE_FN(NLA_BINARY, validate_supported_selectors,
 				       NL80211_MAX_SUPP_SELECTORS),
 	[NL80211_ATTR_MLO_RECONF_REM_LINKS] = { .type = NLA_U16 },
+	[NL80211_ATTR_EPCS] = { .type = NLA_FLAG },
 };
 
 /* policy for the key attributes */
@@ -16537,6 +16538,26 @@ out:
 	return err;
 }
 
+static int
+nl80211_epcs_cfg(struct sk_buff *skb, struct genl_info *info)
+{
+	struct cfg80211_registered_device *rdev = info->user_ptr[0];
+	struct net_device *dev = info->user_ptr[1];
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+	bool val;
+
+	if (wdev->iftype != NL80211_IFTYPE_STATION &&
+	    wdev->iftype != NL80211_IFTYPE_P2P_CLIENT)
+		return -EOPNOTSUPP;
+
+	if (!wdev->connected)
+		return -ENOLINK;
+
+	val = nla_get_flag(info->attrs[NL80211_ATTR_EPCS]);
+
+	return rdev_set_epcs(rdev, dev, val);
+}
+
 #define NL80211_FLAG_NEED_WIPHY		0x01
 #define NL80211_FLAG_NEED_NETDEV	0x02
 #define NL80211_FLAG_NEED_RTNL		0x04
@@ -17735,6 +17756,12 @@ static const struct genl_small_ops nl80211_small_ops[] = {
 		.flags = GENL_UNS_ADMIN_PERM,
 		.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
 	},
+	{
+		.cmd = NL80211_CMD_EPCS_CFG,
+		.doit = nl80211_epcs_cfg,
+		.flags = GENL_UNS_ADMIN_PERM,
+		.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
+	},
 };
 
 static struct genl_family nl80211_fam __ro_after_init = {
@@ -20480,6 +20507,39 @@ void cfg80211_schedule_channels_check(struct wireless_dev *wdev)
 }
 EXPORT_SYMBOL(cfg80211_schedule_channels_check);
 
+void cfg80211_epcs_changed(struct net_device *netdev, bool enabled)
+{
+	struct wireless_dev *wdev = netdev->ieee80211_ptr;
+	struct wiphy *wiphy = wdev->wiphy;
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
+	struct sk_buff *msg;
+	void *hdr;
+
+	trace_cfg80211_epcs_changed(wdev, enabled);
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return;
+
+	hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_EPCS_CFG);
+	if (!hdr) {
+		nlmsg_free(msg);
+		return;
+	}
+
+	if (enabled && nla_put_flag(msg, NL80211_ATTR_EPCS))
+		goto nla_put_failure;
+
+	genlmsg_end(msg, hdr);
+	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
+				NL80211_MCGRP_MLME, GFP_KERNEL);
+	return;
+
+ nla_put_failure:
+	nlmsg_free(msg);
+}
+EXPORT_SYMBOL(cfg80211_epcs_changed);
+
 /* initialisation/exit functions */
 
 int __init nl80211_init(void)
diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h
index 2393a25577ad..759da1623342 100644
--- a/net/wireless/rdev-ops.h
+++ b/net/wireless/rdev-ops.h
@@ -1566,4 +1566,19 @@ rdev_assoc_ml_reconf(struct cfg80211_registered_device *rdev,
 	return ret;
 }
 
+static inline int
+rdev_set_epcs(struct cfg80211_registered_device *rdev,
+	      struct net_device *dev, bool val)
+{
+	struct wiphy *wiphy = &rdev->wiphy;
+	int ret = -EOPNOTSUPP;
+
+	trace_rdev_set_epcs(wiphy, dev, val);
+	if (rdev->ops->set_epcs)
+		ret = rdev->ops->set_epcs(wiphy, dev, val);
+	trace_rdev_return_int(wiphy, ret);
+
+	return ret;
+}
+
 #endif /* __CFG80211_RDEV_OPS */
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index 627217e25c2f..4f0abd5d49df 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -3049,6 +3049,24 @@ TRACE_EVENT(rdev_set_ttlm,
 		  WIPHY_PR_ARG, NETDEV_PR_ARG)
 );
 
+TRACE_EVENT(rdev_set_epcs,
+	TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
+		 bool val),
+	TP_ARGS(wiphy, netdev, val),
+	TP_STRUCT__entry(
+		WIPHY_ENTRY
+		NETDEV_ENTRY
+		__field(bool, val)
+	),
+	TP_fast_assign(
+		WIPHY_ASSIGN;
+		NETDEV_ASSIGN;
+		__entry->val = val;
+	),
+	TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", config=%u",
+		  WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->val)
+);
+
 /*************************************************************
  *	     cfg80211 exported functions traces		     *
  *************************************************************/
@@ -4148,6 +4166,22 @@ TRACE_EVENT(rdev_assoc_ml_reconf,
 		  WIPHY_PR_ARG, NETDEV_PR_ARG,
 		  __entry->add_links, __entry->rem_links)
 );
+
+TRACE_EVENT(cfg80211_epcs_changed,
+	TP_PROTO(struct wireless_dev *wdev, bool enabled),
+	TP_ARGS(wdev, enabled),
+	TP_STRUCT__entry(
+		WDEV_ENTRY
+		__field(u32, enabled)
+	),
+	TP_fast_assign(
+		WDEV_ASSIGN;
+		__entry->enabled = enabled;
+	),
+	TP_printk(WDEV_PR_FMT ", enabled=%u",
+		  WDEV_PR_ARG, __entry->enabled)
+);
+
 #endif /* !__RDEV_OPS_TRACE || TRACE_HEADER_MULTI_READ */
 
 #undef TRACE_INCLUDE_PATH
-- 
cgit v1.2.3


From 127186cfb184eaccdfe948e6da66940cfa03efc5 Mon Sep 17 00:00:00 2001
From: Yu Kuai <yukuai3@huawei.com>
Date: Thu, 2 Jan 2025 19:28:41 +0800
Subject: md: reintroduce md-linear

THe md-linear is removed by commit 849d18e27be9 ("md: Remove deprecated
CONFIG_MD_LINEAR") because it has been marked as deprecated for a long
time.

However, md-linear is used widely for underlying disks with different size,
sadly we didn't know this until now, and it's true useful to create
partitions and assemble multiple raid and then append one to the other.

People have to use dm-linear in this case now, however, they will prefer
to minimize the number of involved modules.

Fixes: 849d18e27be9 ("md: Remove deprecated CONFIG_MD_LINEAR")
Cc: stable@vger.kernel.org
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Acked-by: Coly Li <colyli@kernel.org>
Acked-by: Mike Snitzer <snitzer@kernel.org>
Link: https://lore.kernel.org/r/20250102112841.1227111-1-yukuai1@huaweicloud.com
Signed-off-by: Song Liu <song@kernel.org>
---
 drivers/md/Kconfig             |  13 ++
 drivers/md/Makefile            |   2 +
 drivers/md/md-autodetect.c     |   8 +-
 drivers/md/md-linear.c         | 354 +++++++++++++++++++++++++++++++++++++++++
 drivers/md/md.c                |   2 +-
 include/uapi/linux/raid/md_p.h |   2 +-
 include/uapi/linux/raid/md_u.h |   2 +
 7 files changed, 379 insertions(+), 4 deletions(-)
 create mode 100644 drivers/md/md-linear.c

(limited to 'include/uapi')

diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 1e9db8e4acdf..0b1870a09e1f 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -61,6 +61,19 @@ config MD_BITMAP_FILE
 	  various kernel APIs and can only work with files on a file system not
 	  actually sitting on the MD device.
 
+config MD_LINEAR
+	tristate "Linear (append) mode"
+	depends on BLK_DEV_MD
+	help
+	  If you say Y here, then your multiple devices driver will be able to
+	  use the so-called linear mode, i.e. it will combine the hard disk
+	  partitions by simply appending one to the other.
+
+	  To compile this as a module, choose M here: the module
+	  will be called linear.
+
+	  If unsure, say Y.
+
 config MD_RAID0
 	tristate "RAID-0 (striping) mode"
 	depends on BLK_DEV_MD
diff --git a/drivers/md/Makefile b/drivers/md/Makefile
index 476a214e4bdc..87bdfc9fe14c 100644
--- a/drivers/md/Makefile
+++ b/drivers/md/Makefile
@@ -29,12 +29,14 @@ dm-zoned-y	+= dm-zoned-target.o dm-zoned-metadata.o dm-zoned-reclaim.o
 
 md-mod-y	+= md.o md-bitmap.o
 raid456-y	+= raid5.o raid5-cache.o raid5-ppl.o
+linear-y       += md-linear.o
 
 # Note: link order is important.  All raid personalities
 # and must come before md.o, as they each initialise
 # themselves, and md.o may use the personalities when it
 # auto-initialised.
 
+obj-$(CONFIG_MD_LINEAR)		+= linear.o
 obj-$(CONFIG_MD_RAID0)		+= raid0.o
 obj-$(CONFIG_MD_RAID1)		+= raid1.o
 obj-$(CONFIG_MD_RAID10)		+= raid10.o
diff --git a/drivers/md/md-autodetect.c b/drivers/md/md-autodetect.c
index b2a00f213c2c..4b80165afd23 100644
--- a/drivers/md/md-autodetect.c
+++ b/drivers/md/md-autodetect.c
@@ -49,6 +49,7 @@ static int md_setup_ents __initdata;
  *             instead of just one.  -- KTK
  * 18May2000: Added support for persistent-superblock arrays:
  *             md=n,0,factor,fault,device-list   uses RAID0 for device n
+ *             md=n,-1,factor,fault,device-list  uses LINEAR for device n
  *             md=n,device-list      reads a RAID superblock from the devices
  *             elements in device-list are read by name_to_kdev_t so can be
  *             a hex number or something like /dev/hda1 /dev/sdb
@@ -87,7 +88,7 @@ static int __init md_setup(char *str)
 		md_setup_ents++;
 	switch (get_option(&str, &level)) {	/* RAID level */
 	case 2: /* could be 0 or -1.. */
-		if (level == 0) {
+		if (level == 0 || level == LEVEL_LINEAR) {
 			if (get_option(&str, &factor) != 2 ||	/* Chunk Size */
 					get_option(&str, &fault) != 2) {
 				printk(KERN_WARNING "md: Too few arguments supplied to md=.\n");
@@ -95,7 +96,10 @@ static int __init md_setup(char *str)
 			}
 			md_setup_args[ent].level = level;
 			md_setup_args[ent].chunk = 1 << (factor+12);
-			pername = "raid0";
+			if (level ==  LEVEL_LINEAR)
+				pername = "linear";
+			else
+				pername = "raid0";
 			break;
 		}
 		fallthrough;
diff --git a/drivers/md/md-linear.c b/drivers/md/md-linear.c
new file mode 100644
index 000000000000..53bc3fda9edb
--- /dev/null
+++ b/drivers/md/md-linear.c
@@ -0,0 +1,354 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * linear.c : Multiple Devices driver for Linux Copyright (C) 1994-96 Marc
+ * ZYNGIER <zyngier@ufr-info-p7.ibp.fr> or <maz@gloups.fdn.fr>
+ */
+
+#include <linux/blkdev.h>
+#include <linux/raid/md_u.h>
+#include <linux/seq_file.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <trace/events/block.h>
+#include "md.h"
+
+struct dev_info {
+	struct md_rdev	*rdev;
+	sector_t	end_sector;
+};
+
+struct linear_conf {
+	struct rcu_head         rcu;
+	sector_t                array_sectors;
+	/* a copy of mddev->raid_disks */
+	int                     raid_disks;
+	struct dev_info         disks[] __counted_by(raid_disks);
+};
+
+/*
+ * find which device holds a particular offset
+ */
+static inline struct dev_info *which_dev(struct mddev *mddev, sector_t sector)
+{
+	int lo, mid, hi;
+	struct linear_conf *conf;
+
+	lo = 0;
+	hi = mddev->raid_disks - 1;
+	conf = mddev->private;
+
+	/*
+	 * Binary Search
+	 */
+
+	while (hi > lo) {
+
+		mid = (hi + lo) / 2;
+		if (sector < conf->disks[mid].end_sector)
+			hi = mid;
+		else
+			lo = mid + 1;
+	}
+
+	return conf->disks + lo;
+}
+
+static sector_t linear_size(struct mddev *mddev, sector_t sectors, int raid_disks)
+{
+	struct linear_conf *conf;
+	sector_t array_sectors;
+
+	conf = mddev->private;
+	WARN_ONCE(sectors || raid_disks,
+		  "%s does not support generic reshape\n", __func__);
+	array_sectors = conf->array_sectors;
+
+	return array_sectors;
+}
+
+static int linear_set_limits(struct mddev *mddev)
+{
+	struct queue_limits lim;
+	int err;
+
+	md_init_stacking_limits(&lim);
+	lim.max_hw_sectors = mddev->chunk_sectors;
+	lim.max_write_zeroes_sectors = mddev->chunk_sectors;
+	lim.io_min = mddev->chunk_sectors << 9;
+	err = mddev_stack_rdev_limits(mddev, &lim, MDDEV_STACK_INTEGRITY);
+	if (err) {
+		queue_limits_cancel_update(mddev->gendisk->queue);
+		return err;
+	}
+
+	return queue_limits_set(mddev->gendisk->queue, &lim);
+}
+
+static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks)
+{
+	struct linear_conf *conf;
+	struct md_rdev *rdev;
+	int ret = -EINVAL;
+	int cnt;
+	int i;
+
+	conf = kzalloc(struct_size(conf, disks, raid_disks), GFP_KERNEL);
+	if (!conf)
+		return ERR_PTR(-ENOMEM);
+
+	/*
+	 * conf->raid_disks is copy of mddev->raid_disks. The reason to
+	 * keep a copy of mddev->raid_disks in struct linear_conf is,
+	 * mddev->raid_disks may not be consistent with pointers number of
+	 * conf->disks[] when it is updated in linear_add() and used to
+	 * iterate old conf->disks[] earray in linear_congested().
+	 * Here conf->raid_disks is always consitent with number of
+	 * pointers in conf->disks[] array, and mddev->private is updated
+	 * with rcu_assign_pointer() in linear_addr(), such race can be
+	 * avoided.
+	 */
+	conf->raid_disks = raid_disks;
+
+	cnt = 0;
+	conf->array_sectors = 0;
+
+	rdev_for_each(rdev, mddev) {
+		int j = rdev->raid_disk;
+		struct dev_info *disk = conf->disks + j;
+		sector_t sectors;
+
+		if (j < 0 || j >= raid_disks || disk->rdev) {
+			pr_warn("md/linear:%s: disk numbering problem. Aborting!\n",
+				mdname(mddev));
+			goto out;
+		}
+
+		disk->rdev = rdev;
+		if (mddev->chunk_sectors) {
+			sectors = rdev->sectors;
+			sector_div(sectors, mddev->chunk_sectors);
+			rdev->sectors = sectors * mddev->chunk_sectors;
+		}
+
+		conf->array_sectors += rdev->sectors;
+		cnt++;
+	}
+	if (cnt != raid_disks) {
+		pr_warn("md/linear:%s: not enough drives present. Aborting!\n",
+			mdname(mddev));
+		goto out;
+	}
+
+	/*
+	 * Here we calculate the device offsets.
+	 */
+	conf->disks[0].end_sector = conf->disks[0].rdev->sectors;
+
+	for (i = 1; i < raid_disks; i++)
+		conf->disks[i].end_sector =
+			conf->disks[i-1].end_sector +
+			conf->disks[i].rdev->sectors;
+
+	if (!mddev_is_dm(mddev)) {
+		ret = linear_set_limits(mddev);
+		if (ret)
+			goto out;
+	}
+
+	return conf;
+
+out:
+	kfree(conf);
+	return ERR_PTR(ret);
+}
+
+static int linear_run(struct mddev *mddev)
+{
+	struct linear_conf *conf;
+	int ret;
+
+	if (md_check_no_bitmap(mddev))
+		return -EINVAL;
+
+	conf = linear_conf(mddev, mddev->raid_disks);
+	if (IS_ERR(conf))
+		return PTR_ERR(conf);
+
+	mddev->private = conf;
+	md_set_array_sectors(mddev, linear_size(mddev, 0, 0));
+
+	ret =  md_integrity_register(mddev);
+	if (ret) {
+		kfree(conf);
+		mddev->private = NULL;
+	}
+	return ret;
+}
+
+static int linear_add(struct mddev *mddev, struct md_rdev *rdev)
+{
+	/* Adding a drive to a linear array allows the array to grow.
+	 * It is permitted if the new drive has a matching superblock
+	 * already on it, with raid_disk equal to raid_disks.
+	 * It is achieved by creating a new linear_private_data structure
+	 * and swapping it in in-place of the current one.
+	 * The current one is never freed until the array is stopped.
+	 * This avoids races.
+	 */
+	struct linear_conf *newconf, *oldconf;
+
+	if (rdev->saved_raid_disk != mddev->raid_disks)
+		return -EINVAL;
+
+	rdev->raid_disk = rdev->saved_raid_disk;
+	rdev->saved_raid_disk = -1;
+
+	newconf = linear_conf(mddev, mddev->raid_disks + 1);
+	if (!newconf)
+		return -ENOMEM;
+
+	/* newconf->raid_disks already keeps a copy of * the increased
+	 * value of mddev->raid_disks, WARN_ONCE() is just used to make
+	 * sure of this. It is possible that oldconf is still referenced
+	 * in linear_congested(), therefore kfree_rcu() is used to free
+	 * oldconf until no one uses it anymore.
+	 */
+	oldconf = rcu_dereference_protected(mddev->private,
+			lockdep_is_held(&mddev->reconfig_mutex));
+	mddev->raid_disks++;
+	WARN_ONCE(mddev->raid_disks != newconf->raid_disks,
+		"copied raid_disks doesn't match mddev->raid_disks");
+	rcu_assign_pointer(mddev->private, newconf);
+	md_set_array_sectors(mddev, linear_size(mddev, 0, 0));
+	set_capacity_and_notify(mddev->gendisk, mddev->array_sectors);
+	kfree_rcu(oldconf, rcu);
+	return 0;
+}
+
+static void linear_free(struct mddev *mddev, void *priv)
+{
+	struct linear_conf *conf = priv;
+
+	kfree(conf);
+}
+
+static bool linear_make_request(struct mddev *mddev, struct bio *bio)
+{
+	struct dev_info *tmp_dev;
+	sector_t start_sector, end_sector, data_offset;
+	sector_t bio_sector = bio->bi_iter.bi_sector;
+
+	if (unlikely(bio->bi_opf & REQ_PREFLUSH)
+	    && md_flush_request(mddev, bio))
+		return true;
+
+	tmp_dev = which_dev(mddev, bio_sector);
+	start_sector = tmp_dev->end_sector - tmp_dev->rdev->sectors;
+	end_sector = tmp_dev->end_sector;
+	data_offset = tmp_dev->rdev->data_offset;
+
+	if (unlikely(bio_sector >= end_sector ||
+		     bio_sector < start_sector))
+		goto out_of_bounds;
+
+	if (unlikely(is_rdev_broken(tmp_dev->rdev))) {
+		md_error(mddev, tmp_dev->rdev);
+		bio_io_error(bio);
+		return true;
+	}
+
+	if (unlikely(bio_end_sector(bio) > end_sector)) {
+		/* This bio crosses a device boundary, so we have to split it */
+		struct bio *split = bio_split(bio, end_sector - bio_sector,
+					      GFP_NOIO, &mddev->bio_set);
+
+		if (IS_ERR(split)) {
+			bio->bi_status = errno_to_blk_status(PTR_ERR(split));
+			bio_endio(bio);
+			return true;
+		}
+
+		bio_chain(split, bio);
+		submit_bio_noacct(bio);
+		bio = split;
+	}
+
+	md_account_bio(mddev, &bio);
+	bio_set_dev(bio, tmp_dev->rdev->bdev);
+	bio->bi_iter.bi_sector = bio->bi_iter.bi_sector -
+		start_sector + data_offset;
+
+	if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
+		     !bdev_max_discard_sectors(bio->bi_bdev))) {
+		/* Just ignore it */
+		bio_endio(bio);
+	} else {
+		if (mddev->gendisk)
+			trace_block_bio_remap(bio, disk_devt(mddev->gendisk),
+					      bio_sector);
+		mddev_check_write_zeroes(mddev, bio);
+		submit_bio_noacct(bio);
+	}
+	return true;
+
+out_of_bounds:
+	pr_err("md/linear:%s: make_request: Sector %llu out of bounds on dev %pg: %llu sectors, offset %llu\n",
+	       mdname(mddev),
+	       (unsigned long long)bio->bi_iter.bi_sector,
+	       tmp_dev->rdev->bdev,
+	       (unsigned long long)tmp_dev->rdev->sectors,
+	       (unsigned long long)start_sector);
+	bio_io_error(bio);
+	return true;
+}
+
+static void linear_status(struct seq_file *seq, struct mddev *mddev)
+{
+	seq_printf(seq, " %dk rounding", mddev->chunk_sectors / 2);
+}
+
+static void linear_error(struct mddev *mddev, struct md_rdev *rdev)
+{
+	if (!test_and_set_bit(MD_BROKEN, &mddev->flags)) {
+		char *md_name = mdname(mddev);
+
+		pr_crit("md/linear%s: Disk failure on %pg detected, failing array.\n",
+			md_name, rdev->bdev);
+	}
+}
+
+static void linear_quiesce(struct mddev *mddev, int state)
+{
+}
+
+static struct md_personality linear_personality = {
+	.name		= "linear",
+	.level		= LEVEL_LINEAR,
+	.owner		= THIS_MODULE,
+	.make_request	= linear_make_request,
+	.run		= linear_run,
+	.free		= linear_free,
+	.status		= linear_status,
+	.hot_add_disk	= linear_add,
+	.size		= linear_size,
+	.quiesce	= linear_quiesce,
+	.error_handler	= linear_error,
+};
+
+static int __init linear_init(void)
+{
+	return register_md_personality(&linear_personality);
+}
+
+static void linear_exit(void)
+{
+	unregister_md_personality(&linear_personality);
+}
+
+module_init(linear_init);
+module_exit(linear_exit);
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Linear device concatenation personality for MD (deprecated)");
+MODULE_ALIAS("md-personality-1"); /* LINEAR - deprecated*/
+MODULE_ALIAS("md-linear");
+MODULE_ALIAS("md-level--1");
diff --git a/drivers/md/md.c b/drivers/md/md.c
index aebe12b0ee27..3dd013f82e26 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -8124,7 +8124,7 @@ void md_error(struct mddev *mddev, struct md_rdev *rdev)
 		return;
 	mddev->pers->error_handler(mddev, rdev);
 
-	if (mddev->pers->level == 0)
+	if (mddev->pers->level == 0 || mddev->pers->level == LEVEL_LINEAR)
 		return;
 
 	if (mddev->degraded && !test_bit(MD_BROKEN, &mddev->flags))
diff --git a/include/uapi/linux/raid/md_p.h b/include/uapi/linux/raid/md_p.h
index 5a43c23f53bf..ff47b6f0ba0f 100644
--- a/include/uapi/linux/raid/md_p.h
+++ b/include/uapi/linux/raid/md_p.h
@@ -233,7 +233,7 @@ struct mdp_superblock_1 {
 	char	set_name[32];	/* set and interpreted by user-space */
 
 	__le64	ctime;		/* lo 40 bits are seconds, top 24 are microseconds or 0*/
-	__le32	level;		/* 0,1,4,5 */
+	__le32	level;		/* 0,1,4,5, -1 (linear) */
 	__le32	layout;		/* only for raid5 and raid10 currently */
 	__le64	size;		/* used size of component devices, in 512byte sectors */
 
diff --git a/include/uapi/linux/raid/md_u.h b/include/uapi/linux/raid/md_u.h
index 7be89a4906e7..a893010735fb 100644
--- a/include/uapi/linux/raid/md_u.h
+++ b/include/uapi/linux/raid/md_u.h
@@ -103,6 +103,8 @@ typedef struct mdu_array_info_s {
 
 } mdu_array_info_t;
 
+#define LEVEL_LINEAR		(-1)
+
 /* we need a value for 'no level specified' and 0
  * means 'raid0', so we need something else.  This is
  * for internal use only
-- 
cgit v1.2.3


From 6167c0b6e8d7ddb6b3e5efffcac34a85f7872997 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Fri, 10 Jan 2025 07:05:13 +0100
Subject: net: ethtool: add support for structured PHY statistics

Introduce a new way to report PHY statistics in a structured and
standardized format using the netlink API. This new method does not
replace the old driver-specific stats, which can still be accessed with
`ethtool -S <eth name>`. The structured stats are available with
`ethtool -S <eth name> --all-groups`.

This new method makes it easier to diagnose problems by organizing stats
in a consistent and documented way.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 Documentation/networking/ethtool-netlink.rst |  1 +
 include/uapi/linux/ethtool.h                 |  2 ++
 include/uapi/linux/ethtool_netlink.h         | 14 ++++++++++
 net/ethtool/netlink.h                        |  1 +
 net/ethtool/stats.c                          | 39 +++++++++++++++++++++++++++-
 net/ethtool/strset.c                         |  5 ++++
 6 files changed, 61 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst
index a7ba6368a4d5..da846f1d998e 100644
--- a/Documentation/networking/ethtool-netlink.rst
+++ b/Documentation/networking/ethtool-netlink.rst
@@ -1616,6 +1616,7 @@ the ``ETHTOOL_A_STATS_GROUPS`` bitset. Currently defined values are:
  ETHTOOL_STATS_ETH_PHY  eth-phy  Basic IEEE 802.3 PHY statistics (30.3.2.1.*)
  ETHTOOL_STATS_ETH_CTRL eth-ctrl Basic IEEE 802.3 MAC Ctrl statistics (30.3.3.*)
  ETHTOOL_STATS_RMON     rmon     RMON (RFC 2819) statistics
+ ETHTOOL_STATS_PHY      phy      Additional PHY statistics, not defined by IEEE
  ====================== ======== ===============================================
 
 Each group should have a corresponding ``ETHTOOL_A_STATS_GRP`` in the reply.
diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index 7e1b3820f91f..d1089b88efc7 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -681,6 +681,7 @@ enum ethtool_link_ext_substate_module {
  * @ETH_SS_STATS_ETH_MAC: names of IEEE 802.3 MAC statistics
  * @ETH_SS_STATS_ETH_CTRL: names of IEEE 802.3 MAC Control statistics
  * @ETH_SS_STATS_RMON: names of RMON statistics
+ * @ETH_SS_STATS_PHY: names of PHY(dev) statistics
  *
  * @ETH_SS_COUNT: number of defined string sets
  */
@@ -706,6 +707,7 @@ enum ethtool_stringset {
 	ETH_SS_STATS_ETH_MAC,
 	ETH_SS_STATS_ETH_CTRL,
 	ETH_SS_STATS_RMON,
+	ETH_SS_STATS_PHY,
 
 	/* add new constants above here */
 	ETH_SS_COUNT
diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h
index 9c909ce733a5..9ff72cfb2e98 100644
--- a/include/uapi/linux/ethtool_netlink.h
+++ b/include/uapi/linux/ethtool_netlink.h
@@ -99,6 +99,7 @@ enum {
 	ETHTOOL_STATS_ETH_MAC,
 	ETHTOOL_STATS_ETH_CTRL,
 	ETHTOOL_STATS_RMON,
+	ETHTOOL_STATS_PHY,
 
 	/* add new constants above here */
 	__ETHTOOL_STATS_CNT
@@ -193,6 +194,19 @@ enum {
 	ETHTOOL_A_STATS_RMON_MAX = (__ETHTOOL_A_STATS_RMON_CNT - 1)
 };
 
+enum {
+	/* Basic packet counters if PHY has separate counters from the MAC */
+	ETHTOOL_A_STATS_PHY_RX_PKTS,
+	ETHTOOL_A_STATS_PHY_RX_BYTES,
+	ETHTOOL_A_STATS_PHY_RX_ERRORS,
+	ETHTOOL_A_STATS_PHY_TX_PKTS,
+	ETHTOOL_A_STATS_PHY_TX_BYTES,
+	ETHTOOL_A_STATS_PHY_TX_ERRORS,
+
+	/* add new constants above here */
+	__ETHTOOL_A_STATS_PHY_CNT,
+	ETHTOOL_A_STATS_PHY_MAX = (__ETHTOOL_A_STATS_PHY_CNT - 1)
+};
 
 /* generic netlink info */
 #define ETHTOOL_GENL_NAME "ethtool"
diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h
index 0a09298fff92..1ce0a3de1430 100644
--- a/net/ethtool/netlink.h
+++ b/net/ethtool/netlink.h
@@ -511,5 +511,6 @@ extern const char stats_eth_phy_names[__ETHTOOL_A_STATS_ETH_PHY_CNT][ETH_GSTRING
 extern const char stats_eth_mac_names[__ETHTOOL_A_STATS_ETH_MAC_CNT][ETH_GSTRING_LEN];
 extern const char stats_eth_ctrl_names[__ETHTOOL_A_STATS_ETH_CTRL_CNT][ETH_GSTRING_LEN];
 extern const char stats_rmon_names[__ETHTOOL_A_STATS_RMON_CNT][ETH_GSTRING_LEN];
+extern const char stats_phy_names[__ETHTOOL_A_STATS_PHY_CNT][ETH_GSTRING_LEN];
 
 #endif /* _NET_ETHTOOL_NETLINK_H */
diff --git a/net/ethtool/stats.c b/net/ethtool/stats.c
index f4d822c225db..038a2558f052 100644
--- a/net/ethtool/stats.c
+++ b/net/ethtool/stats.c
@@ -36,6 +36,7 @@ const char stats_std_names[__ETHTOOL_STATS_CNT][ETH_GSTRING_LEN] = {
 	[ETHTOOL_STATS_ETH_MAC]			= "eth-mac",
 	[ETHTOOL_STATS_ETH_CTRL]		= "eth-ctrl",
 	[ETHTOOL_STATS_RMON]			= "rmon",
+	[ETHTOOL_STATS_PHY]			= "phydev",
 };
 
 const char stats_eth_phy_names[__ETHTOOL_A_STATS_ETH_PHY_CNT][ETH_GSTRING_LEN] = {
@@ -80,6 +81,15 @@ const char stats_rmon_names[__ETHTOOL_A_STATS_RMON_CNT][ETH_GSTRING_LEN] = {
 	[ETHTOOL_A_STATS_RMON_JABBER]		= "etherStatsJabbers",
 };
 
+const char stats_phy_names[__ETHTOOL_A_STATS_PHY_CNT][ETH_GSTRING_LEN] = {
+	[ETHTOOL_A_STATS_PHY_RX_PKTS]		= "RxFrames",
+	[ETHTOOL_A_STATS_PHY_RX_BYTES]		= "RxOctets",
+	[ETHTOOL_A_STATS_PHY_RX_ERRORS]		= "RxErrors",
+	[ETHTOOL_A_STATS_PHY_TX_PKTS]		= "TxFrames",
+	[ETHTOOL_A_STATS_PHY_TX_BYTES]		= "TxOctets",
+	[ETHTOOL_A_STATS_PHY_TX_ERRORS]		= "TxErrors",
+};
+
 const struct nla_policy ethnl_stats_get_policy[ETHTOOL_A_STATS_SRC + 1] = {
 	[ETHTOOL_A_STATS_HEADER]	=
 		NLA_POLICY_NESTED(ethnl_header_policy),
@@ -156,7 +166,8 @@ static int stats_prepare_data(const struct ethnl_req_info *req_base,
 	data->ctrl_stats.src = src;
 	data->rmon_stats.src = src;
 
-	if (test_bit(ETHTOOL_STATS_ETH_PHY, req_info->stat_mask) &&
+	if ((test_bit(ETHTOOL_STATS_PHY, req_info->stat_mask) ||
+	     test_bit(ETHTOOL_STATS_ETH_PHY, req_info->stat_mask)) &&
 	    src == ETHTOOL_MAC_STATS_SRC_AGGREGATE) {
 		if (phydev)
 			phy_ethtool_get_phy_stats(phydev, &data->phy_stats,
@@ -212,6 +223,10 @@ static int stats_reply_size(const struct ethnl_req_info *req_base,
 			nla_total_size(4)) *	/* _A_STATS_GRP_HIST_BKT_HI */
 			ETHTOOL_RMON_HIST_MAX * 2;
 	}
+	if (test_bit(ETHTOOL_STATS_PHY, req_info->stat_mask)) {
+		n_stats += sizeof(struct ethtool_phy_stats) / sizeof(u64);
+		n_grps++;
+	}
 
 	len += n_grps * (nla_total_size(0) + /* _A_STATS_GRP */
 			 nla_total_size(4) + /* _A_STATS_GRP_ID */
@@ -265,6 +280,25 @@ static int stats_put_phy_stats(struct sk_buff *skb,
 	return 0;
 }
 
+static int stats_put_phydev_stats(struct sk_buff *skb,
+				  const struct stats_reply_data *data)
+{
+	if (stat_put(skb, ETHTOOL_A_STATS_PHY_RX_PKTS,
+		     data->phydev_stats.rx_packets) ||
+	    stat_put(skb, ETHTOOL_A_STATS_PHY_RX_BYTES,
+		     data->phydev_stats.rx_bytes) ||
+	    stat_put(skb, ETHTOOL_A_STATS_PHY_RX_ERRORS,
+		     data->phydev_stats.rx_errors) ||
+	    stat_put(skb, ETHTOOL_A_STATS_PHY_TX_PKTS,
+		     data->phydev_stats.tx_packets) ||
+	    stat_put(skb, ETHTOOL_A_STATS_PHY_TX_BYTES,
+		     data->phydev_stats.tx_bytes) ||
+	    stat_put(skb, ETHTOOL_A_STATS_PHY_TX_ERRORS,
+		     data->phydev_stats.tx_errors))
+		return -EMSGSIZE;
+	return 0;
+}
+
 static int stats_put_mac_stats(struct sk_buff *skb,
 			       const struct stats_reply_data *data)
 {
@@ -441,6 +475,9 @@ static int stats_fill_reply(struct sk_buff *skb,
 	if (!ret && test_bit(ETHTOOL_STATS_RMON, req_info->stat_mask))
 		ret = stats_put_stats(skb, data, ETHTOOL_STATS_RMON,
 				      ETH_SS_STATS_RMON, stats_put_rmon_stats);
+	if (!ret && test_bit(ETHTOOL_STATS_PHY, req_info->stat_mask))
+		ret = stats_put_stats(skb, data, ETHTOOL_STATS_PHY,
+				      ETH_SS_STATS_PHY, stats_put_phydev_stats);
 
 	return ret;
 }
diff --git a/net/ethtool/strset.c b/net/ethtool/strset.c
index b3382b3cf325..818cf01f0911 100644
--- a/net/ethtool/strset.c
+++ b/net/ethtool/strset.c
@@ -105,6 +105,11 @@ static const struct strset_info info_template[] = {
 		.count		= __ETHTOOL_A_STATS_RMON_CNT,
 		.strings	= stats_rmon_names,
 	},
+	[ETH_SS_STATS_PHY] = {
+		.per_dev	= false,
+		.count		= __ETHTOOL_A_STATS_PHY_CNT,
+		.strings	= stats_phy_names,
+	},
 };
 
 struct strset_req_info {
-- 
cgit v1.2.3


From 384669921779806105c56751abff41fa0127f93a Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 14 Jan 2025 11:47:01 +0100
Subject: ALSA: rawmidi: Make tied_device=0 as default / unknown

In the original change, rawmidi_info.tied_device showed -1 for the
unknown or untied device.  But this would require the user-space to
check the protocol version and judge the value conditionally, which
is rather error-prone.

Instead, set the tied_device = 0 to be default as unknown, and
indicate the real device with the offset 1, for achieving more
backward compatibility.

Suggested-by: Jaroslav Kysela <perex@perex.cz>
Link: https://patch.msgid.link/20250114104711.19197-1-tiwai@suse.de
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/rawmidi.h     | 9 +++++++++
 include/uapi/sound/asound.h | 2 +-
 sound/core/rawmidi.c        | 1 -
 sound/core/ump.c            | 3 +--
 4 files changed, 11 insertions(+), 4 deletions(-)

(limited to 'include/uapi')

diff --git a/include/sound/rawmidi.h b/include/sound/rawmidi.h
index 6f2e95298fc7..6916f7133597 100644
--- a/include/sound/rawmidi.h
+++ b/include/sound/rawmidi.h
@@ -191,4 +191,13 @@ long snd_rawmidi_kernel_read(struct snd_rawmidi_substream *substream,
 long snd_rawmidi_kernel_write(struct snd_rawmidi_substream *substream,
 			      const unsigned char *buf, long count);
 
+/* set up the tied devices */
+static inline void snd_rawmidi_tie_devices(struct snd_rawmidi *r1,
+					   struct snd_rawmidi *r2)
+{
+	/* tied_device field keeps the device+1 (so that 0 being unknown) */
+	r1->tied_device = r2->device + 1;
+	r2->tied_device = r1->device + 1;
+}
+
 #endif /* __SOUND_RAWMIDI_H */
diff --git a/include/uapi/sound/asound.h b/include/uapi/sound/asound.h
index 000b36c0e2b9..5a049eeaecce 100644
--- a/include/uapi/sound/asound.h
+++ b/include/uapi/sound/asound.h
@@ -730,7 +730,7 @@ enum {
 #define SNDRV_RAWMIDI_INFO_UMP			0x00000008
 #define SNDRV_RAWMIDI_INFO_STREAM_INACTIVE	0x00000010
 
-#define SNDRV_RAWMIDI_DEVICE_UNKNOWN		-1
+#define SNDRV_RAWMIDI_DEVICE_UNKNOWN		0
 
 struct snd_rawmidi_info {
 	unsigned int device;		/* RO/WR (control): device number */
diff --git a/sound/core/rawmidi.c b/sound/core/rawmidi.c
index 8a681bff4f7f..70a958ac1112 100644
--- a/sound/core/rawmidi.c
+++ b/sound/core/rawmidi.c
@@ -1837,7 +1837,6 @@ int snd_rawmidi_init(struct snd_rawmidi *rmidi,
 	INIT_LIST_HEAD(&rmidi->streams[SNDRV_RAWMIDI_STREAM_INPUT].substreams);
 	INIT_LIST_HEAD(&rmidi->streams[SNDRV_RAWMIDI_STREAM_OUTPUT].substreams);
 	rmidi->info_flags = info_flags;
-	rmidi->tied_device = SNDRV_RAWMIDI_DEVICE_UNKNOWN;
 
 	if (id != NULL)
 		strscpy(rmidi->id, id, sizeof(rmidi->id));
diff --git a/sound/core/ump.c b/sound/core/ump.c
index ff3cc2386ece..8d8681a42ca5 100644
--- a/sound/core/ump.c
+++ b/sound/core/ump.c
@@ -1382,8 +1382,7 @@ int snd_ump_attach_legacy_rawmidi(struct snd_ump_endpoint *ump,
 	ump_legacy_set_rawmidi_name(ump);
 	update_legacy_names(ump);
 
-	rmidi->tied_device = ump->core.device;
-	ump->core.tied_device = rmidi->device;
+	snd_rawmidi_tie_devices(rmidi, &ump->core);
 
 	ump_dbg(ump, "Created a legacy rawmidi #%d (%s)\n", device, id);
 	return 0;
-- 
cgit v1.2.3


From d16b34479064fcb8dbb66a72308b5034be819161 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 13 Jan 2025 13:55:58 +0000
Subject: tcp: add LINUX_MIB_PAWS_OLD_ACK SNMP counter

Prior patch in the series added TCP_RFC7323_PAWS_ACK drop reason.

This patch adds the corresponding SNMP counter, for folks
using nstat instead of tracing for TCP diagnostics.

nstat -az | grep PAWSOldAck

Suggested-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Neal Cardwell <ncardwell@google.com>
Reviewed-by: Jason Xing <kerneljasonxing@gmail.com>
Tested-by: Neal Cardwell <ncardwell@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Link: https://patch.msgid.link/20250113135558.3180360-4-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/dropreason-core.h | 1 +
 include/uapi/linux/snmp.h     | 1 +
 net/ipv4/proc.c               | 1 +
 net/ipv4/tcp_input.c          | 7 ++++---
 4 files changed, 7 insertions(+), 3 deletions(-)

(limited to 'include/uapi')

diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h
index 28555109f9bd..ed864934e20b 100644
--- a/include/net/dropreason-core.h
+++ b/include/net/dropreason-core.h
@@ -262,6 +262,7 @@ enum skb_drop_reason {
 	SKB_DROP_REASON_TCP_RFC7323_PAWS,
 	/**
 	 * @SKB_DROP_REASON_TCP_RFC7323_PAWS_ACK: PAWS check, old ACK packet.
+	 * Corresponds to LINUX_MIB_PAWS_OLD_ACK.
 	 */
 	SKB_DROP_REASON_TCP_RFC7323_PAWS_ACK,
 	/** @SKB_DROP_REASON_TCP_OLD_SEQUENCE: Old SEQ field (duplicate packet) */
diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h
index 2e75674e7d4f..848c7784e684 100644
--- a/include/uapi/linux/snmp.h
+++ b/include/uapi/linux/snmp.h
@@ -186,6 +186,7 @@ enum
 	LINUX_MIB_TIMEWAITKILLED,		/* TimeWaitKilled */
 	LINUX_MIB_PAWSACTIVEREJECTED,		/* PAWSActiveRejected */
 	LINUX_MIB_PAWSESTABREJECTED,		/* PAWSEstabRejected */
+	LINUX_MIB_PAWS_OLD_ACK,			/* PAWSOldAck */
 	LINUX_MIB_DELAYEDACKS,			/* DelayedACKs */
 	LINUX_MIB_DELAYEDACKLOCKED,		/* DelayedACKLocked */
 	LINUX_MIB_DELAYEDACKLOST,		/* DelayedACKLost */
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 40053a02bae1..affd21a0f572 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -189,6 +189,7 @@ static const struct snmp_mib snmp4_net_list[] = {
 	SNMP_MIB_ITEM("TWKilled", LINUX_MIB_TIMEWAITKILLED),
 	SNMP_MIB_ITEM("PAWSActive", LINUX_MIB_PAWSACTIVEREJECTED),
 	SNMP_MIB_ITEM("PAWSEstab", LINUX_MIB_PAWSESTABREJECTED),
+	SNMP_MIB_ITEM("PAWSOldAck", LINUX_MIB_PAWS_OLD_ACK),
 	SNMP_MIB_ITEM("DelayedACKs", LINUX_MIB_DELAYEDACKS),
 	SNMP_MIB_ITEM("DelayedACKLocked", LINUX_MIB_DELAYEDACKLOCKED),
 	SNMP_MIB_ITEM("DelayedACKLost", LINUX_MIB_DELAYEDACKLOST),
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index dc0e88bcc535..eb82e01da911 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5969,12 +5969,13 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
 	if (unlikely(th->syn))
 		goto syn_challenge;
 
-	/* Old ACK are common, do not change PAWSESTABREJECTED
+	/* Old ACK are common, increment PAWS_OLD_ACK
 	 * and do not send a dupack.
 	 */
-	if (reason == SKB_DROP_REASON_TCP_RFC7323_PAWS_ACK)
+	if (reason == SKB_DROP_REASON_TCP_RFC7323_PAWS_ACK) {
+		NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWS_OLD_ACK);
 		goto discard;
-
+	}
 	NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
 	if (!tcp_oow_rate_limited(sock_net(sk), skb,
 				  LINUX_MIB_TCPACKSKIPPEDPAWS,
-- 
cgit v1.2.3


From e5321ae10e1323359a5067a26dfe98b5f44cc5e6 Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Date: Tue, 14 Jan 2025 19:08:38 +0200
Subject: PCI: Store number of supported End-End TLP Prefixes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

eetlp_prefix_path in the struct pci_dev tells if End-End TLP Prefixes
are supported by the path or not, and the value is only calculated if
CONFIG_PCI_PASID is set.

The Max End-End TLP Prefixes field in the Device Capabilities Register 2
also tells how many (1-4) End-End TLP Prefixes are supported (PCIe r6.2 sec
7.5.3.15). The number of supported End-End Prefixes is useful for reading
correct number of DWORDs from TLP Prefix Log register in AER capability
(PCIe r6.2 sec 7.8.4.12).

Replace eetlp_prefix_path with eetlp_prefix_max and determine the number of
supported End-End Prefixes regardless of CONFIG_PCI_PASID so that an
upcoming commit generalizing TLP Prefix Log register reading does not have
to read extra DWORDs for End-End Prefixes that never will be there.

The value stored into eetlp_prefix_max is directly derived from device's
Max End-End TLP Prefixes and does not consider limitations imposed by
bridges or the Root Port beyond supported/not supported flags. This is
intentional for two reasons:

  1) PCIe r6.2 spec sections 2.2.10.4 & 6.2.4.4 indicate that a TLP is
     malformed only if the number of prefixes exceed the number of Max
     End-End TLP Prefixes, which seems to be the case even if the device
     could never receive that many prefixes due to smaller maximum imposed
     by a bridge or the Root Port. If TLP parsing is later added, this
     distinction is significant in interpreting what is logged by the TLP
     Prefix Log registers and the value matching to the Malformed TLP
     threshold is going to be more useful.

  2) TLP Prefix handling happens autonomously on a low layer and the value
     in eetlp_prefix_max is not programmed anywhere by the kernel (i.e.,
     there is no limiter OS can control to prevent sending more than N TLP
     Prefixes).

Link: https://lore.kernel.org/r/20250114170840.1633-7-ilpo.jarvinen@linux.intel.com
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Yazen Ghannam <yazen.ghannam@amd.com>
---
 drivers/pci/ats.c             |  2 +-
 drivers/pci/probe.c           | 14 +++++++++-----
 include/linux/pci.h           |  2 +-
 include/uapi/linux/pci_regs.h |  1 +
 4 files changed, 12 insertions(+), 7 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c
index 6afff1f1b143..c6b266c772c8 100644
--- a/drivers/pci/ats.c
+++ b/drivers/pci/ats.c
@@ -410,7 +410,7 @@ int pci_enable_pasid(struct pci_dev *pdev, int features)
 	if (WARN_ON(pdev->pasid_enabled))
 		return -EBUSY;
 
-	if (!pdev->eetlp_prefix_path && !pdev->pasid_no_tlp)
+	if (!pdev->eetlp_prefix_max && !pdev->pasid_no_tlp)
 		return -EINVAL;
 
 	if (!pasid)
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 2e81ab0f5a25..381c22e3ccdb 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -2251,8 +2251,8 @@ static void pci_configure_relaxed_ordering(struct pci_dev *dev)
 
 static void pci_configure_eetlp_prefix(struct pci_dev *dev)
 {
-#ifdef CONFIG_PCI_PASID
 	struct pci_dev *bridge;
+	unsigned int eetlp_max;
 	int pcie_type;
 	u32 cap;
 
@@ -2264,15 +2264,19 @@ static void pci_configure_eetlp_prefix(struct pci_dev *dev)
 		return;
 
 	pcie_type = pci_pcie_type(dev);
+
+	eetlp_max = FIELD_GET(PCI_EXP_DEVCAP2_EE_PREFIX_MAX, cap);
+	/* 00b means 4 */
+	eetlp_max = eetlp_max ?: 4;
+
 	if (pcie_type == PCI_EXP_TYPE_ROOT_PORT ||
 	    pcie_type == PCI_EXP_TYPE_RC_END)
-		dev->eetlp_prefix_path = 1;
+		dev->eetlp_prefix_max = eetlp_max;
 	else {
 		bridge = pci_upstream_bridge(dev);
-		if (bridge && bridge->eetlp_prefix_path)
-			dev->eetlp_prefix_path = 1;
+		if (bridge && bridge->eetlp_prefix_max)
+			dev->eetlp_prefix_max = eetlp_max;
 	}
-#endif
 }
 
 static void pci_configure_serr(struct pci_dev *dev)
diff --git a/include/linux/pci.h b/include/linux/pci.h
index db9b47ce3eef..21be5a1edf1a 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -407,7 +407,7 @@ struct pci_dev {
 					   supported from root to here */
 #endif
 	unsigned int	pasid_no_tlp:1;		/* PASID works without TLP Prefix */
-	unsigned int	eetlp_prefix_path:1;	/* End-to-End TLP Prefix */
+	unsigned int	eetlp_prefix_max:3;	/* Max # of End-End TLP Prefixes, 0=not supported */
 
 	pci_channel_state_t error_state;	/* Current connectivity state */
 	struct device	dev;			/* Generic device interface */
diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
index 1601c7ed5fab..14a6306c4ce1 100644
--- a/include/uapi/linux/pci_regs.h
+++ b/include/uapi/linux/pci_regs.h
@@ -665,6 +665,7 @@
 #define  PCI_EXP_DEVCAP2_OBFF_MSG	0x00040000 /* New message signaling */
 #define  PCI_EXP_DEVCAP2_OBFF_WAKE	0x00080000 /* Re-use WAKE# for OBFF */
 #define  PCI_EXP_DEVCAP2_EE_PREFIX	0x00200000 /* End-End TLP Prefix */
+#define  PCI_EXP_DEVCAP2_EE_PREFIX_MAX	0x00c00000 /* Max End-End TLP Prefixes */
 #define PCI_EXP_DEVCTL2		0x28	/* Device Control 2 */
 #define  PCI_EXP_DEVCTL2_COMP_TIMEOUT	0x000f	/* Completion Timeout Value */
 #define  PCI_EXP_DEVCTL2_COMP_TMOUT_DIS	0x0010	/* Completion Timeout Disable */
-- 
cgit v1.2.3


From 1bebc7869c99d466f819dd2cffaef0edf7d7a035 Mon Sep 17 00:00:00 2001
From: Illia Ostapyshyn <illia@yshyn.com>
Date: Thu, 14 Nov 2024 18:39:29 +0100
Subject: Input: allocate keycode for phone linking
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The F11 key on the new Lenovo Thinkpad T14 Gen 5, T16 Gen 3, and P14s
Gen 5 laptops includes a symbol showing a smartphone and a laptop
chained together.  According to the user manual, it starts the Microsoft
Phone Link software used to connect to Android/iOS devices and relay
messages/calls or sync data.

As there are no suitable keycodes for this action, introduce a new one.

Signed-off-by: Illia Ostapyshyn <illia@yshyn.com>
Acked-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Link: https://lore.kernel.org/r/20241114173930.44983-2-illia@yshyn.com
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
---
 include/uapi/linux/input-event-codes.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/input-event-codes.h b/include/uapi/linux/input-event-codes.h
index a4206723f503..5a199f3d4a26 100644
--- a/include/uapi/linux/input-event-codes.h
+++ b/include/uapi/linux/input-event-codes.h
@@ -519,6 +519,7 @@
 #define KEY_NOTIFICATION_CENTER	0x1bc	/* Show/hide the notification center */
 #define KEY_PICKUP_PHONE	0x1bd	/* Answer incoming call */
 #define KEY_HANGUP_PHONE	0x1be	/* Decline incoming call */
+#define KEY_LINK_PHONE		0x1bf   /* AL Phone Syncing */
 
 #define KEY_DEL_EOL		0x1c0
 #define KEY_DEL_EOS		0x1c1
-- 
cgit v1.2.3


From eec8359f0797ef87c6ef6cbed6de08b02073b833 Mon Sep 17 00:00:00 2001
From: Taehee Yoo <ap420073@gmail.com>
Date: Tue, 14 Jan 2025 14:28:44 +0000
Subject: net: ethtool: add support for configuring hds-thresh

The hds-thresh option configures the threshold value of
the header-data-split.
If a received packet size is larger than this threshold value, a packet
will be split into header and payload.
The header indicates TCP and UDP header, but it depends on driver spec.
The bnxt_en driver supports HDS(Header-Data-Split) configuration at
FW level, affecting TCP and UDP too.
So, If hds-thresh is set, it affects UDP and TCP packets.

Example:
   # ethtool -G <interface name> hds-thresh <value>

   # ethtool -G enp14s0f0np0 tcp-data-split on hds-thresh 256
   # ethtool -g enp14s0f0np0
   Ring parameters for enp14s0f0np0:
   Pre-set maximums:
   ...
   HDS thresh:  1023
   Current hardware settings:
   ...
   TCP data split:         on
   HDS thresh:  256

The default/min/max values are not defined in the ethtool so the drivers
should define themself.
The 0 value means that all TCP/UDP packets' header and payload
will be split.

Tested-by: Stanislav Fomichev <sdf@fomichev.me>
Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Link: https://patch.msgid.link/20250114142852.3364986-3-ap420073@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/netlink/specs/ethtool.yaml       |  8 ++++++++
 Documentation/networking/ethtool-netlink.rst   | 10 +++++++++
 include/linux/ethtool.h                        |  9 +++++++++
 include/uapi/linux/ethtool_netlink_generated.h |  2 ++
 net/ethtool/netlink.h                          |  2 +-
 net/ethtool/rings.c                            | 28 +++++++++++++++++++++++---
 6 files changed, 55 insertions(+), 4 deletions(-)

(limited to 'include/uapi')

diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml
index 60f85fbf4156..66be04013048 100644
--- a/Documentation/netlink/specs/ethtool.yaml
+++ b/Documentation/netlink/specs/ethtool.yaml
@@ -332,6 +332,12 @@ attribute-sets:
       -
         name: tx-push-buf-len-max
         type: u32
+      -
+        name: hds-thresh
+        type: u32
+      -
+        name: hds-thresh-max
+        type: u32
 
   -
     name: mm-stat
@@ -1777,6 +1783,8 @@ operations:
             - rx-push
             - tx-push-buf-len
             - tx-push-buf-len-max
+            - hds-thresh
+            - hds-thresh-max
       dump: *ring-get-op
     -
       name: rings-set
diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst
index da846f1d998e..f70c0249860c 100644
--- a/Documentation/networking/ethtool-netlink.rst
+++ b/Documentation/networking/ethtool-netlink.rst
@@ -899,6 +899,10 @@ Kernel response contents:
   ``ETHTOOL_A_RINGS_RX_PUSH``               u8      flag of RX Push mode
   ``ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN``       u32     size of TX push buffer
   ``ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN_MAX``   u32     max size of TX push buffer
+  ``ETHTOOL_A_RINGS_HDS_THRESH``            u32     threshold of
+                                                    header / data split
+  ``ETHTOOL_A_RINGS_HDS_THRESH_MAX``        u32     max threshold of
+                                                    header / data split
   =======================================   ======  ===========================
 
 ``ETHTOOL_A_RINGS_TCP_DATA_SPLIT`` indicates whether the device is usable with
@@ -941,10 +945,12 @@ Request contents:
   ``ETHTOOL_A_RINGS_RX_JUMBO``          u32     size of RX jumbo ring
   ``ETHTOOL_A_RINGS_TX``                u32     size of TX ring
   ``ETHTOOL_A_RINGS_RX_BUF_LEN``        u32     size of buffers on the ring
+  ``ETHTOOL_A_RINGS_TCP_DATA_SPLIT``    u8      TCP header / data split
   ``ETHTOOL_A_RINGS_CQE_SIZE``          u32     Size of TX/RX CQE
   ``ETHTOOL_A_RINGS_TX_PUSH``           u8      flag of TX Push mode
   ``ETHTOOL_A_RINGS_RX_PUSH``           u8      flag of RX Push mode
   ``ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN``   u32     size of TX push buffer
+  ``ETHTOOL_A_RINGS_HDS_THRESH``        u32     threshold of header / data split
   ====================================  ======  ===========================
 
 Kernel checks that requested ring sizes do not exceed limits reported by
@@ -961,6 +967,10 @@ A bigger CQE can have more receive buffer pointers, and in turn the NIC can
 transfer a bigger frame from wire. Based on the NIC hardware, the overall
 completion queue size can be adjusted in the driver if CQE size is modified.
 
+``ETHTOOL_A_RINGS_HDS_THRESH`` specifies the threshold value of
+header / data split feature. If a received packet size is larger than this
+threshold value, header and data will be split.
+
 CHANNELS_GET
 ============
 
diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index d79bd201c1c8..e4136b0df892 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -78,6 +78,9 @@ enum {
  * @cqe_size: Size of TX/RX completion queue event
  * @tx_push_buf_len: Size of TX push buffer
  * @tx_push_buf_max_len: Maximum allowed size of TX push buffer
+ * @hds_thresh: Packet size threshold for header data split (HDS)
+ * @hds_thresh_max: Maximum supported setting for @hds_threshold
+ *
  */
 struct kernel_ethtool_ringparam {
 	u32	rx_buf_len;
@@ -87,6 +90,8 @@ struct kernel_ethtool_ringparam {
 	u32	cqe_size;
 	u32	tx_push_buf_len;
 	u32	tx_push_buf_max_len;
+	u32	hds_thresh;
+	u32	hds_thresh_max;
 };
 
 /**
@@ -97,6 +102,7 @@ struct kernel_ethtool_ringparam {
  * @ETHTOOL_RING_USE_RX_PUSH: capture for setting rx_push
  * @ETHTOOL_RING_USE_TX_PUSH_BUF_LEN: capture for setting tx_push_buf_len
  * @ETHTOOL_RING_USE_TCP_DATA_SPLIT: capture for setting tcp_data_split
+ * @ETHTOOL_RING_USE_HDS_THRS: capture for setting header-data-split-thresh
  */
 enum ethtool_supported_ring_param {
 	ETHTOOL_RING_USE_RX_BUF_LEN		= BIT(0),
@@ -105,6 +111,7 @@ enum ethtool_supported_ring_param {
 	ETHTOOL_RING_USE_RX_PUSH		= BIT(3),
 	ETHTOOL_RING_USE_TX_PUSH_BUF_LEN	= BIT(4),
 	ETHTOOL_RING_USE_TCP_DATA_SPLIT		= BIT(5),
+	ETHTOOL_RING_USE_HDS_THRS		= BIT(6),
 };
 
 #define __ETH_RSS_HASH_BIT(bit)	((u32)1 << (bit))
@@ -1157,6 +1164,7 @@ int ethtool_virtdev_set_link_ksettings(struct net_device *dev,
  * @rss_ctx:		XArray of custom RSS contexts
  * @rss_lock:		Protects entries in @rss_ctx.  May be taken from
  *			within RTNL.
+ * @hds_thresh:		HDS Threshold value.
  * @hds_config:		HDS value from userspace.
  * @wol_enabled:	Wake-on-LAN is enabled
  * @module_fw_flash_in_progress: Module firmware flashing is in progress.
@@ -1164,6 +1172,7 @@ int ethtool_virtdev_set_link_ksettings(struct net_device *dev,
 struct ethtool_netdev_state {
 	struct xarray		rss_ctx;
 	struct mutex		rss_lock;
+	u32			hds_thresh;
 	u8			hds_config;
 	unsigned		wol_enabled:1;
 	unsigned		module_fw_flash_in_progress:1;
diff --git a/include/uapi/linux/ethtool_netlink_generated.h b/include/uapi/linux/ethtool_netlink_generated.h
index 43993a2d68e5..2e17ff348f89 100644
--- a/include/uapi/linux/ethtool_netlink_generated.h
+++ b/include/uapi/linux/ethtool_netlink_generated.h
@@ -155,6 +155,8 @@ enum {
 	ETHTOOL_A_RINGS_RX_PUSH,
 	ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN,
 	ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN_MAX,
+	ETHTOOL_A_RINGS_HDS_THRESH,
+	ETHTOOL_A_RINGS_HDS_THRESH_MAX,
 
 	__ETHTOOL_A_RINGS_CNT,
 	ETHTOOL_A_RINGS_MAX = (__ETHTOOL_A_RINGS_CNT - 1)
diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h
index 1ce0a3de1430..ff69ca0715de 100644
--- a/net/ethtool/netlink.h
+++ b/net/ethtool/netlink.h
@@ -456,7 +456,7 @@ extern const struct nla_policy ethnl_features_set_policy[ETHTOOL_A_FEATURES_WANT
 extern const struct nla_policy ethnl_privflags_get_policy[ETHTOOL_A_PRIVFLAGS_HEADER + 1];
 extern const struct nla_policy ethnl_privflags_set_policy[ETHTOOL_A_PRIVFLAGS_FLAGS + 1];
 extern const struct nla_policy ethnl_rings_get_policy[ETHTOOL_A_RINGS_HEADER + 1];
-extern const struct nla_policy ethnl_rings_set_policy[ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN_MAX + 1];
+extern const struct nla_policy ethnl_rings_set_policy[ETHTOOL_A_RINGS_HDS_THRESH_MAX + 1];
 extern const struct nla_policy ethnl_channels_get_policy[ETHTOOL_A_CHANNELS_HEADER + 1];
 extern const struct nla_policy ethnl_channels_set_policy[ETHTOOL_A_CHANNELS_COMBINED_COUNT + 1];
 extern const struct nla_policy ethnl_coalesce_get_policy[ETHTOOL_A_COALESCE_HEADER + 1];
diff --git a/net/ethtool/rings.c b/net/ethtool/rings.c
index b2a2586b241f..a381913a19f0 100644
--- a/net/ethtool/rings.c
+++ b/net/ethtool/rings.c
@@ -61,7 +61,9 @@ static int rings_reply_size(const struct ethnl_req_info *req_base,
 	       nla_total_size(sizeof(u8))  +	/* _RINGS_TX_PUSH */
 	       nla_total_size(sizeof(u8))) +	/* _RINGS_RX_PUSH */
 	       nla_total_size(sizeof(u32)) +	/* _RINGS_TX_PUSH_BUF_LEN */
-	       nla_total_size(sizeof(u32));	/* _RINGS_TX_PUSH_BUF_LEN_MAX */
+	       nla_total_size(sizeof(u32)) +	/* _RINGS_TX_PUSH_BUF_LEN_MAX */
+	       nla_total_size(sizeof(u32)) +	/* _RINGS_HDS_THRESH */
+	       nla_total_size(sizeof(u32));	/* _RINGS_HDS_THRESH_MAX*/
 }
 
 static int rings_fill_reply(struct sk_buff *skb,
@@ -108,7 +110,12 @@ static int rings_fill_reply(struct sk_buff *skb,
 	     (nla_put_u32(skb, ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN_MAX,
 			  kr->tx_push_buf_max_len) ||
 	      nla_put_u32(skb, ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN,
-			  kr->tx_push_buf_len))))
+			  kr->tx_push_buf_len))) ||
+	    ((supported_ring_params & ETHTOOL_RING_USE_HDS_THRS) &&
+	     (nla_put_u32(skb, ETHTOOL_A_RINGS_HDS_THRESH,
+			  kr->hds_thresh) ||
+	      nla_put_u32(skb, ETHTOOL_A_RINGS_HDS_THRESH_MAX,
+			  kr->hds_thresh_max))))
 		return -EMSGSIZE;
 
 	return 0;
@@ -130,6 +137,7 @@ const struct nla_policy ethnl_rings_set_policy[] = {
 	[ETHTOOL_A_RINGS_TX_PUSH]		= NLA_POLICY_MAX(NLA_U8, 1),
 	[ETHTOOL_A_RINGS_RX_PUSH]		= NLA_POLICY_MAX(NLA_U8, 1),
 	[ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN]	= { .type = NLA_U32 },
+	[ETHTOOL_A_RINGS_HDS_THRESH]		= { .type = NLA_U32 },
 };
 
 static int
@@ -155,6 +163,14 @@ ethnl_set_rings_validate(struct ethnl_req_info *req_info,
 		return -EOPNOTSUPP;
 	}
 
+	if (tb[ETHTOOL_A_RINGS_HDS_THRESH] &&
+	    !(ops->supported_ring_params & ETHTOOL_RING_USE_HDS_THRS)) {
+		NL_SET_ERR_MSG_ATTR(info->extack,
+				    tb[ETHTOOL_A_RINGS_HDS_THRESH],
+				    "setting hds-thresh is not supported");
+		return -EOPNOTSUPP;
+	}
+
 	if (tb[ETHTOOL_A_RINGS_CQE_SIZE] &&
 	    !(ops->supported_ring_params & ETHTOOL_RING_USE_CQE_SIZE)) {
 		NL_SET_ERR_MSG_ATTR(info->extack,
@@ -223,6 +239,8 @@ ethnl_set_rings(struct ethnl_req_info *req_info, struct genl_info *info)
 			tb[ETHTOOL_A_RINGS_RX_PUSH], &mod);
 	ethnl_update_u32(&kernel_ringparam.tx_push_buf_len,
 			 tb[ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN], &mod);
+	ethnl_update_u32(&kernel_ringparam.hds_thresh,
+			 tb[ETHTOOL_A_RINGS_HDS_THRESH], &mod);
 	if (!mod)
 		return 0;
 
@@ -243,6 +261,8 @@ ethnl_set_rings(struct ethnl_req_info *req_info, struct genl_info *info)
 		err_attr = tb[ETHTOOL_A_RINGS_RX_JUMBO];
 	else if (ringparam.tx_pending > ringparam.tx_max_pending)
 		err_attr = tb[ETHTOOL_A_RINGS_TX];
+	else if (kernel_ringparam.hds_thresh > kernel_ringparam.hds_thresh_max)
+		err_attr = tb[ETHTOOL_A_RINGS_HDS_THRESH];
 	else
 		err_attr = NULL;
 	if (err_attr) {
@@ -261,8 +281,10 @@ ethnl_set_rings(struct ethnl_req_info *req_info, struct genl_info *info)
 
 	ret = dev->ethtool_ops->set_ringparam(dev, &ringparam,
 					      &kernel_ringparam, info->extack);
-	if (!ret)
+	if (!ret) {
 		dev->ethtool->hds_config = kernel_ringparam.tcp_data_split;
+		dev->ethtool->hds_thresh = kernel_ringparam.hds_thresh;
+	}
 
 	return ret < 0 ? ret : 1;
 }
-- 
cgit v1.2.3


From ad41ddeeac216417a52fbc1060577f3098f4e90e Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Date: Tue, 14 Jan 2025 19:08:39 +0200
Subject: PCI: Add TLP Prefix reading to pcie_read_tlp_log()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

pcie_read_tlp_log() handles only 4 Header Log DWORDs but TLP Prefix Log
(PCIe r6.1 secs 7.8.4.12 & 7.9.14.13) may also be present.

Generalize pcie_read_tlp_log() and struct pcie_tlp_log to also handle TLP
Prefix Log. The relevant registers are formatted identically in AER and DPC
Capability, but has these variations:

  a) The offsets of TLP Prefix Log registers vary.

  b) DPC RP PIO TLP Prefix Log register can be < 4 DWORDs.

  c) AER TLP Prefix Log Present (PCIe r6.1 sec 7.8.4.7) can indicate Prefix
     Log is not present.

Therefore callers must pass the offset of the TLP Prefix Log register and
the entire length to pcie_read_tlp_log() to be able to read the correct
number of TLP Prefix DWORDs from the correct offset.

Link: https://lore.kernel.org/r/20250114170840.1633-8-ilpo.jarvinen@linux.intel.com
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
[bhelgaas: squash ternary fix from
https://lore.kernel.org/r/20250116172019.88116-1-colin.i.king@gmail.com]
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/pci/pci.h             |  5 ++++-
 drivers/pci/pcie/aer.c        |  5 ++++-
 drivers/pci/pcie/dpc.c        | 13 +++++------
 drivers/pci/pcie/tlp.c        | 52 ++++++++++++++++++++++++++++++++++++++-----
 include/linux/aer.h           |  1 +
 include/uapi/linux/pci_regs.h | 10 +++++----
 6 files changed, 68 insertions(+), 18 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 55fcf3bac4f7..7797b2544d00 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -550,7 +550,9 @@ struct aer_err_info {
 int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info);
 void aer_print_error(struct pci_dev *dev, struct aer_err_info *info);
 
-int pcie_read_tlp_log(struct pci_dev *dev, int where, struct pcie_tlp_log *log);
+int pcie_read_tlp_log(struct pci_dev *dev, int where, int where2,
+		      unsigned int tlp_len, struct pcie_tlp_log *log);
+unsigned int aer_tlp_log_len(struct pci_dev *dev, u32 aercc);
 #endif	/* CONFIG_PCIEAER */
 
 #ifdef CONFIG_PCIEPORTBUS
@@ -569,6 +571,7 @@ void pci_dpc_init(struct pci_dev *pdev);
 void dpc_process_error(struct pci_dev *pdev);
 pci_ers_result_t dpc_reset_link(struct pci_dev *pdev);
 bool pci_dpc_recovered(struct pci_dev *pdev);
+unsigned int dpc_tlp_log_len(struct pci_dev *dev);
 #else
 static inline void pci_save_dpc_state(struct pci_dev *dev) { }
 static inline void pci_restore_dpc_state(struct pci_dev *dev) { }
diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
index 80c5ba8d8296..656dbf1ac45b 100644
--- a/drivers/pci/pcie/aer.c
+++ b/drivers/pci/pcie/aer.c
@@ -1248,7 +1248,10 @@ int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info)
 
 		if (info->status & AER_LOG_TLP_MASKS) {
 			info->tlp_header_valid = 1;
-			pcie_read_tlp_log(dev, aer + PCI_ERR_HEADER_LOG, &info->tlp);
+			pcie_read_tlp_log(dev, aer + PCI_ERR_HEADER_LOG,
+					  aer + PCI_ERR_PREFIX_LOG,
+					  aer_tlp_log_len(dev, aercc),
+					  &info->tlp);
 		}
 	}
 
diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c
index 0674d8c89bfa..0aa20bc58697 100644
--- a/drivers/pci/pcie/dpc.c
+++ b/drivers/pci/pcie/dpc.c
@@ -190,7 +190,7 @@ out:
 static void dpc_process_rp_pio_error(struct pci_dev *pdev)
 {
 	u16 cap = pdev->dpc_cap, dpc_status, first_error;
-	u32 status, mask, sev, syserr, exc, log, prefix;
+	u32 status, mask, sev, syserr, exc, log;
 	struct pcie_tlp_log tlp_log;
 	int i;
 
@@ -217,20 +217,19 @@ static void dpc_process_rp_pio_error(struct pci_dev *pdev)
 
 	if (pdev->dpc_rp_log_size < PCIE_STD_NUM_TLP_HEADERLOG)
 		goto clear_status;
-	pcie_read_tlp_log(pdev, cap + PCI_EXP_DPC_RP_PIO_HEADER_LOG, &tlp_log);
+	pcie_read_tlp_log(pdev, cap + PCI_EXP_DPC_RP_PIO_HEADER_LOG,
+			  cap + PCI_EXP_DPC_RP_PIO_TLPPREFIX_LOG,
+			  dpc_tlp_log_len(pdev), &tlp_log);
 	pci_err(pdev, "TLP Header: %#010x %#010x %#010x %#010x\n",
 		tlp_log.dw[0], tlp_log.dw[1], tlp_log.dw[2], tlp_log.dw[3]);
+	for (i = 0; i < pdev->dpc_rp_log_size - PCIE_STD_NUM_TLP_HEADERLOG - 1; i++)
+		pci_err(pdev, "TLP Prefix Header: dw%d, %#010x\n", i, tlp_log.prefix[i]);
 
 	if (pdev->dpc_rp_log_size < PCIE_STD_NUM_TLP_HEADERLOG + 1)
 		goto clear_status;
 	pci_read_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_IMPSPEC_LOG, &log);
 	pci_err(pdev, "RP PIO ImpSpec Log %#010x\n", log);
 
-	for (i = 0; i < pdev->dpc_rp_log_size - PCIE_STD_NUM_TLP_HEADERLOG - 1; i++) {
-		pci_read_config_dword(pdev,
-			cap + PCI_EXP_DPC_RP_PIO_TLPPREFIX_LOG + i * 4, &prefix);
-		pci_err(pdev, "TLP Prefix Header: dw%d, %#010x\n", i, prefix);
-	}
  clear_status:
 	pci_write_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_STATUS, status);
 }
diff --git a/drivers/pci/pcie/tlp.c b/drivers/pci/pcie/tlp.c
index d7ad99f466b9..0071ea8903ed 100644
--- a/drivers/pci/pcie/tlp.c
+++ b/drivers/pci/pcie/tlp.c
@@ -11,26 +11,68 @@
 
 #include "../pci.h"
 
+/**
+ * aer_tlp_log_len - Calculate AER Capability TLP Header/Prefix Log length
+ * @dev: PCIe device
+ * @aercc: AER Capabilities and Control register value
+ *
+ * Return: TLP Header/Prefix Log length
+ */
+unsigned int aer_tlp_log_len(struct pci_dev *dev, u32 aercc)
+{
+	return PCIE_STD_NUM_TLP_HEADERLOG +
+	       ((aercc & PCI_ERR_CAP_PREFIX_LOG_PRESENT) ?
+		dev->eetlp_prefix_max : 0);
+}
+
+#ifdef CONFIG_PCIE_DPC
+/**
+ * dpc_tlp_log_len - Calculate DPC RP PIO TLP Header/Prefix Log length
+ * @dev: PCIe device
+ *
+ * Return: TLP Header/Prefix Log length
+ */
+unsigned int dpc_tlp_log_len(struct pci_dev *dev)
+{
+	/* Remove ImpSpec Log register from the count */
+	if (dev->dpc_rp_log_size >= PCIE_STD_NUM_TLP_HEADERLOG + 1)
+		return dev->dpc_rp_log_size - 1;
+
+	return dev->dpc_rp_log_size;
+}
+#endif
+
 /**
  * pcie_read_tlp_log - read TLP Header Log
  * @dev: PCIe device
  * @where: PCI Config offset of TLP Header Log
+ * @where2: PCI Config offset of TLP Prefix Log
+ * @tlp_len: TLP Log length (Header Log + TLP Prefix Log in DWORDs)
  * @log: TLP Log structure to fill
  *
  * Fill @log from TLP Header Log registers, e.g., AER or DPC.
  *
  * Return: 0 on success and filled TLP Log structure, <0 on error.
  */
-int pcie_read_tlp_log(struct pci_dev *dev, int where,
-		      struct pcie_tlp_log *log)
+int pcie_read_tlp_log(struct pci_dev *dev, int where, int where2,
+		      unsigned int tlp_len, struct pcie_tlp_log *log)
 {
 	unsigned int i;
-	int ret;
+	int off, ret;
+	u32 *to;
 
 	memset(log, 0, sizeof(*log));
 
-	for (i = 0; i < PCIE_STD_NUM_TLP_HEADERLOG; i++) {
-		ret = pci_read_config_dword(dev, where + i * 4, &log->dw[i]);
+	for (i = 0; i < tlp_len; i++) {
+		if (i < PCIE_STD_NUM_TLP_HEADERLOG) {
+			off = where + i * 4;
+			to = &log->dw[i];
+		} else {
+			off = where2 + (i - PCIE_STD_NUM_TLP_HEADERLOG) * 4;
+			to = &log->prefix[i - PCIE_STD_NUM_TLP_HEADERLOG];
+		}
+
+		ret = pci_read_config_dword(dev, off, to);
 		if (ret)
 			return pcibios_err_to_errno(ret);
 	}
diff --git a/include/linux/aer.h b/include/linux/aer.h
index 4ef6515c3205..947b63091902 100644
--- a/include/linux/aer.h
+++ b/include/linux/aer.h
@@ -27,6 +27,7 @@ struct pci_dev;
 
 struct pcie_tlp_log {
 	u32 dw[PCIE_STD_NUM_TLP_HEADERLOG];
+	u32 prefix[PCIE_STD_MAX_TLP_PREFIXLOG];
 };
 
 struct aer_capability_regs {
diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
index 14a6306c4ce1..82866ac0bda7 100644
--- a/include/uapi/linux/pci_regs.h
+++ b/include/uapi/linux/pci_regs.h
@@ -790,10 +790,11 @@
 	/* Same bits as above */
 #define PCI_ERR_CAP		0x18	/* Advanced Error Capabilities & Ctrl*/
 #define  PCI_ERR_CAP_FEP(x)	((x) & 0x1f)	/* First Error Pointer */
-#define  PCI_ERR_CAP_ECRC_GENC	0x00000020	/* ECRC Generation Capable */
-#define  PCI_ERR_CAP_ECRC_GENE	0x00000040	/* ECRC Generation Enable */
-#define  PCI_ERR_CAP_ECRC_CHKC	0x00000080	/* ECRC Check Capable */
-#define  PCI_ERR_CAP_ECRC_CHKE	0x00000100	/* ECRC Check Enable */
+#define  PCI_ERR_CAP_ECRC_GENC		0x00000020 /* ECRC Generation Capable */
+#define  PCI_ERR_CAP_ECRC_GENE		0x00000040 /* ECRC Generation Enable */
+#define  PCI_ERR_CAP_ECRC_CHKC		0x00000080 /* ECRC Check Capable */
+#define  PCI_ERR_CAP_ECRC_CHKE		0x00000100 /* ECRC Check Enable */
+#define  PCI_ERR_CAP_PREFIX_LOG_PRESENT	0x00000800 /* TLP Prefix Log Present */
 #define PCI_ERR_HEADER_LOG	0x1c	/* Header Log Register (16 bytes) */
 #define PCI_ERR_ROOT_COMMAND	0x2c	/* Root Error Command */
 #define  PCI_ERR_ROOT_CMD_COR_EN	0x00000001 /* Correctable Err Reporting Enable */
@@ -809,6 +810,7 @@
 #define  PCI_ERR_ROOT_FATAL_RCV		0x00000040 /* Fatal Received */
 #define  PCI_ERR_ROOT_AER_IRQ		0xf8000000 /* Advanced Error Interrupt Message Number */
 #define PCI_ERR_ROOT_ERR_SRC	0x34	/* Error Source Identification */
+#define PCI_ERR_PREFIX_LOG	0x38	/* TLP Prefix LOG Register (up to 16 bytes) */
 
 /* Virtual Channel */
 #define PCI_VC_PORT_CAP1	0x04
-- 
cgit v1.2.3


From 3ba0262a8fed29efe28e3ce3162d1794a58aa94f Mon Sep 17 00:00:00 2001
From: "Russell King (Oracle)" <rmk+kernel@armlinux.org.uk>
Date: Wed, 15 Jan 2025 20:42:27 +0000
Subject: net: mdio: add definition for clock stop capable bit

Add a definition for the clock stop capable bit in the PCS MMD. This
bit indicates whether the MAC is able to stop the transmit xMII clock
while it is signalling LPI.

Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
Link: https://patch.msgid.link/E1tYADb-0014PV-6T@rmk-PC.armlinux.org.uk
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/mdio.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/mdio.h b/include/uapi/linux/mdio.h
index f0d3f268240d..6975f182b22c 100644
--- a/include/uapi/linux/mdio.h
+++ b/include/uapi/linux/mdio.h
@@ -125,6 +125,7 @@
 #define MDIO_STAT1_LPOWERABLE		0x0002	/* Low-power ability */
 #define MDIO_STAT1_LSTATUS		BMSR_LSTATUS
 #define MDIO_STAT1_FAULT		0x0080	/* Fault */
+#define MDIO_PCS_STAT1_CLKSTOP_CAP	0x0040
 #define MDIO_AN_STAT1_LPABLE		0x0001	/* Link partner AN ability */
 #define MDIO_AN_STAT1_ABLE		BMSR_ANEGCAPABLE
 #define MDIO_AN_STAT1_RFAULT		BMSR_RFAULT
-- 
cgit v1.2.3


From 3194e36488e2dae05f9a822c73eaa367e47b2e3d Mon Sep 17 00:00:00 2001
From: John Garry <john.g.garry@oracle.com>
Date: Thu, 16 Jan 2025 17:02:56 +0000
Subject: dm-table: atomic writes support

Support stacking atomic write limits for DM devices.

All the pre-existing code in blk_stack_atomic_writes_limits() already takes
care of finding the aggregrate limits from the bottom devices.

Feature flag DM_TARGET_ATOMIC_WRITES is introduced so that atomic writes
can be enabled on personalities selectively. This is to ensure that atomic
writes are only enabled when verified to be working properly (for a
specific personality). In addition, it just may not make sense to enable
atomic writes on some personalities (so this flag also helps there).

Signed-off-by: John Garry <john.g.garry@oracle.com>
Reviewed-by: Mike Snitzer <snitzer@kernel.org>
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
---
 drivers/md/dm-table.c         | 29 +++++++++++++++++++++++++++++
 include/linux/device-mapper.h |  3 +++
 include/uapi/linux/dm-ioctl.h |  4 ++--
 3 files changed, 34 insertions(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index bd8b796ae683..0ef5203387b2 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -1806,6 +1806,32 @@ static bool dm_table_supports_secure_erase(struct dm_table *t)
 	return true;
 }
 
+static int device_not_atomic_write_capable(struct dm_target *ti,
+			struct dm_dev *dev, sector_t start,
+			sector_t len, void *data)
+{
+	return !bdev_can_atomic_write(dev->bdev);
+}
+
+static bool dm_table_supports_atomic_writes(struct dm_table *t)
+{
+	for (unsigned int i = 0; i < t->num_targets; i++) {
+		struct dm_target *ti = dm_table_get_target(t, i);
+
+		if (!dm_target_supports_atomic_writes(ti->type))
+			return false;
+
+		if (!ti->type->iterate_devices)
+			return false;
+
+		if (ti->type->iterate_devices(ti,
+			device_not_atomic_write_capable, NULL)) {
+			return false;
+		}
+	}
+	return true;
+}
+
 int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
 			      struct queue_limits *limits)
 {
@@ -1854,6 +1880,9 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
 			return r;
 	}
 
+	if (dm_table_supports_atomic_writes(t))
+		limits->features |= BLK_FEAT_ATOMIC_WRITES;
+
 	r = queue_limits_set(q, limits);
 	if (r)
 		return r;
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 8321f65897f3..bcc6d7b69470 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -299,6 +299,9 @@ struct target_type {
 #define dm_target_supports_mixed_zoned_model(type) (false)
 #endif
 
+#define DM_TARGET_ATOMIC_WRITES		0x00000400
+#define dm_target_supports_atomic_writes(type) ((type)->features & DM_TARGET_ATOMIC_WRITES)
+
 struct dm_target {
 	struct dm_table *table;
 	struct target_type *type;
diff --git a/include/uapi/linux/dm-ioctl.h b/include/uapi/linux/dm-ioctl.h
index 1990b5700f69..b08c7378164d 100644
--- a/include/uapi/linux/dm-ioctl.h
+++ b/include/uapi/linux/dm-ioctl.h
@@ -286,9 +286,9 @@ enum {
 #define DM_DEV_SET_GEOMETRY	_IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl)
 
 #define DM_VERSION_MAJOR	4
-#define DM_VERSION_MINOR	48
+#define DM_VERSION_MINOR	49
 #define DM_VERSION_PATCHLEVEL	0
-#define DM_VERSION_EXTRA	"-ioctl (2023-03-01)"
+#define DM_VERSION_EXTRA	"-ioctl (2025-01-17)"
 
 /* Status bits */
 #define DM_READONLY_FLAG	(1 << 0) /* In/Out */
-- 
cgit v1.2.3


From 6a128cdf1926b20a94d6af7d7d03b76ba19a4f8b Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Thu, 16 Jan 2025 12:46:25 +0200
Subject: net: ethtool: ts: add separate counter for unconfirmed one-step TX
 timestamps

For packets with two-step timestamp requests, the hardware timestamp
comes back to the driver through a confirmation mechanism of sorts,
which allows the driver to confidently bump the successful "pkts"
counter.

For one-step PTP, the NIC is supposed to autonomously insert its
hardware TX timestamp in the packet headers while simultaneously
transmitting it. There may be a confirmation that this was done
successfully, or there may not.

None of the current drivers which implement ethtool_ops :: get_ts_stats()
also support HWTSTAMP_TX_ONESTEP_SYNC or HWTSTAMP_TX_ONESTEP_SYNC, so it
is a bit unclear which model to follow. But there are NICs, such as DSA,
where there is no transmit confirmation at all. Here, it would be wrong /
misleading to increment the successful "pkts" counter, because one-step
PTP packets can be dropped on TX just like any other packets.

So introduce a special counter which signifies "yes, an attempt was made,
but we don't know whether it also exited the port or not". I expect that
for one-step PTP packets where a confirmation is available, the "pkts"
counter would be bumped.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Jakub Kicinski <kuba@kernel.org>
Link: https://patch.msgid.link/20250116104628.123555-2-vladimir.oltean@nxp.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/netlink/specs/ethtool.yaml       |  3 +++
 Documentation/networking/ethtool-netlink.rst   | 16 +++++++++++-----
 include/linux/ethtool.h                        |  7 +++++++
 include/uapi/linux/ethtool_netlink_generated.h |  1 +
 net/ethtool/tsinfo.c                           |  2 ++
 5 files changed, 24 insertions(+), 5 deletions(-)

(limited to 'include/uapi')

diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml
index 66be04013048..259cb211a338 100644
--- a/Documentation/netlink/specs/ethtool.yaml
+++ b/Documentation/netlink/specs/ethtool.yaml
@@ -842,6 +842,9 @@ attribute-sets:
       -
         name: tx-err
         type: uint
+      -
+        name: tx-onestep-pkts-unconfirmed
+        type: uint
   -
     name: ts-hwtstamp-provider
     attr-cnt-name: __ethtool-a-ts-hwtstamp-provider-cnt
diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst
index f70c0249860c..3770a2294509 100644
--- a/Documentation/networking/ethtool-netlink.rst
+++ b/Documentation/networking/ethtool-netlink.rst
@@ -1281,11 +1281,17 @@ would be empty (no bit set).
 
 Additional hardware timestamping statistics response contents:
 
-  =====================================  ======  ===================================
-  ``ETHTOOL_A_TS_STAT_TX_PKTS``          uint    Packets with Tx HW timestamps
-  ``ETHTOOL_A_TS_STAT_TX_LOST``          uint    Tx HW timestamp not arrived count
-  ``ETHTOOL_A_TS_STAT_TX_ERR``           uint    HW error request Tx timestamp count
-  =====================================  ======  ===================================
+  ==================================================  ======  =====================
+  ``ETHTOOL_A_TS_STAT_TX_PKTS``                       uint    Packets with Tx
+                                                              HW timestamps
+  ``ETHTOOL_A_TS_STAT_TX_LOST``                       uint    Tx HW timestamp
+                                                              not arrived count
+  ``ETHTOOL_A_TS_STAT_TX_ERR``                        uint    HW error request
+                                                              Tx timestamp count
+  ``ETHTOOL_A_TS_STAT_TX_ONESTEP_PKTS_UNCONFIRMED``   uint    Packets with one-step
+                                                              HW TX timestamps with
+                                                              unconfirmed delivery
+  ==================================================  ======  =====================
 
 CABLE_TEST
 ==========
diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index e4136b0df892..64301ddf2f59 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -559,6 +559,12 @@ struct ethtool_rmon_stats {
 /**
  * struct ethtool_ts_stats - HW timestamping statistics
  * @pkts: Number of packets successfully timestamped by the hardware.
+ * @onestep_pkts_unconfirmed: Number of PTP packets with one-step TX
+ *			      timestamping that were sent, but for which the
+ *			      device offers no confirmation whether they made
+ *			      it onto the wire and the timestamp was inserted
+ *			      in the originTimestamp or correctionField, or
+ *			      not.
  * @lost: Number of hardware timestamping requests where the timestamping
  *	information from the hardware never arrived for submission with
  *	the skb.
@@ -571,6 +577,7 @@ struct ethtool_rmon_stats {
 struct ethtool_ts_stats {
 	struct_group(tx_stats,
 		u64 pkts;
+		u64 onestep_pkts_unconfirmed;
 		u64 lost;
 		u64 err;
 	);
diff --git a/include/uapi/linux/ethtool_netlink_generated.h b/include/uapi/linux/ethtool_netlink_generated.h
index 2e17ff348f89..fe24c3459ac0 100644
--- a/include/uapi/linux/ethtool_netlink_generated.h
+++ b/include/uapi/linux/ethtool_netlink_generated.h
@@ -382,6 +382,7 @@ enum {
 	ETHTOOL_A_TS_STAT_TX_PKTS,
 	ETHTOOL_A_TS_STAT_TX_LOST,
 	ETHTOOL_A_TS_STAT_TX_ERR,
+	ETHTOOL_A_TS_STAT_TX_ONESTEP_PKTS_UNCONFIRMED,
 
 	__ETHTOOL_A_TS_STAT_CNT,
 	ETHTOOL_A_TS_STAT_MAX = (__ETHTOOL_A_TS_STAT_CNT - 1)
diff --git a/net/ethtool/tsinfo.c b/net/ethtool/tsinfo.c
index 7e495a41aeec..691be6c445b3 100644
--- a/net/ethtool/tsinfo.c
+++ b/net/ethtool/tsinfo.c
@@ -186,6 +186,8 @@ static int tsinfo_put_stats(struct sk_buff *skb,
 
 	if (tsinfo_put_stat(skb, stats->tx_stats.pkts,
 			    ETHTOOL_A_TS_STAT_TX_PKTS) ||
+	    tsinfo_put_stat(skb, stats->tx_stats.onestep_pkts_unconfirmed,
+			    ETHTOOL_A_TS_STAT_TX_ONESTEP_PKTS_UNCONFIRMED) ||
 	    tsinfo_put_stat(skb, stats->tx_stats.lost,
 			    ETHTOOL_A_TS_STAT_TX_LOST) ||
 	    tsinfo_put_stat(skb, stats->tx_stats.err,
-- 
cgit v1.2.3


From 46757a3e7d50dac923888e7fbe68377736f13c70 Mon Sep 17 00:00:00 2001
From: "Geoffrey D. Bennett" <g@b4.vu>
Date: Fri, 17 Jan 2025 04:17:38 +1030
Subject: ALSA: FCP: Add Focusrite Control Protocol driver

Add a new kernel driver for the Focusrite Control Protocol (FCP),
which is used by Focusrite Scarlett 2nd Gen, 3rd Gen, 4th Gen, Clarett
USB, Clarett+, and Vocaster series audio interfaces. This driver
provides a user-space control interface via ALSA's hwdep subsystem.

Unlike the existing Scarlett2 driver which implements all ALSA
controls in kernel space, this new FCP driver takes a different
approach by providing a minimal kernel interface that allows a
user-space driver to send FCP commands and receive notifications. The
only control implemented in kernel space is the Level Meter, since it
requires frequent polling of volatile data.

While this driver supports all interfaces that the Scarlett2 driver
works with, it is initially enabled only for 4th Gen 16i16, 18i16,
and 18i20 interfaces that are not supported by the Scarlett2 driver.

Signed-off-by: Geoffrey D. Bennett <g@b4.vu>
Link: https://patch.msgid.link/597741a9b1198b965561547511d3d345f91cba20.1737048528.git.g@b4.vu
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 MAINTAINERS              |   10 +-
 include/uapi/sound/fcp.h |  120 +++++
 include/uapi/sound/tlv.h |    2 +
 sound/usb/Makefile       |    1 +
 sound/usb/fcp.c          | 1134 ++++++++++++++++++++++++++++++++++++++++++++++
 sound/usb/fcp.h          |    7 +
 sound/usb/mixer_quirks.c |    7 +
 7 files changed, 1277 insertions(+), 4 deletions(-)
 create mode 100644 include/uapi/sound/fcp.h
 create mode 100644 sound/usb/fcp.c
 create mode 100644 sound/usb/fcp.h

(limited to 'include/uapi')

diff --git a/MAINTAINERS b/MAINTAINERS
index baf0eeb9a355..336be70068e9 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8951,14 +8951,16 @@ L:	linux-input@vger.kernel.org
 S:	Maintained
 F:	drivers/input/joystick/fsia6b.c
 
-FOCUSRITE SCARLETT2 MIXER DRIVER (Scarlett Gen 2+ and Clarett)
+FOCUSRITE CONTROL PROTOCOL/SCARLETT2 MIXER DRIVERS (Scarlett Gen 2+, Clarett, and Vocaster)
 M:	Geoffrey D. Bennett <g@b4.vu>
 L:	linux-sound@vger.kernel.org
 S:	Maintained
-W:	https://github.com/geoffreybennett/scarlett-gen2
-B:	https://github.com/geoffreybennett/scarlett-gen2/issues
-T:	git https://github.com/geoffreybennett/scarlett-gen2.git
+W:	https://github.com/geoffreybennett/linux-fcp
+B:	https://github.com/geoffreybennett/linux-fcp/issues
+T:	git https://github.com/geoffreybennett/linux-fcp.git
+F:	include/uapi/sound/fcp.h
 F:	include/uapi/sound/scarlett2.h
+F:	sound/usb/fcp.c
 F:	sound/usb/mixer_scarlett2.c
 
 FORCEDETH GIGABIT ETHERNET DRIVER
diff --git a/include/uapi/sound/fcp.h b/include/uapi/sound/fcp.h
new file mode 100644
index 000000000000..aea428956d8f
--- /dev/null
+++ b/include/uapi/sound/fcp.h
@@ -0,0 +1,120 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Focusrite Control Protocol Driver for ALSA
+ *
+ * Copyright (c) 2024-2025 by Geoffrey D. Bennett <g at b4.vu>
+ */
+/*
+ * DOC: FCP (Focusrite Control Protocol) User-Space API
+ *
+ * This header defines the interface between the FCP kernel driver and
+ * user-space programs to enable the use of the proprietary features
+ * available in Focusrite USB audio interfaces. This includes Scarlett
+ * 2nd Gen, 3rd Gen, 4th Gen, Clarett USB, Clarett+, and Vocaster
+ * series devices.
+ *
+ * The interface is provided via ALSA's hwdep interface. Opening the
+ * hwdep device requires CAP_SYS_RAWIO privileges as this interface
+ * provides near-direct access.
+ *
+ * For details on the FCP protocol, refer to the kernel scarlett2
+ * driver in sound/usb/mixer_scarlett2.c and the fcp-support project
+ * at https://github.com/geoffreybennett/fcp-support
+ *
+ * For examples of using these IOCTLs, see the fcp-server source in
+ * the fcp-support project.
+ *
+ * IOCTL Interface
+ * --------------
+ * FCP_IOCTL_PVERSION:
+ *   Returns the protocol version supported by the driver.
+ *
+ * FCP_IOCTL_INIT:
+ *   Initialises the protocol and synchronises sequence numbers
+ *   between the driver and device. Must be called at least once
+ *   before sending commands. Can be safely called again at any time.
+ *
+ * FCP_IOCTL_CMD:
+ *   Sends an FCP command to the device and returns the response.
+ *   Requires prior initialisation via FCP_IOCTL_INIT.
+ *
+ * FCP_IOCTL_SET_METER_MAP:
+ *   Configures the Level Meter control's mapping between device
+ *   meters and control channels. Requires FCP_IOCTL_INIT to have been
+ *   called first. The map size and number of slots cannot be changed
+ *   after initial configuration, although the map itself can be
+ *   updated. Once configured, the Level Meter remains functional even
+ *   after the hwdep device is closed.
+ *
+ * FCP_IOCTL_SET_METER_LABELS:
+ *   Set the labels for the Level Meter control. Requires
+ *   FCP_IOCTL_SET_METER_MAP to have been called first. labels[]
+ *   should contain a sequence of null-terminated labels corresponding
+ *   to the control's channels.
+ */
+#ifndef __UAPI_SOUND_FCP_H
+#define __UAPI_SOUND_FCP_H
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+#define FCP_HWDEP_MAJOR 2
+#define FCP_HWDEP_MINOR 0
+#define FCP_HWDEP_SUBMINOR 0
+
+#define FCP_HWDEP_VERSION \
+	((FCP_HWDEP_MAJOR << 16) | \
+	 (FCP_HWDEP_MINOR << 8) | \
+	  FCP_HWDEP_SUBMINOR)
+
+#define FCP_HWDEP_VERSION_MAJOR(v) (((v) >> 16) & 0xFF)
+#define FCP_HWDEP_VERSION_MINOR(v) (((v) >> 8) & 0xFF)
+#define FCP_HWDEP_VERSION_SUBMINOR(v) ((v) & 0xFF)
+
+/* Get protocol version */
+#define FCP_IOCTL_PVERSION _IOR('S', 0x60, int)
+
+/* Start the protocol */
+
+/* Step 0 and step 2 responses are variable length and placed in
+ * resp[] one after the other.
+ */
+struct fcp_init {
+	__u16 step0_resp_size;
+	__u16 step2_resp_size;
+	__u32 init1_opcode;
+	__u32 init2_opcode;
+	__u8  resp[];
+} __attribute__((packed));
+
+#define FCP_IOCTL_INIT _IOWR('S', 0x64, struct fcp_init)
+
+/* Perform a command */
+
+/* The request data is placed in data[] and the response data will
+ * overwrite it.
+ */
+struct fcp_cmd {
+	__u32 opcode;
+	__u16 req_size;
+	__u16 resp_size;
+	__u8  data[];
+} __attribute__((packed));
+#define FCP_IOCTL_CMD _IOWR('S', 0x65, struct fcp_cmd)
+
+/* Set the meter map */
+struct fcp_meter_map {
+	__u16 map_size;
+	__u16 meter_slots;
+	__s16 map[];
+} __attribute__((packed));
+#define FCP_IOCTL_SET_METER_MAP _IOW('S', 0x66, struct fcp_meter_map)
+
+/* Set the meter labels */
+struct fcp_meter_labels {
+	__u16 labels_size;
+	char  labels[];
+} __attribute__((packed));
+#define FCP_IOCTL_SET_METER_LABELS _IOW('S', 0x67, struct fcp_meter_labels)
+
+#endif /* __UAPI_SOUND_FCP_H */
diff --git a/include/uapi/sound/tlv.h b/include/uapi/sound/tlv.h
index b99a2414b53d..5bb55c80e095 100644
--- a/include/uapi/sound/tlv.h
+++ b/include/uapi/sound/tlv.h
@@ -18,6 +18,8 @@
 #define SNDRV_CTL_TLVT_CHMAP_VAR	0x102	/* channels freely swappable */
 #define SNDRV_CTL_TLVT_CHMAP_PAIRED	0x103	/* pair-wise swappable */
 
+#define SNDRV_CTL_TLVT_FCP_CHANNEL_LABELS	0x110	/* channel labels */
+
 /*
  * TLV structure is right behind the struct snd_ctl_tlv:
  *   unsigned int type  	- see SNDRV_CTL_TLVT_*
diff --git a/sound/usb/Makefile b/sound/usb/Makefile
index 0532499dbc6d..30bd5348477b 100644
--- a/sound/usb/Makefile
+++ b/sound/usb/Makefile
@@ -6,6 +6,7 @@
 snd-usb-audio-y := 	card.o \
 			clock.o \
 			endpoint.o \
+			fcp.o \
 			format.o \
 			helper.o \
 			implicit.o \
diff --git a/sound/usb/fcp.c b/sound/usb/fcp.c
new file mode 100644
index 000000000000..655e4f7338ed
--- /dev/null
+++ b/sound/usb/fcp.c
@@ -0,0 +1,1134 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Focusrite Control Protocol Driver for ALSA
+ *
+ * Copyright (c) 2024-2025 by Geoffrey D. Bennett <g at b4.vu>
+ */
+/*
+ * DOC: Theory of Operation
+ *
+ * The Focusrite Control Protocol (FCP) driver provides a minimal
+ * kernel interface that allows a user-space driver (primarily
+ * fcp-server) to communicate with Focusrite USB audio interfaces
+ * using their vendor-specific protocol. This protocol is used by
+ * Scarlett 2nd Gen, 3rd Gen, 4th Gen, Clarett USB, Clarett+, and
+ * Vocaster series devices.
+ *
+ * Unlike the existing scarlett2 driver which implements all controls
+ * in kernel space, this driver takes a lighter-weight approach by
+ * moving most functionality to user space. The only control
+ * implemented in kernel space is the Level Meter, since it requires
+ * frequent polling of volatile data.
+ *
+ * The driver provides an hwdep interface that allows the user-space
+ * driver to:
+ *  - Initialise the protocol
+ *  - Send arbitrary FCP commands to the device
+ *  - Receive notifications from the device
+ *  - Configure the Level Meter control
+ *
+ * Usage Flow
+ * ----------
+ * 1. Open the hwdep device (requires CAP_SYS_RAWIO)
+ * 2. Get protocol version using FCP_IOCTL_PVERSION
+ * 3. Initialise protocol using FCP_IOCTL_INIT
+ * 4. Send commands using FCP_IOCTL_CMD
+ * 5. Receive notifications using read()
+ * 6. Optionally set up the Level Meter control using
+ *    FCP_IOCTL_SET_METER_MAP
+ * 7. Optionally add labels to the Level Meter control using
+ *    FCP_IOCTL_SET_METER_LABELS
+ *
+ * Level Meter
+ * -----------
+ * The Level Meter is implemented as an ALSA control that provides
+ * real-time level monitoring. When the control is read, the driver
+ * requests the current meter levels from the device, translates the
+ * levels using the configured mapping, and returns the result to the
+ * user. The mapping between device meters and the ALSA control's
+ * channels is configured with FCP_IOCTL_SET_METER_MAP.
+ *
+ * Labels for the Level Meter channels can be set using
+ * FCP_IOCTL_SET_METER_LABELS and read by applications through the
+ * control's TLV data. The labels are transferred as a sequence of
+ * null-terminated strings.
+ */
+
+#include <linux/slab.h>
+#include <linux/usb.h>
+
+#include <sound/control.h>
+#include <sound/hwdep.h>
+#include <sound/tlv.h>
+
+#include <uapi/sound/fcp.h>
+
+#include "usbaudio.h"
+#include "mixer.h"
+#include "helper.h"
+
+#include "fcp.h"
+
+/* notify waiting to send to *file */
+struct fcp_notify {
+	wait_queue_head_t queue;
+	u32               event;
+	spinlock_t        lock;
+};
+
+struct fcp_data {
+	struct usb_mixer_interface *mixer;
+
+	struct mutex mutex;         /* serialise access to the device */
+	struct completion cmd_done; /* wait for command completion */
+	struct file *file;          /* hwdep file */
+
+	struct fcp_notify notify;
+
+	u8  bInterfaceNumber;
+	u8  bEndpointAddress;
+	u16 wMaxPacketSize;
+	u8  bInterval;
+
+	uint16_t step0_resp_size;
+	uint16_t step2_resp_size;
+	uint32_t init1_opcode;
+	uint32_t init2_opcode;
+
+	u8  init;
+	u16 seq;
+
+	u8                   num_meter_slots;
+	s16                 *meter_level_map;
+	u32                 *meter_levels;
+	struct snd_kcontrol *meter_ctl;
+
+	unsigned int *meter_labels_tlv;
+	int           meter_labels_tlv_size;
+};
+
+/*** USB Interactions ***/
+
+/* FCP Command ACK notification bit */
+#define FCP_NOTIFY_ACK 1
+
+/* Vendor-specific USB control requests */
+#define FCP_USB_REQ_STEP0  0
+#define FCP_USB_REQ_CMD_TX 2
+#define FCP_USB_REQ_CMD_RX 3
+
+/* Focusrite Control Protocol opcodes that the kernel side needs to
+ * know about
+ */
+#define FCP_USB_REBOOT      0x00000003
+#define FCP_USB_GET_METER   0x00001001
+#define FCP_USB_FLASH_ERASE 0x00004002
+#define FCP_USB_FLASH_WRITE 0x00004004
+
+#define FCP_USB_METER_LEVELS_GET_MAGIC 1
+
+#define FCP_SEGMENT_APP_GOLD 0
+
+/* Forward declarations */
+static int fcp_init(struct usb_mixer_interface *mixer,
+		    void *step0_resp, void *step2_resp);
+
+/* FCP command request/response format */
+struct fcp_usb_packet {
+	__le32 opcode;
+	__le16 size;
+	__le16 seq;
+	__le32 error;
+	__le32 pad;
+	u8 data[];
+};
+
+static void fcp_fill_request_header(struct fcp_data *private,
+				    struct fcp_usb_packet *req,
+				    u32 opcode, u16 req_size)
+{
+	/* sequence must go up by 1 for each request */
+	u16 seq = private->seq++;
+
+	req->opcode = cpu_to_le32(opcode);
+	req->size = cpu_to_le16(req_size);
+	req->seq = cpu_to_le16(seq);
+	req->error = 0;
+	req->pad = 0;
+}
+
+static int fcp_usb_tx(struct usb_device *dev, int interface,
+		      void *buf, u16 size)
+{
+	return snd_usb_ctl_msg(dev, usb_sndctrlpipe(dev, 0),
+			FCP_USB_REQ_CMD_TX,
+			USB_RECIP_INTERFACE | USB_TYPE_CLASS | USB_DIR_OUT,
+			0, interface, buf, size);
+}
+
+static int fcp_usb_rx(struct usb_device *dev, int interface,
+		      void *buf, u16 size)
+{
+	return snd_usb_ctl_msg(dev, usb_rcvctrlpipe(dev, 0),
+			FCP_USB_REQ_CMD_RX,
+			USB_RECIP_INTERFACE | USB_TYPE_CLASS | USB_DIR_IN,
+			0, interface, buf, size);
+}
+
+/* Send an FCP command and get the response */
+static int fcp_usb(struct usb_mixer_interface *mixer, u32 opcode,
+		   const void *req_data, u16 req_size,
+		   void *resp_data, u16 resp_size)
+{
+	struct fcp_data *private = mixer->private_data;
+	struct usb_device *dev = mixer->chip->dev;
+	struct fcp_usb_packet *req __free(kfree) = NULL;
+	struct fcp_usb_packet *resp __free(kfree) = NULL;
+	size_t req_buf_size = struct_size(req, data, req_size);
+	size_t resp_buf_size = struct_size(resp, data, resp_size);
+	int retries = 0;
+	const int max_retries = 5;
+	int err;
+
+	if (!mixer->urb)
+		return -ENODEV;
+
+	req = kmalloc(req_buf_size, GFP_KERNEL);
+	if (!req)
+		return -ENOMEM;
+
+	resp = kmalloc(resp_buf_size, GFP_KERNEL);
+	if (!resp)
+		return -ENOMEM;
+
+	/* build request message */
+	fcp_fill_request_header(private, req, opcode, req_size);
+	if (req_size)
+		memcpy(req->data, req_data, req_size);
+
+	/* send the request and retry on EPROTO */
+retry:
+	err = fcp_usb_tx(dev, private->bInterfaceNumber, req, req_buf_size);
+	if (err == -EPROTO && ++retries <= max_retries) {
+		msleep(1 << (retries - 1));
+		goto retry;
+	}
+
+	if (err != req_buf_size) {
+		usb_audio_err(mixer->chip,
+			      "FCP request %08x failed: %d\n", opcode, err);
+		return -EINVAL;
+	}
+
+	if (!wait_for_completion_timeout(&private->cmd_done,
+					 msecs_to_jiffies(1000))) {
+		usb_audio_err(mixer->chip,
+			      "FCP request %08x timed out\n", opcode);
+
+		return -ETIMEDOUT;
+	}
+
+	/* send a second message to get the response */
+	err = fcp_usb_rx(dev, private->bInterfaceNumber, resp, resp_buf_size);
+
+	/* validate the response */
+
+	if (err < 0) {
+
+		/* ESHUTDOWN and EPROTO are valid responses to a
+		 * reboot request
+		 */
+		if (opcode == FCP_USB_REBOOT &&
+		    (err == -ESHUTDOWN || err == -EPROTO))
+			return 0;
+
+		usb_audio_err(mixer->chip,
+			      "FCP read response %08x failed: %d\n",
+			      opcode, err);
+		return -EINVAL;
+	}
+
+	if (err < sizeof(*resp)) {
+		usb_audio_err(mixer->chip,
+			      "FCP response %08x too short: %d\n",
+			      opcode, err);
+		return -EINVAL;
+	}
+
+	if (req->seq != resp->seq) {
+		usb_audio_err(mixer->chip,
+			      "FCP response %08x seq mismatch %d/%d\n",
+			      opcode,
+			      le16_to_cpu(req->seq), le16_to_cpu(resp->seq));
+		return -EINVAL;
+	}
+
+	if (req->opcode != resp->opcode) {
+		usb_audio_err(mixer->chip,
+			      "FCP response %08x opcode mismatch %08x\n",
+			      opcode, le16_to_cpu(resp->opcode));
+		return -EINVAL;
+	}
+
+	if (resp->error) {
+		usb_audio_err(mixer->chip,
+			      "FCP response %08x error %d\n",
+			      opcode, le32_to_cpu(resp->error));
+		return -EINVAL;
+	}
+
+	if (err != resp_buf_size) {
+		usb_audio_err(mixer->chip,
+			      "FCP response %08x buffer size mismatch %d/%zu\n",
+			      opcode, err, resp_buf_size);
+		return -EINVAL;
+	}
+
+	if (resp_size != le16_to_cpu(resp->size)) {
+		usb_audio_err(mixer->chip,
+			      "FCP response %08x size mismatch %d/%d\n",
+			      opcode, resp_size, le16_to_cpu(resp->size));
+		return -EINVAL;
+	}
+
+	if (resp_data && resp_size > 0)
+		memcpy(resp_data, resp->data, resp_size);
+
+	return 0;
+}
+
+static int fcp_reinit(struct usb_mixer_interface *mixer)
+{
+	struct fcp_data *private = mixer->private_data;
+	void *step0_resp __free(kfree) = NULL;
+	void *step2_resp __free(kfree) = NULL;
+
+	if (mixer->urb)
+		return 0;
+
+	step0_resp = kmalloc(private->step0_resp_size, GFP_KERNEL);
+	if (!step0_resp)
+		return -ENOMEM;
+	step2_resp = kmalloc(private->step2_resp_size, GFP_KERNEL);
+	if (!step2_resp)
+		return -ENOMEM;
+
+	return fcp_init(mixer, step0_resp, step2_resp);
+}
+
+/*** Control Functions ***/
+
+/* helper function to create a new control */
+static int fcp_add_new_ctl(struct usb_mixer_interface *mixer,
+			   const struct snd_kcontrol_new *ncontrol,
+			   int index, int channels, const char *name,
+			   struct snd_kcontrol **kctl_return)
+{
+	struct snd_kcontrol *kctl;
+	struct usb_mixer_elem_info *elem;
+	int err;
+
+	elem = kzalloc(sizeof(*elem), GFP_KERNEL);
+	if (!elem)
+		return -ENOMEM;
+
+	/* We set USB_MIXER_BESPOKEN type, so that the core USB mixer code
+	 * ignores them for resume and other operations.
+	 * Also, the head.id field is set to 0, as we don't use this field.
+	 */
+	elem->head.mixer = mixer;
+	elem->control = index;
+	elem->head.id = 0;
+	elem->channels = channels;
+	elem->val_type = USB_MIXER_BESPOKEN;
+
+	kctl = snd_ctl_new1(ncontrol, elem);
+	if (!kctl) {
+		kfree(elem);
+		return -ENOMEM;
+	}
+	kctl->private_free = snd_usb_mixer_elem_free;
+
+	strscpy(kctl->id.name, name, sizeof(kctl->id.name));
+
+	err = snd_usb_mixer_add_control(&elem->head, kctl);
+	if (err < 0)
+		return err;
+
+	if (kctl_return)
+		*kctl_return = kctl;
+
+	return 0;
+}
+
+/*** Level Meter Control ***/
+
+static int fcp_meter_ctl_info(struct snd_kcontrol *kctl,
+			      struct snd_ctl_elem_info *uinfo)
+{
+	struct usb_mixer_elem_info *elem = kctl->private_data;
+
+	uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER;
+	uinfo->count = elem->channels;
+	uinfo->value.integer.min = 0;
+	uinfo->value.integer.max = 4095;
+	uinfo->value.integer.step = 1;
+	return 0;
+}
+
+static int fcp_meter_ctl_get(struct snd_kcontrol *kctl,
+			     struct snd_ctl_elem_value *ucontrol)
+{
+	struct usb_mixer_elem_info *elem = kctl->private_data;
+	struct usb_mixer_interface *mixer = elem->head.mixer;
+	struct fcp_data *private = mixer->private_data;
+	int num_meter_slots, resp_size;
+	u32 *resp = private->meter_levels;
+	int i, err = 0;
+
+	struct {
+		__le16 pad;
+		__le16 num_meters;
+		__le32 magic;
+	} __packed req;
+
+	guard(mutex)(&private->mutex);
+
+	err = fcp_reinit(mixer);
+	if (err < 0)
+		return err;
+
+	num_meter_slots = private->num_meter_slots;
+	resp_size = num_meter_slots * sizeof(u32);
+
+	req.pad = 0;
+	req.num_meters = cpu_to_le16(num_meter_slots);
+	req.magic = cpu_to_le32(FCP_USB_METER_LEVELS_GET_MAGIC);
+	err = fcp_usb(mixer, FCP_USB_GET_METER,
+		      &req, sizeof(req), resp, resp_size);
+	if (err < 0)
+		return err;
+
+	/* copy & translate from resp[] using meter_level_map[] */
+	for (i = 0; i < elem->channels; i++) {
+		int idx = private->meter_level_map[i];
+		int value = idx < 0 ? 0 : le32_to_cpu(resp[idx]);
+
+		ucontrol->value.integer.value[i] = value;
+	}
+
+	return 0;
+}
+
+static int fcp_meter_tlv_callback(struct snd_kcontrol *kctl,
+				  int op_flag, unsigned int size,
+				  unsigned int __user *tlv)
+{
+	struct usb_mixer_elem_info *elem = kctl->private_data;
+	struct usb_mixer_interface *mixer = elem->head.mixer;
+	struct fcp_data *private = mixer->private_data;
+
+	guard(mutex)(&private->mutex);
+
+	if (op_flag == SNDRV_CTL_TLV_OP_READ) {
+		if (private->meter_labels_tlv_size == 0)
+			return 0;
+
+		if (size > private->meter_labels_tlv_size)
+			size = private->meter_labels_tlv_size;
+
+		if (copy_to_user(tlv, private->meter_labels_tlv, size))
+			return -EFAULT;
+
+		return size;
+	}
+
+	return -EINVAL;
+}
+
+static const struct snd_kcontrol_new fcp_meter_ctl = {
+	.iface  = SNDRV_CTL_ELEM_IFACE_PCM,
+	.access = SNDRV_CTL_ELEM_ACCESS_READ |
+		  SNDRV_CTL_ELEM_ACCESS_VOLATILE,
+	.info = fcp_meter_ctl_info,
+	.get  = fcp_meter_ctl_get,
+	.tlv  = { .c = fcp_meter_tlv_callback },
+};
+
+/*** hwdep interface ***/
+
+/* FCP initialisation */
+static int fcp_ioctl_init(struct usb_mixer_interface *mixer,
+			  struct fcp_init __user *arg)
+{
+	struct fcp_init init;
+	struct usb_device *dev = mixer->chip->dev;
+	struct fcp_data *private = mixer->private_data;
+	void *resp __free(kfree) = NULL;
+	void *step2_resp;
+	int err, buf_size;
+
+	if (usb_pipe_type_check(dev, usb_sndctrlpipe(dev, 0)))
+		return -EINVAL;
+
+	/* Get initialisation parameters */
+	if (copy_from_user(&init, arg, sizeof(init)))
+		return -EFAULT;
+
+	/* Validate the response sizes */
+	if (init.step0_resp_size < 1 ||
+	    init.step0_resp_size > 255 ||
+	    init.step2_resp_size < 1 ||
+	    init.step2_resp_size > 255)
+		return -EINVAL;
+
+	/* Allocate response buffer */
+	buf_size = init.step0_resp_size + init.step2_resp_size;
+
+	resp = kmalloc(buf_size, GFP_KERNEL);
+	if (!resp)
+		return -ENOMEM;
+
+	private->step0_resp_size = init.step0_resp_size;
+	private->step2_resp_size = init.step2_resp_size;
+	private->init1_opcode = init.init1_opcode;
+	private->init2_opcode = init.init2_opcode;
+
+	step2_resp = resp + private->step0_resp_size;
+
+	err = fcp_init(mixer, resp, step2_resp);
+	if (err < 0)
+		return err;
+
+	if (copy_to_user(arg->resp, resp, buf_size))
+		return -EFAULT;
+
+	return 0;
+}
+
+/* Check that the command is allowed
+ * Don't permit erasing/writing segment 0 (App_Gold)
+ */
+static int fcp_validate_cmd(u32 opcode, void *data, u16 size)
+{
+	if (opcode == FCP_USB_FLASH_ERASE) {
+		struct {
+			__le32 segment_num;
+			__le32 pad;
+		} __packed *req = data;
+
+		if (size != sizeof(*req))
+			return -EINVAL;
+
+		if (le32_to_cpu(req->segment_num) == FCP_SEGMENT_APP_GOLD)
+			return -EPERM;
+
+		if (req->pad != 0)
+			return -EINVAL;
+
+	} else if (opcode == FCP_USB_FLASH_WRITE) {
+		struct {
+			__le32 segment_num;
+			__le32 offset;
+			__le32 pad;
+			u8 data[];
+		} __packed *req = data;
+
+		if (size < sizeof(*req))
+			return -EINVAL;
+
+		if (le32_to_cpu(req->segment_num) == FCP_SEGMENT_APP_GOLD)
+			return -EPERM;
+
+		if (req->pad != 0)
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+/* Execute an FCP command specified by the user */
+static int fcp_ioctl_cmd(struct usb_mixer_interface *mixer,
+			 struct fcp_cmd __user *arg)
+{
+	struct fcp_cmd cmd;
+	int err, buf_size;
+	void *data __free(kfree) = NULL;
+
+	/* get opcode and request/response size */
+	if (copy_from_user(&cmd, arg, sizeof(cmd)))
+		return -EFAULT;
+
+	/* validate request and response sizes */
+	if (cmd.req_size > 4096 || cmd.resp_size > 4096)
+		return -EINVAL;
+
+	/* reinit if needed */
+	err = fcp_reinit(mixer);
+	if (err < 0)
+		return err;
+
+	/* allocate request/response buffer */
+	buf_size = max(cmd.req_size, cmd.resp_size);
+
+	if (buf_size > 0) {
+		data = kmalloc(buf_size, GFP_KERNEL);
+		if (!data)
+			return -ENOMEM;
+	}
+
+	/* copy request from user */
+	if (cmd.req_size > 0)
+		if (copy_from_user(data, arg->data, cmd.req_size))
+			return -EFAULT;
+
+	/* check that the command is allowed */
+	err = fcp_validate_cmd(cmd.opcode, data, cmd.req_size);
+	if (err < 0)
+		return err;
+
+	/* send request, get response */
+	err = fcp_usb(mixer, cmd.opcode,
+		      data, cmd.req_size, data, cmd.resp_size);
+	if (err < 0)
+		return err;
+
+	/* copy response to user */
+	if (cmd.resp_size > 0)
+		if (copy_to_user(arg->data, data, cmd.resp_size))
+			return -EFAULT;
+
+	return 0;
+}
+
+/* Validate the Level Meter map passed by the user */
+static int validate_meter_map(const s16 *map, int map_size, int meter_slots)
+{
+	int i;
+
+	for (i = 0; i < map_size; i++)
+		if (map[i] < -1 || map[i] >= meter_slots)
+			return -EINVAL;
+
+	return 0;
+}
+
+/* Set the Level Meter map and add the control */
+static int fcp_ioctl_set_meter_map(struct usb_mixer_interface *mixer,
+				   struct fcp_meter_map __user *arg)
+{
+	struct fcp_meter_map map;
+	struct fcp_data *private = mixer->private_data;
+	s16 *tmp_map __free(kfree) = NULL;
+	int err;
+
+	if (copy_from_user(&map, arg, sizeof(map)))
+		return -EFAULT;
+
+	/* Don't allow changing the map size or meter slots once set */
+	if (private->meter_ctl) {
+		struct usb_mixer_elem_info *elem =
+			private->meter_ctl->private_data;
+
+		if (map.map_size != elem->channels ||
+		    map.meter_slots != private->num_meter_slots)
+			return -EINVAL;
+	}
+
+	/* Validate the map size */
+	if (map.map_size < 1 || map.map_size > 255 ||
+	    map.meter_slots < 1 || map.meter_slots > 255)
+		return -EINVAL;
+
+	/* Allocate and copy the map data */
+	tmp_map = kmalloc_array(map.map_size, sizeof(s16), GFP_KERNEL);
+	if (!tmp_map)
+		return -ENOMEM;
+
+	if (copy_from_user(tmp_map, arg->map, map.map_size * sizeof(s16)))
+		return -EFAULT;
+
+	err = validate_meter_map(tmp_map, map.map_size, map.meter_slots);
+	if (err < 0)
+		return err;
+
+	/* If the control doesn't exist, create it */
+	if (!private->meter_ctl) {
+		s16 *new_map __free(kfree) = NULL;
+		u32 *meter_levels __free(kfree) = NULL;
+
+		/* Allocate buffer for the map */
+		new_map = kmalloc_array(map.map_size, sizeof(s16), GFP_KERNEL);
+		if (!new_map)
+			return -ENOMEM;
+
+		/* Allocate buffer for reading meter levels */
+		meter_levels = kmalloc_array(map.meter_slots, sizeof(u32),
+					     GFP_KERNEL);
+		if (!meter_levels)
+			return -ENOMEM;
+
+		/* Create the Level Meter control */
+		err = fcp_add_new_ctl(mixer, &fcp_meter_ctl, 0, map.map_size,
+				      "Level Meter", &private->meter_ctl);
+		if (err < 0)
+			return err;
+
+		/* Success; save the pointers in private and don't free them */
+		private->meter_level_map = new_map;
+		private->meter_levels = meter_levels;
+		private->num_meter_slots = map.meter_slots;
+		new_map = NULL;
+		meter_levels = NULL;
+	}
+
+	/* Install the new map */
+	memcpy(private->meter_level_map, tmp_map, map.map_size * sizeof(s16));
+
+	return 0;
+}
+
+/* Set the Level Meter labels */
+static int fcp_ioctl_set_meter_labels(struct usb_mixer_interface *mixer,
+				      struct fcp_meter_labels __user *arg)
+{
+	struct fcp_meter_labels labels;
+	struct fcp_data *private = mixer->private_data;
+	unsigned int *tlv_data;
+	unsigned int tlv_size, data_size;
+
+	if (copy_from_user(&labels, arg, sizeof(labels)))
+		return -EFAULT;
+
+	/* Remove existing labels if size is zero */
+	if (!labels.labels_size) {
+
+		/* Clear TLV read/callback bits if labels were present */
+		if (private->meter_labels_tlv) {
+			private->meter_ctl->vd[0].access &=
+				~(SNDRV_CTL_ELEM_ACCESS_TLV_READ |
+				  SNDRV_CTL_ELEM_ACCESS_TLV_CALLBACK);
+			snd_ctl_notify(mixer->chip->card,
+				       SNDRV_CTL_EVENT_MASK_INFO,
+				       &private->meter_ctl->id);
+		}
+
+		kfree(private->meter_labels_tlv);
+		private->meter_labels_tlv = NULL;
+		private->meter_labels_tlv_size = 0;
+
+		return 0;
+	}
+
+	/* Validate size */
+	if (labels.labels_size > 4096)
+		return -EINVAL;
+
+	/* Calculate padded data size */
+	data_size = ALIGN(labels.labels_size, sizeof(unsigned int));
+
+	/* Calculate total TLV size including header */
+	tlv_size = sizeof(unsigned int) * 2 + data_size;
+
+	/* Allocate, set up TLV header, and copy the labels data */
+	tlv_data = kzalloc(tlv_size, GFP_KERNEL);
+	if (!tlv_data)
+		return -ENOMEM;
+	tlv_data[0] = SNDRV_CTL_TLVT_FCP_CHANNEL_LABELS;
+	tlv_data[1] = data_size;
+	if (copy_from_user(&tlv_data[2], arg->labels, labels.labels_size)) {
+		kfree(tlv_data);
+		return -EFAULT;
+	}
+
+	/* Set TLV read/callback bits if labels weren't present */
+	if (!private->meter_labels_tlv) {
+		private->meter_ctl->vd[0].access |=
+			SNDRV_CTL_ELEM_ACCESS_TLV_READ |
+			SNDRV_CTL_ELEM_ACCESS_TLV_CALLBACK;
+		snd_ctl_notify(mixer->chip->card,
+			       SNDRV_CTL_EVENT_MASK_INFO,
+			       &private->meter_ctl->id);
+	}
+
+	/* Swap in the new labels */
+	kfree(private->meter_labels_tlv);
+	private->meter_labels_tlv = tlv_data;
+	private->meter_labels_tlv_size = tlv_size;
+
+	return 0;
+}
+
+static int fcp_hwdep_open(struct snd_hwdep *hw, struct file *file)
+{
+	struct usb_mixer_interface *mixer = hw->private_data;
+	struct fcp_data *private = mixer->private_data;
+
+	if (!capable(CAP_SYS_RAWIO))
+		return -EPERM;
+
+	private->file = file;
+
+	return 0;
+}
+
+static int fcp_hwdep_ioctl(struct snd_hwdep *hw, struct file *file,
+			   unsigned int cmd, unsigned long arg)
+{
+	struct usb_mixer_interface *mixer = hw->private_data;
+	struct fcp_data *private = mixer->private_data;
+	void __user *argp = (void __user *)arg;
+
+	guard(mutex)(&private->mutex);
+
+	switch (cmd) {
+
+	case FCP_IOCTL_PVERSION:
+		return put_user(FCP_HWDEP_VERSION,
+				(int __user *)argp) ? -EFAULT : 0;
+		break;
+
+	case FCP_IOCTL_INIT:
+		return fcp_ioctl_init(mixer, argp);
+
+	case FCP_IOCTL_CMD:
+		if (!private->init)
+			return -EINVAL;
+		return fcp_ioctl_cmd(mixer, argp);
+
+	case FCP_IOCTL_SET_METER_MAP:
+		if (!private->init)
+			return -EINVAL;
+		return fcp_ioctl_set_meter_map(mixer, argp);
+
+	case FCP_IOCTL_SET_METER_LABELS:
+		if (!private->init)
+			return -EINVAL;
+		if (!private->meter_ctl)
+			return -EINVAL;
+		return fcp_ioctl_set_meter_labels(mixer, argp);
+
+	default:
+		return -ENOIOCTLCMD;
+	}
+
+	/* not reached */
+}
+
+static ssize_t fcp_hwdep_read(struct snd_hwdep *hw, char __user *buf,
+			      ssize_t count, loff_t *offset)
+{
+	struct usb_mixer_interface *mixer = hw->private_data;
+	struct fcp_data *private = mixer->private_data;
+	unsigned long flags;
+	ssize_t ret = 0;
+	u32 event;
+
+	if (count < sizeof(event))
+		return -EINVAL;
+
+	ret = wait_event_interruptible(private->notify.queue,
+				       private->notify.event);
+	if (ret)
+		return ret;
+
+	spin_lock_irqsave(&private->notify.lock, flags);
+	event = private->notify.event;
+	private->notify.event = 0;
+	spin_unlock_irqrestore(&private->notify.lock, flags);
+
+	if (copy_to_user(buf, &event, sizeof(event)))
+		return -EFAULT;
+
+	return sizeof(event);
+}
+
+static unsigned int fcp_hwdep_poll(struct snd_hwdep *hw,
+				   struct file *file,
+				   poll_table *wait)
+{
+	struct usb_mixer_interface *mixer = hw->private_data;
+	struct fcp_data *private = mixer->private_data;
+	unsigned int mask = 0;
+
+	poll_wait(file, &private->notify.queue, wait);
+
+	if (private->notify.event)
+		mask |= POLLIN | POLLRDNORM;
+
+	return mask;
+}
+
+static int fcp_hwdep_release(struct snd_hwdep *hw, struct file *file)
+{
+	struct usb_mixer_interface *mixer = hw->private_data;
+	struct fcp_data *private = mixer->private_data;
+
+	if (!private)
+		return 0;
+
+	private->file = NULL;
+
+	return 0;
+}
+
+static int fcp_hwdep_init(struct usb_mixer_interface *mixer)
+{
+	struct snd_hwdep *hw;
+	int err;
+
+	err = snd_hwdep_new(mixer->chip->card, "Focusrite Control", 0, &hw);
+	if (err < 0)
+		return err;
+
+	hw->private_data = mixer;
+	hw->exclusive = 1;
+	hw->ops.open = fcp_hwdep_open;
+	hw->ops.ioctl = fcp_hwdep_ioctl;
+	hw->ops.ioctl_compat = fcp_hwdep_ioctl;
+	hw->ops.read = fcp_hwdep_read;
+	hw->ops.poll = fcp_hwdep_poll;
+	hw->ops.release = fcp_hwdep_release;
+
+	return 0;
+}
+
+/*** Cleanup ***/
+
+static void fcp_cleanup_urb(struct usb_mixer_interface *mixer)
+{
+	if (!mixer->urb)
+		return;
+
+	usb_kill_urb(mixer->urb);
+	kfree(mixer->urb->transfer_buffer);
+	usb_free_urb(mixer->urb);
+	mixer->urb = NULL;
+}
+
+static void fcp_private_free(struct usb_mixer_interface *mixer)
+{
+	struct fcp_data *private = mixer->private_data;
+
+	fcp_cleanup_urb(mixer);
+
+	kfree(private->meter_level_map);
+	kfree(private->meter_levels);
+	kfree(private->meter_labels_tlv);
+	kfree(private);
+	mixer->private_data = NULL;
+}
+
+static void fcp_private_suspend(struct usb_mixer_interface *mixer)
+{
+	fcp_cleanup_urb(mixer);
+}
+
+/*** Callbacks ***/
+
+static void fcp_notify(struct urb *urb)
+{
+	struct usb_mixer_interface *mixer = urb->context;
+	struct fcp_data *private = mixer->private_data;
+	int len = urb->actual_length;
+	int ustatus = urb->status;
+	u32 data;
+
+	if (ustatus != 0 || len != 8)
+		goto requeue;
+
+	data = le32_to_cpu(*(__le32 *)urb->transfer_buffer);
+
+	/* Handle command acknowledgement */
+	if (data & FCP_NOTIFY_ACK) {
+		complete(&private->cmd_done);
+		data &= ~FCP_NOTIFY_ACK;
+	}
+
+	if (data) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&private->notify.lock, flags);
+		private->notify.event |= data;
+		spin_unlock_irqrestore(&private->notify.lock, flags);
+
+		wake_up_interruptible(&private->notify.queue);
+	}
+
+requeue:
+	if (ustatus != -ENOENT &&
+	    ustatus != -ECONNRESET &&
+	    ustatus != -ESHUTDOWN) {
+		urb->dev = mixer->chip->dev;
+		usb_submit_urb(urb, GFP_ATOMIC);
+	} else {
+		complete(&private->cmd_done);
+	}
+}
+
+/* Submit a URB to receive notifications from the device */
+static int fcp_init_notify(struct usb_mixer_interface *mixer)
+{
+	struct usb_device *dev = mixer->chip->dev;
+	struct fcp_data *private = mixer->private_data;
+	unsigned int pipe = usb_rcvintpipe(dev, private->bEndpointAddress);
+	void *transfer_buffer;
+	int err;
+
+	/* Already set up */
+	if (mixer->urb)
+		return 0;
+
+	if (usb_pipe_type_check(dev, pipe))
+		return -EINVAL;
+
+	mixer->urb = usb_alloc_urb(0, GFP_KERNEL);
+	if (!mixer->urb)
+		return -ENOMEM;
+
+	transfer_buffer = kmalloc(private->wMaxPacketSize, GFP_KERNEL);
+	if (!transfer_buffer) {
+		usb_free_urb(mixer->urb);
+		mixer->urb = NULL;
+		return -ENOMEM;
+	}
+
+	usb_fill_int_urb(mixer->urb, dev, pipe,
+			 transfer_buffer, private->wMaxPacketSize,
+			 fcp_notify, mixer, private->bInterval);
+
+	init_completion(&private->cmd_done);
+
+	err = usb_submit_urb(mixer->urb, GFP_KERNEL);
+	if (err) {
+		usb_audio_err(mixer->chip,
+			      "%s: usb_submit_urb failed: %d\n",
+			      __func__, err);
+		kfree(transfer_buffer);
+		usb_free_urb(mixer->urb);
+		mixer->urb = NULL;
+	}
+
+	return err;
+}
+
+/*** Initialisation ***/
+
+static int fcp_init(struct usb_mixer_interface *mixer,
+		    void *step0_resp, void *step2_resp)
+{
+	struct fcp_data *private = mixer->private_data;
+	struct usb_device *dev = mixer->chip->dev;
+	int err;
+
+	err = snd_usb_ctl_msg(dev, usb_rcvctrlpipe(dev, 0),
+		FCP_USB_REQ_STEP0,
+		USB_RECIP_INTERFACE | USB_TYPE_CLASS | USB_DIR_IN,
+		0, private->bInterfaceNumber,
+		step0_resp, private->step0_resp_size);
+	if (err < 0)
+		return err;
+
+	err = fcp_init_notify(mixer);
+	if (err < 0)
+		return err;
+
+	private->seq = 0;
+	private->init = 1;
+
+	err = fcp_usb(mixer, private->init1_opcode, NULL, 0, NULL, 0);
+	if (err < 0)
+		return err;
+
+	err = fcp_usb(mixer, private->init2_opcode,
+		      NULL, 0, step2_resp, private->step2_resp_size);
+	if (err < 0)
+		return err;
+
+	return 0;
+}
+
+static int fcp_init_private(struct usb_mixer_interface *mixer)
+{
+	struct fcp_data *private =
+		kzalloc(sizeof(struct fcp_data), GFP_KERNEL);
+
+	if (!private)
+		return -ENOMEM;
+
+	mutex_init(&private->mutex);
+	init_waitqueue_head(&private->notify.queue);
+	spin_lock_init(&private->notify.lock);
+
+	mixer->private_data = private;
+	mixer->private_free = fcp_private_free;
+	mixer->private_suspend = fcp_private_suspend;
+
+	private->mixer = mixer;
+
+	return 0;
+}
+
+/* Look through the interface descriptors for the Focusrite Control
+ * interface (bInterfaceClass = 255 Vendor Specific Class) and set
+ * bInterfaceNumber, bEndpointAddress, wMaxPacketSize, and bInterval
+ * in private
+ */
+static int fcp_find_fc_interface(struct usb_mixer_interface *mixer)
+{
+	struct snd_usb_audio *chip = mixer->chip;
+	struct fcp_data *private = mixer->private_data;
+	struct usb_host_config *config = chip->dev->actconfig;
+	int i;
+
+	for (i = 0; i < config->desc.bNumInterfaces; i++) {
+		struct usb_interface *intf = config->interface[i];
+		struct usb_interface_descriptor *desc =
+			&intf->altsetting[0].desc;
+		struct usb_endpoint_descriptor *epd;
+
+		if (desc->bInterfaceClass != 255)
+			continue;
+
+		epd = get_endpoint(intf->altsetting, 0);
+		private->bInterfaceNumber = desc->bInterfaceNumber;
+		private->bEndpointAddress = epd->bEndpointAddress &
+			USB_ENDPOINT_NUMBER_MASK;
+		private->wMaxPacketSize = le16_to_cpu(epd->wMaxPacketSize);
+		private->bInterval = epd->bInterval;
+		return 0;
+	}
+
+	usb_audio_err(chip, "Focusrite vendor-specific interface not found\n");
+	return -EINVAL;
+}
+
+int snd_fcp_init(struct usb_mixer_interface *mixer)
+{
+	struct snd_usb_audio *chip = mixer->chip;
+	int err;
+
+	/* only use UAC_VERSION_2 */
+	if (!mixer->protocol)
+		return 0;
+
+	err = fcp_init_private(mixer);
+	if (err < 0)
+		return err;
+
+	err = fcp_find_fc_interface(mixer);
+	if (err < 0)
+		return err;
+
+	err = fcp_hwdep_init(mixer);
+	if (err < 0)
+		return err;
+
+	usb_audio_info(chip,
+		"Focusrite Control Protocol Driver ready (pid=0x%04x); "
+		"report any issues to "
+		"https://github.com/geoffreybennett/fcp-support/issues",
+		USB_ID_PRODUCT(chip->usb_id));
+
+	return err;
+}
diff --git a/sound/usb/fcp.h b/sound/usb/fcp.h
new file mode 100644
index 000000000000..d58cc7db75b1
--- /dev/null
+++ b/sound/usb/fcp.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __USBAUDIO_FCP_H
+#define __USBAUDIO_FCP_H
+
+int snd_fcp_init(struct usb_mixer_interface *mixer);
+
+#endif /* __USBAUDIO_FCP_H */
diff --git a/sound/usb/mixer_quirks.c b/sound/usb/mixer_quirks.c
index 23fcd680167d..ed6127b0389f 100644
--- a/sound/usb/mixer_quirks.c
+++ b/sound/usb/mixer_quirks.c
@@ -38,6 +38,7 @@
 #include "mixer_us16x08.h"
 #include "mixer_s1810c.h"
 #include "helper.h"
+#include "fcp.h"
 
 struct std_mono_table {
 	unsigned int unitid, control, cmask;
@@ -4090,6 +4091,12 @@ int snd_usb_mixer_apply_create_quirk(struct usb_mixer_interface *mixer)
 		err = snd_scarlett2_init(mixer);
 		break;
 
+	case USB_ID(0x1235, 0x821b): /* Focusrite Scarlett 16i16 4th Gen */
+	case USB_ID(0x1235, 0x821c): /* Focusrite Scarlett 18i16 4th Gen */
+	case USB_ID(0x1235, 0x821d): /* Focusrite Scarlett 18i20 4th Gen */
+		err = snd_fcp_init(mixer);
+		break;
+
 	case USB_ID(0x041e, 0x323b): /* Creative Sound Blaster E1 */
 		err = snd_soundblaster_e1_switch_create(mixer);
 		break;
-- 
cgit v1.2.3


From 3156ceb222414456084d964f43ada071206039b8 Mon Sep 17 00:00:00 2001
From: Rickard Andersson <rickard.andersson@axis.com>
Date: Mon, 16 Dec 2024 09:54:19 +0100
Subject: ubi: Expose interface for detailed erase counters

Using the ioctl command 'UBI_IOCECNFO' user space can obtain
detailed erase counter information of all blocks of a device.

Signed-off-by: Rickard Andersson <rickard.andersson@axis.com>
Reviewed-by: Zhihao Cheng <chengzhihao1@huawei.com>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 include/uapi/mtd/ubi-user.h | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/mtd/ubi-user.h b/include/uapi/mtd/ubi-user.h
index e1571603175e..aa872a41ffb9 100644
--- a/include/uapi/mtd/ubi-user.h
+++ b/include/uapi/mtd/ubi-user.h
@@ -175,6 +175,8 @@
 #define UBI_IOCRPEB _IOW(UBI_IOC_MAGIC, 4, __s32)
 /* Force scrubbing on the specified PEB */
 #define UBI_IOCSPEB _IOW(UBI_IOC_MAGIC, 5, __s32)
+/* Read detailed device erase counter information */
+#define UBI_IOCECNFO _IOWR(UBI_IOC_MAGIC, 6, struct ubi_ecinfo_req)
 
 /* ioctl commands of the UBI control character device */
 
@@ -412,6 +414,37 @@ struct ubi_rnvol_req {
 	} ents[UBI_MAX_RNVOL];
 } __packed;
 
+/**
+ * struct ubi_ecinfo_req - a data structure used for requesting and receiving
+ * erase block counter information from a UBI device.
+ *
+ * @start: index of first physical erase block to read (in)
+ * @length: number of erase counters to read (in)
+ * @read_length: number of erase counters that was actually read (out)
+ * @padding: reserved for future, not used, has to be zeroed
+ * @erase_counters: array of erase counter values (out)
+ *
+ * This structure is used to retrieve erase counter information for a specified
+ * range of PEBs on a UBI device.
+ * Erase counters are read from @start and attempts to read @length number of
+ * erase counters.
+ * The retrieved values are stored in the @erase_counters array. It is the
+ * responsibility of the caller to allocate enough memory for storing @length
+ * elements in the @erase_counters array.
+ * If a block is bad or if the erase counter is unknown the corresponding value
+ * in the array will be set to -1.
+ * The @read_length field will indicate the number of erase counters actually
+ * read. Typically @read_length will be limited due to memory or the number of
+ * PEBs on the UBI device.
+ */
+struct ubi_ecinfo_req {
+	__s32 start;
+	__s32 length;
+	__s32 read_length;
+	__s8  padding[16];
+	__s32 erase_counters[];
+}  __packed;
+
 /**
  * struct ubi_leb_change_req - a data structure used in atomic LEB change
  *                             requests.
-- 
cgit v1.2.3


From d6658d3338f84173fb55c9d6c6cdfa57f879712d Mon Sep 17 00:00:00 2001
From: Niklas Cassel <cassel@kernel.org>
Date: Sat, 16 Nov 2024 04:20:45 +0100
Subject: misc: pci_endpoint_test: Add consecutive BAR test
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a more advanced BAR test that writes all BARs in one go, and then reads
them back and verifies that the value matches the BAR number bitwise OR'ed
with offset, this allows us to verify:

  - The BAR number was what we intended to read
  - The offset was what we intended to read

This allows us to detect potential address translation issues on the EP.

Reading back the BAR directly after writing will not allow us to detect the
case where inbound address translation on the endpoint incorrectly causes
multiple BARs to be redirected to the same memory region (within the EP).

Link: https://lore.kernel.org/r/20241116032045.2574168-2-cassel@kernel.org
Signed-off-by: Niklas Cassel <cassel@kernel.org>
Signed-off-by: Krzysztof Wilczyński <kwilczynski@kernel.org>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
---
 drivers/misc/pci_endpoint_test.c | 88 ++++++++++++++++++++++++++++++++++++++++
 include/uapi/linux/pcitest.h     |  1 +
 tools/pci/pcitest.c              | 16 +++++++-
 tools/pci/pcitest.sh             |  1 +
 4 files changed, 105 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/drivers/misc/pci_endpoint_test.c b/drivers/misc/pci_endpoint_test.c
index 8a31bd4c237d..5c99da952b7a 100644
--- a/drivers/misc/pci_endpoint_test.c
+++ b/drivers/misc/pci_endpoint_test.c
@@ -325,6 +325,91 @@ static bool pci_endpoint_test_bar(struct pci_endpoint_test *test,
 	return true;
 }
 
+static u32 bar_test_pattern_with_offset(enum pci_barno barno, int offset)
+{
+	u32 val;
+
+	/* Keep the BAR pattern in the top byte. */
+	val = bar_test_pattern[barno] & 0xff000000;
+	/* Store the (partial) offset in the remaining bytes. */
+	val |= offset & 0x00ffffff;
+
+	return val;
+}
+
+static bool pci_endpoint_test_bars_write_bar(struct pci_endpoint_test *test,
+					     enum pci_barno barno)
+{
+	struct pci_dev *pdev = test->pdev;
+	int j, size;
+
+	size = pci_resource_len(pdev, barno);
+
+	if (barno == test->test_reg_bar)
+		size = 0x4;
+
+	for (j = 0; j < size; j += 4)
+		writel_relaxed(bar_test_pattern_with_offset(barno, j),
+			       test->bar[barno] + j);
+
+	return true;
+}
+
+static bool pci_endpoint_test_bars_read_bar(struct pci_endpoint_test *test,
+					    enum pci_barno barno)
+{
+	struct pci_dev *pdev = test->pdev;
+	struct device *dev = &pdev->dev;
+	int j, size;
+	u32 val;
+
+	size = pci_resource_len(pdev, barno);
+
+	if (barno == test->test_reg_bar)
+		size = 0x4;
+
+	for (j = 0; j < size; j += 4) {
+		u32 expected = bar_test_pattern_with_offset(barno, j);
+
+		val = readl_relaxed(test->bar[barno] + j);
+		if (val != expected) {
+			dev_err(dev,
+				"BAR%d incorrect data at offset: %#x, got: %#x expected: %#x\n",
+				barno, j, val, expected);
+			return false;
+		}
+	}
+
+	return true;
+}
+
+static bool pci_endpoint_test_bars(struct pci_endpoint_test *test)
+{
+	enum pci_barno bar;
+	bool ret;
+
+	/* Write all BARs in order (without reading). */
+	for (bar = 0; bar < PCI_STD_NUM_BARS; bar++)
+		if (test->bar[bar])
+			pci_endpoint_test_bars_write_bar(test, bar);
+
+	/*
+	 * Read all BARs in order (without writing).
+	 * If there is an address translation issue on the EP, writing one BAR
+	 * might have overwritten another BAR. Ensure that this is not the case.
+	 * (Reading back the BAR directly after writing can not detect this.)
+	 */
+	for (bar = 0; bar < PCI_STD_NUM_BARS; bar++) {
+		if (test->bar[bar]) {
+			ret = pci_endpoint_test_bars_read_bar(test, bar);
+			if (!ret)
+				return ret;
+		}
+	}
+
+	return true;
+}
+
 static bool pci_endpoint_test_intx_irq(struct pci_endpoint_test *test)
 {
 	u32 val;
@@ -771,6 +856,9 @@ static long pci_endpoint_test_ioctl(struct file *file, unsigned int cmd,
 			goto ret;
 		ret = pci_endpoint_test_bar(test, bar);
 		break;
+	case PCITEST_BARS:
+		ret = pci_endpoint_test_bars(test);
+		break;
 	case PCITEST_INTX_IRQ:
 		ret = pci_endpoint_test_intx_irq(test);
 		break;
diff --git a/include/uapi/linux/pcitest.h b/include/uapi/linux/pcitest.h
index 94b46b043b53..acd261f49866 100644
--- a/include/uapi/linux/pcitest.h
+++ b/include/uapi/linux/pcitest.h
@@ -20,6 +20,7 @@
 #define PCITEST_MSIX		_IOW('P', 0x7, int)
 #define PCITEST_SET_IRQTYPE	_IOW('P', 0x8, int)
 #define PCITEST_GET_IRQTYPE	_IO('P', 0x9)
+#define PCITEST_BARS		_IO('P', 0xa)
 #define PCITEST_CLEAR_IRQ	_IO('P', 0x10)
 
 #define PCITEST_FLAGS_USE_DMA	0x00000001
diff --git a/tools/pci/pcitest.c b/tools/pci/pcitest.c
index 7b530d838d40..08f355083754 100644
--- a/tools/pci/pcitest.c
+++ b/tools/pci/pcitest.c
@@ -22,6 +22,7 @@ static char *irq[] = { "LEGACY", "MSI", "MSI-X" };
 struct pci_test {
 	char		*device;
 	char		barnum;
+	bool		consecutive_bar_test;
 	bool		legacyirq;
 	unsigned int	msinum;
 	unsigned int	msixnum;
@@ -57,6 +58,15 @@ static int run_test(struct pci_test *test)
 			fprintf(stdout, "%s\n", result[ret]);
 	}
 
+	if (test->consecutive_bar_test) {
+		ret = ioctl(fd, PCITEST_BARS);
+		fprintf(stdout, "Consecutive BAR test:\t\t");
+		if (ret < 0)
+			fprintf(stdout, "TEST FAILED\n");
+		else
+			fprintf(stdout, "%s\n", result[ret]);
+	}
+
 	if (test->set_irqtype) {
 		ret = ioctl(fd, PCITEST_SET_IRQTYPE, test->irqtype);
 		fprintf(stdout, "SET IRQ TYPE TO %s:\t\t", irq[test->irqtype]);
@@ -172,7 +182,7 @@ int main(int argc, char **argv)
 	/* set default endpoint device */
 	test->device = "/dev/pci-endpoint-test.0";
 
-	while ((c = getopt(argc, argv, "D:b:m:x:i:deIlhrwcs:")) != EOF)
+	while ((c = getopt(argc, argv, "D:b:Cm:x:i:deIlhrwcs:")) != EOF)
 	switch (c) {
 	case 'D':
 		test->device = optarg;
@@ -182,6 +192,9 @@ int main(int argc, char **argv)
 		if (test->barnum < 0 || test->barnum > 5)
 			goto usage;
 		continue;
+	case 'C':
+		test->consecutive_bar_test = true;
+		continue;
 	case 'l':
 		test->legacyirq = true;
 		continue;
@@ -230,6 +243,7 @@ usage:
 			"Options:\n"
 			"\t-D <dev>		PCI endpoint test device {default: /dev/pci-endpoint-test.0}\n"
 			"\t-b <bar num>		BAR test (bar number between 0..5)\n"
+			"\t-C			Consecutive BAR test\n"
 			"\t-m <msi num>		MSI test (msi number between 1..32)\n"
 			"\t-x <msix num>	\tMSI-X test (msix number between 1..2048)\n"
 			"\t-i <irq type>	\tSet IRQ type (0 - Legacy, 1 - MSI, 2 - MSI-X)\n"
diff --git a/tools/pci/pcitest.sh b/tools/pci/pcitest.sh
index 75ed48ff2990..770f4d6df34b 100644
--- a/tools/pci/pcitest.sh
+++ b/tools/pci/pcitest.sh
@@ -11,6 +11,7 @@ do
 	pcitest -b $bar
 	bar=`expr $bar + 1`
 done
+pcitest -C
 echo
 
 echo "Interrupt tests"
-- 
cgit v1.2.3


From e721f619e3ec9bae08bf419c3944cf1e6966c821 Mon Sep 17 00:00:00 2001
From: Nicolin Chen <nicolinc@nvidia.com>
Date: Mon, 20 Jan 2025 11:50:51 -0800
Subject: iommufd: Fix struct iommu_hwpt_pgfault init and padding

The iommu_hwpt_pgfault is used to report IO page fault data to userspace,
but iommufd_fault_fops_read was never zeroing its padding. This leaks the
content of the kernel stack memory to userspace.

Also, the iommufd uAPI requires explicit padding and use of __aligned_u64
to ensure ABI compatibility's with 32 bit.

pahole result, before:
struct iommu_hwpt_pgfault {
        __u32     flags;                /*     0     4 */
        __u32     dev_id;               /*     4     4 */
        __u32     pasid;                /*     8     4 */
        __u32     grpid;                /*    12     4 */
        __u32     perm;                 /*    16     4 */

        /* XXX 4 bytes hole, try to pack */

        __u64     addr;                 /*    24     8 */
        __u32     length;               /*    32     4 */
        __u32     cookie;               /*    36     4 */

        /* size: 40, cachelines: 1, members: 8 */
        /* sum members: 36, holes: 1, sum holes: 4 */
        /* last cacheline: 40 bytes */
};

pahole result, after:
struct iommu_hwpt_pgfault {
        __u32      flags;                /*     0     4 */
        __u32      dev_id;               /*     4     4 */
        __u32      pasid;                /*     8     4 */
        __u32      grpid;                /*    12     4 */
        __u32      perm;                 /*    16     4 */
        __u32      __reserved;           /*    20     4 */
        __u64      addr __attribute__((__aligned__(8))); /*    24     8 */
        __u32      length;               /*    32     4 */
        __u32      cookie;               /*    36     4 */

        /* size: 40, cachelines: 1, members: 9 */
        /* forced alignments: 1 */
        /* last cacheline: 40 bytes */
} __attribute__((__aligned__(8)));

Fixes: c714f15860fc ("iommufd: Add fault and response message definitions")
Link: https://patch.msgid.link/r/20250120195051.2450-1-nicolinc@nvidia.com
Cc: stable@vger.kernel.org
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/iommu/iommufd/fault.c | 2 +-
 include/uapi/linux/iommufd.h  | 4 +++-
 2 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/iommu/iommufd/fault.c b/drivers/iommu/iommufd/fault.c
index a9160f4443d2..d9a937450e55 100644
--- a/drivers/iommu/iommufd/fault.c
+++ b/drivers/iommu/iommufd/fault.c
@@ -263,7 +263,7 @@ static ssize_t iommufd_fault_fops_read(struct file *filep, char __user *buf,
 {
 	size_t fault_size = sizeof(struct iommu_hwpt_pgfault);
 	struct iommufd_fault *fault = filep->private_data;
-	struct iommu_hwpt_pgfault data;
+	struct iommu_hwpt_pgfault data = {};
 	struct iommufd_device *idev;
 	struct iopf_group *group;
 	struct iopf_fault *iopf;
diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h
index 34810f6ae2b5..78747b24bd0f 100644
--- a/include/uapi/linux/iommufd.h
+++ b/include/uapi/linux/iommufd.h
@@ -868,6 +868,7 @@ enum iommu_hwpt_pgfault_perm {
  * @pasid: Process Address Space ID
  * @grpid: Page Request Group Index
  * @perm: Combination of enum iommu_hwpt_pgfault_perm
+ * @__reserved: Must be 0.
  * @addr: Fault address
  * @length: a hint of how much data the requestor is expecting to fetch. For
  *          example, if the PRI initiator knows it is going to do a 10MB
@@ -883,7 +884,8 @@ struct iommu_hwpt_pgfault {
 	__u32 pasid;
 	__u32 grpid;
 	__u32 perm;
-	__u64 addr;
+	__u32 __reserved;
+	__aligned_u64 addr;
 	__u32 length;
 	__u32 cookie;
 };
-- 
cgit v1.2.3


From cee9b4ef42512a6e57562460a15f18a022c84dda Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Mon, 9 Dec 2024 16:13:59 -0500
Subject: nfsd: rework NFS4_SHARE_WANT_* flag handling

The delstid draft adds new NFS4_SHARE_WANT_TYPE_MASK values that don't
fit neatly into the existing WANT_MASK or WHEN_MASK. Add a new
NFS4_SHARE_WANT_MOD_MASK value and redefine NFS4_SHARE_WANT_MASK to
include it.

Also fix the checks in nfsd4_deleg_xgrade_none_ext() to check for the
flags instead of equality, since there may be modifier flags in the
value.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/nfs4state.c       | 4 ++--
 fs/nfsd/nfs4xdr.c         | 2 +-
 include/uapi/linux/nfs4.h | 7 +++++--
 3 files changed, 8 insertions(+), 5 deletions(-)

(limited to 'include/uapi')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index e7b26806797a..780db0992c9f 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -6224,10 +6224,10 @@ static void nfsd4_deleg_xgrade_none_ext(struct nfsd4_open *open,
 					struct nfs4_delegation *dp)
 {
 	if (deleg_is_write(dp->dl_type)) {
-		if (open->op_deleg_want == OPEN4_SHARE_ACCESS_WANT_READ_DELEG) {
+		if (open->op_deleg_want & OPEN4_SHARE_ACCESS_WANT_READ_DELEG) {
 			open->op_delegate_type = OPEN_DELEGATE_NONE_EXT;
 			open->op_why_no_deleg = WND4_NOT_SUPP_DOWNGRADE;
-		} else if (open->op_deleg_want == OPEN4_SHARE_ACCESS_WANT_WRITE_DELEG) {
+		} else if (open->op_deleg_want & OPEN4_SHARE_ACCESS_WANT_WRITE_DELEG) {
 			open->op_delegate_type = OPEN_DELEGATE_NONE_EXT;
 			open->op_why_no_deleg = WND4_NOT_SUPP_UPGRADE;
 		}
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 147564a74b46..5407fd0d464c 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1067,7 +1067,7 @@ static __be32 nfsd4_decode_share_access(struct nfsd4_compoundargs *argp, u32 *sh
 		return nfs_ok;
 	if (!argp->minorversion)
 		return nfserr_bad_xdr;
-	switch (w & NFS4_SHARE_WANT_MASK) {
+	switch (w & NFS4_SHARE_WANT_TYPE_MASK) {
 	case OPEN4_SHARE_ACCESS_WANT_NO_PREFERENCE:
 	case OPEN4_SHARE_ACCESS_WANT_READ_DELEG:
 	case OPEN4_SHARE_ACCESS_WANT_WRITE_DELEG:
diff --git a/include/uapi/linux/nfs4.h b/include/uapi/linux/nfs4.h
index caf4db2fcbb9..4273e0249fcb 100644
--- a/include/uapi/linux/nfs4.h
+++ b/include/uapi/linux/nfs4.h
@@ -58,7 +58,7 @@
 #define NFS4_SHARE_DENY_BOTH	0x0003
 
 /* nfs41 */
-#define NFS4_SHARE_WANT_MASK		0xFF00
+#define NFS4_SHARE_WANT_TYPE_MASK	0xFF00
 #define NFS4_SHARE_WANT_NO_PREFERENCE	0x0000
 #define NFS4_SHARE_WANT_READ_DELEG	0x0100
 #define NFS4_SHARE_WANT_WRITE_DELEG	0x0200
@@ -66,13 +66,16 @@
 #define NFS4_SHARE_WANT_NO_DELEG	0x0400
 #define NFS4_SHARE_WANT_CANCEL		0x0500
 
-#define NFS4_SHARE_WHEN_MASK		0xF0000
+#define NFS4_SHARE_WHEN_MASK				0xF0000
 #define NFS4_SHARE_SIGNAL_DELEG_WHEN_RESRC_AVAIL	0x10000
 #define NFS4_SHARE_PUSH_DELEG_WHEN_UNCONTENDED		0x20000
 
+#define NFS4_SHARE_WANT_MOD_MASK			0xF00000
 #define NFS4_SHARE_WANT_DELEG_TIMESTAMPS		0x100000
 #define NFS4_SHARE_WANT_OPEN_XOR_DELEGATION		0x200000
 
+#define NFS4_SHARE_WANT_MASK	(NFS4_SHARE_WANT_TYPE_MASK | NFS4_SHARE_WANT_MOD_MASK)
+
 #define NFS4_CDFC4_FORE	0x1
 #define NFS4_CDFC4_BACK 0x2
 #define NFS4_CDFC4_BOTH 0x3
-- 
cgit v1.2.3


From 188973e3536a18aebee3af486cd3d2ef82b09f88 Mon Sep 17 00:00:00 2001
From: Dongdong Zhang <zhangdongdong@eswincomputing.com>
Date: Mon, 16 Dec 2024 09:35:36 +0800
Subject: PCI: Remove redundant PCI_VSEC_HDR and PCI_VSEC_HDR_LEN_SHIFT

Remove duplicate macro PCI_VSEC_HDR and its related macro
PCI_VSEC_HDR_LEN_SHIFT from pci_regs.h to avoid redundancy and
inconsistencies. Update VFIO PCI code to use PCI_VNDR_HEADER and
PCI_VNDR_HEADER_LEN() for consistent naming and functionality.

These changes aim to streamline header handling while minimizing impact,
given the niche usage of these macros in userspace.

Link: https://lore.kernel.org/r/20241216013536.4487-1-zhangdongdong@eswincomputing.com
Signed-off-by: Dongdong Zhang <zhangdongdong@eswincomputing.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/vfio/pci/vfio_pci_config.c | 5 +++--
 include/uapi/linux/pci_regs.h      | 3 ---
 2 files changed, 3 insertions(+), 5 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c
index ea2745c1ac5e..5572fd99b921 100644
--- a/drivers/vfio/pci/vfio_pci_config.c
+++ b/drivers/vfio/pci/vfio_pci_config.c
@@ -1389,11 +1389,12 @@ static int vfio_ext_cap_len(struct vfio_pci_core_device *vdev, u16 ecap, u16 epo
 
 	switch (ecap) {
 	case PCI_EXT_CAP_ID_VNDR:
-		ret = pci_read_config_dword(pdev, epos + PCI_VSEC_HDR, &dword);
+		ret = pci_read_config_dword(pdev, epos + PCI_VNDR_HEADER,
+					    &dword);
 		if (ret)
 			return pcibios_err_to_errno(ret);
 
-		return dword >> PCI_VSEC_HDR_LEN_SHIFT;
+		return PCI_VNDR_HEADER_LEN(dword);
 	case PCI_EXT_CAP_ID_VC:
 	case PCI_EXT_CAP_ID_VC9:
 	case PCI_EXT_CAP_ID_MFVC:
diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
index 02d0ba2999e0..c2ccc85c43ed 100644
--- a/include/uapi/linux/pci_regs.h
+++ b/include/uapi/linux/pci_regs.h
@@ -1001,9 +1001,6 @@
 #define PCI_ACS_CTRL		0x06	/* ACS Control Register */
 #define PCI_ACS_EGRESS_CTL_V	0x08	/* ACS Egress Control Vector */
 
-#define PCI_VSEC_HDR		4	/* extended cap - vendor-specific */
-#define  PCI_VSEC_HDR_LEN_SHIFT	20	/* shift for length field */
-
 /* SATA capability */
 #define PCI_SATA_REGS		4	/* SATA REGs specifier */
 #define  PCI_SATA_REGS_MASK	0xF	/* location - BAR#/inline */
-- 
cgit v1.2.3


From 24fe962c86f55347385933a1b06ca71b60854690 Mon Sep 17 00:00:00 2001
From: Bernd Schubert <bschubert@ddn.com>
Date: Mon, 20 Jan 2025 02:28:59 +0100
Subject: fuse: {io-uring} Handle SQEs - register commands

This adds basic support for ring SQEs (with opcode=IORING_OP_URING_CMD).
For now only FUSE_IO_URING_CMD_REGISTER is handled to register queue
entries.

Signed-off-by: Bernd Schubert <bschubert@ddn.com>
Reviewed-by: Pavel Begunkov <asml.silence@gmail.com> # io_uring
Reviewed-by: Luis Henriques <luis@igalia.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/fuse/Kconfig           |  12 ++
 fs/fuse/Makefile          |   1 +
 fs/fuse/dev_uring.c       | 326 ++++++++++++++++++++++++++++++++++++++++++++++
 fs/fuse/dev_uring_i.h     | 113 ++++++++++++++++
 fs/fuse/fuse_i.h          |   5 +
 fs/fuse/inode.c           |  10 ++
 include/uapi/linux/fuse.h |  76 ++++++++++-
 7 files changed, 542 insertions(+), 1 deletion(-)
 create mode 100644 fs/fuse/dev_uring.c
 create mode 100644 fs/fuse/dev_uring_i.h

(limited to 'include/uapi')

diff --git a/fs/fuse/Kconfig b/fs/fuse/Kconfig
index 8674dbfbe59d..ca215a3cba3e 100644
--- a/fs/fuse/Kconfig
+++ b/fs/fuse/Kconfig
@@ -63,3 +63,15 @@ config FUSE_PASSTHROUGH
 	  to be performed directly on a backing file.
 
 	  If you want to allow passthrough operations, answer Y.
+
+config FUSE_IO_URING
+	bool "FUSE communication over io-uring"
+	default y
+	depends on FUSE_FS
+	depends on IO_URING
+	help
+	  This allows sending FUSE requests over the io-uring interface and
+          also adds request core affinity.
+
+	  If you want to allow fuse server/client communication through io-uring,
+	  answer Y
diff --git a/fs/fuse/Makefile b/fs/fuse/Makefile
index 2c372180d631..3f0f312a31c1 100644
--- a/fs/fuse/Makefile
+++ b/fs/fuse/Makefile
@@ -15,5 +15,6 @@ fuse-y += iomode.o
 fuse-$(CONFIG_FUSE_DAX) += dax.o
 fuse-$(CONFIG_FUSE_PASSTHROUGH) += passthrough.o
 fuse-$(CONFIG_SYSCTL) += sysctl.o
+fuse-$(CONFIG_FUSE_IO_URING) += dev_uring.o
 
 virtiofs-y := virtio_fs.o
diff --git a/fs/fuse/dev_uring.c b/fs/fuse/dev_uring.c
new file mode 100644
index 000000000000..42092a234570
--- /dev/null
+++ b/fs/fuse/dev_uring.c
@@ -0,0 +1,326 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * FUSE: Filesystem in Userspace
+ * Copyright (c) 2023-2024 DataDirect Networks.
+ */
+
+#include "fuse_i.h"
+#include "dev_uring_i.h"
+#include "fuse_dev_i.h"
+
+#include <linux/fs.h>
+#include <linux/io_uring/cmd.h>
+
+static bool __read_mostly enable_uring;
+module_param(enable_uring, bool, 0644);
+MODULE_PARM_DESC(enable_uring,
+		 "Enable userspace communication through io-uring");
+
+#define FUSE_URING_IOV_SEGS 2 /* header and payload */
+
+
+bool fuse_uring_enabled(void)
+{
+	return enable_uring;
+}
+
+void fuse_uring_destruct(struct fuse_conn *fc)
+{
+	struct fuse_ring *ring = fc->ring;
+	int qid;
+
+	if (!ring)
+		return;
+
+	for (qid = 0; qid < ring->nr_queues; qid++) {
+		struct fuse_ring_queue *queue = ring->queues[qid];
+
+		if (!queue)
+			continue;
+
+		WARN_ON(!list_empty(&queue->ent_avail_queue));
+		WARN_ON(!list_empty(&queue->ent_commit_queue));
+
+		kfree(queue);
+		ring->queues[qid] = NULL;
+	}
+
+	kfree(ring->queues);
+	kfree(ring);
+	fc->ring = NULL;
+}
+
+/*
+ * Basic ring setup for this connection based on the provided configuration
+ */
+static struct fuse_ring *fuse_uring_create(struct fuse_conn *fc)
+{
+	struct fuse_ring *ring;
+	size_t nr_queues = num_possible_cpus();
+	struct fuse_ring *res = NULL;
+	size_t max_payload_size;
+
+	ring = kzalloc(sizeof(*fc->ring), GFP_KERNEL_ACCOUNT);
+	if (!ring)
+		return NULL;
+
+	ring->queues = kcalloc(nr_queues, sizeof(struct fuse_ring_queue *),
+			       GFP_KERNEL_ACCOUNT);
+	if (!ring->queues)
+		goto out_err;
+
+	max_payload_size = max(FUSE_MIN_READ_BUFFER, fc->max_write);
+	max_payload_size = max(max_payload_size, fc->max_pages * PAGE_SIZE);
+
+	spin_lock(&fc->lock);
+	if (fc->ring) {
+		/* race, another thread created the ring in the meantime */
+		spin_unlock(&fc->lock);
+		res = fc->ring;
+		goto out_err;
+	}
+
+	fc->ring = ring;
+	ring->nr_queues = nr_queues;
+	ring->fc = fc;
+	ring->max_payload_sz = max_payload_size;
+
+	spin_unlock(&fc->lock);
+	return ring;
+
+out_err:
+	kfree(ring->queues);
+	kfree(ring);
+	return res;
+}
+
+static struct fuse_ring_queue *fuse_uring_create_queue(struct fuse_ring *ring,
+						       int qid)
+{
+	struct fuse_conn *fc = ring->fc;
+	struct fuse_ring_queue *queue;
+
+	queue = kzalloc(sizeof(*queue), GFP_KERNEL_ACCOUNT);
+	if (!queue)
+		return NULL;
+	queue->qid = qid;
+	queue->ring = ring;
+	spin_lock_init(&queue->lock);
+
+	INIT_LIST_HEAD(&queue->ent_avail_queue);
+	INIT_LIST_HEAD(&queue->ent_commit_queue);
+
+	spin_lock(&fc->lock);
+	if (ring->queues[qid]) {
+		spin_unlock(&fc->lock);
+		kfree(queue);
+		return ring->queues[qid];
+	}
+
+	/*
+	 * write_once and lock as the caller mostly doesn't take the lock at all
+	 */
+	WRITE_ONCE(ring->queues[qid], queue);
+	spin_unlock(&fc->lock);
+
+	return queue;
+}
+
+/*
+ * Make a ring entry available for fuse_req assignment
+ */
+static void fuse_uring_ent_avail(struct fuse_ring_ent *ent,
+				 struct fuse_ring_queue *queue)
+{
+	WARN_ON_ONCE(!ent->cmd);
+	list_move(&ent->list, &queue->ent_avail_queue);
+	ent->state = FRRS_AVAILABLE;
+}
+
+/*
+ * fuse_uring_req_fetch command handling
+ */
+static void fuse_uring_do_register(struct fuse_ring_ent *ent,
+				   struct io_uring_cmd *cmd,
+				   unsigned int issue_flags)
+{
+	struct fuse_ring_queue *queue = ent->queue;
+
+	spin_lock(&queue->lock);
+	ent->cmd = cmd;
+	fuse_uring_ent_avail(ent, queue);
+	spin_unlock(&queue->lock);
+}
+
+/*
+ * sqe->addr is a ptr to an iovec array, iov[0] has the headers, iov[1]
+ * the payload
+ */
+static int fuse_uring_get_iovec_from_sqe(const struct io_uring_sqe *sqe,
+					 struct iovec iov[FUSE_URING_IOV_SEGS])
+{
+	struct iovec __user *uiov = u64_to_user_ptr(READ_ONCE(sqe->addr));
+	struct iov_iter iter;
+	ssize_t ret;
+
+	if (sqe->len != FUSE_URING_IOV_SEGS)
+		return -EINVAL;
+
+	/*
+	 * Direction for buffer access will actually be READ and WRITE,
+	 * using write for the import should include READ access as well.
+	 */
+	ret = import_iovec(WRITE, uiov, FUSE_URING_IOV_SEGS,
+			   FUSE_URING_IOV_SEGS, &iov, &iter);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+static struct fuse_ring_ent *
+fuse_uring_create_ring_ent(struct io_uring_cmd *cmd,
+			   struct fuse_ring_queue *queue)
+{
+	struct fuse_ring *ring = queue->ring;
+	struct fuse_ring_ent *ent;
+	size_t payload_size;
+	struct iovec iov[FUSE_URING_IOV_SEGS];
+	int err;
+
+	err = fuse_uring_get_iovec_from_sqe(cmd->sqe, iov);
+	if (err) {
+		pr_info_ratelimited("Failed to get iovec from sqe, err=%d\n",
+				    err);
+		return ERR_PTR(err);
+	}
+
+	err = -EINVAL;
+	if (iov[0].iov_len < sizeof(struct fuse_uring_req_header)) {
+		pr_info_ratelimited("Invalid header len %zu\n", iov[0].iov_len);
+		return ERR_PTR(err);
+	}
+
+	payload_size = iov[1].iov_len;
+	if (payload_size < ring->max_payload_sz) {
+		pr_info_ratelimited("Invalid req payload len %zu\n",
+				    payload_size);
+		return ERR_PTR(err);
+	}
+
+	err = -ENOMEM;
+	ent = kzalloc(sizeof(*ent), GFP_KERNEL_ACCOUNT);
+	if (!ent)
+		return ERR_PTR(err);
+
+	INIT_LIST_HEAD(&ent->list);
+
+	ent->queue = queue;
+	ent->headers = iov[0].iov_base;
+	ent->payload = iov[1].iov_base;
+
+	return ent;
+}
+
+/*
+ * Register header and payload buffer with the kernel and puts the
+ * entry as "ready to get fuse requests" on the queue
+ */
+static int fuse_uring_register(struct io_uring_cmd *cmd,
+			       unsigned int issue_flags, struct fuse_conn *fc)
+{
+	const struct fuse_uring_cmd_req *cmd_req = io_uring_sqe_cmd(cmd->sqe);
+	struct fuse_ring *ring = fc->ring;
+	struct fuse_ring_queue *queue;
+	struct fuse_ring_ent *ent;
+	int err;
+	unsigned int qid = READ_ONCE(cmd_req->qid);
+
+	err = -ENOMEM;
+	if (!ring) {
+		ring = fuse_uring_create(fc);
+		if (!ring)
+			return err;
+	}
+
+	if (qid >= ring->nr_queues) {
+		pr_info_ratelimited("fuse: Invalid ring qid %u\n", qid);
+		return -EINVAL;
+	}
+
+	queue = ring->queues[qid];
+	if (!queue) {
+		queue = fuse_uring_create_queue(ring, qid);
+		if (!queue)
+			return err;
+	}
+
+	/*
+	 * The created queue above does not need to be destructed in
+	 * case of entry errors below, will be done at ring destruction time.
+	 */
+
+	ent = fuse_uring_create_ring_ent(cmd, queue);
+	if (IS_ERR(ent))
+		return PTR_ERR(ent);
+
+	fuse_uring_do_register(ent, cmd, issue_flags);
+
+	return 0;
+}
+
+/*
+ * Entry function from io_uring to handle the given passthrough command
+ * (op code IORING_OP_URING_CMD)
+ */
+int __maybe_unused fuse_uring_cmd(struct io_uring_cmd *cmd,
+				  unsigned int issue_flags)
+{
+	struct fuse_dev *fud;
+	struct fuse_conn *fc;
+	u32 cmd_op = cmd->cmd_op;
+	int err;
+
+	if (!enable_uring) {
+		pr_info_ratelimited("fuse-io-uring is disabled\n");
+		return -EOPNOTSUPP;
+	}
+
+	/* This extra SQE size holds struct fuse_uring_cmd_req */
+	if (!(issue_flags & IO_URING_F_SQE128))
+		return -EINVAL;
+
+	fud = fuse_get_dev(cmd->file);
+	if (!fud) {
+		pr_info_ratelimited("No fuse device found\n");
+		return -ENOTCONN;
+	}
+	fc = fud->fc;
+
+	if (fc->aborted)
+		return -ECONNABORTED;
+	if (!fc->connected)
+		return -ENOTCONN;
+
+	/*
+	 * fuse_uring_register() needs the ring to be initialized,
+	 * we need to know the max payload size
+	 */
+	if (!fc->initialized)
+		return -EAGAIN;
+
+	switch (cmd_op) {
+	case FUSE_IO_URING_CMD_REGISTER:
+		err = fuse_uring_register(cmd, issue_flags, fc);
+		if (err) {
+			pr_info_once("FUSE_IO_URING_CMD_REGISTER failed err=%d\n",
+				     err);
+			return err;
+		}
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return -EIOCBQUEUED;
+}
diff --git a/fs/fuse/dev_uring_i.h b/fs/fuse/dev_uring_i.h
new file mode 100644
index 000000000000..ae1536355b36
--- /dev/null
+++ b/fs/fuse/dev_uring_i.h
@@ -0,0 +1,113 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * FUSE: Filesystem in Userspace
+ * Copyright (c) 2023-2024 DataDirect Networks.
+ */
+
+#ifndef _FS_FUSE_DEV_URING_I_H
+#define _FS_FUSE_DEV_URING_I_H
+
+#include "fuse_i.h"
+
+#ifdef CONFIG_FUSE_IO_URING
+
+enum fuse_ring_req_state {
+	FRRS_INVALID = 0,
+
+	/* The ring entry received from userspace and it is being processed */
+	FRRS_COMMIT,
+
+	/* The ring entry is waiting for new fuse requests */
+	FRRS_AVAILABLE,
+
+	/* The ring entry is in or on the way to user space */
+	FRRS_USERSPACE,
+};
+
+/** A fuse ring entry, part of the ring queue */
+struct fuse_ring_ent {
+	/* userspace buffer */
+	struct fuse_uring_req_header __user *headers;
+	void __user *payload;
+
+	/* the ring queue that owns the request */
+	struct fuse_ring_queue *queue;
+
+	/* fields below are protected by queue->lock */
+
+	struct io_uring_cmd *cmd;
+
+	struct list_head list;
+
+	enum fuse_ring_req_state state;
+
+	struct fuse_req *fuse_req;
+};
+
+struct fuse_ring_queue {
+	/*
+	 * back pointer to the main fuse uring structure that holds this
+	 * queue
+	 */
+	struct fuse_ring *ring;
+
+	/* queue id, corresponds to the cpu core */
+	unsigned int qid;
+
+	/*
+	 * queue lock, taken when any value in the queue changes _and_ also
+	 * a ring entry state changes.
+	 */
+	spinlock_t lock;
+
+	/* available ring entries (struct fuse_ring_ent) */
+	struct list_head ent_avail_queue;
+
+	/*
+	 * entries in the process of being committed or in the process
+	 * to be sent to userspace
+	 */
+	struct list_head ent_commit_queue;
+};
+
+/**
+ * Describes if uring is for communication and holds alls the data needed
+ * for uring communication
+ */
+struct fuse_ring {
+	/* back pointer */
+	struct fuse_conn *fc;
+
+	/* number of ring queues */
+	size_t nr_queues;
+
+	/* maximum payload/arg size */
+	size_t max_payload_sz;
+
+	struct fuse_ring_queue **queues;
+};
+
+bool fuse_uring_enabled(void);
+void fuse_uring_destruct(struct fuse_conn *fc);
+int fuse_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags);
+
+#else /* CONFIG_FUSE_IO_URING */
+
+struct fuse_ring;
+
+static inline void fuse_uring_create(struct fuse_conn *fc)
+{
+}
+
+static inline void fuse_uring_destruct(struct fuse_conn *fc)
+{
+}
+
+static inline bool fuse_uring_enabled(void)
+{
+	return false;
+}
+
+#endif /* CONFIG_FUSE_IO_URING */
+
+#endif /* _FS_FUSE_DEV_URING_I_H */
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 5666900bee5e..bce8cc482d64 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -923,6 +923,11 @@ struct fuse_conn {
 	/** IDR for backing files ids */
 	struct idr backing_files_map;
 #endif
+
+#ifdef CONFIG_FUSE_IO_URING
+	/**  uring connection information*/
+	struct fuse_ring *ring;
+#endif
 };
 
 /*
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 3ce4f4e81d09..e4f9bbacfc1b 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -7,6 +7,7 @@
 */
 
 #include "fuse_i.h"
+#include "dev_uring_i.h"
 
 #include <linux/pagemap.h>
 #include <linux/slab.h>
@@ -992,6 +993,8 @@ static void delayed_release(struct rcu_head *p)
 {
 	struct fuse_conn *fc = container_of(p, struct fuse_conn, rcu);
 
+	fuse_uring_destruct(fc);
+
 	put_user_ns(fc->user_ns);
 	fc->release(fc);
 }
@@ -1446,6 +1449,13 @@ void fuse_send_init(struct fuse_mount *fm)
 	if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH))
 		flags |= FUSE_PASSTHROUGH;
 
+	/*
+	 * This is just an information flag for fuse server. No need to check
+	 * the reply - server is either sending IORING_OP_URING_CMD or not.
+	 */
+	if (fuse_uring_enabled())
+		flags |= FUSE_OVER_IO_URING;
+
 	ia->in.flags = flags;
 	ia->in.flags2 = flags >> 32;
 
diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index f1e99458e29e..5e0eb41d967e 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -220,6 +220,15 @@
  *
  *  7.41
  *  - add FUSE_ALLOW_IDMAP
+ *  7.42
+ *  - Add FUSE_OVER_IO_URING and all other io-uring related flags and data
+ *    structures:
+ *    - struct fuse_uring_ent_in_out
+ *    - struct fuse_uring_req_header
+ *    - struct fuse_uring_cmd_req
+ *    - FUSE_URING_IN_OUT_HEADER_SZ
+ *    - FUSE_URING_OP_IN_OUT_SZ
+ *    - enum fuse_uring_cmd
  */
 
 #ifndef _LINUX_FUSE_H
@@ -255,7 +264,7 @@
 #define FUSE_KERNEL_VERSION 7
 
 /** Minor version number of this interface */
-#define FUSE_KERNEL_MINOR_VERSION 41
+#define FUSE_KERNEL_MINOR_VERSION 42
 
 /** The node ID of the root inode */
 #define FUSE_ROOT_ID 1
@@ -425,6 +434,7 @@ struct fuse_file_lock {
  * FUSE_HAS_RESEND: kernel supports resending pending requests, and the high bit
  *		    of the request ID indicates resend requests
  * FUSE_ALLOW_IDMAP: allow creation of idmapped mounts
+ * FUSE_OVER_IO_URING: Indicate that client supports io-uring
  */
 #define FUSE_ASYNC_READ		(1 << 0)
 #define FUSE_POSIX_LOCKS	(1 << 1)
@@ -471,6 +481,7 @@ struct fuse_file_lock {
 /* Obsolete alias for FUSE_DIRECT_IO_ALLOW_MMAP */
 #define FUSE_DIRECT_IO_RELAX	FUSE_DIRECT_IO_ALLOW_MMAP
 #define FUSE_ALLOW_IDMAP	(1ULL << 40)
+#define FUSE_OVER_IO_URING	(1ULL << 41)
 
 /**
  * CUSE INIT request/reply flags
@@ -1206,4 +1217,67 @@ struct fuse_supp_groups {
 	uint32_t	groups[];
 };
 
+/**
+ * Size of the ring buffer header
+ */
+#define FUSE_URING_IN_OUT_HEADER_SZ 128
+#define FUSE_URING_OP_IN_OUT_SZ 128
+
+/* Used as part of the fuse_uring_req_header */
+struct fuse_uring_ent_in_out {
+	uint64_t flags;
+
+	/*
+	 * commit ID to be used in a reply to a ring request (see also
+	 * struct fuse_uring_cmd_req)
+	 */
+	uint64_t commit_id;
+
+	/* size of user payload buffer */
+	uint32_t payload_sz;
+	uint32_t padding;
+
+	uint64_t reserved;
+};
+
+/**
+ * Header for all fuse-io-uring requests
+ */
+struct fuse_uring_req_header {
+	/* struct fuse_in_header / struct fuse_out_header */
+	char in_out[FUSE_URING_IN_OUT_HEADER_SZ];
+
+	/* per op code header */
+	char op_in[FUSE_URING_OP_IN_OUT_SZ];
+
+	struct fuse_uring_ent_in_out ring_ent_in_out;
+};
+
+/**
+ * sqe commands to the kernel
+ */
+enum fuse_uring_cmd {
+	FUSE_IO_URING_CMD_INVALID = 0,
+
+	/* register the request buffer and fetch a fuse request */
+	FUSE_IO_URING_CMD_REGISTER = 1,
+
+	/* commit fuse request result and fetch next request */
+	FUSE_IO_URING_CMD_COMMIT_AND_FETCH = 2,
+};
+
+/**
+ * In the 80B command area of the SQE.
+ */
+struct fuse_uring_cmd_req {
+	uint64_t flags;
+
+	/* entry identifier for commits */
+	uint64_t commit_id;
+
+	/* queue the command is for (queue index) */
+	uint16_t qid;
+	uint8_t padding[6];
+};
+
 #endif /* _LINUX_FUSE_H */
-- 
cgit v1.2.3


From 1629ee1078fc2ac38e324adb0e50c8b04ec714a2 Mon Sep 17 00:00:00 2001
From: Shijith Thotton <sthotton@marvell.com>
Date: Fri, 3 Jan 2025 21:01:36 +0530
Subject: virtio-pci: define type and header for PCI vendor data

Added macro definition for VIRTIO_PCI_CAP_VENDOR_CFG to identify the PCI
vendor data type in the virtio_pci_cap structure. Defined a new struct
virtio_pci_vndr_data for the vendor data capability header as per the
specification.

Acked-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Shijith Thotton <sthotton@marvell.com>
Message-Id: <20250103153226.1933479-3-sthotton@marvell.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/uapi/linux/virtio_pci.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/virtio_pci.h b/include/uapi/linux/virtio_pci.h
index 1beb317df1b9..8549d4571257 100644
--- a/include/uapi/linux/virtio_pci.h
+++ b/include/uapi/linux/virtio_pci.h
@@ -116,6 +116,8 @@
 #define VIRTIO_PCI_CAP_PCI_CFG		5
 /* Additional shared memory capability */
 #define VIRTIO_PCI_CAP_SHARED_MEMORY_CFG 8
+/* PCI vendor data configuration */
+#define VIRTIO_PCI_CAP_VENDOR_CFG	9
 
 /* This is the PCI capability header: */
 struct virtio_pci_cap {
@@ -130,6 +132,18 @@ struct virtio_pci_cap {
 	__le32 length;		/* Length of the structure, in bytes. */
 };
 
+/* This is the PCI vendor data capability header: */
+struct virtio_pci_vndr_data {
+	__u8 cap_vndr;		/* Generic PCI field: PCI_CAP_ID_VNDR */
+	__u8 cap_next;		/* Generic PCI field: next ptr. */
+	__u8 cap_len;		/* Generic PCI field: capability length */
+	__u8 cfg_type;		/* Identifies the structure. */
+	__u16 vendor_id;	/* Identifies the vendor-specific format. */
+	/* For Vendor Definition */
+	/* Pads structure to a multiple of 4 bytes */
+	/* Reads must not have side effects */
+};
+
 struct virtio_pci_cap64 {
 	struct virtio_pci_cap cap;
 	__le32 offset_hi;             /* Most sig 32 bits of offset */
-- 
cgit v1.2.3