From e56cadaa27fd156106c5583ed98976927c6febc9 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sat, 6 Dec 2025 16:47:40 -0800 Subject: ynl: add regen hint to new headers Recent commit 68e83f347266 ("tools: ynl-gen: add regeneration comment") added a hint how to regenerate the code to the headers. Update the new headers from this release cycle to also include it. Reviewed-by: Simon Horman Link: https://patch.msgid.link/20251207004740.1657799-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/uapi/linux/energy_model.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/energy_model.h b/include/uapi/linux/energy_model.h index 4ec4c0eabbbb..0bcad967854f 100644 --- a/include/uapi/linux/energy_model.h +++ b/include/uapi/linux/energy_model.h @@ -2,6 +2,7 @@ /* Do not edit directly, auto-generated from: */ /* Documentation/netlink/specs/em.yaml */ /* YNL-GEN uapi header */ +/* To regenerate run: tools/net/ynl/ynl-regen.sh */ #ifndef _UAPI_LINUX_ENERGY_MODEL_H #define _UAPI_LINUX_ENERGY_MODEL_H -- cgit v1.2.3 From 0ace3297a7301911e52d8195cb1006414897c859 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Fri, 5 Dec 2025 19:55:14 +0100 Subject: mptcp: pm: ignore unknown endpoint flags Before this patch, the kernel was saving any flags set by the userspace, even unknown ones. This doesn't cause critical issues because the kernel is only looking at specific ones. But on the other hand, endpoints dumps could tell the userspace some recent flags seem to be supported on older kernel versions. Instead, ignore all unknown flags when parsing them. By doing that, the userspace can continue to set unsupported flags, but it has a way to verify what is supported by the kernel. Note that it sounds better to continue accepting unsupported flags not to change the behaviour, but also that eases things on the userspace side by adding "optional" endpoint types only supported by newer kernel versions without having to deal with the different kernel versions. A note for the backports: there will be conflicts in mptcp.h on older versions not having the mentioned flags, the new line should still be added last, and the '5' needs to be adapted to have the same value as the last entry. Fixes: 01cacb00b35c ("mptcp: add netlink-based PM") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20251205-net-mptcp-misc-fixes-6-19-rc1-v1-1-9e4781a6c1b8@kernel.org Signed-off-by: Jakub Kicinski --- include/uapi/linux/mptcp.h | 1 + net/mptcp/pm_netlink.c | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h index 04eea6d1d0a9..72a5d030154e 100644 --- a/include/uapi/linux/mptcp.h +++ b/include/uapi/linux/mptcp.h @@ -40,6 +40,7 @@ #define MPTCP_PM_ADDR_FLAG_FULLMESH _BITUL(3) #define MPTCP_PM_ADDR_FLAG_IMPLICIT _BITUL(4) #define MPTCP_PM_ADDR_FLAG_LAMINAR _BITUL(5) +#define MPTCP_PM_ADDR_FLAGS_MASK GENMASK(5, 0) struct mptcp_info { __u8 mptcpi_subflows; diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index d5b383870f79..7aa42de9c47b 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -119,7 +119,8 @@ int mptcp_pm_parse_entry(struct nlattr *attr, struct genl_info *info, } if (tb[MPTCP_PM_ADDR_ATTR_FLAGS]) - entry->flags = nla_get_u32(tb[MPTCP_PM_ADDR_ATTR_FLAGS]); + entry->flags = nla_get_u32(tb[MPTCP_PM_ADDR_ATTR_FLAGS]) & + MPTCP_PM_ADDR_FLAGS_MASK; if (tb[MPTCP_PM_ADDR_ATTR_PORT]) entry->addr.port = htons(nla_get_u16(tb[MPTCP_PM_ADDR_ATTR_PORT])); -- cgit v1.2.3 From ca45c84afb8c91a8d688b0012657099c24f59266 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 3 Dec 2025 19:32:15 -0800 Subject: bpf: Add bpf_has_frame_pointer() Introduce a bpf_has_frame_pointer() helper that unwinders can call to determine whether a given instruction pointer is within the valid frame pointer region of a BPF JIT program or trampoline (i.e., after the prologue, before the epilogue). This will enable livepatch (with the ORC unwinder) to reliably unwind through BPF JIT frames. Acked-by: Song Liu Acked-and-tested-by: Andrey Grodzovsky Signed-off-by: Josh Poimboeuf Link: https://lore.kernel.org/r/fd2bc5b4e261a680774b28f6100509fd5ebad2f0.1764818927.git.jpoimboe@kernel.org Signed-off-by: Alexei Starovoitov Reviewed-by: Jiri Olsa --- arch/x86/net/bpf_jit_comp.c | 12 ++++++++++++ include/linux/bpf.h | 3 +++ kernel/bpf/core.c | 16 ++++++++++++++++ 3 files changed, 31 insertions(+) (limited to 'include') diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index b69dc7194e2c..b0bac2a66eff 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1678,6 +1678,9 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image emit_prologue(&prog, image, stack_depth, bpf_prog_was_classic(bpf_prog), tail_call_reachable, bpf_is_subprog(bpf_prog), bpf_prog->aux->exception_cb); + + bpf_prog->aux->ksym.fp_start = prog - temp; + /* Exception callback will clobber callee regs for its own use, and * restore the original callee regs from main prog's stack frame. */ @@ -2736,6 +2739,8 @@ emit_jmp: pop_r12(&prog); } EMIT1(0xC9); /* leave */ + bpf_prog->aux->ksym.fp_end = prog - temp; + emit_return(&prog, image + addrs[i - 1] + (prog - temp)); break; @@ -3325,6 +3330,9 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im } EMIT1(0x55); /* push rbp */ EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */ + if (im) + im->ksym.fp_start = prog - (u8 *)rw_image; + if (!is_imm8(stack_size)) { /* sub rsp, stack_size */ EMIT3_off32(0x48, 0x81, 0xEC, stack_size); @@ -3462,7 +3470,11 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, -8); emit_ldx(&prog, BPF_DW, BPF_REG_6, BPF_REG_FP, -rbx_off); + EMIT1(0xC9); /* leave */ + if (im) + im->ksym.fp_end = prog - (u8 *)rw_image; + if (flags & BPF_TRAMP_F_SKIP_FRAME) { /* skip our return address and return to parent */ EMIT4(0x48, 0x83, 0xC4, 8); /* add rsp, 8 */ diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 6498be4c44f8..e5be698256d1 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1283,6 +1283,8 @@ struct bpf_ksym { struct list_head lnode; struct latch_tree_node tnode; bool prog; + u32 fp_start; + u32 fp_end; }; enum bpf_tramp_prog_type { @@ -1511,6 +1513,7 @@ void bpf_image_ksym_add(struct bpf_ksym *ksym); void bpf_image_ksym_del(struct bpf_ksym *ksym); void bpf_ksym_add(struct bpf_ksym *ksym); void bpf_ksym_del(struct bpf_ksym *ksym); +bool bpf_has_frame_pointer(unsigned long ip); int bpf_jit_charge_modmem(u32 size); void bpf_jit_uncharge_modmem(u32 size); bool bpf_prog_has_trampoline(const struct bpf_prog *prog); diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index c8ae6ab31651..1b9b18e5b03c 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -760,6 +760,22 @@ struct bpf_prog *bpf_prog_ksym_find(unsigned long addr) NULL; } +bool bpf_has_frame_pointer(unsigned long ip) +{ + struct bpf_ksym *ksym; + unsigned long offset; + + guard(rcu)(); + + ksym = bpf_ksym_find(ip); + if (!ksym || !ksym->fp_start || !ksym->fp_end) + return false; + + offset = ip - ksym->start; + + return offset >= ksym->fp_start && offset < ksym->fp_end; +} + const struct exception_table_entry *search_bpf_extables(unsigned long addr) { const struct exception_table_entry *e = NULL; -- cgit v1.2.3 From 1231eec6994be29d6bb5c303dfa54731ed9fc0e6 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sat, 6 Dec 2025 17:09:40 -0800 Subject: inet: frags: add inet_frag_queue_flush() Instead of exporting inet_frag_rbtree_purge() which requires that caller takes care of memory accounting, add a new helper. We will need to call it from a few places in the next patch. Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20251207010942.1672972-3-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/net/inet_frag.h | 5 ++--- net/ipv4/inet_fragment.c | 15 ++++++++++++--- net/ipv4/ip_fragment.c | 6 +----- 3 files changed, 15 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 0eccd9c3a883..3ffaceee7bbc 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -141,9 +141,8 @@ void inet_frag_kill(struct inet_frag_queue *q, int *refs); void inet_frag_destroy(struct inet_frag_queue *q); struct inet_frag_queue *inet_frag_find(struct fqdir *fqdir, void *key); -/* Free all skbs in the queue; return the sum of their truesizes. */ -unsigned int inet_frag_rbtree_purge(struct rb_root *root, - enum skb_drop_reason reason); +void inet_frag_queue_flush(struct inet_frag_queue *q, + enum skb_drop_reason reason); static inline void inet_frag_putn(struct inet_frag_queue *q, int refs) { diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 30f4fa50ee2d..1bf969b5a1cb 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -263,8 +263,8 @@ static void inet_frag_destroy_rcu(struct rcu_head *head) kmem_cache_free(f->frags_cachep, q); } -unsigned int inet_frag_rbtree_purge(struct rb_root *root, - enum skb_drop_reason reason) +static unsigned int +inet_frag_rbtree_purge(struct rb_root *root, enum skb_drop_reason reason) { struct rb_node *p = rb_first(root); unsigned int sum = 0; @@ -284,7 +284,16 @@ unsigned int inet_frag_rbtree_purge(struct rb_root *root, } return sum; } -EXPORT_SYMBOL(inet_frag_rbtree_purge); + +void inet_frag_queue_flush(struct inet_frag_queue *q, + enum skb_drop_reason reason) +{ + unsigned int sum; + + sum = inet_frag_rbtree_purge(&q->rb_fragments, reason); + sub_frag_mem_limit(q->fqdir, sum); +} +EXPORT_SYMBOL(inet_frag_queue_flush); void inet_frag_destroy(struct inet_frag_queue *q) { diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index d7bccdc9dc69..32f1c1a46ba7 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -240,14 +240,10 @@ static int ip_frag_too_far(struct ipq *qp) static int ip_frag_reinit(struct ipq *qp) { - unsigned int sum_truesize = 0; - if (!mod_timer_pending(&qp->q.timer, jiffies + qp->q.fqdir->timeout)) return -ETIMEDOUT; - sum_truesize = inet_frag_rbtree_purge(&qp->q.rb_fragments, - SKB_DROP_REASON_FRAG_TOO_FAR); - sub_frag_mem_limit(qp->q.fqdir, sum_truesize); + inet_frag_queue_flush(&qp->q, SKB_DROP_REASON_FRAG_TOO_FAR); qp->q.flags = 0; qp->q.len = 0; -- cgit v1.2.3 From 006a5035b495dec008805df249f92c22c89c3d2e Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sat, 6 Dec 2025 17:09:41 -0800 Subject: inet: frags: flush pending skbs in fqdir_pre_exit() We have been seeing occasional deadlocks on pernet_ops_rwsem since September in NIPA. The stuck task was usually modprobe (often loading a driver like ipvlan), trying to take the lock as a Writer. lockdep does not track readers for rwsems so the read wasn't obvious from the reports. On closer inspection the Reader holding the lock was conntrack looping forever in nf_conntrack_cleanup_net_list(). Based on past experience with occasional NIPA crashes I looked thru the tests which run before the crash and noticed that the crash follows ip_defrag.sh. An immediate red flag. Scouring thru (de)fragmentation queues reveals skbs sitting around, holding conntrack references. The problem is that since conntrack depends on nf_defrag_ipv6, nf_defrag_ipv6 will load first. Since nf_defrag_ipv6 loads first its netns exit hooks run _after_ conntrack's netns exit hook. Flush all fragment queue SKBs during fqdir_pre_exit() to release conntrack references before conntrack cleanup runs. Also flush the queues in timer expiry handlers when they discover fqdir->dead is set, in case packet sneaks in while we're running the pre_exit flush. The commit under Fixes is not exactly the culprit, but I think previously the timer firing would eventually unblock the spinning conntrack. Fixes: d5dd88794a13 ("inet: fix various use-after-free in defrags units") Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20251207010942.1672972-4-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/net/inet_frag.h | 13 +------------ include/net/ipv6_frag.h | 9 ++++++--- net/ipv4/inet_fragment.c | 36 ++++++++++++++++++++++++++++++++++++ net/ipv4/ip_fragment.c | 12 +++++++----- 4 files changed, 50 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 3ffaceee7bbc..365925c9d262 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -123,18 +123,7 @@ void inet_frags_fini(struct inet_frags *); int fqdir_init(struct fqdir **fqdirp, struct inet_frags *f, struct net *net); -static inline void fqdir_pre_exit(struct fqdir *fqdir) -{ - /* Prevent creation of new frags. - * Pairs with READ_ONCE() in inet_frag_find(). - */ - WRITE_ONCE(fqdir->high_thresh, 0); - - /* Pairs with READ_ONCE() in inet_frag_kill(), ip_expire() - * and ip6frag_expire_frag_queue(). - */ - WRITE_ONCE(fqdir->dead, true); -} +void fqdir_pre_exit(struct fqdir *fqdir); void fqdir_exit(struct fqdir *fqdir); void inet_frag_kill(struct inet_frag_queue *q, int *refs); diff --git a/include/net/ipv6_frag.h b/include/net/ipv6_frag.h index 38ef66826939..41d9fc6965f9 100644 --- a/include/net/ipv6_frag.h +++ b/include/net/ipv6_frag.h @@ -69,9 +69,6 @@ ip6frag_expire_frag_queue(struct net *net, struct frag_queue *fq) int refs = 1; rcu_read_lock(); - /* Paired with the WRITE_ONCE() in fqdir_pre_exit(). */ - if (READ_ONCE(fq->q.fqdir->dead)) - goto out_rcu_unlock; spin_lock(&fq->q.lock); if (fq->q.flags & INET_FRAG_COMPLETE) @@ -80,6 +77,12 @@ ip6frag_expire_frag_queue(struct net *net, struct frag_queue *fq) fq->q.flags |= INET_FRAG_DROP; inet_frag_kill(&fq->q, &refs); + /* Paired with the WRITE_ONCE() in fqdir_pre_exit(). */ + if (READ_ONCE(fq->q.fqdir->dead)) { + inet_frag_queue_flush(&fq->q, 0); + goto out; + } + dev = dev_get_by_index_rcu(net, fq->iif); if (!dev) goto out; diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 1bf969b5a1cb..001ee5c4d962 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -218,6 +218,41 @@ static int __init inet_frag_wq_init(void) pure_initcall(inet_frag_wq_init); +void fqdir_pre_exit(struct fqdir *fqdir) +{ + struct inet_frag_queue *fq; + struct rhashtable_iter hti; + + /* Prevent creation of new frags. + * Pairs with READ_ONCE() in inet_frag_find(). + */ + WRITE_ONCE(fqdir->high_thresh, 0); + + /* Pairs with READ_ONCE() in inet_frag_kill(), ip_expire() + * and ip6frag_expire_frag_queue(). + */ + WRITE_ONCE(fqdir->dead, true); + + rhashtable_walk_enter(&fqdir->rhashtable, &hti); + rhashtable_walk_start(&hti); + + while ((fq = rhashtable_walk_next(&hti))) { + if (IS_ERR(fq)) { + if (PTR_ERR(fq) != -EAGAIN) + break; + continue; + } + spin_lock_bh(&fq->lock); + if (!(fq->flags & INET_FRAG_COMPLETE)) + inet_frag_queue_flush(fq, 0); + spin_unlock_bh(&fq->lock); + } + + rhashtable_walk_stop(&hti); + rhashtable_walk_exit(&hti); +} +EXPORT_SYMBOL(fqdir_pre_exit); + void fqdir_exit(struct fqdir *fqdir) { INIT_WORK(&fqdir->destroy_work, fqdir_work_fn); @@ -290,6 +325,7 @@ void inet_frag_queue_flush(struct inet_frag_queue *q, { unsigned int sum; + reason = reason ?: SKB_DROP_REASON_FRAG_REASM_TIMEOUT; sum = inet_frag_rbtree_purge(&q->rb_fragments, reason); sub_frag_mem_limit(q->fqdir, sum); } diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 32f1c1a46ba7..56b0f738d2f2 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -134,11 +134,6 @@ static void ip_expire(struct timer_list *t) net = qp->q.fqdir->net; rcu_read_lock(); - - /* Paired with WRITE_ONCE() in fqdir_pre_exit(). */ - if (READ_ONCE(qp->q.fqdir->dead)) - goto out_rcu_unlock; - spin_lock(&qp->q.lock); if (qp->q.flags & INET_FRAG_COMPLETE) @@ -146,6 +141,13 @@ static void ip_expire(struct timer_list *t) qp->q.flags |= INET_FRAG_DROP; inet_frag_kill(&qp->q, &refs); + + /* Paired with WRITE_ONCE() in fqdir_pre_exit(). */ + if (READ_ONCE(qp->q.fqdir->dead)) { + inet_frag_queue_flush(&qp->q, 0); + goto out; + } + __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS); __IP_INC_STATS(net, IPSTATS_MIB_REASMTIMEOUT); -- cgit v1.2.3 From d9f514d3e6ee48c34d70d637479b4c9384832d4f Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sun, 23 Nov 2025 22:51:23 +0000 Subject: block: move around bio flagging helpers We'll need bio_flagged() earlier in bio.h for later patches, move it together with all related helpers, and mark the bio_flagged()'s bio argument as const. Signed-off-by: Pavel Begunkov Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/bio.h | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/bio.h b/include/linux/bio.h index ad2d57908c1c..c75a9b3672aa 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -46,6 +46,21 @@ static inline unsigned int bio_max_segs(unsigned int nr_segs) #define bio_data_dir(bio) \ (op_is_write(bio_op(bio)) ? WRITE : READ) +static inline bool bio_flagged(const struct bio *bio, unsigned int bit) +{ + return bio->bi_flags & (1U << bit); +} + +static inline void bio_set_flag(struct bio *bio, unsigned int bit) +{ + bio->bi_flags |= (1U << bit); +} + +static inline void bio_clear_flag(struct bio *bio, unsigned int bit) +{ + bio->bi_flags &= ~(1U << bit); +} + /* * Check whether this bio carries any data or not. A NULL bio is allowed. */ @@ -225,21 +240,6 @@ static inline void bio_cnt_set(struct bio *bio, unsigned int count) atomic_set(&bio->__bi_cnt, count); } -static inline bool bio_flagged(struct bio *bio, unsigned int bit) -{ - return bio->bi_flags & (1U << bit); -} - -static inline void bio_set_flag(struct bio *bio, unsigned int bit) -{ - bio->bi_flags |= (1U << bit); -} - -static inline void bio_clear_flag(struct bio *bio, unsigned int bit) -{ - bio->bi_flags &= ~(1U << bit); -} - static inline struct bio_vec *bio_first_bvec_all(struct bio *bio) { WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)); -- cgit v1.2.3 From 8b62e64e6d30fa047b3aefb1a36e1f80c8acb3d2 Mon Sep 17 00:00:00 2001 From: Tal Zussman Date: Fri, 12 Dec 2025 04:08:07 -0500 Subject: x86/mm/tlb/trace: Export the TLB_REMOTE_WRONG_CPU enum in When the TLB_REMOTE_WRONG_CPU enum was introduced for the tlb_flush tracepoint, the enum was not exported to user-space. Add it to the appropriate macro definition to enable parsing by userspace tools, as per: Link: https://lore.kernel.org/all/20150403013802.220157513@goodmis.org [ mingo: Capitalize IPI, etc. ] Fixes: 2815a56e4b72 ("x86/mm/tlb: Add tracepoint for TLB flush IPI to stale CPU") Signed-off-by: Tal Zussman Signed-off-by: Ingo Molnar Reviewed-by: Steven Rostedt (Google) Reviewed-by: David Hildenbrand Reviewed-by: Rik van Riel Link: https://patch.msgid.link/20251212-tlb-trace-fix-v2-1-d322e0ad9b69@columbia.edu --- include/trace/events/tlb.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/trace/events/tlb.h b/include/trace/events/tlb.h index b4d8e7dc38f8..fb8369511685 100644 --- a/include/trace/events/tlb.h +++ b/include/trace/events/tlb.h @@ -12,8 +12,9 @@ EM( TLB_FLUSH_ON_TASK_SWITCH, "flush on task switch" ) \ EM( TLB_REMOTE_SHOOTDOWN, "remote shootdown" ) \ EM( TLB_LOCAL_SHOOTDOWN, "local shootdown" ) \ - EM( TLB_LOCAL_MM_SHOOTDOWN, "local mm shootdown" ) \ - EMe( TLB_REMOTE_SEND_IPI, "remote ipi send" ) + EM( TLB_LOCAL_MM_SHOOTDOWN, "local MM shootdown" ) \ + EM( TLB_REMOTE_SEND_IPI, "remote IPI send" ) \ + EMe( TLB_REMOTE_WRONG_CPU, "remote wrong CPU" ) /* * First define the enums in TLB_FLUSH_REASON to be exported to userspace -- cgit v1.2.3 From 0c01ea92f545ca7fcafdda6a8e29b65ef3a5ec74 Mon Sep 17 00:00:00 2001 From: Tal Zussman Date: Fri, 12 Dec 2025 04:08:08 -0500 Subject: mm: Remove tlb_flush_reason::NR_TLB_FLUSH_REASONS from This has been unused since it was added 11 years ago in: d17d8f9dedb9 ("x86/mm: Add tracepoints for TLB flushes") Signed-off-by: Tal Zussman Signed-off-by: Ingo Molnar Reviewed-by: Rik van Riel Acked-by: David Hildenbrand Link: https://patch.msgid.link/20251212-tlb-trace-fix-v2-2-d322e0ad9b69@columbia.edu --- include/linux/mm_types.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 9f6de068295d..42af2292951d 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -1631,7 +1631,6 @@ enum tlb_flush_reason { TLB_LOCAL_MM_SHOOTDOWN, TLB_REMOTE_SEND_IPI, TLB_REMOTE_WRONG_CPU, - NR_TLB_FLUSH_REASONS, }; /** -- cgit v1.2.3 From 8e1a1bc4f5a42747c08130b8242ebebd1210b32f Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 7 Jul 2024 01:18:25 +0200 Subject: netfilter: nf_tables: avoid chain re-validation if possible Hamza Mahfooz reports cpu soft lock-ups in nft_chain_validate(): watchdog: BUG: soft lockup - CPU#1 stuck for 27s! [iptables-nft-re:37547] [..] RIP: 0010:nft_chain_validate+0xcb/0x110 [nf_tables] [..] nft_immediate_validate+0x36/0x50 [nf_tables] nft_chain_validate+0xc9/0x110 [nf_tables] nft_immediate_validate+0x36/0x50 [nf_tables] nft_chain_validate+0xc9/0x110 [nf_tables] nft_immediate_validate+0x36/0x50 [nf_tables] nft_chain_validate+0xc9/0x110 [nf_tables] nft_immediate_validate+0x36/0x50 [nf_tables] nft_chain_validate+0xc9/0x110 [nf_tables] nft_immediate_validate+0x36/0x50 [nf_tables] nft_chain_validate+0xc9/0x110 [nf_tables] nft_immediate_validate+0x36/0x50 [nf_tables] nft_chain_validate+0xc9/0x110 [nf_tables] nft_table_validate+0x6b/0xb0 [nf_tables] nf_tables_validate+0x8b/0xa0 [nf_tables] nf_tables_commit+0x1df/0x1eb0 [nf_tables] [..] Currently nf_tables will traverse the entire table (chain graph), starting from the entry points (base chains), exploring all possible paths (chain jumps). But there are cases where we could avoid revalidation. Consider: 1 input -> j2 -> j3 2 input -> j2 -> j3 3 input -> j1 -> j2 -> j3 Then the second rule does not need to revalidate j2, and, by extension j3, because this was already checked during validation of the first rule. We need to validate it only for rule 3. This is needed because chain loop detection also ensures we do not exceed the jump stack: Just because we know that j2 is cycle free, its last jump might now exceed the allowed stack size. We also need to update all reachable chains with the new largest observed call depth. Care has to be taken to revalidate even if the chain depth won't be an issue: chain validation also ensures that expressions are not called from invalid base chains. For example, the masquerade expression can only be called from NAT postrouting base chains. Therefore we also need to keep record of the base chain context (type, hooknum) and revalidate if the chain becomes reachable from a different hook location. Reported-by: Hamza Mahfooz Closes: https://lore.kernel.org/netfilter-devel/20251118221735.GA5477@linuxonhyperv3.guj3yctzbm1etfxqx2vob5hsef.xx.internal.cloudapp.net/ Tested-by: Hamza Mahfooz Signed-off-by: Florian Westphal --- include/net/netfilter/nf_tables.h | 34 ++++++++++++++----- net/netfilter/nf_tables_api.c | 69 ++++++++++++++++++++++++++++++++++++--- 2 files changed, 91 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index fab7dc73f738..0e266c2d0e7f 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1091,6 +1091,29 @@ struct nft_rule_blob { __attribute__((aligned(__alignof__(struct nft_rule_dp)))); }; +enum nft_chain_types { + NFT_CHAIN_T_DEFAULT = 0, + NFT_CHAIN_T_ROUTE, + NFT_CHAIN_T_NAT, + NFT_CHAIN_T_MAX +}; + +/** + * struct nft_chain_validate_state - validation state + * + * If a chain is encountered again during table validation it is + * possible to avoid revalidation provided the calling context is + * compatible. This structure stores relevant calling context of + * previous validations. + * + * @hook_mask: the hook numbers and locations the chain is linked to + * @depth: the deepest call chain level the chain is linked to + */ +struct nft_chain_validate_state { + u8 hook_mask[NFT_CHAIN_T_MAX]; + u8 depth; +}; + /** * struct nft_chain - nf_tables chain * @@ -1109,6 +1132,7 @@ struct nft_rule_blob { * @udlen: user data length * @udata: user data in the chain * @blob_next: rule blob pointer to the next in the chain + * @vstate: validation state */ struct nft_chain { struct nft_rule_blob __rcu *blob_gen_0; @@ -1128,9 +1152,10 @@ struct nft_chain { /* Only used during control plane commit phase: */ struct nft_rule_blob *blob_next; + struct nft_chain_validate_state vstate; }; -int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain); +int nft_chain_validate(const struct nft_ctx *ctx, struct nft_chain *chain); int nft_setelem_validate(const struct nft_ctx *ctx, struct nft_set *set, const struct nft_set_iter *iter, struct nft_elem_priv *elem_priv); @@ -1138,13 +1163,6 @@ int nft_set_catchall_validate(const struct nft_ctx *ctx, struct nft_set *set); int nf_tables_bind_chain(const struct nft_ctx *ctx, struct nft_chain *chain); void nf_tables_unbind_chain(const struct nft_ctx *ctx, struct nft_chain *chain); -enum nft_chain_types { - NFT_CHAIN_T_DEFAULT = 0, - NFT_CHAIN_T_ROUTE, - NFT_CHAIN_T_NAT, - NFT_CHAIN_T_MAX -}; - /** * struct nft_chain_type - nf_tables chain type info * diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index c46b1bb0efe0..a9f6babcc781 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -123,6 +123,29 @@ static void nft_validate_state_update(struct nft_table *table, u8 new_validate_s table->validate_state = new_validate_state; } + +static bool nft_chain_vstate_valid(const struct nft_ctx *ctx, + const struct nft_chain *chain) +{ + const struct nft_base_chain *base_chain; + enum nft_chain_types type; + u8 hooknum; + + if (WARN_ON_ONCE(!nft_is_base_chain(ctx->chain))) + return false; + + base_chain = nft_base_chain(ctx->chain); + hooknum = base_chain->ops.hooknum; + type = base_chain->type->type; + + /* chain is already validated for this call depth */ + if (chain->vstate.depth >= ctx->level && + chain->vstate.hook_mask[type] & BIT(hooknum)) + return true; + + return false; +} + static void nf_tables_trans_destroy_work(struct work_struct *w); static void nft_trans_gc_work(struct work_struct *work); @@ -4079,6 +4102,29 @@ static void nf_tables_rule_release(const struct nft_ctx *ctx, struct nft_rule *r nf_tables_rule_destroy(ctx, rule); } +static void nft_chain_vstate_update(const struct nft_ctx *ctx, struct nft_chain *chain) +{ + const struct nft_base_chain *base_chain; + enum nft_chain_types type; + u8 hooknum; + + /* ctx->chain must hold the calling base chain. */ + if (WARN_ON_ONCE(!nft_is_base_chain(ctx->chain))) { + memset(&chain->vstate, 0, sizeof(chain->vstate)); + return; + } + + base_chain = nft_base_chain(ctx->chain); + hooknum = base_chain->ops.hooknum; + type = base_chain->type->type; + + BUILD_BUG_ON(BIT(NF_INET_NUMHOOKS) > U8_MAX); + + chain->vstate.hook_mask[type] |= BIT(hooknum); + if (chain->vstate.depth < ctx->level) + chain->vstate.depth = ctx->level; +} + /** nft_chain_validate - loop detection and hook validation * * @ctx: context containing call depth and base chain @@ -4088,15 +4134,25 @@ static void nf_tables_rule_release(const struct nft_ctx *ctx, struct nft_rule *r * and set lookups until either the jump limit is hit or all reachable * chains have been validated. */ -int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain) +int nft_chain_validate(const struct nft_ctx *ctx, struct nft_chain *chain) { struct nft_expr *expr, *last; struct nft_rule *rule; int err; + BUILD_BUG_ON(NFT_JUMP_STACK_SIZE > 255); if (ctx->level == NFT_JUMP_STACK_SIZE) return -EMLINK; + if (ctx->level > 0) { + /* jumps to base chains are not allowed. */ + if (nft_is_base_chain(chain)) + return -ELOOP; + + if (nft_chain_vstate_valid(ctx, chain)) + return 0; + } + list_for_each_entry(rule, &chain->rules, list) { if (fatal_signal_pending(current)) return -EINTR; @@ -4117,6 +4173,7 @@ int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain) } } + nft_chain_vstate_update(ctx, chain); return 0; } EXPORT_SYMBOL_GPL(nft_chain_validate); @@ -4128,7 +4185,7 @@ static int nft_table_validate(struct net *net, const struct nft_table *table) .net = net, .family = table->family, }; - int err; + int err = 0; list_for_each_entry(chain, &table->chains, list) { if (!nft_is_base_chain(chain)) @@ -4137,12 +4194,16 @@ static int nft_table_validate(struct net *net, const struct nft_table *table) ctx.chain = chain; err = nft_chain_validate(&ctx, chain); if (err < 0) - return err; + goto err; cond_resched(); } - return 0; +err: + list_for_each_entry(chain, &table->chains, list) + memset(&chain->vstate, 0, sizeof(chain->vstate)); + + return err; } int nft_setelem_validate(const struct nft_ctx *ctx, struct nft_set *set, -- cgit v1.2.3 From c8f3c9fa75ff3822b56b47d5cfa0aaa484040ea8 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Mon, 15 Dec 2025 12:10:35 +0200 Subject: ASoC: soc-acpi / SOF: Add best_effort flag to get_function_tplg_files op When there is no fallback possibility available for the function topology use it is better to try to create a profile for the card in best effort manner, leaving out non supported links for example. As an example: some laptops present SSPx-BT link but we don't have fragment yet to support this. If we only have support for functional topology without monolithic fallback then we would fail the card creation. The reason why the monolithic topology works on the same device is that it does not have the SSPx-BT link handled, it is ignored. In case when there is no fallback possibility we should try to create the card with links that we support as best effort instead of failing and leaving the user without a card. Signed-off-by: Peter Ujfalusi Reviewed-by: Kai Vehmanen Reviewed-by: Bard Liao Link: https://patch.msgid.link/20251215101036.9370-2-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown --- include/sound/soc-acpi.h | 5 ++++- sound/soc/intel/common/sof-function-topology-lib.c | 5 ++++- sound/soc/intel/common/sof-function-topology-lib.h | 2 +- sound/soc/sof/topology.c | 18 +++++++++++++++++- 4 files changed, 26 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/sound/soc-acpi.h b/include/sound/soc-acpi.h index 90d73b9bddab..0519afd7217f 100644 --- a/include/sound/soc-acpi.h +++ b/include/sound/soc-acpi.h @@ -203,6 +203,8 @@ struct snd_soc_acpi_link_adr { * @mach: the pointer of the machine driver * @prefix: the prefix of the topology file name. Typically, it is the path. * @tplg_files: the pointer of the array of the topology file names. + * @best_effort: ignore non supported links and try to build the card in best effort + * with supported links */ /* Descriptor for SST ASoC machine driver */ struct snd_soc_acpi_mach { @@ -224,7 +226,8 @@ struct snd_soc_acpi_mach { const u32 tplg_quirk_mask; int (*get_function_tplg_files)(struct snd_soc_card *card, const struct snd_soc_acpi_mach *mach, - const char *prefix, const char ***tplg_files); + const char *prefix, const char ***tplg_files, + bool best_effort); }; #define SND_SOC_ACPI_MAX_CODECS 3 diff --git a/sound/soc/intel/common/sof-function-topology-lib.c b/sound/soc/intel/common/sof-function-topology-lib.c index b10d4794159a..0daa7d83808b 100644 --- a/sound/soc/intel/common/sof-function-topology-lib.c +++ b/sound/soc/intel/common/sof-function-topology-lib.c @@ -28,7 +28,7 @@ enum tplg_device_id { #define SOF_INTEL_PLATFORM_NAME_MAX 4 int sof_sdw_get_tplg_files(struct snd_soc_card *card, const struct snd_soc_acpi_mach *mach, - const char *prefix, const char ***tplg_files) + const char *prefix, const char ***tplg_files, bool best_effort) { struct snd_soc_acpi_mach_params mach_params = mach->mach_params; struct snd_soc_dai_link *dai_link; @@ -87,6 +87,9 @@ int sof_sdw_get_tplg_files(struct snd_soc_card *card, const struct snd_soc_acpi_ dev_dbg(card->dev, "dai_link %s is not supported by separated tplg yet\n", dai_link->name); + if (best_effort) + continue; + return 0; } if (tplg_mask & BIT(tplg_dev)) diff --git a/sound/soc/intel/common/sof-function-topology-lib.h b/sound/soc/intel/common/sof-function-topology-lib.h index e7d0c39d0788..f358f8c52d78 100644 --- a/sound/soc/intel/common/sof-function-topology-lib.h +++ b/sound/soc/intel/common/sof-function-topology-lib.h @@ -10,6 +10,6 @@ #define _SND_SOC_ACPI_INTEL_GET_TPLG_H int sof_sdw_get_tplg_files(struct snd_soc_card *card, const struct snd_soc_acpi_mach *mach, - const char *prefix, const char ***tplg_files); + const char *prefix, const char ***tplg_files, bool best_effort); #endif diff --git a/sound/soc/sof/topology.c b/sound/soc/sof/topology.c index c1083ea4624a..c76545e70860 100644 --- a/sound/soc/sof/topology.c +++ b/sound/soc/sof/topology.c @@ -2506,12 +2506,28 @@ int snd_sof_load_topology(struct snd_soc_component *scomp, const char *file) if (!tplg_files) return -ENOMEM; + /* Try to use function topologies if possible */ if (!sof_pdata->disable_function_topology && !disable_function_topology && sof_pdata->machine && sof_pdata->machine->get_function_tplg_files) { + /* + * When the topology name contains 'dummy' word, it means that + * there is no fallback option to monolithic topology in case + * any of the function topologies might be missing. + * In this case we should use best effort to form the card, + * ignoring functionalities that we are missing a fragment for. + * + * Note: monolithic topologies also ignore these possibly + * missing functions, so the functionality of the card would be + * identical to the case if there would be a fallback monolithic + * topology created for the configuration. + */ + bool no_fallback = strstr(file, "dummy"); + tplg_cnt = sof_pdata->machine->get_function_tplg_files(scomp->card, sof_pdata->machine, tplg_filename_prefix, - &tplg_files); + &tplg_files, + no_fallback); if (tplg_cnt < 0) { kfree(tplg_files); return tplg_cnt; -- cgit v1.2.3 From ed61378b4dc63efe76cb8c23a36b228043332da3 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Mon, 8 Dec 2025 09:05:48 -0500 Subject: iomap: replace folio_batch allocation with stack allocation Zhang Yi points out that the dynamic folio_batch allocation in iomap_fill_dirty_folios() is problematic for the ext4 on iomap work that is under development because it doesn't sufficiently handle the allocation failure case (by allowing a retry, for example). We've also seen lockdep (via syzbot) complain recently about the scope of the allocation. The dynamic allocation was initially added for simplicity and to help indicate whether the batch was used or not by the calling fs. To address these issues, put the batch on the stack of iomap_zero_range() and use a flag to control whether the batch should be used in the iomap folio lookup path. This keeps things simple and eliminates allocation issues with lockdep and for ext4 on iomap. While here, also clean up the fill helper signature to be more consistent with the underlying filemap helper. Pass through the return value of the filemap helper (folio count) and update the lookup offset via an out param. Fixes: 395ed1ef0012 ("iomap: optional zero range dirty folio processing") Signed-off-by: Brian Foster Link: https://patch.msgid.link/20251208140548.373411-1-bfoster@redhat.com Acked-by: Dave Chinner Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Christian Brauner --- fs/iomap/buffered-io.c | 50 +++++++++++++++++++++++++++++++++++--------------- fs/iomap/iter.c | 6 +++--- fs/xfs/xfs_iomap.c | 11 ++++++----- include/linux/iomap.h | 8 ++++++-- 4 files changed, 50 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index e5c1ca440d93..fd9a2cf95620 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -832,7 +832,7 @@ static struct folio *__iomap_get_folio(struct iomap_iter *iter, if (!mapping_large_folio_support(iter->inode->i_mapping)) len = min_t(size_t, len, PAGE_SIZE - offset_in_page(pos)); - if (iter->fbatch) { + if (iter->iomap.flags & IOMAP_F_FOLIO_BATCH) { struct folio *folio = folio_batch_next(iter->fbatch); if (!folio) @@ -929,7 +929,7 @@ static int iomap_write_begin(struct iomap_iter *iter, * process so return and let the caller iterate and refill the batch. */ if (!folio) { - WARN_ON_ONCE(!iter->fbatch); + WARN_ON_ONCE(!(iter->iomap.flags & IOMAP_F_FOLIO_BATCH)); return 0; } @@ -1544,23 +1544,39 @@ static int iomap_zero_iter(struct iomap_iter *iter, bool *did_zero, return status; } -loff_t +/** + * iomap_fill_dirty_folios - fill a folio batch with dirty folios + * @iter: Iteration structure + * @start: Start offset of range. Updated based on lookup progress. + * @end: End offset of range + * @iomap_flags: Flags to set on the associated iomap to track the batch. + * + * Returns the folio count directly. Also returns the associated control flag if + * the the batch lookup is performed and the expected offset of a subsequent + * lookup via out params. The caller is responsible to set the flag on the + * associated iomap. + */ +unsigned int iomap_fill_dirty_folios( struct iomap_iter *iter, - loff_t offset, - loff_t length) + loff_t *start, + loff_t end, + unsigned int *iomap_flags) { struct address_space *mapping = iter->inode->i_mapping; - pgoff_t start = offset >> PAGE_SHIFT; - pgoff_t end = (offset + length - 1) >> PAGE_SHIFT; + pgoff_t pstart = *start >> PAGE_SHIFT; + pgoff_t pend = (end - 1) >> PAGE_SHIFT; + unsigned int count; - iter->fbatch = kmalloc(sizeof(struct folio_batch), GFP_KERNEL); - if (!iter->fbatch) - return offset + length; - folio_batch_init(iter->fbatch); + if (!iter->fbatch) { + *start = end; + return 0; + } - filemap_get_folios_dirty(mapping, &start, end, iter->fbatch); - return (start << PAGE_SHIFT); + count = filemap_get_folios_dirty(mapping, &pstart, pend, iter->fbatch); + *start = (pstart << PAGE_SHIFT); + *iomap_flags |= IOMAP_F_FOLIO_BATCH; + return count; } EXPORT_SYMBOL_GPL(iomap_fill_dirty_folios); @@ -1569,17 +1585,21 @@ iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero, const struct iomap_ops *ops, const struct iomap_write_ops *write_ops, void *private) { + struct folio_batch fbatch; struct iomap_iter iter = { .inode = inode, .pos = pos, .len = len, .flags = IOMAP_ZERO, .private = private, + .fbatch = &fbatch, }; struct address_space *mapping = inode->i_mapping; int ret; bool range_dirty; + folio_batch_init(&fbatch); + /* * To avoid an unconditional flush, check pagecache state and only flush * if dirty and the fs returns a mapping that might convert on @@ -1590,11 +1610,11 @@ iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero, while ((ret = iomap_iter(&iter, ops)) > 0) { const struct iomap *srcmap = iomap_iter_srcmap(&iter); - if (WARN_ON_ONCE(iter.fbatch && + if (WARN_ON_ONCE((iter.iomap.flags & IOMAP_F_FOLIO_BATCH) && srcmap->type != IOMAP_UNWRITTEN)) return -EIO; - if (!iter.fbatch && + if (!(iter.iomap.flags & IOMAP_F_FOLIO_BATCH) && (srcmap->type == IOMAP_HOLE || srcmap->type == IOMAP_UNWRITTEN)) { s64 status; diff --git a/fs/iomap/iter.c b/fs/iomap/iter.c index 8692e5e41c6d..c04796f6e57f 100644 --- a/fs/iomap/iter.c +++ b/fs/iomap/iter.c @@ -8,10 +8,10 @@ static inline void iomap_iter_reset_iomap(struct iomap_iter *iter) { - if (iter->fbatch) { + if (iter->iomap.flags & IOMAP_F_FOLIO_BATCH) { folio_batch_release(iter->fbatch); - kfree(iter->fbatch); - iter->fbatch = NULL; + folio_batch_reinit(iter->fbatch); + iter->iomap.flags &= ~IOMAP_F_FOLIO_BATCH; } iter->status = 0; diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 04f39ea15898..37a1b33e9045 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -1831,7 +1831,6 @@ xfs_buffered_write_iomap_begin( */ if (flags & IOMAP_ZERO) { xfs_fileoff_t eof_fsb = XFS_B_TO_FSB(mp, XFS_ISIZE(ip)); - u64 end; if (isnullstartblock(imap.br_startblock) && offset_fsb >= eof_fsb) @@ -1851,12 +1850,14 @@ xfs_buffered_write_iomap_begin( */ if (imap.br_state == XFS_EXT_UNWRITTEN && offset_fsb < eof_fsb) { - loff_t len = min(count, - XFS_FSB_TO_B(mp, imap.br_blockcount)); + loff_t foffset = offset, fend; - end = iomap_fill_dirty_folios(iter, offset, len); + fend = offset + + min(count, XFS_FSB_TO_B(mp, imap.br_blockcount)); + iomap_fill_dirty_folios(iter, &foffset, fend, + &iomap_flags); end_fsb = min_t(xfs_fileoff_t, end_fsb, - XFS_B_TO_FSB(mp, end)); + XFS_B_TO_FSB(mp, foffset)); } xfs_trim_extent(&imap, offset_fsb, end_fsb - offset_fsb); diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 520e967cb501..6bb941707d12 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -88,6 +88,9 @@ struct vm_fault; /* * Flags set by the core iomap code during operations: * + * IOMAP_F_FOLIO_BATCH indicates that the folio batch mechanism is active + * for this operation, set by iomap_fill_dirty_folios(). + * * IOMAP_F_SIZE_CHANGED indicates to the iomap_end method that the file size * has changed as the result of this write operation. * @@ -95,6 +98,7 @@ struct vm_fault; * range it covers needs to be remapped by the high level before the operation * can proceed. */ +#define IOMAP_F_FOLIO_BATCH (1U << 13) #define IOMAP_F_SIZE_CHANGED (1U << 14) #define IOMAP_F_STALE (1U << 15) @@ -352,8 +356,8 @@ bool iomap_dirty_folio(struct address_space *mapping, struct folio *folio); int iomap_file_unshare(struct inode *inode, loff_t pos, loff_t len, const struct iomap_ops *ops, const struct iomap_write_ops *write_ops); -loff_t iomap_fill_dirty_folios(struct iomap_iter *iter, loff_t offset, - loff_t length); +unsigned int iomap_fill_dirty_folios(struct iomap_iter *iter, loff_t *start, + loff_t end, unsigned int *iomap_flags); int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero, const struct iomap_ops *ops, const struct iomap_write_ops *write_ops, void *private); -- cgit v1.2.3 From 12965a190eaea614bb49e22041e8fc0d03d0310f Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 4 Dec 2025 08:48:33 -0500 Subject: filelock: allow lease_managers to dictate what qualifies as a conflict Requesting a delegation on a file from the userland fcntl() interface currently succeeds when there are conflicting opens present. This is because the lease handling code ignores conflicting opens for FL_LAYOUT and FL_DELEG leases. This was a hack put in place long ago, because nfsd already checks for conflicts in its own way. The kernel needs to perform this check for userland delegations the same way it is done for leases, however. Make this dependent on the lease_manager by adding a new ->lm_open_conflict() lease_manager operation and have generic_add_lease() call that instead of check_conflicting_open(). Morph check_conflicting_open() into a ->lm_open_conflict() op that is only called for userland leases/delegations. Set the ->lm_open_conflict() operations for nfsd to trivial functions that always return 0. Reviewed-by: Chuck Lever Signed-off-by: Jeff Layton Link: https://patch.msgid.link/20251204-dir-deleg-ro-v2-2-22d37f92ce2c@kernel.org Signed-off-by: Christian Brauner --- Documentation/filesystems/locking.rst | 1 + fs/locks.c | 90 ++++++++++++++++------------------- fs/nfsd/nfs4layouts.c | 23 ++++++++- fs/nfsd/nfs4state.c | 19 ++++++++ include/linux/filelock.h | 1 + 5 files changed, 84 insertions(+), 50 deletions(-) (limited to 'include') diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst index 77704fde9845..04c7691e50e0 100644 --- a/Documentation/filesystems/locking.rst +++ b/Documentation/filesystems/locking.rst @@ -416,6 +416,7 @@ lm_change yes no no lm_breaker_owns_lease: yes no no lm_lock_expirable yes no no lm_expire_lock no no yes +lm_open_conflict yes no no ====================== ============= ================= ========= buffer_head diff --git a/fs/locks.c b/fs/locks.c index be0b79286da8..e75c8084d937 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -585,10 +585,50 @@ lease_setup(struct file_lease *fl, void **priv) __f_setown(filp, task_pid(current), PIDTYPE_TGID, 0); } +/** + * lease_open_conflict - see if the given file points to an inode that has + * an existing open that would conflict with the + * desired lease. + * @filp: file to check + * @arg: type of lease that we're trying to acquire + * + * Check to see if there's an existing open fd on this file that would + * conflict with the lease we're trying to set. + */ +static int +lease_open_conflict(struct file *filp, const int arg) +{ + struct inode *inode = file_inode(filp); + int self_wcount = 0, self_rcount = 0; + + if (arg == F_RDLCK) + return inode_is_open_for_write(inode) ? -EAGAIN : 0; + else if (arg != F_WRLCK) + return 0; + + /* + * Make sure that only read/write count is from lease requestor. + * Note that this will result in denying write leases when i_writecount + * is negative, which is what we want. (We shouldn't grant write leases + * on files open for execution.) + */ + if (filp->f_mode & FMODE_WRITE) + self_wcount = 1; + else if (filp->f_mode & FMODE_READ) + self_rcount = 1; + + if (atomic_read(&inode->i_writecount) != self_wcount || + atomic_read(&inode->i_readcount) != self_rcount) + return -EAGAIN; + + return 0; +} + static const struct lease_manager_operations lease_manager_ops = { .lm_break = lease_break_callback, .lm_change = lease_modify, .lm_setup = lease_setup, + .lm_open_conflict = lease_open_conflict, }; /* @@ -1754,52 +1794,6 @@ int fcntl_getdeleg(struct file *filp, struct delegation *deleg) return 0; } -/** - * check_conflicting_open - see if the given file points to an inode that has - * an existing open that would conflict with the - * desired lease. - * @filp: file to check - * @arg: type of lease that we're trying to acquire - * @flags: current lock flags - * - * Check to see if there's an existing open fd on this file that would - * conflict with the lease we're trying to set. - */ -static int -check_conflicting_open(struct file *filp, const int arg, int flags) -{ - struct inode *inode = file_inode(filp); - int self_wcount = 0, self_rcount = 0; - - if (flags & FL_LAYOUT) - return 0; - if (flags & FL_DELEG) - /* We leave these checks to the caller */ - return 0; - - if (arg == F_RDLCK) - return inode_is_open_for_write(inode) ? -EAGAIN : 0; - else if (arg != F_WRLCK) - return 0; - - /* - * Make sure that only read/write count is from lease requestor. - * Note that this will result in denying write leases when i_writecount - * is negative, which is what we want. (We shouldn't grant write leases - * on files open for execution.) - */ - if (filp->f_mode & FMODE_WRITE) - self_wcount = 1; - else if (filp->f_mode & FMODE_READ) - self_rcount = 1; - - if (atomic_read(&inode->i_writecount) != self_wcount || - atomic_read(&inode->i_readcount) != self_rcount) - return -EAGAIN; - - return 0; -} - static int generic_add_lease(struct file *filp, int arg, struct file_lease **flp, void **priv) { @@ -1836,7 +1830,7 @@ generic_add_lease(struct file *filp, int arg, struct file_lease **flp, void **pr percpu_down_read(&file_rwsem); spin_lock(&ctx->flc_lock); time_out_leases(inode, &dispose); - error = check_conflicting_open(filp, arg, lease->c.flc_flags); + error = lease->fl_lmops->lm_open_conflict(filp, arg); if (error) goto out; @@ -1893,7 +1887,7 @@ generic_add_lease(struct file *filp, int arg, struct file_lease **flp, void **pr * precedes these checks. */ smp_mb(); - error = check_conflicting_open(filp, arg, lease->c.flc_flags); + error = lease->fl_lmops->lm_open_conflict(filp, arg); if (error) { locks_unlink_lock_ctx(&lease->c); goto out; diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c index 683bd1130afe..ad7af8cfcf1f 100644 --- a/fs/nfsd/nfs4layouts.c +++ b/fs/nfsd/nfs4layouts.c @@ -764,9 +764,28 @@ nfsd4_layout_lm_change(struct file_lease *onlist, int arg, return lease_modify(onlist, arg, dispose); } +/** + * nfsd4_layout_lm_open_conflict - see if the given file points to an inode that has + * an existing open that would conflict with the + * desired lease. + * @filp: file to check + * @arg: type of lease that we're trying to acquire + * + * The kernel will call into this operation to determine whether there + * are conflicting opens that may prevent the layout from being granted. + * For nfsd, that check is done at a higher level, so this trivially + * returns 0. + */ +static int +nfsd4_layout_lm_open_conflict(struct file *filp, int arg) +{ + return 0; +} + static const struct lease_manager_operations nfsd4_layouts_lm_ops = { - .lm_break = nfsd4_layout_lm_break, - .lm_change = nfsd4_layout_lm_change, + .lm_break = nfsd4_layout_lm_break, + .lm_change = nfsd4_layout_lm_change, + .lm_open_conflict = nfsd4_layout_lm_open_conflict, }; int diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 808c24fb5c9a..19d6d6db107f 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -5552,10 +5552,29 @@ nfsd_change_deleg_cb(struct file_lease *onlist, int arg, return -EAGAIN; } +/** + * nfsd4_deleg_lm_open_conflict - see if the given file points to an inode that has + * an existing open that would conflict with the + * desired lease. + * @filp: file to check + * @arg: type of lease that we're trying to acquire + * + * The kernel will call into this operation to determine whether there + * are conflicting opens that may prevent the deleg from being granted. + * For nfsd, that check is done at a higher level, so this trivially + * returns 0. + */ +static int +nfsd4_deleg_lm_open_conflict(struct file *filp, int arg) +{ + return 0; +} + static const struct lease_manager_operations nfsd_lease_mng_ops = { .lm_breaker_owns_lease = nfsd_breaker_owns_lease, .lm_break = nfsd_break_deleg_cb, .lm_change = nfsd_change_deleg_cb, + .lm_open_conflict = nfsd4_deleg_lm_open_conflict, }; static __be32 nfsd4_check_seqid(struct nfsd4_compound_state *cstate, struct nfs4_stateowner *so, u32 seqid) diff --git a/include/linux/filelock.h b/include/linux/filelock.h index 54b824c05299..2f5e5588ee07 100644 --- a/include/linux/filelock.h +++ b/include/linux/filelock.h @@ -49,6 +49,7 @@ struct lease_manager_operations { int (*lm_change)(struct file_lease *, int, struct list_head *); void (*lm_setup)(struct file_lease *, void **); bool (*lm_breaker_owns_lease)(struct file_lease *); + int (*lm_open_conflict)(struct file *, int); }; struct lock_manager { -- cgit v1.2.3 From e1b4c6a58304fd490124cc2b454d80edc786665c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 13 Dec 2025 17:50:23 -0500 Subject: shmem: fix recovery on rename failures maple_tree insertions can fail if we are seriously short on memory; simple_offset_rename() does not recover well if it runs into that. The same goes for simple_offset_rename_exchange(). Moreover, shmem_whiteout() expects that if it succeeds, the caller will progress to d_move(), i.e. that shmem_rename2() won't fail past the successful call of shmem_whiteout(). Not hard to fix, fortunately - mtree_store() can't fail if the index we are trying to store into is already present in the tree as a singleton. For simple_offset_rename_exchange() that's enough - we just need to be careful about the order of operations. For simple_offset_rename() solution is to preinsert the target into the tree for new_dir; the rest can be done without any potentially failing operations. That preinsertion has to be done in shmem_rename2() rather than in simple_offset_rename() itself - otherwise we'd need to deal with the possibility of failure after successful shmem_whiteout(). Fixes: a2e459555c5f ("shmem: stable directory offsets") Reviewed-by: Christian Brauner Reviewed-by: Chuck Lever Signed-off-by: Al Viro --- fs/libfs.c | 50 +++++++++++++++++++++----------------------------- include/linux/fs.h | 2 +- mm/shmem.c | 18 +++++++++++++----- 3 files changed, 35 insertions(+), 35 deletions(-) (limited to 'include') diff --git a/fs/libfs.c b/fs/libfs.c index 9264523be85c..591eb649ebba 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -346,22 +346,22 @@ void simple_offset_remove(struct offset_ctx *octx, struct dentry *dentry) * User space expects the directory offset value of the replaced * (new) directory entry to be unchanged after a rename. * - * Returns zero on success, a negative errno value on failure. + * Caller must have grabbed a slot for new_dentry in the maple_tree + * associated with new_dir, even if dentry is negative. */ -int simple_offset_rename(struct inode *old_dir, struct dentry *old_dentry, - struct inode *new_dir, struct dentry *new_dentry) +void simple_offset_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry) { struct offset_ctx *old_ctx = old_dir->i_op->get_offset_ctx(old_dir); struct offset_ctx *new_ctx = new_dir->i_op->get_offset_ctx(new_dir); long new_offset = dentry2offset(new_dentry); - simple_offset_remove(old_ctx, old_dentry); + if (WARN_ON(!new_offset)) + return; - if (new_offset) { - offset_set(new_dentry, 0); - return simple_offset_replace(new_ctx, old_dentry, new_offset); - } - return simple_offset_add(new_ctx, old_dentry); + simple_offset_remove(old_ctx, old_dentry); + offset_set(new_dentry, 0); + WARN_ON(simple_offset_replace(new_ctx, old_dentry, new_offset)); } /** @@ -388,31 +388,23 @@ int simple_offset_rename_exchange(struct inode *old_dir, long new_index = dentry2offset(new_dentry); int ret; - simple_offset_remove(old_ctx, old_dentry); - simple_offset_remove(new_ctx, new_dentry); + if (WARN_ON(!old_index || !new_index)) + return -EINVAL; - ret = simple_offset_replace(new_ctx, old_dentry, new_index); - if (ret) - goto out_restore; + ret = mtree_store(&new_ctx->mt, new_index, old_dentry, GFP_KERNEL); + if (WARN_ON(ret)) + return ret; - ret = simple_offset_replace(old_ctx, new_dentry, old_index); - if (ret) { - simple_offset_remove(new_ctx, old_dentry); - goto out_restore; + ret = mtree_store(&old_ctx->mt, old_index, new_dentry, GFP_KERNEL); + if (WARN_ON(ret)) { + mtree_store(&new_ctx->mt, new_index, new_dentry, GFP_KERNEL); + return ret; } - ret = simple_rename_exchange(old_dir, old_dentry, new_dir, new_dentry); - if (ret) { - simple_offset_remove(new_ctx, old_dentry); - simple_offset_remove(old_ctx, new_dentry); - goto out_restore; - } + offset_set(old_dentry, new_index); + offset_set(new_dentry, old_index); + simple_rename_exchange(old_dir, old_dentry, new_dir, new_dentry); return 0; - -out_restore: - (void)simple_offset_replace(old_ctx, old_dentry, old_index); - (void)simple_offset_replace(new_ctx, new_dentry, new_index); - return ret; } /** diff --git a/include/linux/fs.h b/include/linux/fs.h index 04ceeca12a0d..f5c9cf28c4dc 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3247,7 +3247,7 @@ struct offset_ctx { void simple_offset_init(struct offset_ctx *octx); int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry); void simple_offset_remove(struct offset_ctx *octx, struct dentry *dentry); -int simple_offset_rename(struct inode *old_dir, struct dentry *old_dentry, +void simple_offset_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry); int simple_offset_rename_exchange(struct inode *old_dir, struct dentry *old_dentry, diff --git a/mm/shmem.c b/mm/shmem.c index d3edc809e2e7..a9666b0599a4 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -4038,6 +4038,7 @@ static int shmem_rename2(struct mnt_idmap *idmap, { struct inode *inode = d_inode(old_dentry); int they_are_dirs = S_ISDIR(inode->i_mode); + bool had_offset = false; int error; if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT)) @@ -4050,16 +4051,23 @@ static int shmem_rename2(struct mnt_idmap *idmap, if (!simple_empty(new_dentry)) return -ENOTEMPTY; + error = simple_offset_add(shmem_get_offset_ctx(new_dir), new_dentry); + if (error == -EBUSY) + had_offset = true; + else if (unlikely(error)) + return error; + if (flags & RENAME_WHITEOUT) { error = shmem_whiteout(idmap, old_dir, old_dentry); - if (error) + if (error) { + if (!had_offset) + simple_offset_remove(shmem_get_offset_ctx(new_dir), + new_dentry); return error; + } } - error = simple_offset_rename(old_dir, old_dentry, new_dir, new_dentry); - if (error) - return error; - + simple_offset_rename(old_dir, old_dentry, new_dir, new_dentry); if (d_really_is_positive(new_dentry)) { (void) shmem_unlink(new_dir, new_dentry); if (they_are_dirs) { -- cgit v1.2.3 From a75a1dec037ff3de863375fa3a74569619667184 Mon Sep 17 00:00:00 2001 From: Ahmed Naseef Date: Tue, 9 Dec 2025 11:16:02 +0400 Subject: mtd: spinand: add support for Dosilicon DS35Q1GA/DS35M1GA Add support for Dosilicon DS35Q1GA (3.3V) and DS35M1GA (1.8V) SPI NAND. These are 1Gbit (128MB) devices with: - 2048 byte pages + 64 byte OOB - 64 pages per block, 1024 blocks - On-die 4-bit ECC per 512 byte sector The 64-byte OOB area is divided into 4 segments of 16 bytes, with each segment containing 8 bytes of user data (M2+M1) and 8 bytes of ECC parity (R1). This provides 30 bytes of usable OOB space after reserving 2 bytes for the bad block marker. Tested on Genexis Platinum 4410 (EcoNet EN751221) by writing known patterns to OOB and verifying ECC parity placement in R1 regions. Datasheet: https://www.dosilicon.com/resources/SPI%20NAND/DS35X1GAXXX_rev08.pdf Signed-off-by: Ahmed Naseef Signed-off-by: Miquel Raynal --- drivers/mtd/nand/spi/Makefile | 4 +- drivers/mtd/nand/spi/core.c | 1 + drivers/mtd/nand/spi/dosilicon.c | 91 ++++++++++++++++++++++++++++++++++++++++ include/linux/mtd/spinand.h | 1 + 4 files changed, 95 insertions(+), 2 deletions(-) create mode 100644 drivers/mtd/nand/spi/dosilicon.c (limited to 'include') diff --git a/drivers/mtd/nand/spi/Makefile b/drivers/mtd/nand/spi/Makefile index 6d3d203df048..a47bd22cd309 100644 --- a/drivers/mtd/nand/spi/Makefile +++ b/drivers/mtd/nand/spi/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 spinand-objs := core.o otp.o -spinand-objs += alliancememory.o ato.o esmt.o fmsh.o foresee.o gigadevice.o macronix.o -spinand-objs += micron.o paragon.o skyhigh.o toshiba.o winbond.o xtx.o +spinand-objs += alliancememory.o ato.o dosilicon.o esmt.o fmsh.o foresee.o gigadevice.o +spinand-objs += macronix.o micron.o paragon.o skyhigh.o toshiba.o winbond.o xtx.o obj-$(CONFIG_MTD_SPI_NAND) += spinand.o diff --git a/drivers/mtd/nand/spi/core.c b/drivers/mtd/nand/spi/core.c index d207286572d8..0346916b032b 100644 --- a/drivers/mtd/nand/spi/core.c +++ b/drivers/mtd/nand/spi/core.c @@ -1227,6 +1227,7 @@ static const struct nand_ops spinand_ops = { static const struct spinand_manufacturer *spinand_manufacturers[] = { &alliancememory_spinand_manufacturer, &ato_spinand_manufacturer, + &dosilicon_spinand_manufacturer, &esmt_8c_spinand_manufacturer, &esmt_c8_spinand_manufacturer, &fmsh_spinand_manufacturer, diff --git a/drivers/mtd/nand/spi/dosilicon.c b/drivers/mtd/nand/spi/dosilicon.c new file mode 100644 index 000000000000..f99899866ceb --- /dev/null +++ b/drivers/mtd/nand/spi/dosilicon.c @@ -0,0 +1,91 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Author: Ahmed Naseef + */ + +#include +#include +#include + +#define SPINAND_MFR_DOSILICON 0xE5 + +static SPINAND_OP_VARIANTS(read_cache_variants, + SPINAND_PAGE_READ_FROM_CACHE_1S_1S_4S_OP(0, 1, NULL, 0, 0), + SPINAND_PAGE_READ_FROM_CACHE_1S_1S_2S_OP(0, 1, NULL, 0, 0), + SPINAND_PAGE_READ_FROM_CACHE_FAST_1S_1S_1S_OP(0, 1, NULL, 0, 0), + SPINAND_PAGE_READ_FROM_CACHE_1S_1S_1S_OP(0, 1, NULL, 0, 0)); + +static SPINAND_OP_VARIANTS(write_cache_variants, + SPINAND_PROG_LOAD_1S_1S_4S_OP(true, 0, NULL, 0), + SPINAND_PROG_LOAD_1S_1S_1S_OP(true, 0, NULL, 0)); + +static SPINAND_OP_VARIANTS(update_cache_variants, + SPINAND_PROG_LOAD_1S_1S_4S_OP(false, 0, NULL, 0), + SPINAND_PROG_LOAD_1S_1S_1S_OP(false, 0, NULL, 0)); + +static int ds35xx_ooblayout_ecc(struct mtd_info *mtd, int section, + struct mtd_oob_region *region) +{ + if (section > 3) + return -ERANGE; + + region->offset = 8 + (section * 16); + region->length = 8; + + return 0; +} + +static int ds35xx_ooblayout_free(struct mtd_info *mtd, int section, + struct mtd_oob_region *region) +{ + if (section > 3) + return -ERANGE; + + if (section == 0) { + /* reserve 2 bytes for the BBM */ + region->offset = 2; + region->length = 6; + } else { + region->offset = section * 16; + region->length = 8; + } + + return 0; +} + +static const struct mtd_ooblayout_ops ds35xx_ooblayout = { + .ecc = ds35xx_ooblayout_ecc, + .free = ds35xx_ooblayout_free, +}; + +static const struct spinand_info dosilicon_spinand_table[] = { + SPINAND_INFO("DS35Q1GA", + SPINAND_ID(SPINAND_READID_METHOD_OPCODE_DUMMY, 0x71), + NAND_MEMORG(1, 2048, 64, 64, 1024, 20, 1, 1, 1), + NAND_ECCREQ(4, 512), + SPINAND_INFO_OP_VARIANTS(&read_cache_variants, + &write_cache_variants, + &update_cache_variants), + SPINAND_HAS_QE_BIT, + SPINAND_ECCINFO(&ds35xx_ooblayout, NULL)), + SPINAND_INFO("DS35M1GA", + SPINAND_ID(SPINAND_READID_METHOD_OPCODE_DUMMY, 0x21), + NAND_MEMORG(1, 2048, 64, 64, 1024, 20, 1, 1, 1), + NAND_ECCREQ(4, 512), + SPINAND_INFO_OP_VARIANTS(&read_cache_variants, + &write_cache_variants, + &update_cache_variants), + SPINAND_HAS_QE_BIT, + SPINAND_ECCINFO(&ds35xx_ooblayout, NULL)), +}; + +static const struct spinand_manufacturer_ops dosilicon_spinand_manuf_ops = { +}; + +const struct spinand_manufacturer dosilicon_spinand_manufacturer = { + .id = SPINAND_MFR_DOSILICON, + .name = "Dosilicon", + .chips = dosilicon_spinand_table, + .nchips = ARRAY_SIZE(dosilicon_spinand_table), + .ops = &dosilicon_spinand_manuf_ops, +}; diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h index ce76f5c632e1..c50a43b447d2 100644 --- a/include/linux/mtd/spinand.h +++ b/include/linux/mtd/spinand.h @@ -354,6 +354,7 @@ struct spinand_manufacturer { /* SPI NAND manufacturers */ extern const struct spinand_manufacturer alliancememory_spinand_manufacturer; extern const struct spinand_manufacturer ato_spinand_manufacturer; +extern const struct spinand_manufacturer dosilicon_spinand_manufacturer; extern const struct spinand_manufacturer esmt_8c_spinand_manufacturer; extern const struct spinand_manufacturer esmt_c8_spinand_manufacturer; extern const struct spinand_manufacturer fmsh_spinand_manufacturer; -- cgit v1.2.3 From f157dd661339fc6f5f2b574fe2429c43bd309534 Mon Sep 17 00:00:00 2001 From: Miquel Sabaté Solà Date: Tue, 21 Oct 2025 11:11:25 +0200 Subject: btrfs: fix NULL dereference on root when tracing inode eviction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When evicting an inode the first thing we do is to setup tracing for it, which implies fetching the root's id. But in btrfs_evict_inode() the root might be NULL, as implied in the next check that we do in btrfs_evict_inode(). Hence, we either should set the ->root_objectid to 0 in case the root is NULL, or we move tracing setup after checking that the root is not NULL. Setting the rootid to 0 at least gives us the possibility to trace this call even in the case when the root is NULL, so that's the solution taken here. Fixes: 1abe9b8a138c ("Btrfs: add initial tracepoint support for btrfs") Reported-by: syzbot+d991fea1b4b23b1f6bf8@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=d991fea1b4b23b1f6bf8 Signed-off-by: Miquel Sabaté Solà Reviewed-by: David Sterba Signed-off-by: David Sterba --- include/trace/events/btrfs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 7e418f065b94..125bdc166bfe 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -224,7 +224,8 @@ DECLARE_EVENT_CLASS(btrfs__inode, __entry->generation = BTRFS_I(inode)->generation; __entry->last_trans = BTRFS_I(inode)->last_trans; __entry->logged_trans = BTRFS_I(inode)->logged_trans; - __entry->root_objectid = btrfs_root_id(BTRFS_I(inode)->root); + __entry->root_objectid = BTRFS_I(inode)->root ? + btrfs_root_id(BTRFS_I(inode)->root) : 0; ), TP_printk_btrfs("root=%llu(%s) gen=%llu ino=%llu blocks=%llu " -- cgit v1.2.3 From 2dc675f614850b80deab7cf6d12902636ed8a7f4 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 8 Dec 2025 14:33:05 +0100 Subject: RDMA/ucma: Fix rdma_ucm_query_ib_service_resp struct padding On a few 32-bit architectures, the newly added ib_user_service_rec structure is not 64-bit aligned the way it is on most regular ones. Add explicit padding into the rdma_ucm_query_ib_service_resp and rdma_ucm_resolve_ib_service structures that embed it, so that the layout is compatible across all of them. This is an ABI change on i386, aligning it with x86_64 and the other 64-bit architectures to avoid having to use a compat ioctl handler. Fixes: 810f874eda8e ("RDMA/ucma: Support query resolved service records") Link: https://patch.msgid.link/r/20251208133311.313977-1-arnd@kernel.org Signed-off-by: Arnd Bergmann Signed-off-by: Jason Gunthorpe --- include/uapi/rdma/rdma_user_cm.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/rdma/rdma_user_cm.h b/include/uapi/rdma/rdma_user_cm.h index 5ded174687ee..838f8d460256 100644 --- a/include/uapi/rdma/rdma_user_cm.h +++ b/include/uapi/rdma/rdma_user_cm.h @@ -192,6 +192,7 @@ struct rdma_ucm_query_path_resp { struct rdma_ucm_query_ib_service_resp { __u32 num_service_recs; + __u32 reserved; struct ib_user_service_rec recs[]; }; @@ -354,7 +355,7 @@ enum { #define RDMA_USER_CM_IB_SERVICE_NAME_SIZE 64 struct rdma_ucm_ib_service { - __u64 service_id; + __aligned_u64 service_id; __u8 service_name[RDMA_USER_CM_IB_SERVICE_NAME_SIZE]; __u32 flags; __u32 reserved; @@ -362,6 +363,7 @@ struct rdma_ucm_ib_service { struct rdma_ucm_resolve_ib_service { __u32 id; + __u32 reserved; struct rdma_ucm_ib_service ibs; }; -- cgit v1.2.3 From d95e99a74eaf35c070f5939295331e5d7857c723 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 8 Dec 2025 14:38:44 +0100 Subject: RDMA/irdma: Fix irdma_alloc_ucontext_resp padding A recent commit modified struct irdma_alloc_ucontext_resp by adding a member with implicit padding in front of it, though this does not change the offset of the data members other than m68k. Reported by scripts/check-uapi.sh: ==== ABI differences detected in include/rdma/irdma-abi.h from 1dd7bde2e91c -> HEAD ==== [C] 'struct irdma_alloc_ucontext_resp' changed: type size changed from 704 to 640 (in bits) 1 data member deletion: '__u8 rsvd3[2]', at offset 640 (in bits) at irdma-abi.h:61:1 1 data member insertion: '__u8 revd3[2]', at offset 592 (in bits) at irdma-abi.h:60:1 Change the size back to the previous version, and remove the implicit padding by making it explicit and matching what x86-64 would do by placing max_hw_srq_quanta member into a naturally aligned location. Fixes: 563e1feb5f6e ("RDMA/irdma: Add SRQ support") Link: https://patch.msgid.link/r/20251208133849.315451-1-arnd@kernel.org Signed-off-by: Arnd Bergmann Reviewed-by: Geert Uytterhoeven Tested-by: Jacob Moroni Signed-off-by: Jason Gunthorpe --- include/uapi/rdma/irdma-abi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/rdma/irdma-abi.h b/include/uapi/rdma/irdma-abi.h index f7788d33376b..36f20802bcc8 100644 --- a/include/uapi/rdma/irdma-abi.h +++ b/include/uapi/rdma/irdma-abi.h @@ -57,8 +57,8 @@ struct irdma_alloc_ucontext_resp { __u8 rsvd2; __aligned_u64 comp_mask; __u16 min_hw_wq_size; + __u8 revd3[2]; __u32 max_hw_srq_quanta; - __u8 rsvd3[2]; }; struct irdma_alloc_pd_resp { -- cgit v1.2.3 From a58383fa45c706bda3bf4a1955c3a0327dbec7e7 Mon Sep 17 00:00:00 2001 From: Deepanshu Kartikey Date: Wed, 17 Dec 2025 07:17:12 +0530 Subject: block: add allocation size check in blkdev_pr_read_keys() blkdev_pr_read_keys() takes num_keys from userspace and uses it to calculate the allocation size for keys_info via struct_size(). While there is a check for SIZE_MAX (integer overflow), there is no upper bound validation on the allocation size itself. A malicious or buggy userspace can pass a large num_keys value that doesn't trigger overflow but still results in an excessive allocation attempt, causing a warning in the page allocator when the order exceeds MAX_PAGE_ORDER. Fix this by introducing PR_KEYS_MAX to limit the number of keys to a sane value. This makes the SIZE_MAX check redundant, so remove it. Also switch to kvzalloc/kvfree to handle larger allocations gracefully. Fixes: 22a1ffea5f80 ("block: add IOC_PR_READ_KEYS ioctl") Tested-by: syzbot+660d079d90f8a1baf54d@syzkaller.appspotmail.com Reported-by: syzbot+660d079d90f8a1baf54d@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=660d079d90f8a1baf54d Link: https://lore.kernel.org/all/20251212013510.3576091-1-kartikey406@gmail.com/T/ [v1] Signed-off-by: Deepanshu Kartikey Reviewed-by: Martin K. Petersen Reviewed-by: Stefan Hajnoczi Signed-off-by: Jens Axboe --- block/ioctl.c | 9 +++++---- include/uapi/linux/pr.h | 2 ++ 2 files changed, 7 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/block/ioctl.c b/block/ioctl.c index 61feed686418..344478348a54 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -442,11 +442,12 @@ static int blkdev_pr_read_keys(struct block_device *bdev, blk_mode_t mode, if (copy_from_user(&read_keys, arg, sizeof(read_keys))) return -EFAULT; - keys_info_len = struct_size(keys_info, keys, read_keys.num_keys); - if (keys_info_len == SIZE_MAX) + if (read_keys.num_keys > PR_KEYS_MAX) return -EINVAL; - keys_info = kzalloc(keys_info_len, GFP_KERNEL); + keys_info_len = struct_size(keys_info, keys, read_keys.num_keys); + + keys_info = kvzalloc(keys_info_len, GFP_KERNEL); if (!keys_info) return -ENOMEM; @@ -473,7 +474,7 @@ static int blkdev_pr_read_keys(struct block_device *bdev, blk_mode_t mode, if (copy_to_user(arg, &read_keys, sizeof(read_keys))) ret = -EFAULT; out: - kfree(keys_info); + kvfree(keys_info); return ret; } diff --git a/include/uapi/linux/pr.h b/include/uapi/linux/pr.h index 847f3051057a..f0ecb1677317 100644 --- a/include/uapi/linux/pr.h +++ b/include/uapi/linux/pr.h @@ -79,4 +79,6 @@ struct pr_read_reservation { #define IOC_PR_READ_KEYS _IOWR('p', 206, struct pr_read_keys) #define IOC_PR_READ_RESERVATION _IOR('p', 207, struct pr_read_reservation) +#define PR_KEYS_MAX (1u << 16) + #endif /* _UAPI_PR_H */ -- cgit v1.2.3 From dcd0b625fe440d68bb4b97c71d18ca48ecd6e594 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Wed, 17 Dec 2025 07:34:55 -0800 Subject: powercap: intel_rapl: Fix possible recursive lock warning With the RAPL PMU addition, there is a recursive locking when CPU online callback function calls rapl_package_add_pmu(). Here cpu_hotplug_lock is already acquired by cpuhp_thread_fun() and rapl_package_add_pmu() tries to acquire again. <4>[ 8.197433] ============================================ <4>[ 8.197437] WARNING: possible recursive locking detected <4>[ 8.197440] 6.19.0-rc1-lgci-xe-xe-4242-05b7c58b3367dca84+ #1 Not tainted <4>[ 8.197444] -------------------------------------------- <4>[ 8.197447] cpuhp/0/20 is trying to acquire lock: <4>[ 8.197450] ffffffff83487870 (cpu_hotplug_lock){++++}-{0:0}, at: rapl_package_add_pmu+0x37/0x370 [intel_rapl_common] <4>[ 8.197463] but task is already holding lock: <4>[ 8.197466] ffffffff83487870 (cpu_hotplug_lock){++++}-{0:0}, at: cpuhp_thread_fun+0x6d/0x290 <4>[ 8.197477] other info that might help us debug this: <4>[ 8.197480] Possible unsafe locking scenario: <4>[ 8.197483] CPU0 <4>[ 8.197485] ---- <4>[ 8.197487] lock(cpu_hotplug_lock); <4>[ 8.197490] lock(cpu_hotplug_lock); <4>[ 8.197493] *** DEADLOCK *** .. .. <4>[ 8.197542] __lock_acquire+0x146e/0x2790 <4>[ 8.197548] lock_acquire+0xc4/0x2c0 <4>[ 8.197550] ? rapl_package_add_pmu+0x37/0x370 [intel_rapl_common] <4>[ 8.197556] cpus_read_lock+0x41/0x110 <4>[ 8.197558] ? rapl_package_add_pmu+0x37/0x370 [intel_rapl_common] <4>[ 8.197561] rapl_package_add_pmu+0x37/0x370 [intel_rapl_common] <4>[ 8.197565] rapl_cpu_online+0x85/0x87 [intel_rapl_msr] <4>[ 8.197568] ? __pfx_rapl_cpu_online+0x10/0x10 [intel_rapl_msr] <4>[ 8.197570] cpuhp_invoke_callback+0x41f/0x6c0 <4>[ 8.197573] ? cpuhp_thread_fun+0x6d/0x290 <4>[ 8.197575] cpuhp_thread_fun+0x1e2/0x290 <4>[ 8.197578] ? smpboot_thread_fn+0x26/0x290 <4>[ 8.197581] smpboot_thread_fn+0x12f/0x290 <4>[ 8.197584] ? __pfx_smpboot_thread_fn+0x10/0x10 <4>[ 8.197586] kthread+0x11f/0x250 <4>[ 8.197589] ? __pfx_kthread+0x10/0x10 <4>[ 8.197592] ret_from_fork+0x344/0x3a0 <4>[ 8.197595] ? __pfx_kthread+0x10/0x10 <4>[ 8.197597] ret_from_fork_asm+0x1a/0x30 <4>[ 8.197604] Fix this issue in the same way as rapl powercap package domain is added from the same CPU online callback by introducing another interface which doesn't call cpus_read_lock(). Add rapl_package_add_pmu_locked() and rapl_package_remove_pmu_locked() which don't call cpus_read_lock(). Fixes: 748d6ba43afd ("powercap: intel_rapl: Enable MSR-based RAPL PMU support") Reported-by: Borah, Chaitanya Kumar Closes: https://lore.kernel.org/linux-pm/5427ede1-57a0-43d1-99f3-8ca4b0643e82@intel.com/T/#u Tested-by: Kuppuswamy Sathyanarayanan Tested-by: RavitejaX Veesam Signed-off-by: Srinivas Pandruvada Link: https://patch.msgid.link/20251217153455.3560176-1-srinivas.pandruvada@linux.intel.com Signed-off-by: Rafael J. Wysocki --- drivers/powercap/intel_rapl_common.c | 24 ++++++++++++++++++------ drivers/powercap/intel_rapl_msr.c | 4 ++-- include/linux/intel_rapl.h | 4 ++++ 3 files changed, 24 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c index b9d87e56cbbc..3ff6da3bf4e6 100644 --- a/drivers/powercap/intel_rapl_common.c +++ b/drivers/powercap/intel_rapl_common.c @@ -2032,7 +2032,7 @@ end: return ret; } -int rapl_package_add_pmu(struct rapl_package *rp) +int rapl_package_add_pmu_locked(struct rapl_package *rp) { struct rapl_package_pmu_data *data = &rp->pmu_data; int idx; @@ -2040,8 +2040,6 @@ int rapl_package_add_pmu(struct rapl_package *rp) if (rp->has_pmu) return -EEXIST; - guard(cpus_read_lock)(); - for (idx = 0; idx < rp->nr_domains; idx++) { struct rapl_domain *rd = &rp->domains[idx]; int domain = rd->id; @@ -2091,17 +2089,23 @@ int rapl_package_add_pmu(struct rapl_package *rp) return rapl_pmu_update(rp); } +EXPORT_SYMBOL_GPL(rapl_package_add_pmu_locked); + +int rapl_package_add_pmu(struct rapl_package *rp) +{ + guard(cpus_read_lock)(); + + return rapl_package_add_pmu_locked(rp); +} EXPORT_SYMBOL_GPL(rapl_package_add_pmu); -void rapl_package_remove_pmu(struct rapl_package *rp) +void rapl_package_remove_pmu_locked(struct rapl_package *rp) { struct rapl_package *pos; if (!rp->has_pmu) return; - guard(cpus_read_lock)(); - list_for_each_entry(pos, &rapl_packages, plist) { /* PMU is still needed */ if (pos->has_pmu && pos != rp) @@ -2111,6 +2115,14 @@ void rapl_package_remove_pmu(struct rapl_package *rp) perf_pmu_unregister(&rapl_pmu.pmu); memset(&rapl_pmu, 0, sizeof(struct rapl_pmu)); } +EXPORT_SYMBOL_GPL(rapl_package_remove_pmu_locked); + +void rapl_package_remove_pmu(struct rapl_package *rp) +{ + guard(cpus_read_lock)(); + + rapl_package_remove_pmu_locked(rp); +} EXPORT_SYMBOL_GPL(rapl_package_remove_pmu); #endif diff --git a/drivers/powercap/intel_rapl_msr.c b/drivers/powercap/intel_rapl_msr.c index 0ce1096b6314..9a7e150b3536 100644 --- a/drivers/powercap/intel_rapl_msr.c +++ b/drivers/powercap/intel_rapl_msr.c @@ -82,7 +82,7 @@ static int rapl_cpu_online(unsigned int cpu) if (IS_ERR(rp)) return PTR_ERR(rp); if (rapl_msr_pmu) - rapl_package_add_pmu(rp); + rapl_package_add_pmu_locked(rp); } cpumask_set_cpu(cpu, &rp->cpumask); return 0; @@ -101,7 +101,7 @@ static int rapl_cpu_down_prep(unsigned int cpu) lead_cpu = cpumask_first(&rp->cpumask); if (lead_cpu >= nr_cpu_ids) { if (rapl_msr_pmu) - rapl_package_remove_pmu(rp); + rapl_package_remove_pmu_locked(rp); rapl_remove_package_cpuslocked(rp); } else if (rp->lead_cpu == cpu) { rp->lead_cpu = lead_cpu; diff --git a/include/linux/intel_rapl.h b/include/linux/intel_rapl.h index e9ade2ff4af6..f479ef5b3341 100644 --- a/include/linux/intel_rapl.h +++ b/include/linux/intel_rapl.h @@ -214,10 +214,14 @@ void rapl_remove_package(struct rapl_package *rp); #ifdef CONFIG_PERF_EVENTS int rapl_package_add_pmu(struct rapl_package *rp); +int rapl_package_add_pmu_locked(struct rapl_package *rp); void rapl_package_remove_pmu(struct rapl_package *rp); +void rapl_package_remove_pmu_locked(struct rapl_package *rp); #else static inline int rapl_package_add_pmu(struct rapl_package *rp) { return 0; } +static inline int rapl_package_add_pmu_locked(struct rapl_package *rp) { return 0; } static inline void rapl_package_remove_pmu(struct rapl_package *rp) { } +static inline void rapl_package_remove_pmu_locked(struct rapl_package *rp) { } #endif #endif /* __INTEL_RAPL_H__ */ -- cgit v1.2.3 From 4a824c3128998158a093eaadd776a79abe3a601a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 4 Dec 2025 15:31:27 +0000 Subject: entry: Always inline local_irq_{enable,disable}_exit_to_user() clang needs __always_inline instead of inline, even for tiny helpers. This saves some cycles in system call fast path, and saves 195 bytes on x86_64 build: $ size vmlinux.before vmlinux.after text data bss dec hex filename 34652814 22291961 5875180 62819955 3be8e73 vmlinux.before 34652619 22291961 5875180 62819760 3be8db0 vmlinux.after Signed-off-by: Eric Dumazet Signed-off-by: Peter Zijlstra (Intel) Link: https://patch.msgid.link/20251204153127.1321824-1-edumazet@google.com --- include/linux/irq-entry-common.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/irq-entry-common.h b/include/linux/irq-entry-common.h index 6ab913e57da0..d26d1b1bcbfb 100644 --- a/include/linux/irq-entry-common.h +++ b/include/linux/irq-entry-common.h @@ -110,7 +110,7 @@ static __always_inline void enter_from_user_mode(struct pt_regs *regs) static inline void local_irq_enable_exit_to_user(unsigned long ti_work); #ifndef local_irq_enable_exit_to_user -static inline void local_irq_enable_exit_to_user(unsigned long ti_work) +static __always_inline void local_irq_enable_exit_to_user(unsigned long ti_work) { local_irq_enable(); } @@ -125,7 +125,7 @@ static inline void local_irq_enable_exit_to_user(unsigned long ti_work) static inline void local_irq_disable_exit_to_user(void); #ifndef local_irq_disable_exit_to_user -static inline void local_irq_disable_exit_to_user(void) +static __always_inline void local_irq_disable_exit_to_user(void) { local_irq_disable(); } -- cgit v1.2.3 From 8e461304009135270e9ccf2d7e2dfe29daec9b60 Mon Sep 17 00:00:00 2001 From: Shuicheng Lin Date: Fri, 5 Dec 2025 23:47:17 +0000 Subject: drm/xe: Limit num_syncs to prevent oversized allocations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The exec and vm_bind ioctl allow userspace to specify an arbitrary num_syncs value. Without bounds checking, a very large num_syncs can force an excessively large allocation, leading to kernel warnings from the page allocator as below. Introduce DRM_XE_MAX_SYNCS (set to 1024) and reject any request exceeding this limit. " ------------[ cut here ]------------ WARNING: CPU: 0 PID: 1217 at mm/page_alloc.c:5124 __alloc_frozen_pages_noprof+0x2f8/0x2180 mm/page_alloc.c:5124 ... Call Trace: alloc_pages_mpol+0xe4/0x330 mm/mempolicy.c:2416 ___kmalloc_large_node+0xd8/0x110 mm/slub.c:4317 __kmalloc_large_node_noprof+0x18/0xe0 mm/slub.c:4348 __do_kmalloc_node mm/slub.c:4364 [inline] __kmalloc_noprof+0x3d4/0x4b0 mm/slub.c:4388 kmalloc_noprof include/linux/slab.h:909 [inline] kmalloc_array_noprof include/linux/slab.h:948 [inline] xe_exec_ioctl+0xa47/0x1e70 drivers/gpu/drm/xe/xe_exec.c:158 drm_ioctl_kernel+0x1f1/0x3e0 drivers/gpu/drm/drm_ioctl.c:797 drm_ioctl+0x5e7/0xc50 drivers/gpu/drm/drm_ioctl.c:894 xe_drm_ioctl+0x10b/0x170 drivers/gpu/drm/xe/xe_device.c:224 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:598 [inline] __se_sys_ioctl fs/ioctl.c:584 [inline] __x64_sys_ioctl+0x18b/0x210 fs/ioctl.c:584 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0xbb/0x380 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f ... " v2: Add "Reported-by" and Cc stable kernels. v3: Change XE_MAX_SYNCS from 64 to 1024. (Matt & Ashutosh) v4: s/XE_MAX_SYNCS/DRM_XE_MAX_SYNCS/ (Matt) v5: Do the check at the top of the exec func. (Matt) Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Reported-by: Koen Koning Reported-by: Peter Senna Tschudin Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/6450 Cc: # v6.12+ Cc: Matthew Brost Cc: Michal Mrozek Cc: Carl Zhang Cc: José Roberto de Souza Cc: Lionel Landwerlin Cc: Ivan Briano Cc: Thomas Hellström Cc: Ashutosh Dixit Signed-off-by: Shuicheng Lin Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://patch.msgid.link/20251205234715.2476561-5-shuicheng.lin@intel.com (cherry picked from commit b07bac9bd708ec468cd1b8a5fe70ae2ac9b0a11c) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_exec.c | 3 ++- drivers/gpu/drm/xe/xe_vm.c | 3 +++ include/uapi/drm/xe_drm.h | 1 + 3 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c index 4d81210e41f5..fd9480031750 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -132,7 +132,8 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) if (XE_IOCTL_DBG(xe, args->extensions) || XE_IOCTL_DBG(xe, args->pad[0] || args->pad[1] || args->pad[2]) || - XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) + XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]) || + XE_IOCTL_DBG(xe, args->num_syncs > DRM_XE_MAX_SYNCS)) return -EINVAL; q = xe_exec_queue_lookup(xef, args->exec_queue_id); diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 7cac646bdf1c..c93155c6c627 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -3324,6 +3324,9 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm, if (XE_IOCTL_DBG(xe, args->extensions)) return -EINVAL; + if (XE_IOCTL_DBG(xe, args->num_syncs > DRM_XE_MAX_SYNCS)) + return -EINVAL; + if (args->num_binds > 1) { u64 __user *bind_user = u64_to_user_ptr(args->vector_of_binds); diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 47853659a705..f64dc0eff0e6 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1463,6 +1463,7 @@ struct drm_xe_exec { /** @exec_queue_id: Exec queue ID for the batch buffer */ __u32 exec_queue_id; +#define DRM_XE_MAX_SYNCS 1024 /** @num_syncs: Amount of struct drm_xe_sync in array. */ __u32 num_syncs; -- cgit v1.2.3 From 733a8924229ff8c0385121a30fcd00bf70644743 Mon Sep 17 00:00:00 2001 From: Gergo Koteles Date: Thu, 13 Nov 2025 17:02:58 +0100 Subject: Input: add ABS_SND_PROFILE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ABS_SND_PROFILE used to describe the state of a multi-value sound profile switch. This will be used for the alert-slider on OnePlus phones or other phones. Profile values added as SND_PROFLE_(SILENT|VIBRATE|RING) identifiers to input-event-codes.h so they can be used from DTS. Signed-off-by: Gergo Koteles Reviewed-by: Bjorn Andersson Tested-by: Guido Günther # oneplus,fajita & oneplus,enchilada Reviewed-by: Guido Günther Signed-off-by: David Heidelberg Reviewed-by: Pavel Machek Link: https://patch.msgid.link/20251113-op6-tri-state-v8-1-54073f3874bc@ixit.cz Signed-off-by: Dmitry Torokhov --- Documentation/input/event-codes.rst | 6 ++++++ drivers/hid/hid-debug.c | 1 + include/uapi/linux/input-event-codes.h | 9 +++++++++ 3 files changed, 16 insertions(+) (limited to 'include') diff --git a/Documentation/input/event-codes.rst b/Documentation/input/event-codes.rst index 4424cbff251f..77a6c9b3956d 100644 --- a/Documentation/input/event-codes.rst +++ b/Documentation/input/event-codes.rst @@ -241,6 +241,12 @@ A few EV_ABS codes have special meanings: emitted only when the selected profile changes, indicating the newly selected profile value. +* ABS_SND_PROFILE: + + - Used to describe the state of a multi-value sound profile switch. + An event is emitted only when the selected profile changes, + indicating the newly selected profile value. + * ABS_MT_: - Used to describe multitouch input events. Please see diff --git a/drivers/hid/hid-debug.c b/drivers/hid/hid-debug.c index 337d2dc81b4c..c5865b0d2aaa 100644 --- a/drivers/hid/hid-debug.c +++ b/drivers/hid/hid-debug.c @@ -3513,6 +3513,7 @@ static const char *absolutes[ABS_CNT] = { [ABS_DISTANCE] = "Distance", [ABS_TILT_X] = "XTilt", [ABS_TILT_Y] = "YTilt", [ABS_TOOL_WIDTH] = "ToolWidth", [ABS_VOLUME] = "Volume", [ABS_PROFILE] = "Profile", + [ABS_SND_PROFILE] = "SoundProfile", [ABS_MISC] = "Misc", [ABS_MT_SLOT] = "MTSlot", [ABS_MT_TOUCH_MAJOR] = "MTMajor", diff --git a/include/uapi/linux/input-event-codes.h b/include/uapi/linux/input-event-codes.h index 30f3c9eaafaa..4bdb6a165987 100644 --- a/include/uapi/linux/input-event-codes.h +++ b/include/uapi/linux/input-event-codes.h @@ -891,6 +891,7 @@ #define ABS_VOLUME 0x20 #define ABS_PROFILE 0x21 +#define ABS_SND_PROFILE 0x22 #define ABS_MISC 0x28 @@ -1000,4 +1001,12 @@ #define SND_MAX 0x07 #define SND_CNT (SND_MAX+1) +/* + * ABS_SND_PROFILE values + */ + +#define SND_PROFILE_SILENT 0x00 +#define SND_PROFILE_VIBRATE 0x01 +#define SND_PROFILE_RING 0x02 + #endif -- cgit v1.2.3 From 4cc5373f2e749a6c96e8b9fa971931a4dd852860 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 19 Dec 2025 11:20:06 +0000 Subject: clang: work around asm output constraint problems Work around clang problems with "=rm" asm constraint. clang seems to always chose the memory output, while it is almost always the worst choice. Add ASM_OUTPUT_RM so that we can replace "=rm" constraint where it matters for clang, while not penalizing gcc. Signed-off-by: Eric Dumazet Suggested-by: Uros Bizjak Signed-off-by: Linus Torvalds --- include/linux/compiler-clang.h | 1 + include/linux/compiler_types.h | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index 107ce05bd16e..7edf1a07b535 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -145,6 +145,7 @@ */ #define ASM_INPUT_G "ir" #define ASM_INPUT_RM "r" +#define ASM_OUTPUT_RM "=r" /* * Declare compiler support for __typeof_unqual__() operator. diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 1280693766b9..d3318a3c2577 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -548,11 +548,12 @@ struct ftrace_likely_data { /* * Clang has trouble with constraints with multiple - * alternative behaviors (mainly "g" and "rm"). + * alternative behaviors ("g" , "rm" and "=rm"). */ #ifndef ASM_INPUT_G #define ASM_INPUT_G "g" #define ASM_INPUT_RM "rm" + #define ASM_OUTPUT_RM "=rm" #endif #ifdef CONFIG_CC_HAS_ASM_INLINE -- cgit v1.2.3 From b61104e7a6349bd2c2b3e2fb3260d87f15eda8f4 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Mon, 22 Dec 2025 08:45:48 +0100 Subject: regulator: uapi: Use UAPI integer type MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Using libc types and headers from the UAPI headers is problematic as it introduces a dependency on a full C toolchain. Use the fixed-width integer type provided by the UAPI headers instead. Signed-off-by: Thomas Weißschuh Link: https://patch.msgid.link/20251222-uapi-regulator-v1-1-a71c66eb1a94@linutronix.de Signed-off-by: Mark Brown --- include/uapi/regulator/regulator.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include') diff --git a/include/uapi/regulator/regulator.h b/include/uapi/regulator/regulator.h index 71bf71a22e7f..c4f2d1c19828 100644 --- a/include/uapi/regulator/regulator.h +++ b/include/uapi/regulator/regulator.h @@ -8,11 +8,7 @@ #ifndef _UAPI_REGULATOR_H #define _UAPI_REGULATOR_H -#ifdef __KERNEL__ #include -#else -#include -#endif /* * Regulator notifier events. @@ -62,7 +58,7 @@ struct reg_genl_event { char reg_name[32]; - uint64_t event; + __u64 event; }; /* attributes of reg_genl_family */ -- cgit v1.2.3 From 87e7f6019097746d1d06f98874a9f179b7a68f3e Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Fri, 19 Dec 2025 10:36:38 +0200 Subject: software node: Also support referencing non-constant software nodes Fwnode references are be implemented differently if referenced node is a software node. _Generic() is used to differentiate between the two cases but only const software nodes were present in the selection. Also add non-const software nodes. Reported-by: Kenneth Crudup Closes: https://lore.kernel.org/all/af773b82-bef2-4209-baaf-526d4661b7fc@panix.com/ Fixes: d7cdbbc93c56 ("software node: allow referencing firmware nodes") Signed-off-by: Sakari Ailus Tested-By: Kenneth R. Crudup Tested-by: Mehdi Djait # Dell XPS 9315 Reviewed-by: Mehdi Djait Link: https://patch.msgid.link/20251219083638.2454138-1-sakari.ailus@linux.intel.com Signed-off-by: Danilo Krummrich --- include/linux/property.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/property.h b/include/linux/property.h index 272bfbdea7bf..e30ef23a9af3 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -371,6 +371,7 @@ struct software_node_ref_args { (const struct software_node_ref_args) { \ .swnode = _Generic(_ref_, \ const struct software_node *: _ref_, \ + struct software_node *: _ref_, \ default: NULL), \ .fwnode = _Generic(_ref_, \ struct fwnode_handle *: _ref_, \ -- cgit v1.2.3 From 20e20b147cf7cb6780a5b95da2a0e37c52cd1015 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 15 Dec 2025 22:38:00 -0800 Subject: platform/x86/intel/vsec: correct kernel-doc comments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix kernel-doc warnings in intel_vsec.h to eliminate all kernel-doc warnings: Warning: include/linux/intel_vsec.h:92 struct member 'read_telem' not described in 'pmt_callbacks' Warning: include/linux/intel_vsec.h:146 expecting prototype for struct intel_sec_device. Prototype was for struct intel_vsec_device instead Warning: include/linux/intel_vsec.h:146 struct member 'priv_data_size' not described in 'intel_vsec_device' In struct pmt_callbacks, correct the kernel-doc for @read_telem. kernel-doc doesn't support documenting callback function parameters, so drop the '@' signs on those and use "* *" to make them somewhat readable in the produced documentation output. Signed-off-by: Randy Dunlap Link: https://patch.msgid.link/20251216063801.2896495-1-rdunlap@infradead.org Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- include/linux/intel_vsec.h | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/intel_vsec.h b/include/linux/intel_vsec.h index 53f6fe88e369..1a0f357c2427 100644 --- a/include/linux/intel_vsec.h +++ b/include/linux/intel_vsec.h @@ -80,13 +80,13 @@ enum intel_vsec_quirks { /** * struct pmt_callbacks - Callback infrastructure for PMT devices - * ->read_telem() when specified, called by client driver to access PMT data (instead - * of direct copy). - * @pdev: PCI device reference for the callback's use - * @guid: ID of data to acccss - * @data: buffer for the data to be copied - * @off: offset into the requested buffer - * @count: size of buffer + * @read_telem: when specified, called by client driver to access PMT + * data (instead of direct copy). + * * pdev: PCI device reference for the callback's use + * * guid: ID of data to acccss + * * data: buffer for the data to be copied + * * off: offset into the requested buffer + * * count: size of buffer */ struct pmt_callbacks { int (*read_telem)(struct pci_dev *pdev, u32 guid, u64 *data, loff_t off, u32 count); @@ -120,7 +120,7 @@ struct intel_vsec_platform_info { }; /** - * struct intel_sec_device - Auxbus specific device information + * struct intel_vsec_device - Auxbus specific device information * @auxdev: auxbus device struct for auxbus access * @pcidev: pci device associated with the device * @resource: any resources shared by the parent @@ -128,6 +128,7 @@ struct intel_vsec_platform_info { * @num_resources: number of resources * @id: xarray id * @priv_data: any private data needed + * @priv_data_size: size of private data area * @quirks: specified quirks * @base_addr: base address of entries (if specified) * @cap_id: the enumerated id of the vsec feature -- cgit v1.2.3 From c31f4aa8fed048fa70e742c4bb49bb48dc489ab3 Mon Sep 17 00:00:00 2001 From: David Gow Date: Fri, 19 Dec 2025 16:52:58 +0800 Subject: kunit: Enforce task execution in {soft,hard}irq contexts The kunit_run_irq_test() helper allows a function to be run in hardirq and softirq contexts (in addition to the task context). It does this by running the user-provided function concurrently in the three contexts, until either a timeout has expired or a number of iterations have completed in the normal task context. However, on setups where the initialisation of the hardirq and softirq contexts (or, indeed, the scheduling of those tasks) is significantly slower than the function execution, it's possible for that number of iterations to be exceeded before any runs in irq contexts actually occur. This occurs with the polyval.test_polyval_preparekey_in_irqs test, which runs 20000 iterations of the relatively fast preparekey function, and therefore fails often under many UML, 32-bit arm, m68k and other environments. Instead, ensure that the max_iterations limit counts executions in all three contexts, and requires at least one of each. This will cause the test to continue iterating until at least the irq contexts have been tested, or the 1s wall-clock limit has been exceeded. This causes the test to pass in all of my environments. In so doing, we also update the task counters to atomic ints, to better match both the 'int' max_iterations input, and to ensure they are correctly updated across contexts. Finally, we also fix a few potential assertion messages to be less-specific to the original crypto usecases. Fixes: 950a81224e8b ("lib/crypto: tests: Add hash-test-template.h and gen-hash-testvecs.py") Signed-off-by: David Gow Link: https://lore.kernel.org/r/20251219085259.1163048-1-davidgow@google.com Signed-off-by: Eric Biggers --- include/kunit/run-in-irq-context.h | 53 ++++++++++++++++++++++++-------------- 1 file changed, 33 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/kunit/run-in-irq-context.h b/include/kunit/run-in-irq-context.h index 108e96433ea4..c89b1b1b12dd 100644 --- a/include/kunit/run-in-irq-context.h +++ b/include/kunit/run-in-irq-context.h @@ -20,8 +20,8 @@ struct kunit_irq_test_state { bool task_func_reported_failure; bool hardirq_func_reported_failure; bool softirq_func_reported_failure; - unsigned long hardirq_func_calls; - unsigned long softirq_func_calls; + atomic_t hardirq_func_calls; + atomic_t softirq_func_calls; struct hrtimer timer; struct work_struct bh_work; }; @@ -32,7 +32,7 @@ static enum hrtimer_restart kunit_irq_test_timer_func(struct hrtimer *timer) container_of(timer, typeof(*state), timer); WARN_ON_ONCE(!in_hardirq()); - state->hardirq_func_calls++; + atomic_inc(&state->hardirq_func_calls); if (!state->func(state->test_specific_state)) state->hardirq_func_reported_failure = true; @@ -48,7 +48,7 @@ static void kunit_irq_test_bh_work_func(struct work_struct *work) container_of(work, typeof(*state), bh_work); WARN_ON_ONCE(!in_serving_softirq()); - state->softirq_func_calls++; + atomic_inc(&state->softirq_func_calls); if (!state->func(state->test_specific_state)) state->softirq_func_reported_failure = true; @@ -59,7 +59,10 @@ static void kunit_irq_test_bh_work_func(struct work_struct *work) * hardirq context concurrently, and reports a failure to KUnit if any * invocation of @func in any context returns false. @func is passed * @test_specific_state as its argument. At most 3 invocations of @func will - * run concurrently: one in each of task, softirq, and hardirq context. + * run concurrently: one in each of task, softirq, and hardirq context. @func + * will continue running until either @max_iterations calls have been made (so + * long as at least one each runs in task, softirq, and hardirq contexts), or + * one second has passed. * * The main purpose of this interrupt context testing is to validate fallback * code paths that run in contexts where the normal code path cannot be used, @@ -85,6 +88,8 @@ static inline void kunit_run_irq_test(struct kunit *test, bool (*func)(void *), .test_specific_state = test_specific_state, }; unsigned long end_jiffies; + int hardirq_calls, softirq_calls; + bool allctx = false; /* * Set up a hrtimer (the way we access hardirq context) and a work @@ -94,14 +99,25 @@ static inline void kunit_run_irq_test(struct kunit *test, bool (*func)(void *), CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD); INIT_WORK_ONSTACK(&state.bh_work, kunit_irq_test_bh_work_func); - /* Run for up to max_iterations or 1 second, whichever comes first. */ + /* + * Run for up to max_iterations (including at least one task, softirq, + * and hardirq), or 1 second, whichever comes first. + */ end_jiffies = jiffies + HZ; hrtimer_start(&state.timer, KUNIT_IRQ_TEST_HRTIMER_INTERVAL, HRTIMER_MODE_REL_HARD); - for (int i = 0; i < max_iterations && !time_after(jiffies, end_jiffies); - i++) { + for (int task_calls = 0, calls = 0; + ((calls < max_iterations) || !allctx) && + !time_after(jiffies, end_jiffies); + task_calls++) { if (!func(test_specific_state)) state.task_func_reported_failure = true; + + hardirq_calls = atomic_read(&state.hardirq_func_calls); + softirq_calls = atomic_read(&state.softirq_func_calls); + calls = task_calls + hardirq_calls + softirq_calls; + allctx = (task_calls > 0) && (hardirq_calls > 0) && + (softirq_calls > 0); } /* Cancel the timer and work. */ @@ -109,21 +125,18 @@ static inline void kunit_run_irq_test(struct kunit *test, bool (*func)(void *), flush_work(&state.bh_work); /* Sanity check: the timer and BH functions should have been run. */ - KUNIT_EXPECT_GT_MSG(test, state.hardirq_func_calls, 0, + KUNIT_EXPECT_GT_MSG(test, atomic_read(&state.hardirq_func_calls), 0, "Timer function was not called"); - KUNIT_EXPECT_GT_MSG(test, state.softirq_func_calls, 0, + KUNIT_EXPECT_GT_MSG(test, atomic_read(&state.softirq_func_calls), 0, "BH work function was not called"); - /* Check for incorrect hash values reported from any context. */ - KUNIT_EXPECT_FALSE_MSG( - test, state.task_func_reported_failure, - "Incorrect hash values reported from task context"); - KUNIT_EXPECT_FALSE_MSG( - test, state.hardirq_func_reported_failure, - "Incorrect hash values reported from hardirq context"); - KUNIT_EXPECT_FALSE_MSG( - test, state.softirq_func_reported_failure, - "Incorrect hash values reported from softirq context"); + /* Check for failure reported from any context. */ + KUNIT_EXPECT_FALSE_MSG(test, state.task_func_reported_failure, + "Failure reported from task context"); + KUNIT_EXPECT_FALSE_MSG(test, state.hardirq_func_reported_failure, + "Failure reported from hardirq context"); + KUNIT_EXPECT_FALSE_MSG(test, state.softirq_func_reported_failure, + "Failure reported from softirq context"); } #endif /* _KUNIT_RUN_IN_IRQ_CONTEXT_H */ -- cgit v1.2.3 From 754c23238438600e9236719f7e67aff2c4d02093 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Fri, 19 Dec 2025 12:32:59 +0100 Subject: drm/pagemap, drm/xe: Ensure that the devmem allocation is idle before use MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In situations where no system memory is migrated to devmem, and in upcoming patches where another GPU is performing the migration to the newly allocated devmem buffer, there is nothing to ensure any ongoing clear to the devmem allocation or async eviction from the devmem allocation is complete. Address that by passing a struct dma_fence down to the copy functions, and ensure it is waited for before migration is marked complete. v3: - New patch. v4: - Update the logic used for determining when to wait for the pre_migrate_fence. - Update the logic used for determining when to warn for the pre_migrate_fence since the scheduler fences apparently can signal out-of-order. v5: - Fix a UAF (CI) - Remove references to source P2P migration (Himal) - Put the pre_migrate_fence after migration. v6: - Pipeline the pre_migrate_fence dependency (Matt Brost) Fixes: c5b3eb5a906c ("drm/xe: Add GPUSVM device memory copy vfunc functions") Cc: Matthew Brost Cc: # v6.15+ Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost Acked-by: Maarten Lankhorst # For merging through drm-xe. Link: https://patch.msgid.link/20251219113320.183860-4-thomas.hellstrom@linux.intel.com (cherry picked from commit 16b5ad31952476fb925c401897fc171cd37f536b) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/drm_pagemap.c | 17 ++++++++++---- drivers/gpu/drm/xe/xe_migrate.c | 25 ++++++++++++++++----- drivers/gpu/drm/xe/xe_migrate.h | 6 +++-- drivers/gpu/drm/xe/xe_svm.c | 49 +++++++++++++++++++++++++++++++---------- include/drm/drm_pagemap.h | 17 +++++++++++--- 5 files changed, 88 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_pagemap.c b/drivers/gpu/drm/drm_pagemap.c index 37d7cfbbb3e8..06c1bd8fc4d1 100644 --- a/drivers/gpu/drm/drm_pagemap.c +++ b/drivers/gpu/drm/drm_pagemap.c @@ -3,6 +3,7 @@ * Copyright © 2024-2025 Intel Corporation */ +#include #include #include #include @@ -408,10 +409,14 @@ int drm_pagemap_migrate_to_devmem(struct drm_pagemap_devmem *devmem_allocation, drm_pagemap_get_devmem_page(page, zdd); } - err = ops->copy_to_devmem(pages, pagemap_addr, npages); + err = ops->copy_to_devmem(pages, pagemap_addr, npages, + devmem_allocation->pre_migrate_fence); if (err) goto err_finalize; + dma_fence_put(devmem_allocation->pre_migrate_fence); + devmem_allocation->pre_migrate_fence = NULL; + /* Upon success bind devmem allocation to range and zdd */ devmem_allocation->timeslice_expiration = get_jiffies_64() + msecs_to_jiffies(timeslice_ms); @@ -596,7 +601,7 @@ retry: for (i = 0; i < npages; ++i) pages[i] = migrate_pfn_to_page(src[i]); - err = ops->copy_to_ram(pages, pagemap_addr, npages); + err = ops->copy_to_ram(pages, pagemap_addr, npages, NULL); if (err) goto err_finalize; @@ -732,7 +737,7 @@ static int __drm_pagemap_migrate_to_ram(struct vm_area_struct *vas, for (i = 0; i < npages; ++i) pages[i] = migrate_pfn_to_page(migrate.src[i]); - err = ops->copy_to_ram(pages, pagemap_addr, npages); + err = ops->copy_to_ram(pages, pagemap_addr, npages, NULL); if (err) goto err_finalize; @@ -813,11 +818,14 @@ EXPORT_SYMBOL_GPL(drm_pagemap_pagemap_ops_get); * @ops: Pointer to the operations structure for GPU SVM device memory * @dpagemap: The struct drm_pagemap we're allocating from. * @size: Size of device memory allocation + * @pre_migrate_fence: Fence to wait for or pipeline behind before migration starts. + * (May be NULL). */ void drm_pagemap_devmem_init(struct drm_pagemap_devmem *devmem_allocation, struct device *dev, struct mm_struct *mm, const struct drm_pagemap_devmem_ops *ops, - struct drm_pagemap *dpagemap, size_t size) + struct drm_pagemap *dpagemap, size_t size, + struct dma_fence *pre_migrate_fence) { init_completion(&devmem_allocation->detached); devmem_allocation->dev = dev; @@ -825,6 +833,7 @@ void drm_pagemap_devmem_init(struct drm_pagemap_devmem *devmem_allocation, devmem_allocation->ops = ops; devmem_allocation->dpagemap = dpagemap; devmem_allocation->size = size; + devmem_allocation->pre_migrate_fence = pre_migrate_fence; } EXPORT_SYMBOL_GPL(drm_pagemap_devmem_init); diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 2184af413b91..5a95b08a4723 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -2062,6 +2062,7 @@ static struct dma_fence *xe_migrate_vram(struct xe_migrate *m, unsigned long sram_offset, struct drm_pagemap_addr *sram_addr, u64 vram_addr, + struct dma_fence *deps, const enum xe_migrate_copy_dir dir) { struct xe_gt *gt = m->tile->primary_gt; @@ -2150,6 +2151,14 @@ static struct dma_fence *xe_migrate_vram(struct xe_migrate *m, xe_sched_job_add_migrate_flush(job, MI_INVALIDATE_TLB); + if (deps && !dma_fence_is_signaled(deps)) { + dma_fence_get(deps); + err = drm_sched_job_add_dependency(&job->drm, deps); + if (err) + dma_fence_wait(deps, false); + err = 0; + } + mutex_lock(&m->job_mutex); xe_sched_job_arm(job); fence = dma_fence_get(&job->drm.s_fence->finished); @@ -2175,6 +2184,8 @@ err: * @npages: Number of pages to migrate. * @src_addr: Array of DMA information (source of migrate) * @dst_addr: Device physical address of VRAM (destination of migrate) + * @deps: struct dma_fence representing the dependencies that need + * to be signaled before migration. * * Copy from an array dma addresses to a VRAM device physical address * @@ -2184,10 +2195,11 @@ err: struct dma_fence *xe_migrate_to_vram(struct xe_migrate *m, unsigned long npages, struct drm_pagemap_addr *src_addr, - u64 dst_addr) + u64 dst_addr, + struct dma_fence *deps) { return xe_migrate_vram(m, npages * PAGE_SIZE, 0, src_addr, dst_addr, - XE_MIGRATE_COPY_TO_VRAM); + deps, XE_MIGRATE_COPY_TO_VRAM); } /** @@ -2196,6 +2208,8 @@ struct dma_fence *xe_migrate_to_vram(struct xe_migrate *m, * @npages: Number of pages to migrate. * @src_addr: Device physical address of VRAM (source of migrate) * @dst_addr: Array of DMA information (destination of migrate) + * @deps: struct dma_fence representing the dependencies that need + * to be signaled before migration. * * Copy from a VRAM device physical address to an array dma addresses * @@ -2205,10 +2219,11 @@ struct dma_fence *xe_migrate_to_vram(struct xe_migrate *m, struct dma_fence *xe_migrate_from_vram(struct xe_migrate *m, unsigned long npages, u64 src_addr, - struct drm_pagemap_addr *dst_addr) + struct drm_pagemap_addr *dst_addr, + struct dma_fence *deps) { return xe_migrate_vram(m, npages * PAGE_SIZE, 0, dst_addr, src_addr, - XE_MIGRATE_COPY_TO_SRAM); + deps, XE_MIGRATE_COPY_TO_SRAM); } static void xe_migrate_dma_unmap(struct xe_device *xe, @@ -2384,7 +2399,7 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo, __fence = xe_migrate_vram(m, current_bytes, (unsigned long)buf & ~PAGE_MASK, &pagemap_addr[current_page], - vram_addr, write ? + vram_addr, NULL, write ? XE_MIGRATE_COPY_TO_VRAM : XE_MIGRATE_COPY_TO_SRAM); if (IS_ERR(__fence)) { diff --git a/drivers/gpu/drm/xe/xe_migrate.h b/drivers/gpu/drm/xe/xe_migrate.h index 260e298e5dd7..b76441f062b4 100644 --- a/drivers/gpu/drm/xe/xe_migrate.h +++ b/drivers/gpu/drm/xe/xe_migrate.h @@ -116,12 +116,14 @@ int xe_migrate_init(struct xe_migrate *m); struct dma_fence *xe_migrate_to_vram(struct xe_migrate *m, unsigned long npages, struct drm_pagemap_addr *src_addr, - u64 dst_addr); + u64 dst_addr, + struct dma_fence *deps); struct dma_fence *xe_migrate_from_vram(struct xe_migrate *m, unsigned long npages, u64 src_addr, - struct drm_pagemap_addr *dst_addr); + struct drm_pagemap_addr *dst_addr, + struct dma_fence *deps); struct dma_fence *xe_migrate_copy(struct xe_migrate *m, struct xe_bo *src_bo, diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c index 894e8f092e3f..f97e0af6a9b0 100644 --- a/drivers/gpu/drm/xe/xe_svm.c +++ b/drivers/gpu/drm/xe/xe_svm.c @@ -476,7 +476,8 @@ static void xe_svm_copy_us_stats_incr(struct xe_gt *gt, static int xe_svm_copy(struct page **pages, struct drm_pagemap_addr *pagemap_addr, - unsigned long npages, const enum xe_svm_copy_dir dir) + unsigned long npages, const enum xe_svm_copy_dir dir, + struct dma_fence *pre_migrate_fence) { struct xe_vram_region *vr = NULL; struct xe_gt *gt = NULL; @@ -565,7 +566,8 @@ static int xe_svm_copy(struct page **pages, __fence = xe_migrate_from_vram(vr->migrate, i - pos + incr, vram_addr, - &pagemap_addr[pos]); + &pagemap_addr[pos], + pre_migrate_fence); } else { vm_dbg(&xe->drm, "COPY TO VRAM - 0x%016llx -> 0x%016llx, NPAGES=%ld", @@ -574,13 +576,14 @@ static int xe_svm_copy(struct page **pages, __fence = xe_migrate_to_vram(vr->migrate, i - pos + incr, &pagemap_addr[pos], - vram_addr); + vram_addr, + pre_migrate_fence); } if (IS_ERR(__fence)) { err = PTR_ERR(__fence); goto err_out; } - + pre_migrate_fence = NULL; dma_fence_put(fence); fence = __fence; } @@ -603,20 +606,22 @@ static int xe_svm_copy(struct page **pages, vram_addr, (u64)pagemap_addr[pos].addr, 1); __fence = xe_migrate_from_vram(vr->migrate, 1, vram_addr, - &pagemap_addr[pos]); + &pagemap_addr[pos], + pre_migrate_fence); } else { vm_dbg(&xe->drm, "COPY TO VRAM - 0x%016llx -> 0x%016llx, NPAGES=%d", (u64)pagemap_addr[pos].addr, vram_addr, 1); __fence = xe_migrate_to_vram(vr->migrate, 1, &pagemap_addr[pos], - vram_addr); + vram_addr, + pre_migrate_fence); } if (IS_ERR(__fence)) { err = PTR_ERR(__fence); goto err_out; } - + pre_migrate_fence = NULL; dma_fence_put(fence); fence = __fence; } @@ -629,6 +634,8 @@ err_out: dma_fence_wait(fence, false); dma_fence_put(fence); } + if (pre_migrate_fence) + dma_fence_wait(pre_migrate_fence, false); /* * XXX: We can't derive the GT here (or anywhere in this functions, but @@ -645,16 +652,20 @@ err_out: static int xe_svm_copy_to_devmem(struct page **pages, struct drm_pagemap_addr *pagemap_addr, - unsigned long npages) + unsigned long npages, + struct dma_fence *pre_migrate_fence) { - return xe_svm_copy(pages, pagemap_addr, npages, XE_SVM_COPY_TO_VRAM); + return xe_svm_copy(pages, pagemap_addr, npages, XE_SVM_COPY_TO_VRAM, + pre_migrate_fence); } static int xe_svm_copy_to_ram(struct page **pages, struct drm_pagemap_addr *pagemap_addr, - unsigned long npages) + unsigned long npages, + struct dma_fence *pre_migrate_fence) { - return xe_svm_copy(pages, pagemap_addr, npages, XE_SVM_COPY_TO_SRAM); + return xe_svm_copy(pages, pagemap_addr, npages, XE_SVM_COPY_TO_SRAM, + pre_migrate_fence); } static struct xe_bo *to_xe_bo(struct drm_pagemap_devmem *devmem_allocation) @@ -667,6 +678,7 @@ static void xe_svm_devmem_release(struct drm_pagemap_devmem *devmem_allocation) struct xe_bo *bo = to_xe_bo(devmem_allocation); struct xe_device *xe = xe_bo_device(bo); + dma_fence_put(devmem_allocation->pre_migrate_fence); xe_bo_put_async(bo); xe_pm_runtime_put(xe); } @@ -861,6 +873,7 @@ static int xe_drm_pagemap_populate_mm(struct drm_pagemap *dpagemap, unsigned long timeslice_ms) { struct xe_vram_region *vr = container_of(dpagemap, typeof(*vr), dpagemap); + struct dma_fence *pre_migrate_fence = NULL; struct xe_device *xe = vr->xe; struct device *dev = xe->drm.dev; struct drm_buddy_block *block; @@ -887,8 +900,20 @@ static int xe_drm_pagemap_populate_mm(struct drm_pagemap *dpagemap, break; } + /* Ensure that any clearing or async eviction will complete before migration. */ + if (!dma_resv_test_signaled(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL)) { + err = dma_resv_get_singleton(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL, + &pre_migrate_fence); + if (err) + dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL, + false, MAX_SCHEDULE_TIMEOUT); + else if (pre_migrate_fence) + dma_fence_enable_sw_signaling(pre_migrate_fence); + } + drm_pagemap_devmem_init(&bo->devmem_allocation, dev, mm, - &dpagemap_devmem_ops, dpagemap, end - start); + &dpagemap_devmem_ops, dpagemap, end - start, + pre_migrate_fence); blocks = &to_xe_ttm_vram_mgr_resource(bo->ttm.resource)->blocks; list_for_each_entry(block, blocks, link) diff --git a/include/drm/drm_pagemap.h b/include/drm/drm_pagemap.h index f6e7e234c089..70a7991f784f 100644 --- a/include/drm/drm_pagemap.h +++ b/include/drm/drm_pagemap.h @@ -8,6 +8,7 @@ #define NR_PAGES(order) (1U << (order)) +struct dma_fence; struct drm_pagemap; struct drm_pagemap_zdd; struct device; @@ -174,6 +175,8 @@ struct drm_pagemap_devmem_ops { * @pages: Pointer to array of device memory pages (destination) * @pagemap_addr: Pointer to array of DMA information (source) * @npages: Number of pages to copy + * @pre_migrate_fence: dma-fence to wait for before migration start. + * May be NULL. * * Copy pages to device memory. If the order of a @pagemap_addr entry * is greater than 0, the entry is populated but subsequent entries @@ -183,13 +186,16 @@ struct drm_pagemap_devmem_ops { */ int (*copy_to_devmem)(struct page **pages, struct drm_pagemap_addr *pagemap_addr, - unsigned long npages); + unsigned long npages, + struct dma_fence *pre_migrate_fence); /** * @copy_to_ram: Copy to system RAM (required for migration) * @pages: Pointer to array of device memory pages (source) * @pagemap_addr: Pointer to array of DMA information (destination) * @npages: Number of pages to copy + * @pre_migrate_fence: dma-fence to wait for before migration start. + * May be NULL. * * Copy pages to system RAM. If the order of a @pagemap_addr entry * is greater than 0, the entry is populated but subsequent entries @@ -199,7 +205,8 @@ struct drm_pagemap_devmem_ops { */ int (*copy_to_ram)(struct page **pages, struct drm_pagemap_addr *pagemap_addr, - unsigned long npages); + unsigned long npages, + struct dma_fence *pre_migrate_fence); }; /** @@ -212,6 +219,8 @@ struct drm_pagemap_devmem_ops { * @dpagemap: The struct drm_pagemap of the pages this allocation belongs to. * @size: Size of device memory allocation * @timeslice_expiration: Timeslice expiration in jiffies + * @pre_migrate_fence: Fence to wait for or pipeline behind before migration starts. + * (May be NULL). */ struct drm_pagemap_devmem { struct device *dev; @@ -221,6 +230,7 @@ struct drm_pagemap_devmem { struct drm_pagemap *dpagemap; size_t size; u64 timeslice_expiration; + struct dma_fence *pre_migrate_fence; }; int drm_pagemap_migrate_to_devmem(struct drm_pagemap_devmem *devmem_allocation, @@ -238,7 +248,8 @@ struct drm_pagemap *drm_pagemap_page_to_dpagemap(struct page *page); void drm_pagemap_devmem_init(struct drm_pagemap_devmem *devmem_allocation, struct device *dev, struct mm_struct *mm, const struct drm_pagemap_devmem_ops *ops, - struct drm_pagemap *dpagemap, size_t size); + struct drm_pagemap *dpagemap, size_t size, + struct dma_fence *pre_migrate_fence); int drm_pagemap_populate_mm(struct drm_pagemap *dpagemap, unsigned long start, unsigned long end, -- cgit v1.2.3 From 06e219f6a706c367c93051f408ac61417643d2f9 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 15 Dec 2025 17:02:35 +0200 Subject: net: dsa: properly keep track of conduit reference Problem description ------------------- DSA has a mumbo-jumbo of reference handling of the conduit net device and its kobject which, sadly, is just wrong and doesn't make sense. There are two distinct problems. 1. The OF path, which uses of_find_net_device_by_node(), never releases the elevated refcount on the conduit's kobject. Nominally, the OF and non-OF paths should result in objects having identical reference counts taken, and it is already suspicious that dsa_dev_to_net_device() has a put_device() call which is missing in dsa_port_parse_of(), but we can actually even verify that an issue exists. With CONFIG_DEBUG_KOBJECT_RELEASE=y, if we run this command "before" and "after" applying this patch: (unbind the conduit driver for net device eno2) echo 0000:00:00.2 > /sys/bus/pci/drivers/fsl_enetc/unbind we see these lines in the output diff which appear only with the patch applied: kobject: 'eno2' (ffff002009a3a6b8): kobject_release, parent 0000000000000000 (delayed 1000) kobject: '109' (ffff0020099d59a0): kobject_release, parent 0000000000000000 (delayed 1000) 2. After we find the conduit interface one way (OF) or another (non-OF), it can get unregistered at any time, and DSA remains with a long-lived, but in this case stale, cpu_dp->conduit pointer. Holding the net device's underlying kobject isn't actually of much help, it just prevents it from being freed (but we never need that kobject directly). What helps us to prevent the net device from being unregistered is the parallel netdev reference mechanism (dev_hold() and dev_put()). Actually we actually use that netdev tracker mechanism implicitly on user ports since commit 2f1e8ea726e9 ("net: dsa: link interfaces with the DSA master to get rid of lockdep warnings"), via netdev_upper_dev_link(). But time still passes at DSA switch probe time between the initial of_find_net_device_by_node() code and the user port creation time, time during which the conduit could unregister itself and DSA wouldn't know about it. So we have to run of_find_net_device_by_node() under rtnl_lock() to prevent that from happening, and release the lock only with the netdev tracker having acquired the reference. Do we need to keep the reference until dsa_unregister_switch() / dsa_switch_shutdown()? 1: Maybe yes. A switch device will still be registered even if all user ports failed to probe, see commit 86f8b1c01a0a ("net: dsa: Do not make user port errors fatal"), and the cpu_dp->conduit pointers remain valid. I haven't audited all call paths to see whether they will actually use the conduit in lack of any user port, but if they do, it seems safer to not rely on user ports for that reference. 2. Definitely yes. We support changing the conduit which a user port is associated to, and we can get into a situation where we've moved all user ports away from a conduit, thus no longer hold any reference to it via the net device tracker. But we shouldn't let it go nonetheless - see the next change in relation to dsa_tree_find_first_conduit() and LAG conduits which disappear. We have to be prepared to return to the physical conduit, so the CPU port must explicitly keep another reference to it. This is also to say: the user ports and their CPU ports may not always keep a reference to the same conduit net device, and both are needed. As for the conduit's kobject for the /sys/class/net/ entry, we don't care about it, we can release it as soon as we hold the net device object itself. History and blame attribution ----------------------------- The code has been refactored so many times, it is very difficult to follow and properly attribute a blame, but I'll try to make a short history which I hope to be correct. We have two distinct probing paths: - one for OF, introduced in 2016 in commit 83c0afaec7b7 ("net: dsa: Add new binding implementation") - one for non-OF, introduced in 2017 in commit 71e0bbde0d88 ("net: dsa: Add support for platform data") These are both complete rewrites of the original probing paths (which used struct dsa_switch_driver and other weird stuff, instead of regular devices on their respective buses for register access, like MDIO, SPI, I2C etc): - one for OF, introduced in 2013 in commit 5e95329b701c ("dsa: add device tree bindings to register DSA switches") - one for non-OF, introduced in 2008 in commit 91da11f870f0 ("net: Distributed Switch Architecture protocol support") except for tiny bits and pieces like dsa_dev_to_net_device() which were seemingly carried over since the original commit, and used to this day. The point is that the original probing paths received a fix in 2015 in the form of commit 679fb46c5785 ("net: dsa: Add missing master netdev dev_put() calls"), but the fix never made it into the "new" (dsa2) probing paths that can still be traced to today, and the fixed probing path was later deleted in 2019 in commit 93e86b3bc842 ("net: dsa: Remove legacy probing support"). That is to say, the new probing paths were never quite correct in this area. The existence of the legacy probing support which was deleted in 2019 explains why dsa_dev_to_net_device() returns a conduit with elevated refcount (because it was supposed to be released during dsa_remove_dst()). After the removal of the legacy code, the only user of dsa_dev_to_net_device() calls dev_put(conduit) immediately after this function returns. This pattern makes no sense today, and can only be interpreted historically to understand why dev_hold() was there in the first place. Change details -------------- Today we have a better netdev tracking infrastructure which we should use. Logically netdev_hold() belongs in common code (dsa_port_parse_cpu(), where dp->conduit is assigned), but there is a tradeoff to be made with the rtnl_lock() section which would become a bit too long if we did that - dsa_port_parse_cpu() also calls request_module(). So we duplicate a bit of logic in order for the callers of dsa_port_parse_cpu() to be the ones responsible of holding the conduit reference and releasing it on error. This shortens the rtnl_lock() section significantly. In the dsa_switch_probe() error path, dsa_switch_release_ports() will be called in a number of situations, one being where dsa_port_parse_cpu() maybe didn't get the chance to run at all (a different port failed earlier, etc). So we have to test for the conduit being NULL prior to calling netdev_put(). There have still been so many transformations to the code since the blamed commits (rename master -> conduit, commit 0650bf52b31f ("net: dsa: be compatible with masters which unregister on shutdown")), that it only makes sense to fix the code using the best methods available today and see how it can be backported to stable later. I suspect the fix cannot even be backported to kernels which lack dsa_switch_shutdown(), and I suspect this is also maybe why the long-lived conduit reference didn't make it into the new DSA probing paths at the time (problems during shutdown). Because dsa_dev_to_net_device() has a single call site and has to be changed anyway, the logic was just absorbed into the non-OF dsa_port_parse(). Tested on the ocelot/felix switch and on dsa_loop, both on the NXP LS1028A with CONFIG_DEBUG_KOBJECT_RELEASE=y. Reported-by: Ma Ke Closes: https://lore.kernel.org/netdev/20251214131204.4684-1-make24@iscas.ac.cn/ Fixes: 83c0afaec7b7 ("net: dsa: Add new binding implementation") Fixes: 71e0bbde0d88 ("net: dsa: Add support for platform data") Reviewed-by: Jonas Gorski Signed-off-by: Vladimir Oltean Link: https://patch.msgid.link/20251215150236.3931670-1-vladimir.oltean@nxp.com Signed-off-by: Paolo Abeni --- include/net/dsa.h | 1 + net/dsa/dsa.c | 59 ++++++++++++++++++++++++++++++++----------------------- 2 files changed, 35 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index cced1a866757..6b2b5ed64ea4 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -302,6 +302,7 @@ struct dsa_port { struct devlink_port devlink_port; struct phylink *pl; struct phylink_config pl_config; + netdevice_tracker conduit_tracker; struct dsa_lag *lag; struct net_device *hsr_dev; diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index a20efabe778f..50b3fceb5c04 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -1253,14 +1253,25 @@ static int dsa_port_parse_of(struct dsa_port *dp, struct device_node *dn) if (ethernet) { struct net_device *conduit; const char *user_protocol; + int err; + rtnl_lock(); conduit = of_find_net_device_by_node(ethernet); of_node_put(ethernet); - if (!conduit) + if (!conduit) { + rtnl_unlock(); return -EPROBE_DEFER; + } + + netdev_hold(conduit, &dp->conduit_tracker, GFP_KERNEL); + put_device(&conduit->dev); + rtnl_unlock(); user_protocol = of_get_property(dn, "dsa-tag-protocol", NULL); - return dsa_port_parse_cpu(dp, conduit, user_protocol); + err = dsa_port_parse_cpu(dp, conduit, user_protocol); + if (err) + netdev_put(conduit, &dp->conduit_tracker); + return err; } if (link) @@ -1393,37 +1404,30 @@ static struct device *dev_find_class(struct device *parent, char *class) return device_find_child(parent, class, dev_is_class); } -static struct net_device *dsa_dev_to_net_device(struct device *dev) -{ - struct device *d; - - d = dev_find_class(dev, "net"); - if (d != NULL) { - struct net_device *nd; - - nd = to_net_dev(d); - dev_hold(nd); - put_device(d); - - return nd; - } - - return NULL; -} - static int dsa_port_parse(struct dsa_port *dp, const char *name, struct device *dev) { if (!strcmp(name, "cpu")) { struct net_device *conduit; + struct device *d; + int err; - conduit = dsa_dev_to_net_device(dev); - if (!conduit) + rtnl_lock(); + d = dev_find_class(dev, "net"); + if (!d) { + rtnl_unlock(); return -EPROBE_DEFER; + } - dev_put(conduit); + conduit = to_net_dev(d); + netdev_hold(conduit, &dp->conduit_tracker, GFP_KERNEL); + put_device(d); + rtnl_unlock(); - return dsa_port_parse_cpu(dp, conduit, NULL); + err = dsa_port_parse_cpu(dp, conduit, NULL); + if (err) + netdev_put(conduit, &dp->conduit_tracker); + return err; } if (!strcmp(name, "dsa")) @@ -1491,6 +1495,9 @@ static void dsa_switch_release_ports(struct dsa_switch *ds) struct dsa_vlan *v, *n; dsa_switch_for_each_port_safe(dp, next, ds) { + if (dsa_port_is_cpu(dp) && dp->conduit) + netdev_put(dp->conduit, &dp->conduit_tracker); + /* These are either entries that upper layers lost track of * (probably due to bugs), or installed through interfaces * where one does not necessarily have to remove them, like @@ -1635,8 +1642,10 @@ void dsa_switch_shutdown(struct dsa_switch *ds) /* Disconnect from further netdevice notifiers on the conduit, * since netdev_uses_dsa() will now return false. */ - dsa_switch_for_each_cpu_port(dp, ds) + dsa_switch_for_each_cpu_port(dp, ds) { dp->conduit->dsa_ptr = NULL; + netdev_put(dp->conduit, &dp->conduit_tracker); + } rtnl_unlock(); out: -- cgit v1.2.3 From 5393802c94e0ab1295c04c94c57bcb00222d4674 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 27 Nov 2025 10:39:24 -0800 Subject: genalloc.h: fix htmldocs warning WARNING: include/linux/genalloc.h:52 function parameter 'start_addr' not described in 'genpool_algo_t' Fixes: 52fbf1134d47 ("lib/genalloc.c: fix allocation of aligned buffer from non-aligned chunk") Reported-by: Stephen Rothwell Closes: https://lkml.kernel.org/r/20251127130624.563597e3@canb.auug.org.au Acked-by: Randy Dunlap Tested-by: Randy Dunlap Cc: Alexey Skidanov Signed-off-by: Andrew Morton --- include/linux/genalloc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/genalloc.h b/include/linux/genalloc.h index 0bd581003cd5..60de63e46b33 100644 --- a/include/linux/genalloc.h +++ b/include/linux/genalloc.h @@ -44,6 +44,7 @@ struct gen_pool; * @nr: The number of zeroed bits we're looking for * @data: optional additional data used by the callback * @pool: the pool being allocated from + * @start_addr: start address of memory chunk */ typedef unsigned long (*genpool_algo_t)(unsigned long *map, unsigned long size, -- cgit v1.2.3 From 007f5da43b3d0ecff972e2616062b8da1f862f5e Mon Sep 17 00:00:00 2001 From: Jiayuan Chen Date: Thu, 4 Dec 2025 18:59:55 +0000 Subject: mm/kasan: fix incorrect unpoisoning in vrealloc for KASAN Patch series "kasan: vmalloc: Fixes for the percpu allocator and vrealloc", v3. Patches fix two issues related to KASAN and vmalloc. The first one, a KASAN tag mismatch, possibly resulting in a kernel panic, can be observed on systems with a tag-based KASAN enabled and with multiple NUMA nodes. Initially it was only noticed on x86 [1] but later a similar issue was also reported on arm64 [2]. Specifically the problem is related to how vm_structs interact with pcpu_chunks - both when they are allocated, assigned and when pcpu_chunk addresses are derived. When vm_structs are allocated they are unpoisoned, each with a different random tag, if vmalloc support is enabled along the KASAN mode. Later when first pcpu chunk is allocated it gets its 'base_addr' field set to the first allocated vm_struct. With that it inherits that vm_struct's tag. When pcpu_chunk addresses are later derived (by pcpu_chunk_addr(), for example in pcpu_alloc_noprof()) the base_addr field is used and offsets are added to it. If the initial conditions are satisfied then some of the offsets will point into memory allocated with a different vm_struct. So while the lower bits will get accurately derived the tag bits in the top of the pointer won't match the shadow memory contents. The solution (proposed at v2 of the x86 KASAN series [3]) is to unpoison the vm_structs with the same tag when allocating them for the per cpu allocator (in pcpu_get_vm_areas()). The second one reported by syzkaller [4] is related to vrealloc and happens because of random tag generation when unpoisoning memory without allocating new pages. This breaks shadow memory tracking and needs to reuse the existing tag instead of generating a new one. At the same time an inconsistency in used flags is corrected. This patch (of 3): Syzkaller reported a memory out-of-bounds bug [4]. This patch fixes two issues: 1. In vrealloc the KASAN_VMALLOC_VM_ALLOC flag is missing when unpoisoning the extended region. This flag is required to correctly associate the allocation with KASAN's vmalloc tracking. Note: In contrast, vzalloc (via __vmalloc_node_range_noprof) explicitly sets KASAN_VMALLOC_VM_ALLOC and calls kasan_unpoison_vmalloc() with it. vrealloc must behave consistently -- especially when reusing existing vmalloc regions -- to ensure KASAN can track allocations correctly. 2. When vrealloc reuses an existing vmalloc region (without allocating new pages) KASAN generates a new tag, which breaks tag-based memory access tracking. Introduce KASAN_VMALLOC_KEEP_TAG, a new KASAN flag that allows reusing the tag already attached to the pointer, ensuring consistent tag behavior during reallocation. Pass KASAN_VMALLOC_KEEP_TAG and KASAN_VMALLOC_VM_ALLOC to the kasan_unpoison_vmalloc inside vrealloc_node_align_noprof(). Link: https://lkml.kernel.org/r/cover.1765978969.git.m.wieczorretman@pm.me Link: https://lkml.kernel.org/r/38dece0a4074c43e48150d1e242f8242c73bf1a5.1764874575.git.m.wieczorretman@pm.me Link: https://lore.kernel.org/all/e7e04692866d02e6d3b32bb43b998e5d17092ba4.1738686764.git.maciej.wieczor-retman@intel.com/ [1] Link: https://lore.kernel.org/all/aMUrW1Znp1GEj7St@MiWiFi-R3L-srv/ [2] Link: https://lore.kernel.org/all/CAPAsAGxDRv_uFeMYu9TwhBVWHCCtkSxoWY4xmFB_vowMbi8raw@mail.gmail.com/ [3] Link: https://syzkaller.appspot.com/bug?extid=997752115a851cb0cf36 [4] Fixes: a0309faf1cb0 ("mm: vmalloc: support more granular vrealloc() sizing") Signed-off-by: Jiayuan Chen Co-developed-by: Maciej Wieczor-Retman Signed-off-by: Maciej Wieczor-Retman Reported-by: syzbot+997752115a851cb0cf36@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/68e243a2.050a0220.1696c6.007d.GAE@google.com/T/ Reviewed-by: Andrey Konovalov Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Danilo Krummrich Cc: Dmitriy Vyukov Cc: Kees Cook Cc: Marco Elver Cc: "Uladzislau Rezki (Sony)" Cc: Vincenzo Frascino Cc: Signed-off-by: Andrew Morton --- include/linux/kasan.h | 1 + mm/kasan/hw_tags.c | 2 +- mm/kasan/shadow.c | 4 +++- mm/vmalloc.c | 4 +++- 4 files changed, 8 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index f335c1d7b61d..df3d8567dde9 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -28,6 +28,7 @@ typedef unsigned int __bitwise kasan_vmalloc_flags_t; #define KASAN_VMALLOC_INIT ((__force kasan_vmalloc_flags_t)0x01u) #define KASAN_VMALLOC_VM_ALLOC ((__force kasan_vmalloc_flags_t)0x02u) #define KASAN_VMALLOC_PROT_NORMAL ((__force kasan_vmalloc_flags_t)0x04u) +#define KASAN_VMALLOC_KEEP_TAG ((__force kasan_vmalloc_flags_t)0x08u) #define KASAN_VMALLOC_PAGE_RANGE 0x1 /* Apply exsiting page range */ #define KASAN_VMALLOC_TLB_FLUSH 0x2 /* TLB flush */ diff --git a/mm/kasan/hw_tags.c b/mm/kasan/hw_tags.c index 1c373cc4b3fa..cbef5e450954 100644 --- a/mm/kasan/hw_tags.c +++ b/mm/kasan/hw_tags.c @@ -361,7 +361,7 @@ void *__kasan_unpoison_vmalloc(const void *start, unsigned long size, return (void *)start; } - tag = kasan_random_tag(); + tag = (flags & KASAN_VMALLOC_KEEP_TAG) ? get_tag(start) : kasan_random_tag(); start = set_tag(start, tag); /* Unpoison and initialize memory up to size. */ diff --git a/mm/kasan/shadow.c b/mm/kasan/shadow.c index 29a751a8a08d..32fbdf759ea2 100644 --- a/mm/kasan/shadow.c +++ b/mm/kasan/shadow.c @@ -631,7 +631,9 @@ void *__kasan_unpoison_vmalloc(const void *start, unsigned long size, !(flags & KASAN_VMALLOC_PROT_NORMAL)) return (void *)start; - start = set_tag(start, kasan_random_tag()); + if (unlikely(!(flags & KASAN_VMALLOC_KEEP_TAG))) + start = set_tag(start, kasan_random_tag()); + kasan_unpoison(start, size, false); return (void *)start; } diff --git a/mm/vmalloc.c b/mm/vmalloc.c index ecbac900c35f..94c0a9262a46 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -4331,7 +4331,9 @@ void *vrealloc_node_align_noprof(const void *p, size_t size, unsigned long align */ if (size <= alloced_size) { kasan_unpoison_vmalloc(p + old_size, size - old_size, - KASAN_VMALLOC_PROT_NORMAL); + KASAN_VMALLOC_PROT_NORMAL | + KASAN_VMALLOC_VM_ALLOC | + KASAN_VMALLOC_KEEP_TAG); /* * No need to zero memory here, as unused memory will have * already been zeroed at initial allocation time or during -- cgit v1.2.3 From 6f13db031e27e88213381039032a9cc061578ea6 Mon Sep 17 00:00:00 2001 From: Maciej Wieczor-Retman Date: Thu, 4 Dec 2025 19:00:04 +0000 Subject: kasan: refactor pcpu kasan vmalloc unpoison A KASAN tag mismatch, possibly causing a kernel panic, can be observed on systems with a tag-based KASAN enabled and with multiple NUMA nodes. It was reported on arm64 and reproduced on x86. It can be explained in the following points: 1. There can be more than one virtual memory chunk. 2. Chunk's base address has a tag. 3. The base address points at the first chunk and thus inherits the tag of the first chunk. 4. The subsequent chunks will be accessed with the tag from the first chunk. 5. Thus, the subsequent chunks need to have their tag set to match that of the first chunk. Refactor code by reusing __kasan_unpoison_vmalloc in a new helper in preparation for the actual fix. Link: https://lkml.kernel.org/r/eb61d93b907e262eefcaa130261a08bcb6c5ce51.1764874575.git.m.wieczorretman@pm.me Fixes: 1d96320f8d53 ("kasan, vmalloc: add vmalloc tagging for SW_TAGS") Signed-off-by: Maciej Wieczor-Retman Reviewed-by: Andrey Konovalov Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Danilo Krummrich Cc: Dmitriy Vyukov Cc: Jiayuan Chen Cc: Kees Cook Cc: Marco Elver Cc: "Uladzislau Rezki (Sony)" Cc: Vincenzo Frascino Cc: [6.1+] Signed-off-by: Andrew Morton --- include/linux/kasan.h | 15 +++++++++++++++ mm/kasan/common.c | 17 +++++++++++++++++ mm/vmalloc.c | 4 +--- 3 files changed, 33 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index df3d8567dde9..9c6ac4b62eb9 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -631,6 +631,16 @@ static __always_inline void kasan_poison_vmalloc(const void *start, __kasan_poison_vmalloc(start, size); } +void __kasan_unpoison_vmap_areas(struct vm_struct **vms, int nr_vms, + kasan_vmalloc_flags_t flags); +static __always_inline void +kasan_unpoison_vmap_areas(struct vm_struct **vms, int nr_vms, + kasan_vmalloc_flags_t flags) +{ + if (kasan_enabled()) + __kasan_unpoison_vmap_areas(vms, nr_vms, flags); +} + #else /* CONFIG_KASAN_VMALLOC */ static inline void kasan_populate_early_vm_area_shadow(void *start, @@ -655,6 +665,11 @@ static inline void *kasan_unpoison_vmalloc(const void *start, static inline void kasan_poison_vmalloc(const void *start, unsigned long size) { } +static __always_inline void +kasan_unpoison_vmap_areas(struct vm_struct **vms, int nr_vms, + kasan_vmalloc_flags_t flags) +{ } + #endif /* CONFIG_KASAN_VMALLOC */ #if (defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)) && \ diff --git a/mm/kasan/common.c b/mm/kasan/common.c index 1d27f1bd260b..b2b40c59ce18 100644 --- a/mm/kasan/common.c +++ b/mm/kasan/common.c @@ -28,6 +28,7 @@ #include #include #include +#include #include "kasan.h" #include "../slab.h" @@ -575,3 +576,19 @@ bool __kasan_check_byte(const void *address, unsigned long ip) } return true; } + +#ifdef CONFIG_KASAN_VMALLOC +void __kasan_unpoison_vmap_areas(struct vm_struct **vms, int nr_vms, + kasan_vmalloc_flags_t flags) +{ + unsigned long size; + void *addr; + int area; + + for (area = 0 ; area < nr_vms ; area++) { + size = vms[area]->size; + addr = vms[area]->addr; + vms[area]->addr = __kasan_unpoison_vmalloc(addr, size, flags); + } +} +#endif diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 94c0a9262a46..41dd01e8430c 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -5027,9 +5027,7 @@ retry: * With hardware tag-based KASAN, marking is skipped for * non-VM_ALLOC mappings, see __kasan_unpoison_vmalloc(). */ - for (area = 0; area < nr_vms; area++) - vms[area]->addr = kasan_unpoison_vmalloc(vms[area]->addr, - vms[area]->size, KASAN_VMALLOC_PROT_NORMAL); + kasan_unpoison_vmap_areas(vms, nr_vms, KASAN_VMALLOC_PROT_NORMAL); kfree(vas); return vms; -- cgit v1.2.3 From 6ba776b533ca902631fa106b8a90811b3f40b08d Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 14 Dec 2025 12:15:17 -0800 Subject: mm: leafops.h: correct kernel-doc function param. names Modify the kernel-doc function parameter names to prevent kernel-doc warnings: Warning: include/linux/leafops.h:135 function parameter 'entry' not described in 'leafent_type' Warning: include/linux/leafops.h:540 function parameter 'pte' not described in 'pte_is_uffd_marker' Link: https://lkml.kernel.org/r/20251214201517.2187051-1-rdunlap@infradead.org Signed-off-by: Randy Dunlap Reviewed-by: Lorenzo Stoakes Cc: Liam Howlett Cc: Michal Hocko Cc: Mike Rapoport Cc: Suren Baghdasaryan Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/leafops.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/leafops.h b/include/linux/leafops.h index cfafe7a5e7b1..a9ff94b744f2 100644 --- a/include/linux/leafops.h +++ b/include/linux/leafops.h @@ -133,7 +133,7 @@ static inline bool softleaf_is_none(softleaf_t entry) /** * softleaf_type() - Identify the type of leaf entry. - * @enntry: Leaf entry. + * @entry: Leaf entry. * * Returns: the leaf entry type associated with @entry. */ @@ -534,7 +534,7 @@ static inline bool pte_is_uffd_wp_marker(pte_t pte) /** * pte_is_uffd_marker() - Does this PTE entry encode a userfault-specific marker * leaf entry? - * @entry: Leaf entry. + * @pte: PTE entry. * * It's useful to be able to determine which leaf entries encode UFFD-specific * markers so we can handle these correctly. -- cgit v1.2.3 From fe55ea85939efcbf0e6baa234f0d70acb79e7b58 Mon Sep 17 00:00:00 2001 From: Pingfan Liu Date: Tue, 16 Dec 2025 09:48:51 +0800 Subject: kernel/kexec: change the prototype of kimage_map_segment() The kexec segment index will be required to extract the corresponding information for that segment in kimage_map_segment(). Additionally, kexec_segment already holds the kexec relocation destination address and size. Therefore, the prototype of kimage_map_segment() can be changed. Link: https://lkml.kernel.org/r/20251216014852.8737-1-piliu@redhat.com Fixes: 07d24902977e ("kexec: enable CMA based contiguous allocation") Signed-off-by: Pingfan Liu Acked-by: Baoquan He Cc: Mimi Zohar Cc: Roberto Sassu Cc: Alexander Graf Cc: Steven Chen Cc: Signed-off-by: Andrew Morton --- include/linux/kexec.h | 4 ++-- kernel/kexec_core.c | 9 ++++++--- security/integrity/ima/ima_kexec.c | 4 +--- 3 files changed, 9 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/kexec.h b/include/linux/kexec.h index ff7e231b0485..8a22bc9b8c6c 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -530,7 +530,7 @@ extern bool kexec_file_dbg_print; #define kexec_dprintk(fmt, arg...) \ do { if (kexec_file_dbg_print) pr_info(fmt, ##arg); } while (0) -extern void *kimage_map_segment(struct kimage *image, unsigned long addr, unsigned long size); +extern void *kimage_map_segment(struct kimage *image, int idx); extern void kimage_unmap_segment(void *buffer); #else /* !CONFIG_KEXEC_CORE */ struct pt_regs; @@ -540,7 +540,7 @@ static inline void __crash_kexec(struct pt_regs *regs) { } static inline void crash_kexec(struct pt_regs *regs) { } static inline int kexec_should_crash(struct task_struct *p) { return 0; } static inline int kexec_crash_loaded(void) { return 0; } -static inline void *kimage_map_segment(struct kimage *image, unsigned long addr, unsigned long size) +static inline void *kimage_map_segment(struct kimage *image, int idx) { return NULL; } static inline void kimage_unmap_segment(void *buffer) { } #define kexec_in_progress false diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c index 0f92acdd354d..1a79c5b18d8f 100644 --- a/kernel/kexec_core.c +++ b/kernel/kexec_core.c @@ -953,17 +953,20 @@ int kimage_load_segment(struct kimage *image, int idx) return result; } -void *kimage_map_segment(struct kimage *image, - unsigned long addr, unsigned long size) +void *kimage_map_segment(struct kimage *image, int idx) { + unsigned long addr, size, eaddr; unsigned long src_page_addr, dest_page_addr = 0; - unsigned long eaddr = addr + size; kimage_entry_t *ptr, entry; struct page **src_pages; unsigned int npages; void *vaddr = NULL; int i; + addr = image->segment[idx].mem; + size = image->segment[idx].memsz; + eaddr = addr + size; + /* * Collect the source pages and map them in a contiguous VA range. */ diff --git a/security/integrity/ima/ima_kexec.c b/security/integrity/ima/ima_kexec.c index 7362f68f2d8b..5beb69edd12f 100644 --- a/security/integrity/ima/ima_kexec.c +++ b/security/integrity/ima/ima_kexec.c @@ -250,9 +250,7 @@ void ima_kexec_post_load(struct kimage *image) if (!image->ima_buffer_addr) return; - ima_kexec_buffer = kimage_map_segment(image, - image->ima_buffer_addr, - image->ima_buffer_size); + ima_kexec_buffer = kimage_map_segment(image, image->ima_segment_index); if (!ima_kexec_buffer) { pr_err("Could not map measurements buffer.\n"); return; -- cgit v1.2.3 From e6dbcb7c0e7b508d443a9aa6f77f63a2f83b1ae4 Mon Sep 17 00:00:00 2001 From: Ankit Agrawal Date: Thu, 11 Dec 2025 07:06:01 +0000 Subject: mm: fixup pfnmap memory failure handling to use pgoff The memory failure handling implementation for the PFNMAP memory with no struct pages is faulty. The VA of the mapping is determined based on the the PFN. It should instead be based on the file mapping offset. At the occurrence of poison, the memory_failure_pfn is triggered on the poisoned PFN. Introduce a callback function that allows mm to translate the PFN to the corresponding file page offset. The kernel module using the registration API must implement the callback function and provide the translation. The translated value is then used to determine the VA information and sending the SIGBUS to the usermode process mapped to the poisoned PFN. The callback is also useful for the driver to be notified of the poisoned PFN, which may then track it. Link: https://lkml.kernel.org/r/20251211070603.338701-2-ankita@nvidia.com Fixes: 2ec41967189c ("mm: handle poisoning of pfn without struct pages") Signed-off-by: Ankit Agrawal Suggested-by: Jason Gunthorpe Cc: Kevin Tian Cc: Matthew R. Ochs Cc: Miaohe Lin Cc: Naoya Horiguchi Cc: Neo Jia Cc: Vikram Sethi Cc: Yishai Hadas Cc: Zhi Wang Signed-off-by: Andrew Morton --- include/linux/memory-failure.h | 2 ++ mm/memory-failure.c | 29 ++++++++++++++++++----------- 2 files changed, 20 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/memory-failure.h b/include/linux/memory-failure.h index bc326503d2d2..7b5e11cf905f 100644 --- a/include/linux/memory-failure.h +++ b/include/linux/memory-failure.h @@ -9,6 +9,8 @@ struct pfn_address_space; struct pfn_address_space { struct interval_tree_node node; struct address_space *mapping; + int (*pfn_to_vma_pgoff)(struct vm_area_struct *vma, + unsigned long pfn, pgoff_t *pgoff); }; int register_pfn_address_space(struct pfn_address_space *pfn_space); diff --git a/mm/memory-failure.c b/mm/memory-failure.c index fbc5a01260c8..c80c2907da33 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -2161,6 +2161,9 @@ int register_pfn_address_space(struct pfn_address_space *pfn_space) { guard(mutex)(&pfn_space_lock); + if (!pfn_space->pfn_to_vma_pgoff) + return -EINVAL; + if (interval_tree_iter_first(&pfn_space_itree, pfn_space->node.start, pfn_space->node.last)) @@ -2183,10 +2186,10 @@ void unregister_pfn_address_space(struct pfn_address_space *pfn_space) } EXPORT_SYMBOL_GPL(unregister_pfn_address_space); -static void add_to_kill_pfn(struct task_struct *tsk, - struct vm_area_struct *vma, - struct list_head *to_kill, - unsigned long pfn) +static void add_to_kill_pgoff(struct task_struct *tsk, + struct vm_area_struct *vma, + struct list_head *to_kill, + pgoff_t pgoff) { struct to_kill *tk; @@ -2197,12 +2200,12 @@ static void add_to_kill_pfn(struct task_struct *tsk, } /* Check for pgoff not backed by struct page */ - tk->addr = vma_address(vma, pfn, 1); + tk->addr = vma_address(vma, pgoff, 1); tk->size_shift = PAGE_SHIFT; if (tk->addr == -EFAULT) pr_info("Unable to find address %lx in %s\n", - pfn, tsk->comm); + pgoff, tsk->comm); get_task_struct(tsk); tk->tsk = tsk; @@ -2212,11 +2215,12 @@ static void add_to_kill_pfn(struct task_struct *tsk, /* * Collect processes when the error hit a PFN not backed by struct page. */ -static void collect_procs_pfn(struct address_space *mapping, +static void collect_procs_pfn(struct pfn_address_space *pfn_space, unsigned long pfn, struct list_head *to_kill) { struct vm_area_struct *vma; struct task_struct *tsk; + struct address_space *mapping = pfn_space->mapping; i_mmap_lock_read(mapping); rcu_read_lock(); @@ -2226,9 +2230,12 @@ static void collect_procs_pfn(struct address_space *mapping, t = task_early_kill(tsk, true); if (!t) continue; - vma_interval_tree_foreach(vma, &mapping->i_mmap, pfn, pfn) { - if (vma->vm_mm == t->mm) - add_to_kill_pfn(t, vma, to_kill, pfn); + vma_interval_tree_foreach(vma, &mapping->i_mmap, 0, ULONG_MAX) { + pgoff_t pgoff; + + if (vma->vm_mm == t->mm && + !pfn_space->pfn_to_vma_pgoff(vma, pfn, &pgoff)) + add_to_kill_pgoff(t, vma, to_kill, pgoff); } } rcu_read_unlock(); @@ -2264,7 +2271,7 @@ static int memory_failure_pfn(unsigned long pfn, int flags) struct pfn_address_space *pfn_space = container_of(node, struct pfn_address_space, node); - collect_procs_pfn(pfn_space->mapping, pfn, &tokill); + collect_procs_pfn(pfn_space, pfn, &tokill); mf_handled = true; } -- cgit v1.2.3 From f183663901f21fe0fba8bd31ae894bc529709ee0 Mon Sep 17 00:00:00 2001 From: Bijan Tabatabai Date: Tue, 16 Dec 2025 14:07:27 -0600 Subject: mm: consider non-anon swap cache folios in folio_expected_ref_count() Currently, folio_expected_ref_count() only adds references for the swap cache if the folio is anonymous. However, according to the comment above the definition of PG_swapcache in enum pageflags, shmem folios can also have PG_swapcache set. This patch makes sure references for the swap cache are added if folio_test_swapcache(folio) is true. This issue was found when trying to hot-unplug memory in a QEMU/KVM virtual machine. When initiating hot-unplug when most of the guest memory is allocated, hot-unplug hangs partway through removal due to migration failures. The following message would be printed several times, and would be printed again about every five seconds: [ 49.641309] migrating pfn b12f25 failed ret:7 [ 49.641310] page: refcount:2 mapcount:0 mapping:0000000033bd8fe2 index:0x7f404d925 pfn:0xb12f25 [ 49.641311] aops:swap_aops [ 49.641313] flags: 0x300000000030508(uptodate|active|owner_priv_1|reclaim|swapbacked|node=0|zone=3) [ 49.641314] raw: 0300000000030508 ffffed312c4bc908 ffffed312c4bc9c8 0000000000000000 [ 49.641315] raw: 00000007f404d925 00000000000c823b 00000002ffffffff 0000000000000000 [ 49.641315] page dumped because: migration failure When debugging this, I found that these migration failures were due to __migrate_folio() returning -EAGAIN for a small set of folios because the expected reference count it calculates via folio_expected_ref_count() is one less than the actual reference count of the folios. Furthermore, all of the affected folios were not anonymous, but had the PG_swapcache flag set, inspiring this patch. After applying this patch, the memory hot-unplug behaves as expected. I tested this on a machine running Ubuntu 24.04 with kernel version 6.8.0-90-generic and 64GB of memory. The guest VM is managed by libvirt and runs Ubuntu 24.04 with kernel version 6.18 (though the head of the mm-unstable branch as a Dec 16, 2025 was also tested and behaves the same) and 48GB of memory. The libvirt XML definition for the VM can be found at [1]. CONFIG_MHP_DEFAULT_ONLINE_TYPE_ONLINE_MOVABLE is set in the guest kernel so the hot-pluggable memory is automatically onlined. Below are the steps to reproduce this behavior: 1) Define and start and virtual machine host$ virsh -c qemu:///system define ./test_vm.xml # test_vm.xml from [1] host$ virsh -c qemu:///system start test_vm 2) Setup swap in the guest guest$ sudo fallocate -l 32G /swapfile guest$ sudo chmod 0600 /swapfile guest$ sudo mkswap /swapfile guest$ sudo swapon /swapfile 3) Use alloc_data [2] to allocate most of the remaining guest memory guest$ ./alloc_data 45 4) In a separate guest terminal, monitor the amount of used memory guest$ watch -n1 free -h 5) When alloc_data has finished allocating, initiate the memory hot-unplug using the provided xml file [3] host$ virsh -c qemu:///system detach-device test_vm ./remove.xml --live After initiating the memory hot-unplug, you should see the amount of available memory in the guest decrease, and the amount of used swap data increase. If everything works as expected, when all of the memory is unplugged, there should be around 8.5-9GB of data in swap. If the unplugging is unsuccessful, the amount of used swap data will settle below that. If that happens, you should be able to see log messages in dmesg similar to the one posted above. Link: https://lkml.kernel.org/r/20251216200727.2360228-1-bijan311@gmail.com Link: https://github.com/BijanT/linux_patch_files/blob/main/test_vm.xml [1] Link: https://github.com/BijanT/linux_patch_files/blob/main/alloc_data.c [2] Link: https://github.com/BijanT/linux_patch_files/blob/main/remove.xml [3] Fixes: 86ebd50224c0 ("mm: add folio_expected_ref_count() for reference count calculation") Signed-off-by: Bijan Tabatabai Acked-by: David Hildenbrand (Red Hat) Acked-by: Zi Yan Reviewed-by: Baolin Wang Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Michal Hocko Cc: Mike Rapoport Cc: Shivank Garg Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: Kairui Song Cc: Signed-off-by: Andrew Morton --- include/linux/mm.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 15076261d0c2..6f959d8ca4b4 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2459,10 +2459,10 @@ static inline int folio_expected_ref_count(const struct folio *folio) if (WARN_ON_ONCE(page_has_type(&folio->page) && !folio_test_hugetlb(folio))) return 0; - if (folio_test_anon(folio)) { - /* One reference per page from the swapcache. */ - ref_count += folio_test_swapcache(folio) << order; - } else { + /* One reference per page from the swapcache. */ + ref_count += folio_test_swapcache(folio) << order; + + if (!folio_test_anon(folio)) { /* One reference per page from the pagecache. */ ref_count += !!folio->mapping << order; /* One reference from PG_private. */ -- cgit v1.2.3 From dc85a46928c41423ad89869baf05a589e2975575 Mon Sep 17 00:00:00 2001 From: Kevin Tian Date: Thu, 18 Dec 2025 08:16:49 +0000 Subject: vfio/pci: Disable qword access to the PCI ROM bar Commit 2b938e3db335 ("vfio/pci: Enable iowrite64 and ioread64 for vfio pci") enables qword access to the PCI bar resources. However certain devices (e.g. Intel X710) are observed with problem upon qword accesses to the rom bar, e.g. triggering PCI aer errors. This is triggered by Qemu which caches the rom content by simply does a pread() of the remaining size until it gets the full contents. The other bars would only perform operations at the same access width as their guest drivers. Instead of trying to identify all broken devices, universally disable qword access to the rom bar i.e. going back to the old way which worked reliably for years. Reported-by: Farrah Chen Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220740 Fixes: 2b938e3db335 ("vfio/pci: Enable iowrite64 and ioread64 for vfio pci") Cc: stable@vger.kernel.org Signed-off-by: Kevin Tian Tested-by: Farrah Chen Link: https://lore.kernel.org/r/20251218081650.555015-2-kevin.tian@intel.com Signed-off-by: Alex Williamson --- drivers/vfio/pci/nvgrace-gpu/main.c | 4 ++-- drivers/vfio/pci/vfio_pci_rdwr.c | 25 ++++++++++++++++++------- include/linux/vfio_pci_core.h | 10 +++++++++- 3 files changed, 29 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/drivers/vfio/pci/nvgrace-gpu/main.c b/drivers/vfio/pci/nvgrace-gpu/main.c index 84d142a47ec6..b45a24d00387 100644 --- a/drivers/vfio/pci/nvgrace-gpu/main.c +++ b/drivers/vfio/pci/nvgrace-gpu/main.c @@ -561,7 +561,7 @@ nvgrace_gpu_map_and_read(struct nvgrace_gpu_pci_core_device *nvdev, ret = vfio_pci_core_do_io_rw(&nvdev->core_device, false, nvdev->resmem.ioaddr, buf, offset, mem_count, - 0, 0, false); + 0, 0, false, VFIO_PCI_IO_WIDTH_8); } return ret; @@ -693,7 +693,7 @@ nvgrace_gpu_map_and_write(struct nvgrace_gpu_pci_core_device *nvdev, ret = vfio_pci_core_do_io_rw(&nvdev->core_device, false, nvdev->resmem.ioaddr, (char __user *)buf, pos, mem_count, - 0, 0, true); + 0, 0, true, VFIO_PCI_IO_WIDTH_8); } return ret; diff --git a/drivers/vfio/pci/vfio_pci_rdwr.c b/drivers/vfio/pci/vfio_pci_rdwr.c index 6192788c8ba3..25380b7dfe18 100644 --- a/drivers/vfio/pci/vfio_pci_rdwr.c +++ b/drivers/vfio/pci/vfio_pci_rdwr.c @@ -135,7 +135,8 @@ VFIO_IORDWR(64) ssize_t vfio_pci_core_do_io_rw(struct vfio_pci_core_device *vdev, bool test_mem, void __iomem *io, char __user *buf, loff_t off, size_t count, size_t x_start, - size_t x_end, bool iswrite) + size_t x_end, bool iswrite, + enum vfio_pci_io_width max_width) { ssize_t done = 0; int ret; @@ -150,20 +151,19 @@ ssize_t vfio_pci_core_do_io_rw(struct vfio_pci_core_device *vdev, bool test_mem, else fillable = 0; - if (fillable >= 8 && !(off % 8)) { + if (fillable >= 8 && !(off % 8) && max_width >= 8) { ret = vfio_pci_iordwr64(vdev, iswrite, test_mem, io, buf, off, &filled); if (ret) return ret; - } else - if (fillable >= 4 && !(off % 4)) { + } else if (fillable >= 4 && !(off % 4) && max_width >= 4) { ret = vfio_pci_iordwr32(vdev, iswrite, test_mem, io, buf, off, &filled); if (ret) return ret; - } else if (fillable >= 2 && !(off % 2)) { + } else if (fillable >= 2 && !(off % 2) && max_width >= 2) { ret = vfio_pci_iordwr16(vdev, iswrite, test_mem, io, buf, off, &filled); if (ret) @@ -234,6 +234,7 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf, void __iomem *io; struct resource *res = &vdev->pdev->resource[bar]; ssize_t done; + enum vfio_pci_io_width max_width = VFIO_PCI_IO_WIDTH_8; if (pci_resource_start(pdev, bar)) end = pci_resource_len(pdev, bar); @@ -262,6 +263,16 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf, if (!io) return -ENOMEM; x_end = end; + + /* + * Certain devices (e.g. Intel X710) don't support qword + * access to the ROM bar. Otherwise PCI AER errors might be + * triggered. + * + * Disable qword access to the ROM bar universally, which + * worked reliably for years before qword access is enabled. + */ + max_width = VFIO_PCI_IO_WIDTH_4; } else { int ret = vfio_pci_core_setup_barmap(vdev, bar); if (ret) { @@ -278,7 +289,7 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf, } done = vfio_pci_core_do_io_rw(vdev, res->flags & IORESOURCE_MEM, io, buf, pos, - count, x_start, x_end, iswrite); + count, x_start, x_end, iswrite, max_width); if (done >= 0) *ppos += done; @@ -352,7 +363,7 @@ ssize_t vfio_pci_vga_rw(struct vfio_pci_core_device *vdev, char __user *buf, * to the memory enable bit in the command register. */ done = vfio_pci_core_do_io_rw(vdev, false, iomem, buf, off, count, - 0, 0, iswrite); + 0, 0, iswrite, VFIO_PCI_IO_WIDTH_8); vga_put(vdev->pdev, rsrc); diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h index 706877f998ff..1ac86896875c 100644 --- a/include/linux/vfio_pci_core.h +++ b/include/linux/vfio_pci_core.h @@ -145,6 +145,13 @@ struct vfio_pci_core_device { struct list_head dmabufs; }; +enum vfio_pci_io_width { + VFIO_PCI_IO_WIDTH_1 = 1, + VFIO_PCI_IO_WIDTH_2 = 2, + VFIO_PCI_IO_WIDTH_4 = 4, + VFIO_PCI_IO_WIDTH_8 = 8, +}; + /* Will be exported for vfio pci drivers usage */ int vfio_pci_core_register_dev_region(struct vfio_pci_core_device *vdev, unsigned int type, unsigned int subtype, @@ -188,7 +195,8 @@ pci_ers_result_t vfio_pci_core_aer_err_detected(struct pci_dev *pdev, ssize_t vfio_pci_core_do_io_rw(struct vfio_pci_core_device *vdev, bool test_mem, void __iomem *io, char __user *buf, loff_t off, size_t count, size_t x_start, - size_t x_end, bool iswrite); + size_t x_end, bool iswrite, + enum vfio_pci_io_width max_width); bool __vfio_pci_memory_enabled(struct vfio_pci_core_device *vdev); bool vfio_pci_core_range_intersect_range(loff_t buf_start, size_t buf_cnt, loff_t reg_start, size_t reg_cnt, -- cgit v1.2.3 From 3dd57ddec9e3a98387196a3f53b8c036977d8c0f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 16 Dec 2025 08:19:39 +0000 Subject: get rid of bogus __user in struct xattr_args::value The first member of struct xattr_args is declared as __aligned_u64 __user value; which makes no sense whatsoever; __user is a qualifier and what that declaration says is "all struct xattr_args instances have .value _stored_ in user address space, no matter where the rest of the structure happens to be". Something like "int __user *p" stands for "value of p is a pointer to an instance of int that happens to live in user address space"; it says nothing about location of p itself, just as const char *p declares a pointer to unmodifiable char rather than an unmodifiable pointer to char. With xattr_args the intent clearly had been "the 64bit value represents a _pointer_ to object in user address space", but __user has nothing to do with that. All it gets us is a couple of bogus warnings in fs/xattr.c where (userland) instance of xattr_args is copied to local variable of that type (in kernel address space), followed by access to its members. Since we've told sparse that args.value must somehow be located in userland memory, we get warned that looking at that 64bit unsigned integer (in a variable already on kernel stack) is not allowed. Note that sparse has no way to express "this integer shall never be cast into a pointer to be dereferenced directly" and I don't see any way to assign a sane semantics to that. In any case, __user is not it. Signed-off-by: Al Viro Link: https://patch.msgid.link/20251216081939.GQ1712166@ZenIV Signed-off-by: Christian Brauner --- include/uapi/linux/xattr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/xattr.h b/include/uapi/linux/xattr.h index c7c85bb504ba..2e5aef48fa7e 100644 --- a/include/uapi/linux/xattr.h +++ b/include/uapi/linux/xattr.h @@ -23,7 +23,7 @@ #define XATTR_REPLACE 0x2 /* set value, fail if attr does not exist */ struct xattr_args { - __aligned_u64 __user value; + __aligned_u64 value; __u32 size; __u32 flags; }; -- cgit v1.2.3 From f059588c552746e0fe299214f35c58effa715b74 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 4 Dec 2025 13:31:52 -0500 Subject: virtio: make it self-contained virtio.h uses struct module, add a forward declaration to make the header self-contained. Message-ID: <9171b5cac60793eb59ab044c96ee038bf1363bee.1764873799.git.mst@redhat.com> Acked-by: Jason Wang Signed-off-by: Michael S. Tsirkin --- include/linux/virtio.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 132a474e5914..3626eb694728 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -13,6 +13,8 @@ #include #include +struct module; + /** * struct virtqueue - a queue to register buffers for sending or receiving. * @list: the chain of virtqueues for this device -- cgit v1.2.3 From e88dfb93311c81359b00c12e0b396bd0ea13ad6c Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 4 Dec 2025 12:49:34 -0500 Subject: virtio_features: make it self-contained virtio_features.h uses WARN_ON_ONCE and memset so it must include linux/bug.h and linux/string.h Message-ID: <579986aa9b8d023844990d2a0e267382f8ad85d5.1764873799.git.mst@redhat.com> Acked-by: Jason Wang Signed-off-by: Michael S. Tsirkin --- include/linux/virtio_features.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/virtio_features.h b/include/linux/virtio_features.h index ea2ad8717882..ce59ea91f474 100644 --- a/include/linux/virtio_features.h +++ b/include/linux/virtio_features.h @@ -3,6 +3,8 @@ #define _LINUX_VIRTIO_FEATURES_H #include +#include +#include #define VIRTIO_FEATURES_U64S 2 #define VIRTIO_FEATURES_BITS (VIRTIO_FEATURES_U64S * 64) -- cgit v1.2.3 From 5623eb1ed035f01dfa620366a82b667545b10c82 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 31 Dec 2025 08:12:46 -0700 Subject: io_uring/tctx: add separate lock for list of tctx's in ctx ctx->tcxt_list holds the tasks using this ring, and it's currently protected by the normal ctx->uring_lock. However, this can cause a circular locking issue, as reported by syzbot, where cancelations off exec end up needing to remove an entry from this list: ====================================================== WARNING: possible circular locking dependency detected syzkaller #0 Tainted: G L ------------------------------------------------------ syz.0.9999/12287 is trying to acquire lock: ffff88805851c0a8 (&ctx->uring_lock){+.+.}-{4:4}, at: io_uring_del_tctx_node+0xf0/0x2c0 io_uring/tctx.c:179 but task is already holding lock: ffff88802db5a2e0 (&sig->cred_guard_mutex){+.+.}-{4:4}, at: prepare_bprm_creds fs/exec.c:1360 [inline] ffff88802db5a2e0 (&sig->cred_guard_mutex){+.+.}-{4:4}, at: bprm_execve+0xb9/0x1400 fs/exec.c:1733 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #2 (&sig->cred_guard_mutex){+.+.}-{4:4}: __mutex_lock_common kernel/locking/mutex.c:614 [inline] __mutex_lock+0x187/0x1350 kernel/locking/mutex.c:776 proc_pid_attr_write+0x547/0x630 fs/proc/base.c:2837 vfs_write+0x27e/0xb30 fs/read_write.c:684 ksys_write+0x145/0x250 fs/read_write.c:738 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0xec/0xf80 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f -> #1 (sb_writers#3){.+.+}-{0:0}: percpu_down_read_internal include/linux/percpu-rwsem.h:53 [inline] percpu_down_read_freezable include/linux/percpu-rwsem.h:83 [inline] __sb_start_write include/linux/fs/super.h:19 [inline] sb_start_write+0x4d/0x1c0 include/linux/fs/super.h:125 mnt_want_write+0x41/0x90 fs/namespace.c:499 open_last_lookups fs/namei.c:4529 [inline] path_openat+0xadd/0x3dd0 fs/namei.c:4784 do_filp_open+0x1fa/0x410 fs/namei.c:4814 io_openat2+0x3e0/0x5c0 io_uring/openclose.c:143 __io_issue_sqe+0x181/0x4b0 io_uring/io_uring.c:1792 io_issue_sqe+0x165/0x1060 io_uring/io_uring.c:1815 io_queue_sqe io_uring/io_uring.c:2042 [inline] io_submit_sqe io_uring/io_uring.c:2320 [inline] io_submit_sqes+0xbf4/0x2140 io_uring/io_uring.c:2434 __do_sys_io_uring_enter io_uring/io_uring.c:3280 [inline] __se_sys_io_uring_enter+0x2e0/0x2b60 io_uring/io_uring.c:3219 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0xec/0xf80 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f -> #0 (&ctx->uring_lock){+.+.}-{4:4}: check_prev_add kernel/locking/lockdep.c:3165 [inline] check_prevs_add kernel/locking/lockdep.c:3284 [inline] validate_chain kernel/locking/lockdep.c:3908 [inline] __lock_acquire+0x15a6/0x2cf0 kernel/locking/lockdep.c:5237 lock_acquire+0x107/0x340 kernel/locking/lockdep.c:5868 __mutex_lock_common kernel/locking/mutex.c:614 [inline] __mutex_lock+0x187/0x1350 kernel/locking/mutex.c:776 io_uring_del_tctx_node+0xf0/0x2c0 io_uring/tctx.c:179 io_uring_clean_tctx+0xd4/0x1a0 io_uring/tctx.c:195 io_uring_cancel_generic+0x6ca/0x7d0 io_uring/cancel.c:646 io_uring_task_cancel include/linux/io_uring.h:24 [inline] begin_new_exec+0x10ed/0x2440 fs/exec.c:1131 load_elf_binary+0x9f8/0x2d70 fs/binfmt_elf.c:1010 search_binary_handler fs/exec.c:1669 [inline] exec_binprm fs/exec.c:1701 [inline] bprm_execve+0x92e/0x1400 fs/exec.c:1753 do_execveat_common+0x510/0x6a0 fs/exec.c:1859 do_execve fs/exec.c:1933 [inline] __do_sys_execve fs/exec.c:2009 [inline] __se_sys_execve fs/exec.c:2004 [inline] __x64_sys_execve+0x94/0xb0 fs/exec.c:2004 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0xec/0xf80 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f other info that might help us debug this: Chain exists of: &ctx->uring_lock --> sb_writers#3 --> &sig->cred_guard_mutex Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&sig->cred_guard_mutex); lock(sb_writers#3); lock(&sig->cred_guard_mutex); lock(&ctx->uring_lock); *** DEADLOCK *** 1 lock held by syz.0.9999/12287: #0: ffff88802db5a2e0 (&sig->cred_guard_mutex){+.+.}-{4:4}, at: prepare_bprm_creds fs/exec.c:1360 [inline] #0: ffff88802db5a2e0 (&sig->cred_guard_mutex){+.+.}-{4:4}, at: bprm_execve+0xb9/0x1400 fs/exec.c:1733 stack backtrace: CPU: 0 UID: 0 PID: 12287 Comm: syz.0.9999 Tainted: G L syzkaller #0 PREEMPT(full) Tainted: [L]=SOFTLOCKUP Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/25/2025 Call Trace: dump_stack_lvl+0xe8/0x150 lib/dump_stack.c:120 print_circular_bug+0x2e2/0x300 kernel/locking/lockdep.c:2043 check_noncircular+0x12e/0x150 kernel/locking/lockdep.c:2175 check_prev_add kernel/locking/lockdep.c:3165 [inline] check_prevs_add kernel/locking/lockdep.c:3284 [inline] validate_chain kernel/locking/lockdep.c:3908 [inline] __lock_acquire+0x15a6/0x2cf0 kernel/locking/lockdep.c:5237 lock_acquire+0x107/0x340 kernel/locking/lockdep.c:5868 __mutex_lock_common kernel/locking/mutex.c:614 [inline] __mutex_lock+0x187/0x1350 kernel/locking/mutex.c:776 io_uring_del_tctx_node+0xf0/0x2c0 io_uring/tctx.c:179 io_uring_clean_tctx+0xd4/0x1a0 io_uring/tctx.c:195 io_uring_cancel_generic+0x6ca/0x7d0 io_uring/cancel.c:646 io_uring_task_cancel include/linux/io_uring.h:24 [inline] begin_new_exec+0x10ed/0x2440 fs/exec.c:1131 load_elf_binary+0x9f8/0x2d70 fs/binfmt_elf.c:1010 search_binary_handler fs/exec.c:1669 [inline] exec_binprm fs/exec.c:1701 [inline] bprm_execve+0x92e/0x1400 fs/exec.c:1753 do_execveat_common+0x510/0x6a0 fs/exec.c:1859 do_execve fs/exec.c:1933 [inline] __do_sys_execve fs/exec.c:2009 [inline] __se_sys_execve fs/exec.c:2004 [inline] __x64_sys_execve+0x94/0xb0 fs/exec.c:2004 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0xec/0xf80 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7ff3a8b8f749 Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 a8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007ff3a9a97038 EFLAGS: 00000246 ORIG_RAX: 000000000000003b RAX: ffffffffffffffda RBX: 00007ff3a8de5fa0 RCX: 00007ff3a8b8f749 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000200000000400 RBP: 00007ff3a8c13f91 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 00007ff3a8de6038 R14: 00007ff3a8de5fa0 R15: 00007ff3a8f0fa28 Add a separate lock just for the tctx_list, tctx_lock. This can nest under ->uring_lock, where necessary, and be used separately for list manipulation. For the cancelation off exec side, this removes the need to grab ->uring_lock, hence fixing the circular locking dependency. Reported-by: syzbot+b0e3b77ffaa8a4067ce5@syzkaller.appspotmail.com Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 8 +++++++- io_uring/cancel.c | 5 +++++ io_uring/io_uring.c | 5 +++++ io_uring/register.c | 2 ++ io_uring/tctx.c | 8 ++++---- 5 files changed, 23 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index e1adb0d20a0a..a3e8ddc9b380 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -424,11 +424,17 @@ struct io_ring_ctx { struct user_struct *user; struct mm_struct *mm_account; + /* + * List of tctx nodes for this ctx, protected by tctx_lock. For + * cancelation purposes, nests under uring_lock. + */ + struct list_head tctx_list; + struct mutex tctx_lock; + /* ctx exit and cancelation */ struct llist_head fallback_llist; struct delayed_work fallback_work; struct work_struct exit_work; - struct list_head tctx_list; struct completion ref_comp; /* io-wq management, e.g. thread count */ diff --git a/io_uring/cancel.c b/io_uring/cancel.c index ca12ac10c0ae..07b8d852218b 100644 --- a/io_uring/cancel.c +++ b/io_uring/cancel.c @@ -184,7 +184,9 @@ static int __io_async_cancel(struct io_cancel_data *cd, } while (1); /* slow path, try all io-wq's */ + __set_current_state(TASK_RUNNING); io_ring_submit_lock(ctx, issue_flags); + mutex_lock(&ctx->tctx_lock); ret = -ENOENT; list_for_each_entry(node, &ctx->tctx_list, ctx_node) { ret = io_async_cancel_one(node->task->io_uring, cd); @@ -194,6 +196,7 @@ static int __io_async_cancel(struct io_cancel_data *cd, nr++; } } + mutex_unlock(&ctx->tctx_lock); io_ring_submit_unlock(ctx, issue_flags); return all ? nr : ret; } @@ -484,6 +487,7 @@ static __cold bool io_uring_try_cancel_iowq(struct io_ring_ctx *ctx) bool ret = false; mutex_lock(&ctx->uring_lock); + mutex_lock(&ctx->tctx_lock); list_for_each_entry(node, &ctx->tctx_list, ctx_node) { struct io_uring_task *tctx = node->task->io_uring; @@ -496,6 +500,7 @@ static __cold bool io_uring_try_cancel_iowq(struct io_ring_ctx *ctx) cret = io_wq_cancel_cb(tctx->io_wq, io_cancel_ctx_cb, ctx, true); ret |= (cret != IO_WQ_CANCEL_NOTFOUND); } + mutex_unlock(&ctx->tctx_lock); mutex_unlock(&ctx->uring_lock); return ret; diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 709943fedaf4..87a87396e940 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -340,6 +340,7 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p) INIT_LIST_HEAD(&ctx->ltimeout_list); init_llist_head(&ctx->work_llist); INIT_LIST_HEAD(&ctx->tctx_list); + mutex_init(&ctx->tctx_lock); ctx->submit_state.free_list.next = NULL; INIT_HLIST_HEAD(&ctx->waitid_list); xa_init_flags(&ctx->zcrx_ctxs, XA_FLAGS_ALLOC); @@ -3045,6 +3046,7 @@ static __cold void io_ring_exit_work(struct work_struct *work) exit.ctx = ctx; mutex_lock(&ctx->uring_lock); + mutex_lock(&ctx->tctx_lock); while (!list_empty(&ctx->tctx_list)) { WARN_ON_ONCE(time_after(jiffies, timeout)); @@ -3056,6 +3058,7 @@ static __cold void io_ring_exit_work(struct work_struct *work) if (WARN_ON_ONCE(ret)) continue; + mutex_unlock(&ctx->tctx_lock); mutex_unlock(&ctx->uring_lock); /* * See comment above for @@ -3064,7 +3067,9 @@ static __cold void io_ring_exit_work(struct work_struct *work) */ wait_for_completion_interruptible(&exit.completion); mutex_lock(&ctx->uring_lock); + mutex_lock(&ctx->tctx_lock); } + mutex_unlock(&ctx->tctx_lock); mutex_unlock(&ctx->uring_lock); spin_lock(&ctx->completion_lock); spin_unlock(&ctx->completion_lock); diff --git a/io_uring/register.c b/io_uring/register.c index 62d39b3ff317..3d3822ff3fd9 100644 --- a/io_uring/register.c +++ b/io_uring/register.c @@ -320,6 +320,7 @@ static __cold int io_register_iowq_max_workers(struct io_ring_ctx *ctx, return 0; /* now propagate the restriction to all registered users */ + mutex_lock(&ctx->tctx_lock); list_for_each_entry(node, &ctx->tctx_list, ctx_node) { tctx = node->task->io_uring; if (WARN_ON_ONCE(!tctx->io_wq)) @@ -330,6 +331,7 @@ static __cold int io_register_iowq_max_workers(struct io_ring_ctx *ctx, /* ignore errors, it always returns zero anyway */ (void)io_wq_max_workers(tctx->io_wq, new_count); } + mutex_unlock(&ctx->tctx_lock); return 0; err: if (sqd) { diff --git a/io_uring/tctx.c b/io_uring/tctx.c index 5b66755579c0..6d6f44215ec8 100644 --- a/io_uring/tctx.c +++ b/io_uring/tctx.c @@ -136,9 +136,9 @@ int __io_uring_add_tctx_node(struct io_ring_ctx *ctx) return ret; } - mutex_lock(&ctx->uring_lock); + mutex_lock(&ctx->tctx_lock); list_add(&node->ctx_node, &ctx->tctx_list); - mutex_unlock(&ctx->uring_lock); + mutex_unlock(&ctx->tctx_lock); } return 0; } @@ -176,9 +176,9 @@ __cold void io_uring_del_tctx_node(unsigned long index) WARN_ON_ONCE(current != node->task); WARN_ON_ONCE(list_empty(&node->ctx_node)); - mutex_lock(&node->ctx->uring_lock); + mutex_lock(&node->ctx->tctx_lock); list_del(&node->ctx_node); - mutex_unlock(&node->ctx->uring_lock); + mutex_unlock(&node->ctx->tctx_lock); if (tctx->last == node->ctx) tctx->last = NULL; -- cgit v1.2.3 From c6c209ceb87f64a6ceebe61761951dcbbf4a0baa Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 9 Dec 2025 19:28:49 -0500 Subject: NFSD: Remove NFSERR_EAGAIN I haven't found an NFSERR_EAGAIN in RFCs 1094, 1813, 7530, or 8881. None of these RFCs have an NFS status code that match the numeric value "11". Based on the meaning of the EAGAIN errno, I presume the use of this status in NFSD means NFS4ERR_DELAY. So replace the one usage of nfserr_eagain, and remove it from NFSD's NFS status conversion tables. As far as I can tell, NFSERR_EAGAIN has existed since the pre-git era, but was not actually used by any code until commit f4e44b393389 ("NFSD: delay unmount source's export after inter-server copy completed."), at which time it become possible for NFSD to return a status code of 11 (which is not valid NFS protocol). Fixes: f4e44b393389 ("NFSD: delay unmount source's export after inter-server copy completed.") Cc: stable@vger.kernel.org Reviewed-by: NeilBrown Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfs_common/common.c | 1 - fs/nfsd/nfs4proc.c | 2 +- fs/nfsd/nfsd.h | 1 - include/trace/misc/nfs.h | 2 -- include/uapi/linux/nfs.h | 1 - 5 files changed, 1 insertion(+), 6 deletions(-) (limited to 'include') diff --git a/fs/nfs_common/common.c b/fs/nfs_common/common.c index af09aed09fd2..0778743ae2c2 100644 --- a/fs/nfs_common/common.c +++ b/fs/nfs_common/common.c @@ -17,7 +17,6 @@ static const struct { { NFSERR_NOENT, -ENOENT }, { NFSERR_IO, -EIO }, { NFSERR_NXIO, -ENXIO }, -/* { NFSERR_EAGAIN, -EAGAIN }, */ { NFSERR_ACCES, -EACCES }, { NFSERR_EXIST, -EEXIST }, { NFSERR_XDEV, -EXDEV }, diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 7f7e6bb23a90..42a6b914c0fe 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1506,7 +1506,7 @@ try_again: (schedule_timeout(20*HZ) == 0)) { finish_wait(&nn->nfsd_ssc_waitq, &wait); kfree(work); - return nfserr_eagain; + return nfserr_jukebox; } finish_wait(&nn->nfsd_ssc_waitq, &wait); goto try_again; diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 50be785f1d2c..b0283213a8f5 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -233,7 +233,6 @@ void nfsd_lockd_shutdown(void); #define nfserr_noent cpu_to_be32(NFSERR_NOENT) #define nfserr_io cpu_to_be32(NFSERR_IO) #define nfserr_nxio cpu_to_be32(NFSERR_NXIO) -#define nfserr_eagain cpu_to_be32(NFSERR_EAGAIN) #define nfserr_acces cpu_to_be32(NFSERR_ACCES) #define nfserr_exist cpu_to_be32(NFSERR_EXIST) #define nfserr_xdev cpu_to_be32(NFSERR_XDEV) diff --git a/include/trace/misc/nfs.h b/include/trace/misc/nfs.h index c82233e950ac..a394b4d38e18 100644 --- a/include/trace/misc/nfs.h +++ b/include/trace/misc/nfs.h @@ -16,7 +16,6 @@ TRACE_DEFINE_ENUM(NFSERR_PERM); TRACE_DEFINE_ENUM(NFSERR_NOENT); TRACE_DEFINE_ENUM(NFSERR_IO); TRACE_DEFINE_ENUM(NFSERR_NXIO); -TRACE_DEFINE_ENUM(NFSERR_EAGAIN); TRACE_DEFINE_ENUM(NFSERR_ACCES); TRACE_DEFINE_ENUM(NFSERR_EXIST); TRACE_DEFINE_ENUM(NFSERR_XDEV); @@ -52,7 +51,6 @@ TRACE_DEFINE_ENUM(NFSERR_JUKEBOX); { NFSERR_NXIO, "NXIO" }, \ { ECHILD, "CHILD" }, \ { ETIMEDOUT, "TIMEDOUT" }, \ - { NFSERR_EAGAIN, "AGAIN" }, \ { NFSERR_ACCES, "ACCES" }, \ { NFSERR_EXIST, "EXIST" }, \ { NFSERR_XDEV, "XDEV" }, \ diff --git a/include/uapi/linux/nfs.h b/include/uapi/linux/nfs.h index f356f2ba3814..71c7196d3281 100644 --- a/include/uapi/linux/nfs.h +++ b/include/uapi/linux/nfs.h @@ -49,7 +49,6 @@ NFSERR_NOENT = 2, /* v2 v3 v4 */ NFSERR_IO = 5, /* v2 v3 v4 */ NFSERR_NXIO = 6, /* v2 v3 v4 */ - NFSERR_EAGAIN = 11, /* v2 v3 */ NFSERR_ACCES = 13, /* v2 v3 v4 */ NFSERR_EXIST = 17, /* v2 v3 v4 */ NFSERR_XDEV = 18, /* v3 v4 */ -- cgit v1.2.3 From c1ef9a6cabb34dbc09e31417b0c0a672fe0de13a Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Fri, 5 Dec 2025 11:51:48 +0200 Subject: Revert "drm/atomic-helper: Re-order bridge chain pre-enable and post-disable" This reverts commit c9b1150a68d9362a0827609fc0dc1664c0d8bfe1. Changing the enable/disable sequence has caused regressions on multiple platforms: R-Car, MCDE, Rockchip. A series (see link below) was sent to fix these, but it was decided that it's better to revert the original patch and change the enable/disable sequence only in the tidss driver. Reverting this commit breaks tidss's DSI and OLDI outputs, which will be fixed in the following commits. Signed-off-by: Tomi Valkeinen Link: https://lore.kernel.org/all/20251202-mcde-drm-regression-thirdfix-v6-0-f1bffd4ec0fa%40kernel.org/ Fixes: c9b1150a68d9 ("drm/atomic-helper: Re-order bridge chain pre-enable and post-disable") Cc: stable@vger.kernel.org # v6.17+ Reviewed-by: Aradhya Bhatia Reviewed-by: Maxime Ripard Reviewed-by: Linus Walleij Tested-by: Linus Walleij Signed-off-by: Linus Walleij Link: https://patch.msgid.link/20251205-drm-seq-fix-v1-1-fda68fa1b3de@ideasonboard.com --- drivers/gpu/drm/drm_atomic_helper.c | 8 +- include/drm/drm_bridge.h | 249 ++++++++++-------------------------- 2 files changed, 70 insertions(+), 187 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index 10adac9397cf..ef97f37560b2 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -1341,9 +1341,9 @@ disable_outputs(struct drm_device *dev, struct drm_atomic_state *state) { encoder_bridge_disable(dev, state); - crtc_disable(dev, state); - encoder_bridge_post_disable(dev, state); + + crtc_disable(dev, state); } /** @@ -1682,10 +1682,10 @@ encoder_bridge_enable(struct drm_device *dev, struct drm_atomic_state *state) void drm_atomic_helper_commit_modeset_enables(struct drm_device *dev, struct drm_atomic_state *state) { - encoder_bridge_pre_enable(dev, state); - crtc_enable(dev, state); + encoder_bridge_pre_enable(dev, state); + encoder_bridge_enable(dev, state); drm_atomic_helper_commit_writebacks(dev, state); diff --git a/include/drm/drm_bridge.h b/include/drm/drm_bridge.h index 0ff7ab4aa868..dbafe136833f 100644 --- a/include/drm/drm_bridge.h +++ b/include/drm/drm_bridge.h @@ -176,33 +176,17 @@ struct drm_bridge_funcs { /** * @disable: * - * The @disable callback should disable the bridge. + * This callback should disable the bridge. It is called right before + * the preceding element in the display pipe is disabled. If the + * preceding element is a bridge this means it's called before that + * bridge's @disable vfunc. If the preceding element is a &drm_encoder + * it's called right before the &drm_encoder_helper_funcs.disable, + * &drm_encoder_helper_funcs.prepare or &drm_encoder_helper_funcs.dpms + * hook. * * The bridge can assume that the display pipe (i.e. clocks and timing * signals) feeding it is still running when this callback is called. * - * - * If the preceding element is a &drm_bridge, then this is called before - * that bridge is disabled via one of: - * - * - &drm_bridge_funcs.disable - * - &drm_bridge_funcs.atomic_disable - * - * If the preceding element of the bridge is a display controller, then - * this callback is called before the encoder is disabled via one of: - * - * - &drm_encoder_helper_funcs.atomic_disable - * - &drm_encoder_helper_funcs.prepare - * - &drm_encoder_helper_funcs.disable - * - &drm_encoder_helper_funcs.dpms - * - * and the CRTC is disabled via one of: - * - * - &drm_crtc_helper_funcs.prepare - * - &drm_crtc_helper_funcs.atomic_disable - * - &drm_crtc_helper_funcs.disable - * - &drm_crtc_helper_funcs.dpms. - * * The @disable callback is optional. * * NOTE: @@ -215,34 +199,17 @@ struct drm_bridge_funcs { /** * @post_disable: * - * The bridge must assume that the display pipe (i.e. clocks and timing - * signals) feeding this bridge is no longer running when the - * @post_disable is called. + * This callback should disable the bridge. It is called right after the + * preceding element in the display pipe is disabled. If the preceding + * element is a bridge this means it's called after that bridge's + * @post_disable function. If the preceding element is a &drm_encoder + * it's called right after the encoder's + * &drm_encoder_helper_funcs.disable, &drm_encoder_helper_funcs.prepare + * or &drm_encoder_helper_funcs.dpms hook. * - * This callback should perform all the actions required by the hardware - * after it has stopped receiving signals from the preceding element. - * - * If the preceding element is a &drm_bridge, then this is called after - * that bridge is post-disabled (unless marked otherwise by the - * @pre_enable_prev_first flag) via one of: - * - * - &drm_bridge_funcs.post_disable - * - &drm_bridge_funcs.atomic_post_disable - * - * If the preceding element of the bridge is a display controller, then - * this callback is called after the encoder is disabled via one of: - * - * - &drm_encoder_helper_funcs.atomic_disable - * - &drm_encoder_helper_funcs.prepare - * - &drm_encoder_helper_funcs.disable - * - &drm_encoder_helper_funcs.dpms - * - * and the CRTC is disabled via one of: - * - * - &drm_crtc_helper_funcs.prepare - * - &drm_crtc_helper_funcs.atomic_disable - * - &drm_crtc_helper_funcs.disable - * - &drm_crtc_helper_funcs.dpms + * The bridge must assume that the display pipe (i.e. clocks and timing + * signals) feeding it is no longer running when this callback is + * called. * * The @post_disable callback is optional. * @@ -285,30 +252,18 @@ struct drm_bridge_funcs { /** * @pre_enable: * - * The display pipe (i.e. clocks and timing signals) feeding this bridge - * will not yet be running when the @pre_enable is called. - * - * This callback should perform all the necessary actions to prepare the - * bridge to accept signals from the preceding element. - * - * If the preceding element is a &drm_bridge, then this is called before - * that bridge is pre-enabled (unless marked otherwise by - * @pre_enable_prev_first flag) via one of: - * - * - &drm_bridge_funcs.pre_enable - * - &drm_bridge_funcs.atomic_pre_enable - * - * If the preceding element of the bridge is a display controller, then - * this callback is called before the CRTC is enabled via one of: - * - * - &drm_crtc_helper_funcs.atomic_enable - * - &drm_crtc_helper_funcs.commit - * - * and the encoder is enabled via one of: + * This callback should enable the bridge. It is called right before + * the preceding element in the display pipe is enabled. If the + * preceding element is a bridge this means it's called before that + * bridge's @pre_enable function. If the preceding element is a + * &drm_encoder it's called right before the encoder's + * &drm_encoder_helper_funcs.enable, &drm_encoder_helper_funcs.commit or + * &drm_encoder_helper_funcs.dpms hook. * - * - &drm_encoder_helper_funcs.atomic_enable - * - &drm_encoder_helper_funcs.enable - * - &drm_encoder_helper_funcs.commit + * The display pipe (i.e. clocks and timing signals) feeding this bridge + * will not yet be running when this callback is called. The bridge must + * not enable the display link feeding the next bridge in the chain (if + * there is one) when this callback is called. * * The @pre_enable callback is optional. * @@ -322,31 +277,19 @@ struct drm_bridge_funcs { /** * @enable: * - * The @enable callback should enable the bridge. + * This callback should enable the bridge. It is called right after + * the preceding element in the display pipe is enabled. If the + * preceding element is a bridge this means it's called after that + * bridge's @enable function. If the preceding element is a + * &drm_encoder it's called right after the encoder's + * &drm_encoder_helper_funcs.enable, &drm_encoder_helper_funcs.commit or + * &drm_encoder_helper_funcs.dpms hook. * * The bridge can assume that the display pipe (i.e. clocks and timing * signals) feeding it is running when this callback is called. This * callback must enable the display link feeding the next bridge in the * chain if there is one. * - * If the preceding element is a &drm_bridge, then this is called after - * that bridge is enabled via one of: - * - * - &drm_bridge_funcs.enable - * - &drm_bridge_funcs.atomic_enable - * - * If the preceding element of the bridge is a display controller, then - * this callback is called after the CRTC is enabled via one of: - * - * - &drm_crtc_helper_funcs.atomic_enable - * - &drm_crtc_helper_funcs.commit - * - * and the encoder is enabled via one of: - * - * - &drm_encoder_helper_funcs.atomic_enable - * - &drm_encoder_helper_funcs.enable - * - drm_encoder_helper_funcs.commit - * * The @enable callback is optional. * * NOTE: @@ -359,30 +302,17 @@ struct drm_bridge_funcs { /** * @atomic_pre_enable: * - * The display pipe (i.e. clocks and timing signals) feeding this bridge - * will not yet be running when the @atomic_pre_enable is called. - * - * This callback should perform all the necessary actions to prepare the - * bridge to accept signals from the preceding element. - * - * If the preceding element is a &drm_bridge, then this is called before - * that bridge is pre-enabled (unless marked otherwise by - * @pre_enable_prev_first flag) via one of: - * - * - &drm_bridge_funcs.pre_enable - * - &drm_bridge_funcs.atomic_pre_enable + * This callback should enable the bridge. It is called right before + * the preceding element in the display pipe is enabled. If the + * preceding element is a bridge this means it's called before that + * bridge's @atomic_pre_enable or @pre_enable function. If the preceding + * element is a &drm_encoder it's called right before the encoder's + * &drm_encoder_helper_funcs.atomic_enable hook. * - * If the preceding element of the bridge is a display controller, then - * this callback is called before the CRTC is enabled via one of: - * - * - &drm_crtc_helper_funcs.atomic_enable - * - &drm_crtc_helper_funcs.commit - * - * and the encoder is enabled via one of: - * - * - &drm_encoder_helper_funcs.atomic_enable - * - &drm_encoder_helper_funcs.enable - * - &drm_encoder_helper_funcs.commit + * The display pipe (i.e. clocks and timing signals) feeding this bridge + * will not yet be running when this callback is called. The bridge must + * not enable the display link feeding the next bridge in the chain (if + * there is one) when this callback is called. * * The @atomic_pre_enable callback is optional. */ @@ -392,31 +322,18 @@ struct drm_bridge_funcs { /** * @atomic_enable: * - * The @atomic_enable callback should enable the bridge. + * This callback should enable the bridge. It is called right after + * the preceding element in the display pipe is enabled. If the + * preceding element is a bridge this means it's called after that + * bridge's @atomic_enable or @enable function. If the preceding element + * is a &drm_encoder it's called right after the encoder's + * &drm_encoder_helper_funcs.atomic_enable hook. * * The bridge can assume that the display pipe (i.e. clocks and timing * signals) feeding it is running when this callback is called. This * callback must enable the display link feeding the next bridge in the * chain if there is one. * - * If the preceding element is a &drm_bridge, then this is called after - * that bridge is enabled via one of: - * - * - &drm_bridge_funcs.enable - * - &drm_bridge_funcs.atomic_enable - * - * If the preceding element of the bridge is a display controller, then - * this callback is called after the CRTC is enabled via one of: - * - * - &drm_crtc_helper_funcs.atomic_enable - * - &drm_crtc_helper_funcs.commit - * - * and the encoder is enabled via one of: - * - * - &drm_encoder_helper_funcs.atomic_enable - * - &drm_encoder_helper_funcs.enable - * - drm_encoder_helper_funcs.commit - * * The @atomic_enable callback is optional. */ void (*atomic_enable)(struct drm_bridge *bridge, @@ -424,32 +341,16 @@ struct drm_bridge_funcs { /** * @atomic_disable: * - * The @atomic_disable callback should disable the bridge. + * This callback should disable the bridge. It is called right before + * the preceding element in the display pipe is disabled. If the + * preceding element is a bridge this means it's called before that + * bridge's @atomic_disable or @disable vfunc. If the preceding element + * is a &drm_encoder it's called right before the + * &drm_encoder_helper_funcs.atomic_disable hook. * * The bridge can assume that the display pipe (i.e. clocks and timing * signals) feeding it is still running when this callback is called. * - * If the preceding element is a &drm_bridge, then this is called before - * that bridge is disabled via one of: - * - * - &drm_bridge_funcs.disable - * - &drm_bridge_funcs.atomic_disable - * - * If the preceding element of the bridge is a display controller, then - * this callback is called before the encoder is disabled via one of: - * - * - &drm_encoder_helper_funcs.atomic_disable - * - &drm_encoder_helper_funcs.prepare - * - &drm_encoder_helper_funcs.disable - * - &drm_encoder_helper_funcs.dpms - * - * and the CRTC is disabled via one of: - * - * - &drm_crtc_helper_funcs.prepare - * - &drm_crtc_helper_funcs.atomic_disable - * - &drm_crtc_helper_funcs.disable - * - &drm_crtc_helper_funcs.dpms. - * * The @atomic_disable callback is optional. */ void (*atomic_disable)(struct drm_bridge *bridge, @@ -458,34 +359,16 @@ struct drm_bridge_funcs { /** * @atomic_post_disable: * - * The bridge must assume that the display pipe (i.e. clocks and timing - * signals) feeding this bridge is no longer running when the - * @atomic_post_disable is called. - * - * This callback should perform all the actions required by the hardware - * after it has stopped receiving signals from the preceding element. + * This callback should disable the bridge. It is called right after the + * preceding element in the display pipe is disabled. If the preceding + * element is a bridge this means it's called after that bridge's + * @atomic_post_disable or @post_disable function. If the preceding + * element is a &drm_encoder it's called right after the encoder's + * &drm_encoder_helper_funcs.atomic_disable hook. * - * If the preceding element is a &drm_bridge, then this is called after - * that bridge is post-disabled (unless marked otherwise by the - * @pre_enable_prev_first flag) via one of: - * - * - &drm_bridge_funcs.post_disable - * - &drm_bridge_funcs.atomic_post_disable - * - * If the preceding element of the bridge is a display controller, then - * this callback is called after the encoder is disabled via one of: - * - * - &drm_encoder_helper_funcs.atomic_disable - * - &drm_encoder_helper_funcs.prepare - * - &drm_encoder_helper_funcs.disable - * - &drm_encoder_helper_funcs.dpms - * - * and the CRTC is disabled via one of: - * - * - &drm_crtc_helper_funcs.prepare - * - &drm_crtc_helper_funcs.atomic_disable - * - &drm_crtc_helper_funcs.disable - * - &drm_crtc_helper_funcs.dpms + * The bridge must assume that the display pipe (i.e. clocks and timing + * signals) feeding it is no longer running when this callback is + * called. * * The @atomic_post_disable callback is optional. */ -- cgit v1.2.3 From d1c7dc57ff2400b141e6582a8d2dc5170108cf81 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Fri, 5 Dec 2025 11:51:50 +0200 Subject: drm/atomic-helper: Export and namespace some functions Export and namespace those not prefixed with drm_* so it becomes possible to write custom commit tail functions in individual drivers using the helper infrastructure. Tested-by: Marek Vasut Reviewed-by: Maxime Ripard Signed-off-by: Tomi Valkeinen Cc: stable@vger.kernel.org # v6.17+ Fixes: c9b1150a68d9 ("drm/atomic-helper: Re-order bridge chain pre-enable and post-disable") Reviewed-by: Aradhya Bhatia Reviewed-by: Linus Walleij Tested-by: Linus Walleij Signed-off-by: Linus Walleij Link: https://patch.msgid.link/20251205-drm-seq-fix-v1-3-fda68fa1b3de@ideasonboard.com --- drivers/gpu/drm/drm_atomic_helper.c | 122 +++++++++++++++++++++++++++++------- include/drm/drm_atomic_helper.h | 22 +++++++ 2 files changed, 121 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index ef97f37560b2..5beea645035f 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -1162,8 +1162,18 @@ crtc_needs_disable(struct drm_crtc_state *old_state, new_state->self_refresh_active; } -static void -encoder_bridge_disable(struct drm_device *dev, struct drm_atomic_state *state) +/** + * drm_atomic_helper_commit_encoder_bridge_disable - disable bridges and encoder + * @dev: DRM device + * @state: the driver state object + * + * Loops over all connectors in the current state and if the CRTC needs + * it, disables the bridge chain all the way, then disables the encoder + * afterwards. + */ +void +drm_atomic_helper_commit_encoder_bridge_disable(struct drm_device *dev, + struct drm_atomic_state *state) { struct drm_connector *connector; struct drm_connector_state *old_conn_state, *new_conn_state; @@ -1229,9 +1239,18 @@ encoder_bridge_disable(struct drm_device *dev, struct drm_atomic_state *state) } } } +EXPORT_SYMBOL(drm_atomic_helper_commit_encoder_bridge_disable); -static void -crtc_disable(struct drm_device *dev, struct drm_atomic_state *state) +/** + * drm_atomic_helper_commit_crtc_disable - disable CRTSs + * @dev: DRM device + * @state: the driver state object + * + * Loops over all CRTCs in the current state and if the CRTC needs + * it, disables it. + */ +void +drm_atomic_helper_commit_crtc_disable(struct drm_device *dev, struct drm_atomic_state *state) { struct drm_crtc *crtc; struct drm_crtc_state *old_crtc_state, *new_crtc_state; @@ -1282,9 +1301,18 @@ crtc_disable(struct drm_device *dev, struct drm_atomic_state *state) drm_crtc_vblank_put(crtc); } } +EXPORT_SYMBOL(drm_atomic_helper_commit_crtc_disable); -static void -encoder_bridge_post_disable(struct drm_device *dev, struct drm_atomic_state *state) +/** + * drm_atomic_helper_commit_encoder_bridge_post_disable - post-disable encoder bridges + * @dev: DRM device + * @state: the driver state object + * + * Loops over all connectors in the current state and if the CRTC needs + * it, post-disables all encoder bridges. + */ +void +drm_atomic_helper_commit_encoder_bridge_post_disable(struct drm_device *dev, struct drm_atomic_state *state) { struct drm_connector *connector; struct drm_connector_state *old_conn_state, *new_conn_state; @@ -1335,15 +1363,16 @@ encoder_bridge_post_disable(struct drm_device *dev, struct drm_atomic_state *sta drm_bridge_put(bridge); } } +EXPORT_SYMBOL(drm_atomic_helper_commit_encoder_bridge_post_disable); static void disable_outputs(struct drm_device *dev, struct drm_atomic_state *state) { - encoder_bridge_disable(dev, state); + drm_atomic_helper_commit_encoder_bridge_disable(dev, state); - encoder_bridge_post_disable(dev, state); + drm_atomic_helper_commit_encoder_bridge_post_disable(dev, state); - crtc_disable(dev, state); + drm_atomic_helper_commit_crtc_disable(dev, state); } /** @@ -1446,8 +1475,17 @@ void drm_atomic_helper_calc_timestamping_constants(struct drm_atomic_state *stat } EXPORT_SYMBOL(drm_atomic_helper_calc_timestamping_constants); -static void -crtc_set_mode(struct drm_device *dev, struct drm_atomic_state *state) +/** + * drm_atomic_helper_commit_crtc_set_mode - set the new mode + * @dev: DRM device + * @state: the driver state object + * + * Loops over all connectors in the current state and if the mode has + * changed, change the mode of the CRTC, then call down the bridge + * chain and change the mode in all bridges as well. + */ +void +drm_atomic_helper_commit_crtc_set_mode(struct drm_device *dev, struct drm_atomic_state *state) { struct drm_crtc *crtc; struct drm_crtc_state *new_crtc_state; @@ -1508,6 +1546,7 @@ crtc_set_mode(struct drm_device *dev, struct drm_atomic_state *state) drm_bridge_put(bridge); } } +EXPORT_SYMBOL(drm_atomic_helper_commit_crtc_set_mode); /** * drm_atomic_helper_commit_modeset_disables - modeset commit to disable outputs @@ -1531,12 +1570,21 @@ void drm_atomic_helper_commit_modeset_disables(struct drm_device *dev, drm_atomic_helper_update_legacy_modeset_state(dev, state); drm_atomic_helper_calc_timestamping_constants(state); - crtc_set_mode(dev, state); + drm_atomic_helper_commit_crtc_set_mode(dev, state); } EXPORT_SYMBOL(drm_atomic_helper_commit_modeset_disables); -static void drm_atomic_helper_commit_writebacks(struct drm_device *dev, - struct drm_atomic_state *state) +/** + * drm_atomic_helper_commit_writebacks - issue writebacks + * @dev: DRM device + * @state: atomic state object being committed + * + * This loops over the connectors, checks if the new state requires + * a writeback job to be issued and in that case issues an atomic + * commit on each connector. + */ +void drm_atomic_helper_commit_writebacks(struct drm_device *dev, + struct drm_atomic_state *state) { struct drm_connector *connector; struct drm_connector_state *new_conn_state; @@ -1555,9 +1603,18 @@ static void drm_atomic_helper_commit_writebacks(struct drm_device *dev, } } } +EXPORT_SYMBOL(drm_atomic_helper_commit_writebacks); -static void -encoder_bridge_pre_enable(struct drm_device *dev, struct drm_atomic_state *state) +/** + * drm_atomic_helper_commit_encoder_bridge_pre_enable - pre-enable bridges + * @dev: DRM device + * @state: atomic state object being committed + * + * This loops over the connectors and if the CRTC needs it, pre-enables + * the entire bridge chain. + */ +void +drm_atomic_helper_commit_encoder_bridge_pre_enable(struct drm_device *dev, struct drm_atomic_state *state) { struct drm_connector *connector; struct drm_connector_state *new_conn_state; @@ -1588,9 +1645,18 @@ encoder_bridge_pre_enable(struct drm_device *dev, struct drm_atomic_state *state drm_bridge_put(bridge); } } +EXPORT_SYMBOL(drm_atomic_helper_commit_encoder_bridge_pre_enable); -static void -crtc_enable(struct drm_device *dev, struct drm_atomic_state *state) +/** + * drm_atomic_helper_commit_crtc_enable - enables the CRTCs + * @dev: DRM device + * @state: atomic state object being committed + * + * This loops over CRTCs in the new state, and of the CRTC needs + * it, enables it. + */ +void +drm_atomic_helper_commit_crtc_enable(struct drm_device *dev, struct drm_atomic_state *state) { struct drm_crtc *crtc; struct drm_crtc_state *old_crtc_state; @@ -1619,9 +1685,18 @@ crtc_enable(struct drm_device *dev, struct drm_atomic_state *state) } } } +EXPORT_SYMBOL(drm_atomic_helper_commit_crtc_enable); -static void -encoder_bridge_enable(struct drm_device *dev, struct drm_atomic_state *state) +/** + * drm_atomic_helper_commit_encoder_bridge_enable - enables the bridges + * @dev: DRM device + * @state: atomic state object being committed + * + * This loops over all connectors in the new state, and of the CRTC needs + * it, enables the entire bridge chain. + */ +void +drm_atomic_helper_commit_encoder_bridge_enable(struct drm_device *dev, struct drm_atomic_state *state) { struct drm_connector *connector; struct drm_connector_state *new_conn_state; @@ -1664,6 +1739,7 @@ encoder_bridge_enable(struct drm_device *dev, struct drm_atomic_state *state) drm_bridge_put(bridge); } } +EXPORT_SYMBOL(drm_atomic_helper_commit_encoder_bridge_enable); /** * drm_atomic_helper_commit_modeset_enables - modeset commit to enable outputs @@ -1682,11 +1758,11 @@ encoder_bridge_enable(struct drm_device *dev, struct drm_atomic_state *state) void drm_atomic_helper_commit_modeset_enables(struct drm_device *dev, struct drm_atomic_state *state) { - crtc_enable(dev, state); + drm_atomic_helper_commit_crtc_enable(dev, state); - encoder_bridge_pre_enable(dev, state); + drm_atomic_helper_commit_encoder_bridge_pre_enable(dev, state); - encoder_bridge_enable(dev, state); + drm_atomic_helper_commit_encoder_bridge_enable(dev, state); drm_atomic_helper_commit_writebacks(dev, state); } diff --git a/include/drm/drm_atomic_helper.h b/include/drm/drm_atomic_helper.h index 53382fe93537..e154ee4f0696 100644 --- a/include/drm/drm_atomic_helper.h +++ b/include/drm/drm_atomic_helper.h @@ -60,6 +60,12 @@ int drm_atomic_helper_check_plane_state(struct drm_plane_state *plane_state, int drm_atomic_helper_check_planes(struct drm_device *dev, struct drm_atomic_state *state); int drm_atomic_helper_check_crtc_primary_plane(struct drm_crtc_state *crtc_state); +void drm_atomic_helper_commit_encoder_bridge_disable(struct drm_device *dev, + struct drm_atomic_state *state); +void drm_atomic_helper_commit_crtc_disable(struct drm_device *dev, + struct drm_atomic_state *state); +void drm_atomic_helper_commit_encoder_bridge_post_disable(struct drm_device *dev, + struct drm_atomic_state *state); int drm_atomic_helper_check(struct drm_device *dev, struct drm_atomic_state *state); void drm_atomic_helper_commit_tail(struct drm_atomic_state *state); @@ -89,8 +95,24 @@ drm_atomic_helper_update_legacy_modeset_state(struct drm_device *dev, void drm_atomic_helper_calc_timestamping_constants(struct drm_atomic_state *state); +void drm_atomic_helper_commit_crtc_set_mode(struct drm_device *dev, + struct drm_atomic_state *state); + void drm_atomic_helper_commit_modeset_disables(struct drm_device *dev, struct drm_atomic_state *state); + +void drm_atomic_helper_commit_writebacks(struct drm_device *dev, + struct drm_atomic_state *state); + +void drm_atomic_helper_commit_encoder_bridge_pre_enable(struct drm_device *dev, + struct drm_atomic_state *state); + +void drm_atomic_helper_commit_crtc_enable(struct drm_device *dev, + struct drm_atomic_state *state); + +void drm_atomic_helper_commit_encoder_bridge_enable(struct drm_device *dev, + struct drm_atomic_state *state); + void drm_atomic_helper_commit_modeset_enables(struct drm_device *dev, struct drm_atomic_state *old_state); -- cgit v1.2.3 From 02d1e1a3f9239cdb3ecf2c6d365fb959d1bf39df Mon Sep 17 00:00:00 2001 From: Di Zhu Date: Wed, 24 Dec 2025 09:22:24 +0800 Subject: netdev: preserve NETIF_F_ALL_FOR_ALL across TSO updates Directly increment the TSO features incurs a side effect: it will also directly clear the flags in NETIF_F_ALL_FOR_ALL on the master device, which can cause issues such as the inability to enable the nocache copy feature on the bonding driver. The fix is to include NETIF_F_ALL_FOR_ALL in the update mask, thereby preventing it from being cleared. Fixes: b0ce3508b25e ("bonding: allow TSO being set on bonding master") Signed-off-by: Di Zhu Link: https://patch.msgid.link/20251224012224.56185-1-zhud@hygon.cn Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5870a9e514a5..d99b0fbc1942 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -5323,7 +5323,8 @@ netdev_features_t netdev_increment_features(netdev_features_t all, static inline netdev_features_t netdev_add_tso_features(netdev_features_t features, netdev_features_t mask) { - return netdev_increment_features(features, NETIF_F_ALL_TSO, mask); + return netdev_increment_features(features, NETIF_F_ALL_TSO | + NETIF_F_ALL_FOR_ALL, mask); } int __netdev_update_features(struct net_device *dev); -- cgit v1.2.3 From 1ca8677d9f3491e51395b0e6b9a2b7a75089dc6f Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Mon, 5 Jan 2026 11:17:05 +0100 Subject: ACPI: PCI: IRQ: Fix INTx GSIs signedness In ACPI Global System Interrupts (GSIs) are described using a 32-bit value. ACPI/PCI legacy interrupts (INTx) parsing code treats GSIs as 'int', which poses issues if the GSI interrupt value is a 32-bit value with the MSB set (as required in some interrupt configurations - eg ARM64 GICv5 systems) because acpi_pci_link_allocate_irq() treats a negative gsi return value as a failed GSI allocation (and acpi_irq_get_penalty() would trigger an out-of-bounds array dereference if the 'irq' param is a negative value). Fix ACPI/PCI legacy INTx parsing by converting variables representing GSIs from 'int' to 'u32' bringing the code in line with the ACPI specification and fixing the current parsing issue. Signed-off-by: Lorenzo Pieralisi Reviewed-by: Bjorn Helgaas Link: https://patch.msgid.link/20260105101705.36703-1-lpieralisi@kernel.org Signed-off-by: Rafael J. Wysocki --- drivers/acpi/pci_irq.c | 19 +++++++++++-------- drivers/acpi/pci_link.c | 39 +++++++++++++++++++++++++-------------- drivers/xen/acpi.c | 13 +++++++------ include/acpi/acpi_drivers.h | 2 +- 4 files changed, 44 insertions(+), 29 deletions(-) (limited to 'include') diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c index ad81aa03fe2f..c416942ff3e2 100644 --- a/drivers/acpi/pci_irq.c +++ b/drivers/acpi/pci_irq.c @@ -188,7 +188,7 @@ static int acpi_pci_irq_check_entry(acpi_handle handle, struct pci_dev *dev, * the IRQ value, which is hardwired to specific interrupt inputs on * the interrupt controller. */ - pr_debug("%04x:%02x:%02x[%c] -> %s[%d]\n", + pr_debug("%04x:%02x:%02x[%c] -> %s[%u]\n", entry->id.segment, entry->id.bus, entry->id.device, pin_name(entry->pin), prt->source, entry->index); @@ -384,7 +384,7 @@ static inline bool acpi_pci_irq_valid(struct pci_dev *dev, u8 pin) int acpi_pci_irq_enable(struct pci_dev *dev) { struct acpi_prt_entry *entry; - int gsi; + u32 gsi; u8 pin; int triggering = ACPI_LEVEL_SENSITIVE; /* @@ -422,18 +422,21 @@ int acpi_pci_irq_enable(struct pci_dev *dev) return 0; } + rc = -ENODEV; + if (entry) { if (entry->link) - gsi = acpi_pci_link_allocate_irq(entry->link, + rc = acpi_pci_link_allocate_irq(entry->link, entry->index, &triggering, &polarity, - &link); - else + &link, &gsi); + else { gsi = entry->index; - } else - gsi = -1; + rc = 0; + } + } - if (gsi < 0) { + if (rc < 0) { /* * No IRQ known to the ACPI subsystem - maybe the BIOS / * driver reported one, then use it. Exit in any case. diff --git a/drivers/acpi/pci_link.c b/drivers/acpi/pci_link.c index bed7dc85612e..b91b039a3d20 100644 --- a/drivers/acpi/pci_link.c +++ b/drivers/acpi/pci_link.c @@ -448,7 +448,7 @@ static int acpi_isa_irq_penalty[ACPI_MAX_ISA_IRQS] = { /* >IRQ15 */ }; -static int acpi_irq_pci_sharing_penalty(int irq) +static int acpi_irq_pci_sharing_penalty(u32 irq) { struct acpi_pci_link *link; int penalty = 0; @@ -474,7 +474,7 @@ static int acpi_irq_pci_sharing_penalty(int irq) return penalty; } -static int acpi_irq_get_penalty(int irq) +static int acpi_irq_get_penalty(u32 irq) { int penalty = 0; @@ -528,7 +528,7 @@ static int acpi_irq_balance = -1; /* 0: static, 1: balance */ static int acpi_pci_link_allocate(struct acpi_pci_link *link) { acpi_handle handle = link->device->handle; - int irq; + u32 irq; int i; if (link->irq.initialized) { @@ -598,44 +598,53 @@ static int acpi_pci_link_allocate(struct acpi_pci_link *link) return 0; } -/* - * acpi_pci_link_allocate_irq - * success: return IRQ >= 0 - * failure: return -1 +/** + * acpi_pci_link_allocate_irq(): Retrieve a link device GSI + * + * @handle: Handle for the link device + * @index: GSI index + * @triggering: pointer to store the GSI trigger + * @polarity: pointer to store GSI polarity + * @name: pointer to store link device name + * @gsi: pointer to store GSI number + * + * Returns: + * 0 on success with @triggering, @polarity, @name, @gsi initialized. + * -ENODEV on failure */ int acpi_pci_link_allocate_irq(acpi_handle handle, int index, int *triggering, - int *polarity, char **name) + int *polarity, char **name, u32 *gsi) { struct acpi_device *device = acpi_fetch_acpi_dev(handle); struct acpi_pci_link *link; if (!device) { acpi_handle_err(handle, "Invalid link device\n"); - return -1; + return -ENODEV; } link = acpi_driver_data(device); if (!link) { acpi_handle_err(handle, "Invalid link context\n"); - return -1; + return -ENODEV; } /* TBD: Support multiple index (IRQ) entries per Link Device */ if (index) { acpi_handle_err(handle, "Invalid index %d\n", index); - return -1; + return -ENODEV; } mutex_lock(&acpi_link_lock); if (acpi_pci_link_allocate(link)) { mutex_unlock(&acpi_link_lock); - return -1; + return -ENODEV; } if (!link->irq.active) { mutex_unlock(&acpi_link_lock); acpi_handle_err(handle, "Link active IRQ is 0!\n"); - return -1; + return -ENODEV; } link->refcnt++; mutex_unlock(&acpi_link_lock); @@ -647,7 +656,9 @@ int acpi_pci_link_allocate_irq(acpi_handle handle, int index, int *triggering, if (name) *name = acpi_device_bid(link->device); acpi_handle_debug(handle, "Link is referenced\n"); - return link->irq.active; + *gsi = link->irq.active; + + return 0; } /* diff --git a/drivers/xen/acpi.c b/drivers/xen/acpi.c index d2ee605c5ca1..eab28cfe9939 100644 --- a/drivers/xen/acpi.c +++ b/drivers/xen/acpi.c @@ -89,11 +89,11 @@ int xen_acpi_get_gsi_info(struct pci_dev *dev, int *trigger_out, int *polarity_out) { - int gsi; + u32 gsi; u8 pin; struct acpi_prt_entry *entry; int trigger = ACPI_LEVEL_SENSITIVE; - int polarity = acpi_irq_model == ACPI_IRQ_MODEL_GIC ? + int ret, polarity = acpi_irq_model == ACPI_IRQ_MODEL_GIC ? ACPI_ACTIVE_HIGH : ACPI_ACTIVE_LOW; if (!dev || !gsi_out || !trigger_out || !polarity_out) @@ -105,17 +105,18 @@ int xen_acpi_get_gsi_info(struct pci_dev *dev, entry = acpi_pci_irq_lookup(dev, pin); if (entry) { + ret = 0; if (entry->link) - gsi = acpi_pci_link_allocate_irq(entry->link, + ret = acpi_pci_link_allocate_irq(entry->link, entry->index, &trigger, &polarity, - NULL); + NULL, &gsi); else gsi = entry->index; } else - gsi = -1; + ret = -ENODEV; - if (gsi < 0) + if (ret < 0) return -EINVAL; *gsi_out = gsi; diff --git a/include/acpi/acpi_drivers.h b/include/acpi/acpi_drivers.h index b14d165632e7..402b97d12138 100644 --- a/include/acpi/acpi_drivers.h +++ b/include/acpi/acpi_drivers.h @@ -51,7 +51,7 @@ int acpi_irq_penalty_init(void); int acpi_pci_link_allocate_irq(acpi_handle handle, int index, int *triggering, - int *polarity, char **name); + int *polarity, char **name, u32 *gsi); int acpi_pci_link_free_irq(acpi_handle handle); /* ACPI PCI Device Binding */ -- cgit v1.2.3 From a7fc8c641cab855824c45e5e8877e40fd528b5df Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Fri, 2 Jan 2026 12:29:38 +0100 Subject: net: airoha: Fix npu rx DMA definitions Fix typos in npu rx DMA descriptor definitions. Fixes: b3ef7bdec66fb ("net: airoha: Add airoha_offload.h header") Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20260102-airoha-npu-dma-rx-def-fixes-v1-1-205fc6bf7d94@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/soc/airoha/airoha_offload.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/soc/airoha/airoha_offload.h b/include/linux/soc/airoha/airoha_offload.h index 4d23cbb7d407..ab64ecdf39a0 100644 --- a/include/linux/soc/airoha/airoha_offload.h +++ b/include/linux/soc/airoha/airoha_offload.h @@ -71,12 +71,12 @@ static inline void airoha_ppe_dev_check_skb(struct airoha_ppe_dev *dev, #define NPU_RX1_DESC_NUM 512 /* CTRL */ -#define NPU_RX_DMA_DESC_LAST_MASK BIT(29) -#define NPU_RX_DMA_DESC_LEN_MASK GENMASK(28, 15) -#define NPU_RX_DMA_DESC_CUR_LEN_MASK GENMASK(14, 1) +#define NPU_RX_DMA_DESC_LAST_MASK BIT(27) +#define NPU_RX_DMA_DESC_LEN_MASK GENMASK(26, 14) +#define NPU_RX_DMA_DESC_CUR_LEN_MASK GENMASK(13, 1) #define NPU_RX_DMA_DESC_DONE_MASK BIT(0) /* INFO */ -#define NPU_RX_DMA_PKT_COUNT_MASK GENMASK(31, 28) +#define NPU_RX_DMA_PKT_COUNT_MASK GENMASK(31, 29) #define NPU_RX_DMA_PKT_ID_MASK GENMASK(28, 26) #define NPU_RX_DMA_SRC_PORT_MASK GENMASK(25, 21) #define NPU_RX_DMA_CRSN_MASK GENMASK(20, 16) -- cgit v1.2.3 From 5232196ff49be08350b27f1ba8e1fad87afc9cdf Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 2 Jan 2026 14:31:48 -0500 Subject: ftrace: Make ftrace_graph_ent depth field signed The code has integrity checks to make sure that depth never goes below zero. But the depth field has recently been converted to unsigned long from "int" (for alignment reasons). As unsigned long can never be less than zero, the integrity checks no longer work. Convert depth to long from unsigned long to allow the integrity checks to work again. Cc: stable@vger.kernel.org Cc: Mathieu Desnoyers Cc: pengdonglin Link: https://patch.msgid.link/20260102143148.251c2e16@gandalf.local.home Reported-by: Dan Carpenter Closes: https://lore.kernel.org/all/aS6kGi0maWBl-MjZ@stanley.mountain/ Fixes: f83ac7544fbf7 ("function_graph: Enable funcgraph-args and funcgraph-retaddr to work simultaneously") Signed-off-by: Steven Rostedt (Google) Acked-by: Masami Hiramatsu (Google) --- include/linux/ftrace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 770f0dc993cc..a3a8989e3268 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -1167,7 +1167,7 @@ static inline void ftrace_init(void) { } */ struct ftrace_graph_ent { unsigned long func; /* Current function */ - unsigned long depth; + long depth; /* signed to check for less than zero */ } __packed; /* -- cgit v1.2.3 From 5f1ef0dfcb5b7f4a91a9b0e0ba533efd9f7e2cdb Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 5 Jan 2026 20:31:41 -0500 Subject: tracing: Add recursion protection in kernel stack trace recording A bug was reported about an infinite recursion caused by tracing the rcu events with the kernel stack trace trigger enabled. The stack trace code called back into RCU which then called the stack trace again. Expand the ftrace recursion protection to add a set of bits to protect events from recursion. Each bit represents the context that the event is in (normal, softirq, interrupt and NMI). Have the stack trace code use the interrupt context to protect against recursion. Note, the bug showed an issue in both the RCU code as well as the tracing stacktrace code. This only handles the tracing stack trace side of the bug. The RCU fix will be handled separately. Link: https://lore.kernel.org/all/20260102122807.7025fc87@gandalf.local.home/ Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Joel Fernandes Cc: "Paul E. McKenney" Cc: Boqun Feng Link: https://patch.msgid.link/20260105203141.515cd49f@gandalf.local.home Reported-by: Yao Kai Tested-by: Yao Kai Fixes: 5f5fa7ea89dc ("rcu: Don't use negative nesting depth in __rcu_read_unlock()") Signed-off-by: Steven Rostedt (Google) --- include/linux/trace_recursion.h | 9 +++++++++ kernel/trace/trace.c | 6 ++++++ 2 files changed, 15 insertions(+) (limited to 'include') diff --git a/include/linux/trace_recursion.h b/include/linux/trace_recursion.h index ae04054a1be3..e6ca052b2a85 100644 --- a/include/linux/trace_recursion.h +++ b/include/linux/trace_recursion.h @@ -34,6 +34,13 @@ enum { TRACE_INTERNAL_SIRQ_BIT, TRACE_INTERNAL_TRANSITION_BIT, + /* Internal event use recursion bits */ + TRACE_INTERNAL_EVENT_BIT, + TRACE_INTERNAL_EVENT_NMI_BIT, + TRACE_INTERNAL_EVENT_IRQ_BIT, + TRACE_INTERNAL_EVENT_SIRQ_BIT, + TRACE_INTERNAL_EVENT_TRANSITION_BIT, + TRACE_BRANCH_BIT, /* * Abuse of the trace_recursion. @@ -58,6 +65,8 @@ enum { #define TRACE_LIST_START TRACE_INTERNAL_BIT +#define TRACE_EVENT_START TRACE_INTERNAL_EVENT_BIT + #define TRACE_CONTEXT_MASK ((1 << (TRACE_LIST_START + TRACE_CONTEXT_BITS)) - 1) /* diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 6f2148df14d9..aef9058537d5 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3012,6 +3012,11 @@ static void __ftrace_trace_stack(struct trace_array *tr, struct ftrace_stack *fstack; struct stack_entry *entry; int stackidx; + int bit; + + bit = trace_test_and_set_recursion(_THIS_IP_, _RET_IP_, TRACE_EVENT_START); + if (bit < 0) + return; /* * Add one, for this function and the call to save_stack_trace() @@ -3080,6 +3085,7 @@ static void __ftrace_trace_stack(struct trace_array *tr, /* Again, don't let gcc optimize things here */ barrier(); __this_cpu_dec(ftrace_stack_reserve); + trace_clear_recursion(bit); } static inline void ftrace_trace_stack(struct trace_array *tr, -- cgit v1.2.3 From 2e4b28c48f88ce9e263957b1d944cf5349952f88 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 11 Jan 2026 16:53:48 +0100 Subject: treewide: Update email address In a vain attempt to consolidate the email zoo switch everything to the kernel.org account. Signed-off-by: Thomas Gleixner Signed-off-by: Linus Torvalds --- .mailmap | 1 + CREDITS | 2 +- .../ABI/stable/sysfs-kernel-time-aux-clocks | 2 +- Documentation/arch/x86/topology.rst | 2 +- Documentation/core-api/cpu_hotplug.rst | 2 +- Documentation/core-api/genericirq.rst | 2 +- Documentation/core-api/librs.rst | 2 +- .../devicetree/bindings/timer/mrvl,mmp-timer.yaml | 2 +- Documentation/driver-api/mtdnand.rst | 4 +-- .../translations/zh_CN/core-api/cpu_hotplug.rst | 2 +- .../translations/zh_CN/core-api/genericirq.rst | 2 +- MAINTAINERS | 36 +++++++++++----------- arch/sh/kernel/perf_event.c | 2 +- arch/sparc/kernel/perf_event.c | 2 +- arch/x86/events/core.c | 2 +- arch/x86/events/perf_event.h | 2 +- arch/x86/kernel/x86_init.c | 2 +- arch/x86/mm/pti.c | 2 +- drivers/mtd/nand/ecc-sw-hamming.c | 2 +- drivers/mtd/nand/raw/diskonchip.c | 2 +- drivers/mtd/nand/raw/nand_base.c | 4 +-- drivers/mtd/nand/raw/nand_bbt.c | 2 +- drivers/mtd/nand/raw/nand_ids.c | 2 +- drivers/mtd/nand/raw/nand_jedec.c | 2 +- drivers/mtd/nand/raw/nand_legacy.c | 2 +- drivers/mtd/nand/raw/nand_onfi.c | 2 +- drivers/mtd/nand/raw/ndfc.c | 2 +- drivers/uio/uio.c | 2 +- fs/jffs2/wbuf.c | 4 +-- include/linux/hrtimer.h | 2 +- include/linux/ktime.h | 2 +- include/linux/mtd/jedec.h | 2 +- include/linux/mtd/nand-ecc-sw-hamming.h | 2 +- include/linux/mtd/ndfc.h | 2 +- include/linux/mtd/onfi.h | 2 +- include/linux/mtd/platnand.h | 2 +- include/linux/mtd/rawnand.h | 2 +- include/linux/perf_event.h | 2 +- include/linux/plist.h | 2 +- include/linux/rslib.h | 2 +- include/linux/uio_driver.h | 2 +- include/uapi/linux/perf_event.h | 2 +- kernel/events/callchain.c | 2 +- kernel/events/core.c | 2 +- kernel/events/ring_buffer.c | 2 +- kernel/irq/debugfs.c | 2 +- kernel/irq/matrix.c | 2 +- kernel/sched/fair.c | 2 +- kernel/sched/pelt.c | 2 +- kernel/time/clockevents.c | 2 +- kernel/time/hrtimer.c | 2 +- kernel/time/tick-broadcast.c | 2 +- kernel/time/tick-common.c | 2 +- kernel/time/tick-oneshot.c | 2 +- kernel/time/tick-sched.c | 2 +- lib/debugobjects.c | 2 +- lib/plist.c | 2 +- lib/reed_solomon/decode_rs.c | 2 +- lib/reed_solomon/encode_rs.c | 2 +- lib/reed_solomon/reed_solomon.c | 2 +- scripts/spdxcheck.py | 2 +- tools/include/uapi/linux/perf_event.h | 2 +- tools/perf/builtin-list.c | 2 +- 63 files changed, 83 insertions(+), 82 deletions(-) (limited to 'include') diff --git a/.mailmap b/.mailmap index b23e0853d636..fa018b5bd533 100644 --- a/.mailmap +++ b/.mailmap @@ -801,6 +801,7 @@ Tanzir Hasan Tejun Heo Tomeu Vizoso Thomas Graf +Thomas Gleixner Thomas Körper Thomas Pedersen Thorsten Blum diff --git a/CREDITS b/CREDITS index ca75f110edb6..383809bc4b7a 100644 --- a/CREDITS +++ b/CREDITS @@ -1398,7 +1398,7 @@ D: SRM environment driver (for Alpha systems) P: 1024D/8399E1BB 250D 3BCF 7127 0D8C A444 A961 1DBD 5E75 8399 E1BB N: Thomas Gleixner -E: tglx@linutronix.de +E: tglx@kernel.org D: NAND flash hardware support, JFFS2 on NAND flash N: Jérôme Glisse diff --git a/Documentation/ABI/stable/sysfs-kernel-time-aux-clocks b/Documentation/ABI/stable/sysfs-kernel-time-aux-clocks index 825508f42af6..e1a894c8dd1b 100644 --- a/Documentation/ABI/stable/sysfs-kernel-time-aux-clocks +++ b/Documentation/ABI/stable/sysfs-kernel-time-aux-clocks @@ -1,5 +1,5 @@ What: /sys/kernel/time/aux_clocks//enable Date: May 2025 -Contact: Thomas Gleixner +Contact: Thomas Gleixner Description: Controls the enablement of auxiliary clock timekeepers. diff --git a/Documentation/arch/x86/topology.rst b/Documentation/arch/x86/topology.rst index 86bec8ac2c4d..f779a68875c5 100644 --- a/Documentation/arch/x86/topology.rst +++ b/Documentation/arch/x86/topology.rst @@ -17,7 +17,7 @@ with the generic one and look at this one in parallel for the x86 specifics. Needless to say, code should use the generic functions - this file is *only* here to *document* the inner workings of x86 topology. -Started by Thomas Gleixner and Borislav Petkov . +Started by Thomas Gleixner and Borislav Petkov . The main aim of the topology facilities is to present adequate interfaces to code which needs to know/query/use the structure of the running system wrt diff --git a/Documentation/core-api/cpu_hotplug.rst b/Documentation/core-api/cpu_hotplug.rst index e1b0eeabbb5e..9b4afca9fd09 100644 --- a/Documentation/core-api/cpu_hotplug.rst +++ b/Documentation/core-api/cpu_hotplug.rst @@ -8,7 +8,7 @@ CPU hotplug in the Kernel Srivatsa Vaddagiri , Ashok Raj , Joel Schopp , - Thomas Gleixner + Thomas Gleixner Introduction ============ diff --git a/Documentation/core-api/genericirq.rst b/Documentation/core-api/genericirq.rst index 582bde9bf5a9..b16d751d4b98 100644 --- a/Documentation/core-api/genericirq.rst +++ b/Documentation/core-api/genericirq.rst @@ -439,6 +439,6 @@ Credits The following people have contributed to this document: -1. Thomas Gleixner tglx@linutronix.de +1. Thomas Gleixner tglx@kernel.org 2. Ingo Molnar mingo@elte.hu diff --git a/Documentation/core-api/librs.rst b/Documentation/core-api/librs.rst index 6010f5bc5bf9..0d88893dbc03 100644 --- a/Documentation/core-api/librs.rst +++ b/Documentation/core-api/librs.rst @@ -209,4 +209,4 @@ testing. Thanks a lot. The following people have contributed to this document: -Thomas Gleixner\ tglx@linutronix.de +Thomas Gleixner\ tglx@kernel.org diff --git a/Documentation/devicetree/bindings/timer/mrvl,mmp-timer.yaml b/Documentation/devicetree/bindings/timer/mrvl,mmp-timer.yaml index fe6bc4173789..0643cfcc6bc7 100644 --- a/Documentation/devicetree/bindings/timer/mrvl,mmp-timer.yaml +++ b/Documentation/devicetree/bindings/timer/mrvl,mmp-timer.yaml @@ -8,7 +8,7 @@ title: Marvell MMP Timer maintainers: - Daniel Lezcano - - Thomas Gleixner + - Thomas Gleixner - Rob Herring properties: diff --git a/Documentation/driver-api/mtdnand.rst b/Documentation/driver-api/mtdnand.rst index ce77e024c4f1..adf03983f1ba 100644 --- a/Documentation/driver-api/mtdnand.rst +++ b/Documentation/driver-api/mtdnand.rst @@ -996,11 +996,11 @@ The following people have contributed to the NAND driver: 2. David Woodhouse\ dwmw2@infradead.org -3. Thomas Gleixner\ tglx@linutronix.de +3. Thomas Gleixner\ tglx@kernel.org A lot of users have provided bugfixes, improvements and helping hands for testing. Thanks a lot. The following people have contributed to this document: -1. Thomas Gleixner\ tglx@linutronix.de +1. Thomas Gleixner\ tglx@kernel.org diff --git a/Documentation/translations/zh_CN/core-api/cpu_hotplug.rst b/Documentation/translations/zh_CN/core-api/cpu_hotplug.rst index bc0d7ea6d834..3447fbf0e695 100644 --- a/Documentation/translations/zh_CN/core-api/cpu_hotplug.rst +++ b/Documentation/translations/zh_CN/core-api/cpu_hotplug.rst @@ -22,7 +22,7 @@ Srivatsa Vaddagiri , Ashok Raj , Joel Schopp , - Thomas Gleixner + Thomas Gleixner 简介 ==== diff --git a/Documentation/translations/zh_CN/core-api/genericirq.rst b/Documentation/translations/zh_CN/core-api/genericirq.rst index 05ccb954c18d..d2c1bd94bb97 100644 --- a/Documentation/translations/zh_CN/core-api/genericirq.rst +++ b/Documentation/translations/zh_CN/core-api/genericirq.rst @@ -404,6 +404,6 @@ kernel/irq/chip.c 感谢以下人士对本文档作出的贡献: -1. Thomas Gleixner tglx@linutronix.de +1. Thomas Gleixner tglx@kernel.org 2. Ingo Molnar mingo@elte.hu diff --git a/MAINTAINERS b/MAINTAINERS index 32b5e41d9849..ee036e0a3ef6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6175,7 +6175,7 @@ F: include/linux/clk.h CLOCKSOURCE, CLOCKEVENT DRIVERS M: Daniel Lezcano -M: Thomas Gleixner +M: Thomas Gleixner L: linux-kernel@vger.kernel.org S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git timers/core @@ -6541,7 +6541,7 @@ S: Maintained F: drivers/cpufreq/virtual-cpufreq.c CPU HOTPLUG -M: Thomas Gleixner +M: Thomas Gleixner M: Peter Zijlstra L: linux-kernel@vger.kernel.org S: Maintained @@ -6968,7 +6968,7 @@ F: Documentation/scsi/dc395x.rst F: drivers/scsi/dc395x.* DEBUGOBJECTS: -M: Thomas Gleixner +M: Thomas Gleixner L: linux-kernel@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git core/debugobjects @@ -10371,7 +10371,7 @@ F: include/uapi/linux/fuse.h F: tools/testing/selftests/filesystems/fuse/ FUTEX SUBSYSTEM -M: Thomas Gleixner +M: Thomas Gleixner M: Ingo Molnar R: Peter Zijlstra R: Darren Hart @@ -10515,7 +10515,7 @@ F: drivers/base/arch_topology.c F: include/linux/arch_topology.h GENERIC ENTRY CODE -M: Thomas Gleixner +M: Thomas Gleixner M: Peter Zijlstra M: Andy Lutomirski L: linux-kernel@vger.kernel.org @@ -10628,7 +10628,7 @@ F: drivers/uio/uio_pci_generic.c GENERIC VDSO LIBRARY M: Andy Lutomirski -M: Thomas Gleixner +M: Thomas Gleixner M: Vincenzo Frascino L: linux-kernel@vger.kernel.org S: Maintained @@ -11241,7 +11241,7 @@ F: drivers/hid/hid-logitech-hidpp.c HIGH-RESOLUTION TIMERS, TIMER WHEEL, CLOCKEVENTS M: Anna-Maria Behnsen M: Frederic Weisbecker -M: Thomas Gleixner +M: Thomas Gleixner L: linux-kernel@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git timers/core @@ -11264,7 +11264,7 @@ R: Boqun Feng R: FUJITA Tomonori R: Frederic Weisbecker R: Lyude Paul -R: Thomas Gleixner +R: Thomas Gleixner R: Anna-Maria Behnsen R: John Stultz R: Stephen Boyd @@ -13334,7 +13334,7 @@ F: Documentation/devicetree/bindings/sound/irondevice,* F: sound/soc/codecs/sma* IRQ DOMAINS (IRQ NUMBER MAPPING LIBRARY) -M: Thomas Gleixner +M: Thomas Gleixner S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git irq/core F: Documentation/core-api/irq/irq-domain.rst @@ -13344,7 +13344,7 @@ F: kernel/irq/irqdomain.c F: kernel/irq/msi.c IRQ SUBSYSTEM -M: Thomas Gleixner +M: Thomas Gleixner L: linux-kernel@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git irq/core @@ -13357,7 +13357,7 @@ F: kernel/irq/ F: lib/group_cpus.c IRQCHIP DRIVERS -M: Thomas Gleixner +M: Thomas Gleixner L: linux-kernel@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git irq/core @@ -14451,7 +14451,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-nonmm-unstab F: lib/* LICENSES and SPDX stuff -M: Thomas Gleixner +M: Thomas Gleixner M: Greg Kroah-Hartman L: linux-spdx@vger.kernel.org S: Maintained @@ -18576,7 +18576,7 @@ NOHZ, DYNTICKS SUPPORT M: Anna-Maria Behnsen M: Frederic Weisbecker M: Ingo Molnar -M: Thomas Gleixner +M: Thomas Gleixner L: linux-kernel@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git timers/nohz @@ -20761,7 +20761,7 @@ F: drivers/platform/x86/portwell-ec.c POSIX CLOCKS and TIMERS M: Anna-Maria Behnsen M: Frederic Weisbecker -M: Thomas Gleixner +M: Thomas Gleixner L: linux-kernel@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git timers/core @@ -26272,7 +26272,7 @@ F: drivers/net/wireless/ti/ TIMEKEEPING, CLOCKSOURCE CORE, NTP, ALARMTIMER M: John Stultz -M: Thomas Gleixner +M: Thomas Gleixner R: Stephen Boyd L: linux-kernel@vger.kernel.org S: Supported @@ -28203,7 +28203,7 @@ F: net/lapb/ F: net/x25/ X86 ARCHITECTURE (32-BIT AND 64-BIT) -M: Thomas Gleixner +M: Thomas Gleixner M: Ingo Molnar M: Borislav Petkov M: Dave Hansen @@ -28219,7 +28219,7 @@ F: tools/testing/selftests/x86 X86 CPUID DATABASE M: Borislav Petkov -M: Thomas Gleixner +M: Thomas Gleixner M: x86@kernel.org R: Ahmed S. Darwish L: x86-cpuid@lists.linux.dev @@ -28235,7 +28235,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86/asm F: arch/x86/entry/ X86 HARDWARE VULNERABILITIES -M: Thomas Gleixner +M: Thomas Gleixner M: Borislav Petkov M: Peter Zijlstra M: Josh Poimboeuf diff --git a/arch/sh/kernel/perf_event.c b/arch/sh/kernel/perf_event.c index 1d2507f22437..1fbb7d46e484 100644 --- a/arch/sh/kernel/perf_event.c +++ b/arch/sh/kernel/perf_event.c @@ -7,7 +7,7 @@ * Heavily based on the x86 and PowerPC implementations. * * x86: - * Copyright (C) 2008 Thomas Gleixner + * Copyright (C) 2008 Linutronix GmbH, Thomas Gleixner * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar * Copyright (C) 2009 Jaswinder Singh Rajput * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index cae4d33002a5..0ce4ae343531 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -6,7 +6,7 @@ * This code is based almost entirely upon the x86 perf event * code, which is: * - * Copyright (C) 2008 Thomas Gleixner + * Copyright (C) 2008 Linutronix GmbH, Thomas Gleixner * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar * Copyright (C) 2009 Jaswinder Singh Rajput * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 0c38a31d5fc7..576baa9a52c5 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -1,7 +1,7 @@ /* * Performance events x86 architecture code * - * Copyright (C) 2008 Thomas Gleixner + * Copyright (C) 2008 Linutronix GmbH, Thomas Gleixner * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar * Copyright (C) 2009 Jaswinder Singh Rajput * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 3161ec0a3416..62963022b517 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -1,7 +1,7 @@ /* * Performance events x86 architecture header * - * Copyright (C) 2008 Thomas Gleixner + * Copyright (C) 2008 Linutronix GmbH, Thomas Gleixner * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar * Copyright (C) 2009 Jaswinder Singh Rajput * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 0a2bbd674a6d..ebefb77c37bb 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2009 Thomas Gleixner + * Copyright (C) 2009 Linutronix GmbH, Thomas Gleixner * * For licencing details see kernel-base/COPYING */ diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c index b10d4d131dce..f7546e9e8e89 100644 --- a/arch/x86/mm/pti.c +++ b/arch/x86/mm/pti.c @@ -15,7 +15,7 @@ * Signed-off-by: Michael Schwarz * * Major changes to the original code by: Dave Hansen - * Mostly rewritten by Thomas Gleixner and + * Mostly rewritten by Thomas Gleixner and * Andy Lutomirsky */ #include diff --git a/drivers/mtd/nand/ecc-sw-hamming.c b/drivers/mtd/nand/ecc-sw-hamming.c index f2d0effad9d2..bc62a71f9fdd 100644 --- a/drivers/mtd/nand/ecc-sw-hamming.c +++ b/drivers/mtd/nand/ecc-sw-hamming.c @@ -8,7 +8,7 @@ * * Completely replaces the previous ECC implementation which was written by: * Steven J. Hill (sjhill@realitydiluted.com) - * Thomas Gleixner (tglx@linutronix.de) + * Thomas Gleixner (tglx@kernel.org) * * Information on how this algorithm works and how it was developed * can be found in Documentation/driver-api/mtd/nand_ecc.rst diff --git a/drivers/mtd/nand/raw/diskonchip.c b/drivers/mtd/nand/raw/diskonchip.c index 70d6c2250f32..540b6baf8bb1 100644 --- a/drivers/mtd/nand/raw/diskonchip.c +++ b/drivers/mtd/nand/raw/diskonchip.c @@ -11,7 +11,7 @@ * Error correction code lifted from the old docecc code * Author: Fabrice Bellard (fabrice.bellard@netgem.com) * Copyright (C) 2000 Netgem S.A. - * converted to the generic Reed-Solomon library by Thomas Gleixner + * converted to the generic Reed-Solomon library by Thomas Gleixner * * Interface to generic NAND code for M-Systems DiskOnChip devices */ diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c index ad6d66309597..f2322de93ab4 100644 --- a/drivers/mtd/nand/raw/nand_base.c +++ b/drivers/mtd/nand/raw/nand_base.c @@ -8,7 +8,7 @@ * http://www.linux-mtd.infradead.org/doc/nand.html * * Copyright (C) 2000 Steven J. Hill (sjhill@realitydiluted.com) - * 2002-2006 Thomas Gleixner (tglx@linutronix.de) + * 2002-2006 Thomas Gleixner (tglx@kernel.org) * * Credits: * David Woodhouse for adding multichip support @@ -6594,5 +6594,5 @@ EXPORT_SYMBOL_GPL(nand_cleanup); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Steven J. Hill "); -MODULE_AUTHOR("Thomas Gleixner "); +MODULE_AUTHOR("Thomas Gleixner "); MODULE_DESCRIPTION("Generic NAND flash driver code"); diff --git a/drivers/mtd/nand/raw/nand_bbt.c b/drivers/mtd/nand/raw/nand_bbt.c index a8fba5f39f59..3050ab7e6eb6 100644 --- a/drivers/mtd/nand/raw/nand_bbt.c +++ b/drivers/mtd/nand/raw/nand_bbt.c @@ -3,7 +3,7 @@ * Overview: * Bad block table support for the NAND driver * - * Copyright © 2004 Thomas Gleixner (tglx@linutronix.de) + * Copyright © 2004 Thomas Gleixner (tglx@kernel.org) * * Description: * diff --git a/drivers/mtd/nand/raw/nand_ids.c b/drivers/mtd/nand/raw/nand_ids.c index 650351c62af6..62a8cf86d9e2 100644 --- a/drivers/mtd/nand/raw/nand_ids.c +++ b/drivers/mtd/nand/raw/nand_ids.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (C) 2002 Thomas Gleixner (tglx@linutronix.de) + * Copyright (C) 2002 Thomas Gleixner (tglx@kernel.org) */ #include diff --git a/drivers/mtd/nand/raw/nand_jedec.c b/drivers/mtd/nand/raw/nand_jedec.c index b3cc8f360529..89e6dd8ed1a8 100644 --- a/drivers/mtd/nand/raw/nand_jedec.c +++ b/drivers/mtd/nand/raw/nand_jedec.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2000 Steven J. Hill (sjhill@realitydiluted.com) - * 2002-2006 Thomas Gleixner (tglx@linutronix.de) + * 2002-2006 Thomas Gleixner (tglx@kernel.org) * * Credits: * David Woodhouse for adding multichip support diff --git a/drivers/mtd/nand/raw/nand_legacy.c b/drivers/mtd/nand/raw/nand_legacy.c index 743792edf98d..97700f80d5b8 100644 --- a/drivers/mtd/nand/raw/nand_legacy.c +++ b/drivers/mtd/nand/raw/nand_legacy.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2000 Steven J. Hill (sjhill@realitydiluted.com) - * 2002-2006 Thomas Gleixner (tglx@linutronix.de) + * 2002-2006 Thomas Gleixner (tglx@kernel.org) * * Credits: * David Woodhouse for adding multichip support diff --git a/drivers/mtd/nand/raw/nand_onfi.c b/drivers/mtd/nand/raw/nand_onfi.c index 861975e44b55..11954440e4de 100644 --- a/drivers/mtd/nand/raw/nand_onfi.c +++ b/drivers/mtd/nand/raw/nand_onfi.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2000 Steven J. Hill (sjhill@realitydiluted.com) - * 2002-2006 Thomas Gleixner (tglx@linutronix.de) + * 2002-2006 Thomas Gleixner (tglx@kernel.org) * * Credits: * David Woodhouse for adding multichip support diff --git a/drivers/mtd/nand/raw/ndfc.c b/drivers/mtd/nand/raw/ndfc.c index 13365128194d..7ad8bc04be1a 100644 --- a/drivers/mtd/nand/raw/ndfc.c +++ b/drivers/mtd/nand/raw/ndfc.c @@ -272,5 +272,5 @@ static struct platform_driver ndfc_driver = { module_platform_driver(ndfc_driver); MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Thomas Gleixner "); +MODULE_AUTHOR("Thomas Gleixner "); MODULE_DESCRIPTION("OF Platform driver for NDFC"); diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c index d93ed4e86a17..fa0d4e6aee16 100644 --- a/drivers/uio/uio.c +++ b/drivers/uio/uio.c @@ -3,7 +3,7 @@ * drivers/uio/uio.c * * Copyright(C) 2005, Benedikt Spranger - * Copyright(C) 2005, Thomas Gleixner + * Copyright(C) 2005, Linutronix GmbH, Thomas Gleixner * Copyright(C) 2006, Hans J. Koch * Copyright(C) 2006, Greg Kroah-Hartman * diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c index bb815a002984..3ab3f0ff7ebb 100644 --- a/fs/jffs2/wbuf.c +++ b/fs/jffs2/wbuf.c @@ -2,10 +2,10 @@ * JFFS2 -- Journalling Flash File System, Version 2. * * Copyright © 2001-2007 Red Hat, Inc. - * Copyright © 2004 Thomas Gleixner + * Copyright © 2004 Thomas Gleixner * * Created by David Woodhouse - * Modified debugged and enhanced by Thomas Gleixner + * Modified debugged and enhanced by Thomas Gleixner * * For licensing information, see the file 'LICENCE' in this directory. * diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 2cf1bf65b225..0de12f14d6a4 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -2,7 +2,7 @@ /* * hrtimers - High-resolution kernel timers * - * Copyright(C) 2005, Thomas Gleixner + * Copyright(C) 2005, Linutronix GmbH, Thomas Gleixner * Copyright(C) 2005, Red Hat, Inc., Ingo Molnar * * data type definitions, declarations, prototypes diff --git a/include/linux/ktime.h b/include/linux/ktime.h index 383ed9985802..f247e564602f 100644 --- a/include/linux/ktime.h +++ b/include/linux/ktime.h @@ -3,7 +3,7 @@ * * ktime_t - nanosecond-resolution time format. * - * Copyright(C) 2005, Thomas Gleixner + * Copyright(C) 2005, Linutronix GmbH, Thomas Gleixner * Copyright(C) 2005, Red Hat, Inc., Ingo Molnar * * data type definitions, declarations, prototypes and macros. diff --git a/include/linux/mtd/jedec.h b/include/linux/mtd/jedec.h index 56047a4e54c9..255972f3d88d 100644 --- a/include/linux/mtd/jedec.h +++ b/include/linux/mtd/jedec.h @@ -2,7 +2,7 @@ /* * Copyright © 2000-2010 David Woodhouse * Steven J. Hill - * Thomas Gleixner + * Thomas Gleixner * * Contains all JEDEC related definitions */ diff --git a/include/linux/mtd/nand-ecc-sw-hamming.h b/include/linux/mtd/nand-ecc-sw-hamming.h index c6c71894c575..2aa2f8ef68d2 100644 --- a/include/linux/mtd/nand-ecc-sw-hamming.h +++ b/include/linux/mtd/nand-ecc-sw-hamming.h @@ -2,7 +2,7 @@ /* * Copyright (C) 2000-2010 Steven J. Hill * David Woodhouse - * Thomas Gleixner + * Thomas Gleixner * * This file is the header for the NAND Hamming ECC implementation. */ diff --git a/include/linux/mtd/ndfc.h b/include/linux/mtd/ndfc.h index 98f075b86931..622891191e9c 100644 --- a/include/linux/mtd/ndfc.h +++ b/include/linux/mtd/ndfc.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright (c) 2006 Thomas Gleixner + * Copyright (c) 2006 Linutronix GmbH, Thomas Gleixner * * Info: * Contains defines, datastructures for ndfc nand controller diff --git a/include/linux/mtd/onfi.h b/include/linux/mtd/onfi.h index 55ab2e4d62f9..09a5cbd8f232 100644 --- a/include/linux/mtd/onfi.h +++ b/include/linux/mtd/onfi.h @@ -2,7 +2,7 @@ /* * Copyright © 2000-2010 David Woodhouse * Steven J. Hill - * Thomas Gleixner + * Thomas Gleixner * * Contains all ONFI related definitions */ diff --git a/include/linux/mtd/platnand.h b/include/linux/mtd/platnand.h index bc11eb6b593b..2df6fba699f2 100644 --- a/include/linux/mtd/platnand.h +++ b/include/linux/mtd/platnand.h @@ -2,7 +2,7 @@ /* * Copyright © 2000-2010 David Woodhouse * Steven J. Hill - * Thomas Gleixner + * Thomas Gleixner * * Contains all platform NAND related definitions. */ diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index d30bdc3fcfd7..5c70e7bd3ed5 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -2,7 +2,7 @@ /* * Copyright © 2000-2010 David Woodhouse * Steven J. Hill - * Thomas Gleixner + * Thomas Gleixner * * Info: * Contains standard defines and IDs for NAND flash devices diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 9870d768db4c..9ded2e582c60 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1,7 +1,7 @@ /* * Performance events: * - * Copyright (C) 2008-2009, Thomas Gleixner + * Copyright (C) 2008-2009, Linutronix GmbH, Thomas Gleixner * Copyright (C) 2008-2011, Red Hat, Inc., Ingo Molnar * Copyright (C) 2008-2011, Red Hat, Inc., Peter Zijlstra * diff --git a/include/linux/plist.h b/include/linux/plist.h index 8c1c8adf7fe9..16cf4355b5c1 100644 --- a/include/linux/plist.h +++ b/include/linux/plist.h @@ -8,7 +8,7 @@ * 2001-2005 (c) MontaVista Software, Inc. * Daniel Walker * - * (C) 2005 Thomas Gleixner + * (C) 2005 Linutronix GmbH, Thomas Gleixner * * Simplifications of the original code by * Oleg Nesterov diff --git a/include/linux/rslib.h b/include/linux/rslib.h index a04dacbdc8ae..a2848f6907e3 100644 --- a/include/linux/rslib.h +++ b/include/linux/rslib.h @@ -2,7 +2,7 @@ /* * Generic Reed Solomon encoder / decoder library * - * Copyright (C) 2004 Thomas Gleixner (tglx@linutronix.de) + * Copyright (C) 2004 Thomas Gleixner (tglx@kernel.org) * * RS code lifted from reed solomon library written by Phil Karn * Copyright 2002 Phil Karn, KA9Q diff --git a/include/linux/uio_driver.h b/include/linux/uio_driver.h index 18238dc8bfd3..334641e20fb1 100644 --- a/include/linux/uio_driver.h +++ b/include/linux/uio_driver.h @@ -3,7 +3,7 @@ * include/linux/uio_driver.h * * Copyright(C) 2005, Benedikt Spranger - * Copyright(C) 2005, Thomas Gleixner + * Copyright(C) 2005, Linutronix GmbH, Thomas Gleixner * Copyright(C) 2006, Hans J. Koch * Copyright(C) 2006, Greg Kroah-Hartman * diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index c44a8fb3e418..72f03153dd32 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -2,7 +2,7 @@ /* * Performance events: * - * Copyright (C) 2008-2009, Thomas Gleixner + * Copyright (C) 2008-2009, Linutronix GmbH, Thomas Gleixner * Copyright (C) 2008-2011, Red Hat, Inc., Ingo Molnar * Copyright (C) 2008-2011, Red Hat, Inc., Peter Zijlstra * diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c index b9c7e00725d6..1f6589578703 100644 --- a/kernel/events/callchain.c +++ b/kernel/events/callchain.c @@ -2,7 +2,7 @@ /* * Performance events callchain code, extracted from core.c: * - * Copyright (C) 2008 Thomas Gleixner + * Copyright (C) 2008 Linutronix GmbH, Thomas Gleixner * Copyright (C) 2008-2011 Red Hat, Inc., Ingo Molnar * Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra * Copyright © 2009 Paul Mackerras, IBM Corp. diff --git a/kernel/events/core.c b/kernel/events/core.c index dad0d3d2e85f..f5e9d30e4fa9 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2,7 +2,7 @@ /* * Performance events core code: * - * Copyright (C) 2008 Thomas Gleixner + * Copyright (C) 2008 Linutronix GmbH, Thomas Gleixner * Copyright (C) 2008-2011 Red Hat, Inc., Ingo Molnar * Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra * Copyright © 2009 Paul Mackerras, IBM Corp. diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 20a905023736..3e7de2661417 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -2,7 +2,7 @@ /* * Performance events ring-buffer code: * - * Copyright (C) 2008 Thomas Gleixner + * Copyright (C) 2008 Linutronix GmbH, Thomas Gleixner * Copyright (C) 2008-2011 Red Hat, Inc., Ingo Molnar * Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra * Copyright © 2009 Paul Mackerras, IBM Corp. diff --git a/kernel/irq/debugfs.c b/kernel/irq/debugfs.c index 3527defd2890..5c5ebaee35f2 100644 --- a/kernel/irq/debugfs.c +++ b/kernel/irq/debugfs.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -// Copyright 2017 Thomas Gleixner +// Copyright 2017 Linutronix GmbH, Thomas Gleixner #include #include diff --git a/kernel/irq/matrix.c b/kernel/irq/matrix.c index 8f222d1cccec..a50f2305a8dc 100644 --- a/kernel/irq/matrix.c +++ b/kernel/irq/matrix.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -// Copyright (C) 2017 Thomas Gleixner +// Copyright (C) 2017 Linutronix GmbH, Thomas Gleixner #include #include diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index da46c3164537..e71302282671 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -15,7 +15,7 @@ * Author: Srivatsa Vaddagiri * * Scaled math optimizations by Thomas Gleixner - * Copyright (C) 2007, Thomas Gleixner + * Copyright (C) 2007, Linutronix GmbH, Thomas Gleixner * * Adaptive scheduling granularity, math enhancements by Peter Zijlstra * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra diff --git a/kernel/sched/pelt.c b/kernel/sched/pelt.c index fa83bbaf4f3e..897790889ba3 100644 --- a/kernel/sched/pelt.c +++ b/kernel/sched/pelt.c @@ -15,7 +15,7 @@ * Author: Srivatsa Vaddagiri * * Scaled math optimizations by Thomas Gleixner - * Copyright (C) 2007, Thomas Gleixner + * Copyright (C) 2007, Linutronix GmbH, Thomas Gleixner * * Adaptive scheduling granularity, math enhancements by Peter Zijlstra * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index a59bc75ab7c5..eaae1ce9f060 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -2,7 +2,7 @@ /* * This file contains functions which manage clock event devices. * - * Copyright(C) 2005-2006, Thomas Gleixner + * Copyright(C) 2005-2006, Linutronix GmbH, Thomas Gleixner * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar * Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner */ diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index f8ea8c8fc895..bdb30cc5e873 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright(C) 2005-2006, Thomas Gleixner + * Copyright(C) 2005-2006, Linutronix GmbH, Thomas Gleixner * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar * Copyright(C) 2006-2007 Timesys Corp., Thomas Gleixner * diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 0207868c8b4d..f63c65881364 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -3,7 +3,7 @@ * This file contains functions which emulate a local clock-event * device via a broadcast event source. * - * Copyright(C) 2005-2006, Thomas Gleixner + * Copyright(C) 2005-2006, Linutronix GmbH, Thomas Gleixner * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar * Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner */ diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 7e33d3f2e889..d305d8521896 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -3,7 +3,7 @@ * This file contains the base functions to manage periodic tick * related events. * - * Copyright(C) 2005-2006, Thomas Gleixner + * Copyright(C) 2005-2006, Linutronix GmbH, Thomas Gleixner * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar * Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner */ diff --git a/kernel/time/tick-oneshot.c b/kernel/time/tick-oneshot.c index ffee943d796d..7472597f3225 100644 --- a/kernel/time/tick-oneshot.c +++ b/kernel/time/tick-oneshot.c @@ -3,7 +3,7 @@ * This file contains functions which manage high resolution tick * related events. * - * Copyright(C) 2005-2006, Thomas Gleixner + * Copyright(C) 2005-2006, Linutronix GmbH, Thomas Gleixner * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar * Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner */ diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 8ddf74e705d3..2f8a7923fa27 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright(C) 2005-2006, Thomas Gleixner + * Copyright(C) 2005-2006, Linutronix GmbH, Thomas Gleixner * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar * Copyright(C) 2006-2007 Timesys Corp., Thomas Gleixner * diff --git a/lib/debugobjects.c b/lib/debugobjects.c index ecf8e7f978e3..89a1d6745dc2 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -2,7 +2,7 @@ /* * Generic infrastructure for lifetime debugging of objects. * - * Copyright (C) 2008, Thomas Gleixner + * Copyright (C) 2008, Linutronix GmbH, Thomas Gleixner */ #define pr_fmt(fmt) "ODEBUG: " fmt diff --git a/lib/plist.c b/lib/plist.c index ba677c31e8f3..a5bef38add43 100644 --- a/lib/plist.c +++ b/lib/plist.c @@ -10,7 +10,7 @@ * 2001-2005 (c) MontaVista Software, Inc. * Daniel Walker * - * (C) 2005 Thomas Gleixner + * (C) 2005 Linutronix GmbH, Thomas Gleixner * * Simplifications of the original code by * Oleg Nesterov diff --git a/lib/reed_solomon/decode_rs.c b/lib/reed_solomon/decode_rs.c index 805de84ae83d..ef86ee2aec58 100644 --- a/lib/reed_solomon/decode_rs.c +++ b/lib/reed_solomon/decode_rs.c @@ -5,7 +5,7 @@ * Copyright 2002, Phil Karn, KA9Q * May be used under the terms of the GNU General Public License (GPL) * - * Adaption to the kernel by Thomas Gleixner (tglx@linutronix.de) + * Adaption to the kernel by Thomas Gleixner (tglx@kernel.org) * * Generic data width independent code which is included by the wrappers. */ diff --git a/lib/reed_solomon/encode_rs.c b/lib/reed_solomon/encode_rs.c index 9112d46e869e..1d9e51dcc83d 100644 --- a/lib/reed_solomon/encode_rs.c +++ b/lib/reed_solomon/encode_rs.c @@ -5,7 +5,7 @@ * Copyright 2002, Phil Karn, KA9Q * May be used under the terms of the GNU General Public License (GPL) * - * Adaption to the kernel by Thomas Gleixner (tglx@linutronix.de) + * Adaption to the kernel by Thomas Gleixner (tglx@kernel.org) * * Generic data width independent code which is included by the wrappers. */ diff --git a/lib/reed_solomon/reed_solomon.c b/lib/reed_solomon/reed_solomon.c index bbc01bad3053..a9e2dcb6f2a7 100644 --- a/lib/reed_solomon/reed_solomon.c +++ b/lib/reed_solomon/reed_solomon.c @@ -2,7 +2,7 @@ /* * Generic Reed Solomon encoder / decoder library * - * Copyright (C) 2004 Thomas Gleixner (tglx@linutronix.de) + * Copyright (C) 2004 Thomas Gleixner (tglx@kernel.org) * * Reed Solomon code lifted from reed solomon library written by Phil Karn * Copyright 2002 Phil Karn, KA9Q diff --git a/scripts/spdxcheck.py b/scripts/spdxcheck.py index 8d608f61bf37..908029e45ca2 100755 --- a/scripts/spdxcheck.py +++ b/scripts/spdxcheck.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # SPDX-License-Identifier: GPL-2.0 -# Copyright Thomas Gleixner +# Copyright Linutronix GmbH, Thomas Gleixner from argparse import ArgumentParser from ply import lex, yacc diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index c44a8fb3e418..72f03153dd32 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -2,7 +2,7 @@ /* * Performance events: * - * Copyright (C) 2008-2009, Thomas Gleixner + * Copyright (C) 2008-2009, Linutronix GmbH, Thomas Gleixner * Copyright (C) 2008-2011, Red Hat, Inc., Ingo Molnar * Copyright (C) 2008-2011, Red Hat, Inc., Peter Zijlstra * diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index 5cbca0bacd35..87a5491048ac 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -4,7 +4,7 @@ * * Builtin list command: list all event types * - * Copyright (C) 2009, Thomas Gleixner + * Copyright (C) 2009, Linutronix GmbH, Thomas Gleixner * Copyright (C) 2008-2009, Red Hat Inc, Ingo Molnar * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo */ -- cgit v1.2.3 From 0196932f539e306e122b6edf24c9f5e30d1f73ee Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Fri, 9 Jan 2026 18:17:59 +0100 Subject: spi: spi-mem: Make the DTR command operation macro more suitable In order to introduce DTR support in SPI NAND, a number of macros had to be created in the spi-mem layer. One of them remained unused at this point, SPI_MEM_DTR_OP_CMD. Being in the process of introducing octal DTR support now, experience shows that as-is the macro is not useful. In order to be really useful in octal DTR mode, the command opcode (one byte) must always be transmitted on the 8 data lines on both the rising and falling edge of the clock. Align the macro with the real needs by duplicating the opcode in the buffer and doubling its size. Reviewed-by: Tudor Ambarus Signed-off-by: Miquel Raynal Link: https://patch.msgid.link/20260109-winbond-v6-17-rc1-oddr-v2-1-1fff6a2ddb80@bootlin.com Signed-off-by: Mark Brown --- include/linux/spi/spi-mem.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/spi/spi-mem.h b/include/linux/spi/spi-mem.h index 82390712794c..81c9c7e793b6 100644 --- a/include/linux/spi/spi-mem.h +++ b/include/linux/spi/spi-mem.h @@ -20,10 +20,10 @@ .opcode = __opcode, \ } -#define SPI_MEM_DTR_OP_CMD(__opcode, __buswidth) \ +#define SPI_MEM_DTR_OP_RPT_CMD(__opcode, __buswidth) \ { \ - .nbytes = 1, \ - .opcode = __opcode, \ + .nbytes = 2, \ + .opcode = __opcode | __opcode << 8, \ .buswidth = __buswidth, \ .dtr = true, \ } -- cgit v1.2.3 From af4b2dc4810380a469dcd7508923b70892c2996a Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Fri, 9 Jan 2026 18:18:00 +0100 Subject: spi: spi-mem: Create a repeated address operation In octal DTR mode addresses may either be long enough to cover at least two bytes (in which case the existing macro works), or otherwise for single byte addresses, the byte must also be duplicated and sent twice: on each front of the clock. Create a macro for this common case. Signed-off-by: Miquel Raynal Link: https://patch.msgid.link/20260109-winbond-v6-17-rc1-oddr-v2-2-1fff6a2ddb80@bootlin.com Signed-off-by: Mark Brown --- include/linux/spi/spi-mem.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/spi/spi-mem.h b/include/linux/spi/spi-mem.h index 81c9c7e793b6..e4db0924898c 100644 --- a/include/linux/spi/spi-mem.h +++ b/include/linux/spi/spi-mem.h @@ -43,6 +43,14 @@ .dtr = true, \ } +#define SPI_MEM_DTR_OP_RPT_ADDR(__val, __buswidth) \ + { \ + .nbytes = 2, \ + .val = __val | __val << 8, \ + .buswidth = __buswidth, \ + .dtr = true, \ + } + #define SPI_MEM_OP_NO_ADDR { } #define SPI_MEM_OP_DUMMY(__nbytes, __buswidth) \ -- cgit v1.2.3 From 28c3edc43cb9ab6ebe43439d48a662a095409b03 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Fri, 9 Jan 2026 18:18:00 +0100 Subject: spi: spi-mem: Create a repeated address operation In octal DTR mode addresses may either be long enough to cover at least two bytes (in which case the existing macro works), or otherwise for single byte addresses, the byte must also be duplicated and sent twice: on each front of the clock. Create a macro for this common case. Signed-off-by: Miquel Raynal --- include/linux/spi/spi-mem.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/spi/spi-mem.h b/include/linux/spi/spi-mem.h index e4db0924898c..5774e554c0f0 100644 --- a/include/linux/spi/spi-mem.h +++ b/include/linux/spi/spi-mem.h @@ -51,6 +51,14 @@ .dtr = true, \ } +#define SPI_MEM_DTR_OP_RPT_ADDR(__val, __buswidth) \ + { \ + .nbytes = 2, \ + .val = __val | __val << 8, \ + .buswidth = __buswidth, \ + .dtr = true, \ + } + #define SPI_MEM_OP_NO_ADDR { } #define SPI_MEM_OP_DUMMY(__nbytes, __buswidth) \ -- cgit v1.2.3 From a57b1f07d2d35843a7ada30c8cf9a215c0931868 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Fri, 9 Jan 2026 18:18:02 +0100 Subject: mtd: spinand: Fix kernel doc The @data buffer is 5 bytes, not 4, it has been extended for the need of devices with an extra ID bytes. Fixes: 34a956739d29 ("mtd: spinand: Add support for 5-byte IDs") Reviewed-by: Tudor Ambarus Signed-off-by: Miquel Raynal --- include/linux/mtd/spinand.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h index c50a43b447d2..3de22f0f79d7 100644 --- a/include/linux/mtd/spinand.h +++ b/include/linux/mtd/spinand.h @@ -287,7 +287,7 @@ struct spinand_device; /** * struct spinand_id - SPI NAND id structure - * @data: buffer containing the id bytes. Currently 4 bytes large, but can + * @data: buffer containing the id bytes. Currently 5 bytes large, but can * be extended if required * @len: ID length */ -- cgit v1.2.3 From d48db8ca47662f5f4e1b7e173687d853f017ae3a Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Fri, 9 Jan 2026 18:18:04 +0100 Subject: mtd: spinand: Remove stale definitions SPI NAND command values are directly included in the macros defining the ops. These are stale definitions, they are unused so drop them. Reviewed-by: Tudor Ambarus Signed-off-by: Miquel Raynal --- include/linux/mtd/spinand.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h index 3de22f0f79d7..cafbd0fa8db0 100644 --- a/include/linux/mtd/spinand.h +++ b/include/linux/mtd/spinand.h @@ -232,12 +232,6 @@ SPI_MEM_OP_NO_DUMMY, \ SPI_MEM_OP_DATA_OUT(len, buf, 8)) -/** - * Standard SPI NAND flash commands - */ -#define SPINAND_CMD_PROG_LOAD_X4 0x32 -#define SPINAND_CMD_PROG_LOAD_RDM_DATA_X4 0x34 - /* feature register */ #define REG_BLOCK_LOCK 0xa0 #define BL_ALL_UNLOCKED 0x00 -- cgit v1.2.3 From c0ba929cf7a960c796cc9946b3f79d8405e9b805 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Fri, 9 Jan 2026 18:18:06 +0100 Subject: mtd: spinand: Decouple write enable and write disable operations In order to introduce templates for all operations and not only for page helpers (in order to introduce octal DDR support), decouple the WR_EN and WR_DIS operations into two separate macros. Adapt the callers accordingly. There is no functional change. Reviewed-by: Tudor Ambarus Signed-off-by: Miquel Raynal --- drivers/mtd/nand/spi/core.c | 2 +- drivers/mtd/nand/spi/esmt.c | 2 +- drivers/mtd/nand/spi/micron.c | 2 +- include/linux/mtd/spinand.h | 10 ++++++++-- 4 files changed, 11 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/drivers/mtd/nand/spi/core.c b/drivers/mtd/nand/spi/core.c index 2c587685e02a..30a8f2894d02 100644 --- a/drivers/mtd/nand/spi/core.c +++ b/drivers/mtd/nand/spi/core.c @@ -362,7 +362,7 @@ static void spinand_ondie_ecc_save_status(struct nand_device *nand, u8 status) int spinand_write_enable_op(struct spinand_device *spinand) { - struct spi_mem_op op = SPINAND_WR_EN_DIS_1S_0_0_OP(true); + struct spi_mem_op op = SPINAND_WR_EN_1S_0_0_OP; return spi_mem_exec_op(spinand->spimem, &op); } diff --git a/drivers/mtd/nand/spi/esmt.c b/drivers/mtd/nand/spi/esmt.c index e60e4ac1fd6f..adadc01e8f2f 100644 --- a/drivers/mtd/nand/spi/esmt.c +++ b/drivers/mtd/nand/spi/esmt.c @@ -138,7 +138,7 @@ static int f50l1g41lb_user_otp_info(struct spinand_device *spinand, size_t len, static int f50l1g41lb_otp_lock(struct spinand_device *spinand, loff_t from, size_t len) { - struct spi_mem_op write_op = SPINAND_WR_EN_DIS_1S_0_0_OP(true); + struct spi_mem_op write_op = SPINAND_WR_EN_1S_0_0_OP; struct spi_mem_op exec_op = SPINAND_PROG_EXEC_1S_1S_0_OP(0); u8 status; int ret; diff --git a/drivers/mtd/nand/spi/micron.c b/drivers/mtd/nand/spi/micron.c index a49d7cb6a96d..b8130e04e8e7 100644 --- a/drivers/mtd/nand/spi/micron.c +++ b/drivers/mtd/nand/spi/micron.c @@ -251,7 +251,7 @@ static int mt29f2g01abagd_user_otp_info(struct spinand_device *spinand, static int mt29f2g01abagd_otp_lock(struct spinand_device *spinand, loff_t from, size_t len) { - struct spi_mem_op write_op = SPINAND_WR_EN_DIS_1S_0_0_OP(true); + struct spi_mem_op write_op = SPINAND_WR_EN_1S_0_0_OP; struct spi_mem_op exec_op = SPINAND_PROG_EXEC_1S_1S_0_OP(0); u8 status; int ret; diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h index cafbd0fa8db0..4715083aa8e5 100644 --- a/include/linux/mtd/spinand.h +++ b/include/linux/mtd/spinand.h @@ -26,8 +26,14 @@ SPI_MEM_OP_NO_DUMMY, \ SPI_MEM_OP_NO_DATA) -#define SPINAND_WR_EN_DIS_1S_0_0_OP(enable) \ - SPI_MEM_OP(SPI_MEM_OP_CMD((enable) ? 0x06 : 0x04, 1), \ +#define SPINAND_WR_EN_1S_0_0_OP \ + SPI_MEM_OP(SPI_MEM_OP_CMD(0x06, 1), \ + SPI_MEM_OP_NO_ADDR, \ + SPI_MEM_OP_NO_DUMMY, \ + SPI_MEM_OP_NO_DATA) + +#define SPINAND_WR_DIS_1S_0_0_OP \ + SPI_MEM_OP(SPI_MEM_OP_CMD(0x04, 1), \ SPI_MEM_OP_NO_ADDR, \ SPI_MEM_OP_NO_DUMMY, \ SPI_MEM_OP_NO_DATA) -- cgit v1.2.3 From 408015023294958407925bc50cdd85718d12a335 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Fri, 9 Jan 2026 18:18:07 +0100 Subject: mtd: spinand: Create an array of operation templates Currently, the SPI NAND core implementation directly calls macros to get the various operations in shape. These macros are specific to the bus interface, currently only supporting the single SDR interface (any command following the 1S-XX-XX pattern). Introducing support for other bus interfaces (such as octal DTR) would mean that every user of these macros should become aware of the current bus interface and act accordingly, picking up and adapting to the current configuration. This would add quite a bit of boilerplate, be repetitive as well as error prone in case we miss one occurrence. Instead, let's create a table with all SPI NAND memory operations that are currently supported. We initialize them with the same single SDR _OP macros as before. This opens the possibility for users of the individual macros to make use of these templates instead. This way, when we will add another bus interface, we can just switch to another set of templates and all users will magically fill in their spi_mem_op structures with the correct ops. The existing read, write and update cache variants are also moved in this template array, which is barely noticeable by callers as we also add a structure member pointing to it. Reviewed-by: Tudor Ambarus Signed-off-by: Miquel Raynal --- drivers/mtd/nand/spi/core.c | 38 +++++++++++++++++++++++++++---------- drivers/mtd/nand/spi/winbond.c | 4 ++-- include/linux/mtd/spinand.h | 43 +++++++++++++++++++++++++++++++++--------- 3 files changed, 64 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/drivers/mtd/nand/spi/core.c b/drivers/mtd/nand/spi/core.c index 30a8f2894d02..4ab63d7fc461 100644 --- a/drivers/mtd/nand/spi/core.c +++ b/drivers/mtd/nand/spi/core.c @@ -184,9 +184,9 @@ static int spinand_init_quad_enable(struct spinand_device *spinand) if (!(spinand->flags & SPINAND_HAS_QE_BIT)) return 0; - if (spinand->op_templates.read_cache->data.buswidth == 4 || - spinand->op_templates.write_cache->data.buswidth == 4 || - spinand->op_templates.update_cache->data.buswidth == 4) + if (spinand->op_templates->read_cache->data.buswidth == 4 || + spinand->op_templates->write_cache->data.buswidth == 4 || + spinand->op_templates->update_cache->data.buswidth == 4) enable = true; return spinand_upd_cfg(spinand, CFG_QUAD_ENABLE, @@ -1154,7 +1154,7 @@ static int spinand_create_dirmap(struct spinand_device *spinand, info.offset = plane << fls(nand->memorg.pagesize); info.length = nanddev_page_size(nand) + nanddev_per_page_oobsize(nand); - info.op_tmpl = *spinand->op_templates.update_cache; + info.op_tmpl = *spinand->op_templates->update_cache; desc = devm_spi_mem_dirmap_create(&spinand->spimem->spi->dev, spinand->spimem, &info); if (IS_ERR(desc)) @@ -1162,7 +1162,7 @@ static int spinand_create_dirmap(struct spinand_device *spinand, spinand->dirmaps[plane].wdesc = desc; - info.op_tmpl = *spinand->op_templates.read_cache; + info.op_tmpl = *spinand->op_templates->read_cache; desc = spinand_create_rdesc(spinand, &info); if (IS_ERR(desc)) return PTR_ERR(desc); @@ -1177,7 +1177,7 @@ static int spinand_create_dirmap(struct spinand_device *spinand, } info.length = nanddev_page_size(nand) + nanddev_per_page_oobsize(nand); - info.op_tmpl = *spinand->op_templates.update_cache; + info.op_tmpl = *spinand->op_templates->update_cache; info.op_tmpl.data.ecc = true; desc = devm_spi_mem_dirmap_create(&spinand->spimem->spi->dev, spinand->spimem, &info); @@ -1186,7 +1186,7 @@ static int spinand_create_dirmap(struct spinand_device *spinand, spinand->dirmaps[plane].wdesc_ecc = desc; - info.op_tmpl = *spinand->op_templates.read_cache; + info.op_tmpl = *spinand->op_templates->read_cache; info.op_tmpl.data.ecc = true; desc = spinand_create_rdesc(spinand, &info); if (IS_ERR(desc)) @@ -1324,6 +1324,22 @@ static void spinand_manufacturer_cleanup(struct spinand_device *spinand) return spinand->manufacturer->ops->cleanup(spinand); } +static void spinand_init_ssdr_templates(struct spinand_device *spinand) +{ + struct spinand_mem_ops *tmpl = &spinand->ssdr_op_templates; + + tmpl->reset = (struct spi_mem_op)SPINAND_RESET_1S_0_0_OP; + tmpl->readid = (struct spi_mem_op)SPINAND_READID_1S_1S_1S_OP(0, 0, NULL, 0); + tmpl->wr_en = (struct spi_mem_op)SPINAND_WR_EN_1S_0_0_OP; + tmpl->wr_dis = (struct spi_mem_op)SPINAND_WR_DIS_1S_0_0_OP; + tmpl->set_feature = (struct spi_mem_op)SPINAND_SET_FEATURE_1S_1S_1S_OP(0, NULL); + tmpl->get_feature = (struct spi_mem_op)SPINAND_GET_FEATURE_1S_1S_1S_OP(0, NULL); + tmpl->blk_erase = (struct spi_mem_op)SPINAND_BLK_ERASE_1S_1S_0_OP(0); + tmpl->page_read = (struct spi_mem_op)SPINAND_PAGE_READ_1S_1S_0_OP(0); + tmpl->prog_exec = (struct spi_mem_op)SPINAND_PROG_EXEC_1S_1S_0_OP(0); + spinand->op_templates = &spinand->ssdr_op_templates; +} + static const struct spi_mem_op * spinand_select_op_variant(struct spinand_device *spinand, const struct spinand_op_variants *variants) @@ -1419,21 +1435,21 @@ int spinand_match_and_init(struct spinand_device *spinand, if (!op) return -EOPNOTSUPP; - spinand->op_templates.read_cache = op; + spinand->ssdr_op_templates.read_cache = op; op = spinand_select_op_variant(spinand, info->op_variants.write_cache); if (!op) return -EOPNOTSUPP; - spinand->op_templates.write_cache = op; + spinand->ssdr_op_templates.write_cache = op; op = spinand_select_op_variant(spinand, info->op_variants.update_cache); if (!op) return -EOPNOTSUPP; - spinand->op_templates.update_cache = op; + spinand->ssdr_op_templates.update_cache = op; return 0; } @@ -1548,6 +1564,8 @@ static int spinand_init(struct spinand_device *spinand) if (!spinand->scratchbuf) return -ENOMEM; + spinand_init_ssdr_templates(spinand); + ret = spinand_detect(spinand); if (ret) goto err_free_bufs; diff --git a/drivers/mtd/nand/spi/winbond.c b/drivers/mtd/nand/spi/winbond.c index 4870b2d5edb2..d5799c2df065 100644 --- a/drivers/mtd/nand/spi/winbond.c +++ b/drivers/mtd/nand/spi/winbond.c @@ -291,7 +291,7 @@ static int w25n0xjw_hs_cfg(struct spinand_device *spinand) u8 sr4; int ret; - op = spinand->op_templates.read_cache; + op = spinand->op_templates->read_cache; if (op->cmd.dtr || op->addr.dtr || op->dummy.dtr || op->data.dtr) hs = false; else if (op->cmd.buswidth == 1 && op->addr.buswidth == 1 && @@ -355,7 +355,7 @@ static int w35n0xjw_vcr_cfg(struct spinand_device *spinand) u8 io_mode; int ret; - op = spinand->op_templates.read_cache; + op = spinand->op_templates->read_cache; single = (op->cmd.buswidth == 1 && op->addr.buswidth == 1 && op->data.buswidth == 1); dtr = (op->cmd.dtr || op->addr.dtr || op->data.dtr); diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h index 4715083aa8e5..a458617fb375 100644 --- a/include/linux/mtd/spinand.h +++ b/include/linux/mtd/spinand.h @@ -600,6 +600,36 @@ struct spinand_dirmap { struct spi_mem_dirmap_desc *rdesc_ecc; }; +/** + * struct spinand_mem_ops - SPI NAND memory operations + * @reset: reset op template + * @readid: read ID op template + * @wr_en: write enable op template + * @wr_dis: write disable op template + * @set_feature: set feature op template + * @get_feature: get feature op template + * @blk_erase: blk erase op template + * @page_read: page read op template + * @prog_exec: prog exec op template + * @read_cache: read cache op template + * @write_cache: write cache op template + * @update_cache: update cache op template + */ +struct spinand_mem_ops { + struct spi_mem_op reset; + struct spi_mem_op readid; + struct spi_mem_op wr_en; + struct spi_mem_op wr_dis; + struct spi_mem_op set_feature; + struct spi_mem_op get_feature; + struct spi_mem_op blk_erase; + struct spi_mem_op page_read; + struct spi_mem_op prog_exec; + const struct spi_mem_op *read_cache; + const struct spi_mem_op *write_cache; + const struct spi_mem_op *update_cache; +}; + /** * struct spinand_device - SPI NAND device instance * @base: NAND device instance @@ -607,10 +637,8 @@ struct spinand_dirmap { * @lock: lock used to serialize accesses to the NAND * @id: NAND ID as returned by READ_ID * @flags: NAND flags - * @op_templates: various SPI mem op templates - * @op_templates.read_cache: read cache op template - * @op_templates.write_cache: write cache op template - * @op_templates.update_cache: update cache op template + * @ssdr_op_templates: Templates for all single SDR SPI mem operations + * @op_templates: Templates for all SPI mem operations * @select_target: select a specific target/die. Usually called before sending * a command addressing a page or an eraseblock embedded in * this die. Only required if your chip exposes several dies @@ -644,11 +672,8 @@ struct spinand_device { struct spinand_id id; u32 flags; - struct { - const struct spi_mem_op *read_cache; - const struct spi_mem_op *write_cache; - const struct spi_mem_op *update_cache; - } op_templates; + struct spinand_mem_ops ssdr_op_templates; + struct spinand_mem_ops *op_templates; struct spinand_dirmap *dirmaps; -- cgit v1.2.3 From 88b0e3584acb905c41252b7917013ecf7c0518bc Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Fri, 9 Jan 2026 18:18:08 +0100 Subject: mtd: spinand: Make use of the operation templates through SPINAND_OP() Create a SPINAND_OP() macro to which we give the name of the operation we want. This macro retrieves the correct operation template based on the current bus interface (currently only single SDR, will soon be extended to octal DTR) and fills it with the usual parameters. This macro makes the transition from calling directly the low-level macros into using the (bus interface dependent) templates very smooth. Use it in all places that can be trivially converted. At this stage there is no functional change expected, until octal DTR support gets added. Reviewed-by: Tudor Ambarus Signed-off-by: Miquel Raynal --- drivers/mtd/nand/spi/core.c | 110 +++++++++++++++++++++++++++++++++----- drivers/mtd/nand/spi/esmt.c | 4 +- drivers/mtd/nand/spi/gigadevice.c | 8 +-- drivers/mtd/nand/spi/macronix.c | 4 +- drivers/mtd/nand/spi/micron.c | 8 +-- drivers/mtd/nand/spi/toshiba.c | 3 +- drivers/mtd/nand/spi/winbond.c | 3 +- include/linux/mtd/spinand.h | 8 +++ 8 files changed, 121 insertions(+), 27 deletions(-) (limited to 'include') diff --git a/drivers/mtd/nand/spi/core.c b/drivers/mtd/nand/spi/core.c index 4ab63d7fc461..7146eec51afa 100644 --- a/drivers/mtd/nand/spi/core.c +++ b/drivers/mtd/nand/spi/core.c @@ -20,10 +20,94 @@ #include #include +static struct spi_mem_op +spinand_fill_reset_op(struct spinand_device *spinand) +{ + return spinand->op_templates->reset; +} + +static struct spi_mem_op +spinand_fill_readid_op(struct spinand_device *spinand, + u8 naddr, u8 ndummy, void *buf, unsigned int len) +{ + struct spi_mem_op op = spinand->op_templates->readid; + + op.addr.nbytes = naddr; + op.dummy.nbytes = ndummy; + op.data.buf.in = buf; + op.data.nbytes = len; + + return op; +} + +struct spi_mem_op +spinand_fill_wr_en_op(struct spinand_device *spinand) +{ + return spinand->op_templates->wr_en; +} + +static __maybe_unused struct spi_mem_op +spinand_fill_wr_dis_op(struct spinand_device *spinand) +{ + return spinand->op_templates->wr_dis; +} + +struct spi_mem_op +spinand_fill_set_feature_op(struct spinand_device *spinand, u64 reg, const void *valptr) +{ + struct spi_mem_op op = spinand->op_templates->set_feature; + + op.addr.val = reg; + op.data.buf.out = valptr; + + return op; +} + +struct spi_mem_op +spinand_fill_get_feature_op(struct spinand_device *spinand, u64 reg, void *valptr) +{ + struct spi_mem_op op = spinand->op_templates->get_feature; + + op.addr.val = reg; + op.data.buf.in = valptr; + + return op; +} + +static struct spi_mem_op +spinand_fill_blk_erase_op(struct spinand_device *spinand, u64 addr) +{ + struct spi_mem_op op = spinand->op_templates->blk_erase; + + op.addr.val = addr; + + return op; +} + +static struct spi_mem_op +spinand_fill_page_read_op(struct spinand_device *spinand, u64 addr) +{ + struct spi_mem_op op = spinand->op_templates->page_read; + + op.addr.val = addr; + + return op; +} + +struct spi_mem_op +spinand_fill_prog_exec_op(struct spinand_device *spinand, u64 addr) +{ + struct spi_mem_op op = spinand->op_templates->prog_exec; + + op.addr.val = addr; + + return op; +} + int spinand_read_reg_op(struct spinand_device *spinand, u8 reg, u8 *val) { - struct spi_mem_op op = SPINAND_GET_FEATURE_1S_1S_1S_OP(reg, - spinand->scratchbuf); + struct spi_mem_op op = SPINAND_OP(spinand, get_feature, + reg, spinand->scratchbuf); int ret; ret = spi_mem_exec_op(spinand->spimem, &op); @@ -36,8 +120,8 @@ int spinand_read_reg_op(struct spinand_device *spinand, u8 reg, u8 *val) int spinand_write_reg_op(struct spinand_device *spinand, u8 reg, u8 val) { - struct spi_mem_op op = SPINAND_SET_FEATURE_1S_1S_1S_OP(reg, - spinand->scratchbuf); + struct spi_mem_op op = SPINAND_OP(spinand, set_feature, + reg, spinand->scratchbuf); *spinand->scratchbuf = val; return spi_mem_exec_op(spinand->spimem, &op); @@ -362,7 +446,7 @@ static void spinand_ondie_ecc_save_status(struct nand_device *nand, u8 status) int spinand_write_enable_op(struct spinand_device *spinand) { - struct spi_mem_op op = SPINAND_WR_EN_1S_0_0_OP; + struct spi_mem_op op = SPINAND_OP(spinand, wr_en); return spi_mem_exec_op(spinand->spimem, &op); } @@ -372,7 +456,7 @@ static int spinand_load_page_op(struct spinand_device *spinand, { struct nand_device *nand = spinand_to_nand(spinand); unsigned int row = nanddev_pos_to_row(nand, &req->pos); - struct spi_mem_op op = SPINAND_PAGE_READ_1S_1S_0_OP(row); + struct spi_mem_op op = SPINAND_OP(spinand, page_read, row); return spi_mem_exec_op(spinand->spimem, &op); } @@ -527,7 +611,7 @@ static int spinand_program_op(struct spinand_device *spinand, { struct nand_device *nand = spinand_to_nand(spinand); unsigned int row = nanddev_pos_to_row(nand, &req->pos); - struct spi_mem_op op = SPINAND_PROG_EXEC_1S_1S_0_OP(row); + struct spi_mem_op op = SPINAND_OP(spinand, prog_exec, row); return spi_mem_exec_op(spinand->spimem, &op); } @@ -537,7 +621,7 @@ static int spinand_erase_op(struct spinand_device *spinand, { struct nand_device *nand = spinand_to_nand(spinand); unsigned int row = nanddev_pos_to_row(nand, pos); - struct spi_mem_op op = SPINAND_BLK_ERASE_1S_1S_0_OP(row); + struct spi_mem_op op = SPINAND_OP(spinand, blk_erase, row); return spi_mem_exec_op(spinand->spimem, &op); } @@ -557,8 +641,8 @@ static int spinand_erase_op(struct spinand_device *spinand, int spinand_wait(struct spinand_device *spinand, unsigned long initial_delay_us, unsigned long poll_delay_us, u8 *s) { - struct spi_mem_op op = SPINAND_GET_FEATURE_1S_1S_1S_OP(REG_STATUS, - spinand->scratchbuf); + struct spi_mem_op op = SPINAND_OP(spinand, get_feature, + REG_STATUS, spinand->scratchbuf); u8 status; int ret; @@ -591,8 +675,8 @@ out: static int spinand_read_id_op(struct spinand_device *spinand, u8 naddr, u8 ndummy, u8 *buf) { - struct spi_mem_op op = SPINAND_READID_1S_1S_1S_OP( - naddr, ndummy, spinand->scratchbuf, SPINAND_MAX_ID_LEN); + struct spi_mem_op op = SPINAND_OP(spinand, readid, + naddr, ndummy, spinand->scratchbuf, SPINAND_MAX_ID_LEN); int ret; ret = spi_mem_exec_op(spinand->spimem, &op); @@ -604,7 +688,7 @@ static int spinand_read_id_op(struct spinand_device *spinand, u8 naddr, static int spinand_reset_op(struct spinand_device *spinand) { - struct spi_mem_op op = SPINAND_RESET_1S_0_0_OP; + struct spi_mem_op op = SPINAND_OP(spinand, reset); int ret; ret = spi_mem_exec_op(spinand->spimem, &op); diff --git a/drivers/mtd/nand/spi/esmt.c b/drivers/mtd/nand/spi/esmt.c index adadc01e8f2f..3020aa89a495 100644 --- a/drivers/mtd/nand/spi/esmt.c +++ b/drivers/mtd/nand/spi/esmt.c @@ -138,8 +138,8 @@ static int f50l1g41lb_user_otp_info(struct spinand_device *spinand, size_t len, static int f50l1g41lb_otp_lock(struct spinand_device *spinand, loff_t from, size_t len) { - struct spi_mem_op write_op = SPINAND_WR_EN_1S_0_0_OP; - struct spi_mem_op exec_op = SPINAND_PROG_EXEC_1S_1S_0_OP(0); + struct spi_mem_op write_op = SPINAND_OP(spinand, wr_en); + struct spi_mem_op exec_op = SPINAND_OP(spinand, prog_exec, 0); u8 status; int ret; diff --git a/drivers/mtd/nand/spi/gigadevice.c b/drivers/mtd/nand/spi/gigadevice.c index 72ad36c9a126..e4380208edd0 100644 --- a/drivers/mtd/nand/spi/gigadevice.c +++ b/drivers/mtd/nand/spi/gigadevice.c @@ -266,8 +266,8 @@ static int gd5fxgq4uexxg_ecc_get_status(struct spinand_device *spinand, u8 status) { u8 status2; - struct spi_mem_op op = SPINAND_GET_FEATURE_1S_1S_1S_OP(GD5FXGQXXEXXG_REG_STATUS2, - spinand->scratchbuf); + struct spi_mem_op op = SPINAND_OP(spinand, get_feature, + GD5FXGQXXEXXG_REG_STATUS2, spinand->scratchbuf); int ret; switch (status & STATUS_ECC_MASK) { @@ -309,8 +309,8 @@ static int gd5fxgq5xexxg_ecc_get_status(struct spinand_device *spinand, u8 status) { u8 status2; - struct spi_mem_op op = SPINAND_GET_FEATURE_1S_1S_1S_OP(GD5FXGQXXEXXG_REG_STATUS2, - spinand->scratchbuf); + struct spi_mem_op op = SPINAND_OP(spinand, get_feature, + GD5FXGQXXEXXG_REG_STATUS2, spinand->scratchbuf); int ret; switch (status & STATUS_ECC_MASK) { diff --git a/drivers/mtd/nand/spi/macronix.c b/drivers/mtd/nand/spi/macronix.c index edf63b9996cf..143cc120bdec 100644 --- a/drivers/mtd/nand/spi/macronix.c +++ b/drivers/mtd/nand/spi/macronix.c @@ -148,8 +148,8 @@ static int macronix_set_cont_read(struct spinand_device *spinand, bool enable) static int macronix_set_read_retry(struct spinand_device *spinand, unsigned int retry_mode) { - struct spi_mem_op op = SPINAND_SET_FEATURE_1S_1S_1S_OP(MACRONIX_FEATURE_ADDR_READ_RETRY, - spinand->scratchbuf); + struct spi_mem_op op = SPINAND_OP(spinand, set_feature, + MACRONIX_FEATURE_ADDR_READ_RETRY, spinand->scratchbuf); *spinand->scratchbuf = retry_mode; return spi_mem_exec_op(spinand->spimem, &op); diff --git a/drivers/mtd/nand/spi/micron.c b/drivers/mtd/nand/spi/micron.c index b8130e04e8e7..36f6cbbd7462 100644 --- a/drivers/mtd/nand/spi/micron.c +++ b/drivers/mtd/nand/spi/micron.c @@ -137,8 +137,8 @@ static const struct mtd_ooblayout_ops micron_4_ooblayout = { static int micron_select_target(struct spinand_device *spinand, unsigned int target) { - struct spi_mem_op op = SPINAND_SET_FEATURE_1S_1S_1S_OP(MICRON_DIE_SELECT_REG, - spinand->scratchbuf); + struct spi_mem_op op = SPINAND_OP(spinand, set_feature, + MICRON_DIE_SELECT_REG, spinand->scratchbuf); if (target > 1) return -EINVAL; @@ -251,8 +251,8 @@ static int mt29f2g01abagd_user_otp_info(struct spinand_device *spinand, static int mt29f2g01abagd_otp_lock(struct spinand_device *spinand, loff_t from, size_t len) { - struct spi_mem_op write_op = SPINAND_WR_EN_1S_0_0_OP; - struct spi_mem_op exec_op = SPINAND_PROG_EXEC_1S_1S_0_OP(0); + struct spi_mem_op write_op = SPINAND_OP(spinand, wr_en); + struct spi_mem_op exec_op = SPINAND_OP(spinand, prog_exec, 0); u8 status; int ret; diff --git a/drivers/mtd/nand/spi/toshiba.c b/drivers/mtd/nand/spi/toshiba.c index 6530257ac0be..ef649162ee68 100644 --- a/drivers/mtd/nand/spi/toshiba.c +++ b/drivers/mtd/nand/spi/toshiba.c @@ -73,7 +73,8 @@ static int tx58cxgxsxraix_ecc_get_status(struct spinand_device *spinand, { struct nand_device *nand = spinand_to_nand(spinand); u8 mbf = 0; - struct spi_mem_op op = SPINAND_GET_FEATURE_1S_1S_1S_OP(0x30, spinand->scratchbuf); + struct spi_mem_op op = SPINAND_OP(spinand, get_feature, + 0x30, spinand->scratchbuf); switch (status & STATUS_ECC_MASK) { case STATUS_ECC_NO_BITFLIPS: diff --git a/drivers/mtd/nand/spi/winbond.c b/drivers/mtd/nand/spi/winbond.c index d5799c2df065..bfec5d037f25 100644 --- a/drivers/mtd/nand/spi/winbond.c +++ b/drivers/mtd/nand/spi/winbond.c @@ -251,7 +251,8 @@ static int w25n02kv_ecc_get_status(struct spinand_device *spinand, { struct nand_device *nand = spinand_to_nand(spinand); u8 mbf = 0; - struct spi_mem_op op = SPINAND_GET_FEATURE_1S_1S_1S_OP(0x30, spinand->scratchbuf); + struct spi_mem_op op = SPINAND_OP(spinand, get_feature, + 0x30, spinand->scratchbuf); switch (status & STATUS_ECC_MASK) { case STATUS_ECC_NO_BITFLIPS: diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h index a458617fb375..553c56a389d2 100644 --- a/include/linux/mtd/spinand.h +++ b/include/linux/mtd/spinand.h @@ -703,6 +703,14 @@ struct spinand_device { unsigned int retry_mode); }; +struct spi_mem_op spinand_fill_wr_en_op(struct spinand_device *spinand); +struct spi_mem_op spinand_fill_set_feature_op(struct spinand_device *spinand, u64 reg, const void *valptr); +struct spi_mem_op spinand_fill_get_feature_op(struct spinand_device *spinand, u64 reg, void *valptr); +struct spi_mem_op spinand_fill_prog_exec_op(struct spinand_device *spinand, u64 addr); + +#define SPINAND_OP(spinand, op_name, ...) \ + spinand_fill_ ## op_name ## _op(spinand, ##__VA_ARGS__) + /** * mtd_to_spinand() - Get the SPI NAND device attached to an MTD instance * @mtd: MTD instance -- cgit v1.2.3 From fbc7538782f8e7df4737dcec7d854cf4d53bfc67 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Fri, 9 Jan 2026 18:18:12 +0100 Subject: mtd: spinand: List vendor specific operations and make sure they are supported It is probably safe to expect that all SPI controller drivers will ever support all the most basic SPI NAND operations, such as write enable, register reads, page program, block erases, etc. However, what about vendor specific operations? So far nobody complained about it, but as we are about to introduce octal DTR support, and as none of the SPI NAND instruction set is defined in any standard, we must remain careful about these extra operations. One way to make sure we do not blindly get ourselves in strange situations with vendor commands failing silently is to make the check once for all, while probing the chip. However at this stage we have no such list, so let's add the necessary infrastructure to allow: - registering vendor operations, - checking they are actually supported when appropriate. Signed-off-by: Miquel Raynal --- drivers/mtd/nand/spi/core.c | 26 ++++++++++++++++++++++++++ include/linux/mtd/spinand.h | 5 +++++ 2 files changed, 31 insertions(+) (limited to 'include') diff --git a/drivers/mtd/nand/spi/core.c b/drivers/mtd/nand/spi/core.c index 7146eec51afa..cdf45d054082 100644 --- a/drivers/mtd/nand/spi/core.c +++ b/drivers/mtd/nand/spi/core.c @@ -1424,6 +1424,27 @@ static void spinand_init_ssdr_templates(struct spinand_device *spinand) spinand->op_templates = &spinand->ssdr_op_templates; } +static int spinand_support_vendor_ops(struct spinand_device *spinand, + const struct spinand_info *info) +{ + int i; + + /* + * The vendor ops array is only used in order to verify this chip and all its memory + * operations are supported. If we see patterns emerging, we could ideally name these + * operations and define them at the SPI NAND core level instead. + * For now, this only serves as a sanity check. + */ + for (i = 0; i < info->vendor_ops->nops; i++) { + const struct spi_mem_op *op = &info->vendor_ops->ops[i]; + + if (!spi_mem_supports_op(spinand->spimem, op)) + return -EOPNOTSUPP; + } + + return 0; +} + static const struct spi_mem_op * spinand_select_op_variant(struct spinand_device *spinand, const struct spinand_op_variants *variants) @@ -1490,6 +1511,7 @@ int spinand_match_and_init(struct spinand_device *spinand, u8 *id = spinand->id.data; struct nand_device *nand = spinand_to_nand(spinand); unsigned int i; + int ret; for (i = 0; i < table_size; i++) { const struct spinand_info *info = &table[i]; @@ -1535,6 +1557,10 @@ int spinand_match_and_init(struct spinand_device *spinand, spinand->ssdr_op_templates.update_cache = op; + ret = spinand_support_vendor_ops(spinand, info); + if (ret) + return ret; + return 0; } diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h index 553c56a389d2..b020c119a15d 100644 --- a/include/linux/mtd/spinand.h +++ b/include/linux/mtd/spinand.h @@ -494,6 +494,7 @@ struct spinand_user_otp { * @op_variants.read_cache: variants of the read-cache operation * @op_variants.write_cache: variants of the write-cache operation * @op_variants.update_cache: variants of the update-cache operation + * @vendor_ops: vendor specific operations * @select_target: function used to select a target/die. Required only for * multi-die chips * @configure_chip: Align the chip configuration with the core settings @@ -518,6 +519,7 @@ struct spinand_info { const struct spinand_op_variants *write_cache; const struct spinand_op_variants *update_cache; } op_variants; + const struct spinand_op_variants *vendor_ops; int (*select_target)(struct spinand_device *spinand, unsigned int target); int (*configure_chip)(struct spinand_device *spinand); @@ -544,6 +546,9 @@ struct spinand_info { .update_cache = __update, \ } +#define SPINAND_INFO_VENDOR_OPS(__ops) \ + .vendor_ops = __ops + #define SPINAND_ECCINFO(__ooblayout, __get_status) \ .eccinfo = { \ .ooblayout = __ooblayout, \ -- cgit v1.2.3 From 20387f2fe509eba46ecf758da052786d7b1203fb Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Fri, 9 Jan 2026 18:18:20 +0100 Subject: mtd: spinand: Add support for setting a bus interface Create a bus interface enumeration, currently only containing the one we support: SSDR, for single SDR, so any operation whose command is sent over a single data line in SDR mode, ie. any operation matching 1S-XX-XX. The main spinand_device structure gets a new parameter to store this enumeration, for now unused. Of course it is set to SSDR during the SSDR templates initialization to further clarify the state we are in at the moment. This member is subject to be used to know in which bus configuration we and be updated by the core when we switch to faster mode(s). Signed-off-by: Miquel Raynal --- drivers/mtd/nand/spi/core.c | 1 + include/linux/mtd/spinand.h | 10 ++++++++++ 2 files changed, 11 insertions(+) (limited to 'include') diff --git a/drivers/mtd/nand/spi/core.c b/drivers/mtd/nand/spi/core.c index e52d76afb5b5..b2993262b3f1 100644 --- a/drivers/mtd/nand/spi/core.c +++ b/drivers/mtd/nand/spi/core.c @@ -1407,6 +1407,7 @@ static void spinand_init_ssdr_templates(struct spinand_device *spinand) tmpl->page_read = (struct spi_mem_op)SPINAND_PAGE_READ_1S_1S_0_OP(0); tmpl->prog_exec = (struct spi_mem_op)SPINAND_PROG_EXEC_1S_1S_0_OP(0); spinand->op_templates = &spinand->ssdr_op_templates; + spinand->bus_iface = SSDR; } static int spinand_support_vendor_ops(struct spinand_device *spinand, diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h index b020c119a15d..154037749a6c 100644 --- a/include/linux/mtd/spinand.h +++ b/include/linux/mtd/spinand.h @@ -482,6 +482,14 @@ struct spinand_user_otp { const struct spinand_user_otp_ops *ops; }; +/** + * enum spinand_bus_interface - SPI NAND bus interface types + * @SSDR: Bus configuration supporting all 1S-XX-XX operations, including dual and quad + */ +enum spinand_bus_interface { + SSDR, +}; + /** * struct spinand_info - Structure used to describe SPI NAND chips * @model: model name @@ -644,6 +652,7 @@ struct spinand_mem_ops { * @flags: NAND flags * @ssdr_op_templates: Templates for all single SDR SPI mem operations * @op_templates: Templates for all SPI mem operations + * @bus_iface: Current bus interface * @select_target: select a specific target/die. Usually called before sending * a command addressing a page or an eraseblock embedded in * this die. Only required if your chip exposes several dies @@ -679,6 +688,7 @@ struct spinand_device { struct spinand_mem_ops ssdr_op_templates; struct spinand_mem_ops *op_templates; + enum spinand_bus_interface bus_iface; struct spinand_dirmap *dirmaps; -- cgit v1.2.3 From 0a331a1851aedd670b95a2d16c6a82496137378d Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Fri, 9 Jan 2026 18:18:22 +0100 Subject: mtd: spinand: Give the bus interface to the configuration helper The chip configuration hook is the one responsible to actually switch the switch between bus interfaces. It is natural to give it the bus interface we expect with a new parameter. For now the only value we can give is SSDR, but this is subject to change in the future, so add a bit of extra logic in the implementations of this callback to make sure both the core and the chip driver are aligned on the request. Signed-off-by: Miquel Raynal --- drivers/mtd/nand/spi/core.c | 2 +- drivers/mtd/nand/spi/winbond.c | 28 +++++++++++++++++++++------- include/linux/mtd/spinand.h | 6 ++++-- 3 files changed, 26 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/drivers/mtd/nand/spi/core.c b/drivers/mtd/nand/spi/core.c index 1aff2d368339..bbda60b39788 100644 --- a/drivers/mtd/nand/spi/core.c +++ b/drivers/mtd/nand/spi/core.c @@ -1604,7 +1604,7 @@ static int spinand_configure_chip(struct spinand_device *spinand) return ret; if (spinand->configure_chip) { - ret = spinand->configure_chip(spinand); + ret = spinand->configure_chip(spinand, SSDR); if (ret) return ret; } diff --git a/drivers/mtd/nand/spi/winbond.c b/drivers/mtd/nand/spi/winbond.c index 1d79a8ae7920..419f4303a0dc 100644 --- a/drivers/mtd/nand/spi/winbond.c +++ b/drivers/mtd/nand/spi/winbond.c @@ -311,13 +311,17 @@ static int w25n02kv_ecc_get_status(struct spinand_device *spinand, return -EINVAL; } -static int w25n0xjw_hs_cfg(struct spinand_device *spinand) +static int w25n0xjw_hs_cfg(struct spinand_device *spinand, + enum spinand_bus_interface iface) { const struct spi_mem_op *op; bool hs; u8 sr4; int ret; + if (iface != SSDR) + return -EOPNOTSUPP; + op = spinand->op_templates->read_cache; if (op->cmd.dtr || op->addr.dtr || op->dummy.dtr || op->data.dtr) hs = false; @@ -371,17 +375,25 @@ static int w35n0xjw_write_vcr(struct spinand_device *spinand, u8 reg, u8 val) return 0; } -static int w35n0xjw_vcr_cfg(struct spinand_device *spinand) +static int w35n0xjw_vcr_cfg(struct spinand_device *spinand, + enum spinand_bus_interface iface) { - const struct spi_mem_op *op; + const struct spi_mem_op *ref_op; unsigned int dummy_cycles; bool dtr, single; u8 io_mode; int ret; - op = spinand->op_templates->read_cache; + switch (iface) { + case SSDR: + ref_op = spinand->ssdr_op_templates.read_cache; + break; + default: + return -EOPNOTSUPP; + }; - dummy_cycles = ((op->dummy.nbytes * 8) / op->dummy.buswidth) / (op->dummy.dtr ? 2 : 1); + dummy_cycles = ((ref_op->dummy.nbytes * 8) / ref_op->dummy.buswidth) / + (ref_op->dummy.dtr ? 2 : 1); switch (dummy_cycles) { case 8: case 12: @@ -398,8 +410,10 @@ static int w35n0xjw_vcr_cfg(struct spinand_device *spinand) if (ret) return ret; - single = (op->cmd.buswidth == 1 && op->addr.buswidth == 1 && op->data.buswidth == 1); - dtr = (op->cmd.dtr && op->addr.dtr && op->data.dtr); + single = (ref_op->cmd.buswidth == 1 && + ref_op->addr.buswidth == 1 && + ref_op->data.buswidth == 1); + dtr = (ref_op->cmd.dtr && ref_op->addr.dtr && ref_op->data.dtr); if (single && !dtr) io_mode = W35N01JW_VCR_IO_MODE_SINGLE_SDR; else if (!single && !dtr) diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h index 154037749a6c..20643d1c395e 100644 --- a/include/linux/mtd/spinand.h +++ b/include/linux/mtd/spinand.h @@ -530,7 +530,8 @@ struct spinand_info { const struct spinand_op_variants *vendor_ops; int (*select_target)(struct spinand_device *spinand, unsigned int target); - int (*configure_chip)(struct spinand_device *spinand); + int (*configure_chip)(struct spinand_device *spinand, + enum spinand_bus_interface iface); int (*set_cont_read)(struct spinand_device *spinand, bool enable); struct spinand_fact_otp fact_otp; @@ -705,7 +706,8 @@ struct spinand_device { const struct spinand_manufacturer *manufacturer; void *priv; - int (*configure_chip)(struct spinand_device *spinand); + int (*configure_chip)(struct spinand_device *spinand, + enum spinand_bus_interface iface); bool cont_read_possible; int (*set_cont_read)(struct spinand_device *spinand, bool enable); -- cgit v1.2.3 From 76b7dc76dd0e1af5a538e977e015ac95271b3b12 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Fri, 9 Jan 2026 18:18:24 +0100 Subject: mtd: spinand: Add octal DTR support Create a new bus interface named ODTR for "octal DTR", which matches the following pattern: 8D-8D-8D. Add octal DTR support for all the existing core operations. Add a second set of templates for this bus interface. Give the possibility for drivers to register their read, write and update cache variants as well as their vendor specific operations. Check the SPI controller driver supports all the octal DTR commands that we might need before switching to the ODTR bus interface. Make the switch by calling ->configure_chip() with the ODTR parameter. Fallback in case this step fails. If someone ever attempts to suspend a chip in octal DTR mode, there are changes that it will loose its configuration at resume. Prevent any problem by explicitly switching back to SSDR while suspending. Note: there is a limitation in the current approach, page I/Os are not available as the dirmaps will be created for the ODTR bus interface if that option is supported and not switched back to SSDR during suspend. Switching them is possible but would be costly and would not bring anything as right after resuming we will switch again to ODTR. In case this capability is used for debug, developpers should mind to destroy and recreate suitable direct mappings. Finally, as a side effect, we increase the buffer for reading IDs to 6. No device at this point returns 6 bytes, but we support 5 bytes IDs, which means in octal DTR mode we have no other choice than reading an even number of bytes, hence 6. Signed-off-by: Miquel Raynal --- drivers/mtd/nand/spi/core.c | 140 +++++++++++++++++++++++++++++++++++++++++++- include/linux/mtd/spinand.h | 79 ++++++++++++++++++++++++- 2 files changed, 216 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/mtd/nand/spi/core.c b/drivers/mtd/nand/spi/core.c index bbda60b39788..21a980e626eb 100644 --- a/drivers/mtd/nand/spi/core.c +++ b/drivers/mtd/nand/spi/core.c @@ -57,6 +57,9 @@ spinand_fill_set_feature_op(struct spinand_device *spinand, u64 reg, const void { struct spi_mem_op op = spinand->op_templates->set_feature; + if (op.cmd.dtr && op.cmd.buswidth == 8) + reg |= reg << 8; + op.addr.val = reg; op.data.buf.out = valptr; @@ -68,6 +71,9 @@ spinand_fill_get_feature_op(struct spinand_device *spinand, u64 reg, void *valpt { struct spi_mem_op op = spinand->op_templates->get_feature; + if (op.cmd.dtr && op.cmd.buswidth == 8) + reg |= reg << 8; + op.addr.val = reg; op.data.buf.in = valptr; @@ -1393,6 +1399,11 @@ static void spinand_manufacturer_cleanup(struct spinand_device *spinand) return spinand->manufacturer->ops->cleanup(spinand); } +static bool spinand_op_is_odtr(const struct spi_mem_op *op) +{ + return op->cmd.dtr && op->cmd.buswidth == 8; +} + static void spinand_init_ssdr_templates(struct spinand_device *spinand) { struct spinand_mem_ops *tmpl = &spinand->ssdr_op_templates; @@ -1425,6 +1436,10 @@ static int spinand_support_vendor_ops(struct spinand_device *spinand, for (i = 0; i < info->vendor_ops->nops; i++) { const struct spi_mem_op *op = &info->vendor_ops->ops[i]; + if ((iface == SSDR && spinand_op_is_odtr(op)) || + (iface == ODTR && !spinand_op_is_odtr(op))) + continue; + if (!spi_mem_supports_op(spinand->spimem, op)) return -EOPNOTSUPP; } @@ -1432,6 +1447,49 @@ static int spinand_support_vendor_ops(struct spinand_device *spinand, return 0; } +static int spinand_init_odtr_instruction_set(struct spinand_device *spinand) +{ + struct spinand_mem_ops *tmpl = &spinand->odtr_op_templates; + + tmpl->reset = (struct spi_mem_op)SPINAND_RESET_8D_0_0_OP; + if (!spi_mem_supports_op(spinand->spimem, &tmpl->reset)) + return -EOPNOTSUPP; + + tmpl->readid = (struct spi_mem_op)SPINAND_READID_8D_8D_8D_OP(0, 0, NULL, 0); + if (!spi_mem_supports_op(spinand->spimem, &tmpl->readid)) + return -EOPNOTSUPP; + + tmpl->wr_en = (struct spi_mem_op)SPINAND_WR_EN_8D_0_0_OP; + if (!spi_mem_supports_op(spinand->spimem, &tmpl->wr_en)) + return -EOPNOTSUPP; + + tmpl->wr_dis = (struct spi_mem_op)SPINAND_WR_DIS_8D_0_0_OP; + if (!spi_mem_supports_op(spinand->spimem, &tmpl->wr_dis)) + return -EOPNOTSUPP; + + tmpl->set_feature = (struct spi_mem_op)SPINAND_SET_FEATURE_8D_8D_8D_OP(0, NULL); + if (!spi_mem_supports_op(spinand->spimem, &tmpl->set_feature)) + return -EOPNOTSUPP; + + tmpl->get_feature = (struct spi_mem_op)SPINAND_GET_FEATURE_8D_8D_8D_OP(0, NULL); + if (!spi_mem_supports_op(spinand->spimem, &tmpl->get_feature)) + return -EOPNOTSUPP; + + tmpl->blk_erase = (struct spi_mem_op)SPINAND_BLK_ERASE_8D_8D_0_OP(0); + if (!spi_mem_supports_op(spinand->spimem, &tmpl->blk_erase)) + return -EOPNOTSUPP; + + tmpl->page_read = (struct spi_mem_op)SPINAND_PAGE_READ_8D_8D_0_OP(0); + if (!spi_mem_supports_op(spinand->spimem, &tmpl->page_read)) + return -EOPNOTSUPP; + + tmpl->prog_exec = (struct spi_mem_op)SPINAND_PROG_EXEC_8D_8D_0_OP(0); + if (!spi_mem_supports_op(spinand->spimem, &tmpl->prog_exec)) + return -EOPNOTSUPP; + + return 0; +} + static const struct spi_mem_op * spinand_select_op_variant(struct spinand_device *spinand, enum spinand_bus_interface iface, const struct spinand_op_variants *variants) @@ -1447,6 +1505,10 @@ spinand_select_op_variant(struct spinand_device *spinand, enum spinand_bus_inter unsigned int nbytes; int ret; + if ((iface == SSDR && spinand_op_is_odtr(&op)) || + (iface == ODTR && !spinand_op_is_odtr(&op))) + continue; + nbytes = nanddev_per_page_oobsize(nand) + nanddev_page_size(nand); @@ -1523,6 +1585,8 @@ int spinand_match_and_init(struct spinand_device *spinand, spinand->read_retries = table[i].read_retries; spinand->set_read_retry = table[i].set_read_retry; + /* I/O variants selection with single-spi SDR commands */ + op = spinand_select_op_variant(spinand, SSDR, info->op_variants.read_cache); if (!op) @@ -1548,6 +1612,28 @@ int spinand_match_and_init(struct spinand_device *spinand, if (ret) return ret; + /* I/O variants selection with octo-spi DDR commands (optional) */ + + ret = spinand_init_odtr_instruction_set(spinand); + if (ret) + return 0; + + ret = spinand_support_vendor_ops(spinand, info, ODTR); + if (ret) + return 0; + + op = spinand_select_op_variant(spinand, ODTR, + info->op_variants.read_cache); + spinand->odtr_op_templates.read_cache = op; + + op = spinand_select_op_variant(spinand, ODTR, + info->op_variants.write_cache); + spinand->odtr_op_templates.write_cache = op; + + op = spinand_select_op_variant(spinand, ODTR, + info->op_variants.update_cache); + spinand->odtr_op_templates.update_cache = op; + return 0; } @@ -1589,9 +1675,34 @@ static int spinand_detect(struct spinand_device *spinand) static int spinand_configure_chip(struct spinand_device *spinand) { - bool quad_enable = false; + bool odtr = false, quad_enable = false; int ret; + if (spinand->odtr_op_templates.read_cache && + spinand->odtr_op_templates.write_cache && + spinand->odtr_op_templates.update_cache) + odtr = true; + + if (odtr) { + if (!spinand->configure_chip) + goto try_ssdr; + + /* ODTR bus interface configuration happens here */ + ret = spinand->configure_chip(spinand, ODTR); + if (ret) { + spinand->odtr_op_templates.read_cache = NULL; + spinand->odtr_op_templates.write_cache = NULL; + spinand->odtr_op_templates.update_cache = NULL; + goto try_ssdr; + } + + spinand->op_templates = &spinand->odtr_op_templates; + spinand->bus_iface = ODTR; + + return 0; + } + +try_ssdr: if (spinand->flags & SPINAND_HAS_QE_BIT) { if (spinand->ssdr_op_templates.read_cache->data.buswidth == 4 || spinand->ssdr_op_templates.write_cache->data.buswidth == 4 || @@ -1673,6 +1784,32 @@ static void spinand_mtd_resume(struct mtd_info *mtd) spinand_ecc_enable(spinand, false); } +static int spinand_mtd_suspend(struct mtd_info *mtd) +{ + struct spinand_device *spinand = mtd_to_spinand(mtd); + int ret; + + /* + * Return to SSDR interface in the suspend path to make sure the + * reset operation is correctly processed upon resume. + * + * Note: Once back in SSDR mode, every operation but the page helpers + * (dirmap based I/O accessors) will work. Page accesses would require + * destroying and recreating the dirmaps twice to work, which would be + * impacting for no reason, as this is just a transitional state. + */ + if (spinand->bus_iface == ODTR) { + ret = spinand->configure_chip(spinand, SSDR); + if (ret) + return ret; + + spinand->op_templates = &spinand->ssdr_op_templates; + spinand->bus_iface = SSDR; + } + + return 0; +} + static int spinand_init(struct spinand_device *spinand) { struct device *dev = &spinand->spimem->spi->dev; @@ -1742,6 +1879,7 @@ static int spinand_init(struct spinand_device *spinand) mtd->_block_isreserved = spinand_mtd_block_isreserved; mtd->_erase = spinand_mtd_erase; mtd->_max_bad_blocks = nanddev_mtd_max_bad_blocks; + mtd->_suspend = spinand_mtd_suspend; mtd->_resume = spinand_mtd_resume; if (spinand_user_otp_size(spinand) || spinand_fact_otp_size(spinand)) { diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h index 20643d1c395e..6a024cf1c53a 100644 --- a/include/linux/mtd/spinand.h +++ b/include/linux/mtd/spinand.h @@ -238,6 +238,77 @@ SPI_MEM_OP_NO_DUMMY, \ SPI_MEM_OP_DATA_OUT(len, buf, 8)) +/** + * Octal DDR SPI NAND flash operations + */ + +#define SPINAND_RESET_8D_0_0_OP \ + SPI_MEM_OP(SPI_MEM_DTR_OP_RPT_CMD(0xff, 8), \ + SPI_MEM_OP_NO_ADDR, \ + SPI_MEM_OP_NO_DUMMY, \ + SPI_MEM_OP_NO_DATA) + +#define SPINAND_READID_8D_8D_8D_OP(naddr, ndummy, buf, len) \ + SPI_MEM_OP(SPI_MEM_DTR_OP_RPT_CMD(0x9f, 8), \ + SPI_MEM_DTR_OP_ADDR(naddr, 0, 8), \ + SPI_MEM_DTR_OP_DUMMY(ndummy, 8), \ + SPI_MEM_DTR_OP_DATA_IN(len, buf, 8)) + +#define SPINAND_WR_EN_8D_0_0_OP \ + SPI_MEM_OP(SPI_MEM_DTR_OP_RPT_CMD(0x06, 8), \ + SPI_MEM_OP_NO_ADDR, \ + SPI_MEM_OP_NO_DUMMY, \ + SPI_MEM_OP_NO_DATA) + +#define SPINAND_WR_DIS_8D_0_0_OP \ + SPI_MEM_OP(SPI_MEM_DTR_OP_RPT_CMD(0x04, 8), \ + SPI_MEM_OP_NO_ADDR, \ + SPI_MEM_OP_NO_DUMMY, \ + SPI_MEM_OP_NO_DATA) + +#define SPINAND_SET_FEATURE_8D_8D_8D_OP(reg, valptr) \ + SPI_MEM_OP(SPI_MEM_DTR_OP_RPT_CMD(0x1f, 8), \ + SPI_MEM_DTR_OP_RPT_ADDR(reg, 8), \ + SPI_MEM_OP_NO_DUMMY, \ + SPI_MEM_DTR_OP_DATA_OUT(2, valptr, 8)) + +#define SPINAND_GET_FEATURE_8D_8D_8D_OP(reg, valptr) \ + SPI_MEM_OP(SPI_MEM_DTR_OP_RPT_CMD(0x0f, 8), \ + SPI_MEM_DTR_OP_RPT_ADDR(reg, 8), \ + SPI_MEM_DTR_OP_DUMMY(14, 8), \ + SPI_MEM_DTR_OP_DATA_IN(2, valptr, 8)) + +#define SPINAND_BLK_ERASE_8D_8D_0_OP(addr) \ + SPI_MEM_OP(SPI_MEM_DTR_OP_RPT_CMD(0xd8, 8), \ + SPI_MEM_DTR_OP_ADDR(2, addr, 8), \ + SPI_MEM_OP_NO_DUMMY, \ + SPI_MEM_OP_NO_DATA) + +#define SPINAND_PAGE_READ_8D_8D_0_OP(addr) \ + SPI_MEM_OP(SPI_MEM_DTR_OP_RPT_CMD(0x13, 8), \ + SPI_MEM_DTR_OP_ADDR(2, addr, 8), \ + SPI_MEM_OP_NO_DUMMY, \ + SPI_MEM_OP_NO_DATA) + +#define SPINAND_PAGE_READ_FROM_CACHE_8D_8D_8D_OP(addr, ndummy, buf, len, freq) \ + SPI_MEM_OP(SPI_MEM_DTR_OP_RPT_CMD(0x9d, 8), \ + SPI_MEM_DTR_OP_ADDR(2, addr, 8), \ + SPI_MEM_DTR_OP_DUMMY(ndummy, 8), \ + SPI_MEM_DTR_OP_DATA_IN(len, buf, 8), \ + SPI_MEM_OP_MAX_FREQ(freq)) + +#define SPINAND_PROG_EXEC_8D_8D_0_OP(addr) \ + SPI_MEM_OP(SPI_MEM_DTR_OP_RPT_CMD(0x10, 8), \ + SPI_MEM_DTR_OP_ADDR(2, addr, 8), \ + SPI_MEM_OP_NO_DUMMY, \ + SPI_MEM_OP_NO_DATA) + +#define SPINAND_PROG_LOAD_8D_8D_8D_OP(reset, addr, buf, len) \ + SPI_MEM_OP(SPI_MEM_DTR_OP_RPT_CMD((reset ? 0xc2 : 0xc4), 8), \ + SPI_MEM_DTR_OP_ADDR(2, addr, 8), \ + SPI_MEM_OP_NO_DUMMY, \ + SPI_MEM_DTR_OP_DATA_OUT(len, buf, 8)) + /* feature register */ #define REG_BLOCK_LOCK 0xa0 #define BL_ALL_UNLOCKED 0x00 @@ -261,7 +332,7 @@ struct spinand_op; struct spinand_device; -#define SPINAND_MAX_ID_LEN 5 +#define SPINAND_MAX_ID_LEN 6 /* * For erase, write and read operation, we got the following timings : * tBERS (erase) 1ms to 4ms @@ -287,7 +358,7 @@ struct spinand_device; /** * struct spinand_id - SPI NAND id structure - * @data: buffer containing the id bytes. Currently 5 bytes large, but can + * @data: buffer containing the id bytes. Currently 6 bytes large, but can * be extended if required * @len: ID length */ @@ -485,9 +556,11 @@ struct spinand_user_otp { /** * enum spinand_bus_interface - SPI NAND bus interface types * @SSDR: Bus configuration supporting all 1S-XX-XX operations, including dual and quad + * @ODTR: Bus configuration supporting only 8D-8D-8D operations */ enum spinand_bus_interface { SSDR, + ODTR, }; /** @@ -652,6 +725,7 @@ struct spinand_mem_ops { * @id: NAND ID as returned by READ_ID * @flags: NAND flags * @ssdr_op_templates: Templates for all single SDR SPI mem operations + * @odtr_op_templates: Templates for all octal DTR SPI mem operations * @op_templates: Templates for all SPI mem operations * @bus_iface: Current bus interface * @select_target: select a specific target/die. Usually called before sending @@ -688,6 +762,7 @@ struct spinand_device { u32 flags; struct spinand_mem_ops ssdr_op_templates; + struct spinand_mem_ops odtr_op_templates; struct spinand_mem_ops *op_templates; enum spinand_bus_interface bus_iface; -- cgit v1.2.3