From 7c527c15cdda2e0a26a05ac15a44d3e14738fc55 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Sun, 3 Aug 2025 21:20:12 +0900 Subject: firewire: core: use reference counting to invoke address handlers safely The lifetime of address handler has been managed by linked list and RCU. This approach was introduced in commit 35202f7d8420 ("firewire: remove global lock around address handlers, convert to RCU"). The invocations of address handler are performed within RCU read-side critical sections. In commit 57e6d9f85fff ("firewire: ohci: use workqueue to handle events of AR request/response contexts"), the invocations are in a workqueue context. The approach still imposes limitation that sleeping is not allowed within RCU read-side critical sections. However, since sleeping is not permitted within RCU read-side critical sections, this approach still has a limitation. This commit adds reference counting to decouple handler invocation from handler discovery. The linked list and RCU is used to discover the handlers, while the reference counting is used to invoke them safely. Link: https://lore.kernel.org/r/20250803122015.236493-2-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- include/linux/firewire.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/firewire.h b/include/linux/firewire.h index cceb70415ed2..d38c6e538e5c 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -341,7 +341,11 @@ struct fw_address_handler { u64 length; fw_address_callback_t address_callback; void *callback_data; + + // Only for core functions. struct list_head link; + struct kref kref; + struct completion done; }; struct fw_address_region { -- cgit v1.2.3 From 6d3c3ca4c77e93660cce5819bf707f75df03e0c8 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Fri, 8 Aug 2025 15:28:47 +0200 Subject: module: Rename EXPORT_SYMBOL_GPL_FOR_MODULES to EXPORT_SYMBOL_FOR_MODULES Christoph suggested that the explicit _GPL_ can be dropped from the module namespace export macro, as it's intended for in-tree modules only. It would be possible to restrict it technically, but it was pointed out [2] that some cases of using an out-of-tree build of an in-tree module with the same name are legitimate. But in that case those also have to be GPL anyway so it's unnecessary to spell it out in the macro name. Link: https://lore.kernel.org/all/aFleJN_fE-RbSoFD@infradead.org/ [1] Link: https://lore.kernel.org/all/CAK7LNATRkZHwJGpojCnvdiaoDnP%2BaeUXgdey5sb_8muzdWTMkA@mail.gmail.com/ [2] Suggested-by: Christoph Hellwig Reviewed-by: Shivank Garg Acked-by: David Hildenbrand Acked-by: Nicolas Schier Reviewed-by: Daniel Gomez Reviewed-by: Christian Brauner Signed-off-by: Vlastimil Babka Link: https://lore.kernel.org/20250808-export_modules-v4-1-426945bcc5e1@suse.cz Signed-off-by: Christian Brauner --- Documentation/core-api/symbol-namespaces.rst | 11 ++++++----- drivers/tty/serial/8250/8250_rsa.c | 8 ++++---- fs/anon_inodes.c | 2 +- include/linux/export.h | 2 +- 4 files changed, 12 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/Documentation/core-api/symbol-namespaces.rst b/Documentation/core-api/symbol-namespaces.rst index 32fc73dc5529..034898e81ba2 100644 --- a/Documentation/core-api/symbol-namespaces.rst +++ b/Documentation/core-api/symbol-namespaces.rst @@ -76,20 +76,21 @@ unit as preprocessor statement. The above example would then read:: within the corresponding compilation unit before the #include for . Typically it's placed before the first #include statement. -Using the EXPORT_SYMBOL_GPL_FOR_MODULES() macro ------------------------------------------------ +Using the EXPORT_SYMBOL_FOR_MODULES() macro +------------------------------------------- Symbols exported using this macro are put into a module namespace. This -namespace cannot be imported. +namespace cannot be imported. These exports are GPL-only as they are only +intended for in-tree modules. The macro takes a comma separated list of module names, allowing only those modules to access this symbol. Simple tail-globs are supported. For example:: - EXPORT_SYMBOL_GPL_FOR_MODULES(preempt_notifier_inc, "kvm,kvm-*") + EXPORT_SYMBOL_FOR_MODULES(preempt_notifier_inc, "kvm,kvm-*") -will limit usage of this symbol to modules whoes name matches the given +will limit usage of this symbol to modules whose name matches the given patterns. How to use Symbols exported in Namespaces diff --git a/drivers/tty/serial/8250/8250_rsa.c b/drivers/tty/serial/8250/8250_rsa.c index d34093cc03ad..12a65b79583c 100644 --- a/drivers/tty/serial/8250/8250_rsa.c +++ b/drivers/tty/serial/8250/8250_rsa.c @@ -147,7 +147,7 @@ void rsa_enable(struct uart_8250_port *up) if (up->port.uartclk == SERIAL_RSA_BAUD_BASE * 16) serial_out(up, UART_RSA_FRR, 0); } -EXPORT_SYMBOL_GPL_FOR_MODULES(rsa_enable, "8250_base"); +EXPORT_SYMBOL_FOR_MODULES(rsa_enable, "8250_base"); /* * Attempts to turn off the RSA FIFO and resets the RSA board back to 115kbps compat mode. It is @@ -179,7 +179,7 @@ void rsa_disable(struct uart_8250_port *up) up->port.uartclk = SERIAL_RSA_BAUD_BASE_LO * 16; uart_port_unlock_irq(&up->port); } -EXPORT_SYMBOL_GPL_FOR_MODULES(rsa_disable, "8250_base"); +EXPORT_SYMBOL_FOR_MODULES(rsa_disable, "8250_base"); void rsa_autoconfig(struct uart_8250_port *up) { @@ -192,7 +192,7 @@ void rsa_autoconfig(struct uart_8250_port *up) if (__rsa_enable(up)) up->port.type = PORT_RSA; } -EXPORT_SYMBOL_GPL_FOR_MODULES(rsa_autoconfig, "8250_base"); +EXPORT_SYMBOL_FOR_MODULES(rsa_autoconfig, "8250_base"); void rsa_reset(struct uart_8250_port *up) { @@ -201,7 +201,7 @@ void rsa_reset(struct uart_8250_port *up) serial_out(up, UART_RSA_FRR, 0); } -EXPORT_SYMBOL_GPL_FOR_MODULES(rsa_reset, "8250_base"); +EXPORT_SYMBOL_FOR_MODULES(rsa_reset, "8250_base"); #ifdef CONFIG_SERIAL_8250_DEPRECATED_OPTIONS #ifndef MODULE diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index 1d847a939f29..180a458fc4f7 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c @@ -129,7 +129,7 @@ struct inode *anon_inode_make_secure_inode(struct super_block *sb, const char *n } return inode; } -EXPORT_SYMBOL_GPL_FOR_MODULES(anon_inode_make_secure_inode, "kvm"); +EXPORT_SYMBOL_FOR_MODULES(anon_inode_make_secure_inode, "kvm"); static struct file *__anon_inode_getfile(const char *name, const struct file_operations *fops, diff --git a/include/linux/export.h b/include/linux/export.h index f35d03b4113b..a686fd0ba406 100644 --- a/include/linux/export.h +++ b/include/linux/export.h @@ -91,6 +91,6 @@ #define EXPORT_SYMBOL_NS(sym, ns) __EXPORT_SYMBOL(sym, "", ns) #define EXPORT_SYMBOL_NS_GPL(sym, ns) __EXPORT_SYMBOL(sym, "GPL", ns) -#define EXPORT_SYMBOL_GPL_FOR_MODULES(sym, mods) __EXPORT_SYMBOL(sym, "GPL", "module:" mods) +#define EXPORT_SYMBOL_FOR_MODULES(sym, mods) __EXPORT_SYMBOL(sym, "GPL", "module:" mods) #endif /* _LINUX_EXPORT_H */ -- cgit v1.2.3 From 21924af67d69d7c9fdaf845be69043cfe75196a1 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Tue, 5 Aug 2025 00:10:02 +0000 Subject: locking: Fix __clear_task_blocked_on() warning from __ww_mutex_wound() path The __clear_task_blocked_on() helper added a number of sanity checks ensuring we hold the mutex wait lock and that the task we are clearing blocked_on pointer (if set) matches the mutex. However, there is an edge case in the _ww_mutex_wound() logic where we need to clear the blocked_on pointer for the task that owns the mutex, not the task that is waiting on the mutex. For this case the sanity checks aren't valid, so handle this by allowing a NULL lock to skip the additional checks. K Prateek Nayak and Maarten Lankhorst also pointed out that in this case where we don't hold the owner's mutex wait_lock, we need to be a bit more careful using READ_ONCE/WRITE_ONCE in both the __clear_task_blocked_on() and __set_task_blocked_on() implementations to avoid accidentally tripping WARN_ONs if two instances race. So do that here as well. This issue was easier to miss, I realized, as the test-ww_mutex driver only exercises the wait-die class of ww_mutexes. I've sent a patch[1] to address this so the logic will be easier to test. [1]: https://lore.kernel.org/lkml/20250801023358.562525-2-jstultz@google.com/ Fixes: a4f0b6fef4b0 ("locking/mutex: Add p->blocked_on wrappers for correctness checks") Closes: https://lore.kernel.org/lkml/68894443.a00a0220.26d0e1.0015.GAE@google.com/ Reported-by: syzbot+602c4720aed62576cd79@syzkaller.appspotmail.com Reported-by: Maarten Lankhorst Signed-off-by: John Stultz Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: K Prateek Nayak Acked-by: Maarten Lankhorst Tested-by: K Prateek Nayak Link: https://lore.kernel.org/r/20250805001026.2247040-1-jstultz@google.com --- include/linux/sched.h | 29 +++++++++++++++++------------ kernel/locking/ww_mutex.h | 6 +++++- 2 files changed, 22 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 40d2fa90df42..62103dd6a48e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2144,6 +2144,8 @@ static inline struct mutex *__get_task_blocked_on(struct task_struct *p) static inline void __set_task_blocked_on(struct task_struct *p, struct mutex *m) { + struct mutex *blocked_on = READ_ONCE(p->blocked_on); + WARN_ON_ONCE(!m); /* The task should only be setting itself as blocked */ WARN_ON_ONCE(p != current); @@ -2154,8 +2156,8 @@ static inline void __set_task_blocked_on(struct task_struct *p, struct mutex *m) * with a different mutex. Note, setting it to the same * lock repeatedly is ok. */ - WARN_ON_ONCE(p->blocked_on && p->blocked_on != m); - p->blocked_on = m; + WARN_ON_ONCE(blocked_on && blocked_on != m); + WRITE_ONCE(p->blocked_on, m); } static inline void set_task_blocked_on(struct task_struct *p, struct mutex *m) @@ -2166,16 +2168,19 @@ static inline void set_task_blocked_on(struct task_struct *p, struct mutex *m) static inline void __clear_task_blocked_on(struct task_struct *p, struct mutex *m) { - WARN_ON_ONCE(!m); - /* Currently we serialize blocked_on under the mutex::wait_lock */ - lockdep_assert_held_once(&m->wait_lock); - /* - * There may be cases where we re-clear already cleared - * blocked_on relationships, but make sure we are not - * clearing the relationship with a different lock. - */ - WARN_ON_ONCE(m && p->blocked_on && p->blocked_on != m); - p->blocked_on = NULL; + if (m) { + struct mutex *blocked_on = READ_ONCE(p->blocked_on); + + /* Currently we serialize blocked_on under the mutex::wait_lock */ + lockdep_assert_held_once(&m->wait_lock); + /* + * There may be cases where we re-clear already cleared + * blocked_on relationships, but make sure we are not + * clearing the relationship with a different lock. + */ + WARN_ON_ONCE(blocked_on && blocked_on != m); + } + WRITE_ONCE(p->blocked_on, NULL); } static inline void clear_task_blocked_on(struct task_struct *p, struct mutex *m) diff --git a/kernel/locking/ww_mutex.h b/kernel/locking/ww_mutex.h index 086fd5487ca7..31a785afee6c 100644 --- a/kernel/locking/ww_mutex.h +++ b/kernel/locking/ww_mutex.h @@ -342,8 +342,12 @@ static bool __ww_mutex_wound(struct MUTEX *lock, * When waking up the task to wound, be sure to clear the * blocked_on pointer. Otherwise we can see circular * blocked_on relationships that can't resolve. + * + * NOTE: We pass NULL here instead of lock, because we + * are waking the mutex owner, who may be currently + * blocked on a different mutex. */ - __clear_task_blocked_on(owner, lock); + __clear_task_blocked_on(owner, NULL); wake_q_add(wake_q, owner); } return true; -- cgit v1.2.3 From a3de58b12ce074ec05b8741fa28d62ccb1070468 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 14 Aug 2025 22:45:50 +0100 Subject: netfs: Fix unbuffered write error handling If all the subrequests in an unbuffered write stream fail, the subrequest collector doesn't update the stream->transferred value and it retains its initial LONG_MAX value. Unfortunately, if all active streams fail, then we take the smallest value of { LONG_MAX, LONG_MAX, ... } as the value to set in wreq->transferred - which is then returned from ->write_iter(). LONG_MAX was chosen as the initial value so that all the streams can be quickly assessed by taking the smallest value of all stream->transferred - but this only works if we've set any of them. Fix this by adding a flag to indicate whether the value in stream->transferred is valid and checking that when we integrate the values. stream->transferred can then be initialised to zero. This was found by running the generic/750 xfstest against cifs with cache=none. It splices data to the target file. Once (if) it has used up all the available scratch space, the writes start failing with ENOSPC. This causes ->write_iter() to fail. However, it was returning wreq->transferred, i.e. LONG_MAX, rather than an error (because it thought the amount transferred was non-zero) and iter_file_splice_write() would then try to clean up that amount of pipe bufferage - leading to an oops when it overran. The kernel log showed: CIFS: VFS: Send error in write = -28 followed by: BUG: kernel NULL pointer dereference, address: 0000000000000008 with: RIP: 0010:iter_file_splice_write+0x3a4/0x520 do_splice+0x197/0x4e0 or: RIP: 0010:pipe_buf_release (include/linux/pipe_fs_i.h:282) iter_file_splice_write (fs/splice.c:755) Also put a warning check into splice to announce if ->write_iter() returned that it had written more than it was asked to. Fixes: 288ace2f57c9 ("netfs: New writeback implementation") Reported-by: Xiaoli Feng Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220445 Signed-off-by: David Howells Link: https://lore.kernel.org/915443.1755207950@warthog.procyon.org.uk cc: Paulo Alcantara cc: Steve French cc: Shyam Prasad N cc: netfs@lists.linux.dev cc: linux-cifs@vger.kernel.org cc: linux-fsdevel@vger.kernel.org cc: stable@vger.kernel.org Signed-off-by: Christian Brauner --- fs/netfs/read_collect.c | 4 +++- fs/netfs/write_collect.c | 10 ++++++++-- fs/netfs/write_issue.c | 4 ++-- fs/splice.c | 3 +++ include/linux/netfs.h | 1 + 5 files changed, 17 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/fs/netfs/read_collect.c b/fs/netfs/read_collect.c index 3e804da1e1eb..a95e7aadafd0 100644 --- a/fs/netfs/read_collect.c +++ b/fs/netfs/read_collect.c @@ -281,8 +281,10 @@ reassess: } else if (test_bit(NETFS_RREQ_SHORT_TRANSFER, &rreq->flags)) { notes |= MADE_PROGRESS; } else { - if (!stream->failed) + if (!stream->failed) { stream->transferred += transferred; + stream->transferred_valid = true; + } if (front->transferred < front->len) set_bit(NETFS_RREQ_SHORT_TRANSFER, &rreq->flags); notes |= MADE_PROGRESS; diff --git a/fs/netfs/write_collect.c b/fs/netfs/write_collect.c index 0f3a36852a4d..cbf3d9194c7b 100644 --- a/fs/netfs/write_collect.c +++ b/fs/netfs/write_collect.c @@ -254,6 +254,7 @@ reassess_streams: if (front->start + front->transferred > stream->collected_to) { stream->collected_to = front->start + front->transferred; stream->transferred = stream->collected_to - wreq->start; + stream->transferred_valid = true; notes |= MADE_PROGRESS; } if (test_bit(NETFS_SREQ_FAILED, &front->flags)) { @@ -356,6 +357,7 @@ bool netfs_write_collection(struct netfs_io_request *wreq) { struct netfs_inode *ictx = netfs_inode(wreq->inode); size_t transferred; + bool transferred_valid = false; int s; _enter("R=%x", wreq->debug_id); @@ -376,12 +378,16 @@ bool netfs_write_collection(struct netfs_io_request *wreq) continue; if (!list_empty(&stream->subrequests)) return false; - if (stream->transferred < transferred) + if (stream->transferred_valid && + stream->transferred < transferred) { transferred = stream->transferred; + transferred_valid = true; + } } /* Okay, declare that all I/O is complete. */ - wreq->transferred = transferred; + if (transferred_valid) + wreq->transferred = transferred; trace_netfs_rreq(wreq, netfs_rreq_trace_write_done); if (wreq->io_streams[1].active && diff --git a/fs/netfs/write_issue.c b/fs/netfs/write_issue.c index 50bee2c4130d..0584cba1a043 100644 --- a/fs/netfs/write_issue.c +++ b/fs/netfs/write_issue.c @@ -118,12 +118,12 @@ struct netfs_io_request *netfs_create_write_req(struct address_space *mapping, wreq->io_streams[0].prepare_write = ictx->ops->prepare_write; wreq->io_streams[0].issue_write = ictx->ops->issue_write; wreq->io_streams[0].collected_to = start; - wreq->io_streams[0].transferred = LONG_MAX; + wreq->io_streams[0].transferred = 0; wreq->io_streams[1].stream_nr = 1; wreq->io_streams[1].source = NETFS_WRITE_TO_CACHE; wreq->io_streams[1].collected_to = start; - wreq->io_streams[1].transferred = LONG_MAX; + wreq->io_streams[1].transferred = 0; if (fscache_resources_valid(&wreq->cache_resources)) { wreq->io_streams[1].avail = true; wreq->io_streams[1].active = true; diff --git a/fs/splice.c b/fs/splice.c index 4d6df083e0c0..f5094b6d00a0 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -739,6 +739,9 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out, sd.pos = kiocb.ki_pos; if (ret <= 0) break; + WARN_ONCE(ret > sd.total_len - left, + "Splice Exceeded! ret=%zd tot=%zu left=%zu\n", + ret, sd.total_len, left); sd.num_spliced += ret; sd.total_len -= ret; diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 185bd8196503..98c96d649bf9 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -150,6 +150,7 @@ struct netfs_io_stream { bool active; /* T if stream is active */ bool need_retry; /* T if this stream needs retrying */ bool failed; /* T if this stream failed */ + bool transferred_valid; /* T is ->transferred is valid */ }; /* -- cgit v1.2.3 From 8ef7f3132e4005a103b382e71abea7ad01fbeb86 Mon Sep 17 00:00:00 2001 From: Xianglai Li Date: Wed, 20 Aug 2025 22:23:44 +0800 Subject: LoongArch: Add cpuhotplug hooks to fix high cpu usage of vCPU threads When the CPU is offline, the timer of LoongArch is not correctly closed. This is harmless for real machines, but resulting in an excessively high cpu usage rate of the offline vCPU thread in the virtual machines. To correctly close the timer, we have made the following modifications: Register the cpu hotplug event (CPUHP_AP_LOONGARCH_ARCH_TIMER_STARTING) for LoongArch. This event's hooks will be called to close the timer when the CPU is offline. Clear the timer interrupt when the timer is turned off. Since before the timer is turned off, there may be a timer interrupt that has already been in the pending state due to the interruption of the disabled, which also affects the halt state of the offline vCPU. Signed-off-by: Xianglai Li Signed-off-by: Huacai Chen --- arch/loongarch/kernel/time.c | 22 ++++++++++++++++++++++ include/linux/cpuhotplug.h | 1 + 2 files changed, 23 insertions(+) (limited to 'include/linux') diff --git a/arch/loongarch/kernel/time.c b/arch/loongarch/kernel/time.c index 367906b10f81..f3092f2de8b5 100644 --- a/arch/loongarch/kernel/time.c +++ b/arch/loongarch/kernel/time.c @@ -5,6 +5,7 @@ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited */ #include +#include #include #include #include @@ -102,6 +103,23 @@ static int constant_timer_next_event(unsigned long delta, struct clock_event_dev return 0; } +static int arch_timer_starting(unsigned int cpu) +{ + set_csr_ecfg(ECFGF_TIMER); + + return 0; +} + +static int arch_timer_dying(unsigned int cpu) +{ + constant_set_state_shutdown(this_cpu_ptr(&constant_clockevent_device)); + + /* Clear Timer Interrupt */ + write_csr_tintclear(CSR_TINTCLR_TI); + + return 0; +} + static unsigned long get_loops_per_jiffy(void) { unsigned long lpj = (unsigned long)const_clock_freq; @@ -172,6 +190,10 @@ int constant_clockevent_init(void) lpj_fine = get_loops_per_jiffy(); pr_info("Constant clock event device register\n"); + cpuhp_setup_state(CPUHP_AP_LOONGARCH_ARCH_TIMER_STARTING, + "clockevents/loongarch/timer:starting", + arch_timer_starting, arch_timer_dying); + return 0; } diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index edfa61d80702..62cd7b35a29c 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -168,6 +168,7 @@ enum cpuhp_state { CPUHP_AP_QCOM_TIMER_STARTING, CPUHP_AP_TEGRA_TIMER_STARTING, CPUHP_AP_ARMADA_TIMER_STARTING, + CPUHP_AP_LOONGARCH_ARCH_TIMER_STARTING, CPUHP_AP_MIPS_GIC_TIMER_STARTING, CPUHP_AP_ARC_TIMER_STARTING, CPUHP_AP_REALTEK_TIMER_STARTING, -- cgit v1.2.3