From 9910159f06590c17df4fbddedaabb4c0201cc4cb Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Mon, 15 Dec 2025 14:17:23 +0100 Subject: iio: core: add separate lockdep class for info_exist_lock When one iio device is a consumer of another, it is possible that the ->info_exist_lock of both ends up being taken when reading the value of the consumer device. Since they currently belong to the same lockdep class (being initialized in a single location with mutex_init()), that results in a lockdep warning CPU0 ---- lock(&iio_dev_opaque->info_exist_lock); lock(&iio_dev_opaque->info_exist_lock); *** DEADLOCK *** May be due to missing lock nesting notation 4 locks held by sensors/414: #0: c31fd6dc (&p->lock){+.+.}-{3:3}, at: seq_read_iter+0x44/0x4e4 #1: c4f5a1c4 (&of->mutex){+.+.}-{3:3}, at: kernfs_seq_start+0x1c/0xac #2: c2827548 (kn->active#34){.+.+}-{0:0}, at: kernfs_seq_start+0x30/0xac #3: c1dd2b68 (&iio_dev_opaque->info_exist_lock){+.+.}-{3:3}, at: iio_read_channel_processed_scale+0x24/0xd8 stack backtrace: CPU: 0 UID: 0 PID: 414 Comm: sensors Not tainted 6.17.11 #5 NONE Hardware name: Generic AM33XX (Flattened Device Tree) Call trace: unwind_backtrace from show_stack+0x10/0x14 show_stack from dump_stack_lvl+0x44/0x60 dump_stack_lvl from print_deadlock_bug+0x2b8/0x334 print_deadlock_bug from __lock_acquire+0x13a4/0x2ab0 __lock_acquire from lock_acquire+0xd0/0x2c0 lock_acquire from __mutex_lock+0xa0/0xe8c __mutex_lock from mutex_lock_nested+0x1c/0x24 mutex_lock_nested from iio_read_channel_raw+0x20/0x6c iio_read_channel_raw from rescale_read_raw+0x128/0x1c4 rescale_read_raw from iio_channel_read+0xe4/0xf4 iio_channel_read from iio_read_channel_processed_scale+0x6c/0xd8 iio_read_channel_processed_scale from iio_hwmon_read_val+0x68/0xbc iio_hwmon_read_val from dev_attr_show+0x18/0x48 dev_attr_show from sysfs_kf_seq_show+0x80/0x110 sysfs_kf_seq_show from seq_read_iter+0xdc/0x4e4 seq_read_iter from vfs_read+0x238/0x2e4 vfs_read from ksys_read+0x6c/0xec ksys_read from ret_fast_syscall+0x0/0x1c Just as the mlock_key already has its own lockdep class, add a lock_class_key for the info_exist mutex. Note that this has in theory been a problem since before IIO first left staging, but it only occurs when a chain of consumers is in use and that is not often done. Fixes: ac917a81117c ("staging:iio:core set the iio_dev.info pointer to null on unregister under lock.") Signed-off-by: Rasmus Villemoes Reviewed-by: Peter Rosin Cc: Signed-off-by: Jonathan Cameron --- include/linux/iio/iio-opaque.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iio/iio-opaque.h b/include/linux/iio/iio-opaque.h index 4247497f3f8b..b87841a355f8 100644 --- a/include/linux/iio/iio-opaque.h +++ b/include/linux/iio/iio-opaque.h @@ -14,6 +14,7 @@ * @mlock: lock used to prevent simultaneous device state changes * @mlock_key: lockdep class for iio_dev lock * @info_exist_lock: lock to prevent use during removal + * @info_exist_key: lockdep class for info_exist lock * @trig_readonly: mark the current trigger immutable * @event_interface: event chrdevs associated with interrupt lines * @attached_buffers: array of buffers statically attached by the driver @@ -47,6 +48,7 @@ struct iio_dev_opaque { struct mutex mlock; struct lock_class_key mlock_key; struct mutex info_exist_lock; + struct lock_class_key info_exist_key; bool trig_readonly; struct iio_event_interface *event_interface; struct iio_buffer **attached_buffers; -- cgit v1.2.3 From c644bce62b9c6b441143a03c910f986109c47001 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 8 Jan 2026 08:45:22 +0100 Subject: readdir: require opt-in for d_type flags Commit c31f91c6af96 ("fuse: don't allow signals to interrupt getdents copying") introduced the use of high bits in d_type as flags. However, overlayfs was not adapted to handle this change. In ovl_cache_entry_new(), the code checks if d_type == DT_CHR to determine if an entry might be a whiteout. When fuse is used as the lower layer and sets high bits in d_type, this comparison fails, causing whiteout files to not be recognized properly and resulting in incorrect overlayfs behavior. Fix this by requiring callers of iterate_dir() to opt-in for getting flag bits in d_type outside of S_DT_MASK. Fixes: c31f91c6af96 ("fuse: don't allow signals to interrupt getdents copying") Link: https://lore.kernel.org/all/20260107034551.439-1-luochunsheng@ustc.edu/ Link: https://github.com/containerd/stargz-snapshotter/issues/2214 Reported-by: Chunsheng Luo Reviewed-by: Chunsheng Luo Tested-by: Chunsheng Luo Signed-off-by: Amir Goldstein Link: https://patch.msgid.link/20260108074522.3400998-1-amir73il@gmail.com Signed-off-by: Christian Brauner --- fs/readdir.c | 3 +++ include/linux/fs.h | 6 +++++- 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/fs/readdir.c b/fs/readdir.c index 7764b8638978..73707b6816e9 100644 --- a/fs/readdir.c +++ b/fs/readdir.c @@ -316,6 +316,7 @@ SYSCALL_DEFINE3(getdents, unsigned int, fd, struct getdents_callback buf = { .ctx.actor = filldir, .ctx.count = count, + .ctx.dt_flags_mask = FILLDIR_FLAG_NOINTR, .current_dir = dirent }; int error; @@ -400,6 +401,7 @@ SYSCALL_DEFINE3(getdents64, unsigned int, fd, struct getdents_callback64 buf = { .ctx.actor = filldir64, .ctx.count = count, + .ctx.dt_flags_mask = FILLDIR_FLAG_NOINTR, .current_dir = dirent }; int error; @@ -569,6 +571,7 @@ COMPAT_SYSCALL_DEFINE3(getdents, unsigned int, fd, struct compat_getdents_callback buf = { .ctx.actor = compat_filldir, .ctx.count = count, + .ctx.dt_flags_mask = FILLDIR_FLAG_NOINTR, .current_dir = dirent, }; int error; diff --git a/include/linux/fs.h b/include/linux/fs.h index f5c9cf28c4dc..a01621fa636a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1855,6 +1855,8 @@ struct dir_context { * INT_MAX unlimited */ int count; + /* @actor supports these flags in d_type high bits */ + unsigned int dt_flags_mask; }; /* If OR-ed with d_type, pending signals are not checked */ @@ -3524,7 +3526,9 @@ static inline bool dir_emit(struct dir_context *ctx, const char *name, int namelen, u64 ino, unsigned type) { - return ctx->actor(ctx, name, namelen, ctx->pos, ino, type); + unsigned int dt_mask = S_DT_MASK | ctx->dt_flags_mask; + + return ctx->actor(ctx, name, namelen, ctx->pos, ino, type & dt_mask); } static inline bool dir_emit_dot(struct file *file, struct dir_context *ctx) { -- cgit v1.2.3 From a995fe1a3aa78b7d06cc1cc7b6b8436c5e93b07f Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Wed, 7 Jan 2026 11:35:05 +0100 Subject: rust: driver: drop device private data post unbind Currently, the driver's device private data is allocated and initialized from driver core code called from bus abstractions after the driver's probe() callback returned the corresponding initializer. Similarly, the driver's device private data is dropped within the remove() callback of bus abstractions after calling the remove() callback of the corresponding driver. However, commit 6f61a2637abe ("rust: device: introduce Device::drvdata()") introduced an accessor for the driver's device private data for a Device, i.e. a device that is currently bound to a driver. Obviously, this is in conflict with dropping the driver's device private data in remove(), since a device can not be considered to be fully unbound after remove() has finished: We also have to consider registrations guarded by devres - such as IRQ or class device registrations - which are torn down after remove() in devres_release_all(). Thus, it can happen that, for instance, a class device or IRQ callback still calls Device::drvdata(), which then runs concurrently to remove() (which sets dev->driver_data to NULL and drops the driver's device private data), before devres_release_all() started to tear down the corresponding registration. This is because devres guarded registrations can, as expected, access the corresponding Device that defines their scope. In C it simply is the driver's responsibility to ensure that its device private data is freed after e.g. an IRQ registration is unregistered. Typically, C drivers achieve this by allocating their device private data with e.g. devm_kzalloc() before doing anything else, i.e. before e.g. registering an IRQ with devm_request_threaded_irq(), relying on the reverse order cleanup of devres. Technically, we could do something similar in Rust. However, the resulting code would be pretty messy: In Rust we have to differentiate between allocated but uninitialized memory and initialized memory in the type system. Thus, we would need to somehow keep track of whether the driver's device private data object has been initialized (i.e. probe() was successful and returned a valid initializer for this memory) and conditionally call the destructor of the corresponding object when it is freed. This is because we'd need to allocate and register the memory of the driver's device private data *before* it is initialized by the initializer returned by the driver's probe() callback, because the driver could already register devres guarded registrations within probe() outside of the driver's device private data initializer. Luckily there is a much simpler solution: Instead of dropping the driver's device private data at the end of remove(), we just drop it after the device has been fully unbound, i.e. after all devres callbacks have been processed. For this, we introduce a new post_unbind() callback private to the driver-core, i.e. the callback is neither exposed to drivers, nor to bus abstractions. This way, the driver-core code can simply continue to conditionally allocate the memory for the driver's device private data when the driver's initializer is returned from probe() - no change needed - and drop it when the driver-core code receives the post_unbind() callback. Closes: https://lore.kernel.org/all/DEZMS6Y4A7XE.XE7EUBT5SJFJ@kernel.org/ Fixes: 6f61a2637abe ("rust: device: introduce Device::drvdata()") Acked-by: Alice Ryhl Acked-by: Greg Kroah-Hartman Acked-by: Igor Korotin Link: https://patch.msgid.link/20260107103511.570525-7-dakr@kernel.org [ Remove #ifdef CONFIG_RUST, rename post_unbind() to post_unbind_rust(). - Danilo] Signed-off-by: Danilo Krummrich --- drivers/base/dd.c | 2 ++ include/linux/device/driver.h | 9 +++++++++ rust/kernel/auxiliary.rs | 4 ++-- rust/kernel/device.rs | 20 +++++++++++--------- rust/kernel/driver.rs | 36 +++++++++++++++++++++++++++++++++++- rust/kernel/i2c.rs | 4 ++-- rust/kernel/pci.rs | 4 ++-- rust/kernel/platform.rs | 4 ++-- rust/kernel/usb.rs | 4 ++-- 9 files changed, 67 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/dd.c b/drivers/base/dd.c index 349f31bedfa1..bea8da5f8a3a 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -548,6 +548,8 @@ static DEVICE_ATTR_RW(state_synced); static void device_unbind_cleanup(struct device *dev) { devres_release_all(dev); + if (dev->driver->p_cb.post_unbind_rust) + dev->driver->p_cb.post_unbind_rust(dev); arch_teardown_dma_ops(dev); kfree(dev->dma_range_map); dev->dma_range_map = NULL; diff --git a/include/linux/device/driver.h b/include/linux/device/driver.h index cd8e0f0a634b..bbc67ec513ed 100644 --- a/include/linux/device/driver.h +++ b/include/linux/device/driver.h @@ -85,6 +85,8 @@ enum probe_type { * uevent. * @p: Driver core's private data, no one other than the driver * core can touch this. + * @p_cb: Callbacks private to the driver core; no one other than the + * driver core is allowed to touch this. * * The device driver-model tracks all of the drivers known to the system. * The main reason for this tracking is to enable the driver core to match @@ -119,6 +121,13 @@ struct device_driver { void (*coredump) (struct device *dev); struct driver_private *p; + struct { + /* + * Called after remove() and after all devres entries have been + * processed. This is a Rust only callback. + */ + void (*post_unbind_rust)(struct device *dev); + } p_cb; }; diff --git a/rust/kernel/auxiliary.rs b/rust/kernel/auxiliary.rs index 17574aa5066f..be76f11aecb7 100644 --- a/rust/kernel/auxiliary.rs +++ b/rust/kernel/auxiliary.rs @@ -96,9 +96,9 @@ impl Adapter { // SAFETY: `remove_callback` is only ever called after a successful call to // `probe_callback`, hence it's guaranteed that `Device::set_drvdata()` has been called // and stored a `Pin>`. - let data = unsafe { adev.as_ref().drvdata_obtain::() }; + let data = unsafe { adev.as_ref().drvdata_borrow::() }; - T::unbind(adev, data.as_ref()); + T::unbind(adev, data); } } diff --git a/rust/kernel/device.rs b/rust/kernel/device.rs index 71b200df0f40..031720bf5d8c 100644 --- a/rust/kernel/device.rs +++ b/rust/kernel/device.rs @@ -232,30 +232,32 @@ impl Device { /// /// # Safety /// - /// - Must only be called once after a preceding call to [`Device::set_drvdata`]. /// - The type `T` must match the type of the `ForeignOwnable` previously stored by /// [`Device::set_drvdata`]. - pub unsafe fn drvdata_obtain(&self) -> Pin> { + pub(crate) unsafe fn drvdata_obtain(&self) -> Option>> { // SAFETY: By the type invariants, `self.as_raw()` is a valid pointer to a `struct device`. let ptr = unsafe { bindings::dev_get_drvdata(self.as_raw()) }; // SAFETY: By the type invariants, `self.as_raw()` is a valid pointer to a `struct device`. unsafe { bindings::dev_set_drvdata(self.as_raw(), core::ptr::null_mut()) }; + if ptr.is_null() { + return None; + } + // SAFETY: - // - By the safety requirements of this function, `ptr` comes from a previous call to - // `into_foreign()`. + // - If `ptr` is not NULL, it comes from a previous call to `into_foreign()`. // - `dev_get_drvdata()` guarantees to return the same pointer given to `dev_set_drvdata()` // in `into_foreign()`. - unsafe { Pin::>::from_foreign(ptr.cast()) } + Some(unsafe { Pin::>::from_foreign(ptr.cast()) }) } /// Borrow the driver's private data bound to this [`Device`]. /// /// # Safety /// - /// - Must only be called after a preceding call to [`Device::set_drvdata`] and before - /// [`Device::drvdata_obtain`]. + /// - Must only be called after a preceding call to [`Device::set_drvdata`] and before the + /// device is fully unbound. /// - The type `T` must match the type of the `ForeignOwnable` previously stored by /// [`Device::set_drvdata`]. pub unsafe fn drvdata_borrow(&self) -> Pin<&T> { @@ -271,7 +273,7 @@ impl Device { /// # Safety /// /// - Must only be called after a preceding call to [`Device::set_drvdata`] and before - /// [`Device::drvdata_obtain`]. + /// the device is fully unbound. /// - The type `T` must match the type of the `ForeignOwnable` previously stored by /// [`Device::set_drvdata`]. unsafe fn drvdata_unchecked(&self) -> Pin<&T> { @@ -320,7 +322,7 @@ impl Device { // SAFETY: // - The above check of `dev_get_drvdata()` guarantees that we are called after - // `set_drvdata()` and before `drvdata_obtain()`. + // `set_drvdata()`. // - We've just checked that the type of the driver's private data is in fact `T`. Ok(unsafe { self.drvdata_unchecked() }) } diff --git a/rust/kernel/driver.rs b/rust/kernel/driver.rs index ba1ca1f7a7e2..bee3ae21a27b 100644 --- a/rust/kernel/driver.rs +++ b/rust/kernel/driver.rs @@ -177,7 +177,39 @@ unsafe impl Sync for Registration {} // any thread, so `Registration` is `Send`. unsafe impl Send for Registration {} -impl Registration { +impl Registration { + extern "C" fn post_unbind_callback(dev: *mut bindings::device) { + // SAFETY: The driver core only ever calls the post unbind callback with a valid pointer to + // a `struct device`. + // + // INVARIANT: `dev` is valid for the duration of the `post_unbind_callback()`. + let dev = unsafe { &*dev.cast::>() }; + + // `remove()` and all devres callbacks have been completed at this point, hence drop the + // driver's device private data. + // + // SAFETY: By the safety requirements of the `Driver` trait, `T::DriverData` is the + // driver's device private data type. + drop(unsafe { dev.drvdata_obtain::() }); + } + + /// Attach generic `struct device_driver` callbacks. + fn callbacks_attach(drv: &Opaque) { + let ptr = drv.get().cast::(); + + // SAFETY: + // - `drv.get()` yields a valid pointer to `Self::DriverType`. + // - Adding `DEVICE_DRIVER_OFFSET` yields the address of the embedded `struct device_driver` + // as guaranteed by the safety requirements of the `Driver` trait. + let base = unsafe { ptr.add(T::DEVICE_DRIVER_OFFSET) }; + + // CAST: `base` points to the offset of the embedded `struct device_driver`. + let base = base.cast::(); + + // SAFETY: It is safe to set the fields of `struct device_driver` on initialization. + unsafe { (*base).p_cb.post_unbind_rust = Some(Self::post_unbind_callback) }; + } + /// Creates a new instance of the registration object. pub fn new(name: &'static CStr, module: &'static ThisModule) -> impl PinInit { try_pin_init!(Self { @@ -189,6 +221,8 @@ impl Registration { // just been initialised above, so it's also valid for read. let drv = unsafe { &*(ptr as *const Opaque) }; + Self::callbacks_attach(drv); + // SAFETY: `drv` is guaranteed to be pinned until `T::unregister`. unsafe { T::register(drv, name, module) } }), diff --git a/rust/kernel/i2c.rs b/rust/kernel/i2c.rs index e86242227081..39b0a9a207fd 100644 --- a/rust/kernel/i2c.rs +++ b/rust/kernel/i2c.rs @@ -178,9 +178,9 @@ impl Adapter { // SAFETY: `remove_callback` is only ever called after a successful call to // `probe_callback`, hence it's guaranteed that `I2cClient::set_drvdata()` has been called // and stored a `Pin>`. - let data = unsafe { idev.as_ref().drvdata_obtain::() }; + let data = unsafe { idev.as_ref().drvdata_borrow::() }; - T::unbind(idev, data.as_ref()); + T::unbind(idev, data); } extern "C" fn shutdown_callback(idev: *mut bindings::i2c_client) { diff --git a/rust/kernel/pci.rs b/rust/kernel/pci.rs index 590723dcb5ae..bea76ca9c3da 100644 --- a/rust/kernel/pci.rs +++ b/rust/kernel/pci.rs @@ -123,9 +123,9 @@ impl Adapter { // SAFETY: `remove_callback` is only ever called after a successful call to // `probe_callback`, hence it's guaranteed that `Device::set_drvdata()` has been called // and stored a `Pin>`. - let data = unsafe { pdev.as_ref().drvdata_obtain::() }; + let data = unsafe { pdev.as_ref().drvdata_borrow::() }; - T::unbind(pdev, data.as_ref()); + T::unbind(pdev, data); } } diff --git a/rust/kernel/platform.rs b/rust/kernel/platform.rs index b8a681df9ddc..35a5813ffb33 100644 --- a/rust/kernel/platform.rs +++ b/rust/kernel/platform.rs @@ -101,9 +101,9 @@ impl Adapter { // SAFETY: `remove_callback` is only ever called after a successful call to // `probe_callback`, hence it's guaranteed that `Device::set_drvdata()` has been called // and stored a `Pin>`. - let data = unsafe { pdev.as_ref().drvdata_obtain::() }; + let data = unsafe { pdev.as_ref().drvdata_borrow::() }; - T::unbind(pdev, data.as_ref()); + T::unbind(pdev, data); } } diff --git a/rust/kernel/usb.rs b/rust/kernel/usb.rs index 4cf4bb1705b5..67ce5c85c619 100644 --- a/rust/kernel/usb.rs +++ b/rust/kernel/usb.rs @@ -103,9 +103,9 @@ impl Adapter { // SAFETY: `disconnect_callback` is only ever called after a successful call to // `probe_callback`, hence it's guaranteed that `Device::set_drvdata()` has been called // and stored a `Pin>`. - let data = unsafe { dev.drvdata_obtain::() }; + let data = unsafe { dev.drvdata_borrow::() }; - T::disconnect(intf, data.as_ref()); + T::disconnect(intf, data); } } -- cgit v1.2.3