Diffstat (limited to 'rust/kernel')
-rw-r--r-- rust/kernel/acpi.rs | 7
-rw-r--r-- rust/kernel/alloc.rs | 15
-rw-r--r-- rust/kernel/alloc/allocator.rs | 8
-rw-r--r-- rust/kernel/alloc/allocator_test.rs | 124
-rw-r--r-- rust/kernel/alloc/kbox.rs | 92
-rw-r--r-- rust/kernel/alloc/kvec.rs | 24
-rw-r--r-- rust/kernel/alloc/kvec/errors.rs | 2
-rw-r--r-- rust/kernel/alloc/layout.rs | 2
-rw-r--r-- rust/kernel/auxiliary.rs | 8
-rw-r--r-- rust/kernel/bitmap.rs | 600
-rw-r--r-- rust/kernel/block.rs | 13
-rw-r--r-- rust/kernel/block/mq.rs | 14
-rw-r--r-- rust/kernel/block/mq/gen_disk.rs | 60
-rw-r--r-- rust/kernel/block/mq/operations.rs | 72
-rw-r--r-- rust/kernel/block/mq/raw_writer.rs | 55
-rw-r--r-- rust/kernel/block/mq/request.rs | 94
-rw-r--r-- rust/kernel/configfs.rs | 6
-rw-r--r-- rust/kernel/cpu.rs | 1
-rw-r--r-- rust/kernel/cpufreq.rs | 8
-rw-r--r-- rust/kernel/cpumask.rs | 5
-rw-r--r-- rust/kernel/cred.rs | 6
-rw-r--r-- rust/kernel/debugfs.rs | 594
-rw-r--r-- rust/kernel/debugfs/callback_adapters.rs | 122
-rw-r--r-- rust/kernel/debugfs/entry.rs | 164
-rw-r--r-- rust/kernel/debugfs/file_ops.rs | 247
-rw-r--r-- rust/kernel/debugfs/traits.rs | 102
-rw-r--r-- rust/kernel/device.rs | 13
-rw-r--r-- rust/kernel/device/property.rs | 23
-rw-r--r-- rust/kernel/devres.rs | 4
-rw-r--r-- rust/kernel/dma.rs | 2
-rw-r--r-- rust/kernel/drm/device.rs | 6
-rw-r--r-- rust/kernel/drm/driver.rs | 2
-rw-r--r-- rust/kernel/drm/file.rs | 2
-rw-r--r-- rust/kernel/drm/gem/mod.rs | 2
-rw-r--r-- rust/kernel/drm/ioctl.rs | 2
-rw-r--r-- rust/kernel/error.rs | 66
-rw-r--r-- rust/kernel/firmware.rs | 2
-rw-r--r-- rust/kernel/fs/file.rs | 15
-rw-r--r-- rust/kernel/id_pool.rs | 226
-rw-r--r-- rust/kernel/io.rs | 1
-rw-r--r-- rust/kernel/io/poll.rs | 104
-rw-r--r-- rust/kernel/irq.rs | 24
-rw-r--r-- rust/kernel/irq/flags.rs | 124
-rw-r--r-- rust/kernel/irq/request.rs | 507
-rw-r--r-- rust/kernel/kunit.rs | 25
-rw-r--r-- rust/kernel/lib.rs | 10
-rw-r--r-- rust/kernel/list.rs | 120
-rw-r--r-- rust/kernel/miscdevice.rs | 2
-rw-r--r-- rust/kernel/net/phy.rs | 2
-rw-r--r-- rust/kernel/of.rs | 2
-rw-r--r-- rust/kernel/opp.rs | 29
-rw-r--r-- rust/kernel/pci.rs | 180
-rw-r--r-- rust/kernel/pci/id.rs | 578
-rw-r--r-- rust/kernel/pid_namespace.rs | 5
-rw-r--r-- rust/kernel/platform.rs | 178
-rw-r--r-- rust/kernel/prelude.rs | 5
-rw-r--r-- rust/kernel/processor.rs | 14
-rw-r--r-- rust/kernel/ptr.rs | 228
-rw-r--r-- rust/kernel/regulator.rs | 171
-rw-r--r-- rust/kernel/seq_file.rs | 6
-rw-r--r-- rust/kernel/str.rs | 162
-rw-r--r-- rust/kernel/sync.rs | 4
-rw-r--r-- rust/kernel/sync/arc.rs | 63
-rw-r--r-- rust/kernel/sync/aref.rs | 17
-rw-r--r-- rust/kernel/sync/atomic.rs | 551
-rw-r--r-- rust/kernel/sync/atomic/internal.rs | 265
-rw-r--r-- rust/kernel/sync/atomic/ordering.rs | 104
-rw-r--r-- rust/kernel/sync/atomic/predefine.rs | 169
-rw-r--r-- rust/kernel/sync/barrier.rs | 61
-rw-r--r-- rust/kernel/sync/refcount.rs | 113
-rw-r--r-- rust/kernel/task.rs | 7
-rw-r--r-- rust/kernel/time.rs | 163
-rw-r--r-- rust/kernel/time/hrtimer.rs | 152
-rw-r--r-- rust/kernel/time/hrtimer/arc.rs | 9
-rw-r--r-- rust/kernel/time/hrtimer/pin.rs | 9
-rw-r--r-- rust/kernel/time/hrtimer/pin_mut.rs | 12
-rw-r--r-- rust/kernel/time/hrtimer/tbox.rs | 9
77 files changed, 6424 insertions, 571 deletions
diff --git a/rust/kernel/acpi.rs b/rust/kernel/acpi.rs
index 7ae317368b00..37e1161c1298 100644
--- a/rust/kernel/acpi.rs
+++ b/rust/kernel/acpi.rs
@@ -37,11 +37,8 @@ impl DeviceId {
/// Create a new device id from an ACPI 'id' string.
#[inline(always)]
pub const fn new(id: &'static CStr) -> Self {
- build_assert!(
- id.len_with_nul() <= Self::ACPI_ID_LEN,
- "ID exceeds 16 bytes"
- );
- let src = id.as_bytes_with_nul();
+ let src = id.to_bytes_with_nul();
+ build_assert!(src.len() <= Self::ACPI_ID_LEN, "ID exceeds 16 bytes");
// Replace with `bindings::acpi_device_id::default()` once stabilized for `const`.
// SAFETY: FFI type is valid to be zero-initialized.
let mut acpi: bindings::acpi_device_id = unsafe { core::mem::zeroed() };
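The reordering above binds the byte slice first so the length check and the copy use the same `src`. For illustration, a standalone sketch of the same pattern, with a hypothetical `MyId` type and a plain `assert!` standing in for the kernel's `build_assert!` (a recent Rust toolchain is assumed for `const` `CStr` methods and C-string literals):

```rust
use core::ffi::CStr;

const ACPI_ID_LEN: usize = 16;

// Hypothetical stand-in for the kernel's `DeviceId`; `assert!` here plays the
// role `build_assert!` plays in the patch (failing the build for long IDs).
pub struct MyId([u8; ACPI_ID_LEN]);

impl MyId {
    pub const fn new(id: &'static CStr) -> Self {
        let src = id.to_bytes_with_nul();
        assert!(src.len() <= ACPI_ID_LEN, "ID exceeds 16 bytes");
        let mut buf = [0u8; ACPI_ID_LEN];
        let mut i = 0;
        while i < src.len() {
            buf[i] = src[i];
            i += 1;
        }
        Self(buf)
    }
}

// Evaluated at compile time; a too-long ID fails the build.
const ID: MyId = MyId::new(c"PNP0A03");
```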
diff --git a/rust/kernel/alloc.rs b/rust/kernel/alloc.rs
index a2c49e5494d3..9c154209423c 100644
--- a/rust/kernel/alloc.rs
+++ b/rust/kernel/alloc.rs
@@ -2,18 +2,11 @@
//! Implementation of the kernel's memory allocation infrastructure.
-#[cfg(not(any(test, testlib)))]
pub mod allocator;
pub mod kbox;
pub mod kvec;
pub mod layout;
-#[cfg(any(test, testlib))]
-pub mod allocator_test;
-
-#[cfg(any(test, testlib))]
-pub use self::allocator_test as allocator;
-
pub use self::kbox::Box;
pub use self::kbox::KBox;
pub use self::kbox::KVBox;
@@ -137,6 +130,14 @@ pub mod flags {
/// - Implementers must ensure that all trait functions abide by the guarantees documented in the
/// `# Guarantees` sections.
pub unsafe trait Allocator {
+ /// The minimum alignment satisfied by all allocations from this allocator.
+ ///
+ /// # Guarantees
+ ///
+ /// Any pointer allocated by this allocator is guaranteed to be aligned to `MIN_ALIGN` even if
+ /// the requested layout has a smaller alignment.
+ const MIN_ALIGN: usize;
+
/// Allocate memory based on `layout` and `flags`.
///
/// On success, returns a buffer represented as `NonNull<[u8]>` that satisfies the layout
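To picture what the new constant buys callers, here is a standalone sketch with a hypothetical `ToyAllocator` trait; it mirrors the `FOREIGN_ALIGN` computation that the kbox.rs hunk below performs with `A::MIN_ALIGN`:

```rust
// Hypothetical trait and allocator; only the `MIN_ALIGN` mechanics are real.
trait ToyAllocator {
    const MIN_ALIGN: usize;
}

struct ToyKmalloc;

impl ToyAllocator for ToyKmalloc {
    // e.g. `ARCH_KMALLOC_MINALIGN` is 8 or 16 on common architectures.
    const MIN_ALIGN: usize = 8;
}

// The effective alignment of an allocation of `T`: the allocator's floor,
// unless the type itself demands more.
const fn effective_align<T, A: ToyAllocator>() -> usize {
    let t = core::mem::align_of::<T>();
    if t < A::MIN_ALIGN {
        A::MIN_ALIGN
    } else {
        t
    }
}

// A `u8` allocation still comes back 8-byte aligned...
const _: () = assert!(effective_align::<u8, ToyKmalloc>() == 8);

// ...while types with stricter alignment keep their own requirement.
#[repr(align(64))]
struct Cacheline([u8; 64]);
const _: () = assert!(effective_align::<Cacheline, ToyKmalloc>() == 64);
```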
diff --git a/rust/kernel/alloc/allocator.rs b/rust/kernel/alloc/allocator.rs
index 2692cf90c948..869d9fd69527 100644
--- a/rust/kernel/alloc/allocator.rs
+++ b/rust/kernel/alloc/allocator.rs
@@ -17,6 +17,8 @@ use crate::alloc::{AllocError, Allocator};
use crate::bindings;
use crate::pr_warn;
+const ARCH_KMALLOC_MINALIGN: usize = bindings::ARCH_KMALLOC_MINALIGN;
+
/// The contiguous kernel allocator.
///
/// `Kmalloc` is typically used for physically contiguous allocations up to page size, but also
@@ -128,6 +130,8 @@ impl Kmalloc {
// - passing a pointer to a valid memory allocation is OK,
// - `realloc` satisfies the guarantees, since `ReallocFunc::call` has the same.
unsafe impl Allocator for Kmalloc {
+ const MIN_ALIGN: usize = ARCH_KMALLOC_MINALIGN;
+
#[inline]
unsafe fn realloc(
ptr: Option<NonNull<u8>>,
@@ -147,6 +151,8 @@ unsafe impl Allocator for Kmalloc {
// - passing a pointer to a valid memory allocation is OK,
// - `realloc` satisfies the guarantees, since `ReallocFunc::call` has the same.
unsafe impl Allocator for Vmalloc {
+ const MIN_ALIGN: usize = kernel::page::PAGE_SIZE;
+
#[inline]
unsafe fn realloc(
ptr: Option<NonNull<u8>>,
@@ -171,6 +177,8 @@ unsafe impl Allocator for Vmalloc {
// - passing a pointer to a valid memory allocation is OK,
// - `realloc` satisfies the guarantees, since `ReallocFunc::call` has the same.
unsafe impl Allocator for KVmalloc {
+ const MIN_ALIGN: usize = ARCH_KMALLOC_MINALIGN;
+
#[inline]
unsafe fn realloc(
ptr: Option<NonNull<u8>>,
diff --git a/rust/kernel/alloc/allocator_test.rs b/rust/kernel/alloc/allocator_test.rs
deleted file mode 100644
index 90dd987d40e4..000000000000
--- a/rust/kernel/alloc/allocator_test.rs
+++ /dev/null
@@ -1,124 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-//! So far the kernel's `Box` and `Vec` types can't be used by userspace test cases, since all users
-//! of those types (e.g. `CString`) use kernel allocators for instantiation.
-//!
-//! In order to allow userspace test cases to make use of such types as well, implement the
-//! `Cmalloc` allocator within the `allocator_test` module and type alias all kernel allocators to
-//! `Cmalloc`. The `Cmalloc` allocator uses libc's `realloc()` function as allocator backend.
-
-#![allow(missing_docs)]
-
-use super::{flags::*, AllocError, Allocator, Flags};
-use core::alloc::Layout;
-use core::cmp;
-use core::ptr;
-use core::ptr::NonNull;
-
-/// The userspace allocator based on libc.
-pub struct Cmalloc;
-
-pub type Kmalloc = Cmalloc;
-pub type Vmalloc = Kmalloc;
-pub type KVmalloc = Kmalloc;
-
-impl Cmalloc {
- /// Returns a [`Layout`] that makes [`Kmalloc`] fulfill the requested size and alignment of
- /// `layout`.
- pub fn aligned_layout(layout: Layout) -> Layout {
- // Note that `layout.size()` (after padding) is guaranteed to be a multiple of
- // `layout.align()` which together with the slab guarantees means that `Kmalloc` will return
- // a properly aligned object (see comments in `kmalloc()` for more information).
- layout.pad_to_align()
- }
-}
-
-extern "C" {
- #[link_name = "aligned_alloc"]
- fn libc_aligned_alloc(align: usize, size: usize) -> *mut crate::ffi::c_void;
-
- #[link_name = "free"]
- fn libc_free(ptr: *mut crate::ffi::c_void);
-}
-
-// SAFETY:
-// - memory remains valid until it is explicitly freed,
-// - passing a pointer to a valid memory allocation created by this `Allocator` is always OK,
-// - `realloc` provides the guarantees as provided in the `# Guarantees` section.
-unsafe impl Allocator for Cmalloc {
- unsafe fn realloc(
- ptr: Option<NonNull<u8>>,
- layout: Layout,
- old_layout: Layout,
- flags: Flags,
- ) -> Result<NonNull<[u8]>, AllocError> {
- let src = match ptr {
- Some(src) => {
- if old_layout.size() == 0 {
- ptr::null_mut()
- } else {
- src.as_ptr()
- }
- }
- None => ptr::null_mut(),
- };
-
- if layout.size() == 0 {
- // SAFETY: `src` is either NULL or was previously allocated with this `Allocator`
- unsafe { libc_free(src.cast()) };
-
- return Ok(NonNull::slice_from_raw_parts(
- crate::alloc::dangling_from_layout(layout),
- 0,
- ));
- }
-
- // ISO C (ISO/IEC 9899:2011) defines `aligned_alloc`:
- //
- // > The value of alignment shall be a valid alignment supported by the implementation
- // [...].
- //
- // As an example of the "supported by the implementation" requirement, POSIX.1-2001 (IEEE
- // 1003.1-2001) defines `posix_memalign`:
- //
- // > The value of alignment shall be a power of two multiple of sizeof (void *).
- //
- // and POSIX-based implementations of `aligned_alloc` inherit this requirement. At the time
- // of writing, this is known to be the case on macOS (but not in glibc).
- //
- // Satisfy the stricter requirement to avoid spurious test failures on some platforms.
- let min_align = core::mem::size_of::<*const crate::ffi::c_void>();
- let layout = layout.align_to(min_align).map_err(|_| AllocError)?;
- let layout = layout.pad_to_align();
-
- // SAFETY: Returns either NULL or a pointer to a memory allocation that satisfies or
- // exceeds the given size and alignment requirements.
- let dst = unsafe { libc_aligned_alloc(layout.align(), layout.size()) }.cast::<u8>();
- let dst = NonNull::new(dst).ok_or(AllocError)?;
-
- if flags.contains(__GFP_ZERO) {
- // SAFETY: The preceding calls to `libc_aligned_alloc` and `NonNull::new`
- // guarantee that `dst` points to memory of at least `layout.size()` bytes.
- unsafe { dst.as_ptr().write_bytes(0, layout.size()) };
- }
-
- if !src.is_null() {
- // SAFETY:
- // - `src` has previously been allocated with this `Allocator`; `dst` has just been
- // newly allocated, hence the memory regions do not overlap.
- // - both` src` and `dst` are properly aligned and valid for reads and writes
- unsafe {
- ptr::copy_nonoverlapping(
- src,
- dst.as_ptr(),
- cmp::min(layout.size(), old_layout.size()),
- )
- };
- }
-
- // SAFETY: `src` is either NULL or was previously allocated with this `Allocator`
- unsafe { libc_free(src.cast()) };
-
- Ok(NonNull::slice_from_raw_parts(dst, layout.size()))
- }
-}
diff --git a/rust/kernel/alloc/kbox.rs b/rust/kernel/alloc/kbox.rs
index 856d05aa60f1..27c4b5a9b61d 100644
--- a/rust/kernel/alloc/kbox.rs
+++ b/rust/kernel/alloc/kbox.rs
@@ -7,7 +7,6 @@ use super::allocator::{KVmalloc, Kmalloc, Vmalloc};
use super::{AllocError, Allocator, Flags};
use core::alloc::Layout;
use core::borrow::{Borrow, BorrowMut};
-use core::fmt;
use core::marker::PhantomData;
use core::mem::ManuallyDrop;
use core::mem::MaybeUninit;
@@ -17,6 +16,7 @@ use core::ptr::NonNull;
use core::result::Result;
use crate::ffi::c_void;
+use crate::fmt;
use crate::init::InPlaceInit;
use crate::types::ForeignOwnable;
use pin_init::{InPlaceWrite, Init, PinInit, ZeroableOption};
@@ -290,6 +290,83 @@ where
Ok(Self::new(x, flags)?.into())
}
+ /// Construct a pinned slice of elements `Pin<Box<[T], A>>`.
+ ///
+ /// This is a convenient way to create e.g. slices of structures containing spinlocks
+ /// or mutexes.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use kernel::sync::{new_spinlock, SpinLock};
+ ///
+ /// struct Inner {
+ /// a: u32,
+ /// b: u32,
+ /// }
+ ///
+ /// #[pin_data]
+ /// struct Example {
+ /// c: u32,
+ /// #[pin]
+ /// d: SpinLock<Inner>,
+ /// }
+ ///
+ /// impl Example {
+ /// fn new() -> impl PinInit<Self, Error> {
+ /// try_pin_init!(Self {
+ /// c: 10,
+ /// d <- new_spinlock!(Inner { a: 20, b: 30 }),
+ /// })
+ /// }
+ /// }
+ ///
+ /// // Allocate a boxed slice of 10 `Example`s.
+ /// let s = KBox::pin_slice(
+ /// | _i | Example::new(),
+ /// 10,
+ /// GFP_KERNEL
+ /// )?;
+ ///
+ /// assert_eq!(s[5].c, 10);
+ /// assert_eq!(s[3].d.lock().a, 20);
+ /// # Ok::<(), Error>(())
+ /// ```
+ pub fn pin_slice<Func, Item, E>(
+ mut init: Func,
+ len: usize,
+ flags: Flags,
+ ) -> Result<Pin<Box<[T], A>>, E>
+ where
+ Func: FnMut(usize) -> Item,
+ Item: PinInit<T, E>,
+ E: From<AllocError>,
+ {
+ let mut buffer = super::Vec::<T, A>::with_capacity(len, flags)?;
+ for i in 0..len {
+ let ptr = buffer.spare_capacity_mut().as_mut_ptr().cast();
+ // SAFETY:
+ // - `ptr` is a valid pointer to uninitialized memory.
+ // - `ptr` is not used if an error is returned.
+ // - `ptr` won't be moved until it is dropped, i.e. it is pinned.
+ unsafe { init(i).__pinned_init(ptr)? };
+
+ // SAFETY:
+ // - `i + 1 <= len`, hence we don't exceed the capacity, due to the call to
+ // `with_capacity()` above.
+ // - The new value at index `buffer.len()` is the only element being added here, and
+ // it has been initialized above by `init(i).__pinned_init(ptr)`.
+ unsafe { buffer.inc_len(1) };
+ }
+
+ let (ptr, _, _) = buffer.into_raw_parts();
+ let slice = core::ptr::slice_from_raw_parts_mut(ptr, len);
+
+ // SAFETY: `slice` points to an allocation allocated with `A` (`buffer`) and holds a valid
+ // `[T]`.
+ Ok(Pin::from(unsafe { Box::from_raw(slice) }))
+ }
+
/// Convert a [`Box<T,A>`] to a [`Pin<Box<T,A>>`]. If `T` does not implement
/// [`Unpin`], then `x` will be pinned in memory and can't be moved.
pub fn into_pin(this: Self) -> Pin<Self> {
@@ -401,12 +478,17 @@ where
}
// SAFETY: The pointer returned by `into_foreign` comes from a well aligned
-// pointer to `T`.
+// pointer to `T` allocated by `A`.
unsafe impl<T: 'static, A> ForeignOwnable for Box<T, A>
where
A: Allocator,
{
- const FOREIGN_ALIGN: usize = core::mem::align_of::<T>();
+ const FOREIGN_ALIGN: usize = if core::mem::align_of::<T>() < A::MIN_ALIGN {
+ A::MIN_ALIGN
+ } else {
+ core::mem::align_of::<T>()
+ };
+
type Borrowed<'a> = &'a T;
type BorrowedMut<'a> = &'a mut T;
@@ -435,12 +517,12 @@ where
}
// SAFETY: The pointer returned by `into_foreign` comes from a well aligned
-// pointer to `T`.
+// pointer to `T` allocated by `A`.
unsafe impl<T: 'static, A> ForeignOwnable for Pin<Box<T, A>>
where
A: Allocator,
{
- const FOREIGN_ALIGN: usize = core::mem::align_of::<T>();
+ const FOREIGN_ALIGN: usize = <Box<T, A> as ForeignOwnable>::FOREIGN_ALIGN;
type Borrowed<'a> = Pin<&'a T>;
type BorrowedMut<'a> = Pin<&'a mut T>;
diff --git a/rust/kernel/alloc/kvec.rs b/rust/kernel/alloc/kvec.rs
index 3c72e0bdddb8..dfc101e03f35 100644
--- a/rust/kernel/alloc/kvec.rs
+++ b/rust/kernel/alloc/kvec.rs
@@ -7,9 +7,9 @@ use super::{
layout::ArrayLayout,
AllocError, Allocator, Box, Flags,
};
+use crate::fmt;
use core::{
borrow::{Borrow, BorrowMut},
- fmt,
marker::PhantomData,
mem::{ManuallyDrop, MaybeUninit},
ops::Deref,
@@ -175,7 +175,7 @@ where
/// Returns the number of elements that can be stored within the vector without allocating
/// additional memory.
- pub fn capacity(&self) -> usize {
+ pub const fn capacity(&self) -> usize {
if const { Self::is_zst() } {
usize::MAX
} else {
@@ -185,7 +185,7 @@ where
/// Returns the number of elements stored within the vector.
#[inline]
- pub fn len(&self) -> usize {
+ pub const fn len(&self) -> usize {
self.len
}
@@ -196,7 +196,7 @@ where
/// - `additional` must be less than or equal to `self.capacity - self.len`.
/// - All elements within the interval [`self.len`,`self.len + additional`) must be initialized.
#[inline]
- pub unsafe fn inc_len(&mut self, additional: usize) {
+ pub const unsafe fn inc_len(&mut self, additional: usize) {
// Guaranteed by the type invariant to never underflow.
debug_assert!(additional <= self.capacity() - self.len());
// INVARIANT: By the safety requirements of this method this represents the exact number of
@@ -224,6 +224,16 @@ where
}
/// Returns a slice of the entire vector.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// let mut v = KVec::new();
+ /// v.push(1, GFP_KERNEL)?;
+ /// v.push(2, GFP_KERNEL)?;
+ /// assert_eq!(v.as_slice(), &[1, 2]);
+ /// # Ok::<(), Error>(())
+ /// ```
#[inline]
pub fn as_slice(&self) -> &[T] {
self
@@ -245,7 +255,7 @@ where
/// Returns a raw pointer to the vector's backing buffer, or, if `T` is a ZST, a dangling raw
/// pointer.
#[inline]
- pub fn as_ptr(&self) -> *const T {
+ pub const fn as_ptr(&self) -> *const T {
self.ptr.as_ptr()
}
@@ -261,7 +271,7 @@ where
/// assert!(!v.is_empty());
/// ```
#[inline]
- pub fn is_empty(&self) -> bool {
+ pub const fn is_empty(&self) -> bool {
self.len() == 0
}
@@ -1294,7 +1304,7 @@ impl<'vec, T> Drop for DrainAll<'vec, T> {
}
}
-#[macros::kunit_tests(rust_kvec_kunit)]
+#[macros::kunit_tests(rust_kvec)]
mod tests {
use super::*;
use crate::prelude::*;
diff --git a/rust/kernel/alloc/kvec/errors.rs b/rust/kernel/alloc/kvec/errors.rs
index 348b8d27e102..21a920a4b09b 100644
--- a/rust/kernel/alloc/kvec/errors.rs
+++ b/rust/kernel/alloc/kvec/errors.rs
@@ -2,7 +2,7 @@
//! Errors for the [`Vec`] type.
-use core::fmt::{self, Debug, Formatter};
+use kernel::fmt::{self, Debug, Formatter};
use kernel::prelude::*;
/// Error type for [`Vec::push_within_capacity`].
diff --git a/rust/kernel/alloc/layout.rs b/rust/kernel/alloc/layout.rs
index 93ed514f7cc7..52cbf61c4539 100644
--- a/rust/kernel/alloc/layout.rs
+++ b/rust/kernel/alloc/layout.rs
@@ -80,7 +80,7 @@ impl<T> ArrayLayout<T> {
/// # Safety
///
/// `len` must be a value, for which `len * size_of::<T>() <= isize::MAX` is true.
- pub unsafe fn new_unchecked(len: usize) -> Self {
+ pub const unsafe fn new_unchecked(len: usize) -> Self {
// INVARIANT: By the safety requirements of this function
// `len * size_of::<T>() <= isize::MAX`.
Self {
diff --git a/rust/kernel/auxiliary.rs b/rust/kernel/auxiliary.rs
index 4749fb6bffef..e11848bbf206 100644
--- a/rust/kernel/auxiliary.rs
+++ b/rust/kernel/auxiliary.rs
@@ -55,7 +55,7 @@ impl<T: Driver + 'static> Adapter<T> {
extern "C" fn probe_callback(
adev: *mut bindings::auxiliary_device,
id: *const bindings::auxiliary_device_id,
- ) -> kernel::ffi::c_int {
+ ) -> c_int {
// SAFETY: The auxiliary bus only ever calls the probe callback with a valid pointer to a
// `struct auxiliary_device`.
//
@@ -105,8 +105,8 @@ pub struct DeviceId(bindings::auxiliary_device_id);
impl DeviceId {
/// Create a new [`DeviceId`] from name.
pub const fn new(modname: &'static CStr, name: &'static CStr) -> Self {
- let name = name.as_bytes_with_nul();
- let modname = modname.as_bytes_with_nul();
+ let name = name.to_bytes_with_nul();
+ let modname = modname.to_bytes_with_nul();
// TODO: Replace with `bindings::auxiliary_device_id::default()` once stabilized for
// `const`.
@@ -245,7 +245,7 @@ kernel::impl_device_context_deref!(unsafe { Device });
kernel::impl_device_context_into_aref!(Device);
// SAFETY: Instances of `Device` are always reference-counted.
-unsafe impl crate::types::AlwaysRefCounted for Device {
+unsafe impl crate::sync::aref::AlwaysRefCounted for Device {
fn inc_ref(&self) {
// SAFETY: The existence of a shared reference guarantees that the refcount is non-zero.
unsafe { bindings::get_device(self.as_ref().as_raw()) };
diff --git a/rust/kernel/bitmap.rs b/rust/kernel/bitmap.rs
new file mode 100644
index 000000000000..f45915694454
--- /dev/null
+++ b/rust/kernel/bitmap.rs
@@ -0,0 +1,600 @@
+// SPDX-License-Identifier: GPL-2.0
+
+// Copyright (C) 2025 Google LLC.
+
+//! Rust API for bitmap.
+//!
+//! C headers: [`include/linux/bitmap.h`](srctree/include/linux/bitmap.h).
+
+use crate::alloc::{AllocError, Flags};
+use crate::bindings;
+#[cfg(not(CONFIG_RUST_BITMAP_HARDENED))]
+use crate::pr_err;
+use core::ptr::NonNull;
+
+const BITS_PER_LONG: usize = bindings::BITS_PER_LONG as usize;
+
+/// Represents a C bitmap. Wraps underlying C bitmap API.
+///
+/// # Invariants
+///
+/// Must reference a `[c_ulong]` long enough to fit `data.len()` bits.
+#[cfg_attr(CONFIG_64BIT, repr(align(8)))]
+#[cfg_attr(not(CONFIG_64BIT), repr(align(4)))]
+pub struct Bitmap {
+ data: [()],
+}
+
+impl Bitmap {
+ /// Borrows a C bitmap.
+ ///
+ /// # Safety
+ ///
+ /// * `ptr` holds a non-null address of an initialized array of `unsigned long`
+ /// that is large enough to hold `nbits` bits.
+ /// * the array must not be freed for the lifetime of this [`Bitmap`]
+ /// * concurrent access only happens through atomic operations
+ pub unsafe fn from_raw<'a>(ptr: *const usize, nbits: usize) -> &'a Bitmap {
+ let data: *const [()] = core::ptr::slice_from_raw_parts(ptr.cast(), nbits);
+ // INVARIANT: `data` references an initialized array that can hold `nbits` bits.
+ // SAFETY:
+ // The caller guarantees that `data` (derived from `ptr` and `nbits`)
+ // points to a valid, initialized, and appropriately sized memory region
+ // that will not be freed for the lifetime 'a.
+ // We are casting `*const [()]` to `*const Bitmap`. The `Bitmap`
+ // struct is a ZST with a `data: [()]` field. This means its layout
+ // is compatible with a slice of `()`, and effectively it's a "thin pointer"
+ // (its size is 0 and alignment is 1). The `slice_from_raw_parts`
+ // function correctly encodes the length (number of bits, not elements)
+ // into the metadata of the fat pointer. Therefore, dereferencing this
+ // pointer as `&Bitmap` is safe given the caller's guarantees.
+ unsafe { &*(data as *const Bitmap) }
+ }
+
+ /// Borrows a C bitmap exclusively.
+ ///
+ /// # Safety
+ ///
+ /// * `ptr` holds a non-null address of an initialized array of `unsigned long`
+ /// that is large enough to hold `nbits` bits.
+ /// * the array must not be freed for the lifetime of this [`Bitmap`]
+ /// * no concurrent access may happen.
+ pub unsafe fn from_raw_mut<'a>(ptr: *mut usize, nbits: usize) -> &'a mut Bitmap {
+ let data: *mut [()] = core::ptr::slice_from_raw_parts_mut(ptr.cast(), nbits);
+ // INVARIANT: `data` references an initialized array that can hold `nbits` bits.
+ // SAFETY:
+ // The caller guarantees that `data` (derived from `ptr` and `nbits`)
+ // points to a valid, initialized, and appropriately sized memory region
+ // that will not be freed for the lifetime 'a.
+ // Furthermore, the caller guarantees no concurrent access will happen,
+ // which upholds the exclusivity requirement for a mutable reference.
+ // Similar to `from_raw`, casting `*mut [()]` to `*mut Bitmap` is
+ // safe because `Bitmap` is a ZST with a `data: [()]` field,
+ // making its layout compatible with a slice of `()`.
+ unsafe { &mut *(data as *mut Bitmap) }
+ }
+
+ /// Returns a raw pointer to the backing [`Bitmap`].
+ pub fn as_ptr(&self) -> *const usize {
+ core::ptr::from_ref::<Bitmap>(self).cast::<usize>()
+ }
+
+ /// Returns a mutable raw pointer to the backing [`Bitmap`].
+ pub fn as_mut_ptr(&mut self) -> *mut usize {
+ core::ptr::from_mut::<Bitmap>(self).cast::<usize>()
+ }
+
+ /// Returns the length of this [`Bitmap`].
+ #[expect(clippy::len_without_is_empty)]
+ pub fn len(&self) -> usize {
+ self.data.len()
+ }
+}
+
+/// Holds either a pointer to array of `unsigned long` or a small bitmap.
+#[repr(C)]
+union BitmapRepr {
+ bitmap: usize,
+ ptr: NonNull<usize>,
+}
+
+macro_rules! bitmap_assert {
+ ($cond:expr, $($arg:tt)+) => {
+ #[cfg(CONFIG_RUST_BITMAP_HARDENED)]
+ assert!($cond, $($arg)*);
+ }
+}
+
+macro_rules! bitmap_assert_return {
+ ($cond:expr, $($arg:tt)+) => {
+ #[cfg(CONFIG_RUST_BITMAP_HARDENED)]
+ assert!($cond, $($arg)*);
+
+ #[cfg(not(CONFIG_RUST_BITMAP_HARDENED))]
+ if !($cond) {
+ pr_err!($($arg)*);
+ return
+ }
+ }
+}
+
+/// Represents an owned bitmap.
+///
+/// Wraps underlying C bitmap API. See [`Bitmap`] for available
+/// methods.
+///
+/// # Examples
+///
+/// Basic usage
+///
+/// ```
+/// use kernel::alloc::flags::GFP_KERNEL;
+/// use kernel::bitmap::BitmapVec;
+///
+/// let mut b = BitmapVec::new(16, GFP_KERNEL)?;
+///
+/// assert_eq!(16, b.len());
+/// for i in 0..16 {
+/// if i % 4 == 0 {
+/// b.set_bit(i);
+/// }
+/// }
+/// assert_eq!(Some(0), b.next_bit(0));
+/// assert_eq!(Some(1), b.next_zero_bit(0));
+/// assert_eq!(Some(4), b.next_bit(1));
+/// assert_eq!(Some(5), b.next_zero_bit(4));
+/// assert_eq!(Some(12), b.last_bit());
+/// # Ok::<(), Error>(())
+/// ```
+///
+/// # Invariants
+///
+/// * `nbits` is `<= i32::MAX` and never changes.
+/// * if `nbits <= bindings::BITS_PER_LONG`, then `repr` is a `usize`.
+/// * otherwise, `repr` holds a non-null pointer to an initialized
+/// array of `unsigned long` that is large enough to hold `nbits` bits.
+pub struct BitmapVec {
+ /// Representation of bitmap.
+ repr: BitmapRepr,
+ /// Length of this bitmap. Must be `<= i32::MAX`.
+ nbits: usize,
+}
+
+impl core::ops::Deref for BitmapVec {
+ type Target = Bitmap;
+
+ fn deref(&self) -> &Bitmap {
+ let ptr = if self.nbits <= BITS_PER_LONG {
+ // SAFETY: Bitmap is represented inline.
+ unsafe { core::ptr::addr_of!(self.repr.bitmap) }
+ } else {
+ // SAFETY: Bitmap is represented as array of `unsigned long`.
+ unsafe { self.repr.ptr.as_ptr() }
+ };
+
+ // SAFETY: We got the right pointer and invariants of [`Bitmap`] hold.
+ // An inline bitmap is treated like an array with single element.
+ unsafe { Bitmap::from_raw(ptr, self.nbits) }
+ }
+}
+
+impl core::ops::DerefMut for BitmapVec {
+ fn deref_mut(&mut self) -> &mut Bitmap {
+ let ptr = if self.nbits <= BITS_PER_LONG {
+ // SAFETY: Bitmap is represented inline.
+ unsafe { core::ptr::addr_of_mut!(self.repr.bitmap) }
+ } else {
+ // SAFETY: Bitmap is represented as array of `unsigned long`.
+ unsafe { self.repr.ptr.as_ptr() }
+ };
+
+ // SAFETY: We got the right pointer and invariants of [`BitmapVec`] hold.
+ // An inline bitmap is treated like an array with single element.
+ unsafe { Bitmap::from_raw_mut(ptr, self.nbits) }
+ }
+}
+
+/// Enable ownership transfer to other threads.
+///
+/// SAFETY: We own the underlying bitmap representation.
+unsafe impl Send for BitmapVec {}
+
+/// Enable unsynchronized concurrent access to [`BitmapVec`] through shared references.
+///
+/// SAFETY: `deref()` will return a reference to a [`Bitmap`]. Its methods
+/// take immutable references are either atomic or read-only.
+unsafe impl Sync for BitmapVec {}
+
+impl Drop for BitmapVec {
+ fn drop(&mut self) {
+ if self.nbits <= BITS_PER_LONG {
+ return;
+ }
+ // SAFETY: `self.ptr` was returned by the C `bitmap_zalloc`.
+ //
+ // INVARIANT: there is no other use of the `self.ptr` after this
+ // call and the value is being dropped so the broken invariant is
+ // not observable on function exit.
+ unsafe { bindings::bitmap_free(self.repr.ptr.as_ptr()) };
+ }
+}
+
+impl BitmapVec {
+ /// Constructs a new [`BitmapVec`].
+ ///
+ /// Fails with [`AllocError`] when the [`BitmapVec`] could not be allocated. This
+ /// includes the case when `nbits` is greater than `i32::MAX`.
+ #[inline]
+ pub fn new(nbits: usize, flags: Flags) -> Result<Self, AllocError> {
+ if nbits <= BITS_PER_LONG {
+ return Ok(BitmapVec {
+ repr: BitmapRepr { bitmap: 0 },
+ nbits,
+ });
+ }
+ if nbits > i32::MAX.try_into().unwrap() {
+ return Err(AllocError);
+ }
+ let nbits_u32 = u32::try_from(nbits).unwrap();
+ // SAFETY: `BITS_PER_LONG < nbits` and `nbits <= i32::MAX`.
+ let ptr = unsafe { bindings::bitmap_zalloc(nbits_u32, flags.as_raw()) };
+ let ptr = NonNull::new(ptr).ok_or(AllocError)?;
+ // INVARIANT: `ptr` returned by C `bitmap_zalloc` and `nbits` checked.
+ Ok(BitmapVec {
+ repr: BitmapRepr { ptr },
+ nbits,
+ })
+ }
+
+ /// Returns the length of this [`BitmapVec`].
+ #[allow(clippy::len_without_is_empty)]
+ #[inline]
+ pub fn len(&self) -> usize {
+ self.nbits
+ }
+
+ /// Fills this `Bitmap` with random bits.
+ #[cfg(CONFIG_FIND_BIT_BENCHMARK_RUST)]
+ pub fn fill_random(&mut self) {
+ // SAFETY: `self.as_mut_ptr` points to either an array of the
+ // appropriate length or one usize.
+ unsafe {
+ bindings::get_random_bytes(
+ self.as_mut_ptr().cast::<ffi::c_void>(),
+ usize::div_ceil(self.nbits, bindings::BITS_PER_LONG as usize)
+ * bindings::BITS_PER_LONG as usize
+ / 8,
+ );
+ }
+ }
+}
+
+impl Bitmap {
+ /// Set bit with index `index`.
+ ///
+ /// ATTENTION: `set_bit` is non-atomic, which differs from the naming
+ /// convention in C code. The corresponding C function is `__set_bit`.
+ ///
+ /// If CONFIG_RUST_BITMAP_HARDENED is not enabled and `index` is greater than
+ /// or equal to `self.len()`, does nothing.
+ ///
+ /// # Panics
+ ///
+ /// Panics if CONFIG_RUST_BITMAP_HARDENED is enabled and `index` is greater than
+ /// or equal to `self.len()`.
+ #[inline]
+ pub fn set_bit(&mut self, index: usize) {
+ bitmap_assert_return!(
+ index < self.len(),
+ "Bit `index` must be < {}, was {}",
+ self.len(),
+ index
+ );
+ // SAFETY: Bit `index` is within bounds.
+ unsafe { bindings::__set_bit(index, self.as_mut_ptr()) };
+ }
+
+ /// Set bit with index `index`, atomically.
+ ///
+ /// This is a relaxed atomic operation (no implied memory barriers).
+ ///
+ /// ATTENTION: The naming convention differs from C, where the corresponding
+ /// function is called `set_bit`.
+ ///
+ /// If CONFIG_RUST_BITMAP_HARDENED is not enabled and `index` is greater than
+ /// or equal to `self.len()`, does nothing.
+ ///
+ /// # Panics
+ ///
+ /// Panics if CONFIG_RUST_BITMAP_HARDENED is enabled and `index` is greater than
+ /// or equal to `self.len()`.
+ #[inline]
+ pub fn set_bit_atomic(&self, index: usize) {
+ bitmap_assert_return!(
+ index < self.len(),
+ "Bit `index` must be < {}, was {}",
+ self.len(),
+ index
+ );
+ // SAFETY: `index` is within bounds and the caller has ensured that
+ // there is no mix of non-atomic and atomic operations.
+ unsafe { bindings::set_bit(index, self.as_ptr().cast_mut()) };
+ }
+
+ /// Clear `index` bit.
+ ///
+ /// ATTENTION: `clear_bit` is non-atomic, which differs from the naming
+ /// convention in C code. The corresponding C function is `__clear_bit`.
+ ///
+ /// If CONFIG_RUST_BITMAP_HARDENED is not enabled and `index` is greater than
+ /// or equal to `self.len()`, does nothing.
+ ///
+ /// # Panics
+ ///
+ /// Panics if CONFIG_RUST_BITMAP_HARDENED is enabled and `index` is greater than
+ /// or equal to `self.len()`.
+ #[inline]
+ pub fn clear_bit(&mut self, index: usize) {
+ bitmap_assert_return!(
+ index < self.len(),
+ "Bit `index` must be < {}, was {}",
+ self.len(),
+ index
+ );
+ // SAFETY: `index` is within bounds.
+ unsafe { bindings::__clear_bit(index, self.as_mut_ptr()) };
+ }
+
+ /// Clear `index` bit, atomically.
+ ///
+ /// This is a relaxed atomic operation (no implied memory barriers).
+ ///
+ /// ATTENTION: The naming convention differs from C, where the corresponding
+ /// function is called `clear_bit`.
+ ///
+ /// If CONFIG_RUST_BITMAP_HARDENED is not enabled and `index` is greater than
+ /// or equal to `self.len()`, does nothing.
+ ///
+ /// # Panics
+ ///
+ /// Panics if CONFIG_RUST_BITMAP_HARDENED is enabled and `index` is greater than
+ /// or equal to `self.len()`.
+ #[inline]
+ pub fn clear_bit_atomic(&self, index: usize) {
+ bitmap_assert_return!(
+ index < self.len(),
+ "Bit `index` must be < {}, was {}",
+ self.len(),
+ index
+ );
+ // SAFETY: `index` is within bounds and the caller has ensured that
+ // there is no mix of non-atomic and atomic operations.
+ unsafe { bindings::clear_bit(index, self.as_ptr().cast_mut()) };
+ }
+
+ /// Copy `src` into this [`Bitmap`] and set any remaining bits to zero.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use kernel::alloc::{AllocError, flags::GFP_KERNEL};
+ /// use kernel::bitmap::BitmapVec;
+ ///
+ /// let mut long_bitmap = BitmapVec::new(256, GFP_KERNEL)?;
+ ///
+ /// assert_eq!(None, long_bitmap.last_bit());
+ ///
+ /// let mut short_bitmap = BitmapVec::new(16, GFP_KERNEL)?;
+ ///
+ /// short_bitmap.set_bit(7);
+ /// long_bitmap.copy_and_extend(&short_bitmap);
+ /// assert_eq!(Some(7), long_bitmap.last_bit());
+ ///
+ /// # Ok::<(), AllocError>(())
+ /// ```
+ #[inline]
+ pub fn copy_and_extend(&mut self, src: &Bitmap) {
+ let len = core::cmp::min(src.len(), self.len());
+ // SAFETY: access to `self` and `src` is within bounds.
+ unsafe {
+ bindings::bitmap_copy_and_extend(
+ self.as_mut_ptr(),
+ src.as_ptr(),
+ len as u32,
+ self.len() as u32,
+ )
+ };
+ }
+
+ /// Finds last set bit.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use kernel::alloc::{AllocError, flags::GFP_KERNEL};
+ /// use kernel::bitmap::BitmapVec;
+ ///
+ /// let bitmap = BitmapVec::new(64, GFP_KERNEL)?;
+ ///
+ /// match bitmap.last_bit() {
+ /// Some(idx) => {
+ /// pr_info!("The last bit has index {idx}.\n");
+ /// }
+ /// None => {
+ /// pr_info!("All bits in this bitmap are 0.\n");
+ /// }
+ /// }
+ /// # Ok::<(), AllocError>(())
+ /// ```
+ #[inline]
+ pub fn last_bit(&self) -> Option<usize> {
+ // SAFETY: `_find_last_bit` access is within bounds due to invariant.
+ let index = unsafe { bindings::_find_last_bit(self.as_ptr(), self.len()) };
+ if index >= self.len() {
+ None
+ } else {
+ Some(index)
+ }
+ }
+
+ /// Finds next set bit, starting from `start`.
+ ///
+ /// Returns `None` if `start` is greater than or equal to `self.len()`.
+ #[inline]
+ pub fn next_bit(&self, start: usize) -> Option<usize> {
+ bitmap_assert!(
+ start < self.len(),
+ "`start` must be < {} was {}",
+ self.len(),
+ start
+ );
+ // SAFETY: `_find_next_bit` tolerates out-of-bounds arguments and returns a
+ // value larger than or equal to `self.len()` in that case.
+ let index = unsafe { bindings::_find_next_bit(self.as_ptr(), self.len(), start) };
+ if index >= self.len() {
+ None
+ } else {
+ Some(index)
+ }
+ }
+
+ /// Finds next zero bit, starting from `start`.
+ /// Returns `None` if `start` is greater than or equal to `self.len()`.
+ #[inline]
+ pub fn next_zero_bit(&self, start: usize) -> Option<usize> {
+ bitmap_assert!(
+ start < self.len(),
+ "`start` must be < {} was {}",
+ self.len(),
+ start
+ );
+ // SAFETY: `_find_next_zero_bit` tolerates out-of-bounds arguments and returns a
+ // value larger than or equal to `self.len()` in that case.
+ let index = unsafe { bindings::_find_next_zero_bit(self.as_ptr(), self.len(), start) };
+ if index >= self.len() {
+ None
+ } else {
+ Some(index)
+ }
+ }
+}
+
+use macros::kunit_tests;
+
+#[kunit_tests(rust_kernel_bitmap)]
+mod tests {
+ use super::*;
+ use kernel::alloc::flags::GFP_KERNEL;
+
+ #[test]
+ fn bitmap_borrow() {
+ let fake_bitmap: [usize; 2] = [0, 0];
+ // SAFETY: `fake_bitmap` is an array of expected length.
+ let b = unsafe { Bitmap::from_raw(fake_bitmap.as_ptr(), 2 * BITS_PER_LONG) };
+ assert_eq!(2 * BITS_PER_LONG, b.len());
+ assert_eq!(None, b.next_bit(0));
+ }
+
+ #[test]
+ fn bitmap_copy() {
+ let fake_bitmap: usize = 0xFF;
+ // SAFETY: `fake_bitmap` can be used as a one-element array of expected length.
+ let b = unsafe { Bitmap::from_raw(core::ptr::addr_of!(fake_bitmap), 8) };
+ assert_eq!(8, b.len());
+ assert_eq!(None, b.next_zero_bit(0));
+ }
+
+ #[test]
+ fn bitmap_vec_new() -> Result<(), AllocError> {
+ let b = BitmapVec::new(0, GFP_KERNEL)?;
+ assert_eq!(0, b.len());
+
+ let b = BitmapVec::new(3, GFP_KERNEL)?;
+ assert_eq!(3, b.len());
+
+ let b = BitmapVec::new(1024, GFP_KERNEL)?;
+ assert_eq!(1024, b.len());
+
+ // Requesting too large values results in [`AllocError`].
+ let res = BitmapVec::new(1 << 31, GFP_KERNEL);
+ assert!(res.is_err());
+ Ok(())
+ }
+
+ #[test]
+ fn bitmap_set_clear_find() -> Result<(), AllocError> {
+ let mut b = BitmapVec::new(128, GFP_KERNEL)?;
+
+ // Zero-initialized
+ assert_eq!(None, b.next_bit(0));
+ assert_eq!(Some(0), b.next_zero_bit(0));
+ assert_eq!(None, b.last_bit());
+
+ b.set_bit(17);
+
+ assert_eq!(Some(17), b.next_bit(0));
+ assert_eq!(Some(17), b.next_bit(17));
+ assert_eq!(None, b.next_bit(18));
+ assert_eq!(Some(17), b.last_bit());
+
+ b.set_bit(107);
+
+ assert_eq!(Some(17), b.next_bit(0));
+ assert_eq!(Some(17), b.next_bit(17));
+ assert_eq!(Some(107), b.next_bit(18));
+ assert_eq!(Some(107), b.last_bit());
+
+ b.clear_bit(17);
+
+ assert_eq!(Some(107), b.next_bit(0));
+ assert_eq!(Some(107), b.last_bit());
+ Ok(())
+ }
+
+ #[test]
+ fn owned_bitmap_out_of_bounds() -> Result<(), AllocError> {
+ // TODO: KUnit `#[test]`s do not support `cfg` yet,
+ // so we add it here in the body.
+ #[cfg(not(CONFIG_RUST_BITMAP_HARDENED))]
+ {
+ let mut b = BitmapVec::new(128, GFP_KERNEL)?;
+ b.set_bit(2048);
+ b.set_bit_atomic(2048);
+ b.clear_bit(2048);
+ b.clear_bit_atomic(2048);
+ assert_eq!(None, b.next_bit(2048));
+ assert_eq!(None, b.next_zero_bit(2048));
+ assert_eq!(None, b.last_bit());
+ }
+ Ok(())
+ }
+
+ // TODO: uncomment once KUnit supports `#[should_panic]` and `cfg`.
+ // #[cfg(CONFIG_RUST_BITMAP_HARDENED)]
+ // #[test]
+ // #[should_panic]
+ // fn owned_bitmap_out_of_bounds() -> Result<(), AllocError> {
+ // let mut b = BitmapVec::new(128, GFP_KERNEL)?;
+ //
+ // b.set_bit(2048);
+ // }
+
+ #[test]
+ fn bitmap_copy_and_extend() -> Result<(), AllocError> {
+ let mut long_bitmap = BitmapVec::new(256, GFP_KERNEL)?;
+
+ long_bitmap.set_bit(3);
+ long_bitmap.set_bit(200);
+
+ let mut short_bitmap = BitmapVec::new(32, GFP_KERNEL)?;
+
+ short_bitmap.set_bit(17);
+
+ long_bitmap.copy_and_extend(&short_bitmap);
+
+ // Previous bits have been cleared.
+ assert_eq!(Some(17), long_bitmap.next_bit(0));
+ assert_eq!(Some(17), long_bitmap.last_bit());
+ Ok(())
+ }
+}
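The `data: [()]` field is what lets `Bitmap` be an unsized type whose fat-pointer metadata stores the bit count rather than an element count. A standalone sketch of that trick, with a hypothetical `Bits` type:

```rust
// A slice of `()` occupies zero bytes, so the fat pointer's length metadata
// is free to record the number of *bits* instead of a number of elements.
struct Bits {
    data: [()],
}

impl Bits {
    /// # Safety
    /// `ptr` must point to enough initialized `usize` words to hold `nbits`
    /// bits, and they must stay live for `'a`.
    unsafe fn from_raw<'a>(ptr: *const usize, nbits: usize) -> &'a Bits {
        let data: *const [()] = core::ptr::slice_from_raw_parts(ptr.cast(), nbits);
        // `Bits` wraps a single unsized `[()]` field, so the cast preserves
        // the pointer metadata (the bit count).
        unsafe { &*(data as *const Bits) }
    }

    fn len(&self) -> usize {
        // Reads only the pointer metadata; no memory is dereferenced here.
        self.data.len()
    }
}

fn main() {
    let words = [0usize; 2];
    // SAFETY: `words` provides 2 * usize::BITS bits and outlives `b`.
    let b = unsafe { Bits::from_raw(words.as_ptr(), 2 * usize::BITS as usize) };
    assert_eq!(b.len(), 2 * usize::BITS as usize);
}
```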
diff --git a/rust/kernel/block.rs b/rust/kernel/block.rs
index 150f710efe5b..32c8d865afb6 100644
--- a/rust/kernel/block.rs
+++ b/rust/kernel/block.rs
@@ -3,3 +3,16 @@
//! Types for working with the block layer.
pub mod mq;
+
+/// Bit mask for masking out [`SECTOR_SIZE`].
+pub const SECTOR_MASK: u32 = bindings::SECTOR_MASK;
+
+/// Sectors are size `1 << SECTOR_SHIFT`.
+pub const SECTOR_SHIFT: u32 = bindings::SECTOR_SHIFT;
+
+/// Size of a sector.
+pub const SECTOR_SIZE: u32 = bindings::SECTOR_SIZE;
+
+/// The difference between the size of a page and the size of a sector,
+/// expressed as the exponent of a power of two (`PAGE_SHIFT - SECTOR_SHIFT`).
+pub const PAGE_SECTORS_SHIFT: u32 = bindings::PAGE_SECTORS_SHIFT;
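A standalone sketch of the arithmetic these constants support; the concrete values assume the usual 512-byte sectors and 4 KiB pages (`PAGE_SHIFT == 12`):

```rust
const SECTOR_SHIFT: u32 = 9;
const SECTOR_SIZE: u32 = 1 << SECTOR_SHIFT; // 512
const PAGE_SHIFT: u32 = 12;
const PAGE_SECTORS_SHIFT: u32 = PAGE_SHIFT - SECTOR_SHIFT;

/// Convert a byte count to a sector count, rounding down.
const fn bytes_to_sectors(bytes: u64) -> u64 {
    bytes >> SECTOR_SHIFT
}

const _: () = assert!(SECTOR_SIZE == 512);
const _: () = assert!(bytes_to_sectors(4096) == 8);
// Sectors per page: 1 << (PAGE_SHIFT - SECTOR_SHIFT).
const _: () = assert!(1u64 << PAGE_SECTORS_SHIFT == 8);
```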
diff --git a/rust/kernel/block/mq.rs b/rust/kernel/block/mq.rs
index 831445d37181..637018ead0ab 100644
--- a/rust/kernel/block/mq.rs
+++ b/rust/kernel/block/mq.rs
@@ -69,27 +69,33 @@
//!
//! #[vtable]
//! impl Operations for MyBlkDevice {
+//! type QueueData = ();
//!
-//! fn queue_rq(rq: ARef<Request<Self>>, _is_last: bool) -> Result {
+//! fn queue_rq(_queue_data: (), rq: ARef<Request<Self>>, _is_last: bool) -> Result {
//! Request::end_ok(rq);
//! Ok(())
//! }
//!
-//! fn commit_rqs() {}
+//! fn commit_rqs(_queue_data: ()) {}
+//!
+//! fn complete(rq: ARef<Request<Self>>) {
+//! Request::end_ok(rq)
+//! .map_err(|_e| kernel::error::code::EIO)
+//! .expect("Fatal error - expected to be able to end request");
+//! }
//! }
//!
//! let tagset: Arc<TagSet<MyBlkDevice>> =
//! Arc::pin_init(TagSet::new(1, 256, 1), flags::GFP_KERNEL)?;
//! let mut disk = gen_disk::GenDiskBuilder::new()
//! .capacity_sectors(4096)
-//! .build(format_args!("myblk"), tagset)?;
+//! .build(fmt!("myblk"), tagset, ())?;
//!
//! # Ok::<(), kernel::error::Error>(())
//! ```
pub mod gen_disk;
mod operations;
-mod raw_writer;
mod request;
mod tag_set;
diff --git a/rust/kernel/block/mq/gen_disk.rs b/rust/kernel/block/mq/gen_disk.rs
index cd54cd64ea88..1ce815c8cdab 100644
--- a/rust/kernel/block/mq/gen_disk.rs
+++ b/rust/kernel/block/mq/gen_disk.rs
@@ -3,12 +3,19 @@
//! Generic disk abstraction.
//!
//! C header: [`include/linux/blkdev.h`](srctree/include/linux/blkdev.h)
-//! C header: [`include/linux/blk_mq.h`](srctree/include/linux/blk_mq.h)
-
-use crate::block::mq::{raw_writer::RawWriter, Operations, TagSet};
-use crate::{bindings, error::from_err_ptr, error::Result, sync::Arc};
-use crate::{error, static_lock_class};
-use core::fmt::{self, Write};
+//! C header: [`include/linux/blk-mq.h`](srctree/include/linux/blk-mq.h)
+
+use crate::{
+ bindings,
+ block::mq::{Operations, TagSet},
+ error::{self, from_err_ptr, Result},
+ fmt::{self, Write},
+ prelude::*,
+ static_lock_class,
+ str::NullTerminatedFormatter,
+ sync::Arc,
+ types::{ForeignOwnable, ScopeGuard},
+};
/// A builder for [`GenDisk`].
///
@@ -45,7 +52,7 @@ impl GenDiskBuilder {
/// Validate block size by verifying that it is between 512 and `PAGE_SIZE`,
/// and that it is a power of two.
- fn validate_block_size(size: u32) -> Result {
+ pub fn validate_block_size(size: u32) -> Result {
if !(512..=bindings::PAGE_SIZE as u32).contains(&size) || !size.is_power_of_two() {
Err(error::code::EINVAL)
} else {
@@ -92,7 +99,14 @@ impl GenDiskBuilder {
self,
name: fmt::Arguments<'_>,
tagset: Arc<TagSet<T>>,
+ queue_data: T::QueueData,
) -> Result<GenDisk<T>> {
+ let data = queue_data.into_foreign();
+ let recover_data = ScopeGuard::new(|| {
+ // SAFETY: T::QueueData was created by the call to `into_foreign()` above
+ drop(unsafe { T::QueueData::from_foreign(data) });
+ });
+
// SAFETY: `bindings::queue_limits` contain only fields that are valid when zeroed.
let mut lim: bindings::queue_limits = unsafe { core::mem::zeroed() };
@@ -107,7 +121,7 @@ impl GenDiskBuilder {
bindings::__blk_mq_alloc_disk(
tagset.raw_tag_set(),
&mut lim,
- core::ptr::null_mut(),
+ data,
static_lock_class!().as_ptr(),
)
})?;
@@ -139,14 +153,14 @@ impl GenDiskBuilder {
// SAFETY: `gendisk` is a valid pointer as we initialized it above
unsafe { (*gendisk).fops = &TABLE };
- let mut raw_writer = RawWriter::from_array(
+ let mut writer = NullTerminatedFormatter::new(
// SAFETY: `gendisk` points to a valid and initialized instance. We
// have exclusive access, since the disk is not added to the VFS
// yet.
unsafe { &mut (*gendisk).disk_name },
- )?;
- raw_writer.write_fmt(name)?;
- raw_writer.write_char('\0')?;
+ )
+ .ok_or(EINVAL)?;
+ writer.write_fmt(name)?;
// SAFETY: `gendisk` points to a valid and initialized instance of
// `struct gendisk`. `set_capacity` takes a lock to synchronize this
@@ -161,8 +175,12 @@ impl GenDiskBuilder {
},
)?;
+ recover_data.dismiss();
+
// INVARIANT: `gendisk` was initialized above.
// INVARIANT: `gendisk` was added to the VFS via `device_add_disk` above.
+ // INVARIANT: `gendisk.queue.queue_data` is set to `data` in the call to
+ // `__blk_mq_alloc_disk` above.
Ok(GenDisk {
_tagset: tagset,
gendisk,
@@ -174,9 +192,10 @@ impl GenDiskBuilder {
///
/// # Invariants
///
-/// - `gendisk` must always point to an initialized and valid `struct gendisk`.
-/// - `gendisk` was added to the VFS through a call to
-/// `bindings::device_add_disk`.
+/// - `gendisk` must always point to an initialized and valid `struct gendisk`.
+/// - `gendisk` was added to the VFS through a call to
+/// `bindings::device_add_disk`.
+/// - `self.gendisk.queue.queuedata` is initialized by a call to `ForeignOwnable::into_foreign`.
pub struct GenDisk<T: Operations> {
_tagset: Arc<TagSet<T>>,
gendisk: *mut bindings::gendisk,
@@ -188,9 +207,20 @@ unsafe impl<T: Operations + Send> Send for GenDisk<T> {}
impl<T: Operations> Drop for GenDisk<T> {
fn drop(&mut self) {
+ // SAFETY: By type invariant of `Self`, `self.gendisk` points to a valid
+ // and initialized instance of `struct gendisk`, and, `queuedata` was
+ // initialized with the result of a call to
+ // `ForeignOwnable::into_foreign`.
+ let queue_data = unsafe { (*(*self.gendisk).queue).queuedata };
+
// SAFETY: By type invariant, `self.gendisk` points to a valid and
// initialized instance of `struct gendisk`, and it was previously added
// to the VFS.
unsafe { bindings::del_gendisk(self.gendisk) };
+
+ // SAFETY: `queue.queuedata` was created by `GenDiskBuilder::build` with
+ // a call to `ForeignOwnable::into_foreign` to create `queuedata`.
+ // `ForeignOwnable::from_foreign` is only called here.
+ drop(unsafe { T::QueueData::from_foreign(queue_data) });
}
}
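The `into_foreign()`/`ScopeGuard` pairing in `build()` above is a cleanup-on-error pattern: reclaim the leaked pointer if a later fallible step bails out, dismiss the guard on success. A standalone sketch of its shape, with a hypothetical `Guard` type and `Box::into_raw` playing the role of `into_foreign()`:

```rust
// Hypothetical closure-based guard, in the spirit of `kernel::types::ScopeGuard`.
struct Guard<F: FnOnce()>(Option<F>);

impl<F: FnOnce()> Guard<F> {
    fn new(f: F) -> Self {
        Self(Some(f))
    }
    // Disarm the guard; the cleanup closure is dropped without running.
    fn dismiss(mut self) {
        self.0.take();
    }
}

impl<F: FnOnce()> Drop for Guard<F> {
    fn drop(&mut self) {
        if let Some(f) = self.0.take() {
            f();
        }
    }
}

fn build(fallible_step: impl FnOnce() -> Result<(), ()>) -> Result<Box<u32>, ()> {
    let data = Box::into_raw(Box::new(42u32)); // like `queue_data.into_foreign()`
    // SAFETY (sketch): `data` came from `Box::into_raw` above and is reclaimed
    // at most once: here on the error path, or by the final owner on success.
    let recover = Guard::new(|| drop(unsafe { Box::from_raw(data) }));
    fallible_step()?; // on `Err`, `recover` frees `data` as it unwinds scope
    recover.dismiss(); // success: ownership moves on to the final owner
    // SAFETY (sketch): the guard was dismissed, so this is the only reclaim.
    Ok(unsafe { Box::from_raw(data) })
}
```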
diff --git a/rust/kernel/block/mq/operations.rs b/rust/kernel/block/mq/operations.rs
index c2b98f507bcb..f91a1719886c 100644
--- a/rust/kernel/block/mq/operations.rs
+++ b/rust/kernel/block/mq/operations.rs
@@ -6,13 +6,15 @@
use crate::{
bindings,
- block::mq::request::RequestDataWrapper,
- block::mq::Request,
+ block::mq::{request::RequestDataWrapper, Request},
error::{from_result, Result},
prelude::*,
- types::ARef,
+ sync::Refcount,
+ types::{ARef, ForeignOwnable},
};
-use core::{marker::PhantomData, sync::atomic::AtomicU64, sync::atomic::Ordering};
+use core::marker::PhantomData;
+
+type ForeignBorrowed<'a, T> = <T as ForeignOwnable>::Borrowed<'a>;
/// Implement this trait to interface blk-mq as block devices.
///
@@ -26,12 +28,23 @@ use core::{marker::PhantomData, sync::atomic::AtomicU64, sync::atomic::Ordering}
/// [module level documentation]: kernel::block::mq
#[macros::vtable]
pub trait Operations: Sized {
+ /// Data associated with the `struct request_queue` that is allocated for
+ /// the `GenDisk` associated with this `Operations` implementation.
+ type QueueData: ForeignOwnable;
+
/// Called by the kernel to queue a request with the driver. If `is_last` is
/// `false`, the driver is allowed to defer committing the request.
- fn queue_rq(rq: ARef<Request<Self>>, is_last: bool) -> Result;
+ fn queue_rq(
+ queue_data: ForeignBorrowed<'_, Self::QueueData>,
+ rq: ARef<Request<Self>>,
+ is_last: bool,
+ ) -> Result;
/// Called by the kernel to indicate that queued requests should be submitted.
- fn commit_rqs();
+ fn commit_rqs(queue_data: ForeignBorrowed<'_, Self::QueueData>);
+
+ /// Called by the kernel when the request is completed.
+ fn complete(rq: ARef<Request<Self>>);
/// Called by the kernel to poll the device for completed requests. Only
/// used for poll queues.
@@ -70,7 +83,7 @@ impl<T: Operations> OperationsVTable<T> {
/// promise to not access the request until the driver calls
/// `bindings::blk_mq_end_request` for the request.
unsafe extern "C" fn queue_rq_callback(
- _hctx: *mut bindings::blk_mq_hw_ctx,
+ hctx: *mut bindings::blk_mq_hw_ctx,
bd: *const bindings::blk_mq_queue_data,
) -> bindings::blk_status_t {
// SAFETY: `bd.rq` is valid as required by the safety requirement for
@@ -78,7 +91,7 @@ impl<T: Operations> OperationsVTable<T> {
let request = unsafe { &*(*bd).rq.cast::<Request<T>>() };
// One refcount for the ARef, one for being in flight
- request.wrapper_ref().refcount().store(2, Ordering::Relaxed);
+ request.wrapper_ref().refcount().set(2);
// SAFETY:
// - We own a refcount that we took above. We pass that to `ARef`.
@@ -88,10 +101,20 @@ impl<T: Operations> OperationsVTable<T> {
// reference counted by `ARef` until then.
let rq = unsafe { Request::aref_from_raw((*bd).rq) };
+ // SAFETY: `hctx` is valid as required by this function.
+ let queue_data = unsafe { (*(*hctx).queue).queuedata };
+
+ // SAFETY: `queue.queuedata` was created by `GenDiskBuilder::build` with
+ // a call to `ForeignOwnable::into_foreign` to create `queuedata`.
+ // `ForeignOwnable::from_foreign` is only called when the tagset is
+ // dropped, which happens after we are dropped.
+ let queue_data = unsafe { T::QueueData::borrow(queue_data) };
+
// SAFETY: We have exclusive access and we just set the refcount above.
unsafe { Request::start_unchecked(&rq) };
let ret = T::queue_rq(
+ queue_data,
rq,
// SAFETY: `bd` is valid as required by the safety requirement for
// this function.
@@ -110,18 +133,35 @@ impl<T: Operations> OperationsVTable<T> {
///
/// # Safety
///
- /// This function may only be called by blk-mq C infrastructure.
- unsafe extern "C" fn commit_rqs_callback(_hctx: *mut bindings::blk_mq_hw_ctx) {
- T::commit_rqs()
+ /// This function may only be called by blk-mq C infrastructure. The caller
+ /// must ensure that `hctx` is valid.
+ unsafe extern "C" fn commit_rqs_callback(hctx: *mut bindings::blk_mq_hw_ctx) {
+ // SAFETY: `hctx` is valid as required by this function.
+ let queue_data = unsafe { (*(*hctx).queue).queuedata };
+
+ // SAFETY: `queue.queuedata` was created by `GenDiskBuilder::build` with a
+ // call to `ForeignOwnable::into_foreign()` to create `queuedata`.
+ // `ForeignOwnable::from_foreign()` is only called when the tagset is
+ // dropped, which happens after we are dropped.
+ let queue_data = unsafe { T::QueueData::borrow(queue_data) };
+ T::commit_rqs(queue_data)
}
- /// This function is called by the C kernel. It is not currently
- /// implemented, and there is no way to exercise this code path.
+ /// This function is called by the C kernel. A pointer to this function is
+ /// installed in the `blk_mq_ops` vtable for the driver.
///
/// # Safety
///
- /// This function may only be called by blk-mq C infrastructure.
- unsafe extern "C" fn complete_callback(_rq: *mut bindings::request) {}
+ /// This function may only be called by blk-mq C infrastructure. `rq` must
+ /// point to a valid request that has been marked as completed. The pointee
+ /// of `rq` must be valid for write for the duration of this function.
+ unsafe extern "C" fn complete_callback(rq: *mut bindings::request) {
+ // SAFETY: This function can only be dispatched through
+ // `Request::complete`. We leaked a refcount then which we pick back up
+ // now.
+ let aref = unsafe { Request::aref_from_raw(rq) };
+ T::complete(aref);
+ }
/// This function is called by the C kernel. A pointer to this function is
/// installed in the `blk_mq_ops` vtable for the driver.
@@ -187,7 +227,7 @@ impl<T: Operations> OperationsVTable<T> {
// SAFETY: The refcount field is allocated but not initialized, so
// it is valid for writes.
- unsafe { RequestDataWrapper::refcount_ptr(pdu.as_ptr()).write(AtomicU64::new(0)) };
+ unsafe { RequestDataWrapper::refcount_ptr(pdu.as_ptr()).write(Refcount::new(0)) };
Ok(0)
})
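The refcount handoff that `queue_rq_callback` and `complete_callback` rely on can be pictured in ordinary Rust with `Arc` standing in for `ARef` (a standalone sketch, not the kernel API):

```rust
use std::sync::Arc;

// "Leak a refcount across the C boundary, pick it back up in the callback":
// the pattern `Request::complete` and `complete_callback` use with
// `ARef::into_raw` / `aref_from_raw`.
fn into_c(v: Arc<u32>) -> *const u32 {
    Arc::into_raw(v) // the refcount now travels with the raw pointer
}

unsafe fn callback(ptr: *const u32) {
    // SAFETY (sketch): `ptr` came from `Arc::into_raw` and the refcount it
    // carries has not been reclaimed elsewhere.
    let v = unsafe { Arc::from_raw(ptr) };
    assert_eq!(*v, 42);
} // refcount dropped here, as `T::complete` consumes its `ARef`

fn main() {
    let ptr = into_c(Arc::new(42));
    unsafe { callback(ptr) };
}
```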
diff --git a/rust/kernel/block/mq/raw_writer.rs b/rust/kernel/block/mq/raw_writer.rs
deleted file mode 100644
index 7e2159e4f6a6..000000000000
--- a/rust/kernel/block/mq/raw_writer.rs
+++ /dev/null
@@ -1,55 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-use core::fmt::{self, Write};
-
-use crate::error::Result;
-use crate::prelude::EINVAL;
-
-/// A mutable reference to a byte buffer where a string can be written into.
-///
-/// # Invariants
-///
-/// `buffer` is always null terminated.
-pub(crate) struct RawWriter<'a> {
- buffer: &'a mut [u8],
- pos: usize,
-}
-
-impl<'a> RawWriter<'a> {
- /// Create a new `RawWriter` instance.
- fn new(buffer: &'a mut [u8]) -> Result<RawWriter<'a>> {
- *(buffer.last_mut().ok_or(EINVAL)?) = 0;
-
- // INVARIANT: We null terminated the buffer above.
- Ok(Self { buffer, pos: 0 })
- }
-
- pub(crate) fn from_array<const N: usize>(
- a: &'a mut [crate::ffi::c_char; N],
- ) -> Result<RawWriter<'a>> {
- Self::new(
- // SAFETY: the buffer of `a` is valid for read and write as `u8` for
- // at least `N` bytes.
- unsafe { core::slice::from_raw_parts_mut(a.as_mut_ptr().cast::<u8>(), N) },
- )
- }
-}
-
-impl Write for RawWriter<'_> {
- fn write_str(&mut self, s: &str) -> fmt::Result {
- let bytes = s.as_bytes();
- let len = bytes.len();
-
- // We do not want to overwrite our null terminator
- if self.pos + len > self.buffer.len() - 1 {
- return Err(fmt::Error);
- }
-
- // INVARIANT: We are not overwriting the last byte
- self.buffer[self.pos..self.pos + len].copy_from_slice(bytes);
-
- self.pos += len;
-
- Ok(())
- }
-}
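The `NullTerminatedFormatter` that replaces `RawWriter` (added to `kernel::str` in this series) keeps the same invariant. A standalone sketch of such a writer, with a hypothetical `NulWriter` type rather than the kernel's actual implementation:

```rust
use core::fmt::{self, Write};

// A buffer writer that keeps its contents NUL-terminated after every write.
struct NulWriter<'a> {
    buf: &'a mut [u8],
    pos: usize,
}

impl<'a> NulWriter<'a> {
    fn new(buf: &'a mut [u8]) -> Option<Self> {
        *buf.last_mut()? = 0; // reserve space for the trailing NUL
        Some(Self { buf, pos: 0 })
    }
}

impl Write for NulWriter<'_> {
    fn write_str(&mut self, s: &str) -> fmt::Result {
        let bytes = s.as_bytes();
        // Never allow the payload to claim the final reserved byte.
        if self.pos + bytes.len() > self.buf.len() - 1 {
            return Err(fmt::Error);
        }
        self.buf[self.pos..self.pos + bytes.len()].copy_from_slice(bytes);
        self.pos += bytes.len();
        // Re-terminate after each write, as `NullTerminatedFormatter` does,
        // so callers no longer need an explicit `write_char('\0')`.
        self.buf[self.pos] = 0;
        Ok(())
    }
}
```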
diff --git a/rust/kernel/block/mq/request.rs b/rust/kernel/block/mq/request.rs
index fefd394f064a..c5f1f6b1ccfb 100644
--- a/rust/kernel/block/mq/request.rs
+++ b/rust/kernel/block/mq/request.rs
@@ -8,13 +8,10 @@ use crate::{
bindings,
block::mq::Operations,
error::Result,
+ sync::{atomic::Relaxed, Refcount},
types::{ARef, AlwaysRefCounted, Opaque},
};
-use core::{
- marker::PhantomData,
- ptr::NonNull,
- sync::atomic::{AtomicU64, Ordering},
-};
+use core::{marker::PhantomData, ptr::NonNull};
/// A wrapper around a blk-mq [`struct request`]. This represents an IO request.
///
@@ -37,6 +34,9 @@ use core::{
/// We need to track 3 and 4 to ensure that it is safe to end the request and hand
/// back ownership to the block layer.
///
+/// Note that the driver can still obtain a new `ARef` even if there are no `ARef`s in existence by
+/// using `tag_to_rq`, hence the need to distinguish B and C.
+///
/// The states are tracked through the private `refcount` field of
/// `RequestDataWrapper`. This structure lives in the private data area of the C
/// [`struct request`].
@@ -53,7 +53,7 @@ use core::{
/// [`struct request`]: srctree/include/linux/blk-mq.h
///
#[repr(transparent)]
-pub struct Request<T: Operations>(Opaque<bindings::request>, PhantomData<T>);
+pub struct Request<T>(Opaque<bindings::request>, PhantomData<T>);
impl<T: Operations> Request<T> {
/// Create an [`ARef<Request>`] from a [`struct request`] pointer.
@@ -98,13 +98,16 @@ impl<T: Operations> Request<T> {
///
/// [`struct request`]: srctree/include/linux/blk-mq.h
fn try_set_end(this: ARef<Self>) -> Result<*mut bindings::request, ARef<Self>> {
- // We can race with `TagSet::tag_to_rq`
- if let Err(_old) = this.wrapper_ref().refcount().compare_exchange(
- 2,
- 0,
- Ordering::Relaxed,
- Ordering::Relaxed,
- ) {
+ // To hand back the ownership, we need the current refcount to be 2.
+ // Since we can race with `TagSet::tag_to_rq`, this needs to atomically reduce the
+ // refcount to 0. `Refcount` does not provide a way to do this, so use the underlying
+ // atomics directly.
+ if let Err(_old) = this
+ .wrapper_ref()
+ .refcount()
+ .as_atomic()
+ .cmpxchg(2, 0, Relaxed)
+ {
return Err(this);
}
@@ -135,6 +138,23 @@ impl<T: Operations> Request<T> {
Ok(())
}
+ /// Complete the request by scheduling `Operations::complete` for
+ /// execution.
+ ///
+ /// The function may be scheduled locally, via SoftIRQ, or remotely via IPI.
+ /// See `blk_mq_complete_request_remote` in [`blk-mq.c`] for details.
+ ///
+ /// [`blk-mq.c`]: srctree/block/blk-mq.c
+ pub fn complete(this: ARef<Self>) {
+ let ptr = ARef::into_raw(this).cast::<bindings::request>().as_ptr();
+ // SAFETY: By type invariant, `self.0` is a valid `struct request`
+ if !unsafe { bindings::blk_mq_complete_request_remote(ptr) } {
+ // SAFETY: We released a refcount above that we can reclaim here.
+ let this = unsafe { Request::aref_from_raw(ptr) };
+ T::complete(this);
+ }
+ }
+
/// Return a pointer to the [`RequestDataWrapper`] stored in the private area
/// of the request structure.
///
@@ -148,7 +168,7 @@ impl<T: Operations> Request<T> {
// valid allocation.
let wrapper_ptr =
unsafe { bindings::blk_mq_rq_to_pdu(request_ptr).cast::<RequestDataWrapper>() };
- // SAFETY: By C API contract, wrapper_ptr points to a valid allocation
+ // SAFETY: By C API contract, `wrapper_ptr` points to a valid allocation
// and is not null.
unsafe { NonNull::new_unchecked(wrapper_ptr) }
}
@@ -173,13 +193,13 @@ pub(crate) struct RequestDataWrapper {
/// - 0: The request is owned by C block layer.
/// - 1: The request is owned by Rust abstractions but there are no [`ARef`] references to it.
/// - 2+: There are [`ARef`] references to the request.
- refcount: AtomicU64,
+ refcount: Refcount,
}
impl RequestDataWrapper {
/// Return a reference to the refcount of the request that is embedding
/// `self`.
- pub(crate) fn refcount(&self) -> &AtomicU64 {
+ pub(crate) fn refcount(&self) -> &Refcount {
&self.refcount
}
@@ -189,7 +209,7 @@ impl RequestDataWrapper {
/// # Safety
///
/// - `this` must point to a live allocation of at least the size of `Self`.
- pub(crate) unsafe fn refcount_ptr(this: *mut Self) -> *mut AtomicU64 {
+ pub(crate) unsafe fn refcount_ptr(this: *mut Self) -> *mut Refcount {
// SAFETY: Because of the safety requirements of this function, the
// field projection is safe.
unsafe { &raw mut (*this).refcount }
@@ -205,47 +225,13 @@ unsafe impl<T: Operations> Send for Request<T> {}
// mutate `self` are internally synchronized.
unsafe impl<T: Operations> Sync for Request<T> {}
-/// Store the result of `op(target.load())` in target, returning new value of
-/// target.
-fn atomic_relaxed_op_return(target: &AtomicU64, op: impl Fn(u64) -> u64) -> u64 {
- let old = target.fetch_update(Ordering::Relaxed, Ordering::Relaxed, |x| Some(op(x)));
-
- // SAFETY: Because the operation passed to `fetch_update` above always
- // return `Some`, `old` will always be `Ok`.
- let old = unsafe { old.unwrap_unchecked() };
-
- op(old)
-}
-
-/// Store the result of `op(target.load)` in `target` if `target.load() !=
-/// pred`, returning [`true`] if the target was updated.
-fn atomic_relaxed_op_unless(target: &AtomicU64, op: impl Fn(u64) -> u64, pred: u64) -> bool {
- target
- .fetch_update(Ordering::Relaxed, Ordering::Relaxed, |x| {
- if x == pred {
- None
- } else {
- Some(op(x))
- }
- })
- .is_ok()
-}
-
// SAFETY: All instances of `Request<T>` are reference counted. This
// implementation of `AlwaysRefCounted` ensures that increments to the
// refcount keep the object alive in memory at least until a matching
// refcount decrement is executed.
unsafe impl<T: Operations> AlwaysRefCounted for Request<T> {
fn inc_ref(&self) {
- let refcount = &self.wrapper_ref().refcount();
-
- #[cfg_attr(not(CONFIG_DEBUG_MISC), allow(unused_variables))]
- let updated = atomic_relaxed_op_unless(refcount, |x| x + 1, 0);
-
- #[cfg(CONFIG_DEBUG_MISC)]
- if !updated {
- panic!("Request refcount zero on clone")
- }
+ self.wrapper_ref().refcount().inc();
}
unsafe fn dec_ref(obj: core::ptr::NonNull<Self>) {
@@ -257,10 +243,10 @@ unsafe impl<T: Operations> AlwaysRefCounted for Request<T> {
let refcount = unsafe { &*RequestDataWrapper::refcount_ptr(wrapper_ptr) };
#[cfg_attr(not(CONFIG_DEBUG_MISC), allow(unused_variables))]
- let new_refcount = atomic_relaxed_op_return(refcount, |x| x - 1);
+ let is_zero = refcount.dec_and_test();
#[cfg(CONFIG_DEBUG_MISC)]
- if new_refcount == 0 {
+ if is_zero {
panic!("Request reached refcount zero in Rust abstractions");
}
}
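The hand-rolled relaxed atomics above are replaced by `kernel::sync::Refcount`; a minimal sketch of the resulting pattern, with an illustrative `Counted` type standing in for `RequestDataWrapper`:

```
use kernel::sync::Refcount;

struct Counted {
    refcount: Refcount,
}

impl Counted {
    fn get(&self) {
        // `Refcount` saturates rather than overflowing, so no manual
        // zero-check is needed on the increment path.
        self.refcount.inc();
    }

    // Returns `true` when the count dropped to zero and the object may be
    // released.
    fn put(&self) -> bool {
        self.refcount.dec_and_test()
    }
}
```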
diff --git a/rust/kernel/configfs.rs b/rust/kernel/configfs.rs
index 2736b798cdc6..10f1547ca9f1 100644
--- a/rust/kernel/configfs.rs
+++ b/rust/kernel/configfs.rs
@@ -263,7 +263,7 @@ impl<Data> Group<Data> {
try_pin_init!(Self {
group <- pin_init::init_zeroed().chain(|v: &mut Opaque<bindings::config_group>| {
let place = v.get();
- let name = name.as_bytes_with_nul().as_ptr();
+ let name = name.to_bytes_with_nul().as_ptr();
// SAFETY: It is safe to initialize a group once it has been zeroed.
unsafe {
bindings::config_group_init_type_name(place, name.cast(), item_type.as_ptr())
@@ -613,7 +613,7 @@ where
pub const fn new(name: &'static CStr) -> Self {
Self {
attribute: Opaque::new(bindings::configfs_attribute {
- ca_name: name.as_char_ptr(),
+ ca_name: crate::str::as_char_ptr_in_const_context(name),
ca_owner: core::ptr::null_mut(),
ca_mode: 0o660,
show: Some(Self::show),
@@ -1039,3 +1039,5 @@ macro_rules! configfs_attrs {
};
}
+
+pub use crate::configfs_attrs;
diff --git a/rust/kernel/cpu.rs b/rust/kernel/cpu.rs
index 5de730c8d817..cb6c0338ef5a 100644
--- a/rust/kernel/cpu.rs
+++ b/rust/kernel/cpu.rs
@@ -109,6 +109,7 @@ impl CpuId {
/// unexpectedly due to preemption or CPU migration. It should only be
/// used when the context ensures that the task remains on the same CPU
/// or the users could use a stale (yet valid) CPU ID.
+ #[inline]
pub fn current() -> Self {
// SAFETY: raw_smp_processor_id() always returns a valid CPU ID.
unsafe { Self::from_u32_unchecked(bindings::raw_smp_processor_id()) }
diff --git a/rust/kernel/cpufreq.rs b/rust/kernel/cpufreq.rs
index afc15e72a7c3..29bdf7fe80a1 100644
--- a/rust/kernel/cpufreq.rs
+++ b/rust/kernel/cpufreq.rs
@@ -27,7 +27,6 @@ use crate::clk::Clk;
use core::{
cell::UnsafeCell,
marker::PhantomData,
- mem::MaybeUninit,
ops::{Deref, DerefMut},
pin::Pin,
ptr,
@@ -543,7 +542,7 @@ impl Policy {
pub fn cpus(&mut self) -> &mut cpumask::Cpumask {
// SAFETY: The pointer to `cpus` is valid for writing and remains valid for the lifetime of
// the returned reference.
- unsafe { cpumask::CpumaskVar::as_mut_ref(&mut self.as_mut_ref().cpus) }
+ unsafe { cpumask::CpumaskVar::from_raw_mut(&mut self.as_mut_ref().cpus) }
}
/// Sets clock for the [`Policy`].
@@ -1013,12 +1012,11 @@ impl<T: Driver> Registration<T> {
} else {
None
},
- // SAFETY: All zeros is a valid value for `bindings::cpufreq_driver`.
- ..unsafe { MaybeUninit::zeroed().assume_init() }
+ ..pin_init::zeroed()
};
const fn copy_name(name: &'static CStr) -> [c_char; CPUFREQ_NAME_LEN] {
- let src = name.as_bytes_with_nul();
+ let src = name.to_bytes_with_nul();
let mut dst = [0; CPUFREQ_NAME_LEN];
build_assert!(src.len() <= CPUFREQ_NAME_LEN);
diff --git a/rust/kernel/cpumask.rs b/rust/kernel/cpumask.rs
index 3fcbff438670..c1d17826ae7b 100644
--- a/rust/kernel/cpumask.rs
+++ b/rust/kernel/cpumask.rs
@@ -212,6 +212,7 @@ impl Cpumask {
/// }
/// assert_eq!(mask2.weight(), count);
/// ```
+#[repr(transparent)]
pub struct CpumaskVar {
#[cfg(CONFIG_CPUMASK_OFFSTACK)]
ptr: NonNull<Cpumask>,
@@ -270,7 +271,7 @@ impl CpumaskVar {
///
/// The caller must ensure that `ptr` is valid for writing and remains valid for the lifetime
/// of the returned reference.
- pub unsafe fn as_mut_ref<'a>(ptr: *mut bindings::cpumask_var_t) -> &'a mut Self {
+ pub unsafe fn from_raw_mut<'a>(ptr: *mut bindings::cpumask_var_t) -> &'a mut Self {
// SAFETY: Guaranteed by the safety requirements of the function.
//
// INVARIANT: The caller ensures that `ptr` is valid for writing and remains valid for the
@@ -284,7 +285,7 @@ impl CpumaskVar {
///
/// The caller must ensure that `ptr` is valid for reading and remains valid for the lifetime
/// of the returned reference.
- pub unsafe fn as_ref<'a>(ptr: *const bindings::cpumask_var_t) -> &'a Self {
+ pub unsafe fn from_raw<'a>(ptr: *const bindings::cpumask_var_t) -> &'a Self {
// SAFETY: Guaranteed by the safety requirements of the function.
//
// INVARIANT: The caller ensures that `ptr` is valid for reading and remains valid for the
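A usage sketch for the renamed constructors; `weight_of` is hypothetical, and its caller inherits the documented validity requirements on `ptr`:

```
use kernel::bindings;
use kernel::cpumask::CpumaskVar;

/// # Safety
///
/// `ptr` must be valid for reading for the duration of the call.
unsafe fn weight_of(ptr: *const bindings::cpumask_var_t) -> u32 {
    // SAFETY: Delegated to our caller, per the contract above.
    let var = unsafe { CpumaskVar::from_raw(ptr) };
    var.weight()
}
```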
diff --git a/rust/kernel/cred.rs b/rust/kernel/cred.rs
index 2599f01e8b28..4a2229542fb7 100644
--- a/rust/kernel/cred.rs
+++ b/rust/kernel/cred.rs
@@ -8,11 +8,7 @@
//!
//! Reference: <https://www.kernel.org/doc/html/latest/security/credentials.html>
-use crate::{
- bindings,
- task::Kuid,
- types::{AlwaysRefCounted, Opaque},
-};
+use crate::{bindings, sync::aref::AlwaysRefCounted, task::Kuid, types::Opaque};
/// Wraps the kernel's `struct cred`.
///
diff --git a/rust/kernel/debugfs.rs b/rust/kernel/debugfs.rs
new file mode 100644
index 000000000000..381c23b3dd83
--- /dev/null
+++ b/rust/kernel/debugfs.rs
@@ -0,0 +1,594 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2025 Google LLC.
+
+//! DebugFS Abstraction
+//!
+//! C header: [`include/linux/debugfs.h`](srctree/include/linux/debugfs.h)
+
+// When DebugFS is disabled, many parameters are dead. Linting for this isn't helpful.
+#![cfg_attr(not(CONFIG_DEBUG_FS), allow(unused_variables))]
+
+use crate::prelude::*;
+use crate::str::CStr;
+#[cfg(CONFIG_DEBUG_FS)]
+use crate::sync::Arc;
+use crate::uaccess::UserSliceReader;
+use core::fmt;
+use core::marker::PhantomData;
+use core::marker::PhantomPinned;
+#[cfg(CONFIG_DEBUG_FS)]
+use core::mem::ManuallyDrop;
+use core::ops::Deref;
+
+mod traits;
+pub use traits::{Reader, Writer};
+
+mod callback_adapters;
+use callback_adapters::{FormatAdapter, NoWriter, WritableAdapter};
+mod file_ops;
+use file_ops::{FileOps, ReadFile, ReadWriteFile, WriteFile};
+#[cfg(CONFIG_DEBUG_FS)]
+mod entry;
+#[cfg(CONFIG_DEBUG_FS)]
+use entry::Entry;
+
+/// Owning handle to a DebugFS directory.
+///
+/// The directory in the filesystem represented by [`Dir`] will be removed when the handle has
+/// been dropped *and* all children have been removed.
+// If we have a parent, we hold a reference to it in the `Entry`. This prevents the `dentry`
+// we point to from being cleaned up if our parent `Dir`/`Entry` is dropped before us.
+//
+// The `None` option indicates that the `Arc` could not be allocated, so our children would not be
+// able to refer to us. In this case, we need to silently fail. All future child directories/files
+// will silently fail as well.
+#[derive(Clone)]
+pub struct Dir(#[cfg(CONFIG_DEBUG_FS)] Option<Arc<Entry<'static>>>);
+
+impl Dir {
+ /// Create a new directory in DebugFS. If `parent` is [`None`], it will be created at the root.
+ fn create(name: &CStr, parent: Option<&Dir>) -> Self {
+ #[cfg(CONFIG_DEBUG_FS)]
+ {
+ let parent_entry = match parent {
+ // If the parent couldn't be allocated, just early-return
+ Some(Dir(None)) => return Self(None),
+ Some(Dir(Some(entry))) => Some(entry.clone()),
+ None => None,
+ };
+ Self(
+ // If Arc creation fails, the `Entry` will be dropped, so the directory will be
+ // cleaned up.
+ Arc::new(Entry::dynamic_dir(name, parent_entry), GFP_KERNEL).ok(),
+ )
+ }
+ #[cfg(not(CONFIG_DEBUG_FS))]
+ Self()
+ }
+
+ /// Creates a DebugFS file which will own the data produced by the initializer provided in
+ /// `data`.
+ fn create_file<'a, T, E: 'a>(
+ &'a self,
+ name: &'a CStr,
+ data: impl PinInit<T, E> + 'a,
+ file_ops: &'static FileOps<T>,
+ ) -> impl PinInit<File<T>, E> + 'a
+ where
+ T: Sync + 'static,
+ {
+ let scope = Scope::<T>::new(data, move |data| {
+ #[cfg(CONFIG_DEBUG_FS)]
+ if let Some(parent) = &self.0 {
+ // SAFETY: Because data derives from a scope, and our entry will be dropped before
+ // the data is dropped, it is guaranteed to outlive the entry we return.
+ unsafe { Entry::dynamic_file(name, parent.clone(), data, file_ops) }
+ } else {
+ Entry::empty()
+ }
+ });
+ try_pin_init! {
+ File {
+ scope <- scope
+ } ? E
+ }
+ }
+
+ /// Create a new directory in DebugFS at the root.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// # use kernel::c_str;
+ /// # use kernel::debugfs::Dir;
+ /// let debugfs = Dir::new(c_str!("parent"));
+ /// ```
+ pub fn new(name: &CStr) -> Self {
+ Dir::create(name, None)
+ }
+
+ /// Creates a subdirectory within this directory.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// # use kernel::c_str;
+ /// # use kernel::debugfs::Dir;
+ /// let parent = Dir::new(c_str!("parent"));
+ /// let child = parent.subdir(c_str!("child"));
+ /// ```
+ pub fn subdir(&self, name: &CStr) -> Self {
+ Dir::create(name, Some(self))
+ }
+
+ /// Creates a read-only file in this directory.
+ ///
+ /// The file's contents are produced by invoking [`Writer::write`] on the value initialized by
+ /// `data`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// # use kernel::c_str;
+ /// # use kernel::debugfs::Dir;
+ /// # use kernel::prelude::*;
+ /// # let dir = Dir::new(c_str!("my_debugfs_dir"));
+ /// let file = KBox::pin_init(dir.read_only_file(c_str!("foo"), 200), GFP_KERNEL)?;
+ /// // "my_debugfs_dir/foo" now contains the number 200.
+ /// // The file is removed when `file` is dropped.
+ /// # Ok::<(), Error>(())
+ /// ```
+ pub fn read_only_file<'a, T, E: 'a>(
+ &'a self,
+ name: &'a CStr,
+ data: impl PinInit<T, E> + 'a,
+ ) -> impl PinInit<File<T>, E> + 'a
+ where
+ T: Writer + Send + Sync + 'static,
+ {
+ let file_ops = &<T as ReadFile<_>>::FILE_OPS;
+ self.create_file(name, data, file_ops)
+ }
+
+ /// Creates a read-only file in this directory, with contents from a callback.
+ ///
+ /// `f` must be a function item or a non-capturing closure.
+ /// This is statically asserted and not a safety requirement.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// # use core::sync::atomic::{AtomicU32, Ordering};
+ /// # use kernel::c_str;
+ /// # use kernel::debugfs::Dir;
+ /// # use kernel::prelude::*;
+ /// # let dir = Dir::new(c_str!("foo"));
+ /// let file = KBox::pin_init(
+ /// dir.read_callback_file(c_str!("bar"),
+ /// AtomicU32::new(3),
+ /// &|val, f| {
+ /// let out = val.load(Ordering::Relaxed);
+ /// writeln!(f, "{out:#010x}")
+ /// }),
+ /// GFP_KERNEL)?;
+ /// // Reading "foo/bar" will show "0x00000003".
+ /// file.store(10, Ordering::Relaxed);
+ /// // Reading "foo/bar" will now show "0x0000000a".
+ /// # Ok::<(), Error>(())
+ /// ```
+ pub fn read_callback_file<'a, T, E: 'a, F>(
+ &'a self,
+ name: &'a CStr,
+ data: impl PinInit<T, E> + 'a,
+ _f: &'static F,
+ ) -> impl PinInit<File<T>, E> + 'a
+ where
+ T: Send + Sync + 'static,
+ F: Fn(&T, &mut fmt::Formatter<'_>) -> fmt::Result + Send + Sync,
+ {
+ let file_ops = <FormatAdapter<T, F>>::FILE_OPS.adapt();
+ self.create_file(name, data, file_ops)
+ }
+
+ /// Creates a read-write file in this directory.
+ ///
+ /// Reading the file uses the [`Writer`] implementation.
+ /// Writing to the file uses the [`Reader`] implementation.
+ pub fn read_write_file<'a, T, E: 'a>(
+ &'a self,
+ name: &'a CStr,
+ data: impl PinInit<T, E> + 'a,
+ ) -> impl PinInit<File<T>, E> + 'a
+ where
+ T: Writer + Reader + Send + Sync + 'static,
+ {
+ let file_ops = &<T as ReadWriteFile<_>>::FILE_OPS;
+ self.create_file(name, data, file_ops)
+ }
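A hedged usage sketch: an atomic integer already satisfies both bounds, getting `Writer` from the `Debug` blanket impl and `Reader` from the atomic impls in `traits.rs`; the names here are illustrative:

```
use core::sync::atomic::AtomicU32;
use kernel::c_str;
use kernel::debugfs::Dir;
use kernel::prelude::*;

fn tunable_file() -> Result {
    let dir = Dir::new(c_str!("example"));
    // Reads print the value via `Debug`; writes parse the user input and
    // store it. The file is removed when `_file` is dropped.
    let _file = KBox::pin_init(
        dir.read_write_file(c_str!("tunable"), AtomicU32::new(0)),
        GFP_KERNEL,
    )?;
    Ok(())
}
```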
+
+ /// Creates a read-write file in this directory, with logic from callbacks.
+ ///
+ /// Reading from the file is handled by `f`. Writing to the file is handled by `w`.
+ ///
+ /// `f` and `w` must be function items or non-capturing closures.
+ /// This is statically asserted and not a safety requirement.
+ pub fn read_write_callback_file<'a, T, E: 'a, F, W>(
+ &'a self,
+ name: &'a CStr,
+ data: impl PinInit<T, E> + 'a,
+ _f: &'static F,
+ _w: &'static W,
+ ) -> impl PinInit<File<T>, E> + 'a
+ where
+ T: Send + Sync + 'static,
+ F: Fn(&T, &mut fmt::Formatter<'_>) -> fmt::Result + Send + Sync,
+ W: Fn(&T, &mut UserSliceReader) -> Result + Send + Sync,
+ {
+ let file_ops =
+ <WritableAdapter<FormatAdapter<T, F>, W> as file_ops::ReadWriteFile<_>>::FILE_OPS
+ .adapt()
+ .adapt();
+ self.create_file(name, data, file_ops)
+ }
+
+ /// Creates a write-only file in this directory.
+ ///
+ /// The file owns its backing data. Writing to the file uses the [`Reader`]
+ /// implementation.
+ ///
+ /// The file is removed when the returned [`File`] is dropped.
+ pub fn write_only_file<'a, T, E: 'a>(
+ &'a self,
+ name: &'a CStr,
+ data: impl PinInit<T, E> + 'a,
+ ) -> impl PinInit<File<T>, E> + 'a
+ where
+ T: Reader + Send + Sync + 'static,
+ {
+ self.create_file(name, data, &T::FILE_OPS)
+ }
+
+ /// Creates a write-only file in this directory, with write logic from a callback.
+ ///
+ /// `w` must be a function item or a non-capturing closure.
+ /// This is statically asserted and not a safety requirement.
+ pub fn write_callback_file<'a, T, E: 'a, W>(
+ &'a self,
+ name: &'a CStr,
+ data: impl PinInit<T, E> + 'a,
+ _w: &'static W,
+ ) -> impl PinInit<File<T>, E> + 'a
+ where
+ T: Send + Sync + 'static,
+ W: Fn(&T, &mut UserSliceReader) -> Result + Send + Sync,
+ {
+ let file_ops = <WritableAdapter<NoWriter<T>, W> as WriteFile<_>>::FILE_OPS
+ .adapt()
+ .adapt();
+ self.create_file(name, data, file_ops)
+ }
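A sketch of the write-only callback variant; `reset` and the flag are illustrative. As documented, the closure must be non-capturing so it can be materialized from its type alone:

```
use core::sync::atomic::{AtomicBool, Ordering};
use kernel::c_str;
use kernel::debugfs::{Dir, File};
use kernel::prelude::*;

fn reset_knob(dir: &Dir) -> impl PinInit<File<AtomicBool>, Error> + '_ {
    // Any write to the file trips the flag; the written payload is ignored.
    dir.write_callback_file(c_str!("reset"), AtomicBool::new(false), &|flag, _reader| {
        flag.store(true, Ordering::Relaxed);
        Ok(())
    })
}
```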
+
+ // While this function is safe, it is intentionally not public because it's a bit of a
+ // footgun.
+ //
+ // Unless you also extract the `entry` later and schedule it for `Drop` at the appropriate
+ // time, a `ScopedDir` with a `Dir` parent will never be deleted.
+ fn scoped_dir<'data>(&self, name: &CStr) -> ScopedDir<'data, 'static> {
+ #[cfg(CONFIG_DEBUG_FS)]
+ {
+ let parent_entry = match &self.0 {
+ None => return ScopedDir::empty(),
+ Some(entry) => entry.clone(),
+ };
+ ScopedDir {
+ entry: ManuallyDrop::new(Entry::dynamic_dir(name, Some(parent_entry))),
+ _phantom: PhantomData,
+ }
+ }
+ #[cfg(not(CONFIG_DEBUG_FS))]
+ ScopedDir::empty()
+ }
+
+ /// Creates a new scope, which is a directory associated with some data `T`.
+ ///
+ /// The created directory will be a subdirectory of `self`. The `init` closure is called to
+ /// populate the directory with files and subdirectories. These files can reference the data
+ /// stored in the scope.
+ ///
+ /// The entire directory tree created within the scope will be removed when the returned
+ /// `Scope` handle is dropped.
+ pub fn scope<'a, T: 'a, E: 'a, F>(
+ &'a self,
+ data: impl PinInit<T, E> + 'a,
+ name: &'a CStr,
+ init: F,
+ ) -> impl PinInit<Scope<T>, E> + 'a
+ where
+ F: for<'data, 'dir> FnOnce(&'data T, &'dir ScopedDir<'data, 'dir>) + 'a,
+ {
+ Scope::new(data, |data| {
+ let scoped = self.scoped_dir(name);
+ init(data, &scoped);
+ scoped.into_entry()
+ })
+ }
+}
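Rounding out the API above, a hedged sketch of `scope` usage; `Counters` and the directory names are illustrative:

```
use core::sync::atomic::AtomicU32;
use kernel::c_str;
use kernel::debugfs::Dir;
use kernel::prelude::*;

struct Counters {
    tx: AtomicU32,
    rx: AtomicU32,
}

fn counters_dir(parent: &Dir) -> Result {
    // Both files borrow from the scope's `Counters`; the whole subtree is
    // removed when `scope` is dropped.
    let scope = KBox::pin_init(
        parent.scope(
            Counters { tx: AtomicU32::new(0), rx: AtomicU32::new(0) },
            c_str!("counters"),
            |data, dir| {
                dir.read_only_file(c_str!("tx"), &data.tx);
                dir.read_only_file(c_str!("rx"), &data.rx);
            },
        ),
        GFP_KERNEL,
    )?;
    let _ = scope;
    Ok(())
}
```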
+
+#[pin_data]
+/// Handle to a DebugFS scope, which ensures that attached `data` will outlive the DebugFS entry
+/// without moving.
+///
+/// This is internally used to back [`File`], and used in the API to represent the attachment
+/// of a directory lifetime to a data structure which may be jointly accessed by a number of
+/// different files.
+///
+/// When dropped, a `Scope` will remove all directories and files in the filesystem backed by the
+/// attached data structure prior to releasing the attached data.
+pub struct Scope<T> {
+    // This order is load-bearing for drops: `_entry` must be dropped before `data`.
+ #[cfg(CONFIG_DEBUG_FS)]
+ _entry: Entry<'static>,
+ #[pin]
+ data: T,
+ // Even if `T` is `Unpin`, we still can't allow it to be moved.
+ #[pin]
+ _pin: PhantomPinned,
+}
+
+#[pin_data]
+/// Handle to a DebugFS file, owning its backing data.
+///
+/// When dropped, the DebugFS file will be removed and the attached data will be dropped.
+pub struct File<T> {
+ #[pin]
+ scope: Scope<T>,
+}
+
+#[cfg(not(CONFIG_DEBUG_FS))]
+impl<'b, T: 'b> Scope<T> {
+ fn new<E: 'b, F>(data: impl PinInit<T, E> + 'b, init: F) -> impl PinInit<Self, E> + 'b
+ where
+ F: for<'a> FnOnce(&'a T) + 'b,
+ {
+ try_pin_init! {
+ Self {
+ data <- data,
+ _pin: PhantomPinned
+ } ? E
+ }
+ .pin_chain(|scope| {
+ init(&scope.data);
+ Ok(())
+ })
+ }
+}
+
+#[cfg(CONFIG_DEBUG_FS)]
+impl<'b, T: 'b> Scope<T> {
+ fn entry_mut(self: Pin<&mut Self>) -> &mut Entry<'static> {
+ // SAFETY: _entry is not structurally pinned.
+ unsafe { &mut Pin::into_inner_unchecked(self)._entry }
+ }
+
+ fn new<E: 'b, F>(data: impl PinInit<T, E> + 'b, init: F) -> impl PinInit<Self, E> + 'b
+ where
+ F: for<'a> FnOnce(&'a T) -> Entry<'static> + 'b,
+ {
+ try_pin_init! {
+ Self {
+ _entry: Entry::empty(),
+ data <- data,
+ _pin: PhantomPinned
+ } ? E
+ }
+ .pin_chain(|scope| {
+ *scope.entry_mut() = init(&scope.data);
+ Ok(())
+ })
+ }
+}
+
+impl<'a, T: 'a> Scope<T> {
+ /// Creates a new scope, which is a directory at the root of the debugfs filesystem,
+ /// associated with some data `T`.
+ ///
+ /// The `init` closure is called to populate the directory with files and subdirectories. These
+ /// files can reference the data stored in the scope.
+ ///
+ /// The entire directory tree created within the scope will be removed when the returned
+ /// `Scope` handle is dropped.
+ pub fn dir<E: 'a, F>(
+ data: impl PinInit<T, E> + 'a,
+ name: &'a CStr,
+ init: F,
+ ) -> impl PinInit<Self, E> + 'a
+ where
+ F: for<'data, 'dir> FnOnce(&'data T, &'dir ScopedDir<'data, 'dir>) + 'a,
+ {
+ Scope::new(data, |data| {
+ let scoped = ScopedDir::new(name);
+ init(data, &scoped);
+ scoped.into_entry()
+ })
+ }
+}
+
+impl<T> Deref for Scope<T> {
+ type Target = T;
+ fn deref(&self) -> &T {
+ &self.data
+ }
+}
+
+impl<T> Deref for File<T> {
+ type Target = T;
+ fn deref(&self) -> &T {
+ &self.scope
+ }
+}
+
+/// A handle to a directory which will live at most `'dir`, accessing data that will live for at
+/// least `'data`.
+///
+/// Dropping a [`ScopedDir`] will not delete or clean it up; that is expected to happen when the
+/// [`Scope`] that created it is dropped.
+pub struct ScopedDir<'data, 'dir> {
+ #[cfg(CONFIG_DEBUG_FS)]
+ entry: ManuallyDrop<Entry<'dir>>,
+ _phantom: PhantomData<fn(&'data ()) -> &'dir ()>,
+}
+
+impl<'data, 'dir> ScopedDir<'data, 'dir> {
+ /// Creates a subdirectory inside this `ScopedDir`.
+ ///
+ /// The returned directory handle cannot outlive this one.
+ pub fn dir<'dir2>(&'dir2 self, name: &CStr) -> ScopedDir<'data, 'dir2> {
+ #[cfg(not(CONFIG_DEBUG_FS))]
+ let _ = name;
+ ScopedDir {
+ #[cfg(CONFIG_DEBUG_FS)]
+ entry: ManuallyDrop::new(Entry::dir(name, Some(&*self.entry))),
+ _phantom: PhantomData,
+ }
+ }
+
+ fn create_file<T: Sync>(&self, name: &CStr, data: &'data T, vtable: &'static FileOps<T>) {
+ #[cfg(CONFIG_DEBUG_FS)]
+ core::mem::forget(Entry::file(name, &self.entry, data, vtable));
+ }
+
+ /// Creates a read-only file in this directory.
+ ///
+ /// The file's contents are produced by invoking [`Writer::write`].
+ ///
+ /// This function does not produce an owning handle to the file. The created
+ /// file is removed when the [`Scope`] that this directory belongs
+ /// to is dropped.
+ pub fn read_only_file<T: Writer + Send + Sync + 'static>(&self, name: &CStr, data: &'data T) {
+ self.create_file(name, data, &T::FILE_OPS)
+ }
+
+ /// Creates a read-only file in this directory, with contents from a callback.
+ ///
+ /// The file contents are generated by calling `f` with `data`.
+    ///
+ /// `f` must be a function item or a non-capturing closure.
+ /// This is statically asserted and not a safety requirement.
+ ///
+ /// This function does not produce an owning handle to the file. The created
+ /// file is removed when the [`Scope`] that this directory belongs
+ /// to is dropped.
+ pub fn read_callback_file<T, F>(&self, name: &CStr, data: &'data T, _f: &'static F)
+ where
+ T: Send + Sync + 'static,
+ F: Fn(&T, &mut fmt::Formatter<'_>) -> fmt::Result + Send + Sync,
+ {
+ let vtable = <FormatAdapter<T, F> as ReadFile<_>>::FILE_OPS.adapt();
+ self.create_file(name, data, vtable)
+ }
+
+ /// Creates a read-write file in this directory.
+ ///
+ /// Reading the file uses the [`Writer`] implementation on `data`. Writing to the file uses
+ /// the [`Reader`] implementation on `data`.
+ ///
+ /// This function does not produce an owning handle to the file. The created
+ /// file is removed when the [`Scope`] that this directory belongs
+ /// to is dropped.
+ pub fn read_write_file<T: Writer + Reader + Send + Sync + 'static>(
+ &self,
+ name: &CStr,
+ data: &'data T,
+ ) {
+ let vtable = &<T as ReadWriteFile<_>>::FILE_OPS;
+ self.create_file(name, data, vtable)
+ }
+
+ /// Creates a read-write file in this directory, with logic from callbacks.
+ ///
+ /// Reading from the file is handled by `f`. Writing to the file is handled by `w`.
+ ///
+ /// `f` and `w` must be function items or non-capturing closures.
+ /// This is statically asserted and not a safety requirement.
+ ///
+ /// This function does not produce an owning handle to the file. The created
+ /// file is removed when the [`Scope`] that this directory belongs
+ /// to is dropped.
+ pub fn read_write_callback_file<T, F, W>(
+ &self,
+ name: &CStr,
+ data: &'data T,
+ _f: &'static F,
+ _w: &'static W,
+ ) where
+ T: Send + Sync + 'static,
+ F: Fn(&T, &mut fmt::Formatter<'_>) -> fmt::Result + Send + Sync,
+ W: Fn(&T, &mut UserSliceReader) -> Result + Send + Sync,
+ {
+ let vtable = <WritableAdapter<FormatAdapter<T, F>, W> as ReadWriteFile<_>>::FILE_OPS
+ .adapt()
+ .adapt();
+ self.create_file(name, data, vtable)
+ }
+
+ /// Creates a write-only file in this directory.
+ ///
+ /// Writing to the file uses the [`Reader`] implementation on `data`.
+ ///
+ /// This function does not produce an owning handle to the file. The created
+ /// file is removed when the [`Scope`] that this directory belongs
+ /// to is dropped.
+ pub fn write_only_file<T: Reader + Send + Sync + 'static>(&self, name: &CStr, data: &'data T) {
+ let vtable = &<T as WriteFile<_>>::FILE_OPS;
+ self.create_file(name, data, vtable)
+ }
+
+ /// Creates a write-only file in this directory, with write logic from a callback.
+ ///
+ /// Writing to the file is handled by `w`.
+ ///
+ /// `w` must be a function item or a non-capturing closure.
+ /// This is statically asserted and not a safety requirement.
+ ///
+ /// This function does not produce an owning handle to the file. The created
+ /// file is removed when the [`Scope`] that this directory belongs
+ /// to is dropped.
+ pub fn write_only_callback_file<T, W>(&self, name: &CStr, data: &'data T, _w: &'static W)
+ where
+ T: Send + Sync + 'static,
+ W: Fn(&T, &mut UserSliceReader) -> Result + Send + Sync,
+ {
+ let vtable = &<WritableAdapter<NoWriter<T>, W> as WriteFile<_>>::FILE_OPS
+ .adapt()
+ .adapt();
+ self.create_file(name, data, vtable)
+ }
+
+ fn empty() -> Self {
+ ScopedDir {
+ #[cfg(CONFIG_DEBUG_FS)]
+ entry: ManuallyDrop::new(Entry::empty()),
+ _phantom: PhantomData,
+ }
+ }
+ #[cfg(CONFIG_DEBUG_FS)]
+ fn into_entry(self) -> Entry<'dir> {
+ ManuallyDrop::into_inner(self.entry)
+ }
+ #[cfg(not(CONFIG_DEBUG_FS))]
+ fn into_entry(self) {}
+}
+
+impl<'data> ScopedDir<'data, 'static> {
+ // This is safe, but intentionally not exported due to footgun status. A ScopedDir with no
+ // parent will never be released by default, and needs to have its entry extracted and used
+ // somewhere.
+ fn new(name: &CStr) -> ScopedDir<'data, 'static> {
+ ScopedDir {
+ #[cfg(CONFIG_DEBUG_FS)]
+ entry: ManuallyDrop::new(Entry::dir(name, None)),
+ _phantom: PhantomData,
+ }
+ }
+}
diff --git a/rust/kernel/debugfs/callback_adapters.rs b/rust/kernel/debugfs/callback_adapters.rs
new file mode 100644
index 000000000000..6c024230f676
--- /dev/null
+++ b/rust/kernel/debugfs/callback_adapters.rs
@@ -0,0 +1,122 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2025 Google LLC.
+
+//! Adapters which allow the user to supply a write or read implementation as a value rather
+//! than a trait implementation. If provided, it will override the trait implementation.
+
+use super::{Reader, Writer};
+use crate::prelude::*;
+use crate::uaccess::UserSliceReader;
+use core::fmt;
+use core::fmt::Formatter;
+use core::marker::PhantomData;
+use core::ops::Deref;
+
+/// # Safety
+///
+/// To implement this trait, it must be safe to cast a `&Self` to a `&Self::Inner`.
+/// It is intended for use in unstacking adapters out of `FileOps` backings.
+pub(crate) unsafe trait Adapter {
+ type Inner;
+}
+
+/// Adapter to implement `Reader` via a callback with the same representation as the wrapped type.
+///
+/// * Layer it on top of `FormatAdapter` if you want to add a custom callback for `write`.
+/// * Layer it on top of `NoWriter` to pass through any support present on the underlying type.
+///
+/// # Invariants
+///
+/// If an instance of `WritableAdapter<_, W>` is constructed, `W` is inhabited.
+#[repr(transparent)]
+pub(crate) struct WritableAdapter<D, W> {
+ inner: D,
+ _writer: PhantomData<W>,
+}
+
+// SAFETY: Stripping off the adapter only removes constraints
+unsafe impl<D, W> Adapter for WritableAdapter<D, W> {
+ type Inner = D;
+}
+
+impl<D: Writer, W> Writer for WritableAdapter<D, W> {
+ fn write(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
+ self.inner.write(fmt)
+ }
+}
+
+impl<D: Deref, W> Reader for WritableAdapter<D, W>
+where
+ W: Fn(&D::Target, &mut UserSliceReader) -> Result + Send + Sync + 'static,
+{
+ fn read_from_slice(&self, reader: &mut UserSliceReader) -> Result {
+ // SAFETY: WritableAdapter<_, W> can only be constructed if W is inhabited
+ let w: &W = unsafe { materialize_zst() };
+ w(self.inner.deref(), reader)
+ }
+}
+
+/// Adapter to implement `Writer` via a callback with the same representation as the wrapped type.
+///
+/// # Invariants
+///
+/// If an instance of `FormatAdapter<_, F>` is constructed, `F` is inhabited.
+#[repr(transparent)]
+pub(crate) struct FormatAdapter<D, F> {
+ inner: D,
+ _formatter: PhantomData<F>,
+}
+
+impl<D, F> Deref for FormatAdapter<D, F> {
+ type Target = D;
+ fn deref(&self) -> &D {
+ &self.inner
+ }
+}
+
+impl<D, F> Writer for FormatAdapter<D, F>
+where
+ F: Fn(&D, &mut Formatter<'_>) -> fmt::Result + 'static,
+{
+ fn write(&self, fmt: &mut Formatter<'_>) -> fmt::Result {
+ // SAFETY: FormatAdapter<_, F> can only be constructed if F is inhabited
+ let f: &F = unsafe { materialize_zst() };
+ f(&self.inner, fmt)
+ }
+}
+
+// SAFETY: Stripping off the adapter only removes constraints
+unsafe impl<D, F> Adapter for FormatAdapter<D, F> {
+ type Inner = D;
+}
+
+#[repr(transparent)]
+pub(crate) struct NoWriter<D> {
+ inner: D,
+}
+
+// SAFETY: Stripping off the adapter only removes constraints
+unsafe impl<D> Adapter for NoWriter<D> {
+ type Inner = D;
+}
+
+impl<D> Deref for NoWriter<D> {
+ type Target = D;
+ fn deref(&self) -> &D {
+ &self.inner
+ }
+}
+
+/// For a zero-sized type with a unique value, produce a `'static` reference to that value.
+///
+/// # Safety
+///
+/// The caller asserts that `F` is inhabited.
+unsafe fn materialize_zst<F>() -> &'static F {
+ const { assert!(core::mem::size_of::<F>() == 0) };
+ let zst_dangle: core::ptr::NonNull<F> = core::ptr::NonNull::dangling();
+ // SAFETY: While the pointer is dangling, it is a dangling pointer to a ZST, based on the
+ // assertion above. The type is also inhabited, by the caller's assertion. This means
+ // we can materialize it.
+ unsafe { zst_dangle.as_ref() }
+}
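To make the inhabitedness contract concrete, a hypothetical caller; `call_label` is not part of the patch:

```
// A non-capturing closure is an inhabited ZST, so a `'static` reference to
// it can be conjured from its type alone. `_witness` is what proves
// inhabitedness here, mirroring the `_f: &'static F` parameters above.
fn call_label<F: Fn() -> &'static str>(_witness: &'static F) -> &'static str {
    // SAFETY: `_witness` proves `F` is inhabited; the `const` assertion in
    // `materialize_zst` rejects any non-zero-sized `F` at compile time.
    let f: &'static F = unsafe { materialize_zst() };
    f()
}
```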
diff --git a/rust/kernel/debugfs/entry.rs b/rust/kernel/debugfs/entry.rs
new file mode 100644
index 000000000000..f99402cd3ba0
--- /dev/null
+++ b/rust/kernel/debugfs/entry.rs
@@ -0,0 +1,164 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2025 Google LLC.
+
+use crate::debugfs::file_ops::FileOps;
+use crate::ffi::c_void;
+use crate::str::CStr;
+use crate::sync::Arc;
+use core::marker::PhantomData;
+
+/// Owning handle to a DebugFS entry.
+///
+/// # Invariants
+///
+/// The wrapped pointer will always be `NULL`, an error, or an owned DebugFS `dentry`.
+pub(crate) struct Entry<'a> {
+ entry: *mut bindings::dentry,
+ // If we were created with an owning parent, this is the keep-alive
+ _parent: Option<Arc<Entry<'static>>>,
+ // If we were created with a non-owning parent, this prevents us from outliving it
+ _phantom: PhantomData<&'a ()>,
+}
+
+// SAFETY: [`Entry`] is just a `dentry` under the hood, which the API promises can be transferred
+// between threads.
+unsafe impl Send for Entry<'_> {}
+
+// SAFETY: All the C functions we call on the `dentry` pointer are threadsafe.
+unsafe impl Sync for Entry<'_> {}
+
+impl Entry<'static> {
+ pub(crate) fn dynamic_dir(name: &CStr, parent: Option<Arc<Self>>) -> Self {
+ let parent_ptr = match &parent {
+ Some(entry) => entry.as_ptr(),
+ None => core::ptr::null_mut(),
+ };
+ // SAFETY: The invariants of this function's arguments ensure the safety of this call.
+ // * `name` is a valid C string by the invariants of `&CStr`.
+ // * `parent_ptr` is either `NULL` (if `parent` is `None`), or a pointer to a valid
+ // `dentry` by our invariant. `debugfs_create_dir` handles `NULL` pointers correctly.
+ let entry = unsafe { bindings::debugfs_create_dir(name.as_char_ptr(), parent_ptr) };
+
+ Entry {
+ entry,
+ _parent: parent,
+ _phantom: PhantomData,
+ }
+ }
+
+ /// # Safety
+ ///
+ /// * `data` must outlive the returned `Entry`.
+ pub(crate) unsafe fn dynamic_file<T>(
+ name: &CStr,
+ parent: Arc<Self>,
+ data: &T,
+ file_ops: &'static FileOps<T>,
+ ) -> Self {
+ // SAFETY: The invariants of this function's arguments ensure the safety of this call.
+ // * `name` is a valid C string by the invariants of `&CStr`.
+ // * `parent.as_ptr()` is a pointer to a valid `dentry` by invariant.
+ // * The caller guarantees that `data` will outlive the returned `Entry`.
+ // * The guarantees on `FileOps` assert the vtable will be compatible with the data we have
+ // provided.
+ let entry = unsafe {
+ bindings::debugfs_create_file_full(
+ name.as_char_ptr(),
+ file_ops.mode(),
+ parent.as_ptr(),
+ core::ptr::from_ref(data) as *mut c_void,
+ core::ptr::null(),
+ &**file_ops,
+ )
+ };
+
+ Entry {
+ entry,
+ _parent: Some(parent),
+ _phantom: PhantomData,
+ }
+ }
+}
+
+impl<'a> Entry<'a> {
+ pub(crate) fn dir(name: &CStr, parent: Option<&'a Entry<'_>>) -> Self {
+ let parent_ptr = match &parent {
+ Some(entry) => entry.as_ptr(),
+ None => core::ptr::null_mut(),
+ };
+ // SAFETY: The invariants of this function's arguments ensure the safety of this call.
+ // * `name` is a valid C string by the invariants of `&CStr`.
+ // * `parent_ptr` is either `NULL` (if `parent` is `None`), or a pointer to a valid
+ // `dentry` (because `parent` is a valid reference to an `Entry`). The lifetime `'a`
+ // ensures that the parent outlives this entry.
+ let entry = unsafe { bindings::debugfs_create_dir(name.as_char_ptr(), parent_ptr) };
+
+ Entry {
+ entry,
+ _parent: None,
+ _phantom: PhantomData,
+ }
+ }
+
+ pub(crate) fn file<T>(
+ name: &CStr,
+ parent: &'a Entry<'_>,
+ data: &'a T,
+ file_ops: &FileOps<T>,
+ ) -> Self {
+ // SAFETY: The invariants of this function's arguments ensure the safety of this call.
+ // * `name` is a valid C string by the invariants of `&CStr`.
+ // * `parent.as_ptr()` is a pointer to a valid `dentry` because we have `&'a Entry`.
+ // * `data` is a valid pointer to `T` for lifetime `'a`.
+ // * The returned `Entry` has lifetime `'a`, so it cannot outlive `parent` or `data`.
+ // * The guarantees on `FileOps` assert the vtable will be compatible with the data we have
+ // provided.
+ let entry = unsafe {
+ bindings::debugfs_create_file_full(
+ name.as_char_ptr(),
+ file_ops.mode(),
+ parent.as_ptr(),
+ core::ptr::from_ref(data) as *mut c_void,
+ core::ptr::null(),
+ &**file_ops,
+ )
+ };
+
+ Entry {
+ entry,
+ _parent: None,
+ _phantom: PhantomData,
+ }
+ }
+}
+
+impl Entry<'_> {
+ /// Constructs a placeholder DebugFS [`Entry`].
+ pub(crate) fn empty() -> Self {
+ Self {
+ entry: core::ptr::null_mut(),
+ _parent: None,
+ _phantom: PhantomData,
+ }
+ }
+
+ /// Returns the pointer representation of the DebugFS directory.
+ ///
+ /// # Guarantees
+ ///
+ /// Due to the type invariant, the value returned from this function will always be an error
+ /// code, NULL, or a live DebugFS directory. If it is live, it will remain live at least as
+ /// long as this entry lives.
+ pub(crate) fn as_ptr(&self) -> *mut bindings::dentry {
+ self.entry
+ }
+}
+
+impl Drop for Entry<'_> {
+ fn drop(&mut self) {
+ // SAFETY: `debugfs_remove` can take `NULL`, error values, and legal DebugFS dentries.
+ // `as_ptr` guarantees that the pointer is of this form.
+ unsafe { bindings::debugfs_remove(self.as_ptr()) }
+ }
+}
diff --git a/rust/kernel/debugfs/file_ops.rs b/rust/kernel/debugfs/file_ops.rs
new file mode 100644
index 000000000000..50fead17b6f3
--- /dev/null
+++ b/rust/kernel/debugfs/file_ops.rs
@@ -0,0 +1,247 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2025 Google LLC.
+
+use super::{Reader, Writer};
+use crate::debugfs::callback_adapters::Adapter;
+use crate::prelude::*;
+use crate::seq_file::SeqFile;
+use crate::seq_print;
+use crate::uaccess::UserSlice;
+use core::fmt::{Display, Formatter, Result};
+use core::marker::PhantomData;
+
+#[cfg(CONFIG_DEBUG_FS)]
+use core::ops::Deref;
+
+/// # Invariant
+///
+/// `FileOps<T>` will always contain an `operations` which is safe to use for a file backed
+/// off an inode which has a pointer to a `T` in its private data that is safe to convert
+/// into a reference.
+pub(super) struct FileOps<T> {
+ #[cfg(CONFIG_DEBUG_FS)]
+ operations: bindings::file_operations,
+ #[cfg(CONFIG_DEBUG_FS)]
+ mode: u16,
+ _phantom: PhantomData<T>,
+}
+
+impl<T> FileOps<T> {
+ /// # Safety
+ ///
+ /// The caller asserts that the provided `operations` is safe to use for a file whose
+ /// inode has a pointer to `T` in its private data that is safe to convert into a reference.
+ const unsafe fn new(operations: bindings::file_operations, mode: u16) -> Self {
+ Self {
+ #[cfg(CONFIG_DEBUG_FS)]
+ operations,
+ #[cfg(CONFIG_DEBUG_FS)]
+ mode,
+ _phantom: PhantomData,
+ }
+ }
+
+ #[cfg(CONFIG_DEBUG_FS)]
+ pub(crate) const fn mode(&self) -> u16 {
+ self.mode
+ }
+}
+
+impl<T: Adapter> FileOps<T> {
+ pub(super) const fn adapt(&self) -> &FileOps<T::Inner> {
+ // SAFETY: `Adapter` asserts that `T` can be legally cast to `T::Inner`.
+ unsafe { core::mem::transmute(self) }
+ }
+}
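A module-internal sketch of the intended call pattern (a hypothetical helper, mirroring `read_write_callback_file` in `debugfs.rs`): each `adapt` peels one `#[repr(transparent)]` layer so a vtable built for the adapter stack can serve the plain `T` stored in the inode:

```
use crate::debugfs::callback_adapters::{FormatAdapter, WritableAdapter};
use crate::prelude::*;
use crate::uaccess::UserSliceReader;
use core::fmt;

fn ops_for<T, F, W>() -> &'static FileOps<T>
where
    T: Send + Sync + 'static,
    F: Fn(&T, &mut fmt::Formatter<'_>) -> fmt::Result + Send + Sync + 'static,
    W: Fn(&T, &mut UserSliceReader) -> Result + Send + Sync + 'static,
{
    <WritableAdapter<FormatAdapter<T, F>, W> as ReadWriteFile<_>>::FILE_OPS
        .adapt() // -> &FileOps<FormatAdapter<T, F>>
        .adapt() // -> &FileOps<T>
}
```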
+
+#[cfg(CONFIG_DEBUG_FS)]
+impl<T> Deref for FileOps<T> {
+ type Target = bindings::file_operations;
+
+ fn deref(&self) -> &Self::Target {
+ &self.operations
+ }
+}
+
+struct WriterAdapter<T>(T);
+
+impl<'a, T: Writer> Display for WriterAdapter<&'a T> {
+ fn fmt(&self, f: &mut Formatter<'_>) -> Result {
+ self.0.write(f)
+ }
+}
+
+/// Implements `open` for `file_operations` via `single_open` to fill out a `seq_file`.
+///
+/// # Safety
+///
+/// * `inode`'s private pointer must point to a value of type `T` which will outlive the `inode`
+/// and will not have any unique references alias it during the call.
+/// * `file` must point to a live, not-yet-initialized file object.
+unsafe extern "C" fn writer_open<T: Writer + Sync>(
+ inode: *mut bindings::inode,
+ file: *mut bindings::file,
+) -> c_int {
+ // SAFETY: The caller ensures that `inode` is a valid pointer.
+ let data = unsafe { (*inode).i_private };
+ // SAFETY:
+ // * `file` is acceptable by caller precondition.
+    // * `writer_act` will be called on a `seq_file` with private data set to the third argument,
+ // so we meet its safety requirements.
+ // * The `data` pointer passed in the third argument is a valid `T` pointer that outlives
+ // this call by caller preconditions.
+ unsafe { bindings::single_open(file, Some(writer_act::<T>), data) }
+}
+
+/// Prints private data stashed in a seq_file to that seq file.
+///
+/// # Safety
+///
+/// `seq` must point to a live `seq_file` whose private data is a valid pointer to a `T` which may
+/// not have any unique references alias it during the call.
+unsafe extern "C" fn writer_act<T: Writer + Sync>(
+ seq: *mut bindings::seq_file,
+ _: *mut c_void,
+) -> c_int {
+    // SAFETY: By caller precondition, this pointer is a valid pointer to a `T`, and
+ // there are not and will not be any unique references until we are done.
+ let data = unsafe { &*((*seq).private.cast::<T>()) };
+ // SAFETY: By caller precondition, `seq_file` points to a live `seq_file`, so we can lift
+ // it.
+ let seq_file = unsafe { SeqFile::from_raw(seq) };
+ seq_print!(seq_file, "{}", WriterAdapter(data));
+ 0
+}
+
+// Work around lack of generic const items.
+pub(crate) trait ReadFile<T> {
+ const FILE_OPS: FileOps<T>;
+}
+
+impl<T: Writer + Sync> ReadFile<T> for T {
+ const FILE_OPS: FileOps<T> = {
+ let operations = bindings::file_operations {
+ read: Some(bindings::seq_read),
+ llseek: Some(bindings::seq_lseek),
+ release: Some(bindings::single_release),
+ open: Some(writer_open::<Self>),
+ // SAFETY: `file_operations` supports zeroes in all fields.
+ ..unsafe { core::mem::zeroed() }
+ };
+ // SAFETY: `operations` is all stock `seq_file` implementations except for `writer_open`.
+ // `open`'s only requirement beyond what is provided to all open functions is that the
+ // inode's data pointer must point to a `T` that will outlive it, which matches the
+ // `FileOps` requirements.
+ unsafe { FileOps::new(operations, 0o400) }
+ };
+}
+
+fn read<T: Reader + Sync>(data: &T, buf: *const c_char, count: usize) -> isize {
+ let mut reader = UserSlice::new(UserPtr::from_ptr(buf as *mut c_void), count).reader();
+
+ if let Err(e) = data.read_from_slice(&mut reader) {
+ return e.to_errno() as isize;
+ }
+
+ count as isize
+}
+
+/// # Safety
+///
+/// `file` must be a valid pointer to a `file` struct.
+/// The `private_data` of the file must contain a valid pointer to a `seq_file` whose
+/// `private` data in turn points to a `T` that implements `Reader`.
+/// `buf` must be a valid user-space buffer.
+pub(crate) unsafe extern "C" fn write<T: Reader + Sync>(
+ file: *mut bindings::file,
+ buf: *const c_char,
+ count: usize,
+ _ppos: *mut bindings::loff_t,
+) -> isize {
+ // SAFETY: The file was opened with `single_open`, which sets `private_data` to a `seq_file`.
+ let seq = unsafe { &mut *((*file).private_data.cast::<bindings::seq_file>()) };
+ // SAFETY: By caller precondition, this pointer is live and points to a value of type `T`.
+ let data = unsafe { &*(seq.private as *const T) };
+ read(data, buf, count)
+}
+
+// A trait to get the file operations for a type.
+pub(crate) trait ReadWriteFile<T> {
+ const FILE_OPS: FileOps<T>;
+}
+
+impl<T: Writer + Reader + Sync> ReadWriteFile<T> for T {
+ const FILE_OPS: FileOps<T> = {
+ let operations = bindings::file_operations {
+ open: Some(writer_open::<T>),
+ read: Some(bindings::seq_read),
+ write: Some(write::<T>),
+ llseek: Some(bindings::seq_lseek),
+ release: Some(bindings::single_release),
+ // SAFETY: `file_operations` supports zeroes in all fields.
+ ..unsafe { core::mem::zeroed() }
+ };
+ // SAFETY: `operations` is all stock `seq_file` implementations except for `writer_open`
+ // and `write`.
+ // `writer_open`'s only requirement beyond what is provided to all open functions is that
+ // the inode's data pointer must point to a `T` that will outlive it, which matches the
+ // `FileOps` requirements.
+ // `write` only requires that the file's private data pointer points to `seq_file`
+ // which points to a `T` that will outlive it, which matches what `writer_open`
+ // provides.
+ unsafe { FileOps::new(operations, 0o600) }
+ };
+}
+
+/// # Safety
+///
+/// `inode` must be a valid pointer to an `inode` struct.
+/// `file` must be a valid pointer to a `file` struct.
+unsafe extern "C" fn write_only_open(
+ inode: *mut bindings::inode,
+ file: *mut bindings::file,
+) -> c_int {
+ // SAFETY: The caller ensures that `inode` and `file` are valid pointers.
+ unsafe { (*file).private_data = (*inode).i_private };
+ 0
+}
+
+/// # Safety
+///
+/// * `file` must be a valid pointer to a `file` struct.
+/// * The `private_data` of the file must contain a valid pointer to a `T` that implements
+/// `Reader`.
+/// * `buf` must be a valid user-space buffer.
+pub(crate) unsafe extern "C" fn write_only_write<T: Reader + Sync>(
+ file: *mut bindings::file,
+ buf: *const c_char,
+ count: usize,
+ _ppos: *mut bindings::loff_t,
+) -> isize {
+ // SAFETY: The caller ensures that `file` is a valid pointer and that `private_data` holds a
+ // valid pointer to `T`.
+ let data = unsafe { &*((*file).private_data as *const T) };
+ read(data, buf, count)
+}
+
+pub(crate) trait WriteFile<T> {
+ const FILE_OPS: FileOps<T>;
+}
+
+impl<T: Reader + Sync> WriteFile<T> for T {
+ const FILE_OPS: FileOps<T> = {
+ let operations = bindings::file_operations {
+ open: Some(write_only_open),
+ write: Some(write_only_write::<T>),
+ llseek: Some(bindings::noop_llseek),
+ // SAFETY: `file_operations` supports zeroes in all fields.
+ ..unsafe { core::mem::zeroed() }
+ };
+ // SAFETY:
+ // * `write_only_open` populates the file private data with the inode private data
+ // * `write_only_write`'s only requirement is that the private data of the file point to
+ // a `T` and be legal to convert to a shared reference, which `write_only_open`
+ // satisfies.
+ unsafe { FileOps::new(operations, 0o200) }
+ };
+}
diff --git a/rust/kernel/debugfs/traits.rs b/rust/kernel/debugfs/traits.rs
new file mode 100644
index 000000000000..ab009eb254b3
--- /dev/null
+++ b/rust/kernel/debugfs/traits.rs
@@ -0,0 +1,102 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2025 Google LLC.
+
+//! Traits for rendering or updating values exported to DebugFS.
+
+use crate::prelude::*;
+use crate::sync::Mutex;
+use crate::uaccess::UserSliceReader;
+use core::fmt::{self, Debug, Formatter};
+use core::str::FromStr;
+use core::sync::atomic::{
+ AtomicI16, AtomicI32, AtomicI64, AtomicI8, AtomicIsize, AtomicU16, AtomicU32, AtomicU64,
+ AtomicU8, AtomicUsize, Ordering,
+};
+
+/// A trait for types that can be written into a string.
+///
+/// This works very similarly to `Debug`, and is automatically implemented if `Debug` is
+/// implemented for a type. It is also implemented for any writable type inside a `Mutex`.
+///
+/// The derived implementation of `Debug` [may
+/// change](https://doc.rust-lang.org/std/fmt/trait.Debug.html#stability)
+/// between Rust versions, so if stability is key for your use case, please implement `Writer`
+/// explicitly instead.
+pub trait Writer {
+ /// Formats the value using the given formatter.
+ fn write(&self, f: &mut Formatter<'_>) -> fmt::Result;
+}
+
+impl<T: Writer> Writer for Mutex<T> {
+ fn write(&self, f: &mut Formatter<'_>) -> fmt::Result {
+ self.lock().write(f)
+ }
+}
+
+impl<T: Debug> Writer for T {
+ fn write(&self, f: &mut Formatter<'_>) -> fmt::Result {
+ writeln!(f, "{self:?}")
+ }
+}
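Since the blanket impl follows `Debug`'s unstable derived format, a type that needs stable output can implement `Writer` directly; a sketch with an illustrative type, which deliberately does not implement `Debug` (otherwise the blanket impl would overlap with it):

```
use core::fmt::{self, Formatter};
use kernel::debugfs::Writer;

struct Stats {
    packets: u64,
}

impl Writer for Stats {
    fn write(&self, f: &mut Formatter<'_>) -> fmt::Result {
        // Fixed, documented format, independent of `Debug`'s derived output.
        writeln!(f, "packets: {}", self.packets)
    }
}
```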
+
+/// A trait for types that can be updated from a user slice.
+///
+/// This works similarly to `FromStr`, but operates on a `UserSliceReader` rather than a `&str`.
+///
+/// It is automatically implemented for all atomic integers, and for any type that implements
+/// `FromStr` when wrapped in a `Mutex`.
+pub trait Reader {
+ /// Updates the value from the given user slice.
+ fn read_from_slice(&self, reader: &mut UserSliceReader) -> Result;
+}
+
+impl<T: FromStr> Reader for Mutex<T> {
+ fn read_from_slice(&self, reader: &mut UserSliceReader) -> Result {
+ let mut buf = [0u8; 128];
+ if reader.len() > buf.len() {
+ return Err(EINVAL);
+ }
+ let n = reader.len();
+ reader.read_slice(&mut buf[..n])?;
+
+ let s = core::str::from_utf8(&buf[..n]).map_err(|_| EINVAL)?;
+ let val = s.trim().parse::<T>().map_err(|_| EINVAL)?;
+ *self.lock() = val;
+ Ok(())
+ }
+}
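A sketch of what this buys a driver (the helper is hypothetical; `Reader` is the re-export from this module):

```
use kernel::debugfs::Reader;
use kernel::prelude::*;
use kernel::sync::Mutex;
use kernel::uaccess::UserSliceReader;

// Any `Mutex<T>` with `T: FromStr` can consume user input this way; input
// over 128 bytes, or input that fails to parse, is rejected with EINVAL.
fn update_threshold(value: &Mutex<u64>, input: &mut UserSliceReader) -> Result {
    value.read_from_slice(input)
}
```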
+
+macro_rules! impl_reader_for_atomic {
+ ($(($atomic_type:ty, $int_type:ty)),*) => {
+ $(
+ impl Reader for $atomic_type {
+ fn read_from_slice(&self, reader: &mut UserSliceReader) -> Result {
+ let mut buf = [0u8; 21]; // Enough for a 64-bit number.
+ if reader.len() > buf.len() {
+ return Err(EINVAL);
+ }
+ let n = reader.len();
+ reader.read_slice(&mut buf[..n])?;
+
+ let s = core::str::from_utf8(&buf[..n]).map_err(|_| EINVAL)?;
+ let val = s.trim().parse::<$int_type>().map_err(|_| EINVAL)?;
+ self.store(val, Ordering::Relaxed);
+ Ok(())
+ }
+ }
+ )*
+ };
+}
+
+impl_reader_for_atomic!(
+ (AtomicI16, i16),
+ (AtomicI32, i32),
+ (AtomicI64, i64),
+ (AtomicI8, i8),
+ (AtomicIsize, isize),
+ (AtomicU16, u16),
+ (AtomicU32, u32),
+ (AtomicU64, u64),
+ (AtomicU8, u8),
+ (AtomicUsize, usize)
+);
diff --git a/rust/kernel/device.rs b/rust/kernel/device.rs
index a1db49eb159a..1321e6f0b53c 100644
--- a/rust/kernel/device.rs
+++ b/rust/kernel/device.rs
@@ -5,10 +5,11 @@
//! C header: [`include/linux/device.h`](srctree/include/linux/device.h)
use crate::{
- bindings,
- types::{ARef, ForeignOwnable, Opaque},
+ bindings, fmt,
+ sync::aref::ARef,
+ types::{ForeignOwnable, Opaque},
};
-use core::{fmt, marker::PhantomData, ptr};
+use core::{marker::PhantomData, ptr};
#[cfg(CONFIG_PRINTK)]
use crate::c_str;
@@ -406,7 +407,7 @@ kernel::impl_device_context_deref!(unsafe { Device });
kernel::impl_device_context_into_aref!(Device);
// SAFETY: Instances of `Device` are always reference-counted.
-unsafe impl crate::types::AlwaysRefCounted for Device {
+unsafe impl crate::sync::aref::AlwaysRefCounted for Device {
fn inc_ref(&self) {
// SAFETY: The existence of a shared reference guarantees that the refcount is non-zero.
unsafe { bindings::get_device(self.as_raw()) };
@@ -572,7 +573,7 @@ macro_rules! impl_device_context_deref {
#[macro_export]
macro_rules! __impl_device_context_into_aref {
($src:ty, $device:tt) => {
- impl ::core::convert::From<&$device<$src>> for $crate::types::ARef<$device> {
+ impl ::core::convert::From<&$device<$src>> for $crate::sync::aref::ARef<$device> {
fn from(dev: &$device<$src>) -> Self {
(&**dev).into()
}
@@ -596,7 +597,7 @@ macro_rules! impl_device_context_into_aref {
macro_rules! dev_printk {
($method:ident, $dev:expr, $($f:tt)*) => {
{
- ($dev).$method(::core::format_args!($($f)*));
+ ($dev).$method($crate::prelude::fmt!($($f)*));
}
}
}
diff --git a/rust/kernel/device/property.rs b/rust/kernel/device/property.rs
index 49ee12a906db..3a332a8c53a9 100644
--- a/rust/kernel/device/property.rs
+++ b/rust/kernel/device/property.rs
@@ -11,6 +11,7 @@ use crate::{
alloc::KVec,
bindings,
error::{to_result, Result},
+ fmt,
prelude::*,
str::{CStr, CString},
types::{ARef, Opaque},
@@ -68,16 +69,16 @@ impl FwNode {
unsafe { bindings::is_of_node(self.as_raw()) }
}
- /// Returns an object that implements [`Display`](core::fmt::Display) for
+ /// Returns an object that implements [`Display`](fmt::Display) for
/// printing the name of a node.
///
/// This is an alternative to the default `Display` implementation, which
/// prints the full path.
- pub fn display_name(&self) -> impl core::fmt::Display + '_ {
+ pub fn display_name(&self) -> impl fmt::Display + '_ {
struct FwNodeDisplayName<'a>(&'a FwNode);
- impl core::fmt::Display for FwNodeDisplayName<'_> {
- fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+ impl fmt::Display for FwNodeDisplayName<'_> {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
// SAFETY: `self` is valid by its type invariant.
let name = unsafe { bindings::fwnode_get_name(self.0.as_raw()) };
if name.is_null() {
@@ -87,7 +88,7 @@ impl FwNode {
// - `fwnode_get_name` returns null or a valid C string.
// - `name` was checked to be non-null.
let name = unsafe { CStr::from_char_ptr(name) };
- write!(f, "{name}")
+ fmt::Display::fmt(name, f)
}
}
@@ -351,8 +352,8 @@ impl FwNodeReferenceArgs {
}
}
-impl core::fmt::Debug for FwNodeReferenceArgs {
- fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+impl fmt::Debug for FwNodeReferenceArgs {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{:?}", self.as_slice())
}
}
@@ -377,8 +378,8 @@ enum Node<'a> {
Owned(ARef<FwNode>),
}
-impl core::fmt::Display for FwNode {
- fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+impl fmt::Display for FwNode {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
// The logic here is the same as the one in lib/vsprintf.c
// (fwnode_full_name_string).
@@ -413,9 +414,9 @@ impl core::fmt::Display for FwNode {
// SAFETY: `fwnode_get_name_prefix` returns null or a
// valid C string.
let prefix = unsafe { CStr::from_char_ptr(prefix) };
- write!(f, "{prefix}")?;
+ fmt::Display::fmt(prefix, f)?;
}
- write!(f, "{}", fwnode.display_name())?;
+ fmt::Display::fmt(&fwnode.display_name(), f)?;
}
Ok(())
diff --git a/rust/kernel/devres.rs b/rust/kernel/devres.rs
index d04e3fcebafb..132545962218 100644
--- a/rust/kernel/devres.rs
+++ b/rust/kernel/devres.rs
@@ -13,8 +13,8 @@ use crate::{
ffi::c_void,
prelude::*,
revocable::{Revocable, RevocableGuard},
- sync::{rcu, Completion},
- types::{ARef, ForeignOwnable, Opaque, ScopeGuard},
+ sync::{aref::ARef, rcu, Completion},
+ types::{ForeignOwnable, Opaque, ScopeGuard},
};
use pin_init::Wrapper;
diff --git a/rust/kernel/dma.rs b/rust/kernel/dma.rs
index 2bc8ab51ec28..68fe67624424 100644
--- a/rust/kernel/dma.rs
+++ b/rust/kernel/dma.rs
@@ -9,8 +9,8 @@ use crate::{
device::{Bound, Core},
error::{to_result, Result},
prelude::*,
+ sync::aref::ARef,
transmute::{AsBytes, FromBytes},
- types::ARef,
};
/// Trait to be implemented by DMA capable bus devices.
diff --git a/rust/kernel/drm/device.rs b/rust/kernel/drm/device.rs
index d29c477e89a8..0956ba0f64de 100644
--- a/rust/kernel/drm/device.rs
+++ b/rust/kernel/drm/device.rs
@@ -2,7 +2,7 @@
//! DRM device.
//!
-//! C header: [`include/linux/drm/drm_device.h`](srctree/include/linux/drm/drm_device.h)
+//! C header: [`include/drm/drm_device.h`](srctree/include/drm/drm_device.h)
use crate::{
alloc::allocator::Kmalloc,
@@ -82,8 +82,8 @@ impl<T: drm::Driver> Device<T> {
major: T::INFO.major,
minor: T::INFO.minor,
patchlevel: T::INFO.patchlevel,
- name: T::INFO.name.as_char_ptr().cast_mut(),
- desc: T::INFO.desc.as_char_ptr().cast_mut(),
+ name: crate::str::as_char_ptr_in_const_context(T::INFO.name).cast_mut(),
+ desc: crate::str::as_char_ptr_in_const_context(T::INFO.desc).cast_mut(),
driver_features: drm::driver::FEAT_GEM,
ioctls: T::IOCTLS.as_ptr(),
diff --git a/rust/kernel/drm/driver.rs b/rust/kernel/drm/driver.rs
index fe7e8d06961a..d2dad77274c4 100644
--- a/rust/kernel/drm/driver.rs
+++ b/rust/kernel/drm/driver.rs
@@ -2,7 +2,7 @@
//! DRM driver core.
//!
-//! C header: [`include/linux/drm/drm_drv.h`](srctree/include/linux/drm/drm_drv.h)
+//! C header: [`include/drm/drm_drv.h`](srctree/include/drm/drm_drv.h)
use crate::{
bindings, device, devres, drm,
diff --git a/rust/kernel/drm/file.rs b/rust/kernel/drm/file.rs
index e8789c9110d6..8c46f8d51951 100644
--- a/rust/kernel/drm/file.rs
+++ b/rust/kernel/drm/file.rs
@@ -2,7 +2,7 @@
//! DRM File objects.
//!
-//! C header: [`include/linux/drm/drm_file.h`](srctree/include/linux/drm/drm_file.h)
+//! C header: [`include/drm/drm_file.h`](srctree/include/drm/drm_file.h)
use crate::{bindings, drm, error::Result, prelude::*, types::Opaque};
use core::marker::PhantomData;
diff --git a/rust/kernel/drm/gem/mod.rs b/rust/kernel/drm/gem/mod.rs
index b71821cfb5ea..b9f3248876ba 100644
--- a/rust/kernel/drm/gem/mod.rs
+++ b/rust/kernel/drm/gem/mod.rs
@@ -2,7 +2,7 @@
//! DRM GEM API
//!
-//! C header: [`include/linux/drm/drm_gem.h`](srctree/include/linux/drm/drm_gem.h)
+//! C header: [`include/drm/drm_gem.h`](srctree/include/drm/drm_gem.h)
use crate::{
alloc::flags::*,
diff --git a/rust/kernel/drm/ioctl.rs b/rust/kernel/drm/ioctl.rs
index fdec01c37168..8431cdcd3ae0 100644
--- a/rust/kernel/drm/ioctl.rs
+++ b/rust/kernel/drm/ioctl.rs
@@ -2,7 +2,7 @@
//! DRM IOCTL definitions.
//!
-//! C header: [`include/linux/drm/drm_ioctl.h`](srctree/include/linux/drm/drm_ioctl.h)
+//! C header: [`include/drm/drm_ioctl.h`](srctree/include/drm/drm_ioctl.h)
use crate::ioctl;
diff --git a/rust/kernel/error.rs b/rust/kernel/error.rs
index a41de293dcd1..1c0e0e241daa 100644
--- a/rust/kernel/error.rs
+++ b/rust/kernel/error.rs
@@ -2,7 +2,9 @@
//! Kernel errors.
//!
-//! C header: [`include/uapi/asm-generic/errno-base.h`](srctree/include/uapi/asm-generic/errno-base.h)
+//! C header: [`include/uapi/asm-generic/errno-base.h`](srctree/include/uapi/asm-generic/errno-base.h)\
+//! C header: [`include/uapi/asm-generic/errno.h`](srctree/include/uapi/asm-generic/errno.h)\
+//! C header: [`include/linux/errno.h`](srctree/include/linux/errno.h)
use crate::{
alloc::{layout::LayoutError, AllocError},
@@ -101,8 +103,23 @@ pub struct Error(NonZeroI32);
impl Error {
/// Creates an [`Error`] from a kernel error code.
///
- /// It is a bug to pass an out-of-range `errno`. `EINVAL` would
- /// be returned in such a case.
+ /// `errno` must be within error code range (i.e. `>= -MAX_ERRNO && < 0`).
+ ///
+ /// It is a bug to pass an out-of-range `errno`. [`code::EINVAL`] is returned in such a case.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// assert_eq!(Error::from_errno(-1), EPERM);
+ /// assert_eq!(Error::from_errno(-2), ENOENT);
+ /// ```
+ ///
+ /// The following calls are considered a bug:
+ ///
+ /// ```
+ /// assert_eq!(Error::from_errno(0), EINVAL);
+ /// assert_eq!(Error::from_errno(-1000000), EINVAL);
+ /// ```
pub fn from_errno(errno: crate::ffi::c_int) -> Error {
if let Some(error) = Self::try_from_errno(errno) {
error
@@ -158,7 +175,7 @@ impl Error {
}
/// Returns a string representing the error, if one exists.
- #[cfg(not(any(test, testlib)))]
+ #[cfg(not(testlib))]
pub fn name(&self) -> Option<&'static CStr> {
// SAFETY: Just an FFI call, there are no extra safety requirements.
let ptr = unsafe { bindings::errname(-self.0.get()) };
@@ -175,7 +192,7 @@ impl Error {
/// When `testlib` is configured, this always returns `None` to avoid the dependency on a
/// kernel function so that tests that use this (e.g., by calling [`Result::unwrap`]) can still
/// run in userspace.
- #[cfg(any(test, testlib))]
+ #[cfg(testlib)]
pub fn name(&self) -> Option<&'static CStr> {
None
}
@@ -375,8 +392,43 @@ impl From<core::convert::Infallible> for Error {
/// [Rust documentation]: https://doc.rust-lang.org/book/ch09-02-recoverable-errors-with-result.html
pub type Result<T = (), E = Error> = core::result::Result<T, E>;
-/// Converts an integer as returned by a C kernel function to an error if it's negative, and
-/// `Ok(())` otherwise.
+/// Converts an integer as returned by a C kernel function to a [`Result`].
+///
+/// If the integer is negative, an [`Err`] with an [`Error`] as given by [`Error::from_errno`] is
+/// returned. This means the integer must be `>= -MAX_ERRNO`.
+///
+/// Otherwise, it returns [`Ok`].
+///
+/// It is a bug to pass an out-of-range negative integer. `Err(EINVAL)` is returned in such a case.
+///
+/// # Examples
+///
+/// This function may be used to easily perform early returns with the [`?`] operator when working
+/// with C APIs within Rust abstractions:
+///
+/// ```
+/// # use kernel::error::to_result;
+/// # mod bindings {
+/// # #![expect(clippy::missing_safety_doc)]
+/// # use kernel::prelude::*;
+/// # pub(super) unsafe fn f1() -> c_int { 0 }
+/// # pub(super) unsafe fn f2() -> c_int { EINVAL.to_errno() }
+/// # }
+/// fn f() -> Result {
+/// // SAFETY: ...
+/// to_result(unsafe { bindings::f1() })?;
+///
+/// // SAFETY: ...
+/// to_result(unsafe { bindings::f2() })?;
+///
+/// // ...
+///
+/// Ok(())
+/// }
+/// # assert_eq!(f(), Err(EINVAL));
+/// ```
+///
+/// [`?`]: https://doc.rust-lang.org/reference/expressions/operator-expr.html#the-question-mark-operator
pub fn to_result(err: crate::ffi::c_int) -> Result {
if err < 0 {
Err(Error::from_errno(err))
diff --git a/rust/kernel/firmware.rs b/rust/kernel/firmware.rs
index 1abab5b2f052..94e6bb88b903 100644
--- a/rust/kernel/firmware.rs
+++ b/rust/kernel/firmware.rs
@@ -291,7 +291,7 @@ impl<const N: usize> ModInfoBuilder<N> {
let module_name = this.module_name;
if !this.module_name.is_empty() {
- this = this.push_internal(module_name.as_bytes_with_nul());
+ this = this.push_internal(module_name.to_bytes_with_nul());
if N != 0 {
// Re-use the space taken by the NULL terminator and swap it with the '.' separator.
diff --git a/rust/kernel/fs/file.rs b/rust/kernel/fs/file.rs
index 35fd5db35c46..cf06e73a6da0 100644
--- a/rust/kernel/fs/file.rs
+++ b/rust/kernel/fs/file.rs
@@ -10,8 +10,10 @@
use crate::{
bindings,
cred::Credential,
- error::{code::*, Error, Result},
- types::{ARef, AlwaysRefCounted, NotThreadSafe, Opaque},
+ error::{code::*, to_result, Error, Result},
+ fmt,
+ sync::aref::{ARef, AlwaysRefCounted},
+ types::{NotThreadSafe, Opaque},
};
use core::ptr;
@@ -398,9 +400,8 @@ impl FileDescriptorReservation {
pub fn get_unused_fd_flags(flags: u32) -> Result<Self> {
// SAFETY: FFI call, there are no safety requirements on `flags`.
let fd: i32 = unsafe { bindings::get_unused_fd_flags(flags) };
- if fd < 0 {
- return Err(Error::from_errno(fd));
- }
+ to_result(fd)?;
+
Ok(Self {
fd: fd as u32,
_not_send: NotThreadSafe,
@@ -460,8 +461,8 @@ impl From<BadFdError> for Error {
}
}
-impl core::fmt::Debug for BadFdError {
- fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+impl fmt::Debug for BadFdError {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.pad("EBADF")
}
}
diff --git a/rust/kernel/id_pool.rs b/rust/kernel/id_pool.rs
new file mode 100644
index 000000000000..a41a3404213c
--- /dev/null
+++ b/rust/kernel/id_pool.rs
@@ -0,0 +1,226 @@
+// SPDX-License-Identifier: GPL-2.0
+
+// Copyright (C) 2025 Google LLC.
+
+//! Rust API for an ID pool backed by a [`BitmapVec`].
+
+use crate::alloc::{AllocError, Flags};
+use crate::bitmap::BitmapVec;
+
+const BITS_PER_LONG: usize = bindings::BITS_PER_LONG as usize;
+
+/// Represents a dynamic ID pool backed by a [`BitmapVec`].
+///
+/// Clients acquire and release IDs from unset bits in a bitmap.
+///
+/// The capacity of the ID pool may be adjusted by users as needed. The API
+/// supports the scenario where users need precise control over the time of
+/// allocation of a new backing bitmap, which may require releasing a
+/// spinlock. Due to concurrent updates, grow and shrink operations are
+/// re-verified to determine whether they are still valid.
+///
+/// # Examples
+///
+/// Basic usage
+///
+/// ```
+/// use kernel::alloc::{AllocError, flags::GFP_KERNEL};
+/// use kernel::id_pool::IdPool;
+///
+/// let mut pool = IdPool::new(64, GFP_KERNEL)?;
+/// for i in 0..64 {
+/// assert_eq!(i, pool.acquire_next_id(i).ok_or(ENOSPC)?);
+/// }
+///
+/// pool.release_id(23);
+/// assert_eq!(23, pool.acquire_next_id(0).ok_or(ENOSPC)?);
+///
+/// assert_eq!(None, pool.acquire_next_id(0)); // time to realloc.
+/// let resizer = pool.grow_request().ok_or(ENOSPC)?.realloc(GFP_KERNEL)?;
+/// pool.grow(resizer);
+///
+/// assert_eq!(pool.acquire_next_id(0), Some(64));
+/// # Ok::<(), Error>(())
+/// ```
+///
+/// Releasing spinlock to grow the pool
+///
+/// ```no_run
+/// use kernel::alloc::{AllocError, flags::GFP_KERNEL};
+/// use kernel::sync::{new_spinlock, SpinLock};
+/// use kernel::id_pool::IdPool;
+///
+/// fn get_id_maybe_realloc(guarded_pool: &SpinLock<IdPool>) -> Result<usize, AllocError> {
+/// let mut pool = guarded_pool.lock();
+/// loop {
+/// match pool.acquire_next_id(0) {
+/// Some(index) => return Ok(index),
+/// None => {
+/// let alloc_request = pool.grow_request();
+/// drop(pool);
+/// let resizer = alloc_request.ok_or(AllocError)?.realloc(GFP_KERNEL)?;
+/// pool = guarded_pool.lock();
+/// pool.grow(resizer)
+/// }
+/// }
+/// }
+/// }
+/// ```
+pub struct IdPool {
+ map: BitmapVec,
+}
+
+/// Indicates that an [`IdPool`] should change to a new target size.
+pub struct ReallocRequest {
+ num_ids: usize,
+}
+
+/// Contains a [`BitmapVec`] of a size suitable for reallocating [`IdPool`].
+pub struct PoolResizer {
+ new: BitmapVec,
+}
+
+impl ReallocRequest {
+ /// Allocates a new backing [`BitmapVec`] for [`IdPool`].
+ ///
+ /// This method only prepares reallocation and does not complete it.
+ /// Reallocation will complete after passing the [`PoolResizer`] to the
+ /// [`IdPool::grow`] or [`IdPool::shrink`] operation, which will check
+ /// that reallocation still makes sense.
+ pub fn realloc(&self, flags: Flags) -> Result<PoolResizer, AllocError> {
+ let new = BitmapVec::new(self.num_ids, flags)?;
+ Ok(PoolResizer { new })
+ }
+}
+
+impl IdPool {
+ /// Constructs a new [`IdPool`].
+ ///
+ /// A capacity below [`BITS_PER_LONG`] is adjusted to
+ /// [`BITS_PER_LONG`].
+ ///
+ /// [`BITS_PER_LONG`]: srctree/include/asm-generic/bitsperlong.h
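+ ///
+ /// # Examples
+ ///
+ /// A minimal sketch showing the rounding behavior:
+ ///
+ /// ```
+ /// use kernel::alloc::{AllocError, flags::GFP_KERNEL};
+ /// use kernel::id_pool::IdPool;
+ ///
+ /// // A request for fewer IDs than `BITS_PER_LONG` is rounded up.
+ /// let pool = IdPool::new(2, GFP_KERNEL)?;
+ /// assert_eq!(pool.capacity(), kernel::bindings::BITS_PER_LONG as usize);
+ /// # Ok::<(), AllocError>(())
+ /// ```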
+ #[inline]
+ pub fn new(num_ids: usize, flags: Flags) -> Result<Self, AllocError> {
+ let num_ids = core::cmp::max(num_ids, BITS_PER_LONG);
+ let map = BitmapVec::new(num_ids, flags)?;
+ Ok(Self { map })
+ }
+
+ /// Returns the number of IDs this pool can currently hold.
+ #[inline]
+ pub fn capacity(&self) -> usize {
+ self.map.len()
+ }
+
+ /// Returns a [`ReallocRequest`] if the [`IdPool`] can be shrunk, [`None`] otherwise.
+ ///
+ /// The capacity of an [`IdPool`] cannot be shrunk below [`BITS_PER_LONG`].
+ ///
+ /// [`BITS_PER_LONG`]: srctree/include/asm-generic/bitsperlong.h
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use kernel::alloc::{AllocError, flags::GFP_KERNEL};
+ /// use kernel::id_pool::{ReallocRequest, IdPool};
+ ///
+ /// let mut pool = IdPool::new(1024, GFP_KERNEL)?;
+ /// let alloc_request = pool.shrink_request().ok_or(AllocError)?;
+ /// let resizer = alloc_request.realloc(GFP_KERNEL)?;
+ /// pool.shrink(resizer);
+ /// assert_eq!(pool.capacity(), kernel::bindings::BITS_PER_LONG as usize);
+ /// # Ok::<(), AllocError>(())
+ /// ```
+ #[inline]
+ pub fn shrink_request(&self) -> Option<ReallocRequest> {
+ let cap = self.capacity();
+ // Shrinking below [`BITS_PER_LONG`] is never possible.
+ if cap <= BITS_PER_LONG {
+ return None;
+ }
+ // Determine if the bitmap can shrink based on the position of
+ // its last set bit. If the bit is within the first quarter of
+ // the bitmap then shrinking is possible. In this case, the
+ // bitmap should shrink to half its current size.
+ let Some(bit) = self.map.last_bit() else {
+ return Some(ReallocRequest {
+ num_ids: BITS_PER_LONG,
+ });
+ };
+ if bit >= (cap / 4) {
+ return None;
+ }
+ let num_ids = usize::max(BITS_PER_LONG, cap / 2);
+ Some(ReallocRequest { num_ids })
+ }
+
+ /// Shrinks the pool by using a new [`BitmapVec`], if still possible.
+ #[inline]
+ pub fn shrink(&mut self, mut resizer: PoolResizer) {
+ // Between the request to shrink that led to the allocation of `resizer`
+ // and now, bits may have changed.
+ // Verify that shrinking is still possible. In case shrinking to
+ // the size of `resizer` is no longer possible, do nothing,
+ // drop `resizer` and move on.
+ let Some(updated) = self.shrink_request() else {
+ return;
+ };
+ if updated.num_ids > resizer.new.len() {
+ return;
+ }
+
+ resizer.new.copy_and_extend(&self.map);
+ self.map = resizer.new;
+ }
+
+ /// Returns a [`ReallocRequest`] for growing this [`IdPool`], if possible.
+ ///
+ /// The capacity of an [`IdPool`] cannot be grown above [`i32::MAX`].
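+ ///
+ /// # Examples
+ ///
+ /// A minimal sketch of the two-step grow protocol on an uncontended pool:
+ ///
+ /// ```
+ /// use kernel::alloc::{AllocError, flags::GFP_KERNEL};
+ /// use kernel::id_pool::IdPool;
+ ///
+ /// let mut pool = IdPool::new(64, GFP_KERNEL)?;
+ /// // Doubling the capacity is still possible, so a request is returned.
+ /// let resizer = pool.grow_request().ok_or(AllocError)?.realloc(GFP_KERNEL)?;
+ /// pool.grow(resizer);
+ /// assert_eq!(pool.capacity(), 128);
+ /// # Ok::<(), AllocError>(())
+ /// ```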
+ #[inline]
+ pub fn grow_request(&self) -> Option<ReallocRequest> {
+ let num_ids = self.capacity() * 2;
+ if num_ids > i32::MAX.try_into().unwrap() {
+ return None;
+ }
+ Some(ReallocRequest { num_ids })
+ }
+
+ /// Grows the pool by using a new [`BitmapVec`], if still necessary.
+ ///
+ /// The `resizer` argument has to be obtained by calling [`Self::grow_request`]
+ /// on this object and performing a [`ReallocRequest::realloc`].
+ #[inline]
+ pub fn grow(&mut self, mut resizer: PoolResizer) {
+ // Between the request to grow that led to the allocation of `resizer`
+ // and now, another thread may have already grown the capacity.
+ // In this case, do nothing, drop `resizer` and move on.
+ if resizer.new.len() <= self.capacity() {
+ return;
+ }
+
+ resizer.new.copy_and_extend(&self.map);
+ self.map = resizer.new;
+ }
+
+ /// Acquires a new ID by finding and setting the next zero bit in the
+ /// bitmap.
+ ///
+ /// Upon success, returns its index. Otherwise, returns [`None`]
+ /// to indicate that a [`Self::grow_request`] is needed.
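+ ///
+ /// # Examples
+ ///
+ /// The search starts at `offset`, and the returned ID is marked as used:
+ ///
+ /// ```
+ /// use kernel::alloc::{AllocError, flags::GFP_KERNEL};
+ /// use kernel::id_pool::IdPool;
+ ///
+ /// let mut pool = IdPool::new(64, GFP_KERNEL)?;
+ /// assert_eq!(pool.acquire_next_id(60), Some(60));
+ /// // Bit 60 is now set, so the same search yields the next free ID.
+ /// assert_eq!(pool.acquire_next_id(60), Some(61));
+ /// # Ok::<(), AllocError>(())
+ /// ```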
+ #[inline]
+ pub fn acquire_next_id(&mut self, offset: usize) -> Option<usize> {
+ let next_zero_bit = self.map.next_zero_bit(offset);
+ if let Some(nr) = next_zero_bit {
+ self.map.set_bit(nr);
+ }
+ next_zero_bit
+ }
+
+ /// Releases an ID.
+ #[inline]
+ pub fn release_id(&mut self, id: usize) {
+ self.map.clear_bit(id);
+ }
+}
diff --git a/rust/kernel/io.rs b/rust/kernel/io.rs
index 03b467722b86..ee182b0b5452 100644
--- a/rust/kernel/io.rs
+++ b/rust/kernel/io.rs
@@ -8,6 +8,7 @@ use crate::error::{code::EINVAL, Result};
use crate::{bindings, build_assert, ffi::c_void};
pub mod mem;
+pub mod poll;
pub mod resource;
pub use resource::Resource;
diff --git a/rust/kernel/io/poll.rs b/rust/kernel/io/poll.rs
new file mode 100644
index 000000000000..613eb25047ef
--- /dev/null
+++ b/rust/kernel/io/poll.rs
@@ -0,0 +1,104 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! IO polling.
+//!
+//! C header: [`include/linux/iopoll.h`](srctree/include/linux/iopoll.h).
+
+use crate::{
+ error::{code::*, Result},
+ processor::cpu_relax,
+ task::might_sleep,
+ time::{delay::fsleep, Delta, Instant, Monotonic},
+};
+
+/// Polls periodically until a condition is met, an error occurs,
+/// or the timeout is reached.
+///
+/// The function repeatedly executes the given operation closure `op` and
+/// checks its result using the condition closure `cond`.
+///
+/// If `cond` returns `true`, the function returns successfully with
+/// the result of `op`. Otherwise, it waits for a duration specified
+/// by `sleep_delta` before executing `op` again.
+///
+/// This process continues until either `op` returns an error, `cond`
+/// returns `true`, or the timeout specified by `timeout_delta` is
+/// reached.
+///
+/// This function can only be used in a nonatomic context.
+///
+/// # Errors
+///
+/// If `op` returns an error, then that error is returned directly.
+///
+/// If the timeout specified by `timeout_delta` is reached, then
+/// `Err(ETIMEDOUT)` is returned.
+///
+/// # Examples
+///
+/// ```no_run
+/// use kernel::io::{Io, poll::read_poll_timeout};
+/// use kernel::time::Delta;
+///
+/// const HW_READY: u16 = 0x01;
+///
+/// fn wait_for_hardware<const SIZE: usize>(io: &Io<SIZE>) -> Result<()> {
+/// match read_poll_timeout(
+/// // The `op` closure reads the value of a specific status register.
+/// || io.try_read16(0x1000),
+/// // The `cond` closure takes a reference to the value returned by `op`
+/// // and checks whether the hardware is ready.
+/// |val: &u16| *val == HW_READY,
+/// Delta::from_millis(50),
+/// Delta::from_secs(3),
+/// ) {
+/// Ok(_) => {
+/// // The hardware is ready. The returned value of the `op` closure
+/// // isn't used.
+/// Ok(())
+/// }
+/// Err(e) => Err(e),
+/// }
+/// }
+/// ```
+#[track_caller]
+pub fn read_poll_timeout<Op, Cond, T>(
+ mut op: Op,
+ mut cond: Cond,
+ sleep_delta: Delta,
+ timeout_delta: Delta,
+) -> Result<T>
+where
+ Op: FnMut() -> Result<T>,
+ Cond: FnMut(&T) -> bool,
+{
+ let start: Instant<Monotonic> = Instant::now();
+
+ // Unlike the C version, we call `might_sleep()` unconditionally,
+ // as conditional calls are error-prone. We clearly separate
+ // `read_poll_timeout()` and `read_poll_timeout_atomic()` to aid
+ // tools like klint.
+ might_sleep();
+
+ loop {
+ let val = op()?;
+ if cond(&val) {
+ // Unlike the C version, we immediately return.
+ // We know the condition is met so we don't need to check again.
+ return Ok(val);
+ }
+
+ if start.elapsed() > timeout_delta {
+ // Unlike the C version, we immediately return.
+ // We have just called `op()` so we don't need to call it again.
+ return Err(ETIMEDOUT);
+ }
+
+ if !sleep_delta.is_zero() {
+ fsleep(sleep_delta);
+ }
+
+ // `fsleep()` could be a busy-wait loop so we always call `cpu_relax()`.
+ cpu_relax();
+ }
+}
diff --git a/rust/kernel/irq.rs b/rust/kernel/irq.rs
new file mode 100644
index 000000000000..20abd4056655
--- /dev/null
+++ b/rust/kernel/irq.rs
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! IRQ abstractions.
+//!
+//! An IRQ is an interrupt request from a device. It is used to get the CPU's
+//! attention so it can service a hardware event in a timely manner.
+//!
+//! The current abstractions handle IRQ requests and handlers, i.e., they
+//! allow drivers to register a handler for a given IRQ line.
+//!
+//! C header: [`include/linux/interrupt.h`](srctree/include/linux/interrupt.h)
+
+/// Flags to be used when registering IRQ handlers.
+mod flags;
+
+/// IRQ allocation and handling.
+mod request;
+
+pub use flags::Flags;
+
+pub use request::{
+ Handler, IrqRequest, IrqReturn, Registration, ThreadedHandler, ThreadedIrqReturn,
+ ThreadedRegistration,
+};
diff --git a/rust/kernel/irq/flags.rs b/rust/kernel/irq/flags.rs
new file mode 100644
index 000000000000..adfde96ec47c
--- /dev/null
+++ b/rust/kernel/irq/flags.rs
@@ -0,0 +1,124 @@
+// SPDX-License-Identifier: GPL-2.0
+// SPDX-FileCopyrightText: Copyright 2025 Collabora ltd.
+
+use crate::bindings;
+use crate::prelude::*;
+
+/// Flags to be used when registering IRQ handlers.
+///
+/// Flags can be used to request specific behaviors when registering an IRQ
+/// handler, and can be combined using the `|`, `&`, and `!` operators to
+/// further control the system's behavior.
+///
+/// A common use case is to register a shared interrupt, as sharing the line
+/// between devices is increasingly common in modern systems and is even
+/// required for some buses. This requires setting [`Flags::SHARED`] when
+/// requesting the interrupt. Other use cases include setting the trigger type
+/// through `Flags::TRIGGER_*`, which determines when the interrupt fires, or
+/// controlling whether the interrupt is masked after the handler runs by using
+/// [`Flags::ONESHOT`].
+///
+/// If an invalid combination of flags is provided, the system will refuse to
+/// register the handler, and lower layers will enforce certain flags when
+/// necessary. This means, for example, that all
+/// [`crate::irq::Registration`] instances for a shared interrupt have to agree on
+/// [`Flags::SHARED`] and on the same trigger type, if set.
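+///
+/// # Examples
+///
+/// A minimal sketch of combining and testing flags with the bitwise operators:
+///
+/// ```
+/// use kernel::irq::Flags;
+///
+/// // Request a shared line that stays masked until the threaded handler
+/// // has run.
+/// let flags = Flags::SHARED | Flags::ONESHOT;
+///
+/// // `&` masks flags, so a combination can be tested for a given flag.
+/// assert!((flags & Flags::SHARED) == Flags::SHARED);
+/// ```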
+#[derive(Clone, Copy, PartialEq, Eq)]
+pub struct Flags(c_ulong);
+
+impl Flags {
+ /// Use the interrupt line as already configured.
+ pub const TRIGGER_NONE: Flags = Flags::new(bindings::IRQF_TRIGGER_NONE);
+
+ /// The interrupt is triggered when the signal goes from low to high.
+ pub const TRIGGER_RISING: Flags = Flags::new(bindings::IRQF_TRIGGER_RISING);
+
+ /// The interrupt is triggered when the signal goes from high to low.
+ pub const TRIGGER_FALLING: Flags = Flags::new(bindings::IRQF_TRIGGER_FALLING);
+
+ /// The interrupt is triggered while the signal is held high.
+ pub const TRIGGER_HIGH: Flags = Flags::new(bindings::IRQF_TRIGGER_HIGH);
+
+ /// The interrupt is triggered while the signal is held low.
+ pub const TRIGGER_LOW: Flags = Flags::new(bindings::IRQF_TRIGGER_LOW);
+
+ /// Allow sharing the IRQ among several devices.
+ pub const SHARED: Flags = Flags::new(bindings::IRQF_SHARED);
+
+ /// Set by callers when they expect sharing mismatches to occur.
+ pub const PROBE_SHARED: Flags = Flags::new(bindings::IRQF_PROBE_SHARED);
+
+ /// Flag to mark this interrupt as a timer interrupt.
+ pub const TIMER: Flags = Flags::new(bindings::IRQF_TIMER);
+
+ /// Interrupt is per CPU.
+ pub const PERCPU: Flags = Flags::new(bindings::IRQF_PERCPU);
+
+ /// Flag to exclude this interrupt from irq balancing.
+ pub const NOBALANCING: Flags = Flags::new(bindings::IRQF_NOBALANCING);
+
+ /// Interrupt is used for polling (only the interrupt that is registered
+ /// first in a shared interrupt is considered for performance reasons).
+ pub const IRQPOLL: Flags = Flags::new(bindings::IRQF_IRQPOLL);
+
+ /// Interrupt is not re-enabled after the hardirq handler finished. Used by
+ /// threaded interrupts which need to keep the irq line disabled until the
+ /// threaded handler has been run.
+ pub const ONESHOT: Flags = Flags::new(bindings::IRQF_ONESHOT);
+
+ /// Do not disable this IRQ during suspend. Does not guarantee that this
+ /// interrupt will wake the system from a suspended state.
+ pub const NO_SUSPEND: Flags = Flags::new(bindings::IRQF_NO_SUSPEND);
+
+ /// Force enable it on resume even if [`Flags::NO_SUSPEND`] is set.
+ pub const FORCE_RESUME: Flags = Flags::new(bindings::IRQF_FORCE_RESUME);
+
+ /// Interrupt cannot be threaded.
+ pub const NO_THREAD: Flags = Flags::new(bindings::IRQF_NO_THREAD);
+
+ /// Resume IRQ early during syscore instead of at device resume time.
+ pub const EARLY_RESUME: Flags = Flags::new(bindings::IRQF_EARLY_RESUME);
+
+ /// If the IRQ is shared with a [`Flags::NO_SUSPEND`] user, execute this
+ /// interrupt handler after suspending interrupts. For system wakeup devices,
+ /// users need to implement wakeup detection in their interrupt handlers.
+ pub const COND_SUSPEND: Flags = Flags::new(bindings::IRQF_COND_SUSPEND);
+
+ /// Don't enable IRQ or NMI automatically when users request it. Users will
+ /// enable it explicitly by `enable_irq` or `enable_nmi` later.
+ pub const NO_AUTOEN: Flags = Flags::new(bindings::IRQF_NO_AUTOEN);
+
+ /// Exclude from runaway detection for IPI and similar handlers; depends on
+ /// `PERCPU`.
+ pub const NO_DEBUG: Flags = Flags::new(bindings::IRQF_NO_DEBUG);
+
+ pub(crate) fn into_inner(self) -> c_ulong {
+ self.0
+ }
+
+ const fn new(value: u32) -> Self {
+ build_assert!(value as u64 <= c_ulong::MAX as u64);
+ Self(value as c_ulong)
+ }
+}
+
+impl core::ops::BitOr for Flags {
+ type Output = Self;
+ fn bitor(self, rhs: Self) -> Self::Output {
+ Self(self.0 | rhs.0)
+ }
+}
+
+impl core::ops::BitAnd for Flags {
+ type Output = Self;
+ fn bitand(self, rhs: Self) -> Self::Output {
+ Self(self.0 & rhs.0)
+ }
+}
+
+impl core::ops::Not for Flags {
+ type Output = Self;
+ fn not(self) -> Self::Output {
+ Self(!self.0)
+ }
+}
diff --git a/rust/kernel/irq/request.rs b/rust/kernel/irq/request.rs
new file mode 100644
index 000000000000..b150563fdef8
--- /dev/null
+++ b/rust/kernel/irq/request.rs
@@ -0,0 +1,507 @@
+// SPDX-License-Identifier: GPL-2.0
+// SPDX-FileCopyrightText: Copyright 2025 Collabora ltd.
+
+//! This module provides types like [`Registration`] and
+//! [`ThreadedRegistration`], which allow users to register handlers for a given
+//! IRQ line.
+
+use core::marker::PhantomPinned;
+
+use crate::alloc::Allocator;
+use crate::device::{Bound, Device};
+use crate::devres::Devres;
+use crate::error::to_result;
+use crate::irq::flags::Flags;
+use crate::prelude::*;
+use crate::str::CStr;
+use crate::sync::Arc;
+
+/// The value that can be returned from a [`Handler`] or a [`ThreadedHandler`].
+#[repr(u32)]
+pub enum IrqReturn {
+ /// The interrupt was not from this device or was not handled.
+ None = bindings::irqreturn_IRQ_NONE,
+
+ /// The interrupt was handled by this device.
+ Handled = bindings::irqreturn_IRQ_HANDLED,
+}
+
+/// Callbacks for an IRQ handler.
+pub trait Handler: Sync {
+ /// The hard IRQ handler.
+ ///
+ /// This is executed in interrupt context, hence all corresponding
+ /// limitations do apply.
+ ///
+ /// All work that does not necessarily need to be executed from
+ /// interrupt context should be deferred to a threaded handler.
+ /// See also [`ThreadedRegistration`].
+ fn handle(&self, device: &Device<Bound>) -> IrqReturn;
+}
+
+impl<T: ?Sized + Handler + Send> Handler for Arc<T> {
+ fn handle(&self, device: &Device<Bound>) -> IrqReturn {
+ T::handle(self, device)
+ }
+}
+
+impl<T: ?Sized + Handler, A: Allocator> Handler for Box<T, A> {
+ fn handle(&self, device: &Device<Bound>) -> IrqReturn {
+ T::handle(self, device)
+ }
+}
+
+/// # Invariants
+///
+/// - `self.irq` is the same as the one passed to `request_{threaded}_irq`.
+/// - `cookie` was passed to `request_{threaded}_irq` as the cookie. It is guaranteed to be unique
+/// by the type system, since each call to `new` will return a different instance of
+/// `Registration`.
+#[pin_data(PinnedDrop)]
+struct RegistrationInner {
+ irq: u32,
+ cookie: *mut c_void,
+}
+
+impl RegistrationInner {
+ fn synchronize(&self) {
+ // SAFETY: safe as per the invariants of `RegistrationInner`
+ unsafe { bindings::synchronize_irq(self.irq) };
+ }
+}
+
+#[pinned_drop]
+impl PinnedDrop for RegistrationInner {
+ fn drop(self: Pin<&mut Self>) {
+ // SAFETY:
+ //
+ // Safe as per the invariants of `RegistrationInner` and:
+ //
+ // - The containing struct is `!Unpin` and was initialized using
+ // pin-init, so it occupied the same memory location for the entirety of
+ // its lifetime.
+ //
+ // Notice that this will block until all handlers finish executing,
+ // i.e., at no point will `&self` be invalid while the handler is running.
+ unsafe { bindings::free_irq(self.irq, self.cookie) };
+ }
+}
+
+// SAFETY: We only use `inner` on drop, which is called at most once with no
+// concurrent access.
+unsafe impl Sync for RegistrationInner {}
+
+// SAFETY: It is safe to send `RegistrationInner` across threads.
+unsafe impl Send for RegistrationInner {}
+
+/// A request for an IRQ line for a given device.
+///
+/// # Invariants
+///
+/// - `irq` is the number of an interrupt source of `dev`.
+/// - `irq` has not been registered yet.
+pub struct IrqRequest<'a> {
+ dev: &'a Device<Bound>,
+ irq: u32,
+}
+
+impl<'a> IrqRequest<'a> {
+ /// Creates a new IRQ request for the given device and IRQ number.
+ ///
+ /// # Safety
+ ///
+ /// - `irq` must be a valid IRQ number for `dev`.
+ pub(crate) unsafe fn new(dev: &'a Device<Bound>, irq: u32) -> Self {
+ // INVARIANT: `irq` is a valid IRQ number for `dev`.
+ IrqRequest { dev, irq }
+ }
+
+ /// Returns the IRQ number of an [`IrqRequest`].
+ pub fn irq(&self) -> u32 {
+ self.irq
+ }
+}
+
+/// A registration of an IRQ handler for a given IRQ line.
+///
+/// # Examples
+///
+/// The following is an example of using `Registration`. It uses a
+/// [`Completion`] to coordinate between the IRQ
+/// handler and process context. [`Completion`] uses interior mutability, so the
+/// handler can signal with [`Completion::complete_all()`] and the process
+/// context can wait with [`Completion::wait_for_completion()`] even though
+/// there is no way to get a mutable reference to any of the fields in
+/// `Data`.
+///
+/// [`Completion`]: kernel::sync::Completion
+/// [`Completion::complete_all()`]: kernel::sync::Completion::complete_all
+/// [`Completion::wait_for_completion()`]: kernel::sync::Completion::wait_for_completion
+///
+/// ```
+/// use kernel::c_str;
+/// use kernel::device::{Bound, Device};
+/// use kernel::irq::{self, Flags, IrqRequest, IrqReturn, Registration};
+/// use kernel::prelude::*;
+/// use kernel::sync::{Arc, Completion};
+///
+/// // Data shared between process and IRQ context.
+/// #[pin_data]
+/// struct Data {
+/// #[pin]
+/// completion: Completion,
+/// }
+///
+/// impl irq::Handler for Data {
+/// // Executed in IRQ context.
+/// fn handle(&self, _dev: &Device<Bound>) -> IrqReturn {
+/// self.completion.complete_all();
+/// IrqReturn::Handled
+/// }
+/// }
+///
+/// // Registers an IRQ handler for the given IrqRequest.
+/// //
+/// // This runs in process context and assumes `request` was previously acquired from a device.
+/// fn register_irq(
+/// handler: impl PinInit<Data, Error>,
+/// request: IrqRequest<'_>,
+/// ) -> Result<Arc<Registration<Data>>> {
+/// let registration = Registration::new(request, Flags::SHARED, c_str!("my_device"), handler);
+///
+/// let registration = Arc::pin_init(registration, GFP_KERNEL)?;
+///
+/// registration.handler().completion.wait_for_completion();
+///
+/// Ok(registration)
+/// }
+/// # Ok::<(), Error>(())
+/// ```
+///
+/// # Invariants
+///
+/// * We own an irq handler whose cookie is a pointer to `Self`.
+#[pin_data]
+pub struct Registration<T: Handler + 'static> {
+ #[pin]
+ inner: Devres<RegistrationInner>,
+
+ #[pin]
+ handler: T,
+
+ /// Pinned because we need address stability so that we can pass a pointer
+ /// to the callback.
+ #[pin]
+ _pin: PhantomPinned,
+}
+
+impl<T: Handler + 'static> Registration<T> {
+ /// Registers the IRQ handler with the system for the given IRQ number.
+ pub fn new<'a>(
+ request: IrqRequest<'a>,
+ flags: Flags,
+ name: &'static CStr,
+ handler: impl PinInit<T, Error> + 'a,
+ ) -> impl PinInit<Self, Error> + 'a {
+ try_pin_init!(&this in Self {
+ handler <- handler,
+ inner <- Devres::new(
+ request.dev,
+ try_pin_init!(RegistrationInner {
+ // INVARIANT: `this` is a valid pointer to the `Registration` instance
+ cookie: this.as_ptr().cast::<c_void>(),
+ irq: {
+ // SAFETY:
+ // - The callbacks are valid for use with request_irq.
+ // - If this succeeds, the slot is guaranteed to be valid until the
+ // destructor of Self runs, which will deregister the callbacks
+ // before the memory location becomes invalid.
+ // - When request_irq is called, everything that handle_irq_callback will
+ // touch has already been initialized, so it's safe for the callback to
+ // be called immediately.
+ to_result(unsafe {
+ bindings::request_irq(
+ request.irq,
+ Some(handle_irq_callback::<T>),
+ flags.into_inner(),
+ name.as_char_ptr(),
+ this.as_ptr().cast::<c_void>(),
+ )
+ })?;
+ request.irq
+ }
+ })
+ ),
+ _pin: PhantomPinned,
+ })
+ }
+
+ /// Returns a reference to the handler that was registered with the system.
+ pub fn handler(&self) -> &T {
+ &self.handler
+ }
+
+ /// Wait for pending IRQ handlers on other CPUs.
+ ///
+ /// This will attempt to access the inner [`Devres`] container.
+ pub fn try_synchronize(&self) -> Result {
+ let inner = self.inner.try_access().ok_or(ENODEV)?;
+ inner.synchronize();
+ Ok(())
+ }
+
+ /// Wait for pending IRQ handlers on other CPUs.
+ pub fn synchronize(&self, dev: &Device<Bound>) -> Result {
+ let inner = self.inner.access(dev)?;
+ inner.synchronize();
+ Ok(())
+ }
+}
+
+/// # Safety
+///
+/// This function should be only used as the callback in `request_irq`.
+unsafe extern "C" fn handle_irq_callback<T: Handler>(_irq: i32, ptr: *mut c_void) -> c_uint {
+ // SAFETY: `ptr` is a pointer to `Registration<T>` set in `Registration::new`
+ let registration = unsafe { &*(ptr as *const Registration<T>) };
+ // SAFETY: The irq callback is removed before the device is unbound, so the fact that the irq
+ // callback is running implies that the device has not yet been unbound.
+ let device = unsafe { registration.inner.device().as_bound() };
+
+ T::handle(&registration.handler, device) as c_uint
+}
+
+/// The value that can be returned from [`ThreadedHandler::handle`].
+#[repr(u32)]
+pub enum ThreadedIrqReturn {
+ /// The interrupt was not from this device or was not handled.
+ None = bindings::irqreturn_IRQ_NONE,
+
+ /// The interrupt was handled by this device.
+ Handled = bindings::irqreturn_IRQ_HANDLED,
+
+ /// The handler wants the handler thread to wake up.
+ WakeThread = bindings::irqreturn_IRQ_WAKE_THREAD,
+}
+
+/// Callbacks for a threaded IRQ handler.
+pub trait ThreadedHandler: Sync {
+ /// The hard IRQ handler.
+ ///
+ /// This is executed in interrupt context, hence all corresponding
+ /// limitations do apply. All work that does not necessarily need to be
+ /// executed from interrupt context should be deferred to the threaded
+ /// handler, i.e. [`ThreadedHandler::handle_threaded`].
+ ///
+ /// The default implementation returns [`ThreadedIrqReturn::WakeThread`].
+ #[expect(unused_variables)]
+ fn handle(&self, device: &Device<Bound>) -> ThreadedIrqReturn {
+ ThreadedIrqReturn::WakeThread
+ }
+
+ /// The threaded IRQ handler.
+ ///
+ /// This is executed in process context. The kernel creates a dedicated
+ /// `kthread` for this purpose.
+ fn handle_threaded(&self, device: &Device<Bound>) -> IrqReturn;
+}
+
+impl<T: ?Sized + ThreadedHandler + Send> ThreadedHandler for Arc<T> {
+ fn handle(&self, device: &Device<Bound>) -> ThreadedIrqReturn {
+ T::handle(self, device)
+ }
+
+ fn handle_threaded(&self, device: &Device<Bound>) -> IrqReturn {
+ T::handle_threaded(self, device)
+ }
+}
+
+impl<T: ?Sized + ThreadedHandler, A: Allocator> ThreadedHandler for Box<T, A> {
+ fn handle(&self, device: &Device<Bound>) -> ThreadedIrqReturn {
+ T::handle(self, device)
+ }
+
+ fn handle_threaded(&self, device: &Device<Bound>) -> IrqReturn {
+ T::handle_threaded(self, device)
+ }
+}
+
+/// A registration of a threaded IRQ handler for a given IRQ line.
+///
+/// Two callbacks are involved: one to handle the IRQ in interrupt context
+/// (with a default implementation that simply wakes the thread), and one to
+/// handle any other work in a separate thread.
+///
+/// The thread handler is only called if the IRQ handler returns
+/// [`ThreadedIrqReturn::WakeThread`].
+///
+/// # Examples
+///
+/// The following is an example of using [`ThreadedRegistration`]. It uses a
+/// [`Mutex`](kernel::sync::Mutex) to provide interior mutability.
+///
+/// ```
+/// use kernel::c_str;
+/// use kernel::device::{Bound, Device};
+/// use kernel::irq::{
+/// self, Flags, IrqRequest, IrqReturn, ThreadedHandler, ThreadedIrqReturn,
+/// ThreadedRegistration,
+/// };
+/// use kernel::prelude::*;
+/// use kernel::sync::{Arc, Mutex};
+///
+/// // Declare a struct that will be passed in when the interrupt fires. The u32
+/// // merely serves as an example of some internal data.
+/// //
+/// // [`irq::ThreadedHandler::handle`] takes `&self`. This example
+/// // illustrates how interior mutability can be used when sharing the data
+/// // between process context and IRQ context.
+/// #[pin_data]
+/// struct Data {
+/// #[pin]
+/// value: Mutex<u32>,
+/// }
+///
+/// impl ThreadedHandler for Data {
+/// // This will run (in a separate kthread) if and only if
+/// // [`ThreadedHandler::handle`] returns [`WakeThread`], which it does by
+/// // default.
+/// fn handle_threaded(&self, _dev: &Device<Bound>) -> IrqReturn {
+/// let mut data = self.value.lock();
+/// *data += 1;
+/// IrqReturn::Handled
+/// }
+/// }
+///
+/// // Registers a threaded IRQ handler for the given [`IrqRequest`].
+/// //
+/// // This runs in process context and assumes that `request` was
+/// // previously acquired from a device.
+/// fn register_threaded_irq(
+/// handler: impl PinInit<Data, Error>,
+/// request: IrqRequest<'_>,
+/// ) -> Result<Arc<ThreadedRegistration<Data>>> {
+/// let registration =
+/// ThreadedRegistration::new(request, Flags::SHARED, c_str!("my_device"), handler);
+///
+/// let registration = Arc::pin_init(registration, GFP_KERNEL)?;
+///
+/// {
+/// // The data can be accessed from process context too.
+/// let mut data = registration.handler().value.lock();
+/// *data += 1;
+/// }
+///
+/// Ok(registration)
+/// }
+/// # Ok::<(), Error>(())
+/// ```
+///
+/// # Invariants
+///
+/// * We own an irq handler whose cookie is a pointer to `Self`.
+#[pin_data]
+pub struct ThreadedRegistration<T: ThreadedHandler + 'static> {
+ #[pin]
+ inner: Devres<RegistrationInner>,
+
+ #[pin]
+ handler: T,
+
+ /// Pinned because we need address stability so that we can pass a pointer
+ /// to the callback.
+ #[pin]
+ _pin: PhantomPinned,
+}
+
+impl<T: ThreadedHandler + 'static> ThreadedRegistration<T> {
+ /// Registers the IRQ handler with the system for the given IRQ number.
+ pub fn new<'a>(
+ request: IrqRequest<'a>,
+ flags: Flags,
+ name: &'static CStr,
+ handler: impl PinInit<T, Error> + 'a,
+ ) -> impl PinInit<Self, Error> + 'a {
+ try_pin_init!(&this in Self {
+ handler <- handler,
+ inner <- Devres::new(
+ request.dev,
+ try_pin_init!(RegistrationInner {
+ // INVARIANT: `this` is a valid pointer to the `ThreadedRegistration` instance.
+ cookie: this.as_ptr().cast::<c_void>(),
+ irq: {
+ // SAFETY:
+ // - The callbacks are valid for use with request_threaded_irq.
+ // - If this succeeds, the slot is guaranteed to be valid until the
+ // destructor of Self runs, which will deregister the callbacks
+ // before the memory location becomes invalid.
+ // - When request_threaded_irq is called, everything that the two callbacks
+ // will touch has already been initialized, so it's safe for the
+ // callbacks to be called immediately.
+ to_result(unsafe {
+ bindings::request_threaded_irq(
+ request.irq,
+ Some(handle_threaded_irq_callback::<T>),
+ Some(thread_fn_callback::<T>),
+ flags.into_inner(),
+ name.as_char_ptr(),
+ this.as_ptr().cast::<c_void>(),
+ )
+ })?;
+ request.irq
+ }
+ })
+ ),
+ _pin: PhantomPinned,
+ })
+ }
+
+ /// Returns a reference to the handler that was registered with the system.
+ pub fn handler(&self) -> &T {
+ &self.handler
+ }
+
+ /// Wait for pending IRQ handlers on other CPUs.
+ ///
+ /// This will attempt to access the inner [`Devres`] container.
+ pub fn try_synchronize(&self) -> Result {
+ let inner = self.inner.try_access().ok_or(ENODEV)?;
+ inner.synchronize();
+ Ok(())
+ }
+
+ /// Wait for pending IRQ handlers on other CPUs.
+ pub fn synchronize(&self, dev: &Device<Bound>) -> Result {
+ let inner = self.inner.access(dev)?;
+ inner.synchronize();
+ Ok(())
+ }
+}
+
+/// # Safety
+///
+/// This function should be only used as the callback in `request_threaded_irq`.
+unsafe extern "C" fn handle_threaded_irq_callback<T: ThreadedHandler>(
+ _irq: i32,
+ ptr: *mut c_void,
+) -> c_uint {
+ // SAFETY: `ptr` is a pointer to `ThreadedRegistration<T>` set in `ThreadedRegistration::new`
+ let registration = unsafe { &*(ptr as *const ThreadedRegistration<T>) };
+ // SAFETY: The irq callback is removed before the device is unbound, so the fact that the irq
+ // callback is running implies that the device has not yet been unbound.
+ let device = unsafe { registration.inner.device().as_bound() };
+
+ T::handle(&registration.handler, device) as c_uint
+}
+
+/// # Safety
+///
+/// This function should be only used as the callback in `request_threaded_irq`.
+unsafe extern "C" fn thread_fn_callback<T: ThreadedHandler>(_irq: i32, ptr: *mut c_void) -> c_uint {
+ // SAFETY: `ptr` is a pointer to `ThreadedRegistration<T>` set in `ThreadedRegistration::new`
+ let registration = unsafe { &*(ptr as *const ThreadedRegistration<T>) };
+ // SAFETY: The irq callback is removed before the device is unbound, so the fact that the irq
+ // callback is running implies that the device has not yet been unbound.
+ let device = unsafe { registration.inner.device().as_bound() };
+
+ T::handle_threaded(&registration.handler, device) as c_uint
+}
diff --git a/rust/kernel/kunit.rs b/rust/kernel/kunit.rs
index 41efd87595d6..79436509dd73 100644
--- a/rust/kernel/kunit.rs
+++ b/rust/kernel/kunit.rs
@@ -6,8 +6,8 @@
//!
//! Reference: <https://docs.kernel.org/dev-tools/kunit/index.html>
+use crate::fmt;
use crate::prelude::*;
-use core::fmt;
#[cfg(CONFIG_PRINTK)]
use crate::c_str;
@@ -74,14 +74,14 @@ macro_rules! kunit_assert {
// mistake (it is hidden to prevent that).
//
// This mimics KUnit's failed assertion format.
- $crate::kunit::err(format_args!(
+ $crate::kunit::err($crate::prelude::fmt!(
" # {}: ASSERTION FAILED at {FILE}:{LINE}\n",
$name
));
- $crate::kunit::err(format_args!(
+ $crate::kunit::err($crate::prelude::fmt!(
" Expected {CONDITION} to be true, but is false\n"
));
- $crate::kunit::err(format_args!(
+ $crate::kunit::err($crate::prelude::fmt!(
" Failure not reported to KUnit since this is a non-KUnit task\n"
));
break 'out;
@@ -102,12 +102,12 @@ macro_rules! kunit_assert {
unsafe impl Sync for UnaryAssert {}
static LOCATION: Location = Location($crate::bindings::kunit_loc {
- file: FILE.as_char_ptr(),
+ file: $crate::str::as_char_ptr_in_const_context(FILE),
line: LINE,
});
static ASSERTION: UnaryAssert = UnaryAssert($crate::bindings::kunit_unary_assert {
assert: $crate::bindings::kunit_assert {},
- condition: CONDITION.as_char_ptr(),
+ condition: $crate::str::as_char_ptr_in_const_context(CONDITION),
expected_true: true,
});
@@ -202,7 +202,7 @@ pub const fn kunit_case(
) -> kernel::bindings::kunit_case {
kernel::bindings::kunit_case {
run_case: Some(run_case),
- name: name.as_char_ptr(),
+ name: kernel::str::as_char_ptr_in_const_context(name),
attr: kernel::bindings::kunit_attributes {
speed: kernel::bindings::kunit_speed_KUNIT_SPEED_NORMAL,
},
@@ -210,6 +210,8 @@ pub const fn kunit_case(
status: kernel::bindings::kunit_status_KUNIT_SUCCESS,
module_name: core::ptr::null_mut(),
log: core::ptr::null_mut(),
+ param_init: None,
+ param_exit: None,
}
}
@@ -229,6 +231,8 @@ pub const fn kunit_case_null() -> kernel::bindings::kunit_case {
status: kernel::bindings::kunit_status_KUNIT_SUCCESS,
module_name: core::ptr::null_mut(),
log: core::ptr::null_mut(),
+ param_init: None,
+ param_exit: None,
}
}
@@ -357,4 +361,11 @@ mod tests {
fn rust_test_kunit_in_kunit_test() {
assert!(in_kunit_test());
}
+
+ #[test]
+ #[cfg(not(all()))]
+ fn rust_test_kunit_always_disabled_test() {
+ // This test should never run because of the `cfg`.
+ assert!(false);
+ }
}
diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs
index fef97f2a5098..4bc7a1e11a9f 100644
--- a/rust/kernel/lib.rs
+++ b/rust/kernel/lib.rs
@@ -17,6 +17,7 @@
// the unstable features in use.
//
// Stable since Rust 1.79.0.
+#![feature(generic_nonzero)]
#![feature(inline_const)]
//
// Stable since Rust 1.81.0.
@@ -28,6 +29,7 @@
// Stable since Rust 1.83.0.
#![feature(const_maybe_uninit_as_mut_ptr)]
#![feature(const_mut_refs)]
+#![feature(const_option)]
#![feature(const_ptr_write)]
#![feature(const_refs_to_cell)]
//
@@ -62,6 +64,7 @@ pub mod acpi;
pub mod alloc;
#[cfg(CONFIG_AUXILIARY_BUS)]
pub mod auxiliary;
+pub mod bitmap;
pub mod bits;
#[cfg(CONFIG_BLOCK)]
pub mod block;
@@ -76,6 +79,7 @@ pub mod cpu;
pub mod cpufreq;
pub mod cpumask;
pub mod cred;
+pub mod debugfs;
pub mod device;
pub mod device_id;
pub mod devres;
@@ -89,9 +93,11 @@ pub mod faux;
pub mod firmware;
pub mod fmt;
pub mod fs;
+pub mod id_pool;
pub mod init;
pub mod io;
pub mod ioctl;
+pub mod irq;
pub mod jump_label;
#[cfg(CONFIG_KUNIT)]
pub mod kunit;
@@ -110,6 +116,8 @@ pub mod pid_namespace;
pub mod platform;
pub mod prelude;
pub mod print;
+pub mod processor;
+pub mod ptr;
pub mod rbtree;
pub mod regulator;
pub mod revocable;
@@ -206,7 +214,7 @@ impl ThisModule {
}
}
-#[cfg(not(any(testlib, test)))]
+#[cfg(not(testlib))]
#[panic_handler]
fn panic(info: &core::panic::PanicInfo<'_>) -> ! {
pr_emerg!("{}\n", info);
diff --git a/rust/kernel/list.rs b/rust/kernel/list.rs
index 44e5219cfcbc..7355bbac16a7 100644
--- a/rust/kernel/list.rs
+++ b/rust/kernel/list.rs
@@ -38,6 +38,8 @@ pub use self::arc_field::{define_list_arc_field_getter, ListArcField};
///
/// # Examples
///
+/// Use [`ListLinks`] as the type of the intrusive field.
+///
/// ```
/// use kernel::list::*;
///
@@ -140,6 +142,124 @@ pub use self::arc_field::{define_list_arc_field_getter, ListArcField};
/// }
/// # Result::<(), Error>::Ok(())
/// ```
+///
+/// Use [`ListLinksSelfPtr`] as the type of the intrusive field. This allows a list of trait object
+/// type.
+///
+/// ```
+/// use kernel::list::*;
+///
+/// trait Foo {
+/// fn foo(&self) -> (&'static str, i32);
+/// }
+///
+/// #[pin_data]
+/// struct DTWrap<T: ?Sized> {
+/// #[pin]
+/// links: ListLinksSelfPtr<DTWrap<dyn Foo>>,
+/// value: T,
+/// }
+///
+/// impl<T> DTWrap<T> {
+/// fn new(value: T) -> Result<ListArc<Self>> {
+/// ListArc::pin_init(try_pin_init!(Self {
+/// value,
+/// links <- ListLinksSelfPtr::new(),
+/// }), GFP_KERNEL)
+/// }
+/// }
+///
+/// impl_list_arc_safe! {
+/// impl{T: ?Sized} ListArcSafe<0> for DTWrap<T> { untracked; }
+/// }
+/// impl_list_item! {
+/// impl ListItem<0> for DTWrap<dyn Foo> { using ListLinksSelfPtr { self.links }; }
+/// }
+///
+/// // Create a new empty list.
+/// let mut list = List::<DTWrap<dyn Foo>>::new();
+/// {
+/// assert!(list.is_empty());
+/// }
+///
+/// struct A(i32);
+/// // `A` returns the inner value for `foo`.
+/// impl Foo for A { fn foo(&self) -> (&'static str, i32) { ("a", self.0) } }
+///
+/// struct B;
+/// // `B` always returns 42.
+/// impl Foo for B { fn foo(&self) -> (&'static str, i32) { ("b", 42) } }
+///
+/// // Insert 3 elements using `push_back()`.
+/// list.push_back(DTWrap::new(A(15))?);
+/// list.push_back(DTWrap::new(A(32))?);
+/// list.push_back(DTWrap::new(B)?);
+///
+/// // Iterate over the list to verify the nodes were inserted correctly.
+/// // [A(15), A(32), B]
+/// {
+/// let mut iter = list.iter();
+/// assert_eq!(iter.next().ok_or(EINVAL)?.value.foo(), ("a", 15));
+/// assert_eq!(iter.next().ok_or(EINVAL)?.value.foo(), ("a", 32));
+/// assert_eq!(iter.next().ok_or(EINVAL)?.value.foo(), ("b", 42));
+/// assert!(iter.next().is_none());
+///
+/// // Verify the length of the list.
+/// assert_eq!(list.iter().count(), 3);
+/// }
+///
+/// // Pop the items from the list using `pop_back()` and verify the content.
+/// {
+/// assert_eq!(list.pop_back().ok_or(EINVAL)?.value.foo(), ("b", 42));
+/// assert_eq!(list.pop_back().ok_or(EINVAL)?.value.foo(), ("a", 32));
+/// assert_eq!(list.pop_back().ok_or(EINVAL)?.value.foo(), ("a", 15));
+/// }
+///
+/// // Insert 3 elements using `push_front()`.
+/// list.push_front(DTWrap::new(A(15))?);
+/// list.push_front(DTWrap::new(A(32))?);
+/// list.push_front(DTWrap::new(B)?);
+///
+/// // Iterate over the list to verify the nodes were inserted correctly.
+/// // [B, A(32), A(15)]
+/// {
+/// let mut iter = list.iter();
+/// assert_eq!(iter.next().ok_or(EINVAL)?.value.foo(), ("b", 42));
+/// assert_eq!(iter.next().ok_or(EINVAL)?.value.foo(), ("a", 32));
+/// assert_eq!(iter.next().ok_or(EINVAL)?.value.foo(), ("a", 15));
+/// assert!(iter.next().is_none());
+///
+/// // Verify the length of the list.
+/// assert_eq!(list.iter().count(), 3);
+/// }
+///
+/// // Pop the items from the list using `pop_front()` and verify the content.
+/// {
+/// assert_eq!(list.pop_back().ok_or(EINVAL)?.value.foo(), ("a", 15));
+/// assert_eq!(list.pop_back().ok_or(EINVAL)?.value.foo(), ("a", 32));
+/// }
+///
+/// // Push `list2` to `list` through `push_all_back()`.
+/// // list: [B]
+/// // list2: [B, A(25)]
+/// {
+/// let mut list2 = List::<DTWrap<dyn Foo>>::new();
+/// list2.push_back(DTWrap::new(B)?);
+/// list2.push_back(DTWrap::new(A(25))?);
+///
+/// list.push_all_back(&mut list2);
+///
+/// // list: [B, B, A(25)]
+/// // list2: []
+/// let mut iter = list.iter();
+/// assert_eq!(iter.next().ok_or(EINVAL)?.value.foo(), ("b", 42));
+/// assert_eq!(iter.next().ok_or(EINVAL)?.value.foo(), ("b", 42));
+/// assert_eq!(iter.next().ok_or(EINVAL)?.value.foo(), ("a", 25));
+/// assert!(iter.next().is_none());
+/// assert!(list2.is_empty());
+/// }
+/// # Result::<(), Error>::Ok(())
+/// ```
pub struct List<T: ?Sized + ListItem<ID>, const ID: u64 = 0> {
first: *mut ListLinksFields,
_ty: PhantomData<ListArc<T, ID>>,
diff --git a/rust/kernel/miscdevice.rs b/rust/kernel/miscdevice.rs
index 6373fe183b27..d3aa7d25afad 100644
--- a/rust/kernel/miscdevice.rs
+++ b/rust/kernel/miscdevice.rs
@@ -34,7 +34,7 @@ impl MiscDeviceOptions {
// SAFETY: All zeros is valid for this C type.
let mut result: bindings::miscdevice = unsafe { MaybeUninit::zeroed().assume_init() };
result.minor = bindings::MISC_DYNAMIC_MINOR as ffi::c_int;
- result.name = self.name.as_char_ptr();
+ result.name = crate::str::as_char_ptr_in_const_context(self.name);
result.fops = MiscdeviceVTable::<T>::build();
result
}
diff --git a/rust/kernel/net/phy.rs b/rust/kernel/net/phy.rs
index 7de5cc7a0eee..be1027b7961b 100644
--- a/rust/kernel/net/phy.rs
+++ b/rust/kernel/net/phy.rs
@@ -497,7 +497,7 @@ unsafe impl Sync for DriverVTable {}
pub const fn create_phy_driver<T: Driver>() -> DriverVTable {
// INVARIANT: All the fields of `struct phy_driver` are initialized properly.
DriverVTable(Opaque::new(bindings::phy_driver {
- name: T::NAME.as_char_ptr().cast_mut(),
+ name: crate::str::as_char_ptr_in_const_context(T::NAME).cast_mut(),
flags: T::FLAGS,
phy_id: T::PHY_DEVICE_ID.id(),
phy_id_mask: T::PHY_DEVICE_ID.mask_as_int(),
diff --git a/rust/kernel/of.rs b/rust/kernel/of.rs
index b76b35265df2..58b20c367f99 100644
--- a/rust/kernel/of.rs
+++ b/rust/kernel/of.rs
@@ -34,7 +34,7 @@ unsafe impl RawDeviceIdIndex for DeviceId {
impl DeviceId {
/// Create a new device id from an OF 'compatible' string.
pub const fn new(compatible: &'static CStr) -> Self {
- let src = compatible.as_bytes_with_nul();
+ let src = compatible.to_bytes_with_nul();
// Replace with `bindings::of_device_id::default()` once stabilized for `const`.
// SAFETY: FFI type is valid to be zero-initialized.
let mut of: bindings::of_device_id = unsafe { core::mem::zeroed() };
diff --git a/rust/kernel/opp.rs b/rust/kernel/opp.rs
index 08126035d2c6..2c763fa9276d 100644
--- a/rust/kernel/opp.rs
+++ b/rust/kernel/opp.rs
@@ -12,11 +12,12 @@ use crate::{
clk::Hertz,
cpumask::{Cpumask, CpumaskVar},
device::Device,
- error::{code::*, from_err_ptr, from_result, to_result, Error, Result, VTABLE_DEFAULT_ERROR},
+ error::{code::*, from_err_ptr, from_result, to_result, Result, VTABLE_DEFAULT_ERROR},
ffi::c_ulong,
prelude::*,
str::CString,
- types::{ARef, AlwaysRefCounted, Opaque},
+ sync::aref::{ARef, AlwaysRefCounted},
+ types::Opaque,
};
#[cfg(CONFIG_CPU_FREQ)]
@@ -162,7 +163,7 @@ impl From<MicroWatt> for c_ulong {
/// use kernel::device::Device;
/// use kernel::error::Result;
/// use kernel::opp::{Data, MicroVolt, Token};
-/// use kernel::types::ARef;
+/// use kernel::sync::aref::ARef;
///
/// fn create_opp(dev: &ARef<Device>, freq: Hertz, volt: MicroVolt, level: u32) -> Result<Token> {
/// let data = Data::new(freq, volt, level, false);
@@ -211,7 +212,7 @@ impl Drop for Token {
/// use kernel::device::Device;
/// use kernel::error::Result;
/// use kernel::opp::{Data, MicroVolt, Token};
-/// use kernel::types::ARef;
+/// use kernel::sync::aref::ARef;
///
/// fn create_opp(dev: &ARef<Device>, freq: Hertz, volt: MicroVolt, level: u32) -> Result<Token> {
/// let data = Data::new(freq, volt, level, false);
@@ -262,7 +263,7 @@ impl Data {
/// use kernel::clk::Hertz;
/// use kernel::error::Result;
/// use kernel::opp::{OPP, SearchType, Table};
-/// use kernel::types::ARef;
+/// use kernel::sync::aref::ARef;
///
/// fn find_opp(table: &Table, freq: Hertz) -> Result<ARef<OPP>> {
/// let opp = table.opp_from_freq(freq, Some(true), None, SearchType::Exact)?;
@@ -335,7 +336,7 @@ impl Drop for ConfigToken {
/// use kernel::error::Result;
/// use kernel::opp::{Config, ConfigOps, ConfigToken};
/// use kernel::str::CString;
-/// use kernel::types::ARef;
+/// use kernel::sync::aref::ARef;
/// use kernel::macros::vtable;
///
/// #[derive(Default)]
@@ -500,11 +501,8 @@ impl<T: ConfigOps + Default> Config<T> {
// requirements. The OPP core guarantees not to access fields of [`Config`] after this call
// and so we don't need to save a copy of them for future use.
let ret = unsafe { bindings::dev_pm_opp_set_config(dev.as_raw(), &mut config) };
- if ret < 0 {
- Err(Error::from_errno(ret))
- } else {
- Ok(ConfigToken(ret))
- }
+
+ to_result(ret).map(|()| ConfigToken(ret))
}
/// Config's clk callback.
@@ -581,7 +579,7 @@ impl<T: ConfigOps + Default> Config<T> {
/// use kernel::device::Device;
/// use kernel::error::Result;
/// use kernel::opp::Table;
-/// use kernel::types::ARef;
+/// use kernel::sync::aref::ARef;
///
/// fn get_table(dev: &ARef<Device>, mask: &mut Cpumask, freq: Hertz) -> Result<Table> {
/// let mut opp_table = Table::from_of_cpumask(dev, mask)?;
@@ -713,11 +711,8 @@ impl Table {
// SAFETY: The requirements are satisfied by the existence of [`Device`] and its safety
// requirements.
let ret = unsafe { bindings::dev_pm_opp_get_opp_count(self.dev.as_raw()) };
- if ret < 0 {
- Err(Error::from_errno(ret))
- } else {
- Ok(ret as u32)
- }
+
+ to_result(ret).map(|()| ret as u32)
}
/// Returns max clock latency (in nanoseconds) of the [`OPP`]s in the [`Table`].
diff --git a/rust/kernel/pci.rs b/rust/kernel/pci.rs
index 887ee611b553..7fcc5f6022c1 100644
--- a/rust/kernel/pci.rs
+++ b/rust/kernel/pci.rs
@@ -10,10 +10,11 @@ use crate::{
devres::Devres,
driver,
error::{from_result, to_result, Result},
- io::Io,
- io::IoRaw,
+ io::{Io, IoRaw},
+ irq::{self, IrqRequest},
str::CStr,
- types::{ARef, Opaque},
+ sync::aref::ARef,
+ types::Opaque,
ThisModule,
};
use core::{
@@ -23,6 +24,10 @@ use core::{
};
use kernel::prelude::*;
+mod id;
+
+pub use self::id::{Class, ClassMask, Vendor};
+
/// An adapter for the registration of PCI drivers.
pub struct Adapter<T: Driver>(T);
@@ -60,7 +65,7 @@ impl<T: Driver + 'static> Adapter<T> {
extern "C" fn probe_callback(
pdev: *mut bindings::pci_dev,
id: *const bindings::pci_device_id,
- ) -> kernel::ffi::c_int {
+ ) -> c_int {
// SAFETY: The PCI bus only ever calls the probe callback with a valid pointer to a
// `struct pci_dev`.
//
@@ -128,10 +133,11 @@ impl DeviceId {
/// Equivalent to C's `PCI_DEVICE` macro.
///
- /// Create a new `pci::DeviceId` from a vendor and device ID number.
- pub const fn from_id(vendor: u32, device: u32) -> Self {
+ /// Create a new `pci::DeviceId` from a vendor and device ID.
+ #[inline]
+ pub const fn from_id(vendor: Vendor, device: u32) -> Self {
Self(bindings::pci_device_id {
- vendor,
+ vendor: vendor.as_raw() as u32,
device,
subvendor: DeviceId::PCI_ANY_ID,
subdevice: DeviceId::PCI_ANY_ID,
@@ -145,6 +151,7 @@ impl DeviceId {
/// Equivalent to C's `PCI_DEVICE_CLASS` macro.
///
/// Create a new `pci::DeviceId` from a class number and mask.
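+ ///
+ /// # Examples
+ ///
+ /// For instance, matching every NVMe device by its full 24-bit class code:
+ ///
+ /// ```
+ /// use kernel::pci;
+ ///
+ /// // Class 0x010802: mass storage, NVM subsystem, NVMe I/O interface.
+ /// let _id = pci::DeviceId::from_class(0x010802, 0xffffff);
+ /// ```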
+ #[inline]
pub const fn from_class(class: u32, class_mask: u32) -> Self {
Self(bindings::pci_device_id {
vendor: DeviceId::PCI_ANY_ID,
@@ -157,6 +164,29 @@ impl DeviceId {
override_only: 0,
})
}
+
+ /// Create a new [`DeviceId`] from a class number, mask, and specific vendor.
+ ///
+ /// This is more targeted than [`DeviceId::from_class`]: in addition to matching by [`Vendor`],
+ /// it also matches the PCI [`Class`] (up to the entire 24 bits, depending on the
+ /// [`ClassMask`]).
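+ ///
+ /// # Examples
+ ///
+ /// A sketch matching any Red Hat device of a given class while ignoring the
+ /// programming interface; the `Class::NETWORK_ETHERNET` constant name is
+ /// assumed here for illustration:
+ ///
+ /// ```ignore
+ /// use kernel::pci::{self, Class, ClassMask, Vendor};
+ ///
+ /// let id = pci::DeviceId::from_class_and_vendor(
+ ///     Class::NETWORK_ETHERNET, // assumed constant name
+ ///     ClassMask::ClassSubclass,
+ ///     Vendor::REDHAT,
+ /// );
+ /// ```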
+ #[inline]
+ pub const fn from_class_and_vendor(
+ class: Class,
+ class_mask: ClassMask,
+ vendor: Vendor,
+ ) -> Self {
+ Self(bindings::pci_device_id {
+ vendor: vendor.as_raw() as u32,
+ device: DeviceId::PCI_ANY_ID,
+ subvendor: DeviceId::PCI_ANY_ID,
+ subdevice: DeviceId::PCI_ANY_ID,
+ class: class.as_raw(),
+ class_mask: class_mask.as_raw(),
+ driver_data: 0,
+ override_only: 0,
+ })
+ }
}
// SAFETY: `DeviceId` is a `#[repr(transparent)]` wrapper of `pci_device_id` and does not add
@@ -206,7 +236,7 @@ macro_rules! pci_device_table {
/// <MyDriver as pci::Driver>::IdInfo,
/// [
/// (
-/// pci::DeviceId::from_id(bindings::PCI_VENDOR_ID_REDHAT, bindings::PCI_ANY_ID as u32),
+/// pci::DeviceId::from_id(pci::Vendor::REDHAT, bindings::PCI_ANY_ID as u32),
/// (),
/// )
/// ]
@@ -240,11 +270,11 @@ pub trait Driver: Send {
/// PCI driver probe.
///
- /// Called when a new platform device is added or discovered.
- /// Implementers should attempt to initialize the device here.
+ /// Called when a new PCI device is added or discovered. Implementers should
+ /// attempt to initialize the device here.
fn probe(dev: &Device<device::Core>, id_info: &Self::IdInfo) -> Result<Pin<KBox<Self>>>;
- /// Platform driver unbind.
+ /// PCI driver unbind.
///
/// Called when a [`Device`] is unbound from its bound [`Driver`]. Implementing this callback
/// is optional.
@@ -347,7 +377,7 @@ impl<const SIZE: usize> Bar<SIZE> {
// `ioptr` is valid by the safety requirements.
// `num` is valid by the safety requirements.
unsafe {
- bindings::pci_iounmap(pdev.as_raw(), ioptr as *mut kernel::ffi::c_void);
+ bindings::pci_iounmap(pdev.as_raw(), ioptr as *mut c_void);
bindings::pci_release_region(pdev.as_raw(), num);
}
}
@@ -359,6 +389,7 @@ impl<const SIZE: usize> Bar<SIZE> {
}
impl Bar {
+ #[inline]
fn index_is_valid(index: u32) -> bool {
// A `struct pci_dev` owns an array of resources with at most `PCI_NUM_RESOURCES` entries.
index < bindings::PCI_NUM_RESOURCES
@@ -381,24 +412,90 @@ impl<const SIZE: usize> Deref for Bar<SIZE> {
}
impl<Ctx: device::DeviceContext> Device<Ctx> {
+ #[inline]
fn as_raw(&self) -> *mut bindings::pci_dev {
self.0.get()
}
}
impl Device {
- /// Returns the PCI vendor ID.
- pub fn vendor_id(&self) -> u16 {
+ /// Returns the PCI vendor ID as [`Vendor`].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// # use kernel::{device::Core, pci::{self, Vendor}, prelude::*};
+ /// fn log_device_info(pdev: &pci::Device<Core>) -> Result {
+ /// // Get an instance of `Vendor`.
+ /// let vendor = pdev.vendor_id();
+ /// dev_info!(
+ /// pdev.as_ref(),
+ /// "Device: Vendor={}, Device=0x{:x}\n",
+ /// vendor,
+ /// pdev.device_id()
+ /// );
+ /// Ok(())
+ /// }
+ /// ```
+ #[inline]
+ pub fn vendor_id(&self) -> Vendor {
// SAFETY: `self.as_raw` is a valid pointer to a `struct pci_dev`.
- unsafe { (*self.as_raw()).vendor }
+ let vendor_id = unsafe { (*self.as_raw()).vendor };
+ Vendor::from_raw(vendor_id)
}
/// Returns the PCI device ID.
+ #[inline]
pub fn device_id(&self) -> u16 {
- // SAFETY: `self.as_raw` is a valid pointer to a `struct pci_dev`.
+ // SAFETY: By its type invariant `self.as_raw` is always a valid pointer to a
+ // `struct pci_dev`.
unsafe { (*self.as_raw()).device }
}
+ /// Returns the PCI revision ID.
+ #[inline]
+ pub fn revision_id(&self) -> u8 {
+ // SAFETY: By its type invariant `self.as_raw` is always a valid pointer to a
+ // `struct pci_dev`.
+ unsafe { (*self.as_raw()).revision }
+ }
+
+ /// Returns the PCI bus device/function.
+ #[inline]
+ pub fn dev_id(&self) -> u16 {
+ // SAFETY: By its type invariant `self.as_raw` is always a valid pointer to a
+ // `struct pci_dev`.
+ unsafe { bindings::pci_dev_id(self.as_raw()) }
+ }
+
+ /// Returns the PCI subsystem vendor ID.
+ #[inline]
+ pub fn subsystem_vendor_id(&self) -> u16 {
+ // SAFETY: By its type invariant `self.as_raw` is always a valid pointer to a
+ // `struct pci_dev`.
+ unsafe { (*self.as_raw()).subsystem_vendor }
+ }
+
+ /// Returns the PCI subsystem device ID.
+ #[inline]
+ pub fn subsystem_device_id(&self) -> u16 {
+ // SAFETY: By its type invariant `self.as_raw` is always a valid pointer to a
+ // `struct pci_dev`.
+ unsafe { (*self.as_raw()).subsystem_device }
+ }
+
+ /// Returns the start of the given PCI bar resource.
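+ ///
+ /// # Examples
+ ///
+ /// A minimal sketch that logs the location and size of BAR 0:
+ ///
+ /// ```
+ /// # use kernel::{device::Core, pci, prelude::*};
+ /// fn log_bar0(pdev: &pci::Device<Core>) -> Result {
+ ///     let start = pdev.resource_start(0)?;
+ ///     let len = pdev.resource_len(0)?;
+ ///     dev_info!(pdev.as_ref(), "BAR0: start=0x{:x}, len=0x{:x}\n", start, len);
+ ///     Ok(())
+ /// }
+ /// ```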
+ pub fn resource_start(&self, bar: u32) -> Result<bindings::resource_size_t> {
+ if !Bar::index_is_valid(bar) {
+ return Err(EINVAL);
+ }
+
+ // SAFETY:
+ // - `bar` is a valid bar number, as guaranteed by the above call to `Bar::index_is_valid`,
+ // - by its type invariant `self.as_raw` is always a valid pointer to a `struct pci_dev`.
+ Ok(unsafe { bindings::pci_resource_start(self.as_raw(), bar.try_into()?) })
+ }
+
/// Returns the size of the given PCI bar resource.
pub fn resource_len(&self, bar: u32) -> Result<bindings::resource_size_t> {
if !Bar::index_is_valid(bar) {
@@ -410,6 +507,13 @@ impl Device {
// - by its type invariant `self.as_raw` is always a valid pointer to a `struct pci_dev`.
Ok(unsafe { bindings::pci_resource_len(self.as_raw(), bar.try_into()?) })
}
+
+ /// Returns the PCI class as a [`Class`].
+ #[inline]
+ pub fn pci_class(&self) -> Class {
+ // SAFETY: `self.as_raw` is a valid pointer to a `struct pci_dev`.
+ Class::from_raw(unsafe { (*self.as_raw()).class })
+ }
}
impl Device<device::Bound> {
@@ -431,6 +535,47 @@ impl Device<device::Bound> {
) -> impl PinInit<Devres<Bar>, Error> + 'a {
self.iomap_region_sized::<0>(bar, name)
}
+
+ /// Returns an [`IrqRequest`] for the IRQ vector at the given index, if any.
+ pub fn irq_vector(&self, index: u32) -> Result<IrqRequest<'_>> {
+ // SAFETY: `self.as_raw` returns a valid pointer to a `struct pci_dev`.
+ let irq = unsafe { crate::bindings::pci_irq_vector(self.as_raw(), index) };
+ if irq < 0 {
+ return Err(crate::error::Error::from_errno(irq));
+ }
+ // SAFETY: `irq` is guaranteed to be a valid IRQ number for `&self`.
+ Ok(unsafe { IrqRequest::new(self.as_ref(), irq as u32) })
+ }
+
+ /// Returns a [`kernel::irq::Registration`] for the IRQ vector at the given
+ /// index.
+ pub fn request_irq<'a, T: crate::irq::Handler + 'static>(
+ &'a self,
+ index: u32,
+ flags: irq::Flags,
+ name: &'static CStr,
+ handler: impl PinInit<T, Error> + 'a,
+ ) -> Result<impl PinInit<irq::Registration<T>, Error> + 'a> {
+ let request = self.irq_vector(index)?;
+
+ Ok(irq::Registration::<T>::new(request, flags, name, handler))
+ }
+
+ /// Returns a [`kernel::irq::ThreadedRegistration`] for the IRQ vector at
+ /// the given index.
+ pub fn request_threaded_irq<'a, T: crate::irq::ThreadedHandler + 'static>(
+ &'a self,
+ index: u32,
+ flags: irq::Flags,
+ name: &'static CStr,
+ handler: impl PinInit<T, Error> + 'a,
+ ) -> Result<impl PinInit<irq::ThreadedRegistration<T>, Error> + 'a> {
+ let request = self.irq_vector(index)?;
+
+ Ok(irq::ThreadedRegistration::<T>::new(
+ request, flags, name, handler,
+ ))
+ }
}
impl Device<device::Core> {
@@ -441,6 +586,7 @@ impl Device<device::Core> {
}
/// Enable bus-mastering for this device.
+ #[inline]
pub fn set_master(&self) {
// SAFETY: `self.as_raw` is guaranteed to be a pointer to a valid `struct pci_dev`.
unsafe { bindings::pci_set_master(self.as_raw()) };
@@ -455,7 +601,7 @@ kernel::impl_device_context_into_aref!(Device);
impl crate::dma::Device for Device<device::Core> {}
// SAFETY: Instances of `Device` are always reference-counted.
-unsafe impl crate::types::AlwaysRefCounted for Device {
+unsafe impl crate::sync::aref::AlwaysRefCounted for Device {
fn inc_ref(&self) {
// SAFETY: The existence of a shared reference guarantees that the refcount is non-zero.
unsafe { bindings::pci_dev_get(self.as_raw()) };
diff --git a/rust/kernel/pci/id.rs b/rust/kernel/pci/id.rs
new file mode 100644
index 000000000000..7f2a7f57507f
--- /dev/null
+++ b/rust/kernel/pci/id.rs
@@ -0,0 +1,578 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! PCI device identifiers and related types.
+//!
+//! This module contains PCI class codes, Vendor IDs, and supporting types.
+
+use crate::{bindings, error::code::EINVAL, error::Error, prelude::*};
+use core::fmt;
+
+/// PCI device class codes.
+///
+/// Each entry contains the full 24-bit PCI class code (base class in bits
+/// 23-16, subclass in bits 15-8, programming interface in bits 7-0).
+///
+/// # Examples
+///
+/// ```
+/// # use kernel::{device::Core, pci::{self, Class}, prelude::*};
+/// fn probe_device(pdev: &pci::Device<Core>) -> Result {
+/// let pci_class = pdev.pci_class();
+/// dev_info!(
+/// pdev.as_ref(),
+/// "Detected PCI class: {}\n",
+/// pci_class
+/// );
+/// Ok(())
+/// }
+/// ```
+#[derive(Clone, Copy, PartialEq, Eq)]
+#[repr(transparent)]
+pub struct Class(u32);
+
+/// PCI class mask constants for matching [`Class`] codes.
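+///
+/// # Examples
+///
+/// A small sketch of comparing class codes under a mask: masking off the
+/// programming-interface byte makes an AHCI controller match the generic
+/// SATA class.
+///
+/// ```
+/// # use kernel::pci::{Class, ClassMask};
+/// let mask = ClassMask::ClassSubclass;
+/// assert_eq!(
+///     Class::STORAGE_SATA_AHCI.as_raw() & mask.as_raw(),
+///     Class::STORAGE_SATA.as_raw() & mask.as_raw(),
+/// );
+/// ```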
+#[repr(u32)]
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum ClassMask {
+ /// Match the full 24-bit class code.
+ Full = 0xffffff,
+ /// Match the upper 16 bits of the class code (base class and subclass only)
+ ClassSubclass = 0xffff00,
+}
+
+macro_rules! define_all_pci_classes {
+ (
+ $($variant:ident = $binding:expr,)+
+ ) => {
+ impl Class {
+ $(
+ #[allow(missing_docs)]
+ pub const $variant: Self = Self(Self::to_24bit_class($binding));
+ )+
+ }
+
+ impl fmt::Display for Class {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ match self {
+ $(
+ &Self::$variant => write!(f, stringify!($variant)),
+ )+
+ _ => <Self as fmt::Debug>::fmt(self, f),
+ }
+ }
+ }
+ };
+}
+
+/// Once constructed, a [`Class`] contains a valid PCI class code.
+impl Class {
+ /// Create a [`Class`] from a raw 24-bit class code.
+ #[inline]
+ pub(super) fn from_raw(class_code: u32) -> Self {
+ Self(class_code)
+ }
+
+ /// Get the raw 24-bit class code value.
+ #[inline]
+ pub const fn as_raw(self) -> u32 {
+ self.0
+ }
+
+ // Converts a PCI class constant to 24-bit format.
+ //
+ // Many device drivers use only the upper 16 bits (base class and subclass),
+ // but some use the full 24 bits. In order to support both cases, store the
+ // class code as a 24-bit value, where 16-bit values are shifted up 8 bits.
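+    //
+    // For example, the 16-bit constant 0x0106 (SATA) becomes 0x010600, while
+    // the 24-bit constant 0x010601 (SATA AHCI) is kept as-is.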
+ const fn to_24bit_class(val: u32) -> u32 {
+ if val > 0xFFFF {
+ val
+ } else {
+ val << 8
+ }
+ }
+}
+
+impl fmt::Debug for Class {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ write!(f, "0x{:06x}", self.0)
+ }
+}
+
+impl ClassMask {
+ /// Get the raw mask value.
+ #[inline]
+ pub const fn as_raw(self) -> u32 {
+ self as u32
+ }
+}
+
+impl TryFrom<u32> for ClassMask {
+ type Error = Error;
+
+ fn try_from(value: u32) -> Result<Self, Self::Error> {
+ match value {
+ 0xffffff => Ok(ClassMask::Full),
+ 0xffff00 => Ok(ClassMask::ClassSubclass),
+ _ => Err(EINVAL),
+ }
+ }
+}
+
+/// PCI vendor IDs.
+///
+/// Each entry contains the 16-bit PCI vendor ID as assigned by the PCI SIG.
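+///
+/// # Examples
+///
+/// A short sketch relating a vendor constant to its raw 16-bit ID:
+///
+/// ```
+/// # use kernel::pci::Vendor;
+/// assert_eq!(Vendor::REDHAT.as_raw(), 0x1b36);
+/// ```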
+#[derive(Clone, Copy, PartialEq, Eq)]
+#[repr(transparent)]
+pub struct Vendor(u16);
+
+macro_rules! define_all_pci_vendors {
+ (
+ $($variant:ident = $binding:expr,)+
+ ) => {
+ impl Vendor {
+ $(
+ #[allow(missing_docs)]
+ pub const $variant: Self = Self($binding as u16);
+ )+
+ }
+
+ impl fmt::Display for Vendor {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ match self {
+ $(
+ &Self::$variant => write!(f, stringify!($variant)),
+ )+
+ _ => <Self as fmt::Debug>::fmt(self, f),
+ }
+ }
+ }
+ };
+}
+
+/// Once constructed, a [`Vendor`] contains a valid PCI vendor ID.
+impl Vendor {
+    /// Create a [`Vendor`] from a raw 16-bit vendor ID.
+ #[inline]
+ pub(super) fn from_raw(vendor_id: u16) -> Self {
+ Self(vendor_id)
+ }
+
+ /// Get the raw 16-bit vendor ID value.
+ #[inline]
+ pub const fn as_raw(self) -> u16 {
+ self.0
+ }
+}
+
+impl fmt::Debug for Vendor {
+ #[inline]
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ write!(f, "0x{:04x}", self.0)
+ }
+}
+
+define_all_pci_classes! {
+ NOT_DEFINED = bindings::PCI_CLASS_NOT_DEFINED, // 0x000000
+ NOT_DEFINED_VGA = bindings::PCI_CLASS_NOT_DEFINED_VGA, // 0x000100
+
+ STORAGE_SCSI = bindings::PCI_CLASS_STORAGE_SCSI, // 0x010000
+ STORAGE_IDE = bindings::PCI_CLASS_STORAGE_IDE, // 0x010100
+ STORAGE_FLOPPY = bindings::PCI_CLASS_STORAGE_FLOPPY, // 0x010200
+ STORAGE_IPI = bindings::PCI_CLASS_STORAGE_IPI, // 0x010300
+ STORAGE_RAID = bindings::PCI_CLASS_STORAGE_RAID, // 0x010400
+ STORAGE_SATA = bindings::PCI_CLASS_STORAGE_SATA, // 0x010600
+ STORAGE_SATA_AHCI = bindings::PCI_CLASS_STORAGE_SATA_AHCI, // 0x010601
+ STORAGE_SAS = bindings::PCI_CLASS_STORAGE_SAS, // 0x010700
+ STORAGE_EXPRESS = bindings::PCI_CLASS_STORAGE_EXPRESS, // 0x010802
+ STORAGE_OTHER = bindings::PCI_CLASS_STORAGE_OTHER, // 0x018000
+
+ NETWORK_ETHERNET = bindings::PCI_CLASS_NETWORK_ETHERNET, // 0x020000
+ NETWORK_TOKEN_RING = bindings::PCI_CLASS_NETWORK_TOKEN_RING, // 0x020100
+ NETWORK_FDDI = bindings::PCI_CLASS_NETWORK_FDDI, // 0x020200
+ NETWORK_ATM = bindings::PCI_CLASS_NETWORK_ATM, // 0x020300
+ NETWORK_OTHER = bindings::PCI_CLASS_NETWORK_OTHER, // 0x028000
+
+ DISPLAY_VGA = bindings::PCI_CLASS_DISPLAY_VGA, // 0x030000
+ DISPLAY_XGA = bindings::PCI_CLASS_DISPLAY_XGA, // 0x030100
+ DISPLAY_3D = bindings::PCI_CLASS_DISPLAY_3D, // 0x030200
+ DISPLAY_OTHER = bindings::PCI_CLASS_DISPLAY_OTHER, // 0x038000
+
+ MULTIMEDIA_VIDEO = bindings::PCI_CLASS_MULTIMEDIA_VIDEO, // 0x040000
+ MULTIMEDIA_AUDIO = bindings::PCI_CLASS_MULTIMEDIA_AUDIO, // 0x040100
+ MULTIMEDIA_PHONE = bindings::PCI_CLASS_MULTIMEDIA_PHONE, // 0x040200
+ MULTIMEDIA_HD_AUDIO = bindings::PCI_CLASS_MULTIMEDIA_HD_AUDIO, // 0x040300
+ MULTIMEDIA_OTHER = bindings::PCI_CLASS_MULTIMEDIA_OTHER, // 0x048000
+
+ MEMORY_RAM = bindings::PCI_CLASS_MEMORY_RAM, // 0x050000
+ MEMORY_FLASH = bindings::PCI_CLASS_MEMORY_FLASH, // 0x050100
+ MEMORY_CXL = bindings::PCI_CLASS_MEMORY_CXL, // 0x050200
+ MEMORY_OTHER = bindings::PCI_CLASS_MEMORY_OTHER, // 0x058000
+
+ BRIDGE_HOST = bindings::PCI_CLASS_BRIDGE_HOST, // 0x060000
+ BRIDGE_ISA = bindings::PCI_CLASS_BRIDGE_ISA, // 0x060100
+ BRIDGE_EISA = bindings::PCI_CLASS_BRIDGE_EISA, // 0x060200
+ BRIDGE_MC = bindings::PCI_CLASS_BRIDGE_MC, // 0x060300
+ BRIDGE_PCI_NORMAL = bindings::PCI_CLASS_BRIDGE_PCI_NORMAL, // 0x060400
+ BRIDGE_PCI_SUBTRACTIVE = bindings::PCI_CLASS_BRIDGE_PCI_SUBTRACTIVE, // 0x060401
+ BRIDGE_PCMCIA = bindings::PCI_CLASS_BRIDGE_PCMCIA, // 0x060500
+ BRIDGE_NUBUS = bindings::PCI_CLASS_BRIDGE_NUBUS, // 0x060600
+ BRIDGE_CARDBUS = bindings::PCI_CLASS_BRIDGE_CARDBUS, // 0x060700
+ BRIDGE_RACEWAY = bindings::PCI_CLASS_BRIDGE_RACEWAY, // 0x060800
+ BRIDGE_OTHER = bindings::PCI_CLASS_BRIDGE_OTHER, // 0x068000
+
+ COMMUNICATION_SERIAL = bindings::PCI_CLASS_COMMUNICATION_SERIAL, // 0x070000
+ COMMUNICATION_PARALLEL = bindings::PCI_CLASS_COMMUNICATION_PARALLEL, // 0x070100
+ COMMUNICATION_MULTISERIAL = bindings::PCI_CLASS_COMMUNICATION_MULTISERIAL, // 0x070200
+ COMMUNICATION_MODEM = bindings::PCI_CLASS_COMMUNICATION_MODEM, // 0x070300
+ COMMUNICATION_OTHER = bindings::PCI_CLASS_COMMUNICATION_OTHER, // 0x078000
+
+ SYSTEM_PIC = bindings::PCI_CLASS_SYSTEM_PIC, // 0x080000
+ SYSTEM_PIC_IOAPIC = bindings::PCI_CLASS_SYSTEM_PIC_IOAPIC, // 0x080010
+ SYSTEM_PIC_IOXAPIC = bindings::PCI_CLASS_SYSTEM_PIC_IOXAPIC, // 0x080020
+ SYSTEM_DMA = bindings::PCI_CLASS_SYSTEM_DMA, // 0x080100
+ SYSTEM_TIMER = bindings::PCI_CLASS_SYSTEM_TIMER, // 0x080200
+ SYSTEM_RTC = bindings::PCI_CLASS_SYSTEM_RTC, // 0x080300
+ SYSTEM_PCI_HOTPLUG = bindings::PCI_CLASS_SYSTEM_PCI_HOTPLUG, // 0x080400
+ SYSTEM_SDHCI = bindings::PCI_CLASS_SYSTEM_SDHCI, // 0x080500
+ SYSTEM_RCEC = bindings::PCI_CLASS_SYSTEM_RCEC, // 0x080700
+ SYSTEM_OTHER = bindings::PCI_CLASS_SYSTEM_OTHER, // 0x088000
+
+ INPUT_KEYBOARD = bindings::PCI_CLASS_INPUT_KEYBOARD, // 0x090000
+ INPUT_PEN = bindings::PCI_CLASS_INPUT_PEN, // 0x090100
+ INPUT_MOUSE = bindings::PCI_CLASS_INPUT_MOUSE, // 0x090200
+ INPUT_SCANNER = bindings::PCI_CLASS_INPUT_SCANNER, // 0x090300
+ INPUT_GAMEPORT = bindings::PCI_CLASS_INPUT_GAMEPORT, // 0x090400
+ INPUT_OTHER = bindings::PCI_CLASS_INPUT_OTHER, // 0x098000
+
+ DOCKING_GENERIC = bindings::PCI_CLASS_DOCKING_GENERIC, // 0x0a0000
+ DOCKING_OTHER = bindings::PCI_CLASS_DOCKING_OTHER, // 0x0a8000
+
+ PROCESSOR_386 = bindings::PCI_CLASS_PROCESSOR_386, // 0x0b0000
+ PROCESSOR_486 = bindings::PCI_CLASS_PROCESSOR_486, // 0x0b0100
+ PROCESSOR_PENTIUM = bindings::PCI_CLASS_PROCESSOR_PENTIUM, // 0x0b0200
+ PROCESSOR_ALPHA = bindings::PCI_CLASS_PROCESSOR_ALPHA, // 0x0b1000
+ PROCESSOR_POWERPC = bindings::PCI_CLASS_PROCESSOR_POWERPC, // 0x0b2000
+ PROCESSOR_MIPS = bindings::PCI_CLASS_PROCESSOR_MIPS, // 0x0b3000
+ PROCESSOR_CO = bindings::PCI_CLASS_PROCESSOR_CO, // 0x0b4000
+
+ SERIAL_FIREWIRE = bindings::PCI_CLASS_SERIAL_FIREWIRE, // 0x0c0000
+ SERIAL_FIREWIRE_OHCI = bindings::PCI_CLASS_SERIAL_FIREWIRE_OHCI, // 0x0c0010
+ SERIAL_ACCESS = bindings::PCI_CLASS_SERIAL_ACCESS, // 0x0c0100
+ SERIAL_SSA = bindings::PCI_CLASS_SERIAL_SSA, // 0x0c0200
+ SERIAL_USB_UHCI = bindings::PCI_CLASS_SERIAL_USB_UHCI, // 0x0c0300
+ SERIAL_USB_OHCI = bindings::PCI_CLASS_SERIAL_USB_OHCI, // 0x0c0310
+ SERIAL_USB_EHCI = bindings::PCI_CLASS_SERIAL_USB_EHCI, // 0x0c0320
+ SERIAL_USB_XHCI = bindings::PCI_CLASS_SERIAL_USB_XHCI, // 0x0c0330
+ SERIAL_USB_CDNS = bindings::PCI_CLASS_SERIAL_USB_CDNS, // 0x0c0380
+ SERIAL_USB_DEVICE = bindings::PCI_CLASS_SERIAL_USB_DEVICE, // 0x0c03fe
+ SERIAL_FIBER = bindings::PCI_CLASS_SERIAL_FIBER, // 0x0c0400
+ SERIAL_SMBUS = bindings::PCI_CLASS_SERIAL_SMBUS, // 0x0c0500
+ SERIAL_IPMI_SMIC = bindings::PCI_CLASS_SERIAL_IPMI_SMIC, // 0x0c0700
+ SERIAL_IPMI_KCS = bindings::PCI_CLASS_SERIAL_IPMI_KCS, // 0x0c0701
+ SERIAL_IPMI_BT = bindings::PCI_CLASS_SERIAL_IPMI_BT, // 0x0c0702
+
+ WIRELESS_RF_CONTROLLER = bindings::PCI_CLASS_WIRELESS_RF_CONTROLLER, // 0x0d1000
+ WIRELESS_WHCI = bindings::PCI_CLASS_WIRELESS_WHCI, // 0x0d1010
+
+ INTELLIGENT_I2O = bindings::PCI_CLASS_INTELLIGENT_I2O, // 0x0e0000
+
+ SATELLITE_TV = bindings::PCI_CLASS_SATELLITE_TV, // 0x0f0000
+ SATELLITE_AUDIO = bindings::PCI_CLASS_SATELLITE_AUDIO, // 0x0f0100
+ SATELLITE_VOICE = bindings::PCI_CLASS_SATELLITE_VOICE, // 0x0f0300
+ SATELLITE_DATA = bindings::PCI_CLASS_SATELLITE_DATA, // 0x0f0400
+
+ CRYPT_NETWORK = bindings::PCI_CLASS_CRYPT_NETWORK, // 0x100000
+ CRYPT_ENTERTAINMENT = bindings::PCI_CLASS_CRYPT_ENTERTAINMENT, // 0x100100
+ CRYPT_OTHER = bindings::PCI_CLASS_CRYPT_OTHER, // 0x108000
+
+ SP_DPIO = bindings::PCI_CLASS_SP_DPIO, // 0x110000
+ SP_OTHER = bindings::PCI_CLASS_SP_OTHER, // 0x118000
+
+ ACCELERATOR_PROCESSING = bindings::PCI_CLASS_ACCELERATOR_PROCESSING, // 0x120000
+
+ OTHERS = bindings::PCI_CLASS_OTHERS, // 0xff0000
+}
+
+define_all_pci_vendors! {
+ PCI_SIG = bindings::PCI_VENDOR_ID_PCI_SIG, // 0x0001
+ LOONGSON = bindings::PCI_VENDOR_ID_LOONGSON, // 0x0014
+ SOLIDIGM = bindings::PCI_VENDOR_ID_SOLIDIGM, // 0x025e
+ TTTECH = bindings::PCI_VENDOR_ID_TTTECH, // 0x0357
+ DYNALINK = bindings::PCI_VENDOR_ID_DYNALINK, // 0x0675
+ UBIQUITI = bindings::PCI_VENDOR_ID_UBIQUITI, // 0x0777
+ BERKOM = bindings::PCI_VENDOR_ID_BERKOM, // 0x0871
+ ITTIM = bindings::PCI_VENDOR_ID_ITTIM, // 0x0b48
+ COMPAQ = bindings::PCI_VENDOR_ID_COMPAQ, // 0x0e11
+ LSI_LOGIC = bindings::PCI_VENDOR_ID_LSI_LOGIC, // 0x1000
+ ATI = bindings::PCI_VENDOR_ID_ATI, // 0x1002
+ VLSI = bindings::PCI_VENDOR_ID_VLSI, // 0x1004
+ ADL = bindings::PCI_VENDOR_ID_ADL, // 0x1005
+ NS = bindings::PCI_VENDOR_ID_NS, // 0x100b
+ TSENG = bindings::PCI_VENDOR_ID_TSENG, // 0x100c
+ WEITEK = bindings::PCI_VENDOR_ID_WEITEK, // 0x100e
+ DEC = bindings::PCI_VENDOR_ID_DEC, // 0x1011
+ CIRRUS = bindings::PCI_VENDOR_ID_CIRRUS, // 0x1013
+ IBM = bindings::PCI_VENDOR_ID_IBM, // 0x1014
+ UNISYS = bindings::PCI_VENDOR_ID_UNISYS, // 0x1018
+ COMPEX2 = bindings::PCI_VENDOR_ID_COMPEX2, // 0x101a
+ WD = bindings::PCI_VENDOR_ID_WD, // 0x101c
+ AMI = bindings::PCI_VENDOR_ID_AMI, // 0x101e
+ AMD = bindings::PCI_VENDOR_ID_AMD, // 0x1022
+ TRIDENT = bindings::PCI_VENDOR_ID_TRIDENT, // 0x1023
+ AI = bindings::PCI_VENDOR_ID_AI, // 0x1025
+ DELL = bindings::PCI_VENDOR_ID_DELL, // 0x1028
+ MATROX = bindings::PCI_VENDOR_ID_MATROX, // 0x102B
+ MOBILITY_ELECTRONICS = bindings::PCI_VENDOR_ID_MOBILITY_ELECTRONICS, // 0x14f2
+ CT = bindings::PCI_VENDOR_ID_CT, // 0x102c
+ MIRO = bindings::PCI_VENDOR_ID_MIRO, // 0x1031
+ NEC = bindings::PCI_VENDOR_ID_NEC, // 0x1033
+ FD = bindings::PCI_VENDOR_ID_FD, // 0x1036
+ SI = bindings::PCI_VENDOR_ID_SI, // 0x1039
+ HP = bindings::PCI_VENDOR_ID_HP, // 0x103c
+ HP_3PAR = bindings::PCI_VENDOR_ID_HP_3PAR, // 0x1590
+ PCTECH = bindings::PCI_VENDOR_ID_PCTECH, // 0x1042
+ ASUSTEK = bindings::PCI_VENDOR_ID_ASUSTEK, // 0x1043
+ DPT = bindings::PCI_VENDOR_ID_DPT, // 0x1044
+ OPTI = bindings::PCI_VENDOR_ID_OPTI, // 0x1045
+ ELSA = bindings::PCI_VENDOR_ID_ELSA, // 0x1048
+ STMICRO = bindings::PCI_VENDOR_ID_STMICRO, // 0x104A
+ BUSLOGIC = bindings::PCI_VENDOR_ID_BUSLOGIC, // 0x104B
+ TI = bindings::PCI_VENDOR_ID_TI, // 0x104c
+ SONY = bindings::PCI_VENDOR_ID_SONY, // 0x104d
+ WINBOND2 = bindings::PCI_VENDOR_ID_WINBOND2, // 0x1050
+ ANIGMA = bindings::PCI_VENDOR_ID_ANIGMA, // 0x1051
+ EFAR = bindings::PCI_VENDOR_ID_EFAR, // 0x1055
+ MOTOROLA = bindings::PCI_VENDOR_ID_MOTOROLA, // 0x1057
+ PROMISE = bindings::PCI_VENDOR_ID_PROMISE, // 0x105a
+ FOXCONN = bindings::PCI_VENDOR_ID_FOXCONN, // 0x105b
+ UMC = bindings::PCI_VENDOR_ID_UMC, // 0x1060
+ PICOPOWER = bindings::PCI_VENDOR_ID_PICOPOWER, // 0x1066
+ MYLEX = bindings::PCI_VENDOR_ID_MYLEX, // 0x1069
+ APPLE = bindings::PCI_VENDOR_ID_APPLE, // 0x106b
+ YAMAHA = bindings::PCI_VENDOR_ID_YAMAHA, // 0x1073
+ QLOGIC = bindings::PCI_VENDOR_ID_QLOGIC, // 0x1077
+ CYRIX = bindings::PCI_VENDOR_ID_CYRIX, // 0x1078
+ CONTAQ = bindings::PCI_VENDOR_ID_CONTAQ, // 0x1080
+ OLICOM = bindings::PCI_VENDOR_ID_OLICOM, // 0x108d
+ SUN = bindings::PCI_VENDOR_ID_SUN, // 0x108e
+ NI = bindings::PCI_VENDOR_ID_NI, // 0x1093
+ CMD = bindings::PCI_VENDOR_ID_CMD, // 0x1095
+ BROOKTREE = bindings::PCI_VENDOR_ID_BROOKTREE, // 0x109e
+ SGI = bindings::PCI_VENDOR_ID_SGI, // 0x10a9
+ WINBOND = bindings::PCI_VENDOR_ID_WINBOND, // 0x10ad
+ PLX = bindings::PCI_VENDOR_ID_PLX, // 0x10b5
+ MADGE = bindings::PCI_VENDOR_ID_MADGE, // 0x10b6
+ THREECOM = bindings::PCI_VENDOR_ID_3COM, // 0x10b7
+ AL = bindings::PCI_VENDOR_ID_AL, // 0x10b9
+ NEOMAGIC = bindings::PCI_VENDOR_ID_NEOMAGIC, // 0x10c8
+ TCONRAD = bindings::PCI_VENDOR_ID_TCONRAD, // 0x10da
+ ROHM = bindings::PCI_VENDOR_ID_ROHM, // 0x10db
+ NVIDIA = bindings::PCI_VENDOR_ID_NVIDIA, // 0x10de
+ IMS = bindings::PCI_VENDOR_ID_IMS, // 0x10e0
+ AMCC = bindings::PCI_VENDOR_ID_AMCC, // 0x10e8
+ AMPERE = bindings::PCI_VENDOR_ID_AMPERE, // 0x1def
+ INTERG = bindings::PCI_VENDOR_ID_INTERG, // 0x10ea
+ REALTEK = bindings::PCI_VENDOR_ID_REALTEK, // 0x10ec
+ XILINX = bindings::PCI_VENDOR_ID_XILINX, // 0x10ee
+ INIT = bindings::PCI_VENDOR_ID_INIT, // 0x1101
+ CREATIVE = bindings::PCI_VENDOR_ID_CREATIVE, // 0x1102
+ TTI = bindings::PCI_VENDOR_ID_TTI, // 0x1103
+ SIGMA = bindings::PCI_VENDOR_ID_SIGMA, // 0x1105
+ VIA = bindings::PCI_VENDOR_ID_VIA, // 0x1106
+ SIEMENS = bindings::PCI_VENDOR_ID_SIEMENS, // 0x110A
+ VORTEX = bindings::PCI_VENDOR_ID_VORTEX, // 0x1119
+ EF = bindings::PCI_VENDOR_ID_EF, // 0x111a
+ IDT = bindings::PCI_VENDOR_ID_IDT, // 0x111d
+ FORE = bindings::PCI_VENDOR_ID_FORE, // 0x1127
+ PHILIPS = bindings::PCI_VENDOR_ID_PHILIPS, // 0x1131
+ EICON = bindings::PCI_VENDOR_ID_EICON, // 0x1133
+ CISCO = bindings::PCI_VENDOR_ID_CISCO, // 0x1137
+ ZIATECH = bindings::PCI_VENDOR_ID_ZIATECH, // 0x1138
+ SYSKONNECT = bindings::PCI_VENDOR_ID_SYSKONNECT, // 0x1148
+ DIGI = bindings::PCI_VENDOR_ID_DIGI, // 0x114f
+ XIRCOM = bindings::PCI_VENDOR_ID_XIRCOM, // 0x115d
+ SERVERWORKS = bindings::PCI_VENDOR_ID_SERVERWORKS, // 0x1166
+ ALTERA = bindings::PCI_VENDOR_ID_ALTERA, // 0x1172
+ SBE = bindings::PCI_VENDOR_ID_SBE, // 0x1176
+ TOSHIBA = bindings::PCI_VENDOR_ID_TOSHIBA, // 0x1179
+ TOSHIBA_2 = bindings::PCI_VENDOR_ID_TOSHIBA_2, // 0x102f
+ ATTO = bindings::PCI_VENDOR_ID_ATTO, // 0x117c
+ RICOH = bindings::PCI_VENDOR_ID_RICOH, // 0x1180
+ DLINK = bindings::PCI_VENDOR_ID_DLINK, // 0x1186
+ ARTOP = bindings::PCI_VENDOR_ID_ARTOP, // 0x1191
+ ZEITNET = bindings::PCI_VENDOR_ID_ZEITNET, // 0x1193
+ FUJITSU_ME = bindings::PCI_VENDOR_ID_FUJITSU_ME, // 0x119e
+ MARVELL = bindings::PCI_VENDOR_ID_MARVELL, // 0x11ab
+ MARVELL_EXT = bindings::PCI_VENDOR_ID_MARVELL_EXT, // 0x1b4b
+ V3 = bindings::PCI_VENDOR_ID_V3, // 0x11b0
+ ATT = bindings::PCI_VENDOR_ID_ATT, // 0x11c1
+ SPECIALIX = bindings::PCI_VENDOR_ID_SPECIALIX, // 0x11cb
+ ANALOG_DEVICES = bindings::PCI_VENDOR_ID_ANALOG_DEVICES, // 0x11d4
+ ZORAN = bindings::PCI_VENDOR_ID_ZORAN, // 0x11de
+ COMPEX = bindings::PCI_VENDOR_ID_COMPEX, // 0x11f6
+ MICROSEMI = bindings::PCI_VENDOR_ID_MICROSEMI, // 0x11f8
+ RP = bindings::PCI_VENDOR_ID_RP, // 0x11fe
+ CYCLADES = bindings::PCI_VENDOR_ID_CYCLADES, // 0x120e
+ ESSENTIAL = bindings::PCI_VENDOR_ID_ESSENTIAL, // 0x120f
+ O2 = bindings::PCI_VENDOR_ID_O2, // 0x1217
+ THREEDX = bindings::PCI_VENDOR_ID_3DFX, // 0x121a
+ AVM = bindings::PCI_VENDOR_ID_AVM, // 0x1244
+ STALLION = bindings::PCI_VENDOR_ID_STALLION, // 0x124d
+ AT = bindings::PCI_VENDOR_ID_AT, // 0x1259
+ ASIX = bindings::PCI_VENDOR_ID_ASIX, // 0x125b
+ ESS = bindings::PCI_VENDOR_ID_ESS, // 0x125d
+ SATSAGEM = bindings::PCI_VENDOR_ID_SATSAGEM, // 0x1267
+ ENSONIQ = bindings::PCI_VENDOR_ID_ENSONIQ, // 0x1274
+ TRANSMETA = bindings::PCI_VENDOR_ID_TRANSMETA, // 0x1279
+ ROCKWELL = bindings::PCI_VENDOR_ID_ROCKWELL, // 0x127A
+ ITE = bindings::PCI_VENDOR_ID_ITE, // 0x1283
+ ALTEON = bindings::PCI_VENDOR_ID_ALTEON, // 0x12ae
+ NVIDIA_SGS = bindings::PCI_VENDOR_ID_NVIDIA_SGS, // 0x12d2
+ PERICOM = bindings::PCI_VENDOR_ID_PERICOM, // 0x12D8
+ AUREAL = bindings::PCI_VENDOR_ID_AUREAL, // 0x12eb
+ ELECTRONICDESIGNGMBH = bindings::PCI_VENDOR_ID_ELECTRONICDESIGNGMBH, // 0x12f8
+ ESDGMBH = bindings::PCI_VENDOR_ID_ESDGMBH, // 0x12fe
+ CB = bindings::PCI_VENDOR_ID_CB, // 0x1307
+ SIIG = bindings::PCI_VENDOR_ID_SIIG, // 0x131f
+ RADISYS = bindings::PCI_VENDOR_ID_RADISYS, // 0x1331
+ MICRO_MEMORY = bindings::PCI_VENDOR_ID_MICRO_MEMORY, // 0x1332
+ DOMEX = bindings::PCI_VENDOR_ID_DOMEX, // 0x134a
+ INTASHIELD = bindings::PCI_VENDOR_ID_INTASHIELD, // 0x135a
+ QUATECH = bindings::PCI_VENDOR_ID_QUATECH, // 0x135C
+ SEALEVEL = bindings::PCI_VENDOR_ID_SEALEVEL, // 0x135e
+ HYPERCOPE = bindings::PCI_VENDOR_ID_HYPERCOPE, // 0x1365
+ DIGIGRAM = bindings::PCI_VENDOR_ID_DIGIGRAM, // 0x1369
+ KAWASAKI = bindings::PCI_VENDOR_ID_KAWASAKI, // 0x136b
+ CNET = bindings::PCI_VENDOR_ID_CNET, // 0x1371
+ LMC = bindings::PCI_VENDOR_ID_LMC, // 0x1376
+ NETGEAR = bindings::PCI_VENDOR_ID_NETGEAR, // 0x1385
+ APPLICOM = bindings::PCI_VENDOR_ID_APPLICOM, // 0x1389
+ MOXA = bindings::PCI_VENDOR_ID_MOXA, // 0x1393
+ CCD = bindings::PCI_VENDOR_ID_CCD, // 0x1397
+ EXAR = bindings::PCI_VENDOR_ID_EXAR, // 0x13a8
+ MICROGATE = bindings::PCI_VENDOR_ID_MICROGATE, // 0x13c0
+ THREEWARE = bindings::PCI_VENDOR_ID_3WARE, // 0x13C1
+ IOMEGA = bindings::PCI_VENDOR_ID_IOMEGA, // 0x13ca
+ ABOCOM = bindings::PCI_VENDOR_ID_ABOCOM, // 0x13D1
+ SUNDANCE = bindings::PCI_VENDOR_ID_SUNDANCE, // 0x13f0
+ CMEDIA = bindings::PCI_VENDOR_ID_CMEDIA, // 0x13f6
+ ADVANTECH = bindings::PCI_VENDOR_ID_ADVANTECH, // 0x13fe
+ MEILHAUS = bindings::PCI_VENDOR_ID_MEILHAUS, // 0x1402
+ LAVA = bindings::PCI_VENDOR_ID_LAVA, // 0x1407
+ TIMEDIA = bindings::PCI_VENDOR_ID_TIMEDIA, // 0x1409
+ ICE = bindings::PCI_VENDOR_ID_ICE, // 0x1412
+ MICROSOFT = bindings::PCI_VENDOR_ID_MICROSOFT, // 0x1414
+ OXSEMI = bindings::PCI_VENDOR_ID_OXSEMI, // 0x1415
+ CHELSIO = bindings::PCI_VENDOR_ID_CHELSIO, // 0x1425
+ EDIMAX = bindings::PCI_VENDOR_ID_EDIMAX, // 0x1432
+ ADLINK = bindings::PCI_VENDOR_ID_ADLINK, // 0x144a
+ SAMSUNG = bindings::PCI_VENDOR_ID_SAMSUNG, // 0x144d
+ GIGABYTE = bindings::PCI_VENDOR_ID_GIGABYTE, // 0x1458
+ AMBIT = bindings::PCI_VENDOR_ID_AMBIT, // 0x1468
+ MYRICOM = bindings::PCI_VENDOR_ID_MYRICOM, // 0x14c1
+ MEDIATEK = bindings::PCI_VENDOR_ID_MEDIATEK, // 0x14c3
+ TITAN = bindings::PCI_VENDOR_ID_TITAN, // 0x14D2
+ PANACOM = bindings::PCI_VENDOR_ID_PANACOM, // 0x14d4
+ SIPACKETS = bindings::PCI_VENDOR_ID_SIPACKETS, // 0x14d9
+ AFAVLAB = bindings::PCI_VENDOR_ID_AFAVLAB, // 0x14db
+ AMPLICON = bindings::PCI_VENDOR_ID_AMPLICON, // 0x14dc
+ BCM_GVC = bindings::PCI_VENDOR_ID_BCM_GVC, // 0x14a4
+ BROADCOM = bindings::PCI_VENDOR_ID_BROADCOM, // 0x14e4
+ TOPIC = bindings::PCI_VENDOR_ID_TOPIC, // 0x151f
+ MAINPINE = bindings::PCI_VENDOR_ID_MAINPINE, // 0x1522
+ ENE = bindings::PCI_VENDOR_ID_ENE, // 0x1524
+ SYBA = bindings::PCI_VENDOR_ID_SYBA, // 0x1592
+ MORETON = bindings::PCI_VENDOR_ID_MORETON, // 0x15aa
+ VMWARE = bindings::PCI_VENDOR_ID_VMWARE, // 0x15ad
+ ZOLTRIX = bindings::PCI_VENDOR_ID_ZOLTRIX, // 0x15b0
+ MELLANOX = bindings::PCI_VENDOR_ID_MELLANOX, // 0x15b3
+ DFI = bindings::PCI_VENDOR_ID_DFI, // 0x15bd
+ QUICKNET = bindings::PCI_VENDOR_ID_QUICKNET, // 0x15e2
+ ADDIDATA = bindings::PCI_VENDOR_ID_ADDIDATA, // 0x15B8
+ PDC = bindings::PCI_VENDOR_ID_PDC, // 0x15e9
+ FARSITE = bindings::PCI_VENDOR_ID_FARSITE, // 0x1619
+ ARIMA = bindings::PCI_VENDOR_ID_ARIMA, // 0x161f
+ BROCADE = bindings::PCI_VENDOR_ID_BROCADE, // 0x1657
+ SIBYTE = bindings::PCI_VENDOR_ID_SIBYTE, // 0x166d
+ ATHEROS = bindings::PCI_VENDOR_ID_ATHEROS, // 0x168c
+ NETCELL = bindings::PCI_VENDOR_ID_NETCELL, // 0x169c
+ CENATEK = bindings::PCI_VENDOR_ID_CENATEK, // 0x16CA
+ SYNOPSYS = bindings::PCI_VENDOR_ID_SYNOPSYS, // 0x16c3
+ USR = bindings::PCI_VENDOR_ID_USR, // 0x16ec
+ VITESSE = bindings::PCI_VENDOR_ID_VITESSE, // 0x1725
+ LINKSYS = bindings::PCI_VENDOR_ID_LINKSYS, // 0x1737
+ ALTIMA = bindings::PCI_VENDOR_ID_ALTIMA, // 0x173b
+ CAVIUM = bindings::PCI_VENDOR_ID_CAVIUM, // 0x177d
+ TECHWELL = bindings::PCI_VENDOR_ID_TECHWELL, // 0x1797
+ BELKIN = bindings::PCI_VENDOR_ID_BELKIN, // 0x1799
+ RDC = bindings::PCI_VENDOR_ID_RDC, // 0x17f3
+ GLI = bindings::PCI_VENDOR_ID_GLI, // 0x17a0
+ LENOVO = bindings::PCI_VENDOR_ID_LENOVO, // 0x17aa
+ QCOM = bindings::PCI_VENDOR_ID_QCOM, // 0x17cb
+ CDNS = bindings::PCI_VENDOR_ID_CDNS, // 0x17cd
+ ARECA = bindings::PCI_VENDOR_ID_ARECA, // 0x17d3
+ S2IO = bindings::PCI_VENDOR_ID_S2IO, // 0x17d5
+ SITECOM = bindings::PCI_VENDOR_ID_SITECOM, // 0x182d
+ TOPSPIN = bindings::PCI_VENDOR_ID_TOPSPIN, // 0x1867
+ COMMTECH = bindings::PCI_VENDOR_ID_COMMTECH, // 0x18f7
+ SILAN = bindings::PCI_VENDOR_ID_SILAN, // 0x1904
+ RENESAS = bindings::PCI_VENDOR_ID_RENESAS, // 0x1912
+ SOLARFLARE = bindings::PCI_VENDOR_ID_SOLARFLARE, // 0x1924
+ TDI = bindings::PCI_VENDOR_ID_TDI, // 0x192E
+ NXP = bindings::PCI_VENDOR_ID_NXP, // 0x1957
+ PASEMI = bindings::PCI_VENDOR_ID_PASEMI, // 0x1959
+ ATTANSIC = bindings::PCI_VENDOR_ID_ATTANSIC, // 0x1969
+ JMICRON = bindings::PCI_VENDOR_ID_JMICRON, // 0x197B
+ KORENIX = bindings::PCI_VENDOR_ID_KORENIX, // 0x1982
+ HUAWEI = bindings::PCI_VENDOR_ID_HUAWEI, // 0x19e5
+ NETRONOME = bindings::PCI_VENDOR_ID_NETRONOME, // 0x19ee
+ QMI = bindings::PCI_VENDOR_ID_QMI, // 0x1a32
+ AZWAVE = bindings::PCI_VENDOR_ID_AZWAVE, // 0x1a3b
+ REDHAT_QUMRANET = bindings::PCI_VENDOR_ID_REDHAT_QUMRANET, // 0x1af4
+ ASMEDIA = bindings::PCI_VENDOR_ID_ASMEDIA, // 0x1b21
+ REDHAT = bindings::PCI_VENDOR_ID_REDHAT, // 0x1b36
+ WCHIC = bindings::PCI_VENDOR_ID_WCHIC, // 0x1c00
+ SILICOM_DENMARK = bindings::PCI_VENDOR_ID_SILICOM_DENMARK, // 0x1c2c
+ AMAZON_ANNAPURNA_LABS = bindings::PCI_VENDOR_ID_AMAZON_ANNAPURNA_LABS, // 0x1c36
+ CIRCUITCO = bindings::PCI_VENDOR_ID_CIRCUITCO, // 0x1cc8
+ AMAZON = bindings::PCI_VENDOR_ID_AMAZON, // 0x1d0f
+ ZHAOXIN = bindings::PCI_VENDOR_ID_ZHAOXIN, // 0x1d17
+ ROCKCHIP = bindings::PCI_VENDOR_ID_ROCKCHIP, // 0x1d87
+ HYGON = bindings::PCI_VENDOR_ID_HYGON, // 0x1d94
+ META = bindings::PCI_VENDOR_ID_META, // 0x1d9b
+ FUNGIBLE = bindings::PCI_VENDOR_ID_FUNGIBLE, // 0x1dad
+ HXT = bindings::PCI_VENDOR_ID_HXT, // 0x1dbf
+ TEKRAM = bindings::PCI_VENDOR_ID_TEKRAM, // 0x1de1
+ RPI = bindings::PCI_VENDOR_ID_RPI, // 0x1de4
+ ALIBABA = bindings::PCI_VENDOR_ID_ALIBABA, // 0x1ded
+ CXL = bindings::PCI_VENDOR_ID_CXL, // 0x1e98
+ TEHUTI = bindings::PCI_VENDOR_ID_TEHUTI, // 0x1fc9
+ SUNIX = bindings::PCI_VENDOR_ID_SUNIX, // 0x1fd4
+ HINT = bindings::PCI_VENDOR_ID_HINT, // 0x3388
+ THREEDLABS = bindings::PCI_VENDOR_ID_3DLABS, // 0x3d3d
+ NETXEN = bindings::PCI_VENDOR_ID_NETXEN, // 0x4040
+ AKS = bindings::PCI_VENDOR_ID_AKS, // 0x416c
+ WCHCN = bindings::PCI_VENDOR_ID_WCHCN, // 0x4348
+ ACCESSIO = bindings::PCI_VENDOR_ID_ACCESSIO, // 0x494f
+ S3 = bindings::PCI_VENDOR_ID_S3, // 0x5333
+ DUNORD = bindings::PCI_VENDOR_ID_DUNORD, // 0x5544
+ DCI = bindings::PCI_VENDOR_ID_DCI, // 0x6666
+ GLENFLY = bindings::PCI_VENDOR_ID_GLENFLY, // 0x6766
+ INTEL = bindings::PCI_VENDOR_ID_INTEL, // 0x8086
+ WANGXUN = bindings::PCI_VENDOR_ID_WANGXUN, // 0x8088
+ SCALEMP = bindings::PCI_VENDOR_ID_SCALEMP, // 0x8686
+ COMPUTONE = bindings::PCI_VENDOR_ID_COMPUTONE, // 0x8e0e
+ KTI = bindings::PCI_VENDOR_ID_KTI, // 0x8e2e
+ ADAPTEC = bindings::PCI_VENDOR_ID_ADAPTEC, // 0x9004
+ ADAPTEC2 = bindings::PCI_VENDOR_ID_ADAPTEC2, // 0x9005
+ HOLTEK = bindings::PCI_VENDOR_ID_HOLTEK, // 0x9412
+ NETMOS = bindings::PCI_VENDOR_ID_NETMOS, // 0x9710
+ THREECOM_2 = bindings::PCI_VENDOR_ID_3COM_2, // 0xa727
+ SOLIDRUN = bindings::PCI_VENDOR_ID_SOLIDRUN, // 0xd063
+ DIGIUM = bindings::PCI_VENDOR_ID_DIGIUM, // 0xd161
+ TIGERJET = bindings::PCI_VENDOR_ID_TIGERJET, // 0xe159
+ XILINX_RME = bindings::PCI_VENDOR_ID_XILINX_RME, // 0xea60
+ XEN = bindings::PCI_VENDOR_ID_XEN, // 0x5853
+ OCZ = bindings::PCI_VENDOR_ID_OCZ, // 0x1b85
+ NCUBE = bindings::PCI_VENDOR_ID_NCUBE, // 0x10ff
+}
diff --git a/rust/kernel/pid_namespace.rs b/rust/kernel/pid_namespace.rs
index 0e93808e4639..979a9718f153 100644
--- a/rust/kernel/pid_namespace.rs
+++ b/rust/kernel/pid_namespace.rs
@@ -7,10 +7,7 @@
//! C header: [`include/linux/pid_namespace.h`](srctree/include/linux/pid_namespace.h) and
//! [`include/linux/pid.h`](srctree/include/linux/pid.h)
-use crate::{
- bindings,
- types::{AlwaysRefCounted, Opaque},
-};
+use crate::{bindings, sync::aref::AlwaysRefCounted, types::Opaque};
use core::ptr;
/// Wraps the kernel's `struct pid_namespace`. Thread safe.
diff --git a/rust/kernel/platform.rs b/rust/kernel/platform.rs
index 8f028c76f9fa..7205fe3416d3 100644
--- a/rust/kernel/platform.rs
+++ b/rust/kernel/platform.rs
@@ -10,6 +10,7 @@ use crate::{
driver,
error::{from_result, to_result, Result},
io::{mem::IoRequest, Resource},
+ irq::{self, IrqRequest},
of,
prelude::*,
types::Opaque,
@@ -284,6 +285,181 @@ impl Device<Bound> {
}
}
+macro_rules! define_irq_accessor_by_index {
+ (
+ $(#[$meta:meta])* $fn_name:ident,
+ $request_fn:ident,
+ $reg_type:ident,
+ $handler_trait:ident
+ ) => {
+ $(#[$meta])*
+ pub fn $fn_name<'a, T: irq::$handler_trait + 'static>(
+ &'a self,
+ flags: irq::Flags,
+ index: u32,
+ name: &'static CStr,
+ handler: impl PinInit<T, Error> + 'a,
+ ) -> Result<impl PinInit<irq::$reg_type<T>, Error> + 'a> {
+ let request = self.$request_fn(index)?;
+
+ Ok(irq::$reg_type::<T>::new(
+ request,
+ flags,
+ name,
+ handler,
+ ))
+ }
+ };
+}
+
+macro_rules! define_irq_accessor_by_name {
+ (
+ $(#[$meta:meta])* $fn_name:ident,
+ $request_fn:ident,
+ $reg_type:ident,
+ $handler_trait:ident
+ ) => {
+ $(#[$meta])*
+ pub fn $fn_name<'a, T: irq::$handler_trait + 'static>(
+ &'a self,
+ flags: irq::Flags,
+ irq_name: &CStr,
+ name: &'static CStr,
+ handler: impl PinInit<T, Error> + 'a,
+ ) -> Result<impl PinInit<irq::$reg_type<T>, Error> + 'a> {
+ let request = self.$request_fn(irq_name)?;
+
+ Ok(irq::$reg_type::<T>::new(
+ request,
+ flags,
+ name,
+ handler,
+ ))
+ }
+ };
+}
+
+impl Device<Bound> {
+ /// Returns an [`IrqRequest`] for the IRQ at the given index, if any.
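+    ///
+    /// # Examples
+    ///
+    /// A minimal sketch of looking up the first IRQ during probe; registering
+    /// a handler for it is left to [`irq::Registration`]:
+    ///
+    /// ```
+    /// # use kernel::{device::Bound, irq::IrqRequest, platform, prelude::*};
+    /// fn probe(pdev: &platform::Device<Bound>) -> Result {
+    ///     // Pass the request on to `irq::Registration::new()` with a handler
+    ///     // to actually request the line.
+    ///     let _request: IrqRequest<'_> = pdev.irq_by_index(0)?;
+    ///     Ok(())
+    /// }
+    /// ```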
+ pub fn irq_by_index(&self, index: u32) -> Result<IrqRequest<'_>> {
+ // SAFETY: `self.as_raw` returns a valid pointer to a `struct platform_device`.
+ let irq = unsafe { bindings::platform_get_irq(self.as_raw(), index) };
+
+ if irq < 0 {
+ return Err(Error::from_errno(irq));
+ }
+
+ // SAFETY: `irq` is guaranteed to be a valid IRQ number for `&self`.
+ Ok(unsafe { IrqRequest::new(self.as_ref(), irq as u32) })
+ }
+
+ /// Returns an [`IrqRequest`] for the IRQ at the given index, but does not
+ /// print an error if the IRQ cannot be obtained.
+ pub fn optional_irq_by_index(&self, index: u32) -> Result<IrqRequest<'_>> {
+ // SAFETY: `self.as_raw` returns a valid pointer to a `struct platform_device`.
+ let irq = unsafe { bindings::platform_get_irq_optional(self.as_raw(), index) };
+
+ if irq < 0 {
+ return Err(Error::from_errno(irq));
+ }
+
+ // SAFETY: `irq` is guaranteed to be a valid IRQ number for `&self`.
+ Ok(unsafe { IrqRequest::new(self.as_ref(), irq as u32) })
+ }
+
+ /// Returns an [`IrqRequest`] for the IRQ with the given name, if any.
+ pub fn irq_by_name(&self, name: &CStr) -> Result<IrqRequest<'_>> {
+ // SAFETY: `self.as_raw` returns a valid pointer to a `struct platform_device`.
+ let irq = unsafe { bindings::platform_get_irq_byname(self.as_raw(), name.as_char_ptr()) };
+
+ if irq < 0 {
+ return Err(Error::from_errno(irq));
+ }
+
+ // SAFETY: `irq` is guaranteed to be a valid IRQ number for `&self`.
+ Ok(unsafe { IrqRequest::new(self.as_ref(), irq as u32) })
+ }
+
+ /// Returns an [`IrqRequest`] for the IRQ with the given name, but does not
+ /// print an error if the IRQ cannot be obtained.
+ pub fn optional_irq_by_name(&self, name: &CStr) -> Result<IrqRequest<'_>> {
+ // SAFETY: `self.as_raw` returns a valid pointer to a `struct platform_device`.
+ let irq = unsafe {
+ bindings::platform_get_irq_byname_optional(self.as_raw(), name.as_char_ptr())
+ };
+
+ if irq < 0 {
+ return Err(Error::from_errno(irq));
+ }
+
+ // SAFETY: `irq` is guaranteed to be a valid IRQ number for `&self`.
+ Ok(unsafe { IrqRequest::new(self.as_ref(), irq as u32) })
+ }
+
+ define_irq_accessor_by_index!(
+ /// Returns a [`irq::Registration`] for the IRQ at the given index.
+ request_irq_by_index,
+ irq_by_index,
+ Registration,
+ Handler
+ );
+ define_irq_accessor_by_name!(
+ /// Returns a [`irq::Registration`] for the IRQ with the given name.
+ request_irq_by_name,
+ irq_by_name,
+ Registration,
+ Handler
+ );
+ define_irq_accessor_by_index!(
+ /// Does the same as [`Self::request_irq_by_index`], except that it does
+ /// not print an error message if the IRQ cannot be obtained.
+ request_optional_irq_by_index,
+ optional_irq_by_index,
+ Registration,
+ Handler
+ );
+ define_irq_accessor_by_name!(
+ /// Does the same as [`Self::request_irq_by_name`], except that it does
+ /// not print an error message if the IRQ cannot be obtained.
+ request_optional_irq_by_name,
+ optional_irq_by_name,
+ Registration,
+ Handler
+ );
+
+ define_irq_accessor_by_index!(
+ /// Returns a [`irq::ThreadedRegistration`] for the IRQ at the given index.
+ request_threaded_irq_by_index,
+ irq_by_index,
+ ThreadedRegistration,
+ ThreadedHandler
+ );
+ define_irq_accessor_by_name!(
+ /// Returns a [`irq::ThreadedRegistration`] for the IRQ with the given name.
+ request_threaded_irq_by_name,
+ irq_by_name,
+ ThreadedRegistration,
+ ThreadedHandler
+ );
+ define_irq_accessor_by_index!(
+ /// Does the same as [`Self::request_threaded_irq_by_index`], except
+ /// that it does not print an error message if the IRQ cannot be
+ /// obtained.
+ request_optional_threaded_irq_by_index,
+ optional_irq_by_index,
+ ThreadedRegistration,
+ ThreadedHandler
+ );
+ define_irq_accessor_by_name!(
+ /// Does the same as [`Self::request_threaded_irq_by_name`], except that
+ /// it does not print an error message if the IRQ cannot be obtained.
+ request_optional_threaded_irq_by_name,
+ optional_irq_by_name,
+ ThreadedRegistration,
+ ThreadedHandler
+ );
+}
+
// SAFETY: `Device` is a transparent wrapper of a type that doesn't depend on `Device`'s generic
// argument.
kernel::impl_device_context_deref!(unsafe { Device });
@@ -292,7 +468,7 @@ kernel::impl_device_context_into_aref!(Device);
impl crate::dma::Device for Device<device::Core> {}
// SAFETY: Instances of `Device` are always reference-counted.
-unsafe impl crate::types::AlwaysRefCounted for Device {
+unsafe impl crate::sync::aref::AlwaysRefCounted for Device {
fn inc_ref(&self) {
// SAFETY: The existence of a shared reference guarantees that the refcount is non-zero.
unsafe { bindings::get_device(self.as_ref().as_raw()) };
diff --git a/rust/kernel/prelude.rs b/rust/kernel/prelude.rs
index 25fe97aafd02..198d09a31449 100644
--- a/rust/kernel/prelude.rs
+++ b/rust/kernel/prelude.rs
@@ -12,7 +12,10 @@
//! ```
#[doc(no_inline)]
-pub use core::pin::Pin;
+pub use core::{
+ mem::{align_of, align_of_val, size_of, size_of_val},
+ pin::Pin,
+};
pub use ::ffi::{
c_char, c_int, c_long, c_longlong, c_schar, c_short, c_uchar, c_uint, c_ulong, c_ulonglong,
diff --git a/rust/kernel/processor.rs b/rust/kernel/processor.rs
new file mode 100644
index 000000000000..85b49b3614dd
--- /dev/null
+++ b/rust/kernel/processor.rs
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Processor related primitives.
+//!
+//! C header: [`include/linux/processor.h`](srctree/include/linux/processor.h)
+
+/// Lowers CPU power consumption or yields to a hyperthreaded twin processor.
+///
+/// It also happens to serve as a compiler barrier.
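+///
+/// # Examples
+///
+/// A sketch of a bounded busy-wait loop; real code would poll a hardware
+/// condition rather than a counter:
+///
+/// ```
+/// use kernel::processor::cpu_relax;
+///
+/// let mut retries = 0;
+/// while retries < 8 {
+///     // Be polite to a hyperthreaded twin while spinning.
+///     cpu_relax();
+///     retries += 1;
+/// }
+/// ```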
+#[inline]
+pub fn cpu_relax() {
+ // SAFETY: Always safe to call.
+ unsafe { bindings::cpu_relax() }
+}
diff --git a/rust/kernel/ptr.rs b/rust/kernel/ptr.rs
new file mode 100644
index 000000000000..2e5e2a090480
--- /dev/null
+++ b/rust/kernel/ptr.rs
@@ -0,0 +1,228 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Types and functions to work with pointers and addresses.
+
+use core::fmt::Debug;
+use core::mem::align_of;
+use core::num::NonZero;
+
+use crate::build_assert;
+
+/// Type representing an alignment, which is always a power of two.
+///
+/// It is used to validate that a given value is a valid alignment, and to perform masking and
+/// alignment operations.
+///
+/// This is a temporary substitute for the [`Alignment`] nightly type from the standard library,
+/// and will eventually be replaced by it.
+///
+/// [`Alignment`]: https://github.com/rust-lang/rust/issues/102070
+///
+/// # Invariants
+///
+/// An alignment is always a power of two.
+#[repr(transparent)]
+#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub struct Alignment(NonZero<usize>);
+
+impl Alignment {
+    /// Validates that `ALIGN` is a power of two at build time, and returns an [`Alignment`] of the
+ /// same value.
+ ///
+ /// A build error is triggered if `ALIGN` is not a power of two.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use kernel::ptr::Alignment;
+ ///
+ /// let v = Alignment::new::<16>();
+ /// assert_eq!(v.as_usize(), 16);
+ /// ```
+ #[inline(always)]
+ pub const fn new<const ALIGN: usize>() -> Self {
+ build_assert!(
+ ALIGN.is_power_of_two(),
+ "Provided alignment is not a power of two."
+ );
+
+ // INVARIANT: `align` is a power of two.
+ // SAFETY: `align` is a power of two, and thus non-zero.
+ Self(unsafe { NonZero::new_unchecked(ALIGN) })
+ }
+
+ /// Validates that `align` is a power of two at runtime, and returns an
+ /// [`Alignment`] of the same value.
+ ///
+ /// Returns [`None`] if `align` is not a power of two.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use kernel::ptr::Alignment;
+ ///
+ /// assert_eq!(Alignment::new_checked(16), Some(Alignment::new::<16>()));
+ /// assert_eq!(Alignment::new_checked(15), None);
+ /// assert_eq!(Alignment::new_checked(1), Some(Alignment::new::<1>()));
+ /// assert_eq!(Alignment::new_checked(0), None);
+ /// ```
+ #[inline(always)]
+ pub const fn new_checked(align: usize) -> Option<Self> {
+ if align.is_power_of_two() {
+ // INVARIANT: `align` is a power of two.
+ // SAFETY: `align` is a power of two, and thus non-zero.
+ Some(Self(unsafe { NonZero::new_unchecked(align) }))
+ } else {
+ None
+ }
+ }
+
+ /// Returns the alignment of `T`.
+ ///
+ /// This is equivalent to [`align_of`], but with the return value provided as an [`Alignment`].
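+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use kernel::ptr::Alignment;
+    ///
+    /// assert_eq!(Alignment::of::<u64>().as_usize(), core::mem::align_of::<u64>());
+    /// ```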
+ #[inline(always)]
+ pub const fn of<T>() -> Self {
+ #![allow(clippy::incompatible_msrv)]
+ // This cannot panic since alignments are always powers of two.
+ //
+ // We unfortunately cannot use `new` as it would require the `generic_const_exprs` feature.
+ const { Alignment::new_checked(align_of::<T>()).unwrap() }
+ }
+
+ /// Returns this alignment as a [`usize`].
+ ///
+ /// It is guaranteed to be a power of two.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use kernel::ptr::Alignment;
+ ///
+ /// assert_eq!(Alignment::new::<16>().as_usize(), 16);
+ /// ```
+ #[inline(always)]
+ pub const fn as_usize(self) -> usize {
+ self.as_nonzero().get()
+ }
+
+ /// Returns this alignment as a [`NonZero`].
+ ///
+ /// It is guaranteed to be a power of two.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use kernel::ptr::Alignment;
+ ///
+ /// assert_eq!(Alignment::new::<16>().as_nonzero().get(), 16);
+ /// ```
+ #[inline(always)]
+ pub const fn as_nonzero(self) -> NonZero<usize> {
+ // Allow the compiler to know that the value is indeed a power of two. This can help
+        // optimize some operations down the line, e.g. replacing divisions with bit shifts.
+ if !self.0.is_power_of_two() {
+ // SAFETY: Per the invariants, `self.0` is always a power of two so this block will
+ // never be reached.
+ unsafe { core::hint::unreachable_unchecked() }
+ }
+ self.0
+ }
+
+ /// Returns the base-2 logarithm of the alignment.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use kernel::ptr::Alignment;
+ ///
+ /// assert_eq!(Alignment::of::<u8>().log2(), 0);
+ /// assert_eq!(Alignment::new::<16>().log2(), 4);
+ /// ```
+ #[inline(always)]
+ pub const fn log2(self) -> u32 {
+ self.0.ilog2()
+ }
+
+ /// Returns the mask for this alignment.
+ ///
+ /// This is equivalent to `!(self.as_usize() - 1)`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use kernel::ptr::Alignment;
+ ///
+ /// assert_eq!(Alignment::new::<0x10>().mask(), !0xf);
+ /// ```
+ #[inline(always)]
+ pub const fn mask(self) -> usize {
+ // No underflow can occur as the alignment is guaranteed to be a power of two, and thus is
+ // non-zero.
+ !(self.as_usize() - 1)
+ }
+}
+
+/// Trait for items that can be aligned against an [`Alignment`].
+pub trait Alignable: Sized {
+ /// Aligns `self` down to `alignment`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use kernel::ptr::{Alignable, Alignment};
+ ///
+ /// assert_eq!(0x2f_usize.align_down(Alignment::new::<0x10>()), 0x20);
+ /// assert_eq!(0x30usize.align_down(Alignment::new::<0x10>()), 0x30);
+ /// assert_eq!(0xf0u8.align_down(Alignment::new::<0x1000>()), 0x0);
+ /// ```
+ fn align_down(self, alignment: Alignment) -> Self;
+
+ /// Aligns `self` up to `alignment`, returning `None` if aligning would result in an overflow.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use kernel::ptr::{Alignable, Alignment};
+ ///
+ /// assert_eq!(0x4fusize.align_up(Alignment::new::<0x10>()), Some(0x50));
+ /// assert_eq!(0x40usize.align_up(Alignment::new::<0x10>()), Some(0x40));
+ /// assert_eq!(0x0usize.align_up(Alignment::new::<0x10>()), Some(0x0));
+ /// assert_eq!(u8::MAX.align_up(Alignment::new::<0x10>()), None);
+ /// assert_eq!(0x10u8.align_up(Alignment::new::<0x100>()), None);
+ /// assert_eq!(0x0u8.align_up(Alignment::new::<0x100>()), Some(0x0));
+ /// ```
+ fn align_up(self, alignment: Alignment) -> Option<Self>;
+}
+
+/// Implement [`Alignable`] for unsigned integer types.
+macro_rules! impl_alignable_uint {
+ ($($t:ty),*) => {
+ $(
+ impl Alignable for $t {
+ #[inline(always)]
+ fn align_down(self, alignment: Alignment) -> Self {
+ // The operands of `&` need to be of the same type so convert the alignment to
+ // `Self`. This means we need to compute the mask ourselves.
+ ::core::num::NonZero::<Self>::try_from(alignment.as_nonzero())
+ .map(|align| self & !(align.get() - 1))
+ // An alignment larger than `Self` always aligns down to `0`.
+ .unwrap_or(0)
+ }
+
+ #[inline(always)]
+ fn align_up(self, alignment: Alignment) -> Option<Self> {
+ let aligned_down = self.align_down(alignment);
+ if self == aligned_down {
+ Some(aligned_down)
+ } else {
+ Self::try_from(alignment.as_usize())
+ .ok()
+ .and_then(|align| aligned_down.checked_add(align))
+ }
+ }
+ }
+ )*
+ };
+}
+
+impl_alignable_uint!(u8, u16, u32, u64, usize);
diff --git a/rust/kernel/regulator.rs b/rust/kernel/regulator.rs
index 65f3a125348f..b55a201e5029 100644
--- a/rust/kernel/regulator.rs
+++ b/rust/kernel/regulator.rs
@@ -18,7 +18,7 @@
use crate::{
bindings,
- device::Device,
+ device::{Bound, Device},
error::{from_err_ptr, to_result, Result},
prelude::*,
};
@@ -30,7 +30,6 @@ mod private {
impl Sealed for super::Enabled {}
impl Sealed for super::Disabled {}
- impl Sealed for super::Dynamic {}
}
/// A trait representing the different states a [`Regulator`] can be in.
@@ -50,13 +49,6 @@ pub struct Enabled;
/// own an `enable` reference count, but the regulator may still be on.
pub struct Disabled;
-/// A state that models the C API. The [`Regulator`] can be either enabled or
-/// disabled, and the user is in control of the reference count. This is also
-/// the default state.
-///
-/// Use [`Regulator::is_enabled`] to check the regulator's current state.
-pub struct Dynamic;
-
impl RegulatorState for Enabled {
const DISABLE_ON_DROP: bool = true;
}
@@ -65,14 +57,9 @@ impl RegulatorState for Disabled {
const DISABLE_ON_DROP: bool = false;
}
-impl RegulatorState for Dynamic {
- const DISABLE_ON_DROP: bool = false;
-}
-
/// A trait that abstracts the ability to check if a [`Regulator`] is enabled.
pub trait IsEnabled: RegulatorState {}
impl IsEnabled for Disabled {}
-impl IsEnabled for Dynamic {}
/// An error that can occur when trying to convert a [`Regulator`] between states.
pub struct Error<State: RegulatorState> {
@@ -82,6 +69,41 @@ pub struct Error<State: RegulatorState> {
/// The regulator that caused the error, so that the operation may be retried.
pub regulator: Regulator<State>,
}
+/// Obtains and enables a [`devres`]-managed regulator for a device.
+///
+/// This calls [`regulator_disable()`] and [`regulator_put()`] automatically on
+/// driver detach.
+///
+/// This API is identical to `devm_regulator_get_enable()`, and should be
+/// preferred over the [`Regulator<T: RegulatorState>`] API if the caller only
+/// cares about the regulator being enabled.
+///
+/// [`devres`]: https://docs.kernel.org/driver-api/driver-model/devres.html
+/// [`regulator_disable()`]: https://docs.kernel.org/driver-api/regulator.html#c.regulator_disable
+/// [`regulator_put()`]: https://docs.kernel.org/driver-api/regulator.html#c.regulator_put
+pub fn devm_enable(dev: &Device<Bound>, name: &CStr) -> Result {
+ // SAFETY: `dev` is a valid and bound device, while `name` is a valid C
+ // string.
+ to_result(unsafe { bindings::devm_regulator_get_enable(dev.as_raw(), name.as_ptr()) })
+}
+
+/// Same as [`devm_enable`], but calls `devm_regulator_get_enable_optional`
+/// instead.
+///
+/// This obtains and enables a [`devres`]-managed regulator for a device, but
+/// neither prints a message nor provides a dummy regulator if the regulator
+/// is not found.
+///
+/// This calls [`regulator_disable()`] and [`regulator_put()`] automatically on
+/// driver detach.
+///
+/// [`devres`]: https://docs.kernel.org/driver-api/driver-model/devres.html
+/// [`regulator_disable()`]: https://docs.kernel.org/driver-api/regulator.html#c.regulator_disable
+/// [`regulator_put()`]: https://docs.kernel.org/driver-api/regulator.html#c.regulator_put
+pub fn devm_enable_optional(dev: &Device<Bound>, name: &CStr) -> Result {
+ // SAFETY: `dev` is a valid and bound device, while `name` is a valid C
+ // string.
+ to_result(unsafe { bindings::devm_regulator_get_enable_optional(dev.as_raw(), name.as_ptr()) })
+}
/// A `struct regulator` abstraction.
///
@@ -159,6 +181,29 @@ pub struct Error<State: RegulatorState> {
/// }
/// ```
///
+/// If a driver only cares about the regulator being on for as long as it is
+/// bound to a device, then it should use [`devm_enable`] or
+/// [`devm_enable_optional`]. This should be the default use case unless more
+/// fine-grained control over the regulator's state is required.
+///
+/// [`devm_enable`]: crate::regulator::devm_enable
+/// [`devm_enable_optional`]: crate::regulator::devm_enable_optional
+///
+/// ```
+/// # use kernel::prelude::*;
+/// # use kernel::c_str;
+/// # use kernel::device::{Bound, Device};
+/// # use kernel::regulator;
+/// fn enable(dev: &Device<Bound>) -> Result {
+/// // Obtain a reference to a (fictitious) regulator and enable it. This
+/// // call only returns whether the operation succeeded.
+/// regulator::devm_enable(dev, c_str!("vcc"))?;
+///
+/// // The regulator will be disabled and put when `dev` is unbound.
+/// Ok(())
+/// }
+/// ```
+///
/// ## Disabling a regulator
///
/// ```
@@ -183,64 +228,13 @@ pub struct Error<State: RegulatorState> {
/// }
/// ```
///
-/// ## Using [`Regulator<Dynamic>`]
-///
-/// This example mimics the behavior of the C API, where the user is in
-/// control of the enabled reference count. This is useful for drivers that
-/// might call enable and disable to manage the `enable` reference count at
-/// runtime, perhaps as a result of `open()` and `close()` calls or whatever
-/// other driver-specific or subsystem-specific hooks.
-///
-/// ```
-/// # use kernel::prelude::*;
-/// # use kernel::c_str;
-/// # use kernel::device::Device;
-/// # use kernel::regulator::{Regulator, Dynamic};
-/// struct PrivateData {
-/// regulator: Regulator<Dynamic>,
-/// }
-///
-/// // A fictictious probe function that obtains a regulator and sets it up.
-/// fn probe(dev: &Device) -> Result<PrivateData> {
-/// // Obtain a reference to a (fictitious) regulator.
-/// let mut regulator = Regulator::<Dynamic>::get(dev, c_str!("vcc"))?;
-///
-/// Ok(PrivateData { regulator })
-/// }
-///
-/// // A fictictious function that indicates that the device is going to be used.
-/// fn open(dev: &Device, data: &mut PrivateData) -> Result {
-/// // Increase the `enabled` reference count.
-/// data.regulator.enable()?;
-///
-/// Ok(())
-/// }
-///
-/// fn close(dev: &Device, data: &mut PrivateData) -> Result {
-/// // Decrease the `enabled` reference count.
-/// data.regulator.disable()?;
-///
-/// Ok(())
-/// }
-///
-/// fn remove(dev: &Device, data: PrivateData) -> Result {
-/// // `PrivateData` is dropped here, which will drop the
-/// // `Regulator<Dynamic>` in turn.
-/// //
-/// // The reference that was obtained by `regulator_get()` will be
-/// // released, but it is up to the user to make sure that the number of calls
-/// // to `enable()` and `disabled()` are balanced before this point.
-/// Ok(())
-/// }
-/// ```
-///
/// # Invariants
///
/// - `inner` is a non-null wrapper over a pointer to a `struct
/// regulator` obtained from [`regulator_get()`].
///
/// [`regulator_get()`]: https://docs.kernel.org/driver-api/regulator.html#c.regulator_get
-pub struct Regulator<State = Dynamic>
+pub struct Regulator<State>
where
State: RegulatorState,
{
@@ -267,11 +261,8 @@ impl<T: RegulatorState> Regulator<T> {
pub fn get_voltage(&self) -> Result<Voltage> {
// SAFETY: Safe as per the type invariants of `Regulator`.
let voltage = unsafe { bindings::regulator_get_voltage(self.inner.as_ptr()) };
- if voltage < 0 {
- Err(kernel::error::Error::from_errno(voltage))
- } else {
- Ok(Voltage::from_microvolts(voltage))
- }
+
+ to_result(voltage).map(|()| Voltage::from_microvolts(voltage))
}
fn get_internal(dev: &Device, name: &CStr) -> Result<Regulator<T>> {
@@ -289,12 +280,12 @@ impl<T: RegulatorState> Regulator<T> {
})
}
- fn enable_internal(&mut self) -> Result {
+ fn enable_internal(&self) -> Result {
// SAFETY: Safe as per the type invariants of `Regulator`.
to_result(unsafe { bindings::regulator_enable(self.inner.as_ptr()) })
}
- fn disable_internal(&mut self) -> Result {
+ fn disable_internal(&self) -> Result {
// SAFETY: Safe as per the type invariants of `Regulator`.
to_result(unsafe { bindings::regulator_disable(self.inner.as_ptr()) })
}
@@ -310,7 +301,7 @@ impl Regulator<Disabled> {
pub fn try_into_enabled(self) -> Result<Regulator<Enabled>, Error<Disabled>> {
// We will be transferring the ownership of our `regulator_get()` count to
// `Regulator<Enabled>`.
- let mut regulator = ManuallyDrop::new(self);
+ let regulator = ManuallyDrop::new(self);
regulator
.enable_internal()
@@ -339,7 +330,7 @@ impl Regulator<Enabled> {
pub fn try_into_disabled(self) -> Result<Regulator<Disabled>, Error<Enabled>> {
// We will be transferring the ownership of our `regulator_get()` count
// to `Regulator<Disabled>`.
- let mut regulator = ManuallyDrop::new(self);
+ let regulator = ManuallyDrop::new(self);
regulator
.disable_internal()
@@ -354,28 +345,6 @@ impl Regulator<Enabled> {
}
}
-impl Regulator<Dynamic> {
- /// Obtains a [`Regulator`] instance from the system. The current state of
- /// the regulator is unknown and it is up to the user to manage the enabled
- /// reference count.
- ///
- /// This closely mimics the behavior of the C API and can be used to
- /// dynamically manage the enabled reference count at runtime.
- pub fn get(dev: &Device, name: &CStr) -> Result<Self> {
- Regulator::get_internal(dev, name)
- }
-
- /// Increases the `enabled` reference count.
- pub fn enable(&mut self) -> Result {
- self.enable_internal()
- }
-
- /// Decreases the `enabled` reference count.
- pub fn disable(&mut self) -> Result {
- self.disable_internal()
- }
-}
-
impl<T: IsEnabled> Regulator<T> {
/// Checks if the regulator is enabled.
pub fn is_enabled(&self) -> bool {
@@ -398,6 +367,14 @@ impl<T: RegulatorState> Drop for Regulator<T> {
}
}
+// SAFETY: It is safe to send a `Regulator<T>` across threads. In particular,
+// a `Regulator<T>` can be dropped from any thread.
+unsafe impl<T: RegulatorState> Send for Regulator<T> {}
+
+// SAFETY: It is safe to send a `&Regulator<T>` across threads because the C
+// side handles its own locking.
+unsafe impl<T: RegulatorState> Sync for Regulator<T> {}
+
/// A voltage.
///
/// This type represents a voltage value in microvolts.
diff --git a/rust/kernel/seq_file.rs b/rust/kernel/seq_file.rs
index 8f199b1a3bb1..59fbfc2473f8 100644
--- a/rust/kernel/seq_file.rs
+++ b/rust/kernel/seq_file.rs
@@ -4,7 +4,7 @@
//!
//! C header: [`include/linux/seq_file.h`](srctree/include/linux/seq_file.h)
-use crate::{bindings, c_str, types::NotThreadSafe, types::Opaque};
+use crate::{bindings, c_str, fmt, types::NotThreadSafe, types::Opaque};
/// A utility for generating the contents of a seq file.
#[repr(transparent)]
@@ -31,7 +31,7 @@ impl SeqFile {
/// Used by the [`seq_print`] macro.
#[inline]
- pub fn call_printf(&self, args: core::fmt::Arguments<'_>) {
+ pub fn call_printf(&self, args: fmt::Arguments<'_>) {
// SAFETY: Passing a void pointer to `Arguments` is valid for `%pA`.
unsafe {
bindings::seq_printf(
@@ -47,7 +47,7 @@ impl SeqFile {
#[macro_export]
macro_rules! seq_print {
($m:expr, $($arg:tt)+) => (
- $m.call_printf(format_args!($($arg)+))
+ $m.call_printf($crate::prelude::fmt!($($arg)+))
);
}
pub use seq_print;
diff --git a/rust/kernel/str.rs b/rust/kernel/str.rs
index 6c892550c0ba..5c74e5f77601 100644
--- a/rust/kernel/str.rs
+++ b/rust/kernel/str.rs
@@ -2,11 +2,16 @@
//! String representations.
-use crate::alloc::{flags::*, AllocError, KVec};
-use crate::fmt::{self, Write};
-use core::ops::{self, Deref, DerefMut, Index};
-
-use crate::prelude::*;
+use crate::{
+ alloc::{flags::*, AllocError, KVec},
+ error::{to_result, Result},
+ fmt::{self, Write},
+ prelude::*,
+};
+use core::{
+ marker::PhantomData,
+ ops::{self, Deref, DerefMut, Index},
+};
/// Byte string without UTF-8 validity guarantee.
#[repr(transparent)]
@@ -732,7 +737,7 @@ mod tests {
///
/// The memory region between `pos` (inclusive) and `end` (exclusive) is valid for writes if `pos`
/// is less than `end`.
-pub(crate) struct RawFormatter {
+pub struct RawFormatter {
// Use `usize` to use `saturating_*` functions.
beg: usize,
pos: usize,
@@ -790,7 +795,7 @@ impl RawFormatter {
}
/// Returns the number of bytes written to the formatter.
- pub(crate) fn bytes_written(&self) -> usize {
+ pub fn bytes_written(&self) -> usize {
self.pos - self.beg
}
}
@@ -824,9 +829,9 @@ impl fmt::Write for RawFormatter {
/// Allows formatting of [`fmt::Arguments`] into a raw buffer.
///
/// Fails if callers attempt to write more than will fit in the buffer.
-pub(crate) struct Formatter(RawFormatter);
+pub struct Formatter<'a>(RawFormatter, PhantomData<&'a mut ()>);
-impl Formatter {
+impl Formatter<'_> {
/// Creates a new instance of [`Formatter`] with the given buffer.
///
/// # Safety
@@ -835,11 +840,18 @@ impl Formatter {
/// for the lifetime of the returned [`Formatter`].
pub(crate) unsafe fn from_buffer(buf: *mut u8, len: usize) -> Self {
// SAFETY: The safety requirements of this function satisfy those of the callee.
- Self(unsafe { RawFormatter::from_buffer(buf, len) })
+ Self(unsafe { RawFormatter::from_buffer(buf, len) }, PhantomData)
+ }
+
+ /// Create a new [`Self`] instance.
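+    ///
+    /// # Examples
+    ///
+    /// A small sketch of formatting into a stack buffer and checking how many
+    /// bytes were written:
+    ///
+    /// ```
+    /// use core::fmt::Write;
+    /// use kernel::str::Formatter;
+    ///
+    /// let mut buf = [0u8; 32];
+    /// let mut f = Formatter::new(&mut buf);
+    /// write!(f, "{} + {} = {}", 1, 2, 1 + 2)?;
+    /// assert_eq!(f.bytes_written(), "1 + 2 = 3".len());
+    /// ```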
+ pub fn new(buffer: &mut [u8]) -> Self {
+ // SAFETY: `buffer` is valid for writes for the entire length for
+ // the lifetime of `Self`.
+ unsafe { Formatter::from_buffer(buffer.as_mut_ptr(), buffer.len()) }
}
}
-impl Deref for Formatter {
+impl Deref for Formatter<'_> {
type Target = RawFormatter;
fn deref(&self) -> &Self::Target {
@@ -847,7 +859,7 @@ impl Deref for Formatter {
}
}
-impl fmt::Write for Formatter {
+impl fmt::Write for Formatter<'_> {
fn write_str(&mut self, s: &str) -> fmt::Result {
self.0.write_str(s)?;
@@ -860,6 +872,132 @@ impl fmt::Write for Formatter {
}
}
+/// A mutable reference to a byte buffer where a string can be written into.
+///
+/// The buffer will be automatically null terminated after the last written character.
+///
+/// # Invariants
+///
+/// * The first byte of `buffer` is always zero.
+/// * The length of `buffer` is at least 1.
+pub(crate) struct NullTerminatedFormatter<'a> {
+ buffer: &'a mut [u8],
+}
+
+impl<'a> NullTerminatedFormatter<'a> {
+ /// Create a new [`Self`] instance.
+ pub(crate) fn new(buffer: &'a mut [u8]) -> Option<NullTerminatedFormatter<'a>> {
+ *(buffer.first_mut()?) = 0;
+
+ // INVARIANT:
+ // - We wrote zero to the first byte above.
+ // - If buffer was not at least length 1, `buffer.first_mut()` would return None.
+ Some(Self { buffer })
+ }
+}
+
+impl Write for NullTerminatedFormatter<'_> {
+ fn write_str(&mut self, s: &str) -> fmt::Result {
+ let bytes = s.as_bytes();
+ let len = bytes.len();
+
+ // We want space for a zero. By type invariant, buffer length is always at least 1, so no
+ // underflow.
+ if len > self.buffer.len() - 1 {
+ return Err(fmt::Error);
+ }
+
+ let buffer = core::mem::take(&mut self.buffer);
+ // We break the zero start invariant for a short while.
+ buffer[..len].copy_from_slice(bytes);
+ // INVARIANT: We checked above that buffer will have size at least 1 after this assignment.
+ self.buffer = &mut buffer[len..];
+
+ // INVARIANT: We write zero to the first byte of the buffer.
+ self.buffer[0] = 0;
+
+ Ok(())
+ }
+}
+
+/// # Safety
+///
+/// - `string` must point to a null-terminated string that is valid for reads.
+unsafe fn kstrtobool_raw(string: *const u8) -> Result<bool> {
+ let mut result: bool = false;
+
+ // SAFETY:
+ // - By function safety requirement, `string` is a valid null-terminated string.
+ // - `result` is a valid `bool` that we own.
+ to_result(unsafe { bindings::kstrtobool(string, &mut result) })?;
+ Ok(result)
+}
+
+/// Convert common user inputs into boolean values using the kernel's `kstrtobool` function.
+///
+/// This routine returns `Ok(bool)` if the first character is one of 'YyTt1NnFf0', or
+/// \[oO\]\[NnFf\] for "on" and "off". Otherwise it will return `Err(EINVAL)`.
+///
+/// # Examples
+///
+/// ```
+/// # use kernel::{c_str, str::kstrtobool};
+///
+/// // Lowercase
+/// assert_eq!(kstrtobool(c_str!("true")), Ok(true));
+/// assert_eq!(kstrtobool(c_str!("tr")), Ok(true));
+/// assert_eq!(kstrtobool(c_str!("t")), Ok(true));
+/// assert_eq!(kstrtobool(c_str!("twrong")), Ok(true));
+/// assert_eq!(kstrtobool(c_str!("false")), Ok(false));
+/// assert_eq!(kstrtobool(c_str!("f")), Ok(false));
+/// assert_eq!(kstrtobool(c_str!("yes")), Ok(true));
+/// assert_eq!(kstrtobool(c_str!("no")), Ok(false));
+/// assert_eq!(kstrtobool(c_str!("on")), Ok(true));
+/// assert_eq!(kstrtobool(c_str!("off")), Ok(false));
+///
+/// // Camel case
+/// assert_eq!(kstrtobool(c_str!("True")), Ok(true));
+/// assert_eq!(kstrtobool(c_str!("False")), Ok(false));
+/// assert_eq!(kstrtobool(c_str!("Yes")), Ok(true));
+/// assert_eq!(kstrtobool(c_str!("No")), Ok(false));
+/// assert_eq!(kstrtobool(c_str!("On")), Ok(true));
+/// assert_eq!(kstrtobool(c_str!("Off")), Ok(false));
+///
+/// // All caps
+/// assert_eq!(kstrtobool(c_str!("TRUE")), Ok(true));
+/// assert_eq!(kstrtobool(c_str!("FALSE")), Ok(false));
+/// assert_eq!(kstrtobool(c_str!("YES")), Ok(true));
+/// assert_eq!(kstrtobool(c_str!("NO")), Ok(false));
+/// assert_eq!(kstrtobool(c_str!("ON")), Ok(true));
+/// assert_eq!(kstrtobool(c_str!("OFF")), Ok(false));
+///
+/// // Numeric
+/// assert_eq!(kstrtobool(c_str!("1")), Ok(true));
+/// assert_eq!(kstrtobool(c_str!("0")), Ok(false));
+///
+/// // Invalid input
+/// assert_eq!(kstrtobool(c_str!("invalid")), Err(EINVAL));
+/// assert_eq!(kstrtobool(c_str!("2")), Err(EINVAL));
+/// ```
+pub fn kstrtobool(string: &CStr) -> Result<bool> {
+ // SAFETY:
+ // - The pointer returned by `CStr::as_char_ptr` is guaranteed to be
+ // null terminated.
+ // - `string` is live and thus the string is valid for read.
+ unsafe { kstrtobool_raw(string.as_char_ptr()) }
+}
+
+/// Convert `&[u8]` to `bool` by deferring to [`kernel::str::kstrtobool`].
+///
+/// Only considers at most the first two bytes of `bytes`.
+pub fn kstrtobool_bytes(bytes: &[u8]) -> Result<bool> {
+    // `kstrtobool` only considers the first two bytes of the input.
+ let stack_string = [*bytes.first().unwrap_or(&0), *bytes.get(1).unwrap_or(&0), 0];
+ // SAFETY: `stack_string` is null terminated and it is live on the stack so
+ // it is valid for read.
+ unsafe { kstrtobool_raw(stack_string.as_ptr()) }
+}
+
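Since only two bytes ever reach `kstrtobool`, raw (not necessarily null-terminated) input such as
a sysfs write parses naturally; a short sketch:

    // Trailing input such as a newline does not cause a parse failure.
    assert_eq!(kstrtobool_bytes(b"on\n"), Ok(true));
    assert_eq!(kstrtobool_bytes(b"y"), Ok(true));
    assert_eq!(kstrtobool_bytes(b"0"), Ok(false));
    assert_eq!(kstrtobool_bytes(b""), Err(EINVAL));
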
/// An owned string that is guaranteed to have exactly one `NUL` byte, which is at the end.
///
/// Used for interoperability with kernel APIs that take C strings.
diff --git a/rust/kernel/sync.rs b/rust/kernel/sync.rs
index 00f9b558a3ad..cf5b638a097d 100644
--- a/rust/kernel/sync.rs
+++ b/rust/kernel/sync.rs
@@ -11,12 +11,15 @@ use pin_init;
mod arc;
pub mod aref;
+pub mod atomic;
+pub mod barrier;
pub mod completion;
mod condvar;
pub mod lock;
mod locked_by;
pub mod poll;
pub mod rcu;
+mod refcount;
pub use arc::{Arc, ArcBorrow, UniqueArc};
pub use completion::Completion;
@@ -25,6 +28,7 @@ pub use lock::global::{global_lock, GlobalGuard, GlobalLock, GlobalLockBackend,
pub use lock::mutex::{new_mutex, Mutex, MutexGuard};
pub use lock::spinlock::{new_spinlock, SpinLock, SpinLockGuard};
pub use locked_by::LockedBy;
+pub use refcount::Refcount;
/// Represents a lockdep class. It's a wrapper around C's `lock_class_key`.
#[repr(transparent)]
diff --git a/rust/kernel/sync/arc.rs b/rust/kernel/sync/arc.rs
index 63a66761d0c7..289f77abf415 100644
--- a/rust/kernel/sync/arc.rs
+++ b/rust/kernel/sync/arc.rs
@@ -8,7 +8,7 @@
//! threads.
//!
//! It is different from the standard library's [`Arc`] in a few ways:
-//! 1. It is backed by the kernel's `refcount_t` type.
+//! 1. It is backed by the kernel's [`Refcount`] type.
//! 2. It does not support weak references, which allows it to be half the size.
//! 3. It saturates the reference count instead of aborting when it goes over a threshold.
//! 4. It does not provide a `get_mut` method, so the ref counted object is pinned.
@@ -18,16 +18,16 @@
use crate::{
alloc::{AllocError, Flags, KBox},
- bindings,
ffi::c_void,
+ fmt,
init::InPlaceInit,
+ sync::Refcount,
try_init,
- types::{ForeignOwnable, Opaque},
+ types::ForeignOwnable,
};
use core::{
alloc::Layout,
borrow::{Borrow, BorrowMut},
- fmt,
marker::PhantomData,
mem::{ManuallyDrop, MaybeUninit},
ops::{Deref, DerefMut},
@@ -145,7 +145,7 @@ pub struct Arc<T: ?Sized> {
#[pin_data]
#[repr(C)]
struct ArcInner<T: ?Sized> {
- refcount: Opaque<bindings::refcount_t>,
+ refcount: Refcount,
data: T,
}
@@ -157,7 +157,7 @@ impl<T: ?Sized> ArcInner<T> {
/// `ptr` must have been returned by a previous call to [`Arc::into_raw`], and the `Arc` must
/// not yet have been destroyed.
unsafe fn container_of(ptr: *const T) -> NonNull<ArcInner<T>> {
- let refcount_layout = Layout::new::<bindings::refcount_t>();
+ let refcount_layout = Layout::new::<Refcount>();
// SAFETY: The caller guarantees that the pointer is valid.
let val_layout = Layout::for_value(unsafe { &*ptr });
// SAFETY: We're computing the layout of a real struct that existed when compiling this
@@ -229,8 +229,7 @@ impl<T> Arc<T> {
pub fn new(contents: T, flags: Flags) -> Result<Self, AllocError> {
// INVARIANT: The refcount is initialised to a non-zero value.
let value = ArcInner {
- // SAFETY: There are no safety requirements for this FFI call.
- refcount: Opaque::new(unsafe { bindings::REFCOUNT_INIT(1) }),
+ refcount: Refcount::new(1),
data: contents,
};
@@ -321,7 +320,7 @@ impl<T: ?Sized> Arc<T> {
/// use kernel::sync::{Arc, UniqueArc};
///
/// let arc = Arc::new(42, GFP_KERNEL)?;
- /// let unique_arc = arc.into_unique_or_drop();
+ /// let unique_arc = Arc::into_unique_or_drop(arc);
///
/// // The above conversion should succeed since refcount of `arc` is 1.
/// assert!(unique_arc.is_some());
@@ -337,35 +336,30 @@ impl<T: ?Sized> Arc<T> {
/// let arc = Arc::new(42, GFP_KERNEL)?;
/// let another = arc.clone();
///
- /// let unique_arc = arc.into_unique_or_drop();
+ /// let unique_arc = Arc::into_unique_or_drop(arc);
///
/// // The above conversion should fail since refcount of `arc` is >1.
/// assert!(unique_arc.is_none());
///
/// # Ok::<(), Error>(())
/// ```
- pub fn into_unique_or_drop(self) -> Option<Pin<UniqueArc<T>>> {
+ pub fn into_unique_or_drop(this: Self) -> Option<Pin<UniqueArc<T>>> {
// We will manually manage the refcount in this method, so we disable the destructor.
- let me = ManuallyDrop::new(self);
+ let this = ManuallyDrop::new(this);
// SAFETY: We own a refcount, so the pointer is still valid.
- let refcount = unsafe { me.ptr.as_ref() }.refcount.get();
+ let refcount = unsafe { &this.ptr.as_ref().refcount };
// If the refcount reaches a non-zero value, then we have destroyed this `Arc` and will
// return without further touching the `Arc`. If the refcount reaches zero, then there are
// no other arcs, and we can create a `UniqueArc`.
- //
- // SAFETY: We own a refcount, so the pointer is not dangling.
- let is_zero = unsafe { bindings::refcount_dec_and_test(refcount) };
- if is_zero {
- // SAFETY: We have exclusive access to the arc, so we can perform unsynchronized
- // accesses to the refcount.
- unsafe { core::ptr::write(refcount, bindings::REFCOUNT_INIT(1)) };
+ if refcount.dec_and_test() {
+ refcount.set(1);
// INVARIANT: We own the only refcount to this arc, so we may create a `UniqueArc`. We
// must pin the `UniqueArc` because the values was previously in an `Arc`, and they pin
// their values.
Some(Pin::from(UniqueArc {
- inner: ManuallyDrop::into_inner(me),
+ inner: ManuallyDrop::into_inner(this),
}))
} else {
None
@@ -373,10 +367,10 @@ impl<T: ?Sized> Arc<T> {
}
}
-// SAFETY: The pointer returned by `into_foreign` comes from a well aligned
-// pointer to `ArcInner<T>`.
+// SAFETY: The pointer returned by `into_foreign` was originally allocated as an
+// `KBox<ArcInner<T>>`, so that type is what determines the alignment.
unsafe impl<T: 'static> ForeignOwnable for Arc<T> {
- const FOREIGN_ALIGN: usize = core::mem::align_of::<ArcInner<T>>();
+ const FOREIGN_ALIGN: usize = <KBox<ArcInner<T>> as ForeignOwnable>::FOREIGN_ALIGN;
type Borrowed<'a> = ArcBorrow<'a, T>;
type BorrowedMut<'a> = Self::Borrowed<'a>;
@@ -456,14 +450,10 @@ impl<T: ?Sized> Borrow<T> for Arc<T> {
impl<T: ?Sized> Clone for Arc<T> {
fn clone(&self) -> Self {
- // SAFETY: By the type invariant, there is necessarily a reference to the object, so it is
- // safe to dereference it.
- let refcount = unsafe { self.ptr.as_ref() }.refcount.get();
-
- // INVARIANT: C `refcount_inc` saturates the refcount, so it cannot overflow to zero.
+ // INVARIANT: `Refcount` saturates the refcount, so it cannot overflow to zero.
// SAFETY: By the type invariant, there is necessarily a reference to the object, so it is
// safe to increment the refcount.
- unsafe { bindings::refcount_inc(refcount) };
+ unsafe { self.ptr.as_ref() }.refcount.inc();
// SAFETY: We just incremented the refcount. This increment is now owned by the new `Arc`.
unsafe { Self::from_inner(self.ptr) }
@@ -472,16 +462,10 @@ impl<T: ?Sized> Clone for Arc<T> {
impl<T: ?Sized> Drop for Arc<T> {
fn drop(&mut self) {
- // SAFETY: By the type invariant, there is necessarily a reference to the object. We cannot
- // touch `refcount` after it's decremented to a non-zero value because another thread/CPU
- // may concurrently decrement it to zero and free it. It is ok to have a raw pointer to
- // freed/invalid memory as long as it is never dereferenced.
- let refcount = unsafe { self.ptr.as_ref() }.refcount.get();
-
// INVARIANT: If the refcount reaches zero, there are no other instances of `Arc`, and
// this instance is being dropped, so the broken invariant is not observable.
- // SAFETY: Also by the type invariant, we are allowed to decrement the refcount.
- let is_zero = unsafe { bindings::refcount_dec_and_test(refcount) };
+ // SAFETY: By the type invariant, there is necessarily a reference to the object.
+ let is_zero = unsafe { self.ptr.as_ref() }.refcount.dec_and_test();
if is_zero {
// The count reached zero, we must free the memory.
//
@@ -775,8 +759,7 @@ impl<T> UniqueArc<T> {
// INVARIANT: The refcount is initialised to a non-zero value.
let inner = KBox::try_init::<AllocError>(
try_init!(ArcInner {
- // SAFETY: There are no safety requirements for this FFI call.
- refcount: Opaque::new(unsafe { bindings::REFCOUNT_INIT(1) }),
+ refcount: Refcount::new(1),
data <- pin_init::uninit::<T, AllocError>(),
}? AllocError),
flags,
diff --git a/rust/kernel/sync/aref.rs b/rust/kernel/sync/aref.rs
index dbd77bb68617..0d24a0432015 100644
--- a/rust/kernel/sync/aref.rs
+++ b/rust/kernel/sync/aref.rs
@@ -1,6 +1,21 @@
// SPDX-License-Identifier: GPL-2.0
//! Internal reference counting support.
+//!
+//! Many C types already have their own reference counting mechanism (e.g. by storing a
+//! `refcount_t`). This module provides support for directly using their internal reference count
+//! from Rust, instead of requiring users to add a separate Rust reference count in the form of
+//! [`Arc`].
+//!
+//! The smart pointer [`ARef<T>`] acts similarly to [`Arc<T>`] in that it holds a refcount on the
+//! underlying object, but this refcount is internal to the object. It is essentially a Rust
+//! implementation of the `get_` and `put_` pattern used in C for reference counting.
+//!
+//! To make use of [`ARef<MyType>`], `MyType` needs to implement [`AlwaysRefCounted`], a trait
+//! that provides access to the internal reference count of an object of the `MyType` type.
+//!
+//! [`Arc`]: crate::sync::Arc
+//! [`Arc<T>`]: crate::sync::Arc
use core::{marker::PhantomData, mem::ManuallyDrop, ops::Deref, ptr::NonNull};
@@ -97,7 +112,7 @@ impl<T: AlwaysRefCounted> ARef<T> {
///
/// ```
/// use core::ptr::NonNull;
- /// use kernel::types::{ARef, AlwaysRefCounted};
+ /// use kernel::sync::aref::{ARef, AlwaysRefCounted};
///
/// struct Empty {}
///
diff --git a/rust/kernel/sync/atomic.rs b/rust/kernel/sync/atomic.rs
new file mode 100644
index 000000000000..016a6bcaf080
--- /dev/null
+++ b/rust/kernel/sync/atomic.rs
@@ -0,0 +1,551 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Atomic primitives.
+//!
+//! These primitives have the same semantics as their C counterparts; the precise definitions of
+//! the semantics can be found at [`LKMM`]. Note that the Linux Kernel Memory (Consistency) Model
+//! is the only memory model for Rust code in the kernel, and Rust's own atomics should be avoided.
+//!
+//! # Data races
+//!
+//! [`LKMM`] atomics have different rules regarding data races:
+//!
+//! - A normal write from C side is treated as an atomic write if
+//! CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC=y.
+//! - Mixed-size atomic accesses don't cause data races.
+//!
+//! [`LKMM`]: srctree/tools/memory-model/
+
+mod internal;
+pub mod ordering;
+mod predefine;
+
+pub use internal::AtomicImpl;
+pub use ordering::{Acquire, Full, Relaxed, Release};
+
+use crate::build_error;
+use internal::{AtomicArithmeticOps, AtomicBasicOps, AtomicExchangeOps, AtomicRepr};
+use ordering::OrderingType;
+
+/// A memory location which can be safely modified from multiple execution contexts.
+///
+/// This has the same size, alignment and bit validity as the underlying type `T`. It also
+/// disables niche optimization, for the same reason as [`UnsafeCell`].
+///
+/// The atomic operations are implemented in a way that is fully compatible with the [Linux Kernel
+/// Memory (Consistency) Model][LKMM], hence they should be modeled as the corresponding
+/// [`LKMM`][LKMM] atomic primitives. With the help of [`Atomic::from_ptr()`] and
+/// [`Atomic::as_ptr()`], this provides a way to interact with [C-side atomic operations]
+/// (including those without the `atomic` prefix, e.g. `READ_ONCE()`, `WRITE_ONCE()`,
+/// `smp_load_acquire()` and `smp_store_release()`).
+///
+/// # Invariants
+///
+/// `self.0` is a valid `T`.
+///
+/// [`UnsafeCell`]: core::cell::UnsafeCell
+/// [LKMM]: srctree/tools/memory-model/
+/// [C-side atomic operations]: srctree/Documentation/atomic_t.txt
+#[repr(transparent)]
+pub struct Atomic<T: AtomicType>(AtomicRepr<T::Repr>);
+
+// SAFETY: `Atomic<T>` is safe to share among execution contexts because all accesses are atomic.
+unsafe impl<T: AtomicType> Sync for Atomic<T> {}
+
+/// Types that support basic atomic operations.
+///
+/// # Round-trip transmutability
+///
+/// `T` is round-trip transmutable to `U` if and only if both of these properties hold:
+///
+/// - Any valid bit pattern for `T` is also a valid bit pattern for `U`.
+/// - Transmuting (e.g. using [`transmute()`]) a value of type `T` to `U` and then to `T` again
+/// yields a value that is in all aspects equivalent to the original value.
+///
+/// # Safety
+///
+/// - [`Self`] must have the same size and alignment as [`Self::Repr`].
+/// - [`Self`] must be [round-trip transmutable] to [`Self::Repr`].
+///
+/// Note that this is more relaxed than requiring bi-directional transmutability (i.e.
+/// [`transmute()`] always being sound between `U` and `T`) because of the support for atomic
+/// variables over unit-only enums; see [Examples].
+///
+/// # Limitations
+///
+/// Because C primitives are used to implement the atomic operations, and a C function requires a
+/// valid object of a type to operate on (i.e. no `MaybeUninit<_>`), only types with all bits
+/// initialized can be passed at the Rust <-> C surface. As a result, types like `(u8, u16)`
+/// (whose padding bytes are uninitialized) are currently not supported.
+///
+/// # Examples
+///
+/// A unit-only enum that implements [`AtomicType`]:
+///
+/// ```
+/// use kernel::sync::atomic::{AtomicType, Atomic, Relaxed};
+///
+/// #[derive(Clone, Copy, PartialEq, Eq)]
+/// #[repr(i32)]
+/// enum State {
+/// Uninit = 0,
+/// Working = 1,
+/// Done = 2,
+/// }
+///
+/// // SAFETY: `State` and `i32` have the same size and alignment, and it's round-trip
+/// // transmutable to `i32`.
+/// unsafe impl AtomicType for State {
+/// type Repr = i32;
+/// }
+///
+/// let s = Atomic::new(State::Uninit);
+///
+/// assert_eq!(State::Uninit, s.load(Relaxed));
+/// ```
+/// [`transmute()`]: core::mem::transmute
+/// [round-trip transmutable]: AtomicType#round-trip-transmutability
+/// [Examples]: AtomicType#examples
+pub unsafe trait AtomicType: Sized + Send + Copy {
+ /// The backing atomic implementation type.
+ type Repr: AtomicImpl;
+}
+
+/// Types that support atomic add operations.
+///
+/// # Safety
+///
+// TODO: Properly define `wrapping_add` in the following comment.
+/// `wrapping_add`-ing any value of type `Self::Repr::Delta` obtained via
+/// [`Self::rhs_into_delta()`] to any value of type `Self::Repr` obtained through transmuting a
+/// value of type `Self` must yield a value with a bit pattern also valid for `Self`.
+pub unsafe trait AtomicAdd<Rhs = Self>: AtomicType {
+ /// Converts `Rhs` into the `Delta` type of the atomic implementation.
+ fn rhs_into_delta(rhs: Rhs) -> <Self::Repr as AtomicImpl>::Delta;
+}
+
+#[inline(always)]
+const fn into_repr<T: AtomicType>(v: T) -> T::Repr {
+ // SAFETY: Per the safety requirement of `AtomicType`, `T` is round-trip transmutable to
+ // `T::Repr`, therefore the transmute operation is sound.
+ unsafe { core::mem::transmute_copy(&v) }
+}
+
+/// # Safety
+///
+/// `r` must be a valid bit pattern of `T`.
+#[inline(always)]
+const unsafe fn from_repr<T: AtomicType>(r: T::Repr) -> T {
+ // SAFETY: Per the safety requirement of the function, the transmute operation is sound.
+ unsafe { core::mem::transmute_copy(&r) }
+}
+
+impl<T: AtomicType> Atomic<T> {
+ /// Creates a new atomic `T`.
+ pub const fn new(v: T) -> Self {
+ // INVARIANT: Per the safety requirement of `AtomicType`, `into_repr(v)` is a valid `T`.
+ Self(AtomicRepr::new(into_repr(v)))
+ }
+
+    /// Creates a reference to an atomic `T` from a pointer to `T`.
+    ///
+    /// This is usually used when communicating with the C side or manipulating a C struct; see
+    /// the examples below.
+ ///
+ /// # Safety
+ ///
+ /// - `ptr` is aligned to `align_of::<T>()`.
+ /// - `ptr` is valid for reads and writes for `'a`.
+ /// - For the duration of `'a`, other accesses to `*ptr` must not cause data races (defined
+ /// by [`LKMM`]) against atomic operations on the returned reference. Note that if all other
+ /// accesses are atomic, then this safety requirement is trivially fulfilled.
+ ///
+ /// [`LKMM`]: srctree/tools/memory-model
+ ///
+ /// # Examples
+ ///
+ /// Using [`Atomic::from_ptr()`] combined with [`Atomic::load()`] or [`Atomic::store()`] can
+ /// achieve the same functionality as `READ_ONCE()`/`smp_load_acquire()` or
+    /// `WRITE_ONCE()`/`smp_store_release()` on the C side:
+ ///
+ /// ```
+ /// # use kernel::types::Opaque;
+ /// use kernel::sync::atomic::{Atomic, Relaxed, Release};
+ ///
+ /// // Assume there is a C struct `foo`.
+ /// mod cbindings {
+ /// #[repr(C)]
+ /// pub(crate) struct foo {
+ /// pub(crate) a: i32,
+ /// pub(crate) b: i32
+ /// }
+ /// }
+ ///
+ /// let tmp = Opaque::new(cbindings::foo { a: 1, b: 2 });
+ ///
+ /// // struct foo *foo_ptr = ..;
+ /// let foo_ptr = tmp.get();
+ ///
+ /// // SAFETY: `foo_ptr` is valid, and `.a` is in bounds.
+ /// let foo_a_ptr = unsafe { &raw mut (*foo_ptr).a };
+ ///
+ /// // a = READ_ONCE(foo_ptr->a);
+ /// //
+    /// // SAFETY: `foo_a_ptr` is valid for read, and all other accesses to it are atomic, so no
+ /// // data race.
+ /// let a = unsafe { Atomic::from_ptr(foo_a_ptr) }.load(Relaxed);
+ /// # assert_eq!(a, 1);
+ ///
+ /// // smp_store_release(&foo_ptr->a, 2);
+ /// //
+    /// // SAFETY: `foo_a_ptr` is valid for writes, and all other accesses to it are atomic, so
+ /// // no data race.
+ /// unsafe { Atomic::from_ptr(foo_a_ptr) }.store(2, Release);
+ /// ```
+ pub unsafe fn from_ptr<'a>(ptr: *mut T) -> &'a Self
+ where
+ T: Sync,
+ {
+ // CAST: `T` and `Atomic<T>` have the same size, alignment and bit validity.
+ // SAFETY: Per function safety requirement, `ptr` is a valid pointer and the object will
+ // live long enough. It's safe to return a `&Atomic<T>` because function safety requirement
+ // guarantees other accesses won't cause data races.
+ unsafe { &*ptr.cast::<Self>() }
+ }
+
+ /// Returns a pointer to the underlying atomic `T`.
+ ///
+    /// Note that use of the returned pointer must not cause data races defined by [`LKMM`].
+ ///
+ /// # Guarantees
+ ///
+ /// The returned pointer is valid and properly aligned (i.e. aligned to [`align_of::<T>()`]).
+ ///
+ /// [`LKMM`]: srctree/tools/memory-model
+ /// [`align_of::<T>()`]: core::mem::align_of
+ pub const fn as_ptr(&self) -> *mut T {
+ // GUARANTEE: Per the function guarantee of `AtomicRepr::as_ptr()`, the `self.0.as_ptr()`
+ // must be a valid and properly aligned pointer for `T::Repr`, and per the safety guarantee
+ // of `AtomicType`, it's a valid and properly aligned pointer of `T`.
+ self.0.as_ptr().cast()
+ }
+
+ /// Returns a mutable reference to the underlying atomic `T`.
+ ///
+    /// This is safe because the mutable reference to the atomic `T` guarantees exclusive access.
+ pub fn get_mut(&mut self) -> &mut T {
+        // CAST: `T` and `T::Repr` have the same size and alignment per the safety requirement of
+ // `AtomicType`, and per the type invariants `self.0` is a valid `T`, therefore the casting
+ // result is a valid pointer of `T`.
+ // SAFETY: The pointer is valid per the CAST comment above, and the mutable reference
+ // guarantees exclusive access.
+ unsafe { &mut *self.0.as_ptr().cast() }
+ }
+}
+
+impl<T: AtomicType> Atomic<T>
+where
+ T::Repr: AtomicBasicOps,
+{
+ /// Loads the value from the atomic `T`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use kernel::sync::atomic::{Atomic, Relaxed};
+ ///
+ /// let x = Atomic::new(42i32);
+ ///
+ /// assert_eq!(42, x.load(Relaxed));
+ ///
+ /// let x = Atomic::new(42i64);
+ ///
+ /// assert_eq!(42, x.load(Relaxed));
+ /// ```
+ #[doc(alias("atomic_read", "atomic64_read"))]
+ #[inline(always)]
+ pub fn load<Ordering: ordering::AcquireOrRelaxed>(&self, _: Ordering) -> T {
+ let v = {
+ match Ordering::TYPE {
+ OrderingType::Relaxed => T::Repr::atomic_read(&self.0),
+ OrderingType::Acquire => T::Repr::atomic_read_acquire(&self.0),
+ _ => build_error!("Wrong ordering"),
+ }
+ };
+
+ // SAFETY: `v` comes from reading `self.0`, which is a valid `T` per the type invariants.
+ unsafe { from_repr(v) }
+ }
+
+ /// Stores a value to the atomic `T`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use kernel::sync::atomic::{Atomic, Relaxed};
+ ///
+ /// let x = Atomic::new(42i32);
+ ///
+ /// assert_eq!(42, x.load(Relaxed));
+ ///
+ /// x.store(43, Relaxed);
+ ///
+ /// assert_eq!(43, x.load(Relaxed));
+ /// ```
+ #[doc(alias("atomic_set", "atomic64_set"))]
+ #[inline(always)]
+ pub fn store<Ordering: ordering::ReleaseOrRelaxed>(&self, v: T, _: Ordering) {
+ let v = into_repr(v);
+
+ // INVARIANT: `v` is a valid `T`, and is stored to `self.0` by `atomic_set*()`.
+ match Ordering::TYPE {
+ OrderingType::Relaxed => T::Repr::atomic_set(&self.0, v),
+ OrderingType::Release => T::Repr::atomic_set_release(&self.0, v),
+ _ => build_error!("Wrong ordering"),
+ }
+ }
+}
+
+impl<T: AtomicType> Atomic<T>
+where
+ T::Repr: AtomicExchangeOps,
+{
+ /// Atomic exchange.
+ ///
+ /// Atomically updates `*self` to `v` and returns the old value of `*self`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use kernel::sync::atomic::{Atomic, Acquire, Relaxed};
+ ///
+ /// let x = Atomic::new(42);
+ ///
+ /// assert_eq!(42, x.xchg(52, Acquire));
+ /// assert_eq!(52, x.load(Relaxed));
+ /// ```
+ #[doc(alias("atomic_xchg", "atomic64_xchg", "swap"))]
+ #[inline(always)]
+ pub fn xchg<Ordering: ordering::Ordering>(&self, v: T, _: Ordering) -> T {
+ let v = into_repr(v);
+
+ // INVARIANT: `self.0` is a valid `T` after `atomic_xchg*()` because `v` is transmutable to
+ // `T`.
+ let ret = {
+ match Ordering::TYPE {
+ OrderingType::Full => T::Repr::atomic_xchg(&self.0, v),
+ OrderingType::Acquire => T::Repr::atomic_xchg_acquire(&self.0, v),
+ OrderingType::Release => T::Repr::atomic_xchg_release(&self.0, v),
+ OrderingType::Relaxed => T::Repr::atomic_xchg_relaxed(&self.0, v),
+ }
+ };
+
+ // SAFETY: `ret` comes from reading `*self`, which is a valid `T` per type invariants.
+ unsafe { from_repr(ret) }
+ }
+
+ /// Atomic compare and exchange.
+ ///
+ /// If `*self` == `old`, atomically updates `*self` to `new`. Otherwise, `*self` is not
+ /// modified.
+ ///
+    /// Compare: The comparison is done via a byte-level comparison between `*self` and `old`.
+    ///
+    /// Ordering: On success, provides the ordering indicated by the `Ordering` type parameter. A
+    /// failed cmpxchg provides no ordering; its load part is a [`Relaxed`] load.
+    ///
+    /// Returns `Ok(value)` if the cmpxchg succeeds, in which case `value` is guaranteed to be
+    /// equal to `old`; otherwise returns `Err(value)`, where `value` is the current value of
+    /// `*self`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use kernel::sync::atomic::{Atomic, Full, Relaxed};
+ ///
+ /// let x = Atomic::new(42);
+ ///
+ /// // Checks whether cmpxchg succeeded.
+ /// let success = x.cmpxchg(52, 64, Relaxed).is_ok();
+ /// # assert!(!success);
+ ///
+ /// // Checks whether cmpxchg failed.
+ /// let failure = x.cmpxchg(52, 64, Relaxed).is_err();
+ /// # assert!(failure);
+ ///
+    /// // Use the old value on failure, e.g. to retry the cmpxchg.
+ /// match x.cmpxchg(52, 64, Relaxed) {
+ /// Ok(_) => { },
+ /// Err(old) => {
+ /// // do something with `old`.
+ /// # assert_eq!(old, 42);
+ /// }
+ /// }
+ ///
+    /// // Uses the latest value regardless of success, same as atomic_cmpxchg() in C.
+ /// let latest = x.cmpxchg(42, 64, Full).unwrap_or_else(|old| old);
+ /// # assert_eq!(42, latest);
+ /// assert_eq!(64, x.load(Relaxed));
+ /// ```
+ ///
+ /// [`Relaxed`]: ordering::Relaxed
+ #[doc(alias(
+ "atomic_cmpxchg",
+ "atomic64_cmpxchg",
+ "atomic_try_cmpxchg",
+ "atomic64_try_cmpxchg",
+ "compare_exchange"
+ ))]
+ #[inline(always)]
+ pub fn cmpxchg<Ordering: ordering::Ordering>(
+ &self,
+ mut old: T,
+ new: T,
+ o: Ordering,
+ ) -> Result<T, T> {
+ // Note on code generation:
+ //
+ // try_cmpxchg() is used to implement cmpxchg(), and if the helper functions are inlined,
+ // the compiler is able to figure out that branch is not needed if the users don't care
+ // about whether the operation succeeds or not. One exception is on x86, due to commit
+ // 44fe84459faf ("locking/atomic: Fix atomic_try_cmpxchg() semantics"), the
+ // atomic_try_cmpxchg() on x86 has a branch even if the caller doesn't care about the
+ // success of cmpxchg and only wants to use the old value. For example, for code like:
+ //
+ // let latest = x.cmpxchg(42, 64, Full).unwrap_or_else(|old| old);
+ //
+ // It will still generate code:
+ //
+ // movl $0x40, %ecx
+ // movl $0x34, %eax
+ // lock
+ // cmpxchgl %ecx, 0x4(%rsp)
+ // jne 1f
+ // 2:
+ // ...
+ // 1: movl %eax, %ecx
+ // jmp 2b
+ //
+ // This might be "fixed" by introducing a try_cmpxchg_exclusive() that knows the "*old"
+ // location in the C function is always safe to write.
+ if self.try_cmpxchg(&mut old, new, o) {
+ Ok(old)
+ } else {
+ Err(old)
+ }
+ }
+
+    /// Atomic compare and exchange, returning whether the operation succeeded.
+    ///
+    /// If `*self` == `old`, atomically updates `*self` to `new`. Otherwise, `*self` is not
+    /// modified, and `*old` is updated to the current value of `*self`.
+    ///
+    /// The "Compare" and "Ordering" parts are the same as for [`Atomic::cmpxchg()`].
+    ///
+    /// Returns `true` if the cmpxchg succeeds, `false` otherwise.
+ #[inline(always)]
+ fn try_cmpxchg<Ordering: ordering::Ordering>(&self, old: &mut T, new: T, _: Ordering) -> bool {
+ let mut tmp = into_repr(*old);
+ let new = into_repr(new);
+
+ // INVARIANT: `self.0` is a valid `T` after `atomic_try_cmpxchg*()` because `new` is
+ // transmutable to `T`.
+ let ret = {
+ match Ordering::TYPE {
+ OrderingType::Full => T::Repr::atomic_try_cmpxchg(&self.0, &mut tmp, new),
+ OrderingType::Acquire => {
+ T::Repr::atomic_try_cmpxchg_acquire(&self.0, &mut tmp, new)
+ }
+ OrderingType::Release => {
+ T::Repr::atomic_try_cmpxchg_release(&self.0, &mut tmp, new)
+ }
+ OrderingType::Relaxed => {
+ T::Repr::atomic_try_cmpxchg_relaxed(&self.0, &mut tmp, new)
+ }
+ }
+ };
+
+ // SAFETY: `tmp` comes from reading `*self`, which is a valid `T` per type invariants.
+ *old = unsafe { from_repr(tmp) };
+
+ ret
+ }
+}
+
+impl<T: AtomicType> Atomic<T>
+where
+ T::Repr: AtomicArithmeticOps,
+{
+ /// Atomic add.
+ ///
+ /// Atomically updates `*self` to `(*self).wrapping_add(v)`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use kernel::sync::atomic::{Atomic, Relaxed};
+ ///
+ /// let x = Atomic::new(42);
+ ///
+ /// assert_eq!(42, x.load(Relaxed));
+ ///
+ /// x.add(12, Relaxed);
+ ///
+ /// assert_eq!(54, x.load(Relaxed));
+ /// ```
+ #[inline(always)]
+ pub fn add<Rhs>(&self, v: Rhs, _: ordering::Relaxed)
+ where
+ T: AtomicAdd<Rhs>,
+ {
+ let v = T::rhs_into_delta(v);
+
+ // INVARIANT: `self.0` is a valid `T` after `atomic_add()` due to safety requirement of
+ // `AtomicAdd`.
+ T::Repr::atomic_add(&self.0, v);
+ }
+
+ /// Atomic fetch and add.
+ ///
+ /// Atomically updates `*self` to `(*self).wrapping_add(v)`, and returns the value of `*self`
+ /// before the update.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use kernel::sync::atomic::{Atomic, Acquire, Full, Relaxed};
+ ///
+ /// let x = Atomic::new(42);
+ ///
+ /// assert_eq!(42, x.load(Relaxed));
+ ///
+ /// assert_eq!(54, { x.fetch_add(12, Acquire); x.load(Relaxed) });
+ ///
+ /// let x = Atomic::new(42);
+ ///
+ /// assert_eq!(42, x.load(Relaxed));
+ ///
+ /// assert_eq!(54, { x.fetch_add(12, Full); x.load(Relaxed) } );
+ /// ```
+ #[inline(always)]
+ pub fn fetch_add<Rhs, Ordering: ordering::Ordering>(&self, v: Rhs, _: Ordering) -> T
+ where
+ T: AtomicAdd<Rhs>,
+ {
+ let v = T::rhs_into_delta(v);
+
+ // INVARIANT: `self.0` is a valid `T` after `atomic_fetch_add*()` due to safety requirement
+ // of `AtomicAdd`.
+ let ret = {
+ match Ordering::TYPE {
+ OrderingType::Full => T::Repr::atomic_fetch_add(&self.0, v),
+ OrderingType::Acquire => T::Repr::atomic_fetch_add_acquire(&self.0, v),
+ OrderingType::Release => T::Repr::atomic_fetch_add_release(&self.0, v),
+ OrderingType::Relaxed => T::Repr::atomic_fetch_add_relaxed(&self.0, v),
+ }
+ };
+
+ // SAFETY: `ret` comes from reading `self.0`, which is a valid `T` per type invariants.
+ unsafe { from_repr(ret) }
+ }
+}
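The `cmpxchg()` return convention makes the classic compare-and-swap retry loop compact; a hedged
sketch (the function and its saturation policy are illustrative, not part of this patch):

    use kernel::sync::atomic::{Atomic, Full, Relaxed};

    /// Increments `x`, but never past `i32::MAX`.
    fn saturating_inc(x: &Atomic<i32>) {
        let mut cur = x.load(Relaxed);
        while cur != i32::MAX {
            match x.cmpxchg(cur, cur + 1, Full) {
                // The update was applied; nothing left to do.
                Ok(_) => break,
                // Another context changed the value; retry with the latest one.
                Err(old) => cur = old,
            }
        }
    }
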
diff --git a/rust/kernel/sync/atomic/internal.rs b/rust/kernel/sync/atomic/internal.rs
new file mode 100644
index 000000000000..6fdd8e59f45b
--- /dev/null
+++ b/rust/kernel/sync/atomic/internal.rs
@@ -0,0 +1,265 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Atomic internal implementations.
+//!
+//! Provides 1:1 mapping to the C atomic operations.
+
+use crate::bindings;
+use crate::macros::paste;
+use core::cell::UnsafeCell;
+
+mod private {
+ /// Sealed trait marker to disable customized impls on atomic implementation traits.
+ pub trait Sealed {}
+}
+
+// `i32` and `i64` are the only supported atomic implementations.
+impl private::Sealed for i32 {}
+impl private::Sealed for i64 {}
+
+/// A marker trait for types that implement atomic operations with C side primitives.
+///
+/// This trait is sealed, and only types that have a direct mapping to the C side atomics should
+/// implement this:
+///
+/// - `i32` maps to `atomic_t`.
+/// - `i64` maps to `atomic64_t`.
+pub trait AtomicImpl: Sized + Send + Copy + private::Sealed {
+ /// The type of the delta in arithmetic or logical operations.
+ ///
+    /// For example, in `atomic_add(ptr, v)`, it's the type of `v`. Usually it's the same type as
+ /// [`Self`], but it may be different for the atomic pointer type.
+ type Delta;
+}
+
+// `atomic_t` implements atomic operations on `i32`.
+impl AtomicImpl for i32 {
+ type Delta = Self;
+}
+
+// `atomic64_t` implements atomic operations on `i64`.
+impl AtomicImpl for i64 {
+ type Delta = Self;
+}
+
+/// Atomic representation.
+#[repr(transparent)]
+pub struct AtomicRepr<T: AtomicImpl>(UnsafeCell<T>);
+
+impl<T: AtomicImpl> AtomicRepr<T> {
+ /// Creates a new atomic representation `T`.
+ pub const fn new(v: T) -> Self {
+ Self(UnsafeCell::new(v))
+ }
+
+ /// Returns a pointer to the underlying `T`.
+ ///
+ /// # Guarantees
+ ///
+ /// The returned pointer is valid and properly aligned (i.e. aligned to [`align_of::<T>()`]).
+ pub const fn as_ptr(&self) -> *mut T {
+ // GUARANTEE: `self.0` is an `UnsafeCell<T>`, therefore the pointer returned by `.get()`
+ // must be valid and properly aligned.
+ self.0.get()
+ }
+}
+
+// This macro generates the function signature with the given argument list and return type.
+macro_rules! declare_atomic_method {
+ (
+ $(#[doc=$doc:expr])*
+ $func:ident($($arg:ident : $arg_type:ty),*) $(-> $ret:ty)?
+ ) => {
+ paste!(
+ $(#[doc = $doc])*
+ fn [< atomic_ $func >]($($arg: $arg_type,)*) $(-> $ret)?;
+ );
+ };
+ (
+ $(#[doc=$doc:expr])*
+ $func:ident [$variant:ident $($rest:ident)*]($($arg_sig:tt)*) $(-> $ret:ty)?
+ ) => {
+ paste!(
+ declare_atomic_method!(
+ $(#[doc = $doc])*
+ [< $func _ $variant >]($($arg_sig)*) $(-> $ret)?
+ );
+ );
+
+ declare_atomic_method!(
+ $(#[doc = $doc])*
+ $func [$($rest)*]($($arg_sig)*) $(-> $ret)?
+ );
+ };
+ (
+ $(#[doc=$doc:expr])*
+ $func:ident []($($arg_sig:tt)*) $(-> $ret:ty)?
+ ) => {
+ declare_atomic_method!(
+ $(#[doc = $doc])*
+ $func($($arg_sig)*) $(-> $ret)?
+ );
+ }
+}
+
+// This macro generates the function implementation with the given argument list and return type,
+// and it will replace the "call(...)" expression with "$ctype _ $func" to call the real C function.
+macro_rules! impl_atomic_method {
+ (
+ ($ctype:ident) $func:ident($($arg:ident: $arg_type:ty),*) $(-> $ret:ty)? {
+ $unsafe:tt { call($($c_arg:expr),*) }
+ }
+ ) => {
+ paste!(
+ #[inline(always)]
+ fn [< atomic_ $func >]($($arg: $arg_type,)*) $(-> $ret)? {
+                // TODO: Ideally we want to use the SAFETY comments written at the macro invocation
+                // (e.g. in `declare_and_impl_atomic_methods!()`). However, since SAFETY comments
+                // are just comments and are not passed to macros as tokens, we cannot use them
+                // here. One potential improvement is to support attributes as an alternative to
+                // SAFETY comments, which could then be used in macro-generated code.
+ //
+ // SAFETY: specified on macro invocation.
+ $unsafe { bindings::[< $ctype _ $func >]($($c_arg,)*) }
+ }
+ );
+ };
+ (
+ ($ctype:ident) $func:ident[$variant:ident $($rest:ident)*]($($arg_sig:tt)*) $(-> $ret:ty)? {
+ $unsafe:tt { call($($arg:tt)*) }
+ }
+ ) => {
+ paste!(
+ impl_atomic_method!(
+ ($ctype) [< $func _ $variant >]($($arg_sig)*) $( -> $ret)? {
+ $unsafe { call($($arg)*) }
+ }
+ );
+ );
+ impl_atomic_method!(
+ ($ctype) $func [$($rest)*]($($arg_sig)*) $( -> $ret)? {
+ $unsafe { call($($arg)*) }
+ }
+ );
+ };
+ (
+ ($ctype:ident) $func:ident[]($($arg_sig:tt)*) $( -> $ret:ty)? {
+ $unsafe:tt { call($($arg:tt)*) }
+ }
+ ) => {
+ impl_atomic_method!(
+ ($ctype) $func($($arg_sig)*) $(-> $ret)? {
+ $unsafe { call($($arg)*) }
+ }
+ );
+ }
+}
+
+// Declares the $ops trait with methods and implements the trait for `i32` and `i64`.
+macro_rules! declare_and_impl_atomic_methods {
+ ($(#[$attr:meta])* $pub:vis trait $ops:ident {
+ $(
+ $(#[doc=$doc:expr])*
+ fn $func:ident [$($variant:ident),*]($($arg_sig:tt)*) $( -> $ret:ty)? {
+ $unsafe:tt { bindings::#call($($arg:tt)*) }
+ }
+ )*
+ }) => {
+ $(#[$attr])*
+ $pub trait $ops: AtomicImpl {
+ $(
+ declare_atomic_method!(
+ $(#[doc=$doc])*
+ $func[$($variant)*]($($arg_sig)*) $(-> $ret)?
+ );
+ )*
+ }
+
+ impl $ops for i32 {
+ $(
+ impl_atomic_method!(
+ (atomic) $func[$($variant)*]($($arg_sig)*) $(-> $ret)? {
+ $unsafe { call($($arg)*) }
+ }
+ );
+ )*
+ }
+
+ impl $ops for i64 {
+ $(
+ impl_atomic_method!(
+ (atomic64) $func[$($variant)*]($($arg_sig)*) $(-> $ret)? {
+ $unsafe { call($($arg)*) }
+ }
+ );
+ )*
+ }
+ }
+}
+
+declare_and_impl_atomic_methods!(
+ /// Basic atomic operations
+ pub trait AtomicBasicOps {
+ /// Atomic read (load).
+ fn read[acquire](a: &AtomicRepr<Self>) -> Self {
+ // SAFETY: `a.as_ptr()` is valid and properly aligned.
+ unsafe { bindings::#call(a.as_ptr().cast()) }
+ }
+
+ /// Atomic set (store).
+ fn set[release](a: &AtomicRepr<Self>, v: Self) {
+ // SAFETY: `a.as_ptr()` is valid and properly aligned.
+ unsafe { bindings::#call(a.as_ptr().cast(), v) }
+ }
+ }
+);
+
+declare_and_impl_atomic_methods!(
+ /// Exchange and compare-and-exchange atomic operations
+ pub trait AtomicExchangeOps {
+ /// Atomic exchange.
+ ///
+ /// Atomically updates `*a` to `v` and returns the old value.
+ fn xchg[acquire, release, relaxed](a: &AtomicRepr<Self>, v: Self) -> Self {
+ // SAFETY: `a.as_ptr()` is valid and properly aligned.
+ unsafe { bindings::#call(a.as_ptr().cast(), v) }
+ }
+
+ /// Atomic compare and exchange.
+ ///
+ /// If `*a` == `*old`, atomically updates `*a` to `new`. Otherwise, `*a` is not
+ /// modified, `*old` is updated to the current value of `*a`.
+ ///
+ /// Return `true` if the update of `*a` occurred, `false` otherwise.
+ fn try_cmpxchg[acquire, release, relaxed](
+ a: &AtomicRepr<Self>, old: &mut Self, new: Self
+ ) -> bool {
+ // SAFETY: `a.as_ptr()` is valid and properly aligned. `core::ptr::from_mut(old)`
+ // is valid and properly aligned.
+ unsafe { bindings::#call(a.as_ptr().cast(), core::ptr::from_mut(old), new) }
+ }
+ }
+);
+
+declare_and_impl_atomic_methods!(
+ /// Atomic arithmetic operations
+ pub trait AtomicArithmeticOps {
+ /// Atomic add (wrapping).
+ ///
+ /// Atomically updates `*a` to `(*a).wrapping_add(v)`.
+ fn add[](a: &AtomicRepr<Self>, v: Self::Delta) {
+ // SAFETY: `a.as_ptr()` is valid and properly aligned.
+ unsafe { bindings::#call(v, a.as_ptr().cast()) }
+ }
+
+ /// Atomic fetch and add (wrapping).
+ ///
+ /// Atomically updates `*a` to `(*a).wrapping_add(v)`, and returns the value of `*a`
+ /// before the update.
+ fn fetch_add[acquire, release, relaxed](a: &AtomicRepr<Self>, v: Self::Delta) -> Self {
+ // SAFETY: `a.as_ptr()` is valid and properly aligned.
+ unsafe { bindings::#call(v, a.as_ptr().cast()) }
+ }
+ }
+);
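To make the macro machinery concrete, the `AtomicBasicOps` invocation above expands, for `i32`, to
roughly the following (a sketch of the generated shape, not literal compiler output):

    impl AtomicBasicOps for i32 {
        #[inline(always)]
        fn atomic_read(a: &AtomicRepr<Self>) -> Self {
            // SAFETY: specified on macro invocation.
            unsafe { bindings::atomic_read(a.as_ptr().cast()) }
        }

        #[inline(always)]
        fn atomic_read_acquire(a: &AtomicRepr<Self>) -> Self {
            // SAFETY: specified on macro invocation.
            unsafe { bindings::atomic_read_acquire(a.as_ptr().cast()) }
        }

        // Likewise for `atomic_set` / `atomic_set_release`; the `i64` impl
        // calls the `atomic64_*` bindings instead.
    }
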
diff --git a/rust/kernel/sync/atomic/ordering.rs b/rust/kernel/sync/atomic/ordering.rs
new file mode 100644
index 000000000000..3f103aa8db99
--- /dev/null
+++ b/rust/kernel/sync/atomic/ordering.rs
@@ -0,0 +1,104 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Memory orderings.
+//!
+//! The semantics of these orderings follows the [`LKMM`] definitions and rules.
+//!
+//! - [`Acquire`] provides ordering between the load part of the annotated operation and all the
+//! following memory accesses, and if there is a store part, the store part has the [`Relaxed`]
+//! ordering.
+//! - [`Release`] provides ordering between all the preceding memory accesses and the store part of
+//! the annotated operation, and if there is a load part, the load part has the [`Relaxed`]
+//! ordering.
+//! - [`Full`] means "fully-ordered", that is:
+//! - It provides ordering between all the preceding memory accesses and the annotated operation.
+//! - It provides ordering between the annotated operation and all the following memory accesses.
+//! - It provides ordering between all the preceding memory accesses and all the following memory
+//! accesses.
+//! - All the orderings are the same strength as a full memory barrier (i.e. `smp_mb()`).
+//! - [`Relaxed`] provides no ordering except the dependency orderings. Dependency orderings are
+//! described in "DEPENDENCY RELATIONS" in [`LKMM`]'s [`explanation`].
+//!
+//! [`LKMM`]: srctree/tools/memory-model/
+//! [`explanation`]: srctree/tools/memory-model/Documentation/explanation.txt
+
+/// The annotation type for relaxed memory ordering, for the description of relaxed memory
+/// ordering, see [module-level documentation].
+///
+/// [module-level documentation]: crate::sync::atomic::ordering
+pub struct Relaxed;
+
+/// The annotation type for acquire memory ordering, for the description of acquire memory
+/// ordering, see [module-level documentation].
+///
+/// [module-level documentation]: crate::sync::atomic::ordering
+pub struct Acquire;
+
+/// The annotation type for release memory ordering, for the description of release memory
+/// ordering, see [module-level documentation].
+///
+/// [module-level documentation]: crate::sync::atomic::ordering
+pub struct Release;
+
+/// The annotation type for fully-ordered memory ordering, for the description of fully-ordered
+/// memory ordering, see [module-level documentation].
+///
+/// [module-level documentation]: crate::sync::atomic::ordering
+pub struct Full;
+
+/// Describes the exact memory ordering.
+#[doc(hidden)]
+pub enum OrderingType {
+ /// Relaxed ordering.
+ Relaxed,
+ /// Acquire ordering.
+ Acquire,
+ /// Release ordering.
+ Release,
+ /// Fully-ordered.
+ Full,
+}
+
+mod internal {
+    /// Sealed trait, can only be implemented inside the `atomic` mod.
+ pub trait Sealed {}
+
+ impl Sealed for super::Relaxed {}
+ impl Sealed for super::Acquire {}
+ impl Sealed for super::Release {}
+ impl Sealed for super::Full {}
+}
+
+/// The trait bound for annotating operations that support any ordering.
+pub trait Ordering: internal::Sealed {
+ /// Describes the exact memory ordering.
+ const TYPE: OrderingType;
+}
+
+impl Ordering for Relaxed {
+ const TYPE: OrderingType = OrderingType::Relaxed;
+}
+
+impl Ordering for Acquire {
+ const TYPE: OrderingType = OrderingType::Acquire;
+}
+
+impl Ordering for Release {
+ const TYPE: OrderingType = OrderingType::Release;
+}
+
+impl Ordering for Full {
+ const TYPE: OrderingType = OrderingType::Full;
+}
+
+/// The trait bound for operations that only support acquire or relaxed ordering.
+pub trait AcquireOrRelaxed: Ordering {}
+
+impl AcquireOrRelaxed for Acquire {}
+impl AcquireOrRelaxed for Relaxed {}
+
+/// The trait bound for operations that only support release or relaxed ordering.
+pub trait ReleaseOrRelaxed: Ordering {}
+
+impl ReleaseOrRelaxed for Release {}
+impl ReleaseOrRelaxed for Relaxed {}
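These marker traits let an operation accept exactly the orderings that make sense for it, with the
check happening at compile time; a small sketch (the function is hypothetical):

    use kernel::sync::atomic::{ordering::AcquireOrRelaxed, Atomic};

    // Compiles: loads support only acquire or relaxed orderings.
    fn peek<O: AcquireOrRelaxed>(x: &Atomic<i32>, o: O) -> i32 {
        x.load(o)
    }

    // By contrast, `x.load(Release)` would be rejected at compile time,
    // because `Release` does not implement `AcquireOrRelaxed`.
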
diff --git a/rust/kernel/sync/atomic/predefine.rs b/rust/kernel/sync/atomic/predefine.rs
new file mode 100644
index 000000000000..45a17985cda4
--- /dev/null
+++ b/rust/kernel/sync/atomic/predefine.rs
@@ -0,0 +1,169 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Pre-defined atomic types
+
+use crate::static_assert;
+use core::mem::{align_of, size_of};
+
+// SAFETY: `i32` has the same size and alignment as itself, and is round-trip transmutable to
+// itself.
+unsafe impl super::AtomicType for i32 {
+ type Repr = i32;
+}
+
+// SAFETY: The wrapping add result of two `i32`s is a valid `i32`.
+unsafe impl super::AtomicAdd<i32> for i32 {
+ fn rhs_into_delta(rhs: i32) -> i32 {
+ rhs
+ }
+}
+
+// SAFETY: `i64` has the same size and alignment as itself, and is round-trip transmutable to
+// itself.
+unsafe impl super::AtomicType for i64 {
+ type Repr = i64;
+}
+
+// SAFETY: The wrapping add result of two `i64`s is a valid `i64`.
+unsafe impl super::AtomicAdd<i64> for i64 {
+ fn rhs_into_delta(rhs: i64) -> i64 {
+ rhs
+ }
+}
+
+// Defines an internal type that always maps to the integer type which has the same size and
+// alignment as `isize` and `usize`. `isize` and `usize` are always bi-directionally transmutable
+// to `isize_atomic_repr`, which also always implements `AtomicImpl`.
+#[allow(non_camel_case_types)]
+#[cfg(not(CONFIG_64BIT))]
+type isize_atomic_repr = i32;
+#[allow(non_camel_case_types)]
+#[cfg(CONFIG_64BIT)]
+type isize_atomic_repr = i64;
+
+// Ensure size and alignment requirements are checked.
+static_assert!(size_of::<isize>() == size_of::<isize_atomic_repr>());
+static_assert!(align_of::<isize>() == align_of::<isize_atomic_repr>());
+static_assert!(size_of::<usize>() == size_of::<isize_atomic_repr>());
+static_assert!(align_of::<usize>() == align_of::<isize_atomic_repr>());
+
+// SAFETY: `isize` has the same size and alignment as `isize_atomic_repr`, and is round-trip
+// transmutable to `isize_atomic_repr`.
+unsafe impl super::AtomicType for isize {
+ type Repr = isize_atomic_repr;
+}
+
+// SAFETY: The wrapping add result of two `isize_atomic_repr`s is a valid `isize`.
+unsafe impl super::AtomicAdd<isize> for isize {
+ fn rhs_into_delta(rhs: isize) -> isize_atomic_repr {
+ rhs as isize_atomic_repr
+ }
+}
+
+// SAFETY: `u32` and `i32` have the same size and alignment, and `u32` is round-trip transmutable
+// `i32`.
+unsafe impl super::AtomicType for u32 {
+ type Repr = i32;
+}
+
+// SAFETY: The wrapping add result of two `i32`s is a valid `u32`.
+unsafe impl super::AtomicAdd<u32> for u32 {
+ fn rhs_into_delta(rhs: u32) -> i32 {
+ rhs as i32
+ }
+}
+
+// SAFETY: `u64` and `i64` have the same size and alignment, and `u64` is round-trip transmutable
+// `i64`.
+unsafe impl super::AtomicType for u64 {
+ type Repr = i64;
+}
+
+// SAFETY: The wrapping add result of two `i64`s is a valid `u64`.
+unsafe impl super::AtomicAdd<u64> for u64 {
+ fn rhs_into_delta(rhs: u64) -> i64 {
+ rhs as i64
+ }
+}
+
+// SAFETY: `usize` has the same size and alignment as `isize_atomic_repr`, and is round-trip
+// transmutable to `isize_atomic_repr`.
+unsafe impl super::AtomicType for usize {
+ type Repr = isize_atomic_repr;
+}
+
+// SAFETY: The wrapping add result of two `isize_atomic_repr`s is a valid `usize`.
+unsafe impl super::AtomicAdd<usize> for usize {
+ fn rhs_into_delta(rhs: usize) -> isize_atomic_repr {
+ rhs as isize_atomic_repr
+ }
+}
+
+use crate::macros::kunit_tests;
+
+#[kunit_tests(rust_atomics)]
+mod tests {
+ use super::super::*;
+
+ // Call $fn($val) with each $type of $val.
+ macro_rules! for_each_type {
+ ($val:literal in [$($type:ty),*] $fn:expr) => {
+ $({
+ let v: $type = $val;
+
+ $fn(v);
+ })*
+ }
+ }
+
+ #[test]
+ fn atomic_basic_tests() {
+ for_each_type!(42 in [i32, i64, u32, u64, isize, usize] |v| {
+ let x = Atomic::new(v);
+
+ assert_eq!(v, x.load(Relaxed));
+ });
+ }
+
+ #[test]
+ fn atomic_xchg_tests() {
+ for_each_type!(42 in [i32, i64, u32, u64, isize, usize] |v| {
+ let x = Atomic::new(v);
+
+ let old = v;
+ let new = v + 1;
+
+ assert_eq!(old, x.xchg(new, Full));
+ assert_eq!(new, x.load(Relaxed));
+ });
+ }
+
+ #[test]
+ fn atomic_cmpxchg_tests() {
+ for_each_type!(42 in [i32, i64, u32, u64, isize, usize] |v| {
+ let x = Atomic::new(v);
+
+ let old = v;
+ let new = v + 1;
+
+ assert_eq!(Err(old), x.cmpxchg(new, new, Full));
+ assert_eq!(old, x.load(Relaxed));
+ assert_eq!(Ok(old), x.cmpxchg(old, new, Relaxed));
+ assert_eq!(new, x.load(Relaxed));
+ });
+ }
+
+ #[test]
+ fn atomic_arithmetic_tests() {
+ for_each_type!(42 in [i32, i64, u32, u64, isize, usize] |v| {
+ let x = Atomic::new(v);
+
+ assert_eq!(v, x.fetch_add(12, Full));
+ assert_eq!(v + 12, x.load(Relaxed));
+
+ x.add(13, Relaxed);
+
+ assert_eq!(v + 25, x.load(Relaxed));
+ });
+ }
+}
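Mapping further types follows the same pattern as the impls above; a hedged sketch for a
hypothetical `Ticks` newtype (not part of this patch):

    use kernel::sync::atomic::{Atomic, AtomicAdd, AtomicType, Relaxed};

    #[derive(Clone, Copy)]
    #[repr(transparent)]
    struct Ticks(u32);

    // SAFETY: `Ticks` is `repr(transparent)` over `u32`, so it has the same
    // size and alignment as `i32` and is round-trip transmutable to it.
    unsafe impl AtomicType for Ticks {
        type Repr = i32;
    }

    // SAFETY: Any wrapping-add result of two `i32`s is a valid bit pattern
    // for `u32`, and hence for `Ticks`.
    unsafe impl AtomicAdd<u32> for Ticks {
        fn rhs_into_delta(rhs: u32) -> i32 {
            rhs as i32
        }
    }

    let t = Atomic::new(Ticks(0));
    t.add(5, Relaxed);
    assert_eq!(t.load(Relaxed).0, 5);
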
diff --git a/rust/kernel/sync/barrier.rs b/rust/kernel/sync/barrier.rs
new file mode 100644
index 000000000000..8f2d435fcd94
--- /dev/null
+++ b/rust/kernel/sync/barrier.rs
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Memory barriers.
+//!
+//! These primitives have the same semantics as their C counterparts; the precise definitions
+//! of the semantics can be found at [`LKMM`].
+//!
+//! [`LKMM`]: srctree/tools/memory-model/
+
+/// A compiler barrier.
+///
+/// A barrier that prevents the compiler from reordering memory accesses across the barrier.
+#[inline(always)]
+pub(crate) fn barrier() {
+ // By default, Rust inline asms are treated as being able to access any memory or flags, hence
+ // it suffices as a compiler barrier.
+ //
+ // SAFETY: An empty asm block.
+ unsafe { core::arch::asm!("") };
+}
+
+/// A full memory barrier.
+///
+/// A barrier that prevents the compiler and the CPU from reordering memory accesses across the
+/// barrier.
+#[inline(always)]
+pub fn smp_mb() {
+ if cfg!(CONFIG_SMP) {
+ // SAFETY: `smp_mb()` is safe to call.
+ unsafe { bindings::smp_mb() };
+ } else {
+ barrier();
+ }
+}
+
+/// A write-write memory barrier.
+///
+/// A barrier that prevents the compiler and the CPU from reordering memory write accesses across
+/// the barrier.
+#[inline(always)]
+pub fn smp_wmb() {
+ if cfg!(CONFIG_SMP) {
+ // SAFETY: `smp_wmb()` is safe to call.
+ unsafe { bindings::smp_wmb() };
+ } else {
+ barrier();
+ }
+}
+
+/// A read-read memory barrier.
+///
+/// A barrier that prevents the compiler and the CPU from reordering memory read accesses across
+/// the barrier.
+#[inline(always)]
+pub fn smp_rmb() {
+ if cfg!(CONFIG_SMP) {
+ // SAFETY: `smp_rmb()` is safe to call.
+ unsafe { bindings::smp_rmb() };
+ } else {
+ barrier();
+ }
+}
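The write/read barrier pair is what makes the classic message-passing idiom work; a sketch
assuming `DATA` and `READY` are accessed only as shown (the statics and functions are
illustrative):

    use kernel::sync::atomic::{Atomic, Relaxed};
    use kernel::sync::barrier::{smp_rmb, smp_wmb};

    static DATA: Atomic<i32> = Atomic::new(0);
    static READY: Atomic<i32> = Atomic::new(0);

    fn writer() {
        DATA.store(42, Relaxed);
        // Order the data store before the flag store.
        smp_wmb();
        READY.store(1, Relaxed);
    }

    fn reader() -> Option<i32> {
        if READY.load(Relaxed) == 1 {
            // Pairs with the smp_wmb() in writer(): once the flag is
            // observed, the data store is observable too.
            smp_rmb();
            Some(DATA.load(Relaxed))
        } else {
            None
        }
    }
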
diff --git a/rust/kernel/sync/refcount.rs b/rust/kernel/sync/refcount.rs
new file mode 100644
index 000000000000..19236a5bccde
--- /dev/null
+++ b/rust/kernel/sync/refcount.rs
@@ -0,0 +1,113 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Atomic reference counting.
+//!
+//! C header: [`include/linux/refcount.h`](srctree/include/linux/refcount.h)
+
+use crate::build_assert;
+use crate::sync::atomic::Atomic;
+use crate::types::Opaque;
+
+/// Atomic reference counter.
+///
+/// This type is conceptually an atomic integer, but, unlike normal atomic integers, it provides
+/// saturation semantics. Values in the negative range, when viewed as signed integers, are
+/// saturation (bad) values. For details about the saturation semantics, please refer to the top
+/// of [`include/linux/refcount.h`](srctree/include/linux/refcount.h).
+///
+/// Wraps the kernel's C `refcount_t`.
+#[repr(transparent)]
+pub struct Refcount(Opaque<bindings::refcount_t>);
+
+impl Refcount {
+ /// Construct a new [`Refcount`] from an initial value.
+ ///
+ /// The initial value should be non-saturated.
+ #[inline]
+ pub fn new(value: i32) -> Self {
+ build_assert!(value >= 0, "initial value saturated");
+ // SAFETY: There are no safety requirements for this FFI call.
+ Self(Opaque::new(unsafe { bindings::REFCOUNT_INIT(value) }))
+ }
+
+ #[inline]
+ fn as_ptr(&self) -> *mut bindings::refcount_t {
+ self.0.get()
+ }
+
+ /// Get the underlying atomic counter that backs the refcount.
+ ///
+ /// NOTE: Usage of this function is discouraged as it can circumvent the protections offered by
+ /// `refcount.h`. If there is no way to achieve the result using APIs in `refcount.h`, then
+ /// this function can be used. Otherwise consider adding a binding for the required API.
+ #[inline]
+ pub fn as_atomic(&self) -> &Atomic<i32> {
+ let ptr = self.0.get().cast();
+ // SAFETY: `refcount_t` is a transparent wrapper of `atomic_t`, which is an atomic 32-bit
+ // integer that is layout-wise compatible with `Atomic<i32>`. All values are valid for
+ // `refcount_t`, despite some of the values being considered saturated and "bad".
+ unsafe { &*ptr }
+ }
+
+ /// Set a refcount's value.
+ #[inline]
+ pub fn set(&self, value: i32) {
+ // SAFETY: `self.as_ptr()` is valid.
+ unsafe { bindings::refcount_set(self.as_ptr(), value) }
+ }
+
+ /// Increment a refcount.
+ ///
+    /// It will saturate on overflow and `WARN`. It will also `WARN` if the refcount is 0, as this
+ /// represents a possible use-after-free condition.
+ ///
+ /// Provides no memory ordering, it is assumed that caller already has a reference on the
+ /// object.
+ #[inline]
+ pub fn inc(&self) {
+        // SAFETY: `self.as_ptr()` is valid.
+ unsafe { bindings::refcount_inc(self.as_ptr()) }
+ }
+
+ /// Decrement a refcount.
+ ///
+ /// It will `WARN` on underflow and fail to decrement when saturated.
+ ///
+ /// Provides release memory ordering, such that prior loads and stores are done
+ /// before.
+ #[inline]
+ pub fn dec(&self) {
+ // SAFETY: `self.as_ptr()` is valid.
+ unsafe { bindings::refcount_dec(self.as_ptr()) }
+ }
+
+ /// Decrement a refcount and test if it is 0.
+ ///
+ /// It will `WARN` on underflow and fail to decrement when saturated.
+ ///
+ /// Provides release memory ordering, such that prior loads and stores are done
+ /// before, and provides an acquire ordering on success such that memory deallocation
+ /// must come after.
+ ///
+ /// Returns true if the resulting refcount is 0, false otherwise.
+ ///
+ /// # Notes
+ ///
+ /// A common pattern of using `Refcount` is to free memory when the reference count reaches
+ /// zero. This means that the reference to `Refcount` could become invalid after calling this
+ /// function. This is fine as long as the reference to `Refcount` is no longer used when this
+ /// function returns `false`. It is not necessary to use raw pointers in this scenario, see
+ /// <https://github.com/rust-lang/rust/issues/55005>.
+ #[inline]
+ #[must_use = "use `dec` instead if you do not need to test if it is 0"]
+ pub fn dec_and_test(&self) -> bool {
+ // SAFETY: `self.as_ptr()` is valid.
+ unsafe { bindings::refcount_dec_and_test(self.as_ptr()) }
+ }
+}
+
+// SAFETY: `refcount_t` is thread-safe.
+unsafe impl Send for Refcount {}
+
+// SAFETY: `refcount_t` is thread-safe.
+unsafe impl Sync for Refcount {}
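A short sketch of the intended get/put shape (the surrounding object and the free step are
illustrative):

    let refcount = Refcount::new(1);

    // "get": take an additional reference.
    refcount.inc();

    // "put": drop a reference; this is not the last one yet.
    assert!(!refcount.dec_and_test());

    // "put": the final reference is gone, so the owner would free the
    // enclosing object here.
    assert!(refcount.dec_and_test());
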
diff --git a/rust/kernel/task.rs b/rust/kernel/task.rs
index 7d0935bc325c..49fad6de0674 100644
--- a/rust/kernel/task.rs
+++ b/rust/kernel/task.rs
@@ -9,7 +9,8 @@ use crate::{
ffi::{c_int, c_long, c_uint},
mm::MmWithUser,
pid_namespace::PidNamespace,
- types::{ARef, NotThreadSafe, Opaque},
+ sync::aref::ARef,
+ types::{NotThreadSafe, Opaque},
};
use core::{
cmp::{Eq, PartialEq},
@@ -76,7 +77,7 @@ macro_rules! current {
/// incremented when creating `State` and decremented when it is dropped:
///
/// ```
-/// use kernel::{task::Task, types::ARef};
+/// use kernel::{task::Task, sync::aref::ARef};
///
/// struct State {
/// creator: ARef<Task>,
@@ -347,7 +348,7 @@ impl CurrentTask {
}
// SAFETY: The type invariants guarantee that `Task` is always refcounted.
-unsafe impl crate::types::AlwaysRefCounted for Task {
+unsafe impl crate::sync::aref::AlwaysRefCounted for Task {
#[inline]
fn inc_ref(&self) {
// SAFETY: The existence of a shared reference means that the refcount is nonzero.
diff --git a/rust/kernel/time.rs b/rust/kernel/time.rs
index 64c8dcf548d6..6ea98dfcd027 100644
--- a/rust/kernel/time.rs
+++ b/rust/kernel/time.rs
@@ -25,6 +25,7 @@
//! C header: [`include/linux/ktime.h`](srctree/include/linux/ktime.h).
use core::marker::PhantomData;
+use core::ops;
pub mod delay;
pub mod hrtimer;
@@ -200,9 +201,31 @@ impl<C: ClockSource> Instant<C> {
pub(crate) fn as_nanos(&self) -> i64 {
self.inner
}
+
+ /// Create an [`Instant`] from a `ktime_t` without checking if it is non-negative.
+ ///
+ /// # Panics
+ ///
+ /// On debug builds, this function will panic if `ktime` is not in the range from 0 to
+ /// `KTIME_MAX`.
+ ///
+ /// # Safety
+ ///
+ /// The caller promises that `ktime` is in the range from 0 to `KTIME_MAX`.
+ #[inline]
+ pub(crate) unsafe fn from_ktime(ktime: bindings::ktime_t) -> Self {
+ debug_assert!(ktime >= 0);
+
+ // INVARIANT: Our safety contract ensures that `ktime` is in the range from 0 to
+ // `KTIME_MAX`.
+ Self {
+ inner: ktime,
+ _c: PhantomData,
+ }
+ }
}
-impl<C: ClockSource> core::ops::Sub for Instant<C> {
+impl<C: ClockSource> ops::Sub for Instant<C> {
type Output = Delta;
// By the type invariant, it never overflows.
@@ -214,6 +237,46 @@ impl<C: ClockSource> core::ops::Sub for Instant<C> {
}
}
+impl<T: ClockSource> ops::Add<Delta> for Instant<T> {
+ type Output = Self;
+
+ #[inline]
+ fn add(self, rhs: Delta) -> Self::Output {
+        // INVARIANT: With arithmetic over/underflow checks enabled, this will panic if we
+        // overflow (e.g. go above `KTIME_MAX`).
+ let res = self.inner + rhs.nanos;
+
+        // INVARIANT: With overflow checks enabled, we verify here that the value is >= 0.
+ #[cfg(CONFIG_RUST_OVERFLOW_CHECKS)]
+ assert!(res >= 0);
+
+ Self {
+ inner: res,
+ _c: PhantomData,
+ }
+ }
+}
+
+impl<T: ClockSource> ops::Sub<Delta> for Instant<T> {
+ type Output = Self;
+
+ #[inline]
+ fn sub(self, rhs: Delta) -> Self::Output {
+        // INVARIANT: With arithmetic over/underflow checks enabled, this will panic if we
+        // over/underflow (e.g. go above `KTIME_MAX` or below zero).
+ let res = self.inner - rhs.nanos;
+
+        // INVARIANT: With overflow checks enabled, we verify here that the value is >= 0.
+ #[cfg(CONFIG_RUST_OVERFLOW_CHECKS)]
+ assert!(res >= 0);
+
+ Self {
+ inner: res,
+ _c: PhantomData,
+ }
+ }
+}
+
/// A span of time.
///
/// This struct represents a span of time, with its value stored as nanoseconds.
@@ -224,6 +287,78 @@ pub struct Delta {
nanos: i64,
}
+impl ops::Add for Delta {
+ type Output = Self;
+
+ #[inline]
+ fn add(self, rhs: Self) -> Self {
+ Self {
+ nanos: self.nanos + rhs.nanos,
+ }
+ }
+}
+
+impl ops::AddAssign for Delta {
+ #[inline]
+ fn add_assign(&mut self, rhs: Self) {
+ self.nanos += rhs.nanos;
+ }
+}
+
+impl ops::Sub for Delta {
+ type Output = Self;
+
+ #[inline]
+ fn sub(self, rhs: Self) -> Self::Output {
+ Self {
+ nanos: self.nanos - rhs.nanos,
+ }
+ }
+}
+
+impl ops::SubAssign for Delta {
+ #[inline]
+ fn sub_assign(&mut self, rhs: Self) {
+ self.nanos -= rhs.nanos;
+ }
+}
+
+impl ops::Mul<i64> for Delta {
+ type Output = Self;
+
+ #[inline]
+ fn mul(self, rhs: i64) -> Self::Output {
+ Self {
+ nanos: self.nanos * rhs,
+ }
+ }
+}
+
+impl ops::MulAssign<i64> for Delta {
+ #[inline]
+ fn mul_assign(&mut self, rhs: i64) {
+ self.nanos *= rhs;
+ }
+}
+
+impl ops::Div for Delta {
+ type Output = i64;
+
+ #[inline]
+ fn div(self, rhs: Self) -> Self::Output {
+ #[cfg(CONFIG_64BIT)]
+ {
+ self.nanos / rhs.nanos
+ }
+
+ #[cfg(not(CONFIG_64BIT))]
+ {
+ // SAFETY: This function is always safe to call regardless of the input values
+ unsafe { bindings::div64_s64(self.nanos, rhs.nanos) }
+ }
+ }
+}
+
impl Delta {
/// A span of time equal to zero.
pub const ZERO: Self = Self { nanos: 0 };
@@ -312,4 +447,30 @@ impl Delta {
bindings::ktime_to_ms(self.as_nanos())
}
}
+
+    /// Return `self % divisor` where `divisor` is in nanoseconds.
+    ///
+    /// The kernel doesn't have any emulation for `s64 % s64` on 32 bit platforms, so this is
+    /// limited to 32 bit divisors.
+    #[inline]
+    pub fn rem_nanos(self, divisor: i32) -> Self {
+        #[cfg(CONFIG_64BIT)]
+        {
+            Self {
+                nanos: self.as_nanos() % i64::from(divisor),
+            }
+        }
+
+        #[cfg(not(CONFIG_64BIT))]
+        {
+            let mut rem = 0;
+
+            // SAFETY: `rem` is on the stack, so we can always provide a valid pointer to it.
+            unsafe { bindings::div_s64_rem(self.as_nanos(), divisor, &mut rem) };
+
+            Self {
+                nanos: i64::from(rem),
+            }
+        }
+    }
}
diff --git a/rust/kernel/time/hrtimer.rs b/rust/kernel/time/hrtimer.rs
index 144e3b57cc78..856d2d929a00 100644
--- a/rust/kernel/time/hrtimer.rs
+++ b/rust/kernel/time/hrtimer.rs
@@ -69,9 +69,14 @@
use super::{ClockSource, Delta, Instant};
use crate::{prelude::*, types::Opaque};
-use core::marker::PhantomData;
+use core::{marker::PhantomData, ptr::NonNull};
use pin_init::PinInit;
+/// A type alias for the [`Instant<C>`] of a given `T` from [`HrTimer<T>`], where `C` is the
+/// [`ClockSource`] of that [`HrTimer`].
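+///
+/// For example, if the [`HrTimerMode`] of `T` uses `Monotonic` as its [`ClockSource`], then
+/// `HrTimerInstant<T>` is `Instant<Monotonic>`.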
+pub type HrTimerInstant<T> = Instant<<<T as HasHrTimer<T>>::TimerMode as HrTimerMode>::Clock>;
+
/// A timer backed by a C `struct hrtimer`.
///
/// # Invariants
@@ -163,6 +168,84 @@ impl<T> HrTimer<T> {
// handled on the C side.
unsafe { bindings::hrtimer_cancel(c_timer_ptr) != 0 }
}
+
+ /// Forward the timer expiry for a given timer pointer.
+ ///
+ /// # Safety
+ ///
+ /// - `self_ptr` must point to a valid `Self`.
+ /// - The caller must either have exclusive access to the data pointed at by `self_ptr`, or be
+ /// within the context of the timer callback.
+ #[inline]
+ unsafe fn raw_forward(self_ptr: *mut Self, now: HrTimerInstant<T>, interval: Delta) -> u64
+ where
+ T: HasHrTimer<T>,
+ {
+ // SAFETY:
+ // * The C API requirements for this function are fulfilled by our safety contract.
+        // * `self_ptr` is guaranteed to point to a valid `Self` via our safety contract.
+ unsafe {
+ bindings::hrtimer_forward(Self::raw_get(self_ptr), now.as_nanos(), interval.as_nanos())
+ }
+ }
+
+ /// Conditionally forward the timer.
+ ///
+ /// If the timer expires after `now`, this function does nothing and returns 0. If the timer
+ /// expired at or before `now`, this function forwards the timer by `interval` until the timer
+ /// expires after `now` and then returns the number of times the timer was forwarded by
+ /// `interval`.
+ ///
+    /// This function is mainly useful for timer types that can provide exclusive access to the
+ /// timer when the timer is not running. For forwarding the timer from within the timer callback
+ /// context, see [`HrTimerCallbackContext::forward()`].
+ ///
+ /// Returns the number of overruns that occurred as a result of the timer expiry change.
+ pub fn forward(self: Pin<&mut Self>, now: HrTimerInstant<T>, interval: Delta) -> u64
+ where
+ T: HasHrTimer<T>,
+ {
+        // SAFETY: We never move out of `this`; `raw_forward` does not move `Self`.
+ let this = unsafe { self.get_unchecked_mut() };
+
+ // SAFETY: By existence of `Pin<&mut Self>`, the pointer passed to `raw_forward` points to a
+ // valid `Self` that we have exclusive access to.
+ unsafe { Self::raw_forward(this, now, interval) }
+ }
+
+ /// Conditionally forward the timer.
+ ///
+    /// This is a variant of [`forward()`](Self::forward) that uses the current time of the
+    /// [`HrTimer`]'s clock as `now`.
+ pub fn forward_now(self: Pin<&mut Self>, interval: Delta) -> u64
+ where
+ T: HasHrTimer<T>,
+ {
+ self.forward(HrTimerInstant::<T>::now(), interval)
+ }
+
+    /// Return the expiry time of this [`HrTimer`].
+ ///
+ /// This value should only be used as a snapshot, as the actual expiry time could change after
+ /// this function is called.
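+    ///
+    /// # Examples
+    ///
+    /// An illustrative sketch of computing the time remaining until expiry; `timer` is assumed
+    /// to be a `&HrTimer<T>` with `T: HasHrTimer<T>`:
+    ///
+    /// ```ignore
+    /// let remaining: Delta = timer.expires() - HrTimerInstant::<T>::now();
+    /// ```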
+ pub fn expires(&self) -> HrTimerInstant<T>
+ where
+ T: HasHrTimer<T>,
+ {
+ // SAFETY: `self` is an immutable reference and thus always points to a valid `HrTimer`.
+ let c_timer_ptr = unsafe { HrTimer::raw_get(self) };
+
+ // SAFETY:
+        // - Timers cannot have negative `ktime_t` values as their expiration time.
+        // - There is no actual locking here; a racy read is fine and expected.
+ unsafe {
+ Instant::from_ktime(
+ // This `read_volatile` is intended to correspond to a READ_ONCE call.
+ // FIXME(read_once): Replace with `read_once` when available on the Rust side.
+ core::ptr::read_volatile(&raw const ((*c_timer_ptr).node.expires)),
+ )
+ }
+ }
}
/// Implemented by pointer types that point to structs that contain a [`HrTimer`].
@@ -300,9 +383,13 @@ pub trait HrTimerCallback {
type Pointer<'a>: RawHrTimerCallback;
/// Called by the timer logic when the timer fires.
- fn run(this: <Self::Pointer<'_> as RawHrTimerCallback>::CallbackTarget<'_>) -> HrTimerRestart
+ fn run(
+ this: <Self::Pointer<'_> as RawHrTimerCallback>::CallbackTarget<'_>,
+ ctx: HrTimerCallbackContext<'_, Self>,
+ ) -> HrTimerRestart
where
- Self: Sized;
+ Self: Sized,
+ Self: HasHrTimer<Self>;
}
/// A handle representing a potentially running timer.
@@ -324,6 +411,8 @@ pub unsafe trait HrTimerHandle {
/// Note that the timer might be started by a concurrent start operation. If
/// so, the timer might not be in the **stopped** state when this function
/// returns.
+ ///
+ /// Returns `true` if the timer was running.
fn cancel(&mut self) -> bool;
}
@@ -585,6 +674,63 @@ impl<C: ClockSource> HrTimerMode for RelativePinnedHardMode<C> {
type Expires = Delta;
}
+/// A privileged smart pointer for a [`HrTimer`] callback context.
+///
+/// Many [`HrTimer`] methods can only be called in two situations:
+///
+/// * When the caller has exclusive access to the `HrTimer` and the `HrTimer` is guaranteed not to
+/// be running.
+/// * From within the context of an `HrTimer`'s callback method.
+///
+/// This type provides access to said methods from within a timer callback context.
+///
+/// # Invariants
+///
+/// * The existence of this type means the caller is currently within the callback for an
+/// [`HrTimer`].
+/// * `self.0` always points to a live instance of [`HrTimer<T>`].
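+///
+/// # Examples
+///
+/// A condensed sketch of a periodic timer callback; `MyTimer` is a hypothetical type whose
+/// [`HasHrTimer`] setup (pin-init, `impl_has_hr_timer!`) is elided:
+///
+/// ```ignore
+/// impl HrTimerCallback for MyTimer {
+///     type Pointer<'a> = Pin<KBox<MyTimer>>;
+///
+///     fn run(
+///         _this: Pin<&mut MyTimer>,
+///         mut ctx: HrTimerCallbackContext<'_, MyTimer>,
+///     ) -> HrTimerRestart {
+///         // Re-arm the timer one period (here an arbitrary 100 ms) past the current clock
+///         // time, then ask for the timer to be restarted.
+///         ctx.forward_now(Delta::from_millis(100));
+///         HrTimerRestart::Restart
+///     }
+/// }
+/// ```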
+pub struct HrTimerCallbackContext<'a, T: HasHrTimer<T>>(NonNull<HrTimer<T>>, PhantomData<&'a ()>);
+
+impl<'a, T: HasHrTimer<T>> HrTimerCallbackContext<'a, T> {
+ /// Create a new [`HrTimerCallbackContext`].
+ ///
+ /// # Safety
+ ///
+    /// This function relies on the caller being within the context of a timer callback, so it must
+    /// not be used anywhere except from within implementations of [`RawHrTimerCallback::run`]. The
+    /// caller promises that `timer` points to a valid, initialized instance of
+    /// [`bindings::hrtimer`].
+ ///
+ /// The returned `Self` must not outlive the function context of [`RawHrTimerCallback::run`]
+ /// where this function is called.
+ pub(crate) unsafe fn from_raw(timer: *mut HrTimer<T>) -> Self {
+ // SAFETY: The caller guarantees `timer` is a valid pointer to an initialized
+        // `bindings::hrtimer`.
+ // INVARIANT: Our safety contract ensures that we're within the context of a timer callback
+ // and that `timer` points to a live instance of `HrTimer<T>`.
+ Self(unsafe { NonNull::new_unchecked(timer) }, PhantomData)
+ }
+
+ /// Conditionally forward the timer.
+ ///
+ /// This function is identical to [`HrTimer::forward()`] except that it may only be used from
+ /// within the context of a [`HrTimer`] callback.
+ pub fn forward(&mut self, now: HrTimerInstant<T>, interval: Delta) -> u64 {
+ // SAFETY:
+        // - We are guaranteed to be within the context of a timer callback by our type invariants.
+        // - By our type invariants, `self.0` always points to a valid `HrTimer<T>`.
+ unsafe { HrTimer::<T>::raw_forward(self.0.as_ptr(), now, interval) }
+ }
+
+ /// Conditionally forward the timer.
+ ///
+    /// This is a variant of [`HrTimerCallbackContext::forward()`] that uses the current time of
+    /// the [`HrTimer`]'s clock as `now`.
+    pub fn forward_now(&mut self, interval: Delta) -> u64 {
+        self.forward(HrTimerInstant::<T>::now(), interval)
+ }
+}
+
/// Use to implement the [`HasHrTimer<T>`] trait.
///
/// See [`module`] documentation for an example.
diff --git a/rust/kernel/time/hrtimer/arc.rs b/rust/kernel/time/hrtimer/arc.rs
index ed490a7a8950..7be82bcb352a 100644
--- a/rust/kernel/time/hrtimer/arc.rs
+++ b/rust/kernel/time/hrtimer/arc.rs
@@ -3,6 +3,7 @@
use super::HasHrTimer;
use super::HrTimer;
use super::HrTimerCallback;
+use super::HrTimerCallbackContext;
use super::HrTimerHandle;
use super::HrTimerMode;
use super::HrTimerPointer;
@@ -99,6 +100,12 @@ where
// allocation from other `Arc` clones.
let receiver = unsafe { ArcBorrow::from_raw(data_ptr) };
- T::run(receiver).into_c()
+ // SAFETY:
+ // - By C API contract `timer_ptr` is the pointer that we passed when queuing the timer, so
+ // it is a valid pointer to a `HrTimer<T>` embedded in a `T`.
+        // - We are within `RawHrTimerCallback::run`.
+ let context = unsafe { HrTimerCallbackContext::from_raw(timer_ptr) };
+
+ T::run(receiver, context).into_c()
}
}
diff --git a/rust/kernel/time/hrtimer/pin.rs b/rust/kernel/time/hrtimer/pin.rs
index aef16d9ee2f0..4d39ef781697 100644
--- a/rust/kernel/time/hrtimer/pin.rs
+++ b/rust/kernel/time/hrtimer/pin.rs
@@ -3,6 +3,7 @@
use super::HasHrTimer;
use super::HrTimer;
use super::HrTimerCallback;
+use super::HrTimerCallbackContext;
use super::HrTimerHandle;
use super::HrTimerMode;
use super::RawHrTimerCallback;
@@ -103,6 +104,12 @@ where
// here.
let receiver_pin = unsafe { Pin::new_unchecked(receiver_ref) };
- T::run(receiver_pin).into_c()
+ // SAFETY:
+ // - By C API contract `timer_ptr` is the pointer that we passed when queuing the timer, so
+ // it is a valid pointer to a `HrTimer<T>` embedded in a `T`.
+        // - We are within `RawHrTimerCallback::run`.
+ let context = unsafe { HrTimerCallbackContext::from_raw(timer_ptr) };
+
+ T::run(receiver_pin, context).into_c()
}
}
diff --git a/rust/kernel/time/hrtimer/pin_mut.rs b/rust/kernel/time/hrtimer/pin_mut.rs
index 767d0a4e8a2c..9d9447d4d57e 100644
--- a/rust/kernel/time/hrtimer/pin_mut.rs
+++ b/rust/kernel/time/hrtimer/pin_mut.rs
@@ -1,8 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
use super::{
- HasHrTimer, HrTimer, HrTimerCallback, HrTimerHandle, HrTimerMode, RawHrTimerCallback,
- UnsafeHrTimerPointer,
+ HasHrTimer, HrTimer, HrTimerCallback, HrTimerCallbackContext, HrTimerHandle, HrTimerMode,
+ RawHrTimerCallback, UnsafeHrTimerPointer,
};
use core::{marker::PhantomData, pin::Pin, ptr::NonNull};
@@ -107,6 +107,12 @@ where
// here.
let receiver_pin = unsafe { Pin::new_unchecked(receiver_ref) };
- T::run(receiver_pin).into_c()
+ // SAFETY:
+ // - By C API contract `timer_ptr` is the pointer that we passed when queuing the timer, so
+ // it is a valid pointer to a `HrTimer<T>` embedded in a `T`.
+        // - We are within `RawHrTimerCallback::run`.
+ let context = unsafe { HrTimerCallbackContext::from_raw(timer_ptr) };
+
+ T::run(receiver_pin, context).into_c()
}
}
diff --git a/rust/kernel/time/hrtimer/tbox.rs b/rust/kernel/time/hrtimer/tbox.rs
index ec08303315f2..aa1ee31a7195 100644
--- a/rust/kernel/time/hrtimer/tbox.rs
+++ b/rust/kernel/time/hrtimer/tbox.rs
@@ -3,6 +3,7 @@
use super::HasHrTimer;
use super::HrTimer;
use super::HrTimerCallback;
+use super::HrTimerCallbackContext;
use super::HrTimerHandle;
use super::HrTimerMode;
use super::HrTimerPointer;
@@ -119,6 +120,12 @@ where
// `data_ptr` exist.
let data_mut_ref = unsafe { Pin::new_unchecked(&mut *data_ptr) };
- T::run(data_mut_ref).into_c()
+ // SAFETY:
+ // - By C API contract `timer_ptr` is the pointer that we passed when queuing the timer, so
+ // it is a valid pointer to a `HrTimer<T>` embedded in a `T`.
+        // - We are within `RawHrTimerCallback::run`.
+ let context = unsafe { HrTimerCallbackContext::from_raw(timer_ptr) };
+
+ T::run(data_mut_ref, context).into_c()
}
}