Diffstat (limited to 'rust/kernel')
-rw-r--r--  rust/kernel/alloc.rs                |  54
-rw-r--r--  rust/kernel/alloc/allocator.rs      | 157
-rw-r--r--  rust/kernel/alloc/allocator/iter.rs | 102
-rw-r--r--  rust/kernel/alloc/kbox.rs           |  44
-rw-r--r--  rust/kernel/alloc/kvec.rs           |  55
-rw-r--r--  rust/kernel/alloc/layout.rs         |   5
-rw-r--r--  rust/kernel/bitmap.rs               | 600
-rw-r--r--  rust/kernel/block.rs                |  13
-rw-r--r--  rust/kernel/block/mq.rs             |  14
-rw-r--r--  rust/kernel/block/mq/gen_disk.rs    |  56
-rw-r--r--  rust/kernel/block/mq/operations.rs  |  65
-rw-r--r--  rust/kernel/block/mq/raw_writer.rs  |  54
-rw-r--r--  rust/kernel/block/mq/request.rs     |  21
-rw-r--r--  rust/kernel/configfs.rs             |   2
-rw-r--r--  rust/kernel/device_id.rs            |   8
-rw-r--r--  rust/kernel/devres.rs               |   6
-rw-r--r--  rust/kernel/dma.rs                  |  86
-rw-r--r--  rust/kernel/drm/device.rs           |   3
-rw-r--r--  rust/kernel/drm/driver.rs           |   5
-rw-r--r--  rust/kernel/drm/gem/mod.rs          |  96
-rw-r--r--  rust/kernel/drm/ioctl.rs            |  11
-rw-r--r--  rust/kernel/id_pool.rs              | 226
-rw-r--r--  rust/kernel/lib.rs                  |   5
-rw-r--r--  rust/kernel/maple_tree.rs           | 647
-rw-r--r--  rust/kernel/mm.rs                   |   3
-rw-r--r--  rust/kernel/mm/mmput_async.rs       |   2
-rw-r--r--  rust/kernel/net/phy.rs              |   7
-rw-r--r--  rust/kernel/page.rs                 |  87
-rw-r--r--  rust/kernel/scatterlist.rs          | 491
-rw-r--r--  rust/kernel/str.rs                  | 162
-rw-r--r--  rust/kernel/transmute.rs            | 114
-rw-r--r--  rust/kernel/workqueue.rs            |   9
32 files changed, 2985 insertions, 225 deletions
diff --git a/rust/kernel/alloc.rs b/rust/kernel/alloc.rs
index 9c154209423c..e38720349dcf 100644
--- a/rust/kernel/alloc.rs
+++ b/rust/kernel/alloc.rs
@@ -21,6 +21,8 @@ pub use self::kvec::Vec;
/// Indicates an allocation error.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub struct AllocError;
+
+use crate::error::{code::EINVAL, Result};
use core::{alloc::Layout, ptr::NonNull};
/// Flags to be used when allocating memory.
@@ -108,6 +110,31 @@ pub mod flags {
pub const __GFP_NOWARN: Flags = Flags(bindings::__GFP_NOWARN);
}
+/// Non Uniform Memory Access (NUMA) node identifier.
+#[derive(Clone, Copy, PartialEq)]
+pub struct NumaNode(i32);
+
+impl NumaNode {
+ /// Create a new NUMA node identifier (non-negative integer).
+ ///
+    /// Returns [`EINVAL`] if a negative id or an id equal to or exceeding
+    /// [`bindings::MAX_NUMNODES`] is specified.
+ pub fn new(node: i32) -> Result<Self> {
+ // MAX_NUMNODES never exceeds 2**10 because NODES_SHIFT is 0..10.
+ if node < 0 || node >= bindings::MAX_NUMNODES as i32 {
+ return Err(EINVAL);
+ }
+ Ok(Self(node))
+ }
+}
+
+/// Constant for signaling to the [`Allocator`] that the caller does not care which NUMA node
+/// memory is allocated from.
+impl NumaNode {
+ /// No node preference.
+ pub const NO_NODE: NumaNode = NumaNode(bindings::NUMA_NO_NODE);
+}
+
/// The kernel's [`Allocator`] trait.
///
/// An implementation of [`Allocator`] can allocate, re-allocate and free memory buffers described
@@ -138,7 +165,7 @@ pub unsafe trait Allocator {
/// the requested layout has a smaller alignment.
const MIN_ALIGN: usize;
- /// Allocate memory based on `layout` and `flags`.
+ /// Allocate memory based on `layout`, `flags` and `nid`.
///
/// On success, returns a buffer represented as `NonNull<[u8]>` that satisfies the layout
/// constraints (i.e. minimum size and alignment as specified by `layout`).
@@ -154,13 +181,21 @@ pub unsafe trait Allocator {
///
/// Additionally, `Flags` are honored as documented in
/// <https://docs.kernel.org/core-api/mm-api.html#mm-api-gfp-flags>.
- fn alloc(layout: Layout, flags: Flags) -> Result<NonNull<[u8]>, AllocError> {
+ fn alloc(layout: Layout, flags: Flags, nid: NumaNode) -> Result<NonNull<[u8]>, AllocError> {
// SAFETY: Passing `None` to `realloc` is valid by its safety requirements and asks for a
// new memory allocation.
- unsafe { Self::realloc(None, layout, Layout::new::<()>(), flags) }
+ unsafe { Self::realloc(None, layout, Layout::new::<()>(), flags, nid) }
}
- /// Re-allocate an existing memory allocation to satisfy the requested `layout`.
+    /// Re-allocate an existing memory allocation to satisfy the requested `layout`, allocating
+    /// memory from the requested NUMA node.
+ ///
+    /// Systems employing a Non-Uniform Memory Access (NUMA) architecture contain collections of
+    /// hardware resources, including processors, memory, and I/O buses, that together comprise
+    /// what is commonly known as a NUMA node.
+ ///
+    /// `nid` stands for NUMA id, i.e. the NUMA node identifier, which is a non-negative integer
+    /// if a node needs to be specified, or [`NumaNode::NO_NODE`] if the caller doesn't care.
///
/// If the requested size is zero, `realloc` behaves equivalent to `free`.
///
@@ -197,6 +232,7 @@ pub unsafe trait Allocator {
layout: Layout,
old_layout: Layout,
flags: Flags,
+ nid: NumaNode,
) -> Result<NonNull<[u8]>, AllocError>;
/// Free an existing memory allocation.
@@ -212,7 +248,15 @@ pub unsafe trait Allocator {
// SAFETY: The caller guarantees that `ptr` points at a valid allocation created by this
// allocator. We are passing a `Layout` with the smallest possible alignment, so it is
// smaller than or equal to the alignment previously used with this allocation.
- let _ = unsafe { Self::realloc(Some(ptr), Layout::new::<()>(), layout, Flags(0)) };
+ let _ = unsafe {
+ Self::realloc(
+ Some(ptr),
+ Layout::new::<()>(),
+ layout,
+ Flags(0),
+ NumaNode::NO_NODE,
+ )
+ };
}
}
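The new `nid` parameter threads through `alloc`, `realloc` and `free`. Below is a minimal sketch of calling the allocator API directly with a node preference; the helper name is hypothetical, and `Box`/`Vec` users currently get `NumaNode::NO_NODE` implicitly:

```rust
use core::alloc::Layout;
use kernel::alloc::{allocator::Kmalloc, flags::GFP_KERNEL, Allocator, NumaNode};
use kernel::error::Result;

fn alloc_on_node(node_id: i32) -> Result {
    // Fails with EINVAL for negative ids or ids >= MAX_NUMNODES.
    let nid = NumaNode::new(node_id)?;

    let layout = Layout::new::<[u8; 128]>();

    // Allocate 128 bytes, preferring the given node; `NumaNode::NO_NODE`
    // would leave the choice of node to the allocator.
    let buf = Kmalloc::alloc(layout, GFP_KERNEL, nid)?;

    // SAFETY: `buf` was allocated with `Kmalloc` and `layout` above and is
    // not used again after this call.
    unsafe { Kmalloc::free(buf.cast(), layout) };

    Ok(())
}
```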
diff --git a/rust/kernel/alloc/allocator.rs b/rust/kernel/alloc/allocator.rs
index 869d9fd69527..63bfb91b3671 100644
--- a/rust/kernel/alloc/allocator.rs
+++ b/rust/kernel/alloc/allocator.rs
@@ -13,12 +13,15 @@ use core::alloc::Layout;
use core::ptr;
use core::ptr::NonNull;
-use crate::alloc::{AllocError, Allocator};
+use crate::alloc::{AllocError, Allocator, NumaNode};
use crate::bindings;
-use crate::pr_warn;
+use crate::page;
const ARCH_KMALLOC_MINALIGN: usize = bindings::ARCH_KMALLOC_MINALIGN;
+mod iter;
+pub use self::iter::VmallocPageIter;
+
/// The contiguous kernel allocator.
///
/// `Kmalloc` is typically used for physically contiguous allocations up to page size, but also
@@ -47,20 +50,26 @@ pub struct KVmalloc;
/// # Invariants
///
-/// One of the following: `krealloc`, `vrealloc`, `kvrealloc`.
+/// One of the following: `krealloc_node_align`, `vrealloc_node_align`, `kvrealloc_node_align`.
struct ReallocFunc(
- unsafe extern "C" fn(*const crate::ffi::c_void, usize, u32) -> *mut crate::ffi::c_void,
+ unsafe extern "C" fn(
+ *const crate::ffi::c_void,
+ usize,
+ crate::ffi::c_ulong,
+ u32,
+ crate::ffi::c_int,
+ ) -> *mut crate::ffi::c_void,
);
impl ReallocFunc {
- // INVARIANT: `krealloc` satisfies the type invariants.
- const KREALLOC: Self = Self(bindings::krealloc);
+ // INVARIANT: `krealloc_node_align` satisfies the type invariants.
+ const KREALLOC: Self = Self(bindings::krealloc_node_align);
- // INVARIANT: `vrealloc` satisfies the type invariants.
- const VREALLOC: Self = Self(bindings::vrealloc);
+ // INVARIANT: `vrealloc_node_align` satisfies the type invariants.
+ const VREALLOC: Self = Self(bindings::vrealloc_node_align);
- // INVARIANT: `kvrealloc` satisfies the type invariants.
- const KVREALLOC: Self = Self(bindings::kvrealloc);
+ // INVARIANT: `kvrealloc_node_align` satisfies the type invariants.
+ const KVREALLOC: Self = Self(bindings::kvrealloc_node_align);
/// # Safety
///
@@ -78,6 +87,7 @@ impl ReallocFunc {
layout: Layout,
old_layout: Layout,
flags: Flags,
+ nid: NumaNode,
) -> Result<NonNull<[u8]>, AllocError> {
let size = layout.size();
let ptr = match ptr {
@@ -101,7 +111,7 @@ impl ReallocFunc {
// - Those functions provide the guarantees of this function.
let raw_ptr = unsafe {
// If `size == 0` and `ptr != NULL` the memory behind the pointer is freed.
- self.0(ptr.cast(), size, flags.0).cast()
+ self.0(ptr.cast(), size, layout.align(), flags.0, nid.0).cast()
};
let ptr = if size == 0 {
@@ -138,11 +148,60 @@ unsafe impl Allocator for Kmalloc {
layout: Layout,
old_layout: Layout,
flags: Flags,
+ nid: NumaNode,
) -> Result<NonNull<[u8]>, AllocError> {
let layout = Kmalloc::aligned_layout(layout);
// SAFETY: `ReallocFunc::call` has the same safety requirements as `Allocator::realloc`.
- unsafe { ReallocFunc::KREALLOC.call(ptr, layout, old_layout, flags) }
+ unsafe { ReallocFunc::KREALLOC.call(ptr, layout, old_layout, flags, nid) }
+ }
+}
+
+impl Vmalloc {
+ /// Convert a pointer to a [`Vmalloc`] allocation to a [`page::BorrowedPage`].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// # use core::ptr::{NonNull, from_mut};
+ /// # use kernel::{page, prelude::*};
+ /// use kernel::alloc::allocator::Vmalloc;
+ ///
+ /// let mut vbox = VBox::<[u8; page::PAGE_SIZE]>::new_uninit(GFP_KERNEL)?;
+ ///
+ /// {
+ /// // SAFETY: By the type invariant of `Box` the inner pointer of `vbox` is non-null.
+ /// let ptr = unsafe { NonNull::new_unchecked(from_mut(&mut *vbox)) };
+ ///
+ /// // SAFETY:
+ /// // `ptr` is a valid pointer to a `Vmalloc` allocation.
+ /// // `ptr` is valid for the entire lifetime of `page`.
+ /// let page = unsafe { Vmalloc::to_page(ptr.cast()) };
+ ///
+ /// // SAFETY: There is no concurrent read or write to the same page.
+ /// unsafe { page.fill_zero_raw(0, page::PAGE_SIZE)? };
+ /// }
+ /// # Ok::<(), Error>(())
+ /// ```
+ ///
+ /// # Safety
+ ///
+ /// - `ptr` must be a valid pointer to a [`Vmalloc`] allocation.
+ /// - `ptr` must remain valid for the entire duration of `'a`.
+ pub unsafe fn to_page<'a>(ptr: NonNull<u8>) -> page::BorrowedPage<'a> {
+ // SAFETY: `ptr` is a valid pointer to `Vmalloc` memory.
+ let page = unsafe { bindings::vmalloc_to_page(ptr.as_ptr().cast()) };
+
+ // SAFETY: `vmalloc_to_page` returns a valid pointer to a `struct page` for a valid pointer
+ // to `Vmalloc` memory.
+ let page = unsafe { NonNull::new_unchecked(page) };
+
+ // SAFETY:
+ // - `page` is a valid pointer to a `struct page`, given that by the safety requirements of
+ // this function `ptr` is a valid pointer to a `Vmalloc` allocation.
+ // - By the safety requirements of this function `ptr` is valid for the entire lifetime of
+ // `'a`.
+ unsafe { page::BorrowedPage::from_raw(page) }
}
}
@@ -159,16 +218,11 @@ unsafe impl Allocator for Vmalloc {
layout: Layout,
old_layout: Layout,
flags: Flags,
+ nid: NumaNode,
) -> Result<NonNull<[u8]>, AllocError> {
- // TODO: Support alignments larger than PAGE_SIZE.
- if layout.align() > bindings::PAGE_SIZE {
- pr_warn!("Vmalloc does not support alignments larger than PAGE_SIZE yet.\n");
- return Err(AllocError);
- }
-
// SAFETY: If not `None`, `ptr` is guaranteed to point to valid memory, which was previously
// allocated with this `Allocator`.
- unsafe { ReallocFunc::VREALLOC.call(ptr, layout, old_layout, flags) }
+ unsafe { ReallocFunc::VREALLOC.call(ptr, layout, old_layout, flags, nid) }
}
}
@@ -185,19 +239,70 @@ unsafe impl Allocator for KVmalloc {
layout: Layout,
old_layout: Layout,
flags: Flags,
+ nid: NumaNode,
) -> Result<NonNull<[u8]>, AllocError> {
// `KVmalloc` may use the `Kmalloc` backend, hence we have to enforce a `Kmalloc`
// compatible layout.
let layout = Kmalloc::aligned_layout(layout);
- // TODO: Support alignments larger than PAGE_SIZE.
- if layout.align() > bindings::PAGE_SIZE {
- pr_warn!("KVmalloc does not support alignments larger than PAGE_SIZE yet.\n");
- return Err(AllocError);
- }
-
// SAFETY: If not `None`, `ptr` is guaranteed to point to valid memory, which was previously
// allocated with this `Allocator`.
- unsafe { ReallocFunc::KVREALLOC.call(ptr, layout, old_layout, flags) }
+ unsafe { ReallocFunc::KVREALLOC.call(ptr, layout, old_layout, flags, nid) }
+ }
+}
+
+#[macros::kunit_tests(rust_allocator)]
+mod tests {
+ use super::*;
+ use core::mem::MaybeUninit;
+ use kernel::prelude::*;
+
+ #[test]
+ fn test_alignment() -> Result {
+ const TEST_SIZE: usize = 1024;
+ const TEST_LARGE_ALIGN_SIZE: usize = kernel::page::PAGE_SIZE * 4;
+
+        // These two structs are used to test allocating aligned memory.
+        // They don't need to be accessed, so they're marked as `dead_code`.
+ #[expect(dead_code)]
+ #[repr(align(128))]
+ struct Blob([u8; TEST_SIZE]);
+ #[expect(dead_code)]
+ #[repr(align(8192))]
+ struct LargeAlignBlob([u8; TEST_LARGE_ALIGN_SIZE]);
+
+ struct TestAlign<T, A: Allocator>(Box<MaybeUninit<T>, A>);
+ impl<T, A: Allocator> TestAlign<T, A> {
+ fn new() -> Result<Self> {
+ Ok(Self(Box::<_, A>::new_uninit(GFP_KERNEL)?))
+ }
+
+ fn is_aligned_to(&self, align: usize) -> bool {
+ assert!(align.is_power_of_two());
+
+ let addr = self.0.as_ptr() as usize;
+ addr & (align - 1) == 0
+ }
+ }
+
+ let ta = TestAlign::<Blob, Kmalloc>::new()?;
+ assert!(ta.is_aligned_to(128));
+
+ let ta = TestAlign::<LargeAlignBlob, Kmalloc>::new()?;
+ assert!(ta.is_aligned_to(8192));
+
+ let ta = TestAlign::<Blob, Vmalloc>::new()?;
+ assert!(ta.is_aligned_to(128));
+
+ let ta = TestAlign::<LargeAlignBlob, Vmalloc>::new()?;
+ assert!(ta.is_aligned_to(8192));
+
+ let ta = TestAlign::<Blob, KVmalloc>::new()?;
+ assert!(ta.is_aligned_to(128));
+
+ let ta = TestAlign::<LargeAlignBlob, KVmalloc>::new()?;
+ assert!(ta.is_aligned_to(8192));
+
+ Ok(())
}
}
diff --git a/rust/kernel/alloc/allocator/iter.rs b/rust/kernel/alloc/allocator/iter.rs
new file mode 100644
index 000000000000..5759f86029b7
--- /dev/null
+++ b/rust/kernel/alloc/allocator/iter.rs
@@ -0,0 +1,102 @@
+// SPDX-License-Identifier: GPL-2.0
+
+use super::Vmalloc;
+use crate::page;
+use core::marker::PhantomData;
+use core::ptr::NonNull;
+
+/// An [`Iterator`] of [`page::BorrowedPage`] items owned by a [`Vmalloc`] allocation.
+///
+/// # Guarantees
+///
+/// The pages yielded by the [`Iterator`] appear in ascending order of their mapping in the CPU's
+/// virtual address space.
+///
+/// # Invariants
+///
+/// - `buf` is a valid and [`page::PAGE_SIZE`] aligned pointer into a [`Vmalloc`] allocation.
+/// - `size` is the number of bytes from `buf` until the end of the [`Vmalloc`] allocation `buf`
+/// points to.
+pub struct VmallocPageIter<'a> {
+ /// The base address of the [`Vmalloc`] buffer.
+ buf: NonNull<u8>,
+ /// The size of the buffer pointed to by `buf` in bytes.
+ size: usize,
+ /// The current page index of the [`Iterator`].
+ index: usize,
+ _p: PhantomData<page::BorrowedPage<'a>>,
+}
+
+impl<'a> Iterator for VmallocPageIter<'a> {
+ type Item = page::BorrowedPage<'a>;
+
+ fn next(&mut self) -> Option<Self::Item> {
+ let offset = self.index.checked_mul(page::PAGE_SIZE)?;
+
+ // Even though `self.size()` may be smaller than `Self::page_count() * page::PAGE_SIZE`, it
+ // is always a number between `(Self::page_count() - 1) * page::PAGE_SIZE` and
+ // `Self::page_count() * page::PAGE_SIZE`, hence the check below is sufficient.
+ if offset < self.size() {
+ self.index += 1;
+ } else {
+ return None;
+ }
+
+ // TODO: Use `NonNull::add()` instead, once the minimum supported compiler version is
+ // bumped to 1.80 or later.
+ //
+ // SAFETY: `offset` is in the interval `[0, (self.page_count() - 1) * page::PAGE_SIZE]`,
+ // hence the resulting pointer is guaranteed to be within the same allocation.
+ let ptr = unsafe { self.buf.as_ptr().add(offset) };
+
+ // SAFETY: `ptr` is guaranteed to be non-null given that it is derived from `self.buf`.
+ let ptr = unsafe { NonNull::new_unchecked(ptr) };
+
+ // SAFETY:
+ // - `ptr` is a valid pointer to a `Vmalloc` allocation.
+ // - `ptr` is valid for the duration of `'a`.
+ Some(unsafe { Vmalloc::to_page(ptr) })
+ }
+
+ fn size_hint(&self) -> (usize, Option<usize>) {
+ let remaining = self.page_count().saturating_sub(self.index);
+
+ (remaining, Some(remaining))
+ }
+}
+
+impl<'a> VmallocPageIter<'a> {
+ /// Creates a new [`VmallocPageIter`] instance.
+ ///
+ /// # Safety
+ ///
+ /// - `buf` must be a [`page::PAGE_SIZE`] aligned pointer into a [`Vmalloc`] allocation.
+ /// - `buf` must be valid for at least the lifetime of `'a`.
+ /// - `size` must be the number of bytes from `buf` until the end of the [`Vmalloc`] allocation
+ /// `buf` points to.
+ pub unsafe fn new(buf: NonNull<u8>, size: usize) -> Self {
+ // INVARIANT: By the safety requirements, `buf` is a valid and `page::PAGE_SIZE` aligned
+ // pointer into a [`Vmalloc`] allocation.
+ Self {
+ buf,
+ size,
+ index: 0,
+ _p: PhantomData,
+ }
+ }
+
+ /// Returns the size of the backing [`Vmalloc`] allocation in bytes.
+ ///
+ /// Note that this is the size the [`Vmalloc`] allocation has been allocated with. Hence, this
+ /// number may be smaller than `[`Self::page_count`] * [`page::PAGE_SIZE`]`.
+ #[inline]
+ pub fn size(&self) -> usize {
+ self.size
+ }
+
+ /// Returns the number of pages owned by the backing [`Vmalloc`] allocation.
+ #[inline]
+ pub fn page_count(&self) -> usize {
+ self.size().div_ceil(page::PAGE_SIZE)
+ }
+}
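A short sketch of consuming the iterator follows; the helper is hypothetical, and the `VmallocPageIter` would typically come from `AsPageIter::page_iter` on a `VBox` or `VVec`:

```rust
use kernel::alloc::allocator::VmallocPageIter;
use kernel::error::Result;
use kernel::page::PAGE_SIZE;

fn zero_all_pages(mut pages: VmallocPageIter<'_>) -> Result {
    // `page_count()` rounds the allocation size up to whole pages, so it
    // matches the number of items the iterator yields.
    let expected = pages.page_count();
    let mut seen = 0;

    while let Some(page) = pages.next() {
        // SAFETY: assumes no concurrent read or write to the same pages.
        unsafe { page.fill_zero_raw(0, PAGE_SIZE)? };
        seen += 1;
    }

    assert_eq!(seen, expected);
    Ok(())
}
```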
diff --git a/rust/kernel/alloc/kbox.rs b/rust/kernel/alloc/kbox.rs
index 27c4b5a9b61d..622b3529edfc 100644
--- a/rust/kernel/alloc/kbox.rs
+++ b/rust/kernel/alloc/kbox.rs
@@ -3,8 +3,8 @@
//! Implementation of [`Box`].
#[allow(unused_imports)] // Used in doc comments.
-use super::allocator::{KVmalloc, Kmalloc, Vmalloc};
-use super::{AllocError, Allocator, Flags};
+use super::allocator::{KVmalloc, Kmalloc, Vmalloc, VmallocPageIter};
+use super::{AllocError, Allocator, Flags, NumaNode};
use core::alloc::Layout;
use core::borrow::{Borrow, BorrowMut};
use core::marker::PhantomData;
@@ -18,6 +18,7 @@ use core::result::Result;
use crate::ffi::c_void;
use crate::fmt;
use crate::init::InPlaceInit;
+use crate::page::AsPageIter;
use crate::types::ForeignOwnable;
use pin_init::{InPlaceWrite, Init, PinInit, ZeroableOption};
@@ -273,7 +274,7 @@ where
/// ```
pub fn new_uninit(flags: Flags) -> Result<Box<MaybeUninit<T>, A>, AllocError> {
let layout = Layout::new::<MaybeUninit<T>>();
- let ptr = A::alloc(layout, flags)?;
+ let ptr = A::alloc(layout, flags, NumaNode::NO_NODE)?;
// INVARIANT: `ptr` is either a dangling pointer or points to memory allocated with `A`,
// which is sufficient in size and alignment for storing a `T`.
@@ -680,3 +681,40 @@ where
unsafe { A::free(self.0.cast(), layout) };
}
}
+
+/// # Examples
+///
+/// ```
+/// # use kernel::prelude::*;
+/// use kernel::alloc::allocator::VmallocPageIter;
+/// use kernel::page::{AsPageIter, PAGE_SIZE};
+///
+/// let mut vbox = VBox::new((), GFP_KERNEL)?;
+///
+/// assert!(vbox.page_iter().next().is_none());
+///
+/// let mut vbox = VBox::<[u8; PAGE_SIZE]>::new_uninit(GFP_KERNEL)?;
+///
+/// let page = vbox.page_iter().next().expect("At least one page should be available.\n");
+///
+/// // SAFETY: There is no concurrent read or write to the same page.
+/// unsafe { page.fill_zero_raw(0, PAGE_SIZE)? };
+/// # Ok::<(), Error>(())
+/// ```
+impl<T> AsPageIter for VBox<T> {
+ type Iter<'a>
+ = VmallocPageIter<'a>
+ where
+ T: 'a;
+
+ fn page_iter(&mut self) -> Self::Iter<'_> {
+ let ptr = self.0.cast();
+ let size = core::mem::size_of::<T>();
+
+ // SAFETY:
+ // - `ptr` is a valid pointer to the beginning of a `Vmalloc` allocation.
+ // - `ptr` is guaranteed to be valid for the lifetime of `'a`.
+ // - `size` is the size of the `Vmalloc` allocation `ptr` points to.
+ unsafe { VmallocPageIter::new(ptr, size) }
+ }
+}
diff --git a/rust/kernel/alloc/kvec.rs b/rust/kernel/alloc/kvec.rs
index dfc101e03f35..e94aebd084c8 100644
--- a/rust/kernel/alloc/kvec.rs
+++ b/rust/kernel/alloc/kvec.rs
@@ -3,11 +3,14 @@
//! Implementation of [`Vec`].
use super::{
- allocator::{KVmalloc, Kmalloc, Vmalloc},
+ allocator::{KVmalloc, Kmalloc, Vmalloc, VmallocPageIter},
layout::ArrayLayout,
- AllocError, Allocator, Box, Flags,
+ AllocError, Allocator, Box, Flags, NumaNode,
+};
+use crate::{
+ fmt,
+ page::AsPageIter,
};
-use crate::fmt;
use core::{
borrow::{Borrow, BorrowMut},
marker::PhantomData,
@@ -644,6 +647,7 @@ where
layout.into(),
self.layout.into(),
flags,
+ NumaNode::NO_NODE,
)?
};
@@ -1027,6 +1031,43 @@ where
}
}
+/// # Examples
+///
+/// ```
+/// # use kernel::prelude::*;
+/// use kernel::alloc::allocator::VmallocPageIter;
+/// use kernel::page::{AsPageIter, PAGE_SIZE};
+///
+/// let mut vec = VVec::<u8>::new();
+///
+/// assert!(vec.page_iter().next().is_none());
+///
+/// vec.reserve(PAGE_SIZE, GFP_KERNEL)?;
+///
+/// let page = vec.page_iter().next().expect("At least one page should be available.\n");
+///
+/// // SAFETY: There is no concurrent read or write to the same page.
+/// unsafe { page.fill_zero_raw(0, PAGE_SIZE)? };
+/// # Ok::<(), Error>(())
+/// ```
+impl<T> AsPageIter for VVec<T> {
+ type Iter<'a>
+ = VmallocPageIter<'a>
+ where
+ T: 'a;
+
+ fn page_iter(&mut self) -> Self::Iter<'_> {
+ let ptr = self.ptr.cast();
+ let size = self.layout.size();
+
+ // SAFETY:
+ // - `ptr` is a valid pointer to the beginning of a `Vmalloc` allocation.
+ // - `ptr` is guaranteed to be valid for the lifetime of `'a`.
+ // - `size` is the size of the `Vmalloc` allocation `ptr` points to.
+ unsafe { VmallocPageIter::new(ptr, size) }
+ }
+}
+
/// An [`Iterator`] implementation for [`Vec`] that moves elements out of a vector.
///
/// This structure is created by the [`Vec::into_iter`] method on [`Vec`] (provided by the
@@ -1121,7 +1162,13 @@ where
// the type invariant to be smaller than `cap`. Depending on `realloc` this operation
// may shrink the buffer or leave it as it is.
ptr = match unsafe {
- A::realloc(Some(buf.cast()), layout.into(), old_layout.into(), flags)
+ A::realloc(
+ Some(buf.cast()),
+ layout.into(),
+ old_layout.into(),
+ flags,
+ NumaNode::NO_NODE,
+ )
} {
// If we fail to shrink, which likely can't even happen, continue with the existing
// buffer.
diff --git a/rust/kernel/alloc/layout.rs b/rust/kernel/alloc/layout.rs
index 52cbf61c4539..9f8be72feb7a 100644
--- a/rust/kernel/alloc/layout.rs
+++ b/rust/kernel/alloc/layout.rs
@@ -98,6 +98,11 @@ impl<T> ArrayLayout<T> {
pub const fn is_empty(&self) -> bool {
self.len == 0
}
+
+ /// Returns the size of the [`ArrayLayout`] in bytes.
+ pub const fn size(&self) -> usize {
+ self.len() * core::mem::size_of::<T>()
+ }
}
impl<T> From<ArrayLayout<T>> for Layout {
diff --git a/rust/kernel/bitmap.rs b/rust/kernel/bitmap.rs
new file mode 100644
index 000000000000..f45915694454
--- /dev/null
+++ b/rust/kernel/bitmap.rs
@@ -0,0 +1,600 @@
+// SPDX-License-Identifier: GPL-2.0
+
+// Copyright (C) 2025 Google LLC.
+
+//! Rust API for bitmap.
+//!
+//! C headers: [`include/linux/bitmap.h`](srctree/include/linux/bitmap.h).
+
+use crate::alloc::{AllocError, Flags};
+use crate::bindings;
+#[cfg(not(CONFIG_RUST_BITMAP_HARDENED))]
+use crate::pr_err;
+use core::ptr::NonNull;
+
+const BITS_PER_LONG: usize = bindings::BITS_PER_LONG as usize;
+
+/// Represents a C bitmap. Wraps underlying C bitmap API.
+///
+/// # Invariants
+///
+/// Must reference a `[c_ulong]` long enough to fit `data.len()` bits.
+#[cfg_attr(CONFIG_64BIT, repr(align(8)))]
+#[cfg_attr(not(CONFIG_64BIT), repr(align(4)))]
+pub struct Bitmap {
+ data: [()],
+}
+
+impl Bitmap {
+ /// Borrows a C bitmap.
+ ///
+ /// # Safety
+ ///
+ /// * `ptr` holds a non-null address of an initialized array of `unsigned long`
+ /// that is large enough to hold `nbits` bits.
+ /// * the array must not be freed for the lifetime of this [`Bitmap`]
+ /// * concurrent access only happens through atomic operations
+ pub unsafe fn from_raw<'a>(ptr: *const usize, nbits: usize) -> &'a Bitmap {
+ let data: *const [()] = core::ptr::slice_from_raw_parts(ptr.cast(), nbits);
+ // INVARIANT: `data` references an initialized array that can hold `nbits` bits.
+ // SAFETY:
+ // The caller guarantees that `data` (derived from `ptr` and `nbits`)
+ // points to a valid, initialized, and appropriately sized memory region
+ // that will not be freed for the lifetime 'a.
+    // We are casting `*const [()]` to `*const Bitmap`. The `Bitmap`
+    // struct is a dynamically sized type whose only field is `data: [()]`,
+    // so its layout is compatible with a slice of `()` and it occupies no
+    // storage of its own. The `slice_from_raw_parts` function encodes the
+    // length (the number of bits, not elements) into the metadata of the
+    // fat pointer. Therefore, dereferencing this pointer as `&Bitmap` is
+    // safe given the caller's guarantees.
+ unsafe { &*(data as *const Bitmap) }
+ }
+
+ /// Borrows a C bitmap exclusively.
+ ///
+ /// # Safety
+ ///
+ /// * `ptr` holds a non-null address of an initialized array of `unsigned long`
+ /// that is large enough to hold `nbits` bits.
+ /// * the array must not be freed for the lifetime of this [`Bitmap`]
+ /// * no concurrent access may happen.
+ pub unsafe fn from_raw_mut<'a>(ptr: *mut usize, nbits: usize) -> &'a mut Bitmap {
+ let data: *mut [()] = core::ptr::slice_from_raw_parts_mut(ptr.cast(), nbits);
+ // INVARIANT: `data` references an initialized array that can hold `nbits` bits.
+ // SAFETY:
+ // The caller guarantees that `data` (derived from `ptr` and `nbits`)
+ // points to a valid, initialized, and appropriately sized memory region
+ // that will not be freed for the lifetime 'a.
+ // Furthermore, the caller guarantees no concurrent access will happen,
+ // which upholds the exclusivity requirement for a mutable reference.
+    // Similar to `from_raw`, casting `*mut [()]` to `*mut Bitmap` is
+    // safe because `Bitmap` is a dynamically sized type with a
+    // `data: [()]` field, making its layout compatible with a slice of `()`.
+ unsafe { &mut *(data as *mut Bitmap) }
+ }
+
+ /// Returns a raw pointer to the backing [`Bitmap`].
+ pub fn as_ptr(&self) -> *const usize {
+ core::ptr::from_ref::<Bitmap>(self).cast::<usize>()
+ }
+
+ /// Returns a mutable raw pointer to the backing [`Bitmap`].
+ pub fn as_mut_ptr(&mut self) -> *mut usize {
+ core::ptr::from_mut::<Bitmap>(self).cast::<usize>()
+ }
+
+ /// Returns length of this [`Bitmap`].
+ #[expect(clippy::len_without_is_empty)]
+ pub fn len(&self) -> usize {
+ self.data.len()
+ }
+}
+
+/// Holds either a pointer to an array of `unsigned long` or a small inline bitmap.
+#[repr(C)]
+union BitmapRepr {
+ bitmap: usize,
+ ptr: NonNull<usize>,
+}
+
+macro_rules! bitmap_assert {
+ ($cond:expr, $($arg:tt)+) => {
+ #[cfg(CONFIG_RUST_BITMAP_HARDENED)]
+ assert!($cond, $($arg)*);
+ }
+}
+
+macro_rules! bitmap_assert_return {
+ ($cond:expr, $($arg:tt)+) => {
+ #[cfg(CONFIG_RUST_BITMAP_HARDENED)]
+ assert!($cond, $($arg)*);
+
+ #[cfg(not(CONFIG_RUST_BITMAP_HARDENED))]
+ if !($cond) {
+ pr_err!($($arg)*);
+ return
+ }
+ }
+}
+
+/// Represents an owned bitmap.
+///
+/// Wraps underlying C bitmap API. See [`Bitmap`] for available
+/// methods.
+///
+/// # Examples
+///
+/// Basic usage
+///
+/// ```
+/// use kernel::alloc::flags::GFP_KERNEL;
+/// use kernel::bitmap::BitmapVec;
+///
+/// let mut b = BitmapVec::new(16, GFP_KERNEL)?;
+///
+/// assert_eq!(16, b.len());
+/// for i in 0..16 {
+/// if i % 4 == 0 {
+/// b.set_bit(i);
+/// }
+/// }
+/// assert_eq!(Some(0), b.next_bit(0));
+/// assert_eq!(Some(1), b.next_zero_bit(0));
+/// assert_eq!(Some(4), b.next_bit(1));
+/// assert_eq!(Some(5), b.next_zero_bit(4));
+/// assert_eq!(Some(12), b.last_bit());
+/// # Ok::<(), Error>(())
+/// ```
+///
+/// # Invariants
+///
+/// * `nbits` is `<= i32::MAX` and never changes.
+/// * if `nbits <= bindings::BITS_PER_LONG`, then `repr` is a `usize`.
+/// * otherwise, `repr` holds a non-null pointer to an initialized
+/// array of `unsigned long` that is large enough to hold `nbits` bits.
+pub struct BitmapVec {
+ /// Representation of bitmap.
+ repr: BitmapRepr,
+ /// Length of this bitmap. Must be `<= i32::MAX`.
+ nbits: usize,
+}
+
+impl core::ops::Deref for BitmapVec {
+ type Target = Bitmap;
+
+ fn deref(&self) -> &Bitmap {
+ let ptr = if self.nbits <= BITS_PER_LONG {
+ // SAFETY: Bitmap is represented inline.
+ unsafe { core::ptr::addr_of!(self.repr.bitmap) }
+ } else {
+ // SAFETY: Bitmap is represented as array of `unsigned long`.
+ unsafe { self.repr.ptr.as_ptr() }
+ };
+
+ // SAFETY: We got the right pointer and invariants of [`Bitmap`] hold.
+ // An inline bitmap is treated like an array with single element.
+ unsafe { Bitmap::from_raw(ptr, self.nbits) }
+ }
+}
+
+impl core::ops::DerefMut for BitmapVec {
+ fn deref_mut(&mut self) -> &mut Bitmap {
+ let ptr = if self.nbits <= BITS_PER_LONG {
+ // SAFETY: Bitmap is represented inline.
+ unsafe { core::ptr::addr_of_mut!(self.repr.bitmap) }
+ } else {
+ // SAFETY: Bitmap is represented as array of `unsigned long`.
+ unsafe { self.repr.ptr.as_ptr() }
+ };
+
+ // SAFETY: We got the right pointer and invariants of [`BitmapVec`] hold.
+ // An inline bitmap is treated like an array with single element.
+ unsafe { Bitmap::from_raw_mut(ptr, self.nbits) }
+ }
+}
+
+/// Enable ownership transfer to other threads.
+///
+/// SAFETY: We own the underlying bitmap representation.
+unsafe impl Send for BitmapVec {}
+
+/// Enable unsynchronized concurrent access to [`BitmapVec`] through shared references.
+///
+/// SAFETY: `deref()` will return a reference to a [`Bitmap`]. Its methods
+/// that take immutable references are either atomic or read-only.
+unsafe impl Sync for BitmapVec {}
+
+impl Drop for BitmapVec {
+ fn drop(&mut self) {
+ if self.nbits <= BITS_PER_LONG {
+ return;
+ }
+        // SAFETY: `self.repr.ptr` was returned by the C `bitmap_zalloc`.
+        //
+        // INVARIANT: there is no other use of `self.repr.ptr` after this
+        // call and the value is being dropped so the broken invariant is
+        // not observable on function exit.
+ unsafe { bindings::bitmap_free(self.repr.ptr.as_ptr()) };
+ }
+}
+
+impl BitmapVec {
+ /// Constructs a new [`BitmapVec`].
+ ///
+ /// Fails with [`AllocError`] when the [`BitmapVec`] could not be allocated. This
+ /// includes the case when `nbits` is greater than `i32::MAX`.
+ #[inline]
+ pub fn new(nbits: usize, flags: Flags) -> Result<Self, AllocError> {
+ if nbits <= BITS_PER_LONG {
+ return Ok(BitmapVec {
+ repr: BitmapRepr { bitmap: 0 },
+ nbits,
+ });
+ }
+ if nbits > i32::MAX.try_into().unwrap() {
+ return Err(AllocError);
+ }
+ let nbits_u32 = u32::try_from(nbits).unwrap();
+ // SAFETY: `BITS_PER_LONG < nbits` and `nbits <= i32::MAX`.
+ let ptr = unsafe { bindings::bitmap_zalloc(nbits_u32, flags.as_raw()) };
+ let ptr = NonNull::new(ptr).ok_or(AllocError)?;
+ // INVARIANT: `ptr` returned by C `bitmap_zalloc` and `nbits` checked.
+ Ok(BitmapVec {
+ repr: BitmapRepr { ptr },
+ nbits,
+ })
+ }
+
+    /// Returns the length of this [`BitmapVec`].
+ #[allow(clippy::len_without_is_empty)]
+ #[inline]
+ pub fn len(&self) -> usize {
+ self.nbits
+ }
+
+    /// Fills this [`BitmapVec`] with random bits.
+ #[cfg(CONFIG_FIND_BIT_BENCHMARK_RUST)]
+ pub fn fill_random(&mut self) {
+ // SAFETY: `self.as_mut_ptr` points to either an array of the
+ // appropriate length or one usize.
+ unsafe {
+ bindings::get_random_bytes(
+ self.as_mut_ptr().cast::<ffi::c_void>(),
+ usize::div_ceil(self.nbits, bindings::BITS_PER_LONG as usize)
+ * bindings::BITS_PER_LONG as usize
+ / 8,
+ );
+ }
+ }
+}
+
+impl Bitmap {
+ /// Set bit with index `index`.
+ ///
+ /// ATTENTION: `set_bit` is non-atomic, which differs from the naming
+ /// convention in C code. The corresponding C function is `__set_bit`.
+ ///
+ /// If CONFIG_RUST_BITMAP_HARDENED is not enabled and `index` is greater than
+    /// or equal to `self.len()`, does nothing.
+ ///
+ /// # Panics
+ ///
+ /// Panics if CONFIG_RUST_BITMAP_HARDENED is enabled and `index` is greater than
+    /// or equal to `self.len()`.
+ #[inline]
+ pub fn set_bit(&mut self, index: usize) {
+ bitmap_assert_return!(
+ index < self.len(),
+ "Bit `index` must be < {}, was {}",
+ self.len(),
+ index
+ );
+ // SAFETY: Bit `index` is within bounds.
+ unsafe { bindings::__set_bit(index, self.as_mut_ptr()) };
+ }
+
+ /// Set bit with index `index`, atomically.
+ ///
+ /// This is a relaxed atomic operation (no implied memory barriers).
+ ///
+ /// ATTENTION: The naming convention differs from C, where the corresponding
+ /// function is called `set_bit`.
+ ///
+ /// If CONFIG_RUST_BITMAP_HARDENED is not enabled and `index` is greater than
+ /// or equal to `self.len()`, does nothing.
+ ///
+ /// # Panics
+ ///
+ /// Panics if CONFIG_RUST_BITMAP_HARDENED is enabled and `index` is greater than
+ /// or equal to `self.len()`.
+ #[inline]
+ pub fn set_bit_atomic(&self, index: usize) {
+ bitmap_assert_return!(
+ index < self.len(),
+ "Bit `index` must be < {}, was {}",
+ self.len(),
+ index
+ );
+ // SAFETY: `index` is within bounds and the caller has ensured that
+ // there is no mix of non-atomic and atomic operations.
+ unsafe { bindings::set_bit(index, self.as_ptr().cast_mut()) };
+ }
+
+ /// Clear `index` bit.
+ ///
+ /// ATTENTION: `clear_bit` is non-atomic, which differs from the naming
+ /// convention in C code. The corresponding C function is `__clear_bit`.
+ ///
+ /// If CONFIG_RUST_BITMAP_HARDENED is not enabled and `index` is greater than
+ /// or equal to `self.len()`, does nothing.
+ ///
+ /// # Panics
+ ///
+ /// Panics if CONFIG_RUST_BITMAP_HARDENED is enabled and `index` is greater than
+ /// or equal to `self.len()`.
+ #[inline]
+ pub fn clear_bit(&mut self, index: usize) {
+ bitmap_assert_return!(
+ index < self.len(),
+ "Bit `index` must be < {}, was {}",
+ self.len(),
+ index
+ );
+ // SAFETY: `index` is within bounds.
+ unsafe { bindings::__clear_bit(index, self.as_mut_ptr()) };
+ }
+
+ /// Clear `index` bit, atomically.
+ ///
+ /// This is a relaxed atomic operation (no implied memory barriers).
+ ///
+ /// ATTENTION: The naming convention differs from C, where the corresponding
+ /// function is called `clear_bit`.
+ ///
+ /// If CONFIG_RUST_BITMAP_HARDENED is not enabled and `index` is greater than
+ /// or equal to `self.len()`, does nothing.
+ ///
+ /// # Panics
+ ///
+ /// Panics if CONFIG_RUST_BITMAP_HARDENED is enabled and `index` is greater than
+ /// or equal to `self.len()`.
+ #[inline]
+ pub fn clear_bit_atomic(&self, index: usize) {
+ bitmap_assert_return!(
+ index < self.len(),
+ "Bit `index` must be < {}, was {}",
+ self.len(),
+ index
+ );
+ // SAFETY: `index` is within bounds and the caller has ensured that
+ // there is no mix of non-atomic and atomic operations.
+ unsafe { bindings::clear_bit(index, self.as_ptr().cast_mut()) };
+ }
+
+ /// Copy `src` into this [`Bitmap`] and set any remaining bits to zero.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use kernel::alloc::{AllocError, flags::GFP_KERNEL};
+ /// use kernel::bitmap::BitmapVec;
+ ///
+ /// let mut long_bitmap = BitmapVec::new(256, GFP_KERNEL)?;
+ ///
+ /// assert_eq!(None, long_bitmap.last_bit());
+ ///
+ /// let mut short_bitmap = BitmapVec::new(16, GFP_KERNEL)?;
+ ///
+ /// short_bitmap.set_bit(7);
+ /// long_bitmap.copy_and_extend(&short_bitmap);
+ /// assert_eq!(Some(7), long_bitmap.last_bit());
+ ///
+ /// # Ok::<(), AllocError>(())
+ /// ```
+ #[inline]
+ pub fn copy_and_extend(&mut self, src: &Bitmap) {
+ let len = core::cmp::min(src.len(), self.len());
+ // SAFETY: access to `self` and `src` is within bounds.
+ unsafe {
+ bindings::bitmap_copy_and_extend(
+ self.as_mut_ptr(),
+ src.as_ptr(),
+ len as u32,
+ self.len() as u32,
+ )
+ };
+ }
+
+ /// Finds last set bit.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use kernel::alloc::{AllocError, flags::GFP_KERNEL};
+ /// use kernel::bitmap::BitmapVec;
+ ///
+ /// let bitmap = BitmapVec::new(64, GFP_KERNEL)?;
+ ///
+ /// match bitmap.last_bit() {
+ /// Some(idx) => {
+ /// pr_info!("The last bit has index {idx}.\n");
+ /// }
+ /// None => {
+ /// pr_info!("All bits in this bitmap are 0.\n");
+ /// }
+ /// }
+ /// # Ok::<(), AllocError>(())
+ /// ```
+ #[inline]
+ pub fn last_bit(&self) -> Option<usize> {
+        // SAFETY: `_find_last_bit` access is within bounds due to invariant.
+ let index = unsafe { bindings::_find_last_bit(self.as_ptr(), self.len()) };
+ if index >= self.len() {
+ None
+ } else {
+ Some(index)
+ }
+ }
+
+ /// Finds next set bit, starting from `start`.
+ ///
+    /// Returns `None` if `start` is greater than or equal to `self.len()`.
+ #[inline]
+ pub fn next_bit(&self, start: usize) -> Option<usize> {
+ bitmap_assert!(
+ start < self.len(),
+ "`start` must be < {} was {}",
+ self.len(),
+ start
+ );
+ // SAFETY: `_find_next_bit` tolerates out-of-bounds arguments and returns a
+ // value larger than or equal to `self.len()` in that case.
+ let index = unsafe { bindings::_find_next_bit(self.as_ptr(), self.len(), start) };
+ if index >= self.len() {
+ None
+ } else {
+ Some(index)
+ }
+ }
+
+    /// Finds next zero bit, starting from `start`.
+    ///
+    /// Returns `None` if `start` is greater than or equal to `self.len()`.
+ #[inline]
+ pub fn next_zero_bit(&self, start: usize) -> Option<usize> {
+ bitmap_assert!(
+ start < self.len(),
+ "`start` must be < {} was {}",
+ self.len(),
+ start
+ );
+ // SAFETY: `_find_next_zero_bit` tolerates out-of-bounds arguments and returns a
+ // value larger than or equal to `self.len()` in that case.
+ let index = unsafe { bindings::_find_next_zero_bit(self.as_ptr(), self.len(), start) };
+ if index >= self.len() {
+ None
+ } else {
+ Some(index)
+ }
+ }
+}
+
+use macros::kunit_tests;
+
+#[kunit_tests(rust_kernel_bitmap)]
+mod tests {
+ use super::*;
+ use kernel::alloc::flags::GFP_KERNEL;
+
+ #[test]
+ fn bitmap_borrow() {
+ let fake_bitmap: [usize; 2] = [0, 0];
+        // SAFETY: `fake_bitmap` is an array of expected length.
+ let b = unsafe { Bitmap::from_raw(fake_bitmap.as_ptr(), 2 * BITS_PER_LONG) };
+ assert_eq!(2 * BITS_PER_LONG, b.len());
+ assert_eq!(None, b.next_bit(0));
+ }
+
+ #[test]
+ fn bitmap_copy() {
+ let fake_bitmap: usize = 0xFF;
+        // SAFETY: `fake_bitmap` can be used as a one-element array of expected length.
+ let b = unsafe { Bitmap::from_raw(core::ptr::addr_of!(fake_bitmap), 8) };
+ assert_eq!(8, b.len());
+ assert_eq!(None, b.next_zero_bit(0));
+ }
+
+ #[test]
+ fn bitmap_vec_new() -> Result<(), AllocError> {
+ let b = BitmapVec::new(0, GFP_KERNEL)?;
+ assert_eq!(0, b.len());
+
+ let b = BitmapVec::new(3, GFP_KERNEL)?;
+ assert_eq!(3, b.len());
+
+ let b = BitmapVec::new(1024, GFP_KERNEL)?;
+ assert_eq!(1024, b.len());
+
+ // Requesting too large values results in [`AllocError`].
+ let res = BitmapVec::new(1 << 31, GFP_KERNEL);
+ assert!(res.is_err());
+ Ok(())
+ }
+
+ #[test]
+ fn bitmap_set_clear_find() -> Result<(), AllocError> {
+ let mut b = BitmapVec::new(128, GFP_KERNEL)?;
+
+ // Zero-initialized
+ assert_eq!(None, b.next_bit(0));
+ assert_eq!(Some(0), b.next_zero_bit(0));
+ assert_eq!(None, b.last_bit());
+
+ b.set_bit(17);
+
+ assert_eq!(Some(17), b.next_bit(0));
+ assert_eq!(Some(17), b.next_bit(17));
+ assert_eq!(None, b.next_bit(18));
+ assert_eq!(Some(17), b.last_bit());
+
+ b.set_bit(107);
+
+ assert_eq!(Some(17), b.next_bit(0));
+ assert_eq!(Some(17), b.next_bit(17));
+ assert_eq!(Some(107), b.next_bit(18));
+ assert_eq!(Some(107), b.last_bit());
+
+ b.clear_bit(17);
+
+ assert_eq!(Some(107), b.next_bit(0));
+ assert_eq!(Some(107), b.last_bit());
+ Ok(())
+ }
+
+ #[test]
+ fn owned_bitmap_out_of_bounds() -> Result<(), AllocError> {
+ // TODO: Kunit #[test]s do not support `cfg` yet,
+ // so we add it here in the body.
+ #[cfg(not(CONFIG_RUST_BITMAP_HARDENED))]
+ {
+ let mut b = BitmapVec::new(128, GFP_KERNEL)?;
+ b.set_bit(2048);
+ b.set_bit_atomic(2048);
+ b.clear_bit(2048);
+ b.clear_bit_atomic(2048);
+ assert_eq!(None, b.next_bit(2048));
+ assert_eq!(None, b.next_zero_bit(2048));
+ assert_eq!(None, b.last_bit());
+ }
+ Ok(())
+ }
+
+    // TODO: uncomment once kunit supports `#[should_panic]` and `cfg`.
+ // #[cfg(CONFIG_RUST_BITMAP_HARDENED)]
+ // #[test]
+ // #[should_panic]
+ // fn owned_bitmap_out_of_bounds() -> Result<(), AllocError> {
+ // let mut b = BitmapVec::new(128, GFP_KERNEL)?;
+ //
+ // b.set_bit(2048);
+ // }
+
+ #[test]
+ fn bitmap_copy_and_extend() -> Result<(), AllocError> {
+ let mut long_bitmap = BitmapVec::new(256, GFP_KERNEL)?;
+
+ long_bitmap.set_bit(3);
+ long_bitmap.set_bit(200);
+
+ let mut short_bitmap = BitmapVec::new(32, GFP_KERNEL)?;
+
+ short_bitmap.set_bit(17);
+
+ long_bitmap.copy_and_extend(&short_bitmap);
+
+ // Previous bits have been cleared.
+ assert_eq!(Some(17), long_bitmap.next_bit(0));
+ assert_eq!(Some(17), long_bitmap.last_bit());
+ Ok(())
+ }
+}
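Since `BitmapVec` is `Send` and `Sync`, the atomic accessors can be used through a shared reference, e.g. from several threads. A minimal sketch with hypothetical helpers:

```rust
use kernel::bitmap::BitmapVec;

// Both operations are relaxed atomics and may safely race with other
// atomic bit operations on the same bitmap.
fn mark_busy(map: &BitmapVec, slot: usize) {
    map.set_bit_atomic(slot);
}

fn mark_free(map: &BitmapVec, slot: usize) {
    map.clear_bit_atomic(slot);
}
```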
diff --git a/rust/kernel/block.rs b/rust/kernel/block.rs
index 150f710efe5b..32c8d865afb6 100644
--- a/rust/kernel/block.rs
+++ b/rust/kernel/block.rs
@@ -3,3 +3,16 @@
//! Types for working with the block layer.
pub mod mq;
+
+/// Bit mask for masking out [`SECTOR_SIZE`].
+pub const SECTOR_MASK: u32 = bindings::SECTOR_MASK;
+
+/// Sectors are size `1 << SECTOR_SHIFT`.
+pub const SECTOR_SHIFT: u32 = bindings::SECTOR_SHIFT;
+
+/// Size of a sector.
+pub const SECTOR_SIZE: u32 = bindings::SECTOR_SIZE;
+
+/// The difference between the size of a page and the size of a sector,
+/// expressed as a power of two.
+pub const PAGE_SECTORS_SHIFT: u32 = bindings::PAGE_SECTORS_SHIFT;
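A small sketch of the arithmetic these constants encode (hypothetical helpers):

```rust
use kernel::block::SECTOR_SHIFT;

/// Number of whole sectors that fit in `bytes`.
fn bytes_to_sectors(bytes: u64) -> u64 {
    bytes >> SECTOR_SHIFT
}

/// Byte size of `sectors` sectors; the same as `sectors * SECTOR_SIZE as u64`.
fn sectors_to_bytes(sectors: u64) -> u64 {
    sectors << SECTOR_SHIFT
}
```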
diff --git a/rust/kernel/block/mq.rs b/rust/kernel/block/mq.rs
index 61ea35bba7d5..637018ead0ab 100644
--- a/rust/kernel/block/mq.rs
+++ b/rust/kernel/block/mq.rs
@@ -69,27 +69,33 @@
//!
//! #[vtable]
//! impl Operations for MyBlkDevice {
+//! type QueueData = ();
//!
-//! fn queue_rq(rq: ARef<Request<Self>>, _is_last: bool) -> Result {
+//! fn queue_rq(_queue_data: (), rq: ARef<Request<Self>>, _is_last: bool) -> Result {
//! Request::end_ok(rq);
//! Ok(())
//! }
//!
-//! fn commit_rqs() {}
+//! fn commit_rqs(_queue_data: ()) {}
+//!
+//! fn complete(rq: ARef<Request<Self>>) {
+//! Request::end_ok(rq)
+//! .map_err(|_e| kernel::error::code::EIO)
+//! .expect("Fatal error - expected to be able to end request");
+//! }
//! }
//!
//! let tagset: Arc<TagSet<MyBlkDevice>> =
//! Arc::pin_init(TagSet::new(1, 256, 1), flags::GFP_KERNEL)?;
//! let mut disk = gen_disk::GenDiskBuilder::new()
//! .capacity_sectors(4096)
-//! .build(fmt!("myblk"), tagset)?;
+//! .build(fmt!("myblk"), tagset, ())?;
//!
//! # Ok::<(), kernel::error::Error>(())
//! ```
pub mod gen_disk;
mod operations;
-mod raw_writer;
mod request;
mod tag_set;
diff --git a/rust/kernel/block/mq/gen_disk.rs b/rust/kernel/block/mq/gen_disk.rs
index be92d0e5f031..1ce815c8cdab 100644
--- a/rust/kernel/block/mq/gen_disk.rs
+++ b/rust/kernel/block/mq/gen_disk.rs
@@ -5,10 +5,17 @@
//! C header: [`include/linux/blkdev.h`](srctree/include/linux/blkdev.h)
//! C header: [`include/linux/blk-mq.h`](srctree/include/linux/blk-mq.h)
-use crate::block::mq::{raw_writer::RawWriter, Operations, TagSet};
-use crate::fmt::{self, Write};
-use crate::{bindings, error::from_err_ptr, error::Result, sync::Arc};
-use crate::{error, static_lock_class};
+use crate::{
+ bindings,
+ block::mq::{Operations, TagSet},
+ error::{self, from_err_ptr, Result},
+ fmt::{self, Write},
+ prelude::*,
+ static_lock_class,
+ str::NullTerminatedFormatter,
+ sync::Arc,
+ types::{ForeignOwnable, ScopeGuard},
+};
/// A builder for [`GenDisk`].
///
@@ -45,7 +52,7 @@ impl GenDiskBuilder {
/// Validate block size by verifying that it is between 512 and `PAGE_SIZE`,
/// and that it is a power of two.
- fn validate_block_size(size: u32) -> Result {
+ pub fn validate_block_size(size: u32) -> Result {
if !(512..=bindings::PAGE_SIZE as u32).contains(&size) || !size.is_power_of_two() {
Err(error::code::EINVAL)
} else {
@@ -92,7 +99,14 @@ impl GenDiskBuilder {
self,
name: fmt::Arguments<'_>,
tagset: Arc<TagSet<T>>,
+ queue_data: T::QueueData,
) -> Result<GenDisk<T>> {
+ let data = queue_data.into_foreign();
+ let recover_data = ScopeGuard::new(|| {
+            // SAFETY: `T::QueueData` was created by the call to `into_foreign()` above.
+ drop(unsafe { T::QueueData::from_foreign(data) });
+ });
+
// SAFETY: `bindings::queue_limits` contain only fields that are valid when zeroed.
let mut lim: bindings::queue_limits = unsafe { core::mem::zeroed() };
@@ -107,7 +121,7 @@ impl GenDiskBuilder {
bindings::__blk_mq_alloc_disk(
tagset.raw_tag_set(),
&mut lim,
- core::ptr::null_mut(),
+ data,
static_lock_class!().as_ptr(),
)
})?;
@@ -139,14 +153,14 @@ impl GenDiskBuilder {
// SAFETY: `gendisk` is a valid pointer as we initialized it above
unsafe { (*gendisk).fops = &TABLE };
- let mut raw_writer = RawWriter::from_array(
+ let mut writer = NullTerminatedFormatter::new(
// SAFETY: `gendisk` points to a valid and initialized instance. We
// have exclusive access, since the disk is not added to the VFS
// yet.
unsafe { &mut (*gendisk).disk_name },
- )?;
- raw_writer.write_fmt(name)?;
- raw_writer.write_char('\0')?;
+ )
+ .ok_or(EINVAL)?;
+ writer.write_fmt(name)?;
// SAFETY: `gendisk` points to a valid and initialized instance of
// `struct gendisk`. `set_capacity` takes a lock to synchronize this
@@ -161,8 +175,12 @@ impl GenDiskBuilder {
},
)?;
+ recover_data.dismiss();
+
// INVARIANT: `gendisk` was initialized above.
// INVARIANT: `gendisk` was added to the VFS via `device_add_disk` above.
+        // INVARIANT: `gendisk.queue.queuedata` is set to `data` in the call to
+ // `__blk_mq_alloc_disk` above.
Ok(GenDisk {
_tagset: tagset,
gendisk,
@@ -174,9 +192,10 @@ impl GenDiskBuilder {
///
/// # Invariants
///
-/// - `gendisk` must always point to an initialized and valid `struct gendisk`.
-/// - `gendisk` was added to the VFS through a call to
-/// `bindings::device_add_disk`.
+/// - `gendisk` must always point to an initialized and valid `struct gendisk`.
+/// - `gendisk` was added to the VFS through a call to
+/// `bindings::device_add_disk`.
+/// - `self.gendisk.queue.queuedata` is initialized by a call to `ForeignOwnable::into_foreign`.
pub struct GenDisk<T: Operations> {
_tagset: Arc<TagSet<T>>,
gendisk: *mut bindings::gendisk,
@@ -188,9 +207,20 @@ unsafe impl<T: Operations + Send> Send for GenDisk<T> {}
impl<T: Operations> Drop for GenDisk<T> {
fn drop(&mut self) {
+ // SAFETY: By type invariant of `Self`, `self.gendisk` points to a valid
+ // and initialized instance of `struct gendisk`, and, `queuedata` was
+ // initialized with the result of a call to
+ // `ForeignOwnable::into_foreign`.
+ let queue_data = unsafe { (*(*self.gendisk).queue).queuedata };
+
// SAFETY: By type invariant, `self.gendisk` points to a valid and
// initialized instance of `struct gendisk`, and it was previously added
// to the VFS.
unsafe { bindings::del_gendisk(self.gendisk) };
+
+ // SAFETY: `queue.queuedata` was created by `GenDiskBuilder::build` with
+ // a call to `ForeignOwnable::into_foreign` to create `queuedata`.
+ // `ForeignOwnable::from_foreign` is only called here.
+ drop(unsafe { T::QueueData::from_foreign(queue_data) });
}
}
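With the queue data plumbing above, a driver passes its owned data to `build` and gets it back in the `Operations` callbacks. A sketch, assuming a hypothetical `MyBlkDevice` with `type QueueData = KBox<u64>`:

```rust
let tagset: Arc<TagSet<MyBlkDevice>> =
    Arc::pin_init(TagSet::new(1, 256, 1), flags::GFP_KERNEL)?;

// Ownership of `queue_data` moves into the disk via `into_foreign()`.
let queue_data = KBox::new(42u64, flags::GFP_KERNEL)?;

let disk = gen_disk::GenDiskBuilder::new()
    .capacity_sectors(4096)
    .build(fmt!("myblk"), tagset, queue_data)?;

// When `disk` is dropped, `del_gendisk` runs first and the `KBox` is then
// reclaimed via `ForeignOwnable::from_foreign`, so the data outlives all
// I/O against the queue.
```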
diff --git a/rust/kernel/block/mq/operations.rs b/rust/kernel/block/mq/operations.rs
index c0f95a9419c4..f91a1719886c 100644
--- a/rust/kernel/block/mq/operations.rs
+++ b/rust/kernel/block/mq/operations.rs
@@ -6,15 +6,16 @@
use crate::{
bindings,
- block::mq::request::RequestDataWrapper,
- block::mq::Request,
+ block::mq::{request::RequestDataWrapper, Request},
error::{from_result, Result},
prelude::*,
sync::Refcount,
- types::ARef,
+ types::{ARef, ForeignOwnable},
};
use core::marker::PhantomData;
+type ForeignBorrowed<'a, T> = <T as ForeignOwnable>::Borrowed<'a>;
+
/// Implement this trait to interface blk-mq as block devices.
///
/// To implement a block device driver, implement this trait as described in the
@@ -27,12 +28,23 @@ use core::marker::PhantomData;
/// [module level documentation]: kernel::block::mq
#[macros::vtable]
pub trait Operations: Sized {
+ /// Data associated with the `struct request_queue` that is allocated for
+ /// the `GenDisk` associated with this `Operations` implementation.
+ type QueueData: ForeignOwnable;
+
/// Called by the kernel to queue a request with the driver. If `is_last` is
/// `false`, the driver is allowed to defer committing the request.
- fn queue_rq(rq: ARef<Request<Self>>, is_last: bool) -> Result;
+ fn queue_rq(
+ queue_data: ForeignBorrowed<'_, Self::QueueData>,
+ rq: ARef<Request<Self>>,
+ is_last: bool,
+ ) -> Result;
/// Called by the kernel to indicate that queued requests should be submitted.
- fn commit_rqs();
+ fn commit_rqs(queue_data: ForeignBorrowed<'_, Self::QueueData>);
+
+ /// Called by the kernel when the request is completed.
+ fn complete(rq: ARef<Request<Self>>);
/// Called by the kernel to poll the device for completed requests. Only
/// used for poll queues.
@@ -71,7 +83,7 @@ impl<T: Operations> OperationsVTable<T> {
/// promise to not access the request until the driver calls
/// `bindings::blk_mq_end_request` for the request.
unsafe extern "C" fn queue_rq_callback(
- _hctx: *mut bindings::blk_mq_hw_ctx,
+ hctx: *mut bindings::blk_mq_hw_ctx,
bd: *const bindings::blk_mq_queue_data,
) -> bindings::blk_status_t {
// SAFETY: `bd.rq` is valid as required by the safety requirement for
@@ -89,10 +101,20 @@ impl<T: Operations> OperationsVTable<T> {
// reference counted by `ARef` until then.
let rq = unsafe { Request::aref_from_raw((*bd).rq) };
+ // SAFETY: `hctx` is valid as required by this function.
+ let queue_data = unsafe { (*(*hctx).queue).queuedata };
+
+ // SAFETY: `queue.queuedata` was created by `GenDiskBuilder::build` with
+ // a call to `ForeignOwnable::into_foreign` to create `queuedata`.
+ // `ForeignOwnable::from_foreign` is only called when the tagset is
+ // dropped, which happens after we are dropped.
+ let queue_data = unsafe { T::QueueData::borrow(queue_data) };
+
// SAFETY: We have exclusive access and we just set the refcount above.
unsafe { Request::start_unchecked(&rq) };
let ret = T::queue_rq(
+ queue_data,
rq,
// SAFETY: `bd` is valid as required by the safety requirement for
// this function.
@@ -111,18 +133,35 @@ impl<T: Operations> OperationsVTable<T> {
///
/// # Safety
///
- /// This function may only be called by blk-mq C infrastructure.
- unsafe extern "C" fn commit_rqs_callback(_hctx: *mut bindings::blk_mq_hw_ctx) {
- T::commit_rqs()
+ /// This function may only be called by blk-mq C infrastructure. The caller
+ /// must ensure that `hctx` is valid.
+ unsafe extern "C" fn commit_rqs_callback(hctx: *mut bindings::blk_mq_hw_ctx) {
+ // SAFETY: `hctx` is valid as required by this function.
+ let queue_data = unsafe { (*(*hctx).queue).queuedata };
+
+        // SAFETY: `queue.queuedata` was created by `GenDiskBuilder::build()` with a
+ // call to `ForeignOwnable::into_foreign()` to create `queuedata`.
+ // `ForeignOwnable::from_foreign()` is only called when the tagset is
+ // dropped, which happens after we are dropped.
+ let queue_data = unsafe { T::QueueData::borrow(queue_data) };
+ T::commit_rqs(queue_data)
}
- /// This function is called by the C kernel. It is not currently
- /// implemented, and there is no way to exercise this code path.
+ /// This function is called by the C kernel. A pointer to this function is
+ /// installed in the `blk_mq_ops` vtable for the driver.
///
/// # Safety
///
- /// This function may only be called by blk-mq C infrastructure.
- unsafe extern "C" fn complete_callback(_rq: *mut bindings::request) {}
+ /// This function may only be called by blk-mq C infrastructure. `rq` must
+ /// point to a valid request that has been marked as completed. The pointee
+ /// of `rq` must be valid for write for the duration of this function.
+ unsafe extern "C" fn complete_callback(rq: *mut bindings::request) {
+ // SAFETY: This function can only be dispatched through
+        // `Request::complete`. We leaked a refcount then, which we pick back up
+ // now.
+ let aref = unsafe { Request::aref_from_raw(rq) };
+ T::complete(aref);
+ }
/// This function is called by the C kernel. A pointer to this function is
/// installed in the `blk_mq_ops` vtable for the driver.
diff --git a/rust/kernel/block/mq/raw_writer.rs b/rust/kernel/block/mq/raw_writer.rs
deleted file mode 100644
index d311e24e2595..000000000000
--- a/rust/kernel/block/mq/raw_writer.rs
+++ /dev/null
@@ -1,54 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-use crate::error::Result;
-use crate::fmt::{self, Write};
-use crate::prelude::EINVAL;
-
-/// A mutable reference to a byte buffer where a string can be written into.
-///
-/// # Invariants
-///
-/// `buffer` is always null terminated.
-pub(crate) struct RawWriter<'a> {
- buffer: &'a mut [u8],
- pos: usize,
-}
-
-impl<'a> RawWriter<'a> {
- /// Create a new `RawWriter` instance.
- fn new(buffer: &'a mut [u8]) -> Result<RawWriter<'a>> {
- *(buffer.last_mut().ok_or(EINVAL)?) = 0;
-
- // INVARIANT: We null terminated the buffer above.
- Ok(Self { buffer, pos: 0 })
- }
-
- pub(crate) fn from_array<const N: usize>(
- a: &'a mut [crate::ffi::c_char; N],
- ) -> Result<RawWriter<'a>> {
- Self::new(
- // SAFETY: the buffer of `a` is valid for read and write as `u8` for
- // at least `N` bytes.
- unsafe { core::slice::from_raw_parts_mut(a.as_mut_ptr().cast::<u8>(), N) },
- )
- }
-}
-
-impl Write for RawWriter<'_> {
- fn write_str(&mut self, s: &str) -> fmt::Result {
- let bytes = s.as_bytes();
- let len = bytes.len();
-
- // We do not want to overwrite our null terminator
- if self.pos + len > self.buffer.len() - 1 {
- return Err(fmt::Error);
- }
-
- // INVARIANT: We are not overwriting the last byte
- self.buffer[self.pos..self.pos + len].copy_from_slice(bytes);
-
- self.pos += len;
-
- Ok(())
- }
-}
diff --git a/rust/kernel/block/mq/request.rs b/rust/kernel/block/mq/request.rs
index f62a376dc313..c5f1f6b1ccfb 100644
--- a/rust/kernel/block/mq/request.rs
+++ b/rust/kernel/block/mq/request.rs
@@ -53,7 +53,7 @@ use core::{marker::PhantomData, ptr::NonNull};
/// [`struct request`]: srctree/include/linux/blk-mq.h
///
#[repr(transparent)]
-pub struct Request<T: Operations>(Opaque<bindings::request>, PhantomData<T>);
+pub struct Request<T>(Opaque<bindings::request>, PhantomData<T>);
impl<T: Operations> Request<T> {
/// Create an [`ARef<Request>`] from a [`struct request`] pointer.
@@ -138,6 +138,23 @@ impl<T: Operations> Request<T> {
Ok(())
}
+ /// Complete the request by scheduling `Operations::complete` for
+ /// execution.
+ ///
+    /// The function may be scheduled locally, via SoftIRQ, or remotely, via IPI.
+ /// See `blk_mq_complete_request_remote` in [`blk-mq.c`] for details.
+ ///
+ /// [`blk-mq.c`]: srctree/block/blk-mq.c
+ pub fn complete(this: ARef<Self>) {
+ let ptr = ARef::into_raw(this).cast::<bindings::request>().as_ptr();
+        // SAFETY: By type invariant, `ptr` points to a valid `struct request`.
+ if !unsafe { bindings::blk_mq_complete_request_remote(ptr) } {
+ // SAFETY: We released a refcount above that we can reclaim here.
+ let this = unsafe { Request::aref_from_raw(ptr) };
+ T::complete(this);
+ }
+ }
+
/// Return a pointer to the [`RequestDataWrapper`] stored in the private area
/// of the request structure.
///
@@ -151,7 +168,7 @@ impl<T: Operations> Request<T> {
// valid allocation.
let wrapper_ptr =
unsafe { bindings::blk_mq_rq_to_pdu(request_ptr).cast::<RequestDataWrapper>() };
- // SAFETY: By C API contract, wrapper_ptr points to a valid allocation
+ // SAFETY: By C API contract, `wrapper_ptr` points to a valid allocation
// and is not null.
unsafe { NonNull::new_unchecked(wrapper_ptr) }
}
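With the completion path wired up, `queue_rq` may hand a request to `Request::complete` instead of ending it inline; blk-mq then schedules `Operations::complete`, possibly on another CPU. A sketch mirroring the module example in mq.rs (hypothetical `MyBlkDevice`):

```rust
use kernel::block::mq::{Operations, Request};
use kernel::prelude::*;
use kernel::types::ARef;

struct MyBlkDevice;

#[vtable]
impl Operations for MyBlkDevice {
    type QueueData = ();

    fn queue_rq(_queue_data: (), rq: ARef<Request<Self>>, _is_last: bool) -> Result {
        // Defer completion: this schedules `complete` below instead of
        // ending the request inline.
        Request::complete(rq);
        Ok(())
    }

    fn commit_rqs(_queue_data: ()) {}

    fn complete(rq: ARef<Request<Self>>) {
        Request::end_ok(rq)
            .map_err(|_e| kernel::error::code::EIO)
            .expect("Fatal error - expected to be able to end request");
    }
}
```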
diff --git a/rust/kernel/configfs.rs b/rust/kernel/configfs.rs
index 9fb5ef825e41..10f1547ca9f1 100644
--- a/rust/kernel/configfs.rs
+++ b/rust/kernel/configfs.rs
@@ -1039,3 +1039,5 @@ macro_rules! configfs_attrs {
};
}
+
+pub use crate::configfs_attrs;
diff --git a/rust/kernel/device_id.rs b/rust/kernel/device_id.rs
index 70d57814ff79..62c42da12e9d 100644
--- a/rust/kernel/device_id.rs
+++ b/rust/kernel/device_id.rs
@@ -195,10 +195,10 @@ macro_rules! module_device_table {
($table_type: literal, $module_table_name:ident, $table_name:ident) => {
#[rustfmt::skip]
#[export_name =
- concat!("__mod_device_table__", $table_type,
- "__", module_path!(),
- "_", line!(),
- "_", stringify!($table_name))
+ concat!("__mod_device_table__", line!(),
+ "__kmod_", module_path!(),
+ "__", $table_type,
+ "__", stringify!($table_name))
]
static $module_table_name: [::core::mem::MaybeUninit<u8>; $table_name.raw_ids().size()] =
unsafe { ::core::mem::transmute_copy($table_name.raw_ids()) };
diff --git a/rust/kernel/devres.rs b/rust/kernel/devres.rs
index 132545962218..10a6a1789854 100644
--- a/rust/kernel/devres.rs
+++ b/rust/kernel/devres.rs
@@ -135,11 +135,9 @@ impl<T: Send> Devres<T> {
T: 'a,
Error: From<E>,
{
- let callback = Self::devres_callback;
-
try_pin_init!(&this in Self {
dev: dev.into(),
- callback,
+ callback: Self::devres_callback,
// INVARIANT: `inner` is properly initialized.
inner <- Opaque::pin_init(try_pin_init!(Inner {
devm <- Completion::new(),
@@ -160,7 +158,7 @@ impl<T: Send> Devres<T> {
// properly initialized, because we require `dev` (i.e. the *bound* device) to
// live at least as long as the returned `impl PinInit<Self, Error>`.
to_result(unsafe {
- bindings::devm_add_action(dev.as_raw(), Some(callback), inner.cast())
+ bindings::devm_add_action(dev.as_raw(), Some(*callback), inner.cast())
}).inspect_err(|_| {
let inner = Opaque::cast_into(inner);
diff --git a/rust/kernel/dma.rs b/rust/kernel/dma.rs
index 68fe67624424..2569c21208e3 100644
--- a/rust/kernel/dma.rs
+++ b/rust/kernel/dma.rs
@@ -13,6 +13,16 @@ use crate::{
transmute::{AsBytes, FromBytes},
};
+/// DMA address type.
+///
+/// Represents a bus address used for Direct Memory Access (DMA) operations.
+///
+/// This is an alias of the kernel's `dma_addr_t`, which may be `u32` or `u64` depending on
+/// `CONFIG_ARCH_DMA_ADDR_T_64BIT`.
+///
+/// Note that this may be `u64` even on 32-bit architectures.
+pub type DmaAddress = bindings::dma_addr_t;
+
/// Trait to be implemented by DMA capable bus devices.
///
/// The [`dma::Device`](Device) trait should be implemented by bus specific device representations,
@@ -244,6 +254,74 @@ pub mod attrs {
pub const DMA_ATTR_PRIVILEGED: Attrs = Attrs(bindings::DMA_ATTR_PRIVILEGED);
}
+/// DMA data direction.
+///
+/// Corresponds to the C [`enum dma_data_direction`].
+///
+/// [`enum dma_data_direction`]: srctree/include/linux/dma-direction.h
+#[derive(Copy, Clone, PartialEq, Eq, Debug)]
+#[repr(u32)]
+pub enum DataDirection {
+ /// The DMA mapping is for bidirectional data transfer.
+ ///
+ /// This is used when the buffer can be both read from and written to by the device.
+ /// The cache for the corresponding memory region is both flushed and invalidated.
+ Bidirectional = Self::const_cast(bindings::dma_data_direction_DMA_BIDIRECTIONAL),
+
+ /// The DMA mapping is for data transfer from memory to the device (write).
+ ///
+ /// The CPU has prepared data in the buffer, and the device will read it.
+ /// The cache for the corresponding memory region is flushed before device access.
+ ToDevice = Self::const_cast(bindings::dma_data_direction_DMA_TO_DEVICE),
+
+ /// The DMA mapping is for data transfer from the device to memory (read).
+ ///
+ /// The device will write data into the buffer for the CPU to read.
+ /// The cache for the corresponding memory region is invalidated before CPU access.
+ FromDevice = Self::const_cast(bindings::dma_data_direction_DMA_FROM_DEVICE),
+
+ /// The DMA mapping is not for data transfer.
+ ///
+ /// This is primarily for debugging purposes. With this direction, the DMA mapping API
+ /// will not perform any cache coherency operations.
+ None = Self::const_cast(bindings::dma_data_direction_DMA_NONE),
+}
+
+impl DataDirection {
+ /// Casts the bindgen-generated enum type to a `u32` at compile time.
+ ///
+ /// This function will cause a compile-time error if the underlying value of the
+ /// C enum is out of bounds for `u32`.
+ const fn const_cast(val: bindings::dma_data_direction) -> u32 {
+ // CAST: The C standard allows compilers to choose different integer types for enums.
+ // To safely check the value, we cast it to a wide signed integer type (`i128`)
+ // which can hold any standard C integer enum type without truncation.
+ let wide_val = val as i128;
+
+ // Check if the value is outside the valid range for the target type `u32`.
+ // CAST: `u32::MAX` is cast to `i128` to match the type of `wide_val` for the comparison.
+ if wide_val < 0 || wide_val > u32::MAX as i128 {
+ // Trigger a compile-time error in a const context.
+ build_error!("C enum value is out of bounds for the target type `u32`.");
+ }
+
+ // CAST: This cast is valid because the check above guarantees that `wide_val`
+ // is within the representable range of `u32`.
+ wide_val as u32
+ }
+}
+
+impl From<DataDirection> for bindings::dma_data_direction {
+ /// Returns the raw representation of [`enum dma_data_direction`].
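+ ///
+ /// A minimal sketch of passing the raw value to a C API by hand:
+ ///
+ /// ```ignore
+ /// let raw: bindings::dma_data_direction = DataDirection::ToDevice.into();
+ /// ```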
+ fn from(direction: DataDirection) -> Self {
+ // CAST: `direction as u32` gets the underlying representation of our `#[repr(u32)]` enum.
+ // The subsequent cast to `Self` (the bindgen type) assumes the C enum is compatible
+ // with the enum variants of `DataDirection`, which is a valid assumption given our
+ // compile-time checks.
+ direction as u32 as Self
+ }
+}
+
/// An abstraction of the `dma_alloc_coherent` API.
///
/// This is an abstraction around the `dma_alloc_coherent` API which is used to allocate and map
@@ -275,7 +353,7 @@ pub mod attrs {
// entire `CoherentAllocation` including the allocated memory itself.
pub struct CoherentAllocation<T: AsBytes + FromBytes> {
dev: ARef<device::Device>,
- dma_handle: bindings::dma_addr_t,
+ dma_handle: DmaAddress,
count: usize,
cpu_addr: *mut T,
dma_attrs: Attrs,
@@ -376,7 +454,7 @@ impl<T: AsBytes + FromBytes> CoherentAllocation<T> {
/// Returns a DMA handle which may be given to the device as the DMA address base of
/// the region.
- pub fn dma_handle(&self) -> bindings::dma_addr_t {
+ pub fn dma_handle(&self) -> DmaAddress {
self.dma_handle
}
@@ -384,13 +462,13 @@ impl<T: AsBytes + FromBytes> CoherentAllocation<T> {
/// device as the DMA address base of the region.
///
/// Returns `EINVAL` if `offset` is not within the bounds of the allocation.
- pub fn dma_handle_with_offset(&self, offset: usize) -> Result<bindings::dma_addr_t> {
+ pub fn dma_handle_with_offset(&self, offset: usize) -> Result<DmaAddress> {
if offset >= self.count {
Err(EINVAL)
} else {
// INVARIANT: The type invariant of `Self` guarantees that `size_of::<T> * count` fits
// into a `usize`, and `offset` is inferior to `count`.
- Ok(self.dma_handle + (offset * core::mem::size_of::<T>()) as bindings::dma_addr_t)
+ Ok(self.dma_handle + (offset * core::mem::size_of::<T>()) as DmaAddress)
}
}
diff --git a/rust/kernel/drm/device.rs b/rust/kernel/drm/device.rs
index 0956ba0f64de..3ce8f62a0056 100644
--- a/rust/kernel/drm/device.rs
+++ b/rust/kernel/drm/device.rs
@@ -11,7 +11,8 @@ use crate::{
error::from_err_ptr,
error::Result,
prelude::*,
- types::{ARef, AlwaysRefCounted, Opaque},
+ sync::aref::{ARef, AlwaysRefCounted},
+ types::Opaque,
};
use core::{alloc::Layout, mem, ops::Deref, ptr, ptr::NonNull};
diff --git a/rust/kernel/drm/driver.rs b/rust/kernel/drm/driver.rs
index d2dad77274c4..f30ee4c6245c 100644
--- a/rust/kernel/drm/driver.rs
+++ b/rust/kernel/drm/driver.rs
@@ -8,7 +8,7 @@ use crate::{
bindings, device, devres, drm,
error::{to_result, Result},
prelude::*,
- types::ARef,
+ sync::aref::ARef,
};
use macros::vtable;
@@ -86,6 +86,9 @@ pub struct AllocOps {
/// Trait for memory manager implementations. Implemented internally.
pub trait AllocImpl: super::private::Sealed + drm::gem::IntoGEMObject {
+ /// The [`Driver`] implementation for this [`AllocImpl`].
+ type Driver: drm::Driver;
+
/// The C callback operations for this memory manager.
const ALLOC_OPS: AllocOps;
}
diff --git a/rust/kernel/drm/gem/mod.rs b/rust/kernel/drm/gem/mod.rs
index b9f3248876ba..30c853988b94 100644
--- a/rust/kernel/drm/gem/mod.rs
+++ b/rust/kernel/drm/gem/mod.rs
@@ -10,36 +10,37 @@ use crate::{
drm::driver::{AllocImpl, AllocOps},
error::{to_result, Result},
prelude::*,
- types::{ARef, AlwaysRefCounted, Opaque},
+ sync::aref::{ARef, AlwaysRefCounted},
+ types::Opaque,
};
-use core::{mem, ops::Deref, ptr::NonNull};
+use core::{ops::Deref, ptr::NonNull};
+
+/// A type alias for retrieving a [`Driver`]'s [`DriverFile`] implementation from its
+/// [`DriverObject`] implementation.
+///
+/// [`Driver`]: drm::Driver
+/// [`DriverFile`]: drm::file::DriverFile
+pub type DriverFile<T> = drm::File<<<T as DriverObject>::Driver as drm::Driver>::File>;
/// GEM object functions, which must be implemented by drivers.
-pub trait BaseDriverObject<T: BaseObject>: Sync + Send + Sized {
+pub trait DriverObject: Sync + Send + Sized {
+ /// Parent `Driver` for this object.
+ type Driver: drm::Driver;
+
/// Create a new driver data object for a GEM object of a given size.
- fn new(dev: &drm::Device<T::Driver>, size: usize) -> impl PinInit<Self, Error>;
+ fn new(dev: &drm::Device<Self::Driver>, size: usize) -> impl PinInit<Self, Error>;
/// Open a new handle to an existing object, associated with a File.
- fn open(
- _obj: &<<T as IntoGEMObject>::Driver as drm::Driver>::Object,
- _file: &drm::File<<<T as IntoGEMObject>::Driver as drm::Driver>::File>,
- ) -> Result {
+ fn open(_obj: &<Self::Driver as drm::Driver>::Object, _file: &DriverFile<Self>) -> Result {
Ok(())
}
/// Close a handle to an existing object, associated with a File.
- fn close(
- _obj: &<<T as IntoGEMObject>::Driver as drm::Driver>::Object,
- _file: &drm::File<<<T as IntoGEMObject>::Driver as drm::Driver>::File>,
- ) {
- }
+ fn close(_obj: &<Self::Driver as drm::Driver>::Object, _file: &DriverFile<Self>) {}
}
/// Trait that represents a GEM object subtype
pub trait IntoGEMObject: Sized + super::private::Sealed + AlwaysRefCounted {
- /// Owning driver for this type
- type Driver: drm::Driver;
-
/// Returns a reference to the raw `drm_gem_object` structure, which must be valid as long as
/// this owning object is valid.
fn as_raw(&self) -> *mut bindings::drm_gem_object;
@@ -74,25 +75,16 @@ unsafe impl<T: IntoGEMObject> AlwaysRefCounted for T {
}
}
-/// Trait which must be implemented by drivers using base GEM objects.
-pub trait DriverObject: BaseDriverObject<Object<Self>> {
- /// Parent `Driver` for this object.
- type Driver: drm::Driver;
-}
-
-extern "C" fn open_callback<T: BaseDriverObject<U>, U: BaseObject>(
+extern "C" fn open_callback<T: DriverObject>(
raw_obj: *mut bindings::drm_gem_object,
raw_file: *mut bindings::drm_file,
) -> core::ffi::c_int {
// SAFETY: `open_callback` is only ever called with a valid pointer to a `struct drm_file`.
- let file = unsafe {
- drm::File::<<<U as IntoGEMObject>::Driver as drm::Driver>::File>::from_raw(raw_file)
- };
- // SAFETY: `open_callback` is specified in the AllocOps structure for `Object<T>`, ensuring that
- // `raw_obj` is indeed contained within a `Object<T>`.
- let obj = unsafe {
- <<<U as IntoGEMObject>::Driver as drm::Driver>::Object as IntoGEMObject>::from_raw(raw_obj)
- };
+ let file = unsafe { DriverFile::<T>::from_raw(raw_file) };
+
+ // SAFETY: `open_callback` is specified in the AllocOps structure for `Object<T>`,
+ // ensuring that `raw_obj` is indeed contained within an `Object<T>`.
+ let obj = unsafe { <<T::Driver as drm::Driver>::Object as IntoGEMObject>::from_raw(raw_obj) };
match T::open(obj, file) {
Err(e) => e.to_errno(),
@@ -100,26 +92,21 @@ extern "C" fn open_callback<T: BaseDriverObject<U>, U: BaseObject>(
}
}
-extern "C" fn close_callback<T: BaseDriverObject<U>, U: BaseObject>(
+extern "C" fn close_callback<T: DriverObject>(
raw_obj: *mut bindings::drm_gem_object,
raw_file: *mut bindings::drm_file,
) {
// SAFETY: `close_callback` is only ever called with a valid pointer to a `struct drm_file`.
- let file = unsafe {
- drm::File::<<<U as IntoGEMObject>::Driver as drm::Driver>::File>::from_raw(raw_file)
- };
+ let file = unsafe { DriverFile::<T>::from_raw(raw_file) };
+
// SAFETY: `close_callback` is specified in the AllocOps structure for `Object<T>`, ensuring
// that `raw_obj` is indeed contained within a `Object<T>`.
- let obj = unsafe {
- <<<U as IntoGEMObject>::Driver as drm::Driver>::Object as IntoGEMObject>::from_raw(raw_obj)
- };
+ let obj = unsafe { <<T::Driver as drm::Driver>::Object as IntoGEMObject>::from_raw(raw_obj) };
T::close(obj, file);
}
impl<T: DriverObject> IntoGEMObject for Object<T> {
- type Driver = T::Driver;
-
fn as_raw(&self) -> *mut bindings::drm_gem_object {
self.obj.get()
}
@@ -141,10 +128,12 @@ pub trait BaseObject: IntoGEMObject {
/// Creates a new handle for the object associated with a given `File`
/// (or returns an existing one).
- fn create_handle(
- &self,
- file: &drm::File<<<Self as IntoGEMObject>::Driver as drm::Driver>::File>,
- ) -> Result<u32> {
+ fn create_handle<D, F>(&self, file: &drm::File<F>) -> Result<u32>
+ where
+ Self: AllocImpl<Driver = D>,
+ D: drm::Driver<Object = Self, File = F>,
+ F: drm::file::DriverFile<Driver = D>,
+ {
let mut handle: u32 = 0;
// SAFETY: The arguments are all valid per the type invariants.
to_result(unsafe {
@@ -154,10 +143,12 @@ pub trait BaseObject: IntoGEMObject {
}
/// Looks up an object by its handle for a given `File`.
- fn lookup_handle(
- file: &drm::File<<<Self as IntoGEMObject>::Driver as drm::Driver>::File>,
- handle: u32,
- ) -> Result<ARef<Self>> {
+ fn lookup_handle<D, F>(file: &drm::File<F>, handle: u32) -> Result<ARef<Self>>
+ where
+ Self: AllocImpl<Driver = D>,
+ D: drm::Driver<Object = Self, File = F>,
+ F: drm::file::DriverFile<Driver = D>,
+ {
// SAFETY: The arguments are all valid per the type invariants.
let ptr = unsafe { bindings::drm_gem_object_lookup(file.as_raw().cast(), handle) };
if ptr.is_null() {
@@ -207,13 +198,10 @@ pub struct Object<T: DriverObject + Send + Sync> {
}
impl<T: DriverObject> Object<T> {
- /// The size of this object's structure.
- pub const SIZE: usize = mem::size_of::<Self>();
-
const OBJECT_FUNCS: bindings::drm_gem_object_funcs = bindings::drm_gem_object_funcs {
free: Some(Self::free_callback),
- open: Some(open_callback::<T, Object<T>>),
- close: Some(close_callback::<T, Object<T>>),
+ open: Some(open_callback::<T>),
+ close: Some(close_callback::<T>),
print_info: None,
export: None,
pin: None,
@@ -296,6 +284,8 @@ impl<T: DriverObject> Deref for Object<T> {
}
impl<T: DriverObject> AllocImpl for Object<T> {
+ type Driver = T::Driver;
+
const ALLOC_OPS: AllocOps = AllocOps {
gem_create_object: None,
prime_handle_to_fd: None,
diff --git a/rust/kernel/drm/ioctl.rs b/rust/kernel/drm/ioctl.rs
index 8431cdcd3ae0..69efbdb4c85a 100644
--- a/rust/kernel/drm/ioctl.rs
+++ b/rust/kernel/drm/ioctl.rs
@@ -83,7 +83,7 @@ pub mod internal {
///
/// ```ignore
/// fn foo(device: &kernel::drm::Device<Self>,
-/// data: &Opaque<uapi::argument_type>,
+/// data: &mut uapi::argument_type,
/// file: &kernel::drm::File<Self::File>,
/// ) -> Result<u32>
/// ```
@@ -138,9 +138,12 @@ macro_rules! declare_drm_ioctls {
// SAFETY: The ioctl argument has size `_IOC_SIZE(cmd)`, which we
// asserted above matches the size of this type, and all bit patterns of
// UAPI structs must be valid.
- let data = unsafe {
- &*(raw_data as *const $crate::types::Opaque<$crate::uapi::$struct>)
- };
+ // The `ioctl` argument is exclusively owned by the handler
+ // and guaranteed by the C implementation (`drm_ioctl()`) to remain
+ // valid for the entire lifetime of the reference taken here.
+ // There is no concurrent access or aliasing; no other references
+ // to this object exist during this call.
+ let data = unsafe { &mut *(raw_data.cast::<$crate::uapi::$struct>()) };
// SAFETY: This is just the DRM file structure
let file = unsafe { $crate::drm::File::from_raw(raw_file) };
diff --git a/rust/kernel/id_pool.rs b/rust/kernel/id_pool.rs
new file mode 100644
index 000000000000..a41a3404213c
--- /dev/null
+++ b/rust/kernel/id_pool.rs
@@ -0,0 +1,226 @@
+// SPDX-License-Identifier: GPL-2.0
+
+// Copyright (C) 2025 Google LLC.
+
+//! Rust API for an ID pool backed by a [`BitmapVec`].
+
+use crate::alloc::{AllocError, Flags};
+use crate::bitmap::BitmapVec;
+
+const BITS_PER_LONG: usize = bindings::BITS_PER_LONG as usize;
+
+/// Represents a dynamic ID pool backed by a [`BitmapVec`].
+///
+/// Clients acquire and release IDs from unset bits in a bitmap.
+///
+/// The capacity of the ID pool may be adjusted by users as needed. The API
+/// supports the scenario where users need precise control over the time of
+/// allocation of a new backing bitmap, which may require releasing a spinlock.
+/// Because of concurrent updates, all resize operations are re-verified to
+/// determine whether the grow or shrink is still valid.
+///
+/// # Examples
+///
+/// Basic usage
+///
+/// ```
+/// use kernel::alloc::{AllocError, flags::GFP_KERNEL};
+/// use kernel::id_pool::IdPool;
+///
+/// let mut pool = IdPool::new(64, GFP_KERNEL)?;
+/// for i in 0..64 {
+/// assert_eq!(i, pool.acquire_next_id(i).ok_or(ENOSPC)?);
+/// }
+///
+/// pool.release_id(23);
+/// assert_eq!(23, pool.acquire_next_id(0).ok_or(ENOSPC)?);
+///
+/// assert_eq!(None, pool.acquire_next_id(0)); // time to realloc.
+/// let resizer = pool.grow_request().ok_or(ENOSPC)?.realloc(GFP_KERNEL)?;
+/// pool.grow(resizer);
+///
+/// assert_eq!(pool.acquire_next_id(0), Some(64));
+/// # Ok::<(), Error>(())
+/// ```
+///
+/// Releasing spinlock to grow the pool
+///
+/// ```no_run
+/// use kernel::alloc::{AllocError, flags::GFP_KERNEL};
+/// use kernel::sync::{new_spinlock, SpinLock};
+/// use kernel::id_pool::IdPool;
+///
+/// fn get_id_maybe_realloc(guarded_pool: &SpinLock<IdPool>) -> Result<usize, AllocError> {
+/// let mut pool = guarded_pool.lock();
+/// loop {
+/// match pool.acquire_next_id(0) {
+/// Some(index) => return Ok(index),
+/// None => {
+/// let alloc_request = pool.grow_request();
+/// drop(pool);
+/// let resizer = alloc_request.ok_or(AllocError)?.realloc(GFP_KERNEL)?;
+/// pool = guarded_pool.lock();
+/// pool.grow(resizer)
+/// }
+/// }
+/// }
+/// }
+/// ```
+pub struct IdPool {
+ map: BitmapVec,
+}
+
+/// Indicates that an [`IdPool`] should change to a new target size.
+pub struct ReallocRequest {
+ num_ids: usize,
+}
+
+/// Contains a [`BitmapVec`] of a size suitable for reallocating [`IdPool`].
+pub struct PoolResizer {
+ new: BitmapVec,
+}
+
+impl ReallocRequest {
+ /// Allocates a new backing [`BitmapVec`] for [`IdPool`].
+ ///
+ /// This method only prepares reallocation and does not complete it.
+ /// Reallocation will complete after passing the [`PoolResizer`] to the
+ /// [`IdPool::grow`] or [`IdPool::shrink`] operation, which will check
+ /// that reallocation still makes sense.
+ pub fn realloc(&self, flags: Flags) -> Result<PoolResizer, AllocError> {
+ let new = BitmapVec::new(self.num_ids, flags)?;
+ Ok(PoolResizer { new })
+ }
+}
+
+impl IdPool {
+ /// Constructs a new [`IdPool`].
+ ///
+ /// A capacity below [`BITS_PER_LONG`] is adjusted to
+ /// [`BITS_PER_LONG`].
+ ///
+ /// [`BITS_PER_LONG`]: srctree/include/asm-generic/bitsperlong.h
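+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use kernel::alloc::{AllocError, flags::GFP_KERNEL};
+ /// use kernel::id_pool::IdPool;
+ ///
+ /// // A request for fewer than `BITS_PER_LONG` IDs is rounded up.
+ /// let pool = IdPool::new(1, GFP_KERNEL)?;
+ /// assert_eq!(pool.capacity(), kernel::bindings::BITS_PER_LONG as usize);
+ /// # Ok::<(), AllocError>(())
+ /// ```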
+ #[inline]
+ pub fn new(num_ids: usize, flags: Flags) -> Result<Self, AllocError> {
+ let num_ids = core::cmp::max(num_ids, BITS_PER_LONG);
+ let map = BitmapVec::new(num_ids, flags)?;
+ Ok(Self { map })
+ }
+
+ /// Returns how many IDs this pool can currently have.
+ #[inline]
+ pub fn capacity(&self) -> usize {
+ self.map.len()
+ }
+
+ /// Returns a [`ReallocRequest`] if the [`IdPool`] can be shrunk, [`None`] otherwise.
+ ///
+ /// The capacity of an [`IdPool`] cannot be shrunk below [`BITS_PER_LONG`].
+ ///
+ /// [`BITS_PER_LONG`]: srctree/include/asm-generic/bitsperlong.h
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use kernel::alloc::{AllocError, flags::GFP_KERNEL};
+ /// use kernel::id_pool::{ReallocRequest, IdPool};
+ ///
+ /// let mut pool = IdPool::new(1024, GFP_KERNEL)?;
+ /// let alloc_request = pool.shrink_request().ok_or(AllocError)?;
+ /// let resizer = alloc_request.realloc(GFP_KERNEL)?;
+ /// pool.shrink(resizer);
+ /// assert_eq!(pool.capacity(), kernel::bindings::BITS_PER_LONG as usize);
+ /// # Ok::<(), AllocError>(())
+ /// ```
+ #[inline]
+ pub fn shrink_request(&self) -> Option<ReallocRequest> {
+ let cap = self.capacity();
+ // Shrinking below [`BITS_PER_LONG`] is never possible.
+ if cap <= BITS_PER_LONG {
+ return None;
+ }
+ // Determine if the bitmap can shrink based on the position of
+ // its last set bit. If the bit is within the first quarter of
+ // the bitmap then shrinking is possible. In this case, the
+ // bitmap should shrink to half its current size.
+ let Some(bit) = self.map.last_bit() else {
+ return Some(ReallocRequest {
+ num_ids: BITS_PER_LONG,
+ });
+ };
+ if bit >= (cap / 4) {
+ return None;
+ }
+ let num_ids = usize::max(BITS_PER_LONG, cap / 2);
+ Some(ReallocRequest { num_ids })
+ }
+
+ /// Shrinks pool by using a new [`BitmapVec`], if still possible.
+ #[inline]
+ pub fn shrink(&mut self, mut resizer: PoolResizer) {
+ // Between request to shrink that led to allocation of `resizer` and now,
+ // bits may have changed.
+ // Verify that shrinking is still possible. In case shrinking to
+ // the size of `resizer` is no longer possible, do nothing,
+ // drop `resizer` and move on.
+ let Some(updated) = self.shrink_request() else {
+ return;
+ };
+ if updated.num_ids > resizer.new.len() {
+ return;
+ }
+
+ resizer.new.copy_and_extend(&self.map);
+ self.map = resizer.new;
+ }
+
+ /// Returns a [`ReallocRequest`] for growing this [`IdPool`], if possible.
+ ///
+ /// The capacity of an [`IdPool`] cannot be grown above [`i32::MAX`].
+ #[inline]
+ pub fn grow_request(&self) -> Option<ReallocRequest> {
+ let num_ids = self.capacity() * 2;
+ if num_ids > i32::MAX.try_into().unwrap() {
+ return None;
+ }
+ Some(ReallocRequest { num_ids })
+ }
+
+ /// Grows pool by using a new [`BitmapVec`], if still necessary.
+ ///
+ /// The `resizer` argument has to be obtained by calling [`Self::grow_request`]
+ /// on this object and performing a [`ReallocRequest::realloc`].
+ #[inline]
+ pub fn grow(&mut self, mut resizer: PoolResizer) {
+ // Between request to grow that led to allocation of `resizer` and now,
+ // another thread may have already grown the capacity.
+ // In this case, do nothing, drop `resizer` and move on.
+ if resizer.new.len() <= self.capacity() {
+ return;
+ }
+
+ resizer.new.copy_and_extend(&self.map);
+ self.map = resizer.new;
+ }
+
+ /// Acquires a new ID by finding and setting the next zero bit in the
+ /// bitmap.
+ ///
+ /// Upon success, returns its index. Otherwise, returns [`None`]
+ /// to indicate that a [`Self::grow_request`] is needed.
+ #[inline]
+ pub fn acquire_next_id(&mut self, offset: usize) -> Option<usize> {
+ let next_zero_bit = self.map.next_zero_bit(offset);
+ if let Some(nr) = next_zero_bit {
+ self.map.set_bit(nr);
+ }
+ next_zero_bit
+ }
+
+ /// Releases an ID.
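+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use kernel::alloc::flags::GFP_KERNEL;
+ /// use kernel::id_pool::IdPool;
+ ///
+ /// let mut pool = IdPool::new(64, GFP_KERNEL)?;
+ /// let id = pool.acquire_next_id(0).ok_or(ENOSPC)?;
+ /// pool.release_id(id);
+ ///
+ /// // The released ID can be acquired again.
+ /// assert_eq!(pool.acquire_next_id(0), Some(id));
+ /// # Ok::<(), Error>(())
+ /// ```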
+ #[inline]
+ pub fn release_id(&mut self, id: usize) {
+ self.map.clear_bit(id);
+ }
+}
diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs
index 09ee3d17ee0a..7e5290caf788 100644
--- a/rust/kernel/lib.rs
+++ b/rust/kernel/lib.rs
@@ -19,6 +19,7 @@
// Stable since Rust 1.79.0.
#![feature(generic_nonzero)]
#![feature(inline_const)]
+#![feature(pointer_is_aligned)]
//
// Stable since Rust 1.81.0.
#![feature(lint_reasons)]
@@ -64,6 +65,7 @@ pub mod acpi;
pub mod alloc;
#[cfg(CONFIG_AUXILIARY_BUS)]
pub mod auxiliary;
+pub mod bitmap;
pub mod bits;
#[cfg(CONFIG_BLOCK)]
pub mod block;
@@ -92,6 +94,7 @@ pub mod faux;
pub mod firmware;
pub mod fmt;
pub mod fs;
+pub mod id_pool;
pub mod init;
pub mod io;
pub mod ioctl;
@@ -100,6 +103,7 @@ pub mod jump_label;
#[cfg(CONFIG_KUNIT)]
pub mod kunit;
pub mod list;
+pub mod maple_tree;
pub mod miscdevice;
pub mod mm;
#[cfg(CONFIG_NET)]
@@ -119,6 +123,7 @@ pub mod ptr;
pub mod rbtree;
pub mod regulator;
pub mod revocable;
+pub mod scatterlist;
pub mod security;
pub mod seq_file;
pub mod sizes;
diff --git a/rust/kernel/maple_tree.rs b/rust/kernel/maple_tree.rs
new file mode 100644
index 000000000000..e72eec56bf57
--- /dev/null
+++ b/rust/kernel/maple_tree.rs
@@ -0,0 +1,647 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Maple trees.
+//!
+//! C header: [`include/linux/maple_tree.h`](srctree/include/linux/maple_tree.h)
+//!
+//! Reference: <https://docs.kernel.org/core-api/maple_tree.html>
+
+use core::{
+ marker::PhantomData,
+ ops::{Bound, RangeBounds},
+ ptr,
+};
+
+use kernel::{
+ alloc::Flags,
+ error::to_result,
+ prelude::*,
+ types::{ForeignOwnable, Opaque},
+};
+
+/// A maple tree optimized for storing non-overlapping ranges.
+///
+/// # Invariants
+///
+/// Each range in the maple tree owns an instance of `T`.
+#[pin_data(PinnedDrop)]
+#[repr(transparent)]
+pub struct MapleTree<T: ForeignOwnable> {
+ #[pin]
+ tree: Opaque<bindings::maple_tree>,
+ _p: PhantomData<T>,
+}
+
+/// A maple tree with `MT_FLAGS_ALLOC_RANGE` set.
+///
+/// All methods on [`MapleTree`] are also accessible on this type.
+#[pin_data]
+#[repr(transparent)]
+pub struct MapleTreeAlloc<T: ForeignOwnable> {
+ #[pin]
+ tree: MapleTree<T>,
+}
+
+// Make MapleTree methods usable on MapleTreeAlloc.
+impl<T: ForeignOwnable> core::ops::Deref for MapleTreeAlloc<T> {
+ type Target = MapleTree<T>;
+
+ #[inline]
+ fn deref(&self) -> &MapleTree<T> {
+ &self.tree
+ }
+}
+
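+/// Converts a Rust [`RangeBounds`] into the inclusive `(first, last)` pair used by the maple
+/// tree API, returning [`None`] if the range is empty or otherwise invalid.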
+#[inline]
+fn to_maple_range(range: impl RangeBounds<usize>) -> Option<(usize, usize)> {
+ let first = match range.start_bound() {
+ Bound::Included(start) => *start,
+ Bound::Excluded(start) => start.checked_add(1)?,
+ Bound::Unbounded => 0,
+ };
+
+ let last = match range.end_bound() {
+ Bound::Included(end) => *end,
+ Bound::Excluded(end) => end.checked_sub(1)?,
+ Bound::Unbounded => usize::MAX,
+ };
+
+ if last < first {
+ return None;
+ }
+
+ Some((first, last))
+}
+
+impl<T: ForeignOwnable> MapleTree<T> {
+ /// Create a new maple tree.
+ ///
+ /// The tree will use the regular implementation with a higher branching factor, rather than
+ /// the allocation tree.
+ #[inline]
+ pub fn new() -> impl PinInit<Self> {
+ pin_init!(MapleTree {
+ // SAFETY: This initializes a maple tree into a pinned slot. The maple tree will be
+ // destroyed in Drop before the memory location becomes invalid.
+ tree <- Opaque::ffi_init(|slot| unsafe { bindings::mt_init_flags(slot, 0) }),
+ _p: PhantomData,
+ })
+ }
+
+ /// Insert the value at the given index.
+ ///
+ /// # Errors
+ ///
+ /// If the maple tree already contains a range using the given index, then this call will
+ /// return an [`InsertErrorKind::Occupied`]. It may also fail if memory allocation fails.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use kernel::maple_tree::{InsertErrorKind, MapleTree};
+ ///
+ /// let tree = KBox::pin_init(MapleTree::<KBox<i32>>::new(), GFP_KERNEL)?;
+ ///
+ /// let ten = KBox::new(10, GFP_KERNEL)?;
+ /// let twenty = KBox::new(20, GFP_KERNEL)?;
+ /// let the_answer = KBox::new(42, GFP_KERNEL)?;
+ ///
+ /// // These calls will succeed.
+ /// tree.insert(100, ten, GFP_KERNEL)?;
+ /// tree.insert(101, twenty, GFP_KERNEL)?;
+ ///
+ /// // This will fail because the index is already in use.
+ /// assert_eq!(
+ /// tree.insert(100, the_answer, GFP_KERNEL).unwrap_err().cause,
+ /// InsertErrorKind::Occupied,
+ /// );
+ /// # Ok::<_, Error>(())
+ /// ```
+ #[inline]
+ pub fn insert(&self, index: usize, value: T, gfp: Flags) -> Result<(), InsertError<T>> {
+ self.insert_range(index..=index, value, gfp)
+ }
+
+ /// Insert a value into the specified range, failing on overlap.
+ ///
+ /// This accepts the usual types of Rust ranges using the `..` and `..=` syntax for exclusive
+ /// and inclusive ranges respectively. The range must not be empty, and must not overlap with
+ /// any existing range.
+ ///
+ /// # Errors
+ ///
+ /// If the maple tree already contains an overlapping range, then this call will return an
+ /// [`InsertErrorKind::Occupied`]. It may also fail if memory allocation fails or if the
+ /// requested range is invalid (e.g. empty).
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use kernel::maple_tree::{InsertErrorKind, MapleTree};
+ ///
+ /// let tree = KBox::pin_init(MapleTree::<KBox<i32>>::new(), GFP_KERNEL)?;
+ ///
+ /// let ten = KBox::new(10, GFP_KERNEL)?;
+ /// let twenty = KBox::new(20, GFP_KERNEL)?;
+ /// let the_answer = KBox::new(42, GFP_KERNEL)?;
+ /// let hundred = KBox::new(100, GFP_KERNEL)?;
+ ///
+ /// // Insert the value 10 at the indices 100 to 499.
+ /// tree.insert_range(100..500, ten, GFP_KERNEL)?;
+ ///
+ /// // Insert the value 20 at the indices 500 to 1000.
+ /// tree.insert_range(500..=1000, twenty, GFP_KERNEL)?;
+ ///
+ /// // This will fail due to overlap with the previous range on index 1000.
+ /// assert_eq!(
+ /// tree.insert_range(1000..1200, the_answer, GFP_KERNEL).unwrap_err().cause,
+ /// InsertErrorKind::Occupied,
+ /// );
+ ///
+ /// // When using .. to specify the range, you must be careful to ensure that the range is
+ /// // non-empty.
+ /// assert_eq!(
+ /// tree.insert_range(72..72, hundred, GFP_KERNEL).unwrap_err().cause,
+ /// InsertErrorKind::InvalidRequest,
+ /// );
+ /// # Ok::<_, Error>(())
+ /// ```
+ pub fn insert_range<R>(&self, range: R, value: T, gfp: Flags) -> Result<(), InsertError<T>>
+ where
+ R: RangeBounds<usize>,
+ {
+ let Some((first, last)) = to_maple_range(range) else {
+ return Err(InsertError {
+ value,
+ cause: InsertErrorKind::InvalidRequest,
+ });
+ };
+
+ let ptr = T::into_foreign(value);
+
+ // SAFETY: The tree is valid, and we are passing a pointer to an owned instance of `T`.
+ let res = to_result(unsafe {
+ bindings::mtree_insert_range(self.tree.get(), first, last, ptr, gfp.as_raw())
+ });
+
+ if let Err(err) = res {
+ // SAFETY: As `mtree_insert_range` failed, it is safe to take back ownership.
+ let value = unsafe { T::from_foreign(ptr) };
+
+ let cause = if err == ENOMEM {
+ InsertErrorKind::AllocError(kernel::alloc::AllocError)
+ } else if err == EEXIST {
+ InsertErrorKind::Occupied
+ } else {
+ InsertErrorKind::InvalidRequest
+ };
+ Err(InsertError { value, cause })
+ } else {
+ Ok(())
+ }
+ }
+
+ /// Erase the range containing the given index.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use kernel::maple_tree::MapleTree;
+ ///
+ /// let tree = KBox::pin_init(MapleTree::<KBox<i32>>::new(), GFP_KERNEL)?;
+ ///
+ /// let ten = KBox::new(10, GFP_KERNEL)?;
+ /// let twenty = KBox::new(20, GFP_KERNEL)?;
+ ///
+ /// tree.insert_range(100..500, ten, GFP_KERNEL)?;
+ /// tree.insert(67, twenty, GFP_KERNEL)?;
+ ///
+ /// assert_eq!(tree.erase(67).map(|v| *v), Some(20));
+ /// assert_eq!(tree.erase(275).map(|v| *v), Some(10));
+ ///
+ /// // The previous call erased the entire range, not just index 275.
+ /// assert!(tree.erase(127).is_none());
+ /// # Ok::<_, Error>(())
+ /// ```
+ #[inline]
+ pub fn erase(&self, index: usize) -> Option<T> {
+ // SAFETY: `self.tree` contains a valid maple tree.
+ let ret = unsafe { bindings::mtree_erase(self.tree.get(), index) };
+
+ // SAFETY: If the pointer is not null, then we took ownership of a valid instance of `T`
+ // from the tree.
+ unsafe { T::try_from_foreign(ret) }
+ }
+
+ /// Lock the internal spinlock.
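+ ///
+ /// The lock is released when the returned [`MapleGuard`] goes out of scope.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use kernel::maple_tree::MapleTree;
+ ///
+ /// let tree = KBox::pin_init(MapleTree::<KBox<i32>>::new(), GFP_KERNEL)?;
+ ///
+ /// // Nothing has been inserted yet, so there is nothing to load.
+ /// assert!(tree.lock().load(100).is_none());
+ /// # Ok::<_, Error>(())
+ /// ```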
+ #[inline]
+ pub fn lock(&self) -> MapleGuard<'_, T> {
+ // SAFETY: It's safe to lock the spinlock in a maple tree.
+ unsafe { bindings::spin_lock(self.ma_lock()) };
+
+ // INVARIANT: We just took the spinlock.
+ MapleGuard(self)
+ }
+
+ #[inline]
+ fn ma_lock(&self) -> *mut bindings::spinlock_t {
+ // SAFETY: This pointer offset operation stays in-bounds.
+ let lock_ptr = unsafe { &raw mut (*self.tree.get()).__bindgen_anon_1.ma_lock };
+ lock_ptr.cast()
+ }
+
+ /// Free all `T` instances in this tree.
+ ///
+ /// # Safety
+ ///
+ /// This frees Rust data referenced by the maple tree without removing it from the maple tree,
+ /// leaving it in an invalid state. The caller must ensure that this invalid state cannot be
+ /// observed by the end-user.
+ unsafe fn free_all_entries(self: Pin<&mut Self>) {
+ // SAFETY: The caller provides exclusive access to the entire maple tree, which
+ // guarantees read/write access despite not holding the lock.
+ let mut ma_state = unsafe { MaState::new_raw(self.into_ref().get_ref(), 0, usize::MAX) };
+
+ loop {
+ // This uses the raw accessor because we're destroying pointers without removing them
+ // from the maple tree, which is only valid because this is the destructor.
+ let ptr = ma_state.mas_find_raw(usize::MAX);
+ if ptr.is_null() {
+ break;
+ }
+ // SAFETY: By the type invariants, this pointer references a valid value of type `T`.
+ // By the safety requirements, it is okay to free it without removing it from the maple
+ // tree.
+ drop(unsafe { T::from_foreign(ptr) });
+ }
+ }
+}
+
+#[pinned_drop]
+impl<T: ForeignOwnable> PinnedDrop for MapleTree<T> {
+ #[inline]
+ fn drop(mut self: Pin<&mut Self>) {
+ // We only iterate the tree if the Rust value has a destructor.
+ if core::mem::needs_drop::<T>() {
+ // SAFETY: Other than the below `mtree_destroy` call, the tree will not be accessed
+ // after this call.
+ unsafe { self.as_mut().free_all_entries() };
+ }
+
+ // SAFETY: The tree is valid, and will not be accessed after this call.
+ unsafe { bindings::mtree_destroy(self.tree.get()) };
+ }
+}
+
+/// A reference to a [`MapleTree`] that owns the inner lock.
+///
+/// # Invariants
+///
+/// This guard owns the inner spinlock.
+#[must_use = "if unused, the lock will be immediately unlocked"]
+pub struct MapleGuard<'tree, T: ForeignOwnable>(&'tree MapleTree<T>);
+
+impl<'tree, T: ForeignOwnable> Drop for MapleGuard<'tree, T> {
+ #[inline]
+ fn drop(&mut self) {
+ // SAFETY: By the type invariants, we hold this spinlock.
+ unsafe { bindings::spin_unlock(self.0.ma_lock()) };
+ }
+}
+
+impl<'tree, T: ForeignOwnable> MapleGuard<'tree, T> {
+ /// Create a [`MaState`] protected by this lock guard.
+ pub fn ma_state(&mut self, first: usize, end: usize) -> MaState<'_, T> {
+ // SAFETY: The `MaState` borrows this `MapleGuard`, so it can also borrow the `MapleGuard`'s
+ // read/write permissions to the maple tree.
+ unsafe { MaState::new_raw(self.0, first, end) }
+ }
+
+ /// Load the value at the given index.
+ ///
+ /// # Examples
+ ///
+ /// Read the value while holding the spinlock.
+ ///
+ /// ```
+ /// use kernel::maple_tree::MapleTree;
+ ///
+ /// let tree = KBox::pin_init(MapleTree::<KBox<i32>>::new(), GFP_KERNEL)?;
+ ///
+ /// let ten = KBox::new(10, GFP_KERNEL)?;
+ /// let twenty = KBox::new(20, GFP_KERNEL)?;
+ /// tree.insert(100, ten, GFP_KERNEL)?;
+ /// tree.insert(200, twenty, GFP_KERNEL)?;
+ ///
+ /// let mut lock = tree.lock();
+ /// assert_eq!(lock.load(100).map(|v| *v), Some(10));
+ /// assert_eq!(lock.load(200).map(|v| *v), Some(20));
+ /// assert_eq!(lock.load(300).map(|v| *v), None);
+ /// # Ok::<_, Error>(())
+ /// ```
+ ///
+ /// Increment refcount under the lock, to keep value alive afterwards.
+ ///
+ /// ```
+ /// use kernel::maple_tree::MapleTree;
+ /// use kernel::sync::Arc;
+ ///
+ /// let tree = KBox::pin_init(MapleTree::<Arc<i32>>::new(), GFP_KERNEL)?;
+ ///
+ /// let ten = Arc::new(10, GFP_KERNEL)?;
+ /// let twenty = Arc::new(20, GFP_KERNEL)?;
+ /// tree.insert(100, ten, GFP_KERNEL)?;
+ /// tree.insert(200, twenty, GFP_KERNEL)?;
+ ///
+ /// // Briefly take the lock to increment the refcount.
+ /// let value = tree.lock().load(100).map(Arc::from);
+ ///
+ /// // At this point, another thread might remove the value.
+ /// tree.erase(100);
+ ///
+ /// // But we can still access it because we took a refcount.
+ /// assert_eq!(value.map(|v| *v), Some(10));
+ /// # Ok::<_, Error>(())
+ /// ```
+ #[inline]
+ pub fn load(&mut self, index: usize) -> Option<T::BorrowedMut<'_>> {
+ // SAFETY: `self.tree` contains a valid maple tree.
+ let ret = unsafe { bindings::mtree_load(self.0.tree.get(), index) };
+ if ret.is_null() {
+ return None;
+ }
+
+ // SAFETY: If the pointer is not null, then it references a valid instance of `T`. It is
+ // safe to borrow the instance mutably because the signature of this function enforces that
+ // the mutable borrow is not used after the spinlock is dropped.
+ Some(unsafe { T::borrow_mut(ret) })
+ }
+}
+
+impl<T: ForeignOwnable> MapleTreeAlloc<T> {
+ /// Create a new allocation tree.
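+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use kernel::maple_tree::MapleTreeAlloc;
+ ///
+ /// let tree = KBox::pin_init(MapleTreeAlloc::<KBox<i32>>::new(), GFP_KERNEL)?;
+ ///
+ /// // The allocation tree starts out empty.
+ /// assert!(tree.lock().load(0).is_none());
+ /// # Ok::<_, Error>(())
+ /// ```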
+ pub fn new() -> impl PinInit<Self> {
+ let tree = pin_init!(MapleTree {
+ // SAFETY: This initializes a maple tree into a pinned slot. The maple tree will be
+ // destroyed in Drop before the memory location becomes invalid.
+ tree <- Opaque::ffi_init(|slot| unsafe {
+ bindings::mt_init_flags(slot, bindings::MT_FLAGS_ALLOC_RANGE)
+ }),
+ _p: PhantomData,
+ });
+
+ pin_init!(MapleTreeAlloc { tree <- tree })
+ }
+
+ /// Insert an entry with the given size somewhere in the given range.
+ ///
+ /// The maple tree will search for a location in the given range where there is space to insert
+ /// the new range. If there is not enough available space, then an error will be returned.
+ ///
+ /// The index of the new range is returned.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use kernel::maple_tree::{MapleTreeAlloc, AllocErrorKind};
+ ///
+ /// let tree = KBox::pin_init(MapleTreeAlloc::<KBox<i32>>::new(), GFP_KERNEL)?;
+ ///
+ /// let ten = KBox::new(10, GFP_KERNEL)?;
+ /// let twenty = KBox::new(20, GFP_KERNEL)?;
+ /// let thirty = KBox::new(30, GFP_KERNEL)?;
+ /// let hundred = KBox::new(100, GFP_KERNEL)?;
+ ///
+ /// // Allocate three ranges.
+ /// let idx1 = tree.alloc_range(100, ten, ..1000, GFP_KERNEL)?;
+ /// let idx2 = tree.alloc_range(100, twenty, ..1000, GFP_KERNEL)?;
+ /// let idx3 = tree.alloc_range(100, thirty, ..1000, GFP_KERNEL)?;
+ ///
+ /// assert_eq!(idx1, 0);
+ /// assert_eq!(idx2, 100);
+ /// assert_eq!(idx3, 200);
+ ///
+ /// // This will fail because the remaining space is too small.
+ /// assert_eq!(
+ /// tree.alloc_range(800, hundred, ..1000, GFP_KERNEL).unwrap_err().cause,
+ /// AllocErrorKind::Busy,
+ /// );
+ /// # Ok::<_, Error>(())
+ /// ```
+ pub fn alloc_range<R>(
+ &self,
+ size: usize,
+ value: T,
+ range: R,
+ gfp: Flags,
+ ) -> Result<usize, AllocError<T>>
+ where
+ R: RangeBounds<usize>,
+ {
+ let Some((min, max)) = to_maple_range(range) else {
+ return Err(AllocError {
+ value,
+ cause: AllocErrorKind::InvalidRequest,
+ });
+ };
+
+ let ptr = T::into_foreign(value);
+ let mut index = 0;
+
+ // SAFETY: The tree is valid, and we are passing a pointer to an owned instance of `T`.
+ let res = to_result(unsafe {
+ bindings::mtree_alloc_range(
+ self.tree.tree.get(),
+ &mut index,
+ ptr,
+ size,
+ min,
+ max,
+ gfp.as_raw(),
+ )
+ });
+
+ if let Err(err) = res {
+ // SAFETY: As `mtree_alloc_range` failed, it is safe to take back ownership.
+ let value = unsafe { T::from_foreign(ptr) };
+
+ let cause = if err == ENOMEM {
+ AllocErrorKind::AllocError(kernel::alloc::AllocError)
+ } else if err == EBUSY {
+ AllocErrorKind::Busy
+ } else {
+ AllocErrorKind::InvalidRequest
+ };
+ Err(AllocError { value, cause })
+ } else {
+ Ok(index)
+ }
+ }
+}
+
+/// A helper type used for navigating a [`MapleTree`].
+///
+/// # Invariants
+///
+/// For the duration of `'tree`:
+///
+/// * The `ma_state` references a valid `MapleTree<T>`.
+/// * The `ma_state` has read/write access to the tree.
+pub struct MaState<'tree, T: ForeignOwnable> {
+ state: bindings::ma_state,
+ _phantom: PhantomData<&'tree mut MapleTree<T>>,
+}
+
+impl<'tree, T: ForeignOwnable> MaState<'tree, T> {
+ /// Initialize a new `MaState` with the given tree.
+ ///
+ /// # Safety
+ ///
+ /// The caller must ensure that this `MaState` has read/write access to the maple tree.
+ #[inline]
+ unsafe fn new_raw(mt: &'tree MapleTree<T>, first: usize, end: usize) -> Self {
+ // INVARIANT:
+ // * Having a reference ensures that the `MapleTree<T>` is valid for `'tree`.
+ // * The caller ensures that we have read/write access.
+ Self {
+ state: bindings::ma_state {
+ tree: mt.tree.get(),
+ index: first,
+ last: end,
+ node: ptr::null_mut(),
+ status: bindings::maple_status_ma_start,
+ min: 0,
+ max: usize::MAX,
+ alloc: ptr::null_mut(),
+ mas_flags: 0,
+ store_type: bindings::store_type_wr_invalid,
+ ..Default::default()
+ },
+ _phantom: PhantomData,
+ }
+ }
+
+ #[inline]
+ fn as_raw(&mut self) -> *mut bindings::ma_state {
+ &raw mut self.state
+ }
+
+ #[inline]
+ fn mas_find_raw(&mut self, max: usize) -> *mut c_void {
+ // SAFETY: By the type invariants, the `ma_state` is active and we have read/write access
+ // to the tree.
+ unsafe { bindings::mas_find(self.as_raw(), max) }
+ }
+
+ /// Find the next entry in the maple tree.
+ ///
+ /// # Examples
+ ///
+ /// Iterate the maple tree.
+ ///
+ /// ```
+ /// use kernel::maple_tree::MapleTree;
+ /// use kernel::sync::Arc;
+ ///
+ /// let tree = KBox::pin_init(MapleTree::<Arc<i32>>::new(), GFP_KERNEL)?;
+ ///
+ /// let ten = Arc::new(10, GFP_KERNEL)?;
+ /// let twenty = Arc::new(20, GFP_KERNEL)?;
+ /// tree.insert(100, ten, GFP_KERNEL)?;
+ /// tree.insert(200, twenty, GFP_KERNEL)?;
+ ///
+ /// let mut ma_lock = tree.lock();
+ /// let mut iter = ma_lock.ma_state(0, usize::MAX);
+ ///
+ /// assert_eq!(iter.find(usize::MAX).map(|v| *v), Some(10));
+ /// assert_eq!(iter.find(usize::MAX).map(|v| *v), Some(20));
+ /// assert!(iter.find(usize::MAX).is_none());
+ /// # Ok::<_, Error>(())
+ /// ```
+ #[inline]
+ pub fn find(&mut self, max: usize) -> Option<T::BorrowedMut<'_>> {
+ let ret = self.mas_find_raw(max);
+ if ret.is_null() {
+ return None;
+ }
+
+ // SAFETY: If the pointer is not null, then it references a valid instance of `T`. It's
+ // safe to access it mutably as the returned reference borrows this `MaState`, and the
+ // `MaState` has read/write access to the maple tree.
+ Some(unsafe { T::borrow_mut(ret) })
+ }
+}
+
+/// Error type for failure to insert a new value.
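+///
+/// On failure, ownership of the value is handed back to the caller so it can be reused:
+///
+/// ```
+/// use kernel::maple_tree::MapleTree;
+///
+/// let tree = KBox::pin_init(MapleTree::<KBox<i32>>::new(), GFP_KERNEL)?;
+///
+/// tree.insert(100, KBox::new(10, GFP_KERNEL)?, GFP_KERNEL)?;
+///
+/// // The second insert fails, but the box is recovered from the error.
+/// let err = tree.insert(100, KBox::new(20, GFP_KERNEL)?, GFP_KERNEL).unwrap_err();
+/// assert_eq!(*err.value, 20);
+/// # Ok::<_, Error>(())
+/// ```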
+pub struct InsertError<T> {
+ /// The value that could not be inserted.
+ pub value: T,
+ /// The reason for the failure to insert.
+ pub cause: InsertErrorKind,
+}
+
+/// The reason for the failure to insert.
+#[derive(PartialEq, Eq, Copy, Clone, Debug)]
+pub enum InsertErrorKind {
+ /// There is already a value in the requested range.
+ Occupied,
+ /// Failure to allocate memory.
+ AllocError(kernel::alloc::AllocError),
+ /// The insertion request was invalid.
+ InvalidRequest,
+}
+
+impl From<InsertErrorKind> for Error {
+ #[inline]
+ fn from(kind: InsertErrorKind) -> Error {
+ match kind {
+ InsertErrorKind::Occupied => EEXIST,
+ InsertErrorKind::AllocError(kernel::alloc::AllocError) => ENOMEM,
+ InsertErrorKind::InvalidRequest => EINVAL,
+ }
+ }
+}
+
+impl<T> From<InsertError<T>> for Error {
+ #[inline]
+ fn from(insert_err: InsertError<T>) -> Error {
+ Error::from(insert_err.cause)
+ }
+}
+
+/// Error type for failure to insert a new value.
+pub struct AllocError<T> {
+ /// The value that could not be inserted.
+ pub value: T,
+ /// The reason for the failure to insert.
+ pub cause: AllocErrorKind,
+}
+
+/// The reason for the failure to insert.
+#[derive(PartialEq, Eq, Copy, Clone)]
+pub enum AllocErrorKind {
+ /// There is not enough space for the requested allocation.
+ Busy,
+ /// Failure to allocate memory.
+ AllocError(kernel::alloc::AllocError),
+ /// The insertion request was invalid.
+ InvalidRequest,
+}
+
+impl From<AllocErrorKind> for Error {
+ #[inline]
+ fn from(kind: AllocErrorKind) -> Error {
+ match kind {
+ AllocErrorKind::Busy => EBUSY,
+ AllocErrorKind::AllocError(kernel::alloc::AllocError) => ENOMEM,
+ AllocErrorKind::InvalidRequest => EINVAL,
+ }
+ }
+}
+
+impl<T> From<AllocError<T>> for Error {
+ #[inline]
+ fn from(insert_err: AllocError<T>) -> Error {
+ Error::from(insert_err.cause)
+ }
+}
diff --git a/rust/kernel/mm.rs b/rust/kernel/mm.rs
index 43f525c0d16c..4764d7b68f2a 100644
--- a/rust/kernel/mm.rs
+++ b/rust/kernel/mm.rs
@@ -13,7 +13,8 @@
use crate::{
bindings,
- types::{ARef, AlwaysRefCounted, NotThreadSafe, Opaque},
+ sync::aref::{ARef, AlwaysRefCounted},
+ types::{NotThreadSafe, Opaque},
};
use core::{ops::Deref, ptr::NonNull};
diff --git a/rust/kernel/mm/mmput_async.rs b/rust/kernel/mm/mmput_async.rs
index 9289e05f7a67..b8d2f051225c 100644
--- a/rust/kernel/mm/mmput_async.rs
+++ b/rust/kernel/mm/mmput_async.rs
@@ -10,7 +10,7 @@
use crate::{
bindings,
mm::MmWithUser,
- types::{ARef, AlwaysRefCounted},
+ sync::aref::{ARef, AlwaysRefCounted},
};
use core::{ops::Deref, ptr::NonNull};
diff --git a/rust/kernel/net/phy.rs b/rust/kernel/net/phy.rs
index be1027b7961b..bf6272d87a7b 100644
--- a/rust/kernel/net/phy.rs
+++ b/rust/kernel/net/phy.rs
@@ -196,11 +196,8 @@ impl Device {
// SAFETY: `phydev` is pointing to a valid object by the type invariant of `Self`.
// So it's just an FFI call.
let ret = unsafe { bindings::phy_read_paged(phydev, page.into(), regnum.into()) };
- if ret < 0 {
- Err(Error::from_errno(ret))
- } else {
- Ok(ret as u16)
- }
+
+ to_result(ret).map(|()| ret as u16)
}
/// Resolves the advertisements into PHY settings.
diff --git a/rust/kernel/page.rs b/rust/kernel/page.rs
index 7c1b17246ed5..75ef096075cb 100644
--- a/rust/kernel/page.rs
+++ b/rust/kernel/page.rs
@@ -9,7 +9,12 @@ use crate::{
error::Result,
uaccess::UserSliceReader,
};
-use core::ptr::{self, NonNull};
+use core::{
+ marker::PhantomData,
+ mem::ManuallyDrop,
+ ops::Deref,
+ ptr::{self, NonNull},
+};
/// A bitwise shift for the page size.
pub const PAGE_SHIFT: usize = bindings::PAGE_SHIFT as usize;
@@ -30,6 +35,86 @@ pub const fn page_align(addr: usize) -> usize {
(addr + (PAGE_SIZE - 1)) & PAGE_MASK
}
+/// Representation of a non-owning reference to a [`Page`].
+///
+/// This type provides a borrowed version of a [`Page`] that is owned by some other entity, e.g. a
+/// [`Vmalloc`] allocation such as [`VBox`].
+///
+/// # Example
+///
+/// ```
+/// # use kernel::{bindings, prelude::*};
+/// use kernel::page::{BorrowedPage, Page, PAGE_SIZE};
+/// # use core::{mem::MaybeUninit, ptr, ptr::NonNull };
+///
+/// fn borrow_page<'a>(vbox: &'a mut VBox<MaybeUninit<[u8; PAGE_SIZE]>>) -> BorrowedPage<'a> {
+/// let ptr = ptr::from_ref(&**vbox);
+///
+/// // SAFETY: `ptr` is a valid pointer to `Vmalloc` memory.
+/// let page = unsafe { bindings::vmalloc_to_page(ptr.cast()) };
+///
+/// // SAFETY: `vmalloc_to_page` returns a valid pointer to a `struct page` for a valid
+/// // pointer to `Vmalloc` memory.
+/// let page = unsafe { NonNull::new_unchecked(page) };
+///
+/// // SAFETY:
+/// // - `page` is a valid pointer to a `struct page`.
+/// // - `page` stays valid for the entire lifetime `'a` of the borrowed `vbox`.
+/// unsafe { BorrowedPage::from_raw(page) }
+/// }
+///
+/// let mut vbox = VBox::<[u8; PAGE_SIZE]>::new_uninit(GFP_KERNEL)?;
+/// let page = borrow_page(&mut vbox);
+///
+/// // SAFETY: There is no concurrent read or write to this page.
+/// unsafe { page.fill_zero_raw(0, PAGE_SIZE)? };
+/// # Ok::<(), Error>(())
+/// ```
+///
+/// # Invariants
+///
+/// The borrowed underlying pointer to a `struct page` is valid for the entire lifetime `'a`.
+///
+/// [`VBox`]: kernel::alloc::VBox
+/// [`Vmalloc`]: kernel::alloc::allocator::Vmalloc
+pub struct BorrowedPage<'a>(ManuallyDrop<Page>, PhantomData<&'a Page>);
+
+impl<'a> BorrowedPage<'a> {
+ /// Constructs a [`BorrowedPage`] from a raw pointer to a `struct page`.
+ ///
+ /// # Safety
+ ///
+ /// - `ptr` must point to a valid `bindings::page`.
+ /// - `ptr` must remain valid for the entire lifetime `'a`.
+ pub unsafe fn from_raw(ptr: NonNull<bindings::page>) -> Self {
+ let page = Page { page: ptr };
+
+ // INVARIANT: The safety requirements guarantee that `ptr` is valid for the entire lifetime
+ // `'a`.
+ Self(ManuallyDrop::new(page), PhantomData)
+ }
+}
+
+impl<'a> Deref for BorrowedPage<'a> {
+ type Target = Page;
+
+ fn deref(&self) -> &Self::Target {
+ &self.0
+ }
+}
+
+/// Trait to be implemented by types which provide an [`Iterator`] over
+/// [`BorrowedPage`] items, such as [`VmallocPageIter`](kernel::alloc::allocator::VmallocPageIter).
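+///
+/// A minimal usage sketch, assuming `buf` implements [`AsPageIter`] (e.g. it is backed by
+/// `Vmalloc` memory):
+///
+/// ```ignore
+/// let num_pages = buf.page_iter().count();
+/// ```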
+pub trait AsPageIter {
+ /// The [`Iterator`] type, e.g. [`VmallocPageIter`](kernel::alloc::allocator::VmallocPageIter).
+ type Iter<'a>: Iterator<Item = BorrowedPage<'a>>
+ where
+ Self: 'a;
+
+ /// Returns an [`Iterator`] of [`BorrowedPage`] items over all pages owned by `self`.
+ fn page_iter(&mut self) -> Self::Iter<'_>;
+}
+
/// A pointer to a page that owns the page allocation.
///
/// # Invariants
diff --git a/rust/kernel/scatterlist.rs b/rust/kernel/scatterlist.rs
new file mode 100644
index 000000000000..9709dff60b5a
--- /dev/null
+++ b/rust/kernel/scatterlist.rs
@@ -0,0 +1,491 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Abstractions for scatter-gather lists.
+//!
+//! C header: [`include/linux/scatterlist.h`](srctree/include/linux/scatterlist.h)
+//!
+//! Scatter-gather (SG) I/O is a memory access technique that allows devices to perform DMA
+//! operations on data buffers that are not physically contiguous in memory. It works by creating a
+//! "scatter-gather list", an array where each entry specifies the address and length of a
+//! physically contiguous memory segment.
+//!
+//! The device's DMA controller can then read this list and process the segments sequentially as
+//! part of one logical I/O request. This avoids the need for a single, large, physically contiguous
+//! memory buffer, which can be difficult or impossible to allocate.
+//!
+//! This module provides safe Rust abstractions over the kernel's `struct scatterlist` and
+//! `struct sg_table` types.
+//!
+//! The main entry point is the [`SGTable`] type, which represents a complete scatter-gather table.
+//! It can be either:
+//!
+//! - An owned table ([`SGTable<Owned<P>>`]), created from a Rust memory buffer (e.g., [`VVec`]).
+//! This type manages the allocation of the `struct sg_table`, the DMA mapping of the buffer, and
+//! the automatic cleanup of all resources.
+//! - A borrowed reference (&[`SGTable`]), which provides safe, read-only access to a table that was
+//! allocated by other (e.g., C) code.
+//!
+//! Individual entries in the table are represented by [`SGEntry`], which can be accessed by
+//! iterating over an [`SGTable`].
+
+use crate::{
+ alloc,
+ alloc::allocator::VmallocPageIter,
+ bindings,
+ device::{Bound, Device},
+ devres::Devres,
+ dma, error,
+ io::resource::ResourceSize,
+ page,
+ prelude::*,
+ types::{ARef, Opaque},
+};
+use core::{ops::Deref, ptr::NonNull};
+
+/// A single entry in a scatter-gather list.
+///
+/// An `SGEntry` represents a single, physically contiguous segment of memory that has been mapped
+/// for DMA.
+///
+/// Instances of this struct are obtained by iterating over an [`SGTable`]. Drivers do not create
+/// or own [`SGEntry`] objects directly.
+#[repr(transparent)]
+pub struct SGEntry(Opaque<bindings::scatterlist>);
+
+// SAFETY: `SGEntry` can be sent to any task.
+unsafe impl Send for SGEntry {}
+
+// SAFETY: `SGEntry` has no interior mutability and can be accessed concurrently.
+unsafe impl Sync for SGEntry {}
+
+impl SGEntry {
+ /// Convert a raw `struct scatterlist *` to a `&'a SGEntry`.
+ ///
+ /// # Safety
+ ///
+ /// Callers must ensure that the `struct scatterlist` pointed to by `ptr` is valid for the
+ /// lifetime `'a`.
+ #[inline]
+ unsafe fn from_raw<'a>(ptr: *mut bindings::scatterlist) -> &'a Self {
+ // SAFETY: The safety requirements of this function guarantee that `ptr` is a valid pointer
+ // to a `struct scatterlist` for the duration of `'a`.
+ unsafe { &*ptr.cast() }
+ }
+
+ /// Obtain the raw `struct scatterlist *`.
+ #[inline]
+ fn as_raw(&self) -> *mut bindings::scatterlist {
+ self.0.get()
+ }
+
+ /// Returns the DMA address of this SG entry.
+ ///
+ /// This is the address that the device should use to access the memory segment.
+ #[inline]
+ pub fn dma_address(&self) -> dma::DmaAddress {
+ // SAFETY: `self.as_raw()` is a valid pointer to a `struct scatterlist`.
+ unsafe { bindings::sg_dma_address(self.as_raw()) }
+ }
+
+ /// Returns the length of this SG entry in bytes.
+ #[inline]
+ pub fn dma_len(&self) -> ResourceSize {
+ #[allow(clippy::useless_conversion)]
+ // SAFETY: `self.as_raw()` is a valid pointer to a `struct scatterlist`.
+ unsafe { bindings::sg_dma_len(self.as_raw()) }.into()
+ }
+}
+
+/// The borrowed generic type of an [`SGTable`], representing a borrowed or externally managed
+/// table.
+#[repr(transparent)]
+pub struct Borrowed(Opaque<bindings::sg_table>);
+
+// SAFETY: `Borrowed` can be sent to any task.
+unsafe impl Send for Borrowed {}
+
+// SAFETY: `Borrowed` has no interior mutability and can be accessed concurrently.
+unsafe impl Sync for Borrowed {}
+
+/// A scatter-gather table.
+///
+/// This struct is a wrapper around the kernel's `struct sg_table`. It manages a list of DMA-mapped
+/// memory segments that can be passed to a device for I/O operations.
+///
+/// The generic parameter `T` distinguishes between owned and borrowed tables:
+///
+/// - [`SGTable<Owned>`]: An owned table created and managed entirely by Rust code. It handles
+/// allocation, DMA mapping, and cleanup of all associated resources. See [`SGTable::new`].
+/// - [`SGTable<Borrowed>`] (or simply [`SGTable`]): Represents a table whose lifetime is managed
+/// externally. It can be used safely via a borrowed reference `&'a SGTable`, where `'a` is the
+/// external lifetime.
+///
+/// All [`SGTable`] variants can be iterated over, yielding the individual [`SGEntry`]s.
+#[repr(transparent)]
+#[pin_data]
+pub struct SGTable<T: private::Sealed = Borrowed> {
+ #[pin]
+ inner: T,
+}
+
+impl SGTable {
+ /// Creates a borrowed `&'a SGTable` from a raw `struct sg_table` pointer.
+ ///
+ /// This allows safe access to an `sg_table` that is managed elsewhere (for example, in C code).
+ ///
+ /// # Safety
+ ///
+ /// Callers must ensure that:
+ ///
+ /// - the `struct sg_table` pointed to by `ptr` is valid for the entire lifetime of `'a`,
+ /// - the data behind `ptr` is not modified concurrently for the duration of `'a`.
+ #[inline]
+ pub unsafe fn from_raw<'a>(ptr: *mut bindings::sg_table) -> &'a Self {
+ // SAFETY: The safety requirements of this function guarantee that `ptr` is a valid pointer
+ // to a `struct sg_table` for the duration of `'a`.
+ unsafe { &*ptr.cast() }
+ }
+
+ #[inline]
+ fn as_raw(&self) -> *mut bindings::sg_table {
+ self.inner.0.get()
+ }
+
+ /// Returns an [`SGTableIter`] bound to the lifetime of `self`.
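+ ///
+ /// # Examples
+ ///
+ /// A sketch of walking a table, assuming `sgt` is a `&SGTable`:
+ ///
+ /// ```ignore
+ /// for entry in sgt.iter() {
+ /// // Program the device with each physically contiguous DMA segment.
+ /// let _addr = entry.dma_address();
+ /// let _len = entry.dma_len();
+ /// }
+ /// ```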
+ pub fn iter(&self) -> SGTableIter<'_> {
+ // SAFETY: `self.as_raw()` is a valid pointer to a `struct sg_table`.
+ let nents = unsafe { (*self.as_raw()).nents };
+
+ let pos = if nents > 0 {
+ // SAFETY: `self.as_raw()` is a valid pointer to a `struct sg_table`.
+ let ptr = unsafe { (*self.as_raw()).sgl };
+
+ // SAFETY: `ptr` is guaranteed to be a valid pointer to a `struct scatterlist`.
+ Some(unsafe { SGEntry::from_raw(ptr) })
+ } else {
+ None
+ };
+
+ SGTableIter { pos, nents }
+ }
+}
+
+/// Represents the DMA mapping state of a `struct sg_table`.
+///
+/// This is used as an inner type of [`Owned`] to manage the DMA mapping lifecycle.
+///
+/// # Invariants
+///
+/// - `sgt` is a valid pointer to a `struct sg_table` for the entire lifetime of the
+/// [`DmaMappedSgt`].
+/// - `sgt` is always DMA mapped.
+struct DmaMappedSgt {
+ sgt: NonNull<bindings::sg_table>,
+ dev: ARef<Device>,
+ dir: dma::DataDirection,
+}
+
+// SAFETY: `DmaMappedSgt` can be sent to any task.
+unsafe impl Send for DmaMappedSgt {}
+
+// SAFETY: `DmaMappedSgt` has no interior mutability and can be accessed concurrently.
+unsafe impl Sync for DmaMappedSgt {}
+
+impl DmaMappedSgt {
+ /// # Safety
+ ///
+ /// - `sgt` must be a valid pointer to a `struct sg_table` for the entire lifetime of the
+ /// returned [`DmaMappedSgt`].
+ /// - The caller must guarantee that `sgt` remains DMA mapped for the entire lifetime of
+ /// [`DmaMappedSgt`].
+ unsafe fn new(
+ sgt: NonNull<bindings::sg_table>,
+ dev: &Device<Bound>,
+ dir: dma::DataDirection,
+ ) -> Result<Self> {
+ // SAFETY:
+ // - `dev.as_raw()` is a valid pointer to a `struct device`, which is guaranteed to be
+ // bound to a driver for the duration of this call.
+ // - `sgt` is a valid pointer to a `struct sg_table`.
+ error::to_result(unsafe {
+ bindings::dma_map_sgtable(dev.as_raw(), sgt.as_ptr(), dir.into(), 0)
+ })?;
+
+ // INVARIANT: By the safety requirements of this function it is guaranteed that `sgt` is
+ // valid for the entire lifetime of this object instance.
+ Ok(Self {
+ sgt,
+ dev: dev.into(),
+ dir,
+ })
+ }
+}
+
+impl Drop for DmaMappedSgt {
+ #[inline]
+ fn drop(&mut self) {
+ // SAFETY:
+ // - `self.dev.as_raw()` is a pointer to a valid `struct device`.
+ // - `self.dev` is the same device the mapping has been created for in `Self::new()`.
+ // - `self.sgt.as_ptr()` is a valid pointer to a `struct sg_table` by the type invariants
+ // of `Self`.
+ // - `self.dir` is the same `dma::DataDirection` the mapping has been created with in
+ // `Self::new()`.
+ unsafe {
+ bindings::dma_unmap_sgtable(self.dev.as_raw(), self.sgt.as_ptr(), self.dir.into(), 0)
+ };
+ }
+}
+
+/// A transparent wrapper around a `struct sg_table`.
+///
+/// While we could also create the `struct sg_table` in the constructor of [`Owned`], we can't tear
+/// down the `struct sg_table` in [`Owned::drop`]; the drop order in [`Owned`] matters.
+#[repr(transparent)]
+struct RawSGTable(Opaque<bindings::sg_table>);
+
+// SAFETY: `RawSGTable` can be sent to any task.
+unsafe impl Send for RawSGTable {}
+
+// SAFETY: `RawSGTable` has no interior mutability and can be accessed concurrently.
+unsafe impl Sync for RawSGTable {}
+
+impl RawSGTable {
+ /// # Safety
+ ///
+ /// - `pages` must be a slice of valid `struct page *`.
+ /// - The pages pointed to by `pages` must remain valid for the entire lifetime of the returned
+ /// [`RawSGTable`].
+ unsafe fn new(
+ pages: &mut [*mut bindings::page],
+ size: usize,
+ max_segment: u32,
+ flags: alloc::Flags,
+ ) -> Result<Self> {
+ // `sg_alloc_table_from_pages_segment()` expects at least one page, otherwise it
+ // dereferences a NULL pointer.
+ if pages.is_empty() {
+ return Err(EINVAL);
+ }
+
+ let sgt = Opaque::zeroed();
+ // SAFETY:
+ // - `sgt.get()` is a valid pointer to uninitialized memory.
+ // - As by the check above, `pages` is not empty.
+ error::to_result(unsafe {
+ bindings::sg_alloc_table_from_pages_segment(
+ sgt.get(),
+ pages.as_mut_ptr(),
+ pages.len().try_into()?,
+ 0,
+ size,
+ max_segment,
+ flags.as_raw(),
+ )
+ })?;
+
+ Ok(Self(sgt))
+ }
+
+ #[inline]
+ fn as_raw(&self) -> *mut bindings::sg_table {
+ self.0.get()
+ }
+}
+
+impl Drop for RawSGTable {
+ #[inline]
+ fn drop(&mut self) {
+ // SAFETY: `sgt` is a valid and initialized `struct sg_table`.
+ unsafe { bindings::sg_free_table(self.0.get()) };
+ }
+}
+
+/// The [`Owned`] generic type of an [`SGTable`].
+///
+/// A [`SGTable<Owned>`] signifies that the [`SGTable`] owns all associated resources:
+///
+/// - The backing memory pages.
+/// - The `struct sg_table` allocation (`sgt`).
+/// - The DMA mapping, managed through a [`Devres`]-managed `DmaMappedSgt`.
+///
+/// Users interact with this type through the [`SGTable`] handle and do not need to manage
+/// [`Owned`] directly.
+#[pin_data]
+pub struct Owned<P> {
+ // Note: The drop order is relevant; we first have to unmap the `struct sg_table`, then free the
+ // `struct sg_table` and finally free the backing pages.
+ #[pin]
+ dma: Devres<DmaMappedSgt>,
+ sgt: RawSGTable,
+ _pages: P,
+}
+
+// SAFETY: `Owned` can be sent to any task if `P` can be sent to any task.
+unsafe impl<P: Send> Send for Owned<P> {}
+
+// SAFETY: `Owned` has no interior mutability and can be accessed concurrently if `P` can be
+// accessed concurrently.
+unsafe impl<P: Sync> Sync for Owned<P> {}
+
+impl<P> Owned<P>
+where
+ for<'a> P: page::AsPageIter<Iter<'a> = VmallocPageIter<'a>> + 'static,
+{
+ fn new(
+ dev: &Device<Bound>,
+ mut pages: P,
+ dir: dma::DataDirection,
+ flags: alloc::Flags,
+ ) -> Result<impl PinInit<Self, Error> + '_> {
+ let page_iter = pages.page_iter();
+ let size = page_iter.size();
+
+ let mut page_vec: KVec<*mut bindings::page> =
+ KVec::with_capacity(page_iter.page_count(), flags)?;
+
+ for page in page_iter {
+ page_vec.push(page.as_ptr(), flags)?;
+ }
+
+ // `dma_max_mapping_size` returns `size_t`, but `sg_alloc_table_from_pages_segment()` takes
+ // an `unsigned int`.
+ //
+ // SAFETY: `dev.as_raw()` is a valid pointer to a `struct device`.
+ let max_segment = match unsafe { bindings::dma_max_mapping_size(dev.as_raw()) } {
+ 0 => u32::MAX,
+ max_segment => u32::try_from(max_segment).unwrap_or(u32::MAX),
+ };
+
+ Ok(try_pin_init!(&this in Self {
+ // SAFETY:
+ // - `page_vec` is a `KVec` of valid `struct page *` obtained from `pages`.
+ // - The pages contained in `pages` remain valid for the entire lifetime of the
+ // `RawSGTable`.
+ sgt: unsafe { RawSGTable::new(&mut page_vec, size, max_segment, flags) }?,
+ dma <- {
+ // SAFETY: `this` is a valid pointer to uninitialized memory.
+ let sgt = unsafe { &raw mut (*this.as_ptr()).sgt }.cast();
+
+ // SAFETY: `sgt` is guaranteed to be non-null.
+ let sgt = unsafe { NonNull::new_unchecked(sgt) };
+
+ // SAFETY:
+ // - It is guaranteed that the object returned by `DmaMappedSgt::new` won't outlive
+ // `sgt`.
+ // - `sgt` is never DMA unmapped manually.
+ Devres::new(dev, unsafe { DmaMappedSgt::new(sgt, dev, dir) })
+ },
+ _pages: pages,
+ }))
+ }
+}
+
+impl<P> SGTable<Owned<P>>
+where
+ for<'a> P: page::AsPageIter<Iter<'a> = VmallocPageIter<'a>> + 'static,
+{
+ /// Allocates a new scatter-gather table from the given pages and maps it for DMA.
+ ///
+ /// This constructor creates a new [`SGTable<Owned>`] that takes ownership of `P`.
+ /// It allocates a `struct sg_table`, populates it with entries corresponding to the physical
+ /// pages of `P`, and maps the table for DMA with the specified [`Device`] and
+ /// [`dma::DataDirection`].
+ ///
+ /// The DMA mapping is managed through [`Devres`], ensuring that the DMA mapping is unmapped
+ /// once the associated [`Device`] is unbound, or when the [`SGTable<Owned>`] is dropped.
+ ///
+ /// # Parameters
+ ///
+ /// * `dev`: The [`Device`] that will be performing the DMA.
+ /// * `pages`: The entity providing the backing pages. It must implement [`page::AsPageIter`].
+ /// The ownership of this entity is moved into the new [`SGTable<Owned>`].
+ /// * `dir`: The [`dma::DataDirection`] of the DMA transfer.
+ /// * `flags`: Allocation flags for internal allocations (e.g., [`GFP_KERNEL`]).
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use kernel::{
+ /// device::{Bound, Device},
+ /// dma, page,
+ /// prelude::*,
+ /// scatterlist::{SGTable, Owned},
+ /// };
+ ///
+ /// fn test(dev: &Device<Bound>) -> Result {
+ /// let size = 4 * page::PAGE_SIZE;
+ /// let pages = VVec::<u8>::with_capacity(size, GFP_KERNEL)?;
+ ///
+ /// let sgt = KBox::pin_init(SGTable::new(
+ /// dev,
+ /// pages,
+ /// dma::DataDirection::ToDevice,
+ /// GFP_KERNEL,
+ /// ), GFP_KERNEL)?;
+ ///
+ /// Ok(())
+ /// }
+ /// ```
+ pub fn new(
+ dev: &Device<Bound>,
+ pages: P,
+ dir: dma::DataDirection,
+ flags: alloc::Flags,
+ ) -> impl PinInit<Self, Error> + '_ {
+ try_pin_init!(Self {
+ inner <- Owned::new(dev, pages, dir, flags)?
+ })
+ }
+}
+
+impl<P> Deref for SGTable<Owned<P>> {
+ type Target = SGTable;
+
+ #[inline]
+ fn deref(&self) -> &Self::Target {
+ // SAFETY:
+ // - `self.inner.sgt.as_raw()` is a valid pointer to a `struct sg_table` for the entire
+ // lifetime of `self`.
+ // - The backing `struct sg_table` is not modified for the entire lifetime of `self`.
+ unsafe { SGTable::from_raw(self.inner.sgt.as_raw()) }
+ }
+}
+
+mod private {
+ pub trait Sealed {}
+
+ impl Sealed for super::Borrowed {}
+ impl<P> Sealed for super::Owned<P> {}
+}
+
+/// An [`Iterator`] over the DMA mapped [`SGEntry`] items of an [`SGTable`].
+///
+/// Note that the existence of an [`SGTableIter`] does not guarantee that the [`SGEntry`] items
+/// actually remain DMA mapped; they may be unmapped at any time if the device is unbound.
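+///
+/// # Examples
+///
+/// A short sketch of walking the mapped entries of an [`SGTable`] (this assumes the `iter()`,
+/// `dma_address()` and `dma_len()` accessors introduced earlier in this file):
+///
+/// ```
+/// use kernel::scatterlist::SGTable;
+///
+/// fn dump(sgt: &SGTable) {
+///     for entry in sgt.iter() {
+///         // Each mapped entry exposes its DMA address and length.
+///         let _ = (entry.dma_address(), entry.dma_len());
+///     }
+/// }
+/// ```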
+pub struct SGTableIter<'a> {
+ pos: Option<&'a SGEntry>,
+ /// The number of DMA mapped entries remaining in the `struct sg_table`.
+ nents: c_uint,
+}
+
+impl<'a> Iterator for SGTableIter<'a> {
+ type Item = &'a SGEntry;
+
+ fn next(&mut self) -> Option<Self::Item> {
+ let entry = self.pos?;
+ self.nents = self.nents.saturating_sub(1);
+
+ // SAFETY: `entry.as_raw()` is a valid pointer to a `struct scatterlist`.
+ let next = unsafe { bindings::sg_next(entry.as_raw()) };
+
+ self.pos = (!next.is_null() && self.nents > 0).then(|| {
+ // SAFETY: If `next` is not NULL, `sg_next()` is guaranteed to have returned a valid
+ // pointer to the next `struct scatterlist`.
+ unsafe { SGEntry::from_raw(next) }
+ });
+
+ Some(entry)
+ }
+}
diff --git a/rust/kernel/str.rs b/rust/kernel/str.rs
index 6c892550c0ba..5c74e5f77601 100644
--- a/rust/kernel/str.rs
+++ b/rust/kernel/str.rs
@@ -2,11 +2,16 @@
//! String representations.
-use crate::alloc::{flags::*, AllocError, KVec};
-use crate::fmt::{self, Write};
-use core::ops::{self, Deref, DerefMut, Index};
-
-use crate::prelude::*;
+use crate::{
+ alloc::{flags::*, AllocError, KVec},
+ error::{to_result, Result},
+ fmt::{self, Write},
+ prelude::*,
+};
+use core::{
+ marker::PhantomData,
+ ops::{self, Deref, DerefMut, Index},
+};
/// Byte string without UTF-8 validity guarantee.
#[repr(transparent)]
@@ -732,7 +737,7 @@ mod tests {
///
/// The memory region between `pos` (inclusive) and `end` (exclusive) is valid for writes if `pos`
/// is less than `end`.
-pub(crate) struct RawFormatter {
+pub struct RawFormatter {
// Use `usize` to use `saturating_*` functions.
beg: usize,
pos: usize,
@@ -790,7 +795,7 @@ impl RawFormatter {
}
/// Returns the number of bytes written to the formatter.
- pub(crate) fn bytes_written(&self) -> usize {
+ pub fn bytes_written(&self) -> usize {
self.pos - self.beg
}
}
@@ -824,9 +829,9 @@ impl fmt::Write for RawFormatter {
/// Allows formatting of [`fmt::Arguments`] into a raw buffer.
///
/// Fails if callers attempt to write more than will fit in the buffer.
-pub(crate) struct Formatter(RawFormatter);
+pub struct Formatter<'a>(RawFormatter, PhantomData<&'a mut ()>);
-impl Formatter {
+impl Formatter<'_> {
/// Creates a new instance of [`Formatter`] with the given buffer.
///
/// # Safety
@@ -835,11 +840,18 @@ impl Formatter {
/// for the lifetime of the returned [`Formatter`].
pub(crate) unsafe fn from_buffer(buf: *mut u8, len: usize) -> Self {
// SAFETY: The safety requirements of this function satisfy those of the callee.
- Self(unsafe { RawFormatter::from_buffer(buf, len) })
+ Self(unsafe { RawFormatter::from_buffer(buf, len) }, PhantomData)
+ }
+
+ /// Create a new [`Self`] instance.
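+ ///
+ /// # Examples
+ ///
+ /// A minimal usage sketch (`write!` comes from the usual macro prelude):
+ ///
+ /// ```
+ /// use kernel::fmt::Write;
+ /// use kernel::str::Formatter;
+ ///
+ /// let mut buf = [0u8; 32];
+ /// let mut f = Formatter::new(&mut buf);
+ /// assert!(write!(f, "{} bytes", 32).is_ok());
+ /// assert_eq!(f.bytes_written(), 8);
+ /// ```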
+ pub fn new(buffer: &mut [u8]) -> Self {
+ // SAFETY: `buffer` is valid for writes over its entire length for the lifetime of
+ // `Self`.
+ unsafe { Formatter::from_buffer(buffer.as_mut_ptr(), buffer.len()) }
}
}
-impl Deref for Formatter {
+impl Deref for Formatter<'_> {
type Target = RawFormatter;
fn deref(&self) -> &Self::Target {
@@ -847,7 +859,7 @@ impl Deref for Formatter {
}
}
-impl fmt::Write for Formatter {
+impl fmt::Write for Formatter<'_> {
fn write_str(&mut self, s: &str) -> fmt::Result {
self.0.write_str(s)?;
@@ -860,6 +872,132 @@ impl fmt::Write for Formatter {
}
}
+/// A mutable reference to a byte buffer into which a string can be written.
+///
+/// The buffer will be automatically null terminated after the last written character.
+///
+/// # Invariants
+///
+/// * The first byte of `buffer` is always zero.
+/// * The length of `buffer` is at least 1.
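+///
+/// # Examples
+///
+/// A minimal sketch of the intended usage (illustrative only; the type is crate-private, so this
+/// is not compiled as a doctest):
+///
+/// ```ignore
+/// use kernel::fmt::Write;
+///
+/// let mut buf = [0u8; 8];
+/// let mut w = NullTerminatedFormatter::new(&mut buf).ok_or(EINVAL)?;
+/// w.write_str("hi")?;
+/// // `buf` now starts with the bytes `b"hi\0"`.
+/// ```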
+pub(crate) struct NullTerminatedFormatter<'a> {
+ buffer: &'a mut [u8],
+}
+
+impl<'a> NullTerminatedFormatter<'a> {
+ /// Create a new [`Self`] instance.
+ pub(crate) fn new(buffer: &'a mut [u8]) -> Option<NullTerminatedFormatter<'a>> {
+ *(buffer.first_mut()?) = 0;
+
+ // INVARIANT:
+ // - We wrote zero to the first byte above.
+ // - If `buffer` were not at least 1 byte long, `buffer.first_mut()` would have returned `None`.
+ Some(Self { buffer })
+ }
+}
+
+impl Write for NullTerminatedFormatter<'_> {
+ fn write_str(&mut self, s: &str) -> fmt::Result {
+ let bytes = s.as_bytes();
+ let len = bytes.len();
+
+ // We want space for a zero. By type invariant, buffer length is always at least 1, so no
+ // underflow.
+ if len > self.buffer.len() - 1 {
+ return Err(fmt::Error);
+ }
+
+ let buffer = core::mem::take(&mut self.buffer);
+ // We break the zero start invariant for a short while.
+ buffer[..len].copy_from_slice(bytes);
+ // INVARIANT: We checked above that buffer will have size at least 1 after this assignment.
+ self.buffer = &mut buffer[len..];
+
+ // INVARIANT: We write zero to the first byte of the buffer.
+ self.buffer[0] = 0;
+
+ Ok(())
+ }
+}
+
+/// # Safety
+///
+/// - `string` must point to a null terminated string that is valid for read.
+unsafe fn kstrtobool_raw(string: *const u8) -> Result<bool> {
+ let mut result: bool = false;
+
+ // SAFETY:
+ // - By function safety requirement, `string` is a valid null-terminated string.
+ // - `result` is a valid `bool` that we own.
+ to_result(unsafe { bindings::kstrtobool(string, &mut result) })?;
+ Ok(result)
+}
+
+/// Convert common user inputs into boolean values using the kernel's `kstrtobool` function.
+///
+/// This routine returns `Ok(bool)` if the first character is one of 'YyTt1NnFf0', or
+/// \[oO\]\[NnFf\] for "on" and "off". Otherwise it will return `Err(EINVAL)`.
+///
+/// # Examples
+///
+/// ```
+/// # use kernel::{c_str, str::kstrtobool};
+///
+/// // Lowercase
+/// assert_eq!(kstrtobool(c_str!("true")), Ok(true));
+/// assert_eq!(kstrtobool(c_str!("tr")), Ok(true));
+/// assert_eq!(kstrtobool(c_str!("t")), Ok(true));
+/// assert_eq!(kstrtobool(c_str!("twrong")), Ok(true));
+/// assert_eq!(kstrtobool(c_str!("false")), Ok(false));
+/// assert_eq!(kstrtobool(c_str!("f")), Ok(false));
+/// assert_eq!(kstrtobool(c_str!("yes")), Ok(true));
+/// assert_eq!(kstrtobool(c_str!("no")), Ok(false));
+/// assert_eq!(kstrtobool(c_str!("on")), Ok(true));
+/// assert_eq!(kstrtobool(c_str!("off")), Ok(false));
+///
+/// // Camel case
+/// assert_eq!(kstrtobool(c_str!("True")), Ok(true));
+/// assert_eq!(kstrtobool(c_str!("False")), Ok(false));
+/// assert_eq!(kstrtobool(c_str!("Yes")), Ok(true));
+/// assert_eq!(kstrtobool(c_str!("No")), Ok(false));
+/// assert_eq!(kstrtobool(c_str!("On")), Ok(true));
+/// assert_eq!(kstrtobool(c_str!("Off")), Ok(false));
+///
+/// // All caps
+/// assert_eq!(kstrtobool(c_str!("TRUE")), Ok(true));
+/// assert_eq!(kstrtobool(c_str!("FALSE")), Ok(false));
+/// assert_eq!(kstrtobool(c_str!("YES")), Ok(true));
+/// assert_eq!(kstrtobool(c_str!("NO")), Ok(false));
+/// assert_eq!(kstrtobool(c_str!("ON")), Ok(true));
+/// assert_eq!(kstrtobool(c_str!("OFF")), Ok(false));
+///
+/// // Numeric
+/// assert_eq!(kstrtobool(c_str!("1")), Ok(true));
+/// assert_eq!(kstrtobool(c_str!("0")), Ok(false));
+///
+/// // Invalid input
+/// assert_eq!(kstrtobool(c_str!("invalid")), Err(EINVAL));
+/// assert_eq!(kstrtobool(c_str!("2")), Err(EINVAL));
+/// ```
+pub fn kstrtobool(string: &CStr) -> Result<bool> {
+ // SAFETY:
+ // - The pointer returned by `CStr::as_char_ptr` is guaranteed to be
+ // null terminated.
+ // - `string` is live and thus the string is valid for read.
+ unsafe { kstrtobool_raw(string.as_char_ptr()) }
+}
+
+/// Convert `&[u8]` to `bool` by deferring to [`kernel::str::kstrtobool`].
+///
+/// Only considers at most the first two bytes of `bytes`.
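+///
+/// # Examples
+///
+/// ```
+/// use kernel::str::kstrtobool_bytes;
+///
+/// assert_eq!(kstrtobool_bytes(b"true"), Ok(true));
+/// assert_eq!(kstrtobool_bytes(b"off"), Ok(false));
+/// assert_eq!(kstrtobool_bytes(b"2"), Err(EINVAL));
+/// ```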
+pub fn kstrtobool_bytes(bytes: &[u8]) -> Result<bool> {
+ // `kstrtobool` only considers at most the first two bytes of the input.
+ let stack_string = [*bytes.first().unwrap_or(&0), *bytes.get(1).unwrap_or(&0), 0];
+ // SAFETY: `stack_string` is null terminated and lives on the stack, so it is valid for
+ // reads.
+ unsafe { kstrtobool_raw(stack_string.as_ptr()) }
+}
+
/// An owned string that is guaranteed to have exactly one `NUL` byte, which is at the end.
///
/// Used for interoperability with kernel APIs that take C strings.
diff --git a/rust/kernel/transmute.rs b/rust/kernel/transmute.rs
index 1c7d43771a37..cfc37d81adf2 100644
--- a/rust/kernel/transmute.rs
+++ b/rust/kernel/transmute.rs
@@ -2,6 +2,8 @@
//! Traits for transmuting types.
+use core::mem::size_of;
+
/// Types for which any bit pattern is valid.
///
/// Not all types are valid for all values. For example, a `bool` must be either zero or one, so
@@ -9,10 +11,93 @@
///
/// It's okay for the type to have padding, as initializing those bytes has no effect.
///
+/// # Examples
+///
+/// ```
+/// use kernel::transmute::FromBytes;
+///
+/// # fn test() -> Option<()> {
+/// let raw = [1, 2, 3, 4];
+///
+/// let result = u32::from_bytes(&raw)?;
+///
+/// #[cfg(target_endian = "little")]
+/// assert_eq!(*result, 0x4030201);
+///
+/// #[cfg(target_endian = "big")]
+/// assert_eq!(*result, 0x1020304);
+///
+/// # Some(()) }
+/// # test().ok_or(EINVAL)?;
+/// # Ok::<(), Error>(())
+/// ```
+///
/// # Safety
///
/// All bit-patterns must be valid for this type. This type must not have interior mutability.
-pub unsafe trait FromBytes {}
+pub unsafe trait FromBytes {
+ /// Converts a slice of bytes to a reference to `Self`.
+ ///
+ /// Succeeds if the slice is properly aligned for `Self`, and the size of `bytes` equals that
+ /// of `Self` and is non-zero.
+ ///
+ /// Otherwise, returns [`None`].
+ fn from_bytes(bytes: &[u8]) -> Option<&Self>
+ where
+ Self: Sized,
+ {
+ let slice_ptr = bytes.as_ptr().cast::<Self>();
+ let size = size_of::<Self>();
+
+ #[allow(clippy::incompatible_msrv)]
+ if bytes.len() == size && slice_ptr.is_aligned() {
+ // SAFETY: Size and alignment were just checked.
+ unsafe { Some(&*slice_ptr) }
+ } else {
+ None
+ }
+ }
+
+ /// Converts a mutable slice of bytes to a reference to `Self`.
+ ///
+ /// Succeeds if the slice is properly aligned for `Self`, and the size of `bytes` equals that
+ /// of `Self` and is non-zero.
+ ///
+ /// Otherwise, returns [`None`].
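+ ///
+ /// # Examples
+ ///
+ /// A minimal sketch; `u8` is used so that the alignment check cannot fail:
+ ///
+ /// ```
+ /// use kernel::transmute::FromBytes;
+ ///
+ /// let mut raw = [41u8];
+ /// let byte = u8::from_bytes_mut(&mut raw).ok_or(EINVAL)?;
+ /// *byte += 1;
+ /// assert_eq!(raw[0], 42);
+ /// # Ok::<(), Error>(())
+ /// ```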
+ fn from_bytes_mut(bytes: &mut [u8]) -> Option<&mut Self>
+ where
+ Self: AsBytes + Sized,
+ {
+ let slice_ptr = bytes.as_mut_ptr().cast::<Self>();
+ let size = size_of::<Self>();
+
+ #[allow(clippy::incompatible_msrv)]
+ if bytes.len() == size && slice_ptr.is_aligned() {
+ // SAFETY: Size and alignment were just checked.
+ unsafe { Some(&mut *slice_ptr) }
+ } else {
+ None
+ }
+ }
+
+ /// Creates an owned instance of `Self` by copying `bytes`.
+ ///
+ /// Unlike [`FromBytes::from_bytes`], which requires aligned input, this method can be used on
+ /// non-aligned data at the cost of a copy.
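+ ///
+ /// # Examples
+ ///
+ /// A minimal sketch reading from a deliberately misaligned offset:
+ ///
+ /// ```
+ /// use kernel::transmute::FromBytes;
+ ///
+ /// let raw = [0u8, 1, 2, 3, 4];
+ /// // `&raw[1..]` need not be aligned for `u32`; the copy makes that irrelevant.
+ /// let val = u32::from_bytes_copy(&raw[1..]).ok_or(EINVAL)?;
+ ///
+ /// #[cfg(target_endian = "little")]
+ /// assert_eq!(val, 0x04030201);
+ ///
+ /// #[cfg(target_endian = "big")]
+ /// assert_eq!(val, 0x01020304);
+ /// # Ok::<(), Error>(())
+ /// ```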
+ fn from_bytes_copy(bytes: &[u8]) -> Option<Self>
+ where
+ Self: Sized,
+ {
+ if bytes.len() == size_of::<Self>() {
+ // SAFETY: we just verified that `bytes` has the same size as `Self`, and per the
+ // invariants of `FromBytes`, any byte sequence of the correct length is a valid value
+ // for `Self`.
+ Some(unsafe { core::ptr::read_unaligned(bytes.as_ptr().cast::<Self>()) })
+ } else {
+ None
+ }
+ }
+}
macro_rules! impl_frombytes {
($($({$($generics:tt)*})? $t:ty, )*) => {
@@ -47,7 +132,32 @@ impl_frombytes! {
///
/// Values of this type may not contain any uninitialized bytes. This type must not have interior
/// mutability.
-pub unsafe trait AsBytes {}
+pub unsafe trait AsBytes {
+ /// Returns `self` as a slice of bytes.
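+ ///
+ /// # Examples
+ ///
+ /// A minimal sketch (relying on the integer impls provided by `impl_asbytes!` below):
+ ///
+ /// ```
+ /// use kernel::transmute::AsBytes;
+ ///
+ /// let v: u32 = 0x04030201;
+ ///
+ /// #[cfg(target_endian = "little")]
+ /// assert_eq!(v.as_bytes(), &[1u8, 2, 3, 4]);
+ ///
+ /// #[cfg(target_endian = "big")]
+ /// assert_eq!(v.as_bytes(), &[4u8, 3, 2, 1]);
+ /// ```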
+ fn as_bytes(&self) -> &[u8] {
+ // CAST: `Self` implements `AsBytes`, thus all bytes of `self` are initialized.
+ let data = core::ptr::from_ref(self).cast::<u8>();
+ let len = core::mem::size_of_val(self);
+
+ // SAFETY: `data` is non-null and valid for reads of `len * size_of::<u8>()` bytes.
+ unsafe { core::slice::from_raw_parts(data, len) }
+ }
+
+ /// Returns `self` as a mutable slice of bytes.
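+ ///
+ /// # Examples
+ ///
+ /// A minimal sketch:
+ ///
+ /// ```
+ /// use kernel::transmute::AsBytes;
+ ///
+ /// let mut v: u32 = 0;
+ /// v.as_bytes_mut().fill(0xff);
+ /// assert_eq!(v, u32::MAX);
+ /// ```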
+ fn as_bytes_mut(&mut self) -> &mut [u8]
+ where
+ Self: FromBytes,
+ {
+ // CAST: `Self` implements both `AsBytes` and `FromBytes`, thus making `Self`
+ // bi-directionally transmutable to `[u8; size_of_val(self)]`.
+ let data = core::ptr::from_mut(self).cast::<u8>();
+ let len = core::mem::size_of_val(self);
+
+ // SAFETY: `data` is non-null and valid for reads and writes of `len * size_of::<u8>()`
+ // bytes.
+ unsafe { core::slice::from_raw_parts_mut(data, len) }
+ }
+}
macro_rules! impl_asbytes {
($($({$($generics:tt)*})? $t:ty, )*) => {
diff --git a/rust/kernel/workqueue.rs b/rust/kernel/workqueue.rs
index b9343d5bc00f..706e833e9702 100644
--- a/rust/kernel/workqueue.rs
+++ b/rust/kernel/workqueue.rs
@@ -356,18 +356,11 @@ struct ClosureWork<T> {
func: Option<T>,
}
-impl<T> ClosureWork<T> {
- fn project(self: Pin<&mut Self>) -> &mut Option<T> {
- // SAFETY: The `func` field is not structurally pinned.
- unsafe { &mut self.get_unchecked_mut().func }
- }
-}
-
impl<T: FnOnce()> WorkItem for ClosureWork<T> {
type Pointer = Pin<KBox<Self>>;
fn run(mut this: Pin<KBox<Self>>) {
- if let Some(func) = this.as_mut().project().take() {
+ if let Some(func) = this.as_mut().project().func.take() {
(func)()
}
}