summaryrefslogtreecommitdiff
path: root/rust/kernel/block
diff options
context:
space:
mode:
Diffstat (limited to 'rust/kernel/block')
-rw-r--r--rust/kernel/block/mq.rs14
-rw-r--r--rust/kernel/block/mq/gen_disk.rs60
-rw-r--r--rust/kernel/block/mq/operations.rs72
-rw-r--r--rust/kernel/block/mq/raw_writer.rs55
-rw-r--r--rust/kernel/block/mq/request.rs94
5 files changed, 151 insertions, 144 deletions
diff --git a/rust/kernel/block/mq.rs b/rust/kernel/block/mq.rs
index 831445d37181..637018ead0ab 100644
--- a/rust/kernel/block/mq.rs
+++ b/rust/kernel/block/mq.rs
@@ -69,27 +69,33 @@
//!
//! #[vtable]
//! impl Operations for MyBlkDevice {
+//! type QueueData = ();
//!
-//! fn queue_rq(rq: ARef<Request<Self>>, _is_last: bool) -> Result {
+//! fn queue_rq(_queue_data: (), rq: ARef<Request<Self>>, _is_last: bool) -> Result {
//! Request::end_ok(rq);
//! Ok(())
//! }
//!
-//! fn commit_rqs() {}
+//! fn commit_rqs(_queue_data: ()) {}
+//!
+//! fn complete(rq: ARef<Request<Self>>) {
+//! Request::end_ok(rq)
+//! .map_err(|_e| kernel::error::code::EIO)
+//! .expect("Fatal error - expected to be able to end request");
+//! }
//! }
//!
//! let tagset: Arc<TagSet<MyBlkDevice>> =
//! Arc::pin_init(TagSet::new(1, 256, 1), flags::GFP_KERNEL)?;
//! let mut disk = gen_disk::GenDiskBuilder::new()
//! .capacity_sectors(4096)
-//! .build(format_args!("myblk"), tagset)?;
+//! .build(fmt!("myblk"), tagset, ())?;
//!
//! # Ok::<(), kernel::error::Error>(())
//! ```
pub mod gen_disk;
mod operations;
-mod raw_writer;
mod request;
mod tag_set;
diff --git a/rust/kernel/block/mq/gen_disk.rs b/rust/kernel/block/mq/gen_disk.rs
index cd54cd64ea88..1ce815c8cdab 100644
--- a/rust/kernel/block/mq/gen_disk.rs
+++ b/rust/kernel/block/mq/gen_disk.rs
@@ -3,12 +3,19 @@
//! Generic disk abstraction.
//!
//! C header: [`include/linux/blkdev.h`](srctree/include/linux/blkdev.h)
-//! C header: [`include/linux/blk_mq.h`](srctree/include/linux/blk_mq.h)
-
-use crate::block::mq::{raw_writer::RawWriter, Operations, TagSet};
-use crate::{bindings, error::from_err_ptr, error::Result, sync::Arc};
-use crate::{error, static_lock_class};
-use core::fmt::{self, Write};
+//! C header: [`include/linux/blk-mq.h`](srctree/include/linux/blk-mq.h)
+
+use crate::{
+ bindings,
+ block::mq::{Operations, TagSet},
+ error::{self, from_err_ptr, Result},
+ fmt::{self, Write},
+ prelude::*,
+ static_lock_class,
+ str::NullTerminatedFormatter,
+ sync::Arc,
+ types::{ForeignOwnable, ScopeGuard},
+};
/// A builder for [`GenDisk`].
///
@@ -45,7 +52,7 @@ impl GenDiskBuilder {
/// Validate block size by verifying that it is between 512 and `PAGE_SIZE`,
/// and that it is a power of two.
- fn validate_block_size(size: u32) -> Result {
+ pub fn validate_block_size(size: u32) -> Result {
if !(512..=bindings::PAGE_SIZE as u32).contains(&size) || !size.is_power_of_two() {
Err(error::code::EINVAL)
} else {
@@ -92,7 +99,14 @@ impl GenDiskBuilder {
self,
name: fmt::Arguments<'_>,
tagset: Arc<TagSet<T>>,
+ queue_data: T::QueueData,
) -> Result<GenDisk<T>> {
+ let data = queue_data.into_foreign();
+ let recover_data = ScopeGuard::new(|| {
+ // SAFETY: T::QueueData was created by the call to `into_foreign()` above
+ drop(unsafe { T::QueueData::from_foreign(data) });
+ });
+
// SAFETY: `bindings::queue_limits` contain only fields that are valid when zeroed.
let mut lim: bindings::queue_limits = unsafe { core::mem::zeroed() };
@@ -107,7 +121,7 @@ impl GenDiskBuilder {
bindings::__blk_mq_alloc_disk(
tagset.raw_tag_set(),
&mut lim,
- core::ptr::null_mut(),
+ data,
static_lock_class!().as_ptr(),
)
})?;
@@ -139,14 +153,14 @@ impl GenDiskBuilder {
// SAFETY: `gendisk` is a valid pointer as we initialized it above
unsafe { (*gendisk).fops = &TABLE };
- let mut raw_writer = RawWriter::from_array(
+ let mut writer = NullTerminatedFormatter::new(
// SAFETY: `gendisk` points to a valid and initialized instance. We
// have exclusive access, since the disk is not added to the VFS
// yet.
unsafe { &mut (*gendisk).disk_name },
- )?;
- raw_writer.write_fmt(name)?;
- raw_writer.write_char('\0')?;
+ )
+ .ok_or(EINVAL)?;
+ writer.write_fmt(name)?;
// SAFETY: `gendisk` points to a valid and initialized instance of
// `struct gendisk`. `set_capacity` takes a lock to synchronize this
@@ -161,8 +175,12 @@ impl GenDiskBuilder {
},
)?;
+ recover_data.dismiss();
+
// INVARIANT: `gendisk` was initialized above.
// INVARIANT: `gendisk` was added to the VFS via `device_add_disk` above.
+ // INVARIANT: `gendisk.queue.queuedata` is set to `data` in the call to
+ // `__blk_mq_alloc_disk` above.
Ok(GenDisk {
_tagset: tagset,
gendisk,
@@ -174,9 +192,10 @@ impl GenDiskBuilder {
///
/// # Invariants
///
-/// - `gendisk` must always point to an initialized and valid `struct gendisk`.
-/// - `gendisk` was added to the VFS through a call to
-/// `bindings::device_add_disk`.
+/// - `gendisk` must always point to an initialized and valid `struct gendisk`.
+/// - `gendisk` was added to the VFS through a call to
+/// `bindings::device_add_disk`.
+/// - `self.gendisk.queue.queuedata` is initialized by a call to `ForeignOwnable::into_foreign`.
pub struct GenDisk<T: Operations> {
_tagset: Arc<TagSet<T>>,
gendisk: *mut bindings::gendisk,
@@ -188,9 +207,20 @@ unsafe impl<T: Operations + Send> Send for GenDisk<T> {}
impl<T: Operations> Drop for GenDisk<T> {
fn drop(&mut self) {
+ // SAFETY: By type invariant of `Self`, `self.gendisk` points to a valid
+ // and initialized instance of `struct gendisk`, and, `queuedata` was
+ // initialized with the result of a call to
+ // `ForeignOwnable::into_foreign`.
+ let queue_data = unsafe { (*(*self.gendisk).queue).queuedata };
+
// SAFETY: By type invariant, `self.gendisk` points to a valid and
// initialized instance of `struct gendisk`, and it was previously added
// to the VFS.
unsafe { bindings::del_gendisk(self.gendisk) };
+
+ // SAFETY: `queue.queuedata` was created by `GenDiskBuilder::build` with
+ // a call to `ForeignOwnable::into_foreign` to create `queuedata`.
+ // `ForeignOwnable::from_foreign` is only called here.
+ drop(unsafe { T::QueueData::from_foreign(queue_data) });
}
}
diff --git a/rust/kernel/block/mq/operations.rs b/rust/kernel/block/mq/operations.rs
index c2b98f507bcb..f91a1719886c 100644
--- a/rust/kernel/block/mq/operations.rs
+++ b/rust/kernel/block/mq/operations.rs
@@ -6,13 +6,15 @@
use crate::{
bindings,
- block::mq::request::RequestDataWrapper,
- block::mq::Request,
+ block::mq::{request::RequestDataWrapper, Request},
error::{from_result, Result},
prelude::*,
- types::ARef,
+ sync::Refcount,
+ types::{ARef, ForeignOwnable},
};
-use core::{marker::PhantomData, sync::atomic::AtomicU64, sync::atomic::Ordering};
+use core::marker::PhantomData;
+
+type ForeignBorrowed<'a, T> = <T as ForeignOwnable>::Borrowed<'a>;
/// Implement this trait to interface blk-mq as block devices.
///
@@ -26,12 +28,23 @@ use core::{marker::PhantomData, sync::atomic::AtomicU64, sync::atomic::Ordering}
/// [module level documentation]: kernel::block::mq
#[macros::vtable]
pub trait Operations: Sized {
+ /// Data associated with the `struct request_queue` that is allocated for
+ /// the `GenDisk` associated with this `Operations` implementation.
+ type QueueData: ForeignOwnable;
+
/// Called by the kernel to queue a request with the driver. If `is_last` is
/// `false`, the driver is allowed to defer committing the request.
- fn queue_rq(rq: ARef<Request<Self>>, is_last: bool) -> Result;
+ fn queue_rq(
+ queue_data: ForeignBorrowed<'_, Self::QueueData>,
+ rq: ARef<Request<Self>>,
+ is_last: bool,
+ ) -> Result;
/// Called by the kernel to indicate that queued requests should be submitted.
- fn commit_rqs();
+ fn commit_rqs(queue_data: ForeignBorrowed<'_, Self::QueueData>);
+
+ /// Called by the kernel when the request is completed.
+ fn complete(rq: ARef<Request<Self>>);
/// Called by the kernel to poll the device for completed requests. Only
/// used for poll queues.
@@ -70,7 +83,7 @@ impl<T: Operations> OperationsVTable<T> {
/// promise to not access the request until the driver calls
/// `bindings::blk_mq_end_request` for the request.
unsafe extern "C" fn queue_rq_callback(
- _hctx: *mut bindings::blk_mq_hw_ctx,
+ hctx: *mut bindings::blk_mq_hw_ctx,
bd: *const bindings::blk_mq_queue_data,
) -> bindings::blk_status_t {
// SAFETY: `bd.rq` is valid as required by the safety requirement for
@@ -78,7 +91,7 @@ impl<T: Operations> OperationsVTable<T> {
let request = unsafe { &*(*bd).rq.cast::<Request<T>>() };
// One refcount for the ARef, one for being in flight
- request.wrapper_ref().refcount().store(2, Ordering::Relaxed);
+ request.wrapper_ref().refcount().set(2);
// SAFETY:
// - We own a refcount that we took above. We pass that to `ARef`.
@@ -88,10 +101,20 @@ impl<T: Operations> OperationsVTable<T> {
// reference counted by `ARef` until then.
let rq = unsafe { Request::aref_from_raw((*bd).rq) };
+ // SAFETY: `hctx` is valid as required by this function.
+ let queue_data = unsafe { (*(*hctx).queue).queuedata };
+
+ // SAFETY: `queue.queuedata` was created by `GenDiskBuilder::build` with
+ // a call to `ForeignOwnable::into_foreign` to create `queuedata`.
+ // `ForeignOwnable::from_foreign` is only called when the `GenDisk` is
+ // dropped, which happens after we are dropped.
+ let queue_data = unsafe { T::QueueData::borrow(queue_data) };
+
// SAFETY: We have exclusive access and we just set the refcount above.
unsafe { Request::start_unchecked(&rq) };
let ret = T::queue_rq(
+ queue_data,
rq,
// SAFETY: `bd` is valid as required by the safety requirement for
// this function.
@@ -110,18 +133,35 @@ impl<T: Operations> OperationsVTable<T> {
///
/// # Safety
///
- /// This function may only be called by blk-mq C infrastructure.
- unsafe extern "C" fn commit_rqs_callback(_hctx: *mut bindings::blk_mq_hw_ctx) {
- T::commit_rqs()
+ /// This function may only be called by blk-mq C infrastructure. The caller
+ /// must ensure that `hctx` is valid.
+ unsafe extern "C" fn commit_rqs_callback(hctx: *mut bindings::blk_mq_hw_ctx) {
+ // SAFETY: `hctx` is valid as required by this function.
+ let queue_data = unsafe { (*(*hctx).queue).queuedata };
+
+ // SAFETY: `queue.queuedata` was created by `GenDiskBuilder::build` with a
+ // call to `ForeignOwnable::into_foreign()` to create `queuedata`.
+ // `ForeignOwnable::from_foreign()` is only called when the `GenDisk` is
+ // dropped, which happens after we are dropped.
+ let queue_data = unsafe { T::QueueData::borrow(queue_data) };
+ T::commit_rqs(queue_data)
}
- /// This function is called by the C kernel. It is not currently
- /// implemented, and there is no way to exercise this code path.
+ /// This function is called by the C kernel. A pointer to this function is
+ /// installed in the `blk_mq_ops` vtable for the driver.
///
/// # Safety
///
- /// This function may only be called by blk-mq C infrastructure.
- unsafe extern "C" fn complete_callback(_rq: *mut bindings::request) {}
+ /// This function may only be called by blk-mq C infrastructure. `rq` must
+ /// point to a valid request that has been marked as completed. The pointee
+ /// of `rq` must be valid for write for the duration of this function.
+ unsafe extern "C" fn complete_callback(rq: *mut bindings::request) {
+ // SAFETY: This function can only be dispatched through
+ // `Request::complete`. We leaked a refcount then which we pick back up
+ // now.
+ let aref = unsafe { Request::aref_from_raw(rq) };
+ T::complete(aref);
+ }
/// This function is called by the C kernel. A pointer to this function is
/// installed in the `blk_mq_ops` vtable for the driver.
@@ -187,7 +227,7 @@ impl<T: Operations> OperationsVTable<T> {
// SAFETY: The refcount field is allocated but not initialized, so
// it is valid for writes.
- unsafe { RequestDataWrapper::refcount_ptr(pdu.as_ptr()).write(AtomicU64::new(0)) };
+ unsafe { RequestDataWrapper::refcount_ptr(pdu.as_ptr()).write(Refcount::new(0)) };
Ok(0)
})
diff --git a/rust/kernel/block/mq/raw_writer.rs b/rust/kernel/block/mq/raw_writer.rs
deleted file mode 100644
index 7e2159e4f6a6..000000000000
--- a/rust/kernel/block/mq/raw_writer.rs
+++ /dev/null
@@ -1,55 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-use core::fmt::{self, Write};
-
-use crate::error::Result;
-use crate::prelude::EINVAL;
-
-/// A mutable reference to a byte buffer where a string can be written into.
-///
-/// # Invariants
-///
-/// `buffer` is always null terminated.
-pub(crate) struct RawWriter<'a> {
- buffer: &'a mut [u8],
- pos: usize,
-}
-
-impl<'a> RawWriter<'a> {
- /// Create a new `RawWriter` instance.
- fn new(buffer: &'a mut [u8]) -> Result<RawWriter<'a>> {
- *(buffer.last_mut().ok_or(EINVAL)?) = 0;
-
- // INVARIANT: We null terminated the buffer above.
- Ok(Self { buffer, pos: 0 })
- }
-
- pub(crate) fn from_array<const N: usize>(
- a: &'a mut [crate::ffi::c_char; N],
- ) -> Result<RawWriter<'a>> {
- Self::new(
- // SAFETY: the buffer of `a` is valid for read and write as `u8` for
- // at least `N` bytes.
- unsafe { core::slice::from_raw_parts_mut(a.as_mut_ptr().cast::<u8>(), N) },
- )
- }
-}
-
-impl Write for RawWriter<'_> {
- fn write_str(&mut self, s: &str) -> fmt::Result {
- let bytes = s.as_bytes();
- let len = bytes.len();
-
- // We do not want to overwrite our null terminator
- if self.pos + len > self.buffer.len() - 1 {
- return Err(fmt::Error);
- }
-
- // INVARIANT: We are not overwriting the last byte
- self.buffer[self.pos..self.pos + len].copy_from_slice(bytes);
-
- self.pos += len;
-
- Ok(())
- }
-}
diff --git a/rust/kernel/block/mq/request.rs b/rust/kernel/block/mq/request.rs
index fefd394f064a..c5f1f6b1ccfb 100644
--- a/rust/kernel/block/mq/request.rs
+++ b/rust/kernel/block/mq/request.rs
@@ -8,13 +8,10 @@ use crate::{
bindings,
block::mq::Operations,
error::Result,
+ sync::{atomic::Relaxed, Refcount},
types::{ARef, AlwaysRefCounted, Opaque},
};
-use core::{
- marker::PhantomData,
- ptr::NonNull,
- sync::atomic::{AtomicU64, Ordering},
-};
+use core::{marker::PhantomData, ptr::NonNull};
/// A wrapper around a blk-mq [`struct request`]. This represents an IO request.
///
@@ -37,6 +34,9 @@ use core::{
/// We need to track 3 and 4 to ensure that it is safe to end the request and hand
/// back ownership to the block layer.
///
+/// Note that the driver can still obtain new `ARef` even if there are no `ARef`s in existence by
+/// using `tag_to_rq`, hence the need to distinguish B and C.
+///
/// The states are tracked through the private `refcount` field of
/// `RequestDataWrapper`. This structure lives in the private data area of the C
/// [`struct request`].
@@ -53,7 +53,7 @@ use core::{
/// [`struct request`]: srctree/include/linux/blk-mq.h
///
#[repr(transparent)]
-pub struct Request<T: Operations>(Opaque<bindings::request>, PhantomData<T>);
+pub struct Request<T>(Opaque<bindings::request>, PhantomData<T>);
impl<T: Operations> Request<T> {
/// Create an [`ARef<Request>`] from a [`struct request`] pointer.
@@ -98,13 +98,16 @@ impl<T: Operations> Request<T> {
///
/// [`struct request`]: srctree/include/linux/blk-mq.h
fn try_set_end(this: ARef<Self>) -> Result<*mut bindings::request, ARef<Self>> {
- // We can race with `TagSet::tag_to_rq`
- if let Err(_old) = this.wrapper_ref().refcount().compare_exchange(
- 2,
- 0,
- Ordering::Relaxed,
- Ordering::Relaxed,
- ) {
+ // To hand back the ownership, we need the current refcount to be 2.
+ // Since we can race with `TagSet::tag_to_rq`, this needs to atomically reduce
+ // refcount to 0. `Refcount` does not provide a way to do this, so use the underlying
+ // atomics directly.
+ if let Err(_old) = this
+ .wrapper_ref()
+ .refcount()
+ .as_atomic()
+ .cmpxchg(2, 0, Relaxed)
+ {
return Err(this);
}
@@ -135,6 +138,23 @@ impl<T: Operations> Request<T> {
Ok(())
}
+ /// Complete the request by scheduling `Operations::complete` for
+ /// execution.
+ ///
+ /// The function may be scheduled locally, via SoftIRQ or remotely via IPI.
+ /// See `blk_mq_complete_request_remote` in [`blk-mq.c`] for details.
+ ///
+ /// [`blk-mq.c`]: srctree/block/blk-mq.c
+ pub fn complete(this: ARef<Self>) {
+ let ptr = ARef::into_raw(this).cast::<bindings::request>().as_ptr();
+ // SAFETY: By type invariant, `this.0` is a valid `struct request`
+ if !unsafe { bindings::blk_mq_complete_request_remote(ptr) } {
+ // SAFETY: We released a refcount above that we can reclaim here.
+ let this = unsafe { Request::aref_from_raw(ptr) };
+ T::complete(this);
+ }
+ }
+
/// Return a pointer to the [`RequestDataWrapper`] stored in the private area
/// of the request structure.
///
@@ -148,7 +168,7 @@ impl<T: Operations> Request<T> {
// valid allocation.
let wrapper_ptr =
unsafe { bindings::blk_mq_rq_to_pdu(request_ptr).cast::<RequestDataWrapper>() };
- // SAFETY: By C API contract, wrapper_ptr points to a valid allocation
+ // SAFETY: By C API contract, `wrapper_ptr` points to a valid allocation
// and is not null.
unsafe { NonNull::new_unchecked(wrapper_ptr) }
}
@@ -173,13 +193,13 @@ pub(crate) struct RequestDataWrapper {
/// - 0: The request is owned by C block layer.
/// - 1: The request is owned by Rust abstractions but there are no [`ARef`] references to it.
/// - 2+: There are [`ARef`] references to the request.
- refcount: AtomicU64,
+ refcount: Refcount,
}
impl RequestDataWrapper {
/// Return a reference to the refcount of the request that is embedding
/// `self`.
- pub(crate) fn refcount(&self) -> &AtomicU64 {
+ pub(crate) fn refcount(&self) -> &Refcount {
&self.refcount
}
@@ -189,7 +209,7 @@ impl RequestDataWrapper {
/// # Safety
///
/// - `this` must point to a live allocation of at least the size of `Self`.
- pub(crate) unsafe fn refcount_ptr(this: *mut Self) -> *mut AtomicU64 {
+ pub(crate) unsafe fn refcount_ptr(this: *mut Self) -> *mut Refcount {
// SAFETY: Because of the safety requirements of this function, the
// field projection is safe.
unsafe { &raw mut (*this).refcount }
@@ -205,47 +225,13 @@ unsafe impl<T: Operations> Send for Request<T> {}
// mutate `self` are internally synchronized`
unsafe impl<T: Operations> Sync for Request<T> {}
-/// Store the result of `op(target.load())` in target, returning new value of
-/// target.
-fn atomic_relaxed_op_return(target: &AtomicU64, op: impl Fn(u64) -> u64) -> u64 {
- let old = target.fetch_update(Ordering::Relaxed, Ordering::Relaxed, |x| Some(op(x)));
-
- // SAFETY: Because the operation passed to `fetch_update` above always
- // return `Some`, `old` will always be `Ok`.
- let old = unsafe { old.unwrap_unchecked() };
-
- op(old)
-}
-
-/// Store the result of `op(target.load)` in `target` if `target.load() !=
-/// pred`, returning [`true`] if the target was updated.
-fn atomic_relaxed_op_unless(target: &AtomicU64, op: impl Fn(u64) -> u64, pred: u64) -> bool {
- target
- .fetch_update(Ordering::Relaxed, Ordering::Relaxed, |x| {
- if x == pred {
- None
- } else {
- Some(op(x))
- }
- })
- .is_ok()
-}
-
// SAFETY: All instances of `Request<T>` are reference counted. This
// implementation of `AlwaysRefCounted` ensure that increments to the ref count
// keeps the object alive in memory at least until a matching reference count
// decrement is executed.
unsafe impl<T: Operations> AlwaysRefCounted for Request<T> {
fn inc_ref(&self) {
- let refcount = &self.wrapper_ref().refcount();
-
- #[cfg_attr(not(CONFIG_DEBUG_MISC), allow(unused_variables))]
- let updated = atomic_relaxed_op_unless(refcount, |x| x + 1, 0);
-
- #[cfg(CONFIG_DEBUG_MISC)]
- if !updated {
- panic!("Request refcount zero on clone")
- }
+ self.wrapper_ref().refcount().inc();
}
unsafe fn dec_ref(obj: core::ptr::NonNull<Self>) {
@@ -257,10 +243,10 @@ unsafe impl<T: Operations> AlwaysRefCounted for Request<T> {
let refcount = unsafe { &*RequestDataWrapper::refcount_ptr(wrapper_ptr) };
#[cfg_attr(not(CONFIG_DEBUG_MISC), allow(unused_variables))]
- let new_refcount = atomic_relaxed_op_return(refcount, |x| x - 1);
+ let is_zero = refcount.dec_and_test();
#[cfg(CONFIG_DEBUG_MISC)]
- if new_refcount == 0 {
+ if is_zero {
panic!("Request reached refcount zero in Rust abstractions");
}
}