diff options
Diffstat (limited to 'rust/kernel/block')
-rw-r--r-- | rust/kernel/block/mq.rs | 14 | ||||
-rw-r--r-- | rust/kernel/block/mq/gen_disk.rs | 60 | ||||
-rw-r--r-- | rust/kernel/block/mq/operations.rs | 72 | ||||
-rw-r--r-- | rust/kernel/block/mq/raw_writer.rs | 55 | ||||
-rw-r--r-- | rust/kernel/block/mq/request.rs | 94 |
5 files changed, 151 insertions, 144 deletions
diff --git a/rust/kernel/block/mq.rs b/rust/kernel/block/mq.rs index 831445d37181..637018ead0ab 100644 --- a/rust/kernel/block/mq.rs +++ b/rust/kernel/block/mq.rs @@ -69,27 +69,33 @@ //! //! #[vtable] //! impl Operations for MyBlkDevice { +//! type QueueData = (); //! -//! fn queue_rq(rq: ARef<Request<Self>>, _is_last: bool) -> Result { +//! fn queue_rq(_queue_data: (), rq: ARef<Request<Self>>, _is_last: bool) -> Result { //! Request::end_ok(rq); //! Ok(()) //! } //! -//! fn commit_rqs() {} +//! fn commit_rqs(_queue_data: ()) {} +//! +//! fn complete(rq: ARef<Request<Self>>) { +//! Request::end_ok(rq) +//! .map_err(|_e| kernel::error::code::EIO) +//! .expect("Fatal error - expected to be able to end request"); +//! } //! } //! //! let tagset: Arc<TagSet<MyBlkDevice>> = //! Arc::pin_init(TagSet::new(1, 256, 1), flags::GFP_KERNEL)?; //! let mut disk = gen_disk::GenDiskBuilder::new() //! .capacity_sectors(4096) -//! .build(format_args!("myblk"), tagset)?; +//! .build(fmt!("myblk"), tagset, ())?; //! //! # Ok::<(), kernel::error::Error>(()) //! ``` pub mod gen_disk; mod operations; -mod raw_writer; mod request; mod tag_set; diff --git a/rust/kernel/block/mq/gen_disk.rs b/rust/kernel/block/mq/gen_disk.rs index cd54cd64ea88..1ce815c8cdab 100644 --- a/rust/kernel/block/mq/gen_disk.rs +++ b/rust/kernel/block/mq/gen_disk.rs @@ -3,12 +3,19 @@ //! Generic disk abstraction. //! //! C header: [`include/linux/blkdev.h`](srctree/include/linux/blkdev.h) -//! C header: [`include/linux/blk_mq.h`](srctree/include/linux/blk_mq.h) - -use crate::block::mq::{raw_writer::RawWriter, Operations, TagSet}; -use crate::{bindings, error::from_err_ptr, error::Result, sync::Arc}; -use crate::{error, static_lock_class}; -use core::fmt::{self, Write}; +//! 
C header: [`include/linux/blk-mq.h`](srctree/include/linux/blk-mq.h) + +use crate::{ + bindings, + block::mq::{Operations, TagSet}, + error::{self, from_err_ptr, Result}, + fmt::{self, Write}, + prelude::*, + static_lock_class, + str::NullTerminatedFormatter, + sync::Arc, + types::{ForeignOwnable, ScopeGuard}, +}; /// A builder for [`GenDisk`]. /// @@ -45,7 +52,7 @@ impl GenDiskBuilder { /// Validate block size by verifying that it is between 512 and `PAGE_SIZE`, /// and that it is a power of two. - fn validate_block_size(size: u32) -> Result { + pub fn validate_block_size(size: u32) -> Result { if !(512..=bindings::PAGE_SIZE as u32).contains(&size) || !size.is_power_of_two() { Err(error::code::EINVAL) } else { @@ -92,7 +99,14 @@ impl GenDiskBuilder { self, name: fmt::Arguments<'_>, tagset: Arc<TagSet<T>>, + queue_data: T::QueueData, ) -> Result<GenDisk<T>> { + let data = queue_data.into_foreign(); + let recover_data = ScopeGuard::new(|| { + // SAFETY: T::QueueData was created by the call to `into_foreign()` above + drop(unsafe { T::QueueData::from_foreign(data) }); + }); + // SAFETY: `bindings::queue_limits` contain only fields that are valid when zeroed. let mut lim: bindings::queue_limits = unsafe { core::mem::zeroed() }; @@ -107,7 +121,7 @@ impl GenDiskBuilder { bindings::__blk_mq_alloc_disk( tagset.raw_tag_set(), &mut lim, - core::ptr::null_mut(), + data, static_lock_class!().as_ptr(), ) })?; @@ -139,14 +153,14 @@ impl GenDiskBuilder { // SAFETY: `gendisk` is a valid pointer as we initialized it above unsafe { (*gendisk).fops = &TABLE }; - let mut raw_writer = RawWriter::from_array( + let mut writer = NullTerminatedFormatter::new( // SAFETY: `gendisk` points to a valid and initialized instance. We // have exclusive access, since the disk is not added to the VFS // yet. 
unsafe { &mut (*gendisk).disk_name }, - )?; - raw_writer.write_fmt(name)?; - raw_writer.write_char('\0')?; + ) + .ok_or(EINVAL)?; + writer.write_fmt(name)?; // SAFETY: `gendisk` points to a valid and initialized instance of // `struct gendisk`. `set_capacity` takes a lock to synchronize this @@ -161,8 +175,12 @@ impl GenDiskBuilder { }, )?; + recover_data.dismiss(); + // INVARIANT: `gendisk` was initialized above. // INVARIANT: `gendisk` was added to the VFS via `device_add_disk` above. + // INVARIANT: `gendisk.queue.queuedata` is set to `data` in the call to + // `__blk_mq_alloc_disk` above. Ok(GenDisk { _tagset: tagset, gendisk, @@ -174,9 +192,10 @@ impl GenDiskBuilder { /// /// # Invariants /// -/// - `gendisk` must always point to an initialized and valid `struct gendisk`. -/// - `gendisk` was added to the VFS through a call to -/// `bindings::device_add_disk`. +/// - `gendisk` must always point to an initialized and valid `struct gendisk`. +/// - `gendisk` was added to the VFS through a call to +/// `bindings::device_add_disk`. +/// - `self.gendisk.queue.queuedata` is initialized by a call to `ForeignOwnable::into_foreign`. pub struct GenDisk<T: Operations> { _tagset: Arc<TagSet<T>>, gendisk: *mut bindings::gendisk, @@ -188,9 +207,20 @@ unsafe impl<T: Operations + Send> Send for GenDisk<T> {} impl<T: Operations> Drop for GenDisk<T> { fn drop(&mut self) { + // SAFETY: By type invariant of `Self`, `self.gendisk` points to a valid + // and initialized instance of `struct gendisk`, and, `queuedata` was + // initialized with the result of a call to + // `ForeignOwnable::into_foreign`. + let queue_data = unsafe { (*(*self.gendisk).queue).queuedata }; + // SAFETY: By type invariant, `self.gendisk` points to a valid and // initialized instance of `struct gendisk`, and it was previously added // to the VFS. 
unsafe { bindings::del_gendisk(self.gendisk) }; + + // SAFETY: `queue.queuedata` was created by `GenDiskBuilder::build` with + // a call to `ForeignOwnable::into_foreign` to create `queuedata`. + // `ForeignOwnable::from_foreign` is only called here. + drop(unsafe { T::QueueData::from_foreign(queue_data) }); } } diff --git a/rust/kernel/block/mq/operations.rs b/rust/kernel/block/mq/operations.rs index c2b98f507bcb..f91a1719886c 100644 --- a/rust/kernel/block/mq/operations.rs +++ b/rust/kernel/block/mq/operations.rs @@ -6,13 +6,15 @@ use crate::{ bindings, - block::mq::request::RequestDataWrapper, - block::mq::Request, + block::mq::{request::RequestDataWrapper, Request}, error::{from_result, Result}, prelude::*, - types::ARef, + sync::Refcount, + types::{ARef, ForeignOwnable}, }; -use core::{marker::PhantomData, sync::atomic::AtomicU64, sync::atomic::Ordering}; +use core::marker::PhantomData; + +type ForeignBorrowed<'a, T> = <T as ForeignOwnable>::Borrowed<'a>; /// Implement this trait to interface blk-mq as block devices. /// @@ -26,12 +28,23 @@ use core::{marker::PhantomData, sync::atomic::AtomicU64, sync::atomic::Ordering} /// [module level documentation]: kernel::block::mq #[macros::vtable] pub trait Operations: Sized { + /// Data associated with the `struct request_queue` that is allocated for + /// the `GenDisk` associated with this `Operations` implementation. + type QueueData: ForeignOwnable; + /// Called by the kernel to queue a request with the driver. If `is_last` is /// `false`, the driver is allowed to defer committing the request. - fn queue_rq(rq: ARef<Request<Self>>, is_last: bool) -> Result; + fn queue_rq( + queue_data: ForeignBorrowed<'_, Self::QueueData>, + rq: ARef<Request<Self>>, + is_last: bool, + ) -> Result; /// Called by the kernel to indicate that queued requests should be submitted. - fn commit_rqs(); + fn commit_rqs(queue_data: ForeignBorrowed<'_, Self::QueueData>); + + /// Called by the kernel when the request is completed. 
+ fn complete(rq: ARef<Request<Self>>); /// Called by the kernel to poll the device for completed requests. Only /// used for poll queues. @@ -70,7 +83,7 @@ impl<T: Operations> OperationsVTable<T> { /// promise to not access the request until the driver calls /// `bindings::blk_mq_end_request` for the request. unsafe extern "C" fn queue_rq_callback( - _hctx: *mut bindings::blk_mq_hw_ctx, + hctx: *mut bindings::blk_mq_hw_ctx, bd: *const bindings::blk_mq_queue_data, ) -> bindings::blk_status_t { // SAFETY: `bd.rq` is valid as required by the safety requirement for @@ -78,7 +91,7 @@ impl<T: Operations> OperationsVTable<T> { let request = unsafe { &*(*bd).rq.cast::<Request<T>>() }; // One refcount for the ARef, one for being in flight - request.wrapper_ref().refcount().store(2, Ordering::Relaxed); + request.wrapper_ref().refcount().set(2); // SAFETY: // - We own a refcount that we took above. We pass that to `ARef`. @@ -88,10 +101,20 @@ impl<T: Operations> OperationsVTable<T> { // reference counted by `ARef` until then. let rq = unsafe { Request::aref_from_raw((*bd).rq) }; + // SAFETY: `hctx` is valid as required by this function. + let queue_data = unsafe { (*(*hctx).queue).queuedata }; + + // SAFETY: `queue.queuedata` was created by `GenDiskBuilder::build` with + // a call to `ForeignOwnable::into_foreign` to create `queuedata`. + // `ForeignOwnable::from_foreign` is only called when the tagset is + // dropped, which happens after we are dropped. + let queue_data = unsafe { T::QueueData::borrow(queue_data) }; + // SAFETY: We have exclusive access and we just set the refcount above. unsafe { Request::start_unchecked(&rq) }; let ret = T::queue_rq( + queue_data, rq, // SAFETY: `bd` is valid as required by the safety requirement for // this function. @@ -110,18 +133,35 @@ impl<T: Operations> OperationsVTable<T> { /// /// # Safety /// - /// This function may only be called by blk-mq C infrastructure. 
- unsafe extern "C" fn commit_rqs_callback(_hctx: *mut bindings::blk_mq_hw_ctx) { - T::commit_rqs() + /// This function may only be called by blk-mq C infrastructure. The caller + /// must ensure that `hctx` is valid. + unsafe extern "C" fn commit_rqs_callback(hctx: *mut bindings::blk_mq_hw_ctx) { + // SAFETY: `hctx` is valid as required by this function. + let queue_data = unsafe { (*(*hctx).queue).queuedata }; + + // SAFETY: `queue.queuedata` was created by `GenDiskBuilder::build` with a + // call to `ForeignOwnable::into_foreign()` to create `queuedata`. + // `ForeignOwnable::from_foreign()` is only called when the tagset is + // dropped, which happens after we are dropped. + let queue_data = unsafe { T::QueueData::borrow(queue_data) }; + T::commit_rqs(queue_data) } - /// This function is called by the C kernel. It is not currently - /// implemented, and there is no way to exercise this code path. + /// This function is called by the C kernel. A pointer to this function is + /// installed in the `blk_mq_ops` vtable for the driver. /// /// # Safety /// - /// This function may only be called by blk-mq C infrastructure. - unsafe extern "C" fn complete_callback(_rq: *mut bindings::request) {} + /// This function may only be called by blk-mq C infrastructure. `rq` must + /// point to a valid request that has been marked as completed. The pointee + /// of `rq` must be valid for write for the duration of this function. + unsafe extern "C" fn complete_callback(rq: *mut bindings::request) { + // SAFETY: This function can only be dispatched through + // `Request::complete`. We leaked a refcount then which we pick back up + // now. + let aref = unsafe { Request::aref_from_raw(rq) }; + T::complete(aref); + } /// This function is called by the C kernel. A pointer to this function is /// installed in the `blk_mq_ops` vtable for the driver. 
@@ -187,7 +227,7 @@ impl<T: Operations> OperationsVTable<T> { // SAFETY: The refcount field is allocated but not initialized, so // it is valid for writes. - unsafe { RequestDataWrapper::refcount_ptr(pdu.as_ptr()).write(AtomicU64::new(0)) }; + unsafe { RequestDataWrapper::refcount_ptr(pdu.as_ptr()).write(Refcount::new(0)) }; Ok(0) }) diff --git a/rust/kernel/block/mq/raw_writer.rs b/rust/kernel/block/mq/raw_writer.rs deleted file mode 100644 index 7e2159e4f6a6..000000000000 --- a/rust/kernel/block/mq/raw_writer.rs +++ /dev/null @@ -1,55 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -use core::fmt::{self, Write}; - -use crate::error::Result; -use crate::prelude::EINVAL; - -/// A mutable reference to a byte buffer where a string can be written into. -/// -/// # Invariants -/// -/// `buffer` is always null terminated. -pub(crate) struct RawWriter<'a> { - buffer: &'a mut [u8], - pos: usize, -} - -impl<'a> RawWriter<'a> { - /// Create a new `RawWriter` instance. - fn new(buffer: &'a mut [u8]) -> Result<RawWriter<'a>> { - *(buffer.last_mut().ok_or(EINVAL)?) = 0; - - // INVARIANT: We null terminated the buffer above. - Ok(Self { buffer, pos: 0 }) - } - - pub(crate) fn from_array<const N: usize>( - a: &'a mut [crate::ffi::c_char; N], - ) -> Result<RawWriter<'a>> { - Self::new( - // SAFETY: the buffer of `a` is valid for read and write as `u8` for - // at least `N` bytes. 
- unsafe { core::slice::from_raw_parts_mut(a.as_mut_ptr().cast::<u8>(), N) }, - ) - } -} - -impl Write for RawWriter<'_> { - fn write_str(&mut self, s: &str) -> fmt::Result { - let bytes = s.as_bytes(); - let len = bytes.len(); - - // We do not want to overwrite our null terminator - if self.pos + len > self.buffer.len() - 1 { - return Err(fmt::Error); - } - - // INVARIANT: We are not overwriting the last byte - self.buffer[self.pos..self.pos + len].copy_from_slice(bytes); - - self.pos += len; - - Ok(()) - } -} diff --git a/rust/kernel/block/mq/request.rs b/rust/kernel/block/mq/request.rs index fefd394f064a..c5f1f6b1ccfb 100644 --- a/rust/kernel/block/mq/request.rs +++ b/rust/kernel/block/mq/request.rs @@ -8,13 +8,10 @@ use crate::{ bindings, block::mq::Operations, error::Result, + sync::{atomic::Relaxed, Refcount}, types::{ARef, AlwaysRefCounted, Opaque}, }; -use core::{ - marker::PhantomData, - ptr::NonNull, - sync::atomic::{AtomicU64, Ordering}, -}; +use core::{marker::PhantomData, ptr::NonNull}; /// A wrapper around a blk-mq [`struct request`]. This represents an IO request. /// @@ -37,6 +34,9 @@ use core::{ /// We need to track 3 and 4 to ensure that it is safe to end the request and hand /// back ownership to the block layer. /// +/// Note that the driver can still obtain new `ARef` even if there is no `ARef`s in existence by +/// using `tag_to_rq`, hence the need to distinguish B and C. +/// /// The states are tracked through the private `refcount` field of /// `RequestDataWrapper`. This structure lives in the private data area of the C /// [`struct request`]. @@ -53,7 +53,7 @@ use core::{ /// [`struct request`]: srctree/include/linux/blk-mq.h /// #[repr(transparent)] -pub struct Request<T: Operations>(Opaque<bindings::request>, PhantomData<T>); +pub struct Request<T>(Opaque<bindings::request>, PhantomData<T>); impl<T: Operations> Request<T> { /// Create an [`ARef<Request>`] from a [`struct request`] pointer. 
@@ -98,13 +98,16 @@ impl<T: Operations> Request<T> { /// /// [`struct request`]: srctree/include/linux/blk-mq.h fn try_set_end(this: ARef<Self>) -> Result<*mut bindings::request, ARef<Self>> { - // We can race with `TagSet::tag_to_rq` - if let Err(_old) = this.wrapper_ref().refcount().compare_exchange( - 2, - 0, - Ordering::Relaxed, - Ordering::Relaxed, - ) { + // To hand back the ownership, we need the current refcount to be 2. + // Since we can race with `TagSet::tag_to_rq`, this needs to atomically reduce + // refcount to 0. `Refcount` does not provide a way to do this, so use the underlying + // atomics directly. + if let Err(_old) = this + .wrapper_ref() + .refcount() + .as_atomic() + .cmpxchg(2, 0, Relaxed) + { return Err(this); } @@ -135,6 +138,23 @@ impl<T: Operations> Request<T> { Ok(()) } + /// Complete the request by scheduling `Operations::complete` for + /// execution. + /// + /// The function may be scheduled locally, via SoftIRQ or remotely via IPI. + /// See `blk_mq_complete_request_remote` in [`blk-mq.c`] for details. + /// + /// [`blk-mq.c`]: srctree/block/blk-mq.c + pub fn complete(this: ARef<Self>) { + let ptr = ARef::into_raw(this).cast::<bindings::request>().as_ptr(); + // SAFETY: By type invariant, `self.0` is a valid `struct request` + if !unsafe { bindings::blk_mq_complete_request_remote(ptr) } { + // SAFETY: We released a refcount above that we can reclaim here. + let this = unsafe { Request::aref_from_raw(ptr) }; + T::complete(this); + } + } + /// Return a pointer to the [`RequestDataWrapper`] stored in the private area /// of the request structure. /// @@ -148,7 +168,7 @@ impl<T: Operations> Request<T> { // valid allocation. let wrapper_ptr = unsafe { bindings::blk_mq_rq_to_pdu(request_ptr).cast::<RequestDataWrapper>() }; - // SAFETY: By C API contract, wrapper_ptr points to a valid allocation + // SAFETY: By C API contract, `wrapper_ptr` points to a valid allocation // and is not null. 
unsafe { NonNull::new_unchecked(wrapper_ptr) } } @@ -173,13 +193,13 @@ pub(crate) struct RequestDataWrapper { /// - 0: The request is owned by C block layer. /// - 1: The request is owned by Rust abstractions but there are no [`ARef`] references to it. /// - 2+: There are [`ARef`] references to the request. - refcount: AtomicU64, + refcount: Refcount, } impl RequestDataWrapper { /// Return a reference to the refcount of the request that is embedding /// `self`. - pub(crate) fn refcount(&self) -> &AtomicU64 { + pub(crate) fn refcount(&self) -> &Refcount { &self.refcount } @@ -189,7 +209,7 @@ impl RequestDataWrapper { /// # Safety /// /// - `this` must point to a live allocation of at least the size of `Self`. - pub(crate) unsafe fn refcount_ptr(this: *mut Self) -> *mut AtomicU64 { + pub(crate) unsafe fn refcount_ptr(this: *mut Self) -> *mut Refcount { // SAFETY: Because of the safety requirements of this function, the // field projection is safe. unsafe { &raw mut (*this).refcount } @@ -205,47 +225,13 @@ unsafe impl<T: Operations> Send for Request<T> {} // mutate `self` are internally synchronized` unsafe impl<T: Operations> Sync for Request<T> {} -/// Store the result of `op(target.load())` in target, returning new value of -/// target. -fn atomic_relaxed_op_return(target: &AtomicU64, op: impl Fn(u64) -> u64) -> u64 { - let old = target.fetch_update(Ordering::Relaxed, Ordering::Relaxed, |x| Some(op(x))); - - // SAFETY: Because the operation passed to `fetch_update` above always - // return `Some`, `old` will always be `Ok`. - let old = unsafe { old.unwrap_unchecked() }; - - op(old) -} - -/// Store the result of `op(target.load)` in `target` if `target.load() != -/// pred`, returning [`true`] if the target was updated. 
-fn atomic_relaxed_op_unless(target: &AtomicU64, op: impl Fn(u64) -> u64, pred: u64) -> bool { - target - .fetch_update(Ordering::Relaxed, Ordering::Relaxed, |x| { - if x == pred { - None - } else { - Some(op(x)) - } - }) - .is_ok() -} - // SAFETY: All instances of `Request<T>` are reference counted. This // implementation of `AlwaysRefCounted` ensure that increments to the ref count // keeps the object alive in memory at least until a matching reference count // decrement is executed. unsafe impl<T: Operations> AlwaysRefCounted for Request<T> { fn inc_ref(&self) { - let refcount = &self.wrapper_ref().refcount(); - - #[cfg_attr(not(CONFIG_DEBUG_MISC), allow(unused_variables))] - let updated = atomic_relaxed_op_unless(refcount, |x| x + 1, 0); - - #[cfg(CONFIG_DEBUG_MISC)] - if !updated { - panic!("Request refcount zero on clone") - } + self.wrapper_ref().refcount().inc(); } unsafe fn dec_ref(obj: core::ptr::NonNull<Self>) { @@ -257,10 +243,10 @@ unsafe impl<T: Operations> AlwaysRefCounted for Request<T> { let refcount = unsafe { &*RequestDataWrapper::refcount_ptr(wrapper_ptr) }; #[cfg_attr(not(CONFIG_DEBUG_MISC), allow(unused_variables))] - let new_refcount = atomic_relaxed_op_return(refcount, |x| x - 1); + let is_zero = refcount.dec_and_test(); #[cfg(CONFIG_DEBUG_MISC)] - if new_refcount == 0 { + if is_zero { panic!("Request reached refcount zero in Rust abstractions"); } } |