diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2026-02-09 17:57:21 -0800 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2026-02-09 17:57:21 -0800 |
| commit | 0c00ed308d0559fc216be0442a3df124e9e13533 (patch) | |
| tree | a41c8509b8543ce8681d0aa9c06a9f94c2b6e458 /include/uapi/linux | |
| parent | 591beb0e3a03258ef9c01893a5209845799a7c33 (diff) | |
| parent | 72f4d6fca699a1e35b39c5e5dacac2926d254135 (diff) | |
Merge tag 'for-7.0/block-20260206' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux
Pull block updates from Jens Axboe:
- Support for batch request processing for ublk, improving the
efficiency of the kernel/ublk server communication. This can yield
nice 7-12% performance improvements
- Support for integrity data for ublk
- Various other ublk improvements and additions, including a ton of
selftests additions and updated
- Move the handling of blk-crypto software fallback from below the
block layer to above it. This reduces the complexity of dealing with
bio splitting
- Series fixing a number of potential deadlocks in blk-mq related to
the queue usage counter and writeback throttling and rq-qos debugfs
handling
- Add an async_depth queue attribute, to resolve a performance
regression that's been around for a qhilw related to the scheduler
depth handling
- Only use task_work for IOPOLL completions on NVMe, if it is necessary
to do so. An earlier fix for an issue resulted in all these
completions being punted to task_work, to guarantee that completions
were only run for a given io_uring ring when it was local to that
ring. With the new changes, we can detect if it's necessary to use
task_work or not, and avoid it if possible.
- rnbd fixes:
- Fix refcount underflow in device unmap path
- Handle PREFLUSH and NOUNMAP flags properly in protocol
- Fix server-side bi_size for special IOs
- Zero response buffer before use
- Fix trace format for flags
- Add .release to rnbd_dev_ktype
- MD pull requests via Yu Kuai
- Fix raid5_run() to return error when log_init() fails
- Fix IO hang with degraded array with llbitmap
- Fix percpu_ref not resurrected on suspend timeout in llbitmap
- Fix GPF in write_page caused by resize race
- Fix NULL pointer dereference in process_metadata_update
- Fix hang when stopping arrays with metadata through dm-raid
- Fix any_working flag handling in raid10_sync_request
- Refactor sync/recovery code path, improve error handling for
badblocks, and remove unused recovery_disabled field
- Consolidate mddev boolean fields into mddev_flags
- Use mempool to allocate stripe_request_ctx and make sure
max_sectors is not less than io_opt in raid5
- Fix return value of mddev_trylock
- Fix memory leak in raid1_run()
- Add Li Nan as mdraid reviewer
- Move phys_vec definitions to the kernel types, mostly in preparation
for some VFIO and RDMA changes
- Improve the speed for secure erase for some devices
- Various little rust updates
- Various other minor fixes, improvements, and cleanups
* tag 'for-7.0/block-20260206' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux: (162 commits)
blk-mq: ABI/sysfs-block: fix docs build warnings
selftests: ublk: organize test directories by test ID
block: decouple secure erase size limit from discard size limit
block: remove redundant kill_bdev() call in set_blocksize()
blk-mq: add documentation for new queue attribute async_dpeth
block, bfq: convert to use request_queue->async_depth
mq-deadline: covert to use request_queue->async_depth
kyber: covert to use request_queue->async_depth
blk-mq: add a new queue sysfs attribute async_depth
blk-mq: factor out a helper blk_mq_limit_depth()
blk-mq-sched: unify elevators checking for async requests
block: convert nr_requests to unsigned int
block: don't use strcpy to copy blockdev name
blk-mq-debugfs: warn about possible deadlock
blk-mq-debugfs: add missing debugfs_mutex in blk_mq_debugfs_register_hctxs()
blk-mq-debugfs: remove blk_mq_debugfs_unregister_rqos()
blk-mq-debugfs: make blk_mq_debugfs_register_rqos() static
blk-rq-qos: fix possible debugfs_mutex deadlock
blk-mq-debugfs: factor out a helper to register debugfs for all rq_qos
blk-wbt: fix possible deadlock to nest pcpu_alloc_mutex under q_usage_counter
...
Diffstat (limited to 'include/uapi/linux')
| -rw-r--r-- | include/uapi/linux/ublk_cmd.h | 121 |
1 files changed, 120 insertions, 1 deletions
diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h index ec77dabba45b..a88876756805 100644 --- a/include/uapi/linux/ublk_cmd.h +++ b/include/uapi/linux/ublk_cmd.h @@ -55,7 +55,8 @@ _IOWR('u', 0x15, struct ublksrv_ctrl_cmd) #define UBLK_U_CMD_QUIESCE_DEV \ _IOWR('u', 0x16, struct ublksrv_ctrl_cmd) - +#define UBLK_U_CMD_TRY_STOP_DEV \ + _IOWR('u', 0x17, struct ublksrv_ctrl_cmd) /* * 64bits are enough now, and it should be easy to extend in case of * running out of feature flags @@ -103,6 +104,30 @@ #define UBLK_U_IO_UNREGISTER_IO_BUF \ _IOWR('u', 0x24, struct ublksrv_io_cmd) +/* + * return 0 if the command is run successfully, otherwise failure code + * is returned + */ +#define UBLK_U_IO_PREP_IO_CMDS \ + _IOWR('u', 0x25, struct ublk_batch_io) +/* + * If failure code is returned, nothing in the command buffer is handled. + * Otherwise, the returned value means how many bytes in command buffer + * are handled actually, then number of handled IOs can be calculated with + * `elem_bytes` for each IO. IOs in the remained bytes are not committed, + * userspace has to check return value for dealing with partial committing + * correctly. + */ +#define UBLK_U_IO_COMMIT_IO_CMDS \ + _IOWR('u', 0x26, struct ublk_batch_io) + +/* + * Fetch io commands to provided buffer in multishot style, + * `IORING_URING_CMD_MULTISHOT` is required for this command. + */ +#define UBLK_U_IO_FETCH_IO_CMDS \ + _IOWR('u', 0x27, struct ublk_batch_io) + /* only ABORT means that no re-fetch */ #define UBLK_IO_RES_OK 0 #define UBLK_IO_RES_NEED_GET_DATA 1 @@ -134,6 +159,10 @@ #define UBLKSRV_IO_BUF_TOTAL_BITS (UBLK_QID_OFF + UBLK_QID_BITS) #define UBLKSRV_IO_BUF_TOTAL_SIZE (1ULL << UBLKSRV_IO_BUF_TOTAL_BITS) +/* Copy to/from request integrity buffer instead of data buffer */ +#define UBLK_INTEGRITY_FLAG_OFF 62 +#define UBLKSRV_IO_INTEGRITY_FLAG (1ULL << UBLK_INTEGRITY_FLAG_OFF) + /* * ublk server can register data buffers for incoming I/O requests with a sparse * io_uring buffer table. The request buffer can then be used as the data buffer @@ -311,6 +340,36 @@ */ #define UBLK_F_BUF_REG_OFF_DAEMON (1ULL << 14) +/* + * Support the following commands for delivering & committing io command + * in batch. + * + * - UBLK_U_IO_PREP_IO_CMDS + * - UBLK_U_IO_COMMIT_IO_CMDS + * - UBLK_U_IO_FETCH_IO_CMDS + * - UBLK_U_IO_REGISTER_IO_BUF + * - UBLK_U_IO_UNREGISTER_IO_BUF + * + * The existing UBLK_U_IO_FETCH_REQ, UBLK_U_IO_COMMIT_AND_FETCH_REQ and + * UBLK_U_IO_NEED_GET_DATA uring_cmd are not supported for this feature. + */ +#define UBLK_F_BATCH_IO (1ULL << 15) + +/* + * ublk device supports requests with integrity/metadata buffer. + * Requires UBLK_F_USER_COPY. + */ +#define UBLK_F_INTEGRITY (1ULL << 16) + +/* + * The device supports the UBLK_CMD_TRY_STOP_DEV command, which + * allows stopping the device only if there are no openers. + */ +#define UBLK_F_SAFE_STOP_DEV (1ULL << 17) + +/* Disable automatic partition scanning when device is started */ +#define UBLK_F_NO_AUTO_PART_SCAN (1ULL << 18) + /* device state */ #define UBLK_S_DEV_DEAD 0 #define UBLK_S_DEV_LIVE 1 @@ -408,6 +467,8 @@ struct ublksrv_ctrl_dev_info { * passed in. */ #define UBLK_IO_F_NEED_REG_BUF (1U << 17) +/* Request has an integrity data buffer */ +#define UBLK_IO_F_INTEGRITY (1UL << 18) /* * io cmd is described by this structure, and stored in share memory, indexed @@ -525,6 +586,51 @@ struct ublksrv_io_cmd { }; }; +struct ublk_elem_header { + __u16 tag; /* IO tag */ + + /* + * Buffer index for incoming io command, only valid iff + * UBLK_F_AUTO_BUF_REG is set + */ + __u16 buf_index; + __s32 result; /* I/O completion result (commit only) */ +}; + +/* + * uring_cmd buffer structure for batch commands + * + * buffer includes multiple elements, which number is specified by + * `nr_elem`. Each element buffer is organized in the following order: + * + * struct ublk_elem_buffer { + * // Mandatory fields (8 bytes) + * struct ublk_elem_header header; + * + * // Optional fields (8 bytes each, included based on flags) + * + * // Buffer address (if UBLK_BATCH_F_HAS_BUF_ADDR) for copying data + * // between ublk request and ublk server buffer + * __u64 buf_addr; + * + * // returned Zone append LBA (if UBLK_BATCH_F_HAS_ZONE_LBA) + * __u64 zone_lba; + * } + * + * Used for `UBLK_U_IO_PREP_IO_CMDS` and `UBLK_U_IO_COMMIT_IO_CMDS` + */ +struct ublk_batch_io { + __u16 q_id; +#define UBLK_BATCH_F_HAS_ZONE_LBA (1 << 0) +#define UBLK_BATCH_F_HAS_BUF_ADDR (1 << 1) +#define UBLK_BATCH_F_AUTO_BUF_REG_FALLBACK (1 << 2) + __u16 flags; + __u16 nr_elem; + __u8 elem_bytes; + __u8 reserved; + __u64 reserved2; +}; + struct ublk_param_basic { #define UBLK_ATTR_READ_ONLY (1 << 0) #define UBLK_ATTR_ROTATIONAL (1 << 1) @@ -600,6 +706,17 @@ struct ublk_param_segment { __u8 pad[2]; }; +struct ublk_param_integrity { + __u32 flags; /* LBMD_PI_CAP_* from linux/fs.h */ + __u16 max_integrity_segments; /* 0 means no limit */ + __u8 interval_exp; + __u8 metadata_size; /* UBLK_PARAM_TYPE_INTEGRITY requires nonzero */ + __u8 pi_offset; + __u8 csum_type; /* LBMD_PI_CSUM_* from linux/fs.h */ + __u8 tag_size; + __u8 pad[5]; +}; + struct ublk_params { /* * Total length of parameters, userspace has to set 'len' for both @@ -614,6 +731,7 @@ struct ublk_params { #define UBLK_PARAM_TYPE_ZONED (1 << 3) #define UBLK_PARAM_TYPE_DMA_ALIGN (1 << 4) #define UBLK_PARAM_TYPE_SEGMENT (1 << 5) +#define UBLK_PARAM_TYPE_INTEGRITY (1 << 6) /* requires UBLK_F_INTEGRITY */ __u32 types; /* types of parameter included */ struct ublk_param_basic basic; @@ -622,6 +740,7 @@ struct ublk_params { struct ublk_param_zoned zoned; struct ublk_param_dma_align dma; struct ublk_param_segment seg; + struct ublk_param_integrity integrity; }; #endif |
