/* SPDX-License-Identifier: GPL-2.0 */
#ifndef KUBLK_INTERNAL_H
#define KUBLK_INTERNAL_H

/*
 * NOTE(review): the header names of the following #include directives were
 * lost when this file was extracted (only the bare "#include" tokens
 * survive) — restore them from the original source before building.
 */
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

/* allow ublk_dep.h to override ublk_cmd.h */
#include "ublk_dep.h"
#include
#include "utils.h"

/* max number of backing files one device may be served from */
#define MAX_BACK_FILES		4

/****************** part 1: libublk ********************/

#define CTRL_DEV		"/dev/ublk-control"
#define UBLKC_DEV		"/dev/ublkc"
#define UBLKB_DEV		"/dev/ublkb"
#define UBLK_CTRL_RING_DEPTH	32
#define ERROR_EVTFD_DEVID	-2

#define UBLK_IO_MAX_BYTES	(1 << 20)
#define UBLK_MAX_QUEUES_SHIFT	5
#define UBLK_MAX_QUEUES		(1 << UBLK_MAX_QUEUES_SHIFT)
#define UBLK_MAX_THREADS_SHIFT	5
#define UBLK_MAX_THREADS	(1 << UBLK_MAX_THREADS_SHIFT)
#define UBLK_QUEUE_DEPTH	1024

struct ublk_dev;
struct ublk_queue;
struct ublk_thread;

struct stripe_ctx {
	/* stripe */
	unsigned int    chunk_size;
};

struct fault_inject_ctx {
	/* fault_inject */
	unsigned long   delay_us;
};

/* Per-device context built from the command line / runtime options */
struct dev_ctx {
	char tgt_type[16];
	unsigned long flags;
	unsigned nr_hw_queues;
	unsigned short nthreads;
	unsigned queue_depth;
	int dev_id;
	int nr_files;
	char *files[MAX_BACK_FILES];
	unsigned int	logging:1;
	unsigned int	all:1;
	unsigned int	fg:1;
	unsigned int	recovery:1;
	unsigned int	auto_zc_fallback:1;
	unsigned int	per_io_tasks:1;
	unsigned int	no_ublk_fixed_fd:1;
	unsigned int	safe_stop:1;
	unsigned int	no_auto_part_scan:1;

	/* integrity settings, consumed by ublk_set_integrity_params() */
	__u32 integrity_flags;
	__u8 metadata_size;
	__u8 pi_offset;
	__u8 csum_type;
	__u8 tag_size;

	int _evtfd;
	int _shmid;

	/* built from shmem, only for ublk_dump_dev() */
	struct ublk_dev *shadow_dev;

	/* for 'update_size' command */
	unsigned long long size;

	/* target-specific options; which member is live depends on tgt_type */
	union {
		struct stripe_ctx stripe;
		struct fault_inject_ctx fault_inject;
	};
};

/* One control command's payload for the /dev/ublk-control ring */
struct ublk_ctrl_cmd_data {
	__u32 cmd_op;
#define CTRL_CMD_HAS_DATA	1
#define CTRL_CMD_HAS_BUF	2
	__u32 flags;

	__u64 data[2];
	__u64 addr;
	__u32 len;
};

/* Per-tag io state tracked by the server */
struct ublk_io {
	char *buf_addr;
	void *integrity_buf;

#define UBLKS_IO_NEED_FETCH_RQ		(1UL << 0)
#define UBLKS_IO_NEED_COMMIT_RQ_COMP	(1UL << 1)
#define UBLKS_IO_FREE			(1UL << 2)
#define UBLKS_IO_NEED_GET_DATA		(1UL << 3)
#define UBLKS_IO_NEED_REG_BUF		(1UL << 4)
	unsigned short flags;
	unsigned short refs;		/* used by target code only */

	int tag;

	int result;

	unsigned short buf_index;
	unsigned short tgt_ios;
	void *private_data;
};

/* Per-target-type operations (implemented by null/loop/stripe/fault_inject) */
struct ublk_tgt_ops {
	const char *name;
	int (*init_tgt)(const struct dev_ctx *ctx, struct ublk_dev *);
	void (*deinit_tgt)(struct ublk_dev *);

	int (*queue_io)(struct ublk_thread *, struct ublk_queue *, int tag);
	void (*tgt_io_done)(struct ublk_thread *, struct ublk_queue *,
			    const struct io_uring_cqe *);

	/*
	 * Target specific command line handling
	 *
	 * each option requires argument for target command line
	 */
	void (*parse_cmd_line)(struct dev_ctx *ctx, int argc, char *argv[]);
	void (*usage)(const struct ublk_tgt_ops *ops);

	/* return buffer index for UBLK_F_AUTO_BUF_REG */
	unsigned short (*buf_index)(const struct ublk_thread *t,
				    const struct ublk_queue *, int tag);
};

struct ublk_tgt {
	unsigned long dev_size;
	unsigned int  sq_depth;
	unsigned int  cq_depth;
	const struct ublk_tgt_ops *ops;
	struct ublk_params params;

	int nr_backing_files;
	unsigned long backing_file_size[MAX_BACK_FILES];
	char backing_file[MAX_BACK_FILES][PATH_MAX];
};

struct ublk_queue {
	int q_id;
	int q_depth;
	struct ublk_dev *dev;
	const struct ublk_tgt_ops *tgt_ops;
	struct ublksrv_io_desc *io_cmd_buf;

/* borrow three bit of ublk uapi flags, which may never be used */
#define UBLKS_Q_AUTO_BUF_REG_FALLBACK	(1ULL << 63)
#define UBLKS_Q_NO_UBLK_FIXED_FD	(1ULL << 62)
#define UBLKS_Q_PREPARED		(1ULL << 61)
	__u64 flags;
	int ublk_fd;	/* cached ublk char device fd */
	__u8 metadata_size;
	struct ublk_io ios[UBLK_QUEUE_DEPTH];
	/* used for prep io commands */
	pthread_spinlock_t lock;
};

/* align with `ublk_elem_header` */
struct ublk_batch_elem {
	__u16 tag;
	__u16 buf_index;
	__s32 result;
	__u64 buf_addr;
};

/* State of the commit buffer currently being filled (see ublk_batch_*) */
struct batch_commit_buf {
	unsigned short q_id;
	unsigned short buf_idx;
	void *elem;
	unsigned short done;
	unsigned short count;
};

/* Provided-buffer ring backing multishot FETCH_IO_CMDS */
struct batch_fetch_buf {
	struct io_uring_buf_ring *br;
	void *fetch_buf;
	unsigned int fetch_buf_size;
	unsigned int fetch_buf_off;
};

struct ublk_thread {
	/* Thread-local copy of queue-to-thread mapping for this thread */
	unsigned char q_map[UBLK_MAX_QUEUES];

	struct ublk_dev *dev;
	unsigned short idx;
	unsigned short nr_queues;

#define UBLKS_T_STOPPING	(1U << 0)
#define UBLKS_T_IDLE		(1U << 1)
#define UBLKS_T_BATCH_IO	(1U << 31)	/* readonly */
	unsigned state;

	unsigned int cmd_inflight;
	unsigned int io_inflight;

	unsigned short nr_bufs;

	/* followings are for BATCH_IO */
	unsigned short commit_buf_start;
	unsigned char commit_buf_elem_size;
	/*
	 * We just support single device, so pre-calculate commit/prep flags
	 */
	unsigned short cmd_flags;
	unsigned int nr_commit_buf;
	unsigned int commit_buf_size;
	void *commit_buf;
#define UBLKS_T_COMMIT_BUF_INV_IDX	((unsigned short)-1)
	struct allocator commit_buf_alloc;
	struct batch_commit_buf *commit;

	/* FETCH_IO_CMDS buffer */
	unsigned short nr_fetch_bufs;
	struct batch_fetch_buf *fetch;

	struct io_uring ring;
};

struct ublk_dev {
	struct ublk_tgt tgt;
	struct ublksrv_ctrl_dev_info dev_info;
	struct ublk_queue q[UBLK_MAX_QUEUES];
	unsigned nthreads;
	unsigned per_io_tasks;

	int fds[MAX_BACK_FILES + 1];	/* fds[0] points to /dev/ublkcN */
	int nr_fds;
	int ctrl_fd;
	struct io_uring ring;

	void *private_data;
};

extern int ublk_queue_io_cmd(struct ublk_thread *t, struct ublk_io *io);

/* non-zero iff UBLK_F_BATCH_IO is set in @flags */
static inline int __ublk_use_batch_io(__u64 flags)
{
	return flags & UBLK_F_BATCH_IO;
}

static inline int ublk_queue_batch_io(const struct ublk_queue *q)
{
	return __ublk_use_batch_io(q->flags);
}

static inline int ublk_dev_batch_io(const struct ublk_dev *dev)
{
	return __ublk_use_batch_io(dev->dev_info.flags);
}

/* only work for handle single device in this pthread context */
static inline int ublk_thread_batch_io(const struct ublk_thread *t)
{
	return t->state & UBLKS_T_BATCH_IO;
}
/*
 * Fill integrity parameters into @params from @ctx; no-op unless the
 * user configured metadata (ctx->metadata_size != 0).
 */
static inline void ublk_set_integrity_params(const struct dev_ctx *ctx,
					     struct ublk_params *params)
{
	if (!ctx->metadata_size)
		return;

	params->types |= UBLK_PARAM_TYPE_INTEGRITY;
	params->integrity = (struct ublk_param_integrity) {
		.flags = ctx->integrity_flags,
		.interval_exp = params->basic.logical_bs_shift,
		.metadata_size = ctx->metadata_size,
		.pi_offset = ctx->pi_offset,
		.csum_type = ctx->csum_type,
		.tag_size = ctx->tag_size,
	};
}

/* bytes of integrity metadata needed to cover @len data bytes */
static inline size_t ublk_integrity_len(const struct ublk_queue *q, size_t len)
{
	/* All targets currently use interval_exp = logical_bs_shift = 9 */
	return (len >> 9) * q->metadata_size;
}

/* inverse of ublk_integrity_len(): data bytes covered by @integrity_len */
static inline size_t ublk_integrity_data_len(const struct ublk_queue *q,
					     size_t integrity_len)
{
	return (integrity_len / q->metadata_size) << 9;
}

static inline int ublk_io_auto_zc_fallback(const struct ublksrv_io_desc *iod)
{
	return !!(iod->op_flags & UBLK_IO_F_NEED_REG_BUF);
}

/* per-(queue, tag) buffer offset used in UBLK_F_USER_COPY mode */
static inline __u64 ublk_user_copy_offset(unsigned q_id, unsigned tag)
{
	return UBLKSRV_IO_BUF_OFFSET +
		((__u64)q_id << UBLK_QID_OFF | (__u64)tag << UBLK_TAG_OFF);
}

/* bit 63 of user_data marks target io, see build_user_data() */
static inline int is_target_io(__u64 user_data)
{
	return (user_data & (1ULL << 63)) != 0;
}

/*
 * Pack (tag, op, tgt_data, q_id, target-flag) into io_uring user_data:
 * bits 0-15 tag, 16-23 op, 24-39 tgt_data, 56-62 q_id, bit 63 target io.
 */
static inline __u64 build_user_data(unsigned tag, unsigned op,
		unsigned tgt_data, unsigned q_id, unsigned is_target_io)
{
	/* we only have 7 bits to encode q_id */
	_Static_assert(UBLK_MAX_QUEUES_SHIFT <= 7,
		       "UBLK_MAX_QUEUES_SHIFT must be <= 7");
	ublk_assert(!(tag >> 16) && !(op >> 8) &&
		    !(tgt_data >> 16) && !(q_id >> 7));

	return tag | ((__u64)op << 16) | ((__u64)tgt_data << 24) |
		(__u64)q_id << 56 | (__u64)is_target_io << 63;
}

static inline unsigned int user_data_to_tag(__u64 user_data)
{
	return user_data & 0xffff;
}

static inline unsigned int user_data_to_op(__u64 user_data)
{
	return (user_data >> 16) & 0xff;
}

static inline unsigned int user_data_to_tgt_data(__u64 user_data)
{
	return (user_data >> 24) & 0xffff;
}

static inline unsigned int user_data_to_q_id(__u64 user_data)
{
	return (user_data >> 56) & 0x7f;
}

static inline unsigned short ublk_cmd_op_nr(unsigned int op)
{
	return _IOC_NR(op);
}

/* map an embedded ublk_io back to its owning queue via its tag slot */
static inline struct ublk_queue *ublk_io_to_queue(const struct ublk_io *io)
{
	return container_of(io, struct ublk_queue, ios[io->tag]);
}

/*
 * Grab up to @nr_sqes SQEs from the thread's ring, submitting pending
 * SQEs first when the SQ ring is short on space; returns how many SQEs
 * were actually obtained (may be < @nr_sqes).
 */
static inline int ublk_io_alloc_sqes(struct ublk_thread *t,
		struct io_uring_sqe *sqes[], int nr_sqes)
{
	struct io_uring *ring = &t->ring;
	unsigned left = io_uring_sq_space_left(ring);
	int i;

	if (left < nr_sqes)
		io_uring_submit(ring);

	for (i = 0; i < nr_sqes; i++) {
		sqes[i] = io_uring_get_sqe(ring);
		if (!sqes[i])
			return i;
	}

	return nr_sqes;
}

/*
 * Translate @fd_index into what the SQE should carry, depending on
 * whether the ublk char device fd is registered as a fixed file.
 */
static inline int ublk_get_registered_fd(struct ublk_queue *q, int fd_index)
{
	if (q->flags & UBLKS_Q_NO_UBLK_FIXED_FD) {
		if (fd_index == 0)
			/* Return the raw ublk FD for index 0 */
			return q->ublk_fd;
		/* Adjust index for backing files (index 1 becomes 0, etc.) */
		return fd_index - 1;
	}
	return fd_index;
}

/* common SQE setup shared by buffer register/unregister uring_cmds */
static inline void __io_uring_prep_buf_reg_unreg(struct io_uring_sqe *sqe,
		struct ublk_queue *q, int tag, int q_id, __u64 index)
{
	struct ublksrv_io_cmd *cmd = (struct ublksrv_io_cmd *)sqe->cmd;
	int dev_fd = ublk_get_registered_fd(q, 0);

	io_uring_prep_read(sqe, dev_fd, 0, 0, 0);
	sqe->opcode	= IORING_OP_URING_CMD;
	if (q->flags & UBLKS_Q_NO_UBLK_FIXED_FD)
		sqe->flags	&= ~IOSQE_FIXED_FILE;
	else
		sqe->flags	|= IOSQE_FIXED_FILE;

	cmd->tag	= tag;
	cmd->addr	= index;
	cmd->q_id	= q_id;
}

static inline void io_uring_prep_buf_register(struct io_uring_sqe *sqe,
		struct ublk_queue *q, int tag, int q_id, __u64 index)
{
	__io_uring_prep_buf_reg_unreg(sqe, q, tag, q_id, index);
	sqe->cmd_op	= UBLK_U_IO_REGISTER_IO_BUF;
}

static inline void io_uring_prep_buf_unregister(struct io_uring_sqe *sqe,
		struct ublk_queue *q, int tag, int q_id, __u64 index)
{
	__io_uring_prep_buf_reg_unreg(sqe, q, tag, q_id, index);
	sqe->cmd_op	= UBLK_U_IO_UNREGISTER_IO_BUF;
}

static inline void *ublk_get_sqe_cmd(const struct io_uring_sqe *sqe)
{
	return (void *)&sqe->cmd;
}

static inline void ublk_set_io_res(struct ublk_queue *q, int tag, int res)
{
	q->ios[tag].result = res;
}

static inline int ublk_get_io_res(const struct ublk_queue *q, unsigned tag)
{
	return q->ios[tag].result;
}

/* record the completion result and mark the io slot ready for commit */
static inline void ublk_mark_io_done(struct ublk_io *io, int res)
{
	io->flags |= (UBLKS_IO_NEED_COMMIT_RQ_COMP | UBLKS_IO_FREE);
	io->result = res;
}

static inline const struct ublksrv_io_desc *ublk_get_iod(const struct ublk_queue *q, int tag)
{
	return &q->io_cmd_buf[tag];
}

/* cmd_op is stored in the 32-bit words overlaying sqe->off for uring_cmd */
static inline void ublk_set_sqe_cmd_op(struct io_uring_sqe *sqe, __u32 cmd_op)
{
	__u32 *addr = (__u32 *)&sqe->off;

	addr[0] = cmd_op;
	addr[1] = 0;
}

static inline unsigned short ublk_batch_io_buf_idx(
		const struct ublk_thread *t, const struct ublk_queue *q,
		unsigned tag);

/* buffer index for @tag: batch mode computes it, otherwise use io state */
static inline unsigned short ublk_io_buf_idx(const struct ublk_thread *t,
		const struct ublk_queue *q, unsigned tag)
{
	if (ublk_queue_batch_io(q))
		return ublk_batch_io_buf_idx(t, q, tag);
	return q->ios[tag].buf_index;
}

static inline struct ublk_io *ublk_get_io(struct ublk_queue *q, unsigned tag)
{
	return &q->ios[tag];
}

/* returns non-zero when the last outstanding target io of @tag completes */
static inline int ublk_completed_tgt_io(struct ublk_thread *t,
					struct ublk_queue *q, unsigned tag)
{
	struct ublk_io *io = ublk_get_io(q, tag);

	t->io_inflight--;

	return --io->tgt_ios == 0;
}

static inline bool ublk_queue_use_zc(const struct ublk_queue *q)
{
	return !!(q->flags & UBLK_F_SUPPORT_ZERO_COPY);
}

static inline bool ublk_queue_use_auto_zc(const struct ublk_queue *q)
{
	return !!(q->flags & UBLK_F_AUTO_BUF_REG);
}

static inline bool ublk_queue_auto_zc_fallback(const struct ublk_queue *q)
{
	return !!(q->flags & UBLKS_Q_AUTO_BUF_REG_FALLBACK);
}

static inline bool ublk_queue_use_user_copy(const struct ublk_queue *q)
{
	return !!(q->flags & UBLK_F_USER_COPY);
}

/* true when no data buffer is attached to the io command (zc/auto-zc) */
static inline int ublk_queue_no_buf(const struct ublk_queue *q)
{
	return ublk_queue_use_zc(q) || ublk_queue_use_auto_zc(q);
}

static inline int ublk_batch_commit_prepared(struct batch_commit_buf *cb)
{
	return cb->buf_idx != UBLKS_T_COMMIT_BUF_INV_IDX;
}

/*
 * 0-based position of @q among the queues handled by thread @t; q_map
 * stores position + 1 so that 0 means "not mapped to this thread".
 */
static inline unsigned ublk_queue_idx_in_thread(const struct ublk_thread *t,
						const struct ublk_queue *q)
{
	unsigned char idx;

	idx = t->q_map[q->q_id];
	ublk_assert(idx != 0);
	return idx - 1;
}

/*
 * Each IO's buffer index has to be calculated by this helper for
 * UBLKS_T_BATCH_IO
 */
static inline unsigned short ublk_batch_io_buf_idx(
		const struct ublk_thread *t, const struct ublk_queue *q,
		unsigned tag)
{
	return ublk_queue_idx_in_thread(t, q) * q->q_depth + tag;
}

/* Queue UBLK_U_IO_PREP_IO_CMDS for a specific queue with batch elements */
int ublk_batch_queue_prep_io_cmds(struct ublk_thread *t, struct ublk_queue *q);
/* Start fetching I/O commands using multishot UBLK_U_IO_FETCH_IO_CMDS */
void ublk_batch_start_fetch(struct ublk_thread *t);
/* Handle completion of batch I/O commands (prep/commit) */
void ublk_batch_compl_cmd(struct ublk_thread *t,
			  const struct io_uring_cqe *cqe);
/* Initialize batch I/O state and calculate buffer parameters */
void ublk_batch_prepare(struct ublk_thread *t);
/* Allocate and register commit buffers for batch operations */
int ublk_batch_alloc_buf(struct ublk_thread *t);
/* Free commit buffers and cleanup batch allocator */
void ublk_batch_free_buf(struct ublk_thread *t);
/* Prepare a new commit buffer for batching completed I/O operations */
void ublk_batch_prep_commit(struct ublk_thread *t);
/* Submit UBLK_U_IO_COMMIT_IO_CMDS with batched completed I/O operations */
void ublk_batch_commit_io_cmds(struct ublk_thread *t);
/* Add a completed I/O operation to the current batch commit buffer */
void ublk_batch_complete_io(struct ublk_thread *t, struct ublk_queue *q,
			    unsigned tag, int res);
void ublk_batch_setup_map(unsigned char (*q_thread_map)[UBLK_MAX_QUEUES],
			  int nthreads, int queues);

/*
 * Complete one io: batch mode adds it to the commit buffer, otherwise
 * the commit io command is issued immediately.
 */
static inline int ublk_complete_io(struct ublk_thread *t, struct ublk_queue *q,
				   unsigned tag, int res)
{
	if (ublk_queue_batch_io(q)) {
		ublk_batch_complete_io(t, q, tag, res);
		return 0;
	} else {
		struct ublk_io *io = &q->ios[tag];

		ublk_mark_io_done(io, res);
		return ublk_queue_io_cmd(t, io);
	}
}

/*
 * Account target ios queued for @tag; a negative @queued completes the
 * io immediately with that error code.
 */
static inline void ublk_queued_tgt_io(struct ublk_thread *t, struct ublk_queue *q,
				      unsigned tag, int queued)
{
	if (queued < 0)
		ublk_complete_io(t, q, tag, queued);
	else {
		struct ublk_io *io = ublk_get_io(q, tag);

		t->io_inflight += queued;
		io->tgt_ios = queued;
		io->result = 0;
	}
}

extern const struct ublk_tgt_ops null_tgt_ops;
extern const struct ublk_tgt_ops loop_tgt_ops;
extern const struct ublk_tgt_ops stripe_tgt_ops;
extern const struct ublk_tgt_ops fault_inject_tgt_ops;

void backing_file_tgt_deinit(struct ublk_dev *dev);
int backing_file_tgt_init(struct ublk_dev *dev, unsigned int nr_direct);

#endif