Diffstat (limited to 'include')
-rw-r--r--  include/asm-generic/vmlinux.lds.h      2
-rw-r--r--  include/linux/bio-integrity.h          1
-rw-r--r--  include/linux/bio.h                   18
-rw-r--r--  include/linux/blk-integrity.h         32
-rw-r--r--  include/linux/blk-mq-dma.h            25
-rw-r--r--  include/linux/blk-mq.h                 4
-rw-r--r--  include/linux/blk_types.h             19
-rw-r--r--  include/linux/blkdev.h                26
-rw-r--r--  include/linux/hw_bitfield.h           62
-rw-r--r--  include/linux/io_uring/cmd.h          69
-rw-r--r--  include/linux/io_uring_types.h        31
-rw-r--r--  include/linux/libata.h                 2
-rw-r--r--  include/linux/module.h                18
-rw-r--r--  include/linux/poison.h                 3
-rw-r--r--  include/linux/uio.h                    2
-rw-r--r--  include/scsi/libsas.h                  2
-rw-r--r--  include/scsi/scsi_host.h               2
-rw-r--r--  include/scsi/scsicam.h                 7
-rw-r--r--  include/trace/events/io_uring.h        4
-rw-r--r--  include/uapi/linux/io_uring.h         38
-rw-r--r--  include/uapi/linux/io_uring/query.h   41
21 files changed, 331 insertions, 77 deletions
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 0e35c526b281..8a9a2e732a65 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -832,6 +832,7 @@ defined(CONFIG_AUTOFDO_CLANG) || defined(CONFIG_PROPELLER_CLANG)
/* Required sections not related to debugging. */
#define ELF_DETAILS \
+ .modinfo : { *(.modinfo) } \
.comment 0 : { *(.comment) } \
.symtab 0 : { *(.symtab) } \
.strtab 0 : { *(.strtab) } \
@@ -1045,7 +1046,6 @@ defined(CONFIG_AUTOFDO_CLANG) || defined(CONFIG_PROPELLER_CLANG)
*(.discard.*) \
*(.export_symbol) \
*(.no_trim_symbol) \
- *(.modinfo) \
/* ld.bfd warns about .gnu.version* even when not emitted */ \
*(.gnu.version*) \
diff --git a/include/linux/bio-integrity.h b/include/linux/bio-integrity.h
index 0a25716820fe..851254f36eb3 100644
--- a/include/linux/bio-integrity.h
+++ b/include/linux/bio-integrity.h
@@ -13,6 +13,7 @@ enum bip_flags {
BIP_CHECK_GUARD = 1 << 5, /* guard check */
BIP_CHECK_REFTAG = 1 << 6, /* reftag check */
BIP_CHECK_APPTAG = 1 << 7, /* apptag check */
+ BIP_P2P_DMA = 1 << 8, /* using P2P address */
};
struct bio_integrity_payload {
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 46ffac5caab7..a64a30131031 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -322,8 +322,8 @@ static inline void bio_next_folio(struct folio_iter *fi, struct bio *bio)
void bio_trim(struct bio *bio, sector_t offset, sector_t size);
extern struct bio *bio_split(struct bio *bio, int sectors,
gfp_t gfp, struct bio_set *bs);
-int bio_split_rw_at(struct bio *bio, const struct queue_limits *lim,
- unsigned *segs, unsigned max_bytes);
+int bio_split_io_at(struct bio *bio, const struct queue_limits *lim,
+ unsigned *segs, unsigned max_bytes, unsigned len_align);
/**
* bio_next_split - get next @sectors from a bio, splitting if necessary
@@ -405,6 +405,11 @@ struct request_queue;
void bio_init(struct bio *bio, struct block_device *bdev, struct bio_vec *table,
unsigned short max_vecs, blk_opf_t opf);
+static inline void bio_init_inline(struct bio *bio, struct block_device *bdev,
+ unsigned short max_vecs, blk_opf_t opf)
+{
+ bio_init(bio, bdev, bio_inline_vecs(bio), max_vecs, opf);
+}
extern void bio_uninit(struct bio *);
void bio_reset(struct bio *bio, struct block_device *bdev, blk_opf_t opf);
void bio_chain(struct bio *, struct bio *);
@@ -441,7 +446,14 @@ int submit_bio_wait(struct bio *bio);
int bdev_rw_virt(struct block_device *bdev, sector_t sector, void *data,
size_t len, enum req_op op);
-int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter);
+int bio_iov_iter_get_pages_aligned(struct bio *bio, struct iov_iter *iter,
+ unsigned len_align_mask);
+
+static inline int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
+{
+ return bio_iov_iter_get_pages_aligned(bio, iter, 0);
+}
+
void bio_iov_bvec_set(struct bio *bio, const struct iov_iter *iter);
void __bio_release_pages(struct bio *bio, bool mark_dirty);
extern void bio_set_pages_dirty(struct bio *bio);
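
The new bio_init_inline() covers the common pattern of a bio embedded in a driver structure with its vec table placed directly behind it (the explicit bi_inline_vecs member is dropped from struct bio further down, replaced by bio_inline_vecs()). A minimal conversion sketch with hypothetical names:

#include <linux/bio.h>

/*
 * Hypothetical driver command: the vec array must directly follow the
 * embedded bio, since bio_init_inline() wires bi_io_vec up to the
 * memory right after the struct bio.
 */
struct foo_cmd {
	struct bio bio;
	struct bio_vec vecs[4];
};

static void foo_cmd_init(struct foo_cmd *cmd, struct block_device *bdev)
{
	/* Before: bio_init(&cmd->bio, bdev, cmd->vecs, 4, REQ_OP_READ); */
	bio_init_inline(&cmd->bio, bdev, 4, REQ_OP_READ);
}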
diff --git a/include/linux/blk-integrity.h b/include/linux/blk-integrity.h
index e67a2b6e8f11..b659373788f6 100644
--- a/include/linux/blk-integrity.h
+++ b/include/linux/blk-integrity.h
@@ -4,6 +4,7 @@
#include <linux/blk-mq.h>
#include <linux/bio-integrity.h>
+#include <linux/blk-mq-dma.h>
struct request;
@@ -26,11 +27,25 @@ static inline bool queue_limits_stack_integrity_bdev(struct queue_limits *t,
#ifdef CONFIG_BLK_DEV_INTEGRITY
int blk_rq_map_integrity_sg(struct request *, struct scatterlist *);
+
+static inline bool blk_rq_integrity_dma_unmap(struct request *req,
+ struct device *dma_dev, struct dma_iova_state *state,
+ size_t mapped_len)
+{
+ return blk_dma_unmap(req, dma_dev, state, mapped_len,
+ bio_integrity(req->bio)->bip_flags & BIP_P2P_DMA);
+}
+
int blk_rq_count_integrity_sg(struct request_queue *, struct bio *);
int blk_rq_integrity_map_user(struct request *rq, void __user *ubuf,
ssize_t bytes);
int blk_get_meta_cap(struct block_device *bdev, unsigned int cmd,
struct logical_block_metadata_cap __user *argp);
+bool blk_rq_integrity_dma_map_iter_start(struct request *req,
+ struct device *dma_dev, struct dma_iova_state *state,
+ struct blk_dma_iter *iter);
+bool blk_rq_integrity_dma_map_iter_next(struct request *req,
+ struct device *dma_dev, struct blk_dma_iter *iter);
static inline bool
blk_integrity_queue_supports_integrity(struct request_queue *q)
@@ -109,12 +124,29 @@ static inline int blk_rq_map_integrity_sg(struct request *q,
{
return 0;
}
+static inline bool blk_rq_integrity_dma_unmap(struct request *req,
+ struct device *dma_dev, struct dma_iova_state *state,
+ size_t mapped_len)
+{
+ return false;
+}
static inline int blk_rq_integrity_map_user(struct request *rq,
void __user *ubuf,
ssize_t bytes)
{
return -EINVAL;
}
+static inline bool blk_rq_integrity_dma_map_iter_start(struct request *req,
+ struct device *dma_dev, struct dma_iova_state *state,
+ struct blk_dma_iter *iter)
+{
+ return false;
+}
+static inline bool blk_rq_integrity_dma_map_iter_next(struct request *req,
+ struct device *dma_dev, struct blk_dma_iter *iter)
+{
+ return false;
+}
static inline struct blk_integrity *bdev_get_integrity(struct block_device *b)
{
return NULL;
diff --git a/include/linux/blk-mq-dma.h b/include/linux/blk-mq-dma.h
index c26a01aeae00..51829958d872 100644
--- a/include/linux/blk-mq-dma.h
+++ b/include/linux/blk-mq-dma.h
@@ -5,6 +5,13 @@
#include <linux/blk-mq.h>
#include <linux/pci-p2pdma.h>
+struct blk_map_iter {
+ struct bvec_iter iter;
+ struct bio *bio;
+ struct bio_vec *bvecs;
+ bool is_integrity;
+};
+
struct blk_dma_iter {
/* Output address range for this iteration */
dma_addr_t addr;
@@ -14,7 +21,7 @@ struct blk_dma_iter {
blk_status_t status;
/* Internal to blk_rq_dma_map_iter_* */
- struct req_iterator iter;
+ struct blk_map_iter iter;
struct pci_p2pdma_map_state p2pdma;
};
@@ -36,19 +43,20 @@ static inline bool blk_rq_dma_map_coalesce(struct dma_iova_state *state)
}
/**
- * blk_rq_dma_unmap - try to DMA unmap a request
+ * blk_dma_unmap - try to DMA unmap a request
* @req: request to unmap
* @dma_dev: device to unmap from
* @state: DMA IOVA state
* @mapped_len: number of bytes to unmap
+ * @is_p2p: true if mapped with PCI_P2PDMA_MAP_BUS_ADDR
*
* Returns %false if the caller needs to manually unmap every DMA segment
* mapped using @iter or %true if no work is left to be done.
*/
-static inline bool blk_rq_dma_unmap(struct request *req, struct device *dma_dev,
- struct dma_iova_state *state, size_t mapped_len)
+static inline bool blk_dma_unmap(struct request *req, struct device *dma_dev,
+ struct dma_iova_state *state, size_t mapped_len, bool is_p2p)
{
- if (req->cmd_flags & REQ_P2PDMA)
+ if (is_p2p)
return true;
if (dma_use_iova(state)) {
@@ -60,4 +68,11 @@ static inline bool blk_rq_dma_unmap(struct request *req, struct device *dma_dev,
return !dma_need_unmap(dma_dev);
}
+static inline bool blk_rq_dma_unmap(struct request *req, struct device *dma_dev,
+ struct dma_iova_state *state, size_t mapped_len)
+{
+ return blk_dma_unmap(req, dma_dev, state, mapped_len,
+ req->cmd_flags & REQ_P2PDMA);
+}
+
#endif /* BLK_MQ_DMA_H */
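
With the P2P decision now an explicit argument, the request-data path keeps its old behavior via the blk_rq_dma_unmap() wrapper, while the metadata path (blk_rq_integrity_dma_unmap() above) passes BIP_P2P_DMA instead of REQ_P2PDMA. A hedged sketch of a driver completion path using both; the per-segment fallbacks are hypothetical:

#include <linux/blk-integrity.h>
#include <linux/blk-mq-dma.h>

/* hypothetical per-segment fallbacks, not part of the kernel API */
static void foo_unmap_data_segments(struct request *req, struct device *dev);
static void foo_unmap_meta_segments(struct request *req, struct device *dev);

static void foo_complete_rq(struct request *req, struct device *dma_dev,
			    struct dma_iova_state *state, size_t mapped_len,
			    struct dma_iova_state *meta_state,
			    size_t meta_mapped_len)
{
	/* Data payload: P2P-ness is still derived from req->cmd_flags. */
	if (!blk_rq_dma_unmap(req, dma_dev, state, mapped_len))
		foo_unmap_data_segments(req, dma_dev);

	/* Metadata: P2P-ness comes from the bip flags instead. */
	if (blk_integrity_rq(req) &&
	    !blk_rq_integrity_dma_unmap(req, dma_dev, meta_state,
					meta_mapped_len))
		foo_unmap_meta_segments(req, dma_dev);
}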
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 2a5a828f19a0..b25d12545f46 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -507,6 +507,8 @@ enum hctx_type {
* request_queue.tag_set_list.
* @srcu: Use as lock when type of the request queue is blocking
* (BLK_MQ_F_BLOCKING).
+ * @tags_srcu: SRCU used to defer freeing of tags page_list to prevent
+ * use-after-free when iterating tags.
* @update_nr_hwq_lock:
* Synchronize updating nr_hw_queues with add/del disk &
* switching elevator.
@@ -531,6 +533,7 @@ struct blk_mq_tag_set {
struct mutex tag_list_lock;
struct list_head tag_list;
struct srcu_struct *srcu;
+ struct srcu_struct tags_srcu;
struct rw_semaphore update_nr_hwq_lock;
};
@@ -767,6 +770,7 @@ struct blk_mq_tags {
* request pool
*/
spinlock_t lock;
+ struct rcu_head rcu_head;
};
static inline struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags,
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 09b99d52fd36..8e8d1cc8b06c 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -198,10 +198,6 @@ static inline bool blk_path_error(blk_status_t error)
return true;
}
-struct bio_issue {
- u64 value;
-};
-
typedef __u32 __bitwise blk_opf_t;
typedef unsigned int blk_qc_t;
@@ -242,7 +238,8 @@ struct bio {
* on release of the bio.
*/
struct blkcg_gq *bi_blkg;
- struct bio_issue bi_issue;
+ /* Time that this bio was issued. */
+ u64 issue_time_ns;
#ifdef CONFIG_BLK_CGROUP_IOCOST
u64 bi_iocost_cost;
#endif
@@ -269,18 +266,16 @@ struct bio {
struct bio_vec *bi_io_vec; /* the actual vec list */
struct bio_set *bi_pool;
-
- /*
- * We can inline a number of vecs at the end of the bio, to avoid
- * double allocations for a small number of bio_vecs. This member
- * MUST obviously be kept at the very end of the bio.
- */
- struct bio_vec bi_inline_vecs[];
};
#define BIO_RESET_BYTES offsetof(struct bio, bi_max_vecs)
#define BIO_MAX_SECTORS (UINT_MAX >> SECTOR_SHIFT)
+static inline struct bio_vec *bio_inline_vecs(struct bio *bio)
+{
+ return (struct bio_vec *)(bio + 1);
+}
+
/*
* bio flags
*/
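
With bi_inline_vecs gone, the inline vecs are no longer a flexible-array member but simply the memory following the bio in the same allocation, which bio_inline_vecs() computes. A sketch of the single-allocation pattern this preserves (essentially what the bio allocator does internally):

#include <linux/bio.h>
#include <linux/slab.h>

static struct bio *foo_alloc_bio(struct block_device *bdev,
				 unsigned short nr_vecs, gfp_t gfp)
{
	struct bio *bio;

	/* One allocation: the vec table lives directly behind the bio. */
	bio = kmalloc(sizeof(*bio) + nr_vecs * sizeof(struct bio_vec), gfp);
	if (!bio)
		return NULL;
	bio_init(bio, bdev, bio_inline_vecs(bio), nr_vecs, REQ_OP_WRITE);
	return bio;
}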
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index fe1797bbec42..066e5309bd45 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -657,6 +657,7 @@ enum {
QUEUE_FLAG_DISABLE_WBT_DEF, /* for sched to disable/enable wbt */
QUEUE_FLAG_NO_ELV_SWITCH, /* can't switch elevator any more */
QUEUE_FLAG_QOS_ENABLED, /* qos is enabled */
+ QUEUE_FLAG_BIO_ISSUE_TIME, /* record bio->issue_time_ns */
QUEUE_FLAG_MAX
};
@@ -999,6 +1000,8 @@ extern int blk_register_queue(struct gendisk *disk);
extern void blk_unregister_queue(struct gendisk *disk);
void submit_bio_noacct(struct bio *bio);
struct bio *bio_split_to_limits(struct bio *bio);
+struct bio *bio_submit_split_bioset(struct bio *bio, unsigned int split_sectors,
+ struct bio_set *bs);
extern int blk_lld_busy(struct request_queue *q);
extern int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags);
@@ -1590,13 +1593,6 @@ static inline unsigned int bdev_dma_alignment(struct block_device *bdev)
return queue_dma_alignment(bdev_get_queue(bdev));
}
-static inline bool bdev_iter_is_aligned(struct block_device *bdev,
- struct iov_iter *iter)
-{
- return iov_iter_is_aligned(iter, bdev_dma_alignment(bdev),
- bdev_logical_block_size(bdev) - 1);
-}
-
static inline unsigned int
blk_lim_dma_alignment_and_pad(struct queue_limits *lim)
{
@@ -1660,7 +1656,7 @@ struct block_device_operations {
unsigned int (*check_events) (struct gendisk *disk,
unsigned int clearing);
void (*unlock_native_capacity) (struct gendisk *);
- int (*getgeo)(struct block_device *, struct hd_geometry *);
+ int (*getgeo)(struct gendisk *, struct hd_geometry *);
int (*set_read_only)(struct block_device *bdev, bool ro);
void (*free_disk)(struct gendisk *disk);
/* this callback is with swap_lock and sometimes page table lock held */
@@ -1870,6 +1866,20 @@ bdev_atomic_write_unit_max_bytes(struct block_device *bdev)
return queue_atomic_write_unit_max_bytes(bdev_get_queue(bdev));
}
+static inline int bio_split_rw_at(struct bio *bio,
+ const struct queue_limits *lim,
+ unsigned *segs, unsigned max_bytes)
+{
+ return bio_split_io_at(bio, lim, segs, max_bytes, lim->dma_alignment);
+}
+
+static inline int bio_iov_iter_get_bdev_pages(struct bio *bio,
+ struct iov_iter *iter, struct block_device *bdev)
+{
+ return bio_iov_iter_get_pages_aligned(bio, iter,
+ bdev_logical_block_size(bdev) - 1);
+}
+
#define DEFINE_IO_COMP_BATCH(name) struct io_comp_batch name = { }
#endif /* _LINUX_BLKDEV_H */
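
The removed bdev_iter_is_aligned() pre-check is replaced by passing the alignment mask into page extraction itself; bio_iov_iter_get_bdev_pages() bundles the logical-block-size mask for block device I/O. A before/after sketch for a direct-I/O style caller:

#include <linux/bio.h>
#include <linux/blkdev.h>

static int foo_dio_map_pages(struct bio *bio, struct iov_iter *iter,
			     struct block_device *bdev)
{
	/*
	 * Old pattern, with the now-removed helper:
	 *
	 *	if (!bdev_iter_is_aligned(bdev, iter))
	 *		return -EINVAL;
	 *	return bio_iov_iter_get_pages(bio, iter);
	 */
	return bio_iov_iter_get_bdev_pages(bio, iter, bdev);
}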
diff --git a/include/linux/hw_bitfield.h b/include/linux/hw_bitfield.h
new file mode 100644
index 000000000000..df202e167ce4
--- /dev/null
+++ b/include/linux/hw_bitfield.h
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2025, Collabora Ltd.
+ */
+
+#ifndef _LINUX_HW_BITFIELD_H
+#define _LINUX_HW_BITFIELD_H
+
+#include <linux/bitfield.h>
+#include <linux/build_bug.h>
+#include <linux/limits.h>
+
+/**
+ * FIELD_PREP_WM16() - prepare a bitfield element with a mask in the upper half
+ * @_mask: shifted mask defining the field's length and position
+ * @_val: value to put in the field
+ *
+ * FIELD_PREP_WM16() masks and shifts up the value, as well as bitwise ORs the
+ * result with the mask shifted up by 16.
+ *
+ * This is useful for a common design of hardware registers where the upper
+ * 16-bit half of a 32-bit register is used as a write-enable mask. In such a
+ * register, a bit in the lower half is only updated if the corresponding bit
+ * in the upper half is high.
+ */
+#define FIELD_PREP_WM16(_mask, _val) \
+ ({ \
+ typeof(_val) __val = _val; \
+ typeof(_mask) __mask = _mask; \
+ __BF_FIELD_CHECK(__mask, ((u16)0U), __val, \
+ "HWORD_UPDATE: "); \
+ (((typeof(__mask))(__val) << __bf_shf(__mask)) & (__mask)) | \
+ ((__mask) << 16); \
+ })
+
+/**
+ * FIELD_PREP_WM16_CONST() - prepare a constant bitfield element with a mask in
+ * the upper half
+ * @_mask: shifted mask defining the field's length and position
+ * @_val: value to put in the field
+ *
+ * FIELD_PREP_WM16_CONST() masks and shifts up the value, as well as bitwise ORs
+ * the result with the mask shifted up by 16.
+ *
+ * This is useful for a common design of hardware registers where the upper
+ * 16-bit half of a 32-bit register is used as a write-enable mask. In such a
+ * register, a bit in the lower half is only updated if the corresponding bit
+ * in the upper half is high.
+ *
+ * Unlike FIELD_PREP_WM16(), this is a constant expression and can therefore
+ * be used in initializers. Error checking is less comfortable for this
+ * version.
+ */
+#define FIELD_PREP_WM16_CONST(_mask, _val) \
+ ( \
+ FIELD_PREP_CONST(_mask, _val) | \
+ (BUILD_BUG_ON_ZERO(const_true((u64)(_mask) > U16_MAX)) + \
+ ((_mask) << 16)) \
+ )
+
+
+#endif /* _LINUX_HW_BITFIELD_H */
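
A minimal usage sketch for a write-masked register, with a made-up field layout; the point is that a single writel() updates only the selected field, with no read-modify-write:

#include <linux/hw_bitfield.h>
#include <linux/io.h>

#define FOO_IOMUX_FUNC	GENMASK(2, 0)	/* made-up 3-bit mux field */

static void foo_set_iomux(void __iomem *reg, u32 func)
{
	/*
	 * Lower half: the field value shifted into place; upper half: the
	 * matching write-enable bits. Other fields stay untouched.
	 */
	writel(FIELD_PREP_WM16(FOO_IOMUX_FUNC, func), reg);
}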
diff --git a/include/linux/io_uring/cmd.h b/include/linux/io_uring/cmd.h
index cfa6d0c0c322..7509025b4071 100644
--- a/include/linux/io_uring/cmd.h
+++ b/include/linux/io_uring/cmd.h
@@ -11,11 +11,14 @@
/* io_uring_cmd is being issued again */
#define IORING_URING_CMD_REISSUE (1U << 31)
+typedef void (*io_uring_cmd_tw_t)(struct io_uring_cmd *cmd,
+ unsigned issue_flags);
+
struct io_uring_cmd {
struct file *file;
const struct io_uring_sqe *sqe;
/* callback to defer completions to task context */
- void (*task_work_cb)(struct io_uring_cmd *cmd, unsigned);
+ io_uring_cmd_tw_t task_work_cb;
u32 cmd_op;
u32 flags;
u8 pdu[32]; /* available inline for free use */
@@ -53,11 +56,11 @@ int io_uring_cmd_import_fixed_vec(struct io_uring_cmd *ioucmd,
* Note: the caller should never hard code @issue_flags and is only allowed
* to pass the mask provided by the core io_uring code.
*/
-void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, u64 res2,
- unsigned issue_flags);
+void __io_uring_cmd_done(struct io_uring_cmd *cmd, s32 ret, u64 res2,
+ unsigned issue_flags, bool is_cqe32);
void __io_uring_cmd_do_in_task(struct io_uring_cmd *ioucmd,
- void (*task_work_cb)(struct io_uring_cmd *, unsigned),
+ io_uring_cmd_tw_t task_work_cb,
unsigned flags);
/*
@@ -70,6 +73,21 @@ void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd,
/* Execute the request from a blocking context */
void io_uring_cmd_issue_blocking(struct io_uring_cmd *ioucmd);
+/*
+ * Select a buffer from the provided buffer group for multishot uring_cmd.
+ * Returns the selected buffer address and size.
+ */
+struct io_br_sel io_uring_cmd_buffer_select(struct io_uring_cmd *ioucmd,
+ unsigned buf_group, size_t *len,
+ unsigned int issue_flags);
+
+/*
+ * Complete a multishot uring_cmd event. This will post a CQE to the completion
+ * queue and update the provided buffer.
+ */
+bool io_uring_mshot_cmd_post_cqe(struct io_uring_cmd *ioucmd,
+ struct io_br_sel *sel, unsigned int issue_flags);
+
#else
static inline int
io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
@@ -86,13 +104,12 @@ static inline int io_uring_cmd_import_fixed_vec(struct io_uring_cmd *ioucmd,
{
return -EOPNOTSUPP;
}
-static inline void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret,
- u64 ret2, unsigned issue_flags)
+static inline void __io_uring_cmd_done(struct io_uring_cmd *cmd, s32 ret,
+ u64 ret2, unsigned issue_flags, bool is_cqe32)
{
}
static inline void __io_uring_cmd_do_in_task(struct io_uring_cmd *ioucmd,
- void (*task_work_cb)(struct io_uring_cmd *, unsigned),
- unsigned flags)
+ io_uring_cmd_tw_t task_work_cb, unsigned flags)
{
}
static inline void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd,
@@ -102,28 +119,28 @@ static inline void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd,
static inline void io_uring_cmd_issue_blocking(struct io_uring_cmd *ioucmd)
{
}
-#endif
-
-/*
- * Polled completions must ensure they are coming from a poll queue, and
- * hence are completed inside the usual poll handling loops.
- */
-static inline void io_uring_cmd_iopoll_done(struct io_uring_cmd *ioucmd,
- ssize_t ret, ssize_t res2)
+static inline struct io_br_sel
+io_uring_cmd_buffer_select(struct io_uring_cmd *ioucmd, unsigned buf_group,
+ size_t *len, unsigned int issue_flags)
+{
+ return (struct io_br_sel) { .val = -EOPNOTSUPP };
+}
+static inline bool io_uring_mshot_cmd_post_cqe(struct io_uring_cmd *ioucmd,
+ struct io_br_sel *sel, unsigned int issue_flags)
{
- lockdep_assert(in_task());
- io_uring_cmd_done(ioucmd, ret, res2, 0);
+ return true;
}
+#endif
/* users must follow the IOU_F_TWQ_LAZY_WAKE semantics */
static inline void io_uring_cmd_do_in_task_lazy(struct io_uring_cmd *ioucmd,
- void (*task_work_cb)(struct io_uring_cmd *, unsigned))
+ io_uring_cmd_tw_t task_work_cb)
{
__io_uring_cmd_do_in_task(ioucmd, task_work_cb, IOU_F_TWQ_LAZY_WAKE);
}
static inline void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
- void (*task_work_cb)(struct io_uring_cmd *, unsigned))
+ io_uring_cmd_tw_t task_work_cb)
{
__io_uring_cmd_do_in_task(ioucmd, task_work_cb, 0);
}
@@ -142,6 +159,18 @@ static inline void *io_uring_cmd_ctx_handle(struct io_uring_cmd *cmd)
return cmd_to_io_kiocb(cmd)->ctx;
}
+static inline void io_uring_cmd_done(struct io_uring_cmd *ioucmd, s32 ret,
+ unsigned issue_flags)
+{
+ return __io_uring_cmd_done(ioucmd, ret, 0, issue_flags, false);
+}
+
+static inline void io_uring_cmd_done32(struct io_uring_cmd *ioucmd, s32 ret,
+ u64 res2, unsigned issue_flags)
+{
+ return __io_uring_cmd_done(ioucmd, ret, res2, issue_flags, true);
+}
+
int io_buffer_register_bvec(struct io_uring_cmd *cmd, struct request *rq,
void (*release)(void *), unsigned int index,
unsigned int issue_flags);
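
io_uring_cmd_done() loses its res2 argument for the common 16b case; completions that need the two extra result words use io_uring_cmd_done32() instead. A hedged sketch of a task-work completion callback, with the pdu layout hypothetical (it must fit in the 32-byte pdu area):

#include <linux/io_uring/cmd.h>

struct foo_cmd_pdu {		/* hypothetical driver state */
	s32 status;
	u64 extra;
	bool big_cqe;
};

static void foo_cmd_tw_cb(struct io_uring_cmd *cmd, unsigned int issue_flags)
{
	struct foo_cmd_pdu *pdu = io_uring_cmd_to_pdu(cmd, struct foo_cmd_pdu);

	if (pdu->big_cqe)	/* 32b CQE: carries res2 */
		io_uring_cmd_done32(cmd, pdu->status, pdu->extra, issue_flags);
	else
		io_uring_cmd_done(cmd, pdu->status, issue_flags);
}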
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 12f5ee43850e..c2ea6280901d 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -86,6 +86,25 @@ struct io_mapped_region {
};
/*
+ * Return value from io_buffer_list selection, to avoid stashing it in
+ * struct io_kiocb. For legacy/classic provided buffers, keeping a reference
+ * across execution contexts is fine. But for ring provided buffers, the
+ * list may go away as soon as ->uring_lock is dropped. As the io_kiocb
+ * persists, it's better to just keep the buffer local for those cases.
+ */
+struct io_br_sel {
+ struct io_buffer_list *buf_list;
+ /*
+ * Some selection parts return the user address, others return an error.
+ */
+ union {
+ void __user *addr;
+ ssize_t val;
+ };
+};
+
+
+/*
* Arbitrary limit, can be raised if need be
*/
#define IO_RINGFD_REG_MAX 16
@@ -671,12 +690,6 @@ struct io_kiocb {
/* stores selected buf, valid IFF REQ_F_BUFFER_SELECTED is set */
struct io_buffer *kbuf;
- /*
- * stores buffer ID for ring provided buffers, valid IFF
- * REQ_F_BUFFER_RING is set.
- */
- struct io_buffer_list *buf_list;
-
struct io_rsrc_node *buf_node;
};
@@ -724,10 +737,4 @@ struct io_overflow_cqe {
struct list_head list;
struct io_uring_cqe cqe;
};
-
-static inline bool io_ctx_cqe32(struct io_ring_ctx *ctx)
-{
- return ctx->flags & IORING_SETUP_CQE32;
-}
-
#endif
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 0620dd67369f..21de0935775d 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -1203,7 +1203,7 @@ extern void ata_qc_complete(struct ata_queued_cmd *qc);
extern u64 ata_qc_get_active(struct ata_port *ap);
extern void ata_scsi_simulate(struct ata_device *dev, struct scsi_cmnd *cmd);
extern int ata_std_bios_param(struct scsi_device *sdev,
- struct block_device *bdev,
+ struct gendisk *unused,
sector_t capacity, int geom[]);
extern void ata_scsi_unlock_native_capacity(struct scsi_device *sdev);
extern int ata_scsi_sdev_init(struct scsi_device *sdev);
diff --git a/include/linux/module.h b/include/linux/module.h
index 3319a5269d28..e135cc79acee 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -244,14 +244,22 @@ struct module_kobject *lookup_or_create_module_kobject(const char *name);
/* What your module does. */
#define MODULE_DESCRIPTION(_description) MODULE_INFO(description, _description)
-#ifdef MODULE
+/*
+ * Format: __mod_device_table__kmod_<modname>__<type>__<name>
+ * Parts of the string `__kmod_` and `__` are used as delimiters when parsing
+ * a symbol in file2alias.c
+ */
+#define __mod_device_table(type, name) \
+ __PASTE(__mod_device_table__, \
+ __PASTE(__KBUILD_MODNAME, \
+ __PASTE(__, \
+ __PASTE(type, \
+ __PASTE(__, name)))))
+
/* Creates an alias so file2alias.c can find device table. */
#define MODULE_DEVICE_TABLE(type, name) \
-static typeof(name) __mod_device_table__##type##__##name \
+static typeof(name) __mod_device_table(type, name) \
__attribute__ ((used, alias(__stringify(name))))
-#else /* !MODULE */
-#define MODULE_DEVICE_TABLE(type, name)
-#endif
/* Version of form [<epoch>:]<version>[-<extra-version>].
* Or for CVS/RCS ID version, everything but the number is stripped.
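
With the #ifdef MODULE guard gone, the alias is emitted for built-in code too, which lines up with .modinfo now being kept in vmlinux via the linker-script change above. A rough illustration of what the macro now expands to, assuming a source file compiled with KBUILD_MODNAME "foo":

MODULE_DEVICE_TABLE(of, foo_of_match);

/* ...roughly expands to: */
static typeof(foo_of_match)
__mod_device_table__kmod_foo__of__foo_of_match
	__attribute__((used, alias("foo_of_match")));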
diff --git a/include/linux/poison.h b/include/linux/poison.h
index 8ca2235f78d5..299e2dd7da6d 100644
--- a/include/linux/poison.h
+++ b/include/linux/poison.h
@@ -90,4 +90,7 @@
/********** lib/stackdepot.c **********/
#define STACK_DEPOT_POISON ((void *)(0xD390 + POISON_POINTER_DELTA))
+/********** io_uring/ **********/
+#define IO_URING_PTR_POISON ((void *)(0x1091UL + POISON_POINTER_DELTA))
+
#endif
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 2e86c653186c..5b127043a151 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -286,8 +286,6 @@ size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i);
#endif
size_t iov_iter_zero(size_t bytes, struct iov_iter *);
-bool iov_iter_is_aligned(const struct iov_iter *i, unsigned addr_mask,
- unsigned len_mask);
unsigned long iov_iter_alignment(const struct iov_iter *i);
unsigned long iov_iter_gap_alignment(const struct iov_iter *i);
void iov_iter_init(struct iov_iter *i, unsigned int direction, const struct iovec *iov,
diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h
index ba460b6c0374..9c6e90829dbd 100644
--- a/include/scsi/libsas.h
+++ b/include/scsi/libsas.h
@@ -685,7 +685,7 @@ extern int sas_queuecommand(struct Scsi_Host *, struct scsi_cmnd *);
extern int sas_target_alloc(struct scsi_target *);
int sas_sdev_configure(struct scsi_device *dev, struct queue_limits *lim);
extern int sas_change_queue_depth(struct scsi_device *, int new_depth);
-extern int sas_bios_param(struct scsi_device *, struct block_device *,
+extern int sas_bios_param(struct scsi_device *, struct gendisk *,
sector_t capacity, int *hsc);
int sas_execute_internal_abort_single(struct domain_device *device,
u16 tag, unsigned int qid,
diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
index c53812b9026f..f5a243261236 100644
--- a/include/scsi/scsi_host.h
+++ b/include/scsi/scsi_host.h
@@ -318,7 +318,7 @@ struct scsi_host_template {
*
* Status: OPTIONAL
*/
- int (* bios_param)(struct scsi_device *, struct block_device *,
+ int (* bios_param)(struct scsi_device *, struct gendisk *,
sector_t, int []);
/*
diff --git a/include/scsi/scsicam.h b/include/scsi/scsicam.h
index 08edd603e521..1131f51ed2c8 100644
--- a/include/scsi/scsicam.h
+++ b/include/scsi/scsicam.h
@@ -13,7 +13,8 @@
#ifndef SCSICAM_H
#define SCSICAM_H
-int scsicam_bios_param(struct block_device *bdev, sector_t capacity, int *ip);
-bool scsi_partsize(struct block_device *bdev, sector_t capacity, int geom[3]);
-unsigned char *scsi_bios_ptable(struct block_device *bdev);
+struct gendisk;
+int scsicam_bios_param(struct gendisk *disk, sector_t capacity, int *ip);
+bool scsi_partsize(struct gendisk *disk, sector_t capacity, int geom[3]);
+unsigned char *scsi_bios_ptable(struct gendisk *disk);
#endif /* def SCSICAM_H */
diff --git a/include/trace/events/io_uring.h b/include/trace/events/io_uring.h
index 178ab6f611be..45d15460b495 100644
--- a/include/trace/events/io_uring.h
+++ b/include/trace/events/io_uring.h
@@ -340,8 +340,8 @@ TP_PROTO(struct io_ring_ctx *ctx, void *req, struct io_uring_cqe *cqe),
__entry->user_data = cqe->user_data;
__entry->res = cqe->res;
__entry->cflags = cqe->flags;
- __entry->extra1 = io_ctx_cqe32(ctx) ? cqe->big_cqe[0] : 0;
- __entry->extra2 = io_ctx_cqe32(ctx) ? cqe->big_cqe[1] : 0;
+ __entry->extra1 = ctx->flags & IORING_SETUP_CQE32 || cqe->flags & IORING_CQE_F_32 ? cqe->big_cqe[0] : 0;
+ __entry->extra2 = ctx->flags & IORING_SETUP_CQE32 || cqe->flags & IORING_CQE_F_32 ? cqe->big_cqe[1] : 0;
),
TP_printk("ring %p, req %p, user_data 0x%llx, result %d, cflags 0x%x "
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 6957dc539d83..a0cc1cc0dd01 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -225,6 +225,12 @@ enum io_uring_sqe_flags_bit {
/* Use hybrid poll in iopoll process */
#define IORING_SETUP_HYBRID_IOPOLL (1U << 17)
+/*
+ * Allow both 16b and 32b CQEs. If a 32b CQE is posted, it will have
+ * IORING_CQE_F_32 set in cqe->flags.
+ */
+#define IORING_SETUP_CQE_MIXED (1U << 18)
+
enum io_uring_op {
IORING_OP_NOP,
IORING_OP_READV,
@@ -298,9 +304,13 @@ enum io_uring_op {
* sqe->uring_cmd_flags top 8bits aren't available for userspace
* IORING_URING_CMD_FIXED use registered buffer; pass this flag
* along with setting sqe->buf_index.
+ * IORING_URING_CMD_MULTISHOT must be used with buffer select, like other
+ * multishot commands. Not compatible with
+ * IORING_URING_CMD_FIXED, for now.
*/
#define IORING_URING_CMD_FIXED (1U << 0)
-#define IORING_URING_CMD_MASK IORING_URING_CMD_FIXED
+#define IORING_URING_CMD_MULTISHOT (1U << 1)
+#define IORING_URING_CMD_MASK (IORING_URING_CMD_FIXED | IORING_URING_CMD_MULTISHOT)
/*
@@ -454,6 +464,7 @@ enum io_uring_msg_ring_flags {
#define IORING_NOP_FIXED_FILE (1U << 2)
#define IORING_NOP_FIXED_BUFFER (1U << 3)
#define IORING_NOP_TW (1U << 4)
+#define IORING_NOP_CQE32 (1U << 5)
/*
* IO completion data structure (Completion Queue Entry)
@@ -487,12 +498,22 @@ struct io_uring_cqe {
* other provided buffer type, all completions with a
* buffer passed back is automatically returned to the
* application.
+ * IORING_CQE_F_SKIP If set, then the application/liburing must ignore this
+ * CQE. Its only purpose is to fill a gap in the ring,
+ * when posting a large CQE is attempted and the ring has
+ * just a single small CQE worth of space left before
+ * wrapping.
+ * IORING_CQE_F_32 If set, this is a 32b/big-cqe posting. Use with rings
+ * setup in a mixed CQE mode, where both 16b and 32b
+ * CQEs may be posted to the CQ ring.
*/
#define IORING_CQE_F_BUFFER (1U << 0)
#define IORING_CQE_F_MORE (1U << 1)
#define IORING_CQE_F_SOCK_NONEMPTY (1U << 2)
#define IORING_CQE_F_NOTIF (1U << 3)
#define IORING_CQE_F_BUF_MORE (1U << 4)
+#define IORING_CQE_F_SKIP (1U << 5)
+#define IORING_CQE_F_32 (1U << 15)
#define IORING_CQE_BUFFER_SHIFT 16
@@ -665,6 +686,12 @@ enum io_uring_register_op {
IORING_REGISTER_MEM_REGION = 34,
+ /* query various aspects of io_uring, see linux/io_uring/query.h */
+ IORING_REGISTER_QUERY = 35,
+
+ /* return zcrx buffers back into circulation */
+ IORING_REGISTER_ZCRX_REFILL = 36,
+
/* this goes last */
IORING_REGISTER_LAST,
@@ -1046,6 +1073,15 @@ struct io_uring_zcrx_ifq_reg {
__u64 __resv[3];
};
+struct io_uring_zcrx_sync_refill {
+ __u32 zcrx_id;
+ /* the number of entries to return */
+ __u32 nr_entries;
+ /* pointer to an array of struct io_uring_zcrx_rqe */
+ __u64 rqes;
+ __u64 __resv[2];
+};
+
#ifdef __cplusplus
}
#endif
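
How a consumer might step through a mixed-size CQ ring: the ring remains an array of 16b slots, a CQE carrying IORING_CQE_F_32 occupies two of them, and IORING_CQE_F_SKIP entries are pure padding at the wrap point. A hedged userspace sketch, assuming this slot accounting:

#include <linux/io_uring.h>

/*
 * Returns the CQE at *head and advances it; NULL for padding entries
 * the caller should simply skip over.
 */
static struct io_uring_cqe *foo_next_cqe(struct io_uring_cqe *cq,
					 unsigned int *head,
					 unsigned int mask)
{
	struct io_uring_cqe *cqe = &cq[*head & mask];

	/* a 32b CQE consumes two 16b ring slots (assumption) */
	*head += (cqe->flags & IORING_CQE_F_32) ? 2 : 1;
	if (cqe->flags & IORING_CQE_F_SKIP)
		return NULL;
	return cqe;
}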
diff --git a/include/uapi/linux/io_uring/query.h b/include/uapi/linux/io_uring/query.h
new file mode 100644
index 000000000000..5d754322a27c
--- /dev/null
+++ b/include/uapi/linux/io_uring/query.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR MIT */
+/*
+ * Header file for the io_uring query interface.
+ */
+#ifndef LINUX_IO_URING_QUERY_H
+#define LINUX_IO_URING_QUERY_H
+
+#include <linux/types.h>
+
+struct io_uring_query_hdr {
+ __u64 next_entry;
+ __u64 query_data;
+ __u32 query_op;
+ __u32 size;
+ __s32 result;
+ __u32 __resv[3];
+};
+
+enum {
+ IO_URING_QUERY_OPCODES = 0,
+
+ __IO_URING_QUERY_MAX,
+};
+
+/* Doesn't require a ring */
+struct io_uring_query_opcode {
+ /* The number of supported IORING_OP_* opcodes */
+ __u32 nr_request_opcodes;
+ /* The number of supported IORING_[UN]REGISTER_* opcodes */
+ __u32 nr_register_opcodes;
+ /* Bitmask of all supported IORING_FEAT_* flags */
+ __u64 feature_flags;
+ /* Bitmask of all supported IORING_SETUP_* flags */
+ __u64 ring_setup_flags;
+ /* Bitmask of all supported IORING_ENTER_* flags */
+ __u64 enter_flags;
+ /* Bitmask of all supported IOSQE_* flags */
+ __u64 sqe_flags;
+};
+
+#endif
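
A hedged userspace sketch of driving the query interface. The exact calling convention of IORING_REGISTER_QUERY is an assumption here, including that no ring fd is needed, which the "Doesn't require a ring" comment suggests:

#include <stdint.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <linux/io_uring.h>
#include <linux/io_uring/query.h>

static int foo_query_opcodes(struct io_uring_query_opcode *out)
{
	struct io_uring_query_hdr hdr;

	memset(&hdr, 0, sizeof(hdr));
	hdr.query_op = IO_URING_QUERY_OPCODES;	/* what to ask for */
	hdr.query_data = (uintptr_t)out;	/* result buffer */
	hdr.size = sizeof(*out);
	/* next_entry could chain further headers; zero terminates. */

	if (syscall(__NR_io_uring_register, -1, IORING_REGISTER_QUERY,
		    &hdr, 1) < 0)
		return -1;
	return hdr.result;	/* per-entry status (assumption) */
}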