From 8165b566049b14152873011ea540eb22eae5111d Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Fri, 23 Jun 2023 10:33:11 -0600
Subject: io_uring/cancel: add IORING_ASYNC_CANCEL_USERDATA

Add a flag to explicitly match on user_data in the request for
cancelation purposes. Matching on user_data is the default behavior if
none of the other match flags are set, but this flag must be set
explicitly if we also want to match on user_data in combination with
one of the other match keys.

Signed-off-by: Jens Axboe
---
 include/uapi/linux/io_uring.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 08720c7bd92f..97246ec386d4 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -299,11 +299,13 @@ enum io_uring_op {
  *				request 'user_data'
  * IORING_ASYNC_CANCEL_ANY	Match any request
  * IORING_ASYNC_CANCEL_FD_FIXED	'fd' passed in is a fixed descriptor
+ * IORING_ASYNC_CANCEL_USERDATA	Match on user_data, default for no other key
  */
 #define IORING_ASYNC_CANCEL_ALL		(1U << 0)
 #define IORING_ASYNC_CANCEL_FD		(1U << 1)
 #define IORING_ASYNC_CANCEL_ANY		(1U << 2)
 #define IORING_ASYNC_CANCEL_FD_FIXED	(1U << 3)
+#define IORING_ASYNC_CANCEL_USERDATA	(1U << 4)
 
 /*
  * send/sendmsg and recv/recvmsg flags (sqe->ioprio)
--
cgit v1.2.3


From d7b8b079a8f6bc007d06d9ee468659dae6053e13 Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Fri, 23 Jun 2023 10:36:43 -0600
Subject: io_uring/cancel: support opcode based lookup and cancelation

Add an IORING_ASYNC_CANCEL_OP flag for cancelation, which allows the
application to target cancelation based on the opcode of the original
request.

Signed-off-by: Jens Axboe
---
 include/uapi/linux/io_uring.h |  2 ++
 io_uring/cancel.c             | 17 ++++++++++++++---
 io_uring/cancel.h             |  2 +-
 io_uring/poll.c               |  3 ++-
 4 files changed, 19 insertions(+), 5 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 97246ec386d4..b64ddd41468e 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -300,12 +300,14 @@ enum io_uring_op {
  * IORING_ASYNC_CANCEL_ANY	Match any request
  * IORING_ASYNC_CANCEL_FD_FIXED	'fd' passed in is a fixed descriptor
  * IORING_ASYNC_CANCEL_USERDATA	Match on user_data, default for no other key
+ * IORING_ASYNC_CANCEL_OP	Match request based on opcode
  */
 #define IORING_ASYNC_CANCEL_ALL		(1U << 0)
 #define IORING_ASYNC_CANCEL_FD		(1U << 1)
 #define IORING_ASYNC_CANCEL_ANY		(1U << 2)
 #define IORING_ASYNC_CANCEL_FD_FIXED	(1U << 3)
 #define IORING_ASYNC_CANCEL_USERDATA	(1U << 4)
+#define IORING_ASYNC_CANCEL_OP		(1U << 5)
 
 /*
  * send/sendmsg and recv/recvmsg flags (sqe->ioprio)
diff --git a/io_uring/cancel.c b/io_uring/cancel.c
index 20612e93a354..d91116b032eb 100644
--- a/io_uring/cancel.c
+++ b/io_uring/cancel.c
@@ -22,11 +22,12 @@ struct io_cancel {
 	u64				addr;
 	u32				flags;
 	s32				fd;
+	u8				opcode;
 };
 
 #define CANCEL_FLAGS	(IORING_ASYNC_CANCEL_ALL | IORING_ASYNC_CANCEL_FD | \
 			 IORING_ASYNC_CANCEL_ANY | IORING_ASYNC_CANCEL_FD_FIXED | \
-			 IORING_ASYNC_CANCEL_USERDATA)
+			 IORING_ASYNC_CANCEL_USERDATA | IORING_ASYNC_CANCEL_OP)
 
 /*
  * Returns true if the request matches the criteria outlined by 'cd'.
@@ -38,7 +39,7 @@ bool io_cancel_req_match(struct io_kiocb *req, struct io_cancel_data *cd)
 	if (req->ctx != cd->ctx)
 		return false;
 
-	if (!(cd->flags & (IORING_ASYNC_CANCEL_FD)))
+	if (!(cd->flags & (IORING_ASYNC_CANCEL_FD | IORING_ASYNC_CANCEL_OP)))
 		match_user_data = true;
 
 	if (cd->flags & IORING_ASYNC_CANCEL_ANY)
@@ -47,6 +48,10 @@ bool io_cancel_req_match(struct io_kiocb *req, struct io_cancel_data *cd)
 		if (req->file != cd->file)
 			return false;
 	}
+	if (cd->flags & IORING_ASYNC_CANCEL_OP) {
+		if (req->opcode != cd->opcode)
+			return false;
+	}
 	if (match_user_data && req->cqe.user_data != cd->data)
 		return false;
 	if (cd->flags & IORING_ASYNC_CANCEL_ALL) {
@@ -127,7 +132,7 @@ int io_async_cancel_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 
 	if (unlikely(req->flags & REQ_F_BUFFER_SELECT))
 		return -EINVAL;
-	if (sqe->off || sqe->len || sqe->splice_fd_in)
+	if (sqe->off || sqe->splice_fd_in)
 		return -EINVAL;
 
 	cancel->addr = READ_ONCE(sqe->addr);
@@ -139,6 +144,11 @@ int io_async_cancel_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 			return -EINVAL;
 		cancel->fd = READ_ONCE(sqe->fd);
 	}
+	if (cancel->flags & IORING_ASYNC_CANCEL_OP) {
+		if (cancel->flags & IORING_ASYNC_CANCEL_ANY)
+			return -EINVAL;
+		cancel->opcode = READ_ONCE(sqe->len);
+	}
 
 	return 0;
 }
@@ -185,6 +195,7 @@ int io_async_cancel(struct io_kiocb *req, unsigned int issue_flags)
 		.ctx	= req->ctx,
 		.data	= cancel->addr,
 		.flags	= cancel->flags,
+		.opcode	= cancel->opcode,
 		.seq	= atomic_inc_return(&req->ctx->cancel_seq),
 	};
 	struct io_uring_task *tctx = req->task->io_uring;
diff --git a/io_uring/cancel.h b/io_uring/cancel.h
index 496ce4dac78e..fc98622e6166 100644
--- a/io_uring/cancel.h
+++ b/io_uring/cancel.h
@@ -8,11 +8,11 @@ struct io_cancel_data {
 		u64 data;
 		struct file *file;
 	};
+	u8 opcode;
 	u32 flags;
 	int seq;
 };
 
-
 int io_async_cancel_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
 int io_async_cancel(struct io_kiocb *req, unsigned int issue_flags);
 
diff --git a/io_uring/poll.c b/io_uring/poll.c
index dc1219f606e5..65ec363f6377 100644
--- a/io_uring/poll.c
+++ b/io_uring/poll.c
@@ -851,7 +851,8 @@ static int __io_poll_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd,
 	struct io_hash_bucket *bucket;
 	struct io_kiocb *req;
 
-	if (cd->flags & (IORING_ASYNC_CANCEL_FD|IORING_ASYNC_CANCEL_ANY))
+	if (cd->flags & (IORING_ASYNC_CANCEL_FD | IORING_ASYNC_CANCEL_OP |
+			 IORING_ASYNC_CANCEL_ANY))
 		req = io_poll_file_find(ctx, cd, table, &bucket);
 	else
 		req = io_poll_find(ctx, false, cd, table, &bucket);
--
cgit v1.2.3


From f77569d22ad91dc25de294864fa5b24d37ddc149 Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Thu, 22 Jun 2023 13:03:52 -0600
Subject: io_uring/cancel: wire up IORING_ASYNC_CANCEL_OP for sync cancel

Allow usage of IORING_ASYNC_CANCEL_OP through the sync cancelation API
as well.
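For illustration, a minimal userspace sketch of opcode based
cancelation, async and sync. This is not part of the patch; it assumes
liburing's io_uring_prep_cancel() and io_uring_register_sync_cancel(),
headers new enough to carry these flags and the sync-cancel 'opcode'
field, and an already initialized 'ring'. No dedicated helper for the
opcode key is assumed, so the sketch stores it in sqe->len by hand,
which is where the kernel reads it from:

    /* assumes: #include <liburing.h>, struct io_uring ring set up */
    int ret;

    /* async: cancel all pending requests matching the given opcode */
    struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);

    io_uring_prep_cancel(sqe, NULL, IORING_ASYNC_CANCEL_OP |
                                    IORING_ASYNC_CANCEL_ALL);
    sqe->len = IORING_OP_POLL_ADD;      /* opcode key lives in sqe->len */
    ret = io_uring_submit(&ring);

    /* sync: same match criteria, but wait for cancelation to complete */
    struct io_uring_sync_cancel_reg reg = { };

    reg.flags = IORING_ASYNC_CANCEL_OP | IORING_ASYNC_CANCEL_ALL;
    reg.opcode = IORING_OP_POLL_ADD;
    reg.timeout.tv_sec = -1;            /* assumed convention: -1/-1 */
    reg.timeout.tv_nsec = -1;           /* means wait without timeout */
    ret = io_uring_register_sync_cancel(&ring, &reg);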
Signed-off-by: Jens Axboe
---
 include/uapi/linux/io_uring.h |  4 +++-
 io_uring/cancel.c             | 11 ++++++++---
 2 files changed, 11 insertions(+), 4 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index b64ddd41468e..36f9c73082de 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -701,7 +701,9 @@ struct io_uring_sync_cancel_reg {
 	__s32				fd;
 	__u32				flags;
 	struct __kernel_timespec	timeout;
-	__u64				pad[4];
+	__u8				opcode;
+	__u8				pad[7];
+	__u64				pad2[3];
 };
 
 /*
diff --git a/io_uring/cancel.c b/io_uring/cancel.c
index d91116b032eb..7b23607cf4af 100644
--- a/io_uring/cancel.c
+++ b/io_uring/cancel.c
@@ -265,17 +265,22 @@ int io_sync_cancel(struct io_ring_ctx *ctx, void __user *arg)
 	struct io_uring_sync_cancel_reg sc;
 	struct fd f = { };
 	DEFINE_WAIT(wait);
-	int ret;
+	int ret, i;
 
 	if (copy_from_user(&sc, arg, sizeof(sc)))
 		return -EFAULT;
 	if (sc.flags & ~CANCEL_FLAGS)
 		return -EINVAL;
-	if (sc.pad[0] || sc.pad[1] || sc.pad[2] || sc.pad[3])
-		return -EINVAL;
+	for (i = 0; i < ARRAY_SIZE(sc.pad); i++)
+		if (sc.pad[i])
+			return -EINVAL;
+	for (i = 0; i < ARRAY_SIZE(sc.pad2); i++)
+		if (sc.pad2[i])
+			return -EINVAL;
 
 	cd.data = sc.addr;
 	cd.flags = sc.flags;
+	cd.opcode = sc.opcode;
 
 	/* we can grab a normal file descriptor upfront */
 	if ((cd.flags & IORING_ASYNC_CANCEL_FD) &&
--
cgit v1.2.3


From 8e9fad0e70b7b62848e0aeb1a873903b9ce4d7c4 Mon Sep 17 00:00:00 2001
From: Breno Leitao
Date: Tue, 27 Jun 2023 06:44:24 -0700
Subject: io_uring: Add io_uring command support for sockets

Enable io_uring commands on network sockets. Create two new
SOCKET_URING_OP commands that will operate on sockets.

In order to call ioctl on sockets, use the file_operations->uring_cmd
callback, and map it to a uring socket function that handles the
SOCKET_URING_OP accordingly and calls the socket ioctls.

This patch was tested by creating a new test case in liburing.
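For illustration, a hypothetical userspace sketch of a SIOCINQ query
with liburing (not from this patch; it assumes an initialized 'ring'
and a connected socket 'sockfd'). No dedicated prep helper is assumed,
so liburing's generic SQE initializer is used and cmd_op is filled in
directly; the ioctl result comes back in cqe->res:

    /* assumes: #include <liburing.h> and #include <stdio.h> */
    struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
    struct io_uring_cqe *cqe;

    io_uring_prep_rw(IORING_OP_URING_CMD, sqe, sockfd, NULL, 0, 0);
    sqe->cmd_op = SOCKET_URING_OP_SIOCINQ;  /* cmd_op overlays sqe->off */

    io_uring_submit(&ring);
    io_uring_wait_cqe(&ring, &cqe);
    if (cqe->res < 0)
        fprintf(stderr, "SIOCINQ failed: %d\n", cqe->res);
    else
        printf("%d bytes of unread data\n", cqe->res);
    io_uring_cqe_seen(&ring, cqe);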
Link: https://github.com/leitao/liburing/tree/io_uring_cmd
Signed-off-by: Breno Leitao
Acked-by: Jakub Kicinski
Link: https://lore.kernel.org/r/20230627134424.2784797-1-leitao@debian.org
Signed-off-by: Jens Axboe
---
 include/linux/io_uring.h      |  6 ++++++
 include/uapi/linux/io_uring.h |  8 ++++++++
 io_uring/uring_cmd.c          | 28 ++++++++++++++++++++++++++++
 net/socket.c                  |  2 ++
 4 files changed, 44 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h
index bb9c666bd584..106cdc55ff3b 100644
--- a/include/linux/io_uring.h
+++ b/include/linux/io_uring.h
@@ -81,6 +81,7 @@ static inline void io_uring_free(struct task_struct *tsk)
 	if (tsk->io_uring)
 		__io_uring_free(tsk);
 }
+int io_uring_cmd_sock(struct io_uring_cmd *cmd, unsigned int issue_flags);
 #else
 static inline int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
 			struct iov_iter *iter, void *ioucmd)
@@ -116,6 +117,11 @@ static inline const char *io_uring_get_opcode(u8 opcode)
 {
 	return "";
 }
+static inline int io_uring_cmd_sock(struct io_uring_cmd *cmd,
+				    unsigned int issue_flags)
+{
+	return -EOPNOTSUPP;
+}
 #endif
 
 #endif
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 36f9c73082de..9fc7195f25df 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -723,6 +723,14 @@ struct io_uring_recvmsg_out {
 	__u32 flags;
 };
 
+/*
+ * Argument for IORING_OP_URING_CMD when file is a socket
+ */
+enum {
+	SOCKET_URING_OP_SIOCINQ		= 0,
+	SOCKET_URING_OP_SIOCOUTQ,
+};
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c
index 476c7877ce58..8e7a03c1b20e 100644
--- a/io_uring/uring_cmd.c
+++ b/io_uring/uring_cmd.c
@@ -7,6 +7,7 @@
 #include <linux/nospec.h>
 
 #include <uapi/linux/io_uring.h>
+#include <uapi/asm-generic/ioctls.h>
 
 #include "io_uring.h"
 #include "rsrc.h"
@@ -164,3 +165,30 @@ int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
 	return io_import_fixed(rw, iter, req->imu, ubuf, len);
 }
 EXPORT_SYMBOL_GPL(io_uring_cmd_import_fixed);
+
+int io_uring_cmd_sock(struct io_uring_cmd *cmd, unsigned int issue_flags)
+{
+	struct socket *sock = cmd->file->private_data;
+	struct sock *sk = sock->sk;
+	struct proto *prot = READ_ONCE(sk->sk_prot);
+	int ret, arg = 0;
+
+	if (!prot || !prot->ioctl)
+		return -EOPNOTSUPP;
+
+	switch (cmd->sqe->cmd_op) {
+	case SOCKET_URING_OP_SIOCINQ:
+		ret = prot->ioctl(sk, SIOCINQ, &arg);
+		if (ret)
+			return ret;
+		return arg;
+	case SOCKET_URING_OP_SIOCOUTQ:
+		ret = prot->ioctl(sk, SIOCOUTQ, &arg);
+		if (ret)
+			return ret;
+		return arg;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+EXPORT_SYMBOL_GPL(io_uring_cmd_sock);
diff --git a/net/socket.c b/net/socket.c
index 2b0e54b2405c..1dc23f5298ba 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -88,6 +88,7 @@
 #include <linux/xattr.h>
 #include <linux/nospec.h>
 #include <linux/indirect_call_wrapper.h>
+#include <linux/io_uring.h>
 
 #include <linux/uaccess.h>
 #include <asm/unistd.h>
@@ -159,6 +160,7 @@ static const struct file_operations socket_file_ops = {
 #ifdef CONFIG_COMPAT
 	.compat_ioctl = compat_sock_ioctl,
 #endif
+	.uring_cmd =	io_uring_cmd_sock,
 	.mmap =		sock_mmap,
 	.release =	sock_close,
 	.fasync =	sock_fasync,
--
cgit v1.2.3


From 2af89abda7d9c2aeb573677e2c498ddb09f8058a Mon Sep 17 00:00:00 2001
From: Pavel Begunkov
Date: Thu, 24 Aug 2023 23:53:32 +0100
Subject: io_uring: add option to remove SQ indirection

Not many are aware, but the io_uring submission queue has two levels.
The first level usually appears as sq_array and stores indexes into
the actual SQ. To my knowledge, no one has ever seriously used it, nor
does liburing expose it to users.
Add IORING_SETUP_NO_SQARRAY; when set, we don't bother creating and
using the sq_array, and the SQ head/tail will point directly into the
SQ. This improves the memory footprint, in terms of both allocations
and cache usage, and should also make io_get_sqe() less branchy in the
end.

Signed-off-by: Pavel Begunkov
Link: https://lore.kernel.org/r/0ffa3268a5ef61d326201ff43a233315c96312e0.1692916914.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe
---
 include/uapi/linux/io_uring.h |  5 +++++
 io_uring/io_uring.c           | 52 ++++++++++++++++++++++++++-----------------
 2 files changed, 37 insertions(+), 20 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 9fc7195f25df..8e61f8b7c2ce 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -185,6 +185,11 @@ enum {
  */
 #define IORING_SETUP_REGISTERED_FD_ONLY	(1U << 15)
 
+/*
+ * Removes indirection through the SQ index array.
+ */
+#define IORING_SETUP_NO_SQARRAY		(1U << 16)
+
 enum io_uring_op {
 	IORING_OP_NOP,
 	IORING_OP_READV,
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index e8321903e3f3..a6eea3938802 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -2339,8 +2339,21 @@ static void io_commit_sqring(struct io_ring_ctx *ctx)
  */
 static bool io_get_sqe(struct io_ring_ctx *ctx, const struct io_uring_sqe **sqe)
 {
-	unsigned head, mask = ctx->sq_entries - 1;
-	unsigned sq_idx = ctx->cached_sq_head++ & mask;
+	unsigned mask = ctx->sq_entries - 1;
+	unsigned head = ctx->cached_sq_head++ & mask;
+
+	if (!(ctx->flags & IORING_SETUP_NO_SQARRAY)) {
+		head = READ_ONCE(ctx->sq_array[head]);
+		if (unlikely(head >= ctx->sq_entries)) {
+			/* drop invalid entries */
+			spin_lock(&ctx->completion_lock);
+			ctx->cq_extra--;
+			spin_unlock(&ctx->completion_lock);
+			WRITE_ONCE(ctx->rings->sq_dropped,
+				   READ_ONCE(ctx->rings->sq_dropped) + 1);
+			return false;
+		}
+	}
 
 	/*
 	 * The cached sq head (or cq tail) serves two purposes:
@@ -2350,22 +2363,12 @@ static bool io_get_sqe(struct io_ring_ctx *ctx, const struct io_uring_sqe **sqe)
 	 * 2) allows the kernel side to track the head on its own, even
 	 *    though the application is the one updating it.
 	 */
-	head = READ_ONCE(ctx->sq_array[sq_idx]);
-	if (likely(head < ctx->sq_entries)) {
-		/* double index for 128-byte SQEs, twice as long */
-		if (ctx->flags & IORING_SETUP_SQE128)
-			head <<= 1;
-		*sqe = &ctx->sq_sqes[head];
-		return true;
-	}
-
-	/* drop invalid entries */
-	spin_lock(&ctx->completion_lock);
-	ctx->cq_extra--;
-	spin_unlock(&ctx->completion_lock);
-	WRITE_ONCE(ctx->rings->sq_dropped,
-		   READ_ONCE(ctx->rings->sq_dropped) + 1);
-	return false;
+	/* double index for 128-byte SQEs, twice as long */
+	if (ctx->flags & IORING_SETUP_SQE128)
+		head <<= 1;
+	*sqe = &ctx->sq_sqes[head];
+	return true;
 }
 
 int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr)
@@ -2734,6 +2737,12 @@ static unsigned long rings_size(struct io_ring_ctx *ctx, unsigned int sq_entries
 		return SIZE_MAX;
 #endif
 
+	if (ctx->flags & IORING_SETUP_NO_SQARRAY) {
+		if (sq_offset)
+			*sq_offset = SIZE_MAX;
+		return off;
+	}
+
 	if (sq_offset)
 		*sq_offset = off;
 
@@ -3710,7 +3719,8 @@ static __cold int io_allocate_scq_urings(struct io_ring_ctx *ctx,
 		return PTR_ERR(rings);
 
 	ctx->rings = rings;
-	ctx->sq_array = (u32 *)((char *)rings + sq_array_offset);
+	if (!(ctx->flags & IORING_SETUP_NO_SQARRAY))
+		ctx->sq_array = (u32 *)((char *)rings + sq_array_offset);
 	rings->sq_ring_mask = p->sq_entries - 1;
 	rings->cq_ring_mask = p->cq_entries - 1;
 	rings->sq_ring_entries = p->sq_entries;
@@ -3921,7 +3931,8 @@ static __cold int io_uring_create(unsigned entries, struct io_uring_params *p,
 	p->sq_off.ring_entries = offsetof(struct io_rings, sq_ring_entries);
 	p->sq_off.flags = offsetof(struct io_rings, sq_flags);
 	p->sq_off.dropped = offsetof(struct io_rings, sq_dropped);
-	p->sq_off.array = (char *)ctx->sq_array - (char *)ctx->rings;
+	if (!(ctx->flags & IORING_SETUP_NO_SQARRAY))
+		p->sq_off.array = (char *)ctx->sq_array - (char *)ctx->rings;
 	p->sq_off.resv1 = 0;
 	if (!(ctx->flags & IORING_SETUP_NO_MMAP))
 		p->sq_off.user_addr = 0;
@@ -4010,7 +4021,8 @@ static long io_uring_setup(u32 entries, struct io_uring_params __user *params)
 			IORING_SETUP_COOP_TASKRUN | IORING_SETUP_TASKRUN_FLAG |
 			IORING_SETUP_SQE128 | IORING_SETUP_CQE32 |
 			IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN |
-			IORING_SETUP_NO_MMAP | IORING_SETUP_REGISTERED_FD_ONLY))
+			IORING_SETUP_NO_MMAP | IORING_SETUP_REGISTERED_FD_ONLY |
+			IORING_SETUP_NO_SQARRAY))
 		return -EINVAL;
 
 	return io_uring_create(entries, &p, params);
--
cgit v1.2.3
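As a closing usage note for IORING_SETUP_NO_SQARRAY (a sketch, not part
of the patch, assuming a liburing recent enough to recognize the flag
and skip the index array itself): from userspace this is a single setup
flag, with a fallback for kernels that predate it, since unknown setup
flags are rejected with -EINVAL:

    /* assumes: #include <liburing.h> */
    struct io_uring ring;
    int ret;

    /* prefer the leaner layout without the sq_array indirection */
    ret = io_uring_queue_init(64, &ring, IORING_SETUP_NO_SQARRAY);
    if (ret == -EINVAL)
        /* older kernel: fall back to the classic layout */
        ret = io_uring_queue_init(64, &ring, 0);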