summaryrefslogtreecommitdiff
path: root/io_uring
diff options
context:
space:
mode:
Diffstat (limited to 'io_uring')
-rw-r--r--io_uring/eventfd.c16
-rw-r--r--io_uring/io_uring.c16
-rw-r--r--io_uring/io_uring.h7
-rw-r--r--io_uring/opdef.c3
-rw-r--r--io_uring/register.c52
-rw-r--r--io_uring/rsrc.c10
-rw-r--r--io_uring/sqpoll.c6
-rw-r--r--io_uring/timeout.c4
-rw-r--r--io_uring/uring_cmd.c23
-rw-r--r--io_uring/uring_cmd.h4
10 files changed, 75 insertions, 66 deletions
diff --git a/io_uring/eventfd.c b/io_uring/eventfd.c
index fab936d31ba8..100d5da94cb9 100644
--- a/io_uring/eventfd.c
+++ b/io_uring/eventfd.c
@@ -33,20 +33,18 @@ static void io_eventfd_free(struct rcu_head *rcu)
kfree(ev_fd);
}
-static void io_eventfd_do_signal(struct rcu_head *rcu)
+static void io_eventfd_put(struct io_ev_fd *ev_fd)
{
- struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu);
-
- eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE);
-
if (refcount_dec_and_test(&ev_fd->refs))
- io_eventfd_free(rcu);
+ call_rcu(&ev_fd->rcu, io_eventfd_free);
}
-static void io_eventfd_put(struct io_ev_fd *ev_fd)
+static void io_eventfd_do_signal(struct rcu_head *rcu)
{
- if (refcount_dec_and_test(&ev_fd->refs))
- call_rcu(&ev_fd->rcu, io_eventfd_free);
+ struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu);
+
+ eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE);
+ io_eventfd_put(ev_fd);
}
static void io_eventfd_release(struct io_ev_fd *ev_fd, bool put_ref)
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index d3403c8216db..4758f1ba902b 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -320,7 +320,7 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
ret |= io_alloc_cache_init(&ctx->rw_cache, IO_ALLOC_CACHE_MAX,
sizeof(struct io_async_rw));
ret |= io_alloc_cache_init(&ctx->uring_cache, IO_ALLOC_CACHE_MAX,
- sizeof(struct uring_cache));
+ sizeof(struct io_uring_cmd_data));
spin_lock_init(&ctx->msg_lock);
ret |= io_alloc_cache_init(&ctx->msg_cache, IO_ALLOC_CACHE_MAX,
sizeof(struct io_kiocb));
@@ -1226,10 +1226,7 @@ static void io_req_normal_work_add(struct io_kiocb *req)
/* SQPOLL doesn't need the task_work added, it'll run it itself */
if (ctx->flags & IORING_SETUP_SQPOLL) {
- struct io_sq_data *sqd = ctx->sq_data;
-
- if (sqd->thread)
- __set_notify_signal(sqd->thread);
+ __set_notify_signal(tctx->task);
return;
}
@@ -2813,13 +2810,12 @@ static __poll_t io_uring_poll(struct file *file, poll_table *wait)
if (unlikely(!ctx->poll_activated))
io_activate_pollwq(ctx);
-
- poll_wait(file, &ctx->poll_wq, wait);
/*
- * synchronizes with barrier from wq_has_sleeper call in
- * io_commit_cqring
+ * provides mb() which pairs with barrier from wq_has_sleeper
+ * call in io_commit_cqring
*/
- smp_rmb();
+ poll_wait(file, &ctx->poll_wq, wait);
+
if (!io_sqring_full(ctx))
mask |= EPOLLOUT | EPOLLWRNORM;
diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h
index 12abee607e4a..492cbbf2c23b 100644
--- a/io_uring/io_uring.h
+++ b/io_uring/io_uring.h
@@ -125,6 +125,9 @@ static inline void io_lockdep_assert_cq_locked(struct io_ring_ctx *ctx)
#if defined(CONFIG_PROVE_LOCKING)
lockdep_assert(in_task());
+ if (ctx->flags & IORING_SETUP_DEFER_TASKRUN)
+ lockdep_assert_held(&ctx->uring_lock);
+
if (ctx->flags & IORING_SETUP_IOPOLL) {
lockdep_assert_held(&ctx->uring_lock);
} else if (!ctx->task_complete) {
@@ -136,9 +139,7 @@ static inline void io_lockdep_assert_cq_locked(struct io_ring_ctx *ctx)
* Not from an SQE, as those cannot be submitted, but via
* updating tagged resources.
*/
- if (percpu_ref_is_dying(&ctx->refs))
- lockdep_assert(current_work());
- else
+ if (!percpu_ref_is_dying(&ctx->refs))
lockdep_assert(current == ctx->submitter_task);
}
#endif
diff --git a/io_uring/opdef.c b/io_uring/opdef.c
index 3de75eca1c92..e8baef4e5146 100644
--- a/io_uring/opdef.c
+++ b/io_uring/opdef.c
@@ -7,6 +7,7 @@
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/io_uring.h>
+#include <linux/io_uring/cmd.h>
#include "io_uring.h"
#include "opdef.h"
@@ -414,7 +415,7 @@ const struct io_issue_def io_issue_defs[] = {
.plug = 1,
.iopoll = 1,
.iopoll_queue = 1,
- .async_size = 2 * sizeof(struct io_uring_sqe),
+ .async_size = sizeof(struct io_uring_cmd_data),
.prep = io_uring_cmd_prep,
.issue = io_uring_cmd,
},
diff --git a/io_uring/register.c b/io_uring/register.c
index fdd44914c39c..371aec87e078 100644
--- a/io_uring/register.c
+++ b/io_uring/register.c
@@ -405,8 +405,8 @@ static int io_register_resize_rings(struct io_ring_ctx *ctx, void __user *arg)
{
struct io_ring_ctx_rings o = { }, n = { }, *to_free = NULL;
size_t size, sq_array_offset;
+ unsigned i, tail, old_head;
struct io_uring_params p;
- unsigned i, tail;
void *ptr;
int ret;
@@ -449,10 +449,18 @@ static int io_register_resize_rings(struct io_ring_ctx *ctx, void __user *arg)
if (IS_ERR(n.rings))
return PTR_ERR(n.rings);
- n.rings->sq_ring_mask = p.sq_entries - 1;
- n.rings->cq_ring_mask = p.cq_entries - 1;
- n.rings->sq_ring_entries = p.sq_entries;
- n.rings->cq_ring_entries = p.cq_entries;
+ /*
+ * At this point n.rings is shared with userspace, just like o.rings
+ * is as well. While we don't expect userspace to modify it while
+ * a resize is in progress, and it's most likely that userspace will
+ * shoot itself in the foot if it does, we can't always assume good
+ * intent... Use read/write once helpers from here on to indicate the
+ * shared nature of it.
+ */
+ WRITE_ONCE(n.rings->sq_ring_mask, p.sq_entries - 1);
+ WRITE_ONCE(n.rings->cq_ring_mask, p.cq_entries - 1);
+ WRITE_ONCE(n.rings->sq_ring_entries, p.sq_entries);
+ WRITE_ONCE(n.rings->cq_ring_entries, p.cq_entries);
if (copy_to_user(arg, &p, sizeof(p))) {
io_register_free_rings(&p, &n);
@@ -509,20 +517,22 @@ static int io_register_resize_rings(struct io_ring_ctx *ctx, void __user *arg)
* rings can't hold what is already there, then fail the operation.
*/
n.sq_sqes = ptr;
- tail = o.rings->sq.tail;
- if (tail - o.rings->sq.head > p.sq_entries)
+ tail = READ_ONCE(o.rings->sq.tail);
+ old_head = READ_ONCE(o.rings->sq.head);
+ if (tail - old_head > p.sq_entries)
goto overflow;
- for (i = o.rings->sq.head; i < tail; i++) {
+ for (i = old_head; i < tail; i++) {
unsigned src_head = i & (ctx->sq_entries - 1);
- unsigned dst_head = i & n.rings->sq_ring_mask;
+ unsigned dst_head = i & (p.sq_entries - 1);
n.sq_sqes[dst_head] = o.sq_sqes[src_head];
}
- n.rings->sq.head = o.rings->sq.head;
- n.rings->sq.tail = o.rings->sq.tail;
+ WRITE_ONCE(n.rings->sq.head, READ_ONCE(o.rings->sq.head));
+ WRITE_ONCE(n.rings->sq.tail, READ_ONCE(o.rings->sq.tail));
- tail = o.rings->cq.tail;
- if (tail - o.rings->cq.head > p.cq_entries) {
+ tail = READ_ONCE(o.rings->cq.tail);
+ old_head = READ_ONCE(o.rings->cq.head);
+ if (tail - old_head > p.cq_entries) {
overflow:
/* restore old rings, and return -EOVERFLOW via cleanup path */
ctx->rings = o.rings;
@@ -531,21 +541,21 @@ overflow:
ret = -EOVERFLOW;
goto out;
}
- for (i = o.rings->cq.head; i < tail; i++) {
+ for (i = old_head; i < tail; i++) {
unsigned src_head = i & (ctx->cq_entries - 1);
- unsigned dst_head = i & n.rings->cq_ring_mask;
+ unsigned dst_head = i & (p.cq_entries - 1);
n.rings->cqes[dst_head] = o.rings->cqes[src_head];
}
- n.rings->cq.head = o.rings->cq.head;
- n.rings->cq.tail = o.rings->cq.tail;
+ WRITE_ONCE(n.rings->cq.head, READ_ONCE(o.rings->cq.head));
+ WRITE_ONCE(n.rings->cq.tail, READ_ONCE(o.rings->cq.tail));
/* invalidate cached cqe refill */
ctx->cqe_cached = ctx->cqe_sentinel = NULL;
- n.rings->sq_dropped = o.rings->sq_dropped;
- n.rings->sq_flags = o.rings->sq_flags;
- n.rings->cq_flags = o.rings->cq_flags;
- n.rings->cq_overflow = o.rings->cq_overflow;
+ WRITE_ONCE(n.rings->sq_dropped, READ_ONCE(o.rings->sq_dropped));
+ WRITE_ONCE(n.rings->sq_flags, READ_ONCE(o.rings->sq_flags));
+ WRITE_ONCE(n.rings->cq_flags, READ_ONCE(o.rings->cq_flags));
+ WRITE_ONCE(n.rings->cq_overflow, READ_ONCE(o.rings->cq_overflow));
/* all done, store old pointers and assign new ones */
if (!(ctx->flags & IORING_SETUP_NO_SQARRAY))
diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index 077f84684c18..69937d0c94f9 100644
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -997,7 +997,7 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
dst_node = io_rsrc_node_alloc(ctx, IORING_RSRC_BUFFER);
if (!dst_node) {
ret = -ENOMEM;
- goto out_put_free;
+ goto out_unlock;
}
refcount_inc(&src_node->buf->refs);
@@ -1033,14 +1033,6 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
mutex_lock(&src_ctx->uring_lock);
/* someone raced setting up buffers, dump ours */
ret = -EBUSY;
-out_put_free:
- i = data.nr;
- while (i--) {
- if (data.nodes[i]) {
- io_buffer_unmap(src_ctx, data.nodes[i]);
- kfree(data.nodes[i]);
- }
- }
out_unlock:
io_rsrc_data_free(ctx, &data);
mutex_unlock(&src_ctx->uring_lock);
diff --git a/io_uring/sqpoll.c b/io_uring/sqpoll.c
index 9e5bd79fd2b5..8961a3c1e73c 100644
--- a/io_uring/sqpoll.c
+++ b/io_uring/sqpoll.c
@@ -268,8 +268,12 @@ static int io_sq_thread(void *data)
DEFINE_WAIT(wait);
/* offload context creation failed, just exit */
- if (!current->io_uring)
+ if (!current->io_uring) {
+ mutex_lock(&sqd->lock);
+ sqd->thread = NULL;
+ mutex_unlock(&sqd->lock);
goto err_out;
+ }
snprintf(buf, sizeof(buf), "iou-sqp-%d", sqd->task_pid);
set_task_comm(current, buf);
diff --git a/io_uring/timeout.c b/io_uring/timeout.c
index 362689b17ccc..e9cec9e4dc2f 100644
--- a/io_uring/timeout.c
+++ b/io_uring/timeout.c
@@ -427,10 +427,12 @@ static int io_timeout_update(struct io_ring_ctx *ctx, __u64 user_data,
timeout->off = 0; /* noseq */
data = req->async_data;
+ data->ts = *ts;
+
list_add_tail(&timeout->list, &ctx->timeout_list);
hrtimer_init(&data->timer, io_timeout_get_clock(data), mode);
data->timer.function = io_timeout_fn;
- hrtimer_start(&data->timer, timespec64_to_ktime(*ts), mode);
+ hrtimer_start(&data->timer, timespec64_to_ktime(data->ts), mode);
return 0;
}
diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c
index af842e9b4eb9..ce7726a04883 100644
--- a/io_uring/uring_cmd.c
+++ b/io_uring/uring_cmd.c
@@ -16,26 +16,35 @@
#include "rsrc.h"
#include "uring_cmd.h"
-static struct uring_cache *io_uring_async_get(struct io_kiocb *req)
+static struct io_uring_cmd_data *io_uring_async_get(struct io_kiocb *req)
{
struct io_ring_ctx *ctx = req->ctx;
- struct uring_cache *cache;
+ struct io_uring_cmd_data *cache;
cache = io_alloc_cache_get(&ctx->uring_cache);
if (cache) {
+ cache->op_data = NULL;
req->flags |= REQ_F_ASYNC_DATA;
req->async_data = cache;
return cache;
}
- if (!io_alloc_async_data(req))
- return req->async_data;
+ if (!io_alloc_async_data(req)) {
+ cache = req->async_data;
+ cache->op_data = NULL;
+ return cache;
+ }
return NULL;
}
static void io_req_uring_cleanup(struct io_kiocb *req, unsigned int issue_flags)
{
struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
- struct uring_cache *cache = req->async_data;
+ struct io_uring_cmd_data *cache = req->async_data;
+
+ if (cache->op_data) {
+ kfree(cache->op_data);
+ cache->op_data = NULL;
+ }
if (issue_flags & IO_URING_F_UNLOCKED)
return;
@@ -183,7 +192,7 @@ static int io_uring_cmd_prep_setup(struct io_kiocb *req,
const struct io_uring_sqe *sqe)
{
struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
- struct uring_cache *cache;
+ struct io_uring_cmd_data *cache;
cache = io_uring_async_get(req);
if (unlikely(!cache))
@@ -260,7 +269,7 @@ int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags)
ret = file->f_op->uring_cmd(ioucmd, issue_flags);
if (ret == -EAGAIN) {
- struct uring_cache *cache = req->async_data;
+ struct io_uring_cmd_data *cache = req->async_data;
if (ioucmd->sqe != (void *) cache)
memcpy(cache, ioucmd->sqe, uring_sqe_size(req->ctx));
diff --git a/io_uring/uring_cmd.h b/io_uring/uring_cmd.h
index 7dba0f1efc58..f6837ee0955b 100644
--- a/io_uring/uring_cmd.h
+++ b/io_uring/uring_cmd.h
@@ -1,9 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-struct uring_cache {
- struct io_uring_sqe sqes[2];
-};
-
int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags);
int io_uring_cmd_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);