diff options
| author | Yu Kuai <yukuai@fnnas.com> | 2026-02-14 13:43:50 +0800 |
|---|---|---|
| committer | Jens Axboe <axboe@kernel.dk> | 2026-02-16 10:47:25 -0700 |
| commit | dfe48ea179733be948c432f6af2fc3913cf5dd28 (patch) | |
| tree | 5b931d881352843e79ab7d7509f4750dfe6d0284 /kernel | |
| parent | 3678a334a55e869b413e2fb4824e92200b149d73 (diff) | |
blk-mq: use NOIO context to prevent deadlock during debugfs creation
Creating debugfs entries can trigger fs reclaim, which can enter back
into the block layer request_queue. This can cause deadlock if the
queue is frozen.
Previously, a WARN_ON_ONCE check was used in debugfs_create_files()
to detect this condition, but it was racy since the queue can be frozen
from another context at any time.
Introduce blk_debugfs_lock()/blk_debugfs_unlock() helpers that combine
the debugfs_mutex with memalloc_noio_save()/restore() to prevent fs
reclaim from triggering block I/O. Also add blk_debugfs_lock_nomemsave()
and blk_debugfs_unlock_nomemrestore() variants for callers that don't
need NOIO protection (e.g., debugfs removal or read-only operations).
Replace all raw debugfs_mutex lock/unlock pairs with these helpers,
using the _nomemsave/_nomemrestore variants where appropriate.
Reported-by: Yi Zhang <yi.zhang@redhat.com>
Closes: https://lore.kernel.org/all/CAHj4cs9gNKEYAPagD9JADfO5UH+OiCr4P7OO2wjpfOYeM-RV=A@mail.gmail.com/
Reported-by: Shinichiro Kawasaki <shinichiro.kawasaki@wdc.com>
Closes: https://lore.kernel.org/all/aYWQR7CtYdk3K39g@shinmob/
Suggested-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Yu Kuai <yukuai@fnnas.com>
Reviewed-by: Nilay Shroff <nilay@linux.ibm.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/trace/blktrace.c | 38 |
1 files changed, 21 insertions, 17 deletions
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index c4db5c2e7103..a3d8a68f8683 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -559,9 +559,9 @@ int blk_trace_remove(struct request_queue *q) { int ret; - mutex_lock(&q->debugfs_mutex); + blk_debugfs_lock_nomemsave(q); ret = __blk_trace_remove(q); - mutex_unlock(&q->debugfs_mutex); + blk_debugfs_unlock_nomemrestore(q); return ret; } @@ -767,6 +767,7 @@ int blk_trace_setup(struct request_queue *q, char *name, dev_t dev, struct blk_user_trace_setup2 buts2; struct blk_user_trace_setup buts; struct blk_trace *bt; + unsigned int memflags; int ret; ret = copy_from_user(&buts, arg, sizeof(buts)); @@ -785,16 +786,16 @@ int blk_trace_setup(struct request_queue *q, char *name, dev_t dev, .pid = buts.pid, }; - mutex_lock(&q->debugfs_mutex); + memflags = blk_debugfs_lock(q); bt = blk_trace_setup_prepare(q, name, dev, buts.buf_size, buts.buf_nr, bdev); if (IS_ERR(bt)) { - mutex_unlock(&q->debugfs_mutex); + blk_debugfs_unlock(q, memflags); return PTR_ERR(bt); } blk_trace_setup_finalize(q, name, 1, bt, &buts2); strscpy(buts.name, buts2.name, BLKTRACE_BDEV_SIZE); - mutex_unlock(&q->debugfs_mutex); + blk_debugfs_unlock(q, memflags); if (copy_to_user(arg, &buts, sizeof(buts))) { blk_trace_remove(q); @@ -809,6 +810,7 @@ static int blk_trace_setup2(struct request_queue *q, char *name, dev_t dev, { struct blk_user_trace_setup2 buts2; struct blk_trace *bt; + unsigned int memflags; if (copy_from_user(&buts2, arg, sizeof(buts2))) return -EFAULT; @@ -819,15 +821,15 @@ static int blk_trace_setup2(struct request_queue *q, char *name, dev_t dev, if (buts2.flags != 0) return -EINVAL; - mutex_lock(&q->debugfs_mutex); + memflags = blk_debugfs_lock(q); bt = blk_trace_setup_prepare(q, name, dev, buts2.buf_size, buts2.buf_nr, bdev); if (IS_ERR(bt)) { - mutex_unlock(&q->debugfs_mutex); + blk_debugfs_unlock(q, memflags); return PTR_ERR(bt); } blk_trace_setup_finalize(q, name, 2, bt, &buts2); - mutex_unlock(&q->debugfs_mutex); + blk_debugfs_unlock(q, memflags); if (copy_to_user(arg, &buts2, sizeof(buts2))) { blk_trace_remove(q); @@ -844,6 +846,7 @@ static int compat_blk_trace_setup(struct request_queue *q, char *name, struct blk_user_trace_setup2 buts2; struct compat_blk_user_trace_setup cbuts; struct blk_trace *bt; + unsigned int memflags; if (copy_from_user(&cbuts, arg, sizeof(cbuts))) return -EFAULT; @@ -860,15 +863,15 @@ static int compat_blk_trace_setup(struct request_queue *q, char *name, .pid = cbuts.pid, }; - mutex_lock(&q->debugfs_mutex); + memflags = blk_debugfs_lock(q); bt = blk_trace_setup_prepare(q, name, dev, buts2.buf_size, buts2.buf_nr, bdev); if (IS_ERR(bt)) { - mutex_unlock(&q->debugfs_mutex); + blk_debugfs_unlock(q, memflags); return PTR_ERR(bt); } blk_trace_setup_finalize(q, name, 1, bt, &buts2); - mutex_unlock(&q->debugfs_mutex); + blk_debugfs_unlock(q, memflags); if (copy_to_user(arg, &buts2.name, ARRAY_SIZE(buts2.name))) { blk_trace_remove(q); @@ -898,9 +901,9 @@ int blk_trace_startstop(struct request_queue *q, int start) { int ret; - mutex_lock(&q->debugfs_mutex); + blk_debugfs_lock_nomemsave(q); ret = __blk_trace_startstop(q, start); - mutex_unlock(&q->debugfs_mutex); + blk_debugfs_unlock_nomemrestore(q); return ret; } @@ -2020,7 +2023,7 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev, struct blk_trace *bt; ssize_t ret = -ENXIO; - mutex_lock(&q->debugfs_mutex); + blk_debugfs_lock_nomemsave(q); bt = rcu_dereference_protected(q->blk_trace, lockdep_is_held(&q->debugfs_mutex)); @@ -2041,7 +2044,7 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev, ret = sprintf(buf, "%llu\n", bt->end_lba); out_unlock_bdev: - mutex_unlock(&q->debugfs_mutex); + blk_debugfs_unlock_nomemrestore(q); return ret; } @@ -2052,6 +2055,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev, struct block_device *bdev = dev_to_bdev(dev); struct request_queue *q = bdev_get_queue(bdev); struct blk_trace *bt; + unsigned int memflags; u64 value; ssize_t ret = -EINVAL; @@ -2071,7 +2075,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev, goto out; } - mutex_lock(&q->debugfs_mutex); + memflags = blk_debugfs_lock(q); bt = rcu_dereference_protected(q->blk_trace, lockdep_is_held(&q->debugfs_mutex)); @@ -2106,7 +2110,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev, } out_unlock_bdev: - mutex_unlock(&q->debugfs_mutex); + blk_debugfs_unlock(q, memflags); out: return ret ? ret : count; } |
