From 16ede66973c84f890c03584f79158dd5b2d725f5 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 25 Aug 2022 07:53:12 -0700 Subject: sbitmap: fix batched wait_cnt accounting Batched completions can clear multiple bits, but we're only decrementing the wait_cnt by one each time. This can cause waiters to never be woken, stalling IO. Use the batched count instead. Link: https://bugzilla.kernel.org/show_bug.cgi?id=215679 Signed-off-by: Keith Busch Link: https://lore.kernel.org/r/20220825145312.1217900-1-kbusch@fb.com Signed-off-by: Jens Axboe --- include/linux/sbitmap.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux/sbitmap.h') diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h index 8f5a86e210b9..4d2d5205ab58 100644 --- a/include/linux/sbitmap.h +++ b/include/linux/sbitmap.h @@ -575,8 +575,9 @@ void sbitmap_queue_wake_all(struct sbitmap_queue *sbq); * sbitmap_queue_wake_up() - Wake up some of waiters in one waitqueue * on a &struct sbitmap_queue. * @sbq: Bitmap queue to wake up. + * @nr: Number of bits cleared. */ -void sbitmap_queue_wake_up(struct sbitmap_queue *sbq); +void sbitmap_queue_wake_up(struct sbitmap_queue *sbq, int nr); /** * sbitmap_queue_show() - Dump &struct sbitmap_queue information to a &struct -- cgit v1.2.3 From bce1b56c73826fec8caf6187f0c922ede397a5a8 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 4 Sep 2022 06:39:25 -0600 Subject: Revert "sbitmap: fix batched wait_cnt accounting" This reverts commit 16ede66973c84f890c03584f79158dd5b2d725f5. This is causing issues with CPU stalls on my test box, revert it for now until we understand what is going on. It looks like infinite looping off sbitmap_queue_wake_up(), but hard to tell with a lot of CPUs hitting this issue and the console scrolling infinitely. Link: https://lore.kernel.org/linux-block/e742813b-ce5c-0d58-205b-1626f639b1bd@kernel.dk/ Signed-off-by: Jens Axboe --- block/blk-mq-tag.c | 2 +- include/linux/sbitmap.h | 3 +-- lib/sbitmap.c | 31 ++++++++++++++----------------- 3 files changed, 16 insertions(+), 20 deletions(-) (limited to 'include/linux/sbitmap.h') diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index 9eb968e14d31..8e3b36d1cb57 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -196,7 +196,7 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data) * other allocations on previous queue won't be starved. */ if (bt != bt_prev) - sbitmap_queue_wake_up(bt_prev, 1); + sbitmap_queue_wake_up(bt_prev); ws = bt_wait_ptr(bt, data->hctx); } while (1); diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h index 4d2d5205ab58..8f5a86e210b9 100644 --- a/include/linux/sbitmap.h +++ b/include/linux/sbitmap.h @@ -575,9 +575,8 @@ void sbitmap_queue_wake_all(struct sbitmap_queue *sbq); * sbitmap_queue_wake_up() - Wake up some of waiters in one waitqueue * on a &struct sbitmap_queue. * @sbq: Bitmap queue to wake up. - * @nr: Number of bits cleared. */ -void sbitmap_queue_wake_up(struct sbitmap_queue *sbq, int nr); +void sbitmap_queue_wake_up(struct sbitmap_queue *sbq); /** * sbitmap_queue_show() - Dump &struct sbitmap_queue information to a &struct diff --git a/lib/sbitmap.c b/lib/sbitmap.c index 2fedf07a9db5..a39b1a877366 100644 --- a/lib/sbitmap.c +++ b/lib/sbitmap.c @@ -599,38 +599,34 @@ static struct sbq_wait_state *sbq_wake_ptr(struct sbitmap_queue *sbq) return NULL; } -static bool __sbq_wake_up(struct sbitmap_queue *sbq, int nr) +static bool __sbq_wake_up(struct sbitmap_queue *sbq) { struct sbq_wait_state *ws; - int wake_batch, wait_cnt, cur; + unsigned int wake_batch; + int wait_cnt; ws = sbq_wake_ptr(sbq); - if (!ws || !nr) + if (!ws) return false; - wake_batch = READ_ONCE(sbq->wake_batch); - cur = atomic_read(&ws->wait_cnt); - do { - if (cur <= 0) - return true; - wait_cnt = cur - nr; - } while (!atomic_try_cmpxchg(&ws->wait_cnt, &cur, wait_cnt)); - + wait_cnt = atomic_dec_return(&ws->wait_cnt); /* * For concurrent callers of this, callers should call this function * again to wakeup a new batch on a different 'ws'. */ - if (!waitqueue_active(&ws->wait)) + if (wait_cnt < 0 || !waitqueue_active(&ws->wait)) return true; if (wait_cnt > 0) return false; + wake_batch = READ_ONCE(sbq->wake_batch); + /* * Wake up first in case that concurrent callers decrease wait_cnt * while waitqueue is empty. */ - wake_up_nr(&ws->wait, max(wake_batch, nr)); + wake_up_nr(&ws->wait, wake_batch); /* * Pairs with the memory barrier in sbitmap_queue_resize() to @@ -655,11 +651,12 @@ static bool __sbq_wake_up(struct sbitmap_queue *sbq, int nr) return false; } -void sbitmap_queue_wake_up(struct sbitmap_queue *sbq, int nr) +void sbitmap_queue_wake_up(struct sbitmap_queue *sbq) { - while (__sbq_wake_up(sbq, nr)) + while (__sbq_wake_up(sbq)) ; } +EXPORT_SYMBOL_GPL(sbitmap_queue_wake_up); static inline void sbitmap_update_cpu_hint(struct sbitmap *sb, int cpu, int tag) { @@ -696,7 +693,7 @@ void sbitmap_queue_clear_batch(struct sbitmap_queue *sbq, int offset, atomic_long_andnot(mask, (atomic_long_t *) addr); smp_mb__after_atomic(); - sbitmap_queue_wake_up(sbq, nr_tags); + sbitmap_queue_wake_up(sbq); sbitmap_update_cpu_hint(&sbq->sb, raw_smp_processor_id(), tags[nr_tags - 1] - offset); } @@ -724,7 +721,7 @@ void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr, * waiter. See the comment on waitqueue_active(). */ smp_mb__after_atomic(); - sbitmap_queue_wake_up(sbq, 1); + sbitmap_queue_wake_up(sbq); sbitmap_update_cpu_hint(&sbq->sb, cpu, nr); } EXPORT_SYMBOL_GPL(sbitmap_queue_clear); -- cgit v1.2.3 From 4acb83417cadfdcbe64215f9d0ddcf3132af808e Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 9 Sep 2022 11:40:22 -0700 Subject: sbitmap: fix batched wait_cnt accounting Batched completions can clear multiple bits, but we're only decrementing the wait_cnt by one each time. This can cause waiters to never be woken, stalling IO. Use the batched count instead. Link: https://bugzilla.kernel.org/show_bug.cgi?id=215679 Signed-off-by: Keith Busch Link: https://lore.kernel.org/r/20220909184022.1709476-1-kbusch@fb.com Signed-off-by: Jens Axboe --- block/blk-mq-tag.c | 2 +- include/linux/sbitmap.h | 3 ++- lib/sbitmap.c | 37 +++++++++++++++++++++++-------------- 3 files changed, 26 insertions(+), 16 deletions(-) (limited to 'include/linux/sbitmap.h') diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index 8e3b36d1cb57..9eb968e14d31 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -196,7 +196,7 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data) * other allocations on previous queue won't be starved. */ if (bt != bt_prev) - sbitmap_queue_wake_up(bt_prev); + sbitmap_queue_wake_up(bt_prev, 1); ws = bt_wait_ptr(bt, data->hctx); } while (1); diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h index 8f5a86e210b9..4d2d5205ab58 100644 --- a/include/linux/sbitmap.h +++ b/include/linux/sbitmap.h @@ -575,8 +575,9 @@ void sbitmap_queue_wake_all(struct sbitmap_queue *sbq); * sbitmap_queue_wake_up() - Wake up some of waiters in one waitqueue * on a &struct sbitmap_queue. * @sbq: Bitmap queue to wake up. + * @nr: Number of bits cleared. */ -void sbitmap_queue_wake_up(struct sbitmap_queue *sbq); +void sbitmap_queue_wake_up(struct sbitmap_queue *sbq, int nr); /** * sbitmap_queue_show() - Dump &struct sbitmap_queue information to a &struct diff --git a/lib/sbitmap.c b/lib/sbitmap.c index cbfd2e677d87..624fa7f118d1 100644 --- a/lib/sbitmap.c +++ b/lib/sbitmap.c @@ -599,24 +599,31 @@ static struct sbq_wait_state *sbq_wake_ptr(struct sbitmap_queue *sbq) return NULL; } -static bool __sbq_wake_up(struct sbitmap_queue *sbq) +static bool __sbq_wake_up(struct sbitmap_queue *sbq, int *nr) { struct sbq_wait_state *ws; unsigned int wake_batch; - int wait_cnt; + int wait_cnt, cur, sub; bool ret; + if (*nr <= 0) + return false; + ws = sbq_wake_ptr(sbq); if (!ws) return false; - wait_cnt = atomic_dec_return(&ws->wait_cnt); - /* - * For concurrent callers of this, callers should call this function - * again to wakeup a new batch on a different 'ws'. - */ - if (wait_cnt < 0) - return true; + cur = atomic_read(&ws->wait_cnt); + do { + /* + * For concurrent callers of this, callers should call this + * function again to wakeup a new batch on a different 'ws'. + */ + if (cur == 0) + return true; + sub = min(*nr, cur); + wait_cnt = cur - sub; + } while (!atomic_try_cmpxchg(&ws->wait_cnt, &cur, wait_cnt)); /* * If we decremented queue without waiters, retry to avoid lost @@ -625,6 +632,8 @@ static bool __sbq_wake_up(struct sbitmap_queue *sbq) if (wait_cnt > 0) return !waitqueue_active(&ws->wait); + *nr -= sub; + /* * When wait_cnt == 0, we have to be particularly careful as we are * responsible to reset wait_cnt regardless whether we've actually @@ -660,12 +669,12 @@ static bool __sbq_wake_up(struct sbitmap_queue *sbq) sbq_index_atomic_inc(&sbq->wake_index); atomic_set(&ws->wait_cnt, wake_batch); - return ret; + return ret || *nr; } -void sbitmap_queue_wake_up(struct sbitmap_queue *sbq) +void sbitmap_queue_wake_up(struct sbitmap_queue *sbq, int nr) { - while (__sbq_wake_up(sbq)) + while (__sbq_wake_up(sbq, &nr)) ; } EXPORT_SYMBOL_GPL(sbitmap_queue_wake_up); @@ -705,7 +714,7 @@ void sbitmap_queue_clear_batch(struct sbitmap_queue *sbq, int offset, atomic_long_andnot(mask, (atomic_long_t *) addr); smp_mb__after_atomic(); - sbitmap_queue_wake_up(sbq); + sbitmap_queue_wake_up(sbq, nr_tags); sbitmap_update_cpu_hint(&sbq->sb, raw_smp_processor_id(), tags[nr_tags - 1] - offset); } @@ -733,7 +742,7 @@ void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr, * waiter. See the comment on waitqueue_active(). */ smp_mb__after_atomic(); - sbitmap_queue_wake_up(sbq); + sbitmap_queue_wake_up(sbq, 1); sbitmap_update_cpu_hint(&sbq->sb, cpu, nr); } EXPORT_SYMBOL_GPL(sbitmap_queue_clear); -- cgit v1.2.3