From fd4d36bf0d54e0b020b8ffeddf7552562eab17c5 Mon Sep 17 00:00:00 2001 From: William Lee Irwin III Date: Mon, 18 Oct 2004 17:59:41 -0700 Subject: [PATCH] standardize bit waiting data type Eliminate specialized page and bh waitqueue hashing structures in favor of a standardized structure, using wake_up_bit() to wake waiters using the standardized wait_bit_key structure. Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/buffer.c | 52 ++++++++-------------------------------------------- 1 file changed, 8 insertions(+), 44 deletions(-) (limited to 'fs/buffer.c') diff --git a/fs/buffer.c b/fs/buffer.c index 4ec2acb57946..6eeafe142756 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -45,26 +45,6 @@ static void invalidate_bh_lrus(void); #define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers) -struct bh_wait_queue { - struct buffer_head *bh; - wait_queue_t wait; -}; - -#define __DEFINE_BH_WAIT(name, b, f) \ - struct bh_wait_queue name = { \ - .bh = b, \ - .wait = { \ - .task = current, \ - .flags = f, \ - .func = bh_wake_function, \ - .task_list = \ - LIST_HEAD_INIT(name.wait.task_list),\ - }, \ - } -#define DEFINE_BH_WAIT(name, bh) __DEFINE_BH_WAIT(name, bh, 0) -#define DEFINE_BH_WAIT_EXCLUSIVE(name, bh) \ - __DEFINE_BH_WAIT(name, bh, WQ_FLAG_EXCLUSIVE) - /* * Hashed waitqueue_head's for wait_on_buffer() */ @@ -95,24 +75,10 @@ void wake_up_buffer(struct buffer_head *bh) wait_queue_head_t *wq = bh_waitq_head(bh); smp_mb(); - if (waitqueue_active(wq)) - __wake_up(wq, TASK_INTERRUPTIBLE|TASK_UNINTERRUPTIBLE, 1, bh); + __wake_up_bit(wq, &bh->b_state, BH_Lock); } EXPORT_SYMBOL(wake_up_buffer); -static int bh_wake_function(wait_queue_t *wait, unsigned mode, - int sync, void *key) -{ - struct buffer_head *bh = key; - struct bh_wait_queue *wq; - - wq = container_of(wait, struct bh_wait_queue, wait); - if (wq->bh != bh || buffer_locked(bh)) - return 0; - else - return autoremove_wake_function(wait, mode, sync, key); -} - static void 
sync_buffer(struct buffer_head *bh) { struct block_device *bd; @@ -126,7 +92,7 @@ static void sync_buffer(struct buffer_head *bh) void fastcall __lock_buffer(struct buffer_head *bh) { wait_queue_head_t *wqh = bh_waitq_head(bh); - DEFINE_BH_WAIT_EXCLUSIVE(wait, bh); + DEFINE_WAIT_BIT(wait, &bh->b_state, BH_Lock); do { prepare_to_wait_exclusive(wqh, &wait.wait, @@ -155,15 +121,13 @@ void fastcall unlock_buffer(struct buffer_head *bh) void __wait_on_buffer(struct buffer_head * bh) { wait_queue_head_t *wqh = bh_waitq_head(bh); - DEFINE_BH_WAIT(wait, bh); + DEFINE_WAIT_BIT(wait, &bh->b_state, BH_Lock); - do { - prepare_to_wait(wqh, &wait.wait, TASK_UNINTERRUPTIBLE); - if (buffer_locked(bh)) { - sync_buffer(bh); - io_schedule(); - } - } while (buffer_locked(bh)); + prepare_to_wait(wqh, &wait.wait, TASK_UNINTERRUPTIBLE); + if (buffer_locked(bh)) { + sync_buffer(bh); + io_schedule(); + } finish_wait(wqh, &wait.wait); } -- cgit v1.2.3 From 525b64cdbd0401b8d3cb5642159e5ec8e49290b7 Mon Sep 17 00:00:00 2001 From: William Lee Irwin III Date: Mon, 18 Oct 2004 18:00:05 -0700 Subject: [PATCH] eliminate bh waitqueue hashtable Eliminate the bh waitqueue hashtable using bit_waitqueue() via wait_on_bit() and wake_up_bit() to locate the waitqueue head associated with a bit. 
Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/buffer.c | 55 +++++++-------------------------------- fs/jbd/transaction.c | 10 +++---- include/linux/wait.h | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++++ kernel/wait.c | 11 ++++++++ 4 files changed, 98 insertions(+), 51 deletions(-) (limited to 'fs/buffer.c') diff --git a/fs/buffer.c b/fs/buffer.c index 6eeafe142756..a8cfc265ec64 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -45,14 +45,6 @@ static void invalidate_bh_lrus(void); #define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers) -/* - * Hashed waitqueue_head's for wait_on_buffer() - */ -#define BH_WAIT_TABLE_ORDER 7 -static struct bh_wait_queue_head { - wait_queue_head_t wqh; -} ____cacheline_aligned_in_smp bh_wait_queue_heads[1<<BH_WAIT_TABLE_ORDER]; - inline void init_buffer(struct buffer_head *bh, bh_end_io_t *handler, void *private) { @@ -60,49 +52,31 @@ init_buffer(struct buffer_head *bh, bh_end_io_t *handler, void *private) bh->b_private = private; } -/* - * Return the address of the waitqueue_head to be used for this - * buffer_head - */ -wait_queue_head_t *bh_waitq_head(struct buffer_head *bh) -{ - return &bh_wait_queue_heads[hash_ptr(bh, BH_WAIT_TABLE_ORDER)].wqh; -} -EXPORT_SYMBOL(bh_waitq_head); - void wake_up_buffer(struct buffer_head *bh) { - wait_queue_head_t *wq = bh_waitq_head(bh); - smp_mb(); - __wake_up_bit(wq, &bh->b_state, BH_Lock); + wake_up_bit(&bh->b_state, BH_Lock); } EXPORT_SYMBOL(wake_up_buffer); -static void sync_buffer(struct buffer_head *bh) +static int sync_buffer(void *word) { struct block_device *bd; + struct buffer_head *bh + = container_of(word, struct buffer_head, b_state); smp_mb(); bd = bh->b_bdev; if (bd) blk_run_address_space(bd->bd_inode->i_mapping); + io_schedule(); + return 0; } void fastcall __lock_buffer(struct buffer_head *bh) { - wait_queue_head_t *wqh = bh_waitq_head(bh); - DEFINE_WAIT_BIT(wait, &bh->b_state, BH_Lock); - - do { - prepare_to_wait_exclusive(wqh, &wait.wait, - TASK_UNINTERRUPTIBLE); - if (buffer_locked(bh)) { - sync_buffer(bh); - io_schedule(); - } - } while (test_set_buffer_locked(bh)); - finish_wait(wqh, &wait.wait); + wait_on_bit_lock(&bh->b_state, 
BH_Lock, sync_buffer, + TASK_UNINTERRUPTIBLE); } EXPORT_SYMBOL(__lock_buffer); @@ -120,15 +94,7 @@ void fastcall unlock_buffer(struct buffer_head *bh) */ void __wait_on_buffer(struct buffer_head * bh) { - wait_queue_head_t *wqh = bh_waitq_head(bh); - DEFINE_WAIT_BIT(wait, &bh->b_state, BH_Lock); - - prepare_to_wait(wqh, &wait.wait, TASK_UNINTERRUPTIBLE); - if (buffer_locked(bh)) { - sync_buffer(bh); - io_schedule(); - } - finish_wait(wqh, &wait.wait); + wait_on_bit(&bh->b_state, BH_Lock, sync_buffer, TASK_UNINTERRUPTIBLE); } static void @@ -3087,14 +3053,11 @@ static int buffer_cpu_notify(struct notifier_block *self, void __init buffer_init(void) { - int i; int nrpages; bh_cachep = kmem_cache_create("buffer_head", sizeof(struct buffer_head), 0, SLAB_PANIC, init_buffer_head, NULL); - for (i = 0; i < ARRAY_SIZE(bh_wait_queue_heads); i++) - init_waitqueue_head(&bh_wait_queue_heads[i].wqh); /* * Limit the bh occupancy to 10% of ZONE_NORMAL diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index 18a678ce2591..304165faa19d 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c @@ -633,21 +633,21 @@ repeat: * disk then we cannot do copy-out here. 
*/ if (jh->b_jlist == BJ_Shadow) { - wait_queue_head_t *wqh; - DEFINE_WAIT(wait); + DEFINE_WAIT_BIT(wait, &bh->b_state, BH_Lock); + wait_queue_head_t *wqh + = bit_waitqueue(&bh->b_state, BH_Lock); JBUFFER_TRACE(jh, "on shadow: sleep"); jbd_unlock_bh_state(bh); /* commit wakes up all shadow buffers after IO */ - wqh = bh_waitq_head(bh); for ( ; ; ) { - prepare_to_wait(wqh, &wait, + prepare_to_wait(wqh, &wait.wait, TASK_UNINTERRUPTIBLE); if (jh->b_jlist != BJ_Shadow) break; schedule(); } - finish_wait(wqh, &wait); + finish_wait(wqh, &wait.wait); goto repeat; } diff --git a/include/linux/wait.h b/include/linux/wait.h index d9dfd7e32e7b..f58a313cc5b3 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -24,6 +24,7 @@ #include #include #include +#include typedef struct __wait_queue wait_queue_t; typedef int (*wait_queue_func_t)(wait_queue_t *wait, unsigned mode, int sync, void *key); @@ -141,6 +142,22 @@ extern void FASTCALL(__wake_up_sync(wait_queue_head_t *q, unsigned int mode, int void FASTCALL(__wake_up_bit(wait_queue_head_t *, void *, int)); int FASTCALL(__wait_on_bit(wait_queue_head_t *, struct wait_bit_queue *, void *, int, int (*)(void *), unsigned)); int FASTCALL(__wait_on_bit_lock(wait_queue_head_t *, struct wait_bit_queue *, void *, int, int (*)(void *), unsigned)); +wait_queue_head_t *FASTCALL(bit_waitqueue(void *, int)); + +/** + * wake_up_bit - wake up a waiter on a bit + * @word: the word being waited on, a kernel virtual address + * @bit: the bit of the word being waited on + * + * There is a standard hashed waitqueue table for generic use. This + * is the part of the hashtable's accessor API that wakes up waiters + * on a bit. For instance, if one were to have waiters on a bitflag, + * one would call wake_up_bit() after clearing the bit. 
+ */ +static inline void wake_up_bit(void *word, int bit) +{ + __wake_up_bit(bit_waitqueue(word, bit), word, bit); +} #define wake_up(x) __wake_up(x, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 1, NULL) #define wake_up_nr(x, nr) __wake_up(x, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, nr, NULL) @@ -344,6 +361,62 @@ int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *key); wait->func = autoremove_wake_function; \ INIT_LIST_HEAD(&wait->task_list); \ } while (0) + +/** + * wait_on_bit - wait for a bit to be cleared + * @word: the word being waited on, a kernel virtual address + * @bit: the bit of the word being waited on + * @action: the function used to sleep, which may take special actions + * @mode: the task state to sleep in + * + * There is a standard hashed waitqueue table for generic use. This + * is the part of the hashtable's accessor API that waits on a bit. + * For instance, if one were to have waiters on a bitflag, one would + * call wait_on_bit() in threads waiting for the bit to clear. + * One uses wait_on_bit() where one is waiting for the bit to clear, + * but has no intention of setting it. + */ +static inline int wait_on_bit(void *word, int bit, + int (*action)(void *), unsigned mode) +{ + DEFINE_WAIT_BIT(q, word, bit); + wait_queue_head_t *wqh; + + if (!test_bit(bit, word)) + return 0; + + wqh = bit_waitqueue(word, bit); + return __wait_on_bit(wqh, &q, word, bit, action, mode); +} + +/** + * wait_on_bit_lock - wait for a bit to be cleared, when wanting to set it + * @word: the word being waited on, a kernel virtual address + * @bit: the bit of the word being waited on + * @action: the function used to sleep, which may take special actions + * @mode: the task state to sleep in + * + * There is a standard hashed waitqueue table for generic use. This + * is the part of the hashtable's accessor API that waits on a bit + * when one intends to set it, for instance, trying to lock bitflags. 
+ * For instance, if one were to have waiters trying to set bitflag + * and waiting for it to clear before setting it, one would call + * wait_on_bit_lock() in threads waiting to be able to set the bit. + * One uses wait_on_bit_lock() where one is waiting for the bit to + * clear with the intention of setting it, and when done, clearing it. + */ +static inline int wait_on_bit_lock(void *word, int bit, + int (*action)(void *), unsigned mode) +{ + DEFINE_WAIT_BIT(q, word, bit); + wait_queue_head_t *wqh; + + if (!test_and_set_bit(bit, word)) + return 0; + + wqh = bit_waitqueue(word, bit); + return __wait_on_bit_lock(wqh, &q, word, bit, action, mode); +} #endif /* __KERNEL__ */ diff --git a/kernel/wait.c b/kernel/wait.c index 29057f707dbd..e87ae721643c 100644 --- a/kernel/wait.c +++ b/kernel/wait.c @@ -8,6 +8,7 @@ #include #include #include +#include void fastcall add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait) { @@ -187,3 +188,13 @@ void fastcall __wake_up_bit(wait_queue_head_t *wq, void *word, int bit) __wake_up(wq, TASK_INTERRUPTIBLE|TASK_UNINTERRUPTIBLE, 1, &key); } EXPORT_SYMBOL(__wake_up_bit); + +fastcall wait_queue_head_t *bit_waitqueue(void *word, int bit) +{ + const int shift = BITS_PER_LONG == 32 ? 5 : 6; + const struct zone *zone = page_zone(virt_to_page(word)); + unsigned long val = (unsigned long)word << shift | bit; + + return &zone->wait_table[hash_long(val, zone->wait_table_bits)]; +} +EXPORT_SYMBOL(bit_waitqueue); -- cgit v1.2.3 From 91cd0c2bdb62901f3a8fcac3584b392f3c8115b0 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Mon, 18 Oct 2004 18:01:03 -0700 Subject: [PATCH] jbd wakeup fix Processes can sleep in do_get_write_access(), waiting for buffers to be removed from the BJ_Shadow state. We did this by doing a wake_up_buffer() in the commit path and sleeping on the buffer in do_get_write_access(). 
With the filtered bit-level wakeup code this doesn't work properly any more - the wake_up_buffer() accidentally wakes up tasks which are sleeping in lock_buffer() as well. Those tasks now implicitly assume that the buffer came unlocked. Net effect: Bogus I/O errors when reading journal blocks, because the buffer isn't up to date yet. Hence the recent spate of journal_bmap() failure reports. The patch creates a new jbd-private BH flag purely for this wakeup function. So a wake_up_bit(..., BH_Unshadow) doesn't wake up someone who is waiting for a wake_up_bit(BH_Lock). JBD was the only user of wake_up_buffer(), so remove it altogether. Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/buffer.c | 11 ++--------- fs/jbd/commit.c | 2 +- fs/jbd/transaction.c | 7 ++++--- include/linux/buffer_head.h | 1 - include/linux/jbd.h | 1 + 5 files changed, 8 insertions(+), 14 deletions(-) (limited to 'fs/buffer.c') diff --git a/fs/buffer.c b/fs/buffer.c index a8cfc265ec64..2a75b3f9efe4 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -52,13 +52,6 @@ init_buffer(struct buffer_head *bh, bh_end_io_t *handler, void *private) bh->b_private = private; } -void wake_up_buffer(struct buffer_head *bh) -{ - smp_mb(); - wake_up_bit(&bh->b_state, BH_Lock); -} -EXPORT_SYMBOL(wake_up_buffer); - static int sync_buffer(void *word) { struct block_device *bd; @@ -83,8 +76,8 @@ EXPORT_SYMBOL(__lock_buffer); void fastcall unlock_buffer(struct buffer_head *bh) { clear_buffer_locked(bh); - smp_mb__after_clear_bit(); - wake_up_buffer(bh); + smp_mb(); + wake_up_bit(&bh->b_state, BH_Lock); } /* diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index f8a1dea56611..b4d6654ef7f2 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c @@ -579,7 +579,7 @@ wait_for_iobuf: journal_file_buffer(jh, commit_transaction, BJ_Forget); /* Wake up any transactions which were waiting for this IO to complete */ - wake_up_buffer(bh); + wake_up_bit(&bh->b_state, BH_Unshadow); JBUFFER_TRACE(jh, "brelse shadowed buffer"); 
__brelse(bh); } diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index 304165faa19d..a168757d26af 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c @@ -633,9 +633,10 @@ repeat: * disk then we cannot do copy-out here. */ if (jh->b_jlist == BJ_Shadow) { - DEFINE_WAIT_BIT(wait, &bh->b_state, BH_Lock); - wait_queue_head_t *wqh - = bit_waitqueue(&bh->b_state, BH_Lock); + DEFINE_WAIT_BIT(wait, &bh->b_state, BH_Unshadow); + wait_queue_head_t *wqh; + + wqh = bit_waitqueue(&bh->b_state, BH_Unshadow); JBUFFER_TRACE(jh, "on shadow: sleep"); jbd_unlock_bh_state(bh); diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 367a8a313506..47fb6a02d630 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -155,7 +155,6 @@ void invalidate_bdev(struct block_device *, int); int sync_blockdev(struct block_device *bdev); void __wait_on_buffer(struct buffer_head *); wait_queue_head_t *bh_waitq_head(struct buffer_head *bh); -void wake_up_buffer(struct buffer_head *bh); int fsync_bdev(struct block_device *); struct super_block *freeze_bdev(struct block_device *); void thaw_bdev(struct block_device *, struct super_block *); diff --git a/include/linux/jbd.h b/include/linux/jbd.h index e65b90f1962c..dfdd307872bb 100644 --- a/include/linux/jbd.h +++ b/include/linux/jbd.h @@ -299,6 +299,7 @@ enum jbd_state_bits { BH_JBDDirty, /* Is dirty but journaled */ BH_State, /* Pins most journal_head state */ BH_JournalHead, /* Pins bh->b_private and jh->b_bh */ + BH_Unshadow, /* Dummy bit, for BJ_Shadow wakeup filtering */ }; BUFFER_FNS(JBD, jbd) -- cgit v1.2.3