summaryrefslogtreecommitdiff
path: root/include/linux
diff options
context:
space:
mode:
authorAndrew Morton <akpm@zip.com.au>2002-05-19 02:20:58 -0700
committerArnaldo Carvalho de Melo <acme@conectiva.com.br>2002-05-19 02:20:58 -0700
commit43152186ec28f3d4adf2a79ff8becacdfca9c82d (patch)
treea0e8366d2b4cc5ebf6c4535b5c92e425a2b839e9 /include/linux
parent6b9f3b418f690318f88fbfd2002bbd7ca344c04a (diff)
[PATCH] i_dirty_buffers locking fix
This fixes a race between try_to_free_buffers' call to __remove_inode_queue() and other users of b_inode_buffers (fsync_inode_buffers and mark_buffer_dirty_inode()). They are presently taking different locks. The patch relocates and redefines and clarifies(?) the role of inode.i_dirty_buffers. The 2.4 definition of i_dirty_buffers is "a list of random buffers which is protected by a kernel-wide lock". This definition needs to be narrowed in the 2.5 context. It is now "a list of buffers from a different mapping, protected by a lock within that mapping". This list of buffers is specifically for fsync(). As this is a "data plane" operation, all the structures have been moved out of the inode and into the address_space. So address_space now has: list_head private_list; A list, available to the address_space for any purpose. If that address_space chooses to use the helper functions mark_buffer_dirty_inode and sync_mapping_buffers() then this list will contain buffer_heads, attached via buffer_head.b_assoc_buffers. If the address_space does not call those helper functions then the list is free for other usage. The only requirement is that the list be list_empty() at destroy_inode() time. At least, this is the objective. At present, generic_file_write() will call generic_osync_inode(), which expects that list to contain buffer_heads. So private_list isn't useful for anything else yet. spinlock_t private_lock; A spinlock, available to the address_space. If the address_space is using try_to_free_buffers(), mark_inode_dirty_buffers() and fsync_inode_buffers() then this lock is used to protect the private_list of *other* mappings which have listed buffers from *this* mapping onto themselves. That is: for buffer_heads, mapping_A->private_lock does not protect mapping_A->private_list! It protects the b_assoc_buffers list from buffers which are backed by mapping_A and it protects mapping_B->private_list, mapping_C->private_list, ... So what we have here is a cross-mapping association. S_ISREG mappings maintain a list of buffers from the blockdev's address_space which they need to know about for a successful fsync(). The locking follows the buffers: the lock in in the blockdev's mapping, not in the S_ISREG file's mapping. For address_spaces which use try_to_free_buffers, private_lock is also (and quite unrelatedly) used for protection of the buffer ring at page->private. Exclusion between try_to_free_buffers(), __get_hash_table() and __set_page_dirty_buffers(). This is in fact its major use. address_space *assoc_mapping Sigh. This is the address of the mapping which backs the buffers which are attached to private_list. It's here so that generic_osync_inode() can locate the lock which protects this mapping's private_list. Will probably go away. A consequence of all the above is that: a) All the buffers at a mapping_A's ->private_list must come from the same mapping, mapping_B. There is no requirement that mapping_B be a blockdev mapping, but that's how it's used. There is a BUG() check in mark_buffer_dirty_inode() for this. b) blockdev mappings never have any buffers on ->private_list. It just never happens, and doesn't make a lot of sense. reiserfs is using b_inode_buffers for attaching dependent buffers to its journal and that caused a few problems. Fixed in reiserfs_releasepage.patch
Diffstat (limited to 'include/linux')
-rw-r--r--include/linux/buffer_head.h17
-rw-r--r--include/linux/fs.h9
2 files changed, 7 insertions, 19 deletions
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 500a082b7bcc..b2c54106b8c8 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -50,7 +50,7 @@ struct buffer_head {
struct block_device *b_bdev;
bh_end_io_t *b_end_io; /* I/O completion */
void *b_private; /* reserved for b_end_io */
- struct list_head b_inode_buffers; /* list of inode dirty buffers */
+ struct list_head b_assoc_buffers; /* associated with another mapping */
};
@@ -147,6 +147,8 @@ void create_empty_buffers(struct page *, unsigned long,
void end_buffer_io_sync(struct buffer_head *bh, int uptodate);
void buffer_insert_list(spinlock_t *lock,
struct buffer_head *, struct list_head *);
+int sync_mapping_buffers(struct address_space *mapping);
+void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode);
void mark_buffer_async_read(struct buffer_head *bh);
void mark_buffer_async_write(struct buffer_head *bh);
@@ -217,14 +219,6 @@ static inline void put_bh(struct buffer_head *bh)
atomic_dec(&bh->b_count);
}
-static inline void
-mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode)
-{
- mark_buffer_dirty(bh);
- buffer_insert_list(&inode->i_bufferlist_lock,
- bh, &inode->i_dirty_buffers);
-}
-
/*
* If an error happens during the make_request, this function
* has to be recalled. It marks the buffer as clean and not
@@ -243,11 +237,6 @@ static inline void buffer_IO_error(struct buffer_head * bh)
bh->b_end_io(bh, buffer_uptodate(bh));
}
-static inline int fsync_inode_buffers(struct inode *inode)
-{
- return fsync_buffers_list(&inode->i_bufferlist_lock,
- &inode->i_dirty_buffers);
-}
static inline void brelse(struct buffer_head *buf)
{
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 4b858f90c6fe..25578c7a5e62 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -306,6 +306,7 @@ struct address_space_operations {
};
struct address_space {
+ struct inode *host; /* owner: inode, block_device */
struct radix_tree_root page_tree; /* radix tree of all pages */
rwlock_t page_lock; /* and rwlock protecting it */
struct list_head clean_pages; /* list of clean pages */
@@ -314,13 +315,15 @@ struct address_space {
struct list_head io_pages; /* being prepared for I/O */
unsigned long nrpages; /* number of total pages */
struct address_space_operations *a_ops; /* methods */
- struct inode *host; /* owner: inode, block_device */
list_t i_mmap; /* list of private mappings */
list_t i_mmap_shared; /* list of private mappings */
spinlock_t i_shared_lock; /* and spinlock protecting it */
unsigned long dirtied_when; /* jiffies of first page dirtying */
int gfp_mask; /* how to allocate the pages */
unsigned long *ra_pages; /* device readahead */
+ spinlock_t private_lock; /* for use by the address_space */
+ struct list_head private_list; /* ditto */
+ struct address_space *assoc_mapping; /* ditto */
};
struct char_device {
@@ -350,10 +353,6 @@ struct inode {
struct list_head i_hash;
struct list_head i_list;
struct list_head i_dentry;
-
- struct list_head i_dirty_buffers; /* uses i_bufferlist_lock */
- spinlock_t i_bufferlist_lock;
-
unsigned long i_ino;
atomic_t i_count;
kdev_t i_dev;