From 43152186ec28f3d4adf2a79ff8becacdfca9c82d Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Sun, 19 May 2002 02:20:58 -0700 Subject: [PATCH] i_dirty_buffers locking fix This fixes a race between try_to_free_buffers' call to __remove_inode_queue() and other users of b_inode_buffers (fsync_inode_buffers and mark_buffer_dirty_inode()). They are presently taking different locks. The patch relocates and redefines and clarifies(?) the role of inode.i_dirty_buffers. The 2.4 definition of i_dirty_buffers is "a list of random buffers which is protected by a kernel-wide lock". This definition needs to be narrowed in the 2.5 context. It is now "a list of buffers from a different mapping, protected by a lock within that mapping". This list of buffers is specifically for fsync(). As this is a "data plane" operation, all the structures have been moved out of the inode and into the address_space. So address_space now has: list_head private_list; A list, available to the address_space for any purpose. If that address_space chooses to use the helper functions mark_buffer_dirty_inode and sync_mapping_buffers() then this list will contain buffer_heads, attached via buffer_head.b_assoc_buffers. If the address_space does not call those helper functions then the list is free for other usage. The only requirement is that the list be list_empty() at destroy_inode() time. At least, this is the objective. At present, generic_file_write() will call generic_osync_inode(), which expects that list to contain buffer_heads. So private_list isn't useful for anything else yet. spinlock_t private_lock; A spinlock, available to the address_space. If the address_space is using try_to_free_buffers(), mark_inode_dirty_buffers() and fsync_inode_buffers() then this lock is used to protect the private_list of *other* mappings which have listed buffers from *this* mapping onto themselves. That is: for buffer_heads, mapping_A->private_lock does not protect mapping_A->private_list! It protects the b_assoc_buffers list from buffers which are backed by mapping_A and it protects mapping_B->private_list, mapping_C->private_list, ... So what we have here is a cross-mapping association. S_ISREG mappings maintain a list of buffers from the blockdev's address_space which they need to know about for a successful fsync(). The locking follows the buffers: the lock in in the blockdev's mapping, not in the S_ISREG file's mapping. For address_spaces which use try_to_free_buffers, private_lock is also (and quite unrelatedly) used for protection of the buffer ring at page->private. Exclusion between try_to_free_buffers(), __get_hash_table() and __set_page_dirty_buffers(). This is in fact its major use. address_space *assoc_mapping Sigh. This is the address of the mapping which backs the buffers which are attached to private_list. It's here so that generic_osync_inode() can locate the lock which protects this mapping's private_list. Will probably go away. A consequence of all the above is that: a) All the buffers at a mapping_A's ->private_list must come from the same mapping, mapping_B. There is no requirement that mapping_B be a blockdev mapping, but that's how it's used. There is a BUG() check in mark_buffer_dirty_inode() for this. b) blockdev mappings never have any buffers on ->private_list. It just never happens, and doesn't make a lot of sense. reiserfs is using b_inode_buffers for attaching dependent buffers to its journal and that caused a few problems. Fixed in reiserfs_releasepage.patch --- include/linux/buffer_head.h | 17 +++-------------- include/linux/fs.h | 9 ++++----- 2 files changed, 7 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 500a082b7bcc..b2c54106b8c8 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -50,7 +50,7 @@ struct buffer_head { struct block_device *b_bdev; bh_end_io_t *b_end_io; /* I/O completion */ void *b_private; /* reserved for b_end_io */ - struct list_head b_inode_buffers; /* list of inode dirty buffers */ + struct list_head b_assoc_buffers; /* associated with another mapping */ }; @@ -147,6 +147,8 @@ void create_empty_buffers(struct page *, unsigned long, void end_buffer_io_sync(struct buffer_head *bh, int uptodate); void buffer_insert_list(spinlock_t *lock, struct buffer_head *, struct list_head *); +int sync_mapping_buffers(struct address_space *mapping); +void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode); void mark_buffer_async_read(struct buffer_head *bh); void mark_buffer_async_write(struct buffer_head *bh); @@ -217,14 +219,6 @@ static inline void put_bh(struct buffer_head *bh) atomic_dec(&bh->b_count); } -static inline void -mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode) -{ - mark_buffer_dirty(bh); - buffer_insert_list(&inode->i_bufferlist_lock, - bh, &inode->i_dirty_buffers); -} - /* * If an error happens during the make_request, this function * has to be recalled. It marks the buffer as clean and not @@ -243,11 +237,6 @@ static inline void buffer_IO_error(struct buffer_head * bh) bh->b_end_io(bh, buffer_uptodate(bh)); } -static inline int fsync_inode_buffers(struct inode *inode) -{ - return fsync_buffers_list(&inode->i_bufferlist_lock, - &inode->i_dirty_buffers); -} static inline void brelse(struct buffer_head *buf) { diff --git a/include/linux/fs.h b/include/linux/fs.h index 4b858f90c6fe..25578c7a5e62 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -306,6 +306,7 @@ struct address_space_operations { }; struct address_space { + struct inode *host; /* owner: inode, block_device */ struct radix_tree_root page_tree; /* radix tree of all pages */ rwlock_t page_lock; /* and rwlock protecting it */ struct list_head clean_pages; /* list of clean pages */ @@ -314,13 +315,15 @@ struct address_space { struct list_head io_pages; /* being prepared for I/O */ unsigned long nrpages; /* number of total pages */ struct address_space_operations *a_ops; /* methods */ - struct inode *host; /* owner: inode, block_device */ list_t i_mmap; /* list of private mappings */ list_t i_mmap_shared; /* list of private mappings */ spinlock_t i_shared_lock; /* and spinlock protecting it */ unsigned long dirtied_when; /* jiffies of first page dirtying */ int gfp_mask; /* how to allocate the pages */ unsigned long *ra_pages; /* device readahead */ + spinlock_t private_lock; /* for use by the address_space */ + struct list_head private_list; /* ditto */ + struct address_space *assoc_mapping; /* ditto */ }; struct char_device { @@ -350,10 +353,6 @@ struct inode { struct list_head i_hash; struct list_head i_list; struct list_head i_dentry; - - struct list_head i_dirty_buffers; /* uses i_bufferlist_lock */ - spinlock_t i_bufferlist_lock; - unsigned long i_ino; atomic_t i_count; kdev_t i_dev; -- cgit v1.2.3