From 5a7a781f7c301826c0d35f470590acd4c271c036 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 31 Dec 2002 20:07:20 -0800 Subject: [PATCH] include only in files actually needing it fs.h only needs the forward-declaration of struct statfs --- include/linux/coda_psdev.h | 2 ++ include/linux/efs_fs.h | 2 ++ include/linux/ext3_fs.h | 3 +++ include/linux/fs.h | 2 +- include/linux/msdos_fs.h | 3 +++ include/linux/nfsd/xdr.h | 1 + 6 files changed, 12 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/coda_psdev.h b/include/linux/coda_psdev.h index 0e3f73f7a73e..5c3fefddd4f7 100644 --- a/include/linux/coda_psdev.h +++ b/include/linux/coda_psdev.h @@ -6,6 +6,8 @@ #define CODA_SUPER_MAGIC 0x73757245 +struct statfs; + struct coda_sb_info { struct venus_comm * sbi_vcomm; diff --git a/include/linux/efs_fs.h b/include/linux/efs_fs.h index a636043ab8a3..31096307c124 100644 --- a/include/linux/efs_fs.h +++ b/include/linux/efs_fs.h @@ -49,6 +49,8 @@ static inline struct efs_sb_info *SUPER_INFO(struct super_block *sb) return sb->s_fs_info; } +struct statfs; + extern struct inode_operations efs_dir_inode_operations; extern struct file_operations efs_dir_operations; extern struct address_space_operations efs_symlink_aops; diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h index 634459be7d64..f909a967778e 100644 --- a/include/linux/ext3_fs.h +++ b/include/linux/ext3_fs.h @@ -20,6 +20,9 @@ #include #include + +struct statfs; + /* * The second extended filesystem constants/structures */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 3fa13b80747d..878f2d2e4a9f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include @@ -25,6 +24,7 @@ struct iovec; struct nameidata; struct pipe_inode_info; struct poll_table_struct; +struct statfs; struct vm_area_struct; struct vfsmount; diff --git a/include/linux/msdos_fs.h b/include/linux/msdos_fs.h index 
1cb5e7d2b3d7..2945cb406b64 100644 --- a/include/linux/msdos_fs.h +++ b/include/linux/msdos_fs.h @@ -8,6 +8,9 @@ #include #include +struct statfs; + + #define SECTOR_SIZE 512 /* sector size (bytes) */ #define SECTOR_BITS 9 /* log2(SECTOR_SIZE) */ #define MSDOS_DPB (MSDOS_DPS) /* dir entries per block */ diff --git a/include/linux/nfsd/xdr.h b/include/linux/nfsd/xdr.h index b00141bc86e5..970474550bb9 100644 --- a/include/linux/nfsd/xdr.h +++ b/include/linux/nfsd/xdr.h @@ -8,6 +8,7 @@ #define LINUX_NFSD_H #include +#include #include struct nfsd_fhandle { -- cgit v1.2.3 From 3d1864bcd5a61eeeaedcf22c9c1b13c5d3fdf448 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 31 Dec 2002 23:32:55 -0800 Subject: [PATCH] devfs creptomancy As already state in the thread about Adam's devfs reimplementation there is much devfs functionality that is unused or only used by the arch/ia64/sn/ code that currently doesn't even compile in 2.5 and that will get it's own filesystem that fits the needs better when SGI moves to 2.6. 
(the first hunk is the only exception to the above rule, but it's just a debug printk :)) --- drivers/media/radio/miropcm20-rds.c | 3 - fs/devfs/base.c | 141 +----------------------------------- include/linux/devfs_fs_kernel.h | 44 ----------- 3 files changed, 2 insertions(+), 186 deletions(-) (limited to 'include/linux') diff --git a/drivers/media/radio/miropcm20-rds.c b/drivers/media/radio/miropcm20-rds.c index 41f99c9eb818..17a32bc17d96 100644 --- a/drivers/media/radio/miropcm20-rds.c +++ b/drivers/media/radio/miropcm20-rds.c @@ -119,9 +119,6 @@ static int __init miropcm20_rds_init(void) return -EINVAL; printk("miropcm20-rds: userinterface driver loaded.\n"); -#if DEBUG - printk("v4l-name: %s\n", devfs_get_name(pcm20_radio.devfs_handle, 0)); -#endif return 0; } diff --git a/fs/devfs/base.c b/fs/devfs/base.c index b4cbba51d30e..e3c8c13b7bfd 100644 --- a/fs/devfs/base.c +++ b/fs/devfs/base.c @@ -1764,29 +1764,6 @@ devfs_handle_t devfs_mk_dir (devfs_handle_t dir, const char *name, void *info) } /* End Function devfs_mk_dir */ -/** - * devfs_get_handle - Find the handle of a devfs entry. - * @dir: The handle to the parent devfs directory entry. If this is %NULL the - * name is relative to the root of the devfs. - * @name: The name of the entry. - * @traverse_symlinks: If %TRUE then symlink entries in the devfs namespace are - * traversed. Symlinks pointing out of the devfs namespace will cause a - * failure. Symlink traversal consumes stack space. - * - * Returns a handle which may later be used in a call to - * devfs_unregister(), devfs_get_flags(), or devfs_set_flags(). A - * subsequent devfs_put() is required to decrement the refcount. - * On failure %NULL is returned. - */ - -devfs_handle_t devfs_get_handle (devfs_handle_t dir, const char *name, - int traverse_symlinks) -{ - if (!name || !name[0]) - return NULL; - return _devfs_find_entry (dir, name, traverse_symlinks); -} /* End Function devfs_get_handle */ - void devfs_remove(const char *fmt, ...) 
{ char buf[64]; @@ -1795,27 +1772,13 @@ void devfs_remove(const char *fmt, ...) va_start(args, fmt); n = vsnprintf(buf, 64, fmt, args); - if (n < 64) { - devfs_handle_t de = devfs_get_handle(NULL, buf, 0); + if (n < 64 && buf[0]) { + devfs_handle_t de = _devfs_find_entry(NULL, buf, 0); devfs_unregister(de); devfs_put(de); } } -/** - * devfs_get_handle_from_inode - Get the devfs handle for a VFS inode. - * @inode: The VFS inode. - * - * Returns the devfs handle on success, else %NULL. - */ - -devfs_handle_t devfs_get_handle_from_inode (struct inode *inode) -{ - if (!inode || !inode->i_sb) return NULL; - if (inode->i_sb->s_magic != DEVFS_SUPER_MAGIC) return NULL; - return get_devfs_entry_from_vfs_inode (inode); -} /* End Function devfs_get_handle_from_inode */ - /** * devfs_generate_path - Generate a pathname for an entry, relative to the devfs root. @@ -1905,97 +1868,6 @@ int devfs_set_file_size (devfs_handle_t de, unsigned long size) } /* End Function devfs_set_file_size */ -/** - * devfs_get_info - Get the info pointer written to private_data of @de upon open. - * @de: The handle to the device entry. - * - * Returns the info pointer. - */ -void *devfs_get_info (devfs_handle_t de) -{ - if (de == NULL) return NULL; - VERIFY_ENTRY (de); - return de->info; -} /* End Function devfs_get_info */ - - -/** - * devfs_set_info - Set the info pointer written to private_data upon open. - * @de: The handle to the device entry. - * @info: pointer to the data - * - * Returns 0 on success, else a negative error code. - */ -int devfs_set_info (devfs_handle_t de, void *info) -{ - if (de == NULL) return -EINVAL; - VERIFY_ENTRY (de); - de->info = info; - return 0; -} /* End Function devfs_set_info */ - - -/** - * devfs_get_parent - Get the parent device entry. - * @de: The handle to the device entry. - * - * Returns the parent device entry if it exists, else %NULL. 
- */ -devfs_handle_t devfs_get_parent (devfs_handle_t de) -{ - if (de == NULL) return NULL; - VERIFY_ENTRY (de); - return de->parent; -} /* End Function devfs_get_parent */ - - -/** - * devfs_get_first_child - Get the first leaf node in a directory. - * @de: The handle to the device entry. - * - * Returns the leaf node device entry if it exists, else %NULL. - */ - -devfs_handle_t devfs_get_first_child (devfs_handle_t de) -{ - if (de == NULL) return NULL; - VERIFY_ENTRY (de); - if ( !S_ISDIR (de->mode) ) return NULL; - return de->u.dir.first; -} /* End Function devfs_get_first_child */ - - -/** - * devfs_get_next_sibling - Get the next sibling leaf node. for a device entry. - * @de: The handle to the device entry. - * - * Returns the leaf node device entry if it exists, else %NULL. - */ - -devfs_handle_t devfs_get_next_sibling (devfs_handle_t de) -{ - if (de == NULL) return NULL; - VERIFY_ENTRY (de); - return de->next; -} /* End Function devfs_get_next_sibling */ - -/** - * devfs_get_name - Get the name for a device entry in its parent directory. - * @de: The handle to the device entry. - * @namelen: The length of the name is written here. This may be %NULL. - * - * Returns the name on success, else %NULL. 
- */ - -const char *devfs_get_name (devfs_handle_t de, unsigned int *namelen) -{ - if (de == NULL) return NULL; - VERIFY_ENTRY (de); - if (namelen != NULL) *namelen = de->namelen; - return de->name; -} /* End Function devfs_get_name */ - - /** * devfs_only - returns true if "devfs=only" is a boot option * @@ -2079,17 +1951,8 @@ EXPORT_SYMBOL(devfs_register); EXPORT_SYMBOL(devfs_unregister); EXPORT_SYMBOL(devfs_mk_symlink); EXPORT_SYMBOL(devfs_mk_dir); -EXPORT_SYMBOL(devfs_get_handle); EXPORT_SYMBOL(devfs_remove); -EXPORT_SYMBOL(devfs_get_handle_from_inode); EXPORT_SYMBOL(devfs_generate_path); -EXPORT_SYMBOL(devfs_set_file_size); -EXPORT_SYMBOL(devfs_get_info); -EXPORT_SYMBOL(devfs_set_info); -EXPORT_SYMBOL(devfs_get_parent); -EXPORT_SYMBOL(devfs_get_first_child); -EXPORT_SYMBOL(devfs_get_next_sibling); -EXPORT_SYMBOL(devfs_get_name); EXPORT_SYMBOL(devfs_only); diff --git a/include/linux/devfs_fs_kernel.h b/include/linux/devfs_fs_kernel.h index 15c85cccdded..491dc297b930 100644 --- a/include/linux/devfs_fs_kernel.h +++ b/include/linux/devfs_fs_kernel.h @@ -53,17 +53,8 @@ extern int devfs_mk_symlink (devfs_handle_t dir, const char *name, devfs_handle_t *handle, void *info); extern devfs_handle_t devfs_mk_dir (devfs_handle_t dir, const char *name, void *info); -extern devfs_handle_t devfs_get_handle (devfs_handle_t dir, const char *name, - int traverse_symlinks); -extern devfs_handle_t devfs_get_handle_from_inode (struct inode *inode); extern int devfs_generate_path (devfs_handle_t de, char *path, int buflen); extern int devfs_set_file_size (devfs_handle_t de, unsigned long size); -extern void *devfs_get_info (devfs_handle_t de); -extern int devfs_set_info (devfs_handle_t de, void *info); -extern devfs_handle_t devfs_get_parent (devfs_handle_t de); -extern devfs_handle_t devfs_get_first_child (devfs_handle_t de); -extern devfs_handle_t devfs_get_next_sibling (devfs_handle_t de); -extern const char *devfs_get_name (devfs_handle_t de, unsigned int *namelen); extern int 
devfs_only (void); extern int devfs_register_tape (devfs_handle_t de); extern void devfs_unregister_tape(int num); @@ -115,19 +106,9 @@ static inline devfs_handle_t devfs_mk_dir (devfs_handle_t dir, { return NULL; } -static inline devfs_handle_t devfs_get_handle (devfs_handle_t dir, - const char *name, - int traverse_symlinks) -{ - return NULL; -} static inline void devfs_remove(const char *fmt, ...) { } -static inline devfs_handle_t devfs_get_handle_from_inode (struct inode *inode) -{ - return NULL; -} static inline int devfs_generate_path (devfs_handle_t de, char *path, int buflen) { @@ -137,31 +118,6 @@ static inline int devfs_set_file_size (devfs_handle_t de, unsigned long size) { return -ENOSYS; } -static inline void *devfs_get_info (devfs_handle_t de) -{ - return NULL; -} -static inline int devfs_set_info (devfs_handle_t de, void *info) -{ - return 0; -} -static inline devfs_handle_t devfs_get_parent (devfs_handle_t de) -{ - return NULL; -} -static inline devfs_handle_t devfs_get_first_child (devfs_handle_t de) -{ - return NULL; -} -static inline devfs_handle_t devfs_get_next_sibling (devfs_handle_t de) -{ - return NULL; -} -static inline const char *devfs_get_name (devfs_handle_t de, - unsigned int *namelen) -{ - return NULL; -} static inline int devfs_only (void) { return 0; -- cgit v1.2.3 From b0de9c76475f7d8f6917d8ed8bb55259e1512c37 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 1 Jan 2003 00:46:22 -0800 Subject: [PATCH] quota locking update I've been carrying this since Jan sent it out a month or two ago. I don't know if anyone has tested it though. The sort of people who use quotas tend to like nice stable kernels. I read through it, but can't say that I know enough about quotas to know if it makes sense. The wait_on_dquot() synchronisation is a bit odd. I do need to do a round of stability testing with this and ext3 - the interaction between quotas and ext3 is an area where we've had deadlocks in the past. 
But the quota locking is definitely looking crufty, and I'd suggest that we run with this.. Patch from Jan Kara "I'm resending you the patch with new quota SMP locking. The patch removes BKL and replaces it with two spinlocks protecting quota lists and data stored in dquot structures. Also non-SMP locking was changed a bit make SMP locking easier (eg. we got rid of not very nice dq_dup_ref counters). The patch is against 2.5.48 but applies well also to 2.5.49. Would you please apply the patch?" - Change dqoff_sem from a semaphore to an rwsem. - Convert dqi_flags from an int to a ulong and use test_bit/set_bit rather thatn &/| - The various exported quota operations now run without lock_kernel(). This means that things like DQUOT_ALLOC_SPACE no longer take lock_kernel() in out high-perfomance filesystems. Nice. - Replace lock_kernel() in the quota code with two quota-private global locks. - Replace all the open-coded waitqueue management with a semaphore (wait_on_dquot()) --- fs/dquot.c | 691 ++++++++++++++++++++--------------------------- fs/inode.c | 4 +- fs/quota.c | 9 +- fs/super.c | 2 +- include/linux/quota.h | 71 +++-- include/linux/quotaops.h | 55 ++-- 6 files changed, 356 insertions(+), 476 deletions(-) (limited to 'include/linux') diff --git a/fs/dquot.c b/fs/dquot.c index cfef15182227..21ba3cd19783 100644 --- a/fs/dquot.c +++ b/fs/dquot.c @@ -49,6 +49,9 @@ * formats registering. * Jan Kara, , 2001,2002 * + * New SMP locking. + * Jan Kara, , 10/2002 + * * (C) Copyright 1994 - 1997 Marco van Wieringen */ @@ -74,15 +77,32 @@ #include +#define __DQUOT_PARANOIA + +/* + * There are two quota SMP locks. dq_list_lock protects all lists with quotas + * and quota formats and also dqstats structure containing statistics about the + * lists. dq_data_lock protects data from dq_dqb and also mem_dqinfo structures + * and also guards consistency of dquot->dq_dqb with inode->i_blocks, i_bytes. 
+ * Note that we don't have to do the locking of i_blocks and i_bytes when the + * quota is disabled - i_sem should serialize the access. dq_data_lock should + * be always grabbed before dq_list_lock. + * + * Note that some things (eg. sb pointer, type, id) doesn't change during + * the life of the dquot structure and so needn't to be protected by a lock + */ +spinlock_t dq_list_lock = SPIN_LOCK_UNLOCKED; +spinlock_t dq_data_lock = SPIN_LOCK_UNLOCKED; + static char *quotatypes[] = INITQFNAMES; static struct quota_format_type *quota_formats; /* List of registered formats */ int register_quota_format(struct quota_format_type *fmt) { - lock_kernel(); + spin_lock(&dq_list_lock); fmt->qf_next = quota_formats; quota_formats = fmt; - unlock_kernel(); + spin_unlock(&dq_list_lock); return 0; } @@ -90,22 +110,22 @@ void unregister_quota_format(struct quota_format_type *fmt) { struct quota_format_type **actqf; - lock_kernel(); + spin_lock(&dq_list_lock); for (actqf = "a_formats; *actqf && *actqf != fmt; actqf = &(*actqf)->qf_next); if (*actqf) *actqf = (*actqf)->qf_next; - unlock_kernel(); + spin_unlock(&dq_list_lock); } static struct quota_format_type *find_quota_format(int id) { struct quota_format_type *actqf; - lock_kernel(); + spin_lock(&dq_list_lock); for (actqf = quota_formats; actqf && actqf->qf_fmt_id != id; actqf = actqf->qf_next); if (actqf && !try_module_get(actqf->qf_owner)) actqf = NULL; - unlock_kernel(); + spin_unlock(&dq_list_lock); return actqf; } @@ -136,18 +156,20 @@ static void put_quota_format(struct quota_format_type *fmt) */ /* - * Note that any operation which operates on dquot data (ie. dq_dqb) mustn't - * block while it's updating/reading it. Otherwise races would occur. + * Note that any operation which operates on dquot data (ie. dq_dqb) must + * hold dq_data_lock. * - * Locked dquots might not be referenced in inodes - operations like - * add_dquot_space() does dqduplicate() and would complain. 
Currently - * dquot it locked only once in its existence - when it's being read - * to memory on first dqget() and at that time it can't be referenced - * from inode. Write operations on dquots don't hold dquot lock as they - * copy data to internal buffers before writing anyway and copying as well - * as any data update should be atomic. Also nobody can change used - * entries in dquot structure as this is done only when quota is destroyed - * and invalidate_dquots() waits for dquot to have dq_count == 0. + * Any operation working with dquots must hold dqoff_sem. If operation is + * just reading pointers from inodes than read lock is enough. If pointers + * are altered function must hold write lock. + * + * Locked dquots might not be referenced in inodes. Currently dquot it locked + * only once in its existence - when it's being read to memory on first dqget() + * and at that time it can't be referenced from inode. Write operations on + * dquots don't hold dquot lock as they copy data to internal buffers before + * writing anyway and copying as well as any data update should be atomic. Also + * nobody can change used entries in dquot structure as this is done only when + * quota is destroyed and invalidate_dquots() is called only when dq_count == 0. 
*/ static LIST_HEAD(inuse_list); @@ -156,34 +178,14 @@ static struct list_head dquot_hash[NR_DQHASH]; struct dqstats dqstats; -static void dqput(struct dquot *); -static struct dquot *dqduplicate(struct dquot *); - -static inline void get_dquot_ref(struct dquot *dquot) -{ - dquot->dq_count++; -} - -static inline void put_dquot_ref(struct dquot *dquot) -{ - dquot->dq_count--; -} - -static inline void get_dquot_dup_ref(struct dquot *dquot) -{ - dquot->dq_dup_ref++; -} - -static inline void put_dquot_dup_ref(struct dquot *dquot) -{ - dquot->dq_dup_ref--; -} - static inline int const hashfn(struct super_block *sb, unsigned int id, int type) { return((((unsigned long)sb>>L1_CACHE_SHIFT) ^ id) * (MAXQUOTAS - type)) % NR_DQHASH; } +/* + * Following list functions expect dq_list_lock to be held + */ static inline void insert_dquot_hash(struct dquot *dquot) { struct list_head *head = dquot_hash + hashfn(dquot->dq_sb, dquot->dq_id, dquot->dq_type); @@ -208,13 +210,6 @@ static inline struct dquot *find_dquot(unsigned int hashent, struct super_block return NODQUOT; } -/* Add a dquot to the head of the free list */ -static inline void put_dquot_head(struct dquot *dquot) -{ - list_add(&dquot->dq_free, &free_dquots); - dqstats.free_dquots++; -} - /* Add a dquot to the tail of the free list */ static inline void put_dquot_last(struct dquot *dquot) { @@ -222,13 +217,6 @@ static inline void put_dquot_last(struct dquot *dquot) dqstats.free_dquots++; } -/* Move dquot to the head of free list (it must be already on it) */ -static inline void move_dquot_head(struct dquot *dquot) -{ - list_del(&dquot->dq_free); - list_add(&dquot->dq_free, &free_dquots); -} - static inline void remove_free_dquot(struct dquot *dquot) { if (list_empty(&dquot->dq_free)) @@ -251,69 +239,10 @@ static inline void remove_inuse(struct dquot *dquot) list_del(&dquot->dq_inuse); } -static void __wait_on_dquot(struct dquot *dquot) -{ - DECLARE_WAITQUEUE(wait, current); - - add_wait_queue(&dquot->dq_wait_lock, &wait); 
-repeat: - set_current_state(TASK_UNINTERRUPTIBLE); - if (dquot->dq_flags & DQ_LOCKED) { - schedule(); - goto repeat; - } - remove_wait_queue(&dquot->dq_wait_lock, &wait); - current->state = TASK_RUNNING; -} - -static inline void wait_on_dquot(struct dquot *dquot) -{ - if (dquot->dq_flags & DQ_LOCKED) - __wait_on_dquot(dquot); -} - -static inline void lock_dquot(struct dquot *dquot) +static void wait_on_dquot(struct dquot *dquot) { - wait_on_dquot(dquot); - dquot->dq_flags |= DQ_LOCKED; -} - -static inline void unlock_dquot(struct dquot *dquot) -{ - dquot->dq_flags &= ~DQ_LOCKED; - wake_up(&dquot->dq_wait_lock); -} - -/* Wait for dquot to be unused */ -static void __wait_dquot_unused(struct dquot *dquot) -{ - DECLARE_WAITQUEUE(wait, current); - - add_wait_queue(&dquot->dq_wait_free, &wait); -repeat: - set_current_state(TASK_UNINTERRUPTIBLE); - if (dquot->dq_count) { - schedule(); - goto repeat; - } - remove_wait_queue(&dquot->dq_wait_free, &wait); - current->state = TASK_RUNNING; -} - -/* Wait for all duplicated dquot references to be dropped */ -static void __wait_dup_drop(struct dquot *dquot) -{ - DECLARE_WAITQUEUE(wait, current); - - add_wait_queue(&dquot->dq_wait_free, &wait); -repeat: - set_current_state(TASK_UNINTERRUPTIBLE); - if (dquot->dq_dup_ref) { - schedule(); - goto repeat; - } - remove_wait_queue(&dquot->dq_wait_free, &wait); - current->state = TASK_RUNNING; + down(&dquot->dq_lock); + up(&dquot->dq_lock); } static int read_dqblk(struct dquot *dquot) @@ -321,11 +250,11 @@ static int read_dqblk(struct dquot *dquot) int ret; struct quota_info *dqopt = sb_dqopt(dquot->dq_sb); - lock_dquot(dquot); + down(&dquot->dq_lock); down(&dqopt->dqio_sem); ret = dqopt->ops[dquot->dq_type]->read_dqblk(dquot); up(&dqopt->dqio_sem); - unlock_dquot(dquot); + up(&dquot->dq_lock); return ret; } @@ -340,36 +269,35 @@ static int commit_dqblk(struct dquot *dquot) return ret; } -/* Invalidate all dquots on the list, wait for all users. 
Note that this function is called - * after quota is disabled so no new quota might be created. As we only insert to the end of - * inuse list, we don't have to restart searching... */ +/* Invalidate all dquots on the list. Note that this function is called after + * quota is disabled so no new quota might be created. Because we hold dqoff_sem + * for writing and pointers were already removed from inodes we actually know that + * no quota for this sb+type should be held. */ static void invalidate_dquots(struct super_block *sb, int type) { struct dquot *dquot; struct list_head *head; -restart: - list_for_each(head, &inuse_list) { + spin_lock(&dq_list_lock); + for (head = inuse_list.next; head != &inuse_list;) { dquot = list_entry(head, struct dquot, dq_inuse); + head = head->next; if (dquot->dq_sb != sb) continue; if (dquot->dq_type != type) continue; - dquot->dq_flags |= DQ_INVAL; - if (dquot->dq_count) - /* - * Wait for any users of quota. As we have already cleared the flags in - * superblock and cleared all pointers from inodes we are assured - * that there will be no new users of this quota. 
- */ - __wait_dquot_unused(dquot); +#ifdef __DQUOT_PARANOIA + /* There should be no users of quota - we hold dqoff_sem for writing */ + if (atomic_read(&dquot->dq_count)) + BUG(); +#endif /* Quota now have no users and it has been written on last dqput() */ remove_dquot_hash(dquot); remove_free_dquot(dquot); remove_inuse(dquot); kmem_cache_free(dquot_cachep, dquot); - goto restart; } + spin_unlock(&dq_list_lock); } static int vfs_quota_sync(struct super_block *sb, int type) @@ -379,7 +307,14 @@ static int vfs_quota_sync(struct super_block *sb, int type) struct quota_info *dqopt = sb_dqopt(sb); int cnt; + down_read(&dqopt->dqoff_sem); restart: + /* At this point any dirty dquot will definitely be written so we can clear + dirty flag from info */ + for (cnt = 0; cnt < MAXQUOTAS; cnt++) + if ((cnt == type || type == -1) && sb_has_quota_enabled(sb, cnt)) + clear_bit(DQF_ANY_DQUOT_DIRTY_B, &dqopt->info[cnt].dqi_flags); + spin_lock(&dq_list_lock); list_for_each(head, &inuse_list) { dquot = list_entry(head, struct dquot, dq_inuse); if (sb && dquot->dq_sb != sb) @@ -388,26 +323,24 @@ restart: continue; if (!dquot->dq_sb) /* Invalidated? */ continue; - if (!dquot_dirty(dquot) && !(dquot->dq_flags & DQ_LOCKED)) + if (!dquot_dirty(dquot)) continue; - /* Get reference to quota so it won't be invalidated. 
get_dquot_ref() - * is enough since if dquot is locked/modified it can't be - * on the free list */ - get_dquot_ref(dquot); - if (dquot->dq_flags & DQ_LOCKED) - wait_on_dquot(dquot); - if (dquot_dirty(dquot)) - commit_dqblk(dquot); - dqput(dquot); + spin_unlock(&dq_list_lock); + commit_dqblk(dquot); goto restart; } + spin_unlock(&dq_list_lock); + for (cnt = 0; cnt < MAXQUOTAS; cnt++) - if ((cnt == type || type == -1) && sb_has_quota_enabled(sb, cnt)) - dqopt->info[cnt].dqi_flags &= ~DQF_ANY_DQUOT_DIRTY; - for (cnt = 0; cnt < MAXQUOTAS; cnt++) - if ((cnt == type || type == -1) && sb_has_quota_enabled(sb, cnt) && info_dirty(&dqopt->info[cnt])) + if ((cnt == type || type == -1) && sb_has_quota_enabled(sb, cnt) && info_dirty(&dqopt->info[cnt])) { + down(&dqopt->dqio_sem); dqopt->ops[cnt]->write_file_info(sb, cnt); + up(&dqopt->dqio_sem); + } + spin_lock(&dq_list_lock); dqstats.syncs++; + spin_unlock(&dq_list_lock); + up_read(&dqopt->dqoff_sem); return 0; } @@ -424,7 +357,7 @@ restart: for (cnt = 0, dirty = 0; cnt < MAXQUOTAS; cnt++) if ((type == cnt || type == -1) && sb_has_quota_enabled(sb, cnt) - && sb_dqopt(sb)->info[cnt].dqi_flags & DQF_ANY_DQUOT_DIRTY) + && info_any_dquot_dirty(&sb_dqopt(sb)->info[cnt])) dirty = 1; if (!dirty) continue; @@ -444,17 +377,13 @@ restart: void sync_dquots(struct super_block *sb, int type) { if (sb) { - lock_kernel(); if (sb->s_qcop->quota_sync) sb->s_qcop->quota_sync(sb, type); - unlock_kernel(); } else { while ((sb = get_super_to_sync(type))) { - lock_kernel(); if (sb->s_qcop->quota_sync) sb->s_qcop->quota_sync(sb, type); - unlock_kernel(); drop_super(sb); } } @@ -485,60 +414,60 @@ static void prune_dqcache(int count) static int shrink_dqcache_memory(int nr, unsigned int gfp_mask) { - if (nr) { - lock_kernel(); + int ret; + + spin_lock(&dq_list_lock); + if (nr) prune_dqcache(nr); - unlock_kernel(); - } - return dqstats.allocated_dquots; + ret = dqstats.allocated_dquots; + spin_unlock(&dq_list_lock); + return ret; } /* * Put reference 
to dquot * NOTE: If you change this function please check whether dqput_blocks() works right... + * MUST be called with dqoff_sem held */ static void dqput(struct dquot *dquot) { if (!dquot) return; #ifdef __DQUOT_PARANOIA - if (!dquot->dq_count) { + if (!atomic_read(&dquot->dq_count)) { printk("VFS: dqput: trying to free free dquot\n"); printk("VFS: device %s, dquot of %s %d\n", dquot->dq_sb->s_id, quotatypes[dquot->dq_type], dquot->dq_id); - return; + BUG(); } #endif - + + spin_lock(&dq_list_lock); dqstats.drops++; + spin_unlock(&dq_list_lock); we_slept: - if (dquot->dq_dup_ref && dquot->dq_count - dquot->dq_dup_ref <= 1) { /* Last unduplicated reference? */ - __wait_dup_drop(dquot); - goto we_slept; - } - if (dquot->dq_count > 1) { - /* We have more than one user... We can simply decrement use count */ - put_dquot_ref(dquot); + spin_lock(&dq_list_lock); + if (atomic_read(&dquot->dq_count) > 1) { + /* We have more than one user... nothing to do */ + atomic_dec(&dquot->dq_count); + spin_unlock(&dq_list_lock); return; } if (dquot_dirty(dquot)) { + spin_unlock(&dq_list_lock); commit_dqblk(dquot); goto we_slept; } - + atomic_dec(&dquot->dq_count); +#ifdef __DQUOT_PARANOIA /* sanity check */ - if (!list_empty(&dquot->dq_free)) { - printk(KERN_ERR "dqput: dquot already on free list??\n"); - put_dquot_ref(dquot); - return; - } - put_dquot_ref(dquot); - /* If dquot is going to be invalidated invalidate_dquots() is going to free it so */ - if (!(dquot->dq_flags & DQ_INVAL)) - put_dquot_last(dquot); /* Place at end of LRU free queue */ - wake_up(&dquot->dq_wait_free); + if (!list_empty(&dquot->dq_free)) + BUG(); +#endif + put_dquot_last(dquot); + spin_unlock(&dq_list_lock); } static struct dquot *get_empty_dquot(struct super_block *sb, int type) @@ -550,99 +479,66 @@ static struct dquot *get_empty_dquot(struct super_block *sb, int type) return NODQUOT; memset((caddr_t)dquot, 0, sizeof(struct dquot)); - init_waitqueue_head(&dquot->dq_wait_free); - 
init_waitqueue_head(&dquot->dq_wait_lock); + sema_init(&dquot->dq_lock, 1); INIT_LIST_HEAD(&dquot->dq_free); INIT_LIST_HEAD(&dquot->dq_inuse); INIT_LIST_HEAD(&dquot->dq_hash); dquot->dq_sb = sb; dquot->dq_type = type; - dquot->dq_count = 1; - /* all dquots go on the inuse_list */ - put_inuse(dquot); + atomic_set(&dquot->dq_count, 1); return dquot; } +/* + * Get reference to dquot + * MUST be called with dqoff_sem held + */ static struct dquot *dqget(struct super_block *sb, unsigned int id, int type) { unsigned int hashent = hashfn(sb, id, type); struct dquot *dquot, *empty = NODQUOT; - struct quota_info *dqopt = sb_dqopt(sb); + if (!sb_has_quota_enabled(sb, type)) + return NODQUOT; we_slept: - if (!is_enabled(dqopt, type)) { - if (empty) - dqput(empty); - return NODQUOT; - } - + spin_lock(&dq_list_lock); if ((dquot = find_dquot(hashent, sb, id, type)) == NODQUOT) { if (empty == NODQUOT) { + spin_unlock(&dq_list_lock); if ((empty = get_empty_dquot(sb, type)) == NODQUOT) schedule(); /* Try to wait for a moment... */ goto we_slept; } dquot = empty; dquot->dq_id = id; + /* all dquots go on the inuse_list */ + put_inuse(dquot); /* hash it first so it can be found */ insert_dquot_hash(dquot); + dqstats.lookups++; + spin_unlock(&dq_list_lock); read_dqblk(dquot); } else { - if (!dquot->dq_count) + if (!atomic_read(&dquot->dq_count)) remove_free_dquot(dquot); - get_dquot_ref(dquot); + atomic_inc(&dquot->dq_count); dqstats.cache_hits++; + dqstats.lookups++; + spin_unlock(&dq_list_lock); wait_on_dquot(dquot); if (empty) - dqput(empty); + kmem_cache_free(dquot_cachep, empty); } - if (!dquot->dq_sb) { /* Has somebody invalidated entry under us? 
*/ - printk(KERN_ERR "VFS: dqget(): Quota invalidated in dqget()!\n"); - dqput(dquot); - return NODQUOT; - } - ++dquot->dq_referenced; - dqstats.lookups++; - - return dquot; -} - -/* Duplicate reference to dquot got from inode */ -static struct dquot *dqduplicate(struct dquot *dquot) -{ - if (dquot == NODQUOT) - return NODQUOT; - get_dquot_ref(dquot); - if (!dquot->dq_sb) { - printk(KERN_ERR "VFS: dqduplicate(): Invalidated quota to be duplicated!\n"); - put_dquot_ref(dquot); - return NODQUOT; - } - if (dquot->dq_flags & DQ_LOCKED) - printk(KERN_ERR "VFS: dqduplicate(): Locked quota to be duplicated!\n"); - get_dquot_dup_ref(dquot); - dquot->dq_referenced++; - dqstats.lookups++; +#ifdef __DQUOT_PARANOIA + if (!dquot->dq_sb) /* Has somebody invalidated entry under us? */ + BUG(); +#endif return dquot; } -/* Put duplicated reference */ -static void dqputduplicate(struct dquot *dquot) -{ - if (!dquot->dq_dup_ref) { - printk(KERN_ERR "VFS: dqputduplicate(): Duplicated dquot put without duplicate reference.\n"); - return; - } - put_dquot_dup_ref(dquot); - if (!dquot->dq_dup_ref) - wake_up(&dquot->dq_wait_free); - put_dquot_ref(dquot); - dqstats.drops++; -} - static int dqinit_needed(struct inode *inode, int type) { int cnt; @@ -657,6 +553,7 @@ static int dqinit_needed(struct inode *inode, int type) return 0; } +/* This routine is guarded by dqoff_sem semaphore */ static void add_dquot_ref(struct super_block *sb, int type) { struct list_head *p; @@ -683,14 +580,13 @@ restart: /* Return 0 if dqput() won't block (note that 1 doesn't necessarily mean blocking) */ static inline int dqput_blocks(struct dquot *dquot) { - if (dquot->dq_dup_ref && dquot->dq_count - dquot->dq_dup_ref <= 1) - return 1; - if (dquot->dq_count <= 1 && dquot->dq_flags & DQ_MOD) + if (atomic_read(&dquot->dq_count) <= 1 && dquot_dirty(dquot)) return 1; return 0; } /* Remove references to dquots from inode - add dquot to list for freeing if needed */ +/* We can't race with anybody because we hold 
dqoff_sem for writing... */ int remove_inode_dquot_ref(struct inode *inode, int type, struct list_head *tofree_head) { struct dquot *dquot = inode->i_dquot[type]; @@ -706,9 +602,13 @@ int remove_inode_dquot_ref(struct inode *inode, int type, struct list_head *tofr put_it: if (dquot != NODQUOT) { if (dqput_blocks(dquot)) { - if (dquot->dq_count != 1) - printk(KERN_WARNING "VFS: Adding dquot with dq_count %d to dispose list.\n", dquot->dq_count); +#ifdef __DQUOT_PARANOIA + if (atomic_read(&dquot->dq_count) != 1) + printk(KERN_WARNING "VFS: Adding dquot with dq_count %d to dispose list.\n", atomic_read(&dquot->dq_count)); +#endif + spin_lock(&dq_list_lock); list_add(&dquot->dq_free, tofree_head); /* As dquot must have currently users it can't be on the free list... */ + spin_unlock(&dq_list_lock); return 1; } else @@ -718,12 +618,12 @@ put_it: } /* Free list of dquots - called from inode.c */ +/* dquots are removed from inodes, no new references can be got so we are the only ones holding reference */ void put_dquot_list(struct list_head *tofree_head) { struct list_head *act_head; struct dquot *dquot; - lock_kernel(); act_head = tofree_head->next; /* So now we have dquots on the list... Just free them */ while (act_head != tofree_head) { @@ -732,7 +632,6 @@ void put_dquot_list(struct list_head *tofree_head) list_del_init(&dquot->dq_free); /* Remove dquot from the list so we won't have problems... 
*/ dqput(dquot); } - unlock_kernel(); } static inline void dquot_incr_inodes(struct dquot *dquot, unsigned long number) @@ -755,7 +654,7 @@ static inline void dquot_decr_inodes(struct dquot *dquot, unsigned long number) dquot->dq_dqb.dqb_curinodes = 0; if (dquot->dq_dqb.dqb_curinodes < dquot->dq_dqb.dqb_isoftlimit) dquot->dq_dqb.dqb_itime = (time_t) 0; - dquot->dq_flags &= ~DQ_INODES; + clear_bit(DQ_INODES_B, &dquot->dq_flags); mark_dquot_dirty(dquot); } @@ -767,17 +666,17 @@ static inline void dquot_decr_space(struct dquot *dquot, qsize_t number) dquot->dq_dqb.dqb_curspace = 0; if (toqb(dquot->dq_dqb.dqb_curspace) < dquot->dq_dqb.dqb_bsoftlimit) dquot->dq_dqb.dqb_btime = (time_t) 0; - dquot->dq_flags &= ~DQ_BLKS; + clear_bit(DQ_BLKS_B, &dquot->dq_flags); mark_dquot_dirty(dquot); } -static inline int need_print_warning(struct dquot *dquot, int flag) +static inline int need_print_warning(struct dquot *dquot) { switch (dquot->dq_type) { case USRQUOTA: - return current->fsuid == dquot->dq_id && !(dquot->dq_flags & flag); + return current->fsuid == dquot->dq_id; case GRPQUOTA: - return in_group_p(dquot->dq_id) && !(dquot->dq_flags & flag); + return in_group_p(dquot->dq_id); } return 0; } @@ -795,12 +694,11 @@ static inline int need_print_warning(struct dquot *dquot, int flag) static void print_warning(struct dquot *dquot, const char warntype) { char *msg = NULL; - int flag = (warntype == BHARDWARN || warntype == BSOFTLONGWARN) ? DQ_BLKS : - ((warntype == IHARDWARN || warntype == ISOFTLONGWARN) ? DQ_INODES : 0); + int flag = (warntype == BHARDWARN || warntype == BSOFTLONGWARN) ? DQ_BLKS_B : + ((warntype == IHARDWARN || warntype == ISOFTLONGWARN) ? 
DQ_INODES_B : 0); - if (!need_print_warning(dquot, flag)) + if (!need_print_warning(dquot) || (flag && test_and_set_bit(flag, &dquot->dq_flags))) return; - dquot->dq_flags |= flag; tty_write_message(current->tty, dquot->dq_sb->s_id); if (warntype == ISOFTWARN || warntype == BSOFTWARN) tty_write_message(current->tty, ": warning, "); @@ -847,10 +745,11 @@ static inline char ignore_hardlimit(struct dquot *dquot) (info->dqi_format->qf_fmt_id != QFMT_VFS_OLD || !(info->dqi_flags & V1_DQF_RSQUASH)); } +/* needs dq_data_lock */ static int check_idq(struct dquot *dquot, ulong inodes, char *warntype) { *warntype = NOWARN; - if (inodes <= 0 || dquot->dq_flags & DQ_FAKE) + if (inodes <= 0 || test_bit(DQ_FAKE_B, &dquot->dq_flags)) return QUOTA_OK; if (dquot->dq_dqb.dqb_ihardlimit && @@ -878,10 +777,11 @@ static int check_idq(struct dquot *dquot, ulong inodes, char *warntype) return QUOTA_OK; } +/* needs dq_data_lock */ static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *warntype) { *warntype = 0; - if (space <= 0 || dquot->dq_flags & DQ_FAKE) + if (space <= 0 || test_bit(DQ_FAKE_B, &dquot->dq_flags)) return QUOTA_OK; if (dquot->dq_dqb.dqb_bhardlimit && @@ -926,19 +826,19 @@ static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *war */ void dquot_initialize(struct inode *inode, int type) { - struct dquot *dquot[MAXQUOTAS]; unsigned int id = 0; int cnt; - if (IS_NOQUOTA(inode)) + down_write(&sb_dqopt(inode->i_sb)->dqoff_sem); + /* Having dqoff lock we know NOQUOTA flags can't be altered... */ + if (IS_NOQUOTA(inode)) { + up_write(&sb_dqopt(inode->i_sb)->dqoff_sem); return; - /* Build list of quotas to initialize... We can block here */ + } + /* Build list of quotas to initialize... 
*/ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - dquot[cnt] = NODQUOT; if (type != -1 && cnt != type) continue; - if (!sb_has_quota_enabled(inode->i_sb, cnt)) - continue; if (inode->i_dquot[cnt] == NODQUOT) { switch (cnt) { case USRQUOTA: @@ -948,22 +848,12 @@ void dquot_initialize(struct inode *inode, int type) id = inode->i_gid; break; } - dquot[cnt] = dqget(inode->i_sb, id, cnt); + inode->i_dquot[cnt] = dqget(inode->i_sb, id, cnt); + if (inode->i_dquot[cnt]) + inode->i_flags |= S_QUOTA; } } - /* NOBLOCK START: Here we shouldn't block */ - for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - if (dquot[cnt] == NODQUOT || !sb_has_quota_enabled(inode->i_sb, cnt) || inode->i_dquot[cnt] != NODQUOT) - continue; - inode->i_dquot[cnt] = dquot[cnt]; - dquot[cnt] = NODQUOT; - inode->i_flags |= S_QUOTA; - } - /* NOBLOCK END */ - /* Put quotas which we didn't use */ - for (cnt = 0; cnt < MAXQUOTAS; cnt++) - if (dquot[cnt] != NODQUOT) - dqput(dquot[cnt]); + up_write(&sb_dqopt(inode->i_sb)->dqoff_sem); } /* @@ -971,57 +861,56 @@ void dquot_initialize(struct inode *inode, int type) * * Note: this is a blocking operation. 
*/ -void dquot_drop(struct inode *inode) +static void dquot_drop_nolock(struct inode *inode) { - struct dquot *dquot; int cnt; inode->i_flags &= ~S_QUOTA; for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (inode->i_dquot[cnt] == NODQUOT) continue; - dquot = inode->i_dquot[cnt]; + dqput(inode->i_dquot[cnt]); inode->i_dquot[cnt] = NODQUOT; - dqput(dquot); } } +void dquot_drop(struct inode *inode) +{ + down_write(&sb_dqopt(inode->i_sb)->dqoff_sem); + dquot_drop_nolock(inode); + up_write(&sb_dqopt(inode->i_sb)->dqoff_sem); +} + /* * This operation can block, but only after everything is updated */ int dquot_alloc_space(struct inode *inode, qsize_t number, int warn) { int cnt, ret = NO_QUOTA; - struct dquot *dquot[MAXQUOTAS]; char warntype[MAXQUOTAS]; - lock_kernel(); - for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - dquot[cnt] = NODQUOT; + for (cnt = 0; cnt < MAXQUOTAS; cnt++) warntype[cnt] = NOWARN; - } - /* NOBLOCK Start */ + + down_read(&sb_dqopt(inode->i_sb)->dqoff_sem); + spin_lock(&dq_data_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - dquot[cnt] = dqduplicate(inode->i_dquot[cnt]); - if (dquot[cnt] == NODQUOT) + if (inode->i_dquot[cnt] == NODQUOT) continue; - if (check_bdq(dquot[cnt], number, warn, warntype+cnt) == NO_QUOTA) + if (check_bdq(inode->i_dquot[cnt], number, warn, warntype+cnt) == NO_QUOTA) goto warn_put_all; } for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - if (dquot[cnt] == NODQUOT) + if (inode->i_dquot[cnt] == NODQUOT) continue; - dquot_incr_space(dquot[cnt], number); + dquot_incr_space(inode->i_dquot[cnt], number); } inode_add_bytes(inode, number); - /* NOBLOCK End */ ret = QUOTA_OK; warn_put_all: - flush_warnings(dquot, warntype); - for (cnt = 0; cnt < MAXQUOTAS; cnt++) - if (dquot[cnt] != NODQUOT) - dqputduplicate(dquot[cnt]); - unlock_kernel(); + spin_unlock(&dq_data_lock); + flush_warnings(inode->i_dquot, warntype); + up_read(&sb_dqopt(inode->i_sb)->dqoff_sem); return ret; } @@ -1031,36 +920,29 @@ warn_put_all: int dquot_alloc_inode(const struct inode *inode, 
unsigned long number) { int cnt, ret = NO_QUOTA; - struct dquot *dquot[MAXQUOTAS]; char warntype[MAXQUOTAS]; - for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - dquot[cnt] = NODQUOT; + for (cnt = 0; cnt < MAXQUOTAS; cnt++) warntype[cnt] = NOWARN; - } - /* NOBLOCK Start */ - lock_kernel(); + down_read(&sb_dqopt(inode->i_sb)->dqoff_sem); + spin_lock(&dq_data_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - dquot[cnt] = dqduplicate(inode -> i_dquot[cnt]); - if (dquot[cnt] == NODQUOT) + if (inode->i_dquot[cnt] == NODQUOT) continue; - if (check_idq(dquot[cnt], number, warntype+cnt) == NO_QUOTA) + if (check_idq(inode->i_dquot[cnt], number, warntype+cnt) == NO_QUOTA) goto warn_put_all; } for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - if (dquot[cnt] == NODQUOT) + if (inode->i_dquot[cnt] == NODQUOT) continue; - dquot_incr_inodes(dquot[cnt], number); + dquot_incr_inodes(inode->i_dquot[cnt], number); } - /* NOBLOCK End */ ret = QUOTA_OK; warn_put_all: - flush_warnings(dquot, warntype); - for (cnt = 0; cnt < MAXQUOTAS; cnt++) - if (dquot[cnt] != NODQUOT) - dqputduplicate(dquot[cnt]); - unlock_kernel(); + spin_unlock(&dq_data_lock); + flush_warnings((struct dquot **)inode->i_dquot, warntype); + up_read(&sb_dqopt(inode->i_sb)->dqoff_sem); return ret; } @@ -1070,20 +952,17 @@ warn_put_all: void dquot_free_space(struct inode *inode, qsize_t number) { unsigned int cnt; - struct dquot *dquot; - /* NOBLOCK Start */ - lock_kernel(); + down_read(&sb_dqopt(inode->i_sb)->dqoff_sem); + spin_lock(&dq_data_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - dquot = dqduplicate(inode->i_dquot[cnt]); - if (dquot == NODQUOT) + if (inode->i_dquot[cnt] == NODQUOT) continue; - dquot_decr_space(dquot, number); - dqputduplicate(dquot); + dquot_decr_space(inode->i_dquot[cnt], number); } inode_sub_bytes(inode, number); - unlock_kernel(); - /* NOBLOCK End */ + spin_unlock(&dq_data_lock); + up_read(&sb_dqopt(inode->i_sb)->dqoff_sem); } /* @@ -1092,19 +971,16 @@ void dquot_free_space(struct inode *inode, qsize_t 
number) void dquot_free_inode(const struct inode *inode, unsigned long number) { unsigned int cnt; - struct dquot *dquot; - /* NOBLOCK Start */ - lock_kernel(); + down_read(&sb_dqopt(inode->i_sb)->dqoff_sem); + spin_lock(&dq_data_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - dquot = dqduplicate(inode->i_dquot[cnt]); - if (dquot == NODQUOT) + if (inode->i_dquot[cnt] == NODQUOT) continue; - dquot_decr_inodes(dquot, number); - dqputduplicate(dquot); + dquot_decr_inodes(inode->i_dquot[cnt], number); } - unlock_kernel(); - /* NOBLOCK End */ + spin_unlock(&dq_data_lock); + up_read(&sb_dqopt(inode->i_sb)->dqoff_sem); } /* @@ -1126,10 +1002,11 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr) transfer_to[cnt] = transfer_from[cnt] = NODQUOT; warntype[cnt] = NOWARN; } + down_write(&sb_dqopt(inode->i_sb)->dqoff_sem); + if (IS_NOQUOTA(inode)) /* File without quota accounting? */ + goto warn_put_all; /* First build the transfer_to list - here we can block on reading of dquots... */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - if (!sb_has_quota_enabled(inode->i_sb, cnt)) - continue; switch (cnt) { case USRQUOTA: if (!chuid) @@ -1143,16 +1020,13 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr) break; } } - /* NOBLOCK START: From now on we shouldn't block */ + spin_lock(&dq_data_lock); space = inode_get_bytes(inode); /* Build the transfer_from list and check the limits */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - /* The second test can fail when quotaoff is in progress... */ - if (transfer_to[cnt] == NODQUOT || !sb_has_quota_enabled(inode->i_sb, cnt)) - continue; - transfer_from[cnt] = dqduplicate(inode->i_dquot[cnt]); - if (transfer_from[cnt] == NODQUOT) /* Can happen on quotafiles (quota isn't initialized on them)... 
*/ + if (transfer_to[cnt] == NODQUOT) continue; + transfer_from[cnt] = inode->i_dquot[cnt]; if (check_idq(transfer_to[cnt], 1, warntype+cnt) == NO_QUOTA || check_bdq(transfer_to[cnt], space, 0, warntype+cnt) == NO_QUOTA) goto warn_put_all; @@ -1163,9 +1037,9 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr) */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { /* - * Skip changes for same uid or gid or for non-existing quota-type. + * Skip changes for same uid or gid or for turned off quota-type. */ - if (transfer_from[cnt] == NODQUOT || transfer_to[cnt] == NODQUOT) + if (transfer_to[cnt] == NODQUOT) continue; dquot_decr_inodes(transfer_from[cnt], 1); @@ -1174,26 +1048,17 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr) dquot_incr_inodes(transfer_to[cnt], 1); dquot_incr_space(transfer_to[cnt], space); - if (inode->i_dquot[cnt] == NODQUOT) - BUG(); inode->i_dquot[cnt] = transfer_to[cnt]; - /* - * We've got to release transfer_from[] twice - once for dquot_transfer() and - * once for inode. We don't want to release transfer_to[] as it's now placed in inode - */ - transfer_to[cnt] = transfer_from[cnt]; } - /* NOBLOCK END. 
From now on we can block as we wish */ ret = QUOTA_OK; warn_put_all: + spin_unlock(&dq_data_lock); flush_warnings(transfer_to, warntype); - for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - /* First we must put duplicate - otherwise we might deadlock */ - if (transfer_to[cnt] != NODQUOT) - dqputduplicate(transfer_to[cnt]); + + for (cnt = 0; cnt < MAXQUOTAS; cnt++) if (transfer_from[cnt] != NODQUOT) dqput(transfer_from[cnt]); - } + up_write(&sb_dqopt(inode->i_sb)->dqoff_sem); return ret; } @@ -1245,24 +1110,30 @@ int vfs_quota_off(struct super_block *sb, int type) int cnt; struct quota_info *dqopt = sb_dqopt(sb); - lock_kernel(); if (!sb) goto out; /* We need to serialize quota_off() for device */ - down(&dqopt->dqoff_sem); + down_write(&dqopt->dqoff_sem); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (type != -1 && cnt != type) continue; - if (!is_enabled(dqopt, cnt)) + if (!sb_has_quota_enabled(sb, cnt)) continue; reset_enable_flags(dqopt, cnt); /* Note: these are blocking operations */ remove_dquot_ref(sb, cnt); invalidate_dquots(sb, cnt); - if (info_dirty(&dqopt->info[cnt])) + /* + * Now all dquots should be invalidated, all writes done so we should be only + * users of the info. No locks needed. 
+ */ + if (info_dirty(&dqopt->info[cnt])) { + down(&dqopt->dqio_sem); dqopt->ops[cnt]->write_file_info(sb, cnt); + up(&dqopt->dqio_sem); + } if (dqopt->ops[cnt]->free_file_info) dqopt->ops[cnt]->free_file_info(sb, cnt); put_quota_format(dqopt->info[cnt].dqi_format); @@ -1274,15 +1145,14 @@ int vfs_quota_off(struct super_block *sb, int type) dqopt->info[cnt].dqi_bgrace = 0; dqopt->ops[cnt] = NULL; } - up(&dqopt->dqoff_sem); + up_write(&dqopt->dqoff_sem); out: - unlock_kernel(); return 0; } int vfs_quota_on(struct super_block *sb, int type, int format_id, char *path) { - struct file *f = NULL; + struct file *f; struct inode *inode; struct quota_info *dqopt = sb_dqopt(sb); struct quota_format_type *fmt = find_quota_format(format_id); @@ -1290,19 +1160,11 @@ int vfs_quota_on(struct super_block *sb, int type, int format_id, char *path) if (!fmt) return -ESRCH; - if (is_enabled(dqopt, type)) { - error = -EBUSY; + f = filp_open(path, O_RDWR, 0600); + if (IS_ERR(f)) { + error = PTR_ERR(f); goto out_fmt; } - - down(&dqopt->dqoff_sem); - - f = filp_open(path, O_RDWR, 0600); - - error = PTR_ERR(f); - if (IS_ERR(f)) - goto out_lock; - dqopt->files[type] = f; error = -EIO; if (!f->f_op || !f->f_op->read || !f->f_op->write) goto out_f; @@ -1313,30 +1175,41 @@ int vfs_quota_on(struct super_block *sb, int type, int format_id, char *path) error = -EACCES; if (!S_ISREG(inode->i_mode)) goto out_f; + + down_write(&dqopt->dqoff_sem); + if (sb_has_quota_enabled(sb, type)) { + error = -EBUSY; + goto out_lock; + } + dqopt->files[type] = f; error = -EINVAL; if (!fmt->qf_ops->check_quota_file(sb, type)) - goto out_f; + goto out_lock; /* We don't want quota on quota files */ - dquot_drop(inode); + dquot_drop_nolock(inode); inode->i_flags |= S_NOQUOTA; dqopt->ops[type] = fmt->qf_ops; dqopt->info[type].dqi_format = fmt; - if ((error = dqopt->ops[type]->read_file_info(sb, type)) < 0) - goto out_f; + down(&dqopt->dqio_sem); + if ((error = dqopt->ops[type]->read_file_info(sb, type)) < 0) { + 
up(&dqopt->dqio_sem); + goto out_lock; + } + up(&dqopt->dqio_sem); set_enable_flags(dqopt, type); add_dquot_ref(sb, type); - up(&dqopt->dqoff_sem); + up_write(&dqopt->dqoff_sem); return 0; -out_f: - if (f) - filp_close(f, NULL); - dqopt->files[type] = NULL; out_lock: - up(&dqopt->dqoff_sem); + inode->i_flags &= ~S_NOQUOTA; + dqopt->files[type] = NULL; + up_write(&dqopt->dqoff_sem); +out_f: + filp_close(f, NULL); out_fmt: put_quota_format(fmt); @@ -1348,6 +1221,7 @@ static void do_get_dqblk(struct dquot *dquot, struct if_dqblk *di) { struct mem_dqblk *dm = &dquot->dq_dqb; + spin_lock(&dq_data_lock); di->dqb_bhardlimit = dm->dqb_bhardlimit; di->dqb_bsoftlimit = dm->dqb_bsoftlimit; di->dqb_curspace = dm->dqb_curspace; @@ -1357,16 +1231,21 @@ static void do_get_dqblk(struct dquot *dquot, struct if_dqblk *di) di->dqb_btime = dm->dqb_btime; di->dqb_itime = dm->dqb_itime; di->dqb_valid = QIF_ALL; + spin_unlock(&dq_data_lock); } int vfs_get_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di) { - struct dquot *dquot = dqget(sb, id, type); + struct dquot *dquot; - if (!dquot) - return -EINVAL; + down_read(&sb_dqopt(sb)->dqoff_sem); + if (!(dquot = dqget(sb, id, type))) { + up_read(&sb_dqopt(sb)->dqoff_sem); + return -ESRCH; + } do_get_dqblk(dquot, di); dqput(dquot); + up_read(&sb_dqopt(sb)->dqoff_sem); return 0; } @@ -1376,6 +1255,7 @@ static void do_set_dqblk(struct dquot *dquot, struct if_dqblk *di) struct mem_dqblk *dm = &dquot->dq_dqb; int check_blim = 0, check_ilim = 0; + spin_lock(&dq_data_lock); if (di->dqb_valid & QIF_SPACE) { dm->dqb_curspace = di->dqb_curspace; check_blim = 1; @@ -1402,7 +1282,7 @@ static void do_set_dqblk(struct dquot *dquot, struct if_dqblk *di) if (check_blim) { if (!dm->dqb_bsoftlimit || toqb(dm->dqb_curspace) < dm->dqb_bsoftlimit) { dm->dqb_btime = 0; - dquot->dq_flags &= ~DQ_BLKS; + clear_bit(DQ_BLKS_B, &dquot->dq_flags); } else if (!(di->dqb_valid & QIF_BTIME)) /* Set grace only if user hasn't provided his own... 
*/ dm->dqb_btime = get_seconds() + sb_dqopt(dquot->dq_sb)->info[dquot->dq_type].dqi_bgrace; @@ -1410,46 +1290,67 @@ static void do_set_dqblk(struct dquot *dquot, struct if_dqblk *di) if (check_ilim) { if (!dm->dqb_isoftlimit || dm->dqb_curinodes < dm->dqb_isoftlimit) { dm->dqb_itime = 0; - dquot->dq_flags &= ~DQ_INODES; + clear_bit(DQ_INODES_B, &dquot->dq_flags); } else if (!(di->dqb_valid & QIF_ITIME)) /* Set grace only if user hasn't provided his own... */ dm->dqb_itime = get_seconds() + sb_dqopt(dquot->dq_sb)->info[dquot->dq_type].dqi_igrace; } if (dm->dqb_bhardlimit || dm->dqb_bsoftlimit || dm->dqb_ihardlimit || dm->dqb_isoftlimit) - dquot->dq_flags &= ~DQ_FAKE; + clear_bit(DQ_FAKE_B, &dquot->dq_flags); else - dquot->dq_flags |= DQ_FAKE; - dquot->dq_flags |= DQ_MOD; + set_bit(DQ_FAKE_B, &dquot->dq_flags); + mark_dquot_dirty(dquot); + spin_unlock(&dq_data_lock); } int vfs_set_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di) { - struct dquot *dquot = dqget(sb, id, type); + struct dquot *dquot; - if (!dquot) - return -EINVAL; + down_read(&sb_dqopt(sb)->dqoff_sem); + if (!(dquot = dqget(sb, id, type))) { + up_read(&sb_dqopt(sb)->dqoff_sem); + return -ESRCH; + } do_set_dqblk(dquot, di); dqput(dquot); + up_read(&sb_dqopt(sb)->dqoff_sem); return 0; } /* Generic routine for getting common part of quota file information */ int vfs_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii) { - struct mem_dqinfo *mi = sb_dqopt(sb)->info + type; - + struct mem_dqinfo *mi; + + down_read(&sb_dqopt(sb)->dqoff_sem); + if (!sb_has_quota_enabled(sb, type)) { + up_read(&sb_dqopt(sb)->dqoff_sem); + return -ESRCH; + } + mi = sb_dqopt(sb)->info + type; + spin_lock(&dq_data_lock); ii->dqi_bgrace = mi->dqi_bgrace; ii->dqi_igrace = mi->dqi_igrace; ii->dqi_flags = mi->dqi_flags & DQF_MASK; ii->dqi_valid = IIF_ALL; + spin_unlock(&dq_data_lock); + up_read(&sb_dqopt(sb)->dqoff_sem); return 0; } /* Generic routine for setting common part of quota file 
information */ int vfs_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii) { - struct mem_dqinfo *mi = sb_dqopt(sb)->info + type; + struct mem_dqinfo *mi; + down_read(&sb_dqopt(sb)->dqoff_sem); + if (!sb_has_quota_enabled(sb, type)) { + up_read(&sb_dqopt(sb)->dqoff_sem); + return -ESRCH; + } + mi = sb_dqopt(sb)->info + type; + spin_lock(&dq_data_lock); if (ii->dqi_valid & IIF_BGRACE) mi->dqi_bgrace = ii->dqi_bgrace; if (ii->dqi_valid & IIF_IGRACE) @@ -1457,6 +1358,8 @@ int vfs_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii) if (ii->dqi_valid & IIF_FLAGS) mi->dqi_flags = (mi->dqi_flags & ~DQF_MASK) | (ii->dqi_flags & DQF_MASK); mark_info_dirty(mi); + spin_unlock(&dq_data_lock); + up_read(&sb_dqopt(sb)->dqoff_sem); return 0; } @@ -1502,7 +1405,7 @@ static int __init dquot_init(void) register_sysctl_table(sys_table, 0); for (i = 0; i < NR_DQHASH; i++) INIT_LIST_HEAD(dquot_hash + i); - printk(KERN_NOTICE "VFS: Disk quotas v%s\n", __DQUOT_VERSION__); + printk(KERN_NOTICE "VFS: Disk quotas %s\n", __DQUOT_VERSION__); dquot_cachep = kmem_cache_create("dquot", sizeof(struct dquot), sizeof(unsigned long) * 4, @@ -1519,3 +1422,5 @@ module_init(dquot_init); EXPORT_SYMBOL(register_quota_format); EXPORT_SYMBOL(unregister_quota_format); EXPORT_SYMBOL(dqstats); +EXPORT_SYMBOL(dq_list_lock); +EXPORT_SYMBOL(dq_data_lock); diff --git a/fs/inode.c b/fs/inode.c index 5fba39ebe82d..1c36522999c3 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1133,9 +1133,8 @@ void remove_dquot_ref(struct super_block *sb, int type) if (!sb->dq_op) return; /* nothing to do */ - /* We have to be protected against other CPUs */ - lock_kernel(); /* This lock is for quota code */ spin_lock(&inode_lock); /* This lock is for inodes code */ + /* We don't have to lock against quota code - test IS_QUOTAINIT is just for speedup... 
*/ list_for_each(act_head, &inode_in_use) { inode = list_entry(act_head, struct inode, i_list); @@ -1158,7 +1157,6 @@ void remove_dquot_ref(struct super_block *sb, int type) remove_inode_dquot_ref(inode, type, &tofree_head); } spin_unlock(&inode_lock); - unlock_kernel(); put_dquot_list(&tofree_head); } diff --git a/fs/quota.c b/fs/quota.c index 600765da6ecb..54302d445086 100644 --- a/fs/quota.c +++ b/fs/quota.c @@ -84,6 +84,7 @@ static int check_quotactl_valid(struct super_block *sb, int type, int cmd, qid_t case Q_SETINFO: case Q_SETQUOTA: case Q_GETQUOTA: + /* This is just informative test so we are satisfied without a lock */ if (!sb_has_quota_enabled(sb, type)) return -ESRCH; } @@ -151,7 +152,13 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id, cadd case Q_GETFMT: { __u32 fmt; + down_read(&sb_dqopt(sb)->dqoff_sem); + if (!sb_has_quota_enabled(sb, type)) { + up_read(&sb_dqopt(sb)->dqoff_sem); + return -ESRCH; + } fmt = sb_dqopt(sb)->info[type].dqi_format->qf_fmt_id; + up_read(&sb_dqopt(sb)->dqoff_sem); if (copy_to_user(addr, &fmt, sizeof(fmt))) return -EFAULT; return 0; @@ -244,7 +251,6 @@ asmlinkage long sys_quotactl(unsigned int cmd, const char *special, qid_t id, ca struct super_block *sb = NULL; int ret = -EINVAL; - lock_kernel(); cmds = cmd >> SUBCMDSHIFT; type = cmd & SUBCMDMASK; @@ -259,6 +265,5 @@ asmlinkage long sys_quotactl(unsigned int cmd, const char *special, qid_t id, ca out: if (sb) drop_super(sb); - unlock_kernel(); return ret; } diff --git a/fs/super.c b/fs/super.c index 458800970226..09e0fa12b126 100644 --- a/fs/super.c +++ b/fs/super.c @@ -71,7 +71,7 @@ static struct super_block *alloc_super(void) atomic_set(&s->s_active, 1); sema_init(&s->s_vfs_rename_sem,1); sema_init(&s->s_dquot.dqio_sem, 1); - sema_init(&s->s_dquot.dqoff_sem, 1); + init_rwsem(&s->s_dquot.dqoff_sem); s->s_maxbytes = MAX_NON_LFS; s->dq_op = sb_dquot_ops; s->s_qcop = sb_quotactl_ops; diff --git a/include/linux/quota.h b/include/linux/quota.h index 
02bf7c53a6d4..e5b1e2187156 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -37,6 +37,7 @@ #include #include +#include #define __DQUOT_VERSION__ "dquot_6.5.1" #define __DQUOT_NUM_VERSION__ 6*10000+5*100+1 @@ -44,6 +45,9 @@ typedef __kernel_uid32_t qid_t; /* Type in which we store ids in memory */ typedef __u64 qsize_t; /* Type in which we store sizes */ +extern spinlock_t dq_list_lock; +extern spinlock_t dq_data_lock; + /* Size of blocks in which are counted size limits */ #define QUOTABLOCK_BITS 10 #define QUOTABLOCK_SIZE (1 << QUOTABLOCK_BITS) @@ -155,7 +159,7 @@ struct quota_format_type; struct mem_dqinfo { struct quota_format_type *dqi_format; - int dqi_flags; + unsigned long dqi_flags; unsigned int dqi_bgrace; unsigned int dqi_igrace; union { @@ -165,18 +169,19 @@ struct mem_dqinfo { }; #define DQF_MASK 0xffff /* Mask for format specific flags */ -#define DQF_INFO_DIRTY 0x10000 /* Is info dirty? */ -#define DQF_ANY_DQUOT_DIRTY 0x20000 /* Is any dquot dirty? */ +#define DQF_INFO_DIRTY_B 16 +#define DQF_ANY_DQUOT_DIRTY_B 17 +#define DQF_INFO_DIRTY (1 << DQF_INFO_DIRTY_B) /* Is info dirty? */ +#define DQF_ANY_DQUOT_DIRTY (1 << DQF_ANY_DQUOT_DIRTY_B) /* Is any dquot dirty? 
*/ extern inline void mark_info_dirty(struct mem_dqinfo *info) { - info->dqi_flags |= DQF_INFO_DIRTY; + set_bit(DQF_INFO_DIRTY_B, &info->dqi_flags); } -#define info_dirty(info) ((info)->dqi_flags & DQF_INFO_DIRTY) - -#define info_any_dirty(info) ((info)->dqi_flags & DQF_INFO_DIRTY ||\ - (info)->dqi_flags & DQF_ANY_DQUOT_DIRTY) +#define info_dirty(info) test_bit(DQF_INFO_DIRTY_B, &(info)->dqi_flags) +#define info_any_dquot_dirty(info) test_bit(DQF_ANY_DQUOT_DIRTY_B, &(info)->dqi_flags) +#define info_any_dirty(info) (info_dirty(info) || info_any_dquot_dirty(info)) #define sb_dqopt(sb) (&(sb)->s_dquot) @@ -195,30 +200,29 @@ extern struct dqstats dqstats; #define NR_DQHASH 43 /* Just an arbitrary number */ -#define DQ_LOCKED 0x01 /* dquot under IO */ -#define DQ_MOD 0x02 /* dquot modified since read */ -#define DQ_BLKS 0x10 /* uid/gid has been warned about blk limit */ -#define DQ_INODES 0x20 /* uid/gid has been warned about inode limit */ -#define DQ_FAKE 0x40 /* no limits only usage */ -#define DQ_INVAL 0x80 /* dquot is going to be invalidated */ +#define DQ_MOD_B 0 +#define DQ_BLKS_B 1 +#define DQ_INODES_B 2 +#define DQ_FAKE_B 3 + +#define DQ_MOD (1 << DQ_MOD_B) /* dquot modified since read */ +#define DQ_BLKS (1 << DQ_BLKS_B) /* uid/gid has been warned about blk limit */ +#define DQ_INODES (1 << DQ_INODES_B) /* uid/gid has been warned about inode limit */ +#define DQ_FAKE (1 << DQ_FAKE_B) /* no limits only usage */ struct dquot { struct list_head dq_hash; /* Hash list in memory */ struct list_head dq_inuse; /* List of all quotas */ struct list_head dq_free; /* Free list element */ - wait_queue_head_t dq_wait_lock; /* Pointer to waitqueue on dquot lock */ - wait_queue_head_t dq_wait_free; /* Pointer to waitqueue for quota to be unused */ - int dq_count; /* Use count */ - int dq_dup_ref; /* Number of duplicated refences */ + struct semaphore dq_lock; /* dquot IO lock */ + atomic_t dq_count; /* Use count */ /* fields after this point are cleared when invalidating */ 
struct super_block *dq_sb; /* superblock this applies to */ unsigned int dq_id; /* ID this applies to (uid, gid) */ loff_t dq_off; /* Offset of dquot on disk */ + unsigned long dq_flags; /* See DQ_* */ short dq_type; /* Type of quota */ - short dq_flags; /* See DQ_* */ - unsigned long dq_referenced; /* Number of times this dquot was - referenced during its lifetime */ struct mem_dqblk dq_dqb; /* Diskquota usage */ }; @@ -276,7 +280,7 @@ struct quota_format_type { struct quota_info { unsigned int flags; /* Flags for diskquotas on this device */ struct semaphore dqio_sem; /* lock device while I/O in progress */ - struct semaphore dqoff_sem; /* serialize quota_off() and quota_on() on device */ + struct rw_semaphore dqoff_sem; /* serialize quota_off() and quota_on() on device and ops using quota_info struct, pointers from inode to dquots */ struct file *files[MAXQUOTAS]; /* fp's to quotafiles */ struct mem_dqinfo info[MAXQUOTAS]; /* Information for each quota type */ struct quota_format_ops *ops[MAXQUOTAS]; /* Operations for each type */ @@ -284,26 +288,17 @@ struct quota_info { /* Inline would be better but we need to dereference super_block which is not defined yet */ #define mark_dquot_dirty(dquot) do {\ - dquot->dq_flags |= DQ_MOD;\ - sb_dqopt(dquot->dq_sb)->info[dquot->dq_type].dqi_flags |= DQF_ANY_DQUOT_DIRTY;\ + set_bit(DQF_ANY_DQUOT_DIRTY_B, &(sb_dqopt((dquot)->dq_sb)->info[(dquot)->dq_type].dqi_flags));\ + set_bit(DQ_MOD_B, &(dquot)->dq_flags);\ } while (0) -#define dquot_dirty(dquot) ((dquot)->dq_flags & DQ_MOD) - -static inline int is_enabled(struct quota_info *dqopt, int type) -{ - switch (type) { - case USRQUOTA: - return dqopt->flags & DQUOT_USR_ENABLED; - case GRPQUOTA: - return dqopt->flags & DQUOT_GRP_ENABLED; - } - return 0; -} +#define dquot_dirty(dquot) test_bit(DQ_MOD_B, &(dquot)->dq_flags) -#define sb_any_quota_enabled(sb) (is_enabled(sb_dqopt(sb), USRQUOTA) | is_enabled(sb_dqopt(sb), GRPQUOTA)) +#define sb_has_quota_enabled(sb, type) 
((type)==USRQUOTA ? \ + (sb_dqopt(sb)->flags & DQUOT_USR_ENABLED) : (sb_dqopt(sb)->flags & DQUOT_GRP_ENABLED)) -#define sb_has_quota_enabled(sb, type) (is_enabled(sb_dqopt(sb), type)) +#define sb_any_quota_enabled(sb) (sb_has_quota_enabled(sb, USRQUOTA) | \ + sb_has_quota_enabled(sb, GRPQUOTA)) int register_quota_format(struct quota_format_type *fmt); void unregister_quota_format(struct quota_format_type *fmt); diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index 31b24e37c159..e1c502012025 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -46,36 +46,31 @@ static __inline__ void DQUOT_INIT(struct inode *inode) { if (!inode->i_sb) BUG(); - lock_kernel(); if (sb_any_quota_enabled(inode->i_sb) && !IS_NOQUOTA(inode)) inode->i_sb->dq_op->initialize(inode, -1); - unlock_kernel(); } static __inline__ void DQUOT_DROP(struct inode *inode) { - lock_kernel(); if (IS_QUOTAINIT(inode)) { if (!inode->i_sb) BUG(); inode->i_sb->dq_op->drop(inode); /* Ops must be set when there's any quota... 
*/ } - unlock_kernel(); } static __inline__ int DQUOT_PREALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr) { - lock_kernel(); if (sb_any_quota_enabled(inode->i_sb)) { /* Used space is updated in alloc_space() */ - if (inode->i_sb->dq_op->alloc_space(inode, nr, 1) == NO_QUOTA) { - unlock_kernel(); + if (inode->i_sb->dq_op->alloc_space(inode, nr, 1) == NO_QUOTA) return 1; - } } - else + else { + spin_lock(&dq_data_lock); inode_add_bytes(inode, nr); - unlock_kernel(); + spin_unlock(&dq_data_lock); + } return 0; } @@ -89,17 +84,16 @@ static __inline__ int DQUOT_PREALLOC_SPACE(struct inode *inode, qsize_t nr) static __inline__ int DQUOT_ALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr) { - lock_kernel(); if (sb_any_quota_enabled(inode->i_sb)) { /* Used space is updated in alloc_space() */ - if (inode->i_sb->dq_op->alloc_space(inode, nr, 0) == NO_QUOTA) { - unlock_kernel(); + if (inode->i_sb->dq_op->alloc_space(inode, nr, 0) == NO_QUOTA) return 1; - } } - else + else { + spin_lock(&dq_data_lock); inode_add_bytes(inode, nr); - unlock_kernel(); + spin_unlock(&dq_data_lock); + } return 0; } @@ -113,26 +107,23 @@ static __inline__ int DQUOT_ALLOC_SPACE(struct inode *inode, qsize_t nr) static __inline__ int DQUOT_ALLOC_INODE(struct inode *inode) { - lock_kernel(); if (sb_any_quota_enabled(inode->i_sb)) { DQUOT_INIT(inode); - if (inode->i_sb->dq_op->alloc_inode(inode, 1) == NO_QUOTA) { - unlock_kernel(); + if (inode->i_sb->dq_op->alloc_inode(inode, 1) == NO_QUOTA) return 1; - } } - unlock_kernel(); return 0; } static __inline__ void DQUOT_FREE_SPACE_NODIRTY(struct inode *inode, qsize_t nr) { - lock_kernel(); if (sb_any_quota_enabled(inode->i_sb)) inode->i_sb->dq_op->free_space(inode, nr); - else + else { + spin_lock(&dq_data_lock); inode_sub_bytes(inode, nr); - unlock_kernel(); + spin_unlock(&dq_data_lock); + } } static __inline__ void DQUOT_FREE_SPACE(struct inode *inode, qsize_t nr) @@ -143,23 +134,17 @@ static __inline__ void DQUOT_FREE_SPACE(struct inode *inode, 
qsize_t nr) static __inline__ void DQUOT_FREE_INODE(struct inode *inode) { - lock_kernel(); if (sb_any_quota_enabled(inode->i_sb)) inode->i_sb->dq_op->free_inode(inode, 1); - unlock_kernel(); } static __inline__ int DQUOT_TRANSFER(struct inode *inode, struct iattr *iattr) { - lock_kernel(); if (sb_any_quota_enabled(inode->i_sb) && !IS_NOQUOTA(inode)) { DQUOT_INIT(inode); - if (inode->i_sb->dq_op->transfer(inode, iattr) == NO_QUOTA) { - unlock_kernel(); + if (inode->i_sb->dq_op->transfer(inode, iattr) == NO_QUOTA) return 1; - } } - unlock_kernel(); return 0; } @@ -169,10 +154,8 @@ static __inline__ int DQUOT_OFF(struct super_block *sb) { int ret = -ENOSYS; - lock_kernel(); if (sb->s_qcop && sb->s_qcop->quota_off) ret = sb->s_qcop->quota_off(sb, -1); - unlock_kernel(); return ret; } @@ -192,9 +175,7 @@ static __inline__ int DQUOT_OFF(struct super_block *sb) #define DQUOT_TRANSFER(inode, iattr) (0) extern __inline__ int DQUOT_PREALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr) { - lock_kernel(); inode_add_bytes(inode, nr); - unlock_kernel(); return 0; } @@ -207,9 +188,7 @@ extern __inline__ int DQUOT_PREALLOC_SPACE(struct inode *inode, qsize_t nr) extern __inline__ int DQUOT_ALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr) { - lock_kernel(); inode_add_bytes(inode, nr); - unlock_kernel(); return 0; } @@ -222,9 +201,7 @@ extern __inline__ int DQUOT_ALLOC_SPACE(struct inode *inode, qsize_t nr) extern __inline__ void DQUOT_FREE_SPACE_NODIRTY(struct inode *inode, qsize_t nr) { - lock_kernel(); inode_sub_bytes(inode, nr); - unlock_kernel(); } extern __inline__ void DQUOT_FREE_SPACE(struct inode *inode, qsize_t nr) -- cgit v1.2.3 From 5b2f80762d125d8a3aa4710129eca6bbbcc4d1ad Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 1 Jan 2003 03:02:38 -0800 Subject: [PATCH] Modules 3/3: Sort sections RTH's final complaint (so far 8) was that we should sort the module sections: archs might require some sections to be adjacent, so they can all be reached by a relative 
pointer (ie. GOT pointer). This implements that reordering, and simplifies the module interface for architectures as well. Previously an arch could specify it wanted extra space, but not where that space would be. The new method (used only by PPC so far) is to allocate an empty section (in asm/module.h or by setting LDFLAGS_MODULE to use an arch specific linker script), and expand that to the desired size in "module_frob_arch_sections()". --- arch/arm/kernel/module.c | 16 ++-- arch/i386/kernel/module.c | 18 ++--- arch/ppc/kernel/module.c | 53 +++++++----- arch/s390/kernel/module.c | 21 ++--- arch/s390x/kernel/module.c | 20 ++--- arch/sparc/kernel/module.c | 18 ++--- arch/sparc64/kernel/module.c | 18 ++--- arch/x86_64/kernel/module.c | 18 ++--- include/asm-ppc/module.h | 13 +-- include/linux/moduleloader.h | 19 ++--- kernel/module.c | 188 +++++++++++++++++++++---------------------- 11 files changed, 169 insertions(+), 233 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c index 33a6735d3f03..3a2b5aaaa3b6 100644 --- a/arch/arm/kernel/module.c +++ b/arch/arm/kernel/module.c @@ -67,18 +67,12 @@ void module_free(struct module *module, void *region) vfree(region); } -long -module_core_size(const Elf32_Ehdr *hdr, const Elf32_Shdr *sechdrs, - const char *secstrings, struct module *module) +int module_frob_arch_sections(const Elf_Ehdr *hdr, + const Elf_Shdr *sechdrs, + const char *secstrings, + struct module *mod) { - return module->core_size; -} - -long -module_init_size(const Elf32_Ehdr *hdr, const Elf32_Shdr *sechdrs, - const char *secstrings, struct module *module) -{ - return module->init_size; + return 0; } int diff --git a/arch/i386/kernel/module.c b/arch/i386/kernel/module.c index bae0e3df7718..d71b0e367eb6 100644 --- a/arch/i386/kernel/module.c +++ b/arch/i386/kernel/module.c @@ -45,20 +45,12 @@ void module_free(struct module *mod, void *module_region) } /* We don't need anything special. 
*/ -long module_core_size(const Elf32_Ehdr *hdr, - const Elf32_Shdr *sechdrs, - const char *secstrings, - struct module *module) +int module_frob_arch_sections(const Elf_Ehdr *hdr, + const Elf_Shdr *sechdrs, + const char *secstrings, + struct module *mod) { - return module->core_size; -} - -long module_init_size(const Elf32_Ehdr *hdr, - const Elf32_Shdr *sechdrs, - const char *secstrings, - struct module *module) -{ - return module->init_size; + return 0; } int apply_relocate(Elf32_Shdr *sechdrs, diff --git a/arch/ppc/kernel/module.c b/arch/ppc/kernel/module.c index 31cbdf1661ac..8e396b7f40c0 100644 --- a/arch/ppc/kernel/module.c +++ b/arch/ppc/kernel/module.c @@ -101,24 +101,31 @@ static unsigned long get_plt_size(const Elf32_Ehdr *hdr, return ret; } -long module_core_size(const Elf32_Ehdr *hdr, - const Elf32_Shdr *sechdrs, - const char *secstrings, - struct module *module) +int module_frob_arch_sections(Elf32_Ehdr *hdr, + Elf32_Shdr *sechdrs, + const char *secstrings, + struct module *me) { - module->arch.core_plt_offset = ALIGN(module->core_size, 4); - return module->arch.core_plt_offset - + get_plt_size(hdr, sechdrs, secstrings, 0); -} + unsigned int i; -long module_init_size(const Elf32_Ehdr *hdr, - const Elf32_Shdr *sechdrs, - const char *secstrings, - struct module *module) -{ - module->arch.init_plt_offset = ALIGN(module->init_size, 4); - return module->arch.init_plt_offset - + get_plt_size(hdr, sechdrs, secstrings, 1); + /* Find .plt and .pltinit sections */ + for (i = 0; i < hdr->e_shnum; i++) { + if (strcmp(secstrings + sechdrs[i].sh_name, ".plt.init") == 0) + me->arch.init_plt_section = i; + else if (strcmp(secstrings + sechdrs[i].sh_name, ".plt") == 0) + me->arch.core_plt_section = i; + } + if (!me->arch.core_plt_section || !me->arch.init_plt_section) { + printk("Module doesn't contain .plt or .plt.init sections.\n"); + return -ENOEXEC; + } + + /* Override their sizes */ + sechdrs[me->arch.core_plt_section].sh_size + = get_plt_size(hdr, sechdrs, 
secstrings, 0); + sechdrs[me->arch.init_plt_section].sh_size + = get_plt_size(hdr, sechdrs, secstrings, 1); + return 0; } int apply_relocate(Elf32_Shdr *sechdrs, @@ -141,17 +148,20 @@ static inline int entry_matches(struct ppc_plt_entry *entry, Elf32_Addr val) } /* Set up a trampoline in the PLT to bounce us to the distant function */ -static uint32_t do_plt_call(void *location, Elf32_Addr val, struct module *mod) +static uint32_t do_plt_call(void *location, + Elf32_Addr val, + Elf32_Shdr *sechdrs, + struct module *mod) { struct ppc_plt_entry *entry; DEBUGP("Doing plt for call to 0x%x at 0x%x\n", val, (unsigned int)location); /* Init, or core PLT? */ if (location >= mod->module_core - && location < mod->module_core + mod->arch.core_plt_offset) - entry = mod->module_core + mod->arch.core_plt_offset; + && location < mod->module_core + mod->core_size) + entry = (void *)sechdrs[mod->arch.core_plt_section].sh_addr; else - entry = mod->module_init + mod->arch.init_plt_offset; + entry = (void *)sechdrs[mod->arch.init_plt_section].sh_addr; /* Find this entry, or if that fails, the next avail. entry */ while (entry->jump[0]) { @@ -220,7 +230,8 @@ int apply_relocate_add(Elf32_Shdr *sechdrs, case R_PPC_REL24: if ((int)(value - (uint32_t)location) < -0x02000000 || (int)(value - (uint32_t)location) >= 0x02000000) - value = do_plt_call(location, value, module); + value = do_plt_call(location, value, + sechdrs, module); /* Only replace bits 2 through 26 */ DEBUGP("REL24 value = %08X. location = %08X\n", diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c index e089cbe09a91..d1d4dd05c212 100644 --- a/arch/s390/kernel/module.c +++ b/arch/s390/kernel/module.c @@ -51,26 +51,15 @@ void module_free(struct module *mod, void *module_region) table entries. */ } -/* s390/s390x needs additional memory for GOT/PLT sections. 
*/ -long module_core_size(const Elf32_Ehdr *hdr, - const Elf32_Shdr *sechdrs, - const char *secstrings, - struct module *module) +int module_frob_arch_sections(const Elf_Ehdr *hdr, + const Elf_Shdr *sechdrs, + const char *secstrings, + struct module *mod) { // FIXME: add space needed for GOT/PLT - return module->core_size; -} - -long module_init_size(const Elf32_Ehdr *hdr, - const Elf32_Shdr *sechdrs, - const char *secstrings, - struct module *module) -{ - return module->init_size; + return 0; } - - int apply_relocate(Elf_Shdr *sechdrs, const char *strtab, unsigned int symindex, diff --git a/arch/s390x/kernel/module.c b/arch/s390x/kernel/module.c index 19d09fbcda1d..61f3b34e90be 100644 --- a/arch/s390x/kernel/module.c +++ b/arch/s390x/kernel/module.c @@ -52,25 +52,15 @@ void module_free(struct module *mod, void *module_region) } /* s390/s390x needs additional memory for GOT/PLT sections. */ -long module_core_size(const Elf32_Ehdr *hdr, - const Elf32_Shdr *sechdrs, - const char *secstrings, - struct module *module) +int module_frob_arch_sections(const Elf_Ehdr *hdr, + const Elf_Shdr *sechdrs, + const char *secstrings, + struct module *mod) { // FIXME: add space needed for GOT/PLT - return module->core_size; -} - -long module_init_size(const Elf32_Ehdr *hdr, - const Elf32_Shdr *sechdrs, - const char *secstrings, - struct module *module) -{ - return module->init_size; + return 0; } - - int apply_relocate(Elf_Shdr *sechdrs, const char *strtab, unsigned int symindex, diff --git a/arch/sparc/kernel/module.c b/arch/sparc/kernel/module.c index fd78be55226e..348c24f63d85 100644 --- a/arch/sparc/kernel/module.c +++ b/arch/sparc/kernel/module.c @@ -37,20 +37,12 @@ void module_free(struct module *mod, void *module_region) } /* We don't need anything special. 
*/ -long module_core_size(const Elf32_Ehdr *hdr, - const Elf32_Shdr *sechdrs, - const char *secstrings, - struct module *module) +int module_frob_arch_sections(const Elf_Ehdr *hdr, + const Elf_Shdr *sechdrs, + const char *secstrings, + struct module *mod) { - return module->core_size; -} - -long module_init_size(const Elf32_Ehdr *hdr, - const Elf32_Shdr *sechdrs, - const char *secstrings, - struct module *module) -{ - return module->init_size; + return 0; } int apply_relocate(Elf32_Shdr *sechdrs, diff --git a/arch/sparc64/kernel/module.c b/arch/sparc64/kernel/module.c index 1c7b8f0cad7e..dc0daa59aac1 100644 --- a/arch/sparc64/kernel/module.c +++ b/arch/sparc64/kernel/module.c @@ -144,20 +144,12 @@ void module_free(struct module *mod, void *module_region) } /* We don't need anything special. */ -long module_core_size(const Elf64_Ehdr *hdr, - const Elf64_Shdr *sechdrs, - const char *secstrings, - struct module *module) +int module_frob_arch_sections(const Elf_Ehdr *hdr, + const Elf_Shdr *sechdrs, + const char *secstrings, + struct module *mod) { - return module->core_size; -} - -long module_init_size(const Elf64_Ehdr *hdr, - const Elf64_Shdr *sechdrs, - const char *secstrings, - struct module *module) -{ - return module->init_size; + return 0; } int apply_relocate(Elf64_Shdr *sechdrs, diff --git a/arch/x86_64/kernel/module.c b/arch/x86_64/kernel/module.c index f3d3bb3ca2d4..c585076087db 100644 --- a/arch/x86_64/kernel/module.c +++ b/arch/x86_64/kernel/module.c @@ -26,20 +26,12 @@ #define DEBUGP(fmt...) /* We don't need anything special. 
*/ -long module_core_size(const Elf64_Ehdr *hdr, - const Elf64_Shdr *sechdrs, - const char *secstrings, - struct module *module) +int module_frob_arch_sections(const Elf_Ehdr *hdr, + const Elf_Shdr *sechdrs, + const char *secstrings, + struct module *mod) { - return module->core_size; -} - -long module_init_size(const Elf64_Ehdr *hdr, - const Elf64_Shdr *sechdrs, - const char *secstrings, - struct module *module) -{ - return module->init_size; + return 0; } int apply_relocate_add(Elf64_Shdr *sechdrs, diff --git a/include/asm-ppc/module.h b/include/asm-ppc/module.h index 7d75a3e3e2ee..9686cfa4fc36 100644 --- a/include/asm-ppc/module.h +++ b/include/asm-ppc/module.h @@ -18,16 +18,17 @@ struct ppc_plt_entry struct mod_arch_specific { - /* How much of the core is actually taken up with core (then - we know the rest is for the PLT */ - unsigned int core_plt_offset; - - /* Same for init */ - unsigned int init_plt_offset; + /* Indices of PLT sections within module. */ + unsigned int core_plt_section, init_plt_section; }; #define Elf_Shdr Elf32_Shdr #define Elf_Sym Elf32_Sym #define Elf_Ehdr Elf32_Ehdr +/* Make empty sections for module_frob_arch_sections to expand. */ +#ifdef MODULE +asm(".section .plt,\"aws\",@nobits; .align 3; .previous"); +asm(".section .plt.init,\"aws\",@nobits; .align 3; .previous"); +#endif #endif /* _ASM_PPC_MODULE_H */ diff --git a/include/linux/moduleloader.h b/include/linux/moduleloader.h index 928ae553ca37..5001ff39204d 100644 --- a/include/linux/moduleloader.h +++ b/include/linux/moduleloader.h @@ -15,20 +15,11 @@ unsigned long find_symbol_internal(Elf_Shdr *sechdrs, /* These must be implemented by the specific architecture */ -/* Total size to allocate for the non-releasable code; return len or - -error. mod->core_size is the current generic tally. */ -long module_core_size(const Elf_Ehdr *hdr, - const Elf_Shdr *sechdrs, - const char *secstrings, - struct module *mod); - -/* Total size of (if any) sections to be freed after init. 
Return 0 - for none, len, or -error. mod->init_size is the current generic - tally. */ -long module_init_size(const Elf_Ehdr *hdr, - const Elf_Shdr *sechdrs, - const char *secstrings, - struct module *mod); +/* Adjust arch-specific sections. Return 0 on success. */ +int module_frob_arch_sections(const Elf_Ehdr *hdr, + const Elf_Shdr *sechdrs, + const char *secstrings, + struct module *mod); /* Allocator used for allocating struct module, core sections and init sections. Returns NULL on failure. */ diff --git a/kernel/module.c b/kernel/module.c index 79769b09b3d2..680deb635e30 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1,4 +1,5 @@ /* Rewritten by Rusty Russell, on the backs of many others... + Copyright (C) 2002 Richard Henderson Copyright (C) 2001 Rusty Russell, 2002 Rusty Russell IBM. This program is free software; you can redistribute it and/or modify @@ -27,6 +28,8 @@ #include #include #include +#include +#include #include #include #include @@ -38,6 +41,13 @@ #define DEBUGP(fmt , a...) #endif +#ifndef ARCH_SHF_SMALL +#define ARCH_SHF_SMALL 0 +#endif + +/* If this is set, the section belongs in the init part of the module */ +#define INIT_OFFSET_MASK (1UL << (BITS_PER_LONG-1)) + #define symbol_is(literal, string) \ (strcmp(MODULE_SYMBOL_PREFIX literal, (string)) == 0) @@ -53,13 +63,6 @@ static inline int strong_try_module_get(struct module *mod) return try_module_get(mod); } -/* Convenient structure for holding init and core sizes */ -struct sizes -{ - unsigned long init_size; - unsigned long core_size; -}; - /* Stub function for modules which don't have an initfn */ int init_module(void) { @@ -764,43 +767,6 @@ void *__symbol_get(const char *symbol) } EXPORT_SYMBOL_GPL(__symbol_get); -/* Transfer one ELF section to the correct (init or core) area. 
*/ -static void *copy_section(const char *name, - void *base, - Elf_Shdr *sechdr, - struct module *mod, - struct sizes *used) -{ - void *dest; - unsigned long *use; - unsigned long max; - - /* Only copy to init section if there is one */ - if (strstr(name, ".init") && mod->module_init) { - dest = mod->module_init; - use = &used->init_size; - max = mod->init_size; - } else { - dest = mod->module_core; - use = &used->core_size; - max = mod->core_size; - } - - /* Align up */ - *use = ALIGN(*use, sechdr->sh_addralign); - dest += *use; - *use += sechdr->sh_size; - - if (*use > max) - return ERR_PTR(-ENOEXEC); - - /* May not actually be in the file (eg. bss). */ - if (sechdr->sh_type != SHT_NOBITS) - memcpy(dest, base + sechdr->sh_offset, sechdr->sh_size); - - return dest; -} - /* Deal with the given section */ static int handle_section(const char *name, Elf_Shdr *sechdrs, @@ -902,33 +868,66 @@ static int simplify_symbols(Elf_Shdr *sechdrs, return 0; } -/* Get the total allocation size of the init and non-init sections */ -static struct sizes get_sizes(const Elf_Ehdr *hdr, - const Elf_Shdr *sechdrs, - const char *secstrings) +/* Update size with this section: return offset. 
*/ +static long get_offset(unsigned long *size, Elf_Shdr *sechdr) { - struct sizes ret = { 0, 0 }; - unsigned i; - - /* Everything marked ALLOC (this includes the exported - symbols) */ - for (i = 1; i < hdr->e_shnum; i++) { - unsigned long *add; + long ret; - /* If it's called *.init*, and we're init, we're interested */ - if (strstr(secstrings + sechdrs[i].sh_name, ".init") != 0) - add = &ret.init_size; - else - add = &ret.core_size; + ret = ALIGN(*size, sechdr->sh_addralign ?: 1); + *size = ret + sechdr->sh_size; + return ret; +} - if (sechdrs[i].sh_flags & SHF_ALLOC) { - /* Pad up to required alignment */ - *add = ALIGN(*add, sechdrs[i].sh_addralign ?: 1); - *add += sechdrs[i].sh_size; +/* Lay out the SHF_ALLOC sections in a way not dissimilar to how ld + might -- code, read-only data, read-write data, small data. Tally + sizes, and place the offsets into sh_link fields: high bit means it + belongs in init. */ +static void layout_sections(struct module *mod, + const Elf_Ehdr *hdr, + Elf_Shdr *sechdrs, + const char *secstrings) +{ + static unsigned long const masks[][2] = { + { SHF_EXECINSTR | SHF_ALLOC, ARCH_SHF_SMALL }, + { SHF_ALLOC, SHF_WRITE | ARCH_SHF_SMALL }, + { SHF_WRITE | SHF_ALLOC, ARCH_SHF_SMALL }, + { ARCH_SHF_SMALL | SHF_ALLOC, 0 } + }; + unsigned int m, i; + + for (i = 0; i < hdr->e_shnum; i++) + sechdrs[i].sh_link = ~0UL; + + DEBUGP("Core section allocation order:\n"); + for (m = 0; m < ARRAY_SIZE(masks); ++m) { + for (i = 0; i < hdr->e_shnum; ++i) { + Elf_Shdr *s = &sechdrs[i]; + + if ((s->sh_flags & masks[m][0]) != masks[m][0] + || (s->sh_flags & masks[m][1]) + || s->sh_link != ~0UL + || strstr(secstrings + s->sh_name, ".init")) + continue; + s->sh_link = get_offset(&mod->core_size, s); + DEBUGP("\t%s\n", name); } } - return ret; + DEBUGP("Init section allocation order:\n"); + for (m = 0; m < ARRAY_SIZE(masks); ++m) { + for (i = 0; i < hdr->e_shnum; ++i) { + Elf_Shdr *s = &sechdrs[i]; + + if ((s->sh_flags & masks[m][0]) != masks[m][0] + || 
(s->sh_flags & masks[m][1]) + || s->sh_link != ~0UL + || !strstr(secstrings + s->sh_name, ".init")) + continue; + s->sh_link = (get_offset(&mod->init_size, s) + | INIT_OFFSET_MASK); + DEBUGP("\t%s\n", name); + } + } } /* Allocate and load the module */ @@ -942,7 +941,6 @@ static struct module *load_module(void *umod, unsigned int i, symindex, exportindex, strindex, setupindex, exindex, modindex, obsparmindex; long arglen; - struct sizes sizes, used; struct module *mod; long err = 0; void *ptr = NULL; /* Stops spurious gcc uninitialized warning */ @@ -1063,23 +1061,15 @@ static struct module *load_module(void *umod, mod->state = MODULE_STATE_COMING; - /* How much space will we need? */ - sizes = get_sizes(hdr, sechdrs, secstrings); - - /* Set these up, and allow archs to manipulate them. */ - mod->core_size = sizes.core_size; - mod->init_size = sizes.init_size; - - /* Allow archs to add to them. */ - err = module_init_size(hdr, sechdrs, secstrings, mod); + /* Allow arches to frob section contents and sizes. */ + err = module_frob_arch_sections(hdr, sechdrs, secstrings, mod); if (err < 0) goto free_mod; - mod->init_size = err; - err = module_core_size(hdr, sechdrs, secstrings, mod); - if (err < 0) - goto free_mod; - mod->core_size = err; + /* Determine total sizes, and put offsets in sh_link. For now + this is done generically; there doesn't appear to be any + special cases for the architectures. */ + layout_sections(mod, hdr, sechdrs, secstrings); /* Do the allocs. */ ptr = module_alloc(mod->core_size); @@ -1098,25 +1088,27 @@ static struct module *load_module(void *umod, memset(ptr, 0, mod->init_size); mod->module_init = ptr; - /* Transfer each section which requires ALLOC, and set sh_addr - fields to absolute addresses. 
*/ - used.core_size = 0; - used.init_size = 0; - for (i = 1; i < hdr->e_shnum; i++) { - if (sechdrs[i].sh_flags & SHF_ALLOC) { - ptr = copy_section(secstrings + sechdrs[i].sh_name, - hdr, &sechdrs[i], mod, &used); - if (IS_ERR(ptr)) - goto cleanup; - sechdrs[i].sh_addr = (unsigned long)ptr; - /* Have we just copied __this_module across? */ - if (i == modindex) - mod = ptr; - } + /* Transfer each section which specifies SHF_ALLOC */ + for (i = 0; i < hdr->e_shnum; i++) { + void *dest; + + if (!(sechdrs[i].sh_flags & SHF_ALLOC)) + continue; + + if (sechdrs[i].sh_link & INIT_OFFSET_MASK) + dest = mod->module_init + + (sechdrs[i].sh_link & ~INIT_OFFSET_MASK); + else + dest = mod->module_core + sechdrs[i].sh_link; + + if (sechdrs[i].sh_type != SHT_NOBITS) + memcpy(dest, (void *)sechdrs[i].sh_addr, + sechdrs[i].sh_size); + /* Update sh_addr to point to copy in image. */ + sechdrs[i].sh_addr = (unsigned long)dest; } - /* Don't use more than we allocated! */ - if (used.init_size > mod->init_size || used.core_size > mod->core_size) - BUG(); + /* Module has been moved. */ + mod = (void *)sechdrs[modindex].sh_addr; /* Now we've moved module, initialize linked lists, etc. */ module_unload_init(mod); -- cgit v1.2.3