diff options
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/module.c | 24 | ||||
| -rw-r--r-- | kernel/sysctl.c | 216 |
2 files changed, 222 insertions, 18 deletions
diff --git a/kernel/module.c b/kernel/module.c index dd02b40cd891..36023bce008f 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -23,6 +23,7 @@ * Fix sys_init_module race, Andrew Morton <andrewm@uow.edu.au> Oct 2000 * http://www.uwsg.iu.edu/hypermail/linux/kernel/0008.3/0379.html * Replace xxx_module_symbol with inter_module_xxx. Keith Owens <kaos@ocs.com.au> Oct 2000 + * Add a module list lock for kernel fault race fixing. Alan Cox <alan@redhat.com> * * This source is covered by the GNU GPL, the same as all kernel sources. */ @@ -65,6 +66,17 @@ static struct list_head ime_list = LIST_HEAD_INIT(ime_list); static spinlock_t ime_lock = SPIN_LOCK_UNLOCKED; static int kmalloc_failed; +/* + * This lock prevents modifications that might race the kernel fault + * fixups. It does not prevent reader walks that the modules code + * does. The kernel lock does that. + * + * Since vmalloc fault fixups occur in any context this lock is taken + * irqsave at all times. + */ + +spinlock_t modlist_lock = SPIN_LOCK_UNLOCKED; + /** * inter_module_register - register a new set of inter module data. 
* @im_name: an arbitrary string to identify the data, must be unique @@ -283,6 +295,7 @@ sys_create_module(const char *name_user, size_t size) char *name; long namelen, error; struct module *mod; + unsigned long flags; if (!capable(CAP_SYS_MODULE)) return -EPERM; @@ -306,14 +319,16 @@ sys_create_module(const char *name_user, size_t size) memset(mod, 0, sizeof(*mod)); mod->size_of_struct = sizeof(*mod); - mod->next = module_list; mod->name = (char *)(mod + 1); mod->size = size; memcpy((char*)(mod+1), name, namelen+1); put_mod_name(name); + spin_lock_irqsave(&modlist_lock, flags); + mod->next = module_list; module_list = mod; /* link it in */ + spin_unlock_irqrestore(&modlist_lock, flags); error = (long) mod; goto err0; @@ -628,6 +643,7 @@ sys_delete_module(const char *name_user) /* Do automatic reaping */ restart: something_changed = 0; + for (mod = module_list; mod != &kernel_module; mod = next) { next = mod->next; spin_lock(&unload_lock); @@ -651,10 +667,13 @@ restart: spin_unlock(&unload_lock); } } + if (something_changed) goto restart; + for (mod = module_list; mod != &kernel_module; mod = mod->next) mod->flags &= ~MOD_JUST_FREED; + error = 0; out: unlock_kernel(); @@ -1018,6 +1037,7 @@ free_module(struct module *mod, int tag_freed) { struct module_ref *dep; unsigned i; + unsigned long flags; /* Let the module clean up. */ @@ -1041,6 +1061,7 @@ free_module(struct module *mod, int tag_freed) /* And from the main module list. */ + spin_lock_irqsave(&modlist_lock, flags); if (mod == module_list) { module_list = mod->next; } else { @@ -1049,6 +1070,7 @@ free_module(struct module *mod, int tag_freed) continue; p->next = mod->next; } + spin_unlock_irqrestore(&modlist_lock, flags); /* And free the memory. 
*/ diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 187ea87ea000..5484da89b261 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -75,6 +75,14 @@ extern int sem_ctls[]; extern char reboot_command []; extern int stop_a_enabled; #endif + +#ifdef CONFIG_ARCH_S390 +#ifdef CONFIG_IEEEFPU_EMULATION +extern int sysctl_ieee_emulation_warnings; +#endif +extern int sysctl_userprocess_debug; +#endif + #ifdef __powerpc__ extern unsigned long htab_reclaim_on, zero_paged_on, powersave_nap; int proc_dol2crvec(ctl_table *table, int write, struct file *filp, @@ -233,6 +241,14 @@ static ctl_table kern_table[] = { {KERN_OVERFLOWGID, "overflowgid", &overflowgid, sizeof(int), 0644, NULL, &proc_dointvec_minmax, &sysctl_intvec, NULL, &minolduid, &maxolduid}, +#ifdef CONFIG_ARCH_S390 +#ifdef CONFIG_IEEEFPU_EMULATION + {KERN_IEEE_EMULATION_WARNINGS,"ieee_emulation_warnings", + &sysctl_ieee_emulation_warnings,sizeof(int),0644,NULL,&proc_dointvec}, +#endif + {KERN_S390_USER_DEBUG_LOGGING,"userprocess_debug", + &sysctl_userprocess_debug,sizeof(int),0644,NULL,&proc_dointvec}, +#endif {0} }; @@ -320,8 +336,6 @@ int do_sysctl(int *name, int nlen, void *oldval, size_t *oldlenp, if (nlen <= 0 || nlen >= CTL_MAXNAME) return -ENOTDIR; - if ((int) newlen < 0) - return -EINVAL; if (oldval) { int old_len; if (!oldlenp || get_user(old_len, oldlenp)) @@ -425,7 +439,8 @@ int do_sysctl_strategy (ctl_table *table, void *oldval, size_t *oldlenp, void *newval, size_t newlen, void **context) { - int op = 0, rc, len; + int op = 0, rc; + size_t len; if (oldval) op |= 004; @@ -468,13 +483,82 @@ int do_sysctl_strategy (ctl_table *table, return 0; } +/** + * register_sysctl_table - register a sysctl hierarchy + * @table: the top-level table structure + * @insert_at_head: whether the entry should be inserted in front or at the end + * + * Register a sysctl table hierarchy. @table should be a filled in ctl_table + * array. An entry with a ctl_name of 0 terminates the table. 
+ * + * The members of the &ctl_table structure are used as follows: + * + * ctl_name - This is the numeric sysctl value used by sysctl(2). The number + * must be unique within that level of sysctl + * + * procname - the name of the sysctl file under /proc/sys. Set to %NULL to not + * enter a sysctl file + * + * data - a pointer to data for use by proc_handler + * + * maxlen - the maximum size in bytes of the data + * + * mode - the file permissions for the /proc/sys file, and for sysctl(2) + * + * child - a pointer to the child sysctl table if this entry is a directory, or + * %NULL. + * + * proc_handler - the text handler routine (described below) + * + * strategy - the strategy routine (described below) + * + * de - for internal use by the sysctl routines + * + * extra1, extra2 - extra pointers usable by the proc handler routines + * + * Leaf nodes in the sysctl tree will be represented by a single file + * under /proc; non-leaf nodes will be represented by directories. + * + * sysctl(2) can automatically manage read and write requests through + * the sysctl table. The data and maxlen fields of the ctl_table + * struct enable minimal validation of the values being written to be + * performed, and the mode field allows minimal authentication. + * + * More sophisticated management can be enabled by the provision of a + * strategy routine with the table entry. This will be called before + * any automatic read or write of the data is performed. + * + * The strategy routine may return + * + * < 0 - Error occurred (error is passed to user process) + * + * 0 - OK - proceed with automatic read or write. + * + * > 0 - OK - read or write has been done by the strategy routine, so + * return immediately. + * + * There must be a proc_handler routine for any terminal nodes + * mirrored under /proc/sys (non-terminals are handled by a built-in + * directory handler). 
Several default handlers are available to + * cover common cases - + * + * proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(), + * proc_dointvec_minmax(), proc_doulongvec_ms_jiffies_minmax(), + * proc_doulongvec_minmax() + * + * It is the handler's job to read the input buffer from user memory + * and process it. The handler should return 0 on success. + * + * This routine returns %NULL on a failure to register, and a pointer + * to the table header on success. + */ struct ctl_table_header *register_sysctl_table(ctl_table * table, int insert_at_head) { struct ctl_table_header *tmp; tmp = kmalloc(sizeof(struct ctl_table_header), GFP_KERNEL); if (!tmp) - return 0; + return NULL; tmp->ctl_table = table; INIT_LIST_HEAD(&tmp->ctl_entry); if (insert_at_head) @@ -487,8 +571,12 @@ struct ctl_table_header *register_sysctl_table(ctl_table * table, return tmp; } -/* - * Unlink and free a ctl_table. +/** + * unregister_sysctl_table - unregister a sysctl table hierarchy + * @header: the header returned from register_sysctl_table + * + * Unregisters the sysctl table and all children. proc entries may not + * actually be removed until they are no longer used by anyone. */ void unregister_sysctl_table(struct ctl_table_header * header) { @@ -632,10 +720,27 @@ static int proc_sys_permission(struct inode *inode, int op) return test_perm(inode->i_mode, op); } +/** + * proc_dostring - read a string sysctl + * @table: the sysctl table + * @write: %TRUE if this is a write to the sysctl file + * @filp: the file structure + * @buffer: the user buffer + * @lenp: the size of the user buffer + * + * Reads/writes a string from/to the user buffer. If the kernel + * buffer provided is not large enough to hold the string, the + * string is truncated. The copied string is NUL-terminated. + * If the string is being read by the user process, it is copied + * and a newline '\n' is added. It is truncated if the buffer is + * not large enough. + * + * Returns 0 on success. 
+ */ int proc_dostring(ctl_table *table, int write, struct file *filp, void *buffer, size_t *lenp) { - int len; + size_t len; char *p, c; if (!table->data || !table->maxlen || !*lenp || @@ -711,7 +816,9 @@ static int proc_doutsstring(ctl_table *table, int write, struct file *filp, static int do_proc_dointvec(ctl_table *table, int write, struct file *filp, void *buffer, size_t *lenp, int conv, int op) { - int *i, vleft, first=1, len, left, neg, val; + int *i, vleft, first=1, neg, val; + size_t left, len; + #define TMPBUFLEN 20 char buf[TMPBUFLEN], *p; @@ -809,6 +916,19 @@ static int do_proc_dointvec(ctl_table *table, int write, struct file *filp, return 0; } +/** + * proc_dointvec - read a vector of integers + * @table: the sysctl table + * @write: %TRUE if this is a write to the sysctl file + * @filp: the file structure + * @buffer: the user buffer + * @lenp: the size of the user buffer + * + * Reads/writes up to table->maxlen/sizeof(unsigned int) integer + * values from/to the user buffer, treated as an ASCII string. + * + * Returns 0 on success. + */ int proc_dointvec(ctl_table *table, int write, struct file *filp, void *buffer, size_t *lenp) { @@ -829,10 +949,27 @@ int proc_dointvec_bset(ctl_table *table, int write, struct file *filp, (current->pid == 1) ? OP_SET : OP_AND); } +/** + * proc_dointvec_minmax - read a vector of integers with min/max values + * @table: the sysctl table + * @write: %TRUE if this is a write to the sysctl file + * @filp: the file structure + * @buffer: the user buffer + * @lenp: the size of the user buffer + * + * Reads/writes up to table->maxlen/sizeof(unsigned int) integer + * values from/to the user buffer, treated as an ASCII string. + * + * This routine will ensure the values are within the range specified by + * table->extra1 (min) and table->extra2 (max). + * + * Returns 0 on success. 
+ */ int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp, void *buffer, size_t *lenp) { - int *i, *min, *max, vleft, first=1, len, left, neg, val; + int *i, *min, *max, vleft, first=1, neg, val; + size_t len, left; #define TMPBUFLEN 20 char buf[TMPBUFLEN], *p; @@ -927,10 +1064,6 @@ int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp, return 0; } -/* - * an unsigned long function version - */ - static int do_proc_doulongvec_minmax(ctl_table *table, int write, struct file *filp, void *buffer, size_t *lenp, @@ -939,7 +1072,8 @@ static int do_proc_doulongvec_minmax(ctl_table *table, int write, { #define TMPBUFLEN 20 unsigned long *i, *min, *max, val; - int vleft, first=1, len, left, neg; + int vleft, first=1, neg; + size_t len, left; char buf[TMPBUFLEN], *p; if (!table->data || !table->maxlen || !*lenp || @@ -1036,12 +1170,45 @@ static int do_proc_doulongvec_minmax(ctl_table *table, int write, #undef TMPBUFLEN } +/** + * proc_doulongvec_minmax - read a vector of long integers with min/max values + * @table: the sysctl table + * @write: %TRUE if this is a write to the sysctl file + * @filp: the file structure + * @buffer: the user buffer + * @lenp: the size of the user buffer + * + * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long + * values from/to the user buffer, treated as an ASCII string. + * + * This routine will ensure the values are within the range specified by + * table->extra1 (min) and table->extra2 (max). + * + * Returns 0 on success. 
+ */ int proc_doulongvec_minmax(ctl_table *table, int write, struct file *filp, void *buffer, size_t *lenp) { return do_proc_doulongvec_minmax(table, write, filp, buffer, lenp, 1l, 1l); } +/** + * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values + * @table: the sysctl table + * @write: %TRUE if this is a write to the sysctl file + * @filp: the file structure + * @buffer: the user buffer + * @lenp: the size of the user buffer + * + * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long + * values from/to the user buffer, treated as an ASCII string. The values + * are treated as milliseconds, and converted to jiffies when they are stored. + * + * This routine will ensure the values are within the range specified by + * table->extra1 (min) and table->extra2 (max). + * + * Returns 0 on success. + */ int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write, struct file *filp, void *buffer, size_t *lenp) @@ -1051,7 +1218,21 @@ int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write, } -/* Like proc_dointvec, but converts seconds to jiffies */ +/** + * proc_dointvec_jiffies - read a vector of integers as seconds + * @table: the sysctl table + * @write: %TRUE if this is a write to the sysctl file + * @filp: the file structure + * @buffer: the user buffer + * @lenp: the size of the user buffer + * + * Reads/writes up to table->maxlen/sizeof(unsigned int) integer + * values from/to the user buffer, treated as an ASCII string. + * The values read are assumed to be in seconds, and are converted into + * jiffies. + * + * Returns 0 on success. 
+ */ int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp, void *buffer, size_t *lenp) { @@ -1122,7 +1303,7 @@ int sysctl_string(ctl_table *table, int *name, int nlen, void *oldval, size_t *oldlenp, void *newval, size_t newlen, void **context) { - int l, len; + size_t l, len; if (!table->data || !table->maxlen) return -ENOTDIR; @@ -1165,7 +1346,8 @@ int sysctl_intvec(ctl_table *table, int *name, int nlen, void *oldval, size_t *oldlenp, void *newval, size_t newlen, void **context) { - int i, length, *vec, *min, *max; + int i, *vec, *min, *max; + size_t length; if (newval && newlen) { if (newlen % sizeof(int) != 0) |
