summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/module.c24
-rw-r--r--kernel/sysctl.c216
2 files changed, 222 insertions, 18 deletions
diff --git a/kernel/module.c b/kernel/module.c
index dd02b40cd891..36023bce008f 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -23,6 +23,7 @@
* Fix sys_init_module race, Andrew Morton <andrewm@uow.edu.au> Oct 2000
* http://www.uwsg.iu.edu/hypermail/linux/kernel/0008.3/0379.html
* Replace xxx_module_symbol with inter_module_xxx. Keith Owens <kaos@ocs.com.au> Oct 2000
+ * Add a module list lock for kernel fault race fixing. Alan Cox <alan@redhat.com>
*
* This source is covered by the GNU GPL, the same as all kernel sources.
*/
@@ -65,6 +66,17 @@ static struct list_head ime_list = LIST_HEAD_INIT(ime_list);
static spinlock_t ime_lock = SPIN_LOCK_UNLOCKED;
static int kmalloc_failed;
+/*
+ * This lock prevents modifications that might race the kernel fault
+ * fixups. It does not prevent reader walks that the modules code
+ * does. The kernel lock does that.
+ *
+ * Since vmalloc fault fixups occur in any context this lock is taken
+ * irqsave at all times.
+ */
+
+spinlock_t modlist_lock = SPIN_LOCK_UNLOCKED;
+
/**
* inter_module_register - register a new set of inter module data.
* @im_name: an arbitrary string to identify the data, must be unique
@@ -283,6 +295,7 @@ sys_create_module(const char *name_user, size_t size)
char *name;
long namelen, error;
struct module *mod;
+ unsigned long flags;
if (!capable(CAP_SYS_MODULE))
return -EPERM;
@@ -306,14 +319,16 @@ sys_create_module(const char *name_user, size_t size)
memset(mod, 0, sizeof(*mod));
mod->size_of_struct = sizeof(*mod);
- mod->next = module_list;
mod->name = (char *)(mod + 1);
mod->size = size;
memcpy((char*)(mod+1), name, namelen+1);
put_mod_name(name);
+ spin_lock_irqsave(&modlist_lock, flags);
+ mod->next = module_list;
module_list = mod; /* link it in */
+ spin_unlock_irqrestore(&modlist_lock, flags);
error = (long) mod;
goto err0;
@@ -628,6 +643,7 @@ sys_delete_module(const char *name_user)
/* Do automatic reaping */
restart:
something_changed = 0;
+
for (mod = module_list; mod != &kernel_module; mod = next) {
next = mod->next;
spin_lock(&unload_lock);
@@ -651,10 +667,13 @@ restart:
spin_unlock(&unload_lock);
}
}
+
if (something_changed)
goto restart;
+
for (mod = module_list; mod != &kernel_module; mod = mod->next)
mod->flags &= ~MOD_JUST_FREED;
+
error = 0;
out:
unlock_kernel();
@@ -1018,6 +1037,7 @@ free_module(struct module *mod, int tag_freed)
{
struct module_ref *dep;
unsigned i;
+ unsigned long flags;
/* Let the module clean up. */
@@ -1041,6 +1061,7 @@ free_module(struct module *mod, int tag_freed)
/* And from the main module list. */
+ spin_lock_irqsave(&modlist_lock, flags);
if (mod == module_list) {
module_list = mod->next;
} else {
@@ -1049,6 +1070,7 @@ free_module(struct module *mod, int tag_freed)
continue;
p->next = mod->next;
}
+ spin_unlock_irqrestore(&modlist_lock, flags);
/* And free the memory. */
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 187ea87ea000..5484da89b261 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -75,6 +75,14 @@ extern int sem_ctls[];
extern char reboot_command [];
extern int stop_a_enabled;
#endif
+
+#ifdef CONFIG_ARCH_S390
+#ifdef CONFIG_IEEEFPU_EMULATION
+extern int sysctl_ieee_emulation_warnings;
+#endif
+extern int sysctl_userprocess_debug;
+#endif
+
#ifdef __powerpc__
extern unsigned long htab_reclaim_on, zero_paged_on, powersave_nap;
int proc_dol2crvec(ctl_table *table, int write, struct file *filp,
@@ -233,6 +241,14 @@ static ctl_table kern_table[] = {
{KERN_OVERFLOWGID, "overflowgid", &overflowgid, sizeof(int), 0644, NULL,
&proc_dointvec_minmax, &sysctl_intvec, NULL,
&minolduid, &maxolduid},
+#ifdef CONFIG_ARCH_S390
+#ifdef CONFIG_IEEEFPU_EMULATION
+ {KERN_IEEE_EMULATION_WARNINGS,"ieee_emulation_warnings",
+ &sysctl_ieee_emulation_warnings,sizeof(int),0644,NULL,&proc_dointvec},
+#endif
+ {KERN_S390_USER_DEBUG_LOGGING,"userprocess_debug",
+ &sysctl_userprocess_debug,sizeof(int),0644,NULL,&proc_dointvec},
+#endif
{0}
};
@@ -320,8 +336,6 @@ int do_sysctl(int *name, int nlen, void *oldval, size_t *oldlenp,
if (nlen <= 0 || nlen >= CTL_MAXNAME)
return -ENOTDIR;
- if ((int) newlen < 0)
- return -EINVAL;
if (oldval) {
int old_len;
if (!oldlenp || get_user(old_len, oldlenp))
@@ -425,7 +439,8 @@ int do_sysctl_strategy (ctl_table *table,
void *oldval, size_t *oldlenp,
void *newval, size_t newlen, void **context)
{
- int op = 0, rc, len;
+ int op = 0, rc;
+ size_t len;
if (oldval)
op |= 004;
@@ -468,13 +483,82 @@ int do_sysctl_strategy (ctl_table *table,
return 0;
}
+/**
+ * register_sysctl_table - register a sysctl hierarchy
+ * @table: the top-level table structure
+ * @insert_at_head: whether the entry should be inserted in front or at the end
+ *
+ * Register a sysctl table hierarchy. @table should be a filled-in ctl_table
+ * array. An entry with a ctl_name of 0 terminates the table.
+ *
+ * The members of the &ctl_table structure are used as follows:
+ *
+ * ctl_name - This is the numeric sysctl value used by sysctl(2). The number
+ * must be unique within that level of sysctl
+ *
+ * procname - the name of the sysctl file under /proc/sys. Set to %NULL to not
+ * enter a sysctl file
+ *
+ * data - a pointer to data for use by proc_handler
+ *
+ * maxlen - the maximum size in bytes of the data
+ *
+ * mode - the file permissions for the /proc/sys file, and for sysctl(2)
+ *
+ * child - a pointer to the child sysctl table if this entry is a directory, or
+ * %NULL.
+ *
+ * proc_handler - the text handler routine (described below)
+ *
+ * strategy - the strategy routine (described below)
+ *
+ * de - for internal use by the sysctl routines
+ *
+ * extra1, extra2 - extra pointers usable by the proc handler routines
+ *
+ * Leaf nodes in the sysctl tree will be represented by a single file
+ * under /proc; non-leaf nodes will be represented by directories.
+ *
+ * sysctl(2) can automatically manage read and write requests through
+ * the sysctl table. The data and maxlen fields of the ctl_table
+ * struct enable minimal validation of the values being written to be
+ * performed, and the mode field allows minimal authentication.
+ *
+ * More sophisticated management can be enabled by the provision of a
+ * strategy routine with the table entry. This will be called before
+ * any automatic read or write of the data is performed.
+ *
+ * The strategy routine may return
+ *
+ * < 0 - Error occurred (error is passed to user process)
+ *
+ * 0 - OK - proceed with automatic read or write.
+ *
+ * > 0 - OK - read or write has been done by the strategy routine, so
+ * return immediately.
+ *
+ * There must be a proc_handler routine for any terminal nodes
+ * mirrored under /proc/sys (non-terminals are handled by a built-in
+ * directory handler). Several default handlers are available to
+ * cover common cases -
+ *
+ * proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(),
+ * proc_dointvec_minmax(), proc_doulongvec_ms_jiffies_minmax(),
+ * proc_doulongvec_minmax()
+ *
+ * It is the handler's job to read the input buffer from user memory
+ * and process it. The handler should return 0 on success.
+ *
+ * This routine returns %NULL on a failure to register, and a pointer
+ * to the table header on success.
+ */
struct ctl_table_header *register_sysctl_table(ctl_table * table,
int insert_at_head)
{
struct ctl_table_header *tmp;
tmp = kmalloc(sizeof(struct ctl_table_header), GFP_KERNEL);
if (!tmp)
- return 0;
+ return NULL;
tmp->ctl_table = table;
INIT_LIST_HEAD(&tmp->ctl_entry);
if (insert_at_head)
@@ -487,8 +571,12 @@ struct ctl_table_header *register_sysctl_table(ctl_table * table,
return tmp;
}
-/*
- * Unlink and free a ctl_table.
+/**
+ * unregister_sysctl_table - unregister a sysctl table hierarchy
+ * @header: the header returned from register_sysctl_table
+ *
+ * Unregisters the sysctl table and all children. proc entries may not
+ * actually be removed until they are no longer used by anyone.
*/
void unregister_sysctl_table(struct ctl_table_header * header)
{
@@ -632,10 +720,27 @@ static int proc_sys_permission(struct inode *inode, int op)
return test_perm(inode->i_mode, op);
}
+/**
+ * proc_dostring - read a string sysctl
+ * @table: the sysctl table
+ * @write: %TRUE if this is a write to the sysctl file
+ * @filp: the file structure
+ * @buffer: the user buffer
+ * @lenp: the size of the user buffer
+ *
+ * Reads/writes a string from/to the user buffer. If the kernel
+ * buffer provided is not large enough to hold the string, the
+ * string is truncated. The copied string is NUL-terminated.
+ * If the string is being read by the user process, it is copied
+ * and a newline '\n' is added. It is truncated if the buffer is
+ * not large enough.
+ *
+ * Returns 0 on success.
+ */
int proc_dostring(ctl_table *table, int write, struct file *filp,
void *buffer, size_t *lenp)
{
- int len;
+ size_t len;
char *p, c;
if (!table->data || !table->maxlen || !*lenp ||
@@ -711,7 +816,9 @@ static int proc_doutsstring(ctl_table *table, int write, struct file *filp,
static int do_proc_dointvec(ctl_table *table, int write, struct file *filp,
void *buffer, size_t *lenp, int conv, int op)
{
- int *i, vleft, first=1, len, left, neg, val;
+ int *i, vleft, first=1, neg, val;
+ size_t left, len;
+
#define TMPBUFLEN 20
char buf[TMPBUFLEN], *p;
@@ -809,6 +916,19 @@ static int do_proc_dointvec(ctl_table *table, int write, struct file *filp,
return 0;
}
+/**
+ * proc_dointvec - read a vector of integers
+ * @table: the sysctl table
+ * @write: %TRUE if this is a write to the sysctl file
+ * @filp: the file structure
+ * @buffer: the user buffer
+ * @lenp: the size of the user buffer
+ *
+ * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
+ * values from/to the user buffer, treated as an ASCII string.
+ *
+ * Returns 0 on success.
+ */
int proc_dointvec(ctl_table *table, int write, struct file *filp,
void *buffer, size_t *lenp)
{
@@ -829,10 +949,27 @@ int proc_dointvec_bset(ctl_table *table, int write, struct file *filp,
(current->pid == 1) ? OP_SET : OP_AND);
}
+/**
+ * proc_dointvec_minmax - read a vector of integers with min/max values
+ * @table: the sysctl table
+ * @write: %TRUE if this is a write to the sysctl file
+ * @filp: the file structure
+ * @buffer: the user buffer
+ * @lenp: the size of the user buffer
+ *
+ * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
+ * values from/to the user buffer, treated as an ASCII string.
+ *
+ * This routine will ensure the values are within the range specified by
+ * table->extra1 (min) and table->extra2 (max).
+ *
+ * Returns 0 on success.
+ */
int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp,
void *buffer, size_t *lenp)
{
- int *i, *min, *max, vleft, first=1, len, left, neg, val;
+ int *i, *min, *max, vleft, first=1, neg, val;
+ size_t len, left;
#define TMPBUFLEN 20
char buf[TMPBUFLEN], *p;
@@ -927,10 +1064,6 @@ int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp,
return 0;
}
-/*
- * an unsigned long function version
- */
-
static int do_proc_doulongvec_minmax(ctl_table *table, int write,
struct file *filp,
void *buffer, size_t *lenp,
@@ -939,7 +1072,8 @@ static int do_proc_doulongvec_minmax(ctl_table *table, int write,
{
#define TMPBUFLEN 20
unsigned long *i, *min, *max, val;
- int vleft, first=1, len, left, neg;
+ int vleft, first=1, neg;
+ size_t len, left;
char buf[TMPBUFLEN], *p;
if (!table->data || !table->maxlen || !*lenp ||
@@ -1036,12 +1170,45 @@ static int do_proc_doulongvec_minmax(ctl_table *table, int write,
#undef TMPBUFLEN
}
+/**
+ * proc_doulongvec_minmax - read a vector of long integers with min/max values
+ * @table: the sysctl table
+ * @write: %TRUE if this is a write to the sysctl file
+ * @filp: the file structure
+ * @buffer: the user buffer
+ * @lenp: the size of the user buffer
+ *
+ * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
+ * values from/to the user buffer, treated as an ASCII string.
+ *
+ * This routine will ensure the values are within the range specified by
+ * table->extra1 (min) and table->extra2 (max).
+ *
+ * Returns 0 on success.
+ */
int proc_doulongvec_minmax(ctl_table *table, int write, struct file *filp,
void *buffer, size_t *lenp)
{
return do_proc_doulongvec_minmax(table, write, filp, buffer, lenp, 1l, 1l);
}
+/**
+ * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values
+ * @table: the sysctl table
+ * @write: %TRUE if this is a write to the sysctl file
+ * @filp: the file structure
+ * @buffer: the user buffer
+ * @lenp: the size of the user buffer
+ *
+ * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
+ * values from/to the user buffer, treated as an ASCII string. The values
+ * are treated as milliseconds, and converted to jiffies when they are stored.
+ *
+ * This routine will ensure the values are within the range specified by
+ * table->extra1 (min) and table->extra2 (max).
+ *
+ * Returns 0 on success.
+ */
int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write,
struct file *filp,
void *buffer, size_t *lenp)
@@ -1051,7 +1218,21 @@ int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write,
}
-/* Like proc_dointvec, but converts seconds to jiffies */
+/**
+ * proc_dointvec_jiffies - read a vector of integers as seconds
+ * @table: the sysctl table
+ * @write: %TRUE if this is a write to the sysctl file
+ * @filp: the file structure
+ * @buffer: the user buffer
+ * @lenp: the size of the user buffer
+ *
+ * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
+ * values from/to the user buffer, treated as an ASCII string.
+ * The values read are assumed to be in seconds, and are converted into
+ * jiffies.
+ *
+ * Returns 0 on success.
+ */
int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp,
void *buffer, size_t *lenp)
{
@@ -1122,7 +1303,7 @@ int sysctl_string(ctl_table *table, int *name, int nlen,
void *oldval, size_t *oldlenp,
void *newval, size_t newlen, void **context)
{
- int l, len;
+ size_t l, len;
if (!table->data || !table->maxlen)
return -ENOTDIR;
@@ -1165,7 +1346,8 @@ int sysctl_intvec(ctl_table *table, int *name, int nlen,
void *oldval, size_t *oldlenp,
void *newval, size_t newlen, void **context)
{
- int i, length, *vec, *min, *max;
+ int i, *vec, *min, *max;
+ size_t length;
if (newval && newlen) {
if (newlen % sizeof(int) != 0)