Diffstat (limited to 'kernel')
 -rw-r--r--   kernel/ksyms.c         |   3
 -rw-r--r--   kernel/posix-timers.c  | 132
 -rw-r--r--   kernel/printk.c        |   7
 -rw-r--r--   kernel/sched.c         |  31
 -rw-r--r--   kernel/signal.c        |  62
 -rw-r--r--   kernel/sys.c           |  21
 -rw-r--r--   kernel/sysctl.c        |   5
 -rw-r--r--   kernel/timer.c         |  67
8 files changed, 182 insertions, 146 deletions
diff --git a/kernel/ksyms.c b/kernel/ksyms.c
index 3e9ffd1891ef..fc6ff398cbc4 100644
--- a/kernel/ksyms.c
+++ b/kernel/ksyms.c
@@ -177,7 +177,8 @@ EXPORT_SYMBOL(mark_buffer_dirty);
 EXPORT_SYMBOL(end_buffer_io_sync);
 EXPORT_SYMBOL(__mark_inode_dirty);
 EXPORT_SYMBOL(get_empty_filp);
-EXPORT_SYMBOL(init_private_file);
+EXPORT_SYMBOL(open_private_file);
+EXPORT_SYMBOL(close_private_file);
 EXPORT_SYMBOL(filp_open);
 EXPORT_SYMBOL(filp_close);
 EXPORT_SYMBOL(put_filp);
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 19e2aa5dcb41..3780d17e49b0 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -9,7 +9,6 @@
 /* These are all the functions necessary to implement
  * POSIX clocks & timers
  */
-
 #include <linux/mm.h>
 #include <linux/smp_lock.h>
 #include <linux/interrupt.h>
@@ -23,6 +22,7 @@
 #include <linux/compiler.h>
 #include <linux/idr.h>
 #include <linux/posix-timers.h>
+#include <linux/wait.h>
 
 #ifndef div_long_long_rem
 #include <asm/div64.h>
@@ -56,8 +56,8 @@
  * Lets keep our timers in a slab cache :-)
  */
static kmem_cache_t *posix_timers_cache;
-struct idr posix_timers_id;
-spinlock_t idr_lock = SPIN_LOCK_UNLOCKED;
+static struct idr posix_timers_id;
+static spinlock_t idr_lock = SPIN_LOCK_UNLOCKED;
 
 /*
  * Just because the timer is not in the timer list does NOT mean it is
@@ -130,7 +130,7 @@ spinlock_t idr_lock = SPIN_LOCK_UNLOCKED;
  * which we beg off on and pass to do_sys_settimeofday().
  */
 
-struct k_clock posix_clocks[MAX_CLOCKS];
+static struct k_clock posix_clocks[MAX_CLOCKS];
 
 #define if_clock_do(clock_fun, alt_fun,parms) (! clock_fun)? alt_fun parms :\
 					      clock_fun parms
@@ -183,7 +183,7 @@ init_posix_timers(void)
 __initcall(init_posix_timers);
 
 static inline int
-tstojiffie(struct timespec *tp, int res, unsigned long *jiff)
+tstojiffie(struct timespec *tp, int res, u64 *jiff)
 {
	unsigned long sec = tp->tv_sec;
	long nsec = tp->tv_nsec + res - 1;
@@ -203,7 +203,7 @@ tstojiffie(struct timespec *tp, int res, unsigned long *jiff)
	 * below.  Here it is enough to just discard the high order
	 * bits.
	 */
-	*jiff = HZ * sec;
+	*jiff = (u64)sec * HZ;
	/*
	 * Do the res thing. (Don't forget the add in the declaration of nsec)
	 */
@@ -221,9 +221,12 @@ tstojiffie(struct timespec *tp, int res, unsigned long *jiff)
 static void
 tstotimer(struct itimerspec *time, struct k_itimer *timer)
 {
+	u64 result;
	int res = posix_clocks[timer->it_clock].res;
-	tstojiffie(&time->it_value, res, &timer->it_timer.expires);
-	tstojiffie(&time->it_interval, res, &timer->it_incr);
+	tstojiffie(&time->it_value, res, &result);
+	timer->it_timer.expires = (unsigned long)result;
+	tstojiffie(&time->it_interval, res, &result);
+	timer->it_incr = (unsigned long)result;
 }
 
 static void
@@ -1020,6 +1023,9 @@ do_posix_gettime(struct k_clock *clock, struct timespec *tp)
  * Note also that the while loop assures that the sub_jiff_offset
  * will be less than a jiffie, thus no need to normalize the result.
  * Well, not really, if called with ints off :(
+
+ * HELP, this code should make an attempt at resolution beyond the
+ * jiffie.  Trouble is this is "arch" dependent...
  */
 
 int
@@ -1127,26 +1133,14 @@ nanosleep_wake_up(unsigned long __data)
  * holds (or has held for it) a write_lock_irq( xtime_lock) and is
  * called from the timer bh code.  Thus we need the irq save locks.
  */
-spinlock_t nanosleep_abs_list_lock = SPIN_LOCK_UNLOCKED;
-struct list_head nanosleep_abs_list = LIST_HEAD_INIT(nanosleep_abs_list);
+static DECLARE_WAIT_QUEUE_HEAD(nanosleep_abs_wqueue);
 
-struct abs_struct {
-	struct list_head list;
-	struct task_struct *t;
-};
 void clock_was_set(void)
 {
-	struct list_head *pos;
-	unsigned long flags;
-
-	spin_lock_irqsave(&nanosleep_abs_list_lock, flags);
-	list_for_each(pos, &nanosleep_abs_list) {
-		wake_up_process(list_entry(pos, struct abs_struct, list)->t);
-	}
-	spin_unlock_irqrestore(&nanosleep_abs_list_lock, flags);
+	wake_up_all(&nanosleep_abs_wqueue);
 }
 
 long clock_nanosleep_restart(struct restart_block *restart_block);
@@ -1201,19 +1195,19 @@ sys_clock_nanosleep(clockid_t which_clock, int flags,
	return ret;
 }
 
-
 long
 do_clock_nanosleep(clockid_t which_clock, int flags, struct timespec *tsave)
 {
	struct timespec t;
	struct timer_list new_timer;
-	struct abs_struct abs_struct = { .list = { .next = 0 } };
+	DECLARE_WAITQUEUE(abs_wqueue, current);
+	u64 rq_time = 0;
+	s64 left;
	int abs;
-	int rtn = 0;
-	int active;
	struct restart_block *restart_block =
	    &current_thread_info()->restart_block;
 
+	abs_wqueue.flags = 0;
	init_timer(&new_timer);
	new_timer.expires = 0;
	new_timer.data = (unsigned long) current;
@@ -1226,54 +1220,50 @@ do_clock_nanosleep(clockid_t which_clock, int flags, struct timespec *tsave)
		 * time and continue.
		 */
		restart_block->fn = do_no_restart_syscall;
-		if (!restart_block->arg2)
-			return -EINTR;
-		new_timer.expires = restart_block->arg2;
-		if (time_before(new_timer.expires, jiffies))
+		rq_time = restart_block->arg3;
+		rq_time = (rq_time << 32) + restart_block->arg2;
+		if (!rq_time)
+			return -EINTR;
+		if (rq_time <= get_jiffies_64())
			return 0;
	}
 
	if (abs && (posix_clocks[which_clock].clock_get !=
		    posix_clocks[CLOCK_MONOTONIC].clock_get)) {
-		spin_lock_irq(&nanosleep_abs_list_lock);
-		list_add(&abs_struct.list, &nanosleep_abs_list);
-		abs_struct.t = current;
-		spin_unlock_irq(&nanosleep_abs_list_lock);
+		add_wait_queue(&nanosleep_abs_wqueue, &abs_wqueue);
	}
	do {
		t = *tsave;
-		if ((abs || !new_timer.expires) &&
-		    !(rtn = adjust_abs_time(&posix_clocks[which_clock],
-					    &t, abs))) {
-			/*
-			 * On error, we don't set up the timer so
-			 * we don't arm the timer so
-			 * del_timer_sync() will return 0, thus
-			 * active is zero... and so it goes.
-			 */
+		if (abs || !rq_time){
+			adjust_abs_time(&posix_clocks[which_clock], &t, abs);
 
-			tstojiffie(&t,
-				   posix_clocks[which_clock].res,
-				   &new_timer.expires);
+			tstojiffie(&t, posix_clocks[which_clock].res, &rq_time);
		}
-		if (new_timer.expires) {
-			current->state = TASK_INTERRUPTIBLE;
-			add_timer(&new_timer);
-
-			schedule();
+#if (BITS_PER_LONG < 64)
+		if ((rq_time - get_jiffies_64()) > MAX_JIFFY_OFFSET){
+			new_timer.expires = MAX_JIFFY_OFFSET;
+		}else
+#endif
+		{
+			new_timer.expires = (long)rq_time;
		}
-	}
-	while ((active = del_timer_sync(&new_timer)) &&
-	       !test_thread_flag(TIF_SIGPENDING));
+		current->state = TASK_INTERRUPTIBLE;
+		add_timer(&new_timer);
+
+		schedule();
 
-	if (abs_struct.list.next) {
-		spin_lock_irq(&nanosleep_abs_list_lock);
-		list_del(&abs_struct.list);
-		spin_unlock_irq(&nanosleep_abs_list_lock);
+		del_timer_sync(&new_timer);
+		left = rq_time - get_jiffies_64();
	}
-	if (active) {
-		long jiffies_left;
+	while ( (left > 0) &&
+	       !test_thread_flag(TIF_SIGPENDING));
+
+	if( abs_wqueue.task_list.next)
+		finish_wait(&nanosleep_abs_wqueue, &abs_wqueue);
+
+	if (left > 0) {
+		unsigned long rmd;
 
		/*
		 * Always restart abs calls from scratch to pick up any
@@ -1282,29 +1272,19 @@ do_clock_nanosleep(clockid_t which_clock, int flags, struct timespec *tsave)
		if (abs)
			return -ERESTARTNOHAND;
 
-		jiffies_left = new_timer.expires - jiffies;
-
-		if (jiffies_left < 0)
-			return 0;
-
-		jiffies_to_timespec(jiffies_left, tsave);
+		tsave->tv_sec = div_long_long_rem(left, HZ, &rmd);
+		tsave->tv_nsec = rmd * (NSEC_PER_SEC / HZ);
 
-		while (tsave->tv_nsec < 0) {
-			tsave->tv_nsec += NSEC_PER_SEC;
-			tsave->tv_sec--;
-		}
-		if (tsave->tv_sec < 0) {
-			tsave->tv_sec = 0;
-			tsave->tv_nsec = 1;
-		}
		restart_block->fn = clock_nanosleep_restart;
		restart_block->arg0 = which_clock;
		restart_block->arg1 = (unsigned long)tsave;
-		restart_block->arg2 = new_timer.expires;
+		restart_block->arg2 = rq_time & 0xffffffffLL;
+		restart_block->arg3 = rq_time >> 32;
+
		return -ERESTART_RESTARTBLOCK;
	}
 
-	return rtn;
+	return 0;
 }
 /*
  * This will restart either clock_nanosleep or clock_nanosleep
diff --git a/kernel/printk.c b/kernel/printk.c
index 9f2eb4b45669..853ac68708ae 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -28,6 +28,7 @@
 #include <linux/config.h>
 #include <linux/delay.h>
 #include <linux/smp.h>
+#include <linux/security.h>
 
 #include <asm/uaccess.h>
 
@@ -161,6 +162,10 @@ int do_syslog(int type, char * buf, int len)
	char c;
	int error = 0;
 
+	error = security_syslog(type);
+	if (error)
+		return error;
+
	switch (type) {
	case 0:		/* Close log */
		break;
@@ -273,8 +278,6 @@ out:
 
 asmlinkage long sys_syslog(int type, char * buf, int len)
 {
-	if ((type != 3) && !capable(CAP_SYS_ADMIN))
-		return -EPERM;
	return do_syslog(type, buf, len);
 }
diff --git a/kernel/sched.c b/kernel/sched.c
index a399056e6ac2..caeca9ec9c21 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -342,10 +342,10 @@ static inline void __activate_task(task_t *p, runqueue_t *rq)
  */
 static inline int activate_task(task_t *p, runqueue_t *rq)
 {
-	unsigned long sleep_time = jiffies - p->last_run;
+	long sleep_time = jiffies - p->last_run - 1;
	int requeue_waker = 0;
 
-	if (sleep_time) {
+	if (sleep_time > 0) {
		int sleep_avg;
 
		/*
@@ -846,7 +846,7 @@ void sched_balance_exec(void)
 }
 
 /*
- * Find the busiest node. All previous node loads contribute with a
+ * Find the busiest node. All previous node loads contribute with a
  * geometrically deccaying weight to the load measure:
  *	load_{t} = load_{t-1}/2 + nr_node_running_{t}
  * This way sudden load peaks are flattened out a bit.
@@ -854,7 +854,7 @@ void sched_balance_exec(void)
 static int find_busiest_node(int this_node)
 {
	int i, node = -1, load, this_load, maxload;
-	
+
	this_load = maxload = (this_rq()->prev_node_load[this_node] >> 1)
		+ atomic_read(&node_nr_running[this_node]);
	this_rq()->prev_node_load[this_node] = this_load;
@@ -1194,8 +1194,8 @@ void scheduler_tick(int user_ticks, int sys_ticks)
	runqueue_t *rq = this_rq();
	task_t *p = current;
 
-	if (rcu_pending(cpu))
-		rcu_check_callbacks(cpu, user_ticks);
+	if (rcu_pending(cpu))
+		rcu_check_callbacks(cpu, user_ticks);
 
	if (p == rq->idle) {
		/* note: this timer irq context must be accounted for as well */
@@ -1353,7 +1353,7 @@ switch_tasks:
	if (likely(prev != next)) {
		rq->nr_switches++;
		rq->curr = next;
-	
+
		prepare_arch_switch(rq, next);
		prev = context_switch(rq, prev, next);
		barrier();
@@ -1483,7 +1483,7 @@ void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive)
 }
 
 #endif
- 
+
 void complete(struct completion *x)
 {
	unsigned long flags;
@@ -1567,7 +1567,7 @@ long interruptible_sleep_on_timeout(wait_queue_head_t *q, long timeout)
 void sleep_on(wait_queue_head_t *q)
 {
	SLEEP_ON_VAR
-	
+
	current->state = TASK_UNINTERRUPTIBLE;
 
	SLEEP_ON_HEAD
@@ -1578,7 +1578,7 @@ void sleep_on(wait_queue_head_t *q)
 long sleep_on_timeout(wait_queue_head_t *q, long timeout)
 {
	SLEEP_ON_VAR
-	
+
	current->state = TASK_UNINTERRUPTIBLE;
 
	SLEEP_ON_HEAD
@@ -2472,12 +2472,12 @@ spinlock_t kernel_flag __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
 
 static void kstat_init_cpu(int cpu)
 {
-	/* Add any initialisation to kstat here */
-	/* Useful when cpu offlining logic is added.. */
+	/* Add any initialisation to kstat here */
+	/* Useful when cpu offlining logic is added.. */
 }
 
 static int __devinit kstat_cpu_notify(struct notifier_block *self,
-					unsigned long action, void *hcpu)
+				      unsigned long action, void *hcpu)
 {
	int cpu = (unsigned long)hcpu;
	switch(action) {
@@ -2489,7 +2489,7 @@ static int __devinit kstat_cpu_notify(struct notifier_block *self,
	}
	return NOTIFY_OK;
 }
- 
+
 static struct notifier_block __devinitdata kstat_nb = {
	.notifier_call	= kstat_cpu_notify,
	.next		= NULL,
@@ -2498,7 +2498,7 @@ static struct notifier_block __devinitdata kstat_nb = {
 __init static void init_kstat(void) {
	kstat_cpu_notify(&kstat_nb, (unsigned long)CPU_UP_PREPARE,
			 (void *)(long)smp_processor_id());
-	register_cpu_notifier(&kstat_nb);
+	register_cpu_notifier(&kstat_nb);
 }
 
 void __init sched_init(void)
@@ -2538,7 +2538,6 @@ void __init sched_init(void)
	rq->idle = current;
	set_task_cpu(current, smp_processor_id());
	wake_up_forked_process(current);
-	current->prio = MAX_PRIO;
 
	init_timers();
diff --git a/kernel/signal.c b/kernel/signal.c
index 49e483f8451e..7f630c0261e0 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1460,6 +1460,45 @@ do_signal_stop(int signr)
 
 #ifndef HAVE_ARCH_GET_SIGNAL_TO_DELIVER
 
+/*
+ * Do appropriate magic when group_stop_count > 0.
+ * We return nonzero if we stopped, after releasing the siglock.
+ * We return zero if we still hold the siglock and should look
+ * for another signal without checking group_stop_count again.
+ */
+static inline int handle_group_stop(void)
+{
+	int stop_count;
+
+	if (current->signal->group_exit_task == current) {
+		/*
+		 * Group stop is so we can do a core dump,
+		 * We are the initiating thread, so get on with it.
+		 */
+		current->signal->group_exit_task = NULL;
+		return 0;
+	}
+
+	if (current->signal->group_exit)
+		/*
+		 * Group stop is so another thread can do a core dump,
+		 * or else we are racing against a death signal.
+		 * Just punt the stop so we can get the next signal.
+		 */
+		return 0;
+
+	/*
+	 * There is a group stop in progress.  We stop
+	 * without any associated signal being in our queue.
+	 */
+	stop_count = --current->signal->group_stop_count;
+	current->exit_code = current->signal->group_exit_code;
+	set_current_state(TASK_STOPPED);
+	spin_unlock_irq(&current->sighand->siglock);
+	finish_stop(stop_count);
+	return 1;
+}
+
 int get_signal_to_deliver(siginfo_t *info, struct pt_regs *regs, void *cookie)
 {
	sigset_t *mask = &current->blocked;
@@ -1469,28 +1508,9 @@ int get_signal_to_deliver(siginfo_t *info, struct pt_regs *regs, void *cookie)
		struct k_sigaction *ka;
 
		spin_lock_irq(&current->sighand->siglock);
-		if (unlikely(current->signal->group_stop_count > 0)) {
-			int stop_count;
-			if (current->signal->group_exit_task == current) {
-				/*
-				 * Group stop is so we can do a core dump.
-				 */
-				current->signal->group_exit_task = NULL;
-				goto dequeue;
-			}
-			/*
-			 * There is a group stop in progress.  We stop
-			 * without any associated signal being in our queue.
-			 */
-			stop_count = --current->signal->group_stop_count;
-			signr = current->signal->group_exit_code;
-			current->exit_code = signr;
-			set_current_state(TASK_STOPPED);
-			spin_unlock_irq(&current->sighand->siglock);
-			finish_stop(stop_count);
+		if (unlikely(current->signal->group_stop_count > 0) &&
+		    handle_group_stop())
			continue;
-		}
-
-dequeue:
+
		signr = dequeue_signal(current, mask, info);
		spin_unlock_irq(&current->sighand->siglock);
diff --git a/kernel/sys.c b/kernel/sys.c
index 8e3fb524d641..21c75eaf033e 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -212,18 +212,25 @@ cond_syscall(sys_delete_module)
 
 static int set_one_prio(struct task_struct *p, int niceval, int error)
 {
+	int no_nice;
+
	if (p->uid != current->euid && p->uid != current->uid &&
	    !capable(CAP_SYS_NICE)) {
		error = -EPERM;
		goto out;
	}
-
+	if (niceval < task_nice(p) && !capable(CAP_SYS_NICE)) {
+		error = -EACCES;
+		goto out;
+	}
+	no_nice = security_task_setnice(p, niceval);
+	if (no_nice) {
+		error = no_nice;
+		goto out;
+	}
	if (error == -ESRCH)
		error = 0;
-	if (niceval < task_nice(p) && !capable(CAP_SYS_NICE))
-		error = -EACCES;
-	else
-		set_user_nice(p, niceval);
+	set_user_nice(p, niceval);
 out:
	return error;
 }
@@ -941,6 +948,10 @@ asmlinkage long sys_setpgid(pid_t pid, pid_t pgid)
	}
 
 ok_pgid:
+	err = security_task_setpgid(p, pgid);
+	if (err)
+		goto out;
+
	if (p->pgrp != pgid) {
		detach_pid(p, PIDTYPE_PGID);
		p->pgrp = pgid;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index c3c96cd208d4..0364833761c4 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -33,6 +33,7 @@
 #include <linux/highuid.h>
 #include <linux/writeback.h>
 #include <linux/hugetlb.h>
+#include <linux/security.h>
 #include <asm/uaccess.h>
 
 #ifdef CONFIG_ROOT_NFS
@@ -432,6 +433,10 @@ static int test_perm(int mode, int op)
 
 static inline int ctl_perm(ctl_table *table, int op)
 {
+	int error;
+	error = security_sysctl(table, op);
+	if (error)
+		return error;
	return test_perm(table->mode, op);
 }
diff --git a/kernel/timer.c b/kernel/timer.c
index 6e7e23cb95ba..d3983cbfa8d7 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -53,11 +53,11 @@ typedef struct tvec_root_s {
	struct list_head vec[TVR_SIZE];
 } tvec_root_t;
 
-
 struct tvec_t_base_s {
	spinlock_t lock;
	unsigned long timer_jiffies;
	struct timer_list *running_timer;
+	struct list_head *run_timer_list_running;
	tvec_root_t tv1;
	tvec_t tv2;
	tvec_t tv3;
@@ -67,6 +67,14 @@ struct tvec_t_base_s {
 
 typedef struct tvec_t_base_s tvec_base_t;
 
+static inline void set_running_timer(tvec_base_t *base,
+					struct timer_list *timer)
+{
+#ifdef CONFIG_SMP
+	base->running_timer = timer;
+#endif
+}
+
 /* Fake initialization */
 static DEFINE_PER_CPU(tvec_base_t, tvec_bases) = { SPIN_LOCK_UNLOCKED };
 
@@ -94,13 +102,22 @@ static inline void check_timer(struct timer_list *timer)
		check_timer_failed(timer);
 }
 
-static inline void internal_add_timer(tvec_base_t *base, struct timer_list *timer)
+/*
+ * If a timer handler re-adds the timer with expires == jiffies, the timer
+ * running code can lock up.  So here we detect that situation and park the
+ * timer onto base->run_timer_list_running.  It will be added to the main timer
+ * structures later, by __run_timers().
+ */
+
+static void internal_add_timer(tvec_base_t *base, struct timer_list *timer)
 {
	unsigned long expires = timer->expires;
	unsigned long idx = expires - base->timer_jiffies;
	struct list_head *vec;
 
-	if (idx < TVR_SIZE) {
+	if (base->run_timer_list_running) {
+		vec = base->run_timer_list_running;
+	} else if (idx < TVR_SIZE) {
		int i = expires & TVR_MASK;
		vec = base->tv1.vec + i;
	} else if (idx < 1 << (TVR_BITS + TVN_BITS)) {
@@ -354,7 +371,7 @@ del_again:
 static int cascade(tvec_base_t *base, tvec_t *tv)
 {
	/* cascade all the timers from tv up one level */
-	struct list_head *head, *curr, *next;
+	struct list_head *head, *curr;
 
	head = tv->vec + tv->index;
	curr = head->next;
@@ -366,11 +383,9 @@ static int cascade(tvec_base_t *base, tvec_t *tv)
		struct timer_list *tmp;
 
		tmp = list_entry(curr, struct timer_list, entry);
-		if (tmp->base != base)
-			BUG();
-		next = curr->next;
+		BUG_ON(tmp->base != base);
+		curr = curr->next;
		internal_add_timer(base, tmp);
-		curr = next;
	}
	INIT_LIST_HEAD(head);
 
@@ -386,9 +401,12 @@ static int cascade(tvec_base_t *base, tvec_t *tv)
  */
 static inline void __run_timers(tvec_base_t *base)
 {
+	struct timer_list *timer;
+
	spin_lock_irq(&base->lock);
-	while ((long)(jiffies - base->timer_jiffies) >= 0) {
-		struct list_head *head, *curr;
+	while (time_after_eq(jiffies, base->timer_jiffies)) {
+		LIST_HEAD(deferred_timers);
+		struct list_head *head;
 
		/*
		 * Cascade timers:
@@ -398,37 +416,36 @@ static inline void __run_timers(tvec_base_t *base)
				(cascade(base, &base->tv3) == 1) &&
					cascade(base, &base->tv4) == 1)
			cascade(base, &base->tv5);
+		base->run_timer_list_running = &deferred_timers;
 repeat:
		head = base->tv1.vec + base->tv1.index;
-		curr = head->next;
-		if (curr != head) {
+		if (!list_empty(head)) {
			void (*fn)(unsigned long);
			unsigned long data;
-			struct timer_list *timer;
 
-			timer = list_entry(curr, struct timer_list, entry);
+			timer = list_entry(head->next,struct timer_list,entry);
			fn = timer->function;
			data = timer->data;
 
			list_del(&timer->entry);
			timer->base = NULL;
-#if CONFIG_SMP
-			base->running_timer = timer;
-#endif
+			set_running_timer(base, timer);
			spin_unlock_irq(&base->lock);
-			if (!fn)
-				printk("Bad: timer %p has NULL fn. (data: %08lx)\n", timer, data);
-			else
-				fn(data);
+			fn(data);
			spin_lock_irq(&base->lock);
			goto repeat;
		}
+		base->run_timer_list_running = NULL;
		++base->timer_jiffies;
		base->tv1.index = (base->tv1.index + 1) & TVR_MASK;
+		while (!list_empty(&deferred_timers)) {
+			timer = list_entry(deferred_timers.prev,
+						struct timer_list, entry);
+			list_del(&timer->entry);
+			internal_add_timer(base, timer);
+		}
	}
-#if CONFIG_SMP
-	base->running_timer = NULL;
-#endif
+	set_running_timer(base, NULL);
	spin_unlock_irq(&base->lock);
 }
 
@@ -775,7 +792,7 @@ static void run_timer_softirq(struct softirq_action *h)
 {
	tvec_base_t *base = &per_cpu(tvec_bases, smp_processor_id());
 
-	if ((long)(jiffies - base->timer_jiffies) >= 0)
+	if (time_after_eq(jiffies, base->timer_jiffies))
		__run_timers(base);
 }
