Diffstat (limited to 'kernel/timer.c')
| -rw-r--r-- | kernel/timer.c | 445 |
1 files changed, 250 insertions, 195 deletions
diff --git a/kernel/timer.c b/kernel/timer.c
index 55c14c11c901..01ee9d3103b4 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -14,74 +14,21 @@
  * Copyright (C) 1998 Andrea Arcangeli
  * 1999-03-10 Improved NTP compatibility by Ulrich Windl
  * 2002-05-31 Move sys_sysinfo here and make its locking sane, Robert Love
+ * 2000-10-05 Implemented scalable SMP per-CPU timer handling.
+ * Copyright (C) 2000, 2001, 2002 Ingo Molnar
+ * Designed by David S. Miller, Alexey Kuznetsov and Ingo Molnar
  */
-#include <linux/config.h>
-#include <linux/mm.h>
-#include <linux/timex.h>
-#include <linux/delay.h>
-#include <linux/smp_lock.h>
-#include <linux/interrupt.h>
-#include <linux/tqueue.h>
 #include <linux/kernel_stat.h>
+#include <linux/interrupt.h>
+#include <linux/percpu.h>
+#include <linux/init.h>
+#include <linux/mm.h>
 #include <asm/uaccess.h>
-struct kernel_stat kstat;
-
-/*
- * Timekeeping variables
- */
-
-unsigned long tick_usec = TICK_USEC;	/* ACTHZ period (usec) */
-unsigned long tick_nsec = TICK_NSEC(TICK_USEC);	/* USER_HZ period (nsec) */
-
-/* The current time */
-struct timespec xtime __attribute__ ((aligned (16)));
-
-/* Don't completely fail for HZ > 500. */
-int tickadj = 500/HZ ? : 1;	/* microsecs */
-
-DECLARE_TASK_QUEUE(tq_timer);
-DECLARE_TASK_QUEUE(tq_immediate);
-
 /*
- * phase-lock loop variables
- */
-/* TIME_ERROR prevents overwriting the CMOS clock */
-int time_state = TIME_OK;	/* clock synchronization status */
-int time_status = STA_UNSYNC;	/* clock status bits */
-long time_offset;	/* time adjustment (us) */
-long time_constant = 2;	/* pll time constant */
-long time_tolerance = MAXFREQ;	/* frequency tolerance (ppm) */
-long time_precision = 1;	/* clock precision (us) */
-long time_maxerror = NTP_PHASE_LIMIT;	/* maximum error (us) */
-long time_esterror = NTP_PHASE_LIMIT;	/* estimated error (us) */
-long time_phase;	/* phase offset (scaled us) */
-long time_freq = ((1000000 + HZ/2) % HZ - HZ/2) << SHIFT_USEC;
-	/* frequency offset (scaled ppm)*/
-long time_adj;	/* tick adjust (scaled 1 / HZ) */
-long time_reftime;	/* time at last adjustment (s) */
-
-long time_adjust;
-
-unsigned long event;
-
-extern int do_setitimer(int, struct itimerval *, struct itimerval *);
-
-/*
- * The 64-bit jiffies value is not atomic - you MUST NOT read it
- * without holding read_lock_irq(&xtime_lock).
- * jiffies is defined in the linker script...
- */
-
-
-unsigned int * prof_buffer;
-unsigned long prof_len;
-unsigned long prof_shift;
-
-/*
- * Event timer code
+ * per-CPU timer vector definitions:
  */
 #define TVN_BITS 6
 #define TVR_BITS 8
@@ -90,115 +37,88 @@ unsigned long prof_shift;
 #define TVN_MASK (TVN_SIZE - 1)
 #define TVR_MASK (TVR_SIZE - 1)
-struct timer_vec {
+typedef struct tvec_s {
 	int index;
 	struct list_head vec[TVN_SIZE];
-};
+} tvec_t;
-struct timer_vec_root {
+typedef struct tvec_root_s {
 	int index;
 	struct list_head vec[TVR_SIZE];
-};
+} tvec_root_t;
-static struct timer_vec tv5;
-static struct timer_vec tv4;
-static struct timer_vec tv3;
-static struct timer_vec tv2;
-static struct timer_vec_root tv1;
+struct tvec_t_base_s {
+	spinlock_t lock;
+	unsigned long timer_jiffies;
+	volatile timer_t * volatile running_timer;
+	tvec_root_t tv1;
+	tvec_t tv2;
+	tvec_t tv3;
+	tvec_t tv4;
+	tvec_t tv5;
+} ____cacheline_aligned_in_smp;
-static struct timer_vec * const tvecs[] = {
-	(struct timer_vec *)&tv1, &tv2, &tv3, &tv4, &tv5
-};
+typedef struct tvec_t_base_s tvec_base_t;
-#define NOOF_TVECS (sizeof(tvecs) / sizeof(tvecs[0]))
+static tvec_base_t tvec_bases[NR_CPUS] __cacheline_aligned;
-void init_timervecs (void)
-{
-	int i;
+/* Fake initialization needed to avoid compiler breakage */
+static DEFINE_PER_CPU(struct tasklet_struct, timer_tasklet) = { NULL };
-	for (i = 0; i < TVN_SIZE; i++) {
-		INIT_LIST_HEAD(tv5.vec + i);
-		INIT_LIST_HEAD(tv4.vec + i);
-		INIT_LIST_HEAD(tv3.vec + i);
-		INIT_LIST_HEAD(tv2.vec + i);
-	}
-	for (i = 0; i < TVR_SIZE; i++)
-		INIT_LIST_HEAD(tv1.vec + i);
-}
-
-static unsigned long timer_jiffies;
-
-static inline void internal_add_timer(struct timer_list *timer)
+static inline void internal_add_timer(tvec_base_t *base, timer_t *timer)
 {
-	/*
-	 * must be cli-ed when calling this
-	 */
 	unsigned long expires = timer->expires;
-	unsigned long idx = expires - timer_jiffies;
+	unsigned long idx = expires - base->timer_jiffies;
 	struct list_head * vec;
 	if (idx < TVR_SIZE) {
 		int i = expires & TVR_MASK;
-		vec = tv1.vec + i;
+		vec = base->tv1.vec + i;
 	} else if (idx < 1 << (TVR_BITS + TVN_BITS)) {
 		int i = (expires >> TVR_BITS) & TVN_MASK;
-		vec = tv2.vec + i;
+		vec = base->tv2.vec + i;
 	} else if (idx < 1 << (TVR_BITS + 2 * TVN_BITS)) {
 		int i = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK;
-		vec = tv3.vec + i;
+		vec = base->tv3.vec + i;
 	} else if (idx < 1 << (TVR_BITS + 3 * TVN_BITS)) {
 		int i = (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK;
-		vec = tv4.vec + i;
+		vec = base->tv4.vec + i;
 	} else if ((signed long) idx < 0) {
-		/* can happen if you add a timer with expires == jiffies,
+		/*
+		 * Can happen if you add a timer with expires == jiffies,
 		 * or you set a timer to go off in the past
 		 */
-		vec = tv1.vec + tv1.index;
+		vec = base->tv1.vec + base->tv1.index;
	} else if (idx <= 0xffffffffUL) {
 		int i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK;
-		vec = tv5.vec + i;
+		vec = base->tv5.vec + i;
 	} else {
 		/* Can only get here on architectures with 64-bit jiffies */
 		INIT_LIST_HEAD(&timer->list);
 		return;
 	}
 	/*
-	 * Timers are FIFO!
+	 * Timers are FIFO:
 	 */
-	list_add(&timer->list, vec->prev);
+	list_add_tail(&timer->list, vec);
 }
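The bucket arithmetic in internal_add_timer() is worth seeing in isolation: each wheel level covers a progressively wider span of jiffies, and the slot is just a bit-field of the expiry value. Below is a minimal standalone userspace sketch of the same selection logic; wheel_bucket(), the demo values, and the simplified handling of the expired/64-bit cases are illustrative, not kernel code:

/*
 * Standalone sketch of the bucket selection in internal_add_timer().
 * Build with: cc -o wheel wheel.c
 */
#include <stdio.h>

#define TVN_BITS 6
#define TVR_BITS 8
#define TVN_SIZE (1 << TVN_BITS)
#define TVR_SIZE (1 << TVR_BITS)
#define TVN_MASK (TVN_SIZE - 1)
#define TVR_MASK (TVR_SIZE - 1)

/* Return the wheel level (1..5) for 'expires'; store the slot in *slot. */
static int wheel_bucket(unsigned long expires, unsigned long timer_jiffies,
			int *slot)
{
	unsigned long idx = expires - timer_jiffies;

	if (idx < TVR_SIZE) {				/* next 256 ticks */
		*slot = expires & TVR_MASK;
		return 1;
	} else if (idx < 1UL << (TVR_BITS + TVN_BITS)) {
		*slot = (expires >> TVR_BITS) & TVN_MASK;
		return 2;
	} else if (idx < 1UL << (TVR_BITS + 2 * TVN_BITS)) {
		*slot = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK;
		return 3;
	} else if (idx < 1UL << (TVR_BITS + 3 * TVN_BITS)) {
		*slot = (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK;
		return 4;
	}
	/* already-expired and >32-bit cases omitted for brevity */
	*slot = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK;
	return 5;
}

int main(void)
{
	unsigned long now = 1000;
	unsigned long deltas[] = { 1, 200, 10000, 500000, 10000000 };
	int slot, i;

	for (i = 0; i < 5; i++) {
		int level = wheel_bucket(now + deltas[i], now, &slot);
		printf("+%-8lu ticks -> tv%d, slot %d\n",
		       deltas[i], level, slot);
	}
	return 0;
}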
-/* Initialize both explicitly - let's try to have them in the same cache line */
-spinlock_t timerlist_lock ____cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
-
-#ifdef CONFIG_SMP
-volatile struct timer_list * volatile running_timer;
-#define timer_enter(t) do { running_timer = t; mb(); } while (0)
-#define timer_exit() do { running_timer = NULL; } while (0)
-#define timer_is_running(t) (running_timer == t)
-#define timer_synchronize(t) while (timer_is_running(t)) barrier()
-#else
-#define timer_enter(t) do { } while (0)
-#define timer_exit() do { } while (0)
-#endif
-
-void add_timer(struct timer_list *timer)
+void add_timer(timer_t *timer)
 {
-	unsigned long flags;
-
-	spin_lock_irqsave(&timerlist_lock, flags);
-	if (unlikely(timer_pending(timer)))
-		goto bug;
-	internal_add_timer(timer);
-	spin_unlock_irqrestore(&timerlist_lock, flags);
-	return;
-bug:
-	spin_unlock_irqrestore(&timerlist_lock, flags);
-	printk(KERN_ERR "BUG: kernel timer added twice at %p.\n",
-		__builtin_return_address(0));
+	int cpu = get_cpu();
+	tvec_base_t *base = tvec_bases + cpu;
+	unsigned long flags;
+
+	BUG_ON(timer_pending(timer));
+
+	spin_lock_irqsave(&base->lock, flags);
+	internal_add_timer(base, timer);
+	timer->base = base;
+	spin_unlock_irqrestore(&base->lock, flags);
+	put_cpu();
 }
-static inline int detach_timer (struct timer_list *timer)
+static inline int detach_timer (timer_t *timer)
 {
 	if (!timer_pending(timer))
 		return 0;
@@ -206,28 +126,78 @@ static inline int detach_timer (struct timer_list *timer)
 	return 1;
 }
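The list_add_tail() change above preserves a guarantee the old list_add(..., vec->prev) expressed less directly: timers that land in the same bucket fire in the order they were queued. A small sketch, with a simplified list_head and a hypothetical demo_timer type:

/*
 * FIFO demo: tail insertion keeps same-bucket timers in queue order.
 */
#include <stdio.h>

struct list_head { struct list_head *next, *prev; };

static void INIT_LIST_HEAD(struct list_head *h) { h->next = h->prev = h; }

/* Insert 'n' just before 'head', i.e. at the tail of the queue. */
static void list_add_tail(struct list_head *n, struct list_head *head)
{
	n->prev = head->prev;
	n->next = head;
	head->prev->next = n;
	head->prev = n;
}

struct demo_timer { struct list_head list; int id; };	/* list is first */

int main(void)
{
	struct list_head bucket;
	struct demo_timer t[3] = { { {0,0}, 1 }, { {0,0}, 2 }, { {0,0}, 3 } };
	struct list_head *p;
	int i;

	INIT_LIST_HEAD(&bucket);
	for (i = 0; i < 3; i++)
		list_add_tail(&t[i].list, &bucket);

	/* Walking from head->next fires timers in insertion order: 1 2 3.
	 * The cast is safe because 'list' is the first struct member. */
	for (p = bucket.next; p != &bucket; p = p->next)
		printf("timer %d\n", ((struct demo_timer *)p)->id);
	return 0;
}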
-int mod_timer(struct timer_list *timer, unsigned long expires)
+/*
+ * mod_timer() has subtle locking semantics because parallel
+ * calls to it must happen serialized.
+ */
+int mod_timer(timer_t *timer, unsigned long expires)
 {
-	int ret;
+	tvec_base_t *old_base, *new_base;
 	unsigned long flags;
+	int ret;
+
+	if (timer_pending(timer) && timer->expires == expires)
+		return 1;
+
+	local_irq_save(flags);
+	new_base = tvec_bases + smp_processor_id();
+repeat:
+	old_base = timer->base;
+
+	/*
+	 * Prevent deadlocks via ordering by old_base < new_base.
+	 */
+	if (old_base && (new_base != old_base)) {
+		if (old_base < new_base) {
+			spin_lock(&new_base->lock);
+			spin_lock(&old_base->lock);
+		} else {
+			spin_lock(&old_base->lock);
+			spin_lock(&new_base->lock);
+		}
+		/*
+		 * Subtle, we rely on timer->base being always
+		 * valid and being updated atomically.
+		 */
+		if (timer->base != old_base) {
+			spin_unlock(&new_base->lock);
+			spin_unlock(&old_base->lock);
+			goto repeat;
+		}
+	} else
+		spin_lock(&new_base->lock);
-	spin_lock_irqsave(&timerlist_lock, flags);
 	timer->expires = expires;
 	ret = detach_timer(timer);
-	internal_add_timer(timer);
-	spin_unlock_irqrestore(&timerlist_lock, flags);
+	internal_add_timer(new_base, timer);
+	timer->base = new_base;
+
+	if (old_base && (new_base != old_base))
+		spin_unlock(&old_base->lock);
+	spin_unlock_irqrestore(&new_base->lock, flags);
+
 	return ret;
 }
-int del_timer(struct timer_list * timer)
+int del_timer(timer_t * timer)
 {
-	int ret;
 	unsigned long flags;
+	tvec_base_t * base;
+	int ret;
-	spin_lock_irqsave(&timerlist_lock, flags);
+	if (!timer->base)
+		return 0;
+repeat:
+	base = timer->base;
+	spin_lock_irqsave(&base->lock, flags);
+	if (base != timer->base) {
+		spin_unlock_irqrestore(&base->lock, flags);
+		goto repeat;
+	}
 	ret = detach_timer(timer);
 	timer->list.next = timer->list.prev = NULL;
-	spin_unlock_irqrestore(&timerlist_lock, flags);
+	spin_unlock_irqrestore(&base->lock, flags);
+
 	return ret;
 }
@@ -240,24 +210,33 @@ int del_timer(struct timer_list * timer)
  * (for reference counting).
  */
-int del_timer_sync(struct timer_list * timer)
+int del_timer_sync(timer_t * timer)
 {
+	tvec_base_t * base;
 	int ret = 0;
+	if (!timer->base)
+		return 0;
 	for (;;) {
 		unsigned long flags;
 		int running;
-		spin_lock_irqsave(&timerlist_lock, flags);
+repeat:
+		base = timer->base;
+		spin_lock_irqsave(&base->lock, flags);
+		if (base != timer->base) {
+			spin_unlock_irqrestore(&base->lock, flags);
+			goto repeat;
+		}
 		ret += detach_timer(timer);
 		timer->list.next = timer->list.prev = 0;
-		running = timer_is_running(timer);
-		spin_unlock_irqrestore(&timerlist_lock, flags);
+		running = timer_is_running(base, timer);
+		spin_unlock_irqrestore(&base->lock, flags);
 		if (!running)
 			break;
-		timer_synchronize(timer);
+		timer_synchronize(base, timer);
 	}
 	return ret;
@@ -265,7 +244,7 @@ int del_timer_sync(struct timer_list * timer)
 #endif
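The double-lock dance in mod_timer() is the classic address-ordering rule: when a timer migrates between CPU bases, both base locks must be held at once, and taking them in a globally consistent (pointer) order prevents ABBA deadlock between two concurrent migrations going in opposite directions. A userspace sketch of the same rule with pthread mutexes; struct base, lock_pair() and unlock_pair() are hypothetical names. Build with -lpthread:

/*
 * Address-ordered locking demo, mirroring mod_timer()'s rule.
 */
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

struct base {
	pthread_mutex_t lock;
	int nr_timers;
};

static struct base x = { PTHREAD_MUTEX_INITIALIZER, 1 };
static struct base y = { PTHREAD_MUTEX_INITIALIZER, 0 };

/* Always lock the lower-addressed base first, as mod_timer() does. */
static void lock_pair(struct base *a, struct base *b)
{
	if (a == b) {
		pthread_mutex_lock(&a->lock);
	} else if ((uintptr_t)a < (uintptr_t)b) {
		pthread_mutex_lock(&a->lock);
		pthread_mutex_lock(&b->lock);
	} else {
		pthread_mutex_lock(&b->lock);
		pthread_mutex_lock(&a->lock);
	}
}

static void unlock_pair(struct base *a, struct base *b)
{
	pthread_mutex_unlock(&a->lock);
	if (a != b)
		pthread_mutex_unlock(&b->lock);
}

int main(void)
{
	/* Migrate one timer from x to y under both locks; a concurrent
	 * lock_pair(&y, &x) caller would acquire in the same order. */
	lock_pair(&x, &y);
	x.nr_timers--;
	y.nr_timers++;
	unlock_pair(&x, &y);
	printf("x=%d y=%d\n", x.nr_timers, y.nr_timers);
	return 0;
}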
-static inline void cascade_timers(struct timer_vec *tv)
+static void cascade(tvec_base_t *base, tvec_t *tv)
 {
 	/* cascade all the timers from tv up one level */
 	struct list_head *head, *curr, *next;
@@ -277,67 +256,107 @@ static inline void cascade_timers(struct timer_vec *tv)
 	 * detach them individually, just clear the list afterwards.
 	 */
 	while (curr != head) {
-		struct timer_list *tmp;
+		timer_t *tmp;
-		tmp = list_entry(curr, struct timer_list, list);
+		tmp = list_entry(curr, timer_t, list);
+		if (tmp->base != base)
+			BUG();
 		next = curr->next;
 		list_del(curr); // not needed
-		internal_add_timer(tmp);
+		internal_add_timer(base, tmp);
 		curr = next;
 	}
 	INIT_LIST_HEAD(head);
 	tv->index = (tv->index + 1) & TVN_MASK;
 }
-static inline void run_timer_list(void)
+static void __run_timers(tvec_base_t *base)
 {
-	spin_lock_irq(&timerlist_lock);
-	while ((long)(jiffies - timer_jiffies) >= 0) {
+	unsigned long flags;
+
+	spin_lock_irqsave(&base->lock, flags);
+	while ((long)(jiffies - base->timer_jiffies) >= 0) {
 		struct list_head *head, *curr;
-		if (!tv1.index) {
-			int n = 1;
-			do {
-				cascade_timers(tvecs[n]);
-			} while (tvecs[n]->index == 1 && ++n < NOOF_TVECS);
+
+		/*
+		 * Cascade timers:
+		 */
+		if (!base->tv1.index) {
+			cascade(base, &base->tv2);
+			if (base->tv2.index == 1) {
+				cascade(base, &base->tv3);
+				if (base->tv3.index == 1) {
+					cascade(base, &base->tv4);
+					if (base->tv4.index == 1)
+						cascade(base, &base->tv5);
+				}
+			}
 		}
repeat:
-		head = tv1.vec + tv1.index;
+		head = base->tv1.vec + base->tv1.index;
 		curr = head->next;
 		if (curr != head) {
-			struct timer_list *timer;
 			void (*fn)(unsigned long);
 			unsigned long data;
+			timer_t *timer;
-			timer = list_entry(curr, struct timer_list, list);
+			timer = list_entry(curr, timer_t, list);
 			fn = timer->function;
-			data= timer->data;
+			data = timer->data;
 			detach_timer(timer);
 			timer->list.next = timer->list.prev = NULL;
-			timer_enter(timer);
-			spin_unlock_irq(&timerlist_lock);
+			timer_enter(base, timer);
+			spin_unlock_irq(&base->lock);
 			fn(data);
-			spin_lock_irq(&timerlist_lock);
-			timer_exit();
+			spin_lock_irq(&base->lock);
+			timer_exit(base);
 			goto repeat;
 		}
-		++timer_jiffies;
-		tv1.index = (tv1.index + 1) & TVR_MASK;
+		++base->timer_jiffies;
+		base->tv1.index = (base->tv1.index + 1) & TVR_MASK;
 	}
-	spin_unlock_irq(&timerlist_lock);
+	spin_unlock_irqrestore(&base->lock, flags);
 }
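__run_timers() only touches tv1 on most ticks; the higher vectors are drained one slot at a time, each level wrapping 64 (or 256) times more slowly than the one below it. The sketch below counts cascade events per level over 2^20 simulated ticks (expect roughly 4096 for tv2, 64 for tv3, and one each for tv4/tv5); list handling is omitted:

/*
 * Cascade-rate demo for the schedule in __run_timers().
 */
#include <stdio.h>

#define TVR_SIZE 256
#define TVN_SIZE 64

int main(void)
{
	int index[6] = { 0 };		/* index[1] is tv1.index, etc. */
	long cascades[6] = { 0 };
	long tick;
	int lvl;

	for (tick = 0; tick < 1L << 20; tick++) {
		if (!index[1]) {	/* tv1 wrapped: pull a tv2 slot */
			lvl = 2;
			do {
				cascades[lvl]++;
				index[lvl] = (index[lvl] + 1) % TVN_SIZE;
			} while (index[lvl] == 1 && ++lvl <= 5);
		}
		index[1] = (index[1] + 1) % TVR_SIZE;
	}
	for (lvl = 2; lvl <= 5; lvl++)
		printf("tv%d cascaded %ld times\n", lvl, cascades[lvl]);
	return 0;
}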
-spinlock_t tqueue_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
+/******************************************************************/
-void tqueue_bh(void)
-{
-	run_task_queue(&tq_timer);
-}
+/*
+ * Timekeeping variables
+ */
+unsigned long tick_usec = TICK_USEC;	/* ACTHZ period (usec) */
+unsigned long tick_nsec = TICK_NSEC(TICK_USEC);	/* USER_HZ period (nsec) */
-void immediate_bh(void)
-{
-	run_task_queue(&tq_immediate);
-}
+/* The current time */
+struct timespec xtime __attribute__ ((aligned (16)));
+
+/* Don't completely fail for HZ > 500. */
+int tickadj = 500/HZ ? : 1;	/* microsecs */
+
+struct kernel_stat kstat;
+
+/*
+ * phase-lock loop variables
+ */
+/* TIME_ERROR prevents overwriting the CMOS clock */
+int time_state = TIME_OK;	/* clock synchronization status */
+int time_status = STA_UNSYNC;	/* clock status bits */
+long time_offset;	/* time adjustment (us) */
+long time_constant = 2;	/* pll time constant */
+long time_tolerance = MAXFREQ;	/* frequency tolerance (ppm) */
+long time_precision = 1;	/* clock precision (us) */
+long time_maxerror = NTP_PHASE_LIMIT;	/* maximum error (us) */
+long time_esterror = NTP_PHASE_LIMIT;	/* estimated error (us) */
+long time_phase;	/* phase offset (scaled us) */
+long time_freq = ((1000000 + HZ/2) % HZ - HZ/2) << SHIFT_USEC;
+	/* frequency offset (scaled ppm)*/
+long time_adj;	/* tick adjust (scaled 1 / HZ) */
+long time_reftime;	/* time at last adjustment (s) */
+long time_adjust;
+
+unsigned int * prof_buffer;
+unsigned long prof_len;
+unsigned long prof_shift;
 /*
  * this routine handles the overflow of the microsecond field
@@ -638,17 +657,33 @@ unsigned long wall_jiffies;
 rwlock_t xtime_lock __cacheline_aligned_in_smp = RW_LOCK_UNLOCKED;
 unsigned long last_time_offset;
+/*
+ * This function runs timers and the timer-tq in softirq context.
+ */
+static void run_timer_tasklet(unsigned long data)
+{
+	tvec_base_t *base = tvec_bases + smp_processor_id();
+
+	if ((long)(jiffies - base->timer_jiffies) >= 0)
+		__run_timers(base);
+}
+
+/*
+ * Called by the local, per-CPU timer interrupt on SMP.
+ */
+void run_local_timers(void)
+{
+	tasklet_hi_schedule(&per_cpu(timer_tasklet, smp_processor_id()));
+}
+
+/*
+ * Called by the timer interrupt. xtime_lock must already be taken
+ * by the timer IRQ!
+ */
 static inline void update_times(void)
 {
 	unsigned long ticks;
-	/*
-	 * update_times() is run from the raw timer_bh handler so we
-	 * just know that the irqs are locally enabled and so we don't
-	 * need to save/restore the flags of the local CPU here. -arca
-	 */
-	write_lock_irq(&xtime_lock);
-
 	ticks = jiffies - wall_jiffies;
 	if (ticks) {
 		wall_jiffies += ticks;
@@ -656,14 +691,13 @@ static inline void update_times(void)
 	}
 	last_time_offset = 0;
 	calc_load(ticks);
-	write_unlock_irq(&xtime_lock);
-}
-
-void timer_bh(void)
-{
-	update_times();
-	run_timer_list();
 }
+
+/*
+ * The 64-bit jiffies value is not atomic - you MUST NOT read it
+ * without holding read_lock_irq(&xtime_lock).
+ * jiffies is defined in the linker script...
+ */
 void do_timer(struct pt_regs *regs)
 {
@@ -673,13 +707,13 @@ void do_timer(struct pt_regs *regs)
 	update_process_times(user_mode(regs));
 #endif
-	mark_bh(TIMER_BH);
-	if (TQ_ACTIVE(tq_timer))
-		mark_bh(TQUEUE_BH);
+	update_times();
 }
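Both __run_timers() and run_timer_tasklet() gate on (long)(jiffies - base->timer_jiffies) >= 0 rather than a direct comparison. This is the usual wrap-safe idiom: subtract first, then test the sign, so the result stays correct across counter overflow. A minimal sketch (the time_after_eq()-style helper is an illustrative stand-in):

/*
 * Wrap-safe time comparison demo.
 */
#include <stdio.h>

static int time_after_eq(unsigned long a, unsigned long b)
{
	return (long)(a - b) >= 0;	/* true if a is at or past b */
}

int main(void)
{
	unsigned long before_wrap = (unsigned long)-5;	/* 5 ticks to wrap */
	unsigned long after_wrap = 3;			/* 8 ticks later */

	/* A naive 'after_wrap >= before_wrap' would say 0 here. */
	printf("%d\n", time_after_eq(after_wrap, before_wrap));	/* 1 */
	printf("%d\n", time_after_eq(before_wrap, after_wrap));	/* 0 */
	return 0;
}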
 #if !defined(__alpha__) && !defined(__ia64__)
+extern int do_setitimer(int, struct itimerval *, struct itimerval *);
+
 /*
  * For backwards compatibility? This can be done in libc so Alpha
  * and all newer ports shouldn't need it.
@@ -821,7 +855,7 @@ static void process_timeout(unsigned long __data)
  */
 signed long schedule_timeout(signed long timeout)
 {
-	struct timer_list timer;
+	timer_t timer;
 	unsigned long expire;
 	switch (timeout)
@@ -974,3 +1008,24 @@ out:
 	return 0;
 }
+
+void __init init_timers(void)
+{
+	int i, j;
+
+	for (i = 0; i < NR_CPUS; i++) {
+		tvec_base_t *base;
+
+		base = tvec_bases + i;
+		spin_lock_init(&base->lock);
+		for (j = 0; j < TVN_SIZE; j++) {
+			INIT_LIST_HEAD(base->tv5.vec + j);
+			INIT_LIST_HEAD(base->tv4.vec + j);
+			INIT_LIST_HEAD(base->tv3.vec + j);
+			INIT_LIST_HEAD(base->tv2.vec + j);
+		}
+		for (j = 0; j < TVR_SIZE; j++)
+			INIT_LIST_HEAD(base->tv1.vec + j);
+		tasklet_init(&per_cpu(timer_tasklet, i), run_timer_tasklet, 0);
+	}
+}
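init_timers() gives every CPU a fully initialized wheel of its own before any timer can be queued. A standalone sketch of the same shape, with NR_CPUS, list_head and the init helpers reduced to userspace stand-ins (tv2..tv5 collapsed into one array). Build with -lpthread:

/*
 * Per-CPU wheel initialization, userspace shape of init_timers().
 */
#include <pthread.h>
#include <stdio.h>

#define NR_CPUS  4
#define TVR_SIZE 256
#define TVN_SIZE 64

struct list_head { struct list_head *next, *prev; };

static void INIT_LIST_HEAD(struct list_head *h) { h->next = h->prev = h; }

struct tvec_base {
	pthread_mutex_t lock;
	unsigned long timer_jiffies;
	struct list_head tv1[TVR_SIZE];
	struct list_head tvn[4][TVN_SIZE];	/* tv2..tv5 */
};

static struct tvec_base bases[NR_CPUS];

static void init_timers(void)
{
	int i, j, n;

	for (i = 0; i < NR_CPUS; i++) {
		pthread_mutex_init(&bases[i].lock, NULL);
		bases[i].timer_jiffies = 0;
		for (j = 0; j < TVR_SIZE; j++)
			INIT_LIST_HEAD(&bases[i].tv1[j]);
		for (n = 0; n < 4; n++)
			for (j = 0; j < TVN_SIZE; j++)
				INIT_LIST_HEAD(&bases[i].tvn[n][j]);
	}
}

int main(void)
{
	init_timers();
	printf("initialized %d per-CPU timer bases\n", NR_CPUS);
	return 0;
}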
