From 7d2f944a2b836c69a9d260a0a5f0d1720d57fdff Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Wed, 11 Nov 2009 14:05:29 +0000
Subject: clocksource: Provide a generic mult/shift factor calculation

MIPS has two functions to calculate the mult/shift factors for clock
sources and clock events at run time. ARM needs such functions as well.

Implement a function which calculates the mult/shift factors based on
the frequencies from which and to which a value is converted. The
function also has a parameter to specify the minimum conversion range
in seconds. This range is guaranteed not to produce a 64-bit overflow
when a value is multiplied with the calculated mult factor. The larger
the conversion range, the lower the conversion accuracy.

Provide two inline wrappers which handle clock events and clock
sources. For clock events the "from" frequency is nanoseconds per
second, which corresponds to 1 GHz, and "to" is the device frequency.
For clock sources "from" is the device frequency and "to" is
nanoseconds per second.

Signed-off-by: Thomas Gleixner
Tested-by: Mikael Pettersson
Acked-by: Ralf Baechle
Acked-by: Linus Walleij
Cc: John Stultz
LKML-Reference: <20091111134229.766673305@linutronix.de>
---
 include/linux/clocksource.h | 10 ++++++++++
 1 file changed, 10 insertions(+)
(limited to 'include/linux/clocksource.h')

diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 83d2fbd81b93..f57f88250526 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -279,6 +279,16 @@ extern void clocksource_resume(void);
 extern struct clocksource * __init __weak clocksource_default_clock(void);
 extern void clocksource_mark_unstable(struct clocksource *cs);
 
+extern void
+clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 minsec);
+
+static inline void
+clocksource_calc_mult_shift(struct clocksource *cs, u32 freq, u32 minsec)
+{
+        return clocks_calc_mult_shift(&cs->mult, &cs->shift, freq,
+                                      NSEC_PER_SEC, minsec);
+}
+
 #ifdef CONFIG_GENERIC_TIME_VSYSCALL
 extern void update_vsyscall(struct timespec *ts, struct clocksource *c);
 extern void update_vsyscall_tz(void);
--
cgit v1.2.3
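The hunk above only adds declarations; the body of clocks_calc_mult_shift() lives in kernel/time/clocksource.c, outside the path this view is limited to. The idea is to pick the largest shift (and therefore the most accurate mult) such that "minsec" seconds of input multiplied by the resulting mult still fits in 64 bits. The following standalone userspace sketch mirrors that algorithm; the function name, the 24 MHz device frequency and the 600 s range are illustrative assumptions, not values from the patch.

#include <stdint.h>
#include <stdio.h>

/*
 * Userspace sketch of the mult/shift calculation: find the largest
 * shift (best accuracy) whose rounded mult still leaves enough
 * headroom that "minsec" seconds of input cannot overflow 64 bits.
 */
static void calc_mult_shift(uint32_t *mult, uint32_t *shift,
                            uint32_t from, uint32_t to, uint32_t minsec)
{
        uint64_t tmp;
        uint32_t sft, sftacc = 32;

        /* Bits of accuracy lost to covering minsec seconds of input */
        tmp = ((uint64_t)minsec * from) >> 32;
        while (tmp) {
                tmp >>= 1;
                sftacc--;
        }

        /* Largest shift for which the rounded mult still fits */
        for (sft = 32; sft > 0; sft--) {
                tmp = (uint64_t)to << sft;
                tmp += from / 2;        /* round to nearest */
                tmp /= from;
                if ((tmp >> sftacc) == 0)
                        break;
        }
        *mult = (uint32_t)tmp;
        *shift = sft;
}

int main(void)
{
        uint32_t mult, shift;
        uint64_t ns;

        /* Clocksource direction: 24 MHz counter -> ns, 600 s safe range */
        calc_mult_shift(&mult, &shift, 24000000, 1000000000, 600);

        /* One second of cycles should convert back to ~1e9 ns */
        ns = ((uint64_t)24000000 * mult) >> shift;
        printf("mult=%u shift=%u, 1 s of cycles -> %llu ns\n",
               mult, shift, (unsigned long long)ns);
        return 0;
}

With these inputs the sketch yields mult = 699050667 and shift = 24. Converting one second of cycles back gives 1,000,000,000 ns to within a nanosecond, while a full 600 s of cycles still fits the 64-bit product, which is exactly the trade-off the minsec parameter expresses.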
From 98962465ed9e6ea99c38e0af63fe1dcb5a79dc25 Mon Sep 17 00:00:00 2001
From: Jon Hunter
Date: Tue, 18 Aug 2009 12:45:10 -0500
Subject: nohz: Prevent clocksource wrapping during idle

The dynamic tick allows the kernel to sleep for periods longer than a
single tick, but it does not currently limit the sleep time. In the
worst case the kernel could sleep longer than the wrap-around time of
the timekeeping clock source, which would result in losing track of
time.

Prevent this by limiting the sleep time to the safe maximum sleep time
of the current timekeeping clock source. The value is calculated when
the clock source is registered.

[ tglx: simplified the code a bit and massaged the commit msg ]

Signed-off-by: Jon Hunter
Cc: John Stultz
LKML-Reference: <1250617512-23567-2-git-send-email-jon-hunter@ti.com>
Signed-off-by: Thomas Gleixner
---
 include/linux/clocksource.h |  2 ++
 include/linux/time.h        |  1 +
 kernel/time/clocksource.c   | 44 ++++++++++++++++++++++++++++++++++++++
 kernel/time/tick-sched.c    | 52 +++++++++++++++++++++++++++++++++------------
 kernel/time/timekeeping.c   | 11 ++++++++++
 5 files changed, 96 insertions(+), 14 deletions(-)
(limited to 'include/linux/clocksource.h')

diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index f57f88250526..279c5478e8a6 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -151,6 +151,7 @@ extern u64 timecounter_cyc2time(struct timecounter *tc,
  *                      subtraction of non 64 bit counters
  * @mult:               cycle to nanosecond multiplier
  * @shift:              cycle to nanosecond divisor (power of two)
+ * @max_idle_ns:        max idle time permitted by the clocksource (nsecs)
  * @flags:              flags describing special properties
  * @vread:              vsyscall based read
  * @resume:             resume function for the clocksource, if necessary
@@ -168,6 +169,7 @@ struct clocksource {
         cycle_t mask;
         u32 mult;
         u32 shift;
+        u64 max_idle_ns;
         unsigned long flags;
         cycle_t (*vread)(void);
         void (*resume)(void);
diff --git a/include/linux/time.h b/include/linux/time.h
index fe04e5ef6a59..6e026e45a179 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -148,6 +148,7 @@ extern void monotonic_to_bootbased(struct timespec *ts);
 extern struct timespec timespec_trunc(struct timespec t, unsigned gran);
 
 extern int timekeeping_valid_for_hres(void);
+extern u64 timekeeping_max_deferment(void);
 extern void update_wall_time(void);
 extern void update_xtime_cache(u64 nsec);
 extern void timekeeping_leap_insert(int leapsecond);
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 407c0894ef37..b65b242f04dd 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -468,6 +468,47 @@ void clocksource_touch_watchdog(void)
 
 #ifdef CONFIG_GENERIC_TIME
 
+/**
+ * clocksource_max_deferment - Returns max time the clocksource can be deferred
+ * @cs:         Pointer to clocksource
+ *
+ */
+static u64 clocksource_max_deferment(struct clocksource *cs)
+{
+        u64 max_nsecs, max_cycles;
+
+        /*
+         * Calculate the maximum number of cycles that we can pass to the
+         * cyc2ns function without overflowing a 64-bit signed result. The
+         * maximum number of cycles is equal to ULLONG_MAX/cs->mult which
+         * is equivalent to the below.
+         * max_cycles < (2^63)/cs->mult
+         * max_cycles < 2^(log2((2^63)/cs->mult))
+         * max_cycles < 2^(log2(2^63) - log2(cs->mult))
+         * max_cycles < 2^(63 - log2(cs->mult))
+         * max_cycles < 1 << (63 - log2(cs->mult))
+         * Please note that we add 1 to the result of the log2 to account for
+         * any rounding errors, ensure the above inequality is satisfied and
+         * no overflow will occur.
+         */
+        max_cycles = 1ULL << (63 - (ilog2(cs->mult) + 1));
+
+        /*
+         * The actual maximum number of cycles we can defer the clocksource is
+         * determined by the minimum of max_cycles and cs->mask.
+         */
+        max_cycles = min_t(u64, max_cycles, (u64) cs->mask);
+        max_nsecs = clocksource_cyc2ns(max_cycles, cs->mult, cs->shift);
+
+        /*
+         * To ensure that the clocksource does not wrap whilst we are idle,
+         * limit the time the clocksource can be deferred by 12.5%. Please
+         * note a margin of 12.5% is used because this can be computed with
+         * a shift, versus say 10% which would require division.
+         */
+        return max_nsecs - (max_nsecs >> 5);
+}
+
 /**
  * clocksource_select - Select the best clocksource available
  *
@@ -564,6 +605,9 @@ static void clocksource_enqueue(struct clocksource *cs)
  */
 int clocksource_register(struct clocksource *cs)
 {
+        /* calculate max idle time permitted for this clocksource */
+        cs->max_idle_ns = clocksource_max_deferment(cs);
+
         mutex_lock(&clocksource_mutex);
         clocksource_enqueue(cs);
         clocksource_select();
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index c65ba0faa98f..a80b4644fe6b 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -208,6 +208,7 @@ void tick_nohz_stop_sched_tick(int inidle)
         struct tick_sched *ts;
         ktime_t last_update, expires, now;
         struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev;
+        u64 time_delta;
         int cpu;
 
         local_irq_save(flags);
@@ -262,6 +263,17 @@ void tick_nohz_stop_sched_tick(int inidle)
                 seq = read_seqbegin(&xtime_lock);
                 last_update = last_jiffies_update;
                 last_jiffies = jiffies;
+
+                /*
+                 * On SMP we really should only care for the CPU which
+                 * has the do_timer duty assigned. All other CPUs can
+                 * sleep as long as they want.
+                 */
+                if (cpu == tick_do_timer_cpu ||
+                    tick_do_timer_cpu == TICK_DO_TIMER_NONE)
+                        time_delta = timekeeping_max_deferment();
+                else
+                        time_delta = KTIME_MAX;
         } while (read_seqretry(&xtime_lock, seq));
 
         if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu) ||
@@ -284,11 +296,26 @@ void tick_nohz_stop_sched_tick(int inidle)
         if ((long)delta_jiffies >= 1) {
 
                 /*
-                 * calculate the expiry time for the next timer wheel
-                 * timer
-                 */
-                expires = ktime_add_ns(last_update, tick_period.tv64 *
-                                       delta_jiffies);
+                 * calculate the expiry time for the next timer wheel
+                 * timer. delta_jiffies >= NEXT_TIMER_MAX_DELTA signals
+                 * that there is no timer pending or at least extremely
+                 * far into the future (12 days for HZ=1000). In this
+                 * case we set the expiry to the end of time.
+                 */
+                if (likely(delta_jiffies < NEXT_TIMER_MAX_DELTA)) {
+                        /*
+                         * Calculate the time delta for the next timer event.
+                         * If the time delta exceeds the maximum time delta
+                         * permitted by the current clocksource then adjust
+                         * the time delta accordingly to ensure the
+                         * clocksource does not wrap.
+                         */
+                        time_delta = min_t(u64, time_delta,
+                                           tick_period.tv64 * delta_jiffies);
+                        expires = ktime_add_ns(last_update, time_delta);
+                } else {
+                        expires.tv64 = KTIME_MAX;
+                }
 
                 /*
                  * If this cpu is the one which updates jiffies, then
@@ -332,22 +359,19 @@ void tick_nohz_stop_sched_tick(int inidle)
 
                 ts->idle_sleeps++;
 
+                /* Mark expires */
+                ts->idle_expires = expires;
+
                 /*
-                 * delta_jiffies >= NEXT_TIMER_MAX_DELTA signals that
-                 * there is no timer pending or at least extremly far
-                 * into the future (12 days for HZ=1000). In this case
-                 * we simply stop the tick timer:
+                 * If the expiration time == KTIME_MAX, then
+                 * in this case we simply stop the tick timer.
                  */
-                if (unlikely(delta_jiffies >= NEXT_TIMER_MAX_DELTA)) {
-                        ts->idle_expires.tv64 = KTIME_MAX;
+                if (unlikely(expires.tv64 == KTIME_MAX)) {
                         if (ts->nohz_mode == NOHZ_MODE_HIGHRES)
                                 hrtimer_cancel(&ts->sched_timer);
                         goto out;
                 }
 
-                /* Mark expiries */
-                ts->idle_expires = expires;
-
                 if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
                         hrtimer_start(&ts->sched_timer, expires,
                                       HRTIMER_MODE_ABS_PINNED);
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 96b3f0dfa5dc..5d4d4239a0aa 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -477,6 +477,17 @@ int timekeeping_valid_for_hres(void)
         return ret;
 }
 
+/**
+ * timekeeping_max_deferment - Returns max time the clocksource can be deferred
+ *
+ * Caller must observe xtime_lock via read_seqbegin/read_seqretry to
+ * ensure that the clocksource does not change!
+ */
+u64 timekeeping_max_deferment(void)
+{
+        return timekeeper.clock->max_idle_ns;
+}
+
 /**
  * read_persistent_clock - Return time from the persistent clock.
  *
--
cgit v1.2.3
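Two details above are worth checking by hand. First, the margin: the comment says 12.5%, but max_nsecs >> 5 subtracts only 1/32, about 3.1%; a true 12.5% margin would be max_nsecs >> 3, which is equally shift-computable. Second, the headroom arithmetic itself. The sketch below mirrors clocksource_max_deferment() in userspace; the helper names and the example clocksource (a hypothetical 32-bit counter reusing the mult/shift pair from the previous sketch) are assumptions for illustration.

#include <stdint.h>
#include <stdio.h>

/* Integer log2 of a nonzero value, a stand-in for the kernel's ilog2() */
static unsigned int ilog2_u32(uint32_t v)
{
        unsigned int l = 0;

        while (v >>= 1)
                l++;
        return l;
}

/* (cycles * mult) >> shift, mirroring clocksource_cyc2ns() */
static uint64_t cyc2ns(uint64_t cycles, uint32_t mult, uint32_t shift)
{
        return (cycles * mult) >> shift;
}

/* Userspace rendering of clocksource_max_deferment() above */
static uint64_t max_deferment(uint32_t mult, uint32_t shift, uint64_t mask)
{
        /* Largest cycle count whose product with mult stays below 2^63 */
        uint64_t max_cycles = 1ULL << (63 - (ilog2_u32(mult) + 1));
        uint64_t max_nsecs;

        /* The counter itself wraps at mask */
        if (max_cycles > mask)
                max_cycles = mask;
        max_nsecs = cyc2ns(max_cycles, mult, shift);

        /* Shift-computable safety margin below the wrap point (1/32) */
        return max_nsecs - (max_nsecs >> 5);
}

int main(void)
{
        /* Hypothetical 32-bit counter using the mult/shift pair from above */
        uint64_t ns = max_deferment(699050667, 24, 0xffffffffULL);

        printf("max idle: %llu ns (~%llu s)\n", (unsigned long long)ns,
               (unsigned long long)(ns / 1000000000ULL));
        return 0;
}

For that example the counter wraps after about 179 seconds, so idle sleeps end up capped at roughly 173 seconds once the 1/32 margin is taken off.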
From 0696b711e4be45fa104c12329f617beb29c03f78 Mon Sep 17 00:00:00 2001
From: Lin Ming
Date: Tue, 17 Nov 2009 13:49:50 +0800
Subject: timekeeping: Fix clock_gettime vsyscall time warp

Since commit 0a544198 "timekeeping: Move NTP adjusted clock multiplier
to struct timekeeper" the clock multiplier of vsyscall is updated with
the unmodified clock multiplier of the clock source and not with the
NTP adjusted multiplier of the timekeeper.

This causes user space observable time warps:

new CLOCK-warp maximum: 120 nsecs, 00000025c337c537 -> 00000025c337c4bf

Add a new argument "mult" to update_vsyscall() and hand in the
timekeeping internal NTP adjusted multiplier.

Signed-off-by: Lin Ming
Cc: "Zhang Yanmin"
Cc: Martin Schwidefsky
Cc: Benjamin Herrenschmidt
Cc: Tony Luck
LKML-Reference: <1258436990.17765.83.camel@minggr.sh.intel.com>
Signed-off-by: Thomas Gleixner
---
 arch/ia64/kernel/time.c       | 4 ++--
 arch/powerpc/kernel/time.c    | 5 +++--
 arch/s390/kernel/time.c       | 3 ++-
 arch/x86/kernel/vsyscall_64.c | 5 +++--
 include/linux/clocksource.h   | 6 ++++--
 kernel/time/timekeeping.c     | 6 +++---
 6 files changed, 17 insertions(+), 12 deletions(-)
(limited to 'include/linux/clocksource.h')

diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index 4990495d7531..a35c661e5e89 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -473,7 +473,7 @@ void update_vsyscall_tz(void)
 {
 }
 
-void update_vsyscall(struct timespec *wall, struct clocksource *c)
+void update_vsyscall(struct timespec *wall, struct clocksource *c, u32 mult)
 {
         unsigned long flags;
 
@@ -481,7 +481,7 @@ void update_vsyscall(struct timespec *wall, struct clocksource *c)
 
         /* copy fsyscall clock data */
         fsyscall_gtod_data.clk_mask = c->mask;
-        fsyscall_gtod_data.clk_mult = c->mult;
+        fsyscall_gtod_data.clk_mult = mult;
         fsyscall_gtod_data.clk_shift = c->shift;
         fsyscall_gtod_data.clk_fsys_mmio = c->fsys_mmio;
         fsyscall_gtod_data.clk_cycle_last = c->cycle_last;
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index a136a11c490d..39713312fbc7 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -828,7 +828,8 @@ static cycle_t timebase_read(struct clocksource *cs)
         return (cycle_t)get_tb();
 }
 
-void update_vsyscall(struct timespec *wall_time, struct clocksource *clock)
+void update_vsyscall(struct timespec *wall_time, struct clocksource *clock,
+                     u32 mult)
 {
         u64 t2x, stamp_xsec;
 
@@ -841,7 +842,7 @@ void update_vsyscall(struct timespec *wall_time, struct clocksource *clock)
 
         /* XXX this assumes clock->shift == 22 */
         /* 4611686018 ~= 2^(20+64-22) / 1e9 */
-        t2x = (u64) clock->mult * 4611686018ULL;
+        t2x = (u64) mult * 4611686018ULL;
         stamp_xsec = (u64) xtime.tv_nsec * XSEC_PER_SEC;
         do_div(stamp_xsec, 1000000000);
         stamp_xsec += (u64) xtime.tv_sec * XSEC_PER_SEC;
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index 34162a0b2caa..68e1ecf5ebab 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -214,7 +214,8 @@ struct clocksource * __init clocksource_default_clock(void)
         return &clocksource_tod;
 }
 
-void update_vsyscall(struct timespec *wall_time, struct clocksource *clock)
+void update_vsyscall(struct timespec *wall_time, struct clocksource *clock,
+                     u32 mult)
 {
         if (clock != &clocksource_tod)
                 return;
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 8cb4974ff599..62f39d79b775 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -73,7 +73,8 @@ void update_vsyscall_tz(void)
         write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
 }
 
-void update_vsyscall(struct timespec *wall_time, struct clocksource *clock)
+void update_vsyscall(struct timespec *wall_time, struct clocksource *clock,
+                     u32 mult)
 {
         unsigned long flags;
 
@@ -82,7 +83,7 @@ void update_vsyscall(struct timespec *wall_time, struct clocksource *clock)
         vsyscall_gtod_data.clock.vread = clock->vread;
         vsyscall_gtod_data.clock.cycle_last = clock->cycle_last;
         vsyscall_gtod_data.clock.mask = clock->mask;
-        vsyscall_gtod_data.clock.mult = clock->mult;
+        vsyscall_gtod_data.clock.mult = mult;
         vsyscall_gtod_data.clock.shift = clock->shift;
         vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec;
         vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec;
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 83d2fbd81b93..95e4995d9987 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -280,10 +280,12 @@ extern struct clocksource * __init __weak clocksource_default_clock(void);
 extern void clocksource_mark_unstable(struct clocksource *cs);
 
 #ifdef CONFIG_GENERIC_TIME_VSYSCALL
-extern void update_vsyscall(struct timespec *ts, struct clocksource *c);
+extern void
+update_vsyscall(struct timespec *ts, struct clocksource *c, u32 mult);
 extern void update_vsyscall_tz(void);
 #else
-static inline void update_vsyscall(struct timespec *ts, struct clocksource *c)
+static inline void
+update_vsyscall(struct timespec *ts, struct clocksource *c, u32 mult)
 {
 }
 
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index c3a4e2907eaa..2a6d3e3e2c3e 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -177,7 +177,7 @@ void timekeeping_leap_insert(int leapsecond)
 {
         xtime.tv_sec += leapsecond;
         wall_to_monotonic.tv_sec -= leapsecond;
-        update_vsyscall(&xtime, timekeeper.clock);
+        update_vsyscall(&xtime, timekeeper.clock, timekeeper.mult);
 }
 
 #ifdef CONFIG_GENERIC_TIME
@@ -337,7 +337,7 @@ int do_settimeofday(struct timespec *tv)
         timekeeper.ntp_error = 0;
         ntp_clear();
 
-        update_vsyscall(&xtime, timekeeper.clock);
+        update_vsyscall(&xtime, timekeeper.clock, timekeeper.mult);
 
         write_sequnlock_irqrestore(&xtime_lock, flags);
 
@@ -811,7 +811,7 @@ void update_wall_time(void)
         update_xtime_cache(nsecs);
 
         /* check to see if there is a new clocksource to use */
-        update_vsyscall(&xtime, timekeeper.clock);
+        update_vsyscall(&xtime, timekeeper.clock, timekeeper.mult);
 }
--
cgit v1.2.3
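The warp is easy to reproduce as plain arithmetic. The timekeeping core accumulates xtime with the NTP-adjusted multiplier, while the unfixed vsyscall extrapolated from cycle_last with the clocksource's raw multiplier, so the two paths disagree about how many nanoseconds the same cycle delta represents. The numbers in the sketch below are hypothetical (the mult/shift pair from the earlier sketches plus an arbitrary NTP slew); only the order of magnitude matters.

#include <stdint.h>
#include <stdio.h>

/* (cycles * mult) >> shift, as in the gettimeofday fast path */
static uint64_t cyc2ns(uint64_t cycles, uint32_t mult, uint32_t shift)
{
        return (cycles * mult) >> shift;
}

int main(void)
{
        uint32_t shift    = 24;
        uint32_t mult_raw = 699050667;  /* clocksource's unadjusted mult */
        uint32_t mult_ntp = 699050600;  /* timekeeper's NTP-slewed mult */
        uint64_t cycles   = 24000000;   /* cycles elapsed since cycle_last */

        /*
         * The kernel advances xtime with mult_ntp; before the fix the
         * vsyscall extrapolated the same cycle delta with mult_raw. When
         * NTP slows the clock (mult_ntp < mult_raw) a vsyscall read can
         * land ahead of the next xtime update: time appears to go back.
         */
        uint64_t ns_vsyscall = cyc2ns(cycles, mult_raw, shift);
        uint64_t ns_kernel   = cyc2ns(cycles, mult_ntp, shift);

        printf("vsyscall: %llu ns  kernel: %llu ns  warp: %llu ns\n",
               (unsigned long long)ns_vsyscall,
               (unsigned long long)ns_kernel,
               (unsigned long long)(ns_vsyscall - ns_kernel));
        return 0;
}

With these values the two conversions differ by about 96 ns after one second of cycles, the same order as the 120 ns warp quoted in the changelog.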