From 29f1b2b0fecfae69e31833836f1da3136696eee5 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Thu, 28 Dec 2017 22:11:36 -0500 Subject: posix-timers: Prevent UB from shifting negative signed value Shifting a negative signed number is undefined behavior. Looking at the macros MAKE_PROCESS_CPUCLOCK and FD_TO_CLOCKID, it seems that the subexpression: (~(clockid_t) (pid) << 3) where clockid_t resolves to a signed int, which once negated, is undefined behavior to shift the value of if the results thus far are negative. It was further suggested to make these macros into inline functions. Suggested-by: Thomas Gleixner Signed-off-by: Nick Desaulniers Signed-off-by: Thomas Gleixner Cc: Dimitri Sivanich Cc: Frederic Weisbecker Cc: Al Viro Cc: linux-kselftest@vger.kernel.org Cc: Shuah Khan Cc: Deepa Dinamani Link: https://lkml.kernel.org/r/1514517100-18051-1-git-send-email-nick.desaulniers@gmail.com --- include/linux/posix-timers.h | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index 672c4f32311e..c85704fcdbd2 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -42,13 +42,26 @@ struct cpu_timer_list { #define CLOCKFD CPUCLOCK_MAX #define CLOCKFD_MASK (CPUCLOCK_PERTHREAD_MASK|CPUCLOCK_CLOCK_MASK) -#define MAKE_PROCESS_CPUCLOCK(pid, clock) \ - ((~(clockid_t) (pid) << 3) | (clockid_t) (clock)) -#define MAKE_THREAD_CPUCLOCK(tid, clock) \ - MAKE_PROCESS_CPUCLOCK((tid), (clock) | CPUCLOCK_PERTHREAD_MASK) +static inline clockid_t make_process_cpuclock(const unsigned int pid, + const clockid_t clock) +{ + return ((~pid) << 3) | clock; +} +static inline clockid_t make_thread_cpuclock(const unsigned int tid, + const clockid_t clock) +{ + return make_process_cpuclock(tid, clock | CPUCLOCK_PERTHREAD_MASK); +} -#define FD_TO_CLOCKID(fd) ((~(clockid_t) (fd) << 3) | CLOCKFD) -#define CLOCKID_TO_FD(clk) ((unsigned int) ~((clk) >> 3)) +static inline clockid_t fd_to_clockid(const int fd) +{ + return make_process_cpuclock((unsigned int) fd, CLOCKFD); +} + +static inline int clockid_to_fd(const clockid_t clk) +{ + return ~(clk >> 3); +} #define REQUEUE_PENDING 1 -- cgit v1.2.3 From ae67badaa1643253998cb21d5782e4ea7c231a29 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 14 Jan 2018 23:30:51 +0100 Subject: hrtimer: Optimize the hrtimer code by using static keys for migration_enable/nohz_active The hrtimer_cpu_base::migration_enable and ::nohz_active fields were originally introduced to avoid accessing global variables for these decisions. Still that results in a (cache hot) load and conditional branch, which can be avoided by using static keys. Implement it with static keys and optimize for the most critical case of high performance networking which tends to disable the timer migration functionality. No change in functionality. Signed-off-by: Thomas Gleixner Cc: Anna-Maria Gleixner Cc: Christoph Hellwig Cc: Frederic Weisbecker Cc: John Stultz Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Sebastian Andrzej Siewior Cc: keescook@chromium.org Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1801142327490.2371@nanos Link: https://lkml.kernel.org/r/20171221104205.7269-2-anna-maria@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/hrtimer.h | 4 --- kernel/time/hrtimer.c | 17 +++------- kernel/time/tick-internal.h | 19 +++++++---- kernel/time/tick-sched.c | 2 +- kernel/time/timer.c | 83 +++++++++++++++++++++++---------------------- 5 files changed, 60 insertions(+), 65 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 012c37fdb688..79b2a8d29d8c 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -153,8 +153,6 @@ enum hrtimer_base_type { * @cpu: cpu number * @active_bases: Bitfield to mark bases with active timers * @clock_was_set_seq: Sequence counter of clock was set events - * @migration_enabled: The migration of hrtimers to other cpus is enabled - * @nohz_active: The nohz functionality is enabled * @expires_next: absolute time of the next event which was scheduled * via clock_set_next_event() * @next_timer: Pointer to the first expiring timer @@ -178,8 +176,6 @@ struct hrtimer_cpu_base { unsigned int cpu; unsigned int active_bases; unsigned int clock_was_set_seq; - bool migration_enabled; - bool nohz_active; #ifdef CONFIG_HIGH_RES_TIMERS unsigned int in_hrtirq : 1, hres_active : 1, diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index d32520840fde..1d06d2bde733 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -178,23 +178,16 @@ hrtimer_check_target(struct hrtimer *timer, struct hrtimer_clock_base *new_base) #endif } -#ifdef CONFIG_NO_HZ_COMMON -static inline -struct hrtimer_cpu_base *get_target_base(struct hrtimer_cpu_base *base, - int pinned) -{ - if (pinned || !base->migration_enabled) - return base; - return &per_cpu(hrtimer_bases, get_nohz_timer_target()); -} -#else static inline struct hrtimer_cpu_base *get_target_base(struct hrtimer_cpu_base *base, int pinned) { +#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) + if (static_branch_likely(&timers_migration_enabled) && !pinned) + return &per_cpu(hrtimer_bases, get_nohz_timer_target()); +#endif return base; } -#endif /* * We switch the timer base to a power-optimized selected CPU target, @@ -969,7 +962,7 @@ void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, * Kick to reschedule the next tick to handle the new timer * on dynticks target. */ - if (new_base->cpu_base->nohz_active) + if (is_timers_nohz_active()) wake_up_nohz_cpu(new_base->cpu_base->cpu); } else { hrtimer_reprogram(timer, new_base); diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index f8e1845aa464..f690628e068c 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -150,14 +150,19 @@ static inline void tick_nohz_init(void) { } #ifdef CONFIG_NO_HZ_COMMON extern unsigned long tick_nohz_active; -#else +extern void timers_update_nohz(void); +extern struct static_key_false timers_nohz_active; +static inline bool is_timers_nohz_active(void) +{ + return static_branch_likely(&timers_nohz_active); +} +# ifdef CONFIG_SMP +extern struct static_key_false timers_migration_enabled; +# endif +#else /* CONFIG_NO_HZ_COMMON */ +static inline void timers_update_nohz(void) { } #define tick_nohz_active (0) -#endif - -#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) -extern void timers_update_migration(bool update_nohz); -#else -static inline void timers_update_migration(bool update_nohz) { } +static inline bool is_timers_nohz_active(void) { return false; } #endif DECLARE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases); diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index f7cc7abfcf25..29a5733eff83 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -1107,7 +1107,7 @@ static inline void tick_nohz_activate(struct tick_sched *ts, int mode) ts->nohz_mode = mode; /* One update is enough */ if (!test_and_set_bit(0, &tick_nohz_active)) - timers_update_migration(true); + timers_update_nohz(); } /** diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 0bcf00e3ce48..d530f72b32f9 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -200,8 +200,6 @@ struct timer_base { unsigned long clk; unsigned long next_expiry; unsigned int cpu; - bool migration_enabled; - bool nohz_active; bool is_idle; bool must_forward_clk; DECLARE_BITMAP(pending_map, WHEEL_SIZE); @@ -210,45 +208,57 @@ struct timer_base { static DEFINE_PER_CPU(struct timer_base, timer_bases[NR_BASES]); -#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) +#ifdef CONFIG_NO_HZ_COMMON + +DEFINE_STATIC_KEY_FALSE(timers_nohz_active); +static DEFINE_MUTEX(timer_keys_mutex); + +static void timer_update_keys(struct work_struct *work); +static DECLARE_WORK(timer_update_work, timer_update_keys); + +#ifdef CONFIG_SMP unsigned int sysctl_timer_migration = 1; -void timers_update_migration(bool update_nohz) +DEFINE_STATIC_KEY_FALSE(timers_migration_enabled); + +static void timers_update_migration(void) { - bool on = sysctl_timer_migration && tick_nohz_active; - unsigned int cpu; + if (sysctl_timer_migration && tick_nohz_active) + static_branch_enable(&timers_migration_enabled); + else + static_branch_disable(&timers_migration_enabled); +} +#else +static inline void timers_update_migration(void) { } +#endif /* !CONFIG_SMP */ - /* Avoid the loop, if nothing to update */ - if (this_cpu_read(timer_bases[BASE_STD].migration_enabled) == on) - return; +static void timer_update_keys(struct work_struct *work) +{ + mutex_lock(&timer_keys_mutex); + timers_update_migration(); + static_branch_enable(&timers_nohz_active); + mutex_unlock(&timer_keys_mutex); +} - for_each_possible_cpu(cpu) { - per_cpu(timer_bases[BASE_STD].migration_enabled, cpu) = on; - per_cpu(timer_bases[BASE_DEF].migration_enabled, cpu) = on; - per_cpu(hrtimer_bases.migration_enabled, cpu) = on; - if (!update_nohz) - continue; - per_cpu(timer_bases[BASE_STD].nohz_active, cpu) = true; - per_cpu(timer_bases[BASE_DEF].nohz_active, cpu) = true; - per_cpu(hrtimer_bases.nohz_active, cpu) = true; - } +void timers_update_nohz(void) +{ + schedule_work(&timer_update_work); } int timer_migration_handler(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - static DEFINE_MUTEX(mutex); int ret; - mutex_lock(&mutex); + mutex_lock(&timer_keys_mutex); ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); if (!ret && write) - timers_update_migration(false); - mutex_unlock(&mutex); + timers_update_migration(); + mutex_unlock(&timer_keys_mutex); return ret; } -#endif +#endif /* NO_HZ_COMMON */ static unsigned long round_jiffies_common(unsigned long j, int cpu, bool force_up) @@ -534,7 +544,7 @@ __internal_add_timer(struct timer_base *base, struct timer_list *timer) static void trigger_dyntick_cpu(struct timer_base *base, struct timer_list *timer) { - if (!IS_ENABLED(CONFIG_NO_HZ_COMMON) || !base->nohz_active) + if (!is_timers_nohz_active()) return; /* @@ -849,21 +859,20 @@ static inline struct timer_base *get_timer_base(u32 tflags) return get_timer_cpu_base(tflags, tflags & TIMER_CPUMASK); } -#ifdef CONFIG_NO_HZ_COMMON static inline struct timer_base * get_target_base(struct timer_base *base, unsigned tflags) { -#ifdef CONFIG_SMP - if ((tflags & TIMER_PINNED) || !base->migration_enabled) - return get_timer_this_cpu_base(tflags); - return get_timer_cpu_base(tflags, get_nohz_timer_target()); -#else - return get_timer_this_cpu_base(tflags); +#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) + if (static_branch_likely(&timers_migration_enabled) && + !(tflags & TIMER_PINNED)) + return get_timer_cpu_base(tflags, get_nohz_timer_target()); #endif + return get_timer_this_cpu_base(tflags); } static inline void forward_timer_base(struct timer_base *base) { +#ifdef CONFIG_NO_HZ_COMMON unsigned long jnow; /* @@ -887,16 +896,8 @@ static inline void forward_timer_base(struct timer_base *base) base->clk = jnow; else base->clk = base->next_expiry; -} -#else -static inline struct timer_base * -get_target_base(struct timer_base *base, unsigned tflags) -{ - return get_timer_this_cpu_base(tflags); -} - -static inline void forward_timer_base(struct timer_base *base) { } #endif +} /* -- cgit v1.2.3 From 1fbc78b3c980364d4fc15db83eca4a8e7ad289da Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Thu, 21 Dec 2017 11:41:32 +0100 Subject: hrtimer: Fix kerneldoc syntax for 'struct hrtimer_cpu_base' The '/**' sequence marks the start of a structure description. Add the missing second asterisk. While at it adapt the ordering of the struct members to the struct definition and document the purpose of expires_next more precisely. Signed-off-by: Anna-Maria Gleixner Cc: Christoph Hellwig Cc: John Stultz Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: keescook@chromium.org Link: http://lkml.kernel.org/r/20171221104205.7269-4-anna-maria@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/hrtimer.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 79b2a8d29d8c..b3a382be8db0 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -144,7 +144,7 @@ enum hrtimer_base_type { HRTIMER_MAX_CLOCK_BASES, }; -/* +/** * struct hrtimer_cpu_base - the per cpu clock bases * @lock: lock protecting the base and associated clock bases * and timers @@ -153,12 +153,12 @@ enum hrtimer_base_type { * @cpu: cpu number * @active_bases: Bitfield to mark bases with active timers * @clock_was_set_seq: Sequence counter of clock was set events - * @expires_next: absolute time of the next event which was scheduled - * via clock_set_next_event() - * @next_timer: Pointer to the first expiring timer * @in_hrtirq: hrtimer_interrupt() is currently executing * @hres_active: State of high resolution mode * @hang_detected: The last hrtimer interrupt detected a hang + * @expires_next: absolute time of the next event, is required for remote + * hrtimer enqueue + * @next_timer: Pointer to the first expiring timer * @nr_events: Total number of hrtimer interrupt events * @nr_retries: Total number of hrtimer interrupt retries * @nr_hangs: Total number of hrtimer interrupt hangs -- cgit v1.2.3 From 907777136f80d0cc0f714e5a389c4dfa9b4670ee Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Thu, 21 Dec 2017 11:41:33 +0100 Subject: hrtimer: Clean up the 'int clock' parameter of schedule_hrtimeout_range_clock() schedule_hrtimeout_range_clock() uses an 'int clock' parameter for the clock ID, instead of the customary predefined "clockid_t" type. In hrtimer coding style the canonical variable name for the clock ID is 'clock_id', therefore change the name of the parameter here as well to make it all consistent. While at it, clean up the description for the 'clock_id' and 'mode' function parameters. The clock modes and the clock IDs are not restricted as the comment suggests. Fix the mode description as well for the callers of schedule_hrtimeout_range_clock(). No functional changes intended. Signed-off-by: Anna-Maria Gleixner Cc: Christoph Hellwig Cc: John Stultz Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: keescook@chromium.org Link: http://lkml.kernel.org/r/20171221104205.7269-5-anna-maria@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/hrtimer.h | 2 +- kernel/time/hrtimer.c | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index b3a382be8db0..931ce9c89c93 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -462,7 +462,7 @@ extern int schedule_hrtimeout_range(ktime_t *expires, u64 delta, extern int schedule_hrtimeout_range_clock(ktime_t *expires, u64 delta, const enum hrtimer_mode mode, - int clock); + clockid_t clock_id); extern int schedule_hrtimeout(ktime_t *expires, const enum hrtimer_mode mode); /* Soft interrupt function to run the hrtimer queues: */ diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 7687355c00ff..f2de328bb8d5 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -1662,12 +1662,12 @@ void __init hrtimers_init(void) * schedule_hrtimeout_range_clock - sleep until timeout * @expires: timeout value (ktime_t) * @delta: slack in expires timeout (ktime_t) - * @mode: timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL - * @clock: timer clock, CLOCK_MONOTONIC or CLOCK_REALTIME + * @mode: timer mode + * @clock_id: timer clock to be used */ int __sched schedule_hrtimeout_range_clock(ktime_t *expires, u64 delta, - const enum hrtimer_mode mode, int clock) + const enum hrtimer_mode mode, clockid_t clock_id) { struct hrtimer_sleeper t; @@ -1688,7 +1688,7 @@ schedule_hrtimeout_range_clock(ktime_t *expires, u64 delta, return -EINTR; } - hrtimer_init_on_stack(&t.timer, clock, mode); + hrtimer_init_on_stack(&t.timer, clock_id, mode); hrtimer_set_expires_range_ns(&t.timer, *expires, delta); hrtimer_init_sleeper(&t, current); @@ -1710,7 +1710,7 @@ schedule_hrtimeout_range_clock(ktime_t *expires, u64 delta, * schedule_hrtimeout_range - sleep until timeout * @expires: timeout value (ktime_t) * @delta: slack in expires timeout (ktime_t) - * @mode: timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL + * @mode: timer mode * * Make the current task sleep until the given expiry time has * elapsed. The routine will return immediately unless @@ -1749,7 +1749,7 @@ EXPORT_SYMBOL_GPL(schedule_hrtimeout_range); /** * schedule_hrtimeout - sleep until timeout * @expires: timeout value (ktime_t) - * @mode: timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL + * @mode: timer mode * * Make the current task sleep until the given expiry time has * elapsed. The routine will return immediately unless -- cgit v1.2.3 From 6de6250c759781daeadca784d0cc34ae73f3b502 Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Thu, 21 Dec 2017 11:41:34 +0100 Subject: hrtimer: Fix hrtimer_start[_range_ns]() function descriptions The hrtimer_start[_range_ns]() functions start a timer reliably on this CPU only when HRTIMER_MODE_PINNED is set. Furthermore the HRTIMER_MODE_PINNED mode is not considered when a hrtimer is initialized. Signed-off-by: Anna-Maria Gleixner Cc: Christoph Hellwig Cc: John Stultz Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: keescook@chromium.org Link: http://lkml.kernel.org/r/20171221104205.7269-6-anna-maria@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/hrtimer.h | 6 +++--- kernel/time/hrtimer.c | 9 +++++---- 2 files changed, 8 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 931ce9c89c93..4e6a8841dcbe 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -361,11 +361,11 @@ extern void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, u64 range_ns, const enum hrtimer_mode mode); /** - * hrtimer_start - (re)start an hrtimer on the current CPU + * hrtimer_start - (re)start an hrtimer * @timer: the timer to be added * @tim: expiry time - * @mode: expiry mode: absolute (HRTIMER_MODE_ABS) or - * relative (HRTIMER_MODE_REL) + * @mode: timer mode: absolute (HRTIMER_MODE_ABS) or + * relative (HRTIMER_MODE_REL), and pinned (HRTIMER_MODE_PINNED) */ static inline void hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index f2de328bb8d5..fd08729de5d2 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -924,12 +924,12 @@ static inline ktime_t hrtimer_update_lowres(struct hrtimer *timer, ktime_t tim, } /** - * hrtimer_start_range_ns - (re)start an hrtimer on the current CPU + * hrtimer_start_range_ns - (re)start an hrtimer * @timer: the timer to be added * @tim: expiry time * @delta_ns: "slack" range for the timer - * @mode: expiry mode: absolute (HRTIMER_MODE_ABS) or - * relative (HRTIMER_MODE_REL) + * @mode: timer mode: absolute (HRTIMER_MODE_ABS) or + * relative (HRTIMER_MODE_REL), and pinned (HRTIMER_MODE_PINNED) */ void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, u64 delta_ns, const enum hrtimer_mode mode) @@ -1107,7 +1107,8 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id, * hrtimer_init - initialize a timer to the given clock * @timer: the timer to be initialized * @clock_id: the clock to be used - * @mode: timer mode abs/rel + * @mode: timer mode: absolute (HRTIMER_MODE_ABS) or + * relative (HRTIMER_MODE_REL); pinned is not considered here! */ void hrtimer_init(struct hrtimer *timer, clockid_t clock_id, enum hrtimer_mode mode) -- cgit v1.2.3 From 19b51cb5ff6ab7957bcbbec4ff812b83208f7e99 Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Thu, 21 Dec 2017 11:41:36 +0100 Subject: hrtimer: Clean up 'enum hrtimer_mode' It's not obvious that the HRTIMER_MODE variants are bit combinations, because all modes are hard coded constants currently. Change it so the bit meanings are clear; and use the symbols for creating modes which combine bits. While at it get rid of the ugly tail comments as well. Signed-off-by: Anna-Maria Gleixner Cc: Christoph Hellwig Cc: John Stultz Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: keescook@chromium.org Link: http://lkml.kernel.org/r/20171221104205.7269-8-anna-maria@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/hrtimer.h | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 4e6a8841dcbe..28f267cf2851 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -28,13 +28,19 @@ struct hrtimer_cpu_base; /* * Mode arguments of xxx_hrtimer functions: + * + * HRTIMER_MODE_ABS - Time value is absolute + * HRTIMER_MODE_REL - Time value is relative to now + * HRTIMER_MODE_PINNED - Timer is bound to CPU (is only considered + * when starting the timer) */ enum hrtimer_mode { - HRTIMER_MODE_ABS = 0x0, /* Time value is absolute */ - HRTIMER_MODE_REL = 0x1, /* Time value is relative to now */ - HRTIMER_MODE_PINNED = 0x02, /* Timer is bound to CPU */ - HRTIMER_MODE_ABS_PINNED = 0x02, - HRTIMER_MODE_REL_PINNED = 0x03, + HRTIMER_MODE_ABS = 0x00, + HRTIMER_MODE_REL = 0x01, + HRTIMER_MODE_PINNED = 0x02, + + HRTIMER_MODE_ABS_PINNED = HRTIMER_MODE_ABS | HRTIMER_MODE_PINNED, + HRTIMER_MODE_REL_PINNED = HRTIMER_MODE_REL | HRTIMER_MODE_PINNED, }; /* -- cgit v1.2.3 From 3f0b9e8eec7262648ab9c8321bf931624ee5c10a Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Thu, 21 Dec 2017 11:41:40 +0100 Subject: hrtimer: Store running timer in hrtimer_clock_base The pointer to the currently running timer is stored in hrtimer_cpu_base before the base lock is dropped and the callback is invoked. This results in two levels of indirections and the upcoming support for softirq based hrtimer requires splitting the "running" storage into soft and hard IRQ context expiry. Storing both in the cpu base would require conditionals in all code paths accessing that information. It's possible to have a per clock base sequence count and running pointer without changing the semantics of the related mechanisms because the timer base pointer cannot be changed while a timer is running the callback. Unfortunately this makes cpu_clock base larger than 32 bytes on 32-bit kernels. Instead of having huge gaps due to alignment, remove the alignment and let the compiler pack CPU base for 32-bit kernels. The resulting cache access patterns are fortunately not really different from the current behaviour. On 64-bit kernels the 64-byte alignment stays and the behaviour is unchanged. This was determined by analyzing the resulting layout and looking at the number of cache lines involved for the frequently used clocks. Signed-off-by: Anna-Maria Gleixner Cc: Christoph Hellwig Cc: John Stultz Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: keescook@chromium.org Link: http://lkml.kernel.org/r/20171221104205.7269-12-anna-maria@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/hrtimer.h | 20 +++++++++----------- kernel/time/hrtimer.c | 28 +++++++++++++--------------- 2 files changed, 22 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 28f267cf2851..1bae7b9f071d 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -118,9 +118,9 @@ struct hrtimer_sleeper { }; #ifdef CONFIG_64BIT -# define HRTIMER_CLOCK_BASE_ALIGN 64 +# define __hrtimer_clock_base_align ____cacheline_aligned #else -# define HRTIMER_CLOCK_BASE_ALIGN 32 +# define __hrtimer_clock_base_align #endif /** @@ -129,18 +129,22 @@ struct hrtimer_sleeper { * @index: clock type index for per_cpu support when moving a * timer to a base on another cpu. * @clockid: clock id for per_cpu support + * @seq: seqcount around __run_hrtimer + * @running: pointer to the currently running hrtimer * @active: red black tree root node for the active timers * @get_time: function to retrieve the current time of the clock * @offset: offset of this clock to the monotonic base */ struct hrtimer_clock_base { struct hrtimer_cpu_base *cpu_base; - int index; + unsigned int index; clockid_t clockid; + seqcount_t seq; + struct hrtimer *running; struct timerqueue_head active; ktime_t (*get_time)(void); ktime_t offset; -} __attribute__((__aligned__(HRTIMER_CLOCK_BASE_ALIGN))); +} __hrtimer_clock_base_align; enum hrtimer_base_type { HRTIMER_BASE_MONOTONIC, @@ -154,8 +158,6 @@ enum hrtimer_base_type { * struct hrtimer_cpu_base - the per cpu clock bases * @lock: lock protecting the base and associated clock bases * and timers - * @seq: seqcount around __run_hrtimer - * @running: pointer to the currently running hrtimer * @cpu: cpu number * @active_bases: Bitfield to mark bases with active timers * @clock_was_set_seq: Sequence counter of clock was set events @@ -177,8 +179,6 @@ enum hrtimer_base_type { */ struct hrtimer_cpu_base { raw_spinlock_t lock; - seqcount_t seq; - struct hrtimer *running; unsigned int cpu; unsigned int active_bases; unsigned int clock_was_set_seq; @@ -198,8 +198,6 @@ struct hrtimer_cpu_base { static inline void hrtimer_set_expires(struct hrtimer *timer, ktime_t time) { - BUILD_BUG_ON(sizeof(struct hrtimer_clock_base) > HRTIMER_CLOCK_BASE_ALIGN); - timer->node.expires = time; timer->_softexpires = time; } @@ -424,7 +422,7 @@ static inline int hrtimer_is_queued(struct hrtimer *timer) */ static inline int hrtimer_callback_running(struct hrtimer *timer) { - return timer->base->cpu_base->running == timer; + return timer->base->running == timer; } /* Forward a hrtimer so it expires after now: */ diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index cfcf8decf102..e56805fe5d00 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -70,7 +70,6 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) = { .lock = __RAW_SPIN_LOCK_UNLOCKED(hrtimer_bases.lock), - .seq = SEQCNT_ZERO(hrtimer_bases.seq), .clock_base = { { @@ -118,7 +117,6 @@ static const int hrtimer_clock_to_base_table[MAX_CLOCKS] = { * timer->base->cpu_base */ static struct hrtimer_cpu_base migration_cpu_base = { - .seq = SEQCNT_ZERO(migration_cpu_base), .clock_base = { { .cpu_base = &migration_cpu_base, }, }, }; @@ -1148,19 +1146,19 @@ EXPORT_SYMBOL_GPL(hrtimer_init); */ bool hrtimer_active(const struct hrtimer *timer) { - struct hrtimer_cpu_base *cpu_base; + struct hrtimer_clock_base *base; unsigned int seq; do { - cpu_base = READ_ONCE(timer->base->cpu_base); - seq = raw_read_seqcount_begin(&cpu_base->seq); + base = READ_ONCE(timer->base); + seq = raw_read_seqcount_begin(&base->seq); if (timer->state != HRTIMER_STATE_INACTIVE || - cpu_base->running == timer) + base->running == timer) return true; - } while (read_seqcount_retry(&cpu_base->seq, seq) || - cpu_base != READ_ONCE(timer->base->cpu_base)); + } while (read_seqcount_retry(&base->seq, seq) || + base != READ_ONCE(timer->base)); return false; } @@ -1194,16 +1192,16 @@ static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base, lockdep_assert_held(&cpu_base->lock); debug_deactivate(timer); - cpu_base->running = timer; + base->running = timer; /* * Separate the ->running assignment from the ->state assignment. * * As with a regular write barrier, this ensures the read side in - * hrtimer_active() cannot observe cpu_base->running == NULL && + * hrtimer_active() cannot observe base->running == NULL && * timer->state == INACTIVE. */ - raw_write_seqcount_barrier(&cpu_base->seq); + raw_write_seqcount_barrier(&base->seq); __remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE, 0); fn = timer->function; @@ -1244,13 +1242,13 @@ static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base, * Separate the ->running assignment from the ->state assignment. * * As with a regular write barrier, this ensures the read side in - * hrtimer_active() cannot observe cpu_base->running == NULL && + * hrtimer_active() cannot observe base->running.timer == NULL && * timer->state == INACTIVE. */ - raw_write_seqcount_barrier(&cpu_base->seq); + raw_write_seqcount_barrier(&base->seq); - WARN_ON_ONCE(cpu_base->running != timer); - cpu_base->running = NULL; + WARN_ON_ONCE(base->running != timer); + base->running = NULL; } static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now) -- cgit v1.2.3 From da21c5a58a7f30db69e04e06dfb6777ccbb1113c Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Thu, 21 Dec 2017 11:41:41 +0100 Subject: hrtimer: Make room in 'struct hrtimer_cpu_base' The upcoming softirq based hrtimers support requires an additional field in the hrtimer_cpu_base struct, which would grow the struct size beyond a cache line. The hrtimer_cpu_base::nr_retries and ::nr_hangs members are solely used for diagnostic output and have no requirement to be 'unsigned int'. Make them 'unsigned short' to create room for the new struct member. No functional change. Signed-off-by: Anna-Maria Gleixner Cc: Christoph Hellwig Cc: John Stultz Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: keescook@chromium.org Link: http://lkml.kernel.org/r/20171221104205.7269-13-anna-maria@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/hrtimer.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 1bae7b9f071d..56e56bcb6f0f 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -189,8 +189,8 @@ struct hrtimer_cpu_base { ktime_t expires_next; struct hrtimer *next_timer; unsigned int nr_events; - unsigned int nr_retries; - unsigned int nr_hangs; + unsigned short nr_retries; + unsigned short nr_hangs; unsigned int max_hang_time; #endif struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES]; -- cgit v1.2.3 From 28bfd18bf3daa5db8bb3422ea7138c8b7d2444ac Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Thu, 21 Dec 2017 11:41:42 +0100 Subject: hrtimer: Make the hrtimer_cpu_base::hres_active field unconditional, to simplify the code The hrtimer_cpu_base::hres_active_member field depends on CONFIG_HIGH_RES_TIMERS=y currently, and all related functions to this member are conditional as well. To simplify the code make it unconditional and set it to zero during initialization. (This will also help with the upcoming softirq based hrtimers code.) The conditional code sections can be avoided by adding IS_ENABLED(HIGHRES) conditionals into common functions, which ensures dead code elimination. There is no functional change. Suggested-by: Thomas Gleixner Signed-off-by: Anna-Maria Gleixner Cc: Christoph Hellwig Cc: John Stultz Cc: Linus Torvalds Cc: Peter Zijlstra Cc: keescook@chromium.org Link: http://lkml.kernel.org/r/20171221104205.7269-14-anna-maria@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/hrtimer.h | 20 ++++++++------------ kernel/time/hrtimer.c | 31 +++++++++++++++---------------- 2 files changed, 23 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 56e56bcb6f0f..22627b3a33fe 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -161,8 +161,8 @@ enum hrtimer_base_type { * @cpu: cpu number * @active_bases: Bitfield to mark bases with active timers * @clock_was_set_seq: Sequence counter of clock was set events - * @in_hrtirq: hrtimer_interrupt() is currently executing * @hres_active: State of high resolution mode + * @in_hrtirq: hrtimer_interrupt() is currently executing * @hang_detected: The last hrtimer interrupt detected a hang * @expires_next: absolute time of the next event, is required for remote * hrtimer enqueue @@ -182,9 +182,9 @@ struct hrtimer_cpu_base { unsigned int cpu; unsigned int active_bases; unsigned int clock_was_set_seq; + unsigned int hres_active : 1; #ifdef CONFIG_HIGH_RES_TIMERS unsigned int in_hrtirq : 1, - hres_active : 1, hang_detected : 1; ktime_t expires_next; struct hrtimer *next_timer; @@ -266,16 +266,17 @@ static inline ktime_t hrtimer_cb_get_time(struct hrtimer *timer) return timer->base->get_time(); } +static inline int hrtimer_is_hres_active(struct hrtimer *timer) +{ + return IS_ENABLED(CONFIG_HIGH_RES_TIMERS) ? + timer->base->cpu_base->hres_active : 0; +} + #ifdef CONFIG_HIGH_RES_TIMERS struct clock_event_device; extern void hrtimer_interrupt(struct clock_event_device *dev); -static inline int hrtimer_is_hres_active(struct hrtimer *timer) -{ - return timer->base->cpu_base->hres_active; -} - /* * The resolution of the clocks. The resolution value is returned in * the clock_getres() system call to give application programmers an @@ -298,11 +299,6 @@ extern unsigned int hrtimer_resolution; #define hrtimer_resolution (unsigned int)LOW_RES_NSEC -static inline int hrtimer_is_hres_active(struct hrtimer *timer) -{ - return 0; -} - static inline void clock_was_set_delayed(void) { } #endif diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index e56805fe5d00..b688090093d6 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -512,6 +512,20 @@ static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base) offs_real, offs_boot, offs_tai); } +/* + * Is the high resolution mode active ? + */ +static inline int __hrtimer_hres_active(struct hrtimer_cpu_base *cpu_base) +{ + return IS_ENABLED(CONFIG_HIGH_RES_TIMERS) ? + cpu_base->hres_active : 0; +} + +static inline int hrtimer_hres_active(void) +{ + return __hrtimer_hres_active(this_cpu_ptr(&hrtimer_bases)); +} + /* High resolution timer related functions */ #ifdef CONFIG_HIGH_RES_TIMERS @@ -540,19 +554,6 @@ static inline int hrtimer_is_hres_enabled(void) return hrtimer_hres_enabled; } -/* - * Is the high resolution mode active ? - */ -static inline int __hrtimer_hres_active(struct hrtimer_cpu_base *cpu_base) -{ - return cpu_base->hres_active; -} - -static inline int hrtimer_hres_active(void) -{ - return __hrtimer_hres_active(this_cpu_ptr(&hrtimer_bases)); -} - /* * Reprogram the event source with checking both queues for the * next event @@ -661,7 +662,6 @@ static void hrtimer_reprogram(struct hrtimer *timer, static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { base->expires_next = KTIME_MAX; - base->hres_active = 0; } /* @@ -720,8 +720,6 @@ void clock_was_set_delayed(void) #else -static inline int __hrtimer_hres_active(struct hrtimer_cpu_base *b) { return 0; } -static inline int hrtimer_hres_active(void) { return 0; } static inline int hrtimer_is_hres_enabled(void) { return 0; } static inline void hrtimer_switch_to_hres(void) { } static inline void @@ -1600,6 +1598,7 @@ int hrtimers_prepare_cpu(unsigned int cpu) } cpu_base->cpu = cpu; + cpu_base->hres_active = 0; hrtimer_init_hres(cpu_base); return 0; } -- cgit v1.2.3 From 07a9a7eae86abb796468b225586086d7c4cb59fc Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Thu, 21 Dec 2017 11:41:44 +0100 Subject: hrtimer: Make the remote enqueue check unconditional hrtimer_cpu_base.expires_next is used to cache the next event armed in the timer hardware. The value is used to check whether an hrtimer can be enqueued remotely. If the new hrtimer is expiring before expires_next, then remote enqueue is not possible as the remote hrtimer hardware cannot be accessed for reprogramming to an earlier expiry time. The remote enqueue check is currently conditional on CONFIG_HIGH_RES_TIMERS=y and hrtimer_cpu_base.hres_active. There is no compelling reason to make this conditional. Move hrtimer_cpu_base.expires_next out of the CONFIG_HIGH_RES_TIMERS=y guarded area and remove the conditionals in hrtimer_check_target(). The check is currently a NOOP for the CONFIG_HIGH_RES_TIMERS=n and the !hrtimer_cpu_base.hres_active case because in these cases nothing updates hrtimer_cpu_base.expires_next yet. This will be changed with later patches which further reduce the #ifdef zoo in this code. Signed-off-by: Anna-Maria Gleixner Cc: Christoph Hellwig Cc: John Stultz Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: keescook@chromium.org Link: http://lkml.kernel.org/r/20171221104205.7269-16-anna-maria@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/hrtimer.h | 6 +++--- kernel/time/hrtimer.c | 26 ++++++-------------------- 2 files changed, 9 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 22627b3a33fe..bb7270e8bc37 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -164,13 +164,13 @@ enum hrtimer_base_type { * @hres_active: State of high resolution mode * @in_hrtirq: hrtimer_interrupt() is currently executing * @hang_detected: The last hrtimer interrupt detected a hang - * @expires_next: absolute time of the next event, is required for remote - * hrtimer enqueue * @next_timer: Pointer to the first expiring timer * @nr_events: Total number of hrtimer interrupt events * @nr_retries: Total number of hrtimer interrupt retries * @nr_hangs: Total number of hrtimer interrupt hangs * @max_hang_time: Maximum time spent in hrtimer_interrupt + * @expires_next: absolute time of the next event, is required for remote + * hrtimer enqueue * @clock_base: array of clock bases for this cpu * * Note: next_timer is just an optimization for __remove_hrtimer(). @@ -186,13 +186,13 @@ struct hrtimer_cpu_base { #ifdef CONFIG_HIGH_RES_TIMERS unsigned int in_hrtirq : 1, hang_detected : 1; - ktime_t expires_next; struct hrtimer *next_timer; unsigned int nr_events; unsigned short nr_retries; unsigned short nr_hangs; unsigned int max_hang_time; #endif + ktime_t expires_next; struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES]; } ____cacheline_aligned; diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 5a624f9c8408..a9ab67f3e5d5 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -154,26 +154,21 @@ struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer, } /* - * With HIGHRES=y we do not migrate the timer when it is expiring - * before the next event on the target cpu because we cannot reprogram - * the target cpu hardware and we would cause it to fire late. + * We do not migrate the timer when it is expiring before the next + * event on the target cpu. When high resolution is enabled, we cannot + * reprogram the target cpu hardware and we would cause it to fire + * late. To keep it simple, we handle the high resolution enabled and + * disabled case similar. * * Called with cpu_base->lock of target cpu held. */ static int hrtimer_check_target(struct hrtimer *timer, struct hrtimer_clock_base *new_base) { -#ifdef CONFIG_HIGH_RES_TIMERS ktime_t expires; - if (!new_base->cpu_base->hres_active) - return 0; - expires = ktime_sub(hrtimer_get_expires(timer), new_base->offset); return expires <= new_base->cpu_base->expires_next; -#else - return 0; -#endif } static inline @@ -656,14 +651,6 @@ static void hrtimer_reprogram(struct hrtimer *timer, tick_program_event(expires, 1); } -/* - * Initialize the high resolution related parts of cpu_base - */ -static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) -{ - base->expires_next = KTIME_MAX; -} - /* * Retrigger next event is called after clock was set * @@ -729,7 +716,6 @@ static inline int hrtimer_reprogram(struct hrtimer *timer, { return 0; } -static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { } static inline void retrigger_next_event(void *arg) { } #endif /* CONFIG_HIGH_RES_TIMERS */ @@ -1599,7 +1585,7 @@ int hrtimers_prepare_cpu(unsigned int cpu) cpu_base->cpu = cpu; cpu_base->hres_active = 0; - hrtimer_init_hres(cpu_base); + cpu_base->expires_next = KTIME_MAX; return 0; } -- cgit v1.2.3 From eb27926ba05233dc4f2052cc9d4f19359ec3cd2c Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Thu, 21 Dec 2017 11:41:45 +0100 Subject: hrtimer: Make hrtimer_cpu_base.next_timer handling unconditional hrtimer_cpu_base.next_timer stores the pointer to the next expiring timer in a CPU base. This pointer cannot be dereferenced and is solely used to check whether a hrtimer which is removed is the hrtimer which is the first to expire in the CPU base. If this is the case, then the timer hardware needs to be reprogrammed to avoid an extra interrupt for nothing. Again, this is conditional functionality, but there is no compelling reason to make this conditional. As a preparation, hrtimer_cpu_base.next_timer needs to be available unconditonally. Aside of that the upcoming support for softirq based hrtimers requires access to this pointer unconditionally as well, so our motivation is not entirely simplicity based. Make the update of hrtimer_cpu_base.next_timer unconditional and remove the #ifdef cruft. The impact on CONFIG_HIGH_RES_TIMERS=n && CONFIG_NOHZ=n is marginal as it's just a store on an already dirtied cacheline. No functional change. Signed-off-by: Anna-Maria Gleixner Cc: Christoph Hellwig Cc: John Stultz Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: keescook@chromium.org Link: http://lkml.kernel.org/r/20171221104205.7269-17-anna-maria@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/hrtimer.h | 4 ++-- kernel/time/hrtimer.c | 12 ++---------- 2 files changed, 4 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index bb7270e8bc37..2d3e1d678a4d 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -164,13 +164,13 @@ enum hrtimer_base_type { * @hres_active: State of high resolution mode * @in_hrtirq: hrtimer_interrupt() is currently executing * @hang_detected: The last hrtimer interrupt detected a hang - * @next_timer: Pointer to the first expiring timer * @nr_events: Total number of hrtimer interrupt events * @nr_retries: Total number of hrtimer interrupt retries * @nr_hangs: Total number of hrtimer interrupt hangs * @max_hang_time: Maximum time spent in hrtimer_interrupt * @expires_next: absolute time of the next event, is required for remote * hrtimer enqueue + * @next_timer: Pointer to the first expiring timer * @clock_base: array of clock bases for this cpu * * Note: next_timer is just an optimization for __remove_hrtimer(). @@ -186,13 +186,13 @@ struct hrtimer_cpu_base { #ifdef CONFIG_HIGH_RES_TIMERS unsigned int in_hrtirq : 1, hang_detected : 1; - struct hrtimer *next_timer; unsigned int nr_events; unsigned short nr_retries; unsigned short nr_hangs; unsigned int max_hang_time; #endif ktime_t expires_next; + struct hrtimer *next_timer; struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES]; } ____cacheline_aligned; diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index a9ab67f3e5d5..26abaa7b0419 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -459,21 +459,13 @@ __next_base(struct hrtimer_cpu_base *cpu_base, unsigned int *active) while ((base = __next_base((cpu_base), &(active)))) #if defined(CONFIG_NO_HZ_COMMON) || defined(CONFIG_HIGH_RES_TIMERS) -static inline void hrtimer_update_next_timer(struct hrtimer_cpu_base *cpu_base, - struct hrtimer *timer) -{ -#ifdef CONFIG_HIGH_RES_TIMERS - cpu_base->next_timer = timer; -#endif -} - static ktime_t __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base) { struct hrtimer_clock_base *base; unsigned int active = cpu_base->active_bases; ktime_t expires, expires_next = KTIME_MAX; - hrtimer_update_next_timer(cpu_base, NULL); + cpu_base->next_timer = NULL; for_each_active_base(base, cpu_base, active) { struct timerqueue_node *next; struct hrtimer *timer; @@ -483,7 +475,7 @@ static ktime_t __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base) expires = ktime_sub(hrtimer_get_expires(timer), base->offset); if (expires < expires_next) { expires_next = expires; - hrtimer_update_next_timer(cpu_base, timer); + cpu_base->next_timer = timer; } } /* -- cgit v1.2.3 From 11a9fe069e341ac53bddb8fe1a85ea986cff1a42 Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Thu, 21 Dec 2017 11:41:46 +0100 Subject: hrtimer: Make hrtimer_reprogramm() unconditional hrtimer_reprogram() needs to be available unconditionally for softirq based hrtimers. Move the function and all required struct members out of the CONFIG_HIGH_RES_TIMERS #ifdef. There is no functional change because hrtimer_reprogram() is only invoked when hrtimer_cpu_base.hres_active is true. Making it unconditional increases the text size for the CONFIG_HIGH_RES_TIMERS=n case, but avoids replication of that code for the upcoming softirq based hrtimers support. Signed-off-by: Anna-Maria Gleixner Cc: Christoph Hellwig Cc: John Stultz Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: keescook@chromium.org Link: http://lkml.kernel.org/r/20171221104205.7269-18-anna-maria@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/hrtimer.h | 6 +-- kernel/time/hrtimer.c | 129 +++++++++++++++++++++++------------------------- 2 files changed, 65 insertions(+), 70 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 2d3e1d678a4d..98ed35767ac5 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -182,10 +182,10 @@ struct hrtimer_cpu_base { unsigned int cpu; unsigned int active_bases; unsigned int clock_was_set_seq; - unsigned int hres_active : 1; -#ifdef CONFIG_HIGH_RES_TIMERS - unsigned int in_hrtirq : 1, + unsigned int hres_active : 1, + in_hrtirq : 1, hang_detected : 1; +#ifdef CONFIG_HIGH_RES_TIMERS unsigned int nr_events; unsigned short nr_retries; unsigned short nr_hangs; diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 26abaa7b0419..63d804aea1ea 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -581,68 +581,6 @@ hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal) tick_program_event(cpu_base->expires_next, 1); } -/* - * When a timer is enqueued and expires earlier than the already enqueued - * timers, we have to check, whether it expires earlier than the timer for - * which the clock event device was armed. - * - * Called with interrupts disabled and base->cpu_base.lock held - */ -static void hrtimer_reprogram(struct hrtimer *timer, - struct hrtimer_clock_base *base) -{ - struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); - ktime_t expires = ktime_sub(hrtimer_get_expires(timer), base->offset); - - WARN_ON_ONCE(hrtimer_get_expires_tv64(timer) < 0); - - /* - * If the timer is not on the current cpu, we cannot reprogram - * the other cpus clock event device. - */ - if (base->cpu_base != cpu_base) - return; - - /* - * If the hrtimer interrupt is running, then it will - * reevaluate the clock bases and reprogram the clock event - * device. The callbacks are always executed in hard interrupt - * context so we don't need an extra check for a running - * callback. - */ - if (cpu_base->in_hrtirq) - return; - - /* - * CLOCK_REALTIME timer might be requested with an absolute - * expiry time which is less than base->offset. Set it to 0. - */ - if (expires < 0) - expires = 0; - - if (expires >= cpu_base->expires_next) - return; - - /* Update the pointer to the next expiring timer */ - cpu_base->next_timer = timer; - - /* - * If a hang was detected in the last timer interrupt then we - * do not schedule a timer which is earlier than the expiry - * which we enforced in the hang detection. We want the system - * to make progress. - */ - if (cpu_base->hang_detected) - return; - - /* - * Program the timer hardware. We enforce the expiry for - * events which are already in the past. - */ - cpu_base->expires_next = expires; - tick_program_event(expires, 1); -} - /* * Retrigger next event is called after clock was set * @@ -703,15 +641,72 @@ static inline int hrtimer_is_hres_enabled(void) { return 0; } static inline void hrtimer_switch_to_hres(void) { } static inline void hrtimer_force_reprogram(struct hrtimer_cpu_base *base, int skip_equal) { } -static inline int hrtimer_reprogram(struct hrtimer *timer, - struct hrtimer_clock_base *base) -{ - return 0; -} static inline void retrigger_next_event(void *arg) { } #endif /* CONFIG_HIGH_RES_TIMERS */ +/* + * When a timer is enqueued and expires earlier than the already enqueued + * timers, we have to check, whether it expires earlier than the timer for + * which the clock event device was armed. + * + * Called with interrupts disabled and base->cpu_base.lock held + */ +static void hrtimer_reprogram(struct hrtimer *timer, + struct hrtimer_clock_base *base) +{ + struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); + ktime_t expires = ktime_sub(hrtimer_get_expires(timer), base->offset); + + WARN_ON_ONCE(hrtimer_get_expires_tv64(timer) < 0); + + /* + * If the timer is not on the current cpu, we cannot reprogram + * the other cpus clock event device. + */ + if (base->cpu_base != cpu_base) + return; + + /* + * If the hrtimer interrupt is running, then it will + * reevaluate the clock bases and reprogram the clock event + * device. The callbacks are always executed in hard interrupt + * context so we don't need an extra check for a running + * callback. + */ + if (cpu_base->in_hrtirq) + return; + + /* + * CLOCK_REALTIME timer might be requested with an absolute + * expiry time which is less than base->offset. Set it to 0. + */ + if (expires < 0) + expires = 0; + + if (expires >= cpu_base->expires_next) + return; + + /* Update the pointer to the next expiring timer */ + cpu_base->next_timer = timer; + + /* + * If a hang was detected in the last timer interrupt then we + * do not schedule a timer which is earlier than the expiry + * which we enforced in the hang detection. We want the system + * to make progress. + */ + if (cpu_base->hang_detected) + return; + + /* + * Program the timer hardware. We enforce the expiry for + * events which are already in the past. + */ + cpu_base->expires_next = expires; + tick_program_event(expires, 1); +} + /* * Clock realtime was set * -- cgit v1.2.3 From 98ecadd4305d8677ba77162152485798d47dcc85 Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Thu, 21 Dec 2017 11:41:55 +0100 Subject: hrtimer: Add clock bases and hrtimer mode for softirq context Currently hrtimer callback functions are always executed in hard interrupt context. Users of hrtimers, which need their timer function to be executed in soft interrupt context, make use of tasklets to get the proper context. Add additional hrtimer clock bases for timers which must expire in softirq context, so the detour via the tasklet can be avoided. This is also required for RT, where the majority of hrtimer is moved into softirq hrtimer context. The selection of the expiry mode happens via a mode bit. Introduce HRTIMER_MODE_SOFT and the matching combinations with the ABS/REL/PINNED bits and update the decoding of hrtimer_mode in tracepoints. Signed-off-by: Anna-Maria Gleixner Cc: Christoph Hellwig Cc: John Stultz Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: keescook@chromium.org Link: http://lkml.kernel.org/r/20171221104205.7269-27-anna-maria@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/hrtimer.h | 14 ++++++++++++++ include/trace/events/timer.h | 6 +++++- kernel/time/hrtimer.c | 20 ++++++++++++++++++++ 3 files changed, 39 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 98ed35767ac5..26ae8a868ea8 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -33,14 +33,24 @@ struct hrtimer_cpu_base; * HRTIMER_MODE_REL - Time value is relative to now * HRTIMER_MODE_PINNED - Timer is bound to CPU (is only considered * when starting the timer) + * HRTIMER_MODE_SOFT - Timer callback function will be executed in + * soft irq context */ enum hrtimer_mode { HRTIMER_MODE_ABS = 0x00, HRTIMER_MODE_REL = 0x01, HRTIMER_MODE_PINNED = 0x02, + HRTIMER_MODE_SOFT = 0x04, HRTIMER_MODE_ABS_PINNED = HRTIMER_MODE_ABS | HRTIMER_MODE_PINNED, HRTIMER_MODE_REL_PINNED = HRTIMER_MODE_REL | HRTIMER_MODE_PINNED, + + HRTIMER_MODE_ABS_SOFT = HRTIMER_MODE_ABS | HRTIMER_MODE_SOFT, + HRTIMER_MODE_REL_SOFT = HRTIMER_MODE_REL | HRTIMER_MODE_SOFT, + + HRTIMER_MODE_ABS_PINNED_SOFT = HRTIMER_MODE_ABS_PINNED | HRTIMER_MODE_SOFT, + HRTIMER_MODE_REL_PINNED_SOFT = HRTIMER_MODE_REL_PINNED | HRTIMER_MODE_SOFT, + }; /* @@ -151,6 +161,10 @@ enum hrtimer_base_type { HRTIMER_BASE_REALTIME, HRTIMER_BASE_BOOTTIME, HRTIMER_BASE_TAI, + HRTIMER_BASE_MONOTONIC_SOFT, + HRTIMER_BASE_REALTIME_SOFT, + HRTIMER_BASE_BOOTTIME_SOFT, + HRTIMER_BASE_TAI_SOFT, HRTIMER_MAX_CLOCK_BASES, }; diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h index 744b4310b24b..a57e4ee989d6 100644 --- a/include/trace/events/timer.h +++ b/include/trace/events/timer.h @@ -148,7 +148,11 @@ DEFINE_EVENT(timer_class, timer_cancel, { HRTIMER_MODE_ABS, "ABS" }, \ { HRTIMER_MODE_REL, "REL" }, \ { HRTIMER_MODE_ABS_PINNED, "ABS|PINNED" }, \ - { HRTIMER_MODE_REL_PINNED, "REL|PINNED" }) + { HRTIMER_MODE_REL_PINNED, "REL|PINNED" }, \ + { HRTIMER_MODE_ABS_SOFT, "ABS|SOFT" }, \ + { HRTIMER_MODE_REL_SOFT, "REL|SOFT" }, \ + { HRTIMER_MODE_ABS_PINNED_SOFT, "ABS|PINNED|SOFT" }, \ + { HRTIMER_MODE_REL_PINNED_SOFT, "REL|PINNED|SOFT" }) /** * hrtimer_init - called when the hrtimer is initialized diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 31ccd86e63c0..e2353f5cdf51 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -92,6 +92,26 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) = .clockid = CLOCK_TAI, .get_time = &ktime_get_clocktai, }, + { + .index = HRTIMER_BASE_MONOTONIC_SOFT, + .clockid = CLOCK_MONOTONIC, + .get_time = &ktime_get, + }, + { + .index = HRTIMER_BASE_REALTIME_SOFT, + .clockid = CLOCK_REALTIME, + .get_time = &ktime_get_real, + }, + { + .index = HRTIMER_BASE_BOOTTIME_SOFT, + .clockid = CLOCK_BOOTTIME, + .get_time = &ktime_get_boottime, + }, + { + .index = HRTIMER_BASE_TAI_SOFT, + .clockid = CLOCK_TAI, + .get_time = &ktime_get_clocktai, + }, } }; -- cgit v1.2.3 From 5da70160462e80b0ab8a6960cdd0cdd476907523 Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Thu, 21 Dec 2017 11:41:57 +0100 Subject: hrtimer: Implement support for softirq based hrtimers hrtimer callbacks are always invoked in hard interrupt context. Several users in tree require soft interrupt context for their callbacks and achieve this by combining a hrtimer with a tasklet. The hrtimer schedules the tasklet in hard interrupt context and the tasklet callback gets invoked in softirq context later. That's suboptimal and aside of that the real-time patch moves most of the hrtimers into softirq context. So adding native support for hrtimers expiring in softirq context is a valuable extension for both mainline and the RT patch set. Each valid hrtimer clock id has two associated hrtimer clock bases: one for timers expiring in hardirq context and one for timers expiring in softirq context. Implement the functionality to associate a hrtimer with the hard or softirq related clock bases and update the relevant functions to take them into account when the next expiry time needs to be evaluated. Add a check into the hard interrupt context handler functions to check whether the first expiring softirq based timer has expired. If it's expired the softirq is raised and the accounting of softirq based timers to evaluate the next expiry time for programming the timer hardware is skipped until the softirq processing has finished. At the end of the softirq processing the regular processing is resumed. Suggested-by: Thomas Gleixner Suggested-by: Peter Zijlstra Signed-off-by: Anna-Maria Gleixner Cc: Christoph Hellwig Cc: John Stultz Cc: Linus Torvalds Cc: keescook@chromium.org Link: http://lkml.kernel.org/r/20171221104205.7269-29-anna-maria@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/hrtimer.h | 21 ++++-- kernel/time/hrtimer.c | 196 ++++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 188 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 26ae8a868ea8..c7902ca7c9f4 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -103,6 +103,7 @@ enum hrtimer_restart { * @base: pointer to the timer base (per cpu and per clock) * @state: state information (See bit values above) * @is_rel: Set if the timer was armed relative + * @is_soft: Set if hrtimer will be expired in soft interrupt context. * * The hrtimer structure must be initialized by hrtimer_init() */ @@ -113,6 +114,7 @@ struct hrtimer { struct hrtimer_clock_base *base; u8 state; u8 is_rel; + u8 is_soft; }; /** @@ -178,13 +180,18 @@ enum hrtimer_base_type { * @hres_active: State of high resolution mode * @in_hrtirq: hrtimer_interrupt() is currently executing * @hang_detected: The last hrtimer interrupt detected a hang + * @softirq_activated: displays, if the softirq is raised - update of softirq + * related settings is not required then. * @nr_events: Total number of hrtimer interrupt events * @nr_retries: Total number of hrtimer interrupt retries * @nr_hangs: Total number of hrtimer interrupt hangs * @max_hang_time: Maximum time spent in hrtimer_interrupt * @expires_next: absolute time of the next event, is required for remote - * hrtimer enqueue + * hrtimer enqueue; it is the total first expiry time (hard + * and soft hrtimer are taken into account) * @next_timer: Pointer to the first expiring timer + * @softirq_expires_next: Time to check, if soft queues needs also to be expired + * @softirq_next_timer: Pointer to the first expiring softirq based timer * @clock_base: array of clock bases for this cpu * * Note: next_timer is just an optimization for __remove_hrtimer(). @@ -196,9 +203,10 @@ struct hrtimer_cpu_base { unsigned int cpu; unsigned int active_bases; unsigned int clock_was_set_seq; - unsigned int hres_active : 1, - in_hrtirq : 1, - hang_detected : 1; + unsigned int hres_active : 1, + in_hrtirq : 1, + hang_detected : 1, + softirq_activated : 1; #ifdef CONFIG_HIGH_RES_TIMERS unsigned int nr_events; unsigned short nr_retries; @@ -207,6 +215,8 @@ struct hrtimer_cpu_base { #endif ktime_t expires_next; struct hrtimer *next_timer; + ktime_t softirq_expires_next; + struct hrtimer *softirq_next_timer; struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES]; } ____cacheline_aligned; @@ -379,7 +389,8 @@ extern void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, * @timer: the timer to be added * @tim: expiry time * @mode: timer mode: absolute (HRTIMER_MODE_ABS) or - * relative (HRTIMER_MODE_REL), and pinned (HRTIMER_MODE_PINNED) + * relative (HRTIMER_MODE_REL), and pinned (HRTIMER_MODE_PINNED); + * softirq based mode is considered for debug purpose only! */ static inline void hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index ba4674e9adc2..d93e3e745592 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -411,7 +411,8 @@ static inline void debug_hrtimer_init(struct hrtimer *timer) debug_object_init(timer, &hrtimer_debug_descr); } -static inline void debug_hrtimer_activate(struct hrtimer *timer) +static inline void debug_hrtimer_activate(struct hrtimer *timer, + enum hrtimer_mode mode) { debug_object_activate(timer, &hrtimer_debug_descr); } @@ -444,8 +445,10 @@ void destroy_hrtimer_on_stack(struct hrtimer *timer) EXPORT_SYMBOL_GPL(destroy_hrtimer_on_stack); #else + static inline void debug_hrtimer_init(struct hrtimer *timer) { } -static inline void debug_hrtimer_activate(struct hrtimer *timer) { } +static inline void debug_hrtimer_activate(struct hrtimer *timer, + enum hrtimer_mode mode) { } static inline void debug_hrtimer_deactivate(struct hrtimer *timer) { } #endif @@ -460,7 +463,7 @@ debug_init(struct hrtimer *timer, clockid_t clockid, static inline void debug_activate(struct hrtimer *timer, enum hrtimer_mode mode) { - debug_hrtimer_activate(timer); + debug_hrtimer_activate(timer, mode); trace_hrtimer_start(timer, mode); } @@ -503,7 +506,10 @@ static ktime_t __hrtimer_next_event_base(struct hrtimer_cpu_base *cpu_base, expires = ktime_sub(hrtimer_get_expires(timer), base->offset); if (expires < expires_next) { expires_next = expires; - cpu_base->next_timer = timer; + if (timer->is_soft) + cpu_base->softirq_next_timer = timer; + else + cpu_base->next_timer = timer; } } /* @@ -520,21 +526,39 @@ static ktime_t __hrtimer_next_event_base(struct hrtimer_cpu_base *cpu_base, * Recomputes cpu_base::*next_timer and returns the earliest expires_next but * does not set cpu_base::*expires_next, that is done by hrtimer_reprogram. * + * When a softirq is pending, we can ignore the HRTIMER_ACTIVE_SOFT bases, + * those timers will get run whenever the softirq gets handled, at the end of + * hrtimer_run_softirq(), hrtimer_update_softirq_timer() will re-add these bases. + * + * Therefore softirq values are those from the HRTIMER_ACTIVE_SOFT clock bases. + * The !softirq values are the minima across HRTIMER_ACTIVE_ALL, unless an actual + * softirq is pending, in which case they're the minima of HRTIMER_ACTIVE_HARD. + * * @active_mask must be one of: - * - HRTIMER_ACTIVE, + * - HRTIMER_ACTIVE_ALL, * - HRTIMER_ACTIVE_SOFT, or * - HRTIMER_ACTIVE_HARD. */ -static ktime_t __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base, - unsigned int active_mask) +static ktime_t +__hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base, unsigned int active_mask) { unsigned int active; + struct hrtimer *next_timer = NULL; ktime_t expires_next = KTIME_MAX; - cpu_base->next_timer = NULL; + if (!cpu_base->softirq_activated && (active_mask & HRTIMER_ACTIVE_SOFT)) { + active = cpu_base->active_bases & HRTIMER_ACTIVE_SOFT; + cpu_base->softirq_next_timer = NULL; + expires_next = __hrtimer_next_event_base(cpu_base, active, KTIME_MAX); + + next_timer = cpu_base->softirq_next_timer; + } - active = cpu_base->active_bases & active_mask; - expires_next = __hrtimer_next_event_base(cpu_base, active, expires_next); + if (active_mask & HRTIMER_ACTIVE_HARD) { + active = cpu_base->active_bases & HRTIMER_ACTIVE_HARD; + cpu_base->next_timer = next_timer; + expires_next = __hrtimer_next_event_base(cpu_base, active, expires_next); + } return expires_next; } @@ -545,8 +569,14 @@ static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base) ktime_t *offs_boot = &base->clock_base[HRTIMER_BASE_BOOTTIME].offset; ktime_t *offs_tai = &base->clock_base[HRTIMER_BASE_TAI].offset; - return ktime_get_update_offsets_now(&base->clock_was_set_seq, + ktime_t now = ktime_get_update_offsets_now(&base->clock_was_set_seq, offs_real, offs_boot, offs_tai); + + base->clock_base[HRTIMER_BASE_REALTIME_SOFT].offset = *offs_real; + base->clock_base[HRTIMER_BASE_BOOTTIME_SOFT].offset = *offs_boot; + base->clock_base[HRTIMER_BASE_TAI_SOFT].offset = *offs_tai; + + return now; } /* @@ -573,7 +603,23 @@ hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal) { ktime_t expires_next; - expires_next = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_HARD); + /* + * Find the current next expiration time. + */ + expires_next = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_ALL); + + if (cpu_base->next_timer && cpu_base->next_timer->is_soft) { + /* + * When the softirq is activated, hrtimer has to be + * programmed with the first hard hrtimer because soft + * timer interrupt could occur too late. + */ + if (cpu_base->softirq_activated) + expires_next = __hrtimer_get_next_event(cpu_base, + HRTIMER_ACTIVE_HARD); + else + cpu_base->softirq_expires_next = expires_next; + } if (skip_equal && expires_next == cpu_base->expires_next) return; @@ -700,7 +746,7 @@ static inline void retrigger_next_event(void *arg) { } * * Called with interrupts disabled and base->cpu_base.lock held */ -static void hrtimer_reprogram(struct hrtimer *timer) +static void hrtimer_reprogram(struct hrtimer *timer, bool reprogram) { struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); struct hrtimer_clock_base *base = timer->base; @@ -708,6 +754,37 @@ static void hrtimer_reprogram(struct hrtimer *timer) WARN_ON_ONCE(hrtimer_get_expires_tv64(timer) < 0); + /* + * CLOCK_REALTIME timer might be requested with an absolute + * expiry time which is less than base->offset. Set it to 0. + */ + if (expires < 0) + expires = 0; + + if (timer->is_soft) { + /* + * soft hrtimer could be started on a remote CPU. In this + * case softirq_expires_next needs to be updated on the + * remote CPU. The soft hrtimer will not expire before the + * first hard hrtimer on the remote CPU - + * hrtimer_check_target() prevents this case. + */ + struct hrtimer_cpu_base *timer_cpu_base = base->cpu_base; + + if (timer_cpu_base->softirq_activated) + return; + + if (!ktime_before(expires, timer_cpu_base->softirq_expires_next)) + return; + + timer_cpu_base->softirq_next_timer = timer; + timer_cpu_base->softirq_expires_next = expires; + + if (!ktime_before(expires, timer_cpu_base->expires_next) || + !reprogram) + return; + } + /* * If the timer is not on the current cpu, we cannot reprogram * the other cpus clock event device. @@ -725,13 +802,6 @@ static void hrtimer_reprogram(struct hrtimer *timer) if (cpu_base->in_hrtirq) return; - /* - * CLOCK_REALTIME timer might be requested with an absolute - * expiry time which is less than base->offset. Set it to 0. - */ - if (expires < 0) - expires = 0; - if (expires >= cpu_base->expires_next) return; @@ -957,6 +1027,31 @@ static inline ktime_t hrtimer_update_lowres(struct hrtimer *timer, ktime_t tim, return tim; } +static void +hrtimer_update_softirq_timer(struct hrtimer_cpu_base *cpu_base, bool reprogram) +{ + ktime_t expires; + + /* + * Find the next SOFT expiration. + */ + expires = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_SOFT); + + /* + * reprogramming needs to be triggered, even if the next soft + * hrtimer expires at the same time than the next hard + * hrtimer. cpu_base->softirq_expires_next needs to be updated! + */ + if (expires == KTIME_MAX) + return; + + /* + * cpu_base->*next_timer is recomputed by __hrtimer_get_next_event() + * cpu_base->*expires_next is only set by hrtimer_reprogram() + */ + hrtimer_reprogram(cpu_base->softirq_next_timer, reprogram); +} + static int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, u64 delta_ns, const enum hrtimer_mode mode, struct hrtimer_clock_base *base) @@ -978,13 +1073,15 @@ static int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, return enqueue_hrtimer(timer, new_base, mode); } + /** * hrtimer_start_range_ns - (re)start an hrtimer * @timer: the timer to be added * @tim: expiry time * @delta_ns: "slack" range for the timer * @mode: timer mode: absolute (HRTIMER_MODE_ABS) or - * relative (HRTIMER_MODE_REL), and pinned (HRTIMER_MODE_PINNED) + * relative (HRTIMER_MODE_REL), and pinned (HRTIMER_MODE_PINNED); + * softirq based mode is considered for debug purpose only! */ void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, u64 delta_ns, const enum hrtimer_mode mode) @@ -992,10 +1089,16 @@ void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, struct hrtimer_clock_base *base; unsigned long flags; + /* + * Check whether the HRTIMER_MODE_SOFT bit and hrtimer.is_soft + * match. + */ + WARN_ON_ONCE(!(mode & HRTIMER_MODE_SOFT) ^ !timer->is_soft); + base = lock_hrtimer_base(timer, &flags); if (__hrtimer_start_range_ns(timer, tim, delta_ns, mode, base)) - hrtimer_reprogram(timer); + hrtimer_reprogram(timer, true); unlock_hrtimer_base(timer, &flags); } @@ -1094,7 +1197,7 @@ u64 hrtimer_get_next_event(void) raw_spin_lock_irqsave(&cpu_base->lock, flags); if (!__hrtimer_hres_active(cpu_base)) - expires = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_HARD); + expires = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_ALL); raw_spin_unlock_irqrestore(&cpu_base->lock, flags); @@ -1304,6 +1407,23 @@ static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now, } } +static __latent_entropy void hrtimer_run_softirq(struct softirq_action *h) +{ + struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); + unsigned long flags; + ktime_t now; + + raw_spin_lock_irqsave(&cpu_base->lock, flags); + + now = hrtimer_update_base(cpu_base); + __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_SOFT); + + cpu_base->softirq_activated = 0; + hrtimer_update_softirq_timer(cpu_base, true); + + raw_spin_unlock_irqrestore(&cpu_base->lock, flags); +} + #ifdef CONFIG_HIGH_RES_TIMERS /* @@ -1334,10 +1454,16 @@ retry: */ cpu_base->expires_next = KTIME_MAX; + if (!ktime_before(now, cpu_base->softirq_expires_next)) { + cpu_base->softirq_expires_next = KTIME_MAX; + cpu_base->softirq_activated = 1; + raise_softirq_irqoff(HRTIMER_SOFTIRQ); + } + __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD); /* Reevaluate the clock bases for the next expiry */ - expires_next = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_HARD); + expires_next = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_ALL); /* * Store the new expiry value so the migration code can verify * against it. @@ -1441,6 +1567,13 @@ void hrtimer_run_queues(void) raw_spin_lock_irqsave(&cpu_base->lock, flags); now = hrtimer_update_base(cpu_base); + + if (!ktime_before(now, cpu_base->softirq_expires_next)) { + cpu_base->softirq_expires_next = KTIME_MAX; + cpu_base->softirq_activated = 1; + raise_softirq_irqoff(HRTIMER_SOFTIRQ); + } + __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD); raw_spin_unlock_irqrestore(&cpu_base->lock, flags); } @@ -1622,6 +1755,7 @@ int hrtimers_prepare_cpu(unsigned int cpu) cpu_base->cpu = cpu; cpu_base->hres_active = 0; cpu_base->expires_next = KTIME_MAX; + cpu_base->softirq_expires_next = KTIME_MAX; return 0; } @@ -1665,6 +1799,12 @@ int hrtimers_dead_cpu(unsigned int scpu) BUG_ON(cpu_online(scpu)); tick_cancel_sched_timer(scpu); + /* + * this BH disable ensures that raise_softirq_irqoff() does + * not wakeup ksoftirqd (and acquire the pi-lock) while + * holding the cpu_base lock + */ + local_bh_disable(); local_irq_disable(); old_base = &per_cpu(hrtimer_bases, scpu); new_base = this_cpu_ptr(&hrtimer_bases); @@ -1680,12 +1820,19 @@ int hrtimers_dead_cpu(unsigned int scpu) &new_base->clock_base[i]); } + /* + * The migration might have changed the first expiring softirq + * timer on this CPU. Update it. + */ + hrtimer_update_softirq_timer(new_base, false); + raw_spin_unlock(&old_base->lock); raw_spin_unlock(&new_base->lock); /* Check, if we got expired work to do */ __hrtimer_peek_ahead_timers(); local_irq_enable(); + local_bh_enable(); return 0; } @@ -1694,6 +1841,7 @@ int hrtimers_dead_cpu(unsigned int scpu) void __init hrtimers_init(void) { hrtimers_prepare_cpu(smp_processor_id()); + open_softirq(HRTIMER_SOFTIRQ, hrtimer_run_softirq); } /** -- cgit v1.2.3