Diffstat (limited to 'kernel/sched/sched.h')
-rw-r--r--	kernel/sched/sched.h	128
1 file changed, 106 insertions, 22 deletions
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 1d4e94c1e5fe..3fd283892761 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -67,6 +67,7 @@
 #include <linux/tsacct_kern.h>
 
 #include <asm/tlb.h>
+#include <asm-generic/vmlinux.lds.h>
 
 #ifdef CONFIG_PARAVIRT
 # include <asm/paravirt.h>
@@ -75,6 +76,8 @@
 #include "cpupri.h"
 #include "cpudeadline.h"
 
+#include <trace/events/sched.h>
+
 #ifdef CONFIG_SCHED_DEBUG
 # define SCHED_WARN_ON(x)	WARN_ONCE(x, #x)
 #else
@@ -96,6 +99,7 @@ extern atomic_long_t calc_load_tasks;
 
 extern void calc_global_load_tick(struct rq *this_rq);
 extern long calc_load_fold_active(struct rq *this_rq, long adjust);
+extern void call_trace_sched_update_nr_running(struct rq *rq, int count);
 /*
  * Helpers for converting nanosecond timing to jiffy resolution
  */
@@ -310,11 +314,26 @@ void __dl_add(struct dl_bw *dl_b, u64 tsk_bw, int cpus)
 	__dl_update(dl_b, -((s32)tsk_bw / cpus));
 }
 
-static inline
-bool __dl_overflow(struct dl_bw *dl_b, int cpus, u64 old_bw, u64 new_bw)
+static inline bool __dl_overflow(struct dl_bw *dl_b, unsigned long cap,
+				 u64 old_bw, u64 new_bw)
 {
 	return dl_b->bw != -1 &&
-	       dl_b->bw * cpus < dl_b->total_bw - old_bw + new_bw;
+	       cap_scale(dl_b->bw, cap) < dl_b->total_bw - old_bw + new_bw;
+}
+
+/*
+ * Verify the fitness of task @p to run on @cpu taking into account the
+ * CPU original capacity and the runtime/deadline ratio of the task.
+ *
+ * The function will return true if the CPU original capacity of the
+ * @cpu scaled by SCHED_CAPACITY_SCALE >= runtime/deadline ratio of the
+ * task and false otherwise.
+ */
+static inline bool dl_task_fits_capacity(struct task_struct *p, int cpu)
+{
+	unsigned long cap = arch_scale_cpu_capacity(cpu);
+
+	return cap_scale(p->dl.dl_deadline, cap) >= p->dl.dl_runtime;
 }
 
 extern void init_dl_bw(struct dl_bw *dl_b);
@@ -862,6 +881,8 @@ struct uclamp_rq {
 	unsigned int value;
 	struct uclamp_bucket bucket[UCLAMP_BUCKETS];
 };
+
+DECLARE_STATIC_KEY_FALSE(sched_uclamp_used);
 #endif /* CONFIG_UCLAMP_TASK */
 
 /*
@@ -1182,6 +1203,16 @@ struct rq_flags {
 #endif
 };
 
+/*
+ * Lockdep annotation that avoids accidental unlocks; it's like a
+ * sticky/continuous lockdep_assert_held().
+ *
+ * This avoids code that has access to 'struct rq *rq' (basically everything in
+ * the scheduler) from accidentally unlocking the rq if they do not also have a
+ * copy of the (on-stack) 'struct rq_flags rf'.
+ *
+ * Also see Documentation/locking/lockdep-design.rst.
+ */
 static inline void rq_pin_lock(struct rq *rq, struct rq_flags *rf)
 {
 	rf->cookie = lockdep_pin_lock(&rq->lock);
@@ -1682,7 +1713,7 @@ static inline int task_on_rq_migrating(struct task_struct *p)
 #define WF_SYNC			0x01		/* Waker goes to sleep after wakeup */
 #define WF_FORK			0x02		/* Child wakeup after fork */
 #define WF_MIGRATED		0x04		/* Internal use, task got migrated */
-#define WF_ON_RQ		0x08		/* Wakee is on_rq */
+#define WF_ON_CPU		0x08		/* Wakee is on_cpu */
 
 /*
  * To aid in avoiding the subversion of "niceness" due to uneven distribution
@@ -1739,7 +1770,6 @@ extern const u32		sched_prio_to_wmult[40];
 #define RETRY_TASK		((void *)-1UL)
 
 struct sched_class {
-	const struct sched_class *next;
 
 #ifdef CONFIG_UCLAMP_TASK
 	int uclamp_enabled;
@@ -1748,7 +1778,7 @@ struct sched_class {
 	void (*enqueue_task) (struct rq *rq, struct task_struct *p, int flags);
 	void (*dequeue_task) (struct rq *rq, struct task_struct *p, int flags);
 	void (*yield_task)   (struct rq *rq);
-	bool (*yield_to_task)(struct rq *rq, struct task_struct *p, bool preempt);
+	bool (*yield_to_task)(struct rq *rq, struct task_struct *p);
 
 	void (*check_preempt_curr)(struct rq *rq, struct task_struct *p, int flags);
 
@@ -1796,7 +1826,7 @@ struct sched_class {
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	void (*task_change_group)(struct task_struct *p, int type);
 #endif
-};
+} __aligned(STRUCT_ALIGNMENT); /* STRUCT_ALIGN(), vmlinux.lds.h */
 
 static inline void put_prev_task(struct rq *rq, struct task_struct *prev)
 {
@@ -1810,17 +1840,18 @@ static inline void set_next_task(struct rq *rq, struct task_struct *next)
 	next->sched_class->set_next_task(rq, next, false);
 }
 
-#ifdef CONFIG_SMP
-#define sched_class_highest (&stop_sched_class)
-#else
-#define sched_class_highest (&dl_sched_class)
-#endif
+/* Defined in include/asm-generic/vmlinux.lds.h */
+extern struct sched_class __begin_sched_classes[];
+extern struct sched_class __end_sched_classes[];
+
+#define sched_class_highest (__end_sched_classes - 1)
+#define sched_class_lowest  (__begin_sched_classes - 1)
 
 #define for_class_range(class, _from, _to) \
-	for (class = (_from); class != (_to); class = class->next)
+	for (class = (_from); class != (_to); class--)
 
 #define for_each_class(class) \
-	for_class_range(class, sched_class_highest, NULL)
+	for_class_range(class, sched_class_highest, sched_class_lowest)
 
 extern const struct sched_class stop_sched_class;
 extern const struct sched_class dl_sched_class;
@@ -1930,12 +1961,7 @@ extern int __init sched_tick_offload_init(void);
  */
 static inline void sched_update_tick_dependency(struct rq *rq)
 {
-	int cpu;
-
-	if (!tick_nohz_full_enabled())
-		return;
-
-	cpu = cpu_of(rq);
+	int cpu = cpu_of(rq);
 
 	if (!tick_nohz_full_cpu(cpu))
 		return;
@@ -1955,6 +1981,9 @@ static inline void add_nr_running(struct rq *rq, unsigned count)
 	unsigned prev_nr = rq->nr_running;
 
 	rq->nr_running = prev_nr + count;
+	if (trace_sched_update_nr_running_tp_enabled()) {
+		call_trace_sched_update_nr_running(rq, count);
+	}
 
 #ifdef CONFIG_SMP
 	if (prev_nr < 2 && rq->nr_running >= 2) {
@@ -1969,6 +1998,10 @@ static inline void add_nr_running(struct rq *rq, unsigned count)
 static inline void sub_nr_running(struct rq *rq, unsigned count)
 {
 	rq->nr_running -= count;
+	if (trace_sched_update_nr_running_tp_enabled()) {
+		call_trace_sched_update_nr_running(rq, count);
+	}
+
 	/* Check if we still need preemption */
 	sched_update_tick_dependency(rq);
 }
@@ -2016,6 +2049,16 @@ void arch_scale_freq_tick(void)
 #endif
 
 #ifndef arch_scale_freq_capacity
+/**
+ * arch_scale_freq_capacity - get the frequency scale factor of a given CPU.
+ * @cpu: the CPU in question.
+ *
+ * Return: the frequency scale factor normalized against SCHED_CAPACITY_SCALE, i.e.
+ *
+ *     f_curr
+ *     ------ * SCHED_CAPACITY_SCALE
+ *     f_max
+ */
 static __always_inline
 unsigned long arch_scale_freq_capacity(int cpu)
 {
@@ -2349,12 +2392,35 @@ static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {}
 #ifdef CONFIG_UCLAMP_TASK
 unsigned long uclamp_eff_value(struct task_struct *p, enum uclamp_id clamp_id);
 
+/**
+ * uclamp_rq_util_with - clamp @util with @rq and @p effective uclamp values.
+ * @rq:		The rq to clamp against. Must not be NULL.
+ * @util:	The util value to clamp.
+ * @p:		The task to clamp against. Can be NULL if you want to clamp
+ *		against @rq only.
+ *
+ * Clamps the passed @util to the max(@rq, @p) effective uclamp values.
+ *
+ * If sched_uclamp_used static key is disabled, then just return the util
+ * without any clamping since uclamp aggregation at the rq level in the fast
+ * path is disabled, rendering this operation a NOP.
+ *
+ * Use uclamp_eff_value() if you don't care about uclamp values at rq level. It
+ * will return the correct effective uclamp value of the task even if the
+ * static key is disabled.
+ */
 static __always_inline
 unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long util,
 				  struct task_struct *p)
 {
-	unsigned long min_util = READ_ONCE(rq->uclamp[UCLAMP_MIN].value);
-	unsigned long max_util = READ_ONCE(rq->uclamp[UCLAMP_MAX].value);
+	unsigned long min_util;
+	unsigned long max_util;
+
+	if (!static_branch_likely(&sched_uclamp_used))
+		return util;
+
+	min_util = READ_ONCE(rq->uclamp[UCLAMP_MIN].value);
+	max_util = READ_ONCE(rq->uclamp[UCLAMP_MAX].value);
 
 	if (p) {
 		min_util = max(min_util, uclamp_eff_value(p, UCLAMP_MIN));
@@ -2371,6 +2437,19 @@ unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long util,
 
 	return clamp(util, min_util, max_util);
 }
+
+/*
+ * When uclamp is compiled in, the aggregation at rq level is 'turned off'
+ * by default in the fast path and only gets turned on once userspace performs
+ * an operation that requires it.
+ *
+ * Returns true if userspace opted-in to use uclamp and aggregation at rq level
+ * hence is active.
+ */
+static inline bool uclamp_is_used(void)
+{
+	return static_branch_likely(&sched_uclamp_used);
+}
 #else /* CONFIG_UCLAMP_TASK */
 static inline
 unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long util,
@@ -2378,6 +2457,11 @@ unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long util,
 {
 	return util;
 }
+
+static inline bool uclamp_is_used(void)
+{
+	return false;
+}
 #endif /* CONFIG_UCLAMP_TASK */
 
 #ifdef arch_scale_freq_capacity
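
For context on the deadline hunks above: __dl_overflow() now takes a capacity instead of a CPU count, and both it and the new dl_task_fits_capacity() scale values with cap_scale(), i.e. multiply by a capacity expressed against SCHED_CAPACITY_SCALE (1024) and shift right by SCHED_CAPACITY_SHIFT. The following is a minimal standalone sketch of that arithmetic, not kernel code; the helper name fits_capacity_demo() and the runtime/deadline numbers are invented for illustration.

/* Standalone sketch of the dl_task_fits_capacity() check: a deadline task
 * "fits" a CPU when cap_scale(dl_deadline, cpu_capacity) >= dl_runtime,
 * i.e. its bandwidth (runtime/deadline) does not exceed the CPU's capacity
 * relative to SCHED_CAPACITY_SCALE. All values below are made up. */
#include <stdint.h>
#include <stdio.h>

#define SCHED_CAPACITY_SHIFT	10
#define SCHED_CAPACITY_SCALE	(1UL << SCHED_CAPACITY_SHIFT)

/* Same shape as the kernel's cap_scale(): scale v by cap/1024. */
#define cap_scale(v, cap)	(((v) * (cap)) >> SCHED_CAPACITY_SHIFT)

static int fits_capacity_demo(uint64_t runtime_ns, uint64_t deadline_ns,
			      unsigned long cpu_cap)
{
	return cap_scale(deadline_ns, cpu_cap) >= runtime_ns;
}

int main(void)
{
	/* 5ms runtime per 10ms deadline: needs ~50% of a full-capacity CPU. */
	uint64_t runtime = 5000000, deadline = 10000000;

	printf("cap 1024: %s\n", fits_capacity_demo(runtime, deadline, 1024) ?
	       "fits" : "does not fit");
	printf("cap  400: %s\n", fits_capacity_demo(runtime, deadline, 400) ?
	       "fits" : "does not fit");
	return 0;
}

The same scaling drives the reworked __dl_overflow() test, which compares the accumulated deadline bandwidth against dl_b->bw scaled by capacity rather than by a raw CPU count.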

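The sched_class hunks drop the ->next pointer; the vmlinux.lds.h include and __aligned(STRUCT_ALIGNMENT) let the linker place all scheduling classes in one contiguous, priority-ordered section bounded by __begin_sched_classes[]/__end_sched_classes[], so for_each_class() becomes a plain pointer decrement from highest to lowest class. Below is a rough userspace imitation of that walk; the demo_* names and the static array standing in for the linker-built section are illustrative only, not the kernel's objects.

/* Userspace sketch of the post-patch for_each_class() walk: the classes live
 * in one contiguous, priority-ordered array, so iteration is a pointer
 * decrement instead of following a ->next pointer. The array stands in for
 * the section bounded by __begin_sched_classes[]/__end_sched_classes[]. */
#include <stdio.h>

struct demo_sched_class {
	const char *name;
};

/* Lowest priority first, highest last -- mirroring the link order that
 * SCHED_DATA sets up in include/asm-generic/vmlinux.lds.h. */
static const struct demo_sched_class demo_classes[] = {
	{ "idle" },
	{ "fair" },
	{ "rt"   },
	{ "dl"   },
	{ "stop" },
};

#define demo_end_classes   (demo_classes + sizeof(demo_classes) / sizeof(demo_classes[0]))
#define demo_class_highest (demo_end_classes - 1)
/* One-before-the-start pointer mirrors the kernel macro; never dereferenced. */
#define demo_class_lowest  (demo_classes - 1)

#define demo_for_each_class(class) \
	for (class = demo_class_highest; class != demo_class_lowest; class--)

int main(void)
{
	const struct demo_sched_class *class;

	/* Visits stop, dl, rt, fair, idle -- highest priority first. */
	demo_for_each_class(class)
		printf("%s\n", class->name);

	return 0;
}

Because higher-priority classes sit at higher addresses, sched_class_highest can simply be __end_sched_classes - 1 and the class-- iteration preserves the old highest-to-lowest visiting order.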
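The uclamp hunks gate rq-level clamping behind the sched_uclamp_used static key and document uclamp_rq_util_with(). The sketch below reproduces only the clamping arithmetic in plain C with invented numbers: util is clamped into [max(rq min, task min), max(rq max, task max)], with the min side winning when MAX aggregation has produced an inverted range (mirroring the fix-up in the surrounding kernel code, which is not part of this diff); the demo_* helpers are not the kernel API.

/* Plain-C sketch of the clamping done by uclamp_rq_util_with() once the
 * sched_uclamp_used static key is enabled. Invented values. */
#include <stdio.h>

static unsigned long max_ul(unsigned long a, unsigned long b)
{
	return a > b ? a : b;
}

static unsigned long demo_rq_util_with(unsigned long util,
				       unsigned long rq_min, unsigned long rq_max,
				       unsigned long p_min, unsigned long p_max)
{
	unsigned long min_util = max_ul(rq_min, p_min);
	unsigned long max_util = max_ul(rq_max, p_max);

	/* MAX-aggregated clamps of different tasks can invert; the min side
	 * wins in that case. */
	if (min_util >= max_util)
		return min_util;

	if (util < min_util)
		return min_util;
	if (util > max_util)
		return max_util;
	return util;
}

int main(void)
{
	/* A util of 300/1024 boosted to 512 by a task's UCLAMP_MIN of 512. */
	printf("%lu\n", demo_rq_util_with(300, 0, 1024, 512, 1024));
	/* With the static key disabled, the kernel helper returns util as-is. */
	return 0;
}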