diff options
Diffstat (limited to 'include/linux/sched.h')
| -rw-r--r-- | include/linux/sched.h | 195 | 
1 files changed, 165 insertions, 30 deletions
| diff --git a/include/linux/sched.h b/include/linux/sched.h index 6682da36b293..6f7ffa460089 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -22,6 +22,7 @@ struct sched_param {  #include <linux/errno.h>  #include <linux/nodemask.h>  #include <linux/mm_types.h> +#include <linux/preempt.h>  #include <asm/page.h>  #include <asm/ptrace.h> @@ -285,6 +286,14 @@ static inline void lockup_detector_init(void)  }  #endif +#ifdef CONFIG_DETECT_HUNG_TASK +void reset_hung_task_detector(void); +#else +static inline void reset_hung_task_detector(void) +{ +} +#endif +  /* Attach to any functions which should be ignored in wchan output. */  #define __sched		__attribute__((__section__(".sched.text"))) @@ -322,6 +331,10 @@ static inline void arch_pick_mmap_layout(struct mm_struct *mm) {}  extern void set_dumpable(struct mm_struct *mm, int value);  extern int get_dumpable(struct mm_struct *mm); +#define SUID_DUMP_DISABLE	0	/* No setuid dumping */ +#define SUID_DUMP_USER		1	/* Dump as user of process */ +#define SUID_DUMP_ROOT		2	/* Dump as root */ +  /* mm flags */  /* dumpable bits */  #define MMF_DUMPABLE      0  /* core dump is permitted */ @@ -427,6 +440,14 @@ struct task_cputime {  		.sum_exec_runtime = 0,				\  	} +#define PREEMPT_ENABLED		(PREEMPT_NEED_RESCHED) + +#ifdef CONFIG_PREEMPT_COUNT +#define PREEMPT_DISABLED	(1 + PREEMPT_ENABLED) +#else +#define PREEMPT_DISABLED	PREEMPT_ENABLED +#endif +  /*   * Disable preemption until the scheduler is running.   * Reset by start_kernel()->sched_init()->init_idle(). @@ -434,7 +455,7 @@ struct task_cputime {   * We include PREEMPT_ACTIVE to avoid cond_resched() from working   * before the scheduler is active -- see should_resched().   */ -#define INIT_PREEMPT_COUNT	(1 + PREEMPT_ACTIVE) +#define INIT_PREEMPT_COUNT	(PREEMPT_DISABLED + PREEMPT_ACTIVE)  /**   * struct thread_group_cputimer - thread group interval timer counts @@ -768,6 +789,7 @@ enum cpu_idle_type {  #define SD_ASYM_PACKING		0x0800  /* Place busy groups earlier in the domain */  #define SD_PREFER_SIBLING	0x1000	/* Prefer to place tasks in a sibling domain */  #define SD_OVERLAP		0x2000	/* sched_domains of this level overlap */ +#define SD_NUMA			0x4000	/* cross-node balancing */  extern int __weak arch_sd_sibiling_asym_packing(void); @@ -811,6 +833,10 @@ struct sched_domain {  	u64 last_update; +	/* idle_balance() stats */ +	u64 max_newidle_lb_cost; +	unsigned long next_decay_max_lb_cost; +  #ifdef CONFIG_SCHEDSTATS  	/* load_balance() stats */  	unsigned int lb_count[CPU_MAX_IDLE_TYPES]; @@ -1029,6 +1055,8 @@ struct task_struct {  	struct task_struct *last_wakee;  	unsigned long wakee_flips;  	unsigned long wakee_flip_decay_ts; + +	int wake_cpu;  #endif  	int on_rq; @@ -1046,15 +1074,6 @@ struct task_struct {  	struct hlist_head preempt_notifiers;  #endif -	/* -	 * fpu_counter contains the number of consecutive context switches -	 * that the FPU is used. If this is over a threshold, the lazy fpu -	 * saving becomes unlazy to save the trap. This is an unsigned char -	 * so that after 256 times the counter wraps and the behavior turns -	 * lazy again; this to deal with bursty apps that only use FPU for -	 * a short time -	 */ -	unsigned char fpu_counter;  #ifdef CONFIG_BLK_DEV_IO_TRACE  	unsigned int btrace_seq;  #endif @@ -1324,10 +1343,41 @@ struct task_struct {  #endif  #ifdef CONFIG_NUMA_BALANCING  	int numa_scan_seq; -	int numa_migrate_seq;  	unsigned int numa_scan_period; +	unsigned int numa_scan_period_max; +	int numa_preferred_nid; +	int numa_migrate_deferred; +	unsigned long numa_migrate_retry;  	u64 node_stamp;			/* migration stamp  */  	struct callback_head numa_work; + +	struct list_head numa_entry; +	struct numa_group *numa_group; + +	/* +	 * Exponential decaying average of faults on a per-node basis. +	 * Scheduling placement decisions are made based on the these counts. +	 * The values remain static for the duration of a PTE scan +	 */ +	unsigned long *numa_faults; +	unsigned long total_numa_faults; + +	/* +	 * numa_faults_buffer records faults per node during the current +	 * scan window. When the scan completes, the counts in numa_faults +	 * decay and these values are copied. +	 */ +	unsigned long *numa_faults_buffer; + +	/* +	 * numa_faults_locality tracks if faults recorded during the last +	 * scan window were remote/local. The task scan period is adapted +	 * based on the locality of the faults with different weights +	 * depending on whether they were shared or private faults +	 */ +	unsigned long numa_faults_locality[2]; + +	unsigned long numa_pages_migrated;  #endif /* CONFIG_NUMA_BALANCING */  	struct rcu_head rcu; @@ -1394,11 +1444,10 @@ struct task_struct {  	} memcg_batch;  	unsigned int memcg_kmem_skip_account;  	struct memcg_oom_info { +		struct mem_cgroup *memcg; +		gfp_t gfp_mask; +		int order;  		unsigned int may_oom:1; -		unsigned int in_memcg_oom:1; -		unsigned int oom_locked:1; -		int wakeups; -		struct mem_cgroup *wait_on_memcg;  	} memcg_oom;  #endif  #ifdef CONFIG_UPROBES @@ -1413,16 +1462,33 @@ struct task_struct {  /* Future-safe accessor for struct task_struct's cpus_allowed. */  #define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed) +#define TNF_MIGRATED	0x01 +#define TNF_NO_GROUP	0x02 +#define TNF_SHARED	0x04 +#define TNF_FAULT_LOCAL	0x08 +  #ifdef CONFIG_NUMA_BALANCING -extern void task_numa_fault(int node, int pages, bool migrated); +extern void task_numa_fault(int last_node, int node, int pages, int flags); +extern pid_t task_numa_group_id(struct task_struct *p);  extern void set_numabalancing_state(bool enabled); +extern void task_numa_free(struct task_struct *p); + +extern unsigned int sysctl_numa_balancing_migrate_deferred;  #else -static inline void task_numa_fault(int node, int pages, bool migrated) +static inline void task_numa_fault(int last_node, int node, int pages, +				   int flags) +{ +} +static inline pid_t task_numa_group_id(struct task_struct *p)  { +	return 0;  }  static inline void set_numabalancing_state(bool enabled)  {  } +static inline void task_numa_free(struct task_struct *p) +{ +}  #endif  static inline struct pid *task_pid(struct task_struct *task) @@ -1975,7 +2041,7 @@ extern void wake_up_new_task(struct task_struct *tsk);  #else   static inline void kick_process(struct task_struct *tsk) { }  #endif -extern void sched_fork(struct task_struct *p); +extern void sched_fork(unsigned long clone_flags, struct task_struct *p);  extern void sched_dead(struct task_struct *p);  extern void proc_caches_init(void); @@ -2402,11 +2468,6 @@ static inline int signal_pending_state(long state, struct task_struct *p)  	return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p);  } -static inline int need_resched(void) -{ -	return unlikely(test_thread_flag(TIF_NEED_RESCHED)); -} -  /*   * cond_resched() and cond_resched_lock(): latency reduction via   * explicit rescheduling in places that are safe. The return @@ -2475,36 +2536,105 @@ static inline int tsk_is_polling(struct task_struct *p)  {  	return task_thread_info(p)->status & TS_POLLING;  } -static inline void current_set_polling(void) +static inline void __current_set_polling(void)  {  	current_thread_info()->status |= TS_POLLING;  } -static inline void current_clr_polling(void) +static inline bool __must_check current_set_polling_and_test(void) +{ +	__current_set_polling(); + +	/* +	 * Polling state must be visible before we test NEED_RESCHED, +	 * paired by resched_task() +	 */ +	smp_mb(); + +	return unlikely(tif_need_resched()); +} + +static inline void __current_clr_polling(void)  {  	current_thread_info()->status &= ~TS_POLLING; -	smp_mb__after_clear_bit(); +} + +static inline bool __must_check current_clr_polling_and_test(void) +{ +	__current_clr_polling(); + +	/* +	 * Polling state must be visible before we test NEED_RESCHED, +	 * paired by resched_task() +	 */ +	smp_mb(); + +	return unlikely(tif_need_resched());  }  #elif defined(TIF_POLLING_NRFLAG)  static inline int tsk_is_polling(struct task_struct *p)  {  	return test_tsk_thread_flag(p, TIF_POLLING_NRFLAG);  } -static inline void current_set_polling(void) + +static inline void __current_set_polling(void)  {  	set_thread_flag(TIF_POLLING_NRFLAG);  } -static inline void current_clr_polling(void) +static inline bool __must_check current_set_polling_and_test(void) +{ +	__current_set_polling(); + +	/* +	 * Polling state must be visible before we test NEED_RESCHED, +	 * paired by resched_task() +	 * +	 * XXX: assumes set/clear bit are identical barrier wise. +	 */ +	smp_mb__after_clear_bit(); + +	return unlikely(tif_need_resched()); +} + +static inline void __current_clr_polling(void)  {  	clear_thread_flag(TIF_POLLING_NRFLAG);  } + +static inline bool __must_check current_clr_polling_and_test(void) +{ +	__current_clr_polling(); + +	/* +	 * Polling state must be visible before we test NEED_RESCHED, +	 * paired by resched_task() +	 */ +	smp_mb__after_clear_bit(); + +	return unlikely(tif_need_resched()); +} +  #else  static inline int tsk_is_polling(struct task_struct *p) { return 0; } -static inline void current_set_polling(void) { } -static inline void current_clr_polling(void) { } +static inline void __current_set_polling(void) { } +static inline void __current_clr_polling(void) { } + +static inline bool __must_check current_set_polling_and_test(void) +{ +	return unlikely(tif_need_resched()); +} +static inline bool __must_check current_clr_polling_and_test(void) +{ +	return unlikely(tif_need_resched()); +}  #endif +static __always_inline bool need_resched(void) +{ +	return unlikely(tif_need_resched()); +} +  /*   * Thread group CPU time accounting.   */ @@ -2546,6 +2676,11 @@ static inline unsigned int task_cpu(const struct task_struct *p)  	return task_thread_info(p)->cpu;  } +static inline int task_node(const struct task_struct *p) +{ +	return cpu_to_node(task_cpu(p)); +} +  extern void set_task_cpu(struct task_struct *p, unsigned int cpu);  #else | 
