Diffstat (limited to 'kernel/sched/fair.c')
| -rw-r--r-- | kernel/sched/fair.c | 121 | 
1 file changed, 81 insertions, 40 deletions
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 79f574dba096..1866e64792a7 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1139,6 +1139,47 @@ static unsigned int task_scan_max(struct task_struct *p)
 	return max(smin, smax);
 }
 
+void init_numa_balancing(unsigned long clone_flags, struct task_struct *p)
+{
+	int mm_users = 0;
+	struct mm_struct *mm = p->mm;
+
+	if (mm) {
+		mm_users = atomic_read(&mm->mm_users);
+		if (mm_users == 1) {
+			mm->numa_next_scan = jiffies + msecs_to_jiffies(sysctl_numa_balancing_scan_delay);
+			mm->numa_scan_seq = 0;
+		}
+	}
+	p->node_stamp			= 0;
+	p->numa_scan_seq		= mm ? mm->numa_scan_seq : 0;
+	p->numa_scan_period		= sysctl_numa_balancing_scan_delay;
+	p->numa_work.next		= &p->numa_work;
+	p->numa_faults			= NULL;
+	p->numa_group			= NULL;
+	p->last_task_numa_placement	= 0;
+	p->last_sum_exec_runtime	= 0;
+
+	/* New address space, reset the preferred nid */
+	if (!(clone_flags & CLONE_VM)) {
+		p->numa_preferred_nid = -1;
+		return;
+	}
+
+	/*
+	 * New thread, keep existing numa_preferred_nid which should be copied
+	 * already by arch_dup_task_struct but stagger when scans start.
+	 */
+	if (mm) {
+		unsigned int delay;
+
+		delay = min_t(unsigned int, task_scan_max(current),
+			current->numa_scan_period * mm_users * NSEC_PER_MSEC);
+		delay += 2 * TICK_NSEC;
+		p->node_stamp = delay;
+	}
+}
+
 static void account_numa_enqueue(struct rq *rq, struct task_struct *p)
 {
 	rq->nr_numa_running += (p->numa_preferred_nid != -1);
@@ -5345,6 +5386,14 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 	struct sched_entity *se = &p->se;
 
 	/*
+	 * The code below (indirectly) updates schedutil which looks at
+	 * the cfs_rq utilization to select a frequency.
+	 * Let's add the task's estimated utilization to the cfs_rq's
+	 * estimated utilization, before we update schedutil.
+	 */
+	util_est_enqueue(&rq->cfs, p);
+
+	/*
 	 * If in_iowait is set, the code below may not trigger any cpufreq
 	 * utilization updates, so do it here explicitly with the IOWAIT flag
 	 * passed.
@@ -5385,7 +5434,6 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 	if (!se)
 		add_nr_running(rq, 1);
 
-	util_est_enqueue(&rq->cfs, p);
 	hrtick_update(rq);
 }
 
@@ -5858,8 +5906,8 @@ wake_affine_idle(int this_cpu, int prev_cpu, int sync)
 	 * a cpufreq perspective, it's better to have higher utilisation
 	 * on one CPU.
	 */
-	if (idle_cpu(this_cpu) && cpus_share_cache(this_cpu, prev_cpu))
-		return idle_cpu(prev_cpu) ? prev_cpu : this_cpu;
+	if (available_idle_cpu(this_cpu) && cpus_share_cache(this_cpu, prev_cpu))
+		return available_idle_cpu(prev_cpu) ? prev_cpu : this_cpu;
 
 	if (sync && cpu_rq(this_cpu)->nr_running == 1)
 		return this_cpu;
@@ -6102,7 +6150,7 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this
 
 	/* Traverse only the allowed CPUs */
 	for_each_cpu_and(i, sched_group_span(group), &p->cpus_allowed) {
-		if (idle_cpu(i)) {
+		if (available_idle_cpu(i)) {
 			struct rq *rq = cpu_rq(i);
 			struct cpuidle_state *idle = idle_get_state(rq);
 			if (idle && idle->exit_latency < min_exit_latency) {
@@ -6144,6 +6192,13 @@ static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p
 	if (!cpumask_intersects(sched_domain_span(sd), &p->cpus_allowed))
 		return prev_cpu;
 
+	/*
+	 * We need task's util for capacity_spare_wake, sync it up to prev_cpu's
+	 * last_update_time.
+	 */
+	if (!(sd_flag & SD_BALANCE_FORK))
+		sync_entity_load_avg(&p->se);
+
 	while (sd) {
 		struct sched_group *group;
 		struct sched_domain *tmp;
@@ -6224,7 +6279,7 @@ void __update_idle_core(struct rq *rq)
 		if (cpu == core)
 			continue;
 
-		if (!idle_cpu(cpu))
+		if (!available_idle_cpu(cpu))
 			goto unlock;
 	}
 
@@ -6256,7 +6311,7 @@ static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int
 
 		for_each_cpu(cpu, cpu_smt_mask(core)) {
 			cpumask_clear_cpu(cpu, cpus);
-			if (!idle_cpu(cpu))
+			if (!available_idle_cpu(cpu))
 				idle = false;
 		}
 
@@ -6285,7 +6340,7 @@ static int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int t
 	for_each_cpu(cpu, cpu_smt_mask(target)) {
 		if (!cpumask_test_cpu(cpu, &p->cpus_allowed))
 			continue;
-		if (idle_cpu(cpu))
+		if (available_idle_cpu(cpu))
 			return cpu;
 	}
 
@@ -6348,7 +6403,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
 			return -1;
 		if (!cpumask_test_cpu(cpu, &p->cpus_allowed))
 			continue;
-		if (idle_cpu(cpu))
+		if (available_idle_cpu(cpu))
 			break;
 	}
 
@@ -6368,13 +6423,13 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
 	struct sched_domain *sd;
 	int i, recent_used_cpu;
 
-	if (idle_cpu(target))
+	if (available_idle_cpu(target))
 		return target;
 
 	/*
 	 * If the previous CPU is cache affine and idle, don't be stupid:
 	 */
-	if (prev != target && cpus_share_cache(prev, target) && idle_cpu(prev))
+	if (prev != target && cpus_share_cache(prev, target) && available_idle_cpu(prev))
 		return prev;
 
 	/* Check a recently used CPU as a potential idle candidate: */
@@ -6382,7 +6437,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
 	if (recent_used_cpu != prev &&
 	    recent_used_cpu != target &&
 	    cpus_share_cache(recent_used_cpu, target) &&
-	    idle_cpu(recent_used_cpu) &&
+	    available_idle_cpu(recent_used_cpu) &&
 	    cpumask_test_cpu(p->recent_used_cpu, &p->cpus_allowed)) {
 		/*
 		 * Replace recent_used_cpu with prev as it is a potential
@@ -6558,7 +6613,7 @@ static int wake_cap(struct task_struct *p, int cpu, int prev_cpu)
 static int
 select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_flags)
 {
-	struct sched_domain *tmp, *affine_sd = NULL, *sd = NULL;
+	struct sched_domain *tmp, *sd = NULL;
 	int cpu = smp_processor_id();
 	int new_cpu = prev_cpu;
 	int want_affine = 0;
@@ -6581,7 +6636,10 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
 		 */
 		if (want_affine && (tmp->flags & SD_WAKE_AFFINE) &&
 		    cpumask_test_cpu(prev_cpu, sched_domain_span(tmp))) {
-			affine_sd = tmp;
+			if (cpu != prev_cpu)
+				new_cpu = wake_affine(tmp, p, cpu, prev_cpu, sync);
+
+			sd = NULL; /* Prefer wake_affine over balance flags */
 			break;
 		}
 
@@ -6591,33 +6649,16 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
 			break;
 	}
 
-	if (affine_sd) {
-		sd = NULL; /* Prefer wake_affine over balance flags */
-		if (cpu == prev_cpu)
-			goto pick_cpu;
-
-		new_cpu = wake_affine(affine_sd, p, cpu, prev_cpu, sync);
-	}
-
-	if (sd && !(sd_flag & SD_BALANCE_FORK)) {
-		/*
-		 * We're going to need the task's util for capacity_spare_wake
-		 * in find_idlest_group. Sync it up to prev_cpu's
-		 * last_update_time.
-		 */
-		sync_entity_load_avg(&p->se);
-	}
+	if (unlikely(sd)) {
+		/* Slow path */
+		new_cpu = find_idlest_cpu(sd, p, cpu, prev_cpu, sd_flag);
+	} else if (sd_flag & SD_BALANCE_WAKE) { /* XXX always ? */
+		/* Fast path */
 
-	if (!sd) {
-pick_cpu:
-		if (sd_flag & SD_BALANCE_WAKE) { /* XXX always ? */
-			new_cpu = select_idle_sibling(p, prev_cpu, new_cpu);
+		new_cpu = select_idle_sibling(p, prev_cpu, new_cpu);
 
-			if (want_affine)
-				current->recent_used_cpu = cpu;
-		}
-	} else {
-		new_cpu = find_idlest_cpu(sd, p, cpu, prev_cpu, sd_flag);
+		if (want_affine)
+			current->recent_used_cpu = cpu;
 	}
 	rcu_read_unlock();
 
@@ -10174,10 +10215,10 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
 	struct cfs_rq *cfs_rq;
 	int i;
 
-	tg->cfs_rq = kzalloc(sizeof(cfs_rq) * nr_cpu_ids, GFP_KERNEL);
+	tg->cfs_rq = kcalloc(nr_cpu_ids, sizeof(cfs_rq), GFP_KERNEL);
 	if (!tg->cfs_rq)
 		goto err;
-	tg->se = kzalloc(sizeof(se) * nr_cpu_ids, GFP_KERNEL);
+	tg->se = kcalloc(nr_cpu_ids, sizeof(se), GFP_KERNEL);
 	if (!tg->se)
 		goto err;
 
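Note: most of the hunks above replace idle_cpu() with available_idle_cpu() at wake-up and task-placement sites. The helper itself lives outside this file (in kernel/sched/core.c, not part of this diff), so the sketch below is illustrative rather than authoritative: the intent is that a CPU only counts as idle for placement if, when running as a guest, its vCPU is not currently preempted by the host.

	/* Illustrative sketch only -- assumed shape of the helper added in
	 * kernel/sched/core.c; it is not part of the diff shown above. */
	int available_idle_cpu(int cpu)
	{
		if (!idle_cpu(cpu))		/* must be idle in the usual sense... */
			return 0;

		if (vcpu_is_preempted(cpu))	/* ...and not a preempted guest vCPU */
			return 0;

		return 1;
	}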

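Note: the final hunk switches the per-CPU pointer arrays in alloc_fair_sched_group() from kzalloc(size * count) to kcalloc(count, size). Both return zeroed memory; the difference is that kcalloc() checks the count * size multiplication for overflow and returns NULL rather than allocating a short buffer. A minimal sketch of the pattern, reusing the names from the hunk above:

	/* Before: the multiplication is open-coded and unchecked. */
	tg->cfs_rq = kzalloc(sizeof(cfs_rq) * nr_cpu_ids, GFP_KERNEL);

	/* After: same zeroed allocation, but kcalloc() fails cleanly if
	 * nr_cpu_ids * sizeof(cfs_rq) would overflow. */
	tg->cfs_rq = kcalloc(nr_cpu_ids, sizeof(cfs_rq), GFP_KERNEL);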