diff options
Diffstat (limited to 'kernel/cpuset.c')
| -rw-r--r-- | kernel/cpuset.c | 83 | 
1 files changed, 57 insertions, 26 deletions
| diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 02a8ea5c9963..41989ab4db57 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -51,6 +51,7 @@  #include <linux/stat.h>  #include <linux/string.h>  #include <linux/time.h> +#include <linux/time64.h>  #include <linux/backing-dev.h>  #include <linux/sort.h> @@ -68,7 +69,7 @@ struct static_key cpusets_enabled_key __read_mostly = STATIC_KEY_INIT_FALSE;  struct fmeter {  	int cnt;		/* unprocessed events count */  	int val;		/* most recent output value */ -	time_t time;		/* clock (secs) when val computed */ +	time64_t time;		/* clock (secs) when val computed */  	spinlock_t lock;	/* guards read or write of above */  }; @@ -286,6 +287,8 @@ static struct cpuset top_cpuset = {  static DEFINE_MUTEX(cpuset_mutex);  static DEFINE_SPINLOCK(callback_lock); +static struct workqueue_struct *cpuset_migrate_mm_wq; +  /*   * CPU / memory hotplug is handled asynchronously.   */ @@ -971,31 +974,51 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,  }  /* - * cpuset_migrate_mm - * - *    Migrate memory region from one set of nodes to another. - * - *    Temporarilly set tasks mems_allowed to target nodes of migration, - *    so that the migration code can allocate pages on these nodes. - * - *    While the mm_struct we are migrating is typically from some - *    other task, the task_struct mems_allowed that we are hacking - *    is for our current task, which must allocate new pages for that - *    migrating memory region. + * Migrate memory region from one set of nodes to another.  This is + * performed asynchronously as it can be called from process migration path + * holding locks involved in process management.  All mm migrations are + * performed in the queued order and can be waited for by flushing + * cpuset_migrate_mm_wq.   */ +struct cpuset_migrate_mm_work { +	struct work_struct	work; +	struct mm_struct	*mm; +	nodemask_t		from; +	nodemask_t		to; +}; + +static void cpuset_migrate_mm_workfn(struct work_struct *work) +{ +	struct cpuset_migrate_mm_work *mwork = +		container_of(work, struct cpuset_migrate_mm_work, work); + +	/* on a wq worker, no need to worry about %current's mems_allowed */ +	do_migrate_pages(mwork->mm, &mwork->from, &mwork->to, MPOL_MF_MOVE_ALL); +	mmput(mwork->mm); +	kfree(mwork); +} +  static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,  							const nodemask_t *to)  { -	struct task_struct *tsk = current; - -	tsk->mems_allowed = *to; +	struct cpuset_migrate_mm_work *mwork; -	do_migrate_pages(mm, from, to, MPOL_MF_MOVE_ALL); +	mwork = kzalloc(sizeof(*mwork), GFP_KERNEL); +	if (mwork) { +		mwork->mm = mm; +		mwork->from = *from; +		mwork->to = *to; +		INIT_WORK(&mwork->work, cpuset_migrate_mm_workfn); +		queue_work(cpuset_migrate_mm_wq, &mwork->work); +	} else { +		mmput(mm); +	} +} -	rcu_read_lock(); -	guarantee_online_mems(task_cs(tsk), &tsk->mems_allowed); -	rcu_read_unlock(); +void cpuset_post_attach_flush(void) +{ +	flush_workqueue(cpuset_migrate_mm_wq);  }  /* @@ -1096,7 +1119,8 @@ static void update_tasks_nodemask(struct cpuset *cs)  		mpol_rebind_mm(mm, &cs->mems_allowed);  		if (migrate)  			cpuset_migrate_mm(mm, &cs->old_mems_allowed, &newmems); -		mmput(mm); +		else +			mmput(mm);  	}  	css_task_iter_end(&it); @@ -1374,7 +1398,7 @@ out:   */  #define FM_COEF 933		/* coefficient for half-life of 10 secs */ -#define FM_MAXTICKS ((time_t)99) /* useless computing more ticks than this */ +#define FM_MAXTICKS ((u32)99)   /* useless computing more ticks than this */  #define FM_MAXCNT 1000000	/* limit cnt to avoid overflow */  #define FM_SCALE 1000		/* faux fixed point scale */ @@ -1390,8 +1414,11 @@ static void fmeter_init(struct fmeter *fmp)  /* Internal meter update - process cnt events and update value */  static void fmeter_update(struct fmeter *fmp)  { -	time_t now = get_seconds(); -	time_t ticks = now - fmp->time; +	time64_t now; +	u32 ticks; + +	now = ktime_get_seconds(); +	ticks = now - fmp->time;  	if (ticks == 0)  		return; @@ -1541,11 +1568,11 @@ static void cpuset_attach(struct cgroup_taskset *tset)  			 * @old_mems_allowed is the right nodesets that we  			 * migrate mm from.  			 */ -			if (is_memory_migrate(cs)) { +			if (is_memory_migrate(cs))  				cpuset_migrate_mm(mm, &oldcs->old_mems_allowed,  						  &cpuset_attach_nodemask_to); -			} -			mmput(mm); +			else +				mmput(mm);  		}  	} @@ -1710,6 +1737,7 @@ out_unlock:  	mutex_unlock(&cpuset_mutex);  	kernfs_unbreak_active_protection(of->kn);  	css_put(&cs->css); +	flush_workqueue(cpuset_migrate_mm_wq);  	return retval ?: nbytes;  } @@ -2355,6 +2383,9 @@ void __init cpuset_init_smp(void)  	top_cpuset.effective_mems = node_states[N_MEMORY];  	register_hotmemory_notifier(&cpuset_track_online_nodes_nb); + +	cpuset_migrate_mm_wq = alloc_ordered_workqueue("cpuset_migrate_mm", 0); +	BUG_ON(!cpuset_migrate_mm_wq);  }  /** | 
