diff options
Diffstat (limited to 'mm/mempolicy.c')
| -rw-r--r-- | mm/mempolicy.c | 149 | 
1 file changed, 102 insertions, 47 deletions
| diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 04729647f359..c4403cdf3433 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -525,8 +525,9 @@ static void queue_pages_hugetlb_pmd_range(struct vm_area_struct *vma,  #ifdef CONFIG_HUGETLB_PAGE  	int nid;  	struct page *page; +	spinlock_t *ptl; -	spin_lock(&vma->vm_mm->page_table_lock); +	ptl = huge_pte_lock(hstate_vma(vma), vma->vm_mm, (pte_t *)pmd);  	page = pte_page(huge_ptep_get((pte_t *)pmd));  	nid = page_to_nid(page);  	if (node_isset(nid, *nodes) == !!(flags & MPOL_MF_INVERT)) @@ -536,7 +537,7 @@ static void queue_pages_hugetlb_pmd_range(struct vm_area_struct *vma,  	    (flags & MPOL_MF_MOVE && page_mapcount(page) == 1))  		isolate_huge_page(page, private);  unlock: -	spin_unlock(&vma->vm_mm->page_table_lock); +	spin_unlock(ptl);  #else  	BUG();  #endif @@ -1125,7 +1126,7 @@ int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from,  	tmp = *from;  	while (!nodes_empty(tmp)) {  		int s,d; -		int source = -1; +		int source = NUMA_NO_NODE;  		int dest = 0;  		for_each_node_mask(s, tmp) { @@ -1160,7 +1161,7 @@ int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from,  			if (!node_isset(dest, tmp))  				break;  		} -		if (source == -1) +		if (source == NUMA_NO_NODE)  			break;  		node_clear(source, tmp); @@ -1679,6 +1680,30 @@ struct mempolicy *get_vma_policy(struct task_struct *task,  	return pol;  } +bool vma_policy_mof(struct task_struct *task, struct vm_area_struct *vma) +{ +	struct mempolicy *pol = get_task_policy(task); +	if (vma) { +		if (vma->vm_ops && vma->vm_ops->get_policy) { +			bool ret = false; + +			pol = vma->vm_ops->get_policy(vma, vma->vm_start); +			if (pol && (pol->flags & MPOL_F_MOF)) +				ret = true; +			mpol_cond_put(pol); + +			return ret; +		} else if (vma->vm_policy) { +			pol = vma->vm_policy; +		} +	} + +	if (!pol) +		return default_policy.flags & MPOL_F_MOF; + +	return pol->flags & MPOL_F_MOF; +} +  static int apply_policy_zone(struct mempolicy *policy, enum 
zone_type zone)  {  	enum zone_type dynamic_policy_zone = policy_zone; @@ -1811,7 +1836,7 @@ static unsigned offset_il_node(struct mempolicy *pol,  	unsigned nnodes = nodes_weight(pol->v.nodes);  	unsigned target;  	int c; -	int nid = -1; +	int nid = NUMA_NO_NODE;  	if (!nnodes)  		return numa_node_id(); @@ -1848,11 +1873,11 @@ static inline unsigned interleave_nid(struct mempolicy *pol,  /*   * Return the bit number of a random bit set in the nodemask. - * (returns -1 if nodemask is empty) + * (returns NUMA_NO_NODE if nodemask is empty)   */  int node_random(const nodemask_t *maskp)  { -	int w, bit = -1; +	int w, bit = NUMA_NO_NODE;  	w = nodes_weight(*maskp);  	if (w) @@ -2277,6 +2302,35 @@ static void sp_free(struct sp_node *n)  	kmem_cache_free(sn_cache, n);  } +#ifdef CONFIG_NUMA_BALANCING +static bool numa_migrate_deferred(struct task_struct *p, int last_cpupid) +{ +	/* Never defer a private fault */ +	if (cpupid_match_pid(p, last_cpupid)) +		return false; + +	if (p->numa_migrate_deferred) { +		p->numa_migrate_deferred--; +		return true; +	} +	return false; +} + +static inline void defer_numa_migrate(struct task_struct *p) +{ +	p->numa_migrate_deferred = sysctl_numa_balancing_migrate_deferred; +} +#else +static inline bool numa_migrate_deferred(struct task_struct *p, int last_cpupid) +{ +	return false; +} + +static inline void defer_numa_migrate(struct task_struct *p) +{ +} +#endif /* CONFIG_NUMA_BALANCING */ +  /**   * mpol_misplaced - check whether current page node is valid in policy   * @@ -2300,6 +2354,8 @@ int mpol_misplaced(struct page *page, struct vm_area_struct *vma, unsigned long  	struct zone *zone;  	int curnid = page_to_nid(page);  	unsigned long pgoff; +	int thiscpu = raw_smp_processor_id(); +	int thisnid = cpu_to_node(thiscpu);  	int polnid = -1;  	int ret = -1; @@ -2348,9 +2404,11 @@ int mpol_misplaced(struct page *page, struct vm_area_struct *vma, unsigned long  	/* Migrate the page towards the node whose CPU is referencing it */  	if 
(pol->flags & MPOL_F_MORON) { -		int last_nid; +		int last_cpupid; +		int this_cpupid; -		polnid = numa_node_id(); +		polnid = thisnid; +		this_cpupid = cpu_pid_to_cpupid(thiscpu, current->pid);  		/*  		 * Multi-stage node selection is used in conjunction @@ -2373,8 +2431,25 @@ int mpol_misplaced(struct page *page, struct vm_area_struct *vma, unsigned long  		 * it less likely we act on an unlikely task<->page  		 * relation.  		 */ -		last_nid = page_nid_xchg_last(page, polnid); -		if (last_nid != polnid) +		last_cpupid = page_cpupid_xchg_last(page, this_cpupid); +		if (!cpupid_pid_unset(last_cpupid) && cpupid_to_nid(last_cpupid) != thisnid) { + +			/* See sysctl_numa_balancing_migrate_deferred comment */ +			if (!cpupid_match_pid(current, last_cpupid)) +				defer_numa_migrate(current); + +			goto out; +		} + +		/* +		 * The quadratic filter above reduces extraneous migration +		 * of shared pages somewhat. This code reduces it even more, +		 * reducing the overhead of page migrations of shared pages. +		 * This makes workloads with shared pages rely more on +		 * "move task near its memory", and less on "move memory +		 * towards its task", which is exactly what we want. +		 */ +		if (numa_migrate_deferred(current, last_cpupid))  			goto out;  	} @@ -2840,62 +2915,45 @@ out:   * @maxlen:  length of @buffer   * @pol:  pointer to mempolicy to be formatted   * - * Convert a mempolicy into a string. - * Returns the number of characters in buffer (if positive) - * or an error (negative) + * Convert @pol into a string.  If @buffer is too short, truncate the string. + * Recommend a @maxlen of at least 32 for the longest mode, "interleave", the + * longest flag, "relative", and to display at least a few node ids.   */ -int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol) +void mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)  {  	char *p = buffer; -	int l; -	nodemask_t nodes; -	unsigned short mode; -	unsigned short flags = pol ? 
pol->flags : 0; +	nodemask_t nodes = NODE_MASK_NONE; +	unsigned short mode = MPOL_DEFAULT; +	unsigned short flags = 0; -	/* -	 * Sanity check:  room for longest mode, flag and some nodes -	 */ -	VM_BUG_ON(maxlen < strlen("interleave") + strlen("relative") + 16); - -	if (!pol || pol == &default_policy) -		mode = MPOL_DEFAULT; -	else +	if (pol && pol != &default_policy) {  		mode = pol->mode; +		flags = pol->flags; +	}  	switch (mode) {  	case MPOL_DEFAULT: -		nodes_clear(nodes);  		break; -  	case MPOL_PREFERRED: -		nodes_clear(nodes);  		if (flags & MPOL_F_LOCAL)  			mode = MPOL_LOCAL;  		else  			node_set(pol->v.preferred_node, nodes);  		break; -  	case MPOL_BIND: -		/* Fall through */  	case MPOL_INTERLEAVE:  		nodes = pol->v.nodes;  		break; -  	default: -		return -EINVAL; +		WARN_ON_ONCE(1); +		snprintf(p, maxlen, "unknown"); +		return;  	} -	l = strlen(policy_modes[mode]); -	if (buffer + maxlen < p + l + 1) -		return -ENOSPC; - -	strcpy(p, policy_modes[mode]); -	p += l; +	p += snprintf(p, maxlen, policy_modes[mode]);  	if (flags & MPOL_MODE_FLAGS) { -		if (buffer + maxlen < p + 2) -			return -ENOSPC; -		*p++ = '='; +		p += snprintf(p, buffer + maxlen - p, "=");  		/*  		 * Currently, the only defined flags are mutually exclusive @@ -2907,10 +2965,7 @@ int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)  	}  	if (!nodes_empty(nodes)) { -		if (buffer + maxlen < p + 2) -			return -ENOSPC; -		*p++ = ':'; +		p += snprintf(p, buffer + maxlen - p, ":");  	 	p += nodelist_scnprintf(p, buffer + maxlen - p, nodes);  	} -	return p - buffer;  } | 
