author     Hugh Dickins <hugh@veritas.com>  2004-04-17 03:50:14 -0700
committer  Linus Torvalds <torvalds@ppc970.osdl.org>  2004-04-17 03:50:14 -0700
commit     e4cf8264fa0bbc0b113442df03ceb9606d2ec428 (patch)
tree       db03c1a1a4b9160004bc9db3838ef063a763c70d
parent     3ba9ac07443fbcf68424a12daeadae36d2bc8b23 (diff)
[PATCH] Fix vma corruption
It occurred to me that if vma and new_vma are one and the same, then vma_relink_file will not do a good job of linking it after itself, in that pretty unlikely case when move_page_tables fails.

And more generally, whenever copy_vma's vma_merge succeeds, we have no guarantee that the old vma comes before new_vma in the i_mmap lists, as we need to satisfy Rajesh's point: that ordering is only guaranteed in the newly allocated case.

We have to abandon the ordering method when/if we move from lists to prio_trees, so this patch switches to the less glamorous use of i_shared_sem exclusion, as in my prio_tree mremap.
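To make that aliasing hazard concrete, here is a small stand-alone C sketch (user-space code with a simplified stand-in type, not the kernel's structs) of the check copy_vma gains below: when vma_merge succeeds, the source vma may have been absorbed into new_vma, which can be detected by testing whether the source's old start address now falls inside new_vma's range.

/*
 * Stand-alone sketch, not kernel code: struct vma is a simplified
 * stand-in for struct vm_area_struct.
 */
#include <stdio.h>

struct vma {
	unsigned long vm_start;		/* first address of the area */
	unsigned long vm_end;		/* one past the last address */
};

/* Returns 1 if the vma that began at vma_start was merged into new_vma. */
static int source_was_merged(unsigned long vma_start, const struct vma *new_vma)
{
	return vma_start >= new_vma->vm_start && vma_start < new_vma->vm_end;
}

int main(void)
{
	struct vma merged = { 0x1000, 0x4000 };	/* grew to cover the source */

	printf("%d\n", source_was_merged(0x2000, &merged));	/* 1: absorbed */
	printf("%d\n", source_was_merged(0x8000, &merged));	/* 0: untouched */
	return 0;
}

In the patch itself the positive case updates *vmap, which is why copy_vma now takes struct vm_area_struct ** rather than a plain pointer: move_vma must carry on with whichever vma actually covers the source range.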
-rw-r--r--  include/linux/mm.h   3
-rw-r--r--  mm/mmap.c           34
-rw-r--r--  mm/mremap.c         22
3 files changed, 26 insertions(+), 33 deletions(-)
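The mm/mremap.c change below drops the list-reordering trick entirely and serializes against truncation instead. As a rough illustration of that shape, here is a stand-alone sketch using a pthread mutex in place of i_shared_sem (mapping_lock and the stub functions are invented for the sketch): the whole pte move, including the error-path move back, runs with truncation locked out, so a concurrent truncate sees either all-old or all-new page tables, never a half-moved mixture.

/*
 * Stand-alone sketch of the locking shape, not kernel code: a pthread
 * mutex stands in for i_shared_sem.
 */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t mapping_lock = PTHREAD_MUTEX_INITIALIZER;

static int move_page_tables_stub(void) { return 0; }	/* 0 = success */

static void move_vma_shape(int file_backed)
{
	if (file_backed)
		pthread_mutex_lock(&mapping_lock);	/* lock vmtruncate out */

	if (move_page_tables_stub() != 0) {
		/* error path: move the ptes back, still under the lock */
	}

	if (file_backed)
		pthread_mutex_unlock(&mapping_lock);	/* truncation may resume */
}

static void vmtruncate_shape(void)
{
	pthread_mutex_lock(&mapping_lock);
	/* pte cleaning would run here, seeing a consistent src/dst state */
	pthread_mutex_unlock(&mapping_lock);
}

int main(void)
{
	move_vma_shape(1);	/* a file-backed move takes the lock */
	vmtruncate_shape();
	printf("done\n");
	return 0;
}

Holding one semaphore across the move is coarser than ordering vmas in the i_mmap lists, but unlike the ordering trick it still works once those lists become prio_trees.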
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 14dba1b26016..4fd3c76ac05f 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -526,9 +526,8 @@ extern void si_meminfo_node(struct sysinfo *val, int nid);
 extern void insert_vm_struct(struct mm_struct *, struct vm_area_struct *);
 extern void __vma_link_rb(struct mm_struct *, struct vm_area_struct *,
 	struct rb_node **, struct rb_node *);
-extern struct vm_area_struct *copy_vma(struct vm_area_struct *,
+extern struct vm_area_struct *copy_vma(struct vm_area_struct **,
 	unsigned long addr, unsigned long len, unsigned long pgoff);
-extern void vma_relink_file(struct vm_area_struct *, struct vm_area_struct *);
 extern void exit_mmap(struct mm_struct *);
 
 extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
diff --git a/mm/mmap.c b/mm/mmap.c
index eed4e083bca1..94ad97fb2b04 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1498,9 +1498,11 @@ void insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma)
  * Copy the vma structure to a new location in the same mm,
  * prior to moving page table entries, to effect an mremap move.
  */
-struct vm_area_struct *copy_vma(struct vm_area_struct *vma,
+struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
 	unsigned long addr, unsigned long len, unsigned long pgoff)
 {
+	struct vm_area_struct *vma = *vmap;
+	unsigned long vma_start = vma->vm_start;
 	struct mm_struct *mm = vma->vm_mm;
 	struct vm_area_struct *new_vma, *prev;
 	struct rb_node **rb_link, *rb_parent;
@@ -1508,7 +1510,14 @@ struct vm_area_struct *copy_vma(struct vm_area_struct *vma,
 	find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent);
 	new_vma = vma_merge(mm, prev, rb_parent, addr, addr + len,
 			vma->vm_flags, vma->vm_file, pgoff);
-	if (!new_vma) {
+	if (new_vma) {
+		/*
+		 * Source vma may have been merged into new_vma
+		 */
+		if (vma_start >= new_vma->vm_start &&
+		    vma_start < new_vma->vm_end)
+			*vmap = new_vma;
+	} else {
 		new_vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
 		if (new_vma) {
 			*new_vma = *vma;
@@ -1525,24 +1534,3 @@ struct vm_area_struct *copy_vma(struct vm_area_struct *vma,
 	}
 	return new_vma;
 }
-
-/*
- * Position vma after prev in shared file list:
- * for mremap move error recovery racing against vmtruncate.
- */
-void vma_relink_file(struct vm_area_struct *vma, struct vm_area_struct *prev)
-{
-	struct mm_struct *mm = vma->vm_mm;
-	struct address_space *mapping;
-
-	if (vma->vm_file) {
-		mapping = vma->vm_file->f_mapping;
-		if (mapping) {
-			down(&mapping->i_shared_sem);
-			spin_lock(&mm->page_table_lock);
-			list_move(&vma->shared, &prev->shared);
-			spin_unlock(&mm->page_table_lock);
-			up(&mapping->i_shared_sem);
-		}
-	}
-}
diff --git a/mm/mremap.c b/mm/mremap.c
index c355d4da4afe..4dc19b415000 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -169,6 +169,7 @@ static unsigned long move_vma(struct vm_area_struct *vma,
 	unsigned long new_len, unsigned long new_addr)
 {
 	struct mm_struct *mm = vma->vm_mm;
+	struct address_space *mapping = NULL;
 	struct vm_area_struct *new_vma;
 	unsigned long vm_flags = vma->vm_flags;
 	unsigned long new_pgoff;
@@ -184,30 +185,35 @@ static unsigned long move_vma(struct vm_area_struct *vma,
 		return -ENOMEM;
 
 	new_pgoff = vma->vm_pgoff + ((old_addr - vma->vm_start) >> PAGE_SHIFT);
-	new_vma = copy_vma(vma, new_addr, new_len, new_pgoff);
+	new_vma = copy_vma(&vma, new_addr, new_len, new_pgoff);
 	if (!new_vma)
 		return -ENOMEM;
 
+	if (vma->vm_file) {
+		/*
+		 * Subtle point from Rajesh Venkatasubramanian: before
+		 * moving file-based ptes, we must lock vmtruncate out,
+		 * since it might clean the dst vma before the src vma,
+		 * and we propagate stale pages into the dst afterward.
+		 */
+		mapping = vma->vm_file->f_mapping;
+		down(&mapping->i_shared_sem);
+	}
 	moved_len = move_page_tables(vma, new_addr, old_addr, old_len);
 	if (moved_len < old_len) {
 		/*
 		 * On error, move entries back from new area to old,
 		 * which will succeed since page tables still there,
 		 * and then proceed to unmap new area instead of old.
-		 *
-		 * Subtle point from Rajesh Venkatasubramanian: before
-		 * moving file-based ptes, move new_vma before old vma
-		 * in the i_mmap or i_mmap_shared list, so when racing
-		 * against vmtruncate we cannot propagate pages to be
-		 * truncated back from new_vma into just cleaned old.
 		 */
-		vma_relink_file(vma, new_vma);
 		move_page_tables(new_vma, old_addr, new_addr, moved_len);
 		vma = new_vma;
 		old_len = new_len;
 		old_addr = new_addr;
 		new_addr = -ENOMEM;
 	}
+	if (mapping)
+		up(&mapping->i_shared_sem);
 
 	/* Conceal VM_ACCOUNT so old reservation is not undone */
 	if (vm_flags & VM_ACCOUNT) {