From 3928d4f5ee37cdc523894f6e549e6aae521d8980 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 21 Jul 2018 13:48:51 -0700 Subject: mm: use helper functions for allocating and freeing vm_area structs The vm_area_struct is one of the most fundamental memory management objects, but the management of it is entirely open-coded evertwhere, ranging from allocation and freeing (using kmem_cache_[z]alloc and kmem_cache_free) to initializing all the fields. We want to unify this in order to end up having some unified initialization of the vmas, and the first step to this is to at least have basic allocation functions. Right now those functions are literally just wrappers around the kmem_cache_*() calls. This is a purely mechanical conversion: # new vma: kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL) -> vm_area_alloc() # copy old vma kmem_cache_alloc(vm_area_cachep, GFP_KERNEL) -> vm_area_dup(old) # free vma kmem_cache_free(vm_area_cachep, vma) -> vm_area_free(vma) to the point where the old vma passed in to the vm_area_dup() function isn't even used yet (because I've left all the old manual initialization alone). Signed-off-by: Linus Torvalds --- fs/exec.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/exec.c') diff --git a/fs/exec.c b/fs/exec.c index 2d4e0075bd24..9bd83989ea25 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -290,7 +290,7 @@ static int __bprm_mm_init(struct linux_binprm *bprm) struct vm_area_struct *vma = NULL; struct mm_struct *mm = bprm->mm; - bprm->vma = vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); + bprm->vma = vma = vm_area_alloc(); if (!vma) return -ENOMEM; @@ -326,7 +326,7 @@ err: up_write(&mm->mmap_sem); err_free: bprm->vma = NULL; - kmem_cache_free(vm_area_cachep, vma); + vm_area_free(vma); return err; } -- cgit v1.2.3 From 490fc053865c9cc40f1085ef8a5504f5341f79d2 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 21 Jul 2018 15:24:03 -0700 Subject: mm: make vm_area_alloc() initialize core fields Like vm_area_dup(), it initializes the anon_vma_chain head, and the basic mm pointer. The rest of the fields end up being different for different users, although the plan is to also initialize the 'vm_ops' field to a dummy entry. Signed-off-by: Linus Torvalds --- arch/ia64/kernel/perfmon.c | 4 +--- arch/ia64/mm/init.c | 8 ++------ fs/exec.c | 4 +--- include/linux/mm.h | 2 +- kernel/fork.c | 10 ++++++++-- mm/mmap.c | 12 +++--------- mm/nommu.c | 3 +-- 7 files changed, 17 insertions(+), 26 deletions(-) (limited to 'fs/exec.c') diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index e859246badca..46bff1661836 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -2278,17 +2278,15 @@ pfm_smpl_buffer_alloc(struct task_struct *task, struct file *filp, pfm_context_t DPRINT(("smpl_buf @%p\n", smpl_buf)); /* allocate vma */ - vma = vm_area_alloc(); + vma = vm_area_alloc(mm); if (!vma) { DPRINT(("Cannot allocate vma\n")); goto error_kmem; } - INIT_LIST_HEAD(&vma->anon_vma_chain); /* * partially initialize the vma for the sampling buffer */ - vma->vm_mm = mm; vma->vm_file = get_file(filp); vma->vm_flags = VM_READ|VM_MAYREAD|VM_DONTEXPAND|VM_DONTDUMP; vma->vm_page_prot = PAGE_READONLY; /* XXX may need to change */ diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 3f2321bffb72..bdb14a369137 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -114,10 +114,8 @@ ia64_init_addr_space (void) * the problem. When the process attempts to write to the register backing store * for the first time, it will get a SEGFAULT in this case. */ - vma = vm_area_alloc(); + vma = vm_area_alloc(current->mm); if (vma) { - INIT_LIST_HEAD(&vma->anon_vma_chain); - vma->vm_mm = current->mm; vma->vm_start = current->thread.rbs_bot & PAGE_MASK; vma->vm_end = vma->vm_start + PAGE_SIZE; vma->vm_flags = VM_DATA_DEFAULT_FLAGS|VM_GROWSUP|VM_ACCOUNT; @@ -133,10 +131,8 @@ ia64_init_addr_space (void) /* map NaT-page at address zero to speed up speculative dereferencing of NULL: */ if (!(current->personality & MMAP_PAGE_ZERO)) { - vma = vm_area_alloc(); + vma = vm_area_alloc(current->mm); if (vma) { - INIT_LIST_HEAD(&vma->anon_vma_chain); - vma->vm_mm = current->mm; vma->vm_end = PAGE_SIZE; vma->vm_page_prot = __pgprot(pgprot_val(PAGE_READONLY) | _PAGE_MA_NAT); vma->vm_flags = VM_READ | VM_MAYREAD | VM_IO | diff --git a/fs/exec.c b/fs/exec.c index 9bd83989ea25..72e961a62adb 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -290,7 +290,7 @@ static int __bprm_mm_init(struct linux_binprm *bprm) struct vm_area_struct *vma = NULL; struct mm_struct *mm = bprm->mm; - bprm->vma = vma = vm_area_alloc(); + bprm->vma = vma = vm_area_alloc(mm); if (!vma) return -ENOMEM; @@ -298,7 +298,6 @@ static int __bprm_mm_init(struct linux_binprm *bprm) err = -EINTR; goto err_free; } - vma->vm_mm = mm; /* * Place the stack at the largest stack address the architecture @@ -311,7 +310,6 @@ static int __bprm_mm_init(struct linux_binprm *bprm) vma->vm_start = vma->vm_end - PAGE_SIZE; vma->vm_flags = VM_SOFTDIRTY | VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP; vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); - INIT_LIST_HEAD(&vma->anon_vma_chain); err = insert_vm_struct(mm, vma); if (err) diff --git a/include/linux/mm.h b/include/linux/mm.h index de2fd86c6154..d3a3842316b8 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -155,7 +155,7 @@ extern int overcommit_kbytes_handler(struct ctl_table *, int, void __user *, * mmap() functions). */ -struct vm_area_struct *vm_area_alloc(void); +struct vm_area_struct *vm_area_alloc(struct mm_struct *); struct vm_area_struct *vm_area_dup(struct vm_area_struct *); void vm_area_free(struct vm_area_struct *); diff --git a/kernel/fork.c b/kernel/fork.c index 67253e41bfb0..a191c05e757d 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -308,9 +308,15 @@ static struct kmem_cache *vm_area_cachep; /* SLAB cache for mm_struct structures (tsk->mm) */ static struct kmem_cache *mm_cachep; -struct vm_area_struct *vm_area_alloc(void) +struct vm_area_struct *vm_area_alloc(struct mm_struct *mm) { - return kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); + struct vm_area_struct *vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); + + if (vma) { + vma->vm_mm = mm; + INIT_LIST_HEAD(&vma->anon_vma_chain); + } + return vma; } struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig) diff --git a/mm/mmap.c b/mm/mmap.c index b0ed8ce1b67e..ff1944d8d458 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1729,19 +1729,17 @@ unsigned long mmap_region(struct file *file, unsigned long addr, * specific mapper. the address has already been validated, but * not unmapped, but the maps are removed from the list. */ - vma = vm_area_alloc(); + vma = vm_area_alloc(mm); if (!vma) { error = -ENOMEM; goto unacct_error; } - vma->vm_mm = mm; vma->vm_start = addr; vma->vm_end = addr + len; vma->vm_flags = vm_flags; vma->vm_page_prot = vm_get_page_prot(vm_flags); vma->vm_pgoff = pgoff; - INIT_LIST_HEAD(&vma->anon_vma_chain); if (file) { if (vm_flags & VM_DENYWRITE) { @@ -2979,14 +2977,12 @@ static int do_brk_flags(unsigned long addr, unsigned long len, unsigned long fla /* * create a vma struct for an anonymous mapping */ - vma = vm_area_alloc(); + vma = vm_area_alloc(mm); if (!vma) { vm_unacct_memory(len >> PAGE_SHIFT); return -ENOMEM; } - INIT_LIST_HEAD(&vma->anon_vma_chain); - vma->vm_mm = mm; vma->vm_start = addr; vma->vm_end = addr + len; vma->vm_pgoff = pgoff; @@ -3343,12 +3339,10 @@ static struct vm_area_struct *__install_special_mapping( int ret; struct vm_area_struct *vma; - vma = vm_area_alloc(); + vma = vm_area_alloc(mm); if (unlikely(vma == NULL)) return ERR_PTR(-ENOMEM); - INIT_LIST_HEAD(&vma->anon_vma_chain); - vma->vm_mm = mm; vma->vm_start = addr; vma->vm_end = addr + len; diff --git a/mm/nommu.c b/mm/nommu.c index c2560e9cc803..1d22fdbf7d7c 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1204,7 +1204,7 @@ unsigned long do_mmap(struct file *file, if (!region) goto error_getting_region; - vma = vm_area_alloc(); + vma = vm_area_alloc(current->mm); if (!vma) goto error_getting_vma; @@ -1212,7 +1212,6 @@ unsigned long do_mmap(struct file *file, region->vm_flags = vm_flags; region->vm_pgoff = pgoff; - INIT_LIST_HEAD(&vma->anon_vma_chain); vma->vm_flags = vm_flags; vma->vm_pgoff = pgoff; -- cgit v1.2.3 From bfd40eaff5abb9f62c8ef94ca13ed0d94a560f10 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 26 Jul 2018 16:37:35 -0700 Subject: mm: fix vma_is_anonymous() false-positives vma_is_anonymous() relies on ->vm_ops being NULL to detect anonymous VMA. This is unreliable as ->mmap may not set ->vm_ops. False-positive vma_is_anonymous() may lead to crashes: next ffff8801ce5e7040 prev ffff8801d20eca50 mm ffff88019c1e13c0 prot 27 anon_vma ffff88019680cdd8 vm_ops 0000000000000000 pgoff 0 file ffff8801b2ec2d00 private_data 0000000000000000 flags: 0xff(read|write|exec|shared|mayread|maywrite|mayexec|mayshare) ------------[ cut here ]------------ kernel BUG at mm/memory.c:1422! invalid opcode: 0000 [#1] SMP KASAN CPU: 0 PID: 18486 Comm: syz-executor3 Not tainted 4.18.0-rc3+ #136 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:zap_pmd_range mm/memory.c:1421 [inline] RIP: 0010:zap_pud_range mm/memory.c:1466 [inline] RIP: 0010:zap_p4d_range mm/memory.c:1487 [inline] RIP: 0010:unmap_page_range+0x1c18/0x2220 mm/memory.c:1508 Call Trace: unmap_single_vma+0x1a0/0x310 mm/memory.c:1553 zap_page_range_single+0x3cc/0x580 mm/memory.c:1644 unmap_mapping_range_vma mm/memory.c:2792 [inline] unmap_mapping_range_tree mm/memory.c:2813 [inline] unmap_mapping_pages+0x3a7/0x5b0 mm/memory.c:2845 unmap_mapping_range+0x48/0x60 mm/memory.c:2880 truncate_pagecache+0x54/0x90 mm/truncate.c:800 truncate_setsize+0x70/0xb0 mm/truncate.c:826 simple_setattr+0xe9/0x110 fs/libfs.c:409 notify_change+0xf13/0x10f0 fs/attr.c:335 do_truncate+0x1ac/0x2b0 fs/open.c:63 do_sys_ftruncate+0x492/0x560 fs/open.c:205 __do_sys_ftruncate fs/open.c:215 [inline] __se_sys_ftruncate fs/open.c:213 [inline] __x64_sys_ftruncate+0x59/0x80 fs/open.c:213 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe Reproducer: #include #include #include #include #include #include #include #include #include #include #include #define KCOV_INIT_TRACE _IOR('c', 1, unsigned long) #define KCOV_ENABLE _IO('c', 100) #define KCOV_DISABLE _IO('c', 101) #define COVER_SIZE (1024<<10) #define KCOV_TRACE_PC 0 #define KCOV_TRACE_CMP 1 int main(int argc, char **argv) { int fd; unsigned long *cover; system("mount -t debugfs none /sys/kernel/debug"); fd = open("/sys/kernel/debug/kcov", O_RDWR); ioctl(fd, KCOV_INIT_TRACE, COVER_SIZE); cover = mmap(NULL, COVER_SIZE * sizeof(unsigned long), PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); munmap(cover, COVER_SIZE * sizeof(unsigned long)); cover = mmap(NULL, COVER_SIZE * sizeof(unsigned long), PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); memset(cover, 0, COVER_SIZE * sizeof(unsigned long)); ftruncate(fd, 3UL << 20); return 0; } This can be fixed by assigning anonymous VMAs own vm_ops and not relying on it being NULL. If ->mmap() failed to set ->vm_ops, mmap_region() will set it to dummy_vm_ops. This way we will have non-NULL ->vm_ops for all VMAs. Link: http://lkml.kernel.org/r/20180724121139.62570-4-kirill.shutemov@linux.intel.com Signed-off-by: Kirill A. Shutemov Reported-by: syzbot+3f84280d52be9b7083cc@syzkaller.appspotmail.com Acked-by: Linus Torvalds Reviewed-by: Andrew Morton Cc: Dmitry Vyukov Cc: Oleg Nesterov Cc: Andrea Arcangeli Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/mem.c | 1 + fs/exec.c | 1 + include/linux/mm.h | 8 ++++++++ mm/mmap.c | 3 +++ mm/nommu.c | 2 ++ 5 files changed, 15 insertions(+) (limited to 'fs/exec.c') diff --git a/drivers/char/mem.c b/drivers/char/mem.c index ffeb60d3434c..df66a9dd0aae 100644 --- a/drivers/char/mem.c +++ b/drivers/char/mem.c @@ -708,6 +708,7 @@ static int mmap_zero(struct file *file, struct vm_area_struct *vma) #endif if (vma->vm_flags & VM_SHARED) return shmem_zero_setup(vma); + vma_set_anonymous(vma); return 0; } diff --git a/fs/exec.c b/fs/exec.c index 72e961a62adb..bdd0eacefdf5 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -293,6 +293,7 @@ static int __bprm_mm_init(struct linux_binprm *bprm) bprm->vma = vma = vm_area_alloc(mm); if (!vma) return -ENOMEM; + vma_set_anonymous(vma); if (down_write_killable(&mm->mmap_sem)) { err = -EINTR; diff --git a/include/linux/mm.h b/include/linux/mm.h index 31540f166987..7ba6d356d18f 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -454,10 +454,18 @@ struct vm_operations_struct { static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm) { + static const struct vm_operations_struct dummy_vm_ops = {}; + vma->vm_mm = mm; + vma->vm_ops = &dummy_vm_ops; INIT_LIST_HEAD(&vma->anon_vma_chain); } +static inline void vma_set_anonymous(struct vm_area_struct *vma) +{ + vma->vm_ops = NULL; +} + struct mmu_gather; struct inode; diff --git a/mm/mmap.c b/mm/mmap.c index ff1944d8d458..17bbf4d3e24f 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1778,6 +1778,8 @@ unsigned long mmap_region(struct file *file, unsigned long addr, error = shmem_zero_setup(vma); if (error) goto free_vma; + } else { + vma_set_anonymous(vma); } vma_link(mm, vma, prev, rb_link, rb_parent); @@ -2983,6 +2985,7 @@ static int do_brk_flags(unsigned long addr, unsigned long len, unsigned long fla return -ENOMEM; } + vma_set_anonymous(vma); vma->vm_start = addr; vma->vm_end = addr + len; vma->vm_pgoff = pgoff; diff --git a/mm/nommu.c b/mm/nommu.c index 1d22fdbf7d7c..9fc9e43335b6 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1145,6 +1145,8 @@ static int do_mmap_private(struct vm_area_struct *vma, if (ret < len) memset(base + ret, 0, len - ret); + } else { + vma_set_anonymous(vma); } return 0; -- cgit v1.2.3