From e652f694598273c5d749687032d1534a30e6a3a5 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Wed, 6 Sep 2017 16:23:25 -0700 Subject: mm: hugetlb: define system call hugetlb size encodings in single file Patch series "Consolidate system call hugetlb page size encodings". These patches are the result of discussions in https://lkml.org/lkml/2017/3/8/548. The following changes are made in the patch set: 1) Put all the log2 encoded huge page size definitions in a common header file. The idea is have a set of definitions that can be use as the basis for system call specific definitions such as MAP_HUGE_* and SHM_HUGE_*. 2) Remove MAP_HUGE_* definitions in arch specific files. All these definitions are the same. Consolidate all definitions in the primary user header file (uapi/linux/mman.h). 3) Remove SHM_HUGE_* definitions intended for user space from kernel header file, and add to user (uapi/linux/shm.h) header file. Add definitions for all known huge page size encodings as in mmap. This patch (of 3): If hugetlb pages are requested in mmap or shmget system calls, a huge page size other than default can be requested. This is accomplished by encoding the log2 of the huge page size in the upper bits of the flag argument. asm-generic and arch specific headers all define the same values for these encodings. Put common definitions in a single header file. The primary uapi header files for mmap and shm will use these definitions as a basis for definitions specific to those system calls. Link: http://lkml.kernel.org/r/1501527386-10736-2-git-send-email-mike.kravetz@oracle.com Signed-off-by: Mike Kravetz Acked-by: Michal Hocko Cc: Matthew Wilcox Cc: Andi Kleen Cc: Michael Kerrisk Cc: Davidlohr Bueso Cc: Anshuman Khandual Cc: Aneesh Kumar K.V Cc: Andrea Arcangeli Cc: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/asm-generic/hugetlb_encode.h | 34 +++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 include/uapi/asm-generic/hugetlb_encode.h (limited to 'include/uapi') diff --git a/include/uapi/asm-generic/hugetlb_encode.h b/include/uapi/asm-generic/hugetlb_encode.h new file mode 100644 index 000000000000..e4732d3c2998 --- /dev/null +++ b/include/uapi/asm-generic/hugetlb_encode.h @@ -0,0 +1,34 @@ +#ifndef _ASM_GENERIC_HUGETLB_ENCODE_H_ +#define _ASM_GENERIC_HUGETLB_ENCODE_H_ + +/* + * Several system calls take a flag to request "hugetlb" huge pages. + * Without further specification, these system calls will use the + * system's default huge page size. If a system supports multiple + * huge page sizes, the desired huge page size can be specified in + * bits [26:31] of the flag arguments. The value in these 6 bits + * will encode the log2 of the huge page size. + * + * The following definitions are associated with this huge page size + * encoding in flag arguments. System call specific header files + * that use this encoding should include this file. They can then + * provide definitions based on these with their own specific prefix. + * for example: + * #define MAP_HUGE_SHIFT HUGETLB_FLAG_ENCODE_SHIFT + */ + +#define HUGETLB_FLAG_ENCODE_SHIFT 26 +#define HUGETLB_FLAG_ENCODE_MASK 0x3f + +#define HUGETLB_FLAG_ENCODE_64KB (16 << HUGETLB_FLAG_ENCODE_SHIFT) +#define HUGETLB_FLAG_ENCODE_512KB (19 << HUGETLB_FLAG_ENCODE_SHIFT) +#define HUGETLB_FLAG_ENCODE_1MB (20 << HUGETLB_FLAG_ENCODE_SHIFT) +#define HUGETLB_FLAG_ENCODE_2MB (21 << HUGETLB_FLAG_ENCODE_SHIFT) +#define HUGETLB_FLAG_ENCODE_8MB (23 << HUGETLB_FLAG_ENCODE_SHIFT) +#define HUGETLB_FLAG_ENCODE_16MB (24 << HUGETLB_FLAG_ENCODE_SHIFT) +#define HUGETLB_FLAG_ENCODE_256MB (28 << HUGETLB_FLAG_ENCODE_SHIFT) +#define HUGETLB_FLAG_ENCODE_1GB (30 << HUGETLB_FLAG_ENCODE_SHIFT) +#define HUGETLB_FLAG_ENCODE_2GB (31 << HUGETLB_FLAG_ENCODE_SHIFT) +#define HUGETLB_FLAG_ENCODE_16GB (34 << HUGETLB_FLAG_ENCODE_SHIFT) + +#endif /* _ASM_GENERIC_HUGETLB_ENCODE_H_ */ -- cgit v1.2.3 From aafd4562dfee81a40ba21b5ea3cf5e06664bc7f6 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Wed, 6 Sep 2017 16:23:29 -0700 Subject: mm: arch: consolidate mmap hugetlb size encodings A non-default huge page size can be encoded in the flags argument of the mmap system call. The definitions for these encodings are in arch specific header files. However, all architectures use the same values. Consolidate all the definitions in the primary user header file (uapi/linux/mman.h). Include definitions for all known huge page sizes. Use the generic encoding definitions in hugetlb_encode.h as the basis for these definitions. Link: http://lkml.kernel.org/r/1501527386-10736-3-git-send-email-mike.kravetz@oracle.com Signed-off-by: Mike Kravetz Acked-by: Michal Hocko Cc: Andi Kleen Cc: Andrea Arcangeli Cc: Aneesh Kumar K.V Cc: Anshuman Khandual Cc: Arnd Bergmann Cc: Davidlohr Bueso Cc: Matthew Wilcox Cc: Michael Kerrisk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/include/uapi/asm/mman.h | 11 ----------- arch/mips/include/uapi/asm/mman.h | 11 ----------- arch/parisc/include/uapi/asm/mman.h | 11 ----------- arch/powerpc/include/uapi/asm/mman.h | 16 ---------------- arch/x86/include/uapi/asm/mman.h | 3 --- arch/xtensa/include/uapi/asm/mman.h | 11 ----------- include/uapi/asm-generic/mman-common.h | 11 ----------- include/uapi/linux/mman.h | 22 ++++++++++++++++++++++ 8 files changed, 22 insertions(+), 74 deletions(-) (limited to 'include/uapi') diff --git a/arch/alpha/include/uapi/asm/mman.h b/arch/alpha/include/uapi/asm/mman.h index 02760f6e6ca4..13b52aad3c43 100644 --- a/arch/alpha/include/uapi/asm/mman.h +++ b/arch/alpha/include/uapi/asm/mman.h @@ -67,17 +67,6 @@ /* compatibility flags */ #define MAP_FILE 0 -/* - * When MAP_HUGETLB is set bits [26:31] encode the log2 of the huge page size. - * This gives us 6 bits, which is enough until someone invents 128 bit address - * spaces. - * - * Assume these are all power of twos. - * When 0 use the default page size. - */ -#define MAP_HUGE_SHIFT 26 -#define MAP_HUGE_MASK 0x3f - #define PKEY_DISABLE_ACCESS 0x1 #define PKEY_DISABLE_WRITE 0x2 #define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS |\ diff --git a/arch/mips/include/uapi/asm/mman.h b/arch/mips/include/uapi/asm/mman.h index 655e2fb5395b..398eebcc3541 100644 --- a/arch/mips/include/uapi/asm/mman.h +++ b/arch/mips/include/uapi/asm/mman.h @@ -94,17 +94,6 @@ /* compatibility flags */ #define MAP_FILE 0 -/* - * When MAP_HUGETLB is set bits [26:31] encode the log2 of the huge page size. - * This gives us 6 bits, which is enough until someone invents 128 bit address - * spaces. - * - * Assume these are all power of twos. - * When 0 use the default page size. - */ -#define MAP_HUGE_SHIFT 26 -#define MAP_HUGE_MASK 0x3f - #define PKEY_DISABLE_ACCESS 0x1 #define PKEY_DISABLE_WRITE 0x2 #define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS |\ diff --git a/arch/parisc/include/uapi/asm/mman.h b/arch/parisc/include/uapi/asm/mman.h index 9a9c2fe4be50..b87fbe3f338a 100644 --- a/arch/parisc/include/uapi/asm/mman.h +++ b/arch/parisc/include/uapi/asm/mman.h @@ -64,17 +64,6 @@ #define MAP_FILE 0 #define MAP_VARIABLE 0 -/* - * When MAP_HUGETLB is set bits [26:31] encode the log2 of the huge page size. - * This gives us 6 bits, which is enough until someone invents 128 bit address - * spaces. - * - * Assume these are all power of twos. - * When 0 use the default page size. - */ -#define MAP_HUGE_SHIFT 26 -#define MAP_HUGE_MASK 0x3f - #define PKEY_DISABLE_ACCESS 0x1 #define PKEY_DISABLE_WRITE 0x2 #define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS |\ diff --git a/arch/powerpc/include/uapi/asm/mman.h b/arch/powerpc/include/uapi/asm/mman.h index ab45cc2f3101..03c06ba7464f 100644 --- a/arch/powerpc/include/uapi/asm/mman.h +++ b/arch/powerpc/include/uapi/asm/mman.h @@ -29,20 +29,4 @@ #define MAP_STACK 0x20000 /* give out an address that is best suited for process/thread stacks */ #define MAP_HUGETLB 0x40000 /* create a huge page mapping */ -/* - * When MAP_HUGETLB is set, bits [26:31] of the flags argument to mmap(2), - * encode the log2 of the huge page size. A value of zero indicates that the - * default huge page size should be used. To use a non-default huge page size, - * one of these defines can be used, or the size can be encoded by hand. Note - * that on most systems only a subset, or possibly none, of these sizes will be - * available. - */ -#define MAP_HUGE_512KB (19 << MAP_HUGE_SHIFT) /* 512KB HugeTLB Page */ -#define MAP_HUGE_1MB (20 << MAP_HUGE_SHIFT) /* 1MB HugeTLB Page */ -#define MAP_HUGE_2MB (21 << MAP_HUGE_SHIFT) /* 2MB HugeTLB Page */ -#define MAP_HUGE_8MB (23 << MAP_HUGE_SHIFT) /* 8MB HugeTLB Page */ -#define MAP_HUGE_16MB (24 << MAP_HUGE_SHIFT) /* 16MB HugeTLB Page */ -#define MAP_HUGE_1GB (30 << MAP_HUGE_SHIFT) /* 1GB HugeTLB Page */ -#define MAP_HUGE_16GB (34 << MAP_HUGE_SHIFT) /* 16GB HugeTLB Page */ - #endif /* _UAPI_ASM_POWERPC_MMAN_H */ diff --git a/arch/x86/include/uapi/asm/mman.h b/arch/x86/include/uapi/asm/mman.h index 39bca7fac087..3be08f07695c 100644 --- a/arch/x86/include/uapi/asm/mman.h +++ b/arch/x86/include/uapi/asm/mman.h @@ -3,9 +3,6 @@ #define MAP_32BIT 0x40 /* only give out 32bit addresses */ -#define MAP_HUGE_2MB (21 << MAP_HUGE_SHIFT) -#define MAP_HUGE_1GB (30 << MAP_HUGE_SHIFT) - #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS /* * Take the 4 protection key bits out of the vma->vm_flags diff --git a/arch/xtensa/include/uapi/asm/mman.h b/arch/xtensa/include/uapi/asm/mman.h index 24365b30aae9..8ce77a2e9bab 100644 --- a/arch/xtensa/include/uapi/asm/mman.h +++ b/arch/xtensa/include/uapi/asm/mman.h @@ -106,17 +106,6 @@ /* compatibility flags */ #define MAP_FILE 0 -/* - * When MAP_HUGETLB is set bits [26:31] encode the log2 of the huge page size. - * This gives us 6 bits, which is enough until someone invents 128 bit address - * spaces. - * - * Assume these are all power of twos. - * When 0 use the default page size. - */ -#define MAP_HUGE_SHIFT 26 -#define MAP_HUGE_MASK 0x3f - #define PKEY_DISABLE_ACCESS 0x1 #define PKEY_DISABLE_WRITE 0x2 #define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS |\ diff --git a/include/uapi/asm-generic/mman-common.h b/include/uapi/asm-generic/mman-common.h index 8c27db0c5c08..d248f3c335b5 100644 --- a/include/uapi/asm-generic/mman-common.h +++ b/include/uapi/asm-generic/mman-common.h @@ -61,17 +61,6 @@ /* compatibility flags */ #define MAP_FILE 0 -/* - * When MAP_HUGETLB is set bits [26:31] encode the log2 of the huge page size. - * This gives us 6 bits, which is enough until someone invents 128 bit address - * spaces. - * - * Assume these are all power of twos. - * When 0 use the default page size. - */ -#define MAP_HUGE_SHIFT 26 -#define MAP_HUGE_MASK 0x3f - #define PKEY_DISABLE_ACCESS 0x1 #define PKEY_DISABLE_WRITE 0x2 #define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS |\ diff --git a/include/uapi/linux/mman.h b/include/uapi/linux/mman.h index ade4acd3a90c..a937480d7cd3 100644 --- a/include/uapi/linux/mman.h +++ b/include/uapi/linux/mman.h @@ -2,6 +2,7 @@ #define _UAPI_LINUX_MMAN_H #include +#include #define MREMAP_MAYMOVE 1 #define MREMAP_FIXED 2 @@ -10,4 +11,25 @@ #define OVERCOMMIT_ALWAYS 1 #define OVERCOMMIT_NEVER 2 +/* + * Huge page size encoding when MAP_HUGETLB is specified, and a huge page + * size other than the default is desired. See hugetlb_encode.h. + * All known huge page size encodings are provided here. It is the + * responsibility of the application to know which sizes are supported on + * the running system. See mmap(2) man page for details. + */ +#define MAP_HUGE_SHIFT HUGETLB_FLAG_ENCODE_SHIFT +#define MAP_HUGE_MASK HUGETLB_FLAG_ENCODE_MASK + +#define MAP_HUGE_64KB HUGETLB_FLAG_ENCODE_64KB +#define MAP_HUGE_512KB HUGETLB_FLAG_ENCODE_512KB +#define MAP_HUGE_1MB HUGETLB_FLAG_ENCODE_1MB +#define MAP_HUGE_2MB HUGETLB_FLAG_ENCODE_2MB +#define MAP_HUGE_8MB HUGETLB_FLAG_ENCODE_8MB +#define MAP_HUGE_16MB HUGETLB_FLAG_ENCODE_16MB +#define MAP_HUGE_256MB HUGETLB_FLAG_ENCODE_256MB +#define MAP_HUGE_1GB HUGETLB_FLAG_ENCODE_1GB +#define MAP_HUGE_2GB HUGETLB_FLAG_ENCODE_2GB +#define MAP_HUGE_16GB HUGETLB_FLAG_ENCODE_16GB + #endif /* _UAPI_LINUX_MMAN_H */ -- cgit v1.2.3 From 4da243ac1cf6aeb30b7c555d56208982d66d6d33 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Wed, 6 Sep 2017 16:23:33 -0700 Subject: mm: shm: use new hugetlb size encoding definitions Use the common definitions from hugetlb_encode.h header file for encoding hugetlb size definitions in shmget system call flags. In addition, move these definitions from the internal (kernel) to user (uapi) header file. Link: http://lkml.kernel.org/r/1501527386-10736-4-git-send-email-mike.kravetz@oracle.com Signed-off-by: Mike Kravetz Suggested-by: Matthew Wilcox Acked-by: Michal Hocko Cc: Andi Kleen Cc: Andrea Arcangeli Cc: Aneesh Kumar K.V Cc: Anshuman Khandual Cc: Arnd Bergmann Cc: Davidlohr Bueso Cc: Michael Kerrisk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/shm.h | 17 ----------------- include/uapi/linux/shm.h | 31 +++++++++++++++++++++++++++++-- 2 files changed, 29 insertions(+), 19 deletions(-) (limited to 'include/uapi') diff --git a/include/linux/shm.h b/include/linux/shm.h index 0fb7061ec54c..21a5e6c43385 100644 --- a/include/linux/shm.h +++ b/include/linux/shm.h @@ -27,23 +27,6 @@ struct shmid_kernel /* private to the kernel */ /* shm_mode upper byte flags */ #define SHM_DEST 01000 /* segment will be destroyed on last detach */ #define SHM_LOCKED 02000 /* segment will not be swapped */ -#define SHM_HUGETLB 04000 /* segment will use huge TLB pages */ -#define SHM_NORESERVE 010000 /* don't check for reservations */ - -/* Bits [26:31] are reserved */ - -/* - * When SHM_HUGETLB is set bits [26:31] encode the log2 of the huge page size. - * This gives us 6 bits, which is enough until someone invents 128 bit address - * spaces. - * - * Assume these are all power of twos. - * When 0 use the default page size. - */ -#define SHM_HUGE_SHIFT 26 -#define SHM_HUGE_MASK 0x3f -#define SHM_HUGE_2MB (21 << SHM_HUGE_SHIFT) -#define SHM_HUGE_1GB (30 << SHM_HUGE_SHIFT) #ifdef CONFIG_SYSVIPC struct sysv_shm { diff --git a/include/uapi/linux/shm.h b/include/uapi/linux/shm.h index 1fbf24ea37fd..cf23c873719d 100644 --- a/include/uapi/linux/shm.h +++ b/include/uapi/linux/shm.h @@ -3,6 +3,7 @@ #include #include +#include #ifndef __KERNEL__ #include #endif @@ -40,11 +41,37 @@ struct shmid_ds { /* Include the definition of shmid64_ds and shminfo64 */ #include -/* permission flag for shmget */ +/* + * shmget() shmflg values. + */ +/* The bottom nine bits are the same as open(2) mode flags */ #define SHM_R 0400 /* or S_IRUGO from */ #define SHM_W 0200 /* or S_IWUGO from */ +/* Bits 9 & 10 are IPC_CREAT and IPC_EXCL */ +#define SHM_HUGETLB 04000 /* segment will use huge TLB pages */ +#define SHM_NORESERVE 010000 /* don't check for reservations */ + +/* + * Huge page size encoding when SHM_HUGETLB is specified, and a huge page + * size other than the default is desired. See hugetlb_encode.h + */ +#define SHM_HUGE_SHIFT HUGETLB_FLAG_ENCODE_SHIFT +#define SHM_HUGE_MASK HUGETLB_FLAG_ENCODE_MASK + +#define SHM_HUGE_64KB HUGETLB_FLAG_ENCODE_64KB +#define SHM_HUGE_512KB HUGETLB_FLAG_ENCODE_512KB +#define SHM_HUGE_1MB HUGETLB_FLAG_ENCODE_1MB +#define SHM_HUGE_2MB HUGETLB_FLAG_ENCODE_2MB +#define SHM_HUGE_8MB HUGETLB_FLAG_ENCODE_8MB +#define SHM_HUGE_16MB HUGETLB_FLAG_ENCODE_16MB +#define SHM_HUGE_256MB HUGETLB_FLAG_ENCODE_256MB +#define SHM_HUGE_1GB HUGETLB_FLAG_ENCODE_1GB +#define SHM_HUGE_2GB HUGETLB_FLAG_ENCODE_2GB +#define SHM_HUGE_16GB HUGETLB_FLAG_ENCODE_16GB -/* mode for attach */ +/* + * shmat() shmflg values + */ #define SHM_RDONLY 010000 /* read-only access */ #define SHM_RND 020000 /* round attach address to SHMLBA boundary */ #define SHM_REMAP 040000 /* take-over region on attach */ -- cgit v1.2.3 From 2d6d6f5a09a96cc1fec7ed992b825e05f64cb50e Mon Sep 17 00:00:00 2001 From: Prakash Sangappa Date: Wed, 6 Sep 2017 16:23:39 -0700 Subject: mm: userfaultfd: add feature to request for a signal delivery In some cases, userfaultfd mechanism should just deliver a SIGBUS signal to the faulting process, instead of the page-fault event. Dealing with page-fault event using a monitor thread can be an overhead in these cases. For example applications like the database could use the signaling mechanism for robustness purpose. Database uses hugetlbfs for performance reason. Files on hugetlbfs filesystem are created and huge pages allocated using fallocate() API. Pages are deallocated/freed using fallocate() hole punching support. These files are mmapped and accessed by many processes as shared memory. The database keeps track of which offsets in the hugetlbfs file have pages allocated. Any access to mapped address over holes in the file, which can occur due to bugs in the application, is considered invalid and expect the process to simply receive a SIGBUS. However, currently when a hole in the file is accessed via the mapped address, kernel/mm attempts to automatically allocate a page at page fault time, resulting in implicitly filling the hole in the file. This may not be the desired behavior for applications like the database that want to explicitly manage page allocations of hugetlbfs files. Using userfaultfd mechanism with this support to get a signal, database application can prevent pages from being allocated implicitly when processes access mapped address over holes in the file. This patch adds UFFD_FEATURE_SIGBUS feature to userfaultfd mechnism to request for a SIGBUS signal. See following for previous discussion about the database requirement leading to this proposal as suggested by Andrea. http://www.spinics.net/lists/linux-mm/msg129224.html Link: http://lkml.kernel.org/r/1501552446-748335-2-git-send-email-prakash.sangappa@oracle.com Signed-off-by: Prakash Sangappa Reviewed-by: Mike Rapoport Reviewed-by: Andrea Arcangeli Cc: Mike Kravetz Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/userfaultfd.c | 3 +++ include/uapi/linux/userfaultfd.h | 10 +++++++++- 2 files changed, 12 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 01a85e2660b8..5fd4d846691f 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -370,6 +370,9 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason) VM_BUG_ON(reason & ~(VM_UFFD_MISSING|VM_UFFD_WP)); VM_BUG_ON(!(reason & VM_UFFD_MISSING) ^ !!(reason & VM_UFFD_WP)); + if (ctx->features & UFFD_FEATURE_SIGBUS) + goto out; + /* * If it's already released don't get it. This avoids to loop * in __get_user_pages if userfaultfd_release waits on the diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h index 3b059530dac9..d39d5db56771 100644 --- a/include/uapi/linux/userfaultfd.h +++ b/include/uapi/linux/userfaultfd.h @@ -23,7 +23,8 @@ UFFD_FEATURE_EVENT_REMOVE | \ UFFD_FEATURE_EVENT_UNMAP | \ UFFD_FEATURE_MISSING_HUGETLBFS | \ - UFFD_FEATURE_MISSING_SHMEM) + UFFD_FEATURE_MISSING_SHMEM | \ + UFFD_FEATURE_SIGBUS) #define UFFD_API_IOCTLS \ ((__u64)1 << _UFFDIO_REGISTER | \ (__u64)1 << _UFFDIO_UNREGISTER | \ @@ -153,6 +154,12 @@ struct uffdio_api { * UFFD_FEATURE_MISSING_SHMEM works the same as * UFFD_FEATURE_MISSING_HUGETLBFS, but it applies to shmem * (i.e. tmpfs and other shmem based APIs). + * + * UFFD_FEATURE_SIGBUS feature means no page-fault + * (UFFD_EVENT_PAGEFAULT) event will be delivered, instead + * a SIGBUS signal will be sent to the faulting process. + * The application process can enable this behavior by adding + * it to uffdio_api.features. */ #define UFFD_FEATURE_PAGEFAULT_FLAG_WP (1<<0) #define UFFD_FEATURE_EVENT_FORK (1<<1) @@ -161,6 +168,7 @@ struct uffdio_api { #define UFFD_FEATURE_MISSING_HUGETLBFS (1<<4) #define UFFD_FEATURE_MISSING_SHMEM (1<<5) #define UFFD_FEATURE_EVENT_UNMAP (1<<6) +#define UFFD_FEATURE_SIGBUS (1<<7) __u64 features; __u64 ioctls; -- cgit v1.2.3 From 9d4ac934829ac58c5109c49e6dfe677300e5e652 Mon Sep 17 00:00:00 2001 From: Alexey Perevalov Date: Wed, 6 Sep 2017 16:23:56 -0700 Subject: userfaultfd: provide pid in userfault msg It could be useful for calculating downtime during postcopy live migration per vCPU. Side observer or application itself will be informed about proper task's sleep during userfaultfd processing. Process's thread id is being provided when user requeste it by setting UFFD_FEATURE_THREAD_ID bit into uffdio_api.features. Link: http://lkml.kernel.org/r/20170802165145.22628-6-aarcange@redhat.com Signed-off-by: Alexey Perevalov Signed-off-by: Andrea Arcangeli Cc: "Dr. David Alan Gilbert" Cc: Maxime Coquelin Cc: Mike Kravetz Cc: Mike Rapoport Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/userfaultfd.c | 8 ++++++-- include/uapi/linux/userfaultfd.h | 10 +++++++--- 2 files changed, 13 insertions(+), 5 deletions(-) (limited to 'include/uapi') diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 5fd4d846691f..665bf7a930b2 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -178,7 +178,8 @@ static inline void msg_init(struct uffd_msg *msg) static inline struct uffd_msg userfault_msg(unsigned long address, unsigned int flags, - unsigned long reason) + unsigned long reason, + unsigned int features) { struct uffd_msg msg; msg_init(&msg); @@ -202,6 +203,8 @@ static inline struct uffd_msg userfault_msg(unsigned long address, * write protect fault. */ msg.arg.pagefault.flags |= UFFD_PAGEFAULT_FLAG_WP; + if (features & UFFD_FEATURE_THREAD_ID) + msg.arg.pagefault.ptid = task_pid_vnr(current); return msg; } @@ -422,7 +425,8 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason) init_waitqueue_func_entry(&uwq.wq, userfaultfd_wake_function); uwq.wq.private = current; - uwq.msg = userfault_msg(vmf->address, vmf->flags, reason); + uwq.msg = userfault_msg(vmf->address, vmf->flags, reason, + ctx->features); uwq.ctx = ctx; uwq.waken = false; diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h index d39d5db56771..2b24c28d99a7 100644 --- a/include/uapi/linux/userfaultfd.h +++ b/include/uapi/linux/userfaultfd.h @@ -24,7 +24,8 @@ UFFD_FEATURE_EVENT_UNMAP | \ UFFD_FEATURE_MISSING_HUGETLBFS | \ UFFD_FEATURE_MISSING_SHMEM | \ - UFFD_FEATURE_SIGBUS) + UFFD_FEATURE_SIGBUS | \ + UFFD_FEATURE_THREAD_ID) #define UFFD_API_IOCTLS \ ((__u64)1 << _UFFDIO_REGISTER | \ (__u64)1 << _UFFDIO_UNREGISTER | \ @@ -79,6 +80,7 @@ struct uffd_msg { struct { __u64 flags; __u64 address; + __u32 ptid; } pagefault; struct { @@ -158,8 +160,9 @@ struct uffdio_api { * UFFD_FEATURE_SIGBUS feature means no page-fault * (UFFD_EVENT_PAGEFAULT) event will be delivered, instead * a SIGBUS signal will be sent to the faulting process. - * The application process can enable this behavior by adding - * it to uffdio_api.features. + * + * UFFD_FEATURE_THREAD_ID pid of the page faulted task_struct will + * be returned, if feature is not requested 0 will be returned. */ #define UFFD_FEATURE_PAGEFAULT_FLAG_WP (1<<0) #define UFFD_FEATURE_EVENT_FORK (1<<1) @@ -169,6 +172,7 @@ struct uffdio_api { #define UFFD_FEATURE_MISSING_SHMEM (1<<5) #define UFFD_FEATURE_EVENT_UNMAP (1<<6) #define UFFD_FEATURE_SIGBUS (1<<7) +#define UFFD_FEATURE_THREAD_ID (1<<8) __u64 features; __u64 ioctls; -- cgit v1.2.3 From a36985d31a65d5c0559fb582719e32eaf0ccec3b Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Wed, 6 Sep 2017 16:23:59 -0700 Subject: userfaultfd: provide pid in userfault msg - add feat union No ABI change, but this will make it more explicit to software that ptid is only available if requested by passing UFFD_FEATURE_THREAD_ID to UFFDIO_API. The fact it's a union will also self document it shouldn't be taken for granted there's a tpid there. Link: http://lkml.kernel.org/r/20170802165145.22628-7-aarcange@redhat.com Signed-off-by: Andrea Arcangeli Cc: "Dr. David Alan Gilbert" Cc: Alexey Perevalov Cc: Maxime Coquelin Cc: Mike Kravetz Cc: Mike Rapoport Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/userfaultfd.c | 2 +- include/uapi/linux/userfaultfd.h | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 665bf7a930b2..5419e7da82ba 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -204,7 +204,7 @@ static inline struct uffd_msg userfault_msg(unsigned long address, */ msg.arg.pagefault.flags |= UFFD_PAGEFAULT_FLAG_WP; if (features & UFFD_FEATURE_THREAD_ID) - msg.arg.pagefault.ptid = task_pid_vnr(current); + msg.arg.pagefault.feat.ptid = task_pid_vnr(current); return msg; } diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h index 2b24c28d99a7..d6d1f65cb3c3 100644 --- a/include/uapi/linux/userfaultfd.h +++ b/include/uapi/linux/userfaultfd.h @@ -80,7 +80,9 @@ struct uffd_msg { struct { __u64 flags; __u64 address; - __u32 ptid; + union { + __u32 ptid; + } feat; } pagefault; struct { -- cgit v1.2.3 From 749df87bd7bee5a79cef073f5d032ddb2b211de8 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Wed, 6 Sep 2017 16:24:16 -0700 Subject: mm/shmem: add hugetlbfs support to memfd_create() This patch came out of discussions in this e-mail thread: http://lkml.kernel.org/r/1499357846-7481-1-git-send-email-mike.kravetz%40oracle.com The Oracle JVM team is developing a new garbage collection model. This new model requires multiple mappings of the same anonymous memory. One straight forward way to accomplish this is with memfd_create. They can use the returned fd to create multiple mappings of the same memory. The JVM today has an option to use (static hugetlb) huge pages. If this option is specified, they would like to use the same garbage collection model requiring multiple mappings to the same memory. Using hugetlbfs, it is possible to explicitly mount a filesystem and specify file paths in order to get an fd that can be used for multiple mappings. However, this introduces additional system admin work and coordination. Ideally they would like to get a hugetlbfs fd without requiring explicit mounting of a filesystem. Today, mmap and shmget can make use of hugetlbfs without explicitly mounting a filesystem. The patch adds this functionality to memfd_create. Add a new flag MFD_HUGETLB to memfd_create() that will specify the file to be created resides in the hugetlbfs filesystem. This is the generic hugetlbfs filesystem not associated with any specific mount point. As with other system calls that request hugetlbfs backed pages, there is the ability to encode huge page size in the flag arguments. hugetlbfs does not support sealing operations, therefore specifying MFD_ALLOW_SEALING with MFD_HUGETLB will result in EINVAL. Of course, the memfd_man page would need updating if this type of functionality moves forward. Link: http://lkml.kernel.org/r/1502149672-7759-2-git-send-email-mike.kravetz@oracle.com Signed-off-by: Mike Kravetz Acked-by: Michal Hocko Cc: Hugh Dickins Cc: Andrea Arcangeli Cc: "Kirill A . Shutemov" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/linux/memfd.h | 24 ++++++++++++++++++++++++ mm/shmem.c | 37 +++++++++++++++++++++++++++++++------ 2 files changed, 55 insertions(+), 6 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/memfd.h b/include/uapi/linux/memfd.h index 534e364bda92..7f3a722dbd72 100644 --- a/include/uapi/linux/memfd.h +++ b/include/uapi/linux/memfd.h @@ -1,8 +1,32 @@ #ifndef _UAPI_LINUX_MEMFD_H #define _UAPI_LINUX_MEMFD_H +#include + /* flags for memfd_create(2) (unsigned int) */ #define MFD_CLOEXEC 0x0001U #define MFD_ALLOW_SEALING 0x0002U +#define MFD_HUGETLB 0x0004U + +/* + * Huge page size encoding when MFD_HUGETLB is specified, and a huge page + * size other than the default is desired. See hugetlb_encode.h. + * All known huge page size encodings are provided here. It is the + * responsibility of the application to know which sizes are supported on + * the running system. See mmap(2) man page for details. + */ +#define MFD_HUGE_SHIFT HUGETLB_FLAG_ENCODE_SHIFT +#define MFD_HUGE_MASK HUGETLB_FLAG_ENCODE_MASK + +#define MFD_HUGE_64KB HUGETLB_FLAG_ENCODE_64KB +#define MFD_HUGE_512KB HUGETLB_FLAG_ENCODE_512KB +#define MFD_HUGE_1MB HUGETLB_FLAG_ENCODE_1MB +#define MFD_HUGE_2MB HUGETLB_FLAG_ENCODE_2MB +#define MFD_HUGE_8MB HUGETLB_FLAG_ENCODE_8MB +#define MFD_HUGE_16MB HUGETLB_FLAG_ENCODE_16MB +#define MFD_HUGE_256MB HUGETLB_FLAG_ENCODE_256MB +#define MFD_HUGE_1GB HUGETLB_FLAG_ENCODE_1GB +#define MFD_HUGE_2GB HUGETLB_FLAG_ENCODE_2GB +#define MFD_HUGE_16GB HUGETLB_FLAG_ENCODE_16GB #endif /* _UAPI_LINUX_MEMFD_H */ diff --git a/mm/shmem.c b/mm/shmem.c index 64bdc91187f7..47179bbe9ee7 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -34,6 +34,7 @@ #include #include #include +#include #include /* for arch/microblaze update_mmu_cache() */ @@ -3652,7 +3653,7 @@ static int shmem_show_options(struct seq_file *seq, struct dentry *root) #define MFD_NAME_PREFIX_LEN (sizeof(MFD_NAME_PREFIX) - 1) #define MFD_NAME_MAX_LEN (NAME_MAX - MFD_NAME_PREFIX_LEN) -#define MFD_ALL_FLAGS (MFD_CLOEXEC | MFD_ALLOW_SEALING) +#define MFD_ALL_FLAGS (MFD_CLOEXEC | MFD_ALLOW_SEALING | MFD_HUGETLB) SYSCALL_DEFINE2(memfd_create, const char __user *, uname, @@ -3664,8 +3665,18 @@ SYSCALL_DEFINE2(memfd_create, char *name; long len; - if (flags & ~(unsigned int)MFD_ALL_FLAGS) - return -EINVAL; + if (!(flags & MFD_HUGETLB)) { + if (flags & ~(unsigned int)MFD_ALL_FLAGS) + return -EINVAL; + } else { + /* Sealing not supported in hugetlbfs (MFD_HUGETLB) */ + if (flags & MFD_ALLOW_SEALING) + return -EINVAL; + /* Allow huge page size encoding in flags. */ + if (flags & ~(unsigned int)(MFD_ALL_FLAGS | + (MFD_HUGE_MASK << MFD_HUGE_SHIFT))) + return -EINVAL; + } /* length includes terminating zero */ len = strnlen_user(uname, MFD_NAME_MAX_LEN + 1); @@ -3696,16 +3707,30 @@ SYSCALL_DEFINE2(memfd_create, goto err_name; } - file = shmem_file_setup(name, 0, VM_NORESERVE); + if (flags & MFD_HUGETLB) { + struct user_struct *user = NULL; + + file = hugetlb_file_setup(name, 0, VM_NORESERVE, &user, + HUGETLB_ANONHUGE_INODE, + (flags >> MFD_HUGE_SHIFT) & + MFD_HUGE_MASK); + } else + file = shmem_file_setup(name, 0, VM_NORESERVE); if (IS_ERR(file)) { error = PTR_ERR(file); goto err_fd; } - info = SHMEM_I(file_inode(file)); file->f_mode |= FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE; file->f_flags |= O_RDWR | O_LARGEFILE; - if (flags & MFD_ALLOW_SEALING) + + if (flags & MFD_ALLOW_SEALING) { + /* + * flags check at beginning of function ensures + * this is not a hugetlbfs (MFD_HUGETLB) file. + */ + info = SHMEM_I(file_inode(file)); info->seals &= ~F_SEAL_SEAL; + } fd_install(fd, file); kfree(name); -- cgit v1.2.3 From d2cd9ede6e193dd7d88b6d27399e96229a551b19 Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Wed, 6 Sep 2017 16:25:15 -0700 Subject: mm,fork: introduce MADV_WIPEONFORK MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce MADV_WIPEONFORK semantics, which result in a VMA being empty in the child process after fork. This differs from MADV_DONTFORK in one important way. If a child process accesses memory that was MADV_WIPEONFORK, it will get zeroes. The address ranges are still valid, they are just empty. If a child process accesses memory that was MADV_DONTFORK, it will get a segmentation fault, since those address ranges are no longer valid in the child after fork. Since MADV_DONTFORK also seems to be used to allow very large programs to fork in systems with strict memory overcommit restrictions, changing the semantics of MADV_DONTFORK might break existing programs. MADV_WIPEONFORK only works on private, anonymous VMAs. The use case is libraries that store or cache information, and want to know that they need to regenerate it in the child process after fork. Examples of this would be: - systemd/pulseaudio API checks (fail after fork) (replacing a getpid check, which is too slow without a PID cache) - PKCS#11 API reinitialization check (mandated by specification) - glibc's upcoming PRNG (reseed after fork) - OpenSSL PRNG (reseed after fork) The security benefits of a forking server having a re-inialized PRNG in every child process are pretty obvious. However, due to libraries having all kinds of internal state, and programs getting compiled with many different versions of each library, it is unreasonable to expect calling programs to re-initialize everything manually after fork. A further complication is the proliferation of clone flags, programs bypassing glibc's functions to call clone directly, and programs calling unshare, causing the glibc pthread_atfork hook to not get called. It would be better to have the kernel take care of this automatically. The patch also adds MADV_KEEPONFORK, to undo the effects of a prior MADV_WIPEONFORK. This is similar to the OpenBSD minherit syscall with MAP_INHERIT_ZERO: https://man.openbsd.org/minherit.2 [akpm@linux-foundation.org: numerically order arch/parisc/include/uapi/asm/mman.h #defines] Link: http://lkml.kernel.org/r/20170811212829.29186-3-riel@redhat.com Signed-off-by: Rik van Riel Reported-by: Florian Weimer Reported-by: Colm MacCártaigh Reviewed-by: Mike Kravetz Cc: "H. Peter Anvin" Cc: "Kirill A. Shutemov" Cc: Andy Lutomirski Cc: Dave Hansen Cc: Ingo Molnar Cc: Helge Deller Cc: Kees Cook Cc: Matthew Wilcox Cc: Thomas Gleixner Cc: Will Drewry Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/include/uapi/asm/mman.h | 3 +++ arch/mips/include/uapi/asm/mman.h | 3 +++ arch/parisc/include/uapi/asm/mman.h | 3 +++ arch/xtensa/include/uapi/asm/mman.h | 3 +++ fs/proc/task_mmu.c | 1 + include/linux/mm.h | 2 +- include/trace/events/mmflags.h | 8 +------- include/uapi/asm-generic/mman-common.h | 3 +++ kernel/fork.c | 10 ++++++++-- mm/madvise.c | 13 +++++++++++++ 10 files changed, 39 insertions(+), 10 deletions(-) (limited to 'include/uapi') diff --git a/arch/alpha/include/uapi/asm/mman.h b/arch/alpha/include/uapi/asm/mman.h index 13b52aad3c43..3b26cc62dadb 100644 --- a/arch/alpha/include/uapi/asm/mman.h +++ b/arch/alpha/include/uapi/asm/mman.h @@ -64,6 +64,9 @@ overrides the coredump filter bits */ #define MADV_DODUMP 17 /* Clear the MADV_NODUMP flag */ +#define MADV_WIPEONFORK 18 /* Zero memory on fork, child only */ +#define MADV_KEEPONFORK 19 /* Undo MADV_WIPEONFORK */ + /* compatibility flags */ #define MAP_FILE 0 diff --git a/arch/mips/include/uapi/asm/mman.h b/arch/mips/include/uapi/asm/mman.h index 398eebcc3541..da3216007fe0 100644 --- a/arch/mips/include/uapi/asm/mman.h +++ b/arch/mips/include/uapi/asm/mman.h @@ -91,6 +91,9 @@ overrides the coredump filter bits */ #define MADV_DODUMP 17 /* Clear the MADV_NODUMP flag */ +#define MADV_WIPEONFORK 18 /* Zero memory on fork, child only */ +#define MADV_KEEPONFORK 19 /* Undo MADV_WIPEONFORK */ + /* compatibility flags */ #define MAP_FILE 0 diff --git a/arch/parisc/include/uapi/asm/mman.h b/arch/parisc/include/uapi/asm/mman.h index b87fbe3f338a..775b5d5e41a1 100644 --- a/arch/parisc/include/uapi/asm/mman.h +++ b/arch/parisc/include/uapi/asm/mman.h @@ -57,6 +57,9 @@ overrides the coredump filter bits */ #define MADV_DODUMP 70 /* Clear the MADV_NODUMP flag */ +#define MADV_WIPEONFORK 71 /* Zero memory on fork, child only */ +#define MADV_KEEPONFORK 72 /* Undo MADV_WIPEONFORK */ + #define MADV_HWPOISON 100 /* poison a page for testing */ #define MADV_SOFT_OFFLINE 101 /* soft offline page for testing */ diff --git a/arch/xtensa/include/uapi/asm/mman.h b/arch/xtensa/include/uapi/asm/mman.h index 8ce77a2e9bab..b15b278aa314 100644 --- a/arch/xtensa/include/uapi/asm/mman.h +++ b/arch/xtensa/include/uapi/asm/mman.h @@ -103,6 +103,9 @@ overrides the coredump filter bits */ #define MADV_DODUMP 17 /* Clear the MADV_NODUMP flag */ +#define MADV_WIPEONFORK 18 /* Zero memory on fork, child only */ +#define MADV_KEEPONFORK 19 /* Undo MADV_WIPEONFORK */ + /* compatibility flags */ #define MAP_FILE 0 diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index b2330aedc63f..a290966f91ec 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -663,6 +663,7 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma) [ilog2(VM_NORESERVE)] = "nr", [ilog2(VM_HUGETLB)] = "ht", [ilog2(VM_ARCH_1)] = "ar", + [ilog2(VM_WIPEONFORK)] = "wf", [ilog2(VM_DONTDUMP)] = "dd", #ifdef CONFIG_MEM_SOFT_DIRTY [ilog2(VM_SOFTDIRTY)] = "sd", diff --git a/include/linux/mm.h b/include/linux/mm.h index 9efe62032094..39db8e54c5d5 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -189,7 +189,7 @@ extern unsigned int kobjsize(const void *objp); #define VM_NORESERVE 0x00200000 /* should the VM suppress accounting */ #define VM_HUGETLB 0x00400000 /* Huge TLB Page VM */ #define VM_ARCH_1 0x01000000 /* Architecture-specific flag */ -#define VM_ARCH_2 0x02000000 +#define VM_WIPEONFORK 0x02000000 /* Wipe VMA contents in child. */ #define VM_DONTDUMP 0x04000000 /* Do not include in the core dump */ #ifdef CONFIG_MEM_SOFT_DIRTY diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h index 8e50d01c645f..4c2e4737d7bc 100644 --- a/include/trace/events/mmflags.h +++ b/include/trace/events/mmflags.h @@ -125,12 +125,6 @@ IF_HAVE_PG_IDLE(PG_idle, "idle" ) #define __VM_ARCH_SPECIFIC_1 {VM_ARCH_1, "arch_1" } #endif -#if defined(CONFIG_X86) -#define __VM_ARCH_SPECIFIC_2 {VM_MPX, "mpx" } -#else -#define __VM_ARCH_SPECIFIC_2 {VM_ARCH_2, "arch_2" } -#endif - #ifdef CONFIG_MEM_SOFT_DIRTY #define IF_HAVE_VM_SOFTDIRTY(flag,name) {flag, name }, #else @@ -162,7 +156,7 @@ IF_HAVE_PG_IDLE(PG_idle, "idle" ) {VM_NORESERVE, "noreserve" }, \ {VM_HUGETLB, "hugetlb" }, \ __VM_ARCH_SPECIFIC_1 , \ - __VM_ARCH_SPECIFIC_2 , \ + {VM_WIPEONFORK, "wipeonfork" }, \ {VM_DONTDUMP, "dontdump" }, \ IF_HAVE_VM_SOFTDIRTY(VM_SOFTDIRTY, "softdirty" ) \ {VM_MIXEDMAP, "mixedmap" }, \ diff --git a/include/uapi/asm-generic/mman-common.h b/include/uapi/asm-generic/mman-common.h index d248f3c335b5..203268f9231e 100644 --- a/include/uapi/asm-generic/mman-common.h +++ b/include/uapi/asm-generic/mman-common.h @@ -58,6 +58,9 @@ overrides the coredump filter bits */ #define MADV_DODUMP 17 /* Clear the MADV_DONTDUMP flag */ +#define MADV_WIPEONFORK 18 /* Zero memory on fork, child only */ +#define MADV_KEEPONFORK 19 /* Undo MADV_WIPEONFORK */ + /* compatibility flags */ #define MAP_FILE 0 diff --git a/kernel/fork.c b/kernel/fork.c index 7ed64600da6c..24a4c0be80d5 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -657,7 +657,12 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm, retval = dup_userfaultfd(tmp, &uf); if (retval) goto fail_nomem_anon_vma_fork; - if (anon_vma_fork(tmp, mpnt)) + if (tmp->vm_flags & VM_WIPEONFORK) { + /* VM_WIPEONFORK gets a clean slate in the child. */ + tmp->anon_vma = NULL; + if (anon_vma_prepare(tmp)) + goto fail_nomem_anon_vma_fork; + } else if (anon_vma_fork(tmp, mpnt)) goto fail_nomem_anon_vma_fork; tmp->vm_flags &= ~(VM_LOCKED | VM_LOCKONFAULT); tmp->vm_next = tmp->vm_prev = NULL; @@ -701,7 +706,8 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm, rb_parent = &tmp->vm_rb; mm->map_count++; - retval = copy_page_range(mm, oldmm, mpnt); + if (!(tmp->vm_flags & VM_WIPEONFORK)) + retval = copy_page_range(mm, oldmm, mpnt); if (tmp->vm_ops && tmp->vm_ops->open) tmp->vm_ops->open(tmp); diff --git a/mm/madvise.c b/mm/madvise.c index 4d7d1e5ddba9..eea1c733286f 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -80,6 +80,17 @@ static long madvise_behavior(struct vm_area_struct *vma, } new_flags &= ~VM_DONTCOPY; break; + case MADV_WIPEONFORK: + /* MADV_WIPEONFORK is only supported on anonymous memory. */ + if (vma->vm_file || vma->vm_flags & VM_SHARED) { + error = -EINVAL; + goto out; + } + new_flags |= VM_WIPEONFORK; + break; + case MADV_KEEPONFORK: + new_flags &= ~VM_WIPEONFORK; + break; case MADV_DONTDUMP: new_flags |= VM_DONTDUMP; break; @@ -696,6 +707,8 @@ madvise_behavior_valid(int behavior) #endif case MADV_DONTDUMP: case MADV_DODUMP: + case MADV_WIPEONFORK: + case MADV_KEEPONFORK: #ifdef CONFIG_MEMORY_FAILURE case MADV_SOFT_OFFLINE: case MADV_HWPOISON: -- cgit v1.2.3