summaryrefslogtreecommitdiff
path: root/tools/testing/vma/vma_internal.h
diff options
context:
space:
mode:
Diffstat (limited to 'tools/testing/vma/vma_internal.h')
-rw-r--r--tools/testing/vma/vma_internal.h603
1 files changed, 528 insertions, 75 deletions
diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h
index dc976a285ad2..9f0a9f5ed0fe 100644
--- a/tools/testing/vma/vma_internal.h
+++ b/tools/testing/vma/vma_internal.h
@@ -46,42 +46,272 @@ extern unsigned long dac_mmap_min_addr;
#define MMF_HAS_MDWE 28
+/*
+ * vm_flags in vm_area_struct, see mm_types.h.
+ * When changing, update also include/trace/events/mmflags.h
+ */
+
#define VM_NONE 0x00000000
-#define VM_READ 0x00000001
-#define VM_WRITE 0x00000002
-#define VM_EXEC 0x00000004
-#define VM_SHARED 0x00000008
-#define VM_MAYREAD 0x00000010
-#define VM_MAYWRITE 0x00000020
-#define VM_MAYEXEC 0x00000040
-#define VM_GROWSDOWN 0x00000100
-#define VM_PFNMAP 0x00000400
-#define VM_LOCKED 0x00002000
-#define VM_IO 0x00004000
-#define VM_SEQ_READ 0x00008000 /* App will access data sequentially */
-#define VM_RAND_READ 0x00010000 /* App will not benefit from clustered reads */
-#define VM_DONTEXPAND 0x00040000
-#define VM_LOCKONFAULT 0x00080000
-#define VM_ACCOUNT 0x00100000
-#define VM_NORESERVE 0x00200000
-#define VM_MIXEDMAP 0x10000000
-#define VM_STACK VM_GROWSDOWN
-#define VM_SHADOW_STACK VM_NONE
-#define VM_SOFTDIRTY 0
-#define VM_ARCH_1 0x01000000 /* Architecture-specific flag */
-#define VM_GROWSUP VM_NONE
-#define VM_ACCESS_FLAGS (VM_READ | VM_WRITE | VM_EXEC)
-#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_PFNMAP | VM_MIXEDMAP)
+/**
+ * typedef vma_flag_t - specifies an individual VMA flag by bit number.
+ *
+ * This value is made type safe by sparse to avoid passing invalid flag values
+ * around.
+ */
+typedef int __bitwise vma_flag_t;
+#define DECLARE_VMA_BIT(name, bitnum) \
+ VMA_ ## name ## _BIT = ((__force vma_flag_t)bitnum)
+#define DECLARE_VMA_BIT_ALIAS(name, aliased) \
+ VMA_ ## name ## _BIT = VMA_ ## aliased ## _BIT
+enum {
+ DECLARE_VMA_BIT(READ, 0),
+ DECLARE_VMA_BIT(WRITE, 1),
+ DECLARE_VMA_BIT(EXEC, 2),
+ DECLARE_VMA_BIT(SHARED, 3),
+ /* mprotect() hardcodes VM_MAYREAD >> 4 == VM_READ, and so for r/w/x bits. */
+ DECLARE_VMA_BIT(MAYREAD, 4), /* limits for mprotect() etc. */
+ DECLARE_VMA_BIT(MAYWRITE, 5),
+ DECLARE_VMA_BIT(MAYEXEC, 6),
+ DECLARE_VMA_BIT(MAYSHARE, 7),
+ DECLARE_VMA_BIT(GROWSDOWN, 8), /* general info on the segment */
+#ifdef CONFIG_MMU
+ DECLARE_VMA_BIT(UFFD_MISSING, 9),/* missing pages tracking */
+#else
+ /* nommu: R/O MAP_PRIVATE mapping that might overlay a file mapping */
+ DECLARE_VMA_BIT(MAYOVERLAY, 9),
+#endif /* CONFIG_MMU */
+ /* Page-ranges managed without "struct page", just pure PFN */
+ DECLARE_VMA_BIT(PFNMAP, 10),
+ DECLARE_VMA_BIT(MAYBE_GUARD, 11),
+ DECLARE_VMA_BIT(UFFD_WP, 12), /* wrprotect pages tracking */
+ DECLARE_VMA_BIT(LOCKED, 13),
+ DECLARE_VMA_BIT(IO, 14), /* Memory mapped I/O or similar */
+ DECLARE_VMA_BIT(SEQ_READ, 15), /* App will access data sequentially */
+ DECLARE_VMA_BIT(RAND_READ, 16), /* App will not benefit from clustered reads */
+ DECLARE_VMA_BIT(DONTCOPY, 17), /* Do not copy this vma on fork */
+ DECLARE_VMA_BIT(DONTEXPAND, 18),/* Cannot expand with mremap() */
+ DECLARE_VMA_BIT(LOCKONFAULT, 19),/* Lock pages covered when faulted in */
+ DECLARE_VMA_BIT(ACCOUNT, 20), /* Is a VM accounted object */
+ DECLARE_VMA_BIT(NORESERVE, 21), /* should the VM suppress accounting */
+ DECLARE_VMA_BIT(HUGETLB, 22), /* Huge TLB Page VM */
+ DECLARE_VMA_BIT(SYNC, 23), /* Synchronous page faults */
+ DECLARE_VMA_BIT(ARCH_1, 24), /* Architecture-specific flag */
+ DECLARE_VMA_BIT(WIPEONFORK, 25),/* Wipe VMA contents in child. */
+ DECLARE_VMA_BIT(DONTDUMP, 26), /* Do not include in the core dump */
+ DECLARE_VMA_BIT(SOFTDIRTY, 27), /* NOT soft dirty clean area */
+ DECLARE_VMA_BIT(MIXEDMAP, 28), /* Can contain struct page and pure PFN pages */
+ DECLARE_VMA_BIT(HUGEPAGE, 29), /* MADV_HUGEPAGE marked this vma */
+ DECLARE_VMA_BIT(NOHUGEPAGE, 30),/* MADV_NOHUGEPAGE marked this vma */
+ DECLARE_VMA_BIT(MERGEABLE, 31), /* KSM may merge identical pages */
+ /* These bits are reused, we define specific uses below. */
+ DECLARE_VMA_BIT(HIGH_ARCH_0, 32),
+ DECLARE_VMA_BIT(HIGH_ARCH_1, 33),
+ DECLARE_VMA_BIT(HIGH_ARCH_2, 34),
+ DECLARE_VMA_BIT(HIGH_ARCH_3, 35),
+ DECLARE_VMA_BIT(HIGH_ARCH_4, 36),
+ DECLARE_VMA_BIT(HIGH_ARCH_5, 37),
+ DECLARE_VMA_BIT(HIGH_ARCH_6, 38),
+ /*
+ * This flag is used to connect VFIO to arch specific KVM code. It
+ * indicates that the memory under this VMA is safe for use with any
+ * non-cachable memory type inside KVM. Some VFIO devices, on some
+ * platforms, are thought to be unsafe and can cause machine crashes
+ * if KVM does not lock down the memory type.
+ */
+ DECLARE_VMA_BIT(ALLOW_ANY_UNCACHED, 39),
+#ifdef CONFIG_PPC32
+ DECLARE_VMA_BIT_ALIAS(DROPPABLE, ARCH_1),
+#else
+ DECLARE_VMA_BIT(DROPPABLE, 40),
+#endif
+ DECLARE_VMA_BIT(UFFD_MINOR, 41),
+ DECLARE_VMA_BIT(SEALED, 42),
+ /* Flags that reuse flags above. */
+ DECLARE_VMA_BIT_ALIAS(PKEY_BIT0, HIGH_ARCH_0),
+ DECLARE_VMA_BIT_ALIAS(PKEY_BIT1, HIGH_ARCH_1),
+ DECLARE_VMA_BIT_ALIAS(PKEY_BIT2, HIGH_ARCH_2),
+ DECLARE_VMA_BIT_ALIAS(PKEY_BIT3, HIGH_ARCH_3),
+ DECLARE_VMA_BIT_ALIAS(PKEY_BIT4, HIGH_ARCH_4),
+#if defined(CONFIG_X86_USER_SHADOW_STACK)
+ /*
+ * VM_SHADOW_STACK should not be set with VM_SHARED because of lack of
+ * support core mm.
+ *
+ * These VMAs will get a single end guard page. This helps userspace
+ * protect itself from attacks. A single page is enough for current
+ * shadow stack archs (x86). See the comments near alloc_shstk() in
+ * arch/x86/kernel/shstk.c for more details on the guard size.
+ */
+ DECLARE_VMA_BIT_ALIAS(SHADOW_STACK, HIGH_ARCH_5),
+#elif defined(CONFIG_ARM64_GCS)
+ /*
+ * arm64's Guarded Control Stack implements similar functionality and
+ * has similar constraints to shadow stacks.
+ */
+ DECLARE_VMA_BIT_ALIAS(SHADOW_STACK, HIGH_ARCH_6),
+#endif
+ DECLARE_VMA_BIT_ALIAS(SAO, ARCH_1), /* Strong Access Ordering (powerpc) */
+ DECLARE_VMA_BIT_ALIAS(GROWSUP, ARCH_1), /* parisc */
+ DECLARE_VMA_BIT_ALIAS(SPARC_ADI, ARCH_1), /* sparc64 */
+ DECLARE_VMA_BIT_ALIAS(ARM64_BTI, ARCH_1), /* arm64 */
+ DECLARE_VMA_BIT_ALIAS(ARCH_CLEAR, ARCH_1), /* sparc64, arm64 */
+ DECLARE_VMA_BIT_ALIAS(MAPPED_COPY, ARCH_1), /* !CONFIG_MMU */
+ DECLARE_VMA_BIT_ALIAS(MTE, HIGH_ARCH_4), /* arm64 */
+ DECLARE_VMA_BIT_ALIAS(MTE_ALLOWED, HIGH_ARCH_5),/* arm64 */
#ifdef CONFIG_STACK_GROWSUP
-#define VM_STACK VM_GROWSUP
-#define VM_STACK_EARLY VM_GROWSDOWN
+ DECLARE_VMA_BIT_ALIAS(STACK, GROWSUP),
+ DECLARE_VMA_BIT_ALIAS(STACK_EARLY, GROWSDOWN),
+#else
+ DECLARE_VMA_BIT_ALIAS(STACK, GROWSDOWN),
+#endif
+};
+
+#define INIT_VM_FLAG(name) BIT((__force int) VMA_ ## name ## _BIT)
+#define VM_READ INIT_VM_FLAG(READ)
+#define VM_WRITE INIT_VM_FLAG(WRITE)
+#define VM_EXEC INIT_VM_FLAG(EXEC)
+#define VM_SHARED INIT_VM_FLAG(SHARED)
+#define VM_MAYREAD INIT_VM_FLAG(MAYREAD)
+#define VM_MAYWRITE INIT_VM_FLAG(MAYWRITE)
+#define VM_MAYEXEC INIT_VM_FLAG(MAYEXEC)
+#define VM_MAYSHARE INIT_VM_FLAG(MAYSHARE)
+#define VM_GROWSDOWN INIT_VM_FLAG(GROWSDOWN)
+#ifdef CONFIG_MMU
+#define VM_UFFD_MISSING INIT_VM_FLAG(UFFD_MISSING)
+#else
+#define VM_UFFD_MISSING VM_NONE
+#define VM_MAYOVERLAY INIT_VM_FLAG(MAYOVERLAY)
+#endif
+#define VM_PFNMAP INIT_VM_FLAG(PFNMAP)
+#define VM_MAYBE_GUARD INIT_VM_FLAG(MAYBE_GUARD)
+#define VM_UFFD_WP INIT_VM_FLAG(UFFD_WP)
+#define VM_LOCKED INIT_VM_FLAG(LOCKED)
+#define VM_IO INIT_VM_FLAG(IO)
+#define VM_SEQ_READ INIT_VM_FLAG(SEQ_READ)
+#define VM_RAND_READ INIT_VM_FLAG(RAND_READ)
+#define VM_DONTCOPY INIT_VM_FLAG(DONTCOPY)
+#define VM_DONTEXPAND INIT_VM_FLAG(DONTEXPAND)
+#define VM_LOCKONFAULT INIT_VM_FLAG(LOCKONFAULT)
+#define VM_ACCOUNT INIT_VM_FLAG(ACCOUNT)
+#define VM_NORESERVE INIT_VM_FLAG(NORESERVE)
+#define VM_HUGETLB INIT_VM_FLAG(HUGETLB)
+#define VM_SYNC INIT_VM_FLAG(SYNC)
+#define VM_ARCH_1 INIT_VM_FLAG(ARCH_1)
+#define VM_WIPEONFORK INIT_VM_FLAG(WIPEONFORK)
+#define VM_DONTDUMP INIT_VM_FLAG(DONTDUMP)
+#ifdef CONFIG_MEM_SOFT_DIRTY
+#define VM_SOFTDIRTY INIT_VM_FLAG(SOFTDIRTY)
+#else
+#define VM_SOFTDIRTY VM_NONE
+#endif
+#define VM_MIXEDMAP INIT_VM_FLAG(MIXEDMAP)
+#define VM_HUGEPAGE INIT_VM_FLAG(HUGEPAGE)
+#define VM_NOHUGEPAGE INIT_VM_FLAG(NOHUGEPAGE)
+#define VM_MERGEABLE INIT_VM_FLAG(MERGEABLE)
+#define VM_STACK INIT_VM_FLAG(STACK)
+#ifdef CONFIG_STACK_GROWS_UP
+#define VM_STACK_EARLY INIT_VM_FLAG(STACK_EARLY)
+#else
+#define VM_STACK_EARLY VM_NONE
+#endif
+#ifdef CONFIG_ARCH_HAS_PKEYS
+#define VM_PKEY_SHIFT ((__force int)VMA_HIGH_ARCH_0_BIT)
+/* Despite the naming, these are FLAGS not bits. */
+#define VM_PKEY_BIT0 INIT_VM_FLAG(PKEY_BIT0)
+#define VM_PKEY_BIT1 INIT_VM_FLAG(PKEY_BIT1)
+#define VM_PKEY_BIT2 INIT_VM_FLAG(PKEY_BIT2)
+#if CONFIG_ARCH_PKEY_BITS > 3
+#define VM_PKEY_BIT3 INIT_VM_FLAG(PKEY_BIT3)
+#else
+#define VM_PKEY_BIT3 VM_NONE
+#endif /* CONFIG_ARCH_PKEY_BITS > 3 */
+#if CONFIG_ARCH_PKEY_BITS > 4
+#define VM_PKEY_BIT4 INIT_VM_FLAG(PKEY_BIT4)
+#else
+#define VM_PKEY_BIT4 VM_NONE
+#endif /* CONFIG_ARCH_PKEY_BITS > 4 */
+#endif /* CONFIG_ARCH_HAS_PKEYS */
+#if defined(CONFIG_X86_USER_SHADOW_STACK) || defined(CONFIG_ARM64_GCS)
+#define VM_SHADOW_STACK INIT_VM_FLAG(SHADOW_STACK)
+#else
+#define VM_SHADOW_STACK VM_NONE
+#endif
+#if defined(CONFIG_PPC64)
+#define VM_SAO INIT_VM_FLAG(SAO)
+#elif defined(CONFIG_PARISC)
+#define VM_GROWSUP INIT_VM_FLAG(GROWSUP)
+#elif defined(CONFIG_SPARC64)
+#define VM_SPARC_ADI INIT_VM_FLAG(SPARC_ADI)
+#define VM_ARCH_CLEAR INIT_VM_FLAG(ARCH_CLEAR)
+#elif defined(CONFIG_ARM64)
+#define VM_ARM64_BTI INIT_VM_FLAG(ARM64_BTI)
+#define VM_ARCH_CLEAR INIT_VM_FLAG(ARCH_CLEAR)
+#elif !defined(CONFIG_MMU)
+#define VM_MAPPED_COPY INIT_VM_FLAG(MAPPED_COPY)
+#endif
+#ifndef VM_GROWSUP
+#define VM_GROWSUP VM_NONE
+#endif
+#ifdef CONFIG_ARM64_MTE
+#define VM_MTE INIT_VM_FLAG(MTE)
+#define VM_MTE_ALLOWED INIT_VM_FLAG(MTE_ALLOWED)
+#else
+#define VM_MTE VM_NONE
+#define VM_MTE_ALLOWED VM_NONE
+#endif
+#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR
+#define VM_UFFD_MINOR INIT_VM_FLAG(UFFD_MINOR)
#else
-#define VM_STACK VM_GROWSDOWN
-#define VM_STACK_EARLY 0
+#define VM_UFFD_MINOR VM_NONE
+#endif
+#ifdef CONFIG_64BIT
+#define VM_ALLOW_ANY_UNCACHED INIT_VM_FLAG(ALLOW_ANY_UNCACHED)
+#define VM_SEALED INIT_VM_FLAG(SEALED)
+#else
+#define VM_ALLOW_ANY_UNCACHED VM_NONE
+#define VM_SEALED VM_NONE
+#endif
+#if defined(CONFIG_64BIT) || defined(CONFIG_PPC32)
+#define VM_DROPPABLE INIT_VM_FLAG(DROPPABLE)
+#else
+#define VM_DROPPABLE VM_NONE
+#endif
+
+/* Bits set in the VMA until the stack is in its final location */
+#define VM_STACK_INCOMPLETE_SETUP (VM_RAND_READ | VM_SEQ_READ | VM_STACK_EARLY)
+
+#define TASK_EXEC ((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0)
+
+/* Common data flag combinations */
+#define VM_DATA_FLAGS_TSK_EXEC (VM_READ | VM_WRITE | TASK_EXEC | \
+ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+#define VM_DATA_FLAGS_NON_EXEC (VM_READ | VM_WRITE | VM_MAYREAD | \
+ VM_MAYWRITE | VM_MAYEXEC)
+#define VM_DATA_FLAGS_EXEC (VM_READ | VM_WRITE | VM_EXEC | \
+ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+
+#ifndef VM_DATA_DEFAULT_FLAGS /* arch can override this */
+#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_EXEC
+#endif
+
+#ifndef VM_STACK_DEFAULT_FLAGS /* arch can override this */
+#define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS
#endif
+#define VM_STARTGAP_FLAGS (VM_GROWSDOWN | VM_SHADOW_STACK)
+
+#define VM_STACK_FLAGS (VM_STACK | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT)
+
+/* VMA basic access permission flags */
+#define VM_ACCESS_FLAGS (VM_READ | VM_WRITE | VM_EXEC)
+
+/*
+ * Special vmas that are non-mergable, non-mlock()able.
+ */
+#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_PFNMAP | VM_MIXEDMAP)
+
#define DEFAULT_MAP_WINDOW ((1UL << 47) - PAGE_SIZE)
#define TASK_SIZE_LOW DEFAULT_MAP_WINDOW
#define TASK_SIZE_MAX DEFAULT_MAP_WINDOW
@@ -96,25 +326,58 @@ extern unsigned long dac_mmap_min_addr;
#define VM_DATA_FLAGS_TSK_EXEC (VM_READ | VM_WRITE | TASK_EXEC | \
VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
-#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_TSK_EXEC
-
-#define VM_STARTGAP_FLAGS (VM_GROWSDOWN | VM_SHADOW_STACK)
-
-#define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS
-#define VM_STACK_FLAGS (VM_STACK | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT)
-#define VM_STACK_INCOMPLETE_SETUP (VM_RAND_READ | VM_SEQ_READ | VM_STACK_EARLY)
-
#define RLIMIT_STACK 3 /* max stack size */
#define RLIMIT_MEMLOCK 8 /* max locked-in-memory address space */
#define CAP_IPC_LOCK 14
-#ifdef CONFIG_64BIT
-#define VM_SEALED_BIT 42
-#define VM_SEALED BIT(VM_SEALED_BIT)
-#else
-#define VM_SEALED VM_NONE
-#endif
+/*
+ * Flags which should be 'sticky' on merge - that is, flags which, when one VMA
+ * possesses it but the other does not, the merged VMA should nonetheless have
+ * applied to it:
+ *
+ * VM_SOFTDIRTY - if a VMA is marked soft-dirty, that is has not had its
+ * references cleared via /proc/$pid/clear_refs, any merged VMA
+ * should be considered soft-dirty also as it operates at a VMA
+ * granularity.
+ */
+#define VM_STICKY (VM_SOFTDIRTY | VM_MAYBE_GUARD)
+
+/*
+ * VMA flags we ignore for the purposes of merge, i.e. one VMA possessing one
+ * of these flags and the other not does not preclude a merge.
+ *
+ * VM_STICKY - When merging VMAs, VMA flags must match, unless they are
+ * 'sticky'. If any sticky flags exist in either VMA, we simply
+ * set all of them on the merged VMA.
+ */
+#define VM_IGNORE_MERGE VM_STICKY
+
+/*
+ * Flags which should result in page tables being copied on fork. These are
+ * flags which indicate that the VMA maps page tables which cannot be
+ * reconsistuted upon page fault, so necessitate page table copying upon
+ *
+ * VM_PFNMAP / VM_MIXEDMAP - These contain kernel-mapped data which cannot be
+ * reasonably reconstructed on page fault.
+ *
+ * VM_UFFD_WP - Encodes metadata about an installed uffd
+ * write protect handler, which cannot be
+ * reconstructed on page fault.
+ *
+ * We always copy pgtables when dst_vma has uffd-wp
+ * enabled even if it's file-backed
+ * (e.g. shmem). Because when uffd-wp is enabled,
+ * pgtable contains uffd-wp protection information,
+ * that's something we can't retrieve from page cache,
+ * and skip copying will lose those info.
+ *
+ * VM_MAYBE_GUARD - Could contain page guard region markers which
+ * by design are a property of the page tables
+ * only and thus cannot be reconstructed on page
+ * fault.
+ */
+#define VM_COPY_ON_FORK (VM_PFNMAP | VM_MIXEDMAP | VM_UFFD_WP | VM_MAYBE_GUARD)
#define FIRST_USER_ADDRESS 0UL
#define USER_PGTABLES_CEILING 0UL
@@ -163,6 +426,8 @@ typedef __bitwise unsigned int vm_fault_t;
#define ASSERT_EXCLUSIVE_WRITER(x)
+#define pgtable_supports_soft_dirty() 1
+
/**
* swap - swap values of @a and @b
* @a: first value
@@ -259,6 +524,15 @@ typedef struct {
__private DECLARE_BITMAP(__mm_flags, NUM_MM_FLAG_BITS);
} mm_flags_t;
+/*
+ * Opaque type representing current VMA (vm_area_struct) flag state. Must be
+ * accessed via vma_flags_xxx() helper functions.
+ */
+#define NUM_VMA_FLAG_BITS BITS_PER_LONG
+typedef struct {
+ DECLARE_BITMAP(__vma_flags, NUM_VMA_FLAG_BITS);
+} __private vma_flags_t;
+
struct mm_struct {
struct maple_tree mm_mt;
int map_count; /* number of VMAs */
@@ -275,6 +549,57 @@ struct mm_struct {
struct vm_area_struct;
+
+/* What action should be taken after an .mmap_prepare call is complete? */
+enum mmap_action_type {
+ MMAP_NOTHING, /* Mapping is complete, no further action. */
+ MMAP_REMAP_PFN, /* Remap PFN range. */
+ MMAP_IO_REMAP_PFN, /* I/O remap PFN range. */
+};
+
+/*
+ * Describes an action an mmap_prepare hook can instruct to be taken to complete
+ * the mapping of a VMA. Specified in vm_area_desc.
+ */
+struct mmap_action {
+ union {
+ /* Remap range. */
+ struct {
+ unsigned long start;
+ unsigned long start_pfn;
+ unsigned long size;
+ pgprot_t pgprot;
+ } remap;
+ };
+ enum mmap_action_type type;
+
+ /*
+ * If specified, this hook is invoked after the selected action has been
+ * successfully completed. Note that the VMA write lock still held.
+ *
+ * The absolute minimum ought to be done here.
+ *
+ * Returns 0 on success, or an error code.
+ */
+ int (*success_hook)(const struct vm_area_struct *vma);
+
+ /*
+ * If specified, this hook is invoked when an error occurred when
+ * attempting the selection action.
+ *
+ * The hook can return an error code in order to filter the error, but
+ * it is not valid to clear the error here.
+ */
+ int (*error_hook)(int err);
+
+ /*
+ * This should be set in rare instances where the operation required
+ * that the rmap should not be able to access the VMA until
+ * completely set up.
+ */
+ bool hide_from_rmap_until_complete :1;
+};
+
/*
* Describes a VMA that is about to be mmap()'ed. Drivers may choose to
* manipulate mutable fields which will cause those fields to be updated in the
@@ -292,12 +617,18 @@ struct vm_area_desc {
/* Mutable fields. Populated with initial state. */
pgoff_t pgoff;
struct file *vm_file;
- vm_flags_t vm_flags;
+ union {
+ vm_flags_t vm_flags;
+ vma_flags_t vma_flags;
+ };
pgprot_t page_prot;
/* Write-only fields. */
const struct vm_operations_struct *vm_ops;
void *private_data;
+
+ /* Take further action? */
+ struct mmap_action action;
};
struct file_operations {
@@ -335,7 +666,7 @@ struct vm_area_struct {
*/
union {
const vm_flags_t vm_flags;
- vm_flags_t __private __vm_flags;
+ vma_flags_t flags;
};
#ifdef CONFIG_PER_VMA_LOCK
@@ -794,8 +1125,7 @@ static inline void update_hiwater_vm(struct mm_struct *mm)
static inline void unmap_vmas(struct mmu_gather *tlb, struct ma_state *mas,
struct vm_area_struct *vma, unsigned long start_addr,
- unsigned long end_addr, unsigned long tree_end,
- bool mm_wr_locked)
+ unsigned long end_addr, unsigned long tree_end)
{
}
@@ -844,6 +1174,14 @@ static inline void vma_start_write(struct vm_area_struct *vma)
vma->vm_lock_seq++;
}
+static inline __must_check
+int vma_start_write_killable(struct vm_area_struct *vma)
+{
+ /* Used to indicate to tests that a write operation has begun. */
+ vma->vm_lock_seq++;
+ return 0;
+}
+
static inline void vma_adjust_trans_huge(struct vm_area_struct *vma,
unsigned long start,
unsigned long end,
@@ -1042,26 +1380,6 @@ static inline bool may_expand_vm(struct mm_struct *mm, vm_flags_t flags,
return true;
}
-static inline void vm_flags_init(struct vm_area_struct *vma,
- vm_flags_t flags)
-{
- vma->__vm_flags = flags;
-}
-
-static inline void vm_flags_set(struct vm_area_struct *vma,
- vm_flags_t flags)
-{
- vma_start_write(vma);
- vma->__vm_flags |= flags;
-}
-
-static inline void vm_flags_clear(struct vm_area_struct *vma,
- vm_flags_t flags)
-{
- vma_start_write(vma);
- vma->__vm_flags &= ~flags;
-}
-
static inline int shmem_zero_setup(struct vm_area_struct *vma)
{
return 0;
@@ -1218,13 +1536,118 @@ static inline void userfaultfd_unmap_complete(struct mm_struct *mm,
{
}
-# define ACCESS_PRIVATE(p, member) ((p)->member)
+#define ACCESS_PRIVATE(p, member) ((p)->member)
+
+#define bitmap_size(nbits) (ALIGN(nbits, BITS_PER_LONG) / BITS_PER_BYTE)
+
+static __always_inline void bitmap_zero(unsigned long *dst, unsigned int nbits)
+{
+ unsigned int len = bitmap_size(nbits);
+
+ if (small_const_nbits(nbits))
+ *dst = 0;
+ else
+ memset(dst, 0, len);
+}
static inline bool mm_flags_test(int flag, const struct mm_struct *mm)
{
return test_bit(flag, ACCESS_PRIVATE(&mm->flags, __mm_flags));
}
+/* Clears all bits in the VMA flags bitmap, non-atomically. */
+static inline void vma_flags_clear_all(vma_flags_t *flags)
+{
+ bitmap_zero(ACCESS_PRIVATE(flags, __vma_flags), NUM_VMA_FLAG_BITS);
+}
+
+/*
+ * Copy value to the first system word of VMA flags, non-atomically.
+ *
+ * IMPORTANT: This does not overwrite bytes past the first system word. The
+ * caller must account for this.
+ */
+static inline void vma_flags_overwrite_word(vma_flags_t *flags, unsigned long value)
+{
+ *ACCESS_PRIVATE(flags, __vma_flags) = value;
+}
+
+/*
+ * Copy value to the first system word of VMA flags ONCE, non-atomically.
+ *
+ * IMPORTANT: This does not overwrite bytes past the first system word. The
+ * caller must account for this.
+ */
+static inline void vma_flags_overwrite_word_once(vma_flags_t *flags, unsigned long value)
+{
+ unsigned long *bitmap = ACCESS_PRIVATE(flags, __vma_flags);
+
+ WRITE_ONCE(*bitmap, value);
+}
+
+/* Update the first system word of VMA flags setting bits, non-atomically. */
+static inline void vma_flags_set_word(vma_flags_t *flags, unsigned long value)
+{
+ unsigned long *bitmap = ACCESS_PRIVATE(flags, __vma_flags);
+
+ *bitmap |= value;
+}
+
+/* Update the first system word of VMA flags clearing bits, non-atomically. */
+static inline void vma_flags_clear_word(vma_flags_t *flags, unsigned long value)
+{
+ unsigned long *bitmap = ACCESS_PRIVATE(flags, __vma_flags);
+
+ *bitmap &= ~value;
+}
+
+
+/* Use when VMA is not part of the VMA tree and needs no locking */
+static inline void vm_flags_init(struct vm_area_struct *vma,
+ vm_flags_t flags)
+{
+ vma_flags_clear_all(&vma->flags);
+ vma_flags_overwrite_word(&vma->flags, flags);
+}
+
+/*
+ * Use when VMA is part of the VMA tree and modifications need coordination
+ * Note: vm_flags_reset and vm_flags_reset_once do not lock the vma and
+ * it should be locked explicitly beforehand.
+ */
+static inline void vm_flags_reset(struct vm_area_struct *vma,
+ vm_flags_t flags)
+{
+ vma_assert_write_locked(vma);
+ vm_flags_init(vma, flags);
+}
+
+static inline void vm_flags_reset_once(struct vm_area_struct *vma,
+ vm_flags_t flags)
+{
+ vma_assert_write_locked(vma);
+ /*
+ * The user should only be interested in avoiding reordering of
+ * assignment to the first word.
+ */
+ vma_flags_clear_all(&vma->flags);
+ vma_flags_overwrite_word_once(&vma->flags, flags);
+}
+
+static inline void vm_flags_set(struct vm_area_struct *vma,
+ vm_flags_t flags)
+{
+ vma_start_write(vma);
+ vma_flags_set_word(&vma->flags, flags);
+}
+
+static inline void vm_flags_clear(struct vm_area_struct *vma,
+ vm_flags_t flags)
+{
+ vma_start_write(vma);
+ vma_flags_clear_word(&vma->flags, flags);
+}
+
/*
* Denies creating a writable executable mapping or gaining executable permissions.
*
@@ -1326,12 +1749,23 @@ static inline void free_anon_vma_name(struct vm_area_struct *vma)
static inline void set_vma_from_desc(struct vm_area_struct *vma,
struct vm_area_desc *desc);
-static inline int __compat_vma_mmap_prepare(const struct file_operations *f_op,
+static inline void mmap_action_prepare(struct mmap_action *action,
+ struct vm_area_desc *desc)
+{
+}
+
+static inline int mmap_action_complete(struct mmap_action *action,
+ struct vm_area_struct *vma)
+{
+ return 0;
+}
+
+static inline int __compat_vma_mmap(const struct file_operations *f_op,
struct file *file, struct vm_area_struct *vma)
{
struct vm_area_desc desc = {
.mm = vma->vm_mm,
- .file = vma->vm_file,
+ .file = file,
.start = vma->vm_start,
.end = vma->vm_end,
@@ -1339,21 +1773,24 @@ static inline int __compat_vma_mmap_prepare(const struct file_operations *f_op,
.vm_file = vma->vm_file,
.vm_flags = vma->vm_flags,
.page_prot = vma->vm_page_prot,
+
+ .action.type = MMAP_NOTHING, /* Default */
};
int err;
err = f_op->mmap_prepare(&desc);
if (err)
return err;
- set_vma_from_desc(vma, &desc);
- return 0;
+ mmap_action_prepare(&desc.action, &desc);
+ set_vma_from_desc(vma, &desc);
+ return mmap_action_complete(&desc.action, vma);
}
-static inline int compat_vma_mmap_prepare(struct file *file,
+static inline int compat_vma_mmap(struct file *file,
struct vm_area_struct *vma)
{
- return __compat_vma_mmap_prepare(file->f_op, file, vma);
+ return __compat_vma_mmap(file->f_op, file, vma);
}
/* Did the driver provide valid mmap hook configuration? */
@@ -1374,7 +1811,7 @@ static inline bool can_mmap_file(struct file *file)
static inline int vfs_mmap(struct file *file, struct vm_area_struct *vma)
{
if (file->f_op->mmap_prepare)
- return compat_vma_mmap_prepare(file, vma);
+ return compat_vma_mmap(file, vma);
return file->f_op->mmap(file, vma);
}
@@ -1407,4 +1844,20 @@ static inline vm_flags_t ksm_vma_flags(const struct mm_struct *mm,
return vm_flags;
}
+static inline void remap_pfn_range_prepare(struct vm_area_desc *desc, unsigned long pfn)
+{
+}
+
+static inline int remap_pfn_range_complete(struct vm_area_struct *vma, unsigned long addr,
+ unsigned long pfn, unsigned long size, pgprot_t pgprot)
+{
+ return 0;
+}
+
+static inline int do_munmap(struct mm_struct *, unsigned long, size_t,
+ struct list_head *uf)
+{
+ return 0;
+}
+
#endif /* __MM_VMA_INTERNAL_H */