From 71458cfc782eafe4b27656e078d379a34e472adf Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Mon, 13 Oct 2014 15:51:05 -0700 Subject: kernel: add support for gcc 5 We're missing include/linux/compiler-gcc5.h which is required now because gcc branched off to v5 in trunk. Just copy the relevant bits out of include/linux/compiler-gcc4.h, no new code is added as of now. This fixes a build error when using gcc 5. Signed-off-by: Sasha Levin Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compiler-gcc5.h | 66 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 include/linux/compiler-gcc5.h (limited to 'include/linux') diff --git a/include/linux/compiler-gcc5.h b/include/linux/compiler-gcc5.h new file mode 100644 index 000000000000..cdd1cc202d51 --- /dev/null +++ b/include/linux/compiler-gcc5.h @@ -0,0 +1,66 @@ +#ifndef __LINUX_COMPILER_H +#error "Please don't include directly, include instead." +#endif + +#define __used __attribute__((__used__)) +#define __must_check __attribute__((warn_unused_result)) +#define __compiler_offsetof(a, b) __builtin_offsetof(a, b) + +/* Mark functions as cold. gcc will assume any path leading to a call + to them will be unlikely. This means a lot of manual unlikely()s + are unnecessary now for any paths leading to the usual suspects + like BUG(), printk(), panic() etc. [but let's keep them for now for + older compilers] + + Early snapshots of gcc 4.3 don't support this and we can't detect this + in the preprocessor, but we can live with this because they're unreleased. + Maketime probing would be overkill here. + + gcc also has a __attribute__((__hot__)) to move hot functions into + a special section, but I don't see any sense in this right now in + the kernel context */ +#define __cold __attribute__((__cold__)) + +#define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__) + +#ifndef __CHECKER__ +# define __compiletime_warning(message) __attribute__((warning(message))) +# define __compiletime_error(message) __attribute__((error(message))) +#endif /* __CHECKER__ */ + +/* + * Mark a position in code as unreachable. This can be used to + * suppress control flow warnings after asm blocks that transfer + * control elsewhere. + * + * Early snapshots of gcc 4.5 don't support this and we can't detect + * this in the preprocessor, but we can live with this because they're + * unreleased. Really, we need to have autoconf for the kernel. + */ +#define unreachable() __builtin_unreachable() + +/* Mark a function definition as prohibited from being cloned. */ +#define __noclone __attribute__((__noclone__)) + +/* + * Tell the optimizer that something else uses this function or variable. + */ +#define __visible __attribute__((externally_visible)) + +/* + * GCC 'asm goto' miscompiles certain code sequences: + * + * http://gcc.gnu.org/bugzilla/show_bug.cgi?id=58670 + * + * Work it around via a compiler barrier quirk suggested by Jakub Jelinek. + * Fixed in GCC 4.8.2 and later versions. + * + * (asm goto is automatically volatile - the naming reflects this.) + */ +#define asm_volatile_goto(x...) 
do { asm goto(x); asm (""); } while (0) + +#ifdef CONFIG_ARCH_USE_BUILTIN_BSWAP +#define __HAVE_BUILTIN_BSWAP32__ +#define __HAVE_BUILTIN_BSWAP64__ +#define __HAVE_BUILTIN_BSWAP16__ +#endif /* CONFIG_ARCH_USE_BUILTIN_BSWAP */ -- cgit v1.2.3 From de9e14eebf33a60712a52a0bc6e08c043c0aba53 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Mon, 13 Oct 2014 15:51:09 -0700 Subject: drivers: dma-contiguous: add initialization from device tree Add a function to create CMA region from previously reserved memory and add support for handling 'shared-dma-pool' reserved-memory device tree nodes. Based on previous code provided by Josh Cartwright Signed-off-by: Marek Szyprowski Cc: Arnd Bergmann Cc: Michal Nazarewicz Cc: Grant Likely Cc: Laura Abbott Cc: Josh Cartwright Cc: Joonsoo Kim Cc: Kyungmin Park Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/dma-contiguous.c | 66 +++++++++++++++++++++++++++++++++++++++++++ include/linux/cma.h | 3 ++ mm/cma.c | 62 ++++++++++++++++++++++++++++++++-------- 3 files changed, 120 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/dma-contiguous.c b/drivers/base/dma-contiguous.c index 6606abdf880c..473ff4892401 100644 --- a/drivers/base/dma-contiguous.c +++ b/drivers/base/dma-contiguous.c @@ -211,3 +211,69 @@ bool dma_release_from_contiguous(struct device *dev, struct page *pages, { return cma_release(dev_get_cma_area(dev), pages, count); } + +/* + * Support for reserved memory regions defined in device tree + */ +#ifdef CONFIG_OF_RESERVED_MEM +#include +#include +#include + +#undef pr_fmt +#define pr_fmt(fmt) fmt + +static void rmem_cma_device_init(struct reserved_mem *rmem, struct device *dev) +{ + dev_set_cma_area(dev, rmem->priv); +} + +static void rmem_cma_device_release(struct reserved_mem *rmem, + struct device *dev) +{ + dev_set_cma_area(dev, NULL); +} + +static const struct reserved_mem_ops rmem_cma_ops = { + .device_init = rmem_cma_device_init, + .device_release = rmem_cma_device_release, +}; + +static int __init rmem_cma_setup(struct reserved_mem *rmem) +{ + phys_addr_t align = PAGE_SIZE << max(MAX_ORDER - 1, pageblock_order); + phys_addr_t mask = align - 1; + unsigned long node = rmem->fdt_node; + struct cma *cma; + int err; + + if (!of_get_flat_dt_prop(node, "reusable", NULL) || + of_get_flat_dt_prop(node, "no-map", NULL)) + return -EINVAL; + + if ((rmem->base & mask) || (rmem->size & mask)) { + pr_err("Reserved memory: incorrect alignment of CMA region\n"); + return -EINVAL; + } + + err = cma_init_reserved_mem(rmem->base, rmem->size, 0, &cma); + if (err) { + pr_err("Reserved memory: unable to setup CMA region\n"); + return err; + } + /* Architecture specific contiguous memory fixup. 
*/ + dma_contiguous_early_fixup(rmem->base, rmem->size); + + if (of_get_flat_dt_prop(node, "linux,cma-default", NULL)) + dma_contiguous_set_default(cma); + + rmem->ops = &rmem_cma_ops; + rmem->priv = cma; + + pr_info("Reserved memory: created CMA memory pool at %pa, size %ld MiB\n", + &rmem->base, (unsigned long)rmem->size / SZ_1M); + + return 0; +} +RESERVEDMEM_OF_DECLARE(cma, "shared-dma-pool", rmem_cma_setup); +#endif diff --git a/include/linux/cma.h b/include/linux/cma.h index 371b93042520..0430ed05d3b9 100644 --- a/include/linux/cma.h +++ b/include/linux/cma.h @@ -22,6 +22,9 @@ extern int __init cma_declare_contiguous(phys_addr_t size, phys_addr_t base, phys_addr_t limit, phys_addr_t alignment, unsigned int order_per_bit, bool fixed, struct cma **res_cma); +extern int cma_init_reserved_mem(phys_addr_t size, + phys_addr_t base, int order_per_bit, + struct cma **res_cma); extern struct page *cma_alloc(struct cma *cma, int count, unsigned int align); extern bool cma_release(struct cma *cma, struct page *pages, int count); #endif diff --git a/mm/cma.c b/mm/cma.c index a951a3b3ed36..963bc4add9af 100644 --- a/mm/cma.c +++ b/mm/cma.c @@ -142,6 +142,54 @@ static int __init cma_init_reserved_areas(void) } core_initcall(cma_init_reserved_areas); +/** + * cma_init_reserved_mem() - create custom contiguous area from reserved memory + * @base: Base address of the reserved area + * @size: Size of the reserved area (in bytes), + * @order_per_bit: Order of pages represented by one bit on bitmap. + * @res_cma: Pointer to store the created cma region. + * + * This function creates custom contiguous area from already reserved memory. + */ +int __init cma_init_reserved_mem(phys_addr_t base, phys_addr_t size, + int order_per_bit, struct cma **res_cma) +{ + struct cma *cma; + phys_addr_t alignment; + + /* Sanity checks */ + if (cma_area_count == ARRAY_SIZE(cma_areas)) { + pr_err("Not enough slots for CMA reserved regions!\n"); + return -ENOSPC; + } + + if (!size || !memblock_is_region_reserved(base, size)) + return -EINVAL; + + /* ensure minimal alignment requied by mm core */ + alignment = PAGE_SIZE << max(MAX_ORDER - 1, pageblock_order); + + /* alignment should be aligned with order_per_bit */ + if (!IS_ALIGNED(alignment >> PAGE_SHIFT, 1 << order_per_bit)) + return -EINVAL; + + if (ALIGN(base, alignment) != base || ALIGN(size, alignment) != size) + return -EINVAL; + + /* + * Each reserved area must be initialised later, when more kernel + * subsystems (like slab allocator) are available. + */ + cma = &cma_areas[cma_area_count]; + cma->base_pfn = PFN_DOWN(base); + cma->count = size >> PAGE_SHIFT; + cma->order_per_bit = order_per_bit; + *res_cma = cma; + cma_area_count++; + + return 0; +} + /** * cma_declare_contiguous() - reserve custom contiguous area * @base: Base address of the reserved area optional, use 0 for any @@ -165,7 +213,6 @@ int __init cma_declare_contiguous(phys_addr_t base, phys_addr_t alignment, unsigned int order_per_bit, bool fixed, struct cma **res_cma) { - struct cma *cma; phys_addr_t memblock_end = memblock_end_of_DRAM(); phys_addr_t highmem_start = __pa(high_memory); int ret = 0; @@ -237,16 +284,9 @@ int __init cma_declare_contiguous(phys_addr_t base, } } - /* - * Each reserved area must be initialised later, when more kernel - * subsystems (like slab allocator) are available. 
- */ - cma = &cma_areas[cma_area_count]; - cma->base_pfn = PFN_DOWN(base); - cma->count = size >> PAGE_SHIFT; - cma->order_per_bit = order_per_bit; - *res_cma = cma; - cma_area_count++; + ret = cma_init_reserved_mem(base, size, order_per_bit, res_cma); + if (ret) + goto err; pr_info("Reserved %ld MiB at %08lx\n", (unsigned long)size / SZ_1M, (unsigned long)base); -- cgit v1.2.3 From 8b21d9ca17ff8ed0dbf650f4162ee2d59bb5a881 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 13 Oct 2014 15:51:30 -0700 Subject: list: include linux/kernel.h linux/list.h uses container_of, therefore it depends on linux/kernel.h. Signed-off-by: Masahiro Yamada Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/list.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/list.h b/include/linux/list.h index cbbb96fcead9..f33f831eb3c8 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -5,6 +5,7 @@ #include #include #include +#include /* * Simple doubly linked list implementation. -- cgit v1.2.3 From 6de8ab68bc30da75116209d818c75497bdaed09d Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Mon, 13 Oct 2014 15:51:36 -0700 Subject: lib: remove prio_heap The prio_heap code is unused since commit 889ed9ceaa97 ("cgroup: remove css_scan_tasks()"). It should be compiled out to shrink the binary kernel size which can be done via introducing CONFIG_PRIO_HEAD or by removing the code. We can simply recover the code from git when needed, so it would be better to remove it IMO. Signed-off-by: Lai Jiangshan Acked-by: Tejun Heo Cc: "David S. Miller" Cc: Ingo Molnar Acked-by: Peter Zijlstra Cc: Kees Cook Cc: Francesco Fusco Cc: Greg Thelen Cc: Chris Wilson Cc: Randy Dunlap Cc: George Spelvin Cc: Mark Salter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/prio_heap.h | 58 --------------------------------------- lib/Makefile | 2 +- lib/prio_heap.c | 70 ----------------------------------------------- 3 files changed, 1 insertion(+), 129 deletions(-) delete mode 100644 include/linux/prio_heap.h delete mode 100644 lib/prio_heap.c (limited to 'include/linux') diff --git a/include/linux/prio_heap.h b/include/linux/prio_heap.h deleted file mode 100644 index 08094350f26a..000000000000 --- a/include/linux/prio_heap.h +++ /dev/null @@ -1,58 +0,0 @@ -#ifndef _LINUX_PRIO_HEAP_H -#define _LINUX_PRIO_HEAP_H - -/* - * Simple insertion-only static-sized priority heap containing - * pointers, based on CLR, chapter 7 - */ - -#include - -/** - * struct ptr_heap - simple static-sized priority heap - * @ptrs - pointer to data area - * @max - max number of elements that can be stored in @ptrs - * @size - current number of valid elements in @ptrs (in the range 0..@size-1 - * @gt: comparison operator, which should implement "greater than" - */ -struct ptr_heap { - void **ptrs; - int max; - int size; - int (*gt)(void *, void *); -}; - -/** - * heap_init - initialize an empty heap with a given memory size - * @heap: the heap structure to be initialized - * @size: amount of memory to use in bytes - * @gfp_mask: mask to pass to kmalloc() - * @gt: comparison operator, which should implement "greater than" - */ -extern int heap_init(struct ptr_heap *heap, size_t size, gfp_t gfp_mask, - int (*gt)(void *, void *)); - -/** - * heap_free - release a heap's storage - * @heap: the heap structure whose data should be released - */ -void heap_free(struct ptr_heap *heap); - -/** - * heap_insert - insert a value into the heap and return any overflowed value - 
* @heap: the heap to be operated on - * @p: the pointer to be inserted - * - * Attempts to insert the given value into the priority heap. If the - * heap is full prior to the insertion, then the resulting heap will - * consist of the smallest @max elements of the original heap and the - * new element; the greatest element will be removed from the heap and - * returned. Note that the returned element will be the new element - * (i.e. no change to the heap) if the new element is greater than all - * elements currently in the heap. - */ -extern void *heap_insert(struct ptr_heap *heap, void *p); - - - -#endif /* _LINUX_PRIO_HEAP_H */ diff --git a/lib/Makefile b/lib/Makefile index d6b4bc496408..eb95e0fdcca5 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -11,7 +11,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ rbtree.o radix-tree.o dump_stack.o timerqueue.o\ idr.o int_sqrt.o extable.o \ sha1.o md5.o irq_regs.o reciprocal_div.o argv_split.o \ - proportions.o flex_proportions.o prio_heap.o ratelimit.o show_mem.o \ + proportions.o flex_proportions.o ratelimit.o show_mem.o \ is_single_threaded.o plist.o decompress.o kobject_uevent.o \ earlycpio.o diff --git a/lib/prio_heap.c b/lib/prio_heap.c deleted file mode 100644 index a7af6f85eca8..000000000000 --- a/lib/prio_heap.c +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Simple insertion-only static-sized priority heap containing - * pointers, based on CLR, chapter 7 - */ - -#include -#include - -int heap_init(struct ptr_heap *heap, size_t size, gfp_t gfp_mask, - int (*gt)(void *, void *)) -{ - heap->ptrs = kmalloc(size, gfp_mask); - if (!heap->ptrs) - return -ENOMEM; - heap->size = 0; - heap->max = size / sizeof(void *); - heap->gt = gt; - return 0; -} - -void heap_free(struct ptr_heap *heap) -{ - kfree(heap->ptrs); -} - -void *heap_insert(struct ptr_heap *heap, void *p) -{ - void *res; - void **ptrs = heap->ptrs; - int pos; - - if (heap->size < heap->max) { - /* Heap insertion */ - pos = heap->size++; - while (pos > 0 && heap->gt(p, ptrs[(pos-1)/2])) { - ptrs[pos] = ptrs[(pos-1)/2]; - pos = (pos-1)/2; - } - ptrs[pos] = p; - return NULL; - } - - /* The heap is full, so something will have to be dropped */ - - /* If the new pointer is greater than the current max, drop it */ - if (heap->gt(p, ptrs[0])) - return p; - - /* Replace the current max and heapify */ - res = ptrs[0]; - ptrs[0] = p; - pos = 0; - - while (1) { - int left = 2 * pos + 1; - int right = 2 * pos + 2; - int largest = pos; - if (left < heap->size && heap->gt(ptrs[left], p)) - largest = left; - if (right < heap->size && heap->gt(ptrs[right], ptrs[largest])) - largest = right; - if (largest == pos) - break; - /* Push p down the heap one level and bump one up */ - ptrs[pos] = ptrs[largest]; - ptrs[largest] = p; - pos = largest; - } - return res; -} -- cgit v1.2.3 From 1c3bea0e71892ef9100c01d3799cdae8cac273ef Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 13 Oct 2014 15:53:33 -0700 Subject: signal: use BUILD_BUG() instead of _NSIG_WORDS_is_unsupported_size() Kill _NSIG_WORDS_is_unsupported_size(), use BUILD_BUG() instead. This simplifies the code, avoids the nested-externs warnings, and this way we do not defer the problem to linker. Also, fix the indentation in _SIG_SET_BINOP() and _SIG_SET_OP(). Note: this patch assumes that the code like "if (0) BUILD_BUG();" is valid. If not (say __compiletime_error() is not defined and thus __compiletime_error_fallback() uses a negative array) we should fix BUILD_BUG() and/or BUILD_BUG_ON_MSG(). 
This code should be fine by definition, this is the documented purpose of BUILD_BUG(). [sfr@canb.auug.org.au: fix powerpc build failures] Signed-off-by: Oleg Nesterov Reported-by: Jeff Kirsher Reviewed-by: Josh Triplett Signed-off-by: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/signal.h | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/signal.h b/include/linux/signal.h index 750196fcc0a5..ab1e0392b5ac 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -2,6 +2,7 @@ #define _LINUX_SIGNAL_H #include +#include #include struct task_struct; @@ -67,7 +68,6 @@ static inline int sigismember(sigset_t *set, int _sig) static inline int sigisemptyset(sigset_t *set) { - extern void _NSIG_WORDS_is_unsupported_size(void); switch (_NSIG_WORDS) { case 4: return (set->sig[3] | set->sig[2] | @@ -77,7 +77,7 @@ static inline int sigisemptyset(sigset_t *set) case 1: return set->sig[0] == 0; default: - _NSIG_WORDS_is_unsupported_size(); + BUILD_BUG(); return 0; } } @@ -90,24 +90,23 @@ static inline int sigisemptyset(sigset_t *set) #define _SIG_SET_BINOP(name, op) \ static inline void name(sigset_t *r, const sigset_t *a, const sigset_t *b) \ { \ - extern void _NSIG_WORDS_is_unsupported_size(void); \ unsigned long a0, a1, a2, a3, b0, b1, b2, b3; \ \ switch (_NSIG_WORDS) { \ - case 4: \ + case 4: \ a3 = a->sig[3]; a2 = a->sig[2]; \ b3 = b->sig[3]; b2 = b->sig[2]; \ r->sig[3] = op(a3, b3); \ r->sig[2] = op(a2, b2); \ - case 2: \ + case 2: \ a1 = a->sig[1]; b1 = b->sig[1]; \ r->sig[1] = op(a1, b1); \ - case 1: \ + case 1: \ a0 = a->sig[0]; b0 = b->sig[0]; \ r->sig[0] = op(a0, b0); \ break; \ - default: \ - _NSIG_WORDS_is_unsupported_size(); \ + default: \ + BUILD_BUG(); \ } \ } @@ -128,16 +127,14 @@ _SIG_SET_BINOP(sigandnsets, _sig_andn) #define _SIG_SET_OP(name, op) \ static inline void name(sigset_t *set) \ { \ - extern void _NSIG_WORDS_is_unsupported_size(void); \ - \ switch (_NSIG_WORDS) { \ - case 4: set->sig[3] = op(set->sig[3]); \ - set->sig[2] = op(set->sig[2]); \ - case 2: set->sig[1] = op(set->sig[1]); \ - case 1: set->sig[0] = op(set->sig[0]); \ + case 4: set->sig[3] = op(set->sig[3]); \ + set->sig[2] = op(set->sig[2]); \ + case 2: set->sig[1] = op(set->sig[1]); \ + case 1: set->sig[0] = op(set->sig[0]); \ break; \ - default: \ - _NSIG_WORDS_is_unsupported_size(); \ + default: \ + BUILD_BUG(); \ } \ } -- cgit v1.2.3 From 669280a152ce5144321c0e511498877383f34393 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Mon, 13 Oct 2014 15:53:40 -0700 Subject: kexec: take the segment adding out of locate_mem_hole functions In locate_mem_hole functions, a memory hole is located and added as kexec_segment. But from the name of locate_mem_hole, it should only take responsibility of searching a available memory hole to contain data of a specified size. So in this patch add a new field 'mem' into kexec_buf, then take that kexec segment adding code out of locate_mem_hole_top_down and locate_mem_hole_bottom_up. This make clear of the functionality of locate_mem_hole just like it declars to do. And by this locate_mem_hole_callback chould be used later if anyone want to locate a memory hole for other use. Meanwhile Vivek suggested opening code function __kexec_add_segment(), that way we have to retreive ksegment pointer once and it is easy to read. So just do it in this patch and remove __kexec_add_segment() since no one use it anymore. 
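Editorial sketch (not part of the patch): with the new 'mem' field in kexec_buf, the locate_mem_hole_* helpers only record where the suitable hole starts, and the segment is appended in one place afterwards, open-coding what __kexec_add_segment() used to do. The function below is a condensed, hypothetical illustration of that split; field and structure names follow the diff.

    #include <linux/kexec.h>

    /* Hypothetical, condensed illustration only (not from the patch). */
    static int sketch_add_segment(struct kimage *image, struct kexec_buf *kbuf,
                                  unsigned long found_mem)
    {
            struct kexec_segment *ksegment;

            kbuf->mem = found_mem;  /* all that locate_mem_hole_*() now record */

            /* the one place where the segment is actually added */
            ksegment = &image->segment[image->nr_segments];
            ksegment->kbuf  = kbuf->buffer;
            ksegment->bufsz = kbuf->bufsz;
            ksegment->mem   = kbuf->mem;
            ksegment->memsz = kbuf->memsz;
            image->nr_segments++;

            return 0;
    }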
Signed-off-by: Baoquan He Acked-by: Vivek Goyal Cc: Eric W. Biederman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kexec.h | 1 + kernel/kexec.c | 29 ++++++++--------------------- 2 files changed, 9 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 4b2a0e11cc5b..9d957b7ae095 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -178,6 +178,7 @@ struct kexec_buf { struct kimage *image; char *buffer; unsigned long bufsz; + unsigned long mem; unsigned long memsz; unsigned long buf_align; unsigned long buf_min; diff --git a/kernel/kexec.c b/kernel/kexec.c index 2bee072268d9..63bc3cdfb629 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -2016,22 +2016,6 @@ static int __init crash_save_vmcoreinfo_init(void) subsys_initcall(crash_save_vmcoreinfo_init); #ifdef CONFIG_KEXEC_FILE -static int __kexec_add_segment(struct kimage *image, char *buf, - unsigned long bufsz, unsigned long mem, - unsigned long memsz) -{ - struct kexec_segment *ksegment; - - ksegment = &image->segment[image->nr_segments]; - ksegment->kbuf = buf; - ksegment->bufsz = bufsz; - ksegment->mem = mem; - ksegment->memsz = memsz; - image->nr_segments++; - - return 0; -} - static int locate_mem_hole_top_down(unsigned long start, unsigned long end, struct kexec_buf *kbuf) { @@ -2064,8 +2048,7 @@ static int locate_mem_hole_top_down(unsigned long start, unsigned long end, } while (1); /* If we are here, we found a suitable memory range */ - __kexec_add_segment(image, kbuf->buffer, kbuf->bufsz, temp_start, - kbuf->memsz); + kbuf->mem = temp_start; /* Success, stop navigating through remaining System RAM ranges */ return 1; @@ -2099,8 +2082,7 @@ static int locate_mem_hole_bottom_up(unsigned long start, unsigned long end, } while (1); /* If we are here, we found a suitable memory range */ - __kexec_add_segment(image, kbuf->buffer, kbuf->bufsz, temp_start, - kbuf->memsz); + kbuf->mem = temp_start; /* Success, stop navigating through remaining System RAM ranges */ return 1; @@ -2187,7 +2169,12 @@ int kexec_add_buffer(struct kimage *image, char *buffer, unsigned long bufsz, } /* Found a suitable memory range */ - ksegment = &image->segment[image->nr_segments - 1]; + ksegment = &image->segment[image->nr_segments]; + ksegment->kbuf = kbuf->buffer; + ksegment->bufsz = kbuf->bufsz; + ksegment->mem = kbuf->mem; + ksegment->memsz = kbuf->memsz; + image->nr_segments++; *load_addr = ksegment->mem; return 0; } -- cgit v1.2.3 From a841b65921a959c759da6b5c8d5dc21966b4cf86 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Mon, 13 Oct 2014 15:53:48 -0700 Subject: rbtree: add comment to rb_insert_augmented() The comment is copied from Documentation/rbtree.txt, but this comment is so important that it should also be in the code. Signed-off-by: Lai Jiangshan Acked-by: Michel Lespinasse Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/rbtree_augmented.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rbtree_augmented.h b/include/linux/rbtree_augmented.h index fea49b5da12a..378c5ee75f78 100644 --- a/include/linux/rbtree_augmented.h +++ b/include/linux/rbtree_augmented.h @@ -43,6 +43,16 @@ struct rb_augment_callbacks { extern void __rb_insert_augmented(struct rb_node *node, struct rb_root *root, void (*augment_rotate)(struct rb_node *old, struct rb_node *new)); +/* + * Fixup the rbtree and update the augmented information when rebalancing. 
+ * + * On insertion, the user must update the augmented information on the path + * leading to the inserted node, then call rb_link_node() as usual and + * rb_augment_inserted() instead of the usual rb_insert_color() call. + * If rb_augment_inserted() rebalances the rbtree, it will callback into + * a user provided function to update the augmented information on the + * affected subtrees. + */ static inline void rb_insert_augmented(struct rb_node *node, struct rb_root *root, const struct rb_augment_callbacks *augment) -- cgit v1.2.3 From 67cf13ceed89e2c1a967719e98624a20c48dfb5a Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Mon, 13 Oct 2014 15:54:03 -0700 Subject: x86: optimize resource lookups for ioremap We have a large university system in the UK that is experiencing very long delays modprobing the driver for a specific I/O device. The delay is from 8-10 minutes per device and there are 31 devices in the system. This 4 to 5 hour delay in starting up those I/O devices is very much a burden on the customer. There are two causes for requiring a restart/reload of the drivers. First is periodic preventive maintenance (PM) and the second is if any of the devices experience a fatal error. Both of these trigger this excessively long delay in bringing the system back up to full capability. The problem was tracked down to a very slow IOREMAP operation and the excessively long ioresource lookup to insure that the user is not attempting to ioremap RAM. These patches provide a speed up to that function. The modprobe time appears to be affected quite a bit by previous activity on the ioresource list, which I suspect is due to cache preloading. While the overall improvement is impacted by other overhead of starting the devices, this drastically improves the modprobe time. Also our system is considerably smaller so the percentages gained will not be the same. Best case improvement with the modprobe on our 20 device smallish system was from 'real 5m51.913s' to 'real 0m18.275s'. This patch (of 2): Since the ioremap operation is verifying that the specified address range is NOT RAM, it will search the entire ioresource list if the condition is true. To make matters worse, it does this one 4k page at a time. For a 128M BAR region this is 32 passes to determine the entire region does not contain any RAM addresses. This patch provides another resource lookup function, region_is_ram, that searches for the entire region specified, verifying that it is completely contained within the resource region. If it is found, then it is checked to be RAM or not, within a single pass. The return result reflects if it was found or not (-1), and whether it is RAM (1) or not (0). This allows the caller to fallback to the previous page by page search if it was not found. [akpm@linux-foundation.org: fix spellos and typos in comment] Signed-off-by: Mike Travis Acked-by: Alex Thorlton Reviewed-by: Cliff Wickman Cc: Thomas Gleixner Cc: H. 
Peter Anvin Cc: Mark Salter Cc: Dave Young Cc: Rik van Riel Cc: Peter Zijlstra Cc: Mel Gorman Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 1 + kernel/resource.c | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index fa0d74e06428..4cd45cb95e6d 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -347,6 +347,7 @@ static inline int put_page_unless_one(struct page *page) } extern int page_is_ram(unsigned long pfn); +extern int region_is_ram(resource_size_t phys_addr, unsigned long size); /* Support for virtually mapped pages */ struct page *vmalloc_to_page(const void *addr); diff --git a/kernel/resource.c b/kernel/resource.c index 46322019ab7d..0bcebffc4e77 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -491,6 +491,42 @@ int __weak page_is_ram(unsigned long pfn) } EXPORT_SYMBOL_GPL(page_is_ram); +/* + * Search for a resouce entry that fully contains the specified region. + * If found, return 1 if it is RAM, 0 if not. + * If not found, or region is not fully contained, return -1 + * + * Used by the ioremap functions to ensure the user is not remapping RAM and is + * a vast speed up over walking through the resource table page by page. + */ +int region_is_ram(resource_size_t start, unsigned long size) +{ + struct resource *p; + resource_size_t end = start + size - 1; + int flags = IORESOURCE_MEM | IORESOURCE_BUSY; + const char *name = "System RAM"; + int ret = -1; + + read_lock(&resource_lock); + for (p = iomem_resource.child; p ; p = p->sibling) { + if (end < p->start) + continue; + + if (p->start <= start && end <= p->end) { + /* resource fully contains region */ + if ((p->flags != flags) || strcmp(p->name, name)) + ret = 0; + else + ret = 1; + break; + } + if (p->end < start) + break; /* not found */ + } + read_unlock(&resource_lock); + return ret; +} + void __weak arch_remove_reservations(struct resource *avail) { } -- cgit v1.2.3 From b0bfb63118612e3614cf77b115c00f895a42c96a Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Mon, 13 Oct 2014 15:54:27 -0700 Subject: lib: string: Make all calls to strnicmp into calls to strncasecmp The previous patch made strnicmp into a wrapper for strncasecmp. This patch makes all in-tree users of strnicmp call strncasecmp directly, while still making sure that the strnicmp symbol can be used by out-of-tree modules. It should be considered a temporary hack until all in-tree callers have been converted. 
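Illustration (editorial, not part of the patch): a hypothetical call site of the kind this series converts. In-tree code now calls strncasecmp() directly, while the strnicmp macro and the exported strnicmp symbol keep out-of-tree users building until they are converted as well.

    #include <linux/types.h>
    #include <linux/string.h>

    /* Hypothetical helper, for illustration only. */
    static bool opt_is_read_only(const char *opt)
    {
            /* was: return strnicmp(opt, "ro", 2) == 0; */
            return strncasecmp(opt, "ro", 2) == 0;
    }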
Signed-off-by: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/string.h | 2 +- lib/string.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/string.h b/include/linux/string.h index d36977e029af..e6edfe51575a 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -41,7 +41,7 @@ extern int strcmp(const char *,const char *); extern int strncmp(const char *,const char *,__kernel_size_t); #endif #ifndef __HAVE_ARCH_STRNICMP -extern int strnicmp(const char *, const char *, __kernel_size_t); +#define strnicmp strncasecmp #endif #ifndef __HAVE_ARCH_STRCASECMP extern int strcasecmp(const char *s1, const char *s2); diff --git a/lib/string.c b/lib/string.c index 3181e267a033..2fc20aa06f84 100644 --- a/lib/string.c +++ b/lib/string.c @@ -59,6 +59,7 @@ int strncasecmp(const char *s1, const char *s2, size_t len) EXPORT_SYMBOL(strncasecmp); #endif #ifndef __HAVE_ARCH_STRNICMP +#undef strnicmp int strnicmp(const char *s1, const char *s2, size_t len) { return strncasecmp(s1, s2, len); -- cgit v1.2.3 From 3db2e9cdc085144e243495137273e2318c53a82f Mon Sep 17 00:00:00 2001 From: Daniel Walter Date: Mon, 13 Oct 2014 15:55:09 -0700 Subject: include/linux: remove strict_strto* definitions Remove obsolete and unused strict_strto* functions Signed-off-by: Daniel Walter Acked-by: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 35c8ffb0136f..40728cf1c452 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -376,10 +376,6 @@ extern unsigned long simple_strtoul(const char *,char **,unsigned int); extern long simple_strtol(const char *,char **,unsigned int); extern unsigned long long simple_strtoull(const char *,char **,unsigned int); extern long long simple_strtoll(const char *,char **,unsigned int); -#define strict_strtoul kstrtoul -#define strict_strtol kstrtol -#define strict_strtoull kstrtoull -#define strict_strtoll kstrtoll extern int num_to_str(char *buf, int size, unsigned long long num); -- cgit v1.2.3 From d295634e965ecacdb44c6760b3ca4eae08812715 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 13 Oct 2014 15:55:11 -0700 Subject: lib / string_helpers: move documentation to c-file The introduced function string_escape_mem() is a kind of opposite to string_unescape. We have several users of such functionality each of them created custom implementation. The series contains clean up of test suite, adding new call, and switching few users to use it via %*pE specifier. Test suite covers all of existing and most of potential use cases. This patch (of 11): The documentation of API belongs to c-file. This patch moves it accordingly. There is no functional change. Signed-off-by: Andy Shevchenko Cc: "John W . 
Linville" Cc: Johannes Berg Cc: Greg Kroah-Hartman Cc: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/string_helpers.h | 34 ---------------------------------- lib/string_helpers.c | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 34 deletions(-) (limited to 'include/linux') diff --git a/include/linux/string_helpers.h b/include/linux/string_helpers.h index 3eeee9672a4a..5a30f2a86239 100644 --- a/include/linux/string_helpers.h +++ b/include/linux/string_helpers.h @@ -20,40 +20,6 @@ int string_get_size(u64 size, enum string_size_units units, #define UNESCAPE_ANY \ (UNESCAPE_SPACE | UNESCAPE_OCTAL | UNESCAPE_HEX | UNESCAPE_SPECIAL) -/** - * string_unescape - unquote characters in the given string - * @src: source buffer (escaped) - * @dst: destination buffer (unescaped) - * @size: size of the destination buffer (0 to unlimit) - * @flags: combination of the flags (bitwise OR): - * %UNESCAPE_SPACE: - * '\f' - form feed - * '\n' - new line - * '\r' - carriage return - * '\t' - horizontal tab - * '\v' - vertical tab - * %UNESCAPE_OCTAL: - * '\NNN' - byte with octal value NNN (1 to 3 digits) - * %UNESCAPE_HEX: - * '\xHH' - byte with hexadecimal value HH (1 to 2 digits) - * %UNESCAPE_SPECIAL: - * '\"' - double quote - * '\\' - backslash - * '\a' - alert (BEL) - * '\e' - escape - * %UNESCAPE_ANY: - * all previous together - * - * Returns amount of characters processed to the destination buffer excluding - * trailing '\0'. - * - * Because the size of the output will be the same as or less than the size of - * the input, the transformation may be performed in place. - * - * Caller must provide valid source and destination pointers. Be aware that - * destination buffer will always be NULL-terminated. Source string must be - * NULL-terminated as well. - */ int string_unescape(char *src, char *dst, size_t size, unsigned int flags); static inline int string_unescape_inplace(char *buf, unsigned int flags) diff --git a/lib/string_helpers.c b/lib/string_helpers.c index 29033f319aea..74ec60469640 100644 --- a/lib/string_helpers.c +++ b/lib/string_helpers.c @@ -168,6 +168,44 @@ static bool unescape_special(char **src, char **dst) return true; } +/** + * string_unescape - unquote characters in the given string + * @src: source buffer (escaped) + * @dst: destination buffer (unescaped) + * @size: size of the destination buffer (0 to unlimit) + * @flags: combination of the flags (bitwise OR): + * %UNESCAPE_SPACE: + * '\f' - form feed + * '\n' - new line + * '\r' - carriage return + * '\t' - horizontal tab + * '\v' - vertical tab + * %UNESCAPE_OCTAL: + * '\NNN' - byte with octal value NNN (1 to 3 digits) + * %UNESCAPE_HEX: + * '\xHH' - byte with hexadecimal value HH (1 to 2 digits) + * %UNESCAPE_SPECIAL: + * '\"' - double quote + * '\\' - backslash + * '\a' - alert (BEL) + * '\e' - escape + * %UNESCAPE_ANY: + * all previous together + * + * Description: + * The function unquotes characters in the given string. + * + * Because the size of the output will be the same as or less than the size of + * the input, the transformation may be performed in place. + * + * Caller must provide valid source and destination pointers. Be aware that + * destination buffer will always be NULL-terminated. Source string must be + * NULL-terminated as well. + * + * Return: + * The amount of the characters processed to the destination buffer excluding + * trailing '\0' is returned. 
+ */ int string_unescape(char *src, char *dst, size_t size, unsigned int flags) { char *out = dst; -- cgit v1.2.3 From c8250381c8272a9828fdd353171727b154fbd296 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 13 Oct 2014 15:55:16 -0700 Subject: lib / string_helpers: introduce string_escape_mem() This is almost the opposite function to string_unescape(). Nevertheless it handles \0 and could be used for any byte buffer. The documentation is supplied together with the function prototype. The test cases covers most of the scenarios and would be expanded later on. [akpm@linux-foundation.org: avoid 1k stack consumption] Signed-off-by: Andy Shevchenko Cc: "John W . Linville" Cc: Johannes Berg Cc: Greg Kroah-Hartman Cc: Joe Perches Cc: Wu Fengguang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/string_helpers.h | 31 +++++ lib/string_helpers.c | 274 +++++++++++++++++++++++++++++++++++++++++ lib/test-string_helpers.c | 240 +++++++++++++++++++++++++++++++++++- 3 files changed, 541 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/string_helpers.h b/include/linux/string_helpers.h index 5a30f2a86239..6eb567ac56bc 100644 --- a/include/linux/string_helpers.h +++ b/include/linux/string_helpers.h @@ -37,4 +37,35 @@ static inline int string_unescape_any_inplace(char *buf) return string_unescape_any(buf, buf, 0); } +#define ESCAPE_SPACE 0x01 +#define ESCAPE_SPECIAL 0x02 +#define ESCAPE_NULL 0x04 +#define ESCAPE_OCTAL 0x08 +#define ESCAPE_ANY \ + (ESCAPE_SPACE | ESCAPE_OCTAL | ESCAPE_SPECIAL | ESCAPE_NULL) +#define ESCAPE_NP 0x10 +#define ESCAPE_ANY_NP (ESCAPE_ANY | ESCAPE_NP) +#define ESCAPE_HEX 0x20 + +int string_escape_mem(const char *src, size_t isz, char **dst, size_t osz, + unsigned int flags, const char *esc); + +static inline int string_escape_mem_any_np(const char *src, size_t isz, + char **dst, size_t osz, const char *esc) +{ + return string_escape_mem(src, isz, dst, osz, ESCAPE_ANY_NP, esc); +} + +static inline int string_escape_str(const char *src, char **dst, size_t sz, + unsigned int flags, const char *esc) +{ + return string_escape_mem(src, strlen(src), dst, sz, flags, esc); +} + +static inline int string_escape_str_any_np(const char *src, char **dst, + size_t sz, const char *esc) +{ + return string_escape_str(src, dst, sz, ESCAPE_ANY_NP, esc); +} + #endif diff --git a/lib/string_helpers.c b/lib/string_helpers.c index 74ec60469640..58b78ba57439 100644 --- a/lib/string_helpers.c +++ b/lib/string_helpers.c @@ -8,6 +8,8 @@ #include #include #include +#include +#include #include /** @@ -240,3 +242,275 @@ int string_unescape(char *src, char *dst, size_t size, unsigned int flags) return out - dst; } EXPORT_SYMBOL(string_unescape); + +static int escape_passthrough(unsigned char c, char **dst, size_t *osz) +{ + char *out = *dst; + + if (*osz < 1) + return -ENOMEM; + + *out++ = c; + + *dst = out; + *osz -= 1; + + return 1; +} + +static int escape_space(unsigned char c, char **dst, size_t *osz) +{ + char *out = *dst; + unsigned char to; + + if (*osz < 2) + return -ENOMEM; + + switch (c) { + case '\n': + to = 'n'; + break; + case '\r': + to = 'r'; + break; + case '\t': + to = 't'; + break; + case '\v': + to = 'v'; + break; + case '\f': + to = 'f'; + break; + default: + return 0; + } + + *out++ = '\\'; + *out++ = to; + + *dst = out; + *osz -= 2; + + return 1; +} + +static int escape_special(unsigned char c, char **dst, size_t *osz) +{ + char *out = *dst; + unsigned char to; + + if (*osz < 2) + return -ENOMEM; + + switch (c) { + case 
'\\': + to = '\\'; + break; + case '\a': + to = 'a'; + break; + case '\e': + to = 'e'; + break; + default: + return 0; + } + + *out++ = '\\'; + *out++ = to; + + *dst = out; + *osz -= 2; + + return 1; +} + +static int escape_null(unsigned char c, char **dst, size_t *osz) +{ + char *out = *dst; + + if (*osz < 2) + return -ENOMEM; + + if (c) + return 0; + + *out++ = '\\'; + *out++ = '0'; + + *dst = out; + *osz -= 2; + + return 1; +} + +static int escape_octal(unsigned char c, char **dst, size_t *osz) +{ + char *out = *dst; + + if (*osz < 4) + return -ENOMEM; + + *out++ = '\\'; + *out++ = ((c >> 6) & 0x07) + '0'; + *out++ = ((c >> 3) & 0x07) + '0'; + *out++ = ((c >> 0) & 0x07) + '0'; + + *dst = out; + *osz -= 4; + + return 1; +} + +static int escape_hex(unsigned char c, char **dst, size_t *osz) +{ + char *out = *dst; + + if (*osz < 4) + return -ENOMEM; + + *out++ = '\\'; + *out++ = 'x'; + *out++ = hex_asc_hi(c); + *out++ = hex_asc_lo(c); + + *dst = out; + *osz -= 4; + + return 1; +} + +/** + * string_escape_mem - quote characters in the given memory buffer + * @src: source buffer (unescaped) + * @isz: source buffer size + * @dst: destination buffer (escaped) + * @osz: destination buffer size + * @flags: combination of the flags (bitwise OR): + * %ESCAPE_SPACE: + * '\f' - form feed + * '\n' - new line + * '\r' - carriage return + * '\t' - horizontal tab + * '\v' - vertical tab + * %ESCAPE_SPECIAL: + * '\\' - backslash + * '\a' - alert (BEL) + * '\e' - escape + * %ESCAPE_NULL: + * '\0' - null + * %ESCAPE_OCTAL: + * '\NNN' - byte with octal value NNN (3 digits) + * %ESCAPE_ANY: + * all previous together + * %ESCAPE_NP: + * escape only non-printable characters (checked by isprint) + * %ESCAPE_ANY_NP: + * all previous together + * %ESCAPE_HEX: + * '\xHH' - byte with hexadecimal value HH (2 digits) + * @esc: NULL-terminated string of characters any of which, if found in + * the source, has to be escaped + * + * Description: + * The process of escaping byte buffer includes several parts. They are applied + * in the following sequence. + * 1. The character is matched to the printable class, if asked, and in + * case of match it passes through to the output. + * 2. The character is not matched to the one from @esc string and thus + * must go as is to the output. + * 3. The character is checked if it falls into the class given by @flags. + * %ESCAPE_OCTAL and %ESCAPE_HEX are going last since they cover any + * character. Note that they actually can't go together, otherwise + * %ESCAPE_HEX will be ignored. + * + * Caller must provide valid source and destination pointers. Be aware that + * destination buffer will not be NULL-terminated, thus caller have to append + * it if needs. + * + * Return: + * The amount of the characters processed to the destination buffer, or + * %-ENOMEM if the size of buffer is not enough to put an escaped character is + * returned. + * + * Even in the case of error @dst pointer will be updated to point to the byte + * after the last processed character. 
+ */ +int string_escape_mem(const char *src, size_t isz, char **dst, size_t osz, + unsigned int flags, const char *esc) +{ + char *out = *dst, *p = out; + bool is_dict = esc && *esc; + int ret = 0; + + while (isz--) { + unsigned char c = *src++; + + /* + * Apply rules in the following sequence: + * - the character is printable, when @flags has + * %ESCAPE_NP bit set + * - the @esc string is supplied and does not contain a + * character under question + * - the character doesn't fall into a class of symbols + * defined by given @flags + * In these cases we just pass through a character to the + * output buffer. + */ + if ((flags & ESCAPE_NP && isprint(c)) || + (is_dict && !strchr(esc, c))) { + /* do nothing */ + } else { + if (flags & ESCAPE_SPACE) { + ret = escape_space(c, &p, &osz); + if (ret < 0) + break; + if (ret > 0) + continue; + } + + if (flags & ESCAPE_SPECIAL) { + ret = escape_special(c, &p, &osz); + if (ret < 0) + break; + if (ret > 0) + continue; + } + + if (flags & ESCAPE_NULL) { + ret = escape_null(c, &p, &osz); + if (ret < 0) + break; + if (ret > 0) + continue; + } + + /* ESCAPE_OCTAL and ESCAPE_HEX always go last */ + if (flags & ESCAPE_OCTAL) { + ret = escape_octal(c, &p, &osz); + if (ret < 0) + break; + continue; + } + if (flags & ESCAPE_HEX) { + ret = escape_hex(c, &p, &osz); + if (ret < 0) + break; + continue; + } + } + + ret = escape_passthrough(c, &p, &osz); + if (ret < 0) + break; + } + + *dst = p; + + if (ret < 0) + return ret; + + return p - out; +} +EXPORT_SYMBOL(string_escape_mem); diff --git a/lib/test-string_helpers.c b/lib/test-string_helpers.c index ac44c9245dcf..ab0d30e1e18f 100644 --- a/lib/test-string_helpers.c +++ b/lib/test-string_helpers.c @@ -5,6 +5,7 @@ #include #include +#include #include #include #include @@ -62,10 +63,14 @@ static const struct test_string strings[] __initconst = { static void __init test_string_unescape(const char *name, unsigned int flags, bool inplace) { - char in[256]; - char out_test[256]; - char out_real[256]; - int i, p = 0, q_test = 0, q_real = sizeof(out_real); + int q_real = 256; + char *in = kmalloc(q_real, GFP_KERNEL); + char *out_test = kmalloc(q_real, GFP_KERNEL); + char *out_real = kmalloc(q_real, GFP_KERNEL); + int i, p = 0, q_test = 0; + + if (!in || !out_test || !out_real) + goto out; for (i = 0; i < ARRAY_SIZE(strings); i++) { const char *s = strings[i].in; @@ -100,6 +105,223 @@ static void __init test_string_unescape(const char *name, unsigned int flags, test_string_check_buf(name, flags, in, p - 1, out_real, q_real, out_test, q_test); +out: + kfree(out_real); + kfree(out_test); + kfree(in); +} + +struct test_string_1 { + const char *out; + unsigned int flags; +}; + +#define TEST_STRING_2_MAX_S1 32 +struct test_string_2 { + const char *in; + struct test_string_1 s1[TEST_STRING_2_MAX_S1]; +}; + +#define TEST_STRING_2_DICT_0 NULL +static const struct test_string_2 escape0[] __initconst = {{ + .in = "\f\\ \n\r\t\v", + .s1 = {{ + .out = "\\f\\ \\n\\r\\t\\v", + .flags = ESCAPE_SPACE, + },{ + .out = "\\f\\134\\040\\n\\r\\t\\v", + .flags = ESCAPE_SPACE | ESCAPE_OCTAL, + },{ + .out = "\\f\\x5c\\x20\\n\\r\\t\\v", + .flags = ESCAPE_SPACE | ESCAPE_HEX, + },{ + /* terminator */ + }}, +},{ + .in = "\\h\\\"\a\e\\", + .s1 = {{ + .out = "\\\\h\\\\\"\\a\\e\\\\", + .flags = ESCAPE_SPECIAL, + },{ + .out = "\\\\\\150\\\\\\042\\a\\e\\\\", + .flags = ESCAPE_SPECIAL | ESCAPE_OCTAL, + },{ + .out = "\\\\\\x68\\\\\\x22\\a\\e\\\\", + .flags = ESCAPE_SPECIAL | ESCAPE_HEX, + },{ + /* terminator */ + }}, +},{ + .in = "\eb \\C\007\"\x90\r]", + 
.s1 = {{ + .out = "\eb \\C\007\"\x90\\r]", + .flags = ESCAPE_SPACE, + },{ + .out = "\\eb \\\\C\\a\"\x90\r]", + .flags = ESCAPE_SPECIAL, + },{ + .out = "\\eb \\\\C\\a\"\x90\\r]", + .flags = ESCAPE_SPACE | ESCAPE_SPECIAL, + },{ + .out = "\\033\\142\\040\\134\\103\\007\\042\\220\\015\\135", + .flags = ESCAPE_OCTAL, + },{ + .out = "\\033\\142\\040\\134\\103\\007\\042\\220\\r\\135", + .flags = ESCAPE_SPACE | ESCAPE_OCTAL, + },{ + .out = "\\e\\142\\040\\\\\\103\\a\\042\\220\\015\\135", + .flags = ESCAPE_SPECIAL | ESCAPE_OCTAL, + },{ + .out = "\\e\\142\\040\\\\\\103\\a\\042\\220\\r\\135", + .flags = ESCAPE_SPACE | ESCAPE_SPECIAL | ESCAPE_OCTAL, + },{ + .out = "\eb \\C\007\"\x90\r]", + .flags = ESCAPE_NP, + },{ + .out = "\eb \\C\007\"\x90\\r]", + .flags = ESCAPE_SPACE | ESCAPE_NP, + },{ + .out = "\\eb \\C\\a\"\x90\r]", + .flags = ESCAPE_SPECIAL | ESCAPE_NP, + },{ + .out = "\\eb \\C\\a\"\x90\\r]", + .flags = ESCAPE_SPACE | ESCAPE_SPECIAL | ESCAPE_NP, + },{ + .out = "\\033b \\C\\007\"\\220\\015]", + .flags = ESCAPE_OCTAL | ESCAPE_NP, + },{ + .out = "\\033b \\C\\007\"\\220\\r]", + .flags = ESCAPE_SPACE | ESCAPE_OCTAL | ESCAPE_NP, + },{ + .out = "\\eb \\C\\a\"\\220\\r]", + .flags = ESCAPE_SPECIAL | ESCAPE_SPACE | ESCAPE_OCTAL | + ESCAPE_NP, + },{ + .out = "\\x1bb \\C\\x07\"\\x90\\x0d]", + .flags = ESCAPE_NP | ESCAPE_HEX, + },{ + /* terminator */ + }}, +},{ + /* terminator */ +}}; + +#define TEST_STRING_2_DICT_1 "b\\ \t\r" +static const struct test_string_2 escape1[] __initconst = {{ + .in = "\f\\ \n\r\t\v", + .s1 = {{ + .out = "\f\\134\\040\n\\015\\011\v", + .flags = ESCAPE_OCTAL, + },{ + .out = "\f\\x5c\\x20\n\\x0d\\x09\v", + .flags = ESCAPE_HEX, + },{ + /* terminator */ + }}, +},{ + .in = "\\h\\\"\a\e\\", + .s1 = {{ + .out = "\\134h\\134\"\a\e\\134", + .flags = ESCAPE_OCTAL, + },{ + /* terminator */ + }}, +},{ + .in = "\eb \\C\007\"\x90\r]", + .s1 = {{ + .out = "\e\\142\\040\\134C\007\"\x90\\015]", + .flags = ESCAPE_OCTAL, + },{ + /* terminator */ + }}, +},{ + /* terminator */ +}}; + +static __init const char *test_string_find_match(const struct test_string_2 *s2, + unsigned int flags) +{ + const struct test_string_1 *s1 = s2->s1; + unsigned int i; + + if (!flags) + return s2->in; + + /* Test cases are NULL-aware */ + flags &= ~ESCAPE_NULL; + + /* ESCAPE_OCTAL has a higher priority */ + if (flags & ESCAPE_OCTAL) + flags &= ~ESCAPE_HEX; + + for (i = 0; i < TEST_STRING_2_MAX_S1 && s1->out; i++, s1++) + if (s1->flags == flags) + return s1->out; + return NULL; +} + +static __init void test_string_escape(const char *name, + const struct test_string_2 *s2, + unsigned int flags, const char *esc) +{ + int q_real = 512; + char *out_test = kmalloc(q_real, GFP_KERNEL); + char *out_real = kmalloc(q_real, GFP_KERNEL); + char *in = kmalloc(256, GFP_KERNEL); + char *buf = out_real; + int p = 0, q_test = 0; + + if (!out_test || !out_real || !in) + goto out; + + for (; s2->in; s2++) { + const char *out; + int len; + + /* NULL injection */ + if (flags & ESCAPE_NULL) { + in[p++] = '\0'; + out_test[q_test++] = '\\'; + out_test[q_test++] = '0'; + } + + /* Don't try strings that have no output */ + out = test_string_find_match(s2, flags); + if (!out) + continue; + + /* Copy string to in buffer */ + len = strlen(s2->in); + memcpy(&in[p], s2->in, len); + p += len; + + /* Copy expected result for given flags */ + len = strlen(out); + memcpy(&out_test[q_test], out, len); + q_test += len; + } + + q_real = string_escape_mem(in, p, &buf, q_real, flags, esc); + + test_string_check_buf(name, flags, in, p, out_real, q_real, 
out_test, + q_test); +out: + kfree(in); + kfree(out_real); + kfree(out_test); +} + +static __init void test_string_escape_nomem(void) +{ + char *in = "\eb \\C\007\"\x90\r]"; + char out[64], *buf = out; + int rc = -ENOMEM, ret; + + ret = string_escape_str_any_np(in, &buf, strlen(in), NULL); + if (ret == rc) + return; + + pr_err("Test 'escape nomem' failed: got %d instead of %d\n", ret, rc); } static int __init test_string_helpers_init(void) @@ -112,6 +334,16 @@ static int __init test_string_helpers_init(void) test_string_unescape("unescape inplace", get_random_int() % (UNESCAPE_ANY + 1), true); + /* Without dictionary */ + for (i = 0; i < (ESCAPE_ANY_NP | ESCAPE_HEX) + 1; i++) + test_string_escape("escape 0", escape0, i, TEST_STRING_2_DICT_0); + + /* With dictionary */ + for (i = 0; i < (ESCAPE_ANY_NP | ESCAPE_HEX) + 1; i++) + test_string_escape("escape 1", escape1, i, TEST_STRING_2_DICT_1); + + test_string_escape_nomem(); + return -EINVAL; } module_init(test_string_helpers_init); -- cgit v1.2.3 From 6e7458a6f074c71e74cda31c483114e65ea0f570 Mon Sep 17 00:00:00 2001 From: Ulrich Obergfell Date: Mon, 13 Oct 2014 15:55:35 -0700 Subject: kernel/watchdog.c: control hard lockup detection default In some cases we don't want hard lockup detection enabled by default. An example is when running as a guest. Introduce watchdog_enable_hardlockup_detector(bool) allowing those cases to disable hard lockup detection. This must be executed early by the boot processor from e.g. smp_prepare_boot_cpu, in order to allow kernel command line arguments to override it, as well as to avoid hard lockup detection being enabled before we've had a chance to indicate that it's unwanted. In summary, initial boot: default=enabled smp_prepare_boot_cpu watchdog_enable_hardlockup_detector(false): default=disabled cmdline has 'nmi_watchdog=1': default=enabled The running kernel still has the ability to enable/disable at any time with /proc/sys/kernel/nmi_watchdog us usual. However even when the default has been overridden /proc/sys/kernel/nmi_watchdog will initially show '1'. To truly turn it on one must disable/enable it, i.e. echo 0 > /proc/sys/kernel/nmi_watchdog echo 1 > /proc/sys/kernel/nmi_watchdog This patch will be immediately useful for KVM with the next patch of this series. Other hypervisor guest types may find it useful as well. [akpm@linux-foundation.org: fix build] [dzickus@redhat.com: fix compile issues on sparc] Signed-off-by: Ulrich Obergfell Signed-off-by: Andrew Jones Signed-off-by: Don Zickus Signed-off-by: Don Zickus Cc: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/nmi.h | 13 +++++++++++++ kernel/watchdog.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 61 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 1d2a6ab6b8bb..9b2022ab4d85 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -24,6 +24,19 @@ static inline void touch_nmi_watchdog(void) } #endif +#if defined(CONFIG_HARDLOCKUP_DETECTOR) +extern void watchdog_enable_hardlockup_detector(bool val); +extern bool watchdog_hardlockup_detector_is_enabled(void); +#else +static inline void watchdog_enable_hardlockup_detector(bool val) +{ +} +static inline bool watchdog_hardlockup_detector_is_enabled(void) +{ + return true; +} +#endif + /* * Create trigger_all_cpu_backtrace() out of the arch-provided * base function. 
Return whether such support was available, diff --git a/kernel/watchdog.c b/kernel/watchdog.c index ff7fd80bef99..49e9537f3673 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -59,6 +59,25 @@ static unsigned long soft_lockup_nmi_warn; static int hardlockup_panic = CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE; +static bool hardlockup_detector_enabled = true; +/* + * We may not want to enable hard lockup detection by default in all cases, + * for example when running the kernel as a guest on a hypervisor. In these + * cases this function can be called to disable hard lockup detection. This + * function should only be executed once by the boot processor before the + * kernel command line parameters are parsed, because otherwise it is not + * possible to override this in hardlockup_panic_setup(). + */ +void watchdog_enable_hardlockup_detector(bool val) +{ + hardlockup_detector_enabled = val; +} + +bool watchdog_hardlockup_detector_is_enabled(void) +{ + return hardlockup_detector_enabled; +} + static int __init hardlockup_panic_setup(char *str) { if (!strncmp(str, "panic", 5)) @@ -67,6 +86,14 @@ static int __init hardlockup_panic_setup(char *str) hardlockup_panic = 0; else if (!strncmp(str, "0", 1)) watchdog_user_enabled = 0; + else if (!strncmp(str, "1", 1) || !strncmp(str, "2", 1)) { + /* + * Setting 'nmi_watchdog=1' or 'nmi_watchdog=2' (legacy option) + * has the same effect. + */ + watchdog_user_enabled = 1; + watchdog_enable_hardlockup_detector(true); + } return 1; } __setup("nmi_watchdog=", hardlockup_panic_setup); @@ -465,6 +492,15 @@ static int watchdog_nmi_enable(unsigned int cpu) struct perf_event_attr *wd_attr; struct perf_event *event = per_cpu(watchdog_ev, cpu); + /* + * Some kernels need to default hard lockup detection to + * 'disabled', for example a guest on a hypervisor. + */ + if (!watchdog_hardlockup_detector_is_enabled()) { + event = ERR_PTR(-ENOENT); + goto handle_err; + } + /* is it already setup and enabled? */ if (event && event->state > PERF_EVENT_STATE_OFF) goto out; @@ -479,6 +515,7 @@ static int watchdog_nmi_enable(unsigned int cpu) /* Try to register using hardware perf events */ event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback, NULL); +handle_err: /* save cpu0 error for future comparision */ if (cpu == 0 && IS_ERR(event)) cpu0_err = PTR_ERR(event); @@ -624,11 +661,13 @@ int proc_dowatchdog(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int err, old_thresh, old_enabled; + bool old_hardlockup; static DEFINE_MUTEX(watchdog_proc_mutex); mutex_lock(&watchdog_proc_mutex); old_thresh = ACCESS_ONCE(watchdog_thresh); old_enabled = ACCESS_ONCE(watchdog_user_enabled); + old_hardlockup = watchdog_hardlockup_detector_is_enabled(); err = proc_dointvec_minmax(table, write, buffer, lenp, ppos); if (err || !write) @@ -640,15 +679,22 @@ int proc_dowatchdog(struct ctl_table *table, int write, * disabled. The 'watchdog_running' variable check in * watchdog_*_all_cpus() function takes care of this. */ - if (watchdog_user_enabled && watchdog_thresh) + if (watchdog_user_enabled && watchdog_thresh) { + /* + * Prevent a change in watchdog_thresh accidentally overriding + * the enablement of the hardlockup detector. 
+ */ + if (watchdog_user_enabled != old_enabled) + watchdog_enable_hardlockup_detector(true); err = watchdog_enable_all_cpus(old_thresh != watchdog_thresh); - else + } else watchdog_disable_all_cpus(); /* Restore old values on failure */ if (err) { watchdog_thresh = old_thresh; watchdog_user_enabled = old_enabled; + watchdog_enable_hardlockup_detector(old_hardlockup); } out: mutex_unlock(&watchdog_proc_mutex); -- cgit v1.2.3 From 63a12d9d01831208a47f5c0fbbf93f503d1fb162 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 13 Oct 2014 15:55:44 -0700 Subject: kernel/param: consolidate __{start,stop}___param[] in Consolidate the various external const and non-const declarations of __start___param[] and __stop___param in . This requires making a few struct kernel_param pointers in kernel/params.c const. Signed-off-by: Geert Uytterhoeven Acked-by: Rusty Russell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/moduleparam.h | 2 ++ init/main.c | 2 -- kernel/params.c | 7 +++---- 3 files changed, 5 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index b43f4752304e..1c9effa25e26 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -78,6 +78,8 @@ struct kernel_param { }; }; +extern const struct kernel_param __start___param[], __stop___param[]; + /* Special one for strings we want to copy into */ struct kparam_string { unsigned int maxlen; diff --git a/init/main.c b/init/main.c index 89ec862da2d4..800a0daede7e 100644 --- a/init/main.c +++ b/init/main.c @@ -501,7 +501,6 @@ asmlinkage __visible void __init start_kernel(void) { char *command_line; char *after_dashes; - extern const struct kernel_param __start___param[], __stop___param[]; /* * Need to run as early as possible, to initialize the @@ -844,7 +843,6 @@ static char *initcall_level_names[] __initdata = { static void __init do_initcall_level(int level) { - extern const struct kernel_param __start___param[], __stop___param[]; initcall_t *fn; strcpy(initcall_command_line, saved_command_line); diff --git a/kernel/params.c b/kernel/params.c index 041b5899d5e2..db97b791390f 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -513,8 +514,6 @@ EXPORT_SYMBOL(param_ops_string); #define to_module_attr(n) container_of(n, struct module_attribute, attr) #define to_module_kobject(n) container_of(n, struct module_kobject, kobj) -extern struct kernel_param __start___param[], __stop___param[]; - struct param_attribute { struct module_attribute mattr; @@ -774,7 +773,7 @@ static struct module_kobject * __init locate_module_kobject(const char *name) } static void __init kernel_add_sysfs_param(const char *name, - struct kernel_param *kparam, + const struct kernel_param *kparam, unsigned int name_skip) { struct module_kobject *mk; @@ -809,7 +808,7 @@ static void __init kernel_add_sysfs_param(const char *name, */ static void __init param_sysfs_builtin(void) { - struct kernel_param *kp; + const struct kernel_param *kp; unsigned int name_len; char modname[MODULE_NAME_LEN]; -- cgit v1.2.3 From 64e455079e1bd7787cc47be30b7f601ce682a5f6 Mon Sep 17 00:00:00 2001 From: Peter Feiner Date: Mon, 13 Oct 2014 15:55:46 -0700 Subject: mm: softdirty: enable write notifications on VMAs after VM_SOFTDIRTY cleared For VMAs that don't want write notifications, PTEs created for read faults have their write bit set. 
If the read fault happens after VM_SOFTDIRTY is cleared, then the PTE's softdirty bit will remain clear after subsequent writes. Here's a simple code snippet to demonstrate the bug: char* m = mmap(NULL, getpagesize(), PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_SHARED, -1, 0); system("echo 4 > /proc/$PPID/clear_refs"); /* clear VM_SOFTDIRTY */ assert(*m == '\0'); /* new PTE allows write access */ assert(!soft_dirty(x)); *m = 'x'; /* should dirty the page */ assert(soft_dirty(x)); /* fails */ With this patch, write notifications are enabled when VM_SOFTDIRTY is cleared. Furthermore, to avoid unnecessary faults, write notifications are disabled when VM_SOFTDIRTY is set. As a side effect of enabling and disabling write notifications with care, this patch fixes a bug in mprotect where vm_page_prot bits set by drivers were zapped on mprotect. An analogous bug was fixed in mmap by commit c9d0bf241451 ("mm: uncached vma support with writenotify"). Signed-off-by: Peter Feiner Reported-by: Peter Feiner Suggested-by: Kirill A. Shutemov Cc: Cyrill Gorcunov Cc: Pavel Emelyanov Cc: Jamie Liu Cc: Hugh Dickins Cc: Naoya Horiguchi Cc: Bjorn Helgaas Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/task_mmu.c | 19 +++++++++++++----- include/asm-generic/pgtable.h | 14 ++++++++++++++ include/linux/mm.h | 5 +++++ mm/memory.c | 3 ++- mm/mmap.c | 45 +++++++++++++++++++++++++++---------------- mm/mprotect.c | 20 +++++-------------- 6 files changed, 68 insertions(+), 38 deletions(-) (limited to 'include/linux') diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index b7a7dc963a35..4e0388cffe3d 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -827,8 +827,21 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, .private = &cp, }; down_read(&mm->mmap_sem); - if (type == CLEAR_REFS_SOFT_DIRTY) + if (type == CLEAR_REFS_SOFT_DIRTY) { + for (vma = mm->mmap; vma; vma = vma->vm_next) { + if (!(vma->vm_flags & VM_SOFTDIRTY)) + continue; + up_read(&mm->mmap_sem); + down_write(&mm->mmap_sem); + for (vma = mm->mmap; vma; vma = vma->vm_next) { + vma->vm_flags &= ~VM_SOFTDIRTY; + vma_set_page_prot(vma); + } + downgrade_write(&mm->mmap_sem); + break; + } mmu_notifier_invalidate_range_start(mm, 0, -1); + } for (vma = mm->mmap; vma; vma = vma->vm_next) { cp.vma = vma; if (is_vm_hugetlb_page(vma)) @@ -848,10 +861,6 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, continue; if (type == CLEAR_REFS_MAPPED && !vma->vm_file) continue; - if (type == CLEAR_REFS_SOFT_DIRTY) { - if (vma->vm_flags & VM_SOFTDIRTY) - vma->vm_flags &= ~VM_SOFTDIRTY; - } walk_page_range(vma->vm_start, vma->vm_end, &clear_refs_walk); } diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 081ff8826bf6..752e30d63904 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -253,6 +253,20 @@ static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b) #define pgprot_device pgprot_noncached #endif +#ifndef pgprot_modify +#define pgprot_modify pgprot_modify +static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) +{ + if (pgprot_val(oldprot) == pgprot_val(pgprot_noncached(oldprot))) + newprot = pgprot_noncached(newprot); + if (pgprot_val(oldprot) == pgprot_val(pgprot_writecombine(oldprot))) + newprot = pgprot_writecombine(newprot); + if (pgprot_val(oldprot) == pgprot_val(pgprot_device(oldprot))) + newprot = pgprot_device(newprot); + return newprot; +} +#endif + /* * When walking page tables, get the address of the next 
boundary, * or the end address of the range if that comes earlier. Although no diff --git a/include/linux/mm.h b/include/linux/mm.h index 4cd45cb95e6d..02d11ee7f19d 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1974,11 +1974,16 @@ static inline struct vm_area_struct *find_exact_vma(struct mm_struct *mm, #ifdef CONFIG_MMU pgprot_t vm_get_page_prot(unsigned long vm_flags); +void vma_set_page_prot(struct vm_area_struct *vma); #else static inline pgprot_t vm_get_page_prot(unsigned long vm_flags) { return __pgprot(0); } +static inline void vma_set_page_prot(struct vm_area_struct *vma) +{ + vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); +} #endif #ifdef CONFIG_NUMA_BALANCING diff --git a/mm/memory.c b/mm/memory.c index e229970e4223..1cc6bfbd872e 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2053,7 +2053,8 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, old_page = vm_normal_page(vma, address, orig_pte); if (!old_page) { /* - * VM_MIXEDMAP !pfn_valid() case + * VM_MIXEDMAP !pfn_valid() case, or VM_SOFTDIRTY clear on a + * VM_PFNMAP VMA. * * We should not cow pages in a shared writeable mapping. * Just mark the pages writable as we can't do any dirty diff --git a/mm/mmap.c b/mm/mmap.c index 93d28c7e5420..7f855206e7fb 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -89,6 +89,25 @@ pgprot_t vm_get_page_prot(unsigned long vm_flags) } EXPORT_SYMBOL(vm_get_page_prot); +static pgprot_t vm_pgprot_modify(pgprot_t oldprot, unsigned long vm_flags) +{ + return pgprot_modify(oldprot, vm_get_page_prot(vm_flags)); +} + +/* Update vma->vm_page_prot to reflect vma->vm_flags. */ +void vma_set_page_prot(struct vm_area_struct *vma) +{ + unsigned long vm_flags = vma->vm_flags; + + vma->vm_page_prot = vm_pgprot_modify(vma->vm_page_prot, vm_flags); + if (vma_wants_writenotify(vma)) { + vm_flags &= ~VM_SHARED; + vma->vm_page_prot = vm_pgprot_modify(vma->vm_page_prot, + vm_flags); + } +} + + int sysctl_overcommit_memory __read_mostly = OVERCOMMIT_GUESS; /* heuristic overcommit */ int sysctl_overcommit_ratio __read_mostly = 50; /* default is 50% */ unsigned long sysctl_overcommit_kbytes __read_mostly; @@ -1475,11 +1494,16 @@ int vma_wants_writenotify(struct vm_area_struct *vma) if (vma->vm_ops && vma->vm_ops->page_mkwrite) return 1; - /* The open routine did something to the protections already? */ + /* The open routine did something to the protections that pgprot_modify + * won't preserve? */ if (pgprot_val(vma->vm_page_prot) != - pgprot_val(vm_get_page_prot(vm_flags))) + pgprot_val(vm_pgprot_modify(vma->vm_page_prot, vm_flags))) return 0; + /* Do we need to track softdirty? */ + if (IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) && !(vm_flags & VM_SOFTDIRTY)) + return 1; + /* Specialty mapping? */ if (vm_flags & VM_PFNMAP) return 0; @@ -1615,21 +1639,6 @@ munmap_back: goto free_vma; } - if (vma_wants_writenotify(vma)) { - pgprot_t pprot = vma->vm_page_prot; - - /* Can vma->vm_page_prot have changed?? - * - * Answer: Yes, drivers may have changed it in their - * f_op->mmap method. - * - * Ensures that vmas marked as uncached stay that way. 
- */ - vma->vm_page_prot = vm_get_page_prot(vm_flags & ~VM_SHARED); - if (pgprot_val(pprot) == pgprot_val(pgprot_noncached(pprot))) - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - } - vma_link(mm, vma, prev, rb_link, rb_parent); /* Once vma denies write, undo our temporary denial count */ if (file) { @@ -1663,6 +1672,8 @@ out: */ vma->vm_flags |= VM_SOFTDIRTY; + vma_set_page_prot(vma); + return addr; unmap_and_free_vma: diff --git a/mm/mprotect.c b/mm/mprotect.c index c43d557941f8..ace93454ce8e 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -29,13 +29,6 @@ #include #include -#ifndef pgprot_modify -static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) -{ - return newprot; -} -#endif - /* * For a prot_numa update we only hold mmap_sem for read so there is a * potential race with faulting where a pmd was temporarily none. This @@ -93,7 +86,9 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd, * Avoid taking write faults for pages we * know to be dirty. */ - if (dirty_accountable && pte_dirty(ptent)) + if (dirty_accountable && pte_dirty(ptent) && + (pte_soft_dirty(ptent) || + !(vma->vm_flags & VM_SOFTDIRTY))) ptent = pte_mkwrite(ptent); ptep_modify_prot_commit(mm, addr, pte, ptent); updated = true; @@ -320,13 +315,8 @@ success: * held in write mode. */ vma->vm_flags = newflags; - vma->vm_page_prot = pgprot_modify(vma->vm_page_prot, - vm_get_page_prot(newflags)); - - if (vma_wants_writenotify(vma)) { - vma->vm_page_prot = vm_get_page_prot(newflags & ~VM_SHARED); - dirty_accountable = 1; - } + dirty_accountable = vma_wants_writenotify(vma); + vma_set_page_prot(vma); change_protection(vma, start, end, vma->vm_page_prot, dirty_accountable, 0); -- cgit v1.2.3
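
A note on the hard lockup detector change above: watchdog_enable_hardlockup_detector() exists so that code which knows perf NMI events are unreliable, for example a guest running under a hypervisor, can flip the default to "disabled" before the kernel command line is parsed, while "nmi_watchdog=1" (or the legacy "=2") can still force it back on. The sketch below is illustrative only; the hook name and call site are made up, and it assumes the prototype is exposed through <linux/nmi.h> by the companion header change.

	#include <linux/init.h>
	#include <linux/nmi.h>

	/* Hypothetical early-boot hook for a paravirtualized guest. */
	static void __init my_guest_early_setup(void)
	{
		/*
		 * Perf-based NMI events are typically not usable inside a
		 * guest, so default hard lockup detection to off.  This must
		 * run before command line parsing so that an explicit
		 * "nmi_watchdog=1" can still override it.
		 */
		watchdog_enable_hardlockup_detector(false);
	}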
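
The moduleparam consolidation above works because every built-in parameter is emitted into the __param linker section, delimited by __start___param and __stop___param; moving the extern declarations into <linux/moduleparam.h> means callers no longer re-declare them locally with inconsistent constness. A minimal sketch of walking that table, along the lines of what param_sysfs_builtin() already does (built-in code only):

	#include <linux/init.h>
	#include <linux/kernel.h>
	#include <linux/moduleparam.h>

	/* Illustrative only: log the name of every built-in parameter. */
	static void __init dump_builtin_params(void)
	{
		const struct kernel_param *kp;

		for (kp = __start___param; kp < __stop___param; kp++)
			pr_info("builtin param: %s\n", kp->name);
	}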
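
The reproducer quoted in the soft-dirty commit message leaves its soft_dirty() helper undefined (and tests an undeclared "x"); the bit it is meant to check is bit 55 of the page's /proc/self/pagemap entry, per Documentation/vm/soft-dirty.txt. A self-contained userspace sketch of the same test, with error handling omitted for brevity, might look like this:

	#include <assert.h>
	#include <fcntl.h>
	#include <stdint.h>
	#include <stdlib.h>
	#include <sys/mman.h>
	#include <unistd.h>

	/* Soft-dirty is bit 55 of the 64-bit pagemap entry for the page. */
	static int soft_dirty(void *addr)
	{
		uint64_t entry;
		int fd = open("/proc/self/pagemap", O_RDONLY);

		pread(fd, &entry, sizeof(entry),
		      ((uintptr_t)addr / getpagesize()) * sizeof(entry));
		close(fd);
		return (entry >> 55) & 1;
	}

	int main(void)
	{
		char *m = mmap(NULL, getpagesize(), PROT_READ | PROT_WRITE,
			       MAP_ANONYMOUS | MAP_SHARED, -1, 0);

		/* $PPID inside system()'s shell is this process. */
		system("echo 4 > /proc/$PPID/clear_refs"); /* clear VM_SOFTDIRTY */
		assert(*m == '\0');      /* read fault installs the PTE */
		assert(!soft_dirty(m));
		*m = 'x';                /* write should now fault and dirty */
		assert(soft_dirty(m));   /* fails without the patch above */
		return 0;
	}

Reading pagemap may need additional privileges on later kernels; the point of the fix is that the final assertion holds once write notifications are re-enabled when VM_SOFTDIRTY is cleared.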
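
Finally, the reason the mm/mprotect.c hunk above can simply call vma_set_page_prot() is that the generic pgprot_modify() now preserves the noncached, writecombine and device attributes a driver may already have applied to vma->vm_page_prot, instead of zapping them when protections are recomputed for write notification. A hypothetical driver mmap handler of the kind that depends on this (the driver name and register base are made up; pgprot_noncached() and remap_pfn_range() are the real interfaces):

	#include <linux/fs.h>
	#include <linux/mm.h>

	/* Hypothetical driver: map one page of device registers uncached. */
	static int mydrv_mmap(struct file *file, struct vm_area_struct *vma)
	{
		unsigned long pfn = MYDRV_REG_BASE >> PAGE_SHIFT; /* made-up base */

		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
		return remap_pfn_range(vma, vma->vm_start, pfn,
				       vma->vm_end - vma->vm_start,
				       vma->vm_page_prot);
	}

If mprotect() later rewrites vm_page_prot through vma_set_page_prot(), pgprot_modify() keeps such a mapping noncached rather than silently reverting it to cacheable, which is the mprotect analogue of the mmap fix in commit c9d0bf241451 cited in the changelog.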