From 98eb235b7febbb2941e1b442b92fc5e23b0d7a83 Mon Sep 17 00:00:00 2001
From: Andrew Morton
Date: Wed, 2 Jul 2003 08:47:30 -0700
Subject: [PATCH] page unmapping debug

From: Manfred Spraul

Manfred's latest page unmapping debug patch.

The patch adds support for a special debug mode to both the page and the slab allocator: Unused pages are removed from the kernel linear mapping. This means that now any access to freed memory will cause an immediate exception. Right now, read accesses remain totally unnoticed and write accesses may be caught by the slab poisoning, but usually far too late for a meaningful bug report.

The implementation is based on a new arch-dependent function, kernel_map_pages(), that removes the pages from the linear mapping. It's right now only implemented for i386.

Changelog:

- Add kernel_map_pages() for i386, based on change_page_attr. If DEBUG_PAGEALLOC is not set, then the function is an empty stub. The stub is in <linux/mm.h>, i.e. it exists for all archs.

- Make change_page_attr irq safe. Note that it's not fully irq safe due to the lack of the tlb flush ipi, but it's good enough for kernel_map_pages(). Another problem is that kernel_map_pages is not permitted to fail, thus PSE is disabled if DEBUG_PAGEALLOC is enabled.

- use kernel_map_pages() for the page allocator.

- use kernel_map_pages() for the slab allocator. I couldn't resist and added additional debugging support into mm/slab.c:

  * at kfree time, the complete backtrace of the kfree caller is stored in the freed object.

  * a ptrinfo() function that dumps all known data about a kernel virtual address: the pte value and, if it belongs to a slab cache, the cache name and additional info.

  * merging of common code: new helper functions obj_dbglen and obj_dbghdr for the conversion between the user-visible object pointers/len and the actual, internal addresses and len values.
---
 include/linux/mm.h   | 8 ++++++++
 include/linux/slab.h | 2 ++
 2 files changed, 10 insertions(+)
(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 492bc8aeb053..4d183974fd36 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -609,5 +609,13 @@ extern struct page * follow_page(struct mm_struct *mm, unsigned long address, int write); extern int remap_page_range(struct vm_area_struct *vma, unsigned long from, unsigned long to, unsigned long size, pgprot_t prot); + +#ifndef CONFIG_DEBUG_PAGEALLOC +static inline void +kernel_map_pages(struct page *page, int numpages, int enable) +{ +} +#endif + #endif /* __KERNEL__ */ #endif /* _LINUX_MM_H */
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 843c8d638d29..9f8bccba4ad3 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -114,6 +114,8 @@ extern kmem_cache_t *signal_cachep; extern kmem_cache_t *sighand_cachep; extern kmem_cache_t *bio_cachep; +void ptrinfo(unsigned long addr); + #endif /* __KERNEL__ */ #endif /* _LINUX_SLAB_H */
-- cgit v1.2.3

From d4388840f41d71d1570326f77860431c7080f7ed Mon Sep 17 00:00:00 2001
From: Andrew Morton
Date: Wed, 2 Jul 2003 08:47:43 -0700
Subject: [PATCH] NUMA memory reporting fix

From: Dave Hansen

The current numa meminfo code exports (via sysfs) pgdat->node_size, as totalram. This variable is consistently used elsewhere to mean "the number of physical pages that this particular node spans". This is _not_ what we want to see from meminfo, which is: "how much actual memory does this node have?"

The following patch removes pgdat->node_size, and replaces it with ->node_spanned_pages.
This is to avoid confusion with a new variable, node_present_pages, which is the _actual_ value that we want to export in meminfo. Most of the patch is a simple s/node_size/node_spanned_pages/. The node_size() macro is also removed, and replaced with new ones for node_{spanned,present}_pages() to avoid confusion. We were bitten by this problem in this bug: http://bugme.osdl.org/show_bug.cgi?id=818 Compiled and tested on NUMA-Q. --- arch/alpha/mm/numa.c | 4 ++-- arch/arm/mm/init.c | 4 ++-- arch/arm26/mm/init.c | 4 ++-- arch/i386/mm/pgtable.c | 2 +- arch/ia64/mm/init.c | 4 ++-- arch/ppc64/mm/init.c | 4 ++-- arch/ppc64/mm/numa.c | 16 ++++++++-------- arch/x86_64/mm/init.c | 2 +- arch/x86_64/mm/numa.c | 2 +- include/asm-alpha/mmzone.h | 3 +-- include/asm-i386/mmzone.h | 5 ++--- include/asm-mips64/mmzone.h | 2 +- include/asm-ppc64/mmzone.h | 1 - include/asm-x86_64/mmzone.h | 3 +-- include/linux/mmzone.h | 7 ++++++- mm/page_alloc.c | 7 ++++--- 16 files changed, 36 insertions(+), 34 deletions(-) (limited to 'include/linux') diff --git a/arch/alpha/mm/numa.c b/arch/alpha/mm/numa.c index 5a7ad83d367c..3100bb87bd50 100644 --- a/arch/alpha/mm/numa.c +++ b/arch/alpha/mm/numa.c @@ -338,7 +338,7 @@ void __init mem_init(void) lmem_map = node_mem_map(nid); pfn = NODE_DATA(nid)->node_start_pfn; - for (i = 0; i < node_size(nid); i++, pfn++) + for (i = 0; i < node_spanned_pages(nid); i++, pfn++) if (page_is_ram(pfn) && PageReserved(lmem_map+i)) reservedpages++; } @@ -372,7 +372,7 @@ show_mem(void) printk("Free swap: %6dkB\n",nr_swap_pages<<(PAGE_SHIFT-10)); for (nid = 0; nid < numnodes; nid++) { struct page * lmem_map = node_mem_map(nid); - i = node_size(nid); + i = node_spanned_pages(nid); while (i-- > 0) { total++; if (PageReserved(lmem_map+i)) diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 2fbcb7f5766a..90dcf272009c 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -79,7 +79,7 @@ void show_mem(void) struct page *page, *end; page = NODE_MEM_MAP(node); - end = page + NODE_DATA(node)->node_size; + end = page + NODE_DATA(node)->node_spanned_pages; do { total++; @@ -576,7 +576,7 @@ void __init mem_init(void) for (node = 0; node < numnodes; node++) { pg_data_t *pgdat = NODE_DATA(node); - if (pgdat->node_size != 0) + if (pgdat->node_spanned_pages != 0) totalram_pages += free_all_bootmem_node(pgdat); } diff --git a/arch/arm26/mm/init.c b/arch/arm26/mm/init.c index c2105fb1a84c..01c772bef70e 100644 --- a/arch/arm26/mm/init.c +++ b/arch/arm26/mm/init.c @@ -68,7 +68,7 @@ void show_mem(void) page = NODE_MEM_MAP(0); - end = page + NODE_DATA(0)->node_size; + end = page + NODE_DATA(0)->node_spanned_pages; do { total++; @@ -353,7 +353,7 @@ void __init mem_init(void) max_mapnr = virt_to_page(high_memory) - mem_map; /* this will put all unused low memory onto the freelists */ - if (pgdat->node_size != 0) + if (pgdat->node_spanned_pages != 0) totalram_pages += free_all_bootmem_node(pgdat); printk(KERN_INFO "Memory:"); diff --git a/arch/i386/mm/pgtable.c b/arch/i386/mm/pgtable.c index 7ab983c90c53..941c2aa5236c 100644 --- a/arch/i386/mm/pgtable.c +++ b/arch/i386/mm/pgtable.c @@ -34,7 +34,7 @@ void show_mem(void) show_free_areas(); printk("Free swap: %6dkB\n",nr_swap_pages<<(PAGE_SHIFT-10)); for_each_pgdat(pgdat) { - for (i = 0; i < pgdat->node_size; ++i) { + for (i = 0; i < pgdat->node_spanned_pages; ++i) { page = pgdat->node_mem_map + i; total++; if (PageHighMem(page)) diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 8fc22262ce0f..4d37e437da3f 100644 --- a/arch/ia64/mm/init.c +++ 
b/arch/ia64/mm/init.c @@ -232,7 +232,7 @@ show_mem(void) printk("Free swap: %6dkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); for_each_pgdat(pgdat) { printk("Node ID: %d\n", pgdat->node_id); - for(i = 0; i < pgdat->node_size; i++) { + for(i = 0; i < pgdat->node_spanned_pages; i++) { if (PageReserved(pgdat->node_mem_map+i)) reserved++; else if (PageSwapCache(pgdat->node_mem_map+i)) @@ -240,7 +240,7 @@ show_mem(void) else if (page_count(pgdat->node_mem_map + i)) shared += page_count(pgdat->node_mem_map + i) - 1; } - printk("\t%d pages of RAM\n", pgdat->node_size); + printk("\t%d pages of RAM\n", pgdat->node_spanned_pages); printk("\t%d reserved pages\n", reserved); printk("\t%d pages shared\n", shared); printk("\t%d pages swap cached\n", cached); diff --git a/arch/ppc64/mm/init.c b/arch/ppc64/mm/init.c index 79b716dbe6db..ca2472a9116a 100644 --- a/arch/ppc64/mm/init.c +++ b/arch/ppc64/mm/init.c @@ -109,7 +109,7 @@ void show_mem(void) show_free_areas(); printk("Free swap: %6dkB\n",nr_swap_pages<<(PAGE_SHIFT-10)); for_each_pgdat(pgdat) { - for (i = 0; i < pgdat->node_size; i++) { + for (i = 0; i < pgdat->node_spanned_pages; i++) { page = pgdat->node_mem_map + i; total++; if (PageReserved(page)) @@ -564,7 +564,7 @@ void __init mem_init(void) int nid; for (nid = 0; nid < numnodes; nid++) { - if (node_data[nid].node_size != 0) { + if (node_data[nid].node_spanned_pages != 0) { printk("freeing bootmem node %x\n", nid); totalram_pages += free_all_bootmem_node(NODE_DATA(nid)); diff --git a/arch/ppc64/mm/numa.c b/arch/ppc64/mm/numa.c index fd86d7ec8267..19b4ee36ee8f 100644 --- a/arch/ppc64/mm/numa.c +++ b/arch/ppc64/mm/numa.c @@ -160,21 +160,21 @@ new_range: * this simple case and complain if there is a gap in * memory */ - if (node_data[numa_domain].node_size) { + if (node_data[numa_domain].node_spanned_pages) { unsigned long shouldstart = node_data[numa_domain].node_start_pfn + - node_data[numa_domain].node_size; + node_data[numa_domain].node_spanned_pages; if (shouldstart != (start / PAGE_SIZE)) { printk(KERN_ERR "Hole in node, disabling " "region start %lx length %lx\n", start, size); continue; } - node_data[numa_domain].node_size += size / PAGE_SIZE; + node_data[numa_domain].node_spanned_pages += size / PAGE_SIZE; } else { node_data[numa_domain].node_start_pfn = start / PAGE_SIZE; - node_data[numa_domain].node_size = size / PAGE_SIZE; + node_data[numa_domain].node_spanned_pages = size / PAGE_SIZE; } for (i = start ; i < (start+size); i += MEMORY_INCREMENT) @@ -202,7 +202,7 @@ void setup_nonnuma(void) map_cpu_to_node(i, 0); node_data[0].node_start_pfn = 0; - node_data[0].node_size = lmb_end_of_DRAM() / PAGE_SIZE; + node_data[0].node_spanned_pages = lmb_end_of_DRAM() / PAGE_SIZE; for (i = 0 ; i < lmb_end_of_DRAM(); i += MEMORY_INCREMENT) numa_memory_lookup_table[i >> MEMORY_INCREMENT_SHIFT] = 0; @@ -224,12 +224,12 @@ void __init do_init_bootmem(void) unsigned long bootmem_paddr; unsigned long bootmap_pages; - if (node_data[nid].node_size == 0) + if (node_data[nid].node_spanned_pages == 0) continue; start_paddr = node_data[nid].node_start_pfn * PAGE_SIZE; end_paddr = start_paddr + - (node_data[nid].node_size * PAGE_SIZE); + (node_data[nid].node_spanned_pages * PAGE_SIZE); dbg("node %d\n", nid); dbg("start_paddr = %lx\n", start_paddr); @@ -311,7 +311,7 @@ void __init paging_init(void) unsigned long start_pfn; unsigned long end_pfn; - if (node_data[nid].node_size == 0) + if (node_data[nid].node_spanned_pages == 0) continue; start_pfn = plat_node_bdata[nid].node_boot_start >> PAGE_SHIFT; diff --git 
a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c index 3be6a8e4b679..cafd352ba636 100644 --- a/arch/x86_64/mm/init.c +++ b/arch/x86_64/mm/init.c @@ -64,7 +64,7 @@ void show_mem(void) printk("Free swap: %6dkB\n",nr_swap_pages<<(PAGE_SHIFT-10)); for_each_pgdat(pgdat) { - for (i = 0; i < pgdat->node_size; ++i) { + for (i = 0; i < pgdat->node_spanned_pages; ++i) { page = pgdat->node_mem_map + i; total++; if (PageReserved(page)) diff --git a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c index 0150d11586a7..738ae097faeb 100644 --- a/arch/x86_64/mm/numa.c +++ b/arch/x86_64/mm/numa.c @@ -86,7 +86,7 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, unsigned long en memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t)); NODE_DATA(nodeid)->bdata = &plat_node_bdata[nodeid]; NODE_DATA(nodeid)->node_start_pfn = start_pfn; - NODE_DATA(nodeid)->node_size = end_pfn - start_pfn; + NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn; /* Find a place for the bootmem map */ bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn); diff --git a/include/asm-alpha/mmzone.h b/include/asm-alpha/mmzone.h index 6edb9c64aa7b..36e3130c6696 100644 --- a/include/asm-alpha/mmzone.h +++ b/include/asm-alpha/mmzone.h @@ -31,7 +31,6 @@ extern pg_data_t node_data[]; #define pa_to_nid(pa) alpha_pa_to_nid(pa) #define NODE_DATA(nid) (&node_data[(nid)]) -#define node_size(nid) (NODE_DATA(nid)->node_size) #define node_localnr(pfn, nid) ((pfn) - NODE_DATA(nid)->node_start_pfn) @@ -124,7 +123,7 @@ PLAT_NODE_DATA_LOCALNR(unsigned long p, int n) #define pfn_to_nid(pfn) pa_to_nid(((u64)pfn << PAGE_SHIFT)) #define pfn_valid(pfn) \ (((pfn) - node_start_pfn(pfn_to_nid(pfn))) < \ - node_size(pfn_to_nid(pfn))) \ + node_spanned_pages(pfn_to_nid(pfn))) \ #define virt_addr_valid(kaddr) pfn_valid((__pa(kaddr) >> PAGE_SHIFT)) diff --git a/include/asm-i386/mmzone.h b/include/asm-i386/mmzone.h index b6138f07c309..d5da17912a8c 100644 --- a/include/asm-i386/mmzone.h +++ b/include/asm-i386/mmzone.h @@ -32,8 +32,7 @@ extern struct pglist_data *node_data[]; #define alloc_bootmem_low_pages_node(ignore, x) \ __alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, 0) -#define node_size(nid) (node_data[nid]->node_size) -#define node_localnr(pfn, nid) ((pfn) - node_data[nid]->node_start_pfn) +#define node_localnr(pfn, nid) ((pfn) - node_data[nid]->node_start_pfn) /* * Following are macros that each numa implmentation must define. 
@@ -54,7 +53,7 @@ extern struct pglist_data *node_data[]; #define node_end_pfn(nid) \ ({ \ pg_data_t *__pgdat = NODE_DATA(nid); \ - __pgdat->node_start_pfn + __pgdat->node_size; \ + __pgdat->node_start_pfn + __pgdat->node_spanned_pages; \ }) #define local_mapnr(kvaddr) \ diff --git a/include/asm-mips64/mmzone.h b/include/asm-mips64/mmzone.h index 25a42752b182..cba337b07be1 100644 --- a/include/asm-mips64/mmzone.h +++ b/include/asm-mips64/mmzone.h @@ -24,7 +24,7 @@ extern plat_pg_data_t *plat_node_data[]; #define PHYSADDR_TO_NID(pa) NASID_TO_COMPACT_NODEID(NASID_GET(pa)) #define PLAT_NODE_DATA(n) (plat_node_data[n]) -#define PLAT_NODE_DATA_SIZE(n) (PLAT_NODE_DATA(n)->gendata.node_size) +#define PLAT_NODE_DATA_SIZE(n) (PLAT_NODE_DATA(n)->gendata.node_spanned_pages) #define PLAT_NODE_DATA_LOCALNR(p, n) \ (((p) >> PAGE_SHIFT) - PLAT_NODE_DATA(n)->gendata.node_start_pfn) diff --git a/include/asm-ppc64/mmzone.h b/include/asm-ppc64/mmzone.h index 8503e25b17b3..2e5136012845 100644 --- a/include/asm-ppc64/mmzone.h +++ b/include/asm-ppc64/mmzone.h @@ -54,7 +54,6 @@ static inline int pa_to_nid(unsigned long pa) */ #define NODE_DATA(nid) (&node_data[nid]) -#define node_size(nid) (NODE_DATA(nid)->node_size) #define node_localnr(pfn, nid) ((pfn) - NODE_DATA(nid)->node_start_pfn) /* diff --git a/include/asm-x86_64/mmzone.h b/include/asm-x86_64/mmzone.h index c1a69000c8d7..398c530270c2 100644 --- a/include/asm-x86_64/mmzone.h +++ b/include/asm-x86_64/mmzone.h @@ -40,8 +40,7 @@ static inline __attribute__((pure)) int phys_to_nid(unsigned long addr) #define node_mem_map(nid) (NODE_DATA(nid)->node_mem_map) #define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn) #define node_end_pfn(nid) (NODE_DATA(nid)->node_start_pfn + \ - NODE_DATA(nid)->node_size) -#define node_size(nid) (NODE_DATA(nid)->node_size) + NODE_DATA(nid)->node_spanned_pages) #define local_mapnr(kvaddr) \ ( (__pa(kvaddr) >> PAGE_SHIFT) - node_start_pfn(kvaddr_to_nid(kvaddr)) ) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 21e95664fdf8..e768f7ab8963 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -184,12 +184,17 @@ typedef struct pglist_data { unsigned long *valid_addr_bitmap; struct bootmem_data *bdata; unsigned long node_start_pfn; - unsigned long node_size; + unsigned long node_present_pages; /* total number of physical pages */ + unsigned long node_spanned_pages; /* total size of physical page + range, including holes */ int node_id; struct pglist_data *pgdat_next; wait_queue_head_t kswapd_wait; } pg_data_t; +#define node_present_pages(nid) (NODE_DATA(nid)->node_present_pages) +#define node_spanned_pages(nid) (NODE_DATA(nid)->node_spanned_pages) + extern int numnodes; extern struct pglist_data *pgdat_list; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 741866b59d7d..8b2a02c0350a 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -903,7 +903,7 @@ void si_meminfo_node(struct sysinfo *val, int nid) { pg_data_t *pgdat = NODE_DATA(nid); - val->totalram = pgdat->node_size; + val->totalram = pgdat->node_present_pages; val->freeram = nr_free_pages_pgdat(pgdat); val->totalhigh = pgdat->node_zones[ZONE_HIGHMEM].present_pages; val->freehigh = pgdat->node_zones[ZONE_HIGHMEM].free_pages; @@ -1138,12 +1138,13 @@ static void __init calculate_zone_totalpages(struct pglist_data *pgdat, for (i = 0; i < MAX_NR_ZONES; i++) totalpages += zones_size[i]; - pgdat->node_size = totalpages; + pgdat->node_spanned_pages = totalpages; realtotalpages = totalpages; if (zholes_size) for (i = 0; i < MAX_NR_ZONES; i++) 
realtotalpages -= zholes_size[i]; + pgdat->node_present_pages = realtotalpages; printk("On node %d totalpages: %lu\n", pgdat->node_id, realtotalpages); } @@ -1349,7 +1350,7 @@ void __init free_area_init_node(int nid, struct pglist_data *pgdat, pgdat->node_start_pfn = node_start_pfn; calculate_zone_totalpages(pgdat, zones_size, zholes_size); if (!node_mem_map) { - size = (pgdat->node_size + 1) * sizeof(struct page); + size = (pgdat->node_spanned_pages + 1) * sizeof(struct page); node_mem_map = alloc_bootmem_node(pgdat, size); } pgdat->node_mem_map = node_mem_map; -- cgit v1.2.3 From cee396e281fc2c8a55261eea1a89a594e98f3e0f Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 2 Jul 2003 08:49:26 -0700 Subject: [PATCH] cleanup and generalise lowmem_page_address From: William Lee Irwin III This patch allows architectures to micro-optimize lowmem_page_address() at their whims. Roman Zippel originally wrote and/or suggested this back when dependencies on page->virtual existing were being shaken out. That's long-settled, so it's fine to do this now. --- include/linux/mm.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 4d183974fd36..d75f64725853 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -339,9 +339,14 @@ static inline void set_page_zone(struct page *page, unsigned long zone_num) page->flags |= zone_num << ZONE_SHIFT; } -static inline void * lowmem_page_address(struct page *page) +#ifndef CONFIG_DISCONTIGMEM +/* The array of struct pages - for discontigmem use pgdat->lmem_map */ +extern struct page *mem_map; +#endif + +static inline void *lowmem_page_address(struct page *page) { - return __va( ( (page - page_zone(page)->zone_mem_map) + page_zone(page)->zone_start_pfn) << PAGE_SHIFT); + return __va(page_to_pfn(page) << PAGE_SHIFT); } #if defined(CONFIG_HIGHMEM) && !defined(WANT_PAGE_VIRTUAL) @@ -395,11 +400,6 @@ static inline int page_mapped(struct page *page) #define VM_FAULT_MINOR 1 #define VM_FAULT_MAJOR 2 -#ifndef CONFIG_DISCONTIGMEM -/* The array of struct pages - for discontigmem use pgdat->lmem_map */ -extern struct page *mem_map; -#endif - extern void show_free_areas(void); struct page *shmem_nopage(struct vm_area_struct * vma, -- cgit v1.2.3 From bc75ac4f1dcec256a65b531e2d5be84f5b0fe6bc Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 2 Jul 2003 08:49:35 -0700 Subject: [PATCH] Security hook for vm_enough_memory From: Stephen Smalley This patch against 2.5.73 replaces vm_enough_memory with a security hook per Alan Cox's suggestion so that security modules can completely replace the logic if desired. Note that the patch changes the interface to follow the convention of the other security hooks, i.e. return 0 if ok or -errno on failure (-ENOMEM in this case) rather than returning a boolean. It also exports various variables and functions required for the vm_enough_memory logic. 
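To illustrate the new convention (a minimal sketch, not code from this patch -- the module and its always-strict policy are hypothetical), a security module can now supply its own vm_enough_memory logic through security_operations, returning 0 to permit a mapping and -ENOMEM to refuse it:

    /* Hypothetical module: always apply strict overcommit, ignoring
     * sysctl_overcommit_memory.  Only symbols this patch exports or
     * declares are used (vm_acct_memory, vm_unacct_memory,
     * totalram_pages, total_swap_pages, vm_committed_space). */
    #include <linux/security.h>
    #include <linux/mman.h>
    #include <linux/swap.h>

    static int strict_vm_enough_memory(long pages)
    {
            unsigned long allowed;

            vm_acct_memory(pages);
            allowed = totalram_pages * sysctl_overcommit_ratio / 100;
            allowed += total_swap_pages;
            if (atomic_read(&vm_committed_space) < allowed)
                    return 0;               /* permitted */
            vm_unacct_memory(pages);        /* undo the charge on refusal */
            return -ENOMEM;
    }

    static struct security_operations strict_ops = {
            .vm_enough_memory = strict_vm_enough_memory,
    };

Callers then test the hook's return value directly, as in the mm/mmap.c hunks below: if (security_vm_enough_memory(charged)) return -ENOMEM;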
--- arch/ia64/ia32/binfmt_elf32.c | 3 +- arch/ia64/kernel/sys_ia64.c | 1 - arch/mips/kernel/sysirix.c | 5 ++- arch/s390/kernel/compat_exec.c | 3 +- arch/x86_64/ia32/ia32_binfmt.c | 4 ++- fs/exec.c | 2 +- include/linux/mman.h | 3 +- include/linux/security.h | 16 ++++++++++ include/linux/slab.h | 2 ++ kernel/fork.c | 2 +- mm/mmap.c | 71 +++++------------------------------------- mm/mprotect.c | 2 +- mm/mremap.c | 3 +- mm/page_alloc.c | 5 +++ mm/shmem.c | 9 +++--- mm/slab.c | 2 ++ mm/swap.c | 2 ++ mm/swapfile.c | 6 +++- security/capability.c | 65 ++++++++++++++++++++++++++++++++++++++ security/dummy.c | 52 +++++++++++++++++++++++++++++++ 20 files changed, 178 insertions(+), 80 deletions(-) (limited to 'include/linux') diff --git a/arch/ia64/ia32/binfmt_elf32.c b/arch/ia64/ia32/binfmt_elf32.c index dcc4982c2c66..8b2a41592746 100644 --- a/arch/ia64/ia32/binfmt_elf32.c +++ b/arch/ia64/ia32/binfmt_elf32.c @@ -13,6 +13,7 @@ #include #include +#include #include #include @@ -177,7 +178,7 @@ ia32_setup_arg_pages (struct linux_binprm *bprm) if (!mpnt) return -ENOMEM; - if (!vm_enough_memory((IA32_STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p))>>PAGE_SHIFT)) { + if (security_vm_enough_memory((IA32_STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p))>>PAGE_SHIFT)) { kmem_cache_free(vm_area_cachep, mpnt); return -ENOMEM; } diff --git a/arch/ia64/kernel/sys_ia64.c b/arch/ia64/kernel/sys_ia64.c index 6de52294bd80..6d94e8b1a0ad 100644 --- a/arch/ia64/kernel/sys_ia64.c +++ b/arch/ia64/kernel/sys_ia64.c @@ -100,7 +100,6 @@ ia64_shmat (int shmid, void *shmaddr, int shmflg) asmlinkage unsigned long ia64_brk (unsigned long brk) { - extern int vm_enough_memory (long pages); unsigned long rlim, retval, newbrk, oldbrk; struct mm_struct *mm = current->mm; diff --git a/arch/mips/kernel/sysirix.c b/arch/mips/kernel/sysirix.c index 5722c28c1e9d..fdcc9d5bd057 100644 --- a/arch/mips/kernel/sysirix.c +++ b/arch/mips/kernel/sysirix.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -527,8 +528,6 @@ asmlinkage int irix_gtime(struct pt_regs *regs) return get_seconds(); } -int vm_enough_memory(long pages); - /* * IRIX is completely broken... it returns 0 on success, otherwise * ENOMEM. @@ -585,7 +584,7 @@ asmlinkage int irix_brk(unsigned long brk) /* * Check if we have enough memory.. 
*/ - if (!vm_enough_memory((newbrk-oldbrk) >> PAGE_SHIFT)) { + if (security_vm_enough_memory((newbrk-oldbrk) >> PAGE_SHIFT)) { ret = -ENOMEM; goto out; } diff --git a/arch/s390/kernel/compat_exec.c b/arch/s390/kernel/compat_exec.c index 74245a64e514..33832846833f 100644 --- a/arch/s390/kernel/compat_exec.c +++ b/arch/s390/kernel/compat_exec.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -55,7 +56,7 @@ int setup_arg_pages32(struct linux_binprm *bprm) if (!mpnt) return -ENOMEM; - if (!vm_enough_memory((STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p))>>PAGE_SHIFT)) { + if (security_vm_enough_memory((STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p))>>PAGE_SHIFT)) { kmem_cache_free(vm_area_cachep, mpnt); return -ENOMEM; } diff --git a/arch/x86_64/ia32/ia32_binfmt.c b/arch/x86_64/ia32/ia32_binfmt.c index 496dfa9da747..e4b86e6cbf9b 100644 --- a/arch/x86_64/ia32/ia32_binfmt.c +++ b/arch/x86_64/ia32/ia32_binfmt.c @@ -14,6 +14,8 @@ #include #include #include +#include + #include #include #include @@ -339,7 +341,7 @@ int setup_arg_pages(struct linux_binprm *bprm) if (!mpnt) return -ENOMEM; - if (!vm_enough_memory((IA32_STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p))>>PAGE_SHIFT)) { + if (security_vm_enough_memory((IA32_STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p))>>PAGE_SHIFT)) { kmem_cache_free(vm_area_cachep, mpnt); return -ENOMEM; } diff --git a/fs/exec.c b/fs/exec.c index 204d0a3a1565..f91b25952248 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -392,7 +392,7 @@ int setup_arg_pages(struct linux_binprm *bprm) if (!mpnt) return -ENOMEM; - if (!vm_enough_memory((STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p))>>PAGE_SHIFT)) { + if (security_vm_enough_memory((STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p))>>PAGE_SHIFT)) { kmem_cache_free(vm_area_cachep, mpnt); return -ENOMEM; } diff --git a/include/linux/mman.h b/include/linux/mman.h index 474d1c046436..a8956f6588ad 100644 --- a/include/linux/mman.h +++ b/include/linux/mman.h @@ -9,7 +9,8 @@ #define MREMAP_MAYMOVE 1 #define MREMAP_FIXED 2 -extern int vm_enough_memory(long pages); +extern int sysctl_overcommit_memory; +extern int sysctl_overcommit_ratio; extern atomic_t vm_committed_space; #ifdef CONFIG_SMP diff --git a/include/linux/security.h b/include/linux/security.h index 9589f99c3ef3..4d91dfc52c52 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -49,6 +49,7 @@ extern int cap_bprm_secureexec(struct linux_binprm *bprm); extern int cap_task_post_setuid (uid_t old_ruid, uid_t old_euid, uid_t old_suid, int flags); extern void cap_task_reparent_to_init (struct task_struct *p); extern int cap_syslog (int type); +extern int cap_vm_enough_memory (long pages); static inline int cap_netlink_send (struct sk_buff *skb) { @@ -958,6 +959,10 @@ struct swap_info_struct; * See the syslog(2) manual page for an explanation of the @type values. * @type contains the type of action. * Return 0 if permission is granted. + * @vm_enough_memory: + * Check permissions for allocating a new virtual mapping. + * @pages contains the number of pages. + * Return 0 if permission is granted. * * @register_security: * allow module stacking. 
@@ -989,6 +994,7 @@ struct security_operations { int (*quotactl) (int cmds, int type, int id, struct super_block * sb); int (*quota_on) (struct file * f); int (*syslog) (int type); + int (*vm_enough_memory) (long pages); int (*bprm_alloc_security) (struct linux_binprm * bprm); void (*bprm_free_security) (struct linux_binprm * bprm); @@ -1238,6 +1244,11 @@ static inline int security_syslog(int type) return security_ops->syslog(type); } +static inline int security_vm_enough_memory(long pages) +{ + return security_ops->vm_enough_memory(pages); +} + static inline int security_bprm_alloc (struct linux_binprm *bprm) { return security_ops->bprm_alloc_security (bprm); @@ -1898,6 +1909,11 @@ static inline int security_syslog(int type) return cap_syslog(type); } +static inline int security_vm_enough_memory(long pages) +{ + return cap_vm_enough_memory(pages); +} + static inline int security_bprm_alloc (struct linux_binprm *bprm) { return 0; diff --git a/include/linux/slab.h b/include/linux/slab.h index 9f8bccba4ad3..d797c981f37e 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -116,6 +116,8 @@ extern kmem_cache_t *bio_cachep; void ptrinfo(unsigned long addr); +extern atomic_t slab_reclaim_pages; + #endif /* __KERNEL__ */ #endif /* _LINUX_SLAB_H */ diff --git a/kernel/fork.c b/kernel/fork.c index 2abbc9c2da23..c17e05614c88 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -286,7 +286,7 @@ static inline int dup_mmap(struct mm_struct * mm, struct mm_struct * oldmm) continue; if (mpnt->vm_flags & VM_ACCOUNT) { unsigned int len = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT; - if (!vm_enough_memory(len)) + if (security_vm_enough_memory(len)) goto fail_nomem; charge += len; } diff --git a/mm/mmap.c b/mm/mmap.c index c83cf2a8b126..1052f84a82a2 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -53,65 +54,9 @@ int sysctl_overcommit_memory = 0; /* default is heuristic overcommit */ int sysctl_overcommit_ratio = 50; /* default is 50% */ atomic_t vm_committed_space = ATOMIC_INIT(0); -/* - * Check that a process has enough memory to allocate a new virtual - * mapping. 1 means there is enough memory for the allocation to - * succeed and 0 implies there is not. - * - * We currently support three overcommit policies, which are set via the - * vm.overcommit_memory sysctl. See Documentation/vm/overcommit-acounting - * - * Strict overcommit modes added 2002 Feb 26 by Alan Cox. - * Additional code 2002 Jul 20 by Robert Love. - */ -extern atomic_t slab_reclaim_pages; -int vm_enough_memory(long pages) -{ - unsigned long free, allowed; - - vm_acct_memory(pages); - - /* - * Sometimes we want to use more memory than we have - */ - if (sysctl_overcommit_memory == 1) - return 1; - - if (sysctl_overcommit_memory == 0) { - free = get_page_cache_size(); - free += nr_free_pages(); - free += nr_swap_pages; - - /* - * Any slabs which are created with the - * SLAB_RECLAIM_ACCOUNT flag claim to have contents - * which are reclaimable, under pressure. 
The dentry - * cache and most inode caches should fall into this - */ - free += atomic_read(&slab_reclaim_pages); - - /* - * Leave the last 3% for root - */ - if (!capable(CAP_SYS_ADMIN)) - free -= free / 32; - - if (free > pages) - return 1; - vm_unacct_memory(pages); - return 0; - } - - allowed = totalram_pages * sysctl_overcommit_ratio / 100; - allowed += total_swap_pages; - - if (atomic_read(&vm_committed_space) < allowed) - return 1; - - vm_unacct_memory(pages); - - return 0; -} +EXPORT_SYMBOL(sysctl_overcommit_memory); +EXPORT_SYMBOL(sysctl_overcommit_ratio); +EXPORT_SYMBOL(vm_committed_space); /* * Requires inode->i_mapping->i_shared_sem @@ -646,7 +591,7 @@ munmap_back: * Private writable mapping: check memory availability */ charged = len >> PAGE_SHIFT; - if (!vm_enough_memory(charged)) + if (security_vm_enough_memory(charged)) return -ENOMEM; vm_flags |= VM_ACCOUNT; } @@ -950,7 +895,7 @@ int expand_stack(struct vm_area_struct * vma, unsigned long address) grow = (address - vma->vm_end) >> PAGE_SHIFT; /* Overcommit.. */ - if (!vm_enough_memory(grow)) { + if (security_vm_enough_memory(grow)) { spin_unlock(&vma->vm_mm->page_table_lock); return -ENOMEM; } @@ -1004,7 +949,7 @@ int expand_stack(struct vm_area_struct *vma, unsigned long address) grow = (vma->vm_start - address) >> PAGE_SHIFT; /* Overcommit.. */ - if (!vm_enough_memory(grow)) { + if (security_vm_enough_memory(grow)) { spin_unlock(&vma->vm_mm->page_table_lock); return -ENOMEM; } @@ -1376,7 +1321,7 @@ unsigned long do_brk(unsigned long addr, unsigned long len) if (mm->map_count > MAX_MAP_COUNT) return -ENOMEM; - if (!vm_enough_memory(len >> PAGE_SHIFT)) + if (security_vm_enough_memory(len >> PAGE_SHIFT)) return -ENOMEM; flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags; diff --git a/mm/mprotect.c b/mm/mprotect.c index 978a9509c350..2c015794e3c1 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -175,7 +175,7 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev, if (newflags & VM_WRITE) { if (!(vma->vm_flags & (VM_ACCOUNT|VM_WRITE|VM_SHARED))) { charged = (end - start) >> PAGE_SHIFT; - if (!vm_enough_memory(charged)) + if (security_vm_enough_memory(charged)) return -ENOMEM; newflags |= VM_ACCOUNT; } diff --git a/mm/mremap.c b/mm/mremap.c index 3bab43a88125..088af945ac5e 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -385,7 +386,7 @@ unsigned long do_mremap(unsigned long addr, if (vma->vm_flags & VM_ACCOUNT) { charged = (new_len - old_len) >> PAGE_SHIFT; - if (!vm_enough_memory(charged)) + if (security_vm_enough_memory(charged)) goto out_nc; } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 8b2a02c0350a..16077203e5a4 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -43,6 +43,9 @@ int nr_swap_pages; int numnodes = 1; int sysctl_lower_zone_protection = 0; +EXPORT_SYMBOL(totalram_pages); +EXPORT_SYMBOL(nr_swap_pages); + /* * Used by page_zone() to look up the address of the struct zone whose * id is encoded in the upper bits of page->flags @@ -733,6 +736,7 @@ unsigned int nr_free_pages(void) return sum; } +EXPORT_SYMBOL(nr_free_pages); unsigned int nr_used_zone_pages(void) { @@ -825,6 +829,7 @@ DEFINE_PER_CPU(struct page_state, page_states) = {0}; EXPORT_PER_CPU_SYMBOL(page_states); atomic_t nr_pagecache = ATOMIC_INIT(0); +EXPORT_SYMBOL(nr_pagecache); #ifdef CONFIG_SMP DEFINE_PER_CPU(long, nr_pagecache_local) = 0; #endif diff --git a/mm/shmem.c b/mm/shmem.c index 73301cee3f41..1f4ed8fece45 100644 --- a/mm/shmem.c +++ 
b/mm/shmem.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -507,7 +508,7 @@ static int shmem_notify_change(struct dentry *dentry, struct iattr *attr) */ change = VM_ACCT(attr->ia_size) - VM_ACCT(inode->i_size); if (change > 0) { - if (!vm_enough_memory(change)) + if (security_vm_enough_memory(change)) return -ENOMEM; } else if (attr->ia_size < inode->i_size) { vm_unacct_memory(-change); @@ -1139,7 +1140,7 @@ shmem_file_write(struct file *file, const char __user *buf, size_t count, loff_t maxpos = inode->i_size; if (maxpos < pos + count) { maxpos = pos + count; - if (!vm_enough_memory(VM_ACCT(maxpos) - VM_ACCT(inode->i_size))) { + if (security_vm_enough_memory(VM_ACCT(maxpos) - VM_ACCT(inode->i_size))) { err = -ENOMEM; goto out; } @@ -1493,7 +1494,7 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s memcpy(info, symname, len); inode->i_op = &shmem_symlink_inline_operations; } else { - if (!vm_enough_memory(VM_ACCT(1))) { + if (security_vm_enough_memory(VM_ACCT(1))) { iput(inode); return -ENOMEM; } @@ -1887,7 +1888,7 @@ struct file *shmem_file_setup(char *name, loff_t size, unsigned long flags) if (size > SHMEM_MAX_BYTES) return ERR_PTR(-EINVAL); - if ((flags & VM_ACCOUNT) && !vm_enough_memory(VM_ACCT(size))) + if ((flags & VM_ACCOUNT) && security_vm_enough_memory(VM_ACCT(size))) return ERR_PTR(-ENOMEM); error = -ENOMEM; diff --git a/mm/slab.c b/mm/slab.c index afb8d8415999..e05fcba90af2 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -90,6 +90,7 @@ #include #include #include +#include #include #include @@ -462,6 +463,7 @@ struct list_head cache_chain; * SLAB_RECLAIM_ACCOUNT turns this on per-slab */ atomic_t slab_reclaim_pages; +EXPORT_SYMBOL(slab_reclaim_pages); /* * chicken and egg problem: delay the per-cpu array allocation diff --git a/mm/swap.c b/mm/swap.c index 5818b0a5a72d..37302961e371 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include /* for try_to_release_page() */ #include @@ -370,6 +371,7 @@ void vm_acct_memory(long pages) } preempt_enable(); } +EXPORT_SYMBOL(vm_acct_memory); #endif diff --git a/mm/swapfile.c b/mm/swapfile.c index bdfd09be8d4c..bc31505b689f 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -20,7 +20,9 @@ #include #include #include +#include #include +#include #include #include @@ -30,6 +32,8 @@ unsigned int nr_swapfiles; int total_swap_pages; static int swap_overflow; +EXPORT_SYMBOL(total_swap_pages); + static const char Bad_file[] = "Bad swap file entry "; static const char Unused_file[] = "Unused swap file entry "; static const char Bad_offset[] = "Bad swap offset entry "; @@ -1042,7 +1046,7 @@ asmlinkage long sys_swapoff(const char __user * specialfile) swap_list_unlock(); goto out_dput; } - if (vm_enough_memory(p->pages)) + if (!security_vm_enough_memory(p->pages)) vm_unacct_memory(p->pages); else { err = -ENOMEM; diff --git a/security/capability.c b/security/capability.c index e01bc5271c36..cff54dd440fc 100644 --- a/security/capability.c +++ b/security/capability.c @@ -15,6 +15,9 @@ #include #include #include +#include +#include +#include #include #include #include @@ -275,6 +278,65 @@ int cap_syslog (int type) return 0; } +/* + * Check that a process has enough memory to allocate a new virtual + * mapping. 0 means there is enough memory for the allocation to + * succeed and -ENOMEM implies there is not. + * + * We currently support three overcommit policies, which are set via the + * vm.overcommit_memory sysctl. 
See Documentation/vm/overcommit-acounting + * + * Strict overcommit modes added 2002 Feb 26 by Alan Cox. + * Additional code 2002 Jul 20 by Robert Love. + */ +int cap_vm_enough_memory(long pages) +{ + unsigned long free, allowed; + + vm_acct_memory(pages); + + /* + * Sometimes we want to use more memory than we have + */ + if (sysctl_overcommit_memory == 1) + return 0; + + if (sysctl_overcommit_memory == 0) { + free = get_page_cache_size(); + free += nr_free_pages(); + free += nr_swap_pages; + + /* + * Any slabs which are created with the + * SLAB_RECLAIM_ACCOUNT flag claim to have contents + * which are reclaimable, under pressure. The dentry + * cache and most inode caches should fall into this + */ + free += atomic_read(&slab_reclaim_pages); + + /* + * Leave the last 3% for root + */ + if (!capable(CAP_SYS_ADMIN)) + free -= free / 32; + + if (free > pages) + return 0; + vm_unacct_memory(pages); + return -ENOMEM; + } + + allowed = totalram_pages * sysctl_overcommit_ratio / 100; + allowed += total_swap_pages; + + if (atomic_read(&vm_committed_space) < allowed) + return 0; + + vm_unacct_memory(pages); + + return -ENOMEM; +} + EXPORT_SYMBOL(cap_capable); EXPORT_SYMBOL(cap_ptrace); EXPORT_SYMBOL(cap_capget); @@ -286,6 +348,7 @@ EXPORT_SYMBOL(cap_bprm_secureexec); EXPORT_SYMBOL(cap_task_post_setuid); EXPORT_SYMBOL(cap_task_reparent_to_init); EXPORT_SYMBOL(cap_syslog); +EXPORT_SYMBOL(cap_vm_enough_memory); #ifdef CONFIG_SECURITY @@ -307,6 +370,8 @@ static struct security_operations capability_ops = { .task_reparent_to_init = cap_task_reparent_to_init, .syslog = cap_syslog, + + .vm_enough_memory = cap_vm_enough_memory, }; #if defined(CONFIG_SECURITY_CAPABILITIES_MODULE) diff --git a/security/dummy.c b/security/dummy.c index a4307e78a168..76c6560a76c2 100644 --- a/security/dummy.c +++ b/security/dummy.c @@ -17,6 +17,9 @@ #include #include #include +#include +#include +#include #include #include #include @@ -97,6 +100,54 @@ static int dummy_syslog (int type) return 0; } +static int dummy_vm_enough_memory(long pages) +{ + unsigned long free, allowed; + + vm_acct_memory(pages); + + /* + * Sometimes we want to use more memory than we have + */ + if (sysctl_overcommit_memory == 1) + return 0; + + if (sysctl_overcommit_memory == 0) { + free = get_page_cache_size(); + free += nr_free_pages(); + free += nr_swap_pages; + + /* + * Any slabs which are created with the + * SLAB_RECLAIM_ACCOUNT flag claim to have contents + * which are reclaimable, under pressure. 
The dentry * cache and most inode caches should fall into this */ + free += atomic_read(&slab_reclaim_pages); + + /* + * Leave the last 3% for root + */ + if (current->euid) + free -= free / 32; + + if (free > pages) + return 0; + vm_unacct_memory(pages); + return -ENOMEM; + } + + allowed = totalram_pages * sysctl_overcommit_ratio / 100; + allowed += total_swap_pages; + + if (atomic_read(&vm_committed_space) < allowed) + return 0; + + vm_unacct_memory(pages); + + return -ENOMEM; +} + static int dummy_bprm_alloc_security (struct linux_binprm *bprm) { return 0;
@@ -793,6 +844,7 @@ void security_fixup_ops (struct security_operations *ops) set_to_dummy_if_null(ops, quota_on); set_to_dummy_if_null(ops, sysctl); set_to_dummy_if_null(ops, syslog); + set_to_dummy_if_null(ops, vm_enough_memory); set_to_dummy_if_null(ops, bprm_alloc_security); set_to_dummy_if_null(ops, bprm_free_security); set_to_dummy_if_null(ops, bprm_compute_creds);
-- cgit v1.2.3

From 26e48e571aba7b6cba0cebb41d832949137b5fd5 Mon Sep 17 00:00:00 2001
From: Rusty Russell
Date: Wed, 2 Jul 2003 10:32:49 -0700
Subject: [PATCH] Remove cpu arg from cpu_raise_irq

The function cpu_raise_softirq() takes a softirq number, and a cpu number, but cannot be used with cpu != smp_processor_id(), because there's no locking around the pending softirq lists. Since no one does this, remove that arg.

As per Linus' suggestion, names changed:

    raise_softirq(int nr)
    cpu_raise_softirq(int cpu, int nr)   -> raise_softirq_irqoff(int nr)
    __cpu_raise_softirq(int cpu, int nr) -> __raise_softirq_irqoff(int nr)
---
 drivers/scsi/scsi.c       |  2 +-
 include/linux/interrupt.h |  4 ++--
 include/linux/netdevice.h |  8 ++++----
 kernel/ksyms.c            |  2 +-
 kernel/softirq.c          | 14 +++++++-------
 net/core/dev.c            |  2 +-
 6 files changed, 16 insertions(+), 16 deletions(-)
(limited to 'include/linux')

diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index af35f91fe3b2..633c9a028e29 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -582,7 +582,7 @@ void scsi_done(struct scsi_cmnd *cmd) local_irq_save(flags); cpu = smp_processor_id(); list_add_tail(&cmd->eh_entry, &done_q[cpu]); - cpu_raise_softirq(cpu, SCSI_SOFTIRQ); + raise_softirq_irqoff(SCSI_SOFTIRQ); local_irq_restore(flags); }
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index da2eaeb18118..21e48723b386 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -94,8 +94,8 @@ struct softirq_action asmlinkage void do_softirq(void); extern void open_softirq(int nr, void (*action)(struct softirq_action*), void *data); extern void softirq_init(void); -#define __cpu_raise_softirq(cpu, nr) do { softirq_pending(cpu) |= 1UL << (nr); } while (0) -extern void FASTCALL(cpu_raise_softirq(unsigned int cpu, unsigned int nr)); +#define __raise_softirq_irqoff(nr) do { local_softirq_pending() |= 1UL << (nr); } while (0) +extern void FASTCALL(raise_softirq_irqoff(unsigned int nr)); extern void FASTCALL(raise_softirq(unsigned int nr)); #ifndef invoke_softirq
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index d79375c33273..3aef822b4493 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -561,7 +561,7 @@ static inline void __netif_schedule(struct net_device *dev) cpu = smp_processor_id(); dev->next_sched = softnet_data[cpu].output_queue; softnet_data[cpu].output_queue = dev; - cpu_raise_softirq(cpu, NET_TX_SOFTIRQ); + raise_softirq_irqoff(NET_TX_SOFTIRQ); local_irq_restore(flags); } } @@ -612,7 +612,7 @@ static inline void dev_kfree_skb_irq(struct sk_buff
*skb) cpu = smp_processor_id(); skb->next = softnet_data[cpu].completion_queue; softnet_data[cpu].completion_queue = skb; - cpu_raise_softirq(cpu, NET_TX_SOFTIRQ); + raise_softirq_irqoff(NET_TX_SOFTIRQ); local_irq_restore(flags); } } @@ -779,7 +779,7 @@ static inline void __netif_rx_schedule(struct net_device *dev) dev->quota += dev->weight; else dev->quota = dev->weight; - __cpu_raise_softirq(cpu, NET_RX_SOFTIRQ); + __raise_softirq_irqoff(NET_RX_SOFTIRQ); local_irq_restore(flags); } @@ -805,7 +805,7 @@ static inline int netif_rx_reschedule(struct net_device *dev, int undo) local_irq_save(flags); cpu = smp_processor_id(); list_add_tail(&dev->poll_list, &softnet_data[cpu].poll_list); - __cpu_raise_softirq(cpu, NET_RX_SOFTIRQ); + __raise_softirq_irqoff(NET_RX_SOFTIRQ); local_irq_restore(flags); return 1; } diff --git a/kernel/ksyms.c b/kernel/ksyms.c index 816627adc50a..66ea4b6b4d84 100644 --- a/kernel/ksyms.c +++ b/kernel/ksyms.c @@ -587,7 +587,7 @@ EXPORT_SYMBOL(tasklet_kill); EXPORT_SYMBOL(do_softirq); EXPORT_SYMBOL(raise_softirq); EXPORT_SYMBOL(open_softirq); -EXPORT_SYMBOL(cpu_raise_softirq); +EXPORT_SYMBOL(raise_softirq_irqoff); EXPORT_SYMBOL(__tasklet_schedule); EXPORT_SYMBOL(__tasklet_hi_schedule); diff --git a/kernel/softirq.c b/kernel/softirq.c index 20bf233a14c3..e581740a6e26 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -117,9 +117,9 @@ EXPORT_SYMBOL(local_bh_enable); /* * This function must run with irqs disabled! */ -inline void cpu_raise_softirq(unsigned int cpu, unsigned int nr) +inline void raise_softirq_irqoff(unsigned int nr) { - __cpu_raise_softirq(cpu, nr); + __raise_softirq_irqoff(nr); /* * If we're in an interrupt or softirq, we're done @@ -139,7 +139,7 @@ void raise_softirq(unsigned int nr) unsigned long flags; local_irq_save(flags); - cpu_raise_softirq(smp_processor_id(), nr); + raise_softirq_irqoff(nr); local_irq_restore(flags); } @@ -168,7 +168,7 @@ void __tasklet_schedule(struct tasklet_struct *t) local_irq_save(flags); t->next = __get_cpu_var(tasklet_vec).list; __get_cpu_var(tasklet_vec).list = t; - cpu_raise_softirq(smp_processor_id(), TASKLET_SOFTIRQ); + raise_softirq_irqoff(TASKLET_SOFTIRQ); local_irq_restore(flags); } @@ -179,7 +179,7 @@ void __tasklet_hi_schedule(struct tasklet_struct *t) local_irq_save(flags); t->next = __get_cpu_var(tasklet_hi_vec).list; __get_cpu_var(tasklet_hi_vec).list = t; - cpu_raise_softirq(smp_processor_id(), HI_SOFTIRQ); + raise_softirq_irqoff(HI_SOFTIRQ); local_irq_restore(flags); } @@ -211,7 +211,7 @@ static void tasklet_action(struct softirq_action *a) local_irq_disable(); t->next = __get_cpu_var(tasklet_vec).list; __get_cpu_var(tasklet_vec).list = t; - __cpu_raise_softirq(smp_processor_id(), TASKLET_SOFTIRQ); + __raise_softirq_irqoff(TASKLET_SOFTIRQ); local_irq_enable(); } } @@ -244,7 +244,7 @@ static void tasklet_hi_action(struct softirq_action *a) local_irq_disable(); t->next = __get_cpu_var(tasklet_hi_vec).list; __get_cpu_var(tasklet_hi_vec).list = t; - __cpu_raise_softirq(smp_processor_id(), HI_SOFTIRQ); + __raise_softirq_irqoff(HI_SOFTIRQ); local_irq_enable(); } } diff --git a/net/core/dev.c b/net/core/dev.c index 5102b235b57c..0605391589ad 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1712,7 +1712,7 @@ out: softnet_break: netdev_rx_stat[this_cpu].time_squeeze++; - __cpu_raise_softirq(this_cpu, NET_RX_SOFTIRQ); + __raise_softirq_irqoff(NET_RX_SOFTIRQ); goto out; } -- cgit v1.2.3 From 7b957b78f962cf3e844b7ddf8d740cb21dd276b9 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 2 Jul 2003 10:38:21 -0700 
Subject: [PATCH] Remove unused __syscall_count

No one seems to use __syscall_count. Remove the field from the i386 irq_cpustat_t struct, and the generic accessor macros. Because some archs have hardcoded asm references to offsets in this structure, I haven't touched non-x86, but doing so is usually trivial.
---
 include/asm-i386/hardirq.h  | 1 -
 include/linux/irq_cpustat.h | 2 --
 2 files changed, 3 deletions(-)
(limited to 'include/linux')

diff --git a/include/asm-i386/hardirq.h b/include/asm-i386/hardirq.h
index e8b9149f0b29..a711a1890d97 100644
--- a/include/asm-i386/hardirq.h
+++ b/include/asm-i386/hardirq.h
@@ -7,7 +7,6 @@ typedef struct { unsigned int __softirq_pending; - unsigned int __syscall_count; struct task_struct * __ksoftirqd_task; /* waitqueue is too large */ unsigned long idle_timestamp; unsigned int __nmi_count; /* arch dependent */
diff --git a/include/linux/irq_cpustat.h b/include/linux/irq_cpustat.h
index 3f49c2ba63ed..641e7964a0d7 100644
--- a/include/linux/irq_cpustat.h
+++ b/include/linux/irq_cpustat.h
@@ -29,8 +29,6 @@ extern irq_cpustat_t irq_stat[]; /* defined in asm/hardirq.h */ /* arch independent irq_stat fields */ #define softirq_pending(cpu) __IRQ_STAT((cpu), __softirq_pending) #define local_softirq_pending() softirq_pending(smp_processor_id()) -#define syscall_count(cpu) __IRQ_STAT((cpu), __syscall_count) -#define local_syscall_count() syscall_count(smp_processor_id()) #define ksoftirqd_task(cpu) __IRQ_STAT((cpu), __ksoftirqd_task) #define local_ksoftirqd_task() ksoftirqd_task(smp_processor_id())
-- cgit v1.2.3

From 3ac57d3424bca0406b5349f5187f5e3d84f64013 Mon Sep 17 00:00:00 2001
From: Rusty Russell
Date: Wed, 2 Jul 2003 10:38:29 -0700
Subject: [PATCH] Make ksoftirqd a normal per-cpu variable.

This moves the ksoftirqd pointers out of the irq_stat struct, and uses a normal per-cpu variable. It's not that time critical, nor referenced in assembler. This moves us closer to making irq_stat a per-cpu variable.

Because some archs have hardcoded asm references to offsets in this structure, I haven't touched non-x86. The __ksoftirqd_task field is unused in other archs, too.
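The per-cpu idiom the patch converts to, reduced to a minimal sketch (the wakeup counter below is invented for illustration; DEFINE_PER_CPU, __get_cpu_var and per_cpu are the interfaces the diff below actually uses):

    #include <linux/percpu.h>

    static DEFINE_PER_CPU(unsigned long, wakeups);  /* hypothetical counter */

    void count_wakeup(void)
    {
            /* __get_cpu_var() touches this CPU's copy; callers must have
             * preemption disabled, as wakeup_softirqd() does by running
             * with interrupts off. */
            __get_cpu_var(wakeups)++;
    }

    unsigned long read_wakeups(int cpu)
    {
            /* per_cpu() names a specific CPU's copy explicitly, as the
             * hotplug callback does with per_cpu(ksoftirqd, hotcpu). */
            return per_cpu(wakeups, cpu);
    }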
--- include/asm-i386/hardirq.h | 1 - include/linux/irq_cpustat.h | 2 -- kernel/softirq.c | 16 ++++++++++------ 3 files changed, 10 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/asm-i386/hardirq.h b/include/asm-i386/hardirq.h index a711a1890d97..5a14545af179 100644 --- a/include/asm-i386/hardirq.h +++ b/include/asm-i386/hardirq.h @@ -7,7 +7,6 @@ typedef struct { unsigned int __softirq_pending; - struct task_struct * __ksoftirqd_task; /* waitqueue is too large */ unsigned long idle_timestamp; unsigned int __nmi_count; /* arch dependent */ unsigned int apic_timer_irqs; /* arch dependent */ diff --git a/include/linux/irq_cpustat.h b/include/linux/irq_cpustat.h index 641e7964a0d7..03b3e17de805 100644 --- a/include/linux/irq_cpustat.h +++ b/include/linux/irq_cpustat.h @@ -29,8 +29,6 @@ extern irq_cpustat_t irq_stat[]; /* defined in asm/hardirq.h */ /* arch independent irq_stat fields */ #define softirq_pending(cpu) __IRQ_STAT((cpu), __softirq_pending) #define local_softirq_pending() softirq_pending(smp_processor_id()) -#define ksoftirqd_task(cpu) __IRQ_STAT((cpu), __ksoftirqd_task) -#define local_ksoftirqd_task() ksoftirqd_task(smp_processor_id()) /* arch dependent irq_stat fields */ #define nmi_count(cpu) __IRQ_STAT((cpu), __nmi_count) /* i386 */ diff --git a/kernel/softirq.c b/kernel/softirq.c index e581740a6e26..96294a3d673f 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -14,6 +14,7 @@ #include #include #include +#include #include /* @@ -41,15 +42,18 @@ EXPORT_SYMBOL(irq_stat); static struct softirq_action softirq_vec[32] __cacheline_aligned_in_smp; +static DEFINE_PER_CPU(struct task_struct *, ksoftirqd); + /* * we cannot loop indefinitely here to avoid userspace starvation, * but we also don't want to introduce a worst case 1/HZ latency * to the pending events, so lets the scheduler to balance * the softirq load for us. */ -static inline void wakeup_softirqd(unsigned cpu) +static inline void wakeup_softirqd(void) { - struct task_struct * tsk = ksoftirqd_task(cpu); + /* Interrupts are disabled: no need to stop preemption */ + struct task_struct *tsk = __get_cpu_var(ksoftirqd); if (tsk && tsk->state != TASK_RUNNING) wake_up_process(tsk); @@ -96,7 +100,7 @@ restart: goto restart; } if (pending) - wakeup_softirqd(smp_processor_id()); + wakeup_softirqd(); __local_bh_enable(); } @@ -131,7 +135,7 @@ inline void raise_softirq_irqoff(unsigned int nr) * schedule the softirq soon. */ if (!in_interrupt()) - wakeup_softirqd(cpu); + wakeup_softirqd(); } void raise_softirq(unsigned int nr) @@ -325,7 +329,7 @@ static int ksoftirqd(void * __bind_cpu) __set_current_state(TASK_INTERRUPTIBLE); mb(); - local_ksoftirqd_task() = current; + __get_cpu_var(ksoftirqd) = current; for (;;) { if (!local_softirq_pending()) @@ -354,7 +358,7 @@ static int __devinit cpu_callback(struct notifier_block *nfb, return NOTIFY_BAD; } - while (!ksoftirqd_task(hotcpu)) + while (!per_cpu(ksoftirqd, hotcpu)) yield(); } return NOTIFY_OK; -- cgit v1.2.3 From 98823466c86c19e7c5e7d8ebe75527acf7f47f6a Mon Sep 17 00:00:00 2001 From: Adam Belay Date: Thu, 3 Jul 2003 15:39:09 +0000 Subject: [PNP] Handle Disabled Resources Properly Some devices will allow for individual resources to be disabled, even when the device as a whole is active. The current PnP resource manager is not handling this situation properly. This patch corrects the issue by detecting disabled resources and then flagging them. The pnp layer will now skip over any disabled resources. 
Interface updates have also been included so that we can properly display resource tables when a resource is disabled. Also note that a new flag "IORESOURCE_DISABLED" has been added to linux/ioports.h. --- drivers/pnp/interface.c | 22 ++++++++++++++++++---- drivers/pnp/manager.c | 12 ++++++++++++ drivers/pnp/resource.c | 8 ++++++++ drivers/pnp/support.c | 24 ++++++++++++++++++++---- include/linux/ioport.h | 1 + 5 files changed, 59 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/pnp/interface.c b/drivers/pnp/interface.c index eb2094bf72d0..e2b7388f7ec2 100644 --- a/drivers/pnp/interface.c +++ b/drivers/pnp/interface.c @@ -259,7 +259,10 @@ static ssize_t pnp_show_current_resources(struct device *dmdev, char *buf) for (i = 0; i < PNP_MAX_PORT; i++) { if (pnp_port_valid(dev, i)) { pnp_printf(buffer,"io"); - pnp_printf(buffer," 0x%lx-0x%lx \n", + if (pnp_port_flags(dev, i) & IORESOURCE_DISABLED) + pnp_printf(buffer," disabled\n"); + else + pnp_printf(buffer," 0x%lx-0x%lx\n", pnp_port_start(dev, i), pnp_port_end(dev, i)); } @@ -267,7 +270,10 @@ static ssize_t pnp_show_current_resources(struct device *dmdev, char *buf) for (i = 0; i < PNP_MAX_MEM; i++) { if (pnp_mem_valid(dev, i)) { pnp_printf(buffer,"mem"); - pnp_printf(buffer," 0x%lx-0x%lx \n", + if (pnp_mem_flags(dev, i) & IORESOURCE_DISABLED) + pnp_printf(buffer," disabled\n"); + else + pnp_printf(buffer," 0x%lx-0x%lx\n", pnp_mem_start(dev, i), pnp_mem_end(dev, i)); } @@ -275,13 +281,21 @@ static ssize_t pnp_show_current_resources(struct device *dmdev, char *buf) for (i = 0; i < PNP_MAX_IRQ; i++) { if (pnp_irq_valid(dev, i)) { pnp_printf(buffer,"irq"); - pnp_printf(buffer," %ld \n", pnp_irq(dev, i)); + if (pnp_irq_flags(dev, i) & IORESOURCE_DISABLED) + pnp_printf(buffer," disabled\n"); + else + pnp_printf(buffer," %ld\n", + pnp_irq(dev, i)); } } for (i = 0; i < PNP_MAX_DMA; i++) { if (pnp_dma_valid(dev, i)) { pnp_printf(buffer,"dma"); - pnp_printf(buffer," %ld \n", pnp_dma(dev, i)); + if (pnp_dma_flags(dev, i) & IORESOURCE_DISABLED) + pnp_printf(buffer," disabled\n"); + else + pnp_printf(buffer," %ld\n", + pnp_dma(dev, i)); } } ret = (buffer->curr - buf); diff --git a/drivers/pnp/manager.c b/drivers/pnp/manager.c index a56dfac58b36..cc2bd90ae990 100644 --- a/drivers/pnp/manager.c +++ b/drivers/pnp/manager.c @@ -40,6 +40,9 @@ static int pnp_assign_port(struct pnp_dev *dev, struct pnp_port *rule, int idx) if (!(dev->res.port_resource[idx].flags & IORESOURCE_AUTO)) return 1; + if (!rule->size) + return 1; /* skip disabled resource requests */ + start = &dev->res.port_resource[idx].start; end = &dev->res.port_resource[idx].end; flags = &dev->res.port_resource[idx].flags; @@ -76,6 +79,9 @@ static int pnp_assign_mem(struct pnp_dev *dev, struct pnp_mem *rule, int idx) if (!(dev->res.mem_resource[idx].flags & IORESOURCE_AUTO)) return 1; + if (!rule->size) + return 1; /* skip disabled resource requests */ + start = &dev->res.mem_resource[idx].start; end = &dev->res.mem_resource[idx].end; flags = &dev->res.mem_resource[idx].flags; @@ -128,6 +134,9 @@ static int pnp_assign_irq(struct pnp_dev * dev, struct pnp_irq *rule, int idx) if (!(dev->res.irq_resource[idx].flags & IORESOURCE_AUTO)) return 1; + if (!rule->map) + return 1; /* skip disabled resource requests */ + start = &dev->res.irq_resource[idx].start; end = &dev->res.irq_resource[idx].end; flags = &dev->res.irq_resource[idx].flags; @@ -168,6 +177,9 @@ static int pnp_assign_dma(struct pnp_dev *dev, struct pnp_dma *rule, int idx) if (!(dev->res.dma_resource[idx].flags 
& IORESOURCE_AUTO)) return 1; + if (!rule->map) + return 1; /* skip disabled resource requests */ + start = &dev->res.dma_resource[idx].start; end = &dev->res.dma_resource[idx].end; flags = &dev->res.dma_resource[idx].flags; diff --git a/drivers/pnp/resource.c b/drivers/pnp/resource.c index 978decf7504a..c7c664a3035f 100644 --- a/drivers/pnp/resource.c +++ b/drivers/pnp/resource.c @@ -286,6 +286,8 @@ int pnp_check_port(struct pnp_dev * dev, int idx) continue; for (tmp = 0; tmp < PNP_MAX_PORT; tmp++) { if (tdev->res.port_resource[tmp].flags & IORESOURCE_IO) { + if (pnp_port_flags(dev, tmp) & IORESOURCE_DISABLED) + continue; tport = &tdev->res.port_resource[tmp].start; tend = &tdev->res.port_resource[tmp].end; if (ranged_conflict(port,end,tport,tend)) @@ -340,6 +342,8 @@ int pnp_check_mem(struct pnp_dev * dev, int idx) continue; for (tmp = 0; tmp < PNP_MAX_MEM; tmp++) { if (tdev->res.mem_resource[tmp].flags & IORESOURCE_MEM) { + if (pnp_mem_flags(dev, tmp) & IORESOURCE_DISABLED) + continue; taddr = &tdev->res.mem_resource[tmp].start; tend = &tdev->res.mem_resource[tmp].end; if (ranged_conflict(addr,end,taddr,tend)) @@ -409,6 +413,8 @@ int pnp_check_irq(struct pnp_dev * dev, int idx) continue; for (tmp = 0; tmp < PNP_MAX_IRQ; tmp++) { if (tdev->res.irq_resource[tmp].flags & IORESOURCE_IRQ) { + if (pnp_irq_flags(dev, tmp) & IORESOURCE_DISABLED) + continue; if ((tdev->res.irq_resource[tmp].start == *irq)) return 0; } @@ -462,6 +468,8 @@ int pnp_check_dma(struct pnp_dev * dev, int idx) continue; for (tmp = 0; tmp < PNP_MAX_DMA; tmp++) { if (tdev->res.dma_resource[tmp].flags & IORESOURCE_DMA) { + if (pnp_dma_flags(dev, tmp) & IORESOURCE_DISABLED) + continue; if ((tdev->res.dma_resource[tmp].start == *dma)) return 0; } diff --git a/drivers/pnp/support.c b/drivers/pnp/support.c index af359e092ed0..375aa2172239 100644 --- a/drivers/pnp/support.c +++ b/drivers/pnp/support.c @@ -68,9 +68,13 @@ static void current_irqresource(struct pnp_resource_table * res, int irq) int i = 0; while ((res->irq_resource[i].flags & IORESOURCE_IRQ) && i < PNP_MAX_IRQ) i++; if (i < PNP_MAX_IRQ) { + res->irq_resource[i].flags = IORESOURCE_IRQ; // Also clears _UNSET flag + if (irq == -1) { + res->irq_resource[i].flags |= IORESOURCE_DISABLED; + return; + } res->irq_resource[i].start = res->irq_resource[i].end = (unsigned long) irq; - res->irq_resource[i].flags = IORESOURCE_IRQ; // Also clears _UNSET flag } } @@ -79,9 +83,13 @@ static void current_dmaresource(struct pnp_resource_table * res, int dma) int i = 0; while ((res->dma_resource[i].flags & IORESOURCE_DMA) && i < PNP_MAX_DMA) i++; if (i < PNP_MAX_DMA) { + res->dma_resource[i].flags = IORESOURCE_DMA; // Also clears _UNSET flag + if (dma == -1) { + res->dma_resource[i].flags |= IORESOURCE_DISABLED; + return; + } res->dma_resource[i].start = res->dma_resource[i].end = (unsigned long) dma; - res->dma_resource[i].flags = IORESOURCE_DMA; // Also clears _UNSET flag } } @@ -90,9 +98,13 @@ static void current_ioresource(struct pnp_resource_table * res, int io, int len) int i = 0; while ((res->port_resource[i].flags & IORESOURCE_IO) && i < PNP_MAX_PORT) i++; if (i < PNP_MAX_PORT) { + res->port_resource[i].flags = IORESOURCE_IO; // Also clears _UNSET flag + if (len <= 0 || (io + len -1) >= 0x10003) { + res->port_resource[i].flags |= IORESOURCE_DISABLED; + return; + } res->port_resource[i].start = (unsigned long) io; res->port_resource[i].end = (unsigned long)(io + len - 1); - res->port_resource[i].flags = IORESOURCE_IO; // Also clears _UNSET flag } } @@ -101,9 +113,13 @@ static 
void current_memresource(struct pnp_resource_table * res, int mem, int le int i = 0; while ((res->mem_resource[i].flags & IORESOURCE_MEM) && i < PNP_MAX_MEM) i++; if (i < PNP_MAX_MEM) { + res->mem_resource[i].flags = IORESOURCE_MEM; // Also clears _UNSET flag + if (len <= 0) { + res->mem_resource[i].flags |= IORESOURCE_DISABLED; + return; + } res->mem_resource[i].start = (unsigned long) mem; res->mem_resource[i].end = (unsigned long)(mem + len - 1); - res->mem_resource[i].flags = IORESOURCE_MEM; // Also clears _UNSET flag } } diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 9193a8df0122..26d6293ed4c9 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -43,6 +43,7 @@ struct resource_list { #define IORESOURCE_SHADOWABLE 0x00010000 #define IORESOURCE_BUS_HAS_VGA 0x00080000 +#define IORESOURCE_DISABLED 0x10000000 #define IORESOURCE_UNSET 0x20000000 #define IORESOURCE_AUTO 0x40000000 #define IORESOURCE_BUSY 0x80000000 /* Driver has marked this resource busy */ -- cgit v1.2.3
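For driver authors consuming these resource tables, the practical consequence is that a valid resource may now be flagged off. A minimal sketch of the check a driver would perform (the helper is hypothetical; pnp_port_valid, pnp_port_flags and pnp_port_start are the accessors used in interface.c above):

    #include <linux/pnp.h>
    #include <linux/ioport.h>

    /* Treat a disabled port the same as an absent one, mirroring the
     * tests this patch adds to interface.c and resource.c. */
    static unsigned long pnp_dev_port_or_zero(struct pnp_dev *dev, int i)
    {
            if (!pnp_port_valid(dev, i))
                    return 0;       /* no resource at this index */
            if (pnp_port_flags(dev, i) & IORESOURCE_DISABLED)
                    return 0;       /* present but turned off: skip it */
            return pnp_port_start(dev, i);
    }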