 90 files changed, 911 insertions(+), 691 deletions(-)
diff --git a/arch/alpha/mm/init.c b/arch/alpha/mm/init.c index bbf7417506c1..d310bcc70d54 100644 --- a/arch/alpha/mm/init.c +++ b/arch/alpha/mm/init.c @@ -66,19 +66,9 @@ pgd_alloc(struct mm_struct *mm) pte_t * pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - pte_t *pte; - long timeout = 10; - - retry: - pte = (pte_t *) __get_free_page(GFP_KERNEL); + pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT); if (pte) clear_page(pte); - else if (--timeout >= 0) { - current->state = TASK_UNINTERRUPTIBLE; - schedule_timeout(HZ); - goto retry; - } - return pte; } diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c index a4daeb2d3b2a..2acbd3d6b0b3 100644 --- a/arch/i386/kernel/time.c +++ b/arch/i386/kernel/time.c @@ -124,15 +124,28 @@ void do_settimeofday(struct timeval *tv) * made, and then undo it! */ tv->tv_usec -= timer->get_offset(); - tv->tv_usec -= (jiffies - wall_jiffies) * (1000000 / HZ); + tv->tv_usec -= (jiffies - wall_jiffies) * (USEC_PER_SEC / HZ); while (tv->tv_usec < 0) { - tv->tv_usec += 1000000; + tv->tv_usec += USEC_PER_SEC; tv->tv_sec--; } + tv->tv_usec *= NSEC_PER_USEC; + + wall_to_monotonic.tv_sec += xtime.tv_sec - tv->tv_sec; + wall_to_monotonic.tv_nsec += xtime.tv_nsec - tv->tv_usec; + + if (wall_to_monotonic.tv_nsec > NSEC_PER_SEC) { + wall_to_monotonic.tv_nsec -= NSEC_PER_SEC; + wall_to_monotonic.tv_sec++; + } + if (wall_to_monotonic.tv_nsec < 0) { + wall_to_monotonic.tv_nsec += NSEC_PER_SEC; + wall_to_monotonic.tv_sec--; + } xtime.tv_sec = tv->tv_sec; - xtime.tv_nsec = (tv->tv_usec * 1000); + xtime.tv_nsec = tv->tv_usec; time_adjust = 0; /* stop active adjtime() */ time_status |= STA_UNSYNC; time_maxerror = NTP_PHASE_LIMIT; @@ -228,41 +241,6 @@ static inline void do_timer_interrupt(int irq, void *dev_id, struct pt_regs *reg } /* - * Lost tick detection and compensation - */ -static inline void detect_lost_tick(void) -{ - /* read time since last interrupt */ - unsigned long delta = timer->get_offset(); - static unsigned long dbg_print; - - /* check if delta is greater then two ticks */ - if(delta >= 2*(1000000/HZ)){ - - /* - * only print debug info first 5 times - */ - /* - * AKPM: disable this for now; it's nice, but irritating. - */ - if (0 && dbg_print < 5) { - printk(KERN_WARNING "\nWarning! Detected %lu " - "micro-second gap between interrupts.\n", - delta); - printk(KERN_WARNING " Compensating for %lu lost " - "ticks.\n", - delta/(1000000/HZ)-1); - dump_stack(); - dbg_print++; - } - /* calculate number of missed ticks */ - delta = delta/(1000000/HZ)-1; - jiffies += delta; - } - -} - -/* * This is the same as the above, except we _also_ save the current * Time Stamp Counter value at the time of the timer interrupt, so that * we later on can estimate the time of day more exactly. 
@@ -278,7 +256,6 @@ irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) */ write_seqlock(&xtime_lock); - detect_lost_tick(); timer->mark_offset(); do_timer_interrupt(irq, NULL, regs); @@ -322,7 +299,9 @@ void __init time_init(void) { xtime.tv_sec = get_cmos_time(); - xtime.tv_nsec = 0; + wall_to_monotonic.tv_sec = -xtime.tv_sec + INITIAL_JIFFIES / HZ; + xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ); + wall_to_monotonic.tv_nsec = 0; timer = select_timer(); diff --git a/arch/i386/kernel/timers/timer_cyclone.c b/arch/i386/kernel/timers/timer_cyclone.c index cbe74fd46491..7cdf4d7bca6f 100644 --- a/arch/i386/kernel/timers/timer_cyclone.c +++ b/arch/i386/kernel/timers/timer_cyclone.c @@ -18,6 +18,7 @@ #include <asm/fixmap.h> extern spinlock_t i8253_lock; +extern unsigned long jiffies; extern unsigned long calibrate_tsc(void); /* Number of usecs that the last interrupt was delayed */ @@ -46,6 +47,8 @@ static rwlock_t monotonic_lock = RW_LOCK_UNLOCKED; static void mark_offset_cyclone(void) { + unsigned long lost, delay; + unsigned long delta = last_cyclone_low; int count; unsigned long long this_offset, last_offset; @@ -62,6 +65,15 @@ static void mark_offset_cyclone(void) count |= inb(0x40) << 8; spin_unlock(&i8253_lock); + /* lost tick compensation */ + delta = last_cyclone_low - delta; + delta /=(CYCLONE_TIMER_FREQ/1000000); + delta += delay_at_last_interrupt; + lost = delta/(1000000/HZ); + delay = delta%(1000000/HZ); + if(lost >= 2) + jiffies += lost-1; + /* update the monotonic base value */ this_offset = ((unsigned long long)last_cyclone_high<<32)|last_cyclone_low; monotonic_base += (this_offset - last_offset) & CYCLONE_TIMER_MASK; @@ -70,6 +82,12 @@ static void mark_offset_cyclone(void) /* calculate delay_at_last_interrupt */ count = ((LATCH-1) - count) * TICK_SIZE; delay_at_last_interrupt = (count + LATCH/2) / LATCH; + + /* catch corner case where tick rollover + * occured between cyclone and pit reads + */ + if(abs(delay - delay_at_last_interrupt) > 900) + jiffies++; } static unsigned long get_offset_cyclone(void) diff --git a/arch/i386/kernel/timers/timer_pit.c b/arch/i386/kernel/timers/timer_pit.c index cdcc95b74aaf..7d521228c28a 100644 --- a/arch/i386/kernel/timers/timer_pit.c +++ b/arch/i386/kernel/timers/timer_pit.c @@ -54,7 +54,7 @@ static void delay_pit(unsigned long loops) } -/* This function must be called with interrupts disabled +/* This function must be called with xtime_lock held. * It was inspired by Steve McCanne's microtime-i386 for BSD. -- jrs * * However, the pc-audio speaker driver changes the divisor so that @@ -93,7 +93,7 @@ static unsigned long get_offset_pit(void) static unsigned long jiffies_p = 0; /* - * cache volatile jiffies temporarily; we have IRQs turned off. + * cache volatile jiffies temporarily; we have xtime_lock. */ unsigned long jiffies_t; @@ -119,8 +119,6 @@ static unsigned long get_offset_pit(void) count = LATCH - 1; } - spin_unlock_irqrestore(&i8253_lock, flags); - /* * avoiding timer inconsistencies (they are rare, but they happen)... * there are two kinds of problems that must be avoided here: @@ -130,7 +128,6 @@ static unsigned long get_offset_pit(void) * (see c't 95/10 page 335 for Neptun bug.) 
*/ - if( jiffies_t == jiffies_p ) { if( count > count_p ) { /* the nutcase */ @@ -141,6 +138,8 @@ static unsigned long get_offset_pit(void) count_p = count; + spin_unlock_irqrestore(&i8253_lock, flags); + count = ((LATCH-1) - count) * TICK_SIZE; count = (count + LATCH/2) / LATCH; diff --git a/arch/i386/kernel/timers/timer_tsc.c b/arch/i386/kernel/timers/timer_tsc.c index fad90c000cc6..e7c126d30e42 100644 --- a/arch/i386/kernel/timers/timer_tsc.c +++ b/arch/i386/kernel/timers/timer_tsc.c @@ -21,6 +21,7 @@ int tsc_disable __initdata = 0; extern spinlock_t i8253_lock; +extern unsigned long jiffies; static int use_tsc; /* Number of usecs that the last interrupt was delayed */ @@ -117,6 +118,8 @@ static unsigned long long monotonic_clock_tsc(void) static void mark_offset_tsc(void) { + unsigned long lost,delay; + unsigned long delta = last_tsc_low; int count; int countmp; static int count1 = 0; @@ -161,6 +164,23 @@ static void mark_offset_tsc(void) } } + /* lost tick compensation */ + delta = last_tsc_low - delta; + { + register unsigned long eax, edx; + eax = delta; + __asm__("mull %2" + :"=a" (eax), "=d" (edx) + :"rm" (fast_gettimeoffset_quotient), + "0" (eax)); + delta = edx; + } + delta += delay_at_last_interrupt; + lost = delta/(1000000/HZ); + delay = delta%(1000000/HZ); + if(lost >= 2) + jiffies += lost-1; + /* update the monotonic base value */ this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low; monotonic_base += cycles_2_ns(this_offset - last_offset); @@ -169,6 +189,12 @@ static void mark_offset_tsc(void) /* calculate delay_at_last_interrupt */ count = ((LATCH-1) - count) * TICK_SIZE; delay_at_last_interrupt = (count + LATCH/2) / LATCH; + + /* catch corner case where tick rollover + * occured between tsc and pit reads + */ + if(abs(delay - delay_at_last_interrupt) > 900) + jiffies++; } static void delay_tsc(unsigned long loops) diff --git a/arch/i386/mm/hugetlbpage.c b/arch/i386/mm/hugetlbpage.c index 6b37833292aa..c7259ef89bd2 100644 --- a/arch/i386/mm/hugetlbpage.c +++ b/arch/i386/mm/hugetlbpage.c @@ -129,37 +129,45 @@ nomem: int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, struct page **pages, struct vm_area_struct **vmas, - unsigned long *st, int *length, int i) + unsigned long *position, int *length, int i) { - pte_t *ptep, pte; - unsigned long start = *st; - unsigned long pstart; - int len = *length; - struct page *page; + unsigned long vpfn, vaddr = *position; + int remainder = *length; + + WARN_ON(!is_vm_hugetlb_page(vma)); - do { - pstart = start; - ptep = huge_pte_offset(mm, start); - pte = *ptep; + vpfn = vaddr/PAGE_SIZE; + while (vaddr < vma->vm_end && remainder) { -back1: - page = pte_page(pte); if (pages) { - page += ((start & ~HPAGE_MASK) >> PAGE_SHIFT); + pte_t *pte; + struct page *page; + + pte = huge_pte_offset(mm, vaddr); + + /* hugetlb should be locked, and hence, prefaulted */ + WARN_ON(!pte || pte_none(*pte)); + + page = &pte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)]; + + WARN_ON(!PageCompound(page)); + get_page(page); pages[i] = page; } + if (vmas) vmas[i] = vma; - i++; - len--; - start += PAGE_SIZE; - if (((start & HPAGE_MASK) == pstart) && len && - (start < vma->vm_end)) - goto back1; - } while (len && start < vma->vm_end); - *length = len; - *st = start; + + vaddr += PAGE_SIZE; + ++vpfn; + --remainder; + ++i; + } + + *length = remainder; + *position = vaddr; + return i; } @@ -474,9 +482,7 @@ int hugetlb_report_meminfo(char *buf) int is_hugepage_mem_enough(size_t size) { - if (size > (htlbpagemem << HPAGE_SHIFT)) - return 
0; - return 1; + return (size + ~HPAGE_MASK)/HPAGE_SIZE <= htlbpagemem; } /* diff --git a/arch/i386/mm/pgtable.c b/arch/i386/mm/pgtable.c index 054eec2afc35..9d36261de7d0 100644 --- a/arch/i386/mm/pgtable.c +++ b/arch/i386/mm/pgtable.c @@ -131,39 +131,23 @@ void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags) pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - int count = 0; - pte_t *pte; - - do { - pte = (pte_t *) __get_free_page(GFP_KERNEL); - if (pte) - clear_page(pte); - else { - current->state = TASK_UNINTERRUPTIBLE; - schedule_timeout(HZ); - } - } while (!pte && (count++ < 10)); + pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT); + if (pte) + clear_page(pte); return pte; } struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) { - int count = 0; struct page *pte; - - do { + #if CONFIG_HIGHPTE - pte = alloc_pages(GFP_KERNEL | __GFP_HIGHMEM, 0); + pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT, 0); #else - pte = alloc_pages(GFP_KERNEL, 0); + pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT, 0); #endif - if (pte) - clear_highpage(pte); - else { - current->state = TASK_UNINTERRUPTIBLE; - schedule_timeout(HZ); - } - } while (!pte && (count++ < 10)); + if (pte) + clear_highpage(pte); return pte; } diff --git a/arch/ia64/ia32/sys_ia32.c b/arch/ia64/ia32/sys_ia32.c index 84d34a212ac2..570b03908dd5 100644 --- a/arch/ia64/ia32/sys_ia32.c +++ b/arch/ia64/ia32/sys_ia32.c @@ -3040,7 +3040,6 @@ struct nfsctl_arg32 { #define ca32_svc u.u32_svc #define ca32_client u.u32_client #define ca32_export u.u32_export -#define ca32_authd u.u32_authd #define ca32_debug u.u32_debug }; diff --git a/arch/parisc/kernel/sys_parisc32.c b/arch/parisc/kernel/sys_parisc32.c index 63a85350d4ea..23a0afcf31a6 100644 --- a/arch/parisc/kernel/sys_parisc32.c +++ b/arch/parisc/kernel/sys_parisc32.c @@ -1131,7 +1131,7 @@ asmlinkage long sys32_msgrcv(int msqid, struct nfsctl_export32 { char ex_client[NFSCLNT_IDMAX+1]; char ex_path[NFS_MAXPATHLEN+1]; - __kernel_dev_t ex_dev; + __kernel_old_dev_t ex_dev; compat_ino_t ex_ino; int ex_flags; __kernel_uid_t ex_anon_uid; diff --git a/arch/ppc/mm/pgtable.c b/arch/ppc/mm/pgtable.c index 9682525026f9..5d4aef7ab895 100644 --- a/arch/ppc/mm/pgtable.c +++ b/arch/ppc/mm/pgtable.c @@ -76,15 +76,11 @@ pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) extern void *early_get_page(void); int timeout = 0; - if (mem_init_done) { - while ((pte = (pte_t *) __get_free_page(GFP_KERNEL)) == NULL - && ++timeout < 10) { - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(HZ); - } - } else - pte = (pte_t *) early_get_page(); - if (pte != NULL) + if (mem_init_done) + pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT); + else + pte = (pte_t *)early_get_page(); + if (pte) clear_page(pte); return pte; } @@ -92,20 +88,16 @@ pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) { struct page *pte; - int timeout = 0; + #ifdef CONFIG_HIGHPTE - int flags = GFP_KERNEL | __GFP_HIGHMEM; + int flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_REPEAT; #else - int flags = GFP_KERNEL; + int flags = GFP_KERNEL | __GFP_REPEAT; #endif - while ((pte = alloc_pages(flags, 0)) == NULL) { - if (++timeout >= 10) - return NULL; - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(HZ); - } - clear_highpage(pte); + pte = alloc_pages(flags, 0); + if (pte) + clear_highpage(pte); return pte; } diff --git 
a/arch/ppc64/kernel/sys_ppc32.c b/arch/ppc64/kernel/sys_ppc32.c index fba8c8254455..300a93c9c42e 100644 --- a/arch/ppc64/kernel/sys_ppc32.c +++ b/arch/ppc64/kernel/sys_ppc32.c @@ -910,7 +910,6 @@ struct nfsctl_arg32 { #define ca32_export u.u32_export #define ca32_getfd u.u32_getfd #define ca32_getfs u.u32_getfs -#define ca32_authd u.u32_authd }; union nfsctl_res32 { diff --git a/arch/sparc/mm/sun4c.c b/arch/sparc/mm/sun4c.c index 9cda5ee63d98..13495f4c418f 100644 --- a/arch/sparc/mm/sun4c.c +++ b/arch/sparc/mm/sun4c.c @@ -1901,7 +1901,7 @@ static pte_t *sun4c_pte_alloc_one_kernel(struct mm_struct *mm, unsigned long add if ((pte = sun4c_pte_alloc_one_fast(mm, address)) != NULL) return pte; - pte = (pte_t *)__get_free_page(GFP_KERNEL); + pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT); if (pte) memset(pte, 0, PAGE_SIZE); return pte; @@ -2194,7 +2194,7 @@ void __init ld_mmu_sun4c(void) BTFIXUPSET_CALL(pte_alloc_one_kernel, sun4c_pte_alloc_one_kernel, BTFIXUPCALL_NORM); BTFIXUPSET_CALL(pte_alloc_one, sun4c_pte_alloc_one, BTFIXUPCALL_NORM); BTFIXUPSET_CALL(free_pmd_fast, sun4c_free_pmd_fast, BTFIXUPCALL_NOP); - BTFIXUPSET_CALL(pmd_alloc_one, sun4c_pmd_alloc_one, BTFIXUPCALL_RETO0); + BTFIXUPSET_CALL(pmd_alloc_one, sun4c_lpmd_alloc_one, BTFIXUPCALL_RETO0); BTFIXUPSET_CALL(free_pgd_fast, sun4c_free_pgd_fast, BTFIXUPCALL_NORM); BTFIXUPSET_CALL(get_pgd_fast, sun4c_get_pgd_fast, BTFIXUPCALL_NORM); diff --git a/arch/sparc64/kernel/sys_sparc32.c b/arch/sparc64/kernel/sys_sparc32.c index b1194401deba..ff9c8ec19b16 100644 --- a/arch/sparc64/kernel/sys_sparc32.c +++ b/arch/sparc64/kernel/sys_sparc32.c @@ -2133,7 +2133,6 @@ struct nfsctl_arg32 { #define ca32_export u.u32_export #define ca32_getfd u.u32_getfd #define ca32_getfs u.u32_getfs -#define ca32_authd u.u32_authd }; union nfsctl_res32 { diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c index 2e7199293856..d0c24d48071e 100644 --- a/arch/um/kernel/mem.c +++ b/arch/um/kernel/mem.c @@ -810,35 +810,21 @@ void pgd_free(pgd_t *pgd) pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - int count = 0; pte_t *pte; - do { - pte = (pte_t *) __get_free_page(GFP_KERNEL); - if (pte) - clear_page(pte); - else { - current->state = TASK_UNINTERRUPTIBLE; - schedule_timeout(HZ); - } - } while (!pte && (count++ < 10)); + pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT); + if (pte) + clear_page(pte); return pte; } struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) { - int count = 0; struct page *pte; - do { - pte = alloc_pages(GFP_KERNEL, 0); - if (pte) - clear_highpage(pte); - else { - current->state = TASK_UNINTERRUPTIBLE; - schedule_timeout(HZ); - } - } while (!pte && (count++ < 10)); + pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT, 0); + if (pte) + clear_highpage(pte); return pte; } diff --git a/arch/x86_64/ia32/sys_ia32.c b/arch/x86_64/ia32/sys_ia32.c index 292936f958f0..6712c9475d0b 100644 --- a/arch/x86_64/ia32/sys_ia32.c +++ b/arch/x86_64/ia32/sys_ia32.c @@ -1708,7 +1708,6 @@ struct nfsctl_arg32 { #define ca32_export u.u32_export #define ca32_getfd u.u32_getfd #define ca32_getfs u.u32_getfs -#define ca32_authd u.u32_authd }; union nfsctl_res32 { diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c index 9362b6cb01eb..8ffb84e6db80 100644 --- a/drivers/block/DAC960.c +++ b/drivers/block/DAC960.c @@ -1069,6 +1069,7 @@ static boolean DAC960_V1_EnableMemoryMailboxInterface(DAC960_Controller_T if (pci_set_dma_mask(Controller->PCIDevice, DAC690_V1_PciDmaMask)) return DAC960_Failure(Controller, "DMA 
mask out of range"); + Controller->BounceBufferLimit = DAC690_V1_PciDmaMask; if ((hw_type == DAC960_PD_Controller) || (hw_type == DAC960_P_Controller)) { CommandMailboxesSize = 0; @@ -1271,6 +1272,7 @@ static boolean DAC960_V2_EnableMemoryMailboxInterface(DAC960_Controller_T if (pci_set_dma_mask(Controller->PCIDevice, DAC690_V2_PciDmaMask)) return DAC960_Failure(Controller, "DMA mask out of range"); + Controller->BounceBufferLimit = DAC690_V2_PciDmaMask; /* This is a temporary dma mapping, used only in the scope of this function */ CommandMailbox = @@ -2386,6 +2388,7 @@ static boolean DAC960_RegisterBlockDevice(DAC960_Controller_T *Controller) */ RequestQueue = &Controller->RequestQueue; blk_init_queue(RequestQueue, DAC960_RequestFunction, &Controller->queue_lock); + blk_queue_bounce_limit(RequestQueue, Controller->BounceBufferLimit); RequestQueue->queuedata = Controller; blk_queue_max_hw_segments(RequestQueue, Controller->DriverScatterGatherLimit); diff --git a/drivers/block/DAC960.h b/drivers/block/DAC960.h index f38145a54a67..01b543211870 100644 --- a/drivers/block/DAC960.h +++ b/drivers/block/DAC960.h @@ -62,11 +62,6 @@ /* Define the pci dma mask supported by DAC960 V1 and V2 Firmware Controlers - - For now set the V2 mask to only 32 bits. The controller IS capable - of doing 64 bit dma. But I have yet to find out whether this needs to - be explicitely enabled in the controller, or of the controller adapts - automatically. */ #define DAC690_V1_PciDmaMask 0xffffffff @@ -2370,6 +2365,7 @@ typedef struct DAC960_Controller unsigned short ControllerScatterGatherLimit; unsigned short DriverScatterGatherLimit; unsigned int ControllerUsageCount; + u64 BounceBufferLimit; unsigned int CombinedStatusBufferLength; unsigned int InitialStatusLength; unsigned int CurrentStatusLength; diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 8987b67272cd..f566e20e2094 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -599,9 +599,12 @@ static int cciss_ioctl(struct inode *inode, struct file *filep, luninfo.num_opens = drv->usage_count; luninfo.num_parts = 0; /* count partitions 1 to 15 with sizes > 0 */ - for(i=1; i <MAX_PART; i++) - if (disk->part[i].nr_sects != 0) - luninfo.num_parts++; + for(i=1; i <MAX_PART; i++) { + if (!disk->part[i]) + continue; + if (disk->part[i]->nr_sects != 0) + luninfo.num_parts++; + } if (copy_to_user((void *) arg, &luninfo, sizeof(LogvolInfo_struct))) return -EFAULT; diff --git a/drivers/block/genhd.c b/drivers/block/genhd.c index 032739646da9..b2eeafc81195 100644 --- a/drivers/block/genhd.c +++ b/drivers/block/genhd.c @@ -365,11 +365,13 @@ static int show_partition(struct seq_file *part, void *v) (unsigned long long)get_capacity(sgp) >> 1, disk_name(sgp, 0, buf)); for (n = 0; n < sgp->minors - 1; n++) { - if (sgp->part[n].nr_sects == 0) + if (!sgp->part[n]) + continue; + if (sgp->part[n]->nr_sects == 0) continue; seq_printf(part, "%4d %4d %10llu %s\n", sgp->major, n + 1 + sgp->first_minor, - (unsigned long long)sgp->part[n].nr_sects >> 1 , + (unsigned long long)sgp->part[n]->nr_sects >> 1 , disk_name(sgp, n + 1, buf)); } @@ -542,6 +544,92 @@ static struct kset_hotplug_ops block_hotplug_ops = { static decl_subsys(block, &ktype_block, &block_hotplug_ops); +/* + * aggregate disk stat collector. Uses the same stats that the sysfs + * entries do, above, but makes them available through one seq_file. + * Watching a few disks may be efficient through sysfs, but watching + * all of them will be more efficient through this interface. 
+ * + * The output looks suspiciously like /proc/partitions with a bunch of + * extra fields. + */ + +/* iterator */ +static void *diskstats_start(struct seq_file *part, loff_t *pos) +{ + loff_t k = *pos; + struct list_head *p; + + down_read(&block_subsys.rwsem); + list_for_each(p, &block_subsys.kset.list) + if (!k--) + return list_entry(p, struct gendisk, kobj.entry); + return NULL; +} + +static void *diskstats_next(struct seq_file *part, void *v, loff_t *pos) +{ + struct list_head *p = ((struct gendisk *)v)->kobj.entry.next; + ++*pos; + return p==&block_subsys.kset.list ? NULL : + list_entry(p, struct gendisk, kobj.entry); +} + +static void diskstats_stop(struct seq_file *part, void *v) +{ + up_read(&block_subsys.rwsem); +} + +static int diskstats_show(struct seq_file *s, void *v) +{ + struct gendisk *gp = v; + char buf[64]; + int n = 0; + + /* + if (&sgp->kobj.entry == block_subsys.kset.list.next) + seq_puts(s, "major minor name" + " rio rmerge rsect ruse wio wmerge " + "wsect wuse running use aveq" + "\n\n"); + */ + + disk_round_stats(gp); + seq_printf(s, "%4d %4d %s %u %u %llu %u %u %u %llu %u %u %u %u\n", + gp->major, n + gp->first_minor, disk_name(gp, n, buf), + disk_stat_read(gp, reads), disk_stat_read(gp, read_merges), + (unsigned long long)disk_stat_read(gp, read_sectors), + jiffies_to_msec(disk_stat_read(gp, read_ticks)), + disk_stat_read(gp, writes), disk_stat_read(gp, write_merges), + (unsigned long long)disk_stat_read(gp, write_sectors), + jiffies_to_msec(disk_stat_read(gp, write_ticks)), + disk_stat_read(gp, in_flight), + jiffies_to_msec(disk_stat_read(gp, io_ticks)), + jiffies_to_msec(disk_stat_read(gp, time_in_queue))); + + /* now show all non-0 size partitions of it */ + for (n = 0; n < gp->minors - 1; n++) { + struct hd_struct *hd = gp->part[n]; + + if (hd && hd->nr_sects) + seq_printf(s, "%4d %4d %s %u %u %u %u\n", + gp->major, n + gp->first_minor + 1, + disk_name(gp, n + 1, buf), + hd->reads, hd->read_sectors, + hd->writes, hd->write_sectors); + } + + return 0; +} + +struct seq_operations diskstats_op = { + start: diskstats_start, + next: diskstats_next, + stop: diskstats_stop, + show: diskstats_show +}; + + struct gendisk *alloc_disk(int minors) { struct gendisk *disk = kmalloc(sizeof(struct gendisk), GFP_KERNEL); @@ -552,7 +640,7 @@ struct gendisk *alloc_disk(int minors) return NULL; } if (minors > 1) { - int size = (minors - 1) * sizeof(struct hd_struct); + int size = (minors - 1) * sizeof(struct hd_struct *); disk->part = kmalloc(size, GFP_KERNEL); if (!disk->part) { kfree(disk); @@ -604,8 +692,8 @@ void set_device_ro(struct block_device *bdev, int flag) struct gendisk *disk = bdev->bd_disk; if (bdev->bd_contains != bdev) { int part = bdev->bd_dev - MKDEV(disk->major, disk->first_minor); - struct hd_struct *p = &disk->part[part-1]; - p->policy = flag; + struct hd_struct *p = disk->part[part-1]; + if (p) p->policy = flag; } else disk->policy = flag; } @@ -615,7 +703,7 @@ void set_disk_ro(struct gendisk *disk, int flag) int i; disk->policy = flag; for (i = 0; i < disk->minors - 1; i++) - disk->part[i].policy = flag; + if (disk->part[i]) disk->part[i]->policy = flag; } int bdev_read_only(struct block_device *bdev) @@ -626,8 +714,9 @@ int bdev_read_only(struct block_device *bdev) disk = bdev->bd_disk; if (bdev->bd_contains != bdev) { int part = bdev->bd_dev - MKDEV(disk->major, disk->first_minor); - struct hd_struct *p = &disk->part[part-1]; - return p->policy; + struct hd_struct *p = disk->part[part-1]; + if (p) return p->policy; + return 0; } else return 
disk->policy; } diff --git a/drivers/block/ioctl.c b/drivers/block/ioctl.c index 538c8a04a2d3..3dbd0824319b 100644 --- a/drivers/block/ioctl.c +++ b/drivers/block/ioctl.c @@ -41,11 +41,14 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg *arg) return -EINVAL; } /* partition number in use? */ - if (disk->part[part - 1].nr_sects != 0) + if (disk->part[part - 1]) return -EBUSY; /* overlap? */ for (i = 0; i < disk->minors - 1; i++) { - struct hd_struct *s = &disk->part[i]; + struct hd_struct *s = disk->part[i]; + + if (!s) + continue; if (!(start+length <= s->start_sect || start >= s->start_sect + s->nr_sects)) return -EBUSY; @@ -54,7 +57,9 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg *arg) add_partition(disk, part, start, length); return 0; case BLKPG_DEL_PARTITION: - if (disk->part[part - 1].nr_sects == 0) + if (!disk->part[part-1]) + return -ENXIO; + if (disk->part[part - 1]->nr_sects == 0) return -ENXIO; /* partition in use? Incomplete check for now. */ bdevp = bdget(MKDEV(disk->major, disk->first_minor) + part); diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c index e14210308577..9e2fd26ce0ed 100644 --- a/drivers/block/ll_rw_blk.c +++ b/drivers/block/ll_rw_blk.c @@ -1841,7 +1841,7 @@ static inline void blk_partition_remap(struct bio *bio) if (bdev == bdev->bd_contains) return; - p = &disk->part[bdev->bd_dev-MKDEV(disk->major,disk->first_minor)-1]; + p = disk->part[bdev->bd_dev-MKDEV(disk->major,disk->first_minor)-1]; switch (bio->bi_rw) { case READ: p->read_sectors += bio_sectors(bio); diff --git a/drivers/char/keyboard.c b/drivers/char/keyboard.c index ed453a23e4de..ef1fd1b6b0df 100644 --- a/drivers/char/keyboard.c +++ b/drivers/char/keyboard.c @@ -601,7 +601,7 @@ static void k_spec(struct vc_data *vc, unsigned char value, char up_flag, struct return; if ((kbd->kbdmode == VC_RAW || kbd->kbdmode == VC_MEDIUMRAW) && - value != K_SAK) + value != KVAL(K_SAK)) return; /* SAK is allowed even in raw mode */ fn_handler[value](vc, regs); } diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 60bd488baece..18ead55a549a 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -15,7 +15,7 @@ #include <linux/slab.h> static const char *_name = DM_NAME; -#define MAX_DEVICES (1 << KDEV_MINOR_BITS) +#define MAX_DEVICES 1024 static int major = 0; static int _major = 0; diff --git a/drivers/net/pcmcia/3c574_cs.c b/drivers/net/pcmcia/3c574_cs.c index 9a0727a4b120..ae13b70c367a 100644 --- a/drivers/net/pcmcia/3c574_cs.c +++ b/drivers/net/pcmcia/3c574_cs.c @@ -940,11 +940,9 @@ static int el3_start_xmit(struct sk_buff *skb, struct net_device *dev) outw(SetTxThreshold + (1536>>2), ioaddr + EL3_CMD); } - dev_kfree_skb (skb); pop_tx_status(dev); - - spin_unlock(&lp->window_lock); - + spin_unlock_irqrestore(&lp->window_lock, flags); + dev_kfree_skb(skb); return 0; } diff --git a/drivers/net/tulip/dmfe.c b/drivers/net/tulip/dmfe.c index ee017a02ecbe..b2d33c9ac274 100644 --- a/drivers/net/tulip/dmfe.c +++ b/drivers/net/tulip/dmfe.c @@ -668,13 +668,13 @@ static int dmfe_start_xmit(struct sk_buff *skb, struct DEVICE *dev) if ( db->tx_queue_cnt < TX_FREE_DESC_CNT ) netif_wake_queue(dev); - /* free this SKB */ - dev_kfree_skb(skb); - /* Restore CR7 to enable interrupt */ spin_unlock_irqrestore(&db->lock, flags); outl(db->cr7_data, dev->base_addr + DCR7); + /* free this SKB */ + dev_kfree_skb(skb); + return 0; } diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c index b37429ad5e65..bee04f53a84b 100644 --- a/drivers/pci/bus.c +++ b/drivers/pci/bus.c 
@@ -75,7 +75,8 @@ pci_bus_alloc_resource(struct pci_bus *bus, struct resource *res, * Add newly discovered PCI devices (which are on the bus->devices * list) to the global PCI device list, add the sysfs and procfs * entries. Where a bridge is found, add the discovered bus to - * the parents list of child buses, and recurse. + * the parents list of child buses, and recurse (breadth-first + * to be compatible with 2.4) * * Call hotplug for each new devices. */ @@ -98,6 +99,12 @@ void __devinit pci_bus_add_devices(struct pci_bus *bus) #endif pci_create_sysfs_dev_files(dev); + } + + list_for_each_entry(dev, &bus->devices, bus_list) { + + BUG_ON(list_empty(&dev->global_list)); + /* * If there is an unattached subordinate bus, attach * it and then scan for unattached PCI devices. diff --git a/drivers/serial/core.c b/drivers/serial/core.c index b461093a13cc..c6207f0737b7 100644 --- a/drivers/serial/core.c +++ b/drivers/serial/core.c @@ -782,8 +782,12 @@ uart_set_info(struct uart_state *state, struct serial_struct *newinfo) /* * Claim and map the new regions */ - if (port->type != PORT_UNKNOWN) + if (port->type != PORT_UNKNOWN) { retval = port->ops->request_port(port); + } else { + /* Always success - Jean II */ + retval = 0; + } /* * If we fail to request resources for the diff --git a/drivers/video/aty/mach64_gx.c b/drivers/video/aty/mach64_gx.c index a27b9bcd8859..85168a32eea8 100644 --- a/drivers/video/aty/mach64_gx.c +++ b/drivers/video/aty/mach64_gx.c @@ -119,7 +119,7 @@ static int aty_set_dac_514(const struct fb_info *info, } static int aty_var_to_pll_514(const struct fb_info *info, u32 vclk_per, - u32 bpp, u32 width, union aty_pll *pll) + u8 bpp, union aty_pll *pll) { /* * FIXME: use real calculations instead of using fixed values from the old @@ -338,7 +338,7 @@ const struct aty_dac_ops aty_dac_att21c498 = { */ static int aty_var_to_pll_18818(const struct fb_info *info, u32 vclk_per, - u32 bpp, u32 width, union aty_pll *pll) + u8 bpp, union aty_pll *pll) { u32 MHz100; /* in 0.01 MHz */ u32 program_bits; @@ -494,7 +494,7 @@ const struct aty_pll_ops aty_pll_ati18818_1 = { */ static int aty_var_to_pll_1703(const struct fb_info *info, u32 vclk_per, - u32 bpp, u32 width, union aty_pll *pll) + u32 vclk_per, u8 bpp, union aty_pll *pll) { u32 mhz100; /* in 0.01 MHz */ u32 program_bits; @@ -610,7 +610,7 @@ const struct aty_pll_ops aty_pll_stg1703 = { */ static int aty_var_to_pll_8398(const struct fb_info *info, u32 vclk_per, - u32 bpp, u32 width, union aty_pll *pll) + u32 vclk_per, u8 bpp, union aty_pll *pll) { u32 tempA, tempB, fOut, longMHz100, diff, preDiff; @@ -734,7 +734,7 @@ const struct aty_pll_ops aty_pll_ch8398 = { */ static int aty_var_to_pll_408(const struct fb_info *info, u32 vclk_per, - u32 bpp, u32 width, union aty_pll *pll) + u8 bpp, union aty_pll *pll) { u32 mhz100; /* in 0.01 MHz */ u32 program_bits; diff --git a/drivers/video/fbmem.c b/drivers/video/fbmem.c index 0f3182b1783a..75b47be36f22 100644 --- a/drivers/video/fbmem.c +++ b/drivers/video/fbmem.c @@ -25,6 +25,7 @@ #include <linux/mman.h> #include <linux/tty.h> #include <linux/init.h> +#include <linux/linux_logo.h> #include <linux/proc_fs.h> #ifdef CONFIG_KMOD #include <linux/kmod.h> @@ -655,7 +656,7 @@ int fb_prepare_logo(struct fb_info *info) } /* Return if no suitable logo was found */ - fb_logo.logo = find_logo(info->var.bits_per_pixel); + fb_logo.logo = fb_find_logo(info->var.bits_per_pixel); if (!fb_logo.logo || fb_logo.logo->height > info->var.yres) { fb_logo.logo = NULL; diff --git a/drivers/video/logo/logo.c 
b/drivers/video/logo/logo.c index daf9c360a2aa..3039664df313 100644 --- a/drivers/video/logo/logo.c +++ b/drivers/video/logo/logo.c @@ -33,7 +33,7 @@ extern const struct linux_logo logo_superh_vga16; extern const struct linux_logo logo_superh_clut224; -const struct linux_logo * __init find_logo(int depth) +const struct linux_logo *fb_find_logo(int depth) { const struct linux_logo *logo = 0; @@ -148,7 +148,7 @@ static int aio_setup_ring(struct kioctx *ctx) dprintk("mmap address: 0x%08lx\n", info->mmap_base); info->nr_pages = get_user_pages(current, ctx->mm, - info->mmap_base, info->mmap_size, + info->mmap_base, nr_pages, 1, 0, info->ring_pages, NULL); up_write(&ctx->mm->mmap_sem); diff --git a/fs/block_dev.c b/fs/block_dev.c index 948864b885b5..9a974170a10b 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -559,10 +559,10 @@ static int do_open(struct block_device *bdev, struct inode *inode, struct file * bdev->bd_contains = whole; down(&whole->bd_sem); whole->bd_part_count++; - p = disk->part + part - 1; + p = disk->part[part - 1]; bdev->bd_inode->i_data.backing_dev_info = whole->bd_inode->i_data.backing_dev_info; - if (!(disk->flags & GENHD_FL_UP) || !p->nr_sects) { + if (!(disk->flags & GENHD_FL_UP) || !p || !p->nr_sects) { whole->bd_part_count--; up(&whole->bd_sem); ret = -ENXIO; diff --git a/fs/buffer.c b/fs/buffer.c index 006892c691cd..47e2cf01f30c 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -776,6 +776,85 @@ void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode) EXPORT_SYMBOL(mark_buffer_dirty_inode); /* + * Add a page to the dirty page list. + * + * It is a sad fact of life that this function is called from several places + * deeply under spinlocking. It may not sleep. + * + * If the page has buffers, the uptodate buffers are set dirty, to preserve + * dirty-state coherency between the page and the buffers. It the page does + * not have buffers then when they are later attached they will all be set + * dirty. + * + * The buffers are dirtied before the page is dirtied. There's a small race + * window in which a writepage caller may see the page cleanness but not the + * buffer dirtiness. That's fine. If this code were to set the page dirty + * before the buffers, a concurrent writepage caller could clear the page dirty + * bit, see a bunch of clean buffers and we'd end up with dirty buffers/clean + * page on the dirty page list. + * + * There is also a small window where the page is dirty, and not on dirty_pages. + * Also a possibility that by the time the page is added to dirty_pages, it has + * been set clean. The page lists are somewhat approximate in this regard. + * It's better to have clean pages accidentally attached to dirty_pages than to + * leave dirty pages attached to clean_pages. + * + * We use private_lock to lock against try_to_free_buffers while using the + * page's buffer list. Also use this to protect against clean buffers being + * added to the page after it was set dirty. + * + * FIXME: may need to call ->reservepage here as well. That's rather up to the + * address_space though. + * + * For now, we treat swapper_space specially. It doesn't use the normal + * block a_ops. 
+ */ +int __set_page_dirty_buffers(struct page *page) +{ + struct address_space * const mapping = page->mapping; + int ret = 0; + + if (mapping == NULL) { + SetPageDirty(page); + goto out; + } + + if (!PageUptodate(page)) + buffer_error(); + + spin_lock(&mapping->private_lock); + if (page_has_buffers(page)) { + struct buffer_head *head = page_buffers(page); + struct buffer_head *bh = head; + + do { + if (buffer_uptodate(bh)) + set_buffer_dirty(bh); + else + buffer_error(); + bh = bh->b_this_page; + } while (bh != head); + } + spin_unlock(&mapping->private_lock); + + if (!TestSetPageDirty(page)) { + spin_lock(&mapping->page_lock); + if (page->mapping) { /* Race with truncate? */ + if (!mapping->backing_dev_info->memory_backed) + inc_page_state(nr_dirty); + list_del(&page->list); + list_add(&page->list, &mapping->dirty_pages); + } + spin_unlock(&mapping->page_lock); + __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); + } + +out: + return ret; +} +EXPORT_SYMBOL(__set_page_dirty_buffers); + +/* * Write out and wait upon a list of buffers. * * We have conflicting pressures: we want to make sure that all @@ -916,7 +995,7 @@ try_again: head = NULL; offset = PAGE_SIZE; while ((offset -= size) >= 0) { - bh = alloc_buffer_head(); + bh = alloc_buffer_head(GFP_NOFS); if (!bh) goto no_grow; @@ -2267,7 +2346,7 @@ int nobh_prepare_write(struct page *page, unsigned from, unsigned to, if (buffer_uptodate(&map_bh)) continue; /* reiserfs does this */ if (block_start < from || block_end > to) { - struct buffer_head *bh = alloc_buffer_head(); + struct buffer_head *bh = alloc_buffer_head(GFP_NOFS); if (!bh) { ret = -ENOMEM; @@ -2826,9 +2905,9 @@ static void recalc_bh_state(void) buffer_heads_over_limit = (tot > max_buffer_heads); } -struct buffer_head *alloc_buffer_head(void) +struct buffer_head *alloc_buffer_head(int gfp_flags) { - struct buffer_head *ret = kmem_cache_alloc(bh_cachep, GFP_NOFS); + struct buffer_head *ret = kmem_cache_alloc(bh_cachep, gfp_flags); if (ret) { preempt_disable(); __get_cpu_var(bh_accounting).nr++; diff --git a/fs/dcache.c b/fs/dcache.c index efc51c38ea25..9eec20e0ab20 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -155,12 +155,11 @@ repeat: if (d_unhashed(dentry)) goto kill_it; if (list_empty(&dentry->d_lru)) { - dentry->d_vfs_flags &= ~DCACHE_REFERENCED; + dentry->d_vfs_flags |= DCACHE_REFERENCED; list_add(&dentry->d_lru, &dentry_unused); dentry_stat.nr_unused++; } spin_unlock(&dentry->d_lock); - dentry->d_vfs_flags |= DCACHE_REFERENCED; spin_unlock(&dcache_lock); return; @@ -250,7 +249,6 @@ int d_invalidate(struct dentry * dentry) static inline struct dentry * __dget_locked(struct dentry *dentry) { atomic_inc(&dentry->d_count); - dentry->d_vfs_flags |= DCACHE_REFERENCED; if (atomic_read(&dentry->d_count) == 1) { dentry_stat.nr_unused--; list_del_init(&dentry->d_lru); @@ -379,17 +377,16 @@ static void prune_dcache(int count) dentry = list_entry(tmp, struct dentry, d_lru); spin_lock(&dentry->d_lock); + /* leave inuse dentries */ + if (atomic_read(&dentry->d_count)) { + spin_unlock(&dentry->d_lock); + continue; + } /* If the dentry was recently referenced, don't free it. 
*/ if (dentry->d_vfs_flags & DCACHE_REFERENCED) { dentry->d_vfs_flags &= ~DCACHE_REFERENCED; - - /* don't add non zero d_count dentries - * back to d_lru list - */ - if (!atomic_read(&dentry->d_count)) { - list_add(&dentry->d_lru, &dentry_unused); - dentry_stat.nr_unused++; - } + list_add(&dentry->d_lru, &dentry_unused); + dentry_stat.nr_unused++; spin_unlock(&dentry->d_lock); continue; } @@ -538,13 +535,18 @@ resume: struct list_head *tmp = next; struct dentry *dentry = list_entry(tmp, struct dentry, d_child); next = tmp->next; - list_del_init(&dentry->d_lru); - /* don't add non zero d_count dentries - * back to d_lru list + if (!list_empty(&dentry->d_lru)) { + dentry_stat.nr_unused--; + list_del_init(&dentry->d_lru); + } + /* + * move only zero ref count dentries to the end + * of the unused list for prune_dcache */ if (!atomic_read(&dentry->d_count)) { list_add(&dentry->d_lru, dentry_unused.prev); + dentry_stat.nr_unused++; found++; } /* @@ -609,13 +611,18 @@ void shrink_dcache_anon(struct hlist_head *head) spin_lock(&dcache_lock); hlist_for_each(lp, head) { struct dentry *this = hlist_entry(lp, struct dentry, d_hash); - list_del(&this->d_lru); + if (!list_empty(&this->d_lru)) { + dentry_stat.nr_unused--; + list_del(&this->d_lru); + } - /* don't add non zero d_count dentries - * back to d_lru list + /* + * move only zero ref count dentries to the end + * of the unused list for prune_dcache */ if (!atomic_read(&this->d_count)) { list_add_tail(&this->d_lru, &dentry_unused); + dentry_stat.nr_unused++; found++; } } @@ -1017,7 +1024,6 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name) if (likely(move_count == dentry->d_move_count)) { if (!d_unhashed(dentry)) { atomic_inc(&dentry->d_count); - dentry->d_vfs_flags |= DCACHE_REFERENCED; found = dentry; } } diff --git a/fs/dquot.c b/fs/dquot.c index 737b9f1d54ab..f3c0f63265c3 100644 --- a/fs/dquot.c +++ b/fs/dquot.c @@ -345,50 +345,6 @@ restart: return 0; } -static struct super_block *get_super_to_sync(int type) -{ - struct list_head *head; - int cnt, dirty; - -restart: - spin_lock(&sb_lock); - list_for_each(head, &super_blocks) { - struct super_block *sb = list_entry(head, struct super_block, s_list); - - for (cnt = 0, dirty = 0; cnt < MAXQUOTAS; cnt++) - if ((type == cnt || type == -1) && sb_has_quota_enabled(sb, cnt) - && info_any_dquot_dirty(&sb_dqopt(sb)->info[cnt])) - dirty = 1; - if (!dirty) - continue; - sb->s_count++; - spin_unlock(&sb_lock); - down_read(&sb->s_umount); - if (!sb->s_root) { - drop_super(sb); - goto restart; - } - return sb; - } - spin_unlock(&sb_lock); - return NULL; -} - -void sync_dquots(struct super_block *sb, int type) -{ - if (sb) { - if (sb->s_qcop->quota_sync) - sb->s_qcop->quota_sync(sb, type); - } - else { - while ((sb = get_super_to_sync(type))) { - if (sb->s_qcop->quota_sync) - sb->s_qcop->quota_sync(sb, type); - drop_super(sb); - } - } -} - /* Free unused dquots from cache */ static void prune_dqcache(int count) { diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index 7264433b25fd..aae953bb9572 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -91,7 +91,6 @@ find_exported_dentry(struct super_block *sb, void *obj, void *parent, if (dentry != result && acceptable(context, dentry)) { dput(result); - dentry->d_vfs_flags |= DCACHE_REFERENCED; return dentry; } spin_lock(&dcache_lock); @@ -271,7 +270,6 @@ find_exported_dentry(struct super_block *sb, void *obj, void *parent, if (dentry != result && acceptable(context, dentry)) { dput(result); - dentry->d_vfs_flags |= 
DCACHE_REFERENCED; return dentry; } spin_lock(&dcache_lock); @@ -434,7 +432,6 @@ static struct dentry *export_iget(struct super_block *sb, unsigned long ino, __u iput(inode); return ERR_PTR(-ENOMEM); } - result->d_vfs_flags |= DCACHE_REFERENCED; return result; } diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 67548b28113d..866edb62fad5 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -608,7 +608,6 @@ struct dentry *fat_get_dentry(struct super_block *sb, void *inump) return ERR_PTR(-ENOMEM); } result->d_op = sb->s_root->d_op; - result->d_vfs_flags |= DCACHE_REFERENCED; return result; } diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index a429a2aa0ac5..96e1d0bf490f 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c @@ -457,14 +457,8 @@ int journal_write_metadata_buffer(transaction_t *transaction, /* * Right, time to make up the new buffer_head. */ - do { - new_bh = alloc_buffer_head(); - if (!new_bh) { - printk (KERN_NOTICE "%s: ENOMEM at alloc_buffer_head, " - "trying again.\n", __FUNCTION__); - yield(); - } - } while (!new_bh); + new_bh = alloc_buffer_head(GFP_NOFS|__GFP_NOFAIL); + /* keep subsequent assertions sane */ new_bh->b_state = 0; init_buffer(new_bh, NULL, NULL); @@ -1613,28 +1607,7 @@ void shrink_journal_memory(void) */ void * __jbd_kmalloc (const char *where, size_t size, int flags, int retry) { - void *p; - static unsigned long last_warning; - - while (1) { - p = kmalloc(size, flags); - if (p) - return p; - if (!retry) - return NULL; - /* Log every retry for debugging. Also log them to the - * syslog, but do rate-limiting on the non-debugging - * messages. */ - jbd_debug(1, "ENOMEM in %s, retrying.\n", where); - - if (time_after(jiffies, last_warning + 5*HZ)) { - printk(KERN_NOTICE - "ENOMEM in %s, retrying.\n", where); - last_warning = jiffies; - } - - yield(); - } + return kmalloc(size, flags | (retry ? 
__GFP_NOFAIL : 0)); } /* diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 647f0357e30c..aa0646e44598 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -182,7 +182,7 @@ static struct sysfs_ops part_sysfs_ops = { static ssize_t part_dev_read(struct hd_struct * p, char *page) { struct gendisk *disk = container_of(p->kobj.parent,struct gendisk,kobj); - int part = p - disk->part + 1; + int part = p->partno; dev_t base = MKDEV(disk->major, disk->first_minor); return sprintf(page, "%04x\n", (unsigned)(base + part)); } @@ -234,7 +234,9 @@ struct kobj_type ktype_part = { void delete_partition(struct gendisk *disk, int part) { - struct hd_struct *p = disk->part + part - 1; + struct hd_struct *p = disk->part[part-1]; + if (!p) + return; if (!p->nr_sects) return; p->start_sect = 0; @@ -242,14 +244,23 @@ void delete_partition(struct gendisk *disk, int part) p->reads = p->writes = p->read_sectors = p->write_sectors = 0; devfs_remove("%s/part%d", disk->devfs_name, part); kobject_unregister(&p->kobj); + disk->part[part-1] = NULL; + kfree(p); } void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len) { - struct hd_struct *p = disk->part + part - 1; + struct hd_struct *p; + p = kmalloc(sizeof(*p), GFP_KERNEL); + if (!p) + return; + + memset(p, 0, sizeof(*p)); p->start_sect = start; p->nr_sects = len; + p->partno = part; + disk->part[part-1] = p; devfs_register_partition(disk, part); snprintf(p->kobj.name,KOBJ_NAME_LEN,"%s%d",disk->kobj.name,part); p->kobj.parent = &disk->kobj; diff --git a/fs/partitions/check.h b/fs/partitions/check.h index 0be95725e097..882980c55720 100644 --- a/fs/partitions/check.h +++ b/fs/partitions/check.h @@ -29,3 +29,8 @@ put_partition(struct parsed_partitions *p, int n, sector_t from, sector_t size) } extern int warn_no_part; + +extern void parse_bsd(struct parsed_partitions *state, + struct block_device *bdev, u32 offset, u32 size, + int origin, char *flavour, int max_partitions); + diff --git a/fs/partitions/msdos.c b/fs/partitions/msdos.c index 147485d515c8..56dea78c0312 100644 --- a/fs/partitions/msdos.c +++ b/fs/partitions/msdos.c @@ -214,12 +214,12 @@ parse_solaris_x86(struct parsed_partitions *state, struct block_device *bdev, #endif } -#ifdef CONFIG_BSD_DISKLABEL +#if defined(CONFIG_BSD_DISKLABEL) || defined(CONFIG_NEC98_PARTITION) /* * Create devices for BSD partitions listed in a disklabel, under a * dos-like partition. See parse_extended() for more information. 
*/ -static void +void parse_bsd(struct parsed_partitions *state, struct block_device *bdev, u32 offset, u32 size, int origin, char *flavour, int max_partitions) diff --git a/fs/partitions/nec98.c b/fs/partitions/nec98.c index b3bd8faf9bda..cbd55789f4b2 100644 --- a/fs/partitions/nec98.c +++ b/fs/partitions/nec98.c @@ -66,13 +66,6 @@ is_valid_nec98_partition_table(const struct nec98_partition *ptable, return valid; } -#ifdef CONFIG_BSD_DISKLABEL -extern void parse_bsd(struct parsed_partitions *state, - struct block_device *bdev, - u32 offset, u32 size, int origin, char *flavour, - int max_partitions); -#endif - int nec98_partition(struct parsed_partitions *state, struct block_device *bdev) { unsigned int nr; diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index 60dd7b44e838..89fc02080f1e 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -333,6 +333,18 @@ static struct file_operations proc_partitions_operations = { .release = seq_release, }; +extern struct seq_operations diskstats_op; +static int diskstats_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &diskstats_op); +} +static struct file_operations proc_diskstats_operations = { + open: diskstats_open, + read: seq_read, + llseek: seq_lseek, + release: seq_release, +}; + #ifdef CONFIG_MODULES extern struct seq_operations modules_op; static int modules_open(struct inode *inode, struct file *file) @@ -644,6 +656,7 @@ void __init proc_misc_init(void) create_seq_entry("slabinfo",S_IWUSR|S_IRUGO,&proc_slabinfo_operations); create_seq_entry("buddyinfo",S_IRUGO, &fragmentation_file_operations); create_seq_entry("vmstat",S_IRUGO, &proc_vmstat_file_operations); + create_seq_entry("diskstats", 0, &proc_diskstats_operations); #ifdef CONFIG_MODULES create_seq_entry("modules", 0, &proc_modules_operations); #endif diff --git a/fs/quota.c b/fs/quota.c index 3daa61901363..ce929f581b53 100644 --- a/fs/quota.c +++ b/fs/quota.c @@ -19,8 +19,10 @@ static int check_quotactl_valid(struct super_block *sb, int type, int cmd, qid_t { if (type >= MAXQUOTAS) return -EINVAL; + if (!sb && cmd != Q_SYNC) + return -ENODEV; /* Is operation supported? 
*/ - if (!sb->s_qcop) + if (sb && !sb->s_qcop) return -ENOSYS; switch (cmd) { @@ -51,7 +53,7 @@ static int check_quotactl_valid(struct super_block *sb, int type, int cmd, qid_t return -ENOSYS; break; case Q_SYNC: - if (!sb->s_qcop->quota_sync) + if (sb && !sb->s_qcop->quota_sync) return -ENOSYS; break; case Q_XQUOTAON: @@ -102,6 +104,50 @@ static int check_quotactl_valid(struct super_block *sb, int type, int cmd, qid_t return security_quotactl (cmd, type, id, sb); } +static struct super_block *get_super_to_sync(int type) +{ + struct list_head *head; + int cnt, dirty; + +restart: + spin_lock(&sb_lock); + list_for_each(head, &super_blocks) { + struct super_block *sb = list_entry(head, struct super_block, s_list); + + for (cnt = 0, dirty = 0; cnt < MAXQUOTAS; cnt++) + if ((type == cnt || type == -1) && sb_has_quota_enabled(sb, cnt) + && info_any_dquot_dirty(&sb_dqopt(sb)->info[cnt])) + dirty = 1; + if (!dirty) + continue; + sb->s_count++; + spin_unlock(&sb_lock); + down_read(&sb->s_umount); + if (!sb->s_root) { + drop_super(sb); + goto restart; + } + return sb; + } + spin_unlock(&sb_lock); + return NULL; +} + +void sync_dquots(struct super_block *sb, int type) +{ + if (sb) { + if (sb->s_qcop->quota_sync) + sb->s_qcop->quota_sync(sb, type); + } + else { + while ((sb = get_super_to_sync(type))) { + if (sb->s_qcop->quota_sync) + sb->s_qcop->quota_sync(sb, type); + drop_super(sb); + } + } +} + /* Copy parameters and call proper function */ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id, caddr_t addr) { @@ -167,7 +213,8 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id, cadd return sb->s_qcop->set_dqblk(sb, type, id, &idq); } case Q_SYNC: - return sb->s_qcop->quota_sync(sb, type); + sync_dquots(sb, type); + return 0; case Q_XQUOTAON: case Q_XQUOTAOFF: @@ -222,27 +269,30 @@ asmlinkage long sys_quotactl(unsigned int cmd, const char *special, qid_t id, ca struct super_block *sb = NULL; struct block_device *bdev; char *tmp; - int ret = -ENODEV; + int ret; cmds = cmd >> SUBCMDSHIFT; type = cmd & SUBCMDMASK; - tmp = getname(special); - if (IS_ERR(tmp)) - return PTR_ERR(tmp); - bdev = lookup_bdev(tmp); - putname(tmp); - if (IS_ERR(bdev)) - return PTR_ERR(bdev); - sb = get_super(bdev); - bdput(bdev); + if (cmds != Q_SYNC || special) { + tmp = getname(special); + if (IS_ERR(tmp)) + return PTR_ERR(tmp); + bdev = lookup_bdev(tmp); + putname(tmp); + if (IS_ERR(bdev)) + return PTR_ERR(bdev); + sb = get_super(bdev); + bdput(bdev); + if (!sb) + return -ENODEV; + } - if (sb) { - ret = check_quotactl_valid(sb, type, cmds, id); - if (ret >= 0) - ret = do_quotactl(sb, type, cmds, id, addr); + ret = check_quotactl_valid(sb, type, cmds, id); + if (ret >= 0) + ret = do_quotactl(sb, type, cmds, id, addr); + if (sb) drop_super(sb); - } return ret; } diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 0536716aa84e..fb7f27f1f532 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -1260,7 +1260,6 @@ struct dentry *reiserfs_get_dentry(struct super_block *sb, void *vobjp) iput(inode); return ERR_PTR(-ENOMEM); } - result->d_vfs_flags |= DCACHE_REFERENCED; return result; } diff --git a/fs/smbfs/proc.c b/fs/smbfs/proc.c index 703d6324f417..5ecb4353994e 100644 --- a/fs/smbfs/proc.c +++ b/fs/smbfs/proc.c @@ -2085,7 +2085,6 @@ out: void smb_decode_unix_basic(struct smb_fattr *fattr, char *p) { /* FIXME: verify nls support. all is sent as utf8? 
*/ - __u64 devmajor, devminor; fattr->f_unix = 1; fattr->f_mode = 0; @@ -2112,9 +2111,10 @@ void smb_decode_unix_basic(struct smb_fattr *fattr, char *p) fattr->f_mode |= smb_filetype_to_mode(WVAL(p, 56)); if (S_ISBLK(fattr->f_mode) || S_ISCHR(fattr->f_mode)) { - devmajor = LVAL(p, 60); - devminor = LVAL(p, 68); - fattr->f_rdev = ((devmajor & 0xFF) << 8) | (devminor & 0xFF); + __u64 major = LVAL(p, 60); + __u64 minor = LVAL(p, 68); + + fattr->f_rdev = MKDEV(major & 0xffffffff, minor & 0xffffffff); } fattr->f_mode |= LVAL(p, 84); } @@ -3008,7 +3008,7 @@ out: */ int smb_proc_setattr_unix(struct dentry *d, struct iattr *attr, - int major, int minor) + unsigned int major, unsigned int minor) { struct smb_sb_info *server = server_from_dentry(d); u64 nttime; diff --git a/fs/smbfs/proto.h b/fs/smbfs/proto.h index 07b690eb8848..ec44bad0e84e 100644 --- a/fs/smbfs/proto.h +++ b/fs/smbfs/proto.h @@ -27,7 +27,7 @@ extern void smb_init_root_dirent(struct smb_sb_info *server, struct smb_fattr *f extern void smb_decode_unix_basic(struct smb_fattr *fattr, char *p); extern int smb_proc_getattr(struct dentry *dir, struct smb_fattr *fattr); extern int smb_proc_setattr(struct dentry *dir, struct smb_fattr *fattr); -extern int smb_proc_setattr_unix(struct dentry *d, struct iattr *attr, int major, int minor); +extern int smb_proc_setattr_unix(struct dentry *d, struct iattr *attr, unsigned int major, unsigned int minor); extern int smb_proc_settime(struct dentry *dentry, struct smb_fattr *fattr); extern int smb_proc_dskattr(struct super_block *sb, struct statfs *attr); extern int smb_proc_read_link(struct smb_sb_info *server, struct dentry *d, char *buffer, int len); diff --git a/fs/xfs/linux/xfs_super.c b/fs/xfs/linux/xfs_super.c index 662a43c90a1e..73ef4ec19e95 100644 --- a/fs/xfs/linux/xfs_super.c +++ b/fs/xfs/linux/xfs_super.c @@ -741,7 +741,6 @@ linvfs_get_dentry( iput(inode); return ERR_PTR(-ENOMEM); } - result->d_vfs_flags |= DCACHE_REFERENCED; return result; } diff --git a/fs/xfs/pagebuf/page_buf.c b/fs/xfs/pagebuf/page_buf.c index f50803bd2570..2230380e952f 100644 --- a/fs/xfs/pagebuf/page_buf.c +++ b/fs/xfs/pagebuf/page_buf.c @@ -554,7 +554,8 @@ mapit: } else if (flags & PBF_MAPPED) { if (as_list_len > 64) purge_addresses(); - pb->pb_addr = vmap(pb->pb_pages, page_count); + pb->pb_addr = vmap(pb->pb_pages, page_count, + VM_MAP, PAGE_KERNEL); if (pb->pb_addr == NULL) return -ENOMEM; pb->pb_addr += pb->pb_offset; diff --git a/include/asm-alpha/pgalloc.h b/include/asm-alpha/pgalloc.h index fc675efac381..b34194c3d96c 100644 --- a/include/asm-alpha/pgalloc.h +++ b/include/asm-alpha/pgalloc.h @@ -40,7 +40,7 @@ pgd_free(pgd_t *pgd) static inline pmd_t * pmd_alloc_one(struct mm_struct *mm, unsigned long address) { - pmd_t *ret = (pmd_t *)__get_free_page(GFP_KERNEL); + pmd_t *ret = (pmd_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT); if (ret) clear_page(ret); return ret; diff --git a/include/asm-arm/proc-armv/pgalloc.h b/include/asm-arm/proc-armv/pgalloc.h index 4440be79d5ac..3263c346ccba 100644 --- a/include/asm-arm/proc-armv/pgalloc.h +++ b/include/asm-arm/proc-armv/pgalloc.h @@ -27,17 +27,9 @@ static inline pte_t * pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr) { - int count = 0; pte_t *pte; - do { - pte = (pte_t *)__get_free_page(GFP_KERNEL); - if (!pte) { - current->state = TASK_UNINTERRUPTIBLE; - schedule_timeout(HZ); - } - } while (!pte && (count++ < 10)); - + pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT); if (pte) { clear_page(pte); clean_dcache_area(pte, sizeof(pte_t) * 
PTRS_PER_PTE); @@ -51,16 +43,8 @@ static inline struct page * pte_alloc_one(struct mm_struct *mm, unsigned long addr) { struct page *pte; - int count = 0; - - do { - pte = alloc_pages(GFP_KERNEL, 0); - if (!pte) { - current->state = TASK_UNINTERRUPTIBLE; - schedule_timeout(HZ); - } - } while (!pte && (count++ < 10)); + pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT, 0); if (pte) { void *page = page_address(pte); clear_page(page); diff --git a/include/asm-cris/pgalloc.h b/include/asm-cris/pgalloc.h index 80e73be0d2b0..75dde6f4a42f 100644 --- a/include/asm-cris/pgalloc.h +++ b/include/asm-cris/pgalloc.h @@ -62,7 +62,7 @@ static inline pte_t *pte_alloc_one(struct mm_struct *mm, unsigned long address) { pte_t *pte; - pte = (pte_t *) __get_free_page(GFP_KERNEL); + pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT); if (pte) clear_page(pte); return pte; diff --git a/include/asm-i386/div64.h b/include/asm-i386/div64.h index ef915df700e4..bc8718a0b1ce 100644 --- a/include/asm-i386/div64.h +++ b/include/asm-i386/div64.h @@ -14,4 +14,22 @@ __mod; \ }) +/* + * (long)X = ((long long)divs) / (long)div + * (long)rem = ((long long)divs) % (long)div + * + * Warning, this will do an exception if X overflows. + */ +#define div_long_long_rem(a,b,c) div_ll_X_l_rem(a,b,c) + +extern inline long +div_ll_X_l_rem(long long divs, long div, long *rem) +{ + long dum2; + __asm__("divl %2":"=a"(dum2), "=d"(*rem) + : "rm"(div), "A"(divs)); + + return dum2; + +} #endif diff --git a/include/asm-ia64/pgalloc.h b/include/asm-ia64/pgalloc.h index 2e6134af88bc..4f56ceb9c42b 100644 --- a/include/asm-ia64/pgalloc.h +++ b/include/asm-ia64/pgalloc.h @@ -93,7 +93,7 @@ pmd_alloc_one_fast (struct mm_struct *mm, unsigned long addr) static inline pmd_t* pmd_alloc_one (struct mm_struct *mm, unsigned long addr) { - pmd_t *pmd = (pmd_t *) __get_free_page(GFP_KERNEL); + pmd_t *pmd = (pmd_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT); if (likely(pmd != NULL)) clear_page(pmd); @@ -125,7 +125,7 @@ pmd_populate_kernel (struct mm_struct *mm, pmd_t *pmd_entry, pte_t *pte) static inline struct page * pte_alloc_one (struct mm_struct *mm, unsigned long addr) { - struct page *pte = alloc_pages(GFP_KERNEL, 0); + struct page *pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT, 0); if (likely(pte != NULL)) clear_page(page_address(pte)); @@ -135,7 +135,7 @@ pte_alloc_one (struct mm_struct *mm, unsigned long addr) static inline pte_t * pte_alloc_one_kernel (struct mm_struct *mm, unsigned long addr) { - pte_t *pte = (pte_t *) __get_free_page(GFP_KERNEL); + pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT); if (likely(pte != NULL)) clear_page(pte); diff --git a/include/asm-m68k/motorola_pgalloc.h b/include/asm-m68k/motorola_pgalloc.h index 4beb7a822b38..f315615e488a 100644 --- a/include/asm-m68k/motorola_pgalloc.h +++ b/include/asm-m68k/motorola_pgalloc.h @@ -11,7 +11,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long ad { pte_t *pte; - pte = (pte_t *) __get_free_page(GFP_KERNEL); + pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT); if (pte) { clear_page(pte); __flush_page_to_ram(pte); @@ -30,7 +30,7 @@ static inline void pte_free_kernel(pte_t *pte) static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) { - struct page *page = alloc_pages(GFP_KERNEL, 0); + struct page *page = alloc_pages(GFP_KERNEL|__GFP_REPEAT, 0); pte_t *pte; if(!page) diff --git a/include/asm-m68k/sun3_pgalloc.h b/include/asm-m68k/sun3_pgalloc.h index 7740a2936511..4580b60196b9 100644 --- 
a/include/asm-m68k/sun3_pgalloc.h +++ b/include/asm-m68k/sun3_pgalloc.h @@ -18,7 +18,7 @@ extern const char bad_pmd_string[]; -#define pmd_alloc_one(mm,address) ({ BUG(); ((pmd_t *)2); }) +#define pmd_alloc_one(mm,address) ({ BUG(); ((pmd_t *)2); }) static inline void pte_free_kernel(pte_t * pte) @@ -39,7 +39,7 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, struct page *page) static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - unsigned long page = __get_free_page(GFP_KERNEL); + unsigned long page = __get_free_page(GFP_KERNEL|__GFP_REPEAT); if (!page) return NULL; @@ -51,7 +51,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) { - struct page *page = alloc_pages(GFP_KERNEL, 0); + struct page *page = alloc_pages(GFP_KERNEL|__GFP_REPEAT, 0); if (page == NULL) return NULL; diff --git a/include/asm-mips/pgalloc.h b/include/asm-mips/pgalloc.h index 9492a50dae76..f71b90b1c8e1 100644 --- a/include/asm-mips/pgalloc.h +++ b/include/asm-mips/pgalloc.h @@ -132,7 +132,7 @@ static inline pte_t *pte_alloc_one(struct mm_struct *mm, unsigned long address) { pte_t *pte; - pte = (pte_t *) __get_free_page(GFP_KERNEL); + pte = (pte_t *) __get_free_page(GFP_KERNEL|__GFP_REPEAT); if (pte) clear_page(pte); return pte; diff --git a/include/asm-mips64/pgalloc.h b/include/asm-mips64/pgalloc.h index 79b58408d660..2b777eebcc31 100644 --- a/include/asm-mips64/pgalloc.h +++ b/include/asm-mips64/pgalloc.h @@ -93,7 +93,7 @@ static inline pte_t *pte_alloc_one(struct mm_struct *mm, unsigned long address) { pte_t *pte; - pte = (pte_t *) __get_free_page(GFP_KERNEL); + pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT); if (pte) clear_page(pte); return pte; @@ -141,7 +141,7 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address) { pmd_t *pmd; - pmd = (pmd_t *) __get_free_pages(GFP_KERNEL, 1); + pmd = (pmd_t *)__get_free_pages(GFP_KERNEL|__GFP_REPEAT, 1); if (pmd) pmd_init((unsigned long)pmd, (unsigned long)invalid_pte_table); return pmd; diff --git a/include/asm-parisc/pgalloc.h b/include/asm-parisc/pgalloc.h index 32dcf11d084c..bbc02cb134b7 100644 --- a/include/asm-parisc/pgalloc.h +++ b/include/asm-parisc/pgalloc.h @@ -35,7 +35,7 @@ static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmd) static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address) { - pmd_t *pmd = (pmd_t *) __get_free_page(GFP_KERNEL); + pmd_t *pmd = (pmd_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT); if (pmd) clear_page(pmd); return pmd; @@ -73,7 +73,7 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte) static inline struct page * pte_alloc_one(struct mm_struct *mm, unsigned long address) { - struct page *page = alloc_page(GFP_KERNEL); + struct page *page = alloc_page(GFP_KERNEL|__GFP_REPEAT); if (likely(page != NULL)) clear_page(page_address(page)); return page; @@ -82,7 +82,7 @@ pte_alloc_one(struct mm_struct *mm, unsigned long address) static inline pte_t * pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr) { - pte_t *pte = (pte_t *) __get_free_page(GFP_KERNEL); + pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT); if (likely(pte != NULL)) clear_page(pte); return pte; diff --git a/include/asm-ppc64/pgalloc.h b/include/asm-ppc64/pgalloc.h index 0c461418bb48..9376b791bee7 100644 --- a/include/asm-ppc64/pgalloc.h +++ b/include/asm-ppc64/pgalloc.h @@ -31,19 +31,11 @@ pgd_free(pgd_t *pgd)
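The i386 <asm/div64.h> hunk a little earlier adds div_long_long_rem()/div_ll_X_l_rem(): a 64-by-32 division that yields quotient and remainder from a single divl instruction. As its warning comment says, divl raises a divide-error exception when the quotient does not fit in 32 bits, so callers must keep the dividend bounded. A minimal userspace sketch of the intended semantics, with plain C standing in for the inline asm (all names here are illustrative, not from the patch):

/* Semantics of div_ll_X_l_rem() from the div64.h hunk above.  Plain C
 * replaces the divl asm; the real helper traps (divide-error) when the
 * quotient exceeds 32 bits instead of truncating. */
#include <stdio.h>

static long div_ll_X_l_rem_sketch(long long divs, long div, long *rem)
{
        *rem = (long)(divs % div);
        return (long)(divs / div);
}

int main(void)
{
        long rem;
        long q = div_ll_X_l_rem_sketch(10000000000LL, 3000000L, &rem);

        printf("q=%ld rem=%ld\n", q, rem);      /* q=3333 rem=1000000 */
        return 0;
}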
static inline pmd_t * pmd_alloc_one(struct mm_struct *mm, unsigned long addr) { - int count = 0; pmd_t *pmd; - do { - pmd = (pmd_t *)__get_free_page(GFP_KERNEL); - if (pmd) - clear_page(pmd); - else { - current->state = TASK_UNINTERRUPTIBLE; - schedule_timeout(HZ); - } - } while (!pmd && (count++ < 10)); - + pmd = (pmd_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT); + if (pmd) + clear_page(pmd); return pmd; } @@ -62,19 +54,11 @@ pmd_free(pmd_t *pmd) static inline pte_t * pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr) { - int count = 0; pte_t *pte; - do { - pte = (pte_t *)__get_free_page(GFP_KERNEL); - if (pte) - clear_page(pte); - else { - current->state = TASK_UNINTERRUPTIBLE; - schedule_timeout(HZ); - } - } while (!pte && (count++ < 10)); - + pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT); + if (pte) + clear_page(pte); return pte; } diff --git a/include/asm-s390/pgalloc.h b/include/asm-s390/pgalloc.h index 67230ef0e0c5..e4729fb912fd 100644 --- a/include/asm-s390/pgalloc.h +++ b/include/asm-s390/pgalloc.h @@ -120,20 +120,13 @@ static inline pte_t * pte_alloc_one_kernel(struct mm_struct *mm, unsigned long vmaddr) { pte_t *pte; - int count; int i; - count = 0; - do { - pte = (pte_t *) __get_free_page(GFP_KERNEL); - if (pte != NULL) { - for (i=0; i < PTRS_PER_PTE; i++) - pte_clear(pte+i); - } else { - current->state = TASK_UNINTERRUPTIBLE; - schedule_timeout(HZ); - } - } while (!pte && (count++ < 10)); + pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT); + if (pte != NULL) { + for (i=0; i < PTRS_PER_PTE; i++) + pte_clear(pte+i); + } return pte; } diff --git a/include/asm-sh/pgalloc.h b/include/asm-sh/pgalloc.h index 9cc5a7dc98ed..a60b4c961a4f 100644 --- a/include/asm-sh/pgalloc.h +++ b/include/asm-sh/pgalloc.h @@ -35,7 +35,7 @@ static inline void pgd_free(pgd_t *pgd) static inline pte_t *pte_alloc_one(struct mm_struct *mm, unsigned long address) { - pte_t *pte = (pte_t *) __get_free_page(GFP_KERNEL); + pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT); if (pte) clear_page(pte); return pte; diff --git a/include/asm-sparc64/pgalloc.h b/include/asm-sparc64/pgalloc.h index 037c09b1a737..d3c3a7060664 100644 --- a/include/asm-sparc64/pgalloc.h +++ b/include/asm-sparc64/pgalloc.h @@ -159,7 +159,7 @@ static __inline__ pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addre pmd = pmd_alloc_one_fast(mm, address); if (!pmd) { - pmd = (pmd_t *)__get_free_page(GFP_KERNEL); + pmd = (pmd_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT); if (pmd) memset(pmd, 0, PAGE_SIZE); } diff --git a/include/asm-x86_64/pgalloc.h b/include/asm-x86_64/pgalloc.h index 4cae8e6a37a0..e15895c99a4e 100644 --- a/include/asm-x86_64/pgalloc.h +++ b/include/asm-x86_64/pgalloc.h @@ -31,12 +31,12 @@ extern __inline__ void pmd_free(pmd_t *pmd) static inline pmd_t *pmd_alloc_one (struct mm_struct *mm, unsigned long addr) { - return (pmd_t *) get_zeroed_page(GFP_KERNEL); + return (pmd_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT); } static inline pgd_t *pgd_alloc (struct mm_struct *mm) { - return (pgd_t *)get_zeroed_page(GFP_KERNEL); + return (pgd_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT); } static inline void pgd_free (pgd_t *pgd) @@ -48,12 +48,12 @@ static inline void pgd_free (pgd_t *pgd) static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - return (pte_t *) get_zeroed_page(GFP_KERNEL); + return (pte_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT); } static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) { - void *p = 
(void *)get_zeroed_page(GFP_KERNEL); + void *p = (void *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT); if (!p) return NULL; return virt_to_page(p); diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 64a70553d40c..26d71a5fb742 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -172,7 +172,7 @@ struct buffer_head * __getblk(struct block_device *, sector_t, int); void __brelse(struct buffer_head *); void __bforget(struct buffer_head *); struct buffer_head *__bread(struct block_device *, sector_t block, int size); -struct buffer_head *alloc_buffer_head(void); +struct buffer_head *alloc_buffer_head(int gfp_flags); void free_buffer_head(struct buffer_head * bh); void FASTCALL(unlock_buffer(struct buffer_head *bh)); void ll_rw_block(int, int, struct buffer_head * bh[]); diff --git a/include/linux/dcache.h b/include/linux/dcache.h index db979c3cf890..78fafd500123 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -270,7 +270,6 @@ static inline struct dentry *dget(struct dentry *dentry) if (!atomic_read(&dentry->d_count)) BUG(); atomic_inc(&dentry->d_count); - dentry->d_vfs_flags |= DCACHE_REFERENCED; } return dentry; } diff --git a/include/linux/genhd.h b/include/linux/genhd.h index c2432bd349e5..ac8fc9ef5bdb 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -64,7 +64,7 @@ struct hd_struct { sector_t nr_sects; struct kobject kobj; unsigned reads, read_sectors, writes, write_sectors; - int policy; + int policy, partno; }; #define GENHD_FL_REMOVABLE 1 @@ -89,7 +89,7 @@ struct gendisk { int minor_shift; /* number of times minor is shifted to get real minor */ char disk_name[16]; /* name of major driver */ - struct hd_struct *part; /* [indexed by minor] */ + struct hd_struct **part; /* [indexed by minor] */ struct block_device_operations *fops; struct request_queue *queue; void *private_data; diff --git a/include/linux/gfp.h b/include/linux/gfp.h index c475f7b41e59..ade6d9e97475 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -11,13 +11,26 @@ #define __GFP_DMA 0x01 #define __GFP_HIGHMEM 0x02 -/* Action modifiers - doesn't change the zoning */ +/* + * Action modifiers - doesn't change the zoning + * + * __GFP_REPEAT: Try hard to allocate the memory, but the allocation attempt + * _might_ fail. This depends upon the particular VM implementation. + * + * __GFP_NOFAIL: The VM implementation _must_ retry infinitely: the caller + * cannot handle allocation failures. + * + * __GFP_NORETRY: The VM implementation must not retry indefinitely. + */ #define __GFP_WAIT 0x10 /* Can wait and reschedule? */ #define __GFP_HIGH 0x20 /* Should access emergency pools? */ #define __GFP_IO 0x40 /* Can start physical IO? */ #define __GFP_FS 0x80 /* Can call down to low-level FS? */ #define __GFP_COLD 0x100 /* Cache-cold page required */ #define __GFP_NOWARN 0x200 /* Suppress page allocation failure warning */ +#define __GFP_REPEAT 0x400 /* Retry the allocation. Might fail */ +#define __GFP_NOFAIL 0x800 /* Retry for ever. Cannot fail */ +#define __GFP_NORETRY 0x1000 /* Do not retry. 
Might fail */ #define GFP_ATOMIC (__GFP_HIGH) #define GFP_NOIO (__GFP_WAIT) diff --git a/include/linux/linux_logo.h b/include/linux/linux_logo.h index ea05e16c2b16..9c01bde5bf1b 100644 --- a/include/linux/linux_logo.h +++ b/include/linux/linux_logo.h @@ -32,6 +32,6 @@ struct linux_logo { const unsigned char *data; }; -extern const struct linux_logo * __init find_logo(int depth); +extern const struct linux_logo *fb_find_logo(int depth); #endif /* _LINUX_LINUX_LOGO_H */ diff --git a/include/linux/mm.h b/include/linux/mm.h index ede6c5ff4181..6aa89d73f65b 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -594,28 +594,10 @@ extern struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned lon extern unsigned int nr_used_zone_pages(void); -#ifdef CONFIG_MMU extern struct page * vmalloc_to_page(void *addr); extern struct page * follow_page(struct mm_struct *mm, unsigned long address, int write); extern int remap_page_range(struct vm_area_struct *vma, unsigned long from, unsigned long to, unsigned long size, pgprot_t prot); -#else -static inline struct page * vmalloc_to_page(void *addr) -{ - return NULL; -} -static inline struct page * follow_page(struct mm_struct *mm, - unsigned long address, int write) -{ - return NULL; -} -static inline int remap_page_range(struct vm_area_struct *vma, - unsigned long from, unsigned long to, - unsigned long size, pgprot_t prot) -{ - return -EPERM; -} -#endif /* CONFIG_MMU */ #endif /* __KERNEL__ */ #endif /* _LINUX_MM_H */ diff --git a/include/linux/nfsd/syscall.h b/include/linux/nfsd/syscall.h index 37b8901433d6..b6fa4d1839e3 100644 --- a/include/linux/nfsd/syscall.h +++ b/include/linux/nfsd/syscall.h @@ -59,7 +59,7 @@ struct nfsctl_client { struct nfsctl_export { char ex_client[NFSCLNT_IDMAX+1]; char ex_path[NFS_MAXPATHLEN+1]; - __kernel_dev_t ex_dev; + __kernel_old_dev_t ex_dev; __kernel_ino_t ex_ino; int ex_flags; __kernel_uid_t ex_anon_uid; @@ -104,7 +104,6 @@ struct nfsctl_arg { #define ca_export u.u_export #define ca_getfd u.u_getfd #define ca_getfs u.u_getfs -#define ca_authd u.u_authd }; union nfsctl_res { diff --git a/include/linux/sched.h b/include/linux/sched.h index c30c44f3cfcc..eee58c7354b4 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -465,6 +465,7 @@ do { if (atomic_dec_and_test(&(tsk)->usage)) __put_task_struct(tsk); } while(0) #define PF_FROZEN 0x00010000 /* frozen for system suspend */ #define PF_FSTRANS 0x00020000 /* inside a filesystem transaction */ #define PF_KSWAPD 0x00040000 /* I am kswapd */ +#define PF_SWAPOFF 0x00080000 /* I am in swapoff */ #if CONFIG_SMP extern void set_cpus_allowed(task_t *p, unsigned long new_mask); diff --git a/include/linux/slab.h b/include/linux/slab.h index bdc5256de12a..603748b9b349 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -22,7 +22,7 @@ typedef struct kmem_cache_s kmem_cache_t; #define SLAB_KERNEL GFP_KERNEL #define SLAB_DMA GFP_DMA -#define SLAB_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS|__GFP_COLD|__GFP_NOWARN) +#define SLAB_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS|__GFP_COLD|__GFP_NOWARN|__GFP_REPEAT|__GFP_NOFAIL|__GFP_NORETRY) #define SLAB_NO_GROW 0x00001000UL /* don't grow a cache */ /* flags to pass to kmem_cache_create(). 
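Nearly all of the arch pgalloc conversions above are the same transformation: an open-coded retry loop (set TASK_UNINTERRUPTIBLE, schedule_timeout(HZ), give up after ten tries) collapses into one allocation carrying __GFP_REPEAT, so retry policy now lives behind the three modifiers documented in the gfp.h hunk. A kernel-style sketch of the pattern, assuming the usual pgalloc environment; the function name is illustrative:

/* The conversion pattern applied across the arch pgalloc headers in
 * this diff: let the page allocator do the retrying.  __GFP_REPEAT
 * tries hard but may still fail, so callers keep their NULL checks. */
static inline pte_t *pte_alloc_one_kernel_sketch(struct mm_struct *mm,
                                                 unsigned long address)
{
        pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL | __GFP_REPEAT);

        if (pte)
                clear_page(pte);
        return pte;
}

A caller that genuinely cannot tolerate failure would pass __GFP_NOFAIL instead; __GFP_NORETRY is the opposite knob, and the mm/page_alloc.c hunk later in this diff shows how all three are honoured.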
diff --git a/include/linux/time.h b/include/linux/time.h index 4d7238025fe9..fdab2abc43be 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -26,6 +26,16 @@ struct timezone { #include <linux/spinlock.h> #include <linux/seqlock.h> +#include <linux/timex.h> +#include <asm/div64.h> +#ifndef div_long_long_rem + +#define div_long_long_rem(dividend,divisor,remainder) ({ \ + u64 result = dividend; \ + *remainder = do_div(result,divisor); \ + result; }) + +#endif /* * Have the 32 bit jiffies value wrap 5 minutes after boot @@ -59,25 +69,52 @@ struct timezone { #ifndef NSEC_PER_USEC #define NSEC_PER_USEC (1000L) #endif +/* + * We want to do realistic conversions of time so we need to use the same + * values the update wall clock code uses as the jiffie size. This value + * is: TICK_NSEC(TICK_USEC) (both of which are defined in timex.h). This + * is a constant and is in nanoseconds. We will used scaled math and + * with a scales defined here as SEC_JIFFIE_SC, USEC_JIFFIE_SC and + * NSEC_JIFFIE_SC. Note that these defines contain nothing but + * constants and so are computed at compile time. SHIFT_HZ (computed in + * timex.h) adjusts the scaling for different HZ values. + */ +#define SEC_JIFFIE_SC (30 - SHIFT_HZ) +#define NSEC_JIFFIE_SC (SEC_JIFFIE_SC + 30) +#define USEC_JIFFIE_SC (SEC_JIFFIE_SC + 20) +#define SEC_CONVERSION ((unsigned long)(((u64)NSEC_PER_SEC << SEC_JIFFIE_SC) /\ + (u64)TICK_NSEC(TICK_USEC))) +#define NSEC_CONVERSION ((unsigned long)(((u64)1 << NSEC_JIFFIE_SC) / \ + (u64)TICK_NSEC(TICK_USEC))) +#define USEC_CONVERSION \ + ((unsigned long)(((u64)NSEC_PER_USEC << USEC_JIFFIE_SC)/ \ + (u64)TICK_NSEC(TICK_USEC))) +#define MAX_SEC_IN_JIFFIES \ + (u32)((u64)((u64)MAX_JIFFY_OFFSET * TICK_NSEC(TICK_USEC)) / NSEC_PER_SEC) static __inline__ unsigned long timespec_to_jiffies(struct timespec *value) { unsigned long sec = value->tv_sec; - long nsec = value->tv_nsec; + long nsec = value->tv_nsec + TICK_NSEC(TICK_USEC) - 1; - if (sec >= (MAX_JIFFY_OFFSET / HZ)) + if (sec >= MAX_SEC_IN_JIFFIES) return MAX_JIFFY_OFFSET; - nsec += 1000000000L / HZ - 1; - nsec /= 1000000000L / HZ; - return HZ * sec + nsec; + return (((u64)sec * SEC_CONVERSION) + + (((u64)nsec * NSEC_CONVERSION) >> + (NSEC_JIFFIE_SC - SEC_JIFFIE_SC))) >> SEC_JIFFIE_SC; + } static __inline__ void jiffies_to_timespec(unsigned long jiffies, struct timespec *value) { - value->tv_nsec = (jiffies % HZ) * (1000000000L / HZ); - value->tv_sec = jiffies / HZ; + /* + * Convert jiffies to nanoseconds and seperate with + * one divide. + */ + u64 nsec = (u64)jiffies * TICK_NSEC(TICK_USEC); + value->tv_sec = div_long_long_rem(nsec, NSEC_PER_SEC, &value->tv_nsec); } /* Same for "timeval" */ @@ -85,20 +122,25 @@ static __inline__ unsigned long timeval_to_jiffies(struct timeval *value) { unsigned long sec = value->tv_sec; - long usec = value->tv_usec; + long usec = value->tv_usec + USEC_PER_SEC / HZ - 1; - if (sec >= (MAX_JIFFY_OFFSET / HZ)) + if (sec >= MAX_SEC_IN_JIFFIES) return MAX_JIFFY_OFFSET; - usec += 1000000L / HZ - 1; - usec /= 1000000L / HZ; - return HZ * sec + usec; + return (((u64)sec * SEC_CONVERSION) + + (((u64)usec * USEC_CONVERSION) >> + (USEC_JIFFIE_SC - SEC_JIFFIE_SC))) >> SEC_JIFFIE_SC; } static __inline__ void jiffies_to_timeval(unsigned long jiffies, struct timeval *value) { - value->tv_usec = (jiffies % HZ) * (1000000L / HZ); - value->tv_sec = jiffies / HZ; + /* + * Convert jiffies to nanoseconds and seperate with + * one divide. 
+ */ + u64 nsec = (u64)jiffies * TICK_NSEC(TICK_USEC); + value->tv_sec = div_long_long_rem(nsec, NSEC_PER_SEC, &value->tv_usec); + value->tv_usec /= NSEC_PER_USEC; } static __inline__ int timespec_equal(struct timespec *a, struct timespec *b) @@ -140,6 +182,7 @@ mktime (unsigned int year, unsigned int mon, } extern struct timespec xtime; +extern struct timespec wall_to_monotonic; extern seqlock_t xtime_lock; static inline unsigned long get_seconds(void) @@ -200,6 +243,9 @@ struct itimerval { #define CLOCK_MONOTONIC_HR 5 #define MAX_CLOCKS 6 +#define CLOCKS_MASK (CLOCK_REALTIME | CLOCK_MONOTONIC | \ + CLOCK_REALTIME_HR | CLOCK_MONOTONIC_HR) +#define CLOCKS_MONO (CLOCK_MONOTONIC & CLOCK_MONOTONIC_HR) /* * The various flags for setting POSIX.1b interval timers. diff --git a/include/linux/timex.h b/include/linux/timex.h index 5b2b0ac18ae7..6c00606c6e33 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h @@ -51,7 +51,6 @@ #ifndef _LINUX_TIMEX_H #define _LINUX_TIMEX_H -#include <linux/time.h> #include <asm/param.h> /* @@ -177,6 +176,7 @@ /* a value TUSEC for TICK_USEC (can be set bij adjtimex) */ #define TICK_NSEC(TUSEC) (SH_DIV (TUSEC * USER_HZ * 1000, ACTHZ, 8)) +#include <linux/time.h> /* * syscall interface - used (mainly by NTP daemon) * to discipline kernel clock oscillator diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index d90763253759..570778ddeae9 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -27,7 +27,8 @@ extern void *vmalloc_32(unsigned long size); extern void *__vmalloc(unsigned long size, int gfp_mask, pgprot_t prot); extern void vfree(void *addr); -extern void *vmap(struct page **pages, unsigned int count); +extern void *vmap(struct page **pages, unsigned int count, + unsigned long flags, pgprot_t prot); extern void vunmap(void *addr); /* diff --git a/ipc/shm.c b/ipc/shm.c index e97264937900..19c06135b184 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -361,27 +361,35 @@ static inline unsigned long copy_shminfo_to_user(void *buf, struct shminfo64 *in } } -static void shm_get_stat (unsigned long *rss, unsigned long *swp) +static void shm_get_stat(unsigned long *rss, unsigned long *swp) { - struct shmem_inode_info *info; int i; *rss = 0; *swp = 0; - for(i = 0; i <= shm_ids.max_id; i++) { - struct shmid_kernel* shp; - struct inode * inode; + for (i = 0; i <= shm_ids.max_id; i++) { + struct shmid_kernel *shp; + struct inode *inode; shp = shm_get(i); - if(shp == NULL) + if(!shp) continue; + inode = shp->shm_file->f_dentry->d_inode; - info = SHMEM_I(inode); - spin_lock (&info->lock); - *rss += inode->i_mapping->nrpages; - *swp += info->swapped; - spin_unlock (&info->lock); + + if (is_file_hugepages(shp->shm_file)) { + struct address_space *mapping = inode->i_mapping; + spin_lock(&mapping->page_lock); + *rss += (HPAGE_SIZE/PAGE_SIZE)*mapping->nrpages; + spin_unlock(&mapping->page_lock); + } else { + struct shmem_inode_info *info = SHMEM_I(inode); + spin_lock(&info->lock); + *rss += inode->i_mapping->nrpages; + *swp += info->swapped; + spin_unlock(&info->lock); + } } } @@ -737,21 +745,66 @@ out: * detach and kill segment if marked destroyed. * The work is done in shm_close. 
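A quick check of the arithmetic in the <linux/time.h> hunk further up, before the shm rework continues: rather than dividing by (NSEC_PER_SEC / HZ), the new conversions multiply by precomputed fixed-point constants and shift, which stays exact even when ACTHZ makes the tick a non-integral number of microseconds. A self-contained userspace sketch, assuming HZ=100, SHIFT_HZ=7 and an idealized 10 ms tick purely for illustration (the real TICK_NSEC comes from timex.h and absorbs the ACTHZ correction):

/* Scaled-math timespec->jiffies conversion as introduced above, with
 * fixed illustrative constants.  2.5 s at HZ=100 is exactly 250 ticks,
 * and the round-up term must not push it to 251. */
#include <stdio.h>
#include <stdint.h>

#define SHIFT_HZ        7               /* timex.h value for HZ=100 */
#define NSEC_PER_SEC    1000000000L
#define TICK_NSEC       10000000L       /* idealized 10 ms tick     */

#define SEC_JIFFIE_SC   (30 - SHIFT_HZ)
#define NSEC_JIFFIE_SC  (SEC_JIFFIE_SC + 30)
#define SEC_CONVERSION \
        ((unsigned long)(((uint64_t)NSEC_PER_SEC << SEC_JIFFIE_SC) / TICK_NSEC))
#define NSEC_CONVERSION \
        ((unsigned long)(((uint64_t)1 << NSEC_JIFFIE_SC) / TICK_NSEC))

static unsigned long timespec_to_jiffies_sketch(long sec, long nsec)
{
        nsec += TICK_NSEC - 1;          /* round up to a whole tick */
        return (((uint64_t)sec * SEC_CONVERSION) +
                (((uint64_t)nsec * NSEC_CONVERSION) >>
                 (NSEC_JIFFIE_SC - SEC_JIFFIE_SC))) >> SEC_JIFFIE_SC;
}

int main(void)
{
        printf("%lu\n", timespec_to_jiffies_sketch(2, 500000000L)); /* 250 */
        return 0;
}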
*/ -asmlinkage long sys_shmdt (char *shmaddr) +asmlinkage long sys_shmdt(char *shmaddr) { struct mm_struct *mm = current->mm; - struct vm_area_struct *shmd, *shmdnext; + struct vm_area_struct *vma, *next; + unsigned long addr = (unsigned long)shmaddr; + loff_t size = 0; int retval = -EINVAL; down_write(&mm->mmap_sem); - for (shmd = mm->mmap; shmd; shmd = shmdnext) { - shmdnext = shmd->vm_next; - if ((shmd->vm_ops == &shm_vm_ops || (shmd->vm_flags & VM_HUGETLB)) - && shmd->vm_start - (shmd->vm_pgoff << PAGE_SHIFT) == (ulong) shmaddr) { - do_munmap(mm, shmd->vm_start, shmd->vm_end - shmd->vm_start); + + /* + * If it had been mremap()'d, the starting address would not + * match the usual checks anyway. So assume all vma's are + * above the starting address given. + */ + vma = find_vma(mm, addr); + + while (vma) { + next = vma->vm_next; + + /* + * Check if the starting address would match, i.e. it's + * a fragment created by mprotect() and/or munmap(), or it + * otherwise it starts at this address with no hassles. + */ + if ((vma->vm_ops == &shm_vm_ops || is_vm_hugetlb_page(vma)) && + (vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) { + + + size = vma->vm_file->f_dentry->d_inode->i_size; + do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start); + /* + * We discovered the size of the shm segment, so + * break out of here and fall through to the next + * loop that uses the size information to stop + * searching for matching vma's. + */ retval = 0; + vma = next; + break; } + vma = next; + } + + /* + * We need look no further than the maximum address a fragment + * could possibly have landed at. Also cast things to loff_t to + * prevent overflows and make comparisions vs. equal-width types. + */ + while (vma && (loff_t)(vma->vm_end - addr) <= size) { + next = vma->vm_next; + + /* finding a matching vma now does not alter retval */ + if ((vma->vm_ops == &shm_vm_ops || is_vm_hugetlb_page(vma)) && + (vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) + + do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start); + vma = next; } + up_write(&mm->mmap_sem); return retval; } diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index 54fe15f5c0b3..bca12ba294e4 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -33,7 +33,12 @@ result; }) #endif +#define CLOCK_REALTIME_RES TICK_NSEC(TICK_USEC) // In nano seconds. +static inline u64 mpy_l_X_l_ll(unsigned long mpy1,unsigned long mpy2) +{ + return (u64)mpy1 * mpy2; +} /* * Management arrays for POSIX timers. Timers are kept in slab memory * Timer ids are allocated by an external routine that keeps track of the @@ -48,7 +53,7 @@ * The idr_get_new *may* call slab for more memory so it must not be * called under a spin lock. Likewise idr_remore may release memory * (but it may be ok to do this under a lock...). - * idr_find is just a memory look up and is quite fast. A zero return + * idr_find is just a memory look up and is quite fast. A -1 return * indicates that the requested id does not exist. */ @@ -82,6 +87,7 @@ static spinlock_t idr_lock = SPIN_LOCK_UNLOCKED; * For some reason mips/mips64 define the SIGEV constants plus 128. * Here we define a mask to get rid of the common bits. The * optimizer should make this costless to all but mips. + * Note that no common bits (the non-mips case) will give 0xffffffff. */ #define MIPS_SIGEV ~(SIGEV_NONE & \ SIGEV_SIGNAL & \ @@ -93,7 +99,7 @@ static spinlock_t idr_lock = SPIN_LOCK_UNLOCKED; * The timer ID is turned into a timer address by idr_find(). 
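Back in the sys_shmdt() rewrite above, both scan passes apply the same test, which is worth spelling out on its own: a vma belongs to the segment being detached if it is shm-backed (or hugetlb-backed) and its distance from the attach address equals its page offset into the file. A hedged sketch; the helper name is invented, the expression is taken straight from the hunk:

/* The vma-matching predicate used by both scan loops in the new
 * sys_shmdt(): fragments produced by mprotect()/munmap() still match
 * because vm_pgoff tracks how far into the segment a fragment sits. */
static int shm_vma_matches(struct vm_area_struct *vma, unsigned long addr)
{
        return (vma->vm_ops == &shm_vm_ops || is_vm_hugetlb_page(vma)) &&
               (vma->vm_start - addr) / PAGE_SIZE == vma->vm_pgoff;
}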
* Verifying a valid ID consists of: * - * a) checking that idr_find() returns other than zero. + * a) checking that idr_find() returns other than -1. * b) checking that the timer id matches the one in the timer itself. * c) that the timer owner is in the callers thread group. */ @@ -162,6 +168,8 @@ static struct k_clock posix_clocks[MAX_CLOCKS]; void register_posix_clock(int clock_id, struct k_clock *new_clock); static int do_posix_gettime(struct k_clock *clock, struct timespec *tp); +static u64 do_posix_clock_monotonic_gettime_parts( + struct timespec *tp, struct timespec *mo); int do_posix_clock_monotonic_gettime(struct timespec *tp); int do_posix_clock_monotonic_settime(struct timespec *tp); static struct k_itimer *lock_timer(timer_t timer_id, unsigned long *flags); @@ -172,8 +180,8 @@ static inline void unlock_timer(struct k_itimer *timr, unsigned long flags); */ static __init int init_posix_timers(void) { - struct k_clock clock_realtime = {.res = NSEC_PER_SEC / HZ }; - struct k_clock clock_monotonic = {.res = NSEC_PER_SEC / HZ, + struct k_clock clock_realtime = {.res = CLOCK_REALTIME_RES }; + struct k_clock clock_monotonic = {.res = CLOCK_REALTIME_RES, .clock_get = do_posix_clock_monotonic_gettime, .clock_set = do_posix_clock_monotonic_settime }; @@ -192,7 +200,7 @@ __initcall(init_posix_timers); static void tstojiffie(struct timespec *tp, int res, u64 *jiff) { - unsigned long sec = tp->tv_sec; + long sec = tp->tv_sec; long nsec = tp->tv_nsec + res - 1; if (nsec > NSEC_PER_SEC) { @@ -201,35 +209,14 @@ static void tstojiffie(struct timespec *tp, int res, u64 *jiff) } /* - * A note on jiffy overflow: It is possible for the system to - * have been up long enough for the jiffies quanity to overflow. - * In order for correct timer evaluations we require that the - * specified time be somewhere between now and now + (max - * unsigned int/2). Times beyond this will be truncated back to - * this value. This is done in the absolute adjustment code, - * below. Here it is enough to just discard the high order - * bits. - */ - *jiff = (u64)sec * HZ; - /* - * Do the res thing. (Don't forget the add in the declaration of nsec) - */ - nsec -= nsec % res; - /* - * Split to jiffie and sub jiffie - */ - *jiff += nsec / (NSEC_PER_SEC / HZ); -} - -static void tstotimer(struct itimerspec *time, struct k_itimer *timer) -{ - u64 result; - int res = posix_clocks[timer->it_clock].res; - - tstojiffie(&time->it_value, res, &result); - timer->it_timer.expires = (unsigned long)result; - tstojiffie(&time->it_interval, res, &result); - timer->it_incr = (unsigned long)result; + * The scaling constants are defined in <linux/time.h> + * The difference between there and here is that we do the + * res rounding and compute a 64-bit result (well so does that + * but it then throws away the high bits). + */ + *jiff = (mpy_l_X_l_ll(sec, SEC_CONVERSION) + + (mpy_l_X_l_ll(nsec, NSEC_CONVERSION) >> + (NSEC_JIFFIE_SC - SEC_JIFFIE_SC))) >> SEC_JIFFIE_SC; } static void schedule_next_timer(struct k_itimer *timr) @@ -690,57 +677,81 @@ sys_timer_getoverrun(timer_t timer_id) * If it is relative time, we need to add the current (CLOCK_MONOTONIC) * time to it to get the proper time for the timer. 
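The ID-validation steps (a), (b), (c) described a little above amount to a three-way check before a timer ID is trusted. A hedged reconstruction, assuming the k_itimer fields and idr handle visible elsewhere in this file; the helper name and exact body are illustrative, not lines from the patch:

/* Timer-ID validation as the comment above describes it.  Note the
 * patch's convention that idr_find() signals a missing id with -1,
 * not 0. */
static struct k_itimer *validate_timer_id_sketch(timer_t timer_id)
{
        struct k_itimer *timr = idr_find(&posix_timers_id, (int)timer_id);

        if (timr == (struct k_itimer *)-1)              /* (a) no such id  */
                return NULL;
        if (timr->it_id != timer_id)                    /* (b) reused slot */
                return NULL;
        if (!timr->it_process ||                        /* (c) not in the  */
            timr->it_process->tgid != current->tgid)    /* caller's group  */
                return NULL;
        return timr;
}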
*/ -static int adjust_abs_time(struct k_clock *clock, struct timespec *tp, int abs) +static int adjust_abs_time(struct k_clock *clock, struct timespec *tp, + int abs, u64 *exp) { struct timespec now; - struct timespec oc; - do_posix_clock_monotonic_gettime(&now); - - if (!abs || (posix_clocks[CLOCK_MONOTONIC].clock_get != - clock->clock_get)) { - if (abs) - do_posix_gettime(clock, &oc); - else - oc.tv_nsec = oc.tv_sec = 0; - - tp->tv_sec += now.tv_sec - oc.tv_sec; - tp->tv_nsec += now.tv_nsec - oc.tv_nsec; + struct timespec oc = *tp; + struct timespec wall_to_mono; + u64 jiffies_64_f; + int rtn =0; + if (abs) { + /* + * The mask pick up the 4 basic clocks + */ + if (!(clock - &posix_clocks[0]) & ~CLOCKS_MASK) { + jiffies_64_f = do_posix_clock_monotonic_gettime_parts( + &now, &wall_to_mono); + /* + * If we are doing a MONOTONIC clock + */ + if((clock - &posix_clocks[0]) & CLOCKS_MONO){ + now.tv_sec += wall_to_mono.tv_sec; + now.tv_nsec += wall_to_mono.tv_nsec; + } + } else { + /* + * Not one of the basic clocks + */ + do_posix_gettime(clock, &now); + jiffies_64_f = get_jiffies_64(); + } + /* + * Take away now to get delta + */ + oc.tv_sec -= now.tv_sec; + oc.tv_nsec -= now.tv_nsec; /* * Normalize... */ - if ((tp->tv_nsec - NSEC_PER_SEC) >= 0) { - tp->tv_nsec -= NSEC_PER_SEC; - tp->tv_sec++; + while ((oc.tv_nsec - NSEC_PER_SEC) >= 0) { + oc.tv_nsec -= NSEC_PER_SEC; + oc.tv_sec++; } - if ((tp->tv_nsec) < 0) { - tp->tv_nsec += NSEC_PER_SEC; - tp->tv_sec--; + while ((oc.tv_nsec) < 0) { + oc.tv_nsec += NSEC_PER_SEC; + oc.tv_sec--; } + }else{ + jiffies_64_f = get_jiffies_64(); } /* - * Check if the requested time is prior to now (if so set now) or - * is more than the timer code can handle (if so we error out). - * The (unsigned) catches the case of prior to "now" with the same - * test. Only on failure do we sort out what happened, and then - * we use the (unsigned) to error out negative seconds. + * Check if the requested time is prior to now (if so set now) + */ + if (oc.tv_sec < 0) + oc.tv_sec = oc.tv_nsec = 0; + tstojiffie(&oc, clock->res, exp); + + /* + * Check if the requested time is more than the timer code + * can handle (if so we error out but return the value too). */ - if ((unsigned) (tp->tv_sec - now.tv_sec) > (MAX_JIFFY_OFFSET / HZ)) { - if ((unsigned) tp->tv_sec < now.tv_sec) { - tp->tv_sec = now.tv_sec; - tp->tv_nsec = now.tv_nsec; - } else + if (*exp > ((u64)MAX_JIFFY_OFFSET)) /* * This is a considered response, not exactly in * line with the standard (in fact it is silent on - * possible overflows). We assume such a large + * possible overflows). We assume such a large * value is ALMOST always a programming error and * try not to compound it by setting a really dumb * value. */ - return -EINVAL; - } - return 0; + rtn = -EINVAL; + /* + * return the actual jiffies expire time, full 64 bits + */ + *exp += jiffies_64_f; + return rtn; } /* Set a POSIX.1b interval timer. */ @@ -750,6 +761,7 @@ do_timer_settime(struct k_itimer *timr, int flags, struct itimerspec *new_setting, struct itimerspec *old_setting) { struct k_clock *clock = &posix_clocks[timr->it_clock]; + u64 expire_64; if (old_setting) do_timer_gettime(timr, old_setting); @@ -788,14 +800,15 @@ do_timer_settime(struct k_itimer *timr, int flags, return 0; } - if ((flags & TIMER_ABSTIME) && - (clock->clock_get != do_posix_clock_monotonic_gettime)) - // FIXME: what is this? 
- ; if (adjust_abs_time(clock, - &new_setting->it_value, flags & TIMER_ABSTIME)) + &new_setting->it_value, flags & TIMER_ABSTIME, + &expire_64)) { return -EINVAL; - tstotimer(new_setting, timr); + } + timr->it_timer.expires = (unsigned long)expire_64; + tstojiffie(&new_setting->it_interval, clock->res, &expire_64); + timr->it_incr = (unsigned long)expire_64; + /* * For some reason the timer does not fire immediately if expires is @@ -964,30 +977,46 @@ static int do_posix_gettime(struct k_clock *clock, struct timespec *tp) * Note also that the while loop assures that the sub_jiff_offset * will be less than a jiffie, thus no need to normalize the result. * Well, not really, if called with ints off :( - * - * HELP, this code should make an attempt at resolution beyond the - * jiffie. Trouble is this is "arch" dependent... */ -int do_posix_clock_monotonic_gettime(struct timespec *tp) +static u64 do_posix_clock_monotonic_gettime_parts( + struct timespec *tp, struct timespec *mo) { - long sub_sec; - u64 jiffies_64_f; - -#if (BITS_PER_LONG > 32) - jiffies_64_f = jiffies_64; -#else + u64 jiff; + struct timeval tpv; unsigned int seq; do { seq = read_seqbegin(&xtime_lock); - jiffies_64_f = jiffies_64; + do_gettimeofday(&tpv); + *mo = wall_to_monotonic; + jiff = jiffies_64; - } while (read_seqretry(&xtime_lock, seq)); -#endif - tp->tv_sec = div_long_long_rem(jiffies_64_f, HZ, &sub_sec); - tp->tv_nsec = sub_sec * (NSEC_PER_SEC / HZ); + } while(read_seqretry(&xtime_lock, seq)); + + /* + * Love to get this before it is converted to usec. + * It would save a div AND a mpy. + */ + tp->tv_sec = tpv.tv_sec; + tp->tv_nsec = tpv.tv_usec * NSEC_PER_USEC; + return jiff; +} + +int do_posix_clock_monotonic_gettime(struct timespec *tp) +{ + struct timespec wall_to_mono; + + do_posix_clock_monotonic_gettime_parts(tp, &wall_to_mono); + + tp->tv_sec += wall_to_mono.tv_sec; + tp->tv_nsec += wall_to_mono.tv_nsec; + + if ((tp->tv_nsec - NSEC_PER_SEC) > 0) { + tp->tv_nsec -= NSEC_PER_SEC; + tp->tv_sec++; + } return 0; } @@ -1138,7 +1167,7 @@ do_clock_nanosleep(clockid_t which_clock, int flags, struct timespec *tsave) struct timespec t; struct timer_list new_timer; DECLARE_WAITQUEUE(abs_wqueue, current); - u64 rq_time = 0; + u64 rq_time = (u64)0; s64 left; int abs; struct restart_block *restart_block = @@ -1163,7 +1192,7 @@ do_clock_nanosleep(clockid_t which_clock, int flags, struct timespec *tsave) if (!rq_time) return -EINTR; left = rq_time - get_jiffies_64(); - if (left <= 0LL) + if (left <= (s64)0) return 0; /* Already passed */ } @@ -1174,14 +1203,14 @@ do_clock_nanosleep(clockid_t which_clock, int flags, struct timespec *tsave) do { t = *tsave; if (abs || !rq_time) { - adjust_abs_time(&posix_clocks[which_clock], &t, abs); - tstojiffie(&t, posix_clocks[which_clock].res, &rq_time); + adjust_abs_time(&posix_clocks[which_clock], &t, abs, + &rq_time); } left = rq_time - get_jiffies_64(); - if (left >= MAX_JIFFY_OFFSET) - left = MAX_JIFFY_OFFSET; - if (left < 0) + if (left >= (s64)MAX_JIFFY_OFFSET) + left = (s64)MAX_JIFFY_OFFSET; + if (left < (s64)0) break; new_timer.expires = jiffies + left; @@ -1192,13 +1221,12 @@ do_clock_nanosleep(clockid_t which_clock, int flags, struct timespec *tsave) del_timer_sync(&new_timer); left = rq_time - get_jiffies_64(); - } while (left > 0 && !test_thread_flag(TIF_SIGPENDING)); + } while (left > (s64)0 && !test_thread_flag(TIF_SIGPENDING)); if (abs_wqueue.task_list.next) finish_wait(&nanosleep_abs_wqueue, &abs_wqueue); - if (left > 0) { - unsigned long rmd; + if (left > (s64)0) { /* * 
Always restart abs calls from scratch to pick up any @@ -1207,9 +1235,10 @@ do_clock_nanosleep(clockid_t which_clock, int flags, struct timespec *tsave) if (abs) return -ERESTARTNOHAND; - tsave->tv_sec = div_long_long_rem(left, HZ, &rmd); - tsave->tv_nsec = rmd * (NSEC_PER_SEC / HZ); - + left *= TICK_NSEC(TICK_USEC); + tsave->tv_sec = div_long_long_rem(left, + NSEC_PER_SEC, + &tsave->tv_nsec); restart_block->fn = clock_nanosleep_restart; restart_block->arg0 = which_clock; restart_block->arg1 = (unsigned long)tsave; diff --git a/kernel/sched.c b/kernel/sched.c index 43b08b5ec658..ae2dbdf33d7d 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1091,7 +1091,7 @@ out: #define IDLE_REBALANCE_TICK (HZ/1000 ?: 1) #define BUSY_REBALANCE_TICK (HZ/5 ?: 1) #define IDLE_NODE_REBALANCE_TICK (IDLE_REBALANCE_TICK * 5) -#define BUSY_NODE_REBALANCE_TICK (BUSY_REBALANCE_TICK * 100) +#define BUSY_NODE_REBALANCE_TICK (BUSY_REBALANCE_TICK * 2) #ifdef CONFIG_NUMA static void balance_node(runqueue_t *this_rq, int idle, int this_cpu) diff --git a/kernel/timer.c b/kernel/timer.c index 4aaf025ee8ba..caa37716f860 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -441,8 +441,16 @@ repeat: unsigned long tick_usec = TICK_USEC; /* ACTHZ period (usec) */ unsigned long tick_nsec = TICK_NSEC(TICK_USEC); /* USER_HZ period (nsec) */ -/* The current time */ +/* + * The current time + * wall_to_monotonic is what we need to add to xtime (or xtime corrected + * for sub jiffie times) to get to monotonic time. Monotonic is pegged at zero + * at system boot time, so wall_to_monotonic will be negative, + * however, we will ALWAYS keep the tv_nsec part positive so we can use + * the usual normalization. + */ struct timespec xtime __attribute__ ((aligned (16))); +struct timespec wall_to_monotonic __attribute__ ((aligned (16))); /* Don't completely fail for HZ > 500. */ int tickadj = 500/HZ ? 
: 1; /* microsecs */ @@ -508,6 +516,7 @@ static void second_overflow(void) case TIME_INS: if (xtime.tv_sec % 86400 == 0) { xtime.tv_sec--; + wall_to_monotonic.tv_sec++; time_state = TIME_OOP; clock_was_set(); printk(KERN_NOTICE "Clock: inserting leap second 23:59:60 UTC\n"); @@ -517,6 +526,7 @@ static void second_overflow(void) case TIME_DEL: if ((xtime.tv_sec + 1) % 86400 == 0) { xtime.tv_sec++; + wall_to_monotonic.tv_sec--; time_state = TIME_WAIT; clock_was_set(); printk(KERN_NOTICE "Clock: deleting leap second 23:59:59 UTC\n"); diff --git a/mm/filemap.c b/mm/filemap.c index 884a9f50c6fe..40008f8f3626 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -31,12 +31,11 @@ * This is needed for the following functions: * - try_to_release_page * - block_invalidatepage - * - page_has_buffers * - generic_osync_inode * - * FIXME: remove all knowledge of the buffer layer from this file + * FIXME: remove all knowledge of the buffer layer from the core VM */ -#include <linux/buffer_head.h> +#include <linux/buffer_head.h> /* for generic_osync_inode */ #include <asm/uaccess.h> #include <asm/mman.h> diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 34a3aeb50799..e537462aaf58 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -129,6 +129,8 @@ static struct task_struct * select_bad_process(void) chosen = p; maxpoints = points; } + if (p->flags & PF_SWAPOFF) + return p; } while_each_thread(g, p); return chosen; diff --git a/mm/page-writeback.c b/mm/page-writeback.c index aaa70d02b859..c33c6a207426 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -462,88 +462,6 @@ int write_one_page(struct page *page, int wait) EXPORT_SYMBOL(write_one_page); /* - * Add a page to the dirty page list. - * - * It is a sad fact of life that this function is called from several places - * deeply under spinlocking. It may not sleep. - * - * If the page has buffers, the uptodate buffers are set dirty, to preserve - * dirty-state coherency between the page and the buffers. It the page does - * not have buffers then when they are later attached they will all be set - * dirty. - * - * The buffers are dirtied before the page is dirtied. There's a small race - * window in which a writepage caller may see the page cleanness but not the - * buffer dirtiness. That's fine. If this code were to set the page dirty - * before the buffers, a concurrent writepage caller could clear the page dirty - * bit, see a bunch of clean buffers and we'd end up with dirty buffers/clean - * page on the dirty page list. - * - * There is also a small window where the page is dirty, and not on dirty_pages. - * Also a possibility that by the time the page is added to dirty_pages, it has - * been set clean. The page lists are somewhat approximate in this regard. - * It's better to have clean pages accidentally attached to dirty_pages than to - * leave dirty pages attached to clean_pages. - * - * We use private_lock to lock against try_to_free_buffers while using the - * page's buffer list. Also use this to protect against clean buffers being - * added to the page after it was set dirty. - * - * FIXME: may need to call ->reservepage here as well. That's rather up to the - * address_space though. - * - * For now, we treat swapper_space specially. It doesn't use the normal - * block a_ops. 
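Stepping back to the kernel/timer.c leap-second hunks above: each path now moves wall_to_monotonic one second in the direction opposite to xtime, so the sum the new posix-timers code relies on (monotonic = xtime + wall_to_monotonic) never jumps. A tiny runnable illustration of that invariant, with simplified types and made-up values:

/* The invariant behind the paired xtime/wall_to_monotonic updates in
 * second_overflow(): inserting a leap second steps the wall clock back
 * while leaving monotonic time unchanged. */
#include <assert.h>

struct ts { long sec; long nsec; };

int main(void)
{
        struct ts xtime = { 1056672000, 0 };            /* arbitrary */
        struct ts wall_to_monotonic = { -1056671000, 0 };
        long mono = xtime.sec + wall_to_monotonic.sec;

        /* TIME_INS: insert a leap second (23:59:60 UTC) */
        xtime.sec--;
        wall_to_monotonic.sec++;

        assert(xtime.sec + wall_to_monotonic.sec == mono);
        return 0;
}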
- * - * FIXME: this should move over to fs/buffer.c - buffer_heads have no business in mm/ - */ -#include <linux/buffer_head.h> -int __set_page_dirty_buffers(struct page *page) -{ - struct address_space * const mapping = page->mapping; - int ret = 0; - - if (mapping == NULL) { - SetPageDirty(page); - goto out; - } - - if (!PageUptodate(page)) - buffer_error(); - - spin_lock(&mapping->private_lock); - if (page_has_buffers(page)) { - struct buffer_head *head = page_buffers(page); - struct buffer_head *bh = head; - - do { - if (buffer_uptodate(bh)) - set_buffer_dirty(bh); - else - buffer_error(); - bh = bh->b_this_page; - } while (bh != head); - } - spin_unlock(&mapping->private_lock); - - if (!TestSetPageDirty(page)) { - spin_lock(&mapping->page_lock); - if (page->mapping) { /* Race with truncate? */ - if (!mapping->backing_dev_info->memory_backed) - inc_page_state(nr_dirty); - list_del(&page->list); - list_add(&page->list, &mapping->dirty_pages); - } - spin_unlock(&mapping->page_lock); - __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); - } - -out: - return ret; -} -EXPORT_SYMBOL(__set_page_dirty_buffers); - -/* * For address_spaces which do not use buffers. Just set the page's dirty bit * and move it to the dirty_pages list. Also perform space reservation if * required. diff --git a/mm/page_alloc.c b/mm/page_alloc.c index c9c7accac1f7..bff7db2296ae 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -536,6 +536,7 @@ __alloc_pages(unsigned int gfp_mask, unsigned int order, struct page *page; int i; int cold; + int do_retry; if (wait) might_sleep(); @@ -626,10 +627,21 @@ rebalance: } /* - * Don't let big-order allocations loop. Yield for kswapd, try again. + * Don't let big-order allocations loop unless the caller explicitly + * requests that. Wait for some write requests to complete then retry. + * + * In this implementation, __GFP_REPEAT means __GFP_NOFAIL, but that + * may not be true in other implementations. */ - if (order <= 3) { - yield(); + do_retry = 0; + if (!(gfp_mask & __GFP_NORETRY)) { + if ((order <= 3) || (gfp_mask & __GFP_REPEAT)) + do_retry = 1; + if (gfp_mask & __GFP_NOFAIL) + do_retry = 1; + } + if (do_retry) { + blk_congestion_wait(WRITE, HZ/50); goto rebalance; } diff --git a/mm/swap.c b/mm/swap.c index eb71588c1f1a..f6442275cda5 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -21,7 +21,7 @@ #include <linux/pagevec.h> #include <linux/init.h> #include <linux/mm_inline.h> -#include <linux/buffer_head.h> +#include <linux/buffer_head.h> /* for try_to_release_page() */ #include <linux/percpu.h> /* How many pages do we try to swap or page in/out together? 
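The mm/page_alloc.c hunk above is where the three new gfp.h flags acquire their meaning. The decision reads more clearly factored into a predicate (the helper is mine, the logic is the patch's): __GFP_NORETRY always gives up, small orders and __GFP_REPEAT keep retrying after a blk_congestion_wait() nap, and __GFP_NOFAIL retries unconditionally:

/* Retry decision from the rebalance path of __alloc_pages().  As the
 * patch comment notes, __GFP_REPEAT behaves like __GFP_NOFAIL in this
 * implementation; the distinction is contractual, not operational. */
static int should_retry_alloc(unsigned int gfp_mask, unsigned int order)
{
        if (gfp_mask & __GFP_NORETRY)
                return 0;
        if (order <= 3 || (gfp_mask & (__GFP_REPEAT | __GFP_NOFAIL)))
                return 1;
        return 0;
}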
*/ diff --git a/mm/swap_state.c b/mm/swap_state.c index b479ebafa2bd..29198f06fcae 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -13,7 +13,6 @@ #include <linux/init.h> #include <linux/pagemap.h> #include <linux/backing-dev.h> -#include <linux/buffer_head.h> /* block_sync_page() */ #include <asm/pgtable.h> @@ -187,7 +186,7 @@ void delete_from_swap_cache(struct page *page) BUG_ON(!PageLocked(page)); BUG_ON(PageWriteback(page)); - BUG_ON(page_has_buffers(page)); + BUG_ON(PagePrivate(page)); entry.val = page->index; @@ -236,7 +235,7 @@ int move_from_swap_cache(struct page *page, unsigned long index, BUG_ON(!PageLocked(page)); BUG_ON(PageWriteback(page)); - BUG_ON(page_has_buffers(page)); + BUG_ON(PagePrivate(page)); entry.val = page->index; diff --git a/mm/swapfile.c b/mm/swapfile.c index 2271d23d7e7b..48ffb627914d 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -7,6 +7,7 @@ #include <linux/config.h> #include <linux/mm.h> +#include <linux/mman.h> #include <linux/slab.h> #include <linux/kernel_stat.h> #include <linux/swap.h> @@ -15,7 +16,6 @@ #include <linux/namei.h> #include <linux/shm.h> #include <linux/blkdev.h> -#include <linux/buffer_head.h> #include <linux/writeback.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> @@ -300,7 +300,7 @@ int remove_exclusive_swap_page(struct page *page) struct swap_info_struct * p; swp_entry_t entry; - BUG_ON(page_has_buffers(page)); + BUG_ON(PagePrivate(page)); BUG_ON(!PageLocked(page)); if (!PageSwapCache(page)) @@ -355,7 +355,7 @@ void free_swap_and_cache(swp_entry_t entry) if (page) { int one_user; - BUG_ON(page_has_buffers(page)); + BUG_ON(PagePrivate(page)); page_cache_get(page); one_user = (page_count(page) == 2); /* Only cache user (+us), or swap space full? Free it! */ @@ -590,6 +590,11 @@ static int try_to_unuse(unsigned int type) * to swapoff for a while, then reappear - but that is rare. */ while ((i = find_next_to_unuse(si, i))) { + if (signal_pending(current)) { + retval = -EINTR; + break; + } + /* * Get a page for the entry, using the existing swap * cache page if there is one. Otherwise, get a clean @@ -759,8 +764,7 @@ static int try_to_unuse(unsigned int type) /* * Make sure that we aren't completely killing - * interactive performance. Interruptible check on - * signal_pending() would be nice, but changes the spec? + * interactive performance. */ cond_resched(); } @@ -1029,12 +1033,18 @@ asmlinkage long sys_swapoff(const char __user * specialfile) } prev = type; } - err = -EINVAL; if (type < 0) { + err = -EINVAL; + swap_list_unlock(); + goto out_dput; + } + if (vm_enough_memory(p->pages)) + vm_unacct_memory(p->pages); + else { + err = -ENOMEM; swap_list_unlock(); goto out_dput; } - if (prev < 0) { swap_list.head = p->next; } else { @@ -1048,7 +1058,9 @@ asmlinkage long sys_swapoff(const char __user * specialfile) total_swap_pages -= p->pages; p->flags &= ~SWP_WRITEOK; swap_list_unlock(); + current->flags |= PF_SWAPOFF; err = try_to_unuse(type); + current->flags &= ~PF_SWAPOFF; if (err) { /* re-insert swap space back into swap_list */ swap_list_lock(); diff --git a/mm/vmalloc.c b/mm/vmalloc.c index a6423eebcd5d..f6ce2378b721 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -308,7 +308,7 @@ void __vunmap(void *addr, int deallocate_pages) * * @addr: memory base address * - * Free the virtually continguos memory area starting at @addr, as + * Free the virtually contiguous memory area starting at @addr, as * obtained from vmalloc(), vmalloc_32() or __vmalloc(). * * May not be called in interrupt context. 
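The mm/swapfile.c hunks above make swapoff both interruptible and accountable: try_to_unuse() now bails out with -EINTR on a pending signal, and sys_swapoff() brackets the scan with PF_SWAPOFF, which the mm/oom_kill.c hunk earlier in this diff treats as an immediate kill preference (a task in swapoff is busy pulling every swapped page back into RAM). A condensed sketch of the consumer side; badness() stands for the existing scoring heuristics, and the shape follows the hunk rather than quoting it:

/* select_bad_process() after the oom_kill.c change: normal scoring,
 * except a task flagged PF_SWAPOFF is returned on the spot. */
static struct task_struct *select_bad_process_sketch(void)
{
        struct task_struct *g, *p, *chosen = NULL;
        int maxpoints = 0;

        do_each_thread(g, p) {
                int points = badness(p);

                if (points > maxpoints) {
                        chosen = p;
                        maxpoints = points;
                }
                if (p->flags & PF_SWAPOFF)
                        return p;
        } while_each_thread(g, p);
        return chosen;
}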
@@ -324,7 +324,7 @@ void vfree(void *addr) * * @addr: memory base address * - * Free the virtually continguos memory area starting at @addr, + * Free the virtually contiguous memory area starting at @addr, * which was created from the page array passed to vmap(). * * May not be called in interrupt context. @@ -336,25 +336,28 @@ void vunmap(void *addr) } /** - * vmap - map an array of pages into virtually continguos space + * vmap - map an array of pages into virtually contiguous space * * @pages: array of page pointers * @count: number of pages to map + * @flags: vm_area->flags + * @prot: page protection for the mapping * - * Maps @count pages from @pages into continguos kernel virtual + * Maps @count pages from @pages into contiguous kernel virtual * space. */ -void *vmap(struct page **pages, unsigned int count) +void *vmap(struct page **pages, unsigned int count, + unsigned long flags, pgprot_t prot) { struct vm_struct *area; if (count > num_physpages) return NULL; - area = get_vm_area((count << PAGE_SHIFT), VM_MAP); + area = get_vm_area((count << PAGE_SHIFT), flags); if (!area) return NULL; - if (map_vm_area(area, PAGE_KERNEL, &pages)) { + if (map_vm_area(area, prot, &pages)) { vunmap(area->addr); return NULL; } @@ -363,14 +366,14 @@ void *vmap(struct page **pages, unsigned int count) } /** - * __vmalloc - allocate virtually continguos memory + * __vmalloc - allocate virtually contiguous memory * * @size: allocation size * @gfp_mask: flags for the page level allocator * @prot: protection mask for the allocated pages * * Allocate enough pages to cover @size from the page level - * allocator with @gfp_mask flags. Map them into continguos + * allocator with @gfp_mask flags. Map them into contiguous * kernel virtual space, using a pagetable protection of @prot. */ void *__vmalloc(unsigned long size, int gfp_mask, pgprot_t prot) @@ -418,12 +421,12 @@ fail: } /** - * vmalloc - allocate virtually continguos memory + * vmalloc - allocate virtually contiguous memory * * @size: allocation size * * Allocate enough pages to cover @size from the page level - * allocator and map them into continguos kernel virtual space. + * allocator and map them into contiguous kernel virtual space. * * For tight cotrol over page level allocator and protection flags * use __vmalloc() instead. @@ -434,12 +437,12 @@ void *vmalloc(unsigned long size) } /** - * vmalloc_32 - allocate virtually continguos memory (32bit addressable) + * vmalloc_32 - allocate virtually contiguous memory (32bit addressable) * * @size: allocation size * * Allocate enough 32bit PA addressable pages to cover @size from the - * page level allocator and map them into continguos kernel virtual space. + * page level allocator and map them into contiguous kernel virtual space. */ void *vmalloc_32(unsigned long size) { diff --git a/mm/vmscan.c b/mm/vmscan.c index 3d204f882d04..aa24e1d1c693 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -22,7 +22,8 @@ #include <linux/writeback.h> #include <linux/suspend.h> #include <linux/blkdev.h> -#include <linux/buffer_head.h> /* for try_to_release_page() */ +#include <linux/buffer_head.h> /* for try_to_release_page(), + buffer_heads_over_limit */ #include <linux/mm_inline.h> #include <linux/pagevec.h> #include <linux/backing-dev.h> @@ -134,11 +135,9 @@ void remove_shrinker(struct shrinker *shrinker) * If the vm encounted mapped pages on the LRU it increase the pressure on * slab to avoid swapping. * - * FIXME: do not do for zone highmem - * * We do weird things to avoid (scanned*seeks*entries) overflowing 32 bits. 
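The mm/vmalloc.c hunks above give vmap() explicit @flags and @prot parameters instead of hard-coding VM_MAP and PAGE_KERNEL inside it. A usage sketch matching the call sites converted in this diff; the wrapper name is illustrative:

/* Mapping a page array with the new vmap() prototype.  The converted
 * callers in this diff (fs/xfs/pagebuf, sound/core/sgbuf) all pass
 * VM_MAP with PAGE_KERNEL; a driver needing, say, uncached access
 * would supply its own pgprot_t instead. */
static int map_pages_sketch(struct page **pages, unsigned int npages)
{
        char *addr = vmap(pages, npages, VM_MAP, PAGE_KERNEL);

        if (!addr)
                return -ENOMEM; /* count > num_physpages, or no VA space */

        addr[0] = 0;            /* region is now kernel-addressable */
        vunmap(addr);           /* may not be called in interrupt context */
        return 0;
}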
*/ -static int shrink_slab(long scanned, unsigned int gfp_mask) +static int shrink_slab(long scanned, unsigned int gfp_mask) { struct shrinker *shrinker; long pages; @@ -804,8 +803,7 @@ shrink_caches(struct zone *classzone, int priority, int *total_scanned, * excessive rotation of the inactive list, which is _supposed_ to be an LRU, * yes? */ -int -try_to_free_pages(struct zone *classzone, +int try_to_free_pages(struct zone *classzone, unsigned int gfp_mask, unsigned int order) { int priority; @@ -835,9 +833,10 @@ try_to_free_pages(struct zone *classzone, /* Take a nap, wait for some writeback to complete */ blk_congestion_wait(WRITE, HZ/10); - shrink_slab(total_scanned, gfp_mask); + if (classzone - classzone->zone_pgdat->node_zones < ZONE_HIGHMEM) + shrink_slab(total_scanned, gfp_mask); } - if (gfp_mask & __GFP_FS) + if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY)) out_of_memory(); return 0; } @@ -895,7 +894,8 @@ static int balance_pgdat(pg_data_t *pgdat, int nr_pages, struct page_state *ps) max_scan = SWAP_CLUSTER_MAX; to_free -= shrink_zone(zone, max_scan, GFP_KERNEL, to_reclaim, &nr_mapped, ps, priority); - shrink_slab(max_scan + nr_mapped, GFP_KERNEL); + if (i < ZONE_HIGHMEM) + shrink_slab(max_scan + nr_mapped, GFP_KERNEL); if (zone->all_unreclaimable) continue; if (zone->pages_scanned > zone->present_pages * 2) diff --git a/sound/core/sgbuf.c b/sound/core/sgbuf.c index 84e79ebc5c80..4578d2b335bf 100644 --- a/sound/core/sgbuf.c +++ b/sound/core/sgbuf.c @@ -85,7 +85,7 @@ void *snd_malloc_sgbuf_pages(struct pci_dev *pci, size_t size, struct snd_dma_bu } sgbuf->size = size; - dmab->area = vmap(sgbuf->page_table, sgbuf->pages); + dmab->area = vmap(sgbuf->page_table, sgbuf->pages, VM_MAP, PAGE_KERNEL); if (! dmab->area) goto _failed; return dmab->area;
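Finally, the mm/vmscan.c hunks above stop shrink_slab() from running on behalf of highmem reclaim: slab pages are never allocated from highmem, so shrinking slab cannot relieve a highmem-constrained allocation. The zone test, written out as a predicate for clarity (the helper name is mine; both call sites inline the comparison):

/* Zone check behind the new shrink_slab() gating: only zones below
 * ZONE_HIGHMEM (DMA, NORMAL) can hold slab pages. */
static int zone_can_hold_slab(struct zone *zone)
{
        return (zone - zone->zone_pgdat->node_zones) < ZONE_HIGHMEM;
}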
