 90 files changed, 911 insertions(+), 691 deletions(-)
diff --git a/arch/alpha/mm/init.c b/arch/alpha/mm/init.c index bbf7417506c1..d310bcc70d54 100644 --- a/arch/alpha/mm/init.c +++ b/arch/alpha/mm/init.c @@ -66,19 +66,9 @@ pgd_alloc(struct mm_struct *mm) pte_t * pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - pte_t *pte; - long timeout = 10; - - retry: - pte = (pte_t *) __get_free_page(GFP_KERNEL); + pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT); if (pte) clear_page(pte); - else if (--timeout >= 0) { - current->state = TASK_UNINTERRUPTIBLE; - schedule_timeout(HZ); - goto retry; - } - return pte; } diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c index a4daeb2d3b2a..2acbd3d6b0b3 100644 --- a/arch/i386/kernel/time.c +++ b/arch/i386/kernel/time.c @@ -124,15 +124,28 @@ void do_settimeofday(struct timeval *tv) * made, and then undo it! */ tv->tv_usec -= timer->get_offset(); - tv->tv_usec -= (jiffies - wall_jiffies) * (1000000 / HZ); + tv->tv_usec -= (jiffies - wall_jiffies) * (USEC_PER_SEC / HZ); while (tv->tv_usec < 0) { - tv->tv_usec += 1000000; + tv->tv_usec += USEC_PER_SEC; tv->tv_sec--; } + tv->tv_usec *= NSEC_PER_USEC; + + wall_to_monotonic.tv_sec += xtime.tv_sec - tv->tv_sec; + wall_to_monotonic.tv_nsec += xtime.tv_nsec - tv->tv_usec; + + if (wall_to_monotonic.tv_nsec > NSEC_PER_SEC) { + wall_to_monotonic.tv_nsec -= NSEC_PER_SEC; + wall_to_monotonic.tv_sec++; + } + if (wall_to_monotonic.tv_nsec < 0) { + wall_to_monotonic.tv_nsec += NSEC_PER_SEC; + wall_to_monotonic.tv_sec--; + } xtime.tv_sec = tv->tv_sec; - xtime.tv_nsec = (tv->tv_usec * 1000); + xtime.tv_nsec = tv->tv_usec; time_adjust = 0; /* stop active adjtime() */ time_status |= STA_UNSYNC; time_maxerror = NTP_PHASE_LIMIT; @@ -228,41 +241,6 @@ static inline void do_timer_interrupt(int irq, void *dev_id, struct pt_regs *reg } /* - * Lost tick detection and compensation - */ -static inline void detect_lost_tick(void) -{ - /* read time since last interrupt */ - unsigned long delta = timer->get_offset(); - static unsigned long dbg_print; - - /* check if delta is greater then two ticks */ - if(delta >= 2*(1000000/HZ)){ - - /* - * only print debug info first 5 times - */ - /* - * AKPM: disable this for now; it's nice, but irritating. - */ - if (0 && dbg_print < 5) { - printk(KERN_WARNING "\nWarning! Detected %lu " - "micro-second gap between interrupts.\n", - delta); - printk(KERN_WARNING " Compensating for %lu lost " - "ticks.\n", - delta/(1000000/HZ)-1); - dump_stack(); - dbg_print++; - } - /* calculate number of missed ticks */ - delta = delta/(1000000/HZ)-1; - jiffies += delta; - } - -} - -/* * This is the same as the above, except we _also_ save the current * Time Stamp Counter value at the time of the timer interrupt, so that * we later on can estimate the time of day more exactly. 
@@ -278,7 +256,6 @@ irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) */ write_seqlock(&xtime_lock); - detect_lost_tick(); timer->mark_offset(); do_timer_interrupt(irq, NULL, regs); @@ -322,7 +299,9 @@ void __init time_init(void) { xtime.tv_sec = get_cmos_time(); - xtime.tv_nsec = 0; + wall_to_monotonic.tv_sec = -xtime.tv_sec + INITIAL_JIFFIES / HZ; + xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ); + wall_to_monotonic.tv_nsec = 0; timer = select_timer(); diff --git a/arch/i386/kernel/timers/timer_cyclone.c b/arch/i386/kernel/timers/timer_cyclone.c index cbe74fd46491..7cdf4d7bca6f 100644 --- a/arch/i386/kernel/timers/timer_cyclone.c +++ b/arch/i386/kernel/timers/timer_cyclone.c @@ -18,6 +18,7 @@ #include <asm/fixmap.h> extern spinlock_t i8253_lock; +extern unsigned long jiffies; extern unsigned long calibrate_tsc(void); /* Number of usecs that the last interrupt was delayed */ @@ -46,6 +47,8 @@ static rwlock_t monotonic_lock = RW_LOCK_UNLOCKED; static void mark_offset_cyclone(void) { + unsigned long lost, delay; + unsigned long delta = last_cyclone_low; int count; unsigned long long this_offset, last_offset; @@ -62,6 +65,15 @@ static void mark_offset_cyclone(void) count |= inb(0x40) << 8; spin_unlock(&i8253_lock); + /* lost tick compensation */ + delta = last_cyclone_low - delta; + delta /=(CYCLONE_TIMER_FREQ/1000000); + delta += delay_at_last_interrupt; + lost = delta/(1000000/HZ); + delay = delta%(1000000/HZ); + if(lost >= 2) + jiffies += lost-1; + /* update the monotonic base value */ this_offset = ((unsigned long long)last_cyclone_high<<32)|last_cyclone_low; monotonic_base += (this_offset - last_offset) & CYCLONE_TIMER_MASK; @@ -70,6 +82,12 @@ static void mark_offset_cyclone(void) /* calculate delay_at_last_interrupt */ count = ((LATCH-1) - count) * TICK_SIZE; delay_at_last_interrupt = (count + LATCH/2) / LATCH; + + /* catch corner case where tick rollover + * occured between cyclone and pit reads + */ + if(abs(delay - delay_at_last_interrupt) > 900) + jiffies++; } static unsigned long get_offset_cyclone(void) diff --git a/arch/i386/kernel/timers/timer_pit.c b/arch/i386/kernel/timers/timer_pit.c index cdcc95b74aaf..7d521228c28a 100644 --- a/arch/i386/kernel/timers/timer_pit.c +++ b/arch/i386/kernel/timers/timer_pit.c @@ -54,7 +54,7 @@ static void delay_pit(unsigned long loops) } -/* This function must be called with interrupts disabled +/* This function must be called with xtime_lock held. * It was inspired by Steve McCanne's microtime-i386 for BSD. -- jrs * * However, the pc-audio speaker driver changes the divisor so that @@ -93,7 +93,7 @@ static unsigned long get_offset_pit(void) static unsigned long jiffies_p = 0; /* - * cache volatile jiffies temporarily; we have IRQs turned off. + * cache volatile jiffies temporarily; we have xtime_lock. */ unsigned long jiffies_t; @@ -119,8 +119,6 @@ static unsigned long get_offset_pit(void) count = LATCH - 1; } - spin_unlock_irqrestore(&i8253_lock, flags); - /* * avoiding timer inconsistencies (they are rare, but they happen)... * there are two kinds of problems that must be avoided here: @@ -130,7 +128,6 @@ static unsigned long get_offset_pit(void) * (see c't 95/10 page 335 for Neptun bug.) 
*/ - if( jiffies_t == jiffies_p ) { if( count > count_p ) { /* the nutcase */ @@ -141,6 +138,8 @@ static unsigned long get_offset_pit(void) count_p = count; + spin_unlock_irqrestore(&i8253_lock, flags); + count = ((LATCH-1) - count) * TICK_SIZE; count = (count + LATCH/2) / LATCH; diff --git a/arch/i386/kernel/timers/timer_tsc.c b/arch/i386/kernel/timers/timer_tsc.c index fad90c000cc6..e7c126d30e42 100644 --- a/arch/i386/kernel/timers/timer_tsc.c +++ b/arch/i386/kernel/timers/timer_tsc.c @@ -21,6 +21,7 @@ int tsc_disable __initdata = 0; extern spinlock_t i8253_lock; +extern unsigned long jiffies; static int use_tsc; /* Number of usecs that the last interrupt was delayed */ @@ -117,6 +118,8 @@ static unsigned long long monotonic_clock_tsc(void) static void mark_offset_tsc(void) { + unsigned long lost,delay; + unsigned long delta = last_tsc_low; int count; int countmp; static int count1 = 0; @@ -161,6 +164,23 @@ static void mark_offset_tsc(void) } } + /* lost tick compensation */ + delta = last_tsc_low - delta; + { + register unsigned long eax, edx; + eax = delta; + __asm__("mull %2" + :"=a" (eax), "=d" (edx) + :"rm" (fast_gettimeoffset_quotient), + "0" (eax)); + delta = edx; + } + delta += delay_at_last_interrupt; + lost = delta/(1000000/HZ); + delay = delta%(1000000/HZ); + if(lost >= 2) + jiffies += lost-1; + /* update the monotonic base value */ this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low; monotonic_base += cycles_2_ns(this_offset - last_offset); @@ -169,6 +189,12 @@ static void mark_offset_tsc(void) /* calculate delay_at_last_interrupt */ count = ((LATCH-1) - count) * TICK_SIZE; delay_at_last_interrupt = (count + LATCH/2) / LATCH; + + /* catch corner case where tick rollover + * occured between tsc and pit reads + */ + if(abs(delay - delay_at_last_interrupt) > 900) + jiffies++; } static void delay_tsc(unsigned long loops) diff --git a/arch/i386/mm/hugetlbpage.c b/arch/i386/mm/hugetlbpage.c index 6b37833292aa..c7259ef89bd2 100644 --- a/arch/i386/mm/hugetlbpage.c +++ b/arch/i386/mm/hugetlbpage.c @@ -129,37 +129,45 @@ nomem: int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, struct page **pages, struct vm_area_struct **vmas, - unsigned long *st, int *length, int i) + unsigned long *position, int *length, int i) { - pte_t *ptep, pte; - unsigned long start = *st; - unsigned long pstart; - int len = *length; - struct page *page; + unsigned long vpfn, vaddr = *position; + int remainder = *length; + + WARN_ON(!is_vm_hugetlb_page(vma)); - do { - pstart = start; - ptep = huge_pte_offset(mm, start); - pte = *ptep; + vpfn = vaddr/PAGE_SIZE; + while (vaddr < vma->vm_end && remainder) { -back1: - page = pte_page(pte); if (pages) { - page += ((start & ~HPAGE_MASK) >> PAGE_SHIFT); + pte_t *pte; + struct page *page; + + pte = huge_pte_offset(mm, vaddr); + + /* hugetlb should be locked, and hence, prefaulted */ + WARN_ON(!pte || pte_none(*pte)); + + page = &pte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)]; + + WARN_ON(!PageCompound(page)); + get_page(page); pages[i] = page; } + if (vmas) vmas[i] = vma; - i++; - len--; - start += PAGE_SIZE; - if (((start & HPAGE_MASK) == pstart) && len && - (start < vma->vm_end)) - goto back1; - } while (len && start < vma->vm_end); - *length = len; - *st = start; + + vaddr += PAGE_SIZE; + ++vpfn; + --remainder; + ++i; + } + + *length = remainder; + *position = vaddr; + return i; } @@ -474,9 +482,7 @@ int hugetlb_report_meminfo(char *buf) int is_hugepage_mem_enough(size_t size) { - if (size > (htlbpagemem << HPAGE_SHIFT)) - return 
0; - return 1; + return (size + ~HPAGE_MASK)/HPAGE_SIZE <= htlbpagemem; } /* diff --git a/arch/i386/mm/pgtable.c b/arch/i386/mm/pgtable.c index 054eec2afc35..9d36261de7d0 100644 --- a/arch/i386/mm/pgtable.c +++ b/arch/i386/mm/pgtable.c @@ -131,39 +131,23 @@ void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags) pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - int count = 0; - pte_t *pte; - - do { - pte = (pte_t *) __get_free_page(GFP_KERNEL); - if (pte) - clear_page(pte); - else { - current->state = TASK_UNINTERRUPTIBLE; - schedule_timeout(HZ); - } - } while (!pte && (count++ < 10)); + pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT); + if (pte) + clear_page(pte); return pte; } struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) { - int count = 0; struct page *pte; - - do { + #if CONFIG_HIGHPTE - pte = alloc_pages(GFP_KERNEL | __GFP_HIGHMEM, 0); + pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT, 0); #else - pte = alloc_pages(GFP_KERNEL, 0); + pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT, 0); #endif - if (pte) - clear_highpage(pte); - else { - current->state = TASK_UNINTERRUPTIBLE; - schedule_timeout(HZ); - } - } while (!pte && (count++ < 10)); + if (pte) + clear_highpage(pte); return pte; } diff --git a/arch/ia64/ia32/sys_ia32.c b/arch/ia64/ia32/sys_ia32.c index 84d34a212ac2..570b03908dd5 100644 --- a/arch/ia64/ia32/sys_ia32.c +++ b/arch/ia64/ia32/sys_ia32.c @@ -3040,7 +3040,6 @@ struct nfsctl_arg32 { #define ca32_svc u.u32_svc #define ca32_client u.u32_client #define ca32_export u.u32_export -#define ca32_authd u.u32_authd #define ca32_debug u.u32_debug }; diff --git a/arch/parisc/kernel/sys_parisc32.c b/arch/parisc/kernel/sys_parisc32.c index 63a85350d4ea..23a0afcf31a6 100644 --- a/arch/parisc/kernel/sys_parisc32.c +++ b/arch/parisc/kernel/sys_parisc32.c @@ -1131,7 +1131,7 @@ asmlinkage long sys32_msgrcv(int msqid, struct nfsctl_export32 { char ex_client[NFSCLNT_IDMAX+1]; char ex_path[NFS_MAXPATHLEN+1]; - __kernel_dev_t ex_dev; + __kernel_old_dev_t ex_dev; compat_ino_t ex_ino; int ex_flags; __kernel_uid_t ex_anon_uid; diff --git a/arch/ppc/mm/pgtable.c b/arch/ppc/mm/pgtable.c index 9682525026f9..5d4aef7ab895 100644 --- a/arch/ppc/mm/pgtable.c +++ b/arch/ppc/mm/pgtable.c @@ -76,15 +76,11 @@ pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) extern void *early_get_page(void); int timeout = 0; - if (mem_init_done) { - while ((pte = (pte_t *) __get_free_page(GFP_KERNEL)) == NULL - && ++timeout < 10) { - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(HZ); - } - } else - pte = (pte_t *) early_get_page(); - if (pte != NULL) + if (mem_init_done) + pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT); + else + pte = (pte_t *)early_get_page(); + if (pte) clear_page(pte); return pte; } @@ -92,20 +88,16 @@ pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) { struct page *pte; - int timeout = 0; + #ifdef CONFIG_HIGHPTE - int flags = GFP_KERNEL | __GFP_HIGHMEM; + int flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_REPEAT; #else - int flags = GFP_KERNEL; + int flags = GFP_KERNEL | __GFP_REPEAT; #endif - while ((pte = alloc_pages(flags, 0)) == NULL) { - if (++timeout >= 10) - return NULL; - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(HZ); - } - clear_highpage(pte); + pte = alloc_pages(flags, 0); + if (pte) + clear_highpage(pte); return pte; } diff --git 
a/arch/ppc64/kernel/sys_ppc32.c b/arch/ppc64/kernel/sys_ppc32.c index fba8c8254455..300a93c9c42e 100644 --- a/arch/ppc64/kernel/sys_ppc32.c +++ b/arch/ppc64/kernel/sys_ppc32.c @@ -910,7 +910,6 @@ struct nfsctl_arg32 { #define ca32_export u.u32_export #define ca32_getfd u.u32_getfd #define ca32_getfs u.u32_getfs -#define ca32_authd u.u32_authd }; union nfsctl_res32 { diff --git a/arch/sparc/mm/sun4c.c b/arch/sparc/mm/sun4c.c index 9cda5ee63d98..13495f4c418f 100644 --- a/arch/sparc/mm/sun4c.c +++ b/arch/sparc/mm/sun4c.c @@ -1901,7 +1901,7 @@ static pte_t *sun4c_pte_alloc_one_kernel(struct mm_struct *mm, unsigned long add if ((pte = sun4c_pte_alloc_one_fast(mm, address)) != NULL) return pte; - pte = (pte_t *)__get_free_page(GFP_KERNEL); + pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT); if (pte) memset(pte, 0, PAGE_SIZE); return pte; @@ -2194,7 +2194,7 @@ void __init ld_mmu_sun4c(void) BTFIXUPSET_CALL(pte_alloc_one_kernel, sun4c_pte_alloc_one_kernel, BTFIXUPCALL_NORM); BTFIXUPSET_CALL(pte_alloc_one, sun4c_pte_alloc_one, BTFIXUPCALL_NORM); BTFIXUPSET_CALL(free_pmd_fast, sun4c_free_pmd_fast, BTFIXUPCALL_NOP); - BTFIXUPSET_CALL(pmd_alloc_one, sun4c_pmd_alloc_one, BTFIXUPCALL_RETO0); + BTFIXUPSET_CALL(pmd_alloc_one, sun4c_lpmd_alloc_one, BTFIXUPCALL_RETO0); BTFIXUPSET_CALL(free_pgd_fast, sun4c_free_pgd_fast, BTFIXUPCALL_NORM); BTFIXUPSET_CALL(get_pgd_fast, sun4c_get_pgd_fast, BTFIXUPCALL_NORM); diff --git a/arch/sparc64/kernel/sys_sparc32.c b/arch/sparc64/kernel/sys_sparc32.c index b1194401deba..ff9c8ec19b16 100644 --- a/arch/sparc64/kernel/sys_sparc32.c +++ b/arch/sparc64/kernel/sys_sparc32.c @@ -2133,7 +2133,6 @@ struct nfsctl_arg32 { #define ca32_export u.u32_export #define ca32_getfd u.u32_getfd #define ca32_getfs u.u32_getfs -#define ca32_authd u.u32_authd }; union nfsctl_res32 { diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c index 2e7199293856..d0c24d48071e 100644 --- a/arch/um/kernel/mem.c +++ b/arch/um/kernel/mem.c @@ -810,35 +810,21 @@ void pgd_free(pgd_t *pgd) pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - int count = 0; pte_t *pte; - do { - pte = (pte_t *) __get_free_page(GFP_KERNEL); - if (pte) - clear_page(pte); - else { - current->state = TASK_UNINTERRUPTIBLE; - schedule_timeout(HZ); - } - } while (!pte && (count++ < 10)); + pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT); + if (pte) + clear_page(pte); return pte; } struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) { - int count = 0; struct page *pte; - do { - pte = alloc_pages(GFP_KERNEL, 0); - if (pte) - clear_highpage(pte); - else { - current->state = TASK_UNINTERRUPTIBLE; - schedule_timeout(HZ); - } - } while (!pte && (count++ < 10)); + pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT, 0); + if (pte) + clear_highpage(pte); return pte; } diff --git a/arch/x86_64/ia32/sys_ia32.c b/arch/x86_64/ia32/sys_ia32.c index 292936f958f0..6712c9475d0b 100644 --- a/arch/x86_64/ia32/sys_ia32.c +++ b/arch/x86_64/ia32/sys_ia32.c @@ -1708,7 +1708,6 @@ struct nfsctl_arg32 { #define ca32_export u.u32_export #define ca32_getfd u.u32_getfd #define ca32_getfs u.u32_getfs -#define ca32_authd u.u32_authd }; union nfsctl_res32 { diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c index 9362b6cb01eb..8ffb84e6db80 100644 --- a/drivers/block/DAC960.c +++ b/drivers/block/DAC960.c @@ -1069,6 +1069,7 @@ static boolean DAC960_V1_EnableMemoryMailboxInterface(DAC960_Controller_T if (pci_set_dma_mask(Controller->PCIDevice, DAC690_V1_PciDmaMask)) return DAC960_Failure(Controller, "DMA 
mask out of range"); + Controller->BounceBufferLimit = DAC690_V1_PciDmaMask; if ((hw_type == DAC960_PD_Controller) || (hw_type == DAC960_P_Controller)) { CommandMailboxesSize = 0; @@ -1271,6 +1272,7 @@ static boolean DAC960_V2_EnableMemoryMailboxInterface(DAC960_Controller_T if (pci_set_dma_mask(Controller->PCIDevice, DAC690_V2_PciDmaMask)) return DAC960_Failure(Controller, "DMA mask out of range"); + Controller->BounceBufferLimit = DAC690_V2_PciDmaMask; /* This is a temporary dma mapping, used only in the scope of this function */ CommandMailbox = @@ -2386,6 +2388,7 @@ static boolean DAC960_RegisterBlockDevice(DAC960_Controller_T *Controller) */ RequestQueue = &Controller->RequestQueue; blk_init_queue(RequestQueue, DAC960_RequestFunction, &Controller->queue_lock); + blk_queue_bounce_limit(RequestQueue, Controller->BounceBufferLimit); RequestQueue->queuedata = Controller; blk_queue_max_hw_segments(RequestQueue, Controller->DriverScatterGatherLimit); diff --git a/drivers/block/DAC960.h b/drivers/block/DAC960.h index f38145a54a67..01b543211870 100644 --- a/drivers/block/DAC960.h +++ b/drivers/block/DAC960.h @@ -62,11 +62,6 @@ /* Define the pci dma mask supported by DAC960 V1 and V2 Firmware Controlers - - For now set the V2 mask to only 32 bits. The controller IS capable - of doing 64 bit dma. But I have yet to find out whether this needs to - be explicitely enabled in the controller, or of the controller adapts - automatically. */ #define DAC690_V1_PciDmaMask 0xffffffff @@ -2370,6 +2365,7 @@ typedef struct DAC960_Controller unsigned short ControllerScatterGatherLimit; unsigned short DriverScatterGatherLimit; unsigned int ControllerUsageCount; + u64 BounceBufferLimit; unsigned int CombinedStatusBufferLength; unsigned int InitialStatusLength; unsigned int CurrentStatusLength; diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 8987b67272cd..f566e20e2094 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -599,9 +599,12 @@ static int cciss_ioctl(struct inode *inode, struct file *filep, luninfo.num_opens = drv->usage_count; luninfo.num_parts = 0; /* count partitions 1 to 15 with sizes > 0 */ - for(i=1; i <MAX_PART; i++) - if (disk->part[i].nr_sects != 0) - luninfo.num_parts++; + for(i=1; i <MAX_PART; i++) { + if (!disk->part[i]) + continue; + if (disk->part[i]->nr_sects != 0) + luninfo.num_parts++; + } if (copy_to_user((void *) arg, &luninfo, sizeof(LogvolInfo_struct))) return -EFAULT; diff --git a/drivers/block/genhd.c b/drivers/block/genhd.c index 032739646da9..b2eeafc81195 100644 --- a/drivers/block/genhd.c +++ b/drivers/block/genhd.c @@ -365,11 +365,13 @@ static int show_partition(struct seq_file *part, void *v) (unsigned long long)get_capacity(sgp) >> 1, disk_name(sgp, 0, buf)); for (n = 0; n < sgp->minors - 1; n++) { - if (sgp->part[n].nr_sects == 0) + if (!sgp->part[n]) + continue; + if (sgp->part[n]->nr_sects == 0) continue; seq_printf(part, "%4d %4d %10llu %s\n", sgp->major, n + 1 + sgp->first_minor, - (unsigned long long)sgp->part[n].nr_sects >> 1 , + (unsigned long long)sgp->part[n]->nr_sects >> 1 , disk_name(sgp, n + 1, buf)); } @@ -542,6 +544,92 @@ static struct kset_hotplug_ops block_hotplug_ops = { static decl_subsys(block, &ktype_block, &block_hotplug_ops); +/* + * aggregate disk stat collector. Uses the same stats that the sysfs + * entries do, above, but makes them available through one seq_file. + * Watching a few disks may be efficient through sysfs, but watching + * all of them will be more efficient through this interface. 
+ * + * The output looks suspiciously like /proc/partitions with a bunch of + * extra fields. + */ + +/* iterator */ +static void *diskstats_start(struct seq_file *part, loff_t *pos) +{ + loff_t k = *pos; + struct list_head *p; + + down_read(&block_subsys.rwsem); + list_for_each(p, &block_subsys.kset.list) + if (!k--) + return list_entry(p, struct gendisk, kobj.entry); + return NULL; +} + +static void *diskstats_next(struct seq_file *part, void *v, loff_t *pos) +{ + struct list_head *p = ((struct gendisk *)v)->kobj.entry.next; + ++*pos; + return p==&block_subsys.kset.list ? NULL : + list_entry(p, struct gendisk, kobj.entry); +} + +static void diskstats_stop(struct seq_file *part, void *v) +{ + up_read(&block_subsys.rwsem); +} + +static int diskstats_show(struct seq_file *s, void *v) +{ + struct gendisk *gp = v; + char buf[64]; + int n = 0; + + /* + if (&sgp->kobj.entry == block_subsys.kset.list.next) + seq_puts(s, "major minor name" + " rio rmerge rsect ruse wio wmerge " + "wsect wuse running use aveq" + "\n\n"); + */ + + disk_round_stats(gp); + seq_printf(s, "%4d %4d %s %u %u %llu %u %u %u %llu %u %u %u %u\n", + gp->major, n + gp->first_minor, disk_name(gp, n, buf), + disk_stat_read(gp, reads), disk_stat_read(gp, read_merges), + (unsigned long long)disk_stat_read(gp, read_sectors), + jiffies_to_msec(disk_stat_read(gp, read_ticks)), + disk_stat_read(gp, writes), disk_stat_read(gp, write_merges), + (unsigned long long)disk_stat_read(gp, write_sectors), + jiffies_to_msec(disk_stat_read(gp, write_ticks)), + disk_stat_read(gp, in_flight), + jiffies_to_msec(disk_stat_read(gp, io_ticks)), + jiffies_to_msec(disk_stat_read(gp, time_in_queue))); + + /* now show all non-0 size partitions of it */ + for (n = 0; n < gp->minors - 1; n++) { + struct hd_struct *hd = gp->part[n]; + + if (hd && hd->nr_sects) + seq_printf(s, "%4d %4d %s %u %u %u %u\n", + gp->major, n + gp->first_minor + 1, + disk_name(gp, n + 1, buf), + hd->reads, hd->read_sectors, + hd->writes, hd->write_sectors); + } + + return 0; +} + +struct seq_operations diskstats_op = { + start: diskstats_start, + next: diskstats_next, + stop: diskstats_stop, + show: diskstats_show +}; + + struct gendisk *alloc_disk(int minors) { struct gendisk *disk = kmalloc(sizeof(struct gendisk), GFP_KERNEL); @@ -552,7 +640,7 @@ struct gendisk *alloc_disk(int minors) return NULL; } if (minors > 1) { - int size = (minors - 1) * sizeof(struct hd_struct); + int size = (minors - 1) * sizeof(struct hd_struct *); disk->part = kmalloc(size, GFP_KERNEL); if (!disk->part) { kfree(disk); @@ -604,8 +692,8 @@ void set_device_ro(struct block_device *bdev, int flag) struct gendisk *disk = bdev->bd_disk; if (bdev->bd_contains != bdev) { int part = bdev->bd_dev - MKDEV(disk->major, disk->first_minor); - struct hd_struct *p = &disk->part[part-1]; - p->policy = flag; + struct hd_struct *p = disk->part[part-1]; + if (p) p->policy = flag; } else disk->policy = flag; } @@ -615,7 +703,7 @@ void set_disk_ro(struct gendisk *disk, int flag) int i; disk->policy = flag; for (i = 0; i < disk->minors - 1; i++) - disk->part[i].policy = flag; + if (disk->part[i]) disk->part[i]->policy = flag; } int bdev_read_only(struct block_device *bdev) @@ -626,8 +714,9 @@ int bdev_read_only(struct block_device *bdev) disk = bdev->bd_disk; if (bdev->bd_contains != bdev) { int part = bdev->bd_dev - MKDEV(disk->major, disk->first_minor); - struct hd_struct *p = &disk->part[part-1]; - return p->policy; + struct hd_struct *p = disk->part[part-1]; + if (p) return p->policy; + return 0; } else return 
disk->policy; } diff --git a/drivers/block/ioctl.c b/drivers/block/ioctl.c index 538c8a04a2d3..3dbd0824319b 100644 --- a/drivers/block/ioctl.c +++ b/drivers/block/ioctl.c @@ -41,11 +41,14 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg *arg) return -EINVAL; } /* partition number in use? */ - if (disk->part[part - 1].nr_sects != 0) + if (disk->part[part - 1]) return -EBUSY; /* overlap? */ for (i = 0; i < disk->minors - 1; i++) { - struct hd_struct *s = &disk->part[i]; + struct hd_struct *s = disk->part[i]; + + if (!s) + continue; if (!(start+length <= s->start_sect || start >= s->start_sect + s->nr_sects)) return -EBUSY; @@ -54,7 +57,9 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg *arg) add_partition(disk, part, start, length); return 0; case BLKPG_DEL_PARTITION: - if (disk->part[part - 1].nr_sects == 0) + if (!disk->part[part-1]) + return -ENXIO; + if (disk->part[part - 1]->nr_sects == 0) return -ENXIO; /* partition in use? Incomplete check for now. */ bdevp = bdget(MKDEV(disk->major, disk->first_minor) + part); diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c index e14210308577..9e2fd26ce0ed 100644 --- a/drivers/block/ll_rw_blk.c +++ b/drivers/block/ll_rw_blk.c @@ -1841,7 +1841,7 @@ static inline void blk_partition_remap(struct bio *bio) if (bdev == bdev->bd_contains) return; - p = &disk->part[bdev->bd_dev-MKDEV(disk->major,disk->first_minor)-1]; + p = disk->part[bdev->bd_dev-MKDEV(disk->major,disk->first_minor)-1]; switch (bio->bi_rw) { case READ: p->read_sectors += bio_sectors(bio); diff --git a/drivers/char/keyboard.c b/drivers/char/keyboard.c index ed453a23e4de..ef1fd1b6b0df 100644 --- a/drivers/char/keyboard.c +++ b/drivers/char/keyboard.c @@ -601,7 +601,7 @@ static void k_spec(struct vc_data *vc, unsigned char value, char up_flag, struct return; if ((kbd->kbdmode == VC_RAW || kbd->kbdmode == VC_MEDIUMRAW) && - value != K_SAK) + value != KVAL(K_SAK)) return; /* SAK is allowed even in raw mode */ fn_handler[value](vc, regs); } diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 60bd488baece..18ead55a549a 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -15,7 +15,7 @@ #include <linux/slab.h> static const char *_name = DM_NAME; -#define MAX_DEVICES (1 << KDEV_MINOR_BITS) +#define MAX_DEVICES 1024 static int major = 0; static int _major = 0; diff --git a/drivers/net/pcmcia/3c574_cs.c b/drivers/net/pcmcia/3c574_cs.c index 9a0727a4b120..ae13b70c367a 100644 --- a/drivers/net/pcmcia/3c574_cs.c +++ b/drivers/net/pcmcia/3c574_cs.c @@ -940,11 +940,9 @@ static int el3_start_xmit(struct sk_buff *skb, struct net_device *dev) outw(SetTxThreshold + (1536>>2), ioaddr + EL3_CMD); } - dev_kfree_skb (skb); pop_tx_status(dev); - - spin_unlock(&lp->window_lock); - + spin_unlock_irqrestore(&lp->window_lock, flags); + dev_kfree_skb(skb); return 0; } diff --git a/drivers/net/tulip/dmfe.c b/drivers/net/tulip/dmfe.c index ee017a02ecbe..b2d33c9ac274 100644 --- a/drivers/net/tulip/dmfe.c +++ b/drivers/net/tulip/dmfe.c @@ -668,13 +668,13 @@ static int dmfe_start_xmit(struct sk_buff *skb, struct DEVICE *dev) if ( db->tx_queue_cnt < TX_FREE_DESC_CNT ) netif_wake_queue(dev); - /* free this SKB */ - dev_kfree_skb(skb); - /* Restore CR7 to enable interrupt */ spin_unlock_irqrestore(&db->lock, flags); outl(db->cr7_data, dev->base_addr + DCR7); + /* free this SKB */ + dev_kfree_skb(skb); + return 0; } diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c index b37429ad5e65..bee04f53a84b 100644 --- a/drivers/pci/bus.c +++ b/drivers/pci/bus.c 
@@ -75,7 +75,8 @@ pci_bus_alloc_resource(struct pci_bus *bus, struct resource *res, * Add newly discovered PCI devices (which are on the bus->devices * list) to the global PCI device list, add the sysfs and procfs * entries. Where a bridge is found, add the discovered bus to - * the parents list of child buses, and recurse. + * the parents list of child buses, and recurse (breadth-first + * to be compatible with 2.4) * * Call hotplug for each new devices. */ @@ -98,6 +99,12 @@ void __devinit pci_bus_add_devices(struct pci_bus *bus) #endif pci_create_sysfs_dev_files(dev); + } + + list_for_each_entry(dev, &bus->devices, bus_list) { + + BUG_ON(list_empty(&dev->global_list)); + /* * If there is an unattached subordinate bus, attach * it and then scan for unattached PCI devices. diff --git a/drivers/serial/core.c b/drivers/serial/core.c index b461093a13cc..c6207f0737b7 100644 --- a/drivers/serial/core.c +++ b/drivers/serial/core.c @@ -782,8 +782,12 @@ uart_set_info(struct uart_state *state, struct serial_struct *newinfo) /* * Claim and map the new regions */ - if (port->type != PORT_UNKNOWN) + if (port->type != PORT_UNKNOWN) { retval = port->ops->request_port(port); + } else { + /* Always success - Jean II */ + retval = 0; + } /* * If we fail to request resources for the diff --git a/drivers/video/aty/mach64_gx.c b/drivers/video/aty/mach64_gx.c index a27b9bcd8859..85168a32eea8 100644 --- a/drivers/video/aty/mach64_gx.c +++ b/drivers/video/aty/mach64_gx.c @@ -119,7 +119,7 @@ static int aty_set_dac_514(const struct fb_info *info, } static int aty_var_to_pll_514(const struct fb_info *info, u32 vclk_per, - u32 bpp, u32 width, union aty_pll *pll) + u8 bpp, union aty_pll *pll) { /* * FIXME: use real calculations instead of using fixed values from the old @@ -338,7 +338,7 @@ const struct aty_dac_ops aty_dac_att21c498 = { */ static int aty_var_to_pll_18818(const struct fb_info *info, u32 vclk_per, - u32 bpp, u32 width, union aty_pll *pll) + u8 bpp, union aty_pll *pll) { u32 MHz100; /* in 0.01 MHz */ u32 program_bits; @@ -494,7 +494,7 @@ const struct aty_pll_ops aty_pll_ati18818_1 = { */ static int aty_var_to_pll_1703(const struct fb_info *info, u32 vclk_per, - u32 bpp, u32 width, union aty_pll *pll) + u32 vclk_per, u8 bpp, union aty_pll *pll) { u32 mhz100; /* in 0.01 MHz */ u32 program_bits; @@ -610,7 +610,7 @@ const struct aty_pll_ops aty_pll_stg1703 = { */ static int aty_var_to_pll_8398(const struct fb_info *info, u32 vclk_per, - u32 bpp, u32 width, union aty_pll *pll) + u32 vclk_per, u8 bpp, union aty_pll *pll) { u32 tempA, tempB, fOut, longMHz100, diff, preDiff; @@ -734,7 +734,7 @@ const struct aty_pll_ops aty_pll_ch8398 = { */ static int aty_var_to_pll_408(const struct fb_info *info, u32 vclk_per, - u32 bpp, u32 width, union aty_pll *pll) + u8 bpp, union aty_pll *pll) { u32 mhz100; /* in 0.01 MHz */ u32 program_bits; diff --git a/drivers/video/fbmem.c b/drivers/video/fbmem.c index 0f3182b1783a..75b47be36f22 100644 --- a/drivers/video/fbmem.c +++ b/drivers/video/fbmem.c @@ -25,6 +25,7 @@ #include <linux/mman.h> #include <linux/tty.h> #include <linux/init.h> +#include <linux/linux_logo.h> #include <linux/proc_fs.h> #ifdef CONFIG_KMOD #include <linux/kmod.h> @@ -655,7 +656,7 @@ int fb_prepare_logo(struct fb_info *info) } /* Return if no suitable logo was found */ - fb_logo.logo = find_logo(info->var.bits_per_pixel); + fb_logo.logo = fb_find_logo(info->var.bits_per_pixel); if (!fb_logo.logo || fb_logo.logo->height > info->var.yres) { fb_logo.logo = NULL; diff --git a/drivers/video/logo/logo.c 
b/drivers/video/logo/logo.c index daf9c360a2aa..3039664df313 100644 --- a/drivers/video/logo/logo.c +++ b/drivers/video/logo/logo.c @@ -33,7 +33,7 @@ extern const struct linux_logo logo_superh_vga16; extern const struct linux_logo logo_superh_clut224; -const struct linux_logo * __init find_logo(int depth) +const struct linux_logo *fb_find_logo(int depth) { const struct linux_logo *logo = 0; @@ -148,7 +148,7 @@ static int aio_setup_ring(struct kioctx *ctx) dprintk("mmap address: 0x%08lx\n", info->mmap_base); info->nr_pages = get_user_pages(current, ctx->mm, - info->mmap_base, info->mmap_size, + info->mmap_base, nr_pages, 1, 0, info->ring_pages, NULL); up_write(&ctx->mm->mmap_sem); diff --git a/fs/block_dev.c b/fs/block_dev.c index 948864b885b5..9a974170a10b 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -559,10 +559,10 @@ static int do_open(struct block_device *bdev, struct inode *inode, struct file * bdev->bd_contains = whole; down(&whole->bd_sem); whole->bd_part_count++; - p = disk->part + part - 1; + p = disk->part[part - 1]; bdev->bd_inode->i_data.backing_dev_info = whole->bd_inode->i_data.backing_dev_info; - if (!(disk->flags & GENHD_FL_UP) || !p->nr_sects) { + if (!(disk->flags & GENHD_FL_UP) || !p || !p->nr_sects) { whole->bd_part_count--; up(&whole->bd_sem); ret = -ENXIO; diff --git a/fs/buffer.c b/fs/buffer.c index 006892c691cd..47e2cf01f30c 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -776,6 +776,85 @@ void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode) EXPORT_SYMBOL(mark_buffer_dirty_inode); /* + * Add a page to the dirty page list. + * + * It is a sad fact of life that this function is called from several places + * deeply under spinlocking. It may not sleep. + * + * If the page has buffers, the uptodate buffers are set dirty, to preserve + * dirty-state coherency between the page and the buffers. It the page does + * not have buffers then when they are later attached they will all be set + * dirty. + * + * The buffers are dirtied before the page is dirtied. There's a small race + * window in which a writepage caller may see the page cleanness but not the + * buffer dirtiness. That's fine. If this code were to set the page dirty + * before the buffers, a concurrent writepage caller could clear the page dirty + * bit, see a bunch of clean buffers and we'd end up with dirty buffers/clean + * page on the dirty page list. + * + * There is also a small window where the page is dirty, and not on dirty_pages. + * Also a possibility that by the time the page is added to dirty_pages, it has + * been set clean. The page lists are somewhat approximate in this regard. + * It's better to have clean pages accidentally attached to dirty_pages than to + * leave dirty pages attached to clean_pages. + * + * We use private_lock to lock against try_to_free_buffers while using the + * page's buffer list. Also use this to protect against clean buffers being + * added to the page after it was set dirty. + * + * FIXME: may need to call ->reservepage here as well. That's rather up to the + * address_space though. + * + * For now, we treat swapper_space specially. It doesn't use the normal + * block a_ops. 
+ */ +int __set_page_dirty_buffers(struct page *page) +{ + struct address_space * const mapping = page->mapping; + int ret = 0; + + if (mapping == NULL) { + SetPageDirty(page); + goto out; + } + + if (!PageUptodate(page)) + buffer_error(); + + spin_lock(&mapping->private_lock); + if (page_has_buffers(page)) { + struct buffer_head *head = page_buffers(page); + struct buffer_head *bh = head; + + do { + if (buffer_uptodate(bh)) + set_buffer_dirty(bh); + else + buffer_error(); + bh = bh->b_this_page; + } while (bh != head); + } + spin_unlock(&mapping->private_lock); + + if (!TestSetPageDirty(page)) { + spin_lock(&mapping->page_lock); + if (page->mapping) { /* Race with truncate? */ + if (!mapping->backing_dev_info->memory_backed) + inc_page_state(nr_dirty); + list_del(&page->list); + list_add(&page->list, &mapping->dirty_pages); + } + spin_unlock(&mapping->page_lock); + __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); + } + +out: + return ret; +} +EXPORT_SYMBOL(__set_page_dirty_buffers); + +/* * Write out and wait upon a list of buffers. * * We have conflicting pressures: we want to make sure that all @@ -916,7 +995,7 @@ try_again: head = NULL; offset = PAGE_SIZE; while ((offset -= size) >= 0) { - bh = alloc_buffer_head(); + bh = alloc_buffer_head(GFP_NOFS); if (!bh) goto no_grow; @@ -2267,7 +2346,7 @@ int nobh_prepare_write(struct page *page, unsigned from, unsigned to, if (buffer_uptodate(&map_bh)) continue; /* reiserfs does this */ if (block_start < from || block_end > to) { - struct buffer_head *bh = alloc_buffer_head(); + struct buffer_head *bh = alloc_buffer_head(GFP_NOFS); if (!bh) { ret = -ENOMEM; @@ -2826,9 +2905,9 @@ static void recalc_bh_state(void) buffer_heads_over_limit = (tot > max_buffer_heads); } -struct buffer_head *alloc_buffer_head(void) +struct buffer_head *alloc_buffer_head(int gfp_flags) { - struct buffer_head *ret = kmem_cache_alloc(bh_cachep, GFP_NOFS); + struct buffer_head *ret = kmem_cache_alloc(bh_cachep, gfp_flags); if (ret) { preempt_disable(); __get_cpu_var(bh_accounting).nr++; diff --git a/fs/dcache.c b/fs/dcache.c index efc51c38ea25..9eec20e0ab20 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -155,12 +155,11 @@ repeat: if (d_unhashed(dentry)) goto kill_it; if (list_empty(&dentry->d_lru)) { - dentry->d_vfs_flags &= ~DCACHE_REFERENCED; + dentry->d_vfs_flags |= DCACHE_REFERENCED; list_add(&dentry->d_lru, &dentry_unused); dentry_stat.nr_unused++; } spin_unlock(&dentry->d_lock); - dentry->d_vfs_flags |= DCACHE_REFERENCED; spin_unlock(&dcache_lock); return; @@ -250,7 +249,6 @@ int d_invalidate(struct dentry * dentry) static inline struct dentry * __dget_locked(struct dentry *dentry) { atomic_inc(&dentry->d_count); - dentry->d_vfs_flags |= DCACHE_REFERENCED; if (atomic_read(&dentry->d_count) == 1) { dentry_stat.nr_unused--; list_del_init(&dentry->d_lru); @@ -379,17 +377,16 @@ static void prune_dcache(int count) dentry = list_entry(tmp, struct dentry, d_lru); spin_lock(&dentry->d_lock); + /* leave inuse dentries */ + if (atomic_read(&dentry->d_count)) { + spin_unlock(&dentry->d_lock); + continue; + } /* If the dentry was recently referenced, don't free it. 
*/ if (dentry->d_vfs_flags & DCACHE_REFERENCED) { dentry->d_vfs_flags &= ~DCACHE_REFERENCED; - - /* don't add non zero d_count dentries - * back to d_lru list - */ - if (!atomic_read(&dentry->d_count)) { - list_add(&dentry->d_lru, &dentry_unused); - dentry_stat.nr_unused++; - } + list_add(&dentry->d_lru, &dentry_unused); + dentry_stat.nr_unused++; spin_unlock(&dentry->d_lock); continue; } @@ -538,13 +535,18 @@ resume: struct list_head *tmp = next; struct dentry *dentry = list_entry(tmp, struct dentry, d_child); next = tmp->next; - list_del_init(&dentry->d_lru); - /* don't add non zero d_count dentries - * back to d_lru list + if (!list_empty(&dentry->d_lru)) { + dentry_stat.nr_unused--; + list_del_init(&dentry->d_lru); + } + /* + * move only zero ref count dentries to the end + * of the unused list for prune_dcache */ if (!atomic_read(&dentry->d_count)) { list_add(&dentry->d_lru, dentry_unused.prev); + dentry_stat.nr_unused++; found++; } /* @@ -609,13 +611,18 @@ void shrink_dcache_anon(struct hlist_head *head) spin_lock(&dcache_lock); hlist_for_each(lp, head) { struct dentry *this = hlist_entry(lp, struct dentry, d_hash); - list_del(&this->d_lru); + if (!list_empty(&this->d_lru)) { + dentry_stat.nr_unused--; + list_del(&this->d_lru); + } - /* don't add non zero d_count dentries - * back to d_lru list + /* + * move only zero ref count dentries to the end + * of the unused list for prune_dcache */ if (!atomic_read(&this->d_count)) { list_add_tail(&this->d_lru, &dentry_unused); + dentry_stat.nr_unused++; found++; } } @@ -1017,7 +1024,6 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name) if (likely(move_count == dentry->d_move_count)) { if (!d_unhashed(dentry)) { atomic_inc(&dentry->d_count); - dentry->d_vfs_flags |= DCACHE_REFERENCED; found = dentry; } } diff --git a/fs/dquot.c b/fs/dquot.c index 737b9f1d54ab..f3c0f63265c3 100644 --- a/fs/dquot.c +++ b/fs/dquot.c @@ -345,50 +345,6 @@ restart: return 0; } -static struct super_block *get_super_to_sync(int type) -{ - struct list_head *head; - int cnt, dirty; - -restart: - spin_lock(&sb_lock); - list_for_each(head, &super_blocks) { - struct super_block *sb = list_entry(head, struct super_block, s_list); - - for (cnt = 0, dirty = 0; cnt < MAXQUOTAS; cnt++) - if ((type == cnt || type == -1) && sb_has_quota_enabled(sb, cnt) - && info_any_dquot_dirty(&sb_dqopt(sb)->info[cnt])) - dirty = 1; - if (!dirty) - continue; - sb->s_count++; - spin_unlock(&sb_lock); - down_read(&sb->s_umount); - if (!sb->s_root) { - drop_super(sb); - goto restart; - } - return sb; - } - spin_unlock(&sb_lock); - return NULL; -} - -void sync_dquots(struct super_block *sb, int type) -{ - if (sb) { - if (sb->s_qcop->quota_sync) - sb->s_qcop->quota_sync(sb, type); - } - else { - while ((sb = get_super_to_sync(type))) { - if (sb->s_qcop->quota_sync) - sb->s_qcop->quota_sync(sb, type); - drop_super(sb); - } - } -} - /* Free unused dquots from cache */ static void prune_dqcache(int count) { diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index 7264433b25fd..aae953bb9572 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -91,7 +91,6 @@ find_exported_dentry(struct super_block *sb, void *obj, void *parent, if (dentry != result && acceptable(context, dentry)) { dput(result); - dentry->d_vfs_flags |= DCACHE_REFERENCED; return dentry; } spin_lock(&dcache_lock); @@ -271,7 +270,6 @@ find_exported_dentry(struct super_block *sb, void *obj, void *parent, if (dentry != result && acceptable(context, dentry)) { dput(result); - dentry->d_vfs_flags |= 
DCACHE_REFERENCED; return dentry; } spin_lock(&dcache_lock); @@ -434,7 +432,6 @@ static struct dentry *export_iget(struct super_block *sb, unsigned long ino, __u iput(inode); return ERR_PTR(-ENOMEM); } - result->d_vfs_flags |= DCACHE_REFERENCED; return result; } diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 67548b28113d..866edb62fad5 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -608,7 +608,6 @@ struct dentry *fat_get_dentry(struct super_block *sb, void *inump) return ERR_PTR(-ENOMEM); } result->d_op = sb->s_root->d_op; - result->d_vfs_flags |= DCACHE_REFERENCED; return result; } diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index a429a2aa0ac5..96e1d0bf490f 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c @@ -457,14 +457,8 @@ int journal_write_metadata_buffer(transaction_t *transaction, /* * Right, time to make up the new buffer_head. */ - do { - new_bh = alloc_buffer_head(); - if (!new_bh) { - printk (KERN_NOTICE "%s: ENOMEM at alloc_buffer_head, " - "trying again.\n", __FUNCTION__); - yield(); - } - } while (!new_bh); + new_bh = alloc_buffer_head(GFP_NOFS|__GFP_NOFAIL); + /* keep subsequent assertions sane */ new_bh->b_state = 0; init_buffer(new_bh, NULL, NULL); @@ -1613,28 +1607,7 @@ void shrink_journal_memory(void) */ void * __jbd_kmalloc (const char *where, size_t size, int flags, int retry) { - void *p; - static unsigned long last_warning; - - while (1) { - p = kmalloc(size, flags); - if (p) - return p; - if (!retry) - return NULL; - /* Log every retry for debugging. Also log them to the - * syslog, but do rate-limiting on the non-debugging - * messages. */ - jbd_debug(1, "ENOMEM in %s, retrying.\n", where); - - if (time_after(jiffies, last_warning + 5*HZ)) { - printk(KERN_NOTICE - "ENOMEM in %s, retrying.\n", where); - last_warning = jiffies; - } - - yield(); - } + return kmalloc(size, flags | (retry ? 
__GFP_NOFAIL : 0)); } /* diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 647f0357e30c..aa0646e44598 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -182,7 +182,7 @@ static struct sysfs_ops part_sysfs_ops = { static ssize_t part_dev_read(struct hd_struct * p, char *page) { struct gendisk *disk = container_of(p->kobj.parent,struct gendisk,kobj); - int part = p - disk->part + 1; + int part = p->partno; dev_t base = MKDEV(disk->major, disk->first_minor); return sprintf(page, "%04x\n", (unsigned)(base + part)); } @@ -234,7 +234,9 @@ struct kobj_type ktype_part = { void delete_partition(struct gendisk *disk, int part) { - struct hd_struct *p = disk->part + part - 1; + struct hd_struct *p = disk->part[part-1]; + if (!p) + return; if (!p->nr_sects) return; p->start_sect = 0; @@ -242,14 +244,23 @@ void delete_partition(struct gendisk *disk, int part) p->reads = p->writes = p->read_sectors = p->write_sectors = 0; devfs_remove("%s/part%d", disk->devfs_name, part); kobject_unregister(&p->kobj); + disk->part[part-1] = NULL; + kfree(p); } void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len) { - struct hd_struct *p = disk->part + part - 1; + struct hd_struct *p; + p = kmalloc(sizeof(*p), GFP_KERNEL); + if (!p) + return; + + memset(p, 0, sizeof(*p)); p->start_sect = start; p->nr_sects = len; + p->partno = part; + disk->part[part-1] = p; devfs_register_partition(disk, part); snprintf(p->kobj.name,KOBJ_NAME_LEN,"%s%d",disk->kobj.name,part); p->kobj.parent = &disk->kobj; diff --git a/fs/partitions/check.h b/fs/partitions/check.h index 0be95725e097..882980c55720 100644 --- a/fs/partitions/check.h +++ b/fs/partitions/check.h @@ -29,3 +29,8 @@ put_partition(struct parsed_partitions *p, int n, sector_t from, sector_t size) } extern int warn_no_part; + +extern void parse_bsd(struct parsed_partitions *state, + struct block_device *bdev, u32 offset, u32 size, + int origin, char *flavour, int max_partitions); + diff --git a/fs/partitions/msdos.c b/fs/partitions/msdos.c index 147485d515c8..56dea78c0312 100644 --- a/fs/partitions/msdos.c +++ b/fs/partitions/msdos.c @@ -214,12 +214,12 @@ parse_solaris_x86(struct parsed_partitions *state, struct block_device *bdev, #endif } -#ifdef CONFIG_BSD_DISKLABEL +#if defined(CONFIG_BSD_DISKLABEL) || defined(CONFIG_NEC98_PARTITION) /* * Create devices for BSD partitions listed in a disklabel, under a * dos-like partition. See parse_extended() for more information. 
*/ -static void +void parse_bsd(struct parsed_partitions *state, struct block_device *bdev, u32 offset, u32 size, int origin, char *flavour, int max_partitions) diff --git a/fs/partitions/nec98.c b/fs/partitions/nec98.c index b3bd8faf9bda..cbd55789f4b2 100644 --- a/fs/partitions/nec98.c +++ b/fs/partitions/nec98.c @@ -66,13 +66,6 @@ is_valid_nec98_partition_table(const struct nec98_partition *ptable, return valid; } -#ifdef CONFIG_BSD_DISKLABEL -extern void parse_bsd(struct parsed_partitions *state, - struct block_device *bdev, - u32 offset, u32 size, int origin, char *flavour, - int max_partitions); -#endif - int nec98_partition(struct parsed_partitions *state, struct block_device *bdev) { unsigned int nr; diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index 60dd7b44e838..89fc02080f1e 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -333,6 +333,18 @@ static struct file_operations proc_partitions_operations = { .release = seq_release, }; +extern struct seq_operations diskstats_op; +static int diskstats_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &diskstats_op); +} +static struct file_operations proc_diskstats_operations = { + open: diskstats_open, + read: seq_read, + llseek: seq_lseek, + release: seq_release, +}; + #ifdef CONFIG_MODULES extern struct seq_operations modules_op; static int modules_open(struct inode *inode, struct file *file) @@ -644,6 +656,7 @@ void __init proc_misc_init(void) create_seq_entry("slabinfo",S_IWUSR|S_IRUGO,&proc_slabinfo_operations); create_seq_entry("buddyinfo",S_IRUGO, &fragmentation_file_operations); create_seq_entry("vmstat",S_IRUGO, &proc_vmstat_file_operations); + create_seq_entry("diskstats", 0, &proc_diskstats_operations); #ifdef CONFIG_MODULES create_seq_entry("modules", 0, &proc_modules_operations); #endif diff --git a/fs/quota.c b/fs/quota.c index 3daa61901363..ce929f581b53 100644 --- a/fs/quota.c +++ b/fs/quota.c @@ -19,8 +19,10 @@ static int check_quotactl_valid(struct super_block *sb, int type, int cmd, qid_t { if (type >= MAXQUOTAS) return -EINVAL; + if (!sb && cmd != Q_SYNC) + return -ENODEV; /* Is operation supported? 
*/ - if (!sb->s_qcop) + if (sb && !sb->s_qcop) return -ENOSYS; switch (cmd) { @@ -51,7 +53,7 @@ static int check_quotactl_valid(struct super_block *sb, int type, int cmd, qid_t return -ENOSYS; break; case Q_SYNC: - if (!sb->s_qcop->quota_sync) + if (sb && !sb->s_qcop->quota_sync) return -ENOSYS; break; case Q_XQUOTAON: @@ -102,6 +104,50 @@ static int check_quotactl_valid(struct super_block *sb, int type, int cmd, qid_t return security_quotactl (cmd, type, id, sb); } +static struct super_block *get_super_to_sync(int type) +{ + struct list_head *head; + int cnt, dirty; + +restart: + spin_lock(&sb_lock); + list_for_each(head, &super_blocks) { + struct super_block *sb = list_entry(head, struct super_block, s_list); + + for (cnt = 0, dirty = 0; cnt < MAXQUOTAS; cnt++) + if ((type == cnt || type == -1) && sb_has_quota_enabled(sb, cnt) + && info_any_dquot_dirty(&sb_dqopt(sb)->info[cnt])) + dirty = 1; + if (!dirty) + continue; + sb->s_count++; + spin_unlock(&sb_lock); + down_read(&sb->s_umount); + if (!sb->s_root) { + drop_super(sb); + goto restart; + } + return sb; + } + spin_unlock(&sb_lock); + return NULL; +} + +void sync_dquots(struct super_block *sb, int type) +{ + if (sb) { + if (sb->s_qcop->quota_sync) + sb->s_qcop->quota_sync(sb, type); + } + else { + while ((sb = get_super_to_sync(type))) { + if (sb->s_qcop->quota_sync) + sb->s_qcop->quota_sync(sb, type); + drop_super(sb); + } + } +} + /* Copy parameters and call proper function */ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id, caddr_t addr) { @@ -167,7 +213,8 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id, cadd return sb->s_qcop->set_dqblk(sb, type, id, &idq); } case Q_SYNC: - return sb->s_qcop->quota_sync(sb, type); + sync_dquots(sb, type); + return 0; case Q_XQUOTAON: case Q_XQUOTAOFF: @@ -222,27 +269,30 @@ asmlinkage long sys_quotactl(unsigned int cmd, const char *special, qid_t id, ca struct super_block *sb = NULL; struct block_device *bdev; char *tmp; - int ret = -ENODEV; + int ret; cmds = cmd >> SUBCMDSHIFT; type = cmd & SUBCMDMASK; - tmp = getname(special); - if (IS_ERR(tmp)) - return PTR_ERR(tmp); - bdev = lookup_bdev(tmp); - putname(tmp); - if (IS_ERR(bdev)) - return PTR_ERR(bdev); - sb = get_super(bdev); - bdput(bdev); + if (cmds != Q_SYNC || special) { + tmp = getname(special); + if (IS_ERR(tmp)) + return PTR_ERR(tmp); + bdev = lookup_bdev(tmp); + putname(tmp); + if (IS_ERR(bdev)) + return PTR_ERR(bdev); + sb = get_super(bdev); + bdput(bdev); + if (!sb) + return -ENODEV; + } - if (sb) { - ret = check_quotactl_valid(sb, type, cmds, id); - if (ret >= 0) - ret = do_quotactl(sb, type, cmds, id, addr); + ret = check_quotactl_valid(sb, type, cmds, id); + if (ret >= 0) + ret = do_quotactl(sb, type, cmds, id, addr); + if (sb) drop_super(sb); - } return ret; } diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 0536716aa84e..fb7f27f1f532 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -1260,7 +1260,6 @@ struct dentry *reiserfs_get_dentry(struct super_block *sb, void *vobjp) iput(inode); return ERR_PTR(-ENOMEM); } - result->d_vfs_flags |= DCACHE_REFERENCED; return result; } diff --git a/fs/smbfs/proc.c b/fs/smbfs/proc.c index 703d6324f417..5ecb4353994e 100644 --- a/fs/smbfs/proc.c +++ b/fs/smbfs/proc.c @@ -2085,7 +2085,6 @@ out: void smb_decode_unix_basic(struct smb_fattr *fattr, char *p) { /* FIXME: verify nls support. all is sent as utf8? 
*/ - __u64 devmajor, devminor; fattr->f_unix = 1; fattr->f_mode = 0; @@ -2112,9 +2111,10 @@ void smb_decode_unix_basic(struct smb_fattr *fattr, char *p) fattr->f_mode |= smb_filetype_to_mode(WVAL(p, 56)); if (S_ISBLK(fattr->f_mode) || S_ISCHR(fattr->f_mode)) { - devmajor = LVAL(p, 60); - devminor = LVAL(p, 68); - fattr->f_rdev = ((devmajor & 0xFF) << 8) | (devminor & 0xFF); + __u64 major = LVAL(p, 60); + __u64 minor = LVAL(p, 68); + + fattr->f_rdev = MKDEV(major & 0xffffffff, minor & 0xffffffff); } fattr->f_mode |= LVAL(p, 84); } @@ -3008,7 +3008,7 @@ out: */ int smb_proc_setattr_unix(struct dentry *d, struct iattr *attr, - int major, int minor) + unsigned int major, unsigned int minor) { struct smb_sb_info *server = server_from_dentry(d); u64 nttime; diff --git a/fs/smbfs/proto.h b/fs/smbfs/proto.h index 07b690eb8848..ec44bad0e84e 100644 --- a/fs/smbfs/proto.h +++ b/fs/smbfs/proto.h @@ -27,7 +27,7 @@ extern void smb_init_root_dirent(struct smb_sb_info *server, struct smb_fattr *f extern void smb_decode_unix_basic(struct smb_fattr *fattr, char *p); extern int smb_proc_getattr(struct dentry *dir, struct smb_fattr *fattr); extern int smb_proc_setattr(struct dentry *dir, struct smb_fattr *fattr); -extern int smb_proc_setattr_unix(struct dentry *d, struct iattr *attr, int major, int minor); +extern int smb_proc_setattr_unix(struct dentry *d, struct iattr *attr, unsigned int major, unsigned int minor); extern int smb_proc_settime(struct dentry *dentry, struct smb_fattr *fattr); extern int smb_proc_dskattr(struct super_block *sb, struct statfs *attr); extern int smb_proc_read_link(struct smb_sb_info *server, struct dentry *d, char *buffer, int len); diff --git a/fs/xfs/linux/xfs_super.c b/fs/xfs/linux/xfs_super.c index 662a43c90a1e..73ef4ec19e95 100644 --- a/fs/xfs/linux/xfs_super.c +++ b/fs/xfs/linux/xfs_super.c @@ -741,7 +741,6 @@ linvfs_get_dentry( iput(inode); return ERR_PTR(-ENOMEM); } - result->d_vfs_flags |= DCACHE_REFERENCED; return result; } diff --git a/fs/xfs/pagebuf/page_buf.c b/fs/xfs/pagebuf/page_buf.c index f50803bd2570..2230380e952f 100644 --- a/fs/xfs/pagebuf/page_buf.c +++ b/fs/xfs/pagebuf/page_buf.c @@ -554,7 +554,8 @@ mapit: } else if (flags & PBF_MAPPED) { if (as_list_len > 64) purge_addresses(); - pb->pb_addr = vmap(pb->pb_pages, page_count); + pb->pb_addr = vmap(pb->pb_pages, page_count, + VM_MAP, PAGE_KERNEL); if (pb->pb_addr == NULL) return -ENOMEM; pb->pb_addr += pb->pb_offset; diff --git a/include/asm-alpha/pgalloc.h b/include/asm-alpha/pgalloc.h index fc675efac381..b34194c3d96c 100644 --- a/include/asm-alpha/pgalloc.h +++ b/include/asm-alpha/pgalloc.h @@ -40,7 +40,7 @@ pgd_free(pgd_t *pgd) static inline pmd_t * pmd_alloc_one(struct mm_struct *mm, unsigned long address) { - pmd_t *ret = (pmd_t *)__get_free_page(GFP_KERNEL); + pmd_t *ret = (pmd_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT); if (ret) clear_page(ret); return ret; diff --git a/include/asm-arm/proc-armv/pgalloc.h b/include/asm-arm/proc-armv/pgalloc.h index 4440be79d5ac..3263c346ccba 100644 --- a/include/asm-arm/proc-armv/pgalloc.h +++ b/include/asm-arm/proc-armv/pgalloc.h @@ -27,17 +27,9 @@ static inline pte_t * pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr) { - int count = 0; pte_t *pte; - do { - pte = (pte_t *)__get_free_page(GFP_KERNEL); - if (!pte) { - current->state = TASK_UNINTERRUPTIBLE; - schedule_timeout(HZ); - } - } while (!pte && (count++ < 10)); - + pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT); if (pte) { clear_page(pte); clean_dcache_area(pte, sizeof(pte_t) * 
PTRS_PER_PTE); @@ -51,16 +43,8 @@ static inline struct page * pte_alloc_one(struct mm_struct *mm, unsigned long addr) { struct page *pte; - int count = 0; - - do { - pte = alloc_pages(GFP_KERNEL, 0); - if (!pte) { - current->state = TASK_UNINTERRUPTIBLE; - schedule_timeout(HZ); - } - } while (!pte && (count++ < 10)); + pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT, 0); if (pte) { void *page = page_address(pte); clear_page(page); diff --git a/include/asm-cris/pgalloc.h b/include/asm-cris/pgalloc.h index 80e73be0d2b0..75dde6f4a42f 100644 --- a/include/asm-cris/pgalloc.h +++ b/include/asm-cris/pgalloc.h @@ -62,7 +62,7 @@ static inline pte_t *pte_alloc_one(struct mm_struct *mm, unsigned long address) { pte_t *pte; - pte = (pte_t *) __get_free_page(GFP_KERNEL); + pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT); if (pte) clear_page(pte); return pte; diff --git a/include/asm-i386/div64.h b/include/asm-i386/div64.h index ef915df700e4..bc8718a0b1ce 100644 --- a/include/asm-i386/div64.h +++ b/include/asm-i386/div64.h @@ -14,4 +14,22 @@ __mod; \ }) +/* + * (long)X = ((long long)divs) / (long)div + * (long)rem = ((long long)divs) % (long)div + * + * Warning, this will do an exception if X overflows. + */ +#define div_long_long_rem(a,b,c) div_ll_X_l_rem(a,b,c) + +extern inline long +div_ll_X_l_rem(long long divs, long div, long *rem) +{ + long dum2; + __asm__("divl %2":"=a"(dum2), "=d"(*rem) + : "rm"(div), "A"(divs)); + + return dum2; + +} #endif diff --git a/include/asm-ia64/pgalloc.h b/include/asm-ia64/pgalloc.h index 2e6134af88bc..4f56ceb9c42b 100644 --- a/include/asm-ia64/pgalloc.h +++ b/include/asm-ia64/pgalloc.h @@ -93,7 +93,7 @@ pmd_alloc_one_fast (struct mm_struct *mm, unsigned long addr) static inline pmd_t* pmd_alloc_one (struct mm_struct *mm, unsigned long addr) { - pmd_t *pmd = (pmd_t *) __get_free_page(GFP_KERNEL); + pmd_t *pmd = (pmd_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT); if (likely(pmd != NULL)) clear_page(pmd); @@ -125,7 +125,7 @@ pmd_populate_kernel (struct mm_struct *mm, pmd_t *pmd_entry, pte_t *pte) static inline struct page * pte_alloc_one (struct mm_struct *mm, unsigned long addr) { - struct page *pte = alloc_pages(GFP_KERNEL, 0); + struct page *pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT, 0); if (likely(pte != NULL)) clear_page(page_address(pte)); @@ -135,7 +135,7 @@ pte_alloc_one (struct mm_struct *mm, unsigned long addr) static inline pte_t * pte_alloc_one_kernel (struct mm_struct *mm, unsigned long addr) { - pte_t *pte = (pte_t *) __get_free_page(GFP_KERNEL); + pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT); if (likely(pte != NULL)) clear_page(pte); diff --git a/include/asm-m68k/motorola_pgalloc.h b/include/asm-m68k/motorola_pgalloc.h index 4beb7a822b38..f315615e488a 100644 --- a/include/asm-m68k/motorola_pgalloc.h +++ b/include/asm-m68k/motorola_pgalloc.h @@ -11,7 +11,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long ad { pte_t *pte; - pte = (pte_t *) __get_free_page(GFP_KERNEL); + pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT); if (pte) { clear_page(pte); __flush_page_to_ram(pte); @@ -30,7 +30,7 @@ static inline void pte_free_kernel(pte_t *pte) static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) { - struct page *page = alloc_pages(GFP_KERNEL, 0); + struct page *page = alloc_pages(GFP_KERNEL|__GFP_REPEAT, 0); pte_t *pte; if(!page) diff --git a/include/asm-m68k/sun3_pgalloc.h b/include/asm-m68k/sun3_pgalloc.h index 7740a2936511..4580b60196b9 100644 --- 
a/include/asm-m68k/sun3_pgalloc.h +++ b/include/asm-m68k/sun3_pgalloc.h @@ -18,7 +18,7 @@ extern const char bad_pmd_string[]; -#define pmd_alloc_one(mm,address) ({ BUG(); ((pmd_t *)2); }) +#define pmd_alloc_one(mm,address) ({ BUG(); ((pmd_t *)2); }) static inline void pte_free_kernel(pte_t * pte) @@ -39,7 +39,7 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, struct page *page) static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - unsigned long page = __get_free_page(GFP_KERNEL); + unsigned long page = __get_free_page(GFP_KERNEL|__GFP_REPEAT); if (!page) return NULL; @@ -51,7 +51,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) { - struct page *page = alloc_pages(GFP_KERNEL, 0); + struct page *page = alloc_pages(GFP_KERNEL|__GFP_REPEAT, 0); if (page == NULL) return NULL; diff --git a/include/asm-mips/pgalloc.h b/include/asm-mips/pgalloc.h index 9492a50dae76..f71b90b1c8e1 100644 --- a/include/asm-mips/pgalloc.h +++ b/include/asm-mips/pgalloc.h @@ -132,7 +132,7 @@ static inline pte_t *pte_alloc_one(struct mm_struct *mm, unsigned long address) { pte_t *pte; - pte = (pte_t *) __get_free_page(GFP_KERNEL); + pte = (pte_t *) __get_free_page(GFP_KERNEL|__GFP_REPEAT); if (pte) clear_page(pte); return pte; diff --git a/include/asm-mips64/pgalloc.h b/include/asm-mips64/pgalloc.h index 79b58408d660..2b777eebcc31 100644 --- a/include/asm-mips64/pgalloc.h +++ b/include/asm-mips64/pgalloc.h @@ -93,7 +93,7 @@ static inline pte_t *pte_alloc_one(struct mm_struct *mm, unsigned long address) { pte_t *pte; - pte = (pte_t *) __get_free_page(GFP_KERNEL); + pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT); if (pte) clear_page(pte); return pte; @@ -141,7 +141,7 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address) { pmd_t *pmd; - pmd = (pmd_t *) __get_free_pages(GFP_KERNEL, 1); + pmd = (pmd_t *)__get_free_pages(GFP_KERNEL|__GFP_REPEAT, 1); if (pmd) pmd_init((unsigned long)pmd, (unsigned long)invalid_pte_table); return pmd; diff --git a/include/asm-parisc/pgalloc.h b/include/asm-parisc/pgalloc.h index 32dcf11d084c..bbc02cb134b7 100644 --- a/include/asm-parisc/pgalloc.h +++ b/include/asm-parisc/pgalloc.h @@ -35,7 +35,7 @@ static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmd) static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address) { - pmd_t *pmd = (pmd_t *) __get_free_page(GFP_KERNEL); + pmd_t *pmd = (pmd_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT); if (pmd) clear_page(pmd); return pmd; @@ -73,7 +73,7 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte) static inline struct page * pte_alloc_one(struct mm_struct *mm, unsigned long address) { - struct page *page = alloc_page(GFP_KERNEL); + struct page *page = alloc_page(GFP_KERNEL|__GFP_REPEAT); if (likely(page != NULL)) clear_page(page_address(page)); return page; @@ -82,7 +82,7 @@ pte_alloc_one(struct mm_struct *mm, unsigned long address) static inline pte_t * pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr) { - pte_t *pte = (pte_t *) __get_free_page(GFP_KERNEL); + pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT); if (likely(pte != NULL)) clear_page(pte); return pte; diff --git a/include/asm-ppc64/pgalloc.h b/include/asm-ppc64/pgalloc.h index 0c461418bb48..9376b791bee7 100644 --- a/include/asm-ppc64/pgalloc.h +++ b/include/asm-ppc64/pgalloc.h @@ -31,19 +31,11 @@ pgd_free(pgd_t *pgd)
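The i386 <asm/div64.h> hunk a little earlier adds div_long_long_rem()/div_ll_X_l_rem(): a 64-by-32 division that yields quotient and remainder from a single divl instruction. As its warning comment says, divl raises a divide-error exception when the quotient does not fit in 32 bits, so callers must keep the dividend bounded. A minimal userspace sketch of the intended semantics, with plain C standing in for the inline asm (all names here are illustrative, not from the patch):

/* Semantics of div_ll_X_l_rem() from the div64.h hunk above.  Plain C
 * replaces the divl asm; the real helper traps (divide-error) when the
 * quotient exceeds 32 bits instead of truncating. */
#include <stdio.h>

static long div_ll_X_l_rem_sketch(long long divs, long div, long *rem)
{
        *rem = (long)(divs % div);
        return (long)(divs / div);
}

int main(void)
{
        long rem;
        long q = div_ll_X_l_rem_sketch(10000000000LL, 3000000L, &rem);

        printf("q=%ld rem=%ld\n", q, rem);      /* q=3333 rem=1000000 */
        return 0;
}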
static inline pmd_t * pmd_alloc_one(struct mm_struct *mm, unsigned long addr) { - int count = 0; pmd_t *pmd; - do { - pmd = (pmd_t *)__get_free_page(GFP_KERNEL); - if (pmd) - clear_page(pmd); - else { - current->state = TASK_UNINTERRUPTIBLE; - schedule_timeout(HZ); - } - } while (!pmd && (count++ < 10)); - + pmd = (pmd_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT); + if (pmd) + clear_page(pmd); return pmd; } @@ -62,19 +54,11 @@ pmd_free(pmd_t *pmd) static inline pte_t * pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr) { - int count = 0; pte_t *pte; - do { - pte = (pte_t *)__get_free_page(GFP_KERNEL); - if (pte) - clear_page(pte); - else { - current->state = TASK_UNINTERRUPTIBLE; - schedule_timeout(HZ); - } - } while (!pte && (count++ < 10)); - + pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT); + if (pte) + clear_page(pte); return pte; } diff --git a/include/asm-s390/pgalloc.h b/include/asm-s390/pgalloc.h index 67230ef0e0c5..e4729fb912fd 100644 --- a/include/asm-s390/pgalloc.h +++ b/include/asm-s390/pgalloc.h @@ -120,20 +120,13 @@ static inline pte_t * pte_alloc_one_kernel(struct mm_struct *mm, unsigned long vmaddr) { pte_t *pte; - int count; int i; - count = 0; - do { - pte = (pte_t *) __get_free_page(GFP_KERNEL); - if (pte != NULL) { - for (i=0; i < PTRS_PER_PTE; i++) - pte_clear(pte+i); - } else { - current->state = TASK_UNINTERRUPTIBLE; - schedule_timeout(HZ); - } - } while (!pte && (count++ < 10)); + pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT); + if (pte != NULL) { + for (i=0; i < PTRS_PER_PTE; i++) + pte_clear(pte+i); + } return pte; } diff --git a/include/asm-sh/pgalloc.h b/include/asm-sh/pgalloc.h index 9cc5a7dc98ed..a60b4c961a4f 100644 --- a/include/asm-sh/pgalloc.h +++ b/include/asm-sh/pgalloc.h @@ -35,7 +35,7 @@ static inline void pgd_free(pgd_t *pgd) static inline pte_t *pte_alloc_one(struct mm_struct *mm, unsigned long address) { - pte_t *pte = (pte_t *) __get_free_page(GFP_KERNEL); + pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT); if (pte) clear_page(pte); return pte; diff --git a/include/asm-sparc64/pgalloc.h b/include/asm-sparc64/pgalloc.h index 037c09b1a737..d3c3a7060664 100644 --- a/include/asm-sparc64/pgalloc.h +++ b/include/asm-sparc64/pgalloc.h @@ -159,7 +159,7 @@ static __inline__ pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addre pmd = pmd_alloc_one_fast(mm, address); if (!pmd) { - pmd = (pmd_t *)__get_free_page(GFP_KERNEL); + pmd = (pmd_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT); if (pmd) memset(pmd, 0, PAGE_SIZE); } diff --git a/include/asm-x86_64/pgalloc.h b/include/asm-x86_64/pgalloc.h index 4cae8e6a37a0..e15895c99a4e 100644 --- a/include/asm-x86_64/pgalloc.h +++ b/include/asm-x86_64/pgalloc.h @@ -31,12 +31,12 @@ extern __inline__ void pmd_free(pmd_t *pmd) static inline pmd_t *pmd_alloc_one (struct mm_struct *mm, unsigned long addr) { - return (pmd_t *) get_zeroed_page(GFP_KERNEL); + return (pmd_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT); } static inline pgd_t *pgd_alloc (struct mm_struct *mm) { - return (pgd_t *)get_zeroed_page(GFP_KERNEL); + return (pgd_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT); } static inline void pgd_free (pgd_t *pgd) @@ -48,12 +48,12 @@ static inline void pgd_free (pgd_t *pgd) static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - return (pte_t *) get_zeroed_page(GFP_KERNEL); + return (pte_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT); } static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) { - void *p = 
(void *)get_zeroed_page(GFP_KERNEL); + void *p = (void *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT); if (!p) return NULL; return virt_to_page(p); diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 64a70553d40c..26d71a5fb742 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -172,7 +172,7 @@ struct buffer_head * __getblk(struct block_device *, sector_t, int); void __brelse(struct buffer_head *); void __bforget(struct buffer_head *); struct buffer_head *__bread(struct block_device *, sector_t block, int size); -struct buffer_head *alloc_buffer_head(void); +struct buffer_head *alloc_buffer_head(int gfp_flags); void free_buffer_head(struct buffer_head * bh); void FASTCALL(unlock_buffer(struct buffer_head *bh)); void ll_rw_block(int, int, struct buffer_head * bh[]); diff --git a/include/linux/dcache.h b/include/linux/dcache.h index db979c3cf890..78fafd500123 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -270,7 +270,6 @@ static inline struct dentry *dget(struct dentry *dentry) if (!atomic_read(&dentry->d_count)) BUG(); atomic_inc(&dentry->d_count); - dentry->d_vfs_flags |= DCACHE_REFERENCED; } return dentry; } diff --git a/include/linux/genhd.h b/include/linux/genhd.h index c2432bd349e5..ac8fc9ef5bdb 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -64,7 +64,7 @@ struct hd_struct { sector_t nr_sects; struct kobject kobj; unsigned reads, read_sectors, writes, write_sectors; - int policy; + int policy, partno; }; #define GENHD_FL_REMOVABLE 1 @@ -89,7 +89,7 @@ struct gendisk { int minor_shift; /* number of times minor is shifted to get real minor */ char disk_name[16]; /* name of major driver */ - struct hd_struct *part; /* [indexed by minor] */ + struct hd_struct **part; /* [indexed by minor] */ struct block_device_operations *fops; struct request_queue *queue; void *private_data; diff --git a/include/linux/gfp.h b/include/linux/gfp.h index c475f7b41e59..ade6d9e97475 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -11,13 +11,26 @@ #define __GFP_DMA 0x01 #define __GFP_HIGHMEM 0x02 -/* Action modifiers - doesn't change the zoning */ +/* + * Action modifiers - doesn't change the zoning + * + * __GFP_REPEAT: Try hard to allocate the memory, but the allocation attempt + * _might_ fail. This depends upon the particular VM implementation. + * + * __GFP_NOFAIL: The VM implementation _must_ retry infinitely: the caller + * cannot handle allocation failures. + * + * __GFP_NORETRY: The VM implementation must not retry indefinitely. + */ #define __GFP_WAIT 0x10 /* Can wait and reschedule? */ #define __GFP_HIGH 0x20 /* Should access emergency pools? */ #define __GFP_IO 0x40 /* Can start physical IO? */ #define __GFP_FS 0x80 /* Can call down to low-level FS? */ #define __GFP_COLD 0x100 /* Cache-cold page required */ #define __GFP_NOWARN 0x200 /* Suppress page allocation failure warning */ +#define __GFP_REPEAT 0x400 /* Retry the allocation. Might fail */ +#define __GFP_NOFAIL 0x800 /* Retry for ever. Cannot fail */ +#define __GFP_NORETRY 0x1000 /* Do not retry. 
Might fail */ #define GFP_ATOMIC (__GFP_HIGH) #define GFP_NOIO (__GFP_WAIT) diff --git a/include/linux/linux_logo.h b/include/linux/linux_logo.h index ea05e16c2b16..9c01bde5bf1b 100644 --- a/include/linux/linux_logo.h +++ b/include/linux/linux_logo.h @@ -32,6 +32,6 @@ struct linux_logo { const unsigned char *data; }; -extern const struct linux_logo * __init find_logo(int depth); +extern const struct linux_logo *fb_find_logo(int depth); #endif /* _LINUX_LINUX_LOGO_H */ diff --git a/include/linux/mm.h b/include/linux/mm.h index ede6c5ff4181..6aa89d73f65b 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -594,28 +594,10 @@ extern struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned lon extern unsigned int nr_used_zone_pages(void); -#ifdef CONFIG_MMU extern struct page * vmalloc_to_page(void *addr); extern struct page * follow_page(struct mm_struct *mm, unsigned long address, int write); extern int remap_page_range(struct vm_area_struct *vma, unsigned long from, unsigned long to, unsigned long size, pgprot_t prot); -#else -static inline struct page * vmalloc_to_page(void *addr) -{ - return NULL; -} -static inline struct page * follow_page(struct mm_struct *mm, - unsigned long address, int write) -{ - return NULL; -} -static inline int remap_page_range(struct vm_area_struct *vma, - unsigned long from, unsigned long to, - unsigned long size, pgprot_t prot) -{ - return -EPERM; -} -#endif /* CONFIG_MMU */ #endif /* __KERNEL__ */ #endif /* _LINUX_MM_H */ diff --git a/include/linux/nfsd/syscall.h b/include/linux/nfsd/syscall.h index 37b8901433d6..b6fa4d1839e3 100644 --- a/include/linux/nfsd/syscall.h +++ b/include/linux/nfsd/syscall.h @@ -59,7 +59,7 @@ struct nfsctl_client { struct nfsctl_export { char ex_client[NFSCLNT_IDMAX+1]; char ex_path[NFS_MAXPATHLEN+1]; - __kernel_dev_t ex_dev; + __kernel_old_dev_t ex_dev; __kernel_ino_t ex_ino; int ex_flags; __kernel_uid_t ex_anon_uid; @@ -104,7 +104,6 @@ struct nfsctl_arg { #define ca_export u.u_export #define ca_getfd u.u_getfd #define ca_getfs u.u_getfs -#define ca_authd u.u_authd }; union nfsctl_res { diff --git a/include/linux/sched.h b/include/linux/sched.h index c30c44f3cfcc..eee58c7354b4 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -465,6 +465,7 @@ do { if (atomic_dec_and_test(&(tsk)->usage)) __put_task_struct(tsk); } while(0) #define PF_FROZEN 0x00010000 /* frozen for system suspend */ #define PF_FSTRANS 0x00020000 /* inside a filesystem transaction */ #define PF_KSWAPD 0x00040000 /* I am kswapd */ +#define PF_SWAPOFF 0x00080000 /* I am in swapoff */ #if CONFIG_SMP extern void set_cpus_allowed(task_t *p, unsigned long new_mask); diff --git a/include/linux/slab.h b/include/linux/slab.h index bdc5256de12a..603748b9b349 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -22,7 +22,7 @@ typedef struct kmem_cache_s kmem_cache_t; #define SLAB_KERNEL GFP_KERNEL #define SLAB_DMA GFP_DMA -#define SLAB_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS|__GFP_COLD|__GFP_NOWARN) +#define SLAB_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS|__GFP_COLD|__GFP_NOWARN|__GFP_REPEAT|__GFP_NOFAIL|__GFP_NORETRY) #define SLAB_NO_GROW 0x00001000UL /* don't grow a cache */ /* flags to pass to kmem_cache_create(). 
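Nearly all of the arch pgalloc conversions above are the same transformation: an open-coded retry loop (set TASK_UNINTERRUPTIBLE, schedule_timeout(HZ), give up after ten tries) collapses into one allocation carrying __GFP_REPEAT, so retry policy now lives behind the three modifiers documented in the gfp.h hunk. A kernel-style sketch of the pattern, assuming the usual pgalloc environment; the function name is illustrative:

/* The conversion pattern applied across the arch pgalloc headers in
 * this diff: let the page allocator do the retrying.  __GFP_REPEAT
 * tries hard but may still fail, so callers keep their NULL checks. */
static inline pte_t *pte_alloc_one_kernel_sketch(struct mm_struct *mm,
                                                 unsigned long address)
{
        pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL | __GFP_REPEAT);

        if (pte)
                clear_page(pte);
        return pte;
}

A caller that genuinely cannot tolerate failure would pass __GFP_NOFAIL instead; __GFP_NORETRY is the opposite knob, and the mm/page_alloc.c hunk later in this diff shows how all three are honoured.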
diff --git a/include/linux/time.h b/include/linux/time.h index 4d7238025fe9..fdab2abc43be 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -26,6 +26,16 @@ struct timezone { #include <linux/spinlock.h> #include <linux/seqlock.h> +#include <linux/timex.h> +#include <asm/div64.h> +#ifndef div_long_long_rem + +#define div_long_long_rem(dividend,divisor,remainder) ({ \ + u64 result = dividend; \ + *remainder = do_div(result,divisor); \ + result; }) + +#endif /* * Have the 32 bit jiffies value wrap 5 minutes after boot @@ -59,25 +69,52 @@ struct timezone { #ifndef NSEC_PER_USEC #define NSEC_PER_USEC (1000L) #endif +/* + * We want to do realistic conversions of time so we need to use the same + * values the update wall clock code uses as the jiffie size. This value + * is: TICK_NSEC(TICK_USEC) (both of which are defined in timex.h). This + * is a constant and is in nanoseconds. We will used scaled math and + * with a scales defined here as SEC_JIFFIE_SC, USEC_JIFFIE_SC and + * NSEC_JIFFIE_SC. Note that these defines contain nothing but + * constants and so are computed at compile time. SHIFT_HZ (computed in + * timex.h) adjusts the scaling for different HZ values. + */ +#define SEC_JIFFIE_SC (30 - SHIFT_HZ) +#define NSEC_JIFFIE_SC (SEC_JIFFIE_SC + 30) +#define USEC_JIFFIE_SC (SEC_JIFFIE_SC + 20) +#define SEC_CONVERSION ((unsigned long)(((u64)NSEC_PER_SEC << SEC_JIFFIE_SC) /\ + (u64)TICK_NSEC(TICK_USEC))) +#define NSEC_CONVERSION ((unsigned long)(((u64)1 << NSEC_JIFFIE_SC) / \ + (u64)TICK_NSEC(TICK_USEC))) +#define USEC_CONVERSION \ + ((unsigned long)(((u64)NSEC_PER_USEC << USEC_JIFFIE_SC)/ \ + (u64)TICK_NSEC(TICK_USEC))) +#define MAX_SEC_IN_JIFFIES \ + (u32)((u64)((u64)MAX_JIFFY_OFFSET * TICK_NSEC(TICK_USEC)) / NSEC_PER_SEC) static __inline__ unsigned long timespec_to_jiffies(struct timespec *value) { unsigned long sec = value->tv_sec; - long nsec = value->tv_nsec; + long nsec = value->tv_nsec + TICK_NSEC(TICK_USEC) - 1; - if (sec >= (MAX_JIFFY_OFFSET / HZ)) + if (sec >= MAX_SEC_IN_JIFFIES) return MAX_JIFFY_OFFSET; - nsec += 1000000000L / HZ - 1; - nsec /= 1000000000L / HZ; - return HZ * sec + nsec; + return (((u64)sec * SEC_CONVERSION) + + (((u64)nsec * NSEC_CONVERSION) >> + (NSEC_JIFFIE_SC - SEC_JIFFIE_SC))) >> SEC_JIFFIE_SC; + } static __inline__ void jiffies_to_timespec(unsigned long jiffies, struct timespec *value) { - value->tv_nsec = (jiffies % HZ) * (1000000000L / HZ); - value->tv_sec = jiffies / HZ; + /* + * Convert jiffies to nanoseconds and seperate with + * one divide. + */ + u64 nsec = (u64)jiffies * TICK_NSEC(TICK_USEC); + value->tv_sec = div_long_long_rem(nsec, NSEC_PER_SEC, &value->tv_nsec); } /* Same for "timeval" */ @@ -85,20 +122,25 @@ static __inline__ unsigned long timeval_to_jiffies(struct timeval *value) { unsigned long sec = value->tv_sec; - long usec = value->tv_usec; + long usec = value->tv_usec + USEC_PER_SEC / HZ - 1; - if (sec >= (MAX_JIFFY_OFFSET / HZ)) + if (sec >= MAX_SEC_IN_JIFFIES) return MAX_JIFFY_OFFSET; - usec += 1000000L / HZ - 1; - usec /= 1000000L / HZ; - return HZ * sec + usec; + return (((u64)sec * SEC_CONVERSION) + + (((u64)usec * USEC_CONVERSION) >> + (USEC_JIFFIE_SC - SEC_JIFFIE_SC))) >> SEC_JIFFIE_SC; } static __inline__ void jiffies_to_timeval(unsigned long jiffies, struct timeval *value) { - value->tv_usec = (jiffies % HZ) * (1000000L / HZ); - value->tv_sec = jiffies / HZ; + /* + * Convert jiffies to nanoseconds and seperate with + * one divide. 
+ */ + u64 nsec = (u64)jiffies * TICK_NSEC(TICK_USEC); + value->tv_sec = div_long_long_rem(nsec, NSEC_PER_SEC, &value->tv_usec); + value->tv_usec /= NSEC_PER_USEC; } static __inline__ int timespec_equal(struct timespec *a, struct timespec *b) @@ -140,6 +182,7 @@ mktime (unsigned int year, unsigned int mon, } extern struct timespec xtime; +extern struct timespec wall_to_monotonic; extern seqlock_t xtime_lock; static inline unsigned long get_seconds(void) @@ -200,6 +243,9 @@ struct itimerval { #define CLOCK_MONOTONIC_HR 5 #define MAX_CLOCKS 6 +#define CLOCKS_MASK (CLOCK_REALTIME | CLOCK_MONOTONIC | \ + CLOCK_REALTIME_HR | CLOCK_MONOTONIC_HR) +#define CLOCKS_MONO (CLOCK_MONOTONIC & CLOCK_MONOTONIC_HR) /* * The various flags for setting POSIX.1b interval timers. diff --git a/include/linux/timex.h b/include/linux/timex.h index 5b2b0ac18ae7..6c00606c6e33 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h @@ -51,7 +51,6 @@ #ifndef _LINUX_TIMEX_H #define _LINUX_TIMEX_H -#include <linux/time.h> #include <asm/param.h> /* @@ -177,6 +176,7 @@ /* a value TUSEC for TICK_USEC (can be set bij adjtimex) */ #define TICK_NSEC(TUSEC) (SH_DIV (TUSEC * USER_HZ * 1000, ACTHZ, 8)) +#include <linux/time.h> /* * syscall interface - used (mainly by NTP daemon) * to discipline kernel clock oscillator diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index d90763253759..570778ddeae9 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -27,7 +27,8 @@ extern void *vmalloc_32(unsigned long size); extern void *__vmalloc(unsigned long size, int gfp_mask, pgprot_t prot); extern void vfree(void *addr); -extern void *vmap(struct page **pages, unsigned int count); +extern void *vmap(struct page **pages, unsigned int count, + unsigned long flags, pgprot_t prot); extern void vunmap(void *addr); /* diff --git a/ipc/shm.c b/ipc/shm.c index e97264937900..19c06135b184 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -361,27 +361,35 @@ static inline unsigned long copy_shminfo_to_user(void *buf, struct shminfo64 *in } } -static void shm_get_stat (unsigned long *rss, unsigned long *swp) +static void shm_get_stat(unsigned long *rss, unsigned long *swp) { - struct shmem_inode_info *info; int i; *rss = 0; *swp = 0; - for(i = 0; i <= shm_ids.max_id; i++) { - struct shmid_kernel* shp; - struct inode * inode; + for (i = 0; i <= shm_ids.max_id; i++) { + struct shmid_kernel *shp; + struct inode *inode; shp = shm_get(i); - if(shp == NULL) + if(!shp) continue; + inode = shp->shm_file->f_dentry->d_inode; - info = SHMEM_I(inode); - spin_lock (&info->lock); - *rss += inode->i_mapping->nrpages; - *swp += info->swapped; - spin_unlock (&info->lock); + + if (is_file_hugepages(shp->shm_file)) { + struct address_space *mapping = inode->i_mapping; + spin_lock(&mapping->page_lock); + *rss += (HPAGE_SIZE/PAGE_SIZE)*mapping->nrpages; + spin_unlock(&mapping->page_lock); + } else { + struct shmem_inode_info *info = SHMEM_I(inode); + spin_lock(&info->lock); + *rss += inode->i_mapping->nrpages; + *swp += info->swapped; + spin_unlock(&info->lock); + } } } @@ -737,21 +745,66 @@ out: * detach and kill segment if marked destroyed. * The work is done in shm_close. 
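A quick check of the arithmetic in the <linux/time.h> hunk further up, before the shm rework continues: rather than dividing by (NSEC_PER_SEC / HZ), the new conversions multiply by precomputed fixed-point constants and shift, which stays exact even when ACTHZ makes the tick a non-integral number of microseconds. A self-contained userspace sketch, assuming HZ=100, SHIFT_HZ=7 and an idealized 10 ms tick purely for illustration (the real TICK_NSEC comes from timex.h and absorbs the ACTHZ correction):

/* Scaled-math timespec->jiffies conversion as introduced above, with
 * fixed illustrative constants.  2.5 s at HZ=100 is exactly 250 ticks,
 * and the round-up term must not push it to 251. */
#include <stdio.h>
#include <stdint.h>

#define SHIFT_HZ        7               /* timex.h value for HZ=100 */
#define NSEC_PER_SEC    1000000000L
#define TICK_NSEC       10000000L       /* idealized 10 ms tick     */

#define SEC_JIFFIE_SC   (30 - SHIFT_HZ)
#define NSEC_JIFFIE_SC  (SEC_JIFFIE_SC + 30)
#define SEC_CONVERSION \
        ((unsigned long)(((uint64_t)NSEC_PER_SEC << SEC_JIFFIE_SC) / TICK_NSEC))
#define NSEC_CONVERSION \
        ((unsigned long)(((uint64_t)1 << NSEC_JIFFIE_SC) / TICK_NSEC))

static unsigned long timespec_to_jiffies_sketch(long sec, long nsec)
{
        nsec += TICK_NSEC - 1;          /* round up to a whole tick */
        return (((uint64_t)sec * SEC_CONVERSION) +
                (((uint64_t)nsec * NSEC_CONVERSION) >>
                 (NSEC_JIFFIE_SC - SEC_JIFFIE_SC))) >> SEC_JIFFIE_SC;
}

int main(void)
{
        printf("%lu\n", timespec_to_jiffies_sketch(2, 500000000L)); /* 250 */
        return 0;
}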
*/ -asmlinkage long sys_shmdt (char *shmaddr) +asmlinkage long sys_shmdt(char *shmaddr) { struct mm_struct *mm = current->mm; - struct vm_area_struct *shmd, *shmdnext; + struct vm_area_struct *vma, *next; + unsigned long addr = (unsigned long)shmaddr; + loff_t size = 0; int retval = -EINVAL; down_write(&mm->mmap_sem); - for (shmd = mm->mmap; shmd; shmd = shmdnext) { - shmdnext = shmd->vm_next; - if ((shmd->vm_ops == &shm_vm_ops || (shmd->vm_flags & VM_HUGETLB)) - && shmd->vm_start - (shmd->vm_pgoff << PAGE_SHIFT) == (ulong) shmaddr) { - do_munmap(mm, shmd->vm_start, shmd->vm_end - shmd->vm_start); + + /* + * If it had been mremap()'d, the starting address would not + * match the usual checks anyway. So assume all vma's are + * above the starting address given. + */ + vma = find_vma(mm, addr); + + while (vma) { + next = vma->vm_next; + + /* + * Check if the starting address would match, i.e. it's + * a fragment created by mprotect() and/or munmap(), or it + * otherwise it starts at this address with no hassles. + */ + if ((vma->vm_ops == &shm_vm_ops || is_vm_hugetlb_page(vma)) && + (vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) { + + + size = vma->vm_file->f_dentry->d_inode->i_size; + do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start); + /* + * We discovered the size of the shm segment, so + * break out of here and fall through to the next + * loop that uses the size information to stop + * searching for matching vma's. + */ retval = 0; + vma = next; + break; } + vma = next; + } + + /* + * We need look no further than the maximum address a fragment + * could possibly have landed at. Also cast things to loff_t to + * prevent overflows and make comparisions vs. equal-width types. + */ + while (vma && (loff_t)(vma->vm_end - addr) <= size) { + next = vma->vm_next; + + /* finding a matching vma now does not alter retval */ + if ((vma->vm_ops == &shm_vm_ops || is_vm_hugetlb_page(vma)) && + (vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) + + do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start); + vma = next; } + up_write(&mm->mmap_sem); return retval; } diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index 54fe15f5c0b3..bca12ba294e4 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -33,7 +33,12 @@ result; }) #endif +#define CLOCK_REALTIME_RES TICK_NSEC(TICK_USEC) // In nano seconds. +static inline u64 mpy_l_X_l_ll(unsigned long mpy1,unsigned long mpy2) +{ + return (u64)mpy1 * mpy2; +} /* * Management arrays for POSIX timers. Timers are kept in slab memory * Timer ids are allocated by an external routine that keeps track of the @@ -48,7 +53,7 @@ * The idr_get_new *may* call slab for more memory so it must not be * called under a spin lock. Likewise idr_remore may release memory * (but it may be ok to do this under a lock...). - * idr_find is just a memory look up and is quite fast. A zero return + * idr_find is just a memory look up and is quite fast. A -1 return * indicates that the requested id does not exist. */ @@ -82,6 +87,7 @@ static spinlock_t idr_lock = SPIN_LOCK_UNLOCKED; * For some reason mips/mips64 define the SIGEV constants plus 128. * Here we define a mask to get rid of the common bits. The * optimizer should make this costless to all but mips. + * Note that no common bits (the non-mips case) will give 0xffffffff. */ #define MIPS_SIGEV ~(SIGEV_NONE & \ SIGEV_SIGNAL & \ @@ -93,7 +99,7 @@ static spinlock_t idr_lock = SPIN_LOCK_UNLOCKED; * The timer ID is turned into a timer address by idr_find(). 
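Back in the sys_shmdt() rewrite above, both scan passes apply the same test, which is worth spelling out on its own: a vma belongs to the segment being detached if it is shm-backed (or hugetlb-backed) and its distance from the attach address equals its page offset into the file. A hedged sketch; the helper name is invented, the expression is taken straight from the hunk:

/* The vma-matching predicate used by both scan loops in the new
 * sys_shmdt(): fragments produced by mprotect()/munmap() still match
 * because vm_pgoff tracks how far into the segment a fragment sits. */
static int shm_vma_matches(struct vm_area_struct *vma, unsigned long addr)
{
        return (vma->vm_ops == &shm_vm_ops || is_vm_hugetlb_page(vma)) &&
               (vma->vm_start - addr) / PAGE_SIZE == vma->vm_pgoff;
}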
* Verifying a valid ID consists of: * - * a) checking that idr_find() returns other than zero. + * a) checking that idr_find() returns other than -1. * b) checking that the timer id matches the one in the timer itself. * c) that the timer owner is in the callers thread group. */ @@ -162,6 +168,8 @@ static struct k_clock posix_clocks[MAX_CLOCKS]; void register_posix_clock(int clock_id, struct k_clock *new_clock); static int do_posix_gettime(struct k_clock *clock, struct timespec *tp); +static u64 do_posix_clock_monotonic_gettime_parts( + struct timespec *tp, struct timespec *mo); int do_posix_clock_monotonic_gettime(struct timespec *tp); int do_posix_clock_monotonic_settime(struct timespec *tp); static struct k_itimer *lock_timer(timer_t timer_id, unsigned long *flags); @@ -172,8 +180,8 @@ static inline void unlock_timer(struct k_itimer *timr, unsigned long flags); */ static __init int init_posix_timers(void) { - struct k_clock clock_realtime = {.res = NSEC_PER_SEC / HZ }; - struct k_clock clock_monotonic = {.res = NSEC_PER_SEC / HZ, + struct k_clock clock_realtime = {.res = CLOCK_REALTIME_RES }; + struct k_clock clock_monotonic = {.res = CLOCK_REALTIME_RES, .clock_get = do_posix_clock_monotonic_gettime, .clock_set = do_posix_clock_monotonic_settime }; @@ -192,7 +200,7 @@ __initcall(init_posix_timers); static void tstojiffie(struct timespec *tp, int res, u64 *jiff) { - unsigned long sec = tp->tv_sec; + long sec = tp->tv_sec; long nsec = tp->tv_nsec + res - 1; if (nsec > NSEC_PER_SEC) { @@ -201,35 +209,14 @@ static void tstojiffie(struct timespec *tp, int res, u64 *jiff) } /* - * A note on jiffy overflow: It is possible for the system to - * have been up long enough for the jiffies quanity to overflow. - * In order for correct timer evaluations we require that the - * specified time be somewhere between now and now + (max - * unsigned int/2). Times beyond this will be truncated back to - * this value. This is done in the absolute adjustment code, - * below. Here it is enough to just discard the high order - * bits. - */ - *jiff = (u64)sec * HZ; - /* - * Do the res thing. (Don't forget the add in the declaration of nsec) - */ - nsec -= nsec % res; - /* - * Split to jiffie and sub jiffie - */ - *jiff += nsec / (NSEC_PER_SEC / HZ); -} - -static void tstotimer(struct itimerspec *time, struct k_itimer *timer) -{ - u64 result; - int res = posix_clocks[timer->it_clock].res; - - tstojiffie(&time->it_value, res, &result); - timer->it_timer.expires = (unsigned long)result; - tstojiffie(&time->it_interval, res, &result); - timer->it_incr = (unsigned long)result; + * The scaling constants are defined in <linux/time.h> + * The difference between there and here is that we do the + * res rounding and compute a 64-bit result (well so does that + * but it then throws away the high bits). + */ + *jiff = (mpy_l_X_l_ll(sec, SEC_CONVERSION) + + (mpy_l_X_l_ll(nsec, NSEC_CONVERSION) >> + (NSEC_JIFFIE_SC - SEC_JIFFIE_SC))) >> SEC_JIFFIE_SC; } static void schedule_next_timer(struct k_itimer *timr) @@ -690,57 +677,81 @@ sys_timer_getoverrun(timer_t timer_id) * If it is relative time, we need to add the current (CLOCK_MONOTONIC) * time to it to get the proper time for the timer. 
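The ID-validation steps (a), (b), (c) described a little above amount to a three-way check before a timer ID is trusted. A hedged reconstruction, assuming the k_itimer fields and idr handle visible elsewhere in this file; the helper name and exact body are illustrative, not lines from the patch:

/* Timer-ID validation as the comment above describes it.  Note the
 * patch's convention that idr_find() signals a missing id with -1,
 * not 0. */
static struct k_itimer *validate_timer_id_sketch(timer_t timer_id)
{
        struct k_itimer *timr = idr_find(&posix_timers_id, (int)timer_id);

        if (timr == (struct k_itimer *)-1)              /* (a) no such id  */
                return NULL;
        if (timr->it_id != timer_id)                    /* (b) reused slot */
                return NULL;
        if (!timr->it_process ||                        /* (c) not in the  */
            timr->it_process->tgid != current->tgid)    /* caller's group  */
                return NULL;
        return timr;
}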
*/ -static int adjust_abs_time(struct k_clock *clock, struct timespec *tp, int abs) +static int adjust_abs_time(struct k_clock *clock, struct timespec *tp, + int abs, u64 *exp) { struct timespec now; - struct timespec oc; - do_posix_clock_monotonic_gettime(&now); - - if (!abs || (posix_clocks[CLOCK_MONOTONIC].clock_get != - clock->clock_get)) { - if (abs) - do_posix_gettime(clock, &oc); - else - oc.tv_nsec = oc.tv_sec = 0; - - tp->tv_sec += now.tv_sec - oc.tv_sec; - tp->tv_nsec += now.tv_nsec - oc.tv_nsec; + struct timespec oc = *tp; + struct timespec wall_to_mono; + u64 jiffies_64_f; + int rtn =0; + if (abs) { + /* + * The mask pick up the 4 basic clocks + */ + if (!(clock - &posix_clocks[0]) & ~CLOCKS_MASK) { + jiffies_64_f = do_posix_clock_monotonic_gettime_parts( + &now, &wall_to_mono); + /* + * If we are doing a MONOTONIC clock + */ + if((clock - &posix_clocks[0]) & CLOCKS_MONO){ + now.tv_sec += wall_to_mono.tv_sec; + now.tv_nsec += wall_to_mono.tv_nsec; + } + } else { + /* + * Not one of the basic clocks + */ + do_posix_gettime(clock, &now); + jiffies_64_f = get_jiffies_64(); + } + /* + * Take away now to get delta + */ + oc.tv_sec -= now.tv_sec; + oc.tv_nsec -= now.tv_nsec; /* * Normalize... */ - if ((tp->tv_nsec - NSEC_PER_SEC) >= 0) { - tp->tv_nsec -= NSEC_PER_SEC; - tp->tv_sec++; + while ((oc.tv_nsec - NSEC_PER_SEC) >= 0) { + oc.tv_nsec -= NSEC_PER_SEC; + oc.tv_sec++; } - if ((tp->tv_nsec) < 0) { - tp->tv_nsec += NSEC_PER_SEC; - tp->tv_sec--; + while ((oc.tv_nsec) < 0) { + oc.tv_nsec += NSEC_PER_SEC; + oc.tv_sec--; } + }else{ + jiffies_64_f = get_jiffies_64(); } /* - * Check if the requested time is prior to now (if so set now) or - * is more than the timer code can handle (if so we error out). - * The (unsigned) catches the case of prior to "now" with the same - * test. Only on failure do we sort out what happened, and then - * we use the (unsigned) to error out negative seconds. + * Check if the requested time is prior to now (if so set now) + */ + if (oc.tv_sec < 0) + oc.tv_sec = oc.tv_nsec = 0; + tstojiffie(&oc, clock->res, exp); + + /* + * Check if the requested time is more than the timer code + * can handle (if so we error out but return the value too). */ - if ((unsigned) (tp->tv_sec - now.tv_sec) > (MAX_JIFFY_OFFSET / HZ)) { - if ((unsigned) tp->tv_sec < now.tv_sec) { - tp->tv_sec = now.tv_sec; - tp->tv_nsec = now.tv_nsec; - } else + if (*exp > ((u64)MAX_JIFFY_OFFSET)) /* * This is a considered response, not exactly in * line with the standard (in fact it is silent on - * possible overflows). We assume such a large + * possible overflows). We assume such a large * value is ALMOST always a programming error and * try not to compound it by setting a really dumb * value. */ - return -EINVAL; - } - return 0; + rtn = -EINVAL; + /* + * return the actual jiffies expire time, full 64 bits + */ + *exp += jiffies_64_f; + return rtn; } /* Set a POSIX.1b interval timer. */ @@ -750,6 +761,7 @@ do_timer_settime(struct k_itimer *timr, int flags, struct itimerspec *new_setting, struct itimerspec *old_setting) { struct k_clock *clock = &posix_clocks[timr->it_clock]; + u64 expire_64; if (old_setting) do_timer_gettime(timr, old_setting); @@ -788,14 +800,15 @@ do_timer_settime(struct k_itimer *timr, int flags, return 0; } - if ((flags & TIMER_ABSTIME) && - (clock->clock_get != do_posix_clock_monotonic_gettime)) - // FIXME: what is this? 
- ; if (adjust_abs_time(clock, - &new_setting->it_value, flags & TIMER_ABSTIME)) + &new_setting->it_value, flags & TIMER_ABSTIME, + &expire_64)) { return -EINVAL; - tstotimer(new_setting, timr); + } + timr->it_timer.expires = (unsigned long)expire_64; + tstojiffie(&new_setting->it_interval, clock->res, &expire_64); + timr->it_incr = (unsigned long)expire_64; + /* * For some reason the timer does not fire immediately if expires is @@ -964,30 +977,46 @@ static int do_posix_gettime(struct k_clock *clock, struct timespec *tp) * Note also that the while loop assures that the sub_jiff_offset * will be less than a jiffie, thus no need to normalize the result. * Well, not really, if called with ints off :( - * - * HELP, this code should make an attempt at resolution beyond the - * jiffie. Trouble is this is "arch" dependent... */ -int do_posix_clock_monotonic_gettime(struct timespec *tp) +static u64 do_posix_clock_monotonic_gettime_parts( + struct timespec *tp, struct timespec *mo) { - long sub_sec; - u64 jiffies_64_f; - -#if (BITS_PER_LONG > 32) - jiffies_64_f = jiffies_64; -#else + u64 jiff; + struct timeval tpv; unsigned int seq; do { seq = read_seqbegin(&xtime_lock); - jiffies_64_f = jiffies_64; + do_gettimeofday(&tpv); + *mo = wall_to_monotonic; + jiff = jiffies_64; - } while (read_seqretry(&xtime_lock, seq)); -#endif - tp->tv_sec = div_long_long_rem(jiffies_64_f, HZ, &sub_sec); - tp->tv_nsec = sub_sec * (NSEC_PER_SEC / HZ); + } while(read_seqretry(&xtime_lock, seq)); + + /* + * Love to get this before it is converted to usec. + * It would save a div AND a mpy. + */ + tp->tv_sec = tpv.tv_sec; + tp->tv_nsec = tpv.tv_usec * NSEC_PER_USEC; + return jiff; +} + +int do_posix_clock_monotonic_gettime(struct timespec *tp) +{ + struct timespec wall_to_mono; + + do_posix_clock_monotonic_gettime_parts(tp, &wall_to_mono); + + tp->tv_sec += wall_to_mono.tv_sec; + tp->tv_nsec += wall_to_mono.tv_nsec; + + if ((tp->tv_nsec - NSEC_PER_SEC) > 0) { + tp->tv_nsec -= NSEC_PER_SEC; + tp->tv_sec++; + } return 0; } @@ -1138,7 +1167,7 @@ do_clock_nanosleep(clockid_t which_clock, int flags, struct timespec *tsave) struct timespec t; struct timer_list new_timer; DECLARE_WAITQUEUE(abs_wqueue, current); - u64 rq_time = 0; + u64 rq_time = (u64)0; s64 left; int abs; struct restart_block *restart_block = @@ -1163,7 +1192,7 @@ do_clock_nanosleep(clockid_t which_clock, int flags, struct timespec *tsave) if (!rq_time) return -EINTR; left = rq_time - get_jiffies_64(); - if (left <= 0LL) + if (left <= (s64)0) return 0; /* Already passed */ } @@ -1174,14 +1203,14 @@ do_clock_nanosleep(clockid_t which_clock, int flags, struct timespec *tsave) do { t = *tsave; if (abs || !rq_time) { - adjust_abs_time(&posix_clocks[which_clock], &t, abs); - tstojiffie(&t, posix_clocks[which_clock].res, &rq_time); + adjust_abs_time(&posix_clocks[which_clock], &t, abs, + &rq_time); } left = rq_time - get_jiffies_64(); - if (left >= MAX_JIFFY_OFFSET) - left = MAX_JIFFY_OFFSET; - if (left < 0) + if (left >= (s64)MAX_JIFFY_OFFSET) + left = (s64)MAX_JIFFY_OFFSET; + if (left < (s64)0) break; new_timer.expires = jiffies + left; @@ -1192,13 +1221,12 @@ do_clock_nanosleep(clockid_t which_clock, int flags, struct timespec *tsave) del_timer_sync(&new_timer); left = rq_time - get_jiffies_64(); - } while (left > 0 && !test_thread_flag(TIF_SIGPENDING)); + } while (left > (s64)0 && !test_thread_flag(TIF_SIGPENDING)); if (abs_wqueue.task_list.next) finish_wait(&nanosleep_abs_wqueue, &abs_wqueue); - if (left > 0) { - unsigned long rmd; + if (left > (s64)0) { /* * 
Always restart abs calls from scratch to pick up any @@ -1207,9 +1235,10 @@ do_clock_nanosleep(clockid_t which_clock, int flags, struct timespec *tsave) if (abs) return -ERESTARTNOHAND; - tsave->tv_sec = div_long_long_rem(left, HZ, &rmd); - tsave->tv_nsec = rmd * (NSEC_PER_SEC / HZ); - + left *= TICK_NSEC(TICK_USEC); + tsave->tv_sec = div_long_long_rem(left, + NSEC_PER_SEC, + &tsave->tv_nsec); restart_block->fn = clock_nanosleep_restart; restart_block->arg0 = which_clock; restart_block->arg1 = (unsigned long)tsave; diff --git a/kernel/sched.c b/kernel/sched.c index 43b08b5ec658..ae2dbdf33d7d 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1091,7 +1091,7 @@ out: #define IDLE_REBALANCE_TICK (HZ/1000 ?: 1) #define BUSY_REBALANCE_TICK (HZ/5 ?: 1) #define IDLE_NODE_REBALANCE_TICK (IDLE_REBALANCE_TICK * 5) -#define BUSY_NODE_REBALANCE_TICK (BUSY_REBALANCE_TICK * 100) +#define BUSY_NODE_REBALANCE_TICK (BUSY_REBALANCE_TICK * 2) #ifdef CONFIG_NUMA static void balance_node(runqueue_t *this_rq, int idle, int this_cpu) diff --git a/kernel/timer.c b/kernel/timer.c index 4aaf025ee8ba..caa37716f860 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -441,8 +441,16 @@ repeat: unsigned long tick_usec = TICK_USEC; /* ACTHZ period (usec) */ unsigned long tick_nsec = TICK_NSEC(TICK_USEC); /* USER_HZ period (nsec) */ -/* The current time */ +/* + * The current time + * wall_to_monotonic is what we need to add to xtime (or xtime corrected + * for sub jiffie times) to get to monotonic time. Monotonic is pegged at zero + * at system boot time, so wall_to_monotonic will be negative, + * however, we will ALWAYS keep the tv_nsec part positive so we can use + * the usual normalization. + */ struct timespec xtime __attribute__ ((aligned (16))); +struct timespec wall_to_monotonic __attribute__ ((aligned (16))); /* Don't completely fail for HZ > 500. */ int tickadj = 500/HZ ? 
: 1; /* microsecs */ @@ -508,6 +516,7 @@ static void second_overflow(void) case TIME_INS: if (xtime.tv_sec % 86400 == 0) { xtime.tv_sec--; + wall_to_monotonic.tv_sec++; time_state = TIME_OOP; clock_was_set(); printk(KERN_NOTICE "Clock: inserting leap second 23:59:60 UTC\n"); @@ -517,6 +526,7 @@ static void second_overflow(void) case TIME_DEL: if ((xtime.tv_sec + 1) % 86400 == 0) { xtime.tv_sec++; + wall_to_monotonic.tv_sec--; time_state = TIME_WAIT; clock_was_set(); printk(KERN_NOTICE "Clock: deleting leap second 23:59:59 UTC\n"); diff --git a/mm/filemap.c b/mm/filemap.c index 884a9f50c6fe..40008f8f3626 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -31,12 +31,11 @@ * This is needed for the following functions: * - try_to_release_page * - block_invalidatepage - * - page_has_buffers * - generic_osync_inode * - * FIXME: remove all knowledge of the buffer layer from this file + * FIXME: remove all knowledge of the buffer layer from the core VM */ -#include <linux/buffer_head.h> +#include <linux/buffer_head.h> /* for generic_osync_inode */ #include <asm/uaccess.h> #include <asm/mman.h> diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 34a3aeb50799..e537462aaf58 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -129,6 +129,8 @@ static struct task_struct * select_bad_process(void) chosen = p; maxpoints = points; } + if (p->flags & PF_SWAPOFF) + return p; } while_each_thread(g, p); return chosen; diff --git a/mm/page-writeback.c b/mm/page-writeback.c index aaa70d02b859..c33c6a207426 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -462,88 +462,6 @@ int write_one_page(struct page *page, int wait) EXPORT_SYMBOL(write_one_page); /* - * Add a page to the dirty page list. - * - * It is a sad fact of life that this function is called from several places - * deeply under spinlocking. It may not sleep. - * - * If the page has buffers, the uptodate buffers are set dirty, to preserve - * dirty-state coherency between the page and the buffers. It the page does - * not have buffers then when they are later attached they will all be set - * dirty. - * - * The buffers are dirtied before the page is dirtied. There's a small race - * window in which a writepage caller may see the page cleanness but not the - * buffer dirtiness. That's fine. If this code were to set the page dirty - * before the buffers, a concurrent writepage caller could clear the page dirty - * bit, see a bunch of clean buffers and we'd end up with dirty buffers/clean - * page on the dirty page list. - * - * There is also a small window where the page is dirty, and not on dirty_pages. - * Also a possibility that by the time the page is added to dirty_pages, it has - * been set clean. The page lists are somewhat approximate in this regard. - * It's better to have clean pages accidentally attached to dirty_pages than to - * leave dirty pages attached to clean_pages. - * - * We use private_lock to lock against try_to_free_buffers while using the - * page's buffer list. Also use this to protect against clean buffers being - * added to the page after it was set dirty. - * - * FIXME: may need to call ->reservepage here as well. That's rather up to the - * address_space though. - * - * For now, we treat swapper_space specially. It doesn't use the normal - * block a_ops. 
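Stepping back to the kernel/timer.c leap-second hunks above: each path now moves wall_to_monotonic one second in the direction opposite to xtime, so the sum the new posix-timers code relies on (monotonic = xtime + wall_to_monotonic) never jumps. A tiny runnable illustration of that invariant, with simplified types and made-up values:

/* The invariant behind the paired xtime/wall_to_monotonic updates in
 * second_overflow(): inserting a leap second steps the wall clock back
 * while leaving monotonic time unchanged. */
#include <assert.h>

struct ts { long sec; long nsec; };

int main(void)
{
        struct ts xtime = { 1056672000, 0 };            /* arbitrary */
        struct ts wall_to_monotonic = { -1056671000, 0 };
        long mono = xtime.sec + wall_to_monotonic.sec;

        /* TIME_INS: insert a leap second (23:59:60 UTC) */
        xtime.sec--;
        wall_to_monotonic.sec++;

        assert(xtime.sec + wall_to_monotonic.sec == mono);
        return 0;
}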
- * - * FIXME: this should move over to fs/buffer.c - buffer_heads have no business in mm/ - */ -#include <linux/buffer_head.h> -int __set_page_dirty_buffers(struct page *page) -{ - struct address_space * const mapping = page->mapping; - int ret = 0; - - if (mapping == NULL) { - SetPageDirty(page); - goto out; - } - - if (!PageUptodate(page)) - buffer_error(); - - spin_lock(&mapping->private_lock); - if (page_has_buffers(page)) { - struct buffer_head *head = page_buffers(page); - struct buffer_head *bh = head; - - do { - if (buffer_uptodate(bh)) - set_buffer_dirty(bh); - else - buffer_error(); - bh = bh->b_this_page; - } while (bh != head); - } - spin_unlock(&mapping->private_lock); - - if (!TestSetPageDirty(page)) { - spin_lock(&mapping->page_lock); - if (page->mapping) { /* Race with truncate? */ - if (!mapping->backing_dev_info->memory_backed) - inc_page_state(nr_dirty); - list_del(&page->list); - list_add(&page->list, &mapping->dirty_pages); - } - spin_unlock(&mapping->page_lock); - __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); - } - -out: - return ret; -} -EXPORT_SYMBOL(__set_page_dirty_buffers); - -/* * For address_spaces which do not use buffers. Just set the page's dirty bit * and move it to the dirty_pages list. Also perform space reservation if * required. diff --git a/mm/page_alloc.c b/mm/page_alloc.c index c9c7accac1f7..bff7db2296ae 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -536,6 +536,7 @@ __alloc_pages(unsigned int gfp_mask, unsigned int order, struct page *page; int i; int cold; + int do_retry; if (wait) might_sleep(); @@ -626,10 +627,21 @@ rebalance: } /* - * Don't let big-order allocations loop. Yield for kswapd, try again. + * Don't let big-order allocations loop unless the caller explicitly + * requests that. Wait for some write requests to complete then retry. + * + * In this implementation, __GFP_REPEAT means __GFP_NOFAIL, but that + * may not be true in other implementations. */ - if (order <= 3) { - yield(); + do_retry = 0; + if (!(gfp_mask & __GFP_NORETRY)) { + if ((order <= 3) || (gfp_mask & __GFP_REPEAT)) + do_retry = 1; + if (gfp_mask & __GFP_NOFAIL) + do_retry = 1; + } + if (do_retry) { + blk_congestion_wait(WRITE, HZ/50); goto rebalance; } diff --git a/mm/swap.c b/mm/swap.c index eb71588c1f1a..f6442275cda5 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -21,7 +21,7 @@ #include <linux/pagevec.h> #include <linux/init.h> #include <linux/mm_inline.h> -#include <linux/buffer_head.h> +#include <linux/buffer_head.h> /* for try_to_release_page() */ #include <linux/percpu.h> /* How many pages do we try to swap or page in/out together? 
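The mm/page_alloc.c hunk above is where the three new gfp.h flags acquire their meaning. The decision reads more clearly factored into a predicate (the helper is mine, the logic is the patch's): __GFP_NORETRY always gives up, small orders and __GFP_REPEAT keep retrying after a blk_congestion_wait() nap, and __GFP_NOFAIL retries unconditionally:

/* Retry decision from the rebalance path of __alloc_pages().  As the
 * patch comment notes, __GFP_REPEAT behaves like __GFP_NOFAIL in this
 * implementation; the distinction is contractual, not operational. */
static int should_retry_alloc(unsigned int gfp_mask, unsigned int order)
{
        if (gfp_mask & __GFP_NORETRY)
                return 0;
        if (order <= 3 || (gfp_mask & (__GFP_REPEAT | __GFP_NOFAIL)))
                return 1;
        return 0;
}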
*/ diff --git a/mm/swap_state.c b/mm/swap_state.c index b479ebafa2bd..29198f06fcae 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -13,7 +13,6 @@ #include <linux/init.h> #include <linux/pagemap.h> #include <linux/backing-dev.h> -#include <linux/buffer_head.h> /* block_sync_page() */ #include <asm/pgtable.h> @@ -187,7 +186,7 @@ void delete_from_swap_cache(struct page *page) BUG_ON(!PageLocked(page)); BUG_ON(PageWriteback(page)); - BUG_ON(page_has_buffers(page)); + BUG_ON(PagePrivate(page)); entry.val = page->index; @@ -236,7 +235,7 @@ int move_from_swap_cache(struct page *page, unsigned long index, BUG_ON(!PageLocked(page)); BUG_ON(PageWriteback(page)); - BUG_ON(page_has_buffers(page)); + BUG_ON(PagePrivate(page)); entry.val = page->index; diff --git a/mm/swapfile.c b/mm/swapfile.c index 2271d23d7e7b..48ffb627914d 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -7,6 +7,7 @@ #include <linux/config.h> #include <linux/mm.h> +#include <linux/mman.h> #include <linux/slab.h> #include <linux/kernel_stat.h> #include <linux/swap.h> @@ -15,7 +16,6 @@ #include <linux/namei.h> #include <linux/shm.h> #include <linux/blkdev.h> -#include <linux/buffer_head.h> #include <linux/writeback.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> @@ -300,7 +300,7 @@ int remove_exclusive_swap_page(struct page *page) struct swap_info_struct * p; swp_entry_t entry; - BUG_ON(page_has_buffers(page)); + BUG_ON(PagePrivate(page)); BUG_ON(!PageLocked(page)); if (!PageSwapCache(page)) @@ -355,7 +355,7 @@ void free_swap_and_cache(swp_entry_t entry) if (page) { int one_user; - BUG_ON(page_has_buffers(page)); + BUG_ON(PagePrivate(page)); page_cache_get(page); one_user = (page_count(page) == 2); /* Only cache user (+us), or swap space full? Free it! */ @@ -590,6 +590,11 @@ static int try_to_unuse(unsigned int type) * to swapoff for a while, then reappear - but that is rare. */ while ((i = find_next_to_unuse(si, i))) { + if (signal_pending(current)) { + retval = -EINTR; + break; + } + /* * Get a page for the entry, using the existing swap * cache page if there is one. Otherwise, get a clean @@ -759,8 +764,7 @@ static int try_to_unuse(unsigned int type) /* * Make sure that we aren't completely killing - * interactive performance. Interruptible check on - * signal_pending() would be nice, but changes the spec? + * interactive performance. */ cond_resched(); } @@ -1029,12 +1033,18 @@ asmlinkage long sys_swapoff(const char __user * specialfile) } prev = type; } - err = -EINVAL; if (type < 0) { + err = -EINVAL; + swap_list_unlock(); + goto out_dput; + } + if (vm_enough_memory(p->pages)) + vm_unacct_memory(p->pages); + else { + err = -ENOMEM; swap_list_unlock(); goto out_dput; } - if (prev < 0) { swap_list.head = p->next; } else { @@ -1048,7 +1058,9 @@ asmlinkage long sys_swapoff(const char __user * specialfile) total_swap_pages -= p->pages; p->flags &= ~SWP_WRITEOK; swap_list_unlock(); + current->flags |= PF_SWAPOFF; err = try_to_unuse(type); + current->flags &= ~PF_SWAPOFF; if (err) { /* re-insert swap space back into swap_list */ swap_list_lock(); diff --git a/mm/vmalloc.c b/mm/vmalloc.c index a6423eebcd5d..f6ce2378b721 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -308,7 +308,7 @@ void __vunmap(void *addr, int deallocate_pages) * * @addr: memory base address * - * Free the virtually continguos memory area starting at @addr, as + * Free the virtually contiguous memory area starting at @addr, as * obtained from vmalloc(), vmalloc_32() or __vmalloc(). * * May not be called in interrupt context. 
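The mm/swapfile.c hunks above make swapoff both interruptible and accountable: try_to_unuse() now bails out with -EINTR on a pending signal, and sys_swapoff() brackets the scan with PF_SWAPOFF, which the mm/oom_kill.c hunk earlier in this diff treats as an immediate kill preference (a task in swapoff is busy pulling every swapped page back into RAM). A condensed sketch of the consumer side; badness() stands for the existing scoring heuristics, and the shape follows the hunk rather than quoting it:

/* select_bad_process() after the oom_kill.c change: normal scoring,
 * except a task flagged PF_SWAPOFF is returned on the spot. */
static struct task_struct *select_bad_process_sketch(void)
{
        struct task_struct *g, *p, *chosen = NULL;
        int maxpoints = 0;

        do_each_thread(g, p) {
                int points = badness(p);

                if (points > maxpoints) {
                        chosen = p;
                        maxpoints = points;
                }
                if (p->flags & PF_SWAPOFF)
                        return p;
        } while_each_thread(g, p);
        return chosen;
}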
@@ -324,7 +324,7 @@ void vfree(void *addr) * * @addr: memory base address * - * Free the virtually continguos memory area starting at @addr, + * Free the virtually contiguous memory area starting at @addr, * which was created from the page array passed to vmap(). * * May not be called in interrupt context. @@ -336,25 +336,28 @@ void vunmap(void *addr) } /** - * vmap - map an array of pages into virtually continguos space + * vmap - map an array of pages into virtually contiguous space * * @pages: array of page pointers * @count: number of pages to map + * @flags: vm_area->flags + * @prot: page protection for the mapping * - * Maps @count pages from @pages into continguos kernel virtual + * Maps @count pages from @pages into contiguous kernel virtual * space. */ -void *vmap(struct page **pages, unsigned int count) +void *vmap(struct page **pages, unsigned int count, + unsigned long flags, pgprot_t prot) { struct vm_struct *area; if (count > num_physpages) return NULL; - area = get_vm_area((count << PAGE_SHIFT), VM_MAP); + area = get_vm_area((count << PAGE_SHIFT), flags); if (!area) return NULL; - if (map_vm_area(area, PAGE_KERNEL, &pages)) { + if (map_vm_area(area, prot, &pages)) { vunmap(area->addr); return NULL; } @@ -363,14 +366,14 @@ void *vmap(struct page **pages, unsigned int count) } /** - * __vmalloc - allocate virtually continguos memory + * __vmalloc - allocate virtually contiguous memory * * @size: allocation size * @gfp_mask: flags for the page level allocator * @prot: protection mask for the allocated pages * * Allocate enough pages to cover @size from the page level - * allocator with @gfp_mask flags. Map them into continguos + * allocator with @gfp_mask flags. Map them into contiguous * kernel virtual space, using a pagetable protection of @prot. */ void *__vmalloc(unsigned long size, int gfp_mask, pgprot_t prot) @@ -418,12 +421,12 @@ fail: } /** - * vmalloc - allocate virtually continguos memory + * vmalloc - allocate virtually contiguous memory * * @size: allocation size * * Allocate enough pages to cover @size from the page level - * allocator and map them into continguos kernel virtual space. + * allocator and map them into contiguous kernel virtual space. * * For tight cotrol over page level allocator and protection flags * use __vmalloc() instead. @@ -434,12 +437,12 @@ void *vmalloc(unsigned long size) } /** - * vmalloc_32 - allocate virtually continguos memory (32bit addressable) + * vmalloc_32 - allocate virtually contiguous memory (32bit addressable) * * @size: allocation size * * Allocate enough 32bit PA addressable pages to cover @size from the - * page level allocator and map them into continguos kernel virtual space. + * page level allocator and map them into contiguous kernel virtual space. */ void *vmalloc_32(unsigned long size) { diff --git a/mm/vmscan.c b/mm/vmscan.c index 3d204f882d04..aa24e1d1c693 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -22,7 +22,8 @@ #include <linux/writeback.h> #include <linux/suspend.h> #include <linux/blkdev.h> -#include <linux/buffer_head.h> /* for try_to_release_page() */ +#include <linux/buffer_head.h> /* for try_to_release_page(), + buffer_heads_over_limit */ #include <linux/mm_inline.h> #include <linux/pagevec.h> #include <linux/backing-dev.h> @@ -134,11 +135,9 @@ void remove_shrinker(struct shrinker *shrinker) * If the vm encounted mapped pages on the LRU it increase the pressure on * slab to avoid swapping. * - * FIXME: do not do for zone highmem - * * We do weird things to avoid (scanned*seeks*entries) overflowing 32 bits. 
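The mm/vmalloc.c hunks above give vmap() explicit @flags and @prot parameters instead of hard-coding VM_MAP and PAGE_KERNEL inside it. A usage sketch matching the call sites converted in this diff; the wrapper name is illustrative:

/* Mapping a page array with the new vmap() prototype.  The converted
 * callers in this diff (fs/xfs/pagebuf, sound/core/sgbuf) all pass
 * VM_MAP with PAGE_KERNEL; a driver needing, say, uncached access
 * would supply its own pgprot_t instead. */
static int map_pages_sketch(struct page **pages, unsigned int npages)
{
        char *addr = vmap(pages, npages, VM_MAP, PAGE_KERNEL);

        if (!addr)
                return -ENOMEM; /* count > num_physpages, or no VA space */

        addr[0] = 0;            /* region is now kernel-addressable */
        vunmap(addr);           /* may not be called in interrupt context */
        return 0;
}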
*/ -static int shrink_slab(long scanned, unsigned int gfp_mask) +static int shrink_slab(long scanned, unsigned int gfp_mask) { struct shrinker *shrinker; long pages; @@ -804,8 +803,7 @@ shrink_caches(struct zone *classzone, int priority, int *total_scanned, * excessive rotation of the inactive list, which is _supposed_ to be an LRU, * yes? */ -int -try_to_free_pages(struct zone *classzone, +int try_to_free_pages(struct zone *classzone, unsigned int gfp_mask, unsigned int order) { int priority; @@ -835,9 +833,10 @@ try_to_free_pages(struct zone *classzone, /* Take a nap, wait for some writeback to complete */ blk_congestion_wait(WRITE, HZ/10); - shrink_slab(total_scanned, gfp_mask); + if (classzone - classzone->zone_pgdat->node_zones < ZONE_HIGHMEM) + shrink_slab(total_scanned, gfp_mask); } - if (gfp_mask & __GFP_FS) + if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY)) out_of_memory(); return 0; } @@ -895,7 +894,8 @@ static int balance_pgdat(pg_data_t *pgdat, int nr_pages, struct page_state *ps) max_scan = SWAP_CLUSTER_MAX; to_free -= shrink_zone(zone, max_scan, GFP_KERNEL, to_reclaim, &nr_mapped, ps, priority); - shrink_slab(max_scan + nr_mapped, GFP_KERNEL); + if (i < ZONE_HIGHMEM) + shrink_slab(max_scan + nr_mapped, GFP_KERNEL); if (zone->all_unreclaimable) continue; if (zone->pages_scanned > zone->present_pages * 2) diff --git a/sound/core/sgbuf.c b/sound/core/sgbuf.c index 84e79ebc5c80..4578d2b335bf 100644 --- a/sound/core/sgbuf.c +++ b/sound/core/sgbuf.c @@ -85,7 +85,7 @@ void *snd_malloc_sgbuf_pages(struct pci_dev *pci, size_t size, struct snd_dma_bu } sgbuf->size = size; - dmab->area = vmap(sgbuf->page_table, sgbuf->pages); + dmab->area = vmap(sgbuf->page_table, sgbuf->pages, VM_MAP, PAGE_KERNEL); if (! dmab->area) goto _failed; return dmab->area;
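Finally, the mm/vmscan.c hunks above stop shrink_slab() from running on behalf of highmem reclaim: slab pages are never allocated from highmem, so shrinking slab cannot relieve a highmem-constrained allocation. The zone test, written out as a predicate for clarity (the helper name is mine; both call sites inline the comparison):

/* Zone check behind the new shrink_slab() gating: only zones below
 * ZONE_HIGHMEM (DMA, NORMAL) can hold slab pages. */
static int zone_can_hold_slab(struct zone *zone)
{
        return (zone - zone->zone_pgdat->node_zones) < ZONE_HIGHMEM;
}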
