From a29ccf6f823a84d89e1c7aaaf221cf7282022024 Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw2@infradead.org>
Date: Tue, 3 Jun 2008 14:59:40 +0100
Subject: Make console charset translation optional

By turning off the new CONSOLE_TRANSLATIONS option and dropping the
associated code and tables from the kernel, we can save about 7KiB.

Taken from linux-tiny project by Tim Bird and mangled further by dwmw2.

Signed-off-by: Tim Bird <tim.bird@am.sony.com>
Signed-off-by: David Woodhouse <dwmw2@infradead.org>
---
 include/linux/consolemap.h | 14 ++++++++++++++
 include/linux/vt_kern.h    | 19 +++++++++++++++++++
 2 files changed, 33 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/consolemap.h b/include/linux/consolemap.h
index e2bf7e5db39a..c4811da1338b 100644
--- a/include/linux/consolemap.h
+++ b/include/linux/consolemap.h
@@ -3,6 +3,9 @@
  *
  * Interface between console.c, selection.c  and consolemap.c
  */
+#ifndef __LINUX_CONSOLEMAP_H__
+#define __LINUX_CONSOLEMAP_H__
+
 #define LAT1_MAP 0
 #define GRAF_MAP 1
 #define IBMPC_MAP 2
@@ -10,6 +13,7 @@
 
 #include <linux/types.h>
 
+#ifdef CONFIG_CONSOLE_TRANSLATIONS
 struct vc_data;
 
 extern u16 inverse_translate(struct vc_data *conp, int glyph, int use_unicode);
@@ -18,3 +22,13 @@ extern int conv_uni_to_pc(struct vc_data *conp, long ucs);
 extern u32 conv_8bit_to_uni(unsigned char c);
 extern int conv_uni_to_8bit(u32 uni);
 void console_map_init(void);
+#else
+#define inverse_translate(conp, glyph, uni) ((uint16_t)glyph)
+#define set_translate(m, vc) ((unsigned short *)NULL)
+#define conv_uni_to_pc(conp, ucs) ((int) (ucs > 0xff ? -1: ucs))
+#define conv_8bit_to_uni(c) ((uint32_t)(c))
+#define conv_uni_to_8bit(c) ((int) ((c) & 0xff))
+#define console_map_init(c) do { ; } while (0)
+#endif /* CONFIG_CONSOLE_TRANSLATIONS */
+
+#endif /* __LINUX_CONSOLEMAP_H__ */
diff --git a/include/linux/vt_kern.h b/include/linux/vt_kern.h
index 9448ffbdcbf6..14c0e91be9b5 100644
--- a/include/linux/vt_kern.h
+++ b/include/linux/vt_kern.h
@@ -12,6 +12,7 @@
 #include <linux/mutex.h>
 #include <linux/console_struct.h>
 #include <linux/mm.h>
+#include <linux/consolemap.h>
 
 /*
  * Presently, a lot of graphics programs do not restore the contents of
@@ -54,6 +55,7 @@ void redraw_screen(struct vc_data *vc, int is_switch);
 struct tty_struct;
 int tioclinux(struct tty_struct *tty, unsigned long arg);
 
+#ifdef CONFIG_CONSOLE_TRANSLATIONS
 /* consolemap.c */
 
 struct unimapinit;
@@ -71,6 +73,23 @@ void con_free_unimap(struct vc_data *vc);
 void con_protect_unimap(struct vc_data *vc, int rdonly);
 int con_copy_unimap(struct vc_data *dst_vc, struct vc_data *src_vc);
 
+#define vc_translate(vc, c) ((vc)->vc_translate[(c) |			\
+					(vc)->vc_toggle_meta ? 0x80 : 0])
+#else
+#define con_set_trans_old(arg) (0)
+#define con_get_trans_old(arg) (-EINVAL)
+#define con_set_trans_new(arg) (0)
+#define con_get_trans_new(arg) (-EINVAL)
+#define con_clear_unimap(vc, ui) (0)
+#define con_set_unimap(vc, ct, list) (0)
+#define con_set_default_unimap(vc) (0)
+#define con_copy_unimap(d, s) (0)
+#define con_get_unimap(vc, ct, uct, list) (-EINVAL)
+#define con_free_unimap(vc) do { ; } while (0)
+
+#define vc_translate(vc, c) (c)
+#endif
+
 /* vt.c */
 int vt_waitactive(int vt);
 void change_console(struct vc_data *new_vc);
-- 
cgit v1.2.3


From b8f8c3cf0a4ac0632ec3f0e15e9dc0c29de917af Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 18 Jul 2008 17:27:28 +0200
Subject: nohz: prevent tick stop outside of the idle loop

Jack Ren and Eric Miao tracked down the following long standing
problem in the NOHZ code:

	scheduler switch to idle task
	enable interrupts

Window starts here

	----> interrupt happens (does not set NEED_RESCHED)
	      	irq_exit() stops the tick

	----> interrupt happens (does set NEED_RESCHED)

	return from schedule()

	cpu_idle(): preempt_disable();

Window ends here

The interrupts can happen at any point inside the race window. The
first interrupt stops the tick, the second one causes the scheduler to
rerun and switch away from idle again and we end up with the tick
disabled.

The fact that it needs two interrupts where the first one does not set
NEED_RESCHED and the second one does made the bug obscure and extremly
hard to reproduce and analyse. Kudos to Jack and Eric.

Solution: Limit the NOHZ functionality to the idle loop to make sure
that we can not run into such a situation ever again.

cpu_idle()
{
	preempt_disable();

	while(1) {
		 tick_nohz_stop_sched_tick(1); <- tell NOHZ code that we
		 			          are in the idle loop

		 while (!need_resched())
		       halt();

		 tick_nohz_restart_sched_tick(); <- disables NOHZ mode
		 preempt_enable_no_resched();
		 schedule();
		 preempt_disable();
	}
}

In hindsight we should have done this forever, but ...

/me grabs a large brown paperbag.

Debugged-by: Jack Ren <jack.ren@marvell.com>,
Debugged-by: eric miao <eric.y.miao@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/arm/kernel/process.c              |  2 +-
 arch/avr32/kernel/process.c            |  2 +-
 arch/blackfin/kernel/process.c         |  2 +-
 arch/mips/kernel/process.c             |  2 +-
 arch/powerpc/kernel/idle.c             |  2 +-
 arch/powerpc/platforms/iseries/setup.c |  4 ++--
 arch/sh/kernel/process_32.c            |  2 +-
 arch/sparc64/kernel/process.c          |  2 +-
 arch/um/kernel/process.c               |  2 +-
 arch/x86/kernel/process_32.c           |  2 +-
 arch/x86/kernel/process_64.c           |  2 +-
 include/linux/tick.h                   |  5 +++--
 kernel/softirq.c                       |  2 +-
 kernel/time/tick-sched.c               | 12 ++++++++++--
 14 files changed, 26 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index 46bf2ede6128..84f5a4c778fb 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -164,7 +164,7 @@ void cpu_idle(void)
 		if (!idle)
 			idle = default_idle;
 		leds_event(led_idle_start);
-		tick_nohz_stop_sched_tick();
+		tick_nohz_stop_sched_tick(1);
 		while (!need_resched())
 			idle();
 		leds_event(led_idle_end);
diff --git a/arch/avr32/kernel/process.c b/arch/avr32/kernel/process.c
index 6cf9df176274..ff820a9e743a 100644
--- a/arch/avr32/kernel/process.c
+++ b/arch/avr32/kernel/process.c
@@ -31,7 +31,7 @@ void cpu_idle(void)
 {
 	/* endless idle loop with no priority at all */
 	while (1) {
-		tick_nohz_stop_sched_tick();
+		tick_nohz_stop_sched_tick(1);
 		while (!need_resched())
 			cpu_idle_sleep();
 		tick_nohz_restart_sched_tick();
diff --git a/arch/blackfin/kernel/process.c b/arch/blackfin/kernel/process.c
index 53c2cd255441..77800dd83e57 100644
--- a/arch/blackfin/kernel/process.c
+++ b/arch/blackfin/kernel/process.c
@@ -105,7 +105,7 @@ void cpu_idle(void)
 #endif
 		if (!idle)
 			idle = default_idle;
-		tick_nohz_stop_sched_tick();
+		tick_nohz_stop_sched_tick(1);
 		while (!need_resched())
 			idle();
 		tick_nohz_restart_sched_tick();
diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index 2c09a442e5e5..bdead3aad253 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -53,7 +53,7 @@ void __noreturn cpu_idle(void)
 {
 	/* endless idle loop with no priority at all */
 	while (1) {
-		tick_nohz_stop_sched_tick();
+		tick_nohz_stop_sched_tick(1);
 		while (!need_resched()) {
 #ifdef CONFIG_SMTC_IDLE_HOOK_DEBUG
 			extern void smtc_idle_loop_hook(void);
diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c
index c3cf0e8f3ac1..d308a9f70f1b 100644
--- a/arch/powerpc/kernel/idle.c
+++ b/arch/powerpc/kernel/idle.c
@@ -60,7 +60,7 @@ void cpu_idle(void)
 
 	set_thread_flag(TIF_POLLING_NRFLAG);
 	while (1) {
-		tick_nohz_stop_sched_tick();
+		tick_nohz_stop_sched_tick(1);
 		while (!need_resched() && !cpu_should_die()) {
 			ppc64_runlatch_off();
 
diff --git a/arch/powerpc/platforms/iseries/setup.c b/arch/powerpc/platforms/iseries/setup.c
index b72120751bbe..70b688c1aefb 100644
--- a/arch/powerpc/platforms/iseries/setup.c
+++ b/arch/powerpc/platforms/iseries/setup.c
@@ -561,7 +561,7 @@ static void yield_shared_processor(void)
 static void iseries_shared_idle(void)
 {
 	while (1) {
-		tick_nohz_stop_sched_tick();
+		tick_nohz_stop_sched_tick(1);
 		while (!need_resched() && !hvlpevent_is_pending()) {
 			local_irq_disable();
 			ppc64_runlatch_off();
@@ -591,7 +591,7 @@ static void iseries_dedicated_idle(void)
 	set_thread_flag(TIF_POLLING_NRFLAG);
 
 	while (1) {
-		tick_nohz_stop_sched_tick();
+		tick_nohz_stop_sched_tick(1);
 		if (!need_resched()) {
 			while (!need_resched()) {
 				ppc64_runlatch_off();
diff --git a/arch/sh/kernel/process_32.c b/arch/sh/kernel/process_32.c
index b98e37a1f54c..921892c351da 100644
--- a/arch/sh/kernel/process_32.c
+++ b/arch/sh/kernel/process_32.c
@@ -86,7 +86,7 @@ void cpu_idle(void)
 		if (!idle)
 			idle = default_idle;
 
-		tick_nohz_stop_sched_tick();
+		tick_nohz_stop_sched_tick(1);
 		while (!need_resched())
 			idle();
 		tick_nohz_restart_sched_tick();
diff --git a/arch/sparc64/kernel/process.c b/arch/sparc64/kernel/process.c
index 2084f81a76e1..0798928ba361 100644
--- a/arch/sparc64/kernel/process.c
+++ b/arch/sparc64/kernel/process.c
@@ -97,7 +97,7 @@ void cpu_idle(void)
 	set_thread_flag(TIF_POLLING_NRFLAG);
 
 	while(1) {
-		tick_nohz_stop_sched_tick();
+		tick_nohz_stop_sched_tick(1);
 
 		while (!need_resched() && !cpu_is_offline(cpu))
 			sparc64_yield(cpu);
diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c
index 83603cfbde81..a1c6d07cac3e 100644
--- a/arch/um/kernel/process.c
+++ b/arch/um/kernel/process.c
@@ -243,7 +243,7 @@ void default_idle(void)
 		if (need_resched())
 			schedule();
 
-		tick_nohz_stop_sched_tick();
+		tick_nohz_stop_sched_tick(1);
 		nsecs = disable_timer();
 		idle_sleep(nsecs);
 		tick_nohz_restart_sched_tick();
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index f8476dfbb60d..1f5fa1cf16dd 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -166,7 +166,7 @@ void cpu_idle(void)
 
 	/* endless idle loop with no priority at all */
 	while (1) {
-		tick_nohz_stop_sched_tick();
+		tick_nohz_stop_sched_tick(1);
 		while (!need_resched()) {
 			void (*idle)(void);
 
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index e2319f39988b..c0a5c2a687e6 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -148,7 +148,7 @@ void cpu_idle(void)
 	current_thread_info()->status |= TS_POLLING;
 	/* endless idle loop with no priority at all */
 	while (1) {
-		tick_nohz_stop_sched_tick();
+		tick_nohz_stop_sched_tick(1);
 		while (!need_resched()) {
 			void (*idle)(void);
 
diff --git a/include/linux/tick.h b/include/linux/tick.h
index a881c652f7e9..d3c02695dc5d 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -49,6 +49,7 @@ struct tick_sched {
 	unsigned long			check_clocks;
 	enum tick_nohz_mode		nohz_mode;
 	ktime_t				idle_tick;
+	int				inidle;
 	int				tick_stopped;
 	unsigned long			idle_jiffies;
 	unsigned long			idle_calls;
@@ -105,14 +106,14 @@ static inline int tick_check_oneshot_change(int allow_nohz) { return 0; }
 #endif /* !CONFIG_GENERIC_CLOCKEVENTS */
 
 # ifdef CONFIG_NO_HZ
-extern void tick_nohz_stop_sched_tick(void);
+extern void tick_nohz_stop_sched_tick(int inidle);
 extern void tick_nohz_restart_sched_tick(void);
 extern void tick_nohz_update_jiffies(void);
 extern ktime_t tick_nohz_get_sleep_length(void);
 extern void tick_nohz_stop_idle(int cpu);
 extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time);
 # else
-static inline void tick_nohz_stop_sched_tick(void) { }
+static inline void tick_nohz_stop_sched_tick(int inidle) { }
 static inline void tick_nohz_restart_sched_tick(void) { }
 static inline void tick_nohz_update_jiffies(void) { }
 static inline ktime_t tick_nohz_get_sleep_length(void)
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 36e061740047..05f248039d77 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -312,7 +312,7 @@ void irq_exit(void)
 #ifdef CONFIG_NO_HZ
 	/* Make sure that timer wheel updates are propagated */
 	if (!in_interrupt() && idle_cpu(smp_processor_id()) && !need_resched())
-		tick_nohz_stop_sched_tick();
+		tick_nohz_stop_sched_tick(0);
 	rcu_irq_exit();
 #endif
 	preempt_enable_no_resched();
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 86baa4f0dfe4..ee962d11107b 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -195,7 +195,7 @@ u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time)
  * Called either from the idle loop or from irq_exit() when an idle period was
  * just interrupted by an interrupt which did not cause a reschedule.
  */
-void tick_nohz_stop_sched_tick(void)
+void tick_nohz_stop_sched_tick(int inidle)
 {
 	unsigned long seq, last_jiffies, next_jiffies, delta_jiffies, flags;
 	struct tick_sched *ts;
@@ -224,6 +224,11 @@ void tick_nohz_stop_sched_tick(void)
 	if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE))
 		goto end;
 
+	if (!inidle && !ts->inidle)
+		goto end;
+
+	ts->inidle = 1;
+
 	if (need_resched())
 		goto end;
 
@@ -372,11 +377,14 @@ void tick_nohz_restart_sched_tick(void)
 	local_irq_disable();
 	tick_nohz_stop_idle(cpu);
 
-	if (!ts->tick_stopped) {
+	if (!ts->inidle || !ts->tick_stopped) {
+		ts->inidle = 0;
 		local_irq_enable();
 		return;
 	}
 
+	ts->inidle = 0;
+
 	rcu_exit_nohz();
 
 	/* Update jiffies first */
-- 
cgit v1.2.3


From 47b374752aed1c029f995473c7c463ee3ae5fbaa Mon Sep 17 00:00:00 2001
From: Roland Dreier <rolandd@cisco.com>
Date: Tue, 22 Jul 2008 14:19:39 -0700
Subject: IB/mlx4: Rename struct mlx4_lso_seg to mlx4_wqe_lso_seg

Make the struct name consistent with other WQE segment struct types
defined in <linux/mlx4/qp.h>.

Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/mlx4/qp.c | 2 +-
 include/linux/mlx4/qp.h         | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 89eb6cbe592e..bda0859a5ac5 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -1395,7 +1395,7 @@ static void __set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg)
 	dseg->addr       = cpu_to_be64(sg->addr);
 }
 
-static int build_lso_seg(struct mlx4_lso_seg *wqe, struct ib_send_wr *wr,
+static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_send_wr *wr,
 			 struct mlx4_ib_qp *qp, unsigned *lso_seg_len)
 {
 	unsigned halign = ALIGN(sizeof *wqe + wr->wr.ud.hlen, 16);
diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h
index 7f128b266faa..f02e9ed36cfa 100644
--- a/include/linux/mlx4/qp.h
+++ b/include/linux/mlx4/qp.h
@@ -219,7 +219,7 @@ struct mlx4_wqe_datagram_seg {
 	__be32			reservd[2];
 };
 
-struct mlx4_lso_seg {
+struct mlx4_wqe_lso_seg {
 	__be32			mss_hdr_size;
 	__be32			header[0];
 };
-- 
cgit v1.2.3


From e5899e1b7d73e67de758a32174a859cc2586c0b9 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Sat, 19 Jul 2008 14:39:24 +0200
Subject: PCI PM: make more PCI PM core functionality available to drivers

Make more PCI PM core functionality available to drivers

* Export pci_pme_capable() so that it can be called directly by
  drivers (for example, tg3 needs that).

* Move the state choosing part of pci_prepare_to_sleep() to a
  separate function, pci_target_state(), that can be called directly
  by drivers (for example, tg3 needs that).

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/pci.c   | 34 ++++++++++++++++++++++++----------
 include/linux/pci.h |  2 ++
 2 files changed, 26 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index d00f0e0d8453..e9c356236d27 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -1040,7 +1040,7 @@ int pci_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state)
  * @dev: PCI device to handle.
  * @state: PCI state from which device will issue PME#.
  */
-static bool pci_pme_capable(struct pci_dev *dev, pci_power_t state)
+bool pci_pme_capable(struct pci_dev *dev, pci_power_t state)
 {
 	if (!dev->pm_cap)
 		return false;
@@ -1123,17 +1123,10 @@ int pci_enable_wake(struct pci_dev *dev, pci_power_t state, int enable)
 }
 
 /**
- * pci_prepare_to_sleep - prepare PCI device for system-wide transition into a sleep state
- * @dev: Device to handle.
- *
- * Choose the power state appropriate for the device depending on whether
- * it can wake up the system and/or is power manageable by the platform
- * (PCI_D3hot is the default) and put the device into that state.
  */
-int pci_prepare_to_sleep(struct pci_dev *dev)
+pci_power_t pci_target_state(struct pci_dev *dev)
 {
 	pci_power_t target_state = PCI_D3hot;
-	int error;
 
 	if (platform_pci_power_manageable(dev)) {
 		/*
@@ -1160,7 +1153,7 @@ int pci_prepare_to_sleep(struct pci_dev *dev)
 		 * to generate PME#.
 		 */
 		if (!dev->pm_cap)
-			return -EIO;
+			return PCI_POWER_ERROR;
 
 		if (dev->pme_support) {
 			while (target_state
@@ -1169,6 +1162,25 @@ int pci_prepare_to_sleep(struct pci_dev *dev)
 		}
 	}
 
+	return target_state;
+}
+
+/**
+ * pci_prepare_to_sleep - prepare PCI device for system-wide transition into a sleep state
+ * @dev: Device to handle.
+ *
+ * Choose the power state appropriate for the device depending on whether
+ * it can wake up the system and/or is power manageable by the platform
+ * (PCI_D3hot is the default) and put the device into that state.
+ */
+int pci_prepare_to_sleep(struct pci_dev *dev)
+{
+	pci_power_t target_state = pci_target_state(dev);
+	int error;
+
+	if (target_state == PCI_POWER_ERROR)
+		return -EIO;
+
 	pci_enable_wake(dev, target_state, true);
 
 	error = pci_set_power_state(dev, target_state);
@@ -1918,7 +1930,9 @@ EXPORT_SYMBOL(pci_select_bars);
 EXPORT_SYMBOL(pci_set_power_state);
 EXPORT_SYMBOL(pci_save_state);
 EXPORT_SYMBOL(pci_restore_state);
+EXPORT_SYMBOL(pci_pme_capable);
 EXPORT_SYMBOL(pci_enable_wake);
+EXPORT_SYMBOL(pci_target_state);
 EXPORT_SYMBOL(pci_prepare_to_sleep);
 EXPORT_SYMBOL(pci_back_from_sleep);
 EXPORT_SYMBOL_GPL(pci_set_pcie_reset_state);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index a6a088e1a804..1d296d31abe0 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -638,7 +638,9 @@ int pci_save_state(struct pci_dev *dev);
 int pci_restore_state(struct pci_dev *dev);
 int pci_set_power_state(struct pci_dev *dev, pci_power_t state);
 pci_power_t pci_choose_state(struct pci_dev *dev, pm_message_t state);
+bool pci_pme_capable(struct pci_dev *dev, pci_power_t state);
 int pci_enable_wake(struct pci_dev *dev, pci_power_t state, int enable);
+pci_power_t pci_target_state(struct pci_dev *dev);
 int pci_prepare_to_sleep(struct pci_dev *dev);
 int pci_back_from_sleep(struct pci_dev *dev);
 
-- 
cgit v1.2.3


From e14fa82439d33cef67eaafc1a48960bbfa610c8e Mon Sep 17 00:00:00 2001
From: Riku Voipio <riku.voipio@iki.fi>
Date: Sat, 31 May 2008 14:43:41 +0100
Subject: leds: Add pca9532 led driver

NXP pca9532 is a LED dimmer/controller attached to i2c bus.  It allows
attaching upto 16 leds which can either be on, off or dimmed and/or blinked
with the two PWM modulators available.

This driver is a "new-style" i2c driver that adheres to the driver model and
implements the led framework api.  Since the leds connected to the driver are
platform specific, it is only useful when platform data is passed to the
driver to define what leds are connected to which pins.

Signed-off-by: Riku Voipio <riku.voipio@iki.fi>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Richard Purdie <rpurdie@rpsys.net>
---
 drivers/leds/Kconfig         |   8 +
 drivers/leds/Makefile        |   1 +
 drivers/leds/leds-pca9532.c  | 337 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/leds-pca9532.h |  45 ++++++
 4 files changed, 391 insertions(+)
 create mode 100644 drivers/leds/leds-pca9532.c
 create mode 100644 include/linux/leds-pca9532.h

(limited to 'include/linux')

diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig
index 86a369bc57d6..1c35dfaef721 100644
--- a/drivers/leds/Kconfig
+++ b/drivers/leds/Kconfig
@@ -103,6 +103,14 @@ config LEDS_HP6XX
 	  This option enables led support for the handheld
 	  HP Jornada 620/660/680/690.
 
+config LEDS_PCA9532
+	tristate "LED driver for PCA9532 dimmer"
+	depends on LEDS_CLASS && I2C && INPUT && EXPERIMENTAL
+	help
+	  This option enables support for NXP pca9532
+	  led controller. It is generally only usefull
+	  as a platform driver
+
 config LEDS_GPIO
 	tristate "LED Support for GPIO connected LEDs"
 	depends on LEDS_CLASS && GENERIC_GPIO
diff --git a/drivers/leds/Makefile b/drivers/leds/Makefile
index 973d626f5f4a..7156f9970fa9 100644
--- a/drivers/leds/Makefile
+++ b/drivers/leds/Makefile
@@ -16,6 +16,7 @@ obj-$(CONFIG_LEDS_WRAP)			+= leds-wrap.o
 obj-$(CONFIG_LEDS_H1940)		+= leds-h1940.o
 obj-$(CONFIG_LEDS_COBALT_QUBE)		+= leds-cobalt-qube.o
 obj-$(CONFIG_LEDS_COBALT_RAQ)		+= leds-cobalt-raq.o
+obj-$(CONFIG_LEDS_PCA9532)		+= leds-pca9532.o
 obj-$(CONFIG_LEDS_GPIO)			+= leds-gpio.o
 obj-$(CONFIG_LEDS_CM_X270)              += leds-cm-x270.o
 obj-$(CONFIG_LEDS_CLEVO_MAIL)		+= leds-clevo-mail.o
diff --git a/drivers/leds/leds-pca9532.c b/drivers/leds/leds-pca9532.c
new file mode 100644
index 000000000000..4064d4f6b33b
--- /dev/null
+++ b/drivers/leds/leds-pca9532.c
@@ -0,0 +1,337 @@
+/*
+ * pca9532.c - 16-bit Led dimmer
+ *
+ * Copyright (C) 2008 Riku Voipio <riku.voipio@movial.fi>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * Datasheet: http://www.nxp.com/acrobat/datasheets/PCA9532_3.pdf
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/i2c.h>
+#include <linux/leds.h>
+#include <linux/input.h>
+#include <linux/mutex.h>
+#include <linux/leds-pca9532.h>
+
+static const unsigned short normal_i2c[] = { /*0x60,*/ I2C_CLIENT_END};
+I2C_CLIENT_INSMOD_1(pca9532);
+
+#define PCA9532_REG_PSC(i) (0x2+(i)*2)
+#define PCA9532_REG_PWM(i) (0x3+(i)*2)
+#define PCA9532_REG_LS0  0x6
+#define LED_REG(led) ((led>>2)+PCA9532_REG_LS0)
+#define LED_NUM(led) (led & 0x3)
+
+#define ldev_to_led(c)       container_of(c, struct pca9532_led, ldev)
+
+struct pca9532_data {
+	struct i2c_client *client;
+	struct pca9532_led leds[16];
+	struct mutex update_lock;
+	struct input_dev    *idev;
+	u8 pwm[2];
+	u8 psc[2];
+};
+
+static int pca9532_probe(struct i2c_client *client,
+	const struct i2c_device_id *id);
+static int pca9532_remove(struct i2c_client *client);
+
+static const struct i2c_device_id pca9532_id[] = {
+	{ "pca9532", 0 },
+	{ }
+};
+
+MODULE_DEVICE_TABLE(i2c, pca9532_id);
+
+static struct i2c_driver pca9532_driver = {
+	.driver = {
+		.name   = "pca9532",
+	},
+	.probe  = pca9532_probe,
+	.remove = pca9532_remove,
+	.id_table = pca9532_id,
+};
+
+/* We have two pwm/blinkers, but 16 possible leds to drive. Additionaly,
+ * the clever Thecus people are using one pwm to drive the beeper. So,
+ * as a compromise we average one pwm to the values requested by all
+ * leds that are not ON/OFF.
+ * */
+static int pca9532_setpwm(struct i2c_client *client, int pwm, int blink,
+	enum led_brightness value)
+{
+	int a = 0, b = 0, i = 0;
+	struct pca9532_data *data = i2c_get_clientdata(client);
+	for (i = 0; i < 16; i++) {
+		if (data->leds[i].type == PCA9532_TYPE_LED &&
+			data->leds[i].state == PCA9532_PWM0+pwm) {
+				a++;
+				b += data->leds[i].ldev.brightness;
+		}
+	}
+	if (a == 0) {
+		dev_err(&client->dev,
+		"fear of division by zero %d/%d, wanted %d\n",
+			b, a, value);
+		return -EINVAL;
+	}
+	b = b/a;
+	if (b > 0xFF)
+		return -EINVAL;
+	mutex_lock(&data->update_lock);
+	data->pwm[pwm] = b;
+	i2c_smbus_write_byte_data(client, PCA9532_REG_PWM(pwm),
+		data->pwm[pwm]);
+	data->psc[pwm] = blink;
+	i2c_smbus_write_byte_data(client, PCA9532_REG_PSC(pwm),
+		data->psc[pwm]);
+	mutex_unlock(&data->update_lock);
+	return 0;
+}
+
+/* Set LED routing */
+static void pca9532_setled(struct pca9532_led *led)
+{
+	struct i2c_client *client = led->client;
+	struct pca9532_data *data = i2c_get_clientdata(client);
+	char reg;
+
+	mutex_lock(&data->update_lock);
+	reg = i2c_smbus_read_byte_data(client, LED_REG(led->id));
+	/* zero led bits */
+	reg = reg & ~(0x3<<LED_NUM(led->id)*2);
+	/* set the new value */
+	reg = reg | (led->state << LED_NUM(led->id)*2);
+	i2c_smbus_write_byte_data(client, LED_REG(led->id), reg);
+	mutex_unlock(&data->update_lock);
+}
+
+static void pca9532_set_brightness(struct led_classdev *led_cdev,
+	enum led_brightness value)
+{
+	int err = 0;
+	struct pca9532_led *led = ldev_to_led(led_cdev);
+
+	if (value == LED_OFF)
+		led->state = PCA9532_OFF;
+	else if (value == LED_FULL)
+		led->state = PCA9532_ON;
+	else {
+		led->state = PCA9532_PWM0; /* Thecus: hardcode one pwm */
+		err = pca9532_setpwm(led->client, 0, 0, value);
+		if (err)
+			return; /* XXX: led api doesn't allow error code? */
+	}
+	pca9532_setled(led);
+}
+
+static int pca9532_set_blink(struct led_classdev *led_cdev,
+	unsigned long *delay_on, unsigned long *delay_off)
+{
+	struct pca9532_led *led = ldev_to_led(led_cdev);
+	struct i2c_client *client = led->client;
+	int psc;
+
+	if (*delay_on == 0 && *delay_off == 0) {
+	/* led subsystem ask us for a blink rate */
+		*delay_on  = 1000;
+		*delay_off = 1000;
+	}
+	if (*delay_on != *delay_off || *delay_on > 1690 || *delay_on < 6)
+		return -EINVAL;
+
+	/* Thecus specific: only use PSC/PWM 0 */
+	psc = (*delay_on * 152-1)/1000;
+	return pca9532_setpwm(client, 0, psc, led_cdev->brightness);
+}
+
+int pca9532_event(struct input_dev *dev, unsigned int type, unsigned int code,
+	int value)
+{
+	struct pca9532_data *data = input_get_drvdata(dev);
+
+	if (type != EV_SND && (code != SND_BELL || code != SND_TONE))
+		return -1;
+
+	/* XXX: allow different kind of beeps with psc/pwm modifications */
+	if (value > 1 && value < 32767)
+		data->pwm[1] = 127;
+	else
+		data->pwm[1] = 0;
+
+	dev_info(&dev->dev, "setting beep to %d \n", data->pwm[1]);
+	mutex_lock(&data->update_lock);
+	i2c_smbus_write_byte_data(data->client, PCA9532_REG_PWM(1),
+		data->pwm[1]);
+	mutex_unlock(&data->update_lock);
+
+	return 0;
+}
+
+static int pca9532_configure(struct i2c_client *client,
+	struct pca9532_data *data, struct pca9532_platform_data *pdata)
+{
+	int i, err = 0;
+
+	for (i = 0; i < 2; i++)	{
+		data->pwm[i] = pdata->pwm[i];
+		data->psc[i] = pdata->psc[i];
+		i2c_smbus_write_byte_data(client, PCA9532_REG_PWM(i),
+			data->pwm[i]);
+		i2c_smbus_write_byte_data(client, PCA9532_REG_PSC(i),
+			data->psc[i]);
+	}
+
+	for (i = 0; i < 16; i++) {
+		struct pca9532_led *led = &data->leds[i];
+		struct pca9532_led *pled = &pdata->leds[i];
+		led->client = client;
+		led->id = i;
+		led->type = pled->type;
+		switch (led->type) {
+		case PCA9532_TYPE_NONE:
+			break;
+		case PCA9532_TYPE_LED:
+			led->state = pled->state;
+			led->name =  pled->name;
+			led->ldev.name = led->name;
+			led->ldev.brightness = LED_OFF;
+			led->ldev.brightness_set = pca9532_set_brightness;
+			led->ldev.blink_set = pca9532_set_blink;
+			if (led_classdev_register(&client->dev,
+				&led->ldev) < 0)	{
+				dev_err(&client->dev,
+					"couldn't register LED %s\n",
+					led->name);
+				goto exit;
+			}
+			pca9532_setled(led);
+			break;
+		case PCA9532_TYPE_N2100_BEEP:
+			BUG_ON(data->idev);
+			led->state = PCA9532_PWM1;
+			pca9532_setled(led);
+			data->idev = input_allocate_device();
+			if (data->idev == NULL) {
+				err = -ENOMEM;
+				goto exit;
+			}
+			data->idev->name = pled->name;
+			data->idev->phys = "i2c/pca9532";
+			data->idev->id.bustype = BUS_HOST;
+			data->idev->id.vendor  = 0x001f;
+			data->idev->id.product = 0x0001;
+			data->idev->id.version = 0x0100;
+			data->idev->evbit[0] = BIT_MASK(EV_SND);
+			data->idev->sndbit[0] = BIT_MASK(SND_BELL) |
+						BIT_MASK(SND_TONE);
+			data->idev->event = pca9532_event;
+			input_set_drvdata(data->idev, data);
+			err = input_register_device(data->idev);
+			if (err) {
+				input_free_device(data->idev);
+				data->idev = NULL;
+				goto exit;
+			}
+			break;
+		}
+	}
+	return 0;
+
+exit:
+	if (i > 0)
+		for (i = i - 1; i >= 0; i--)
+			switch (data->leds[i].type) {
+			case PCA9532_TYPE_NONE:
+				break;
+			case PCA9532_TYPE_LED:
+				led_classdev_unregister(&data->leds[i].ldev);
+				break;
+			case PCA9532_TYPE_N2100_BEEP:
+				if (data->idev != NULL) {
+					input_unregister_device(data->idev);
+					input_free_device(data->idev);
+					data->idev = NULL;
+				}
+				break;
+			}
+
+	return err;
+
+}
+
+static int pca9532_probe(struct i2c_client *client,
+	const struct i2c_device_id *id)
+{
+	struct pca9532_data *data = i2c_get_clientdata(client);
+	struct pca9532_platform_data *pca9532_pdata = client->dev.platform_data;
+
+	if (!i2c_check_functionality(client->adapter,
+		I2C_FUNC_SMBUS_BYTE_DATA))
+		return -EIO;
+
+	data = kzalloc(sizeof(struct pca9532_data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	dev_info(&client->dev, "setting platform data\n");
+	i2c_set_clientdata(client, data);
+	data->client = client;
+	mutex_init(&data->update_lock);
+
+	if (pca9532_pdata == NULL)
+		return -EIO;
+
+	pca9532_configure(client, data, pca9532_pdata);
+	return 0;
+
+}
+
+static int pca9532_remove(struct i2c_client *client)
+{
+	struct pca9532_data *data = i2c_get_clientdata(client);
+	int i;
+	for (i = 0; i < 16; i++)
+		switch (data->leds[i].type) {
+		case PCA9532_TYPE_NONE:
+			break;
+		case PCA9532_TYPE_LED:
+			led_classdev_unregister(&data->leds[i].ldev);
+			break;
+		case PCA9532_TYPE_N2100_BEEP:
+			if (data->idev != NULL) {
+				input_unregister_device(data->idev);
+				input_free_device(data->idev);
+				data->idev = NULL;
+			}
+			break;
+		}
+
+	kfree(data);
+	i2c_set_clientdata(client, NULL);
+	return 0;
+}
+
+static int __init pca9532_init(void)
+{
+	return i2c_add_driver(&pca9532_driver);
+}
+
+static void __exit pca9532_exit(void)
+{
+	i2c_del_driver(&pca9532_driver);
+}
+
+MODULE_AUTHOR("Riku Voipio <riku.voipio@movial.fi>");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("PCA 9532 LED dimmer");
+
+module_init(pca9532_init);
+module_exit(pca9532_exit);
+
diff --git a/include/linux/leds-pca9532.h b/include/linux/leds-pca9532.h
new file mode 100644
index 000000000000..81b4207deb95
--- /dev/null
+++ b/include/linux/leds-pca9532.h
@@ -0,0 +1,45 @@
+/*
+ * pca9532.h - platform data structure for pca9532 led controller
+ *
+ * Copyright (C) 2008 Riku Voipio <riku.voipio@movial.fi>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * Datasheet: http://www.nxp.com/acrobat/datasheets/PCA9532_3.pdf
+ *
+ */
+
+#ifndef __LINUX_PCA9532_H
+#define __LINUX_PCA9532_H
+
+#include <linux/leds.h>
+
+enum pca9532_state {
+	PCA9532_OFF  = 0x0,
+	PCA9532_ON   = 0x1,
+	PCA9532_PWM0 = 0x2,
+	PCA9532_PWM1 = 0x3
+};
+
+enum pca9532_type { PCA9532_TYPE_NONE, PCA9532_TYPE_LED,
+	PCA9532_TYPE_N2100_BEEP };
+
+struct pca9532_led {
+	u8 id;
+	struct i2c_client *client;
+	char *name;
+	struct led_classdev ldev;
+	enum pca9532_type type;
+	enum pca9532_state state;
+};
+
+struct pca9532_platform_data {
+	struct pca9532_led leds[16];
+	u8 pwm[2];
+	u8 psc[2];
+};
+
+#endif /* __LINUX_PCA9532_H */
+
-- 
cgit v1.2.3


From 781a54e7664cc0089287a90d27086e9656ac68a1 Mon Sep 17 00:00:00 2001
From: Anton Vorontsov <avorontsov@ru.mvista.com>
Date: Sat, 31 May 2008 15:23:19 +0100
Subject: leds: mark led_classdev.default_trigger as const

LED classdev core doesn't modify memory pointed by the default_trigger,
so mark it as const and we'll able to pass const char *s without getting
compiler warnings.

Signed-off-by: Anton Vorontsov <avorontsov@ru.mvista.com>
Signed-off-by: Richard Purdie <rpurdie@rpsys.net>
---
 include/linux/leds.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/leds.h b/include/linux/leds.h
index 519df72e939d..e7a5e89932fe 100644
--- a/include/linux/leds.h
+++ b/include/linux/leds.h
@@ -48,7 +48,7 @@ struct led_classdev {
 
 	struct device		*dev;
 	struct list_head	 node;			/* LED Device list */
-	char			*default_trigger;	/* Trigger to use */
+	const char		*default_trigger;	/* Trigger to use */
 
 #ifdef CONFIG_LEDS_TRIGGERS
 	/* Protects the trigger data below */
-- 
cgit v1.2.3


From f46e9203d9a100bae216cc06e17f2e77351aa8d8 Mon Sep 17 00:00:00 2001
From: Nate Case <ncase@xes-inc.com>
Date: Wed, 16 Jul 2008 22:49:55 +0100
Subject: leds: Add support for Philips PCA955x I2C LED drivers

This driver supports the PCA9550, PCA9551, PCA9552, and PCA9553
LED driver chips.

Signed-off-by: Nate Case <ncase@xes-inc.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Richard Purdie <rpurdie@rpsys.net>
---
 drivers/leds/Kconfig        |   8 +
 drivers/leds/Makefile       |   1 +
 drivers/leds/leds-pca955x.c | 384 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/leds.h        |  14 ++
 4 files changed, 407 insertions(+)
 create mode 100644 drivers/leds/leds-pca955x.c

(limited to 'include/linux')

diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig
index 1c35dfaef721..9556262dda5a 100644
--- a/drivers/leds/Kconfig
+++ b/drivers/leds/Kconfig
@@ -155,6 +155,14 @@ config LEDS_CLEVO_MAIL
 	  To compile this driver as a module, choose M here: the
 	  module will be called leds-clevo-mail.
 
+config LEDS_PCA955X
+	tristate "LED Support for PCA955x I2C chips"
+	depends on LEDS_CLASS && I2C
+	help
+	  This option enables support for LEDs connected to PCA955x
+	  LED driver chips accessed via the I2C bus.  Supported
+	  devices include PCA9550, PCA9551, PCA9552, and PCA9553.
+
 comment "LED Triggers"
 
 config LEDS_TRIGGERS
diff --git a/drivers/leds/Makefile b/drivers/leds/Makefile
index 7156f9970fa9..ff7982b44565 100644
--- a/drivers/leds/Makefile
+++ b/drivers/leds/Makefile
@@ -22,6 +22,7 @@ obj-$(CONFIG_LEDS_CM_X270)              += leds-cm-x270.o
 obj-$(CONFIG_LEDS_CLEVO_MAIL)		+= leds-clevo-mail.o
 obj-$(CONFIG_LEDS_HP6XX)		+= leds-hp6xx.o
 obj-$(CONFIG_LEDS_FSG)			+= leds-fsg.o
+obj-$(CONFIG_LEDS_PCA955X)		+= leds-pca955x.o
 
 # LED Triggers
 obj-$(CONFIG_LEDS_TRIGGER_TIMER)	+= ledtrig-timer.o
diff --git a/drivers/leds/leds-pca955x.c b/drivers/leds/leds-pca955x.c
new file mode 100644
index 000000000000..146c06972863
--- /dev/null
+++ b/drivers/leds/leds-pca955x.c
@@ -0,0 +1,384 @@
+/*
+ * Copyright 2007-2008 Extreme Engineering Solutions, Inc.
+ *
+ * Author: Nate Case <ncase@xes-inc.com>
+ *
+ * This file is subject to the terms and conditions of version 2 of
+ * the GNU General Public License.  See the file COPYING in the main
+ * directory of this archive for more details.
+ *
+ * LED driver for various PCA955x I2C LED drivers
+ *
+ * Supported devices:
+ *
+ *	Device		Description		7-bit slave address
+ *	------		-----------		-------------------
+ *	PCA9550		2-bit driver		0x60 .. 0x61
+ *	PCA9551		8-bit driver		0x60 .. 0x67
+ *	PCA9552		16-bit driver		0x60 .. 0x67
+ *	PCA9553/01	4-bit driver		0x62
+ *	PCA9553/02	4-bit driver		0x63
+ *
+ * Philips PCA955x LED driver chips follow a register map as shown below:
+ *
+ *	Control Register		Description
+ *	----------------		-----------
+ *	0x0				Input register 0
+ *					..
+ *	NUM_INPUT_REGS - 1		Last Input register X
+ *
+ *	NUM_INPUT_REGS			Frequency prescaler 0
+ *	NUM_INPUT_REGS + 1		PWM register 0
+ *	NUM_INPUT_REGS + 2		Frequency prescaler 1
+ *	NUM_INPUT_REGS + 3		PWM register 1
+ *
+ *	NUM_INPUT_REGS + 4		LED selector 0
+ *	NUM_INPUT_REGS + 4
+ *	    + NUM_LED_REGS - 1		Last LED selector
+ *
+ *  where NUM_INPUT_REGS and NUM_LED_REGS vary depending on how many
+ *  bits the chip supports.
+ */
+
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/string.h>
+#include <linux/ctype.h>
+#include <linux/leds.h>
+#include <linux/err.h>
+#include <linux/i2c.h>
+#include <linux/workqueue.h>
+
+/* LED select registers determine the source that drives LED outputs */
+#define PCA955X_LS_LED_ON	0x0	/* Output LOW */
+#define PCA955X_LS_LED_OFF	0x1	/* Output HI-Z */
+#define PCA955X_LS_BLINK0	0x2	/* Blink at PWM0 rate */
+#define PCA955X_LS_BLINK1	0x3	/* Blink at PWM1 rate */
+
+enum pca955x_type {
+	pca9550,
+	pca9551,
+	pca9552,
+	pca9553,
+};
+
+struct pca955x_chipdef {
+	int			bits;
+	u8			slv_addr;	/* 7-bit slave address mask */
+	int			slv_addr_shift;	/* Number of bits to ignore */
+};
+
+static struct pca955x_chipdef pca955x_chipdefs[] = {
+	[pca9550] = {
+		.bits		= 2,
+		.slv_addr	= /* 110000x */ 0x60,
+		.slv_addr_shift	= 1,
+	},
+	[pca9551] = {
+		.bits		= 8,
+		.slv_addr	= /* 1100xxx */ 0x60,
+		.slv_addr_shift	= 3,
+	},
+	[pca9552] = {
+		.bits		= 16,
+		.slv_addr	= /* 1100xxx */ 0x60,
+		.slv_addr_shift	= 3,
+	},
+	[pca9553] = {
+		.bits		= 4,
+		.slv_addr	= /* 110001x */ 0x62,
+		.slv_addr_shift	= 1,
+	},
+};
+
+static const struct i2c_device_id pca955x_id[] = {
+	{ "pca9550", pca9550 },
+	{ "pca9551", pca9551 },
+	{ "pca9552", pca9552 },
+	{ "pca9553", pca9553 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, pca955x_id);
+
+struct pca955x_led {
+	struct pca955x_chipdef	*chipdef;
+	struct i2c_client	*client;
+	struct work_struct	work;
+	spinlock_t		lock;
+	enum led_brightness	brightness;
+	struct led_classdev	led_cdev;
+	int			led_num;	/* 0 .. 15 potentially */
+	char			name[32];
+};
+
+/* 8 bits per input register */
+static inline int pca95xx_num_input_regs(int bits)
+{
+	return (bits + 7) / 8;
+}
+
+/* 4 bits per LED selector register */
+static inline int pca95xx_num_led_regs(int bits)
+{
+	return (bits + 3)  / 4;
+}
+
+/*
+ * Return an LED selector register value based on an existing one, with
+ * the appropriate 2-bit state value set for the given LED number (0-3).
+ */
+static inline u8 pca955x_ledsel(u8 oldval, int led_num, int state)
+{
+	return (oldval & (~(0x3 << (led_num << 1)))) |
+		((state & 0x3) << (led_num << 1));
+}
+
+/*
+ * Write to frequency prescaler register, used to program the
+ * period of the PWM output.  period = (PSCx + 1) / 38
+ */
+static void pca955x_write_psc(struct i2c_client *client, int n, u8 val)
+{
+	struct pca955x_led *pca955x = i2c_get_clientdata(client);
+
+	i2c_smbus_write_byte_data(client,
+		pca95xx_num_input_regs(pca955x->chipdef->bits) + 2*n,
+		val);
+}
+
+/*
+ * Write to PWM register, which determines the duty cycle of the
+ * output.  LED is OFF when the count is less than the value of this
+ * register, and ON when it is greater.  If PWMx == 0, LED is always OFF.
+ *
+ * Duty cycle is (256 - PWMx) / 256
+ */
+static void pca955x_write_pwm(struct i2c_client *client, int n, u8 val)
+{
+	struct pca955x_led *pca955x = i2c_get_clientdata(client);
+
+	i2c_smbus_write_byte_data(client,
+		pca95xx_num_input_regs(pca955x->chipdef->bits) + 1 + 2*n,
+		val);
+}
+
+/*
+ * Write to LED selector register, which determines the source that
+ * drives the LED output.
+ */
+static void pca955x_write_ls(struct i2c_client *client, int n, u8 val)
+{
+	struct pca955x_led *pca955x = i2c_get_clientdata(client);
+
+	i2c_smbus_write_byte_data(client,
+		pca95xx_num_input_regs(pca955x->chipdef->bits) + 4 + n,
+		val);
+}
+
+/*
+ * Read the LED selector register, which determines the source that
+ * drives the LED output.
+ */
+static u8 pca955x_read_ls(struct i2c_client *client, int n)
+{
+	struct pca955x_led *pca955x = i2c_get_clientdata(client);
+
+	return (u8) i2c_smbus_read_byte_data(client,
+		pca95xx_num_input_regs(pca955x->chipdef->bits) + 4 + n);
+}
+
+static void pca955x_led_work(struct work_struct *work)
+{
+	struct pca955x_led *pca955x;
+	u8 ls;
+	int chip_ls;	/* which LSx to use (0-3 potentially) */
+	int ls_led;	/* which set of bits within LSx to use (0-3) */
+
+	pca955x = container_of(work, struct pca955x_led, work);
+	chip_ls = pca955x->led_num / 4;
+	ls_led = pca955x->led_num % 4;
+
+	ls = pca955x_read_ls(pca955x->client, chip_ls);
+
+	switch (pca955x->brightness) {
+	case LED_FULL:
+		ls = pca955x_ledsel(ls, ls_led, PCA955X_LS_LED_ON);
+		break;
+	case LED_OFF:
+		ls = pca955x_ledsel(ls, ls_led, PCA955X_LS_LED_OFF);
+		break;
+	case LED_HALF:
+		ls = pca955x_ledsel(ls, ls_led, PCA955X_LS_BLINK0);
+		break;
+	default:
+		/*
+		 * Use PWM1 for all other values.  This has the unwanted
+		 * side effect of making all LEDs on the chip share the
+		 * same brightness level if set to a value other than
+		 * OFF, HALF, or FULL.  But, this is probably better than
+		 * just turning off for all other values.
+		 */
+		pca955x_write_pwm(pca955x->client, 1, 255-pca955x->brightness);
+		ls = pca955x_ledsel(ls, ls_led, PCA955X_LS_BLINK1);
+		break;
+	}
+
+	pca955x_write_ls(pca955x->client, chip_ls, ls);
+}
+
+void pca955x_led_set(struct led_classdev *led_cdev, enum led_brightness value)
+{
+	struct pca955x_led *pca955x;
+
+	pca955x = container_of(led_cdev, struct pca955x_led, led_cdev);
+
+	spin_lock(&pca955x->lock);
+	pca955x->brightness = value;
+
+	/*
+	 * Must use workqueue for the actual I/O since I2C operations
+	 * can sleep.
+	 */
+	schedule_work(&pca955x->work);
+
+	spin_unlock(&pca955x->lock);
+}
+
+static int __devinit pca955x_probe(struct i2c_client *client,
+					const struct i2c_device_id *id)
+{
+	struct pca955x_led *pca955x;
+	int i;
+	int err = -ENODEV;
+	struct pca955x_chipdef *chip;
+	struct i2c_adapter *adapter;
+	struct led_platform_data *pdata;
+
+	chip = &pca955x_chipdefs[id->driver_data];
+	adapter = to_i2c_adapter(client->dev.parent);
+	pdata = client->dev.platform_data;
+
+	/* Make sure the slave address / chip type combo given is possible */
+	if ((client->addr & ~((1 << chip->slv_addr_shift) - 1)) !=
+	    chip->slv_addr) {
+		dev_err(&client->dev, "invalid slave address %02x\n",
+				client->addr);
+		return -ENODEV;
+	}
+
+	printk(KERN_INFO "leds-pca955x: Using %s %d-bit LED driver at "
+			"slave address 0x%02x\n",
+			id->name, chip->bits, client->addr);
+
+	if (!i2c_check_functionality(adapter, I2C_FUNC_I2C))
+		return -EIO;
+
+	if (pdata) {
+		if (pdata->num_leds != chip->bits) {
+			dev_err(&client->dev, "board info claims %d LEDs"
+					" on a %d-bit chip\n",
+					pdata->num_leds, chip->bits);
+			return -ENODEV;
+		}
+	}
+
+	for (i = 0; i < chip->bits; i++) {
+		pca955x = kzalloc(sizeof(struct pca955x_led), GFP_KERNEL);
+		if (!pca955x) {
+			err = -ENOMEM;
+			goto exit;
+		}
+
+		pca955x->chipdef = chip;
+		pca955x->client = client;
+		pca955x->led_num = i;
+		/* Platform data can specify LED names and default triggers */
+		if (pdata) {
+			if (pdata->leds[i].name)
+				snprintf(pca955x->name, 32, "pca955x:%s",
+							pdata->leds[i].name);
+			if (pdata->leds[i].default_trigger)
+				pca955x->led_cdev.default_trigger =
+					pdata->leds[i].default_trigger;
+		} else {
+			snprintf(pca955x->name, 32, "pca955x:%d", i);
+		}
+		spin_lock_init(&pca955x->lock);
+
+		pca955x->led_cdev.name = pca955x->name;
+		pca955x->led_cdev.brightness_set =
+				pca955x_led_set;
+
+		/*
+		 * Client data is a pointer to the _first_ pca955x_led
+		 * struct
+		 */
+		if (i == 0)
+			i2c_set_clientdata(client, pca955x);
+
+		INIT_WORK(&(pca955x->work), pca955x_led_work);
+
+		led_classdev_register(&client->dev, &(pca955x->led_cdev));
+	}
+
+	/* Turn off LEDs */
+	for (i = 0; i < pca95xx_num_led_regs(chip->bits); i++)
+		pca955x_write_ls(client, i, 0x55);
+
+	/* PWM0 is used for half brightness or 50% duty cycle */
+	pca955x_write_pwm(client, 0, 255-LED_HALF);
+
+	/* PWM1 is used for variable brightness, default to OFF */
+	pca955x_write_pwm(client, 1, 0);
+
+	/* Set to fast frequency so we do not see flashing */
+	pca955x_write_psc(client, 0, 0);
+	pca955x_write_psc(client, 1, 0);
+
+	return 0;
+exit:
+	return err;
+}
+
+static int __devexit pca955x_remove(struct i2c_client *client)
+{
+	struct pca955x_led *pca955x = i2c_get_clientdata(client);
+	int leds = pca955x->chipdef->bits;
+	int i;
+
+	for (i = 0; i < leds; i++) {
+		led_classdev_unregister(&(pca955x->led_cdev));
+		cancel_work_sync(&(pca955x->work));
+		kfree(pca955x);
+		pca955x = pca955x + 1;
+	}
+
+	return 0;
+}
+
+static struct i2c_driver pca955x_driver = {
+	.driver = {
+		.name	= "leds-pca955x",
+		.owner	= THIS_MODULE,
+	},
+	.probe	= pca955x_probe,
+	.remove	= __devexit_p(pca955x_remove),
+	.id_table = pca955x_id,
+};
+
+static int __init pca955x_leds_init(void)
+{
+	return i2c_add_driver(&pca955x_driver);
+}
+
+static void __exit pca955x_leds_exit(void)
+{
+	i2c_del_driver(&pca955x_driver);
+}
+
+module_init(pca955x_leds_init);
+module_exit(pca955x_leds_exit);
+
+MODULE_AUTHOR("Nate Case <ncase@xes-inc.com>");
+MODULE_DESCRIPTION("PCA955x LED driver");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/leds.h b/include/linux/leds.h
index e7a5e89932fe..d41ccb56146a 100644
--- a/include/linux/leds.h
+++ b/include/linux/leds.h
@@ -118,6 +118,20 @@ extern void ledtrig_ide_activity(void);
 #define ledtrig_ide_activity() do {} while(0)
 #endif
 
+/*
+ * Generic LED platform data for describing LED names and default triggers.
+ */
+struct led_info {
+	const char	*name;
+	char		*default_trigger;
+	int		flags;
+};
+
+struct led_platform_data {
+	int		num_leds;
+	struct led_info	*leds;
+};
+
 /* For the leds-gpio driver */
 struct gpio_led {
 	const char *name;
-- 
cgit v1.2.3


From 95d04f0735b4fc837bff9aedcc3f3efb20ddc3d1 Mon Sep 17 00:00:00 2001
From: Roland Dreier <rolandd@cisco.com>
Date: Wed, 23 Jul 2008 08:12:26 -0700
Subject: IB/mlx4: Add support for memory management extensions and local DMA
 L_Key

Add support for the following operations to mlx4 when device firmware
supports them:

 - Send with invalidate and local invalidate send queue work requests;
 - Allocate/free fast register MRs;
 - Allocate/free fast register MR page lists;
 - Fast register MR send queue work requests;
 - Local DMA L_Key.

Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/mlx4/cq.c      | 12 ++++++
 drivers/infiniband/hw/mlx4/main.c    | 11 ++++++
 drivers/infiniband/hw/mlx4/mlx4_ib.h | 15 ++++++++
 drivers/infiniband/hw/mlx4/mr.c      | 70 +++++++++++++++++++++++++++++++++++
 drivers/infiniband/hw/mlx4/qp.c      | 72 +++++++++++++++++++++++++++++++++---
 drivers/net/mlx4/fw.c                | 10 ++---
 drivers/net/mlx4/fw.h                |  2 +-
 drivers/net/mlx4/main.c              |  2 +
 drivers/net/mlx4/mr.c                | 23 +++++++++---
 include/linux/mlx4/device.h          | 10 +++++
 include/linux/mlx4/qp.h              | 16 ++++++--
 11 files changed, 221 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index 299f20832ab6..0b191a4842ce 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -637,6 +637,7 @@ repoll:
 		case MLX4_OPCODE_SEND_IMM:
 			wc->wc_flags |= IB_WC_WITH_IMM;
 		case MLX4_OPCODE_SEND:
+		case MLX4_OPCODE_SEND_INVAL:
 			wc->opcode    = IB_WC_SEND;
 			break;
 		case MLX4_OPCODE_RDMA_READ:
@@ -657,6 +658,12 @@ repoll:
 		case MLX4_OPCODE_LSO:
 			wc->opcode    = IB_WC_LSO;
 			break;
+		case MLX4_OPCODE_FMR:
+			wc->opcode    = IB_WC_FAST_REG_MR;
+			break;
+		case MLX4_OPCODE_LOCAL_INVAL:
+			wc->opcode    = IB_WC_LOCAL_INV;
+			break;
 		}
 	} else {
 		wc->byte_len = be32_to_cpu(cqe->byte_cnt);
@@ -667,6 +674,11 @@ repoll:
 			wc->wc_flags	= IB_WC_WITH_IMM;
 			wc->ex.imm_data = cqe->immed_rss_invalid;
 			break;
+		case MLX4_RECV_OPCODE_SEND_INVAL:
+			wc->opcode	= IB_WC_RECV;
+			wc->wc_flags	= IB_WC_WITH_INVALIDATE;
+			wc->ex.invalidate_rkey = be32_to_cpu(cqe->immed_rss_invalid);
+			break;
 		case MLX4_RECV_OPCODE_SEND:
 			wc->opcode   = IB_WC_RECV;
 			wc->wc_flags = 0;
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index bcf50648fa18..38d6907ab521 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -104,6 +104,12 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
 		props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM;
 	if (dev->dev->caps.max_gso_sz)
 		props->device_cap_flags |= IB_DEVICE_UD_TSO;
+	if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_RESERVED_LKEY)
+		props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY;
+	if ((dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_LOCAL_INV) &&
+	    (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_REMOTE_INV) &&
+	    (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_FAST_REG_WR))
+		props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
 
 	props->vendor_id	   = be32_to_cpup((__be32 *) (out_mad->data + 36)) &
 		0xffffff;
@@ -127,6 +133,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
 	props->max_srq		   = dev->dev->caps.num_srqs - dev->dev->caps.reserved_srqs;
 	props->max_srq_wr	   = dev->dev->caps.max_srq_wqes - 1;
 	props->max_srq_sge	   = dev->dev->caps.max_srq_sge;
+	props->max_fast_reg_page_list_len = PAGE_SIZE / sizeof (u64);
 	props->local_ca_ack_delay  = dev->dev->caps.local_ca_ack_delay;
 	props->atomic_cap	   = dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_ATOMIC ?
 		IB_ATOMIC_HCA : IB_ATOMIC_NONE;
@@ -565,6 +572,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
 	strlcpy(ibdev->ib_dev.name, "mlx4_%d", IB_DEVICE_NAME_MAX);
 	ibdev->ib_dev.owner		= THIS_MODULE;
 	ibdev->ib_dev.node_type		= RDMA_NODE_IB_CA;
+	ibdev->ib_dev.local_dma_lkey	= dev->caps.reserved_lkey;
 	ibdev->ib_dev.phys_port_cnt	= dev->caps.num_ports;
 	ibdev->ib_dev.num_comp_vectors	= 1;
 	ibdev->ib_dev.dma_device	= &dev->pdev->dev;
@@ -627,6 +635,9 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
 	ibdev->ib_dev.get_dma_mr	= mlx4_ib_get_dma_mr;
 	ibdev->ib_dev.reg_user_mr	= mlx4_ib_reg_user_mr;
 	ibdev->ib_dev.dereg_mr		= mlx4_ib_dereg_mr;
+	ibdev->ib_dev.alloc_fast_reg_mr = mlx4_ib_alloc_fast_reg_mr;
+	ibdev->ib_dev.alloc_fast_reg_page_list = mlx4_ib_alloc_fast_reg_page_list;
+	ibdev->ib_dev.free_fast_reg_page_list  = mlx4_ib_free_fast_reg_page_list;
 	ibdev->ib_dev.attach_mcast	= mlx4_ib_mcg_attach;
 	ibdev->ib_dev.detach_mcast	= mlx4_ib_mcg_detach;
 	ibdev->ib_dev.process_mad	= mlx4_ib_process_mad;
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index c4cf5b69eefa..d26a91317d4d 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -83,6 +83,11 @@ struct mlx4_ib_mr {
 	struct ib_umem	       *umem;
 };
 
+struct mlx4_ib_fast_reg_page_list {
+	struct ib_fast_reg_page_list	ibfrpl;
+	dma_addr_t			map;
+};
+
 struct mlx4_ib_fmr {
 	struct ib_fmr           ibfmr;
 	struct mlx4_fmr         mfmr;
@@ -199,6 +204,11 @@ static inline struct mlx4_ib_mr *to_mmr(struct ib_mr *ibmr)
 	return container_of(ibmr, struct mlx4_ib_mr, ibmr);
 }
 
+static inline struct mlx4_ib_fast_reg_page_list *to_mfrpl(struct ib_fast_reg_page_list *ibfrpl)
+{
+	return container_of(ibfrpl, struct mlx4_ib_fast_reg_page_list, ibfrpl);
+}
+
 static inline struct mlx4_ib_fmr *to_mfmr(struct ib_fmr *ibfmr)
 {
 	return container_of(ibfmr, struct mlx4_ib_fmr, ibfmr);
@@ -239,6 +249,11 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 				  u64 virt_addr, int access_flags,
 				  struct ib_udata *udata);
 int mlx4_ib_dereg_mr(struct ib_mr *mr);
+struct ib_mr *mlx4_ib_alloc_fast_reg_mr(struct ib_pd *pd,
+					int max_page_list_len);
+struct ib_fast_reg_page_list *mlx4_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
+							       int page_list_len);
+void mlx4_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list);
 
 int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period);
 int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata);
diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
index 68e92485fc76..db2086faa4ed 100644
--- a/drivers/infiniband/hw/mlx4/mr.c
+++ b/drivers/infiniband/hw/mlx4/mr.c
@@ -183,6 +183,76 @@ int mlx4_ib_dereg_mr(struct ib_mr *ibmr)
 	return 0;
 }
 
+struct ib_mr *mlx4_ib_alloc_fast_reg_mr(struct ib_pd *pd,
+					int max_page_list_len)
+{
+	struct mlx4_ib_dev *dev = to_mdev(pd->device);
+	struct mlx4_ib_mr *mr;
+	int err;
+
+	mr = kmalloc(sizeof *mr, GFP_KERNEL);
+	if (!mr)
+		return ERR_PTR(-ENOMEM);
+
+	err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, 0, 0, 0,
+			    max_page_list_len, 0, &mr->mmr);
+	if (err)
+		goto err_free;
+
+	err = mlx4_mr_enable(dev->dev, &mr->mmr);
+	if (err)
+		goto err_mr;
+
+	return &mr->ibmr;
+
+err_mr:
+	mlx4_mr_free(dev->dev, &mr->mmr);
+
+err_free:
+	kfree(mr);
+	return ERR_PTR(err);
+}
+
+struct ib_fast_reg_page_list *mlx4_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
+							       int page_list_len)
+{
+	struct mlx4_ib_dev *dev = to_mdev(ibdev);
+	struct mlx4_ib_fast_reg_page_list *mfrpl;
+	int size = page_list_len * sizeof (u64);
+
+	if (size > PAGE_SIZE)
+		return ERR_PTR(-EINVAL);
+
+	mfrpl = kmalloc(sizeof *mfrpl, GFP_KERNEL);
+	if (!mfrpl)
+		return ERR_PTR(-ENOMEM);
+
+	mfrpl->ibfrpl.page_list = dma_alloc_coherent(&dev->dev->pdev->dev,
+						     size, &mfrpl->map,
+						     GFP_KERNEL);
+	if (!mfrpl->ibfrpl.page_list)
+		goto err_free;
+
+	WARN_ON(mfrpl->map & 0x3f);
+
+	return &mfrpl->ibfrpl;
+
+err_free:
+	kfree(mfrpl);
+	return ERR_PTR(-ENOMEM);
+}
+
+void mlx4_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list)
+{
+	struct mlx4_ib_dev *dev = to_mdev(page_list->device);
+	struct mlx4_ib_fast_reg_page_list *mfrpl = to_mfrpl(page_list);
+	int size = page_list->max_page_list_len * sizeof (u64);
+
+	dma_free_coherent(&dev->dev->pdev->dev, size, page_list->page_list,
+			  mfrpl->map);
+	kfree(mfrpl);
+}
+
 struct ib_fmr *mlx4_ib_fmr_alloc(struct ib_pd *pd, int acc,
 				 struct ib_fmr_attr *fmr_attr)
 {
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index bda0859a5ac5..02a99bc4442e 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -78,6 +78,9 @@ static const __be32 mlx4_ib_opcode[] = {
 	[IB_WR_RDMA_READ]		= __constant_cpu_to_be32(MLX4_OPCODE_RDMA_READ),
 	[IB_WR_ATOMIC_CMP_AND_SWP]	= __constant_cpu_to_be32(MLX4_OPCODE_ATOMIC_CS),
 	[IB_WR_ATOMIC_FETCH_AND_ADD]	= __constant_cpu_to_be32(MLX4_OPCODE_ATOMIC_FA),
+	[IB_WR_SEND_WITH_INV]		= __constant_cpu_to_be32(MLX4_OPCODE_SEND_INVAL),
+	[IB_WR_LOCAL_INV]		= __constant_cpu_to_be32(MLX4_OPCODE_LOCAL_INVAL),
+	[IB_WR_FAST_REG_MR]		= __constant_cpu_to_be32(MLX4_OPCODE_FMR),
 };
 
 static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp)
@@ -976,6 +979,10 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
 	context->pd	    = cpu_to_be32(to_mpd(ibqp->pd)->pdn);
 	context->params1    = cpu_to_be32(MLX4_IB_ACK_REQ_FREQ << 28);
 
+	/* Set "fast registration enabled" for all kernel QPs */
+	if (!qp->ibqp.uobject)
+		context->params1 |= cpu_to_be32(1 << 11);
+
 	if (attr_mask & IB_QP_RNR_RETRY) {
 		context->params1 |= cpu_to_be32(attr->rnr_retry << 13);
 		optpar |= MLX4_QP_OPTPAR_RNR_RETRY;
@@ -1322,6 +1329,38 @@ static int mlx4_wq_overflow(struct mlx4_ib_wq *wq, int nreq, struct ib_cq *ib_cq
 	return cur + nreq >= wq->max_post;
 }
 
+static __be32 convert_access(int acc)
+{
+	return (acc & IB_ACCESS_REMOTE_ATOMIC ? cpu_to_be32(MLX4_WQE_FMR_PERM_ATOMIC)       : 0) |
+	       (acc & IB_ACCESS_REMOTE_WRITE  ? cpu_to_be32(MLX4_WQE_FMR_PERM_REMOTE_WRITE) : 0) |
+	       (acc & IB_ACCESS_REMOTE_READ   ? cpu_to_be32(MLX4_WQE_FMR_PERM_REMOTE_READ)  : 0) |
+	       (acc & IB_ACCESS_LOCAL_WRITE   ? cpu_to_be32(MLX4_WQE_FMR_PERM_LOCAL_WRITE)  : 0) |
+		cpu_to_be32(MLX4_WQE_FMR_PERM_LOCAL_READ);
+}
+
+static void set_fmr_seg(struct mlx4_wqe_fmr_seg *fseg, struct ib_send_wr *wr)
+{
+	struct mlx4_ib_fast_reg_page_list *mfrpl = to_mfrpl(wr->wr.fast_reg.page_list);
+
+	fseg->flags		= convert_access(wr->wr.fast_reg.access_flags);
+	fseg->mem_key		= cpu_to_be32(wr->wr.fast_reg.rkey);
+	fseg->buf_list		= cpu_to_be64(mfrpl->map);
+	fseg->start_addr	= cpu_to_be64(wr->wr.fast_reg.iova_start);
+	fseg->reg_len		= cpu_to_be64(wr->wr.fast_reg.length);
+	fseg->offset		= 0; /* XXX -- is this just for ZBVA? */
+	fseg->page_size		= cpu_to_be32(wr->wr.fast_reg.page_shift);
+	fseg->reserved[0]	= 0;
+	fseg->reserved[1]	= 0;
+}
+
+static void set_local_inv_seg(struct mlx4_wqe_local_inval_seg *iseg, u32 rkey)
+{
+	iseg->flags	= 0;
+	iseg->mem_key	= cpu_to_be32(rkey);
+	iseg->guest_id	= 0;
+	iseg->pa	= 0;
+}
+
 static __always_inline void set_raddr_seg(struct mlx4_wqe_raddr_seg *rseg,
 					  u64 remote_addr, u32 rkey)
 {
@@ -1423,6 +1462,21 @@ static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_send_wr *wr,
 	return 0;
 }
 
+static __be32 send_ieth(struct ib_send_wr *wr)
+{
+	switch (wr->opcode) {
+	case IB_WR_SEND_WITH_IMM:
+	case IB_WR_RDMA_WRITE_WITH_IMM:
+		return wr->ex.imm_data;
+
+	case IB_WR_SEND_WITH_INV:
+		return cpu_to_be32(wr->ex.invalidate_rkey);
+
+	default:
+		return 0;
+	}
+}
+
 int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 		      struct ib_send_wr **bad_wr)
 {
@@ -1469,11 +1523,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 				     MLX4_WQE_CTRL_TCP_UDP_CSUM) : 0) |
 			qp->sq_signal_bits;
 
-		if (wr->opcode == IB_WR_SEND_WITH_IMM ||
-		    wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM)
-			ctrl->imm = wr->ex.imm_data;
-		else
-			ctrl->imm = 0;
+		ctrl->imm = send_ieth(wr);
 
 		wqe += sizeof *ctrl;
 		size = sizeof *ctrl / 16;
@@ -1505,6 +1555,18 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 				size += sizeof (struct mlx4_wqe_raddr_seg) / 16;
 				break;
 
+			case IB_WR_LOCAL_INV:
+				set_local_inv_seg(wqe, wr->ex.invalidate_rkey);
+				wqe  += sizeof (struct mlx4_wqe_local_inval_seg);
+				size += sizeof (struct mlx4_wqe_local_inval_seg) / 16;
+				break;
+
+			case IB_WR_FAST_REG_MR:
+				set_fmr_seg(wqe, wr);
+				wqe  += sizeof (struct mlx4_wqe_fmr_seg);
+				size += sizeof (struct mlx4_wqe_fmr_seg) / 16;
+				break;
+
 			default:
 				/* No extra segments required for sends */
 				break;
diff --git a/drivers/net/mlx4/fw.c b/drivers/net/mlx4/fw.c
index 0851ebdddfd4..57278224ba1e 100644
--- a/drivers/net/mlx4/fw.c
+++ b/drivers/net/mlx4/fw.c
@@ -202,7 +202,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 #define QUERY_DEV_CAP_C_MPT_ENTRY_SZ_OFFSET	0x8e
 #define QUERY_DEV_CAP_MTT_ENTRY_SZ_OFFSET	0x90
 #define QUERY_DEV_CAP_D_MPT_ENTRY_SZ_OFFSET	0x92
-#define QUERY_DEV_CAP_BMME_FLAGS_OFFSET		0x97
+#define QUERY_DEV_CAP_BMME_FLAGS_OFFSET		0x94
 #define QUERY_DEV_CAP_RSVD_LKEY_OFFSET		0x98
 #define QUERY_DEV_CAP_MAX_ICM_SZ_OFFSET		0xa0
 
@@ -377,12 +377,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 		}
 	}
 
-	if (dev_cap->bmme_flags & 1)
-		mlx4_dbg(dev, "Base MM extensions: yes "
-			 "(flags %d, rsvd L_Key %08x)\n",
-			 dev_cap->bmme_flags, dev_cap->reserved_lkey);
-	else
-		mlx4_dbg(dev, "Base MM extensions: no\n");
+	mlx4_dbg(dev, "Base MM extensions: flags %08x, rsvd L_Key %08x\n",
+		 dev_cap->bmme_flags, dev_cap->reserved_lkey);
 
 	/*
 	 * Each UAR has 4 EQ doorbells; so if a UAR is reserved, then
diff --git a/drivers/net/mlx4/fw.h b/drivers/net/mlx4/fw.h
index a0e046c149b7..fbf0e22be122 100644
--- a/drivers/net/mlx4/fw.h
+++ b/drivers/net/mlx4/fw.h
@@ -98,7 +98,7 @@ struct mlx4_dev_cap {
 	int cmpt_entry_sz;
 	int mtt_entry_sz;
 	int resize_srq;
-	u8  bmme_flags;
+	u32 bmme_flags;
 	u32 reserved_lkey;
 	u64 max_icm_sz;
 	int max_gso_sz;
diff --git a/drivers/net/mlx4/main.c b/drivers/net/mlx4/main.c
index d3736013fe9b..8e1d24cda1b0 100644
--- a/drivers/net/mlx4/main.c
+++ b/drivers/net/mlx4/main.c
@@ -158,6 +158,8 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 	dev->caps.max_msg_sz         = dev_cap->max_msg_sz;
 	dev->caps.page_size_cap	     = ~(u32) (dev_cap->min_page_sz - 1);
 	dev->caps.flags		     = dev_cap->flags;
+	dev->caps.bmme_flags	     = dev_cap->bmme_flags;
+	dev->caps.reserved_lkey	     = dev_cap->reserved_lkey;
 	dev->caps.stat_rate_support  = dev_cap->stat_rate_support;
 	dev->caps.max_gso_sz	     = dev_cap->max_gso_sz;
 
diff --git a/drivers/net/mlx4/mr.c b/drivers/net/mlx4/mr.c
index b3ea93b98689..a3c04c5f12c2 100644
--- a/drivers/net/mlx4/mr.c
+++ b/drivers/net/mlx4/mr.c
@@ -47,7 +47,7 @@ struct mlx4_mpt_entry {
 	__be32 flags;
 	__be32 qpn;
 	__be32 key;
-	__be32 pd;
+	__be32 pd_flags;
 	__be64 start;
 	__be64 length;
 	__be32 lkey;
@@ -61,11 +61,15 @@ struct mlx4_mpt_entry {
 } __attribute__((packed));
 
 #define MLX4_MPT_FLAG_SW_OWNS	    (0xfUL << 28)
+#define MLX4_MPT_FLAG_FREE	    (0x3UL << 28)
 #define MLX4_MPT_FLAG_MIO	    (1 << 17)
 #define MLX4_MPT_FLAG_BIND_ENABLE   (1 << 15)
 #define MLX4_MPT_FLAG_PHYSICAL	    (1 <<  9)
 #define MLX4_MPT_FLAG_REGION	    (1 <<  8)
 
+#define MLX4_MPT_PD_FLAG_FAST_REG   (1 << 26)
+#define MLX4_MPT_PD_FLAG_EN_INV	    (3 << 24)
+
 #define MLX4_MTT_FLAG_PRESENT		1
 
 #define MLX4_MPT_STATUS_SW		0xF0
@@ -324,21 +328,30 @@ int mlx4_mr_enable(struct mlx4_dev *dev, struct mlx4_mr *mr)
 
 	memset(mpt_entry, 0, sizeof *mpt_entry);
 
-	mpt_entry->flags = cpu_to_be32(MLX4_MPT_FLAG_SW_OWNS	 |
-				       MLX4_MPT_FLAG_MIO	 |
+	mpt_entry->flags = cpu_to_be32(MLX4_MPT_FLAG_MIO	 |
 				       MLX4_MPT_FLAG_REGION	 |
 				       mr->access);
 
 	mpt_entry->key	       = cpu_to_be32(key_to_hw_index(mr->key));
-	mpt_entry->pd	       = cpu_to_be32(mr->pd);
+	mpt_entry->pd_flags    = cpu_to_be32(mr->pd | MLX4_MPT_PD_FLAG_EN_INV);
 	mpt_entry->start       = cpu_to_be64(mr->iova);
 	mpt_entry->length      = cpu_to_be64(mr->size);
 	mpt_entry->entity_size = cpu_to_be32(mr->mtt.page_shift);
+
 	if (mr->mtt.order < 0) {
 		mpt_entry->flags |= cpu_to_be32(MLX4_MPT_FLAG_PHYSICAL);
 		mpt_entry->mtt_seg = 0;
-	} else
+	} else {
 		mpt_entry->mtt_seg = cpu_to_be64(mlx4_mtt_addr(dev, &mr->mtt));
+	}
+
+	if (mr->mtt.order >= 0 && mr->mtt.page_shift == 0) {
+		/* fast register MR in free state */
+		mpt_entry->flags    |= cpu_to_be32(MLX4_MPT_FLAG_FREE);
+		mpt_entry->pd_flags |= cpu_to_be32(MLX4_MPT_PD_FLAG_FAST_REG);
+	} else {
+		mpt_entry->flags    |= cpu_to_be32(MLX4_MPT_FLAG_SW_OWNS);
+	}
 
 	err = mlx4_SW2HW_MPT(dev, mailbox,
 			     key_to_hw_index(mr->key) & (dev->caps.num_mpts - 1));
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 81b3dd5206e0..655ea0d1ee14 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -68,6 +68,14 @@ enum {
 	MLX4_DEV_CAP_FLAG_UD_MCAST	= 1 << 21
 };
 
+enum {
+	MLX4_BMME_FLAG_LOCAL_INV	= 1 <<  6,
+	MLX4_BMME_FLAG_REMOTE_INV	= 1 <<  7,
+	MLX4_BMME_FLAG_TYPE_2_WIN	= 1 <<  9,
+	MLX4_BMME_FLAG_RESERVED_LKEY	= 1 << 10,
+	MLX4_BMME_FLAG_FAST_REG_WR	= 1 << 11,
+};
+
 enum mlx4_event {
 	MLX4_EVENT_TYPE_COMP		   = 0x00,
 	MLX4_EVENT_TYPE_PATH_MIG	   = 0x01,
@@ -184,6 +192,8 @@ struct mlx4_caps {
 	u32			max_msg_sz;
 	u32			page_size_cap;
 	u32			flags;
+	u32			bmme_flags;
+	u32			reserved_lkey;
 	u16			stat_rate_support;
 	u8			port_width_cap[MLX4_MAX_PORTS + 1];
 	int			max_gso_sz;
diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h
index f02e9ed36cfa..e27082cd650e 100644
--- a/include/linux/mlx4/qp.h
+++ b/include/linux/mlx4/qp.h
@@ -233,6 +233,14 @@ struct mlx4_wqe_bind_seg {
 	__be64			length;
 };
 
+enum {
+	MLX4_WQE_FMR_PERM_LOCAL_READ	= 1 << 27,
+	MLX4_WQE_FMR_PERM_LOCAL_WRITE	= 1 << 28,
+	MLX4_WQE_FMR_PERM_REMOTE_READ	= 1 << 29,
+	MLX4_WQE_FMR_PERM_REMOTE_WRITE	= 1 << 30,
+	MLX4_WQE_FMR_PERM_ATOMIC	= 1 << 31
+};
+
 struct mlx4_wqe_fmr_seg {
 	__be32			flags;
 	__be32			mem_key;
@@ -255,11 +263,11 @@ struct mlx4_wqe_fmr_ext_seg {
 };
 
 struct mlx4_wqe_local_inval_seg {
-	u8			flags;
-	u8			reserved1[3];
+	__be32			flags;
+	u32			reserved1;
 	__be32			mem_key;
-	u8			reserved2[3];
-	u8			guest_id;
+	u32			reserved2[2];
+	__be32			guest_id;
 	__be64			pa;
 };
 
-- 
cgit v1.2.3


From a5bf6190417cbbf80443a9f71c65b653e13e9982 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Thu, 10 Jul 2008 18:38:33 +0300
Subject: UBI: add ubi_sync() interface

To flush MTD device caches.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
---
 drivers/mtd/ubi/kapi.c  | 24 ++++++++++++++++++++++++
 include/linux/mtd/ubi.h |  1 +
 2 files changed, 25 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mtd/ubi/kapi.c b/drivers/mtd/ubi/kapi.c
index 51508832566d..e65c8e0bcd5d 100644
--- a/drivers/mtd/ubi/kapi.c
+++ b/drivers/mtd/ubi/kapi.c
@@ -632,3 +632,27 @@ int ubi_is_mapped(struct ubi_volume_desc *desc, int lnum)
 	return vol->eba_tbl[lnum] >= 0;
 }
 EXPORT_SYMBOL_GPL(ubi_is_mapped);
+
+/**
+ * ubi_sync - synchronize UBI device buffers.
+ * @ubi_num: UBI device to synchronize
+ *
+ * The underlying MTD device may cache data in hardware or in software. This
+ * function ensures the caches are flushed. Returns zero in case of success and
+ * a negative error code in case of failure.
+ */
+int ubi_sync(int ubi_num)
+{
+	struct ubi_device *ubi;
+
+	ubi = ubi_get_device(ubi_num);
+	if (!ubi)
+		return -ENODEV;
+
+	if (ubi->mtd->sync)
+		ubi->mtd->sync(ubi->mtd);
+
+	ubi_put_device(ubi);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ubi_sync);
diff --git a/include/linux/mtd/ubi.h b/include/linux/mtd/ubi.h
index f71201d0f3e7..83302bbbddb4 100644
--- a/include/linux/mtd/ubi.h
+++ b/include/linux/mtd/ubi.h
@@ -152,6 +152,7 @@ int ubi_leb_erase(struct ubi_volume_desc *desc, int lnum);
 int ubi_leb_unmap(struct ubi_volume_desc *desc, int lnum);
 int ubi_leb_map(struct ubi_volume_desc *desc, int lnum, int dtype);
 int ubi_is_mapped(struct ubi_volume_desc *desc, int lnum);
+int ubi_sync(int ubi_num);
 
 /*
  * This function is the same as the 'ubi_leb_read()' function, but it does not
-- 
cgit v1.2.3


From 85c6e6e28259e9b58b8984db536c45bc3161f40c Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Wed, 16 Jul 2008 10:25:56 +0300
Subject: UBI: amend commentaries

Hch asked not to use "unit" for sub-systems, let it be so.
Also some other commentaries modifications.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
---
 drivers/mtd/ubi/build.c     |  2 +-
 drivers/mtd/ubi/debug.h     |  6 +--
 drivers/mtd/ubi/eba.c       | 22 +++++------
 drivers/mtd/ubi/io.c        | 22 +++++------
 drivers/mtd/ubi/scan.c      | 28 +++++++-------
 drivers/mtd/ubi/scan.h      | 19 +++++----
 drivers/mtd/ubi/ubi-media.h | 23 +++++------
 drivers/mtd/ubi/ubi.h       | 37 +++++++++---------
 drivers/mtd/ubi/wl.c        | 94 ++++++++++++++++++++++-----------------------
 include/linux/mtd/ubi.h     |  4 +-
 10 files changed, 129 insertions(+), 128 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c
index a5b19944eca8..27271fe32e02 100644
--- a/drivers/mtd/ubi/build.c
+++ b/drivers/mtd/ubi/build.c
@@ -524,7 +524,7 @@ out_si:
 }
 
 /**
- * io_init - initialize I/O unit for a given UBI device.
+ * io_init - initialize I/O sub-system for a given UBI device.
  * @ubi: UBI device description object
  *
  * If @ubi->vid_hdr_offset or @ubi->leb_start is zero, default offsets are
diff --git a/drivers/mtd/ubi/debug.h b/drivers/mtd/ubi/debug.h
index 8ea99d8c9e1f..7d8d77c31dfe 100644
--- a/drivers/mtd/ubi/debug.h
+++ b/drivers/mtd/ubi/debug.h
@@ -76,21 +76,21 @@ void ubi_dbg_dump_mkvol_req(const struct ubi_mkvol_req *req);
 #endif /* CONFIG_MTD_UBI_DEBUG_MSG */
 
 #ifdef CONFIG_MTD_UBI_DEBUG_MSG_EBA
-/* Messages from the eraseblock association unit */
+/* Messages from the eraseblock association sub-system */
 #define dbg_eba(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
 #else
 #define dbg_eba(fmt, ...) ({})
 #endif
 
 #ifdef CONFIG_MTD_UBI_DEBUG_MSG_WL
-/* Messages from the wear-leveling unit */
+/* Messages from the wear-leveling sub-system */
 #define dbg_wl(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
 #else
 #define dbg_wl(fmt, ...) ({})
 #endif
 
 #ifdef CONFIG_MTD_UBI_DEBUG_MSG_IO
-/* Messages from the input/output unit */
+/* Messages from the input/output sub-system */
 #define dbg_io(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
 #else
 #define dbg_io(fmt, ...) ({})
diff --git a/drivers/mtd/ubi/eba.c b/drivers/mtd/ubi/eba.c
index 8dc488fc0cdf..613cd1e51648 100644
--- a/drivers/mtd/ubi/eba.c
+++ b/drivers/mtd/ubi/eba.c
@@ -19,20 +19,20 @@
  */
 
 /*
- * The UBI Eraseblock Association (EBA) unit.
+ * The UBI Eraseblock Association (EBA) sub-system.
  *
- * This unit is responsible for I/O to/from logical eraseblock.
+ * This sub-system is responsible for I/O to/from logical eraseblock.
  *
  * Although in this implementation the EBA table is fully kept and managed in
  * RAM, which assumes poor scalability, it might be (partially) maintained on
  * flash in future implementations.
  *
- * The EBA unit implements per-logical eraseblock locking. Before accessing a
- * logical eraseblock it is locked for reading or writing. The per-logical
- * eraseblock locking is implemented by means of the lock tree. The lock tree
- * is an RB-tree which refers all the currently locked logical eraseblocks. The
- * lock tree elements are &struct ubi_ltree_entry objects. They are indexed by
- * (@vol_id, @lnum) pairs.
+ * The EBA sub-system implements per-logical eraseblock locking. Before
+ * accessing a logical eraseblock it is locked for reading or writing. The
+ * per-logical eraseblock locking is implemented by means of the lock tree. The
+ * lock tree is an RB-tree which refers all the currently locked logical
+ * eraseblocks. The lock tree elements are &struct ubi_ltree_entry objects.
+ * They are indexed by (@vol_id, @lnum) pairs.
  *
  * EBA also maintains the global sequence counter which is incremented each
  * time a logical eraseblock is mapped to a physical eraseblock and it is
@@ -1128,7 +1128,7 @@ out_unlock_leb:
 }
 
 /**
- * ubi_eba_init_scan - initialize the EBA unit using scanning information.
+ * ubi_eba_init_scan - initialize the EBA sub-system using scanning information.
  * @ubi: UBI device description object
  * @si: scanning information
  *
@@ -1143,7 +1143,7 @@ int ubi_eba_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si)
 	struct ubi_scan_leb *seb;
 	struct rb_node *rb;
 
-	dbg_eba("initialize EBA unit");
+	dbg_eba("initialize EBA sub-system");
 
 	spin_lock_init(&ubi->ltree_lock);
 	mutex_init(&ubi->alc_mutex);
@@ -1209,7 +1209,7 @@ int ubi_eba_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si)
 		ubi->rsvd_pebs  += ubi->beb_rsvd_pebs;
 	}
 
-	dbg_eba("EBA unit is initialized");
+	dbg_eba("EBA sub-system is initialized");
 	return 0;
 
 out_free:
diff --git a/drivers/mtd/ubi/io.c b/drivers/mtd/ubi/io.c
index 4ac11df7b048..561e7b2f96cb 100644
--- a/drivers/mtd/ubi/io.c
+++ b/drivers/mtd/ubi/io.c
@@ -20,15 +20,15 @@
  */
 
 /*
- * UBI input/output unit.
+ * UBI input/output sub-system.
  *
- * This unit provides a uniform way to work with all kinds of the underlying
- * MTD devices. It also implements handy functions for reading and writing UBI
- * headers.
+ * This sub-system provides a uniform way to work with all kinds of the
+ * underlying MTD devices. It also implements handy functions for reading and
+ * writing UBI headers.
  *
  * We are trying to have a paranoid mindset and not to trust to what we read
- * from the flash media in order to be more secure and robust. So this unit
- * validates every single header it reads from the flash media.
+ * from the flash media in order to be more secure and robust. So this
+ * sub-system validates every single header it reads from the flash media.
  *
  * Some words about how the eraseblock headers are stored.
  *
@@ -79,11 +79,11 @@
  * 512-byte chunks, we have to allocate one more buffer and copy our VID header
  * to offset 448 of this buffer.
  *
- * The I/O unit does the following trick in order to avoid this extra copy.
- * It always allocates a @ubi->vid_hdr_alsize bytes buffer for the VID header
- * and returns a pointer to offset @ubi->vid_hdr_shift of this buffer. When the
- * VID header is being written out, it shifts the VID header pointer back and
- * writes the whole sub-page.
+ * The I/O sub-system does the following trick in order to avoid this extra
+ * copy. It always allocates a @ubi->vid_hdr_alsize bytes buffer for the VID
+ * header and returns a pointer to offset @ubi->vid_hdr_shift of this buffer.
+ * When the VID header is being written out, it shifts the VID header pointer
+ * back and writes the whole sub-page.
  */
 
 #include <linux/crc32.h>
diff --git a/drivers/mtd/ubi/scan.c b/drivers/mtd/ubi/scan.c
index 96d410e106ab..892c2ba49777 100644
--- a/drivers/mtd/ubi/scan.c
+++ b/drivers/mtd/ubi/scan.c
@@ -19,9 +19,9 @@
  */
 
 /*
- * UBI scanning unit.
+ * UBI scanning sub-system.
  *
- * This unit is responsible for scanning the flash media, checking UBI
+ * This sub-system is responsible for scanning the flash media, checking UBI
  * headers and providing complete information about the UBI flash image.
  *
  * The scanning information is represented by a &struct ubi_scan_info' object.
@@ -103,7 +103,7 @@ static int add_to_list(struct ubi_scan_info *si, int pnum, int ec,
  * non-zero if an inconsistency was found and zero if not.
  *
  * Note, UBI does sanity check of everything it reads from the flash media.
- * Most of the checks are done in the I/O unit. Here we check that the
+ * Most of the checks are done in the I/O sub-system. Here we check that the
  * information in the VID header is consistent to the information in other VID
  * headers of the same volume.
  */
@@ -256,8 +256,8 @@ static int compare_lebs(struct ubi_device *ubi, const struct ubi_scan_leb *seb,
 		 * that versions that are close to %0xFFFFFFFF are less then
 		 * versions that are close to %0.
 		 *
-		 * The UBI WL unit guarantees that the number of pending tasks
-		 * is not greater then %0x7FFFFFFF. So, if the difference
+		 * The UBI WL sub-system guarantees that the number of pending
+		 * tasks is not greater then %0x7FFFFFFF. So, if the difference
 		 * between any two versions is greater or equivalent to
 		 * %0x7FFFFFFF, there was an overflow and the logical
 		 * eraseblock with lower version is actually newer then the one
@@ -645,9 +645,9 @@ void ubi_scan_rm_volume(struct ubi_scan_info *si, struct ubi_scan_volume *sv)
  *
  * This function erases physical eraseblock 'pnum', and writes the erase
  * counter header to it. This function should only be used on UBI device
- * initialization stages, when the EBA unit had not been yet initialized. This
- * function returns zero in case of success and a negative error code in case
- * of failure.
+ * initialization stages, when the EBA sub-system had not been yet initialized.
+ * This function returns zero in case of success and a negative error code in
+ * case of failure.
  */
 int ubi_scan_erase_peb(struct ubi_device *ubi, const struct ubi_scan_info *si,
 		       int pnum, int ec)
@@ -687,9 +687,10 @@ out_free:
  * @si: scanning information
  *
  * This function returns a free physical eraseblock. It is supposed to be
- * called on the UBI initialization stages when the wear-leveling unit is not
- * initialized yet. This function picks a physical eraseblocks from one of the
- * lists, writes the EC header if it is needed, and removes it from the list.
+ * called on the UBI initialization stages when the wear-leveling sub-system is
+ * not initialized yet. This function picks a physical eraseblocks from one of
+ * the lists, writes the EC header if it is needed, and removes it from the
+ * list.
  *
  * This function returns scanning physical eraseblock information in case of
  * success and an error code in case of failure.
@@ -764,8 +765,9 @@ static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si, int pnum
 		return err;
 	else if (err) {
 		/*
-		 * FIXME: this is actually duty of the I/O unit to initialize
-		 * this, but MTD does not provide enough information.
+		 * FIXME: this is actually duty of the I/O sub-system to
+		 * initialize this, but MTD does not provide enough
+		 * information.
 		 */
 		si->bad_peb_count += 1;
 		return 0;
diff --git a/drivers/mtd/ubi/scan.h b/drivers/mtd/ubi/scan.h
index 966b9b682a42..4e2e3cc0becd 100644
--- a/drivers/mtd/ubi/scan.h
+++ b/drivers/mtd/ubi/scan.h
@@ -59,16 +59,16 @@ struct ubi_scan_leb {
  * @leb_count: number of logical eraseblocks in this volume
  * @vol_type: volume type
  * @used_ebs: number of used logical eraseblocks in this volume (only for
- * static volumes)
+ *            static volumes)
  * @last_data_size: amount of data in the last logical eraseblock of this
- * volume (always equivalent to the usable logical eraseblock size in case of
- * dynamic volumes)
+ *                  volume (always equivalent to the usable logical eraseblock
+ *                  size in case of dynamic volumes)
  * @data_pad: how many bytes at the end of logical eraseblocks of this volume
- * are not used (due to volume alignment)
+ *            are not used (due to volume alignment)
  * @compat: compatibility flags of this volume
  * @rb: link in the volume RB-tree
  * @root: root of the RB-tree containing all the eraseblock belonging to this
- * volume (&struct ubi_scan_leb objects)
+ *        volume (&struct ubi_scan_leb objects)
  *
  * One object of this type is allocated for each volume during scanning.
  */
@@ -92,8 +92,8 @@ struct ubi_scan_volume {
  * @free: list of free physical eraseblocks
  * @erase: list of physical eraseblocks which have to be erased
  * @alien: list of physical eraseblocks which should not be used by UBI (e.g.,
+ *         those belonging to "preserve"-compatible internal volumes)
  * @bad_peb_count: count of bad physical eraseblocks
- * those belonging to "preserve"-compatible internal volumes)
  * @vols_found: number of volumes found during scanning
  * @highest_vol_id: highest volume ID
  * @alien_peb_count: count of physical eraseblocks in the @alien list
@@ -106,8 +106,8 @@ struct ubi_scan_volume {
  * @ec_count: a temporary variable used when calculating @mean_ec
  *
  * This data structure contains the result of scanning and may be used by other
- * UBI units to build final UBI data structures, further error-recovery and so
- * on.
+ * UBI sub-systems to build final UBI data structures, further error-recovery
+ * and so on.
  */
 struct ubi_scan_info {
 	struct rb_root volumes;
@@ -132,8 +132,7 @@ struct ubi_device;
 struct ubi_vid_hdr;
 
 /*
- * ubi_scan_move_to_list - move a physical eraseblock from the volume tree to a
- * list.
+ * ubi_scan_move_to_list - move a PEB from the volume tree to a list.
  *
  * @sv: volume scanning information
  * @seb: scanning eraseblock infprmation
diff --git a/drivers/mtd/ubi/ubi-media.h b/drivers/mtd/ubi/ubi-media.h
index c3185d9fd048..26bb7af9787a 100644
--- a/drivers/mtd/ubi/ubi-media.h
+++ b/drivers/mtd/ubi/ubi-media.h
@@ -98,10 +98,11 @@ enum {
  * Compatibility constants used by internal volumes.
  *
  * @UBI_COMPAT_DELETE: delete this internal volume before anything is written
- * to the flash
+ *                     to the flash
  * @UBI_COMPAT_RO: attach this device in read-only mode
  * @UBI_COMPAT_PRESERVE: preserve this internal volume - do not touch its
- * physical eraseblocks, don't allow the wear-leveling unit to move them
+ *                       physical eraseblocks, don't allow the wear-leveling
+ *                       sub-system to move them
  * @UBI_COMPAT_REJECT: reject this UBI image
  */
 enum {
@@ -123,7 +124,7 @@ enum {
  * struct ubi_ec_hdr - UBI erase counter header.
  * @magic: erase counter header magic number (%UBI_EC_HDR_MAGIC)
  * @version: version of UBI implementation which is supposed to accept this
- * UBI image
+ *           UBI image
  * @padding1: reserved for future, zeroes
  * @ec: the erase counter
  * @vid_hdr_offset: where the VID header starts
@@ -159,20 +160,20 @@ struct ubi_ec_hdr {
  * struct ubi_vid_hdr - on-flash UBI volume identifier header.
  * @magic: volume identifier header magic number (%UBI_VID_HDR_MAGIC)
  * @version: UBI implementation version which is supposed to accept this UBI
- * image (%UBI_VERSION)
+ *           image (%UBI_VERSION)
  * @vol_type: volume type (%UBI_VID_DYNAMIC or %UBI_VID_STATIC)
  * @copy_flag: if this logical eraseblock was copied from another physical
- * eraseblock (for wear-leveling reasons)
+ *             eraseblock (for wear-leveling reasons)
  * @compat: compatibility of this volume (%0, %UBI_COMPAT_DELETE,
- * %UBI_COMPAT_IGNORE, %UBI_COMPAT_PRESERVE, or %UBI_COMPAT_REJECT)
+ *          %UBI_COMPAT_IGNORE, %UBI_COMPAT_PRESERVE, or %UBI_COMPAT_REJECT)
  * @vol_id: ID of this volume
  * @lnum: logical eraseblock number
  * @leb_ver: version of this logical eraseblock (IMPORTANT: obsolete, to be
- * removed, kept only for not breaking older UBI users)
+ *           removed, kept only for not breaking older UBI users)
  * @data_size: how many bytes of data this logical eraseblock contains
  * @used_ebs: total number of used logical eraseblocks in this volume
  * @data_pad: how many bytes at the end of this physical eraseblock are not
- * used
+ *            used
  * @data_crc: CRC checksum of the data stored in this logical eraseblock
  * @padding1: reserved for future, zeroes
  * @sqnum: sequence number
@@ -248,9 +249,9 @@ struct ubi_ec_hdr {
  * The @data_crc field contains the CRC checksum of the contents of the logical
  * eraseblock if this is a static volume. In case of dynamic volumes, it does
  * not contain the CRC checksum as a rule. The only exception is when the
- * data of the physical eraseblock was moved by the wear-leveling unit, then
- * the wear-leveling unit calculates the data CRC and stores it in the
- * @data_crc field. And of course, the @copy_flag is %in this case.
+ * data of the physical eraseblock was moved by the wear-leveling sub-system,
+ * then the wear-leveling sub-system calculates the data CRC and stores it in
+ * the @data_crc field. And of course, the @copy_flag is %in this case.
  *
  * The @data_size field is used only for static volumes because UBI has to know
  * how many bytes of data are stored in this eraseblock. For dynamic volumes,
diff --git a/drivers/mtd/ubi/ubi.h b/drivers/mtd/ubi/ubi.h
index 940f6b7deec3..1fc32c863b78 100644
--- a/drivers/mtd/ubi/ubi.h
+++ b/drivers/mtd/ubi/ubi.h
@@ -74,15 +74,15 @@
 #define UBI_IO_RETRIES 3
 
 /*
- * Error codes returned by the I/O unit.
+ * Error codes returned by the I/O sub-system.
  *
  * UBI_IO_PEB_EMPTY: the physical eraseblock is empty, i.e. it contains only
- * 0xFF bytes
+ *                   %0xFF bytes
  * UBI_IO_PEB_FREE: the physical eraseblock is free, i.e. it contains only a
- * valid erase counter header, and the rest are %0xFF bytes
+ *                  valid erase counter header, and the rest are %0xFF bytes
  * UBI_IO_BAD_EC_HDR: the erase counter header is corrupted (bad magic or CRC)
  * UBI_IO_BAD_VID_HDR: the volume identifier header is corrupted (bad magic or
- * CRC)
+ *                     CRC)
  * UBI_IO_BITFLIPS: bit-flips were detected and corrected
  */
 enum {
@@ -99,9 +99,9 @@ enum {
  * @ec: erase counter
  * @pnum: physical eraseblock number
  *
- * This data structure is used in the WL unit. Each physical eraseblock has a
- * corresponding &struct wl_entry object which may be kept in different
- * RB-trees. See WL unit for details.
+ * This data structure is used in the WL sub-system. Each physical eraseblock
+ * has a corresponding &struct wl_entry object which may be kept in different
+ * RB-trees. See WL sub-system for details.
  */
 struct ubi_wl_entry {
 	struct rb_node rb;
@@ -118,10 +118,10 @@ struct ubi_wl_entry {
  * @mutex: read/write mutex to implement read/write access serialization to
  *         the (@vol_id, @lnum) logical eraseblock
  *
- * This data structure is used in the EBA unit to implement per-LEB locking.
- * When a logical eraseblock is being locked - corresponding
+ * This data structure is used in the EBA sub-system to implement per-LEB
+ * locking. When a logical eraseblock is being locked - corresponding
  * &struct ubi_ltree_entry object is inserted to the lock tree (@ubi->ltree).
- * See EBA unit for details.
+ * See EBA sub-system for details.
  */
 struct ubi_ltree_entry {
 	struct rb_node rb;
@@ -225,7 +225,7 @@ struct ubi_volume {
 #ifdef CONFIG_MTD_UBI_GLUEBI
 	/*
 	 * Gluebi-related stuff may be compiled out.
-	 * TODO: this should not be built into UBI but should be a separate
+	 * Note: this should not be built into UBI but should be a separate
 	 * ubimtd driver which works on top of UBI and emulates MTD devices.
 	 */
 	struct ubi_volume_desc *gluebi_desc;
@@ -235,8 +235,7 @@ struct ubi_volume {
 };
 
 /**
- * struct ubi_volume_desc - descriptor of the UBI volume returned when it is
- * opened.
+ * struct ubi_volume_desc - UBI volume descriptor returned when it is opened.
  * @vol: reference to the corresponding volume description object
  * @mode: open mode (%UBI_READONLY, %UBI_READWRITE, or %UBI_EXCLUSIVE)
  */
@@ -316,11 +315,11 @@ struct ubi_wl_entry;
  * @ro_mode: if the UBI device is in read-only mode
  * @leb_size: logical eraseblock size
  * @leb_start: starting offset of logical eraseblocks within physical
- * eraseblocks
+ *             eraseblocks
  * @ec_hdr_alsize: size of the EC header aligned to @hdrs_min_io_size
  * @vid_hdr_alsize: size of the VID header aligned to @hdrs_min_io_size
  * @vid_hdr_offset: starting offset of the volume identifier header (might be
- * unaligned)
+ *                  unaligned)
  * @vid_hdr_aloffset: starting offset of the VID header aligned to
  * @hdrs_min_io_size
  * @vid_hdr_shift: contains @vid_hdr_offset - @vid_hdr_aloffset
@@ -356,16 +355,16 @@ struct ubi_device {
 	struct mutex volumes_mutex;
 
 	int max_ec;
-	/* TODO: mean_ec is not updated run-time, fix */
+	/* Note, mean_ec is not updated run-time - should be fixed */
 	int mean_ec;
 
-	/* EBA unit's stuff */
+	/* EBA sub-system's stuff */
 	unsigned long long global_sqnum;
 	spinlock_t ltree_lock;
 	struct rb_root ltree;
 	struct mutex alc_mutex;
 
-	/* Wear-leveling unit's stuff */
+	/* Wear-leveling sub-system's stuff */
 	struct rb_root used;
 	struct rb_root free;
 	struct rb_root scrub;
@@ -388,7 +387,7 @@ struct ubi_device {
 	int thread_enabled;
 	char bgt_name[sizeof(UBI_BGT_NAME_PATTERN)+2];
 
-	/* I/O unit's stuff */
+	/* I/O sub-system's stuff */
 	long long flash_size;
 	int peb_count;
 	int peb_size;
diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c
index cc8fe2934d2b..761952ba125b 100644
--- a/drivers/mtd/ubi/wl.c
+++ b/drivers/mtd/ubi/wl.c
@@ -19,22 +19,22 @@
  */
 
 /*
- * UBI wear-leveling unit.
+ * UBI wear-leveling sub-system.
  *
- * This unit is responsible for wear-leveling. It works in terms of physical
- * eraseblocks and erase counters and knows nothing about logical eraseblocks,
- * volumes, etc. From this unit's perspective all physical eraseblocks are of
- * two types - used and free. Used physical eraseblocks are those that were
- * "get" by the 'ubi_wl_get_peb()' function, and free physical eraseblocks are
- * those that were put by the 'ubi_wl_put_peb()' function.
+ * This sub-system is responsible for wear-leveling. It works in terms of
+ * physical* eraseblocks and erase counters and knows nothing about logical
+ * eraseblocks, volumes, etc. From this sub-system's perspective all physical
+ * eraseblocks are of two types - used and free. Used physical eraseblocks are
+ * those that were "get" by the 'ubi_wl_get_peb()' function, and free physical
+ * eraseblocks are those that were put by the 'ubi_wl_put_peb()' function.
  *
  * Physical eraseblocks returned by 'ubi_wl_get_peb()' have only erase counter
- * header. The rest of the physical eraseblock contains only 0xFF bytes.
+ * header. The rest of the physical eraseblock contains only %0xFF bytes.
  *
- * When physical eraseblocks are returned to the WL unit by means of the
+ * When physical eraseblocks are returned to the WL sub-system by means of the
  * 'ubi_wl_put_peb()' function, they are scheduled for erasure. The erasure is
  * done asynchronously in context of the per-UBI device background thread,
- * which is also managed by the WL unit.
+ * which is also managed by the WL sub-system.
  *
  * The wear-leveling is ensured by means of moving the contents of used
  * physical eraseblocks with low erase counter to free physical eraseblocks
@@ -43,34 +43,36 @@
  * The 'ubi_wl_get_peb()' function accepts data type hints which help to pick
  * an "optimal" physical eraseblock. For example, when it is known that the
  * physical eraseblock will be "put" soon because it contains short-term data,
- * the WL unit may pick a free physical eraseblock with low erase counter, and
- * so forth.
+ * the WL sub-system may pick a free physical eraseblock with low erase
+ * counter, and so forth.
  *
- * If the WL unit fails to erase a physical eraseblock, it marks it as bad.
+ * If the WL sub-system fails to erase a physical eraseblock, it marks it as
+ * bad.
  *
- * This unit is also responsible for scrubbing. If a bit-flip is detected in a
- * physical eraseblock, it has to be moved. Technically this is the same as
- * moving it for wear-leveling reasons.
+ * This sub-system is also responsible for scrubbing. If a bit-flip is detected
+ * in a physical eraseblock, it has to be moved. Technically this is the same
+ * as moving it for wear-leveling reasons.
  *
- * As it was said, for the UBI unit all physical eraseblocks are either "free"
- * or "used". Free eraseblock are kept in the @wl->free RB-tree, while used
- * eraseblocks are kept in a set of different RB-trees: @wl->used,
+ * As it was said, for the UBI sub-system all physical eraseblocks are either
+ * "free" or "used". Free eraseblock are kept in the @wl->free RB-tree, while
+ * used eraseblocks are kept in a set of different RB-trees: @wl->used,
  * @wl->prot.pnum, @wl->prot.aec, and @wl->scrub.
  *
  * Note, in this implementation, we keep a small in-RAM object for each physical
  * eraseblock. This is surely not a scalable solution. But it appears to be good
  * enough for moderately large flashes and it is simple. In future, one may
- * re-work this unit and make it more scalable.
+ * re-work this sub-system and make it more scalable.
  *
- * At the moment this unit does not utilize the sequence number, which was
- * introduced relatively recently. But it would be wise to do this because the
- * sequence number of a logical eraseblock characterizes how old is it. For
+ * At the moment this sub-system does not utilize the sequence number, which
+ * was introduced relatively recently. But it would be wise to do this because
+ * the sequence number of a logical eraseblock characterizes how old is it. For
  * example, when we move a PEB with low erase counter, and we need to pick the
  * target PEB, we pick a PEB with the highest EC if our PEB is "old" and we
  * pick target PEB with an average EC if our PEB is not very "old". This is a
- * room for future re-works of the WL unit.
+ * room for future re-works of the WL sub-system.
  *
- * FIXME: looks too complex, should be simplified (later).
+ * Note: the stuff with protection trees looks too complex and is difficult to
+ * understand. Should be fixed.
  */
 
 #include <linux/slab.h>
@@ -92,20 +94,21 @@
 
 /*
  * Maximum difference between two erase counters. If this threshold is
- * exceeded, the WL unit starts moving data from used physical eraseblocks with
- * low erase counter to free physical eraseblocks with high erase counter.
+ * exceeded, the WL sub-system starts moving data from used physical
+ * eraseblocks with low erase counter to free physical eraseblocks with high
+ * erase counter.
  */
 #define UBI_WL_THRESHOLD CONFIG_MTD_UBI_WL_THRESHOLD
 
 /*
- * When a physical eraseblock is moved, the WL unit has to pick the target
+ * When a physical eraseblock is moved, the WL sub-system has to pick the target
  * physical eraseblock to move to. The simplest way would be just to pick the
  * one with the highest erase counter. But in certain workloads this could lead
  * to an unlimited wear of one or few physical eraseblock. Indeed, imagine a
  * situation when the picked physical eraseblock is constantly erased after the
  * data is written to it. So, we have a constant which limits the highest erase
- * counter of the free physical eraseblock to pick. Namely, the WL unit does
- * not pick eraseblocks with erase counter greater then the lowest erase
+ * counter of the free physical eraseblock to pick. Namely, the WL sub-system
+ * does not pick eraseblocks with erase counter greater then the lowest erase
  * counter plus %WL_FREE_MAX_DIFF.
  */
 #define WL_FREE_MAX_DIFF (2*UBI_WL_THRESHOLD)
@@ -123,11 +126,11 @@
  * @abs_ec: the absolute erase counter value when the protection ends
  * @e: the wear-leveling entry of the physical eraseblock under protection
  *
- * When the WL unit returns a physical eraseblock, the physical eraseblock is
- * protected from being moved for some "time". For this reason, the physical
- * eraseblock is not directly moved from the @wl->free tree to the @wl->used
- * tree. There is one more tree in between where this physical eraseblock is
- * temporarily stored (@wl->prot).
+ * When the WL sub-system returns a physical eraseblock, the physical
+ * eraseblock is protected from being moved for some "time". For this reason,
+ * the physical eraseblock is not directly moved from the @wl->free tree to the
+ * @wl->used tree. There is one more tree in between where this physical
+ * eraseblock is temporarily stored (@wl->prot).
  *
  * All this protection stuff is needed because:
  *  o we don't want to move physical eraseblocks just after we have given them
@@ -175,7 +178,6 @@ struct ubi_wl_prot_entry {
  * @list: a link in the list of pending works
  * @func: worker function
  * @priv: private data of the worker function
- *
  * @e: physical eraseblock to erase
  * @torture: if the physical eraseblock has to be tortured
  *
@@ -1136,7 +1138,7 @@ out_ro:
 }
 
 /**
- * ubi_wl_put_peb - return a physical eraseblock to the wear-leveling unit.
+ * ubi_wl_put_peb - return a PEB to the wear-leveling sub-system.
  * @ubi: UBI device description object
  * @pnum: physical eraseblock to return
  * @torture: if this physical eraseblock has to be tortured
@@ -1175,11 +1177,11 @@ retry:
 		/*
 		 * User is putting the physical eraseblock which was selected
 		 * as the target the data is moved to. It may happen if the EBA
-		 * unit already re-mapped the LEB in 'ubi_eba_copy_leb()' but
-		 * the WL unit has not put the PEB to the "used" tree yet, but
-		 * it is about to do this. So we just set a flag which will
-		 * tell the WL worker that the PEB is not needed anymore and
-		 * should be scheduled for erasure.
+		 * sub-system already re-mapped the LEB in 'ubi_eba_copy_leb()'
+		 * but the WL sub-system has not put the PEB to the "used" tree
+		 * yet, but it is about to do this. So we just set a flag which
+		 * will tell the WL worker that the PEB is not needed anymore
+		 * and should be scheduled for erasure.
 		 */
 		dbg_wl("PEB %d is the target of data moving", pnum);
 		ubi_assert(!ubi->move_to_put);
@@ -1425,8 +1427,7 @@ static void cancel_pending(struct ubi_device *ubi)
 }
 
 /**
- * ubi_wl_init_scan - initialize the wear-leveling unit using scanning
- * information.
+ * ubi_wl_init_scan - initialize the WL sub-system using scanning information.
  * @ubi: UBI device description object
  * @si: scanning information
  *
@@ -1583,13 +1584,12 @@ static void protection_trees_destroy(struct ubi_device *ubi)
 }
 
 /**
- * ubi_wl_close - close the wear-leveling unit.
+ * ubi_wl_close - close the wear-leveling sub-system.
  * @ubi: UBI device description object
  */
 void ubi_wl_close(struct ubi_device *ubi)
 {
-	dbg_wl("close the UBI wear-leveling unit");
-
+	dbg_wl("close the WL sub-system");
 	cancel_pending(ubi);
 	protection_trees_destroy(ubi);
 	tree_destroy(&ubi->used);
diff --git a/include/linux/mtd/ubi.h b/include/linux/mtd/ubi.h
index 83302bbbddb4..6316fafe5c2a 100644
--- a/include/linux/mtd/ubi.h
+++ b/include/linux/mtd/ubi.h
@@ -45,13 +45,13 @@ enum {
  * @size: how many physical eraseblocks are reserved for this volume
  * @used_bytes: how many bytes of data this volume contains
  * @used_ebs: how many physical eraseblocks of this volume actually contain any
- * data
+ *            data
  * @vol_type: volume type (%UBI_DYNAMIC_VOLUME or %UBI_STATIC_VOLUME)
  * @corrupted: non-zero if the volume is corrupted (static volumes only)
  * @upd_marker: non-zero if the volume has update marker set
  * @alignment: volume alignment
  * @usable_leb_size: how many bytes are available in logical eraseblocks of
- * this volume
+ *                   this volume
  * @name_len: volume name length
  * @name: volume name
  * @cdev: UBI volume character device major and minor numbers
-- 
cgit v1.2.3


From b552068999b0b05087c454e525b30b785c79dc9b Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <matthew@wil.cx>
Date: Wed, 23 Apr 2008 10:07:27 -0400
Subject: Remove __DECLARE_SEMAPHORE_GENERIC

There are no users of __DECLARE_SEMAPHORE_GENERIC in the kernel

Signed-off-by: Matthew Wilcox <willy@linux.intel.com>
---
 include/linux/semaphore.h | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/semaphore.h b/include/linux/semaphore.h
index 9cae64b00d6b..7415839ac890 100644
--- a/include/linux/semaphore.h
+++ b/include/linux/semaphore.h
@@ -26,10 +26,8 @@ struct semaphore {
 	.wait_list	= LIST_HEAD_INIT((name).wait_list),		\
 }
 
-#define __DECLARE_SEMAPHORE_GENERIC(name, count) \
-	struct semaphore name = __SEMAPHORE_INITIALIZER(name, count)
-
-#define DECLARE_MUTEX(name)	__DECLARE_SEMAPHORE_GENERIC(name, 1)
+#define DECLARE_MUTEX(name)	\
+	struct semaphore name = __SEMAPHORE_INITIALIZER(name, 1)
 
 static inline void sema_init(struct semaphore *sem, int val)
 {
-- 
cgit v1.2.3


From e108526e77aa41c89b3be96f75d97615db2b751c Mon Sep 17 00:00:00 2001
From: Akinobu Mita <akinobu.mita@gmail.com>
Date: Wed, 23 Jul 2008 21:26:44 -0700
Subject: move memory_read_from_buffer() from fs.h to string.h

James Bottomley warns that inclusion of linux/fs.h in a low level
driver was always a danger signal.  This patch moves
memory_read_from_buffer() from fs.h to string.h and fixes includes in
existing memory_read_from_buffer() users.

Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Cc: James Bottomley <James.Bottomley@hansenpartnership.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Zhang Rui <rui.zhang@intel.com>
Cc: Bob Moore <robert.moore@intel.com>
Cc: Thomas Renninger <trenn@suse.de>
Cc: Len Brown <lenb@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/acpi/system.c       | 1 +
 drivers/zorro/zorro-sysfs.c | 1 -
 include/linux/fs.h          | 2 --
 include/linux/string.h      | 3 +++
 4 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/system.c b/drivers/acpi/system.c
index d8e3f153b295..91dec448b3ed 100644
--- a/drivers/acpi/system.c
+++ b/drivers/acpi/system.c
@@ -26,6 +26,7 @@
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/init.h>
+#include <linux/string.h>
 #include <asm/uaccess.h>
 
 #include <acpi/acpi_drivers.h>
diff --git a/drivers/zorro/zorro-sysfs.c b/drivers/zorro/zorro-sysfs.c
index 3da712cc7708..5290552d2ef7 100644
--- a/drivers/zorro/zorro-sysfs.c
+++ b/drivers/zorro/zorro-sysfs.c
@@ -15,7 +15,6 @@
 #include <linux/zorro.h>
 #include <linux/stat.h>
 #include <linux/string.h>
-#include <linux/fs.h>
 
 #include "zorro.h"
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 9c2ac5c0ef5c..ff54ae4933f3 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2006,8 +2006,6 @@ extern void simple_release_fs(struct vfsmount **mount, int *count);
 
 extern ssize_t simple_read_from_buffer(void __user *to, size_t count,
 			loff_t *ppos, const void *from, size_t available);
-extern ssize_t memory_read_from_buffer(void *to, size_t count, loff_t *ppos,
-			const void *from, size_t available);
 
 #ifdef CONFIG_MIGRATION
 extern int buffer_migrate_page(struct address_space *,
diff --git a/include/linux/string.h b/include/linux/string.h
index efdc44593b52..810d80df0a1d 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -111,5 +111,8 @@ extern void argv_free(char **argv);
 
 extern bool sysfs_streq(const char *s1, const char *s2);
 
+extern ssize_t memory_read_from_buffer(void *to, size_t count, loff_t *ppos,
+			const void *from, size_t available);
+
 #endif
 #endif /* _LINUX_STRING_H_ */
-- 
cgit v1.2.3


From 8b05c7e6e159d2f33c9275281b8b909a89eb7c5d Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Wed, 23 Jul 2008 21:26:53 -0700
Subject: add a helper function to test if an object is on the stack

lib/debugobjects.c has a function to test if an object is on the stack.
The block layer and ide needs it (they need to avoid DMA from/to stack
buffers).  This patch moves the function to include/linux/sched.h so that
everyone can use it.

lib/debugobjects.c uses current->stack but this patch uses a
task_stack_page() accessor, which is a preferable way to access the stack.

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Andy Whitcroft <apw@shadowen.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched.h | 7 +++++++
 lib/debugobjects.c    | 4 +---
 2 files changed, 8 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index dc7e592c473a..6aca4a16e377 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1983,6 +1983,13 @@ static inline unsigned long *end_of_stack(struct task_struct *p)
 
 #endif
 
+static inline int object_is_on_stack(void *obj)
+{
+	void *stack = task_stack_page(current);
+
+	return (obj >= stack) && (obj < (stack + THREAD_SIZE));
+}
+
 extern void thread_info_cache_init(void);
 
 /* set thread flags in other task's structures
diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index 85b18d79be89..f86196390cfd 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -226,15 +226,13 @@ debug_object_fixup(int (*fixup)(void *addr, enum debug_obj_state state),
 
 static void debug_object_is_on_stack(void *addr, int onstack)
 {
-	void *stack = current->stack;
 	int is_on_stack;
 	static int limit;
 
 	if (limit > 4)
 		return;
 
-	is_on_stack = (addr >= stack && addr < (stack + THREAD_SIZE));
-
+	is_on_stack = object_is_on_stack(addr);
 	if (is_on_stack == onstack)
 		return;
 
-- 
cgit v1.2.3


From b61bfa3c462671c48a51fb5c31af337c5a996a04 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@saeurebad.de>
Date: Wed, 23 Jul 2008 21:26:55 -0700
Subject: mm: move bootmem descriptors definition to a single place

There are a lot of places that define either a single bootmem descriptor or an
array of them.  Use only one central array with MAX_NUMNODES items instead.

Signed-off-by: Johannes Weiner <hannes@saeurebad.de>
Acked-by: Ralf Baechle <ralf@linux-mips.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Russell King <rmk@arm.linux.org.uk>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Hirokazu Takata <takata@linux-m32r.org>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Kyle McMartin <kyle@parisc-linux.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: David S. Miller <davem@davemloft.net>
Cc: Yinghai Lu <yhlu.kernel@gmail.com>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/alpha/mm/numa.c             |  8 ++++----
 arch/arm/mm/discontig.c          | 34 ++++++++++++++++------------------
 arch/ia64/mm/discontig.c         | 11 +++++------
 arch/m32r/mm/discontig.c         |  4 +---
 arch/m68k/mm/init.c              |  4 +---
 arch/mips/sgi-ip27/ip27-memory.c |  4 +---
 arch/parisc/mm/init.c            |  3 +--
 arch/powerpc/mm/numa.c           |  3 +--
 arch/sh/mm/numa.c                |  5 ++---
 arch/sparc64/mm/init.c           |  3 +--
 arch/x86/mm/discontig_32.c       |  3 +--
 arch/x86/mm/numa_64.c            |  4 +---
 include/linux/bootmem.h          |  2 ++
 mm/bootmem.c                     |  2 ++
 mm/page_alloc.c                  |  4 +---
 15 files changed, 40 insertions(+), 54 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/mm/numa.c b/arch/alpha/mm/numa.c
index 10ab7833e83c..a53fda0481ca 100644
--- a/arch/alpha/mm/numa.c
+++ b/arch/alpha/mm/numa.c
@@ -19,7 +19,6 @@
 #include <asm/pgalloc.h>
 
 pg_data_t node_data[MAX_NUMNODES];
-bootmem_data_t node_bdata[MAX_NUMNODES];
 EXPORT_SYMBOL(node_data);
 
 #undef DEBUG_DISCONTIG
@@ -141,7 +140,7 @@ setup_memory_node(int nid, void *kernel_end)
 		printk(" not enough mem to reserve NODE_DATA");
 		return;
 	}
-	NODE_DATA(nid)->bdata = &node_bdata[nid];
+	NODE_DATA(nid)->bdata = &bootmem_node_data[nid];
 
 	printk(" Detected node memory:   start %8lu, end %8lu\n",
 	       node_min_pfn, node_max_pfn);
@@ -304,8 +303,9 @@ void __init paging_init(void)
 	dma_local_pfn = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
 
 	for_each_online_node(nid) {
-		unsigned long start_pfn = node_bdata[nid].node_boot_start >> PAGE_SHIFT;
-		unsigned long end_pfn = node_bdata[nid].node_low_pfn;
+		bootmem_data_t *bdata = &bootmem_node_data[nid];
+		unsigned long start_pfn = bdata->node_boot_start >> PAGE_SHIFT;
+		unsigned long end_pfn = bdata->node_low_pfn;
 
 		if (dma_local_pfn >= end_pfn - start_pfn)
 			zones_size[ZONE_DMA] = end_pfn - start_pfn;
diff --git a/arch/arm/mm/discontig.c b/arch/arm/mm/discontig.c
index 1e5602189507..c8c0c4b0f0a3 100644
--- a/arch/arm/mm/discontig.c
+++ b/arch/arm/mm/discontig.c
@@ -21,26 +21,24 @@
  * Our node_data structure for discontiguous memory.
  */
 
-static bootmem_data_t node_bootmem_data[MAX_NUMNODES];
-
 pg_data_t discontig_node_data[MAX_NUMNODES] = {
-  { .bdata = &node_bootmem_data[0] },
-  { .bdata = &node_bootmem_data[1] },
-  { .bdata = &node_bootmem_data[2] },
-  { .bdata = &node_bootmem_data[3] },
+  { .bdata = &bootmem_node_data[0] },
+  { .bdata = &bootmem_node_data[1] },
+  { .bdata = &bootmem_node_data[2] },
+  { .bdata = &bootmem_node_data[3] },
 #if MAX_NUMNODES == 16
-  { .bdata = &node_bootmem_data[4] },
-  { .bdata = &node_bootmem_data[5] },
-  { .bdata = &node_bootmem_data[6] },
-  { .bdata = &node_bootmem_data[7] },
-  { .bdata = &node_bootmem_data[8] },
-  { .bdata = &node_bootmem_data[9] },
-  { .bdata = &node_bootmem_data[10] },
-  { .bdata = &node_bootmem_data[11] },
-  { .bdata = &node_bootmem_data[12] },
-  { .bdata = &node_bootmem_data[13] },
-  { .bdata = &node_bootmem_data[14] },
-  { .bdata = &node_bootmem_data[15] },
+  { .bdata = &bootmem_node_data[4] },
+  { .bdata = &bootmem_node_data[5] },
+  { .bdata = &bootmem_node_data[6] },
+  { .bdata = &bootmem_node_data[7] },
+  { .bdata = &bootmem_node_data[8] },
+  { .bdata = &bootmem_node_data[9] },
+  { .bdata = &bootmem_node_data[10] },
+  { .bdata = &bootmem_node_data[11] },
+  { .bdata = &bootmem_node_data[12] },
+  { .bdata = &bootmem_node_data[13] },
+  { .bdata = &bootmem_node_data[14] },
+  { .bdata = &bootmem_node_data[15] },
 #endif
 };
 
diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
index 544dc420c65e..2fcf8464331e 100644
--- a/arch/ia64/mm/discontig.c
+++ b/arch/ia64/mm/discontig.c
@@ -36,7 +36,6 @@ struct early_node_data {
 	struct ia64_node_data *node_data;
 	unsigned long pernode_addr;
 	unsigned long pernode_size;
-	struct bootmem_data bootmem_data;
 	unsigned long num_physpages;
 #ifdef CONFIG_ZONE_DMA
 	unsigned long num_dma_physpages;
@@ -76,7 +75,7 @@ static int __init build_node_maps(unsigned long start, unsigned long len,
 				  int node)
 {
 	unsigned long cstart, epfn, end = start + len;
-	struct bootmem_data *bdp = &mem_data[node].bootmem_data;
+	struct bootmem_data *bdp = &bootmem_node_data[node];
 
 	epfn = GRANULEROUNDUP(end) >> PAGE_SHIFT;
 	cstart = GRANULEROUNDDOWN(start);
@@ -167,7 +166,7 @@ static void __init fill_pernode(int node, unsigned long pernode,
 {
 	void *cpu_data;
 	int cpus = early_nr_cpus_node(node);
-	struct bootmem_data *bdp = &mem_data[node].bootmem_data;
+	struct bootmem_data *bdp = &bootmem_node_data[node];
 
 	mem_data[node].pernode_addr = pernode;
 	mem_data[node].pernode_size = pernodesize;
@@ -224,7 +223,7 @@ static int __init find_pernode_space(unsigned long start, unsigned long len,
 {
 	unsigned long epfn;
 	unsigned long pernodesize = 0, pernode, pages, mapsize;
-	struct bootmem_data *bdp = &mem_data[node].bootmem_data;
+	struct bootmem_data *bdp = &bootmem_node_data[node];
 
 	epfn = (start + len) >> PAGE_SHIFT;
 
@@ -440,7 +439,7 @@ void __init find_memory(void)
 	efi_memmap_walk(find_max_min_low_pfn, NULL);
 
 	for_each_online_node(node)
-		if (mem_data[node].bootmem_data.node_low_pfn) {
+		if (bootmem_node_data[node].node_low_pfn) {
 			node_clear(node, memory_less_mask);
 			mem_data[node].min_pfn = ~0UL;
 		}
@@ -460,7 +459,7 @@ void __init find_memory(void)
 		else if (node_isset(node, memory_less_mask))
 			continue;
 
-		bdp = &mem_data[node].bootmem_data;
+		bdp = &bootmem_node_data[node];
 		pernode = mem_data[node].pernode_addr;
 		pernodesize = mem_data[node].pernode_size;
 		map = pernode + pernodesize;
diff --git a/arch/m32r/mm/discontig.c b/arch/m32r/mm/discontig.c
index 07c1af7dc0e2..aa9145ef6cca 100644
--- a/arch/m32r/mm/discontig.c
+++ b/arch/m32r/mm/discontig.c
@@ -20,7 +20,6 @@ extern char _end[];
 
 struct pglist_data *node_data[MAX_NUMNODES];
 EXPORT_SYMBOL(node_data);
-static bootmem_data_t node_bdata[MAX_NUMNODES] __initdata;
 
 pg_data_t m32r_node_data[MAX_NUMNODES];
 
@@ -81,7 +80,7 @@ unsigned long __init setup_memory(void)
 	for_each_online_node(nid) {
 		mp = &mem_prof[nid];
 		NODE_DATA(nid)=(pg_data_t *)&m32r_node_data[nid];
-		NODE_DATA(nid)->bdata = &node_bdata[nid];
+		NODE_DATA(nid)->bdata = &bootmem_node_data[nid];
 		min_pfn = mp->start_pfn;
 		max_pfn = mp->start_pfn + mp->pages;
 		bootmap_size = init_bootmem_node(NODE_DATA(nid), mp->free_pfn,
@@ -163,4 +162,3 @@ unsigned long __init zone_sizes_init(void)
 
 	return holes;
 }
-
diff --git a/arch/m68k/mm/init.c b/arch/m68k/mm/init.c
index d8fb9c5303cc..79f5f94d4800 100644
--- a/arch/m68k/mm/init.c
+++ b/arch/m68k/mm/init.c
@@ -32,8 +32,6 @@
 
 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
 
-static bootmem_data_t __initdata bootmem_data[MAX_NUMNODES];
-
 pg_data_t pg_data_map[MAX_NUMNODES];
 EXPORT_SYMBOL(pg_data_map);
 
@@ -58,7 +56,7 @@ void __init m68k_setup_node(int node)
 		pg_data_table[i] = pg_data_map + node;
 	}
 #endif
-	pg_data_map[node].bdata = bootmem_data + node;
+	pg_data_map[node].bdata = bootmem_node_data + node;
 	node_set_online(node);
 }
 
diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c
index 42cd10956306..060d853d7b35 100644
--- a/arch/mips/sgi-ip27/ip27-memory.c
+++ b/arch/mips/sgi-ip27/ip27-memory.c
@@ -33,8 +33,6 @@
 #define SLOT_PFNSHIFT           (SLOT_SHIFT - PAGE_SHIFT)
 #define PFN_NASIDSHFT           (NASID_SHFT - PAGE_SHIFT)
 
-static struct bootmem_data __initdata plat_node_bdata[MAX_COMPACT_NODES];
-
 struct node_data *__node_data[MAX_COMPACT_NODES];
 
 EXPORT_SYMBOL(__node_data);
@@ -403,7 +401,7 @@ static void __init node_mem_init(cnodeid_t node)
 	 */
 	__node_data[node] = __va(slot_freepfn << PAGE_SHIFT);
 
-	NODE_DATA(node)->bdata = &plat_node_bdata[node];
+	NODE_DATA(node)->bdata = &bootmem_node_data[node];
 	NODE_DATA(node)->node_start_pfn = start_pfn;
 	NODE_DATA(node)->node_spanned_pages = end_pfn - start_pfn;
 
diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c
index b4d6c8777ed0..0ddf4904640a 100644
--- a/arch/parisc/mm/init.c
+++ b/arch/parisc/mm/init.c
@@ -36,7 +36,6 @@ extern int  data_start;
 
 #ifdef CONFIG_DISCONTIGMEM
 struct node_map_data node_data[MAX_NUMNODES] __read_mostly;
-bootmem_data_t bmem_data[MAX_NUMNODES] __read_mostly;
 unsigned char pfnnid_map[PFNNID_MAP_MAX] __read_mostly;
 #endif
 
@@ -262,7 +261,7 @@ static void __init setup_bootmem(void)
 #ifdef CONFIG_DISCONTIGMEM
 	for (i = 0; i < MAX_PHYSMEM_RANGES; i++) {
 		memset(NODE_DATA(i), 0, sizeof(pg_data_t));
-		NODE_DATA(i)->bdata = &bmem_data[i];
+		NODE_DATA(i)->bdata = &bootmem_node_data[i];
 	}
 	memset(pfnnid_map, 0xff, sizeof(pfnnid_map));
 
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index cf4bffba6f7c..d9a181351332 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -39,7 +39,6 @@ EXPORT_SYMBOL(numa_cpu_lookup_table);
 EXPORT_SYMBOL(numa_cpumask_lookup_table);
 EXPORT_SYMBOL(node_data);
 
-static bootmem_data_t __initdata plat_node_bdata[MAX_NUMNODES];
 static int min_common_depth;
 static int n_mem_addr_cells, n_mem_size_cells;
 
@@ -816,7 +815,7 @@ void __init do_init_bootmem(void)
   		dbg("node %d\n", nid);
 		dbg("NODE_DATA() = %p\n", NODE_DATA(nid));
 
-		NODE_DATA(nid)->bdata = &plat_node_bdata[nid];
+		NODE_DATA(nid)->bdata = &bootmem_node_data[nid];
 		NODE_DATA(nid)->node_start_pfn = start_pfn;
 		NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn;
 
diff --git a/arch/sh/mm/numa.c b/arch/sh/mm/numa.c
index 1663199ce888..095d93bec7cd 100644
--- a/arch/sh/mm/numa.c
+++ b/arch/sh/mm/numa.c
@@ -14,7 +14,6 @@
 #include <linux/pfn.h>
 #include <asm/sections.h>
 
-static bootmem_data_t plat_node_bdata[MAX_NUMNODES];
 struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
 EXPORT_SYMBOL_GPL(node_data);
 
@@ -35,7 +34,7 @@ void __init setup_memory(void)
 	NODE_DATA(0) = pfn_to_kaddr(free_pfn);
 	memset(NODE_DATA(0), 0, sizeof(struct pglist_data));
 	free_pfn += PFN_UP(sizeof(struct pglist_data));
-	NODE_DATA(0)->bdata = &plat_node_bdata[0];
+	NODE_DATA(0)->bdata = &bootmem_node_data[0];
 
 	/* Set up node 0 */
 	setup_bootmem_allocator(free_pfn);
@@ -66,7 +65,7 @@ void __init setup_bootmem_node(int nid, unsigned long start, unsigned long end)
 	free_pfn += PFN_UP(sizeof(struct pglist_data));
 	memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
 
-	NODE_DATA(nid)->bdata = &plat_node_bdata[nid];
+	NODE_DATA(nid)->bdata = &bootmem_node_data[nid];
 	NODE_DATA(nid)->node_start_pfn = start_pfn;
 	NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn;
 
diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c
index 84898c44dd4d..713297473951 100644
--- a/arch/sparc64/mm/init.c
+++ b/arch/sparc64/mm/init.c
@@ -788,7 +788,6 @@ int numa_cpu_lookup_table[NR_CPUS];
 cpumask_t numa_cpumask_lookup_table[MAX_NUMNODES];
 
 #ifdef CONFIG_NEED_MULTIPLE_NODES
-static bootmem_data_t plat_node_bdata[MAX_NUMNODES];
 
 struct mdesc_mblock {
 	u64	base;
@@ -871,7 +870,7 @@ static void __init allocate_node_data(int nid)
 	NODE_DATA(nid) = __va(paddr);
 	memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
 
-	NODE_DATA(nid)->bdata = &plat_node_bdata[nid];
+	NODE_DATA(nid)->bdata = &bootmem_node_data[nid];
 #endif
 
 	p = NODE_DATA(nid);
diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c
index 5dfef9fa061a..62fa440678d8 100644
--- a/arch/x86/mm/discontig_32.c
+++ b/arch/x86/mm/discontig_32.c
@@ -42,7 +42,6 @@
 
 struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
 EXPORT_SYMBOL(node_data);
-static bootmem_data_t node0_bdata;
 
 /*
  * numa interface - we expect the numa architecture specific code to have
@@ -385,7 +384,7 @@ void __init initmem_init(unsigned long start_pfn,
 	for_each_online_node(nid)
 		memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
 
-	NODE_DATA(0)->bdata = &node0_bdata;
+	NODE_DATA(0)->bdata = &bootmem_node_data[0];
 	setup_bootmem_allocator();
 }
 
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 9782f42dd319..a4dd793d6003 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -23,8 +23,6 @@
 struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
 EXPORT_SYMBOL(node_data);
 
-static bootmem_data_t plat_node_bdata[MAX_NUMNODES];
-
 struct memnode memnode;
 
 s16 apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
@@ -198,7 +196,7 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
 		nodedata_phys + pgdat_size - 1);
 
 	memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t));
-	NODE_DATA(nodeid)->bdata = &plat_node_bdata[nodeid];
+	NODE_DATA(nodeid)->bdata = &bootmem_node_data[nodeid];
 	NODE_DATA(nodeid)->node_start_pfn = start_pfn;
 	NODE_DATA(nodeid)->node_spanned_pages = last_pfn - start_pfn;
 
diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index a1d9b79078ea..2599c741405e 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -38,6 +38,8 @@ typedef struct bootmem_data {
 	struct list_head list;
 } bootmem_data_t;
 
+extern bootmem_data_t bootmem_node_data[];
+
 extern unsigned long bootmem_bootmap_pages(unsigned long);
 extern unsigned long init_bootmem(unsigned long addr, unsigned long memend);
 extern void free_bootmem(unsigned long addr, unsigned long size);
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 9f4bbc5da73f..35b3cb667036 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -36,6 +36,8 @@ static LIST_HEAD(bdata_list);
 unsigned long saved_max_pfn;
 #endif
 
+bootmem_data_t bootmem_node_data[MAX_NUMNODES] __initdata;
+
 /* return the number of _pages_ that will be allocated for the boot bitmap */
 unsigned long __init bootmem_bootmap_pages(unsigned long pages)
 {
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 9ece07ce65b0..e089b92cdfff 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4040,9 +4040,7 @@ void __init set_dma_reserve(unsigned long new_dma_reserve)
 }
 
 #ifndef CONFIG_NEED_MULTIPLE_NODES
-static bootmem_data_t contig_bootmem_data;
-struct pglist_data contig_page_data = { .bdata = &contig_bootmem_data };
-
+struct pglist_data contig_page_data = { .bdata = &bootmem_node_data[0] };
 EXPORT_SYMBOL(contig_page_data);
 #endif
 
-- 
cgit v1.2.3


From ffc6421f0720f433b5b35b89ff56e998eabff93b Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@saeurebad.de>
Date: Wed, 23 Jul 2008 21:26:59 -0700
Subject: mm: unexport __alloc_bootmem_core()

This function has no external callers, so unexport it.  Also fix its naming
inconsistency.

Signed-off-by: Johannes Weiner <hannes@saeurebad.de>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Yinghai Lu <yhlu.kernel@gmail.com>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Andy Whitcroft <apw@shadowen.org>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bootmem.h |  5 -----
 mm/bootmem.c            | 24 ++++++++++++------------
 2 files changed, 12 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index 2599c741405e..dd8fee6c46d9 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -56,11 +56,6 @@ extern void *__alloc_bootmem_low_node(pg_data_t *pgdat,
 				      unsigned long size,
 				      unsigned long align,
 				      unsigned long goal);
-extern void *__alloc_bootmem_core(struct bootmem_data *bdata,
-				  unsigned long size,
-				  unsigned long align,
-				  unsigned long goal,
-				  unsigned long limit);
 
 /*
  * flags for reserve_bootmem (also if CONFIG_HAVE_ARCH_BOOTMEM_NODE,
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 251c66c5d96a..4bc6ae2fbaa3 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -234,9 +234,9 @@ static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr,
  *
  * NOTE:  This function is _not_ reentrant.
  */
-void * __init
-__alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size,
-	      unsigned long align, unsigned long goal, unsigned long limit)
+static void * __init
+alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size,
+		unsigned long align, unsigned long goal, unsigned long limit)
 {
 	unsigned long areasize, preferred;
 	unsigned long i, start = 0, incr, eidx, end_pfn;
@@ -245,7 +245,7 @@ __alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size,
 	void *node_bootmem_map;
 
 	if (!size) {
-		printk("__alloc_bootmem_core(): zero-sized request\n");
+		printk("alloc_bootmem_core(): zero-sized request\n");
 		BUG();
 	}
 	BUG_ON(align & (align-1));
@@ -512,7 +512,7 @@ void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align,
 	void *ptr;
 
 	list_for_each_entry(bdata, &bdata_list, list) {
-		ptr = __alloc_bootmem_core(bdata, size, align, goal, 0);
+		ptr = alloc_bootmem_core(bdata, size, align, goal, 0);
 		if (ptr)
 			return ptr;
 	}
@@ -540,7 +540,7 @@ void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size,
 {
 	void *ptr;
 
-	ptr = __alloc_bootmem_core(pgdat->bdata, size, align, goal, 0);
+	ptr = alloc_bootmem_core(pgdat->bdata, size, align, goal, 0);
 	if (ptr)
 		return ptr;
 
@@ -559,8 +559,8 @@ void * __init alloc_bootmem_section(unsigned long size,
 	goal = PFN_PHYS(pfn);
 	limit = PFN_PHYS(section_nr_to_pfn(section_nr + 1)) - 1;
 	pgdat = NODE_DATA(early_pfn_to_nid(pfn));
-	ptr = __alloc_bootmem_core(pgdat->bdata, size, SMP_CACHE_BYTES, goal,
-				   limit);
+	ptr = alloc_bootmem_core(pgdat->bdata, size, SMP_CACHE_BYTES, goal,
+				limit);
 
 	if (!ptr)
 		return NULL;
@@ -589,8 +589,8 @@ void * __init __alloc_bootmem_low(unsigned long size, unsigned long align,
 	void *ptr;
 
 	list_for_each_entry(bdata, &bdata_list, list) {
-		ptr = __alloc_bootmem_core(bdata, size, align, goal,
-						ARCH_LOW_ADDRESS_LIMIT);
+		ptr = alloc_bootmem_core(bdata, size, align, goal,
+					ARCH_LOW_ADDRESS_LIMIT);
 		if (ptr)
 			return ptr;
 	}
@@ -606,6 +606,6 @@ void * __init __alloc_bootmem_low(unsigned long size, unsigned long align,
 void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size,
 				       unsigned long align, unsigned long goal)
 {
-	return __alloc_bootmem_core(pgdat->bdata, size, align, goal,
-				    ARCH_LOW_ADDRESS_LIMIT);
+	return alloc_bootmem_core(pgdat->bdata, size, align, goal,
+				ARCH_LOW_ADDRESS_LIMIT);
 }
-- 
cgit v1.2.3


From e4048e5dc4aecec670f48ed007a28779f09cebd6 Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Wed, 23 Jul 2008 21:27:01 -0700
Subject: page allocator: inline some __alloc_pages() wrappers

Two zonelist patch series rewrote __page_alloc() largely.  Now, it is just
a wrapper function.  Inlining them will save a function call.

[akpm@linux-foundation.org: export __alloc_pages_internal]
Cc: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/gfp.h | 21 +++++++++++++++++----
 mm/page_alloc.c     | 19 ++-----------------
 2 files changed, 19 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index b414be387180..f640ed241422 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -173,11 +173,24 @@ static inline void arch_free_page(struct page *page, int order) { }
 static inline void arch_alloc_page(struct page *page, int order) { }
 #endif
 
-extern struct page *__alloc_pages(gfp_t, unsigned int, struct zonelist *);
+struct page *
+__alloc_pages_internal(gfp_t gfp_mask, unsigned int order,
+		       struct zonelist *zonelist, nodemask_t *nodemask);
+
+static inline struct page *
+__alloc_pages(gfp_t gfp_mask, unsigned int order,
+		struct zonelist *zonelist)
+{
+	return __alloc_pages_internal(gfp_mask, order, zonelist, NULL);
+}
+
+static inline struct page *
+__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
+		struct zonelist *zonelist, nodemask_t *nodemask)
+{
+	return __alloc_pages_internal(gfp_mask, order, zonelist, nodemask);
+}
 
-extern struct page *
-__alloc_pages_nodemask(gfp_t, unsigned int,
-				struct zonelist *, nodemask_t *nodemask);
 
 static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask,
 						unsigned int order)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index e089b92cdfff..35b1347d81bb 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1429,7 +1429,7 @@ try_next_zone:
 /*
  * This is the 'heart' of the zoned buddy allocator.
  */
-static struct page *
+struct page *
 __alloc_pages_internal(gfp_t gfp_mask, unsigned int order,
 			struct zonelist *zonelist, nodemask_t *nodemask)
 {
@@ -1632,22 +1632,7 @@ nopage:
 got_pg:
 	return page;
 }
-
-struct page *
-__alloc_pages(gfp_t gfp_mask, unsigned int order,
-		struct zonelist *zonelist)
-{
-	return __alloc_pages_internal(gfp_mask, order, zonelist, NULL);
-}
-
-struct page *
-__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
-		struct zonelist *zonelist, nodemask_t *nodemask)
-{
-	return __alloc_pages_internal(gfp_mask, order, zonelist, nodemask);
-}
-
-EXPORT_SYMBOL(__alloc_pages);
+EXPORT_SYMBOL(__alloc_pages_internal);
 
 /*
  * Common helper functions.
-- 
cgit v1.2.3


From c748e1340e0de3fa7fed86f8bdf499be9242afff Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Wed, 23 Jul 2008 21:27:03 -0700
Subject: mm/vmstat.c: proper externs

This patch adds proper extern declarations for five variables in
include/linux/vmstat.h

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/proc_misc.c    | 4 ----
 include/linux/vmstat.h | 6 ++++++
 kernel/sysctl.c        | 2 +-
 mm/vmstat.c            | 1 +
 4 files changed, 8 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index c652d469dc08..b14f43d25e9e 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -232,7 +232,6 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
 #undef K
 }
 
-extern const struct seq_operations fragmentation_op;
 static int fragmentation_open(struct inode *inode, struct file *file)
 {
 	(void)inode;
@@ -246,7 +245,6 @@ static const struct file_operations fragmentation_file_operations = {
 	.release	= seq_release,
 };
 
-extern const struct seq_operations pagetypeinfo_op;
 static int pagetypeinfo_open(struct inode *inode, struct file *file)
 {
 	return seq_open(file, &pagetypeinfo_op);
@@ -259,7 +257,6 @@ static const struct file_operations pagetypeinfo_file_ops = {
 	.release	= seq_release,
 };
 
-extern const struct seq_operations zoneinfo_op;
 static int zoneinfo_open(struct inode *inode, struct file *file)
 {
 	return seq_open(file, &zoneinfo_op);
@@ -356,7 +353,6 @@ static const struct file_operations proc_devinfo_operations = {
 	.release	= seq_release,
 };
 
-extern const struct seq_operations vmstat_op;
 static int vmstat_open(struct inode *inode, struct file *file)
 {
 	return seq_open(file, &vmstat_op);
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index e83b69346d23..58334d439516 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -44,6 +44,12 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
 		NR_VM_EVENT_ITEMS
 };
 
+extern const struct seq_operations fragmentation_op;
+extern const struct seq_operations pagetypeinfo_op;
+extern const struct seq_operations zoneinfo_op;
+extern const struct seq_operations vmstat_op;
+extern int sysctl_stat_interval;
+
 #ifdef CONFIG_VM_EVENT_COUNTERS
 /*
  * Light weight per cpu counter implementation.
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 2a7b9d88706b..1f7b3b76a166 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -43,6 +43,7 @@
 #include <linux/limits.h>
 #include <linux/dcache.h>
 #include <linux/syscalls.h>
+#include <linux/vmstat.h>
 #include <linux/nfs_fs.h>
 #include <linux/acpi.h>
 #include <linux/reboot.h>
@@ -80,7 +81,6 @@ extern int sysctl_drop_caches;
 extern int percpu_pagelist_fraction;
 extern int compat_log;
 extern int maps_protect;
-extern int sysctl_stat_interval;
 extern int latencytop_enabled;
 extern int sysctl_nr_open_min, sysctl_nr_open_max;
 #ifdef CONFIG_RCU_TORTURE_TEST
diff --git a/mm/vmstat.c b/mm/vmstat.c
index c3d4a781802f..b0d08e667ece 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -13,6 +13,7 @@
 #include <linux/err.h>
 #include <linux/module.h>
 #include <linux/cpu.h>
+#include <linux/vmstat.h>
 #include <linux/sched.h>
 
 #ifdef CONFIG_VM_EVENT_COUNTERS
-- 
cgit v1.2.3


From 0d71d10a4252a3938e6b70189bc776171c02e076 Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@suse.de>
Date: Wed, 23 Jul 2008 21:27:05 -0700
Subject: mm: remove nopfn

There are no users of nopfn in the tree. Remove it.

[hugh@veritas.com: fix build error]
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h |  9 --------
 mm/memory.c        | 67 ++++++------------------------------------------------
 2 files changed, 7 insertions(+), 69 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 2128ef7780c6..eb815cfc1b35 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -166,8 +166,6 @@ struct vm_operations_struct {
 	void (*open)(struct vm_area_struct * area);
 	void (*close)(struct vm_area_struct * area);
 	int (*fault)(struct vm_area_struct *vma, struct vm_fault *vmf);
-	unsigned long (*nopfn)(struct vm_area_struct *area,
-			unsigned long address);
 
 	/* notification that a previously read-only page is about to become
 	 * writable, if an error is returned it will cause a SIGBUS */
@@ -674,13 +672,6 @@ static inline int page_mapped(struct page *page)
 	return atomic_read(&(page)->_mapcount) >= 0;
 }
 
-/*
- * Error return values for the *_nopfn functions
- */
-#define NOPFN_SIGBUS	((unsigned long) -1)
-#define NOPFN_OOM	((unsigned long) -2)
-#define NOPFN_REFAULT	((unsigned long) -3)
-
 /*
  * Different kinds of faults, as returned by handle_mm_fault().
  * Used to decide whether a process gets delivered SIGBUS or
diff --git a/mm/memory.c b/mm/memory.c
index 2302d228fe04..46dbed4b7446 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1058,11 +1058,9 @@ static inline int use_zero_page(struct vm_area_struct *vma)
 	if (vma->vm_flags & (VM_LOCKED | VM_SHARED))
 		return 0;
 	/*
-	 * And if we have a fault or a nopfn routine, it's not an
-	 * anonymous region.
+	 * And if we have a fault routine, it's not an anonymous region.
 	 */
-	return !vma->vm_ops ||
-		(!vma->vm_ops->fault && !vma->vm_ops->nopfn);
+	return !vma->vm_ops || !vma->vm_ops->fault;
 }
 
 int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
@@ -1338,6 +1336,11 @@ out:
  *
  * This function should only be called from a vm_ops->fault handler, and
  * in that case the handler should return NULL.
+ *
+ * vma cannot be a COW mapping.
+ *
+ * As this is called only for pages that do not currently exist, we
+ * do not need to flush old virtual caches or the TLB.
  */
 int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
 			unsigned long pfn)
@@ -2501,59 +2504,6 @@ static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	return __do_fault(mm, vma, address, pmd, pgoff, flags, orig_pte);
 }
 
-
-/*
- * do_no_pfn() tries to create a new page mapping for a page without
- * a struct_page backing it
- *
- * As this is called only for pages that do not currently exist, we
- * do not need to flush old virtual caches or the TLB.
- *
- * We enter with non-exclusive mmap_sem (to exclude vma changes,
- * but allow concurrent faults), and pte mapped but not yet locked.
- * We return with mmap_sem still held, but pte unmapped and unlocked.
- *
- * It is expected that the ->nopfn handler always returns the same pfn
- * for a given virtual mapping.
- *
- * Mark this `noinline' to prevent it from bloating the main pagefault code.
- */
-static noinline int do_no_pfn(struct mm_struct *mm, struct vm_area_struct *vma,
-		     unsigned long address, pte_t *page_table, pmd_t *pmd,
-		     int write_access)
-{
-	spinlock_t *ptl;
-	pte_t entry;
-	unsigned long pfn;
-
-	pte_unmap(page_table);
-	BUG_ON(!(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)));
-	BUG_ON((vma->vm_flags & VM_PFNMAP) && is_cow_mapping(vma->vm_flags));
-
-	pfn = vma->vm_ops->nopfn(vma, address & PAGE_MASK);
-
-	BUG_ON((vma->vm_flags & VM_MIXEDMAP) && pfn_valid(pfn));
-
-	if (unlikely(pfn == NOPFN_OOM))
-		return VM_FAULT_OOM;
-	else if (unlikely(pfn == NOPFN_SIGBUS))
-		return VM_FAULT_SIGBUS;
-	else if (unlikely(pfn == NOPFN_REFAULT))
-		return 0;
-
-	page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
-
-	/* Only go through if we didn't race with anybody else... */
-	if (pte_none(*page_table)) {
-		entry = pfn_pte(pfn, vma->vm_page_prot);
-		if (write_access)
-			entry = maybe_mkwrite(pte_mkdirty(entry), vma);
-		set_pte_at(mm, address, page_table, entry);
-	}
-	pte_unmap_unlock(page_table, ptl);
-	return 0;
-}
-
 /*
  * Fault of a previously existing named mapping. Repopulate the pte
  * from the encoded file_pte if possible. This enables swappable
@@ -2614,9 +2564,6 @@ static inline int handle_pte_fault(struct mm_struct *mm,
 				if (likely(vma->vm_ops->fault))
 					return do_linear_fault(mm, vma, address,
 						pte, pmd, write_access, entry);
-				if (unlikely(vma->vm_ops->nopfn))
-					return do_no_pfn(mm, vma, address, pte,
-							 pmd, write_access);
 			}
 			return do_anonymous_page(mm, vma, address,
 						 pte, pmd, write_access);
-- 
cgit v1.2.3


From 28b2ee20c7cba812b6f2ccf6d722cf86d00a84dc Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@redhat.com>
Date: Wed, 23 Jul 2008 21:27:05 -0700
Subject: access_process_vm device memory infrastructure

In order to be able to debug things like the X server and programs using
the PPC Cell SPUs, the debugger needs to be able to access device memory
through ptrace and /proc/pid/mem.

This patch:

Add the generic_access_phys access function and put the hooks in place
to allow access_process_vm to access device or PPC Cell SPU memory.

[riel@redhat.com: Add documentation for the vm_ops->access function]
Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Benjamin Herrensmidt <benh@kernel.crashing.org>
Cc: Dave Airlie <airlied@linux.ie>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/filesystems/Locking |   7 ++
 arch/Kconfig                      |   3 +
 arch/x86/Kconfig                  |   1 +
 arch/x86/mm/ioremap.c             |   8 +++
 include/asm-x86/io_32.h           |   2 +
 include/asm-x86/io_64.h           |   2 +
 include/linux/mm.h                |   8 +++
 mm/memory.c                       | 131 ++++++++++++++++++++++++++++++++------
 8 files changed, 144 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 8b22d7d8b991..680fb566b928 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -510,6 +510,7 @@ prototypes:
 	void (*close)(struct vm_area_struct*);
 	int (*fault)(struct vm_area_struct*, struct vm_fault *);
 	int (*page_mkwrite)(struct vm_area_struct *, struct page *);
+	int (*access)(struct vm_area_struct *, unsigned long, void*, int, int);
 
 locking rules:
 		BKL	mmap_sem	PageLocked(page)
@@ -517,6 +518,7 @@ open:		no	yes
 close:		no	yes
 fault:		no	yes
 page_mkwrite:	no	yes		no
+access:		no	yes
 
 	->page_mkwrite() is called when a previously read-only page is
 about to become writeable. The file system is responsible for
@@ -525,6 +527,11 @@ taking to lock out truncate, the page range should be verified to be
 within i_size. The page mapping should also be checked that it is not
 NULL.
 
+	->access() is called when get_user_pages() fails in
+acces_process_vm(), typically used to debug a process through
+/proc/pid/mem or ptrace.  This function is needed only for
+VM_IO | VM_PFNMAP VMAs.
+
 ================================================================================
 			Dubious stuff
 
diff --git a/arch/Kconfig b/arch/Kconfig
index 4d5ebbc1e72b..6093c0be58b0 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -31,6 +31,9 @@ config KRETPROBES
 	def_bool y
 	depends on KPROBES && HAVE_KRETPROBES
 
+config HAVE_IOREMAP_PROT
+	def_bool n
+
 config HAVE_KPROBES
 	def_bool n
 
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 03980cb04291..b2ddfcf01728 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -21,6 +21,7 @@ config X86
 	select HAVE_UNSTABLE_SCHED_CLOCK
 	select HAVE_IDE
 	select HAVE_OPROFILE
+	select HAVE_IOREMAP_PROT
 	select HAVE_KPROBES
 	select HAVE_KRETPROBES
 	select HAVE_DYNAMIC_FTRACE
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 24c1d3c30186..016f335bbeea 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -330,6 +330,14 @@ static void __iomem *ioremap_default(resource_size_t phys_addr,
 	return (void __iomem *)ret;
 }
 
+void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long size,
+				unsigned long prot_val)
+{
+	return __ioremap_caller(phys_addr, size, (prot_val & _PAGE_CACHE_MASK),
+				__builtin_return_address(0));
+}
+EXPORT_SYMBOL(ioremap_prot);
+
 /**
  * iounmap - Free a IO remapping
  * @addr: virtual address from ioremap_*
diff --git a/include/asm-x86/io_32.h b/include/asm-x86/io_32.h
index 4df44ed54077..e876d89ac156 100644
--- a/include/asm-x86/io_32.h
+++ b/include/asm-x86/io_32.h
@@ -110,6 +110,8 @@ static inline void *phys_to_virt(unsigned long address)
  */
 extern void __iomem *ioremap_nocache(resource_size_t offset, unsigned long size);
 extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size);
+extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size,
+				unsigned long prot_val);
 
 /*
  * The default ioremap() behavior is non-cached:
diff --git a/include/asm-x86/io_64.h b/include/asm-x86/io_64.h
index ddd8058a5026..22995c5c5adc 100644
--- a/include/asm-x86/io_64.h
+++ b/include/asm-x86/io_64.h
@@ -175,6 +175,8 @@ extern void early_iounmap(void *addr, unsigned long size);
  */
 extern void __iomem *ioremap_nocache(resource_size_t offset, unsigned long size);
 extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size);
+extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size,
+				unsigned long prot_val);
 
 /*
  * The default ioremap() behavior is non-cached:
diff --git a/include/linux/mm.h b/include/linux/mm.h
index eb815cfc1b35..5c7f8f64f70e 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -170,6 +170,12 @@ struct vm_operations_struct {
 	/* notification that a previously read-only page is about to become
 	 * writable, if an error is returned it will cause a SIGBUS */
 	int (*page_mkwrite)(struct vm_area_struct *vma, struct page *page);
+
+	/* called by access_process_vm when get_user_pages() fails, typically
+	 * for use by special VMAs that can switch between memory and hardware
+	 */
+	int (*access)(struct vm_area_struct *vma, unsigned long addr,
+		      void *buf, int len, int write);
 #ifdef CONFIG_NUMA
 	/*
 	 * set_policy() op must add a reference to any non-NULL @new mempolicy
@@ -771,6 +777,8 @@ int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
 			struct vm_area_struct *vma);
 void unmap_mapping_range(struct address_space *mapping,
 		loff_t const holebegin, loff_t const holelen, int even_cows);
+int generic_access_phys(struct vm_area_struct *vma, unsigned long addr,
+			void *buf, int len, int write);
 
 static inline void unmap_shared_mapping_range(struct address_space *mapping,
 		loff_t const holebegin, loff_t const holelen)
diff --git a/mm/memory.c b/mm/memory.c
index 46dbed4b7446..87350321e66f 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2751,6 +2751,86 @@ int in_gate_area_no_task(unsigned long addr)
 
 #endif	/* __HAVE_ARCH_GATE_AREA */
 
+#ifdef CONFIG_HAVE_IOREMAP_PROT
+static resource_size_t follow_phys(struct vm_area_struct *vma,
+			unsigned long address, unsigned int flags,
+			unsigned long *prot)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *ptep, pte;
+	spinlock_t *ptl;
+	resource_size_t phys_addr = 0;
+	struct mm_struct *mm = vma->vm_mm;
+
+	VM_BUG_ON(!(vma->vm_flags & (VM_IO | VM_PFNMAP)));
+
+	pgd = pgd_offset(mm, address);
+	if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
+		goto no_page_table;
+
+	pud = pud_offset(pgd, address);
+	if (pud_none(*pud) || unlikely(pud_bad(*pud)))
+		goto no_page_table;
+
+	pmd = pmd_offset(pud, address);
+	if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
+		goto no_page_table;
+
+	/* We cannot handle huge page PFN maps. Luckily they don't exist. */
+	if (pmd_huge(*pmd))
+		goto no_page_table;
+
+	ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
+	if (!ptep)
+		goto out;
+
+	pte = *ptep;
+	if (!pte_present(pte))
+		goto unlock;
+	if ((flags & FOLL_WRITE) && !pte_write(pte))
+		goto unlock;
+	phys_addr = pte_pfn(pte);
+	phys_addr <<= PAGE_SHIFT; /* Shift here to avoid overflow on PAE */
+
+	*prot = pgprot_val(pte_pgprot(pte));
+
+unlock:
+	pte_unmap_unlock(ptep, ptl);
+out:
+	return phys_addr;
+no_page_table:
+	return 0;
+}
+
+int generic_access_phys(struct vm_area_struct *vma, unsigned long addr,
+			void *buf, int len, int write)
+{
+	resource_size_t phys_addr;
+	unsigned long prot = 0;
+	void *maddr;
+	int offset = addr & (PAGE_SIZE-1);
+
+	if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
+		return -EINVAL;
+
+	phys_addr = follow_phys(vma, addr, write, &prot);
+
+	if (!phys_addr)
+		return -EINVAL;
+
+	maddr = ioremap_prot(phys_addr, PAGE_SIZE, prot);
+	if (write)
+		memcpy_toio(maddr + offset, buf, len);
+	else
+		memcpy_fromio(buf, maddr + offset, len);
+	iounmap(maddr);
+
+	return len;
+}
+#endif
+
 /*
  * Access another process' address space.
  * Source/target buffer must be kernel space,
@@ -2760,7 +2840,6 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, in
 {
 	struct mm_struct *mm;
 	struct vm_area_struct *vma;
-	struct page *page;
 	void *old_buf = buf;
 
 	mm = get_task_mm(tsk);
@@ -2772,28 +2851,44 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, in
 	while (len) {
 		int bytes, ret, offset;
 		void *maddr;
+		struct page *page = NULL;
 
 		ret = get_user_pages(tsk, mm, addr, 1,
 				write, 1, &page, &vma);
-		if (ret <= 0)
-			break;
-
-		bytes = len;
-		offset = addr & (PAGE_SIZE-1);
-		if (bytes > PAGE_SIZE-offset)
-			bytes = PAGE_SIZE-offset;
-
-		maddr = kmap(page);
-		if (write) {
-			copy_to_user_page(vma, page, addr,
-					  maddr + offset, buf, bytes);
-			set_page_dirty_lock(page);
+		if (ret <= 0) {
+			/*
+			 * Check if this is a VM_IO | VM_PFNMAP VMA, which
+			 * we can access using slightly different code.
+			 */
+#ifdef CONFIG_HAVE_IOREMAP_PROT
+			vma = find_vma(mm, addr);
+			if (!vma)
+				break;
+			if (vma->vm_ops && vma->vm_ops->access)
+				ret = vma->vm_ops->access(vma, addr, buf,
+							  len, write);
+			if (ret <= 0)
+#endif
+				break;
+			bytes = ret;
 		} else {
-			copy_from_user_page(vma, page, addr,
-					    buf, maddr + offset, bytes);
+			bytes = len;
+			offset = addr & (PAGE_SIZE-1);
+			if (bytes > PAGE_SIZE-offset)
+				bytes = PAGE_SIZE-offset;
+
+			maddr = kmap(page);
+			if (write) {
+				copy_to_user_page(vma, page, addr,
+						  maddr + offset, buf, bytes);
+				set_page_dirty_lock(page);
+			} else {
+				copy_from_user_page(vma, page, addr,
+						    buf, maddr + offset, bytes);
+			}
+			kunmap(page);
+			page_cache_release(page);
 		}
-		kunmap(page);
-		page_cache_release(page);
 		len -= bytes;
 		buf += bytes;
 		addr += bytes;
-- 
cgit v1.2.3


From 42b7772812d15b86543a23b82bd6070eef9a08b1 Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@novell.com>
Date: Wed, 23 Jul 2008 21:27:10 -0700
Subject: mm: remove double indirection on tlb parameter to free_pgd_range() &
 Co

The double indirection here is not needed anywhere and hence (at least)
confusing.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: Nick Piggin <npiggin@suse.de>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: "David S. Miller" <davem@davemloft.net>
Acked-by: Jeremy Fitzhardinge <jeremy@goop.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/ia64/mm/hugetlbpage.c    |  2 +-
 arch/powerpc/mm/hugetlbpage.c |  8 ++++----
 fs/exec.c                     |  4 ++--
 include/asm-ia64/hugetlb.h    |  2 +-
 include/asm-powerpc/hugetlb.h |  2 +-
 include/asm-sh/hugetlb.h      |  2 +-
 include/asm-sparc/hugetlb.h   |  2 +-
 include/asm-x86/hugetlb.h     |  2 +-
 include/linux/mm.h            |  4 +---
 mm/internal.h                 |  3 +++
 mm/memory.c                   | 10 ++++++----
 mm/mmap.c                     |  6 ++++--
 12 files changed, 26 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/mm/hugetlbpage.c b/arch/ia64/mm/hugetlbpage.c
index d3ce8f3bcaa6..cd49e2860eef 100644
--- a/arch/ia64/mm/hugetlbpage.c
+++ b/arch/ia64/mm/hugetlbpage.c
@@ -112,7 +112,7 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address, pmd_t *pmd, int wri
 	return NULL;
 }
 
-void hugetlb_free_pgd_range(struct mmu_gather **tlb,
+void hugetlb_free_pgd_range(struct mmu_gather *tlb,
 			unsigned long addr, unsigned long end,
 			unsigned long floor, unsigned long ceiling)
 {
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 0d12fba31bc5..1a96cc891cf5 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -255,7 +255,7 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
  *
  * Must be called with pagetable lock held.
  */
-void hugetlb_free_pgd_range(struct mmu_gather **tlb,
+void hugetlb_free_pgd_range(struct mmu_gather *tlb,
 			    unsigned long addr, unsigned long end,
 			    unsigned long floor, unsigned long ceiling)
 {
@@ -315,13 +315,13 @@ void hugetlb_free_pgd_range(struct mmu_gather **tlb,
 		return;
 
 	start = addr;
-	pgd = pgd_offset((*tlb)->mm, addr);
+	pgd = pgd_offset(tlb->mm, addr);
 	do {
-		BUG_ON(get_slice_psize((*tlb)->mm, addr) != mmu_huge_psize);
+		BUG_ON(get_slice_psize(tlb->mm, addr) != mmu_huge_psize);
 		next = pgd_addr_end(addr, end);
 		if (pgd_none_or_clear_bad(pgd))
 			continue;
-		hugetlb_free_pud_range(*tlb, pgd, addr, next, floor, ceiling);
+		hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling);
 	} while (pgd++, addr = next, addr != end);
 }
 
diff --git a/fs/exec.c b/fs/exec.c
index fd9234379e8d..190ed1f92774 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -541,7 +541,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
 		/*
 		 * when the old and new regions overlap clear from new_end.
 		 */
-		free_pgd_range(&tlb, new_end, old_end, new_end,
+		free_pgd_range(tlb, new_end, old_end, new_end,
 			vma->vm_next ? vma->vm_next->vm_start : 0);
 	} else {
 		/*
@@ -550,7 +550,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
 		 * have constraints on va-space that make this illegal (IA64) -
 		 * for the others its just a little faster.
 		 */
-		free_pgd_range(&tlb, old_start, old_end, new_end,
+		free_pgd_range(tlb, old_start, old_end, new_end,
 			vma->vm_next ? vma->vm_next->vm_start : 0);
 	}
 	tlb_finish_mmu(tlb, new_end, old_end);
diff --git a/include/asm-ia64/hugetlb.h b/include/asm-ia64/hugetlb.h
index f28a9701f1cf..e9d1e5e2382d 100644
--- a/include/asm-ia64/hugetlb.h
+++ b/include/asm-ia64/hugetlb.h
@@ -4,7 +4,7 @@
 #include <asm/page.h>
 
 
-void hugetlb_free_pgd_range(struct mmu_gather **tlb, unsigned long addr,
+void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr,
 			    unsigned long end, unsigned long floor,
 			    unsigned long ceiling);
 
diff --git a/include/asm-powerpc/hugetlb.h b/include/asm-powerpc/hugetlb.h
index be32ff02f4a0..0a37aa5ecaa5 100644
--- a/include/asm-powerpc/hugetlb.h
+++ b/include/asm-powerpc/hugetlb.h
@@ -7,7 +7,7 @@
 int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr,
 			   unsigned long len);
 
-void hugetlb_free_pgd_range(struct mmu_gather **tlb, unsigned long addr,
+void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr,
 			    unsigned long end, unsigned long floor,
 			    unsigned long ceiling);
 
diff --git a/include/asm-sh/hugetlb.h b/include/asm-sh/hugetlb.h
index 02402303d89b..fb30018938c7 100644
--- a/include/asm-sh/hugetlb.h
+++ b/include/asm-sh/hugetlb.h
@@ -26,7 +26,7 @@ static inline int prepare_hugepage_range(unsigned long addr, unsigned long len)
 static inline void hugetlb_prefault_arch_hook(struct mm_struct *mm) {
 }
 
-static inline void hugetlb_free_pgd_range(struct mmu_gather **tlb,
+static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb,
 					  unsigned long addr, unsigned long end,
 					  unsigned long floor,
 					  unsigned long ceiling)
diff --git a/include/asm-sparc/hugetlb.h b/include/asm-sparc/hugetlb.h
index 412af58926a0..aeb92374ca3d 100644
--- a/include/asm-sparc/hugetlb.h
+++ b/include/asm-sparc/hugetlb.h
@@ -31,7 +31,7 @@ static inline int prepare_hugepage_range(unsigned long addr, unsigned long len)
 	return 0;
 }
 
-static inline void hugetlb_free_pgd_range(struct mmu_gather **tlb,
+static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb,
 					  unsigned long addr, unsigned long end,
 					  unsigned long floor,
 					  unsigned long ceiling)
diff --git a/include/asm-x86/hugetlb.h b/include/asm-x86/hugetlb.h
index 14171a4924f6..7eed6e0883bf 100644
--- a/include/asm-x86/hugetlb.h
+++ b/include/asm-x86/hugetlb.h
@@ -26,7 +26,7 @@ static inline int prepare_hugepage_range(unsigned long addr, unsigned long len)
 static inline void hugetlb_prefault_arch_hook(struct mm_struct *mm) {
 }
 
-static inline void hugetlb_free_pgd_range(struct mmu_gather **tlb,
+static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb,
 					  unsigned long addr, unsigned long end,
 					  unsigned long floor,
 					  unsigned long ceiling)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 5c7f8f64f70e..f8071097302a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -769,10 +769,8 @@ struct mm_walk {
 
 int walk_page_range(unsigned long addr, unsigned long end,
 		struct mm_walk *walk);
-void free_pgd_range(struct mmu_gather **tlb, unsigned long addr,
+void free_pgd_range(struct mmu_gather *tlb, unsigned long addr,
 		unsigned long end, unsigned long floor, unsigned long ceiling);
-void free_pgtables(struct mmu_gather **tlb, struct vm_area_struct *start_vma,
-		unsigned long floor, unsigned long ceiling);
 int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
 			struct vm_area_struct *vma);
 void unmap_mapping_range(struct address_space *mapping,
diff --git a/mm/internal.h b/mm/internal.h
index 50807e12490e..858ad01864dc 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -13,6 +13,9 @@
 
 #include <linux/mm.h>
 
+void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
+		unsigned long floor, unsigned long ceiling);
+
 static inline void set_page_count(struct page *page, int v)
 {
 	atomic_set(&page->_count, v);
diff --git a/mm/memory.c b/mm/memory.c
index 87350321e66f..82f3f1c5cf17 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -61,6 +61,8 @@
 #include <linux/swapops.h>
 #include <linux/elf.h>
 
+#include "internal.h"
+
 #ifndef CONFIG_NEED_MULTIPLE_NODES
 /* use the per-pgdat data instead for discontigmem - mbligh */
 unsigned long max_mapnr;
@@ -211,7 +213,7 @@ static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
  *
  * Must be called with pagetable lock held.
  */
-void free_pgd_range(struct mmu_gather **tlb,
+void free_pgd_range(struct mmu_gather *tlb,
 			unsigned long addr, unsigned long end,
 			unsigned long floor, unsigned long ceiling)
 {
@@ -262,16 +264,16 @@ void free_pgd_range(struct mmu_gather **tlb,
 		return;
 
 	start = addr;
-	pgd = pgd_offset((*tlb)->mm, addr);
+	pgd = pgd_offset(tlb->mm, addr);
 	do {
 		next = pgd_addr_end(addr, end);
 		if (pgd_none_or_clear_bad(pgd))
 			continue;
-		free_pud_range(*tlb, pgd, addr, next, floor, ceiling);
+		free_pud_range(tlb, pgd, addr, next, floor, ceiling);
 	} while (pgd++, addr = next, addr != end);
 }
 
-void free_pgtables(struct mmu_gather **tlb, struct vm_area_struct *vma,
+void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
 		unsigned long floor, unsigned long ceiling)
 {
 	while (vma) {
diff --git a/mm/mmap.c b/mm/mmap.c
index 1d102b956fd8..75e0d0673d78 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -32,6 +32,8 @@
 #include <asm/tlb.h>
 #include <asm/mmu_context.h>
 
+#include "internal.h"
+
 #ifndef arch_mmap_check
 #define arch_mmap_check(addr, len, flags)	(0)
 #endif
@@ -1763,7 +1765,7 @@ static void unmap_region(struct mm_struct *mm,
 	update_hiwater_rss(mm);
 	unmap_vmas(&tlb, vma, start, end, &nr_accounted, NULL);
 	vm_unacct_memory(nr_accounted);
-	free_pgtables(&tlb, vma, prev? prev->vm_end: FIRST_USER_ADDRESS,
+	free_pgtables(tlb, vma, prev? prev->vm_end: FIRST_USER_ADDRESS,
 				 next? next->vm_start: 0);
 	tlb_finish_mmu(tlb, start, end);
 }
@@ -2063,7 +2065,7 @@ void exit_mmap(struct mm_struct *mm)
 	/* Use -1 here to ensure all VMAs in the mm are unmapped */
 	end = unmap_vmas(&tlb, vma, 0, -1, &nr_accounted, NULL);
 	vm_unacct_memory(nr_accounted);
-	free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, 0);
+	free_pgtables(tlb, vma, FIRST_USER_ADDRESS, 0);
 	tlb_finish_mmu(tlb, 0, end);
 
 	/*
-- 
cgit v1.2.3


From da3bbdd4632c0171406b2677e31494afa5bde2f8 Mon Sep 17 00:00:00 2001
From: Kentaro Makita <k-makita@np.css.fujitsu.com>
Date: Wed, 23 Jul 2008 21:27:13 -0700
Subject: fix soft lock up at NFS mount via per-SB LRU-list of unused dentries

[Summary]

 Split LRU-list of unused dentries to one per superblock to avoid soft
 lock up during NFS mounts and remounting of any filesystem.

 Previously I posted here:
 http://lkml.org/lkml/2008/3/5/590

[Descriptions]

- background

  dentry_unused is a list of dentries which are not referenced.
  dentry_unused grows up when references on directories or files are
  released.  This list can be very long if there is huge free memory.

- the problem

  When shrink_dcache_sb() is called, it scans all dentry_unused linearly
  under spin_lock(), and if dentry->d_sb is differnt from given
  superblock, scan next dentry.  This scan costs very much if there are
  many entries, and very ineffective if there are many superblocks.

  IOW, When we need to shrink unused dentries on one dentry, but scans
  unused dentries on all superblocks in the system.  For example, we scan
  500 dentries to unmount a filesystem, but scans 1,000,000 or more unused
  dentries on other superblocks.

  In our case , At mounting NFS*, shrink_dcache_sb() is called to shrink
  unused dentries on NFS, but scans 100,000,000 unused dentries on
  superblocks in the system such as local ext3 filesystems.  I hear NFS
  mounting took 1 min on some system in use.

* : NFS uses virtual filesystem in rpc layer, so NFS is affected by
  this problem.

  100,000,000 is possible number on large systems.

  Per-superblock LRU of unused dentried can reduce the cost in
  reasonable manner.

- How to fix

  I found this problem is solved by David Chinner's "Per-superblock
  unused dentry LRU lists V3"(1), so I rebase it and add some fix to
  reclaim with fairness, which is in Andrew Morton's comments(2).

  1) http://lkml.org/lkml/2006/5/25/318
  2) http://lkml.org/lkml/2006/5/25/320

  Split LRU-list of unused dentries to each superblocks.  Then, NFS
  mounting will check dentries under a superblock instead of all.  But
  this spliting will break LRU of dentry-unused.  So, I've attempted to
  make reclaim unused dentrins with fairness by calculate number of
  dentries to scan on this sb based on following way

  number of dentries to scan on this sb =
  count * (number of dentries on this sb / number of dentries in the machine)

- ToDo
 - I have to measuring performance number and do stress tests.

 - When unmount occurs during prune_dcache(), scanning on same
  superblock, It is unable to reach next superblock because it is gone
  away.  We restart scannig superblock from first one, it causes
  unfairness of reclaim unused dentries on first superblock.  But I think
  this happens very rarely.

- Test Results

  Result on 6GB boxes with excessive unused dentries.

Without patch:

$ cat /proc/sys/fs/dentry-state
10181835        10180203        45      0       0       0
# mount -t nfs 10.124.60.70:/work/kernel-src nfs
real    0m1.830s
user    0m0.001s
sys     0m1.653s

 With this patch:
$ cat /proc/sys/fs/dentry-state
10236610        10234751        45      0       0       0
# mount -t nfs 10.124.60.70:/work/kernel-src nfs
real    0m0.106s
user    0m0.002s
sys     0m0.032s

[akpm@linux-foundation.org: fix comments]
Signed-off-by: Kentaro Makita <k-makita@np.css.fujitsu.com>
Cc: Neil Brown <neilb@suse.de>
Cc: Trond Myklebust <trond.myklebust@fys.uio.no>
Cc: David Chinner <dgc@sgi.com>
Cc: "J. Bruce Fields" <bfields@fieldses.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/dcache.c        | 335 ++++++++++++++++++++++++++++-------------------------
 fs/super.c         |   1 +
 include/linux/fs.h |   4 +
 3 files changed, 185 insertions(+), 155 deletions(-)

(limited to 'include/linux')

diff --git a/fs/dcache.c b/fs/dcache.c
index 6068c25b393c..3818d6ab76ca 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -61,7 +61,6 @@ static struct kmem_cache *dentry_cache __read_mostly;
 static unsigned int d_hash_mask __read_mostly;
 static unsigned int d_hash_shift __read_mostly;
 static struct hlist_head *dentry_hashtable __read_mostly;
-static LIST_HEAD(dentry_unused);
 
 /* Statistics gathering. */
 struct dentry_stat_t dentry_stat = {
@@ -96,14 +95,6 @@ static void d_free(struct dentry *dentry)
 		call_rcu(&dentry->d_u.d_rcu, d_callback);
 }
 
-static void dentry_lru_remove(struct dentry *dentry)
-{
-	if (!list_empty(&dentry->d_lru)) {
-		list_del_init(&dentry->d_lru);
-		dentry_stat.nr_unused--;
-	}
-}
-
 /*
  * Release the dentry's inode, using the filesystem
  * d_iput() operation if defined.
@@ -130,6 +121,41 @@ static void dentry_iput(struct dentry * dentry)
 	}
 }
 
+/*
+ * dentry_lru_(add|add_tail|del|del_init) must be called with dcache_lock held.
+ */
+static void dentry_lru_add(struct dentry *dentry)
+{
+	list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
+	dentry->d_sb->s_nr_dentry_unused++;
+	dentry_stat.nr_unused++;
+}
+
+static void dentry_lru_add_tail(struct dentry *dentry)
+{
+	list_add_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
+	dentry->d_sb->s_nr_dentry_unused++;
+	dentry_stat.nr_unused++;
+}
+
+static void dentry_lru_del(struct dentry *dentry)
+{
+	if (!list_empty(&dentry->d_lru)) {
+		list_del(&dentry->d_lru);
+		dentry->d_sb->s_nr_dentry_unused--;
+		dentry_stat.nr_unused--;
+	}
+}
+
+static void dentry_lru_del_init(struct dentry *dentry)
+{
+	if (likely(!list_empty(&dentry->d_lru))) {
+		list_del_init(&dentry->d_lru);
+		dentry->d_sb->s_nr_dentry_unused--;
+		dentry_stat.nr_unused--;
+	}
+}
+
 /**
  * d_kill - kill dentry and return parent
  * @dentry: dentry to kill
@@ -212,8 +238,7 @@ repeat:
 		goto kill_it;
   	if (list_empty(&dentry->d_lru)) {
   		dentry->d_flags |= DCACHE_REFERENCED;
-  		list_add(&dentry->d_lru, &dentry_unused);
-  		dentry_stat.nr_unused++;
+		dentry_lru_add(dentry);
   	}
  	spin_unlock(&dentry->d_lock);
 	spin_unlock(&dcache_lock);
@@ -222,7 +247,8 @@ repeat:
 unhash_it:
 	__d_drop(dentry);
 kill_it:
-	dentry_lru_remove(dentry);
+	/* if dentry was on the d_lru list delete it from there */
+	dentry_lru_del(dentry);
 	dentry = d_kill(dentry);
 	if (dentry)
 		goto repeat;
@@ -290,7 +316,7 @@ int d_invalidate(struct dentry * dentry)
 static inline struct dentry * __dget_locked(struct dentry *dentry)
 {
 	atomic_inc(&dentry->d_count);
-	dentry_lru_remove(dentry);
+	dentry_lru_del_init(dentry);
 	return dentry;
 }
 
@@ -406,133 +432,167 @@ static void prune_one_dentry(struct dentry * dentry)
 
 		if (dentry->d_op && dentry->d_op->d_delete)
 			dentry->d_op->d_delete(dentry);
-		dentry_lru_remove(dentry);
+		dentry_lru_del_init(dentry);
 		__d_drop(dentry);
 		dentry = d_kill(dentry);
 		spin_lock(&dcache_lock);
 	}
 }
 
-/**
- * prune_dcache - shrink the dcache
- * @count: number of entries to try and free
- * @sb: if given, ignore dentries for other superblocks
- *         which are being unmounted.
- *
- * Shrink the dcache. This is done when we need
- * more memory, or simply when we need to unmount
- * something (at which point we need to unuse
- * all dentries).
- *
- * This function may fail to free any resources if
- * all the dentries are in use.
+/*
+ * Shrink the dentry LRU on a given superblock.
+ * @sb   : superblock to shrink dentry LRU.
+ * @count: If count is NULL, we prune all dentries on superblock.
+ * @flags: If flags is non-zero, we need to do special processing based on
+ * which flags are set. This means we don't need to maintain multiple
+ * similar copies of this loop.
  */
- 
-static void prune_dcache(int count, struct super_block *sb)
+static void __shrink_dcache_sb(struct super_block *sb, int *count, int flags)
 {
-	spin_lock(&dcache_lock);
-	for (; count ; count--) {
-		struct dentry *dentry;
-		struct list_head *tmp;
-		struct rw_semaphore *s_umount;
-
-		cond_resched_lock(&dcache_lock);
+	LIST_HEAD(referenced);
+	LIST_HEAD(tmp);
+	struct dentry *dentry;
+	int cnt = 0;
 
-		tmp = dentry_unused.prev;
-		if (sb) {
-			/* Try to find a dentry for this sb, but don't try
-			 * too hard, if they aren't near the tail they will
-			 * be moved down again soon
+	BUG_ON(!sb);
+	BUG_ON((flags & DCACHE_REFERENCED) && count == NULL);
+	spin_lock(&dcache_lock);
+	if (count != NULL)
+		/* called from prune_dcache() and shrink_dcache_parent() */
+		cnt = *count;
+restart:
+	if (count == NULL)
+		list_splice_init(&sb->s_dentry_lru, &tmp);
+	else {
+		while (!list_empty(&sb->s_dentry_lru)) {
+			dentry = list_entry(sb->s_dentry_lru.prev,
+					struct dentry, d_lru);
+			BUG_ON(dentry->d_sb != sb);
+
+			spin_lock(&dentry->d_lock);
+			/*
+			 * If we are honouring the DCACHE_REFERENCED flag and
+			 * the dentry has this flag set, don't free it. Clear
+			 * the flag and put it back on the LRU.
 			 */
-			int skip = count;
-			while (skip && tmp != &dentry_unused &&
-			    list_entry(tmp, struct dentry, d_lru)->d_sb != sb) {
-				skip--;
-				tmp = tmp->prev;
+			if ((flags & DCACHE_REFERENCED)
+				&& (dentry->d_flags & DCACHE_REFERENCED)) {
+				dentry->d_flags &= ~DCACHE_REFERENCED;
+				list_move_tail(&dentry->d_lru, &referenced);
+				spin_unlock(&dentry->d_lock);
+			} else {
+				list_move_tail(&dentry->d_lru, &tmp);
+				spin_unlock(&dentry->d_lock);
+				cnt--;
+				if (!cnt)
+					break;
 			}
 		}
-		if (tmp == &dentry_unused)
-			break;
-		list_del_init(tmp);
-		prefetch(dentry_unused.prev);
- 		dentry_stat.nr_unused--;
-		dentry = list_entry(tmp, struct dentry, d_lru);
-
- 		spin_lock(&dentry->d_lock);
+	}
+	while (!list_empty(&tmp)) {
+		dentry = list_entry(tmp.prev, struct dentry, d_lru);
+		dentry_lru_del_init(dentry);
+		spin_lock(&dentry->d_lock);
 		/*
 		 * We found an inuse dentry which was not removed from
-		 * dentry_unused because of laziness during lookup.  Do not free
-		 * it - just keep it off the dentry_unused list.
+		 * the LRU because of laziness during lookup.  Do not free
+		 * it - just keep it off the LRU list.
 		 */
- 		if (atomic_read(&dentry->d_count)) {
- 			spin_unlock(&dentry->d_lock);
+		if (atomic_read(&dentry->d_count)) {
+			spin_unlock(&dentry->d_lock);
 			continue;
 		}
-		/* If the dentry was recently referenced, don't free it. */
-		if (dentry->d_flags & DCACHE_REFERENCED) {
-			dentry->d_flags &= ~DCACHE_REFERENCED;
- 			list_add(&dentry->d_lru, &dentry_unused);
- 			dentry_stat.nr_unused++;
- 			spin_unlock(&dentry->d_lock);
+		prune_one_dentry(dentry);
+		/* dentry->d_lock was dropped in prune_one_dentry() */
+		cond_resched_lock(&dcache_lock);
+	}
+	if (count == NULL && !list_empty(&sb->s_dentry_lru))
+		goto restart;
+	if (count != NULL)
+		*count = cnt;
+	if (!list_empty(&referenced))
+		list_splice(&referenced, &sb->s_dentry_lru);
+	spin_unlock(&dcache_lock);
+}
+
+/**
+ * prune_dcache - shrink the dcache
+ * @count: number of entries to try to free
+ *
+ * Shrink the dcache. This is done when we need more memory, or simply when we
+ * need to unmount something (at which point we need to unuse all dentries).
+ *
+ * This function may fail to free any resources if all the dentries are in use.
+ */
+static void prune_dcache(int count)
+{
+	struct super_block *sb;
+	int w_count;
+	int unused = dentry_stat.nr_unused;
+	int prune_ratio;
+	int pruned;
+
+	if (unused == 0 || count == 0)
+		return;
+	spin_lock(&dcache_lock);
+restart:
+	if (count >= unused)
+		prune_ratio = 1;
+	else
+		prune_ratio = unused / count;
+	spin_lock(&sb_lock);
+	list_for_each_entry(sb, &super_blocks, s_list) {
+		if (sb->s_nr_dentry_unused == 0)
 			continue;
-		}
-		/*
-		 * If the dentry is not DCACHED_REFERENCED, it is time
-		 * to remove it from the dcache, provided the super block is
-		 * NULL (which means we are trying to reclaim memory)
-		 * or this dentry belongs to the same super block that
-		 * we want to shrink.
-		 */
-		/*
-		 * If this dentry is for "my" filesystem, then I can prune it
-		 * without taking the s_umount lock (I already hold it).
+		sb->s_count++;
+		/* Now, we reclaim unused dentrins with fairness.
+		 * We reclaim them same percentage from each superblock.
+		 * We calculate number of dentries to scan on this sb
+		 * as follows, but the implementation is arranged to avoid
+		 * overflows:
+		 * number of dentries to scan on this sb =
+		 * count * (number of dentries on this sb /
+		 * number of dentries in the machine)
 		 */
-		if (sb && dentry->d_sb == sb) {
-			prune_one_dentry(dentry);
-			continue;
-		}
+		spin_unlock(&sb_lock);
+		if (prune_ratio != 1)
+			w_count = (sb->s_nr_dentry_unused / prune_ratio) + 1;
+		else
+			w_count = sb->s_nr_dentry_unused;
+		pruned = w_count;
 		/*
-		 * ...otherwise we need to be sure this filesystem isn't being
-		 * unmounted, otherwise we could race with
-		 * generic_shutdown_super(), and end up holding a reference to
-		 * an inode while the filesystem is unmounted.
-		 * So we try to get s_umount, and make sure s_root isn't NULL.
-		 * (Take a local copy of s_umount to avoid a use-after-free of
-		 * `dentry').
+		 * We need to be sure this filesystem isn't being unmounted,
+		 * otherwise we could race with generic_shutdown_super(), and
+		 * end up holding a reference to an inode while the filesystem
+		 * is unmounted.  So we try to get s_umount, and make sure
+		 * s_root isn't NULL.
 		 */
-		s_umount = &dentry->d_sb->s_umount;
-		if (down_read_trylock(s_umount)) {
-			if (dentry->d_sb->s_root != NULL) {
-				prune_one_dentry(dentry);
-				up_read(s_umount);
-				continue;
+		if (down_read_trylock(&sb->s_umount)) {
+			if ((sb->s_root != NULL) &&
+			    (!list_empty(&sb->s_dentry_lru))) {
+				spin_unlock(&dcache_lock);
+				__shrink_dcache_sb(sb, &w_count,
+						DCACHE_REFERENCED);
+				pruned -= w_count;
+				spin_lock(&dcache_lock);
 			}
-			up_read(s_umount);
+			up_read(&sb->s_umount);
 		}
-		spin_unlock(&dentry->d_lock);
+		spin_lock(&sb_lock);
+		count -= pruned;
 		/*
-		 * Insert dentry at the head of the list as inserting at the
-		 * tail leads to a cycle.
+		 * restart only when sb is no longer on the list and
+		 * we have more work to do.
 		 */
- 		list_add(&dentry->d_lru, &dentry_unused);
-		dentry_stat.nr_unused++;
+		if (__put_super_and_need_restart(sb) && count > 0) {
+			spin_unlock(&sb_lock);
+			goto restart;
+		}
 	}
+	spin_unlock(&sb_lock);
 	spin_unlock(&dcache_lock);
 }
 
-/*
- * Shrink the dcache for the specified super block.
- * This allows us to unmount a device without disturbing
- * the dcache for the other devices.
- *
- * This implementation makes just two traversals of the
- * unused list.  On the first pass we move the selected
- * dentries to the most recent end, and on the second
- * pass we free them.  The second pass must restart after
- * each dput(), but since the target dentries are all at
- * the end, it's really just a single traversal.
- */
-
 /**
  * shrink_dcache_sb - shrink dcache for a superblock
  * @sb: superblock
@@ -541,44 +601,9 @@ static void prune_dcache(int count, struct super_block *sb)
  * is used to free the dcache before unmounting a file
  * system
  */
-
 void shrink_dcache_sb(struct super_block * sb)
 {
-	struct list_head *tmp, *next;
-	struct dentry *dentry;
-
-	/*
-	 * Pass one ... move the dentries for the specified
-	 * superblock to the most recent end of the unused list.
-	 */
-	spin_lock(&dcache_lock);
-	list_for_each_prev_safe(tmp, next, &dentry_unused) {
-		dentry = list_entry(tmp, struct dentry, d_lru);
-		if (dentry->d_sb != sb)
-			continue;
-		list_move_tail(tmp, &dentry_unused);
-	}
-
-	/*
-	 * Pass two ... free the dentries for this superblock.
-	 */
-repeat:
-	list_for_each_prev_safe(tmp, next, &dentry_unused) {
-		dentry = list_entry(tmp, struct dentry, d_lru);
-		if (dentry->d_sb != sb)
-			continue;
-		dentry_stat.nr_unused--;
-		list_del_init(tmp);
-		spin_lock(&dentry->d_lock);
-		if (atomic_read(&dentry->d_count)) {
-			spin_unlock(&dentry->d_lock);
-			continue;
-		}
-		prune_one_dentry(dentry);
-		cond_resched_lock(&dcache_lock);
-		goto repeat;
-	}
-	spin_unlock(&dcache_lock);
+	__shrink_dcache_sb(sb, NULL, 0);
 }
 
 /*
@@ -595,7 +620,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
 
 	/* detach this root from the system */
 	spin_lock(&dcache_lock);
-	dentry_lru_remove(dentry);
+	dentry_lru_del_init(dentry);
 	__d_drop(dentry);
 	spin_unlock(&dcache_lock);
 
@@ -609,7 +634,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
 			spin_lock(&dcache_lock);
 			list_for_each_entry(loop, &dentry->d_subdirs,
 					    d_u.d_child) {
-				dentry_lru_remove(loop);
+				dentry_lru_del_init(loop);
 				__d_drop(loop);
 				cond_resched_lock(&dcache_lock);
 			}
@@ -791,14 +816,13 @@ resume:
 		struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
 		next = tmp->next;
 
-		dentry_lru_remove(dentry);
+		dentry_lru_del_init(dentry);
 		/* 
 		 * move only zero ref count dentries to the end 
 		 * of the unused list for prune_dcache
 		 */
 		if (!atomic_read(&dentry->d_count)) {
-			list_add_tail(&dentry->d_lru, &dentry_unused);
-			dentry_stat.nr_unused++;
+			dentry_lru_add_tail(dentry);
 			found++;
 		}
 
@@ -840,10 +864,11 @@ out:
  
 void shrink_dcache_parent(struct dentry * parent)
 {
+	struct super_block *sb = parent->d_sb;
 	int found;
 
 	while ((found = select_parent(parent)) != 0)
-		prune_dcache(found, parent->d_sb);
+		__shrink_dcache_sb(sb, &found, 0);
 }
 
 /*
@@ -863,7 +888,7 @@ static int shrink_dcache_memory(int nr, gfp_t gfp_mask)
 	if (nr) {
 		if (!(gfp_mask & __GFP_FS))
 			return -1;
-		prune_dcache(nr, NULL);
+		prune_dcache(nr);
 	}
 	return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
 }
@@ -1215,7 +1240,7 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
  * rcu_read_lock() and rcu_read_unlock() are used to disable preemption while
  * lookup is going on.
  *
- * dentry_unused list is not updated even if lookup finds the required dentry
+ * The dentry unused LRU is not updated even if lookup finds the required dentry
  * in there. It is updated in places such as prune_dcache, shrink_dcache_sb,
  * select_parent and __dget_locked. This laziness saves lookup from dcache_lock
  * acquisition.
diff --git a/fs/super.c b/fs/super.c
index 453877c5697b..e931ae9511fe 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -70,6 +70,7 @@ static struct super_block *alloc_super(struct file_system_type *type)
 		INIT_LIST_HEAD(&s->s_instances);
 		INIT_HLIST_HEAD(&s->s_anon);
 		INIT_LIST_HEAD(&s->s_inodes);
+		INIT_LIST_HEAD(&s->s_dentry_lru);
 		init_rwsem(&s->s_umount);
 		mutex_init(&s->s_lock);
 		lockdep_set_class(&s->s_umount, &type->s_umount_key);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index ff54ae4933f3..e5e6a244096c 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1025,6 +1025,7 @@ extern int send_sigurg(struct fown_struct *fown);
 extern struct list_head super_blocks;
 extern spinlock_t sb_lock;
 
+#define sb_entry(list)  list_entry((list), struct super_block, s_list)
 #define S_BIAS (1<<30)
 struct super_block {
 	struct list_head	s_list;		/* Keep this first */
@@ -1058,6 +1059,9 @@ struct super_block {
 	struct list_head	s_more_io;	/* parked for more writeback */
 	struct hlist_head	s_anon;		/* anonymous dentries for (nfs) exporting */
 	struct list_head	s_files;
+	/* s_dentry_lru and s_nr_dentry_unused are protected by dcache_lock */
+	struct list_head	s_dentry_lru;	/* unused dentry lru */
+	int			s_nr_dentry_unused;	/* # of dentry on lru */
 
 	struct block_device	*s_bdev;
 	struct mtd_info		*s_mtd;
-- 
cgit v1.2.3


From 0cad47cf13bc2e9142d3a11d9f50523797d0d4ea Mon Sep 17 00:00:00 2001
From: Andy Whitcroft <apw@shadowen.org>
Date: Wed, 23 Jul 2008 21:27:16 -0700
Subject: page-flags: record page flag overlays explicitly

With the recent page flag reorganisation we have a single enum which
defines the valid page flags and their values, nice and clear.  However
there are a number of bits which are overloaded by different subsystems.
Firstly there is PG_owner_priv_1 which is used by filesystems and by XEN.
Secondly both SLOB and SLUB use a couple of extra page bits to manage
internal state for pages they own; both overlay other bits.  All of these
"aliases" are scattered about the source making it very hard for a reader
to know if the bits are safe to rely on in all contexts; confusion here is
bad.

As we now have a single place where the bits are clearly assigned it makes
sense to clarify the reuse of bits by making the aliases explicit and
visible with the original bit assignments.  This patch creates explicit
aliases within the enum itself for the overloaded bits, creates standard
bit accessors PageFoo etc.  and uses those throughout.

This version pulls the bit manipulation out to standard named page bit
accessors as suggested by Christoph, it retains the explicit mapping to
the overlayed bits.  A fusion of both ideas.  This has been SLUB and SLOB
have been compile tested on x86_64 only, and SLUB boot tested.  If people
feel this is worth doing then I can run a fuller set of testing.

This patch:

Some page flags are used for more than one purpose, for example
PG_owner_priv_1.  Currently there are individual accessors for each user,
each built using the common flag name far away from the bit definitions.
This makes it hard to see all possible uses of these bits.

Now that we have a single enum to generate the bit orders it makes sense
to express overlays in the same place.  So create per use aliases for this
bit in the main page-flags enum and use those in the accessors.

[akpm@linux-foundation.org: fix xen]
Signed-off-by: Andy Whitcroft <apw@shadowen.org>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Matt Mackall <mpm@selenic.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Jeremy Fitzhardinge <jeremy@goop.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/page-flags.h | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 0d2a4e7012aa..7d8db1233e44 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -96,7 +96,14 @@ enum pageflags {
 #ifdef CONFIG_IA64_UNCACHED_ALLOCATOR
 	PG_uncached,		/* Page has been mapped as uncached */
 #endif
-	__NR_PAGEFLAGS
+	__NR_PAGEFLAGS,
+
+	/* Filesystems */
+	PG_checked = PG_owner_priv_1,
+
+	/* XEN */
+	PG_pinned = PG_owner_priv_1,
+	PG_savepinned = PG_dirty,
 };
 
 #ifndef __GENERATING_BOUNDS_H
@@ -155,9 +162,9 @@ PAGEFLAG(Dirty, dirty) TESTSCFLAG(Dirty, dirty) __CLEARPAGEFLAG(Dirty, dirty)
 PAGEFLAG(LRU, lru) __CLEARPAGEFLAG(LRU, lru)
 PAGEFLAG(Active, active) __CLEARPAGEFLAG(Active, active)
 __PAGEFLAG(Slab, slab)
-PAGEFLAG(Checked, owner_priv_1)		/* Used by some filesystems */
-PAGEFLAG(Pinned, owner_priv_1) TESTSCFLAG(Pinned, owner_priv_1) /* Xen */
-PAGEFLAG(SavePinned, dirty);					/* Xen */
+PAGEFLAG(Checked, checked)		/* Used by some filesystems */
+PAGEFLAG(Pinned, pinned) TESTSCFLAG(Pinned, pinned)	/* Xen */
+PAGEFLAG(SavePinned, savepinned);			/* Xen */
 PAGEFLAG(Reserved, reserved) __CLEARPAGEFLAG(Reserved, reserved)
 PAGEFLAG(Private, private) __CLEARPAGEFLAG(Private, private)
 	__SETPAGEFLAG(Private, private)
-- 
cgit v1.2.3


From 8a38082d21cbc5ec961da7dda195e98a9a064dcf Mon Sep 17 00:00:00 2001
From: Andy Whitcroft <apw@shadowen.org>
Date: Wed, 23 Jul 2008 21:27:18 -0700
Subject: slub: record page flag overlays explicitly

SLUB reuses two page bits for internal purposes, it overlays PG_active and
PG_error.  This is hidden away in slub.c.  Document these overlays
explicitly in the main page-flags enum along with all the others.

Signed-off-by: Andy Whitcroft <apw@shadowen.org>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Matt Mackall <mpm@selenic.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Tested-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Jeremy Fitzhardinge <jeremy@goop.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/page-flags.h |  7 +++++
 mm/slub.c                  | 65 ++++++++++++----------------------------------
 2 files changed, 24 insertions(+), 48 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 7d8db1233e44..3fc586b7b90b 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -104,6 +104,10 @@ enum pageflags {
 	/* XEN */
 	PG_pinned = PG_owner_priv_1,
 	PG_savepinned = PG_dirty,
+
+	/* SLUB */
+	PG_slub_frozen = PG_active,
+	PG_slub_debug = PG_error,
 };
 
 #ifndef __GENERATING_BOUNDS_H
@@ -169,6 +173,9 @@ PAGEFLAG(Reserved, reserved) __CLEARPAGEFLAG(Reserved, reserved)
 PAGEFLAG(Private, private) __CLEARPAGEFLAG(Private, private)
 	__SETPAGEFLAG(Private, private)
 
+__PAGEFLAG(SlubFrozen, slub_frozen)
+__PAGEFLAG(SlubDebug, slub_debug)
+
 /*
  * Only test-and-set exist for PG_writeback.  The unconditional operators are
  * risky: they bypass page accounting.
diff --git a/mm/slub.c b/mm/slub.c
index 6d4a49c1ff2f..77c21cf53ff9 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -102,44 +102,12 @@
  * 			the fast path and disables lockless freelists.
  */
 
-#define FROZEN (1 << PG_active)
-
 #ifdef CONFIG_SLUB_DEBUG
-#define SLABDEBUG (1 << PG_error)
+#define SLABDEBUG 1
 #else
 #define SLABDEBUG 0
 #endif
 
-static inline int SlabFrozen(struct page *page)
-{
-	return page->flags & FROZEN;
-}
-
-static inline void SetSlabFrozen(struct page *page)
-{
-	page->flags |= FROZEN;
-}
-
-static inline void ClearSlabFrozen(struct page *page)
-{
-	page->flags &= ~FROZEN;
-}
-
-static inline int SlabDebug(struct page *page)
-{
-	return page->flags & SLABDEBUG;
-}
-
-static inline void SetSlabDebug(struct page *page)
-{
-	page->flags |= SLABDEBUG;
-}
-
-static inline void ClearSlabDebug(struct page *page)
-{
-	page->flags &= ~SLABDEBUG;
-}
-
 /*
  * Issues still to be resolved:
  *
@@ -971,7 +939,7 @@ static int free_debug_processing(struct kmem_cache *s, struct page *page,
 	}
 
 	/* Special debug activities for freeing objects */
-	if (!SlabFrozen(page) && !page->freelist)
+	if (!PageSlubFrozen(page) && !page->freelist)
 		remove_full(s, page);
 	if (s->flags & SLAB_STORE_USER)
 		set_track(s, object, TRACK_FREE, addr);
@@ -1157,7 +1125,7 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
 	page->flags |= 1 << PG_slab;
 	if (s->flags & (SLAB_DEBUG_FREE | SLAB_RED_ZONE | SLAB_POISON |
 			SLAB_STORE_USER | SLAB_TRACE))
-		SetSlabDebug(page);
+		__SetPageSlubDebug(page);
 
 	start = page_address(page);
 
@@ -1184,14 +1152,14 @@ static void __free_slab(struct kmem_cache *s, struct page *page)
 	int order = compound_order(page);
 	int pages = 1 << order;
 
-	if (unlikely(SlabDebug(page))) {
+	if (unlikely(SLABDEBUG && PageSlubDebug(page))) {
 		void *p;
 
 		slab_pad_check(s, page);
 		for_each_object(p, s, page_address(page),
 						page->objects)
 			check_object(s, page, p, 0);
-		ClearSlabDebug(page);
+		__ClearPageSlubDebug(page);
 	}
 
 	mod_zone_page_state(page_zone(page),
@@ -1288,7 +1256,7 @@ static inline int lock_and_freeze_slab(struct kmem_cache_node *n,
 	if (slab_trylock(page)) {
 		list_del(&page->lru);
 		n->nr_partial--;
-		SetSlabFrozen(page);
+		__SetPageSlubFrozen(page);
 		return 1;
 	}
 	return 0;
@@ -1398,7 +1366,7 @@ static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail)
 	struct kmem_cache_node *n = get_node(s, page_to_nid(page));
 	struct kmem_cache_cpu *c = get_cpu_slab(s, smp_processor_id());
 
-	ClearSlabFrozen(page);
+	__ClearPageSlubFrozen(page);
 	if (page->inuse) {
 
 		if (page->freelist) {
@@ -1406,7 +1374,8 @@ static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail)
 			stat(c, tail ? DEACTIVATE_TO_TAIL : DEACTIVATE_TO_HEAD);
 		} else {
 			stat(c, DEACTIVATE_FULL);
-			if (SlabDebug(page) && (s->flags & SLAB_STORE_USER))
+			if (SLABDEBUG && PageSlubDebug(page) &&
+						(s->flags & SLAB_STORE_USER))
 				add_full(n, page);
 		}
 		slab_unlock(page);
@@ -1551,7 +1520,7 @@ load_freelist:
 	object = c->page->freelist;
 	if (unlikely(!object))
 		goto another_slab;
-	if (unlikely(SlabDebug(c->page)))
+	if (unlikely(SLABDEBUG && PageSlubDebug(c->page)))
 		goto debug;
 
 	c->freelist = object[c->offset];
@@ -1588,7 +1557,7 @@ new_slab:
 		if (c->page)
 			flush_slab(s, c);
 		slab_lock(new);
-		SetSlabFrozen(new);
+		__SetPageSlubFrozen(new);
 		c->page = new;
 		goto load_freelist;
 	}
@@ -1674,7 +1643,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
 	stat(c, FREE_SLOWPATH);
 	slab_lock(page);
 
-	if (unlikely(SlabDebug(page)))
+	if (unlikely(SLABDEBUG && PageSlubDebug(page)))
 		goto debug;
 
 checks_ok:
@@ -1682,7 +1651,7 @@ checks_ok:
 	page->freelist = object;
 	page->inuse--;
 
-	if (unlikely(SlabFrozen(page))) {
+	if (unlikely(PageSlubFrozen(page))) {
 		stat(c, FREE_FROZEN);
 		goto out_unlock;
 	}
@@ -3317,12 +3286,12 @@ static void validate_slab_slab(struct kmem_cache *s, struct page *page,
 			s->name, page);
 
 	if (s->flags & DEBUG_DEFAULT_FLAGS) {
-		if (!SlabDebug(page))
-			printk(KERN_ERR "SLUB %s: SlabDebug not set "
+		if (!PageSlubDebug(page))
+			printk(KERN_ERR "SLUB %s: SlubDebug not set "
 				"on slab 0x%p\n", s->name, page);
 	} else {
-		if (SlabDebug(page))
-			printk(KERN_ERR "SLUB %s: SlabDebug set on "
+		if (PageSlubDebug(page))
+			printk(KERN_ERR "SLUB %s: SlubDebug set on "
 				"slab 0x%p\n", s->name, page);
 	}
 }
-- 
cgit v1.2.3


From 9023cb7e8564d95a1893f8cb6895a293be9a71fe Mon Sep 17 00:00:00 2001
From: Andy Whitcroft <apw@shadowen.org>
Date: Wed, 23 Jul 2008 21:27:19 -0700
Subject: slob: record page flag overlays explicitly

SLOB reuses two page bits for internal purposes, it overlays PG_active and
PG_private.  This is hidden away in slob.c.  Document these overlays
explicitly in the main page-flags enum along with all the others.

Signed-off-by: Andy Whitcroft <apw@shadowen.org>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Matt Mackall <mpm@selenic.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Jeremy Fitzhardinge <jeremy@goop.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/page-flags.h |  7 +++++++
 mm/slob.c                  | 12 ++++++------
 2 files changed, 13 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 3fc586b7b90b..54590a9a103e 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -105,6 +105,10 @@ enum pageflags {
 	PG_pinned = PG_owner_priv_1,
 	PG_savepinned = PG_dirty,
 
+	/* SLOB */
+	PG_slob_page = PG_active,
+	PG_slob_free = PG_private,
+
 	/* SLUB */
 	PG_slub_frozen = PG_active,
 	PG_slub_debug = PG_error,
@@ -173,6 +177,9 @@ PAGEFLAG(Reserved, reserved) __CLEARPAGEFLAG(Reserved, reserved)
 PAGEFLAG(Private, private) __CLEARPAGEFLAG(Private, private)
 	__SETPAGEFLAG(Private, private)
 
+__PAGEFLAG(SlobPage, slob_page)
+__PAGEFLAG(SlobFree, slob_free)
+
 __PAGEFLAG(SlubFrozen, slub_frozen)
 __PAGEFLAG(SlubDebug, slub_debug)
 
diff --git a/mm/slob.c b/mm/slob.c
index a3ad6671adf1..de268eb7ac70 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -130,17 +130,17 @@ static LIST_HEAD(free_slob_large);
  */
 static inline int slob_page(struct slob_page *sp)
 {
-	return test_bit(PG_active, &sp->flags);
+	return PageSlobPage((struct page *)sp);
 }
 
 static inline void set_slob_page(struct slob_page *sp)
 {
-	__set_bit(PG_active, &sp->flags);
+	__SetPageSlobPage((struct page *)sp);
 }
 
 static inline void clear_slob_page(struct slob_page *sp)
 {
-	__clear_bit(PG_active, &sp->flags);
+	__ClearPageSlobPage((struct page *)sp);
 }
 
 /*
@@ -148,19 +148,19 @@ static inline void clear_slob_page(struct slob_page *sp)
  */
 static inline int slob_page_free(struct slob_page *sp)
 {
-	return test_bit(PG_private, &sp->flags);
+	return PageSlobFree((struct page *)sp);
 }
 
 static void set_slob_page_free(struct slob_page *sp, struct list_head *list)
 {
 	list_add(&sp->list, list);
-	__set_bit(PG_private, &sp->flags);
+	__SetPageSlobFree((struct page *)sp);
 }
 
 static inline void clear_slob_page_free(struct slob_page *sp)
 {
 	list_del(&sp->list);
-	__clear_bit(PG_private, &sp->flags);
+	__ClearPageSlobFree((struct page *)sp);
 }
 
 #define SLOB_UNIT sizeof(slob_t)
-- 
cgit v1.2.3


From 2185e69f680ae8c8496b6fc15e20c889d5b39b67 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Wed, 23 Jul 2008 21:27:19 -0700
Subject: mapping_set_error: add unlikely()

This is called on a per-page basis and in the vast majority of cases
`error' is zero.

Cc: Guillaume Chazarain <guichaz@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/pagemap.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index d2fca802f809..ee1ec2c7723c 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -22,7 +22,7 @@
 
 static inline void mapping_set_error(struct address_space *mapping, int error)
 {
-	if (error) {
+	if (unlikely(error)) {
 		if (error == -ENOSPC)
 			set_bit(AS_ENOSPC, &mapping->flags);
 		else
-- 
cgit v1.2.3


From 9109fb7b3520de187ebc3646c209d66a233f7169 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@saeurebad.de>
Date: Wed, 23 Jul 2008 21:27:20 -0700
Subject: mm: drop unneeded pgdat argument from free_area_init_node()

free_area_init_node() gets passed in the node id as well as the node
descriptor.  This is redundant as the function can trivially get the node
descriptor itself by means of NODE_DATA() and the node's id.

I checked all the users and NODE_DATA() seems to be usable everywhere
from where this function is called.

Signed-off-by: Johannes Weiner <hannes@saeurebad.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/alpha/mm/numa.c         |  2 +-
 arch/arm/mm/init.c           |  2 +-
 arch/avr32/mm/init.c         |  2 +-
 arch/cris/arch-v10/mm/init.c |  2 +-
 arch/cris/arch-v32/mm/init.c |  2 +-
 arch/m32r/mm/discontig.c     |  3 +--
 arch/m32r/mm/init.c          |  2 +-
 arch/m68k/mm/motorola.c      |  2 +-
 arch/m68k/mm/sun3mmu.c       |  2 +-
 arch/parisc/mm/init.c        |  2 +-
 arch/sparc/mm/srmmu.c        |  3 +--
 arch/sparc/mm/sun4c.c        |  3 +--
 arch/v850/kernel/setup.c     |  3 +--
 include/linux/mm.h           |  5 ++---
 mm/memory_hotplug.c          |  2 +-
 mm/page_alloc.c              | 11 ++++++-----
 16 files changed, 22 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/mm/numa.c b/arch/alpha/mm/numa.c
index a53fda0481ca..def0c74a78a8 100644
--- a/arch/alpha/mm/numa.c
+++ b/arch/alpha/mm/numa.c
@@ -313,7 +313,7 @@ void __init paging_init(void)
 			zones_size[ZONE_DMA] = dma_local_pfn;
 			zones_size[ZONE_NORMAL] = (end_pfn - start_pfn) - dma_local_pfn;
 		}
-		free_area_init_node(nid, NODE_DATA(nid), zones_size, start_pfn, NULL);
+		free_area_init_node(nid, zones_size, start_pfn, NULL);
 	}
 
 	/* Initialize the kernel's ZERO_PGE. */
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index b657f1719af0..e6352946dde0 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -284,7 +284,7 @@ bootmem_init_node(int node, int initrd_node, struct meminfo *mi)
 	 */
 	arch_adjust_zones(node, zone_size, zhole_size);
 
-	free_area_init_node(node, pgdat, zone_size, start_pfn, zhole_size);
+	free_area_init_node(node, zone_size, start_pfn, zhole_size);
 
 	return end_pfn;
 }
diff --git a/arch/avr32/mm/init.c b/arch/avr32/mm/init.c
index 3f90a87527bb..786de88a82a7 100644
--- a/arch/avr32/mm/init.c
+++ b/arch/avr32/mm/init.c
@@ -129,7 +129,7 @@ void __init paging_init(void)
 		printk("Node %u: start_pfn = 0x%lx, low = 0x%lx\n",
 		       nid, start_pfn, low);
 
-		free_area_init_node(nid, pgdat, zones_size, start_pfn, NULL);
+		free_area_init_node(nid, zones_size, start_pfn, NULL);
 
 		printk("Node %u: mem_map starts at %p\n",
 		       pgdat->node_id, pgdat->node_mem_map);
diff --git a/arch/cris/arch-v10/mm/init.c b/arch/cris/arch-v10/mm/init.c
index e0fcd1a9bfd5..742fd1974c2e 100644
--- a/arch/cris/arch-v10/mm/init.c
+++ b/arch/cris/arch-v10/mm/init.c
@@ -182,7 +182,7 @@ paging_init(void)
 	 * mem_map page array.
 	 */
 
-	free_area_init_node(0, &contig_page_data, zones_size, PAGE_OFFSET >> PAGE_SHIFT, 0);
+	free_area_init_node(0, zones_size, PAGE_OFFSET >> PAGE_SHIFT, 0);
 }
 
 /* Initialize remaps of some I/O-ports. It is important that this
diff --git a/arch/cris/arch-v32/mm/init.c b/arch/cris/arch-v32/mm/init.c
index 5a9ac5834647..8a34b8b74293 100644
--- a/arch/cris/arch-v32/mm/init.c
+++ b/arch/cris/arch-v32/mm/init.c
@@ -162,7 +162,7 @@ paging_init(void)
 	 * substantially higher than 0, like us (we start at PAGE_OFFSET). This
 	 * saves space in the mem_map page array.
 	 */
-	free_area_init_node(0, &contig_page_data, zones_size, PAGE_OFFSET >> PAGE_SHIFT, 0);
+	free_area_init_node(0, zones_size, PAGE_OFFSET >> PAGE_SHIFT, 0);
 
 	mem_map = contig_page_data.node_mem_map;
 }
diff --git a/arch/m32r/mm/discontig.c b/arch/m32r/mm/discontig.c
index aa9145ef6cca..cc23934bc41e 100644
--- a/arch/m32r/mm/discontig.c
+++ b/arch/m32r/mm/discontig.c
@@ -147,8 +147,7 @@ unsigned long __init zone_sizes_init(void)
 		zholes_size[ZONE_DMA] = mp->holes;
 		holes += zholes_size[ZONE_DMA];
 
-		free_area_init_node(nid, NODE_DATA(nid), zones_size,
-			start_pfn, zholes_size);
+		free_area_init_node(nid, zones_size, start_pfn, zholes_size);
 	}
 
 	/*
diff --git a/arch/m32r/mm/init.c b/arch/m32r/mm/init.c
index bbd97c85bc5d..28799af15e95 100644
--- a/arch/m32r/mm/init.c
+++ b/arch/m32r/mm/init.c
@@ -123,7 +123,7 @@ unsigned long __init zone_sizes_init(void)
 	start_pfn = __MEMORY_START >> PAGE_SHIFT;
 #endif /* CONFIG_MMU */
 
-	free_area_init_node(0, NODE_DATA(0), zones_size, start_pfn, 0);
+	free_area_init_node(0, zones_size, start_pfn, 0);
 
 	return 0;
 }
diff --git a/arch/m68k/mm/motorola.c b/arch/m68k/mm/motorola.c
index 226795bdf355..c5dbb9bdb322 100644
--- a/arch/m68k/mm/motorola.c
+++ b/arch/m68k/mm/motorola.c
@@ -296,7 +296,7 @@ void __init paging_init(void)
 #endif
 	for (i = 0; i < m68k_num_memory; i++) {
 		zones_size[ZONE_DMA] = m68k_memory[i].size >> PAGE_SHIFT;
-		free_area_init_node(i, pg_data_map + i, zones_size,
+		free_area_init_node(i, zones_size,
 				    m68k_memory[i].addr >> PAGE_SHIFT, NULL);
 	}
 }
diff --git a/arch/m68k/mm/sun3mmu.c b/arch/m68k/mm/sun3mmu.c
index edceefc18870..1b902dbd4376 100644
--- a/arch/m68k/mm/sun3mmu.c
+++ b/arch/m68k/mm/sun3mmu.c
@@ -94,7 +94,7 @@ void __init paging_init(void)
 
 	/* I really wish I knew why the following change made things better...  -- Sam */
 /*	free_area_init(zones_size); */
-	free_area_init_node(0, NODE_DATA(0), zones_size,
+	free_area_init_node(0, zones_size,
 			    (__pa(PAGE_OFFSET) >> PAGE_SHIFT) + 1, NULL);
 
 
diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c
index 0ddf4904640a..7c155c254e72 100644
--- a/arch/parisc/mm/init.c
+++ b/arch/parisc/mm/init.c
@@ -887,7 +887,7 @@ void __init paging_init(void)
 		}
 #endif
 
-		free_area_init_node(i, NODE_DATA(i), zones_size,
+		free_area_init_node(i, zones_size,
 				pmem_ranges[i].start_pfn, NULL);
 	}
 }
diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c
index c624e04ff03e..ee30462598fc 100644
--- a/arch/sparc/mm/srmmu.c
+++ b/arch/sparc/mm/srmmu.c
@@ -1352,8 +1352,7 @@ void __init srmmu_paging_init(void)
 		zones_size[ZONE_HIGHMEM] = npages;
 		zholes_size[ZONE_HIGHMEM] = npages - calc_highpages();
 
-		free_area_init_node(0, &contig_page_data, zones_size,
-				    pfn_base, zholes_size);
+		free_area_init_node(0, zones_size, pfn_base, zholes_size);
 	}
 }
 
diff --git a/arch/sparc/mm/sun4c.c b/arch/sparc/mm/sun4c.c
index 2375fe9dc312..d1782f6368be 100644
--- a/arch/sparc/mm/sun4c.c
+++ b/arch/sparc/mm/sun4c.c
@@ -2123,8 +2123,7 @@ void __init sun4c_paging_init(void)
 		zones_size[ZONE_HIGHMEM] = npages;
 		zholes_size[ZONE_HIGHMEM] = npages - calc_highpages();
 
-		free_area_init_node(0, &contig_page_data, zones_size,
-				    pfn_base, zholes_size);
+		free_area_init_node(0, zones_size, pfn_base, zholes_size);
 	}
 
 	cnt = 0;
diff --git a/arch/v850/kernel/setup.c b/arch/v850/kernel/setup.c
index a0a8456a8430..10335cecf7bd 100644
--- a/arch/v850/kernel/setup.c
+++ b/arch/v850/kernel/setup.c
@@ -295,8 +295,7 @@ init_mem_alloc (unsigned long ram_start, unsigned long ram_len)
 #error MAX_ORDER is too large for given PAGE_OFFSET (use CONFIG_FORCE_MAX_ZONEORDER to change it)
 #endif
 	NODE_DATA(0)->node_mem_map = NULL;
-	free_area_init_node (0, NODE_DATA(0), zones_size,
-			     ADDR_TO_PAGE (PAGE_OFFSET), 0);
+	free_area_init_node(0, zones_size, ADDR_TO_PAGE (PAGE_OFFSET), 0);
 }
 
 
diff --git a/include/linux/mm.h b/include/linux/mm.h
index f8071097302a..196924b657bc 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -962,9 +962,8 @@ static inline void pgtable_page_dtor(struct page *page)
 		NULL: pte_offset_kernel(pmd, address))
 
 extern void free_area_init(unsigned long * zones_size);
-extern void free_area_init_node(int nid, pg_data_t *pgdat,
-	unsigned long * zones_size, unsigned long zone_start_pfn, 
-	unsigned long *zholes_size);
+extern void free_area_init_node(int nid, unsigned long * zones_size,
+		unsigned long zone_start_pfn, unsigned long *zholes_size);
 #ifdef CONFIG_ARCH_POPULATES_NODE_MAP
 /*
  * With CONFIG_ARCH_POPULATES_NODE_MAP set, an architecture may initialise its
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 833f854eabe5..6e26adc08f14 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -455,7 +455,7 @@ static pg_data_t *hotadd_new_pgdat(int nid, u64 start)
 	/* we can use NODE_DATA(nid) from here */
 
 	/* init node's zones as empty zones, we don't have any present pages.*/
-	free_area_init_node(nid, pgdat, zones_size, start_pfn, zholes_size);
+	free_area_init_node(nid, zones_size, start_pfn, zholes_size);
 
 	return pgdat;
 }
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 24aa3d1b9d96..e43aae135b38 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3461,10 +3461,11 @@ static void __init_refok alloc_node_mem_map(struct pglist_data *pgdat)
 #endif /* CONFIG_FLAT_NODE_MEM_MAP */
 }
 
-void __paginginit free_area_init_node(int nid, struct pglist_data *pgdat,
-		unsigned long *zones_size, unsigned long node_start_pfn,
-		unsigned long *zholes_size)
+void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
+		unsigned long node_start_pfn, unsigned long *zholes_size)
 {
+	pg_data_t *pgdat = NODE_DATA(nid);
+
 	pgdat->node_id = nid;
 	pgdat->node_start_pfn = node_start_pfn;
 	calculate_node_totalpages(pgdat, zones_size, zholes_size);
@@ -3961,7 +3962,7 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
 	setup_nr_node_ids();
 	for_each_online_node(nid) {
 		pg_data_t *pgdat = NODE_DATA(nid);
-		free_area_init_node(nid, pgdat, NULL,
+		free_area_init_node(nid, NULL,
 				find_min_pfn_for_node(nid), NULL);
 
 		/* Any memory on that node */
@@ -4032,7 +4033,7 @@ EXPORT_SYMBOL(contig_page_data);
 
 void __init free_area_init(unsigned long *zones_size)
 {
-	free_area_init_node(0, NODE_DATA(0), zones_size,
+	free_area_init_node(0, zones_size,
 			__pa(PAGE_OFFSET) >> PAGE_SHIFT, NULL);
 }
 
-- 
cgit v1.2.3


From a1e78772d72b2616ed20e54896e68e0e7044854e Mon Sep 17 00:00:00 2001
From: Mel Gorman <mel@csn.ul.ie>
Date: Wed, 23 Jul 2008 21:27:23 -0700
Subject: hugetlb: reserve huge pages for reliable MAP_PRIVATE hugetlbfs
 mappings until fork()

This patch reserves huge pages at mmap() time for MAP_PRIVATE mappings in
a similar manner to the reservations taken for MAP_SHARED mappings.  The
reserve count is accounted both globally and on a per-VMA basis for
private mappings.  This guarantees that a process that successfully calls
mmap() will successfully fault all pages in the future unless fork() is
called.

The characteristics of private mappings of hugetlbfs files behaviour after
this patch are;

1. The process calling mmap() is guaranteed to succeed all future faults until
   it forks().
2. On fork(), the parent may die due to SIGKILL on writes to the private
   mapping if enough pages are not available for the COW. For reasonably
   reliable behaviour in the face of a small huge page pool, children of
   hugepage-aware processes should not reference the mappings; such as
   might occur when fork()ing to exec().
3. On fork(), the child VMAs inherit no reserves. Reads on pages already
   faulted by the parent will succeed. Successful writes will depend on enough
   huge pages being free in the pool.
4. Quotas of the hugetlbfs mount are checked at reserve time for the mapper
   and at fault time otherwise.

Before this patch, all reads or writes in the child potentially needs page
allocations that can later lead to the death of the parent.  This applies
to reads and writes of uninstantiated pages as well as COW.  After the
patch it is only a write to an instantiated page that causes problems.

Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Acked-by: Adam Litke <agl@us.ibm.com>
Cc: Andy Whitcroft <apw@shadowen.org>
Cc: William Lee Irwin III <wli@holomorphy.com>
Cc: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/hugetlbfs/inode.c    |   8 +--
 include/linux/hugetlb.h |   9 ++-
 kernel/fork.c           |   9 +++
 mm/hugetlb.c            | 158 ++++++++++++++++++++++++++++++++++++------------
 4 files changed, 140 insertions(+), 44 deletions(-)

(limited to 'include/linux')

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index aeabf80f81a5..1576bbecd084 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -103,9 +103,9 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
 	ret = -ENOMEM;
 	len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
 
-	if (vma->vm_flags & VM_MAYSHARE &&
-	    hugetlb_reserve_pages(inode, vma->vm_pgoff >> (HPAGE_SHIFT-PAGE_SHIFT),
-				  len >> HPAGE_SHIFT))
+	if (hugetlb_reserve_pages(inode,
+				vma->vm_pgoff >> (HPAGE_SHIFT-PAGE_SHIFT),
+				len >> HPAGE_SHIFT, vma))
 		goto out;
 
 	ret = 0;
@@ -942,7 +942,7 @@ struct file *hugetlb_file_setup(const char *name, size_t size)
 		goto out_dentry;
 
 	error = -ENOMEM;
-	if (hugetlb_reserve_pages(inode, 0, size >> HPAGE_SHIFT))
+	if (hugetlb_reserve_pages(inode, 0, size >> HPAGE_SHIFT, NULL))
 		goto out_inode;
 
 	d_instantiate(dentry, inode);
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index a79e80b689d8..185b14c9f021 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -17,6 +17,7 @@ static inline int is_vm_hugetlb_page(struct vm_area_struct *vma)
 	return vma->vm_flags & VM_HUGETLB;
 }
 
+void reset_vma_resv_huge_pages(struct vm_area_struct *vma);
 int hugetlb_sysctl_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *);
 int hugetlb_overcommit_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *);
 int hugetlb_treat_movable_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *);
@@ -30,7 +31,8 @@ int hugetlb_report_node_meminfo(int, char *);
 unsigned long hugetlb_total_pages(void);
 int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 			unsigned long address, int write_access);
-int hugetlb_reserve_pages(struct inode *inode, long from, long to);
+int hugetlb_reserve_pages(struct inode *inode, long from, long to,
+						struct vm_area_struct *vma);
 void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed);
 
 extern unsigned long max_huge_pages;
@@ -58,6 +60,11 @@ static inline int is_vm_hugetlb_page(struct vm_area_struct *vma)
 {
 	return 0;
 }
+
+static inline void reset_vma_resv_huge_pages(struct vm_area_struct *vma)
+{
+}
+
 static inline unsigned long hugetlb_total_pages(void)
 {
 	return 0;
diff --git a/kernel/fork.c b/kernel/fork.c
index adefc1131f27..552c8d8e77ad 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -33,6 +33,7 @@
 #include <linux/cpu.h>
 #include <linux/cgroup.h>
 #include <linux/security.h>
+#include <linux/hugetlb.h>
 #include <linux/swap.h>
 #include <linux/syscalls.h>
 #include <linux/jiffies.h>
@@ -306,6 +307,14 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
 			spin_unlock(&file->f_mapping->i_mmap_lock);
 		}
 
+		/*
+		 * Clear hugetlb-related page reserves for children. This only
+		 * affects MAP_PRIVATE mappings. Faults generated by the child
+		 * are not guaranteed to succeed, even if read-only
+		 */
+		if (is_vm_hugetlb_page(tmp))
+			reset_vma_resv_huge_pages(tmp);
+
 		/*
 		 * Link in the new vma and copy the page table entries.
 		 */
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index a4dbba8965f3..0af500db3632 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -40,6 +40,69 @@ static int hugetlb_next_nid;
  */
 static DEFINE_SPINLOCK(hugetlb_lock);
 
+/*
+ * These helpers are used to track how many pages are reserved for
+ * faults in a MAP_PRIVATE mapping. Only the process that called mmap()
+ * is guaranteed to have their future faults succeed.
+ *
+ * With the exception of reset_vma_resv_huge_pages() which is called at fork(),
+ * the reserve counters are updated with the hugetlb_lock held. It is safe
+ * to reset the VMA at fork() time as it is not in use yet and there is no
+ * chance of the global counters getting corrupted as a result of the values.
+ */
+static unsigned long vma_resv_huge_pages(struct vm_area_struct *vma)
+{
+	VM_BUG_ON(!is_vm_hugetlb_page(vma));
+	if (!(vma->vm_flags & VM_SHARED))
+		return (unsigned long)vma->vm_private_data;
+	return 0;
+}
+
+static void set_vma_resv_huge_pages(struct vm_area_struct *vma,
+							unsigned long reserve)
+{
+	VM_BUG_ON(!is_vm_hugetlb_page(vma));
+	VM_BUG_ON(vma->vm_flags & VM_SHARED);
+
+	vma->vm_private_data = (void *)reserve;
+}
+
+/* Decrement the reserved pages in the hugepage pool by one */
+static void decrement_hugepage_resv_vma(struct vm_area_struct *vma)
+{
+	if (vma->vm_flags & VM_SHARED) {
+		/* Shared mappings always use reserves */
+		resv_huge_pages--;
+	} else {
+		/*
+		 * Only the process that called mmap() has reserves for
+		 * private mappings.
+		 */
+		if (vma_resv_huge_pages(vma)) {
+			resv_huge_pages--;
+			reserve = (unsigned long)vma->vm_private_data - 1;
+			vma->vm_private_data = (void *)reserve;
+		}
+	}
+}
+
+void reset_vma_resv_huge_pages(struct vm_area_struct *vma)
+{
+	VM_BUG_ON(!is_vm_hugetlb_page(vma));
+	if (!(vma->vm_flags & VM_SHARED))
+		vma->vm_private_data = (void *)0;
+}
+
+/* Returns true if the VMA has associated reserve pages */
+static int vma_has_private_reserves(struct vm_area_struct *vma)
+{
+	if (vma->vm_flags & VM_SHARED)
+		return 0;
+	if (!vma_resv_huge_pages(vma))
+		return 0;
+	return 1;
+}
+
 static void clear_huge_page(struct page *page, unsigned long addr)
 {
 	int i;
@@ -101,6 +164,15 @@ static struct page *dequeue_huge_page_vma(struct vm_area_struct *vma,
 	struct zone *zone;
 	struct zoneref *z;
 
+	/*
+	 * A child process with MAP_PRIVATE mappings created by their parent
+	 * have no page reserves. This check ensures that reservations are
+	 * not "stolen". The child may still get SIGKILLed
+	 */
+	if (!vma_has_private_reserves(vma) &&
+			free_huge_pages - resv_huge_pages == 0)
+		return NULL;
+
 	for_each_zone_zonelist_nodemask(zone, z, zonelist,
 						MAX_NR_ZONES - 1, nodemask) {
 		nid = zone_to_nid(zone);
@@ -111,8 +183,8 @@ static struct page *dequeue_huge_page_vma(struct vm_area_struct *vma,
 			list_del(&page->lru);
 			free_huge_pages--;
 			free_huge_pages_node[nid]--;
-			if (vma && vma->vm_flags & VM_MAYSHARE)
-				resv_huge_pages--;
+			decrement_hugepage_resv_vma(vma);
+
 			break;
 		}
 	}
@@ -461,55 +533,40 @@ static void return_unused_surplus_pages(unsigned long unused_resv_pages)
 	}
 }
 
-
-static struct page *alloc_huge_page_shared(struct vm_area_struct *vma,
-						unsigned long addr)
+static struct page *alloc_huge_page(struct vm_area_struct *vma,
+				    unsigned long addr)
 {
 	struct page *page;
+	struct address_space *mapping = vma->vm_file->f_mapping;
+	struct inode *inode = mapping->host;
+	unsigned int chg = 0;
+
+	/*
+	 * Processes that did not create the mapping will have no reserves and
+	 * will not have accounted against quota. Check that the quota can be
+	 * made before satisfying the allocation
+	 */
+	if (!vma_has_private_reserves(vma)) {
+		chg = 1;
+		if (hugetlb_get_quota(inode->i_mapping, chg))
+			return ERR_PTR(-ENOSPC);
+	}
 
 	spin_lock(&hugetlb_lock);
 	page = dequeue_huge_page_vma(vma, addr);
 	spin_unlock(&hugetlb_lock);
-	return page ? page : ERR_PTR(-VM_FAULT_OOM);
-}
 
-static struct page *alloc_huge_page_private(struct vm_area_struct *vma,
-						unsigned long addr)
-{
-	struct page *page = NULL;
-
-	if (hugetlb_get_quota(vma->vm_file->f_mapping, 1))
-		return ERR_PTR(-VM_FAULT_SIGBUS);
-
-	spin_lock(&hugetlb_lock);
-	if (free_huge_pages > resv_huge_pages)
-		page = dequeue_huge_page_vma(vma, addr);
-	spin_unlock(&hugetlb_lock);
 	if (!page) {
 		page = alloc_buddy_huge_page(vma, addr);
 		if (!page) {
-			hugetlb_put_quota(vma->vm_file->f_mapping, 1);
+			hugetlb_put_quota(inode->i_mapping, chg);
 			return ERR_PTR(-VM_FAULT_OOM);
 		}
 	}
-	return page;
-}
 
-static struct page *alloc_huge_page(struct vm_area_struct *vma,
-				    unsigned long addr)
-{
-	struct page *page;
-	struct address_space *mapping = vma->vm_file->f_mapping;
-
-	if (vma->vm_flags & VM_MAYSHARE)
-		page = alloc_huge_page_shared(vma, addr);
-	else
-		page = alloc_huge_page_private(vma, addr);
+	set_page_refcounted(page);
+	set_page_private(page, (unsigned long) mapping);
 
-	if (!IS_ERR(page)) {
-		set_page_refcounted(page);
-		set_page_private(page, (unsigned long) mapping);
-	}
 	return page;
 }
 
@@ -757,6 +814,13 @@ out:
 	return ret;
 }
 
+static void hugetlb_vm_op_close(struct vm_area_struct *vma)
+{
+	unsigned long reserve = vma_resv_huge_pages(vma);
+	if (reserve)
+		hugetlb_acct_memory(-reserve);
+}
+
 /*
  * We cannot handle pagefaults against hugetlb pages at all.  They cause
  * handle_mm_fault() to try to instantiate regular-sized pages in the
@@ -771,6 +835,7 @@ static int hugetlb_vm_op_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 
 struct vm_operations_struct hugetlb_vm_ops = {
 	.fault = hugetlb_vm_op_fault,
+	.close = hugetlb_vm_op_close,
 };
 
 static pte_t make_huge_pte(struct vm_area_struct *vma, struct page *page,
@@ -1289,11 +1354,25 @@ static long region_truncate(struct list_head *head, long end)
 	return chg;
 }
 
-int hugetlb_reserve_pages(struct inode *inode, long from, long to)
+int hugetlb_reserve_pages(struct inode *inode,
+					long from, long to,
+					struct vm_area_struct *vma)
 {
 	long ret, chg;
 
-	chg = region_chg(&inode->i_mapping->private_list, from, to);
+	/*
+	 * Shared mappings base their reservation on the number of pages that
+	 * are already allocated on behalf of the file. Private mappings need
+	 * to reserve the full area even if read-only as mprotect() may be
+	 * called to make the mapping read-write. Assume !vma is a shm mapping
+	 */
+	if (!vma || vma->vm_flags & VM_SHARED)
+		chg = region_chg(&inode->i_mapping->private_list, from, to);
+	else {
+		chg = to - from;
+		set_vma_resv_huge_pages(vma, chg);
+	}
+
 	if (chg < 0)
 		return chg;
 
@@ -1304,7 +1383,8 @@ int hugetlb_reserve_pages(struct inode *inode, long from, long to)
 		hugetlb_put_quota(inode->i_mapping, chg);
 		return ret;
 	}
-	region_add(&inode->i_mapping->private_list, from, to);
+	if (!vma || vma->vm_flags & VM_SHARED)
+		region_add(&inode->i_mapping->private_list, from, to);
 	return 0;
 }
 
-- 
cgit v1.2.3


From 04f2cbe35699d22dbf428373682ead85ca1240f5 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mel@csn.ul.ie>
Date: Wed, 23 Jul 2008 21:27:25 -0700
Subject: hugetlb: guarantee that COW faults for a process that called
 mmap(MAP_PRIVATE) on hugetlbfs will succeed

After patch 2 in this series, a process that successfully calls mmap() for
a MAP_PRIVATE mapping will be guaranteed to successfully fault until a
process calls fork().  At that point, the next write fault from the parent
could fail due to COW if the child still has a reference.

We only reserve pages for the parent but a copy must be made to avoid
leaking data from the parent to the child after fork().  Reserves could be
taken for both parent and child at fork time to guarantee faults but if
the mapping is large it is highly likely we will not have sufficient pages
for the reservation, and it is common to fork only to exec() immediatly
after.  A failure here would be very undesirable.

Note that the current behaviour of mainline with MAP_PRIVATE pages is
pretty bad.  The following situation is allowed to occur today.

1. Process calls mmap(MAP_PRIVATE)
2. Process calls mlock() to fault all pages and makes sure it succeeds
3. Process forks()
4. Process writes to MAP_PRIVATE mapping while child still exists
5. If the COW fails at this point, the process gets SIGKILLed even though it
   had taken care to ensure the pages existed

This patch improves the situation by guaranteeing the reliability of the
process that successfully calls mmap().  When the parent performs COW, it
will try to satisfy the allocation without using reserves.  If that fails
the parent will steal the page leaving any children without a page.
Faults from the child after that point will result in failure.  If the
child COW happens first, an attempt will be made to allocate the page
without reserves and the child will get SIGKILLed on failure.

To summarise the new behaviour:

1. If the original mapper performs COW on a private mapping with multiple
   references, it will attempt to allocate a hugepage from the pool or
   the buddy allocator without using the existing reserves. On fail, VMAs
   mapping the same area are traversed and the page being COW'd is unmapped
   where found. It will then steal the original page as the last mapper in
   the normal way.

2. The VMAs the pages were unmapped from are flagged to note that pages
   with data no longer exist. Future no-page faults on those VMAs will
   terminate the process as otherwise it would appear that data was corrupted.
   A warning is printed to the console that this situation occured.

2. If the child performs COW first, it will attempt to satisfy the COW
   from the pool if there are enough pages or via the buddy allocator if
   overcommit is allowed and the buddy allocator can satisfy the request. If
   it fails, the child will be killed.

If the pool is large enough, existing applications will not notice that
the reserves were a factor.  Existing applications depending on the
no-reserves been set are unlikely to exist as for much of the history of
hugetlbfs, pages were prefaulted at mmap(), allocating the pages at that
point or failing the mmap().

[npiggin@suse.de: fix CONFIG_HUGETLB=n build]
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Acked-by: Adam Litke <agl@us.ibm.com>
Cc: Andy Whitcroft <apw@shadowen.org>
Cc: William Lee Irwin III <wli@holomorphy.com>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: Nick Piggin <npiggin@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/hugetlbfs/inode.c    |   2 +-
 include/linux/hugetlb.h |   8 +-
 mm/hugetlb.c            | 201 +++++++++++++++++++++++++++++++++++++++++++-----
 mm/memory.c             |   2 +-
 4 files changed, 190 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 1576bbecd084..428eff5b73f3 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -441,7 +441,7 @@ hugetlb_vmtruncate_list(struct prio_tree_root *root, pgoff_t pgoff)
 			v_offset = 0;
 
 		__unmap_hugepage_range(vma,
-				vma->vm_start + v_offset, vma->vm_end);
+				vma->vm_start + v_offset, vma->vm_end, NULL);
 	}
 }
 
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 185b14c9f021..abbc187193a1 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -23,8 +23,10 @@ int hugetlb_overcommit_handler(struct ctl_table *, int, struct file *, void __us
 int hugetlb_treat_movable_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *);
 int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *);
 int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, struct page **, struct vm_area_struct **, unsigned long *, int *, int, int);
-void unmap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long);
-void __unmap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long);
+void unmap_hugepage_range(struct vm_area_struct *,
+			unsigned long, unsigned long, struct page *);
+void __unmap_hugepage_range(struct vm_area_struct *,
+			unsigned long, unsigned long, struct page *);
 int hugetlb_prefault(struct address_space *, struct vm_area_struct *);
 int hugetlb_report_meminfo(char *);
 int hugetlb_report_node_meminfo(int, char *);
@@ -74,7 +76,7 @@ static inline unsigned long hugetlb_total_pages(void)
 #define follow_huge_addr(mm, addr, write)	ERR_PTR(-EINVAL)
 #define copy_hugetlb_page_range(src, dst, vma)	({ BUG(); 0; })
 #define hugetlb_prefault(mapping, vma)		({ BUG(); 0; })
-#define unmap_hugepage_range(vma, start, end)	BUG()
+#define unmap_hugepage_range(vma, start, end, page)	BUG()
 #define hugetlb_report_meminfo(buf)		0
 #define hugetlb_report_node_meminfo(n, buf)	0
 #define follow_huge_pmd(mm, addr, pmd, write)	NULL
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 0af500db3632..a2d29b84501f 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -40,6 +40,9 @@ static int hugetlb_next_nid;
  */
 static DEFINE_SPINLOCK(hugetlb_lock);
 
+#define HPAGE_RESV_OWNER    (1UL << (BITS_PER_LONG - 1))
+#define HPAGE_RESV_UNMAPPED (1UL << (BITS_PER_LONG - 2))
+#define HPAGE_RESV_MASK (HPAGE_RESV_OWNER | HPAGE_RESV_UNMAPPED)
 /*
  * These helpers are used to track how many pages are reserved for
  * faults in a MAP_PRIVATE mapping. Only the process that called mmap()
@@ -54,17 +57,32 @@ static unsigned long vma_resv_huge_pages(struct vm_area_struct *vma)
 {
 	VM_BUG_ON(!is_vm_hugetlb_page(vma));
 	if (!(vma->vm_flags & VM_SHARED))
-		return (unsigned long)vma->vm_private_data;
+		return (unsigned long)vma->vm_private_data & ~HPAGE_RESV_MASK;
 	return 0;
 }
 
 static void set_vma_resv_huge_pages(struct vm_area_struct *vma,
 							unsigned long reserve)
 {
+	unsigned long flags;
 	VM_BUG_ON(!is_vm_hugetlb_page(vma));
 	VM_BUG_ON(vma->vm_flags & VM_SHARED);
 
-	vma->vm_private_data = (void *)reserve;
+	flags = (unsigned long)vma->vm_private_data & HPAGE_RESV_MASK;
+	vma->vm_private_data = (void *)(reserve | flags);
+}
+
+static void set_vma_resv_flags(struct vm_area_struct *vma, unsigned long flags)
+{
+	unsigned long reserveflags = (unsigned long)vma->vm_private_data;
+	VM_BUG_ON(!is_vm_hugetlb_page(vma));
+	vma->vm_private_data = (void *)(reserveflags | flags);
+}
+
+static int is_vma_resv_set(struct vm_area_struct *vma, unsigned long flag)
+{
+	VM_BUG_ON(!is_vm_hugetlb_page(vma));
+	return ((unsigned long)vma->vm_private_data & flag) != 0;
 }
 
 /* Decrement the reserved pages in the hugepage pool by one */
@@ -78,14 +96,18 @@ static void decrement_hugepage_resv_vma(struct vm_area_struct *vma)
 		 * Only the process that called mmap() has reserves for
 		 * private mappings.
 		 */
-		if (vma_resv_huge_pages(vma)) {
+		if (is_vma_resv_set(vma, HPAGE_RESV_OWNER)) {
+			unsigned long flags, reserve;
 			resv_huge_pages--;
+			flags = (unsigned long)vma->vm_private_data &
+							HPAGE_RESV_MASK;
 			reserve = (unsigned long)vma->vm_private_data - 1;
-			vma->vm_private_data = (void *)reserve;
+			vma->vm_private_data = (void *)(reserve | flags);
 		}
 	}
 }
 
+/* Reset counters to 0 and clear all HPAGE_RESV_* flags */
 void reset_vma_resv_huge_pages(struct vm_area_struct *vma)
 {
 	VM_BUG_ON(!is_vm_hugetlb_page(vma));
@@ -153,7 +175,7 @@ static struct page *dequeue_huge_page(void)
 }
 
 static struct page *dequeue_huge_page_vma(struct vm_area_struct *vma,
-				unsigned long address)
+				unsigned long address, int avoid_reserve)
 {
 	int nid;
 	struct page *page = NULL;
@@ -173,6 +195,10 @@ static struct page *dequeue_huge_page_vma(struct vm_area_struct *vma,
 			free_huge_pages - resv_huge_pages == 0)
 		return NULL;
 
+	/* If reserves cannot be used, ensure enough pages are in the pool */
+	if (avoid_reserve && free_huge_pages - resv_huge_pages == 0)
+		return NULL;
+
 	for_each_zone_zonelist_nodemask(zone, z, zonelist,
 						MAX_NR_ZONES - 1, nodemask) {
 		nid = zone_to_nid(zone);
@@ -183,7 +209,9 @@ static struct page *dequeue_huge_page_vma(struct vm_area_struct *vma,
 			list_del(&page->lru);
 			free_huge_pages--;
 			free_huge_pages_node[nid]--;
-			decrement_hugepage_resv_vma(vma);
+
+			if (!avoid_reserve)
+				decrement_hugepage_resv_vma(vma);
 
 			break;
 		}
@@ -534,7 +562,7 @@ static void return_unused_surplus_pages(unsigned long unused_resv_pages)
 }
 
 static struct page *alloc_huge_page(struct vm_area_struct *vma,
-				    unsigned long addr)
+				    unsigned long addr, int avoid_reserve)
 {
 	struct page *page;
 	struct address_space *mapping = vma->vm_file->f_mapping;
@@ -546,14 +574,15 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma,
 	 * will not have accounted against quota. Check that the quota can be
 	 * made before satisfying the allocation
 	 */
-	if (!vma_has_private_reserves(vma)) {
+	if (!(vma->vm_flags & VM_SHARED) &&
+			!is_vma_resv_set(vma, HPAGE_RESV_OWNER)) {
 		chg = 1;
 		if (hugetlb_get_quota(inode->i_mapping, chg))
 			return ERR_PTR(-ENOSPC);
 	}
 
 	spin_lock(&hugetlb_lock);
-	page = dequeue_huge_page_vma(vma, addr);
+	page = dequeue_huge_page_vma(vma, addr, avoid_reserve);
 	spin_unlock(&hugetlb_lock);
 
 	if (!page) {
@@ -909,7 +938,7 @@ nomem:
 }
 
 void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
-			    unsigned long end)
+			    unsigned long end, struct page *ref_page)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	unsigned long address;
@@ -937,6 +966,27 @@ void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
 		if (huge_pmd_unshare(mm, &address, ptep))
 			continue;
 
+		/*
+		 * If a reference page is supplied, it is because a specific
+		 * page is being unmapped, not a range. Ensure the page we
+		 * are about to unmap is the actual page of interest.
+		 */
+		if (ref_page) {
+			pte = huge_ptep_get(ptep);
+			if (huge_pte_none(pte))
+				continue;
+			page = pte_page(pte);
+			if (page != ref_page)
+				continue;
+
+			/*
+			 * Mark the VMA as having unmapped its page so that
+			 * future faults in this VMA will fail rather than
+			 * looking like data was lost
+			 */
+			set_vma_resv_flags(vma, HPAGE_RESV_UNMAPPED);
+		}
+
 		pte = huge_ptep_get_and_clear(mm, address, ptep);
 		if (huge_pte_none(pte))
 			continue;
@@ -955,7 +1005,7 @@ void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
 }
 
 void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
-			  unsigned long end)
+			  unsigned long end, struct page *ref_page)
 {
 	/*
 	 * It is undesirable to test vma->vm_file as it should be non-null
@@ -967,19 +1017,68 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
 	 */
 	if (vma->vm_file) {
 		spin_lock(&vma->vm_file->f_mapping->i_mmap_lock);
-		__unmap_hugepage_range(vma, start, end);
+		__unmap_hugepage_range(vma, start, end, ref_page);
 		spin_unlock(&vma->vm_file->f_mapping->i_mmap_lock);
 	}
 }
 
+/*
+ * This is called when the original mapper is failing to COW a MAP_PRIVATE
+ * mappping it owns the reserve page for. The intention is to unmap the page
+ * from other VMAs and let the children be SIGKILLed if they are faulting the
+ * same region.
+ */
+int unmap_ref_private(struct mm_struct *mm,
+					struct vm_area_struct *vma,
+					struct page *page,
+					unsigned long address)
+{
+	struct vm_area_struct *iter_vma;
+	struct address_space *mapping;
+	struct prio_tree_iter iter;
+	pgoff_t pgoff;
+
+	/*
+	 * vm_pgoff is in PAGE_SIZE units, hence the different calculation
+	 * from page cache lookup which is in HPAGE_SIZE units.
+	 */
+	address = address & huge_page_mask(hstate_vma(vma));
+	pgoff = ((address - vma->vm_start) >> PAGE_SHIFT)
+		+ (vma->vm_pgoff >> PAGE_SHIFT);
+	mapping = (struct address_space *)page_private(page);
+
+	vma_prio_tree_foreach(iter_vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
+		/* Do not unmap the current VMA */
+		if (iter_vma == vma)
+			continue;
+
+		/*
+		 * Unmap the page from other VMAs without their own reserves.
+		 * They get marked to be SIGKILLed if they fault in these
+		 * areas. This is because a future no-page fault on this VMA
+		 * could insert a zeroed page instead of the data existing
+		 * from the time of fork. This would look like data corruption
+		 */
+		if (!is_vma_resv_set(iter_vma, HPAGE_RESV_OWNER))
+			unmap_hugepage_range(iter_vma,
+				address, address + HPAGE_SIZE,
+				page);
+	}
+
+	return 1;
+}
+
 static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
-			unsigned long address, pte_t *ptep, pte_t pte)
+			unsigned long address, pte_t *ptep, pte_t pte,
+			struct page *pagecache_page)
 {
 	struct page *old_page, *new_page;
 	int avoidcopy;
+	int outside_reserve = 0;
 
 	old_page = pte_page(pte);
 
+retry_avoidcopy:
 	/* If no-one else is actually using this page, avoid the copy
 	 * and just make the page writable */
 	avoidcopy = (page_count(old_page) == 1);
@@ -988,11 +1087,43 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
 		return 0;
 	}
 
+	/*
+	 * If the process that created a MAP_PRIVATE mapping is about to
+	 * perform a COW due to a shared page count, attempt to satisfy
+	 * the allocation without using the existing reserves. The pagecache
+	 * page is used to determine if the reserve at this address was
+	 * consumed or not. If reserves were used, a partial faulted mapping
+	 * at the time of fork() could consume its reserves on COW instead
+	 * of the full address range.
+	 */
+	if (!(vma->vm_flags & VM_SHARED) &&
+			is_vma_resv_set(vma, HPAGE_RESV_OWNER) &&
+			old_page != pagecache_page)
+		outside_reserve = 1;
+
 	page_cache_get(old_page);
-	new_page = alloc_huge_page(vma, address);
+	new_page = alloc_huge_page(vma, address, outside_reserve);
 
 	if (IS_ERR(new_page)) {
 		page_cache_release(old_page);
+
+		/*
+		 * If a process owning a MAP_PRIVATE mapping fails to COW,
+		 * it is due to references held by a child and an insufficient
+		 * huge page pool. To guarantee the original mappers
+		 * reliability, unmap the page from child processes. The child
+		 * may get SIGKILLed if it later faults.
+		 */
+		if (outside_reserve) {
+			BUG_ON(huge_pte_none(pte));
+			if (unmap_ref_private(mm, vma, old_page, address)) {
+				BUG_ON(page_count(old_page) != 1);
+				BUG_ON(huge_pte_none(pte));
+				goto retry_avoidcopy;
+			}
+			WARN_ON_ONCE(1);
+		}
+
 		return -PTR_ERR(new_page);
 	}
 
@@ -1015,6 +1146,20 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
 	return 0;
 }
 
+/* Return the pagecache page at a given address within a VMA */
+static struct page *hugetlbfs_pagecache_page(struct vm_area_struct *vma,
+			unsigned long address)
+{
+	struct address_space *mapping;
+	unsigned long idx;
+
+	mapping = vma->vm_file->f_mapping;
+	idx = ((address - vma->vm_start) >> HPAGE_SHIFT)
+		+ (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT));
+
+	return find_lock_page(mapping, idx);
+}
+
 static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
 			unsigned long address, pte_t *ptep, int write_access)
 {
@@ -1025,6 +1170,18 @@ static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	struct address_space *mapping;
 	pte_t new_pte;
 
+	/*
+	 * Currently, we are forced to kill the process in the event the
+	 * original mapper has unmapped pages from the child due to a failed
+	 * COW. Warn that such a situation has occured as it may not be obvious
+	 */
+	if (is_vma_resv_set(vma, HPAGE_RESV_UNMAPPED)) {
+		printk(KERN_WARNING
+			"PID %d killed due to inadequate hugepage pool\n",
+			current->pid);
+		return ret;
+	}
+
 	mapping = vma->vm_file->f_mapping;
 	idx = ((address - vma->vm_start) >> HPAGE_SHIFT)
 		+ (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT));
@@ -1039,7 +1196,7 @@ retry:
 		size = i_size_read(mapping->host) >> HPAGE_SHIFT;
 		if (idx >= size)
 			goto out;
-		page = alloc_huge_page(vma, address);
+		page = alloc_huge_page(vma, address, 0);
 		if (IS_ERR(page)) {
 			ret = -PTR_ERR(page);
 			goto out;
@@ -1081,7 +1238,7 @@ retry:
 
 	if (write_access && !(vma->vm_flags & VM_SHARED)) {
 		/* Optimization, do the COW without a second fault */
-		ret = hugetlb_cow(mm, vma, address, ptep, new_pte);
+		ret = hugetlb_cow(mm, vma, address, ptep, new_pte, page);
 	}
 
 	spin_unlock(&mm->page_table_lock);
@@ -1126,8 +1283,15 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	spin_lock(&mm->page_table_lock);
 	/* Check for a racing update before calling hugetlb_cow */
 	if (likely(pte_same(entry, huge_ptep_get(ptep))))
-		if (write_access && !pte_write(entry))
-			ret = hugetlb_cow(mm, vma, address, ptep, entry);
+		if (write_access && !pte_write(entry)) {
+			struct page *page;
+			page = hugetlbfs_pagecache_page(vma, address);
+			ret = hugetlb_cow(mm, vma, address, ptep, entry, page);
+			if (page) {
+				unlock_page(page);
+				put_page(page);
+			}
+		}
 	spin_unlock(&mm->page_table_lock);
 	mutex_unlock(&hugetlb_instantiation_mutex);
 
@@ -1371,6 +1535,7 @@ int hugetlb_reserve_pages(struct inode *inode,
 	else {
 		chg = to - from;
 		set_vma_resv_huge_pages(vma, chg);
+		set_vma_resv_flags(vma, HPAGE_RESV_OWNER);
 	}
 
 	if (chg < 0)
diff --git a/mm/memory.c b/mm/memory.c
index 82f3f1c5cf17..72932489a082 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -901,7 +901,7 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp,
 			}
 
 			if (unlikely(is_vm_hugetlb_page(vma))) {
-				unmap_hugepage_range(vma, start, end);
+				unmap_hugepage_range(vma, start, end, NULL);
 				zap_work -= (end - start) /
 						(HPAGE_SIZE / PAGE_SIZE);
 				start = end;
-- 
cgit v1.2.3


From cdfd4325c0d878679bd6a3ba8285b71d9980e3c0 Mon Sep 17 00:00:00 2001
From: Andy Whitcroft <apw@shadowen.org>
Date: Wed, 23 Jul 2008 21:27:28 -0700
Subject: mm: record MAP_NORESERVE status on vmas and fix small page mprotect
 reservations

With Mel's hugetlb private reservation support patches applied, strict
overcommit semantics are applied to both shared and private huge page
mappings.  This can be a problem if an application relied on unlimited
overcommit semantics for private mappings.  An example of this would be an
application which maps a huge area with the intention of using it very
sparsely.  These application would benefit from being able to opt-out of
the strict overcommit.  It should be noted that prior to hugetlb
supporting demand faulting all mappings were fully populated and so
applications of this type should be rare.

This patch stack implements the MAP_NORESERVE mmap() flag for huge page
mappings.  This flag has the same meaning as for small page mappings,
suppressing reservations for that mapping.

Thanks to Mel Gorman for reviewing a number of early versions of these
patches.

This patch:

When a small page mapping is created with mmap() reservations are created
by default for any memory pages required.  When the region is read/write
the reservation is increased for every page, no reservation is needed for
read-only regions (as they implicitly share the zero page).  Reservations
are tracked via the VM_ACCOUNT vma flag which is present when the region
has reservation backing it.  When we convert a region from read-only to
read-write new reservations are aquired and VM_ACCOUNT is set.  However,
when a read-only map is created with MAP_NORESERVE it is indistinguishable
from a normal mapping.  When we then convert that to read/write we are
forced to incorrectly create reservations for it as we have no record of
the original MAP_NORESERVE.

This patch introduces a new vma flag VM_NORESERVE which records the
presence of the original MAP_NORESERVE flag.  This allows us to
distinguish these two circumstances and correctly account the reserve.

As well as fixing this FIXME in the code, this makes it much easier to
introduce MAP_NORESERVE support for huge pages as this flag is available
consistantly for the life of the mapping.  VM_ACCOUNT on the other hand is
heavily used at the generic level in association with small pages.

Signed-off-by: Andy Whitcroft <apw@shadowen.org>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Adam Litke <agl@us.ibm.com>
Cc: Johannes Weiner <hannes@saeurebad.de>
Cc: Andy Whitcroft <apw@shadowen.org>
Cc: William Lee Irwin III <wli@holomorphy.com>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 1 +
 mm/mmap.c          | 3 +++
 mm/mprotect.c      | 6 ++----
 3 files changed, 6 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 196924b657bc..df322fb4df31 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -100,6 +100,7 @@ extern unsigned int kobjsize(const void *objp);
 #define VM_DONTEXPAND	0x00040000	/* Cannot expand with mremap() */
 #define VM_RESERVED	0x00080000	/* Count as reserved_vm like IO */
 #define VM_ACCOUNT	0x00100000	/* Is a VM accounted object */
+#define VM_NORESERVE	0x00200000	/* should the VM suppress accounting */
 #define VM_HUGETLB	0x00400000	/* Huge TLB Page VM */
 #define VM_NONLINEAR	0x00800000	/* Is non-linear (remap_file_pages) */
 #define VM_MAPPED_COPY	0x01000000	/* T if mapped copy of data (nommu mmap) */
diff --git a/mm/mmap.c b/mm/mmap.c
index 75e0d0673d78..57d3b6097deb 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1110,6 +1110,9 @@ munmap_back:
 	if (!may_expand_vm(mm, len >> PAGE_SHIFT))
 		return -ENOMEM;
 
+	if (flags & MAP_NORESERVE)
+		vm_flags |= VM_NORESERVE;
+
 	if (accountable && (!(flags & MAP_NORESERVE) ||
 			    sysctl_overcommit_memory == OVERCOMMIT_NEVER)) {
 		if (vm_flags & VM_SHARED) {
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 360d9cc8b38c..abd645a3b0a0 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -153,12 +153,10 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
 	 * If we make a private mapping writable we increase our commit;
 	 * but (without finer accounting) cannot reduce our commit if we
 	 * make it unwritable again.
-	 *
-	 * FIXME? We haven't defined a VM_NORESERVE flag, so mprotecting
-	 * a MAP_NORESERVE private mapping to writable will now reserve.
 	 */
 	if (newflags & VM_WRITE) {
-		if (!(oldflags & (VM_ACCOUNT|VM_WRITE|VM_SHARED))) {
+		if (!(oldflags & (VM_ACCOUNT|VM_WRITE|
+						VM_SHARED|VM_NORESERVE))) {
 			charged = nrpages;
 			if (security_vm_enough_memory(charged))
 				return -ENOMEM;
-- 
cgit v1.2.3


From ff7ea79cf7c3a481851bd4b2185fdeb6ce4afa29 Mon Sep 17 00:00:00 2001
From: Nishanth Aravamudan <nacc@us.ibm.com>
Date: Wed, 23 Jul 2008 21:27:39 -0700
Subject: mm: create /sys/kernel/mm

Add a kobject to create /sys/kernel/mm when sysfs is mounted.  The kobject
will exist regardless.  This will allow for the hugepage related sysfs
directories to exist under the mm "subsystem" directory.  Add an ABI file
appropriately.

[kosaki.motohiro@jp.fujitsu.com: fix build]
Signed-off-by: Nishanth Aravamudan <nacc@us.ibm.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/ABI/testing/sysfs-kernel-mm |  6 ++++++
 include/linux/kobject.h                   |  2 ++
 mm/mm_init.c                              | 16 ++++++++++++++++
 3 files changed, 24 insertions(+)
 create mode 100644 Documentation/ABI/testing/sysfs-kernel-mm

(limited to 'include/linux')

diff --git a/Documentation/ABI/testing/sysfs-kernel-mm b/Documentation/ABI/testing/sysfs-kernel-mm
new file mode 100644
index 000000000000..190d523ac159
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-kernel-mm
@@ -0,0 +1,6 @@
+What:		/sys/kernel/mm
+Date:		July 2008
+Contact:	Nishanth Aravamudan <nacc@us.ibm.com>, VM maintainers
+Description:
+		/sys/kernel/mm/ should contain any and all VM
+		related information in /sys/kernel/.
diff --git a/include/linux/kobject.h b/include/linux/kobject.h
index 60f0d418ae32..5437ac0276e2 100644
--- a/include/linux/kobject.h
+++ b/include/linux/kobject.h
@@ -186,6 +186,8 @@ extern struct kobject *kset_find_obj(struct kset *, const char *);
 
 /* The global /sys/kernel/ kobject for people to chain off of */
 extern struct kobject *kernel_kobj;
+/* The global /sys/kernel/mm/ kobject for people to chain off of */
+extern struct kobject *mm_kobj;
 /* The global /sys/hypervisor/ kobject for people to chain off of */
 extern struct kobject *hypervisor_kobj;
 /* The global /sys/power/ kobject for people to chain off of */
diff --git a/mm/mm_init.c b/mm/mm_init.c
index eaf0d3b47099..c6af41ea9994 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -7,6 +7,8 @@
  */
 #include <linux/kernel.h>
 #include <linux/init.h>
+#include <linux/kobject.h>
+#include <linux/module.h>
 #include "internal.h"
 
 #ifdef CONFIG_DEBUG_MEMORY_INIT
@@ -134,3 +136,17 @@ static __init int set_mminit_loglevel(char *str)
 }
 early_param("mminit_loglevel", set_mminit_loglevel);
 #endif /* CONFIG_DEBUG_MEMORY_INIT */
+
+struct kobject *mm_kobj;
+EXPORT_SYMBOL_GPL(mm_kobj);
+
+static int __init mm_sysfs_init(void)
+{
+	mm_kobj = kobject_create_and_add("mm", kernel_kobj);
+	if (!mm_kobj)
+		return -ENOMEM;
+
+	return 0;
+}
+
+__initcall(mm_sysfs_init);
-- 
cgit v1.2.3


From a5516438959d90b071ff0a484ce4f3f523dc3152 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Wed, 23 Jul 2008 21:27:41 -0700
Subject: hugetlb: modular state for hugetlb page size

The goal of this patchset is to support multiple hugetlb page sizes.  This
is achieved by introducing a new struct hstate structure, which
encapsulates the important hugetlb state and constants (eg.  huge page
size, number of huge pages currently allocated, etc).

The hstate structure is then passed around the code which requires these
fields, they will do the right thing regardless of the exact hstate they
are operating on.

This patch adds the hstate structure, with a single global instance of it
(default_hstate), and does the basic work of converting hugetlb to use the
hstate.

Future patches will add more hstate structures to allow for different
hugetlbfs mounts to have different page sizes.

[akpm@linux-foundation.org: coding-style fixes]
Acked-by: Adam Litke <agl@us.ibm.com>
Acked-by: Nishanth Aravamudan <nacc@us.ibm.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/ia64/mm/hugetlbpage.c    |   7 +-
 arch/powerpc/mm/hugetlbpage.c |   3 +-
 arch/s390/mm/hugetlbpage.c    |   3 +-
 arch/sh/mm/hugetlbpage.c      |   3 +-
 arch/sparc64/mm/hugetlbpage.c |   5 +-
 arch/x86/mm/hugetlbpage.c     |   5 +-
 fs/hugetlbfs/inode.c          |  52 +++---
 include/asm-ia64/hugetlb.h    |   3 +-
 include/asm-powerpc/hugetlb.h |   3 +-
 include/asm-s390/hugetlb.h    |   3 +-
 include/asm-sh/hugetlb.h      |   3 +-
 include/asm-sparc/hugetlb.h   |   3 +-
 include/asm-x86/hugetlb.h     |   8 +-
 include/linux/hugetlb.h       |  88 +++++++++-
 ipc/shm.c                     |   3 +-
 mm/hugetlb.c                  | 368 +++++++++++++++++++++++-------------------
 mm/memory.c                   |   2 +-
 mm/mempolicy.c                |   9 +-
 mm/mmap.c                     |   3 +-
 19 files changed, 356 insertions(+), 218 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/mm/hugetlbpage.c b/arch/ia64/mm/hugetlbpage.c
index cd49e2860eef..6170f097d255 100644
--- a/arch/ia64/mm/hugetlbpage.c
+++ b/arch/ia64/mm/hugetlbpage.c
@@ -24,7 +24,7 @@
 unsigned int hpage_shift=HPAGE_SHIFT_DEFAULT;
 
 pte_t *
-huge_pte_alloc (struct mm_struct *mm, unsigned long addr)
+huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz)
 {
 	unsigned long taddr = htlbpage_to_page(addr);
 	pgd_t *pgd;
@@ -75,7 +75,8 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
  * Don't actually need to do any preparation, but need to make sure
  * the address is in the right region.
  */
-int prepare_hugepage_range(unsigned long addr, unsigned long len)
+int prepare_hugepage_range(struct file *file,
+			unsigned long addr, unsigned long len)
 {
 	if (len & ~HPAGE_MASK)
 		return -EINVAL;
@@ -149,7 +150,7 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, u
 
 	/* Handle MAP_FIXED */
 	if (flags & MAP_FIXED) {
-		if (prepare_hugepage_range(addr, len))
+		if (prepare_hugepage_range(file, addr, len))
 			return -EINVAL;
 		return addr;
 	}
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 1a96cc891cf5..c94dc71af989 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -128,7 +128,8 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
 	return NULL;
 }
 
-pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
+pte_t *huge_pte_alloc(struct mm_struct *mm,
+			unsigned long addr, unsigned long sz)
 {
 	pgd_t *pg;
 	pud_t *pu;
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c
index f4b6124fdb75..9162dc84f77f 100644
--- a/arch/s390/mm/hugetlbpage.c
+++ b/arch/s390/mm/hugetlbpage.c
@@ -72,7 +72,8 @@ void arch_release_hugepage(struct page *page)
 	page[1].index = 0;
 }
 
-pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
+pte_t *huge_pte_alloc(struct mm_struct *mm,
+			unsigned long addr, unsigned long sz)
 {
 	pgd_t *pgdp;
 	pud_t *pudp;
diff --git a/arch/sh/mm/hugetlbpage.c b/arch/sh/mm/hugetlbpage.c
index ae8c321d6e2a..2f9dbe0ef4ac 100644
--- a/arch/sh/mm/hugetlbpage.c
+++ b/arch/sh/mm/hugetlbpage.c
@@ -22,7 +22,8 @@
 #include <asm/tlbflush.h>
 #include <asm/cacheflush.h>
 
-pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
+pte_t *huge_pte_alloc(struct mm_struct *mm,
+			unsigned long addr, unsigned long sz)
 {
 	pgd_t *pgd;
 	pud_t *pud;
diff --git a/arch/sparc64/mm/hugetlbpage.c b/arch/sparc64/mm/hugetlbpage.c
index ebefd2a14375..1307b23f6a76 100644
--- a/arch/sparc64/mm/hugetlbpage.c
+++ b/arch/sparc64/mm/hugetlbpage.c
@@ -175,7 +175,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 		return -ENOMEM;
 
 	if (flags & MAP_FIXED) {
-		if (prepare_hugepage_range(addr, len))
+		if (prepare_hugepage_range(file, addr, len))
 			return -EINVAL;
 		return addr;
 	}
@@ -195,7 +195,8 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 				pgoff, flags);
 }
 
-pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
+pte_t *huge_pte_alloc(struct mm_struct *mm,
+			unsigned long addr, unsigned long sz)
 {
 	pgd_t *pgd;
 	pud_t *pud;
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index 0b3d567e686d..52476fde8996 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -124,7 +124,8 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
 	return 1;
 }
 
-pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
+pte_t *huge_pte_alloc(struct mm_struct *mm,
+			unsigned long addr, unsigned long sz)
 {
 	pgd_t *pgd;
 	pud_t *pud;
@@ -368,7 +369,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 		return -ENOMEM;
 
 	if (flags & MAP_FIXED) {
-		if (prepare_hugepage_range(addr, len))
+		if (prepare_hugepage_range(file, addr, len))
 			return -EINVAL;
 		return addr;
 	}
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 428eff5b73f3..516c581b5371 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -80,6 +80,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
 	struct inode *inode = file->f_path.dentry->d_inode;
 	loff_t len, vma_len;
 	int ret;
+	struct hstate *h = hstate_file(file);
 
 	/*
 	 * vma address alignment (but not the pgoff alignment) has
@@ -92,7 +93,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
 	vma->vm_flags |= VM_HUGETLB | VM_RESERVED;
 	vma->vm_ops = &hugetlb_vm_ops;
 
-	if (vma->vm_pgoff & ~(HPAGE_MASK >> PAGE_SHIFT))
+	if (vma->vm_pgoff & ~(huge_page_mask(h) >> PAGE_SHIFT))
 		return -EINVAL;
 
 	vma_len = (loff_t)(vma->vm_end - vma->vm_start);
@@ -104,8 +105,8 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
 	len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
 
 	if (hugetlb_reserve_pages(inode,
-				vma->vm_pgoff >> (HPAGE_SHIFT-PAGE_SHIFT),
-				len >> HPAGE_SHIFT, vma))
+				vma->vm_pgoff >> huge_page_order(h),
+				len >> huge_page_shift(h), vma))
 		goto out;
 
 	ret = 0;
@@ -130,20 +131,21 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma;
 	unsigned long start_addr;
+	struct hstate *h = hstate_file(file);
 
-	if (len & ~HPAGE_MASK)
+	if (len & ~huge_page_mask(h))
 		return -EINVAL;
 	if (len > TASK_SIZE)
 		return -ENOMEM;
 
 	if (flags & MAP_FIXED) {
-		if (prepare_hugepage_range(addr, len))
+		if (prepare_hugepage_range(file, addr, len))
 			return -EINVAL;
 		return addr;
 	}
 
 	if (addr) {
-		addr = ALIGN(addr, HPAGE_SIZE);
+		addr = ALIGN(addr, huge_page_size(h));
 		vma = find_vma(mm, addr);
 		if (TASK_SIZE - len >= addr &&
 		    (!vma || addr + len <= vma->vm_start))
@@ -156,7 +158,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 		start_addr = TASK_UNMAPPED_BASE;
 
 full_search:
-	addr = ALIGN(start_addr, HPAGE_SIZE);
+	addr = ALIGN(start_addr, huge_page_size(h));
 
 	for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
 		/* At this point:  (!vma || addr < vma->vm_end). */
@@ -174,7 +176,7 @@ full_search:
 
 		if (!vma || addr + len <= vma->vm_start)
 			return addr;
-		addr = ALIGN(vma->vm_end, HPAGE_SIZE);
+		addr = ALIGN(vma->vm_end, huge_page_size(h));
 	}
 }
 #endif
@@ -225,10 +227,11 @@ hugetlbfs_read_actor(struct page *page, unsigned long offset,
 static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
 			      size_t len, loff_t *ppos)
 {
+	struct hstate *h = hstate_file(filp);
 	struct address_space *mapping = filp->f_mapping;
 	struct inode *inode = mapping->host;
-	unsigned long index = *ppos >> HPAGE_SHIFT;
-	unsigned long offset = *ppos & ~HPAGE_MASK;
+	unsigned long index = *ppos >> huge_page_shift(h);
+	unsigned long offset = *ppos & ~huge_page_mask(h);
 	unsigned long end_index;
 	loff_t isize;
 	ssize_t retval = 0;
@@ -243,17 +246,17 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
 	if (!isize)
 		goto out;
 
-	end_index = (isize - 1) >> HPAGE_SHIFT;
+	end_index = (isize - 1) >> huge_page_shift(h);
 	for (;;) {
 		struct page *page;
-		int nr, ret;
+		unsigned long nr, ret;
 
 		/* nr is the maximum number of bytes to copy from this page */
-		nr = HPAGE_SIZE;
+		nr = huge_page_size(h);
 		if (index >= end_index) {
 			if (index > end_index)
 				goto out;
-			nr = ((isize - 1) & ~HPAGE_MASK) + 1;
+			nr = ((isize - 1) & ~huge_page_mask(h)) + 1;
 			if (nr <= offset) {
 				goto out;
 			}
@@ -287,8 +290,8 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
 		offset += ret;
 		retval += ret;
 		len -= ret;
-		index += offset >> HPAGE_SHIFT;
-		offset &= ~HPAGE_MASK;
+		index += offset >> huge_page_shift(h);
+		offset &= ~huge_page_mask(h);
 
 		if (page)
 			page_cache_release(page);
@@ -298,7 +301,7 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
 			break;
 	}
 out:
-	*ppos = ((loff_t)index << HPAGE_SHIFT) + offset;
+	*ppos = ((loff_t)index << huge_page_shift(h)) + offset;
 	mutex_unlock(&inode->i_mutex);
 	return retval;
 }
@@ -339,8 +342,9 @@ static void truncate_huge_page(struct page *page)
 
 static void truncate_hugepages(struct inode *inode, loff_t lstart)
 {
+	struct hstate *h = hstate_inode(inode);
 	struct address_space *mapping = &inode->i_data;
-	const pgoff_t start = lstart >> HPAGE_SHIFT;
+	const pgoff_t start = lstart >> huge_page_shift(h);
 	struct pagevec pvec;
 	pgoff_t next;
 	int i, freed = 0;
@@ -449,8 +453,9 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
 {
 	pgoff_t pgoff;
 	struct address_space *mapping = inode->i_mapping;
+	struct hstate *h = hstate_inode(inode);
 
-	BUG_ON(offset & ~HPAGE_MASK);
+	BUG_ON(offset & ~huge_page_mask(h));
 	pgoff = offset >> PAGE_SHIFT;
 
 	i_size_write(inode, offset);
@@ -465,6 +470,7 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
 static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr)
 {
 	struct inode *inode = dentry->d_inode;
+	struct hstate *h = hstate_inode(inode);
 	int error;
 	unsigned int ia_valid = attr->ia_valid;
 
@@ -476,7 +482,7 @@ static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr)
 
 	if (ia_valid & ATTR_SIZE) {
 		error = -EINVAL;
-		if (!(attr->ia_size & ~HPAGE_MASK))
+		if (!(attr->ia_size & ~huge_page_mask(h)))
 			error = hugetlb_vmtruncate(inode, attr->ia_size);
 		if (error)
 			goto out;
@@ -610,9 +616,10 @@ static int hugetlbfs_set_page_dirty(struct page *page)
 static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
 	struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(dentry->d_sb);
+	struct hstate *h = hstate_inode(dentry->d_inode);
 
 	buf->f_type = HUGETLBFS_MAGIC;
-	buf->f_bsize = HPAGE_SIZE;
+	buf->f_bsize = huge_page_size(h);
 	if (sbinfo) {
 		spin_lock(&sbinfo->stat_lock);
 		/* If no limits set, just report 0 for max/free/used
@@ -942,7 +949,8 @@ struct file *hugetlb_file_setup(const char *name, size_t size)
 		goto out_dentry;
 
 	error = -ENOMEM;
-	if (hugetlb_reserve_pages(inode, 0, size >> HPAGE_SHIFT, NULL))
+	if (hugetlb_reserve_pages(inode, 0,
+			size >> huge_page_shift(hstate_inode(inode)), NULL))
 		goto out_inode;
 
 	d_instantiate(dentry, inode);
diff --git a/include/asm-ia64/hugetlb.h b/include/asm-ia64/hugetlb.h
index e9d1e5e2382d..da55c63728e0 100644
--- a/include/asm-ia64/hugetlb.h
+++ b/include/asm-ia64/hugetlb.h
@@ -8,7 +8,8 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr,
 			    unsigned long end, unsigned long floor,
 			    unsigned long ceiling);
 
-int prepare_hugepage_range(unsigned long addr, unsigned long len);
+int prepare_hugepage_range(struct file *file,
+			unsigned long addr, unsigned long len);
 
 static inline int is_hugepage_only_range(struct mm_struct *mm,
 					 unsigned long addr,
diff --git a/include/asm-powerpc/hugetlb.h b/include/asm-powerpc/hugetlb.h
index 0a37aa5ecaa5..ca37c4af27b1 100644
--- a/include/asm-powerpc/hugetlb.h
+++ b/include/asm-powerpc/hugetlb.h
@@ -21,7 +21,8 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
  * If the arch doesn't supply something else, assume that hugepage
  * size aligned regions are ok without further preparation.
  */
-static inline int prepare_hugepage_range(unsigned long addr, unsigned long len)
+static inline int prepare_hugepage_range(struct file *file,
+			unsigned long addr, unsigned long len)
 {
 	if (len & ~HPAGE_MASK)
 		return -EINVAL;
diff --git a/include/asm-s390/hugetlb.h b/include/asm-s390/hugetlb.h
index 600a776f8f75..670a1d1745d2 100644
--- a/include/asm-s390/hugetlb.h
+++ b/include/asm-s390/hugetlb.h
@@ -22,7 +22,8 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
  * If the arch doesn't supply something else, assume that hugepage
  * size aligned regions are ok without further preparation.
  */
-static inline int prepare_hugepage_range(unsigned long addr, unsigned long len)
+static inline int prepare_hugepage_range(struct file *file,
+			unsigned long addr, unsigned long len)
 {
 	if (len & ~HPAGE_MASK)
 		return -EINVAL;
diff --git a/include/asm-sh/hugetlb.h b/include/asm-sh/hugetlb.h
index fb30018938c7..967068fb79ac 100644
--- a/include/asm-sh/hugetlb.h
+++ b/include/asm-sh/hugetlb.h
@@ -14,7 +14,8 @@ static inline int is_hugepage_only_range(struct mm_struct *mm,
  * If the arch doesn't supply something else, assume that hugepage
  * size aligned regions are ok without further preparation.
  */
-static inline int prepare_hugepage_range(unsigned long addr, unsigned long len)
+static inline int prepare_hugepage_range(struct file *file,
+			unsigned long addr, unsigned long len)
 {
 	if (len & ~HPAGE_MASK)
 		return -EINVAL;
diff --git a/include/asm-sparc/hugetlb.h b/include/asm-sparc/hugetlb.h
index aeb92374ca3d..177061064ee6 100644
--- a/include/asm-sparc/hugetlb.h
+++ b/include/asm-sparc/hugetlb.h
@@ -22,7 +22,8 @@ static inline int is_hugepage_only_range(struct mm_struct *mm,
  * If the arch doesn't supply something else, assume that hugepage
  * size aligned regions are ok without further preparation.
  */
-static inline int prepare_hugepage_range(unsigned long addr, unsigned long len)
+static inline int prepare_hugepage_range(struct file *file,
+			unsigned long addr, unsigned long len)
 {
 	if (len & ~HPAGE_MASK)
 		return -EINVAL;
diff --git a/include/asm-x86/hugetlb.h b/include/asm-x86/hugetlb.h
index 7eed6e0883bf..439a9acc132d 100644
--- a/include/asm-x86/hugetlb.h
+++ b/include/asm-x86/hugetlb.h
@@ -14,11 +14,13 @@ static inline int is_hugepage_only_range(struct mm_struct *mm,
  * If the arch doesn't supply something else, assume that hugepage
  * size aligned regions are ok without further preparation.
  */
-static inline int prepare_hugepage_range(unsigned long addr, unsigned long len)
+static inline int prepare_hugepage_range(struct file *file,
+			unsigned long addr, unsigned long len)
 {
-	if (len & ~HPAGE_MASK)
+	struct hstate *h = hstate_file(file);
+	if (len & ~huge_page_mask(h))
 		return -EINVAL;
-	if (addr & ~HPAGE_MASK)
+	if (addr & ~huge_page_mask(h))
 		return -EINVAL;
 	return 0;
 }
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index abbc187193a1..ad2271e11f9b 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -8,7 +8,6 @@
 #include <linux/mempolicy.h>
 #include <linux/shm.h>
 #include <asm/tlbflush.h>
-#include <asm/hugetlb.h>
 
 struct ctl_table;
 
@@ -45,7 +44,8 @@ extern int sysctl_hugetlb_shm_group;
 
 /* arch callbacks */
 
-pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr);
+pte_t *huge_pte_alloc(struct mm_struct *mm,
+			unsigned long addr, unsigned long sz);
 pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr);
 int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep);
 struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
@@ -80,7 +80,7 @@ static inline unsigned long hugetlb_total_pages(void)
 #define hugetlb_report_meminfo(buf)		0
 #define hugetlb_report_node_meminfo(n, buf)	0
 #define follow_huge_pmd(mm, addr, pmd, write)	NULL
-#define prepare_hugepage_range(addr,len)	(-EINVAL)
+#define prepare_hugepage_range(file, addr, len)	(-EINVAL)
 #define pmd_huge(x)	0
 #define is_hugepage_only_range(mm, addr, len)	0
 #define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) ({BUG(); 0; })
@@ -134,8 +134,6 @@ struct file *hugetlb_file_setup(const char *name, size_t);
 int hugetlb_get_quota(struct address_space *mapping, long delta);
 void hugetlb_put_quota(struct address_space *mapping, long delta);
 
-#define BLOCKS_PER_HUGEPAGE	(HPAGE_SIZE / 512)
-
 static inline int is_file_hugepages(struct file *file)
 {
 	if (file->f_op == &hugetlbfs_file_operations)
@@ -164,4 +162,84 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 					unsigned long flags);
 #endif /* HAVE_ARCH_HUGETLB_UNMAPPED_AREA */
 
+#ifdef CONFIG_HUGETLB_PAGE
+
+/* Defines one hugetlb page size */
+struct hstate {
+	int hugetlb_next_nid;
+	unsigned int order;
+	unsigned long mask;
+	unsigned long max_huge_pages;
+	unsigned long nr_huge_pages;
+	unsigned long free_huge_pages;
+	unsigned long resv_huge_pages;
+	unsigned long surplus_huge_pages;
+	unsigned long nr_overcommit_huge_pages;
+	struct list_head hugepage_freelists[MAX_NUMNODES];
+	unsigned int nr_huge_pages_node[MAX_NUMNODES];
+	unsigned int free_huge_pages_node[MAX_NUMNODES];
+	unsigned int surplus_huge_pages_node[MAX_NUMNODES];
+};
+
+extern struct hstate default_hstate;
+
+static inline struct hstate *hstate_vma(struct vm_area_struct *vma)
+{
+	return &default_hstate;
+}
+
+static inline struct hstate *hstate_file(struct file *f)
+{
+	return &default_hstate;
+}
+
+static inline struct hstate *hstate_inode(struct inode *i)
+{
+	return &default_hstate;
+}
+
+static inline unsigned long huge_page_size(struct hstate *h)
+{
+	return (unsigned long)PAGE_SIZE << h->order;
+}
+
+static inline unsigned long huge_page_mask(struct hstate *h)
+{
+	return h->mask;
+}
+
+static inline unsigned int huge_page_order(struct hstate *h)
+{
+	return h->order;
+}
+
+static inline unsigned huge_page_shift(struct hstate *h)
+{
+	return h->order + PAGE_SHIFT;
+}
+
+static inline unsigned int pages_per_huge_page(struct hstate *h)
+{
+	return 1 << h->order;
+}
+
+static inline unsigned int blocks_per_huge_page(struct hstate *h)
+{
+	return huge_page_size(h) / 512;
+}
+
+#include <asm/hugetlb.h>
+
+#else
+struct hstate {};
+#define hstate_file(f) NULL
+#define hstate_vma(v) NULL
+#define hstate_inode(i) NULL
+#define huge_page_size(h) PAGE_SIZE
+#define huge_page_mask(h) PAGE_MASK
+#define huge_page_order(h) 0
+#define huge_page_shift(h) PAGE_SHIFT
+#define pages_per_huge_page(h) 1
+#endif
+
 #endif /* _LINUX_HUGETLB_H */
diff --git a/ipc/shm.c b/ipc/shm.c
index 790240cd067f..a726aebce7d7 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -577,7 +577,8 @@ static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss,
 
 		if (is_file_hugepages(shp->shm_file)) {
 			struct address_space *mapping = inode->i_mapping;
-			*rss += (HPAGE_SIZE/PAGE_SIZE)*mapping->nrpages;
+			struct hstate *h = hstate_file(shp->shm_file);
+			*rss += pages_per_huge_page(h) * mapping->nrpages;
 		} else {
 			struct shmem_inode_info *info = SHMEM_I(inode);
 			spin_lock(&info->lock);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 32dff4290c66..0d8153e25f09 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -22,18 +22,12 @@
 #include "internal.h"
 
 const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL;
-static unsigned long nr_huge_pages, free_huge_pages, resv_huge_pages;
-static unsigned long surplus_huge_pages;
-static unsigned long nr_overcommit_huge_pages;
 unsigned long max_huge_pages;
 unsigned long sysctl_overcommit_huge_pages;
-static struct list_head hugepage_freelists[MAX_NUMNODES];
-static unsigned int nr_huge_pages_node[MAX_NUMNODES];
-static unsigned int free_huge_pages_node[MAX_NUMNODES];
-static unsigned int surplus_huge_pages_node[MAX_NUMNODES];
 static gfp_t htlb_alloc_mask = GFP_HIGHUSER;
 unsigned long hugepages_treat_as_movable;
-static int hugetlb_next_nid;
+
+struct hstate default_hstate;
 
 /*
  * Protects updates to hugepage_freelists, nr_huge_pages, and free_huge_pages
@@ -203,11 +197,11 @@ static long region_count(struct list_head *head, long f, long t)
  * Convert the address within this vma to the page offset within
  * the mapping, in pagecache page units; huge pages here.
  */
-static pgoff_t vma_hugecache_offset(struct vm_area_struct *vma,
-					unsigned long address)
+static pgoff_t vma_hugecache_offset(struct hstate *h,
+			struct vm_area_struct *vma, unsigned long address)
 {
-	return ((address - vma->vm_start) >> HPAGE_SHIFT) +
-			(vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT));
+	return ((address - vma->vm_start) >> huge_page_shift(h)) +
+			(vma->vm_pgoff >> huge_page_order(h));
 }
 
 /*
@@ -309,20 +303,21 @@ static int is_vma_resv_set(struct vm_area_struct *vma, unsigned long flag)
 }
 
 /* Decrement the reserved pages in the hugepage pool by one */
-static void decrement_hugepage_resv_vma(struct vm_area_struct *vma)
+static void decrement_hugepage_resv_vma(struct hstate *h,
+			struct vm_area_struct *vma)
 {
 	if (vma->vm_flags & VM_NORESERVE)
 		return;
 
 	if (vma->vm_flags & VM_SHARED) {
 		/* Shared mappings always use reserves */
-		resv_huge_pages--;
+		h->resv_huge_pages--;
 	} else if (is_vma_resv_set(vma, HPAGE_RESV_OWNER)) {
 		/*
 		 * Only the process that called mmap() has reserves for
 		 * private mappings.
 		 */
-		resv_huge_pages--;
+		h->resv_huge_pages--;
 	}
 }
 
@@ -344,12 +339,13 @@ static int vma_has_private_reserves(struct vm_area_struct *vma)
 	return 1;
 }
 
-static void clear_huge_page(struct page *page, unsigned long addr)
+static void clear_huge_page(struct page *page,
+			unsigned long addr, unsigned long sz)
 {
 	int i;
 
 	might_sleep();
-	for (i = 0; i < (HPAGE_SIZE/PAGE_SIZE); i++) {
+	for (i = 0; i < sz/PAGE_SIZE; i++) {
 		cond_resched();
 		clear_user_highpage(page + i, addr + i * PAGE_SIZE);
 	}
@@ -359,41 +355,43 @@ static void copy_huge_page(struct page *dst, struct page *src,
 			   unsigned long addr, struct vm_area_struct *vma)
 {
 	int i;
+	struct hstate *h = hstate_vma(vma);
 
 	might_sleep();
-	for (i = 0; i < HPAGE_SIZE/PAGE_SIZE; i++) {
+	for (i = 0; i < pages_per_huge_page(h); i++) {
 		cond_resched();
 		copy_user_highpage(dst + i, src + i, addr + i*PAGE_SIZE, vma);
 	}
 }
 
-static void enqueue_huge_page(struct page *page)
+static void enqueue_huge_page(struct hstate *h, struct page *page)
 {
 	int nid = page_to_nid(page);
-	list_add(&page->lru, &hugepage_freelists[nid]);
-	free_huge_pages++;
-	free_huge_pages_node[nid]++;
+	list_add(&page->lru, &h->hugepage_freelists[nid]);
+	h->free_huge_pages++;
+	h->free_huge_pages_node[nid]++;
 }
 
-static struct page *dequeue_huge_page(void)
+static struct page *dequeue_huge_page(struct hstate *h)
 {
 	int nid;
 	struct page *page = NULL;
 
 	for (nid = 0; nid < MAX_NUMNODES; ++nid) {
-		if (!list_empty(&hugepage_freelists[nid])) {
-			page = list_entry(hugepage_freelists[nid].next,
+		if (!list_empty(&h->hugepage_freelists[nid])) {
+			page = list_entry(h->hugepage_freelists[nid].next,
 					  struct page, lru);
 			list_del(&page->lru);
-			free_huge_pages--;
-			free_huge_pages_node[nid]--;
+			h->free_huge_pages--;
+			h->free_huge_pages_node[nid]--;
 			break;
 		}
 	}
 	return page;
 }
 
-static struct page *dequeue_huge_page_vma(struct vm_area_struct *vma,
+static struct page *dequeue_huge_page_vma(struct hstate *h,
+				struct vm_area_struct *vma,
 				unsigned long address, int avoid_reserve)
 {
 	int nid;
@@ -411,26 +409,26 @@ static struct page *dequeue_huge_page_vma(struct vm_area_struct *vma,
 	 * not "stolen". The child may still get SIGKILLed
 	 */
 	if (!vma_has_private_reserves(vma) &&
-			free_huge_pages - resv_huge_pages == 0)
+			h->free_huge_pages - h->resv_huge_pages == 0)
 		return NULL;
 
 	/* If reserves cannot be used, ensure enough pages are in the pool */
-	if (avoid_reserve && free_huge_pages - resv_huge_pages == 0)
+	if (avoid_reserve && h->free_huge_pages - h->resv_huge_pages == 0)
 		return NULL;
 
 	for_each_zone_zonelist_nodemask(zone, z, zonelist,
 						MAX_NR_ZONES - 1, nodemask) {
 		nid = zone_to_nid(zone);
 		if (cpuset_zone_allowed_softwall(zone, htlb_alloc_mask) &&
-		    !list_empty(&hugepage_freelists[nid])) {
-			page = list_entry(hugepage_freelists[nid].next,
+		    !list_empty(&h->hugepage_freelists[nid])) {
+			page = list_entry(h->hugepage_freelists[nid].next,
 					  struct page, lru);
 			list_del(&page->lru);
-			free_huge_pages--;
-			free_huge_pages_node[nid]--;
+			h->free_huge_pages--;
+			h->free_huge_pages_node[nid]--;
 
 			if (!avoid_reserve)
-				decrement_hugepage_resv_vma(vma);
+				decrement_hugepage_resv_vma(h, vma);
 
 			break;
 		}
@@ -439,12 +437,13 @@ static struct page *dequeue_huge_page_vma(struct vm_area_struct *vma,
 	return page;
 }
 
-static void update_and_free_page(struct page *page)
+static void update_and_free_page(struct hstate *h, struct page *page)
 {
 	int i;
-	nr_huge_pages--;
-	nr_huge_pages_node[page_to_nid(page)]--;
-	for (i = 0; i < (HPAGE_SIZE / PAGE_SIZE); i++) {
+
+	h->nr_huge_pages--;
+	h->nr_huge_pages_node[page_to_nid(page)]--;
+	for (i = 0; i < pages_per_huge_page(h); i++) {
 		page[i].flags &= ~(1 << PG_locked | 1 << PG_error | 1 << PG_referenced |
 				1 << PG_dirty | 1 << PG_active | 1 << PG_reserved |
 				1 << PG_private | 1<< PG_writeback);
@@ -452,11 +451,16 @@ static void update_and_free_page(struct page *page)
 	set_compound_page_dtor(page, NULL);
 	set_page_refcounted(page);
 	arch_release_hugepage(page);
-	__free_pages(page, HUGETLB_PAGE_ORDER);
+	__free_pages(page, huge_page_order(h));
 }
 
 static void free_huge_page(struct page *page)
 {
+	/*
+	 * Can't pass hstate in here because it is called from the
+	 * compound page destructor.
+	 */
+	struct hstate *h = &default_hstate;
 	int nid = page_to_nid(page);
 	struct address_space *mapping;
 
@@ -466,12 +470,12 @@ static void free_huge_page(struct page *page)
 	INIT_LIST_HEAD(&page->lru);
 
 	spin_lock(&hugetlb_lock);
-	if (surplus_huge_pages_node[nid]) {
-		update_and_free_page(page);
-		surplus_huge_pages--;
-		surplus_huge_pages_node[nid]--;
+	if (h->surplus_huge_pages_node[nid]) {
+		update_and_free_page(h, page);
+		h->surplus_huge_pages--;
+		h->surplus_huge_pages_node[nid]--;
 	} else {
-		enqueue_huge_page(page);
+		enqueue_huge_page(h, page);
 	}
 	spin_unlock(&hugetlb_lock);
 	if (mapping)
@@ -483,7 +487,7 @@ static void free_huge_page(struct page *page)
  * balanced by operating on them in a round-robin fashion.
  * Returns 1 if an adjustment was made.
  */
-static int adjust_pool_surplus(int delta)
+static int adjust_pool_surplus(struct hstate *h, int delta)
 {
 	static int prev_nid;
 	int nid = prev_nid;
@@ -496,15 +500,15 @@ static int adjust_pool_surplus(int delta)
 			nid = first_node(node_online_map);
 
 		/* To shrink on this node, there must be a surplus page */
-		if (delta < 0 && !surplus_huge_pages_node[nid])
+		if (delta < 0 && !h->surplus_huge_pages_node[nid])
 			continue;
 		/* Surplus cannot exceed the total number of pages */
-		if (delta > 0 && surplus_huge_pages_node[nid] >=
-						nr_huge_pages_node[nid])
+		if (delta > 0 && h->surplus_huge_pages_node[nid] >=
+						h->nr_huge_pages_node[nid])
 			continue;
 
-		surplus_huge_pages += delta;
-		surplus_huge_pages_node[nid] += delta;
+		h->surplus_huge_pages += delta;
+		h->surplus_huge_pages_node[nid] += delta;
 		ret = 1;
 		break;
 	} while (nid != prev_nid);
@@ -513,46 +517,46 @@ static int adjust_pool_surplus(int delta)
 	return ret;
 }
 
-static void prep_new_huge_page(struct page *page, int nid)
+static void prep_new_huge_page(struct hstate *h, struct page *page, int nid)
 {
 	set_compound_page_dtor(page, free_huge_page);
 	spin_lock(&hugetlb_lock);
-	nr_huge_pages++;
-	nr_huge_pages_node[nid]++;
+	h->nr_huge_pages++;
+	h->nr_huge_pages_node[nid]++;
 	spin_unlock(&hugetlb_lock);
 	put_page(page); /* free it into the hugepage allocator */
 }
 
-static struct page *alloc_fresh_huge_page_node(int nid)
+static struct page *alloc_fresh_huge_page_node(struct hstate *h, int nid)
 {
 	struct page *page;
 
 	page = alloc_pages_node(nid,
 		htlb_alloc_mask|__GFP_COMP|__GFP_THISNODE|
 						__GFP_REPEAT|__GFP_NOWARN,
-		HUGETLB_PAGE_ORDER);
+		huge_page_order(h));
 	if (page) {
 		if (arch_prepare_hugepage(page)) {
 			__free_pages(page, HUGETLB_PAGE_ORDER);
 			return NULL;
 		}
-		prep_new_huge_page(page, nid);
+		prep_new_huge_page(h, page, nid);
 	}
 
 	return page;
 }
 
-static int alloc_fresh_huge_page(void)
+static int alloc_fresh_huge_page(struct hstate *h)
 {
 	struct page *page;
 	int start_nid;
 	int next_nid;
 	int ret = 0;
 
-	start_nid = hugetlb_next_nid;
+	start_nid = h->hugetlb_next_nid;
 
 	do {
-		page = alloc_fresh_huge_page_node(hugetlb_next_nid);
+		page = alloc_fresh_huge_page_node(h, h->hugetlb_next_nid);
 		if (page)
 			ret = 1;
 		/*
@@ -566,11 +570,11 @@ static int alloc_fresh_huge_page(void)
 		 * if we just successfully allocated a hugepage so that
 		 * the next caller gets hugepages on the next node.
 		 */
-		next_nid = next_node(hugetlb_next_nid, node_online_map);
+		next_nid = next_node(h->hugetlb_next_nid, node_online_map);
 		if (next_nid == MAX_NUMNODES)
 			next_nid = first_node(node_online_map);
-		hugetlb_next_nid = next_nid;
-	} while (!page && hugetlb_next_nid != start_nid);
+		h->hugetlb_next_nid = next_nid;
+	} while (!page && h->hugetlb_next_nid != start_nid);
 
 	if (ret)
 		count_vm_event(HTLB_BUDDY_PGALLOC);
@@ -580,8 +584,8 @@ static int alloc_fresh_huge_page(void)
 	return ret;
 }
 
-static struct page *alloc_buddy_huge_page(struct vm_area_struct *vma,
-						unsigned long address)
+static struct page *alloc_buddy_huge_page(struct hstate *h,
+			struct vm_area_struct *vma, unsigned long address)
 {
 	struct page *page;
 	unsigned int nid;
@@ -610,18 +614,18 @@ static struct page *alloc_buddy_huge_page(struct vm_area_struct *vma,
 	 * per-node value is checked there.
 	 */
 	spin_lock(&hugetlb_lock);
-	if (surplus_huge_pages >= nr_overcommit_huge_pages) {
+	if (h->surplus_huge_pages >= h->nr_overcommit_huge_pages) {
 		spin_unlock(&hugetlb_lock);
 		return NULL;
 	} else {
-		nr_huge_pages++;
-		surplus_huge_pages++;
+		h->nr_huge_pages++;
+		h->surplus_huge_pages++;
 	}
 	spin_unlock(&hugetlb_lock);
 
 	page = alloc_pages(htlb_alloc_mask|__GFP_COMP|
 					__GFP_REPEAT|__GFP_NOWARN,
-					HUGETLB_PAGE_ORDER);
+					huge_page_order(h));
 
 	spin_lock(&hugetlb_lock);
 	if (page) {
@@ -636,12 +640,12 @@ static struct page *alloc_buddy_huge_page(struct vm_area_struct *vma,
 		/*
 		 * We incremented the global counters already
 		 */
-		nr_huge_pages_node[nid]++;
-		surplus_huge_pages_node[nid]++;
+		h->nr_huge_pages_node[nid]++;
+		h->surplus_huge_pages_node[nid]++;
 		__count_vm_event(HTLB_BUDDY_PGALLOC);
 	} else {
-		nr_huge_pages--;
-		surplus_huge_pages--;
+		h->nr_huge_pages--;
+		h->surplus_huge_pages--;
 		__count_vm_event(HTLB_BUDDY_PGALLOC_FAIL);
 	}
 	spin_unlock(&hugetlb_lock);
@@ -653,16 +657,16 @@ static struct page *alloc_buddy_huge_page(struct vm_area_struct *vma,
  * Increase the hugetlb pool such that it can accomodate a reservation
  * of size 'delta'.
  */
-static int gather_surplus_pages(int delta)
+static int gather_surplus_pages(struct hstate *h, int delta)
 {
 	struct list_head surplus_list;
 	struct page *page, *tmp;
 	int ret, i;
 	int needed, allocated;
 
-	needed = (resv_huge_pages + delta) - free_huge_pages;
+	needed = (h->resv_huge_pages + delta) - h->free_huge_pages;
 	if (needed <= 0) {
-		resv_huge_pages += delta;
+		h->resv_huge_pages += delta;
 		return 0;
 	}
 
@@ -673,7 +677,7 @@ static int gather_surplus_pages(int delta)
 retry:
 	spin_unlock(&hugetlb_lock);
 	for (i = 0; i < needed; i++) {
-		page = alloc_buddy_huge_page(NULL, 0);
+		page = alloc_buddy_huge_page(h, NULL, 0);
 		if (!page) {
 			/*
 			 * We were not able to allocate enough pages to
@@ -694,7 +698,8 @@ retry:
 	 * because either resv_huge_pages or free_huge_pages may have changed.
 	 */
 	spin_lock(&hugetlb_lock);
-	needed = (resv_huge_pages + delta) - (free_huge_pages + allocated);
+	needed = (h->resv_huge_pages + delta) -
+			(h->free_huge_pages + allocated);
 	if (needed > 0)
 		goto retry;
 
@@ -707,7 +712,7 @@ retry:
 	 * before they are reserved.
 	 */
 	needed += allocated;
-	resv_huge_pages += delta;
+	h->resv_huge_pages += delta;
 	ret = 0;
 free:
 	/* Free the needed pages to the hugetlb pool */
@@ -715,7 +720,7 @@ free:
 		if ((--needed) < 0)
 			break;
 		list_del(&page->lru);
-		enqueue_huge_page(page);
+		enqueue_huge_page(h, page);
 	}
 
 	/* Free unnecessary surplus pages to the buddy allocator */
@@ -743,7 +748,8 @@ free:
  * allocated to satisfy the reservation must be explicitly freed if they were
  * never used.
  */
-static void return_unused_surplus_pages(unsigned long unused_resv_pages)
+static void return_unused_surplus_pages(struct hstate *h,
+					unsigned long unused_resv_pages)
 {
 	static int nid = -1;
 	struct page *page;
@@ -758,27 +764,27 @@ static void return_unused_surplus_pages(unsigned long unused_resv_pages)
 	unsigned long remaining_iterations = num_online_nodes();
 
 	/* Uncommit the reservation */
-	resv_huge_pages -= unused_resv_pages;
+	h->resv_huge_pages -= unused_resv_pages;
 
-	nr_pages = min(unused_resv_pages, surplus_huge_pages);
+	nr_pages = min(unused_resv_pages, h->surplus_huge_pages);
 
 	while (remaining_iterations-- && nr_pages) {
 		nid = next_node(nid, node_online_map);
 		if (nid == MAX_NUMNODES)
 			nid = first_node(node_online_map);
 
-		if (!surplus_huge_pages_node[nid])
+		if (!h->surplus_huge_pages_node[nid])
 			continue;
 
-		if (!list_empty(&hugepage_freelists[nid])) {
-			page = list_entry(hugepage_freelists[nid].next,
+		if (!list_empty(&h->hugepage_freelists[nid])) {
+			page = list_entry(h->hugepage_freelists[nid].next,
 					  struct page, lru);
 			list_del(&page->lru);
-			update_and_free_page(page);
-			free_huge_pages--;
-			free_huge_pages_node[nid]--;
-			surplus_huge_pages--;
-			surplus_huge_pages_node[nid]--;
+			update_and_free_page(h, page);
+			h->free_huge_pages--;
+			h->free_huge_pages_node[nid]--;
+			h->surplus_huge_pages--;
+			h->surplus_huge_pages_node[nid]--;
 			nr_pages--;
 			remaining_iterations = num_online_nodes();
 		}
@@ -794,13 +800,14 @@ static void return_unused_surplus_pages(unsigned long unused_resv_pages)
  * an instantiated the change should be committed via vma_commit_reservation.
  * No action is required on failure.
  */
-static int vma_needs_reservation(struct vm_area_struct *vma, unsigned long addr)
+static int vma_needs_reservation(struct hstate *h,
+			struct vm_area_struct *vma, unsigned long addr)
 {
 	struct address_space *mapping = vma->vm_file->f_mapping;
 	struct inode *inode = mapping->host;
 
 	if (vma->vm_flags & VM_SHARED) {
-		pgoff_t idx = vma_hugecache_offset(vma, addr);
+		pgoff_t idx = vma_hugecache_offset(h, vma, addr);
 		return region_chg(&inode->i_mapping->private_list,
 							idx, idx + 1);
 
@@ -809,7 +816,7 @@ static int vma_needs_reservation(struct vm_area_struct *vma, unsigned long addr)
 
 	} else  {
 		int err;
-		pgoff_t idx = vma_hugecache_offset(vma, addr);
+		pgoff_t idx = vma_hugecache_offset(h, vma, addr);
 		struct resv_map *reservations = vma_resv_map(vma);
 
 		err = region_chg(&reservations->regions, idx, idx + 1);
@@ -818,18 +825,18 @@ static int vma_needs_reservation(struct vm_area_struct *vma, unsigned long addr)
 		return 0;
 	}
 }
-static void vma_commit_reservation(struct vm_area_struct *vma,
-							unsigned long addr)
+static void vma_commit_reservation(struct hstate *h,
+			struct vm_area_struct *vma, unsigned long addr)
 {
 	struct address_space *mapping = vma->vm_file->f_mapping;
 	struct inode *inode = mapping->host;
 
 	if (vma->vm_flags & VM_SHARED) {
-		pgoff_t idx = vma_hugecache_offset(vma, addr);
+		pgoff_t idx = vma_hugecache_offset(h, vma, addr);
 		region_add(&inode->i_mapping->private_list, idx, idx + 1);
 
 	} else if (is_vma_resv_set(vma, HPAGE_RESV_OWNER)) {
-		pgoff_t idx = vma_hugecache_offset(vma, addr);
+		pgoff_t idx = vma_hugecache_offset(h, vma, addr);
 		struct resv_map *reservations = vma_resv_map(vma);
 
 		/* Mark this page used in the map. */
@@ -840,6 +847,7 @@ static void vma_commit_reservation(struct vm_area_struct *vma,
 static struct page *alloc_huge_page(struct vm_area_struct *vma,
 				    unsigned long addr, int avoid_reserve)
 {
+	struct hstate *h = hstate_vma(vma);
 	struct page *page;
 	struct address_space *mapping = vma->vm_file->f_mapping;
 	struct inode *inode = mapping->host;
@@ -852,7 +860,7 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma,
 	 * MAP_NORESERVE mappings may also need pages and quota allocated
 	 * if no reserve mapping overlaps.
 	 */
-	chg = vma_needs_reservation(vma, addr);
+	chg = vma_needs_reservation(h, vma, addr);
 	if (chg < 0)
 		return ERR_PTR(chg);
 	if (chg)
@@ -860,11 +868,11 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma,
 			return ERR_PTR(-ENOSPC);
 
 	spin_lock(&hugetlb_lock);
-	page = dequeue_huge_page_vma(vma, addr, avoid_reserve);
+	page = dequeue_huge_page_vma(h, vma, addr, avoid_reserve);
 	spin_unlock(&hugetlb_lock);
 
 	if (!page) {
-		page = alloc_buddy_huge_page(vma, addr);
+		page = alloc_buddy_huge_page(h, vma, addr);
 		if (!page) {
 			hugetlb_put_quota(inode->i_mapping, chg);
 			return ERR_PTR(-VM_FAULT_OOM);
@@ -874,7 +882,7 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma,
 	set_page_refcounted(page);
 	set_page_private(page, (unsigned long) mapping);
 
-	vma_commit_reservation(vma, addr);
+	vma_commit_reservation(h, vma, addr);
 
 	return page;
 }
@@ -882,21 +890,28 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma,
 static int __init hugetlb_init(void)
 {
 	unsigned long i;
+	struct hstate *h = &default_hstate;
 
 	if (HPAGE_SHIFT == 0)
 		return 0;
 
+	if (!h->order) {
+		h->order = HPAGE_SHIFT - PAGE_SHIFT;
+		h->mask = HPAGE_MASK;
+	}
+
 	for (i = 0; i < MAX_NUMNODES; ++i)
-		INIT_LIST_HEAD(&hugepage_freelists[i]);
+		INIT_LIST_HEAD(&h->hugepage_freelists[i]);
 
-	hugetlb_next_nid = first_node(node_online_map);
+	h->hugetlb_next_nid = first_node(node_online_map);
 
 	for (i = 0; i < max_huge_pages; ++i) {
-		if (!alloc_fresh_huge_page())
+		if (!alloc_fresh_huge_page(h))
 			break;
 	}
-	max_huge_pages = free_huge_pages = nr_huge_pages = i;
-	printk("Total HugeTLB memory allocated, %ld\n", free_huge_pages);
+	max_huge_pages = h->free_huge_pages = h->nr_huge_pages = i;
+	printk(KERN_INFO "Total HugeTLB memory allocated, %ld\n",
+			h->free_huge_pages);
 	return 0;
 }
 module_init(hugetlb_init);
@@ -922,34 +937,36 @@ static unsigned int cpuset_mems_nr(unsigned int *array)
 
 #ifdef CONFIG_SYSCTL
 #ifdef CONFIG_HIGHMEM
-static void try_to_free_low(unsigned long count)
+static void try_to_free_low(struct hstate *h, unsigned long count)
 {
 	int i;
 
 	for (i = 0; i < MAX_NUMNODES; ++i) {
 		struct page *page, *next;
-		list_for_each_entry_safe(page, next, &hugepage_freelists[i], lru) {
-			if (count >= nr_huge_pages)
+		struct list_head *freel = &h->hugepage_freelists[i];
+		list_for_each_entry_safe(page, next, freel, lru) {
+			if (count >= h->nr_huge_pages)
 				return;
 			if (PageHighMem(page))
 				continue;
 			list_del(&page->lru);
 			update_and_free_page(page);
-			free_huge_pages--;
-			free_huge_pages_node[page_to_nid(page)]--;
+			h->free_huge_pages--;
+			h->free_huge_pages_node[page_to_nid(page)]--;
 		}
 	}
 }
 #else
-static inline void try_to_free_low(unsigned long count)
+static inline void try_to_free_low(struct hstate *h, unsigned long count)
 {
 }
 #endif
 
-#define persistent_huge_pages (nr_huge_pages - surplus_huge_pages)
+#define persistent_huge_pages(h) (h->nr_huge_pages - h->surplus_huge_pages)
 static unsigned long set_max_huge_pages(unsigned long count)
 {
 	unsigned long min_count, ret;
+	struct hstate *h = &default_hstate;
 
 	/*
 	 * Increase the pool size
@@ -963,19 +980,19 @@ static unsigned long set_max_huge_pages(unsigned long count)
 	 * within all the constraints specified by the sysctls.
 	 */
 	spin_lock(&hugetlb_lock);
-	while (surplus_huge_pages && count > persistent_huge_pages) {
-		if (!adjust_pool_surplus(-1))
+	while (h->surplus_huge_pages && count > persistent_huge_pages(h)) {
+		if (!adjust_pool_surplus(h, -1))
 			break;
 	}
 
-	while (count > persistent_huge_pages) {
+	while (count > persistent_huge_pages(h)) {
 		/*
 		 * If this allocation races such that we no longer need the
 		 * page, free_huge_page will handle it by freeing the page
 		 * and reducing the surplus.
 		 */
 		spin_unlock(&hugetlb_lock);
-		ret = alloc_fresh_huge_page();
+		ret = alloc_fresh_huge_page(h);
 		spin_lock(&hugetlb_lock);
 		if (!ret)
 			goto out;
@@ -997,21 +1014,21 @@ static unsigned long set_max_huge_pages(unsigned long count)
 	 * and won't grow the pool anywhere else. Not until one of the
 	 * sysctls are changed, or the surplus pages go out of use.
 	 */
-	min_count = resv_huge_pages + nr_huge_pages - free_huge_pages;
+	min_count = h->resv_huge_pages + h->nr_huge_pages - h->free_huge_pages;
 	min_count = max(count, min_count);
-	try_to_free_low(min_count);
-	while (min_count < persistent_huge_pages) {
-		struct page *page = dequeue_huge_page();
+	try_to_free_low(h, min_count);
+	while (min_count < persistent_huge_pages(h)) {
+		struct page *page = dequeue_huge_page(h);
 		if (!page)
 			break;
-		update_and_free_page(page);
+		update_and_free_page(h, page);
 	}
-	while (count < persistent_huge_pages) {
-		if (!adjust_pool_surplus(1))
+	while (count < persistent_huge_pages(h)) {
+		if (!adjust_pool_surplus(h, 1))
 			break;
 	}
 out:
-	ret = persistent_huge_pages;
+	ret = persistent_huge_pages(h);
 	spin_unlock(&hugetlb_lock);
 	return ret;
 }
@@ -1041,9 +1058,10 @@ int hugetlb_overcommit_handler(struct ctl_table *table, int write,
 			struct file *file, void __user *buffer,
 			size_t *length, loff_t *ppos)
 {
+	struct hstate *h = &default_hstate;
 	proc_doulongvec_minmax(table, write, file, buffer, length, ppos);
 	spin_lock(&hugetlb_lock);
-	nr_overcommit_huge_pages = sysctl_overcommit_huge_pages;
+	h->nr_overcommit_huge_pages = sysctl_overcommit_huge_pages;
 	spin_unlock(&hugetlb_lock);
 	return 0;
 }
@@ -1052,37 +1070,40 @@ int hugetlb_overcommit_handler(struct ctl_table *table, int write,
 
 int hugetlb_report_meminfo(char *buf)
 {
+	struct hstate *h = &default_hstate;
 	return sprintf(buf,
 			"HugePages_Total: %5lu\n"
 			"HugePages_Free:  %5lu\n"
 			"HugePages_Rsvd:  %5lu\n"
 			"HugePages_Surp:  %5lu\n"
 			"Hugepagesize:    %5lu kB\n",
-			nr_huge_pages,
-			free_huge_pages,
-			resv_huge_pages,
-			surplus_huge_pages,
-			HPAGE_SIZE/1024);
+			h->nr_huge_pages,
+			h->free_huge_pages,
+			h->resv_huge_pages,
+			h->surplus_huge_pages,
+			1UL << (huge_page_order(h) + PAGE_SHIFT - 10));
 }
 
 int hugetlb_report_node_meminfo(int nid, char *buf)
 {
+	struct hstate *h = &default_hstate;
 	return sprintf(buf,
 		"Node %d HugePages_Total: %5u\n"
 		"Node %d HugePages_Free:  %5u\n"
 		"Node %d HugePages_Surp:  %5u\n",
-		nid, nr_huge_pages_node[nid],
-		nid, free_huge_pages_node[nid],
-		nid, surplus_huge_pages_node[nid]);
+		nid, h->nr_huge_pages_node[nid],
+		nid, h->free_huge_pages_node[nid],
+		nid, h->surplus_huge_pages_node[nid]);
 }
 
 /* Return the number pages of memory we physically have, in PAGE_SIZE units. */
 unsigned long hugetlb_total_pages(void)
 {
-	return nr_huge_pages * (HPAGE_SIZE / PAGE_SIZE);
+	struct hstate *h = &default_hstate;
+	return h->nr_huge_pages * pages_per_huge_page(h);
 }
 
-static int hugetlb_acct_memory(long delta)
+static int hugetlb_acct_memory(struct hstate *h, long delta)
 {
 	int ret = -ENOMEM;
 
@@ -1105,18 +1126,18 @@ static int hugetlb_acct_memory(long delta)
 	 * semantics that cpuset has.
 	 */
 	if (delta > 0) {
-		if (gather_surplus_pages(delta) < 0)
+		if (gather_surplus_pages(h, delta) < 0)
 			goto out;
 
-		if (delta > cpuset_mems_nr(free_huge_pages_node)) {
-			return_unused_surplus_pages(delta);
+		if (delta > cpuset_mems_nr(h->free_huge_pages_node)) {
+			return_unused_surplus_pages(h, delta);
 			goto out;
 		}
 	}
 
 	ret = 0;
 	if (delta < 0)
-		return_unused_surplus_pages((unsigned long) -delta);
+		return_unused_surplus_pages(h, (unsigned long) -delta);
 
 out:
 	spin_unlock(&hugetlb_lock);
@@ -1141,14 +1162,15 @@ static void hugetlb_vm_op_open(struct vm_area_struct *vma)
 
 static void hugetlb_vm_op_close(struct vm_area_struct *vma)
 {
+	struct hstate *h = hstate_vma(vma);
 	struct resv_map *reservations = vma_resv_map(vma);
 	unsigned long reserve;
 	unsigned long start;
 	unsigned long end;
 
 	if (reservations) {
-		start = vma_hugecache_offset(vma, vma->vm_start);
-		end = vma_hugecache_offset(vma, vma->vm_end);
+		start = vma_hugecache_offset(h, vma, vma->vm_start);
+		end = vma_hugecache_offset(h, vma, vma->vm_end);
 
 		reserve = (end - start) -
 			region_count(&reservations->regions, start, end);
@@ -1156,7 +1178,7 @@ static void hugetlb_vm_op_close(struct vm_area_struct *vma)
 		kref_put(&reservations->refs, resv_map_release);
 
 		if (reserve)
-			hugetlb_acct_memory(-reserve);
+			hugetlb_acct_memory(h, -reserve);
 	}
 }
 
@@ -1214,14 +1236,16 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 	struct page *ptepage;
 	unsigned long addr;
 	int cow;
+	struct hstate *h = hstate_vma(vma);
+	unsigned long sz = huge_page_size(h);
 
 	cow = (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
 
-	for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) {
+	for (addr = vma->vm_start; addr < vma->vm_end; addr += sz) {
 		src_pte = huge_pte_offset(src, addr);
 		if (!src_pte)
 			continue;
-		dst_pte = huge_pte_alloc(dst, addr);
+		dst_pte = huge_pte_alloc(dst, addr, sz);
 		if (!dst_pte)
 			goto nomem;
 
@@ -1257,6 +1281,9 @@ void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
 	pte_t pte;
 	struct page *page;
 	struct page *tmp;
+	struct hstate *h = hstate_vma(vma);
+	unsigned long sz = huge_page_size(h);
+
 	/*
 	 * A page gathering list, protected by per file i_mmap_lock. The
 	 * lock is used to avoid list corruption from multiple unmapping
@@ -1265,11 +1292,11 @@ void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
 	LIST_HEAD(page_list);
 
 	WARN_ON(!is_vm_hugetlb_page(vma));
-	BUG_ON(start & ~HPAGE_MASK);
-	BUG_ON(end & ~HPAGE_MASK);
+	BUG_ON(start & ~huge_page_mask(h));
+	BUG_ON(end & ~huge_page_mask(h));
 
 	spin_lock(&mm->page_table_lock);
-	for (address = start; address < end; address += HPAGE_SIZE) {
+	for (address = start; address < end; address += sz) {
 		ptep = huge_pte_offset(mm, address);
 		if (!ptep)
 			continue;
@@ -1383,6 +1410,7 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
 			unsigned long address, pte_t *ptep, pte_t pte,
 			struct page *pagecache_page)
 {
+	struct hstate *h = hstate_vma(vma);
 	struct page *old_page, *new_page;
 	int avoidcopy;
 	int outside_reserve = 0;
@@ -1443,7 +1471,7 @@ retry_avoidcopy:
 	__SetPageUptodate(new_page);
 	spin_lock(&mm->page_table_lock);
 
-	ptep = huge_pte_offset(mm, address & HPAGE_MASK);
+	ptep = huge_pte_offset(mm, address & huge_page_mask(h));
 	if (likely(pte_same(huge_ptep_get(ptep), pte))) {
 		/* Break COW */
 		huge_ptep_clear_flush(vma, address, ptep);
@@ -1458,14 +1486,14 @@ retry_avoidcopy:
 }
 
 /* Return the pagecache page at a given address within a VMA */
-static struct page *hugetlbfs_pagecache_page(struct vm_area_struct *vma,
-			unsigned long address)
+static struct page *hugetlbfs_pagecache_page(struct hstate *h,
+			struct vm_area_struct *vma, unsigned long address)
 {
 	struct address_space *mapping;
 	pgoff_t idx;
 
 	mapping = vma->vm_file->f_mapping;
-	idx = vma_hugecache_offset(vma, address);
+	idx = vma_hugecache_offset(h, vma, address);
 
 	return find_lock_page(mapping, idx);
 }
@@ -1473,6 +1501,7 @@ static struct page *hugetlbfs_pagecache_page(struct vm_area_struct *vma,
 static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
 			unsigned long address, pte_t *ptep, int write_access)
 {
+	struct hstate *h = hstate_vma(vma);
 	int ret = VM_FAULT_SIGBUS;
 	pgoff_t idx;
 	unsigned long size;
@@ -1493,7 +1522,7 @@ static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	}
 
 	mapping = vma->vm_file->f_mapping;
-	idx = vma_hugecache_offset(vma, address);
+	idx = vma_hugecache_offset(h, vma, address);
 
 	/*
 	 * Use page lock to guard against racing truncation
@@ -1502,7 +1531,7 @@ static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
 retry:
 	page = find_lock_page(mapping, idx);
 	if (!page) {
-		size = i_size_read(mapping->host) >> HPAGE_SHIFT;
+		size = i_size_read(mapping->host) >> huge_page_shift(h);
 		if (idx >= size)
 			goto out;
 		page = alloc_huge_page(vma, address, 0);
@@ -1510,7 +1539,7 @@ retry:
 			ret = -PTR_ERR(page);
 			goto out;
 		}
-		clear_huge_page(page, address);
+		clear_huge_page(page, address, huge_page_size(h));
 		__SetPageUptodate(page);
 
 		if (vma->vm_flags & VM_SHARED) {
@@ -1526,14 +1555,14 @@ retry:
 			}
 
 			spin_lock(&inode->i_lock);
-			inode->i_blocks += BLOCKS_PER_HUGEPAGE;
+			inode->i_blocks += blocks_per_huge_page(h);
 			spin_unlock(&inode->i_lock);
 		} else
 			lock_page(page);
 	}
 
 	spin_lock(&mm->page_table_lock);
-	size = i_size_read(mapping->host) >> HPAGE_SHIFT;
+	size = i_size_read(mapping->host) >> huge_page_shift(h);
 	if (idx >= size)
 		goto backout;
 
@@ -1569,8 +1598,9 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	pte_t entry;
 	int ret;
 	static DEFINE_MUTEX(hugetlb_instantiation_mutex);
+	struct hstate *h = hstate_vma(vma);
 
-	ptep = huge_pte_alloc(mm, address);
+	ptep = huge_pte_alloc(mm, address, huge_page_size(h));
 	if (!ptep)
 		return VM_FAULT_OOM;
 
@@ -1594,7 +1624,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (likely(pte_same(entry, huge_ptep_get(ptep))))
 		if (write_access && !pte_write(entry)) {
 			struct page *page;
-			page = hugetlbfs_pagecache_page(vma, address);
+			page = hugetlbfs_pagecache_page(h, vma, address);
 			ret = hugetlb_cow(mm, vma, address, ptep, entry, page);
 			if (page) {
 				unlock_page(page);
@@ -1615,6 +1645,7 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	unsigned long pfn_offset;
 	unsigned long vaddr = *position;
 	int remainder = *length;
+	struct hstate *h = hstate_vma(vma);
 
 	spin_lock(&mm->page_table_lock);
 	while (vaddr < vma->vm_end && remainder) {
@@ -1626,7 +1657,7 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		 * each hugepage.  We have to make * sure we get the
 		 * first, for the page indexing below to work.
 		 */
-		pte = huge_pte_offset(mm, vaddr & HPAGE_MASK);
+		pte = huge_pte_offset(mm, vaddr & huge_page_mask(h));
 
 		if (!pte || huge_pte_none(huge_ptep_get(pte)) ||
 		    (write && !pte_write(huge_ptep_get(pte)))) {
@@ -1644,7 +1675,7 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 			break;
 		}
 
-		pfn_offset = (vaddr & ~HPAGE_MASK) >> PAGE_SHIFT;
+		pfn_offset = (vaddr & ~huge_page_mask(h)) >> PAGE_SHIFT;
 		page = pte_page(huge_ptep_get(pte));
 same_page:
 		if (pages) {
@@ -1660,7 +1691,7 @@ same_page:
 		--remainder;
 		++i;
 		if (vaddr < vma->vm_end && remainder &&
-				pfn_offset < HPAGE_SIZE/PAGE_SIZE) {
+				pfn_offset < pages_per_huge_page(h)) {
 			/*
 			 * We use pfn_offset to avoid touching the pageframes
 			 * of this compound page.
@@ -1682,13 +1713,14 @@ void hugetlb_change_protection(struct vm_area_struct *vma,
 	unsigned long start = address;
 	pte_t *ptep;
 	pte_t pte;
+	struct hstate *h = hstate_vma(vma);
 
 	BUG_ON(address >= end);
 	flush_cache_range(vma, address, end);
 
 	spin_lock(&vma->vm_file->f_mapping->i_mmap_lock);
 	spin_lock(&mm->page_table_lock);
-	for (; address < end; address += HPAGE_SIZE) {
+	for (; address < end; address += huge_page_size(h)) {
 		ptep = huge_pte_offset(mm, address);
 		if (!ptep)
 			continue;
@@ -1711,6 +1743,7 @@ int hugetlb_reserve_pages(struct inode *inode,
 					struct vm_area_struct *vma)
 {
 	long ret, chg;
+	struct hstate *h = hstate_inode(inode);
 
 	if (vma && vma->vm_flags & VM_NORESERVE)
 		return 0;
@@ -1739,7 +1772,7 @@ int hugetlb_reserve_pages(struct inode *inode,
 
 	if (hugetlb_get_quota(inode->i_mapping, chg))
 		return -ENOSPC;
-	ret = hugetlb_acct_memory(chg);
+	ret = hugetlb_acct_memory(h, chg);
 	if (ret < 0) {
 		hugetlb_put_quota(inode->i_mapping, chg);
 		return ret;
@@ -1751,12 +1784,13 @@ int hugetlb_reserve_pages(struct inode *inode,
 
 void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed)
 {
+	struct hstate *h = hstate_inode(inode);
 	long chg = region_truncate(&inode->i_mapping->private_list, offset);
 
 	spin_lock(&inode->i_lock);
-	inode->i_blocks -= BLOCKS_PER_HUGEPAGE * freed;
+	inode->i_blocks -= blocks_per_huge_page(h);
 	spin_unlock(&inode->i_lock);
 
 	hugetlb_put_quota(inode->i_mapping, (chg - freed));
-	hugetlb_acct_memory(-(chg - freed));
+	hugetlb_acct_memory(h, -(chg - freed));
 }
diff --git a/mm/memory.c b/mm/memory.c
index 72932489a082..c1c1d6d8c22b 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -903,7 +903,7 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp,
 			if (unlikely(is_vm_hugetlb_page(vma))) {
 				unmap_hugepage_range(vma, start, end, NULL);
 				zap_work -= (end - start) /
-						(HPAGE_SIZE / PAGE_SIZE);
+					pages_per_huge_page(hstate_vma(vma));
 				start = end;
 			} else
 				start = unmap_page_range(*tlbp, vma,
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index c94e58b192c3..e550bec20582 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1481,7 +1481,7 @@ struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr,
 
 	if (unlikely((*mpol)->mode == MPOL_INTERLEAVE)) {
 		zl = node_zonelist(interleave_nid(*mpol, vma, addr,
-						HPAGE_SHIFT), gfp_flags);
+				huge_page_shift(hstate_vma(vma))), gfp_flags);
 	} else {
 		zl = policy_zonelist(gfp_flags, *mpol);
 		if ((*mpol)->mode == MPOL_BIND)
@@ -2220,9 +2220,12 @@ static void check_huge_range(struct vm_area_struct *vma,
 {
 	unsigned long addr;
 	struct page *page;
+	struct hstate *h = hstate_vma(vma);
+	unsigned long sz = huge_page_size(h);
 
-	for (addr = start; addr < end; addr += HPAGE_SIZE) {
-		pte_t *ptep = huge_pte_offset(vma->vm_mm, addr & HPAGE_MASK);
+	for (addr = start; addr < end; addr += sz) {
+		pte_t *ptep = huge_pte_offset(vma->vm_mm,
+						addr & huge_page_mask(h));
 		pte_t pte;
 
 		if (!ptep)
diff --git a/mm/mmap.c b/mm/mmap.c
index 57d3b6097deb..5e0cc99e9cd5 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1812,7 +1812,8 @@ int split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
 	struct mempolicy *pol;
 	struct vm_area_struct *new;
 
-	if (is_vm_hugetlb_page(vma) && (addr & ~HPAGE_MASK))
+	if (is_vm_hugetlb_page(vma) && (addr &
+					~(huge_page_mask(hstate_vma(vma)))))
 		return -EINVAL;
 
 	if (mm->map_count >= sysctl_max_map_count)
-- 
cgit v1.2.3


From e5ff215941d59f8ae6bf58f6428dc5c26745a612 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Wed, 23 Jul 2008 21:27:42 -0700
Subject: hugetlb: multiple hstates for multiple page sizes

Add basic support for more than one hstate in hugetlbfs.  This is the key
to supporting multiple hugetlbfs page sizes at once.

- Rather than a single hstate, we now have an array, with an iterator
- default_hstate continues to be the struct hstate which we use by default
- Add functions for architectures to register new hstates

[akpm@linux-foundation.org: coding-style fixes]
Acked-by: Adam Litke <agl@us.ibm.com>
Acked-by: Nishanth Aravamudan <nacc@us.ibm.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/hugetlb.h |  19 ++++++-
 kernel/sysctl.c         |   8 ++-
 mm/hugetlb.c            | 148 +++++++++++++++++++++++++++++++++++++++---------
 3 files changed, 142 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index ad2271e11f9b..b75bdb4deba3 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -36,8 +36,6 @@ int hugetlb_reserve_pages(struct inode *inode, long from, long to,
 						struct vm_area_struct *vma);
 void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed);
 
-extern unsigned long max_huge_pages;
-extern unsigned long sysctl_overcommit_huge_pages;
 extern unsigned long hugepages_treat_as_movable;
 extern const unsigned long hugetlb_zero, hugetlb_infinity;
 extern int sysctl_hugetlb_shm_group;
@@ -181,7 +179,17 @@ struct hstate {
 	unsigned int surplus_huge_pages_node[MAX_NUMNODES];
 };
 
-extern struct hstate default_hstate;
+void __init hugetlb_add_hstate(unsigned order);
+struct hstate *size_to_hstate(unsigned long size);
+
+#ifndef HUGE_MAX_HSTATE
+#define HUGE_MAX_HSTATE 1
+#endif
+
+extern struct hstate hstates[HUGE_MAX_HSTATE];
+extern unsigned int default_hstate_idx;
+
+#define default_hstate (hstates[default_hstate_idx])
 
 static inline struct hstate *hstate_vma(struct vm_area_struct *vma)
 {
@@ -230,6 +238,11 @@ static inline unsigned int blocks_per_huge_page(struct hstate *h)
 
 #include <asm/hugetlb.h>
 
+static inline struct hstate *page_hstate(struct page *page)
+{
+	return size_to_hstate(PAGE_SIZE << compound_order(page));
+}
+
 #else
 struct hstate {};
 #define hstate_file(f) NULL
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 1f7b3b76a166..1a8299d1fe59 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -959,7 +959,7 @@ static struct ctl_table vm_table[] = {
 #ifdef CONFIG_HUGETLB_PAGE
 	 {
 		.procname	= "nr_hugepages",
-		.data		= &max_huge_pages,
+		.data		= NULL,
 		.maxlen		= sizeof(unsigned long),
 		.mode		= 0644,
 		.proc_handler	= &hugetlb_sysctl_handler,
@@ -985,10 +985,12 @@ static struct ctl_table vm_table[] = {
 	{
 		.ctl_name	= CTL_UNNUMBERED,
 		.procname	= "nr_overcommit_hugepages",
-		.data		= &sysctl_overcommit_huge_pages,
-		.maxlen		= sizeof(sysctl_overcommit_huge_pages),
+		.data		= NULL,
+		.maxlen		= sizeof(unsigned long),
 		.mode		= 0644,
 		.proc_handler	= &hugetlb_overcommit_handler,
+		.extra1		= (void *)&hugetlb_zero,
+		.extra2		= (void *)&hugetlb_infinity,
 	},
 #endif
 	{
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 0d8153e25f09..82378d44a0c5 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -22,12 +22,19 @@
 #include "internal.h"
 
 const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL;
-unsigned long max_huge_pages;
-unsigned long sysctl_overcommit_huge_pages;
 static gfp_t htlb_alloc_mask = GFP_HIGHUSER;
 unsigned long hugepages_treat_as_movable;
 
-struct hstate default_hstate;
+static int max_hstate;
+unsigned int default_hstate_idx;
+struct hstate hstates[HUGE_MAX_HSTATE];
+
+/* for command line parsing */
+static struct hstate * __initdata parsed_hstate;
+static unsigned long __initdata default_hstate_max_huge_pages;
+
+#define for_each_hstate(h) \
+	for ((h) = hstates; (h) < &hstates[max_hstate]; (h)++)
 
 /*
  * Protects updates to hugepage_freelists, nr_huge_pages, and free_huge_pages
@@ -454,13 +461,24 @@ static void update_and_free_page(struct hstate *h, struct page *page)
 	__free_pages(page, huge_page_order(h));
 }
 
+struct hstate *size_to_hstate(unsigned long size)
+{
+	struct hstate *h;
+
+	for_each_hstate(h) {
+		if (huge_page_size(h) == size)
+			return h;
+	}
+	return NULL;
+}
+
 static void free_huge_page(struct page *page)
 {
 	/*
 	 * Can't pass hstate in here because it is called from the
 	 * compound page destructor.
 	 */
-	struct hstate *h = &default_hstate;
+	struct hstate *h = page_hstate(page);
 	int nid = page_to_nid(page);
 	struct address_space *mapping;
 
@@ -887,39 +905,94 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma,
 	return page;
 }
 
-static int __init hugetlb_init(void)
+static void __init hugetlb_init_one_hstate(struct hstate *h)
 {
 	unsigned long i;
-	struct hstate *h = &default_hstate;
-
-	if (HPAGE_SHIFT == 0)
-		return 0;
-
-	if (!h->order) {
-		h->order = HPAGE_SHIFT - PAGE_SHIFT;
-		h->mask = HPAGE_MASK;
-	}
 
 	for (i = 0; i < MAX_NUMNODES; ++i)
 		INIT_LIST_HEAD(&h->hugepage_freelists[i]);
 
 	h->hugetlb_next_nid = first_node(node_online_map);
 
-	for (i = 0; i < max_huge_pages; ++i) {
+	for (i = 0; i < h->max_huge_pages; ++i) {
 		if (!alloc_fresh_huge_page(h))
 			break;
 	}
-	max_huge_pages = h->free_huge_pages = h->nr_huge_pages = i;
-	printk(KERN_INFO "Total HugeTLB memory allocated, %ld\n",
-			h->free_huge_pages);
+	h->max_huge_pages = h->free_huge_pages = h->nr_huge_pages = i;
+}
+
+static void __init hugetlb_init_hstates(void)
+{
+	struct hstate *h;
+
+	for_each_hstate(h) {
+		hugetlb_init_one_hstate(h);
+	}
+}
+
+static void __init report_hugepages(void)
+{
+	struct hstate *h;
+
+	for_each_hstate(h) {
+		printk(KERN_INFO "Total HugeTLB memory allocated, "
+				"%ld %dMB pages\n",
+				h->free_huge_pages,
+				1 << (h->order + PAGE_SHIFT - 20));
+	}
+}
+
+static int __init hugetlb_init(void)
+{
+	BUILD_BUG_ON(HPAGE_SHIFT == 0);
+
+	if (!size_to_hstate(HPAGE_SIZE)) {
+		hugetlb_add_hstate(HUGETLB_PAGE_ORDER);
+		parsed_hstate->max_huge_pages = default_hstate_max_huge_pages;
+	}
+	default_hstate_idx = size_to_hstate(HPAGE_SIZE) - hstates;
+
+	hugetlb_init_hstates();
+
+	report_hugepages();
+
 	return 0;
 }
 module_init(hugetlb_init);
 
+/* Should be called on processing a hugepagesz=... option */
+void __init hugetlb_add_hstate(unsigned order)
+{
+	struct hstate *h;
+	if (size_to_hstate(PAGE_SIZE << order)) {
+		printk(KERN_WARNING "hugepagesz= specified twice, ignoring\n");
+		return;
+	}
+	BUG_ON(max_hstate >= HUGE_MAX_HSTATE);
+	BUG_ON(order == 0);
+	h = &hstates[max_hstate++];
+	h->order = order;
+	h->mask = ~((1ULL << (order + PAGE_SHIFT)) - 1);
+	hugetlb_init_one_hstate(h);
+	parsed_hstate = h;
+}
+
 static int __init hugetlb_setup(char *s)
 {
-	if (sscanf(s, "%lu", &max_huge_pages) <= 0)
-		max_huge_pages = 0;
+	unsigned long *mhp;
+
+	/*
+	 * !max_hstate means we haven't parsed a hugepagesz= parameter yet,
+	 * so this hugepages= parameter goes to the "default hstate".
+	 */
+	if (!max_hstate)
+		mhp = &default_hstate_max_huge_pages;
+	else
+		mhp = &parsed_hstate->max_huge_pages;
+
+	if (sscanf(s, "%lu", mhp) <= 0)
+		*mhp = 0;
+
 	return 1;
 }
 __setup("hugepages=", hugetlb_setup);
@@ -950,7 +1023,7 @@ static void try_to_free_low(struct hstate *h, unsigned long count)
 			if (PageHighMem(page))
 				continue;
 			list_del(&page->lru);
-			update_and_free_page(page);
+			update_and_free_page(h, page);
 			h->free_huge_pages--;
 			h->free_huge_pages_node[page_to_nid(page)]--;
 		}
@@ -963,10 +1036,9 @@ static inline void try_to_free_low(struct hstate *h, unsigned long count)
 #endif
 
 #define persistent_huge_pages(h) (h->nr_huge_pages - h->surplus_huge_pages)
-static unsigned long set_max_huge_pages(unsigned long count)
+static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count)
 {
 	unsigned long min_count, ret;
-	struct hstate *h = &default_hstate;
 
 	/*
 	 * Increase the pool size
@@ -1037,8 +1109,19 @@ int hugetlb_sysctl_handler(struct ctl_table *table, int write,
 			   struct file *file, void __user *buffer,
 			   size_t *length, loff_t *ppos)
 {
+	struct hstate *h = &default_hstate;
+	unsigned long tmp;
+
+	if (!write)
+		tmp = h->max_huge_pages;
+
+	table->data = &tmp;
+	table->maxlen = sizeof(unsigned long);
 	proc_doulongvec_minmax(table, write, file, buffer, length, ppos);
-	max_huge_pages = set_max_huge_pages(max_huge_pages);
+
+	if (write)
+		h->max_huge_pages = set_max_huge_pages(h, tmp);
+
 	return 0;
 }
 
@@ -1059,10 +1142,21 @@ int hugetlb_overcommit_handler(struct ctl_table *table, int write,
 			size_t *length, loff_t *ppos)
 {
 	struct hstate *h = &default_hstate;
+	unsigned long tmp;
+
+	if (!write)
+		tmp = h->nr_overcommit_huge_pages;
+
+	table->data = &tmp;
+	table->maxlen = sizeof(unsigned long);
 	proc_doulongvec_minmax(table, write, file, buffer, length, ppos);
-	spin_lock(&hugetlb_lock);
-	h->nr_overcommit_huge_pages = sysctl_overcommit_huge_pages;
-	spin_unlock(&hugetlb_lock);
+
+	if (write) {
+		spin_lock(&hugetlb_lock);
+		h->nr_overcommit_huge_pages = tmp;
+		spin_unlock(&hugetlb_lock);
+	}
+
 	return 0;
 }
 
-- 
cgit v1.2.3


From a137e1cc6d6e7d315fef03962a2a5a113348b13b Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Wed, 23 Jul 2008 21:27:43 -0700
Subject: hugetlbfs: per mount huge page sizes

Add the ability to configure the hugetlb hstate used on a per mount basis.

- Add a new pagesize= option to the hugetlbfs mount that allows setting
  the page size
- This option causes the mount code to find the hstate corresponding to the
  specified size, and sets up a pointer to the hstate in the mount's
  superblock.
- Change the hstate accessors to use this information rather than the
  global_hstate they were using (requires a slight change in mm/memory.c
  so we don't NULL deref in the error-unmap path -- see comments).

[np: take hstate out of hugetlbfs inode and vma->vm_private_data]

Acked-by: Adam Litke <agl@us.ibm.com>
Acked-by: Nishanth Aravamudan <nacc@us.ibm.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/hugetlbfs/inode.c    | 45 ++++++++++++++++++++++++++++++++++++---------
 include/linux/hugetlb.h | 14 +++++++++-----
 mm/hugetlb.c            | 16 +++-------------
 mm/memory.c             | 18 ++++++++++++++++--
 4 files changed, 64 insertions(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 516c581b5371..dbd01d262ca4 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -53,6 +53,7 @@ int sysctl_hugetlb_shm_group;
 enum {
 	Opt_size, Opt_nr_inodes,
 	Opt_mode, Opt_uid, Opt_gid,
+	Opt_pagesize,
 	Opt_err,
 };
 
@@ -62,6 +63,7 @@ static match_table_t tokens = {
 	{Opt_mode,	"mode=%o"},
 	{Opt_uid,	"uid=%u"},
 	{Opt_gid,	"gid=%u"},
+	{Opt_pagesize,	"pagesize=%s"},
 	{Opt_err,	NULL},
 };
 
@@ -750,6 +752,8 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
 	char *p, *rest;
 	substring_t args[MAX_OPT_ARGS];
 	int option;
+	unsigned long long size = 0;
+	enum { NO_SIZE, SIZE_STD, SIZE_PERCENT } setsize = NO_SIZE;
 
 	if (!options)
 		return 0;
@@ -780,17 +784,13 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
 			break;
 
 		case Opt_size: {
- 			unsigned long long size;
 			/* memparse() will accept a K/M/G without a digit */
 			if (!isdigit(*args[0].from))
 				goto bad_val;
 			size = memparse(args[0].from, &rest);
-			if (*rest == '%') {
-				size <<= HPAGE_SHIFT;
-				size *= max_huge_pages;
-				do_div(size, 100);
-			}
-			pconfig->nr_blocks = (size >> HPAGE_SHIFT);
+			setsize = SIZE_STD;
+			if (*rest == '%')
+				setsize = SIZE_PERCENT;
 			break;
 		}
 
@@ -801,6 +801,19 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
 			pconfig->nr_inodes = memparse(args[0].from, &rest);
 			break;
 
+		case Opt_pagesize: {
+			unsigned long ps;
+			ps = memparse(args[0].from, &rest);
+			pconfig->hstate = size_to_hstate(ps);
+			if (!pconfig->hstate) {
+				printk(KERN_ERR
+				"hugetlbfs: Unsupported page size %lu MB\n",
+					ps >> 20);
+				return -EINVAL;
+			}
+			break;
+		}
+
 		default:
 			printk(KERN_ERR "hugetlbfs: Bad mount option: \"%s\"\n",
 				 p);
@@ -808,6 +821,18 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
 			break;
 		}
 	}
+
+	/* Do size after hstate is set up */
+	if (setsize > NO_SIZE) {
+		struct hstate *h = pconfig->hstate;
+		if (setsize == SIZE_PERCENT) {
+			size <<= huge_page_shift(h);
+			size *= h->max_huge_pages;
+			do_div(size, 100);
+		}
+		pconfig->nr_blocks = (size >> huge_page_shift(h));
+	}
+
 	return 0;
 
 bad_val:
@@ -832,6 +857,7 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent)
 	config.uid = current->fsuid;
 	config.gid = current->fsgid;
 	config.mode = 0755;
+	config.hstate = &default_hstate;
 	ret = hugetlbfs_parse_options(data, &config);
 	if (ret)
 		return ret;
@@ -840,14 +866,15 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent)
 	if (!sbinfo)
 		return -ENOMEM;
 	sb->s_fs_info = sbinfo;
+	sbinfo->hstate = config.hstate;
 	spin_lock_init(&sbinfo->stat_lock);
 	sbinfo->max_blocks = config.nr_blocks;
 	sbinfo->free_blocks = config.nr_blocks;
 	sbinfo->max_inodes = config.nr_inodes;
 	sbinfo->free_inodes = config.nr_inodes;
 	sb->s_maxbytes = MAX_LFS_FILESIZE;
-	sb->s_blocksize = HPAGE_SIZE;
-	sb->s_blocksize_bits = HPAGE_SHIFT;
+	sb->s_blocksize = huge_page_size(config.hstate);
+	sb->s_blocksize_bits = huge_page_shift(config.hstate);
 	sb->s_magic = HUGETLBFS_MAGIC;
 	sb->s_op = &hugetlbfs_ops;
 	sb->s_time_gran = 1;
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index b75bdb4deba3..ba9263e631b9 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -100,6 +100,7 @@ struct hugetlbfs_config {
 	umode_t mode;
 	long	nr_blocks;
 	long	nr_inodes;
+	struct hstate *hstate;
 };
 
 struct hugetlbfs_sb_info {
@@ -108,6 +109,7 @@ struct hugetlbfs_sb_info {
 	long	max_inodes;   /* inodes allowed */
 	long	free_inodes;  /* inodes free */
 	spinlock_t	stat_lock;
+	struct hstate *hstate;
 };
 
 
@@ -191,19 +193,21 @@ extern unsigned int default_hstate_idx;
 
 #define default_hstate (hstates[default_hstate_idx])
 
-static inline struct hstate *hstate_vma(struct vm_area_struct *vma)
+static inline struct hstate *hstate_inode(struct inode *i)
 {
-	return &default_hstate;
+	struct hugetlbfs_sb_info *hsb;
+	hsb = HUGETLBFS_SB(i->i_sb);
+	return hsb->hstate;
 }
 
 static inline struct hstate *hstate_file(struct file *f)
 {
-	return &default_hstate;
+	return hstate_inode(f->f_dentry->d_inode);
 }
 
-static inline struct hstate *hstate_inode(struct inode *i)
+static inline struct hstate *hstate_vma(struct vm_area_struct *vma)
 {
-	return &default_hstate;
+	return hstate_file(vma->vm_file);
 }
 
 static inline unsigned long huge_page_size(struct hstate *h)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 82378d44a0c5..4cf7a90e9140 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1439,19 +1439,9 @@ void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
 void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
 			  unsigned long end, struct page *ref_page)
 {
-	/*
-	 * It is undesirable to test vma->vm_file as it should be non-null
-	 * for valid hugetlb area. However, vm_file will be NULL in the error
-	 * cleanup path of do_mmap_pgoff. When hugetlbfs ->mmap method fails,
-	 * do_mmap_pgoff() nullifies vma->vm_file before calling this function
-	 * to clean up. Since no pte has actually been setup, it is safe to
-	 * do nothing in this case.
-	 */
-	if (vma->vm_file) {
-		spin_lock(&vma->vm_file->f_mapping->i_mmap_lock);
-		__unmap_hugepage_range(vma, start, end, ref_page);
-		spin_unlock(&vma->vm_file->f_mapping->i_mmap_lock);
-	}
+	spin_lock(&vma->vm_file->f_mapping->i_mmap_lock);
+	__unmap_hugepage_range(vma, start, end, ref_page);
+	spin_unlock(&vma->vm_file->f_mapping->i_mmap_lock);
 }
 
 /*
diff --git a/mm/memory.c b/mm/memory.c
index c1c1d6d8c22b..02fc6b1047b0 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -901,9 +901,23 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp,
 			}
 
 			if (unlikely(is_vm_hugetlb_page(vma))) {
-				unmap_hugepage_range(vma, start, end, NULL);
-				zap_work -= (end - start) /
+				/*
+				 * It is undesirable to test vma->vm_file as it
+				 * should be non-null for valid hugetlb area.
+				 * However, vm_file will be NULL in the error
+				 * cleanup path of do_mmap_pgoff. When
+				 * hugetlbfs ->mmap method fails,
+				 * do_mmap_pgoff() nullifies vma->vm_file
+				 * before calling this function to clean up.
+				 * Since no pte has actually been setup, it is
+				 * safe to do nothing in this case.
+				 */
+				if (vma->vm_file) {
+					unmap_hugepage_range(vma, start, end, NULL);
+					zap_work -= (end - start) /
 					pages_per_huge_page(hstate_vma(vma));
+				}
+
 				start = end;
 			} else
 				start = unmap_page_range(*tlbp, vma,
-- 
cgit v1.2.3


From a3437870160cf2caaac6bdd76c7377a5a4145a8c Mon Sep 17 00:00:00 2001
From: Nishanth Aravamudan <nacc@us.ibm.com>
Date: Wed, 23 Jul 2008 21:27:44 -0700
Subject: hugetlb: new sysfs interface

Provide new hugepages user APIs that are more suited to multiple hstates
in sysfs.  There is a new directory, /sys/kernel/hugepages.  Underneath
that directory there will be a directory per-supported hugepage size,
e.g.:

/sys/kernel/hugepages/hugepages-64kB
/sys/kernel/hugepages/hugepages-16384kB
/sys/kernel/hugepages/hugepages-16777216kB

corresponding to 64k, 16m and 16g respectively.  Within each
hugepages-size directory there are a number of files, corresponding to the
tracked counters in the hstate, e.g.:

/sys/kernel/hugepages/hugepages-64/nr_hugepages
/sys/kernel/hugepages/hugepages-64/nr_overcommit_hugepages
/sys/kernel/hugepages/hugepages-64/free_hugepages
/sys/kernel/hugepages/hugepages-64/resv_hugepages
/sys/kernel/hugepages/hugepages-64/surplus_hugepages

Of these files, the first two are read-write and the latter three are
read-only.  The size of the hugepage being manipulated is trivially
deducible from the enclosing directory and is always expressed in kB (to
match meminfo).

[dave@linux.vnet.ibm.com: fix build]
[nacc@us.ibm.com: hugetlb: hang off of /sys/kernel/mm rather than /sys/kernel]
[nacc@us.ibm.com: hugetlb: remove CONFIG_SYSFS dependency]
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: Nishanth Aravamudan <nacc@us.ibm.com>
Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: Dave Hansen <dave@linux.vnet.ibm.com>
Signed-off-by: Nishanth Aravamudan <nacc@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 .../ABI/testing/sysfs-kernel-mm-hugepages          |  15 ++
 Documentation/vm/hugetlbpage.txt                   |  23 ++
 include/linux/hugetlb.h                            |   2 +
 mm/hugetlb.c                                       | 288 ++++++++++++++++-----
 4 files changed, 262 insertions(+), 66 deletions(-)
 create mode 100644 Documentation/ABI/testing/sysfs-kernel-mm-hugepages

(limited to 'include/linux')

diff --git a/Documentation/ABI/testing/sysfs-kernel-mm-hugepages b/Documentation/ABI/testing/sysfs-kernel-mm-hugepages
new file mode 100644
index 000000000000..e21c00571cf4
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-kernel-mm-hugepages
@@ -0,0 +1,15 @@
+What:		/sys/kernel/mm/hugepages/
+Date:		June 2008
+Contact:	Nishanth Aravamudan <nacc@us.ibm.com>, hugetlb maintainers
+Description:
+		/sys/kernel/mm/hugepages/ contains a number of subdirectories
+		of the form hugepages-<size>kB, where <size> is the page size
+		of the hugepages supported by the kernel/CPU combination.
+
+		Under these directories are a number of files:
+			nr_hugepages
+			nr_overcommit_hugepages
+			free_hugepages
+			surplus_hugepages
+			resv_hugepages
+		See Documentation/vm/hugetlbpage.txt for details.
diff --git a/Documentation/vm/hugetlbpage.txt b/Documentation/vm/hugetlbpage.txt
index 3102b81bef88..8a5b5763f0fe 100644
--- a/Documentation/vm/hugetlbpage.txt
+++ b/Documentation/vm/hugetlbpage.txt
@@ -95,6 +95,29 @@ this condition holds, however, no more surplus huge pages will be
 allowed on the system until one of the two sysctls are increased
 sufficiently, or the surplus huge pages go out of use and are freed.
 
+With support for multiple hugepage pools at run-time available, much of
+the hugepage userspace interface has been duplicated in sysfs. The above
+information applies to the default hugepage size (which will be
+controlled by the proc interfaces for backwards compatibility). The root
+hugepage control directory is
+
+	/sys/kernel/mm/hugepages
+
+For each hugepage size supported by the running kernel, a subdirectory
+will exist, of the form
+
+	hugepages-${size}kB
+
+Inside each of these directories, the same set of files will exist:
+
+	nr_hugepages
+	nr_overcommit_hugepages
+	free_hugepages
+	resv_hugepages
+	surplus_hugepages
+
+which function as described above for the default hugepage-sized case.
+
 If the user applications are going to request hugepages using mmap system
 call, then it is required that system administrator mount a file system of
 type hugetlbfs:
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index ba9263e631b9..58c0de32e7f0 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -164,6 +164,7 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 
 #ifdef CONFIG_HUGETLB_PAGE
 
+#define HSTATE_NAME_LEN 32
 /* Defines one hugetlb page size */
 struct hstate {
 	int hugetlb_next_nid;
@@ -179,6 +180,7 @@ struct hstate {
 	unsigned int nr_huge_pages_node[MAX_NUMNODES];
 	unsigned int free_huge_pages_node[MAX_NUMNODES];
 	unsigned int surplus_huge_pages_node[MAX_NUMNODES];
+	char name[HSTATE_NAME_LEN];
 };
 
 void __init hugetlb_add_hstate(unsigned order);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 4cf7a90e9140..bb49ce5d0067 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -14,6 +14,7 @@
 #include <linux/mempolicy.h>
 #include <linux/cpuset.h>
 #include <linux/mutex.h>
+#include <linux/sysfs.h>
 
 #include <asm/page.h>
 #include <asm/pgtable.h>
@@ -942,72 +943,6 @@ static void __init report_hugepages(void)
 	}
 }
 
-static int __init hugetlb_init(void)
-{
-	BUILD_BUG_ON(HPAGE_SHIFT == 0);
-
-	if (!size_to_hstate(HPAGE_SIZE)) {
-		hugetlb_add_hstate(HUGETLB_PAGE_ORDER);
-		parsed_hstate->max_huge_pages = default_hstate_max_huge_pages;
-	}
-	default_hstate_idx = size_to_hstate(HPAGE_SIZE) - hstates;
-
-	hugetlb_init_hstates();
-
-	report_hugepages();
-
-	return 0;
-}
-module_init(hugetlb_init);
-
-/* Should be called on processing a hugepagesz=... option */
-void __init hugetlb_add_hstate(unsigned order)
-{
-	struct hstate *h;
-	if (size_to_hstate(PAGE_SIZE << order)) {
-		printk(KERN_WARNING "hugepagesz= specified twice, ignoring\n");
-		return;
-	}
-	BUG_ON(max_hstate >= HUGE_MAX_HSTATE);
-	BUG_ON(order == 0);
-	h = &hstates[max_hstate++];
-	h->order = order;
-	h->mask = ~((1ULL << (order + PAGE_SHIFT)) - 1);
-	hugetlb_init_one_hstate(h);
-	parsed_hstate = h;
-}
-
-static int __init hugetlb_setup(char *s)
-{
-	unsigned long *mhp;
-
-	/*
-	 * !max_hstate means we haven't parsed a hugepagesz= parameter yet,
-	 * so this hugepages= parameter goes to the "default hstate".
-	 */
-	if (!max_hstate)
-		mhp = &default_hstate_max_huge_pages;
-	else
-		mhp = &parsed_hstate->max_huge_pages;
-
-	if (sscanf(s, "%lu", mhp) <= 0)
-		*mhp = 0;
-
-	return 1;
-}
-__setup("hugepages=", hugetlb_setup);
-
-static unsigned int cpuset_mems_nr(unsigned int *array)
-{
-	int node;
-	unsigned int nr = 0;
-
-	for_each_node_mask(node, cpuset_current_mems_allowed)
-		nr += array[node];
-
-	return nr;
-}
-
 #ifdef CONFIG_SYSCTL
 #ifdef CONFIG_HIGHMEM
 static void try_to_free_low(struct hstate *h, unsigned long count)
@@ -1105,6 +1040,227 @@ out:
 	return ret;
 }
 
+#define HSTATE_ATTR_RO(_name) \
+	static struct kobj_attribute _name##_attr = __ATTR_RO(_name)
+
+#define HSTATE_ATTR(_name) \
+	static struct kobj_attribute _name##_attr = \
+		__ATTR(_name, 0644, _name##_show, _name##_store)
+
+static struct kobject *hugepages_kobj;
+static struct kobject *hstate_kobjs[HUGE_MAX_HSTATE];
+
+static struct hstate *kobj_to_hstate(struct kobject *kobj)
+{
+	int i;
+	for (i = 0; i < HUGE_MAX_HSTATE; i++)
+		if (hstate_kobjs[i] == kobj)
+			return &hstates[i];
+	BUG();
+	return NULL;
+}
+
+static ssize_t nr_hugepages_show(struct kobject *kobj,
+					struct kobj_attribute *attr, char *buf)
+{
+	struct hstate *h = kobj_to_hstate(kobj);
+	return sprintf(buf, "%lu\n", h->nr_huge_pages);
+}
+static ssize_t nr_hugepages_store(struct kobject *kobj,
+		struct kobj_attribute *attr, const char *buf, size_t count)
+{
+	int err;
+	unsigned long input;
+	struct hstate *h = kobj_to_hstate(kobj);
+
+	err = strict_strtoul(buf, 10, &input);
+	if (err)
+		return 0;
+
+	h->max_huge_pages = set_max_huge_pages(h, input);
+
+	return count;
+}
+HSTATE_ATTR(nr_hugepages);
+
+static ssize_t nr_overcommit_hugepages_show(struct kobject *kobj,
+					struct kobj_attribute *attr, char *buf)
+{
+	struct hstate *h = kobj_to_hstate(kobj);
+	return sprintf(buf, "%lu\n", h->nr_overcommit_huge_pages);
+}
+static ssize_t nr_overcommit_hugepages_store(struct kobject *kobj,
+		struct kobj_attribute *attr, const char *buf, size_t count)
+{
+	int err;
+	unsigned long input;
+	struct hstate *h = kobj_to_hstate(kobj);
+
+	err = strict_strtoul(buf, 10, &input);
+	if (err)
+		return 0;
+
+	spin_lock(&hugetlb_lock);
+	h->nr_overcommit_huge_pages = input;
+	spin_unlock(&hugetlb_lock);
+
+	return count;
+}
+HSTATE_ATTR(nr_overcommit_hugepages);
+
+static ssize_t free_hugepages_show(struct kobject *kobj,
+					struct kobj_attribute *attr, char *buf)
+{
+	struct hstate *h = kobj_to_hstate(kobj);
+	return sprintf(buf, "%lu\n", h->free_huge_pages);
+}
+HSTATE_ATTR_RO(free_hugepages);
+
+static ssize_t resv_hugepages_show(struct kobject *kobj,
+					struct kobj_attribute *attr, char *buf)
+{
+	struct hstate *h = kobj_to_hstate(kobj);
+	return sprintf(buf, "%lu\n", h->resv_huge_pages);
+}
+HSTATE_ATTR_RO(resv_hugepages);
+
+static ssize_t surplus_hugepages_show(struct kobject *kobj,
+					struct kobj_attribute *attr, char *buf)
+{
+	struct hstate *h = kobj_to_hstate(kobj);
+	return sprintf(buf, "%lu\n", h->surplus_huge_pages);
+}
+HSTATE_ATTR_RO(surplus_hugepages);
+
+static struct attribute *hstate_attrs[] = {
+	&nr_hugepages_attr.attr,
+	&nr_overcommit_hugepages_attr.attr,
+	&free_hugepages_attr.attr,
+	&resv_hugepages_attr.attr,
+	&surplus_hugepages_attr.attr,
+	NULL,
+};
+
+static struct attribute_group hstate_attr_group = {
+	.attrs = hstate_attrs,
+};
+
+static int __init hugetlb_sysfs_add_hstate(struct hstate *h)
+{
+	int retval;
+
+	hstate_kobjs[h - hstates] = kobject_create_and_add(h->name,
+							hugepages_kobj);
+	if (!hstate_kobjs[h - hstates])
+		return -ENOMEM;
+
+	retval = sysfs_create_group(hstate_kobjs[h - hstates],
+							&hstate_attr_group);
+	if (retval)
+		kobject_put(hstate_kobjs[h - hstates]);
+
+	return retval;
+}
+
+static void __init hugetlb_sysfs_init(void)
+{
+	struct hstate *h;
+	int err;
+
+	hugepages_kobj = kobject_create_and_add("hugepages", mm_kobj);
+	if (!hugepages_kobj)
+		return;
+
+	for_each_hstate(h) {
+		err = hugetlb_sysfs_add_hstate(h);
+		if (err)
+			printk(KERN_ERR "Hugetlb: Unable to add hstate %s",
+								h->name);
+	}
+}
+
+static void __exit hugetlb_exit(void)
+{
+	struct hstate *h;
+
+	for_each_hstate(h) {
+		kobject_put(hstate_kobjs[h - hstates]);
+	}
+
+	kobject_put(hugepages_kobj);
+}
+module_exit(hugetlb_exit);
+
+static int __init hugetlb_init(void)
+{
+	BUILD_BUG_ON(HPAGE_SHIFT == 0);
+
+	if (!size_to_hstate(HPAGE_SIZE)) {
+		hugetlb_add_hstate(HUGETLB_PAGE_ORDER);
+		parsed_hstate->max_huge_pages = default_hstate_max_huge_pages;
+	}
+	default_hstate_idx = size_to_hstate(HPAGE_SIZE) - hstates;
+
+	hugetlb_init_hstates();
+
+	report_hugepages();
+
+	hugetlb_sysfs_init();
+
+	return 0;
+}
+module_init(hugetlb_init);
+
+/* Should be called on processing a hugepagesz=... option */
+void __init hugetlb_add_hstate(unsigned order)
+{
+	struct hstate *h;
+	if (size_to_hstate(PAGE_SIZE << order)) {
+		printk(KERN_WARNING "hugepagesz= specified twice, ignoring\n");
+		return;
+	}
+	BUG_ON(max_hstate >= HUGE_MAX_HSTATE);
+	BUG_ON(order == 0);
+	h = &hstates[max_hstate++];
+	h->order = order;
+	h->mask = ~((1ULL << (order + PAGE_SHIFT)) - 1);
+	snprintf(h->name, HSTATE_NAME_LEN, "hugepages-%lukB",
+					huge_page_size(h)/1024);
+	hugetlb_init_one_hstate(h);
+	parsed_hstate = h;
+}
+
+static int __init hugetlb_setup(char *s)
+{
+	unsigned long *mhp;
+
+	/*
+	 * !max_hstate means we haven't parsed a hugepagesz= parameter yet,
+	 * so this hugepages= parameter goes to the "default hstate".
+	 */
+	if (!max_hstate)
+		mhp = &default_hstate_max_huge_pages;
+	else
+		mhp = &parsed_hstate->max_huge_pages;
+
+	if (sscanf(s, "%lu", mhp) <= 0)
+		*mhp = 0;
+
+	return 1;
+}
+__setup("hugepages=", hugetlb_setup);
+
+static unsigned int cpuset_mems_nr(unsigned int *array)
+{
+	int node;
+	unsigned int nr = 0;
+
+	for_each_node_mask(node, cpuset_current_mems_allowed)
+		nr += array[node];
+
+	return nr;
+}
+
 int hugetlb_sysctl_handler(struct ctl_table *table, int write,
 			   struct file *file, void __user *buffer,
 			   size_t *length, loff_t *ppos)
-- 
cgit v1.2.3


From b54bbf7b81170f03597c17dd0b559e3006bc9868 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Wed, 23 Jul 2008 21:27:45 -0700
Subject: mm: introduce non panic alloc_bootmem

Straight forward variant of the existing __alloc_bootmem_node, only
subsequent patch when allocating giant hugepages at boot -- don't want to
panic if we can't allocate as many as the user asked for.

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bootmem.h |  4 ++++
 mm/bootmem.c            | 12 ++++++++++++
 2 files changed, 16 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index dd8fee6c46d9..f352c5f125b4 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -89,6 +89,10 @@ extern void *__alloc_bootmem_node(pg_data_t *pgdat,
 				  unsigned long size,
 				  unsigned long align,
 				  unsigned long goal);
+extern void *__alloc_bootmem_node_nopanic(pg_data_t *pgdat,
+				  unsigned long size,
+				  unsigned long align,
+				  unsigned long goal);
 extern unsigned long init_bootmem_node(pg_data_t *pgdat,
 				       unsigned long freepfn,
 				       unsigned long startpfn,
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 4bc6ae2fbaa3..9ac972535fff 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -578,6 +578,18 @@ void * __init alloc_bootmem_section(unsigned long size,
 }
 #endif
 
+void * __init __alloc_bootmem_node_nopanic(pg_data_t *pgdat, unsigned long size,
+				   unsigned long align, unsigned long goal)
+{
+	void *ptr;
+
+	ptr = alloc_bootmem_core(pgdat->bdata, size, align, goal, 0);
+	if (ptr)
+		return ptr;
+
+	return __alloc_bootmem_nopanic(size, align, goal);
+}
+
 #ifndef ARCH_LOW_ADDRESS_LIMIT
 #define ARCH_LOW_ADDRESS_LIMIT	0xffffffffUL
 #endif
-- 
cgit v1.2.3


From ceb868796181dc95ea01a110e123afd391639873 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Wed, 23 Jul 2008 21:27:50 -0700
Subject: hugetlb: introduce pud_huge

Straight forward extensions for huge pages located in the PUD instead of
PMDs.

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/ia64/mm/hugetlbpage.c    |  6 ++++++
 arch/powerpc/mm/hugetlbpage.c |  5 +++++
 arch/s390/mm/hugetlbpage.c    |  5 +++++
 arch/sh/mm/hugetlbpage.c      |  5 +++++
 arch/sparc64/mm/hugetlbpage.c |  5 +++++
 arch/x86/mm/hugetlbpage.c     | 25 ++++++++++++++++++++++++-
 include/linux/hugetlb.h       |  5 +++++
 mm/hugetlb.c                  |  9 +++++++++
 mm/memory.c                   | 15 +++++++++++----
 9 files changed, 75 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/mm/hugetlbpage.c b/arch/ia64/mm/hugetlbpage.c
index 6170f097d255..c45fc7f5a979 100644
--- a/arch/ia64/mm/hugetlbpage.c
+++ b/arch/ia64/mm/hugetlbpage.c
@@ -107,6 +107,12 @@ int pmd_huge(pmd_t pmd)
 {
 	return 0;
 }
+
+int pud_huge(pud_t pud)
+{
+	return 0;
+}
+
 struct page *
 follow_huge_pmd(struct mm_struct *mm, unsigned long address, pmd_t *pmd, int write)
 {
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index c94dc71af989..63db7adce717 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -369,6 +369,11 @@ int pmd_huge(pmd_t pmd)
 	return 0;
 }
 
+int pud_huge(pud_t pud)
+{
+	return 0;
+}
+
 struct page *
 follow_huge_pmd(struct mm_struct *mm, unsigned long address,
 		pmd_t *pmd, int write)
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c
index 9162dc84f77f..f28c43d2f61d 100644
--- a/arch/s390/mm/hugetlbpage.c
+++ b/arch/s390/mm/hugetlbpage.c
@@ -120,6 +120,11 @@ int pmd_huge(pmd_t pmd)
 	return !!(pmd_val(pmd) & _SEGMENT_ENTRY_LARGE);
 }
 
+int pud_huge(pud_t pud)
+{
+	return 0;
+}
+
 struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
 			     pmd_t *pmdp, int write)
 {
diff --git a/arch/sh/mm/hugetlbpage.c b/arch/sh/mm/hugetlbpage.c
index 2f9dbe0ef4ac..9304117039c4 100644
--- a/arch/sh/mm/hugetlbpage.c
+++ b/arch/sh/mm/hugetlbpage.c
@@ -79,6 +79,11 @@ int pmd_huge(pmd_t pmd)
 	return 0;
 }
 
+int pud_huge(pud_t pud)
+{
+	return 0;
+}
+
 struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
 			     pmd_t *pmd, int write)
 {
diff --git a/arch/sparc64/mm/hugetlbpage.c b/arch/sparc64/mm/hugetlbpage.c
index 1307b23f6a76..f27d10369e0c 100644
--- a/arch/sparc64/mm/hugetlbpage.c
+++ b/arch/sparc64/mm/hugetlbpage.c
@@ -295,6 +295,11 @@ int pmd_huge(pmd_t pmd)
 	return 0;
 }
 
+int pud_huge(pud_t pud)
+{
+	return 0;
+}
+
 struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
 			     pmd_t *pmd, int write)
 {
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index 52476fde8996..a4789e87a315 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -189,6 +189,11 @@ int pmd_huge(pmd_t pmd)
 	return 0;
 }
 
+int pud_huge(pud_t pud)
+{
+	return 0;
+}
+
 struct page *
 follow_huge_pmd(struct mm_struct *mm, unsigned long address,
 		pmd_t *pmd, int write)
@@ -209,6 +214,11 @@ int pmd_huge(pmd_t pmd)
 	return !!(pmd_val(pmd) & _PAGE_PSE);
 }
 
+int pud_huge(pud_t pud)
+{
+	return 0;
+}
+
 struct page *
 follow_huge_pmd(struct mm_struct *mm, unsigned long address,
 		pmd_t *pmd, int write)
@@ -217,9 +227,22 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address,
 
 	page = pte_page(*(pte_t *)pmd);
 	if (page)
-		page += ((address & ~HPAGE_MASK) >> PAGE_SHIFT);
+		page += ((address & ~PMD_MASK) >> PAGE_SHIFT);
 	return page;
 }
+
+struct page *
+follow_huge_pud(struct mm_struct *mm, unsigned long address,
+		pud_t *pud, int write)
+{
+	struct page *page;
+
+	page = pte_page(*(pte_t *)pud);
+	if (page)
+		page += ((address & ~PUD_MASK) >> PAGE_SHIFT);
+	return page;
+}
+
 #endif
 
 /* x86_64 also uses this file */
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 58c0de32e7f0..b2c17f62cacb 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -50,7 +50,10 @@ struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
 			      int write);
 struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
 				pmd_t *pmd, int write);
+struct page *follow_huge_pud(struct mm_struct *mm, unsigned long address,
+				pud_t *pud, int write);
 int pmd_huge(pmd_t pmd);
+int pud_huge(pud_t pmd);
 void hugetlb_change_protection(struct vm_area_struct *vma,
 		unsigned long address, unsigned long end, pgprot_t newprot);
 
@@ -78,8 +81,10 @@ static inline unsigned long hugetlb_total_pages(void)
 #define hugetlb_report_meminfo(buf)		0
 #define hugetlb_report_node_meminfo(n, buf)	0
 #define follow_huge_pmd(mm, addr, pmd, write)	NULL
+#define follow_huge_pud(mm, addr, pud, write)	NULL
 #define prepare_hugepage_range(file, addr, len)	(-EINVAL)
 #define pmd_huge(x)	0
+#define pud_huge(x)	0
 #define is_hugepage_only_range(mm, addr, len)	0
 #define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) ({BUG(); 0; })
 #define hugetlb_fault(mm, vma, addr, write)	({ BUG(); 0; })
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 0c74c14dd2f7..107c1ce223cb 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1996,6 +1996,15 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	return ret;
 }
 
+/* Can be overriden by architectures */
+__attribute__((weak)) struct page *
+follow_huge_pud(struct mm_struct *mm, unsigned long address,
+	       pud_t *pud, int write)
+{
+	BUG();
+	return NULL;
+}
+
 int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 			struct page **pages, struct vm_area_struct **vmas,
 			unsigned long *position, int *length, int i,
diff --git a/mm/memory.c b/mm/memory.c
index 02fc6b1047b0..262e3eb6601a 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -998,19 +998,24 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
 		goto no_page_table;
 
 	pud = pud_offset(pgd, address);
-	if (pud_none(*pud) || unlikely(pud_bad(*pud)))
+	if (pud_none(*pud))
+		goto no_page_table;
+	if (pud_huge(*pud)) {
+		BUG_ON(flags & FOLL_GET);
+		page = follow_huge_pud(mm, address, pud, flags & FOLL_WRITE);
+		goto out;
+	}
+	if (unlikely(pud_bad(*pud)))
 		goto no_page_table;
-	
+
 	pmd = pmd_offset(pud, address);
 	if (pmd_none(*pmd))
 		goto no_page_table;
-
 	if (pmd_huge(*pmd)) {
 		BUG_ON(flags & FOLL_GET);
 		page = follow_huge_pmd(mm, address, pmd, flags & FOLL_WRITE);
 		goto out;
 	}
-
 	if (unlikely(pmd_bad(*pmd)))
 		goto no_page_table;
 
@@ -1567,6 +1572,8 @@ static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud,
 	unsigned long next;
 	int err;
 
+	BUG_ON(pud_huge(*pud));
+
 	pmd = pmd_alloc(mm, pud, addr);
 	if (!pmd)
 		return -ENOMEM;
-- 
cgit v1.2.3


From 53ba51d21d6e048424ab8aadfebdb1f25ae07b60 Mon Sep 17 00:00:00 2001
From: Jon Tollefson <kniht@linux.vnet.ibm.com>
Date: Wed, 23 Jul 2008 21:27:52 -0700
Subject: hugetlb: allow arch overridden hugepage allocation

Allow alloc_bootmem_huge_page() to be overridden by architectures that
can't always use bootmem.  This requires huge_boot_pages to be available
for use by this function.

This is required for powerpc 16G pages, which have to be reserved prior to
boot-time.  The location of these pages are indicated in the device tree.

Acked-by: Adam Litke <agl@us.ibm.com>
Signed-off-by: Jon Tollefson <kniht@linux.vnet.ibm.com>
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/hugetlb.h | 10 ++++++++++
 mm/hugetlb.c            | 11 +++--------
 2 files changed, 13 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index b2c17f62cacb..9a71d4cc88c8 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -39,6 +39,7 @@ void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed);
 extern unsigned long hugepages_treat_as_movable;
 extern const unsigned long hugetlb_zero, hugetlb_infinity;
 extern int sysctl_hugetlb_shm_group;
+extern struct list_head huge_boot_pages;
 
 /* arch callbacks */
 
@@ -188,6 +189,14 @@ struct hstate {
 	char name[HSTATE_NAME_LEN];
 };
 
+struct huge_bootmem_page {
+	struct list_head list;
+	struct hstate *hstate;
+};
+
+/* arch callback */
+int __init alloc_bootmem_huge_page(struct hstate *h);
+
 void __init hugetlb_add_hstate(unsigned order);
 struct hstate *size_to_hstate(unsigned long size);
 
@@ -256,6 +265,7 @@ static inline struct hstate *page_hstate(struct page *page)
 
 #else
 struct hstate {};
+#define alloc_bootmem_huge_page(h) NULL
 #define hstate_file(f) NULL
 #define hstate_vma(v) NULL
 #define hstate_inode(i) NULL
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 2a2f6e869401..3e1506b808a3 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -31,6 +31,8 @@ static int max_hstate;
 unsigned int default_hstate_idx;
 struct hstate hstates[HUGE_MAX_HSTATE];
 
+__initdata LIST_HEAD(huge_boot_pages);
+
 /* for command line parsing */
 static struct hstate * __initdata parsed_hstate;
 static unsigned long __initdata default_hstate_max_huge_pages;
@@ -925,14 +927,7 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma,
 	return page;
 }
 
-static __initdata LIST_HEAD(huge_boot_pages);
-
-struct huge_bootmem_page {
-	struct list_head list;
-	struct hstate *hstate;
-};
-
-static int __init alloc_bootmem_huge_page(struct hstate *h)
+__attribute__((weak)) int alloc_bootmem_huge_page(struct hstate *h)
 {
 	struct huge_bootmem_page *m;
 	int nr_nodes = nodes_weight(node_online_map);
-- 
cgit v1.2.3


From 223e8dc9249c9e15f6c8b638d73fcad78ccb0a88 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@saeurebad.de>
Date: Wed, 23 Jul 2008 21:28:00 -0700
Subject: bootmem: reorder code to match new bootmem structure

This only reorders functions so that further patches will be easier to
read.  No code changed.

Signed-off-by: Johannes Weiner <hannes@saeurebad.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bootmem.h |  86 ++++++------
 mm/bootmem.c            | 356 ++++++++++++++++++++++++------------------------
 2 files changed, 222 insertions(+), 220 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index f352c5f125b4..5000fd70b04f 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -41,36 +41,62 @@ typedef struct bootmem_data {
 extern bootmem_data_t bootmem_node_data[];
 
 extern unsigned long bootmem_bootmap_pages(unsigned long);
+
+extern unsigned long init_bootmem_node(pg_data_t *pgdat,
+				       unsigned long freepfn,
+				       unsigned long startpfn,
+				       unsigned long endpfn);
 extern unsigned long init_bootmem(unsigned long addr, unsigned long memend);
+
+extern unsigned long free_all_bootmem_node(pg_data_t *pgdat);
+extern unsigned long free_all_bootmem(void);
+
+extern void free_bootmem_node(pg_data_t *pgdat,
+			      unsigned long addr,
+			      unsigned long size);
 extern void free_bootmem(unsigned long addr, unsigned long size);
-extern void *__alloc_bootmem(unsigned long size,
+
+/*
+ * Flags for reserve_bootmem (also if CONFIG_HAVE_ARCH_BOOTMEM_NODE,
+ * the architecture-specific code should honor this).
+ *
+ * If flags is 0, then the return value is always 0 (success). If
+ * flags contains BOOTMEM_EXCLUSIVE, then -EBUSY is returned if the
+ * memory already was reserved.
+ */
+#define BOOTMEM_DEFAULT		0
+#define BOOTMEM_EXCLUSIVE	(1<<0)
+
+extern int reserve_bootmem_node(pg_data_t *pgdat,
+				 unsigned long physaddr,
+				 unsigned long size,
+				 int flags);
+#ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE
+extern int reserve_bootmem(unsigned long addr, unsigned long size, int flags);
+#endif
+
+extern void *__alloc_bootmem_nopanic(unsigned long size,
 			     unsigned long align,
 			     unsigned long goal);
-extern void *__alloc_bootmem_nopanic(unsigned long size,
+extern void *__alloc_bootmem(unsigned long size,
 				     unsigned long align,
 				     unsigned long goal);
 extern void *__alloc_bootmem_low(unsigned long size,
 				 unsigned long align,
 				 unsigned long goal);
+extern void *__alloc_bootmem_node(pg_data_t *pgdat,
+				  unsigned long size,
+				  unsigned long align,
+				  unsigned long goal);
+extern void *__alloc_bootmem_node_nopanic(pg_data_t *pgdat,
+				  unsigned long size,
+				  unsigned long align,
+				  unsigned long goal);
 extern void *__alloc_bootmem_low_node(pg_data_t *pgdat,
 				      unsigned long size,
 				      unsigned long align,
 				      unsigned long goal);
-
-/*
- * flags for reserve_bootmem (also if CONFIG_HAVE_ARCH_BOOTMEM_NODE,
- * the architecture-specific code should honor this)
- */
-#define BOOTMEM_DEFAULT		0
-#define BOOTMEM_EXCLUSIVE	(1<<0)
-
 #ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE
-/*
- * If flags is 0, then the return value is always 0 (success). If
- * flags contains BOOTMEM_EXCLUSIVE, then -EBUSY is returned if the
- * memory already was reserved.
- */
-extern int reserve_bootmem(unsigned long addr, unsigned long size, int flags);
 #define alloc_bootmem(x) \
 	__alloc_bootmem(x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
 #define alloc_bootmem_low(x) \
@@ -83,38 +109,16 @@ extern int reserve_bootmem(unsigned long addr, unsigned long size, int flags);
 
 extern int reserve_bootmem_generic(unsigned long addr, unsigned long size,
 				   int flags);
-extern unsigned long free_all_bootmem(void);
-extern unsigned long free_all_bootmem_node(pg_data_t *pgdat);
-extern void *__alloc_bootmem_node(pg_data_t *pgdat,
-				  unsigned long size,
-				  unsigned long align,
-				  unsigned long goal);
-extern void *__alloc_bootmem_node_nopanic(pg_data_t *pgdat,
-				  unsigned long size,
-				  unsigned long align,
-				  unsigned long goal);
-extern unsigned long init_bootmem_node(pg_data_t *pgdat,
-				       unsigned long freepfn,
-				       unsigned long startpfn,
-				       unsigned long endpfn);
-extern int reserve_bootmem_node(pg_data_t *pgdat,
-				 unsigned long physaddr,
-				 unsigned long size,
-				 int flags);
-extern void free_bootmem_node(pg_data_t *pgdat,
-			      unsigned long addr,
-			      unsigned long size);
-extern void *alloc_bootmem_section(unsigned long size,
-				   unsigned long section_nr);
 
-#ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE
 #define alloc_bootmem_node(pgdat, x) \
 	__alloc_bootmem_node(pgdat, x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
 #define alloc_bootmem_pages_node(pgdat, x) \
 	__alloc_bootmem_node(pgdat, x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS))
 #define alloc_bootmem_low_pages_node(pgdat, x) \
 	__alloc_bootmem_low_node(pgdat, x, PAGE_SIZE, 0)
-#endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */
+
+extern void *alloc_bootmem_section(unsigned long size,
+				   unsigned long section_nr);
 
 #ifdef CONFIG_HAVE_ARCH_ALLOC_REMAP
 extern void *alloc_remap(int nid, unsigned long size);
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 9ac972535fff..24eacf52c50e 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -38,6 +38,19 @@ unsigned long saved_max_pfn;
 
 bootmem_data_t bootmem_node_data[MAX_NUMNODES] __initdata;
 
+/*
+ * Given an initialised bdata, it returns the size of the boot bitmap
+ */
+static unsigned long __init get_mapsize(bootmem_data_t *bdata)
+{
+	unsigned long mapsize;
+	unsigned long start = PFN_DOWN(bdata->node_boot_start);
+	unsigned long end = bdata->node_low_pfn;
+
+	mapsize = ((end - start) + 7) / 8;
+	return ALIGN(mapsize, sizeof(long));
+}
+
 /* return the number of _pages_ that will be allocated for the boot bitmap */
 unsigned long __init bootmem_bootmap_pages(unsigned long pages)
 {
@@ -71,19 +84,6 @@ static void __init link_bootmem(bootmem_data_t *bdata)
 	list_add_tail(&bdata->list, &bdata_list);
 }
 
-/*
- * Given an initialised bdata, it returns the size of the boot bitmap
- */
-static unsigned long __init get_mapsize(bootmem_data_t *bdata)
-{
-	unsigned long mapsize;
-	unsigned long start = PFN_DOWN(bdata->node_boot_start);
-	unsigned long end = bdata->node_low_pfn;
-
-	mapsize = ((end - start) + 7) / 8;
-	return ALIGN(mapsize, sizeof(long));
-}
-
 /*
  * Called once to set up the allocator itself.
  */
@@ -108,6 +108,146 @@ static unsigned long __init init_bootmem_core(bootmem_data_t *bdata,
 	return mapsize;
 }
 
+unsigned long __init init_bootmem_node(pg_data_t *pgdat, unsigned long freepfn,
+				unsigned long startpfn, unsigned long endpfn)
+{
+	return init_bootmem_core(pgdat->bdata, freepfn, startpfn, endpfn);
+}
+
+unsigned long __init init_bootmem(unsigned long start, unsigned long pages)
+{
+	max_low_pfn = pages;
+	min_low_pfn = start;
+	return init_bootmem_core(NODE_DATA(0)->bdata, start, 0, pages);
+}
+
+static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
+{
+	struct page *page;
+	unsigned long pfn;
+	unsigned long i, count;
+	unsigned long idx;
+	unsigned long *map;
+	int gofast = 0;
+
+	BUG_ON(!bdata->node_bootmem_map);
+
+	count = 0;
+	/* first extant page of the node */
+	pfn = PFN_DOWN(bdata->node_boot_start);
+	idx = bdata->node_low_pfn - pfn;
+	map = bdata->node_bootmem_map;
+	/*
+	 * Check if we are aligned to BITS_PER_LONG pages.  If so, we might
+	 * be able to free page orders of that size at once.
+	 */
+	if (!(pfn & (BITS_PER_LONG-1)))
+		gofast = 1;
+
+	for (i = 0; i < idx; ) {
+		unsigned long v = ~map[i / BITS_PER_LONG];
+
+		if (gofast && v == ~0UL) {
+			int order;
+
+			page = pfn_to_page(pfn);
+			count += BITS_PER_LONG;
+			order = ffs(BITS_PER_LONG) - 1;
+			__free_pages_bootmem(page, order);
+			i += BITS_PER_LONG;
+			page += BITS_PER_LONG;
+		} else if (v) {
+			unsigned long m;
+
+			page = pfn_to_page(pfn);
+			for (m = 1; m && i < idx; m<<=1, page++, i++) {
+				if (v & m) {
+					count++;
+					__free_pages_bootmem(page, 0);
+				}
+			}
+		} else {
+			i += BITS_PER_LONG;
+		}
+		pfn += BITS_PER_LONG;
+	}
+
+	/*
+	 * Now free the allocator bitmap itself, it's not
+	 * needed anymore:
+	 */
+	page = virt_to_page(bdata->node_bootmem_map);
+	idx = (get_mapsize(bdata) + PAGE_SIZE-1) >> PAGE_SHIFT;
+	for (i = 0; i < idx; i++, page++)
+		__free_pages_bootmem(page, 0);
+	count += i;
+	bdata->node_bootmem_map = NULL;
+
+	return count;
+}
+
+unsigned long __init free_all_bootmem_node(pg_data_t *pgdat)
+{
+	register_page_bootmem_info_node(pgdat);
+	return free_all_bootmem_core(pgdat->bdata);
+}
+
+unsigned long __init free_all_bootmem(void)
+{
+	return free_all_bootmem_core(NODE_DATA(0)->bdata);
+}
+
+static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr,
+				     unsigned long size)
+{
+	unsigned long sidx, eidx;
+	unsigned long i;
+
+	BUG_ON(!size);
+
+	/* out range */
+	if (addr + size < bdata->node_boot_start ||
+		PFN_DOWN(addr) > bdata->node_low_pfn)
+		return;
+	/*
+	 * round down end of usable mem, partially free pages are
+	 * considered reserved.
+	 */
+
+	if (addr >= bdata->node_boot_start && addr < bdata->last_success)
+		bdata->last_success = addr;
+
+	/*
+	 * Round up to index to the range.
+	 */
+	if (PFN_UP(addr) > PFN_DOWN(bdata->node_boot_start))
+		sidx = PFN_UP(addr) - PFN_DOWN(bdata->node_boot_start);
+	else
+		sidx = 0;
+
+	eidx = PFN_DOWN(addr + size - bdata->node_boot_start);
+	if (eidx > bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start))
+		eidx = bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start);
+
+	for (i = sidx; i < eidx; i++) {
+		if (unlikely(!test_and_clear_bit(i, bdata->node_bootmem_map)))
+			BUG();
+	}
+}
+
+void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
+			      unsigned long size)
+{
+	free_bootmem_core(pgdat->bdata, physaddr, size);
+}
+
+void __init free_bootmem(unsigned long addr, unsigned long size)
+{
+	bootmem_data_t *bdata;
+	list_for_each_entry(bdata, &bdata_list, list)
+		free_bootmem_core(bdata, addr, size);
+}
+
 /*
  * Marks a particular physical memory range as unallocatable. Usable RAM
  * might be used for boot-time allocations - or it might get added
@@ -183,43 +323,36 @@ static void __init reserve_bootmem_core(bootmem_data_t *bdata,
 	}
 }
 
-static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr,
-				     unsigned long size)
+int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
+				 unsigned long size, int flags)
 {
-	unsigned long sidx, eidx;
-	unsigned long i;
-
-	BUG_ON(!size);
-
-	/* out range */
-	if (addr + size < bdata->node_boot_start ||
-		PFN_DOWN(addr) > bdata->node_low_pfn)
-		return;
-	/*
-	 * round down end of usable mem, partially free pages are
-	 * considered reserved.
-	 */
-
-	if (addr >= bdata->node_boot_start && addr < bdata->last_success)
-		bdata->last_success = addr;
+	int ret;
 
-	/*
-	 * Round up to index to the range.
-	 */
-	if (PFN_UP(addr) > PFN_DOWN(bdata->node_boot_start))
-		sidx = PFN_UP(addr) - PFN_DOWN(bdata->node_boot_start);
-	else
-		sidx = 0;
+	ret = can_reserve_bootmem_core(pgdat->bdata, physaddr, size, flags);
+	if (ret < 0)
+		return -ENOMEM;
+	reserve_bootmem_core(pgdat->bdata, physaddr, size, flags);
+	return 0;
+}
 
-	eidx = PFN_DOWN(addr + size - bdata->node_boot_start);
-	if (eidx > bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start))
-		eidx = bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start);
+#ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE
+int __init reserve_bootmem(unsigned long addr, unsigned long size,
+			    int flags)
+{
+	bootmem_data_t *bdata;
+	int ret;
 
-	for (i = sidx; i < eidx; i++) {
-		if (unlikely(!test_and_clear_bit(i, bdata->node_bootmem_map)))
-			BUG();
+	list_for_each_entry(bdata, &bdata_list, list) {
+		ret = can_reserve_bootmem_core(bdata, addr, size, flags);
+		if (ret < 0)
+			return ret;
 	}
+	list_for_each_entry(bdata, &bdata_list, list)
+		reserve_bootmem_core(bdata, addr, size, flags);
+
+	return 0;
 }
+#endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */
 
 /*
  * We 'merge' subsequent allocations to save space. We might 'lose'
@@ -371,140 +504,6 @@ found:
 	return ret;
 }
 
-static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
-{
-	struct page *page;
-	unsigned long pfn;
-	unsigned long i, count;
-	unsigned long idx;
-	unsigned long *map; 
-	int gofast = 0;
-
-	BUG_ON(!bdata->node_bootmem_map);
-
-	count = 0;
-	/* first extant page of the node */
-	pfn = PFN_DOWN(bdata->node_boot_start);
-	idx = bdata->node_low_pfn - pfn;
-	map = bdata->node_bootmem_map;
-	/*
-	 * Check if we are aligned to BITS_PER_LONG pages.  If so, we might
-	 * be able to free page orders of that size at once.
-	 */
-	if (!(pfn & (BITS_PER_LONG-1)))
-		gofast = 1;
-
-	for (i = 0; i < idx; ) {
-		unsigned long v = ~map[i / BITS_PER_LONG];
-
-		if (gofast && v == ~0UL) {
-			int order;
-
-			page = pfn_to_page(pfn);
-			count += BITS_PER_LONG;
-			order = ffs(BITS_PER_LONG) - 1;
-			__free_pages_bootmem(page, order);
-			i += BITS_PER_LONG;
-			page += BITS_PER_LONG;
-		} else if (v) {
-			unsigned long m;
-
-			page = pfn_to_page(pfn);
-			for (m = 1; m && i < idx; m<<=1, page++, i++) {
-				if (v & m) {
-					count++;
-					__free_pages_bootmem(page, 0);
-				}
-			}
-		} else {
-			i += BITS_PER_LONG;
-		}
-		pfn += BITS_PER_LONG;
-	}
-
-	/*
-	 * Now free the allocator bitmap itself, it's not
-	 * needed anymore:
-	 */
-	page = virt_to_page(bdata->node_bootmem_map);
-	idx = (get_mapsize(bdata) + PAGE_SIZE-1) >> PAGE_SHIFT;
-	for (i = 0; i < idx; i++, page++)
-		__free_pages_bootmem(page, 0);
-	count += i;
-	bdata->node_bootmem_map = NULL;
-
-	return count;
-}
-
-unsigned long __init init_bootmem_node(pg_data_t *pgdat, unsigned long freepfn,
-				unsigned long startpfn, unsigned long endpfn)
-{
-	return init_bootmem_core(pgdat->bdata, freepfn, startpfn, endpfn);
-}
-
-int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
-				 unsigned long size, int flags)
-{
-	int ret;
-
-	ret = can_reserve_bootmem_core(pgdat->bdata, physaddr, size, flags);
-	if (ret < 0)
-		return -ENOMEM;
-	reserve_bootmem_core(pgdat->bdata, physaddr, size, flags);
-
-	return 0;
-}
-
-void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
-			      unsigned long size)
-{
-	free_bootmem_core(pgdat->bdata, physaddr, size);
-}
-
-unsigned long __init free_all_bootmem_node(pg_data_t *pgdat)
-{
-	register_page_bootmem_info_node(pgdat);
-	return free_all_bootmem_core(pgdat->bdata);
-}
-
-unsigned long __init init_bootmem(unsigned long start, unsigned long pages)
-{
-	max_low_pfn = pages;
-	min_low_pfn = start;
-	return init_bootmem_core(NODE_DATA(0)->bdata, start, 0, pages);
-}
-
-#ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE
-int __init reserve_bootmem(unsigned long addr, unsigned long size,
-			    int flags)
-{
-	bootmem_data_t *bdata;
-	int ret;
-
-	list_for_each_entry(bdata, &bdata_list, list) {
-		ret = can_reserve_bootmem_core(bdata, addr, size, flags);
-		if (ret < 0)
-			return ret;
-	}
-	list_for_each_entry(bdata, &bdata_list, list)
-		reserve_bootmem_core(bdata, addr, size, flags);
-
-	return 0;
-}
-#endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */
-
-void __init free_bootmem(unsigned long addr, unsigned long size)
-{
-	bootmem_data_t *bdata;
-	list_for_each_entry(bdata, &bdata_list, list)
-		free_bootmem_core(bdata, addr, size);
-}
-
-unsigned long __init free_all_bootmem(void)
-{
-	return free_all_bootmem_core(NODE_DATA(0)->bdata);
-}
-
 void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align,
 				      unsigned long goal)
 {
@@ -534,7 +533,6 @@ void * __init __alloc_bootmem(unsigned long size, unsigned long align,
 	return NULL;
 }
 
-
 void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size,
 				   unsigned long align, unsigned long goal)
 {
-- 
cgit v1.2.3


From 5f2809e69c7128f86316048221cf45146f69a4a0 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@saeurebad.de>
Date: Wed, 23 Jul 2008 21:28:05 -0700
Subject: bootmem: clean up alloc_bootmem_core

alloc_bootmem_core has become quite nasty to read over time.  This is a
clean rewrite that keeps the semantics.

bdata->last_pos has been dropped.

bdata->last_success has been renamed to hint_idx and it is now an index
relative to the node's range.  Since further block searching might start
at this index, it is now set to the end of a succeeded allocation rather
than its beginning.

bdata->last_offset has been renamed to last_end_off to be more clear that
it represents the ending address of the last allocation relative to the
node.

[y-goto@jp.fujitsu.com: fix new alloc_bootmem_core()]
Signed-off-by: Johannes Weiner <hannes@saeurebad.de>
Signed-off-by: Yasunori Goto <y-goto@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bootmem.h |   6 +-
 mm/bootmem.c            | 212 +++++++++++++++++-------------------------------
 2 files changed, 78 insertions(+), 140 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index 5000fd70b04f..90921d10ffa2 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -31,10 +31,8 @@ typedef struct bootmem_data {
 	unsigned long node_boot_start;
 	unsigned long node_low_pfn;
 	void *node_bootmem_map;
-	unsigned long last_offset;
-	unsigned long last_pos;
-	unsigned long last_success;	/* Previous allocation point.  To speed
-					 * up searching */
+	unsigned long last_end_off;
+	unsigned long hint_idx;
 	struct list_head list;
 } bootmem_data_t;
 
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 300d126ec533..94ea612deccf 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -242,8 +242,9 @@ static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr,
 	 * considered reserved.
 	 */
 
-	if (addr >= bdata->node_boot_start && addr < bdata->last_success)
-		bdata->last_success = addr;
+	if (addr >= bdata->node_boot_start &&
+			PFN_DOWN(addr - bdata->node_boot_start) < bdata->hint_idx)
+		bdata->hint_idx = PFN_DOWN(addr - bdata->node_boot_start);
 
 	/*
 	 * Round up to index to the range.
@@ -431,36 +432,16 @@ int __init reserve_bootmem(unsigned long addr, unsigned long size,
 }
 #endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */
 
-/*
- * We 'merge' subsequent allocations to save space. We might 'lose'
- * some fraction of a page if allocations cannot be satisfied due to
- * size constraints on boxes where there is physical RAM space
- * fragmentation - in these cases (mostly large memory boxes) this
- * is not a problem.
- *
- * On low memory boxes we get it right in 100% of the cases.
- *
- * alignment has to be a power of 2 value.
- *
- * NOTE:  This function is _not_ reentrant.
- */
-static void * __init
-alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size,
-		unsigned long align, unsigned long goal, unsigned long limit)
+static void * __init alloc_bootmem_core(struct bootmem_data *bdata,
+				unsigned long size, unsigned long align,
+				unsigned long goal, unsigned long limit)
 {
-	unsigned long areasize, preferred;
-	unsigned long i, start = 0, incr, eidx, end_pfn;
-	void *ret;
-	unsigned long node_boot_start;
-	void *node_bootmem_map;
-
-	if (!size) {
-		printk("alloc_bootmem_core(): zero-sized request\n");
-		BUG();
-	}
-	BUG_ON(align & (align-1));
+	unsigned long min, max, start, sidx, midx, step;
+
+	BUG_ON(!size);
+	BUG_ON(align & (align - 1));
+	BUG_ON(limit && goal + size > limit);
 
-	/* on nodes without memory - bootmem_map is NULL */
 	if (!bdata->node_bootmem_map)
 		return NULL;
 
@@ -468,126 +449,85 @@ alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size,
 		bdata - bootmem_node_data, size, PAGE_ALIGN(size) >> PAGE_SHIFT,
 		align, goal, limit);
 
-	/* bdata->node_boot_start is supposed to be (12+6)bits alignment on x86_64 ? */
-	node_boot_start = bdata->node_boot_start;
-	node_bootmem_map = bdata->node_bootmem_map;
-	if (align) {
-		node_boot_start = ALIGN(bdata->node_boot_start, align);
-		if (node_boot_start > bdata->node_boot_start)
-			node_bootmem_map = (unsigned long *)bdata->node_bootmem_map +
-			    PFN_DOWN(node_boot_start - bdata->node_boot_start)/BITS_PER_LONG;
-	}
+	min = PFN_DOWN(bdata->node_boot_start);
+	max = bdata->node_low_pfn;
 
-	if (limit && node_boot_start >= limit)
+	goal >>= PAGE_SHIFT;
+	limit >>= PAGE_SHIFT;
+
+	if (limit && max > limit)
+		max = limit;
+	if (max <= min)
 		return NULL;
 
-	end_pfn = bdata->node_low_pfn;
-	limit = PFN_DOWN(limit);
-	if (limit && end_pfn > limit)
-		end_pfn = limit;
+	step = max(align >> PAGE_SHIFT, 1UL);
 
-	eidx = end_pfn - PFN_DOWN(node_boot_start);
+	if (goal && min < goal && goal < max)
+		start = ALIGN(goal, step);
+	else
+		start = ALIGN(min, step);
 
-	/*
-	 * We try to allocate bootmem pages above 'goal'
-	 * first, then we try to allocate lower pages.
-	 */
-	preferred = 0;
-	if (goal && PFN_DOWN(goal) < end_pfn) {
-		if (goal > node_boot_start)
-			preferred = goal - node_boot_start;
-
-		if (bdata->last_success > node_boot_start &&
-			bdata->last_success - node_boot_start >= preferred)
-			if (!limit || (limit && limit > bdata->last_success))
-				preferred = bdata->last_success - node_boot_start;
-	}
+	sidx = start - PFN_DOWN(bdata->node_boot_start);
+	midx = max - PFN_DOWN(bdata->node_boot_start);
 
-	preferred = PFN_DOWN(ALIGN(preferred, align));
-	areasize = (size + PAGE_SIZE-1) / PAGE_SIZE;
-	incr = align >> PAGE_SHIFT ? : 1;
+	if (bdata->hint_idx > sidx) {
+		/* Make sure we retry on failure */
+		goal = 1;
+		sidx = ALIGN(bdata->hint_idx, step);
+	}
 
-restart_scan:
-	for (i = preferred; i < eidx;) {
-		unsigned long j;
+	while (1) {
+		int merge;
+		void *region;
+		unsigned long eidx, i, start_off, end_off;
+find_block:
+		sidx = find_next_zero_bit(bdata->node_bootmem_map, midx, sidx);
+		sidx = ALIGN(sidx, step);
+		eidx = sidx + PFN_UP(size);
 
-		i = find_next_zero_bit(node_bootmem_map, eidx, i);
-		i = ALIGN(i, incr);
-		if (i >= eidx)
+		if (sidx >= midx || eidx > midx)
 			break;
-		if (test_bit(i, node_bootmem_map)) {
-			i += incr;
-			continue;
-		}
-		for (j = i + 1; j < i + areasize; ++j) {
-			if (j >= eidx)
-				goto fail_block;
-			if (test_bit(j, node_bootmem_map))
-				goto fail_block;
-		}
-		start = i;
-		goto found;
-	fail_block:
-		i = ALIGN(j, incr);
-		if (i == j)
-			i += incr;
-	}
-
-	if (preferred > 0) {
-		preferred = 0;
-		goto restart_scan;
-	}
-	return NULL;
 
-found:
-	bdata->last_success = PFN_PHYS(start) + node_boot_start;
-	BUG_ON(start >= eidx);
+		for (i = sidx; i < eidx; i++)
+			if (test_bit(i, bdata->node_bootmem_map)) {
+				sidx = ALIGN(i, step);
+				if (sidx == i)
+					sidx += step;
+				goto find_block;
+			}
 
-	/*
-	 * Is the next page of the previous allocation-end the start
-	 * of this allocation's buffer? If yes then we can 'merge'
-	 * the previous partial page with this allocation.
-	 */
-	if (align < PAGE_SIZE &&
-	    bdata->last_offset && bdata->last_pos+1 == start) {
-		unsigned long offset, remaining_size;
-		offset = ALIGN(bdata->last_offset, align);
-		BUG_ON(offset > PAGE_SIZE);
-		remaining_size = PAGE_SIZE - offset;
-		if (size < remaining_size) {
-			areasize = 0;
-			/* last_pos unchanged */
-			bdata->last_offset = offset + size;
-			ret = phys_to_virt(bdata->last_pos * PAGE_SIZE +
-					   offset + node_boot_start);
-		} else {
-			remaining_size = size - remaining_size;
-			areasize = (remaining_size + PAGE_SIZE-1) / PAGE_SIZE;
-			ret = phys_to_virt(bdata->last_pos * PAGE_SIZE +
-					   offset + node_boot_start);
-			bdata->last_pos = start + areasize - 1;
-			bdata->last_offset = remaining_size;
-		}
-		bdata->last_offset &= ~PAGE_MASK;
-	} else {
-		bdata->last_pos = start + areasize - 1;
-		bdata->last_offset = size & ~PAGE_MASK;
-		ret = phys_to_virt(start * PAGE_SIZE + node_boot_start);
+		if (bdata->last_end_off &&
+				PFN_DOWN(bdata->last_end_off) + 1 == sidx)
+			start_off = ALIGN(bdata->last_end_off, align);
+		else
+			start_off = PFN_PHYS(sidx);
+
+		merge = PFN_DOWN(start_off) < sidx;
+		end_off = start_off + size;
+
+		bdata->last_end_off = end_off;
+		bdata->hint_idx = PFN_UP(end_off);
+
+		/*
+		 * Reserve the area now:
+		 */
+		for (i = PFN_DOWN(start_off) + merge;
+				i < PFN_UP(end_off); i++)
+			if (test_and_set_bit(i, bdata->node_bootmem_map))
+				BUG();
+
+		region = phys_to_virt(bdata->node_boot_start + start_off);
+		memset(region, 0, size);
+		return region;
 	}
 
-	bdebug("nid=%td start=%lx end=%lx\n",
-		bdata - bootmem_node_data,
-		start + PFN_DOWN(bdata->node_boot_start),
-		start + areasize + PFN_DOWN(bdata->node_boot_start));
+	if (goal) {
+		goal = 0;
+		sidx = 0;
+		goto find_block;
+	}
 
-	/*
-	 * Reserve the area now:
-	 */
-	for (i = start; i < start + areasize; i++)
-		if (unlikely(test_and_set_bit(i, node_bootmem_map)))
-			BUG();
-	memset(ret, 0, size);
-	return ret;
+	return NULL;
 }
 
 /**
-- 
cgit v1.2.3


From 3560e249abda6bee41a07a7bf0383a6e193e2839 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@saeurebad.de>
Date: Wed, 23 Jul 2008 21:28:09 -0700
Subject: bootmem: replace node_boot_start in struct bootmem_data

Almost all users of this field need a PFN instead of a physical address,
so replace node_boot_start with node_min_pfn.

[Lee.Schermerhorn@hp.com: fix spurious BUG_ON() in mark_bootmem()]
Signed-off-by: Johannes Weiner <hannes@saeureba.de>
Cc: <linux-arch@vger.kernel.org>
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/alpha/mm/numa.c     |  2 +-
 arch/arm/plat-omap/fb.c  |  4 +---
 arch/avr32/mm/init.c     |  3 +--
 arch/ia64/mm/discontig.c | 19 ++++++++++---------
 arch/m32r/mm/discontig.c |  3 +--
 arch/m32r/mm/init.c      |  4 +---
 arch/mn10300/mm/init.c   |  6 +++---
 arch/sh/mm/init.c        |  2 +-
 include/linux/bootmem.h  |  2 +-
 mm/bootmem.c             | 40 +++++++++++++++++++++-------------------
 10 files changed, 41 insertions(+), 44 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/mm/numa.c b/arch/alpha/mm/numa.c
index def0c74a78a8..d8c4ceaf00b9 100644
--- a/arch/alpha/mm/numa.c
+++ b/arch/alpha/mm/numa.c
@@ -304,7 +304,7 @@ void __init paging_init(void)
 
 	for_each_online_node(nid) {
 		bootmem_data_t *bdata = &bootmem_node_data[nid];
-		unsigned long start_pfn = bdata->node_boot_start >> PAGE_SHIFT;
+		unsigned long start_pfn = bdata->node_min_pfn;
 		unsigned long end_pfn = bdata->node_low_pfn;
 
 		if (dma_local_pfn >= end_pfn - start_pfn)
diff --git a/arch/arm/plat-omap/fb.c b/arch/arm/plat-omap/fb.c
index 7854f19b77cf..96d6f0619733 100644
--- a/arch/arm/plat-omap/fb.c
+++ b/arch/arm/plat-omap/fb.c
@@ -182,7 +182,7 @@ void __init omapfb_reserve_sdram(void)
 		return;
 
 	bdata = NODE_DATA(0)->bdata;
-	sdram_start = bdata->node_boot_start;
+	sdram_start = bdata->node_min_pfn << PAGE_SHIFT;
 	sdram_size = (bdata->node_low_pfn << PAGE_SHIFT) - sdram_start;
 	reserved = 0;
 	for (i = 0; ; i++) {
@@ -340,5 +340,3 @@ unsigned long omapfb_reserve_sram(unsigned long sram_pstart,
 
 
 #endif
-
-
diff --git a/arch/avr32/mm/init.c b/arch/avr32/mm/init.c
index 786de88a82a7..3c85fdaa9487 100644
--- a/arch/avr32/mm/init.c
+++ b/arch/avr32/mm/init.c
@@ -119,8 +119,7 @@ void __init paging_init(void)
 		unsigned long zones_size[MAX_NR_ZONES];
 		unsigned long low, start_pfn;
 
-		start_pfn = pgdat->bdata->node_boot_start;
-		start_pfn >>= PAGE_SHIFT;
+		start_pfn = pgdat->bdata->node_min_pfn;
 		low = pgdat->bdata->node_low_pfn;
 
 		memset(zones_size, 0, sizeof(zones_size));
diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
index 2fcf8464331e..d83125e1ed27 100644
--- a/arch/ia64/mm/discontig.c
+++ b/arch/ia64/mm/discontig.c
@@ -74,17 +74,17 @@ pg_data_t *pgdat_list[MAX_NUMNODES];
 static int __init build_node_maps(unsigned long start, unsigned long len,
 				  int node)
 {
-	unsigned long cstart, epfn, end = start + len;
+	unsigned long spfn, epfn, end = start + len;
 	struct bootmem_data *bdp = &bootmem_node_data[node];
 
 	epfn = GRANULEROUNDUP(end) >> PAGE_SHIFT;
-	cstart = GRANULEROUNDDOWN(start);
+	spfn = GRANULEROUNDDOWN(start) >> PAGE_SHIFT;
 
 	if (!bdp->node_low_pfn) {
-		bdp->node_boot_start = cstart;
+		bdp->node_min_pfn = spfn;
 		bdp->node_low_pfn = epfn;
 	} else {
-		bdp->node_boot_start = min(cstart, bdp->node_boot_start);
+		bdp->node_min_pfn = min(spfn, bdp->node_min_pfn);
 		bdp->node_low_pfn = max(epfn, bdp->node_low_pfn);
 	}
 
@@ -221,20 +221,21 @@ static void __init fill_pernode(int node, unsigned long pernode,
 static int __init find_pernode_space(unsigned long start, unsigned long len,
 				     int node)
 {
-	unsigned long epfn;
+	unsigned long spfn, epfn;
 	unsigned long pernodesize = 0, pernode, pages, mapsize;
 	struct bootmem_data *bdp = &bootmem_node_data[node];
 
+	spfn = start >> PAGE_SHIFT;
 	epfn = (start + len) >> PAGE_SHIFT;
 
-	pages = bdp->node_low_pfn - (bdp->node_boot_start >> PAGE_SHIFT);
+	pages = bdp->node_low_pfn - bdp->node_min_pfn;
 	mapsize = bootmem_bootmap_pages(pages) << PAGE_SHIFT;
 
 	/*
 	 * Make sure this memory falls within this node's usable memory
 	 * since we may have thrown some away in build_maps().
 	 */
-	if (start < bdp->node_boot_start || epfn > bdp->node_low_pfn)
+	if (spfn < bdp->node_min_pfn || epfn > bdp->node_low_pfn)
 		return 0;
 
 	/* Don't setup this node's local space twice... */
@@ -296,7 +297,7 @@ static void __init reserve_pernode_space(void)
 		bdp = pdp->bdata;
 
 		/* First the bootmem_map itself */
-		pages = bdp->node_low_pfn - (bdp->node_boot_start>>PAGE_SHIFT);
+		pages = bdp->node_low_pfn - bdp->node_min_pfn;
 		size = bootmem_bootmap_pages(pages) << PAGE_SHIFT;
 		base = __pa(bdp->node_bootmem_map);
 		reserve_bootmem_node(pdp, base, size, BOOTMEM_DEFAULT);
@@ -466,7 +467,7 @@ void __init find_memory(void)
 
 		init_bootmem_node(pgdat_list[node],
 				  map>>PAGE_SHIFT,
-				  bdp->node_boot_start>>PAGE_SHIFT,
+				  bdp->node_min_pfn,
 				  bdp->node_low_pfn);
 	}
 
diff --git a/arch/m32r/mm/discontig.c b/arch/m32r/mm/discontig.c
index cc23934bc41e..cbc3c4c54566 100644
--- a/arch/m32r/mm/discontig.c
+++ b/arch/m32r/mm/discontig.c
@@ -123,8 +123,7 @@ unsigned long __init setup_memory(void)
 	return max_low_pfn;
 }
 
-#define START_PFN(nid)	\
-	(NODE_DATA(nid)->bdata->node_boot_start >> PAGE_SHIFT)
+#define START_PFN(nid)		(NODE_DATA(nid)->bdata->node_min_pfn)
 #define MAX_LOW_PFN(nid)	(NODE_DATA(nid)->bdata->node_low_pfn)
 
 unsigned long __init zone_sizes_init(void)
diff --git a/arch/m32r/mm/init.c b/arch/m32r/mm/init.c
index 28799af15e95..2554eb59cfef 100644
--- a/arch/m32r/mm/init.c
+++ b/arch/m32r/mm/init.c
@@ -93,8 +93,7 @@ void free_initrd_mem(unsigned long, unsigned long);
 #endif
 
 /* It'd be good if these lines were in the standard header file. */
-#define START_PFN(nid)	\
-	(NODE_DATA(nid)->bdata->node_boot_start >> PAGE_SHIFT)
+#define START_PFN(nid)		(NODE_DATA(nid)->bdata->node_min_pfn)
 #define MAX_LOW_PFN(nid)	(NODE_DATA(nid)->bdata->node_low_pfn)
 
 #ifndef CONFIG_DISCONTIGMEM
@@ -252,4 +251,3 @@ void free_initrd_mem(unsigned long start, unsigned long end)
 	printk (KERN_INFO "Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
 }
 #endif
-
diff --git a/arch/mn10300/mm/init.c b/arch/mn10300/mm/init.c
index 8c5d88c7b90a..8cee387a24fd 100644
--- a/arch/mn10300/mm/init.c
+++ b/arch/mn10300/mm/init.c
@@ -67,8 +67,8 @@ void __init paging_init(void)
 
 	/* declare the sizes of the RAM zones (only use the normal zone) */
 	zones_size[ZONE_NORMAL] =
-		(contig_page_data.bdata->node_low_pfn) -
-		(contig_page_data.bdata->node_boot_start >> PAGE_SHIFT);
+		contig_page_data.bdata->node_low_pfn -
+		contig_page_data.bdata->node_min_pfn;
 
 	/* pass the memory from the bootmem allocator to the main allocator */
 	free_area_init(zones_size);
@@ -87,7 +87,7 @@ void __init mem_init(void)
 	if (!mem_map)
 		BUG();
 
-#define START_PFN	(contig_page_data.bdata->node_boot_start >> PAGE_SHIFT)
+#define START_PFN	(contig_page_data.bdata->node_min_pfn)
 #define MAX_LOW_PFN	(contig_page_data.bdata->node_low_pfn)
 
 	max_mapnr = num_physpages = MAX_LOW_PFN - START_PFN;
diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index d7df26bd1e54..d652d375eb1e 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -191,7 +191,7 @@ void __init paging_init(void)
 		pg_data_t *pgdat = NODE_DATA(nid);
 		unsigned long low, start_pfn;
 
-		start_pfn = pgdat->bdata->node_boot_start >> PAGE_SHIFT;
+		start_pfn = pgdat->bdata->node_min_pfn;
 		low = pgdat->bdata->node_low_pfn;
 
 		if (max_zone_pfns[ZONE_NORMAL] < low)
diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index 90921d10ffa2..4ddf2922fc8d 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -28,7 +28,7 @@ extern unsigned long saved_max_pfn;
  * memory pages (including holes) on the node.
  */
 typedef struct bootmem_data {
-	unsigned long node_boot_start;
+	unsigned long node_min_pfn;
 	unsigned long node_low_pfn;
 	void *node_bootmem_map;
 	unsigned long last_end_off;
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 282b786c2b15..4af15d0340ad 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -80,7 +80,7 @@ static void __init link_bootmem(bootmem_data_t *bdata)
 		bootmem_data_t *ent;
 
 		ent = list_entry(iter, bootmem_data_t, list);
-		if (bdata->node_boot_start < ent->node_boot_start)
+		if (bdata->node_min_pfn < ent->node_min_pfn)
 			break;
 	}
 	list_add_tail(&bdata->list, iter);
@@ -96,7 +96,7 @@ static unsigned long __init init_bootmem_core(bootmem_data_t *bdata,
 
 	mminit_validate_memmodel_limits(&start, &end);
 	bdata->node_bootmem_map = phys_to_virt(PFN_PHYS(mapstart));
-	bdata->node_boot_start = PFN_PHYS(start);
+	bdata->node_min_pfn = start;
 	bdata->node_low_pfn = end;
 	link_bootmem(bdata);
 
@@ -151,7 +151,7 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
 	if (!bdata->node_bootmem_map)
 		return 0;
 
-	start = PFN_DOWN(bdata->node_boot_start);
+	start = bdata->node_min_pfn;
 	end = bdata->node_low_pfn;
 
 	/*
@@ -167,7 +167,7 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
 		unsigned long *map, idx, vec;
 
 		map = bdata->node_bootmem_map;
-		idx = start - PFN_DOWN(bdata->node_boot_start);
+		idx = start - bdata->node_min_pfn;
 		vec = ~map[idx / BITS_PER_LONG];
 
 		if (aligned && vec == ~0UL && start + BITS_PER_LONG < end) {
@@ -192,7 +192,7 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
 	}
 
 	page = virt_to_page(bdata->node_bootmem_map);
-	pages = bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start);
+	pages = bdata->node_low_pfn - bdata->node_min_pfn;
 	pages = bootmem_bootmap_pages(pages);
 	count += pages;
 	while (pages--)
@@ -231,8 +231,8 @@ static void __init __free(bootmem_data_t *bdata,
 	unsigned long idx;
 
 	bdebug("nid=%td start=%lx end=%lx\n", bdata - bootmem_node_data,
-		sidx + PFN_DOWN(bdata->node_boot_start),
-		eidx + PFN_DOWN(bdata->node_boot_start));
+		sidx + bdata->node_min_pfn,
+		eidx + bdata->node_min_pfn);
 
 	if (bdata->hint_idx > sidx)
 		bdata->hint_idx = sidx;
@@ -250,8 +250,8 @@ static int __init __reserve(bootmem_data_t *bdata, unsigned long sidx,
 
 	bdebug("nid=%td start=%lx end=%lx flags=%x\n",
 		bdata - bootmem_node_data,
-		sidx + PFN_DOWN(bdata->node_boot_start),
-		eidx + PFN_DOWN(bdata->node_boot_start),
+		sidx + bdata->node_min_pfn,
+		eidx + bdata->node_min_pfn,
 		flags);
 
 	for (idx = sidx; idx < eidx; idx++)
@@ -261,7 +261,7 @@ static int __init __reserve(bootmem_data_t *bdata, unsigned long sidx,
 				return -EBUSY;
 			}
 			bdebug("silent double reserve of PFN %lx\n",
-				idx + PFN_DOWN(bdata->node_boot_start));
+				idx + bdata->node_min_pfn);
 		}
 	return 0;
 }
@@ -275,11 +275,11 @@ static int __init mark_bootmem_node(bootmem_data_t *bdata,
 	bdebug("nid=%td start=%lx end=%lx reserve=%d flags=%x\n",
 		bdata - bootmem_node_data, start, end, reserve, flags);
 
-	BUG_ON(start < PFN_DOWN(bdata->node_boot_start));
+	BUG_ON(start < bdata->node_min_pfn);
 	BUG_ON(end > bdata->node_low_pfn);
 
-	sidx = start - PFN_DOWN(bdata->node_boot_start);
-	eidx = end - PFN_DOWN(bdata->node_boot_start);
+	sidx = start - bdata->node_min_pfn;
+	eidx = end - bdata->node_min_pfn;
 
 	if (reserve)
 		return __reserve(bdata, sidx, eidx, flags);
@@ -299,7 +299,8 @@ static int __init mark_bootmem(unsigned long start, unsigned long end,
 		int err;
 		unsigned long max;
 
-		if (pos < PFN_DOWN(bdata->node_boot_start)) {
+		if (pos < bdata->node_min_pfn ||
+		    pos >= bdata->node_low_pfn) {
 			BUG_ON(pos != start);
 			continue;
 		}
@@ -422,7 +423,7 @@ static void * __init alloc_bootmem_core(struct bootmem_data *bdata,
 		bdata - bootmem_node_data, size, PAGE_ALIGN(size) >> PAGE_SHIFT,
 		align, goal, limit);
 
-	min = PFN_DOWN(bdata->node_boot_start);
+	min = bdata->node_min_pfn;
 	max = bdata->node_low_pfn;
 
 	goal >>= PAGE_SHIFT;
@@ -440,8 +441,8 @@ static void * __init alloc_bootmem_core(struct bootmem_data *bdata,
 	else
 		start = ALIGN(min, step);
 
-	sidx = start - PFN_DOWN(bdata->node_boot_start);
-	midx = max - PFN_DOWN(bdata->node_boot_start);
+	sidx = start - bdata->node_min_pfn;;
+	midx = max - bdata->node_min_pfn;
 
 	if (bdata->hint_idx > sidx) {
 		/*
@@ -491,7 +492,8 @@ find_block:
 				PFN_UP(end_off), BOOTMEM_EXCLUSIVE))
 			BUG();
 
-		region = phys_to_virt(bdata->node_boot_start + start_off);
+		region = phys_to_virt(PFN_PHYS(bdata->node_min_pfn) +
+				start_off);
 		memset(region, 0, size);
 		return region;
 	}
@@ -518,7 +520,7 @@ restart:
 
 		if (goal && bdata->node_low_pfn <= PFN_DOWN(goal))
 			continue;
-		if (limit && bdata->node_boot_start >= limit)
+		if (limit && bdata->node_min_pfn >= PFN_DOWN(limit))
 			break;
 
 		region = alloc_bootmem_core(bdata, size, align, goal, limit);
-- 
cgit v1.2.3


From 2be0ffe2b29bd31d3debd0877797892ff2d91f4c Mon Sep 17 00:00:00 2001
From: Timur Tabi <timur@freescale.com>
Date: Wed, 23 Jul 2008 21:28:11 -0700
Subject: mm: add alloc_pages_exact() and free_pages_exact()

alloc_pages_exact() is similar to alloc_pages(), except that it allocates
the minimum number of pages to fulfill the request.  This is useful if you
want to allocate a very large buffer that is slightly larger than an even
power-of-two number of pages.  In that case, alloc_pages() will waste a
lot of memory.

I have a video driver that wants to allocate a 5MB buffer.  alloc_pages()
wiill waste 3MB of physically-contiguous memory.

Signed-off-by: Timur Tabi <timur@freescale.com>
Cc: Andi Kleen <andi@firstfloor.org>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/gfp.h |  3 +++
 mm/page_alloc.c     | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 56 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index f640ed241422..e8003afeffba 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -228,6 +228,9 @@ extern struct page *alloc_page_vma(gfp_t gfp_mask,
 extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order);
 extern unsigned long get_zeroed_page(gfp_t gfp_mask);
 
+void *alloc_pages_exact(size_t size, gfp_t gfp_mask);
+void free_pages_exact(void *virt, size_t size);
+
 #define __get_free_page(gfp_mask) \
 		__get_free_pages((gfp_mask),0)
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index eaa86671ebbd..8d528d57b403 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1697,6 +1697,59 @@ void free_pages(unsigned long addr, unsigned int order)
 
 EXPORT_SYMBOL(free_pages);
 
+/**
+ * alloc_pages_exact - allocate an exact number physically-contiguous pages.
+ * @size: the number of bytes to allocate
+ * @gfp_mask: GFP flags for the allocation
+ *
+ * This function is similar to alloc_pages(), except that it allocates the
+ * minimum number of pages to satisfy the request.  alloc_pages() can only
+ * allocate memory in power-of-two pages.
+ *
+ * This function is also limited by MAX_ORDER.
+ *
+ * Memory allocated by this function must be released by free_pages_exact().
+ */
+void *alloc_pages_exact(size_t size, gfp_t gfp_mask)
+{
+	unsigned int order = get_order(size);
+	unsigned long addr;
+
+	addr = __get_free_pages(gfp_mask, order);
+	if (addr) {
+		unsigned long alloc_end = addr + (PAGE_SIZE << order);
+		unsigned long used = addr + PAGE_ALIGN(size);
+
+		split_page(virt_to_page(addr), order);
+		while (used < alloc_end) {
+			free_page(used);
+			used += PAGE_SIZE;
+		}
+	}
+
+	return (void *)addr;
+}
+EXPORT_SYMBOL(alloc_pages_exact);
+
+/**
+ * free_pages_exact - release memory allocated via alloc_pages_exact()
+ * @virt: the value returned by alloc_pages_exact.
+ * @size: size of allocation, same value as passed to alloc_pages_exact().
+ *
+ * Release the memory allocated by a previous call to alloc_pages_exact.
+ */
+void free_pages_exact(void *virt, size_t size)
+{
+	unsigned long addr = (unsigned long)virt;
+	unsigned long end = addr + PAGE_ALIGN(size);
+
+	while (addr < end) {
+		free_page(addr);
+		addr += PAGE_SIZE;
+	}
+}
+EXPORT_SYMBOL(free_pages_exact);
+
 static unsigned int nr_free_zone_pages(int offset)
 {
 	struct zoneref *z;
-- 
cgit v1.2.3


From 27ac792ca0b0a1e7e65f20342260650516c95864 Mon Sep 17 00:00:00 2001
From: Andrea Righi <righi.andrea@gmail.com>
Date: Wed, 23 Jul 2008 21:28:13 -0700
Subject: PAGE_ALIGN(): correctly handle 64-bit values on 32-bit architectures

On 32-bit architectures PAGE_ALIGN() truncates 64-bit values to the 32-bit
boundary. For example:

	u64 val = PAGE_ALIGN(size);

always returns a value < 4GB even if size is greater than 4GB.

The problem resides in PAGE_MASK definition (from include/asm-x86/page.h for
example):

#define PAGE_SHIFT      12
#define PAGE_SIZE       (_AC(1,UL) << PAGE_SHIFT)
#define PAGE_MASK       (~(PAGE_SIZE-1))
...
#define PAGE_ALIGN(addr)       (((addr)+PAGE_SIZE-1)&PAGE_MASK)

The "~" is performed on a 32-bit value, so everything in "and" with
PAGE_MASK greater than 4GB will be truncated to the 32-bit boundary.
Using the ALIGN() macro seems to be the right way, because it uses
typeof(addr) for the mask.

Also move the PAGE_ALIGN() definitions out of include/asm-*/page.h in
include/linux/mm.h.

See also lkml discussion: http://lkml.org/lkml/2008/6/11/237

[akpm@linux-foundation.org: fix drivers/media/video/uvc/uvc_queue.c]
[akpm@linux-foundation.org: fix v850]
[akpm@linux-foundation.org: fix powerpc]
[akpm@linux-foundation.org: fix arm]
[akpm@linux-foundation.org: fix mips]
[akpm@linux-foundation.org: fix drivers/media/video/pvrusb2/pvrusb2-dvb.c]
[akpm@linux-foundation.org: fix drivers/mtd/maps/uclinux.c]
[akpm@linux-foundation.org: fix powerpc]
Signed-off-by: Andrea Righi <righi.andrea@gmail.com>
Cc: <linux-arch@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm/kernel/module.c                     | 1 +
 arch/arm/plat-omap/fb.c                      | 1 +
 arch/avr32/mm/ioremap.c                      | 1 +
 arch/h8300/kernel/setup.c                    | 1 +
 arch/m68k/amiga/chipram.c                    | 1 +
 arch/m68knommu/kernel/setup.c                | 1 +
 arch/mips/kernel/module.c                    | 1 +
 arch/mips/sgi-ip27/ip27-klnuma.c             | 1 +
 arch/powerpc/kernel/suspend.c                | 1 +
 arch/powerpc/lib/code-patching.c             | 1 +
 arch/sparc64/kernel/iommu_common.h           | 2 +-
 arch/x86/kernel/module_64.c                  | 1 +
 arch/xtensa/kernel/setup.c                   | 1 +
 drivers/char/random.c                        | 1 +
 drivers/ieee1394/iso.c                       | 1 +
 drivers/media/video/pvrusb2/pvrusb2-dvb.c    | 1 +
 drivers/media/video/pvrusb2/pvrusb2-ioread.c | 1 +
 drivers/media/video/uvc/uvc_queue.c          | 1 +
 drivers/media/video/videobuf-core.c          | 1 +
 drivers/mtd/maps/uclinux.c                   | 1 +
 drivers/net/mlx4/eq.c                        | 1 +
 drivers/pcmcia/electra_cf.c                  | 1 +
 drivers/scsi/sun_esp.c                       | 1 +
 drivers/video/acornfb.c                      | 1 +
 drivers/video/imxfb.c                        | 1 +
 drivers/video/omap/dispc.c                   | 1 +
 drivers/video/omap/omapfb_main.c             | 1 +
 drivers/video/pxafb.c                        | 1 +
 drivers/video/sa1100fb.c                     | 1 +
 include/asm-alpha/page.h                     | 3 ---
 include/asm-arm/page-nommu.h                 | 4 +---
 include/asm-arm/page.h                       | 3 ---
 include/asm-avr32/page.h                     | 3 ---
 include/asm-blackfin/page.h                  | 3 ---
 include/asm-cris/page.h                      | 3 ---
 include/asm-frv/page.h                       | 3 ---
 include/asm-h8300/page.h                     | 3 ---
 include/asm-ia64/page.h                      | 1 -
 include/asm-m32r/page.h                      | 3 ---
 include/asm-m68k/dvma.h                      | 2 +-
 include/asm-m68k/page.h                      | 3 ---
 include/asm-m68knommu/page.h                 | 3 ---
 include/asm-mips/page.h                      | 3 ---
 include/asm-mips/processor.h                 | 2 +-
 include/asm-mn10300/page.h                   | 3 ---
 include/asm-parisc/page.h                    | 4 ----
 include/asm-powerpc/page.h                   | 3 ---
 include/asm-s390/page.h                      | 3 ---
 include/asm-sh/page.h                        | 3 ---
 include/asm-sparc/page_32.h                  | 3 ---
 include/asm-sparc/page_64.h                  | 3 ---
 include/asm-um/page.h                        | 3 ---
 include/asm-v850/page.h                      | 4 ----
 include/asm-x86/page.h                       | 3 ---
 include/asm-xtensa/page.h                    | 2 --
 include/linux/mm.h                           | 3 +++
 sound/core/info.c                            | 1 +
 57 files changed, 36 insertions(+), 74 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c
index 79b7e5cf5416..a68259a0cccd 100644
--- a/arch/arm/kernel/module.c
+++ b/arch/arm/kernel/module.c
@@ -13,6 +13,7 @@
 #include <linux/module.h>
 #include <linux/moduleloader.h>
 #include <linux/kernel.h>
+#include <linux/mm.h>
 #include <linux/elf.h>
 #include <linux/vmalloc.h>
 #include <linux/slab.h>
diff --git a/arch/arm/plat-omap/fb.c b/arch/arm/plat-omap/fb.c
index 96d6f0619733..5d107520e6b9 100644
--- a/arch/arm/plat-omap/fb.c
+++ b/arch/arm/plat-omap/fb.c
@@ -23,6 +23,7 @@
 
 #include <linux/module.h>
 #include <linux/kernel.h>
+#include <linux/mm.h>
 #include <linux/init.h>
 #include <linux/platform_device.h>
 #include <linux/bootmem.h>
diff --git a/arch/avr32/mm/ioremap.c b/arch/avr32/mm/ioremap.c
index 3437c82434ac..f03b79f0e0ab 100644
--- a/arch/avr32/mm/ioremap.c
+++ b/arch/avr32/mm/ioremap.c
@@ -6,6 +6,7 @@
  * published by the Free Software Foundation.
  */
 #include <linux/vmalloc.h>
+#include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/io.h>
 
diff --git a/arch/h8300/kernel/setup.c b/arch/h8300/kernel/setup.c
index b1f25c20a5db..7fda657110eb 100644
--- a/arch/h8300/kernel/setup.c
+++ b/arch/h8300/kernel/setup.c
@@ -20,6 +20,7 @@
 #include <linux/sched.h>
 #include <linux/delay.h>
 #include <linux/interrupt.h>
+#include <linux/mm.h>
 #include <linux/fs.h>
 #include <linux/fb.h>
 #include <linux/console.h>
diff --git a/arch/m68k/amiga/chipram.c b/arch/m68k/amiga/chipram.c
index cbe36538af47..61df1d33c050 100644
--- a/arch/m68k/amiga/chipram.c
+++ b/arch/m68k/amiga/chipram.c
@@ -9,6 +9,7 @@
 
 #include <linux/types.h>
 #include <linux/kernel.h>
+#include <linux/mm.h>
 #include <linux/init.h>
 #include <linux/ioport.h>
 #include <linux/slab.h>
diff --git a/arch/m68knommu/kernel/setup.c b/arch/m68knommu/kernel/setup.c
index 03f4fe6a2fc0..5985f1989021 100644
--- a/arch/m68knommu/kernel/setup.c
+++ b/arch/m68knommu/kernel/setup.c
@@ -22,6 +22,7 @@
 #include <linux/interrupt.h>
 #include <linux/fb.h>
 #include <linux/module.h>
+#include <linux/mm.h>
 #include <linux/console.h>
 #include <linux/errno.h>
 #include <linux/string.h>
diff --git a/arch/mips/kernel/module.c b/arch/mips/kernel/module.c
index e7ed0ac48537..1f60e27523d9 100644
--- a/arch/mips/kernel/module.c
+++ b/arch/mips/kernel/module.c
@@ -22,6 +22,7 @@
 
 #include <linux/moduleloader.h>
 #include <linux/elf.h>
+#include <linux/mm.h>
 #include <linux/vmalloc.h>
 #include <linux/slab.h>
 #include <linux/fs.h>
diff --git a/arch/mips/sgi-ip27/ip27-klnuma.c b/arch/mips/sgi-ip27/ip27-klnuma.c
index 48932ce1d730..d9c79d8be81d 100644
--- a/arch/mips/sgi-ip27/ip27-klnuma.c
+++ b/arch/mips/sgi-ip27/ip27-klnuma.c
@@ -4,6 +4,7 @@
  * Copyright 2000 - 2001 Kanoj Sarcar (kanoj@sgi.com)
  */
 #include <linux/init.h>
+#include <linux/mm.h>
 #include <linux/mmzone.h>
 #include <linux/kernel.h>
 #include <linux/nodemask.h>
diff --git a/arch/powerpc/kernel/suspend.c b/arch/powerpc/kernel/suspend.c
index 8cee57107541..6fc6328dc626 100644
--- a/arch/powerpc/kernel/suspend.c
+++ b/arch/powerpc/kernel/suspend.c
@@ -7,6 +7,7 @@
  * Copyright (c) 2001 Patrick Mochel <mochel@osdl.org>
  */
 
+#include <linux/mm.h>
 #include <asm/page.h>
 
 /* References to section boundaries */
diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index 0559fe086eb4..7c975d43e3f3 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -10,6 +10,7 @@
 #include <linux/kernel.h>
 #include <linux/vmalloc.h>
 #include <linux/init.h>
+#include <linux/mm.h>
 #include <asm/page.h>
 #include <asm/code-patching.h>
 
diff --git a/arch/sparc64/kernel/iommu_common.h b/arch/sparc64/kernel/iommu_common.h
index f3575a614fa2..53b19c8231a9 100644
--- a/arch/sparc64/kernel/iommu_common.h
+++ b/arch/sparc64/kernel/iommu_common.h
@@ -23,7 +23,7 @@
 #define IO_PAGE_SHIFT			13
 #define IO_PAGE_SIZE			(1UL << IO_PAGE_SHIFT)
 #define IO_PAGE_MASK			(~(IO_PAGE_SIZE-1))
-#define IO_PAGE_ALIGN(addr)		(((addr)+IO_PAGE_SIZE-1)&IO_PAGE_MASK)
+#define IO_PAGE_ALIGN(addr)		ALIGN(addr, IO_PAGE_SIZE)
 
 #define IO_TSB_ENTRIES			(128*1024)
 #define IO_TSB_SIZE			(IO_TSB_ENTRIES * 8)
diff --git a/arch/x86/kernel/module_64.c b/arch/x86/kernel/module_64.c
index 0e867676b5a5..6ba87830d4b1 100644
--- a/arch/x86/kernel/module_64.c
+++ b/arch/x86/kernel/module_64.c
@@ -22,6 +22,7 @@
 #include <linux/fs.h>
 #include <linux/string.h>
 #include <linux/kernel.h>
+#include <linux/mm.h>
 #include <linux/slab.h>
 #include <linux/bug.h>
 
diff --git a/arch/xtensa/kernel/setup.c b/arch/xtensa/kernel/setup.c
index 5e6d75c9f92b..a00359e8f7a8 100644
--- a/arch/xtensa/kernel/setup.c
+++ b/arch/xtensa/kernel/setup.c
@@ -16,6 +16,7 @@
 
 #include <linux/errno.h>
 #include <linux/init.h>
+#include <linux/mm.h>
 #include <linux/proc_fs.h>
 #include <linux/screen_info.h>
 #include <linux/bootmem.h>
diff --git a/drivers/char/random.c b/drivers/char/random.c
index 0cf98bd4f2d2..e0d0e371909c 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -236,6 +236,7 @@
 #include <linux/fs.h>
 #include <linux/genhd.h>
 #include <linux/interrupt.h>
+#include <linux/mm.h>
 #include <linux/spinlock.h>
 #include <linux/percpu.h>
 #include <linux/cryptohash.h>
diff --git a/drivers/ieee1394/iso.c b/drivers/ieee1394/iso.c
index 07ca35c98f96..1cf6487b65ba 100644
--- a/drivers/ieee1394/iso.c
+++ b/drivers/ieee1394/iso.c
@@ -11,6 +11,7 @@
 
 #include <linux/pci.h>
 #include <linux/sched.h>
+#include <linux/mm.h>
 #include <linux/slab.h>
 
 #include "hosts.h"
diff --git a/drivers/media/video/pvrusb2/pvrusb2-dvb.c b/drivers/media/video/pvrusb2/pvrusb2-dvb.c
index 6ec4bf81fc7f..77b3c3385066 100644
--- a/drivers/media/video/pvrusb2/pvrusb2-dvb.c
+++ b/drivers/media/video/pvrusb2/pvrusb2-dvb.c
@@ -20,6 +20,7 @@
 
 #include <linux/kthread.h>
 #include <linux/freezer.h>
+#include <linux/mm.h>
 #include "dvbdev.h"
 #include "pvrusb2-debug.h"
 #include "pvrusb2-hdw-internal.h"
diff --git a/drivers/media/video/pvrusb2/pvrusb2-ioread.c b/drivers/media/video/pvrusb2/pvrusb2-ioread.c
index 05a1376405e7..b4824782d858 100644
--- a/drivers/media/video/pvrusb2/pvrusb2-ioread.c
+++ b/drivers/media/video/pvrusb2/pvrusb2-ioread.c
@@ -22,6 +22,7 @@
 #include "pvrusb2-debug.h"
 #include <linux/errno.h>
 #include <linux/string.h>
+#include <linux/mm.h>
 #include <linux/slab.h>
 #include <linux/mutex.h>
 #include <asm/uaccess.h>
diff --git a/drivers/media/video/uvc/uvc_queue.c b/drivers/media/video/uvc/uvc_queue.c
index 7388d0cee3d4..5646a6a32939 100644
--- a/drivers/media/video/uvc/uvc_queue.c
+++ b/drivers/media/video/uvc/uvc_queue.c
@@ -13,6 +13,7 @@
 
 #include <linux/kernel.h>
 #include <linux/version.h>
+#include <linux/mm.h>
 #include <linux/list.h>
 #include <linux/module.h>
 #include <linux/usb.h>
diff --git a/drivers/media/video/videobuf-core.c b/drivers/media/video/videobuf-core.c
index 0a88c44ace00..b7b05842cf28 100644
--- a/drivers/media/video/videobuf-core.c
+++ b/drivers/media/video/videobuf-core.c
@@ -16,6 +16,7 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
+#include <linux/mm.h>
 #include <linux/slab.h>
 #include <linux/interrupt.h>
 
diff --git a/drivers/mtd/maps/uclinux.c b/drivers/mtd/maps/uclinux.c
index c42f4b83f686..3fcf92130aa4 100644
--- a/drivers/mtd/maps/uclinux.c
+++ b/drivers/mtd/maps/uclinux.c
@@ -15,6 +15,7 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/fs.h>
+#include <linux/mm.h>
 #include <linux/major.h>
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/map.h>
diff --git a/drivers/net/mlx4/eq.c b/drivers/net/mlx4/eq.c
index e141a1513f07..ea3a09aaa844 100644
--- a/drivers/net/mlx4/eq.c
+++ b/drivers/net/mlx4/eq.c
@@ -33,6 +33,7 @@
 
 #include <linux/init.h>
 #include <linux/interrupt.h>
+#include <linux/mm.h>
 #include <linux/dma-mapping.h>
 
 #include <linux/mlx4/cmd.h>
diff --git a/drivers/pcmcia/electra_cf.c b/drivers/pcmcia/electra_cf.c
index c21f9a9c3e3f..a34284b1482a 100644
--- a/drivers/pcmcia/electra_cf.c
+++ b/drivers/pcmcia/electra_cf.c
@@ -28,6 +28,7 @@
 #include <linux/init.h>
 #include <linux/delay.h>
 #include <linux/interrupt.h>
+#include <linux/mm.h>
 #include <linux/vmalloc.h>
 #include <linux/of_platform.h>
 
diff --git a/drivers/scsi/sun_esp.c b/drivers/scsi/sun_esp.c
index 2c87db98cdfb..f9cf70151366 100644
--- a/drivers/scsi/sun_esp.c
+++ b/drivers/scsi/sun_esp.c
@@ -7,6 +7,7 @@
 #include <linux/types.h>
 #include <linux/delay.h>
 #include <linux/module.h>
+#include <linux/mm.h>
 #include <linux/init.h>
 
 #include <asm/irq.h>
diff --git a/drivers/video/acornfb.c b/drivers/video/acornfb.c
index eedb8285e32f..017233d0c481 100644
--- a/drivers/video/acornfb.c
+++ b/drivers/video/acornfb.c
@@ -23,6 +23,7 @@
 #include <linux/string.h>
 #include <linux/ctype.h>
 #include <linux/slab.h>
+#include <linux/mm.h>
 #include <linux/init.h>
 #include <linux/fb.h>
 #include <linux/platform_device.h>
diff --git a/drivers/video/imxfb.c b/drivers/video/imxfb.c
index 94e4d3ac1a05..0c5a475c1cae 100644
--- a/drivers/video/imxfb.c
+++ b/drivers/video/imxfb.c
@@ -24,6 +24,7 @@
 #include <linux/string.h>
 #include <linux/interrupt.h>
 #include <linux/slab.h>
+#include <linux/mm.h>
 #include <linux/fb.h>
 #include <linux/delay.h>
 #include <linux/init.h>
diff --git a/drivers/video/omap/dispc.c b/drivers/video/omap/dispc.c
index ab32ceb06178..ab77c51fe9d6 100644
--- a/drivers/video/omap/dispc.c
+++ b/drivers/video/omap/dispc.c
@@ -20,6 +20,7 @@
  */
 #include <linux/kernel.h>
 #include <linux/dma-mapping.h>
+#include <linux/mm.h>
 #include <linux/vmalloc.h>
 #include <linux/clk.h>
 #include <linux/io.h>
diff --git a/drivers/video/omap/omapfb_main.c b/drivers/video/omap/omapfb_main.c
index 14d0f7a11145..f85af5c4fa68 100644
--- a/drivers/video/omap/omapfb_main.c
+++ b/drivers/video/omap/omapfb_main.c
@@ -25,6 +25,7 @@
  * 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  */
 #include <linux/platform_device.h>
+#include <linux/mm.h>
 #include <linux/uaccess.h>
 
 #include <asm/mach-types.h>
diff --git a/drivers/video/pxafb.c b/drivers/video/pxafb.c
index bb2514369507..5e8a140399fc 100644
--- a/drivers/video/pxafb.c
+++ b/drivers/video/pxafb.c
@@ -30,6 +30,7 @@
 #include <linux/string.h>
 #include <linux/interrupt.h>
 #include <linux/slab.h>
+#include <linux/mm.h>
 #include <linux/fb.h>
 #include <linux/delay.h>
 #include <linux/init.h>
diff --git a/drivers/video/sa1100fb.c b/drivers/video/sa1100fb.c
index ab2b2110478b..4a9f7e121807 100644
--- a/drivers/video/sa1100fb.c
+++ b/drivers/video/sa1100fb.c
@@ -167,6 +167,7 @@
 #include <linux/string.h>
 #include <linux/interrupt.h>
 #include <linux/slab.h>
+#include <linux/mm.h>
 #include <linux/fb.h>
 #include <linux/delay.h>
 #include <linux/init.h>
diff --git a/include/asm-alpha/page.h b/include/asm-alpha/page.h
index 22ff9762d17b..0995f9d13417 100644
--- a/include/asm-alpha/page.h
+++ b/include/asm-alpha/page.h
@@ -80,9 +80,6 @@ typedef struct page *pgtable_t;
 
 #endif /* !__ASSEMBLY__ */
 
-/* to align the pointer to the (next) page boundary */
-#define PAGE_ALIGN(addr)	(((addr)+PAGE_SIZE-1)&PAGE_MASK)
-
 #define __pa(x)			((unsigned long) (x) - PAGE_OFFSET)
 #define __va(x)			((void *)((unsigned long) (x) + PAGE_OFFSET))
 #ifndef CONFIG_DISCONTIGMEM
diff --git a/include/asm-arm/page-nommu.h b/include/asm-arm/page-nommu.h
index a1bcad060480..ea1cde84f500 100644
--- a/include/asm-arm/page-nommu.h
+++ b/include/asm-arm/page-nommu.h
@@ -7,6 +7,7 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
+
 #ifndef _ASMARM_PAGE_NOMMU_H
 #define _ASMARM_PAGE_NOMMU_H
 
@@ -42,9 +43,6 @@ typedef unsigned long pgprot_t;
 #define __pmd(x)        (x)
 #define __pgprot(x)     (x)
 
-/* to align the pointer to the (next) page boundary */
-#define PAGE_ALIGN(addr)	(((addr)+PAGE_SIZE-1)&PAGE_MASK)
-
 extern unsigned long memory_start;
 extern unsigned long memory_end;
 
diff --git a/include/asm-arm/page.h b/include/asm-arm/page.h
index 8e05bdb5f12f..7c5fc5582e5d 100644
--- a/include/asm-arm/page.h
+++ b/include/asm-arm/page.h
@@ -15,9 +15,6 @@
 #define PAGE_SIZE		(1UL << PAGE_SHIFT)
 #define PAGE_MASK		(~(PAGE_SIZE-1))
 
-/* to align the pointer to the (next) page boundary */
-#define PAGE_ALIGN(addr)	(((addr)+PAGE_SIZE-1)&PAGE_MASK)
-
 #ifndef __ASSEMBLY__
 
 #ifndef CONFIG_MMU
diff --git a/include/asm-avr32/page.h b/include/asm-avr32/page.h
index cbbc5ca9728b..f805d1cb11bc 100644
--- a/include/asm-avr32/page.h
+++ b/include/asm-avr32/page.h
@@ -57,9 +57,6 @@ static inline int get_order(unsigned long size)
 
 #endif /* !__ASSEMBLY__ */
 
-/* Align the pointer to the (next) page boundary */
-#define PAGE_ALIGN(addr)	(((addr) + PAGE_SIZE - 1) & PAGE_MASK)
-
 /*
  * The hardware maps the virtual addresses 0x80000000 -> 0x9fffffff
  * permanently to the physical addresses 0x00000000 -> 0x1fffffff when
diff --git a/include/asm-blackfin/page.h b/include/asm-blackfin/page.h
index c7db0220fbd6..344f6a8c1f22 100644
--- a/include/asm-blackfin/page.h
+++ b/include/asm-blackfin/page.h
@@ -51,9 +51,6 @@ typedef struct page *pgtable_t;
 #define __pgd(x)	((pgd_t) { (x) } )
 #define __pgprot(x)	((pgprot_t) { (x) } )
 
-/* to align the pointer to the (next) page boundary */
-#define PAGE_ALIGN(addr)	(((addr)+PAGE_SIZE-1)&PAGE_MASK)
-
 extern unsigned long memory_start;
 extern unsigned long memory_end;
 
diff --git a/include/asm-cris/page.h b/include/asm-cris/page.h
index c45bb1ef397c..d19272ba6b69 100644
--- a/include/asm-cris/page.h
+++ b/include/asm-cris/page.h
@@ -60,9 +60,6 @@ typedef struct page *pgtable_t;
 
 #define page_to_phys(page)     __pa((((page) - mem_map) << PAGE_SHIFT) + PAGE_OFFSET)
 
-/* to align the pointer to the (next) page boundary */
-#define PAGE_ALIGN(addr)	(((addr)+PAGE_SIZE-1)&PAGE_MASK)
-
 #ifndef __ASSEMBLY__
 
 #endif /* __ASSEMBLY__ */
diff --git a/include/asm-frv/page.h b/include/asm-frv/page.h
index c2c1e89e747d..bd9c220094c7 100644
--- a/include/asm-frv/page.h
+++ b/include/asm-frv/page.h
@@ -40,9 +40,6 @@ typedef struct page *pgtable_t;
 #define __pgprot(x)	((pgprot_t) { (x) } )
 #define PTE_MASK	PAGE_MASK
 
-/* to align the pointer to the (next) page boundary */
-#define PAGE_ALIGN(addr)	(((addr) + PAGE_SIZE - 1) & PAGE_MASK)
-
 #define devmem_is_allowed(pfn)	1
 
 #define __pa(vaddr)		virt_to_phys((void *) (unsigned long) (vaddr))
diff --git a/include/asm-h8300/page.h b/include/asm-h8300/page.h
index d6a3eaf3b27e..0b6acf0b03aa 100644
--- a/include/asm-h8300/page.h
+++ b/include/asm-h8300/page.h
@@ -43,9 +43,6 @@ typedef struct page *pgtable_t;
 #define __pgd(x)	((pgd_t) { (x) } )
 #define __pgprot(x)	((pgprot_t) { (x) } )
 
-/* to align the pointer to the (next) page boundary */
-#define PAGE_ALIGN(addr)	(((addr)+PAGE_SIZE-1)&PAGE_MASK)
-
 extern unsigned long memory_start;
 extern unsigned long memory_end;
 
diff --git a/include/asm-ia64/page.h b/include/asm-ia64/page.h
index 36f39321b768..5f271bc712ee 100644
--- a/include/asm-ia64/page.h
+++ b/include/asm-ia64/page.h
@@ -40,7 +40,6 @@
 
 #define PAGE_SIZE		(__IA64_UL_CONST(1) << PAGE_SHIFT)
 #define PAGE_MASK		(~(PAGE_SIZE - 1))
-#define PAGE_ALIGN(addr)	(((addr) + PAGE_SIZE - 1) & PAGE_MASK)
 
 #define PERCPU_PAGE_SHIFT	16	/* log2() of max. size of per-CPU area */
 #define PERCPU_PAGE_SIZE	(__IA64_UL_CONST(1) << PERCPU_PAGE_SHIFT)
diff --git a/include/asm-m32r/page.h b/include/asm-m32r/page.h
index 8a677f3fca68..c9333089fe11 100644
--- a/include/asm-m32r/page.h
+++ b/include/asm-m32r/page.h
@@ -41,9 +41,6 @@ typedef struct page *pgtable_t;
 
 #endif /* !__ASSEMBLY__ */
 
-/* to align the pointer to the (next) page boundary */
-#define PAGE_ALIGN(addr)	(((addr) + PAGE_SIZE - 1) & PAGE_MASK)
-
 /*
  * This handles the memory map.. We could make this a config
  * option, but too many people screw it up, and too few need
diff --git a/include/asm-m68k/dvma.h b/include/asm-m68k/dvma.h
index 4fff408d0150..890bbf7e7758 100644
--- a/include/asm-m68k/dvma.h
+++ b/include/asm-m68k/dvma.h
@@ -13,7 +13,7 @@
 #define DVMA_PAGE_SHIFT	13
 #define DVMA_PAGE_SIZE	(1UL << DVMA_PAGE_SHIFT)
 #define DVMA_PAGE_MASK	(~(DVMA_PAGE_SIZE-1))
-#define DVMA_PAGE_ALIGN(addr)	(((addr)+DVMA_PAGE_SIZE-1)&DVMA_PAGE_MASK)
+#define DVMA_PAGE_ALIGN(addr)	ALIGN(addr, DVMA_PAGE_SIZE)
 
 extern void dvma_init(void);
 extern int dvma_map_iommu(unsigned long kaddr, unsigned long baddr,
diff --git a/include/asm-m68k/page.h b/include/asm-m68k/page.h
index 880c2cbff8a6..a34b8bad7847 100644
--- a/include/asm-m68k/page.h
+++ b/include/asm-m68k/page.h
@@ -103,9 +103,6 @@ typedef struct page *pgtable_t;
 #define __pgd(x)	((pgd_t) { (x) } )
 #define __pgprot(x)	((pgprot_t) { (x) } )
 
-/* to align the pointer to the (next) page boundary */
-#define PAGE_ALIGN(addr)	(((addr)+PAGE_SIZE-1)&PAGE_MASK)
-
 #endif /* !__ASSEMBLY__ */
 
 #include <asm/page_offset.h>
diff --git a/include/asm-m68knommu/page.h b/include/asm-m68knommu/page.h
index 1e82ebb7d644..3a1ede4544cb 100644
--- a/include/asm-m68knommu/page.h
+++ b/include/asm-m68knommu/page.h
@@ -43,9 +43,6 @@ typedef struct page *pgtable_t;
 #define __pgd(x)	((pgd_t) { (x) } )
 #define __pgprot(x)	((pgprot_t) { (x) } )
 
-/* to align the pointer to the (next) page boundary */
-#define PAGE_ALIGN(addr)	(((addr)+PAGE_SIZE-1)&PAGE_MASK)
-
 extern unsigned long memory_start;
 extern unsigned long memory_end;
 
diff --git a/include/asm-mips/page.h b/include/asm-mips/page.h
index 494f00ba9541..fe7a88ea066e 100644
--- a/include/asm-mips/page.h
+++ b/include/asm-mips/page.h
@@ -137,9 +137,6 @@ typedef struct { unsigned long pgprot; } pgprot_t;
 
 #endif /* !__ASSEMBLY__ */
 
-/* to align the pointer to the (next) page boundary */
-#define PAGE_ALIGN(addr)	(((addr) + PAGE_SIZE - 1) & PAGE_MASK)
-
 /*
  * __pa()/__va() should be used only during mem init.
  */
diff --git a/include/asm-mips/processor.h b/include/asm-mips/processor.h
index 58cbac5a64e4..a1e4453469f9 100644
--- a/include/asm-mips/processor.h
+++ b/include/asm-mips/processor.h
@@ -45,7 +45,7 @@ extern unsigned int vced_count, vcei_count;
  * This decides where the kernel will search for a free chunk of vm
  * space during mmap's.
  */
-#define TASK_UNMAPPED_BASE	(PAGE_ALIGN(TASK_SIZE / 3))
+#define TASK_UNMAPPED_BASE	((TASK_SIZE / 3) & ~(PAGE_SIZE))
 #endif
 
 #ifdef CONFIG_64BIT
diff --git a/include/asm-mn10300/page.h b/include/asm-mn10300/page.h
index 124971b9fb9b..8288e124165b 100644
--- a/include/asm-mn10300/page.h
+++ b/include/asm-mn10300/page.h
@@ -61,9 +61,6 @@ typedef struct page *pgtable_t;
 
 #endif /* !__ASSEMBLY__ */
 
-/* to align the pointer to the (next) page boundary */
-#define PAGE_ALIGN(addr)	(((addr) + PAGE_SIZE - 1) & PAGE_MASK)
-
 /*
  * This handles the memory map.. We could make this a config
  * option, but too many people screw it up, and too few need
diff --git a/include/asm-parisc/page.h b/include/asm-parisc/page.h
index 27d50b859541..c3941f09a878 100644
--- a/include/asm-parisc/page.h
+++ b/include/asm-parisc/page.h
@@ -119,10 +119,6 @@ extern int npmem_ranges;
 #define PMD_ENTRY_SIZE	(1UL << BITS_PER_PMD_ENTRY)
 #define PTE_ENTRY_SIZE	(1UL << BITS_PER_PTE_ENTRY)
 
-/* to align the pointer to the (next) page boundary */
-#define PAGE_ALIGN(addr)	(((addr)+PAGE_SIZE-1)&PAGE_MASK)
-
-
 #define LINUX_GATEWAY_SPACE     0
 
 /* This governs the relationship between virtual and physical addresses.
diff --git a/include/asm-powerpc/page.h b/include/asm-powerpc/page.h
index cffdf0eb0df6..e088545cb3f5 100644
--- a/include/asm-powerpc/page.h
+++ b/include/asm-powerpc/page.h
@@ -119,9 +119,6 @@ extern phys_addr_t kernstart_addr;
 /* align addr on a size boundary - adjust address up if needed */
 #define _ALIGN(addr,size)     _ALIGN_UP(addr,size)
 
-/* to align the pointer to the (next) page boundary */
-#define PAGE_ALIGN(addr)	_ALIGN(addr, PAGE_SIZE)
-
 /*
  * Don't compare things with KERNELBASE or PAGE_OFFSET to test for
  * "kernelness", use is_kernel_addr() - it should do what you want.
diff --git a/include/asm-s390/page.h b/include/asm-s390/page.h
index 12fd9c4f0f15..991ba939408c 100644
--- a/include/asm-s390/page.h
+++ b/include/asm-s390/page.h
@@ -138,9 +138,6 @@ void arch_alloc_page(struct page *page, int order);
 
 #endif /* !__ASSEMBLY__ */
 
-/* to align the pointer to the (next) page boundary */
-#define PAGE_ALIGN(addr)        (((addr)+PAGE_SIZE-1)&PAGE_MASK)
-
 #define __PAGE_OFFSET           0x0UL
 #define PAGE_OFFSET             0x0UL
 #define __pa(x)                 (unsigned long)(x)
diff --git a/include/asm-sh/page.h b/include/asm-sh/page.h
index 304c30b5d947..5dc01d2fcc4c 100644
--- a/include/asm-sh/page.h
+++ b/include/asm-sh/page.h
@@ -22,9 +22,6 @@
 #define PAGE_MASK	(~(PAGE_SIZE-1))
 #define PTE_MASK	PAGE_MASK
 
-/* to align the pointer to the (next) page boundary */
-#define PAGE_ALIGN(addr)	(((addr)+PAGE_SIZE-1)&PAGE_MASK)
-
 #if defined(CONFIG_HUGETLB_PAGE_SIZE_64K)
 #define HPAGE_SHIFT	16
 #elif defined(CONFIG_HUGETLB_PAGE_SIZE_256K)
diff --git a/include/asm-sparc/page_32.h b/include/asm-sparc/page_32.h
index 14de518cc38f..cf5fb70ca1c1 100644
--- a/include/asm-sparc/page_32.h
+++ b/include/asm-sparc/page_32.h
@@ -134,9 +134,6 @@ BTFIXUPDEF_SETHI(sparc_unmapped_base)
 
 #endif /* !(__ASSEMBLY__) */
 
-/* to align the pointer to the (next) page boundary */
-#define PAGE_ALIGN(addr)  (((addr)+PAGE_SIZE-1)&PAGE_MASK)
-
 #define PAGE_OFFSET	0xf0000000
 #ifndef __ASSEMBLY__
 extern unsigned long phys_base;
diff --git a/include/asm-sparc/page_64.h b/include/asm-sparc/page_64.h
index a8a2bba032c1..b579b910ef51 100644
--- a/include/asm-sparc/page_64.h
+++ b/include/asm-sparc/page_64.h
@@ -106,9 +106,6 @@ typedef struct page *pgtable_t;
 
 #endif /* !(__ASSEMBLY__) */
 
-/* to align the pointer to the (next) page boundary */
-#define PAGE_ALIGN(addr)	(((addr)+PAGE_SIZE-1)&PAGE_MASK)
-
 /* We used to stick this into a hard-coded global register (%g4)
  * but that does not make sense anymore.
  */
diff --git a/include/asm-um/page.h b/include/asm-um/page.h
index 916e1a61999f..335c57383c02 100644
--- a/include/asm-um/page.h
+++ b/include/asm-um/page.h
@@ -92,9 +92,6 @@ typedef struct page *pgtable_t;
 #define __pgd(x) ((pgd_t) { (x) } )
 #define __pgprot(x)	((pgprot_t) { (x) } )
 
-/* to align the pointer to the (next) page boundary */
-#define PAGE_ALIGN(addr)	(((addr)+PAGE_SIZE-1)&PAGE_MASK)
-
 extern unsigned long uml_physmem;
 
 #define PAGE_OFFSET (uml_physmem)
diff --git a/include/asm-v850/page.h b/include/asm-v850/page.h
index 74a539a9bd59..f9de35d873fa 100644
--- a/include/asm-v850/page.h
+++ b/include/asm-v850/page.h
@@ -94,10 +94,6 @@ typedef unsigned long pgprot_t;
 #endif /* !__ASSEMBLY__ */
 
 
-/* to align the pointer to the (next) page boundary */
-#define PAGE_ALIGN(addr)	(((addr) + PAGE_SIZE - 1) & PAGE_MASK)
-
-
 /* No current v850 processor has virtual memory.  */
 #define __virt_to_phys(addr)	(addr)
 #define __phys_to_virt(addr)	(addr)
diff --git a/include/asm-x86/page.h b/include/asm-x86/page.h
index 6e02098b1605..49982110e4d9 100644
--- a/include/asm-x86/page.h
+++ b/include/asm-x86/page.h
@@ -34,9 +34,6 @@
 
 #define HUGE_MAX_HSTATE 2
 
-/* to align the pointer to the (next) page boundary */
-#define PAGE_ALIGN(addr)	(((addr)+PAGE_SIZE-1)&PAGE_MASK)
-
 #ifndef __ASSEMBLY__
 #include <linux/types.h>
 #endif
diff --git a/include/asm-xtensa/page.h b/include/asm-xtensa/page.h
index 80a6ae0dd259..11f7dc2dbec7 100644
--- a/include/asm-xtensa/page.h
+++ b/include/asm-xtensa/page.h
@@ -26,13 +26,11 @@
 
 /*
  * PAGE_SHIFT determines the page size
- * PAGE_ALIGN(x) aligns the pointer to the (next) page boundary
  */
 
 #define PAGE_SHIFT		12
 #define PAGE_SIZE		(__XTENSA_UL_CONST(1) << PAGE_SHIFT)
 #define PAGE_MASK		(~(PAGE_SIZE-1))
-#define PAGE_ALIGN(addr)	(((addr)+PAGE_SIZE - 1) & PAGE_MASK)
 
 #define PAGE_OFFSET		XCHAL_KSEG_CACHED_VADDR
 #define MAX_MEM_PFN		XCHAL_KSEG_SIZE
diff --git a/include/linux/mm.h b/include/linux/mm.h
index df322fb4df31..d87a5a5fe87d 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -41,6 +41,9 @@ extern unsigned long mmap_min_addr;
 
 #define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n))
 
+/* to align the pointer to the (next) page boundary */
+#define PAGE_ALIGN(addr) ALIGN(addr, PAGE_SIZE)
+
 /*
  * Linux kernel virtual memory manager primitives.
  * The idea being to have a "virtual" mm in the same way
diff --git a/sound/core/info.c b/sound/core/info.c
index cb5ead3e202d..c67773ad9298 100644
--- a/sound/core/info.c
+++ b/sound/core/info.c
@@ -21,6 +21,7 @@
 
 #include <linux/init.h>
 #include <linux/time.h>
+#include <linux/mm.h>
 #include <linux/smp_lock.h>
 #include <linux/string.h>
 #include <sound/core.h>
-- 
cgit v1.2.3


From af370fb8cb3031f20438f246798d5f0d98089f29 Mon Sep 17 00:00:00 2001
From: Yasunori Goto <y-goto@jp.fujitsu.com>
Date: Wed, 23 Jul 2008 21:28:17 -0700
Subject: memory hotplug: small fixes to bootmem freeing for memory hotremove

- Change some naming
  * Magic -> types
  * MIX_INFO -> MIX_SECTION_INFO
  * Change definition of bootmem type from direct hex value

- __free_pages_bootmem() becomes __meminit.

Signed-off-by: Yasunori Goto <y-goto@jp.fujitsu.com>
Cc: Andy Whitcroft <apw@shadowen.org>
Cc: Badari Pulavarty <pbadari@us.ibm.com>
Cc: Yinghai Lu <yhlu.kernel@gmail.com>
Cc: Johannes Weiner <hannes@saeurebad.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memory_hotplug.h |  8 ++++----
 mm/memory_hotplug.c            | 12 ++++++------
 mm/page_alloc.c                |  2 +-
 3 files changed, 11 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index ea9f5ad9ec8e..3628e5088f64 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -13,12 +13,12 @@ struct mem_section;
 #ifdef CONFIG_MEMORY_HOTPLUG
 
 /*
- * Magic number for free bootmem.
+ * Types for free bootmem.
  * The normal smallest mapcount is -1. Here is smaller value than it.
  */
-#define SECTION_INFO		0xfffffffe
-#define MIX_INFO		0xfffffffd
-#define NODE_INFO		0xfffffffc
+#define SECTION_INFO		(-1 - 1)
+#define MIX_SECTION_INFO	(-1 - 2)
+#define NODE_INFO		(-1 - 3)
 
 /*
  * pgdat resizing functions
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index ec85c37dcfb9..0fb05b258f0c 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -62,9 +62,9 @@ static void release_memory_resource(struct resource *res)
 
 #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
 #ifndef CONFIG_SPARSEMEM_VMEMMAP
-static void get_page_bootmem(unsigned long info,  struct page *page, int magic)
+static void get_page_bootmem(unsigned long info,  struct page *page, int type)
 {
-	atomic_set(&page->_mapcount, magic);
+	atomic_set(&page->_mapcount, type);
 	SetPagePrivate(page);
 	set_page_private(page, info);
 	atomic_inc(&page->_count);
@@ -72,10 +72,10 @@ static void get_page_bootmem(unsigned long info,  struct page *page, int magic)
 
 void put_page_bootmem(struct page *page)
 {
-	int magic;
+	int type;
 
-	magic = atomic_read(&page->_mapcount);
-	BUG_ON(magic >= -1);
+	type = atomic_read(&page->_mapcount);
+	BUG_ON(type >= -1);
 
 	if (atomic_dec_return(&page->_count) == 1) {
 		ClearPagePrivate(page);
@@ -119,7 +119,7 @@ static void register_page_bootmem_info_section(unsigned long start_pfn)
 	mapsize = PAGE_ALIGN(usemap_size()) >> PAGE_SHIFT;
 
 	for (i = 0; i < mapsize; i++, page++)
-		get_page_bootmem(section_nr, page, MIX_INFO);
+		get_page_bootmem(section_nr, page, MIX_SECTION_INFO);
 
 }
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index cd4c41432ef6..6da667274df5 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -533,7 +533,7 @@ static void __free_pages_ok(struct page *page, unsigned int order)
 /*
  * permit the bootmem allocator to evade page validation on high-order frees
  */
-void __free_pages_bootmem(struct page *page, unsigned int order)
+void __meminit __free_pages_bootmem(struct page *page, unsigned int order)
 {
 	if (order == 0) {
 		__ClearPageReserved(page);
-- 
cgit v1.2.3


From 5c755e9fd813810680abd56ec09a5f90143e815b Mon Sep 17 00:00:00 2001
From: Badari Pulavarty <pbadari@us.ibm.com>
Date: Wed, 23 Jul 2008 21:28:19 -0700
Subject: memory-hotplug: add sysfs removable attribute for hotplug memory
 remove

Memory may be hot-removed on a per-memory-block basis, particularly on
POWER where the SPARSEMEM section size often matches the memory-block
size.  A user-level agent must be able to identify which sections of
memory are likely to be removable before attempting the potentially
expensive operation.  This patch adds a file called "removable" to the
memory directory in sysfs to help such an agent.  In this patch, a memory
block is considered removable if;

o It contains only MOVABLE pageblocks
o It contains only pageblocks with free pages regardless of pageblock type

On the other hand, a memory block starting with a PageReserved() page will
never be considered removable.  Without this patch, the user-agent is
forced to choose a memory block to remove randomly.

Sample output of the sysfs files:

./memory/memory0/removable: 0
./memory/memory1/removable: 0
./memory/memory2/removable: 0
./memory/memory3/removable: 0
./memory/memory4/removable: 0
./memory/memory5/removable: 0
./memory/memory6/removable: 0
./memory/memory7/removable: 1
./memory/memory8/removable: 0
./memory/memory9/removable: 0
./memory/memory10/removable: 0
./memory/memory11/removable: 0
./memory/memory12/removable: 0
./memory/memory13/removable: 0
./memory/memory14/removable: 0
./memory/memory15/removable: 0
./memory/memory16/removable: 0
./memory/memory17/removable: 1
./memory/memory18/removable: 1
./memory/memory19/removable: 1
./memory/memory20/removable: 1
./memory/memory21/removable: 1
./memory/memory22/removable: 1

Signed-off-by: Badari Pulavarty <pbadari@us.ibm.com>
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/ABI/testing/sysfs-devices-memory | 24 +++++++++++
 drivers/base/memory.c                          | 19 ++++++++
 include/linux/memory_hotplug.h                 | 12 ++++++
 mm/memory_hotplug.c                            | 60 ++++++++++++++++++++++++++
 4 files changed, 115 insertions(+)
 create mode 100644 Documentation/ABI/testing/sysfs-devices-memory

(limited to 'include/linux')

diff --git a/Documentation/ABI/testing/sysfs-devices-memory b/Documentation/ABI/testing/sysfs-devices-memory
new file mode 100644
index 000000000000..7a16fe1e2270
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-devices-memory
@@ -0,0 +1,24 @@
+What:		/sys/devices/system/memory
+Date:		June 2008
+Contact:	Badari Pulavarty <pbadari@us.ibm.com>
+Description:
+		The /sys/devices/system/memory contains a snapshot of the
+		internal state of the kernel memory blocks. Files could be
+		added or removed dynamically to represent hot-add/remove
+		operations.
+
+Users:		hotplug memory add/remove tools
+		https://w3.opensource.ibm.com/projects/powerpc-utils/
+
+What:		/sys/devices/system/memory/memoryX/removable
+Date:		June 2008
+Contact:	Badari Pulavarty <pbadari@us.ibm.com>
+Description:
+		The file /sys/devices/system/memory/memoryX/removable
+		indicates whether this memory block is removable or not.
+		This is useful for a user-level agent to determine
+		identify removable sections of the memory before attempting
+		potentially expensive hot-remove memory operation
+
+Users:		hotplug memory remove tools
+		https://w3.opensource.ibm.com/projects/powerpc-utils/
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 4d4e0e7b6e92..855ed1a9f97b 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -100,6 +100,21 @@ static ssize_t show_mem_phys_index(struct sys_device *dev,
 	return sprintf(buf, "%08lx\n", mem->phys_index);
 }
 
+/*
+ * Show whether the section of memory is likely to be hot-removable
+ */
+static ssize_t show_mem_removable(struct sys_device *dev, char *buf)
+{
+	unsigned long start_pfn;
+	int ret;
+	struct memory_block *mem =
+		container_of(dev, struct memory_block, sysdev);
+
+	start_pfn = section_nr_to_pfn(mem->phys_index);
+	ret = is_mem_section_removable(start_pfn, PAGES_PER_SECTION);
+	return sprintf(buf, "%d\n", ret);
+}
+
 /*
  * online, offline, going offline, etc.
  */
@@ -262,6 +277,7 @@ static ssize_t show_phys_device(struct sys_device *dev,
 static SYSDEV_ATTR(phys_index, 0444, show_mem_phys_index, NULL);
 static SYSDEV_ATTR(state, 0644, show_mem_state, store_mem_state);
 static SYSDEV_ATTR(phys_device, 0444, show_phys_device, NULL);
+static SYSDEV_ATTR(removable, 0444, show_mem_removable, NULL);
 
 #define mem_create_simple_file(mem, attr_name)	\
 	sysdev_create_file(&mem->sysdev, &attr_##attr_name)
@@ -350,6 +366,8 @@ static int add_memory_block(unsigned long node_id, struct mem_section *section,
 		ret = mem_create_simple_file(mem, state);
 	if (!ret)
 		ret = mem_create_simple_file(mem, phys_device);
+	if (!ret)
+		ret = mem_create_simple_file(mem, removable);
 
 	return ret;
 }
@@ -394,6 +412,7 @@ int remove_memory_block(unsigned long node_id, struct mem_section *section,
 	mem_remove_simple_file(mem, phys_index);
 	mem_remove_simple_file(mem, state);
 	mem_remove_simple_file(mem, phys_device);
+	mem_remove_simple_file(mem, removable);
 	unregister_memory(mem, section);
 
 	return 0;
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 3628e5088f64..763ba81fc0f0 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -199,6 +199,18 @@ extern int walk_memory_resource(unsigned long start_pfn,
 			unsigned long nr_pages, void *arg,
 			int (*func)(unsigned long, unsigned long, void *));
 
+#ifdef CONFIG_MEMORY_HOTREMOVE
+
+extern int is_mem_section_removable(unsigned long pfn, unsigned long nr_pages);
+
+#else
+static inline int is_mem_section_removable(unsigned long pfn,
+					unsigned long nr_pages)
+{
+	return 0;
+}
+#endif /* CONFIG_MEMORY_HOTREMOVE */
+
 extern int add_memory(int nid, u64 start, u64 size);
 extern int arch_add_memory(int nid, u64 start, u64 size);
 extern int remove_memory(u64 start, u64 size);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 93aba78dc8b6..89fee2dcb039 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -522,6 +522,66 @@ error:
 EXPORT_SYMBOL_GPL(add_memory);
 
 #ifdef CONFIG_MEMORY_HOTREMOVE
+/*
+ * A free page on the buddy free lists (not the per-cpu lists) has PageBuddy
+ * set and the size of the free page is given by page_order(). Using this,
+ * the function determines if the pageblock contains only free pages.
+ * Due to buddy contraints, a free page at least the size of a pageblock will
+ * be located at the start of the pageblock
+ */
+static inline int pageblock_free(struct page *page)
+{
+	return PageBuddy(page) && page_order(page) >= pageblock_order;
+}
+
+/* Return the start of the next active pageblock after a given page */
+static struct page *next_active_pageblock(struct page *page)
+{
+	int pageblocks_stride;
+
+	/* Ensure the starting page is pageblock-aligned */
+	BUG_ON(page_to_pfn(page) & (pageblock_nr_pages - 1));
+
+	/* Move forward by at least 1 * pageblock_nr_pages */
+	pageblocks_stride = 1;
+
+	/* If the entire pageblock is free, move to the end of free page */
+	if (pageblock_free(page))
+		pageblocks_stride += page_order(page) - pageblock_order;
+
+	return page + (pageblocks_stride * pageblock_nr_pages);
+}
+
+/* Checks if this range of memory is likely to be hot-removable. */
+int is_mem_section_removable(unsigned long start_pfn, unsigned long nr_pages)
+{
+	int type;
+	struct page *page = pfn_to_page(start_pfn);
+	struct page *end_page = page + nr_pages;
+
+	/* Check the starting page of each pageblock within the range */
+	for (; page < end_page; page = next_active_pageblock(page)) {
+		type = get_pageblock_migratetype(page);
+
+		/*
+		 * A pageblock containing MOVABLE or free pages is considered
+		 * removable
+		 */
+		if (type != MIGRATE_MOVABLE && !pageblock_free(page))
+			return 0;
+
+		/*
+		 * A pageblock starting with a PageReserved page is not
+		 * considered removable.
+		 */
+		if (PageReserved(page))
+			return 0;
+	}
+
+	/* All pageblocks in the memory block are likely to be hot-removable */
+	return 1;
+}
+
 /*
  * Confirm all pages in a range [start, end) is belongs to the same zone.
  */
-- 
cgit v1.2.3


From 9ca908f47bc784c90e17a553ce33e756c73feac4 Mon Sep 17 00:00:00 2001
From: Milton Miller <miltonm@bga.com>
Date: Wed, 23 Jul 2008 21:28:20 -0700
Subject: kcalloc: remove runtime division

While in all cases in the kernel we know the size of the elements to be
created, we don't always know the count of elements.  By commuting the size
and count in the overflow check, the compiler can reduce the runtime division
of size_t with a compare to a (unique) constant in these cases.

Signed-off-by: Milton Miller <miltonm@bga.com>
Cc: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/slab.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/slab.h b/include/linux/slab.h
index 9aa90a6f20e0..41103910f8a2 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -180,7 +180,7 @@ size_t ksize(const void *);
  */
 static inline void *kcalloc(size_t n, size_t size, gfp_t flags)
 {
-	if (n != 0 && size > ULONG_MAX / n)
+	if (size != 0 && n > ULONG_MAX / size)
 		return NULL;
 	return __kmalloc(n * size, flags | __GFP_ZERO);
 }
-- 
cgit v1.2.3


From 83d1674a946141c3c59d430e96c224f7937e6158 Mon Sep 17 00:00:00 2001
From: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Date: Wed, 23 Jul 2008 21:28:22 -0700
Subject: mm: make CONFIG_MIGRATION available w/o CONFIG_NUMA

We'd like to support CONFIG_MEMORY_HOTREMOVE on s390, which depends on
CONFIG_MIGRATION.  So far, CONFIG_MIGRATION is only available with NUMA
support.

This patch makes CONFIG_MIGRATION selectable for architectures that define
ARCH_ENABLE_MEMORY_HOTREMOVE.  When MIGRATION is enabled w/o NUMA, the
kernel won't compile because migrate_vmas() does not know about
vm_ops->migrate() and vma_migratable() does not know about policy_zone.
To fix this, those two functions can be restricted to '#ifdef CONFIG_NUMA'
because they are not being used w/o NUMA.  vma_migratable() is moved over
from migrate.h to mempolicy.h.

[kosaki.motohiro@jp.fujitsu.com: build fix]
Acked-by: Christoph Lameter <cl@linux-foundation.org>
Signed-off-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: KOSAKI Motorhiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mempolicy.h | 19 +++++++++++++++++++
 include/linux/migrate.h   | 21 ---------------------
 mm/Kconfig                |  2 +-
 mm/migrate.c              |  2 +-
 4 files changed, 21 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 3a39570b81b8..085c903fe0f1 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -59,6 +59,7 @@ enum {
 #include <linux/rbtree.h>
 #include <linux/spinlock.h>
 #include <linux/nodemask.h>
+#include <linux/pagemap.h>
 
 struct mm_struct;
 
@@ -220,6 +221,24 @@ extern int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context);
 extern int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol,
 			int no_context);
 #endif
+
+/* Check if a vma is migratable */
+static inline int vma_migratable(struct vm_area_struct *vma)
+{
+	if (vma->vm_flags & (VM_IO|VM_HUGETLB|VM_PFNMAP|VM_RESERVED))
+		return 0;
+	/*
+	 * Migration allocates pages in the highest zone. If we cannot
+	 * do so then migration (at least from node to node) is not
+	 * possible.
+	 */
+	if (vma->vm_file &&
+		gfp_zone(mapping_gfp_mask(vma->vm_file->f_mapping))
+								< policy_zone)
+			return 0;
+	return 1;
+}
+
 #else
 
 struct mempolicy {};
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index e10a90a93b5d..03aea612d284 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -3,28 +3,10 @@
 
 #include <linux/mm.h>
 #include <linux/mempolicy.h>
-#include <linux/pagemap.h>
 
 typedef struct page *new_page_t(struct page *, unsigned long private, int **);
 
 #ifdef CONFIG_MIGRATION
-/* Check if a vma is migratable */
-static inline int vma_migratable(struct vm_area_struct *vma)
-{
-	if (vma->vm_flags & (VM_IO|VM_HUGETLB|VM_PFNMAP|VM_RESERVED))
-		return 0;
-	/*
-	 * Migration allocates pages in the highest zone. If we cannot
-	 * do so then migration (at least from node to node) is not
-	 * possible.
-	 */
-	if (vma->vm_file &&
-		gfp_zone(mapping_gfp_mask(vma->vm_file->f_mapping))
-								< policy_zone)
-			return 0;
-	return 1;
-}
-
 extern int isolate_lru_page(struct page *p, struct list_head *pagelist);
 extern int putback_lru_pages(struct list_head *l);
 extern int migrate_page(struct address_space *,
@@ -39,9 +21,6 @@ extern int migrate_vmas(struct mm_struct *mm,
 		const nodemask_t *from, const nodemask_t *to,
 		unsigned long flags);
 #else
-static inline int vma_migratable(struct vm_area_struct *vma)
-					{ return 0; }
-
 static inline int isolate_lru_page(struct page *p, struct list_head *list)
 					{ return -ENOSYS; }
 static inline int putback_lru_pages(struct list_head *l) { return 0; }
diff --git a/mm/Kconfig b/mm/Kconfig
index c4de85285bb4..aa799007a11b 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -174,7 +174,7 @@ config SPLIT_PTLOCK_CPUS
 config MIGRATION
 	bool "Page migration"
 	def_bool y
-	depends on NUMA
+	depends on NUMA || ARCH_ENABLE_MEMORY_HOTREMOVE
 	help
 	  Allows the migration of the physical location of pages of processes
 	  while the virtual addresses are not changed. This is useful for
diff --git a/mm/migrate.c b/mm/migrate.c
index e7d13a708da0..376cceba82f9 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1071,7 +1071,6 @@ out2:
 	mmput(mm);
 	return err;
 }
-#endif
 
 /*
  * Call migration functions in the vma_ops that may prepare
@@ -1093,3 +1092,4 @@ int migrate_vmas(struct mm_struct *mm, const nodemask_t *to,
  	}
  	return err;
 }
+#endif
-- 
cgit v1.2.3


From 5459c164f0591ee75ed0203bb8f3817f25948e2f Mon Sep 17 00:00:00 2001
From: "Andrew G. Morgan" <morgan@kernel.org>
Date: Wed, 23 Jul 2008 21:28:24 -0700
Subject: security: protect legacy applications from executing with
 insufficient privilege

When cap_bset suppresses some of the forced (fP) capabilities of a file,
it is generally only safe to execute the program if it understands how to
recognize it doesn't have enough privilege to work correctly.  For legacy
applications (fE!=0), which have no non-destructive way to determine that
they are missing privilege, we fail to execute (EPERM) any executable that
requires fP capabilities, but would otherwise get pP' < fP.  This is a
fail-safe permission check.

For some discussion of why it is problematic for (legacy) privileged
applications to run with less than the set of capabilities requested for
them, see:

 http://userweb.kernel.org/~morgan/sendmail-capabilities-war-story.html

With this iteration of this support, we do not include setuid-0 based
privilege protection from the bounding set.  That is, the admin can still
(ab)use the bounding set to suppress the privileges of a setuid-0 program.

[akpm@linux-foundation.org: coding-style fixes]
[akpm@linux-foundation.org: cleanup]
Signed-off-by: Andrew G. Morgan <morgan@kernel.org>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/binfmts.h |   2 +-
 security/commoncap.c    | 108 ++++++++++++++++++++++++++----------------------
 2 files changed, 60 insertions(+), 50 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index ee0ed48e8348..826f62350805 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -38,7 +38,7 @@ struct linux_binprm{
 		     misc_bang:1;
 	struct file * file;
 	int e_uid, e_gid;
-	kernel_cap_t cap_inheritable, cap_permitted;
+	kernel_cap_t cap_post_exec_permitted;
 	bool cap_effective;
 	void *security;
 	int argc, envc;
diff --git a/security/commoncap.c b/security/commoncap.c
index 0b6537a3672d..4afbece37a08 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -162,8 +162,7 @@ void cap_capset_set (struct task_struct *target, kernel_cap_t *effective,
 
 static inline void bprm_clear_caps(struct linux_binprm *bprm)
 {
-	cap_clear(bprm->cap_inheritable);
-	cap_clear(bprm->cap_permitted);
+	cap_clear(bprm->cap_post_exec_permitted);
 	bprm->cap_effective = false;
 }
 
@@ -198,6 +197,7 @@ static inline int cap_from_disk(struct vfs_cap_data *caps,
 {
 	__u32 magic_etc;
 	unsigned tocopy, i;
+	int ret;
 
 	if (size < sizeof(magic_etc))
 		return -EINVAL;
@@ -225,19 +225,40 @@ static inline int cap_from_disk(struct vfs_cap_data *caps,
 		bprm->cap_effective = false;
 	}
 
-	for (i = 0; i < tocopy; ++i) {
-		bprm->cap_permitted.cap[i] =
-			le32_to_cpu(caps->data[i].permitted);
-		bprm->cap_inheritable.cap[i] =
-			le32_to_cpu(caps->data[i].inheritable);
-	}
-	while (i < VFS_CAP_U32) {
-		bprm->cap_permitted.cap[i] = 0;
-		bprm->cap_inheritable.cap[i] = 0;
-		i++;
+	ret = 0;
+
+	CAP_FOR_EACH_U32(i) {
+		__u32 value_cpu;
+
+		if (i >= tocopy) {
+			/*
+			 * Legacy capability sets have no upper bits
+			 */
+			bprm->cap_post_exec_permitted.cap[i] = 0;
+			continue;
+		}
+		/*
+		 * pP' = (X & fP) | (pI & fI)
+		 */
+		value_cpu = le32_to_cpu(caps->data[i].permitted);
+		bprm->cap_post_exec_permitted.cap[i] =
+			(current->cap_bset.cap[i] & value_cpu) |
+			(current->cap_inheritable.cap[i] &
+				le32_to_cpu(caps->data[i].inheritable));
+		if (value_cpu & ~bprm->cap_post_exec_permitted.cap[i]) {
+			/*
+			 * insufficient to execute correctly
+			 */
+			ret = -EPERM;
+		}
 	}
 
-	return 0;
+	/*
+	 * For legacy apps, with no internal support for recognizing they
+	 * do not have enough capabilities, we return an error if they are
+	 * missing some "forced" (aka file-permitted) capabilities.
+	 */
+	return bprm->cap_effective ? ret : 0;
 }
 
 /* Locate any VFS capabilities: */
@@ -269,9 +290,9 @@ static int get_file_caps(struct linux_binprm *bprm)
 		goto out;
 
 	rc = cap_from_disk(&vcaps, bprm, rc);
-	if (rc)
+	if (rc == -EINVAL)
 		printk(KERN_NOTICE "%s: cap_from_disk returned %d for %s\n",
-			__func__, rc, bprm->filename);
+		       __func__, rc, bprm->filename);
 
 out:
 	dput(dentry);
@@ -304,25 +325,24 @@ int cap_bprm_set_security (struct linux_binprm *bprm)
 	int ret;
 
 	ret = get_file_caps(bprm);
-	if (ret)
-		printk(KERN_NOTICE "%s: get_file_caps returned %d for %s\n",
-			__func__, ret, bprm->filename);
-
-	/*  To support inheritance of root-permissions and suid-root
-	 *  executables under compatibility mode, we raise all three
-	 *  capability sets for the file.
-	 *
-	 *  If only the real uid is 0, we only raise the inheritable
-	 *  and permitted sets of the executable file.
-	 */
 
-	if (!issecure (SECURE_NOROOT)) {
+	if (!issecure(SECURE_NOROOT)) {
+		/*
+		 * To support inheritance of root-permissions and suid-root
+		 * executables under compatibility mode, we override the
+		 * capability sets for the file.
+		 *
+		 * If only the real uid is 0, we do not set the effective
+		 * bit.
+		 */
 		if (bprm->e_uid == 0 || current->uid == 0) {
-			cap_set_full (bprm->cap_inheritable);
-			cap_set_full (bprm->cap_permitted);
+			/* pP' = (cap_bset & ~0) | (pI & ~0) */
+			bprm->cap_post_exec_permitted = cap_combine(
+				current->cap_bset, current->cap_inheritable
+				);
+			bprm->cap_effective = (bprm->e_uid == 0);
+			ret = 0;
 		}
-		if (bprm->e_uid == 0)
-			bprm->cap_effective = true;
 	}
 
 	return ret;
@@ -330,17 +350,9 @@ int cap_bprm_set_security (struct linux_binprm *bprm)
 
 void cap_bprm_apply_creds (struct linux_binprm *bprm, int unsafe)
 {
-	/* Derived from fs/exec.c:compute_creds. */
-	kernel_cap_t new_permitted, working;
-
-	new_permitted = cap_intersect(bprm->cap_permitted,
-				 current->cap_bset);
-	working = cap_intersect(bprm->cap_inheritable,
-				 current->cap_inheritable);
-	new_permitted = cap_combine(new_permitted, working);
-
 	if (bprm->e_uid != current->uid || bprm->e_gid != current->gid ||
-	    !cap_issubset (new_permitted, current->cap_permitted)) {
+	    !cap_issubset(bprm->cap_post_exec_permitted,
+			  current->cap_permitted)) {
 		set_dumpable(current->mm, suid_dumpable);
 		current->pdeath_signal = 0;
 
@@ -350,9 +362,9 @@ void cap_bprm_apply_creds (struct linux_binprm *bprm, int unsafe)
 				bprm->e_gid = current->gid;
 			}
 			if (cap_limit_ptraced_target()) {
-				new_permitted =
-					cap_intersect(new_permitted,
-						      current->cap_permitted);
+				bprm->cap_post_exec_permitted = cap_intersect(
+					bprm->cap_post_exec_permitted,
+					current->cap_permitted);
 			}
 		}
 	}
@@ -364,9 +376,9 @@ void cap_bprm_apply_creds (struct linux_binprm *bprm, int unsafe)
 	 * in the init_task struct. Thus we skip the usual
 	 * capability rules */
 	if (!is_global_init(current)) {
-		current->cap_permitted = new_permitted;
+		current->cap_permitted = bprm->cap_post_exec_permitted;
 		if (bprm->cap_effective)
-			current->cap_effective = new_permitted;
+			current->cap_effective = bprm->cap_post_exec_permitted;
 		else
 			cap_clear(current->cap_effective);
 	}
@@ -381,9 +393,7 @@ int cap_bprm_secureexec (struct linux_binprm *bprm)
 	if (current->uid != 0) {
 		if (bprm->cap_effective)
 			return 1;
-		if (!cap_isclear(bprm->cap_permitted))
-			return 1;
-		if (!cap_isclear(bprm->cap_inheritable))
+		if (!cap_isclear(bprm->cap_post_exec_permitted))
 			return 1;
 	}
 
-- 
cgit v1.2.3


From 9b3e43a747c74029b0acf6acf4666601f132f471 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh@veritas.com>
Date: Wed, 23 Jul 2008 21:28:26 -0700
Subject: security: remove unused forwards

Why would linux/security.h need forward declarations for nfsctl_arg and
swap_info_struct?  It's hard to imagine: remove them.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Acked-by: James Morris <jmorris@namei.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/security.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/security.h b/include/linux/security.h
index 31c8851ec5d0..f0e9adb22ac2 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -102,9 +102,7 @@ extern unsigned long mmap_min_addr;
 #define LSM_SETID_FS	8
 
 /* forward declares to avoid warnings */
-struct nfsctl_arg;
 struct sched_param;
-struct swap_info_struct;
 struct request_sock;
 
 /* bprm_apply_creds unsafe reasons */
-- 
cgit v1.2.3


From d75f65fd247fe85d90a3880d143b1bb22fe13a48 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Wed, 23 Jul 2008 21:28:34 -0700
Subject: remove include/linux/pm_legacy.h

Remove the obsolete and no longer used include/linux/pm_legacy.h

Reviewed-by: Robert P. J. Day <rpjday@crashcourse.ca>
Signed-off-by: Adrian Bunk <bunk@kernel.org>
Cc: Pavel Machek <pavel@suse.cz>
Acked-by: "Rafael J. Wysocki" <rjw@sisk.pl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/frv/kernel/pm.c            |  1 -
 arch/mips/au1000/common/power.c |  1 -
 arch/x86/kernel/apm_32.c        |  1 -
 include/linux/pm_legacy.h       | 35 -----------------------------------
 4 files changed, 38 deletions(-)
 delete mode 100644 include/linux/pm_legacy.h

(limited to 'include/linux')

diff --git a/arch/frv/kernel/pm.c b/arch/frv/kernel/pm.c
index 73f3aeefd203..d1113c5031f5 100644
--- a/arch/frv/kernel/pm.c
+++ b/arch/frv/kernel/pm.c
@@ -14,7 +14,6 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/pm.h>
-#include <linux/pm_legacy.h>
 #include <linux/sched.h>
 #include <linux/interrupt.h>
 #include <linux/sysctl.h>
diff --git a/arch/mips/au1000/common/power.c b/arch/mips/au1000/common/power.c
index 2166b9e1e80c..bd854a6d1d89 100644
--- a/arch/mips/au1000/common/power.c
+++ b/arch/mips/au1000/common/power.c
@@ -31,7 +31,6 @@
 
 #include <linux/init.h>
 #include <linux/pm.h>
-#include <linux/pm_legacy.h>
 #include <linux/sysctl.h>
 #include <linux/jiffies.h>
 
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index bf9b441331e9..9ee24e6bc4b0 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -219,7 +219,6 @@
 #include <linux/time.h>
 #include <linux/sched.h>
 #include <linux/pm.h>
-#include <linux/pm_legacy.h>
 #include <linux/capability.h>
 #include <linux/device.h>
 #include <linux/kernel.h>
diff --git a/include/linux/pm_legacy.h b/include/linux/pm_legacy.h
deleted file mode 100644
index 446f4f42b952..000000000000
--- a/include/linux/pm_legacy.h
+++ /dev/null
@@ -1,35 +0,0 @@
-#ifndef __LINUX_PM_LEGACY_H__
-#define __LINUX_PM_LEGACY_H__
-
-
-#ifdef CONFIG_PM_LEGACY
-
-/*
- * Register a device with power management
- */
-struct pm_dev __deprecated *
-pm_register(pm_dev_t type, unsigned long id, pm_callback callback);
-
-/*
- * Send a request to all devices
- */
-int __deprecated pm_send_all(pm_request_t rqst, void *data);
-
-#else /* CONFIG_PM_LEGACY */
-
-static inline struct pm_dev *pm_register(pm_dev_t type,
-					 unsigned long id,
-					 pm_callback callback)
-{
-	return NULL;
-}
-
-static inline int pm_send_all(pm_request_t rqst, void *data)
-{
-	return 0;
-}
-
-#endif /* CONFIG_PM_LEGACY */
-
-#endif /* __LINUX_PM_LEGACY_H__ */
-
-- 
cgit v1.2.3


From 558481f038e587b22d02167af58914c814ce9de5 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Wed, 23 Jul 2008 21:28:35 -0700
Subject: pm: remove definition of struct pm_dev

Remove the definition of 'struct pm_dev', which is not used any more,
along with some related stuff from include/linux/pm.h .

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Pavel Machek <pavel@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/pm.h | 24 ------------------------
 1 file changed, 24 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pm.h b/include/linux/pm.h
index 4ad9de94449a..5bf1ce89cfbb 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -68,30 +68,6 @@ enum
  */
 #define PM_PCI_ID(dev) ((dev)->bus->number << 16 | (dev)->devfn)
 
-/*
- * Request handler callback
- */
-struct pm_dev;
-
-typedef int (*pm_callback)(struct pm_dev *dev, pm_request_t rqst, void *data);
-
-/*
- * Dynamic device information
- */
-struct pm_dev
-{
-	pm_dev_t	 type;
-	unsigned long	 id;
-	pm_callback	 callback;
-	void		*data;
-
-	unsigned long	 flags;
-	unsigned long	 state;
-	unsigned long	 prev_state;
-
-	struct list_head entry;
-};
-
 /* Functions above this comment are list-based old-style power
  * management. Please avoid using them.  */
 
-- 
cgit v1.2.3


From e7ecb331e11d1f7aa66aeef9170fc20781c9bb55 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Wed, 23 Jul 2008 21:28:35 -0700
Subject: pm: remove remaining obsolete definitions from pm.h

Remove the remaining obsolete definitions from include/linux/pm.h and move
the definitions of PM_SUSPEND and PM_RESUME to the header of h3600 which
is the only user of them.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Pavel Machek <pavel@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-arm/arch-sa1100/h3600.h |  5 ++++
 include/linux/pm.h                  | 46 -------------------------------------
 2 files changed, 5 insertions(+), 46 deletions(-)

(limited to 'include/linux')

diff --git a/include/asm-arm/arch-sa1100/h3600.h b/include/asm-arm/arch-sa1100/h3600.h
index 1b6355971574..3ca0ecf095e6 100644
--- a/include/asm-arm/arch-sa1100/h3600.h
+++ b/include/asm-arm/arch-sa1100/h3600.h
@@ -23,6 +23,11 @@
 #ifndef _INCLUDE_H3600_H_
 #define _INCLUDE_H3600_H_
 
+typedef int __bitwise pm_request_t;
+
+#define PM_SUSPEND	((__force pm_request_t) 1)	/* enter D1-D3 */
+#define PM_RESUME	((__force pm_request_t) 2)	/* enter D0 */
+
 /* generalized support for H3xxx series Compaq Pocket PC's */
 #define machine_is_h3xxx() (machine_is_h3100() || machine_is_h3600() || machine_is_h3800())
 
diff --git a/include/linux/pm.h b/include/linux/pm.h
index 5bf1ce89cfbb..390dd95a375e 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -25,52 +25,6 @@
 #include <asm/atomic.h>
 #include <asm/errno.h>
 
-/*
- * Power management requests... these are passed to pm_send_all() and friends.
- *
- * these functions are old and deprecated, see below.
- */
-typedef int __bitwise pm_request_t;
-
-#define PM_SUSPEND	((__force pm_request_t) 1)	/* enter D1-D3 */
-#define PM_RESUME	((__force pm_request_t) 2)	/* enter D0 */
-
-
-/*
- * Device types... these are passed to pm_register
- */
-typedef int __bitwise pm_dev_t;
-
-#define PM_UNKNOWN_DEV	((__force pm_dev_t) 0)	/* generic */
-#define PM_SYS_DEV	((__force pm_dev_t) 1)	/* system device (fan, KB controller, ...) */
-#define PM_PCI_DEV	((__force pm_dev_t) 2)	/* PCI device */
-#define PM_USB_DEV	((__force pm_dev_t) 3)	/* USB device */
-#define PM_SCSI_DEV	((__force pm_dev_t) 4)	/* SCSI device */
-#define PM_ISA_DEV	((__force pm_dev_t) 5)	/* ISA device */
-#define	PM_MTD_DEV	((__force pm_dev_t) 6)	/* Memory Technology Device */
-
-/*
- * System device hardware ID (PnP) values
- */
-enum
-{
-	PM_SYS_UNKNOWN = 0x00000000, /* generic */
-	PM_SYS_KBC =	 0x41d00303, /* keyboard controller */
-	PM_SYS_COM =	 0x41d00500, /* serial port */
-	PM_SYS_IRDA =	 0x41d00510, /* IRDA controller */
-	PM_SYS_FDC =	 0x41d00700, /* floppy controller */
-	PM_SYS_VGA =	 0x41d00900, /* VGA controller */
-	PM_SYS_PCMCIA =	 0x41d00e00, /* PCMCIA controller */
-};
-
-/*
- * Device identifier
- */
-#define PM_PCI_ID(dev) ((dev)->bus->number << 16 | (dev)->devfn)
-
-/* Functions above this comment are list-based old-style power
- * management. Please avoid using them.  */
-
 /*
  * Callbacks for platform drivers to implement.
  */
-- 
cgit v1.2.3


From 8c363265d57d755e62053e9f69a1f2164e83f7ea Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Wed, 23 Jul 2008 21:28:37 -0700
Subject: pm: drop unnecessary includes from pm.h

Drop unnecessary includes from include/linux/pm.h .

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Pavel Machek <pavel@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/pm.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pm.h b/include/linux/pm.h
index 390dd95a375e..ed98d967f9fb 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -22,8 +22,6 @@
 #define _LINUX_PM_H
 
 #include <linux/list.h>
-#include <asm/atomic.h>
-#include <asm/errno.h>
 
 /*
  * Callbacks for platform drivers to implement.
-- 
cgit v1.2.3


From 8111d1b552349921aae1acf73e4e8cea98e80970 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Wed, 23 Jul 2008 21:28:37 -0700
Subject: pm: add new PM_EVENT codes for runtime power transitions

This patch (as1112) adds some new PM_EVENT_* codes for use by kernel
subsystems.  They describe runtime power-state transitions of the sort already
implemented by the USB subsystem.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/pm.h | 37 +++++++++++++++++++++++++++++++++++--
 1 file changed, 35 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pm.h b/include/linux/pm.h
index ed98d967f9fb..4dcce54b6d76 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -245,6 +245,21 @@ struct pm_ext_ops {
  * RECOVER	Creation of a hibernation image or restoration of the main
  *		memory contents from a hibernation image has failed, call
  *		->thaw() and ->complete() for all devices.
+ *
+ * The following PM_EVENT_ messages are defined for internal use by
+ * kernel subsystems.  They are never issued by the PM core.
+ *
+ * USER_SUSPEND		Manual selective suspend was issued by userspace.
+ *
+ * USER_RESUME		Manual selective resume was issued by userspace.
+ *
+ * REMOTE_WAKEUP	Remote-wakeup request was received from the device.
+ *
+ * AUTO_SUSPEND		Automatic (device idle) runtime suspend was
+ *			initiated by the subsystem.
+ *
+ * AUTO_RESUME		Automatic (device needed) runtime resume was
+ *			requested by a driver.
  */
 
 #define PM_EVENT_ON		0x0000
@@ -256,9 +271,18 @@ struct pm_ext_ops {
 #define PM_EVENT_THAW		0x0020
 #define PM_EVENT_RESTORE	0x0040
 #define PM_EVENT_RECOVER	0x0080
+#define PM_EVENT_USER		0x0100
+#define PM_EVENT_REMOTE		0x0200
+#define PM_EVENT_AUTO		0x0400
 
-#define PM_EVENT_SLEEP	(PM_EVENT_SUSPEND | PM_EVENT_HIBERNATE)
+#define PM_EVENT_SLEEP		(PM_EVENT_SUSPEND | PM_EVENT_HIBERNATE)
+#define PM_EVENT_USER_SUSPEND	(PM_EVENT_USER | PM_EVENT_SUSPEND)
+#define PM_EVENT_USER_RESUME	(PM_EVENT_USER | PM_EVENT_RESUME)
+#define PM_EVENT_REMOTE_WAKEUP	(PM_EVENT_REMOTE | PM_EVENT_RESUME)
+#define PM_EVENT_AUTO_SUSPEND	(PM_EVENT_AUTO | PM_EVENT_SUSPEND)
+#define PM_EVENT_AUTO_RESUME	(PM_EVENT_AUTO | PM_EVENT_RESUME)
 
+#define PMSG_ON		((struct pm_message){ .event = PM_EVENT_ON, })
 #define PMSG_FREEZE	((struct pm_message){ .event = PM_EVENT_FREEZE, })
 #define PMSG_QUIESCE	((struct pm_message){ .event = PM_EVENT_QUIESCE, })
 #define PMSG_SUSPEND	((struct pm_message){ .event = PM_EVENT_SUSPEND, })
@@ -267,7 +291,16 @@ struct pm_ext_ops {
 #define PMSG_THAW	((struct pm_message){ .event = PM_EVENT_THAW, })
 #define PMSG_RESTORE	((struct pm_message){ .event = PM_EVENT_RESTORE, })
 #define PMSG_RECOVER	((struct pm_message){ .event = PM_EVENT_RECOVER, })
-#define PMSG_ON		((struct pm_message){ .event = PM_EVENT_ON, })
+#define PMSG_USER_SUSPEND	((struct pm_messge) \
+					{ .event = PM_EVENT_USER_SUSPEND, })
+#define PMSG_USER_RESUME	((struct pm_messge) \
+					{ .event = PM_EVENT_USER_RESUME, })
+#define PMSG_REMOTE_RESUME	((struct pm_messge) \
+					{ .event = PM_EVENT_REMOTE_RESUME, })
+#define PMSG_AUTO_SUSPEND	((struct pm_messge) \
+					{ .event = PM_EVENT_AUTO_SUSPEND, })
+#define PMSG_AUTO_RESUME		((struct pm_messge) \
+					{ .event = PM_EVENT_AUTO_RESUME, })
 
 /**
  * Device power management states
-- 
cgit v1.2.3


From c1a220e7acf8ad2c03504891f4a70cd9c32c904b Mon Sep 17 00:00:00 2001
From: Zhang Rui <rui.zhang@intel.com>
Date: Wed, 23 Jul 2008 21:28:39 -0700
Subject: pm: introduce new interfaces schedule_work_on() and queue_work_on()

This interface allows adding a job on a specific cpu.

Although a work struct on a cpu will be scheduled to other cpu if the cpu
dies, there is a recursion if a work task tries to offline the cpu it's
running on.  we need to schedule the task to a specific cpu in this case.
http://bugzilla.kernel.org/show_bug.cgi?id=10897

[oleg@tv-sign.ru: cleanups]
Signed-off-by: Zhang Rui <rui.zhang@intel.com>
Tested-by: Rus <harbour@sfinx.od.ua>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/workqueue.h |  3 +++
 kernel/workqueue.c        | 39 ++++++++++++++++++++++++++++++++++++++-
 2 files changed, 41 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 542526c6e8ef..14d47120682b 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -179,6 +179,8 @@ __create_workqueue_key(const char *name, int singlethread,
 extern void destroy_workqueue(struct workqueue_struct *wq);
 
 extern int queue_work(struct workqueue_struct *wq, struct work_struct *work);
+extern int queue_work_on(int cpu, struct workqueue_struct *wq,
+			struct work_struct *work);
 extern int queue_delayed_work(struct workqueue_struct *wq,
 			struct delayed_work *work, unsigned long delay);
 extern int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
@@ -188,6 +190,7 @@ extern void flush_workqueue(struct workqueue_struct *wq);
 extern void flush_scheduled_work(void);
 
 extern int schedule_work(struct work_struct *work);
+extern int schedule_work_on(int cpu, struct work_struct *work);
 extern int schedule_delayed_work(struct delayed_work *work, unsigned long delay);
 extern int schedule_delayed_work_on(int cpu, struct delayed_work *work,
 					unsigned long delay);
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index a6d36346d10a..6fd158b21026 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -140,7 +140,6 @@ static void insert_work(struct cpu_workqueue_struct *cwq,
 	wake_up(&cwq->more_work);
 }
 
-/* Preempt must be disabled. */
 static void __queue_work(struct cpu_workqueue_struct *cwq,
 			 struct work_struct *work)
 {
@@ -175,6 +174,31 @@ int queue_work(struct workqueue_struct *wq, struct work_struct *work)
 }
 EXPORT_SYMBOL_GPL(queue_work);
 
+/**
+ * queue_work_on - queue work on specific cpu
+ * @cpu: CPU number to execute work on
+ * @wq: workqueue to use
+ * @work: work to queue
+ *
+ * Returns 0 if @work was already on a queue, non-zero otherwise.
+ *
+ * We queue the work to a specific CPU, the caller must ensure it
+ * can't go away.
+ */
+int
+queue_work_on(int cpu, struct workqueue_struct *wq, struct work_struct *work)
+{
+	int ret = 0;
+
+	if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) {
+		BUG_ON(!list_empty(&work->entry));
+		__queue_work(wq_per_cpu(wq, cpu), work);
+		ret = 1;
+	}
+	return ret;
+}
+EXPORT_SYMBOL_GPL(queue_work_on);
+
 static void delayed_work_timer_fn(unsigned long __data)
 {
 	struct delayed_work *dwork = (struct delayed_work *)__data;
@@ -553,6 +577,19 @@ int schedule_work(struct work_struct *work)
 }
 EXPORT_SYMBOL(schedule_work);
 
+/*
+ * schedule_work_on - put work task on a specific cpu
+ * @cpu: cpu to put the work task on
+ * @work: job to be done
+ *
+ * This puts a job on a specific cpu
+ */
+int schedule_work_on(int cpu, struct work_struct *work)
+{
+	return queue_work_on(cpu, keventd_wq, work);
+}
+EXPORT_SYMBOL(schedule_work_on);
+
 /**
  * schedule_delayed_work - put work task in global workqueue after delay
  * @dwork: job to be done
-- 
cgit v1.2.3


From bdfe6b7c681669148dae4db27eb24ee5408ba371 Mon Sep 17 00:00:00 2001
From: Shaohua Li <shaohua.li@intel.com>
Date: Wed, 23 Jul 2008 21:28:41 -0700
Subject: pm: acpi hibernation: utilize hardware signature

ACPI defines a hardware signature.  BIOS calculates the signature according to
hardware configure and if hardware changes while hibernated, the signature
will change.  In that case, S4 resume should fail.

Still, there may be systems on which this mechanism does not work correctly,
so it is better to provide a workaround for them.  For this reason, add a new
switch to the acpi_sleep= command line argument allowing one to disable
hardware signature checking.

[shaohua.li@intel.com: build fix]
Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Len Brown <lenb@kernel.org>
Acked-by: Pavel Machek <pavel@ucw.cz>
Cc: <Valdis.Kletnieks@vt.edu>
Cc: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/kernel-parameters.txt |  4 +++-
 arch/x86/kernel/acpi/sleep.c        |  4 ++++
 drivers/acpi/sleep/main.c           | 22 ++++++++++++++++++++++
 include/linux/acpi.h                |  1 +
 4 files changed, 30 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 4d705713cabc..497a98dafdaa 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -148,10 +148,12 @@ and is between 256 and 4096 characters. It is defined in the file
 			default: 0
 
 	acpi_sleep=	[HW,ACPI] Sleep options
-			Format: { s3_bios, s3_mode, s3_beep, old_ordering }
+			Format: { s3_bios, s3_mode, s3_beep, s4_nohwsig, old_ordering }
 			See Documentation/power/video.txt for s3_bios and s3_mode.
 			s3_beep is for debugging; it makes the PC's speaker beep
 			as soon as the kernel's real-mode entry point is called.
+			s4_nohwsig prevents ACPI hardware signature from being
+			used during resume from hibernation.
 			old_ordering causes the ACPI 1.0 ordering of the _PTS
 			control method, wrt putting devices into low power
 			states, to be enforced (the ACPI 2.0 ordering of _PTS is
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index a3ddad18aaa3..fa2161d5003b 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -150,6 +150,10 @@ static int __init acpi_sleep_setup(char *str)
 			acpi_realmode_flags |= 2;
 		if (strncmp(str, "s3_beep", 7) == 0)
 			acpi_realmode_flags |= 4;
+#ifdef CONFIG_HIBERNATION
+		if (strncmp(str, "s4_nohwsig", 10) == 0)
+			acpi_no_s4_hw_signature();
+#endif
 		if (strncmp(str, "old_ordering", 12) == 0)
 			acpi_old_suspend_ordering();
 		str = strchr(str, ',');
diff --git a/drivers/acpi/sleep/main.c b/drivers/acpi/sleep/main.c
index 0489a7d1d42c..313507accf18 100644
--- a/drivers/acpi/sleep/main.c
+++ b/drivers/acpi/sleep/main.c
@@ -283,6 +283,15 @@ static struct platform_suspend_ops acpi_suspend_ops_old = {
 #endif /* CONFIG_SUSPEND */
 
 #ifdef CONFIG_HIBERNATION
+static unsigned long s4_hardware_signature;
+static struct acpi_table_facs *facs;
+static bool nosigcheck;
+
+void __init acpi_no_s4_hw_signature(void)
+{
+	nosigcheck = true;
+}
+
 static int acpi_hibernation_begin(void)
 {
 	acpi_target_sleep_state = ACPI_STATE_S4;
@@ -316,6 +325,12 @@ static void acpi_hibernation_leave(void)
 	acpi_enable();
 	/* Reprogram control registers and execute _BFS */
 	acpi_leave_sleep_state_prep(ACPI_STATE_S4);
+	/* Check the hardware signature */
+	if (facs && s4_hardware_signature != facs->hardware_signature) {
+		printk(KERN_EMERG "ACPI: Hardware changed while hibernated, "
+			"cannot resume!\n");
+		panic("ACPI S4 hardware signature mismatch");
+	}
 }
 
 static void acpi_pm_enable_gpes(void)
@@ -544,6 +559,13 @@ int __init acpi_sleep_init(void)
 			&acpi_hibernation_ops_old : &acpi_hibernation_ops);
 		sleep_states[ACPI_STATE_S4] = 1;
 		printk(" S4");
+		if (!nosigcheck) {
+			acpi_get_table_by_index(ACPI_TABLE_INDEX_FACS,
+				(struct acpi_table_header **)&facs);
+			if (facs)
+				s4_hardware_signature =
+					facs->hardware_signature;
+		}
 	}
 #endif
 	status = acpi_get_sleep_type_data(ACPI_STATE_S5, &type_a, &type_b);
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index a17177639376..702f79dad16a 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -236,6 +236,7 @@ int acpi_check_mem_region(resource_size_t start, resource_size_t n,
 		      const char *name);
 
 #ifdef CONFIG_PM_SLEEP
+void __init acpi_no_s4_hw_signature(void);
 void __init acpi_old_suspend_ordering(void);
 #endif /* CONFIG_PM_SLEEP */
 #else	/* CONFIG_ACPI */
-- 
cgit v1.2.3


From f606ddf42fd4edc558eeb48bfee66d2c591571d2 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Wed, 23 Jul 2008 21:28:50 -0700
Subject: remove the v850 port

Trying to compile the v850 port brings many compile errors, one of them exists
since at least kernel 2.6.19.

There also seems to be noone willing to bring this port back into a usable
state.

This patch therefore removes the v850 port.

If anyone ever decides to revive the v850 port the code will still be
available from older kernels, and it wouldn't be impossible for the port to
reenter the kernel if it would become actively maintained again.

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Acked-by: Greg Ungerer <gerg@uclinux.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 MAINTAINERS                            |    3 -
 arch/v850/Kconfig                      |  353 ----------
 arch/v850/Kconfig.debug                |   10 -
 arch/v850/Makefile                     |   54 --
 arch/v850/README                       |   44 --
 arch/v850/configs/rte-ma1-cb_defconfig |  617 ------------------
 arch/v850/configs/rte-me2-cb_defconfig |  462 -------------
 arch/v850/configs/sim_defconfig        |  451 -------------
 arch/v850/kernel/Makefile              |   40 --
 arch/v850/kernel/anna-rom.ld           |   16 -
 arch/v850/kernel/anna.c                |  202 ------
 arch/v850/kernel/anna.ld               |   20 -
 arch/v850/kernel/as85ep1-rom.ld        |   21 -
 arch/v850/kernel/as85ep1.c             |  234 -------
 arch/v850/kernel/as85ep1.ld            |   49 --
 arch/v850/kernel/asm-offsets.c         |   58 --
 arch/v850/kernel/bug.c                 |  142 ----
 arch/v850/kernel/entry.S               | 1121 --------------------------------
 arch/v850/kernel/fpga85e2c.c           |  167 -----
 arch/v850/kernel/fpga85e2c.ld          |   62 --
 arch/v850/kernel/gbus_int.c            |  271 --------
 arch/v850/kernel/head.S                |  128 ----
 arch/v850/kernel/highres_timer.c       |  132 ----
 arch/v850/kernel/init_task.c           |   48 --
 arch/v850/kernel/intv.S                |   87 ---
 arch/v850/kernel/irq.c                 |  123 ----
 arch/v850/kernel/ma.c                  |   69 --
 arch/v850/kernel/mach.c                |   17 -
 arch/v850/kernel/mach.h                |   56 --
 arch/v850/kernel/me2.c                 |   73 ---
 arch/v850/kernel/memcons.c             |  135 ----
 arch/v850/kernel/module.c              |  237 -------
 arch/v850/kernel/process.c             |  217 -------
 arch/v850/kernel/procfs.c              |   67 --
 arch/v850/kernel/ptrace.c              |  235 -------
 arch/v850/kernel/rte_cb.c              |  193 ------
 arch/v850/kernel/rte_cb_leds.c         |  137 ----
 arch/v850/kernel/rte_cb_multi.c        |  121 ----
 arch/v850/kernel/rte_ma1_cb-rom.ld     |   14 -
 arch/v850/kernel/rte_ma1_cb.c          |  107 ---
 arch/v850/kernel/rte_ma1_cb.ld         |   57 --
 arch/v850/kernel/rte_mb_a_pci.c        |  819 -----------------------
 arch/v850/kernel/rte_me2_cb.c          |  298 ---------
 arch/v850/kernel/rte_me2_cb.ld         |   30 -
 arch/v850/kernel/rte_nb85e_cb-multi.ld |   57 --
 arch/v850/kernel/rte_nb85e_cb.c        |   81 ---
 arch/v850/kernel/rte_nb85e_cb.ld       |   22 -
 arch/v850/kernel/setup.c               |  329 ----------
 arch/v850/kernel/signal.c              |  523 ---------------
 arch/v850/kernel/sim.c                 |  172 -----
 arch/v850/kernel/sim.ld                |   13 -
 arch/v850/kernel/sim85e2.c             |  195 ------
 arch/v850/kernel/sim85e2.ld            |   36 -
 arch/v850/kernel/simcons.c             |  161 -----
 arch/v850/kernel/syscalls.c            |  196 ------
 arch/v850/kernel/teg.c                 |   62 --
 arch/v850/kernel/time.c                |  106 ---
 arch/v850/kernel/v850_ksyms.c          |   51 --
 arch/v850/kernel/v850e2_cache.c        |  127 ----
 arch/v850/kernel/v850e_cache.c         |  174 -----
 arch/v850/kernel/v850e_intc.c          |  104 ---
 arch/v850/kernel/v850e_timer_d.c       |   54 --
 arch/v850/kernel/v850e_utils.c         |   62 --
 arch/v850/kernel/vmlinux.lds.S         |  306 ---------
 arch/v850/lib/Makefile                 |    6 -
 arch/v850/lib/ashldi3.c                |   62 --
 arch/v850/lib/ashrdi3.c                |   63 --
 arch/v850/lib/checksum.c               |  155 -----
 arch/v850/lib/lshrdi3.c                |   62 --
 arch/v850/lib/memcpy.c                 |   92 ---
 arch/v850/lib/memset.c                 |   68 --
 arch/v850/lib/muldi3.c                 |   61 --
 arch/v850/lib/negdi2.c                 |   25 -
 drivers/serial/Kconfig                 |   16 -
 drivers/watchdog/Kconfig               |    2 -
 drivers/watchdog/Makefile              |    2 -
 include/asm-v850/Kbuild                |    1 -
 include/asm-v850/a.out.h               |   21 -
 include/asm-v850/anna.h                |  137 ----
 include/asm-v850/as85ep1.h             |  152 -----
 include/asm-v850/asm.h                 |   32 -
 include/asm-v850/atomic.h              |  131 ----
 include/asm-v850/auxvec.h              |    4 -
 include/asm-v850/bitops.h              |  161 -----
 include/asm-v850/bug.h                 |   25 -
 include/asm-v850/bugs.h                |   16 -
 include/asm-v850/byteorder.h           |   48 --
 include/asm-v850/cache.h               |   26 -
 include/asm-v850/cacheflush.h          |   70 --
 include/asm-v850/checksum.h            |  112 ----
 include/asm-v850/clinkage.h            |   26 -
 include/asm-v850/cputime.h             |    6 -
 include/asm-v850/current.h             |   47 --
 include/asm-v850/delay.h               |   47 --
 include/asm-v850/device.h              |    7 -
 include/asm-v850/div64.h               |    1 -
 include/asm-v850/dma-mapping.h         |   11 -
 include/asm-v850/dma.h                 |   18 -
 include/asm-v850/elf.h                 |   99 ---
 include/asm-v850/emergency-restart.h   |    6 -
 include/asm-v850/entry.h               |  113 ----
 include/asm-v850/errno.h               |    6 -
 include/asm-v850/fb.h                  |   12 -
 include/asm-v850/fcntl.h               |   11 -
 include/asm-v850/flat.h                |  133 ----
 include/asm-v850/fpga85e2c.h           |   82 ---
 include/asm-v850/futex.h               |    6 -
 include/asm-v850/gbus_int.h            |   97 ---
 include/asm-v850/hardirq.h             |   28 -
 include/asm-v850/highres_timer.h       |   44 --
 include/asm-v850/hw_irq.h              |    4 -
 include/asm-v850/io.h                  |  142 ----
 include/asm-v850/ioctl.h               |    1 -
 include/asm-v850/ioctls.h              |   84 ---
 include/asm-v850/ipcbuf.h              |   29 -
 include/asm-v850/irq.h                 |   55 --
 include/asm-v850/irq_regs.h            |    1 -
 include/asm-v850/kdebug.h              |    1 -
 include/asm-v850/kmap_types.h          |   19 -
 include/asm-v850/kvm.h                 |    6 -
 include/asm-v850/linkage.h             |    8 -
 include/asm-v850/local.h               |    6 -
 include/asm-v850/ma.h                  |  101 ---
 include/asm-v850/ma1.h                 |   50 --
 include/asm-v850/machdep.h             |   60 --
 include/asm-v850/macrology.h           |   17 -
 include/asm-v850/me2.h                 |  182 ------
 include/asm-v850/mman.h                |   15 -
 include/asm-v850/mmu.h                 |   11 -
 include/asm-v850/mmu_context.h         |   13 -
 include/asm-v850/module.h              |   62 --
 include/asm-v850/msgbuf.h              |   31 -
 include/asm-v850/mutex.h               |    9 -
 include/asm-v850/page.h                |  124 ----
 include/asm-v850/param.h               |   33 -
 include/asm-v850/pci.h                 |  119 ----
 include/asm-v850/percpu.h              |   14 -
 include/asm-v850/pgalloc.h             |   22 -
 include/asm-v850/pgtable.h             |   59 --
 include/asm-v850/poll.h                |    9 -
 include/asm-v850/posix_types.h         |   72 --
 include/asm-v850/processor.h           |  120 ----
 include/asm-v850/ptrace.h              |  121 ----
 include/asm-v850/resource.h            |    6 -
 include/asm-v850/rte_cb.h              |   78 ---
 include/asm-v850/rte_ma1_cb.h          |  128 ----
 include/asm-v850/rte_mb_a_pci.h        |   56 --
 include/asm-v850/rte_me2_cb.h          |  202 ------
 include/asm-v850/rte_nb85e_cb.h        |  111 ----
 include/asm-v850/scatterlist.h         |   31 -
 include/asm-v850/sections.h            |    6 -
 include/asm-v850/segment.h             |   36 -
 include/asm-v850/semaphore.h           |    1 -
 include/asm-v850/sembuf.h              |   25 -
 include/asm-v850/serial.h              |   56 --
 include/asm-v850/setup.h               |    6 -
 include/asm-v850/shmbuf.h              |   42 --
 include/asm-v850/shmparam.h            |    6 -
 include/asm-v850/sigcontext.h          |   25 -
 include/asm-v850/siginfo.h             |    6 -
 include/asm-v850/signal.h              |  168 -----
 include/asm-v850/sim.h                 |   47 --
 include/asm-v850/sim85e2.h             |   69 --
 include/asm-v850/sim85e2c.h            |   26 -
 include/asm-v850/sim85e2s.h            |   28 -
 include/asm-v850/simsyscall.h          |   99 ---
 include/asm-v850/socket.h              |   57 --
 include/asm-v850/sockios.h             |   13 -
 include/asm-v850/stat.h                |   73 ---
 include/asm-v850/statfs.h              |    6 -
 include/asm-v850/string.h              |   25 -
 include/asm-v850/system.h              |  123 ----
 include/asm-v850/teg.h                 |  101 ---
 include/asm-v850/termbits.h            |  200 ------
 include/asm-v850/termios.h             |   90 ---
 include/asm-v850/thread_info.h         |  129 ----
 include/asm-v850/timex.h               |   18 -
 include/asm-v850/tlb.h                 |   21 -
 include/asm-v850/tlbflush.h            |   64 --
 include/asm-v850/topology.h            |    6 -
 include/asm-v850/types.h               |   36 -
 include/asm-v850/uaccess.h             |  159 -----
 include/asm-v850/ucontext.h            |   14 -
 include/asm-v850/unaligned.h           |   22 -
 include/asm-v850/unistd.h              |  244 -------
 include/asm-v850/user.h                |   52 --
 include/asm-v850/v850e.h               |   21 -
 include/asm-v850/v850e2.h              |   69 --
 include/asm-v850/v850e2_cache.h        |   75 ---
 include/asm-v850/v850e_cache.h         |   48 --
 include/asm-v850/v850e_intc.h          |  133 ----
 include/asm-v850/v850e_timer_c.h       |   48 --
 include/asm-v850/v850e_timer_d.h       |   62 --
 include/asm-v850/v850e_uart.h          |   76 ---
 include/asm-v850/v850e_uarta.h         |  278 --------
 include/asm-v850/v850e_uartb.h         |  262 --------
 include/asm-v850/v850e_utils.h         |   35 -
 include/linux/audit.h                  |    1 -
 include/linux/module.h                 |    2 +-
 include/linux/serial_core.h            |    3 -
 include/linux/syscalls.h               |    2 +-
 scripts/genksyms/genksyms.c            |    3 +-
 scripts/mod/file2alias.c               |    2 +-
 scripts/mod/mk_elfconfig.c             |    2 +-
 204 files changed, 5 insertions(+), 18406 deletions(-)
 delete mode 100644 arch/v850/Kconfig
 delete mode 100644 arch/v850/Kconfig.debug
 delete mode 100644 arch/v850/Makefile
 delete mode 100644 arch/v850/README
 delete mode 100644 arch/v850/configs/rte-ma1-cb_defconfig
 delete mode 100644 arch/v850/configs/rte-me2-cb_defconfig
 delete mode 100644 arch/v850/configs/sim_defconfig
 delete mode 100644 arch/v850/kernel/Makefile
 delete mode 100644 arch/v850/kernel/anna-rom.ld
 delete mode 100644 arch/v850/kernel/anna.c
 delete mode 100644 arch/v850/kernel/anna.ld
 delete mode 100644 arch/v850/kernel/as85ep1-rom.ld
 delete mode 100644 arch/v850/kernel/as85ep1.c
 delete mode 100644 arch/v850/kernel/as85ep1.ld
 delete mode 100644 arch/v850/kernel/asm-offsets.c
 delete mode 100644 arch/v850/kernel/bug.c
 delete mode 100644 arch/v850/kernel/entry.S
 delete mode 100644 arch/v850/kernel/fpga85e2c.c
 delete mode 100644 arch/v850/kernel/fpga85e2c.ld
 delete mode 100644 arch/v850/kernel/gbus_int.c
 delete mode 100644 arch/v850/kernel/head.S
 delete mode 100644 arch/v850/kernel/highres_timer.c
 delete mode 100644 arch/v850/kernel/init_task.c
 delete mode 100644 arch/v850/kernel/intv.S
 delete mode 100644 arch/v850/kernel/irq.c
 delete mode 100644 arch/v850/kernel/ma.c
 delete mode 100644 arch/v850/kernel/mach.c
 delete mode 100644 arch/v850/kernel/mach.h
 delete mode 100644 arch/v850/kernel/me2.c
 delete mode 100644 arch/v850/kernel/memcons.c
 delete mode 100644 arch/v850/kernel/module.c
 delete mode 100644 arch/v850/kernel/process.c
 delete mode 100644 arch/v850/kernel/procfs.c
 delete mode 100644 arch/v850/kernel/ptrace.c
 delete mode 100644 arch/v850/kernel/rte_cb.c
 delete mode 100644 arch/v850/kernel/rte_cb_leds.c
 delete mode 100644 arch/v850/kernel/rte_cb_multi.c
 delete mode 100644 arch/v850/kernel/rte_ma1_cb-rom.ld
 delete mode 100644 arch/v850/kernel/rte_ma1_cb.c
 delete mode 100644 arch/v850/kernel/rte_ma1_cb.ld
 delete mode 100644 arch/v850/kernel/rte_mb_a_pci.c
 delete mode 100644 arch/v850/kernel/rte_me2_cb.c
 delete mode 100644 arch/v850/kernel/rte_me2_cb.ld
 delete mode 100644 arch/v850/kernel/rte_nb85e_cb-multi.ld
 delete mode 100644 arch/v850/kernel/rte_nb85e_cb.c
 delete mode 100644 arch/v850/kernel/rte_nb85e_cb.ld
 delete mode 100644 arch/v850/kernel/setup.c
 delete mode 100644 arch/v850/kernel/signal.c
 delete mode 100644 arch/v850/kernel/sim.c
 delete mode 100644 arch/v850/kernel/sim.ld
 delete mode 100644 arch/v850/kernel/sim85e2.c
 delete mode 100644 arch/v850/kernel/sim85e2.ld
 delete mode 100644 arch/v850/kernel/simcons.c
 delete mode 100644 arch/v850/kernel/syscalls.c
 delete mode 100644 arch/v850/kernel/teg.c
 delete mode 100644 arch/v850/kernel/time.c
 delete mode 100644 arch/v850/kernel/v850_ksyms.c
 delete mode 100644 arch/v850/kernel/v850e2_cache.c
 delete mode 100644 arch/v850/kernel/v850e_cache.c
 delete mode 100644 arch/v850/kernel/v850e_intc.c
 delete mode 100644 arch/v850/kernel/v850e_timer_d.c
 delete mode 100644 arch/v850/kernel/v850e_utils.c
 delete mode 100644 arch/v850/kernel/vmlinux.lds.S
 delete mode 100644 arch/v850/lib/Makefile
 delete mode 100644 arch/v850/lib/ashldi3.c
 delete mode 100644 arch/v850/lib/ashrdi3.c
 delete mode 100644 arch/v850/lib/checksum.c
 delete mode 100644 arch/v850/lib/lshrdi3.c
 delete mode 100644 arch/v850/lib/memcpy.c
 delete mode 100644 arch/v850/lib/memset.c
 delete mode 100644 arch/v850/lib/muldi3.c
 delete mode 100644 arch/v850/lib/negdi2.c
 delete mode 100644 include/asm-v850/Kbuild
 delete mode 100644 include/asm-v850/a.out.h
 delete mode 100644 include/asm-v850/anna.h
 delete mode 100644 include/asm-v850/as85ep1.h
 delete mode 100644 include/asm-v850/asm.h
 delete mode 100644 include/asm-v850/atomic.h
 delete mode 100644 include/asm-v850/auxvec.h
 delete mode 100644 include/asm-v850/bitops.h
 delete mode 100644 include/asm-v850/bug.h
 delete mode 100644 include/asm-v850/bugs.h
 delete mode 100644 include/asm-v850/byteorder.h
 delete mode 100644 include/asm-v850/cache.h
 delete mode 100644 include/asm-v850/cacheflush.h
 delete mode 100644 include/asm-v850/checksum.h
 delete mode 100644 include/asm-v850/clinkage.h
 delete mode 100644 include/asm-v850/cputime.h
 delete mode 100644 include/asm-v850/current.h
 delete mode 100644 include/asm-v850/delay.h
 delete mode 100644 include/asm-v850/device.h
 delete mode 100644 include/asm-v850/div64.h
 delete mode 100644 include/asm-v850/dma-mapping.h
 delete mode 100644 include/asm-v850/dma.h
 delete mode 100644 include/asm-v850/elf.h
 delete mode 100644 include/asm-v850/emergency-restart.h
 delete mode 100644 include/asm-v850/entry.h
 delete mode 100644 include/asm-v850/errno.h
 delete mode 100644 include/asm-v850/fb.h
 delete mode 100644 include/asm-v850/fcntl.h
 delete mode 100644 include/asm-v850/flat.h
 delete mode 100644 include/asm-v850/fpga85e2c.h
 delete mode 100644 include/asm-v850/futex.h
 delete mode 100644 include/asm-v850/gbus_int.h
 delete mode 100644 include/asm-v850/hardirq.h
 delete mode 100644 include/asm-v850/highres_timer.h
 delete mode 100644 include/asm-v850/hw_irq.h
 delete mode 100644 include/asm-v850/io.h
 delete mode 100644 include/asm-v850/ioctl.h
 delete mode 100644 include/asm-v850/ioctls.h
 delete mode 100644 include/asm-v850/ipcbuf.h
 delete mode 100644 include/asm-v850/irq.h
 delete mode 100644 include/asm-v850/irq_regs.h
 delete mode 100644 include/asm-v850/kdebug.h
 delete mode 100644 include/asm-v850/kmap_types.h
 delete mode 100644 include/asm-v850/kvm.h
 delete mode 100644 include/asm-v850/linkage.h
 delete mode 100644 include/asm-v850/local.h
 delete mode 100644 include/asm-v850/ma.h
 delete mode 100644 include/asm-v850/ma1.h
 delete mode 100644 include/asm-v850/machdep.h
 delete mode 100644 include/asm-v850/macrology.h
 delete mode 100644 include/asm-v850/me2.h
 delete mode 100644 include/asm-v850/mman.h
 delete mode 100644 include/asm-v850/mmu.h
 delete mode 100644 include/asm-v850/mmu_context.h
 delete mode 100644 include/asm-v850/module.h
 delete mode 100644 include/asm-v850/msgbuf.h
 delete mode 100644 include/asm-v850/mutex.h
 delete mode 100644 include/asm-v850/page.h
 delete mode 100644 include/asm-v850/param.h
 delete mode 100644 include/asm-v850/pci.h
 delete mode 100644 include/asm-v850/percpu.h
 delete mode 100644 include/asm-v850/pgalloc.h
 delete mode 100644 include/asm-v850/pgtable.h
 delete mode 100644 include/asm-v850/poll.h
 delete mode 100644 include/asm-v850/posix_types.h
 delete mode 100644 include/asm-v850/processor.h
 delete mode 100644 include/asm-v850/ptrace.h
 delete mode 100644 include/asm-v850/resource.h
 delete mode 100644 include/asm-v850/rte_cb.h
 delete mode 100644 include/asm-v850/rte_ma1_cb.h
 delete mode 100644 include/asm-v850/rte_mb_a_pci.h
 delete mode 100644 include/asm-v850/rte_me2_cb.h
 delete mode 100644 include/asm-v850/rte_nb85e_cb.h
 delete mode 100644 include/asm-v850/scatterlist.h
 delete mode 100644 include/asm-v850/sections.h
 delete mode 100644 include/asm-v850/segment.h
 delete mode 100644 include/asm-v850/semaphore.h
 delete mode 100644 include/asm-v850/sembuf.h
 delete mode 100644 include/asm-v850/serial.h
 delete mode 100644 include/asm-v850/setup.h
 delete mode 100644 include/asm-v850/shmbuf.h
 delete mode 100644 include/asm-v850/shmparam.h
 delete mode 100644 include/asm-v850/sigcontext.h
 delete mode 100644 include/asm-v850/siginfo.h
 delete mode 100644 include/asm-v850/signal.h
 delete mode 100644 include/asm-v850/sim.h
 delete mode 100644 include/asm-v850/sim85e2.h
 delete mode 100644 include/asm-v850/sim85e2c.h
 delete mode 100644 include/asm-v850/sim85e2s.h
 delete mode 100644 include/asm-v850/simsyscall.h
 delete mode 100644 include/asm-v850/socket.h
 delete mode 100644 include/asm-v850/sockios.h
 delete mode 100644 include/asm-v850/stat.h
 delete mode 100644 include/asm-v850/statfs.h
 delete mode 100644 include/asm-v850/string.h
 delete mode 100644 include/asm-v850/system.h
 delete mode 100644 include/asm-v850/teg.h
 delete mode 100644 include/asm-v850/termbits.h
 delete mode 100644 include/asm-v850/termios.h
 delete mode 100644 include/asm-v850/thread_info.h
 delete mode 100644 include/asm-v850/timex.h
 delete mode 100644 include/asm-v850/tlb.h
 delete mode 100644 include/asm-v850/tlbflush.h
 delete mode 100644 include/asm-v850/topology.h
 delete mode 100644 include/asm-v850/types.h
 delete mode 100644 include/asm-v850/uaccess.h
 delete mode 100644 include/asm-v850/ucontext.h
 delete mode 100644 include/asm-v850/unaligned.h
 delete mode 100644 include/asm-v850/unistd.h
 delete mode 100644 include/asm-v850/user.h
 delete mode 100644 include/asm-v850/v850e.h
 delete mode 100644 include/asm-v850/v850e2.h
 delete mode 100644 include/asm-v850/v850e2_cache.h
 delete mode 100644 include/asm-v850/v850e_cache.h
 delete mode 100644 include/asm-v850/v850e_intc.h
 delete mode 100644 include/asm-v850/v850e_timer_c.h
 delete mode 100644 include/asm-v850/v850e_timer_d.h
 delete mode 100644 include/asm-v850/v850e_uart.h
 delete mode 100644 include/asm-v850/v850e_uarta.h
 delete mode 100644 include/asm-v850/v850e_uartb.h
 delete mode 100644 include/asm-v850/v850e_utils.h

(limited to 'include/linux')

diff --git a/MAINTAINERS b/MAINTAINERS
index 7ffd78c4e277..7e5c7b0290bb 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4131,9 +4131,6 @@ W:	http://www.uclinux.org/
 L:	uclinux-dev@uclinux.org  (subscribers-only)
 S:	Maintained
 
-UCLINUX FOR NEC V850
-P:	Miles Bader
-
 UCLINUX FOR RENESAS H8/300
 P:	Yoshinori Sato
 M:	ysato@users.sourceforge.jp
diff --git a/arch/v850/Kconfig b/arch/v850/Kconfig
deleted file mode 100644
index 4379f43505ef..000000000000
--- a/arch/v850/Kconfig
+++ /dev/null
@@ -1,353 +0,0 @@
-#############################################################################
-#
-# For a description of the syntax of this configuration file,
-# see Documentation/kbuild/kconfig-language.txt.
-#
-#############################################################################
-
-mainmenu "uClinux/v850 (w/o MMU) Kernel Configuration"
-
-config MMU
-       	bool
-	default n
-config ZONE_DMA
-	bool
-	default y
-config RWSEM_GENERIC_SPINLOCK
-	bool
-	default y
-config RWSEM_XCHGADD_ALGORITHM
-	bool
-	default n
-config GENERIC_FIND_NEXT_BIT
-	bool
-	default y
-config GENERIC_HWEIGHT
-	bool
-	default y
-config GENERIC_CALIBRATE_DELAY
-	bool
-	default y
-
-config GENERIC_HARDIRQS
-	bool
-	default y
-
-config GENERIC_IRQ_PROBE
-	bool
-	default y
-
-config GENERIC_TIME
-	bool
-	default y
-
-config TIME_LOW_RES
-	bool
-	default y
-
-config ARCH_HAS_ILOG2_U32
-	bool
-	default n
-
-config ARCH_HAS_ILOG2_U64
-	bool
-	default n
-
-config ARCH_SUPPORTS_AOUT
-	def_bool y
-
-# Turn off some random 386 crap that can affect device config
-config ISA
-	bool
-	default n
-config ISAPNP
-	bool
-	default n
-config EISA
-	bool
-	default n
-config MCA
-	bool
-	default n
-
-
-#############################################################################
-#### v850-specific config
-
-# Define the architecture
-config V850
-	bool
-	default y
-	select HAVE_IDE
-
-menu "Processor type and features"
-
-   choice
-	  prompt "Platform"
-	  default GDB
-      config V850E_SIM
-      	     bool "GDB"
-      config RTE_CB_MA1
-      	     bool "RTE-V850E/MA1-CB"
-      config RTE_CB_NB85E
-      	     bool "RTE-V850E/NB85E-CB"
-      config RTE_CB_ME2
-      	     bool "RTE-V850E/ME2-CB"
-      config V850E_AS85EP1
-      	     bool "AS85EP1"
-      config V850E2_SIM85E2C
-      	     bool "sim85e2c"
-      config V850E2_SIM85E2S
-      	     bool "sim85e2s"
-      config V850E2_FPGA85E2C
-      	     bool "NA85E2C-FPGA"
-      config V850E2_ANNA
-      	     bool "Anna"
-   endchoice
-
-   #### V850E processor-specific config
-
-   # All CPUs currently supported use the v850e architecture
-   config V850E
-   	  bool
-	  default y
-
-   # The RTE-V850E/MA1-CB is the only type of V850E/MA1 platform we
-   # currently support
-   config V850E_MA1
-   	  bool
-	  depends on RTE_CB_MA1
-	  default y
-   # Similarly for the RTE-V850E/NB85E-CB - V850E/TEG
-   config V850E_TEG
-   	  bool
-	  depends on RTE_CB_NB85E
-	  default y
-   # ... and the RTE-V850E/ME2-CB - V850E/ME2
-   config V850E_ME2
-   	  bool
-	  depends on RTE_CB_ME2
-	  default y
-
-
-   #### sim85e2-specific config
-
-   config V850E2_SIM85E2
-   	  bool
-	  depends on V850E2_SIM85E2C || V850E2_SIM85E2S
-	  default y
-
-
-   #### V850E2 processor-specific config
-
-   # V850E2 processors
-   config V850E2
-   	  bool
-	  depends on V850E2_SIM85E2 || V850E2_FPGA85E2C || V850E2_ANNA
-	  default y
-
-
-   #### RTE-CB platform-specific config
-
-   # Boards in the RTE-x-CB series
-   config RTE_CB
-   	  bool
-	  depends on RTE_CB_MA1 || RTE_CB_NB85E || RTE_CB_ME2
-	  default y
-
-   config RTE_CB_MULTI
-   	  bool
-	  # RTE_CB_NB85E can either have multi ROM support or not, but
-	  # other platforms (currently only RTE_CB_MA1) require it.
-	  prompt "Multi monitor ROM support" if RTE_CB_NB85E
-	  depends on RTE_CB_MA1 || RTE_CB_NB85E
-	  default y
-
-   config RTE_CB_MULTI_DBTRAP
-   	  bool "Pass illegal insn trap / dbtrap to kernel"
-	  depends on RTE_CB_MULTI
-	  default n
-
-   config RTE_CB_MA1_KSRAM
-   	  bool "Kernel in SRAM (limits size of kernel)"
-	  depends on RTE_CB_MA1 && RTE_CB_MULTI
-	  default n
-
-   config RTE_MB_A_PCI
-   	  bool "Mother-A PCI support"
-	  depends on RTE_CB
-	  default y
-
-   # The GBUS is used to talk to the RTE-MOTHER-A board
-   config RTE_GBUS_INT
-   	  bool
-	  depends on RTE_MB_A_PCI
-	  default y
-
-   # The only PCI bus we support is on the RTE-MOTHER-A board
-   config PCI
-   	  bool
-	  default RTE_MB_A_PCI
-
-   #### Some feature-specific configs
-
-   # Everything except for the GDB simulator uses the same interrupt controller
-   config V850E_INTC
-   	  bool
-	  default !V850E_SIM
-
-   # Everything except for the various simulators uses the "Timer D" unit
-   config V850E_TIMER_D
-   	  bool
-	  default !V850E_SIM && !V850E2_SIM85E2
-
-   # Cache control used on some v850e1 processors
-   config V850E_CACHE
-          bool
-	  default V850E_TEG || V850E_ME2
-
-   # Cache control used on v850e2 processors; I think this should
-   # actually apply to more, but currently only the SIM85E2S uses it
-   config V850E2_CACHE
-   	  bool
-	  default V850E2_SIM85E2S
-
-   config NO_CACHE
-   	  bool
-	  default !V850E_CACHE && !V850E2_CACHE
-
-   # HZ depends on the platform
-   config HZ
-	  int
-	  default 24  if V850E_SIM || V850E2_SIM85E2
-	  default 122 if V850E2_FPGA85E2C
-	  default 100
-
-   #### Misc config
-
-   config ROM_KERNEL
-   	  bool "Kernel in ROM"
-	  depends on V850E2_ANNA || V850E_AS85EP1 || RTE_CB_ME2
-
-   # Some platforms pre-zero memory, in which case the kernel doesn't need to
-   config ZERO_BSS
-   	  bool
-	  depends on !V850E2_SIM85E2C
-	  default y
-
-   # The crappy-ass zone allocator requires that the start of allocatable
-   # memory be aligned to the largest possible allocation.
-   config FORCE_MAX_ZONEORDER
-   	  int
-	  default 8 if V850E2_SIM85E2C || V850E2_FPGA85E2C
-
-   config V850E_HIGHRES_TIMER
-   	  bool "High resolution timer support"
-	  depends on V850E_TIMER_D
-   config TIME_BOOTUP
-   	  bool "Time bootup"
-	  depends on V850E_HIGHRES_TIMER
-
-   config RESET_GUARD
-   	  bool "Reset Guard"
-
-source "mm/Kconfig"
-
-endmenu
-
-
-#############################################################################
-
-source init/Kconfig
-
-#############################################################################
-
-menu "Bus options (PCI, PCMCIA, EISA, MCA, ISA)"
-
-#    config PCI
-# 	   bool "PCI support"
-# 	   help
-# 	     Support for PCI bus.
-
-source "drivers/pci/Kconfig"
-
-source "drivers/pcmcia/Kconfig"
-
-source "drivers/pci/hotplug/Kconfig"
-
-endmenu
-
-menu "Executable file formats"
-
-source "fs/Kconfig.binfmt"
-
-endmenu
-
-source "net/Kconfig"
-
-#############################################################################
-
-source "drivers/base/Kconfig"
-
-source drivers/mtd/Kconfig
-
-source drivers/parport/Kconfig
-
-#source drivers/pnp/Kconfig
-
-source drivers/block/Kconfig
-
-#############################################################################
-
-menu "Disk device support"
-
-source "drivers/ide/Kconfig"
-
-source "drivers/scsi/Kconfig"
-
-endmenu
-
-#############################################################################
-
-
-source "drivers/md/Kconfig"
-
-source "drivers/message/fusion/Kconfig"
-
-source "drivers/ieee1394/Kconfig"
-
-source "drivers/message/i2o/Kconfig"
-
-source "drivers/net/Kconfig"
-
-source "drivers/isdn/Kconfig"
-
-#source "drivers/telephony/Kconfig"
-
-#
-# input before char - char/joystick depends on it. As does USB.
-#
-source "drivers/input/Kconfig"
-
-source "drivers/char/Kconfig"
-
-#source drivers/misc/Config.in
-source "drivers/media/Kconfig"
-
-source "fs/Kconfig"
-
-source "drivers/video/Kconfig"
-
-source "sound/Kconfig"
-
-source "drivers/usb/Kconfig"
-
-source "arch/v850/Kconfig.debug"
-
-source "security/Kconfig"
-
-source "crypto/Kconfig"
-
-source "lib/Kconfig"
-
-#############################################################################
diff --git a/arch/v850/Kconfig.debug b/arch/v850/Kconfig.debug
deleted file mode 100644
index 4acfb9cca1ca..000000000000
--- a/arch/v850/Kconfig.debug
+++ /dev/null
@@ -1,10 +0,0 @@
-menu "Kernel hacking"
-
-source "lib/Kconfig.debug"
-
-config NO_KERNEL_MSG
-	bool "Suppress Kernel BUG Messages"
-	help
-	  Do not output any debug BUG messages within the kernel.
-
-endmenu
diff --git a/arch/v850/Makefile b/arch/v850/Makefile
deleted file mode 100644
index 8b629df0029a..000000000000
--- a/arch/v850/Makefile
+++ /dev/null
@@ -1,54 +0,0 @@
-#
-# arch/v850/Makefile
-#
-#  Copyright (C) 2001,02,03,05  NEC Corporation
-#  Copyright (C) 2001,02,03,05  Miles Bader <miles@gnu.org>
-#
-# This file is included by the global makefile so that you can add your own
-# architecture-specific flags and dependencies. Remember to do have actions
-# for "archclean" and "archdep" for cleaning up and making dependencies for
-# this architecture
-#
-# This file is subject to the terms and conditions of the GNU General Public
-# License.  See the file "COPYING" in the main directory of this archive
-# for more details.
-#
-
-arch_dir = arch/v850
-
-KBUILD_CFLAGS += -mv850e
-# r16 is a fixed pointer to the current task
-KBUILD_CFLAGS += -ffixed-r16 -mno-prolog-function
-KBUILD_CFLAGS += -fno-builtin
-KBUILD_CFLAGS += -D__linux__ -DUTS_SYSNAME=\"uClinux\"
-
-# By default, build a kernel that runs on the gdb v850 simulator.
-KBUILD_DEFCONFIG := sim_defconfig
-
-# This prevents the linker from consolidating the .gnu.linkonce.this_module
-# section into .text (which the v850 default linker script for -r does for
-# some reason)
-LDFLAGS_MODULE += --unique=.gnu.linkonce.this_module
-
-OBJCOPY_FLAGS_BLOB := -I binary -O elf32-little -B v850e
-
-
-head-y := $(arch_dir)/kernel/head.o $(arch_dir)/kernel/init_task.o
-core-y += $(arch_dir)/kernel/
-libs-y += $(arch_dir)/lib/
-
-
-# Deal with the initial contents of the root device
-ifdef ROOT_FS_IMAGE
-core-y += root_fs_image.o
-
-# Because the kernel build-system erases all explicit .o build rules, we
-# have to use an intermediate target to fool it into building for us.
-# This results in it being built anew each time, but that's alright.
-root_fs_image.o: root_fs_image_force
-
-root_fs_image_force: $(ROOT_FS_IMAGE)
-	$(OBJCOPY) $(OBJCOPY_FLAGS_BLOB) --rename-section .data=.root,alloc,load,readonly,data,contents $< root_fs_image.o
-endif
-
-CLEAN_FILES += root_fs_image.o
diff --git a/arch/v850/README b/arch/v850/README
deleted file mode 100644
index 12f7f7a665e0..000000000000
--- a/arch/v850/README
+++ /dev/null
@@ -1,44 +0,0 @@
-This port to the NEC V850E processor supports the following platforms:
-
-   "sim"
-	The gdb v850e simulator (CONFIG_V850E_SIM).
-
-   "rte-ma1-cb"
-	The Midas labs RTE-V850E/MA1-CB and RTE-V850E/NB85E-CB evaluation
-	boards (CONFIG_RTE_CB_MA1 and CONFIG_RTE_CB_NB85E).  This support
-	has only been tested when running with the Multi-debugger monitor
-	ROM (for the Green Hills Multi debugger).  The optional NEC
-	Solution Gear RTE-MOTHER-A motherboard is also supported, which
-	allows PCI boards to be used (CONFIG_RTE_MB_A_PCI).
-
-   "rte-me2-cb"
-	The Midas labs RTE-V850E/ME2-CB evaluation board (CONFIG_RTE_CB_ME2).
-     	This has only been tested using a kernel downloaded via an ICE
-     	connection using the Multi debugger.  Support for the RTE-MOTHER-A is
-     	present, but hasn't been tested (unlike the other Midas labs cpu
-     	boards, the RTE-V850E/ME2-CB includes an ethernet adaptor).
-
-   "as85ep1"
-	The NEC AS85EP1 V850E evaluation chip/board (CONFIG_V850E_AS85EP1).
-
-   "anna"
-	The NEC `Anna' (board/chip) implementation of the V850E2 processor
-	(CONFIG_V850E2_ANNA).
-
-   "sim85e2c", "sim85e2s"
-   	The sim85e2c and sim85e2s simulators, which are verilog simulations
-	of the V850E2 NA85E2C/NA85E2S cpu cores (CONFIG_V850E2_SIM85E2C and
-	CONFIG_V850E2_SIM85E2S).
-
-   "fpga85e2c"
-	A FPGA implementation of the V850E2 NA85E2C cpu core
-	(CONFIG_V850E2_FPGA85E2C).
-
-To get a default kernel configuration for a particular platform, you can
-use a <platform>_defconfig make target (e.g., "make rte-me2-cb_defconfig");
-to see which default configurations are possible, look in the directory
-"arch/v850/configs".
-
-Porting to anything with a V850E/MA1 or MA2 processor should be simple.
-See the file <asm-v850/machdep.h> and the files it includes for an example of
-how to add platform/chip-specific support.
diff --git a/arch/v850/configs/rte-ma1-cb_defconfig b/arch/v850/configs/rte-ma1-cb_defconfig
deleted file mode 100644
index 1a5beda36e29..000000000000
--- a/arch/v850/configs/rte-ma1-cb_defconfig
+++ /dev/null
@@ -1,617 +0,0 @@
-#
-# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.13-uc0
-# Fri Sep  2 13:54:27 2005
-#
-# CONFIG_MMU is not set
-# CONFIG_UID16 is not set
-CONFIG_RWSEM_GENERIC_SPINLOCK=y
-# CONFIG_RWSEM_XCHGADD_ALGORITHM is not set
-CONFIG_GENERIC_CALIBRATE_DELAY=y
-# CONFIG_ISA is not set
-# CONFIG_ISAPNP is not set
-# CONFIG_EISA is not set
-# CONFIG_MCA is not set
-CONFIG_V850=y
-
-#
-# Processor type and features
-#
-# CONFIG_V850E_SIM is not set
-CONFIG_RTE_CB_MA1=y
-# CONFIG_RTE_CB_NB85E is not set
-# CONFIG_RTE_CB_ME2 is not set
-# CONFIG_V850E_AS85EP1 is not set
-# CONFIG_V850E2_SIM85E2C is not set
-# CONFIG_V850E2_SIM85E2S is not set
-# CONFIG_V850E2_FPGA85E2C is not set
-# CONFIG_V850E2_ANNA is not set
-CONFIG_V850E=y
-CONFIG_V850E_MA1=y
-CONFIG_RTE_CB=y
-CONFIG_RTE_CB_MULTI=y
-CONFIG_RTE_CB_MULTI_DBTRAP=y
-# CONFIG_RTE_CB_MA1_KSRAM is not set
-CONFIG_RTE_MB_A_PCI=y
-CONFIG_RTE_GBUS_INT=y
-CONFIG_PCI=y
-CONFIG_V850E_INTC=y
-CONFIG_V850E_TIMER_D=y
-# CONFIG_V850E_CACHE is not set
-# CONFIG_V850E2_CACHE is not set
-CONFIG_NO_CACHE=y
-CONFIG_ZERO_BSS=y
-# CONFIG_V850E_HIGHRES_TIMER is not set
-# CONFIG_RESET_GUARD is not set
-CONFIG_LARGE_ALLOCS=y
-CONFIG_FLATMEM=y
-CONFIG_FLAT_NODE_MEM_MAP=y
-
-#
-# Code maturity level options
-#
-# CONFIG_EXPERIMENTAL is not set
-CONFIG_CLEAN_COMPILE=y
-CONFIG_BROKEN_ON_SMP=y
-CONFIG_INIT_ENV_ARG_LIMIT=32
-
-#
-# General setup
-#
-CONFIG_LOCALVERSION=""
-# CONFIG_BSD_PROCESS_ACCT is not set
-# CONFIG_SYSCTL is not set
-# CONFIG_AUDIT is not set
-# CONFIG_HOTPLUG is not set
-CONFIG_KOBJECT_UEVENT=y
-# CONFIG_IKCONFIG is not set
-CONFIG_EMBEDDED=y
-# CONFIG_KALLSYMS is not set
-CONFIG_PRINTK=y
-CONFIG_BUG=y
-# CONFIG_BASE_FULL is not set
-# CONFIG_FUTEX is not set
-# CONFIG_EPOLL is not set
-CONFIG_CC_OPTIMIZE_FOR_SIZE=y
-CONFIG_CC_ALIGN_FUNCTIONS=0
-CONFIG_CC_ALIGN_LABELS=0
-CONFIG_CC_ALIGN_LOOPS=0
-CONFIG_CC_ALIGN_JUMPS=0
-CONFIG_BASE_SMALL=1
-
-#
-# Loadable module support
-#
-CONFIG_MODULES=y
-CONFIG_MODULE_UNLOAD=y
-CONFIG_OBSOLETE_MODPARM=y
-# CONFIG_MODULE_SRCVERSION_ALL is not set
-CONFIG_KMOD=y
-
-#
-# Bus options (PCI, PCMCIA, EISA, MCA, ISA)
-#
-# CONFIG_PCI_LEGACY_PROC is not set
-# CONFIG_PCI_NAMES is not set
-# CONFIG_PCI_DEBUG is not set
-
-#
-# PCCARD (PCMCIA/CardBus) support
-#
-# CONFIG_PCCARD is not set
-
-#
-# PCI Hotplug Support
-#
-
-#
-# Executable file formats
-#
-CONFIG_BINFMT_FLAT=y
-# CONFIG_BINFMT_ZFLAT is not set
-# CONFIG_BINFMT_SHARED_FLAT is not set
-# CONFIG_BINFMT_MISC is not set
-
-#
-# Networking
-#
-CONFIG_NET=y
-
-#
-# Networking options
-#
-# CONFIG_PACKET is not set
-# CONFIG_UNIX is not set
-# CONFIG_NET_KEY is not set
-CONFIG_INET=y
-# CONFIG_IP_MULTICAST is not set
-# CONFIG_IP_ADVANCED_ROUTER is not set
-CONFIG_IP_FIB_HASH=y
-# CONFIG_IP_PNP is not set
-# CONFIG_NET_IPIP is not set
-# CONFIG_NET_IPGRE is not set
-# CONFIG_SYN_COOKIES is not set
-# CONFIG_INET_AH is not set
-# CONFIG_INET_ESP is not set
-# CONFIG_INET_IPCOMP is not set
-# CONFIG_INET_TUNNEL is not set
-# CONFIG_IP_TCPDIAG is not set
-# CONFIG_IP_TCPDIAG_IPV6 is not set
-# CONFIG_TCP_CONG_ADVANCED is not set
-CONFIG_TCP_CONG_BIC=y
-# CONFIG_IPV6 is not set
-# CONFIG_NETFILTER is not set
-# CONFIG_BRIDGE is not set
-# CONFIG_VLAN_8021Q is not set
-# CONFIG_DECNET is not set
-# CONFIG_LLC2 is not set
-# CONFIG_IPX is not set
-# CONFIG_ATALK is not set
-# CONFIG_NET_SCHED is not set
-# CONFIG_NET_CLS_ROUTE is not set
-
-#
-# Network testing
-#
-# CONFIG_NET_PKTGEN is not set
-# CONFIG_HAMRADIO is not set
-# CONFIG_IRDA is not set
-# CONFIG_BT is not set
-
-#
-# Generic Driver Options
-#
-CONFIG_STANDALONE=y
-CONFIG_PREVENT_FIRMWARE_BUILD=y
-# CONFIG_FW_LOADER is not set
-# CONFIG_DEBUG_DRIVER is not set
-
-#
-# Memory Technology Devices (MTD)
-#
-CONFIG_MTD=y
-# CONFIG_MTD_DEBUG is not set
-# CONFIG_MTD_CONCAT is not set
-# CONFIG_MTD_PARTITIONS is not set
-
-#
-# User Modules And Translation Layers
-#
-# CONFIG_MTD_CHAR is not set
-CONFIG_MTD_BLOCK=y
-# CONFIG_FTL is not set
-# CONFIG_NFTL is not set
-# CONFIG_INFTL is not set
-
-#
-# RAM/ROM/Flash chip drivers
-#
-# CONFIG_MTD_CFI is not set
-# CONFIG_MTD_JEDECPROBE is not set
-CONFIG_MTD_MAP_BANK_WIDTH_1=y
-CONFIG_MTD_MAP_BANK_WIDTH_2=y
-CONFIG_MTD_MAP_BANK_WIDTH_4=y
-# CONFIG_MTD_MAP_BANK_WIDTH_8 is not set
-# CONFIG_MTD_MAP_BANK_WIDTH_16 is not set
-# CONFIG_MTD_MAP_BANK_WIDTH_32 is not set
-CONFIG_MTD_CFI_I1=y
-CONFIG_MTD_CFI_I2=y
-# CONFIG_MTD_CFI_I4 is not set
-# CONFIG_MTD_CFI_I8 is not set
-# CONFIG_MTD_RAM is not set
-# CONFIG_MTD_ROM is not set
-# CONFIG_MTD_ABSENT is not set
-
-#
-# Mapping drivers for chip access
-#
-# CONFIG_MTD_COMPLEX_MAPPINGS is not set
-# CONFIG_MTD_PLATRAM is not set
-
-#
-# Self-contained MTD device drivers
-#
-# CONFIG_MTD_PMC551 is not set
-CONFIG_MTD_SLRAM=y
-# CONFIG_MTD_PHRAM is not set
-# CONFIG_MTD_MTDRAM is not set
-# CONFIG_MTD_BLKMTD is not set
-
-#
-# Disk-On-Chip Device Drivers
-#
-# CONFIG_MTD_DOC2000 is not set
-# CONFIG_MTD_DOC2001 is not set
-# CONFIG_MTD_DOC2001PLUS is not set
-
-#
-# NAND Flash Device Drivers
-#
-# CONFIG_MTD_NAND is not set
-
-#
-# Parallel port support
-#
-# CONFIG_PARPORT is not set
-
-#
-# Block devices
-#
-# CONFIG_BLK_DEV_FD is not set
-# CONFIG_BLK_CPQ_DA is not set
-# CONFIG_BLK_CPQ_CISS_DA is not set
-# CONFIG_BLK_DEV_DAC960 is not set
-# CONFIG_BLK_DEV_COW_COMMON is not set
-# CONFIG_BLK_DEV_LOOP is not set
-# CONFIG_BLK_DEV_NBD is not set
-# CONFIG_BLK_DEV_SX8 is not set
-# CONFIG_BLK_DEV_RAM is not set
-CONFIG_BLK_DEV_RAM_COUNT=16
-CONFIG_INITRAMFS_SOURCE=""
-# CONFIG_CDROM_PKTCDVD is not set
-
-#
-# IO Schedulers
-#
-CONFIG_IOSCHED_NOOP=y
-# CONFIG_IOSCHED_AS is not set
-# CONFIG_IOSCHED_DEADLINE is not set
-# CONFIG_IOSCHED_CFQ is not set
-# CONFIG_ATA_OVER_ETH is not set
-
-#
-# Disk device support
-#
-
-#
-# ATA/ATAPI/MFM/RLL support
-#
-# CONFIG_IDE is not set
-
-#
-# SCSI device support
-#
-# CONFIG_SCSI is not set
-
-#
-# Multi-device support (RAID and LVM)
-#
-# CONFIG_MD is not set
-
-#
-# Fusion MPT device support
-#
-# CONFIG_FUSION is not set
-
-#
-# IEEE 1394 (FireWire) support
-#
-# CONFIG_IEEE1394 is not set
-
-#
-# I2O device support
-#
-# CONFIG_I2O is not set
-
-#
-# Network device support
-#
-CONFIG_NETDEVICES=y
-# CONFIG_DUMMY is not set
-# CONFIG_BONDING is not set
-# CONFIG_EQUALIZER is not set
-# CONFIG_TUN is not set
-
-#
-# ARCnet devices
-#
-# CONFIG_ARCNET is not set
-
-#
-# Ethernet (10 or 100Mbit)
-#
-CONFIG_NET_ETHERNET=y
-CONFIG_MII=y
-# CONFIG_HAPPYMEAL is not set
-# CONFIG_SUNGEM is not set
-# CONFIG_NET_VENDOR_3COM is not set
-# CONFIG_NET_VENDOR_SMC is not set
-
-#
-# Tulip family network device support
-#
-# CONFIG_NET_TULIP is not set
-# CONFIG_HP100 is not set
-# CONFIG_NE2000 is not set
-CONFIG_NET_PCI=y
-# CONFIG_PCNET32 is not set
-# CONFIG_AMD8111_ETH is not set
-# CONFIG_ADAPTEC_STARFIRE is not set
-# CONFIG_DGRS is not set
-CONFIG_EEPRO100=y
-# CONFIG_E100 is not set
-# CONFIG_FEALNX is not set
-# CONFIG_NATSEMI is not set
-# CONFIG_NE2K_PCI is not set
-# CONFIG_8139TOO is not set
-# CONFIG_SIS900 is not set
-# CONFIG_EPIC100 is not set
-# CONFIG_SUNDANCE is not set
-# CONFIG_TLAN is not set
-# CONFIG_VIA_RHINE is not set
-
-#
-# Ethernet (1000 Mbit)
-#
-# CONFIG_ACENIC is not set
-# CONFIG_DL2K is not set
-# CONFIG_E1000 is not set
-# CONFIG_NS83820 is not set
-# CONFIG_HAMACHI is not set
-# CONFIG_R8169 is not set
-# CONFIG_SK98LIN is not set
-# CONFIG_VIA_VELOCITY is not set
-# CONFIG_TIGON3 is not set
-# CONFIG_BNX2 is not set
-
-#
-# Ethernet (10000 Mbit)
-#
-# CONFIG_IXGB is not set
-# CONFIG_S2IO is not set
-
-#
-# Token Ring devices
-#
-# CONFIG_TR is not set
-
-#
-# Wireless LAN (non-hamradio)
-#
-# CONFIG_NET_RADIO is not set
-
-#
-# Wan interfaces
-#
-# CONFIG_WAN is not set
-# CONFIG_FDDI is not set
-# CONFIG_PPP is not set
-# CONFIG_SLIP is not set
-# CONFIG_NETPOLL is not set
-# CONFIG_NET_POLL_CONTROLLER is not set
-
-#
-# ISDN subsystem
-#
-# CONFIG_ISDN is not set
-
-#
-# Input device support
-#
-CONFIG_INPUT=y
-
-#
-# Userland interfaces
-#
-# CONFIG_INPUT_MOUSEDEV is not set
-# CONFIG_INPUT_JOYDEV is not set
-# CONFIG_INPUT_TSDEV is not set
-# CONFIG_INPUT_EVDEV is not set
-# CONFIG_INPUT_EVBUG is not set
-
-#
-# Input Device Drivers
-#
-# CONFIG_INPUT_KEYBOARD is not set
-# CONFIG_INPUT_MOUSE is not set
-# CONFIG_INPUT_JOYSTICK is not set
-# CONFIG_INPUT_TOUCHSCREEN is not set
-# CONFIG_INPUT_MISC is not set
-
-#
-# Hardware I/O ports
-#
-# CONFIG_SERIO is not set
-# CONFIG_GAMEPORT is not set
-
-#
-# Character devices
-#
-# CONFIG_VT is not set
-# CONFIG_SERIAL_NONSTANDARD is not set
-
-#
-# Serial drivers
-#
-# CONFIG_SERIAL_8250 is not set
-
-#
-# Non-8250 serial port support
-#
-CONFIG_V850E_UART=y
-CONFIG_V850E_UART_CONSOLE=y
-CONFIG_SERIAL_CORE=y
-CONFIG_SERIAL_CORE_CONSOLE=y
-# CONFIG_SERIAL_JSM is not set
-# CONFIG_UNIX98_PTYS is not set
-# CONFIG_LEGACY_PTYS is not set
-
-#
-# IPMI
-#
-# CONFIG_IPMI_HANDLER is not set
-
-#
-# Watchdog Cards
-#
-# CONFIG_WATCHDOG is not set
-# CONFIG_RTC is not set
-# CONFIG_GEN_RTC is not set
-# CONFIG_DTLK is not set
-# CONFIG_R3964 is not set
-# CONFIG_APPLICOM is not set
-
-#
-# Ftape, the floppy tape device driver
-#
-# CONFIG_DRM is not set
-# CONFIG_RAW_DRIVER is not set
-
-#
-# TPM devices
-#
-
-#
-# Multimedia devices
-#
-# CONFIG_VIDEO_DEV is not set
-
-#
-# Digital Video Broadcasting Devices
-#
-# CONFIG_DVB is not set
-
-#
-# File systems
-#
-# CONFIG_EXT2_FS is not set
-# CONFIG_EXT3_FS is not set
-# CONFIG_JBD is not set
-# CONFIG_REISERFS_FS is not set
-# CONFIG_JFS_FS is not set
-# CONFIG_FS_POSIX_ACL is not set
-
-#
-# XFS support
-#
-# CONFIG_XFS_FS is not set
-# CONFIG_MINIX_FS is not set
-CONFIG_ROMFS_FS=y
-# CONFIG_MAGIC_ROM_PTR is not set
-CONFIG_INOTIFY=y
-# CONFIG_QUOTA is not set
-CONFIG_DNOTIFY=y
-# CONFIG_AUTOFS_FS is not set
-# CONFIG_AUTOFS4_FS is not set
-
-#
-# CD-ROM/DVD Filesystems
-#
-# CONFIG_ISO9660_FS is not set
-# CONFIG_UDF_FS is not set
-
-#
-# DOS/FAT/NT Filesystems
-#
-# CONFIG_MSDOS_FS is not set
-# CONFIG_VFAT_FS is not set
-# CONFIG_NTFS_FS is not set
-
-#
-# Pseudo filesystems
-#
-CONFIG_PROC_FS=y
-CONFIG_SYSFS=y
-# CONFIG_TMPFS is not set
-# CONFIG_HUGETLB_PAGE is not set
-CONFIG_RAMFS=y
-
-#
-# Miscellaneous filesystems
-#
-# CONFIG_HFSPLUS_FS is not set
-# CONFIG_JFFS_FS is not set
-# CONFIG_JFFS2_FS is not set
-# CONFIG_CRAMFS is not set
-# CONFIG_VXFS_FS is not set
-# CONFIG_HPFS_FS is not set
-# CONFIG_QNX4FS_FS is not set
-# CONFIG_SYSV_FS is not set
-# CONFIG_UFS_FS is not set
-
-#
-# Network File Systems
-#
-CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
-# CONFIG_NFS_V3_ACL is not set
-# CONFIG_NFSD is not set
-CONFIG_LOCKD=y
-CONFIG_LOCKD_V4=y
-CONFIG_NFS_COMMON=y
-CONFIG_SUNRPC=y
-# CONFIG_SMB_FS is not set
-# CONFIG_CIFS is not set
-# CONFIG_NCP_FS is not set
-# CONFIG_CODA_FS is not set
-
-#
-# Partition Types
-#
-# CONFIG_PARTITION_ADVANCED is not set
-CONFIG_MSDOS_PARTITION=y
-
-#
-# Native Language Support
-#
-# CONFIG_NLS is not set
-
-#
-# Graphics support
-#
-# CONFIG_FB is not set
-
-#
-# Sound
-#
-# CONFIG_SOUND is not set
-
-#
-# USB support
-#
-CONFIG_USB_ARCH_HAS_HCD=y
-CONFIG_USB_ARCH_HAS_OHCI=y
-# CONFIG_USB is not set
-
-#
-# USB Gadget Support
-#
-# CONFIG_USB_GADGET is not set
-
-#
-# Kernel hacking
-#
-# CONFIG_PRINTK_TIME is not set
-CONFIG_DEBUG_KERNEL=y
-# CONFIG_MAGIC_SYSRQ is not set
-CONFIG_LOG_BUF_SHIFT=14
-# CONFIG_SCHEDSTATS is not set
-# CONFIG_DEBUG_SLAB is not set
-# CONFIG_DEBUG_SPINLOCK is not set
-# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
-# CONFIG_DEBUG_KOBJECT is not set
-CONFIG_DEBUG_INFO=y
-# CONFIG_DEBUG_FS is not set
-# CONFIG_NO_KERNEL_MSG is not set
-
-#
-# Security options
-#
-# CONFIG_KEYS is not set
-# CONFIG_SECURITY is not set
-
-#
-# Cryptographic options
-#
-# CONFIG_CRYPTO is not set
-
-#
-# Hardware crypto devices
-#
-
-#
-# Library routines
-#
-# CONFIG_CRC_CCITT is not set
-# CONFIG_CRC32 is not set
-# CONFIG_LIBCRC32C is not set
diff --git a/arch/v850/configs/rte-me2-cb_defconfig b/arch/v850/configs/rte-me2-cb_defconfig
deleted file mode 100644
index 15e666478061..000000000000
--- a/arch/v850/configs/rte-me2-cb_defconfig
+++ /dev/null
@@ -1,462 +0,0 @@
-#
-# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.13-uc0
-# Fri Sep  2 13:47:50 2005
-#
-# CONFIG_MMU is not set
-# CONFIG_UID16 is not set
-CONFIG_RWSEM_GENERIC_SPINLOCK=y
-# CONFIG_RWSEM_XCHGADD_ALGORITHM is not set
-CONFIG_GENERIC_CALIBRATE_DELAY=y
-# CONFIG_ISA is not set
-# CONFIG_ISAPNP is not set
-# CONFIG_EISA is not set
-# CONFIG_MCA is not set
-CONFIG_V850=y
-
-#
-# Processor type and features
-#
-# CONFIG_V850E_SIM is not set
-# CONFIG_RTE_CB_MA1 is not set
-# CONFIG_RTE_CB_NB85E is not set
-CONFIG_RTE_CB_ME2=y
-# CONFIG_V850E_AS85EP1 is not set
-# CONFIG_V850E2_SIM85E2C is not set
-# CONFIG_V850E2_SIM85E2S is not set
-# CONFIG_V850E2_FPGA85E2C is not set
-# CONFIG_V850E2_ANNA is not set
-CONFIG_V850E=y
-CONFIG_V850E_ME2=y
-CONFIG_RTE_CB=y
-# CONFIG_RTE_MB_A_PCI is not set
-# CONFIG_PCI is not set
-CONFIG_V850E_INTC=y
-CONFIG_V850E_TIMER_D=y
-CONFIG_V850E_CACHE=y
-# CONFIG_V850E2_CACHE is not set
-# CONFIG_NO_CACHE is not set
-# CONFIG_ROM_KERNEL is not set
-CONFIG_ZERO_BSS=y
-# CONFIG_V850E_HIGHRES_TIMER is not set
-# CONFIG_RESET_GUARD is not set
-CONFIG_LARGE_ALLOCS=y
-CONFIG_FLATMEM=y
-CONFIG_FLAT_NODE_MEM_MAP=y
-
-#
-# Code maturity level options
-#
-# CONFIG_EXPERIMENTAL is not set
-CONFIG_CLEAN_COMPILE=y
-CONFIG_BROKEN_ON_SMP=y
-CONFIG_INIT_ENV_ARG_LIMIT=32
-
-#
-# General setup
-#
-CONFIG_LOCALVERSION=""
-# CONFIG_BSD_PROCESS_ACCT is not set
-# CONFIG_SYSCTL is not set
-# CONFIG_HOTPLUG is not set
-# CONFIG_IKCONFIG is not set
-CONFIG_EMBEDDED=y
-# CONFIG_KALLSYMS is not set
-CONFIG_PRINTK=y
-CONFIG_BUG=y
-# CONFIG_BASE_FULL is not set
-# CONFIG_FUTEX is not set
-# CONFIG_EPOLL is not set
-CONFIG_CC_OPTIMIZE_FOR_SIZE=y
-CONFIG_CC_ALIGN_FUNCTIONS=0
-CONFIG_CC_ALIGN_LABELS=0
-CONFIG_CC_ALIGN_LOOPS=0
-CONFIG_CC_ALIGN_JUMPS=0
-CONFIG_BASE_SMALL=1
-
-#
-# Loadable module support
-#
-CONFIG_MODULES=y
-CONFIG_MODULE_UNLOAD=y
-CONFIG_OBSOLETE_MODPARM=y
-# CONFIG_MODULE_SRCVERSION_ALL is not set
-CONFIG_KMOD=y
-
-#
-# Bus options (PCI, PCMCIA, EISA, MCA, ISA)
-#
-
-#
-# PCCARD (PCMCIA/CardBus) support
-#
-# CONFIG_PCCARD is not set
-
-#
-# PCI Hotplug Support
-#
-
-#
-# Executable file formats
-#
-CONFIG_BINFMT_FLAT=y
-# CONFIG_BINFMT_ZFLAT is not set
-# CONFIG_BINFMT_SHARED_FLAT is not set
-# CONFIG_BINFMT_MISC is not set
-
-#
-# Networking
-#
-# CONFIG_NET is not set
-
-#
-# Generic Driver Options
-#
-CONFIG_STANDALONE=y
-CONFIG_PREVENT_FIRMWARE_BUILD=y
-# CONFIG_FW_LOADER is not set
-# CONFIG_DEBUG_DRIVER is not set
-
-#
-# Memory Technology Devices (MTD)
-#
-CONFIG_MTD=y
-# CONFIG_MTD_DEBUG is not set
-# CONFIG_MTD_CONCAT is not set
-# CONFIG_MTD_PARTITIONS is not set
-
-#
-# User Modules And Translation Layers
-#
-# CONFIG_MTD_CHAR is not set
-CONFIG_MTD_BLOCK=y
-# CONFIG_FTL is not set
-# CONFIG_NFTL is not set
-# CONFIG_INFTL is not set
-
-#
-# RAM/ROM/Flash chip drivers
-#
-# CONFIG_MTD_CFI is not set
-# CONFIG_MTD_JEDECPROBE is not set
-CONFIG_MTD_MAP_BANK_WIDTH_1=y
-CONFIG_MTD_MAP_BANK_WIDTH_2=y
-CONFIG_MTD_MAP_BANK_WIDTH_4=y
-# CONFIG_MTD_MAP_BANK_WIDTH_8 is not set
-# CONFIG_MTD_MAP_BANK_WIDTH_16 is not set
-# CONFIG_MTD_MAP_BANK_WIDTH_32 is not set
-CONFIG_MTD_CFI_I1=y
-CONFIG_MTD_CFI_I2=y
-# CONFIG_MTD_CFI_I4 is not set
-# CONFIG_MTD_CFI_I8 is not set
-# CONFIG_MTD_RAM is not set
-# CONFIG_MTD_ROM is not set
-# CONFIG_MTD_ABSENT is not set
-
-#
-# Mapping drivers for chip access
-#
-# CONFIG_MTD_COMPLEX_MAPPINGS is not set
-# CONFIG_MTD_PLATRAM is not set
-
-#
-# Self-contained MTD device drivers
-#
-CONFIG_MTD_SLRAM=y
-# CONFIG_MTD_PHRAM is not set
-# CONFIG_MTD_MTDRAM is not set
-# CONFIG_MTD_BLKMTD is not set
-
-#
-# Disk-On-Chip Device Drivers
-#
-# CONFIG_MTD_DOC2000 is not set
-# CONFIG_MTD_DOC2001 is not set
-# CONFIG_MTD_DOC2001PLUS is not set
-
-#
-# NAND Flash Device Drivers
-#
-# CONFIG_MTD_NAND is not set
-
-#
-# Parallel port support
-#
-# CONFIG_PARPORT is not set
-
-#
-# Block devices
-#
-# CONFIG_BLK_DEV_FD is not set
-# CONFIG_BLK_DEV_COW_COMMON is not set
-# CONFIG_BLK_DEV_LOOP is not set
-# CONFIG_BLK_DEV_RAM is not set
-CONFIG_BLK_DEV_RAM_COUNT=16
-CONFIG_INITRAMFS_SOURCE=""
-# CONFIG_CDROM_PKTCDVD is not set
-
-#
-# IO Schedulers
-#
-CONFIG_IOSCHED_NOOP=y
-# CONFIG_IOSCHED_AS is not set
-# CONFIG_IOSCHED_DEADLINE is not set
-# CONFIG_IOSCHED_CFQ is not set
-
-#
-# Disk device support
-#
-
-#
-# ATA/ATAPI/MFM/RLL support
-#
-# CONFIG_IDE is not set
-
-#
-# SCSI device support
-#
-# CONFIG_SCSI is not set
-
-#
-# Multi-device support (RAID and LVM)
-#
-# CONFIG_MD is not set
-
-#
-# Fusion MPT device support
-#
-# CONFIG_FUSION is not set
-
-#
-# IEEE 1394 (FireWire) support
-#
-
-#
-# I2O device support
-#
-
-#
-# Network device support
-#
-# CONFIG_NETPOLL is not set
-# CONFIG_NET_POLL_CONTROLLER is not set
-
-#
-# ISDN subsystem
-#
-
-#
-# Input device support
-#
-CONFIG_INPUT=y
-
-#
-# Userland interfaces
-#
-# CONFIG_INPUT_MOUSEDEV is not set
-# CONFIG_INPUT_JOYDEV is not set
-# CONFIG_INPUT_TSDEV is not set
-# CONFIG_INPUT_EVDEV is not set
-# CONFIG_INPUT_EVBUG is not set
-
-#
-# Input Device Drivers
-#
-# CONFIG_INPUT_KEYBOARD is not set
-# CONFIG_INPUT_MOUSE is not set
-# CONFIG_INPUT_JOYSTICK is not set
-# CONFIG_INPUT_TOUCHSCREEN is not set
-# CONFIG_INPUT_MISC is not set
-
-#
-# Hardware I/O ports
-#
-CONFIG_SERIO=y
-# CONFIG_SERIO_I8042 is not set
-# CONFIG_SERIO_SERPORT is not set
-# CONFIG_SERIO_LIBPS2 is not set
-# CONFIG_SERIO_RAW is not set
-# CONFIG_GAMEPORT is not set
-
-#
-# Character devices
-#
-# CONFIG_VT is not set
-# CONFIG_SERIAL_NONSTANDARD is not set
-
-#
-# Serial drivers
-#
-CONFIG_SERIAL_8250=y
-CONFIG_SERIAL_8250_CONSOLE=y
-CONFIG_SERIAL_8250_NR_UARTS=1
-# CONFIG_SERIAL_8250_EXTENDED is not set
-
-#
-# Non-8250 serial port support
-#
-# CONFIG_V850E_UART is not set
-CONFIG_SERIAL_CORE=y
-CONFIG_SERIAL_CORE_CONSOLE=y
-# CONFIG_UNIX98_PTYS is not set
-# CONFIG_LEGACY_PTYS is not set
-
-#
-# IPMI
-#
-# CONFIG_IPMI_HANDLER is not set
-
-#
-# Watchdog Cards
-#
-# CONFIG_WATCHDOG is not set
-# CONFIG_RTC is not set
-# CONFIG_GEN_RTC is not set
-# CONFIG_DTLK is not set
-# CONFIG_R3964 is not set
-
-#
-# Ftape, the floppy tape device driver
-#
-# CONFIG_RAW_DRIVER is not set
-
-#
-# TPM devices
-#
-
-#
-# Multimedia devices
-#
-# CONFIG_VIDEO_DEV is not set
-
-#
-# Digital Video Broadcasting Devices
-#
-
-#
-# File systems
-#
-# CONFIG_EXT2_FS is not set
-# CONFIG_EXT3_FS is not set
-# CONFIG_JBD is not set
-# CONFIG_REISERFS_FS is not set
-# CONFIG_JFS_FS is not set
-# CONFIG_FS_POSIX_ACL is not set
-
-#
-# XFS support
-#
-# CONFIG_XFS_FS is not set
-# CONFIG_MINIX_FS is not set
-CONFIG_ROMFS_FS=y
-# CONFIG_MAGIC_ROM_PTR is not set
-CONFIG_INOTIFY=y
-# CONFIG_QUOTA is not set
-CONFIG_DNOTIFY=y
-# CONFIG_AUTOFS_FS is not set
-# CONFIG_AUTOFS4_FS is not set
-
-#
-# CD-ROM/DVD Filesystems
-#
-# CONFIG_ISO9660_FS is not set
-# CONFIG_UDF_FS is not set
-
-#
-# DOS/FAT/NT Filesystems
-#
-# CONFIG_MSDOS_FS is not set
-# CONFIG_VFAT_FS is not set
-# CONFIG_NTFS_FS is not set
-
-#
-# Pseudo filesystems
-#
-CONFIG_PROC_FS=y
-CONFIG_SYSFS=y
-# CONFIG_TMPFS is not set
-# CONFIG_HUGETLB_PAGE is not set
-CONFIG_RAMFS=y
-
-#
-# Miscellaneous filesystems
-#
-# CONFIG_HFSPLUS_FS is not set
-# CONFIG_JFFS_FS is not set
-# CONFIG_JFFS2_FS is not set
-# CONFIG_CRAMFS is not set
-# CONFIG_VXFS_FS is not set
-# CONFIG_HPFS_FS is not set
-# CONFIG_QNX4FS_FS is not set
-# CONFIG_SYSV_FS is not set
-# CONFIG_UFS_FS is not set
-
-#
-# Partition Types
-#
-# CONFIG_PARTITION_ADVANCED is not set
-CONFIG_MSDOS_PARTITION=y
-
-#
-# Native Language Support
-#
-# CONFIG_NLS is not set
-
-#
-# Graphics support
-#
-# CONFIG_FB is not set
-
-#
-# Sound
-#
-# CONFIG_SOUND is not set
-
-#
-# USB support
-#
-# CONFIG_USB_ARCH_HAS_HCD is not set
-# CONFIG_USB_ARCH_HAS_OHCI is not set
-
-#
-# USB Gadget Support
-#
-# CONFIG_USB_GADGET is not set
-
-#
-# Kernel hacking
-#
-# CONFIG_PRINTK_TIME is not set
-CONFIG_DEBUG_KERNEL=y
-# CONFIG_MAGIC_SYSRQ is not set
-CONFIG_LOG_BUF_SHIFT=14
-# CONFIG_SCHEDSTATS is not set
-# CONFIG_DEBUG_SLAB is not set
-# CONFIG_DEBUG_SPINLOCK is not set
-# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
-# CONFIG_DEBUG_KOBJECT is not set
-CONFIG_DEBUG_INFO=y
-# CONFIG_DEBUG_FS is not set
-# CONFIG_NO_KERNEL_MSG is not set
-
-#
-# Security options
-#
-# CONFIG_KEYS is not set
-# CONFIG_SECURITY is not set
-
-#
-# Cryptographic options
-#
-# CONFIG_CRYPTO is not set
-
-#
-# Hardware crypto devices
-#
-
-#
-# Library routines
-#
-# CONFIG_CRC_CCITT is not set
-# CONFIG_CRC32 is not set
-# CONFIG_LIBCRC32C is not set
diff --git a/arch/v850/configs/sim_defconfig b/arch/v850/configs/sim_defconfig
deleted file mode 100644
index f31ba7398ad0..000000000000
--- a/arch/v850/configs/sim_defconfig
+++ /dev/null
@@ -1,451 +0,0 @@
-#
-# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.13-uc0
-# Fri Sep  2 13:36:43 2005
-#
-# CONFIG_MMU is not set
-# CONFIG_UID16 is not set
-CONFIG_RWSEM_GENERIC_SPINLOCK=y
-# CONFIG_RWSEM_XCHGADD_ALGORITHM is not set
-CONFIG_GENERIC_CALIBRATE_DELAY=y
-# CONFIG_ISA is not set
-# CONFIG_ISAPNP is not set
-# CONFIG_EISA is not set
-# CONFIG_MCA is not set
-CONFIG_V850=y
-
-#
-# Processor type and features
-#
-CONFIG_V850E_SIM=y
-# CONFIG_RTE_CB_MA1 is not set
-# CONFIG_RTE_CB_NB85E is not set
-# CONFIG_RTE_CB_ME2 is not set
-# CONFIG_V850E_AS85EP1 is not set
-# CONFIG_V850E2_SIM85E2C is not set
-# CONFIG_V850E2_SIM85E2S is not set
-# CONFIG_V850E2_FPGA85E2C is not set
-# CONFIG_V850E2_ANNA is not set
-CONFIG_V850E=y
-# CONFIG_PCI is not set
-# CONFIG_V850E_INTC is not set
-# CONFIG_V850E_TIMER_D is not set
-# CONFIG_V850E_CACHE is not set
-# CONFIG_V850E2_CACHE is not set
-CONFIG_NO_CACHE=y
-CONFIG_ZERO_BSS=y
-# CONFIG_RESET_GUARD is not set
-CONFIG_LARGE_ALLOCS=y
-CONFIG_FLATMEM=y
-CONFIG_FLAT_NODE_MEM_MAP=y
-
-#
-# Code maturity level options
-#
-# CONFIG_EXPERIMENTAL is not set
-CONFIG_CLEAN_COMPILE=y
-CONFIG_BROKEN_ON_SMP=y
-CONFIG_INIT_ENV_ARG_LIMIT=32
-
-#
-# General setup
-#
-CONFIG_LOCALVERSION=""
-# CONFIG_BSD_PROCESS_ACCT is not set
-# CONFIG_SYSCTL is not set
-# CONFIG_HOTPLUG is not set
-# CONFIG_IKCONFIG is not set
-CONFIG_EMBEDDED=y
-# CONFIG_KALLSYMS is not set
-CONFIG_PRINTK=y
-CONFIG_BUG=y
-# CONFIG_BASE_FULL is not set
-# CONFIG_FUTEX is not set
-# CONFIG_EPOLL is not set
-CONFIG_CC_OPTIMIZE_FOR_SIZE=y
-CONFIG_CC_ALIGN_FUNCTIONS=0
-CONFIG_CC_ALIGN_LABELS=0
-CONFIG_CC_ALIGN_LOOPS=0
-CONFIG_CC_ALIGN_JUMPS=0
-CONFIG_BASE_SMALL=1
-
-#
-# Loadable module support
-#
-CONFIG_MODULES=y
-CONFIG_MODULE_UNLOAD=y
-CONFIG_OBSOLETE_MODPARM=y
-# CONFIG_MODULE_SRCVERSION_ALL is not set
-CONFIG_KMOD=y
-
-#
-# Bus options (PCI, PCMCIA, EISA, MCA, ISA)
-#
-
-#
-# PCCARD (PCMCIA/CardBus) support
-#
-# CONFIG_PCCARD is not set
-
-#
-# PCI Hotplug Support
-#
-
-#
-# Executable file formats
-#
-CONFIG_BINFMT_FLAT=y
-# CONFIG_BINFMT_ZFLAT is not set
-# CONFIG_BINFMT_SHARED_FLAT is not set
-# CONFIG_BINFMT_MISC is not set
-
-#
-# Networking
-#
-# CONFIG_NET is not set
-
-#
-# Generic Driver Options
-#
-CONFIG_STANDALONE=y
-CONFIG_PREVENT_FIRMWARE_BUILD=y
-# CONFIG_FW_LOADER is not set
-# CONFIG_DEBUG_DRIVER is not set
-
-#
-# Memory Technology Devices (MTD)
-#
-CONFIG_MTD=y
-# CONFIG_MTD_DEBUG is not set
-# CONFIG_MTD_CONCAT is not set
-# CONFIG_MTD_PARTITIONS is not set
-
-#
-# User Modules And Translation Layers
-#
-# CONFIG_MTD_CHAR is not set
-CONFIG_MTD_BLOCK=y
-# CONFIG_FTL is not set
-# CONFIG_NFTL is not set
-# CONFIG_INFTL is not set
-
-#
-# RAM/ROM/Flash chip drivers
-#
-# CONFIG_MTD_CFI is not set
-# CONFIG_MTD_JEDECPROBE is not set
-CONFIG_MTD_MAP_BANK_WIDTH_1=y
-CONFIG_MTD_MAP_BANK_WIDTH_2=y
-CONFIG_MTD_MAP_BANK_WIDTH_4=y
-# CONFIG_MTD_MAP_BANK_WIDTH_8 is not set
-# CONFIG_MTD_MAP_BANK_WIDTH_16 is not set
-# CONFIG_MTD_MAP_BANK_WIDTH_32 is not set
-CONFIG_MTD_CFI_I1=y
-CONFIG_MTD_CFI_I2=y
-# CONFIG_MTD_CFI_I4 is not set
-# CONFIG_MTD_CFI_I8 is not set
-# CONFIG_MTD_RAM is not set
-# CONFIG_MTD_ROM is not set
-# CONFIG_MTD_ABSENT is not set
-
-#
-# Mapping drivers for chip access
-#
-# CONFIG_MTD_COMPLEX_MAPPINGS is not set
-# CONFIG_MTD_PLATRAM is not set
-
-#
-# Self-contained MTD device drivers
-#
-CONFIG_MTD_SLRAM=y
-# CONFIG_MTD_PHRAM is not set
-# CONFIG_MTD_MTDRAM is not set
-# CONFIG_MTD_BLKMTD is not set
-
-#
-# Disk-On-Chip Device Drivers
-#
-# CONFIG_MTD_DOC2000 is not set
-# CONFIG_MTD_DOC2001 is not set
-# CONFIG_MTD_DOC2001PLUS is not set
-
-#
-# NAND Flash Device Drivers
-#
-# CONFIG_MTD_NAND is not set
-
-#
-# Parallel port support
-#
-# CONFIG_PARPORT is not set
-
-#
-# Block devices
-#
-# CONFIG_BLK_DEV_FD is not set
-# CONFIG_BLK_DEV_COW_COMMON is not set
-# CONFIG_BLK_DEV_LOOP is not set
-# CONFIG_BLK_DEV_RAM is not set
-CONFIG_BLK_DEV_RAM_COUNT=16
-CONFIG_INITRAMFS_SOURCE=""
-# CONFIG_CDROM_PKTCDVD is not set
-
-#
-# IO Schedulers
-#
-CONFIG_IOSCHED_NOOP=y
-# CONFIG_IOSCHED_AS is not set
-# CONFIG_IOSCHED_DEADLINE is not set
-# CONFIG_IOSCHED_CFQ is not set
-
-#
-# Disk device support
-#
-
-#
-# ATA/ATAPI/MFM/RLL support
-#
-# CONFIG_IDE is not set
-
-#
-# SCSI device support
-#
-# CONFIG_SCSI is not set
-
-#
-# Multi-device support (RAID and LVM)
-#
-# CONFIG_MD is not set
-
-#
-# Fusion MPT device support
-#
-# CONFIG_FUSION is not set
-
-#
-# IEEE 1394 (FireWire) support
-#
-
-#
-# I2O device support
-#
-
-#
-# Network device support
-#
-# CONFIG_NETPOLL is not set
-# CONFIG_NET_POLL_CONTROLLER is not set
-
-#
-# ISDN subsystem
-#
-
-#
-# Input device support
-#
-CONFIG_INPUT=y
-
-#
-# Userland interfaces
-#
-# CONFIG_INPUT_MOUSEDEV is not set
-# CONFIG_INPUT_JOYDEV is not set
-# CONFIG_INPUT_TSDEV is not set
-# CONFIG_INPUT_EVDEV is not set
-# CONFIG_INPUT_EVBUG is not set
-
-#
-# Input Device Drivers
-#
-# CONFIG_INPUT_KEYBOARD is not set
-# CONFIG_INPUT_MOUSE is not set
-# CONFIG_INPUT_JOYSTICK is not set
-# CONFIG_INPUT_TOUCHSCREEN is not set
-# CONFIG_INPUT_MISC is not set
-
-#
-# Hardware I/O ports
-#
-CONFIG_SERIO=y
-# CONFIG_SERIO_I8042 is not set
-# CONFIG_SERIO_SERPORT is not set
-# CONFIG_SERIO_LIBPS2 is not set
-# CONFIG_SERIO_RAW is not set
-# CONFIG_GAMEPORT is not set
-
-#
-# Character devices
-#
-# CONFIG_VT is not set
-# CONFIG_SERIAL_NONSTANDARD is not set
-
-#
-# Serial drivers
-#
-# CONFIG_SERIAL_8250 is not set
-
-#
-# Non-8250 serial port support
-#
-# CONFIG_UNIX98_PTYS is not set
-# CONFIG_LEGACY_PTYS is not set
-
-#
-# IPMI
-#
-# CONFIG_IPMI_HANDLER is not set
-
-#
-# Watchdog Cards
-#
-# CONFIG_WATCHDOG is not set
-# CONFIG_RTC is not set
-# CONFIG_GEN_RTC is not set
-# CONFIG_DTLK is not set
-# CONFIG_R3964 is not set
-
-#
-# Ftape, the floppy tape device driver
-#
-# CONFIG_RAW_DRIVER is not set
-
-#
-# TPM devices
-#
-
-#
-# Multimedia devices
-#
-# CONFIG_VIDEO_DEV is not set
-
-#
-# Digital Video Broadcasting Devices
-#
-
-#
-# File systems
-#
-# CONFIG_EXT2_FS is not set
-# CONFIG_EXT3_FS is not set
-# CONFIG_JBD is not set
-# CONFIG_REISERFS_FS is not set
-# CONFIG_JFS_FS is not set
-# CONFIG_FS_POSIX_ACL is not set
-
-#
-# XFS support
-#
-# CONFIG_XFS_FS is not set
-# CONFIG_MINIX_FS is not set
-CONFIG_ROMFS_FS=y
-# CONFIG_MAGIC_ROM_PTR is not set
-CONFIG_INOTIFY=y
-# CONFIG_QUOTA is not set
-CONFIG_DNOTIFY=y
-# CONFIG_AUTOFS_FS is not set
-# CONFIG_AUTOFS4_FS is not set
-
-#
-# CD-ROM/DVD Filesystems
-#
-# CONFIG_ISO9660_FS is not set
-# CONFIG_UDF_FS is not set
-
-#
-# DOS/FAT/NT Filesystems
-#
-# CONFIG_MSDOS_FS is not set
-# CONFIG_VFAT_FS is not set
-# CONFIG_NTFS_FS is not set
-
-#
-# Pseudo filesystems
-#
-CONFIG_PROC_FS=y
-CONFIG_SYSFS=y
-# CONFIG_TMPFS is not set
-# CONFIG_HUGETLB_PAGE is not set
-CONFIG_RAMFS=y
-
-#
-# Miscellaneous filesystems
-#
-# CONFIG_HFSPLUS_FS is not set
-# CONFIG_JFFS_FS is not set
-# CONFIG_JFFS2_FS is not set
-# CONFIG_CRAMFS is not set
-# CONFIG_VXFS_FS is not set
-# CONFIG_HPFS_FS is not set
-# CONFIG_QNX4FS_FS is not set
-# CONFIG_SYSV_FS is not set
-# CONFIG_UFS_FS is not set
-
-#
-# Partition Types
-#
-# CONFIG_PARTITION_ADVANCED is not set
-CONFIG_MSDOS_PARTITION=y
-
-#
-# Native Language Support
-#
-# CONFIG_NLS is not set
-
-#
-# Graphics support
-#
-# CONFIG_FB is not set
-
-#
-# Sound
-#
-# CONFIG_SOUND is not set
-
-#
-# USB support
-#
-# CONFIG_USB_ARCH_HAS_HCD is not set
-# CONFIG_USB_ARCH_HAS_OHCI is not set
-
-#
-# USB Gadget Support
-#
-# CONFIG_USB_GADGET is not set
-
-#
-# Kernel hacking
-#
-# CONFIG_PRINTK_TIME is not set
-CONFIG_DEBUG_KERNEL=y
-# CONFIG_MAGIC_SYSRQ is not set
-CONFIG_LOG_BUF_SHIFT=14
-# CONFIG_SCHEDSTATS is not set
-# CONFIG_DEBUG_SLAB is not set
-# CONFIG_DEBUG_SPINLOCK is not set
-# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
-# CONFIG_DEBUG_KOBJECT is not set
-CONFIG_DEBUG_INFO=y
-# CONFIG_DEBUG_FS is not set
-# CONFIG_NO_KERNEL_MSG is not set
-
-#
-# Security options
-#
-# CONFIG_KEYS is not set
-# CONFIG_SECURITY is not set
-
-#
-# Cryptographic options
-#
-# CONFIG_CRYPTO is not set
-
-#
-# Hardware crypto devices
-#
-
-#
-# Library routines
-#
-# CONFIG_CRC_CCITT is not set
-# CONFIG_CRC32 is not set
-# CONFIG_LIBCRC32C is not set
diff --git a/arch/v850/kernel/Makefile b/arch/v850/kernel/Makefile
deleted file mode 100644
index da5889c53576..000000000000
--- a/arch/v850/kernel/Makefile
+++ /dev/null
@@ -1,40 +0,0 @@
-#
-# arch/v850/kernel/Makefile
-#
-#  Copyright (C) 2001,02,03  NEC Electronics Corporation
-#  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
-#
-# This file is subject to the terms and conditions of the GNU General Public
-# License.  See the file "COPYING" in the main directory of this archive
-# for more details.
-#
-
-extra-y := head.o init_task.o vmlinux.lds
-
-obj-y += intv.o entry.o process.o syscalls.o time.o setup.o \
-	 signal.o irq.o mach.o ptrace.o bug.o
-obj-$(CONFIG_MODULES)		+= module.o v850_ksyms.o
-# chip-specific code
-obj-$(CONFIG_V850E_MA1)		+= ma.o
-obj-$(CONFIG_V850E_ME2)		+= me2.o
-obj-$(CONFIG_V850E_TEG)		+= teg.o
-obj-$(CONFIG_V850E_AS85EP1)	+= as85ep1.o
-obj-$(CONFIG_V850E2_ANNA)	+= anna.o
-# platform-specific code
-obj-$(CONFIG_V850E_SIM)		+= sim.o simcons.o
-obj-$(CONFIG_V850E2_SIM85E2)	+= sim85e2.o memcons.o
-obj-$(CONFIG_V850E2_FPGA85E2C)	+= fpga85e2c.o memcons.o
-obj-$(CONFIG_RTE_CB)		+= rte_cb.o rte_cb_leds.o
-obj-$(CONFIG_RTE_CB_MA1)	+= rte_ma1_cb.o
-obj-$(CONFIG_RTE_CB_ME2)	+= rte_me2_cb.o
-obj-$(CONFIG_RTE_CB_NB85E)	+= rte_nb85e_cb.o
-obj-$(CONFIG_RTE_CB_MULTI)	+= rte_cb_multi.o
-obj-$(CONFIG_RTE_MB_A_PCI)	+= rte_mb_a_pci.o
-obj-$(CONFIG_RTE_GBUS_INT)	+= gbus_int.o
-# feature-specific code
-obj-$(CONFIG_V850E_INTC)	+= v850e_intc.o
-obj-$(CONFIG_V850E_TIMER_D)	+= v850e_timer_d.o v850e_utils.o
-obj-$(CONFIG_V850E_CACHE)	+= v850e_cache.o
-obj-$(CONFIG_V850E2_CACHE)	+= v850e2_cache.o
-obj-$(CONFIG_V850E_HIGHRES_TIMER) += highres_timer.o
-obj-$(CONFIG_PROC_FS)		+= procfs.o
diff --git a/arch/v850/kernel/anna-rom.ld b/arch/v850/kernel/anna-rom.ld
deleted file mode 100644
index 7c54e7e3f1b1..000000000000
--- a/arch/v850/kernel/anna-rom.ld
+++ /dev/null
@@ -1,16 +0,0 @@
-/* Linker script for the Midas labs Anna V850E2 evaluation board
-   (CONFIG_V850E2_ANNA), with kernel in ROM (CONFIG_ROM_KERNEL).  */
-
-MEMORY {
-	/* 8MB of flash ROM.  */
-	ROM   : ORIGIN = 0,          LENGTH = 0x00800000
-
-	/* 1MB of static RAM.  This memory is mirrored 64 times.  */
-	SRAM  : ORIGIN = SRAM_ADDR,  LENGTH = SRAM_SIZE
-	/* 64MB of DRAM.  */
-	SDRAM : ORIGIN = SDRAM_ADDR, LENGTH = SDRAM_SIZE
-}
-
-SECTIONS {
-	ROMK_SECTIONS(ROM, SRAM)
-}
diff --git a/arch/v850/kernel/anna.c b/arch/v850/kernel/anna.c
deleted file mode 100644
index 5978a25170fb..000000000000
--- a/arch/v850/kernel/anna.c
+++ /dev/null
@@ -1,202 +0,0 @@
-/*
- * arch/v850/kernel/anna.c -- Anna V850E2 evaluation chip/board
- *
- *  Copyright (C) 2002,03  NEC Electronics Corporation
- *  Copyright (C) 2002,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/bootmem.h>
-#include <linux/major.h>
-#include <linux/irq.h>
-
-#include <asm/machdep.h>
-#include <asm/atomic.h>
-#include <asm/page.h>
-#include <asm/v850e_timer_d.h>
-#include <asm/v850e_uart.h>
-
-#include "mach.h"
-
-
-/* SRAM and SDRAM are vaguely contiguous (with a big hole in between; see
-   mach_reserve_bootmem for details); use both as one big area.  */
-#define RAM_START 	SRAM_ADDR
-#define RAM_END		(SDRAM_ADDR + SDRAM_SIZE)
-
-/* The bits of this port are connected to an 8-LED bar-graph.  */
-#define LEDS_PORT	0
-
-
-static void anna_led_tick (void);
-
-
-void __init mach_early_init (void)
-{
-	ANNA_ILBEN    = 0;
-
-	V850E2_CSC(0) = 0x402F;
-	V850E2_CSC(1) = 0x4000;
-	V850E2_BPC    = 0;
-	V850E2_BSC    = 0xAAAA;
-	V850E2_BEC    = 0;
-
-#if 0
-	V850E2_BHC    = 0xFFFF;	/* icache all memory, dcache all */
-#else
-	V850E2_BHC    = 0;	/* cache no memory */
-#endif
-	V850E2_BCT(0) = 0xB088;
-	V850E2_BCT(1) = 0x0008;
-	V850E2_DWC(0) = 0x0027;
-	V850E2_DWC(1) = 0;
-	V850E2_BCC    = 0x0006;
-	V850E2_ASC    = 0;
-	V850E2_LBS    = 0x0089;
-	V850E2_SCR(3) = 0x21A9;
-	V850E2_RFS(3) = 0x8121;
-
-	v850e_intc_disable_irqs ();
-}
-
-void __init mach_setup (char **cmdline)
-{
-	ANNA_PORT_PM (LEDS_PORT) = 0;	/* Make all LED pins output pins.  */
-	mach_tick = anna_led_tick;
-}
-
-void __init mach_get_physical_ram (unsigned long *ram_start,
-				   unsigned long *ram_len)
-{
-	*ram_start = RAM_START;
-	*ram_len = RAM_END - RAM_START;
-}
-
-void __init mach_reserve_bootmem ()
-{
-	/* The space between SRAM and SDRAM is filled with duplicate
-	   images of SRAM.  Prevent the kernel from using them.  */
-	reserve_bootmem (SRAM_ADDR + SRAM_SIZE,
-			 SDRAM_ADDR - (SRAM_ADDR + SRAM_SIZE),
-			 BOOTMEM_DEFAULT);
-}
-
-void mach_gettimeofday (struct timespec *tv)
-{
-	tv->tv_sec = 0;
-	tv->tv_nsec = 0;
-}
-
-void __init mach_sched_init (struct irqaction *timer_action)
-{
-	/* Start hardware timer.  */
-	v850e_timer_d_configure (0, HZ);
-	/* Install timer interrupt handler.  */
-	setup_irq (IRQ_INTCMD(0), timer_action);
-}
-
-static struct v850e_intc_irq_init irq_inits[] = {
-	{ "IRQ", 0, 		NUM_MACH_IRQS,	1, 7 },
-	{ "PIN", IRQ_INTP(0),   IRQ_INTP_NUM,   1, 4 },
-	{ "CCC", IRQ_INTCCC(0),	IRQ_INTCCC_NUM, 1, 5 },
-	{ "CMD", IRQ_INTCMD(0), IRQ_INTCMD_NUM,	1, 5 },
-	{ "DMA", IRQ_INTDMA(0), IRQ_INTDMA_NUM,	1, 2 },
-	{ "DMXER", IRQ_INTDMXER,1,		1, 2 },
-	{ "SRE", IRQ_INTSRE(0), IRQ_INTSRE_NUM,	3, 3 },
-	{ "SR",	 IRQ_INTSR(0),	IRQ_INTSR_NUM, 	3, 4 },
-	{ "ST",  IRQ_INTST(0), 	IRQ_INTST_NUM, 	3, 5 },
-	{ 0 }
-};
-#define NUM_IRQ_INITS (ARRAY_SIZE(irq_inits) - 1)
-
-static struct hw_interrupt_type hw_itypes[NUM_IRQ_INITS];
-
-void __init mach_init_irqs (void)
-{
-	v850e_intc_init_irq_types (irq_inits, hw_itypes);
-}
-
-void machine_restart (char *__unused)
-{
-#ifdef CONFIG_RESET_GUARD
-	disable_reset_guard ();
-#endif
-	asm ("jmp r0"); /* Jump to the reset vector.  */
-}
-
-void machine_halt (void)
-{
-#ifdef CONFIG_RESET_GUARD
-	disable_reset_guard ();
-#endif
-	local_irq_disable ();	/* Ignore all interrupts.  */
-	ANNA_PORT_IO(LEDS_PORT) = 0xAA;	/* Note that we halted.  */
-	for (;;)
-		asm ("halt; nop; nop; nop; nop; nop");
-}
-
-void machine_power_off (void)
-{
-	machine_halt ();
-}
-
-/* Called before configuring an on-chip UART.  */
-void anna_uart_pre_configure (unsigned chan, unsigned cflags, unsigned baud)
-{
-	/* The Anna connects some general-purpose I/O pins on the CPU to
-	   the RTS/CTS lines of UART 1's serial connection.  I/O pins P07
-	   and P37 are RTS and CTS respectively.  */
-	if (chan == 1) {
-		ANNA_PORT_PM(0) &= ~0x80; /* P07 in output mode */
-		ANNA_PORT_PM(3) |=  0x80; /* P37 in input mode */
-	}
-}
-
-/* Minimum and maximum bounds for the moving upper LED boundary in the
-   clock tick display.  We can't use the last bit because it's used for
-   UART0's CTS output.  */
-#define MIN_MAX_POS 0
-#define MAX_MAX_POS 6
-
-/* There are MAX_MAX_POS^2 - MIN_MAX_POS^2 cycles in the animation, so if
-   we pick 6 and 0 as above, we get 49 cycles, which is when divided into
-   the standard 100 value for HZ, gives us an almost 1s total time.  */
-#define TICKS_PER_FRAME \
-	(HZ / (MAX_MAX_POS * MAX_MAX_POS - MIN_MAX_POS * MIN_MAX_POS))
-
-static void anna_led_tick ()
-{
-	static unsigned counter = 0;
-	
-	if (++counter == TICKS_PER_FRAME) {
-		static int pos = 0, max_pos = MAX_MAX_POS, dir = 1;
-
-		if (dir > 0 && pos == max_pos) {
-			dir = -1;
-			if (max_pos == MIN_MAX_POS)
-				max_pos = MAX_MAX_POS;
-			else
-				max_pos--;
-		} else {
-			if (dir < 0 && pos == 0)
-				dir = 1;
-
-			if (pos + dir <= max_pos) {
-				/* Each bit of port 0 has a LED. */
-				clear_bit (pos, &ANNA_PORT_IO(LEDS_PORT));
-				pos += dir;
-				set_bit (pos, &ANNA_PORT_IO(LEDS_PORT));
-			}
-		}
-
-		counter = 0;
-	}
-}
diff --git a/arch/v850/kernel/anna.ld b/arch/v850/kernel/anna.ld
deleted file mode 100644
index df7f80f2833d..000000000000
--- a/arch/v850/kernel/anna.ld
+++ /dev/null
@@ -1,20 +0,0 @@
-/* Linker script for the Midas labs Anna V850E2 evaluation board
-   (CONFIG_V850E2_ANNA).  */
-
-MEMORY {
-	/* 256KB of internal memory (followed by one mirror).  */
-	iMEM0 : ORIGIN = 0,	     LENGTH = 0x00040000
-	/* 256KB of internal memory (followed by one mirror).  */
-	iMEM1 : ORIGIN = 0x00040000, LENGTH = 0x00040000
-
-	/* 1MB of static RAM.  This memory is mirrored 64 times.  */
-	SRAM  : ORIGIN = SRAM_ADDR,  LENGTH = SRAM_SIZE
-	/* 64MB of DRAM.  */
-	SDRAM : ORIGIN = SDRAM_ADDR, LENGTH = SDRAM_SIZE
-}
-
-SECTIONS {
-	.intv : { INTV_CONTENTS } > iMEM0
-	.sram : { RAMK_KRAM_CONTENTS } > SRAM
-	.root : { ROOT_FS_CONTENTS } > SDRAM
-}
diff --git a/arch/v850/kernel/as85ep1-rom.ld b/arch/v850/kernel/as85ep1-rom.ld
deleted file mode 100644
index fe2a9a3ab525..000000000000
--- a/arch/v850/kernel/as85ep1-rom.ld
+++ /dev/null
@@ -1,21 +0,0 @@
-/* Linker script for the NEC AS85EP1 V850E evaluation board
-   (CONFIG_V850E_AS85EP1), with kernel in ROM (CONFIG_ROM_KERNEL).  */
-
-MEMORY {
-	/* 4MB of flash ROM.  */
-	ROM   : ORIGIN = 0,          LENGTH = 0x00400000
-
-	/* 1MB of static RAM.  */
-	SRAM  : ORIGIN = SRAM_ADDR,  LENGTH = SRAM_SIZE
-
-	/* About 58MB of DRAM.  This can actually be at one of two
-	   positions, determined by jumper JP3; we have to use the first
-	   position because the second is partially out of processor
-	   instruction addressing range (though in the second position
-	   there's actually 64MB available).  */
-	SDRAM : ORIGIN = SDRAM_ADDR, LENGTH = SDRAM_SIZE
-}
-
-SECTIONS {
-	ROMK_SECTIONS(ROM, SRAM)
-}
diff --git a/arch/v850/kernel/as85ep1.c b/arch/v850/kernel/as85ep1.c
deleted file mode 100644
index b525ecf3aea4..000000000000
--- a/arch/v850/kernel/as85ep1.c
+++ /dev/null
@@ -1,234 +0,0 @@
-/*
- * arch/v850/kernel/as85ep1.c -- AS85EP1 V850E evaluation chip/board
- *
- *  Copyright (C) 2002,03  NEC Electronics Corporation
- *  Copyright (C) 2002,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/bootmem.h>
-#include <linux/major.h>
-#include <linux/irq.h>
-
-#include <asm/machdep.h>
-#include <asm/atomic.h>
-#include <asm/page.h>
-#include <asm/v850e_timer_d.h>
-#include <asm/v850e_uart.h>
-
-#include "mach.h"
-
-
-/* SRAM and SDRAM are vaguely contiguous (with a big hole in between; see
-   mach_reserve_bootmem for details); use both as one big area.  */
-#define RAM_START 	SRAM_ADDR
-#define RAM_END		(SDRAM_ADDR + SDRAM_SIZE)
-
-/* The bits of this port are connected to an 8-LED bar-graph.  */
-#define LEDS_PORT	4
-
-
-static void as85ep1_led_tick (void);
-
-extern char _intv_copy_src_start, _intv_copy_src_end;
-extern char _intv_copy_dst_start;
-
-
-void __init mach_early_init (void)
-{
-#ifndef CONFIG_ROM_KERNEL
-	const u32 *src;
-	register u32 *dst asm ("ep");
-#endif
-
-	AS85EP1_CSC(0) = 0x0403;
-	AS85EP1_BCT(0) = 0xB8B8;
-	AS85EP1_DWC(0) = 0x0104;
-	AS85EP1_BCC    = 0x0012;
-	AS85EP1_ASC    = 0;
-	AS85EP1_LBS    = 0x00A9;
-
-	AS85EP1_PORT_PMC(6)  = 0xFF; /* valid A0,A1,A20-A25 */
-	AS85EP1_PORT_PMC(7)  = 0x0E; /* valid CS1-CS3       */
-	AS85EP1_PORT_PMC(9)  = 0xFF; /* valid D16-D23       */
-	AS85EP1_PORT_PMC(10) = 0xFF; /* valid D24-D31       */
-
-	AS85EP1_RFS(1) = 0x800c;
-	AS85EP1_RFS(3) = 0x800c;
-	AS85EP1_SCR(1) = 0x20A9;
-	AS85EP1_SCR(3) = 0x20A9;
-
-#ifndef CONFIG_ROM_KERNEL
-	/* The early chip we have is buggy, and writing the interrupt
-	   vectors into low RAM may screw up, so for non-ROM kernels, we
-	   only rely on the reset vector being downloaded, and copy the
-	   rest of the interrupt vectors into place here.  The specific bug
-	   is that writing address N, where (N & 0x10) == 0x10, will _also_
-	   write to address (N - 0x10).  We avoid this (effectively) by
-	   writing in 16-byte chunks backwards from the end.  */
-
-	AS85EP1_IRAMM = 0x3;	/* "write-mode" for the internal instruction memory */
-
-	src = (u32 *)(((u32)&_intv_copy_src_end - 1) & ~0xF);
-	dst = (u32 *)&_intv_copy_dst_start
-		+ (src - (u32 *)&_intv_copy_src_start);
-	do {
-		u32 t0 = src[0], t1 = src[1], t2 = src[2], t3 = src[3];
-		dst[0] = t0; dst[1] = t1; dst[2] = t2; dst[3] = t3;
-		dst -= 4;
-		src -= 4;
-	} while (src > (u32 *)&_intv_copy_src_start);
-
-	AS85EP1_IRAMM = 0x0;	/* "read-mode" for the internal instruction memory */
-#endif /* !CONFIG_ROM_KERNEL */
-
-	v850e_intc_disable_irqs ();
-}
-
-void __init mach_setup (char **cmdline)
-{
-	AS85EP1_PORT_PMC (LEDS_PORT) = 0; /* Make the LEDs port an I/O port. */
-	AS85EP1_PORT_PM (LEDS_PORT) = 0; /* Make all the bits output pins.  */
-	mach_tick = as85ep1_led_tick;
-}
-
-void __init mach_get_physical_ram (unsigned long *ram_start,
-				   unsigned long *ram_len)
-{
-	*ram_start = RAM_START;
-	*ram_len = RAM_END - RAM_START;
-}
-
-/* Convenience macros.  */
-#define SRAM_END	(SRAM_ADDR + SRAM_SIZE)
-#define SDRAM_END	(SDRAM_ADDR + SDRAM_SIZE)
-
-void __init mach_reserve_bootmem ()
-{
-	if (SDRAM_ADDR < RAM_END && SDRAM_ADDR > RAM_START)
-		/* We can't use the space between SRAM and SDRAM, so
-		   prevent the kernel from trying.  */
-		reserve_bootmem(SRAM_END, SDRAM_ADDR - SRAM_END,
-				BOOTMEM_DEFAULT);
-}
-
-void mach_gettimeofday (struct timespec *tv)
-{
-	tv->tv_sec = 0;
-	tv->tv_nsec = 0;
-}
-
-void __init mach_sched_init (struct irqaction *timer_action)
-{
-	/* Start hardware timer.  */
-	v850e_timer_d_configure (0, HZ);
-	/* Install timer interrupt handler.  */
-	setup_irq (IRQ_INTCMD(0), timer_action);
-}
-
-static struct v850e_intc_irq_init irq_inits[] = {
-	{ "IRQ", 0, 		NUM_MACH_IRQS,	1, 7 },
-	{ "CCC", IRQ_INTCCC(0),	IRQ_INTCCC_NUM, 1, 5 },
-	{ "CMD", IRQ_INTCMD(0), IRQ_INTCMD_NUM,	1, 5 },
-	{ "SRE", IRQ_INTSRE(0), IRQ_INTSRE_NUM,	3, 3 },
-	{ "SR",	 IRQ_INTSR(0),	IRQ_INTSR_NUM, 	3, 4 },
-	{ "ST",  IRQ_INTST(0), 	IRQ_INTST_NUM, 	3, 5 },
-	{ 0 }
-};
-#define NUM_IRQ_INITS (ARRAY_SIZE(irq_inits) - 1)
-
-static struct hw_interrupt_type hw_itypes[NUM_IRQ_INITS];
-
-void __init mach_init_irqs (void)
-{
-	v850e_intc_init_irq_types (irq_inits, hw_itypes);
-}
-
-void machine_restart (char *__unused)
-{
-#ifdef CONFIG_RESET_GUARD
-	disable_reset_guard ();
-#endif
-	asm ("jmp r0"); /* Jump to the reset vector.  */
-}
-
-void machine_halt (void)
-{
-#ifdef CONFIG_RESET_GUARD
-	disable_reset_guard ();
-#endif
-	local_irq_disable ();	/* Ignore all interrupts.  */
-	AS85EP1_PORT_IO (LEDS_PORT) = 0xAA;	/* Note that we halted.  */
-	for (;;)
-		asm ("halt; nop; nop; nop; nop; nop");
-}
-
-void machine_power_off (void)
-{
-	machine_halt ();
-}
-
-/* Called before configuring an on-chip UART.  */
-void as85ep1_uart_pre_configure (unsigned chan, unsigned cflags, unsigned baud)
-{
-	/* Make the shared uart/port pins be uart pins.  */
-	AS85EP1_PORT_PMC(3) |= (0x5 << chan);
-
-	/* The AS85EP1 connects some general-purpose I/O pins on the CPU to
-	   the RTS/CTS lines of UART 1's serial connection.  I/O pins P53
-	   and P54 are RTS and CTS respectively.  */
-	if (chan == 1) {
-		/* Put P53 & P54 in I/O port mode.  */
-		AS85EP1_PORT_PMC(5) &= ~0x18;
-		/* Make P53 an output, and P54 an input.  */
-		AS85EP1_PORT_PM(5) |=  0x10;
-	}
-}
-
-/* Minimum and maximum bounds for the moving upper LED boundary in the
-   clock tick display.  */
-#define MIN_MAX_POS 0
-#define MAX_MAX_POS 7
-
-/* There are MAX_MAX_POS^2 - MIN_MAX_POS^2 cycles in the animation, so if
-   we pick 6 and 0 as above, we get 49 cycles, which is when divided into
-   the standard 100 value for HZ, gives us an almost 1s total time.  */
-#define TICKS_PER_FRAME \
-	(HZ / (MAX_MAX_POS * MAX_MAX_POS - MIN_MAX_POS * MIN_MAX_POS))
-
-static void as85ep1_led_tick ()
-{
-	static unsigned counter = 0;
-	
-	if (++counter == TICKS_PER_FRAME) {
-		static int pos = 0, max_pos = MAX_MAX_POS, dir = 1;
-
-		if (dir > 0 && pos == max_pos) {
-			dir = -1;
-			if (max_pos == MIN_MAX_POS)
-				max_pos = MAX_MAX_POS;
-			else
-				max_pos--;
-		} else {
-			if (dir < 0 && pos == 0)
-				dir = 1;
-
-			if (pos + dir <= max_pos) {
-				/* Each bit of port 0 has a LED. */
-				set_bit (pos, &AS85EP1_PORT_IO(LEDS_PORT));
-				pos += dir;
-				clear_bit (pos, &AS85EP1_PORT_IO(LEDS_PORT));
-			}
-		}
-
-		counter = 0;
-	}
-}
diff --git a/arch/v850/kernel/as85ep1.ld b/arch/v850/kernel/as85ep1.ld
deleted file mode 100644
index ef2c4399063e..000000000000
--- a/arch/v850/kernel/as85ep1.ld
+++ /dev/null
@@ -1,49 +0,0 @@
-/* Linker script for the NEC AS85EP1 V850E evaluation board
-   (CONFIG_V850E_AS85EP1).  */
-
-MEMORY {
-	/* 1MB of internal instruction memory. */
-	iMEM0 : ORIGIN = 0,	     LENGTH = 0x00100000
-
-	/* 1MB of static RAM.  */
-	SRAM  : ORIGIN = SRAM_ADDR,  LENGTH = SRAM_SIZE
-
-	/* About 58MB of DRAM.  This can actually be at one of two
-	   positions, determined by jump JP3; we have to use the first
-	   position because the second is partially out of processor
-	   instruction addressing range (though in the second position
-	   there's actually 64MB available).  */
-	SDRAM : ORIGIN = SDRAM_ADDR, LENGTH = SDRAM_SIZE
-}
-
-SECTIONS {
-	.resetv : {
-		__intv_start = . ;
-			*(.intv.reset)	/* Reset vector */
-	} > iMEM0
-
-	.sram : {
-		RAMK_KRAM_CONTENTS
-
-		/* We stick most of the interrupt vectors here; they'll be
-		   copied into the proper location by the early init code (we
-		   can't put them directly in the right place because of
-		   hardware bugs).  The vectors shouldn't need to be
-		   relocated, so we don't have to use `> ...  AT> ...' to
-		   split the load/vm addresses (and we can't because of
-		   problems with the loader).  */
-		. = ALIGN (0x10) ;
-		__intv_copy_src_start = . ;
-			*(.intv.common)	/* Vectors common to all v850e proc. */
-			*(.intv.mach)	/* Machine-specific int. vectors.  */
-		. = ALIGN (0x10) ;
-		__intv_copy_src_end = . ;
-	} > SRAM
-
-	/* Where we end up putting the vectors.  */
-	__intv_copy_dst_start = 0x10 ;
-	__intv_copy_dst_end = __intv_copy_dst_start + (__intv_copy_src_end - __intv_copy_src_start) ;
-	__intv_end = __intv_copy_dst_end ;
-
-	.root : { ROOT_FS_CONTENTS } > SDRAM
-}
diff --git a/arch/v850/kernel/asm-offsets.c b/arch/v850/kernel/asm-offsets.c
deleted file mode 100644
index 581e6986a776..000000000000
--- a/arch/v850/kernel/asm-offsets.c
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * This program is used to generate definitions needed by
- * assembly language modules.
- *
- * We use the technique used in the OSF Mach kernel code:
- * generate asm statements containing #defines,
- * compile this file to assembler, and then extract the
- * #defines from the assembly-language output.
- */
-
-#include <linux/stddef.h>
-#include <linux/sched.h>
-#include <linux/kernel_stat.h>
-#include <linux/ptrace.h>
-#include <linux/hardirq.h>
-#include <linux/kbuild.h>
-
-#include <asm/irq.h>
-#include <asm/errno.h>
-
-int main (void)
-{
-	/* offsets into the task struct */
-	DEFINE (TASK_STATE, offsetof (struct task_struct, state));
-	DEFINE (TASK_FLAGS, offsetof (struct task_struct, flags));
-	DEFINE (TASK_PTRACE, offsetof (struct task_struct, ptrace));
-	DEFINE (TASK_BLOCKED, offsetof (struct task_struct, blocked));
-	DEFINE (TASK_THREAD, offsetof (struct task_struct, thread));
-	DEFINE (TASK_THREAD_INFO, offsetof (struct task_struct, stack));
-	DEFINE (TASK_MM, offsetof (struct task_struct, mm));
-	DEFINE (TASK_ACTIVE_MM, offsetof (struct task_struct, active_mm));
-	DEFINE (TASK_PID, offsetof (struct task_struct, pid));
-
-	/* offsets into the kernel_stat struct */
-	DEFINE (STAT_IRQ, offsetof (struct kernel_stat, irqs));
-
-
-	/* signal defines */
-	DEFINE (SIGSEGV, SIGSEGV);
-	DEFINE (SEGV_MAPERR, SEGV_MAPERR);
-	DEFINE (SIGTRAP, SIGTRAP);
-	DEFINE (SIGCHLD, SIGCHLD);
-	DEFINE (SIGILL, SIGILL);
-	DEFINE (TRAP_TRACE, TRAP_TRACE);
-
-	/* ptrace flag bits */
-	DEFINE (PT_PTRACED, PT_PTRACED);
-	DEFINE (PT_DTRACE, PT_DTRACE);
-
-	/* error values */
-	DEFINE (ENOSYS, ENOSYS);
-
-	/* clone flag bits */
-	DEFINE (CLONE_VFORK, CLONE_VFORK);
-	DEFINE (CLONE_VM, CLONE_VM);
-
-	return 0;
-}
diff --git a/arch/v850/kernel/bug.c b/arch/v850/kernel/bug.c
deleted file mode 100644
index c78cf750915a..000000000000
--- a/arch/v850/kernel/bug.c
+++ /dev/null
@@ -1,142 +0,0 @@
-/*
- * arch/v850/kernel/bug.c -- Bug reporting functions
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#include <linux/kernel.h>
-#include <linux/reboot.h>
-#include <linux/sched.h>
-#include <linux/module.h>
-
-#include <asm/errno.h>
-#include <asm/ptrace.h>
-#include <asm/processor.h>
-#include <asm/current.h>
-
-/* We should use __builtin_return_address, but it doesn't work in gcc-2.90
-   (which is currently our standard compiler on the v850).  */
-#define ret_addr() ({ register u32 lp asm ("lp"); lp; })
-#define stack_addr() ({ register u32 sp asm ("sp"); sp; })
-
-void __bug ()
-{
-	printk (KERN_CRIT "kernel BUG at PC 0x%x (SP ~0x%x)!\n",
-		ret_addr() - 4, /* - 4 for `jarl' */
-		stack_addr());
-	machine_halt ();
-}
-
-int bad_trap (int trap_num, struct pt_regs *regs)
-{
-	printk (KERN_CRIT
-		"unimplemented trap %d called at 0x%08lx, pid %d!\n",
-		trap_num, regs->pc, current->pid);
-	return -ENOSYS;
-}
-
-#ifdef CONFIG_RESET_GUARD
-void unexpected_reset (unsigned long ret_addr, unsigned long kmode,
-		       struct task_struct *task, unsigned long sp)
-{
-	printk (KERN_CRIT
-		"unexpected reset in %s mode, pid %d"
-		" (ret_addr = 0x%lx, sp = 0x%lx)\n",
-		kmode ? "kernel" : "user",
-		task ? task->pid : -1,
-		ret_addr, sp);
-
-	machine_halt ();
-}
-#endif /* CONFIG_RESET_GUARD */
-
-
-
-struct spec_reg_name {
-	const char *name;
-	int gpr;
-};
-
-struct spec_reg_name spec_reg_names[] = {
-	{ "sp", GPR_SP },
-	{ "gp", GPR_GP },
-	{ "tp", GPR_TP },
-	{ "ep", GPR_EP },
-	{ "lp", GPR_LP },
-	{ 0, 0 }
-};
-
-void show_regs (struct pt_regs *regs)
-{
-	int gpr_base, gpr_offs;
-
-	printk ("     pc 0x%08lx    psw 0x%08lx                       kernel_mode %d\n",
-		regs->pc, regs->psw, regs->kernel_mode);
-	printk ("   ctpc 0x%08lx  ctpsw 0x%08lx   ctbp 0x%08lx\n",
-		regs->ctpc, regs->ctpsw, regs->ctbp);
-
-	for (gpr_base = 0; gpr_base < NUM_GPRS; gpr_base += 4) {
-		for (gpr_offs = 0; gpr_offs < 4; gpr_offs++) {
-			int gpr = gpr_base + gpr_offs;
-			long val = regs->gpr[gpr];
-			struct spec_reg_name *srn;
-
-			for (srn = spec_reg_names; srn->name; srn++)
-				if (srn->gpr == gpr)
-					break;
-
-			if (srn->name)
-				printk ("%7s 0x%08lx", srn->name, val);
-			else
-				printk ("    r%02d 0x%08lx", gpr, val);
-		}
-
-		printk ("\n");
-	}
-}
-
-/*
- * TASK is a pointer to the task whose backtrace we want to see (or NULL
- * for current task), SP is the stack pointer of the first frame that
- * should be shown in the back trace (or NULL if the entire call-chain of
- * the task should be shown).
- */
-void show_stack (struct task_struct *task, unsigned long *sp)
-{
-	unsigned long addr, end;
-
-	if (sp)
-		addr = (unsigned long)sp;
-	else if (task)
-		addr = task_sp (task);
-	else
-		addr = stack_addr ();
-
-	addr = addr & ~3;
-	end = (addr + THREAD_SIZE - 1) & THREAD_MASK;
-
-	while (addr < end) {
-		printk ("%8lX: ", addr);
-		while (addr < end) {
-			printk (" %8lX", *(unsigned long *)addr);
-			addr += sizeof (unsigned long);
-			if (! (addr & 0xF))
-				break;
-		}
-		printk ("\n");
-	}
-}
-
-void dump_stack ()
-{
-	show_stack (0, 0);
-}
-
-EXPORT_SYMBOL(dump_stack);
diff --git a/arch/v850/kernel/entry.S b/arch/v850/kernel/entry.S
deleted file mode 100644
index e4327a8d6bcd..000000000000
--- a/arch/v850/kernel/entry.S
+++ /dev/null
@@ -1,1121 +0,0 @@
-/*
- * arch/v850/kernel/entry.S -- Low-level system-call handling, trap handlers,
- *	and context-switching
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#include <linux/sys.h>
-
-#include <asm/entry.h>
-#include <asm/current.h>
-#include <asm/thread_info.h>
-#include <asm/clinkage.h>
-#include <asm/processor.h>
-#include <asm/irq.h>
-#include <asm/errno.h>
-
-#include <asm/asm-offsets.h>
-
-
-/* Make a slightly more convenient alias for C_SYMBOL_NAME.  */
-#define CSYM	C_SYMBOL_NAME
-
-
-/* The offset of the struct pt_regs in a state-save-frame on the stack.  */
-#define PTO	STATE_SAVE_PT_OFFSET
-
-
-/* Save argument registers to the state-save-frame pointed to by EP.  */
-#define SAVE_ARG_REGS							      \
-	sst.w	r6, PTO+PT_GPR(6)[ep];					      \
-	sst.w	r7, PTO+PT_GPR(7)[ep];					      \
-	sst.w	r8, PTO+PT_GPR(8)[ep];					      \
-	sst.w	r9, PTO+PT_GPR(9)[ep]
-/* Restore argument registers from the state-save-frame pointed to by EP.  */
-#define RESTORE_ARG_REGS						      \
-	sld.w	PTO+PT_GPR(6)[ep], r6;					      \
-	sld.w	PTO+PT_GPR(7)[ep], r7;					      \
-	sld.w	PTO+PT_GPR(8)[ep], r8;					      \
-	sld.w	PTO+PT_GPR(9)[ep], r9
-
-/* Save value return registers to the state-save-frame pointed to by EP.  */
-#define SAVE_RVAL_REGS							      \
-	sst.w	r10, PTO+PT_GPR(10)[ep];				      \
-	sst.w	r11, PTO+PT_GPR(11)[ep]
-/* Restore value return registers from the state-save-frame pointed to by EP.  */
-#define RESTORE_RVAL_REGS						      \
-	sld.w	PTO+PT_GPR(10)[ep], r10;				      \
-	sld.w	PTO+PT_GPR(11)[ep], r11
-
-
-#define SAVE_CALL_CLOBBERED_REGS_BEFORE_ARGS				      \
-	sst.w	r1, PTO+PT_GPR(1)[ep];					      \
-	sst.w	r5, PTO+PT_GPR(5)[ep]
-#define SAVE_CALL_CLOBBERED_REGS_AFTER_RVAL				      \
-	sst.w	r12, PTO+PT_GPR(12)[ep];				      \
-	sst.w	r13, PTO+PT_GPR(13)[ep];				      \
-	sst.w	r14, PTO+PT_GPR(14)[ep];				      \
-	sst.w	r15, PTO+PT_GPR(15)[ep];				      \
-	sst.w	r16, PTO+PT_GPR(16)[ep];				      \
-	sst.w	r17, PTO+PT_GPR(17)[ep];				      \
-	sst.w	r18, PTO+PT_GPR(18)[ep];				      \
-	sst.w	r19, PTO+PT_GPR(19)[ep]
-#define RESTORE_CALL_CLOBBERED_REGS_BEFORE_ARGS				      \
-	sld.w	PTO+PT_GPR(1)[ep], r1;					      \
-	sld.w	PTO+PT_GPR(5)[ep], r5
-#define RESTORE_CALL_CLOBBERED_REGS_AFTER_RVAL				      \
-	sld.w	PTO+PT_GPR(12)[ep], r12;				      \
-	sld.w	PTO+PT_GPR(13)[ep], r13;				      \
-	sld.w	PTO+PT_GPR(14)[ep], r14;				      \
-	sld.w	PTO+PT_GPR(15)[ep], r15;				      \
-	sld.w	PTO+PT_GPR(16)[ep], r16;				      \
-	sld.w	PTO+PT_GPR(17)[ep], r17;				      \
-	sld.w	PTO+PT_GPR(18)[ep], r18;				      \
-	sld.w	PTO+PT_GPR(19)[ep], r19
-
-/* Save `call clobbered' registers to the state-save-frame pointed to by EP.  */
-#define SAVE_CALL_CLOBBERED_REGS					      \
-	SAVE_CALL_CLOBBERED_REGS_BEFORE_ARGS;				      \
-	SAVE_ARG_REGS;							      \
-	SAVE_RVAL_REGS;							      \
-	SAVE_CALL_CLOBBERED_REGS_AFTER_RVAL
-/* Restore `call clobbered' registers from the state-save-frame pointed to
-   by EP.  */
-#define RESTORE_CALL_CLOBBERED_REGS					      \
-	RESTORE_CALL_CLOBBERED_REGS_BEFORE_ARGS;			      \
-	RESTORE_ARG_REGS;						      \
-	RESTORE_RVAL_REGS;						      \
-	RESTORE_CALL_CLOBBERED_REGS_AFTER_RVAL
-
-/* Save `call clobbered' registers except for the return-value registers
-   to the state-save-frame pointed to by EP.  */
-#define SAVE_CALL_CLOBBERED_REGS_NO_RVAL				      \
-	SAVE_CALL_CLOBBERED_REGS_BEFORE_ARGS;				      \
-	SAVE_ARG_REGS;							      \
-	SAVE_CALL_CLOBBERED_REGS_AFTER_RVAL
-/* Restore `call clobbered' registers except for the return-value registers
-   from the state-save-frame pointed to by EP.  */
-#define RESTORE_CALL_CLOBBERED_REGS_NO_RVAL				      \
-	RESTORE_CALL_CLOBBERED_REGS_BEFORE_ARGS;			      \
-	RESTORE_ARG_REGS;						      \
-	RESTORE_CALL_CLOBBERED_REGS_AFTER_RVAL
-
-/* Save `call saved' registers to the state-save-frame pointed to by EP.  */
-#define SAVE_CALL_SAVED_REGS						      \
-	sst.w	r2, PTO+PT_GPR(2)[ep];					      \
-	sst.w	r20, PTO+PT_GPR(20)[ep];				      \
-	sst.w	r21, PTO+PT_GPR(21)[ep];				      \
-	sst.w	r22, PTO+PT_GPR(22)[ep];				      \
-	sst.w	r23, PTO+PT_GPR(23)[ep];				      \
-	sst.w	r24, PTO+PT_GPR(24)[ep];				      \
-	sst.w	r25, PTO+PT_GPR(25)[ep];				      \
-	sst.w	r26, PTO+PT_GPR(26)[ep];				      \
-	sst.w	r27, PTO+PT_GPR(27)[ep];				      \
-	sst.w	r28, PTO+PT_GPR(28)[ep];				      \
-	sst.w	r29, PTO+PT_GPR(29)[ep]
-/* Restore `call saved' registers from the state-save-frame pointed to by EP.  */
-#define RESTORE_CALL_SAVED_REGS						      \
-	sld.w	PTO+PT_GPR(2)[ep], r2;					      \
-	sld.w	PTO+PT_GPR(20)[ep], r20;				      \
-	sld.w	PTO+PT_GPR(21)[ep], r21;				      \
-	sld.w	PTO+PT_GPR(22)[ep], r22;				      \
-	sld.w	PTO+PT_GPR(23)[ep], r23;				      \
-	sld.w	PTO+PT_GPR(24)[ep], r24;				      \
-	sld.w	PTO+PT_GPR(25)[ep], r25;				      \
-	sld.w	PTO+PT_GPR(26)[ep], r26;				      \
-	sld.w	PTO+PT_GPR(27)[ep], r27;				      \
-	sld.w	PTO+PT_GPR(28)[ep], r28;				      \
-	sld.w	PTO+PT_GPR(29)[ep], r29
-
-
-/* Save the PC stored in the special register SAVEREG to the state-save-frame
-   pointed to by EP.  r19 is clobbered.  */
-#define SAVE_PC(savereg)						      \
-	stsr	SR_ ## savereg, r19;					      \
-	sst.w	r19, PTO+PT_PC[ep]
-/* Restore the PC from the state-save-frame pointed to by EP, to the special
-   register SAVEREG.  LP is clobbered (it is used as a scratch register
-   because the POP_STATE macro restores it, and this macro is usually used
-   inside POP_STATE).  */
-#define RESTORE_PC(savereg)						      \
-	sld.w	PTO+PT_PC[ep], lp;					      \
-	ldsr	lp, SR_ ## savereg
-/* Save the PSW register stored in the special register SAVREG to the
-   state-save-frame pointed to by EP.  r19 is clobbered.  */
-#define SAVE_PSW(savereg)						      \
-	stsr	SR_ ## savereg, r19;					      \
-	sst.w	r19, PTO+PT_PSW[ep]
-/* Restore the PSW register from the state-save-frame pointed to by EP, to
-   the special register SAVEREG.  LP is clobbered (it is used as a scratch
-   register because the POP_STATE macro restores it, and this macro is
-   usually used inside POP_STATE).  */
-#define RESTORE_PSW(savereg)						      \
-	sld.w	PTO+PT_PSW[ep], lp;					      \
-	ldsr	lp, SR_ ## savereg
-
-/* Save CTPC/CTPSW/CTBP registers to the state-save-frame pointed to by REG.
-   r19 is clobbered.  */
-#define SAVE_CT_REGS							      \
-	stsr	SR_CTPC, r19;						      \
-	sst.w	r19, PTO+PT_CTPC[ep];					      \
-	stsr	SR_CTPSW, r19;						      \
-	sst.w	r19, PTO+PT_CTPSW[ep];					      \
-	stsr	SR_CTBP, r19;						      \
-	sst.w	r19, PTO+PT_CTBP[ep]
-/* Restore CTPC/CTPSW/CTBP registers from the state-save-frame pointed to by EP.
-   LP is clobbered (it is used as a scratch register because the POP_STATE
-   macro restores it, and this macro is usually used inside POP_STATE).  */
-#define RESTORE_CT_REGS							      \
-	sld.w	PTO+PT_CTPC[ep], lp;					      \
-	ldsr	lp, SR_CTPC;						      \
-	sld.w	PTO+PT_CTPSW[ep], lp;					      \
-	ldsr	lp, SR_CTPSW;						      \
-	sld.w	PTO+PT_CTBP[ep], lp;					      \
-	ldsr	lp, SR_CTBP
-
-
-/* Push register state, except for the stack pointer, on the stack in the
-   form of a state-save-frame (plus some extra padding), in preparation for
-   a system call.  This macro makes sure that the EP, GP, and LP
-   registers are saved, and TYPE identifies the set of extra registers to
-   be saved as well.  Also copies (the new value of) SP to EP.  */
-#define PUSH_STATE(type)						      \
-	addi	-STATE_SAVE_SIZE, sp, sp; /* Make room on the stack.  */      \
-	st.w	ep, PTO+PT_GPR(GPR_EP)[sp];				      \
-	mov	sp, ep;							      \
-	sst.w	gp, PTO+PT_GPR(GPR_GP)[ep];				      \
-	sst.w	lp, PTO+PT_GPR(GPR_LP)[ep];				      \
-	type ## _STATE_SAVER
-/* Pop a register state pushed by PUSH_STATE, except for the stack pointer,
-   from the stack.  */
-#define POP_STATE(type)							      \
-	mov	sp, ep;							      \
-	type ## _STATE_RESTORER;					      \
-	sld.w	PTO+PT_GPR(GPR_GP)[ep], gp;				      \
-	sld.w	PTO+PT_GPR(GPR_LP)[ep], lp;				      \
-	sld.w	PTO+PT_GPR(GPR_EP)[ep], ep;				      \
-	addi	STATE_SAVE_SIZE, sp, sp /* Clean up our stack space.  */
-
-
-/* Switch to the kernel stack if necessary, and push register state on the
-   stack in the form of a state-save-frame.  Also load the current task
-   pointer if switching from user mode.  The stack-pointer (r3) should have
-   already been saved to the memory location SP_SAVE_LOC (the reason for
-   this is that the interrupt vectors may be beyond a 22-bit signed offset
-   jump from the actual interrupt handler, and this allows them to save the
-   stack-pointer and use that register to do an indirect jump).  This macro
-   makes sure that `special' registers, system registers, and the stack
-   pointer are saved; TYPE identifies the set of extra registers to be
-   saved as well.  SYSCALL_NUM is the register in which the system-call
-   number this state is for is stored (r0 if this isn't a system call).
-   Interrupts should already be disabled when calling this.  */
-#define SAVE_STATE(type, syscall_num, sp_save_loc)			      \
-	tst1	0, KM;			/* See if already in kernel mode.  */ \
-	bz	1f;							      \
-	ld.w	sp_save_loc, sp;	/* ... yes, use saved SP.  */	      \
-	br	2f;							      \
-1:	ld.w	KSP, sp;		/* ... no, switch to kernel stack. */ \
-2:	PUSH_STATE(type);						      \
-	ld.b	KM, r19;		/* Remember old kernel-mode.  */      \
-	sst.w	r19, PTO+PT_KERNEL_MODE[ep];				      \
-	ld.w	sp_save_loc, r19;	/* Remember old SP.  */		      \
-	sst.w	r19, PTO+PT_GPR(GPR_SP)[ep];				      \
-	mov	1, r19;			/* Now definitely in kernel-mode. */  \
-	st.b	r19, KM;						      \
-	GET_CURRENT_TASK(CURRENT_TASK);	/* Fetch the current task pointer. */ \
-	/* Save away the syscall number.  */				      \
-	sst.w	syscall_num, PTO+PT_CUR_SYSCALL[ep]
-
-
-/* Save register state not normally saved by PUSH_STATE for TYPE, to the
-   state-save-frame on the stack; also copies SP to EP.  r19 may be trashed. */
-#define SAVE_EXTRA_STATE(type)						      \
-	mov	sp, ep;							      \
-	type ## _EXTRA_STATE_SAVER
-/* Restore register state not normally restored by POP_STATE for TYPE,
-   from the state-save-frame on the stack; also copies SP to EP.
-   r19 may be trashed.  */
-#define RESTORE_EXTRA_STATE(type)					      \
-	mov	sp, ep;							      \
-	type ## _EXTRA_STATE_RESTORER
-
-/* Save any call-clobbered registers not normally saved by PUSH_STATE for
-   TYPE, to the state-save-frame on the stack.
-   EP may be trashed, but is not guaranteed to contain a copy of SP
-   (unlike after most SAVE_... macros).  r19 may be trashed.  */
-#define SAVE_EXTRA_STATE_FOR_SCHEDULE(type)				      \
-	type ## _SCHEDULE_EXTRA_STATE_SAVER
-/* Restore any call-clobbered registers not normally restored by
-   POP_STATE for TYPE, to the state-save-frame on the stack.
-   EP may be trashed, but is not guaranteed to contain a copy of SP
-   (unlike after most RESTORE_... macros).  r19 may be trashed.  */
-#define RESTORE_EXTRA_STATE_FOR_SCHEDULE(type)				      \
-	type ## _SCHEDULE_EXTRA_STATE_RESTORER
-
-
-/* These are extra_state_saver/restorer values for a user trap.  Note
-   that we save the argument registers so that restarted syscalls will
-   function properly (otherwise it wouldn't be necessary), and we must
-   _not_ restore the return-value registers (so that traps can return a
-   value!), but call-clobbered registers are not saved at all, as the
-   caller of the syscall function should have saved them.  */
-
-#define TRAP_RET reti
-/* Traps don't save call-clobbered registers (but do still save arg regs).
-   We preserve PSw to keep long-term state, namely interrupt status (for traps
-   from kernel-mode), and the single-step flag (for user traps).  */
-#define TRAP_STATE_SAVER						      \
-	SAVE_ARG_REGS;							      \
-	SAVE_PC(EIPC);							      \
-	SAVE_PSW(EIPSW)
-/* When traps return, they just leave call-clobbered registers (except for arg
-   regs) with whatever value they have from the kernel.  Traps don't preserve
-   the PSW, but we zero EIPSW to ensure it doesn't contain anything dangerous
-   (in particular, the single-step flag).  */
-#define TRAP_STATE_RESTORER						      \
-	RESTORE_ARG_REGS;						      \
-	RESTORE_PC(EIPC);						      \
-	RESTORE_PSW(EIPSW)
-/* Save registers not normally saved by traps.  We need to save r12, even
-   though it's nominally call-clobbered, because it's used when restarting
-   a system call (the signal-handling path uses SAVE_EXTRA_STATE, and
-   expects r12 to be restored when the trap returns).  */
-#define TRAP_EXTRA_STATE_SAVER						      \
-	SAVE_RVAL_REGS;							      \
-	sst.w	r12, PTO+PT_GPR(12)[ep];				      \
-	SAVE_CALL_SAVED_REGS;						      \
-	SAVE_CT_REGS
-#define TRAP_EXTRA_STATE_RESTORER					      \
-	RESTORE_RVAL_REGS;						      \
-	sld.w	PTO+PT_GPR(12)[ep], r12;				      \
-	RESTORE_CALL_SAVED_REGS;					      \
-	RESTORE_CT_REGS
-/* Save registers prior to calling scheduler (just before trap returns).
-   We have to save the return-value registers to preserve the trap's return
-   value.  Note that ..._SCHEDULE_EXTRA_STATE_SAVER, unlike most ..._SAVER
-   macros, is required to setup EP itself if EP is needed (this is because
-   in many cases, the macro is empty).  */
-#define TRAP_SCHEDULE_EXTRA_STATE_SAVER					      \
-	mov sp, ep;							      \
-	SAVE_RVAL_REGS
-/* Note that ..._SCHEDULE_EXTRA_STATE_RESTORER, unlike most ..._RESTORER
-   macros, is required to setup EP itself if EP is needed (this is because
-   in many cases, the macro is empty).  */
-#define TRAP_SCHEDULE_EXTRA_STATE_RESTORER				      \
-	mov sp, ep;							      \
-	RESTORE_RVAL_REGS
-
-/* Register saving/restoring for maskable interrupts.  */
-#define IRQ_RET reti
-#define IRQ_STATE_SAVER							      \
-	SAVE_CALL_CLOBBERED_REGS;					      \
-	SAVE_PC(EIPC);							      \
-	SAVE_PSW(EIPSW)
-#define IRQ_STATE_RESTORER						      \
-	RESTORE_CALL_CLOBBERED_REGS;					      \
-	RESTORE_PC(EIPC);						      \
-	RESTORE_PSW(EIPSW)
-#define IRQ_EXTRA_STATE_SAVER						      \
-	SAVE_CALL_SAVED_REGS;						      \
-	SAVE_CT_REGS
-#define IRQ_EXTRA_STATE_RESTORER					      \
-	RESTORE_CALL_SAVED_REGS;					      \
-	RESTORE_CT_REGS
-#define IRQ_SCHEDULE_EXTRA_STATE_SAVER	     /* nothing */
-#define IRQ_SCHEDULE_EXTRA_STATE_RESTORER    /* nothing */
-
-/* Register saving/restoring for non-maskable interrupts.  */
-#define NMI_RET reti
-#define NMI_STATE_SAVER							      \
-	SAVE_CALL_CLOBBERED_REGS;					      \
-	SAVE_PC(FEPC);							      \
-	SAVE_PSW(FEPSW);
-#define NMI_STATE_RESTORER						      \
-	RESTORE_CALL_CLOBBERED_REGS;					      \
-	RESTORE_PC(FEPC);						      \
-	RESTORE_PSW(FEPSW);
-#define NMI_EXTRA_STATE_SAVER						      \
-	SAVE_CALL_SAVED_REGS;						      \
-	SAVE_CT_REGS
-#define NMI_EXTRA_STATE_RESTORER					      \
-	RESTORE_CALL_SAVED_REGS;					      \
-	RESTORE_CT_REGS
-#define NMI_SCHEDULE_EXTRA_STATE_SAVER	     /* nothing */
-#define NMI_SCHEDULE_EXTRA_STATE_RESTORER    /* nothing */
-
-/* Register saving/restoring for debug traps.  */
-#define DBTRAP_RET .long 0x014607E0 /* `dbret', but gas doesn't support it. */
-#define DBTRAP_STATE_SAVER						      \
-	SAVE_CALL_CLOBBERED_REGS;					      \
-	SAVE_PC(DBPC);							      \
-	SAVE_PSW(DBPSW)
-#define DBTRAP_STATE_RESTORER						      \
-	RESTORE_CALL_CLOBBERED_REGS;					      \
-	RESTORE_PC(DBPC);						      \
-	RESTORE_PSW(DBPSW)
-#define DBTRAP_EXTRA_STATE_SAVER					      \
-	SAVE_CALL_SAVED_REGS;						      \
-	SAVE_CT_REGS
-#define DBTRAP_EXTRA_STATE_RESTORER					      \
-	RESTORE_CALL_SAVED_REGS;					      \
-	RESTORE_CT_REGS
-#define DBTRAP_SCHEDULE_EXTRA_STATE_SAVER	/* nothing */
-#define DBTRAP_SCHEDULE_EXTRA_STATE_RESTORER	/* nothing */
-
-/* Register saving/restoring for a context switch.  We don't need to save
-   too many registers, because context-switching looks like a function call
-   (via the function `switch_thread'), so callers will save any
-   call-clobbered registers themselves.  We do need to save the CT regs, as
-   they're normally not saved during kernel entry (the kernel doesn't use
-   them).  We save PSW so that interrupt-status state will correctly follow
-   each thread (mostly NMI vs. normal-IRQ/trap), though for the most part
-   it doesn't matter since threads are always in almost exactly the same
-   processor state during a context switch.  The stack pointer and return
-   value are handled by switch_thread itself.  */
-#define SWITCH_STATE_SAVER						      \
-	SAVE_CALL_SAVED_REGS;						      \
-	SAVE_PSW(PSW);							      \
-	SAVE_CT_REGS
-#define SWITCH_STATE_RESTORER						      \
-	RESTORE_CALL_SAVED_REGS;					      \
-	RESTORE_PSW(PSW);						      \
-	RESTORE_CT_REGS
-
-
-/* Restore register state from the state-save-frame on the stack, switch back
-   to the user stack if necessary, and return from the trap/interrupt.
-   EXTRA_STATE_RESTORER is a sequence of assembly language statements to
-   restore anything not restored by this macro.  Only registers not saved by
-   the C compiler are restored (that is, R3(sp), R4(gp), R31(lp), and
-   anything restored by EXTRA_STATE_RESTORER).  */
-#define RETURN(type)							      \
-	ld.b	PTO+PT_KERNEL_MODE[sp], r19;				      \
-	di;				/* Disable interrupts */	      \
-	cmp	r19, r0;		/* See if returning to kernel mode, */\
-	bne	2f;			/* ... if so, skip resched &c.  */    \
-									      \
-	/* We're returning to user mode, so check for various conditions that \
-	   trigger rescheduling. */					      \
-	GET_CURRENT_THREAD(r18);					      \
-	ld.w	TI_FLAGS[r18], r19;					      \
-	andi	_TIF_NEED_RESCHED, r19, r0;				      \
-	bnz	3f;			/* Call the scheduler.  */	      \
-5:	andi	_TIF_SIGPENDING, r19, r18;				      \
-	ld.w	TASK_PTRACE[CURRENT_TASK], r19; /* ptrace flags */	      \
-	or	r18, r19;		/* see if either is non-zero */	      \
-	bnz	4f;			/* if so, handle them */	      \
-									      \
-/* Return to user state.  */						      \
-1:	st.b	r0, KM;			/* Now officially in user state. */   \
-									      \
-/* Final return.  The stack-pointer fiddling is not needed when returning     \
-   to kernel-mode, but they don't hurt, and this way we can share the	      \
-   (sometimes rather lengthy) POP_STATE macro.  */			      \
-2:	POP_STATE(type);						      \
-	st.w	sp, KSP;		/* Save the kernel stack pointer. */  \
-	ld.w	PT_GPR(GPR_SP)-PT_SIZE[sp], sp; /* Restore stack pointer. */  \
-	type ## _RET;			/* Return from the trap/interrupt. */ \
-									      \
-/* Call the scheduler before returning from a syscall/trap. */		      \
-3:	SAVE_EXTRA_STATE_FOR_SCHEDULE(type); /* Prepare to call scheduler. */ \
-	jarl	call_scheduler, lp;	/* Call scheduler */		      \
-	di;				/* The scheduler enables interrupts */\
-	RESTORE_EXTRA_STATE_FOR_SCHEDULE(type);				      \
-	GET_CURRENT_THREAD(r18);					      \
-	ld.w	TI_FLAGS[r18], r19;					      \
-	br	5b;			/* Continue with return path. */      \
-									      \
-/* Handle a signal or ptraced process return.				      \
-   r18 should be non-zero if there are pending signals.  */		      \
-4:	/* Not all registers are saved by the normal trap/interrupt entry     \
-	   points (for instance, call-saved registers (because the normal     \
-	   C-compiler calling sequence in the kernel makes sure they're	      \
-	   preserved), and call-clobbered registers in the case of	      \
-	   traps), but signal handlers may want to examine or change the      \
-	   complete register state.  Here we save anything not saved by	      \
-	   the normal entry sequence, so that it may be safely restored	      \
-	   (in a possibly modified form) after do_signal returns.  */	      \
-	SAVE_EXTRA_STATE(type);		/* Save state not saved by entry. */  \
-	jarl	handle_signal_or_ptrace_return, lp;			      \
-	RESTORE_EXTRA_STATE(type);	/* Restore extra regs.  */	      \
-	br	1b
-
-
-/* Jump to the appropriate function for the system call number in r12
-   (r12 is not preserved), or return an error if r12 is not valid.  The
-   LP register should point to the location where the called function
-   should return.  [note that MAKE_SYS_CALL uses label 1]  */
-#define MAKE_SYS_CALL							      \
-	/* Figure out which function to use for this system call.  */	      \
-	shl	2, r12;							      \
-	/* See if the system call number is valid.  */			      \
-	addi	lo(CSYM(sys_call_table) - sys_call_table_end), r12, r0;	      \
-	bnh	1f;							      \
-	mov	hilo(CSYM(sys_call_table)), r19;			      \
-	add	r19, r12;						      \
-	ld.w	0[r12], r12;						      \
-	/* Make the system call.  */					      \
-	jmp	[r12];							      \
-	/* The syscall number is invalid, return an error.  */		      \
-1:	addi	-ENOSYS, r0, r10;					      \
-	jmp	[lp]
-
-
-	.text
-
-/*
- * User trap.
- *
- * Trap 0 system calls are also handled here.
- *
- * The stack-pointer (r3) should have already been saved to the memory
- * location ENTRY_SP (the reason for this is that the interrupt vectors may be
- * beyond a 22-bit signed offset jump from the actual interrupt handler, and
- * this allows them to save the stack-pointer and use that register to do an
- * indirect jump).
- *
- * Syscall protocol:
- *   Syscall number in r12, args in r6-r9
- *   Return value in r10
- */
-G_ENTRY(trap):
-	SAVE_STATE (TRAP, r12, ENTRY_SP) // Save registers.
-	stsr	SR_ECR, r19		// Find out which trap it was.
-	ei				// Enable interrupts.
-	mov	hilo(ret_from_trap), lp	// where the trap should return
-
-	// The following two shifts (1) clear out extraneous NMI data in the
-	// upper 16-bits, (2) convert the 0x40 - 0x5f range of trap ECR
-	// numbers into the (0-31) << 2 range we want, (3) set the flags.
-	shl	27, r19			// chop off all high bits
-	shr	25, r19			// scale back down and then << 2
-	bnz	2f			// See if not trap 0.
-
-	// Trap 0 is a `short' system call, skip general trap table.
-	MAKE_SYS_CALL			// Jump to the syscall function.
-
-2:	// For other traps, use a table lookup.
-	mov	hilo(CSYM(trap_table)), r18
-	add	r19, r18
-	ld.w	0[r18], r18
-	jmp	[r18]			// Jump to the trap handler.
-END(trap)
-
-/* This is just like ret_from_trap, but first restores extra registers
-   saved by some wrappers.  */
-L_ENTRY(restore_extra_regs_and_ret_from_trap):
-	RESTORE_EXTRA_STATE(TRAP)
-	// fall through
-END(restore_extra_regs_and_ret_from_trap)
-
-/* Entry point used to return from a syscall/trap.  */
-L_ENTRY(ret_from_trap):
-	RETURN(TRAP)
-END(ret_from_trap)
-
-
-/* This the initial entry point for a new child thread, with an appropriate
-   stack in place that makes it look that the child is in the middle of an
-   syscall.  This function is actually `returned to' from switch_thread
-   (copy_thread makes ret_from_fork the return address in each new thread's
-   saved context).  */
-C_ENTRY(ret_from_fork):
-	mov	r10, r6			// switch_thread returns the prev task.
-	jarl	CSYM(schedule_tail), lp	// ...which is schedule_tail's arg
-	mov	r0, r10			// Child's fork call should return 0.
-	br	ret_from_trap		// Do normal trap return.
-C_END(ret_from_fork)
-
-
-/*
- * Trap 1: `long' system calls
- * `Long' syscall protocol:
- *   Syscall number in r12, args in r6-r9, r13-r14
- *   Return value in r10
- */
-L_ENTRY(syscall_long):
-	// Push extra arguments on the stack.  Note that by default, the trap
-	// handler reserves enough stack space for 6 arguments, so we don't
-	// have to make any additional room.
-	st.w	r13, 16[sp]		// arg 5
-	st.w	r14, 20[sp]		// arg 6
-
-	// Make sure r13 and r14 are preserved, in case we have to restart a
-	// system call because of a signal (ep has already been set by caller).
-	st.w	r13, PTO+PT_GPR(13)[sp]
-	st.w	r14, PTO+PT_GPR(13)[sp]
-	mov	hilo(ret_from_long_syscall), lp
-
-	MAKE_SYS_CALL			// Jump to the syscall function.
-END(syscall_long)
-
-/* Entry point used to return from a long syscall.  Only needed to restore
-   r13/r14 if the general trap mechanism doesnt' do so.  */
-L_ENTRY(ret_from_long_syscall):
-	ld.w	PTO+PT_GPR(13)[sp], r13 // Restore the extra registers
-	ld.w	PTO+PT_GPR(13)[sp], r14
-	br	ret_from_trap		// The rest is the same as other traps
-END(ret_from_long_syscall)
-
-
-/* These syscalls need access to the struct pt_regs on the stack, so we
-   implement them in assembly (they're basically all wrappers anyway).  */
-
-L_ENTRY(sys_fork_wrapper):
-#ifdef CONFIG_MMU
-	addi	SIGCHLD, r0, r6		   // Arg 0: flags
-	ld.w	PTO+PT_GPR(GPR_SP)[sp], r7 // Arg 1: child SP (use parent's)
-	movea	PTO, sp, r8		   // Arg 2: parent context
-	mov	r0, r9			   // Arg 3/4/5: 0
-	st.w	r0, 16[sp]
-	st.w	r0, 20[sp]
-	mov	hilo(CSYM(do_fork)), r18   // Where the real work gets done
-	br	save_extra_state_tramp	   // Save state and go there
-#else
-	// fork almost works, enough to trick you into looking elsewhere :-(
-	addi	-EINVAL, r0, r10
-	jmp	[lp]
-#endif
-END(sys_fork_wrapper)
-
-L_ENTRY(sys_vfork_wrapper):
-	addi	CLONE_VFORK | CLONE_VM | SIGCHLD, r0, r6 // Arg 0: flags
-	ld.w	PTO+PT_GPR(GPR_SP)[sp], r7 // Arg 1: child SP (use parent's)
-	movea	PTO, sp, r8		   // Arg 2: parent context
-	mov	r0, r9			   // Arg 3/4/5: 0
-	st.w	r0, 16[sp]
-	st.w	r0, 20[sp]
-	mov	hilo(CSYM(do_fork)), r18   // Where the real work gets done
-	br	save_extra_state_tramp	   // Save state and go there
-END(sys_vfork_wrapper)
-
-L_ENTRY(sys_clone_wrapper):
-	ld.w	PTO+PT_GPR(GPR_SP)[sp], r19// parent's stack pointer
-	cmp	r7, r0			   // See if child SP arg (arg 1) is 0.
-	cmov	z, r19, r7, r7		   // ... and use the parent's if so.
-	movea	PTO, sp, r8		   // Arg 2: parent context
-	mov	r0, r9			   // Arg 3/4/5: 0
-	st.w	r0, 16[sp]
-	st.w	r0, 20[sp]
-	mov	hilo(CSYM(do_fork)), r18   // Where the real work gets done
-	br	save_extra_state_tramp	   // Save state and go there
-END(sys_clone_wrapper)
-
-
-L_ENTRY(sys_execve_wrapper):
-	movea	PTO, sp, r9		// add user context as 4th arg
-	jr	CSYM(sys_execve)	// Do real work (tail-call).
-END(sys_execve_wrapper)
-
-
-L_ENTRY(sys_sigsuspend_wrapper):
-	movea	PTO, sp, r7		// add user context as 2nd arg
-	mov	hilo(CSYM(sys_sigsuspend)), r18	// syscall function
-	jarl	save_extra_state_tramp, lp	// Save state and do it
-	br	restore_extra_regs_and_ret_from_trap
-END(sys_sigsuspend_wrapper)
-L_ENTRY(sys_rt_sigsuspend_wrapper):
-	movea	PTO, sp, r8		// add user context as 3rd arg
-	mov	hilo(CSYM(sys_rt_sigsuspend)), r18 // syscall function
-	jarl	save_extra_state_tramp, lp	   // Save state and do it
-	br	restore_extra_regs_and_ret_from_trap
-END(sys_rt_sigsuspend_wrapper)
-
-L_ENTRY(sys_sigreturn_wrapper):
-	movea	PTO, sp, r6		// add user context as 1st arg
-	mov	hilo(CSYM(sys_sigreturn)), r18	// syscall function
-	jarl	save_extra_state_tramp, lp	// Save state and do it
-	br	restore_extra_regs_and_ret_from_trap
-END(sys_sigreturn_wrapper)
-L_ENTRY(sys_rt_sigreturn_wrapper):
-	movea	PTO, sp, r6		// add user context as 1st arg
-	mov	hilo(CSYM(sys_rt_sigreturn)), r18// syscall function
-	jarl	save_extra_state_tramp, lp	 // Save state and do it
-	br	restore_extra_regs_and_ret_from_trap
-END(sys_rt_sigreturn_wrapper)
-
-
-/* Save any state not saved by SAVE_STATE(TRAP), and jump to r18.
-   It's main purpose is to share the rather lengthy code sequence that
-   SAVE_STATE expands into among the above wrapper functions.  */
-L_ENTRY(save_extra_state_tramp):
-	SAVE_EXTRA_STATE(TRAP)		// Save state not saved by entry.
-	jmp	[r18]			// Do the work the caller wants
-END(save_extra_state_tramp)
-
-
-/*
- * Hardware maskable interrupts.
- *
- * The stack-pointer (r3) should have already been saved to the memory
- * location ENTRY_SP (the reason for this is that the interrupt vectors may be
- * beyond a 22-bit signed offset jump from the actual interrupt handler, and
- * this allows them to save the stack-pointer and use that register to do an
- * indirect jump).
- */
-G_ENTRY(irq):
-	SAVE_STATE (IRQ, r0, ENTRY_SP)	// Save registers.
-
-	stsr	SR_ECR, r6		// Find out which interrupt it was.
-	movea	PTO, sp, r7		// User regs are arg2
-
-	// All v850 implementations I know about encode their interrupts as
-	// multiples of 0x10, starting at 0x80 (after NMIs and software
-	// interrupts).  Convert this number into a simple IRQ index for the
-	// rest of the kernel.  We also clear the upper 16 bits, which hold
-	// NMI info, and don't appear to be cleared when a NMI returns.
-	shl	16, r6			// clear upper 16 bits
-	shr	20, r6			// shift back, and remove lower nibble
-	add	-8, r6			// remove bias for irqs
-
-	// Call the high-level interrupt handling code.
-	jarl	CSYM(handle_irq), lp
-
-	RETURN(IRQ)
-END(irq)
-
-
-/*
- * Debug trap / illegal-instruction exception
- *
- * The stack-pointer (r3) should have already been saved to the memory
- * location ENTRY_SP (the reason for this is that the interrupt vectors may be
- * beyond a 22-bit signed offset jump from the actual interrupt handler, and
- * this allows them to save the stack-pointer and use that register to do an
- * indirect jump).
- */
-G_ENTRY(dbtrap):
-	SAVE_STATE (DBTRAP, r0, ENTRY_SP)// Save registers.
-
-	/* First see if we came from kernel mode; if so, the dbtrap
-	   instruction has a special meaning, to set the DIR (`debug
-	   information register') register.  This is because the DIR register
-	   can _only_ be manipulated/read while in `debug mode,' and debug
-	   mode is only active while we're inside the dbtrap handler.  The
-	   exact functionality is:  { DIR = (DIR | r6) & ~r7; return DIR; }. */
-	ld.b	PTO+PT_KERNEL_MODE[sp], r19
-	cmp	r19, r0
-	bz	1f
-
-	stsr	SR_DIR, r10
-	or	r6, r10
-	not	r7, r7
-	and	r7, r10
-	ldsr	r10, SR_DIR
-	stsr	SR_DIR, r10		// Confirm the value we set
-	st.w	r10, PTO+PT_GPR(10)[sp]	// return it
-	br	3f
-
-1:	ei				// Enable interrupts.
-
-	/* The default signal type we raise.  */
-	mov	SIGTRAP, r6
-
-	/* See if it's a single-step trap.  */
-	stsr	SR_DBPSW, r19
-	andi	0x0800, r19, r19
-	bnz	2f
-
-	/* Look to see if the preceding instruction was is a dbtrap or not,
-	   to decide which signal we should use.  */
-	stsr	SR_DBPC, r19		// PC following trapping insn
-	ld.hu	-2[r19], r19
-	ori	0xf840, r0, r20		// DBTRAP insn
-	cmp	r19, r20		// Was this trap caused by DBTRAP?
-	cmov	ne, SIGILL, r6, r6	// Choose signal appropriately
-
-	/* Raise the desired signal.  */
-2:	mov	CURRENT_TASK, r7	// Arg 1: task
-	jarl	CSYM(send_sig), lp	// tail call
-
-3:	RETURN(DBTRAP)
-END(dbtrap)
-
-
-/*
- * Hardware non-maskable interrupts.
- *
- * The stack-pointer (r3) should have already been saved to the memory
- * location ENTRY_SP (the reason for this is that the interrupt vectors may be
- * beyond a 22-bit signed offset jump from the actual interrupt handler, and
- * this allows them to save the stack-pointer and use that register to do an
- * indirect jump).
- */
-G_ENTRY(nmi):
-	SAVE_STATE (NMI, r0, NMI_ENTRY_SP); /* Save registers.  */
-
-	stsr	SR_ECR, r6;		/* Find out which nmi it was.  */
-	shr	20, r6;			/* Extract NMI code in bits 20-24. */
-	movea	PTO, sp, r7;		/* User regs are arg2.  */
-
-	/* Non-maskable interrupts always lie right after maskable interrupts.
-	   Call the generic IRQ handler, with two arguments, the IRQ number,
-	   and a pointer to the user registers, to handle the specifics.
-	   (we subtract one because the first NMI has code 1).  */
-	addi	FIRST_NMI - 1, r6, r6
-	jarl	CSYM(handle_irq), lp
-
-	RETURN(NMI)
-END(nmi)
-
-
-/*
- * Trap with no handler
- */
-L_ENTRY(bad_trap_wrapper):
-	mov	r19, r6			// Arg 0: trap number
-	movea	PTO, sp, r7		// Arg 1: user regs
-	jr	CSYM(bad_trap)		// tail call handler
-END(bad_trap_wrapper)
-
-
-/*
- * Invoke the scheduler, called from the trap/irq kernel exit path.
- *
- * This basically just calls `schedule', but also arranges for extra
- * registers to be saved for ptrace'd processes, so ptrace can modify them.
- */
-L_ENTRY(call_scheduler):
-	ld.w	TASK_PTRACE[CURRENT_TASK], r19	// See if task is ptrace'd
-	cmp	r19, r0
-	bnz	1f			// ... yes, do special stuff
-	jr	CSYM(schedule)		// ... no, just tail-call scheduler
-
-	// Save extra regs for ptrace'd task.  We want to save anything
-	// that would otherwise only be `implicitly' saved by the normal
-	// compiler calling-convention.
-1:	mov	sp, ep			// Setup EP for SAVE_CALL_SAVED_REGS
-	SAVE_CALL_SAVED_REGS		// Save call-saved registers to stack
-	mov	lp, r20			// Save LP in a callee-saved register
-
-	jarl	CSYM(schedule), lp	// Call scheduler
-
-	mov	r20, lp
-	mov	sp, ep			// We can't rely on EP after return
-	RESTORE_CALL_SAVED_REGS		// Restore (possibly modified) regs
-	jmp	[lp]			// Return to the return path
-END(call_scheduler)
-
-
-/*
- * This is an out-of-line handler for two special cases during the kernel
- * trap/irq exit sequence:
- *
- *  (1) If r18 is non-zero then a signal needs to be handled, which is
- *	done, and then the caller returned to.
- *
- *  (2) If r18 is non-zero then we're returning to a ptraced process, which
- *	has several special cases -- single-stepping and trap tracing, both
- *	of which require using the `dbret' instruction to exit the kernel
- *	instead of the normal `reti' (this is because the CPU not correctly
- *	single-step after a reti).  In this case, of course, this handler
- *	never returns to the caller.
- *
- * In either case, all registers should have been saved to the current
- * state-save-frame on the stack, except for callee-saved registers.
- *
- * [These two different cases are combined merely to avoid bloating the
- * macro-inlined code, not because they really make much sense together!]
- */
-L_ENTRY(handle_signal_or_ptrace_return):
-	cmp	r18, r0			// See if handling a signal
-	bz	1f			// ... nope, go do ptrace return
-
-	// Handle a signal
-	mov	lp, r20			// Save link-pointer
-	mov	r10, r21		// Save return-values (for trap)
-	mov	r11, r22
-
-	movea	PTO, sp, r6		// Arg 1: struct pt_regs *regs
-	mov	r0, r7			// Arg 2: sigset_t *oldset
-	jarl	CSYM(do_signal), lp	// Handle the signal
-	di				// sig handling enables interrupts
-
-	mov	r20, lp			// Restore link-pointer
-	mov	r21, r10		// Restore return-values (for trap)
-	mov	r22, r11
-	ld.w	TASK_PTRACE[CURRENT_TASK], r19  // check ptrace flags too
-	cmp	r19, r0
-	bnz	1f			// ... some set, so look more
-2:	jmp	[lp]			// ... none set, so return normally
-
-	// ptrace return
-1:	ld.w	PTO+PT_PSW[sp], r19	// Look at user-processes's flags
-	andi	0x0800, r19, r19	// See if single-step flag is set
-	bz	2b			// ... nope, return normally
-
-	// Return as if from a dbtrap insn
-	st.b	r0, KM			// Now officially in user state.
-	POP_STATE(DBTRAP)		// Restore regs
-	st.w	sp, KSP			// Save the kernel stack pointer.
-	ld.w	PT_GPR(GPR_SP)-PT_SIZE[sp], sp // Restore user stack pointer.
-	DBTRAP_RET			// Return from the trap/interrupt.
-END(handle_signal_or_ptrace_return)
-
-
-/*
- * This is where we switch between two threads.  The arguments are:
- *   r6 -- pointer to the struct thread for the `current' process
- *   r7 -- pointer to the struct thread for the `new' process.
- * when this function returns, it will return to the new thread.
- */
-C_ENTRY(switch_thread):
-	// Return the previous task (r10 is not clobbered by restore below)
-	mov	CURRENT_TASK, r10
-	// First, push the current processor state on the stack
-	PUSH_STATE(SWITCH)
-	// Now save the location of the kernel stack pointer for this thread;
-	// since we've pushed all other state on the stack, this is enough to
-	// restore it all later.
-	st.w	sp, THREAD_KSP[r6]
-	// Now restore the stack pointer from the new process
-	ld.w	THREAD_KSP[r7], sp
-	// ... and restore all state from that
-	POP_STATE(SWITCH)
-	// Update the current task pointer
-	GET_CURRENT_TASK(CURRENT_TASK)
-	// Now return into the new thread
-	jmp	[lp]
-C_END(switch_thread)
-
-
-	.data
-
-	.align 4
-C_DATA(trap_table):
-	.long bad_trap_wrapper		// trap 0, doesn't use trap table.
-	.long syscall_long		// trap 1, `long' syscall.
-	.long bad_trap_wrapper
-	.long bad_trap_wrapper
-	.long bad_trap_wrapper
-	.long bad_trap_wrapper
-	.long bad_trap_wrapper
-	.long bad_trap_wrapper
-	.long bad_trap_wrapper
-	.long bad_trap_wrapper
-	.long bad_trap_wrapper
-	.long bad_trap_wrapper
-	.long bad_trap_wrapper
-	.long bad_trap_wrapper
-	.long bad_trap_wrapper
-	.long bad_trap_wrapper
-C_END(trap_table)
-
-
-	.section .rodata
-
-	.align 4
-C_DATA(sys_call_table):
-	.long CSYM(sys_restart_syscall)	// 0
-	.long CSYM(sys_exit)
-	.long sys_fork_wrapper
-	.long CSYM(sys_read)
-	.long CSYM(sys_write)
-	.long CSYM(sys_open)		// 5
-	.long CSYM(sys_close)
-	.long CSYM(sys_waitpid)
-	.long CSYM(sys_creat)
-	.long CSYM(sys_link)
-	.long CSYM(sys_unlink)		// 10
-	.long sys_execve_wrapper
-	.long CSYM(sys_chdir)
-	.long CSYM(sys_time)
-	.long CSYM(sys_mknod)
-	.long CSYM(sys_chmod)		// 15
-	.long CSYM(sys_chown)
-	.long CSYM(sys_ni_syscall)	// was: break
-	.long CSYM(sys_ni_syscall)	// was: oldstat (aka stat)
-	.long CSYM(sys_lseek)
-	.long CSYM(sys_getpid)		// 20
-	.long CSYM(sys_mount)
-	.long CSYM(sys_oldumount)
-	.long CSYM(sys_setuid)
-	.long CSYM(sys_getuid)
-	.long CSYM(sys_stime)		// 25
-	.long CSYM(sys_ptrace)
-	.long CSYM(sys_alarm)
-	.long CSYM(sys_ni_syscall)	// was: oldfstat (aka fstat)
-	.long CSYM(sys_pause)
-	.long CSYM(sys_utime)		// 30
-	.long CSYM(sys_ni_syscall)	// was: stty
-	.long CSYM(sys_ni_syscall)	// was: gtty
-	.long CSYM(sys_access)
-	.long CSYM(sys_nice)
-	.long CSYM(sys_ni_syscall)	// 35, was: ftime
-	.long CSYM(sys_sync)
-	.long CSYM(sys_kill)
-	.long CSYM(sys_rename)
-	.long CSYM(sys_mkdir)
-	.long CSYM(sys_rmdir)		// 40
-	.long CSYM(sys_dup)
-	.long CSYM(sys_pipe)
-	.long CSYM(sys_times)
-	.long CSYM(sys_ni_syscall)	// was: prof
-	.long CSYM(sys_brk)		// 45
-	.long CSYM(sys_setgid)
-	.long CSYM(sys_getgid)
-	.long CSYM(sys_signal)
-	.long CSYM(sys_geteuid)
-	.long CSYM(sys_getegid)		// 50
-	.long CSYM(sys_acct)
-	.long CSYM(sys_umount)		// recycled never used phys()
-	.long CSYM(sys_ni_syscall)	// was: lock
-	.long CSYM(sys_ioctl)
-	.long CSYM(sys_fcntl)		// 55
-	.long CSYM(sys_ni_syscall)	// was: mpx
-	.long CSYM(sys_setpgid)
-	.long CSYM(sys_ni_syscall)	// was: ulimit
-	.long CSYM(sys_ni_syscall)
-	.long CSYM(sys_umask)		// 60
-	.long CSYM(sys_chroot)
-	.long CSYM(sys_ustat)
-	.long CSYM(sys_dup2)
-	.long CSYM(sys_getppid)
-	.long CSYM(sys_getpgrp)		// 65
-	.long CSYM(sys_setsid)
-	.long CSYM(sys_sigaction)
-	.long CSYM(sys_sgetmask)
-	.long CSYM(sys_ssetmask)
-	.long CSYM(sys_setreuid)	// 70
-	.long CSYM(sys_setregid)
-	.long sys_sigsuspend_wrapper
-	.long CSYM(sys_sigpending)
-	.long CSYM(sys_sethostname)
-	.long CSYM(sys_setrlimit)	// 75
-	.long CSYM(sys_getrlimit)
-	.long CSYM(sys_getrusage)
-	.long CSYM(sys_gettimeofday)
-	.long CSYM(sys_settimeofday)
-	.long CSYM(sys_getgroups)	// 80
-	.long CSYM(sys_setgroups)
-	.long CSYM(sys_select)
-	.long CSYM(sys_symlink)
-	.long CSYM(sys_ni_syscall)	// was: oldlstat (aka lstat)
-	.long CSYM(sys_readlink)	// 85
-	.long CSYM(sys_uselib)
-	.long CSYM(sys_swapon)
-	.long CSYM(sys_reboot)
-	.long CSYM(old_readdir)
-	.long CSYM(sys_mmap)		// 90
-	.long CSYM(sys_munmap)
-	.long CSYM(sys_truncate)
-	.long CSYM(sys_ftruncate)
-	.long CSYM(sys_fchmod)
-	.long CSYM(sys_fchown)		// 95
-	.long CSYM(sys_getpriority)
-	.long CSYM(sys_setpriority)
-	.long CSYM(sys_ni_syscall)	// was: profil
-	.long CSYM(sys_statfs)
-	.long CSYM(sys_fstatfs)		// 100
-	.long CSYM(sys_ni_syscall)	// i386: ioperm
-	.long CSYM(sys_socketcall)
-	.long CSYM(sys_syslog)
-	.long CSYM(sys_setitimer)
-	.long CSYM(sys_getitimer)	// 105
-	.long CSYM(sys_newstat)
-	.long CSYM(sys_newlstat)
-	.long CSYM(sys_newfstat)
-	.long CSYM(sys_ni_syscall)	// was: olduname (aka uname)
-	.long CSYM(sys_ni_syscall)	// 110, i386: iopl
-	.long CSYM(sys_vhangup)
-	.long CSYM(sys_ni_syscall)	// was: idle
-	.long CSYM(sys_ni_syscall)	// i386: vm86old
-	.long CSYM(sys_wait4)
-	.long CSYM(sys_swapoff)		// 115
-	.long CSYM(sys_sysinfo)
-	.long CSYM(sys_ipc)
-	.long CSYM(sys_fsync)
-	.long sys_sigreturn_wrapper
-	.long sys_clone_wrapper		// 120
-	.long CSYM(sys_setdomainname)
-	.long CSYM(sys_newuname)
-	.long CSYM(sys_ni_syscall)	// i386: modify_ldt, m68k: cacheflush
-	.long CSYM(sys_adjtimex)
-	.long CSYM(sys_ni_syscall)	// 125 - sys_mprotect
-	.long CSYM(sys_sigprocmask)
-	.long CSYM(sys_ni_syscall)	// sys_create_module
-	.long CSYM(sys_init_module)
-	.long CSYM(sys_delete_module)
-	.long CSYM(sys_ni_syscall)	// 130 - sys_get_kernel_syms
-	.long CSYM(sys_quotactl)
-	.long CSYM(sys_getpgid)
-	.long CSYM(sys_fchdir)
-	.long CSYM(sys_bdflush)
-	.long CSYM(sys_sysfs)		// 135
-	.long CSYM(sys_personality)
-	.long CSYM(sys_ni_syscall)	// for afs_syscall
-	.long CSYM(sys_setfsuid)
-	.long CSYM(sys_setfsgid)
-	.long CSYM(sys_llseek)		// 140
-	.long CSYM(sys_getdents)
-	.long CSYM(sys_select)		// for backward compat; remove someday
-	.long CSYM(sys_flock)
-	.long CSYM(sys_ni_syscall)	// sys_msync
-	.long CSYM(sys_readv)		// 145
-	.long CSYM(sys_writev)
-	.long CSYM(sys_getsid)
-	.long CSYM(sys_fdatasync)
-	.long CSYM(sys_sysctl)
-	.long CSYM(sys_ni_syscall)	// 150 - sys_mlock
-	.long CSYM(sys_ni_syscall)	// sys_munlock
-	.long CSYM(sys_ni_syscall)	// sys_mlockall
-	.long CSYM(sys_ni_syscall)	// sys_munlockall
-	.long CSYM(sys_sched_setparam)
-	.long CSYM(sys_sched_getparam)	// 155
-	.long CSYM(sys_sched_setscheduler)
-	.long CSYM(sys_sched_getscheduler)
-	.long CSYM(sys_sched_yield)
-	.long CSYM(sys_sched_get_priority_max)
-	.long CSYM(sys_sched_get_priority_min)	// 160
-	.long CSYM(sys_sched_rr_get_interval)
-	.long CSYM(sys_nanosleep)
-	.long CSYM(sys_ni_syscall)	// sys_mremap
-	.long CSYM(sys_setresuid)
-	.long CSYM(sys_getresuid)	// 165
-	.long CSYM(sys_ni_syscall)	// for vm86
-	.long CSYM(sys_ni_syscall)	// sys_query_module
-	.long CSYM(sys_poll)
-	.long CSYM(sys_nfsservctl)
-	.long CSYM(sys_setresgid)	// 170
-	.long CSYM(sys_getresgid)
-	.long CSYM(sys_prctl)
-	.long sys_rt_sigreturn_wrapper
-	.long CSYM(sys_rt_sigaction)
-	.long CSYM(sys_rt_sigprocmask)	// 175
-	.long CSYM(sys_rt_sigpending)
-	.long CSYM(sys_rt_sigtimedwait)
-	.long CSYM(sys_rt_sigqueueinfo)
-	.long sys_rt_sigsuspend_wrapper
-	.long CSYM(sys_pread64)		// 180
-	.long CSYM(sys_pwrite64)
-	.long CSYM(sys_lchown)
-	.long CSYM(sys_getcwd)
-	.long CSYM(sys_capget)
-	.long CSYM(sys_capset)		// 185
-	.long CSYM(sys_sigaltstack)
-	.long CSYM(sys_sendfile)
-	.long CSYM(sys_ni_syscall)	// streams1
-	.long CSYM(sys_ni_syscall)	// streams2
-	.long sys_vfork_wrapper		// 190
-	.long CSYM(sys_ni_syscall)
-	.long CSYM(sys_mmap2)
-	.long CSYM(sys_truncate64)
-	.long CSYM(sys_ftruncate64)
-	.long CSYM(sys_stat64)		// 195
-	.long CSYM(sys_lstat64)
-	.long CSYM(sys_fstat64)
-	.long CSYM(sys_fcntl64)
-	.long CSYM(sys_getdents64)
-	.long CSYM(sys_pivot_root)	// 200
-	.long CSYM(sys_gettid)
-	.long CSYM(sys_tkill)
-sys_call_table_end:
-C_END(sys_call_table)
diff --git a/arch/v850/kernel/fpga85e2c.c b/arch/v850/kernel/fpga85e2c.c
deleted file mode 100644
index ab9cf16a85c8..000000000000
--- a/arch/v850/kernel/fpga85e2c.c
+++ /dev/null
@@ -1,167 +0,0 @@
-/*
- * arch/v850/kernel/fpga85e2c.h -- Machine-dependent defs for
- *	FPGA implementation of V850E2/NA85E2C
- *
- *  Copyright (C) 2002,03  NEC Electronics Corporation
- *  Copyright (C) 2002,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/mm.h>
-#include <linux/swap.h>
-#include <linux/bootmem.h>
-#include <linux/irq.h>
-#include <linux/bitops.h>
-
-#include <asm/atomic.h>
-#include <asm/page.h>
-#include <asm/machdep.h>
-
-#include "mach.h"
-
-extern void memcons_setup (void);
-
-
-#define REG_DUMP_ADDR		0x220000
-
-
-extern struct irqaction reg_snap_action; /* fwd decl */
-
-
-void __init mach_early_init (void)
-{
-	int i;
-	const u32 *src;
-	register u32 *dst asm ("ep");
-	extern u32 _intv_end, _intv_load_start;
-
-	/* Set bus sizes: CS0 32-bit, CS1 16-bit, CS7 8-bit,
-	   everything else 32-bit.  */
-	V850E2_BSC = 0x2AA6;
-	for (i = 2; i <= 6; i++)
-		CSDEV(i) = 0;	/* 32 bit */
-
-	/* Ensure that the simulator halts on a panic, instead of going
-	   into an infinite loop inside the panic function.  */
-	panic_timeout = -1;
-
-	/* Move the interrupt vectors into their real location.  Note that
-	   any relocations there are relative to the real location, so we
-	   don't have to fix anything up.  We use a loop instead of calling
-	   memcpy to keep this a leaf function (to avoid a function
-	   prologue being generated).  */
-	dst = 0x10;		/* &_intv_start + 0x10.  */
-	src = &_intv_load_start;
-	do {
-		u32 t0 = src[0], t1 = src[1], t2 = src[2], t3 = src[3];
-		u32 t4 = src[4], t5 = src[5], t6 = src[6], t7 = src[7];
-		dst[0] = t0; dst[1] = t1; dst[2] = t2; dst[3] = t3;
-		dst[4] = t4; dst[5] = t5; dst[6] = t6; dst[7] = t7;
-		dst += 8;
-		src += 8;
-	} while (dst < &_intv_end);
-}
-
-void __init mach_setup (char **cmdline)
-{
-	memcons_setup ();
-
-	/* Setup up NMI0 to copy the registers to a known memory location.
-	   The FGPA board has a button that produces NMI0 when pressed, so
-	   this allows us to push the button, and then look at memory to see
-	   what's in the registers (there's no other way to easily do so).
-	   We have to use `setup_irq' instead of `request_irq' because it's
-	   still too early to do memory allocation.  */
-	setup_irq (IRQ_NMI (0), &reg_snap_action);
-}
-
-void mach_get_physical_ram (unsigned long *ram_start, unsigned long *ram_len)
-{
-	*ram_start = ERAM_ADDR;
-	*ram_len = ERAM_SIZE;
-}
-
-void __init mach_sched_init (struct irqaction *timer_action)
-{
-	/* Setup up the timer interrupt.  The FPGA peripheral control
-	   registers _only_ work with single-bit writes (set1/clr1)!  */
-	__clear_bit (RPU_GTMC_CE_BIT, &RPU_GTMC);
-	__clear_bit (RPU_GTMC_CLK_BIT, &RPU_GTMC);
-	__set_bit (RPU_GTMC_CE_BIT, &RPU_GTMC);
-
-	/* We use the first RPU interrupt, which occurs every 8.192ms.  */
-	setup_irq (IRQ_RPU (0), timer_action);
-}
-
-
-void mach_gettimeofday (struct timespec *tv)
-{
-	tv->tv_sec = 0;
-	tv->tv_nsec = 0;
-}
-
-void machine_halt (void) __attribute__ ((noreturn));
-void machine_halt (void)
-{
-	for (;;) {
-		DWC(0) = 0x7777;
-		DWC(1) = 0x7777;
-		ASC = 0xffff;
-		FLGREG(0) = 1;	/* Halt immediately.  */
-		asm ("di; halt; nop; nop; nop; nop; nop");
-	}
-}
-
-void machine_restart (char *__unused)
-{
-	machine_halt ();
-}
-
-void machine_power_off (void)
-{
-	machine_halt ();
-}
-
-
-/* Interrupts */
-
-struct v850e_intc_irq_init irq_inits[] = {
-	{ "IRQ", 0, 		NUM_MACH_IRQS,	1, 7 },
-	{ "RPU", IRQ_RPU(0),	IRQ_RPU_NUM,	1, 6 },
-	{ 0 }
-};
-#define NUM_IRQ_INITS (ARRAY_SIZE(irq_inits) - 1)
-
-struct hw_interrupt_type hw_itypes[NUM_IRQ_INITS];
-
-/* Initialize interrupts.  */
-void __init mach_init_irqs (void)
-{
-	v850e_intc_init_irq_types (irq_inits, hw_itypes);
-}
-
-
-/* An interrupt handler that copies the registers to a known memory location,
-   for debugging purposes.  */
-
-static void make_reg_snap (int irq, void *dummy, struct pt_regs *regs)
-{
-	(*(unsigned *)REG_DUMP_ADDR)++;
-	(*(struct pt_regs *)(REG_DUMP_ADDR + sizeof (unsigned))) = *regs;
-}
-
-static int reg_snap_dev_id;
-static struct irqaction reg_snap_action = {
-	.handler = make_reg_snap,
-	.mask = CPU_MASK_NONE,
-	.name = "reg_snap",
-	.dev_id = &reg_snap_dev_id,
-};
diff --git a/arch/v850/kernel/fpga85e2c.ld b/arch/v850/kernel/fpga85e2c.ld
deleted file mode 100644
index b5d4578ae411..000000000000
--- a/arch/v850/kernel/fpga85e2c.ld
+++ /dev/null
@@ -1,62 +0,0 @@
-/* Linker script for the FPGA implementation of the V850E2 NA85E2C cpu core
-   (CONFIG_V850E2_FPGA85E2C).  */
-
-MEMORY {
-	/* Reset vector.  */
-	RESET	 : ORIGIN = 0, LENGTH = 0x10
-	/* Interrupt vectors.  */
-	INTV      : ORIGIN = 0x10, LENGTH = 0x470
-	/* The `window' in RAM were we're allowed to load stuff.  */
-	RAM_LOW   : ORIGIN = 0x480, LENGTH = 0x0005FB80
-	/* Some more ram above the window were we can put bss &c.  */
-	RAM_HIGH  : ORIGIN = 0x00060000, LENGTH = 0x000A0000
-	/* This is the area visible from the outside world (we can use
-	   this only for uninitialized data).  */
-	VISIBLE   : ORIGIN = 0x00200000, LENGTH = 0x00060000
-}
-
-SECTIONS {
-	.reset : {
-		__kram_start = . ;
-		__intv_start = . ;
-	        	*(.intv.reset)	/* Reset vector */
-	} > RESET
-
-	.ram_low : {
-		__r0_ram = . ;		/* Must be near address 0.  */
-		. = . + 32 ;
-
-		TEXT_CONTENTS
-		DATA_CONTENTS
-		ROOT_FS_CONTENTS
-		RAMK_INIT_CONTENTS_NO_END
-		INITRAMFS_CONTENTS
-	} > RAM_LOW
-
-        /* Where the interrupt vectors are initially loaded.  */
-	__intv_load_start = . ;
-
-	.intv : {
-			*(.intv.common)	/* Vectors common to all v850e proc. */
-			*(.intv.mach)	/* Machine-specific int. vectors.  */
-		__intv_end = . ;
-	} > INTV  AT> RAM_LOW
-
-	.ram_high : {
-		/* This is here so that when we free init memory the
-		   load-time copy of the interrupt vectors and any empty
-		   space at the end of the `RAM_LOW' area is freed too.  */
-		. = ALIGN (4096);
-		__init_end = . ;
-
-		BSS_CONTENTS
-		__kram_end = . ;
-		BOOTMAP_CONTENTS
-	} > RAM_HIGH
-
-	.visible : {
-		_memcons_output = . ;
-		. = . + 0x8000 ;
-		_memcons_output_end = . ;
-	} > VISIBLE
-}
diff --git a/arch/v850/kernel/gbus_int.c b/arch/v850/kernel/gbus_int.c
deleted file mode 100644
index b2bcc251f65b..000000000000
--- a/arch/v850/kernel/gbus_int.c
+++ /dev/null
@@ -1,271 +0,0 @@
-/*
- * arch/v850/kernel/gbus_int.c -- Midas labs GBUS interrupt support
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/irq.h>
-#include <linux/interrupt.h>
-#include <linux/signal.h>
-#include <linux/kernel.h>
-
-#include <asm/machdep.h>
-
-
-/* The number of shared GINT interrupts. */
-#define NUM_GINTS   	4
-
-/* For each GINT interrupt, how many GBUS interrupts are using it.  */
-static unsigned gint_num_active_irqs[NUM_GINTS] = { 0 };
-
-/* A table of GINTn interrupts we actually use.
-   Note that we don't use GINT0 because all the boards we support treat it
-   specially.  */
-struct used_gint {
-	unsigned gint;
-	unsigned priority;
-} used_gint[] = {
-	{ 1, GBUS_INT_PRIORITY_HIGH },
-	{ 3, GBUS_INT_PRIORITY_LOW }
-};
-#define NUM_USED_GINTS ARRAY_SIZE(used_gint)
-
-/* A table of which GINT is used by each GBUS interrupts (they are
-   assigned based on priority).  */
-static unsigned char gbus_int_gint[IRQ_GBUS_INT_NUM];
-
-
-/* Interrupt enabling/disabling.  */
-
-/* Enable interrupt handling for interrupt IRQ.  */
-void gbus_int_enable_irq (unsigned irq)
-{
-	unsigned gint = gbus_int_gint[irq - GBUS_INT_BASE_IRQ];
-	GBUS_INT_ENABLE (GBUS_INT_IRQ_WORD(irq), gint)
-		|= GBUS_INT_IRQ_MASK (irq);
-}
-
-/* Disable interrupt handling for interrupt IRQ.  Note that any
-   interrupts received while disabled will be delivered once the
-   interrupt is enabled again, unless they are explicitly cleared using
-   `gbus_int_clear_pending_irq'.  */
-void gbus_int_disable_irq (unsigned irq)
-{
-	unsigned gint = gbus_int_gint[irq - GBUS_INT_BASE_IRQ];
-	GBUS_INT_ENABLE (GBUS_INT_IRQ_WORD(irq), gint)
-		&= ~GBUS_INT_IRQ_MASK (irq);
-}
-
-/* Return true if interrupt handling for interrupt IRQ is enabled.  */
-int gbus_int_irq_enabled (unsigned irq)
-{
-	unsigned gint = gbus_int_gint[irq - GBUS_INT_BASE_IRQ];
-	return (GBUS_INT_ENABLE (GBUS_INT_IRQ_WORD(irq), gint)
-		& GBUS_INT_IRQ_MASK(irq));
-}
-
-/* Disable all GBUS irqs.  */
-void gbus_int_disable_irqs ()
-{
-	unsigned w, n;
-	for (w = 0; w < GBUS_INT_NUM_WORDS; w++)
-		for (n = 0; n < IRQ_GINT_NUM; n++)
-			GBUS_INT_ENABLE (w, n) = 0;
-}
-
-/* Clear any pending interrupts for IRQ.  */
-void gbus_int_clear_pending_irq (unsigned irq)
-{
-	GBUS_INT_CLEAR (GBUS_INT_IRQ_WORD(irq)) = GBUS_INT_IRQ_MASK (irq);
-}
-
-/* Return true if interrupt IRQ is pending (but disabled).  */
-int gbus_int_irq_pending (unsigned irq)
-{
-	return (GBUS_INT_STATUS (GBUS_INT_IRQ_WORD(irq))
-		& GBUS_INT_IRQ_MASK(irq));
-}
-
-
-/* Delegating interrupts.  */
-
-/* Handle a shared GINT interrupt by passing to the appropriate GBUS
-   interrupt handler.  */
-static irqreturn_t gbus_int_handle_irq (int irq, void *dev_id,
-					struct pt_regs *regs)
-{
-	unsigned w;
-	irqreturn_t rval = IRQ_NONE;
-	unsigned gint = irq - IRQ_GINT (0);
-
-	for (w = 0; w < GBUS_INT_NUM_WORDS; w++) {
-		unsigned status = GBUS_INT_STATUS (w);
-		unsigned enable = GBUS_INT_ENABLE (w, gint);
-
-		/* Only pay attention to enabled interrupts.  */
-		status &= enable;
-		if (status) {
-			irq = IRQ_GBUS_INT (w * GBUS_INT_BITS_PER_WORD);
-			do {
-				/* There's an active interrupt in word
-				   W, find out which one, and call its
-				   handler.  */
-
-				while (! (status & 0x1)) {
-					irq++;
-					status >>= 1;
-				}
-				status &= ~0x1;
-
-				/* Recursively call handle_irq to handle it. */
-				handle_irq (irq, regs);
-				rval = IRQ_HANDLED;
-			} while (status);
-		}
-	}
-
-	/* Toggle the `all enable' bit back and forth, which should cause
-	   another edge transition if there are any other interrupts
-	   still pending, and so result in another CPU interrupt.  */
-	GBUS_INT_ENABLE (0, gint) &= ~0x1;
-	GBUS_INT_ENABLE (0, gint) |=  0x1;
-
-	return rval;
-}
-
-
-/* Initialize GBUS interrupt sources.  */
-
-static void irq_nop (unsigned irq) { }
-
-static unsigned gbus_int_startup_irq (unsigned irq)
-{
-	unsigned gint = gbus_int_gint[irq - GBUS_INT_BASE_IRQ];
-
-	if (gint_num_active_irqs[gint] == 0) {
-		/* First enable the CPU interrupt.  */
-		int rval =
-			request_irq (IRQ_GINT(gint), gbus_int_handle_irq,
-				     IRQF_DISABLED,
-				     "gbus_int_handler",
-				     &gint_num_active_irqs[gint]);
-		if (rval != 0)
-			return rval;
-	}
-
-	gint_num_active_irqs[gint]++;
-
-	gbus_int_clear_pending_irq (irq);
-	gbus_int_enable_irq (irq);
-
-	return 0;
-}
-
-static void gbus_int_shutdown_irq (unsigned irq)
-{
-	unsigned gint = gbus_int_gint[irq - GBUS_INT_BASE_IRQ];
-
-	gbus_int_disable_irq (irq);
-
-	if (--gint_num_active_irqs[gint] == 0)
-		/* Disable the CPU interrupt.  */
-		free_irq (IRQ_GINT(gint), &gint_num_active_irqs[gint]);
-}
-
-/* Initialize HW_IRQ_TYPES for INTC-controlled irqs described in array
-   INITS (which is terminated by an entry with the name field == 0).  */
-void __init gbus_int_init_irq_types (struct gbus_int_irq_init *inits,
-				     struct hw_interrupt_type *hw_irq_types)
-{
-	struct gbus_int_irq_init *init;
-	for (init = inits; init->name; init++) {
-		unsigned i;
-		struct hw_interrupt_type *hwit = hw_irq_types++;
-
-		hwit->typename = init->name;
-
-		hwit->startup  = gbus_int_startup_irq;
-		hwit->shutdown = gbus_int_shutdown_irq;
-		hwit->enable   = gbus_int_enable_irq;
-		hwit->disable  = gbus_int_disable_irq;
-		hwit->ack      = irq_nop;
-		hwit->end      = irq_nop;
-		
-		/* Initialize kernel IRQ infrastructure for this interrupt.  */
-		init_irq_handlers(init->base, init->num, init->interval, hwit);
-
-		/* Set the interrupt priorities.  */
-		for (i = 0; i < init->num; i++) {
-			unsigned j;
-			for (j = 0; j < NUM_USED_GINTS; j++)
-				if (used_gint[j].priority > init->priority)
-					break;
-			/* Wherever we stopped looking is one past the
-			   GINT we want. */
-			gbus_int_gint[init->base + i * init->interval
-				      - GBUS_INT_BASE_IRQ]
-				= used_gint[j > 0 ? j - 1 : 0].gint;
-		}
-	}
-}
-
-
-/* Initialize IRQS.  */
-
-/* Chip interrupts (GINTn) shared among GBUS interrupts.  */
-static struct hw_interrupt_type gint_hw_itypes[NUM_USED_GINTS];
-
-
-/* GBUS interrupts themselves.  */
-
-struct gbus_int_irq_init gbus_irq_inits[] __initdata = {
-	/* First set defaults.  */
-	{ "GBUS_INT", IRQ_GBUS_INT(0), IRQ_GBUS_INT_NUM, 1, 6},
-	{ 0 }
-};
-#define NUM_GBUS_IRQ_INITS (ARRAY_SIZE(gbus_irq_inits) - 1)
-
-static struct hw_interrupt_type gbus_hw_itypes[NUM_GBUS_IRQ_INITS];
-
-
-/* Initialize GBUS interrupts.  */
-void __init gbus_int_init_irqs (void)
-{
-	unsigned i;
-
-	/* First initialize the shared gint interrupts.  */
-	for (i = 0; i < NUM_USED_GINTS; i++) {
-		unsigned gint = used_gint[i].gint;
-		struct v850e_intc_irq_init gint_irq_init[2];
-
-		/* We initialize one GINT interrupt at a time.  */
-		gint_irq_init[0].name = "GINT";
-		gint_irq_init[0].base = IRQ_GINT (gint);
-		gint_irq_init[0].num = 1;
-		gint_irq_init[0].interval = 1;
-		gint_irq_init[0].priority = used_gint[i].priority;
-
-		gint_irq_init[1].name = 0; /* Terminate the vector.  */
-
-		v850e_intc_init_irq_types (gint_irq_init, gint_hw_itypes);
-	}
-
-	/* Then the GBUS interrupts.  */
-	gbus_int_disable_irqs ();
-	gbus_int_init_irq_types (gbus_irq_inits, gbus_hw_itypes);
-	/* Turn on the `all enable' bits, which are ANDed with
-	   individual interrupt enable bits; we only want to bother with
-	   the latter.  They are the first bit in the first word of each
-	   interrupt-enable area.  */
-	for (i = 0; i < NUM_USED_GINTS; i++)
-		GBUS_INT_ENABLE (0, used_gint[i].gint) = 0x1;
-}
diff --git a/arch/v850/kernel/head.S b/arch/v850/kernel/head.S
deleted file mode 100644
index c490b937ef14..000000000000
--- a/arch/v850/kernel/head.S
+++ /dev/null
@@ -1,128 +0,0 @@
-/*
- * arch/v850/kernel/head.S -- Lowest-level startup code
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#include <asm/clinkage.h>
-#include <asm/current.h>
-#include <asm/entry.h>
-#include <asm/thread_info.h>
-#include <asm/irq.h>
-
-
-/* Make a slightly more convenient alias for C_SYMBOL_NAME.  */
-#define CSYM	C_SYMBOL_NAME
-
-
-	.text
-
-	// Define `mach_early_init' as a weak symbol
-	.global	CSYM(mach_early_init)
-	.weak	CSYM(mach_early_init)
-
-C_ENTRY(start):
-	// Make sure interrupts are turned off, just in case
-	di
-
-#ifdef CONFIG_RESET_GUARD
-	// See if we got here via an unexpected reset
-	ld.w	RESET_GUARD, r19	// Check current value of reset guard
-	mov	RESET_GUARD_ACTIVE, r20
-	cmp	r19, r20
-	bne	1f			// Guard was not active
-
-	// If we get here, the reset guard was active.  Load up some
-	// interesting values as arguments, and jump to the handler.
-	st.w	r0, RESET_GUARD		// Allow further resets to succeed
-	mov	lp, r6			// Arg 0: return address
-	ld.b	KM, r7			// Arg 1: kernel mode
-	mov	sp, r9			// Arg 3: stack pointer
-	ld.w	KSP, r19		// maybe switch to kernel stack
-	cmp	r7, r0			// see if already in kernel mode
-	cmov	z, r19, sp, sp		//  and switch to kernel stack if not
-	GET_CURRENT_TASK(r8)		// Arg 2: task pointer
-	jr	CSYM(unexpected_reset)
-
-1:	st.w	r20, RESET_GUARD	// Turn on reset guard
-#endif /* CONFIG_RESET_GUARD */
-
-	// Setup a temporary stack for doing pre-initialization function calls.
-	// 
-	// We can't use the initial kernel stack, because (1) it may be
-	// located in memory we're not allowed to touch, and (2) since
-	// it's in the data segment, calling memcpy to initialize that
-	// area from ROM will overwrite memcpy's return address.
-	mov	hilo(CSYM(_init_stack_end) - 4), sp
-
-	// See if there's a platform-specific early-initialization routine
-	// defined; it's a weak symbol, so it will have an address of zero if
-	// there's not.
-	mov	hilo(CSYM(mach_early_init)), r6
-	cmp	r6, r0
-	bz	3f
-
-	// There is one, so call it.  If this function is written in C, it
-	// should be very careful -- the stack pointer is valid, but very
-	// little else is (e.g., bss is not zeroed yet, and initialized data
-	// hasn't been).
-	jarl	2f, lp			// first figure out return address
-2:	add	3f - ., lp
-	jmp	[r6]			// do call
-3:
-
-#ifdef CONFIG_ROM_KERNEL
-	// Copy the data area from ROM to RAM
-	mov	hilo(CSYM(_rom_copy_dst_start)), r6
-	mov	hilo(CSYM(_rom_copy_src_start)), r7
-	mov	hilo(CSYM(_rom_copy_dst_end)), r8
-	sub	r6, r8
-	jarl	CSYM(memcpy), lp
-#endif
-
-	// Load the initial thread's stack, and current task pointer (in r16)
-	mov	hilo(CSYM(init_thread_union)), r19
-	movea	THREAD_SIZE, r19, sp
-	ld.w	TI_TASK[r19], CURRENT_TASK
-
-#ifdef CONFIG_TIME_BOOTUP
-	/* This stuff must come after mach_early_init, because interrupts may
-	   not work until after its been called.  */
-	jarl	CSYM(highres_timer_reset), lp
-	jarl	CSYM(highres_timer_start), lp
-#endif
-
-	// Kernel stack pointer save location
-	st.w	sp, KSP
-
-	// Assert that we're in `kernel mode'
-	mov	1, r19
-	st.w	r19, KM
-
-#ifdef CONFIG_ZERO_BSS
-	// Zero bss area, since we can't rely upon any loader to do so
-	mov	hilo(CSYM(_sbss)), r6
-	mov	r0, r7
-	mov	hilo(CSYM(_ebss)), r8
-	sub	r6, r8
-	jarl	CSYM(memset), lp
-#endif
-
-	// What happens if the main kernel function returns (it shouldn't)
-	mov	hilo(CSYM(machine_halt)), lp
-
-	// Start the linux kernel.  We use an indirect jump to get extra
-	// range, because on some platforms this initial startup code
-	// (and the associated platform-specific code in mach_early_init)
-	// are located far away from the main kernel, e.g. so that they
-	// can initialize RAM first and copy the kernel or something.
-	mov	hilo(CSYM(start_kernel)), r12
-	jmp	[r12]
-C_END(start)
diff --git a/arch/v850/kernel/highres_timer.c b/arch/v850/kernel/highres_timer.c
deleted file mode 100644
index b16ad1eaf966..000000000000
--- a/arch/v850/kernel/highres_timer.c
+++ /dev/null
@@ -1,132 +0,0 @@
-/*
- * arch/v850/kernel/highres_timer.c -- High resolution timing routines
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#include <asm/system.h>
-#include <asm/v850e_timer_d.h>
-#include <asm/highres_timer.h>
-
-#define HIGHRES_TIMER_USEC_SHIFT   12
-
-/* Pre-calculated constant used for converting ticks to real time
-   units.  We initialize it to prevent it being put into BSS.  */
-static u32 highres_timer_usec_prescale = 1;
-
-void highres_timer_slow_tick_irq (void) __attribute__ ((noreturn));
-void highres_timer_slow_tick_irq (void)
-{
-	/* This is an interrupt handler, so it must be very careful to
-	   not to trash any registers.  At this point, the stack-pointer
-	   (r3) has been saved in the chip ram location ENTRY_SP by the
-	   interrupt vector, so we can use it as a scratch register; we
-	   must also restore it before returning.  */
-	asm ("ld.w	%0[r0], sp;"
-	     "add	1, sp;"
-	     "st.w	sp, %0[r0];"
-	     "ld.w	%1[r0], sp;" /* restore pre-irq stack-pointer */
-	     "reti"
-	     ::
-	      "i" (HIGHRES_TIMER_SLOW_TICKS_ADDR),
-	      "i" (ENTRY_SP_ADDR)
-	     : "memory");
-}
-
-void highres_timer_reset (void)
-{
-	V850E_TIMER_D_TMD (HIGHRES_TIMER_TIMER_D_UNIT) = 0;
-	HIGHRES_TIMER_SLOW_TICKS = 0;
-}
-
-void highres_timer_start (void)
-{
-	u32 fast_tick_rate;
-
-	/* Start hardware timer.  */
-	v850e_timer_d_configure (HIGHRES_TIMER_TIMER_D_UNIT,
-				 HIGHRES_TIMER_SLOW_TICK_RATE);
-
-	fast_tick_rate =
-		(V850E_TIMER_D_BASE_FREQ
-		 >> V850E_TIMER_D_DIVLOG2 (HIGHRES_TIMER_TIMER_D_UNIT));
-
-	/* The obvious way of calculating microseconds from fast ticks
-	   is to do:
-
-	     usec = fast_ticks * 10^6 / fast_tick_rate
-
-	   However, divisions are much slower than multiplications, and
-	   the above calculation can overflow, so we do this instead:
-
-	     usec = fast_ticks * (10^6 * 2^12 / fast_tick_rate) / 2^12
-
-           since we can pre-calculate (10^6 * (2^12 / fast_tick_rate))
-	   and use a shift for dividing by 2^12, this avoids division,
-	   and is almost as accurate (it differs by about 2 microseconds
-	   at the extreme value of the fast-tick counter's ranger).  */
-	highres_timer_usec_prescale = ((1000000 << HIGHRES_TIMER_USEC_SHIFT)
-				       / fast_tick_rate);
-
-	/* Enable the interrupt (which is hardwired to this use), and
-	   give it the highest priority.  */
-	V850E_INTC_IC (IRQ_INTCMD (HIGHRES_TIMER_TIMER_D_UNIT)) = 0;
-}
-
-void highres_timer_stop (void)
-{
-	/* Stop the timer.  */
-	V850E_TIMER_D_TMCD (HIGHRES_TIMER_TIMER_D_UNIT) =
-		V850E_TIMER_D_TMCD_CAE;
-	/* Disable its interrupt, just in case.  */
-	v850e_intc_disable_irq (IRQ_INTCMD (HIGHRES_TIMER_TIMER_D_UNIT));
-}
-
-inline void highres_timer_read_ticks (u32 *slow_ticks, u32 *fast_ticks)
-{
-	int flags;
-	u32 fast_ticks_1, fast_ticks_2, _slow_ticks;
-
-	local_irq_save (flags);
-	fast_ticks_1 = V850E_TIMER_D_TMD (HIGHRES_TIMER_TIMER_D_UNIT);
-	_slow_ticks = HIGHRES_TIMER_SLOW_TICKS;
-	fast_ticks_2 = V850E_TIMER_D_TMD (HIGHRES_TIMER_TIMER_D_UNIT);
-	local_irq_restore (flags);
-
-	if (fast_ticks_2 < fast_ticks_1)
-		_slow_ticks++;
-
-	*slow_ticks = _slow_ticks;
-	*fast_ticks = fast_ticks_2;
-}
-
-inline void highres_timer_ticks_to_timeval (u32 slow_ticks, u32 fast_ticks,
-					    struct timeval *tv)
-{
-	unsigned long sec, sec_rem, usec;
-
-	usec = ((fast_ticks * highres_timer_usec_prescale)
-		>> HIGHRES_TIMER_USEC_SHIFT);
-
-	sec = slow_ticks / HIGHRES_TIMER_SLOW_TICK_RATE;
-	sec_rem = slow_ticks % HIGHRES_TIMER_SLOW_TICK_RATE;
-
-	usec += sec_rem * (1000000 / HIGHRES_TIMER_SLOW_TICK_RATE);
-
-	tv->tv_sec = sec;
-	tv->tv_usec = usec;
-}
-
-void highres_timer_read (struct timeval *tv)
-{
-	u32 fast_ticks, slow_ticks;
-	highres_timer_read_ticks (&slow_ticks, &fast_ticks);
-	highres_timer_ticks_to_timeval (slow_ticks, fast_ticks, tv);
-}
diff --git a/arch/v850/kernel/init_task.c b/arch/v850/kernel/init_task.c
deleted file mode 100644
index 44b274dff33f..000000000000
--- a/arch/v850/kernel/init_task.c
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * arch/v850/kernel/init_task.c -- Initial task/thread structures
- *
- *  Copyright (C) 2002,03  NEC Electronics Corporation
- *  Copyright (C) 2002,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- */
-
-#include <linux/mm.h>
-#include <linux/module.h>
-#include <linux/sched.h>
-#include <linux/init.h>
-#include <linux/init_task.h>
-#include <linux/fs.h>
-#include <linux/mqueue.h>
-
-#include <asm/uaccess.h>
-#include <asm/pgtable.h>
-
-static struct fs_struct init_fs = INIT_FS;
-static struct signal_struct init_signals = INIT_SIGNALS (init_signals);
-static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
-struct mm_struct init_mm = INIT_MM (init_mm);
-
-EXPORT_SYMBOL(init_mm);
-
-/*
- * Initial task structure.
- *
- * All other task structs will be allocated on slabs in fork.c
- */
-struct task_struct init_task = INIT_TASK (init_task);
-
-EXPORT_SYMBOL(init_task);
-
-/*
- * Initial thread structure.
- *
- * We need to make sure that this is 8192-byte aligned due to the
- * way process stacks are handled. This is done by having a special
- * "init_task" linker map entry.
- */
-union thread_union init_thread_union 
-	__attribute__((__section__(".data.init_task"))) =
-		{ INIT_THREAD_INFO(init_task) };
diff --git a/arch/v850/kernel/intv.S b/arch/v850/kernel/intv.S
deleted file mode 100644
index 671e4c6150dd..000000000000
--- a/arch/v850/kernel/intv.S
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * arch/v850/kernel/intv.S -- Interrupt vectors
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#include <asm/clinkage.h>
-#include <asm/irq.h>
-#include <asm/machdep.h>
-#include <asm/entry.h>
-
-#ifdef CONFIG_V850E_HIGHRES_TIMER
-#include <asm/highres_timer.h>
-#endif
-
-/* Jump to an interrupt/trap handler.  These handlers (defined in entry.S)
-   expect the stack-pointer to be saved in ENTRY_SP, so we use sp to do an
-   indirect jump (which avoids problems when the handler is more than a signed
-   22-bit offset away).  */
-#define JUMP_TO_HANDLER(name, sp_save_loc)				      \
-	st.w	sp, sp_save_loc;					      \
-	mov	hilo(name), sp;						      \
-	jmp	[sp]
-
-
-	/* Reset vector.  */
-	.section	.intv.reset, "ax"
-	.org	0x0
-	mov	hilo(C_SYMBOL_NAME(start)), r1;
-	jmp	[r1]
-
-
-	/* Generic interrupt vectors.  */
-	.section	.intv.common, "ax"
-	.balign	0x10
-	JUMP_TO_HANDLER (nmi, NMI_ENTRY_SP)	// 0x10 - NMI0
-	.balign	0x10
-	JUMP_TO_HANDLER (nmi, NMI_ENTRY_SP)	// 0x20 - NMI1
-	.balign	0x10
-	JUMP_TO_HANDLER (nmi, NMI_ENTRY_SP)	// 0x30 - NMI2
-	
-	.balign	0x10
-	JUMP_TO_HANDLER (trap, ENTRY_SP)	// 0x40 - TRAP0n
-	.balign	0x10
-	JUMP_TO_HANDLER (trap, ENTRY_SP)	// 0x50 - TRAP1n
-
-	.balign	0x10
-	JUMP_TO_HANDLER (dbtrap, ENTRY_SP)	// 0x60 - Illegal op / DBTRAP insn
-
-
-	/* Hardware interrupt vectors.  */
-	.section	.intv.mach, "ax"
-	.org	0x0
-
-#if defined (CONFIG_V850E_HIGHRES_TIMER) && defined (IRQ_INTCMD)
-
-	/* Interrupts before the highres timer interrupt.  */
-	.rept	IRQ_INTCMD (HIGHRES_TIMER_TIMER_D_UNIT)
-	.balign	0x10
-	JUMP_TO_HANDLER (irq, ENTRY_SP)
-	.endr
-
-	/* The highres timer interrupt.  */
-	.balign	0x10
-	JUMP_TO_HANDLER (C_SYMBOL_NAME (highres_timer_slow_tick_irq), ENTRY_SP)
-
-	/* Interrupts after the highres timer interrupt.  */
-	.rept	NUM_CPU_IRQS - IRQ_INTCMD (HIGHRES_TIMER_TIMER_D_UNIT) - 1
-	.balign	0x10
-	JUMP_TO_HANDLER (irq, ENTRY_SP)
-	.endr
-
-#else /* No highres timer */
-
-	.rept	NUM_CPU_IRQS
-	.balign	0x10
-	JUMP_TO_HANDLER (irq, ENTRY_SP)
-	.endr
-
-#endif /* Highres timer */
diff --git a/arch/v850/kernel/irq.c b/arch/v850/kernel/irq.c
deleted file mode 100644
index 858c45819aab..000000000000
--- a/arch/v850/kernel/irq.c
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- * arch/v850/kernel/irq.c -- High-level interrupt handling
- *
- *  Copyright (C) 2001,02,03,04,05  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03,04,05  Miles Bader <miles@gnu.org>
- *  Copyright (C) 1994-2000  Ralf Baechle
- *  Copyright (C) 1992  Linus Torvalds
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * This file was was derived from the mips version, arch/mips/kernel/irq.c
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/irq.h>
-#include <linux/init.h>
-#include <linux/interrupt.h>
-#include <linux/kernel_stat.h>
-#include <linux/slab.h>
-#include <linux/mm.h>
-#include <linux/random.h>
-#include <linux/seq_file.h>
-
-#include <asm/system.h>
-
-/*
- * 'what should we do if we get a hw irq event on an illegal vector'.
- * each architecture has to answer this themselves, it doesn't deserve
- * a generic callback i think.
- */
-void ack_bad_irq(unsigned int irq)
-{
-	printk("received IRQ %d with unknown interrupt type\n", irq);
-}
-
-volatile unsigned long irq_err_count, spurious_count;
-
-/*
- * Generic, controller-independent functions:
- */
-
-int show_interrupts(struct seq_file *p, void *v)
-{
-	int irq = *(loff_t *) v;
-
-	if (irq == 0) {
-		int cpu;
-		seq_puts(p, "           ");
-		for (cpu=0; cpu < 1 /*smp_num_cpus*/; cpu++)
-			seq_printf(p, "CPU%d       ", cpu);
-		seq_putc(p, '\n');
-	}
-
-	if (irq < NR_IRQS) {
-		unsigned long flags;
-		struct irqaction *action;
-
-		spin_lock_irqsave(&irq_desc[irq].lock, flags);
-
-		action = irq_desc[irq].action;
-		if (action) {
-			int j;
-			int count = 0;
-			int num = -1;
-			const char *type_name = irq_desc[irq].chip->typename;
-
-			for (j = 0; j < NR_IRQS; j++)
-				if (irq_desc[j].chip->typename == type_name){
-					if (irq == j)
-						num = count;
-					count++;
-				}
-
-			seq_printf(p, "%3d: ",irq);
-			seq_printf(p, "%10u ", kstat_irqs(irq));
-			if (count > 1) {
-				int prec = (num >= 100 ? 3 : num >= 10 ? 2 : 1);
-				seq_printf(p, " %*s%d", 14 - prec,
-					   type_name, num);
-			} else
-				seq_printf(p, " %14s", type_name);
-		
-			seq_printf(p, "  %s", action->name);
-			for (action=action->next; action; action = action->next)
-				seq_printf(p, ", %s", action->name);
-			seq_putc(p, '\n');
-		}
-
-		spin_unlock_irqrestore(&irq_desc[irq].lock, flags);
-	} else if (irq == NR_IRQS)
-		seq_printf(p, "ERR: %10lu\n", irq_err_count);
-
-	return 0;
-}
-
-/* Handle interrupt IRQ.  REGS are the registers at the time of ther
-   interrupt.  */
-unsigned int handle_irq (int irq, struct pt_regs *regs)
-{
-	irq_enter();
-	__do_IRQ(irq, regs);
-	irq_exit();
-	return 1;
-}
-
-/* Initialize irq handling for IRQs.
-   BASE_IRQ, BASE_IRQ+INTERVAL, ..., BASE_IRQ+NUM*INTERVAL
-   to IRQ_TYPE.  An IRQ_TYPE of 0 means to use a generic interrupt type.  */
-void __init
-init_irq_handlers (int base_irq, int num, int interval,
-		   struct hw_interrupt_type *irq_type)
-{
-	while (num-- > 0) {
-		irq_desc[base_irq].status  = IRQ_DISABLED;
-		irq_desc[base_irq].action  = NULL;
-		irq_desc[base_irq].depth   = 1;
-		irq_desc[base_irq].chip = irq_type;
-		base_irq += interval;
-	}
-}
diff --git a/arch/v850/kernel/ma.c b/arch/v850/kernel/ma.c
deleted file mode 100644
index 143774de75e1..000000000000
--- a/arch/v850/kernel/ma.c
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * arch/v850/kernel/ma.c -- V850E/MA series of cpu chips
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/mm.h>
-#include <linux/swap.h>
-#include <linux/bootmem.h>
-#include <linux/irq.h>
-
-#include <asm/atomic.h>
-#include <asm/page.h>
-#include <asm/machdep.h>
-#include <asm/v850e_timer_d.h>
-
-#include "mach.h"
-
-void __init mach_sched_init (struct irqaction *timer_action)
-{
-	/* Start hardware timer.  */
-	v850e_timer_d_configure (0, HZ);
-	/* Install timer interrupt handler.  */
-	setup_irq (IRQ_INTCMD(0), timer_action);
-}
-
-static struct v850e_intc_irq_init irq_inits[] = {
-	{ "IRQ", 0, 		NUM_MACH_IRQS,	1, 7 },
-	{ "CMD", IRQ_INTCMD(0), IRQ_INTCMD_NUM,	1, 5 },
-	{ "DMA", IRQ_INTDMA(0), IRQ_INTDMA_NUM,	1, 2 },
-	{ "CSI", IRQ_INTCSI(0), IRQ_INTCSI_NUM,	4, 4 },
-	{ "SER", IRQ_INTSER(0), IRQ_INTSER_NUM,	4, 3 },
-	{ "SR",	 IRQ_INTSR(0),	IRQ_INTSR_NUM, 	4, 4 },
-	{ "ST",  IRQ_INTST(0), 	IRQ_INTST_NUM, 	4, 5 },
-	{ 0 }
-};
-#define NUM_IRQ_INITS (ARRAY_SIZE(irq_inits) - 1)
-
-static struct hw_interrupt_type hw_itypes[NUM_IRQ_INITS];
-
-/* Initialize MA chip interrupts.  */
-void __init ma_init_irqs (void)
-{
-	v850e_intc_init_irq_types (irq_inits, hw_itypes);
-}
-
-/* Called before configuring an on-chip UART.  */
-void ma_uart_pre_configure (unsigned chan, unsigned cflags, unsigned baud)
-{
-	/* We only know about the first two UART channels (though
-	   specific chips may have more).  */
-	if (chan < 2) {
-		unsigned bits = 0x3 << (chan * 3);
-		/* Specify that the relevant pins on the chip should do
-		   serial I/O, not direct I/O.  */
-		MA_PORT4_PMC |= bits;
-		/* Specify that we're using the UART, not the CSI device.  */
-		MA_PORT4_PFC |= bits;
-	}
-}
diff --git a/arch/v850/kernel/mach.c b/arch/v850/kernel/mach.c
deleted file mode 100644
index b9db278d2b71..000000000000
--- a/arch/v850/kernel/mach.c
+++ /dev/null
@@ -1,17 +0,0 @@
-/*
- * arch/v850/kernel/mach.c -- Defaults for some things defined by "mach.h"
- *
- *  Copyright (C) 2001  NEC Corporation
- *  Copyright (C) 2001  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#include "mach.h"
-
-/* Called with each timer tick, if non-zero.  */
-void (*mach_tick)(void) = 0;
diff --git a/arch/v850/kernel/mach.h b/arch/v850/kernel/mach.h
deleted file mode 100644
index 9e0e4816ec56..000000000000
--- a/arch/v850/kernel/mach.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * arch/v850/kernel/mach.h -- Machine-dependent functions used by v850 port
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_MACH_H__
-#define __V850_MACH_H__
-
-#include <linux/kernel.h>
-#include <linux/time.h>
-#include <linux/spinlock.h>
-#include <linux/interrupt.h>
-#include <linux/fs.h>
-#include <linux/seq_file.h>
-
-#include <asm/ptrace.h>
-#include <asm/entry.h>
-#include <asm/clinkage.h>
-
-void mach_setup (char **cmdline);
-void mach_gettimeofday (struct timespec *tv);
-void mach_sched_init (struct irqaction *timer_action);
-void mach_get_physical_ram (unsigned long *ram_start, unsigned long *ram_len);
-void mach_init_irqs (void);
-
-/* If defined, is called very early in the kernel initialization.  The
-   stack pointer is valid, but very little has been initialized (e.g.,
-   bss is not zeroed yet) when this is called, so care must taken.  */
-void mach_early_init (void);
-
-/* If defined, called after the bootmem allocator has been initialized,
-   to allow the platform-dependent code to reserve any areas of RAM that
-   the kernel shouldn't touch.  */
-void mach_reserve_bootmem (void) __attribute__ ((__weak__));
-
-/* Called with each timer tick, if non-zero.  */
-extern void (*mach_tick) (void);
-
-/* The following establishes aliases for various mach_ functions to the
-   name by which the rest of the kernel calls them.  These statements
-   should only have an effect in the file that defines the actual functions. */
-#define MACH_ALIAS(to, from)						      \
-   asm (".global " macrology_stringify (C_SYMBOL_NAME (to)) ";"		      \
-	macrology_stringify (C_SYMBOL_NAME (to))			      \
-	" = " macrology_stringify (C_SYMBOL_NAME (from)))
-/* e.g.: MACH_ALIAS (kernel_name,	arch_spec_name);  */
-
-#endif /* __V850_MACH_H__ */
diff --git a/arch/v850/kernel/me2.c b/arch/v850/kernel/me2.c
deleted file mode 100644
index 007115dc9ce0..000000000000
--- a/arch/v850/kernel/me2.c
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * arch/v850/kernel/me2.c -- V850E/ME2 chip-specific support
- *
- *  Copyright (C) 2003  NEC Corporation
- *  Copyright (C) 2003  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/mm.h>
-#include <linux/swap.h>
-#include <linux/bootmem.h>
-#include <linux/irq.h>
-
-#include <asm/atomic.h>
-#include <asm/page.h>
-#include <asm/machdep.h>
-#include <asm/v850e_timer_d.h>
-
-#include "mach.h"
-
-void __init mach_sched_init (struct irqaction *timer_action)
-{
-	/* Start hardware timer.  */
-	v850e_timer_d_configure (0, HZ);
-	/* Install timer interrupt handler.  */
-	setup_irq (IRQ_INTCMD(0), timer_action);
-}
-
-static struct v850e_intc_irq_init irq_inits[] = {
-	{ "IRQ",    0,                NUM_CPU_IRQS,      1, 7 },
-	{ "INTP",   IRQ_INTP(0),      IRQ_INTP_NUM,      1, 5 },
-	{ "CMD",    IRQ_INTCMD(0),    IRQ_INTCMD_NUM,    1, 3 },
-	{ "UBTIRE", IRQ_INTUBTIRE(0), IRQ_INTUBTIRE_NUM, 5, 4 },
-	{ "UBTIR",  IRQ_INTUBTIR(0),  IRQ_INTUBTIR_NUM,  5, 4 },
-	{ "UBTIT",  IRQ_INTUBTIT(0),  IRQ_INTUBTIT_NUM,  5, 4 },
-	{ "UBTIF",  IRQ_INTUBTIF(0),  IRQ_INTUBTIF_NUM,  5, 4 },
-	{ "UBTITO", IRQ_INTUBTITO(0), IRQ_INTUBTITO_NUM, 5, 4 },
-	{ 0 }
-};
-#define NUM_IRQ_INITS (ARRAY_SIZE(irq_inits) - 1)
-
-static struct hw_interrupt_type hw_itypes[NUM_IRQ_INITS];
-
-/* Initialize V850E/ME2 chip interrupts.  */
-void __init me2_init_irqs (void)
-{
-	v850e_intc_init_irq_types (irq_inits, hw_itypes);
-}
-
-/* Called before configuring an on-chip UART.  */
-void me2_uart_pre_configure (unsigned chan, unsigned cflags, unsigned baud)
-{
-	if (chan == 0) {
-		/* Specify that the relevant pins on the chip should do
-		   serial I/O, not direct I/O.  */
-		ME2_PORT1_PMC |= 0xC;
-		/* Specify that we're using the UART, not the CSI device. */
-		ME2_PORT1_PFC |= 0xC;
-	} else if (chan == 1) {
-		/* Specify that the relevant pins on the chip should do
-		   serial I/O, not direct I/O.  */
-		ME2_PORT2_PMC |= 0x6;
-		/* Specify that we're using the UART, not the CSI device. */
-		ME2_PORT2_PFC |= 0x6;
-	}
-}
diff --git a/arch/v850/kernel/memcons.c b/arch/v850/kernel/memcons.c
deleted file mode 100644
index 92f514fdcc79..000000000000
--- a/arch/v850/kernel/memcons.c
+++ /dev/null
@@ -1,135 +0,0 @@
-/*
- * arch/v850/kernel/memcons.c -- Console I/O to a memory buffer
- *
- *  Copyright (C) 2001,02  NEC Corporation
- *  Copyright (C) 2001,02  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#include <linux/kernel.h>
-#include <linux/console.h>
-#include <linux/tty.h>
-#include <linux/tty_driver.h>
-#include <linux/init.h>
-
-/* If this device is enabled, the linker map should define start and
-   end points for its buffer. */
-extern char memcons_output[], memcons_output_end;
-
-/* Current offset into the buffer.  */
-static unsigned long memcons_offs = 0;
-
-/* Spinlock protecting memcons_offs.  */
-static DEFINE_SPINLOCK(memcons_lock);
-
-
-static size_t write (const char *buf, size_t len)
-{
-	unsigned long flags;
-	char *point;
-
-	spin_lock_irqsave (memcons_lock, flags);
-
-	point = memcons_output + memcons_offs;
-	if (point + len >= &memcons_output_end) {
-		len = &memcons_output_end - point;
-		memcons_offs = 0;
-	} else
-		memcons_offs += len;
-
-	spin_unlock_irqrestore (memcons_lock, flags);
-
-	memcpy (point, buf, len);
-
-	return len;
-}
-
-
-/*  Low-level console. */
-
-static void memcons_write (struct console *co, const char *buf, unsigned len)
-{
-	while (len > 0)
-		len -= write (buf, len);
-}
-
-static struct tty_driver *tty_driver;
-
-static struct tty_driver *memcons_device (struct console *co, int *index)
-{
-	*index = co->index;
-	return tty_driver;
-}
-
-static struct console memcons =
-{
-    .name	= "memcons",
-    .write	= memcons_write,
-    .device	= memcons_device,
-    .flags	= CON_PRINTBUFFER,
-    .index	= -1,
-};
-
-void memcons_setup (void)
-{
-	register_console (&memcons);
-	printk (KERN_INFO "Console: static memory buffer (memcons)\n");
-}
-
-/* Higher level TTY interface.  */
-
-int memcons_tty_open (struct tty_struct *tty, struct file *filp)
-{
-	return 0;
-}
-
-int memcons_tty_write (struct tty_struct *tty, const unsigned char *buf, int len)
-{
-	return write (buf, len);
-}
-
-int memcons_tty_write_room (struct tty_struct *tty)
-{
-	return &memcons_output_end - (memcons_output + memcons_offs);
-}
-
-int memcons_tty_chars_in_buffer (struct tty_struct *tty)
-{
-	/* We have no buffer.  */
-	return 0;
-}
-
-static const struct tty_operations ops = {
-	.open = memcons_tty_open,
-	.write = memcons_tty_write,
-	.write_room = memcons_tty_write_room,
-	.chars_in_buffer = memcons_tty_chars_in_buffer,
-};
-
-int __init memcons_tty_init (void)
-{
-	int err;
-	struct tty_driver *driver = alloc_tty_driver(1);
-	if (!driver)
-		return -ENOMEM;
-
-	driver->name = "memcons";
-	driver->major = TTY_MAJOR;
-	driver->minor_start = 64;
-	driver->type = TTY_DRIVER_TYPE_SYSCONS;
-	driver->init_termios = tty_std_termios;
-	tty_set_operations(driver, &ops);
-	err = tty_register_driver(driver);
-	if (err) {
-		put_tty_driver(driver);
-		return err;
-	}
-	tty_driver = driver;
-	return 0;
-}
-__initcall (memcons_tty_init);
diff --git a/arch/v850/kernel/module.c b/arch/v850/kernel/module.c
deleted file mode 100644
index 64aeb3e37c52..000000000000
--- a/arch/v850/kernel/module.c
+++ /dev/null
@@ -1,237 +0,0 @@
-/*
- * arch/v850/kernel/module.c -- Architecture-specific module functions
- *
- *  Copyright (C) 2002,03  NEC Electronics Corporation
- *  Copyright (C) 2002,03  Miles Bader <miles@gnu.org>
- *  Copyright (C) 2001,03  Rusty Russell
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- *
- * Derived in part from arch/ppc/kernel/module.c
- */
-
-#include <linux/kernel.h>
-#include <linux/vmalloc.h>
-#include <linux/moduleloader.h>
-#include <linux/elf.h>
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(fmt , ...)
-#endif
-
-void *module_alloc (unsigned long size)
-{
-	return size == 0 ? 0 : vmalloc (size);
-}
-
-void module_free (struct module *mod, void *module_region)
-{
-	vfree (module_region);
-	/* FIXME: If module_region == mod->init_region, trim exception
-           table entries. */
-}
-
-int module_finalize (const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs,
-		     struct module *mod)
-{
-	return 0;
-}
-
-/* Count how many different relocations (different symbol, different
-   addend) */
-static unsigned int count_relocs(const Elf32_Rela *rela, unsigned int num)
-{
-	unsigned int i, j, ret = 0;
-
-	/* Sure, this is order(n^2), but it's usually short, and not
-           time critical */
-	for (i = 0; i < num; i++) {
-		for (j = 0; j < i; j++) {
-			/* If this addend appeared before, it's
-                           already been counted */
-			if (ELF32_R_SYM(rela[i].r_info)
-			    == ELF32_R_SYM(rela[j].r_info)
-			    && rela[i].r_addend == rela[j].r_addend)
-				break;
-		}
-		if (j == i) ret++;
-	}
-	return ret;
-}
-
-/* Get the potential trampolines size required of the init and
-   non-init sections */
-static unsigned long get_plt_size(const Elf32_Ehdr *hdr,
-				  const Elf32_Shdr *sechdrs,
-				  const char *secstrings,
-				  int is_init)
-{
-	unsigned long ret = 0;
-	unsigned i;
-
-	/* Everything marked ALLOC (this includes the exported
-           symbols) */
-	for (i = 1; i < hdr->e_shnum; i++) {
-		/* If it's called *.init*, and we're not init, we're
-                   not interested */
-		if ((strstr(secstrings + sechdrs[i].sh_name, ".init") != 0)
-		    != is_init)
-			continue;
-
-		if (sechdrs[i].sh_type == SHT_RELA) {
-			DEBUGP("Found relocations in section %u\n", i);
-			DEBUGP("Ptr: %p.  Number: %u\n",
-			       (void *)hdr + sechdrs[i].sh_offset,
-			       sechdrs[i].sh_size / sizeof(Elf32_Rela));
-			ret += count_relocs((void *)hdr
-					     + sechdrs[i].sh_offset,
-					     sechdrs[i].sh_size
-					     / sizeof(Elf32_Rela))
-				* sizeof(struct v850_plt_entry);
-		}
-	}
-
-	return ret;
-}
-
-int module_frob_arch_sections(Elf32_Ehdr *hdr,
-			      Elf32_Shdr *sechdrs,
-			      char *secstrings,
-			      struct module *me)
-{
-	unsigned int i;
-
-	/* Find .plt and .pltinit sections */
-	for (i = 0; i < hdr->e_shnum; i++) {
-		if (strcmp(secstrings + sechdrs[i].sh_name, ".init.plt") == 0)
-			me->arch.init_plt_section = i;
-		else if (strcmp(secstrings + sechdrs[i].sh_name, ".plt") == 0)
-			me->arch.core_plt_section = i;
-	}
-	if (!me->arch.core_plt_section || !me->arch.init_plt_section) {
-		printk("Module doesn't contain .plt or .plt.init sections.\n");
-		return -ENOEXEC;
-	}
-
-	/* Override their sizes */
-	sechdrs[me->arch.core_plt_section].sh_size
-		= get_plt_size(hdr, sechdrs, secstrings, 0);
-	sechdrs[me->arch.init_plt_section].sh_size
-		= get_plt_size(hdr, sechdrs, secstrings, 1);
-	return 0;
-}
-
-int apply_relocate (Elf32_Shdr *sechdrs, const char *strtab,
-		    unsigned int symindex, unsigned int relsec,
-		    struct module *mod)
-{
-	printk ("Barf\n");
-	return -ENOEXEC;
-}
-
-/* Set up a trampoline in the PLT to bounce us to the distant function */
-static uint32_t do_plt_call (void *location, Elf32_Addr val,
-			     Elf32_Shdr *sechdrs, struct module *mod)
-{
-	struct v850_plt_entry *entry;
-	/* Instructions used to do the indirect jump.  */
-	uint32_t tramp[2];
-
-	/* We have to trash a register, so we assume that any control
-	   transfer more than 21-bits away must be a function call
-	   (so we can use a call-clobbered register).  */
-	tramp[0] = 0x0621 + ((val & 0xffff) << 16);   /* mov sym, r1 ... */
-	tramp[1] = ((val >> 16) & 0xffff) + 0x610000; /* ...; jmp r1 */
-
-	/* Init, or core PLT? */
-	if (location >= mod->module_core
-	    && location < mod->module_core + mod->core_size)
-		entry = (void *)sechdrs[mod->arch.core_plt_section].sh_addr;
-	else
-		entry = (void *)sechdrs[mod->arch.init_plt_section].sh_addr;
-
-	/* Find this entry, or if that fails, the next avail. entry */
-	while (entry->tramp[0])
-		if (entry->tramp[0] == tramp[0] && entry->tramp[1] == tramp[1])
-			return (uint32_t)entry;
-		else
-			entry++;
-
-	entry->tramp[0] = tramp[0];
-	entry->tramp[1] = tramp[1];
-
-	return (uint32_t)entry;
-}
-
-int apply_relocate_add (Elf32_Shdr *sechdrs, const char *strtab,
-			unsigned int symindex, unsigned int relsec,
-			struct module *mod)
-{
-	unsigned int i;
-	Elf32_Rela *rela = (void *)sechdrs[relsec].sh_addr;
-
-	DEBUGP ("Applying relocate section %u to %u\n", relsec,
-		sechdrs[relsec].sh_info);
-
-	for (i = 0; i < sechdrs[relsec].sh_size / sizeof (*rela); i++) {
-		/* This is where to make the change */
-		uint32_t *loc
-			= ((void *)sechdrs[sechdrs[relsec].sh_info].sh_addr
-			   + rela[i].r_offset);
-		/* This is the symbol it is referring to.  Note that all
-		   undefined symbols have been resolved.  */
-		Elf32_Sym *sym
-			= ((Elf32_Sym *)sechdrs[symindex].sh_addr
-			   + ELF32_R_SYM (rela[i].r_info));
-		uint32_t val = sym->st_value + rela[i].r_addend;
-
-		switch (ELF32_R_TYPE (rela[i].r_info)) {
-		case R_V850_32:
-			/* We write two shorts instead of a long because even
-			   32-bit insns only need half-word alignment, but
-			   32-bit data writes need to be long-word aligned.  */
-			val += ((uint16_t *)loc)[0];
-			val += ((uint16_t *)loc)[1] << 16;
-			((uint16_t *)loc)[0] = val & 0xffff;
-			((uint16_t *)loc)[1] = (val >> 16) & 0xffff;
-			break;
-
-		case R_V850_22_PCREL:
-			/* Maybe jump indirectly via a PLT table entry.  */
-			if ((int32_t)(val - (uint32_t)loc) > 0x1fffff
-			    || (int32_t)(val - (uint32_t)loc) < -0x200000)
-				val = do_plt_call (loc, val, sechdrs, mod);
-
-			val -= (uint32_t)loc;
-
-			/* We write two shorts instead of a long because
-			   even 32-bit insns only need half-word alignment,
-			   but 32-bit data writes need to be long-word
-			   aligned.  */
-			((uint16_t *)loc)[0] =
-				(*(uint16_t *)loc & 0xffc0) /* opcode + reg */
-				| ((val >> 16) & 0xffc03f); /* offs high */
-			((uint16_t *)loc)[1] =
-				(val & 0xffff);		    /* offs low */
-			break;
-
-		default:
-			printk (KERN_ERR "module %s: Unknown reloc: %u\n",
-				mod->name, ELF32_R_TYPE (rela[i].r_info));
-			return -ENOEXEC;
-		}
-	}
-
-	return 0;
-}
-
-void
-module_arch_cleanup(struct module *mod)
-{
-}
diff --git a/arch/v850/kernel/process.c b/arch/v850/kernel/process.c
deleted file mode 100644
index e4a4b8e7d5a3..000000000000
--- a/arch/v850/kernel/process.c
+++ /dev/null
@@ -1,217 +0,0 @@
-/*
- * arch/v850/kernel/process.c -- Arch-dependent process handling
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#include <linux/errno.h>
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/smp.h>
-#include <linux/stddef.h>
-#include <linux/unistd.h>
-#include <linux/ptrace.h>
-#include <linux/slab.h>
-#include <linux/user.h>
-#include <linux/a.out.h>
-#include <linux/reboot.h>
-
-#include <asm/uaccess.h>
-#include <asm/system.h>
-#include <asm/pgtable.h>
-
-void (*pm_power_off)(void) = NULL;
-EXPORT_SYMBOL(pm_power_off);
-
-extern void ret_from_fork (void);
-
-
-/* The idle loop.  */
-static void default_idle (void)
-{
-	while (! need_resched ())
-		asm ("halt; nop; nop; nop; nop; nop" ::: "cc");
-}
-
-void (*idle)(void) = default_idle;
-
-/*
- * The idle thread. There's no useful work to be
- * done, so just try to conserve power and have a
- * low exit latency (ie sit in a loop waiting for
- * somebody to say that they'd like to reschedule)
- */
-void cpu_idle (void)
-{
-	/* endless idle loop with no priority at all */
-	while (1) {
-		while (!need_resched())
-			(*idle) ();
-
-		preempt_enable_no_resched();
-		schedule();
-		preempt_disable();
-	}
-}
-
-/*
- * This is the mechanism for creating a new kernel thread.
- *
- * NOTE! Only a kernel-only process (ie the swapper or direct descendants who
- * haven't done an "execve()") should use this: it will work within a system
- * call from a "real" process, but the process memory space will not be free'd
- * until both the parent and the child have exited.
- */
-int kernel_thread (int (*fn)(void *), void *arg, unsigned long flags)
-{
-	register mm_segment_t fs = get_fs ();
-	register unsigned long syscall asm (SYSCALL_NUM);
-	register unsigned long arg0 asm (SYSCALL_ARG0);
-	register unsigned long ret asm (SYSCALL_RET);
-
-	set_fs (KERNEL_DS);
-
-	/* Clone this thread.  Note that we don't pass the clone syscall's
-	   second argument -- it's ignored for calls from kernel mode (the
-	   child's SP is always set to the top of the kernel stack).  */
-	arg0 = flags | CLONE_VM;
-	syscall = __NR_clone;
-	asm volatile ("trap " SYSCALL_SHORT_TRAP
-		      : "=r" (ret), "=r" (syscall)
-		      : "1" (syscall), "r" (arg0)
-		      : SYSCALL_SHORT_CLOBBERS);
-
-	if (ret == 0) {
-		/* In child thread, call FN and exit.  */
-		arg0 = (*fn) (arg);
-		syscall = __NR_exit;
-		asm volatile ("trap " SYSCALL_SHORT_TRAP
-			      : "=r" (ret), "=r" (syscall)
-			      : "1" (syscall), "r" (arg0)
-			      : SYSCALL_SHORT_CLOBBERS);
-	}
-
-	/* In parent.  */
-	set_fs (fs);
-
-	return ret;
-}
-
-void flush_thread (void)
-{
-	set_fs (USER_DS);
-}
-
-int copy_thread (int nr, unsigned long clone_flags,
-		 unsigned long stack_start, unsigned long stack_size,
-		 struct task_struct *p, struct pt_regs *regs)
-{
-	/* Start pushing stuff from the top of the child's kernel stack.  */
-	unsigned long orig_ksp = task_tos(p);
-	unsigned long ksp = orig_ksp;
-	/* We push two `state save' stack fames (see entry.S) on the new
-	   kernel stack:
-	     1) The innermost one is what switch_thread would have
-	        pushed, and is used when we context switch to the child
-		thread for the first time.  It's set up to return to
-		ret_from_fork in entry.S.
-	     2) The outermost one (nearest the top) is what a syscall
-	        trap would have pushed, and is set up to return to the
-		same location as the parent thread, but with a return
-		value of 0. */
-	struct pt_regs *child_switch_regs, *child_trap_regs;
-
-	/* Trap frame.  */
-	ksp -= STATE_SAVE_SIZE;
-	child_trap_regs = (struct pt_regs *)(ksp + STATE_SAVE_PT_OFFSET);
-	/* Switch frame.  */
-	ksp -= STATE_SAVE_SIZE;
-	child_switch_regs = (struct pt_regs *)(ksp + STATE_SAVE_PT_OFFSET);
-
-	/* First copy parent's register state to child.  */
-	*child_switch_regs = *regs;
-	*child_trap_regs = *regs;
-
-	/* switch_thread returns to the restored value of the lp
-	   register (r31), so we make that the place where we want to
-	   jump when the child thread begins running.  */
-	child_switch_regs->gpr[GPR_LP] = (v850_reg_t)ret_from_fork;
-
-	if (regs->kernel_mode)
-		/* Since we're returning to kernel-mode, make sure the child's
-		   stored kernel stack pointer agrees with what the actual
-		   stack pointer will be at that point (the trap return code
-		   always restores the SP, even when returning to
-		   kernel-mode).  */
-		child_trap_regs->gpr[GPR_SP] = orig_ksp;
-	else
-		/* Set the child's user-mode stack-pointer (the name
-		   `stack_start' is a misnomer, it's just the initial SP
-		   value).  */
-		child_trap_regs->gpr[GPR_SP] = stack_start;
-
-	/* Thread state for the child (everything else is on the stack).  */
-	p->thread.ksp = ksp;
-
-	return 0;
-}
-
-/*
- * sys_execve() executes a new program.
- */
-int sys_execve (char *name, char **argv, char **envp, struct pt_regs *regs)
-{
-	char *filename = getname (name);
-	int error = PTR_ERR (filename);
-
-	if (! IS_ERR (filename)) {
-		error = do_execve (filename, argv, envp, regs);
-		putname (filename);
-	}
-
-	return error;
-}
-
-
-/*
- * These bracket the sleeping functions..
- */
-#define first_sched	((unsigned long)__sched_text_start)
-#define last_sched	((unsigned long)__sched_text_end)
-
-unsigned long get_wchan (struct task_struct *p)
-{
-#if 0  /* Barf.  Figure out the stack-layout later.  XXX  */
-	unsigned long fp, pc;
-	int count = 0;
-
-	if (!p || p == current || p->state == TASK_RUNNING)
-		return 0;
-
-	pc = thread_saved_pc (p);
-
-	/* This quite disgusting function walks up the stack, following
-	   saved return address, until it something that's out of bounds
-	   (as defined by `first_sched' and `last_sched').  It then
-	   returns the last PC that was in-bounds.  */
-	do {
-		if (fp < stack_page + sizeof (struct task_struct) ||
-		    fp >= 8184+stack_page)
-			return 0;
-		pc = ((unsigned long *)fp)[1];
-		if (pc < first_sched || pc >= last_sched)
-			return pc;
-		fp = *(unsigned long *) fp;
-	} while (count++ < 16);
-#endif
-
-	return 0;
-}
diff --git a/arch/v850/kernel/procfs.c b/arch/v850/kernel/procfs.c
deleted file mode 100644
index e433cde789b4..000000000000
--- a/arch/v850/kernel/procfs.c
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * arch/v850/kernel/procfs.c -- Introspection functions for /proc filesystem
- *
- *  Copyright (C) 2001,02  NEC Corporation
- *  Copyright (C) 2001,02  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#include "mach.h"
-
-static int cpuinfo_print (struct seq_file *m, void *v)
-{
-	extern unsigned long loops_per_jiffy;
-	
-	seq_printf (m, "CPU-Family:	v850\nCPU-Arch:	%s\n", CPU_ARCH);
-
-#ifdef CPU_MODEL_LONG
-	seq_printf (m, "CPU-Model:	%s (%s)\n", CPU_MODEL, CPU_MODEL_LONG);
-#else
-	seq_printf (m, "CPU-Model:	%s\n", CPU_MODEL);
-#endif
-
-#ifdef CPU_CLOCK_FREQ
-	seq_printf (m, "CPU-Clock:	%ld (%ld MHz)\n",
-		    (long)CPU_CLOCK_FREQ,
-		    (long)CPU_CLOCK_FREQ / 1000000);
-#endif
-
-	seq_printf (m, "BogoMips:	%lu.%02lu\n",
-		    loops_per_jiffy/(500000/HZ),
-		    (loops_per_jiffy/(5000/HZ)) % 100);
-
-#ifdef PLATFORM_LONG
-	seq_printf (m, "Platform:	%s (%s)\n", PLATFORM, PLATFORM_LONG);
-#elif defined (PLATFORM)
-	seq_printf (m, "Platform:	%s\n", PLATFORM);
-#endif
-
-	return 0;
-}
-
-static void *cpuinfo_start (struct seq_file *m, loff_t *pos)
-{
-	return *pos < NR_CPUS ? ((void *) 0x12345678) : NULL;
-}
-
-static void *cpuinfo_next (struct seq_file *m, void *v, loff_t *pos)
-{
-	++*pos;
-	return cpuinfo_start (m, pos);
-}
-
-static void cpuinfo_stop (struct seq_file *m, void *v)
-{
-}
-
-const struct seq_operations cpuinfo_op = {
-	.start	= cpuinfo_start,
-	.next	= cpuinfo_next,
-	.stop	= cpuinfo_stop,
-	.show	= cpuinfo_print
-};
diff --git a/arch/v850/kernel/ptrace.c b/arch/v850/kernel/ptrace.c
deleted file mode 100644
index a458ac941b25..000000000000
--- a/arch/v850/kernel/ptrace.c
+++ /dev/null
@@ -1,235 +0,0 @@
-/*
- * arch/v850/kernel/ptrace.c -- `ptrace' system call
- *
- *  Copyright (C) 2002,03,04  NEC Electronics Corporation
- *  Copyright (C) 2002,03,04  Miles Bader <miles@gnu.org>
- *
- * Derived from arch/mips/kernel/ptrace.c:
- *
- *  Copyright (C) 1992 Ross Biro
- *  Copyright (C) Linus Torvalds
- *  Copyright (C) 1994, 95, 96, 97, 98, 2000 Ralf Baechle
- *  Copyright (C) 1996 David S. Miller
- *  Kevin D. Kissell, kevink@mips.com and Carsten Langgaard, carstenl@mips.com
- *  Copyright (C) 1999 MIPS Technologies, Inc.
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- */
-
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/sched.h>
-#include <linux/ptrace.h>
-#include <linux/signal.h>
-
-#include <asm/errno.h>
-#include <asm/ptrace.h>
-#include <asm/processor.h>
-#include <asm/uaccess.h>
-
-/* Returns the address where the register at REG_OFFS in P is stashed away.  */
-static v850_reg_t *reg_save_addr (unsigned reg_offs, struct task_struct *t)
-{
-	struct pt_regs *regs;
-
-	/* Three basic cases:
-
-	   (1) A register normally saved before calling the scheduler, is
-	       available in the kernel entry pt_regs structure at the top
-	       of the kernel stack.  The kernel trap/irq exit path takes
-	       care to save/restore almost all registers for ptrace'd
-	       processes.
-
-	   (2) A call-clobbered register, where the process P entered the
-	       kernel via [syscall] trap, is not stored anywhere; that's
-	       OK, because such registers are not expected to be preserved
-	       when the trap returns anyway (so we don't actually bother to
-	       test for this case).
-
-	   (3) A few registers not used at all by the kernel, and so
-	       normally never saved except by context-switches, are in the
-	       context switch state.  */
-
-	if (reg_offs == PT_CTPC || reg_offs == PT_CTPSW || reg_offs == PT_CTBP)
-		/* Register saved during context switch.  */
-		regs = thread_saved_regs (t);
-	else
-		/* Register saved during kernel entry (or not available).  */
-		regs = task_pt_regs (t);
-
-	return (v850_reg_t *)((char *)regs + reg_offs);
-}
-
-/* Set the bits SET and clear the bits CLEAR in the v850e DIR
-   (`debug information register').  Returns the new value of DIR.  */
-static inline v850_reg_t set_dir (v850_reg_t set, v850_reg_t clear)
-{
-	register v850_reg_t rval asm ("r10");
-	register v850_reg_t arg0 asm ("r6") = set;
-	register v850_reg_t arg1 asm ("r7") = clear;
-
-	/* The dbtrap handler has exactly this functionality when called
-	   from kernel mode.  0xf840 is a `dbtrap' insn.  */
-	asm (".short 0xf840" : "=r" (rval) : "r" (arg0), "r" (arg1));
-
-	return rval;
-}
-
-/* Makes sure hardware single-stepping is (globally) enabled.
-   Returns true if successful.  */
-static inline int enable_single_stepping (void)
-{
-	static int enabled = 0;	/* Remember whether we already did it.  */
-	if (! enabled) {
-		/* Turn on the SE (`single-step enable') bit, 0x100, in the
-		   DIR (`debug information register').  This may fail if a
-		   processor doesn't support it or something.  We also try
-		   to clear bit 0x40 (`INI'), which is necessary to use the
-		   debug stuff on the v850e2; on the v850e, clearing 0x40
-		   shouldn't cause any problem.  */
-		v850_reg_t dir = set_dir (0x100, 0x40);
-		/* Make sure it really got set.  */
-		if (dir & 0x100)
-			enabled = 1;
-	}
-	return enabled;
-}
-
-/* Try to set CHILD's single-step flag to VAL.  Returns true if successful.  */
-static int set_single_step (struct task_struct *t, int val)
-{
-	v850_reg_t *psw_addr = reg_save_addr(PT_PSW, t);
-	if (val) {
-		/* Make sure single-stepping is enabled.  */
-		if (! enable_single_stepping ())
-			return 0;
-		/* Set T's single-step flag.  */
-		*psw_addr |= 0x800;
-	} else
-		*psw_addr &= ~0x800;
-	return 1;
-}
-
-long arch_ptrace(struct task_struct *child, long request, long addr, long data)
-{
-	int rval;
-
-	switch (request) {
-		unsigned long val;
-
-	case PTRACE_PEEKTEXT: /* read word at location addr. */
-	case PTRACE_PEEKDATA:
-		rval = generic_ptrace_peekdata(child, addr, data);
-		goto out;
-
-	case PTRACE_POKETEXT: /* write the word at location addr. */
-	case PTRACE_POKEDATA:
-		rval = generic_ptrace_pokedata(child, addr, data);
-		goto out;
-
-	/* Read/write the word at location ADDR in the registers.  */
-	case PTRACE_PEEKUSR:
-	case PTRACE_POKEUSR:
-		rval = 0;
-		if (addr >= PT_SIZE && request == PTRACE_PEEKUSR) {
-			/* Special requests that don't actually correspond
-			   to offsets in struct pt_regs.  */
-			if (addr == PT_TEXT_ADDR)
-				val = child->mm->start_code;
-			else if (addr == PT_DATA_ADDR)
-				val = child->mm->start_data;
-			else if (addr == PT_TEXT_LEN)
-				val = child->mm->end_code
-					- child->mm->start_code;
-			else
-				rval = -EIO;
-		} else if (addr >= 0 && addr < PT_SIZE && (addr & 0x3) == 0) {
-			v850_reg_t *reg_addr = reg_save_addr(addr, child);
-			if (request == PTRACE_PEEKUSR)
-				val = *reg_addr;
-			else
-				*reg_addr = data;
-		} else
-			rval = -EIO;
-
-		if (rval == 0 && request == PTRACE_PEEKUSR)
-			rval = put_user (val, (unsigned long *)data);
-		goto out;
-
-	/* Continue and stop at next (return from) syscall */
-	case PTRACE_SYSCALL:
-	/* Restart after a signal.  */
-	case PTRACE_CONT:
-	/* Execute a single instruction. */
-	case PTRACE_SINGLESTEP:
-		rval = -EIO;
-		if (!valid_signal(data))
-			break;
-
-		/* Turn CHILD's single-step flag on or off.  */
-		if (! set_single_step (child, request == PTRACE_SINGLESTEP))
-			break;
-
-		if (request == PTRACE_SYSCALL)
-			set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
-		else
-			clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
-
-		child->exit_code = data;
-		wake_up_process(child);
-		rval = 0;
-		break;
-
-	/*
-	 * make the child exit.  Best I can do is send it a sigkill.
-	 * perhaps it should be put in the status that it wants to
-	 * exit.
-	 */
-	case PTRACE_KILL:
-		rval = 0;
-		if (child->exit_state == EXIT_ZOMBIE)	/* already dead */
-			break;
-		child->exit_code = SIGKILL;
-		wake_up_process(child);
-		break;
-
-	case PTRACE_DETACH: /* detach a process that was attached. */
-		set_single_step (child, 0);  /* Clear single-step flag */
-		rval = ptrace_detach(child, data);
-		break;
-
-	default:
-		rval = -EIO;
-		goto out;
-	}
- out:
-	return rval;
-}
-
-asmlinkage void syscall_trace(void)
-{
-	if (!test_thread_flag(TIF_SYSCALL_TRACE))
-		return;
-	if (!(current->ptrace & PT_PTRACED))
-		return;
-	/* The 0x80 provides a way for the tracing parent to distinguish
-	   between a syscall stop and SIGTRAP delivery */
-	ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD)
-				 ? 0x80 : 0));
-	/*
-	 * this isn't the same as continuing with a signal, but it will do
-	 * for normal use.  strace only continues with a signal if the
-	 * stopping signal is not SIGTRAP.  -brl
-	 */
-	if (current->exit_code) {
-		send_sig(current->exit_code, current, 1);
-		current->exit_code = 0;
-	}
-}
-
-void ptrace_disable (struct task_struct *child)
-{
-	/* nothing to do */
-}
diff --git a/arch/v850/kernel/rte_cb.c b/arch/v850/kernel/rte_cb.c
deleted file mode 100644
index 43018e1edebd..000000000000
--- a/arch/v850/kernel/rte_cb.c
+++ /dev/null
@@ -1,193 +0,0 @@
-/*
- * include/asm-v850/rte_cb.c -- Midas lab RTE-CB series of evaluation boards
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#include <linux/init.h>
-#include <linux/irq.h>
-#include <linux/fs.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-
-#include <asm/machdep.h>
-#include <asm/v850e_uart.h>
-
-#include "mach.h"
-
-static void led_tick (void);
-
-/* LED access routines.  */
-extern unsigned read_leds (int pos, char *buf, int len);
-extern unsigned write_leds (int pos, const char *buf, int len);
-
-#ifdef CONFIG_RTE_CB_MULTI
-extern void multi_init (void);
-#endif
-
-
-void __init rte_cb_early_init (void)
-{
-	v850e_intc_disable_irqs ();
-
-#ifdef CONFIG_RTE_CB_MULTI
-	multi_init ();
-#endif
-}
-
-void __init mach_setup (char **cmdline)
-{
-#ifdef CONFIG_RTE_MB_A_PCI
-	/* Probe for Mother-A, and print a message if we find it.  */
-	*(volatile unsigned long *)MB_A_SRAM_ADDR = 0xDEADBEEF;
-	if (*(volatile unsigned long *)MB_A_SRAM_ADDR == 0xDEADBEEF) {
-		*(volatile unsigned long *)MB_A_SRAM_ADDR = 0x12345678;
-		if (*(volatile unsigned long *)MB_A_SRAM_ADDR == 0x12345678)
-			printk (KERN_INFO
-				"          NEC SolutionGear/Midas lab"
-				" RTE-MOTHER-A motherboard\n");
-	}
-#endif /* CONFIG_RTE_MB_A_PCI */
-
-	mach_tick = led_tick;
-}
-
-void machine_restart (char *__unused)
-{
-#ifdef CONFIG_RESET_GUARD
-	disable_reset_guard ();
-#endif
-	asm ("jmp r0"); /* Jump to the reset vector.  */
-}
-
-/* This says `HALt.' in LEDese.  */
-static unsigned char halt_leds_msg[] = { 0x76, 0x77, 0x38, 0xF8 };
-
-void machine_halt (void)
-{
-#ifdef CONFIG_RESET_GUARD
-	disable_reset_guard ();
-#endif
-
-	/* Ignore all interrupts.  */
-	local_irq_disable ();
-
-	/* Write a little message.  */
-	write_leds (0, halt_leds_msg, sizeof halt_leds_msg);
-
-	/* Really halt.  */
-	for (;;)
-		asm ("halt; nop; nop; nop; nop; nop");
-}
-
-void machine_power_off (void)
-{
-	machine_halt ();
-}
-
-
-/* Animated LED display for timer tick.  */
-
-#define TICK_UPD_FREQ	6
-static int tick_frames[][10] = {
-	{ 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, -1 },
-	{ 0x63, 0x5c, -1 },
-	{ 0x5c, 0x00, -1 },
-	{ 0x63, 0x00, -1 },
-	{ -1 }
-};
-
-static void led_tick ()
-{
-	static unsigned counter = 0;
-	
-	if (++counter == (HZ / TICK_UPD_FREQ)) {
-		/* Which frame we're currently displaying for each digit.  */
-		static unsigned frame_nums[LED_NUM_DIGITS] = { 0 };
-		/* Display image.  */
-		static unsigned char image[LED_NUM_DIGITS] = { 0 };
-		unsigned char prev_image[LED_NUM_DIGITS];
-		int write_to_leds = 1; /* true if we should actually display */
-		int digit;
-
-		/* We check to see if the physical LEDs contains what we last
-		   wrote to them; if not, we suppress display (this is so that
-		   users can write to the LEDs, and not have their output
-		   overwritten).  As a special case, we start writing again if
-		   all the LEDs are blank, or our display image is all zeros
-		   (indicating that this is the initial update, when the actual
-		   LEDs might contain random data).  */
-		read_leds (0, prev_image, LED_NUM_DIGITS);
-		for (digit = 0; digit < LED_NUM_DIGITS; digit++)
-			if (image[digit] != prev_image[digit]
-			    && image[digit] && prev_image[digit])
-			{
-				write_to_leds = 0;
-				break;
-			}
-
-		/* Update display image.  */
-		for (digit = 0;
-		     digit < LED_NUM_DIGITS && tick_frames[digit][0] >= 0;
-		     digit++)
-		{
-			int frame = tick_frames[digit][frame_nums[digit]];
-			if (frame < 0) {
-				image[digit] = tick_frames[digit][0];
-				frame_nums[digit] = 1;
-			} else {
-				image[digit] = frame;
-				frame_nums[digit]++;
-				break;
-			}
-		}
-
-		if (write_to_leds)
-			/* Write the display image to the physical LEDs.  */
-			write_leds (0, image, LED_NUM_DIGITS);
-
-		counter = 0;
-	}
-}
-
-
-/* Mother-A interrupts.  */
-
-#ifdef CONFIG_RTE_GBUS_INT
-
-#define L GBUS_INT_PRIORITY_LOW
-#define M GBUS_INT_PRIORITY_MEDIUM
-#define H GBUS_INT_PRIORITY_HIGH
-
-static struct gbus_int_irq_init gbus_irq_inits[] = {
-#ifdef CONFIG_RTE_MB_A_PCI
-	{ "MB_A_LAN",	IRQ_MB_A_LAN,		1,		     1, L },
-	{ "MB_A_PCI1",	IRQ_MB_A_PCI1(0),	IRQ_MB_A_PCI1_NUM,   1, L },
-	{ "MB_A_PCI2",	IRQ_MB_A_PCI2(0),	IRQ_MB_A_PCI2_NUM,   1, L },
-	{ "MB_A_EXT",	IRQ_MB_A_EXT(0),	IRQ_MB_A_EXT_NUM,    1, L },
-	{ "MB_A_USB_OC",IRQ_MB_A_USB_OC(0),	IRQ_MB_A_USB_OC_NUM, 1, L },
-	{ "MB_A_PCMCIA_OC",IRQ_MB_A_PCMCIA_OC,	1,		     1, L },
-#endif
-	{ 0 }
-};
-#define NUM_GBUS_IRQ_INITS (ARRAY_SIZE(gbus_irq_inits) - 1)
-
-static struct hw_interrupt_type gbus_hw_itypes[NUM_GBUS_IRQ_INITS];
-
-#endif /* CONFIG_RTE_GBUS_INT */
-
-
-void __init rte_cb_init_irqs (void)
-{
-#ifdef CONFIG_RTE_GBUS_INT
-	gbus_int_init_irqs ();
-	gbus_int_init_irq_types (gbus_irq_inits, gbus_hw_itypes);
-#endif /* CONFIG_RTE_GBUS_INT */
-}
diff --git a/arch/v850/kernel/rte_cb_leds.c b/arch/v850/kernel/rte_cb_leds.c
deleted file mode 100644
index aa47ab1dcd87..000000000000
--- a/arch/v850/kernel/rte_cb_leds.c
+++ /dev/null
@@ -1,137 +0,0 @@
-/*
- * include/asm-v850/rte_cb_leds.c -- Midas lab RTE-CB board LED device support
- *
- *  Copyright (C) 2002,03  NEC Electronics Corporation
- *  Copyright (C) 2002,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#include <linux/init.h>
-#include <linux/spinlock.h>
-#include <linux/fs.h>
-#include <linux/miscdevice.h>
-
-#include <asm/uaccess.h>
-
-#define LEDS_MINOR	169	/* Minor device number, using misc major.  */
-
-/* The actual LED hardware is write-only, so we hold the contents here too.  */
-static unsigned char leds_image[LED_NUM_DIGITS] = { 0 };
-
-/* Spinlock protecting the above leds.  */
-static DEFINE_SPINLOCK(leds_lock);
-
-/* Common body of LED read/write functions, checks POS and LEN for
-   correctness, declares a variable using IMG_DECL, initialized pointing at
-   the POS position in the LED image buffer, and and iterates COPY_EXPR
-   until BUF is equal to the last buffer position; finally, sets LEN to be
-   the amount actually copied.  IMG should be a variable declaration
-   (without an initializer or a terminating semicolon); POS, BUF, and LEN
-   should all be simple variables.  */
-#define DO_LED_COPY(img_decl, pos, buf, len, copy_expr)			\
-do {									\
-	if (pos > LED_NUM_DIGITS)					\
-		len = 0;						\
-	else {								\
-		if (pos + len > LED_NUM_DIGITS)				\
-			len = LED_NUM_DIGITS - pos;			\
-									\
-		if (len > 0) {						\
-			unsigned long _flags;				\
-			const char *_end = buf + len;			\
-			img_decl = &leds_image[pos];			\
-									\
-			spin_lock_irqsave (leds_lock, _flags);		\
-			do						\
-				(copy_expr);				\
-			while (buf != _end);				\
-			spin_unlock_irqrestore (leds_lock, _flags);	\
-		}							\
-	}								\
-} while (0)
-
-/* Read LEN bytes from LEDs at position POS, into BUF.
-   Returns actual amount read.  */
-unsigned read_leds (unsigned pos, char *buf, unsigned len)
-{
-	DO_LED_COPY (const char *img, pos, buf, len, *buf++ = *img++);
-	return len;
-}
-
-/* Write LEN bytes to LEDs at position POS, from BUF.
-   Returns actual amount written.  */
-unsigned write_leds (unsigned pos, const char *buf, unsigned len)
-{
-	/* We write the actual LED values backwards, because
-	   increasing memory addresses reflect LEDs right-to-left. */
-	volatile char *led = &LED (LED_NUM_DIGITS - pos - 1);
-	/* We invert the value written to the hardware, because 1 = off,
-	   and 0 = on.  */
-	DO_LED_COPY (char *img, pos, buf, len,
-		     *led-- = 0xFF ^ (*img++ = *buf++));
-	return len;
-}
-
-
-/* Device functions.  */
-
-static ssize_t leds_dev_read (struct file *file, char *buf, size_t len,
-			      loff_t *pos)
-{
-	char temp_buf[LED_NUM_DIGITS];
-	len = read_leds (*pos, temp_buf, len);
-	if (copy_to_user (buf, temp_buf, len))
-		return -EFAULT;
-	*pos += len;
-	return len;
-}
-
-static ssize_t leds_dev_write (struct file *file, const char *buf, size_t len,
-			       loff_t *pos)
-{
-	char temp_buf[LED_NUM_DIGITS];
-	if (copy_from_user (temp_buf, buf, min_t(size_t, len, LED_NUM_DIGITS)))
-		return -EFAULT;
-	len = write_leds (*pos, temp_buf, len);
-	*pos += len;
-	return len;
-}
-
-static loff_t leds_dev_lseek (struct file *file, loff_t offs, int whence)
-{
-	if (whence == 1)
-		offs += file->f_pos; /* relative */
-	else if (whence == 2)
-		offs += LED_NUM_DIGITS; /* end-relative */
-
-	if (offs < 0 || offs > LED_NUM_DIGITS)
-		return -EINVAL;
-
-	file->f_pos = offs;
-
-	return 0;
-}
-
-static const struct file_operations leds_fops = {
-	.read		= leds_dev_read,
-	.write		= leds_dev_write,
-	.llseek		= leds_dev_lseek
-};
-
-static struct miscdevice leds_miscdev = {
-	.name		= "leds",
-	.minor		= LEDS_MINOR,
-	.fops		= &leds_fops
-};
-
-int __init leds_dev_init (void)
-{
-	return misc_register (&leds_miscdev);
-}
-
-__initcall (leds_dev_init);
diff --git a/arch/v850/kernel/rte_cb_multi.c b/arch/v850/kernel/rte_cb_multi.c
deleted file mode 100644
index 963d55ab34cc..000000000000
--- a/arch/v850/kernel/rte_cb_multi.c
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
- * include/asm-v850/rte_multi.c -- Support for Multi debugger monitor ROM
- * 	on Midas lab RTE-CB series of evaluation boards
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#include <linux/init.h>
-
-#include <asm/machdep.h>
-
-#define IRQ_ADDR(irq) (0x80 + (irq) * 0x10)
-
-/* A table of which interrupt vectors to install, since blindly
-   installing all of them makes the debugger stop working.  This is a
-   list of offsets in the interrupt vector area; each entry means to
-   copy that particular 16-byte vector.  An entry less than zero ends
-   the table.  */
-static long multi_intv_install_table[] = {
-	/* Trap vectors */
-	0x40, 0x50,		
-
-#ifdef CONFIG_RTE_CB_MULTI_DBTRAP
-	/* Illegal insn / dbtrap.  These are used by multi, so only handle
-	   them if configured to do so.  */
-	0x60,
-#endif
-
-	/* GINT1 - GINT3 (note, not GINT0!) */
-	IRQ_ADDR (IRQ_GINT(1)),
-	IRQ_ADDR (IRQ_GINT(2)),
-	IRQ_ADDR (IRQ_GINT(3)),
-
-	/* Timer D interrupts (up to 4 timers) */
-	IRQ_ADDR (IRQ_INTCMD(0)),
-#if IRQ_INTCMD_NUM > 1
-	IRQ_ADDR (IRQ_INTCMD(1)),
-#if IRQ_INTCMD_NUM > 2
-	IRQ_ADDR (IRQ_INTCMD(2)),
-#if IRQ_INTCMD_NUM > 3
-	IRQ_ADDR (IRQ_INTCMD(3)),
-#endif
-#endif
-#endif
-	
-	/* UART interrupts (up to 3 channels) */
-	IRQ_ADDR (IRQ_INTSER (0)), /* err */
-	IRQ_ADDR (IRQ_INTSR  (0)), /* rx */
-	IRQ_ADDR (IRQ_INTST  (0)), /* tx */
-#if IRQ_INTSR_NUM > 1
-	IRQ_ADDR (IRQ_INTSER (1)), /* err */
-	IRQ_ADDR (IRQ_INTSR  (1)), /* rx */
-	IRQ_ADDR (IRQ_INTST  (1)), /* tx */
-#if IRQ_INTSR_NUM > 2
-	IRQ_ADDR (IRQ_INTSER (2)), /* err */
-	IRQ_ADDR (IRQ_INTSR  (2)), /* rx */
-	IRQ_ADDR (IRQ_INTST  (2)), /* tx */
-#endif
-#endif
-
-	-1
-};
-
-/* Early initialization for kernel using Multi debugger ROM monitor.  */
-void __init multi_init (void)
-{
-	/* We're using the Multi debugger monitor, so we have to install
-	   the interrupt vectors.  The monitor doesn't allow them to be
-	   initially downloaded into their final destination because
-	   it's in the monitor's scratch-RAM area.  Unfortunately, Multi
-	   also doesn't deal correctly with ELF sections where the LMA
-	   and VMA differ -- it just ignores the LMA -- so we can't use
-	   that feature to work around the problem.  What we do instead
-	   is just put the interrupt vectors into a normal section, and
-	   do the necessary copying and relocation here.  Since the
-	   interrupt vector basically only contains `jr' instructions
-	   and no-ops, it's not that hard.  */
-	extern unsigned long _intv_load_start, _intv_start;
-	register unsigned long *src = &_intv_load_start;
-	register unsigned long *dst = (unsigned long *)INTV_BASE;
-	register unsigned long jr_fixup = (char *)&_intv_start - (char *)dst;
-	register long *ii;
-
-	/* Copy interrupt vectors as instructed by multi_intv_install_table. */
-	for (ii = multi_intv_install_table; *ii >= 0; ii++) {
-		/* Copy 16-byte interrupt vector at offset *ii.  */
-		int boffs;
-		for (boffs = 0; boffs < 0x10; boffs += sizeof *src) {
-			/* Copy a single word, fixing up the jump offs
-			   if it's a `jr' instruction.  */
-			int woffs = (*ii + boffs) / sizeof *src;
-			unsigned long word = src[woffs];
-
-			if ((word & 0xFC0) == 0x780) {
-				/* A `jr' insn, fix up its offset (and yes, the
-				   weird half-word swapping is intentional). */
-				unsigned short hi = word & 0xFFFF;
-				unsigned short lo = word >> 16;
-				unsigned long udisp22
-					= lo + ((hi & 0x3F) << 16);
-				long disp22 = (long)(udisp22 << 10) >> 10;
-
-				disp22 += jr_fixup;
-
-				hi = ((disp22 >> 16) & 0x3F) | 0x780;
-				lo = disp22 & 0xFFFF;
-
-				word = hi + (lo << 16);
-			}
-
-			dst[woffs] = word;
-		}
-	}
-}
diff --git a/arch/v850/kernel/rte_ma1_cb-rom.ld b/arch/v850/kernel/rte_ma1_cb-rom.ld
deleted file mode 100644
index 87b618f8253b..000000000000
--- a/arch/v850/kernel/rte_ma1_cb-rom.ld
+++ /dev/null
@@ -1,14 +0,0 @@
-/* Linker script for the Midas labs RTE-V850E/MA1-CB evaluation board
-   (CONFIG_RTE_CB_MA1), with kernel in ROM.  */
-
-MEMORY {
-	ROM   : ORIGIN = 0x00000000, LENGTH = 0x00100000
-	/* 1MB of SRAM.  This memory is mirrored 4 times.  */
-	SRAM  : ORIGIN = SRAM_ADDR,  LENGTH = SRAM_SIZE
-	/* 32MB of SDRAM.  */
-	SDRAM : ORIGIN = SDRAM_ADDR, LENGTH = SDRAM_SIZE
-}
-
-SECTIONS {
-	ROMK_SECTIONS(ROM, SRAM)
-}
diff --git a/arch/v850/kernel/rte_ma1_cb.c b/arch/v850/kernel/rte_ma1_cb.c
deleted file mode 100644
index 08abf3d5f8df..000000000000
--- a/arch/v850/kernel/rte_ma1_cb.c
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- * arch/v850/kernel/rte_ma1_cb.c -- Midas labs RTE-V850E/MA1-CB board
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/bootmem.h>
-
-#include <asm/atomic.h>
-#include <asm/page.h>
-#include <asm/ma1.h>
-#include <asm/rte_ma1_cb.h>
-#include <asm/v850e_timer_c.h>
-
-#include "mach.h"
-
-
-/* SRAM and SDRAM are almost contiguous (with a small hole in between;
-   see mach_reserve_bootmem for details), so just use both as one big area.  */
-#define RAM_START 	SRAM_ADDR
-#define RAM_END		(SDRAM_ADDR + SDRAM_SIZE)
-
-
-void __init mach_early_init (void)
-{
-	rte_cb_early_init ();
-}
-
-void __init mach_get_physical_ram (unsigned long *ram_start,
-				   unsigned long *ram_len)
-{
-	*ram_start = RAM_START;
-	*ram_len = RAM_END - RAM_START;
-}
-
-void __init mach_reserve_bootmem ()
-{
-#ifdef CONFIG_RTE_CB_MULTI
-	/* Prevent the kernel from touching the monitor's scratch RAM.  */
-	reserve_bootmem(MON_SCRATCH_ADDR, MON_SCRATCH_SIZE,
-			BOOTMEM_DEFAULT);
-#endif
-
-	/* The space between SRAM and SDRAM is filled with duplicate
-	   images of SRAM.  Prevent the kernel from using them.  */
-	reserve_bootmem (SRAM_ADDR + SRAM_SIZE,
-			 SDRAM_ADDR - (SRAM_ADDR + SRAM_SIZE),
-			 BOOTMEM_DEFAULT);
-}
-
-void mach_gettimeofday (struct timespec *tv)
-{
-	tv->tv_sec = 0;
-	tv->tv_nsec = 0;
-}
-
-/* Called before configuring an on-chip UART.  */
-void rte_ma1_cb_uart_pre_configure (unsigned chan,
-				    unsigned cflags, unsigned baud)
-{
-	/* The RTE-MA1-CB connects some general-purpose I/O pins on the
-	   CPU to the RTS/CTS lines of UART 0's serial connection.
-	   I/O pins P42 and P43 are RTS and CTS respectively.  */
-	if (chan == 0) {
-		/* Put P42 & P43 in I/O port mode.  */
-		MA_PORT4_PMC &= ~0xC;
-		/* Make P42 an output, and P43 an input.  */
-		MA_PORT4_PM = (MA_PORT4_PM & ~0xC) | 0x8;
-	}
-
-	/* Do pre-configuration for the actual UART.  */
-	ma_uart_pre_configure (chan, cflags, baud);
-}
-
-void __init mach_init_irqs (void)
-{
-	unsigned tc;
-
-	/* Initialize interrupts.  */
-	ma_init_irqs ();
-	rte_cb_init_irqs ();
-
-	/* Use falling-edge-sensitivity for interrupts .  */
-	V850E_TIMER_C_SESC (0) &= ~0xC;
-	V850E_TIMER_C_SESC (1) &= ~0xF;
-
-	/* INTP000-INTP011 are shared with `Timer C', so we have to set
-	   up Timer C to pass them through as raw interrupts.  */
-	for (tc = 0; tc < 2; tc++)
-		/* Turn on the timer.  */
-		V850E_TIMER_C_TMCC0 (tc) |= V850E_TIMER_C_TMCC0_CAE;
-
-	/* Make sure the relevant port0/port1 pins are assigned
-	   interrupt duty.  We used INTP001-INTP011 (don't screw with
-	   INTP000 because the monitor uses it).  */
-	MA_PORT0_PMC |= 0x4;	/* P02 (INTP001) in IRQ mode.  */
-	MA_PORT1_PMC |= 0x6;	/* P11 (INTP010) & P12 (INTP011) in IRQ mode.*/
-}
diff --git a/arch/v850/kernel/rte_ma1_cb.ld b/arch/v850/kernel/rte_ma1_cb.ld
deleted file mode 100644
index c8e16d16be41..000000000000
--- a/arch/v850/kernel/rte_ma1_cb.ld
+++ /dev/null
@@ -1,57 +0,0 @@
-/* Linker script for the Midas labs RTE-V850E/MA1-CB evaluation board
-   (CONFIG_RTE_CB_MA1), with kernel in SDRAM, under Multi debugger.  */
-
-MEMORY {
-	/* 1MB of SRAM; we can't use the last 32KB, because it's used by
-	   the monitor scratch-RAM.  This memory is mirrored 4 times.  */
-	SRAM  : ORIGIN = SRAM_ADDR,  LENGTH = (SRAM_SIZE - MON_SCRATCH_SIZE)
-	/* Monitor scratch RAM; only the interrupt vectors should go here.  */
-	MRAM  : ORIGIN = MON_SCRATCH_ADDR,  LENGTH = MON_SCRATCH_SIZE
-	/* 32MB of SDRAM.  */
-	SDRAM : ORIGIN = SDRAM_ADDR, LENGTH = SDRAM_SIZE
-}
-
-#ifdef CONFIG_RTE_CB_MA1_KSRAM
-# define KRAM SRAM
-#else
-# define KRAM SDRAM
-#endif
-
-SECTIONS {
-	/* We can't use RAMK_KRAM_CONTENTS because that puts the whole
-	   kernel in a single ELF segment, and the Multi debugger (which
-	   we use to load the kernel) appears to have bizarre problems
-	   dealing with it.  */
-
-	.text : {
-		__kram_start = . ;
-		TEXT_CONTENTS
-	} > KRAM
-
-	.data : {
-		DATA_CONTENTS
-		BSS_CONTENTS
-		RAMK_INIT_CONTENTS
-		__kram_end = . ;
-		BOOTMAP_CONTENTS
-
-		/* The address at which the interrupt vectors are initially
-		   loaded by the loader.  We can't load the interrupt vectors
-		   directly into their target location, because the monitor
-		   ROM for the GHS Multi debugger barfs if we try.
-		   Unfortunately, Multi also doesn't deal correctly with ELF
-		   sections where the LMA and VMA differ (it just ignores the
-		   LMA), so we can't use that feature to work around the
-		   problem!  What we do instead is just put the interrupt
-		   vectors into a normal section, and have the
-		   `mach_early_init' function for Midas boards do the
-		   necessary copying and relocation at runtime (this section
-		   basically only contains `jr' instructions, so it's not
-		   that hard).  */
-		. = ALIGN (0x10) ;
-		__intv_load_start = . ;
-		INTV_CONTENTS
-	} > KRAM
-
-	.root ALIGN (4096) : { ROOT_FS_CONTENTS } > SDRAM
-}
diff --git a/arch/v850/kernel/rte_mb_a_pci.c b/arch/v850/kernel/rte_mb_a_pci.c
deleted file mode 100644
index 687e367d8b64..000000000000
--- a/arch/v850/kernel/rte_mb_a_pci.c
+++ /dev/null
@@ -1,819 +0,0 @@
-/*
- * arch/v850/kernel/mb_a_pci.c -- PCI support for Midas lab RTE-MOTHER-A board
- *
- *  Copyright (C) 2001,02,03,05  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03,05  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/spinlock.h>
-#include <linux/pci.h>
-
-#include <asm/machdep.h>
-
-/* __nomods_init is like __devinit, but is a no-op when modules are enabled.
-   This is used by some routines that can be called either during boot
-   or by a module.  */
-#ifdef CONFIG_MODULES
-#define __nomods_init /*nothing*/
-#else
-#define __nomods_init __devinit
-#endif
-
-/* PCI devices on the Mother-A board can only do DMA to/from the MB SRAM
-   (the RTE-V850E/MA1-CB cpu board doesn't support PCI access to
-   CPU-board memory), and since linux DMA buffers are allocated in
-   normal kernel memory, we basically have to copy DMA blocks around
-   (this is like a `bounce buffer').  When a DMA block is `mapped', we
-   allocate an identically sized block in MB SRAM, and if we're doing
-   output to the device, copy the CPU-memory block to the MB-SRAM block.
-   When an active block is `unmapped', we will copy the block back to
-   CPU memory if necessary, and then deallocate the MB SRAM block.
-   Ack.  */
-
-/* Where the motherboard SRAM is in the PCI-bus address space (the
-   first 512K of it is also mapped at PCI address 0).  */
-#define PCI_MB_SRAM_ADDR 0x800000
-
-/* Convert CPU-view MB SRAM address to/from PCI-view addresses of the
-   same memory.  */
-#define MB_SRAM_TO_PCI(mb_sram_addr) \
-   ((dma_addr_t)mb_sram_addr - MB_A_SRAM_ADDR + PCI_MB_SRAM_ADDR)
-#define PCI_TO_MB_SRAM(pci_addr)     \
-   (void *)(pci_addr - PCI_MB_SRAM_ADDR + MB_A_SRAM_ADDR)
-
-static void pcibios_assign_resources (void);
-
-struct mb_pci_dev_irq {
-	unsigned dev;		/* PCI device number */
-	unsigned irq_base;	/* First IRQ  */
-	unsigned query_pin;	/* True if we should read the device's
-				   Interrupt Pin info, and allocate
-				   interrupt IRQ_BASE + PIN.  */
-};
-
-/* PCI interrupts are mapped statically to GBUS interrupts.  */
-static struct mb_pci_dev_irq mb_pci_dev_irqs[] = {
-	/* Motherboard SB82558 ethernet controller */
-	{ 10,	IRQ_MB_A_LAN,		0 },
-	/* PCI slot 1 */
-	{ 8, 	IRQ_MB_A_PCI1(0),	1 },
-	/* PCI slot 2 */
-	{ 9, 	IRQ_MB_A_PCI2(0),	1 }
-};
-#define NUM_MB_PCI_DEV_IRQS ARRAY_SIZE(mb_pci_dev_irqs)
-
-
-/* PCI configuration primitives.  */
-
-#define CONFIG_DMCFGA(bus, devfn, offs)					\
-   (0x80000000								\
-    | ((offs) & ~0x3)							\
-    | ((devfn) << 8)							\
-    | ((bus)->number << 16))
-
-static int
-mb_pci_read (struct pci_bus *bus, unsigned devfn, int offs, int size, u32 *rval)
-{
-	u32 addr;
-	int flags;
-
-	local_irq_save (flags);
-
-	MB_A_PCI_PCICR = 0x7;
-	MB_A_PCI_DMCFGA = CONFIG_DMCFGA (bus, devfn, offs);
-
-	addr = MB_A_PCI_IO_ADDR + (offs & 0x3);
-
-	switch (size) {
-	case 1:	*rval = *(volatile  u8 *)addr; break;
-	case 2:	*rval = *(volatile u16 *)addr; break;
-	case 4:	*rval = *(volatile u32 *)addr; break;
-	}
-
-        if (MB_A_PCI_PCISR & 0x2000) {
-		MB_A_PCI_PCISR = 0x2000;
-		*rval = ~0;
-        }
-
-	MB_A_PCI_DMCFGA = 0;
-
-	local_irq_restore (flags);
-
-	return PCIBIOS_SUCCESSFUL;
-}
-
-static int
-mb_pci_write (struct pci_bus *bus, unsigned devfn, int offs, int size, u32 val)
-{
-	u32 addr;
-	int flags;
-
-	local_irq_save (flags);
-
-	MB_A_PCI_PCICR = 0x7;
-	MB_A_PCI_DMCFGA = CONFIG_DMCFGA (bus, devfn, offs);
-
-	addr = MB_A_PCI_IO_ADDR + (offs & 0x3);
-
-	switch (size) {
-	case 1: *(volatile  u8 *)addr = val; break;
-	case 2: *(volatile u16 *)addr = val; break;
-	case 4: *(volatile u32 *)addr = val; break;
-	}
-
-        if (MB_A_PCI_PCISR & 0x2000)
-		MB_A_PCI_PCISR = 0x2000;
-
-	MB_A_PCI_DMCFGA = 0;
-
-	local_irq_restore (flags);
-
-	return PCIBIOS_SUCCESSFUL;
-}
-
-static struct pci_ops mb_pci_config_ops = {
-	.read	= mb_pci_read,
-	.write	= mb_pci_write,
-};
-
-
-/* PCI Initialization.  */
-
-static struct pci_bus *mb_pci_bus = 0;
-
-/* Do initial PCI setup.  */
-static int __devinit pcibios_init (void)
-{
-	u32 id = MB_A_PCI_PCIHIDR;
-	u16 vendor = id & 0xFFFF;
-	u16 device = (id >> 16) & 0xFFFF;
-
-	if (vendor == PCI_VENDOR_ID_PLX && device == PCI_DEVICE_ID_PLX_9080) {
-		printk (KERN_INFO
-			"PCI: PLX Technology PCI9080 HOST/PCI bridge\n");
-
-		MB_A_PCI_PCICR = 0x147;
-
-		MB_A_PCI_PCIBAR0 = 0x007FFF00;
-		MB_A_PCI_PCIBAR1 = 0x0000FF00;
-		MB_A_PCI_PCIBAR2 = 0x00800000;
-
-		MB_A_PCI_PCILTR = 0x20;
-
-		MB_A_PCI_PCIPBAM |= 0x3;
-
-		MB_A_PCI_PCISR =  ~0; /* Clear errors.  */
-
-		/* Reprogram the motherboard's IO/config address space,
-		   as we don't support the GCS7 address space that the
-		   default uses.  */
-
-		/* Significant address bits used for decoding PCI GCS5 space
-		   accesses.  */
-		MB_A_PCI_DMRR = ~(MB_A_PCI_MEM_SIZE - 1);
-
-		/* I don't understand this, but the SolutionGear example code
-		   uses such an offset, and it doesn't work without it.  XXX */
-#if GCS5_SIZE == 0x00800000
-#define GCS5_CFG_OFFS 0x00800000
-#else
-#define GCS5_CFG_OFFS 0
-#endif
-
-		/* Address bit values for matching.  Note that we have to give
-		   the address from the motherboard's point of view, which is
-		   different than the CPU's.  */
-		/* PCI memory space.  */
-		MB_A_PCI_DMLBAM = GCS5_CFG_OFFS + 0x0;
-		/* PCI I/O space.  */
-		MB_A_PCI_DMLBAI =
-			GCS5_CFG_OFFS + (MB_A_PCI_IO_ADDR - GCS5_ADDR);
-
-		mb_pci_bus = pci_scan_bus (0, &mb_pci_config_ops, 0);
-
-		pcibios_assign_resources ();
-	} else
-		printk (KERN_ERR "PCI: HOST/PCI bridge not found\n");
-
-	return 0;
-}
-
-subsys_initcall (pcibios_init);
-
-char __devinit *pcibios_setup (char *option)
-{
-	/* Don't handle any options. */
-	return option;
-}
-
-
-int __nomods_init pcibios_enable_device (struct pci_dev *dev, int mask)
-{
-	u16 cmd, old_cmd;
-	int idx;
-	struct resource *r;
-
-	pci_read_config_word(dev, PCI_COMMAND, &cmd);
-	old_cmd = cmd;
-	for (idx = 0; idx < 6; idx++) {
-		r = &dev->resource[idx];
-		if (!r->start && r->end) {
-			printk(KERN_ERR "PCI: Device %s not available because "
-			       "of resource collisions\n", pci_name(dev));
-			return -EINVAL;
-		}
-		if (r->flags & IORESOURCE_IO)
-			cmd |= PCI_COMMAND_IO;
-		if (r->flags & IORESOURCE_MEM)
-			cmd |= PCI_COMMAND_MEMORY;
-	}
-	if (cmd != old_cmd) {
-		printk("PCI: Enabling device %s (%04x -> %04x)\n",
-		       pci_name(dev), old_cmd, cmd);
-		pci_write_config_word(dev, PCI_COMMAND, cmd);
-	}
-	return 0;
-}
-
-
-/* Resource allocation.  */
-static void __devinit pcibios_assign_resources (void)
-{
-	struct pci_dev *dev = NULL;
-	struct resource *r;
-
-	for_each_pci_dev(dev) {
-		unsigned di_num;
-		unsigned class = dev->class >> 8;
-
-		if (class && class != PCI_CLASS_BRIDGE_HOST) {
-			unsigned r_num;
-			for(r_num = 0; r_num < 6; r_num++) {
-				r = &dev->resource[r_num];
-				if (!r->start && r->end)
-					pci_assign_resource (dev, r_num);
-			}
-		}
-
-		/* Assign interrupts.  */
-		for (di_num = 0; di_num < NUM_MB_PCI_DEV_IRQS; di_num++) {
-			struct mb_pci_dev_irq *di = &mb_pci_dev_irqs[di_num];
-
-			if (di->dev == PCI_SLOT (dev->devfn)) {
-				unsigned irq = di->irq_base;
-
-				if (di->query_pin) {
-					/* Find out which interrupt pin
-					   this device uses (each PCI
-					   slot has 4).  */
-					u8 irq_pin;
-
-					pci_read_config_byte (dev,
-							     PCI_INTERRUPT_PIN,
-							      &irq_pin);
-
-					if (irq_pin == 0)
-						/* Doesn't use interrupts.  */ 
-						continue;
-					else
-						irq += irq_pin - 1;
-				}
-
-				pcibios_update_irq (dev, irq);
-			}
-		}
-	}
-}
-
-void __devinit pcibios_update_irq (struct pci_dev *dev, int irq)
-{
-	dev->irq = irq;
-	pci_write_config_byte (dev, PCI_INTERRUPT_LINE, irq);
-}
-
-void __devinit
-pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region,
-			struct resource *res)
-{
-	unsigned long offset = 0;
-
-	if (res->flags & IORESOURCE_IO) {
-		offset = MB_A_PCI_IO_ADDR;
-	} else if (res->flags & IORESOURCE_MEM) {
-		offset = MB_A_PCI_MEM_ADDR;
-	}
-
-	region->start = res->start - offset;
-	region->end = res->end - offset;
-}
-
-
-/* Stubs for things we don't use.  */
-
-/* Called after each bus is probed, but before its children are examined. */
-void pcibios_fixup_bus(struct pci_bus *b)
-{
-}
-
-void
-pcibios_align_resource (void *data, struct resource *res,
-			resource_size_t size, resource_size_t align)
-{
-}
-
-void pcibios_set_master (struct pci_dev *dev)
-{
-}
-
-
-/* Mother-A SRAM memory allocation.  This is a simple first-fit allocator.  */
-
-/* A memory free-list node.  */
-struct mb_sram_free_area {
-	void *mem;
-	unsigned long size;
-	struct mb_sram_free_area *next;
-};
-
-/* The tail of the free-list, which starts out containing all the SRAM.  */
-static struct mb_sram_free_area mb_sram_free_tail = {
-	(void *)MB_A_SRAM_ADDR, MB_A_SRAM_SIZE, 0
-};
-
-/* The free-list.  */
-static struct mb_sram_free_area *mb_sram_free_areas = &mb_sram_free_tail;
-
-/* The free-list of free free-list nodes. (:-)  */
-static struct mb_sram_free_area *mb_sram_free_free_areas = 0;
-
-/* Spinlock protecting the above globals.  */
-static DEFINE_SPINLOCK(mb_sram_lock);
-
-/* Allocate a memory block at least SIZE bytes long in the Mother-A SRAM
-   space.  */
-static void *alloc_mb_sram (size_t size)
-{
-	struct mb_sram_free_area *prev, *fa;
-	unsigned long flags;
-	void *mem = 0;
-
-	spin_lock_irqsave (mb_sram_lock, flags);
-
-	/* Look for a free area that can contain SIZE bytes.  */
-	for (prev = 0, fa = mb_sram_free_areas; fa; prev = fa, fa = fa->next)
-		if (fa->size >= size) {
-			/* Found one!  */
-			mem = fa->mem;
-
-			if (fa->size == size) {
-				/* In fact, it fits exactly, so remove
-				   this node from the free-list.  */
-				if (prev)
-					prev->next = fa->next;
-				else
-					mb_sram_free_areas = fa->next;
-				/* Put it on the free-list-entry-free-list. */
-				fa->next = mb_sram_free_free_areas;
-				mb_sram_free_free_areas = fa;
-			} else {
-				/* FA is bigger than SIZE, so just
-				   reduce its size to account for this
-				   allocation.  */
-				fa->mem += size;
-				fa->size -= size;
-			}
-
-			break;
-		}
-
-	spin_unlock_irqrestore (mb_sram_lock, flags);
-
-	return mem;
-}
-
-/* Return the memory area MEM of size SIZE to the MB SRAM free pool.  */
-static void free_mb_sram (void *mem, size_t size)
-{
-	struct mb_sram_free_area *prev, *fa, *new_fa;
-	unsigned long flags;
-	void *end = mem + size;
-
-	spin_lock_irqsave (mb_sram_lock, flags);
-
- retry:
-	/* Find an adjacent free-list entry.  */
-	for (prev = 0, fa = mb_sram_free_areas; fa; prev = fa, fa = fa->next)
-		if (fa->mem == end) {
-			/* FA is just after MEM, grow down to encompass it. */
-			fa->mem = mem;
-			fa->size += size;
-			goto done;
-		} else if (fa->mem + fa->size == mem) {
-			struct mb_sram_free_area *next_fa = fa->next;
-
-			/* FA is just before MEM, expand to encompass it. */
-			fa->size += size;
-
-			/* See if FA can now be merged with its successor. */
-			if (next_fa && fa->mem + fa->size == next_fa->mem) {
-				/* Yup; merge NEXT_FA's info into FA.  */
-				fa->size += next_fa->size;
-				fa->next = next_fa->next;
-				/* Free NEXT_FA.  */
-				next_fa->next = mb_sram_free_free_areas;
-				mb_sram_free_free_areas = next_fa;
-			}
-			goto done;
-		} else if (fa->mem > mem)
-			/* We've reached the right spot in the free-list
-			   without finding an adjacent free-area, so add
-			   a new free area to hold mem. */
-			break;
-
-	/* Make a new free-list entry.  */
-
-	/* First, get a free-list entry.  */
-	if (! mb_sram_free_free_areas) {
-		/* There are none, so make some.  */
-		void *block;
-		size_t block_size = sizeof (struct mb_sram_free_area) * 8;
-
-		/* Don't hold the lock while calling kmalloc (I'm not
-		   sure whether it would be a problem, since we use
-		   GFP_ATOMIC, but it makes me nervous).  */
-		spin_unlock_irqrestore (mb_sram_lock, flags);
-
-		block = kmalloc (block_size, GFP_ATOMIC);
-		if (! block)
-			panic ("free_mb_sram: can't allocate free-list entry");
-
-		/* Now get the lock back.  */
-		spin_lock_irqsave (mb_sram_lock, flags);
-
-		/* Add the new free free-list entries.  */
-		while (block_size > 0) {
-			struct mb_sram_free_area *nfa = block;
-			nfa->next = mb_sram_free_free_areas;
-			mb_sram_free_free_areas = nfa;
-			block += sizeof *nfa;
-			block_size -= sizeof *nfa;
-		}
-
-		/* Since we dropped the lock to call kmalloc, the
-		   free-list could have changed, so retry from the
-		   beginning.  */
-		goto retry;
-	}
-
-	/* Remove NEW_FA from the free-list of free-list entries.  */
-	new_fa = mb_sram_free_free_areas;
-	mb_sram_free_free_areas = new_fa->next;
-
-	/* NEW_FA initially holds only MEM.  */
-	new_fa->mem = mem;
-	new_fa->size = size;
-
-	/* Insert NEW_FA in the free-list between PREV and FA. */
-	new_fa->next = fa;
-	if (prev)
-		prev->next = new_fa;
-	else
-		mb_sram_free_areas = new_fa;
-
- done:
-	spin_unlock_irqrestore (mb_sram_lock, flags);
-}
-
-
-/* Maintainence of CPU -> Mother-A DMA mappings.  */
-
-struct dma_mapping {
-	void *cpu_addr;
-	void *mb_sram_addr;
-	size_t size;
-	struct dma_mapping *next;
-};
-
-/* A list of mappings from CPU addresses to MB SRAM addresses for active
-   DMA blocks (that have been `granted' to the PCI device).  */
-static struct dma_mapping *active_dma_mappings = 0;
-
-/* A list of free mapping objects.  */
-static struct dma_mapping *free_dma_mappings = 0;
-
-/* Spinlock protecting the above globals.  */
-static DEFINE_SPINLOCK(dma_mappings_lock);
-
-static struct dma_mapping *new_dma_mapping (size_t size)
-{
-	unsigned long flags;
-	struct dma_mapping *mapping;
-	void *mb_sram_block = alloc_mb_sram (size);
-
-	if (! mb_sram_block)
-		return 0;
-
-	spin_lock_irqsave (dma_mappings_lock, flags);
-
-	if (! free_dma_mappings) {
-		/* We're out of mapping structures, make more.  */
-		void *mblock;
-		size_t mblock_size = sizeof (struct dma_mapping) * 8;
-
-		/* Don't hold the lock while calling kmalloc (I'm not
-		   sure whether it would be a problem, since we use
-		   GFP_ATOMIC, but it makes me nervous).  */
-		spin_unlock_irqrestore (dma_mappings_lock, flags);
-
-		mblock = kmalloc (mblock_size, GFP_ATOMIC);
-		if (! mblock) {
-			free_mb_sram (mb_sram_block, size);
-			return 0;
-		}
-
-		/* Get the lock back.  */
-		spin_lock_irqsave (dma_mappings_lock, flags);
-
-		/* Add the new mapping structures to the free-list.  */
-		while (mblock_size > 0) {
-			struct dma_mapping *fm = mblock;
-			fm->next = free_dma_mappings;
-			free_dma_mappings = fm;
-			mblock += sizeof *fm;
-			mblock_size -= sizeof *fm;
-		}
-	}
-
-	/* Get a mapping struct from the freelist.  */
-	mapping = free_dma_mappings;
-	free_dma_mappings = mapping->next;
-
-	/* Initialize the mapping.  Other fields should be filled in by
-	   caller.  */
-	mapping->mb_sram_addr = mb_sram_block;
-	mapping->size = size;
-
-	/* Add it to the list of active mappings.  */
-	mapping->next = active_dma_mappings;
-	active_dma_mappings = mapping;
-
-	spin_unlock_irqrestore (dma_mappings_lock, flags);
-
-	return mapping;
-}
-
-static struct dma_mapping *find_dma_mapping (void *mb_sram_addr)
-{
-	unsigned long flags;
-	struct dma_mapping *mapping;
-
-	spin_lock_irqsave (dma_mappings_lock, flags);
-
-	for (mapping = active_dma_mappings; mapping; mapping = mapping->next)
-		if (mapping->mb_sram_addr == mb_sram_addr) {
-			spin_unlock_irqrestore (dma_mappings_lock, flags);
-			return mapping;
-		}
-
-	panic ("find_dma_mapping: unmapped PCI DMA addr 0x%x",
-	       MB_SRAM_TO_PCI (mb_sram_addr));
-}
-
-static struct dma_mapping *deactivate_dma_mapping (void *mb_sram_addr)
-{
-	unsigned long flags;
-	struct dma_mapping *mapping, *prev;
-
-	spin_lock_irqsave (dma_mappings_lock, flags);
-
-	for (prev = 0, mapping = active_dma_mappings;
-	     mapping;
-	     prev = mapping, mapping = mapping->next)
-	{
-		if (mapping->mb_sram_addr == mb_sram_addr) {
-			/* This is the MAPPING; deactivate it.  */
-			if (prev)
-				prev->next = mapping->next;
-			else
-				active_dma_mappings = mapping->next;
-
-			spin_unlock_irqrestore (dma_mappings_lock, flags);
-
-			return mapping;
-		}
-	}
-
-	panic ("deactivate_dma_mapping: unmapped PCI DMA addr 0x%x",
-	       MB_SRAM_TO_PCI (mb_sram_addr));
-}
-
-/* Return MAPPING to the freelist.  */
-static inline void
-free_dma_mapping (struct dma_mapping *mapping)
-{
-	unsigned long flags;
-
-	free_mb_sram (mapping->mb_sram_addr, mapping->size);
-
-	spin_lock_irqsave (dma_mappings_lock, flags);
-
-	mapping->next = free_dma_mappings;
-	free_dma_mappings = mapping;
-
-	spin_unlock_irqrestore (dma_mappings_lock, flags);
-}
-
-
-/* Single PCI DMA mappings.  */
-
-/* `Grant' to PDEV the memory block at CPU_ADDR, for doing DMA.  The
-   32-bit PCI bus mastering address to use is returned.  the device owns
-   this memory until either pci_unmap_single or pci_dma_sync_single is
-   performed.  */
-dma_addr_t
-pci_map_single (struct pci_dev *pdev, void *cpu_addr, size_t size, int dir)
-{
-	struct dma_mapping *mapping = new_dma_mapping (size);
-
-	if (! mapping)
-		return 0;
-
-	mapping->cpu_addr = cpu_addr;
-
-	if (dir == PCI_DMA_BIDIRECTIONAL || dir == PCI_DMA_TODEVICE)
-		memcpy (mapping->mb_sram_addr, cpu_addr, size);
-
-	return MB_SRAM_TO_PCI (mapping->mb_sram_addr);
-}
-
-/* Return to the CPU the PCI DMA memory block previously `granted' to
-   PDEV, at DMA_ADDR.  */
-void pci_unmap_single (struct pci_dev *pdev, dma_addr_t dma_addr, size_t size,
-		       int dir)
-{
-	void *mb_sram_addr = PCI_TO_MB_SRAM (dma_addr);
-	struct dma_mapping *mapping = deactivate_dma_mapping (mb_sram_addr);
-
-	if (size != mapping->size)
-		panic ("pci_unmap_single: size (%d) doesn't match"
-		       " size of mapping at PCI DMA addr 0x%x (%d)\n",
-		       size, dma_addr, mapping->size);
-
-	/* Copy back the DMA'd contents if necessary.  */
-	if (dir == PCI_DMA_BIDIRECTIONAL || dir == PCI_DMA_FROMDEVICE)
-		memcpy (mapping->cpu_addr, mb_sram_addr, size);
-
-	/* Return mapping to the freelist.  */
-	free_dma_mapping (mapping);
-}
-
-/* Make physical memory consistent for a single streaming mode DMA
-   translation after a transfer.
-
-   If you perform a pci_map_single() but wish to interrogate the
-   buffer using the cpu, yet do not wish to teardown the PCI dma
-   mapping, you must call this function before doing so.  At the next
-   point you give the PCI dma address back to the card, you must first
-   perform a pci_dma_sync_for_device, and then the device again owns
-   the buffer.  */
-void
-pci_dma_sync_single_for_cpu (struct pci_dev *pdev, dma_addr_t dma_addr, size_t size,
-		     int dir)
-{
-	void *mb_sram_addr = PCI_TO_MB_SRAM (dma_addr);
-	struct dma_mapping *mapping = find_dma_mapping (mb_sram_addr);
-
-	/* Synchronize the DMA buffer with the CPU buffer if necessary.  */
-	if (dir == PCI_DMA_FROMDEVICE)
-		memcpy (mapping->cpu_addr, mb_sram_addr, size);
-	else if (dir == PCI_DMA_TODEVICE)
-		; /* nothing to do */
-	else
-		panic("pci_dma_sync_single: unsupported sync dir: %d", dir);
-}
-
-void
-pci_dma_sync_single_for_device (struct pci_dev *pdev, dma_addr_t dma_addr, size_t size,
-				int dir)
-{
-	void *mb_sram_addr = PCI_TO_MB_SRAM (dma_addr);
-	struct dma_mapping *mapping = find_dma_mapping (mb_sram_addr);
-
-	/* Synchronize the DMA buffer with the CPU buffer if necessary.  */
-	if (dir == PCI_DMA_FROMDEVICE)
-		; /* nothing to do */
-	else if (dir == PCI_DMA_TODEVICE)
-		memcpy (mb_sram_addr, mapping->cpu_addr, size);
-	else
-		panic("pci_dma_sync_single: unsupported sync dir: %d", dir);
-}
-
-
-/* Scatter-gather PCI DMA mappings.  */
-
-/* Do multiple DMA mappings at once.  */
-int
-pci_map_sg (struct pci_dev *pdev, struct scatterlist *sg, int sg_len, int dir)
-{
-	BUG ();
-	return 0;
-}
-
-/* Unmap multiple DMA mappings at once.  */
-void
-pci_unmap_sg (struct pci_dev *pdev, struct scatterlist *sg, int sg_len,int dir)
-{
-	BUG ();
-}
-
-/* Make physical memory consistent for a set of streaming mode DMA
-   translations after a transfer.  The same as pci_dma_sync_single_* but
-   for a scatter-gather list, same rules and usage.  */
-
-void
-pci_dma_sync_sg_for_cpu (struct pci_dev *dev,
-			 struct scatterlist *sg, int sg_len,
-			 int dir)
-{
-	BUG ();
-}
-
-void
-pci_dma_sync_sg_for_device (struct pci_dev *dev,
-			    struct scatterlist *sg, int sg_len,
-			    int dir)
-{
-	BUG ();
-}
-
-
-/* PCI mem mapping.  */
-
-/* Allocate and map kernel buffer using consistent mode DMA for PCI
-   device.  Returns non-NULL cpu-view pointer to the buffer if
-   successful and sets *DMA_ADDR to the pci side dma address as well,
-   else DMA_ADDR is undefined.  */
-void *
-pci_alloc_consistent (struct pci_dev *pdev, size_t size, dma_addr_t *dma_addr)
-{
-	void *mb_sram_mem = alloc_mb_sram (size);
-	if (mb_sram_mem)
-		*dma_addr = MB_SRAM_TO_PCI (mb_sram_mem);
-	return mb_sram_mem;
-}
-
-/* Free and unmap a consistent DMA buffer.  CPU_ADDR and DMA_ADDR must
-   be values that were returned from pci_alloc_consistent.  SIZE must be
-   the same as what as passed into pci_alloc_consistent.  References to
-   the memory and mappings associated with CPU_ADDR or DMA_ADDR past
-   this call are illegal.  */
-void
-pci_free_consistent (struct pci_dev *pdev, size_t size, void *cpu_addr,
-		     dma_addr_t dma_addr)
-{
-	void *mb_sram_mem = PCI_TO_MB_SRAM (dma_addr);
-	free_mb_sram (mb_sram_mem, size);
-}
-
-
-/* iomap/iomap */
-
-void __iomem *pci_iomap (struct pci_dev *dev, int bar, unsigned long max)
-{
-	resource_size_t start = pci_resource_start (dev, bar);
-	resource_size_t len = pci_resource_len (dev, bar);
-
-	if (!start || len == 0)
-		return 0;
-
-	/* None of the ioremap functions actually do anything, other than
-	   re-casting their argument, so don't bother differentiating them.  */
-	return ioremap (start, len);
-}
-
-void pci_iounmap (struct pci_dev *dev, void __iomem *addr)
-{
-	/* nothing */
-}
-
-
-/* symbol exports (for modules) */
-
-EXPORT_SYMBOL (pci_map_single);
-EXPORT_SYMBOL (pci_unmap_single);
-EXPORT_SYMBOL (pci_alloc_consistent);
-EXPORT_SYMBOL (pci_free_consistent);
-EXPORT_SYMBOL (pci_dma_sync_single_for_cpu);
-EXPORT_SYMBOL (pci_dma_sync_single_for_device);
-EXPORT_SYMBOL (pci_iomap);
-EXPORT_SYMBOL (pci_iounmap);
diff --git a/arch/v850/kernel/rte_me2_cb.c b/arch/v850/kernel/rte_me2_cb.c
deleted file mode 100644
index 46803d48dffe..000000000000
--- a/arch/v850/kernel/rte_me2_cb.c
+++ /dev/null
@@ -1,298 +0,0 @@
-/*
- * arch/v850/kernel/rte_me2_cb.c -- Midas labs RTE-V850E/ME2-CB board
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/bootmem.h>
-#include <linux/irq.h>
-#include <linux/fs.h>
-#include <linux/major.h>
-#include <linux/sched.h>
-#include <linux/delay.h>
-
-#include <asm/atomic.h>
-#include <asm/page.h>
-#include <asm/me2.h>
-#include <asm/rte_me2_cb.h>
-#include <asm/machdep.h>
-#include <asm/v850e_intc.h>
-#include <asm/v850e_cache.h>
-#include <asm/irq.h>
-
-#include "mach.h"
-
-extern unsigned long *_intv_start;
-extern unsigned long *_intv_end;
-
-/* LED access routines.  */
-extern unsigned read_leds (int pos, char *buf, int len);
-extern unsigned write_leds (int pos, const char *buf, int len);
-
-
-/* SDRAM are almost contiguous (with a small hole in between;
-   see mach_reserve_bootmem for details), so just use both as one big area.  */
-#define RAM_START 	SDRAM_ADDR
-#define RAM_END		(SDRAM_ADDR + SDRAM_SIZE)
-
-
-void __init mach_get_physical_ram (unsigned long *ram_start,
-				   unsigned long *ram_len)
-{
-	*ram_start = RAM_START;
-	*ram_len = RAM_END - RAM_START;
-}
-
-void mach_gettimeofday (struct timespec *tv)
-{
-	tv->tv_sec = 0;
-	tv->tv_nsec = 0;
-}
-
-/* Called before configuring an on-chip UART.  */
-void rte_me2_cb_uart_pre_configure (unsigned chan,
-				    unsigned cflags, unsigned baud)
-{
-	/* The RTE-V850E/ME2-CB connects some general-purpose I/O
-	   pins on the CPU to the RTS/CTS lines of UARTB channel 0's
-	   serial connection.
-	   I/O pins P21 and P22 are RTS and CTS respectively.  */
-	if (chan == 0) {
-		/* Put P21 & P22 in I/O port mode.  */
-		ME2_PORT2_PMC &= ~0x6;
-		/* Make P21 and output, and P22 an input.  */
-		ME2_PORT2_PM = (ME2_PORT2_PM & ~0xC) | 0x4;
-	}
-
-	me2_uart_pre_configure (chan, cflags, baud);
-}
-
-void __init mach_init_irqs (void)
-{
-	/* Initialize interrupts.  */
-	me2_init_irqs ();
-	rte_me2_cb_init_irqs ();
-}
-
-#ifdef CONFIG_ROM_KERNEL
-/* Initialization for kernel in ROM.  */
-static inline rom_kernel_init (void)
-{
-	/* If the kernel is in ROM, we have to copy any initialized data
-	   from ROM into RAM.  */
-	extern unsigned long _data_load_start, _sdata, _edata;
-	register unsigned long *src = &_data_load_start;
-	register unsigned long *dst = &_sdata, *end = &_edata;
-
-	while (dst != end)
-		*dst++ = *src++;
-}
-#endif /* CONFIG_ROM_KERNEL */
-
-static void install_interrupt_vectors (void)
-{
-	unsigned long *p1, *p2;
-
-	ME2_IRAMM = 0x03; /* V850E/ME2 iRAM write mode */
-
-	/* vector copy to iRAM */
-	p1 = (unsigned long *)0; /* v85x vector start */
-	p2 = (unsigned long *)&_intv_start;
-	while (p2 < (unsigned long *)&_intv_end)
-		*p1++ = *p2++;
-
-	ME2_IRAMM = 0x00; /* V850E/ME2 iRAM read mode */
-}
-
-/* CompactFlash */
-
-static void cf_power_on (void)
-{
-	/* CF card detected? */
-	if (CB_CF_STS0 & 0x0030)
-		return;
-
-	CB_CF_REG0 = 0x0002; /* reest on */
-	mdelay (10);
-	CB_CF_REG0 = 0x0003; /* power on */
-	mdelay (10);
-	CB_CF_REG0 = 0x0001; /* reset off */
-	mdelay (10);
-}
-
-static void cf_power_off (void)
-{
-	CB_CF_REG0 = 0x0003; /* power on */
-	mdelay (10);
-	CB_CF_REG0 = 0x0002; /* reest on */
-	mdelay (10);
-}
-
-void __init mach_early_init (void)
-{
-	install_interrupt_vectors ();
-
-	/* CS1 SDRAM instruction cache enable */
-	v850e_cache_enable (0x04, 0x03, 0);
-
-	rte_cb_early_init ();
-
-	/* CompactFlash power on */
-	cf_power_on ();
-
-#if defined (CONFIG_ROM_KERNEL)
-	rom_kernel_init ();
-#endif
-}
-
-
-/* RTE-V850E/ME2-CB Programmable Interrupt Controller.  */
-
-static struct cb_pic_irq_init cb_pic_irq_inits[] = {
-	{ "CB_EXTTM0",       IRQ_CB_EXTTM0,       1, 1, 6 },
-	{ "CB_EXTSIO",       IRQ_CB_EXTSIO,       1, 1, 6 },
-	{ "CB_TOVER",        IRQ_CB_TOVER,        1, 1, 6 },
-	{ "CB_GINT0",        IRQ_CB_GINT0,        1, 1, 6 },
-	{ "CB_USB",          IRQ_CB_USB,          1, 1, 6 },
-	{ "CB_LANC",         IRQ_CB_LANC,         1, 1, 6 },
-	{ "CB_USB_VBUS_ON",  IRQ_CB_USB_VBUS_ON,  1, 1, 6 },
-	{ "CB_USB_VBUS_OFF", IRQ_CB_USB_VBUS_OFF, 1, 1, 6 },
-	{ "CB_EXTTM1",       IRQ_CB_EXTTM1,       1, 1, 6 },
-	{ "CB_EXTTM2",       IRQ_CB_EXTTM2,       1, 1, 6 },
-	{ 0 }
-};
-#define NUM_CB_PIC_IRQ_INITS (ARRAY_SIZE(cb_pic_irq_inits) - 1)
-
-static struct hw_interrupt_type cb_pic_hw_itypes[NUM_CB_PIC_IRQ_INITS];
-static unsigned char cb_pic_active_irqs = 0;
-
-void __init rte_me2_cb_init_irqs (void)
-{
-	cb_pic_init_irq_types (cb_pic_irq_inits, cb_pic_hw_itypes);
-
-	/* Initalize on board PIC1 (not PIC0) enable */
-	CB_PIC_INT0M  = 0x0000;
-	CB_PIC_INT1M  = 0x0000;
-	CB_PIC_INTR   = 0x0000;
-	CB_PIC_INTEN |= CB_PIC_INT1EN;
-
-	ME2_PORT2_PMC 	 |= 0x08;	/* INTP23/SCK1 mode */
-	ME2_PORT2_PFC 	 &= ~0x08;	/* INTP23 mode */
-	ME2_INTR(2) 	 &= ~0x08;	/* INTP23 falling-edge detect */
-	ME2_INTF(2) 	 &= ~0x08;	/*   " */
-
-	rte_cb_init_irqs ();	/* gbus &c */
-}
-
-
-/* Enable interrupt handling for interrupt IRQ.  */
-void cb_pic_enable_irq (unsigned irq)
-{
-	CB_PIC_INT1M |= 1 << (irq - CB_PIC_BASE_IRQ);
-}
-
-void cb_pic_disable_irq (unsigned irq)
-{
-	CB_PIC_INT1M &= ~(1 << (irq - CB_PIC_BASE_IRQ));
-}
-
-void cb_pic_shutdown_irq (unsigned irq)
-{
-	cb_pic_disable_irq (irq);
-
-	if (--cb_pic_active_irqs == 0)
-		free_irq (IRQ_CB_PIC, 0);
-
-	CB_PIC_INT1M &= ~(1 << (irq - CB_PIC_BASE_IRQ));
-}
-
-static irqreturn_t cb_pic_handle_irq (int irq, void *dev_id,
-				      struct pt_regs *regs)
-{
-	irqreturn_t rval = IRQ_NONE;
-	unsigned status = CB_PIC_INTR;
-	unsigned enable = CB_PIC_INT1M;
-
-	/* Only pay attention to enabled interrupts.  */
-	status &= enable;
-
-	CB_PIC_INTEN &= ~CB_PIC_INT1EN;
-
-	if (status) {
-		unsigned mask = 1;
-
-		irq = CB_PIC_BASE_IRQ;
-		do {
-			/* There's an active interrupt, find out which one,
-			   and call its handler.  */
-			while (! (status & mask)) {
-				irq++;
-				mask <<= 1;
-			}
-			status &= ~mask;
-
-			CB_PIC_INTR = mask;
-
-			/* Recursively call handle_irq to handle it. */
-			handle_irq (irq, regs);
-			rval = IRQ_HANDLED;
-		} while (status);
-	}
-
-	CB_PIC_INTEN |= CB_PIC_INT1EN;
-
-	return rval;
-}
-
-
-static void irq_nop (unsigned irq) { }
-
-static unsigned cb_pic_startup_irq (unsigned irq)
-{
-	int rval;
-
-	if (cb_pic_active_irqs == 0) {
-		rval = request_irq (IRQ_CB_PIC, cb_pic_handle_irq,
-				    IRQF_DISABLED, "cb_pic_handler", 0);
-		if (rval != 0)
-			return rval;
-	}
-
-	cb_pic_active_irqs++;
-
-	cb_pic_enable_irq (irq);
-
-	return 0;
-}
-
-/* Initialize HW_IRQ_TYPES for INTC-controlled irqs described in array
-   INITS (which is terminated by an entry with the name field == 0).  */
-void __init cb_pic_init_irq_types (struct cb_pic_irq_init *inits,
-				   struct hw_interrupt_type *hw_irq_types)
-{
-	struct cb_pic_irq_init *init;
-	for (init = inits; init->name; init++) {
-		struct hw_interrupt_type *hwit = hw_irq_types++;
-
-		hwit->typename = init->name;
-
-		hwit->startup  = cb_pic_startup_irq;
-		hwit->shutdown = cb_pic_shutdown_irq;
-		hwit->enable   = cb_pic_enable_irq;
-		hwit->disable  = cb_pic_disable_irq;
-		hwit->ack      = irq_nop;
-		hwit->end      = irq_nop;
-
-		/* Initialize kernel IRQ infrastructure for this interrupt.  */
-		init_irq_handlers(init->base, init->num, init->interval, hwit);
-	}
-}
diff --git a/arch/v850/kernel/rte_me2_cb.ld b/arch/v850/kernel/rte_me2_cb.ld
deleted file mode 100644
index cf0766065ec6..000000000000
--- a/arch/v850/kernel/rte_me2_cb.ld
+++ /dev/null
@@ -1,30 +0,0 @@
-/* Linker script for the Midas labs RTE-V850E/ME2-CB evaluation board
-   (CONFIG_RTE_CB_ME2), with kernel in SDRAM.  */
-
-MEMORY {
-	/* 128Kbyte of IRAM */
-	IRAM : ORIGIN = 0x00000000, LENGTH = 0x00020000
-
-	/* 32MB of SDRAM.  */
-	SDRAM : ORIGIN = SDRAM_ADDR, LENGTH = SDRAM_SIZE
-}
-
-#define KRAM SDRAM
-
-SECTIONS {
-	.text : {
-		__kram_start = . ;
-		TEXT_CONTENTS
-		INTV_CONTENTS	/* copy to iRAM (0x0-0x620) */
-	} > KRAM
-
-	.data : {
-		DATA_CONTENTS
-		BSS_CONTENTS
-		RAMK_INIT_CONTENTS
-		__kram_end = . ;
-		BOOTMAP_CONTENTS
-	} > KRAM
-	
-	.root ALIGN (4096) : { ROOT_FS_CONTENTS } > SDRAM
-}
diff --git a/arch/v850/kernel/rte_nb85e_cb-multi.ld b/arch/v850/kernel/rte_nb85e_cb-multi.ld
deleted file mode 100644
index de347b4fffac..000000000000
--- a/arch/v850/kernel/rte_nb85e_cb-multi.ld
+++ /dev/null
@@ -1,57 +0,0 @@
-/* Linker script for the Midas labs RTE-NB85E-CB evaluation board
-   (CONFIG_RTE_CB_NB85E), with the Multi debugger ROM monitor .  */
-
-MEMORY {
-	/* 1MB of SRAM; we can't use the last 96KB, because it's used by
-	   the monitor scratch-RAM.  This memory is mirrored 4 times.  */
-	SRAM  : ORIGIN = SRAM_ADDR,  LENGTH = (SRAM_SIZE - MON_SCRATCH_SIZE)
-	/* Monitor scratch RAM; only the interrupt vectors should go here.  */
-	MRAM  : ORIGIN = MON_SCRATCH_ADDR,  LENGTH = MON_SCRATCH_SIZE
-	/* 16MB of SDRAM.  */
-	SDRAM : ORIGIN = SDRAM_ADDR, LENGTH = SDRAM_SIZE
-}
-
-#ifdef CONFIG_RTE_CB_NB85E_KSRAM
-# define KRAM SRAM
-#else
-# define KRAM SDRAM
-#endif
-
-SECTIONS {
-	/* We can't use RAMK_KRAM_CONTENTS because that puts the whole
-	   kernel in a single ELF segment, and the Multi debugger (which
-	   we use to load the kernel) appears to have bizarre problems
-	   dealing with it.  */
-
-	.text : {
-		__kram_start = . ;
-		TEXT_CONTENTS
-	} > KRAM
-
-	.data : {
-		DATA_CONTENTS
-		BSS_CONTENTS
-		RAMK_INIT_CONTENTS
-		__kram_end = . ;
-		BOOTMAP_CONTENTS
-
-		/* The address at which the interrupt vectors are initially
-		   loaded by the loader.  We can't load the interrupt vectors
-		   directly into their target location, because the monitor
-		   ROM for the GHS Multi debugger barfs if we try.
-		   Unfortunately, Multi also doesn't deal correctly with ELF
-		   sections where the LMA and VMA differ (it just ignores the
-		   LMA), so we can't use that feature to work around the
-		   problem!  What we do instead is just put the interrupt
-		   vectors into a normal section, and have the
-		   `mach_early_init' function for Midas boards do the
-		   necessary copying and relocation at runtime (this section
-		   basically only contains `jr' instructions, so it's not
-		   that hard).  */
-		. = ALIGN (0x10) ;
-		__intv_load_start = . ;
-		INTV_CONTENTS
-	} > KRAM
-
-	.root ALIGN (4096) : { ROOT_FS_CONTENTS } > SDRAM
-}
diff --git a/arch/v850/kernel/rte_nb85e_cb.c b/arch/v850/kernel/rte_nb85e_cb.c
deleted file mode 100644
index b4a045da5d70..000000000000
--- a/arch/v850/kernel/rte_nb85e_cb.c
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- * arch/v850/kernel/rte_nb85e_cb.c -- Midas labs RTE-V850E/NB85E-CB board
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/mm.h>
-#include <linux/swap.h>
-#include <linux/bootmem.h>
-#include <linux/irq.h>
-
-#include <asm/atomic.h>
-#include <asm/page.h>
-#include <asm/v850e.h>
-#include <asm/rte_nb85e_cb.h>
-
-#include "mach.h"
-
-void __init mach_early_init (void)
-{
-	/* Configure caching; some possible settings:
-
-	     BHC = 0x0000, DCC = 0x0000	 -- all caching disabled
-	     BHC = 0x0040, DCC = 0x0000	 -- SDRAM: icache only
-	     BHC = 0x0080, DCC = 0x0C00	 -- SDRAM: write-back dcache only
-	     BHC = 0x00C0, DCC = 0x0C00	 -- SDRAM: icache + write-back dcache
-	     BHC = 0x00C0, DCC = 0x0800	 -- SDRAM: icache + write-thru dcache
-
-	   We can only cache SDRAM (we can't use cache SRAM because it's in
-	   the same memory region as the on-chip RAM and I/O space).
-
-	   Unfortunately, the dcache seems to be buggy, so we only use the
-	   icache for now.  */
-	v850e_cache_enable (0x0040 /*BHC*/, 0x0003 /*ICC*/, 0x0000 /*DCC*/);
-
-	rte_cb_early_init ();
-}
-
-void __init mach_get_physical_ram (unsigned long *ram_start,
-				   unsigned long *ram_len)
-{
-	/* We just use SDRAM here.  */
-	*ram_start = SDRAM_ADDR;
-	*ram_len = SDRAM_SIZE;
-}
-
-void mach_gettimeofday (struct timespec *tv)
-{
-	tv->tv_sec = 0;
-	tv->tv_nsec = 0;
-}
-
-/* Called before configuring an on-chip UART.  */
-void rte_nb85e_cb_uart_pre_configure (unsigned chan,
-				    unsigned cflags, unsigned baud)
-{
-	/* The RTE-NB85E-CB connects some general-purpose I/O pins on the
-	   CPU to the RTS/CTS lines the UART's serial connection, as follows:
-	   P00 = CTS (in), P01 = DSR (in), P02 = RTS (out), P03 = DTR (out). */
-
-	TEG_PORT0_PM = 0x03;	/* P00 and P01 inputs, P02 and P03 outputs */
-	TEG_PORT0_IO = 0x03;	/* Accept input */
-
-	/* Do pre-configuration for the actual UART.  */
-	teg_uart_pre_configure (chan, cflags, baud);
-}
-
-void __init mach_init_irqs (void)
-{
-	teg_init_irqs ();
-	rte_cb_init_irqs ();
-}
diff --git a/arch/v850/kernel/rte_nb85e_cb.ld b/arch/v850/kernel/rte_nb85e_cb.ld
deleted file mode 100644
index b672f484f085..000000000000
--- a/arch/v850/kernel/rte_nb85e_cb.ld
+++ /dev/null
@@ -1,22 +0,0 @@
-/* Linker script for the Midas labs RTE-NB85E-CB evaluation board
-   (CONFIG_RTE_CB_NB85E).  */
-
-MEMORY {
-	LOW   : ORIGIN = 0x0,	     LENGTH = 0x00100000
-	/* 1MB of SRAM  This memory is mirrored 4 times.  */
-	SRAM  : ORIGIN = SRAM_ADDR,  LENGTH = SRAM_SIZE
-	/* 16MB of SDRAM.  */
-	SDRAM : ORIGIN = SDRAM_ADDR, LENGTH = SDRAM_SIZE
-}
-
-#ifdef CONFIG_RTE_CB_NB85E_KSRAM
-# define KRAM SRAM
-#else
-# define KRAM SDRAM
-#endif
-
-SECTIONS {
-	.intv : { INTV_CONTENTS } > LOW
-	.sram : { RAMK_KRAM_CONTENTS } > KRAM
-	.root : { ROOT_FS_CONTENTS } > SDRAM
-}
diff --git a/arch/v850/kernel/setup.c b/arch/v850/kernel/setup.c
deleted file mode 100644
index 10335cecf7bd..000000000000
--- a/arch/v850/kernel/setup.c
+++ /dev/null
@@ -1,329 +0,0 @@
-/*
- * arch/v850/kernel/setup.c -- Arch-dependent initialization functions
- *
- *  Copyright (C) 2001,02,03,05,06  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03,05,06  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#include <linux/mm.h>
-#include <linux/bootmem.h>
-#include <linux/swap.h>		/* we don't have swap, but for nr_free_pages */
-#include <linux/irq.h>
-#include <linux/reboot.h>
-#include <linux/personality.h>
-#include <linux/major.h>
-#include <linux/root_dev.h>
-#include <linux/mtd/mtd.h>
-#include <linux/init.h>
-
-#include <asm/irq.h>
-#include <asm/setup.h>
-
-#include "mach.h"
-
-/* These symbols are all defined in the linker map to delineate various
-   statically allocated regions of memory.  */
-
-extern char _intv_start, _intv_end;
-/* `kram' is only used if the kernel uses part of normal user RAM.  */
-extern char _kram_start __attribute__ ((__weak__));
-extern char _kram_end __attribute__ ((__weak__));
-extern char _init_start, _init_end;
-extern char _bootmap;
-extern char _stext, _etext, _sdata, _edata, _sbss, _ebss;
-/* Many platforms use an embedded root image.  */
-extern char _root_fs_image_start __attribute__ ((__weak__));
-extern char _root_fs_image_end __attribute__ ((__weak__));
-
-
-char __initdata command_line[COMMAND_LINE_SIZE];
-
-/* Memory not used by the kernel.  */
-static unsigned long total_ram_pages;
-
-/* System RAM.  */
-static unsigned long ram_start = 0, ram_len = 0;
-
-
-#define ADDR_TO_PAGE_UP(x)   ((((unsigned long)x) + PAGE_SIZE-1) >> PAGE_SHIFT)
-#define ADDR_TO_PAGE(x)	     (((unsigned long)x) >> PAGE_SHIFT)
-#define PAGE_TO_ADDR(x)	     (((unsigned long)x) << PAGE_SHIFT)
-
-static void init_mem_alloc (unsigned long ram_start, unsigned long ram_len);
-
-void set_mem_root (void *addr, size_t len, char *cmd_line);
-
-
-void __init setup_arch (char **cmdline)
-{
-	/* Keep a copy of command line */
-	*cmdline = command_line;
-	memcpy (boot_command_line, command_line, COMMAND_LINE_SIZE);
-	boot_command_line[COMMAND_LINE_SIZE - 1] = '\0';
-
-	console_verbose ();
-
-	init_mm.start_code = (unsigned long) &_stext;
-	init_mm.end_code = (unsigned long) &_etext;
-	init_mm.end_data = (unsigned long) &_edata;
-	init_mm.brk = (unsigned long) &_kram_end;
-
-	/* Find out what mem this machine has.  */
-	mach_get_physical_ram (&ram_start, &ram_len);
-	/* ... and tell the kernel about it.  */
-	init_mem_alloc (ram_start, ram_len);
-
-	printk (KERN_INFO "CPU: %s\nPlatform: %s\n",
-		CPU_MODEL_LONG, PLATFORM_LONG);
-
-	/* do machine-specific setups.  */
-	mach_setup (cmdline);
-
-#ifdef CONFIG_MTD
-	if (!ROOT_DEV && &_root_fs_image_end > &_root_fs_image_start)
-		set_mem_root (&_root_fs_image_start,
-			      &_root_fs_image_end - &_root_fs_image_start,
-			      *cmdline);
-#endif
-}
-
-void __init trap_init (void)
-{
-}
-
-#ifdef CONFIG_MTD
-
-/* From drivers/mtd/devices/slram.c */
-#define SLRAM_BLK_SZ 0x4000
-
-/* Set the root filesystem to be the given memory region.
-   Some parameter may be appended to CMD_LINE.  */
-void set_mem_root (void *addr, size_t len, char *cmd_line)
-{
-	/* Some sort of idiocy in MTD means we must supply a length that's
-	   a multiple of SLRAM_BLK_SZ.  We just round up the real length,
-	   as the file system shouldn't attempt to access anything beyond
-	   the end of the image anyway.  */
-	len = (((len - 1) + SLRAM_BLK_SZ) / SLRAM_BLK_SZ) * SLRAM_BLK_SZ;
-
-	/* The only way to pass info to the MTD slram driver is via
-	   the command line.  */
-	if (*cmd_line) {
-		cmd_line += strlen (cmd_line);
-		*cmd_line++ = ' ';
-	}
-	sprintf (cmd_line, "slram=root,0x%x,+0x%x", (u32)addr, (u32)len);
-
-	ROOT_DEV = MKDEV (MTD_BLOCK_MAJOR, 0);
-}
-#endif
-
-
-static void irq_nop (unsigned irq) { }
-static unsigned irq_zero (unsigned irq) { return 0; }
-
-static void nmi_end (unsigned irq)
-{
-	if (irq != IRQ_NMI (0)) {
-		printk (KERN_CRIT "NMI %d is unrecoverable; restarting...",
-			irq - IRQ_NMI (0));
-		machine_restart (0);
-	}
-}
-
-static struct hw_interrupt_type nmi_irq_type = {
-	.typename = "NMI",
-	.startup = irq_zero,		/* startup */
-	.shutdown = irq_nop,		/* shutdown */
-	.enable = irq_nop,		/* enable */
-	.disable = irq_nop,		/* disable */
-	.ack = irq_nop,		/* ack */
-	.end = nmi_end,		/* end */
-};
-
-void __init init_IRQ (void)
-{
-	init_irq_handlers (0, NUM_MACH_IRQS, 1, 0);
-	init_irq_handlers (IRQ_NMI (0), NUM_NMIS, 1, &nmi_irq_type);
-	mach_init_irqs ();
-}
-
-
-void __init mem_init (void)
-{
-	max_mapnr = MAP_NR (ram_start + ram_len);
-
-	num_physpages = ADDR_TO_PAGE (ram_len);
-
-	total_ram_pages = free_all_bootmem ();
-
-	printk (KERN_INFO
-		"Memory: %luK/%luK available"
-		" (%luK kernel code, %luK data)\n",
-		PAGE_TO_ADDR (nr_free_pages()) / 1024,
-		ram_len / 1024,
-		((unsigned long)&_etext - (unsigned long)&_stext) / 1024,
-		((unsigned long)&_ebss - (unsigned long)&_sdata) / 1024);
-}
-
-void free_initmem (void)
-{
-	unsigned long ram_end = ram_start + ram_len;
-	unsigned long start = PAGE_ALIGN ((unsigned long)(&_init_start));
-
-	if (start >= ram_start && start < ram_end) {
-		unsigned long addr;
-		unsigned long end = PAGE_ALIGN ((unsigned long)(&_init_end));
-
-		if (end > ram_end)
-			end = ram_end;
-
-		printk("Freeing unused kernel memory: %ldK freed\n",
-		       (end - start) / 1024);
-
-		for (addr = start; addr < end; addr += PAGE_SIZE) {
-			struct page *page = virt_to_page (addr);
-			ClearPageReserved (page);
-			init_page_count (page);
-			__free_page (page);
-			total_ram_pages++;
-		}
-	}
-}
-
-
-/* Initialize the `bootmem allocator'.  RAM_START and RAM_LEN identify
-   what RAM may be used.  */
-static void __init
-init_bootmem_alloc (unsigned long ram_start, unsigned long ram_len)
-{
-	/* The part of the kernel that's in the same managed RAM space
-	   used for general allocation.  */
-	unsigned long kram_start = (unsigned long)&_kram_start;
-	unsigned long kram_end = (unsigned long)&_kram_end;
-	/* End of the managed RAM space.  */
-	unsigned long ram_end = ram_start + ram_len;
-	/* Address range of the interrupt vector table.  */
-	unsigned long intv_start = (unsigned long)&_intv_start;
-	unsigned long intv_end = (unsigned long)&_intv_end;
-	/* True if the interrupt vectors are in the managed RAM area.  */
-	int intv_in_ram = (intv_end > ram_start && intv_start < ram_end);
-	/* True if the interrupt vectors are inside the kernel's RAM.  */
-	int intv_in_kram = (intv_end > kram_start && intv_start < kram_end);
-	/* A pointer to an optional function that reserves platform-specific
-	   memory regions.  We declare the pointer `volatile' to avoid gcc
-	   turning the call into a static call (the problem is that since
-	   it's a weak symbol, a static call may end up trying to reference
-	   the location 0x0, which is not always reachable).  */
-	void (*volatile mrb) (void) = mach_reserve_bootmem;
-	/* The bootmem allocator's allocation bitmap.  */
-	unsigned long bootmap = (unsigned long)&_bootmap;
-	unsigned long bootmap_len;
-
-	/* Round bootmap location up to next page.  */
-	bootmap = PAGE_TO_ADDR (ADDR_TO_PAGE_UP (bootmap));
-
-	/* Initialize bootmem allocator.  */
-	bootmap_len = init_bootmem_node (NODE_DATA (0),
-					 ADDR_TO_PAGE (bootmap),
-					 ADDR_TO_PAGE (PAGE_OFFSET),
-					 ADDR_TO_PAGE (ram_end));
-
-	/* Now make the RAM actually allocatable (it starts out `reserved'). */
-	free_bootmem (ram_start, ram_len);
-
-	if (kram_end > kram_start)
-		/* Reserve the RAM part of the kernel's address space, so it
-		   doesn't get allocated.  */
-		reserve_bootmem(kram_start, kram_end - kram_start,
-				BOOTMEM_DEFAULT);
-	
-	if (intv_in_ram && !intv_in_kram)
-		/* Reserve the interrupt vector space.  */
-		reserve_bootmem(intv_start, intv_end - intv_start,
-				BOOTMEM_DEFAULT);
-
-	if (bootmap >= ram_start && bootmap < ram_end)
-		/* Reserve the bootmap space.  */
-		reserve_bootmem(bootmap, bootmap_len,
-				BOOTMEM_DEFAULT);
-
-	/* Reserve the memory used by the root filesystem image if it's
-	   in RAM.  */
-	if (&_root_fs_image_end > &_root_fs_image_start
-	    && (unsigned long)&_root_fs_image_start >= ram_start
-	    && (unsigned long)&_root_fs_image_start < ram_end)
-		reserve_bootmem ((unsigned long)&_root_fs_image_start,
-				 &_root_fs_image_end - &_root_fs_image_start,
-				 BOOTMEM_DEFAULT);
-
-	/* Let the platform-dependent code reserve some too.  */
-	if (mrb)
-		(*mrb) ();
-}
-
-/* Tell the kernel about what RAM it may use for memory allocation.  */
-static void __init
-init_mem_alloc (unsigned long ram_start, unsigned long ram_len)
-{
-	unsigned i;
-	unsigned long zones_size[MAX_NR_ZONES];
-
-	init_bootmem_alloc (ram_start, ram_len);
-
-	for (i = 0; i < MAX_NR_ZONES; i++)
-		zones_size[i] = 0;
-
-	/* We stuff all the memory into one area, which includes the
-	   initial gap from PAGE_OFFSET to ram_start.  */
-	zones_size[ZONE_DMA]
-		= ADDR_TO_PAGE (ram_len + (ram_start - PAGE_OFFSET));
-
-	/* The allocator is very picky about the address of the first
-	   allocatable page -- it must be at least as aligned as the
-	   maximum allocation -- so try to detect cases where it will get
-	   confused and signal them at compile time (this is a common
-	   problem when porting to a new platform with ).  There is a
-	   similar runtime check in free_area_init_core.  */
-#if ((PAGE_OFFSET >> PAGE_SHIFT) & ((1UL << (MAX_ORDER - 1)) - 1))
-#error MAX_ORDER is too large for given PAGE_OFFSET (use CONFIG_FORCE_MAX_ZONEORDER to change it)
-#endif
-	NODE_DATA(0)->node_mem_map = NULL;
-	free_area_init_node(0, zones_size, ADDR_TO_PAGE (PAGE_OFFSET), 0);
-}
-
-
-
-/* Taken from m68knommu */
-void show_mem(void)
-{
-    unsigned long i;
-    int free = 0, total = 0, reserved = 0, shared = 0;
-    int cached = 0;
-
-    printk(KERN_INFO "\nMem-info:\n");
-    show_free_areas();
-    i = max_mapnr;
-    while (i-- > 0) {
-	total++;
-	if (PageReserved(mem_map+i))
-	    reserved++;
-	else if (PageSwapCache(mem_map+i))
-	    cached++;
-	else if (!page_count(mem_map+i))
-	    free++;
-	else
-	    shared += page_count(mem_map+i) - 1;
-    }
-    printk(KERN_INFO "%d pages of RAM\n",total);
-    printk(KERN_INFO "%d free pages\n",free);
-    printk(KERN_INFO "%d reserved pages\n",reserved);
-    printk(KERN_INFO "%d pages shared\n",shared);
-    printk(KERN_INFO "%d pages swap cached\n",cached);
-}
diff --git a/arch/v850/kernel/signal.c b/arch/v850/kernel/signal.c
deleted file mode 100644
index bf166e7e762c..000000000000
--- a/arch/v850/kernel/signal.c
+++ /dev/null
@@ -1,523 +0,0 @@
-/*
- * arch/v850/kernel/signal.c -- Signal handling
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *  Copyright (C) 1999,2000,2002  Niibe Yutaka & Kaz Kojima
- *  Copyright (C) 1991,1992  Linus Torvalds
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * 1997-11-28  Modified for POSIX.1b signals by Richard Henderson
- *
- * This file was derived from the sh version, arch/sh/kernel/signal.c
- */
-
-#include <linux/mm.h>
-#include <linux/smp.h>
-#include <linux/kernel.h>
-#include <linux/signal.h>
-#include <linux/errno.h>
-#include <linux/wait.h>
-#include <linux/ptrace.h>
-#include <linux/unistd.h>
-#include <linux/stddef.h>
-#include <linux/personality.h>
-#include <linux/tty.h>
-
-#include <asm/ucontext.h>
-#include <asm/uaccess.h>
-#include <asm/pgtable.h>
-#include <asm/pgalloc.h>
-#include <asm/thread_info.h>
-#include <asm/cacheflush.h>
-
-#define DEBUG_SIG 0
-
-#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
-
-asmlinkage int do_signal(struct pt_regs *regs, sigset_t *oldset);
-
-/*
- * Atomically swap in the new signal mask, and wait for a signal.
- */
-asmlinkage int
-sys_sigsuspend(old_sigset_t mask, struct pt_regs *regs)
-{
-	sigset_t saveset;
-
-	mask &= _BLOCKABLE;
-	spin_lock_irq(&current->sighand->siglock);
-	saveset = current->blocked;
-	siginitset(&current->blocked, mask);
-	recalc_sigpending();
-	spin_unlock_irq(&current->sighand->siglock);
-
-	regs->gpr[GPR_RVAL] = -EINTR;
-	while (1) {
-		current->state = TASK_INTERRUPTIBLE;
-		schedule();
-		if (do_signal(regs, &saveset))
-			return -EINTR;
-	}
-}
-
-asmlinkage int
-sys_rt_sigsuspend(sigset_t *unewset, size_t sigsetsize,
-		  struct pt_regs *regs)
-{
-	sigset_t saveset, newset;
-
-	/* XXX: Don't preclude handling different sized sigset_t's.  */
-	if (sigsetsize != sizeof(sigset_t))
-		return -EINVAL;
-
-	if (copy_from_user(&newset, unewset, sizeof(newset)))
-		return -EFAULT;
-	sigdelsetmask(&newset, ~_BLOCKABLE);
-	spin_lock_irq(&current->sighand->siglock);
-	saveset = current->blocked;
-	current->blocked = newset;
-	recalc_sigpending();
-	spin_unlock_irq(&current->sighand->siglock);
-
-	regs->gpr[GPR_RVAL] = -EINTR;
-	while (1) {
-		current->state = TASK_INTERRUPTIBLE;
-		schedule();
-		if (do_signal(regs, &saveset))
-			return -EINTR;
-	}
-}
-
-asmlinkage int 
-sys_sigaction(int sig, const struct old_sigaction *act,
-	      struct old_sigaction *oact)
-{
-	struct k_sigaction new_ka, old_ka;
-	int ret;
-
-	if (act) {
-		old_sigset_t mask;
-		if (!access_ok(VERIFY_READ, act, sizeof(*act)) ||
-		    __get_user(new_ka.sa.sa_handler, &act->sa_handler) ||
-		    __get_user(new_ka.sa.sa_restorer, &act->sa_restorer))
-			return -EFAULT;
-		__get_user(new_ka.sa.sa_flags, &act->sa_flags);
-		__get_user(mask, &act->sa_mask);
-		siginitset(&new_ka.sa.sa_mask, mask);
-	}
-
-	ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
-
-	if (!ret && oact) {
-		if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) ||
-		    __put_user(old_ka.sa.sa_handler, &oact->sa_handler) ||
-		    __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer))
-			return -EFAULT;
-		__put_user(old_ka.sa.sa_flags, &oact->sa_flags);
-		__put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask);
-	}
-
-	return ret;
-}
-
-asmlinkage int
-sys_sigaltstack(const stack_t *uss, stack_t *uoss,
-		struct pt_regs *regs)
-{
-	return do_sigaltstack(uss, uoss, regs->gpr[GPR_SP]);
-}
-
-
-/*
- * Do a signal return; undo the signal stack.
- */
-
-struct sigframe
-{
-	struct sigcontext sc;
-	unsigned long extramask[_NSIG_WORDS-1];
-	unsigned long tramp[2];	/* signal trampoline */
-};
-
-struct rt_sigframe
-{
-	struct siginfo info;
-	struct ucontext uc;
-	unsigned long tramp[2];	/* signal trampoline */
-};
-
-static int
-restore_sigcontext(struct pt_regs *regs, struct sigcontext *sc, int *rval_p)
-{
-	unsigned int err = 0;
-
-#define COPY(x)		err |= __get_user(regs->x, &sc->regs.x)
-	COPY(gpr[0]);	COPY(gpr[1]);	COPY(gpr[2]);	COPY(gpr[3]);
-	COPY(gpr[4]);	COPY(gpr[5]);	COPY(gpr[6]);	COPY(gpr[7]);
-	COPY(gpr[8]);	COPY(gpr[9]);	COPY(gpr[10]);	COPY(gpr[11]);
-	COPY(gpr[12]);	COPY(gpr[13]);	COPY(gpr[14]);	COPY(gpr[15]);
-	COPY(gpr[16]);	COPY(gpr[17]);	COPY(gpr[18]);	COPY(gpr[19]);
-	COPY(gpr[20]);	COPY(gpr[21]);	COPY(gpr[22]);	COPY(gpr[23]);
-	COPY(gpr[24]);	COPY(gpr[25]);	COPY(gpr[26]);	COPY(gpr[27]);
-	COPY(gpr[28]);	COPY(gpr[29]);	COPY(gpr[30]);	COPY(gpr[31]);
-	COPY(pc);	COPY(psw);
-	COPY(ctpc);	COPY(ctpsw);	COPY(ctbp);
-#undef COPY
-
-	return err;
-}
-
-asmlinkage int sys_sigreturn(struct pt_regs *regs)
-{
-	struct sigframe *frame = (struct sigframe *)regs->gpr[GPR_SP];
-	sigset_t set;
-	int rval;
-
-	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
-		goto badframe;
-
-	if (__get_user(set.sig[0], &frame->sc.oldmask)
-	    || (_NSIG_WORDS > 1
-		&& __copy_from_user(&set.sig[1], &frame->extramask,
-				    sizeof(frame->extramask))))
-		goto badframe;
-
-	sigdelsetmask(&set, ~_BLOCKABLE);
-	spin_lock_irq(&current->sighand->siglock);
-	current->blocked = set;
-	recalc_sigpending();
-	spin_unlock_irq(&current->sighand->siglock);
-
-	if (restore_sigcontext(regs, &frame->sc, &rval))
-		goto badframe;
-	return rval;
-
-badframe:
-	force_sig(SIGSEGV, current);
-	return 0;
-}
-
-asmlinkage int sys_rt_sigreturn(struct pt_regs *regs)
-{
-	struct rt_sigframe *frame = (struct rt_sigframe *)regs->gpr[GPR_SP];
-	sigset_t set;
-	stack_t st;
-	int rval;
-
-	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
-		goto badframe;
-
-	if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
-		goto badframe;
-
-	sigdelsetmask(&set, ~_BLOCKABLE);
-	spin_lock_irq(&current->sighand->siglock);
-	current->blocked = set;
-	recalc_sigpending();
-	spin_unlock_irq(&current->sighand->siglock);
-
-	if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &rval))
-		goto badframe;
-
-	if (__copy_from_user(&st, &frame->uc.uc_stack, sizeof(st)))
-		goto badframe;
-	/* It is more difficult to avoid calling this function than to
-	   call it and ignore errors.  */
-	do_sigaltstack(&st, NULL, regs->gpr[GPR_SP]);
-
-	return rval;
-
-badframe:
-	force_sig(SIGSEGV, current);
-	return 0;
-}	
-
-/*
- * Set up a signal frame.
- */
-
-static int
-setup_sigcontext(struct sigcontext *sc, struct pt_regs *regs,
-		 unsigned long mask)
-{
-	int err = 0;
-
-#define COPY(x)		err |= __put_user(regs->x, &sc->regs.x)
-	COPY(gpr[0]);	COPY(gpr[1]);	COPY(gpr[2]);	COPY(gpr[3]);
-	COPY(gpr[4]);	COPY(gpr[5]);	COPY(gpr[6]);	COPY(gpr[7]);
-	COPY(gpr[8]);	COPY(gpr[9]);	COPY(gpr[10]);	COPY(gpr[11]);
-	COPY(gpr[12]);	COPY(gpr[13]);	COPY(gpr[14]);	COPY(gpr[15]);
-	COPY(gpr[16]);	COPY(gpr[17]);	COPY(gpr[18]);	COPY(gpr[19]);
-	COPY(gpr[20]);	COPY(gpr[21]);	COPY(gpr[22]);	COPY(gpr[23]);
-	COPY(gpr[24]);	COPY(gpr[25]);	COPY(gpr[26]);	COPY(gpr[27]);
-	COPY(gpr[28]);	COPY(gpr[29]);	COPY(gpr[30]);	COPY(gpr[31]);
-	COPY(pc);	COPY(psw);
-	COPY(ctpc);	COPY(ctpsw);	COPY(ctbp);
-#undef COPY
-
-	err |= __put_user(mask, &sc->oldmask);
-
-	return err;
-}
-
-/*
- * Determine which stack to use..
- */
-static inline void *
-get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size)
-{
-	/* Default to using normal stack */
-	unsigned long sp = regs->gpr[GPR_SP];
-
-	if ((ka->sa.sa_flags & SA_ONSTACK) != 0 && ! sas_ss_flags(sp))
-		sp = current->sas_ss_sp + current->sas_ss_size;
-
-	return (void *)((sp - frame_size) & -8UL);
-}
-
-static void setup_frame(int sig, struct k_sigaction *ka,
-			sigset_t *set, struct pt_regs *regs)
-{
-	struct sigframe *frame;
-	int err = 0;
-	int signal;
-
-	frame = get_sigframe(ka, regs, sizeof(*frame));
-
-	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
-		goto give_sigsegv;
-
-	signal = current_thread_info()->exec_domain
-		&& current_thread_info()->exec_domain->signal_invmap
-		&& sig < 32
-		? current_thread_info()->exec_domain->signal_invmap[sig]
-		: sig;
-
-	err |= setup_sigcontext(&frame->sc, regs, set->sig[0]);
-
-	if (_NSIG_WORDS > 1) {
-		err |= __copy_to_user(frame->extramask, &set->sig[1],
-				      sizeof(frame->extramask));
-	}
-
-	/* Set up to return from userspace.  If provided, use a stub
-	   already in userspace.  */
-	if (ka->sa.sa_flags & SA_RESTORER) {
-		regs->gpr[GPR_LP] = (unsigned long) ka->sa.sa_restorer;
-	} else {
-		/* Note, these encodings are _little endian_!  */
-
-		/* addi  __NR_sigreturn, r0, r12  */
-		err |= __put_user(0x6600 | (__NR_sigreturn << 16),
-				  frame->tramp + 0);
-		/* trap 0 */
-		err |= __put_user(0x010007e0,
-				  frame->tramp + 1);
-
-		regs->gpr[GPR_LP] = (unsigned long)frame->tramp;
-
-		flush_cache_sigtramp (regs->gpr[GPR_LP]);
-	}
-
-	if (err)
-		goto give_sigsegv;
-
-	/* Set up registers for signal handler.  */
-	regs->pc = (v850_reg_t) ka->sa.sa_handler;
-	regs->gpr[GPR_SP] = (v850_reg_t)frame;
-	/* Signal handler args:  */
-	regs->gpr[GPR_ARG0] = signal; /* arg 0: signum */
-	regs->gpr[GPR_ARG1] = (v850_reg_t)&frame->sc;/* arg 1: sigcontext */
-
-	set_fs(USER_DS);
-
-#if DEBUG_SIG
-	printk("SIG deliver (%s:%d): sp=%p pc=%08lx ra=%08lx\n",
-		current->comm, current->pid, frame, regs->pc, );
-#endif
-
-	return;
-
-give_sigsegv:
-	force_sigsegv(sig, current);
-}
-
-static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
-			   sigset_t *set, struct pt_regs *regs)
-{
-	struct rt_sigframe *frame;
-	int err = 0;
-	int signal;
-
-	frame = get_sigframe(ka, regs, sizeof(*frame));
-
-	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
-		goto give_sigsegv;
-
-	signal = current_thread_info()->exec_domain
-		&& current_thread_info()->exec_domain->signal_invmap
-		&& sig < 32
-		? current_thread_info()->exec_domain->signal_invmap[sig]
-		: sig;
-
-	err |= copy_siginfo_to_user(&frame->info, info);
-
-	/* Create the ucontext.  */
-	err |= __put_user(0, &frame->uc.uc_flags);
-	err |= __put_user(0, &frame->uc.uc_link);
-	err |= __put_user((void *)current->sas_ss_sp,
-			  &frame->uc.uc_stack.ss_sp);
-	err |= __put_user(sas_ss_flags(regs->gpr[GPR_SP]),
-			  &frame->uc.uc_stack.ss_flags);
-	err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
-	err |= setup_sigcontext(&frame->uc.uc_mcontext,
-			        regs, set->sig[0]);
-	err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
-
-	/* Set up to return from userspace.  If provided, use a stub
-	   already in userspace.  */
-	if (ka->sa.sa_flags & SA_RESTORER) {
-		regs->gpr[GPR_LP] = (unsigned long) ka->sa.sa_restorer;
-	} else {
-		/* Note, these encodings are _little endian_!  */
-
-		/* addi  __NR_sigreturn, r0, r12  */
-		err |= __put_user(0x6600 | (__NR_sigreturn << 16),
-				  frame->tramp + 0);
-		/* trap 0 */
-		err |= __put_user(0x010007e0,
-				  frame->tramp + 1);
-
-		regs->gpr[GPR_LP] = (unsigned long)frame->tramp;
-
-		flush_cache_sigtramp (regs->gpr[GPR_LP]);
-	}
-
-	if (err)
-		goto give_sigsegv;
-
-	/* Set up registers for signal handler.  */
-	regs->pc = (v850_reg_t) ka->sa.sa_handler;
-	regs->gpr[GPR_SP] = (v850_reg_t)frame;
-	/* Signal handler args:  */
-	regs->gpr[GPR_ARG0] = signal; /* arg 0: signum */
-	regs->gpr[GPR_ARG1] = (v850_reg_t)&frame->info; /* arg 1: siginfo */
-	regs->gpr[GPR_ARG2] = (v850_reg_t)&frame->uc; /* arg 2: ucontext */
-
-	set_fs(USER_DS);
-
-#if DEBUG_SIG
-	printk("SIG deliver (%s:%d): sp=%p pc=%08lx pr=%08lx\n",
-		current->comm, current->pid, frame, regs->pc, regs->pr);
-#endif
-
-	return;
-
-give_sigsegv:
-	force_sigsegv(sig, current);
-}
-
-/*
- * OK, we're invoking a handler
- */	
-
-static void
-handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
-	      sigset_t *oldset,	struct pt_regs * regs)
-{
-	/* Are we from a system call? */
-	if (PT_REGS_SYSCALL (regs)) {
-		/* If so, check system call restarting.. */
-		switch (regs->gpr[GPR_RVAL]) {
-		case -ERESTART_RESTARTBLOCK:
-			current_thread_info()->restart_block.fn =
-				do_no_restart_syscall;
-			/* fall through */
-		case -ERESTARTNOHAND:
-			regs->gpr[GPR_RVAL] = -EINTR;
-			break;
-
-		case -ERESTARTSYS:
-			if (!(ka->sa.sa_flags & SA_RESTART)) {
-				regs->gpr[GPR_RVAL] = -EINTR;
-				break;
-			}
-			/* fallthrough */
-		case -ERESTARTNOINTR:
-			regs->gpr[12] = PT_REGS_SYSCALL (regs);
-			regs->pc -= 4; /* Size of `trap 0' insn.  */
-		}
-
-		PT_REGS_SET_SYSCALL (regs, 0);
-	}
-
-	/* Set up the stack frame */
-	if (ka->sa.sa_flags & SA_SIGINFO)
-		setup_rt_frame(sig, ka, info, oldset, regs);
-	else
-		setup_frame(sig, ka, oldset, regs);
-
-	spin_lock_irq(&current->sighand->siglock);
-	sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
-	if (!(ka->sa.sa_flags & SA_NODEFER))
-		sigaddset(&current->blocked,sig);
-	recalc_sigpending();
-	spin_unlock_irq(&current->sighand->siglock);
-}
-
-/*
- * Note that 'init' is a special process: it doesn't get signals it doesn't
- * want to handle. Thus you cannot kill init even with a SIGKILL even by
- * mistake.
- *
- * Note that we go through the signals twice: once to check the signals that
- * the kernel can handle, and then we build all the user-level signal handling
- * stack-frames in one go after that.
- */
-int do_signal(struct pt_regs *regs, sigset_t *oldset)
-{
-	siginfo_t info;
-	int signr;
-	struct k_sigaction ka;
-
-	/*
-	 * We want the common case to go fast, which
-	 * is why we may in certain cases get here from
-	 * kernel mode. Just return without doing anything
-	 * if so.
-	 */
-	if (!user_mode(regs))
-		return 1;
-
-	if (!oldset)
-		oldset = &current->blocked;
-
-	signr = get_signal_to_deliver(&info, &ka, regs, NULL);
-	if (signr > 0) {
-		/* Whee!  Actually deliver the signal.  */
-		handle_signal(signr, &info, &ka, oldset, regs);
-		return 1;
-	}
-
-	/* Did we come from a system call? */
-	if (PT_REGS_SYSCALL (regs)) {
-		int rval = (int)regs->gpr[GPR_RVAL];
-		/* Restart the system call - no handlers present */
-		if (rval == -ERESTARTNOHAND
-		    || rval == -ERESTARTSYS
-		    || rval == -ERESTARTNOINTR)
-		{
-			regs->gpr[12] = PT_REGS_SYSCALL (regs);
-			regs->pc -= 4; /* Size of `trap 0' insn.  */
-		}
-		else if (rval == -ERESTART_RESTARTBLOCK) {
-			regs->gpr[12] = __NR_restart_syscall;
-			regs->pc -= 4; /* Size of `trap 0' insn.  */
-		}
-	}
-	return 0;
-}
diff --git a/arch/v850/kernel/sim.c b/arch/v850/kernel/sim.c
deleted file mode 100644
index 467b4aa0acdd..000000000000
--- a/arch/v850/kernel/sim.c
+++ /dev/null
@@ -1,172 +0,0 @@
-/*
- * arch/v850/kernel/sim.c -- Machine-specific stuff for GDB v850e simulator
- *
- *  Copyright (C) 2001,02  NEC Corporation
- *  Copyright (C) 2001,02  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/mm.h>
-#include <linux/swap.h>
-#include <linux/bootmem.h>
-#include <linux/irq.h>
-
-#include <asm/atomic.h>
-#include <asm/page.h>
-#include <asm/machdep.h>
-#include <asm/simsyscall.h>
-
-#include "mach.h"
-
-/* The name of a file containing the root filesystem.  */
-#define ROOT_FS "rootfs.image"
-
-extern void simcons_setup (void);
-extern void simcons_poll_ttys (void);
-extern void set_mem_root (void *addr, size_t len, char *cmd_line);
-
-static int read_file (const char *name,
-		      unsigned long *addr, unsigned long *len,
-		      const char **err);
-
-void __init mach_setup (char **cmdline)
-{
-	const char *err;
-	unsigned long root_dev_addr, root_dev_len;
-
-	simcons_setup ();
-
-	printk (KERN_INFO "Reading root filesystem: %s", ROOT_FS);
-
-	if (read_file (ROOT_FS, &root_dev_addr, &root_dev_len, &err)) {
-		printk (" (size %luK)\n", root_dev_len / 1024);
-		set_mem_root ((void *)root_dev_addr, (size_t)root_dev_len,
-			      *cmdline);
-	} else
-		printk ("...%s failed!\n", err);
-}
-
-void mach_get_physical_ram (unsigned long *ram_start, unsigned long *ram_len)
-{
-	*ram_start = RAM_ADDR;
-	*ram_len = RAM_SIZE;
-}
-
-void __init mach_sched_init (struct irqaction *timer_action)
-{
-	/* ...do magic timer initialization?...  */
-	mach_tick = simcons_poll_ttys;
-	setup_irq (0, timer_action);
-}
-
-
-static void irq_nop (unsigned irq) { }
-static unsigned irq_zero (unsigned irq) { return 0; }
-
-static struct hw_interrupt_type sim_irq_type = {
-	.typename = "IRQ",
-	.startup = irq_zero,		/* startup */
-	.shutdown = irq_nop,		/* shutdown */
-	.enable = irq_nop,		/* enable */
-	.disable = irq_nop,		/* disable */
-	.ack = irq_nop,		/* ack */
-	.end = irq_nop,		/* end */
-};
-
-void __init mach_init_irqs (void)
-{
-	init_irq_handlers (0, NUM_MACH_IRQS, 1, &sim_irq_type);
-}
-
-
-void mach_gettimeofday (struct timespec *tv)
-{
-	long timeval[2], timezone[2];
-	int rval = V850_SIM_SYSCALL (gettimeofday, timeval, timezone);
-	if (rval == 0) {
-		tv->tv_sec = timeval[0];
-		tv->tv_nsec = timeval[1] * 1000;
-	}
-}
-
-void machine_restart (char *__unused)
-{
-	V850_SIM_SYSCALL (write, 1, "RESTART\n", 8);
-	V850_SIM_SYSCALL (exit, 0);
-}
-
-void machine_halt (void)
-{
-	V850_SIM_SYSCALL (write, 1, "HALT\n", 5);
-	V850_SIM_SYSCALL (exit, 0);
-}
-
-void machine_power_off (void)
-{
-	V850_SIM_SYSCALL (write, 1, "POWER OFF\n", 10);
-	V850_SIM_SYSCALL (exit, 0);
-}
-
-
-/* Load data from a file called NAME into ram.  The address and length
-   of the data image are returned in ADDR and LEN.  */
-static int __init
-read_file (const char *name,
-	   unsigned long *addr, unsigned long *len,
-	   const char **err)
-{
-	int rval, fd;
-	unsigned long cur, left;
-	/* Note this is not a normal stat buffer, it's an ad-hoc
-	   structure defined by the simulator.  */
-	unsigned long stat_buf[10];
-
-	/* Stat the file to find out the length.  */
-	rval = V850_SIM_SYSCALL (stat, name, stat_buf);
-	if (rval < 0) {
-		if (err) *err = "stat";
-		return 0;
-	}
-	*len = stat_buf[4];
-
-	/* Open the file; `0' is O_RDONLY.  */
-	fd = V850_SIM_SYSCALL (open, name, 0);
-	if (fd < 0) {
-		if (err) *err = "open";
-		return 0;
-	}
-
-	*addr = (unsigned long)alloc_bootmem(*len);
-	if (! *addr) {
-		V850_SIM_SYSCALL (close, fd);
-		if (err) *err = "alloc_bootmem";
-		return 0;
-	}
-
-	cur = *addr;
-	left = *len;
-	while (left > 0) {
-		int chunk = V850_SIM_SYSCALL (read, fd, cur, left);
-		if (chunk <= 0)
-			break;
-		cur += chunk;
-		left -= chunk;
-	}
-	V850_SIM_SYSCALL (close, fd);
-	if (left > 0) {
-		/* Some read failed.  */
-		free_bootmem (*addr, *len);
-		if (err) *err = "read";
-		return 0;
-	}
-
-	return 1;
-}
diff --git a/arch/v850/kernel/sim.ld b/arch/v850/kernel/sim.ld
deleted file mode 100644
index 101885f3c9f0..000000000000
--- a/arch/v850/kernel/sim.ld
+++ /dev/null
@@ -1,13 +0,0 @@
-/* Linker script for the gdb v850e simulator (CONFIG_V850E_SIM).  */
-
-MEMORY {
-	/* Interrupt vectors.  */
-	INTV  : ORIGIN = 0x0, LENGTH = 0xe0
-	/* Main RAM.  */
-	RAM   : ORIGIN = RAM_ADDR, LENGTH = RAM_SIZE
-}
-
-SECTIONS {
-	.intv : { INTV_CONTENTS } > INTV
-	.ram : { RAMK_KRAM_CONTENTS } > RAM
-}
diff --git a/arch/v850/kernel/sim85e2.c b/arch/v850/kernel/sim85e2.c
deleted file mode 100644
index 566dde5e6070..000000000000
--- a/arch/v850/kernel/sim85e2.c
+++ /dev/null
@@ -1,195 +0,0 @@
-/*
- * arch/v850/kernel/sim85e2.c -- Machine-specific stuff for
- *	V850E2 RTL simulator
- *
- *  Copyright (C) 2002,03  NEC Electronics Corporation
- *  Copyright (C) 2002,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/mm.h>
-#include <linux/swap.h>
-#include <linux/bootmem.h>
-#include <linux/irq.h>
-
-#include <asm/atomic.h>
-#include <asm/page.h>
-#include <asm/machdep.h>
-
-#include "mach.h"
-
-
-/* There are 4 possible areas we can use:
-
-     IRAM (1MB) is fast for instruction fetches, but slow for data
-     DRAM (1020KB) is fast for data, but slow for instructions
-     ERAM is cached, so should be fast for both insns and data
-     SDRAM is external DRAM, similar to ERAM
-*/
-
-#define INIT_MEMC_FOR_SDRAM
-#define USE_SDRAM_AREA
-#define KERNEL_IN_SDRAM_AREA
-
-#define DCACHE_MODE	V850E2_CACHE_BTSC_DCM_WT
-/*#define DCACHE_MODE	V850E2_CACHE_BTSC_DCM_WB_ALLOC*/
-
-#ifdef USE_SDRAM_AREA
-#define RAM_START 	SDRAM_ADDR
-#define RAM_END		(SDRAM_ADDR + SDRAM_SIZE)
-#else
-/* When we use DRAM, we need to account for the fact that the end of it is
-   used for R0_RAM.  */
-#define RAM_START	DRAM_ADDR
-#define RAM_END		R0_RAM_ADDR
-#endif
-
-
-extern void memcons_setup (void);
-
-
-#ifdef KERNEL_IN_SDRAM_AREA
-#define EARLY_INIT_SECTION_ATTR __attribute__ ((section (".early.text")))
-#else
-#define EARLY_INIT_SECTION_ATTR __init
-#endif
-
-void EARLY_INIT_SECTION_ATTR mach_early_init (void)
-{
-	/* The sim85e2 simulator tracks `undefined' values, so to make
-	   debugging easier, we begin by zeroing out all otherwise
-	   undefined registers.  This is not strictly necessary.
-
-	   The registers we zero are:
-	       Every GPR except:
-	           stack-pointer (r3)
-		   task-pointer (r16)
-		   our return addr (r31)
-	       Every system register (SPR) that we know about except for
-	       the PSW (SPR 5), which we zero except for the
-	       disable-interrupts bit.
-	*/
-
-	/* GPRs */
-	asm volatile ("             mov r0, r1 ; mov r0, r2              ");
-	asm volatile ("mov r0, r4 ; mov r0, r5 ; mov r0, r6 ; mov r0, r7 ");
-	asm volatile ("mov r0, r8 ; mov r0, r9 ; mov r0, r10; mov r0, r11");
-	asm volatile ("mov r0, r12; mov r0, r13; mov r0, r14; mov r0, r15");
-	asm volatile ("             mov r0, r17; mov r0, r18; mov r0, r19");
-	asm volatile ("mov r0, r20; mov r0, r21; mov r0, r22; mov r0, r23");
-	asm volatile ("mov r0, r24; mov r0, r25; mov r0, r26; mov r0, r27");
-	asm volatile ("mov r0, r28; mov r0, r29; mov r0, r30");
-
-	/* SPRs */
-	asm volatile ("ldsr r0, 0;  ldsr r0, 1;  ldsr r0, 2;  ldsr r0, 3");
-	asm volatile ("ldsr r0, 4");
-	asm volatile ("addi 0x20, r0, r1; ldsr r1, 5"); /* PSW */
-	asm volatile ("ldsr r0, 16; ldsr r0, 17; ldsr r0, 18; ldsr r0, 19");
-	asm volatile ("ldsr r0, 20");
-
-
-#ifdef INIT_MEMC_FOR_SDRAM
-	/* Settings for SDRAM controller.  */
-	V850E2_VSWC   = 0x0042;
-	V850E2_BSC    = 0x9286;
-	V850E2_BCT(0) = 0xb000;	/* was: 0 */
-	V850E2_BCT(1) = 0x000b;
-	V850E2_ASC    = 0;
-	V850E2_LBS    = 0xa9aa;	/* was: 0xaaaa */
-	V850E2_LBC(0) = 0;
-	V850E2_LBC(1) = 0;	/* was: 0x3 */
-	V850E2_BCC    = 0;
-	V850E2_RFS(4) = 0x800a;	/* was: 0xf109 */
-	V850E2_SCR(4) = 0x2091;	/* was: 0x20a1 */
-	V850E2_RFS(3) = 0x800c;
-	V850E2_SCR(3) = 0x20a1;
-	V850E2_DWC(0) = 0;
-	V850E2_DWC(1) = 0;
-#endif
-
-#if 0
-#ifdef CONFIG_V850E2_SIM85E2S
-	/* Turn on the caches.  */
-	V850E2_CACHE_BTSC = V850E2_CACHE_BTSC_ICM | DCACHE_MODE;
-	V850E2_BHC  = 0x1010;
-#elif CONFIG_V850E2_SIM85E2C
-	V850E2_CACHE_BTSC |= (V850E2_CACHE_BTSC_ICM | V850E2_CACHE_BTSC_DCM0);
-	V850E2_BUSM_BHC = 0xFFFF;
-#endif
-#else
-	V850E2_BHC  = 0;
-#endif
-
-	/* Don't stop the simulator at `halt' instructions.  */
-	SIM85E2_NOTHAL = 1;
-
-	/* Ensure that the simulator halts on a panic, instead of going
-	   into an infinite loop inside the panic function.  */
-	panic_timeout = -1;
-}
-
-void __init mach_setup (char **cmdline)
-{
-	memcons_setup ();
-}
-
-void mach_get_physical_ram (unsigned long *ram_start, unsigned long *ram_len)
-{
-	*ram_start = RAM_START;
-	*ram_len = RAM_END - RAM_START;
-}
-
-void __init mach_sched_init (struct irqaction *timer_action)
-{
-	/* The simulator actually cycles through all interrupts
-	   periodically.  We just pay attention to IRQ0, which gives us
-	   1/64 the rate of the periodic interrupts.  */
-	setup_irq (0, timer_action);
-}
-
-void mach_gettimeofday (struct timespec *tv)
-{
-	tv->tv_sec = 0;
-	tv->tv_nsec = 0;
-}
-
-/* Interrupts */
-
-struct v850e_intc_irq_init irq_inits[] = {
-	{ "IRQ", 0, NUM_MACH_IRQS, 1, 7 },
-	{ 0 }
-};
-struct hw_interrupt_type hw_itypes[1];
-
-/* Initialize interrupts.  */
-void __init mach_init_irqs (void)
-{
-	v850e_intc_init_irq_types (irq_inits, hw_itypes);
-}
-
-
-void machine_halt (void) __attribute__ ((noreturn));
-void machine_halt (void)
-{
-	SIM85E2_SIMFIN = 0;	/* Halt immediately.  */
-	for (;;) {}
-}
-
-void machine_restart (char *__unused)
-{
-	machine_halt ();
-}
-
-void machine_power_off (void)
-{
-	machine_halt ();
-}
-
diff --git a/arch/v850/kernel/sim85e2.ld b/arch/v850/kernel/sim85e2.ld
deleted file mode 100644
index 7470fd2ffb5b..000000000000
--- a/arch/v850/kernel/sim85e2.ld
+++ /dev/null
@@ -1,36 +0,0 @@
-/* Linker script for the sim85e2c simulator, which is a verilog simulation of
-   the V850E2 NA85E2C cpu core (CONFIG_V850E2_SIM85E2C).  */
-
-MEMORY {
-	/* 1MB of `instruction RAM', starting at 0.
-	   Instruction fetches are much faster from IRAM than from DRAM.  */
-	IRAM : ORIGIN = IRAM_ADDR, LENGTH = IRAM_SIZE
-
-	/* 1MB of `data RAM', below and contiguous with the I/O space.
-	   Data fetches are much faster from DRAM than from IRAM.  */
-	DRAM : ORIGIN = DRAM_ADDR, LENGTH = DRAM_SIZE
-
-	/* `external ram' (CS1 area), comes after IRAM.  */
-	ERAM : ORIGIN = ERAM_ADDR, LENGTH = ERAM_SIZE
-
-	/* Dynamic RAM; uses memory controller.  */
-	SDRAM : ORIGIN = SDRAM_ADDR, LENGTH = SDRAM_SIZE
-}
-
-SECTIONS {
-	.iram : {
-		INTV_CONTENTS
-		*arch/v850/kernel/head.o
-		*(.early.text)
-	} > IRAM
-	.dram : {
-		_memcons_output = . ;
-		. = . + 0x8000 ;
-		_memcons_output_end = . ;
-	} > DRAM
-	.sdram : {
-		/* We stick console output into a buffer here.  */
-		RAMK_KRAM_CONTENTS
-		ROOT_FS_CONTENTS
-	} > SDRAM
-}
diff --git a/arch/v850/kernel/simcons.c b/arch/v850/kernel/simcons.c
deleted file mode 100644
index 9973596ae304..000000000000
--- a/arch/v850/kernel/simcons.c
+++ /dev/null
@@ -1,161 +0,0 @@
-/*
- * arch/v850/kernel/simcons.c -- Console I/O for GDB v850e simulator
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#include <linux/kernel.h>
-#include <linux/console.h>
-#include <linux/tty.h>
-#include <linux/tty_flip.h>
-#include <linux/tty_driver.h>
-#include <linux/init.h>
-
-#include <asm/poll.h>
-#include <asm/string.h>
-#include <asm/simsyscall.h>
-
-
-/*  Low-level console. */
-
-static void simcons_write (struct console *co, const char *buf, unsigned len)
-{
-	V850_SIM_SYSCALL (write, 1, buf, len);
-}
-
-static int simcons_read (struct console *co, char *buf, unsigned len)
-{
-	return V850_SIM_SYSCALL (read, 0, buf, len);
-}
-
-static struct tty_driver *tty_driver;
-static struct tty_driver *simcons_device (struct console *c, int *index)
-{
-	*index = c->index;
-	return tty_driver;
-}
-
-static struct console simcons =
-{
-    .name	= "simcons",
-    .write	= simcons_write,
-    .read	= simcons_read,
-    .device	= simcons_device,
-    .flags	= CON_PRINTBUFFER,
-    .index	= -1,
-};
-
-/* Higher level TTY interface.  */
-
-int simcons_tty_open (struct tty_struct *tty, struct file *filp)
-{
-	return 0;
-}
-
-int simcons_tty_write (struct tty_struct *tty,
-		       const unsigned char *buf, int count)
-{
-	return V850_SIM_SYSCALL (write, 1, buf, count);
-}
-
-int simcons_tty_write_room (struct tty_struct *tty)
-{
-	/* Completely arbitrary.  */
-	return 0x100000;
-}
-
-int simcons_tty_chars_in_buffer (struct tty_struct *tty)
-{
-	/* We have no buffer.  */
-	return 0;
-}
-
-static const struct tty_operations ops = {
-	.open = simcons_tty_open,
-	.write = simcons_tty_write,
-	.write_room = simcons_tty_write_room,
-	.chars_in_buffer = simcons_tty_chars_in_buffer,
-};
-
-int __init simcons_tty_init (void)
-{
-	struct tty_driver *driver = alloc_tty_driver(1);
-	int err;
-	if (!driver)
-		return -ENOMEM;
-	driver->name = "simcons";
-	driver->major = TTY_MAJOR;
-	driver->minor_start = 64;
-	driver->type = TTY_DRIVER_TYPE_SYSCONS;
-	driver->init_termios = tty_std_termios;
-	tty_set_operations(driver, &ops);
-	err = tty_register_driver(driver);
-	if (err) {
-		put_tty_driver(driver);
-		return err;
-	}
-	tty_driver = driver;
-	return 0;
-}
-/* We use `late_initcall' instead of just `__initcall' as a workaround for
-   the fact that (1) simcons_tty_init can't be called before tty_init,
-   (2) tty_init is called via `module_init', (3) if statically linked,
-   module_init == device_init, and (4) there's no ordering of init lists.
-   We can do this easily because simcons is always statically linked, but
-   other tty drivers that depend on tty_init and which must use
-   `module_init' to declare their init routines are likely to be broken.  */
-late_initcall(simcons_tty_init);
-
-/* Poll for input on the console, and if there's any, deliver it to the
-   tty driver.  */
-void simcons_poll_tty (struct tty_struct *tty)
-{
-	char buf[32];	/* Not the nicest way to do it but I need it correct first */
-	int flip = 0, send_break = 0;
-	struct pollfd pfd;
-	pfd.fd = 0;
-	pfd.events = POLLIN;
-
-	if (V850_SIM_SYSCALL (poll, &pfd, 1, 0) > 0) {
-		if (pfd.revents & POLLIN) {
-			/* Real block hardware knows the transfer size before
-			   transfer so the new tty buffering doesn't try to handle
-			   this rather weird simulator specific case well */
-			int rd = V850_SIM_SYSCALL (read, 0, buf, 32);
-			if (rd > 0) {
-				tty_insert_flip_string(tty, buf, rd);
-				flip = 1;
-			} else
-				send_break = 1;
-		} else if (pfd.revents & POLLERR)
-			send_break = 1;
-	}
-
-	if (send_break) {
-		tty_insert_flip_char (tty, 0, TTY_BREAK);		
-		flip = 1;
-	}
-
-	if (flip)
-		tty_schedule_flip (tty);
-}
-
-void simcons_poll_ttys (void)
-{
-	if (tty_driver && tty_driver->ttys[0])
-		simcons_poll_tty (tty_driver->ttys[0]);
-}
-
-void simcons_setup (void)
-{
-	V850_SIM_SYSCALL (make_raw, 0);
-	register_console (&simcons);
-	printk (KERN_INFO "Console: GDB V850E simulator stdio\n");
-}
diff --git a/arch/v850/kernel/syscalls.c b/arch/v850/kernel/syscalls.c
deleted file mode 100644
index 1a83daf8e24f..000000000000
--- a/arch/v850/kernel/syscalls.c
+++ /dev/null
@@ -1,196 +0,0 @@
-/*
- * arch/v850/kernel/syscalls.c -- Various system-call definitions not
- * 	defined in machine-independent code
- *
- *  Copyright (C) 2001,02  NEC Corporation
- *  Copyright (C) 2001,02  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * This file was derived the ppc version, arch/ppc/kernel/syscalls.c
- * ... which was derived from "arch/i386/kernel/sys_i386.c" by Gary Thomas;
- *     modified by Cort Dougan (cort@cs.nmt.edu)
- *     and Paul Mackerras (paulus@cs.anu.edu.au).
- */
-
-#include <linux/errno.h>
-#include <linux/mm.h>
-#include <linux/smp.h>
-#include <linux/syscalls.h>
-#include <linux/sem.h>
-#include <linux/msg.h>
-#include <linux/shm.h>
-#include <linux/stat.h>
-#include <linux/mman.h>
-#include <linux/sys.h>
-#include <linux/ipc.h>
-#include <linux/utsname.h>
-#include <linux/file.h>
-
-#include <asm/uaccess.h>
-#include <asm/unistd.h>
-
-/*
- * sys_ipc() is the de-multiplexer for the SysV IPC calls..
- *
- * This is really horribly ugly.
- */
-int
-sys_ipc (uint call, int first, int second, int third, void *ptr, long fifth)
-{
-	int version, ret;
-
-	version = call >> 16; /* hack for backward compatibility */
-	call &= 0xffff;
-
-	ret = -EINVAL;
-	switch (call) {
-	case SEMOP:
-		ret = sys_semop (first, (struct sembuf *)ptr, second);
-		break;
-	case SEMGET:
-		ret = sys_semget (first, second, third);
-		break;
-	case SEMCTL:
-	{
-		union semun fourth;
-
-		if (!ptr)
-			break;
-		if ((ret = access_ok(VERIFY_READ, ptr, sizeof(long)) ? 0 : -EFAULT)
-		    || (ret = get_user(fourth.__pad, (void **)ptr)))
-			break;
-		ret = sys_semctl (first, second, third, fourth);
-		break;
-	}
-	case MSGSND:
-		ret = sys_msgsnd (first, (struct msgbuf *) ptr, second, third);
-		break;
-	case MSGRCV:
-		switch (version) {
-		case 0: {
-			struct ipc_kludge tmp;
-
-			if (!ptr)
-				break;
-			if ((ret = access_ok(VERIFY_READ, ptr, sizeof(tmp)) ? 0 : -EFAULT)
-			    || (ret = copy_from_user(&tmp,
-						(struct ipc_kludge *) ptr,
-						sizeof (tmp))))
-				break;
-			ret = sys_msgrcv (first, tmp.msgp, second, tmp.msgtyp,
-					  third);
-			break;
-			}
-		default:
-			ret = sys_msgrcv (first, (struct msgbuf *) ptr,
-					  second, fifth, third);
-			break;
-		}
-		break;
-	case MSGGET:
-		ret = sys_msgget ((key_t) first, second);
-		break;
-	case MSGCTL:
-		ret = sys_msgctl (first, second, (struct msqid_ds *) ptr);
-		break;
-	case SHMAT:
-		switch (version) {
-		default: {
-			ulong raddr;
-
-			if ((ret = access_ok(VERIFY_WRITE, (ulong*) third,
-					       sizeof(ulong)) ? 0 : -EFAULT))
-				break;
-			ret = do_shmat (first, (char *) ptr, second, &raddr);
-			if (ret)
-				break;
-			ret = put_user (raddr, (ulong *) third);
-			break;
-			}
-		case 1:	/* iBCS2 emulator entry point */
-			if (!segment_eq(get_fs(), get_ds()))
-				break;
-			ret = do_shmat (first, (char *) ptr, second,
-					 (ulong *) third);
-			break;
-		}
-		break;
-	case SHMDT: 
-		ret = sys_shmdt ((char *)ptr);
-		break;
-	case SHMGET:
-		ret = sys_shmget (first, second, third);
-		break;
-	case SHMCTL:
-		ret = sys_shmctl (first, second, (struct shmid_ds *) ptr);
-		break;
-	}
-
-	return ret;
-}
-
-static inline unsigned long
-do_mmap2 (unsigned long addr, size_t len,
-	 unsigned long prot, unsigned long flags,
-	 unsigned long fd, unsigned long pgoff)
-{
-	struct file * file = NULL;
-	int ret = -EBADF;
-
-	flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
-	if (! (flags & MAP_ANONYMOUS)) {
-		if (!(file = fget (fd)))
-			goto out;
-	}
-	
-	down_write (&current->mm->mmap_sem);
-	ret = do_mmap_pgoff (file, addr, len, prot, flags, pgoff);
-	up_write (&current->mm->mmap_sem);
-	if (file)
-		fput (file);
-out:
-	return ret;
-}
-
-unsigned long sys_mmap2 (unsigned long addr, size_t len,
-			unsigned long prot, unsigned long flags,
-			unsigned long fd, unsigned long pgoff)
-{
-	return do_mmap2 (addr, len, prot, flags, fd, pgoff);
-}
-
-unsigned long sys_mmap (unsigned long addr, size_t len,
-		       unsigned long prot, unsigned long flags,
-		       unsigned long fd, off_t offset)
-{
-	int err = -EINVAL;
-
-	if (offset & ~PAGE_MASK)
-		goto out;
-
-	err = do_mmap2 (addr, len, prot, flags, fd, offset >> PAGE_SHIFT);
-out:
-	return err;
-}
-
-/*
- * Do a system call from kernel instead of calling sys_execve so we
- * end up with proper pt_regs.
- */
-int kernel_execve(const char *filename, char *const argv[], char *const envp[])
-{
-	register char *__a __asm__ ("r6") = filename;
-	register void *__b __asm__ ("r7") = argv;
-	register void *__c __asm__ ("r8") = envp;
-	register unsigned long __syscall __asm__ ("r12") = __NR_execve;
-	register unsigned long __ret __asm__ ("r10");
-	__asm__ __volatile__ ("trap 0"
-			: "=r" (__ret), "=r" (__syscall)
-			: "1" (__syscall), "r" (__a), "r" (__b), "r" (__c)
-			: "r1", "r5", "r11", "r13", "r14",
-			  "r15", "r16", "r17", "r18", "r19");
-	return __ret;
-}
diff --git a/arch/v850/kernel/teg.c b/arch/v850/kernel/teg.c
deleted file mode 100644
index 699248f92aae..000000000000
--- a/arch/v850/kernel/teg.c
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * arch/v850/kernel/teg.c -- NB85E-TEG cpu chip
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/mm.h>
-#include <linux/swap.h>
-#include <linux/bootmem.h>
-#include <linux/irq.h>
-
-#include <asm/atomic.h>
-#include <asm/page.h>
-#include <asm/machdep.h>
-#include <asm/v850e_timer_d.h>
-
-#include "mach.h"
-
-void __init mach_sched_init (struct irqaction *timer_action)
-{
-	/* Select timer interrupt instead of external pin.  */
-	TEG_ISS |= 0x1;
-	/* Start hardware timer.  */
-	v850e_timer_d_configure (0, HZ);
-	/* Install timer interrupt handler.  */
-	setup_irq (IRQ_INTCMD(0), timer_action);
-}
-
-static struct v850e_intc_irq_init irq_inits[] = {
-	{ "IRQ", 0,		NUM_CPU_IRQS,	1, 7 },
-	{ "CMD", IRQ_INTCMD(0),	IRQ_INTCMD_NUM,	1, 5 },
-	{ "SER", IRQ_INTSER(0),	IRQ_INTSER_NUM,	1, 3 },
-	{ "SR",	 IRQ_INTSR(0),	IRQ_INTSR_NUM,	1, 4 },
-	{ "ST",	 IRQ_INTST(0),	IRQ_INTST_NUM,	1, 5 },
-	{ 0 }
-};
-#define NUM_IRQ_INITS (ARRAY_SIZE(irq_inits) - 1)
-
-static struct hw_interrupt_type hw_itypes[NUM_IRQ_INITS];
-
-/* Initialize MA chip interrupts.  */
-void __init teg_init_irqs (void)
-{
-	v850e_intc_init_irq_types (irq_inits, hw_itypes);
-}
-
-/* Called before configuring an on-chip UART.  */
-void teg_uart_pre_configure (unsigned chan, unsigned cflags, unsigned baud)
-{
-	/* Enable UART I/O pins instead of external interrupt pins, and
-	   UART interrupts instead of external pin interrupts.  */
-	TEG_ISS |= 0x4E;
-}
diff --git a/arch/v850/kernel/time.c b/arch/v850/kernel/time.c
deleted file mode 100644
index d810c93fe665..000000000000
--- a/arch/v850/kernel/time.c
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * linux/arch/v850/kernel/time.c -- Arch-dependent timer functions
- *
- *  Copyright (C) 1991, 1992, 1995, 2001, 2002  Linus Torvalds
- *
- * This file contains the v850-specific time handling details.
- * Most of the stuff is located in the machine specific files.
- *
- * 1997-09-10	Updated NTP code according to technical memorandum Jan '96
- *		"A Kernel Model for Precision Timekeeping" by Dave Mills
- */
-
-#include <linux/errno.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/param.h>
-#include <linux/string.h>
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-#include <linux/time.h>
-#include <linux/timex.h>
-#include <linux/profile.h>
-
-#include <asm/io.h>
-
-#include "mach.h"
-
-#define TICK_SIZE	(tick_nsec / 1000)
-
-/*
- * timer_interrupt() needs to keep up the real-time clock,
- * as well as call the "do_timer()" routine every clocktick
- */
-static irqreturn_t timer_interrupt (int irq, void *dummy, struct pt_regs *regs)
-{
-#if 0
-	/* last time the cmos clock got updated */
-	static long last_rtc_update=0;
-#endif
-
-	/* may need to kick the hardware timer */
-	if (mach_tick)
-	  mach_tick ();
-
-	do_timer (1);
-#ifndef CONFIG_SMP
-	update_process_times(user_mode(regs));
-#endif
-	profile_tick(CPU_PROFILING, regs);
-#if 0
-	/*
-	 * If we have an externally synchronized Linux clock, then update
-	 * CMOS clock accordingly every ~11 minutes. Set_rtc_mmss() has to be
-	 * called as close as possible to 500 ms before the new second starts.
-	 */
-	if (ntp_synced() &&
-	    xtime.tv_sec > last_rtc_update + 660 &&
-	    (xtime.tv_nsec / 1000) >= 500000 - ((unsigned) TICK_SIZE) / 2 &&
-	    (xtime.tv_nsec / 1000) <= 500000 + ((unsigned) TICK_SIZE) / 2) {
-	  if (set_rtc_mmss (xtime.tv_sec) == 0)
-	    last_rtc_update = xtime.tv_sec;
-	  else
-	    last_rtc_update = xtime.tv_sec - 600; /* do it again in 60 s */
-	}
-#ifdef CONFIG_HEARTBEAT
-	/* use power LED as a heartbeat instead -- much more useful
-	   for debugging -- based on the version for PReP by Cort */
-	/* acts like an actual heart beat -- ie thump-thump-pause... */
-	if (mach_heartbeat) {
-	    static unsigned cnt = 0, period = 0, dist = 0;
-
-	    if (cnt == 0 || cnt == dist)
-		mach_heartbeat ( 1 );
-	    else if (cnt == 7 || cnt == dist+7)
-		mach_heartbeat ( 0 );
-
-	    if (++cnt > period) {
-		cnt = 0;
-		/* The hyperbolic function below modifies the heartbeat period
-		 * length in dependency of the current (5min) load. It goes
-		 * through the points f(0)=126, f(1)=86, f(5)=51,
-		 * f(inf)->30. */
-		period = ((672<<FSHIFT)/(5*avenrun[0]+(7<<FSHIFT))) + 30;
-		dist = period / 4;
-	    }
-	}
-#endif /* CONFIG_HEARTBEAT */
-#endif /* 0 */
-
-	return IRQ_HANDLED;
-}
-
-static int timer_dev_id;
-static struct irqaction timer_irqaction = {
-	.handler = timer_interrupt,
-	.flags = IRQF_DISABLED,
-	.mask = CPU_MASK_NONE,
-	.name = "timer",
-	.dev_id = &timer_dev_id,
-};
-
-void time_init (void)
-{
-	mach_gettimeofday (&xtime);
-	mach_sched_init (&timer_irqaction);
-}
diff --git a/arch/v850/kernel/v850_ksyms.c b/arch/v850/kernel/v850_ksyms.c
deleted file mode 100644
index 8d386a5dbc4a..000000000000
--- a/arch/v850/kernel/v850_ksyms.c
+++ /dev/null
@@ -1,51 +0,0 @@
-#include <linux/module.h>
-#include <linux/linkage.h>
-#include <linux/sched.h>
-#include <linux/string.h>
-#include <linux/mm.h>
-#include <linux/user.h>
-#include <linux/elfcore.h>
-#include <linux/in6.h>
-#include <linux/interrupt.h>
-
-#include <asm/pgalloc.h>
-#include <asm/irq.h>
-#include <asm/io.h>
-#include <asm/checksum.h>
-#include <asm/current.h>
-
-
-extern void *trap_table;
-EXPORT_SYMBOL (trap_table);
-
-/* platform dependent support */
-EXPORT_SYMBOL (kernel_thread);
-EXPORT_SYMBOL (__bug);
-
-/* Networking helper routines. */
-EXPORT_SYMBOL (csum_partial_copy_nocheck);
-EXPORT_SYMBOL (csum_partial_copy_from_user);
-EXPORT_SYMBOL (ip_compute_csum);
-EXPORT_SYMBOL (ip_fast_csum);
-
-/* string / mem functions */
-EXPORT_SYMBOL (memset);
-EXPORT_SYMBOL (memcpy);
-EXPORT_SYMBOL (memmove);
-
-/*
- * libgcc functions - functions that are used internally by the
- * compiler...  (prototypes are not correct though, but that
- * doesn't really matter since they're not versioned).
- */
-extern void __ashldi3 (void);
-extern void __ashrdi3 (void);
-extern void __lshrdi3 (void);
-extern void __muldi3 (void);
-extern void __negdi2 (void);
-
-EXPORT_SYMBOL (__ashldi3);
-EXPORT_SYMBOL (__ashrdi3);
-EXPORT_SYMBOL (__lshrdi3);
-EXPORT_SYMBOL (__muldi3);
-EXPORT_SYMBOL (__negdi2);
diff --git a/arch/v850/kernel/v850e2_cache.c b/arch/v850/kernel/v850e2_cache.c
deleted file mode 100644
index 4570312c689c..000000000000
--- a/arch/v850/kernel/v850e2_cache.c
+++ /dev/null
@@ -1,127 +0,0 @@
-/*
- * arch/v850/kernel/v850e2_cache.c -- Cache control for V850E2 cache
- * 	memories
- *
- *  Copyright (C) 2003  NEC Electronics Corporation
- *  Copyright (C) 2003  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#include <linux/mm.h>
-
-#include <asm/v850e2_cache.h>
-
-/* Cache operations we can do.  The encoding corresponds directly to the
-   value we need to write into the COPR register.  */
-enum cache_op {
-	OP_SYNC_IF_DIRTY 	   = V850E2_CACHE_COPR_CFC(0), /* 000 */
-	OP_SYNC_IF_VALID 	   = V850E2_CACHE_COPR_CFC(1), /* 001 */
-	OP_SYNC_IF_VALID_AND_CLEAR = V850E2_CACHE_COPR_CFC(3), /* 011 */
-	OP_WAY_CLEAR 		   = V850E2_CACHE_COPR_CFC(4), /* 100 */
-	OP_FILL 		   = V850E2_CACHE_COPR_CFC(5), /* 101 */
-	OP_CLEAR 		   = V850E2_CACHE_COPR_CFC(6), /* 110 */
-	OP_CREATE_DIRTY 	   = V850E2_CACHE_COPR_CFC(7)  /* 111 */
-};
-
-/* Which cache to use.  This encoding also corresponds directly to the
-   value we need to write into the COPR register. */
-enum cache {
-	ICACHE = 0,
-	DCACHE = V850E2_CACHE_COPR_LBSL
-};
-
-/* Returns ADDR rounded down to the beginning of its cache-line.  */
-#define CACHE_LINE_ADDR(addr)  \
-   ((addr) & ~(V850E2_CACHE_LINE_SIZE - 1))
-/* Returns END_ADDR rounded up to the `limit' of its cache-line.  */
-#define CACHE_LINE_END_ADDR(end_addr)  \
-   CACHE_LINE_ADDR(end_addr + (V850E2_CACHE_LINE_SIZE - 1))
-
-
-/* Low-level cache ops.  */
-
-/* Apply cache-op OP to all entries in CACHE.  */
-static inline void cache_op_all (enum cache_op op, enum cache cache)
-{
-	int cmd = op | cache | V850E2_CACHE_COPR_WSLE | V850E2_CACHE_COPR_STRT;
-
-	if (op != OP_WAY_CLEAR) {
-		/* The WAY_CLEAR operation does the whole way, but other
-		   ops take begin-index and count params; we just indicate
-		   the entire cache.  */
-		V850E2_CACHE_CADL = 0;
-		V850E2_CACHE_CADH = 0;
-		V850E2_CACHE_CCNT = V850E2_CACHE_WAY_SIZE - 1;
-	}
-
-	V850E2_CACHE_COPR = cmd | V850E2_CACHE_COPR_WSL(0); /* way 0 */
-	V850E2_CACHE_COPR = cmd | V850E2_CACHE_COPR_WSL(1); /* way 1 */
-	V850E2_CACHE_COPR = cmd | V850E2_CACHE_COPR_WSL(2); /* way 2 */
-	V850E2_CACHE_COPR = cmd | V850E2_CACHE_COPR_WSL(3); /* way 3 */
-}
-
-/* Apply cache-op OP to all entries in CACHE covering addresses ADDR
-   through ADDR+LEN.  */
-static inline void cache_op_range (enum cache_op op, u32 addr, u32 len,
-				   enum cache cache)
-{
-	u32 start = CACHE_LINE_ADDR (addr);
-	u32 end = CACHE_LINE_END_ADDR (addr + len);
-	u32 num_lines = (end - start) >> V850E2_CACHE_LINE_SIZE_BITS;
-
-	V850E2_CACHE_CADL = start & 0xFFFF;
-	V850E2_CACHE_CADH = start >> 16;
-	V850E2_CACHE_CCNT = num_lines - 1;
-
-	V850E2_CACHE_COPR = op | cache | V850E2_CACHE_COPR_STRT;
-}
-
-
-/* High-level ops.  */
-
-static void cache_exec_after_store_all (void)
-{
-	cache_op_all (OP_SYNC_IF_DIRTY, DCACHE);
-	cache_op_all (OP_WAY_CLEAR, ICACHE);
-}
-
-static void cache_exec_after_store_range (u32 start, u32 len)
-{
-	cache_op_range (OP_SYNC_IF_DIRTY, start, len, DCACHE);
-	cache_op_range (OP_CLEAR, start, len, ICACHE);
-}
-
-
-/* Exported functions.  */
-
-void flush_icache (void)
-{
-	cache_exec_after_store_all ();
-}
-
-void flush_icache_range (unsigned long start, unsigned long end)
-{
-	cache_exec_after_store_range (start, end - start);
-}
-
-void flush_icache_page (struct vm_area_struct *vma, struct page *page)
-{
-	cache_exec_after_store_range (page_to_virt (page), PAGE_SIZE);
-}
-
-void flush_icache_user_range (struct vm_area_struct *vma, struct page *page,
-			      unsigned long addr, int len)
-{
-	cache_exec_after_store_range (addr, len);
-}
-
-void flush_cache_sigtramp (unsigned long addr)
-{
-	/* For the exact size, see signal.c, but 16 bytes should be enough.  */
-	cache_exec_after_store_range (addr, 16);
-}
diff --git a/arch/v850/kernel/v850e_cache.c b/arch/v850/kernel/v850e_cache.c
deleted file mode 100644
index ea3e51cfb259..000000000000
--- a/arch/v850/kernel/v850e_cache.c
+++ /dev/null
@@ -1,174 +0,0 @@
-/*
- * arch/v850/kernel/v850e_cache.c -- Cache control for V850E cache memories
- *
- *  Copyright (C) 2003  NEC Electronics Corporation
- *  Copyright (C) 2003  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-/* This file implements cache control for the rather simple cache used on
-   some V850E CPUs, specifically the NB85E/TEG CPU-core and the V850E/ME2
-   CPU.  V850E2 processors have their own (better) cache
-   implementation.  */
-
-#include <asm/entry.h>
-#include <asm/cacheflush.h>
-#include <asm/v850e_cache.h>
-
-#define WAIT_UNTIL_CLEAR(value) while (value) {}
-
-/* Set caching params via the BHC and DCC registers.  */
-void v850e_cache_enable (u16 bhc, u16 icc, u16 dcc)
-{
-	unsigned long *r0_ram = (unsigned long *)R0_RAM_ADDR;
-	register u16 bhc_val asm ("r6") = bhc;
-
-	/* Read the instruction cache control register (ICC) and confirm
-	   that bits 0 and 1 (TCLR0, TCLR1) are all cleared.  */
-	WAIT_UNTIL_CLEAR (V850E_CACHE_ICC & 0x3);
-	V850E_CACHE_ICC = icc;
-
-#ifdef V850E_CACHE_DCC
-	/* Configure data-cache.  */
-	V850E_CACHE_DCC = dcc;
-#endif /* V850E_CACHE_DCC */
-
-	/* Configure caching for various memory regions by writing the BHC
-	   register.  The documentation says that an instruction _cannot_
-	   enable/disable caching for the memory region in which the
-	   instruction itself exists; to work around this, we store
-	   appropriate instructions into the on-chip RAM area (which is never
-	   cached), and briefly jump there to do the work.  */
-#ifdef V850E_CACHE_WRITE_IBS
-	*r0_ram++ 	= 0xf0720760;	/* st.h r0, 0xfffff072[r0] */
-#endif
-	*r0_ram++ 	= 0xf06a3760;	/* st.h r6, 0xfffff06a[r0] */
-	*r0_ram 	= 0x5640006b;	/* jmp [r11] */
-
-	asm ("mov hilo(1f), r11; jmp [%1]; 1:;"
-	     :: "r" (bhc_val), "r" (R0_RAM_ADDR) : "r11");
-}
-
-static void clear_icache (void)
-{
-	/* 1. Read the instruction cache control register (ICC) and confirm
-	      that bits 0 and 1 (TCLR0, TCLR1) are all cleared.  */
-	WAIT_UNTIL_CLEAR (V850E_CACHE_ICC & 0x3);
-
-	/* 2. Read the ICC register and confirm that bit 12 (LOCK0) is
-  	      cleared.  Bit 13 of the ICC register is always cleared.  */
-	WAIT_UNTIL_CLEAR (V850E_CACHE_ICC & 0x1000);
-
-	/* 3. Set the TCLR0 and TCLR1 bits of the ICC register as follows,
-	      when clearing way 0 and way 1 at the same time:
-	        (a) Set the TCLR0 and TCLR1 bits.
-		(b) Read the TCLR0 and TCLR1 bits to confirm that these bits
-		    are cleared.
-		(c) Perform (a) and (b) above again.  */
-	V850E_CACHE_ICC |= 0x3;
-	WAIT_UNTIL_CLEAR (V850E_CACHE_ICC & 0x3);
-
-#ifdef V850E_CACHE_REPEAT_ICC_WRITE
-	/* Do it again.  */
-	V850E_CACHE_ICC |= 0x3;
-	WAIT_UNTIL_CLEAR (V850E_CACHE_ICC & 0x3);
-#endif
-}
-
-#ifdef V850E_CACHE_DCC
-/* Flush or clear (or both) the data cache, depending on the value of FLAGS;
-   the procedure is the same for both, just the control bits used differ (and
-   both may be performed simultaneously).  */
-static void dcache_op (unsigned short flags)
-{
-	/* 1. Read the data cache control register (DCC) and confirm that bits
-	      0, 1, 4, and 5 (DC00, DC01, DC04, DC05) are all cleared.  */
-	WAIT_UNTIL_CLEAR (V850E_CACHE_DCC & 0x33);
-
-	/* 2. Clear DCC register bit 12 (DC12), bit 13 (DC13), or both
-	      depending on the way for which tags are to be cleared.  */
-	V850E_CACHE_DCC &= ~0xC000;
-
-	/* 3. Set DCC register bit 0 (DC00), bit 1 (DC01) or both depending on
-	      the way for which tags are to be cleared.
-	      ...
-	      Set DCC register bit 4 (DC04), bit 5 (DC05), or both depending
-	      on the way to be data flushed.  */
-	V850E_CACHE_DCC |= flags;
-
-	/* 4. Read DCC register bit DC00, DC01 [DC04, DC05], or both depending
-	      on the way for which tags were cleared [flushed] and confirm
-	      that that bit is cleared.  */
-	WAIT_UNTIL_CLEAR (V850E_CACHE_DCC & flags);
-}
-#endif /* V850E_CACHE_DCC */
-
-/* Flushes the contents of the dcache to memory.  */
-static inline void flush_dcache (void)
-{
-#ifdef V850E_CACHE_DCC
-	/* We only need to do something if in write-back mode.  */
-	if (V850E_CACHE_DCC & 0x0400)
-		dcache_op (0x30);
-#endif /* V850E_CACHE_DCC */
-}
-
-/* Flushes the contents of the dcache to memory, and then clears it.  */
-static inline void clear_dcache (void)
-{
-#ifdef V850E_CACHE_DCC
-	/* We only need to do something if the dcache is enabled.  */
-	if (V850E_CACHE_DCC & 0x0C00)
-		dcache_op (0x33);
-#endif /* V850E_CACHE_DCC */
-}
-
-/* Clears the dcache without flushing to memory first.  */
-static inline void clear_dcache_no_flush (void)
-{
-#ifdef V850E_CACHE_DCC
-	/* We only need to do something if the dcache is enabled.  */
-	if (V850E_CACHE_DCC & 0x0C00)
-		dcache_op (0x3);
-#endif /* V850E_CACHE_DCC */
-}
-
-static inline void cache_exec_after_store (void)
-{
-	flush_dcache ();
-	clear_icache ();
-}
-
-
-/* Exported functions.  */
-
-void flush_icache (void)
-{
-	cache_exec_after_store ();
-}
-
-void flush_icache_range (unsigned long start, unsigned long end)
-{
-	cache_exec_after_store ();
-}
-
-void flush_icache_page (struct vm_area_struct *vma, struct page *page)
-{
-	cache_exec_after_store ();
-}
-
-void flush_icache_user_range (struct vm_area_struct *vma, struct page *page,
-			      unsigned long adr, int len)
-{
-	cache_exec_after_store ();
-}
-
-void flush_cache_sigtramp (unsigned long addr)
-{
-	cache_exec_after_store ();
-}
diff --git a/arch/v850/kernel/v850e_intc.c b/arch/v850/kernel/v850e_intc.c
deleted file mode 100644
index 8d39a52ee6d1..000000000000
--- a/arch/v850/kernel/v850e_intc.c
+++ /dev/null
@@ -1,104 +0,0 @@
-/*
- * arch/v850/kernel/v850e_intc.c -- V850E interrupt controller (INTC)
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/irq.h>
-
-#include <asm/v850e_intc.h>
-
-static void irq_nop (unsigned irq) { }
-
-static unsigned v850e_intc_irq_startup (unsigned irq)
-{
-	v850e_intc_clear_pending_irq (irq);
-	v850e_intc_enable_irq (irq);
-	return 0;
-}
-
-static void v850e_intc_end_irq (unsigned irq)
-{
-	unsigned long psw, temp;
-
-	/* Clear the highest-level bit in the In-service priority register
-	   (ISPR), to allow this interrupt (or another of the same or
-	   lesser priority) to happen again.
-
-	   The `reti' instruction normally does this automatically when the
-	   PSW bits EP and NP are zero, but we can't always rely on reti
-	   being used consistently to return after an interrupt (another
-	   process can be scheduled, for instance, which can delay the
-	   associated reti for a long time, or this process may be being
-	   single-stepped, which uses the `dbret' instruction to return
-	   from the kernel).
-
-	   We also set the PSW EP bit, which prevents reti from also
-	   trying to modify the ISPR itself.  */
-
-	/* Get PSW and disable interrupts.  */
-	asm volatile ("stsr psw, %0; di" : "=r" (psw));
-	/* We don't want to do anything for NMIs (they don't use the ISPR).  */
-	if (! (psw & 0xC0)) {
-		/* Transition to `trap' state, so that an eventual real
-		   reti instruction won't modify the ISPR.  */
-		psw |= 0x40;
-		/* Fake an interrupt return, which automatically clears the
-		   appropriate bit in the ISPR.  */
-		asm volatile ("mov hilo(1f), %0;"
-			      "ldsr %0, eipc; ldsr %1, eipsw;"
-			      "reti;"
-			      "1:"
-			      : "=&r" (temp) : "r" (psw));
-	}
-}
-
-/* Initialize HW_IRQ_TYPES for INTC-controlled irqs described in array
-   INITS (which is terminated by an entry with the name field == 0).  */
-void __init v850e_intc_init_irq_types (struct v850e_intc_irq_init *inits,
-				       struct hw_interrupt_type *hw_irq_types)
-{
-	struct v850e_intc_irq_init *init;
-	for (init = inits; init->name; init++) {
-		unsigned i;
-		struct hw_interrupt_type *hwit = hw_irq_types++;
-
-		hwit->typename = init->name;
-
-		hwit->startup  = v850e_intc_irq_startup;
-		hwit->shutdown = v850e_intc_disable_irq;
-		hwit->enable   = v850e_intc_enable_irq;
-		hwit->disable  = v850e_intc_disable_irq;
-		hwit->ack      = irq_nop;
-		hwit->end      = v850e_intc_end_irq;
-		
-		/* Initialize kernel IRQ infrastructure for this interrupt.  */
-		init_irq_handlers(init->base, init->num, init->interval, hwit);
-
-		/* Set the interrupt priorities.  */
-		for (i = 0; i < init->num; i++) {
-			unsigned irq = init->base + i * init->interval;
-
-			/* If the interrupt is currently enabled (all
-			   interrupts are initially disabled), then
-			   assume whoever enabled it has set things up
-			   properly, and avoid messing with it.  */
-			if (! v850e_intc_irq_enabled (irq))
-				/* This write also (1) disables the
-				   interrupt, and (2) clears any pending
-				   interrupts.  */
-				V850E_INTC_IC (irq)
-					= (V850E_INTC_IC_PR (init->priority)
-					   | V850E_INTC_IC_MK);
-		}
-	}
-}
diff --git a/arch/v850/kernel/v850e_timer_d.c b/arch/v850/kernel/v850e_timer_d.c
deleted file mode 100644
index d2a4ece2574c..000000000000
--- a/arch/v850/kernel/v850e_timer_d.c
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * include/asm-v850/v850e_timer_d.c -- `Timer D' component often used
- *	with V850E CPUs
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#include <linux/kernel.h>
-
-#include <asm/v850e_utils.h>
-#include <asm/v850e_timer_d.h>
-
-/* Start interval timer TIMER (0-3).  The timer will issue the
-   corresponding INTCMD interrupt RATE times per second.
-   This function does not enable the interrupt.  */
-void v850e_timer_d_configure (unsigned timer, unsigned rate)
-{
-	unsigned divlog2, count;
-
-	/* Calculate params for timer.  */
-	if (! calc_counter_params (
-		    V850E_TIMER_D_BASE_FREQ, rate,
-		    V850E_TIMER_D_TMCD_CS_MIN, V850E_TIMER_D_TMCD_CS_MAX, 16,
-		    &divlog2, &count))
-		printk (KERN_WARNING
-			"Cannot find interval timer %d setting suitable"
-			" for rate of %dHz.\n"
-			"Using rate of %dHz instead.\n",
-			timer, rate,
-			(V850E_TIMER_D_BASE_FREQ >> divlog2) >> 16);
-
-	/* Do the actual hardware timer initialization:  */
-
-	/* Enable timer.  */
-	V850E_TIMER_D_TMCD(timer) = V850E_TIMER_D_TMCD_CAE;
-	/* Set clock divider.  */
-	V850E_TIMER_D_TMCD(timer)
-		= V850E_TIMER_D_TMCD_CAE
-		| V850E_TIMER_D_TMCD_CS(divlog2);
-	/* Set timer compare register.  */
-	V850E_TIMER_D_CMD(timer) = count;
-	/* Start counting.  */
-	V850E_TIMER_D_TMCD(timer)
-		= V850E_TIMER_D_TMCD_CAE
-		| V850E_TIMER_D_TMCD_CS(divlog2)
-		| V850E_TIMER_D_TMCD_CE;
-}
diff --git a/arch/v850/kernel/v850e_utils.c b/arch/v850/kernel/v850e_utils.c
deleted file mode 100644
index e6807ef8dee6..000000000000
--- a/arch/v850/kernel/v850e_utils.c
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * include/asm-v850/v850e_utils.h -- Utility functions associated with
- *	V850E CPUs
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#include <asm/v850e_utils.h>
-
-/* Calculate counter clock-divider and count values to attain the
-   desired frequency RATE from the base frequency BASE_FREQ.  The
-   counter is expected to have a clock-divider, which can divide the
-   system cpu clock by a power of two value from MIN_DIVLOG2 to
-   MAX_DIV_LOG2, and a word-size of COUNTER_SIZE bits (the counter
-   counts up and resets whenever it's equal to the compare register,
-   generating an interrupt or whatever when it does so).  The returned
-   values are: *DIVLOG2 -- log2 of the desired clock divider and *COUNT
-   -- the counter compare value to use.  Returns true if it was possible
-   to find a reasonable value, otherwise false (and the other return
-   values will be set to be as good as possible).  */
-int calc_counter_params (unsigned long base_freq,
-			 unsigned long rate,
-			 unsigned min_divlog2, unsigned max_divlog2,
-			 unsigned counter_size,
-			 unsigned *divlog2, unsigned *count)
-{
-	unsigned _divlog2;
-	int ok = 0;
-
-	/* Find the lowest clock divider setting that can represent RATE.  */
-	for (_divlog2 = min_divlog2; _divlog2 <= max_divlog2; _divlog2++) {
-		/* Minimum interrupt rate possible using this divider.  */
-		unsigned min_int_rate
-			= (base_freq >> _divlog2) >> counter_size;
-
-		if (min_int_rate <= rate) {
-			/* This setting is the highest resolution
-			   setting that's slow enough enough to attain
-			   RATE interrupts per second, so use it.  */
-			ok = 1;
-			break;
-		}
-	}
-
-	if (_divlog2 > max_divlog2)
-		/* Can't find correct setting.  */
-		_divlog2 = max_divlog2;
-
-	if (divlog2)
-		*divlog2 = _divlog2;
-	if (count)
-		*count = ((base_freq >> _divlog2) + rate/2) / rate;
-
-	return ok;
-}
diff --git a/arch/v850/kernel/vmlinux.lds.S b/arch/v850/kernel/vmlinux.lds.S
deleted file mode 100644
index d08cd1d27f27..000000000000
--- a/arch/v850/kernel/vmlinux.lds.S
+++ /dev/null
@@ -1,306 +0,0 @@
-/*
- * arch/v850/vmlinux.lds.S -- kernel linker script for v850 platforms
- *
- *  Copyright (C) 2002,03,04,05  NEC Electronics Corporation
- *  Copyright (C) 2002,03,04,05  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-
-#define VMLINUX_SYMBOL(_sym_) _##_sym_
-#include <asm-generic/vmlinux.lds.h>
-
-/* For most platforms, this will define useful things like RAM addr/size.  */
-#include <asm/machdep.h>
-
-
-/* The following macros contain the usual definitions for various data areas.
-   The prefix `RAMK_' is used to indicate macros suitable for kernels loaded
-   into RAM, and similarly `ROMK_' for ROM-resident kernels.  Note that all
-   symbols are prefixed with an extra `_' for compatibility with the v850
-   toolchain.  */
-
-	
-/* Interrupt vectors.  */
-#define INTV_CONTENTS							      \
-		. = ALIGN (0x10) ;					      \
-		__intv_start = . ;					      \
-			*(.intv.reset)	/* Reset vector */		      \
-		. = __intv_start + 0x10 ;				      \
-			*(.intv.common)	/* Vectors common to all v850e proc */\
-		. = __intv_start + 0x80 ;				      \
-			*(.intv.mach)	/* Machine-specific int. vectors.  */ \
-		__intv_end = . ;
-
-#define RODATA_CONTENTS							      \
-		. = ALIGN (16) ;					      \
-			*(.rodata) *(.rodata.*)				      \
-			*(__vermagic)		/* Kernel version magic */    \
-			*(.rodata1)					      \
-		/* PCI quirks */					      \
-		___start_pci_fixups_early = . ;				      \
-			*(.pci_fixup_early)				      \
-		___end_pci_fixups_early = . ;				      \
-		___start_pci_fixups_header = . ;			      \
-			*(.pci_fixup_header)				      \
-		___end_pci_fixups_header = . ;				      \
-		___start_pci_fixups_final = . ;				      \
-			*(.pci_fixup_final)				      \
-		___end_pci_fixups_final = . ;				      \
-		___start_pci_fixups_enable = . ;			      \
-			*(.pci_fixup_enable)				      \
-		___end_pci_fixups_enable = . ;				      \
-		/* Kernel symbol table: Normal symbols */		      \
-		___start___ksymtab = .;					      \
-			*(__ksymtab)					      \
-		___stop___ksymtab = .;					      \
-		/* Kernel symbol table: GPL-only symbols */		      \
-		___start___ksymtab_gpl = .;				      \
-			*(__ksymtab_gpl)				      \
-		___stop___ksymtab_gpl = .;				      \
-		/* Kernel symbol table: GPL-future symbols */		      \
-		___start___ksymtab_gpl_future = .;			      \
-			*(__ksymtab_gpl_future)				      \
-		___stop___ksymtab_gpl_future = .;			      \
-		/* Kernel symbol table: strings */			      \
-			*(__ksymtab_strings)				      \
-		/* Kernel symbol table: Normal symbols */		      \
-		___start___kcrctab = .;					      \
-			*(__kcrctab)					      \
-		___stop___kcrctab = .;					      \
-		/* Kernel symbol table: GPL-only symbols */		      \
-		___start___kcrctab_gpl = .;				      \
-			*(__kcrctab_gpl)				      \
-		___stop___kcrctab_gpl = .;				      \
-		/* Kernel symbol table: GPL-future symbols */		      \
-		___start___kcrctab_gpl_future = .;			      \
-			*(__kcrctab_gpl_future)				      \
-		___stop___kcrctab_gpl_future = .;			      \
-		/* Built-in module parameters */			      \
-		. = ALIGN (4) ;						      \
-		___start___param = .;					      \
-		*(__param)						      \
-		___stop___param = .;
-
-
-/* Kernel text segment, and some constant data areas.  */
-#define TEXT_CONTENTS							      \
-		_text = .;						      \
-		__stext = . ;						      \
-		TEXT_TEXT						      \
-		SCHED_TEXT						      \
-			*(.exit.text)	/* 2.5 convention */		      \
-			*(.text.exit)	/* 2.4 convention */		      \
-			*(.text.lock)					      \
-			*(.exitcall.exit)				      \
-		__real_etext = . ;	/* There may be data after here.  */  \
-		RODATA_CONTENTS						      \
-		. = ALIGN (4) ;						      \
-		    	*(.call_table_data)				      \
-			*(.call_table_text)				      \
-		. = ALIGN (16) ;	/* Exception table.  */		      \
-		___start___ex_table = . ;				      \
-			*(__ex_table)					      \
-		___stop___ex_table = . ;				      \
-		. = ALIGN (4) ;						      \
-		__etext = . ;
-
-/* Kernel data segment.  */
-#define DATA_CONTENTS							      \
-		__sdata = . ;						      \
-		DATA_DATA						      \
-			EXIT_DATA	/* 2.5 convention */		      \
-			*(.data.exit)	/* 2.4 convention */		      \
-		. = ALIGN (16) ;					      \
-		*(.data.cacheline_aligned)				      \
-		. = ALIGN (0x2000) ;					      \
-        	*(.data.init_task)					      \
-		. = ALIGN (0x2000) ;					      \
-		__edata = . ;
-
-/* Kernel BSS segment.  */
-#define BSS_CONTENTS							      \
-		__sbss = . ;						      \
-			*(.bss)						      \
-			*(COMMON)					      \
-		. = ALIGN (4) ;						      \
-		__init_stack_end = . ;					      \
-		__ebss = . ;
-
-/* `initcall' tables.  */
-#define INITCALL_CONTENTS						      \
-		. = ALIGN (16) ;					      \
-		___setup_start = . ;					      \
-			*(.init.setup)	/* 2.5 convention */		      \
-			*(.setup.init)	/* 2.4 convention */		      \
-		___setup_end = . ;					      \
-		___initcall_start = . ;					      \
-			*(.initcall.init)				      \
-			INITCALLS					      \
-		. = ALIGN (4) ;						      \
-		___initcall_end = . ;					      \
-		___con_initcall_start = .;				      \
-			*(.con_initcall.init)				      \
-		___con_initcall_end = .;
-
-/* Contents of `init' section for a kernel that's loaded into RAM.  */
-#define RAMK_INIT_CONTENTS						      \
-		RAMK_INIT_CONTENTS_NO_END				      \
-		__init_end = . ;
-/* Same as RAMK_INIT_CONTENTS, but doesn't define the `__init_end' symbol.  */
-#define RAMK_INIT_CONTENTS_NO_END					      \
-		. = ALIGN (4096) ;					      \
-		__init_start = . ;					      \
-			__sinittext = .;				      \
-			INIT_TEXT	/* 2.5 convention */		      \
-			__einittext = .;				      \
-			INIT_DATA					      \
-			*(.text.init)	/* 2.4 convention */		      \
-			*(.data.init)					      \
-		INITCALL_CONTENTS					      \
-		INITRAMFS_CONTENTS
-
-/* The contents of `init' section for a ROM-resident kernel which
-   should go into RAM.  */	
-#define ROMK_INIT_RAM_CONTENTS						      \
-		. = ALIGN (4096) ;					      \
-		__init_start = . ;					      \
-			INIT_DATA	/* 2.5 convention */		      \
-			*(.data.init)	/* 2.4 convention */		      \
-		__init_end = . ;					      \
-		. = ALIGN (4096) ;
-
-/* The contents of `init' section for a ROM-resident kernel which
-   should go into ROM.  */	
-#define ROMK_INIT_ROM_CONTENTS						      \
-			_sinittext = .;					      \
-			INIT_TEXT	/* 2.5 convention */		      \
-			_einittext = .;					      \
-			*(.text.init)	/* 2.4 convention */		      \
-		INITCALL_CONTENTS					      \
-		INITRAMFS_CONTENTS
-
-/* A root filesystem image, for kernels with an embedded root filesystem.  */
-#define ROOT_FS_CONTENTS						      \
-		__root_fs_image_start = . ;				      \
-		*(.root)						      \
-		__root_fs_image_end = . ;
-
-#ifdef CONFIG_BLK_DEV_INITRD
-/* The initramfs archive.  */
-#define INITRAMFS_CONTENTS						      \
-		. = ALIGN (4) ;						      \
-		___initramfs_start = . ;				      \
-			*(.init.ramfs)					      \
-		___initramfs_end = . ;
-#endif
-
-/* Where the initial bootmap (bitmap for the boot-time memory allocator) 
-   should be place.  */
-#define BOOTMAP_CONTENTS						      \
-		. = ALIGN (4096) ;					      \
-		__bootmap = . ;						      \
-		. = . + 4096 ;		/* enough for 128MB.   */
-
-/* The contents of a `typical' kram area for a kernel in RAM.  */
-#define RAMK_KRAM_CONTENTS						      \
-		__kram_start = . ;					      \
-		TEXT_CONTENTS						      \
-		DATA_CONTENTS						      \
-		BSS_CONTENTS						      \
-		RAMK_INIT_CONTENTS					      \
-		__kram_end = . ;					      \
-		BOOTMAP_CONTENTS
-
-
-/* Define output sections normally used for a ROM-resident kernel.  
-   ROM and RAM should be appropriate memory areas to use for kernel
-   ROM and RAM data.  This assumes that ROM starts at 0 (and thus can
-   hold the interrupt vectors).  */
-#define ROMK_SECTIONS(ROM, RAM)						      \
-	.rom : {							      \
-		INTV_CONTENTS						      \
-		TEXT_CONTENTS						      \
-		ROMK_INIT_ROM_CONTENTS					      \
-		ROOT_FS_CONTENTS					      \
-	} > ROM								      \
-									      \
-	__rom_copy_src_start = . ;					      \
-									      \
-	.data : {							      \
-		__kram_start = . ;					      \
-		__rom_copy_dst_start = . ;				      \
-		DATA_CONTENTS						      \
-		ROMK_INIT_RAM_CONTENTS					      \
-		__rom_copy_dst_end = . ;				      \
-	} > RAM  AT> ROM						      \
-									      \
-	.bss ALIGN (4) : {						      \
-		BSS_CONTENTS						      \
-		__kram_end = . ;					      \
-		BOOTMAP_CONTENTS					      \
-	} > RAM
-
-
-/* The 32-bit variable `jiffies' is just the lower 32-bits of `jiffies_64'.  */
-_jiffies = _jiffies_64 ;
-
-
-/* Include an appropriate platform-dependent linker-script (which
-   usually should use the above macros to do most of the work).  */
-
-#ifdef CONFIG_V850E_SIM
-# include "sim.ld"
-#endif
-
-#ifdef CONFIG_V850E2_SIM85E2
-# include "sim85e2.ld"
-#endif
-
-#ifdef CONFIG_V850E2_FPGA85E2C
-# include "fpga85e2c.ld"
-#endif
-
-#ifdef CONFIG_V850E2_ANNA
-# ifdef CONFIG_ROM_KERNEL
-#  include "anna-rom.ld"
-# else
-#  include "anna.ld"
-# endif
-#endif
-
-#ifdef CONFIG_V850E_AS85EP1
-# ifdef CONFIG_ROM_KERNEL
-#  include "as85ep1-rom.ld"
-# else
-#  include "as85ep1.ld"
-# endif
-#endif
-
-#ifdef CONFIG_RTE_CB_MA1
-# ifdef CONFIG_ROM_KERNEL
-#  include "rte_ma1_cb-rom.ld"
-# else
-#  include "rte_ma1_cb.ld"
-# endif
-#endif
-
-#ifdef CONFIG_RTE_CB_NB85E
-# ifdef CONFIG_ROM_KERNEL
-#  include "rte_nb85e_cb-rom.ld"
-# elif defined(CONFIG_RTE_CB_MULTI)
-#  include "rte_nb85e_cb-multi.ld"
-# else
-#  include "rte_nb85e_cb.ld"
-# endif
-#endif
-
-#ifdef CONFIG_RTE_CB_ME2
-#  include "rte_me2_cb.ld"
-#endif
-
diff --git a/arch/v850/lib/Makefile b/arch/v850/lib/Makefile
deleted file mode 100644
index 1c78b728a117..000000000000
--- a/arch/v850/lib/Makefile
+++ /dev/null
@@ -1,6 +0,0 @@
-#
-# arch/v850/lib/Makefile
-#
-
-lib-y  = ashrdi3.o ashldi3.o lshrdi3.o muldi3.o negdi2.o \
-	 checksum.o memcpy.o memset.o
diff --git a/arch/v850/lib/ashldi3.c b/arch/v850/lib/ashldi3.c
deleted file mode 100644
index 9e792d53f0e4..000000000000
--- a/arch/v850/lib/ashldi3.c
+++ /dev/null
@@ -1,62 +0,0 @@
-/* ashldi3.c extracted from gcc-2.95.2/libgcc2.c which is: */
-/* Copyright (C) 1989, 92-98, 1999 Free Software Foundation, Inc.
-
-This file is part of GNU CC.
-
-GNU CC is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2, or (at your option)
-any later version.
-
-GNU CC is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with GNU CC; see the file COPYING.  If not, write to
-the Free Software Foundation, 59 Temple Place - Suite 330,
-Boston, MA 02111-1307, USA.  */
-
-#define BITS_PER_UNIT 8
-
-typedef 	 int SItype	__attribute__ ((mode (SI)));
-typedef unsigned int USItype	__attribute__ ((mode (SI)));
-typedef		 int DItype	__attribute__ ((mode (DI)));
-typedef int word_type __attribute__ ((mode (__word__)));
-
-struct DIstruct {SItype high, low;};
-
-typedef union
-{
-  struct DIstruct s;
-  DItype ll;
-} DIunion;
-
-DItype
-__ashldi3 (DItype u, word_type b)
-{
-  DIunion w;
-  word_type bm;
-  DIunion uu;
-
-  if (b == 0)
-    return u;
-
-  uu.ll = u;
-
-  bm = (sizeof (SItype) * BITS_PER_UNIT) - b;
-  if (bm <= 0)
-    {
-      w.s.low = 0;
-      w.s.high = (USItype)uu.s.low << -bm;
-    }
-  else
-    {
-      USItype carries = (USItype)uu.s.low >> bm;
-      w.s.low = (USItype)uu.s.low << b;
-      w.s.high = ((USItype)uu.s.high << b) | carries;
-    }
-
-  return w.ll;
-}
diff --git a/arch/v850/lib/ashrdi3.c b/arch/v850/lib/ashrdi3.c
deleted file mode 100644
index 78efb65e315a..000000000000
--- a/arch/v850/lib/ashrdi3.c
+++ /dev/null
@@ -1,63 +0,0 @@
-/* ashrdi3.c extracted from gcc-2.7.2/libgcc2.c which is: */
-/* Copyright (C) 1989, 1992, 1993, 1994, 1995 Free Software Foundation, Inc.
-
-This file is part of GNU CC.
-
-GNU CC is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2, or (at your option)
-any later version.
-
-GNU CC is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with GNU CC; see the file COPYING.  If not, write to
-the Free Software Foundation, 59 Temple Place - Suite 330,
-Boston, MA 02111-1307, USA.  */
-
-#define BITS_PER_UNIT 8
-
-typedef 	 int SItype	__attribute__ ((mode (SI)));
-typedef unsigned int USItype	__attribute__ ((mode (SI)));
-typedef		 int DItype	__attribute__ ((mode (DI)));
-typedef int word_type __attribute__ ((mode (__word__)));
-
-struct DIstruct {SItype high, low;};
-
-typedef union
-{
-  struct DIstruct s;
-  DItype ll;
-} DIunion;
-
-DItype
-__ashrdi3 (DItype u, word_type b)
-{
-  DIunion w;
-  word_type bm;
-  DIunion uu;
-
-  if (b == 0)
-    return u;
-
-  uu.ll = u;
-
-  bm = (sizeof (SItype) * BITS_PER_UNIT) - b;
-  if (bm <= 0)
-    {
-      /* w.s.high = 1..1 or 0..0 */
-      w.s.high = uu.s.high >> (sizeof (SItype) * BITS_PER_UNIT - 1);
-      w.s.low = uu.s.high >> -bm;
-    }
-  else
-    {
-      USItype carries = (USItype)uu.s.high << bm;
-      w.s.high = uu.s.high >> b;
-      w.s.low = ((USItype)uu.s.low >> b) | carries;
-    }
-
-  return w.ll;
-}
diff --git a/arch/v850/lib/checksum.c b/arch/v850/lib/checksum.c
deleted file mode 100644
index 042158dfe17a..000000000000
--- a/arch/v850/lib/checksum.c
+++ /dev/null
@@ -1,155 +0,0 @@
-/*
- * INET		An implementation of the TCP/IP protocol suite for the LINUX
- *		operating system.  INET is implemented using the  BSD Socket
- *		interface as the means of communication with the user level.
- *
- *		MIPS specific IP/TCP/UDP checksumming routines
- *
- * Authors:	Ralf Baechle, <ralf@waldorf-gmbh.de>
- *		Lots of code moved from tcp.c and ip.c; see those files
- *		for more names.
- *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
- * $Id: checksum.c,v 1.1 2002/09/28 14:58:40 gerg Exp $
- */
-#include <net/checksum.h>
-#include <linux/module.h>
-#include <linux/types.h>
-#include <asm/byteorder.h>
-#include <asm/string.h>
-#include <asm/uaccess.h>
-
-static inline unsigned short from32to16 (unsigned long sum)
-{
-	unsigned int result;
-	/*
-			        %0		%1
-	      hsw %1, %0	H     L		L     H
-	      add %1, %0	H     L		H+L+C H+L
-	*/
-	asm ("hsw %1, %0; add %1, %0" : "=&r" (result) : "r" (sum));
-	return result >> 16;
-}
-
-static inline unsigned int do_csum(const unsigned char * buff, int len)
-{
-	int odd, count;
-	unsigned int result = 0;
-
-	if (len <= 0)
-		goto out;
-	odd = 1 & (unsigned long) buff;
-	if (odd) {
-		result = be16_to_cpu(*buff);
-		len--;
-		buff++;
-	}
-	count = len >> 1;		/* nr of 16-bit words.. */
-	if (count) {
-		if (2 & (unsigned long) buff) {
-			result += *(unsigned short *) buff;
-			count--;
-			len -= 2;
-			buff += 2;
-		}
-		count >>= 1;		/* nr of 32-bit words.. */
-		if (count) {
-			unsigned int carry = 0;
-			do {
-				unsigned int w = *(unsigned int *) buff;
-				count--;
-				buff += 4;
-				result += carry;
-				result += w;
-				carry = (w > result);
-			} while (count);
-			result += carry;
-			result = (result & 0xffff) + (result >> 16);
-		}
-		if (len & 2) {
-			result += *(unsigned short *) buff;
-			buff += 2;
-		}
-	}
-	if (len & 1)
-		result += le16_to_cpu(*buff);
-	result = from32to16(result);
-	if (odd)
-		result = ((result >> 8) & 0xff) | ((result & 0xff) << 8);
-out:
-	return result;
-}
-
-/*
- *	This is a version of ip_compute_csum() optimized for IP headers,
- *	which always checksum on 4 octet boundaries.
- */
-__sum16 ip_fast_csum(const void *iph, unsigned int ihl)
-{
-	return (__force __sum16)~do_csum(iph,ihl*4);
-}
-
-/*
- * this routine is used for miscellaneous IP-like checksums, mainly
- * in icmp.c
- */
-__sum16 ip_compute_csum(const void *buff, int len)
-{
-	return (__force __sum16)~do_csum(buff,len);
-}
-
-/*
- * computes a partial checksum, e.g. for TCP/UDP fragments
- */
-__wsum csum_partial(const void *buff, int len, __wsum sum)
-{
-	unsigned int result = do_csum(buff, len);
-
-	/* add in old sum, and carry.. */
-	result += (__force u32)sum;
-	if ((__force u32)sum > result)
-		result += 1;
-	return (__force __wsum)result;
-}
-
-EXPORT_SYMBOL(csum_partial);
-
-/*
- * copy while checksumming, otherwise like csum_partial
- */
-__wsum csum_partial_copy_nocheck(const void *src, void *dst,
-                               int len, __wsum sum)
-{
-	/*
-	 * It's 2:30 am and I don't feel like doing it real ...
-	 * This is lots slower than the real thing (tm)
-	 */
-	sum = csum_partial(src, len, sum);
-	memcpy(dst, src, len);
-
-	return sum;
-}
-
-/*
- * Copy from userspace and compute checksum.  If we catch an exception
- * then zero the rest of the buffer.
- */
-__wsum csum_partial_copy_from_user (const void *src,
-					  void *dst,
-                                          int len, __wsum sum,
-                                          int *err_ptr)
-{
-	int missing;
-
-	missing = copy_from_user(dst, src, len);
-	if (missing) {
-		memset(dst + len - missing, 0, missing);
-		*err_ptr = -EFAULT;
-	}
-		
-	return csum_partial(dst, len, sum);
-}
diff --git a/arch/v850/lib/lshrdi3.c b/arch/v850/lib/lshrdi3.c
deleted file mode 100644
index 93b1cb6fdee8..000000000000
--- a/arch/v850/lib/lshrdi3.c
+++ /dev/null
@@ -1,62 +0,0 @@
-/* lshrdi3.c extracted from gcc-2.7.2/libgcc2.c which is: */
-/* Copyright (C) 1989, 1992, 1993, 1994, 1995 Free Software Foundation, Inc.
-
-This file is part of GNU CC.
-
-GNU CC is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2, or (at your option)
-any later version.
-
-GNU CC is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with GNU CC; see the file COPYING.  If not, write to
-the Free Software Foundation, 59 Temple Place - Suite 330,
-Boston, MA 02111-1307, USA.  */
-
-#define BITS_PER_UNIT 8
-
-typedef 	 int SItype	__attribute__ ((mode (SI)));
-typedef unsigned int USItype	__attribute__ ((mode (SI)));
-typedef		 int DItype	__attribute__ ((mode (DI)));
-typedef int word_type __attribute__ ((mode (__word__)));
-
-struct DIstruct {SItype high, low;};
-
-typedef union
-{
-  struct DIstruct s;
-  DItype ll;
-} DIunion;
-
-DItype
-__lshrdi3 (DItype u, word_type b)
-{
-  DIunion w;
-  word_type bm;
-  DIunion uu;
-
-  if (b == 0)
-    return u;
-
-  uu.ll = u;
-
-  bm = (sizeof (SItype) * BITS_PER_UNIT) - b;
-  if (bm <= 0)
-    {
-      w.s.high = 0;
-      w.s.low = (USItype)uu.s.high >> -bm;
-    }
-  else
-    {
-      USItype carries = (USItype)uu.s.high << bm;
-      w.s.high = (USItype)uu.s.high >> b;
-      w.s.low = ((USItype)uu.s.low >> b) | carries;
-    }
-
-  return w.ll;
-}
diff --git a/arch/v850/lib/memcpy.c b/arch/v850/lib/memcpy.c
deleted file mode 100644
index 492847b3e612..000000000000
--- a/arch/v850/lib/memcpy.c
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * arch/v850/lib/memcpy.c -- Memory copying
- *
- *  Copyright (C) 2001,02  NEC Corporation
- *  Copyright (C) 2001,02  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#include <linux/types.h>
-#include <asm/string.h>
-
-#define CHUNK_SIZE		32 /* bytes */
-#define CHUNK_ALIGNED(addr)	(((unsigned long)addr & 0x3) == 0)
-
-/* Note that this macro uses 8 call-clobbered registers (not including
-   R1), which are few enough so that the following functions don't need
-   to spill anything to memory.  It also uses R1, which is nominally
-   reserved for the assembler, but here it should be OK.  */
-#define COPY_CHUNK(src, dst)			\
-   asm ("mov %0, ep;"				\
-	"sld.w 0[ep], r1; sld.w 4[ep], r12;"	\
-	"sld.w 8[ep], r13; sld.w 12[ep], r14;"	\
-	"sld.w 16[ep], r15; sld.w 20[ep], r17;"	\
-	"sld.w 24[ep], r18; sld.w 28[ep], r19;"	\
-	"mov %1, ep;"				\
-	"sst.w r1, 0[ep]; sst.w r12, 4[ep];"	\
-	"sst.w r13, 8[ep]; sst.w r14, 12[ep];"	\
-	"sst.w r15, 16[ep]; sst.w r17, 20[ep];"	\
-	"sst.w r18, 24[ep]; sst.w r19, 28[ep]"	\
-	:: "r" (src), "r" (dst)			\
-	: "r1", "r12", "r13", "r14", "r15",	\
-	  "r17", "r18", "r19", "ep", "memory");
-
-void *memcpy (void *dst, const void *src, __kernel_size_t size)
-{
-	char *_dst = dst;
-	const char *_src = src;
-
-	if (size >= CHUNK_SIZE && CHUNK_ALIGNED(_src) && CHUNK_ALIGNED(_dst)) {
-		/* Copy large blocks efficiently.  */
-		unsigned count;
-		for (count = size / CHUNK_SIZE; count; count--) {
-			COPY_CHUNK (_src, _dst);
-			_src += CHUNK_SIZE;
-			_dst += CHUNK_SIZE;
-		}
-		size %= CHUNK_SIZE;
-	}
-
-	if (size > 0)
-		do
-			*_dst++ = *_src++;
-		while (--size);
-
-	return dst;
-}
-
-void *memmove (void *dst, const void *src, __kernel_size_t size)
-{
-	if ((unsigned long)dst < (unsigned long)src
-	    || (unsigned long)src + size < (unsigned long)dst)
-		return memcpy (dst, src, size);
-	else {
-		char *_dst = dst + size;
-		const char *_src = src + size;
-
-		if (size >= CHUNK_SIZE
-		    && CHUNK_ALIGNED (_src) && CHUNK_ALIGNED (_dst))
-		{
-			/* Copy large blocks efficiently.  */
-			unsigned count;
-			for (count = size / CHUNK_SIZE; count; count--) {
-				_src -= CHUNK_SIZE;
-				_dst -= CHUNK_SIZE;
-				COPY_CHUNK (_src, _dst);
-			}
-			size %= CHUNK_SIZE;
-		}
-
-		if (size > 0)
-			do
-				*--_dst = *--_src;
-			while (--size);
-
-		return _dst;
-	}
-}
diff --git a/arch/v850/lib/memset.c b/arch/v850/lib/memset.c
deleted file mode 100644
index d1b2ad821b15..000000000000
--- a/arch/v850/lib/memset.c
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * arch/v850/lib/memset.c -- Memory initialization
- *
- *  Copyright (C) 2001,02,04  NEC Corporation
- *  Copyright (C) 2001,02,04  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#include <linux/types.h>
-
-void *memset (void *dst, int val, __kernel_size_t count)
-{
-	if (count) {
-		register unsigned loop;
-		register void *ptr asm ("ep") = dst;
-
-		/* replicate VAL into a long.  */
-		val &= 0xff;
-		val |= val << 8;
-		val |= val << 16;
-
-		/* copy initial unaligned bytes.  */
-		if ((long)ptr & 1) {
-			*(char *)ptr = val;
-			ptr = (void *)((char *)ptr + 1);
-			count--;
-		}
-		if (count > 2 && ((long)ptr & 2)) {
-			*(short *)ptr = val;
-			ptr = (void *)((short *)ptr + 1);
-			count -= 2;
-		}
-
-		/* 32-byte copying loop.  */
-		for (loop = count / 32; loop; loop--) {
-			asm ("sst.w %0, 0[ep]; sst.w %0, 4[ep];"
-			     "sst.w %0, 8[ep]; sst.w %0, 12[ep];"
-			     "sst.w %0, 16[ep]; sst.w %0, 20[ep];"
-			     "sst.w %0, 24[ep]; sst.w %0, 28[ep]"
-			     :: "r" (val) : "memory");
-			ptr += 32;
-		}
-		count %= 32;
-
-		/* long copying loop.  */
-		for (loop = count / 4; loop; loop--) {
-			*(long *)ptr = val;
-			ptr = (void *)((long *)ptr + 1);
-		}
-		count %= 4;
-
-		/* finish up with any trailing bytes.  */
-		if (count & 2) {
-			*(short *)ptr = val;
-			ptr = (void *)((short *)ptr + 1);
-		}
-		if (count & 1) {
-			*(char *)ptr = val;
-		}
-	}
-
-	return dst;
-}
diff --git a/arch/v850/lib/muldi3.c b/arch/v850/lib/muldi3.c
deleted file mode 100644
index 277ca25c82c8..000000000000
--- a/arch/v850/lib/muldi3.c
+++ /dev/null
@@ -1,61 +0,0 @@
-/* muldi3.c extracted from gcc-2.7.2.3/libgcc2.c and 
-			   gcc-2.7.2.3/longlong.h which is: */
-/* Copyright (C) 1989, 1992, 1993, 1994, 1995, 2001 Free Software Foundation, Inc.
-
-This file is part of GNU CC.
-
-GNU CC is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2, or (at your option)
-any later version.
-
-GNU CC is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with GNU CC; see the file COPYING.  If not, write to
-the Free Software Foundation, 59 Temple Place - Suite 330,
-Boston, MA 02111-1307, USA.  */
-
-#define umul_ppmm(w1, w0, u, v) \
-  __asm__ ("mulu %3, %0, %1"						\
-           : "=r" ((USItype)(w0)),					\
-             "=r" ((USItype)(w1))					\
-           : "%0" ((USItype)(u)),					\
-             "r" ((USItype)(v)))
-
-#define __umulsidi3(u, v) \
-  ({DIunion __w;							\
-    umul_ppmm (__w.s.high, __w.s.low, u, v);				\
-    __w.ll; })
-
-typedef 	 int SItype	__attribute__ ((mode (SI)));
-typedef unsigned int USItype	__attribute__ ((mode (SI)));
-typedef		 int DItype	__attribute__ ((mode (DI)));
-typedef int word_type __attribute__ ((mode (__word__)));
-
-struct DIstruct {SItype high, low;};
-
-typedef union
-{
-  struct DIstruct s;
-  DItype ll;
-} DIunion;
-
-DItype
-__muldi3 (DItype u, DItype v)
-{
-  DIunion w;
-  DIunion uu, vv;
-
-  uu.ll = u,
-  vv.ll = v;
-
-  w.ll = __umulsidi3 (uu.s.low, vv.s.low);
-  w.s.high += ((USItype) uu.s.low * (USItype) vv.s.high
-	       + (USItype) uu.s.high * (USItype) vv.s.low);
-
-  return w.ll;
-}
diff --git a/arch/v850/lib/negdi2.c b/arch/v850/lib/negdi2.c
deleted file mode 100644
index 571e04fc619a..000000000000
--- a/arch/v850/lib/negdi2.c
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * arch/v850/lib/negdi2.c -- 64-bit negation
- *
- *  Copyright (C) 2001  NEC Corporation
- *  Copyright (C) 2001  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-typedef		 int DItype	__attribute__ ((mode (DI)));
-
-DItype __negdi2 (DItype x)
-{
-	__asm__ __volatile__
-		("not	r6, r10;"
-		 "add	1, r10;"
-		 "setf	c, r6;"
-		 "not	r7, r11;"
-		 "add	r6, r11"
-		 ::: "r6", "r7", "r10", "r11");
-}
diff --git a/drivers/serial/Kconfig b/drivers/serial/Kconfig
index 8fc7451c0049..3b4a14e355c1 100644
--- a/drivers/serial/Kconfig
+++ b/drivers/serial/Kconfig
@@ -942,22 +942,6 @@ config SERIAL_IP22_ZILOG_CONSOLE
 	depends on SERIAL_IP22_ZILOG=y
 	select SERIAL_CORE_CONSOLE
 
-config V850E_UART
-	bool "NEC V850E on-chip UART support"
-	depends on V850E_MA1 || V850E_ME2 || V850E_TEG || V850E2_ANNA || V850E_AS85EP1
-	select SERIAL_CORE
-	default y
-
-config V850E_UARTB
-        bool
-	depends on V850E_UART && V850E_ME2
-	default y
-
-config V850E_UART_CONSOLE
-	bool "Use NEC V850E on-chip UART for console"
-	depends on V850E_UART
-	select SERIAL_CORE_CONSOLE
-
 config SERIAL_SH_SCI
 	tristate "SuperH SCI(F) serial port support"
 	depends on SUPERH || H8300
diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
index ccb78f66c2b6..48399e134c0d 100644
--- a/drivers/watchdog/Kconfig
+++ b/drivers/watchdog/Kconfig
@@ -788,8 +788,6 @@ config WATCHDOG_RIO
 	  machines.  The watchdog timeout period is normally one minute but
 	  can be changed with a boot-time parameter.
 
-# V850 Architecture
-
 # XTENSA Architecture
 
 #
diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile
index 25b352b664d9..edd305a64e63 100644
--- a/drivers/watchdog/Makefile
+++ b/drivers/watchdog/Makefile
@@ -119,8 +119,6 @@ obj-$(CONFIG_SH_WDT) += shwdt.o
 
 # SPARC64 Architecture
 
-# V850 Architecture
-
 # XTENSA Architecture
 
 # Architecture Independant
diff --git a/include/asm-v850/Kbuild b/include/asm-v850/Kbuild
deleted file mode 100644
index c68e1680da01..000000000000
--- a/include/asm-v850/Kbuild
+++ /dev/null
@@ -1 +0,0 @@
-include include/asm-generic/Kbuild.asm
diff --git a/include/asm-v850/a.out.h b/include/asm-v850/a.out.h
deleted file mode 100644
index e9439a0708f6..000000000000
--- a/include/asm-v850/a.out.h
+++ /dev/null
@@ -1,21 +0,0 @@
-#ifndef __V850_A_OUT_H__
-#define __V850_A_OUT_H__
-
-struct exec
-{
-  unsigned long a_info;		/* Use macros N_MAGIC, etc for access */
-  unsigned a_text;		/* length of text, in bytes */
-  unsigned a_data;		/* length of data, in bytes */
-  unsigned a_bss;		/* length of uninitialized data area for file, in bytes */
-  unsigned a_syms;		/* length of symbol table data in file, in bytes */
-  unsigned a_entry;		/* start address */
-  unsigned a_trsize;		/* length of relocation info for text, in bytes */
-  unsigned a_drsize;		/* length of relocation info for data, in bytes */
-};
-
-#define N_TRSIZE(a)	((a).a_trsize)
-#define N_DRSIZE(a)	((a).a_drsize)
-#define N_SYMSIZE(a)	((a).a_syms)
-
-
-#endif /* __V850_A_OUT_H__ */
diff --git a/include/asm-v850/anna.h b/include/asm-v850/anna.h
deleted file mode 100644
index cd5eaee103b0..000000000000
--- a/include/asm-v850/anna.h
+++ /dev/null
@@ -1,137 +0,0 @@
-/*
- * include/asm-v850/anna.h -- Anna V850E2 evaluation cpu chip/board
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_ANNA_H__
-#define __V850_ANNA_H__
-
-#include <asm/v850e2.h>		/* Based on V850E2 core.  */
-
-
-#define CPU_MODEL	"v850e2/anna"
-#define CPU_MODEL_LONG	"NEC V850E2/Anna"
-#define PLATFORM	"anna"
-#define PLATFORM_LONG	"NEC/Midas lab V850E2/Anna evaluation board"
-
-#define CPU_CLOCK_FREQ	200000000 /*  200MHz */
-#define SYS_CLOCK_FREQ	 33300000 /* 33.3MHz */
-
-
-/* 1MB of static RAM.  This memory is mirrored 64 times.  */
-#define SRAM_ADDR	0x04000000
-#define SRAM_SIZE	0x00100000 /* 1MB */
-/* 64MB of DRAM.  */
-#define SDRAM_ADDR	0x08000000	
-#define SDRAM_SIZE	0x04000000 /* 64MB */
-
-
-/* For <asm/page.h> */
-#define PAGE_OFFSET 	SRAM_ADDR
-
-/* We use on-chip RAM, for a few miscellaneous variables that must be
-   accessible using a load instruction relative to R0.  The Anna chip has
-   128K of `dLB' ram nominally located at 0xFFF00000, but it's mirrored
-   every 128K, so we can use the `last mirror' (except for the portion at
-   the top which is overridden by I/O space).  In addition, the early
-   sample chip we're using has lots of memory errors in the dLB ram, so we
-   use a specially chosen location that has at least 20 bytes of contiguous
-   valid memory (xxxF0020 - xxxF003F).  */
-#define R0_RAM_ADDR			0xFFFF8020
-
-
-/* Anna specific control registers.  */
-#define ANNA_ILBEN_ADDR			0xFFFFF7F2
-#define ANNA_ILBEN			(*(volatile u16 *)ANNA_ILBEN_ADDR)
-
-
-/* I/O port P0-P3. */
-/* Direct I/O.  Bits 0-7 are pins Pn0-Pn7.  */
-#define ANNA_PORT_IO_ADDR(n)		(0xFFFFF400 + (n) * 2)
-#define ANNA_PORT_IO(n)			(*(volatile u8 *)ANNA_PORT_IO_ADDR(n))
-/* Port mode (for direct I/O, 0 = output, 1 = input).  */
-#define ANNA_PORT_PM_ADDR(n)		(0xFFFFF410 + (n) * 2)
-#define ANNA_PORT_PM(n)			(*(volatile u8 *)ANNA_PORT_PM_ADDR(n))
-
-
-/* Hardware-specific interrupt numbers (in the kernel IRQ namespace).  */
-#define IRQ_INTP(n)	(n)	/* Pnnn (pin) interrupts 0-15 */
-#define IRQ_INTP_NUM	16
-#define IRQ_INTOV(n)	(0x10 + (n)) /* 0-2 */
-#define IRQ_INTOV_NUM	2
-#define IRQ_INTCCC(n)	(0x12 + (n))
-#define IRQ_INTCCC_NUM	4
-#define IRQ_INTCMD(n)	(0x16 + (n)) /* interval timer interrupts 0-5 */
-#define IRQ_INTCMD_NUM	6
-#define IRQ_INTDMA(n)	(0x1C + (n)) /* DMA interrupts 0-3 */
-#define IRQ_INTDMA_NUM	4
-#define IRQ_INTDMXER	0x20
-#define IRQ_INTSRE(n)	(0x21 + (n)*3) /* UART 0-1 reception error */
-#define IRQ_INTSRE_NUM	2
-#define IRQ_INTSR(n)	(0x22 + (n)*3) /* UART 0-1 reception completion */
-#define IRQ_INTSR_NUM	2
-#define IRQ_INTST(n)	(0x23 + (n)*3) /* UART 0-1 transmission completion */
-#define IRQ_INTST_NUM	2
-
-#define NUM_CPU_IRQS	64
-
-#ifndef __ASSEMBLY__
-/* Initialize chip interrupts.  */
-extern void anna_init_irqs (void);
-#endif
-
-
-/* Anna UART details (basically the same as the V850E/MA1, but 2 channels).  */
-#define V850E_UART_NUM_CHANNELS		2
-#define V850E_UART_BASE_FREQ		(SYS_CLOCK_FREQ / 2)
-#define V850E_UART_CHIP_NAME 		"V850E2/NA85E2A"
-
-/* This is the UART channel that's actually connected on the board.  */
-#define V850E_UART_CONSOLE_CHANNEL	1
-
-/* This is a function that gets called before configuring the UART.  */
-#define V850E_UART_PRE_CONFIGURE	anna_uart_pre_configure
-#ifndef __ASSEMBLY__
-extern void anna_uart_pre_configure (unsigned chan,
-				     unsigned cflags, unsigned baud);
-#endif
-
-/* This board supports RTS/CTS for the on-chip UART, but only for channel 1. */
-
-/* CTS for UART channel 1 is pin P37 (bit 7 of port 3).  */
-#define V850E_UART_CTS(chan)	((chan) == 1 ? !(ANNA_PORT_IO(3) & 0x80) : 1)
-/* RTS for UART channel 1 is pin P07 (bit 7 of port 0).  */
-#define V850E_UART_SET_RTS(chan, val)					      \
-   do {									      \
-	   if (chan == 1) {						      \
-		   unsigned old = ANNA_PORT_IO(0); 			      \
-		   if (val)						      \
-			   ANNA_PORT_IO(0) = old & ~0x80;		      \
-		   else							      \
-			   ANNA_PORT_IO(0) = old | 0x80;		      \
-	   }								      \
-   } while (0)
-
-
-/* Timer C details.  */
-#define V850E_TIMER_C_BASE_ADDR		0xFFFFF600
-
-/* Timer D details (the Anna actually has 5 of these; should change later). */
-#define V850E_TIMER_D_BASE_ADDR		0xFFFFF540
-#define V850E_TIMER_D_TMD_BASE_ADDR 	(V850E_TIMER_D_BASE_ADDR + 0x0)
-#define V850E_TIMER_D_CMD_BASE_ADDR 	(V850E_TIMER_D_BASE_ADDR + 0x2)
-#define V850E_TIMER_D_TMCD_BASE_ADDR 	(V850E_TIMER_D_BASE_ADDR + 0x4)
-
-#define V850E_TIMER_D_BASE_FREQ		SYS_CLOCK_FREQ
-#define V850E_TIMER_D_TMCD_CS_MIN	1 /* min 2^1 divider */
-
-
-#endif /* __V850_ANNA_H__ */
diff --git a/include/asm-v850/as85ep1.h b/include/asm-v850/as85ep1.h
deleted file mode 100644
index 5a5ca9073d09..000000000000
--- a/include/asm-v850/as85ep1.h
+++ /dev/null
@@ -1,152 +0,0 @@
-/*
- * include/asm-v850/as85ep1.h -- AS85EP1 evaluation CPU chip/board
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_AS85EP1_H__
-#define __V850_AS85EP1_H__
-
-#include <asm/v850e.h>
-
-
-#define CPU_MODEL	"as85ep1"
-#define CPU_MODEL_LONG	"NEC V850E/AS85EP1"
-#define PLATFORM	"AS85EP1"
-#define PLATFORM_LONG	"NEC V850E/AS85EP1 evaluation board"
-
-#define CPU_CLOCK_FREQ	96000000 /*  96MHz */
-#define SYS_CLOCK_FREQ	CPU_CLOCK_FREQ
-
-
-/* 1MB of static RAM.  */
-#define SRAM_ADDR	0x00400000
-#define SRAM_SIZE	0x00100000 /* 1MB */
-/* About 58MB of DRAM.  This can actually be at one of two positions,
-   determined by jump JP3; we have to use the first position because the
-   second is partially out of processor instruction addressing range
-   (though in the second position there's actually 64MB available).  */
-#define SDRAM_ADDR	0x00600000
-#define SDRAM_SIZE	0x039F8000 /* approx 58MB */
-
-/* For <asm/page.h> */
-#define PAGE_OFFSET 	SRAM_ADDR
-
-/* We use on-chip RAM, for a few miscellaneous variables that must be
-   accessible using a load instruction relative to R0.  The AS85EP1 chip
-   16K of internal RAM located slightly before I/O space.  */
-#define R0_RAM_ADDR	0xFFFF8000
-
-
-/* AS85EP1 specific control registers.  */
-#define AS85EP1_CSC_ADDR(n)	(0xFFFFF060 + (n) * 2)
-#define AS85EP1_CSC(n)		(*(volatile u16 *)AS85EP1_CSC_ADDR(n))
-#define AS85EP1_BSC_ADDR	0xFFFFF066
-#define AS85EP1_BSC		(*(volatile u16 *)AS85EP1_BSC_ADDR)
-#define AS85EP1_BCT_ADDR(n)	(0xFFFFF480 + (n) * 2)
-#define AS85EP1_BCT(n)		(*(volatile u16 *)AS85EP1_BCT_ADDR(n))
-#define AS85EP1_DWC_ADDR(n)	(0xFFFFF484 + (n) * 2)
-#define AS85EP1_DWC(n)		(*(volatile u16 *)AS85EP1_DWC_ADDR(n))
-#define AS85EP1_BCC_ADDR	0xFFFFF488
-#define AS85EP1_BCC		(*(volatile u16 *)AS85EP1_BCC_ADDR)
-#define AS85EP1_ASC_ADDR	0xFFFFF48A
-#define AS85EP1_ASC		(*(volatile u16 *)AS85EP1_ASC_ADDR)
-#define AS85EP1_BCP_ADDR	0xFFFFF48C
-#define AS85EP1_BCP		(*(volatile u16 *)AS85EP1_BCP_ADDR)
-#define AS85EP1_LBS_ADDR	0xFFFFF48E
-#define AS85EP1_LBS		(*(volatile u16 *)AS85EP1_LBS_ADDR)
-#define AS85EP1_BMC_ADDR	0xFFFFF498
-#define AS85EP1_BMC		(*(volatile u16 *)AS85EP1_BMC_ADDR)
-#define AS85EP1_PRC_ADDR	0xFFFFF49A
-#define AS85EP1_PRC		(*(volatile u16 *)AS85EP1_PRC_ADDR)
-#define AS85EP1_SCR_ADDR(n)	(0xFFFFF4A0 + (n) * 4)
-#define AS85EP1_SCR(n)		(*(volatile u16 *)AS85EP1_SCR_ADDR(n))
-#define AS85EP1_RFS_ADDR(n)	(0xFFFFF4A2 + (n) * 4)
-#define AS85EP1_RFS(n)		(*(volatile u16 *)AS85EP1_RFS_ADDR(n))
-#define AS85EP1_IRAMM_ADDR	0xFFFFF80A
-#define AS85EP1_IRAMM		(*(volatile u8 *)AS85EP1_IRAMM_ADDR)
-
-
-
-/* I/O port P0-P13. */
-/* Direct I/O.  Bits 0-7 are pins Pn0-Pn7.  */
-#define AS85EP1_PORT_IO_ADDR(n)	(0xFFFFF400 + (n) * 2)
-#define AS85EP1_PORT_IO(n)	(*(volatile u8 *)AS85EP1_PORT_IO_ADDR(n))
-/* Port mode (for direct I/O, 0 = output, 1 = input).  */
-#define AS85EP1_PORT_PM_ADDR(n)	(0xFFFFF420 + (n) * 2)
-#define AS85EP1_PORT_PM(n)	(*(volatile u8 *)AS85EP1_PORT_PM_ADDR(n))
-/* Port mode control (0 = direct I/O mode, 1 = alternative I/O mode).  */
-#define AS85EP1_PORT_PMC_ADDR(n) (0xFFFFF440 + (n) * 2)
-#define AS85EP1_PORT_PMC(n)	(*(volatile u8 *)AS85EP1_PORT_PMC_ADDR(n))
-
-
-/* Hardware-specific interrupt numbers (in the kernel IRQ namespace).  */
-#define IRQ_INTCCC(n)	(0x0C + (n))
-#define IRQ_INTCCC_NUM	8
-#define IRQ_INTCMD(n)	(0x14 + (n)) /* interval timer interrupts 0-5 */
-#define IRQ_INTCMD_NUM	6
-#define IRQ_INTSRE(n)	(0x1E + (n)*3) /* UART 0-1 reception error */
-#define IRQ_INTSRE_NUM	2
-#define IRQ_INTSR(n)	(0x1F + (n)*3) /* UART 0-1 reception completion */
-#define IRQ_INTSR_NUM	2
-#define IRQ_INTST(n)	(0x20 + (n)*3) /* UART 0-1 transmission completion */
-#define IRQ_INTST_NUM	2
-
-#define NUM_CPU_IRQS	64
-
-#ifndef __ASSEMBLY__
-/* Initialize chip interrupts.  */
-extern void as85ep1_init_irqs (void);
-#endif
-
-
-/* AS85EP1 UART details (basically the same as the V850E/MA1, but 2 channels).  */
-#define V850E_UART_NUM_CHANNELS		2
-#define V850E_UART_BASE_FREQ		(SYS_CLOCK_FREQ / 4)
-#define V850E_UART_CHIP_NAME 		"V850E/NA85E"
-
-/* This is a function that gets called before configuring the UART.  */
-#define V850E_UART_PRE_CONFIGURE	as85ep1_uart_pre_configure
-#ifndef __ASSEMBLY__
-extern void as85ep1_uart_pre_configure (unsigned chan,
-					unsigned cflags, unsigned baud);
-#endif
-
-/* This board supports RTS/CTS for the on-chip UART, but only for channel 1. */
-
-/* CTS for UART channel 1 is pin P54 (bit 4 of port 5).  */
-#define V850E_UART_CTS(chan)   ((chan) == 1 ? !(AS85EP1_PORT_IO(5) & 0x10) : 1)
-/* RTS for UART channel 1 is pin P53 (bit 3 of port 5).  */
-#define V850E_UART_SET_RTS(chan, val)					      \
-   do {									      \
-	   if (chan == 1) {						      \
-		   unsigned old = AS85EP1_PORT_IO(5); 			      \
-		   if (val)						      \
-			   AS85EP1_PORT_IO(5) = old & ~0x8;		      \
-		   else							      \
-			   AS85EP1_PORT_IO(5) = old | 0x8;		      \
-	   }								      \
-   } while (0)
-
-
-/* Timer C details.  */
-#define V850E_TIMER_C_BASE_ADDR		0xFFFFF600
-
-/* Timer D details (the AS85EP1 actually has 5 of these; should change later). */
-#define V850E_TIMER_D_BASE_ADDR		0xFFFFF540
-#define V850E_TIMER_D_TMD_BASE_ADDR 	(V850E_TIMER_D_BASE_ADDR + 0x0)
-#define V850E_TIMER_D_CMD_BASE_ADDR 	(V850E_TIMER_D_BASE_ADDR + 0x2)
-#define V850E_TIMER_D_TMCD_BASE_ADDR 	(V850E_TIMER_D_BASE_ADDR + 0x4)
-
-#define V850E_TIMER_D_BASE_FREQ		SYS_CLOCK_FREQ
-#define V850E_TIMER_D_TMCD_CS_MIN	2 /* min 2^2 divider */
-
-
-#endif /* __V850_AS85EP1_H__ */
diff --git a/include/asm-v850/asm.h b/include/asm-v850/asm.h
deleted file mode 100644
index bf1e785a5dde..000000000000
--- a/include/asm-v850/asm.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * include/asm-v850/asm.h -- Macros for writing assembly code
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#define G_ENTRY(name)							      \
-   .balign 4;								      \
-   .globl name;								      \
-   .type  name,@function;						      \
-   name
-#define G_DATA(name)							      \
-   .globl name;								      \
-   .type  name,@object;							      \
-   name
-#define END(name)							      \
-   .size  name,.-name
-
-#define L_ENTRY(name)							      \
-   .balign 4;								      \
-   .type  name,@function;						      \
-   name
-#define L_DATA(name)							      \
-   .type  name,@object;							      \
-   name
diff --git a/include/asm-v850/atomic.h b/include/asm-v850/atomic.h
deleted file mode 100644
index e4e57de08f73..000000000000
--- a/include/asm-v850/atomic.h
+++ /dev/null
@@ -1,131 +0,0 @@
-/*
- * include/asm-v850/atomic.h -- Atomic operations
- *
- *  Copyright (C) 2001,02  NEC Corporation
- *  Copyright (C) 2001,02  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_ATOMIC_H__
-#define __V850_ATOMIC_H__
-
-
-#include <asm/system.h>
-
-#ifdef CONFIG_SMP
-#error SMP not supported
-#endif
-
-typedef struct { int counter; } atomic_t;
-
-#define ATOMIC_INIT(i)	{ (i) }
-
-#ifdef __KERNEL__
-
-#define atomic_read(v)		((v)->counter)
-#define atomic_set(v,i)		(((v)->counter) = (i))
-
-static inline int atomic_add_return (int i, volatile atomic_t *v)
-{
-	unsigned long flags;
-	int res;
-
-	local_irq_save (flags);
-	res = v->counter + i;
-	v->counter = res;
-	local_irq_restore (flags);
-
-	return res;
-}
-
-static __inline__ int atomic_sub_return (int i, volatile atomic_t *v)
-{
-	unsigned long flags;
-	int res;
-
-	local_irq_save (flags);
-	res = v->counter - i;
-	v->counter = res;
-	local_irq_restore (flags);
-
-	return res;
-}
-
-static __inline__ void atomic_clear_mask (unsigned long mask, unsigned long *addr)
-{
-	unsigned long flags;
-
-	local_irq_save (flags);
-	*addr &= ~mask;
-	local_irq_restore (flags);
-}
-
-#endif
-
-#define atomic_add(i, v)	atomic_add_return ((i), (v))
-#define atomic_sub(i, v)	atomic_sub_return ((i), (v))
-
-#define atomic_dec_return(v)	atomic_sub_return (1, (v))
-#define atomic_inc_return(v)	atomic_add_return (1, (v))
-#define atomic_inc(v) 		atomic_inc_return (v)
-#define atomic_dec(v) 		atomic_dec_return (v)
-
-/*
- * atomic_inc_and_test - increment and test
- * @v: pointer of type atomic_t
- *
- * Atomically increments @v by 1
- * and returns true if the result is zero, or false for all
- * other cases.
- */
-#define atomic_inc_and_test(v) (atomic_inc_return(v) == 0)
-
-#define atomic_sub_and_test(i,v)	(atomic_sub_return ((i), (v)) == 0)
-#define atomic_dec_and_test(v)		(atomic_sub_return (1, (v)) == 0)
-#define atomic_add_negative(i,v)	(atomic_add_return ((i), (v)) < 0)
-
-static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
-{
-	int ret;
-	unsigned long flags;
-
-	local_irq_save(flags);
-	ret = v->counter;
-	if (likely(ret == old))
-		v->counter = new;
-	local_irq_restore(flags);
-
-	return ret;
-}
-
-#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
-
-static inline int atomic_add_unless(atomic_t *v, int a, int u)
-{
-	int ret;
-	unsigned long flags;
-
-	local_irq_save(flags);
-	ret = v->counter;
-	if (ret != u)
-		v->counter += a;
-	local_irq_restore(flags);
-
-	return ret != u;
-}
-
-#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
-
-/* Atomic operations are already serializing on ARM */
-#define smp_mb__before_atomic_dec()	barrier()
-#define smp_mb__after_atomic_dec()	barrier()
-#define smp_mb__before_atomic_inc()	barrier()
-#define smp_mb__after_atomic_inc()	barrier()
-
-#include <asm-generic/atomic.h>
-#endif /* __V850_ATOMIC_H__ */
diff --git a/include/asm-v850/auxvec.h b/include/asm-v850/auxvec.h
deleted file mode 100644
index f493232d0224..000000000000
--- a/include/asm-v850/auxvec.h
+++ /dev/null
@@ -1,4 +0,0 @@
-#ifndef __V850_AUXVEC_H__
-#define __V850_AUXVEC_H__
-
-#endif /* __V850_AUXVEC_H__ */
diff --git a/include/asm-v850/bitops.h b/include/asm-v850/bitops.h
deleted file mode 100644
index f82f5b4a56e0..000000000000
--- a/include/asm-v850/bitops.h
+++ /dev/null
@@ -1,161 +0,0 @@
-/*
- * include/asm-v850/bitops.h -- Bit operations
- *
- *  Copyright (C) 2001,02,03,04,05  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03,04,05  Miles Bader <miles@gnu.org>
- *  Copyright (C) 1992  Linus Torvalds.
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- */
-
-#ifndef __V850_BITOPS_H__
-#define __V850_BITOPS_H__
-
-#ifndef _LINUX_BITOPS_H
-#error only <linux/bitops.h> can be included directly
-#endif
-
-#include <linux/compiler.h>	/* unlikely  */
-#include <asm/byteorder.h>	/* swab32 */
-#include <asm/system.h>		/* interrupt enable/disable */
-
-
-#ifdef __KERNEL__
-
-#include <asm-generic/bitops/ffz.h>
-
-/*
- * The __ functions are not atomic
- */
-
-/* In the following constant-bit-op macros, a "g" constraint is used when
-   we really need an integer ("i" constraint).  This is to avoid
-   warnings/errors from the compiler in the case where the associated
-   operand _isn't_ an integer, and shouldn't produce bogus assembly because
-   use of that form is protected by a guard statement that checks for
-   constants, and should otherwise be removed by the optimizer.  This
-   _usually_ works -- however, __builtin_constant_p returns true for a
-   variable with a known constant value too, and unfortunately gcc will
-   happily put the variable in a register and use the register for the "g"
-   constraint'd asm operand.  To avoid the latter problem, we add a
-   constant offset to the operand and subtract it back in the asm code;
-   forcing gcc to do arithmetic on the value is usually enough to get it
-   to use a real constant value.  This is horrible, and ultimately
-   unreliable too, but it seems to work for now (hopefully gcc will offer
-   us more control in the future, so we can do a better job).  */
-
-#define __const_bit_op(op, nr, addr)					\
-  ({ __asm__ (op " (%0 - 0x123), %1"					\
-	      :: "g" (((nr) & 0x7) + 0x123),				\
-		 "m" (*((char *)(addr) + ((nr) >> 3)))			\
-	      : "memory"); })
-#define __var_bit_op(op, nr, addr)					\
-  ({ int __nr = (nr);							\
-     __asm__ (op " %0, [%1]"						\
-	      :: "r" (__nr & 0x7),					\
-		 "r" ((char *)(addr) + (__nr >> 3))			\
-	      : "memory"); })
-#define __bit_op(op, nr, addr)						\
-  ((__builtin_constant_p (nr) && (unsigned)(nr) <= 0x7FFFF)		\
-   ? __const_bit_op (op, nr, addr)					\
-   : __var_bit_op (op, nr, addr))
-
-#define __set_bit(nr, addr)		__bit_op ("set1", nr, addr)
-#define __clear_bit(nr, addr)		__bit_op ("clr1", nr, addr)
-#define __change_bit(nr, addr)		__bit_op ("not1", nr, addr)
-
-/* The bit instructions used by `non-atomic' variants are actually atomic.  */
-#define set_bit __set_bit
-#define clear_bit __clear_bit
-#define change_bit __change_bit
-
-
-#define __const_tns_bit_op(op, nr, addr)				      \
-  ({ int __tns_res;							      \
-     __asm__ __volatile__ (						      \
-	     "tst1 (%1 - 0x123), %2; setf nz, %0; " op " (%1 - 0x123), %2"    \
-	     : "=&r" (__tns_res)					      \
-	     : "g" (((nr) & 0x7) + 0x123),				      \
-	       "m" (*((char *)(addr) + ((nr) >> 3)))			      \
-	     : "memory");						      \
-     __tns_res;								      \
-  })
-#define __var_tns_bit_op(op, nr, addr)					      \
-  ({ int __nr = (nr);							      \
-     int __tns_res;							      \
-     __asm__ __volatile__ (						      \
-	     "tst1 %1, [%2]; setf nz, %0; " op " %1, [%2]"		      \
-	      : "=&r" (__tns_res)					      \
-	      : "r" (__nr & 0x7),					      \
-		"r" ((char *)(addr) + (__nr >> 3))			      \
-	      : "memory");						      \
-     __tns_res;								      \
-  })
-#define __tns_bit_op(op, nr, addr)					\
-  ((__builtin_constant_p (nr) && (unsigned)(nr) <= 0x7FFFF)		\
-   ? __const_tns_bit_op (op, nr, addr)					\
-   : __var_tns_bit_op (op, nr, addr))
-#define __tns_atomic_bit_op(op, nr, addr)				\
-  ({ int __tns_atomic_res, __tns_atomic_flags;				\
-     local_irq_save (__tns_atomic_flags);				\
-     __tns_atomic_res = __tns_bit_op (op, nr, addr);			\
-     local_irq_restore (__tns_atomic_flags);				\
-     __tns_atomic_res;							\
-  })
-
-#define __test_and_set_bit(nr, addr)	__tns_bit_op ("set1", nr, addr)
-#define test_and_set_bit(nr, addr)	__tns_atomic_bit_op ("set1", nr, addr)
-
-#define __test_and_clear_bit(nr, addr)	__tns_bit_op ("clr1", nr, addr)
-#define test_and_clear_bit(nr, addr)	__tns_atomic_bit_op ("clr1", nr, addr)
-
-#define __test_and_change_bit(nr, addr)	__tns_bit_op ("not1", nr, addr)
-#define test_and_change_bit(nr, addr)	__tns_atomic_bit_op ("not1", nr, addr)
-
-
-#define __const_test_bit(nr, addr)					      \
-  ({ int __test_bit_res;						      \
-     __asm__ __volatile__ ("tst1 (%1 - 0x123), %2; setf nz, %0"		      \
-			   : "=r" (__test_bit_res)			      \
-			   : "g" (((nr) & 0x7) + 0x123),		      \
-			     "m" (*((const char *)(addr) + ((nr) >> 3))));    \
-     __test_bit_res;							      \
-  })
-static inline int __test_bit (int nr, const void *addr)
-{
-	int res;
-	__asm__ __volatile__ ("tst1 %1, [%2]; setf nz, %0"
-			      : "=r" (res)
-			      : "r" (nr & 0x7), "r" (addr + (nr >> 3)));
-	return res;
-}
-#define test_bit(nr,addr)						\
-  ((__builtin_constant_p (nr) && (unsigned)(nr) <= 0x7FFFF)		\
-   ? __const_test_bit ((nr), (addr))					\
-   : __test_bit ((nr), (addr)))
-
-
-/* clear_bit doesn't provide any barrier for the compiler.  */
-#define smp_mb__before_clear_bit()	barrier ()
-#define smp_mb__after_clear_bit()	barrier ()
-
-#include <asm-generic/bitops/ffs.h>
-#include <asm-generic/bitops/fls.h>
-#include <asm-generic/bitops/fls64.h>
-#include <asm-generic/bitops/__ffs.h>
-#include <asm-generic/bitops/find.h>
-#include <asm-generic/bitops/sched.h>
-#include <asm-generic/bitops/hweight.h>
-#include <asm-generic/bitops/lock.h>
-
-#include <asm-generic/bitops/ext2-non-atomic.h>
-#define ext2_set_bit_atomic(l,n,a)      test_and_set_bit(n,a)
-#define ext2_clear_bit_atomic(l,n,a)    test_and_clear_bit(n,a)
-
-#include <asm-generic/bitops/minix.h>
-
-#endif /* __KERNEL__ */
-
-#endif /* __V850_BITOPS_H__ */
diff --git a/include/asm-v850/bug.h b/include/asm-v850/bug.h
deleted file mode 100644
index b0ed2d35f3e8..000000000000
--- a/include/asm-v850/bug.h
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * include/asm-v850/bug.h -- Bug reporting
- *
- *  Copyright (C) 2003  NEC Electronics Corporation
- *  Copyright (C) 2003  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_BUG_H__
-#define __V850_BUG_H__
-
-#ifdef CONFIG_BUG
-extern void __bug (void) __attribute__ ((noreturn));
-#define BUG()		__bug()
-#define HAVE_ARCH_BUG
-#endif
-
-#include <asm-generic/bug.h>
-
-#endif /* __V850_BUG_H__ */
diff --git a/include/asm-v850/bugs.h b/include/asm-v850/bugs.h
deleted file mode 100644
index 71110a65c1d7..000000000000
--- a/include/asm-v850/bugs.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/*
- *  include/asm-v850e/bugs.h
- *
- *  Copyright (C) 1994  Linus Torvalds
- */
-
-/*
- * This is included by init/main.c to check for architecture-dependent bugs.
- *
- * Needs:
- *	void check_bugs(void);
- */
-
-static void check_bugs(void)
-{
-}
diff --git a/include/asm-v850/byteorder.h b/include/asm-v850/byteorder.h
deleted file mode 100644
index a6f07530050e..000000000000
--- a/include/asm-v850/byteorder.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * include/asm-v850/byteorder.h -- Endian id and conversion ops
- *
- *  Copyright (C) 2001  NEC Corporation
- *  Copyright (C) 2001  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_BYTEORDER_H__
-#define __V850_BYTEORDER_H__
-
-#include <asm/types.h>
-#include <linux/compiler.h>
-
-#ifdef __GNUC__
-
-static __inline__ __attribute_const__ __u32 ___arch__swab32 (__u32 word)
-{
-	__u32 res;
-	__asm__ ("bsw %1, %0" : "=r" (res) : "r" (word));
-	return res;
-}
-
-static __inline__ __attribute_const__ __u16 ___arch__swab16 (__u16 half_word)
-{
-	__u16 res;
-	__asm__ ("bsh %1, %0" : "=r" (res) : "r" (half_word));
-	return res;
-}
-
-#define __arch__swab32(x) ___arch__swab32(x)
-#define __arch__swab16(x) ___arch__swab16(x)
-
-#if !defined(__STRICT_ANSI__) || defined(__KERNEL__)
-#  define __BYTEORDER_HAS_U64__
-#  define __SWAB_64_THRU_32__
-#endif
-
-#endif /* __GNUC__ */
-
-#include <linux/byteorder/little_endian.h>
-
-#endif /* __V850_BYTEORDER_H__ */
diff --git a/include/asm-v850/cache.h b/include/asm-v850/cache.h
deleted file mode 100644
index 8832c7ea3242..000000000000
--- a/include/asm-v850/cache.h
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * include/asm-v850/cache.h -- Cache operations
- *
- *  Copyright (C) 2001,05  NEC Corporation
- *  Copyright (C) 2001,05  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_CACHE_H__
-#define __V850_CACHE_H__
-
-/* All cache operations are machine-dependent.  */
-#include <asm/machdep.h>
-
-#ifndef L1_CACHE_BYTES
-/* This processor has no cache, so just choose an arbitrary value.  */
-#define L1_CACHE_BYTES		16
-#define L1_CACHE_SHIFT		4
-#endif
-
-#endif /* __V850_CACHE_H__ */
diff --git a/include/asm-v850/cacheflush.h b/include/asm-v850/cacheflush.h
deleted file mode 100644
index 9ece05a202ef..000000000000
--- a/include/asm-v850/cacheflush.h
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * include/asm-v850/cacheflush.h
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_CACHEFLUSH_H__
-#define __V850_CACHEFLUSH_H__
-
-/* Somebody depends on this; sigh...  */
-#include <linux/mm.h>
-
-#include <asm/machdep.h>
-
-
-/* The following are all used by the kernel in ways that only affect
-   systems with MMUs, so we don't need them.  */
-#define flush_cache_all()			((void)0)
-#define flush_cache_mm(mm)			((void)0)
-#define flush_cache_dup_mm(mm)			((void)0)
-#define flush_cache_range(vma, start, end)	((void)0)
-#define flush_cache_page(vma, vmaddr, pfn)	((void)0)
-#define flush_dcache_page(page)			((void)0)
-#define flush_dcache_mmap_lock(mapping)		((void)0)
-#define flush_dcache_mmap_unlock(mapping)	((void)0)
-#define flush_cache_vmap(start, end)		((void)0)
-#define flush_cache_vunmap(start, end)		((void)0)
-
-#ifdef CONFIG_NO_CACHE
-
-/* Some systems have no cache at all, in which case we don't need these
-   either.  */
-#define flush_icache()				((void)0)
-#define flush_icache_range(start, end)		((void)0)
-#define flush_icache_page(vma,pg)		((void)0)
-#define flush_icache_user_range(vma,pg,adr,len)	((void)0)
-#define flush_cache_sigtramp(vaddr)		((void)0)
-
-#else /* !CONFIG_NO_CACHE */
-
-struct page;
-struct mm_struct;
-struct vm_area_struct;
-
-/* Otherwise, somebody had better define them.  */
-extern void flush_icache (void);
-extern void flush_icache_range (unsigned long start, unsigned long end);
-extern void flush_icache_page (struct vm_area_struct *vma, struct page *page);
-extern void flush_icache_user_range (struct vm_area_struct *vma,
-				     struct page *page,
-				     unsigned long adr, int len);
-extern void flush_cache_sigtramp (unsigned long addr);
-
-#endif /* CONFIG_NO_CACHE */
-
-#define copy_to_user_page(vma, page, vaddr, dst, src, len) \
-do { memcpy(dst, src, len); \
-     flush_icache_user_range(vma, page, vaddr, len); \
-} while (0)
-#define copy_from_user_page(vma, page, vaddr, dst, src, len) \
-	memcpy(dst, src, len)
-
-#endif /* __V850_CACHEFLUSH_H__ */
diff --git a/include/asm-v850/checksum.h b/include/asm-v850/checksum.h
deleted file mode 100644
index d1dddd938262..000000000000
--- a/include/asm-v850/checksum.h
+++ /dev/null
@@ -1,112 +0,0 @@
-/*
- * include/asm-v850/checksum.h -- Checksum ops
- *
- *  Copyright (C) 2001,2005  NEC Corporation
- *  Copyright (C) 2001,2005  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_CHECKSUM_H__
-#define __V850_CHECKSUM_H__
-
-/*
- * computes the checksum of a memory block at buff, length len,
- * and adds in "sum" (32-bit)
- *
- * returns a 32-bit number suitable for feeding into itself
- * or csum_tcpudp_magic
- *
- * this function must be called with even lengths, except
- * for the last fragment, which may be odd
- *
- * it's best to have buff aligned on a 32-bit boundary
- */
-extern __wsum csum_partial(const void *buff, int len, __wsum sum);
-
-/*
- * the same as csum_partial, but copies from src while it
- * checksums
- *
- * here even more important to align src and dst on a 32-bit (or even
- * better 64-bit) boundary
- */
-extern __wsum csum_partial_copy_nocheck(const void *src,
-				   void *dst, int len, __wsum sum);
-
-
-/*
- * the same as csum_partial_copy, but copies from user space.
- *
- * here even more important to align src and dst on a 32-bit (or even
- * better 64-bit) boundary
- */
-extern __wsum csum_partial_copy_from_user (const void *src,
-					     void *dst,
-					     int len, __wsum sum,
-					     int *csum_err);
-
-__sum16 ip_fast_csum(const void *iph, unsigned int ihl);
-
-/*
- *	Fold a partial checksum
- */
-static inline __sum16 csum_fold (__wsum sum)
-{
-	unsigned int result;
-	/*
-			        %0		%1
-	      hsw %1, %0	H     L		L     H
-	      add %1, %0	H     L		H+L+C H+L
-	*/
-	asm ("hsw %1, %0; add %1, %0" : "=&r" (result) : "r" (sum));
-	return (__force __sum16)(~result >> 16);
-}
-
-
-/*
- * computes the checksum of the TCP/UDP pseudo-header
- * returns a 16-bit checksum, already complemented
- */
-static inline __wsum
-csum_tcpudp_nofold (__be32 saddr, __be32 daddr,
-		    unsigned short len,
-		    unsigned short proto, __wsum sum)
-{
-	int __carry;
-	__asm__ ("add %2, %0;"
-		 "setf c, %1;"
-		 "add %1, %0;"
-		 "add %3, %0;"
-		 "setf c, %1;"
-		 "add %1, %0;"
-		 "add %4, %0;"
-		 "setf c, %1;"
-		 "add %1, %0"
-		 : "=&r" (sum), "=&r" (__carry)
-		 : "r" (daddr), "r" (saddr),
-		 "r" ((len + proto) << 8),
-		 "0" (sum));
-	return sum;
-}
-
-static inline __sum16
-csum_tcpudp_magic (__be32 saddr, __be32 daddr,
-		   unsigned short len,
-		   unsigned short proto, __wsum sum)
-{
-	return csum_fold (csum_tcpudp_nofold (saddr, daddr, len, proto, sum));
-}
-
-/*
- * this routine is used for miscellaneous IP-like checksums, mainly
- * in icmp.c
- */
-extern __sum16 ip_compute_csum(const void *buff, int len);
-
-
-#endif /* __V850_CHECKSUM_H__ */
diff --git a/include/asm-v850/clinkage.h b/include/asm-v850/clinkage.h
deleted file mode 100644
index c389691d6f86..000000000000
--- a/include/asm-v850/clinkage.h
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * include/asm-v850/clinkage.h -- Macros to reflect C symbol-naming conventions
- *
- *  Copyright (C) 2001,02  NEC Corporatione
- *  Copyright (C) 2001,02  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_CLINKAGE_H__
-#define __V850_CLINKAGE_H__
-
-#include <asm/macrology.h>
-#include <asm/asm.h>
-
-#define C_SYMBOL_NAME(name) 	macrology_paste(_, name)
-#define C_SYMBOL_STRING(name)	macrology_stringify(C_SYMBOL_NAME(name))
-#define C_ENTRY(name)		G_ENTRY(C_SYMBOL_NAME(name))
-#define C_DATA(name)		G_DATA(C_SYMBOL_NAME(name))
-#define C_END(name)		END(C_SYMBOL_NAME(name))
-
-#endif /* __V850_CLINKAGE_H__ */
diff --git a/include/asm-v850/cputime.h b/include/asm-v850/cputime.h
deleted file mode 100644
index 7c799c33b8a9..000000000000
--- a/include/asm-v850/cputime.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __V850_CPUTIME_H
-#define __V850_CPUTIME_H
-
-#include <asm-generic/cputime.h>
-
-#endif /* __V850_CPUTIME_H */
diff --git a/include/asm-v850/current.h b/include/asm-v850/current.h
deleted file mode 100644
index 30aae5673770..000000000000
--- a/include/asm-v850/current.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * include/asm-v850/current.h -- Current task
- *
- *  Copyright (C) 2001,02  NEC Corporation
- *  Copyright (C) 2001,02  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_CURRENT_H__
-#define __V850_CURRENT_H__
-
-#ifndef __ASSEMBLY__ /* <linux/thread_info.h> is not asm-safe.  */
-#include <linux/thread_info.h>
-#endif
-
-#include <asm/macrology.h>
-
-
-/* Register used to hold the current task pointer while in the kernel.
-   Any `call clobbered' register without a special meaning should be OK,
-   but check asm/v850/kernel/entry.S to be sure.  */
-#define CURRENT_TASK_REGNUM	16
-#define CURRENT_TASK 		macrology_paste (r, CURRENT_TASK_REGNUM)
-
-
-#ifdef __ASSEMBLY__
-
-/* Put a pointer to the current task structure into REG.  */
-#define GET_CURRENT_TASK(reg)						\
-	GET_CURRENT_THREAD(reg);					\
-	ld.w	TI_TASK[reg], reg
-
-#else /* !__ASSEMBLY__ */
-
-/* A pointer to the current task.  */
-register struct task_struct *current					\
-   __asm__ (macrology_stringify (CURRENT_TASK));
-
-#endif /* __ASSEMBLY__ */
-
-
-#endif /* _V850_CURRENT_H */
diff --git a/include/asm-v850/delay.h b/include/asm-v850/delay.h
deleted file mode 100644
index 6d028e6b2354..000000000000
--- a/include/asm-v850/delay.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * include/asm-v850/delay.h -- Delay routines, using a pre-computed
- * 	"loops_per_second" value
- *
- *  Copyright (C) 2001,03  NEC Corporation
- *  Copyright (C) 2001,03  Miles Bader <miles@gnu.org>
- *  Copyright (C) 1994 Hamish Macdonald
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- */
-
-#ifndef __V850_DELAY_H__
-#define __V850_DELAY_H__
-
-#include <asm/param.h>
-
-static inline void __delay(unsigned long loops)
-{
-	if (loops)
-		__asm__ __volatile__ ("1: add -1, %0; bnz 1b"
-				      : "=r" (loops) : "0" (loops));
-}
-
-/*
- * Use only for very small delays ( < 1 msec).  Should probably use a
- * lookup table, really, as the multiplications take much too long with
- * short delays.  This is a "reasonable" implementation, though (and the
- * first constant multiplications gets optimized away if the delay is
- * a constant)  
- */
-
-extern unsigned long loops_per_jiffy;
-
-static inline void udelay(unsigned long usecs)
-{
-	register unsigned long full_loops, part_loops;
-
-	full_loops = ((usecs * HZ) / 1000000) * loops_per_jiffy;
-	usecs %= (1000000 / HZ);
-	part_loops = (usecs * HZ * loops_per_jiffy) / 1000000;
-
-	__delay(full_loops + part_loops);
-}
-
-#endif /* __V850_DELAY_H__ */
diff --git a/include/asm-v850/device.h b/include/asm-v850/device.h
deleted file mode 100644
index d8f9872b0e2d..000000000000
--- a/include/asm-v850/device.h
+++ /dev/null
@@ -1,7 +0,0 @@
-/*
- * Arch specific extensions to struct device
- *
- * This file is released under the GPLv2
- */
-#include <asm-generic/device.h>
-
diff --git a/include/asm-v850/div64.h b/include/asm-v850/div64.h
deleted file mode 100644
index 6cd978cefb28..000000000000
--- a/include/asm-v850/div64.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/div64.h>
diff --git a/include/asm-v850/dma-mapping.h b/include/asm-v850/dma-mapping.h
deleted file mode 100644
index 1cc42c603a1b..000000000000
--- a/include/asm-v850/dma-mapping.h
+++ /dev/null
@@ -1,11 +0,0 @@
-#ifndef __V850_DMA_MAPPING_H__
-#define __V850_DMA_MAPPING_H__
-
-
-#ifdef CONFIG_PCI
-#include <asm-generic/dma-mapping.h>
-#else
-#include <asm-generic/dma-mapping-broken.h>
-#endif
-
-#endif /* __V850_DMA_MAPPING_H__ */
diff --git a/include/asm-v850/dma.h b/include/asm-v850/dma.h
deleted file mode 100644
index 2369849e2d0a..000000000000
--- a/include/asm-v850/dma.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef __V850_DMA_H__
-#define __V850_DMA_H__
-
-/* What should this be?  */
-#define MAX_DMA_ADDRESS	0xFFFFFFFF
-
-/* reserve a DMA channel */
-extern int request_dma (unsigned int dmanr, const char * device_id);
-/* release it again */
-extern void free_dma (unsigned int dmanr);
-
-#ifdef CONFIG_PCI
-extern int isa_dma_bridge_buggy;
-#else
-#define isa_dma_bridge_buggy    (0)
-#endif
-
-#endif /* __V850_DMA_H__ */
diff --git a/include/asm-v850/elf.h b/include/asm-v850/elf.h
deleted file mode 100644
index 28f5b176ff1a..000000000000
--- a/include/asm-v850/elf.h
+++ /dev/null
@@ -1,99 +0,0 @@
-#ifndef __V850_ELF_H__
-#define __V850_ELF_H__
-
-/*
- * ELF register definitions..
- */
-
-#include <asm/ptrace.h>
-#include <asm/user.h>
-#include <asm/byteorder.h>
-
-typedef unsigned long elf_greg_t;
-
-#define ELF_NGREG (sizeof (struct pt_regs) / sizeof(elf_greg_t))
-typedef elf_greg_t elf_gregset_t[ELF_NGREG];
-
-typedef struct user_fpu_struct elf_fpregset_t;
-
-/*
- * This is used to ensure we don't load something for the wrong architecture.
- */
-#define elf_check_arch(x)  \
-  ((x)->e_machine == EM_V850 || (x)->e_machine == EM_CYGNUS_V850)
-
-
-/* v850 relocation types.  */
-#define R_V850_NONE		0
-#define R_V850_9_PCREL		1
-#define R_V850_22_PCREL		2
-#define R_V850_HI16_S		3
-#define R_V850_HI16		4
-#define R_V850_LO16		5
-#define R_V850_32		6
-#define R_V850_16		7
-#define R_V850_8		8
-#define R_V850_SDA_16_16_OFFSET	9	/* For ld.b, st.b, set1, clr1,
-					   not1, tst1, movea, movhi */
-#define R_V850_SDA_15_16_OFFSET	10	/* For ld.w, ld.h, ld.hu, st.w, st.h */
-#define R_V850_ZDA_16_16_OFFSET	11	/* For ld.b, st.b, set1, clr1,
-					   not1, tst1, movea, movhi */
-#define R_V850_ZDA_15_16_OFFSET	12	/* For ld.w, ld.h, ld.hu, st.w, st.h */
-#define R_V850_TDA_6_8_OFFSET	13	/* For sst.w, sld.w */
-#define R_V850_TDA_7_8_OFFSET	14	/* For sst.h, sld.h */
-#define R_V850_TDA_7_7_OFFSET	15	/* For sst.b, sld.b */
-#define R_V850_TDA_16_16_OFFSET	16	/* For set1, clr1, not1, tst1,
-					   movea, movhi */
-#define R_V850_NUM		17
-
-
-/*
- * These are used to set parameters in the core dumps.
- */
-#define ELF_CLASS	ELFCLASS32
-#ifdef __LITTLE_ENDIAN__
-#define ELF_DATA	ELFDATA2LSB
-#else
-#define ELF_DATA	ELFDATA2MSB
-#endif
-#define ELF_ARCH	EM_V850
-
-#define USE_ELF_CORE_DUMP
-#define ELF_EXEC_PAGESIZE	4096
-
-
-#define ELF_CORE_COPY_REGS(_dest,_regs)				\
-	memcpy((char *) &_dest, (char *) _regs,			\
-	       sizeof(struct pt_regs));
-
-/* This yields a mask that user programs can use to figure out what
-   instruction set this CPU supports.  This could be done in user space,
-   but it's not easy, and we've already done it here.  */
-
-#define ELF_HWCAP	(0)
-
-/* This yields a string that ld.so will use to load implementation
-   specific libraries for optimization.  This is more specific in
-   intent than poking at uname or /proc/cpuinfo.
-
-   For the moment, we have only optimizations for the Intel generations,
-   but that could change... */
-
-#define ELF_PLATFORM  (NULL)
-
-#define ELF_PLAT_INIT(_r, load_addr)					      \
-  do {									      \
-	 _r->gpr[0] =  _r->gpr[1] =  _r->gpr[2] =  _r->gpr[3] =		      \
-	 _r->gpr[4] =  _r->gpr[5] =  _r->gpr[6] =  _r->gpr[7] =		      \
-	 _r->gpr[8] =  _r->gpr[9] = _r->gpr[10] = _r->gpr[11] =		      \
-	_r->gpr[12] = _r->gpr[13] = _r->gpr[14] = _r->gpr[15] =		      \
-	_r->gpr[16] = _r->gpr[17] = _r->gpr[18] = _r->gpr[19] =		      \
-	_r->gpr[20] = _r->gpr[21] = _r->gpr[22] = _r->gpr[23] =		      \
-	_r->gpr[24] = _r->gpr[25] = _r->gpr[26] = _r->gpr[27] =		      \
-	_r->gpr[28] = _r->gpr[29] = _r->gpr[30] = _r->gpr[31] =		      \
-	0;								      \
-  } while (0)
-
-#define SET_PERSONALITY(ex, ibcs2) set_personality(PER_LINUX_32BIT)
-
-#endif /* __V850_ELF_H__ */
diff --git a/include/asm-v850/emergency-restart.h b/include/asm-v850/emergency-restart.h
deleted file mode 100644
index 108d8c48e42e..000000000000
--- a/include/asm-v850/emergency-restart.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ASM_EMERGENCY_RESTART_H
-#define _ASM_EMERGENCY_RESTART_H
-
-#include <asm-generic/emergency-restart.h>
-
-#endif /* _ASM_EMERGENCY_RESTART_H */
diff --git a/include/asm-v850/entry.h b/include/asm-v850/entry.h
deleted file mode 100644
index d9df8ac48584..000000000000
--- a/include/asm-v850/entry.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- * include/asm-v850/entry.h -- Definitions used by low-level trap handlers
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_ENTRY_H__
-#define __V850_ENTRY_H__
-
-
-#include <asm/ptrace.h>
-#include <asm/machdep.h>
-
-
-/* These are special variables using by the kernel trap/interrupt code
-   to save registers in, at a time when there are no spare registers we
-   can use to do so, and we can't depend on the value of the stack
-   pointer.  This means that they must be within a signed 16-bit
-   displacement of 0x00000000.  */
-
-#define KERNEL_VAR_SPACE_ADDR	R0_RAM_ADDR
-
-#ifdef __ASSEMBLY__
-#define KERNEL_VAR(addr)	addr[r0]
-#else
-#define KERNEL_VAR(addr)	(*(volatile unsigned long *)(addr))
-#endif
-
-/* Kernel stack pointer, 4 bytes.  */
-#define KSP_ADDR		(KERNEL_VAR_SPACE_ADDR +  0)
-#define KSP			KERNEL_VAR (KSP_ADDR)
-/* 1 if in kernel-mode, 0 if in user mode, 1 byte.  */
-#define KM_ADDR 		(KERNEL_VAR_SPACE_ADDR +  4)
-#define KM			KERNEL_VAR (KM_ADDR)
-/* Temporary storage for interrupt handlers, 4 bytes.  */
-#define INT_SCRATCH_ADDR	(KERNEL_VAR_SPACE_ADDR +  8)
-#define INT_SCRATCH		KERNEL_VAR (INT_SCRATCH_ADDR)
-/* Where the stack-pointer is saved when jumping to various sorts of
-   interrupt handlers.  ENTRY_SP is used by everything except NMIs,
-   which have their own location.  Higher-priority NMIs can clobber the
-   value written by a lower priority NMI, since they can't be disabled,
-   but that's OK, because only NMI0 (the lowest-priority one) is allowed
-   to return.  */
-#define ENTRY_SP_ADDR		(KERNEL_VAR_SPACE_ADDR + 12)
-#define ENTRY_SP		KERNEL_VAR (ENTRY_SP_ADDR)
-#define NMI_ENTRY_SP_ADDR	(KERNEL_VAR_SPACE_ADDR + 16)
-#define NMI_ENTRY_SP		KERNEL_VAR (NMI_ENTRY_SP_ADDR)
-
-#ifdef CONFIG_RESET_GUARD
-/* Used to detect unexpected resets (since the v850 has no MMU, any call
-   through a null pointer will jump to the reset vector).  We detect
-   such resets by checking for a magic value, RESET_GUARD_ACTIVE, in
-   this location.  Properly resetting the machine stores zero there, so
-   it shouldn't trigger the guard; the power-on value is uncertain, but
-   it's unlikely to be RESET_GUARD_ACTIVE.  */
-#define RESET_GUARD_ADDR	(KERNEL_VAR_SPACE_ADDR + 28)
-#define RESET_GUARD		KERNEL_VAR (RESET_GUARD_ADDR)
-#define RESET_GUARD_ACTIVE	0xFAB4BEEF
-#endif /* CONFIG_RESET_GUARD */
-
-#ifdef CONFIG_V850E_HIGHRES_TIMER
-#define HIGHRES_TIMER_SLOW_TICKS_ADDR (KERNEL_VAR_SPACE_ADDR + 32)
-#define HIGHRES_TIMER_SLOW_TICKS     KERNEL_VAR (HIGHRES_TIMER_SLOW_TICKS_ADDR)
-#endif /* CONFIG_V850E_HIGHRES_TIMER */
-
-#ifndef __ASSEMBLY__
-
-#ifdef CONFIG_RESET_GUARD
-/* Turn off reset guard, so that resetting the machine works normally.
-   This should be called in the various machine_halt, etc., functions.  */
-static inline void disable_reset_guard (void)
-{
-	RESET_GUARD = 0;
-}
-#endif /* CONFIG_RESET_GUARD */
-
-#endif /* !__ASSEMBLY__ */
-
-
-/* A `state save frame' is a struct pt_regs preceded by some extra space
-   suitable for a function call stack frame.  */
-
-/* Amount of room on the stack reserved for arguments and to satisfy the
-   C calling conventions, in addition to the space used by the struct
-   pt_regs that actually holds saved values.  */
-#define STATE_SAVE_ARG_SPACE	(6*4) /* Up to six arguments.  */
-
-
-#ifdef __ASSEMBLY__
-
-/* The size of a state save frame.  */
-#define STATE_SAVE_SIZE		(PT_SIZE + STATE_SAVE_ARG_SPACE)
-
-#else /* !__ASSEMBLY__ */
-
-/* The size of a state save frame.  */
-#define STATE_SAVE_SIZE	       (sizeof (struct pt_regs) + STATE_SAVE_ARG_SPACE)
-
-#endif /* __ASSEMBLY__ */
-
-
-/* Offset of the struct pt_regs in a state save frame.  */
-#define STATE_SAVE_PT_OFFSET	STATE_SAVE_ARG_SPACE
-
-
-#endif /* __V850_ENTRY_H__ */
diff --git a/include/asm-v850/errno.h b/include/asm-v850/errno.h
deleted file mode 100644
index 31c91df01205..000000000000
--- a/include/asm-v850/errno.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __V850_ERRNO_H__
-#define __V850_ERRNO_H__
-
-#include <asm-generic/errno.h>
-
-#endif /* __V850_ERRNO_H__ */
diff --git a/include/asm-v850/fb.h b/include/asm-v850/fb.h
deleted file mode 100644
index c7df38030992..000000000000
--- a/include/asm-v850/fb.h
+++ /dev/null
@@ -1,12 +0,0 @@
-#ifndef _ASM_FB_H_
-#define _ASM_FB_H_
-#include <linux/fb.h>
-
-#define fb_pgprotect(...) do {} while (0)
-
-static inline int fb_is_primary_device(struct fb_info *info)
-{
-	return 0;
-}
-
-#endif /* _ASM_FB_H_ */
diff --git a/include/asm-v850/fcntl.h b/include/asm-v850/fcntl.h
deleted file mode 100644
index 3af4d56776dd..000000000000
--- a/include/asm-v850/fcntl.h
+++ /dev/null
@@ -1,11 +0,0 @@
-#ifndef __V850_FCNTL_H__
-#define __V850_FCNTL_H__
-
-#define O_DIRECTORY	040000	/* must be a directory */
-#define O_NOFOLLOW     0100000	/* don't follow links */
-#define O_DIRECT       0200000	/* direct disk access hint - currently ignored */
-#define O_LARGEFILE    0400000
-
-#include <asm-generic/fcntl.h>
-
-#endif /* __V850_FCNTL_H__ */
diff --git a/include/asm-v850/flat.h b/include/asm-v850/flat.h
deleted file mode 100644
index 17f0ea566611..000000000000
--- a/include/asm-v850/flat.h
+++ /dev/null
@@ -1,133 +0,0 @@
-/*
- * include/asm-v850/flat.h -- uClinux flat-format executables
- *
- *  Copyright (C) 2002,03  NEC Electronics Corporation
- *  Copyright (C) 2002,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_FLAT_H__
-#define __V850_FLAT_H__
-
-/* The amount by which a relocation can exceed the program image limits
-   without being regarded as an error.  On the v850, the relocations of
-   some base-pointers can be offset by 0x8000 (to allow better usage of the
-   space offered by 16-bit signed offsets -- in most cases the offsets used
-   with such a base-pointer will be negative).  */
-
-#define	flat_reloc_valid(reloc, size)	((reloc) <= (size + 0x8000))
-
-#define	flat_stack_align(sp)		/* nothing needed */
-#define	flat_argvp_envp_on_stack()	0
-#define	flat_old_ram_flag(flags)	(flags)
-#define	flat_set_persistent(relval, p)	0
-
-/* We store the type of relocation in the top 4 bits of the `relval.' */
-
-/* Convert a relocation entry into an address.  */
-static inline unsigned long
-flat_get_relocate_addr (unsigned long relval)
-{
-	return relval & 0x0fffffff; /* Mask out top 4-bits */
-}
-
-#define flat_v850_get_reloc_type(relval) ((relval) >> 28)
-
-#define FLAT_V850_R_32		0 /* Normal 32-bit reloc */
-#define FLAT_V850_R_HI16S_LO15	1 /* High 16-bits + signed 15-bit low field */
-#define FLAT_V850_R_HI16S_LO16	2 /* High 16-bits + signed 16-bit low field */
-
-/* Extract the address to be relocated from the symbol reference at RP;
-   RELVAL is the raw relocation-table entry from which RP is derived.
-   For the v850, RP should always be half-word aligned.  */
-static inline unsigned long flat_get_addr_from_rp (unsigned long *rp,
-						   unsigned long relval,
-						   unsigned long flags,
-						   unsigned long *persistent)
-{
-	short *srp = (short *)rp;
-
-	switch (flat_v850_get_reloc_type (relval))
-	{
-	case FLAT_V850_R_32:
-		/* Simple 32-bit address.  */
-		return srp[0] | (srp[1] << 16);
-
-	case FLAT_V850_R_HI16S_LO16:
-		/* The high and low halves of the address are in the 16
-		   bits at RP, and the 2nd word of the 32-bit instruction
-		   following that, respectively.  The low half is _signed_
-		   so we have to sign-extend it and add it to the upper
-		   half instead of simply or-ing them together.
-
-		   Unlike most relocated address, this one is stored in
-		   native (little-endian) byte-order to avoid problems with
-		   trashing the low-order bit, so we have to convert to
-		   network-byte-order before returning, as that's what the
-		   caller expects.  */
-		return htonl ((srp[0] << 16) + srp[2]);
-
-	case FLAT_V850_R_HI16S_LO15:
-		/* The high and low halves of the address are in the 16
-		   bits at RP, and the upper 15 bits of the 2nd word of the
-		   32-bit instruction following that, respectively.  The
-		   low half is _signed_ so we have to sign-extend it and
-		   add it to the upper half instead of simply or-ing them
-		   together.  The lowest bit is always zero.
-
-		   Unlike most relocated address, this one is stored in
-		   native (little-endian) byte-order to avoid problems with
-		   trashing the low-order bit, so we have to convert to
-		   network-byte-order before returning, as that's what the
-		   caller expects.  */
-		return htonl ((srp[0] << 16) + (srp[2] & ~0x1));
-
-	default:
-		return ~0;	/* bogus value */
-	}
-}
-
-/* Insert the address ADDR into the symbol reference at RP;
-   RELVAL is the raw relocation-table entry from which RP is derived.
-   For the v850, RP should always be half-word aligned.  */
-static inline void flat_put_addr_at_rp (unsigned long *rp, unsigned long addr,
-					unsigned long relval)
-{
-	short *srp = (short *)rp;
-
-	switch (flat_v850_get_reloc_type (relval)) {
-	case FLAT_V850_R_32:
-		/* Simple 32-bit address.  */
-		srp[0] = addr & 0xFFFF;
-		srp[1] = (addr >> 16);
-		break;
-
-	case FLAT_V850_R_HI16S_LO16:
-		/* The high and low halves of the address are in the 16
-		   bits at RP, and the 2nd word of the 32-bit instruction
-		   following that, respectively.  The low half is _signed_
-		   so we must carry its sign bit to the upper half before
-		   writing the upper half.  */
-		srp[0] = (addr >> 16) + ((addr >> 15) & 0x1);
-		srp[2] = addr & 0xFFFF;
-		break;
-
-	case FLAT_V850_R_HI16S_LO15:
-		/* The high and low halves of the address are in the 16
-		   bits at RP, and the upper 15 bits of the 2nd word of the
-		   32-bit instruction following that, respectively.  The
-		   low half is _signed_ so we must carry its sign bit to
-		   the upper half before writing the upper half.  The
-		   lowest bit we preserve from the existing instruction.  */
-		srp[0] = (addr >> 16) + ((addr >> 15) & 0x1);
-		srp[2] = (addr & 0xFFFE) | (srp[2] & 0x1);
-		break;
-	}
-}
-
-#endif /* __V850_FLAT_H__ */
diff --git a/include/asm-v850/fpga85e2c.h b/include/asm-v850/fpga85e2c.h
deleted file mode 100644
index 23aae666c718..000000000000
--- a/include/asm-v850/fpga85e2c.h
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * include/asm-v850/fpga85e2c.h -- Machine-dependent defs for
- *	FPGA implementation of V850E2/NA85E2C
- *
- *  Copyright (C) 2002,03  NEC Electronics Corporation
- *  Copyright (C) 2002,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_FPGA85E2C_H__
-#define __V850_FPGA85E2C_H__
-
-#include <asm/v850e2.h>
-#include <asm/clinkage.h>
-
-
-#define CPU_MODEL	"v850e2/fpga85e2c"
-#define CPU_MODEL_LONG	"NEC V850E2/NA85E2C"
-#define PLATFORM	"fpga85e2c"
-#define PLATFORM_LONG	"NA85E2C FPGA implementation"
-
-
-/* `external ram'.  */
-#define ERAM_ADDR		0
-#define ERAM_SIZE		0x00100000 /* 1MB */
-
-
-/* FPGA specific control registers.  */
-
-/* Writing a non-zero value to FLGREG(0) will signal the controlling CPU
-   to stop execution.  */
-#define FLGREG_ADDR(n)		(0xFFE80100 + 2*(n))
-#define FLGREG(n)		(*(volatile unsigned char *)FLGREG_ADDR (n))
-#define FLGREG_NUM		2
-
-#define CSDEV_ADDR(n)		(0xFFE80110 + 2*(n))
-#define CSDEV(n)		(*(volatile unsigned char *)CSDEV_ADDR (n))
-
-
-/* Timer interrupts 0-3, interrupt at intervals from CLK/4096 to CLK/16384.  */
-#define IRQ_RPU(n)		(60 + (n))
-#define IRQ_RPU_NUM		4
-
-/* For <asm/irq.h> */
-#define NUM_CPU_IRQS		64
-
-
-/* General-purpose timer.  */
-/* control/status register (can only be read/written via bit insns) */
-#define RPU_GTMC_ADDR		0xFFFFFB00
-#define RPU_GTMC		(*(volatile unsigned char *)RPU_GTMC_ADDR)
-#define RPU_GTMC_CE_BIT		7 /* clock enable (control) */
-#define RPU_GTMC_OV_BIT		6 /* overflow (status) */
-#define RPU_GTMC_CLK_BIT	1 /* 0 = .5 MHz CLK, 1 = 1 Mhz (control) */
-/* 32-bit count (8 least-significant bits are always zero).  */
-#define RPU_GTM_ADDR		0xFFFFFB28
-#define RPU_GTM			(*(volatile unsigned long *)RPU_GTMC_ADDR)
-
-
-/* For <asm/page.h> */
-#define PAGE_OFFSET		ERAM_ADDR /* minimum allocatable address */
-
-
-/* For <asm/entry.h> */
-/* `R0 RAM', used for a few miscellaneous variables that must be accessible
-   using a load instruction relative to R0.  The FPGA implementation
-   actually has no on-chip RAM, so we use part of main ram just after the
-   interrupt vectors.  */
-#ifdef __ASSEMBLY__
-#define R0_RAM_ADDR		lo(C_SYMBOL_NAME(_r0_ram))
-#else
-extern char _r0_ram;
-#define R0_RAM_ADDR		((unsigned long)&_r0_ram);
-#endif
-
-
-#endif /* __V850_FPGA85E2C_H__ */
diff --git a/include/asm-v850/futex.h b/include/asm-v850/futex.h
deleted file mode 100644
index 6a332a9f099c..000000000000
--- a/include/asm-v850/futex.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ASM_FUTEX_H
-#define _ASM_FUTEX_H
-
-#include <asm-generic/futex.h>
-
-#endif
diff --git a/include/asm-v850/gbus_int.h b/include/asm-v850/gbus_int.h
deleted file mode 100644
index 0c4bce753c7e..000000000000
--- a/include/asm-v850/gbus_int.h
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * include/asm-v850/gbus_int.h -- Midas labs GBUS interrupt support
- *
- *  Copyright (C) 2001,02  NEC Corporation
- *  Copyright (C) 2001,02  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_GBUS_INT_H__
-#define __V850_GBUS_INT_H__
-
-
-/* The GBUS interrupt interface has 32 interrupts shared among 4
-   processor interrupts.  The 32 GBUS interrupts are divided into two
-   sets of 16 each, for allocating among control registers, etc (there
-   are two of each control register, with bits 0-15 controlling an
-   interrupt each).  */
-
-/* The GBUS interrupts themselves.  */
-#define IRQ_GBUS_INT(n)		(GBUS_INT_BASE_IRQ + (n))
-#define IRQ_GBUS_INT_NUM	32
-
-/* Control registers.  */
-#define GBUS_INT_STATUS_ADDR(w)	(GBUS_INT_BASE_ADDR + (w)*0x40)
-#define GBUS_INT_STATUS(w)	(*(volatile u16 *)GBUS_INT_STATUS_ADDR(w))
-#define GBUS_INT_CLEAR_ADDR(w)	(GBUS_INT_BASE_ADDR + 0x10 + (w)*0x40)
-#define GBUS_INT_CLEAR(w)	(*(volatile u16 *)GBUS_INT_CLEAR_ADDR(w))
-#define GBUS_INT_EDGE_ADDR(w)	(GBUS_INT_BASE_ADDR + 0x20 + (w)*0x40)
-#define GBUS_INT_EDGE(w)	(*(volatile u16 *)GBUS_INT_EDGE_ADDR(w))
-#define GBUS_INT_POLARITY_ADDR(w)	(GBUS_INT_BASE_ADDR + 0x30 + (w)*0x40)
-#define GBUS_INT_POLARITY(w)	(*(volatile u16 *)GBUS_INT_POLARITY_ADDR(w))
-/* This allows enabling interrupt bits in word W for interrupt GINTn.  */
-#define GBUS_INT_ENABLE_ADDR(w, n) \
-   (GBUS_INT_BASE_ADDR + 0x100 + (w)*0x10 + (n)*0x20)
-#define GBUS_INT_ENABLE(w, n)	(*(volatile u16 *)GBUS_INT_ENABLE_ADDR(w, n))
-
-/* Mapping between kernel interrupt numbers and hardware control regs/bits.  */
-#define GBUS_INT_BITS_PER_WORD	16
-#define GBUS_INT_NUM_WORDS	(IRQ_GBUS_INT_NUM / GBUS_INT_BITS_PER_WORD)
-#define GBUS_INT_IRQ_WORD(irq)	(((irq) - GBUS_INT_BASE_IRQ) >> 4)
-#define GBUS_INT_IRQ_BIT(irq)	(((irq) - GBUS_INT_BASE_IRQ) & 0xF)
-#define GBUS_INT_IRQ_MASK(irq)	(1 << GBUS_INT_IRQ_BIT(irq))
-
-
-/* Possible priorities for GBUS interrupts.  */
-#define GBUS_INT_PRIORITY_HIGH		2
-#define GBUS_INT_PRIORITY_MEDIUM	4
-#define GBUS_INT_PRIORITY_LOW		6
-
-
-#ifndef __ASSEMBLY__
-
-/* Enable interrupt handling for interrupt IRQ.  */
-extern void gbus_int_enable_irq (unsigned irq);
-/* Disable interrupt handling for interrupt IRQ.  Note that any
-   interrupts received while disabled will be delivered once the
-   interrupt is enabled again, unless they are explicitly cleared using
-   `gbus_int_clear_pending_irq'.  */
-extern void gbus_int_disable_irq (unsigned irq);
-/* Return true if interrupt handling for interrupt IRQ is enabled.  */
-extern int gbus_int_irq_enabled (unsigned irq);
-/* Disable all GBUS irqs.  */
-extern void gbus_int_disable_irqs (void);
-/* Clear any pending interrupts for IRQ.  */
-extern void gbus_int_clear_pending_irq (unsigned irq);
-/* Return true if interrupt IRQ is pending (but disabled).  */
-extern int gbus_int_irq_pending (unsigned irq);
-
-
-struct gbus_int_irq_init {
-	const char *name;	/* name of interrupt type */
-
-	/* Range of kernel irq numbers for this type:
-	   BASE, BASE+INTERVAL, ..., BASE+INTERVAL*NUM  */
-	unsigned base, num, interval;
-
-	unsigned priority;	/* interrupt priority to assign */
-};
-struct hw_interrupt_type;	/* fwd decl */
-
-/* Initialize HW_IRQ_TYPES for GBUS irqs described in array
-   INITS (which is terminated by an entry with the name field == 0).  */
-extern void gbus_int_init_irq_types (struct gbus_int_irq_init *inits,
-				     struct hw_interrupt_type *hw_irq_types);
-
-/* Initialize GBUS interrupts.  */
-extern void gbus_int_init_irqs (void);
-
-#endif /* !__ASSEMBLY__ */
-
-
-#endif /* __V850_GBUS_INT_H__ */
diff --git a/include/asm-v850/hardirq.h b/include/asm-v850/hardirq.h
deleted file mode 100644
index 04e20127c5af..000000000000
--- a/include/asm-v850/hardirq.h
+++ /dev/null
@@ -1,28 +0,0 @@
-#ifndef __V850_HARDIRQ_H__
-#define __V850_HARDIRQ_H__
-
-#include <linux/threads.h>
-#include <linux/cache.h>
-
-#include <asm/irq.h>
-
-typedef struct {
-	unsigned int __softirq_pending;
-} ____cacheline_aligned irq_cpustat_t;
-
-#include <linux/irq_cpustat.h>	/* Standard mappings for irq_cpustat_t above */
-
-#define HARDIRQ_BITS	8
-
-/*
- * The hardirq mask has to be large enough to have
- * space for potentially all IRQ sources in the system
- * nesting on a single CPU:
- */
-#if (1 << HARDIRQ_BITS) < NR_IRQS
-# error HARDIRQ_BITS is too low!
-#endif
-
-void ack_bad_irq(unsigned int irq);
-
-#endif /* __V850_HARDIRQ_H__ */
diff --git a/include/asm-v850/highres_timer.h b/include/asm-v850/highres_timer.h
deleted file mode 100644
index 486fb49ceab6..000000000000
--- a/include/asm-v850/highres_timer.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * include/asm-v850/highres_timer.h -- High resolution timing routines
- *
- *  Copyright (C) 2001,03  NEC Electronics Corporation
- *  Copyright (C) 2001,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_HIGHRES_TIMER_H__
-#define __V850_HIGHRES_TIMER_H__
-
-#ifndef __ASSEMBLY__
-#include <linux/time.h>
-#endif
-
-#include <asm/entry.h>
-
-
-/* Frequency of the `slow ticks' (one tick each time the fast-tick
-   counter overflows).  */
-#define HIGHRES_TIMER_SLOW_TICK_RATE	25
-
-/* Which timer in the V850E `Timer D' we use.  */
-#define HIGHRES_TIMER_TIMER_D_UNIT	3
-
-
-#ifndef __ASSEMBLY__
-
-extern void highres_timer_start (void), highres_timer_stop (void);
-extern void highres_timer_reset (void);
-extern void highres_timer_read_ticks (u32 *slow_ticks, u32 *fast_ticks);
-extern void highres_timer_ticks_to_timeval (u32 slow_ticks, u32 fast_ticks,
-					    struct timeval *tv);
-extern void highres_timer_read (struct timeval *tv);
-
-#endif /* !__ASSEMBLY__ */
-
-
-#endif /* __V850_HIGHRES_TIMER_H__ */
diff --git a/include/asm-v850/hw_irq.h b/include/asm-v850/hw_irq.h
deleted file mode 100644
index 043e94bb6bd8..000000000000
--- a/include/asm-v850/hw_irq.h
+++ /dev/null
@@ -1,4 +0,0 @@
-#ifndef __V850_HW_IRQ_H__
-#define __V850_HW_IRQ_H__
-
-#endif /* __V850_HW_IRQ_H__ */
diff --git a/include/asm-v850/io.h b/include/asm-v850/io.h
deleted file mode 100644
index cdad251fba9f..000000000000
--- a/include/asm-v850/io.h
+++ /dev/null
@@ -1,142 +0,0 @@
-/*
- * include/asm-v850/io.h -- Misc I/O operations
- *
- *  Copyright (C) 2001,02,03,04,05  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03,04,05  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_IO_H__
-#define __V850_IO_H__
-
-#define IO_SPACE_LIMIT 0xFFFFFFFF
-
-#define readb(addr) \
-  ({ unsigned char __v = (*(volatile unsigned char *) (addr)); __v; })
-#define readw(addr) \
-  ({ unsigned short __v = (*(volatile unsigned short *) (addr)); __v; })
-#define readl(addr) \
-  ({ unsigned long __v = (*(volatile unsigned long *) (addr)); __v; })
-
-#define readb_relaxed(a) readb(a)
-#define readw_relaxed(a) readw(a)
-#define readl_relaxed(a) readl(a)
-
-#define writeb(val, addr) \
-  (void)((*(volatile unsigned char *) (addr)) = (val))
-#define writew(val, addr) \
-  (void)((*(volatile unsigned short *) (addr)) = (val))
-#define writel(val, addr) \
-  (void)((*(volatile unsigned int *) (addr)) = (val))
-
-#define __raw_readb readb
-#define __raw_readw readw
-#define __raw_readl readl
-#define __raw_writeb writeb
-#define __raw_writew writew
-#define __raw_writel writel
-
-#define inb(addr)	readb (addr)
-#define inw(addr)	readw (addr)
-#define inl(addr)	readl (addr)
-#define outb(x, addr)	((void) writeb (x, addr))
-#define outw(x, addr)	((void) writew (x, addr))
-#define outl(x, addr)	((void) writel (x, addr))
-
-#define inb_p(port)		inb((port))
-#define outb_p(val, port)	outb((val), (port))
-#define inw_p(port)		inw((port))
-#define outw_p(val, port)	outw((val), (port))
-#define inl_p(port)		inl((port))
-#define outl_p(val, port)	outl((val), (port))
-
-static inline void insb (unsigned long port, void *dst, unsigned long count)
-{
-	unsigned char *p = dst;
-	while (count--)
-		*p++ = inb (port);
-}
-static inline void insw (unsigned long port, void *dst, unsigned long count)
-{
-	unsigned short *p = dst;
-	while (count--)
-		*p++ = inw (port);
-}
-static inline void insl (unsigned long port, void *dst, unsigned long count)
-{
-	unsigned long *p = dst;
-	while (count--)
-		*p++ = inl (port);
-}
-
-static inline void
-outsb (unsigned long port, const void *src, unsigned long count)
-{
-	const unsigned char *p = src;
-	while (count--)
-		outb (*p++, port);
-}
-static inline void
-outsw (unsigned long port, const void *src, unsigned long count)
-{
-	const unsigned short *p = src;
-	while (count--)
-		outw (*p++, port);
-}
-static inline void
-outsl (unsigned long port, const void *src, unsigned long count)
-{
-	const unsigned long *p = src;
-	while (count--)
-		outl (*p++, port);
-}
-
-
-/* Some places try to pass in an loff_t for PHYSADDR (?!), so we cast it to
-   long before casting it to a pointer to avoid compiler warnings.  */
-#define ioremap(physaddr, size)	((void __iomem *)(unsigned long)(physaddr))
-#define iounmap(addr)		((void)0)
-
-#define ioremap_nocache(physaddr, size)		ioremap (physaddr, size)
-#define ioremap_writethrough(physaddr, size)	ioremap (physaddr, size)
-#define ioremap_fullcache(physaddr, size)	ioremap (physaddr, size)
-
-#define ioread8(addr)		readb (addr)
-#define ioread16(addr)		readw (addr)
-#define ioread32(addr)		readl (addr)
-#define iowrite8(val, addr)	writeb (val, addr)
-#define iowrite16(val, addr)	writew (val, addr)
-#define iowrite32(val, addr)	writel (val, addr)
-
-#define mmiowb()
-
-#define page_to_phys(page)      ((page - mem_map) << PAGE_SHIFT)
-#if 0
-/* This is really stupid; don't define it.  */
-#define page_to_bus(page)       page_to_phys (page)
-#endif
-
-/* Conversion between virtual and physical mappings.  */
-#define phys_to_virt(addr)	((void *)__phys_to_virt (addr))
-#define virt_to_phys(addr)	((unsigned long)__virt_to_phys (addr))
-
-#define memcpy_fromio(dst, src, len) memcpy (dst, (void *)src, len)
-#define memcpy_toio(dst, src, len) memcpy ((void *)dst, src, len)
-
-/*
- * Convert a physical pointer to a virtual kernel pointer for /dev/mem
- * access
- */
-#define xlate_dev_mem_ptr(p)	__va(p)
-
-/*
- * Convert a virtual cached pointer to an uncached pointer
- */
-#define xlate_dev_kmem_ptr(p)	p
-
-#endif /* __V850_IO_H__ */
diff --git a/include/asm-v850/ioctl.h b/include/asm-v850/ioctl.h
deleted file mode 100644
index b279fe06dfe5..000000000000
--- a/include/asm-v850/ioctl.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/ioctl.h>
diff --git a/include/asm-v850/ioctls.h b/include/asm-v850/ioctls.h
deleted file mode 100644
index 5313abd5f388..000000000000
--- a/include/asm-v850/ioctls.h
+++ /dev/null
@@ -1,84 +0,0 @@
-#ifndef __V850_IOCTLS_H__
-#define __V850_IOCTLS_H__
-
-#include <asm/ioctl.h>
-
-/* 0x54 is just a magic number to make these relatively unique ('T') */
-
-#define TCGETS		0x5401
-#define TCSETS		0x5402
-#define TCSETSW		0x5403
-#define TCSETSF		0x5404
-#define TCGETA		0x5405
-#define TCSETA		0x5406
-#define TCSETAW		0x5407
-#define TCSETAF		0x5408
-#define TCSBRK		0x5409
-#define TCXONC		0x540A
-#define TCFLSH		0x540B
-#define TIOCEXCL	0x540C
-#define TIOCNXCL	0x540D
-#define TIOCSCTTY	0x540E
-#define TIOCGPGRP	0x540F
-#define TIOCSPGRP	0x5410
-#define TIOCOUTQ	0x5411
-#define TIOCSTI		0x5412
-#define TIOCGWINSZ	0x5413
-#define TIOCSWINSZ	0x5414
-#define TIOCMGET	0x5415
-#define TIOCMBIS	0x5416
-#define TIOCMBIC	0x5417
-#define TIOCMSET	0x5418
-#define TIOCGSOFTCAR	0x5419
-#define TIOCSSOFTCAR	0x541A
-#define FIONREAD	0x541B
-#define TIOCINQ		FIONREAD
-#define TIOCLINUX	0x541C
-#define TIOCCONS	0x541D
-#define TIOCGSERIAL	0x541E
-#define TIOCSSERIAL	0x541F
-#define TIOCPKT		0x5420
-#define FIONBIO		0x5421
-#define TIOCNOTTY	0x5422
-#define TIOCSETD	0x5423
-#define TIOCGETD	0x5424
-#define TCSBRKP		0x5425	/* Needed for POSIX tcsendbreak() */
-#define TIOCSBRK	0x5427  /* BSD compatibility */
-#define TIOCCBRK	0x5428  /* BSD compatibility */
-#define TIOCGSID	0x5429  /* Return the session ID of FD */
-#define TCGETS2		_IOR('T',0x2A, struct termios2)
-#define TCSETS2		_IOW('T',0x2B, struct termios2)
-#define TCSETSW2	_IOW('T',0x2C, struct termios2)
-#define TCSETSF2	_IOW('T',0x2D, struct termios2)
-#define TIOCGPTN	_IOR('T',0x30, unsigned int) /* Get Pty Number (of pty-mux device) */
-#define TIOCSPTLCK	_IOW('T',0x31, int)  /* Lock/unlock Pty */
-
-#define FIONCLEX	0x5450  /* these numbers need to be adjusted. */
-#define FIOCLEX		0x5451
-#define FIOASYNC	0x5452
-#define TIOCSERCONFIG	0x5453
-#define TIOCSERGWILD	0x5454
-#define TIOCSERSWILD	0x5455
-#define TIOCGLCKTRMIOS	0x5456
-#define TIOCSLCKTRMIOS	0x5457
-#define TIOCSERGSTRUCT	0x5458 /* For debugging only */
-#define TIOCSERGETLSR   0x5459 /* Get line status register */
-#define TIOCSERGETMULTI 0x545A /* Get multiport config  */
-#define TIOCSERSETMULTI 0x545B /* Set multiport config */
-
-#define TIOCMIWAIT	0x545C	/* wait for a change on serial input line(s) */
-#define TIOCGICOUNT	0x545D	/* read serial port inline interrupt counts */
-#define FIOQSIZE	0x545E
-
-/* Used for packet mode */
-#define TIOCPKT_DATA		 0
-#define TIOCPKT_FLUSHREAD	 1
-#define TIOCPKT_FLUSHWRITE	 2
-#define TIOCPKT_STOP		 4
-#define TIOCPKT_START		 8
-#define TIOCPKT_NOSTOP		16
-#define TIOCPKT_DOSTOP		32
-
-#define TIOCSER_TEMT    0x01	/* Transmitter physically empty */
-
-#endif /* __V850_IOCTLS_H__ */
diff --git a/include/asm-v850/ipcbuf.h b/include/asm-v850/ipcbuf.h
deleted file mode 100644
index d8cbe9886d95..000000000000
--- a/include/asm-v850/ipcbuf.h
+++ /dev/null
@@ -1,29 +0,0 @@
-#ifndef __V850E_IPCBUF_H__
-#define __V850E_IPCBUF_H__
-
-/*
- * The user_ipc_perm structure for v850e architecture.
- * Note extra padding because this structure is passed back and forth
- * between kernel and user space.
- *
- * Pad space is left for:
- * - 32-bit mode_t and seq
- * - 2 miscellaneous 32-bit values
- */
-
-struct ipc64_perm
-{
-	__kernel_key_t		key;
-	__kernel_uid32_t	uid;
-	__kernel_gid32_t	gid;
-	__kernel_uid32_t	cuid;
-	__kernel_gid32_t	cgid;
-	__kernel_mode_t		mode;
-	unsigned short		__pad1;
-	unsigned short		seq;
-	unsigned short		__pad2;
-	unsigned long		__unused1;
-	unsigned long		__unused2;
-};
-
-#endif /* __V850E_IPCBUF_H__ */
diff --git a/include/asm-v850/irq.h b/include/asm-v850/irq.h
deleted file mode 100644
index 7d0d4cd1ce54..000000000000
--- a/include/asm-v850/irq.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * include/asm-v850/irq.h -- Machine interrupt handling
- *
- *  Copyright (C) 2001,02,04  NEC Electronics Corporation
- *  Copyright (C) 2001,02,04  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_IRQ_H__
-#define __V850_IRQ_H__
-
-#include <asm/machdep.h>
-
-/* Default NUM_MACH_IRQS.  */
-#ifndef NUM_MACH_IRQS
-#define NUM_MACH_IRQS	NUM_CPU_IRQS
-#endif
-
-/* NMIs have IRQ numbers from FIRST_NMI to FIRST_NMI+NUM_NMIS-1.  */
-#define FIRST_NMI	NUM_MACH_IRQS
-#define IRQ_NMI(n)	(FIRST_NMI + (n))
-/* v850 processors have 3 non-maskable interrupts.  */
-#define NUM_NMIS	3
-
-/* Includes both maskable and non-maskable irqs.  */
-#define NR_IRQS		(NUM_MACH_IRQS + NUM_NMIS)
-
-
-#ifndef __ASSEMBLY__
-
-struct pt_regs;
-struct hw_interrupt_type;
-struct irqaction;
-
-#define irq_canonicalize(irq)	(irq)
-
-/* Initialize irq handling for IRQs.
-   BASE_IRQ, BASE_IRQ+INTERVAL, ..., BASE_IRQ+NUM*INTERVAL
-   to IRQ_TYPE.  An IRQ_TYPE of 0 means to use a generic interrupt type.  */
-extern void
-init_irq_handlers (int base_irq, int num, int interval,
-		   struct hw_interrupt_type *irq_type);
-
-/* Handle interrupt IRQ.  REGS are the registers at the time of ther
-   interrupt.  */
-extern unsigned int handle_irq (int irq, struct pt_regs *regs);
-
-#endif /* !__ASSEMBLY__ */
-
-#endif /* __V850_IRQ_H__ */
diff --git a/include/asm-v850/irq_regs.h b/include/asm-v850/irq_regs.h
deleted file mode 100644
index 3dd9c0b70270..000000000000
--- a/include/asm-v850/irq_regs.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/irq_regs.h>
diff --git a/include/asm-v850/kdebug.h b/include/asm-v850/kdebug.h
deleted file mode 100644
index 6ece1b037665..000000000000
--- a/include/asm-v850/kdebug.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/kdebug.h>
diff --git a/include/asm-v850/kmap_types.h b/include/asm-v850/kmap_types.h
deleted file mode 100644
index 3288976b161f..000000000000
--- a/include/asm-v850/kmap_types.h
+++ /dev/null
@@ -1,19 +0,0 @@
-#ifndef __V850_KMAP_TYPES_H__
-#define __V850_KMAP_TYPES_H__
-
-enum km_type {
-	KM_BOUNCE_READ,
-	KM_SKB_SUNRPC_DATA,
-	KM_SKB_DATA_SOFTIRQ,
-	KM_USER0,
-	KM_USER1,
-	KM_BIO_SRC_IRQ,
-	KM_BIO_DST_IRQ,
-	KM_PTE0,
-	KM_PTE1,
-	KM_IRQ0,
-	KM_IRQ1,
-	KM_TYPE_NR
-};
-
-#endif /* __V850_KMAP_TYPES_H__ */
diff --git a/include/asm-v850/kvm.h b/include/asm-v850/kvm.h
deleted file mode 100644
index 3f729b79febc..000000000000
--- a/include/asm-v850/kvm.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __LINUX_KVM_V850_H
-#define __LINUX_KVM_V850_H
-
-/* v850 does not support KVM */
-
-#endif
diff --git a/include/asm-v850/linkage.h b/include/asm-v850/linkage.h
deleted file mode 100644
index b6185d3cfe68..000000000000
--- a/include/asm-v850/linkage.h
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef __ASM_LINKAGE_H
-#define __ASM_LINKAGE_H
-
-#ifdef __ASSEMBLY__
-#include <asm/asm.h>
-#endif
-
-#endif
diff --git a/include/asm-v850/local.h b/include/asm-v850/local.h
deleted file mode 100644
index 705148abe276..000000000000
--- a/include/asm-v850/local.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __V850_LOCAL_H__
-#define __V850_LOCAL_H__
-
-#include <asm-generic/local.h>
-
-#endif /* __V850_LOCAL_H__ */
diff --git a/include/asm-v850/ma.h b/include/asm-v850/ma.h
deleted file mode 100644
index 89e66473a176..000000000000
--- a/include/asm-v850/ma.h
+++ /dev/null
@@ -1,101 +0,0 @@
-/*
- * include/asm-v850/ma.h -- V850E/MA series of cpu chips
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_MA_H__
-#define __V850_MA_H__
-
-/* The MA series uses the V850E cpu core.  */
-#include <asm/v850e.h>
-
-
-/* For <asm/entry.h> */
-/* We use on-chip RAM, for a few miscellaneous variables that must be
-   accessible using a load instruction relative to R0.  The amount
-   varies between chip models, but there's always at least 4K, and it
-   should always start at FFFFC000.  */
-#define R0_RAM_ADDR			0xFFFFC000
-
-
-/* MA series UART details.  */
-#define V850E_UART_BASE_FREQ		CPU_CLOCK_FREQ
-
-/* This is a function that gets called before configuring the UART.  */
-#define V850E_UART_PRE_CONFIGURE	ma_uart_pre_configure
-#ifndef __ASSEMBLY__
-extern void ma_uart_pre_configure (unsigned chan,
-				   unsigned cflags, unsigned baud);
-#endif
-
-
-/* MA series timer C details.  */
-#define V850E_TIMER_C_BASE_ADDR		0xFFFFF600
-
-
-/* MA series timer D details.  */
-#define V850E_TIMER_D_BASE_ADDR		0xFFFFF540
-#define V850E_TIMER_D_TMD_BASE_ADDR 	(V850E_TIMER_D_BASE_ADDR + 0x0)
-#define V850E_TIMER_D_CMD_BASE_ADDR 	(V850E_TIMER_D_BASE_ADDR + 0x2)
-#define V850E_TIMER_D_TMCD_BASE_ADDR 	(V850E_TIMER_D_BASE_ADDR + 0x4)
-
-#define V850E_TIMER_D_BASE_FREQ		CPU_CLOCK_FREQ
-
-
-/* Port 0 */
-/* Direct I/O.  Bits 0-7 are pins P00-P07.  */
-#define MA_PORT0_IO_ADDR		0xFFFFF400
-#define MA_PORT0_IO			(*(volatile u8 *)MA_PORT0_IO_ADDR)
-/* Port mode (for direct I/O, 0 = output, 1 = input).  */
-#define MA_PORT0_PM_ADDR		0xFFFFF420
-#define MA_PORT0_PM			(*(volatile u8 *)MA_PORT0_PM_ADDR)
-/* Port mode control (0 = direct I/O mode, 1 = alternative I/O mode).  */
-#define MA_PORT0_PMC_ADDR		0xFFFFF440
-#define MA_PORT0_PMC			(*(volatile u8 *)MA_PORT0_PMC_ADDR)
-/* Port function control (for P04-P07, 0 = IRQ, 1 = DMARQ).  */
-#define MA_PORT0_PFC_ADDR		0xFFFFF460
-#define MA_PORT0_PFC			(*(volatile u8 *)MA_PORT0_PFC_ADDR)
-
-/* Port 1 */
-/* Direct I/O.  Bits 0-3 are pins P10-P13.  */
-#define MA_PORT1_IO_ADDR		0xFFFFF402
-#define MA_PORT1_IO			(*(volatile u8 *)MA_PORT1_IO_ADDR)
-/* Port mode (for direct I/O, 0 = output, 1 = input).  */
-#define MA_PORT1_PM_ADDR		0xFFFFF420
-#define MA_PORT1_PM			(*(volatile u8 *)MA_PORT1_PM_ADDR)
-/* Port mode control (0 = direct I/O mode, 1 = alternative I/O mode).  */
-#define MA_PORT1_PMC_ADDR		0xFFFFF442
-#define MA_PORT1_PMC			(*(volatile u8 *)MA_PORT1_PMC_ADDR)
-
-/* Port 4 */
-/* Direct I/O.  Bits 0-5 are pins P40-P45.  */
-#define MA_PORT4_IO_ADDR		0xFFFFF408
-#define MA_PORT4_IO			(*(volatile u8 *)MA_PORT4_IO_ADDR)
-/* Port mode (for direct I/O, 0 = output, 1 = input).  */
-#define MA_PORT4_PM_ADDR		0xFFFFF428
-#define MA_PORT4_PM			(*(volatile u8 *)MA_PORT4_PM_ADDR)
-/* Port mode control (0 = direct I/O mode, 1 = alternative I/O mode).  */
-#define MA_PORT4_PMC_ADDR		0xFFFFF448
-#define MA_PORT4_PMC			(*(volatile u8 *)MA_PORT4_PMC_ADDR)
-/* Port function control (for serial interfaces, 0 = CSI, 1 = UART).  */
-#define MA_PORT4_PFC_ADDR		0xFFFFF468
-#define MA_PORT4_PFC			(*(volatile u8 *)MA_PORT4_PFC_ADDR)
-
-
-#ifndef __ASSEMBLY__
-
-/* Initialize MA chip interrupts.  */
-extern void ma_init_irqs (void);
-
-#endif /* !__ASSEMBLY__ */
-
-
-#endif /* __V850_MA_H__ */
diff --git a/include/asm-v850/ma1.h b/include/asm-v850/ma1.h
deleted file mode 100644
index ede1f1de2b7a..000000000000
--- a/include/asm-v850/ma1.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * include/asm-v850/ma1.h -- V850E/MA1 cpu chip
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_MA1_H__
-#define __V850_MA1_H__
-
-/* Inherit more generic details from MA series.  */
-#include <asm/ma.h>
-
-
-#define CPU_MODEL	"v850e/ma1"
-#define CPU_MODEL_LONG	"NEC V850E/MA1"
-
-
-/* Hardware-specific interrupt numbers (in the kernel IRQ namespace).  */
-#define IRQ_INTOV(n)	(n)	/* 0-3 */
-#define IRQ_INTOV_NUM	4
-#define IRQ_INTP(n)	(0x4  + (n)) /* Pnnn (pin) interrupts */
-#define IRQ_INTP_NUM	24
-#define IRQ_INTCMD(n)	(0x1c + (n)) /* interval timer interrupts 0-3 */
-#define IRQ_INTCMD_NUM	4
-#define IRQ_INTDMA(n)	(0x20 + (n)) /* DMA interrupts 0-3 */
-#define IRQ_INTDMA_NUM	4
-#define IRQ_INTCSI(n)	(0x24 + (n)*4)/* CSI 0-2 transmit/receive completion */
-#define IRQ_INTCSI_NUM	3
-#define IRQ_INTSER(n)	(0x25 + (n)*4) /* UART 0-2 reception error */
-#define IRQ_INTSER_NUM	3
-#define IRQ_INTSR(n)	(0x26 + (n)*4) /* UART 0-2 reception completion */
-#define IRQ_INTSR_NUM	3
-#define IRQ_INTST(n)	(0x27 + (n)*4) /* UART 0-2 transmission completion */
-#define IRQ_INTST_NUM	3
-
-#define NUM_CPU_IRQS	0x30
-
-
-/* The MA1 has a UART with 3 channels.  */
-#define V850E_UART_NUM_CHANNELS	3
-
-
-#endif /* __V850_MA1_H__ */
diff --git a/include/asm-v850/machdep.h b/include/asm-v850/machdep.h
deleted file mode 100644
index f1e3b8b91508..000000000000
--- a/include/asm-v850/machdep.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * include/asm-v850/machdep.h -- Machine-dependent definitions
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_MACHDEP_H__
-#define __V850_MACHDEP_H__
-
-
-/* chips */
-#ifdef CONFIG_V850E_MA1
-#include <asm/ma1.h>
-#endif
-#ifdef CONFIG_V850E_ME2
-#include <asm/me2.h>
-#endif
-#ifdef CONFIG_V850E_TEG
-#include <asm/teg.h>
-#endif
-
-/* These are both chips _and_ platforms, so put them in the middle... */
-#ifdef CONFIG_V850E2_ANNA
-#include <asm/anna.h>
-#endif
-#ifdef CONFIG_V850E_AS85EP1
-#include <asm/as85ep1.h>
-#endif
-
-/* platforms */
-#ifdef CONFIG_RTE_CB_MA1
-#include <asm/rte_ma1_cb.h>
-#endif
-#ifdef CONFIG_RTE_CB_ME2
-#include <asm/rte_me2_cb.h>
-#endif
-#ifdef CONFIG_RTE_CB_NB85E
-#include <asm/rte_nb85e_cb.h>
-#endif
-#ifdef CONFIG_V850E_SIM
-#include <asm/sim.h>
-#endif
-#ifdef CONFIG_V850E2_SIM85E2C
-#include <asm/sim85e2c.h>
-#endif
-#ifdef CONFIG_V850E2_SIM85E2S
-#include <asm/sim85e2s.h>
-#endif
-#ifdef CONFIG_V850E2_FPGA85E2C
-#include <asm/fpga85e2c.h>
-#endif
-
-#endif /* __V850_MACHDEP_H__ */
diff --git a/include/asm-v850/macrology.h b/include/asm-v850/macrology.h
deleted file mode 100644
index 37abf874832c..000000000000
--- a/include/asm-v850/macrology.h
+++ /dev/null
@@ -1,17 +0,0 @@
-/*
- * include/asm-v850/macrology.h -- Various useful CPP macros
- *
- *  Copyright (C) 2001  NEC Corporation
- *  Copyright (C) 2001  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#define macrology_paste(arg1, arg2)	macrology_paste_1(arg1, arg2)
-#define macrology_paste_1(arg1, arg2)	arg1 ## arg2
-#define macrology_stringify(sym)	macrology_stringify_1(sym)
-#define macrology_stringify_1(sym)	#sym
diff --git a/include/asm-v850/me2.h b/include/asm-v850/me2.h
deleted file mode 100644
index ac7c9ce0bdc1..000000000000
--- a/include/asm-v850/me2.h
+++ /dev/null
@@ -1,182 +0,0 @@
-/*
- * include/asm-v850/me2.h -- V850E/ME2 cpu chip
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_ME2_H__
-#define __V850_ME2_H__
-
-#include <asm/v850e.h>
-#include <asm/v850e_cache.h>
-
-
-#define CPU_MODEL	"v850e/me2"
-#define CPU_MODEL_LONG	"NEC V850E/ME2"
-
-
-/* Hardware-specific interrupt numbers (in the kernel IRQ namespace).  */
-#define IRQ_INTP(n)       (n) /* Pnnn (pin) interrupts */
-#define IRQ_INTP_NUM      31
-#define IRQ_INTCMD(n)     (0x31 + (n)) /* interval timer interrupts 0-3 */
-#define IRQ_INTCMD_NUM    4
-#define IRQ_INTDMA(n)     (0x41 + (n)) /* DMA interrupts 0-3 */
-#define IRQ_INTDMA_NUM    4
-#define IRQ_INTUBTIRE(n)  (0x49 + (n)*5)/* UARTB 0-1 reception error */
-#define IRQ_INTUBTIRE_NUM 2
-#define IRQ_INTUBTIR(n)   (0x4a + (n)*5) /* UARTB 0-1 reception complete */
-#define IRQ_INTUBTIR_NUM  2
-#define IRQ_INTUBTIT(n)   (0x4b + (n)*5) /* UARTB 0-1 transmission complete */
-#define IRQ_INTUBTIT_NUM  2
-#define IRQ_INTUBTIF(n)   (0x4c + (n)*5) /* UARTB 0-1 FIFO trans. complete */
-#define IRQ_INTUBTIF_NUM  2
-#define IRQ_INTUBTITO(n)  (0x4d + (n)*5) /* UARTB 0-1 reception timeout */
-#define IRQ_INTUBTITO_NUM 2
-
-/* For <asm/irq.h> */
-#define NUM_CPU_IRQS		0x59 /* V850E/ME2 */
-
-
-/* For <asm/entry.h> */
-/* We use on-chip RAM, for a few miscellaneous variables that must be
-   accessible using a load instruction relative to R0.  */
-#define R0_RAM_ADDR			0xFFFFB000 /* V850E/ME2 */
-
-
-/* V850E/ME2 UARTB details.*/
-#define V850E_UART_NUM_CHANNELS		2
-#define V850E_UARTB_BASE_FREQ		(CPU_CLOCK_FREQ / 4)
-
-/* This is a function that gets called before configuring the UART.  */
-#define V850E_UART_PRE_CONFIGURE	me2_uart_pre_configure
-#ifndef __ASSEMBLY__
-extern void me2_uart_pre_configure (unsigned chan,
-				    unsigned cflags, unsigned baud);
-#endif /* __ASSEMBLY__ */
-
-
-/* V850E/ME2 timer C details.  */
-#define V850E_TIMER_C_BASE_ADDR		0xFFFFF600
-
-
-/* V850E/ME2 timer D details.  */
-#define V850E_TIMER_D_BASE_ADDR		0xFFFFF540
-#define V850E_TIMER_D_TMD_BASE_ADDR	(V850E_TIMER_D_BASE_ADDR + 0x0)
-#define V850E_TIMER_D_CMD_BASE_ADDR 	(V850E_TIMER_D_BASE_ADDR + 0x2)
-#define V850E_TIMER_D_TMCD_BASE_ADDR	(V850E_TIMER_D_BASE_ADDR + 0x4)
-
-#define V850E_TIMER_D_BASE_FREQ		(CPU_CLOCK_FREQ / 2)
-
-
-/* Select iRAM mode.  */
-#define ME2_IRAMM_ADDR			0xFFFFF80A
-#define ME2_IRAMM			(*(volatile u8*)ME2_IRAMM_ADDR)
-
-
-/* Interrupt edge-detection configuration.  INTF(n) and INTR(n) are only
-   valid for n == 1, 2, or 5.  */
-#define ME2_INTF_ADDR(n)		(0xFFFFFC00 + (n) * 0x2)
-#define ME2_INTF(n)			(*(volatile u8*)ME2_INTF_ADDR(n))
-#define ME2_INTR_ADDR(n)		(0xFFFFFC20 + (n) * 0x2)
-#define ME2_INTR(n)			(*(volatile u8*)ME2_INTR_ADDR(n))
-#define ME2_INTFAL_ADDR			0xFFFFFC10
-#define ME2_INTFAL			(*(volatile u8*)ME2_INTFAL_ADDR)
-#define ME2_INTRAL_ADDR			0xFFFFFC30
-#define ME2_INTRAL			(*(volatile u8*)ME2_INTRAL_ADDR)
-#define ME2_INTFDH_ADDR			0xFFFFFC16
-#define ME2_INTFDH			(*(volatile u16*)ME2_INTFDH_ADDR)
-#define ME2_INTRDH_ADDR			0xFFFFFC36
-#define ME2_INTRDH			(*(volatile u16*)ME2_INTRDH_ADDR)
-#define ME2_SESC_ADDR(n)		(0xFFFFF609 + (n) * 0x10)
-#define ME2_SESC(n)			(*(volatile u8*)ME2_SESC_ADDR(n))
-#define ME2_SESA10_ADDR			0xFFFFF5AD
-#define ME2_SESA10			(*(volatile u8*)ME2_SESA10_ADDR)
-#define ME2_SESA11_ADDR			0xFFFFF5DD
-#define ME2_SESA11			(*(volatile u8*)ME2_SESA11_ADDR)
-
-
-/* Port 1 */
-/* Direct I/O.  Bits 0-3 are pins P10-P13.  */
-#define ME2_PORT1_IO_ADDR		0xFFFFF402
-#define ME2_PORT1_IO			(*(volatile u8 *)ME2_PORT1_IO_ADDR)
-/* Port mode (for direct I/O, 0 = output, 1 = input).  */
-#define ME2_PORT1_PM_ADDR		0xFFFFF422
-#define ME2_PORT1_PM			(*(volatile u8 *)ME2_PORT1_PM_ADDR)
-/* Port mode control (0 = direct I/O mode, 1 = alternative I/O mode).  */
-#define ME2_PORT1_PMC_ADDR		0xFFFFF442
-#define ME2_PORT1_PMC			(*(volatile u8 *)ME2_PORT1_PMC_ADDR)
-/* Port function control (for serial interfaces, 0 = CSI30, 1 = UARTB0 ).  */
-#define ME2_PORT1_PFC_ADDR		0xFFFFF462
-#define ME2_PORT1_PFC			(*(volatile u8 *)ME2_PORT1_PFC_ADDR)
-
-/* Port 2 */
-/* Direct I/O.  Bits 0-3 are pins P20-P25.  */
-#define ME2_PORT2_IO_ADDR		0xFFFFF404
-#define ME2_PORT2_IO			(*(volatile u8 *)ME2_PORT2_IO_ADDR)
-/* Port mode (for direct I/O, 0 = output, 1 = input).  */
-#define ME2_PORT2_PM_ADDR		0xFFFFF424
-#define ME2_PORT2_PM			(*(volatile u8 *)ME2_PORT2_PM_ADDR)
-/* Port mode control (0 = direct I/O mode, 1 = alternative I/O mode).  */
-#define ME2_PORT2_PMC_ADDR		0xFFFFF444
-#define ME2_PORT2_PMC			(*(volatile u8 *)ME2_PORT2_PMC_ADDR)
-/* Port function control (for serial interfaces, 0 = INTP2x, 1 = UARTB1 ).  */
-#define ME2_PORT2_PFC_ADDR		0xFFFFF464
-#define ME2_PORT2_PFC			(*(volatile u8 *)ME2_PORT2_PFC_ADDR)
-
-/* Port 5 */
-/* Direct I/O.  Bits 0-5 are pins P50-P55.  */
-#define ME2_PORT5_IO_ADDR		0xFFFFF40A
-#define ME2_PORT5_IO			(*(volatile u8 *)ME2_PORT5_IO_ADDR)
-/* Port mode (for direct I/O, 0 = output, 1 = input).  */
-#define ME2_PORT5_PM_ADDR		0xFFFFF42A
-#define ME2_PORT5_PM			(*(volatile u8 *)ME2_PORT5_PM_ADDR)
-/* Port mode control (0 = direct I/O mode, 1 = alternative I/O mode).  */
-#define ME2_PORT5_PMC_ADDR		0xFFFFF44A
-#define ME2_PORT5_PMC			(*(volatile u8 *)ME2_PORT5_PMC_ADDR)
-/* Port function control ().  */
-#define ME2_PORT5_PFC_ADDR		0xFFFFF46A
-#define ME2_PORT5_PFC			(*(volatile u8 *)ME2_PORT5_PFC_ADDR)
-
-/* Port 6 */
-/* Direct I/O.  Bits 5-7 are pins P65-P67.  */
-#define ME2_PORT6_IO_ADDR		0xFFFFF40C
-#define ME2_PORT6_IO			(*(volatile u8 *)ME2_PORT6_IO_ADDR)
-/* Port mode (for direct I/O, 0 = output, 1 = input).  */
-#define ME2_PORT6_PM_ADDR		0xFFFFF42C
-#define ME2_PORT6_PM			(*(volatile u8 *)ME2_PORT6_PM_ADDR)
-/* Port mode control (0 = direct I/O mode, 1 = alternative I/O mode).  */
-#define ME2_PORT6_PMC_ADDR		0xFFFFF44C
-#define ME2_PORT6_PMC			(*(volatile u8 *)ME2_PORT6_PMC_ADDR)
-/* Port function control ().  */
-#define ME2_PORT6_PFC_ADDR		0xFFFFF46C
-#define ME2_PORT6_PFC			(*(volatile u8 *)ME2_PORT6_PFC_ADDR)
-
-/* Port 7 */
-/* Direct I/O.  Bits 2-7 are pins P72-P77.  */
-#define ME2_PORT7_IO_ADDR		0xFFFFF40E
-#define ME2_PORT7_IO			(*(volatile u8 *)ME2_PORT7_IO_ADDR)
-/* Port mode (for direct I/O, 0 = output, 1 = input).  */
-#define ME2_PORT7_PM_ADDR		0xFFFFF42E
-#define ME2_PORT7_PM			(*(volatile u8 *)ME2_PORT7_PM_ADDR)
-/* Port mode control (0 = direct I/O mode, 1 = alternative I/O mode).  */
-#define ME2_PORT7_PMC_ADDR		0xFFFFF44E
-#define ME2_PORT7_PMC			(*(volatile u8 *)ME2_PORT7_PMC_ADDR)
-/* Port function control ().  */
-#define ME2_PORT7_PFC_ADDR		0xFFFFF46E
-#define ME2_PORT7_PFC			(*(volatile u8 *)ME2_PORT7_PFC_ADDR)
-
-
-#ifndef __ASSEMBLY__
-/* Initialize V850E/ME2 chip interrupts.  */
-extern void me2_init_irqs (void);
-#endif /* !__ASSEMBLY__ */
-
-
-#endif /* __V850_ME2_H__ */
diff --git a/include/asm-v850/mman.h b/include/asm-v850/mman.h
deleted file mode 100644
index edbf6edbfb37..000000000000
--- a/include/asm-v850/mman.h
+++ /dev/null
@@ -1,15 +0,0 @@
-#ifndef __V850_MMAN_H__
-#define __V850_MMAN_H__
-
-#include <asm-generic/mman.h>
-
-#define MAP_GROWSDOWN	0x0100		/* stack-like segment */
-#define MAP_DENYWRITE	0x0800		/* ETXTBSY */
-#define MAP_EXECUTABLE	0x1000		/* mark it as an executable */
-#define MAP_LOCKED	0x2000		/* pages are locked */
-#define MAP_NORESERVE	0x4000		/* don't check for reservations */
-
-#define MCL_CURRENT	1		/* lock all current mappings */
-#define MCL_FUTURE	2		/* lock all future mappings */
-
-#endif /* __V850_MMAN_H__ */
diff --git a/include/asm-v850/mmu.h b/include/asm-v850/mmu.h
deleted file mode 100644
index 267768c66ef6..000000000000
--- a/include/asm-v850/mmu.h
+++ /dev/null
@@ -1,11 +0,0 @@
-/* Copyright (C) 2002, 2005, David McCullough <davidm@snapgear.com> */
-
-#ifndef __V850_MMU_H__
-#define __V850_MMU_H__
-
-typedef struct {
-	struct vm_list_struct	*vmlist;
-	unsigned long		end_brk;
-} mm_context_t;
-
-#endif /* __V850_MMU_H__ */
diff --git a/include/asm-v850/mmu_context.h b/include/asm-v850/mmu_context.h
deleted file mode 100644
index 01daacd5474e..000000000000
--- a/include/asm-v850/mmu_context.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#ifndef __V850_MMU_CONTEXT_H__
-#define __V850_MMU_CONTEXT_H__
-
-#include <asm-generic/mm_hooks.h>
-
-#define destroy_context(mm)		((void)0)
-#define init_new_context(tsk,mm)	0
-#define switch_mm(prev,next,tsk)	((void)0)
-#define deactivate_mm(tsk,mm)		do { } while (0)
-#define activate_mm(prev,next)		((void)0)
-#define enter_lazy_tlb(mm,tsk)		((void)0)
-
-#endif /* __V850_MMU_CONTEXT_H__ */
diff --git a/include/asm-v850/module.h b/include/asm-v850/module.h
deleted file mode 100644
index 2c2f4944f09f..000000000000
--- a/include/asm-v850/module.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * include/asm-v850/module.h -- Architecture-specific module hooks
- *
- *  Copyright (C) 2001,02,03,04  NEC Corporation
- *  Copyright (C) 2001,02,03,04  Miles Bader <miles@gnu.org>
- *  Copyright (C) 2001,03  Rusty Russell
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- *
- * Derived in part from include/asm-ppc/module.h
- */
-
-#ifndef __V850_MODULE_H__
-#define __V850_MODULE_H__
-
-#define MODULE_SYMBOL_PREFIX "_"
-
-struct v850_plt_entry
-{
-	/* Indirect jump instruction sequence (6-byte mov + 2-byte jr).  */
-	unsigned long tramp[2];
-};
-
-struct mod_arch_specific
-{
-	/* Indices of PLT sections within module. */
-	unsigned int core_plt_section, init_plt_section;
-};
-
-#define Elf_Shdr Elf32_Shdr
-#define Elf_Sym Elf32_Sym
-#define Elf_Ehdr Elf32_Ehdr
-
-/* Make empty sections for module_frob_arch_sections to expand. */
-#ifdef MODULE
-asm(".section .plt,\"ax\",@nobits; .align 3; .previous");
-asm(".section .init.plt,\"ax\",@nobits; .align 3; .previous");
-#endif
-
-/* We don't do exception tables.  */
-struct exception_table_entry;
-static inline const struct exception_table_entry *
-search_extable(const struct exception_table_entry *first,
-	       const struct exception_table_entry *last,
-	       unsigned long value)
-{
-	return 0;
-}
-#define ARCH_HAS_SEARCH_EXTABLE
-static inline void
-sort_extable(struct exception_table_entry *start,
-	     struct exception_table_entry *finish)
-{
-	/* nada */
-}
-#define ARCH_HAS_SORT_EXTABLE
-
-#endif /* __V850_MODULE_H__ */
diff --git a/include/asm-v850/msgbuf.h b/include/asm-v850/msgbuf.h
deleted file mode 100644
index ed07dbd01637..000000000000
--- a/include/asm-v850/msgbuf.h
+++ /dev/null
@@ -1,31 +0,0 @@
-#ifndef __V850_MSGBUF_H__
-#define __V850_MSGBUF_H__
-
-/* 
- * The msqid64_ds structure for v850 architecture.
- * Note extra padding because this structure is passed back and forth
- * between kernel and user space.
- *
- * Pad space is left for:
- * - 64-bit time_t to solve y2038 problem
- * - 2 miscellaneous 32-bit values
- */
-
-struct msqid64_ds {
-	struct ipc64_perm msg_perm;
-	__kernel_time_t msg_stime;	/* last msgsnd time */
-	unsigned long	__unused1;
-	__kernel_time_t msg_rtime;	/* last msgrcv time */
-	unsigned long	__unused2;
-	__kernel_time_t msg_ctime;	/* last change time */
-	unsigned long	__unused3;
-	unsigned long  msg_cbytes;	/* current number of bytes on queue */
-	unsigned long  msg_qnum;	/* number of messages in queue */
-	unsigned long  msg_qbytes;	/* max number of bytes on queue */
-	__kernel_pid_t msg_lspid;	/* pid of last msgsnd */
-	__kernel_pid_t msg_lrpid;	/* last receive pid */
-	unsigned long  __unused4;
-	unsigned long  __unused5;
-};
-
-#endif /* __V850_MSGBUF_H__ */
diff --git a/include/asm-v850/mutex.h b/include/asm-v850/mutex.h
deleted file mode 100644
index 458c1f7fbc18..000000000000
--- a/include/asm-v850/mutex.h
+++ /dev/null
@@ -1,9 +0,0 @@
-/*
- * Pull in the generic implementation for the mutex fastpath.
- *
- * TODO: implement optimized primitives instead, or leave the generic
- * implementation in place, or pick the atomic_xchg() based generic
- * implementation. (see asm-generic/mutex-xchg.h for details)
- */
-
-#include <asm-generic/mutex-dec.h>
diff --git a/include/asm-v850/page.h b/include/asm-v850/page.h
deleted file mode 100644
index f9de35d873fa..000000000000
--- a/include/asm-v850/page.h
+++ /dev/null
@@ -1,124 +0,0 @@
-/*
- * include/asm-v850/page.h -- VM ops
- *
- *  Copyright (C) 2001,02,03,05  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03,05  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_PAGE_H__
-#define __V850_PAGE_H__
-
-#include <asm/machdep.h>
-
-
-#define PAGE_SHIFT	12
-#define PAGE_SIZE       (1UL << PAGE_SHIFT)
-#define PAGE_MASK       (~(PAGE_SIZE-1))
-
-
-/*
- * PAGE_OFFSET -- the first address of the first page of memory. For archs with
- * no MMU this corresponds to the first free page in physical memory (aligned
- * on a page boundary).
- */
-#ifndef PAGE_OFFSET
-#define PAGE_OFFSET  0x0000000
-#endif
-
-
-#ifndef __ASSEMBLY__
-
-#define STRICT_MM_TYPECHECKS
-
-#define clear_page(page)	memset ((void *)(page), 0, PAGE_SIZE)
-#define copy_page(to, from)	memcpy ((void *)(to), (void *)from, PAGE_SIZE)
-
-#define clear_user_page(addr, vaddr, page)	\
-	do { 	clear_page(addr);		\
-		flush_dcache_page(page);	\
-	} while (0)
-#define copy_user_page(to, from, vaddr, page)	\
-	do {	copy_page(to, from);		\
-		flush_dcache_page(page);	\
-	} while (0)
-
-#ifdef STRICT_MM_TYPECHECKS
-/*
- * These are used to make use of C type-checking..
- */
-
-typedef struct { unsigned long pte; } pte_t;
-typedef struct { unsigned long pmd; } pmd_t;
-typedef struct { unsigned long pgd; } pgd_t;
-typedef struct { unsigned long pgprot; } pgprot_t;
-typedef struct page *pgtable_t;
-
-#define pte_val(x)      ((x).pte)
-#define pmd_val(x)      ((x).pmd)
-#define pgd_val(x)      ((x).pgd)
-#define pgprot_val(x)   ((x).pgprot)
-
-#define __pte(x)        ((pte_t) { (x) } )
-#define __pmd(x)        ((pmd_t) { (x) } )
-#define __pgd(x)        ((pgd_t) { (x) } )
-#define __pgprot(x)     ((pgprot_t) { (x) } )
-
-#else /* !STRICT_MM_TYPECHECKS */
-/*
- * .. while these make it easier on the compiler
- */
-
-typedef unsigned long pte_t;
-typedef unsigned long pmd_t;
-typedef unsigned long pgd_t;
-typedef unsigned long pgprot_t;
-
-#define pte_val(x)      (x)
-#define pmd_val(x)      (x)
-#define pgd_val(x)      (x)
-#define pgprot_val(x)   (x)
-
-#define __pte(x)        (x)
-#define __pmd(x)        (x)
-#define __pgd(x)        (x)
-#define __pgprot(x)     (x)
-
-#endif /* STRICT_MM_TYPECHECKS */
-
-#endif /* !__ASSEMBLY__ */
-
-
-/* No current v850 processor has virtual memory.  */
-#define __virt_to_phys(addr)	(addr)
-#define __phys_to_virt(addr)	(addr)
-
-#define virt_to_pfn(kaddr)	(__virt_to_phys (kaddr) >> PAGE_SHIFT)
-#define pfn_to_virt(pfn)	__phys_to_virt ((pfn) << PAGE_SHIFT)
-
-#define MAP_NR(kaddr) \
-  (((unsigned long)(kaddr) - PAGE_OFFSET) >> PAGE_SHIFT)
-#define virt_to_page(kaddr)	(mem_map + MAP_NR (kaddr))
-#define page_to_virt(page) \
-  ((((page) - mem_map) << PAGE_SHIFT) + PAGE_OFFSET)
-
-#define ARCH_PFN_OFFSET		(PAGE_OFFSET >> PAGE_SHIFT)
-#define pfn_valid(pfn)	        ((pfn) < max_mapnr)
-
-#define	virt_addr_valid(kaddr)						\
-  (((void *)(kaddr) >= (void *)PAGE_OFFSET) && MAP_NR (kaddr) < max_mapnr)
-
-
-#define __pa(x)		     __virt_to_phys ((unsigned long)(x))
-#define __va(x)		     ((void *)__phys_to_virt ((unsigned long)(x)))
-
-
-#include <asm-generic/memory_model.h>
-#include <asm-generic/page.h>
-
-#endif /* __V850_PAGE_H__ */
diff --git a/include/asm-v850/param.h b/include/asm-v850/param.h
deleted file mode 100644
index 4391f5fe0204..000000000000
--- a/include/asm-v850/param.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * include/asm-v850/param.h -- Varions kernel parameters
- *
- *  Copyright (C) 2001,02  NEC Corporation
- *  Copyright (C) 2001,02  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_PARAM_H__
-#define __V850_PARAM_H__
-
-#define EXEC_PAGESIZE	4096
-
-#ifndef NOGROUP
-#define NOGROUP		(-1)
-#endif
-
-#define MAXHOSTNAMELEN	64	/* max length of hostname */
-
-#ifdef __KERNEL__
-# define HZ		CONFIG_HZ
-# define USER_HZ	100
-# define CLOCKS_PER_SEC	USER_HZ
-#else
-# define HZ		100
-#endif
-
-#endif /* __V850_PARAM_H__ */
diff --git a/include/asm-v850/pci.h b/include/asm-v850/pci.h
deleted file mode 100644
index de2a7d0a81cc..000000000000
--- a/include/asm-v850/pci.h
+++ /dev/null
@@ -1,119 +0,0 @@
-/*
- * include/asm-v850/pci.h -- PCI support
- *
- *  Copyright (C) 2001,02,05  NEC Corporation
- *  Copyright (C) 2001,02,05  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_PCI_H__
-#define __V850_PCI_H__
-
-/* Get any platform-dependent definitions.  */
-#include <asm/machdep.h>
-
-#define pcibios_scan_all_fns(a, b)	0
-
-/* Generic declarations.  */
-
-struct scatterlist;
-
-extern void pcibios_set_master (struct pci_dev *dev);
-
-/* `Grant' to PDEV the memory block at CPU_ADDR, for doing DMA.  The
-   32-bit PCI bus mastering address to use is returned.  the device owns
-   this memory until either pci_unmap_single or pci_dma_sync_single_for_cpu is
-   performed.  */
-extern dma_addr_t
-pci_map_single (struct pci_dev *pdev, void *cpu_addr, size_t size, int dir);
-
-/* Return to the CPU the PCI DMA memory block previously `granted' to
-   PDEV, at DMA_ADDR.  */
-extern void
-pci_unmap_single (struct pci_dev *pdev, dma_addr_t dma_addr, size_t size,
-		  int dir);
-
-/* Make physical memory consistent for a single streaming mode DMA
-   translation after a transfer.
-
-   If you perform a pci_map_single() but wish to interrogate the
-   buffer using the cpu, yet do not wish to teardown the PCI dma
-   mapping, you must call this function before doing so.  At the next
-   point you give the PCI dma address back to the card, you must first
-   perform a pci_dma_sync_for_device, and then the device again owns
-   the buffer.  */
-extern void
-pci_dma_sync_single_for_cpu (struct pci_dev *dev, dma_addr_t dma_addr,
-			     size_t size, int dir);
-
-extern void
-pci_dma_sync_single_for_device (struct pci_dev *dev, dma_addr_t dma_addr,
-				size_t size, int dir);
-
-
-/* Do multiple DMA mappings at once.  */
-extern int
-pci_map_sg (struct pci_dev *pdev, struct scatterlist *sg, int sg_len, int dir);
-
-/* Unmap multiple DMA mappings at once.  */
-extern void
-pci_unmap_sg (struct pci_dev *pdev, struct scatterlist *sg, int sg_len,
-	      int dir);
-
-/* SG-list versions of pci_dma_sync functions.  */
-extern void
-pci_dma_sync_sg_for_cpu (struct pci_dev *dev,
-			 struct scatterlist *sg, int sg_len,
-			 int dir);
-extern void
-pci_dma_sync_sg_for_device (struct pci_dev *dev,
-			    struct scatterlist *sg, int sg_len,
-			    int dir);
-
-#define pci_map_page(dev, page, offs, size, dir) \
-  pci_map_single(dev, (page_address(page) + (offs)), size, dir)
-#define pci_unmap_page(dev,addr,sz,dir) \
-  pci_unmap_single(dev, addr, sz, dir)
-
-/* Test for pci_map_single or pci_map_page having generated an error.  */
-static inline int
-pci_dma_mapping_error (dma_addr_t dma_addr)
-{
-	return dma_addr == 0;
-}
-
-/* Allocate and map kernel buffer using consistent mode DMA for PCI
-   device.  Returns non-NULL cpu-view pointer to the buffer if
-   successful and sets *DMA_ADDR to the pci side dma address as well,
-   else DMA_ADDR is undefined.  */
-extern void *
-pci_alloc_consistent (struct pci_dev *pdev, size_t size, dma_addr_t *dma_addr);
-
-/* Free and unmap a consistent DMA buffer.  CPU_ADDR and DMA_ADDR must
-   be values that were returned from pci_alloc_consistent.  SIZE must be
-   the same as what as passed into pci_alloc_consistent.  References to
-   the memory and mappings assosciated with CPU_ADDR or DMA_ADDR past
-   this call are illegal.  */
-extern void
-pci_free_consistent (struct pci_dev *pdev, size_t size, void *cpu_addr,
-		     dma_addr_t dma_addr);
-
-#ifdef CONFIG_PCI
-static inline void pci_dma_burst_advice(struct pci_dev *pdev,
-					enum pci_dma_burst_strategy *strat,
-					unsigned long *strategy_parameter)
-{
-	*strat = PCI_DMA_BURST_INFINITY;
-	*strategy_parameter = ~0UL;
-}
-#endif
-
-extern void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max);
-extern void pci_iounmap (struct pci_dev *dev, void __iomem *addr);
-
-#endif /* __V850_PCI_H__ */
diff --git a/include/asm-v850/percpu.h b/include/asm-v850/percpu.h
deleted file mode 100644
index 755ac6522b63..000000000000
--- a/include/asm-v850/percpu.h
+++ /dev/null
@@ -1,14 +0,0 @@
-#ifndef __V850_PERCPU_H__
-#define __V850_PERCPU_H__
-
-#include <asm-generic/percpu.h>
-
-/* This is a stupid hack to satisfy some grotty implicit include-file
-   dependency; basically, <linux/smp.h> uses BUG_ON, which calls BUG, but
-   doesn't include the necessary headers to define it.  In the twisted
-   festering mess of includes this must all be resolved somehow on other
-   platforms, but I haven't the faintest idea how, and don't care; here will
-   do, even though doesn't actually make any sense.  */
-#include <asm/page.h>
-
-#endif /* __V850_PERCPU_H__ */
diff --git a/include/asm-v850/pgalloc.h b/include/asm-v850/pgalloc.h
deleted file mode 100644
index b91eb2d02bfd..000000000000
--- a/include/asm-v850/pgalloc.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * include/asm-v850/pgalloc.h
- *
- *  Copyright (C) 2001,02  NEC Corporation
- *  Copyright (C) 2001,02  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_PGALLOC_H__
-#define __V850_PGALLOC_H__
-
-#include <linux/mm.h>  /* some crap code expects this */
-
-/* ... and then, there was one.  */
-#define check_pgt_cache()	((void)0)
-
-#endif /* __V850_PGALLOC_H__ */
diff --git a/include/asm-v850/pgtable.h b/include/asm-v850/pgtable.h
deleted file mode 100644
index 1ea2a900f0f8..000000000000
--- a/include/asm-v850/pgtable.h
+++ /dev/null
@@ -1,59 +0,0 @@
-#ifndef __V850_PGTABLE_H__
-#define __V850_PGTABLE_H__
-
-#include <asm-generic/4level-fixup.h>
-
-#include <asm/page.h>
-
-
-#define pgd_present(pgd)	(1) /* pages are always present on NO_MM */
-#define pgd_none(pgd)		(0)
-#define pgd_bad(pgd)		(0)
-#define pgd_clear(pgdp)		((void)0)
-
-#define	pmd_offset(a, b)	((void *)0)
-
-#define kern_addr_valid(addr)	(1)
-
-
-#define __swp_type(x)		(0)
-#define __swp_offset(x)		(0)
-#define __swp_entry(typ,off)	((swp_entry_t) { ((typ) | ((off) << 7)) })
-#define __pte_to_swp_entry(pte)	((swp_entry_t) { pte_val(pte) })
-#define __swp_entry_to_pte(x)	((pte_t) { (x).val })
-
-static inline int pte_file (pte_t pte) { return 0; }
-
-
-/* These mean nothing to !CONFIG_MMU.  */
-#define PAGE_NONE		__pgprot(0)
-#define PAGE_SHARED		__pgprot(0)
-#define PAGE_COPY		__pgprot(0)
-#define PAGE_READONLY		__pgprot(0)
-#define PAGE_KERNEL		__pgprot(0)
-
-
-/*
- * ZERO_PAGE is a global shared page that is always zero: used
- * for zero-mapped memory areas etc.  When CONFIG_MMU is not defined, this
- * should never actually be used, so just define it to something that's
- * will hopefully cause a bus error if it is.
- */
-#define ZERO_PAGE(vaddr)	((void *)0x87654321)
-
-
-/* Some bogus code in procfs uses these; whatever.  */
-#define VMALLOC_START	0
-#define VMALLOC_END	(~0)
-
-
-extern void paging_init (void);
-#define swapper_pg_dir ((pgd_t *) 0)
-
-#define pgtable_cache_init()   ((void)0)
-
-
-extern unsigned int kobjsize(const void *objp);
-
-
-#endif /* __V850_PGTABLE_H__ */
diff --git a/include/asm-v850/poll.h b/include/asm-v850/poll.h
deleted file mode 100644
index 803cad0b9b59..000000000000
--- a/include/asm-v850/poll.h
+++ /dev/null
@@ -1,9 +0,0 @@
-#ifndef __V850_POLL_H__
-#define __V850_POLL_H__
-
-#define POLLWRNORM	POLLOUT
-#define POLLWRBAND	0x0100
-
-#include <asm-generic/poll.h>
-
-#endif /* __V850_POLL_H__ */
diff --git a/include/asm-v850/posix_types.h b/include/asm-v850/posix_types.h
deleted file mode 100644
index 7f403b765390..000000000000
--- a/include/asm-v850/posix_types.h
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * include/asm-v850/posix_types.h -- Kernel versions of standard types
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_POSIX_TYPES_H__
-#define __V850_POSIX_TYPES_H__
-
-typedef unsigned long	__kernel_ino_t;
-typedef unsigned long long __kernel_ino64_t;
-typedef unsigned int	__kernel_mode_t;
-typedef unsigned int	__kernel_nlink_t;
-typedef long		__kernel_off_t;
-typedef long long	__kernel_loff_t;
-typedef int		__kernel_pid_t;
-typedef unsigned short	__kernel_ipc_pid_t;
-typedef unsigned int	__kernel_uid_t;
-typedef unsigned int	__kernel_gid_t;
-typedef unsigned int	__kernel_size_t;
-typedef int		__kernel_ssize_t;
-typedef int		__kernel_ptrdiff_t;
-typedef long		__kernel_time_t;
-typedef long		__kernel_suseconds_t;
-typedef long		__kernel_clock_t;
-typedef int		__kernel_timer_t;
-typedef int		__kernel_clockid_t;
-typedef int		__kernel_daddr_t;
-typedef char *		__kernel_caddr_t;
-typedef unsigned short	__kernel_uid16_t;
-typedef unsigned short	__kernel_gid16_t;
-typedef unsigned int	__kernel_uid32_t;
-typedef unsigned int	__kernel_gid32_t;
-
-/* Some bogus code depends on this; we don't care.  */
-typedef __kernel_uid_t __kernel_old_uid_t;
-typedef unsigned int	__kernel_old_dev_t;
-
-typedef struct {
-	int	val[2];
-} __kernel_fsid_t;
-
-
-#if defined(__KERNEL__)
-
-/* We used to include <asm/bitops.h> here, which seems the right thing, but
-   it caused nasty include-file definition order problems.  Removing the
-   include seems to work, so fingers crossed...  */
-
-#undef	__FD_SET
-#define __FD_SET(fd, fd_set) \
-  __set_bit (fd, (void *)&((__kernel_fd_set *)fd_set)->fds_bits)
-#undef __FD_CLR
-#define __FD_CLR(fd, fd_set) \
-  __clear_bit (fd, (void *)&((__kernel_fd_set *)fd_set)->fds_bits)
-#undef	__FD_ISSET
-#define __FD_ISSET(fd, fd_set) \
-  __test_bit (fd, (void *)&((__kernel_fd_set *)fd_set)->fds_bits)
-#undef	__FD_ZERO
-#define __FD_ZERO(fd_set) \
-  memset (fd_set, 0, sizeof (*(fd_set *)fd_set))
-
-#endif /* defined(__KERNEL__) */
-
-#endif /* __V850_POSIX_TYPES_H__ */
diff --git a/include/asm-v850/processor.h b/include/asm-v850/processor.h
deleted file mode 100644
index 979e3467f9af..000000000000
--- a/include/asm-v850/processor.h
+++ /dev/null
@@ -1,120 +0,0 @@
-/*
- * include/asm-v850/processor.h
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_PROCESSOR_H__
-#define __V850_PROCESSOR_H__
-
-#ifndef __ASSEMBLY__ /* <linux/thread_info.h> is not asm-safe.  */
-#include <linux/thread_info.h>
-#endif
-
-#include <linux/compiler.h>
-#include <asm/ptrace.h>
-#include <asm/entry.h>
-
-/* Some code expects `segment' stuff to be defined here.  */
-#include <asm/segment.h>
-
-
-/*
- * The only places this is used seem to be horrible bletcherous kludges,
- * so we just define it to be as large as possible.
- */
-#define TASK_SIZE	(0xFFFFFFFF)
-
-/*
- * This decides where the kernel will search for a free chunk of vm
- * space during mmap's.  We won't be using it.
- */
-#define TASK_UNMAPPED_BASE	0
-
-
-#ifndef __ASSEMBLY__
-
-
-/*
- * Default implementation of macro that returns current
- * instruction pointer ("program counter").
- */
-#define current_text_addr()	({ __label__ _l; _l: &&_l;})
-
-/* If you change this, you must change the associated assembly-languages
-   constants defined below, THREAD_*.  */
-struct thread_struct {
-	/* kernel stack pointer (must be first field in structure) */
-	unsigned long  ksp;
-};
-
-#define INIT_THREAD { sizeof init_stack + (unsigned long)init_stack }
-
-
-/* Do necessary setup to start up a newly executed thread.  */
-static inline void start_thread (struct pt_regs *regs,
-				 unsigned long pc, unsigned long usp)
-{
-	regs->pc = pc;
-	regs->gpr[GPR_SP] = usp;
-	regs->kernel_mode = 0;
-}
-
-/* Free all resources held by a thread. */
-static inline void release_thread (struct task_struct *dead_task)
-{
-}
-
-/* Prepare to copy thread state - unlazy all lazy status */
-#define prepare_to_copy(tsk)	do { } while (0)
-
-extern int kernel_thread (int (*fn)(void *), void * arg, unsigned long flags);
-
-/* Free current thread data structures etc.  */
-static inline void exit_thread (void)
-{
-}
-
-
-/* Return the registers saved during context-switch by the currently
-   not-running thread T.  Note that this only includes some registers!
-   See entry.S for details.  */
-#define thread_saved_regs(t) \
-   ((struct pt_regs*)((t)->thread.ksp + STATE_SAVE_PT_OFFSET))
-/* Return saved (kernel) PC of a blocked thread.  Actually, we return the
-   LP register, because the thread is actually blocked in switch_thread,
-   and we're interested in the PC it will _return_ to.  */
-#define thread_saved_pc(t)   (thread_saved_regs(t)->gpr[GPR_LP])
-
-
-unsigned long get_wchan (struct task_struct *p);
-
-
-/* Return some info about the user process TASK.  */
-#define task_tos(task)	((unsigned long)task_stack_page(task) + THREAD_SIZE)
-#define task_pt_regs(task) ((struct pt_regs *)task_tos (task) - 1)
-#define task_sp(task)	(task_pt_regs (task)->gpr[GPR_SP])
-#define task_pc(task)	(task_pt_regs (task)->pc)
-/* Grotty old names for some.  */
-#define KSTK_EIP(task)	task_pc (task)
-#define KSTK_ESP(task)	task_sp (task)
-
-
-#define cpu_relax()    barrier()
-
-
-#else /* __ASSEMBLY__ */
-
-#define THREAD_KSP	0
-
-#endif /* !__ASSEMBLY__ */
-
-
-#endif /* __V850_PROCESSOR_H__ */
diff --git a/include/asm-v850/ptrace.h b/include/asm-v850/ptrace.h
deleted file mode 100644
index 4f35cf2cd641..000000000000
--- a/include/asm-v850/ptrace.h
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
- * include/asm-v850/ptrace.h -- Access to CPU registers
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_PTRACE_H__
-#define __V850_PTRACE_H__
-
-
-/* v850 general purpose registers with special meanings.  */
-#define GPR_ZERO	0	/* constant zero */
-#define GPR_ASM		1	/* reserved for assembler */
-#define GPR_SP		3	/* stack pointer */
-#define GPR_GP		4	/* global data pointer */
-#define GPR_TP		5	/* `text pointer' */
-#define GPR_EP		30	/* `element pointer' */
-#define GPR_LP		31	/* link pointer (current return address) */
-
-/* These aren't official names, but they make some code more descriptive.  */
-#define GPR_ARG0	6
-#define GPR_ARG1	7
-#define GPR_ARG2	8
-#define GPR_ARG3	9
-#define GPR_RVAL0	10
-#define GPR_RVAL1	11
-#define GPR_RVAL	GPR_RVAL0
-
-#define NUM_GPRS	32
-
-/* v850 `system' registers.  */
-#define SR_EIPC		0
-#define SR_EIPSW	1
-#define SR_FEPC		2
-#define SR_FEPSW	3
-#define SR_ECR		4
-#define SR_PSW		5
-#define SR_CTPC		16
-#define SR_CTPSW	17
-#define SR_DBPC		18
-#define SR_DBPSW	19
-#define SR_CTBP		20
-#define SR_DIR		21
-#define SR_ASID		23
-
-
-#ifndef __ASSEMBLY__
-
-typedef unsigned long v850_reg_t;
-
-/* How processor state is stored on the stack during a syscall/signal.
-   If you change this structure, change the associated assembly-language
-   macros below too (PT_*)!  */
-struct pt_regs
-{
-	/* General purpose registers.  */
-	v850_reg_t gpr[NUM_GPRS];
-
-	v850_reg_t pc;		/* program counter */
-	v850_reg_t psw;		/* program status word */
-
-	/* Registers used by `callt' instruction:  */
-	v850_reg_t ctpc;	/* saved program counter */
-	v850_reg_t ctpsw;	/* saved psw */
-	v850_reg_t ctbp;	/* base pointer for callt table */
-
-	char kernel_mode;	/* 1 if in `kernel mode', 0 if user mode */
-};
-
-
-#define instruction_pointer(regs)	((regs)->pc)
-#define profile_pc(regs) instruction_pointer(regs)
-#define user_mode(regs)			(!(regs)->kernel_mode)
-
-/* When a struct pt_regs is used to save user state for a system call in
-   the kernel, the system call is stored in the space for R0 (since it's
-   never used otherwise, R0 being a constant 0).  Non-system-calls
-   simply store 0 there.  */
-#define PT_REGS_SYSCALL(regs)		(regs)->gpr[0]
-#define PT_REGS_SET_SYSCALL(regs, val)	((regs)->gpr[0] = (val))
-
-#endif /* !__ASSEMBLY__ */
-
-
-/* The number of bytes used to store each register.  */
-#define _PT_REG_SIZE	4
-
-/* Offset of a general purpose register in a struct pt_regs.  */
-#define PT_GPR(num)	((num) * _PT_REG_SIZE)
-
-/* Offsets of various special registers & fields in a struct pt_regs.  */
-#define PT_PC		((NUM_GPRS + 0) * _PT_REG_SIZE)
-#define PT_PSW		((NUM_GPRS + 1) * _PT_REG_SIZE)
-#define PT_CTPC		((NUM_GPRS + 2) * _PT_REG_SIZE)
-#define PT_CTPSW	((NUM_GPRS + 3) * _PT_REG_SIZE)
-#define PT_CTBP		((NUM_GPRS + 4) * _PT_REG_SIZE)
-#define PT_KERNEL_MODE	((NUM_GPRS + 5) * _PT_REG_SIZE)
-
-/* Where the current syscall number is stashed; obviously only valid in
-   the kernel!  */
-#define PT_CUR_SYSCALL	PT_GPR(0)
-
-/* Size of struct pt_regs, including alignment.  */
-#define PT_SIZE		((NUM_GPRS + 6) * _PT_REG_SIZE)
-
-
-/* These are `magic' values for PTRACE_PEEKUSR that return info about where
-   a process is located in memory.  */
-#define PT_TEXT_ADDR	(PT_SIZE + 1)
-#define PT_TEXT_LEN	(PT_SIZE + 2)
-#define PT_DATA_ADDR	(PT_SIZE + 3)
-
-
-#endif /* __V850_PTRACE_H__ */
diff --git a/include/asm-v850/resource.h b/include/asm-v850/resource.h
deleted file mode 100644
index 4b9dcd44f8d1..000000000000
--- a/include/asm-v850/resource.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __V850_RESOURCE_H__
-#define __V850_RESOURCE_H__
-
-#include <asm-generic/resource.h>
-
-#endif /* __V850_RESOURCE_H__ */
diff --git a/include/asm-v850/rte_cb.h b/include/asm-v850/rte_cb.h
deleted file mode 100644
index db9879f00aa7..000000000000
--- a/include/asm-v850/rte_cb.h
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * include/asm-v850/rte_cb.h -- Midas labs RTE-CB series of evaluation boards
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_RTE_CB_H__
-#define __V850_RTE_CB_H__
-
-
-/* The SRAM on the Mother-A motherboard.  */
-#define MB_A_SRAM_ADDR		GCS0_ADDR
-#define MB_A_SRAM_SIZE		0x00200000 /* 2MB */
-
-
-#ifdef CONFIG_RTE_GBUS_INT
-/* GBUS interrupt support.  */
-
-# include <asm/gbus_int.h>
-
-# define GBUS_INT_BASE_IRQ	NUM_RTE_CB_IRQS
-# define GBUS_INT_BASE_ADDR	(GCS2_ADDR + 0x00006000)
-
-/* Some specific interrupts.  */
-# define IRQ_MB_A_LAN		IRQ_GBUS_INT(10)
-# define IRQ_MB_A_PCI1(n)	(IRQ_GBUS_INT(16) + (n))
-# define IRQ_MB_A_PCI1_NUM	4
-# define IRQ_MB_A_PCI2(n)	(IRQ_GBUS_INT(20) + (n))
-# define IRQ_MB_A_PCI2_NUM	4
-# define IRQ_MB_A_EXT(n)	(IRQ_GBUS_INT(24) + (n))
-# define IRQ_MB_A_EXT_NUM	4
-# define IRQ_MB_A_USB_OC(n)	(IRQ_GBUS_INT(28) + (n))
-# define IRQ_MB_A_USB_OC_NUM	2
-# define IRQ_MB_A_PCMCIA_OC	IRQ_GBUS_INT(30)
-
-/* We define NUM_MACH_IRQS to include extra interrupts from the GBUS.  */
-# define NUM_MACH_IRQS		(NUM_RTE_CB_IRQS + IRQ_GBUS_INT_NUM)
-
-#else /* !CONFIG_RTE_GBUS_INT */
-
-# define NUM_MACH_IRQS		NUM_RTE_CB_IRQS
-
-#endif /* CONFIG_RTE_GBUS_INT */
-
-
-#ifdef CONFIG_RTE_MB_A_PCI
-/* Mother-A PCI bus support.  */
-
-# include <asm/rte_mb_a_pci.h>
-
-/* These are the base addresses used for allocating device address
-   space.  512K of the motherboard SRAM is in the same space, so we have
-   to be careful not to let it be allocated.  */
-# define PCIBIOS_MIN_MEM	(MB_A_PCI_MEM_ADDR + 0x80000)
-# define PCIBIOS_MIN_IO		MB_A_PCI_IO_ADDR
-
-/* As we don't really support PCI DMA to cpu memory, and use bounce-buffers
-   instead, perversely enough, this becomes always true! */
-# define pci_dma_supported(dev, mask)		1
-# define pcibios_assign_all_busses()		1
-
-#endif /* CONFIG_RTE_MB_A_PCI */
-
-
-#ifndef __ASSEMBLY__
-extern void rte_cb_early_init (void);
-extern void rte_cb_init_irqs (void);
-#endif /* !__ASSEMBLY__ */
-
-
-#endif /* __V850_RTE_CB_H__ */
diff --git a/include/asm-v850/rte_ma1_cb.h b/include/asm-v850/rte_ma1_cb.h
deleted file mode 100644
index bd3162ab9844..000000000000
--- a/include/asm-v850/rte_ma1_cb.h
+++ /dev/null
@@ -1,128 +0,0 @@
-/*
- * include/asm-v850/rte_ma1_cb.h -- Midas labs RTE-V850/MA1-CB board
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_RTE_MA1_CB_H__
-#define __V850_RTE_MA1_CB_H__
-
-#include <asm/rte_cb.h>		/* Common defs for Midas RTE-CB boards.  */
-
-
-#define PLATFORM		"rte-v850e/ma1-cb"
-#define PLATFORM_LONG		"Midas lab RTE-V850E/MA1-CB"
-
-#define CPU_CLOCK_FREQ		50000000 /* 50MHz */
-
-/* 1MB of onboard SRAM.  Note that the monitor ROM uses parts of this
-   for its own purposes, so care must be taken.  Some address lines are
-   not decoded, so the SRAM area is mirrored every 1MB from 0x400000 to
-   0x800000 (exclusive).  */
-#define SRAM_ADDR		0x00400000
-#define SRAM_SIZE		0x00100000 /* 1MB */
-
-/* 32MB of onbard SDRAM.  */
-#define SDRAM_ADDR		0x00800000
-#define SDRAM_SIZE		0x02000000 /* 32MB */
-
-
-/* CPU addresses of GBUS memory spaces.  */
-#define GCS0_ADDR		0x05000000 /* GCS0 - Common SRAM (2MB) */
-#define GCS0_SIZE		0x00200000 /*   2MB */
-#define GCS1_ADDR		0x06000000 /* GCS1 - Flash ROM (8MB) */
-#define GCS1_SIZE		0x00800000 /*   8MB */
-#define GCS2_ADDR		0x07900000 /* GCS2 - I/O registers */
-#define GCS2_SIZE		0x00400000 /*   4MB */
-#define GCS5_ADDR		0x04000000 /* GCS5 - PCI bus space */
-#define GCS5_SIZE		0x01000000 /*   16MB */
-#define GCS6_ADDR		0x07980000 /* GCS6 - PCI control registers */
-#define GCS6_SIZE		0x00000200 /*   512B */
-
-
-/* For <asm/page.h> */
-#define PAGE_OFFSET 		SRAM_ADDR
-
-
-/* The GBUS GINT0 - GINT3 interrupts are connected to the INTP000 - INTP011
-   pins on the CPU.  These are shared among the GBUS interrupts.  */
-#define IRQ_GINT(n)		IRQ_INTP(n)
-#define IRQ_GINT_NUM		4
-
-/* Used by <asm/rte_cb.h> to derive NUM_MACH_IRQS.  */
-#define NUM_RTE_CB_IRQS		NUM_CPU_IRQS
-
-
-#ifdef CONFIG_ROM_KERNEL
-/* Kernel is in ROM, starting at address 0.  */
-
-#define INTV_BASE		0
-
-#else /* !CONFIG_ROM_KERNEL */
-
-#ifdef CONFIG_RTE_CB_MULTI
-/* Using RAM kernel with ROM monitor for Multi debugger.  */
-
-/* The chip's real interrupt vectors are in ROM, but they jump to a
-   secondary interrupt vector table in RAM.  */
-#define INTV_BASE		0x004F8000
-
-/* Scratch memory used by the ROM monitor, which shouldn't be used by
-   linux (except for the alternate interrupt vector area, defined
-   above).  */
-#define MON_SCRATCH_ADDR	0x004F8000
-#define MON_SCRATCH_SIZE	0x00008000 /* 32KB */
-
-#else /* !CONFIG_RTE_CB_MULTI */
-/* Using RAM-kernel.  Assume some sort of boot-loader got us loaded at
-   address 0.  */
-
-#define INTV_BASE		0
-
-#endif /* CONFIG_RTE_CB_MULTI */
-
-#endif /* CONFIG_ROM_KERNEL */
-
-
-/* Some misc. on-board devices.  */
-
-/* Seven-segment LED display (two digits).  Write-only.  */
-#define LED_ADDR(n)		(0x07802000 + (n))
-#define LED(n)			(*(volatile unsigned char *)LED_ADDR(n))
-#define LED_NUM_DIGITS		2
-
-
-/* Override the basic MA uart pre-initialization so that we can
-   initialize extra stuff.  */
-#undef V850E_UART_PRE_CONFIGURE	/* should be defined by <asm/ma.h> */
-#define V850E_UART_PRE_CONFIGURE	rte_ma1_cb_uart_pre_configure
-#ifndef __ASSEMBLY__
-extern void rte_ma1_cb_uart_pre_configure (unsigned chan,
-					   unsigned cflags, unsigned baud);
-#endif
-
-/* This board supports RTS/CTS for the on-chip UART, but only for channel 0. */
-
-/* CTS for UART channel 0 is pin P43 (bit 3 of port 4).  */
-#define V850E_UART_CTS(chan)	((chan) == 0 ? !(MA_PORT4_IO & 0x8) : 1)
-/* RTS for UART channel 0 is pin P42 (bit 2 of port 4).  */
-#define V850E_UART_SET_RTS(chan, val)					      \
-   do {									      \
-	   if (chan == 0) {						      \
-		   unsigned old = MA_PORT4_IO; 				      \
-		   if (val)						      \
-			   MA_PORT4_IO = old & ~0x4;			      \
-		   else							      \
-			   MA_PORT4_IO = old | 0x4;			      \
-	   }								      \
-   } while (0)
-
-
-#endif /* __V850_RTE_MA1_CB_H__ */
diff --git a/include/asm-v850/rte_mb_a_pci.h b/include/asm-v850/rte_mb_a_pci.h
deleted file mode 100644
index 41ac185ca9cd..000000000000
--- a/include/asm-v850/rte_mb_a_pci.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * include/asm-v850/mb_a_pci.h -- PCI support for Midas lab RTE-MOTHER-A board
- *
- *  Copyright (C) 2001  NEC Corporation
- *  Copyright (C) 2001  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_MB_A_PCI_H__
-#define __V850_MB_A_PCI_H__
-
-
-#define MB_A_PCI_MEM_ADDR	GCS5_ADDR
-#define MB_A_PCI_MEM_SIZE	(GCS5_SIZE / 2)
-#define MB_A_PCI_IO_ADDR	(GCS5_ADDR + MB_A_PCI_MEM_SIZE)
-#define MB_A_PCI_IO_SIZE	(GCS5_SIZE / 2)
-#define MB_A_PCI_REG_BASE_ADDR	GCS6_ADDR
-
-#define MB_A_PCI_PCICR_ADDR	(MB_A_PCI_REG_BASE_ADDR + 0x4)
-#define MB_A_PCI_PCICR		(*(volatile u16 *)MB_A_PCI_PCICR_ADDR)
-#define MB_A_PCI_PCISR_ADDR	(MB_A_PCI_REG_BASE_ADDR + 0x6)
-#define MB_A_PCI_PCISR		(*(volatile u16 *)MB_A_PCI_PCISR_ADDR)
-#define MB_A_PCI_PCILTR_ADDR	(MB_A_PCI_REG_BASE_ADDR + 0xD)
-#define MB_A_PCI_PCILTR		(*(volatile u8 *)MB_A_PCI_PCILTR_ADDR)
-#define MB_A_PCI_PCIBAR0_ADDR	(MB_A_PCI_REG_BASE_ADDR + 0x10)
-#define MB_A_PCI_PCIBAR0	(*(volatile u32 *)MB_A_PCI_PCIBAR0_ADDR)
-#define MB_A_PCI_PCIBAR1_ADDR	(MB_A_PCI_REG_BASE_ADDR + 0x14)
-#define MB_A_PCI_PCIBAR1	(*(volatile u32 *)MB_A_PCI_PCIBAR1_ADDR)
-#define MB_A_PCI_PCIBAR2_ADDR	(MB_A_PCI_REG_BASE_ADDR + 0x18)
-#define MB_A_PCI_PCIBAR2	(*(volatile u32 *)MB_A_PCI_PCIBAR2_ADDR)
-#define MB_A_PCI_VENDOR_ID_ADDR	(MB_A_PCI_REG_BASE_ADDR + 0x2C)
-#define MB_A_PCI_VENDOR_ID	(*(volatile u16 *)MB_A_PCI_VENDOR_ID_ADDR)
-#define MB_A_PCI_DEVICE_ID_ADDR	(MB_A_PCI_REG_BASE_ADDR + 0x2E)
-#define MB_A_PCI_DEVICE_ID	(*(volatile u16 *)MB_A_PCI_DEVICE_ID_ADDR)
-#define MB_A_PCI_DMRR_ADDR	(MB_A_PCI_REG_BASE_ADDR + 0x9C)
-#define MB_A_PCI_DMRR		(*(volatile u32 *)MB_A_PCI_DMRR_ADDR)
-#define MB_A_PCI_DMLBAM_ADDR	(MB_A_PCI_REG_BASE_ADDR + 0xA0)
-#define MB_A_PCI_DMLBAM		(*(volatile u32 *)MB_A_PCI_DMLBAM_ADDR)
-#define MB_A_PCI_DMLBAI_ADDR	(MB_A_PCI_REG_BASE_ADDR + 0xA4)
-#define MB_A_PCI_DMLBAI		(*(volatile u32 *)MB_A_PCI_DMLBAI_ADDR)
-#define MB_A_PCI_PCIPBAM_ADDR	(MB_A_PCI_REG_BASE_ADDR + 0xA8)
-#define MB_A_PCI_PCIPBAM	(*(volatile u32 *)MB_A_PCI_PCIPBAM_ADDR)
-/* `PCI Configuration Address Register for Direct Master to PCI IO/CFG'  */
-#define MB_A_PCI_DMCFGA_ADDR	(MB_A_PCI_REG_BASE_ADDR + 0xAC)
-#define MB_A_PCI_DMCFGA		(*(volatile u32 *)MB_A_PCI_DMCFGA_ADDR)
-/* `PCI Permanent Configuration ID Register'  */
-#define MB_A_PCI_PCIHIDR_ADDR	(MB_A_PCI_REG_BASE_ADDR + 0xF0)
-#define MB_A_PCI_PCIHIDR	(*(volatile u32 *)MB_A_PCI_PCIHIDR_ADDR)
-
-
-#endif /* __V850_MB_A_PCI_H__ */
diff --git a/include/asm-v850/rte_me2_cb.h b/include/asm-v850/rte_me2_cb.h
deleted file mode 100644
index 9922c85c85a8..000000000000
--- a/include/asm-v850/rte_me2_cb.h
+++ /dev/null
@@ -1,202 +0,0 @@
-/*
- * include/asm-v850/rte_me2_cb.h -- Midas labs RTE-V850E/ME2-CB board
- *
- *  Copyright (C) 2001,02,03  NEC Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_RTE_ME2_CB_H__
-#define __V850_RTE_ME2_CB_H__
-
-#include <asm/rte_cb.h>		/* Common defs for Midas RTE-CB boards.  */
-
-
-#define PLATFORM		"rte-v850e/me2-cb"
-#define PLATFORM_LONG		"Midas lab RTE-V850E/ME2-CB"
-
-#define CPU_CLOCK_FREQ		150000000 /* 150MHz */
-#define FIXED_BOGOMIPS		50
-
-/* 32MB of onbard SDRAM.  */
-#define SDRAM_ADDR		0x00800000
-#define SDRAM_SIZE		0x02000000 /* 32MB */
-
-
-/* CPU addresses of GBUS memory spaces.  */
-#define GCS0_ADDR		0x04000000 /* GCS0 - Common SRAM (2MB) */
-#define GCS0_SIZE		0x00800000 /*   8MB */
-#define GCS1_ADDR		0x04800000 /* GCS1 - Flash ROM (8MB) */
-#define GCS1_SIZE		0x00800000 /*   8MB */
-#define GCS2_ADDR		0x07000000 /* GCS2 - I/O registers */
-#define GCS2_SIZE		0x00800000 /*   8MB */
-#define GCS5_ADDR		0x08000000 /* GCS5 - PCI bus space */
-#define GCS5_SIZE		0x02000000 /*   32MB */
-#define GCS6_ADDR		0x07800000 /* GCS6 - PCI control registers */
-#define GCS6_SIZE		0x00800000 /*   8MB */
-
-
-/* For <asm/page.h> */
-#define PAGE_OFFSET 		SDRAM_ADDR
-
-
-#ifdef CONFIG_ROM_KERNEL
-/* Kernel is in ROM, starting at address 0.  */
-
-#define INTV_BASE		0
-#define ROOT_FS_IMAGE_RW	0
-
-#else /* !CONFIG_ROM_KERNEL */
-/* Using RAM-kernel.  Assume some sort of boot-loader got us loaded at
-   address 0.  */
-
-#define INTV_BASE		0
-#define ROOT_FS_IMAGE_RW	1
-
-#endif /* CONFIG_ROM_KERNEL */
-
-
-/* Some misc. on-board devices.  */
-
-/* Seven-segment LED display (four digits).  */
-#define LED_ADDR(n)		(0x0FE02000 + (n))
-#define LED(n)			(*(volatile unsigned char *)LED_ADDR(n))
-#define LED_NUM_DIGITS		4
-
-
-/* On-board PIC.  */
-
-#define CB_PIC_BASE_ADDR 	0x0FE04000
-
-#define CB_PIC_INT0M_ADDR 	(CB_PIC_BASE_ADDR + 0x00)
-#define CB_PIC_INT0M      	(*(volatile u16 *)CB_PIC_INT0M_ADDR)
-#define CB_PIC_INT1M_ADDR 	(CB_PIC_BASE_ADDR + 0x10)
-#define CB_PIC_INT1M      	(*(volatile u16 *)CB_PIC_INT1M_ADDR)
-#define CB_PIC_INTR_ADDR  	(CB_PIC_BASE_ADDR + 0x20)
-#define CB_PIC_INTR       	(*(volatile u16 *)CB_PIC_INTR_ADDR)
-#define CB_PIC_INTEN_ADDR 	(CB_PIC_BASE_ADDR + 0x30)
-#define CB_PIC_INTEN      	(*(volatile u16 *)CB_PIC_INTEN_ADDR)
-
-#define CB_PIC_INT0EN        	0x0001
-#define CB_PIC_INT1EN        	0x0002
-#define CB_PIC_INT0SEL       	0x0080
-
-/* The PIC interrupts themselves.  */
-#define CB_PIC_BASE_IRQ		NUM_CPU_IRQS
-#define IRQ_CB_PIC_NUM		10
-
-/* Some specific CB_PIC interrupts. */
-#define IRQ_CB_EXTTM0		(CB_PIC_BASE_IRQ + 0)
-#define IRQ_CB_EXTSIO		(CB_PIC_BASE_IRQ + 1)
-#define IRQ_CB_TOVER		(CB_PIC_BASE_IRQ + 2)
-#define IRQ_CB_GINT0		(CB_PIC_BASE_IRQ + 3)
-#define IRQ_CB_USB		(CB_PIC_BASE_IRQ + 4)
-#define IRQ_CB_LANC		(CB_PIC_BASE_IRQ + 5)
-#define IRQ_CB_USB_VBUS_ON	(CB_PIC_BASE_IRQ + 6)
-#define IRQ_CB_USB_VBUS_OFF	(CB_PIC_BASE_IRQ + 7)
-#define IRQ_CB_EXTTM1		(CB_PIC_BASE_IRQ + 8)
-#define IRQ_CB_EXTTM2		(CB_PIC_BASE_IRQ + 9)
-
-/* The GBUS GINT1 - GINT3 (note, not GINT0!) interrupts are connected to
-   the INTP65 - INTP67 pins on the CPU.  These are shared among the GBUS
-   interrupts.  */
-#define IRQ_GINT(n)		IRQ_INTP((n) + 9)  /* 0 is unused! */
-#define IRQ_GINT_NUM		4		   /* 0 is unused! */
-
-/* The shared interrupt line from the PIC is connected to CPU pin INTP23.  */
-#define IRQ_CB_PIC		IRQ_INTP(4) /* P23 */
-
-/* Used by <asm/rte_cb.h> to derive NUM_MACH_IRQS.  */
-#define NUM_RTE_CB_IRQS		(NUM_CPU_IRQS + IRQ_CB_PIC_NUM)
-
-
-#ifndef __ASSEMBLY__
-struct cb_pic_irq_init {
-	const char *name;	/* name of interrupt type */
-
-	/* Range of kernel irq numbers for this type:
-	   BASE, BASE+INTERVAL, ..., BASE+INTERVAL*NUM  */
-	unsigned base, num, interval;
-
-	unsigned priority;	/* interrupt priority to assign */
-};
-struct hw_interrupt_type;	/* fwd decl */
-
-/* Enable interrupt handling for interrupt IRQ.  */
-extern void cb_pic_enable_irq (unsigned irq);
-/* Disable interrupt handling for interrupt IRQ.  Note that any interrupts
-   received while disabled will be delivered once the interrupt is enabled
-   again, unless they are explicitly cleared using `cb_pic_clear_pending_irq'.  */
-extern void cb_pic_disable_irq (unsigned irq);
-/* Initialize HW_IRQ_TYPES for PIC irqs described in array INITS (which is
-   terminated by an entry with the name field == 0).  */
-extern void cb_pic_init_irq_types (struct cb_pic_irq_init *inits,
-				   struct hw_interrupt_type *hw_irq_types);
-/* Initialize PIC interrupts.  */
-extern void cb_pic_init_irqs (void);
-#endif /* __ASSEMBLY__ */
-
-
-/* TL16C550C on board UART see also asm/serial.h */
-#define CB_UART_BASE    	0x0FE08000
-#define CB_UART_REG_GAP 	0x10
-#define CB_UART_CLOCK   	0x16000000
-
-/* CompactFlash setting */
-#define CB_CF_BASE     		0x0FE0C000
-#define CB_CF_CCR_ADDR 		(CB_CF_BASE+0x200)
-#define CB_CF_CCR      		(*(volatile u8 *)CB_CF_CCR_ADDR)
-#define CB_CF_REG0_ADDR		(CB_CF_BASE+0x1000)
-#define CB_CF_REG0     		(*(volatile u16 *)CB_CF_REG0_ADDR)
-#define CB_CF_STS0_ADDR		(CB_CF_BASE+0x1004)
-#define CB_CF_STS0     		(*(volatile u16 *)CB_CF_STS0_ADDR)
-#define CB_PCATA_BASE  		(CB_CF_BASE+0x800)
-#define CB_IDE_BASE    		(CB_CF_BASE+0x9F0)
-#define CB_IDE_CTRL    		(CB_CF_BASE+0xBF6)
-#define CB_IDE_REG_OFFS		0x1
-
-
-/* SMSC LAN91C111 setting */
-#if defined(CONFIG_SMC91111)
-#define CB_LANC_BASE 		0x0FE10300
-#define CONFIG_SMC16BITONLY
-#define ETH0_ADDR 		CB_LANC_BASE
-#define ETH0_IRQ 		IRQ_CB_LANC
-#endif /* CONFIG_SMC16BITONLY */
-
-
-#undef V850E_UART_PRE_CONFIGURE
-#define V850E_UART_PRE_CONFIGURE	rte_me2_cb_uart_pre_configure
-#ifndef __ASSEMBLY__
-extern void rte_me2_cb_uart_pre_configure (unsigned chan,
-					   unsigned cflags, unsigned baud);
-#endif /* __ASSEMBLY__ */
-
-/* This board supports RTS/CTS for the on-chip UART, but only for channel 0. */
-
-/* CTS for UART channel 0 is pin P22 (bit 2 of port 2).  */
-#define V850E_UART_CTS(chan)	((chan) == 0 ? !(ME2_PORT2_IO & 0x4) : 1)
-/* RTS for UART channel 0 is pin P21 (bit 1 of port 2).  */
-#define V850E_UART_SET_RTS(chan, val)					      \
-   do {									      \
-	   if (chan == 0) {						      \
-		   unsigned old = ME2_PORT2_IO; 			      \
-		   if (val)						      \
-			   ME2_PORT2_IO = old & ~0x2;			      \
-		   else							      \
-			   ME2_PORT2_IO = old | 0x2;			      \
-	   }								      \
-   } while (0)
-
-
-#ifndef __ASSEMBLY__
-extern void rte_me2_cb_init_irqs (void);
-#endif /* !__ASSEMBLY__ */
-
-
-#endif /* __V850_RTE_ME2_CB_H__ */
diff --git a/include/asm-v850/rte_nb85e_cb.h b/include/asm-v850/rte_nb85e_cb.h
deleted file mode 100644
index f56591cad90a..000000000000
--- a/include/asm-v850/rte_nb85e_cb.h
+++ /dev/null
@@ -1,111 +0,0 @@
-/*
- * include/asm-v850/rte_nb85e_cb.h -- Midas labs RTE-V850/NB85E-CB board
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_RTE_NB85E_CB_H__
-#define __V850_RTE_NB85E_CB_H__
-
-#include <asm/rte_cb.h>		/* Common defs for Midas RTE-CB boards.  */
-
-
-#define PLATFORM		"rte-v850e/nb85e-cb"
-#define PLATFORM_LONG		"Midas lab RTE-V850E/NB85E-CB"
-
-#define CPU_CLOCK_FREQ		50000000 /* 50MHz */
-
-/* 1MB of onboard SRAM.  Note that the monitor ROM uses parts of this
-   for its own purposes, so care must be taken.  */
-#define SRAM_ADDR		0x03C00000
-#define SRAM_SIZE		0x00100000 /* 1MB */
-
-/* 16MB of onbard SDRAM.  */
-#define SDRAM_ADDR		0x01000000
-#define SDRAM_SIZE		0x01000000 /* 16MB */
-
-
-/* CPU addresses of GBUS memory spaces.  */
-#define GCS0_ADDR		0x00400000 /* GCS0 - Common SRAM (2MB) */
-#define GCS0_SIZE		0x00400000 /*   4MB */
-#define GCS1_ADDR		0x02000000 /* GCS1 - Flash ROM (8MB) */
-#define GCS1_SIZE		0x00800000 /*   8MB */
-#define GCS2_ADDR		0x03900000 /* GCS2 - I/O registers */
-#define GCS2_SIZE		0x00080000 /*   512KB */
-#define GCS3_ADDR		0x02800000 /* GCS3 - EXT-bus: memory space */
-#define GCS3_SIZE		0x00800000 /*   8MB */
-#define GCS4_ADDR		0x03A00000 /* GCS4 - EXT-bus: I/O space */
-#define GCS4_SIZE		0x00200000 /*   2MB */
-#define GCS5_ADDR		0x00800000 /* GCS5 - PCI bus space */
-#define GCS5_SIZE		0x00800000 /*   8MB */
-#define GCS6_ADDR		0x03980000 /* GCS6 - PCI control registers */
-#define GCS6_SIZE		0x00010000 /*   64KB */
-
-
-/* The GBUS GINT0 - GINT3 interrupts are connected to CPU interrupts 10-12.
-   These are shared among the GBUS interrupts.  */
-#define IRQ_GINT(n)		(10 + (n))
-#define IRQ_GINT_NUM		3
-
-/* Used by <asm/rte_cb.h> to derive NUM_MACH_IRQS.  */
-#define NUM_RTE_CB_IRQS		NUM_CPU_IRQS
-
-
-#ifdef CONFIG_ROM_KERNEL
-/* Kernel is in ROM, starting at address 0.  */
-
-#define INTV_BASE	0
-
-#else /* !CONFIG_ROM_KERNEL */
-/* We're using the ROM monitor.  */
-
-/* The chip's real interrupt vectors are in ROM, but they jump to a
-   secondary interrupt vector table in RAM.  */
-#define INTV_BASE		0x03CF8000
-
-/* Scratch memory used by the ROM monitor, which shouldn't be used by
-   linux (except for the alternate interrupt vector area, defined
-   above).  */
-#define MON_SCRATCH_ADDR	0x03CE8000
-#define MON_SCRATCH_SIZE	0x00018000 /* 96KB */
-
-#endif /* CONFIG_ROM_KERNEL */
-
-
-/* Some misc. on-board devices.  */
-
-/* Seven-segment LED display (two digits).  Write-only.  */
-#define LED_ADDR(n)	(0x03802000 + (n))
-#define LED(n)		(*(volatile unsigned char *)LED_ADDR(n))
-#define LED_NUM_DIGITS	4
-
-
-/* Override the basic TEG UART pre-initialization so that we can
-   initialize extra stuff.  */
-#undef V850E_UART_PRE_CONFIGURE	/* should be defined by <asm/teg.h> */
-#define V850E_UART_PRE_CONFIGURE	rte_nb85e_cb_uart_pre_configure
-#ifndef __ASSEMBLY__
-extern void rte_nb85e_cb_uart_pre_configure (unsigned chan,
-					     unsigned cflags, unsigned baud);
-#endif
-
-/* This board supports RTS/CTS for the on-chip UART. */
-
-/* CTS is pin P00.  */
-#define V850E_UART_CTS(chan)	(! (TEG_PORT0_IO & 0x1))
-/* RTS is pin P02.  */
-#define V850E_UART_SET_RTS(chan, val)					      \
-   do {									      \
-	   unsigned old = TEG_PORT0_IO;					      \
-	   TEG_PORT0_IO = val ? (old & ~0x4) : (old | 0x4);		      \
-   } while (0)
-
-
-#endif /* __V850_RTE_NB85E_CB_H__ */
diff --git a/include/asm-v850/scatterlist.h b/include/asm-v850/scatterlist.h
deleted file mode 100644
index 02d27b3fb061..000000000000
--- a/include/asm-v850/scatterlist.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * include/asm-v850/scatterlist.h
- *
- *  Copyright (C) 2001,02  NEC Corporation
- *  Copyright (C) 2001,02  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_SCATTERLIST_H__
-#define __V850_SCATTERLIST_H__
-
-#include <asm/types.h>
-
-struct scatterlist {
-#ifdef CONFIG_DEBUG_SG
-	unsigned long	sg_magic;
-#endif
-	unsigned long	page_link;
-	unsigned	offset;
-	dma_addr_t	dma_address;
-	unsigned	length;
-};
-
-#define ISA_DMA_THRESHOLD	(~0UL)
-
-#endif /* __V850_SCATTERLIST_H__ */
diff --git a/include/asm-v850/sections.h b/include/asm-v850/sections.h
deleted file mode 100644
index e0238253a0d0..000000000000
--- a/include/asm-v850/sections.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __V850_SECTIONS_H__
-#define __V850_SECTIONS_H__
-
-#include <asm-generic/sections.h>
-
-#endif /* __V850_SECTIONS_H__ */
diff --git a/include/asm-v850/segment.h b/include/asm-v850/segment.h
deleted file mode 100644
index 5e2b15dcf3d9..000000000000
--- a/include/asm-v850/segment.h
+++ /dev/null
@@ -1,36 +0,0 @@
-#ifndef __V850_SEGMENT_H__
-#define __V850_SEGMENT_H__
-
-
-#ifndef __ASSEMBLY__
-
-typedef unsigned long mm_segment_t;	/* domain register */
-
-#endif /* !__ASSEMBLY__ */
-
-
-#define __KERNEL_CS	0x0
-#define __KERNEL_DS	0x0
-
-#define __USER_CS	0x1
-#define __USER_DS	0x1
-
-#define KERNEL_DS	__KERNEL_DS
-#define KERNEL_CS	__KERNEL_CS
-#define USER_DS		__USER_DS
-#define USER_CS		__USER_CS
-
-#define segment_eq(a,b)	((a) == (b))
-
-#define get_ds()	(KERNEL_DS)
-#define get_fs()	(USER_DS)
-
-#define set_fs(seg)	((void)(seg))
-
-
-#define copy_segments(task, mm)	((void)((void)(task), (mm)))
-#define release_segments(mm)	((void)(mm))
-#define forget_segments()	((void)0)
-
-
-#endif /* __V850_SEGMENT_H__ */
diff --git a/include/asm-v850/semaphore.h b/include/asm-v850/semaphore.h
deleted file mode 100644
index d9b2034ed1d2..000000000000
--- a/include/asm-v850/semaphore.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <linux/semaphore.h>
diff --git a/include/asm-v850/sembuf.h b/include/asm-v850/sembuf.h
deleted file mode 100644
index 1622231a8b85..000000000000
--- a/include/asm-v850/sembuf.h
+++ /dev/null
@@ -1,25 +0,0 @@
-#ifndef __V850_SEMBUF_H__
-#define __V850_SEMBUF_H__
-
-/* 
- * The semid64_ds structure for v850 architecture.
- * Note extra padding because this structure is passed back and forth
- * between kernel and user space.
- *
- * Pad space is left for:
- * - 64-bit time_t to solve y2038 problem
- * - 2 miscellaneous 32-bit values
- */
-
-struct semid64_ds {
-	struct ipc64_perm sem_perm;		/* permissions .. see ipc.h */
-	__kernel_time_t	sem_otime;		/* last semop time */
-	unsigned long	__unused1;
-	__kernel_time_t	sem_ctime;		/* last change time */
-	unsigned long	__unused2;
-	unsigned long	sem_nsems;		/* no. of semaphores in array */
-	unsigned long	__unused3;
-	unsigned long	__unused4;
-};
-
-#endif /* __V850_SEMBUF_H__ */
diff --git a/include/asm-v850/serial.h b/include/asm-v850/serial.h
deleted file mode 100644
index 36d8f4cbbf39..000000000000
--- a/include/asm-v850/serial.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 1999 by Ralf Baechle
- * Copyright (C) 1999, 2000 Silicon Graphics, Inc.
- */ 
-
-#ifdef CONFIG_RTE_CB_ME2
-
-#include <asm/rte_me2_cb.h>
-
-#define STD_COM_FLAGS (ASYNC_BOOT_AUTOCONF | ASYNC_SKIP_TEST)
-
-#define irq_cannonicalize(x) (x)
-#define BASE_BAUD	250000	/* (16MHz / (16 * 38400)) * 9600 */
-#define SERIAL_PORT_DFNS \
-   { 0, BASE_BAUD, CB_UART_BASE, IRQ_CB_EXTSIO, STD_COM_FLAGS },
-
-/* Redefine UART register offsets.  */
-#undef UART_RX
-#undef UART_TX
-#undef UART_DLL
-#undef UART_TRG
-#undef UART_DLM
-#undef UART_IER
-#undef UART_FCTR
-#undef UART_IIR
-#undef UART_FCR
-#undef UART_EFR
-#undef UART_LCR
-#undef UART_MCR
-#undef UART_LSR
-#undef UART_MSR
-#undef UART_SCR
-#undef UART_EMSR
-
-#define UART_RX		(0 * CB_UART_REG_GAP)
-#define UART_TX		(0 * CB_UART_REG_GAP)
-#define UART_DLL	(0 * CB_UART_REG_GAP)
-#define UART_TRG	(0 * CB_UART_REG_GAP)
-#define UART_DLM	(1 * CB_UART_REG_GAP)
-#define UART_IER	(1 * CB_UART_REG_GAP)
-#define UART_FCTR	(1 * CB_UART_REG_GAP)
-#define UART_IIR	(2 * CB_UART_REG_GAP)
-#define UART_FCR	(2 * CB_UART_REG_GAP)
-#define UART_EFR	(2 * CB_UART_REG_GAP)
-#define UART_LCR	(3 * CB_UART_REG_GAP)
-#define UART_MCR	(4 * CB_UART_REG_GAP)
-#define UART_LSR	(5 * CB_UART_REG_GAP)
-#define UART_MSR	(6 * CB_UART_REG_GAP)
-#define UART_SCR	(7 * CB_UART_REG_GAP)
-#define UART_EMSR	(7 * CB_UART_REG_GAP)
-
-#endif /* CONFIG_RTE_CB_ME2 */
diff --git a/include/asm-v850/setup.h b/include/asm-v850/setup.h
deleted file mode 100644
index c48a9b97d05b..000000000000
--- a/include/asm-v850/setup.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _V850_SETUP_H
-#define _V850_SETUP_H
-
-#define COMMAND_LINE_SIZE	512
-
-#endif /* __SETUP_H */
diff --git a/include/asm-v850/shmbuf.h b/include/asm-v850/shmbuf.h
deleted file mode 100644
index 3d085c9c418e..000000000000
--- a/include/asm-v850/shmbuf.h
+++ /dev/null
@@ -1,42 +0,0 @@
-#ifndef __V850_SHMBUF_H__
-#define __V850_SHMBUF_H__
-
-/* 
- * The shmid64_ds structure for v850 architecture.
- * Note extra padding because this structure is passed back and forth
- * between kernel and user space.
- *
- * Pad space is left for:
- * - 64-bit time_t to solve y2038 problem
- * - 2 miscellaneous 32-bit values
- */
-
-struct shmid64_ds {
-	struct ipc64_perm	shm_perm;	/* operation perms */
-	size_t			shm_segsz;	/* size of segment (bytes) */
-	__kernel_time_t		shm_atime;	/* last attach time */
-	unsigned long		__unused1;
-	__kernel_time_t		shm_dtime;	/* last detach time */
-	unsigned long		__unused2;
-	__kernel_time_t		shm_ctime;	/* last change time */
-	unsigned long		__unused3;
-	__kernel_pid_t		shm_cpid;	/* pid of creator */
-	__kernel_pid_t		shm_lpid;	/* pid of last operator */
-	unsigned long		shm_nattch;	/* no. of current attaches */
-	unsigned long		__unused4;
-	unsigned long		__unused5;
-};
-
-struct shminfo64 {
-	unsigned long	shmmax;
-	unsigned long	shmmin;
-	unsigned long	shmmni;
-	unsigned long	shmseg;
-	unsigned long	shmall;
-	unsigned long	__unused1;
-	unsigned long	__unused2;
-	unsigned long	__unused3;
-	unsigned long	__unused4;
-};
-
-#endif /* __V850_SHMBUF_H__ */
diff --git a/include/asm-v850/shmparam.h b/include/asm-v850/shmparam.h
deleted file mode 100644
index 7dcb6739073e..000000000000
--- a/include/asm-v850/shmparam.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __V850_SHMPARAM_H__
-#define __V850_SHMPARAM_H__
-
-#define	SHMLBA		PAGE_SIZE	/* attach addr a multiple of this */
-
-#endif /* __V850_SHMPARAM_H__ */
diff --git a/include/asm-v850/sigcontext.h b/include/asm-v850/sigcontext.h
deleted file mode 100644
index e0890f6f4bc9..000000000000
--- a/include/asm-v850/sigcontext.h
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * include/asm-v850/sigcontext.h -- Signal contexts
- *
- *  Copyright (C) 2001  NEC Corporation
- *  Copyright (C) 2001  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_SIGCONTEXT_H__
-#define __V850_SIGCONTEXT_H__
-
-#include <asm/ptrace.h>
-
-struct sigcontext
-{
-	struct pt_regs 	regs;
-	unsigned long	oldmask;
-};
-
-#endif /* __V850_SIGCONTEXT_H__ */
diff --git a/include/asm-v850/siginfo.h b/include/asm-v850/siginfo.h
deleted file mode 100644
index 7eb94703dce0..000000000000
--- a/include/asm-v850/siginfo.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __V850_SIGINFO_H__
-#define __V850_SIGINFO_H__
-
-#include <asm-generic/siginfo.h>
-
-#endif /* __V850_SIGINFO_H__ */
diff --git a/include/asm-v850/signal.h b/include/asm-v850/signal.h
deleted file mode 100644
index a38df0834bbf..000000000000
--- a/include/asm-v850/signal.h
+++ /dev/null
@@ -1,168 +0,0 @@
-#ifndef __V850_SIGNAL_H__
-#define __V850_SIGNAL_H__
-
-#include <linux/types.h>
-
-/* Avoid too many header ordering problems.  */
-struct siginfo;
-
-
-#ifdef __KERNEL__
-
-/* Most things should be clean enough to redefine this at will, if care
-   is taken to make libc match.  */
-#define _NSIG		64
-#define _NSIG_BPW	32
-#define _NSIG_WORDS	(_NSIG / _NSIG_BPW)
-
-typedef unsigned long old_sigset_t;		/* at least 32 bits */
-
-typedef struct {
-	unsigned long sig[_NSIG_WORDS];
-} sigset_t;
-
-#else /* !__KERNEL__ */
-
-/* Here we must cater to libcs that poke about in kernel headers.  */
-
-#define NSIG		32
-typedef unsigned long sigset_t;
-
-#endif /* __KERNEL__ */
-
-
-#define SIGHUP		 1
-#define SIGINT		 2
-#define SIGQUIT		 3
-#define SIGILL		 4
-#define SIGTRAP		 5
-#define SIGABRT		 6
-#define SIGIOT		 6
-#define SIGBUS		 7
-#define SIGFPE		 8
-#define SIGKILL		 9
-#define SIGUSR1		10
-#define SIGSEGV		11
-#define SIGUSR2		12
-#define SIGPIPE		13
-#define SIGALRM		14
-#define SIGTERM		15
-#define SIGSTKFLT	16
-#define SIGCHLD		17
-#define SIGCONT		18
-#define SIGSTOP		19
-#define SIGTSTP		20
-#define SIGTTIN		21
-#define SIGTTOU		22
-#define SIGURG		23
-#define SIGXCPU		24
-#define SIGXFSZ		25
-#define SIGVTALRM	26
-#define SIGPROF		27
-#define SIGWINCH	28
-#define SIGIO		29
-#define SIGPOLL		SIGIO
-/*
-#define SIGLOST		29
-*/
-#define SIGPWR		30
-#define SIGSYS		31
-#define	SIGUNUSED	31
-
-/* These should not be considered constants from userland.  */
-#define SIGRTMIN	32
-#define SIGRTMAX	_NSIG
-
-/*
- * SA_FLAGS values:
- *
- * SA_ONSTACK indicates that a registered stack_t will be used.
- * SA_RESTART flag to get restarting signals (which were the default long ago)
- * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop.
- * SA_RESETHAND clears the handler when the signal is delivered.
- * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies.
- * SA_NODEFER prevents the current signal from being masked in the handler.
- *
- * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single
- * Unix names RESETHAND and NODEFER respectively.
- */
-#define SA_NOCLDSTOP	0x00000001
-#define SA_NOCLDWAIT	0x00000002
-#define SA_SIGINFO	0x00000004
-#define SA_ONSTACK	0x08000000
-#define SA_RESTART	0x10000000
-#define SA_NODEFER	0x40000000
-#define SA_RESETHAND	0x80000000
-
-#define SA_NOMASK	SA_NODEFER
-#define SA_ONESHOT	SA_RESETHAND
-
-#define SA_RESTORER	0x04000000
-
-/* 
- * sigaltstack controls
- */
-#define SS_ONSTACK	1
-#define SS_DISABLE	2
-
-#define MINSIGSTKSZ	2048
-#define SIGSTKSZ	8192
-
-#include <asm-generic/signal.h>
-
-#ifdef __KERNEL__
-
-struct old_sigaction {
-	__sighandler_t sa_handler;
-	old_sigset_t sa_mask;
-	unsigned long sa_flags;
-	void (*sa_restorer)(void);
-};
-
-struct sigaction {
-	__sighandler_t sa_handler;
-	unsigned long sa_flags;
-	void (*sa_restorer)(void);
-	sigset_t sa_mask;		/* mask last for extensibility */
-};
-
-struct k_sigaction {
-	struct sigaction sa;
-};
-
-#else /* !__KERNEL__ */
-
-/* Here we must cater to libcs that poke about in kernel headers.  */
-
-struct sigaction {
-	union {
-	  __sighandler_t _sa_handler;
-	  void (*_sa_sigaction)(int, struct siginfo *, void *);
-	} _u;
-	sigset_t sa_mask;
-	unsigned long sa_flags;
-	void (*sa_restorer)(void);
-};
-
-#define sa_handler	_u._sa_handler
-#define sa_sigaction	_u._sa_sigaction
-
-#endif /* __KERNEL__ */
-
-
-typedef struct sigaltstack {
-	void *ss_sp;
-	int ss_flags;
-	size_t ss_size;
-} stack_t;
-
-#ifdef __KERNEL__
-
-#include <asm/sigcontext.h>
-#undef __HAVE_ARCH_SIG_BITOPS
-
-#define ptrace_signal_deliver(regs, cookie) do { } while (0)
-
-#endif /* __KERNEL__ */
-
-#endif /* __V850_SIGNAL_H__ */
diff --git a/include/asm-v850/sim.h b/include/asm-v850/sim.h
deleted file mode 100644
index 026932d476cd..000000000000
--- a/include/asm-v850/sim.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * include/asm-v850/sim.h -- Machine-dependent defs for GDB v850e simulator
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_SIM_H__
-#define __V850_SIM_H__
-
-
-#define CPU_ARCH		"v850e"
-#define CPU_MODEL		"v850e"
-#define CPU_MODEL_LONG		"NEC V850E"
-#define PLATFORM		"gdb/v850e"
-#define PLATFORM_LONG		"GDB V850E simulator"
-
-
-/* We use a weird value for RAM, not just 0, for testing purposes.
-   These must match the values used in the linker script.  */
-#define RAM_ADDR		0x8F000000
-#define RAM_SIZE		0x03000000
-
-
-/* For <asm/page.h> */
-#define PAGE_OFFSET 		RAM_ADDR
-
-
-/* For <asm/entry.h> */
-/* `R0 RAM', used for a few miscellaneous variables that must be
-   accessible using a load instruction relative to R0.  On real
-   processors, this usually is on-chip RAM, but here we just
-   choose an arbitrary address that meets the above constraint.  */
-#define R0_RAM_ADDR		0xFFFFF000
-
-
-/* For <asm/irq.h> */
-#define NUM_CPU_IRQS		6
-
-
-#endif /* __V850_SIM_H__ */
diff --git a/include/asm-v850/sim85e2.h b/include/asm-v850/sim85e2.h
deleted file mode 100644
index 8b4d6974066c..000000000000
--- a/include/asm-v850/sim85e2.h
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * include/asm-v850/sim85e2.h -- Machine-dependent defs for
- *	V850E2 RTL simulator
- *
- *  Copyright (C) 2002,03  NEC Electronics Corporation
- *  Copyright (C) 2002,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_SIM85E2_H__
-#define __V850_SIM85E2_H__
-
-
-#include <asm/v850e2.h>		/* Based on V850E2 core.  */
-
-
-/* Various memory areas supported by the simulator.
-   These should match the corresponding definitions in the linker script.  */
-
-/* `instruction RAM'; instruction fetches are much faster from IRAM than
-   from DRAM.  */
-#define IRAM_ADDR		0
-#define IRAM_SIZE		0x00100000 /* 1MB */
-/* `data RAM', below and contiguous with the I/O space.
-   Data fetches are much faster from DRAM than from IRAM.  */
-#define DRAM_ADDR		0xfff00000
-#define DRAM_SIZE		0x000ff000 /* 1020KB */
-/* `external ram'.  Unlike the above RAM areas, this memory is cached,
-   so both instruction and data fetches should be (mostly) fast --
-   however, currently only write-through caching is supported, so writes
-   to ERAM will be slow.  */
-#define ERAM_ADDR		0x00100000
-#define ERAM_SIZE		0x07f00000 /* 127MB (max) */
-/* Dynamic RAM; uses memory controller.  */
-#define SDRAM_ADDR		0x10000000
-#define SDRAM_SIZE		0x01000000 /* 16MB */
-
-
-/* Simulator specific control registers.  */
-/* NOTHAL controls whether the simulator will stop at a `halt' insn.  */
-#define SIM85E2_NOTHAL_ADDR	0xffffff22
-#define SIM85E2_NOTHAL		(*(volatile u8 *)SIM85E2_NOTHAL_ADDR)
-/* The simulator will stop N cycles after N is written to SIMFIN.  */
-#define SIM85E2_SIMFIN_ADDR	0xffffff24
-#define SIM85E2_SIMFIN		(*(volatile u16 *)SIM85E2_SIMFIN_ADDR)
-
-
-/* For <asm/irq.h> */
-#define NUM_CPU_IRQS		64
-
-
-/* For <asm/page.h> */
-#define PAGE_OFFSET		SDRAM_ADDR
-
-
-/* For <asm/entry.h> */
-/* `R0 RAM', used for a few miscellaneous variables that must be accessible
-   using a load instruction relative to R0.  The sim85e2 simulator
-   actually puts 1020K of RAM from FFF00000 to FFFFF000, so we arbitarily
-   choose a small portion at the end of that.  */
-#define R0_RAM_ADDR		0xFFFFE000
-
-
-#endif /* __V850_SIM85E2_H__ */
diff --git a/include/asm-v850/sim85e2c.h b/include/asm-v850/sim85e2c.h
deleted file mode 100644
index eee543ff3af8..000000000000
--- a/include/asm-v850/sim85e2c.h
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * include/asm-v850/sim85e2c.h -- Machine-dependent defs for
- *	V850E2 RTL simulator
- *
- *  Copyright (C) 2002  NEC Corporation
- *  Copyright (C) 2002  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_SIM85E2C_H__
-#define __V850_SIM85E2C_H__
-
-/* Use generic sim85e2 settings, other than the various names.  */
-#include <asm/sim85e2.h>
-
-#define CPU_MODEL	"v850e2"
-#define CPU_MODEL_LONG	"NEC V850E2"
-#define PLATFORM	"sim85e2c"
-#define PLATFORM_LONG	"SIM85E2C V850E2 simulator"
-
-#endif /* __V850_SIM85E2C_H__ */
diff --git a/include/asm-v850/sim85e2s.h b/include/asm-v850/sim85e2s.h
deleted file mode 100644
index ee066d5d3c51..000000000000
--- a/include/asm-v850/sim85e2s.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * include/asm-v850/sim85e2s.h -- Machine-dependent defs for
- *	V850E2 RTL simulator
- *
- *  Copyright (C) 2003  NEC Electronics Corporation
- *  Copyright (C) 2003  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_SIM85E2S_H__
-#define __V850_SIM85E2S_H__
-
-#include <asm/sim85e2.h>	/* Use generic sim85e2 settings.  */
-#if 0
-#include <asm/v850e2_cache.h>	/* + cache */
-#endif
-
-#define CPU_MODEL	"v850e2"
-#define CPU_MODEL_LONG	"NEC V850E2"
-#define PLATFORM	"sim85e2s"
-#define PLATFORM_LONG	"SIM85E2S V850E2 simulator"
-
-#endif /* __V850_SIM85E2S_H__ */
diff --git a/include/asm-v850/simsyscall.h b/include/asm-v850/simsyscall.h
deleted file mode 100644
index 4a19d5ae9d17..000000000000
--- a/include/asm-v850/simsyscall.h
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
- * include/asm-v850/simsyscall.h -- `System calls' under the v850e emulator
- *
- *  Copyright (C) 2001  NEC Corporation
- *  Copyright (C) 2001  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_SIMSYSCALL_H__
-#define __V850_SIMSYSCALL_H__
-
-#define V850_SIM_SYS_exit(a...)		V850_SIM_SYSCALL_1 (1	, ##a)
-#define V850_SIM_SYS_fork(a...)		V850_SIM_SYSCALL_0 (2	, ##a)
-#define V850_SIM_SYS_read(a...)		V850_SIM_SYSCALL_3 (3	, ##a)
-#define V850_SIM_SYS_write(a...)	V850_SIM_SYSCALL_3 (4	, ##a)
-#define V850_SIM_SYS_open(a...)		V850_SIM_SYSCALL_2 (5	, ##a)
-#define V850_SIM_SYS_close(a...)	V850_SIM_SYSCALL_1 (6	, ##a)
-#define V850_SIM_SYS_wait4(a...)	V850_SIM_SYSCALL_4 (7	, ##a)
-/* #define V850_SIM_SYS_creat(a...)	V850_SIM_SYSCALL_1 (8	, ##a) */
-/* #define V850_SIM_SYS_link(a...)	V850_SIM_SYSCALL_1 (9	, ##a) */
-/* #define V850_SIM_SYS_unlink(a...)	V850_SIM_SYSCALL_1 (10	, ##a) */
-#define V850_SIM_SYS_execv(a...)	V850_SIM_SYSCALL_2 (11	, ##a)
-/* #define V850_SIM_SYS_chdir(a...)	V850_SIM_SYSCALL_1 (12	, ##a) */
-/* #define V850_SIM_SYS_mknod(a...)	V850_SIM_SYSCALL_1 (14	, ##a) */
-#define V850_SIM_SYS_chmod(a...)	V850_SIM_SYSCALL_2 (15	, ##a)
-#define V850_SIM_SYS_chown(a...)	V850_SIM_SYSCALL_2 (16	, ##a)
-#define V850_SIM_SYS_lseek(a...)	V850_SIM_SYSCALL_3 (19	, ##a)
-/* #define V850_SIM_SYS_getpid(a...)	V850_SIM_SYSCALL_1 (20	, ##a) */
-/* #define V850_SIM_SYS_isatty(a...)	V850_SIM_SYSCALL_1 (21	, ##a) */
-/* #define V850_SIM_SYS_fstat(a...)	V850_SIM_SYSCALL_1 (22	, ##a) */
-#define V850_SIM_SYS_time(a...)		V850_SIM_SYSCALL_1 (23	, ##a)
-#define V850_SIM_SYS_poll(a...)		V850_SIM_SYSCALL_3 (24	, ##a)
-#define V850_SIM_SYS_stat(a...)		V850_SIM_SYSCALL_2 (38	, ##a)
-#define V850_SIM_SYS_pipe(a...)		V850_SIM_SYSCALL_1 (42	, ##a)
-#define V850_SIM_SYS_times(a...)	V850_SIM_SYSCALL_1 (43	, ##a)
-#define V850_SIM_SYS_execve(a...)	V850_SIM_SYSCALL_3 (59	, ##a)
-#define V850_SIM_SYS_gettimeofday(a...)	V850_SIM_SYSCALL_2 (116	, ##a)
-/* #define V850_SIM_SYS_utime(a...)	V850_SIM_SYSCALL_2 (201	, ##a) */
-/* #define V850_SIM_SYS_wait(a...)	V850_SIM_SYSCALL_1 (202	, ##a) */
-
-#define V850_SIM_SYS_make_raw(a...)	V850_SIM_SYSCALL_1 (1024 , ##a)
-
-
-#define V850_SIM_SYSCALL_0(_call)					      \
-({									      \
-	register int call __asm__ ("r6") = _call;			      \
-	register int rval __asm__ ("r10");				      \
-	__asm__ __volatile__ ("trap 31"					      \
-			      : "=r" (rval)				      \
-			      : "r" (call)				      \
-			      : "r11", "memory");			      \
-	rval;								      \
-})
-#define V850_SIM_SYSCALL_1(_call, _arg0)				      \
-({									      \
-	register int call __asm__ ("r6") = _call;			      \
-	register long arg0 __asm__ ("r7") = (long)_arg0;		      \
-	register int rval __asm__ ("r10");				      \
-	__asm__ __volatile__ ("trap 31"					      \
-			      : "=r" (rval)				      \
-			      : "r" (call), "r" (arg0)			      \
-			      : "r11", "memory");			      \
-	rval;								      \
-})
-#define V850_SIM_SYSCALL_2(_call, _arg0, _arg1)				      \
-({									      \
-	register int call __asm__ ("r6") = _call;			      \
-	register long arg0 __asm__ ("r7") = (long)_arg0;		      \
-	register long arg1 __asm__ ("r8") = (long)_arg1;		      \
-	register int rval __asm__ ("r10");				      \
-	__asm__ __volatile__ ("trap 31"					      \
-			      : "=r" (rval)				      \
-			      : "r" (call), "r" (arg0), "r" (arg1)	      \
-			      : "r11", "memory");			      \
-	rval;								      \
-})
-#define V850_SIM_SYSCALL_3(_call, _arg0, _arg1, _arg2)			      \
-({									      \
-	register int call __asm__ ("r6") = _call;			      \
-	register long arg0 __asm__ ("r7") = (long)_arg0;		      \
-	register long arg1 __asm__ ("r8") = (long)_arg1;		      \
-	register long arg2 __asm__ ("r9") = (long)_arg2;		      \
-	register int rval __asm__ ("r10");				      \
-	__asm__ __volatile__ ("trap 31"					      \
-			      : "=r" (rval)				      \
-			      : "r" (call), "r" (arg0), "r" (arg1), "r" (arg2)\
-			      : "r11", "memory");			      \
-	rval;								      \
-})
-
-#define V850_SIM_SYSCALL(call, args...) \
-   V850_SIM_SYS_##call (args)
-
-#endif /* __V850_SIMSYSCALL_H__ */
diff --git a/include/asm-v850/socket.h b/include/asm-v850/socket.h
deleted file mode 100644
index e199a2bf12aa..000000000000
--- a/include/asm-v850/socket.h
+++ /dev/null
@@ -1,57 +0,0 @@
-#ifndef __V850_SOCKET_H__
-#define __V850_SOCKET_H__
-
-#include <asm/sockios.h>
-
-/* For setsockoptions(2) */
-#define SOL_SOCKET	1
-
-#define SO_DEBUG	1
-#define SO_REUSEADDR	2
-#define SO_TYPE		3
-#define SO_ERROR	4
-#define SO_DONTROUTE	5
-#define SO_BROADCAST	6
-#define SO_SNDBUF	7
-#define SO_RCVBUF	8
-#define SO_SNDBUFFORCE	32
-#define SO_RCVBUFFORCE	33
-#define SO_KEEPALIVE	9
-#define SO_OOBINLINE	10
-#define SO_NO_CHECK	11
-#define SO_PRIORITY	12
-#define SO_LINGER	13
-#define SO_BSDCOMPAT	14
-/* To add :#define SO_REUSEPORT 15 */
-#define SO_PASSCRED	16
-#define SO_PEERCRED	17
-#define SO_RCVLOWAT	18
-#define SO_SNDLOWAT	19
-#define SO_RCVTIMEO	20
-#define SO_SNDTIMEO	21
-
-/* Security levels - as per NRL IPv6 - don't actually do anything */
-#define SO_SECURITY_AUTHENTICATION		22
-#define SO_SECURITY_ENCRYPTION_TRANSPORT	23
-#define SO_SECURITY_ENCRYPTION_NETWORK		24
-
-#define SO_BINDTODEVICE	25
-
-/* Socket filtering */
-#define SO_ATTACH_FILTER        26
-#define SO_DETACH_FILTER        27
-
-#define SO_PEERNAME             28
-#define SO_TIMESTAMP		29
-#define SCM_TIMESTAMP		SO_TIMESTAMP
-
-#define SO_ACCEPTCONN		30
-
-#define SO_PEERSEC		31
-#define SO_PASSSEC		34
-#define SO_TIMESTAMPNS		35
-#define SCM_TIMESTAMPNS		SO_TIMESTAMPNS
-
-#define SO_MARK			36
-
-#endif /* __V850_SOCKET_H__ */
diff --git a/include/asm-v850/sockios.h b/include/asm-v850/sockios.h
deleted file mode 100644
index 823e106e6cd0..000000000000
--- a/include/asm-v850/sockios.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#ifndef __V850_SOCKIOS_H__
-#define __V850_SOCKIOS_H__
-
-/* Socket-level I/O control calls. */
-#define FIOSETOWN 	0x8901
-#define SIOCSPGRP	0x8902
-#define FIOGETOWN	0x8903
-#define SIOCGPGRP	0x8904
-#define SIOCATMARK	0x8905
-#define SIOCGSTAMP	0x8906		/* Get stamp (timeval) */
-#define SIOCGSTAMPNS	0x8907		/* Get stamp (timespec) */
-
-#endif /* __V850_SOCKIOS_H__ */
diff --git a/include/asm-v850/stat.h b/include/asm-v850/stat.h
deleted file mode 100644
index c68c60d06e2f..000000000000
--- a/include/asm-v850/stat.h
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * include/asm-v850/stat.h -- v850 stat structure
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_STAT_H__
-#define __V850_STAT_H__
-
-#include <asm/posix_types.h>
-
-struct stat {
-	unsigned int	st_dev;
-	unsigned long	st_ino;
-	unsigned int	st_mode;
-	unsigned int	st_nlink;
-	unsigned int 	st_uid;
-	unsigned int 	st_gid;
-	unsigned int	st_rdev;
-	long		st_size;
-	unsigned long	st_blksize;
-	unsigned long	st_blocks;
-	unsigned long	st_atime;
-	unsigned long	__unused1;
-	unsigned long	st_mtime;
-	unsigned long	__unused2;
-	unsigned long	st_ctime;
-	unsigned long	__unused3;
-	unsigned long	__unused4;
-	unsigned long	__unused5;
-};
-
-struct stat64 {
-	unsigned long long	st_dev;
-	unsigned long	__unused1;
-
-	unsigned long long	st_ino;
-
-	unsigned int	st_mode;
-	unsigned int	st_nlink;
-
-	unsigned int	st_uid;
-	unsigned int	st_gid;
-
-	unsigned long long	st_rdev;
-	unsigned long	__unused3;
-
-	long long	st_size;
-	unsigned long	st_blksize;
-
-	unsigned long	st_blocks; /* No. of 512-byte blocks allocated */
-	unsigned long	__unused4; /* future possible st_blocks high bits */
-
-	unsigned long	st_atime;
-	unsigned long	st_atime_nsec;
-
-	unsigned long	st_mtime;
-	unsigned long	st_mtime_nsec;
-
-	unsigned long	st_ctime;
-	unsigned long	st_ctime_nsec;
-
-	unsigned long	__unused8;
-};
-
-#endif /* __V850_STAT_H__ */
diff --git a/include/asm-v850/statfs.h b/include/asm-v850/statfs.h
deleted file mode 100644
index ea1596607f26..000000000000
--- a/include/asm-v850/statfs.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __V850_STATFS_H__
-#define __V850_STATFS_H__
-
-#include <asm-generic/statfs.h>
-
-#endif /* __V850_STATFS_H__ */
diff --git a/include/asm-v850/string.h b/include/asm-v850/string.h
deleted file mode 100644
index 478e234789d6..000000000000
--- a/include/asm-v850/string.h
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * include/asm-v850/string.h -- Architecture specific string routines
- *
- *  Copyright (C) 2001,02  NEC Corporation
- *  Copyright (C) 2001,02  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_STRING_H__
-#define __V850_STRING_H__
-
-#define __HAVE_ARCH_MEMCPY
-#define __HAVE_ARCH_MEMSET
-#define __HAVE_ARCH_MEMMOVE
-
-extern void *memcpy (void *, const void *, __kernel_size_t);
-extern void *memset (void *, int, __kernel_size_t);
-extern void *memmove (void *, const void *, __kernel_size_t);
-
-#endif /* __V850_STRING_H__ */
diff --git a/include/asm-v850/system.h b/include/asm-v850/system.h
deleted file mode 100644
index 7daf1fdee119..000000000000
--- a/include/asm-v850/system.h
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- * include/asm-v850/system.h -- Low-level interrupt/thread ops
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_SYSTEM_H__
-#define __V850_SYSTEM_H__
-
-#include <linux/linkage.h>
-#include <asm/ptrace.h>
-
-
-/*
- * switch_to(n) should switch tasks to task ptr, first checking that
- * ptr isn't the current task, in which case it does nothing.
- */
-struct thread_struct;
-extern void *switch_thread (struct thread_struct *last,
-			    struct thread_struct *next);
-#define switch_to(prev,next,last)					      \
-  do {									      \
-        if (prev != next) {						      \
- 		(last) = switch_thread (&prev->thread, &next->thread);	      \
-	}								      \
-  } while (0)
-
-
-/* Enable/disable interrupts.  */
-#define local_irq_enable()	__asm__ __volatile__ ("ei")
-#define local_irq_disable()	__asm__ __volatile__ ("di")
-
-#define local_save_flags(flags) \
-  __asm__ __volatile__ ("stsr %1, %0" : "=r" (flags) : "i" (SR_PSW))
-#define local_restore_flags(flags) \
-  __asm__ __volatile__ ("ldsr %0, %1" :: "r" (flags), "i" (SR_PSW))
-
-/* For spinlocks etc */
-#define	local_irq_save(flags) \
-  do { local_save_flags (flags); local_irq_disable (); } while (0) 
-#define local_irq_restore(flags) \
-  local_restore_flags (flags);
-
-
-static inline int irqs_disabled (void)
-{
-	unsigned flags;
-	local_save_flags (flags);
-	return !!(flags & 0x20);
-}
-
-
-/*
- * Force strict CPU ordering.
- * Not really required on v850...
- */
-#define nop()			__asm__ __volatile__ ("nop")
-#define mb()			__asm__ __volatile__ ("" ::: "memory")
-#define rmb()			mb ()
-#define wmb()			mb ()
-#define read_barrier_depends()	((void)0)
-#define set_mb(var, value)	do { xchg (&var, value); } while (0)
-
-#define smp_mb()	mb ()
-#define smp_rmb()	rmb ()
-#define smp_wmb()	wmb ()
-#define smp_read_barrier_depends()	read_barrier_depends()
-
-#define xchg(ptr, with) \
-  ((__typeof__ (*(ptr)))__xchg ((unsigned long)(with), (ptr), sizeof (*(ptr))))
-
-static inline unsigned long __xchg (unsigned long with,
-				    __volatile__ void *ptr, int size)
-{
-	unsigned long tmp, flags;
-
-	local_irq_save (flags);
-
-	switch (size) {
-	case 1:
-		tmp = *(unsigned char *)ptr;
-		*(unsigned char *)ptr = with;
-		break;
-	case 2:
-		tmp = *(unsigned short *)ptr;
-		*(unsigned short *)ptr = with;
-		break;
-	case 4:
-		tmp = *(unsigned long *)ptr;
-		*(unsigned long *)ptr = with;
-		break;
-	}
-
-	local_irq_restore (flags);
-
-	return tmp;
-}
-
-#include <asm-generic/cmpxchg-local.h>
-
-/*
- * cmpxchg_local and cmpxchg64_local are atomic wrt current CPU. Always make
- * them available.
- */
-#define cmpxchg_local(ptr, o, n)				  	       \
-	((__typeof__(*(ptr)))__cmpxchg_local_generic((ptr), (unsigned long)(o),\
-			(unsigned long)(n), sizeof(*(ptr))))
-#define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n))
-
-#ifndef CONFIG_SMP
-#include <asm-generic/cmpxchg.h>
-#endif
-
-#define arch_align_stack(x) (x)
-
-#endif /* __V850_SYSTEM_H__ */
diff --git a/include/asm-v850/teg.h b/include/asm-v850/teg.h
deleted file mode 100644
index acc8c7d95329..000000000000
--- a/include/asm-v850/teg.h
+++ /dev/null
@@ -1,101 +0,0 @@
-/*
- * include/asm-v850/teg.h -- NB85E-TEG cpu chip
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_TEG_H__
-#define __V850_TEG_H__
-
-
-/* The TEG uses the V850E cpu core.  */
-#include <asm/v850e.h>
-#include <asm/v850e_cache.h>
-
-
-#define CPU_MODEL	"v850e/nb85e-teg"
-#define CPU_MODEL_LONG	"NEC V850E/NB85E TEG"
-
-
-/* For <asm/entry.h> */
-/* We use on-chip RAM, for a few miscellaneous variables that must be
-   accessible using a load instruction relative to R0.  On the NB85E/TEG,
-   There's 60KB of iRAM starting at 0xFFFF0000, however we need the base
-   address to be addressable by a 16-bit signed offset, so we only use the
-   second half of it starting from 0xFFFF8000.  */
-#define R0_RAM_ADDR			0xFFFF8000
-
-
-/* Hardware-specific interrupt numbers (in the kernel IRQ namespace).
-   Some of these are parameterized even though there's only a single
-   interrupt, for compatibility with some generic code that works on other
-   processor models.  */
-#define IRQ_INTCMD(n)	6	/* interval timer interrupt */
-#define IRQ_INTCMD_NUM	1
-#define IRQ_INTSER(n)	16	/* UART reception error */
-#define IRQ_INTSER_NUM	1
-#define IRQ_INTSR(n)	17	/* UART reception completion */
-#define IRQ_INTSR_NUM	1
-#define IRQ_INTST(n)	18	/* UART transmission completion */
-#define IRQ_INTST_NUM	1
-
-/* For <asm/irq.h> */
-#define NUM_CPU_IRQS	64
-
-
-/* TEG UART details.  */
-#define V850E_UART_BASE_ADDR(n)		(0xFFFFF600 + 0x10 * (n))
-#define V850E_UART_ASIM_ADDR(n)		(V850E_UART_BASE_ADDR(n) + 0x0)
-#define V850E_UART_ASIS_ADDR(n)		(V850E_UART_BASE_ADDR(n) + 0x2)
-#define V850E_UART_ASIF_ADDR(n)		(V850E_UART_BASE_ADDR(n) + 0x4)
-#define V850E_UART_CKSR_ADDR(n)		(V850E_UART_BASE_ADDR(n) + 0x6)
-#define V850E_UART_BRGC_ADDR(n)		(V850E_UART_BASE_ADDR(n) + 0x8)
-#define V850E_UART_TXB_ADDR(n)		(V850E_UART_BASE_ADDR(n) + 0xA)
-#define V850E_UART_RXB_ADDR(n)		(V850E_UART_BASE_ADDR(n) + 0xC)
-#define V850E_UART_NUM_CHANNELS		1
-#define V850E_UART_BASE_FREQ		CPU_CLOCK_FREQ
-/* This is a function that gets called before configuring the UART.  */
-#define V850E_UART_PRE_CONFIGURE	teg_uart_pre_configure
-#ifndef __ASSEMBLY__
-extern void teg_uart_pre_configure (unsigned chan,
-				    unsigned cflags, unsigned baud);
-#endif
-
-
-/* The TEG RTPU.  */
-#define V850E_RTPU_BASE_ADDR		0xFFFFF210
-
-
-/* TEG series timer D details.  */
-#define V850E_TIMER_D_BASE_ADDR		0xFFFFF210
-#define V850E_TIMER_D_TMCD_BASE_ADDR 	(V850E_TIMER_D_BASE_ADDR + 0x0)
-#define V850E_TIMER_D_TMD_BASE_ADDR 	(V850E_TIMER_D_BASE_ADDR + 0x4)
-#define V850E_TIMER_D_CMD_BASE_ADDR 	(V850E_TIMER_D_BASE_ADDR + 0x8)
-#define V850E_TIMER_D_BASE_FREQ		CPU_CLOCK_FREQ
-
-
-/* `Interrupt Source Select' control register.  */
-#define TEG_ISS_ADDR			0xFFFFF7FA
-#define TEG_ISS				(*(volatile u8 *)TEG_ISS_ADDR)
-
-/* Port 0 I/O register (bits 0-3 used).  */
-#define TEG_PORT0_IO_ADDR		0xFFFFF7F2
-#define TEG_PORT0_IO			(*(volatile u8 *)TEG_PORT0_IO_ADDR)
-/* Port 0 control register (bits 0-3 control mode, 0 = output, 1 = input).  */
-#define TEG_PORT0_PM_ADDR		0xFFFFF7F4
-#define TEG_PORT0_PM			(*(volatile u8 *)TEG_PORT0_PM_ADDR)
-
-
-#ifndef __ASSEMBLY__
-extern void teg_init_irqs (void);
-#endif
-
-
-#endif /* __V850_TEG_H__ */
diff --git a/include/asm-v850/termbits.h b/include/asm-v850/termbits.h
deleted file mode 100644
index 295d7bf69451..000000000000
--- a/include/asm-v850/termbits.h
+++ /dev/null
@@ -1,200 +0,0 @@
-#ifndef __V850_TERMBITS_H__
-#define __V850_TERMBITS_H__
-
-#include <linux/posix_types.h>
-
-typedef unsigned char	cc_t;
-typedef unsigned int	speed_t;
-typedef unsigned int	tcflag_t;
-
-#define NCCS 19
-struct termios {
-	tcflag_t c_iflag;		/* input mode flags */
-	tcflag_t c_oflag;		/* output mode flags */
-	tcflag_t c_cflag;		/* control mode flags */
-	tcflag_t c_lflag;		/* local mode flags */
-	cc_t c_line;			/* line discipline */
-	cc_t c_cc[NCCS];		/* control characters */
-};
-
-struct termios2 {
-	tcflag_t c_iflag;		/* input mode flags */
-	tcflag_t c_oflag;		/* output mode flags */
-	tcflag_t c_cflag;		/* control mode flags */
-	tcflag_t c_lflag;		/* local mode flags */
-	cc_t c_line;			/* line discipline */
-	cc_t c_cc[NCCS];		/* control characters */
-	speed_t c_ispeed;		/* input speed */
-	speed_t c_ospeed;		/* output speed */
-};
-
-struct ktermios {
-	tcflag_t c_iflag;		/* input mode flags */
-	tcflag_t c_oflag;		/* output mode flags */
-	tcflag_t c_cflag;		/* control mode flags */
-	tcflag_t c_lflag;		/* local mode flags */
-	cc_t c_line;			/* line discipline */
-	cc_t c_cc[NCCS];		/* control characters */
-	speed_t c_ispeed;		/* input speed */
-	speed_t c_ospeed;		/* output speed */
-};
-
-/* c_cc characters */
-#define VINTR 0
-#define VQUIT 1
-#define VERASE 2
-#define VKILL 3
-#define VEOF 4
-#define VTIME 5
-#define VMIN 6
-#define VSWTC 7
-#define VSTART 8
-#define VSTOP 9
-#define VSUSP 10
-#define VEOL 11
-#define VREPRINT 12
-#define VDISCARD 13
-#define VWERASE 14
-#define VLNEXT 15
-#define VEOL2 16
-
-
-/* c_iflag bits */
-#define IGNBRK	0000001
-#define BRKINT	0000002
-#define IGNPAR	0000004
-#define PARMRK	0000010
-#define INPCK	0000020
-#define ISTRIP	0000040
-#define INLCR	0000100
-#define IGNCR	0000200
-#define ICRNL	0000400
-#define IUCLC	0001000
-#define IXON	0002000
-#define IXANY	0004000
-#define IXOFF	0010000
-#define IMAXBEL	0020000
-#define IUTF8	0040000
-
-/* c_oflag bits */
-#define OPOST	0000001
-#define OLCUC	0000002
-#define ONLCR	0000004
-#define OCRNL	0000010
-#define ONOCR	0000020
-#define ONLRET	0000040
-#define OFILL	0000100
-#define OFDEL	0000200
-#define NLDLY	0000400
-#define   NL0	0000000
-#define   NL1	0000400
-#define CRDLY	0003000
-#define   CR0	0000000
-#define   CR1	0001000
-#define   CR2	0002000
-#define   CR3	0003000
-#define TABDLY	0014000
-#define   TAB0	0000000
-#define   TAB1	0004000
-#define   TAB2	0010000
-#define   TAB3	0014000
-#define   XTABS	0014000
-#define BSDLY	0020000
-#define   BS0	0000000
-#define   BS1	0020000
-#define VTDLY	0040000
-#define   VT0	0000000
-#define   VT1	0040000
-#define FFDLY	0100000
-#define   FF0	0000000
-#define   FF1	0100000
-
-/* c_cflag bit meaning */
-#define CBAUD	0010017
-#define  B0	0000000		/* hang up */
-#define  B50	0000001
-#define  B75	0000002
-#define  B110	0000003
-#define  B134	0000004
-#define  B150	0000005
-#define  B200	0000006
-#define  B300	0000007
-#define  B600	0000010
-#define  B1200	0000011
-#define  B1800	0000012
-#define  B2400	0000013
-#define  B4800	0000014
-#define  B9600	0000015
-#define  B19200	0000016
-#define  B38400	0000017
-#define EXTA B19200
-#define EXTB B38400
-#define CSIZE	0000060
-#define   CS5	0000000
-#define   CS6	0000020
-#define   CS7	0000040
-#define   CS8	0000060
-#define CSTOPB	0000100
-#define CREAD	0000200
-#define PARENB	0000400
-#define PARODD	0001000
-#define HUPCL	0002000
-#define CLOCAL	0004000
-#define CBAUDEX 0010000
-#define    BOTHER 0010000
-#define    B57600 0010001
-#define   B115200 0010002
-#define   B230400 0010003
-#define   B460800 0010004
-#define   B500000 0010005
-#define   B576000 0010006
-#define   B921600 0010007
-#define  B1000000 0010010
-#define  B1152000 0010011
-#define  B1500000 0010012
-#define  B2000000 0010013
-#define  B2500000 0010014
-#define  B3000000 0010015
-#define  B3500000 0010016
-#define  B4000000 0010017
-#define CIBAUD	  002003600000		/* input baud rate */
-#define CMSPAR	  010000000000		/* mark or space (stick) parity */
-#define CRTSCTS	  020000000000		/* flow control */
-
-#define	IBSHIFT	16		/* Shifr from CBAUD to CIBAUD */
-
-/* c_lflag bits */
-#define ISIG	0000001
-#define ICANON	0000002
-#define XCASE	0000004
-#define ECHO	0000010
-#define ECHOE	0000020
-#define ECHOK	0000040
-#define ECHONL	0000100
-#define NOFLSH	0000200
-#define TOSTOP	0000400
-#define ECHOCTL	0001000
-#define ECHOPRT	0002000
-#define ECHOKE	0004000
-#define FLUSHO	0010000
-#define PENDIN	0040000
-#define IEXTEN	0100000
-
-
-/* tcflow() and TCXONC use these */
-#define	TCOOFF		0
-#define	TCOON		1
-#define	TCIOFF		2
-#define	TCION		3
-
-/* tcflush() and TCFLSH use these */
-#define	TCIFLUSH	0
-#define	TCOFLUSH	1
-#define	TCIOFLUSH	2
-
-/* tcsetattr uses these */
-#define	TCSANOW		0
-#define	TCSADRAIN	1
-#define	TCSAFLUSH	2
-
-#endif /* __V850_TERMBITS_H__ */
diff --git a/include/asm-v850/termios.h b/include/asm-v850/termios.h
deleted file mode 100644
index fcd171838d9c..000000000000
--- a/include/asm-v850/termios.h
+++ /dev/null
@@ -1,90 +0,0 @@
-#ifndef __V850_TERMIOS_H__
-#define __V850_TERMIOS_H__
-
-#include <asm/termbits.h>
-#include <asm/ioctls.h>
-
-struct winsize {
-	unsigned short ws_row;
-	unsigned short ws_col;
-	unsigned short ws_xpixel;
-	unsigned short ws_ypixel;
-};
-
-#define NCC 8
-struct termio {
-	unsigned short c_iflag;		/* input mode flags */
-	unsigned short c_oflag;		/* output mode flags */
-	unsigned short c_cflag;		/* control mode flags */
-	unsigned short c_lflag;		/* local mode flags */
-	unsigned char c_line;		/* line discipline */
-	unsigned char c_cc[NCC];	/* control characters */
-};
-
-/* modem lines */
-#define TIOCM_LE	0x001
-#define TIOCM_DTR	0x002
-#define TIOCM_RTS	0x004
-#define TIOCM_ST	0x008
-#define TIOCM_SR	0x010
-#define TIOCM_CTS	0x020
-#define TIOCM_CAR	0x040
-#define TIOCM_RNG	0x080
-#define TIOCM_DSR	0x100
-#define TIOCM_CD	TIOCM_CAR
-#define TIOCM_RI	TIOCM_RNG
-#define TIOCM_OUT1	0x2000
-#define TIOCM_OUT2	0x4000
-#define TIOCM_LOOP	0x8000
-
-/* ioctl (fd, TIOCSERGETLSR, &result) where result may be as below */
-
-#ifdef __KERNEL__
-
-/*	intr=^C		quit=^\		erase=del	kill=^U
-	eof=^D		vtime=\0	vmin=\1		sxtc=\0
-	start=^Q	stop=^S		susp=^Z		eol=\0
-	reprint=^R	discard=^U	werase=^W	lnext=^V
-	eol2=\0
-*/
-#define INIT_C_CC "\003\034\177\025\004\0\1\0\021\023\032\0\022\017\027\026\0"
-
-/*
- * Translate a "termio" structure into a "termios". Ugh.
- */
-#define SET_LOW_TERMIOS_BITS(termios, termio, x) { \
-	unsigned short __tmp; \
-	get_user(__tmp,&(termio)->x); \
-	*(unsigned short *) &(termios)->x = __tmp; \
-}
-
-#define user_termio_to_kernel_termios(termios, termio) \
-({ \
-	SET_LOW_TERMIOS_BITS(termios, termio, c_iflag); \
-	SET_LOW_TERMIOS_BITS(termios, termio, c_oflag); \
-	SET_LOW_TERMIOS_BITS(termios, termio, c_cflag); \
-	SET_LOW_TERMIOS_BITS(termios, termio, c_lflag); \
-	copy_from_user((termios)->c_cc, (termio)->c_cc, NCC); \
-})
-
-/*
- * Translate a "termios" structure into a "termio". Ugh.
- */
-#define kernel_termios_to_user_termio(termio, termios) \
-({ \
-	put_user((termios)->c_iflag, &(termio)->c_iflag); \
-	put_user((termios)->c_oflag, &(termio)->c_oflag); \
-	put_user((termios)->c_cflag, &(termio)->c_cflag); \
-	put_user((termios)->c_lflag, &(termio)->c_lflag); \
-	put_user((termios)->c_line,  &(termio)->c_line); \
-	copy_to_user((termio)->c_cc, (termios)->c_cc, NCC); \
-})
-
-#define user_termios_to_kernel_termios(k, u) copy_from_user(k, u, sizeof(struct termios2))
-#define kernel_termios_to_user_termios(u, k) copy_to_user(u, k, sizeof(struct termios2))
-#define user_termios_to_kernel_termios_1(k, u) copy_from_user(k, u, sizeof(struct termios))
-#define kernel_termios_to_user_termios_1(u, k) copy_to_user(u, k, sizeof(struct termios))
-
-#endif	/* __KERNEL__ */
-
-#endif	/* __V850_TERMIOS_H__ */
diff --git a/include/asm-v850/thread_info.h b/include/asm-v850/thread_info.h
deleted file mode 100644
index 1a9e6ae0c5fd..000000000000
--- a/include/asm-v850/thread_info.h
+++ /dev/null
@@ -1,129 +0,0 @@
-/*
- * include/asm-v850/thread_info.h -- v850 low-level thread information
- *
- *  Copyright (C) 2002  NEC Corporation
- *  Copyright (C) 2002  Miles Bader <miles@gnu.org>
- *  Copyright (C) 2002  David Howells (dhowells@redhat.com)
- *    - Incorporating suggestions made by Linus Torvalds and Dave Miller
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * This file was derived from the PPC version, include/asm-ppc/thread_info.h
- * which was adapted from the i386 version by Paul Mackerras
- */
-
-#ifndef __V850_THREAD_INFO_H__
-#define __V850_THREAD_INFO_H__
-
-#ifdef __KERNEL__
-
-#ifndef __ASSEMBLY__
-
-/*
- * low level task data.
- * If you change this, change the TI_* offsets below to match.
- */
-struct thread_info {
-	struct task_struct	*task;		/* main task structure */
-	struct exec_domain	*exec_domain;	/* execution domain */
-	unsigned long		flags;		/* low level flags */
-	int			cpu;		/* cpu we're on */
-	int			preempt_count;	/* 0 => preemptable,
-						   <0 => BUG */
-	struct restart_block	restart_block;
-};
-
-#define INIT_THREAD_INFO(tsk)						      \
-{									      \
-	.task =		&tsk,						      \
-	.exec_domain =	&default_exec_domain,				      \
-	.flags =	0,						      \
-	.cpu =		0,						      \
-	.preempt_count = 1,						      \
-	.restart_block = {						      \
-		.fn = do_no_restart_syscall,				      \
-	},								      \
-}
-
-#define init_thread_info	(init_thread_union.thread_info)
-#define init_stack		(init_thread_union.stack)
-
-/*
- * macros/functions for gaining access to the thread information structure
- */
-
-/* thread information allocation */
-#define alloc_thread_info(tsk) ((struct thread_info *) \
-				__get_free_pages(GFP_KERNEL, 1))
-#define free_thread_info(ti)	free_pages((unsigned long) (ti), 1)
-
-#endif /* __ASSEMBLY__ */
-
-
-/*
- * Offsets in thread_info structure, used in assembly code
- */
-#define TI_TASK		0
-#define TI_EXECDOMAIN	4
-#define TI_FLAGS	8
-#define TI_CPU		12
-#define TI_PREEMPT	16
-
-#define PREEMPT_ACTIVE		0x4000000
-
-/*
- * thread information flag bit numbers
- */
-#define TIF_SYSCALL_TRACE	0	/* syscall trace active */
-#define TIF_SIGPENDING		1	/* signal pending */
-#define TIF_NEED_RESCHED	2	/* rescheduling necessary */
-#define TIF_POLLING_NRFLAG	3	/* true if poll_idle() is polling
-					   TIF_NEED_RESCHED */
-#define TIF_MEMDIE		4
-
-/* as above, but as bit values */
-#define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
-#define _TIF_SIGPENDING		(1<<TIF_SIGPENDING)
-#define _TIF_NEED_RESCHED	(1<<TIF_NEED_RESCHED)
-#define _TIF_POLLING_NRFLAG	(1<<TIF_POLLING_NRFLAG)
-
-
-/* Size of kernel stack for each process.  */
-#define THREAD_SIZE		0x2000
-
-/* The alignment of kernel threads, with thread_info structures at their
-   base.  Thus, a pointer for a task's task structure can be derived from
-   its kernel stack pointer.  */
-#define THREAD_ALIGNMENT	THREAD_SIZE
-#define THREAD_MASK		(-THREAD_ALIGNMENT)
-
-
-#ifdef __ASSEMBLY__
-
-/* Put a pointer to the current thread_info structure into REG.  Note that
-   this definition requires THREAD_MASK to be representable as a signed
-   16-bit value.  */
-#define GET_CURRENT_THREAD(reg)						\
-        /* Use `addi' and then `and' instead of just `andi', because	\
-	   `addi' sign-extends the immediate value, whereas `andi'	\
-	   zero-extends it.  */						\
-	addi	THREAD_MASK, r0, reg;					\
-	and	sp, reg
-
-#else
-
-/* Return a pointer to the current thread_info structure.  */
-static inline struct thread_info *current_thread_info (void)
-{
-	register unsigned long sp __asm__ ("sp");
-	return (struct thread_info *)(sp & THREAD_MASK);
-}
-
-#endif /* __ASSEMBLY__ */
-
-
-#endif /* __KERNEL__ */
-
-#endif /* __V850_THREAD_INFO_H__ */
diff --git a/include/asm-v850/timex.h b/include/asm-v850/timex.h
deleted file mode 100644
index 6279e5a0ee8e..000000000000
--- a/include/asm-v850/timex.h
+++ /dev/null
@@ -1,18 +0,0 @@
-/*
- * linux/include/asm-v850/timex.h
- *
- * v850 architecture timex specifications
- */
-#ifndef __V850_TIMEX_H__
-#define __V850_TIMEX_H__
-
-#define CLOCK_TICK_RATE	1193180 /* Underlying HZ */
-
-typedef unsigned long cycles_t;
-
-static inline cycles_t get_cycles(void)
-{
-	return 0;
-}
-
-#endif /* __V850_TIMEX_H__ */
diff --git a/include/asm-v850/tlb.h b/include/asm-v850/tlb.h
deleted file mode 100644
index 73bc9ead40dd..000000000000
--- a/include/asm-v850/tlb.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * include/asm-v850/tlb.h
- *
- *  Copyright (C) 2002  NEC Corporation
- *  Copyright (C) 2002  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_TLB_H__
-#define __V850_TLB_H__
-
-#define tlb_flush(tlb)	((void)0)
-
-#include <asm-generic/tlb.h>
-
-#endif /* __V850_TLB_H__ */
diff --git a/include/asm-v850/tlbflush.h b/include/asm-v850/tlbflush.h
deleted file mode 100644
index c44aa64449c8..000000000000
--- a/include/asm-v850/tlbflush.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * include/asm-v850/tlbflush.h
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_TLBFLUSH_H__
-#define __V850_TLBFLUSH_H__
-
-#include <asm/machdep.h>
-
-
-/*
- * flush all user-space atc entries.
- */
-static inline void __flush_tlb(void)
-{
-	BUG ();
-}
-
-static inline void __flush_tlb_one(unsigned long addr)
-{
-	BUG ();
-}
-
-#define flush_tlb() __flush_tlb()
-
-/*
- * flush all atc entries (both kernel and user-space entries).
- */
-static inline void flush_tlb_all(void)
-{
-	BUG ();
-}
-
-static inline void flush_tlb_mm(struct mm_struct *mm)
-{
-	BUG ();
-}
-
-static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr)
-{
-	BUG ();
-}
-
-static inline void flush_tlb_range(struct vm_area_struct *vma,
-				   unsigned long start, unsigned long end)
-{
-	BUG ();
-}
-
-static inline void flush_tlb_kernel_page(unsigned long addr)
-{
-	BUG ();
-}
-
-#endif /* __V850_TLBFLUSH_H__ */
diff --git a/include/asm-v850/topology.h b/include/asm-v850/topology.h
deleted file mode 100644
index 6040e41d7945..000000000000
--- a/include/asm-v850/topology.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __V850_TOPOLOGY_H__
-#define __V850_TOPOLOGY_H__
-
-#include <asm-generic/topology.h>
-
-#endif /* __V850_TOPOLOGY_H__ */
diff --git a/include/asm-v850/types.h b/include/asm-v850/types.h
deleted file mode 100644
index 89f735ee41dd..000000000000
--- a/include/asm-v850/types.h
+++ /dev/null
@@ -1,36 +0,0 @@
-#ifndef __V850_TYPES_H__
-#define __V850_TYPES_H__
-
-#ifndef __ASSEMBLY__
-
-/*
- * This file is never included by application software unless
- * explicitly requested (e.g., via linux/types.h) in which case the
- * application is Linux specific so (user-) name space pollution is
- * not a major issue.  However, for interoperability, libraries still
- * need to be careful to avoid a name clashes.
- */
-#include <asm-generic/int-ll64.h>
-
-typedef unsigned short umode_t;
-
-#endif /* !__ASSEMBLY__ */
-
-/*
- * These aren't exported outside the kernel to avoid name space clashes
- */
-#ifdef __KERNEL__
-
-#define BITS_PER_LONG 32
-
-#ifndef __ASSEMBLY__
-
-/* Dma addresses are 32-bits wide.  */
-
-typedef u32 dma_addr_t;
-
-#endif /* !__ASSEMBLY__ */
-
-#endif /* __KERNEL__ */
-
-#endif /* __V850_TYPES_H__ */
diff --git a/include/asm-v850/uaccess.h b/include/asm-v850/uaccess.h
deleted file mode 100644
index 64563c409bb2..000000000000
--- a/include/asm-v850/uaccess.h
+++ /dev/null
@@ -1,159 +0,0 @@
-#ifndef __V850_UACCESS_H__
-#define __V850_UACCESS_H__
-
-/*
- * User space memory access functions
- */
-
-#include <linux/errno.h>
-#include <linux/string.h>
-
-#include <asm/segment.h>
-#include <asm/machdep.h>
-
-#define VERIFY_READ	0
-#define VERIFY_WRITE	1
-
-static inline int access_ok (int type, const void *addr, unsigned long size)
-{
-	/* XXX I guess we should check against real ram bounds at least, and
-	   possibly make sure ADDR is not within the kernel.
-	   For now we just check to make sure it's not a small positive
-	   or negative value, as that will at least catch some kinds of
-	   error.  In particular, we make sure that ADDR's not within the
-	   interrupt vector area, which we know starts at zero, or within the
-	   peripheral-I/O area, which is located just _before_ zero.  */
-	unsigned long val = (unsigned long)addr;
-	return val >= (0x80 + NUM_CPU_IRQS*16) && val < 0xFFFFF000;
-}
-
-/*
- * The exception table consists of pairs of addresses: the first is the
- * address of an instruction that is allowed to fault, and the second is
- * the address at which the program should continue.  No registers are
- * modified, so it is entirely up to the continuation code to figure out
- * what to do.
- *
- * All the routines below use bits of fixup code that are out of line
- * with the main instruction path.  This means when everything is well,
- * we don't even have to jump over them.  Further, they do not intrude
- * on our cache or tlb entries.
- */
-
-struct exception_table_entry
-{
-	unsigned long insn, fixup;
-};
-
-/* Returns 0 if exception not found and fixup otherwise.  */
-extern unsigned long search_exception_table (unsigned long);
-
-
-/*
- * These are the main single-value transfer routines.  They automatically
- * use the right size if we just have the right pointer type.
- */
-
-extern int bad_user_access_length (void);
-
-#define __get_user(var, ptr)						      \
-  ({									      \
-	  int __gu_err = 0;						      \
-	  typeof(*(ptr)) __gu_val = 0;					      \
-	  switch (sizeof (*(ptr))) {					      \
-	  case 1:							      \
-	  case 2:							      \
-	  case 4:							      \
-		  __gu_val = *(ptr);					      \
-		  break;						      \
-	  case 8:							      \
-		  memcpy(&__gu_val, ptr, sizeof(__gu_val));		      \
-		  break;						      \
-	  default:							      \
-		  __gu_val = 0;						      \
-		  __gu_err = __get_user_bad ();				      \
-		  break;						      \
-	  }								      \
-	  (var) = __gu_val;						      \
-	  __gu_err;							      \
-  })
-#define __get_user_bad()	(bad_user_access_length (), (-EFAULT))
-
-#define __put_user(var, ptr)						      \
-  ({									      \
-	  int __pu_err = 0;						      \
-	  switch (sizeof (*(ptr))) {					      \
-	  case 1:							      \
-	  case 2:							      \
-	  case 4:							      \
-		  *(ptr) = (var);					      \
-		  break;						      \
-	  case 8: {							      \
-	  	  typeof(*(ptr)) __pu_val = 0;				      \
-		  memcpy(ptr, &__pu_val, sizeof(__pu_val));		      \
-		  }							      \
-		  break;						      \
-	  default:							      \
-		  __pu_err = __put_user_bad ();				      \
-		  break;						      \
-	  }								      \
-	  __pu_err;							      \
-  })
-#define __put_user_bad()	(bad_user_access_length (), (-EFAULT))
-
-#define put_user(x, ptr)	__put_user(x, ptr)
-#define get_user(x, ptr)	__get_user(x, ptr)
-
-#define __copy_from_user(to, from, n)	(memcpy (to, from, n), 0)
-#define __copy_to_user(to, from, n)	(memcpy(to, from, n), 0)
-
-#define __copy_to_user_inatomic __copy_to_user
-#define __copy_from_user_inatomic __copy_from_user
-
-#define copy_from_user(to, from, n)	__copy_from_user (to, from, n)
-#define copy_to_user(to, from, n) 	__copy_to_user(to, from, n)
-
-#define copy_to_user_ret(to,from,n,retval) \
-  ({ if (copy_to_user (to,from,n)) return retval; })
-
-#define copy_from_user_ret(to,from,n,retval) \
-  ({ if (copy_from_user (to,from,n)) return retval; })
-
-/*
- * Copy a null terminated string from userspace.
- */
-
-static inline long
-strncpy_from_user (char *dst, const char *src, long count)
-{
-	char *tmp;
-	strncpy (dst, src, count);
-	for (tmp = dst; *tmp && count > 0; tmp++, count--)
-		;
-	return tmp - dst;
-}
-
-/*
- * Return the size of a string (including the ending 0)
- *
- * Return 0 on exception, a value greater than N if too long
- */
-static inline long strnlen_user (const char *src, long n)
-{
-	return strlen (src) + 1;
-}
-
-#define strlen_user(str)	strnlen_user (str, 32767)
-
-/*
- * Zero Userspace
- */
-
-static inline unsigned long
-clear_user (void *to, unsigned long n)
-{
-	memset (to, 0, n);
-	return 0;
-}
-
-#endif /* __V850_UACCESS_H__ */
diff --git a/include/asm-v850/ucontext.h b/include/asm-v850/ucontext.h
deleted file mode 100644
index 303c21590cff..000000000000
--- a/include/asm-v850/ucontext.h
+++ /dev/null
@@ -1,14 +0,0 @@
-#ifndef __V850_UCONTEXT_H__
-#define __V850_UCONTEXT_H__
-
-#include <asm/sigcontext.h>
-
-struct ucontext {
-	unsigned long	  uc_flags;
-	struct ucontext  *uc_link;
-	stack_t		  uc_stack;
-	struct sigcontext uc_mcontext;
-	sigset_t	  uc_sigmask;	/* mask last for extensibility */
-};
-
-#endif /* __V850_UCONTEXT_H__ */
diff --git a/include/asm-v850/unaligned.h b/include/asm-v850/unaligned.h
deleted file mode 100644
index 53122b28491e..000000000000
--- a/include/asm-v850/unaligned.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- *  Copyright (C) 2001  NEC Corporation
- *  Copyright (C) 2001  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Note that some v850 chips support unaligned access, but it seems too
- * annoying to use.
- */
-#ifndef _ASM_V850_UNALIGNED_H
-#define _ASM_V850_UNALIGNED_H
-
-#include <linux/unaligned/be_byteshift.h>
-#include <linux/unaligned/le_byteshift.h>
-#include <linux/unaligned/generic.h>
-
-#define get_unaligned	__get_unaligned_le
-#define put_unaligned	__put_unaligned_le
-
-#endif /* _ASM_V850_UNALIGNED_H */
diff --git a/include/asm-v850/unistd.h b/include/asm-v850/unistd.h
deleted file mode 100644
index 2241ed45ecfe..000000000000
--- a/include/asm-v850/unistd.h
+++ /dev/null
@@ -1,244 +0,0 @@
-/*
- * include/asm-v850/unistd.h -- System call numbers and invocation mechanism
- *
- *  Copyright (C) 2001,02,03,04  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03,04  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_UNISTD_H__
-#define __V850_UNISTD_H__
-
-#define __NR_restart_syscall	  0
-#define __NR_exit		  1
-#define __NR_fork		  2
-#define __NR_read		  3
-#define __NR_write		  4
-#define __NR_open		  5
-#define __NR_close		  6
-#define __NR_waitpid		  7
-#define __NR_creat		  8
-#define __NR_link		  9
-#define __NR_unlink		 10
-#define __NR_execve		 11
-#define __NR_chdir		 12
-#define __NR_time		 13
-#define __NR_mknod		 14
-#define __NR_chmod		 15
-#define __NR_chown		 16
-#define __NR_break		 17
-#define __NR_lseek		 19
-#define __NR_getpid		 20
-#define __NR_mount		 21
-#define __NR_umount		 22
-#define __NR_setuid		 23
-#define __NR_getuid		 24
-#define __NR_stime		 25
-#define __NR_ptrace		 26
-#define __NR_alarm		 27
-#define __NR_pause		 29
-#define __NR_utime		 30
-#define __NR_stty		 31
-#define __NR_gtty		 32
-#define __NR_access		 33
-#define __NR_nice		 34
-#define __NR_ftime		 35
-#define __NR_sync		 36
-#define __NR_kill		 37
-#define __NR_rename		 38
-#define __NR_mkdir		 39
-#define __NR_rmdir		 40
-#define __NR_dup		 41
-#define __NR_pipe		 42
-#define __NR_times		 43
-#define __NR_prof		 44
-#define __NR_brk		 45
-#define __NR_setgid		 46
-#define __NR_getgid		 47
-#define __NR_signal		 48
-#define __NR_geteuid		 49
-#define __NR_getegid		 50
-#define __NR_acct		 51
-#define __NR_umount2		 52
-#define __NR_lock		 53
-#define __NR_ioctl		 54
-#define __NR_fcntl		 55
-#define __NR_setpgid		 57
-#define __NR_umask		 60
-#define __NR_chroot		 61
-#define __NR_ustat		 62
-#define __NR_dup2		 63
-#define __NR_getppid		 64
-#define __NR_getpgrp		 65
-#define __NR_setsid		 66
-#define __NR_sigaction		 67
-#define __NR_sgetmask		 68
-#define __NR_ssetmask		 69
-#define __NR_setreuid		 70
-#define __NR_setregid		 71
-#define __NR_sigsuspend		 72
-#define __NR_sigpending		 73
-#define __NR_sethostname	 74
-#define __NR_setrlimit		 75
-#define __NR_ugetrlimit	 	 76
-#define __NR_getrusage		 77
-#define __NR_gettimeofday	 78
-#define __NR_settimeofday	 79
-#define __NR_getgroups		 80
-#define __NR_setgroups		 81
-#define __NR_select		 82
-#define __NR_symlink		 83
-#define __NR_readlink		 85
-#define __NR_uselib		 86
-#define __NR_swapon		 87
-#define __NR_reboot		 88
-#define __NR_readdir		 89
-#define __NR_mmap		 90
-#define __NR_munmap		 91
-#define __NR_truncate		 92
-#define __NR_ftruncate		 93
-#define __NR_fchmod		 94
-#define __NR_fchown		 95
-#define __NR_getpriority	 96
-#define __NR_setpriority	 97
-#define __NR_profil		 98
-#define __NR_statfs		 99
-#define __NR_fstatfs		100
-#define __NR_socketcall		102
-#define __NR_syslog		103
-#define __NR_setitimer		104
-#define __NR_getitimer		105
-#define __NR_stat		106
-#define __NR_lstat		107
-#define __NR_fstat		108
-#define __NR_vhangup		111
-#define __NR_wait4		114
-#define __NR_swapoff		115
-#define __NR_sysinfo		116
-#define __NR_ipc		117
-#define __NR_fsync		118
-#define __NR_sigreturn		119
-#define __NR_clone		120
-#define __NR_setdomainname	121
-#define __NR_uname		122
-#define __NR_cacheflush		123
-#define __NR_adjtimex		124
-#define __NR_mprotect		125
-#define __NR_sigprocmask	126
-#define __NR_create_module	127
-#define __NR_init_module	128
-#define __NR_delete_module	129
-#define __NR_get_kernel_syms	130
-#define __NR_quotactl		131
-#define __NR_getpgid		132
-#define __NR_fchdir		133
-#define __NR_bdflush		134
-#define __NR_sysfs		135
-#define __NR_personality	136
-#define __NR_afs_syscall	137 /* Syscall for Andrew File System */
-#define __NR_setfsuid		138
-#define __NR_setfsgid		139
-#define __NR__llseek		140
-#define __NR_getdents		141
-#define __NR_flock		143
-#define __NR_msync		144
-#define __NR_readv		145
-#define __NR_writev		146
-#define __NR_getsid		147
-#define __NR_fdatasync		148
-#define __NR__sysctl		149
-#define __NR_mlock		150
-#define __NR_munlock		151
-#define __NR_mlockall		152
-#define __NR_munlockall		153
-#define __NR_sched_setparam		154
-#define __NR_sched_getparam		155
-#define __NR_sched_setscheduler		156
-#define __NR_sched_getscheduler		157
-#define __NR_sched_yield		158
-#define __NR_sched_get_priority_max	159
-#define __NR_sched_get_priority_min	160
-#define __NR_sched_rr_get_interval	161
-#define __NR_nanosleep		162
-#define __NR_mremap		163
-#define __NR_setresuid		164
-#define __NR_getresuid		165
-#define __NR_query_module	167
-#define __NR_poll		168
-#define __NR_nfsservctl		169
-#define __NR_setresgid		170
-#define __NR_getresgid		171
-#define __NR_prctl		172
-#define __NR_rt_sigreturn	173
-#define __NR_rt_sigaction	174
-#define __NR_rt_sigprocmask	175
-#define __NR_rt_sigpending	176
-#define __NR_rt_sigtimedwait	177
-#define __NR_rt_sigqueueinfo	178
-#define __NR_rt_sigsuspend	179
-#define __NR_pread		180
-#define __NR_pwrite		181
-#define __NR_lchown		182
-#define __NR_getcwd		183
-#define __NR_capget		184
-#define __NR_capset		185
-#define __NR_sigaltstack	186
-#define __NR_sendfile		187
-#define __NR_getpmsg		188	/* some people actually want streams */
-#define __NR_putpmsg		189	/* some people actually want streams */
-#define __NR_vfork		190
-#define __NR_mmap2		192
-#define __NR_truncate64		193
-#define __NR_ftruncate64	194
-#define __NR_stat64		195
-#define __NR_lstat64		196
-#define __NR_fstat64		197
-#define __NR_fcntl64		198
-#define __NR_getdents64		199
-#define __NR_pivot_root		200
-#define __NR_gettid		201
-#define __NR_tkill		202
-
-#ifdef __KERNEL__
-
-#define __ARCH_WANT_IPC_PARSE_VERSION
-#define __ARCH_WANT_OLD_READDIR
-#define __ARCH_WANT_STAT64
-#define __ARCH_WANT_SYS_ALARM
-#define __ARCH_WANT_SYS_GETHOSTNAME
-#define __ARCH_WANT_SYS_PAUSE
-#define __ARCH_WANT_SYS_SGETMASK
-#define __ARCH_WANT_SYS_SIGNAL
-#define __ARCH_WANT_SYS_TIME
-#define __ARCH_WANT_SYS_UTIME
-#define __ARCH_WANT_SYS_WAITPID
-#define __ARCH_WANT_SYS_SOCKETCALL
-#define __ARCH_WANT_SYS_FADVISE64
-#define __ARCH_WANT_SYS_GETPGRP
-#define __ARCH_WANT_SYS_LLSEEK
-#define __ARCH_WANT_SYS_NICE
-#define __ARCH_WANT_SYS_OLDUMOUNT
-#define __ARCH_WANT_SYS_SIGPENDING
-#define __ARCH_WANT_SYS_SIGPROCMASK
-#define __ARCH_WANT_SYS_RT_SIGACTION
-
-/*
- * "Conditional" syscalls
- */
-#define cond_syscall(name)						      \
-  asm (".weak\t" C_SYMBOL_STRING(name) ";"				      \
-       ".set\t" C_SYMBOL_STRING(name) "," C_SYMBOL_STRING(sys_ni_syscall))
-#if 0
-/* This doesn't work if there's a function prototype for NAME visible,
-   because the argument types probably won't match.  */
-#define cond_syscall(name)  \
-  void name (void) __attribute__ ((weak, alias ("sys_ni_syscall")));
-#endif
-
-#endif /* __KERNEL__ */
-#endif /* __V850_UNISTD_H__ */
diff --git a/include/asm-v850/user.h b/include/asm-v850/user.h
deleted file mode 100644
index 63cdc567d272..000000000000
--- a/include/asm-v850/user.h
+++ /dev/null
@@ -1,52 +0,0 @@
-#ifndef __V850_USER_H__
-#define __V850_USER_H__
-
-/* Adapted from <asm-ppc/user.h>.  */
-
-#include <linux/ptrace.h>
-#include <asm/page.h>
-
-/*
- * Core file format: The core file is written in such a way that gdb
- * can understand it and provide useful information to the user (under
- * linux we use the `trad-core' bfd, NOT the osf-core).  The file contents
- * are as follows:
- *
- *  upage: 1 page consisting of a user struct that tells gdb
- *	what is present in the file.  Directly after this is a
- *	copy of the task_struct, which is currently not used by gdb,
- *	but it may come in handy at some point.  All of the registers
- *	are stored as part of the upage.  The upage should always be
- *	only one page long.
- *  data: The data segment follows next.  We use current->end_text to
- *	current->brk to pick up all of the user variables, plus any memory
- *	that may have been sbrk'ed.  No attempt is made to determine if a
- *	page is demand-zero or if a page is totally unused, we just cover
- *	the entire range.  All of the addresses are rounded in such a way
- *	that an integral number of pages is written.
- *  stack: We need the stack information in order to get a meaningful
- *	backtrace.  We need to write the data from usp to
- *	current->start_stack, so we round each of these in order to be able
- *	to write an integer number of pages.
- */
-struct user {
-	struct pt_regs	regs;			/* entire machine state */
-	size_t		u_tsize;		/* text size (pages) */
-	size_t		u_dsize;		/* data size (pages) */
-	size_t		u_ssize;		/* stack size (pages) */
-	unsigned long	start_code;		/* text starting address */
-	unsigned long	start_data;		/* data starting address */
-	unsigned long	start_stack;		/* stack starting address */
-	long int	signal;			/* signal causing core dump */
-	unsigned long	u_ar0;			/* help gdb find registers */
-	unsigned long	magic;			/* identifies a core file */
-	char		u_comm[32];		/* user command name */
-};
-
-#define NBPG			PAGE_SIZE
-#define UPAGES			1
-#define HOST_TEXT_START_ADDR	(u.start_code)
-#define HOST_DATA_START_ADDR	(u.start_data)
-#define HOST_STACK_END_ADDR	(u.start_stack + u.u_ssize * NBPG)
-
-#endif /* __V850_USER_H__ */
diff --git a/include/asm-v850/v850e.h b/include/asm-v850/v850e.h
deleted file mode 100644
index 5a222eb5117f..000000000000
--- a/include/asm-v850/v850e.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * include/asm-v850/v850e.h -- V850E CPU
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_V850E_H__
-#define __V850_V850E_H__
-
-#include <asm/v850e_intc.h>
-
-#define CPU_ARCH "v850e"
-
-#endif /* __V850_V850E_H__ */
diff --git a/include/asm-v850/v850e2.h b/include/asm-v850/v850e2.h
deleted file mode 100644
index 48680408ab7e..000000000000
--- a/include/asm-v850/v850e2.h
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * include/asm-v850/v850e2.h -- Machine-dependent defs for V850E2 CPUs
- *
- *  Copyright (C) 2002,03  NEC Electronics Corporation
- *  Copyright (C) 2002,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_V850E2_H__
-#define __V850_V850E2_H__
-
-#include <asm/v850e_intc.h>	/* v850e-style interrupt system.  */
-
-
-#define CPU_ARCH "v850e2"
-
-
-/* Control registers.  */
-
-/* Chip area select control */ 
-#define V850E2_CSC_ADDR(n)	(0xFFFFF060 + (n) * 2)
-#define V850E2_CSC(n)		(*(volatile u16 *)V850E2_CSC_ADDR(n))
-/* I/O area select control */
-#define V850E2_BPC_ADDR		0xFFFFF064
-#define V850E2_BPC		(*(volatile u16 *)V850E2_BPC_ADDR)
-/* Bus size configuration */
-#define V850E2_BSC_ADDR		0xFFFFF066
-#define V850E2_BSC		(*(volatile u16 *)V850E2_BSC_ADDR)
-/* Endian configuration */
-#define V850E2_BEC_ADDR		0xFFFFF068
-#define V850E2_BEC		(*(volatile u16 *)V850E2_BEC_ADDR)
-/* Cache configuration */
-#define V850E2_BHC_ADDR		0xFFFFF06A
-#define V850E2_BHC		(*(volatile u16 *)V850E2_BHC_ADDR)
-/* NPB strobe-wait configuration */
-#define V850E2_VSWC_ADDR	0xFFFFF06E
-#define V850E2_VSWC		(*(volatile u16 *)V850E2_VSWC_ADDR)
-/* Bus cycle type */
-#define V850E2_BCT_ADDR(n)	(0xFFFFF480 + (n) * 2)
-#define V850E2_BCT(n)		(*(volatile u16 *)V850E2_BCT_ADDR(n))
-/* Data wait control */
-#define V850E2_DWC_ADDR(n)	(0xFFFFF484 + (n) * 2)
-#define V850E2_DWC(n)		(*(volatile u16 *)V850E2_DWC_ADDR(n))
-/* Bus cycle control */
-#define V850E2_BCC_ADDR		0xFFFFF488
-#define V850E2_BCC		(*(volatile u16 *)V850E2_BCC_ADDR)
-/* Address wait control */
-#define V850E2_ASC_ADDR		0xFFFFF48A
-#define V850E2_ASC		(*(volatile u16 *)V850E2_ASC_ADDR)
-/* Local bus sizing control */
-#define V850E2_LBS_ADDR		0xFFFFF48E
-#define V850E2_LBS		(*(volatile u16 *)V850E2_LBS_ADDR)
-/* Line buffer control */
-#define V850E2_LBC_ADDR(n)	(0xFFFFF490 + (n) * 2)
-#define V850E2_LBC(n)		(*(volatile u16 *)V850E2_LBC_ADDR(n))
-/* SDRAM configuration */
-#define V850E2_SCR_ADDR(n)	(0xFFFFF4A0 + (n) * 4)
-#define V850E2_SCR(n)		(*(volatile u16 *)V850E2_SCR_ADDR(n))
-/* SDRAM refresh cycle control */
-#define V850E2_RFS_ADDR(n)	(0xFFFFF4A2 + (n) * 4)
-#define V850E2_RFS(n)		(*(volatile u16 *)V850E2_RFS_ADDR(n))
-
-
-#endif /* __V850_V850E2_H__ */
diff --git a/include/asm-v850/v850e2_cache.h b/include/asm-v850/v850e2_cache.h
deleted file mode 100644
index 87edf0d311d5..000000000000
--- a/include/asm-v850/v850e2_cache.h
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * include/asm-v850/v850e2_cache_cache.h -- Cache control for V850E2
- * 	cache memories
- *
- *  Copyright (C) 2003,05  NEC Electronics Corporation
- *  Copyright (C) 2003,05  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_V850E2_CACHE_H__
-#define __V850_V850E2_CACHE_H__
-
-#include <asm/types.h>
-
-
-/* Cache control registers.  */
-
-/* Bus Transaction Control */
-#define V850E2_CACHE_BTSC_ADDR	0xFFFFF070
-#define V850E2_CACHE_BTSC 	(*(volatile u16 *)V850E2_CACHE_BTSC_ADDR)
-#define V850E2_CACHE_BTSC_ICM	0x0001 /* icache enable */
-#define V850E2_CACHE_BTSC_DCM0	0x0004 /* dcache enable, bit 0 */
-#define V850E2_CACHE_BTSC_DCM1	0x0008 /* dcache enable, bit 1 */
-#define V850E2_CACHE_BTSC_DCM_WT		      /* write-through */ \
-			V850E2_CACHE_BTSC_DCM0
-#ifdef CONFIG_V850E2_V850E2S
-# define V850E2_CACHE_BTSC_DCM_WB_NO_ALLOC    /* write-back, non-alloc */ \
-			V850E2_CACHE_BTSC_DCM1	
-# define V850E2_CACHE_BTSC_DCM_WB_ALLOC	      /* write-back, non-alloc */ \
-			(V850E2_CACHE_BTSC_DCM1 | V850E2_CACHE_BTSC_DCM0)
-# define V850E2_CACHE_BTSC_ISEQ	0x0010 /* icache `address sequence mode' */
-# define V850E2_CACHE_BTSC_DSEQ	0x0020 /* dcache `address sequence mode' */
-# define V850E2_CACHE_BTSC_IRFC	0x0030
-# define V850E2_CACHE_BTSC_ILCD	0x4000
-# define V850E2_CACHE_BTSC_VABE	0x8000
-#endif /* CONFIG_V850E2_V850E2S */
-
-/* Cache operation start address register (low-bits).  */
-#define V850E2_CACHE_CADL_ADDR	0xFFFFF074
-#define V850E2_CACHE_CADL 	(*(volatile u16 *)V850E2_CACHE_CADL_ADDR)
-/* Cache operation start address register (high-bits).  */
-#define V850E2_CACHE_CADH_ADDR	0xFFFFF076
-#define V850E2_CACHE_CADH 	(*(volatile u16 *)V850E2_CACHE_CADH_ADDR)
-/* Cache operation count register.  */
-#define V850E2_CACHE_CCNT_ADDR	0xFFFFF078
-#define V850E2_CACHE_CCNT 	(*(volatile u16 *)V850E2_CACHE_CCNT_ADDR)
-/* Cache operation specification register.  */
-#define V850E2_CACHE_COPR_ADDR	0xFFFFF07A
-#define V850E2_CACHE_COPR 	(*(volatile u16 *)V850E2_CACHE_COPR_ADDR)
-#define V850E2_CACHE_COPR_STRT	0x0001 /* start cache operation */
-#define V850E2_CACHE_COPR_LBSL	0x0100 /* 0 = icache, 1 = dcache */
-#define V850E2_CACHE_COPR_WSLE	0x0200 /* operate on cache way */
-#define V850E2_CACHE_COPR_WSL(way) ((way) * 0x0400) /* way select */
-#define V850E2_CACHE_COPR_CFC(op)  ((op)  * 0x1000) /* cache function code */
-
-
-/* Size of a cache line in bytes.  */
-#define V850E2_CACHE_LINE_SIZE_BITS	4
-#define V850E2_CACHE_LINE_SIZE		(1 << V850E2_CACHE_LINE_SIZE_BITS)
-
-/* The size of each cache `way' in lines.  */
-#define V850E2_CACHE_WAY_SIZE		256
-
-
-/* For <asm/cache.h> */
-#define L1_CACHE_BYTES			V850E2_CACHE_LINE_SIZE
-#define L1_CACHE_SHIFT			V850E2_CACHE_LINE_SIZE_BITS
-
-
-#endif /* __V850_V850E2_CACHE_H__ */
diff --git a/include/asm-v850/v850e_cache.h b/include/asm-v850/v850e_cache.h
deleted file mode 100644
index aa7d7eb9da50..000000000000
--- a/include/asm-v850/v850e_cache.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * include/asm-v850/v850e_cache.h -- Cache control for V850E cache memories
- *
- *  Copyright (C) 2001,03  NEC Electronics Corporation
- *  Copyright (C) 2001,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-/* This file implements cache control for the rather simple cache used on
-   some V850E CPUs, specifically the NB85E/TEG CPU-core and the V850E/ME2
-   CPU.  V850E2 processors have their own (better) cache
-   implementation.  */
-
-#ifndef __V850_V850E_CACHE_H__
-#define __V850_V850E_CACHE_H__
-
-#include <asm/types.h>
-
-
-/* Cache control registers.  */
-#define V850E_CACHE_BHC_ADDR	0xFFFFF06A
-#define V850E_CACHE_BHC		(*(volatile u16 *)V850E_CACHE_BHC_ADDR)
-#define V850E_CACHE_ICC_ADDR	0xFFFFF070
-#define V850E_CACHE_ICC		(*(volatile u16 *)V850E_CACHE_ICC_ADDR)
-#define V850E_CACHE_ISI_ADDR	0xFFFFF072
-#define V850E_CACHE_ISI		(*(volatile u16 *)V850E_CACHE_ISI_ADDR)
-#define V850E_CACHE_DCC_ADDR	0xFFFFF078
-#define V850E_CACHE_DCC		(*(volatile u16 *)V850E_CACHE_DCC_ADDR)
-
-/* Size of a cache line in bytes.  */
-#define V850E_CACHE_LINE_SIZE	16
-
-/* For <asm/cache.h> */
-#define L1_CACHE_BYTES		V850E_CACHE_LINE_SIZE
-
-
-#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
-/* Set caching params via the BHC, ICC, and DCC registers.  */
-void v850e_cache_enable (u16 bhc, u16 icc, u16 dcc);
-#endif /* __KERNEL__ && !__ASSEMBLY__ */
-
-
-#endif /* __V850_V850E_CACHE_H__ */
diff --git a/include/asm-v850/v850e_intc.h b/include/asm-v850/v850e_intc.h
deleted file mode 100644
index 6fdf95708317..000000000000
--- a/include/asm-v850/v850e_intc.h
+++ /dev/null
@@ -1,133 +0,0 @@
-/*
- * include/asm-v850/v850e_intc.h -- V850E CPU interrupt controller (INTC)
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_V850E_INTC_H__
-#define __V850_V850E_INTC_H__
-
-
-/* There are 4 16-bit `Interrupt Mask Registers' located contiguously
-   starting from this base.  Each interrupt uses a single bit to
-   indicated enabled/disabled status.  */
-#define V850E_INTC_IMR_BASE_ADDR  0xFFFFF100
-#define V850E_INTC_IMR_ADDR(irq)  (V850E_INTC_IMR_BASE_ADDR + ((irq) >> 3))
-#define V850E_INTC_IMR_BIT(irq)	  ((irq) & 0x7)
-
-/* Each maskable interrupt has a single-byte control register at this
-   address.  */
-#define V850E_INTC_IC_BASE_ADDR	  0xFFFFF110
-#define V850E_INTC_IC_ADDR(irq)	  (V850E_INTC_IC_BASE_ADDR + ((irq) << 1))
-#define V850E_INTC_IC(irq)	  (*(volatile u8 *)V850E_INTC_IC_ADDR(irq))
-/* Encode priority PR for storing in an interrupt control register.  */
-#define V850E_INTC_IC_PR(pr)	  (pr)
-/* Interrupt disable bit in an interrupt control register.  */
-#define V850E_INTC_IC_MK_BIT	  6
-#define V850E_INTC_IC_MK	  (1 << V850E_INTC_IC_MK_BIT)
-/* Interrupt pending flag in an interrupt control register.  */
-#define V850E_INTC_IC_IF_BIT	  7
-#define V850E_INTC_IC_IF	  (1 << V850E_INTC_IC_IF_BIT)
-
-/* The ISPR (In-service priority register) contains one bit for each interrupt
-   priority level, which is set to one when that level is currently being
-   serviced (and thus blocking any interrupts of equal or lesser level).  */
-#define V850E_INTC_ISPR_ADDR	  0xFFFFF1FA
-#define V850E_INTC_ISPR		  (*(volatile u8 *)V850E_INTC_ISPR_ADDR)
-
-
-#ifndef __ASSEMBLY__
-
-/* Enable interrupt handling for interrupt IRQ.  */
-static inline void v850e_intc_enable_irq (unsigned irq)
-{
-	__asm__ __volatile__ ("clr1 %0, [%1]"
-			      :: "r" (V850E_INTC_IMR_BIT (irq)),
-			         "r" (V850E_INTC_IMR_ADDR (irq))
-			      : "memory");
-}
-
-/* Disable interrupt handling for interrupt IRQ.  Note that any
-   interrupts received while disabled will be delivered once the
-   interrupt is enabled again, unless they are explicitly cleared using
-   `v850e_intc_clear_pending_irq'.  */
-static inline void v850e_intc_disable_irq (unsigned irq)
-{
-	__asm__ __volatile__ ("set1 %0, [%1]"
-			      :: "r" (V850E_INTC_IMR_BIT (irq)),
-			         "r" (V850E_INTC_IMR_ADDR (irq))
-			      : "memory");
-}
-
-/* Return true if interrupt handling for interrupt IRQ is enabled.  */
-static inline int v850e_intc_irq_enabled (unsigned irq)
-{
-	int rval;
-	__asm__ __volatile__ ("tst1 %1, [%2]; setf z, %0"
-			      : "=r" (rval)
-			      : "r" (V850E_INTC_IMR_BIT (irq)),
-			        "r" (V850E_INTC_IMR_ADDR (irq)));
-	return rval;
-}
-
-/* Disable irqs from 0 until LIMIT.  LIMIT must be a multiple of 8.  */
-static inline void _v850e_intc_disable_irqs (unsigned limit)
-{
-	unsigned long addr;
-	for (addr = V850E_INTC_IMR_BASE_ADDR; limit >= 8; addr++, limit -= 8)
-		*(char *)addr = 0xFF;
-}
-
-/* Disable all irqs.  This is purposely a macro, because NUM_MACH_IRQS
-   will be only be defined later.  */
-#define v850e_intc_disable_irqs()   _v850e_intc_disable_irqs (NUM_MACH_IRQS)
-
-/* Clear any pending interrupts for IRQ.  */
-static inline void v850e_intc_clear_pending_irq (unsigned irq)
-{
-	__asm__ __volatile__ ("clr1 %0, 0[%1]"
-			      :: "i" (V850E_INTC_IC_IF_BIT),
-			         "r" (V850E_INTC_IC_ADDR (irq))
-			      : "memory");
-}
-
-/* Return true if interrupt IRQ is pending (but disabled).  */
-static inline int v850e_intc_irq_pending (unsigned irq)
-{
-	int rval;
-	__asm__ __volatile__ ("tst1 %1, 0[%2]; setf nz, %0"
-			      : "=r" (rval)
-			      : "i" (V850E_INTC_IC_IF_BIT),
-			        "r" (V850E_INTC_IC_ADDR (irq)));
-	return rval;
-}
-
-
-struct v850e_intc_irq_init {
-	const char *name;	/* name of interrupt type */
-
-	/* Range of kernel irq numbers for this type:
-	   BASE, BASE+INTERVAL, ..., BASE+INTERVAL*NUM  */
-	unsigned base, num, interval;
-
-	unsigned priority;	/* interrupt priority to assign */
-};
-struct hw_interrupt_type;	/* fwd decl */
-
-/* Initialize HW_IRQ_TYPES for INTC-controlled irqs described in array
-   INITS (which is terminated by an entry with the name field == 0).  */
-extern void v850e_intc_init_irq_types (struct v850e_intc_irq_init *inits,
-				       struct hw_interrupt_type *hw_irq_types);
-
-
-#endif /* !__ASSEMBLY__ */
-
-
-#endif /* __V850_V850E_INTC_H__ */
diff --git a/include/asm-v850/v850e_timer_c.h b/include/asm-v850/v850e_timer_c.h
deleted file mode 100644
index f70575df6ea9..000000000000
--- a/include/asm-v850/v850e_timer_c.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * include/asm-v850/v850e_timer_c.h -- `Timer C' component often used
- *	with the V850E cpu core
- *
- *  Copyright (C) 2001,03  NEC Electronics Corporation
- *  Copyright (C) 2001,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-/* NOTE: this include file currently contains only enough to allow us to
-   use timer C as an interrupt pass-through.  */
-
-#ifndef __V850_V850E_TIMER_C_H__
-#define __V850_V850E_TIMER_C_H__
-
-#include <asm/types.h>
-#include <asm/machdep.h>	/* Pick up chip-specific defs.  */
-
-
-/* Timer C (16-bit interval timers).  */
-
-/* Control register 0 for timer C.  */
-#define V850E_TIMER_C_TMCC0_ADDR(n) (V850E_TIMER_C_BASE_ADDR + 0x6 + 0x10 *(n))
-#define V850E_TIMER_C_TMCC0(n)	  (*(volatile u8 *)V850E_TIMER_C_TMCC0_ADDR(n))
-#define V850E_TIMER_C_TMCC0_CAE	  0x01 /* clock action enable */
-#define V850E_TIMER_C_TMCC0_CE	  0x02 /* count enable */
-/* ... */
-
-/* Control register 1 for timer C.  */
-#define V850E_TIMER_C_TMCC1_ADDR(n) (V850E_TIMER_C_BASE_ADDR + 0x8 + 0x10 *(n))
-#define V850E_TIMER_C_TMCC1(n)	  (*(volatile u8 *)V850E_TIMER_C_TMCC1_ADDR(n))
-#define V850E_TIMER_C_TMCC1_CMS0  0x01 /* capture/compare mode select (ccc0) */
-#define V850E_TIMER_C_TMCC1_CMS1  0x02 /* capture/compare mode select (ccc1) */
-/* ... */
-
-/* Interrupt edge-sensitivity control for timer C.  */
-#define V850E_TIMER_C_SESC_ADDR(n) (V850E_TIMER_C_BASE_ADDR + 0x9 + 0x10 *(n))
-#define V850E_TIMER_C_SESC(n)	  (*(volatile u8 *)V850E_TIMER_C_SESC_ADDR(n))
-
-/* ...etc... */
-
-
-#endif /* __V850_V850E_TIMER_C_H__  */
diff --git a/include/asm-v850/v850e_timer_d.h b/include/asm-v850/v850e_timer_d.h
deleted file mode 100644
index 417612c5b22f..000000000000
--- a/include/asm-v850/v850e_timer_d.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * include/asm-v850/v850e_timer_d.h -- `Timer D' component often used
- *	with the V850E cpu core
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_V850E_TIMER_D_H__
-#define __V850_V850E_TIMER_D_H__
-
-#include <asm/types.h>
-#include <asm/machdep.h>	/* Pick up chip-specific defs.  */
-
-
-/* Timer D (16-bit interval timers).  */
-
-/* Count registers for timer D.  */
-#define V850E_TIMER_D_TMD_ADDR(n) (V850E_TIMER_D_TMD_BASE_ADDR + 0x10 * (n))
-#define V850E_TIMER_D_TMD(n)	  (*(volatile u16 *)V850E_TIMER_D_TMD_ADDR(n))
-
-/* Count compare registers for timer D.  */
-#define V850E_TIMER_D_CMD_ADDR(n) (V850E_TIMER_D_CMD_BASE_ADDR + 0x10 * (n))
-#define V850E_TIMER_D_CMD(n)	  (*(volatile u16 *)V850E_TIMER_D_CMD_ADDR(n))
-
-/* Control registers for timer D.  */
-#define V850E_TIMER_D_TMCD_ADDR(n) (V850E_TIMER_D_TMCD_BASE_ADDR + 0x10 * (n))
-#define V850E_TIMER_D_TMCD(n)	   (*(volatile u8 *)V850E_TIMER_D_TMCD_ADDR(n))
-/* Control bits for timer D.  */
-#define V850E_TIMER_D_TMCD_CE  	   0x2 /* count enable */
-#define V850E_TIMER_D_TMCD_CAE	   0x1 /* clock action enable */
-/* Clock divider setting (log2).  */
-#define V850E_TIMER_D_TMCD_CS(divlog2) (((divlog2) - V850E_TIMER_D_TMCD_CS_MIN) << 4)
-/* Minimum clock divider setting (log2).  */
-#ifndef V850E_TIMER_D_TMCD_CS_MIN /* Can be overridden by mach-specific hdrs */
-#define V850E_TIMER_D_TMCD_CS_MIN  2 /* Default is correct for the v850e/ma1 */
-#endif
-/* Maximum clock divider setting (log2).  */
-#define V850E_TIMER_D_TMCD_CS_MAX  (V850E_TIMER_D_TMCD_CS_MIN + 7)
-
-/* Return the clock-divider (log2) of timer D unit N.  */
-#define V850E_TIMER_D_DIVLOG2(n) \
-  (((V850E_TIMER_D_TMCD(n) >> 4) & 0x7) + V850E_TIMER_D_TMCD_CS_MIN)
-
-
-#ifndef __ASSEMBLY__
-
-/* Start interval timer TIMER (0-3).  The timer will issue the
-   corresponding INTCMD interrupt RATE times per second.  This function
-   does not enable the interrupt.  */
-extern void v850e_timer_d_configure (unsigned timer, unsigned rate);
-
-#endif /* !__ASSEMBLY__ */
-
-
-#endif /* __V850_V850E_TIMER_D_H__  */
diff --git a/include/asm-v850/v850e_uart.h b/include/asm-v850/v850e_uart.h
deleted file mode 100644
index 5182fb4cc989..000000000000
--- a/include/asm-v850/v850e_uart.h
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * include/asm-v850/v850e_uart.h -- common V850E on-chip UART driver
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-/* There's not actually a single UART implementation used by V850E CPUs,
-   but rather a series of implementations that are all `close' to one
-   another.  This file corresponds to the single driver which handles all
-   of them.  */
-
-#ifndef __V850_V850E_UART_H__
-#define __V850_V850E_UART_H__
-
-#include <linux/termios.h>
-
-#include <asm/v850e_utils.h>
-#include <asm/types.h>
-#include <asm/machdep.h>	/* Pick up chip-specific defs.  */
-
-
-/* Include model-specific definitions.  */
-#ifdef CONFIG_V850E_UART
-# ifdef CONFIG_V850E_UARTB
-#  include <asm-v850/v850e_uartb.h>
-# else
-#  include <asm-v850/v850e_uarta.h> /* original V850E UART */
-# endif
-#endif
-
-
-/* Optional capabilities some hardware provides.  */
-
-/* This UART doesn't implement RTS/CTS by default, but some platforms
-   implement them externally, so check to see if <asm/machdep.h> defined
-   anything.  */
-#ifdef V850E_UART_CTS
-#define v850e_uart_cts(n)		V850E_UART_CTS(n)
-#else
-#define v850e_uart_cts(n)		(1)
-#endif
-
-/* Do the same for RTS.  */
-#ifdef V850E_UART_SET_RTS
-#define v850e_uart_set_rts(n,v)		V850E_UART_SET_RTS(n,v)
-#else
-#define v850e_uart_set_rts(n,v)		((void)0)
-#endif
-
-
-/* This is the serial channel to use for the boot console (if desired).  */
-#ifndef V850E_UART_CONSOLE_CHANNEL
-# define V850E_UART_CONSOLE_CHANNEL 0
-#endif
-
-
-#ifndef __ASSEMBLY__
-
-/* Setup a console using channel 0 of the builtin uart.  */
-extern void v850e_uart_cons_init (unsigned chan);
-
-/* Configure and turn on uart channel CHAN, using the termios `control
-   modes' bits in CFLAGS, and a baud-rate of BAUD.  */
-void v850e_uart_configure (unsigned chan, unsigned cflags, unsigned baud);
-
-#endif /* !__ASSEMBLY__ */
-
-
-#endif /* __V850_V850E_UART_H__ */
diff --git a/include/asm-v850/v850e_uarta.h b/include/asm-v850/v850e_uarta.h
deleted file mode 100644
index e483e0950725..000000000000
--- a/include/asm-v850/v850e_uarta.h
+++ /dev/null
@@ -1,278 +0,0 @@
-/*
- * include/asm-v850/v850e_uarta.h -- original V850E on-chip UART
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-/* This is the original V850E UART implementation is called just `UART' in
-   the docs, but we name this header file <asm/v850e_uarta.h> because the
-   name <asm/v850e_uart.h> is used for the common driver that handles both
-   `UART' and `UARTB' implementations.  */
-
-#ifndef __V850_V850E_UARTA_H__
-#define __V850_V850E_UARTA_H__
-
-
-/* Raw hardware interface.  */
-
-/* The base address of the UART control registers for channel N.
-   The default is the address used on the V850E/MA1.  */
-#ifndef V850E_UART_BASE_ADDR
-#define V850E_UART_BASE_ADDR(n)		(0xFFFFFA00 + 0x10 * (n))
-#endif 
-
-/* Addresses of specific UART control registers for channel N.
-   The defaults are the addresses used on the V850E/MA1; if a platform
-   wants to redefine any of these, it must redefine them all.  */
-#ifndef V850E_UART_ASIM_ADDR
-#define V850E_UART_ASIM_ADDR(n)		(V850E_UART_BASE_ADDR(n) + 0x0)
-#define V850E_UART_RXB_ADDR(n)		(V850E_UART_BASE_ADDR(n) + 0x2)
-#define V850E_UART_ASIS_ADDR(n)		(V850E_UART_BASE_ADDR(n) + 0x3)
-#define V850E_UART_TXB_ADDR(n)		(V850E_UART_BASE_ADDR(n) + 0x4)
-#define V850E_UART_ASIF_ADDR(n)		(V850E_UART_BASE_ADDR(n) + 0x5)
-#define V850E_UART_CKSR_ADDR(n)		(V850E_UART_BASE_ADDR(n) + 0x6)
-#define V850E_UART_BRGC_ADDR(n)		(V850E_UART_BASE_ADDR(n) + 0x7)
-#endif
-
-/* UART config registers.  */
-#define V850E_UART_ASIM(n)	(*(volatile u8 *)V850E_UART_ASIM_ADDR(n))
-/* Control bits for config registers.  */
-#define V850E_UART_ASIM_CAE	0x80 /* clock enable */
-#define V850E_UART_ASIM_TXE	0x40 /* transmit enable */
-#define V850E_UART_ASIM_RXE	0x20 /* receive enable */
-#define V850E_UART_ASIM_PS_MASK	0x18 /* mask covering parity-select bits */
-#define V850E_UART_ASIM_PS_NONE	0x00 /* no parity */
-#define V850E_UART_ASIM_PS_ZERO	0x08 /* zero parity */
-#define V850E_UART_ASIM_PS_ODD	0x10 /* odd parity */
-#define V850E_UART_ASIM_PS_EVEN	0x18 /* even parity */
-#define V850E_UART_ASIM_CL_8	0x04 /* char len is 8 bits (otherwise, 7) */
-#define V850E_UART_ASIM_SL_2	0x02 /* 2 stop bits (otherwise, 1) */
-#define V850E_UART_ASIM_ISRM	0x01 /* generate INTSR interrupt on errors
-					(otherwise, generate INTSER) */
-
-/* UART serial interface status registers.  */
-#define V850E_UART_ASIS(n)	(*(volatile u8 *)V850E_UART_ASIS_ADDR(n))
-/* Control bits for status registers.  */
-#define V850E_UART_ASIS_PE	0x04 /* parity error */
-#define V850E_UART_ASIS_FE	0x02 /* framing error */
-#define V850E_UART_ASIS_OVE	0x01 /* overrun error */
-
-/* UART serial interface transmission status registers.  */
-#define V850E_UART_ASIF(n)	(*(volatile u8 *)V850E_UART_ASIF_ADDR(n))
-#define V850E_UART_ASIF_TXBF	0x02 /* transmit buffer flag (data in TXB) */
-#define V850E_UART_ASIF_TXSF	0x01 /* transmit shift flag (sending data) */
-
-/* UART receive buffer register.  */
-#define V850E_UART_RXB(n)	(*(volatile u8 *)V850E_UART_RXB_ADDR(n))
-
-/* UART transmit buffer register.  */
-#define V850E_UART_TXB(n)	(*(volatile u8 *)V850E_UART_TXB_ADDR(n))
-
-/* UART baud-rate generator control registers.  */
-#define V850E_UART_CKSR(n)	(*(volatile u8 *)V850E_UART_CKSR_ADDR(n))
-#define V850E_UART_CKSR_MAX	11
-#define V850E_UART_BRGC(n)	(*(volatile u8 *)V850E_UART_BRGC_ADDR(n))
-#define V850E_UART_BRGC_MIN	8
-
-
-#ifndef V850E_UART_CKSR_MAX_FREQ
-#define V850E_UART_CKSR_MAX_FREQ (25*1000*1000)
-#endif
-
-/* Calculate the minimum value for CKSR on this processor.  */
-static inline unsigned v850e_uart_cksr_min (void)
-{
-	int min = 0;
-	unsigned freq = V850E_UART_BASE_FREQ;
-	while (freq > V850E_UART_CKSR_MAX_FREQ) {
-		freq >>= 1;
-		min++;
-	}
-	return min;
-}
-
-
-/* Slightly abstract interface used by driver.  */
-
-
-/* Interrupts used by the UART.  */
-
-/* Received when the most recently transmitted character has been sent.  */
-#define V850E_UART_TX_IRQ(chan)		IRQ_INTST (chan)
-/* Received when a new character has been received.  */
-#define V850E_UART_RX_IRQ(chan)		IRQ_INTSR (chan)
-
-
-/* UART clock generator interface.  */
-
-/* This type encapsulates a particular uart frequency.  */
-typedef struct {
-	unsigned clk_divlog2;
-	unsigned brgen_count;
-} v850e_uart_speed_t;
-
-/* Calculate a uart speed from BAUD for this uart.  */
-static inline v850e_uart_speed_t v850e_uart_calc_speed (unsigned baud)
-{
-	v850e_uart_speed_t speed;
-
-	/* Calculate the log2 clock divider and baud-rate counter values
-	   (note that the UART divides the resulting clock by 2, so
-	   multiply BAUD by 2 here to compensate).  */
-	calc_counter_params (V850E_UART_BASE_FREQ, baud * 2,
-			     v850e_uart_cksr_min(),
-			     V850E_UART_CKSR_MAX, 8/*bits*/,
-			     &speed.clk_divlog2, &speed.brgen_count);
-
-	return speed;
-}
-
-/* Return the current speed of uart channel CHAN.  */
-static inline v850e_uart_speed_t v850e_uart_speed (unsigned chan)
-{
-	v850e_uart_speed_t speed;
-	speed.clk_divlog2 = V850E_UART_CKSR (chan);
-	speed.brgen_count = V850E_UART_BRGC (chan);
-	return speed;
-}
-
-/* Set the current speed of uart channel CHAN.  */
-static inline void v850e_uart_set_speed(unsigned chan,v850e_uart_speed_t speed)
-{
-	V850E_UART_CKSR (chan) = speed.clk_divlog2;
-	V850E_UART_BRGC (chan) = speed.brgen_count;
-}
-
-static inline int
-v850e_uart_speed_eq (v850e_uart_speed_t speed1, v850e_uart_speed_t speed2)
-{
-	return speed1.clk_divlog2 == speed2.clk_divlog2
-		&& speed1.brgen_count == speed2.brgen_count;
-}
-
-/* Minimum baud rate possible.  */
-#define v850e_uart_min_baud() \
-   ((V850E_UART_BASE_FREQ >> V850E_UART_CKSR_MAX) / (2 * 255) + 1)
-
-/* Maximum baud rate possible.  The error is quite high at max, though.  */
-#define v850e_uart_max_baud() \
-   ((V850E_UART_BASE_FREQ >> v850e_uart_cksr_min()) / (2 *V850E_UART_BRGC_MIN))
-
-/* The `maximum' clock rate the uart can used, which is wanted (though not
-   really used in any useful way) by the serial framework.  */
-#define v850e_uart_max_clock() \
-   ((V850E_UART_BASE_FREQ >> v850e_uart_cksr_min()) / 2)
-
-
-/* UART configuration interface.  */
-
-/* Type of the uart config register; must be a scalar.  */
-typedef u16 v850e_uart_config_t;
-
-/* The uart hardware config register for channel CHAN.  */
-#define V850E_UART_CONFIG(chan)		V850E_UART_ASIM (chan)
-
-/* This config bit set if the uart is enabled.  */
-#define V850E_UART_CONFIG_ENABLED	V850E_UART_ASIM_CAE
-/* If the uart _isn't_ enabled, store this value to it to do so.  */
-#define V850E_UART_CONFIG_INIT		V850E_UART_ASIM_CAE
-/* Store this config value to disable the uart channel completely.  */
-#define V850E_UART_CONFIG_FINI		0
-
-/* Setting/clearing these bits enable/disable TX/RX, respectively (but
-   otherwise generally leave things running).  */
-#define V850E_UART_CONFIG_RX_ENABLE	V850E_UART_ASIM_RXE
-#define V850E_UART_CONFIG_TX_ENABLE	V850E_UART_ASIM_TXE
-
-/* These masks define which config bits affect TX/RX modes, respectively.  */
-#define V850E_UART_CONFIG_RX_BITS \
-  (V850E_UART_ASIM_PS_MASK | V850E_UART_ASIM_CL_8 | V850E_UART_ASIM_ISRM)
-#define V850E_UART_CONFIG_TX_BITS \
-  (V850E_UART_ASIM_PS_MASK | V850E_UART_ASIM_CL_8 | V850E_UART_ASIM_SL_2)
-
-static inline v850e_uart_config_t v850e_uart_calc_config (unsigned cflags)
-{
-	v850e_uart_config_t config = 0;
-
-	/* Figure out new configuration of control register.  */
-	if (cflags & CSTOPB)
-		/* Number of stop bits, 1 or 2.  */
-		config |= V850E_UART_ASIM_SL_2;
-	if ((cflags & CSIZE) == CS8)
-		/* Number of data bits, 7 or 8.  */
-		config |= V850E_UART_ASIM_CL_8;
-	if (! (cflags & PARENB))
-		/* No parity check/generation.  */
-		config |= V850E_UART_ASIM_PS_NONE;
-	else if (cflags & PARODD)
-		/* Odd parity check/generation.  */
-		config |= V850E_UART_ASIM_PS_ODD;
-	else
-		/* Even parity check/generation.  */
-		config |= V850E_UART_ASIM_PS_EVEN;
-	if (cflags & CREAD)
-		/* Reading enabled.  */
-		config |= V850E_UART_ASIM_RXE;
-
-	config |= V850E_UART_ASIM_CAE;
-	config |= V850E_UART_ASIM_TXE; /* Writing is always enabled.  */
-	config |= V850E_UART_ASIM_ISRM; /* Errors generate a read-irq.  */
-
-	return config;
-}
-
-/* This should delay as long as necessary for a recently written config
-   setting to settle, before we turn the uart back on.  */
-static inline void
-v850e_uart_config_delay (v850e_uart_config_t config, v850e_uart_speed_t speed)
-{
-	/* The UART may not be reset properly unless we wait at least 2
-	   `basic-clocks' until turning on the TXE/RXE bits again.
-	   A `basic clock' is the clock used by the baud-rate generator,
-	   i.e., the cpu clock divided by the 2^new_clk_divlog2.
-	   The loop takes 2 insns, so loop CYCLES / 2 times.  */
-	register unsigned count = 1 << speed.clk_divlog2;
-	while (--count != 0)
-		/* nothing */;
-}
-
-
-/* RX/TX interface.  */
-
-/* Return true if all characters awaiting transmission on uart channel N
-   have been transmitted.  */
-#define v850e_uart_xmit_done(n)						      \
-   (! (V850E_UART_ASIF(n) & V850E_UART_ASIF_TXBF))
-/* Wait for this to be true.  */
-#define v850e_uart_wait_for_xmit_done(n)				      \
-   do { } while (! v850e_uart_xmit_done (n))
-
-/* Return true if uart channel N is ready to transmit a character.  */
-#define v850e_uart_xmit_ok(n)						      \
-   (v850e_uart_xmit_done(n) && v850e_uart_cts(n))
-/* Wait for this to be true.  */
-#define v850e_uart_wait_for_xmit_ok(n)					      \
-   do { } while (! v850e_uart_xmit_ok (n))
-
-/* Write character CH to uart channel CHAN.  */
-#define v850e_uart_putc(chan, ch)	(V850E_UART_TXB(chan) = (ch))
-
-/* Return latest character read on channel CHAN.  */
-#define v850e_uart_getc(chan)		V850E_UART_RXB (chan)
-
-/* Return bit-mask of uart error status.  */
-#define v850e_uart_err(chan)		V850E_UART_ASIS (chan)
-/* Various error bits set in the error result.  */
-#define V850E_UART_ERR_OVERRUN		V850E_UART_ASIS_OVE
-#define V850E_UART_ERR_FRAME		V850E_UART_ASIS_FE
-#define V850E_UART_ERR_PARITY		V850E_UART_ASIS_PE
-
-
-#endif /* __V850_V850E_UARTA_H__ */
diff --git a/include/asm-v850/v850e_uartb.h b/include/asm-v850/v850e_uartb.h
deleted file mode 100644
index 6d4767d5a835..000000000000
--- a/include/asm-v850/v850e_uartb.h
+++ /dev/null
@@ -1,262 +0,0 @@
-/*
- * include/asm-v850/v850e_uartb.h -- V850E on-chip `UARTB' UART
- *
- *  Copyright (C) 2001,02,03  NEC Electronics Corporation
- *  Copyright (C) 2001,02,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-/* The V850E UARTB is basically a superset of the original V850E UART, but
-   even where it's the same, the names and details have changed a bit.
-   It's similar enough to use the same driver (v850e_uart.c), but the
-   details have been abstracted slightly to do so.  */
-
-#ifndef __V850_V850E_UARTB_H__
-#define __V850_V850E_UARTB_H__
-
-
-/* Raw hardware interface.  */
-
-#define V850E_UARTB_BASE_ADDR(n)	(0xFFFFFA00 + 0x10 * (n))
-
-/* Addresses of specific UART control registers for channel N.  */
-#define V850E_UARTB_CTL0_ADDR(n)	(V850E_UARTB_BASE_ADDR(n) + 0x0)
-#define V850E_UARTB_CTL2_ADDR(n)	(V850E_UARTB_BASE_ADDR(n) + 0x2)
-#define V850E_UARTB_STR_ADDR(n)		(V850E_UARTB_BASE_ADDR(n) + 0x4)
-#define V850E_UARTB_RX_ADDR(n)		(V850E_UARTB_BASE_ADDR(n) + 0x6)
-#define V850E_UARTB_RXAP_ADDR(n)	(V850E_UARTB_BASE_ADDR(n) + 0x6)
-#define V850E_UARTB_TX_ADDR(n)		(V850E_UARTB_BASE_ADDR(n) + 0x8)
-#define V850E_UARTB_FIC0_ADDR(n)	(V850E_UARTB_BASE_ADDR(n) + 0xA)
-#define V850E_UARTB_FIC1_ADDR(n)	(V850E_UARTB_BASE_ADDR(n) + 0xB)
-#define V850E_UARTB_FIC2_ADDR(n)	(V850E_UARTB_BASE_ADDR(n) + 0xC)
-#define V850E_UARTB_FIS0_ADDR(n)	(V850E_UARTB_BASE_ADDR(n) + 0xE)
-#define V850E_UARTB_FIS1_ADDR(n)	(V850E_UARTB_BASE_ADDR(n) + 0xF)
-
-/* UARTB control register 0 (general config).  */
-#define V850E_UARTB_CTL0(n)	(*(volatile u8 *)V850E_UARTB_CTL0_ADDR(n))
-/* Control bits for config registers.  */
-#define V850E_UARTB_CTL0_PWR		0x80	/* clock enable */
-#define V850E_UARTB_CTL0_TXE		0x40	/* transmit enable */
-#define V850E_UARTB_CTL0_RXE		0x20	/* receive enable */
-#define V850E_UARTB_CTL0_DIR		0x10	/*  */
-#define V850E_UARTB_CTL0_PS1		0x08	/* parity */
-#define V850E_UARTB_CTL0_PS0		0x04	/* parity */
-#define V850E_UARTB_CTL0_CL		0x02	/* char len 1:8bit, 0:7bit */
-#define V850E_UARTB_CTL0_SL		0x01	/* stop bit 1:2bit, 0:1bit */
-#define V850E_UARTB_CTL0_PS_MASK	0x0C	/* mask covering parity bits */
-#define V850E_UARTB_CTL0_PS_NONE	0x00	/* no parity */
-#define V850E_UARTB_CTL0_PS_ZERO	0x04	/* zero parity */
-#define V850E_UARTB_CTL0_PS_ODD		0x08	/* odd parity */
-#define V850E_UARTB_CTL0_PS_EVEN	0x0C	/* even parity */
-#define V850E_UARTB_CTL0_CL_8		0x02	/* char len 1:8bit, 0:7bit */
-#define V850E_UARTB_CTL0_SL_2		0x01	/* stop bit 1:2bit, 0:1bit */
-
-/* UARTB control register 2 (clock divider).  */
-#define V850E_UARTB_CTL2(n)	(*(volatile u16 *)V850E_UARTB_CTL2_ADDR(n))
-#define V850E_UARTB_CTL2_MIN	4
-#define V850E_UARTB_CTL2_MAX	0xFFFF
-
-/* UARTB serial interface status register.  */
-#define V850E_UARTB_STR(n)	(*(volatile u8 *)V850E_UARTB_STR_ADDR(n))
-/* Control bits for status registers.  */
-#define V850E_UARTB_STR_TSF	0x80	/* UBTX or FIFO exist data  */
-#define V850E_UARTB_STR_OVF	0x08	/* overflow error */
-#define V850E_UARTB_STR_PE	0x04	/* parity error */
-#define V850E_UARTB_STR_FE	0x02	/* framing error */
-#define V850E_UARTB_STR_OVE	0x01	/* overrun error */
-
-/* UARTB receive data register.  */
-#define V850E_UARTB_RX(n)	(*(volatile u8 *)V850E_UARTB_RX_ADDR(n))
-#define V850E_UARTB_RXAP(n)	(*(volatile u16 *)V850E_UARTB_RXAP_ADDR(n))
-/* Control bits for status registers.  */
-#define V850E_UARTB_RXAP_PEF	0x0200 /* parity error */
-#define V850E_UARTB_RXAP_FEF	0x0100 /* framing error */
-
-/* UARTB transmit data register.  */
-#define V850E_UARTB_TX(n)	(*(volatile u8 *)V850E_UARTB_TX_ADDR(n))
-
-/* UARTB FIFO control register 0.  */
-#define V850E_UARTB_FIC0(n)	(*(volatile u8 *)V850E_UARTB_FIC0_ADDR(n))
-
-/* UARTB FIFO control register 1.  */
-#define V850E_UARTB_FIC1(n)	(*(volatile u8 *)V850E_UARTB_FIC1_ADDR(n))
-
-/* UARTB FIFO control register 2.  */
-#define V850E_UARTB_FIC2(n)	(*(volatile u16 *)V850E_UARTB_FIC2_ADDR(n))
-
-/* UARTB FIFO status register 0.  */
-#define V850E_UARTB_FIS0(n)	(*(volatile u8 *)V850E_UARTB_FIS0_ADDR(n))
-
-/* UARTB FIFO status register 1.  */
-#define V850E_UARTB_FIS1(n)	(*(volatile u8 *)V850E_UARTB_FIS1_ADDR(n))
-
-
-/* Slightly abstract interface used by driver.  */
-
-
-/* Interrupts used by the UART.  */
-
-/* Received when the most recently transmitted character has been sent.  */
-#define V850E_UART_TX_IRQ(chan)		IRQ_INTUBTIT (chan)
-/* Received when a new character has been received.  */
-#define V850E_UART_RX_IRQ(chan)		IRQ_INTUBTIR (chan)
-
-/* Use by serial driver for information purposes.  */
-#define V850E_UART_BASE_ADDR(chan)	V850E_UARTB_BASE_ADDR(chan)
-
-
-/* UART clock generator interface.  */
-
-/* This type encapsulates a particular uart frequency.  */
-typedef u16 v850e_uart_speed_t;
-
-/* Calculate a uart speed from BAUD for this uart.  */
-static inline v850e_uart_speed_t v850e_uart_calc_speed (unsigned baud)
-{
-	v850e_uart_speed_t speed;
-
-	/*
-	 * V850E/ME2 UARTB baud rate is determined by the value of UBCTL2
-	 * fx = V850E_UARTB_BASE_FREQ = CPU_CLOCK_FREQ/4
-	 * baud = fx / 2*speed   [ speed >= 4 ]
-	 */
-	speed = V850E_UARTB_CTL2_MIN;
-	while (((V850E_UARTB_BASE_FREQ / 2) / speed ) > baud)
-		speed++;
-
-	return speed;
-}
-
-/* Return the current speed of uart channel CHAN.  */
-#define v850e_uart_speed(chan)		    V850E_UARTB_CTL2 (chan)
-
-/* Set the current speed of uart channel CHAN.  */
-#define v850e_uart_set_speed(chan, speed)   (V850E_UARTB_CTL2 (chan) = (speed))
-
-/* Return true if SPEED1 and SPEED2 are the same.  */
-#define v850e_uart_speed_eq(speed1, speed2) ((speed1) == (speed2))
-
-/* Minimum baud rate possible.  */
-#define v850e_uart_min_baud() \
-   ((V850E_UARTB_BASE_FREQ / 2) / V850E_UARTB_CTL2_MAX)
-
-/* Maximum baud rate possible.  The error is quite high at max, though.  */
-#define v850e_uart_max_baud() \
-   ((V850E_UARTB_BASE_FREQ / 2) / V850E_UARTB_CTL2_MIN)
-
-/* The `maximum' clock rate the uart can used, which is wanted (though not
-   really used in any useful way) by the serial framework.  */
-#define v850e_uart_max_clock() \
-   (V850E_UARTB_BASE_FREQ / 2)
-
-
-/* UART configuration interface.  */
-
-/* Type of the uart config register; must be a scalar.  */
-typedef u16 v850e_uart_config_t;
-
-/* The uart hardware config register for channel CHAN.  */
-#define V850E_UART_CONFIG(chan)		V850E_UARTB_CTL0 (chan)
-
-/* This config bit set if the uart is enabled.  */
-#define V850E_UART_CONFIG_ENABLED	V850E_UARTB_CTL0_PWR
-/* If the uart _isn't_ enabled, store this value to it to do so.  */
-#define V850E_UART_CONFIG_INIT		V850E_UARTB_CTL0_PWR
-/* Store this config value to disable the uart channel completely.  */
-#define V850E_UART_CONFIG_FINI		0
-
-/* Setting/clearing these bits enable/disable TX/RX, respectively (but
-   otherwise generally leave things running).  */
-#define V850E_UART_CONFIG_RX_ENABLE	V850E_UARTB_CTL0_RXE
-#define V850E_UART_CONFIG_TX_ENABLE	V850E_UARTB_CTL0_TXE
-
-/* These masks define which config bits affect TX/RX modes, respectively.  */
-#define V850E_UART_CONFIG_RX_BITS \
-  (V850E_UARTB_CTL0_PS_MASK | V850E_UARTB_CTL0_CL_8)
-#define V850E_UART_CONFIG_TX_BITS \
-  (V850E_UARTB_CTL0_PS_MASK | V850E_UARTB_CTL0_CL_8 | V850E_UARTB_CTL0_SL_2)
-
-static inline v850e_uart_config_t v850e_uart_calc_config (unsigned cflags)
-{
-	v850e_uart_config_t config = 0;
-
-	/* Figure out new configuration of control register.  */
-	if (cflags & CSTOPB)
-		/* Number of stop bits, 1 or 2.  */
-		config |= V850E_UARTB_CTL0_SL_2;
-	if ((cflags & CSIZE) == CS8)
-		/* Number of data bits, 7 or 8.  */
-		config |= V850E_UARTB_CTL0_CL_8;
-	if (! (cflags & PARENB))
-		/* No parity check/generation.  */
-		config |= V850E_UARTB_CTL0_PS_NONE;
-	else if (cflags & PARODD)
-		/* Odd parity check/generation.  */
-		config |= V850E_UARTB_CTL0_PS_ODD;
-	else
-		/* Even parity check/generation.  */
-		config |= V850E_UARTB_CTL0_PS_EVEN;
-	if (cflags & CREAD)
-		/* Reading enabled.  */
-		config |= V850E_UARTB_CTL0_RXE;
-
-	config |= V850E_UARTB_CTL0_PWR;
-	config |= V850E_UARTB_CTL0_TXE; /* Writing is always enabled.  */
-	config |= V850E_UARTB_CTL0_DIR; /* LSB first.  */
-
-	return config;
-}
-
-/* This should delay as long as necessary for a recently written config
-   setting to settle, before we turn the uart back on.  */
-static inline void
-v850e_uart_config_delay (v850e_uart_config_t config, v850e_uart_speed_t speed)
-{
-	/* The UART may not be reset properly unless we wait at least 2
-	   `basic-clocks' until turning on the TXE/RXE bits again.
-	   A `basic clock' is the clock used by the baud-rate generator,
-	   i.e., the cpu clock divided by the 2^new_clk_divlog2.
-	   The loop takes 2 insns, so loop CYCLES / 2 times.  */
-	register unsigned count = 1 << speed;
-	while (--count != 0)
-		/* nothing */;
-}
-
-
-/* RX/TX interface.  */
-
-/* Return true if all characters awaiting transmission on uart channel N
-   have been transmitted.  */
-#define v850e_uart_xmit_done(n)						      \
-   (! (V850E_UARTB_STR(n) & V850E_UARTB_STR_TSF))
-/* Wait for this to be true.  */
-#define v850e_uart_wait_for_xmit_done(n)				      \
-   do { } while (! v850e_uart_xmit_done (n))
-
-/* Return true if uart channel N is ready to transmit a character.  */
-#define v850e_uart_xmit_ok(n)						      \
-   (v850e_uart_xmit_done(n) && v850e_uart_cts(n))
-/* Wait for this to be true.  */
-#define v850e_uart_wait_for_xmit_ok(n)					      \
-   do { } while (! v850e_uart_xmit_ok (n))
-
-/* Write character CH to uart channel CHAN.  */
-#define v850e_uart_putc(chan, ch)	(V850E_UARTB_TX(chan) = (ch))
-
-/* Return latest character read on channel CHAN.  */
-#define v850e_uart_getc(chan)		V850E_UARTB_RX (chan)
-
-/* Return bit-mask of uart error status.  */
-#define v850e_uart_err(chan)		V850E_UARTB_STR (chan)
-/* Various error bits set in the error result.  */
-#define V850E_UART_ERR_OVERRUN		V850E_UARTB_STR_OVE
-#define V850E_UART_ERR_FRAME		V850E_UARTB_STR_FE
-#define V850E_UART_ERR_PARITY		V850E_UARTB_STR_PE
-
-
-#endif /* __V850_V850E_UARTB_H__ */
diff --git a/include/asm-v850/v850e_utils.h b/include/asm-v850/v850e_utils.h
deleted file mode 100644
index 52eb72822d3d..000000000000
--- a/include/asm-v850/v850e_utils.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * include/asm-v850/v850e_utils.h -- Utility functions associated with
- *	V850E CPUs
- *
- *  Copyright (C) 2001,03  NEC Electronics Corporation
- *  Copyright (C) 2001,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#ifndef __V850_V850E_UTILS_H__
-#define __V850_V850E_UTILS_H__
-
-/* Calculate counter clock-divider and count values to attain the
-   desired frequency RATE from the base frequency BASE_FREQ.  The
-   counter is expected to have a clock-divider, which can divide the
-   system cpu clock by a power of two value from MIN_DIVLOG2 to
-   MAX_DIV_LOG2, and a word-size of COUNTER_SIZE bits (the counter
-   counts up and resets whenever it's equal to the compare register,
-   generating an interrupt or whatever when it does so).  The returned
-   values are: *DIVLOG2 -- log2 of the desired clock divider and *COUNT
-   -- the counter compare value to use.  Returns true if it was possible
-   to find a reasonable value, otherwise false (and the other return
-   values will be set to be as good as possible).  */
-extern int calc_counter_params (unsigned long base_freq,
-				unsigned long rate,
-				unsigned min_divlog2, unsigned max_divlog2,
-				unsigned counter_size,
-				unsigned *divlog2, unsigned *count);
-
-#endif /* __V850_V850E_UTILS_H__ */
diff --git a/include/linux/audit.h b/include/linux/audit.h
index 8b82974bdc12..6272a395d43c 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -286,7 +286,6 @@
 #define AUDIT_ARCH_SHEL64	(EM_SH|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE)
 #define AUDIT_ARCH_SPARC	(EM_SPARC)
 #define AUDIT_ARCH_SPARC64	(EM_SPARCV9|__AUDIT_ARCH_64BIT)
-#define AUDIT_ARCH_V850		(EM_V850|__AUDIT_ARCH_LE)
 #define AUDIT_ARCH_X86_64	(EM_X86_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE)
 
 #define AUDIT_PERM_EXEC		1
diff --git a/include/linux/module.h b/include/linux/module.h
index fce15ebd0e1c..68e09557c951 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -23,7 +23,7 @@
 /* Not Yet Implemented */
 #define MODULE_SUPPORTED_DEVICE(name)
 
-/* v850 toolchain uses a `_' prefix for all user symbols */
+/* some toolchains uses a `_' prefix for all user symbols */
 #ifndef MODULE_SYMBOL_PREFIX
 #define MODULE_SYMBOL_PREFIX ""
 #endif
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index f3a1c0e45021..3b2f6c04855e 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -59,9 +59,6 @@
 #define PORT_SUNZILOG	38
 #define PORT_SUNSAB	39
 
-/* NEC v850.  */
-#define PORT_V850E_UART	40
-
 /* DEC */
 #define PORT_DZ		46
 #define PORT_ZS		47
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 0522f368f9d7..4394dadff813 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -443,7 +443,7 @@ asmlinkage long sys_newuname(struct new_utsname __user *name);
 
 asmlinkage long sys_getrlimit(unsigned int resource,
 				struct rlimit __user *rlim);
-#if defined(COMPAT_RLIM_OLD_INFINITY) || !(defined(CONFIG_IA64) || defined(CONFIG_V850))
+#if defined(COMPAT_RLIM_OLD_INFINITY) || !(defined(CONFIG_IA64))
 asmlinkage long sys_old_getrlimit(unsigned int resource, struct rlimit __user *rlim);
 #endif
 asmlinkage long sys_setrlimit(unsigned int resource,
diff --git a/scripts/genksyms/genksyms.c b/scripts/genksyms/genksyms.c
index dca5e0dd09bf..4f8a3007e457 100644
--- a/scripts/genksyms/genksyms.c
+++ b/scripts/genksyms/genksyms.c
@@ -520,8 +520,7 @@ int main(int argc, char **argv)
 			genksyms_usage();
 			return 1;
 		}
-	if ((strcmp(arch, "v850") == 0) || (strcmp(arch, "h8300") == 0)
-	    || (strcmp(arch, "blackfin") == 0))
+	if ((strcmp(arch, "h8300") == 0) || (strcmp(arch, "blackfin") == 0))
 		mod_prefix = "_";
 	{
 		extern int yydebug;
diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c
index 1fcaf3284a6a..4fa1f3ad2513 100644
--- a/scripts/mod/file2alias.c
+++ b/scripts/mod/file2alias.c
@@ -623,7 +623,7 @@ static int do_i2c_entry(const char *filename, struct i2c_device_id *id,
 	return 1;
 }
 
-/* Ignore any prefix, eg. v850 prepends _ */
+/* Ignore any prefix, eg. some architectures prepend _ */
 static inline int sym_is(const char *symbol, const char *name)
 {
 	const char *match;
diff --git a/scripts/mod/mk_elfconfig.c b/scripts/mod/mk_elfconfig.c
index db3881f14c2d..6a96d47bd1e6 100644
--- a/scripts/mod/mk_elfconfig.c
+++ b/scripts/mod/mk_elfconfig.c
@@ -55,7 +55,7 @@ main(int argc, char **argv)
 	else
 		exit(1);
 
-	if ((strcmp(argv[1], "v850") == 0) || (strcmp(argv[1], "h8300") == 0)
+	if ((strcmp(argv[1], "h8300") == 0)
 	    || (strcmp(argv[1], "blackfin") == 0))
 		printf("#define MODULE_SYMBOL_PREFIX \"_\"\n");
 	else
-- 
cgit v1.2.3


From a677a039be7243357d93502bff2b40850c942e2d Mon Sep 17 00:00:00 2001
From: Ulrich Drepper <drepper@redhat.com>
Date: Wed, 23 Jul 2008 21:29:17 -0700
Subject: flag parameters: socket and socketpair

This patch adds support for flag values which are ORed to the type passwd
to socket and socketpair.  The additional code is minimal.  The flag
values in this implementation can and must match the O_* flags.  This
avoids overhead in the conversion.

The internal functions sock_alloc_fd and sock_map_fd get a new parameters
and all callers are changed.

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <netinet/in.h>
#include <sys/socket.h>

#define PORT 57392

/* For Linux these must be the same.  */
#define SOCK_CLOEXEC O_CLOEXEC

int
main (void)
{
  int fd;
  fd = socket (PF_INET, SOCK_STREAM, 0);
  if (fd == -1)
    {
      puts ("socket(0) failed");
      return 1;
    }
  int coe = fcntl (fd, F_GETFD);
  if (coe == -1)
    {
      puts ("fcntl failed");
      return 1;
    }
  if (coe & FD_CLOEXEC)
    {
      puts ("socket(0) set close-on-exec flag");
      return 1;
    }
  close (fd);

  fd = socket (PF_INET, SOCK_STREAM|SOCK_CLOEXEC, 0);
  if (fd == -1)
    {
      puts ("socket(SOCK_CLOEXEC) failed");
      return 1;
    }
  coe = fcntl (fd, F_GETFD);
  if (coe == -1)
    {
      puts ("fcntl failed");
      return 1;
    }
  if ((coe & FD_CLOEXEC) == 0)
    {
      puts ("socket(SOCK_CLOEXEC) does not set close-on-exec flag");
      return 1;
    }
  close (fd);

  int fds[2];
  if (socketpair (PF_UNIX, SOCK_STREAM, 0, fds) == -1)
    {
      puts ("socketpair(0) failed");
      return 1;
    }
  for (int i = 0; i < 2; ++i)
    {
      coe = fcntl (fds[i], F_GETFD);
      if (coe == -1)
        {
          puts ("fcntl failed");
          return 1;
        }
      if (coe & FD_CLOEXEC)
        {
          printf ("socketpair(0) set close-on-exec flag for fds[%d]\n", i);
          return 1;
        }
      close (fds[i]);
    }

  if (socketpair (PF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0, fds) == -1)
    {
      puts ("socketpair(SOCK_CLOEXEC) failed");
      return 1;
    }
  for (int i = 0; i < 2; ++i)
    {
      coe = fcntl (fds[i], F_GETFD);
      if (coe == -1)
        {
          puts ("fcntl failed");
          return 1;
        }
      if ((coe & FD_CLOEXEC) == 0)
        {
          printf ("socketpair(SOCK_CLOEXEC) does not set close-on-exec flag for fds[%d]\n", i);
          return 1;
        }
      close (fds[i]);
    }

  puts ("OK");

  return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-mips/socket.h |  7 +++++++
 include/linux/net.h       |  9 ++++++++-
 net/9p/trans_fd.c         |  2 +-
 net/sctp/socket.c         |  2 +-
 net/socket.c              | 28 ++++++++++++++++++++--------
 5 files changed, 37 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/asm-mips/socket.h b/include/asm-mips/socket.h
index 63f60254d308..facc2d7a87ca 100644
--- a/include/asm-mips/socket.h
+++ b/include/asm-mips/socket.h
@@ -102,6 +102,13 @@ enum sock_type {
 };
 
 #define SOCK_MAX (SOCK_PACKET + 1)
+/* Mask which covers at least up to SOCK_MASK-1.  The
+ *  * remaining bits are used as flags. */
+#define SOCK_TYPE_MASK 0xf
+
+/* Flags for socket, socketpair, paccept */
+#define SOCK_CLOEXEC	O_CLOEXEC
+#define SOCK_NONBLOCK	O_NONBLOCK
 
 #define ARCH_HAS_SOCKET_TYPES 1
 
diff --git a/include/linux/net.h b/include/linux/net.h
index 150a48c68d52..8b5383c45b45 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -20,6 +20,7 @@
 
 #include <linux/wait.h>
 #include <linux/socket.h>
+#include <linux/fcntl.h>	/* For O_CLOEXEC */
 #include <asm/socket.h>
 
 struct poll_table_struct;
@@ -94,6 +95,12 @@ enum sock_type {
 };
 
 #define SOCK_MAX (SOCK_PACKET + 1)
+/* Mask which covers at least up to SOCK_MASK-1.  The
+ * remaining bits are used as flags. */
+#define SOCK_TYPE_MASK 0xf
+
+/* Flags for socket, socketpair, paccept */
+#define SOCK_CLOEXEC	O_CLOEXEC
 
 #endif /* ARCH_HAS_SOCKET_TYPES */
 
@@ -208,7 +215,7 @@ extern int   	     sock_sendmsg(struct socket *sock, struct msghdr *msg,
 				  size_t len);
 extern int	     sock_recvmsg(struct socket *sock, struct msghdr *msg,
 				  size_t size, int flags);
-extern int 	     sock_map_fd(struct socket *sock);
+extern int 	     sock_map_fd(struct socket *sock, int flags);
 extern struct socket *sockfd_lookup(int fd, int *err);
 #define		     sockfd_put(sock) fput(sock->file)
 extern int	     net_ratelimit(void);
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 4507f744f44e..cdf137af7adc 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -1285,7 +1285,7 @@ static int p9_socket_open(struct p9_trans *trans, struct socket *csocket)
 	int fd, ret;
 
 	csocket->sk->sk_allocation = GFP_NOIO;
-	fd = sock_map_fd(csocket);
+	fd = sock_map_fd(csocket, 0);
 	if (fd < 0) {
 		P9_EPRINTK(KERN_ERR, "p9_socket_open: failed to map fd\n");
 		return fd;
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 79bece16aede..dbb79adf8f3c 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -3910,7 +3910,7 @@ static int sctp_getsockopt_peeloff(struct sock *sk, int len, char __user *optval
 		goto out;
 
 	/* Map the socket to an unused fd that can be returned to the user.  */
-	retval = sock_map_fd(newsock);
+	retval = sock_map_fd(newsock, 0);
 	if (retval < 0) {
 		sock_release(newsock);
 		goto out;
diff --git a/net/socket.c b/net/socket.c
index 1ba57d888981..64601f900352 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -349,11 +349,11 @@ static struct dentry_operations sockfs_dentry_operations = {
  *	but we take care of internal coherence yet.
  */
 
-static int sock_alloc_fd(struct file **filep)
+static int sock_alloc_fd(struct file **filep, int flags)
 {
 	int fd;
 
-	fd = get_unused_fd();
+	fd = get_unused_fd_flags(flags);
 	if (likely(fd >= 0)) {
 		struct file *file = get_empty_filp();
 
@@ -396,10 +396,10 @@ static int sock_attach_fd(struct socket *sock, struct file *file)
 	return 0;
 }
 
-int sock_map_fd(struct socket *sock)
+int sock_map_fd(struct socket *sock, int flags)
 {
 	struct file *newfile;
-	int fd = sock_alloc_fd(&newfile);
+	int fd = sock_alloc_fd(&newfile, flags);
 
 	if (likely(fd >= 0)) {
 		int err = sock_attach_fd(sock, newfile);
@@ -1218,12 +1218,18 @@ asmlinkage long sys_socket(int family, int type, int protocol)
 {
 	int retval;
 	struct socket *sock;
+	int flags;
+
+	flags = type & ~SOCK_TYPE_MASK;
+	if (flags & ~SOCK_CLOEXEC)
+		return -EINVAL;
+	type &= SOCK_TYPE_MASK;
 
 	retval = sock_create(family, type, protocol, &sock);
 	if (retval < 0)
 		goto out;
 
-	retval = sock_map_fd(sock);
+	retval = sock_map_fd(sock, flags & O_CLOEXEC);
 	if (retval < 0)
 		goto out_release;
 
@@ -1246,6 +1252,12 @@ asmlinkage long sys_socketpair(int family, int type, int protocol,
 	struct socket *sock1, *sock2;
 	int fd1, fd2, err;
 	struct file *newfile1, *newfile2;
+	int flags;
+
+	flags = type & ~SOCK_TYPE_MASK;
+	if (flags & ~SOCK_CLOEXEC)
+		return -EINVAL;
+	type &= SOCK_TYPE_MASK;
 
 	/*
 	 * Obtain the first socket and check if the underlying protocol
@@ -1264,13 +1276,13 @@ asmlinkage long sys_socketpair(int family, int type, int protocol,
 	if (err < 0)
 		goto out_release_both;
 
-	fd1 = sock_alloc_fd(&newfile1);
+	fd1 = sock_alloc_fd(&newfile1, flags & O_CLOEXEC);
 	if (unlikely(fd1 < 0)) {
 		err = fd1;
 		goto out_release_both;
 	}
 
-	fd2 = sock_alloc_fd(&newfile2);
+	fd2 = sock_alloc_fd(&newfile2, flags & O_CLOEXEC);
 	if (unlikely(fd2 < 0)) {
 		err = fd2;
 		put_filp(newfile1);
@@ -1426,7 +1438,7 @@ asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr,
 	 */
 	__module_get(newsock->ops->owner);
 
-	newfd = sock_alloc_fd(&newfile);
+	newfd = sock_alloc_fd(&newfile, 0);
 	if (unlikely(newfd < 0)) {
 		err = newfd;
 		sock_release(newsock);
-- 
cgit v1.2.3


From aaca0bdca573f3f51ea03139f9c7289541e7bca3 Mon Sep 17 00:00:00 2001
From: Ulrich Drepper <drepper@redhat.com>
Date: Wed, 23 Jul 2008 21:29:20 -0700
Subject: flag parameters: paccept

This patch is by far the most complex in the series.  It adds a new syscall
paccept.  This syscall differs from accept in that it adds (at the userlevel)
two additional parameters:

- a signal mask
- a flags value

The flags parameter can be used to set flag like SOCK_CLOEXEC.  This is
imlpemented here as well.  Some people argued that this is a property which
should be inherited from the file desriptor for the server but this is against
POSIX.  Additionally, we really want the signal mask parameter as well
(similar to pselect, ppoll, etc).  So an interface change in inevitable.

The flag value is the same as for socket and socketpair.  I think diverging
here will only create confusion.  Similar to the filesystem interfaces where
the use of the O_* constants differs, it is acceptable here.

The signal mask is handled as for pselect etc.  The mask is temporarily
installed for the thread and removed before the call returns.  I modeled the
code after pselect.  If there is a problem it's likely also in pselect.

For architectures which use socketcall I maintained this interface instead of
adding a system call.  The symmetry shouldn't be broken.

The following test must be adjusted for architectures other than x86 and
x86-64 and in case the syscall numbers changed.

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include <errno.h>
#include <fcntl.h>
#include <pthread.h>
#include <signal.h>
#include <stdio.h>
#include <unistd.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <sys/syscall.h>

#ifndef __NR_paccept
# ifdef __x86_64__
#  define __NR_paccept 288
# elif defined __i386__
#  define SYS_PACCEPT 18
#  define USE_SOCKETCALL 1
# else
#  error "need __NR_paccept"
# endif
#endif

#ifdef USE_SOCKETCALL
# define paccept(fd, addr, addrlen, mask, flags) \
  ({ long args[6] = { \
       (long) fd, (long) addr, (long) addrlen, (long) mask, 8, (long) flags }; \
     syscall (__NR_socketcall, SYS_PACCEPT, args); })
#else
# define paccept(fd, addr, addrlen, mask, flags) \
  syscall (__NR_paccept, fd, addr, addrlen, mask, 8, flags)
#endif

#define PORT 57392

#define SOCK_CLOEXEC O_CLOEXEC

static pthread_barrier_t b;

static void *
tf (void *arg)
{
  pthread_barrier_wait (&b);
  int s = socket (AF_INET, SOCK_STREAM, 0);
  struct sockaddr_in sin;
  sin.sin_family = AF_INET;
  sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
  sin.sin_port = htons (PORT);
  connect (s, (const struct sockaddr *) &sin, sizeof (sin));
  close (s);

  pthread_barrier_wait (&b);
  s = socket (AF_INET, SOCK_STREAM, 0);
  sin.sin_port = htons (PORT);
  connect (s, (const struct sockaddr *) &sin, sizeof (sin));
  close (s);
  pthread_barrier_wait (&b);

  pthread_barrier_wait (&b);
  sleep (2);
  pthread_kill ((pthread_t) arg, SIGUSR1);

  return NULL;
}

static void
handler (int s)
{
}

int
main (void)
{
  pthread_barrier_init (&b, NULL, 2);

  struct sockaddr_in sin;
  pthread_t th;
  if (pthread_create (&th, NULL, tf, (void *) pthread_self ()) != 0)
    {
      puts ("pthread_create failed");
      return 1;
    }

  int s = socket (AF_INET, SOCK_STREAM, 0);
  int reuse = 1;
  setsockopt (s, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof (reuse));
  sin.sin_family = AF_INET;
  sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
  sin.sin_port = htons (PORT);
  bind (s, (struct sockaddr *) &sin, sizeof (sin));
  listen (s, SOMAXCONN);

  pthread_barrier_wait (&b);

  int s2 = paccept (s, NULL, 0, NULL, 0);
  if (s2 < 0)
    {
      puts ("paccept(0) failed");
      return 1;
    }

  int coe = fcntl (s2, F_GETFD);
  if (coe & FD_CLOEXEC)
    {
      puts ("paccept(0) set close-on-exec-flag");
      return 1;
    }
  close (s2);

  pthread_barrier_wait (&b);

  s2 = paccept (s, NULL, 0, NULL, SOCK_CLOEXEC);
  if (s2 < 0)
    {
      puts ("paccept(SOCK_CLOEXEC) failed");
      return 1;
    }

  coe = fcntl (s2, F_GETFD);
  if ((coe & FD_CLOEXEC) == 0)
    {
      puts ("paccept(SOCK_CLOEXEC) does not set close-on-exec flag");
      return 1;
    }
  close (s2);

  pthread_barrier_wait (&b);

  struct sigaction sa;
  sa.sa_handler = handler;
  sa.sa_flags = 0;
  sigemptyset (&sa.sa_mask);
  sigaction (SIGUSR1, &sa, NULL);

  sigset_t ss;
  pthread_sigmask (SIG_SETMASK, NULL, &ss);
  sigaddset (&ss, SIGUSR1);
  pthread_sigmask (SIG_SETMASK, &ss, NULL);

  sigdelset (&ss, SIGUSR1);
  alarm (4);
  pthread_barrier_wait (&b);

  errno = 0 ;
  s2 = paccept (s, NULL, 0, &ss, 0);
  if (s2 != -1 || errno != EINTR)
    {
      puts ("paccept did not fail with EINTR");
      return 1;
    }

  close (s);

  puts ("OK");

  return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

[akpm@linux-foundation.org: make it compile]
[akpm@linux-foundation.org: add sys_ni stub]
Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Cc: <linux-arch@vger.kernel.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Roland McGrath <roland@redhat.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-alpha/socket.h  |  5 +++
 include/asm-parisc/socket.h |  5 +++
 include/asm-x86/unistd_64.h |  2 ++
 include/linux/net.h         |  3 ++
 include/linux/syscalls.h    |  2 ++
 kernel/sys_ni.c             |  1 +
 net/compat.c                | 52 ++++++++++++++++++++++++++---
 net/socket.c                | 81 ++++++++++++++++++++++++++++++++++++++++-----
 8 files changed, 139 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/asm-alpha/socket.h b/include/asm-alpha/socket.h
index 08c979319929..a1057c2d95e7 100644
--- a/include/asm-alpha/socket.h
+++ b/include/asm-alpha/socket.h
@@ -62,4 +62,9 @@
 
 #define SO_MARK			36
 
+/* O_NONBLOCK clashes with the bits used for socket types.  Therefore we
+ * have to define SOCK_NONBLOCK to a different value here.
+ */
+#define SOCK_NONBLOCK	0x40000000
+
 #endif /* _ASM_SOCKET_H */
diff --git a/include/asm-parisc/socket.h b/include/asm-parisc/socket.h
index 69a7a0d30b02..fba402c95ac2 100644
--- a/include/asm-parisc/socket.h
+++ b/include/asm-parisc/socket.h
@@ -54,4 +54,9 @@
 
 #define SO_MARK			0x401f
 
+/* O_NONBLOCK clashes with the bits used for socket types.  Therefore we
+ * have to define SOCK_NONBLOCK to a different value here.
+ */
+#define SOCK_NONBLOCK   0x40000000
+
 #endif /* _ASM_SOCKET_H */
diff --git a/include/asm-x86/unistd_64.h b/include/asm-x86/unistd_64.h
index 9c1a4a3470d9..e323994a370f 100644
--- a/include/asm-x86/unistd_64.h
+++ b/include/asm-x86/unistd_64.h
@@ -639,6 +639,8 @@ __SYSCALL(__NR_fallocate, sys_fallocate)
 __SYSCALL(__NR_timerfd_settime, sys_timerfd_settime)
 #define __NR_timerfd_gettime			287
 __SYSCALL(__NR_timerfd_gettime, sys_timerfd_gettime)
+#define __NR_paccept				288
+__SYSCALL(__NR_paccept, sys_paccept)
 
 
 #ifndef __NO_STUBS
diff --git a/include/linux/net.h b/include/linux/net.h
index 8b5383c45b45..3a9b06d4d0fe 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -47,6 +47,7 @@ struct net;
 #define SYS_GETSOCKOPT	15		/* sys_getsockopt(2)		*/
 #define SYS_SENDMSG	16		/* sys_sendmsg(2)		*/
 #define SYS_RECVMSG	17		/* sys_recvmsg(2)		*/
+#define SYS_PACCEPT	18		/* sys_paccept(2)		*/
 
 typedef enum {
 	SS_FREE = 0,			/* not allocated		*/
@@ -219,6 +220,8 @@ extern int 	     sock_map_fd(struct socket *sock, int flags);
 extern struct socket *sockfd_lookup(int fd, int *err);
 #define		     sockfd_put(sock) fput(sock->file)
 extern int	     net_ratelimit(void);
+extern long	     do_accept(int fd, struct sockaddr __user *upeer_sockaddr,
+			       int __user *upeer_addrlen, int flags);
 
 #define net_random()		random32()
 #define net_srandom(seed)	srandom32((__force u32)seed)
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 4394dadff813..2a2a40af6b2c 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -409,6 +409,8 @@ asmlinkage long sys_getsockopt(int fd, int level, int optname,
 asmlinkage long sys_bind(int, struct sockaddr __user *, int);
 asmlinkage long sys_connect(int, struct sockaddr __user *, int);
 asmlinkage long sys_accept(int, struct sockaddr __user *, int __user *);
+asmlinkage long sys_paccept(int, struct sockaddr __user *, int __user *,
+			    const sigset_t *, size_t, int);
 asmlinkage long sys_getsockname(int, struct sockaddr __user *, int __user *);
 asmlinkage long sys_getpeername(int, struct sockaddr __user *, int __user *);
 asmlinkage long sys_send(int, void __user *, size_t, unsigned);
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 0fea0ee12da9..2f0b8a2e600f 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -31,6 +31,7 @@ cond_syscall(sys_socketpair);
 cond_syscall(sys_bind);
 cond_syscall(sys_listen);
 cond_syscall(sys_accept);
+cond_syscall(sys_paccept);
 cond_syscall(sys_connect);
 cond_syscall(sys_getsockname);
 cond_syscall(sys_getpeername);
diff --git a/net/compat.c b/net/compat.c
index 6e1b03b51933..67fb6a3834a3 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -722,9 +722,10 @@ EXPORT_SYMBOL(compat_mc_getsockopt);
 
 /* Argument list sizes for compat_sys_socketcall */
 #define AL(x) ((x) * sizeof(u32))
-static unsigned char nas[18]={AL(0),AL(3),AL(3),AL(3),AL(2),AL(3),
+static unsigned char nas[19]={AL(0),AL(3),AL(3),AL(3),AL(2),AL(3),
 				AL(3),AL(3),AL(4),AL(4),AL(4),AL(6),
-				AL(6),AL(2),AL(5),AL(5),AL(3),AL(3)};
+				AL(6),AL(2),AL(5),AL(5),AL(3),AL(3),
+				AL(6)};
 #undef AL
 
 asmlinkage long compat_sys_sendmsg(int fd, struct compat_msghdr __user *msg, unsigned flags)
@@ -737,13 +738,52 @@ asmlinkage long compat_sys_recvmsg(int fd, struct compat_msghdr __user *msg, uns
 	return sys_recvmsg(fd, (struct msghdr __user *)msg, flags | MSG_CMSG_COMPAT);
 }
 
+asmlinkage long compat_sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr,
+				   int __user *upeer_addrlen,
+				   const compat_sigset_t __user *sigmask,
+				   compat_size_t sigsetsize, int flags)
+{
+	compat_sigset_t ss32;
+	sigset_t ksigmask, sigsaved;
+	int ret;
+
+	if (sigmask) {
+		if (sigsetsize != sizeof(compat_sigset_t))
+			return -EINVAL;
+		if (copy_from_user(&ss32, sigmask, sizeof(ss32)))
+			return -EFAULT;
+		sigset_from_compat(&ksigmask, &ss32);
+
+		sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
+		sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
+	}
+
+	ret = do_accept(fd, upeer_sockaddr, upeer_addrlen, flags);
+
+	if (ret == -ERESTARTNOHAND) {
+		/*
+		 * Don't restore the signal mask yet. Let do_signal() deliver
+		 * the signal on the way back to userspace, before the signal
+		 * mask is restored.
+		 */
+		if (sigmask) {
+			memcpy(&current->saved_sigmask, &sigsaved,
+			       sizeof(sigsaved));
+			set_restore_sigmask();
+		}
+	} else if (sigmask)
+		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
+
+	return ret;
+}
+
 asmlinkage long compat_sys_socketcall(int call, u32 __user *args)
 {
 	int ret;
 	u32 a[6];
 	u32 a0, a1;
 
-	if (call < SYS_SOCKET || call > SYS_RECVMSG)
+	if (call < SYS_SOCKET || call > SYS_PACCEPT)
 		return -EINVAL;
 	if (copy_from_user(a, args, nas[call]))
 		return -EFAULT;
@@ -764,7 +804,7 @@ asmlinkage long compat_sys_socketcall(int call, u32 __user *args)
 		ret = sys_listen(a0, a1);
 		break;
 	case SYS_ACCEPT:
-		ret = sys_accept(a0, compat_ptr(a1), compat_ptr(a[2]));
+		ret = do_accept(a0, compat_ptr(a1), compat_ptr(a[2]), 0);
 		break;
 	case SYS_GETSOCKNAME:
 		ret = sys_getsockname(a0, compat_ptr(a1), compat_ptr(a[2]));
@@ -804,6 +844,10 @@ asmlinkage long compat_sys_socketcall(int call, u32 __user *args)
 	case SYS_RECVMSG:
 		ret = compat_sys_recvmsg(a0, compat_ptr(a1), a[2]);
 		break;
+	case SYS_PACCEPT:
+		ret = compat_sys_paccept(a0, compat_ptr(a1), compat_ptr(a[2]),
+					 compat_ptr(a[3]), a[4], a[5]);
+		break;
 	default:
 		ret = -EINVAL;
 		break;
diff --git a/net/socket.c b/net/socket.c
index 64601f900352..a0ce8ad72252 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -63,6 +63,7 @@
 #include <linux/file.h>
 #include <linux/net.h>
 #include <linux/interrupt.h>
+#include <linux/thread_info.h>
 #include <linux/rcupdate.h>
 #include <linux/netdevice.h>
 #include <linux/proc_fs.h>
@@ -1225,6 +1226,9 @@ asmlinkage long sys_socket(int family, int type, int protocol)
 		return -EINVAL;
 	type &= SOCK_TYPE_MASK;
 
+	if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
+		flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
+
 	retval = sock_create(family, type, protocol, &sock);
 	if (retval < 0)
 		goto out;
@@ -1259,6 +1263,9 @@ asmlinkage long sys_socketpair(int family, int type, int protocol,
 		return -EINVAL;
 	type &= SOCK_TYPE_MASK;
 
+	if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
+		flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
+
 	/*
 	 * Obtain the first socket and check if the underlying protocol
 	 * supports the socketpair call.
@@ -1413,14 +1420,20 @@ asmlinkage long sys_listen(int fd, int backlog)
  *	clean when we restucture accept also.
  */
 
-asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr,
-			   int __user *upeer_addrlen)
+long do_accept(int fd, struct sockaddr __user *upeer_sockaddr,
+	       int __user *upeer_addrlen, int flags)
 {
 	struct socket *sock, *newsock;
 	struct file *newfile;
 	int err, len, newfd, fput_needed;
 	struct sockaddr_storage address;
 
+	if (flags & ~SOCK_CLOEXEC)
+		return -EINVAL;
+
+	if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
+		flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
+
 	sock = sockfd_lookup_light(fd, &err, &fput_needed);
 	if (!sock)
 		goto out;
@@ -1438,7 +1451,7 @@ asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr,
 	 */
 	__module_get(newsock->ops->owner);
 
-	newfd = sock_alloc_fd(&newfile, 0);
+	newfd = sock_alloc_fd(&newfile, flags & O_CLOEXEC);
 	if (unlikely(newfd < 0)) {
 		err = newfd;
 		sock_release(newsock);
@@ -1491,6 +1504,50 @@ out_fd:
 	goto out_put;
 }
 
+asmlinkage long sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr,
+			    int __user *upeer_addrlen,
+			    const sigset_t __user *sigmask,
+			    size_t sigsetsize, int flags)
+{
+	sigset_t ksigmask, sigsaved;
+	int ret;
+
+	if (sigmask) {
+		/* XXX: Don't preclude handling different sized sigset_t's.  */
+		if (sigsetsize != sizeof(sigset_t))
+			return -EINVAL;
+		if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask)))
+			return -EFAULT;
+
+		sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
+		sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
+        }
+
+	ret = do_accept(fd, upeer_sockaddr, upeer_addrlen, flags);
+
+	if (ret < 0 && signal_pending(current)) {
+		/*
+		 * Don't restore the signal mask yet. Let do_signal() deliver
+		 * the signal on the way back to userspace, before the signal
+		 * mask is restored.
+		 */
+		if (sigmask) {
+			memcpy(&current->saved_sigmask, &sigsaved,
+			       sizeof(sigsaved));
+			set_restore_sigmask();
+		}
+	} else if (sigmask)
+		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
+
+	return ret;
+}
+
+asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr,
+			   int __user *upeer_addrlen)
+{
+	return do_accept(fd, upeer_sockaddr, upeer_addrlen, 0);
+}
+
 /*
  *	Attempt to connect to a socket with the server address.  The address
  *	is in user space so we verify it is OK and move it to kernel space.
@@ -2011,10 +2068,11 @@ out:
 
 /* Argument list sizes for sys_socketcall */
 #define AL(x) ((x) * sizeof(unsigned long))
-static const unsigned char nargs[18]={
+static const unsigned char nargs[19]={
 	AL(0),AL(3),AL(3),AL(3),AL(2),AL(3),
 	AL(3),AL(3),AL(4),AL(4),AL(4),AL(6),
-	AL(6),AL(2),AL(5),AL(5),AL(3),AL(3)
+	AL(6),AL(2),AL(5),AL(5),AL(3),AL(3),
+	AL(6)
 };
 
 #undef AL
@@ -2033,7 +2091,7 @@ asmlinkage long sys_socketcall(int call, unsigned long __user *args)
 	unsigned long a0, a1;
 	int err;
 
-	if (call < 1 || call > SYS_RECVMSG)
+	if (call < 1 || call > SYS_PACCEPT)
 		return -EINVAL;
 
 	/* copy_from_user should be SMP safe. */
@@ -2062,8 +2120,8 @@ asmlinkage long sys_socketcall(int call, unsigned long __user *args)
 		break;
 	case SYS_ACCEPT:
 		err =
-		    sys_accept(a0, (struct sockaddr __user *)a1,
-			       (int __user *)a[2]);
+		    do_accept(a0, (struct sockaddr __user *)a1,
+			      (int __user *)a[2], 0);
 		break;
 	case SYS_GETSOCKNAME:
 		err =
@@ -2110,6 +2168,13 @@ asmlinkage long sys_socketcall(int call, unsigned long __user *args)
 	case SYS_RECVMSG:
 		err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]);
 		break;
+	case SYS_PACCEPT:
+		err =
+		    sys_paccept(a0, (struct sockaddr __user *)a1,
+			        (int __user *)a[2],
+				(const sigset_t __user *) a[3],
+				a[4], a[5]);
+		break;
 	default:
 		err = -EINVAL;
 		break;
-- 
cgit v1.2.3


From c019bbc612f6633ede7ed67725cbf68de45ae8a4 Mon Sep 17 00:00:00 2001
From: Ulrich Drepper <drepper@redhat.com>
Date: Wed, 23 Jul 2008 21:29:21 -0700
Subject: flag parameters: paccept w/out set_restore_sigmask

Some platforms do not have support to restore the signal mask in the
return path from a syscall.  For those platforms syscalls like pselect are
not defined at all.  This is, I think, not a good choice for paccept()
since paccept() adds more value on top of accept() than just the signal
mask handling.

Therefore this patch defines a scaled down version of the sys_paccept
function for those platforms.  It returns -EINVAL in case the signal mask
is non-NULL but behaves the same otherwise.

Note that I explicitly included <linux/thread_info.h>.  I saw that it is
currently included but indirectly two levels down.  There is too much risk
in relying on this.  The header might change and then suddenly the
function definition would change without anyone immediately noticing.

Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Cc: Davide Libenzi <davidel@xmailserver.org>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/net.h |  3 +++
 net/socket.c        | 17 +++++++++++++++++
 2 files changed, 20 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/net.h b/include/linux/net.h
index 3a9b06d4d0fe..39a23af059b4 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -102,6 +102,9 @@ enum sock_type {
 
 /* Flags for socket, socketpair, paccept */
 #define SOCK_CLOEXEC	O_CLOEXEC
+#ifndef SOCK_NONBLOCK
+#define SOCK_NONBLOCK	O_NONBLOCK
+#endif
 
 #endif /* ARCH_HAS_SOCKET_TYPES */
 
diff --git a/net/socket.c b/net/socket.c
index a0ce8ad72252..d163adff95bf 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -69,6 +69,7 @@
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/mutex.h>
+#include <linux/thread_info.h>
 #include <linux/wanrouter.h>
 #include <linux/if_bridge.h>
 #include <linux/if_frad.h>
@@ -1504,6 +1505,7 @@ out_fd:
 	goto out_put;
 }
 
+#ifdef HAVE_SET_RESTORE_SIGMASK
 asmlinkage long sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr,
 			    int __user *upeer_addrlen,
 			    const sigset_t __user *sigmask,
@@ -1541,6 +1543,21 @@ asmlinkage long sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr,
 
 	return ret;
 }
+#else
+asmlinkage long sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr,
+			    int __user *upeer_addrlen,
+			    const sigset_t __user *sigmask,
+			    size_t sigsetsize, int flags)
+{
+	/* The platform does not support restoring the signal mask in the
+	 * return path.  So we do not allow using paccept() with a signal
+	 * mask.  */
+	if (sigmask)
+		return -EINVAL;
+
+	return do_accept(fd, upeer_sockaddr, upeer_addrlen, flags);
+}
+#endif
 
 asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr,
 			   int __user *upeer_addrlen)
-- 
cgit v1.2.3


From 7d9dbca34240ebb6ff88d8a29c6c7bffd098f0c1 Mon Sep 17 00:00:00 2001
From: Ulrich Drepper <drepper@redhat.com>
Date: Wed, 23 Jul 2008 21:29:22 -0700
Subject: flag parameters: anon_inode_getfd extension

This patch just extends the anon_inode_getfd interface to take an additional
parameter with a flag value.  The flag value is passed on to
get_unused_fd_flags in anticipation for a use with the O_CLOEXEC flag.

No actual semantic changes here, the changed callers all pass 0 for now.

[akpm@linux-foundation.org: KVM fix]
Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/anon_inodes.c            | 9 +++++----
 fs/eventfd.c                | 2 +-
 fs/eventpoll.c              | 2 +-
 fs/signalfd.c               | 3 ++-
 fs/timerfd.c                | 2 +-
 include/linux/anon_inodes.h | 2 +-
 virt/kvm/kvm_main.c         | 4 ++--
 7 files changed, 13 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 977ef208c051..1a4eee620b0d 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -58,8 +58,9 @@ static struct dentry_operations anon_inodefs_dentry_operations = {
  *                    of the file
  *
  * @name:    [in]    name of the "class" of the new file
- * @fops     [in]    file operations for the new file
- * @priv     [in]    private data for the new file (will be file's private_data)
+ * @fops:    [in]    file operations for the new file
+ * @priv:    [in]    private data for the new file (will be file's private_data)
+ * @flags:   [in]    flags
  *
  * Creates a new file by hooking it on a single inode. This is useful for files
  * that do not need to have a full-fledged inode in order to operate correctly.
@@ -68,7 +69,7 @@ static struct dentry_operations anon_inodefs_dentry_operations = {
  * setup.  Returns new descriptor or -error.
  */
 int anon_inode_getfd(const char *name, const struct file_operations *fops,
-		     void *priv)
+		     void *priv, int flags)
 {
 	struct qstr this;
 	struct dentry *dentry;
@@ -78,7 +79,7 @@ int anon_inode_getfd(const char *name, const struct file_operations *fops,
 	if (IS_ERR(anon_inode_inode))
 		return -ENODEV;
 
-	error = get_unused_fd();
+	error = get_unused_fd_flags(flags);
 	if (error < 0)
 		return error;
 	fd = error;
diff --git a/fs/eventfd.c b/fs/eventfd.c
index 343942deeec1..6094265ca409 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -214,7 +214,7 @@ asmlinkage long sys_eventfd(unsigned int count)
 	 * When we call this, the initialization must be complete, since
 	 * anon_inode_getfd() will install the fd.
 	 */
-	fd = anon_inode_getfd("[eventfd]", &eventfd_fops, ctx);
+	fd = anon_inode_getfd("[eventfd]", &eventfd_fops, ctx, 0);
 	if (fd < 0)
 		kfree(ctx);
 	return fd;
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 990c01d2d66b..9392dd968125 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1068,7 +1068,7 @@ asmlinkage long sys_epoll_create(int size)
 	 * Creates all the items needed to setup an eventpoll file. That is,
 	 * a file structure and a free file descriptor.
 	 */
-	fd = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep);
+	fd = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep, 0);
 	if (fd < 0)
 		ep_free(ep);
 
diff --git a/fs/signalfd.c b/fs/signalfd.c
index 619725644c75..ddb328b74bde 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -227,7 +227,8 @@ asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemas
 		 * When we call this, the initialization must be complete, since
 		 * anon_inode_getfd() will install the fd.
 		 */
-		ufd = anon_inode_getfd("[signalfd]", &signalfd_fops, ctx);
+		ufd = anon_inode_getfd("[signalfd]", &signalfd_fops, ctx,
+				       0);
 		if (ufd < 0)
 			kfree(ctx);
 	} else {
diff --git a/fs/timerfd.c b/fs/timerfd.c
index d87d354ec424..77c2bc92cbee 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -198,7 +198,7 @@ asmlinkage long sys_timerfd_create(int clockid, int flags)
 	ctx->clockid = clockid;
 	hrtimer_init(&ctx->tmr, clockid, HRTIMER_MODE_ABS);
 
-	ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx);
+	ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx, 0);
 	if (ufd < 0)
 		kfree(ctx);
 
diff --git a/include/linux/anon_inodes.h b/include/linux/anon_inodes.h
index 6129e58ca7c9..e0a0cdc2da43 100644
--- a/include/linux/anon_inodes.h
+++ b/include/linux/anon_inodes.h
@@ -9,7 +9,7 @@
 #define _LINUX_ANON_INODES_H
 
 int anon_inode_getfd(const char *name, const struct file_operations *fops,
-		     void *priv);
+		     void *priv, int flags);
 
 #endif /* _LINUX_ANON_INODES_H */
 
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 904d7b7bd780..a845890b6800 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -902,7 +902,7 @@ static const struct file_operations kvm_vcpu_fops = {
  */
 static int create_vcpu_fd(struct kvm_vcpu *vcpu)
 {
-	int fd = anon_inode_getfd("kvm-vcpu", &kvm_vcpu_fops, vcpu);
+	int fd = anon_inode_getfd("kvm-vcpu", &kvm_vcpu_fops, vcpu, 0);
 	if (fd < 0)
 		kvm_put_kvm(vcpu->kvm);
 	return fd;
@@ -1261,7 +1261,7 @@ static int kvm_dev_ioctl_create_vm(void)
 	kvm = kvm_create_vm();
 	if (IS_ERR(kvm))
 		return PTR_ERR(kvm);
-	fd = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm);
+	fd = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, 0);
 	if (fd < 0)
 		kvm_put_kvm(kvm);
 
-- 
cgit v1.2.3


From 9deb27baedb79759c3ab9435a7d8b841842d56e9 Mon Sep 17 00:00:00 2001
From: Ulrich Drepper <drepper@redhat.com>
Date: Wed, 23 Jul 2008 21:29:24 -0700
Subject: flag parameters: signalfd

This patch adds the new signalfd4 syscall.  It extends the old signalfd
syscall by one parameter which is meant to hold a flag value.  In this
patch the only flag support is SFD_CLOEXEC which causes the close-on-exec
flag for the returned file descriptor to be set.

A new name SFD_CLOEXEC is introduced which in this implementation must
have the same value as O_CLOEXEC.

The following test must be adjusted for architectures other than x86 and
x86-64 and in case the syscall numbers changed.

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include <fcntl.h>
#include <signal.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef __NR_signalfd4
# ifdef __x86_64__
#  define __NR_signalfd4 289
# elif defined __i386__
#  define __NR_signalfd4 327
# else
#  error "need __NR_signalfd4"
# endif
#endif

#define SFD_CLOEXEC O_CLOEXEC

int
main (void)
{
  sigset_t ss;
  sigemptyset (&ss);
  sigaddset (&ss, SIGUSR1);
  int fd = syscall (__NR_signalfd4, -1, &ss, 8, 0);
  if (fd == -1)
    {
      puts ("signalfd4(0) failed");
      return 1;
    }
  int coe = fcntl (fd, F_GETFD);
  if (coe == -1)
    {
      puts ("fcntl failed");
      return 1;
    }
  if (coe & FD_CLOEXEC)
    {
      puts ("signalfd4(0) set close-on-exec flag");
      return 1;
    }
  close (fd);

  fd = syscall (__NR_signalfd4, -1, &ss, 8, SFD_CLOEXEC);
  if (fd == -1)
    {
      puts ("signalfd4(SFD_CLOEXEC) failed");
      return 1;
    }
  coe = fcntl (fd, F_GETFD);
  if (coe == -1)
    {
      puts ("fcntl failed");
      return 1;
    }
  if ((coe & FD_CLOEXEC) == 0)
    {
      puts ("signalfd4(SFD_CLOEXEC) does not set close-on-exec flag");
      return 1;
    }
  close (fd);

  puts ("OK");

  return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

[akpm@linux-foundation.org: add sys_ni stub]
Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Cc: <linux-arch@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/ia32/ia32entry.S          |  1 +
 arch/x86/kernel/syscall_table_32.S |  1 +
 fs/compat.c                        | 14 ++++++++++----
 fs/signalfd.c                      | 14 ++++++++++++--
 include/asm-x86/unistd_32.h        |  1 +
 include/asm-x86/unistd_64.h        |  2 ++
 include/linux/signalfd.h           |  5 +++++
 include/linux/syscalls.h           |  1 +
 kernel/sys_ni.c                    |  1 +
 9 files changed, 34 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 021d71bc69b5..c308128b9251 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -826,4 +826,5 @@ ia32_sys_call_table:
 	.quad sys32_fallocate
 	.quad compat_sys_timerfd_settime	/* 325 */
 	.quad compat_sys_timerfd_gettime
+	.quad compat_sys_signalfd4
 ia32_syscall_end:
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index adff5562f5fd..c12a36c9fd51 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -326,3 +326,4 @@ ENTRY(sys_call_table)
 	.long sys_fallocate
 	.long sys_timerfd_settime	/* 325 */
 	.long sys_timerfd_gettime
+	.long sys_signalfd4
diff --git a/fs/compat.c b/fs/compat.c
index b46604281766..106eba28ec5a 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -2131,9 +2131,9 @@ asmlinkage long compat_sys_epoll_pwait(int epfd,
 
 #ifdef CONFIG_SIGNALFD
 
-asmlinkage long compat_sys_signalfd(int ufd,
-				    const compat_sigset_t __user *sigmask,
-				    compat_size_t sigsetsize)
+asmlinkage long compat_sys_signalfd4(int ufd,
+				     const compat_sigset_t __user *sigmask,
+				     compat_size_t sigsetsize, int flags)
 {
 	compat_sigset_t ss32;
 	sigset_t tmp;
@@ -2148,9 +2148,15 @@ asmlinkage long compat_sys_signalfd(int ufd,
 	if (copy_to_user(ksigmask, &tmp, sizeof(sigset_t)))
 		return -EFAULT;
 
-	return sys_signalfd(ufd, ksigmask, sizeof(sigset_t));
+	return sys_signalfd4(ufd, ksigmask, sizeof(sigset_t), flags);
 }
 
+asmlinkage long compat_sys_signalfd(int ufd,
+				    const compat_sigset_t __user *sigmask,
+				    compat_size_t sigsetsize)
+{
+	return compat_sys_signalfd4(ufd, sigmask, sigsetsize, 0);
+}
 #endif /* CONFIG_SIGNALFD */
 
 #ifdef CONFIG_TIMERFD
diff --git a/fs/signalfd.c b/fs/signalfd.c
index ddb328b74bde..c8609fa51a13 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -205,11 +205,15 @@ static const struct file_operations signalfd_fops = {
 	.read		= signalfd_read,
 };
 
-asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemask)
+asmlinkage long sys_signalfd4(int ufd, sigset_t __user *user_mask,
+			      size_t sizemask, int flags)
 {
 	sigset_t sigmask;
 	struct signalfd_ctx *ctx;
 
+	if (flags & ~SFD_CLOEXEC)
+		return -EINVAL;
+
 	if (sizemask != sizeof(sigset_t) ||
 	    copy_from_user(&sigmask, user_mask, sizeof(sigmask)))
 		return -EINVAL;
@@ -228,7 +232,7 @@ asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemas
 		 * anon_inode_getfd() will install the fd.
 		 */
 		ufd = anon_inode_getfd("[signalfd]", &signalfd_fops, ctx,
-				       0);
+				       flags & O_CLOEXEC);
 		if (ufd < 0)
 			kfree(ctx);
 	} else {
@@ -250,3 +254,9 @@ asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemas
 
 	return ufd;
 }
+
+asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask,
+			     size_t sizemask)
+{
+	return sys_signalfd4(ufd, user_mask, sizemask, 0);
+}
diff --git a/include/asm-x86/unistd_32.h b/include/asm-x86/unistd_32.h
index 8317d94771d3..c310371f5613 100644
--- a/include/asm-x86/unistd_32.h
+++ b/include/asm-x86/unistd_32.h
@@ -332,6 +332,7 @@
 #define __NR_fallocate		324
 #define __NR_timerfd_settime	325
 #define __NR_timerfd_gettime	326
+#define __NR_signalfd4		327
 
 #ifdef __KERNEL__
 
diff --git a/include/asm-x86/unistd_64.h b/include/asm-x86/unistd_64.h
index e323994a370f..e0a9b45b2346 100644
--- a/include/asm-x86/unistd_64.h
+++ b/include/asm-x86/unistd_64.h
@@ -641,6 +641,8 @@ __SYSCALL(__NR_timerfd_settime, sys_timerfd_settime)
 __SYSCALL(__NR_timerfd_gettime, sys_timerfd_gettime)
 #define __NR_paccept				288
 __SYSCALL(__NR_paccept, sys_paccept)
+#define __NR_signalfd4				289
+__SYSCALL(__NR_signalfd4, sys_signalfd4)
 
 
 #ifndef __NO_STUBS
diff --git a/include/linux/signalfd.h b/include/linux/signalfd.h
index ea037f28df91..8b3f7b7420a1 100644
--- a/include/linux/signalfd.h
+++ b/include/linux/signalfd.h
@@ -8,6 +8,11 @@
 #ifndef _LINUX_SIGNALFD_H
 #define _LINUX_SIGNALFD_H
 
+/* For O_CLOEXEC */
+#include <linux/fcntl.h>
+
+/* Flags for signalfd4.  */
+#define SFD_CLOEXEC O_CLOEXEC
 
 struct signalfd_siginfo {
 	__u32 ssi_signo;
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 2a2a40af6b2c..1c2707797845 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -610,6 +610,7 @@ asmlinkage long sys_set_robust_list(struct robust_list_head __user *head,
 				    size_t len);
 asmlinkage long sys_getcpu(unsigned __user *cpu, unsigned __user *node, struct getcpu_cache __user *cache);
 asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemask);
+asmlinkage long sys_signalfd4(int ufd, sigset_t __user *user_mask, size_t sizemask, int flags);
 asmlinkage long sys_timerfd_create(int clockid, int flags);
 asmlinkage long sys_timerfd_settime(int ufd, int flags,
 				    const struct itimerspec __user *utmr,
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 2f0b8a2e600f..8627c89ae9e8 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -156,6 +156,7 @@ cond_syscall(sys_ioprio_get);
 
 /* New file descriptors */
 cond_syscall(sys_signalfd);
+cond_syscall(sys_signalfd4);
 cond_syscall(compat_sys_signalfd);
 cond_syscall(sys_timerfd_create);
 cond_syscall(sys_timerfd_settime);
-- 
cgit v1.2.3


From b087498eb5605673b0f260a7620d91818cd72304 Mon Sep 17 00:00:00 2001
From: Ulrich Drepper <drepper@redhat.com>
Date: Wed, 23 Jul 2008 21:29:25 -0700
Subject: flag parameters: eventfd

This patch adds the new eventfd2 syscall.  It extends the old eventfd
syscall by one parameter which is meant to hold a flag value.  In this
patch the only flag support is EFD_CLOEXEC which causes the close-on-exec
flag for the returned file descriptor to be set.

A new name EFD_CLOEXEC is introduced which in this implementation must
have the same value as O_CLOEXEC.

The following test must be adjusted for architectures other than x86 and
x86-64 and in case the syscall numbers changed.

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef __NR_eventfd2
# ifdef __x86_64__
#  define __NR_eventfd2 290
# elif defined __i386__
#  define __NR_eventfd2 328
# else
#  error "need __NR_eventfd2"
# endif
#endif

#define EFD_CLOEXEC O_CLOEXEC

int
main (void)
{
  int fd = syscall (__NR_eventfd2, 1, 0);
  if (fd == -1)
    {
      puts ("eventfd2(0) failed");
      return 1;
    }
  int coe = fcntl (fd, F_GETFD);
  if (coe == -1)
    {
      puts ("fcntl failed");
      return 1;
    }
  if (coe & FD_CLOEXEC)
    {
      puts ("eventfd2(0) sets close-on-exec flag");
      return 1;
    }
  close (fd);

  fd = syscall (__NR_eventfd2, 1, EFD_CLOEXEC);
  if (fd == -1)
    {
      puts ("eventfd2(EFD_CLOEXEC) failed");
      return 1;
    }
  coe = fcntl (fd, F_GETFD);
  if (coe == -1)
    {
      puts ("fcntl failed");
      return 1;
    }
  if ((coe & FD_CLOEXEC) == 0)
    {
      puts ("eventfd2(EFD_CLOEXEC) does not set close-on-exec flag");
      return 1;
    }
  close (fd);

  puts ("OK");

  return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

[akpm@linux-foundation.org: add sys_ni stub]
Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Cc: <linux-arch@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/ia32/ia32entry.S          |  1 +
 arch/x86/kernel/syscall_table_32.S |  1 +
 fs/eventfd.c                       | 13 +++++++++++--
 include/asm-x86/unistd_32.h        |  1 +
 include/asm-x86/unistd_64.h        |  2 ++
 include/linux/eventfd.h            |  6 ++++++
 include/linux/syscalls.h           |  1 +
 kernel/sys_ni.c                    |  1 +
 8 files changed, 24 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index c308128b9251..cf0eb31745ca 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -827,4 +827,5 @@ ia32_sys_call_table:
 	.quad compat_sys_timerfd_settime	/* 325 */
 	.quad compat_sys_timerfd_gettime
 	.quad compat_sys_signalfd4
+	.quad sys_eventfd2
 ia32_syscall_end:
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index c12a36c9fd51..cf112cb11c37 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -327,3 +327,4 @@ ENTRY(sys_call_table)
 	.long sys_timerfd_settime	/* 325 */
 	.long sys_timerfd_gettime
 	.long sys_signalfd4
+	.long sys_eventfd2
diff --git a/fs/eventfd.c b/fs/eventfd.c
index 6094265ca409..bd420e6478ad 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -198,11 +198,14 @@ struct file *eventfd_fget(int fd)
 	return file;
 }
 
-asmlinkage long sys_eventfd(unsigned int count)
+asmlinkage long sys_eventfd2(unsigned int count, int flags)
 {
 	int fd;
 	struct eventfd_ctx *ctx;
 
+	if (flags & ~EFD_CLOEXEC)
+		return -EINVAL;
+
 	ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
 	if (!ctx)
 		return -ENOMEM;
@@ -214,9 +217,15 @@ asmlinkage long sys_eventfd(unsigned int count)
 	 * When we call this, the initialization must be complete, since
 	 * anon_inode_getfd() will install the fd.
 	 */
-	fd = anon_inode_getfd("[eventfd]", &eventfd_fops, ctx, 0);
+	fd = anon_inode_getfd("[eventfd]", &eventfd_fops, ctx,
+			      flags & O_CLOEXEC);
 	if (fd < 0)
 		kfree(ctx);
 	return fd;
 }
 
+asmlinkage long sys_eventfd(unsigned int count)
+{
+	return sys_eventfd2(count, 0);
+}
+
diff --git a/include/asm-x86/unistd_32.h b/include/asm-x86/unistd_32.h
index c310371f5613..edbd8723c939 100644
--- a/include/asm-x86/unistd_32.h
+++ b/include/asm-x86/unistd_32.h
@@ -333,6 +333,7 @@
 #define __NR_timerfd_settime	325
 #define __NR_timerfd_gettime	326
 #define __NR_signalfd4		327
+#define __NR_eventfd2		328
 
 #ifdef __KERNEL__
 
diff --git a/include/asm-x86/unistd_64.h b/include/asm-x86/unistd_64.h
index e0a9b45b2346..fb059a6feeb1 100644
--- a/include/asm-x86/unistd_64.h
+++ b/include/asm-x86/unistd_64.h
@@ -643,6 +643,8 @@ __SYSCALL(__NR_timerfd_gettime, sys_timerfd_gettime)
 __SYSCALL(__NR_paccept, sys_paccept)
 #define __NR_signalfd4				289
 __SYSCALL(__NR_signalfd4, sys_signalfd4)
+#define __NR_eventfd2				290
+__SYSCALL(__NR_eventfd2, sys_eventfd2)
 
 
 #ifndef __NO_STUBS
diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h
index a701399b7fed..a6c0eaedb1b0 100644
--- a/include/linux/eventfd.h
+++ b/include/linux/eventfd.h
@@ -10,6 +10,12 @@
 
 #ifdef CONFIG_EVENTFD
 
+/* For O_CLOEXEC */
+#include <linux/fcntl.h>
+
+/* Flags for eventfd2.  */
+#define EFD_CLOEXEC O_CLOEXEC
+
 struct file *eventfd_fget(int fd);
 int eventfd_signal(struct file *file, int n);
 
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 1c2707797845..9ab09926a7f2 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -617,6 +617,7 @@ asmlinkage long sys_timerfd_settime(int ufd, int flags,
 				    struct itimerspec __user *otmr);
 asmlinkage long sys_timerfd_gettime(int ufd, struct itimerspec __user *otmr);
 asmlinkage long sys_eventfd(unsigned int count);
+asmlinkage long sys_eventfd2(unsigned int count, int flags);
 asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len);
 
 int kernel_execve(const char *filename, char *const argv[], char *const envp[]);
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 8627c89ae9e8..2a361ccdc7ca 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -164,3 +164,4 @@ cond_syscall(sys_timerfd_gettime);
 cond_syscall(compat_sys_timerfd_settime);
 cond_syscall(compat_sys_timerfd_gettime);
 cond_syscall(sys_eventfd);
+cond_syscall(sys_eventfd2);
-- 
cgit v1.2.3


From 11fcb6c14676023d0bd437841f5dcd670e7990a0 Mon Sep 17 00:00:00 2001
From: Ulrich Drepper <drepper@redhat.com>
Date: Wed, 23 Jul 2008 21:29:26 -0700
Subject: flag parameters: timerfd_create

The timerfd_create syscall already has a flags parameter.  It just is
unused so far.  This patch changes this by introducing the TFD_CLOEXEC
flag to set the close-on-exec flag for the returned file descriptor.

A new name TFD_CLOEXEC is introduced which in this implementation must
have the same value as O_CLOEXEC.

The following test must be adjusted for architectures other than x86 and
x86-64 and in case the syscall numbers changed.

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include <fcntl.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef __NR_timerfd_create
# ifdef __x86_64__
#  define __NR_timerfd_create 283
# elif defined __i386__
#  define __NR_timerfd_create 322
# else
#  error "need __NR_timerfd_create"
# endif
#endif

#define TFD_CLOEXEC O_CLOEXEC

int
main (void)
{
  int fd = syscall (__NR_timerfd_create, CLOCK_REALTIME, 0);
  if (fd == -1)
    {
      puts ("timerfd_create(0) failed");
      return 1;
    }
  int coe = fcntl (fd, F_GETFD);
  if (coe == -1)
    {
      puts ("fcntl failed");
      return 1;
    }
  if (coe & FD_CLOEXEC)
    {
      puts ("timerfd_create(0) set close-on-exec flag");
      return 1;
    }
  close (fd);

  fd = syscall (__NR_timerfd_create, CLOCK_REALTIME, TFD_CLOEXEC);
  if (fd == -1)
    {
      puts ("timerfd_create(TFD_CLOEXEC) failed");
      return 1;
    }
  coe = fcntl (fd, F_GETFD);
  if (coe == -1)
    {
      puts ("fcntl failed");
      return 1;
    }
  if ((coe & FD_CLOEXEC) == 0)
    {
      puts ("timerfd_create(TFD_CLOEXEC) set close-on-exec flag");
      return 1;
    }
  close (fd);

  puts ("OK");

  return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/timerfd.c            | 5 +++--
 include/linux/timerfd.h | 5 +++++
 2 files changed, 8 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/fs/timerfd.c b/fs/timerfd.c
index 77c2bc92cbee..c6ef5e33cb34 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -184,7 +184,7 @@ asmlinkage long sys_timerfd_create(int clockid, int flags)
 	int ufd;
 	struct timerfd_ctx *ctx;
 
-	if (flags)
+	if (flags & ~TFD_CLOEXEC)
 		return -EINVAL;
 	if (clockid != CLOCK_MONOTONIC &&
 	    clockid != CLOCK_REALTIME)
@@ -198,7 +198,8 @@ asmlinkage long sys_timerfd_create(int clockid, int flags)
 	ctx->clockid = clockid;
 	hrtimer_init(&ctx->tmr, clockid, HRTIMER_MODE_ABS);
 
-	ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx, 0);
+	ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx,
+			       flags & O_CLOEXEC);
 	if (ufd < 0)
 		kfree(ctx);
 
diff --git a/include/linux/timerfd.h b/include/linux/timerfd.h
index cf2b10d75731..96ed97dff00f 100644
--- a/include/linux/timerfd.h
+++ b/include/linux/timerfd.h
@@ -8,9 +8,14 @@
 #ifndef _LINUX_TIMERFD_H
 #define _LINUX_TIMERFD_H
 
+/* For O_CLOEXEC */
+#include <linux/fcntl.h>
 
+/* Flags for timerfd_settime.  */
 #define TFD_TIMER_ABSTIME (1 << 0)
 
+/* Flags for timerfd_create.  */
+#define TFD_CLOEXEC O_CLOEXEC
 
 
 #endif /* _LINUX_TIMERFD_H */
-- 
cgit v1.2.3


From a0998b50c3f0b8fdd265c63e0032f86ebe377dbf Mon Sep 17 00:00:00 2001
From: Ulrich Drepper <drepper@redhat.com>
Date: Wed, 23 Jul 2008 21:29:27 -0700
Subject: flag parameters: epoll_create

This patch adds the new epoll_create2 syscall.  It extends the old epoll_create
syscall by one parameter which is meant to hold a flag value.  In this
patch the only flag support is EPOLL_CLOEXEC which causes the close-on-exec
flag for the returned file descriptor to be set.

A new name EPOLL_CLOEXEC is introduced which in this implementation must
have the same value as O_CLOEXEC.

The following test must be adjusted for architectures other than x86 and
x86-64 and in case the syscall numbers changed.

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include <fcntl.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef __NR_epoll_create2
# ifdef __x86_64__
#  define __NR_epoll_create2 291
# elif defined __i386__
#  define __NR_epoll_create2 329
# else
#  error "need __NR_epoll_create2"
# endif
#endif

#define EPOLL_CLOEXEC O_CLOEXEC

int
main (void)
{
  int fd = syscall (__NR_epoll_create2, 1, 0);
  if (fd == -1)
    {
      puts ("epoll_create2(0) failed");
      return 1;
    }
  int coe = fcntl (fd, F_GETFD);
  if (coe == -1)
    {
      puts ("fcntl failed");
      return 1;
    }
  if (coe & FD_CLOEXEC)
    {
      puts ("epoll_create2(0) set close-on-exec flag");
      return 1;
    }
  close (fd);

  fd = syscall (__NR_epoll_create2, 1, EPOLL_CLOEXEC);
  if (fd == -1)
    {
      puts ("epoll_create2(EPOLL_CLOEXEC) failed");
      return 1;
    }
  coe = fcntl (fd, F_GETFD);
  if (coe == -1)
    {
      puts ("fcntl failed");
      return 1;
    }
  if ((coe & FD_CLOEXEC) == 0)
    {
      puts ("epoll_create2(EPOLL_CLOEXEC) set close-on-exec flag");
      return 1;
    }
  close (fd);

  puts ("OK");

  return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Cc: <linux-arch@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/ia32/ia32entry.S          |  1 +
 arch/x86/kernel/syscall_table_32.S |  1 +
 fs/eventpoll.c                     | 13 +++++++++++--
 include/asm-x86/unistd_32.h        |  1 +
 include/asm-x86/unistd_64.h        |  2 ++
 include/linux/eventpoll.h          |  4 ++++
 include/linux/syscalls.h           |  1 +
 7 files changed, 21 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index cf0eb31745ca..04366f08f424 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -828,4 +828,5 @@ ia32_sys_call_table:
 	.quad compat_sys_timerfd_gettime
 	.quad compat_sys_signalfd4
 	.quad sys_eventfd2
+	.quad sys_epoll_create2
 ia32_syscall_end:
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index cf112cb11c37..4d7007ca263d 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -328,3 +328,4 @@ ENTRY(sys_call_table)
 	.long sys_timerfd_gettime
 	.long sys_signalfd4
 	.long sys_eventfd2
+	.long sys_epoll_create2
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 9392dd968125..3fd4014f3c5a 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1046,11 +1046,14 @@ retry:
  * RB tree. With the current implementation, the "size" parameter is ignored
  * (besides sanity checks).
  */
-asmlinkage long sys_epoll_create(int size)
+asmlinkage long sys_epoll_create2(int size, int flags)
 {
 	int error, fd = -1;
 	struct eventpoll *ep;
 
+	if (flags & ~EPOLL_CLOEXEC)
+		return -EINVAL;
+
 	DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d)\n",
 		     current, size));
 
@@ -1068,7 +1071,8 @@ asmlinkage long sys_epoll_create(int size)
 	 * Creates all the items needed to setup an eventpoll file. That is,
 	 * a file structure and a free file descriptor.
 	 */
-	fd = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep, 0);
+	fd = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep,
+			      flags & O_CLOEXEC);
 	if (fd < 0)
 		ep_free(ep);
 
@@ -1079,6 +1083,11 @@ error_return:
 	return fd;
 }
 
+asmlinkage long sys_epoll_create(int size)
+{
+	return sys_epoll_create2(size, 0);
+}
+
 /*
  * The following function implements the controller interface for
  * the eventpoll file that enables the insertion/removal/change of
diff --git a/include/asm-x86/unistd_32.h b/include/asm-x86/unistd_32.h
index edbd8723c939..a37d6b0c4e1e 100644
--- a/include/asm-x86/unistd_32.h
+++ b/include/asm-x86/unistd_32.h
@@ -334,6 +334,7 @@
 #define __NR_timerfd_gettime	326
 #define __NR_signalfd4		327
 #define __NR_eventfd2		328
+#define __NR_epoll_create2	329
 
 #ifdef __KERNEL__
 
diff --git a/include/asm-x86/unistd_64.h b/include/asm-x86/unistd_64.h
index fb059a6feeb1..a1a4a5b6e5ee 100644
--- a/include/asm-x86/unistd_64.h
+++ b/include/asm-x86/unistd_64.h
@@ -645,6 +645,8 @@ __SYSCALL(__NR_paccept, sys_paccept)
 __SYSCALL(__NR_signalfd4, sys_signalfd4)
 #define __NR_eventfd2				290
 __SYSCALL(__NR_eventfd2, sys_eventfd2)
+#define __NR_epoll_create2			291
+__SYSCALL(__NR_epoll_create2, sys_epoll_create2)
 
 
 #ifndef __NO_STUBS
diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h
index cf79853967ff..1cfaa40059c8 100644
--- a/include/linux/eventpoll.h
+++ b/include/linux/eventpoll.h
@@ -14,8 +14,12 @@
 #ifndef _LINUX_EVENTPOLL_H
 #define _LINUX_EVENTPOLL_H
 
+/* For O_CLOEXEC */
+#include <linux/fcntl.h>
 #include <linux/types.h>
 
+/* Flags for epoll_create2.  */
+#define EPOLL_CLOEXEC O_CLOEXEC
 
 /* Valid opcodes to issue to sys_epoll_ctl() */
 #define EPOLL_CTL_ADD 1
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 9ab09926a7f2..85953240f28c 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -430,6 +430,7 @@ asmlinkage long sys_poll(struct pollfd __user *ufds, unsigned int nfds,
 asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp,
 			fd_set __user *exp, struct timeval __user *tvp);
 asmlinkage long sys_epoll_create(int size);
+asmlinkage long sys_epoll_create2(int size, int flags);
 asmlinkage long sys_epoll_ctl(int epfd, int op, int fd,
 				struct epoll_event __user *event);
 asmlinkage long sys_epoll_wait(int epfd, struct epoll_event __user *events,
-- 
cgit v1.2.3


From 336dd1f70ff62d7dd8655228caed4c5bfc818c56 Mon Sep 17 00:00:00 2001
From: Ulrich Drepper <drepper@redhat.com>
Date: Wed, 23 Jul 2008 21:29:29 -0700
Subject: flag parameters: dup2

This patch adds the new dup3 syscall.  It extends the old dup2 syscall by one
parameter which is meant to hold a flag value.  Support for the O_CLOEXEC flag
is added in this patch.

The following test must be adjusted for architectures other than x86 and
x86-64 and in case the syscall numbers changed.

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include <fcntl.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef __NR_dup3
# ifdef __x86_64__
#  define __NR_dup3 292
# elif defined __i386__
#  define __NR_dup3 330
# else
#  error "need __NR_dup3"
# endif
#endif

int
main (void)
{
  int fd = syscall (__NR_dup3, 1, 4, 0);
  if (fd == -1)
    {
      puts ("dup3(0) failed");
      return 1;
    }
  int coe = fcntl (fd, F_GETFD);
  if (coe == -1)
    {
      puts ("fcntl failed");
      return 1;
    }
  if (coe & FD_CLOEXEC)
    {
      puts ("dup3(0) set close-on-exec flag");
      return 1;
    }
  close (fd);

  fd = syscall (__NR_dup3, 1, 4, O_CLOEXEC);
  if (fd == -1)
    {
      puts ("dup3(O_CLOEXEC) failed");
      return 1;
    }
  coe = fcntl (fd, F_GETFD);
  if (coe == -1)
    {
      puts ("fcntl failed");
      return 1;
    }
  if ((coe & FD_CLOEXEC) == 0)
    {
      puts ("dup3(O_CLOEXEC) set close-on-exec flag");
      return 1;
    }
  close (fd);

  puts ("OK");

  return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Cc: <linux-arch@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/ia32/ia32entry.S          |  1 +
 arch/x86/kernel/syscall_table_32.S |  1 +
 fs/fcntl.c                         | 15 +++++++++++++--
 include/asm-x86/unistd_32.h        |  1 +
 include/asm-x86/unistd_64.h        |  2 ++
 include/linux/syscalls.h           |  1 +
 6 files changed, 19 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 04366f08f424..5614a8f7bed4 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -829,4 +829,5 @@ ia32_sys_call_table:
 	.quad compat_sys_signalfd4
 	.quad sys_eventfd2
 	.quad sys_epoll_create2
+	.quad sys_dup3			/* 330 */
 ia32_syscall_end:
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index 4d7007ca263d..24a3f1ea6a0e 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -329,3 +329,4 @@ ENTRY(sys_call_table)
 	.long sys_signalfd4
 	.long sys_eventfd2
 	.long sys_epoll_create2
+	.long sys_dup3			/* 330 */
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 330a7d782591..9679fcbdeaa0 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -125,13 +125,16 @@ static int dupfd(struct file *file, unsigned int start, int cloexec)
 	return fd;
 }
 
-asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd)
+asmlinkage long sys_dup3(unsigned int oldfd, unsigned int newfd, int flags)
 {
 	int err = -EBADF;
 	struct file * file, *tofree;
 	struct files_struct * files = current->files;
 	struct fdtable *fdt;
 
+	if ((flags & ~O_CLOEXEC) != 0)
+		return -EINVAL;
+
 	spin_lock(&files->file_lock);
 	if (!(file = fcheck(oldfd)))
 		goto out_unlock;
@@ -163,7 +166,10 @@ asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd)
 
 	rcu_assign_pointer(fdt->fd[newfd], file);
 	FD_SET(newfd, fdt->open_fds);
-	FD_CLR(newfd, fdt->close_on_exec);
+	if (flags & O_CLOEXEC)
+		FD_SET(newfd, fdt->close_on_exec);
+	else
+		FD_CLR(newfd, fdt->close_on_exec);
 	spin_unlock(&files->file_lock);
 
 	if (tofree)
@@ -181,6 +187,11 @@ out_fput:
 	goto out;
 }
 
+asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd)
+{
+	return sys_dup3(oldfd, newfd, 0);
+}
+
 asmlinkage long sys_dup(unsigned int fildes)
 {
 	int ret = -EBADF;
diff --git a/include/asm-x86/unistd_32.h b/include/asm-x86/unistd_32.h
index a37d6b0c4e1e..a1f6383bf695 100644
--- a/include/asm-x86/unistd_32.h
+++ b/include/asm-x86/unistd_32.h
@@ -335,6 +335,7 @@
 #define __NR_signalfd4		327
 #define __NR_eventfd2		328
 #define __NR_epoll_create2	329
+#define __NR_dup3		330
 
 #ifdef __KERNEL__
 
diff --git a/include/asm-x86/unistd_64.h b/include/asm-x86/unistd_64.h
index a1a4a5b6e5ee..f0fb2bd40cdb 100644
--- a/include/asm-x86/unistd_64.h
+++ b/include/asm-x86/unistd_64.h
@@ -647,6 +647,8 @@ __SYSCALL(__NR_signalfd4, sys_signalfd4)
 __SYSCALL(__NR_eventfd2, sys_eventfd2)
 #define __NR_epoll_create2			291
 __SYSCALL(__NR_epoll_create2, sys_epoll_create2)
+#define __NR_dup3				292
+__SYSCALL(__NR_dup3, sys_dup3)
 
 
 #ifndef __NO_STUBS
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 85953240f28c..034d3358549e 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -305,6 +305,7 @@ asmlinkage long sys_fcntl64(unsigned int fd,
 #endif
 asmlinkage long sys_dup(unsigned int fildes);
 asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd);
+asmlinkage long sys_dup3(unsigned int oldfd, unsigned int newfd, int flags);
 asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int on);
 asmlinkage long sys_ioctl(unsigned int fd, unsigned int cmd,
 				unsigned long arg);
-- 
cgit v1.2.3


From ed8cae8ba01348bfd83333f4648dd807b04d7f08 Mon Sep 17 00:00:00 2001
From: Ulrich Drepper <drepper@redhat.com>
Date: Wed, 23 Jul 2008 21:29:30 -0700
Subject: flag parameters: pipe

This patch introduces the new syscall pipe2 which is like pipe but it also
takes an additional parameter which takes a flag value.  This patch implements
the handling of O_CLOEXEC for the flag.  I did not add support for the new
syscall for the architectures which have a special sys_pipe implementation.  I
think the maintainers of those archs have the chance to go with the unified
implementation but that's up to them.

The implementation introduces do_pipe_flags.  I did that instead of changing
all callers of do_pipe because some of the callers are written in assembler.
I would probably screw up changing the assembly code.  To avoid breaking code
do_pipe is now a small wrapper around do_pipe_flags.  Once all callers are
changed over to do_pipe_flags the old do_pipe function can be removed.

The following test must be adjusted for architectures other than x86 and
x86-64 and in case the syscall numbers changed.

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef __NR_pipe2
# ifdef __x86_64__
#  define __NR_pipe2 293
# elif defined __i386__
#  define __NR_pipe2 331
# else
#  error "need __NR_pipe2"
# endif
#endif

int
main (void)
{
  int fd[2];
  if (syscall (__NR_pipe2, fd, 0) != 0)
    {
      puts ("pipe2(0) failed");
      return 1;
    }
  for (int i = 0; i < 2; ++i)
    {
      int coe = fcntl (fd[i], F_GETFD);
      if (coe == -1)
        {
          puts ("fcntl failed");
          return 1;
        }
      if (coe & FD_CLOEXEC)
        {
          printf ("pipe2(0) set close-on-exit for fd[%d]\n", i);
          return 1;
        }
    }
  close (fd[0]);
  close (fd[1]);

  if (syscall (__NR_pipe2, fd, O_CLOEXEC) != 0)
    {
      puts ("pipe2(O_CLOEXEC) failed");
      return 1;
    }
  for (int i = 0; i < 2; ++i)
    {
      int coe = fcntl (fd[i], F_GETFD);
      if (coe == -1)
        {
          puts ("fcntl failed");
          return 1;
        }
      if ((coe & FD_CLOEXEC) == 0)
        {
          printf ("pipe2(O_CLOEXEC) does not set close-on-exit for fd[%d]\n", i);
          return 1;
        }
    }
  close (fd[0]);
  close (fd[1]);

  puts ("OK");

  return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Cc: <linux-arch@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/ia64/ia32/sys_ia32.c          |  2 +-
 arch/ia64/kernel/sys_ia64.c        |  2 +-
 arch/mips/kernel/syscall.c         |  2 +-
 arch/parisc/hpux/sys_hpux.c        |  2 +-
 arch/sh/kernel/sys_sh32.c          |  2 +-
 arch/sparc/kernel/sys_sparc.c      |  2 +-
 arch/sparc64/kernel/sys_sparc.c    |  2 +-
 arch/x86/ia32/ia32entry.S          |  1 +
 arch/x86/ia32/sys_ia32.c           |  2 +-
 arch/x86/kernel/syscall_table_32.S |  1 +
 arch/xtensa/kernel/syscall.c       |  2 +-
 fs/pipe.c                          | 23 ++++++++++++++++++-----
 include/asm-x86/unistd_32.h        |  1 +
 include/asm-x86/unistd_64.h        |  2 ++
 include/linux/fs.h                 |  1 +
 15 files changed, 33 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/ia32/sys_ia32.c b/arch/ia64/ia32/sys_ia32.c
index 7e028ceb93ba..465116aecb85 100644
--- a/arch/ia64/ia32/sys_ia32.c
+++ b/arch/ia64/ia32/sys_ia32.c
@@ -1139,7 +1139,7 @@ sys32_pipe (int __user *fd)
 	int retval;
 	int fds[2];
 
-	retval = do_pipe(fds);
+	retval = do_pipe_flags(fds, 0);
 	if (retval)
 		goto out;
 	if (copy_to_user(fd, fds, sizeof(fds)))
diff --git a/arch/ia64/kernel/sys_ia64.c b/arch/ia64/kernel/sys_ia64.c
index 1eda194b9559..bcbb6d8792d3 100644
--- a/arch/ia64/kernel/sys_ia64.c
+++ b/arch/ia64/kernel/sys_ia64.c
@@ -160,7 +160,7 @@ sys_pipe (void)
 	int fd[2];
 	int retval;
 
-	retval = do_pipe(fd);
+	retval = do_pipe_flags(fd, 0);
 	if (retval)
 		goto out;
 	retval = fd[0];
diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c
index 3523c8d12eda..343015a2f418 100644
--- a/arch/mips/kernel/syscall.c
+++ b/arch/mips/kernel/syscall.c
@@ -52,7 +52,7 @@ asmlinkage int sysm_pipe(nabi_no_regargs volatile struct pt_regs regs)
 	int fd[2];
 	int error, res;
 
-	error = do_pipe(fd);
+	error = do_pipe_flags(fd, 0);
 	if (error) {
 		res = error;
 		goto out;
diff --git a/arch/parisc/hpux/sys_hpux.c b/arch/parisc/hpux/sys_hpux.c
index 0c5b9dabb475..be255ebb609c 100644
--- a/arch/parisc/hpux/sys_hpux.c
+++ b/arch/parisc/hpux/sys_hpux.c
@@ -448,7 +448,7 @@ int hpux_pipe(int *kstack_fildes)
 	int error;
 
 	lock_kernel();
-	error = do_pipe(kstack_fildes);
+	error = do_pipe_flags(kstack_fildes, 0);
 	unlock_kernel();
 	return error;
 }
diff --git a/arch/sh/kernel/sys_sh32.c b/arch/sh/kernel/sys_sh32.c
index 125e493ead82..f0aa5c398656 100644
--- a/arch/sh/kernel/sys_sh32.c
+++ b/arch/sh/kernel/sys_sh32.c
@@ -29,7 +29,7 @@ asmlinkage int sys_pipe(unsigned long r4, unsigned long r5,
 	int fd[2];
 	int error;
 
-	error = do_pipe(fd);
+	error = do_pipe_flags(fd, 0);
 	if (!error) {
 		regs->regs[1] = fd[1];
 		return fd[0];
diff --git a/arch/sparc/kernel/sys_sparc.c b/arch/sparc/kernel/sys_sparc.c
index 3c6b49a53ae8..4d73421559c3 100644
--- a/arch/sparc/kernel/sys_sparc.c
+++ b/arch/sparc/kernel/sys_sparc.c
@@ -97,7 +97,7 @@ asmlinkage int sparc_pipe(struct pt_regs *regs)
 	int fd[2];
 	int error;
 
-	error = do_pipe(fd);
+	error = do_pipe_flags(fd, 0);
 	if (error)
 		goto out;
 	regs->u_regs[UREG_I1] = fd[1];
diff --git a/arch/sparc64/kernel/sys_sparc.c b/arch/sparc64/kernel/sys_sparc.c
index e1f4eba2e576..39749e32dc7e 100644
--- a/arch/sparc64/kernel/sys_sparc.c
+++ b/arch/sparc64/kernel/sys_sparc.c
@@ -418,7 +418,7 @@ asmlinkage long sparc_pipe(struct pt_regs *regs)
 	int fd[2];
 	int error;
 
-	error = do_pipe(fd);
+	error = do_pipe_flags(fd, 0);
 	if (error)
 		goto out;
 	regs->u_regs[UREG_I1] = fd[1];
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 5614a8f7bed4..18808b164570 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -830,4 +830,5 @@ ia32_sys_call_table:
 	.quad sys_eventfd2
 	.quad sys_epoll_create2
 	.quad sys_dup3			/* 330 */
+	.quad sys_pipe2
 ia32_syscall_end:
diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c
index f00afdf61e67..d3c64088b981 100644
--- a/arch/x86/ia32/sys_ia32.c
+++ b/arch/x86/ia32/sys_ia32.c
@@ -238,7 +238,7 @@ asmlinkage long sys32_pipe(int __user *fd)
 	int retval;
 	int fds[2];
 
-	retval = do_pipe(fds);
+	retval = do_pipe_flags(fds, 0);
 	if (retval)
 		goto out;
 	if (copy_to_user(fd, fds, sizeof(fds)))
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index 24a3f1ea6a0e..66154769d52f 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -330,3 +330,4 @@ ENTRY(sys_call_table)
 	.long sys_eventfd2
 	.long sys_epoll_create2
 	.long sys_dup3			/* 330 */
+	.long sys_pipe2
diff --git a/arch/xtensa/kernel/syscall.c b/arch/xtensa/kernel/syscall.c
index f3e16efcd47a..ac15ecbdf919 100644
--- a/arch/xtensa/kernel/syscall.c
+++ b/arch/xtensa/kernel/syscall.c
@@ -49,7 +49,7 @@ asmlinkage long xtensa_pipe(int __user *userfds)
 	int fd[2];
 	int error;
 
-	error = do_pipe(fd);
+	error = do_pipe_flags(fd, 0);
 	if (!error) {
 		if (copy_to_user(userfds, fd, 2 * sizeof(int)))
 			error = -EFAULT;
diff --git a/fs/pipe.c b/fs/pipe.c
index 700f4e0d9572..68e82061070c 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -1027,12 +1027,15 @@ struct file *create_read_pipe(struct file *wrf)
 	return f;
 }
 
-int do_pipe(int *fd)
+int do_pipe_flags(int *fd, int flags)
 {
 	struct file *fw, *fr;
 	int error;
 	int fdw, fdr;
 
+	if (flags & ~O_CLOEXEC)
+		return -EINVAL;
+
 	fw = create_write_pipe();
 	if (IS_ERR(fw))
 		return PTR_ERR(fw);
@@ -1041,12 +1044,12 @@ int do_pipe(int *fd)
 	if (IS_ERR(fr))
 		goto err_write_pipe;
 
-	error = get_unused_fd();
+	error = get_unused_fd_flags(flags);
 	if (error < 0)
 		goto err_read_pipe;
 	fdr = error;
 
-	error = get_unused_fd();
+	error = get_unused_fd_flags(flags);
 	if (error < 0)
 		goto err_fdr;
 	fdw = error;
@@ -1074,16 +1077,21 @@ int do_pipe(int *fd)
 	return error;
 }
 
+int do_pipe(int *fd)
+{
+	return do_pipe_flags(fd, 0);
+}
+
 /*
  * sys_pipe() is the normal C calling standard for creating
  * a pipe. It's not the way Unix traditionally does this, though.
  */
-asmlinkage long __weak sys_pipe(int __user *fildes)
+asmlinkage long __weak sys_pipe2(int __user *fildes, int flags)
 {
 	int fd[2];
 	int error;
 
-	error = do_pipe(fd);
+	error = do_pipe_flags(fd, flags);
 	if (!error) {
 		if (copy_to_user(fildes, fd, sizeof(fd))) {
 			sys_close(fd[0]);
@@ -1094,6 +1102,11 @@ asmlinkage long __weak sys_pipe(int __user *fildes)
 	return error;
 }
 
+asmlinkage long __weak sys_pipe(int __user *fildes)
+{
+	return sys_pipe2(fildes, 0);
+}
+
 /*
  * pipefs should _never_ be mounted by userland - too much of security hassle,
  * no real gain from having the whole whorehouse mounted. So we don't need
diff --git a/include/asm-x86/unistd_32.h b/include/asm-x86/unistd_32.h
index a1f6383bf695..748a05c77da4 100644
--- a/include/asm-x86/unistd_32.h
+++ b/include/asm-x86/unistd_32.h
@@ -336,6 +336,7 @@
 #define __NR_eventfd2		328
 #define __NR_epoll_create2	329
 #define __NR_dup3		330
+#define __NR_pipe2		331
 
 #ifdef __KERNEL__
 
diff --git a/include/asm-x86/unistd_64.h b/include/asm-x86/unistd_64.h
index f0fb2bd40cdb..d2284b43ad58 100644
--- a/include/asm-x86/unistd_64.h
+++ b/include/asm-x86/unistd_64.h
@@ -649,6 +649,8 @@ __SYSCALL(__NR_eventfd2, sys_eventfd2)
 __SYSCALL(__NR_epoll_create2, sys_epoll_create2)
 #define __NR_dup3				292
 __SYSCALL(__NR_dup3, sys_dup3)
+#define __NR_pipe2				293
+__SYSCALL(__NR_pipe2, sys_pipe2)
 
 
 #ifndef __NO_STUBS
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e5e6a244096c..0e80cd717d32 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1777,6 +1777,7 @@ static inline void allow_write_access(struct file *file)
 		atomic_inc(&file->f_path.dentry->d_inode->i_writecount);
 }
 extern int do_pipe(int *);
+extern int do_pipe_flags(int *, int);
 extern struct file *create_read_pipe(struct file *f);
 extern struct file *create_write_pipe(void);
 extern void free_write_pipe(struct file *);
-- 
cgit v1.2.3


From 4006553b06306b34054529477b06b68a1c66249b Mon Sep 17 00:00:00 2001
From: Ulrich Drepper <drepper@redhat.com>
Date: Wed, 23 Jul 2008 21:29:32 -0700
Subject: flag parameters: inotify_init

This patch introduces the new syscall inotify_init1 (note: the 1 stands for
the one parameter the syscall takes, as opposed to no parameter before).  The
values accepted for this parameter are function-specific and defined in the
inotify.h header.  Here the values must match the O_* flags, though.  In this
patch CLOEXEC support is introduced.

The following test must be adjusted for architectures other than x86 and
x86-64 and in case the syscall numbers changed.

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef __NR_inotify_init1
# ifdef __x86_64__
#  define __NR_inotify_init1 294
# elif defined __i386__
#  define __NR_inotify_init1 332
# else
#  error "need __NR_inotify_init1"
# endif
#endif

#define IN_CLOEXEC O_CLOEXEC

int
main (void)
{
  int fd;
  fd = syscall (__NR_inotify_init1, 0);
  if (fd == -1)
    {
      puts ("inotify_init1(0) failed");
      return 1;
    }
  int coe = fcntl (fd, F_GETFD);
  if (coe == -1)
    {
      puts ("fcntl failed");
      return 1;
    }
  if (coe & FD_CLOEXEC)
    {
      puts ("inotify_init1(0) set close-on-exit");
      return 1;
    }
  close (fd);

  fd = syscall (__NR_inotify_init1, IN_CLOEXEC);
  if (fd == -1)
    {
      puts ("inotify_init1(IN_CLOEXEC) failed");
      return 1;
    }
  coe = fcntl (fd, F_GETFD);
  if (coe == -1)
    {
      puts ("fcntl failed");
      return 1;
    }
  if ((coe & FD_CLOEXEC) == 0)
    {
      puts ("inotify_init1(O_CLOEXEC) does not set close-on-exit");
      return 1;
    }
  close (fd);

  puts ("OK");

  return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

[akpm@linux-foundation.org: add sys_ni stub]
Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Cc: <linux-arch@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/ia32/ia32entry.S          |  1 +
 arch/x86/kernel/syscall_table_32.S |  1 +
 fs/inotify_user.c                  | 12 ++++++++++--
 include/asm-x86/unistd_32.h        |  1 +
 include/asm-x86/unistd_64.h        |  2 ++
 include/linux/inotify.h            |  5 +++++
 include/linux/syscalls.h           |  1 +
 kernel/sys_ni.c                    |  1 +
 8 files changed, 22 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 18808b164570..4541073dd837 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -831,4 +831,5 @@ ia32_sys_call_table:
 	.quad sys_epoll_create2
 	.quad sys_dup3			/* 330 */
 	.quad sys_pipe2
+	.quad sys_inotify_init1
 ia32_syscall_end:
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index 66154769d52f..f59aba5ff0f0 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -331,3 +331,4 @@ ENTRY(sys_call_table)
 	.long sys_epoll_create2
 	.long sys_dup3			/* 330 */
 	.long sys_pipe2
+	.long sys_inotify_init1
diff --git a/fs/inotify_user.c b/fs/inotify_user.c
index 6676c06bb7c1..851005998cd4 100644
--- a/fs/inotify_user.c
+++ b/fs/inotify_user.c
@@ -566,7 +566,7 @@ static const struct inotify_operations inotify_user_ops = {
 	.destroy_watch	= free_inotify_user_watch,
 };
 
-asmlinkage long sys_inotify_init(void)
+asmlinkage long sys_inotify_init1(int flags)
 {
 	struct inotify_device *dev;
 	struct inotify_handle *ih;
@@ -574,7 +574,10 @@ asmlinkage long sys_inotify_init(void)
 	struct file *filp;
 	int fd, ret;
 
-	fd = get_unused_fd();
+	if (flags & ~IN_CLOEXEC)
+		return -EINVAL;
+
+	fd = get_unused_fd_flags(flags & O_CLOEXEC);
 	if (fd < 0)
 		return fd;
 
@@ -638,6 +641,11 @@ out_put_fd:
 	return ret;
 }
 
+asmlinkage long sys_inotify_init(void)
+{
+	return sys_inotify_init1(0);
+}
+
 asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask)
 {
 	struct inode *inode;
diff --git a/include/asm-x86/unistd_32.h b/include/asm-x86/unistd_32.h
index 748a05c77da4..b3daf503ab93 100644
--- a/include/asm-x86/unistd_32.h
+++ b/include/asm-x86/unistd_32.h
@@ -337,6 +337,7 @@
 #define __NR_epoll_create2	329
 #define __NR_dup3		330
 #define __NR_pipe2		331
+#define __NR_inotify_init1	332
 
 #ifdef __KERNEL__
 
diff --git a/include/asm-x86/unistd_64.h b/include/asm-x86/unistd_64.h
index d2284b43ad58..c8cb88d70c6b 100644
--- a/include/asm-x86/unistd_64.h
+++ b/include/asm-x86/unistd_64.h
@@ -651,6 +651,8 @@ __SYSCALL(__NR_epoll_create2, sys_epoll_create2)
 __SYSCALL(__NR_dup3, sys_dup3)
 #define __NR_pipe2				293
 __SYSCALL(__NR_pipe2, sys_pipe2)
+#define __NR_inotify_init1			294
+__SYSCALL(__NR_inotify_init1, sys_inotify_init1)
 
 
 #ifndef __NO_STUBS
diff --git a/include/linux/inotify.h b/include/linux/inotify.h
index 742b917e7d1b..72ef82120512 100644
--- a/include/linux/inotify.h
+++ b/include/linux/inotify.h
@@ -7,6 +7,8 @@
 #ifndef _LINUX_INOTIFY_H
 #define _LINUX_INOTIFY_H
 
+/* For O_CLOEXEC */
+#include <linux/fcntl.h>
 #include <linux/types.h>
 
 /*
@@ -63,6 +65,9 @@ struct inotify_event {
 			 IN_MOVED_TO | IN_DELETE | IN_CREATE | IN_DELETE_SELF | \
 			 IN_MOVE_SELF)
 
+/* Flags for sys_inotify_init1.  */
+#define IN_CLOEXEC O_CLOEXEC
+
 #ifdef __KERNEL__
 
 #include <linux/dcache.h>
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 034d3358549e..93a7e7f017a6 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -547,6 +547,7 @@ asmlinkage long sys_get_mempolicy(int __user *policy,
 				unsigned long addr, unsigned long flags);
 
 asmlinkage long sys_inotify_init(void);
+asmlinkage long sys_inotify_init1(int flags);
 asmlinkage long sys_inotify_add_watch(int fd, const char __user *path,
 					u32 mask);
 asmlinkage long sys_inotify_rm_watch(int fd, u32 wd);
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 2a361ccdc7ca..bd66ac5406f3 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -96,6 +96,7 @@ cond_syscall(sys_keyctl);
 cond_syscall(compat_sys_keyctl);
 cond_syscall(compat_sys_socketcall);
 cond_syscall(sys_inotify_init);
+cond_syscall(sys_inotify_init1);
 cond_syscall(sys_inotify_add_watch);
 cond_syscall(sys_inotify_rm_watch);
 cond_syscall(sys_migrate_pages);
-- 
cgit v1.2.3


From 77d2720059618b9b6e827a8b73831eb6c6fad63c Mon Sep 17 00:00:00 2001
From: Ulrich Drepper <drepper@redhat.com>
Date: Wed, 23 Jul 2008 21:29:35 -0700
Subject: flag parameters: NONBLOCK in socket and socketpair

This patch introduces support for the SOCK_NONBLOCK flag in socket,
socketpair, and  paccept.  To do this the internal function sock_attach_fd
gets an additional parameter which it uses to set the appropriate flag for
the file descriptor.

Given that in modern, scalable programs almost all socket connections are
non-blocking and the minimal additional cost for the new functionality
I see no reason not to add this code.

The following test must be adjusted for architectures other than x86 and
x86-64 and in case the syscall numbers changed.

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include <fcntl.h>
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <sys/syscall.h>

#ifndef __NR_paccept
# ifdef __x86_64__
#  define __NR_paccept 288
# elif defined __i386__
#  define SYS_PACCEPT 18
#  define USE_SOCKETCALL 1
# else
#  error "need __NR_paccept"
# endif
#endif

#ifdef USE_SOCKETCALL
# define paccept(fd, addr, addrlen, mask, flags) \
  ({ long args[6] = { \
       (long) fd, (long) addr, (long) addrlen, (long) mask, 8, (long) flags }; \
     syscall (__NR_socketcall, SYS_PACCEPT, args); })
#else
# define paccept(fd, addr, addrlen, mask, flags) \
  syscall (__NR_paccept, fd, addr, addrlen, mask, 8, flags)
#endif

#define PORT 57392

#define SOCK_NONBLOCK O_NONBLOCK

static pthread_barrier_t b;

static void *
tf (void *arg)
{
  pthread_barrier_wait (&b);
  int s = socket (AF_INET, SOCK_STREAM, 0);
  struct sockaddr_in sin;
  sin.sin_family = AF_INET;
  sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
  sin.sin_port = htons (PORT);
  connect (s, (const struct sockaddr *) &sin, sizeof (sin));
  close (s);
  pthread_barrier_wait (&b);

  pthread_barrier_wait (&b);
  s = socket (AF_INET, SOCK_STREAM, 0);
  sin.sin_port = htons (PORT);
  connect (s, (const struct sockaddr *) &sin, sizeof (sin));
  close (s);
  pthread_barrier_wait (&b);

  return NULL;
}

int
main (void)
{
  int fd;
  fd = socket (PF_INET, SOCK_STREAM, 0);
  if (fd == -1)
    {
      puts ("socket(0) failed");
      return 1;
    }
  int fl = fcntl (fd, F_GETFL);
  if (fl == -1)
    {
      puts ("fcntl failed");
      return 1;
    }
  if (fl & O_NONBLOCK)
    {
      puts ("socket(0) set non-blocking mode");
      return 1;
    }
  close (fd);

  fd = socket (PF_INET, SOCK_STREAM|SOCK_NONBLOCK, 0);
  if (fd == -1)
    {
      puts ("socket(SOCK_NONBLOCK) failed");
      return 1;
    }
  fl = fcntl (fd, F_GETFL);
  if (fl == -1)
    {
      puts ("fcntl failed");
      return 1;
    }
  if ((fl & O_NONBLOCK) == 0)
    {
      puts ("socket(SOCK_NONBLOCK) does not set non-blocking mode");
      return 1;
    }
  close (fd);

  int fds[2];
  if (socketpair (PF_UNIX, SOCK_STREAM, 0, fds) == -1)
    {
      puts ("socketpair(0) failed");
      return 1;
    }
  for (int i = 0; i < 2; ++i)
    {
      fl = fcntl (fds[i], F_GETFL);
      if (fl == -1)
        {
          puts ("fcntl failed");
          return 1;
        }
      if (fl & O_NONBLOCK)
        {
          printf ("socketpair(0) set non-blocking mode for fds[%d]\n", i);
          return 1;
        }
      close (fds[i]);
    }

  if (socketpair (PF_UNIX, SOCK_STREAM|SOCK_NONBLOCK, 0, fds) == -1)
    {
      puts ("socketpair(SOCK_NONBLOCK) failed");
      return 1;
    }
  for (int i = 0; i < 2; ++i)
    {
      fl = fcntl (fds[i], F_GETFL);
      if (fl == -1)
        {
          puts ("fcntl failed");
          return 1;
        }
      if ((fl & O_NONBLOCK) == 0)
        {
          printf ("socketpair(SOCK_NONBLOCK) does not set non-blocking mode for fds[%d]\n", i);
          return 1;
        }
      close (fds[i]);
    }

  pthread_barrier_init (&b, NULL, 2);

  struct sockaddr_in sin;
  pthread_t th;
  if (pthread_create (&th, NULL, tf, NULL) != 0)
    {
      puts ("pthread_create failed");
      return 1;
    }

  int s = socket (AF_INET, SOCK_STREAM, 0);
  int reuse = 1;
  setsockopt (s, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof (reuse));
  sin.sin_family = AF_INET;
  sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
  sin.sin_port = htons (PORT);
  bind (s, (struct sockaddr *) &sin, sizeof (sin));
  listen (s, SOMAXCONN);

  pthread_barrier_wait (&b);

  int s2 = paccept (s, NULL, 0, NULL, 0);
  if (s2 < 0)
    {
      puts ("paccept(0) failed");
      return 1;
    }

  fl = fcntl (s2, F_GETFL);
  if (fl & O_NONBLOCK)
    {
      puts ("paccept(0) set non-blocking mode");
      return 1;
    }
  close (s2);
  close (s);

  pthread_barrier_wait (&b);

  s = socket (AF_INET, SOCK_STREAM, 0);
  sin.sin_port = htons (PORT);
  setsockopt (s, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof (reuse));
  bind (s, (struct sockaddr *) &sin, sizeof (sin));
  listen (s, SOMAXCONN);

  pthread_barrier_wait (&b);

  s2 = paccept (s, NULL, 0, NULL, SOCK_NONBLOCK);
  if (s2 < 0)
    {
      puts ("paccept(SOCK_NONBLOCK) failed");
      return 1;
    }

  fl = fcntl (s2, F_GETFL);
  if ((fl & O_NONBLOCK) == 0)
    {
      puts ("paccept(SOCK_NONBLOCK) does not set non-blocking mode");
      return 1;
    }
  close (s2);
  close (s);

  pthread_barrier_wait (&b);
  puts ("OK");

  return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/net.h |  2 +-
 net/socket.c        | 20 ++++++++++----------
 2 files changed, 11 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/net.h b/include/linux/net.h
index 39a23af059b4..2f999fbb188d 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -20,7 +20,7 @@
 
 #include <linux/wait.h>
 #include <linux/socket.h>
-#include <linux/fcntl.h>	/* For O_CLOEXEC */
+#include <linux/fcntl.h>	/* For O_CLOEXEC and O_NONBLOCK */
 #include <asm/socket.h>
 
 struct poll_table_struct;
diff --git a/net/socket.c b/net/socket.c
index d163adff95bf..31105f9048a8 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -369,7 +369,7 @@ static int sock_alloc_fd(struct file **filep, int flags)
 	return fd;
 }
 
-static int sock_attach_fd(struct socket *sock, struct file *file)
+static int sock_attach_fd(struct socket *sock, struct file *file, int flags)
 {
 	struct dentry *dentry;
 	struct qstr name = { .name = "" };
@@ -391,7 +391,7 @@ static int sock_attach_fd(struct socket *sock, struct file *file)
 	init_file(file, sock_mnt, dentry, FMODE_READ | FMODE_WRITE,
 		  &socket_file_ops);
 	SOCK_INODE(sock)->i_fop = &socket_file_ops;
-	file->f_flags = O_RDWR;
+	file->f_flags = O_RDWR | (flags & O_NONBLOCK);
 	file->f_pos = 0;
 	file->private_data = sock;
 
@@ -404,7 +404,7 @@ int sock_map_fd(struct socket *sock, int flags)
 	int fd = sock_alloc_fd(&newfile, flags);
 
 	if (likely(fd >= 0)) {
-		int err = sock_attach_fd(sock, newfile);
+		int err = sock_attach_fd(sock, newfile, flags);
 
 		if (unlikely(err < 0)) {
 			put_filp(newfile);
@@ -1223,7 +1223,7 @@ asmlinkage long sys_socket(int family, int type, int protocol)
 	int flags;
 
 	flags = type & ~SOCK_TYPE_MASK;
-	if (flags & ~SOCK_CLOEXEC)
+	if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
 		return -EINVAL;
 	type &= SOCK_TYPE_MASK;
 
@@ -1234,7 +1234,7 @@ asmlinkage long sys_socket(int family, int type, int protocol)
 	if (retval < 0)
 		goto out;
 
-	retval = sock_map_fd(sock, flags & O_CLOEXEC);
+	retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
 	if (retval < 0)
 		goto out_release;
 
@@ -1260,7 +1260,7 @@ asmlinkage long sys_socketpair(int family, int type, int protocol,
 	int flags;
 
 	flags = type & ~SOCK_TYPE_MASK;
-	if (flags & ~SOCK_CLOEXEC)
+	if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
 		return -EINVAL;
 	type &= SOCK_TYPE_MASK;
 
@@ -1298,12 +1298,12 @@ asmlinkage long sys_socketpair(int family, int type, int protocol,
 		goto out_release_both;
 	}
 
-	err = sock_attach_fd(sock1, newfile1);
+	err = sock_attach_fd(sock1, newfile1, flags & O_NONBLOCK);
 	if (unlikely(err < 0)) {
 		goto out_fd2;
 	}
 
-	err = sock_attach_fd(sock2, newfile2);
+	err = sock_attach_fd(sock2, newfile2, flags & O_NONBLOCK);
 	if (unlikely(err < 0)) {
 		fput(newfile1);
 		goto out_fd1;
@@ -1429,7 +1429,7 @@ long do_accept(int fd, struct sockaddr __user *upeer_sockaddr,
 	int err, len, newfd, fput_needed;
 	struct sockaddr_storage address;
 
-	if (flags & ~SOCK_CLOEXEC)
+	if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
 		return -EINVAL;
 
 	if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
@@ -1459,7 +1459,7 @@ long do_accept(int fd, struct sockaddr __user *upeer_sockaddr,
 		goto out_put;
 	}
 
-	err = sock_attach_fd(newsock, newfile);
+	err = sock_attach_fd(newsock, newfile, flags & O_NONBLOCK);
 	if (err < 0)
 		goto out_fd_simple;
 
-- 
cgit v1.2.3


From 5fb5e04926a54bc1c22bba7ca166840f4476196f Mon Sep 17 00:00:00 2001
From: Ulrich Drepper <drepper@redhat.com>
Date: Wed, 23 Jul 2008 21:29:37 -0700
Subject: flag parameters: NONBLOCK in signalfd

This patch adds support for the SFD_NONBLOCK flag to signalfd4.  The
additional changes needed are minimal.

The following test must be adjusted for architectures other than x86 and
x86-64 and in case the syscall numbers changed.

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include <fcntl.h>
#include <signal.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef __NR_signalfd4
# ifdef __x86_64__
#  define __NR_signalfd4 289
# elif defined __i386__
#  define __NR_signalfd4 327
# else
#  error "need __NR_signalfd4"
# endif
#endif

#define SFD_NONBLOCK O_NONBLOCK

int
main (void)
{
  sigset_t ss;
  sigemptyset (&ss);
  sigaddset (&ss, SIGUSR1);
  int fd = syscall (__NR_signalfd4, -1, &ss, 8, 0);
  if (fd == -1)
    {
      puts ("signalfd4(0) failed");
      return 1;
    }
  int fl = fcntl (fd, F_GETFL);
  if (fl == -1)
    {
      puts ("fcntl failed");
      return 1;
    }
  if (fl & O_NONBLOCK)
    {
      puts ("signalfd4(0) set non-blocking mode");
      return 1;
    }
  close (fd);

  fd = syscall (__NR_signalfd4, -1, &ss, 8, SFD_NONBLOCK);
  if (fd == -1)
    {
      puts ("signalfd4(SFD_NONBLOCK) failed");
      return 1;
    }
  fl = fcntl (fd, F_GETFL);
  if (fl == -1)
    {
      puts ("fcntl failed");
      return 1;
    }
  if ((fl & O_NONBLOCK) == 0)
    {
      puts ("signalfd4(SFD_NONBLOCK) does not set non-blocking mode");
      return 1;
    }
  close (fd);

  puts ("OK");

  return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/signalfd.c            | 4 ++--
 include/linux/signalfd.h | 3 ++-
 2 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/fs/signalfd.c b/fs/signalfd.c
index c8609fa51a13..5441a4bca772 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -211,7 +211,7 @@ asmlinkage long sys_signalfd4(int ufd, sigset_t __user *user_mask,
 	sigset_t sigmask;
 	struct signalfd_ctx *ctx;
 
-	if (flags & ~SFD_CLOEXEC)
+	if (flags & ~(SFD_CLOEXEC | SFD_NONBLOCK))
 		return -EINVAL;
 
 	if (sizemask != sizeof(sigset_t) ||
@@ -232,7 +232,7 @@ asmlinkage long sys_signalfd4(int ufd, sigset_t __user *user_mask,
 		 * anon_inode_getfd() will install the fd.
 		 */
 		ufd = anon_inode_getfd("[signalfd]", &signalfd_fops, ctx,
-				       flags & O_CLOEXEC);
+				       flags & (O_CLOEXEC | O_NONBLOCK));
 		if (ufd < 0)
 			kfree(ctx);
 	} else {
diff --git a/include/linux/signalfd.h b/include/linux/signalfd.h
index 8b3f7b7420a1..bef0c46d4713 100644
--- a/include/linux/signalfd.h
+++ b/include/linux/signalfd.h
@@ -8,11 +8,12 @@
 #ifndef _LINUX_SIGNALFD_H
 #define _LINUX_SIGNALFD_H
 
-/* For O_CLOEXEC */
+/* For O_CLOEXEC and O_NONBLOCK */
 #include <linux/fcntl.h>
 
 /* Flags for signalfd4.  */
 #define SFD_CLOEXEC O_CLOEXEC
+#define SFD_NONBLOCK O_NONBLOCK
 
 struct signalfd_siginfo {
 	__u32 ssi_signo;
-- 
cgit v1.2.3


From e7d476dfdf0bcfed478a207aecfdc84f81efecaf Mon Sep 17 00:00:00 2001
From: Ulrich Drepper <drepper@redhat.com>
Date: Wed, 23 Jul 2008 21:29:38 -0700
Subject: flag parameters: NONBLOCK in eventfd

This patch adds support for the EFD_NONBLOCK flag to eventfd2.  The
additional changes needed are minimal.

The following test must be adjusted for architectures other than x86 and
x86-64 and in case the syscall numbers changed.

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef __NR_eventfd2
# ifdef __x86_64__
#  define __NR_eventfd2 290
# elif defined __i386__
#  define __NR_eventfd2 328
# else
#  error "need __NR_eventfd2"
# endif
#endif

#define EFD_NONBLOCK O_NONBLOCK

int
main (void)
{
  int fd = syscall (__NR_eventfd2, 1, 0);
  if (fd == -1)
    {
      puts ("eventfd2(0) failed");
      return 1;
    }
  int fl = fcntl (fd, F_GETFL);
  if (fl == -1)
    {
      puts ("fcntl failed");
      return 1;
    }
  if (fl & O_NONBLOCK)
    {
      puts ("eventfd2(0) sets non-blocking mode");
      return 1;
    }
  close (fd);

  fd = syscall (__NR_eventfd2, 1, EFD_NONBLOCK);
  if (fd == -1)
    {
      puts ("eventfd2(EFD_NONBLOCK) failed");
      return 1;
    }
  fl = fcntl (fd, F_GETFL);
  if (fl == -1)
    {
      puts ("fcntl failed");
      return 1;
    }
  if ((fl & O_NONBLOCK) == 0)
    {
      puts ("eventfd2(EFD_NONBLOCK) does not set non-blocking mode");
      return 1;
    }
  close (fd);

  puts ("OK");

  return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/eventfd.c            | 4 ++--
 include/linux/eventfd.h | 3 ++-
 2 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/fs/eventfd.c b/fs/eventfd.c
index bd420e6478ad..3ed4466177a7 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -203,7 +203,7 @@ asmlinkage long sys_eventfd2(unsigned int count, int flags)
 	int fd;
 	struct eventfd_ctx *ctx;
 
-	if (flags & ~EFD_CLOEXEC)
+	if (flags & ~(EFD_CLOEXEC | EFD_NONBLOCK))
 		return -EINVAL;
 
 	ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
@@ -218,7 +218,7 @@ asmlinkage long sys_eventfd2(unsigned int count, int flags)
 	 * anon_inode_getfd() will install the fd.
 	 */
 	fd = anon_inode_getfd("[eventfd]", &eventfd_fops, ctx,
-			      flags & O_CLOEXEC);
+			      flags & (O_CLOEXEC | O_NONBLOCK));
 	if (fd < 0)
 		kfree(ctx);
 	return fd;
diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h
index a6c0eaedb1b0..a667637b54e3 100644
--- a/include/linux/eventfd.h
+++ b/include/linux/eventfd.h
@@ -10,11 +10,12 @@
 
 #ifdef CONFIG_EVENTFD
 
-/* For O_CLOEXEC */
+/* For O_CLOEXEC and O_NONBLOCK */
 #include <linux/fcntl.h>
 
 /* Flags for eventfd2.  */
 #define EFD_CLOEXEC O_CLOEXEC
+#define EFD_NONBLOCK O_NONBLOCK
 
 struct file *eventfd_fget(int fd);
 int eventfd_signal(struct file *file, int n);
-- 
cgit v1.2.3


From 6b1ef0e60d42f2fdaec26baee8327eb156347b4f Mon Sep 17 00:00:00 2001
From: Ulrich Drepper <drepper@redhat.com>
Date: Wed, 23 Jul 2008 21:29:39 -0700
Subject: flag parameters: NONBLOCK in timerfd_create

This patch adds support for the TFD_NONBLOCK flag to timerfd_create.  The
additional changes needed are minimal.

The following test must be adjusted for architectures other than x86 and
x86-64 and in case the syscall numbers changed.

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include <fcntl.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef __NR_timerfd_create
# ifdef __x86_64__
#  define __NR_timerfd_create 283
# elif defined __i386__
#  define __NR_timerfd_create 322
# else
#  error "need __NR_timerfd_create"
# endif
#endif

#define TFD_NONBLOCK O_NONBLOCK

int
main (void)
{
  int fd = syscall (__NR_timerfd_create, CLOCK_REALTIME, 0);
  if (fd == -1)
    {
      puts ("timerfd_create(0) failed");
      return 1;
    }
  int fl = fcntl (fd, F_GETFL);
  if (fl == -1)
    {
      puts ("fcntl failed");
      return 1;
    }
  if (fl & O_NONBLOCK)
    {
      puts ("timerfd_create(0) set non-blocking mode");
      return 1;
    }
  close (fd);

  fd = syscall (__NR_timerfd_create, CLOCK_REALTIME, TFD_NONBLOCK);
  if (fd == -1)
    {
      puts ("timerfd_create(TFD_NONBLOCK) failed");
      return 1;
    }
  fl = fcntl (fd, F_GETFL);
  if (fl == -1)
    {
      puts ("fcntl failed");
      return 1;
    }
  if ((fl & O_NONBLOCK) == 0)
    {
      puts ("timerfd_create(TFD_NONBLOCK) set non-blocking mode");
      return 1;
    }
  close (fd);

  puts ("OK");

  return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/timerfd.c            | 4 ++--
 include/linux/timerfd.h | 3 ++-
 2 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/fs/timerfd.c b/fs/timerfd.c
index c6ef5e33cb34..75d44efe346c 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -184,7 +184,7 @@ asmlinkage long sys_timerfd_create(int clockid, int flags)
 	int ufd;
 	struct timerfd_ctx *ctx;
 
-	if (flags & ~TFD_CLOEXEC)
+	if (flags & ~(TFD_CLOEXEC | TFD_NONBLOCK))
 		return -EINVAL;
 	if (clockid != CLOCK_MONOTONIC &&
 	    clockid != CLOCK_REALTIME)
@@ -199,7 +199,7 @@ asmlinkage long sys_timerfd_create(int clockid, int flags)
 	hrtimer_init(&ctx->tmr, clockid, HRTIMER_MODE_ABS);
 
 	ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx,
-			       flags & O_CLOEXEC);
+			       flags & (O_CLOEXEC | O_NONBLOCK));
 	if (ufd < 0)
 		kfree(ctx);
 
diff --git a/include/linux/timerfd.h b/include/linux/timerfd.h
index 96ed97dff00f..86cb0501d3e2 100644
--- a/include/linux/timerfd.h
+++ b/include/linux/timerfd.h
@@ -8,7 +8,7 @@
 #ifndef _LINUX_TIMERFD_H
 #define _LINUX_TIMERFD_H
 
-/* For O_CLOEXEC */
+/* For O_CLOEXEC and O_NONBLOCK */
 #include <linux/fcntl.h>
 
 /* Flags for timerfd_settime.  */
@@ -16,6 +16,7 @@
 
 /* Flags for timerfd_create.  */
 #define TFD_CLOEXEC O_CLOEXEC
+#define TFD_NONBLOCK O_NONBLOCK
 
 
 #endif /* _LINUX_TIMERFD_H */
-- 
cgit v1.2.3


From be61a86d7237dd80510615f38ae21d6e1e98660c Mon Sep 17 00:00:00 2001
From: Ulrich Drepper <drepper@redhat.com>
Date: Wed, 23 Jul 2008 21:29:40 -0700
Subject: flag parameters: NONBLOCK in pipe

This patch adds O_NONBLOCK support to pipe2.  It is minimally more involved
than the patches for eventfd et.al but still trivial.  The interfaces of the
create_write_pipe and create_read_pipe helper functions were changed and the
one other caller as well.

The following test must be adjusted for architectures other than x86 and
x86-64 and in case the syscall numbers changed.

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef __NR_pipe2
# ifdef __x86_64__
#  define __NR_pipe2 293
# elif defined __i386__
#  define __NR_pipe2 331
# else
#  error "need __NR_pipe2"
# endif
#endif

int
main (void)
{
  int fds[2];
  if (syscall (__NR_pipe2, fds, 0) == -1)
    {
      puts ("pipe2(0) failed");
      return 1;
    }
  for (int i = 0; i < 2; ++i)
    {
      int fl = fcntl (fds[i], F_GETFL);
      if (fl == -1)
        {
          puts ("fcntl failed");
          return 1;
        }
      if (fl & O_NONBLOCK)
        {
          printf ("pipe2(0) set non-blocking mode for fds[%d]\n", i);
          return 1;
        }
      close (fds[i]);
    }

  if (syscall (__NR_pipe2, fds, O_NONBLOCK) == -1)
    {
      puts ("pipe2(O_NONBLOCK) failed");
      return 1;
    }
  for (int i = 0; i < 2; ++i)
    {
      int fl = fcntl (fds[i], F_GETFL);
      if (fl == -1)
        {
          puts ("fcntl failed");
          return 1;
        }
      if ((fl & O_NONBLOCK) == 0)
        {
          printf ("pipe2(O_NONBLOCK) does not set non-blocking mode for fds[%d]\n", i);
          return 1;
        }
      close (fds[i]);
    }

  puts ("OK");

  return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/pipe.c          | 14 +++++++-------
 include/linux/fs.h |  4 ++--
 kernel/kmod.c      |  4 ++--
 3 files changed, 11 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/fs/pipe.c b/fs/pipe.c
index 68e82061070c..10c4e9aa5c49 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -950,7 +950,7 @@ fail_inode:
 	return NULL;
 }
 
-struct file *create_write_pipe(void)
+struct file *create_write_pipe(int flags)
 {
 	int err;
 	struct inode *inode;
@@ -983,7 +983,7 @@ struct file *create_write_pipe(void)
 		goto err_dentry;
 	f->f_mapping = inode->i_mapping;
 
-	f->f_flags = O_WRONLY;
+	f->f_flags = O_WRONLY | (flags & O_NONBLOCK);
 	f->f_version = 0;
 
 	return f;
@@ -1007,7 +1007,7 @@ void free_write_pipe(struct file *f)
 	put_filp(f);
 }
 
-struct file *create_read_pipe(struct file *wrf)
+struct file *create_read_pipe(struct file *wrf, int flags)
 {
 	struct file *f = get_empty_filp();
 	if (!f)
@@ -1019,7 +1019,7 @@ struct file *create_read_pipe(struct file *wrf)
 	f->f_mapping = wrf->f_path.dentry->d_inode->i_mapping;
 
 	f->f_pos = 0;
-	f->f_flags = O_RDONLY;
+	f->f_flags = O_RDONLY | (flags & O_NONBLOCK);
 	f->f_op = &read_pipe_fops;
 	f->f_mode = FMODE_READ;
 	f->f_version = 0;
@@ -1033,13 +1033,13 @@ int do_pipe_flags(int *fd, int flags)
 	int error;
 	int fdw, fdr;
 
-	if (flags & ~O_CLOEXEC)
+	if (flags & ~(O_CLOEXEC | O_NONBLOCK))
 		return -EINVAL;
 
-	fw = create_write_pipe();
+	fw = create_write_pipe(flags);
 	if (IS_ERR(fw))
 		return PTR_ERR(fw);
-	fr = create_read_pipe(fw);
+	fr = create_read_pipe(fw, flags);
 	error = PTR_ERR(fr);
 	if (IS_ERR(fr))
 		goto err_write_pipe;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 0e80cd717d32..4b86f806014c 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1778,8 +1778,8 @@ static inline void allow_write_access(struct file *file)
 }
 extern int do_pipe(int *);
 extern int do_pipe_flags(int *, int);
-extern struct file *create_read_pipe(struct file *f);
-extern struct file *create_write_pipe(void);
+extern struct file *create_read_pipe(struct file *f, int flags);
+extern struct file *create_write_pipe(int flags);
 extern void free_write_pipe(struct file *);
 
 extern struct file *do_filp_open(int dfd, const char *pathname,
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 90d7af1c1655..2989f67c4446 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -417,12 +417,12 @@ int call_usermodehelper_stdinpipe(struct subprocess_info *sub_info,
 {
 	struct file *f;
 
-	f = create_write_pipe();
+	f = create_write_pipe(0);
 	if (IS_ERR(f))
 		return PTR_ERR(f);
 	*filp = f;
 
-	f = create_read_pipe(f);
+	f = create_read_pipe(f, 0);
 	if (IS_ERR(f)) {
 		free_write_pipe(*filp);
 		return PTR_ERR(f);
-- 
cgit v1.2.3


From 510df2dd482496083e1c3b1a8c9b6afd5fa4c7d7 Mon Sep 17 00:00:00 2001
From: Ulrich Drepper <drepper@redhat.com>
Date: Wed, 23 Jul 2008 21:29:41 -0700
Subject: flag parameters: NONBLOCK in inotify_init

This patch adds non-blocking support for inotify_init1.  The
additional changes needed are minimal.

The following test must be adjusted for architectures other than x86 and
x86-64 and in case the syscall numbers changed.

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef __NR_inotify_init1
# ifdef __x86_64__
#  define __NR_inotify_init1 294
# elif defined __i386__
#  define __NR_inotify_init1 332
# else
#  error "need __NR_inotify_init1"
# endif
#endif

#define IN_NONBLOCK O_NONBLOCK

int
main (void)
{
  int fd = syscall (__NR_inotify_init1, 0);
  if (fd == -1)
    {
      puts ("inotify_init1(0) failed");
      return 1;
    }
  int fl = fcntl (fd, F_GETFL);
  if (fl == -1)
    {
      puts ("fcntl failed");
      return 1;
    }
  if (fl & O_NONBLOCK)
    {
      puts ("inotify_init1(0) set non-blocking mode");
      return 1;
    }
  close (fd);

  fd = syscall (__NR_inotify_init1, IN_NONBLOCK);
  if (fd == -1)
    {
      puts ("inotify_init1(IN_NONBLOCK) failed");
      return 1;
    }
  fl = fcntl (fd, F_GETFL);
  if (fl == -1)
    {
      puts ("fcntl failed");
      return 1;
    }
  if ((fl & O_NONBLOCK) == 0)
    {
      puts ("inotify_init1(IN_NONBLOCK) set non-blocking mode");
      return 1;
    }
  close (fd);

  puts ("OK");

  return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/inotify_user.c       | 4 ++--
 include/linux/inotify.h | 3 ++-
 2 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/fs/inotify_user.c b/fs/inotify_user.c
index 851005998cd4..dc7e1f619748 100644
--- a/fs/inotify_user.c
+++ b/fs/inotify_user.c
@@ -574,7 +574,7 @@ asmlinkage long sys_inotify_init1(int flags)
 	struct file *filp;
 	int fd, ret;
 
-	if (flags & ~IN_CLOEXEC)
+	if (flags & ~(IN_CLOEXEC | IN_NONBLOCK))
 		return -EINVAL;
 
 	fd = get_unused_fd_flags(flags & O_CLOEXEC);
@@ -613,7 +613,7 @@ asmlinkage long sys_inotify_init1(int flags)
 	filp->f_path.dentry = dget(inotify_mnt->mnt_root);
 	filp->f_mapping = filp->f_path.dentry->d_inode->i_mapping;
 	filp->f_mode = FMODE_READ;
-	filp->f_flags = O_RDONLY;
+	filp->f_flags = O_RDONLY | (flags & O_NONBLOCK);
 	filp->private_data = dev;
 
 	INIT_LIST_HEAD(&dev->events);
diff --git a/include/linux/inotify.h b/include/linux/inotify.h
index 72ef82120512..bd578578a8b9 100644
--- a/include/linux/inotify.h
+++ b/include/linux/inotify.h
@@ -7,7 +7,7 @@
 #ifndef _LINUX_INOTIFY_H
 #define _LINUX_INOTIFY_H
 
-/* For O_CLOEXEC */
+/* For O_CLOEXEC and O_NONBLOCK */
 #include <linux/fcntl.h>
 #include <linux/types.h>
 
@@ -67,6 +67,7 @@ struct inotify_event {
 
 /* Flags for sys_inotify_init1.  */
 #define IN_CLOEXEC O_CLOEXEC
+#define IN_NONBLOCK O_NONBLOCK
 
 #ifdef __KERNEL__
 
-- 
cgit v1.2.3


From 9fe5ad9c8cef9ad5873d8ee55d1cf00d9b607df0 Mon Sep 17 00:00:00 2001
From: Ulrich Drepper <drepper@redhat.com>
Date: Wed, 23 Jul 2008 21:29:43 -0700
Subject: flag parameters add-on: remove epoll_create size param

Remove the size parameter from the new epoll_create syscall and renames the
syscall itself.  The updated test program follows.

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include <fcntl.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef __NR_epoll_create2
# ifdef __x86_64__
#  define __NR_epoll_create2 291
# elif defined __i386__
#  define __NR_epoll_create2 329
# else
#  error "need __NR_epoll_create2"
# endif
#endif

#define EPOLL_CLOEXEC O_CLOEXEC

int
main (void)
{
  int fd = syscall (__NR_epoll_create2, 0);
  if (fd == -1)
    {
      puts ("epoll_create2(0) failed");
      return 1;
    }
  int coe = fcntl (fd, F_GETFD);
  if (coe == -1)
    {
      puts ("fcntl failed");
      return 1;
    }
  if (coe & FD_CLOEXEC)
    {
      puts ("epoll_create2(0) set close-on-exec flag");
      return 1;
    }
  close (fd);

  fd = syscall (__NR_epoll_create2, EPOLL_CLOEXEC);
  if (fd == -1)
    {
      puts ("epoll_create2(EPOLL_CLOEXEC) failed");
      return 1;
    }
  coe = fcntl (fd, F_GETFD);
  if (coe == -1)
    {
      puts ("fcntl failed");
      return 1;
    }
  if ((coe & FD_CLOEXEC) == 0)
    {
      puts ("epoll_create2(EPOLL_CLOEXEC) set close-on-exec flag");
      return 1;
    }
  close (fd);

  puts ("OK");

  return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Cc: <linux-arch@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/ia32/ia32entry.S          |  2 +-
 arch/x86/kernel/syscall_table_32.S |  2 +-
 fs/eventpoll.c                     | 18 ++++++++++--------
 include/asm-x86/unistd_32.h        |  2 +-
 include/asm-x86/unistd_64.h        |  4 ++--
 include/linux/eventpoll.h          |  2 +-
 include/linux/syscalls.h           |  2 +-
 7 files changed, 17 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 4541073dd837..e4bd1793a5e4 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -828,7 +828,7 @@ ia32_sys_call_table:
 	.quad compat_sys_timerfd_gettime
 	.quad compat_sys_signalfd4
 	.quad sys_eventfd2
-	.quad sys_epoll_create2
+	.quad sys_epoll_create1
 	.quad sys_dup3			/* 330 */
 	.quad sys_pipe2
 	.quad sys_inotify_init1
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index f59aba5ff0f0..d44395ff34c3 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -328,7 +328,7 @@ ENTRY(sys_call_table)
 	.long sys_timerfd_gettime
 	.long sys_signalfd4
 	.long sys_eventfd2
-	.long sys_epoll_create2
+	.long sys_epoll_create1
 	.long sys_dup3			/* 330 */
 	.long sys_pipe2
 	.long sys_inotify_init1
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 2fdad4204044..0c87474f7917 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1046,7 +1046,7 @@ retry:
  * RB tree. With the current implementation, the "size" parameter is ignored
  * (besides sanity checks).
  */
-asmlinkage long sys_epoll_create2(int size, int flags)
+asmlinkage long sys_epoll_create1(int flags)
 {
 	int error, fd = -1;
 	struct eventpoll *ep;
@@ -1058,14 +1058,13 @@ asmlinkage long sys_epoll_create2(int size, int flags)
 		return -EINVAL;
 
 	DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d)\n",
-		     current, size));
+		     current, flags));
 
 	/*
-	 * Sanity check on the size parameter, and create the internal data
-	 * structure ( "struct eventpoll" ).
+	 * Create the internal data structure ( "struct eventpoll" ).
 	 */
-	error = -EINVAL;
-	if (size <= 0 || (error = ep_alloc(&ep)) < 0) {
+	error = ep_alloc(&ep);
+	if (error < 0) {
 		fd = error;
 		goto error_return;
 	}
@@ -1081,14 +1080,17 @@ asmlinkage long sys_epoll_create2(int size, int flags)
 
 error_return:
 	DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n",
-		     current, size, fd));
+		     current, flags, fd));
 
 	return fd;
 }
 
 asmlinkage long sys_epoll_create(int size)
 {
-	return sys_epoll_create2(size, 0);
+	if (size < 0)
+		return -EINVAL;
+
+	return sys_epoll_create1(0);
 }
 
 /*
diff --git a/include/asm-x86/unistd_32.h b/include/asm-x86/unistd_32.h
index b3daf503ab93..d7394673b772 100644
--- a/include/asm-x86/unistd_32.h
+++ b/include/asm-x86/unistd_32.h
@@ -334,7 +334,7 @@
 #define __NR_timerfd_gettime	326
 #define __NR_signalfd4		327
 #define __NR_eventfd2		328
-#define __NR_epoll_create2	329
+#define __NR_epoll_create1	329
 #define __NR_dup3		330
 #define __NR_pipe2		331
 #define __NR_inotify_init1	332
diff --git a/include/asm-x86/unistd_64.h b/include/asm-x86/unistd_64.h
index c8cb88d70c6b..3a341d791792 100644
--- a/include/asm-x86/unistd_64.h
+++ b/include/asm-x86/unistd_64.h
@@ -645,8 +645,8 @@ __SYSCALL(__NR_paccept, sys_paccept)
 __SYSCALL(__NR_signalfd4, sys_signalfd4)
 #define __NR_eventfd2				290
 __SYSCALL(__NR_eventfd2, sys_eventfd2)
-#define __NR_epoll_create2			291
-__SYSCALL(__NR_epoll_create2, sys_epoll_create2)
+#define __NR_epoll_create1			291
+__SYSCALL(__NR_epoll_create1, sys_epoll_create1)
 #define __NR_dup3				292
 __SYSCALL(__NR_dup3, sys_dup3)
 #define __NR_pipe2				293
diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h
index 1cfaa40059c8..f1e1d3c47125 100644
--- a/include/linux/eventpoll.h
+++ b/include/linux/eventpoll.h
@@ -18,7 +18,7 @@
 #include <linux/fcntl.h>
 #include <linux/types.h>
 
-/* Flags for epoll_create2.  */
+/* Flags for epoll_create1.  */
 #define EPOLL_CLOEXEC O_CLOEXEC
 
 /* Valid opcodes to issue to sys_epoll_ctl() */
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 93a7e7f017a6..06f2bf76c030 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -431,7 +431,7 @@ asmlinkage long sys_poll(struct pollfd __user *ufds, unsigned int nfds,
 asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp,
 			fd_set __user *exp, struct timeval __user *tvp);
 asmlinkage long sys_epoll_create(int size);
-asmlinkage long sys_epoll_create2(int size, int flags);
+asmlinkage long sys_epoll_create1(int flags);
 asmlinkage long sys_epoll_ctl(int epfd, int op, int fd,
 				struct epoll_event __user *event);
 asmlinkage long sys_epoll_wait(int epfd, struct epoll_event __user *events,
-- 
cgit v1.2.3


From 102eb97564c73ea73645b38599c5cbe6f54b030c Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@secretlab.ca>
Date: Wed, 23 Jul 2008 21:29:55 -0700
Subject: spi: make spi_board_info.modalias a char array

Currently, 'modalias' in the spi_device structure is a 'const char *'.
The spi_new_device() function fills in the modalias value from a passed in
spi_board_info data block.  Since it is a pointer copy, the new spi_device
remains dependent on the spi_board_info structure after the new spi_device
is registered (no other fields in spi_device directly depend on the
spi_board_info structure; all of the other data is copied).

This causes a problem when dynamically propulating the list of attached
SPI devices.  For example, in arch/powerpc, the list of SPI devices can be
populated from data in the device tree.  With the current code, the device
tree adapter must kmalloc() a new spi_board_info structure for each new
SPI device it finds in the device tree, and there is no simple mechanism
in place for keeping track of these allocations.

This patch changes modalias from a 'const char *' to a fixed char array.
By copying the modalias string instead of referencing it, the dependency
on the spi_board_info structure is eliminated and an outside caller does
not need to maintain a separate spi_board_info allocation for each device.

If searched through the code to the best of my ability for any references
to modalias which may be affected by this change and haven't found
anything.  It has been tested with the lite5200b platform in arch/powerpc.

[dbrownell@users.sourceforge.net: cope with linux-next changes: KOBJ_NAME_LEN obliterated, etc]
Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/spi/spi.c       | 4 +++-
 include/linux/spi/spi.h | 2 +-
 2 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 1771b2456bfa..ecca4a6a6f94 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -218,6 +218,8 @@ struct spi_device *spi_new_device(struct spi_master *master,
 	if (!spi_master_get(master))
 		return NULL;
 
+	WARN_ON(strlen(chip->modalias) >= sizeof(proxy->modalias));
+
 	proxy = kzalloc(sizeof *proxy, GFP_KERNEL);
 	if (!proxy) {
 		dev_err(dev, "can't alloc dev for cs%d\n",
@@ -229,7 +231,7 @@ struct spi_device *spi_new_device(struct spi_master *master,
 	proxy->max_speed_hz = chip->max_speed_hz;
 	proxy->mode = chip->mode;
 	proxy->irq = chip->irq;
-	proxy->modalias = chip->modalias;
+	strlcpy(proxy->modalias, chip->modalias, sizeof(proxy->modalias));
 
 	snprintf(proxy->dev.bus_id, sizeof proxy->dev.bus_id,
 			"%s.%u", master->dev.bus_id,
diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index b9a76c972084..a9cc29d46653 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -82,7 +82,7 @@ struct spi_device {
 	int			irq;
 	void			*controller_state;
 	void			*controller_data;
-	const char		*modalias;
+	char			modalias[32];
 
 	/*
 	 * likely need more hooks for more protocol options affecting how
-- 
cgit v1.2.3


From aa55ddf340c9fa3f303ee16bbf35887e42c50304 Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Wed, 23 Jul 2008 21:30:29 -0700
Subject: autofs4: remove unused ioctls

The ioctls AUTOFS_IOC_TOGGLEREGHOST and AUTOFS_IOC_ASKREGHOST were added
several years ago but what they were intended for has never been
implemented (as far as I'm aware noone uses them) so remove them.

Signed-off-by: Ian Kent <raven@themaw.net>
Reviewed-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/autofs4/root.c        | 68 +-----------------------------------------------
 fs/compat_ioctl.c        |  2 --
 include/linux/auto_fs4.h |  2 --
 3 files changed, 1 insertion(+), 71 deletions(-)

(limited to 'include/linux')

diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index ae22bde0bbd7..bcfb2dc0a61b 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -25,7 +25,6 @@ static int autofs4_dir_rmdir(struct inode *,struct dentry *);
 static int autofs4_dir_mkdir(struct inode *,struct dentry *,int);
 static int autofs4_root_ioctl(struct inode *, struct file *,unsigned int,unsigned long);
 static int autofs4_dir_open(struct inode *inode, struct file *file);
-static int autofs4_root_readdir(struct file * filp, void * dirent, filldir_t filldir);
 static struct dentry *autofs4_lookup(struct inode *,struct dentry *, struct nameidata *);
 static void *autofs4_follow_link(struct dentry *, struct nameidata *);
 
@@ -36,7 +35,7 @@ const struct file_operations autofs4_root_operations = {
 	.open		= dcache_dir_open,
 	.release	= dcache_dir_close,
 	.read		= generic_read_dir,
-	.readdir	= autofs4_root_readdir,
+	.readdir	= dcache_readdir,
 	.ioctl		= autofs4_root_ioctl,
 };
 
@@ -71,28 +70,6 @@ const struct inode_operations autofs4_dir_inode_operations = {
 	.rmdir		= autofs4_dir_rmdir,
 };
 
-static int autofs4_root_readdir(struct file *file, void *dirent,
-				filldir_t filldir)
-{
-	struct autofs_sb_info *sbi = autofs4_sbi(file->f_path.dentry->d_sb);
-	int oz_mode = autofs4_oz_mode(sbi);
-
-	DPRINTK("called, filp->f_pos = %lld", file->f_pos);
-
-	/*
-	 * Don't set reghost flag if:
-	 * 1) f_pos is larger than zero -- we've already been here.
-	 * 2) we haven't even enabled reghosting in the 1st place.
-	 * 3) this is the daemon doing a readdir
-	 */
-	if (oz_mode && file->f_pos == 0 && sbi->reghost_enabled)
-		sbi->needs_reghost = 1;
-
-	DPRINTK("needs_reghost = %d", sbi->needs_reghost);
-
-	return dcache_readdir(file, dirent, filldir);
-}
-
 static int autofs4_dir_open(struct inode *inode, struct file *file)
 {
 	struct dentry *dentry = file->f_path.dentry;
@@ -858,44 +835,6 @@ static inline int autofs4_get_protosubver(struct autofs_sb_info *sbi, int __user
 	return put_user(sbi->sub_version, p);
 }
 
-/*
- * Tells the daemon whether we need to reghost or not. Also, clears
- * the reghost_needed flag.
- */
-static inline int autofs4_ask_reghost(struct autofs_sb_info *sbi, int __user *p)
-{
-	int status;
-
-	DPRINTK("returning %d", sbi->needs_reghost);
-
-	status = put_user(sbi->needs_reghost, p);
-	if (status)
-		return status;
-
-	sbi->needs_reghost = 0;
-	return 0;
-}
-
-/*
- * Enable / Disable reghosting ioctl() operation
- */
-static inline int autofs4_toggle_reghost(struct autofs_sb_info *sbi, int __user *p)
-{
-	int status;
-	int val;
-
-	status = get_user(val, p);
-
-	DPRINTK("reghost = %d", val);
-
-	if (status)
-		return status;
-
-	/* turn on/off reghosting, with the val */
-	sbi->reghost_enabled = val;
-	return 0;
-}
-
 /*
 * Tells the daemon whether it can umount the autofs mount.
 */
@@ -960,11 +899,6 @@ static int autofs4_root_ioctl(struct inode *inode, struct file *filp,
 	case AUTOFS_IOC_SETTIMEOUT:
 		return autofs4_get_set_timeout(sbi, p);
 
-	case AUTOFS_IOC_TOGGLEREGHOST:
-		return autofs4_toggle_reghost(sbi, p);
-	case AUTOFS_IOC_ASKREGHOST:
-		return autofs4_ask_reghost(sbi, p);
-
 	case AUTOFS_IOC_ASKUMOUNT:
 		return autofs4_ask_umount(filp->f_path.mnt, p);
 
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 7b3a03c7c6a9..18e2c548161d 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -2297,8 +2297,6 @@ COMPATIBLE_IOCTL(AUTOFS_IOC_PROTOVER)
 COMPATIBLE_IOCTL(AUTOFS_IOC_EXPIRE)
 COMPATIBLE_IOCTL(AUTOFS_IOC_EXPIRE_MULTI)
 COMPATIBLE_IOCTL(AUTOFS_IOC_PROTOSUBVER)
-COMPATIBLE_IOCTL(AUTOFS_IOC_ASKREGHOST)
-COMPATIBLE_IOCTL(AUTOFS_IOC_TOGGLEREGHOST)
 COMPATIBLE_IOCTL(AUTOFS_IOC_ASKUMOUNT)
 /* Raw devices */
 COMPATIBLE_IOCTL(RAW_SETBIND)
diff --git a/include/linux/auto_fs4.h b/include/linux/auto_fs4.h
index 31a29541b504..b785c6f8644d 100644
--- a/include/linux/auto_fs4.h
+++ b/include/linux/auto_fs4.h
@@ -98,8 +98,6 @@ union autofs_v5_packet_union {
 #define AUTOFS_IOC_EXPIRE_INDIRECT	AUTOFS_IOC_EXPIRE_MULTI
 #define AUTOFS_IOC_EXPIRE_DIRECT	AUTOFS_IOC_EXPIRE_MULTI
 #define AUTOFS_IOC_PROTOSUBVER		_IOR(0x93,0x67,int)
-#define AUTOFS_IOC_ASKREGHOST           _IOR(0x93,0x68,int)
-#define AUTOFS_IOC_TOGGLEREGHOST        _IOR(0x93,0x69,int)
 #define AUTOFS_IOC_ASKUMOUNT		_IOR(0x93,0x70,int)
 
 
-- 
cgit v1.2.3


From 5ad31a575157147b43fa84ef1e21471661653878 Mon Sep 17 00:00:00 2001
From: David Brownell <david-b@pacbell.net>
Date: Wed, 23 Jul 2008 21:30:33 -0700
Subject: rtc: remove BKL for ioctl()

Remove implicit use of BKL in ioctl() from the RTC framework.

Instead, the rtc->ops_lock is used.  That's the same lock that already
protects the RTC operations when they're issued through the exported
rtc_*() calls in drivers/rtc/interface.c ...  making this a bugfix, not
just a cleanup, since both ioctl calls and set_alarm() need to update IRQ
enable flags and that implies a common lock (which RTC drivers as a rule
do not provide on their own).

A new comment at the declaration of "struct rtc_class_ops" summarizes
current locking rules.  It's not clear to me that the exceptions listed
there should exist ...  if not, those are pre-existing problems which can
be fixed in a patch that doesn't relate to BKL removal.

Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: Jonathan Corbet <corbet@lwn.net>
Acked-by: Alessandro Zummo <a.zummo@towertech.it>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/rtc/rtc-dev.c | 58 +++++++++++++++++++++++++++++++++------------------
 include/linux/rtc.h   | 17 +++++++++++++++
 2 files changed, 55 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/rtc/rtc-dev.c b/drivers/rtc/rtc-dev.c
index 0114a78b7cbb..0a870b7e5c32 100644
--- a/drivers/rtc/rtc-dev.c
+++ b/drivers/rtc/rtc-dev.c
@@ -209,7 +209,7 @@ static unsigned int rtc_dev_poll(struct file *file, poll_table *wait)
 	return (data != 0) ? (POLLIN | POLLRDNORM) : 0;
 }
 
-static int rtc_dev_ioctl(struct inode *inode, struct file *file,
+static long rtc_dev_ioctl(struct file *file,
 		unsigned int cmd, unsigned long arg)
 {
 	int err = 0;
@@ -219,6 +219,10 @@ static int rtc_dev_ioctl(struct inode *inode, struct file *file,
 	struct rtc_wkalrm alarm;
 	void __user *uarg = (void __user *) arg;
 
+	err = mutex_lock_interruptible(&rtc->ops_lock);
+	if (err)
+		return -EBUSY;
+
 	/* check that the calling task has appropriate permissions
 	 * for certain ioctls. doing this check here is useful
 	 * to avoid duplicate code in each driver.
@@ -227,26 +231,31 @@ static int rtc_dev_ioctl(struct inode *inode, struct file *file,
 	case RTC_EPOCH_SET:
 	case RTC_SET_TIME:
 		if (!capable(CAP_SYS_TIME))
-			return -EACCES;
+			err = -EACCES;
 		break;
 
 	case RTC_IRQP_SET:
 		if (arg > rtc->max_user_freq && !capable(CAP_SYS_RESOURCE))
-			return -EACCES;
+			err = -EACCES;
 		break;
 
 	case RTC_PIE_ON:
 		if (rtc->irq_freq > rtc->max_user_freq &&
 				!capable(CAP_SYS_RESOURCE))
-			return -EACCES;
+			err = -EACCES;
 		break;
 	}
 
+	if (err)
+		goto done;
+
 	/* try the driver's ioctl interface */
 	if (ops->ioctl) {
 		err = ops->ioctl(rtc->dev.parent, cmd, arg);
-		if (err != -ENOIOCTLCMD)
+		if (err != -ENOIOCTLCMD) {
+			mutex_unlock(&rtc->ops_lock);
 			return err;
+		}
 	}
 
 	/* if the driver does not provide the ioctl interface
@@ -265,15 +274,19 @@ static int rtc_dev_ioctl(struct inode *inode, struct file *file,
 
 	switch (cmd) {
 	case RTC_ALM_READ:
+		mutex_unlock(&rtc->ops_lock);
+
 		err = rtc_read_alarm(rtc, &alarm);
 		if (err < 0)
 			return err;
 
 		if (copy_to_user(uarg, &alarm.time, sizeof(tm)))
-			return -EFAULT;
-		break;
+			err = -EFAULT;
+		return err;
 
 	case RTC_ALM_SET:
+		mutex_unlock(&rtc->ops_lock);
+
 		if (copy_from_user(&alarm.time, uarg, sizeof(tm)))
 			return -EFAULT;
 
@@ -321,24 +334,26 @@ static int rtc_dev_ioctl(struct inode *inode, struct file *file,
 			}
 		}
 
-		err = rtc_set_alarm(rtc, &alarm);
-		break;
+		return rtc_set_alarm(rtc, &alarm);
 
 	case RTC_RD_TIME:
+		mutex_unlock(&rtc->ops_lock);
+
 		err = rtc_read_time(rtc, &tm);
 		if (err < 0)
 			return err;
 
 		if (copy_to_user(uarg, &tm, sizeof(tm)))
-			return -EFAULT;
-		break;
+			err = -EFAULT;
+		return err;
 
 	case RTC_SET_TIME:
+		mutex_unlock(&rtc->ops_lock);
+
 		if (copy_from_user(&tm, uarg, sizeof(tm)))
 			return -EFAULT;
 
-		err = rtc_set_time(rtc, &tm);
-		break;
+		return rtc_set_time(rtc, &tm);
 
 	case RTC_PIE_ON:
 		err = rtc_irq_set_state(rtc, NULL, 1);
@@ -376,34 +391,37 @@ static int rtc_dev_ioctl(struct inode *inode, struct file *file,
 		break;
 #endif
 	case RTC_WKALM_SET:
+		mutex_unlock(&rtc->ops_lock);
 		if (copy_from_user(&alarm, uarg, sizeof(alarm)))
 			return -EFAULT;
 
-		err = rtc_set_alarm(rtc, &alarm);
-		break;
+		return rtc_set_alarm(rtc, &alarm);
 
 	case RTC_WKALM_RD:
+		mutex_unlock(&rtc->ops_lock);
 		err = rtc_read_alarm(rtc, &alarm);
 		if (err < 0)
 			return err;
 
 		if (copy_to_user(uarg, &alarm, sizeof(alarm)))
-			return -EFAULT;
-		break;
+			err = -EFAULT;
+		return err;
 
 #ifdef CONFIG_RTC_INTF_DEV_UIE_EMUL
 	case RTC_UIE_OFF:
 		clear_uie(rtc);
-		return 0;
+		break;
 
 	case RTC_UIE_ON:
-		return set_uie(rtc);
+		err = set_uie(rtc);
 #endif
 	default:
 		err = -ENOTTY;
 		break;
 	}
 
+done:
+	mutex_unlock(&rtc->ops_lock);
 	return err;
 }
 
@@ -432,7 +450,7 @@ static const struct file_operations rtc_dev_fops = {
 	.llseek		= no_llseek,
 	.read		= rtc_dev_read,
 	.poll		= rtc_dev_poll,
-	.ioctl		= rtc_dev_ioctl,
+	.unlocked_ioctl	= rtc_dev_ioctl,
 	.open		= rtc_dev_open,
 	.release	= rtc_dev_release,
 	.fasync		= rtc_dev_fasync,
diff --git a/include/linux/rtc.h b/include/linux/rtc.h
index f2d0d1527721..b01fe004cb5e 100644
--- a/include/linux/rtc.h
+++ b/include/linux/rtc.h
@@ -115,6 +115,23 @@ extern void rtc_time_to_tm(unsigned long time, struct rtc_time *tm);
 
 extern struct class *rtc_class;
 
+/*
+ * For these RTC methods the device parameter is the physical device
+ * on whatever bus holds the hardware (I2C, Platform, SPI, etc), which
+ * was passed to rtc_device_register().  Its driver_data normally holds
+ * device state, including the rtc_device pointer for the RTC.
+ *
+ * Most of these methods are called with rtc_device.ops_lock held,
+ * through the rtc_*(struct rtc_device *, ...) calls.
+ *
+ * The (current) exceptions are mostly filesystem hooks:
+ *   - the proc() hook for procfs
+ *   - non-ioctl() chardev hooks:  open(), release(), read_callback()
+ *   - periodic irq calls:  irq_set_state(), irq_set_freq()
+ *
+ * REVISIT those periodic irq calls *do* have ops_lock when they're
+ * issued through ioctl() ...
+ */
 struct rtc_class_ops {
 	int (*open)(struct device *);
 	void (*release)(struct device *);
-- 
cgit v1.2.3


From 53e84b672c1a8190af2b376c35c7a39cf1214f59 Mon Sep 17 00:00:00 2001
From: David Brownell <david-b@pacbell.net>
Date: Wed, 23 Jul 2008 21:30:36 -0700
Subject: rtc: ds1305/ds1306 driver

Support the Dallas/Maxim DS1305 and DS1306 RTC chips.  These use SPI, and
support alarms, NVRAM, and a trickle charger for use when their backup
power supply is a supercap or rechargeable cell.

This basic driver doesn't yet support suspend/resume or wakealarms.

Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Cc: Alessandro Zummo <a.zummo@towertech.it>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/rtc/Kconfig        |  10 +
 drivers/rtc/Makefile       |   1 +
 drivers/rtc/rtc-ds1305.c   | 847 +++++++++++++++++++++++++++++++++++++++++++++
 include/linux/spi/ds1305.h |  35 ++
 4 files changed, 893 insertions(+)
 create mode 100644 drivers/rtc/rtc-ds1305.c
 create mode 100644 include/linux/spi/ds1305.h

(limited to 'include/linux')

diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index beffb834c440..90ab73825401 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -282,6 +282,16 @@ config RTC_DRV_M41T94
 	  This driver can also be built as a module. If so, the module
 	  will be called rtc-m41t94.
 
+config RTC_DRV_DS1305
+	tristate "Dallas/Maxim DS1305/DS1306"
+	help
+	  Select this driver to get support for the Dallas/Maxim DS1305
+	  and DS1306 real time clock chips.  These support a trickle
+	  charger, alarms, and NVRAM in addition to the clock.
+
+	  This driver can also be built as a module. If so, the module
+	  will be called rtc-ds1305.
+
 config RTC_DRV_MAX6902
 	tristate "Maxim MAX6902"
 	help
diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile
index b0e1af54f800..18622ef84cab 100644
--- a/drivers/rtc/Makefile
+++ b/drivers/rtc/Makefile
@@ -24,6 +24,7 @@ obj-$(CONFIG_RTC_DRV_BFIN)	+= rtc-bfin.o
 obj-$(CONFIG_RTC_DRV_CMOS)	+= rtc-cmos.o
 obj-$(CONFIG_RTC_DRV_DS1216)	+= rtc-ds1216.o
 obj-$(CONFIG_RTC_DRV_DS1302)	+= rtc-ds1302.o
+obj-$(CONFIG_RTC_DRV_DS1305)	+= rtc-ds1305.o
 obj-$(CONFIG_RTC_DRV_DS1307)	+= rtc-ds1307.o
 obj-$(CONFIG_RTC_DRV_DS1374)	+= rtc-ds1374.o
 obj-$(CONFIG_RTC_DRV_DS1511)	+= rtc-ds1511.o
diff --git a/drivers/rtc/rtc-ds1305.c b/drivers/rtc/rtc-ds1305.c
new file mode 100644
index 000000000000..b91d02a3ace9
--- /dev/null
+++ b/drivers/rtc/rtc-ds1305.c
@@ -0,0 +1,847 @@
+/*
+ * rtc-ds1305.c -- driver for DS1305 and DS1306 SPI RTC chips
+ *
+ * Copyright (C) 2008 David Brownell
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/bcd.h>
+#include <linux/rtc.h>
+#include <linux/workqueue.h>
+
+#include <linux/spi/spi.h>
+#include <linux/spi/ds1305.h>
+
+
+/*
+ * Registers ... mask DS1305_WRITE into register address to write,
+ * otherwise you're reading it.  All non-bitmask values are BCD.
+ */
+#define DS1305_WRITE		0x80
+
+
+/* RTC date/time ... the main special cases are that we:
+ *  - Need fancy "hours" encoding in 12hour mode
+ *  - Don't rely on the "day-of-week" field (or tm_wday)
+ *  - Are a 21st-century clock (2000 <= year < 2100)
+ */
+#define DS1305_RTC_LEN		7		/* bytes for RTC regs */
+
+#define DS1305_SEC		0x00		/* register addresses */
+#define DS1305_MIN		0x01
+#define DS1305_HOUR		0x02
+#	define DS1305_HR_12		0x40	/* set == 12 hr mode */
+#	define DS1305_HR_PM		0x20	/* set == PM (12hr mode) */
+#define DS1305_WDAY		0x03
+#define DS1305_MDAY		0x04
+#define DS1305_MON		0x05
+#define DS1305_YEAR		0x06
+
+
+/* The two alarms have only sec/min/hour/wday fields (ALM_LEN).
+ * DS1305_ALM_DISABLE disables a match field (some combos are bad).
+ *
+ * NOTE that since we don't use WDAY, we limit ourselves to alarms
+ * only one day into the future (vs potentially up to a week).
+ *
+ * NOTE ALSO that while we could generate once-a-second IRQs (UIE), we
+ * don't currently support them.  We'd either need to do it only when
+ * no alarm is pending (not the standard model), or to use the second
+ * alarm (implying that this is a DS1305 not DS1306, *and* that either
+ * it's wired up a second IRQ we know, or that INTCN is set)
+ */
+#define DS1305_ALM_LEN		4		/* bytes for ALM regs */
+#define DS1305_ALM_DISABLE	0x80
+
+#define DS1305_ALM0(r)		(0x07 + (r))	/* register addresses */
+#define DS1305_ALM1(r)		(0x0b + (r))
+
+
+/* three control registers */
+#define DS1305_CONTROL_LEN	3		/* bytes of control regs */
+
+#define DS1305_CONTROL		0x0f		/* register addresses */
+#	define DS1305_nEOSC		0x80	/* low enables oscillator */
+#	define DS1305_WP		0x40	/* write protect */
+#	define DS1305_INTCN		0x04	/* clear == only int0 used */
+#	define DS1306_1HZ		0x04	/* enable 1Hz output */
+#	define DS1305_AEI1		0x02	/* enable ALM1 IRQ */
+#	define DS1305_AEI0		0x01	/* enable ALM0 IRQ */
+#define DS1305_STATUS		0x10
+/* status has just AEIx bits, mirrored as IRQFx */
+#define DS1305_TRICKLE		0x11
+/* trickle bits are defined in <linux/spi/ds1305.h> */
+
+/* a bunch of NVRAM */
+#define DS1305_NVRAM_LEN	96		/* bytes of NVRAM */
+
+#define DS1305_NVRAM		0x20		/* register addresses */
+
+
+struct ds1305 {
+	struct spi_device	*spi;
+	struct rtc_device	*rtc;
+
+	struct work_struct	work;
+
+	unsigned long		flags;
+#define FLAG_EXITING	0
+
+	bool			hr12;
+	u8			ctrl[DS1305_CONTROL_LEN];
+};
+
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * Utilities ...  tolerate 12-hour AM/PM notation in case of non-Linux
+ * software (like a bootloader) which may require it.
+ */
+
+static unsigned bcd2hour(u8 bcd)
+{
+	if (bcd & DS1305_HR_12) {
+		unsigned	hour = 0;
+
+		bcd &= ~DS1305_HR_12;
+		if (bcd & DS1305_HR_PM) {
+			hour = 12;
+			bcd &= ~DS1305_HR_PM;
+		}
+		hour += BCD2BIN(bcd);
+		return hour - 1;
+	}
+	return BCD2BIN(bcd);
+}
+
+static u8 hour2bcd(bool hr12, int hour)
+{
+	if (hr12) {
+		hour++;
+		if (hour <= 12)
+			return DS1305_HR_12 | BIN2BCD(hour);
+		hour -= 12;
+		return DS1305_HR_12 | DS1305_HR_PM | BIN2BCD(hour);
+	}
+	return BIN2BCD(hour);
+}
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * Interface to RTC framework
+ */
+
+#ifdef CONFIG_RTC_INTF_DEV
+
+/*
+ * Context: caller holds rtc->ops_lock (to protect ds1305->ctrl)
+ */
+static int ds1305_ioctl(struct device *dev, unsigned cmd, unsigned long arg)
+{
+	struct ds1305	*ds1305 = dev_get_drvdata(dev);
+	u8		buf[2];
+	int		status = -ENOIOCTLCMD;
+
+	buf[0] = DS1305_WRITE | DS1305_CONTROL;
+	buf[1] = ds1305->ctrl[0];
+
+	switch (cmd) {
+	case RTC_AIE_OFF:
+		status = 0;
+		if (!(buf[1] & DS1305_AEI0))
+			goto done;
+		buf[1] &= ~DS1305_AEI0;
+		break;
+
+	case RTC_AIE_ON:
+		status = 0;
+		if (ds1305->ctrl[0] & DS1305_AEI0)
+			goto done;
+		buf[1] |= DS1305_AEI0;
+		break;
+	}
+	if (status == 0) {
+		status = spi_write_then_read(ds1305->spi, buf, sizeof buf,
+				NULL, 0);
+		if (status >= 0)
+			ds1305->ctrl[0] = buf[1];
+	}
+
+done:
+	return status;
+}
+
+#else
+#define ds1305_ioctl	NULL
+#endif
+
+/*
+ * Get/set of date and time is pretty normal.
+ */
+
+static int ds1305_get_time(struct device *dev, struct rtc_time *time)
+{
+	struct ds1305	*ds1305 = dev_get_drvdata(dev);
+	u8		addr = DS1305_SEC;
+	u8		buf[DS1305_RTC_LEN];
+	int		status;
+
+	/* Use write-then-read to get all the date/time registers
+	 * since dma from stack is nonportable
+	 */
+	status = spi_write_then_read(ds1305->spi, &addr, sizeof addr,
+			buf, sizeof buf);
+	if (status < 0)
+		return status;
+
+	dev_vdbg(dev, "%s: %02x %02x %02x, %02x %02x %02x %02x\n",
+		"read", buf[0], buf[1], buf[2], buf[3],
+		buf[4], buf[5], buf[6]);
+
+	/* Decode the registers */
+	time->tm_sec = BCD2BIN(buf[DS1305_SEC]);
+	time->tm_min = BCD2BIN(buf[DS1305_MIN]);
+	time->tm_hour = bcd2hour(buf[DS1305_HOUR]);
+	time->tm_wday = buf[DS1305_WDAY] - 1;
+	time->tm_mday = BCD2BIN(buf[DS1305_MDAY]);
+	time->tm_mon = BCD2BIN(buf[DS1305_MON]) - 1;
+	time->tm_year = BCD2BIN(buf[DS1305_YEAR]) + 100;
+
+	dev_vdbg(dev, "%s secs=%d, mins=%d, "
+		"hours=%d, mday=%d, mon=%d, year=%d, wday=%d\n",
+		"read", time->tm_sec, time->tm_min,
+		time->tm_hour, time->tm_mday,
+		time->tm_mon, time->tm_year, time->tm_wday);
+
+	/* Time may not be set */
+	return rtc_valid_tm(time);
+}
+
+static int ds1305_set_time(struct device *dev, struct rtc_time *time)
+{
+	struct ds1305	*ds1305 = dev_get_drvdata(dev);
+	u8		buf[1 + DS1305_RTC_LEN];
+	u8		*bp = buf;
+
+	dev_vdbg(dev, "%s secs=%d, mins=%d, "
+		"hours=%d, mday=%d, mon=%d, year=%d, wday=%d\n",
+		"write", time->tm_sec, time->tm_min,
+		time->tm_hour, time->tm_mday,
+		time->tm_mon, time->tm_year, time->tm_wday);
+
+	/* Write registers starting at the first time/date address. */
+	*bp++ = DS1305_WRITE | DS1305_SEC;
+
+	*bp++ = BIN2BCD(time->tm_sec);
+	*bp++ = BIN2BCD(time->tm_min);
+	*bp++ = hour2bcd(ds1305->hr12, time->tm_hour);
+	*bp++ = (time->tm_wday < 7) ? (time->tm_wday + 1) : 1;
+	*bp++ = BIN2BCD(time->tm_mday);
+	*bp++ = BIN2BCD(time->tm_mon + 1);
+	*bp++ = BIN2BCD(time->tm_year - 100);
+
+	dev_dbg(dev, "%s: %02x %02x %02x, %02x %02x %02x %02x\n",
+		"write", buf[1], buf[2], buf[3],
+		buf[4], buf[5], buf[6], buf[7]);
+
+	/* use write-then-read since dma from stack is nonportable */
+	return spi_write_then_read(ds1305->spi, buf, sizeof buf,
+			NULL, 0);
+}
+
+/*
+ * Get/set of alarm is a bit funky:
+ *
+ * - First there's the inherent raciness of getting the (partitioned)
+ *   status of an alarm that could trigger while we're reading parts
+ *   of that status.
+ *
+ * - Second there's its limited range (we could increase it a bit by
+ *   relying on WDAY), which means it will easily roll over.
+ *
+ * - Third there's the choice of two alarms and alarm signals.
+ *   Here we use ALM0 and expect that nINT0 (open drain) is used;
+ *   that's the only real option for DS1306 runtime alarms, and is
+ *   natural on DS1305.
+ *
+ * - Fourth, there's also ALM1, and a second interrupt signal:
+ *     + On DS1305 ALM1 uses nINT1 (when INTCN=1) else nINT0;
+ *     + On DS1306 ALM1 only uses INT1 (an active high pulse)
+ *       and it won't work when VCC1 is active.
+ *
+ *   So to be most general, we should probably set both alarms to the
+ *   same value, letting ALM1 be the wakeup event source on DS1306
+ *   and handling several wiring options on DS1305.
+ *
+ * - Fifth, we support the polled mode (as well as possible; why not?)
+ *   even when no interrupt line is wired to an IRQ.
+ */
+
+/*
+ * Context: caller holds rtc->ops_lock (to protect ds1305->ctrl)
+ */
+static int ds1305_get_alarm(struct device *dev, struct rtc_wkalrm *alm)
+{
+	struct ds1305	*ds1305 = dev_get_drvdata(dev);
+	struct spi_device *spi = ds1305->spi;
+	u8		addr;
+	int		status;
+	u8		buf[DS1305_ALM_LEN];
+
+	/* Refresh control register cache BEFORE reading ALM0 registers,
+	 * since reading alarm registers acks any pending IRQ.  That
+	 * makes returning "pending" status a bit of a lie, but that bit
+	 * of EFI status is at best fragile anyway (given IRQ handlers).
+	 */
+	addr = DS1305_CONTROL;
+	status = spi_write_then_read(spi, &addr, sizeof addr,
+			ds1305->ctrl, sizeof ds1305->ctrl);
+	if (status < 0)
+		return status;
+
+	alm->enabled = !!(ds1305->ctrl[0] & DS1305_AEI0);
+	alm->pending = !!(ds1305->ctrl[1] & DS1305_AEI0);
+
+	/* get and check ALM0 registers */
+	addr = DS1305_ALM0(DS1305_SEC);
+	status = spi_write_then_read(spi, &addr, sizeof addr,
+			buf, sizeof buf);
+	if (status < 0)
+		return status;
+
+	dev_vdbg(dev, "%s: %02x %02x %02x %02x\n",
+		"alm0 read", buf[DS1305_SEC], buf[DS1305_MIN],
+		buf[DS1305_HOUR], buf[DS1305_WDAY]);
+
+	if ((DS1305_ALM_DISABLE & buf[DS1305_SEC])
+			|| (DS1305_ALM_DISABLE & buf[DS1305_MIN])
+			|| (DS1305_ALM_DISABLE & buf[DS1305_HOUR]))
+		return -EIO;
+
+	/* Stuff these values into alm->time and let RTC framework code
+	 * fill in the rest ... and also handle rollover to tomorrow when
+	 * that's needed.
+	 */
+	alm->time.tm_sec = BCD2BIN(buf[DS1305_SEC]);
+	alm->time.tm_min = BCD2BIN(buf[DS1305_MIN]);
+	alm->time.tm_hour = bcd2hour(buf[DS1305_HOUR]);
+	alm->time.tm_mday = -1;
+	alm->time.tm_mon = -1;
+	alm->time.tm_year = -1;
+	/* next three fields are unused by Linux */
+	alm->time.tm_wday = -1;
+	alm->time.tm_mday = -1;
+	alm->time.tm_isdst = -1;
+
+	return 0;
+}
+
+/*
+ * Context: caller holds rtc->ops_lock (to protect ds1305->ctrl)
+ */
+static int ds1305_set_alarm(struct device *dev, struct rtc_wkalrm *alm)
+{
+	struct ds1305	*ds1305 = dev_get_drvdata(dev);
+	struct spi_device *spi = ds1305->spi;
+	unsigned long	now, later;
+	struct rtc_time	tm;
+	int		status;
+	u8		buf[1 + DS1305_ALM_LEN];
+
+	/* convert desired alarm to time_t */
+	status = rtc_tm_to_time(&alm->time, &later);
+	if (status < 0)
+		return status;
+
+	/* Read current time as time_t */
+	status = ds1305_get_time(dev, &tm);
+	if (status < 0)
+		return status;
+	status = rtc_tm_to_time(&tm, &now);
+	if (status < 0)
+		return status;
+
+	/* make sure alarm fires within the next 24 hours */
+	if (later <= now)
+		return -EINVAL;
+	if ((later - now) > 24 * 60 * 60)
+		return -EDOM;
+
+	/* disable alarm if needed */
+	if (ds1305->ctrl[0] & DS1305_AEI0) {
+		ds1305->ctrl[0] &= ~DS1305_AEI0;
+
+		buf[0] = DS1305_WRITE | DS1305_CONTROL;
+		buf[1] = ds1305->ctrl[0];
+		status = spi_write_then_read(ds1305->spi, buf, 2, NULL, 0);
+		if (status < 0)
+			return status;
+	}
+
+	/* write alarm */
+	buf[0] = DS1305_WRITE | DS1305_ALM0(DS1305_SEC);
+	buf[1 + DS1305_SEC] = BIN2BCD(alm->time.tm_sec);
+	buf[1 + DS1305_MIN] = BIN2BCD(alm->time.tm_min);
+	buf[1 + DS1305_HOUR] = hour2bcd(ds1305->hr12, alm->time.tm_hour);
+	buf[1 + DS1305_WDAY] = DS1305_ALM_DISABLE;
+
+	dev_dbg(dev, "%s: %02x %02x %02x %02x\n",
+		"alm0 write", buf[1 + DS1305_SEC], buf[1 + DS1305_MIN],
+		buf[1 + DS1305_HOUR], buf[1 + DS1305_WDAY]);
+
+	status = spi_write_then_read(spi, buf, sizeof buf, NULL, 0);
+	if (status < 0)
+		return status;
+
+	/* enable alarm if requested */
+	if (alm->enabled) {
+		ds1305->ctrl[0] |= DS1305_AEI0;
+
+		buf[0] = DS1305_WRITE | DS1305_CONTROL;
+		buf[1] = ds1305->ctrl[0];
+		status = spi_write_then_read(ds1305->spi, buf, 2, NULL, 0);
+	}
+
+	return status;
+}
+
+#ifdef CONFIG_PROC_FS
+
+static int ds1305_proc(struct device *dev, struct seq_file *seq)
+{
+	struct ds1305	*ds1305 = dev_get_drvdata(dev);
+	char		*diodes = "no";
+	char		*resistors = "";
+
+	/* ctrl[2] is treated as read-only; no locking needed */
+	if ((ds1305->ctrl[2] & 0xf0) == DS1305_TRICKLE_MAGIC) {
+		switch (ds1305->ctrl[2] & 0x0c) {
+		case DS1305_TRICKLE_DS2:
+			diodes = "2 diodes, ";
+			break;
+		case DS1305_TRICKLE_DS1:
+			diodes = "1 diode, ";
+			break;
+		default:
+			goto done;
+		}
+		switch (ds1305->ctrl[2] & 0x03) {
+		case DS1305_TRICKLE_2K:
+			resistors = "2k Ohm";
+			break;
+		case DS1305_TRICKLE_4K:
+			resistors = "4k Ohm";
+			break;
+		case DS1305_TRICKLE_8K:
+			resistors = "8k Ohm";
+			break;
+		default:
+			diodes = "no";
+			break;
+		}
+	}
+
+done:
+	return seq_printf(seq,
+			"trickle_charge\t: %s%s\n",
+			diodes, resistors);
+}
+
+#else
+#define ds1305_proc	NULL
+#endif
+
+static const struct rtc_class_ops ds1305_ops = {
+	.ioctl		= ds1305_ioctl,
+	.read_time	= ds1305_get_time,
+	.set_time	= ds1305_set_time,
+	.read_alarm	= ds1305_get_alarm,
+	.set_alarm	= ds1305_set_alarm,
+	.proc		= ds1305_proc,
+};
+
+static void ds1305_work(struct work_struct *work)
+{
+	struct ds1305	*ds1305 = container_of(work, struct ds1305, work);
+	struct mutex	*lock = &ds1305->rtc->ops_lock;
+	struct spi_device *spi = ds1305->spi;
+	u8		buf[3];
+	int		status;
+
+	/* lock to protect ds1305->ctrl */
+	mutex_lock(lock);
+
+	/* Disable the IRQ, and clear its status ... for now, we "know"
+	 * that if more than one alarm is active, they're in sync.
+	 * Note that reading ALM data registers also clears IRQ status.
+	 */
+	ds1305->ctrl[0] &= ~(DS1305_AEI1 | DS1305_AEI0);
+	ds1305->ctrl[1] = 0;
+
+	buf[0] = DS1305_WRITE | DS1305_CONTROL;
+	buf[1] = ds1305->ctrl[0];
+	buf[2] = 0;
+
+	status = spi_write_then_read(spi, buf, sizeof buf,
+			NULL, 0);
+	if (status < 0)
+		dev_dbg(&spi->dev, "clear irq --> %d\n", status);
+
+	mutex_unlock(lock);
+
+	if (!test_bit(FLAG_EXITING, &ds1305->flags))
+		enable_irq(spi->irq);
+
+	/* rtc_update_irq() requires an IRQ-disabled context */
+	local_irq_disable();
+	rtc_update_irq(ds1305->rtc, 1, RTC_AF | RTC_IRQF);
+	local_irq_enable();
+}
+
+/*
+ * This "real" IRQ handler hands off to a workqueue mostly to allow
+ * mutex locking for ds1305->ctrl ... unlike I2C, we could issue async
+ * I/O requests in IRQ context (to clear the IRQ status).
+ */
+static irqreturn_t ds1305_irq(int irq, void *p)
+{
+	struct ds1305		*ds1305 = p;
+
+	disable_irq(irq);
+	schedule_work(&ds1305->work);
+	return IRQ_HANDLED;
+}
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * Interface for NVRAM
+ */
+
+static void msg_init(struct spi_message *m, struct spi_transfer *x,
+		u8 *addr, size_t count, char *tx, char *rx)
+{
+	spi_message_init(m);
+	memset(x, 0, 2 * sizeof(*x));
+
+	x->tx_buf = addr;
+	x->len = 1;
+	spi_message_add_tail(x, m);
+
+	x++;
+
+	x->tx_buf = tx;
+	x->rx_buf = rx;
+	x->len = count;
+	spi_message_add_tail(x, m);
+}
+
+static ssize_t
+ds1305_nvram_read(struct kobject *kobj, struct bin_attribute *attr,
+		char *buf, loff_t off, size_t count)
+{
+	struct spi_device	*spi;
+	u8			addr;
+	struct spi_message	m;
+	struct spi_transfer	x[2];
+	int			status;
+
+	spi = container_of(kobj, struct spi_device, dev.kobj);
+
+	if (unlikely(off >= DS1305_NVRAM_LEN))
+		return 0;
+	if (count >= DS1305_NVRAM_LEN)
+		count = DS1305_NVRAM_LEN;
+	if ((off + count) > DS1305_NVRAM_LEN)
+		count = DS1305_NVRAM_LEN - off;
+	if (unlikely(!count))
+		return count;
+
+	addr = DS1305_NVRAM + off;
+	msg_init(&m, x, &addr, count, NULL, buf);
+
+	status = spi_sync(spi, &m);
+	if (status < 0)
+		dev_err(&spi->dev, "nvram %s error %d\n", "read", status);
+	return (status < 0) ? status : count;
+}
+
+static ssize_t
+ds1305_nvram_write(struct kobject *kobj, struct bin_attribute *attr,
+		char *buf, loff_t off, size_t count)
+{
+	struct spi_device	*spi;
+	u8			addr;
+	struct spi_message	m;
+	struct spi_transfer	x[2];
+	int			status;
+
+	spi = container_of(kobj, struct spi_device, dev.kobj);
+
+	if (unlikely(off >= DS1305_NVRAM_LEN))
+		return -EFBIG;
+	if (count >= DS1305_NVRAM_LEN)
+		count = DS1305_NVRAM_LEN;
+	if ((off + count) > DS1305_NVRAM_LEN)
+		count = DS1305_NVRAM_LEN - off;
+	if (unlikely(!count))
+		return count;
+
+	addr = (DS1305_WRITE | DS1305_NVRAM) + off;
+	msg_init(&m, x, &addr, count, buf, NULL);
+
+	status = spi_sync(spi, &m);
+	if (status < 0)
+		dev_err(&spi->dev, "nvram %s error %d\n", "write", status);
+	return (status < 0) ? status : count;
+}
+
+static struct bin_attribute nvram = {
+	.attr.name	= "nvram",
+	.attr.mode	= S_IRUGO | S_IWUSR,
+	.attr.owner	= THIS_MODULE,
+	.read		= ds1305_nvram_read,
+	.write		= ds1305_nvram_write,
+	.size		= DS1305_NVRAM_LEN,
+};
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * Interface to SPI stack
+ */
+
+static int __devinit ds1305_probe(struct spi_device *spi)
+{
+	struct ds1305			*ds1305;
+	struct rtc_device		*rtc;
+	int				status;
+	u8				addr, value;
+	struct ds1305_platform_data	*pdata = spi->dev.platform_data;
+	bool				write_ctrl = false;
+
+	/* Sanity check board setup data.  This may be hooked up
+	 * in 3wire mode, but we don't care.  Note that unless
+	 * there's an inverter in place, this needs SPI_CS_HIGH!
+	 */
+	if ((spi->bits_per_word && spi->bits_per_word != 8)
+			|| (spi->max_speed_hz > 2000000)
+			|| !(spi->mode & SPI_CPHA))
+		return -EINVAL;
+
+	/* set up driver data */
+	ds1305 = kzalloc(sizeof *ds1305, GFP_KERNEL);
+	if (!ds1305)
+		return -ENOMEM;
+	ds1305->spi = spi;
+	spi_set_drvdata(spi, ds1305);
+
+	/* read and cache control registers */
+	addr = DS1305_CONTROL;
+	status = spi_write_then_read(spi, &addr, sizeof addr,
+			ds1305->ctrl, sizeof ds1305->ctrl);
+	if (status < 0) {
+		dev_dbg(&spi->dev, "can't %s, %d\n",
+				"read", status);
+		goto fail0;
+	}
+
+	dev_dbg(&spi->dev, "ctrl %s: %02x %02x %02x\n",
+			"read", ds1305->ctrl[0],
+			ds1305->ctrl[1], ds1305->ctrl[2]);
+
+	/* Sanity check register values ... partially compensating for the
+	 * fact that SPI has no device handshake.  A pullup on MISO would
+	 * make these tests fail; but not all systems will have one.  If
+	 * some register is neither 0x00 nor 0xff, a chip is likely there.
+	 */
+	if ((ds1305->ctrl[0] & 0x38) != 0 || (ds1305->ctrl[1] & 0xfc) != 0) {
+		dev_dbg(&spi->dev, "RTC chip is not present\n");
+		status = -ENODEV;
+		goto fail0;
+	}
+	if (ds1305->ctrl[2] == 0)
+		dev_dbg(&spi->dev, "chip may not be present\n");
+
+	/* enable writes if needed ... if we were paranoid it would
+	 * make sense to enable them only when absolutely necessary.
+	 */
+	if (ds1305->ctrl[0] & DS1305_WP) {
+		u8		buf[2];
+
+		ds1305->ctrl[0] &= ~DS1305_WP;
+
+		buf[0] = DS1305_WRITE | DS1305_CONTROL;
+		buf[1] = ds1305->ctrl[0];
+		status = spi_write_then_read(spi, buf, sizeof buf, NULL, 0);
+
+		dev_dbg(&spi->dev, "clear WP --> %d\n", status);
+		if (status < 0)
+			goto fail0;
+	}
+
+	/* on DS1305, maybe start oscillator; like most low power
+	 * oscillators, it may take a second to stabilize
+	 */
+	if (ds1305->ctrl[0] & DS1305_nEOSC) {
+		ds1305->ctrl[0] &= ~DS1305_nEOSC;
+		write_ctrl = true;
+		dev_warn(&spi->dev, "SET TIME!\n");
+	}
+
+	/* ack any pending IRQs */
+	if (ds1305->ctrl[1]) {
+		ds1305->ctrl[1] = 0;
+		write_ctrl = true;
+	}
+
+	/* this may need one-time (re)init */
+	if (pdata) {
+		/* maybe enable trickle charge */
+		if (((ds1305->ctrl[2] & 0xf0) != DS1305_TRICKLE_MAGIC)) {
+			ds1305->ctrl[2] = DS1305_TRICKLE_MAGIC
+						| pdata->trickle;
+			write_ctrl = true;
+		}
+
+		/* on DS1306, configure 1 Hz signal */
+		if (pdata->is_ds1306) {
+			if (pdata->en_1hz) {
+				if (!(ds1305->ctrl[0] & DS1306_1HZ)) {
+					ds1305->ctrl[0] |= DS1306_1HZ;
+					write_ctrl = true;
+				}
+			} else {
+				if (ds1305->ctrl[0] & DS1306_1HZ) {
+					ds1305->ctrl[0] &= ~DS1306_1HZ;
+					write_ctrl = true;
+				}
+			}
+		}
+	}
+
+	if (write_ctrl) {
+		u8		buf[4];
+
+		buf[0] = DS1305_WRITE | DS1305_CONTROL;
+		buf[1] = ds1305->ctrl[0];
+		buf[2] = ds1305->ctrl[1];
+		buf[3] = ds1305->ctrl[2];
+		status = spi_write_then_read(spi, buf, sizeof buf, NULL, 0);
+		if (status < 0) {
+			dev_dbg(&spi->dev, "can't %s, %d\n",
+					"write", status);
+			goto fail0;
+		}
+
+		dev_dbg(&spi->dev, "ctrl %s: %02x %02x %02x\n",
+				"write", ds1305->ctrl[0],
+				ds1305->ctrl[1], ds1305->ctrl[2]);
+	}
+
+	/* see if non-Linux software set up AM/PM mode */
+	addr = DS1305_HOUR;
+	status = spi_write_then_read(spi, &addr, sizeof addr,
+				&value, sizeof value);
+	if (status < 0) {
+		dev_dbg(&spi->dev, "read HOUR --> %d\n", status);
+		goto fail0;
+	}
+
+	ds1305->hr12 = (DS1305_HR_12 & value) != 0;
+	if (ds1305->hr12)
+		dev_dbg(&spi->dev, "AM/PM\n");
+
+	/* register RTC ... from here on, ds1305->ctrl needs locking */
+	rtc = rtc_device_register("ds1305", &spi->dev,
+			&ds1305_ops, THIS_MODULE);
+	if (IS_ERR(rtc)) {
+		status = PTR_ERR(rtc);
+		dev_dbg(&spi->dev, "register rtc --> %d\n", status);
+		goto fail0;
+	}
+	ds1305->rtc = rtc;
+
+	/* Maybe set up alarm IRQ; be ready to handle it triggering right
+	 * away.  NOTE that we don't share this.  The signal is active low,
+	 * and we can't ack it before a SPI message delay.  We temporarily
+	 * disable the IRQ until it's acked, which lets us work with more
+	 * IRQ trigger modes (not all IRQ controllers can do falling edge).
+	 */
+	if (spi->irq) {
+		INIT_WORK(&ds1305->work, ds1305_work);
+		status = request_irq(spi->irq, ds1305_irq,
+				0, dev_name(&rtc->dev), ds1305);
+		if (status < 0) {
+			dev_dbg(&spi->dev, "request_irq %d --> %d\n",
+					spi->irq, status);
+			goto fail1;
+		}
+	}
+
+	/* export NVRAM */
+	status = sysfs_create_bin_file(&spi->dev.kobj, &nvram);
+	if (status < 0) {
+		dev_dbg(&spi->dev, "register nvram --> %d\n", status);
+		goto fail2;
+	}
+
+	return 0;
+
+fail2:
+	free_irq(spi->irq, ds1305);
+fail1:
+	rtc_device_unregister(rtc);
+fail0:
+	kfree(ds1305);
+	return status;
+}
+
+static int __devexit ds1305_remove(struct spi_device *spi)
+{
+	struct ds1305	*ds1305 = spi_get_drvdata(spi);
+
+	sysfs_remove_bin_file(&spi->dev.kobj, &nvram);
+
+	/* carefully shut down irq and workqueue, if present */
+	if (spi->irq) {
+		set_bit(FLAG_EXITING, &ds1305->flags);
+		free_irq(spi->irq, ds1305);
+		flush_scheduled_work();
+	}
+
+	rtc_device_unregister(ds1305->rtc);
+	spi_set_drvdata(spi, NULL);
+	kfree(ds1305);
+	return 0;
+}
+
+static struct spi_driver ds1305_driver = {
+	.driver.name	= "rtc-ds1305",
+	.driver.owner	= THIS_MODULE,
+	.probe		= ds1305_probe,
+	.remove		= __devexit_p(ds1305_remove),
+	/* REVISIT add suspend/resume */
+};
+
+static int __init ds1305_init(void)
+{
+	return spi_register_driver(&ds1305_driver);
+}
+module_init(ds1305_init);
+
+static void __exit ds1305_exit(void)
+{
+	spi_unregister_driver(&ds1305_driver);
+}
+module_exit(ds1305_exit);
+
+MODULE_DESCRIPTION("RTC driver for DS1305 and DS1306 chips");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/spi/ds1305.h b/include/linux/spi/ds1305.h
new file mode 100644
index 000000000000..287ec830eab7
--- /dev/null
+++ b/include/linux/spi/ds1305.h
@@ -0,0 +1,35 @@
+#ifndef __LINUX_SPI_DS1305_H
+#define __LINUX_SPI_DS1305_H
+
+/*
+ * One-time configuration for ds1305 and ds1306 RTC chips.
+ *
+ * Put a pointer to this in spi_board_info.platform_data if you want to
+ * be sure that Linux (re)initializes this as needed ... after losing
+ * backup power, and potentially on the first boot.
+ */
+struct ds1305_platform_data {
+
+	/* Trickle charge configuration:  it's OK to leave out the MAGIC
+	 * bitmask; mask in either DS1 or DS2, and then one of 2K/4k/8K.
+	 */
+#define DS1305_TRICKLE_MAGIC	0xa0
+#define DS1305_TRICKLE_DS2	0x08	/* two diodes */
+#define DS1305_TRICKLE_DS1	0x04	/* one diode */
+#define DS1305_TRICKLE_2K	0x01	/* 2 KOhm resistance */
+#define DS1305_TRICKLE_4K	0x02	/* 4 KOhm resistance */
+#define DS1305_TRICKLE_8K	0x03	/* 8 KOhm resistance */
+	u8	trickle;
+
+	/* set only on ds1306 parts */
+	bool	is_ds1306;
+
+	/* ds1306 only:  enable 1 Hz output */
+	bool	en_1hz;
+
+	/* REVISIT:  the driver currently expects nINT0 to be wired
+	 * as the alarm IRQ.  ALM1 may also need to be set up ...
+	 */
+};
+
+#endif /* __LINUX_SPI_DS1305_H */
-- 
cgit v1.2.3


From d3de851a445123f24ad8ece18662014b5e8a8b4e Mon Sep 17 00:00:00 2001
From: David Brownell <david-b@pacbell.net>
Date: Wed, 23 Jul 2008 21:30:37 -0700
Subject: rtc: BCD codeshrink

This updates <linux/bcd.h> to define the key routines as constant
functions, which the macros will then call.  Newer code can now call
bcd2bin() instead of SCREAMING BCD2BIN() TO THE FOUR WINDS.

This lets each driver shrink their codespace by using N function calls to
a single (global) copy of those routines, instead of N inlined copies of
these functions per driver.

These routines aren't used in speed-critical code.  Almost all callers are
in the RTC framework.  Typical per-driver savings is near 300 bytes.

Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Acked-by: Adrian Bunk <bunk@kernel.org>
Cc: Alessandro Zummo <a.zummo@towertech.it>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bcd.h |  9 +++++++--
 lib/Makefile        |  2 +-
 lib/bcd.c           | 14 ++++++++++++++
 3 files changed, 22 insertions(+), 3 deletions(-)
 create mode 100644 lib/bcd.c

(limited to 'include/linux')

diff --git a/include/linux/bcd.h b/include/linux/bcd.h
index c545308125b0..7ac518e3c152 100644
--- a/include/linux/bcd.h
+++ b/include/linux/bcd.h
@@ -10,8 +10,13 @@
 #ifndef _BCD_H
 #define _BCD_H
 
-#define BCD2BIN(val)	(((val) & 0x0f) + ((val)>>4)*10)
-#define BIN2BCD(val)	((((val)/10)<<4) + (val)%10)
+#include <linux/compiler.h>
+
+unsigned bcd2bin(unsigned char val) __attribute_const__;
+unsigned char bin2bcd(unsigned val) __attribute_const__;
+
+#define BCD2BIN(val)	bcd2bin(val)
+#define BIN2BCD(val)	bin2bcd(val)
 
 /* backwards compat */
 #define BCD_TO_BIN(val) ((val)=BCD2BIN(val))
diff --git a/lib/Makefile b/lib/Makefile
index 818c4d455518..9085ad6fa53d 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -18,7 +18,7 @@ lib-$(CONFIG_SMP) += cpumask.o
 
 lib-y	+= kobject.o kref.o klist.o
 
-obj-y += div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
+obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
 	 bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o
 
 ifeq ($(CONFIG_DEBUG_KOBJECT),y)
diff --git a/lib/bcd.c b/lib/bcd.c
new file mode 100644
index 000000000000..d74257fd0fe7
--- /dev/null
+++ b/lib/bcd.c
@@ -0,0 +1,14 @@
+#include <linux/bcd.h>
+#include <linux/module.h>
+
+unsigned bcd2bin(unsigned char val)
+{
+	return (val & 0x0f) + (val >> 4) * 10;
+}
+EXPORT_SYMBOL(bcd2bin);
+
+unsigned char bin2bcd(unsigned val)
+{
+	return ((val / 10) << 4) + val % 10;
+}
+EXPORT_SYMBOL(bin2bcd);
-- 
cgit v1.2.3


From 01a2d9ed85c945fc8a672622780533a1a0b7caf5 Mon Sep 17 00:00:00 2001
From: Krzysztof Helt <krzysztof.h1@wp.pl>
Date: Wed, 23 Jul 2008 21:31:04 -0700
Subject: tridentfb: acceleration constants change

This patch replaces deprecated constant FB_ACCELF_TEXT with
FBINFO_HWACCEL_DISABLED and adds constants for Trident families of
accelerators.

The FBINFO_HWACCEL_DISABLED is correctly used so noaccel parameter works
now.

Signed-off-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/tridentfb.c | 45 +++++++++++++++++++++++++++++++--------------
 include/linux/fb.h        |  4 ++++
 2 files changed, 35 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/video/tridentfb.c b/drivers/video/tridentfb.c
index ff82ec1e5e4d..279411523ed9 100644
--- a/drivers/video/tridentfb.c
+++ b/drivers/video/tridentfb.c
@@ -499,6 +499,10 @@ static void tridentfb_fillrect(struct fb_info *info,
 	struct tridentfb_par *par = info->par;
 	int col;
 
+	if (info->flags & FBINFO_HWACCEL_DISABLED) {
+		cfb_fillrect(info, fr);
+		return;
+	}
 	if (info->var.bits_per_pixel == 8) {
 		col = fr->color;
 		col |= col << 8;
@@ -516,6 +520,10 @@ static void tridentfb_copyarea(struct fb_info *info,
 {
 	struct tridentfb_par *par = info->par;
 
+	if (info->flags & FBINFO_HWACCEL_DISABLED) {
+		cfb_copyarea(info, ca);
+		return;
+	}
 	par->wait_engine(par);
 	par->copy_rect(par, ca->sx, ca->sy, ca->dx, ca->dy,
 		       ca->width, ca->height);
@@ -525,7 +533,8 @@ static int tridentfb_sync(struct fb_info *info)
 {
 	struct tridentfb_par *par = info->par;
 
-	par->wait_engine(par);
+	if (!(info->flags & FBINFO_HWACCEL_DISABLED))
+		par->wait_engine(par);
 	return 0;
 }
 #else
@@ -855,8 +864,9 @@ static int tridentfb_check_var(struct fb_var_screeninfo *var,
 	if (var->yres_virtual > 0xffff)
 		return -EINVAL;
 	line_length = var->xres_virtual * bpp / 8;
-#ifdef CONFIG_FB_TRIDENT_ACCEL
-	if (!is3Dchip(par->chip_id)) {
+
+	if (!is3Dchip(par->chip_id) &&
+	    !(info->flags & FBINFO_HWACCEL_DISABLED)) {
 		/* acceleration requires line length to be power of 2 */
 		if (line_length <= 512)
 			var->xres_virtual = 512 * 8 / bpp;
@@ -873,7 +883,7 @@ static int tridentfb_check_var(struct fb_var_screeninfo *var,
 
 		line_length = var->xres_virtual * bpp / 8;
 	}
-#endif
+
 	if (var->yres > var->yres_virtual)
 		var->yres_virtual = var->yres;
 	if (line_length * var->yres_virtual > info->fix.smem_len)
@@ -1190,9 +1200,9 @@ static int tridentfb_set_par(struct fb_info *info)
 		set_number_of_lines(par, info->var.yres);
 	info->fix.line_length = info->var.xres_virtual * bpp / 8;
 	set_lwidth(par, info->fix.line_length / 8);
-#ifdef CONFIG_FB_TRIDENT_ACCEL
-	par->init_accel(par, info->var.xres_virtual, bpp);
-#endif
+
+	if (!(info->flags & FBINFO_HWACCEL_DISABLED))
+		par->init_accel(par, info->var.xres_virtual, bpp);
 
 	info->fix.visual = (bpp == 8) ? FB_VISUAL_PSEUDOCOLOR : FB_VISUAL_TRUECOLOR;
 	info->cmap.len = (bpp == 8) ? 256 : 16;
@@ -1326,6 +1336,9 @@ static int __devinit trident_pci_probe(struct pci_dev *dev,
 		output("*** Please do use cyblafb, Cyberblade/i1 support "
 		       "will soon be removed from tridentfb!\n");
 
+#ifndef CONFIG_FB_TRIDENT_ACCEL
+	noaccel = 1;
+#endif
 
 	/* If PCI id is 0x9660 then further detect chip type */
 
@@ -1370,21 +1383,25 @@ static int __devinit trident_pci_probe(struct pci_dev *dev,
 		default_par->wait_engine = xp_wait_engine;
 		default_par->fill_rect = xp_fill_rect;
 		default_par->copy_rect = xp_copy_rect;
+		tridentfb_fix.accel = FB_ACCEL_TRIDENT_BLADEXP;
 	} else if (is_blade(chip_id)) {
 		default_par->init_accel = blade_init_accel;
 		default_par->wait_engine = blade_wait_engine;
 		default_par->fill_rect = blade_fill_rect;
 		default_par->copy_rect = blade_copy_rect;
+		tridentfb_fix.accel = FB_ACCEL_TRIDENT_BLADE3D;
 	} else if (chip3D) {			/* 3DImage family left */
 		default_par->init_accel = image_init_accel;
 		default_par->wait_engine = image_wait_engine;
 		default_par->fill_rect = image_fill_rect;
 		default_par->copy_rect = image_copy_rect;
+		tridentfb_fix.accel = FB_ACCEL_TRIDENT_3DIMAGE;
 	} else { 				/* TGUI 9440/96XX family */
 		default_par->init_accel = tgui_init_accel;
 		default_par->wait_engine = xp_wait_engine;
 		default_par->fill_rect = tgui_fill_rect;
 		default_par->copy_rect = tgui_copy_rect;
+		tridentfb_fix.accel = FB_ACCEL_TRIDENT_TGUI;
 	}
 
 	default_par->chip_id = chip_id;
@@ -1441,9 +1458,13 @@ static int __devinit trident_pci_probe(struct pci_dev *dev,
 	info->pseudo_palette = default_par->pseudo_pal;
 
 	info->flags = FBINFO_DEFAULT | FBINFO_HWACCEL_YPAN;
-#ifdef CONFIG_FB_TRIDENT_ACCEL
-	info->flags |= FBINFO_HWACCEL_COPYAREA | FBINFO_HWACCEL_FILLRECT;
-#endif
+	if (!noaccel && default_par->init_accel) {
+		info->flags &= ~FBINFO_HWACCEL_DISABLED;
+		info->flags |= FBINFO_HWACCEL_COPYAREA;
+		info->flags |= FBINFO_HWACCEL_FILLRECT;
+	} else
+		info->flags |= FBINFO_HWACCEL_DISABLED;
+
 	if (!fb_find_mode(&info->var, info,
 			  mode_option, NULL, 0, NULL, bpp)) {
 		err = -EINVAL;
@@ -1453,10 +1474,6 @@ static int __devinit trident_pci_probe(struct pci_dev *dev,
 	if (err < 0)
 		goto out_unmap2;
 
-	if (!noaccel && default_par->init_accel)
-		info->var.accel_flags |= FB_ACCELF_TEXT;
-	else
-		info->var.accel_flags &= ~FB_ACCELF_TEXT;
 	info->var.activate |= FB_ACTIVATE_NOW;
 	info->device = &dev->dev;
 	if (register_framebuffer(info) < 0) {
diff --git a/include/linux/fb.h b/include/linux/fb.h
index 72295b099228..a084d1335869 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -120,6 +120,10 @@ struct dentry;
 #define FB_ACCEL_XGI_VOLARI_V	47	/* XGI Volari V3XT, V5, V8      */
 #define FB_ACCEL_XGI_VOLARI_Z	48	/* XGI Volari Z7                */
 #define FB_ACCEL_OMAP1610	49	/* TI OMAP16xx                  */
+#define FB_ACCEL_TRIDENT_TGUI	50	/* Trident TGUI			*/
+#define FB_ACCEL_TRIDENT_3DIMAGE 51	/* Trident 3DImage		*/
+#define FB_ACCEL_TRIDENT_BLADE3D 52	/* Trident Blade3D		*/
+#define FB_ACCEL_TRIDENT_BLADEXP 53	/* Trident BladeXP		*/
 #define FB_ACCEL_NEOMAGIC_NM2070 90	/* NeoMagic NM2070              */
 #define FB_ACCEL_NEOMAGIC_NM2090 91	/* NeoMagic NM2090              */
 #define FB_ACCEL_NEOMAGIC_NM2093 92	/* NeoMagic NM2093              */
-- 
cgit v1.2.3


From 206c5d69d0540024faffd423fc703f1e457332d7 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-linux@fluff.org>
Date: Wed, 23 Jul 2008 21:31:35 -0700
Subject: sm501: add inversion controls for VBIASEN and FPEN

Add flags to allow the driver to invert the sense of both VBIASEN and FPEN
signals comming from the SM501.

Signed-off-by: Ben Dooks <ben-linux@fluff.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/sm501fb.c | 26 ++++++++++++++++++++++----
 include/linux/sm501.h   |  2 ++
 2 files changed, 24 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/video/sm501fb.c b/drivers/video/sm501fb.c
index 15d4a768b1f6..122a0f8495c8 100644
--- a/drivers/video/sm501fb.c
+++ b/drivers/video/sm501fb.c
@@ -663,15 +663,25 @@ static void sm501fb_panel_power(struct sm501fb_info *fbi, int to)
 		sm501fb_sync_regs(fbi);
 		mdelay(10);
 
+		/* VBIASEN */
+
 		if (!(pd->flags & SM501FB_FLAG_PANEL_NO_VBIASEN)) {
-			control |= SM501_DC_PANEL_CONTROL_BIAS;	/* VBIASEN */
+			if (pd->flags & SM501FB_FLAG_PANEL_INV_VBIASEN)
+				control &= ~SM501_DC_PANEL_CONTROL_BIAS;
+			else
+				control |= SM501_DC_PANEL_CONTROL_BIAS;
+
 			writel(control, ctrl_reg);
 			sm501fb_sync_regs(fbi);
 			mdelay(10);
 		}
 
 		if (!(pd->flags & SM501FB_FLAG_PANEL_NO_FPEN)) {
-			control |= SM501_DC_PANEL_CONTROL_FPEN;
+			if (pd->flags & SM501FB_FLAG_PANEL_INV_FPEN)
+				control &= ~SM501_DC_PANEL_CONTROL_FPEN;
+			else
+				control |= SM501_DC_PANEL_CONTROL_FPEN;
+
 			writel(control, ctrl_reg);
 			sm501fb_sync_regs(fbi);
 			mdelay(10);
@@ -679,14 +689,22 @@ static void sm501fb_panel_power(struct sm501fb_info *fbi, int to)
 	} else if (!to && (control & SM501_DC_PANEL_CONTROL_VDD) != 0) {
 		/* disable panel power */
 		if (!(pd->flags & SM501FB_FLAG_PANEL_NO_FPEN)) {
-			control &= ~SM501_DC_PANEL_CONTROL_FPEN;
+			if (pd->flags & SM501FB_FLAG_PANEL_INV_FPEN)
+				control |= SM501_DC_PANEL_CONTROL_FPEN;
+			else
+				control &= ~SM501_DC_PANEL_CONTROL_FPEN;
+
 			writel(control, ctrl_reg);
 			sm501fb_sync_regs(fbi);
 			mdelay(10);
 		}
 
 		if (!(pd->flags & SM501FB_FLAG_PANEL_NO_VBIASEN)) {
-			control &= ~SM501_DC_PANEL_CONTROL_BIAS;
+			if (pd->flags & SM501FB_FLAG_PANEL_INV_VBIASEN)
+				control |= SM501_DC_PANEL_CONTROL_BIAS;
+			else
+				control &= ~SM501_DC_PANEL_CONTROL_BIAS;
+
 			writel(control, ctrl_reg);
 			sm501fb_sync_regs(fbi);
 			mdelay(10);
diff --git a/include/linux/sm501.h b/include/linux/sm501.h
index 95c1c39ba445..b530fa6a1d34 100644
--- a/include/linux/sm501.h
+++ b/include/linux/sm501.h
@@ -73,6 +73,8 @@ extern unsigned long sm501_gpio_get(struct device *dev,
 #define SM501FB_FLAG_USE_HWACCEL	(1<<3)
 #define SM501FB_FLAG_PANEL_NO_FPEN	(1<<4)
 #define SM501FB_FLAG_PANEL_NO_VBIASEN	(1<<5)
+#define SM501FB_FLAG_PANEL_INV_FPEN	(1<<6)
+#define SM501FB_FLAG_PANEL_INV_VBIASEN	(1<<7)
 
 struct sm501_platdata_fbsub {
 	struct fb_videomode	*def_mode;
-- 
cgit v1.2.3


From 0c531360ed504aa0ce995fcb8ef08e82b6534d0b Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-linux@fluff.org>
Date: Wed, 23 Jul 2008 21:31:38 -0700
Subject: lcd: add lcd_device to check_fb() entry in lcd_ops

Add the lcd_device being checked to the check_fb entry of lcd_ops.  This
ensures that any driver using this to check against it's own state can do
so, and also makes all the calls in lcd_ops more orthogonal in their
arguments.

Signed-off-by: Ben Dooks <ben-linux@fluff.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/backlight/lcd.c    | 2 +-
 drivers/video/bf54x-lq043fb.c    | 2 +-
 drivers/video/bfin-t350mcqb-fb.c | 2 +-
 include/linux/lcd.h              | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/video/backlight/lcd.c b/drivers/video/backlight/lcd.c
index 299fd318dd45..b15b2b84a6f7 100644
--- a/drivers/video/backlight/lcd.c
+++ b/drivers/video/backlight/lcd.c
@@ -33,7 +33,7 @@ static int fb_notifier_callback(struct notifier_block *self,
 	ld = container_of(self, struct lcd_device, fb_notif);
 	mutex_lock(&ld->ops_lock);
 	if (ld->ops)
-		if (!ld->ops->check_fb || ld->ops->check_fb(evdata->info))
+		if (!ld->ops->check_fb || ld->ops->check_fb(ld, evdata->info))
 			ld->ops->set_power(ld, *(int *)evdata->data);
 	mutex_unlock(&ld->ops_lock);
 	return 0;
diff --git a/drivers/video/bf54x-lq043fb.c b/drivers/video/bf54x-lq043fb.c
index 49834a67a623..940467aed13f 100644
--- a/drivers/video/bf54x-lq043fb.c
+++ b/drivers/video/bf54x-lq043fb.c
@@ -478,7 +478,7 @@ static int bfin_lcd_set_contrast(struct lcd_device *dev, int contrast)
 	return 0;
 }
 
-static int bfin_lcd_check_fb(struct fb_info *fi)
+static int bfin_lcd_check_fb(struct lcd_device *dev, struct fb_info *fi)
 {
 	if (!fi || (fi == &bfin_bf54x_fb))
 		return 1;
diff --git a/drivers/video/bfin-t350mcqb-fb.c b/drivers/video/bfin-t350mcqb-fb.c
index 135d6dd7e672..7d1b819e501c 100644
--- a/drivers/video/bfin-t350mcqb-fb.c
+++ b/drivers/video/bfin-t350mcqb-fb.c
@@ -396,7 +396,7 @@ static int bfin_lcd_set_contrast(struct lcd_device *dev, int contrast)
 	return 0;
 }
 
-static int bfin_lcd_check_fb(struct fb_info *fi)
+static int bfin_lcd_check_fb(struct lcd_device *dev, struct fb_info *fi)
 {
 	if (!fi || (fi == &bfin_t350mcqb_fb))
 		return 1;
diff --git a/include/linux/lcd.h b/include/linux/lcd.h
index 1d379787f2e7..173febac6656 100644
--- a/include/linux/lcd.h
+++ b/include/linux/lcd.h
@@ -47,7 +47,7 @@ struct lcd_ops {
         int (*set_contrast)(struct lcd_device *, int contrast);
 	/* Check if given framebuffer device is the one LCD is bound to;
 	   return 0 if not, !=0 if it is. If NULL, lcd always matches the fb. */
-	int (*check_fb)(struct fb_info *);
+	int (*check_fb)(struct lcd_device *, struct fb_info *);
 };
 
 struct lcd_device {
-- 
cgit v1.2.3


From 3e074058d72486676f6fdf6fe803200c62dcb403 Mon Sep 17 00:00:00 2001
From: Hans-Christian Egtvedt <hans-christian.egtvedt@atmel.com>
Date: Wed, 23 Jul 2008 21:31:48 -0700
Subject: fbdev: LCD backlight driver using Atmel PWM driver

This patch adds a platform driver using the ATMEL PWM driver to control a
backlight which requires a PWM signal and optional GPIO signal for discrete
on/off signal.  It has been tested on Favr-32 board from EarthLCD.

The driver is configurable by supplying a struct with the platform data.  See
the include/linux/atmel-pwm-bl.h for details.

The board code for Favr-32 will be submitted to the AVR32 kernel list.

Signed-off-by: Hans-Christian Egtvedt <hans-christian.egtvedt@atmel.com>
Cc: Krzysztof Helt <krzysztof.h1@poczta.fm>
Cc: Haavard Skinnemoen <hskinnemoen@atmel.com>
Cc: Richard Purdie <rpurdie@rpsys.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/backlight/Kconfig        |  12 ++
 drivers/video/backlight/Makefile       |   1 +
 drivers/video/backlight/atmel-pwm-bl.c | 244 +++++++++++++++++++++++++++++++++
 include/linux/atmel-pwm-bl.h           |  43 ++++++
 4 files changed, 300 insertions(+)
 create mode 100644 drivers/video/backlight/atmel-pwm-bl.c
 create mode 100644 include/linux/atmel-pwm-bl.h

(limited to 'include/linux')

diff --git a/drivers/video/backlight/Kconfig b/drivers/video/backlight/Kconfig
index b289e197e55d..98d9faf4970d 100644
--- a/drivers/video/backlight/Kconfig
+++ b/drivers/video/backlight/Kconfig
@@ -87,6 +87,18 @@ config BACKLIGHT_ATMEL_LCDC
 	  If in doubt, it's safe to enable this option; it doesn't kick
 	  in unless the board's description says it's wired that way.
 
+config BACKLIGHT_ATMEL_PWM
+	tristate "Atmel PWM backlight control"
+	depends on BACKLIGHT_CLASS_DEVICE && ATMEL_PWM
+	default n
+	help
+	  Say Y here if you want to use the PWM peripheral in Atmel AT91 and
+	  AVR32 devices. This driver will need additional platform data to know
+	  which PWM instance to use and how to configure it.
+
+	  To compile this driver as a module, choose M here: the module will be
+	  called atmel-pwm-bl.
+
 config BACKLIGHT_CORGI
 	tristate "Generic (aka Sharp Corgi) Backlight Driver"
 	depends on BACKLIGHT_CLASS_DEVICE
diff --git a/drivers/video/backlight/Makefile b/drivers/video/backlight/Makefile
index 7d31c14088aa..d8a08e468cc7 100644
--- a/drivers/video/backlight/Makefile
+++ b/drivers/video/backlight/Makefile
@@ -7,6 +7,7 @@ obj-$(CONFIG_LCD_PLATFORM)	   += platform_lcd.o
 obj-$(CONFIG_LCD_VGG2432A4)	   += vgg2432a4.o
 
 obj-$(CONFIG_BACKLIGHT_CLASS_DEVICE) += backlight.o
+obj-$(CONFIG_BACKLIGHT_ATMEL_PWM)    += atmel-pwm-bl.o
 obj-$(CONFIG_BACKLIGHT_CORGI)	+= corgi_bl.o
 obj-$(CONFIG_BACKLIGHT_HP680)	+= hp680_bl.o
 obj-$(CONFIG_BACKLIGHT_LOCOMO)	+= locomolcd.o
diff --git a/drivers/video/backlight/atmel-pwm-bl.c b/drivers/video/backlight/atmel-pwm-bl.c
new file mode 100644
index 000000000000..505c0823a105
--- /dev/null
+++ b/drivers/video/backlight/atmel-pwm-bl.c
@@ -0,0 +1,244 @@
+/*
+ * Copyright (C) 2008 Atmel Corporation
+ *
+ * Backlight driver using Atmel PWM peripheral.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/fb.h>
+#include <linux/clk.h>
+#include <linux/gpio.h>
+#include <linux/backlight.h>
+#include <linux/atmel_pwm.h>
+#include <linux/atmel-pwm-bl.h>
+
+struct atmel_pwm_bl {
+	const struct atmel_pwm_bl_platform_data	*pdata;
+	struct backlight_device			*bldev;
+	struct platform_device			*pdev;
+	struct pwm_channel			pwmc;
+	int					gpio_on;
+};
+
+static int atmel_pwm_bl_set_intensity(struct backlight_device *bd)
+{
+	struct atmel_pwm_bl *pwmbl = bl_get_data(bd);
+	int intensity = bd->props.brightness;
+	int pwm_duty;
+
+	if (bd->props.power != FB_BLANK_UNBLANK)
+		intensity = 0;
+	if (bd->props.fb_blank != FB_BLANK_UNBLANK)
+		intensity = 0;
+
+	if (pwmbl->pdata->pwm_active_low)
+		pwm_duty = pwmbl->pdata->pwm_duty_min + intensity;
+	else
+		pwm_duty = pwmbl->pdata->pwm_duty_max - intensity;
+
+	if (pwm_duty > pwmbl->pdata->pwm_duty_max)
+		pwm_duty = pwmbl->pdata->pwm_duty_max;
+	if (pwm_duty < pwmbl->pdata->pwm_duty_min)
+		pwm_duty = pwmbl->pdata->pwm_duty_min;
+
+	if (!intensity) {
+		if (pwmbl->gpio_on != -1) {
+			gpio_set_value(pwmbl->gpio_on,
+					0 ^ pwmbl->pdata->on_active_low);
+		}
+		pwm_channel_writel(&pwmbl->pwmc, PWM_CUPD, pwm_duty);
+		pwm_channel_disable(&pwmbl->pwmc);
+	} else {
+		pwm_channel_enable(&pwmbl->pwmc);
+		pwm_channel_writel(&pwmbl->pwmc, PWM_CUPD, pwm_duty);
+		if (pwmbl->gpio_on != -1) {
+			gpio_set_value(pwmbl->gpio_on,
+					1 ^ pwmbl->pdata->on_active_low);
+		}
+	}
+
+	return 0;
+}
+
+static int atmel_pwm_bl_get_intensity(struct backlight_device *bd)
+{
+	struct atmel_pwm_bl *pwmbl = bl_get_data(bd);
+	u8 intensity;
+
+	if (pwmbl->pdata->pwm_active_low) {
+		intensity = pwm_channel_readl(&pwmbl->pwmc, PWM_CDTY) -
+			pwmbl->pdata->pwm_duty_min;
+	} else {
+		intensity = pwmbl->pdata->pwm_duty_max -
+			pwm_channel_readl(&pwmbl->pwmc, PWM_CDTY);
+	}
+
+	return intensity;
+}
+
+static int atmel_pwm_bl_init_pwm(struct atmel_pwm_bl *pwmbl)
+{
+	unsigned long pwm_rate = pwmbl->pwmc.mck;
+	unsigned long prescale = DIV_ROUND_UP(pwm_rate,
+			(pwmbl->pdata->pwm_frequency *
+			 pwmbl->pdata->pwm_compare_max)) - 1;
+
+	/*
+	 * Prescale must be power of two and maximum 0xf in size because of
+	 * hardware limit. PWM speed will be:
+	 *	PWM module clock speed / (2 ^ prescale).
+	 */
+	prescale = fls(prescale);
+	if (prescale > 0xf)
+		prescale = 0xf;
+
+	pwm_channel_writel(&pwmbl->pwmc, PWM_CMR, prescale);
+	pwm_channel_writel(&pwmbl->pwmc, PWM_CDTY,
+			pwmbl->pdata->pwm_duty_min +
+			pwmbl->bldev->props.brightness);
+	pwm_channel_writel(&pwmbl->pwmc, PWM_CPRD,
+			pwmbl->pdata->pwm_compare_max);
+
+	dev_info(&pwmbl->pdev->dev, "Atmel PWM backlight driver "
+			"(%lu Hz)\n", pwmbl->pwmc.mck /
+			pwmbl->pdata->pwm_compare_max /
+			(1 << prescale));
+
+	return pwm_channel_enable(&pwmbl->pwmc);
+}
+
+static struct backlight_ops atmel_pwm_bl_ops = {
+	.get_brightness = atmel_pwm_bl_get_intensity,
+	.update_status  = atmel_pwm_bl_set_intensity,
+};
+
+static int atmel_pwm_bl_probe(struct platform_device *pdev)
+{
+	const struct atmel_pwm_bl_platform_data *pdata;
+	struct backlight_device *bldev;
+	struct atmel_pwm_bl *pwmbl;
+	int retval;
+
+	pwmbl = kzalloc(sizeof(struct atmel_pwm_bl), GFP_KERNEL);
+	if (!pwmbl)
+		return -ENOMEM;
+
+	pwmbl->pdev = pdev;
+
+	pdata = pdev->dev.platform_data;
+	if (!pdata) {
+		retval = -ENODEV;
+		goto err_free_mem;
+	}
+
+	if (pdata->pwm_compare_max < pdata->pwm_duty_max ||
+			pdata->pwm_duty_min > pdata->pwm_duty_max ||
+			pdata->pwm_frequency == 0) {
+		retval = -EINVAL;
+		goto err_free_mem;
+	}
+
+	pwmbl->pdata = pdata;
+	pwmbl->gpio_on = pdata->gpio_on;
+
+	retval = pwm_channel_alloc(pdata->pwm_channel, &pwmbl->pwmc);
+	if (retval)
+		goto err_free_mem;
+
+	if (pwmbl->gpio_on != -1) {
+		retval = gpio_request(pwmbl->gpio_on, "gpio_atmel_pwm_bl");
+		if (retval) {
+			pwmbl->gpio_on = -1;
+			goto err_free_pwm;
+		}
+
+		/* Turn display off by defatult. */
+		retval = gpio_direction_output(pwmbl->gpio_on,
+				0 ^ pdata->on_active_low);
+		if (retval)
+			goto err_free_gpio;
+	}
+
+	bldev = backlight_device_register("atmel-pwm-bl",
+			&pdev->dev, pwmbl, &atmel_pwm_bl_ops);
+	if (IS_ERR(bldev)) {
+		retval = PTR_ERR(bldev);
+		goto err_free_gpio;
+	}
+
+	pwmbl->bldev = bldev;
+
+	platform_set_drvdata(pdev, pwmbl);
+
+	/* Power up the backlight by default at middle intesity. */
+	bldev->props.power = FB_BLANK_UNBLANK;
+	bldev->props.max_brightness = pdata->pwm_duty_max - pdata->pwm_duty_min;
+	bldev->props.brightness = bldev->props.max_brightness / 2;
+
+	retval = atmel_pwm_bl_init_pwm(pwmbl);
+	if (retval)
+		goto err_free_bl_dev;
+
+	atmel_pwm_bl_set_intensity(bldev);
+
+	return 0;
+
+err_free_bl_dev:
+	platform_set_drvdata(pdev, NULL);
+	backlight_device_unregister(bldev);
+err_free_gpio:
+	if (pwmbl->gpio_on != -1)
+		gpio_free(pwmbl->gpio_on);
+err_free_pwm:
+	pwm_channel_free(&pwmbl->pwmc);
+err_free_mem:
+	kfree(pwmbl);
+	return retval;
+}
+
+static int __exit atmel_pwm_bl_remove(struct platform_device *pdev)
+{
+	struct atmel_pwm_bl *pwmbl = platform_get_drvdata(pdev);
+
+	if (pwmbl->gpio_on != -1) {
+		gpio_set_value(pwmbl->gpio_on, 0);
+		gpio_free(pwmbl->gpio_on);
+	}
+	pwm_channel_disable(&pwmbl->pwmc);
+	pwm_channel_free(&pwmbl->pwmc);
+	backlight_device_unregister(pwmbl->bldev);
+	platform_set_drvdata(pdev, NULL);
+	kfree(pwmbl);
+
+	return 0;
+}
+
+static struct platform_driver atmel_pwm_bl_driver = {
+	.driver = {
+		.name = "atmel-pwm-bl",
+	},
+	/* REVISIT add suspend() and resume() */
+	.remove = __exit_p(atmel_pwm_bl_remove),
+};
+
+static int __init atmel_pwm_bl_init(void)
+{
+	return platform_driver_probe(&atmel_pwm_bl_driver, atmel_pwm_bl_probe);
+}
+module_init(atmel_pwm_bl_init);
+
+static void __exit atmel_pwm_bl_exit(void)
+{
+	platform_driver_unregister(&atmel_pwm_bl_driver);
+}
+module_exit(atmel_pwm_bl_exit);
+
+MODULE_AUTHOR("Hans-Christian egtvedt <hans-christian.egtvedt@atmel.com>");
+MODULE_DESCRIPTION("Atmel PWM backlight driver");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/atmel-pwm-bl.h b/include/linux/atmel-pwm-bl.h
new file mode 100644
index 000000000000..0153a47806c2
--- /dev/null
+++ b/include/linux/atmel-pwm-bl.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2007 Atmel Corporation
+ *
+ * Driver for the AT32AP700X PS/2 controller (PSIF).
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#ifndef __INCLUDE_ATMEL_PWM_BL_H
+#define __INCLUDE_ATMEL_PWM_BL_H
+
+/**
+ * struct atmel_pwm_bl_platform_data
+ * @pwm_channel: which PWM channel in the PWM module to use.
+ * @pwm_frequency: PWM frequency to generate, the driver will try to be as
+ *	close as the prescaler allows.
+ * @pwm_compare_max: value to use in the PWM channel compare register.
+ * @pwm_duty_max: maximum duty cycle value, must be less than or equal to
+ *	pwm_compare_max.
+ * @pwm_duty_min: minimum duty cycle value, must be less than pwm_duty_max.
+ * @pwm_active_low: set to one if the low part of the PWM signal increases the
+ *	brightness of the backlight.
+ * @gpio_on: GPIO line to control the backlight on/off, set to -1 if not used.
+ * @on_active_low: set to one if the on/off signal is on when GPIO is low.
+ *
+ * This struct must be added to the platform device in the board code. It is
+ * used by the atmel-pwm-bl driver to setup the GPIO to control on/off and the
+ * PWM device.
+ */
+struct atmel_pwm_bl_platform_data {
+	unsigned int pwm_channel;
+	unsigned int pwm_frequency;
+	unsigned int pwm_compare_max;
+	unsigned int pwm_duty_max;
+	unsigned int pwm_duty_min;
+	unsigned int pwm_active_low;
+	int gpio_on;
+	unsigned int on_active_low;
+};
+
+#endif /* __INCLUDE_ATMEL_PWM_BL_H */
-- 
cgit v1.2.3


From 5bb49fcd501aa9fd3d321a22b7c01d9b0db7ab36 Mon Sep 17 00:00:00 2001
From: Philippe De Muyter <phdm@macqel.be>
Date: Wed, 23 Jul 2008 21:31:50 -0700
Subject: video/fb: cleanup FB_MAJOR usage

Currently, linux/major.h defines a GRAPHDEV_MAJOR (29) that nobody uses,
and linux/fb.h defines the real FB_MAJOR (also 29), that only fbmem.c
needs.  Drop GRAPHDEV_MAJOR from major.h, move FB_MAJOR definition from
fb.h to major.h, and fix fbmem.c to use major.h's definition.

Signed-off-by: Philippe De Muyter <phdm@macqel.be>
Cc: Krzysztof Helt <krzysztof.h1@poczta.fm>
Cc: "Antonino A. Daplas" <adaplas@pol.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/fbmem.c | 1 +
 include/linux/fb.h    | 1 -
 include/linux/major.h | 2 +-
 3 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/video/fbmem.c b/drivers/video/fbmem.c
index 6b487801eeae..5d84b3431098 100644
--- a/drivers/video/fbmem.c
+++ b/drivers/video/fbmem.c
@@ -35,6 +35,7 @@
 #include <linux/device.h>
 #include <linux/efi.h>
 #include <linux/fb.h>
+#include <linux/major.h>
 
 #include <asm/fb.h>
 
diff --git a/include/linux/fb.h b/include/linux/fb.h
index a084d1335869..3b8870e32afd 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -8,7 +8,6 @@ struct dentry;
 
 /* Definitions of frame buffers						*/
 
-#define FB_MAJOR		29
 #define FB_MAX			32	/* sufficient for now */
 
 /* ioctls
diff --git a/include/linux/major.h b/include/linux/major.h
index 0cb98053537a..53d5fafd85c3 100644
--- a/include/linux/major.h
+++ b/include/linux/major.h
@@ -53,7 +53,7 @@
 #define STL_SIOMEMMAJOR		28
 #define ACSI_MAJOR		28
 #define AZTECH_CDROM_MAJOR	29
-#define GRAPHDEV_MAJOR		29   /* SparcLinux & Linux/68k /dev/fb */
+#define FB_MAJOR		29   /* /dev/fb* framebuffers */
 #define CM206_CDROM_MAJOR	32
 #define IDE2_MAJOR		33
 #define IDE3_MAJOR		34
-- 
cgit v1.2.3


From f9247273cb69ba101877e946d2d83044409cc8c5 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Thu, 24 Jul 2008 17:22:13 +0100
Subject: UFS: add const to parser token table

This patch adds a "const" to the parser token table. I've done an
allmodconfig build to see if this produces any warnings/failures and the
patch includes a fix for the only warning that was produced.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Acked-by: Alexander Viro <aviro@redhat.com>
Acked-by: Evgeniy Dushistov <dushistov@mail.ru>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ufs/super.c         | 2 +-
 include/linux/parser.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 85b22b5977fa..506f724055c2 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1232,7 +1232,7 @@ static int ufs_show_options(struct seq_file *seq, struct vfsmount *vfs)
 {
 	struct ufs_sb_info *sbi = UFS_SB(vfs->mnt_sb);
 	unsigned mval = sbi->s_mount_opt & UFS_MOUNT_UFSTYPE;
-	struct match_token *tp = tokens;
+	const struct match_token *tp = tokens;
 
 	while (tp->token != Opt_onerror_panic && tp->token != mval)
 		++tp;
diff --git a/include/linux/parser.h b/include/linux/parser.h
index 7dcd05075756..cc554ca8bc78 100644
--- a/include/linux/parser.h
+++ b/include/linux/parser.h
@@ -14,7 +14,7 @@ struct match_token {
 	const char *pattern;
 };
 
-typedef struct match_token match_table_t[];
+typedef const struct match_token match_table_t[];
 
 /* Maximum number of arguments that match_token will find in a pattern */
 enum {MAX_OPT_ARGS = 3};
-- 
cgit v1.2.3


From 6cdf6eb357c2681596b7b1672b92396ba82333d4 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 24 Jul 2008 22:53:14 +0200
Subject: ide: add ->dev and ->host_priv fields to struct ide_host

* Add 'struct device *dev[2]' and 'void *host_priv' fields
  to struct ide_host.

* Set ->dev[] in ide_host_alloc_all()/ide_setup_pci_device[s]().

* Pass 'void *priv' argument to ide_setup_pci_device[s]()
  and use it to set ->host_priv.

* Set PCI dev's ->driver_data to point to the struct ide_host
  instance if PCI host driver wants to use ->host_priv.

* Rename ide_setup_pci_device[s]() to ide_pci_init_{one,two}().

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-probe.c        |  3 +++
 drivers/ide/pci/aec62xx.c      |  2 +-
 drivers/ide/pci/alim15x3.c     |  2 +-
 drivers/ide/pci/amd74xx.c      |  2 +-
 drivers/ide/pci/atiixp.c       |  2 +-
 drivers/ide/pci/cmd64x.c       |  2 +-
 drivers/ide/pci/cs5530.c       |  2 +-
 drivers/ide/pci/cs5535.c       |  2 +-
 drivers/ide/pci/cy82c693.c     |  2 +-
 drivers/ide/pci/generic.c      |  2 +-
 drivers/ide/pci/hpt34x.c       |  2 +-
 drivers/ide/pci/hpt366.c       |  4 ++--
 drivers/ide/pci/it8213.c       |  2 +-
 drivers/ide/pci/it821x.c       |  2 +-
 drivers/ide/pci/jmicron.c      |  2 +-
 drivers/ide/pci/ns87415.c      |  2 +-
 drivers/ide/pci/opti621.c      |  2 +-
 drivers/ide/pci/pdc202xx_new.c |  4 ++--
 drivers/ide/pci/pdc202xx_old.c |  2 +-
 drivers/ide/pci/piix.c         |  2 +-
 drivers/ide/pci/rz1000.c       |  2 +-
 drivers/ide/pci/sc1200.c       |  2 +-
 drivers/ide/pci/serverworks.c  |  2 +-
 drivers/ide/pci/siimage.c      |  2 +-
 drivers/ide/pci/sis5513.c      |  2 +-
 drivers/ide/pci/sl82c105.c     |  2 +-
 drivers/ide/pci/slc90e66.c     |  2 +-
 drivers/ide/pci/tc86c001.c     |  2 +-
 drivers/ide/pci/triflex.c      |  2 +-
 drivers/ide/pci/trm290.c       |  2 +-
 drivers/ide/pci/via82cxxx.c    |  2 +-
 drivers/ide/setup-pci.c        | 52 ++++++++++++++++++++++++++++++++++++------
 include/linux/ide.h            |  7 ++++--
 33 files changed, 85 insertions(+), 41 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index 4aa76c453755..890c15b1b3ae 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -1604,6 +1604,9 @@ struct ide_host *ide_host_alloc_all(const struct ide_port_info *d,
 		return NULL;
 	}
 
+	if (hws[0])
+		host->dev[0] = hws[0]->dev;
+
 	return host;
 }
 EXPORT_SYMBOL_GPL(ide_host_alloc_all);
diff --git a/drivers/ide/pci/aec62xx.c b/drivers/ide/pci/aec62xx.c
index fbc43e121e6b..7a5d246fe9b1 100644
--- a/drivers/ide/pci/aec62xx.c
+++ b/drivers/ide/pci/aec62xx.c
@@ -273,7 +273,7 @@ static int __devinit aec62xx_init_one(struct pci_dev *dev, const struct pci_devi
 		}
 	}
 
-	err = ide_setup_pci_device(dev, &d);
+	err = ide_pci_init_one(dev, &d, NULL);
 	if (err)
 		pci_disable_device(dev);
 
diff --git a/drivers/ide/pci/alim15x3.c b/drivers/ide/pci/alim15x3.c
index 5ef7817ac64f..7f96e7ca3864 100644
--- a/drivers/ide/pci/alim15x3.c
+++ b/drivers/ide/pci/alim15x3.c
@@ -565,7 +565,7 @@ static int __devinit alim15x3_init_one(struct pci_dev *dev, const struct pci_dev
 	if (idx == 0)
 		d.host_flags |= IDE_HFLAG_CLEAR_SIMPLEX;
 
-	return ide_setup_pci_device(dev, &d);
+	return ide_pci_init_one(dev, &d, NULL);
 }
 
 
diff --git a/drivers/ide/pci/amd74xx.c b/drivers/ide/pci/amd74xx.c
index ef7d971031ee..b6a475313c7c 100644
--- a/drivers/ide/pci/amd74xx.c
+++ b/drivers/ide/pci/amd74xx.c
@@ -302,7 +302,7 @@ static int __devinit amd74xx_probe(struct pci_dev *dev, const struct pci_device_
 			 d.name, pci_name(dev), dev->revision,
 			 amd_dma[fls(d.udma_mask) - 1]);
 
-	return ide_setup_pci_device(dev, &d);
+	return ide_pci_init_one(dev, &d, NULL);
 }
 
 static const struct pci_device_id amd74xx_pci_tbl[] = {
diff --git a/drivers/ide/pci/atiixp.c b/drivers/ide/pci/atiixp.c
index 8b637181681a..b483a68b39f6 100644
--- a/drivers/ide/pci/atiixp.c
+++ b/drivers/ide/pci/atiixp.c
@@ -167,7 +167,7 @@ static const struct ide_port_info atiixp_pci_info[] __devinitdata = {
 
 static int __devinit atiixp_init_one(struct pci_dev *dev, const struct pci_device_id *id)
 {
-	return ide_setup_pci_device(dev, &atiixp_pci_info[id->driver_data]);
+	return ide_pci_init_one(dev, &atiixp_pci_info[id->driver_data], NULL);
 }
 
 static const struct pci_device_id atiixp_pci_tbl[] = {
diff --git a/drivers/ide/pci/cmd64x.c b/drivers/ide/pci/cmd64x.c
index ce58bfcdb3c6..fc0333c9a4e5 100644
--- a/drivers/ide/pci/cmd64x.c
+++ b/drivers/ide/pci/cmd64x.c
@@ -507,7 +507,7 @@ static int __devinit cmd64x_init_one(struct pci_dev *dev, const struct pci_devic
 		}
 	}
 
-	return ide_setup_pci_device(dev, &d);
+	return ide_pci_init_one(dev, &d, NULL);
 }
 
 static const struct pci_device_id cmd64x_pci_tbl[] = {
diff --git a/drivers/ide/pci/cs5530.c b/drivers/ide/pci/cs5530.c
index f5534c1ff349..ba82bad8bf4e 100644
--- a/drivers/ide/pci/cs5530.c
+++ b/drivers/ide/pci/cs5530.c
@@ -256,7 +256,7 @@ static const struct ide_port_info cs5530_chipset __devinitdata = {
 
 static int __devinit cs5530_init_one(struct pci_dev *dev, const struct pci_device_id *id)
 {
-	return ide_setup_pci_device(dev, &cs5530_chipset);
+	return ide_pci_init_one(dev, &cs5530_chipset, NULL);
 }
 
 static const struct pci_device_id cs5530_pci_tbl[] = {
diff --git a/drivers/ide/pci/cs5535.c b/drivers/ide/pci/cs5535.c
index 5404fe4f701d..2161f43ca1b8 100644
--- a/drivers/ide/pci/cs5535.c
+++ b/drivers/ide/pci/cs5535.c
@@ -180,7 +180,7 @@ static const struct ide_port_info cs5535_chipset __devinitdata = {
 static int __devinit cs5535_init_one(struct pci_dev *dev,
 					const struct pci_device_id *id)
 {
-	return ide_setup_pci_device(dev, &cs5535_chipset);
+	return ide_pci_init_one(dev, &cs5535_chipset, NULL);
 }
 
 static const struct pci_device_id cs5535_pci_tbl[] = {
diff --git a/drivers/ide/pci/cy82c693.c b/drivers/ide/pci/cy82c693.c
index e14ad5530fa4..abd27ed7c30c 100644
--- a/drivers/ide/pci/cy82c693.c
+++ b/drivers/ide/pci/cy82c693.c
@@ -419,7 +419,7 @@ static int __devinit cy82c693_init_one(struct pci_dev *dev, const struct pci_dev
 	if ((dev->class >> 8) == PCI_CLASS_STORAGE_IDE &&
 	    PCI_FUNC(dev->devfn) == 1) {
 		dev2 = pci_get_slot(dev->bus, dev->devfn + 1);
-		ret = ide_setup_pci_devices(dev, dev2, &cy82c693_chipset);
+		ret = ide_pci_init_two(dev, dev2, &cy82c693_chipset, NULL);
 		/* We leak pci refs here but thats ok - we can't be unloaded */
 	}
 	return ret;
diff --git a/drivers/ide/pci/generic.c b/drivers/ide/pci/generic.c
index 041720e22762..dd0caea5e4f3 100644
--- a/drivers/ide/pci/generic.c
+++ b/drivers/ide/pci/generic.c
@@ -139,7 +139,7 @@ static int __devinit generic_init_one(struct pci_dev *dev, const struct pci_devi
 			goto out;
 		}
 	}
-	ret = ide_setup_pci_device(dev, d);
+	ret = ide_pci_init_one(dev, d, NULL);
 out:
 	return ret;
 }
diff --git a/drivers/ide/pci/hpt34x.c b/drivers/ide/pci/hpt34x.c
index 9e1d1c4741da..3d70c5150ac6 100644
--- a/drivers/ide/pci/hpt34x.c
+++ b/drivers/ide/pci/hpt34x.c
@@ -156,7 +156,7 @@ static int __devinit hpt34x_init_one(struct pci_dev *dev, const struct pci_devic
 
 	d = &hpt34x_chipsets[(pcicmd & PCI_COMMAND_MEMORY) ? 1 : 0];
 
-	return ide_setup_pci_device(dev, d);
+	return ide_pci_init_one(dev, d, NULL);
 }
 
 static const struct pci_device_id hpt34x_pci_tbl[] = {
diff --git a/drivers/ide/pci/hpt366.c b/drivers/ide/pci/hpt366.c
index 1f1135ce7cd6..b23b7a278005 100644
--- a/drivers/ide/pci/hpt366.c
+++ b/drivers/ide/pci/hpt366.c
@@ -1608,13 +1608,13 @@ static int __devinit hpt366_init_one(struct pci_dev *dev, const struct pci_devic
 				d.host_flags &= ~IDE_HFLAG_NON_BOOTABLE;
 		}
 
-		ret = ide_setup_pci_devices(dev, dev2, &d);
+		ret = ide_pci_init_two(dev, dev2, &d, NULL);
 		if (ret < 0)
 			pci_dev_put(dev2);
 		return ret;
 	}
 
-	return ide_setup_pci_device(dev, &d);
+	return ide_pci_init_one(dev, &d, NULL);
 }
 
 static const struct pci_device_id hpt366_pci_tbl[] __devinitconst = {
diff --git a/drivers/ide/pci/it8213.c b/drivers/ide/pci/it8213.c
index 2b71bdf74e73..18219fa9ef01 100644
--- a/drivers/ide/pci/it8213.c
+++ b/drivers/ide/pci/it8213.c
@@ -184,7 +184,7 @@ static const struct ide_port_info it8213_chipsets[] __devinitdata = {
 
 static int __devinit it8213_init_one(struct pci_dev *dev, const struct pci_device_id *id)
 {
-	return ide_setup_pci_device(dev, &it8213_chipsets[id->driver_data]);
+	return ide_pci_init_one(dev, &it8213_chipsets[id->driver_data], NULL);
 }
 
 static const struct pci_device_id it8213_pci_tbl[] = {
diff --git a/drivers/ide/pci/it821x.c b/drivers/ide/pci/it821x.c
index cbf647202994..40186f9e56aa 100644
--- a/drivers/ide/pci/it821x.c
+++ b/drivers/ide/pci/it821x.c
@@ -664,7 +664,7 @@ static int __devinit it821x_init_one(struct pci_dev *dev, const struct pci_devic
 
 	pci_set_drvdata(dev, itdevs);
 
-	return ide_setup_pci_device(dev, &it821x_chipsets[id->driver_data]);
+	return ide_pci_init_one(dev, &it821x_chipsets[id->driver_data], NULL);
 }
 
 static const struct pci_device_id it821x_pci_tbl[] = {
diff --git a/drivers/ide/pci/jmicron.c b/drivers/ide/pci/jmicron.c
index 96ef7394f283..a7e3c14f7b07 100644
--- a/drivers/ide/pci/jmicron.c
+++ b/drivers/ide/pci/jmicron.c
@@ -121,7 +121,7 @@ static const struct ide_port_info jmicron_chipset __devinitdata = {
 
 static int __devinit jmicron_init_one(struct pci_dev *dev, const struct pci_device_id *id)
 {
-	return ide_setup_pci_device(dev, &jmicron_chipset);
+	return ide_pci_init_one(dev, &jmicron_chipset, NULL);
 }
 
 /* All JMB PATA controllers have and will continue to have the same
diff --git a/drivers/ide/pci/ns87415.c b/drivers/ide/pci/ns87415.c
index 5cd2b32ff0ef..a45c33c0c792 100644
--- a/drivers/ide/pci/ns87415.c
+++ b/drivers/ide/pci/ns87415.c
@@ -324,7 +324,7 @@ static int __devinit ns87415_init_one(struct pci_dev *dev, const struct pci_devi
 		d.tp_ops = &superio_tp_ops;
 	}
 #endif
-	return ide_setup_pci_device(dev, &d);
+	return ide_pci_init_one(dev, &d, NULL);
 }
 
 static const struct pci_device_id ns87415_pci_tbl[] = {
diff --git a/drivers/ide/pci/opti621.c b/drivers/ide/pci/opti621.c
index 725c80508d90..edb9132ffbe4 100644
--- a/drivers/ide/pci/opti621.c
+++ b/drivers/ide/pci/opti621.c
@@ -209,7 +209,7 @@ static const struct ide_port_info opti621_chipset __devinitdata = {
 
 static int __devinit opti621_init_one(struct pci_dev *dev, const struct pci_device_id *id)
 {
-	return ide_setup_pci_device(dev, &opti621_chipset);
+	return ide_pci_init_one(dev, &opti621_chipset, NULL);
 }
 
 static const struct pci_device_id opti621_pci_tbl[] = {
diff --git a/drivers/ide/pci/pdc202xx_new.c b/drivers/ide/pci/pdc202xx_new.c
index 070df8ab3b21..71a420feb981 100644
--- a/drivers/ide/pci/pdc202xx_new.c
+++ b/drivers/ide/pci/pdc202xx_new.c
@@ -524,7 +524,7 @@ static int __devinit pdc202new_init_one(struct pci_dev *dev, const struct pci_de
 		dev2 = pdc20270_get_dev2(dev);
 
 		if (dev2) {
-			int ret = ide_setup_pci_devices(dev, dev2, d);
+			int ret = ide_pci_init_two(dev, dev2, d, NULL);
 			if (ret < 0)
 				pci_dev_put(dev2);
 			return ret;
@@ -540,7 +540,7 @@ static int __devinit pdc202new_init_one(struct pci_dev *dev, const struct pci_de
 		return -ENODEV;
 	}
 
-	return ide_setup_pci_device(dev, d);
+	return ide_pci_init_one(dev, d, NULL);
 }
 
 static const struct pci_device_id pdc202new_pci_tbl[] = {
diff --git a/drivers/ide/pci/pdc202xx_old.c b/drivers/ide/pci/pdc202xx_old.c
index e54dc653b8c4..eba1d60a73a0 100644
--- a/drivers/ide/pci/pdc202xx_old.c
+++ b/drivers/ide/pci/pdc202xx_old.c
@@ -412,7 +412,7 @@ static int __devinit pdc202xx_init_one(struct pci_dev *dev, const struct pci_dev
 		}
 	}
 
-	return ide_setup_pci_device(dev, d);
+	return ide_pci_init_one(dev, d, NULL);
 }
 
 static const struct pci_device_id pdc202xx_pci_tbl[] = {
diff --git a/drivers/ide/pci/piix.c b/drivers/ide/pci/piix.c
index 0ce41b4dddaf..359f65ddcbf9 100644
--- a/drivers/ide/pci/piix.c
+++ b/drivers/ide/pci/piix.c
@@ -394,7 +394,7 @@ static const struct ide_port_info piix_pci_info[] __devinitdata = {
  
 static int __devinit piix_init_one(struct pci_dev *dev, const struct pci_device_id *id)
 {
-	return ide_setup_pci_device(dev, &piix_pci_info[id->driver_data]);
+	return ide_pci_init_one(dev, &piix_pci_info[id->driver_data], NULL);
 }
 
 /**
diff --git a/drivers/ide/pci/rz1000.c b/drivers/ide/pci/rz1000.c
index 532154adba29..860ffdeca095 100644
--- a/drivers/ide/pci/rz1000.c
+++ b/drivers/ide/pci/rz1000.c
@@ -48,7 +48,7 @@ static const struct ide_port_info rz1000_chipset __devinitdata = {
 
 static int __devinit rz1000_init_one(struct pci_dev *dev, const struct pci_device_id *id)
 {
-	return ide_setup_pci_device(dev, &rz1000_chipset);
+	return ide_pci_init_one(dev, &rz1000_chipset, NULL);
 }
 
 static const struct pci_device_id rz1000_pci_tbl[] = {
diff --git a/drivers/ide/pci/sc1200.c b/drivers/ide/pci/sc1200.c
index 14c787b5d95f..8fd9cc2119d6 100644
--- a/drivers/ide/pci/sc1200.c
+++ b/drivers/ide/pci/sc1200.c
@@ -317,7 +317,7 @@ static const struct ide_port_info sc1200_chipset __devinitdata = {
 
 static int __devinit sc1200_init_one(struct pci_dev *dev, const struct pci_device_id *id)
 {
-	return ide_setup_pci_device(dev, &sc1200_chipset);
+	return ide_pci_init_one(dev, &sc1200_chipset, NULL);
 }
 
 static const struct pci_device_id sc1200_pci_tbl[] = {
diff --git a/drivers/ide/pci/serverworks.c b/drivers/ide/pci/serverworks.c
index 127ccb45e261..34abdfc8d567 100644
--- a/drivers/ide/pci/serverworks.c
+++ b/drivers/ide/pci/serverworks.c
@@ -422,7 +422,7 @@ static int __devinit svwks_init_one(struct pci_dev *dev, const struct pci_device
 			d.host_flags &= ~IDE_HFLAG_SINGLE;
 	}
 
-	return ide_setup_pci_device(dev, &d);
+	return ide_pci_init_one(dev, &d, NULL);
 }
 
 static const struct pci_device_id svwks_pci_tbl[] = {
diff --git a/drivers/ide/pci/siimage.c b/drivers/ide/pci/siimage.c
index 5965a35d94ae..48124133601a 100644
--- a/drivers/ide/pci/siimage.c
+++ b/drivers/ide/pci/siimage.c
@@ -795,7 +795,7 @@ static int __devinit siimage_init_one(struct pci_dev *dev,
 		d.host_flags |= IDE_HFLAG_NO_ATAPI_DMA;
 	}
 
-	return ide_setup_pci_device(dev, &d);
+	return ide_pci_init_one(dev, &d, NULL);
 }
 
 static const struct pci_device_id siimage_pci_tbl[] = {
diff --git a/drivers/ide/pci/sis5513.c b/drivers/ide/pci/sis5513.c
index 2389945ca95d..a2330c4ac75b 100644
--- a/drivers/ide/pci/sis5513.c
+++ b/drivers/ide/pci/sis5513.c
@@ -583,7 +583,7 @@ static int __devinit sis5513_init_one(struct pci_dev *dev, const struct pci_devi
 
 	d.udma_mask = udma_rates[chipset_family];
 
-	return ide_setup_pci_device(dev, &d);
+	return ide_pci_init_one(dev, &d, NULL);
 }
 
 static const struct pci_device_id sis5513_pci_tbl[] = {
diff --git a/drivers/ide/pci/sl82c105.c b/drivers/ide/pci/sl82c105.c
index f82a6502c1b7..be22f8125d71 100644
--- a/drivers/ide/pci/sl82c105.c
+++ b/drivers/ide/pci/sl82c105.c
@@ -335,7 +335,7 @@ static int __devinit sl82c105_init_one(struct pci_dev *dev, const struct pci_dev
 		d.host_flags &= ~IDE_HFLAG_SERIALIZE_DMA;
 	}
 
-	return ide_setup_pci_device(dev, &d);
+	return ide_pci_init_one(dev, &d, NULL);
 }
 
 static const struct pci_device_id sl82c105_pci_tbl[] = {
diff --git a/drivers/ide/pci/slc90e66.c b/drivers/ide/pci/slc90e66.c
index dae6e2c94d86..2fc2f2cf2206 100644
--- a/drivers/ide/pci/slc90e66.c
+++ b/drivers/ide/pci/slc90e66.c
@@ -144,7 +144,7 @@ static const struct ide_port_info slc90e66_chipset __devinitdata = {
 
 static int __devinit slc90e66_init_one(struct pci_dev *dev, const struct pci_device_id *id)
 {
-	return ide_setup_pci_device(dev, &slc90e66_chipset);
+	return ide_pci_init_one(dev, &slc90e66_chipset, NULL);
 }
 
 static const struct pci_device_id slc90e66_pci_tbl[] = {
diff --git a/drivers/ide/pci/tc86c001.c b/drivers/ide/pci/tc86c001.c
index 477e19790102..e16e79d21772 100644
--- a/drivers/ide/pci/tc86c001.c
+++ b/drivers/ide/pci/tc86c001.c
@@ -215,7 +215,7 @@ static const struct ide_port_info tc86c001_chipset __devinitdata = {
 static int __devinit tc86c001_init_one(struct pci_dev *dev,
 				       const struct pci_device_id *id)
 {
-	return ide_setup_pci_device(dev, &tc86c001_chipset);
+	return ide_pci_init_one(dev, &tc86c001_chipset, NULL);
 }
 
 static const struct pci_device_id tc86c001_pci_tbl[] = {
diff --git a/drivers/ide/pci/triflex.c b/drivers/ide/pci/triflex.c
index db65a558d4ec..60dcb645d1b0 100644
--- a/drivers/ide/pci/triflex.c
+++ b/drivers/ide/pci/triflex.c
@@ -104,7 +104,7 @@ static const struct ide_port_info triflex_device __devinitdata = {
 static int __devinit triflex_init_one(struct pci_dev *dev, 
 		const struct pci_device_id *id)
 {
-	return ide_setup_pci_device(dev, &triflex_device);
+	return ide_pci_init_one(dev, &triflex_device, NULL);
 }
 
 static const struct pci_device_id triflex_pci_tbl[] = {
diff --git a/drivers/ide/pci/trm290.c b/drivers/ide/pci/trm290.c
index a8a3138682ef..d8127b51a542 100644
--- a/drivers/ide/pci/trm290.c
+++ b/drivers/ide/pci/trm290.c
@@ -340,7 +340,7 @@ static const struct ide_port_info trm290_chipset __devinitdata = {
 
 static int __devinit trm290_init_one(struct pci_dev *dev, const struct pci_device_id *id)
 {
-	return ide_setup_pci_device(dev, &trm290_chipset);
+	return ide_pci_init_one(dev, &trm290_chipset, NULL);
 }
 
 static const struct pci_device_id trm290_pci_tbl[] = {
diff --git a/drivers/ide/pci/via82cxxx.c b/drivers/ide/pci/via82cxxx.c
index 09dc4803ef9d..2f22abfe003b 100644
--- a/drivers/ide/pci/via82cxxx.c
+++ b/drivers/ide/pci/via82cxxx.c
@@ -466,7 +466,7 @@ static int __devinit via_init_one(struct pci_dev *dev, const struct pci_device_i
 
 	d.udma_mask = via_config->udma_mask;
 
-	return ide_setup_pci_device(dev, &d);
+	return ide_pci_init_one(dev, &d, NULL);
 }
 
 static const struct pci_device_id via_pci_tbl[] = {
diff --git a/drivers/ide/setup-pci.c b/drivers/ide/setup-pci.c
index b85de71fdc88..ca17bf8896df 100644
--- a/drivers/ide/setup-pci.c
+++ b/drivers/ide/setup-pci.c
@@ -525,8 +525,10 @@ out:
 	return ret;
 }
 
-int ide_setup_pci_device(struct pci_dev *dev, const struct ide_port_info *d)
+int ide_pci_init_one(struct pci_dev *dev, const struct ide_port_info *d,
+		     void *priv)
 {
+	struct ide_host *host;
 	hw_regs_t hw[4], *hws[] = { NULL, NULL, NULL, NULL };
 	int ret;
 
@@ -536,6 +538,19 @@ int ide_setup_pci_device(struct pci_dev *dev, const struct ide_port_info *d)
 
 	ide_pci_setup_ports(dev, d, 0, &hw[0], &hws[0]);
 
+	host = ide_host_alloc(d, hws);
+	if (host == NULL) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	host->dev[0] = &dev->dev;
+
+	host->host_priv = priv;
+
+	if (priv)
+		pci_set_drvdata(dev, host);
+
 	ret = do_ide_setup_pci_device(dev, d, 1);
 	if (ret < 0)
 		goto out;
@@ -543,16 +558,19 @@ int ide_setup_pci_device(struct pci_dev *dev, const struct ide_port_info *d)
 	/* fixup IRQ */
 	hw[1].irq = hw[0].irq = ret;
 
-	ret = ide_host_add(d, hws, NULL);
+	ret = ide_host_register(host, d, hws);
+	if (ret)
+		ide_host_free(host);
 out:
 	return ret;
 }
-EXPORT_SYMBOL_GPL(ide_setup_pci_device);
+EXPORT_SYMBOL_GPL(ide_pci_init_one);
 
-int ide_setup_pci_devices(struct pci_dev *dev1, struct pci_dev *dev2,
-			  const struct ide_port_info *d)
+int ide_pci_init_two(struct pci_dev *dev1, struct pci_dev *dev2,
+		     const struct ide_port_info *d, void *priv)
 {
 	struct pci_dev *pdev[] = { dev1, dev2 };
+	struct ide_host *host;
 	int ret, i;
 	hw_regs_t hw[4], *hws[] = { NULL, NULL, NULL, NULL };
 
@@ -562,7 +580,25 @@ int ide_setup_pci_devices(struct pci_dev *dev1, struct pci_dev *dev2,
 			goto out;
 
 		ide_pci_setup_ports(pdev[i], d, 0, &hw[i*2], &hws[i*2]);
+	}
 
+	host = ide_host_alloc(d, hws);
+	if (host == NULL) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	host->dev[0] = &dev1->dev;
+	host->dev[1] = &dev2->dev;
+
+	host->host_priv = priv;
+
+	if (priv) {
+		pci_set_drvdata(pdev[0], host);
+		pci_set_drvdata(pdev[1], host);
+	}
+
+	for (i = 0; i < 2; i++) {
 		ret = do_ide_setup_pci_device(pdev[i], d, !i);
 
 		/*
@@ -576,8 +612,10 @@ int ide_setup_pci_devices(struct pci_dev *dev1, struct pci_dev *dev2,
 		hw[i*2 + 1].irq = hw[i*2].irq = ret;
 	}
 
-	ret = ide_host_add(d, hws, NULL);
+	ret = ide_host_register(host, d, hws);
+	if (ret)
+		ide_host_free(host);
 out:
 	return ret;
 }
-EXPORT_SYMBOL_GPL(ide_setup_pci_devices);
+EXPORT_SYMBOL_GPL(ide_pci_init_two);
diff --git a/include/linux/ide.h b/include/linux/ide.h
index d67ccca2b964..776c574c9640 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -626,6 +626,8 @@ typedef struct hwif_s {
 struct ide_host {
 	ide_hwif_t	*ports[MAX_HWIFS];
 	unsigned int	n_ports;
+	struct device	*dev[2];
+	void		*host_priv;
 };
 
 /*
@@ -1201,8 +1203,9 @@ struct ide_port_info {
 	u8			udma_mask;
 };
 
-int ide_setup_pci_device(struct pci_dev *, const struct ide_port_info *);
-int ide_setup_pci_devices(struct pci_dev *, struct pci_dev *, const struct ide_port_info *);
+int ide_pci_init_one(struct pci_dev *, const struct ide_port_info *, void *);
+int ide_pci_init_two(struct pci_dev *, struct pci_dev *,
+		     const struct ide_port_info *, void *);
 
 void ide_map_sg(ide_drive_t *, struct request *);
 void ide_init_sg_cmd(ide_drive_t *, struct request *);
-- 
cgit v1.2.3


From 08da591e14cf87247ec09b17c350235157a92fc3 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 24 Jul 2008 22:53:15 +0200
Subject: ide: add ide_device_{get,put}() helpers

* Add 'struct ide_host *host' field to ide_hwif_t and set it
  in ide_host_alloc_all().

* Add ide_device_{get,put}() helpers loosely based on SCSI's
  scsi_device_{get,put}() ones.

* Convert IDE device drivers to use ide_device_{get,put}().

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-cd.c     | 12 +++++++++---
 drivers/ide/ide-disk.c   | 12 +++++++++---
 drivers/ide/ide-floppy.c | 12 +++++++++---
 drivers/ide/ide-probe.c  |  2 ++
 drivers/ide/ide-tape.c   | 12 +++++++++---
 drivers/ide/ide.c        | 47 +++++++++++++++++++++++++++++++++++++++++++++++
 drivers/scsi/ide-scsi.c  |  8 +++++++-
 include/linux/ide.h      |  7 +++++++
 8 files changed, 99 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 4e73aeee4053..8f253e5f26a8 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -57,23 +57,29 @@ static DEFINE_MUTEX(idecd_ref_mutex);
 #define ide_cd_g(disk) \
 	container_of((disk)->private_data, struct cdrom_info, driver)
 
+static void ide_cd_release(struct kref *);
+
 static struct cdrom_info *ide_cd_get(struct gendisk *disk)
 {
 	struct cdrom_info *cd = NULL;
 
 	mutex_lock(&idecd_ref_mutex);
 	cd = ide_cd_g(disk);
-	if (cd)
+	if (cd) {
 		kref_get(&cd->kref);
+		if (ide_device_get(cd->drive)) {
+			kref_put(&cd->kref, ide_cd_release);
+			cd = NULL;
+		}
+	}
 	mutex_unlock(&idecd_ref_mutex);
 	return cd;
 }
 
-static void ide_cd_release(struct kref *);
-
 static void ide_cd_put(struct cdrom_info *cd)
 {
 	mutex_lock(&idecd_ref_mutex);
+	ide_device_put(cd->drive);
 	kref_put(&cd->kref, ide_cd_release);
 	mutex_unlock(&idecd_ref_mutex);
 }
diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index df5fe5756871..28d85b410f7c 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -56,23 +56,29 @@ static DEFINE_MUTEX(idedisk_ref_mutex);
 #define ide_disk_g(disk) \
 	container_of((disk)->private_data, struct ide_disk_obj, driver)
 
+static void ide_disk_release(struct kref *);
+
 static struct ide_disk_obj *ide_disk_get(struct gendisk *disk)
 {
 	struct ide_disk_obj *idkp = NULL;
 
 	mutex_lock(&idedisk_ref_mutex);
 	idkp = ide_disk_g(disk);
-	if (idkp)
+	if (idkp) {
 		kref_get(&idkp->kref);
+		if (ide_device_get(idkp->drive)) {
+			kref_put(&idkp->kref, ide_disk_release);
+			idkp = NULL;
+		}
+	}
 	mutex_unlock(&idedisk_ref_mutex);
 	return idkp;
 }
 
-static void ide_disk_release(struct kref *);
-
 static void ide_disk_put(struct ide_disk_obj *idkp)
 {
 	mutex_lock(&idedisk_ref_mutex);
+	ide_device_put(idkp->drive);
 	kref_put(&idkp->kref, ide_disk_release);
 	mutex_unlock(&idedisk_ref_mutex);
 }
diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index 3d8e6dd0f41e..ca11a26746f1 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -158,23 +158,29 @@ static DEFINE_MUTEX(idefloppy_ref_mutex);
 #define ide_floppy_g(disk) \
 	container_of((disk)->private_data, struct ide_floppy_obj, driver)
 
+static void idefloppy_cleanup_obj(struct kref *);
+
 static struct ide_floppy_obj *ide_floppy_get(struct gendisk *disk)
 {
 	struct ide_floppy_obj *floppy = NULL;
 
 	mutex_lock(&idefloppy_ref_mutex);
 	floppy = ide_floppy_g(disk);
-	if (floppy)
+	if (floppy) {
 		kref_get(&floppy->kref);
+		if (ide_device_get(floppy->drive)) {
+			kref_put(&floppy->kref, idefloppy_cleanup_obj);
+			floppy = NULL;
+		}
+	}
 	mutex_unlock(&idefloppy_ref_mutex);
 	return floppy;
 }
 
-static void idefloppy_cleanup_obj(struct kref *);
-
 static void ide_floppy_put(struct ide_floppy_obj *floppy)
 {
 	mutex_lock(&idefloppy_ref_mutex);
+	ide_device_put(floppy->drive);
 	kref_put(&floppy->kref, idefloppy_cleanup_obj);
 	mutex_unlock(&idefloppy_ref_mutex);
 }
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index 890c15b1b3ae..9ab5892eaea1 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -1595,6 +1595,8 @@ struct ide_host *ide_host_alloc_all(const struct ide_port_info *d,
 
 		ide_init_port_data(hwif, idx);
 
+		hwif->host = host;
+
 		host->ports[i] = hwif;
 		host->n_ports++;
 	}
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index 6962ca4891a1..789f3428f072 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -322,23 +322,29 @@ static struct class *idetape_sysfs_class;
 #define ide_tape_g(disk) \
 	container_of((disk)->private_data, struct ide_tape_obj, driver)
 
+static void ide_tape_release(struct kref *);
+
 static struct ide_tape_obj *ide_tape_get(struct gendisk *disk)
 {
 	struct ide_tape_obj *tape = NULL;
 
 	mutex_lock(&idetape_ref_mutex);
 	tape = ide_tape_g(disk);
-	if (tape)
+	if (tape) {
 		kref_get(&tape->kref);
+		if (ide_device_get(tape->drive)) {
+			kref_put(&tape->kref, ide_tape_release);
+			tape = NULL;
+		}
+	}
 	mutex_unlock(&idetape_ref_mutex);
 	return tape;
 }
 
-static void ide_tape_release(struct kref *);
-
 static void ide_tape_put(struct ide_tape_obj *tape)
 {
 	mutex_lock(&idetape_ref_mutex);
+	ide_device_put(tape->drive);
 	kref_put(&tape->kref, ide_tape_release);
 	mutex_unlock(&idetape_ref_mutex);
 }
diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c
index 60f0ca66aa93..772451600e4d 100644
--- a/drivers/ide/ide.c
+++ b/drivers/ide/ide.c
@@ -618,6 +618,53 @@ set_val:
 
 EXPORT_SYMBOL(generic_ide_ioctl);
 
+/**
+ * ide_device_get	-	get an additional reference to a ide_drive_t
+ * @drive:	device to get a reference to
+ *
+ * Gets a reference to the ide_drive_t and increments the use count of the
+ * underlying LLDD module.
+ */
+int ide_device_get(ide_drive_t *drive)
+{
+	struct device *host_dev;
+	struct module *module;
+
+	if (!get_device(&drive->gendev))
+		return -ENXIO;
+
+	host_dev = drive->hwif->host->dev[0];
+	module = host_dev ? host_dev->driver->owner : NULL;
+
+	if (module && !try_module_get(module)) {
+		put_device(&drive->gendev);
+		return -ENXIO;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ide_device_get);
+
+/**
+ * ide_device_put	-	release a reference to a ide_drive_t
+ * @drive:	device to release a reference on
+ *
+ * Release a reference to the ide_drive_t and decrements the use count of
+ * the underlying LLDD module.
+ */
+void ide_device_put(ide_drive_t *drive)
+{
+#ifdef CONFIG_MODULE_UNLOAD
+	struct device *host_dev = drive->hwif->host->dev[0];
+	struct module *module = host_dev ? host_dev->driver->owner : NULL;
+
+	if (module)
+		module_put(module);
+#endif
+	put_device(&drive->gendev);
+}
+EXPORT_SYMBOL_GPL(ide_device_put);
+
 static int ide_bus_match(struct device *dev, struct device_driver *drv)
 {
 	return 1;
diff --git a/drivers/scsi/ide-scsi.c b/drivers/scsi/ide-scsi.c
index 538552495d48..318ef382448f 100644
--- a/drivers/scsi/ide-scsi.c
+++ b/drivers/scsi/ide-scsi.c
@@ -101,8 +101,13 @@ static struct ide_scsi_obj *ide_scsi_get(struct gendisk *disk)
 
 	mutex_lock(&idescsi_ref_mutex);
 	scsi = ide_scsi_g(disk);
-	if (scsi)
+	if (scsi) {
 		scsi_host_get(scsi->host);
+		if (ide_device_get(scsi->drive)) {
+			scsi_host_put(scsi->host);
+			scsi = NULL;
+		}
+	}
 	mutex_unlock(&idescsi_ref_mutex);
 	return scsi;
 }
@@ -110,6 +115,7 @@ static struct ide_scsi_obj *ide_scsi_get(struct gendisk *disk)
 static void ide_scsi_put(struct ide_scsi_obj *scsi)
 {
 	mutex_lock(&idescsi_ref_mutex);
+	ide_device_put(scsi->drive);
 	scsi_host_put(scsi->host);
 	mutex_unlock(&idescsi_ref_mutex);
 }
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 776c574c9640..3eccac0a2a36 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -532,12 +532,16 @@ struct ide_dma_ops {
 	void	(*dma_timeout)(struct ide_drive_s *);
 };
 
+struct ide_host;
+
 typedef struct hwif_s {
 	struct hwif_s *next;		/* for linked-list in ide_hwgroup_t */
 	struct hwif_s *mate;		/* other hwif from same PCI chip */
 	struct hwgroup_s *hwgroup;	/* actually (ide_hwgroup_t *) */
 	struct proc_dir_entry *proc;	/* /proc/ide/ directory entry */
 
+	struct ide_host *host;
+
 	char name[6];			/* name of interface, eg. "ide0" */
 
 	struct ide_io_ports	io_ports;
@@ -876,6 +880,9 @@ struct ide_driver_s {
 
 #define to_ide_driver(drv) container_of(drv, ide_driver_t, gen_driver)
 
+int ide_device_get(ide_drive_t *);
+void ide_device_put(ide_drive_t *);
+
 int generic_ide_ioctl(ide_drive_t *, struct file *, struct block_device *, unsigned, unsigned long);
 
 extern int ide_vlb_clk;
-- 
cgit v1.2.3


From ef0b04276d8f719d754c092434fbd62c2aeb5307 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 24 Jul 2008 22:53:19 +0200
Subject: ide: add ide_pci_remove() helper

* Add 'unsigned long host_flags' field to struct ide_host.

* Set ->host_flags in ide_host_alloc_all().

* Always set PCI dev's ->driver_data in ide_pci_init_{one,two}().

* Add ide_pci_remove() helper (the default implementation for
  struct pci_driver's ->remove method).

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-probe.c |  3 +++
 drivers/ide/setup-pci.c | 39 +++++++++++++++++++++++++++++++++------
 include/linux/ide.h     |  2 ++
 3 files changed, 38 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index 9ab5892eaea1..f0c162488ec4 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -1609,6 +1609,9 @@ struct ide_host *ide_host_alloc_all(const struct ide_port_info *d,
 	if (hws[0])
 		host->dev[0] = hws[0]->dev;
 
+	if (d)
+		host->host_flags = d->host_flags;
+
 	return host;
 }
 EXPORT_SYMBOL_GPL(ide_host_alloc_all);
diff --git a/drivers/ide/setup-pci.c b/drivers/ide/setup-pci.c
index ca17bf8896df..20f0ee004695 100644
--- a/drivers/ide/setup-pci.c
+++ b/drivers/ide/setup-pci.c
@@ -548,8 +548,7 @@ int ide_pci_init_one(struct pci_dev *dev, const struct ide_port_info *d,
 
 	host->host_priv = priv;
 
-	if (priv)
-		pci_set_drvdata(dev, host);
+	pci_set_drvdata(dev, host);
 
 	ret = do_ide_setup_pci_device(dev, d, 1);
 	if (ret < 0)
@@ -593,10 +592,8 @@ int ide_pci_init_two(struct pci_dev *dev1, struct pci_dev *dev2,
 
 	host->host_priv = priv;
 
-	if (priv) {
-		pci_set_drvdata(pdev[0], host);
-		pci_set_drvdata(pdev[1], host);
-	}
+	pci_set_drvdata(pdev[0], host);
+	pci_set_drvdata(pdev[1], host);
 
 	for (i = 0; i < 2; i++) {
 		ret = do_ide_setup_pci_device(pdev[i], d, !i);
@@ -619,3 +616,33 @@ out:
 	return ret;
 }
 EXPORT_SYMBOL_GPL(ide_pci_init_two);
+
+void ide_pci_remove(struct pci_dev *dev)
+{
+	struct ide_host *host = pci_get_drvdata(dev);
+	struct pci_dev *dev2 = host->dev[1] ? to_pci_dev(host->dev[1]) : NULL;
+	int bars;
+
+	if (host->host_flags & IDE_HFLAG_SINGLE)
+		bars = (1 << 2) - 1;
+	else
+		bars = (1 << 4) - 1;
+
+	if ((host->host_flags & IDE_HFLAG_NO_DMA) == 0) {
+		if (host->host_flags & IDE_HFLAG_CS5520)
+			bars |= (1 << 2);
+		else
+			bars |= (1 << 4);
+	}
+
+	ide_host_remove(host);
+
+	if (dev2)
+		pci_release_selected_regions(dev2, bars);
+	pci_release_selected_regions(dev, bars);
+
+	if (dev2)
+		pci_disable_device(dev2);
+	pci_disable_device(dev);
+}
+EXPORT_SYMBOL_GPL(ide_pci_remove);
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 3eccac0a2a36..dbd0aeb3a56d 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -631,6 +631,7 @@ struct ide_host {
 	ide_hwif_t	*ports[MAX_HWIFS];
 	unsigned int	n_ports;
 	struct device	*dev[2];
+	unsigned long	host_flags;
 	void		*host_priv;
 };
 
@@ -1213,6 +1214,7 @@ struct ide_port_info {
 int ide_pci_init_one(struct pci_dev *, const struct ide_port_info *, void *);
 int ide_pci_init_two(struct pci_dev *, struct pci_dev *,
 		     const struct ide_port_info *, void *);
+void ide_pci_remove(struct pci_dev *);
 
 void ide_map_sg(ide_drive_t *, struct request *);
 void ide_init_sg_cmd(ide_drive_t *, struct request *);
-- 
cgit v1.2.3


From d83b8b85cd56a083d30df73f3fd5e4714591b910 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 24 Jul 2008 22:53:30 +0200
Subject: ide: define MAX_HWIFS in <linux/ide.h>

* Now that ide_hwif_t instances are allocated dynamically
  the difference between MAX_HWIFS == 2 and MAX_HWIFS == 10
  is ~100 bytes (x86-32) so use MAX_HWIFS == 10 on all archs
  except these ones that use MAX_HWIFS == 1.

* Define MAX_HWIFS in <linux/ide.h> instead of <asm/ide.h>.

[ Please note that avr32/cris/v850 have no <asm/ide.h>
  and alpha/ia64/sh always define CONFIG_IDE_MAX_HWIFS. ]

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 include/asm-arm/ide.h               | 4 ----
 include/asm-blackfin/ide.h          | 2 --
 include/asm-frv/ide.h               | 4 ----
 include/asm-h8300/ide.h             | 2 --
 include/asm-m32r/ide.h              | 8 --------
 include/asm-m68k/ide.h              | 4 ----
 include/asm-mips/mach-generic/ide.h | 8 --------
 include/asm-mn10300/ide.h           | 4 ----
 include/asm-parisc/ide.h            | 4 ----
 include/asm-powerpc/ide.h           | 8 --------
 include/asm-sparc/ide.h             | 3 ---
 include/asm-x86/ide.h               | 9 ---------
 include/asm-xtensa/ide.h            | 5 -----
 include/linux/ide.h                 | 8 ++++++++
 14 files changed, 8 insertions(+), 65 deletions(-)

(limited to 'include/linux')

diff --git a/include/asm-arm/ide.h b/include/asm-arm/ide.h
index 88f4d231ce4f..a48019f99d08 100644
--- a/include/asm-arm/ide.h
+++ b/include/asm-arm/ide.h
@@ -13,10 +13,6 @@
 
 #ifdef __KERNEL__
 
-#ifndef MAX_HWIFS
-#define MAX_HWIFS	4
-#endif
-
 #define __ide_mm_insw(port,addr,len)	readsw(port,addr,len)
 #define __ide_mm_insl(port,addr,len)	readsl(port,addr,len)
 #define __ide_mm_outsw(port,addr,len)	writesw(port,addr,len)
diff --git a/include/asm-blackfin/ide.h b/include/asm-blackfin/ide.h
index 5b88de115bf4..90bc50bd22e9 100644
--- a/include/asm-blackfin/ide.h
+++ b/include/asm-blackfin/ide.h
@@ -17,8 +17,6 @@
 #ifdef __KERNEL__
 /****************************************************************************/
 
-#define MAX_HWIFS	1
-
 #include <asm-generic/ide_iops.h>
 
 /****************************************************************************/
diff --git a/include/asm-frv/ide.h b/include/asm-frv/ide.h
index 8c9a540d4344..7ebcc56a2229 100644
--- a/include/asm-frv/ide.h
+++ b/include/asm-frv/ide.h
@@ -18,10 +18,6 @@
 #include <asm/io.h>
 #include <asm/irq.h>
 
-#ifndef MAX_HWIFS
-#define MAX_HWIFS 8
-#endif
-
 /****************************************************************************/
 /*
  * some bits needed for parts of the IDE subsystem to compile
diff --git a/include/asm-h8300/ide.h b/include/asm-h8300/ide.h
index f8535ce7476e..8f79ba2ff929 100644
--- a/include/asm-h8300/ide.h
+++ b/include/asm-h8300/ide.h
@@ -16,8 +16,6 @@
 #ifdef __KERNEL__
 /****************************************************************************/
 
-#define MAX_HWIFS	1
-
 #include <asm-generic/ide_iops.h>
 
 /****************************************************************************/
diff --git a/include/asm-m32r/ide.h b/include/asm-m32r/ide.h
index 72798d624221..d755d41b9931 100644
--- a/include/asm-m32r/ide.h
+++ b/include/asm-m32r/ide.h
@@ -15,14 +15,6 @@
 
 #include <asm/m32r.h>
 
-#ifndef MAX_HWIFS
-# ifdef CONFIG_BLK_DEV_IDEPCI
-#define MAX_HWIFS	10
-# else
-#define MAX_HWIFS	2
-# endif
-#endif
-
 static __inline__ int ide_default_irq(unsigned long base)
 {
 	switch (base) {
diff --git a/include/asm-m68k/ide.h b/include/asm-m68k/ide.h
index 909c6dfd3851..1daf6cbdd9f0 100644
--- a/include/asm-m68k/ide.h
+++ b/include/asm-m68k/ide.h
@@ -45,10 +45,6 @@
 #include <asm/macints.h>
 #endif
 
-#ifndef MAX_HWIFS
-#define MAX_HWIFS	4	/* same as the other archs */
-#endif
-
 /*
  * Get rid of defs from io.h - ide has its private and conflicting versions
  * Since so far no single m68k platform uses ISA/PCI I/O space for IDE, we
diff --git a/include/asm-mips/mach-generic/ide.h b/include/asm-mips/mach-generic/ide.h
index f34740ee6775..8ee6bff030d9 100644
--- a/include/asm-mips/mach-generic/ide.h
+++ b/include/asm-mips/mach-generic/ide.h
@@ -19,14 +19,6 @@
 #include <linux/stddef.h>
 #include <asm/processor.h>
 
-#ifndef MAX_HWIFS
-# ifdef CONFIG_BLK_DEV_IDEPCI
-#define MAX_HWIFS	10
-# else
-#define MAX_HWIFS	6
-# endif
-#endif
-
 static __inline__ int ide_probe_legacy(void)
 {
 #ifdef CONFIG_PCI
diff --git a/include/asm-mn10300/ide.h b/include/asm-mn10300/ide.h
index dc235121ec42..6adcdd92e83d 100644
--- a/include/asm-mn10300/ide.h
+++ b/include/asm-mn10300/ide.h
@@ -23,10 +23,6 @@
 #undef SUPPORT_VLB_SYNC
 #define SUPPORT_VLB_SYNC 0
 
-#ifndef MAX_HWIFS
-#define MAX_HWIFS 8
-#endif
-
 /*
  * some bits needed for parts of the IDE subsystem to compile
  */
diff --git a/include/asm-parisc/ide.h b/include/asm-parisc/ide.h
index db0c94410095..c246ef75017d 100644
--- a/include/asm-parisc/ide.h
+++ b/include/asm-parisc/ide.h
@@ -13,10 +13,6 @@
 
 #ifdef __KERNEL__
 
-#ifndef MAX_HWIFS
-#define MAX_HWIFS	2
-#endif
-
 #define ide_request_irq(irq,hand,flg,dev,id)	request_irq((irq),(hand),(flg),(dev),(id))
 #define ide_free_irq(irq,dev_id)		free_irq((irq), (dev_id))
 #define ide_request_region(from,extent,name)	request_region((from), (extent), (name))
diff --git a/include/asm-powerpc/ide.h b/include/asm-powerpc/ide.h
index 3d90bf7d3d73..262def6a9f0a 100644
--- a/include/asm-powerpc/ide.h
+++ b/include/asm-powerpc/ide.h
@@ -14,14 +14,6 @@
 #endif
 #include <asm/io.h>
 
-#ifndef MAX_HWIFS
-#ifdef __powerpc64__
-#define MAX_HWIFS	10
-#else
-#define MAX_HWIFS	8
-#endif
-#endif
-
 #define __ide_mm_insw(p, a, c)	readsw((void __iomem *)(p), (a), (c))
 #define __ide_mm_insl(p, a, c)	readsl((void __iomem *)(p), (a), (c))
 #define __ide_mm_outsw(p, a, c)	writesw((void __iomem *)(p), (a), (c))
diff --git a/include/asm-sparc/ide.h b/include/asm-sparc/ide.h
index 879fcec72dc1..b7af3d658239 100644
--- a/include/asm-sparc/ide.h
+++ b/include/asm-sparc/ide.h
@@ -21,9 +21,6 @@
 #include <asm/psr.h>
 #endif
 
-#undef  MAX_HWIFS
-#define MAX_HWIFS	2
-
 #define __ide_insl(data_reg, buffer, wcount) \
 	__ide_insw(data_reg, buffer, (wcount)<<1)
 #define __ide_outsl(data_reg, buffer, wcount) \
diff --git a/include/asm-x86/ide.h b/include/asm-x86/ide.h
index 34050747f38c..bc54879daed1 100644
--- a/include/asm-x86/ide.h
+++ b/include/asm-x86/ide.h
@@ -11,15 +11,6 @@
 
 #ifdef __KERNEL__
 
-
-#ifndef MAX_HWIFS
-# ifdef CONFIG_BLK_DEV_IDEPCI
-#define MAX_HWIFS	10
-# else
-#define MAX_HWIFS	6
-# endif
-#endif
-
 static __inline__ int ide_default_irq(unsigned long base)
 {
 	switch (base) {
diff --git a/include/asm-xtensa/ide.h b/include/asm-xtensa/ide.h
index cb995701c42d..18342a2cc774 100644
--- a/include/asm-xtensa/ide.h
+++ b/include/asm-xtensa/ide.h
@@ -14,11 +14,6 @@
 
 #ifdef __KERNEL__
 
-
-#ifndef MAX_HWIFS
-# define MAX_HWIFS	1
-#endif
-
 #include <asm-generic/ide_iops.h>
 
 #endif	/* __KERNEL__ */
diff --git a/include/linux/ide.h b/include/linux/ide.h
index dbd0aeb3a56d..76fe00b24b51 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -213,6 +213,14 @@ static inline int __ide_default_irq(unsigned long base)
 
 #include <asm/ide.h>
 
+#ifndef MAX_HWIFS
+#if defined(CONFIG_BLACKFIN) || defined(CONFIG_H8300) || defined(CONFIG_XTENSA)
+# define MAX_HWIFS	1
+#else
+# define MAX_HWIFS	10
+#endif
+#endif
+
 #if !defined(MAX_HWIFS) || defined(CONFIG_EMBEDDED)
 #undef MAX_HWIFS
 #define MAX_HWIFS	CONFIG_IDE_MAX_HWIFS
-- 
cgit v1.2.3


From 2a8f7450f828eaee49d66f41f99ac2e54f1160a6 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 24 Jul 2008 22:53:31 +0200
Subject: ide: remove <asm/ide.h> for some archs

* Remove <linux/irq.h> include from <asm-ia64.h> (<linux/ide.h> includes
  <linux/interrupt.h> which is enough).

* Remove <asm/ide.h> for alpha/blackfin/h8300/ia64/m32r/sh/x86/xtensa
  (this leaves us with arm/frv/m68k/mips/mn10300/parisc/powerpc/sparc[64]).

There should be no functional changes caused by this patch.

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 include/asm-alpha/ide.h    | 20 --------------------
 include/asm-blackfin/ide.h | 25 -------------------------
 include/asm-h8300/ide.h    | 24 ------------------------
 include/asm-ia64/ide.h     | 22 ----------------------
 include/asm-m32r/ide.h     | 20 --------------------
 include/asm-sh/ide.h       | 21 ---------------------
 include/asm-x86/ide.h      | 18 ------------------
 include/asm-xtensa/ide.h   | 21 ---------------------
 include/linux/ide.h        |  6 ++++++
 9 files changed, 6 insertions(+), 171 deletions(-)
 delete mode 100644 include/asm-alpha/ide.h
 delete mode 100644 include/asm-blackfin/ide.h
 delete mode 100644 include/asm-h8300/ide.h
 delete mode 100644 include/asm-ia64/ide.h
 delete mode 100644 include/asm-m32r/ide.h
 delete mode 100644 include/asm-sh/ide.h
 delete mode 100644 include/asm-x86/ide.h
 delete mode 100644 include/asm-xtensa/ide.h

(limited to 'include/linux')

diff --git a/include/asm-alpha/ide.h b/include/asm-alpha/ide.h
deleted file mode 100644
index 55f9f6870249..000000000000
--- a/include/asm-alpha/ide.h
+++ /dev/null
@@ -1,20 +0,0 @@
-/*
- *  linux/include/asm-alpha/ide.h
- *
- *  Copyright (C) 1994-1996  Linus Torvalds & authors
- */
-
-/*
- *  This file contains the alpha architecture specific IDE code.
- */
-
-#ifndef __ASMalpha_IDE_H
-#define __ASMalpha_IDE_H
-
-#ifdef __KERNEL__
-
-#include <asm-generic/ide_iops.h>
-
-#endif /* __KERNEL__ */
-
-#endif /* __ASMalpha_IDE_H */
diff --git a/include/asm-blackfin/ide.h b/include/asm-blackfin/ide.h
deleted file mode 100644
index 90bc50bd22e9..000000000000
--- a/include/asm-blackfin/ide.h
+++ /dev/null
@@ -1,25 +0,0 @@
-/****************************************************************************/
-
-/*
- *  linux/include/asm-blackfin/ide.h
- *
- *  Copyright (C) 1994-1996  Linus Torvalds & authors
- *  Copyright (C) 2001       Lineo Inc., davidm@snapgear.com
- *  Copyright (C) 2002       Greg Ungerer (gerg@snapgear.com)
- *  Copyright (C) 2002       Yoshinori Sato (ysato@users.sourceforge.jp)
- *  Copyright (C) 2005       Hennerich Michael (hennerich@blackfin.uclinux.org)
- */
-
-/****************************************************************************/
-#ifndef _BLACKFIN_IDE_H
-#define _BLACKFIN_IDE_H
-/****************************************************************************/
-#ifdef __KERNEL__
-/****************************************************************************/
-
-#include <asm-generic/ide_iops.h>
-
-/****************************************************************************/
-#endif				/* __KERNEL__ */
-#endif				/* _BLACKFIN_IDE_H */
-/****************************************************************************/
diff --git a/include/asm-h8300/ide.h b/include/asm-h8300/ide.h
deleted file mode 100644
index 8f79ba2ff929..000000000000
--- a/include/asm-h8300/ide.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/****************************************************************************/
-
-/*
- *  linux/include/asm-h8300/ide.h
- *
- *  Copyright (C) 1994-1996  Linus Torvalds & authors
- *  Copyright (C) 2001       Lineo Inc., davidm@snapgear.com
- *  Copyright (C) 2002       Greg Ungerer (gerg@snapgear.com)
- *  Copyright (C) 2002       Yoshinori Sato (ysato@users.sourceforge.jp)
- */
-
-/****************************************************************************/
-#ifndef _H8300_IDE_H
-#define _H8300_IDE_H
-/****************************************************************************/
-#ifdef __KERNEL__
-/****************************************************************************/
-
-#include <asm-generic/ide_iops.h>
-
-/****************************************************************************/
-#endif /* __KERNEL__ */
-#endif /* _H8300_IDE_H */
-/****************************************************************************/
diff --git a/include/asm-ia64/ide.h b/include/asm-ia64/ide.h
deleted file mode 100644
index 5a0aedea4764..000000000000
--- a/include/asm-ia64/ide.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- *  linux/include/asm-ia64/ide.h
- *
- *  Copyright (C) 1994-1996  Linus Torvalds & authors
- */
-
-/*
- *  This file contains the ia64 architecture specific IDE code.
- */
-
-#ifndef __ASM_IA64_IDE_H
-#define __ASM_IA64_IDE_H
-
-#ifdef __KERNEL__
-
-#include <linux/irq.h>
-
-#include <asm-generic/ide_iops.h>
-
-#endif /* __KERNEL__ */
-
-#endif /* __ASM_IA64_IDE_H */
diff --git a/include/asm-m32r/ide.h b/include/asm-m32r/ide.h
deleted file mode 100644
index 0f1ec6973879..000000000000
--- a/include/asm-m32r/ide.h
+++ /dev/null
@@ -1,20 +0,0 @@
-#ifndef _ASM_M32R_IDE_H
-#define _ASM_M32R_IDE_H
-
-/*
- *  linux/include/asm-m32r/ide.h
- *
- *  Copyright (C) 1994-1996  Linus Torvalds & authors
- */
-
-/*
- *  This file contains the i386 architecture specific IDE code.
- */
-
-#ifdef __KERNEL__
-
-#include <asm-generic/ide_iops.h>
-
-#endif /* __KERNEL__ */
-
-#endif /* _ASM_M32R_IDE_H */
diff --git a/include/asm-sh/ide.h b/include/asm-sh/ide.h
deleted file mode 100644
index 58e0bdd52be4..000000000000
--- a/include/asm-sh/ide.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- *  linux/include/asm-sh/ide.h
- *
- *  Copyright (C) 1994-1996  Linus Torvalds & authors
- */
-
-/*
- *  This file contains the i386 architecture specific IDE code.
- *  In future, SuperH code.
- */
-
-#ifndef __ASM_SH_IDE_H
-#define __ASM_SH_IDE_H
-
-#ifdef __KERNEL__
-
-#include <asm-generic/ide_iops.h>
-
-#endif /* __KERNEL__ */
-
-#endif /* __ASM_SH_IDE_H */
diff --git a/include/asm-x86/ide.h b/include/asm-x86/ide.h
deleted file mode 100644
index 0289baf9ce0a..000000000000
--- a/include/asm-x86/ide.h
+++ /dev/null
@@ -1,18 +0,0 @@
-/*
- *  Copyright (C) 1994-1996  Linus Torvalds & authors
- */
-
-/*
- *  This file contains the i386 architecture specific IDE code.
- */
-
-#ifndef __ASMi386_IDE_H
-#define __ASMi386_IDE_H
-
-#ifdef __KERNEL__
-
-#include <asm-generic/ide_iops.h>
-
-#endif /* __KERNEL__ */
-
-#endif /* __ASMi386_IDE_H */
diff --git a/include/asm-xtensa/ide.h b/include/asm-xtensa/ide.h
deleted file mode 100644
index 18342a2cc774..000000000000
--- a/include/asm-xtensa/ide.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * include/asm-xtensa/ide.h
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 1994 - 1996  Linus Torvalds & authors
- * Copyright (C) 2001 - 2005 Tensilica Inc.
- */
-
-#ifndef _XTENSA_IDE_H
-#define _XTENSA_IDE_H
-
-#ifdef __KERNEL__
-
-#include <asm-generic/ide_iops.h>
-
-#endif	/* __KERNEL__ */
-
-#endif	/* _XTENSA_IDE_H */
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 76fe00b24b51..fd78b401b036 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -211,7 +211,13 @@ static inline int __ide_default_irq(unsigned long base)
 	return 0;
 }
 
+#if defined(CONFIG_ARM) || defined(CONFIG_FRV) || defined(CONFIG_M68K) || \
+    defined(CONFIG_MIPS) || defined(CONFIG_MN10300) || defined(CONFIG_PARISC) \
+    || defined(CONFIG_PPC) || defined(CONFIG_SPARC) || defined(CONFIG_SPARC64)
 #include <asm/ide.h>
+#else
+#include <asm-generic/ide_iops.h>
+#endif
 
 #ifndef MAX_HWIFS
 #if defined(CONFIG_BLACKFIN) || defined(CONFIG_H8300) || defined(CONFIG_XTENSA)
-- 
cgit v1.2.3


From a326b02b0c576001353dbc489154959b0889c6bf Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 24 Jul 2008 22:53:33 +0200
Subject: ide: drop 'name' parameter from ->init_chipset method

There should be no functional changes caused by this patch.

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/pci/aec62xx.c      |  2 +-
 drivers/ide/pci/alim15x3.c     |  5 ++---
 drivers/ide/pci/amd74xx.c      | 19 ++++++++-----------
 drivers/ide/pci/cmd64x.c       |  2 +-
 drivers/ide/pci/cs5530.c       |  7 +++----
 drivers/ide/pci/cy82c693.c     | 10 +++++-----
 drivers/ide/pci/hpt34x.c       |  2 +-
 drivers/ide/pci/hpt366.c       |  3 ++-
 drivers/ide/pci/it821x.c       |  2 +-
 drivers/ide/pci/pdc202xx_new.c |  3 ++-
 drivers/ide/pci/pdc202xx_old.c |  3 +--
 drivers/ide/pci/piix.c         |  3 +--
 drivers/ide/pci/serverworks.c  |  6 +++---
 drivers/ide/pci/siimage.c      |  8 +++-----
 drivers/ide/pci/sis5513.c      |  3 +--
 drivers/ide/pci/sl82c105.c     |  2 +-
 drivers/ide/pci/via82cxxx.c    |  3 +--
 drivers/ide/setup-pci.c        |  2 +-
 include/linux/ide.h            |  2 +-
 19 files changed, 39 insertions(+), 48 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ide/pci/aec62xx.c b/drivers/ide/pci/aec62xx.c
index f6dc6c20f3af..e0c8fe7d9fea 100644
--- a/drivers/ide/pci/aec62xx.c
+++ b/drivers/ide/pci/aec62xx.c
@@ -140,7 +140,7 @@ static void aec_set_pio_mode(ide_drive_t *drive, const u8 pio)
 	drive->hwif->port_ops->set_dma_mode(drive, pio + XFER_PIO_0);
 }
 
-static unsigned int __devinit init_chipset_aec62xx(struct pci_dev *dev, const char *name)
+static unsigned int __devinit init_chipset_aec62xx(struct pci_dev *dev)
 {
 	/* These are necessary to get AEC6280 Macintosh cards to work */
 	if ((dev->device == PCI_DEVICE_ID_ARTOP_ATP865) ||
diff --git a/drivers/ide/pci/alim15x3.c b/drivers/ide/pci/alim15x3.c
index a099c4dd599d..b582687e0cd4 100644
--- a/drivers/ide/pci/alim15x3.c
+++ b/drivers/ide/pci/alim15x3.c
@@ -209,13 +209,12 @@ static int ali15x3_dma_setup(ide_drive_t *drive)
 /**
  *	init_chipset_ali15x3	-	Initialise an ALi IDE controller
  *	@dev: PCI device
- *	@name: Name of the controller
  *
  *	This function initializes the ALI IDE controller and where 
  *	appropriate also sets up the 1533 southbridge.
  */
-  
-static unsigned int __devinit init_chipset_ali15x3 (struct pci_dev *dev, const char *name)
+
+static unsigned int __devinit init_chipset_ali15x3(struct pci_dev *dev)
 {
 	unsigned long flags;
 	u8 tmpbyte;
diff --git a/drivers/ide/pci/amd74xx.c b/drivers/ide/pci/amd74xx.c
index cbf78edfe00b..2cea7bf51a0f 100644
--- a/drivers/ide/pci/amd74xx.c
+++ b/drivers/ide/pci/amd74xx.c
@@ -112,15 +112,13 @@ static void amd_set_pio_mode(ide_drive_t *drive, const u8 pio)
 	amd_set_drive(drive, XFER_PIO_0 + pio);
 }
 
-static void __devinit amd7409_cable_detect(struct pci_dev *dev,
-					   const char *name)
+static void __devinit amd7409_cable_detect(struct pci_dev *dev)
 {
 	/* no host side cable detection */
 	amd_80w = 0x03;
 }
 
-static void __devinit amd7411_cable_detect(struct pci_dev *dev,
-					   const char *name)
+static void __devinit amd7411_cable_detect(struct pci_dev *dev)
 {
 	int i;
 	u32 u = 0;
@@ -131,9 +129,9 @@ static void __devinit amd7411_cable_detect(struct pci_dev *dev,
 	amd_80w = ((t & 0x3) ? 1 : 0) | ((t & 0xc) ? 2 : 0);
 	for (i = 24; i >= 0; i -= 8)
 		if (((u >> i) & 4) && !(amd_80w & (1 << (1 - (i >> 4))))) {
-			printk(KERN_WARNING "%s %s: BIOS didn't set cable bits "
-				"correctly. Enabling workaround.\n",
-				name, pci_name(dev));
+			printk(KERN_WARNING DRV_NAME " %s: BIOS didn't set "
+				"cable bits correctly. Enabling workaround.\n",
+				pci_name(dev));
 			amd_80w |= (1 << (1 - (i >> 4)));
 		}
 }
@@ -142,8 +140,7 @@ static void __devinit amd7411_cable_detect(struct pci_dev *dev,
  * The initialization callback.  Initialize drive independent registers.
  */
 
-static unsigned int __devinit init_chipset_amd74xx(struct pci_dev *dev,
-						   const char *name)
+static unsigned int __devinit init_chipset_amd74xx(struct pci_dev *dev)
 {
 	u8 t = 0, offset = amd_offset(dev);
 
@@ -156,9 +153,9 @@ static unsigned int __devinit init_chipset_amd74xx(struct pci_dev *dev,
 		; /* no UDMA > 2 */
 	else if (dev->vendor == PCI_VENDOR_ID_AMD &&
 		 dev->device == PCI_DEVICE_ID_AMD_VIPER_7409)
-		amd7409_cable_detect(dev, name);
+		amd7409_cable_detect(dev);
 	else
-		amd7411_cable_detect(dev, name);
+		amd7411_cable_detect(dev);
 
 /*
  * Take care of prefetch & postwrite.
diff --git a/drivers/ide/pci/cmd64x.c b/drivers/ide/pci/cmd64x.c
index 3d84debaf81f..1360b4fa9fd3 100644
--- a/drivers/ide/pci/cmd64x.c
+++ b/drivers/ide/pci/cmd64x.c
@@ -332,7 +332,7 @@ static int cmd646_1_dma_end(ide_drive_t *drive)
 	return (dma_stat & 7) != 4;
 }
 
-static unsigned int __devinit init_chipset_cmd64x(struct pci_dev *dev, const char *name)
+static unsigned int __devinit init_chipset_cmd64x(struct pci_dev *dev)
 {
 	u8 mrdmode = 0;
 
diff --git a/drivers/ide/pci/cs5530.c b/drivers/ide/pci/cs5530.c
index 5543c8677a5a..f235db8c678b 100644
--- a/drivers/ide/pci/cs5530.c
+++ b/drivers/ide/pci/cs5530.c
@@ -129,12 +129,11 @@ static void cs5530_set_dma_mode(ide_drive_t *drive, const u8 mode)
 /**
  *	init_chipset_5530	-	set up 5530 bridge
  *	@dev: PCI device
- *	@name: device name
  *
  *	Initialize the cs5530 bridge for reliable IDE DMA operation.
  */
 
-static unsigned int __devinit init_chipset_cs5530 (struct pci_dev *dev, const char *name)
+static unsigned int __devinit init_chipset_cs5530(struct pci_dev *dev)
 {
 	struct pci_dev *master_0 = NULL, *cs5530_0 = NULL;
 
@@ -153,11 +152,11 @@ static unsigned int __devinit init_chipset_cs5530 (struct pci_dev *dev, const ch
 		}
 	}
 	if (!master_0) {
-		printk(KERN_ERR "%s: unable to locate PCI MASTER function\n", name);
+		printk(KERN_ERR DRV_NAME ": unable to locate PCI MASTER function\n");
 		goto out;
 	}
 	if (!cs5530_0) {
-		printk(KERN_ERR "%s: unable to locate CS5530 LEGACY function\n", name);
+		printk(KERN_ERR DRV_NAME ": unable to locate CS5530 LEGACY function\n");
 		goto out;
 	}
 
diff --git a/drivers/ide/pci/cy82c693.c b/drivers/ide/pci/cy82c693.c
index 41c7f3351eb6..bfae2f882f48 100644
--- a/drivers/ide/pci/cy82c693.c
+++ b/drivers/ide/pci/cy82c693.c
@@ -332,7 +332,7 @@ static void cy82c693_set_pio_mode(ide_drive_t *drive, const u8 pio)
 /*
  * this function is called during init and is used to setup the cy82c693 chip
  */
-static unsigned int __devinit init_chipset_cy82c693(struct pci_dev *dev, const char *name)
+static unsigned int __devinit init_chipset_cy82c693(struct pci_dev *dev)
 {
 	if (PCI_FUNC(dev->devfn) != 1)
 		return 0;
@@ -351,8 +351,8 @@ static unsigned int __devinit init_chipset_cy82c693(struct pci_dev *dev, const c
 	data = inb(CY82_DATA_PORT);
 
 #if CY82C693_DEBUG_INFO
-	printk(KERN_INFO "%s: Peripheral Configuration Register: 0x%X\n",
-		name, data);
+	printk(KERN_INFO DRV_NAME ": Peripheral Configuration Register: 0x%X\n",
+		data);
 #endif /* CY82C693_DEBUG_INFO */
 
 	/*
@@ -373,8 +373,8 @@ static unsigned int __devinit init_chipset_cy82c693(struct pci_dev *dev, const c
 	outb(data, CY82_DATA_PORT);
 
 #if CY82C693_DEBUG_INFO
-	printk(KERN_INFO "%s: New Peripheral Configuration Register: 0x%X\n",
-		name, data);
+	printk(KERN_INFO ": New Peripheral Configuration Register: 0x%X\n",
+		data);
 #endif /* CY82C693_DEBUG_INFO */
 
 #endif /* CY82C693_SETDMA_CLOCK */
diff --git a/drivers/ide/pci/hpt34x.c b/drivers/ide/pci/hpt34x.c
index baabb4ce0d78..6009b0b9655d 100644
--- a/drivers/ide/pci/hpt34x.c
+++ b/drivers/ide/pci/hpt34x.c
@@ -79,7 +79,7 @@ static void hpt34x_set_pio_mode(ide_drive_t *drive, const u8 pio)
  */
 #define	HPT34X_PCI_INIT_REG		0x80
 
-static unsigned int __devinit init_chipset_hpt34x(struct pci_dev *dev, const char *name)
+static unsigned int __devinit init_chipset_hpt34x(struct pci_dev *dev)
 {
 	int i = 0;
 	unsigned long hpt34xIoBase = pci_resource_start(dev, 4);
diff --git a/drivers/ide/pci/hpt366.c b/drivers/ide/pci/hpt366.c
index 6a1c65c3be3e..5271b246b88c 100644
--- a/drivers/ide/pci/hpt366.c
+++ b/drivers/ide/pci/hpt366.c
@@ -970,11 +970,12 @@ static int __devinit hpt37x_calibrate_dpll(struct pci_dev *dev, u16 f_low, u16 f
 	return 1;
 }
 
-static unsigned int __devinit init_chipset_hpt366(struct pci_dev *dev, const char *name)
+static unsigned int __devinit init_chipset_hpt366(struct pci_dev *dev)
 {
 	unsigned long io_base	= pci_resource_start(dev, 4);
 	struct ide_host *host	= pci_get_drvdata(dev);
 	struct hpt_info *info	= host->host_priv + (&dev->dev == host->dev[1]);
+	const char *name	= DRV_NAME;
 	u8 pci_clk,  dpll_clk	= 0;	/* PCI and DPLL clock in MHz */
 	u8 chip_type;
 	enum ata_clock	clock;
diff --git a/drivers/ide/pci/it821x.c b/drivers/ide/pci/it821x.c
index 74173352741f..e16a1d113a2a 100644
--- a/drivers/ide/pci/it821x.c
+++ b/drivers/ide/pci/it821x.c
@@ -605,7 +605,7 @@ static void __devinit it8212_disable_raid(struct pci_dev *dev)
 	pci_write_config_byte(dev, PCI_LATENCY_TIMER, 0x20);
 }
 
-static unsigned int __devinit init_chipset_it821x(struct pci_dev *dev, const char *name)
+static unsigned int __devinit init_chipset_it821x(struct pci_dev *dev)
 {
 	u8 conf;
 	static char *mode[2] = { "pass through", "smart" };
diff --git a/drivers/ide/pci/pdc202xx_new.c b/drivers/ide/pci/pdc202xx_new.c
index 1f6791957227..998615fa285f 100644
--- a/drivers/ide/pci/pdc202xx_new.c
+++ b/drivers/ide/pci/pdc202xx_new.c
@@ -326,8 +326,9 @@ static void __devinit apple_kiwi_init(struct pci_dev *pdev)
 }
 #endif /* CONFIG_PPC_PMAC */
 
-static unsigned int __devinit init_chipset_pdcnew(struct pci_dev *dev, const char *name)
+static unsigned int __devinit init_chipset_pdcnew(struct pci_dev *dev)
 {
+	const char *name = DRV_NAME;
 	unsigned long dma_base = pci_resource_start(dev, 4);
 	unsigned long sec_dma_base = dma_base + 0x08;
 	long pll_input, pll_output, ratio;
diff --git a/drivers/ide/pci/pdc202xx_old.c b/drivers/ide/pci/pdc202xx_old.c
index da92d127868f..6ff2def58da0 100644
--- a/drivers/ide/pci/pdc202xx_old.c
+++ b/drivers/ide/pci/pdc202xx_old.c
@@ -265,8 +265,7 @@ static void pdc202xx_dma_timeout(ide_drive_t *drive)
 	ide_dma_timeout(drive);
 }
 
-static unsigned int __devinit init_chipset_pdc202xx(struct pci_dev *dev,
-						    const char *name)
+static unsigned int __devinit init_chipset_pdc202xx(struct pci_dev *dev)
 {
 	unsigned long dmabase = pci_resource_start(dev, 4);
 	u8 udma_speed_flag = 0, primary_mode = 0, secondary_mode = 0;
diff --git a/drivers/ide/pci/piix.c b/drivers/ide/pci/piix.c
index 9eb411f5c358..7fc3022dcf68 100644
--- a/drivers/ide/pci/piix.c
+++ b/drivers/ide/pci/piix.c
@@ -200,13 +200,12 @@ static void piix_set_dma_mode(ide_drive_t *drive, const u8 speed)
 /**
  *	init_chipset_ich	-	set up the ICH chipset
  *	@dev: PCI device to set up
- *	@name: Name of the device
  *
  *	Initialize the PCI device as required.  For the ICH this turns
  *	out to be nice and simple.
  */
 
-static unsigned int __devinit init_chipset_ich(struct pci_dev *dev, const char *name)
+static unsigned int __devinit init_chipset_ich(struct pci_dev *dev)
 {
 	u32 extra = 0;
 
diff --git a/drivers/ide/pci/serverworks.c b/drivers/ide/pci/serverworks.c
index e26bc8326dbb..d173f2937722 100644
--- a/drivers/ide/pci/serverworks.c
+++ b/drivers/ide/pci/serverworks.c
@@ -174,7 +174,7 @@ static void svwks_set_dma_mode(ide_drive_t *drive, const u8 speed)
 	pci_write_config_byte(dev, 0x54, ultra_enable);
 }
 
-static unsigned int __devinit init_chipset_svwks (struct pci_dev *dev, const char *name)
+static unsigned int __devinit init_chipset_svwks(struct pci_dev *dev)
 {
 	unsigned int reg;
 	u8 btr;
@@ -190,8 +190,8 @@ static unsigned int __devinit init_chipset_svwks (struct pci_dev *dev, const cha
 			pci_read_config_dword(isa_dev, 0x64, &reg);
 			reg &= ~0x00002000; /* disable 600ns interrupt mask */
 			if(!(reg & 0x00004000))
-				printk(KERN_DEBUG "%s %s: UDMA not BIOS "
-					"enabled.\n", name, pci_name(dev));
+				printk(KERN_DEBUG DRV_NAME " %s: UDMA not BIOS "
+					"enabled.\n", pci_name(dev));
 			reg |=  0x00004000; /* enable UDMA/33 support */
 			pci_write_config_dword(isa_dev, 0x64, reg);
 		}
diff --git a/drivers/ide/pci/siimage.c b/drivers/ide/pci/siimage.c
index 572b479a3922..b8ad9ad6cf0d 100644
--- a/drivers/ide/pci/siimage.c
+++ b/drivers/ide/pci/siimage.c
@@ -457,14 +457,12 @@ static void sil_sata_pre_reset(ide_drive_t *drive)
 /**
  *	init_chipset_siimage	-	set up an SI device
  *	@dev: PCI device
- *	@name: device name
  *
  *	Perform the initial PCI set up for this device. Attempt to switch
  *	to 133 MHz clocking if the system isn't already set up to do it.
  */
 
-static unsigned int __devinit init_chipset_siimage(struct pci_dev *dev,
-						   const char *name)
+static unsigned int __devinit init_chipset_siimage(struct pci_dev *dev)
 {
 	struct ide_host *host = pci_get_drvdata(dev);
 	void __iomem *ioaddr = host->host_priv;
@@ -541,8 +539,8 @@ static unsigned int __devinit init_chipset_siimage(struct pci_dev *dev,
 			{ "== 100", "== 133", "== 2X PCI", "DISABLED!" };
 
 		tmp >>= 4;
-		printk(KERN_INFO "%s %s: BASE CLOCK %s\n",
-			name, pci_name(dev), clk_str[tmp & 3]);
+		printk(KERN_INFO DRV_NAME " %s: BASE CLOCK %s\n",
+			pci_name(dev), clk_str[tmp & 3]);
 	}
 
 	return 0;
diff --git a/drivers/ide/pci/sis5513.c b/drivers/ide/pci/sis5513.c
index 6fcb46c87871..cc95f90b53b7 100644
--- a/drivers/ide/pci/sis5513.c
+++ b/drivers/ide/pci/sis5513.c
@@ -448,8 +448,7 @@ static int __devinit sis_find_family(struct pci_dev *dev)
 	return chipset_family;
 }
 
-static unsigned int __devinit init_chipset_sis5513(struct pci_dev *dev,
-						   const char *name)
+static unsigned int __devinit init_chipset_sis5513(struct pci_dev *dev)
 {
 	/* Make general config ops here
 	   1/ tell IDE channels to operate in Compatibility mode only
diff --git a/drivers/ide/pci/sl82c105.c b/drivers/ide/pci/sl82c105.c
index fa720db3de10..73905bcc08fb 100644
--- a/drivers/ide/pci/sl82c105.c
+++ b/drivers/ide/pci/sl82c105.c
@@ -272,7 +272,7 @@ static u8 sl82c105_bridge_revision(struct pci_dev *dev)
  * channel 0 here at least, but channel 1 has to be enabled by
  * firmware or arch code. We still set both to 16 bits mode.
  */
-static unsigned int __devinit init_chipset_sl82c105(struct pci_dev *dev, const char *msg)
+static unsigned int __devinit init_chipset_sl82c105(struct pci_dev *dev)
 {
 	u32 val;
 
diff --git a/drivers/ide/pci/via82cxxx.c b/drivers/ide/pci/via82cxxx.c
index 170e058f1fbd..454d2bf62dce 100644
--- a/drivers/ide/pci/via82cxxx.c
+++ b/drivers/ide/pci/via82cxxx.c
@@ -262,13 +262,12 @@ static void __devinit via_cable_detect(struct via82cxxx_dev *vdev, u32 u)
 /**
  *	init_chipset_via82cxxx	-	initialization handler
  *	@dev: PCI device
- *	@name: Name of interface
  *
  *	The initialization callback. Here we determine the IDE chip type
  *	and initialize its drive independent registers.
  */
 
-static unsigned int __devinit init_chipset_via82cxxx(struct pci_dev *dev, const char *name)
+static unsigned int __devinit init_chipset_via82cxxx(struct pci_dev *dev)
 {
 	struct ide_host *host = pci_get_drvdata(dev);
 	struct via82cxxx_dev *vdev = host->host_priv;
diff --git a/drivers/ide/setup-pci.c b/drivers/ide/setup-pci.c
index d9655aeb013b..a8e9e8a69a52 100644
--- a/drivers/ide/setup-pci.c
+++ b/drivers/ide/setup-pci.c
@@ -515,7 +515,7 @@ static int do_ide_setup_pci_device(struct pci_dev *dev,
 	 * space, place chipset into init-mode, and/or preserve
 	 * an interrupt if the card is not native ide support.
 	 */
-	ret = d->init_chipset ? d->init_chipset(dev, d->name) : 0;
+	ret = d->init_chipset ? d->init_chipset(dev) : 0;
 	if (ret < 0)
 		goto out;
 
diff --git a/include/linux/ide.h b/include/linux/ide.h
index fd78b401b036..b846bc44a27e 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -1206,7 +1206,7 @@ enum {
 
 struct ide_port_info {
 	char			*name;
-	unsigned int		(*init_chipset)(struct pci_dev *, const char *);
+	unsigned int		(*init_chipset)(struct pci_dev *);
 	void			(*init_iops)(ide_hwif_t *);
 	void                    (*init_hwif)(ide_hwif_t *);
 	int			(*init_dma)(ide_hwif_t *,
-- 
cgit v1.2.3


From 674bfc23c585b34c42263d73fb51710d49762a23 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 25 Jul 2008 12:06:03 -0500
Subject: virtio: clarify that ABI is usable by any implementations

We want others to implement and use virtio, so it makes sense to BSD
license the non-__KERNEL__ parts of the headers to make this crystal
clear.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Acked-by: Christian Borntraeger <borntraeger@de.ibm.com>
Acked-by: Mark McLoughlin <markmc@redhat.com>
Acked-by: Ryan Harper <ryanh@us.ibm.com>
Acked-by: Eric Van Hensbergen <ericvh@gmail.com>
Acked-by: Anthony Liguori <aliguori@us.ibm.com>
---
 include/linux/virtio_9p.h      | 2 ++
 include/linux/virtio_balloon.h | 2 ++
 include/linux/virtio_blk.h     | 2 ++
 include/linux/virtio_config.h  | 3 +++
 include/linux/virtio_console.h | 2 ++
 include/linux/virtio_net.h     | 2 ++
 include/linux/virtio_pci.h     | 5 ++---
 include/linux/virtio_rng.h     | 2 ++
 8 files changed, 17 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/virtio_9p.h b/include/linux/virtio_9p.h
index 8eff0b53910b..b3c4a60ceeb3 100644
--- a/include/linux/virtio_9p.h
+++ b/include/linux/virtio_9p.h
@@ -1,5 +1,7 @@
 #ifndef _LINUX_VIRTIO_9P_H
 #define _LINUX_VIRTIO_9P_H
+/* This header is BSD licensed so anyone can use the definitions to implement
+ * compatible drivers/servers. */
 #include <linux/virtio_config.h>
 
 /* The ID for virtio console */
diff --git a/include/linux/virtio_balloon.h b/include/linux/virtio_balloon.h
index 979524ee75b7..c30c7bfbf39b 100644
--- a/include/linux/virtio_balloon.h
+++ b/include/linux/virtio_balloon.h
@@ -1,5 +1,7 @@
 #ifndef _LINUX_VIRTIO_BALLOON_H
 #define _LINUX_VIRTIO_BALLOON_H
+/* This header is BSD licensed so anyone can use the definitions to implement
+ * compatible drivers/servers. */
 #include <linux/virtio_config.h>
 
 /* The ID for virtio_balloon */
diff --git a/include/linux/virtio_blk.h b/include/linux/virtio_blk.h
index 5f79a5f9de79..6a66c7f30bcb 100644
--- a/include/linux/virtio_blk.h
+++ b/include/linux/virtio_blk.h
@@ -1,5 +1,7 @@
 #ifndef _LINUX_VIRTIO_BLK_H
 #define _LINUX_VIRTIO_BLK_H
+/* This header is BSD licensed so anyone can use the definitions to implement
+ * compatible drivers/servers. */
 #include <linux/virtio_config.h>
 
 /* The ID for virtio_block */
diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index f364bbf63c34..7eb4b34d13bb 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -1,5 +1,8 @@
 #ifndef _LINUX_VIRTIO_CONFIG_H
 #define _LINUX_VIRTIO_CONFIG_H
+/* This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so
+ * anyone can use the definitions to implement compatible drivers/servers. */
+
 /* Virtio devices use a standardized configuration space to define their
  * features and pass configuration information, but each implementation can
  * store and access that space differently. */
diff --git a/include/linux/virtio_console.h b/include/linux/virtio_console.h
index ed2d4ead7eb7..19a0da0dba41 100644
--- a/include/linux/virtio_console.h
+++ b/include/linux/virtio_console.h
@@ -1,6 +1,8 @@
 #ifndef _LINUX_VIRTIO_CONSOLE_H
 #define _LINUX_VIRTIO_CONSOLE_H
 #include <linux/virtio_config.h>
+/* This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so
+ * anyone can use the definitions to implement compatible drivers/servers. */
 
 /* The ID for virtio console */
 #define VIRTIO_ID_CONSOLE	3
diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
index 38c0571820fb..5e33761b9b8a 100644
--- a/include/linux/virtio_net.h
+++ b/include/linux/virtio_net.h
@@ -1,5 +1,7 @@
 #ifndef _LINUX_VIRTIO_NET_H
 #define _LINUX_VIRTIO_NET_H
+/* This header is BSD licensed so anyone can use the definitions to implement
+ * compatible drivers/servers. */
 #include <linux/virtio_config.h>
 
 /* The ID for virtio_net */
diff --git a/include/linux/virtio_pci.h b/include/linux/virtio_pci.h
index b3151659cf49..cdef35742932 100644
--- a/include/linux/virtio_pci.h
+++ b/include/linux/virtio_pci.h
@@ -9,9 +9,8 @@
  * Authors:
  *  Anthony Liguori  <aliguori@us.ibm.com>
  *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- *
+ * This header is BSD licensed so anyone can use the definitions to implement
+ * compatible drivers/servers.
  */
 
 #ifndef _LINUX_VIRTIO_PCI_H
diff --git a/include/linux/virtio_rng.h b/include/linux/virtio_rng.h
index 331afb6c9f62..1a85dab8a940 100644
--- a/include/linux/virtio_rng.h
+++ b/include/linux/virtio_rng.h
@@ -1,5 +1,7 @@
 #ifndef _LINUX_VIRTIO_RNG_H
 #define _LINUX_VIRTIO_RNG_H
+/* This header is BSD licensed so anyone can use the definitions to implement
+ * compatible drivers/servers. */
 #include <linux/virtio_config.h>
 
 /* The ID for virtio_rng */
-- 
cgit v1.2.3


From 066f4d82a67f621ddd547bfa4b9c94631d8457b0 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Thu, 29 May 2008 11:08:26 +0200
Subject: virtio_blk: check for hardsector size from host

Currently virtio_blk assumes a 512 byte hard sector size. This can cause
trouble / performance issues if the backing has a different block size
(like a file on an ext3 file system formatted with 4k block size or a dasd).

Lets add a feature flag that tells the guest to use a different hard sector
size than 512 byte.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/block/virtio_blk.c | 10 +++++++++-
 include/linux/virtio_blk.h |  3 +++
 2 files changed, 12 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index dd7ea203f940..42251095134f 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -196,6 +196,7 @@ static int virtblk_probe(struct virtio_device *vdev)
 	int err;
 	u64 cap;
 	u32 v;
+	u32 blk_size;
 
 	if (index_to_minor(index) >= 1 << MINORBITS)
 		return -ENOSPC;
@@ -290,6 +291,13 @@ static int virtblk_probe(struct virtio_device *vdev)
 	if (!err)
 		blk_queue_max_hw_segments(vblk->disk->queue, v);
 
+	/* Host can optionally specify the block size of the device */
+	err = virtio_config_val(vdev, VIRTIO_BLK_F_BLK_SIZE,
+				offsetof(struct virtio_blk_config, blk_size),
+				&blk_size);
+	if (!err)
+		blk_queue_hardsect_size(vblk->disk->queue, blk_size);
+
 	add_disk(vblk->disk);
 	return 0;
 
@@ -330,7 +338,7 @@ static struct virtio_device_id id_table[] = {
 
 static unsigned int features[] = {
 	VIRTIO_BLK_F_BARRIER, VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX,
-	VIRTIO_BLK_F_GEOMETRY, VIRTIO_BLK_F_RO,
+	VIRTIO_BLK_F_GEOMETRY, VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE,
 };
 
 static struct virtio_driver virtio_blk = {
diff --git a/include/linux/virtio_blk.h b/include/linux/virtio_blk.h
index 6a66c7f30bcb..c1aef85243bf 100644
--- a/include/linux/virtio_blk.h
+++ b/include/linux/virtio_blk.h
@@ -13,6 +13,7 @@
 #define VIRTIO_BLK_F_SEG_MAX	2	/* Indicates maximum # of segments */
 #define VIRTIO_BLK_F_GEOMETRY	4	/* Legacy geometry available  */
 #define VIRTIO_BLK_F_RO		5	/* Disk is read-only */
+#define VIRTIO_BLK_F_BLK_SIZE	6	/* Block size of disk is available*/
 
 struct virtio_blk_config
 {
@@ -28,6 +29,8 @@ struct virtio_blk_config
 		__u8 heads;
 		__u8 sectors;
 	} geometry;
+	/* block size of device (if VIRTIO_BLK_F_BLK_SIZE) */
+	__u32 blk_size;
 } __attribute__((packed));
 
 /* These two define direction. */
-- 
cgit v1.2.3


From dd7c7bc46211785a1aa7d70feb15830f62682b3c Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 25 Jul 2008 12:06:07 -0500
Subject: virtio: Formally reserve bits 28-31 to be 'transport' features.

We assign feature bits as required, but it makes sense to reserve some
for the particular transport, rather than the particular device.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/virtio/virtio.c       | 5 +++++
 include/linux/virtio_config.h | 6 ++++++
 2 files changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index fc85cba64578..baf103361e3a 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -113,6 +113,11 @@ static int virtio_dev_probe(struct device *_d)
 			set_bit(f, dev->features);
 	}
 
+	/* Transport features are always preserved to pass to set_features. */
+	for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++)
+		if (device_features & (1 << i))
+			set_bit(i, dev->features);
+
 	err = drv->probe(dev);
 	if (err)
 		add_status(dev, VIRTIO_CONFIG_S_FAILED);
diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index 7eb4b34d13bb..5a30cfb7934b 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -18,6 +18,12 @@
 /* We've given up on this device. */
 #define VIRTIO_CONFIG_S_FAILED		0x80
 
+/* Some virtio feature bits (currently bits 28 through 31) are reserved for the
+ * transport being used (eg. virtio_ring), the rest are per-device feature
+ * bits. */
+#define VIRTIO_TRANSPORT_F_START	28
+#define VIRTIO_TRANSPORT_F_END		32
+
 /* Do we get callbacks when the ring is completely used, even if we've
  * suppressed them? */
 #define VIRTIO_F_NOTIFY_ON_EMPTY	24
-- 
cgit v1.2.3


From c624896e488ba2bff5ae497782cfb265c8b00646 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 25 Jul 2008 12:06:07 -0500
Subject: virtio: Rename set_features to finalize_features

Rather than explicitly handing the features to the lower-level, we just
hand the virtio_device and have it set the features.  This make it clear
that it has the chance to manipulate the features of the device at this
point (and that all feature negotiation is already done).

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/lguest/lguest_device.c | 11 ++++++-----
 drivers/s390/kvm/kvm_virtio.c  | 11 ++++++-----
 drivers/virtio/virtio.c        |  5 ++---
 drivers/virtio/virtio_pci.c    | 10 ++++++----
 include/linux/virtio_config.h  |  7 ++++---
 5 files changed, 24 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c
index 1a8de57289eb..54fdc2aa4806 100644
--- a/drivers/lguest/lguest_device.c
+++ b/drivers/lguest/lguest_device.c
@@ -98,16 +98,17 @@ static u32 lg_get_features(struct virtio_device *vdev)
 	return features;
 }
 
-static void lg_set_features(struct virtio_device *vdev, u32 features)
+static void lg_finalize_features(struct virtio_device *vdev)
 {
-	unsigned int i;
+	unsigned int i, bits;
 	struct lguest_device_desc *desc = to_lgdev(vdev)->desc;
 	/* Second half of bitmap is features we accept. */
 	u8 *out_features = lg_features(desc) + desc->feature_len;
 
 	memset(out_features, 0, desc->feature_len);
-	for (i = 0; i < min(desc->feature_len * 8, 32); i++) {
-		if (features & (1 << i))
+	bits = min_t(unsigned, desc->feature_len, sizeof(vdev->features)) * 8;
+	for (i = 0; i < bits; i++) {
+		if (test_bit(i, vdev->features))
 			out_features[i / 8] |= (1 << (i % 8));
 	}
 }
@@ -297,7 +298,7 @@ static void lg_del_vq(struct virtqueue *vq)
 /* The ops structure which hooks everything together. */
 static struct virtio_config_ops lguest_config_ops = {
 	.get_features = lg_get_features,
-	.set_features = lg_set_features,
+	.finalize_features = lg_finalize_features,
 	.get = lg_get,
 	.set = lg_set,
 	.get_status = lg_get_status,
diff --git a/drivers/s390/kvm/kvm_virtio.c b/drivers/s390/kvm/kvm_virtio.c
index d41f234bb2c2..5953510e7d5f 100644
--- a/drivers/s390/kvm/kvm_virtio.c
+++ b/drivers/s390/kvm/kvm_virtio.c
@@ -88,16 +88,17 @@ static u32 kvm_get_features(struct virtio_device *vdev)
 	return features;
 }
 
-static void kvm_set_features(struct virtio_device *vdev, u32 features)
+static void kvm_finalize_features(struct virtio_device *vdev)
 {
-	unsigned int i;
+	unsigned int i, bits;
 	struct kvm_device_desc *desc = to_kvmdev(vdev)->desc;
 	/* Second half of bitmap is features we accept. */
 	u8 *out_features = kvm_vq_features(desc) + desc->feature_len;
 
 	memset(out_features, 0, desc->feature_len);
-	for (i = 0; i < min(desc->feature_len * 8, 32); i++) {
-		if (features & (1 << i))
+	bits = min_t(unsigned, desc->feature_len, sizeof(vdev->features)) * 8;
+	for (i = 0; i < bits; i++) {
+		if (test_bit(i, vdev->features))
 			out_features[i / 8] |= (1 << (i % 8));
 	}
 }
@@ -223,7 +224,7 @@ static void kvm_del_vq(struct virtqueue *vq)
  */
 static struct virtio_config_ops kvm_vq_configspace_ops = {
 	.get_features = kvm_get_features,
-	.set_features = kvm_set_features,
+	.finalize_features = kvm_finalize_features,
 	.get = kvm_get,
 	.set = kvm_set,
 	.get_status = kvm_get_status,
diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index baf103361e3a..5b78fd0aff0a 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -113,7 +113,7 @@ static int virtio_dev_probe(struct device *_d)
 			set_bit(f, dev->features);
 	}
 
-	/* Transport features are always preserved to pass to set_features. */
+	/* Transport features always preserved to pass to finalize_features. */
 	for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++)
 		if (device_features & (1 << i))
 			set_bit(i, dev->features);
@@ -122,8 +122,7 @@ static int virtio_dev_probe(struct device *_d)
 	if (err)
 		add_status(dev, VIRTIO_CONFIG_S_FAILED);
 	else {
-		/* They should never have set feature bits beyond 32 */
-		dev->config->set_features(dev, dev->features[0]);
+		dev->config->finalize_features(dev);
 		add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
 	}
 	return err;
diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
index eae7236310e4..9855975a72a3 100644
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -94,12 +94,14 @@ static u32 vp_get_features(struct virtio_device *vdev)
 	return ioread32(vp_dev->ioaddr + VIRTIO_PCI_HOST_FEATURES);
 }
 
-/* virtio config->set_features() implementation */
-static void vp_set_features(struct virtio_device *vdev, u32 features)
+/* virtio config->finalize_features() implementation */
+static void vp_finalize_features(struct virtio_device *vdev)
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 
-	iowrite32(features, vp_dev->ioaddr + VIRTIO_PCI_GUEST_FEATURES);
+	/* We only support 32 feature bits. */
+	BUILD_BUG_ON(ARRAY_SIZE(vdev->features) != 1);
+	iowrite32(vdev->features[0], vp_dev->ioaddr+VIRTIO_PCI_GUEST_FEATURES);
 }
 
 /* virtio config->get() implementation */
@@ -297,7 +299,7 @@ static struct virtio_config_ops virtio_pci_config_ops = {
 	.find_vq	= vp_find_vq,
 	.del_vq		= vp_del_vq,
 	.get_features	= vp_get_features,
-	.set_features	= vp_set_features,
+	.finalize_features = vp_finalize_features,
 };
 
 /* the PCI probing function */
diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index 5a30cfb7934b..bf8ec283b232 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -61,9 +61,10 @@
  * @get_features: get the array of feature bits for this device.
  *	vdev: the virtio_device
  *	Returns the first 32 feature bits (all we currently need).
- * @set_features: confirm what device features we'll be using.
+ * @finalize_features: confirm what device features we'll be using.
  *	vdev: the virtio_device
- *	feature: the first 32 feature bits
+ *	This gives the final feature bits for the device: it can change
+ *	the dev->feature bits if it wants.
  */
 struct virtio_config_ops
 {
@@ -79,7 +80,7 @@ struct virtio_config_ops
 				     void (*callback)(struct virtqueue *));
 	void (*del_vq)(struct virtqueue *vq);
 	u32 (*get_features)(struct virtio_device *vdev);
-	void (*set_features)(struct virtio_device *vdev, u32 features);
+	void (*finalize_features)(struct virtio_device *vdev);
 };
 
 /* If driver didn't advertise the feature, it will never appear. */
-- 
cgit v1.2.3


From e34f87256794b87e7f4a8f1812538be7b7b5214c Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 25 Jul 2008 12:06:13 -0500
Subject: virtio: Add transport feature handling stub for virtio_ring.

To prepare for virtio_ring transport feature bits, hook in a call in
all the users to manipulate them.  This currently just clears all the
bits, since it doesn't understand any features.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/lguest/lguest_device.c |  3 +++
 drivers/s390/kvm/kvm_virtio.c  |  3 +++
 drivers/virtio/virtio_pci.c    |  3 +++
 drivers/virtio/virtio_ring.c   | 16 ++++++++++++++++
 include/linux/virtio_ring.h    |  2 ++
 5 files changed, 27 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c
index 54fdc2aa4806..37344aaee22f 100644
--- a/drivers/lguest/lguest_device.c
+++ b/drivers/lguest/lguest_device.c
@@ -105,6 +105,9 @@ static void lg_finalize_features(struct virtio_device *vdev)
 	/* Second half of bitmap is features we accept. */
 	u8 *out_features = lg_features(desc) + desc->feature_len;
 
+	/* Give virtio_ring a chance to accept features. */
+	vring_transport_features(vdev);
+
 	memset(out_features, 0, desc->feature_len);
 	bits = min_t(unsigned, desc->feature_len, sizeof(vdev->features)) * 8;
 	for (i = 0; i < bits; i++) {
diff --git a/drivers/s390/kvm/kvm_virtio.c b/drivers/s390/kvm/kvm_virtio.c
index 5953510e7d5f..79954bd6bfa5 100644
--- a/drivers/s390/kvm/kvm_virtio.c
+++ b/drivers/s390/kvm/kvm_virtio.c
@@ -95,6 +95,9 @@ static void kvm_finalize_features(struct virtio_device *vdev)
 	/* Second half of bitmap is features we accept. */
 	u8 *out_features = kvm_vq_features(desc) + desc->feature_len;
 
+	/* Give virtio_ring a chance to accept features. */
+	vring_transport_features(vdev);
+
 	memset(out_features, 0, desc->feature_len);
 	bits = min_t(unsigned, desc->feature_len, sizeof(vdev->features)) * 8;
 	for (i = 0; i < bits; i++) {
diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
index 9855975a72a3..c7dc37c7cce9 100644
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -99,6 +99,9 @@ static void vp_finalize_features(struct virtio_device *vdev)
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 
+	/* Give virtio_ring a chance to accept features. */
+	vring_transport_features(vdev);
+
 	/* We only support 32 feature bits. */
 	BUILD_BUG_ON(ARRAY_SIZE(vdev->features) != 1);
 	iowrite32(vdev->features[0], vp_dev->ioaddr+VIRTIO_PCI_GUEST_FEATURES);
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 21d9a62767af..6eb5303fed11 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -18,6 +18,7 @@
  */
 #include <linux/virtio.h>
 #include <linux/virtio_ring.h>
+#include <linux/virtio_config.h>
 #include <linux/device.h>
 
 #ifdef DEBUG
@@ -323,4 +324,19 @@ void vring_del_virtqueue(struct virtqueue *vq)
 }
 EXPORT_SYMBOL_GPL(vring_del_virtqueue);
 
+/* Manipulates transport-specific feature bits. */
+void vring_transport_features(struct virtio_device *vdev)
+{
+	unsigned int i;
+
+	for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) {
+		switch (i) {
+		default:
+			/* We don't understand this bit. */
+			clear_bit(i, vdev->features);
+		}
+	}
+}
+EXPORT_SYMBOL_GPL(vring_transport_features);
+
 MODULE_LICENSE("GPL");
diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h
index abe481ed990e..c4a598fb3826 100644
--- a/include/linux/virtio_ring.h
+++ b/include/linux/virtio_ring.h
@@ -120,6 +120,8 @@ struct virtqueue *vring_new_virtqueue(unsigned int num,
 				      void (*notify)(struct virtqueue *vq),
 				      void (*callback)(struct virtqueue *vq));
 void vring_del_virtqueue(struct virtqueue *vq);
+/* Filter out transport-specific feature bits. */
+void vring_transport_features(struct virtio_device *vdev);
 
 irqreturn_t vring_interrupt(int irq, void *_vq);
 #endif /* __KERNEL__ */
-- 
cgit v1.2.3


From ed9559d38a87a44e3bda87d73a50aab92471d7dc Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 25 Jul 2008 12:11:09 +1000
Subject: Label kthread_create() with printf attribute tag.

Obvious misc patch been in my queue (& linux-next) for over a cycle.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kthread.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index 00dd957e245b..aabc8a13ba71 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -6,7 +6,8 @@
 
 struct task_struct *kthread_create(int (*threadfn)(void *data),
 				   void *data,
-				   const char namefmt[], ...);
+				   const char namefmt[], ...)
+	__attribute__((format(printf, 3, 4)));
 
 /**
  * kthread_run - create and wake a thread.
-- 
cgit v1.2.3


From 483fad1c3fa1060d7e6710e84a065ad514571739 Mon Sep 17 00:00:00 2001
From: Nathan Lynch <ntl@pobox.com>
Date: Tue, 22 Jul 2008 04:48:46 +1000
Subject: ELF loader support for auxvec base platform string

Some IBM POWER-based platforms have the ability to run in a
mode which mostly appears to the OS as a different processor from the
actual hardware.  For example, a Power6 system may appear to be a
Power5+, which makes the AT_PLATFORM value "power5+".  This means that
programs are restricted to the ISA supported by Power5+;
Power6-specific instructions are treated as illegal.

However, some applications (virtual machines, optimized libraries) can
benefit from knowledge of the underlying CPU model.  A new aux vector
entry, AT_BASE_PLATFORM, will denote the actual hardware.  For
example, on a Power6 system in Power5+ compatibility mode, AT_PLATFORM
will be "power5+" and AT_BASE_PLATFORM will be "power6".  The idea is
that AT_PLATFORM indicates the instruction set supported, while
AT_BASE_PLATFORM indicates the underlying microarchitecture.

If the architecture has defined ELF_BASE_PLATFORM, copy that value to
the user stack in the same manner as ELF_PLATFORM.

Signed-off-by: Nathan Lynch <ntl@pobox.com>
Acked-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 fs/binfmt_elf.c        | 28 ++++++++++++++++++++++++++++
 include/linux/auxvec.h |  6 +++++-
 2 files changed, 33 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 639d2d8b5710..742c8f530481 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -131,6 +131,15 @@ static int padzero(unsigned long elf_bss)
 #define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
 #endif
 
+#ifndef ELF_BASE_PLATFORM
+/*
+ * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
+ * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
+ * will be copied to the user stack in the same manner as AT_PLATFORM.
+ */
+#define ELF_BASE_PLATFORM NULL
+#endif
+
 static int
 create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
 		unsigned long load_addr, unsigned long interp_load_addr)
@@ -142,7 +151,9 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
 	elf_addr_t __user *envp;
 	elf_addr_t __user *sp;
 	elf_addr_t __user *u_platform;
+	elf_addr_t __user *u_base_platform;
 	const char *k_platform = ELF_PLATFORM;
+	const char *k_base_platform = ELF_BASE_PLATFORM;
 	int items;
 	elf_addr_t *elf_info;
 	int ei_index = 0;
@@ -172,6 +183,19 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
 			return -EFAULT;
 	}
 
+	/*
+	 * If this architecture has a "base" platform capability
+	 * string, copy it to userspace.
+	 */
+	u_base_platform = NULL;
+	if (k_base_platform) {
+		size_t len = strlen(k_base_platform) + 1;
+
+		u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
+		if (__copy_to_user(u_base_platform, k_base_platform, len))
+			return -EFAULT;
+	}
+
 	/* Create the ELF interpreter info */
 	elf_info = (elf_addr_t *)current->mm->saved_auxv;
 	/* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
@@ -209,6 +233,10 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
 		NEW_AUX_ENT(AT_PLATFORM,
 			    (elf_addr_t)(unsigned long)u_platform);
 	}
+	if (k_base_platform) {
+		NEW_AUX_ENT(AT_BASE_PLATFORM,
+			    (elf_addr_t)(unsigned long)u_base_platform);
+	}
 	if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
 		NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
 	}
diff --git a/include/linux/auxvec.h b/include/linux/auxvec.h
index 0da17d14fd13..d7afa9dd6635 100644
--- a/include/linux/auxvec.h
+++ b/include/linux/auxvec.h
@@ -26,9 +26,13 @@
 
 #define AT_SECURE 23   /* secure mode boolean */
 
+#define AT_BASE_PLATFORM 24	/* string identifying real platform, may
+				 * differ from AT_PLATFORM. */
+
 #define AT_EXECFN  31	/* filename of program */
+
 #ifdef __KERNEL__
-#define AT_VECTOR_SIZE_BASE 17 /* NEW_AUX_ENT entries in auxiliary table */
+#define AT_VECTOR_SIZE_BASE 18 /* NEW_AUX_ENT entries in auxiliary table */
   /* number of "#define AT_.*" above, minus {AT_NULL, AT_IGNORE, AT_NOTELF} */
 #endif
 
-- 
cgit v1.2.3


From 3d6f4a20cc287a8980c6186624834cf10a70752b Mon Sep 17 00:00:00 2001
From: David Miller <davem@davemloft.net>
Date: Thu, 24 Jul 2008 23:38:31 -0700
Subject: endian: Always evaluate arguments.

Changeset 7fa897b91a3ea0f16c2873b869d7a0eef05acff4 ("ide: trivial sparse
annotations") created an IDE bootup regression on big-endian systems.

In drivers/ide/ide-iops.c, function ide_fixstring() we now have the
loop:

		for (p = end ; p != s;)
			be16_to_cpus((u16 *)(p -= 2));

which will never terminate on big-endian because in such
a configuration be16_to_cpus() evaluates to "do { } while (0)"

Therefore, always evaluate the arguments to nop endian transformation
operations.

Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/byteorder/big_endian.h    | 12 ++++++------
 include/linux/byteorder/little_endian.h | 12 ++++++------
 2 files changed, 12 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/byteorder/big_endian.h b/include/linux/byteorder/big_endian.h
index 961ed4b48d8e..44f95b92393b 100644
--- a/include/linux/byteorder/big_endian.h
+++ b/include/linux/byteorder/big_endian.h
@@ -94,12 +94,12 @@ static inline __u16 __be16_to_cpup(const __be16 *p)
 #define __le32_to_cpus(x) __swab32s((x))
 #define __cpu_to_le16s(x) __swab16s((x))
 #define __le16_to_cpus(x) __swab16s((x))
-#define __cpu_to_be64s(x) do {} while (0)
-#define __be64_to_cpus(x) do {} while (0)
-#define __cpu_to_be32s(x) do {} while (0)
-#define __be32_to_cpus(x) do {} while (0)
-#define __cpu_to_be16s(x) do {} while (0)
-#define __be16_to_cpus(x) do {} while (0)
+#define __cpu_to_be64s(x) do { (void)(x); } while (0)
+#define __be64_to_cpus(x) do { (void)(x); } while (0)
+#define __cpu_to_be32s(x) do { (void)(x); } while (0)
+#define __be32_to_cpus(x) do { (void)(x); } while (0)
+#define __cpu_to_be16s(x) do { (void)(x); } while (0)
+#define __be16_to_cpus(x) do { (void)(x); } while (0)
 
 #ifdef __KERNEL__
 #include <linux/byteorder/generic.h>
diff --git a/include/linux/byteorder/little_endian.h b/include/linux/byteorder/little_endian.h
index 05dc7c35b3b2..4cc170a31762 100644
--- a/include/linux/byteorder/little_endian.h
+++ b/include/linux/byteorder/little_endian.h
@@ -88,12 +88,12 @@ static inline __u16 __be16_to_cpup(const __be16 *p)
 {
 	return __swab16p((__u16 *)p);
 }
-#define __cpu_to_le64s(x) do {} while (0)
-#define __le64_to_cpus(x) do {} while (0)
-#define __cpu_to_le32s(x) do {} while (0)
-#define __le32_to_cpus(x) do {} while (0)
-#define __cpu_to_le16s(x) do {} while (0)
-#define __le16_to_cpus(x) do {} while (0)
+#define __cpu_to_le64s(x) do { (void)(x); } while (0)
+#define __le64_to_cpus(x) do { (void)(x); } while (0)
+#define __cpu_to_le32s(x) do { (void)(x); } while (0)
+#define __le32_to_cpus(x) do { (void)(x); } while (0)
+#define __cpu_to_le16s(x) do { (void)(x); } while (0)
+#define __le16_to_cpus(x) do { (void)(x); } while (0)
 #define __cpu_to_be64s(x) __swab64s((x))
 #define __be64_to_cpus(x) __swab64s((x))
 #define __cpu_to_be32s(x) __swab32s((x))
-- 
cgit v1.2.3


From e0deaff470900a4c3222ca7139f6c9639e26a2f5 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Fri, 25 Jul 2008 01:45:24 -0700
Subject: split the typecheck macros out of include/linux/kernel.h

Needed to fix up a recursive include snafu in
locking-add-typecheck-on-irqsave-and-friends-for-correct-flags.patch

Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kernel.h    | 21 +--------------------
 include/linux/typecheck.h | 24 ++++++++++++++++++++++++
 2 files changed, 25 insertions(+), 20 deletions(-)
 create mode 100644 include/linux/typecheck.h

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index f9cd7a513f9c..5c4b1251e110 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -14,6 +14,7 @@
 #include <linux/compiler.h>
 #include <linux/bitops.h>
 #include <linux/log2.h>
+#include <linux/typecheck.h>
 #include <asm/byteorder.h>
 #include <asm/bug.h>
 
@@ -441,26 +442,6 @@ static inline char *pack_hex_byte(char *buf, u8 byte)
 	const typeof( ((type *)0)->member ) *__mptr = (ptr);	\
 	(type *)( (char *)__mptr - offsetof(type,member) );})
 
-/*
- * Check at compile time that something is of a particular type.
- * Always evaluates to 1 so you may use it easily in comparisons.
- */
-#define typecheck(type,x) \
-({	type __dummy; \
-	typeof(x) __dummy2; \
-	(void)(&__dummy == &__dummy2); \
-	1; \
-})
-
-/*
- * Check at compile time that 'function' is a certain type, or is a pointer
- * to that type (needs to use typedef for the function type.)
- */
-#define typecheck_fn(type,function) \
-({	typeof(type) __tmp = function; \
-	(void)__tmp; \
-})
-
 struct sysinfo;
 extern int do_sysinfo(struct sysinfo *info);
 
diff --git a/include/linux/typecheck.h b/include/linux/typecheck.h
new file mode 100644
index 000000000000..eb5b74a575be
--- /dev/null
+++ b/include/linux/typecheck.h
@@ -0,0 +1,24 @@
+#ifndef TYPECHECK_H_INCLUDED
+#define TYPECHECK_H_INCLUDED
+
+/*
+ * Check at compile time that something is of a particular type.
+ * Always evaluates to 1 so you may use it easily in comparisons.
+ */
+#define typecheck(type,x) \
+({	type __dummy; \
+	typeof(x) __dummy2; \
+	(void)(&__dummy == &__dummy2); \
+	1; \
+})
+
+/*
+ * Check at compile time that 'function' is a certain type, or is a pointer
+ * to that type (needs to use typedef for the function type.)
+ */
+#define typecheck_fn(type,function) \
+({	typeof(type) __tmp = function; \
+	(void)__tmp; \
+})
+
+#endif		/* TYPECHECK_H_INCLUDED */
-- 
cgit v1.2.3


From 3f307891ce0e7b0438c432af1aacd656a092ff45 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Fri, 25 Jul 2008 01:45:25 -0700
Subject: locking: add typecheck on irqsave and friends for correct flags

There haave been several areas in the kernel where an int has been used for
flags in local_irq_save() and friends instead of a long.  This can cause some
hard to debug problems on some architectures.

This patch adds a typecheck inside the irqsave and restore functions to flag
these cases.

[akpm@linux-foundation.org: coding-style fixes]
[akpm@linux-foundation.org: build fix]
Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/irqflags.h | 54 ++++++++++++++++++++++++++----------
 include/linux/spinlock.h | 72 +++++++++++++++++++++++++++++++++++++-----------
 2 files changed, 95 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h
index 2b1c2e58566e..74bde13224c9 100644
--- a/include/linux/irqflags.h
+++ b/include/linux/irqflags.h
@@ -11,6 +11,8 @@
 #ifndef _LINUX_TRACE_IRQFLAGS_H
 #define _LINUX_TRACE_IRQFLAGS_H
 
+#include <linux/typecheck.h>
+
 #ifdef CONFIG_TRACE_IRQFLAGS
   extern void trace_softirqs_on(unsigned long ip);
   extern void trace_softirqs_off(unsigned long ip);
@@ -58,18 +60,24 @@
 	do { trace_hardirqs_on(); raw_local_irq_enable(); } while (0)
 #define local_irq_disable() \
 	do { raw_local_irq_disable(); trace_hardirqs_off(); } while (0)
-#define local_irq_save(flags) \
-	do { raw_local_irq_save(flags); trace_hardirqs_off(); } while (0)
+#define local_irq_save(flags)				\
+	do {						\
+		typecheck(unsigned long, flags);	\
+		raw_local_irq_save(flags);		\
+		trace_hardirqs_off();			\
+	} while (0)
 
-#define local_irq_restore(flags)				\
-	do {							\
-		if (raw_irqs_disabled_flags(flags)) {		\
-			raw_local_irq_restore(flags);		\
-			trace_hardirqs_off();			\
-		} else {					\
-			trace_hardirqs_on();			\
-			raw_local_irq_restore(flags);		\
-		}						\
+
+#define local_irq_restore(flags)			\
+	do {						\
+		typecheck(unsigned long, flags);	\
+		if (raw_irqs_disabled_flags(flags)) {	\
+			raw_local_irq_restore(flags);	\
+			trace_hardirqs_off();		\
+		} else {				\
+			trace_hardirqs_on();		\
+			raw_local_irq_restore(flags);	\
+		}					\
 	} while (0)
 #else /* !CONFIG_TRACE_IRQFLAGS_SUPPORT */
 /*
@@ -78,8 +86,16 @@
  */
 # define raw_local_irq_disable()	local_irq_disable()
 # define raw_local_irq_enable()		local_irq_enable()
-# define raw_local_irq_save(flags)	local_irq_save(flags)
-# define raw_local_irq_restore(flags)	local_irq_restore(flags)
+# define raw_local_irq_save(flags)			\
+	do {						\
+		typecheck(unsigned long, flags);	\
+		local_irq_save(flags);			\
+	} while (0)
+# define raw_local_irq_restore(flags)			\
+	do {						\
+		typecheck(unsigned long, flags);	\
+		local_irq_restore(flags);		\
+	} while (0)
 #endif /* CONFIG_TRACE_IRQFLAGS_SUPPORT */
 
 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
@@ -89,7 +105,11 @@
 		raw_safe_halt();				\
 	} while (0)
 
-#define local_save_flags(flags)		raw_local_save_flags(flags)
+#define local_save_flags(flags)				\
+	do {						\
+		typecheck(unsigned long, flags);	\
+		raw_local_save_flags(flags);		\
+	} while (0)
 
 #define irqs_disabled()						\
 ({								\
@@ -99,7 +119,11 @@
 	raw_irqs_disabled_flags(_flags);			\
 })
 
-#define irqs_disabled_flags(flags)	raw_irqs_disabled_flags(flags)
+#define irqs_disabled_flags(flags)		\
+({						\
+	typecheck(unsigned long, flags);	\
+	raw_irqs_disabled_flags(flags);		\
+})
 #endif		/* CONFIG_X86 */
 
 #endif
diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index d311a090fae7..61e5610ad165 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -46,6 +46,7 @@
  *  linux/spinlock.h:     builds the final spin_*() APIs.
  */
 
+#include <linux/typecheck.h>
 #include <linux/preempt.h>
 #include <linux/linkage.h>
 #include <linux/compiler.h>
@@ -191,23 +192,53 @@ do {								\
 
 #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
 
-#define spin_lock_irqsave(lock, flags)	flags = _spin_lock_irqsave(lock)
-#define read_lock_irqsave(lock, flags)	flags = _read_lock_irqsave(lock)
-#define write_lock_irqsave(lock, flags)	flags = _write_lock_irqsave(lock)
+#define spin_lock_irqsave(lock, flags)			\
+	do {						\
+		typecheck(unsigned long, flags);	\
+		flags = _spin_lock_irqsave(lock);	\
+	} while (0)
+#define read_lock_irqsave(lock, flags)			\
+	do {						\
+		typecheck(unsigned long, flags);	\
+		flags = _read_lock_irqsave(lock);	\
+	} while (0)
+#define write_lock_irqsave(lock, flags)			\
+	do {						\
+		typecheck(unsigned long, flags);	\
+		flags = _write_lock_irqsave(lock);	\
+	} while (0)
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
-#define spin_lock_irqsave_nested(lock, flags, subclass) \
-	flags = _spin_lock_irqsave_nested(lock, subclass)
+#define spin_lock_irqsave_nested(lock, flags, subclass)			\
+	do {								\
+		typecheck(unsigned long, flags);			\
+		flags = _spin_lock_irqsave_nested(lock, subclass);	\
+	} while (0)
 #else
-#define spin_lock_irqsave_nested(lock, flags, subclass) \
-	flags = _spin_lock_irqsave(lock)
+#define spin_lock_irqsave_nested(lock, flags, subclass)			\
+	do {								\
+		typecheck(unsigned long, flags);			\
+		flags = _spin_lock_irqsave(lock);			\
+	} while (0)
 #endif
 
 #else
 
-#define spin_lock_irqsave(lock, flags)	_spin_lock_irqsave(lock, flags)
-#define read_lock_irqsave(lock, flags)	_read_lock_irqsave(lock, flags)
-#define write_lock_irqsave(lock, flags)	_write_lock_irqsave(lock, flags)
+#define spin_lock_irqsave(lock, flags)			\
+	do {						\
+		typecheck(unsigned long, flags);	\
+		_spin_lock_irqsave(lock, flags);	\
+	} while (0)
+#define read_lock_irqsave(lock, flags)			\
+	do {						\
+		typecheck(unsigned long, flags);	\
+		_read_lock_irqsave(lock, flags);	\
+	} while (0)
+#define write_lock_irqsave(lock, flags)			\
+	do {						\
+		typecheck(unsigned long, flags);	\
+		_write_lock_irqsave(lock, flags);	\
+	} while (0)
 #define spin_lock_irqsave_nested(lock, flags, subclass)	\
 	spin_lock_irqsave(lock, flags)
 
@@ -260,16 +291,25 @@ do {						\
 } while (0)
 #endif
 
-#define spin_unlock_irqrestore(lock, flags) \
-					_spin_unlock_irqrestore(lock, flags)
+#define spin_unlock_irqrestore(lock, flags)		\
+	do {						\
+		typecheck(unsigned long, flags);	\
+		_spin_unlock_irqrestore(lock, flags);	\
+	} while (0)
 #define spin_unlock_bh(lock)		_spin_unlock_bh(lock)
 
-#define read_unlock_irqrestore(lock, flags) \
-					_read_unlock_irqrestore(lock, flags)
+#define read_unlock_irqrestore(lock, flags)		\
+	do {						\
+		typecheck(unsigned long, flags);	\
+		_read_unlock_irqrestore(lock, flags);	\
+	} while (0)
 #define read_unlock_bh(lock)		_read_unlock_bh(lock)
 
-#define write_unlock_irqrestore(lock, flags) \
-					_write_unlock_irqrestore(lock, flags)
+#define write_unlock_irqrestore(lock, flags)		\
+	do {						\
+		typecheck(unsigned long, flags);	\
+		_write_unlock_irqrestore(lock, flags);	\
+	} while (0)
 #define write_unlock_bh(lock)		_write_unlock_bh(lock)
 
 #define spin_trylock_bh(lock)	__cond_lock(lock, _spin_trylock_bh(lock))
-- 
cgit v1.2.3


From 8b5ac31e27135a6f2c210c40d03bf8f1b3a86b77 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Fri, 25 Jul 2008 01:45:26 -0700
Subject: include: use get/put_unaligned_* helpers

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Cc: "John W. Linville" <linville@tuxdriver.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/reiserfs_fs.h      |  4 ++--
 include/linux/smb_fs.h           | 19 +++++++------------
 include/net/ieee80211_radiotap.h |  2 +-
 3 files changed, 10 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h
index 4aacaeecb56f..e9963af16cda 100644
--- a/include/linux/reiserfs_fs.h
+++ b/include/linux/reiserfs_fs.h
@@ -526,8 +526,8 @@ struct item_head {
 ** p is the array of __u32, i is the index into the array, v is the value
 ** to store there.
 */
-#define get_block_num(p, i) le32_to_cpu(get_unaligned((p) + (i)))
-#define put_block_num(p, i, v) put_unaligned(cpu_to_le32(v), (p) + (i))
+#define get_block_num(p, i) get_unaligned_le32((p) + (i))
+#define put_block_num(p, i, v) put_unaligned_le32((v), (p) + (i))
 
 //
 // in old version uniqueness field shows key type
diff --git a/include/linux/smb_fs.h b/include/linux/smb_fs.h
index 2c5cd55f44ff..923cd8a247b1 100644
--- a/include/linux/smb_fs.h
+++ b/include/linux/smb_fs.h
@@ -43,18 +43,13 @@ static inline struct smb_inode_info *SMB_I(struct inode *inode)
 }
 
 /* macro names are short for word, double-word, long value (?) */
-#define WVAL(buf,pos) \
-	(le16_to_cpu(get_unaligned((__le16 *)((u8 *)(buf) + (pos)))))
-#define DVAL(buf,pos) \
-	(le32_to_cpu(get_unaligned((__le32 *)((u8 *)(buf) + (pos)))))
-#define LVAL(buf,pos) \
-	(le64_to_cpu(get_unaligned((__le64 *)((u8 *)(buf) + (pos)))))
-#define WSET(buf,pos,val) \
-	put_unaligned(cpu_to_le16((u16)(val)), (__le16 *)((u8 *)(buf) + (pos)))
-#define DSET(buf,pos,val) \
-	put_unaligned(cpu_to_le32((u32)(val)), (__le32 *)((u8 *)(buf) + (pos)))
-#define LSET(buf,pos,val) \
-	put_unaligned(cpu_to_le64((u64)(val)), (__le64 *)((u8 *)(buf) + (pos)))
+#define WVAL(buf, pos) (get_unaligned_le16((u8 *)(buf) + (pos)))
+#define DVAL(buf, pos) (get_unaligned_le32((u8 *)(buf) + (pos)))
+#define LVAL(buf, pos) (get_unaligned_le64((u8 *)(buf) + (pos)))
+
+#define WSET(buf, pos, val) put_unaligned_le16((val), (u8 *)(buf) + (pos))
+#define DSET(buf, pos, val) put_unaligned_le32((val), (u8 *)(buf) + (pos))
+#define LSET(buf, pos, val) put_unaligned_le64((val), (u8 *)(buf) + (pos))
 
 /* where to find the base of the SMB packet proper */
 #define smb_base(buf) ((u8 *)(((u8 *)(buf))+4))
diff --git a/include/net/ieee80211_radiotap.h b/include/net/ieee80211_radiotap.h
index dfd8bf66ce27..d364fd594ea4 100644
--- a/include/net/ieee80211_radiotap.h
+++ b/include/net/ieee80211_radiotap.h
@@ -262,7 +262,7 @@ static inline int ieee80211_get_radiotap_len(unsigned char *data)
 	struct ieee80211_radiotap_header *hdr =
 		(struct ieee80211_radiotap_header *)data;
 
-	return le16_to_cpu(get_unaligned(&hdr->it_len));
+	return get_unaligned_le16(&hdr->it_len);
 }
 
 #endif				/* IEEE80211_RADIOTAP_H */
-- 
cgit v1.2.3


From b39c08cb692cb8898c30e0d8187c7cbe27cc905c Mon Sep 17 00:00:00 2001
From: "Robert P. J. Day" <rpjday@crashcourse.ca>
Date: Fri, 25 Jul 2008 01:45:29 -0700
Subject: Remove apparently unused fd1772.h header file.

This header file has been unused for quite some time, and the
corresponding source files appear to have been removed back in commit
99eb8a550dbccc0e1f6c7e866fe421810e0585f6 ("Remove the arm26 port")

Signed-off-by: Robert P. J. Day <rpjday@crashcourse.ca>
Cc: Adrian Bunk <bunk@stusta.de>
Cc: Ian Molton <spyro@f2s.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/fd1772.h | 80 --------------------------------------------------
 1 file changed, 80 deletions(-)
 delete mode 100644 include/linux/fd1772.h

(limited to 'include/linux')

diff --git a/include/linux/fd1772.h b/include/linux/fd1772.h
deleted file mode 100644
index 871d6e4c677e..000000000000
--- a/include/linux/fd1772.h
+++ /dev/null
@@ -1,80 +0,0 @@
-#ifndef _LINUX_FD1772REG_H
-#define _LINUX_FD1772REG_H
-
-/*
-** WD1772 stuff - originally from the M68K Linux
- * Modified for Archimedes by Dave Gilbert (gilbertd@cs.man.ac.uk)
- */
-
-/* register codes */
-
-#define FDC1772SELREG_STP   (0x80)   /* command/status register */
-#define FDC1772SELREG_TRA   (0x82)   /* track register */
-#define FDC1772SELREG_SEC   (0x84)   /* sector register */
-#define FDC1772SELREG_DTA   (0x86)   /* data register */
-
-/* register names for FDC1772_READ/WRITE macros */
-
-#define FDC1772REG_CMD         0
-#define FDC1772REG_STATUS      0
-#define FDC1772REG_TRACK       2
-#define FDC1772REG_SECTOR      4
-#define FDC1772REG_DATA                6
-
-/* command opcodes */
-
-#define FDC1772CMD_RESTORE  (0x00)   /*  -                   */
-#define FDC1772CMD_SEEK     (0x10)   /*   |                  */
-#define FDC1772CMD_STEP     (0x20)   /*   |  TYP 1 Commands  */
-#define FDC1772CMD_STIN     (0x40)   /*   |                  */
-#define FDC1772CMD_STOT     (0x60)   /*  -                   */
-#define FDC1772CMD_RDSEC    (0x80)   /*  -   TYP 2 Commands  */
-#define FDC1772CMD_WRSEC    (0xa0)   /*  -          "        */
-#define FDC1772CMD_RDADR    (0xc0)   /*  -                   */
-#define FDC1772CMD_RDTRA    (0xe0)   /*   |  TYP 3 Commands  */
-#define FDC1772CMD_WRTRA    (0xf0)   /*  -                   */
-#define FDC1772CMD_FORCI    (0xd0)   /*  -   TYP 4 Command   */
-
-/* command modifier bits */
-
-#define FDC1772CMDADD_SR6   (0x00)   /* step rate settings */
-#define FDC1772CMDADD_SR12  (0x01)
-#define FDC1772CMDADD_SR2   (0x02)
-#define FDC1772CMDADD_SR3   (0x03)
-#define FDC1772CMDADD_V     (0x04)   /* verify */
-#define FDC1772CMDADD_H     (0x08)   /* wait for spin-up */
-#define FDC1772CMDADD_U     (0x10)   /* update track register */
-#define FDC1772CMDADD_M     (0x10)   /* multiple sector access */
-#define FDC1772CMDADD_E     (0x04)   /* head settling flag */
-#define FDC1772CMDADD_P     (0x02)   /* precompensation */
-#define FDC1772CMDADD_A0    (0x01)   /* DAM flag */
-
-/* status register bits */
-
-#define        FDC1772STAT_MOTORON     (0x80)   /* motor on */
-#define        FDC1772STAT_WPROT       (0x40)   /* write protected (FDC1772CMD_WR*) */
-#define        FDC1772STAT_SPINUP      (0x20)   /* motor speed stable (Type I) */
-#define        FDC1772STAT_DELDAM      (0x20)   /* sector has deleted DAM (Type II+III) */
-#define        FDC1772STAT_RECNF       (0x10)   /* record not found */
-#define        FDC1772STAT_CRC         (0x08)   /* CRC error */
-#define        FDC1772STAT_TR00        (0x04)   /* Track 00 flag (Type I) */
-#define        FDC1772STAT_LOST        (0x04)   /* Lost Data (Type II+III) */
-#define        FDC1772STAT_IDX         (0x02)   /* Index status (Type I) */
-#define        FDC1772STAT_DRQ         (0x02)   /* DRQ status (Type II+III) */
-#define        FDC1772STAT_BUSY        (0x01)   /* FDC1772 is busy */
-
-
-/* PSG Port A Bit Nr 0 .. Side Sel .. 0 -> Side 1  1 -> Side 2 */
-#define DSKSIDE     (0x01)
-        
-#define DSKDRVNONE  (0x06)
-#define DSKDRV0     (0x02)
-#define DSKDRV1     (0x04)
-
-/* step rates */
-#define        FDC1772STEP_6   0x00
-#define        FDC1772STEP_12  0x01
-#define        FDC1772STEP_2   0x02
-#define        FDC1772STEP_3   0x03
-
-#endif
-- 
cgit v1.2.3


From e0ce0da9fefcc723dc006c35a7f91a32750abd40 Mon Sep 17 00:00:00 2001
From: "Robert P. J. Day" <rpjday@crashcourse.ca>
Date: Fri, 25 Jul 2008 01:45:32 -0700
Subject: lists: remove a redundant conditional definition of list_add()

Remove the conditional surrounding the definition of list_add() from list.h
since, if you define CONFIG_DEBUG_LIST, the definition you will subsequently
pick up from lib/list_debug.c will be absolutely identical, at which point you
can remove that redundant definition from list_debug.c as well.

Signed-off-by: Robert P. J. Day <rpjday@crashcourse.ca>
Cc: Dave Jones <davej@codemonkey.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/list.h |  4 ----
 lib/list_debug.c     | 14 --------------
 2 files changed, 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/list.h b/include/linux/list.h
index 139ec41d9c2e..453916bc0412 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -61,14 +61,10 @@ extern void __list_add(struct list_head *new,
  * Insert a new entry after the specified head.
  * This is good for implementing stacks.
  */
-#ifndef CONFIG_DEBUG_LIST
 static inline void list_add(struct list_head *new, struct list_head *head)
 {
 	__list_add(new, head, head->next);
 }
-#else
-extern void list_add(struct list_head *new, struct list_head *head);
-#endif
 
 
 /**
diff --git a/lib/list_debug.c b/lib/list_debug.c
index 4350ba9655bd..45c03fd608dd 100644
--- a/lib/list_debug.c
+++ b/lib/list_debug.c
@@ -39,20 +39,6 @@ void __list_add(struct list_head *new,
 }
 EXPORT_SYMBOL(__list_add);
 
-/**
- * list_add - add a new entry
- * @new: new entry to be added
- * @head: list head to add it after
- *
- * Insert a new entry after the specified head.
- * This is good for implementing stacks.
- */
-void list_add(struct list_head *new, struct list_head *head)
-{
-	__list_add(new, head, head->next);
-}
-EXPORT_SYMBOL(list_add);
-
 /**
  * list_del - deletes entry from list.
  * @entry: the element to delete from the list.
-- 
cgit v1.2.3


From b03f6489f9f27dc519a4c60ebf39cc7b8a58eae7 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Fri, 25 Jul 2008 01:45:35 -0700
Subject: build kernel/profile.o only when requested

Build kernel/profile.o only if CONFIG_PROFILING is enabled.

This makes CONFIG_PROFILING=n kernels smaller.

As a bonus, some profile_tick() calls and one branch from schedule() are
now eliminated with CONFIG_PROFILING=n (but I doubt these are
measurable effects).

This patch changes the effects of CONFIG_PROFILING=n, but I don't think
having more than two choices would be the better choice.

This patch also adds the name of the first parameter to the prototypes
of profile_{hits,tick}() since I anyway had to add them for the dummy
functions.

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/profile.h | 56 ++++++++++++++++++++++++++++++++++---------------
 kernel/Makefile         |  3 ++-
 kernel/profile.c        |  4 ----
 3 files changed, 41 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/profile.h b/include/linux/profile.h
index 05c1cc736937..4081fa31081f 100644
--- a/include/linux/profile.h
+++ b/include/linux/profile.h
@@ -8,8 +8,6 @@
 
 #include <asm/errno.h>
 
-extern int prof_on __read_mostly;
-
 #define CPU_PROFILING	1
 #define SCHED_PROFILING	2
 #define SLEEP_PROFILING	3
@@ -19,14 +17,29 @@ struct proc_dir_entry;
 struct pt_regs;
 struct notifier_block;
 
+#if defined(CONFIG_PROFILING) && defined(CONFIG_PROC_FS)
+void create_prof_cpu_mask(struct proc_dir_entry *);
+#else
+#define create_prof_cpu_mask(x)			do { (void)(x); } while (0)
+#endif
+
+enum profile_type {
+	PROFILE_TASK_EXIT,
+	PROFILE_MUNMAP
+};
+
+#ifdef CONFIG_PROFILING
+
+extern int prof_on __read_mostly;
+
 /* init basic kernel profiler */
 void __init profile_init(void);
-void profile_tick(int);
+void profile_tick(int type);
 
 /*
  * Add multiple profiler hits to a given address:
  */
-void profile_hits(int, void *ip, unsigned int nr_hits);
+void profile_hits(int type, void *ip, unsigned int nr_hits);
 
 /*
  * Single profiler hit:
@@ -40,19 +53,6 @@ static inline void profile_hit(int type, void *ip)
 		profile_hits(type, ip, 1);
 }
 
-#ifdef CONFIG_PROC_FS
-void create_prof_cpu_mask(struct proc_dir_entry *);
-#else
-#define create_prof_cpu_mask(x)			do { (void)(x); } while (0)
-#endif
-
-enum profile_type {
-	PROFILE_TASK_EXIT,
-	PROFILE_MUNMAP
-};
-
-#ifdef CONFIG_PROFILING
-
 struct task_struct;
 struct mm_struct;
 
@@ -80,6 +80,28 @@ struct pt_regs;
 
 #else
 
+#define prof_on 0
+
+static inline void profile_init(void)
+{
+	return;
+}
+
+static inline void profile_tick(int type)
+{
+	return;
+}
+
+static inline void profile_hits(int type, void *ip, unsigned int nr_hits)
+{
+	return;
+}
+
+static inline void profile_hit(int type, void *ip)
+{
+	return;
+}
+
 static inline int task_handoff_register(struct notifier_block * n)
 {
 	return -ENOSYS;
diff --git a/kernel/Makefile b/kernel/Makefile
index 15ab63ffe64d..54f69837d35a 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -2,7 +2,7 @@
 # Makefile for the linux kernel.
 #
 
-obj-y     = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
+obj-y     = sched.o fork.o exec_domain.o panic.o printk.o \
 	    cpu.o exit.o itimer.o time.o softirq.o resource.o \
 	    sysctl.o capability.o ptrace.o timer.o user.o \
 	    signal.o sys.o kmod.o workqueue.o pid.o \
@@ -24,6 +24,7 @@ CFLAGS_REMOVE_sched_clock.o = -pg
 CFLAGS_REMOVE_sched.o = -mno-spe -pg
 endif
 
+obj-$(CONFIG_PROFILING) += profile.o
 obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o
 obj-$(CONFIG_STACKTRACE) += stacktrace.o
 obj-y += time/
diff --git a/kernel/profile.c b/kernel/profile.c
index 58926411eb2a..cd26bed4cc26 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -112,8 +112,6 @@ void __init profile_init(void)
 
 /* Profile event notifications */
 
-#ifdef CONFIG_PROFILING
-
 static BLOCKING_NOTIFIER_HEAD(task_exit_notifier);
 static ATOMIC_NOTIFIER_HEAD(task_free_notifier);
 static BLOCKING_NOTIFIER_HEAD(munmap_notifier);
@@ -203,8 +201,6 @@ void unregister_timer_hook(int (*hook)(struct pt_regs *))
 }
 EXPORT_SYMBOL_GPL(unregister_timer_hook);
 
-#endif /* CONFIG_PROFILING */
-
 
 #ifdef CONFIG_SMP
 /*
-- 
cgit v1.2.3


From cebbd3fb803603b12408458ba17c29ce1e15a5f2 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Fri, 25 Jul 2008 01:45:35 -0700
Subject: build-kernel-profileo-only-when-requested-cleanups

Cc: Adrian Bunk <bunk@kernel.org>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/profile.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/profile.h b/include/linux/profile.h
index 4081fa31081f..7e7087239af5 100644
--- a/include/linux/profile.h
+++ b/include/linux/profile.h
@@ -18,9 +18,11 @@ struct pt_regs;
 struct notifier_block;
 
 #if defined(CONFIG_PROFILING) && defined(CONFIG_PROC_FS)
-void create_prof_cpu_mask(struct proc_dir_entry *);
+void create_prof_cpu_mask(struct proc_dir_entry *de);
 #else
-#define create_prof_cpu_mask(x)			do { (void)(x); } while (0)
+static inline void create_prof_cpu_mask(struct proc_dir_entry *de)
+{
+}
 #endif
 
 enum profile_type {
-- 
cgit v1.2.3


From ac331d158e198d2a91a5b0a3ec4ca9991fdb57af Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Fri, 25 Jul 2008 01:45:38 -0700
Subject: call_usermodehelper(): increase reliability

Presently call_usermodehelper_setup() uses GFP_ATOMIC.  but it can return
NULL _very_ easily.

GFP_ATOMIC is needed only when we can't sleep.  and, GFP_KERNEL is robust
and better.

thus, I add gfp_mask argument to call_usermodehelper_setup().

So, its callers pass the gfp_t as below:

call_usermodehelper() and call_usermodehelper_keys():
	depend on 'wait' argument.
call_usermodehelper_pipe():
	always GFP_KERNEL because always run under process context.
orderly_poweroff():
	pass to GFP_ATOMIC because may run under interrupt context.

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: "Paul Menage" <menage@google.com>
Reviewed-by: Li Zefan <lizf@cn.fujitsu.com>
Acked-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Andi Kleen <andi@firstfloor.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kmod.h | 11 +++++++----
 kernel/kmod.c        |  9 +++++----
 kernel/sys.c         |  2 +-
 3 files changed, 13 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kmod.h b/include/linux/kmod.h
index 0509c4ce4857..a1a91577813c 100644
--- a/include/linux/kmod.h
+++ b/include/linux/kmod.h
@@ -19,6 +19,7 @@
  *      Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
+#include <linux/gfp.h>
 #include <linux/stddef.h>
 #include <linux/errno.h>
 #include <linux/compiler.h>
@@ -41,8 +42,8 @@ struct file;
 struct subprocess_info;
 
 /* Allocate a subprocess_info structure */
-struct subprocess_info *call_usermodehelper_setup(char *path,
-						  char **argv, char **envp);
+struct subprocess_info *call_usermodehelper_setup(char *path, char **argv,
+						  char **envp, gfp_t gfp_mask);
 
 /* Set various pieces of state into the subprocess_info structure */
 void call_usermodehelper_setkeys(struct subprocess_info *info,
@@ -69,8 +70,9 @@ static inline int
 call_usermodehelper(char *path, char **argv, char **envp, enum umh_wait wait)
 {
 	struct subprocess_info *info;
+	gfp_t gfp_mask = (wait == UMH_NO_WAIT) ? GFP_ATOMIC : GFP_KERNEL;
 
-	info = call_usermodehelper_setup(path, argv, envp);
+	info = call_usermodehelper_setup(path, argv, envp, gfp_mask);
 	if (info == NULL)
 		return -ENOMEM;
 	return call_usermodehelper_exec(info, wait);
@@ -81,8 +83,9 @@ call_usermodehelper_keys(char *path, char **argv, char **envp,
 			 struct key *session_keyring, enum umh_wait wait)
 {
 	struct subprocess_info *info;
+	gfp_t gfp_mask = (wait == UMH_NO_WAIT) ? GFP_ATOMIC : GFP_KERNEL;
 
-	info = call_usermodehelper_setup(path, argv, envp);
+	info = call_usermodehelper_setup(path, argv, envp, gfp_mask);
 	if (info == NULL)
 		return -ENOMEM;
 
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 2989f67c4446..2456d1a0befb 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -352,16 +352,17 @@ static inline void register_pm_notifier_callback(void) {}
  * @path: path to usermode executable
  * @argv: arg vector for process
  * @envp: environment for process
+ * @gfp_mask: gfp mask for memory allocation
  *
  * Returns either %NULL on allocation failure, or a subprocess_info
  * structure.  This should be passed to call_usermodehelper_exec to
  * exec the process and free the structure.
  */
-struct subprocess_info *call_usermodehelper_setup(char *path,
-						  char **argv, char **envp)
+struct subprocess_info *call_usermodehelper_setup(char *path, char **argv,
+						  char **envp, gfp_t gfp_mask)
 {
 	struct subprocess_info *sub_info;
-	sub_info = kzalloc(sizeof(struct subprocess_info),  GFP_ATOMIC);
+	sub_info = kzalloc(sizeof(struct subprocess_info), gfp_mask);
 	if (!sub_info)
 		goto out;
 
@@ -494,7 +495,7 @@ int call_usermodehelper_pipe(char *path, char **argv, char **envp,
 	struct subprocess_info *sub_info;
 	int ret;
 
-	sub_info = call_usermodehelper_setup(path, argv, envp);
+	sub_info = call_usermodehelper_setup(path, argv, envp, GFP_KERNEL);
 	if (sub_info == NULL)
 		return -ENOMEM;
 
diff --git a/kernel/sys.c b/kernel/sys.c
index 14e97282eb6c..6c2188046048 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1795,7 +1795,7 @@ int orderly_poweroff(bool force)
 		goto out;
 	}
 
-	info = call_usermodehelper_setup(argv[0], argv, envp);
+	info = call_usermodehelper_setup(argv[0], argv, envp, GFP_ATOMIC);
 	if (info == NULL) {
 		argv_free(argv);
 		goto out;
-- 
cgit v1.2.3


From 4500d067eeb3d00679335d9cf5c6536e79cd3ef4 Mon Sep 17 00:00:00 2001
From: "Robert P. J. Day" <rpjday@crashcourse.ca>
Date: Fri, 25 Jul 2008 01:45:49 -0700
Subject: init.h: remove obsolete content

Remove apparently obsolete content from init.h referring to gcc 2.9x
and to "no_module_init".

Signed-off-by: Robert P. J. Day <rpjday@crashcourse.ca>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/init.h | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/init.h b/include/linux/init.h
index 21d658cdfa27..42ae95411a93 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -275,13 +275,7 @@ void __init parse_early_param(void);
 
 #define security_initcall(fn)		module_init(fn)
 
-/* These macros create a dummy inline: gcc 2.9x does not count alias
- as usage, hence the `unused function' warning when __init functions
- are declared static. We use the dummy __*_module_inline functions
- both to kill the warning and check the type of the init/cleanup
- function. */
-
-/* Each module must use one module_init(), or one no_module_init */
+/* Each module must use one module_init(). */
 #define module_init(initfn)					\
 	static inline initcall_t __inittest(void)		\
 	{ return initfn; }					\
-- 
cgit v1.2.3


From b6c63937001889af6fe431aaba97e59d04e028e7 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Fri, 25 Jul 2008 01:45:52 -0700
Subject: Rename WARN() to WARNING() to clear the namespace

We want to use WARN() as a variant of WARN_ON(), however a few drivers are
using WARN() internally.  This patch renames these to WARNING() to avoid the
namespace clash.  A few cases were defining but not using the thing, for those
cases I just deleted the definition.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Acked-by: Greg KH <greg@kroah.com>
Cc: Karsten Keil <kkeil@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/isdn/hisax/st5481.h       |  4 ++--
 drivers/isdn/hisax/st5481_b.c     |  4 ++--
 drivers/isdn/hisax/st5481_d.c     |  6 +++---
 drivers/isdn/hisax/st5481_usb.c   | 18 +++++++++---------
 drivers/usb/gadget/at91_udc.h     |  2 +-
 drivers/usb/gadget/cdc2.c         |  2 +-
 drivers/usb/gadget/ether.c        |  2 +-
 drivers/usb/gadget/file_storage.c | 14 +++++++-------
 drivers/usb/gadget/fsl_usb2_udc.c |  2 +-
 drivers/usb/gadget/fsl_usb2_udc.h |  2 +-
 drivers/usb/gadget/gmidi.c        |  2 --
 drivers/usb/gadget/goku_udc.c     |  2 +-
 drivers/usb/gadget/goku_udc.h     |  2 +-
 drivers/usb/gadget/inode.c        |  2 --
 drivers/usb/gadget/net2280.c      |  2 +-
 drivers/usb/gadget/net2280.h      |  2 +-
 drivers/usb/gadget/omap_udc.c     |  6 +++---
 drivers/usb/gadget/omap_udc.h     |  2 +-
 drivers/usb/gadget/printer.c      |  2 +-
 drivers/usb/gadget/pxa25x_udc.c   |  6 +++---
 drivers/usb/gadget/pxa25x_udc.h   |  2 +-
 drivers/usb/gadget/u_ether.c      |  3 ---
 drivers/usb/host/isp116x-hcd.c    |  2 +-
 drivers/usb/host/isp116x.h        |  2 +-
 drivers/usb/host/sl811-hcd.c      |  2 +-
 drivers/usb/host/sl811.h          |  2 +-
 drivers/usb/misc/usbtest.c        |  4 ++--
 include/linux/usb/composite.h     |  2 +-
 28 files changed, 48 insertions(+), 55 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/isdn/hisax/st5481.h b/drivers/isdn/hisax/st5481.h
index 2044e7173ab4..cff7a6354334 100644
--- a/drivers/isdn/hisax/st5481.h
+++ b/drivers/isdn/hisax/st5481.h
@@ -220,7 +220,7 @@ enum {
 #define ERR(format, arg...) \
 printk(KERN_ERR "%s:%s: " format "\n" , __FILE__,  __func__ , ## arg)
 
-#define WARN(format, arg...) \
+#define WARNING(format, arg...) \
 printk(KERN_WARNING "%s:%s: " format "\n" , __FILE__,  __func__ , ## arg)
 
 #define INFO(format, arg...) \
@@ -412,7 +412,7 @@ struct st5481_adapter {
 ({ \
 	int status; \
 	if ((status = usb_submit_urb(urb, mem_flags)) < 0) { \
-		WARN("usb_submit_urb failed,status=%d", status); \
+		WARNING("usb_submit_urb failed,status=%d", status); \
 	} \
         status; \
 })
diff --git a/drivers/isdn/hisax/st5481_b.c b/drivers/isdn/hisax/st5481_b.c
index fa64115cd7c7..0074b600a0ef 100644
--- a/drivers/isdn/hisax/st5481_b.c
+++ b/drivers/isdn/hisax/st5481_b.c
@@ -180,7 +180,7 @@ static void usb_b_out_complete(struct urb *urb)
 				DBG(4,"urb killed status %d", urb->status);
 				return; // Give up
 			default: 
-				WARN("urb status %d",urb->status);
+				WARNING("urb status %d",urb->status);
 				if (b_out->busy == 0) {
 					st5481_usb_pipe_reset(adapter, (bcs->channel+1)*2 | USB_DIR_OUT, NULL, NULL);
 				}
@@ -372,6 +372,6 @@ void st5481_b_l2l1(struct hisax_if *ifc, int pr, void *arg)
 		B_L1L2(bcs, PH_DEACTIVATE | INDICATION, NULL);
 		break;
 	default:
-		WARN("pr %#x\n", pr);
+		WARNING("pr %#x\n", pr);
 	}
 }
diff --git a/drivers/isdn/hisax/st5481_d.c b/drivers/isdn/hisax/st5481_d.c
index b8c4855cc889..077991c1cd05 100644
--- a/drivers/isdn/hisax/st5481_d.c
+++ b/drivers/isdn/hisax/st5481_d.c
@@ -389,7 +389,7 @@ static void usb_d_out_complete(struct urb *urb)
 				DBG(1,"urb killed status %d", urb->status);
 				break;
 			default: 
-				WARN("urb status %d",urb->status);
+				WARNING("urb status %d",urb->status);
 				if (d_out->busy == 0) {
 					st5481_usb_pipe_reset(adapter, EP_D_OUT | USB_DIR_OUT, fifo_reseted, adapter);
 				}
@@ -420,7 +420,7 @@ static void dout_start_xmit(struct FsmInst *fsm, int event, void *arg)
 	isdnhdlc_out_init(&d_out->hdlc_state, 1, 0);
 
 	if (test_and_set_bit(buf_nr, &d_out->busy)) {
-		WARN("ep %d urb %d busy %#lx", EP_D_OUT, buf_nr, d_out->busy);
+		WARNING("ep %d urb %d busy %#lx", EP_D_OUT, buf_nr, d_out->busy);
 		return;
 	}
 	urb = d_out->urb[buf_nr];
@@ -601,7 +601,7 @@ void st5481_d_l2l1(struct hisax_if *hisax_d_if, int pr, void *arg)
 		FsmEvent(&adapter->d_out.fsm, EV_DOUT_START_XMIT, NULL);
 		break;
 	default:
-		WARN("pr %#x\n", pr);
+		WARNING("pr %#x\n", pr);
 		break;
 	}
 }
diff --git a/drivers/isdn/hisax/st5481_usb.c b/drivers/isdn/hisax/st5481_usb.c
index 427a8b0520f5..ec3c0e507669 100644
--- a/drivers/isdn/hisax/st5481_usb.c
+++ b/drivers/isdn/hisax/st5481_usb.c
@@ -66,7 +66,7 @@ static void usb_ctrl_msg(struct st5481_adapter *adapter,
 	struct ctrl_msg *ctrl_msg;
 	
 	if ((w_index = fifo_add(&ctrl->msg_fifo.f)) < 0) {
-		WARN("control msg FIFO full");
+		WARNING("control msg FIFO full");
 		return;
 	}
 	ctrl_msg = &ctrl->msg_fifo.data[w_index]; 
@@ -139,7 +139,7 @@ static void usb_ctrl_complete(struct urb *urb)
 				DBG(1,"urb killed status %d", urb->status);
 				return; // Give up
 			default: 
-				WARN("urb status %d",urb->status);
+				WARNING("urb status %d",urb->status);
 				break;
 		}
 	}
@@ -198,7 +198,7 @@ static void usb_int_complete(struct urb *urb)
 			DBG(2, "urb shutting down with status: %d", urb->status);
 			return;
 		default:
-			WARN("nonzero urb status received: %d", urb->status);
+			WARNING("nonzero urb status received: %d", urb->status);
 			goto exit;
 	}
 
@@ -235,7 +235,7 @@ static void usb_int_complete(struct urb *urb)
 exit:
 	status = usb_submit_urb (urb, GFP_ATOMIC);
 	if (status)
-		WARN("usb_submit_urb failed with result %d", status);
+		WARNING("usb_submit_urb failed with result %d", status);
 }
 
 /* ======================================================================
@@ -257,7 +257,7 @@ int st5481_setup_usb(struct st5481_adapter *adapter)
 	DBG(2,"");
 	
 	if ((status = usb_reset_configuration (dev)) < 0) {
-		WARN("reset_configuration failed,status=%d",status);
+		WARNING("reset_configuration failed,status=%d",status);
 		return status;
 	}
 
@@ -269,7 +269,7 @@ int st5481_setup_usb(struct st5481_adapter *adapter)
 
 	// Check if the config is sane
 	if ( altsetting->desc.bNumEndpoints != 7 ) {
-		WARN("expecting 7 got %d endpoints!", altsetting->desc.bNumEndpoints);
+		WARNING("expecting 7 got %d endpoints!", altsetting->desc.bNumEndpoints);
 		return -EINVAL;
 	}
 
@@ -279,7 +279,7 @@ int st5481_setup_usb(struct st5481_adapter *adapter)
 
 	// Use alternative setting 3 on interface 0 to have 2B+D
 	if ((status = usb_set_interface (dev, 0, 3)) < 0) {
-		WARN("usb_set_interface failed,status=%d",status);
+		WARNING("usb_set_interface failed,status=%d",status);
 		return status;
 	}
 
@@ -497,7 +497,7 @@ static void usb_in_complete(struct urb *urb)
 				DBG(1,"urb killed status %d", urb->status);
 				return; // Give up
 			default: 
-				WARN("urb status %d",urb->status);
+				WARNING("urb status %d",urb->status);
 				break;
 		}
 	}
@@ -523,7 +523,7 @@ static void usb_in_complete(struct urb *urb)
 			DBG(4,"count=%d",status);
 			DBG_PACKET(0x400, in->rcvbuf, status);
 			if (!(skb = dev_alloc_skb(status))) {
-				WARN("receive out of memory\n");
+				WARNING("receive out of memory\n");
 				break;
 			}
 			memcpy(skb_put(skb, status), in->rcvbuf, status);
diff --git a/drivers/usb/gadget/at91_udc.h b/drivers/usb/gadget/at91_udc.h
index a973f2a50fb9..c65d62295890 100644
--- a/drivers/usb/gadget/at91_udc.h
+++ b/drivers/usb/gadget/at91_udc.h
@@ -171,7 +171,7 @@ struct at91_request {
 #endif
 
 #define ERR(stuff...)		pr_err("udc: " stuff)
-#define WARN(stuff...)		pr_warning("udc: " stuff)
+#define WARNING(stuff...)	pr_warning("udc: " stuff)
 #define INFO(stuff...)		pr_info("udc: " stuff)
 #define DBG(stuff...)		pr_debug("udc: " stuff)
 
diff --git a/drivers/usb/gadget/cdc2.c b/drivers/usb/gadget/cdc2.c
index d490d0289507..a39a4b940c33 100644
--- a/drivers/usb/gadget/cdc2.c
+++ b/drivers/usb/gadget/cdc2.c
@@ -170,7 +170,7 @@ static int __init cdc_bind(struct usb_composite_dev *cdev)
 		 * but if the controller isn't recognized at all then
 		 * that assumption is a bit more likely to be wrong.
 		 */
-		WARN(cdev, "controller '%s' not recognized; trying %s\n",
+		WARNING(cdev, "controller '%s' not recognized; trying %s\n",
 				gadget->name,
 				cdc_config_driver.label);
 		device_desc.bcdDevice =
diff --git a/drivers/usb/gadget/ether.c b/drivers/usb/gadget/ether.c
index d7aaaa29b1e1..bcac2e68660d 100644
--- a/drivers/usb/gadget/ether.c
+++ b/drivers/usb/gadget/ether.c
@@ -293,7 +293,7 @@ static int __init eth_bind(struct usb_composite_dev *cdev)
 		 * but if the controller isn't recognized at all then
 		 * that assumption is a bit more likely to be wrong.
 		 */
-		WARN(cdev, "controller '%s' not recognized; trying %s\n",
+		WARNING(cdev, "controller '%s' not recognized; trying %s\n",
 				gadget->name,
 				eth_config_driver.label);
 		device_desc.bcdDevice =
diff --git a/drivers/usb/gadget/file_storage.c b/drivers/usb/gadget/file_storage.c
index 15c24edbb61a..ea2c31d18080 100644
--- a/drivers/usb/gadget/file_storage.c
+++ b/drivers/usb/gadget/file_storage.c
@@ -308,7 +308,7 @@ MODULE_LICENSE("Dual BSD/GPL");
 	dev_vdbg(&(d)->gadget->dev , fmt , ## args)
 #define ERROR(d, fmt, args...) \
 	dev_err(&(d)->gadget->dev , fmt , ## args)
-#define WARN(d, fmt, args...) \
+#define WARNING(d, fmt, args...) \
 	dev_warn(&(d)->gadget->dev , fmt , ## args)
 #define INFO(d, fmt, args...) \
 	dev_info(&(d)->gadget->dev , fmt , ## args)
@@ -1091,7 +1091,7 @@ static int ep0_queue(struct fsg_dev *fsg)
 	if (rc != 0 && rc != -ESHUTDOWN) {
 
 		/* We can't do much more than wait for a reset */
-		WARN(fsg, "error in submission: %s --> %d\n",
+		WARNING(fsg, "error in submission: %s --> %d\n",
 				fsg->ep0->name, rc);
 	}
 	return rc;
@@ -1227,7 +1227,7 @@ static void received_cbi_adsc(struct fsg_dev *fsg, struct fsg_buffhd *bh)
 
 	/* Save the command for later */
 	if (fsg->cbbuf_cmnd_size)
-		WARN(fsg, "CB[I] overwriting previous command\n");
+		WARNING(fsg, "CB[I] overwriting previous command\n");
 	fsg->cbbuf_cmnd_size = req->actual;
 	memcpy(fsg->cbbuf_cmnd, req->buf, fsg->cbbuf_cmnd_size);
 
@@ -1506,7 +1506,7 @@ static void start_transfer(struct fsg_dev *fsg, struct usb_ep *ep,
 		 * submissions if DMA is enabled. */
 		if (rc != -ESHUTDOWN && !(rc == -EOPNOTSUPP &&
 						req->length == 0))
-			WARN(fsg, "error in submission: %s --> %d\n",
+			WARNING(fsg, "error in submission: %s --> %d\n",
 					ep->name, rc);
 	}
 }
@@ -2294,7 +2294,7 @@ static int halt_bulk_in_endpoint(struct fsg_dev *fsg)
 		VDBG(fsg, "delayed bulk-in endpoint halt\n");
 	while (rc != 0) {
 		if (rc != -EAGAIN) {
-			WARN(fsg, "usb_ep_set_halt -> %d\n", rc);
+			WARNING(fsg, "usb_ep_set_halt -> %d\n", rc);
 			rc = 0;
 			break;
 		}
@@ -2317,7 +2317,7 @@ static int wedge_bulk_in_endpoint(struct fsg_dev *fsg)
 		VDBG(fsg, "delayed bulk-in endpoint wedge\n");
 	while (rc != 0) {
 		if (rc != -EAGAIN) {
-			WARN(fsg, "usb_ep_set_wedge -> %d\n", rc);
+			WARNING(fsg, "usb_ep_set_wedge -> %d\n", rc);
 			rc = 0;
 			break;
 		}
@@ -3755,7 +3755,7 @@ static int __init check_parameters(struct fsg_dev *fsg)
 		if (gcnum >= 0)
 			mod_data.release = 0x0300 + gcnum;
 		else {
-			WARN(fsg, "controller '%s' not recognized\n",
+			WARNING(fsg, "controller '%s' not recognized\n",
 				fsg->gadget->name);
 			mod_data.release = 0x0399;
 		}
diff --git a/drivers/usb/gadget/fsl_usb2_udc.c b/drivers/usb/gadget/fsl_usb2_udc.c
index 1695382f30fe..1cfccf102a2d 100644
--- a/drivers/usb/gadget/fsl_usb2_udc.c
+++ b/drivers/usb/gadget/fsl_usb2_udc.c
@@ -1538,7 +1538,7 @@ static void dtd_complete_irq(struct fsl_udc *udc)
 
 		/* If the ep is configured */
 		if (curr_ep->name == NULL) {
-			WARN("Invalid EP?");
+			WARNING("Invalid EP?");
 			continue;
 		}
 
diff --git a/drivers/usb/gadget/fsl_usb2_udc.h b/drivers/usb/gadget/fsl_usb2_udc.h
index 98b1483ef6a5..6131752a38bc 100644
--- a/drivers/usb/gadget/fsl_usb2_udc.h
+++ b/drivers/usb/gadget/fsl_usb2_udc.h
@@ -552,7 +552,7 @@ static void dump_msg(const char *label, const u8 * buf, unsigned int length)
 #endif
 
 #define ERR(stuff...)		pr_err("udc: " stuff)
-#define WARN(stuff...)		pr_warning("udc: " stuff)
+#define WARNING(stuff...)		pr_warning("udc: " stuff)
 #define INFO(stuff...)		pr_info("udc: " stuff)
 
 /*-------------------------------------------------------------------------*/
diff --git a/drivers/usb/gadget/gmidi.c b/drivers/usb/gadget/gmidi.c
index 7f4d4828e3aa..ea8651e3da1a 100644
--- a/drivers/usb/gadget/gmidi.c
+++ b/drivers/usb/gadget/gmidi.c
@@ -138,8 +138,6 @@ static void gmidi_transmit(struct gmidi_device* dev, struct usb_request* req);
 	dev_vdbg(&(d)->gadget->dev , fmt , ## args)
 #define ERROR(d, fmt, args...) \
 	dev_err(&(d)->gadget->dev , fmt , ## args)
-#define WARN(d, fmt, args...) \
-	dev_warn(&(d)->gadget->dev , fmt , ## args)
 #define INFO(d, fmt, args...) \
 	dev_info(&(d)->gadget->dev , fmt , ## args)
 
diff --git a/drivers/usb/gadget/goku_udc.c b/drivers/usb/gadget/goku_udc.c
index 48f1c63b7013..60aa04847b18 100644
--- a/drivers/usb/gadget/goku_udc.c
+++ b/drivers/usb/gadget/goku_udc.c
@@ -1768,7 +1768,7 @@ static int goku_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	 * usb_gadget_driver_{register,unregister}() must change.
 	 */
 	if (the_controller) {
-		WARN(dev, "ignoring %s\n", pci_name(pdev));
+		WARNING(dev, "ignoring %s\n", pci_name(pdev));
 		return -EBUSY;
 	}
 	if (!pdev->irq) {
diff --git a/drivers/usb/gadget/goku_udc.h b/drivers/usb/gadget/goku_udc.h
index bc4eb1e0b507..566cb2319056 100644
--- a/drivers/usb/gadget/goku_udc.h
+++ b/drivers/usb/gadget/goku_udc.h
@@ -285,7 +285,7 @@ struct goku_udc {
 
 #define ERROR(dev,fmt,args...) \
 	xprintk(dev , KERN_ERR , fmt , ## args)
-#define WARN(dev,fmt,args...) \
+#define WARNING(dev,fmt,args...) \
 	xprintk(dev , KERN_WARNING , fmt , ## args)
 #define INFO(dev,fmt,args...) \
 	xprintk(dev , KERN_INFO , fmt , ## args)
diff --git a/drivers/usb/gadget/inode.c b/drivers/usb/gadget/inode.c
index 04692d59fc1c..f4585d3e90d7 100644
--- a/drivers/usb/gadget/inode.c
+++ b/drivers/usb/gadget/inode.c
@@ -262,8 +262,6 @@ static const char *CHIP;
 
 #define ERROR(dev,fmt,args...) \
 	xprintk(dev , KERN_ERR , fmt , ## args)
-#define WARN(dev,fmt,args...) \
-	xprintk(dev , KERN_WARNING , fmt , ## args)
 #define INFO(dev,fmt,args...) \
 	xprintk(dev , KERN_INFO , fmt , ## args)
 
diff --git a/drivers/usb/gadget/net2280.c b/drivers/usb/gadget/net2280.c
index b67ab677af72..5cfb5ebf3881 100644
--- a/drivers/usb/gadget/net2280.c
+++ b/drivers/usb/gadget/net2280.c
@@ -1007,7 +1007,7 @@ static void scan_dma_completions (struct net2280_ep *ep)
 			 * 0122, and 0124; not all cases trigger the warning.
 			 */
 			if ((tmp & (1 << NAK_OUT_PACKETS)) == 0) {
-				WARN (ep->dev, "%s lost packet sync!\n",
+				WARNING (ep->dev, "%s lost packet sync!\n",
 						ep->ep.name);
 				req->req.status = -EOVERFLOW;
 			} else if ((tmp = readl (&ep->regs->ep_avail)) != 0) {
diff --git a/drivers/usb/gadget/net2280.h b/drivers/usb/gadget/net2280.h
index 1f2af398a9a4..81a71dbdc2c6 100644
--- a/drivers/usb/gadget/net2280.h
+++ b/drivers/usb/gadget/net2280.h
@@ -272,7 +272,7 @@ static inline void net2280_led_shutdown (struct net2280 *dev)
 
 #define ERROR(dev,fmt,args...) \
 	xprintk(dev , KERN_ERR , fmt , ## args)
-#define WARN(dev,fmt,args...) \
+#define WARNING(dev,fmt,args...) \
 	xprintk(dev , KERN_WARNING , fmt , ## args)
 #define INFO(dev,fmt,args...) \
 	xprintk(dev , KERN_INFO , fmt , ## args)
diff --git a/drivers/usb/gadget/omap_udc.c b/drivers/usb/gadget/omap_udc.c
index 4b79a8509e84..395bd1844482 100644
--- a/drivers/usb/gadget/omap_udc.c
+++ b/drivers/usb/gadget/omap_udc.c
@@ -1120,7 +1120,7 @@ static int omap_ep_set_halt(struct usb_ep *_ep, int value)
 			status = -EINVAL;
 		else if (value) {
 			if (ep->udc->ep0_set_config) {
-				WARN("error changing config?\n");
+				WARNING("error changing config?\n");
 				omap_writew(UDC_CLR_CFG, UDC_SYSCON2);
 			}
 			omap_writew(UDC_STALL_CMD, UDC_SYSCON2);
@@ -1764,7 +1764,7 @@ do_stall:
 					u.r.bRequestType, u.r.bRequest, status);
 			if (udc->ep0_set_config) {
 				if (udc->ep0_reset_config)
-					WARN("error resetting config?\n");
+					WARNING("error resetting config?\n");
 				else
 					omap_writew(UDC_CLR_CFG, UDC_SYSCON2);
 			}
@@ -3076,7 +3076,7 @@ static int omap_udc_suspend(struct platform_device *dev, pm_message_t message)
 	 * which would prevent entry to deep sleep...
 	 */
 	if ((devstat & UDC_ATT) != 0 && (devstat & UDC_SUS) == 0) {
-		WARN("session active; suspend requires disconnect\n");
+		WARNING("session active; suspend requires disconnect\n");
 		omap_pullup(&udc->gadget, 0);
 	}
 
diff --git a/drivers/usb/gadget/omap_udc.h b/drivers/usb/gadget/omap_udc.h
index 8522bbb12278..29edc51b6b22 100644
--- a/drivers/usb/gadget/omap_udc.h
+++ b/drivers/usb/gadget/omap_udc.h
@@ -188,7 +188,7 @@ struct omap_udc {
 #endif
 
 #define ERR(stuff...)		pr_err("udc: " stuff)
-#define WARN(stuff...)		pr_warning("udc: " stuff)
+#define WARNING(stuff...)	pr_warning("udc: " stuff)
 #define INFO(stuff...)		pr_info("udc: " stuff)
 #define DBG(stuff...)		pr_debug("udc: " stuff)
 
diff --git a/drivers/usb/gadget/printer.c b/drivers/usb/gadget/printer.c
index 49cd9e145a9b..e0090085b78e 100644
--- a/drivers/usb/gadget/printer.c
+++ b/drivers/usb/gadget/printer.c
@@ -179,7 +179,7 @@ module_param(qlen, uint, S_IRUGO|S_IWUSR);
 
 #define ERROR(dev, fmt, args...) \
 	xprintk(dev, KERN_ERR, fmt, ## args)
-#define WARN(dev, fmt, args...) \
+#define WARNING(dev, fmt, args...) \
 	xprintk(dev, KERN_WARNING, fmt, ## args)
 #define INFO(dev, fmt, args...) \
 	xprintk(dev, KERN_INFO, fmt, ## args)
diff --git a/drivers/usb/gadget/pxa25x_udc.c b/drivers/usb/gadget/pxa25x_udc.c
index 8fb0066609bb..7e6725d89976 100644
--- a/drivers/usb/gadget/pxa25x_udc.c
+++ b/drivers/usb/gadget/pxa25x_udc.c
@@ -342,7 +342,7 @@ pxa25x_ep_free_request (struct usb_ep *_ep, struct usb_request *_req)
 	struct pxa25x_request	*req;
 
 	req = container_of (_req, struct pxa25x_request, req);
-	WARN_ON (!list_empty (&req->queue));
+	WARN_ON(!list_empty (&req->queue));
 	kfree(req);
 }
 
@@ -1556,7 +1556,7 @@ config_change:
 					 * tell us about config change events,
 					 * so later ones may fail...
 					 */
-					WARN("config change %02x fail %d?\n",
+					WARNING("config change %02x fail %d?\n",
 						u.r.bRequest, i);
 					return;
 					/* TODO experiment:  if has_cfr,
@@ -2330,7 +2330,7 @@ static int pxa25x_udc_suspend(struct platform_device *dev, pm_message_t state)
 	unsigned long flags;
 
 	if (!udc->mach->gpio_pullup && !udc->mach->udc_command)
-		WARN("USB host won't detect disconnect!\n");
+		WARNING("USB host won't detect disconnect!\n");
 	udc->suspended = 1;
 
 	local_irq_save(flags);
diff --git a/drivers/usb/gadget/pxa25x_udc.h b/drivers/usb/gadget/pxa25x_udc.h
index 4d11ece7c95f..c8a13215e02c 100644
--- a/drivers/usb/gadget/pxa25x_udc.h
+++ b/drivers/usb/gadget/pxa25x_udc.h
@@ -259,7 +259,7 @@ dump_state(struct pxa25x_udc *dev)
 #define DBG(lvl, stuff...) do{if ((lvl) <= UDC_DEBUG) DMSG(stuff);}while(0)
 
 #define ERR(stuff...)		pr_err("udc: " stuff)
-#define WARN(stuff...)		pr_warning("udc: " stuff)
+#define WARNING(stuff...)	pr_warning("udc: " stuff)
 #define INFO(stuff...)		pr_info("udc: " stuff)
 
 
diff --git a/drivers/usb/gadget/u_ether.c b/drivers/usb/gadget/u_ether.c
index 5458f43a8668..3791e6271903 100644
--- a/drivers/usb/gadget/u_ether.c
+++ b/drivers/usb/gadget/u_ether.c
@@ -116,7 +116,6 @@ static inline int qlen(struct usb_gadget *gadget)
 #undef DBG
 #undef VDBG
 #undef ERROR
-#undef WARN
 #undef INFO
 
 #define xprintk(d, level, fmt, args...) \
@@ -140,8 +139,6 @@ static inline int qlen(struct usb_gadget *gadget)
 
 #define ERROR(dev, fmt, args...) \
 	xprintk(dev , KERN_ERR , fmt , ## args)
-#define WARN(dev, fmt, args...) \
-	xprintk(dev , KERN_WARNING , fmt , ## args)
 #define INFO(dev, fmt, args...) \
 	xprintk(dev , KERN_INFO , fmt , ## args)
 
diff --git a/drivers/usb/host/isp116x-hcd.c b/drivers/usb/host/isp116x-hcd.c
index 31178e10cbbe..ce1ca0ba0515 100644
--- a/drivers/usb/host/isp116x-hcd.c
+++ b/drivers/usb/host/isp116x-hcd.c
@@ -882,7 +882,7 @@ static void isp116x_endpoint_disable(struct usb_hcd *hcd,
 	for (i = 0; i < 100 && !list_empty(&hep->urb_list); i++)
 		msleep(3);
 	if (!list_empty(&hep->urb_list))
-		WARN("ep %p not empty?\n", ep);
+		WARNING("ep %p not empty?\n", ep);
 
 	kfree(ep);
 	hep->hcpriv = NULL;
diff --git a/drivers/usb/host/isp116x.h b/drivers/usb/host/isp116x.h
index 595b90a99848..aa211bafcff9 100644
--- a/drivers/usb/host/isp116x.h
+++ b/drivers/usb/host/isp116x.h
@@ -338,7 +338,7 @@ struct isp116x_ep {
 #endif
 
 #define ERR(stuff...)		printk(KERN_ERR "116x: " stuff)
-#define WARN(stuff...)		printk(KERN_WARNING "116x: " stuff)
+#define WARNING(stuff...)	printk(KERN_WARNING "116x: " stuff)
 #define INFO(stuff...)		printk(KERN_INFO "116x: " stuff)
 
 /* ------------------------------------------------- */
diff --git a/drivers/usb/host/sl811-hcd.c b/drivers/usb/host/sl811-hcd.c
index 340d72da554a..8a74bbb57d08 100644
--- a/drivers/usb/host/sl811-hcd.c
+++ b/drivers/usb/host/sl811-hcd.c
@@ -1026,7 +1026,7 @@ sl811h_endpoint_disable(struct usb_hcd *hcd, struct usb_host_endpoint *hep)
 	if (!list_empty(&hep->urb_list))
 		msleep(3);
 	if (!list_empty(&hep->urb_list))
-		WARN("ep %p not empty?\n", ep);
+		WARNING("ep %p not empty?\n", ep);
 
 	kfree(ep);
 	hep->hcpriv = NULL;
diff --git a/drivers/usb/host/sl811.h b/drivers/usb/host/sl811.h
index 7690d98e42a7..b6b8c1f233dd 100644
--- a/drivers/usb/host/sl811.h
+++ b/drivers/usb/host/sl811.h
@@ -261,6 +261,6 @@ sl811_read_buf(struct sl811 *sl811, int addr, void *buf, size_t count)
 #endif
 
 #define ERR(stuff...)		printk(KERN_ERR "sl811: " stuff)
-#define WARN(stuff...)		printk(KERN_WARNING "sl811: " stuff)
+#define WARNING(stuff...)	printk(KERN_WARNING "sl811: " stuff)
 #define INFO(stuff...)		printk(KERN_INFO "sl811: " stuff)
 
diff --git a/drivers/usb/misc/usbtest.c b/drivers/usb/misc/usbtest.c
index 054dedd28127..b358c4e1cf21 100644
--- a/drivers/usb/misc/usbtest.c
+++ b/drivers/usb/misc/usbtest.c
@@ -81,7 +81,7 @@ static struct usb_device *testdev_to_usbdev (struct usbtest_dev *test)
 
 #define ERROR(tdev, fmt, args...) \
 	dev_err(&(tdev)->intf->dev , fmt , ## args)
-#define WARN(tdev, fmt, args...) \
+#define WARNING(tdev, fmt, args...) \
 	dev_warn(&(tdev)->intf->dev , fmt , ## args)
 
 /*-------------------------------------------------------------------------*/
@@ -1946,7 +1946,7 @@ usbtest_probe (struct usb_interface *intf, const struct usb_device_id *id)
 
 			status = get_endpoints (dev, intf);
 			if (status < 0) {
-				WARN(dev, "couldn't get endpoints, %d\n",
+				WARNING(dev, "couldn't get endpoints, %d\n",
 						status);
 				return status;
 			}
diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h
index 747c3a49cdc9..c932390c6da0 100644
--- a/include/linux/usb/composite.h
+++ b/include/linux/usb/composite.h
@@ -330,7 +330,7 @@ extern int usb_string_id(struct usb_composite_dev *c);
 	dev_vdbg(&(d)->gadget->dev , fmt , ## args)
 #define ERROR(d, fmt, args...) \
 	dev_err(&(d)->gadget->dev , fmt , ## args)
-#define WARN(d, fmt, args...) \
+#define WARNING(d, fmt, args...) \
 	dev_warn(&(d)->gadget->dev , fmt , ## args)
 #define INFO(d, fmt, args...) \
 	dev_info(&(d)->gadget->dev , fmt , ## args)
-- 
cgit v1.2.3


From 2711b793eb62a5873a0ba583a69252040aef176e Mon Sep 17 00:00:00 2001
From: Vegard Nossum <vegard.nossum@gmail.com>
Date: Fri, 25 Jul 2008 01:45:56 -0700
Subject: kallsyms: unify 32- and 64-bit code

Use the %p format string which already accounts for the padding you need
with a pointer type on a particular architecture.

Also replace the macro with a static inline function to match the rest of
the file.

Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Arjan van de Ven <arjan@infradead.org>
Signed-off-by: Vegard Nossum <vegard.nossum@gmail.com>
Cc: Sam Ravnborg <sam@ravnborg.org>
Cc: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kallsyms.h | 19 ++++++-------------
 1 file changed, 6 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h
index 00c1801099fa..57aefa160a92 100644
--- a/include/linux/kallsyms.h
+++ b/include/linux/kallsyms.h
@@ -6,6 +6,7 @@
 #define _LINUX_KALLSYMS_H
 
 #include <linux/errno.h>
+#include <linux/kernel.h>
 #include <linux/stddef.h>
 
 #define KSYM_NAME_LEN 128
@@ -105,18 +106,10 @@ static inline void print_fn_descriptor_symbol(const char *fmt, void *addr)
 	print_symbol(fmt, (unsigned long)addr);
 }
 
-#ifndef CONFIG_64BIT
-#define print_ip_sym(ip)		\
-do {					\
-	printk("[<%08lx>]", ip);	\
-	print_symbol(" %s\n", ip);	\
-} while(0)
-#else
-#define print_ip_sym(ip)		\
-do {					\
-	printk("[<%016lx>]", ip);	\
-	print_symbol(" %s\n", ip);	\
-} while(0)
-#endif
+static inline void print_ip_sym(unsigned long ip)
+{
+	printk("[<%p>]", (void *) ip);
+	print_symbol(" %s\n", ip);
+}
 
 #endif /*_LINUX_KALLSYMS_H*/
-- 
cgit v1.2.3


From 717115e1a5856b57af0f71e1df7149108294fc10 Mon Sep 17 00:00:00 2001
From: Dave Young <hidave.darkstar@gmail.com>
Date: Fri, 25 Jul 2008 01:45:58 -0700
Subject: printk ratelimiting rewrite

All ratelimit user use same jiffies and burst params, so some messages
(callbacks) will be lost.

For example:
a call printk_ratelimit(5 * HZ, 1)
b call printk_ratelimit(5 * HZ, 1) before the 5*HZ timeout of a, then b will
will be supressed.

- rewrite __ratelimit, and use a ratelimit_state as parameter.  Thanks for
  hints from andrew.

- Add WARN_ON_RATELIMIT, update rcupreempt.h

- remove __printk_ratelimit

- use __ratelimit in net_ratelimit

Signed-off-by: Dave Young <hidave.darkstar@gmail.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: "Paul E. McKenney" <paulmck@us.ibm.com>
Cc: Dave Young <hidave.darkstar@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-generic/bug.h  |  3 +++
 include/linux/kernel.h     |  8 ++-----
 include/linux/net.h        |  3 +--
 include/linux/ratelimit.h  | 27 +++++++++++++++++++++++
 include/linux/rcupreempt.h |  9 ++++++--
 kernel/printk.c            | 17 +++-----------
 kernel/sysctl.c            |  4 ++--
 lib/ratelimit.c            | 55 +++++++++++++++++++++++++---------------------
 net/core/sysctl_net_core.c |  4 ++--
 net/core/utils.c           |  5 ++---
 10 files changed, 79 insertions(+), 56 deletions(-)
 create mode 100644 include/linux/ratelimit.h

(limited to 'include/linux')

diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h
index a346e744e770..a3f738cffdb6 100644
--- a/include/asm-generic/bug.h
+++ b/include/asm-generic/bug.h
@@ -97,6 +97,9 @@ extern void warn_slowpath(const char *file, const int line,
 	unlikely(__ret_warn_once);				\
 })
 
+#define WARN_ON_RATELIMIT(condition, state)			\
+		WARN_ON((condition) && __ratelimit(state))
+
 #ifdef CONFIG_SMP
 # define WARN_ON_SMP(x)			WARN_ON(x)
 #else
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 5c4b1251e110..fdbbf72ca2eb 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -15,6 +15,7 @@
 #include <linux/bitops.h>
 #include <linux/log2.h>
 #include <linux/typecheck.h>
+#include <linux/ratelimit.h>
 #include <asm/byteorder.h>
 #include <asm/bug.h>
 
@@ -189,11 +190,8 @@ asmlinkage int vprintk(const char *fmt, va_list args)
 asmlinkage int printk(const char * fmt, ...)
 	__attribute__ ((format (printf, 1, 2))) __cold;
 
-extern int printk_ratelimit_jiffies;
-extern int printk_ratelimit_burst;
+extern struct ratelimit_state printk_ratelimit_state;
 extern int printk_ratelimit(void);
-extern int __ratelimit(int ratelimit_jiffies, int ratelimit_burst);
-extern int __printk_ratelimit(int ratelimit_jiffies, int ratelimit_burst);
 extern bool printk_timed_ratelimit(unsigned long *caller_jiffies,
 				   unsigned int interval_msec);
 #else
@@ -204,8 +202,6 @@ static inline int printk(const char *s, ...)
 	__attribute__ ((format (printf, 1, 2)));
 static inline int __cold printk(const char *s, ...) { return 0; }
 static inline int printk_ratelimit(void) { return 0; }
-static inline int __printk_ratelimit(int ratelimit_jiffies, \
-				     int ratelimit_burst) { return 0; }
 static inline bool printk_timed_ratelimit(unsigned long *caller_jiffies, \
 					  unsigned int interval_msec)	\
 		{ return false; }
diff --git a/include/linux/net.h b/include/linux/net.h
index 2f999fbb188d..4a9a30f2d68f 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -351,8 +351,7 @@ static const struct proto_ops name##_ops = {			\
 
 #ifdef CONFIG_SYSCTL
 #include <linux/sysctl.h>
-extern int net_msg_cost;
-extern int net_msg_burst;
+extern struct ratelimit_state net_ratelimit_state;
 #endif
 
 #endif /* __KERNEL__ */
diff --git a/include/linux/ratelimit.h b/include/linux/ratelimit.h
new file mode 100644
index 000000000000..18a5b9ba9d40
--- /dev/null
+++ b/include/linux/ratelimit.h
@@ -0,0 +1,27 @@
+#ifndef _LINUX_RATELIMIT_H
+#define _LINUX_RATELIMIT_H
+#include <linux/param.h>
+
+#define DEFAULT_RATELIMIT_INTERVAL (5 * HZ)
+#define DEFAULT_RATELIMIT_BURST 10
+
+struct ratelimit_state {
+	int interval;
+	int burst;
+	int printed;
+	int missed;
+	unsigned long begin;
+};
+
+#define DEFINE_RATELIMIT_STATE(name, interval, burst)		\
+		struct ratelimit_state name = {interval, burst,}
+
+extern int __ratelimit(struct ratelimit_state *rs);
+
+static inline int ratelimit(void)
+{
+	static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
+					DEFAULT_RATELIMIT_BURST);
+	return __ratelimit(&rs);
+}
+#endif
diff --git a/include/linux/rcupreempt.h b/include/linux/rcupreempt.h
index f04b64eca636..0967f03b0705 100644
--- a/include/linux/rcupreempt.h
+++ b/include/linux/rcupreempt.h
@@ -115,16 +115,21 @@ DECLARE_PER_CPU(struct rcu_dyntick_sched, rcu_dyntick_sched);
 
 static inline void rcu_enter_nohz(void)
 {
+	static DEFINE_RATELIMIT_STATE(rs, 10 * HZ, 1);
+
 	smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */
 	__get_cpu_var(rcu_dyntick_sched).dynticks++;
-	WARN_ON(__get_cpu_var(rcu_dyntick_sched).dynticks & 0x1);
+	WARN_ON_RATELIMIT(__get_cpu_var(rcu_dyntick_sched).dynticks & 0x1, &rs);
 }
 
 static inline void rcu_exit_nohz(void)
 {
+	static DEFINE_RATELIMIT_STATE(rs, 10 * HZ, 1);
+
 	smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */
 	__get_cpu_var(rcu_dyntick_sched).dynticks++;
-	WARN_ON(!(__get_cpu_var(rcu_dyntick_sched).dynticks & 0x1));
+	WARN_ON_RATELIMIT(!(__get_cpu_var(rcu_dyntick_sched).dynticks & 0x1),
+				&rs);
 }
 
 #else /* CONFIG_NO_HZ */
diff --git a/kernel/printk.c b/kernel/printk.c
index 3f7a2a94583b..a7f7559c5f6c 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -1308,6 +1308,8 @@ void tty_write_message(struct tty_struct *tty, char *msg)
 }
 
 #if defined CONFIG_PRINTK
+
+DEFINE_RATELIMIT_STATE(printk_ratelimit_state, 5 * HZ, 10);
 /*
  * printk rate limiting, lifted from the networking subsystem.
  *
@@ -1315,22 +1317,9 @@ void tty_write_message(struct tty_struct *tty, char *msg)
  * every printk_ratelimit_jiffies to make a denial-of-service
  * attack impossible.
  */
-int __printk_ratelimit(int ratelimit_jiffies, int ratelimit_burst)
-{
-	return __ratelimit(ratelimit_jiffies, ratelimit_burst);
-}
-EXPORT_SYMBOL(__printk_ratelimit);
-
-/* minimum time in jiffies between messages */
-int printk_ratelimit_jiffies = 5 * HZ;
-
-/* number of messages we send before ratelimiting */
-int printk_ratelimit_burst = 10;
-
 int printk_ratelimit(void)
 {
-	return __printk_ratelimit(printk_ratelimit_jiffies,
-				printk_ratelimit_burst);
+	return __ratelimit(&printk_ratelimit_state);
 }
 EXPORT_SYMBOL(printk_ratelimit);
 
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 1a8299d1fe59..35a50db9b6ce 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -624,7 +624,7 @@ static struct ctl_table kern_table[] = {
 	{
 		.ctl_name	= KERN_PRINTK_RATELIMIT,
 		.procname	= "printk_ratelimit",
-		.data		= &printk_ratelimit_jiffies,
+		.data		= &printk_ratelimit_state.interval,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec_jiffies,
@@ -633,7 +633,7 @@ static struct ctl_table kern_table[] = {
 	{
 		.ctl_name	= KERN_PRINTK_RATELIMIT_BURST,
 		.procname	= "printk_ratelimit_burst",
-		.data		= &printk_ratelimit_burst,
+		.data		= &printk_ratelimit_state.burst,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
diff --git a/lib/ratelimit.c b/lib/ratelimit.c
index 485e3040dcd4..35136671b215 100644
--- a/lib/ratelimit.c
+++ b/lib/ratelimit.c
@@ -3,6 +3,9 @@
  *
  * Isolated from kernel/printk.c by Dave Young <hidave.darkstar@gmail.com>
  *
+ * 2008-05-01 rewrite the function and use a ratelimit_state data struct as
+ * parameter. Now every user can use their own standalone ratelimit_state.
+ *
  * This file is released under the GPLv2.
  *
  */
@@ -11,41 +14,43 @@
 #include <linux/jiffies.h>
 #include <linux/module.h>
 
+static DEFINE_SPINLOCK(ratelimit_lock);
+static unsigned long flags;
+
 /*
  * __ratelimit - rate limiting
- * @ratelimit_jiffies: minimum time in jiffies between two callbacks
- * @ratelimit_burst: number of callbacks we do before ratelimiting
+ * @rs: ratelimit_state data
  *
- * This enforces a rate limit: not more than @ratelimit_burst callbacks
- * in every ratelimit_jiffies
+ * This enforces a rate limit: not more than @rs->ratelimit_burst callbacks
+ * in every @rs->ratelimit_jiffies
  */
-int __ratelimit(int ratelimit_jiffies, int ratelimit_burst)
+int __ratelimit(struct ratelimit_state *rs)
 {
-	static DEFINE_SPINLOCK(ratelimit_lock);
-	static unsigned toks = 10 * 5 * HZ;
-	static unsigned long last_msg;
-	static int missed;
-	unsigned long flags;
-	unsigned long now = jiffies;
+	if (!rs->interval)
+		return 1;
 
 	spin_lock_irqsave(&ratelimit_lock, flags);
-	toks += now - last_msg;
-	last_msg = now;
-	if (toks > (ratelimit_burst * ratelimit_jiffies))
-		toks = ratelimit_burst * ratelimit_jiffies;
-	if (toks >= ratelimit_jiffies) {
-		int lost = missed;
+	if (!rs->begin)
+		rs->begin = jiffies;
 
-		missed = 0;
-		toks -= ratelimit_jiffies;
-		spin_unlock_irqrestore(&ratelimit_lock, flags);
-		if (lost)
-			printk(KERN_WARNING "%s: %d messages suppressed\n",
-				__func__, lost);
-		return 1;
+	if (time_is_before_jiffies(rs->begin + rs->interval)) {
+		if (rs->missed)
+			printk(KERN_WARNING "%s: %d callbacks suppressed\n",
+				__func__, rs->missed);
+		rs->begin = 0;
+		rs->printed = 0;
+		rs->missed = 0;
 	}
-	missed++;
+	if (rs->burst && rs->burst > rs->printed)
+		goto print;
+
+	rs->missed++;
 	spin_unlock_irqrestore(&ratelimit_lock, flags);
 	return 0;
+
+print:
+	rs->printed++;
+	spin_unlock_irqrestore(&ratelimit_lock, flags);
+	return 1;
 }
 EXPORT_SYMBOL(__ratelimit);
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index a570e2af22cb..f686467ff12b 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -67,7 +67,7 @@ static struct ctl_table net_core_table[] = {
 	{
 		.ctl_name	= NET_CORE_MSG_COST,
 		.procname	= "message_cost",
-		.data		= &net_msg_cost,
+		.data		= &net_ratelimit_state.interval,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec_jiffies,
@@ -76,7 +76,7 @@ static struct ctl_table net_core_table[] = {
 	{
 		.ctl_name	= NET_CORE_MSG_BURST,
 		.procname	= "message_burst",
-		.data		= &net_msg_burst,
+		.data		= &net_ratelimit_state.burst,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
diff --git a/net/core/utils.c b/net/core/utils.c
index 8031eb59054e..72e0ebe964a0 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -31,17 +31,16 @@
 #include <asm/system.h>
 #include <asm/uaccess.h>
 
-int net_msg_cost __read_mostly = 5*HZ;
-int net_msg_burst __read_mostly = 10;
 int net_msg_warn __read_mostly = 1;
 EXPORT_SYMBOL(net_msg_warn);
 
+DEFINE_RATELIMIT_STATE(net_ratelimit_state, 5 * HZ, 10);
 /*
  * All net warning printk()s should be guarded by this function.
  */
 int net_ratelimit(void)
 {
-	return __printk_ratelimit(net_msg_cost, net_msg_burst);
+	return __ratelimit(&net_ratelimit_state);
 }
 EXPORT_SYMBOL(net_ratelimit);
 
-- 
cgit v1.2.3


From 472dba7d117844c746be97db6be26c2810d79b62 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-linux@fluff.org>
Date: Fri, 25 Jul 2008 01:45:58 -0700
Subject: sm501: add power control callback

Add callback to get or set the power control if the device has the sleep
connected to some form of GPIO.

Signed-off-by: Ben Dooks <ben-linux@fluff.org>
Cc: Arnaud Patard <apatard@mandriva.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/mfd/sm501.c   | 31 +++++++++++++++++++++++++++++++
 include/linux/sm501.h |  7 +++++++
 2 files changed, 38 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mfd/sm501.c b/drivers/mfd/sm501.c
index e2530df4d85c..9296b2673b52 100644
--- a/drivers/mfd/sm501.c
+++ b/drivers/mfd/sm501.c
@@ -1138,8 +1138,31 @@ static int sm501_plat_probe(struct platform_device *dev)
 }
 
 #ifdef CONFIG_PM
+
 /* power management support */
 
+static void sm501_set_power(struct sm501_devdata *sm, int on)
+{
+	struct sm501_platdata *pd = sm->platdata;
+
+	if (pd == NULL)
+		return;
+
+	if (pd->get_power) {
+		if (pd->get_power(sm->dev) == on) {
+			dev_dbg(sm->dev, "is already %d\n", on);
+			return;
+		}
+	}
+
+	if (pd->set_power) {
+		dev_dbg(sm->dev, "setting power to %d\n", on);
+
+		pd->set_power(sm->dev, on);
+		sm501_mdelay(sm, 10);
+	}
+}
+
 static int sm501_plat_suspend(struct platform_device *pdev, pm_message_t state)
 {
 	struct sm501_devdata *sm = platform_get_drvdata(pdev);
@@ -1148,6 +1171,12 @@ static int sm501_plat_suspend(struct platform_device *pdev, pm_message_t state)
 	sm->pm_misc = readl(sm->regs + SM501_MISC_CONTROL);
 
 	sm501_dump_regs(sm);
+
+	if (sm->platdata) {
+		if (sm->platdata->flags & SM501_FLAG_SUSPEND_OFF)
+			sm501_set_power(sm, 0);
+	}
+
 	return 0;
 }
 
@@ -1155,6 +1184,8 @@ static int sm501_plat_resume(struct platform_device *pdev)
 {
 	struct sm501_devdata *sm = platform_get_drvdata(pdev);
 
+	sm501_set_power(sm, 1);
+
 	sm501_dump_regs(sm);
 	sm501_dump_gate(sm);
 	sm501_dump_clk(sm);
diff --git a/include/linux/sm501.h b/include/linux/sm501.h
index b530fa6a1d34..145405bf9efa 100644
--- a/include/linux/sm501.h
+++ b/include/linux/sm501.h
@@ -157,6 +157,8 @@ struct sm501_init_gpio {
 	struct sm501_reg_init	gpio_ddr_high;
 };
 
+#define SM501_FLAG_SUSPEND_OFF		(1<<4)
+
 /* sm501_platdata
  *
  * This is passed with the platform device to allow the board
@@ -170,6 +172,11 @@ struct sm501_platdata {
 	struct sm501_init_gpio		*init_gpiop;
 	struct sm501_platdata_fb	*fb;
 
+	int				 flags;
+
+	int	(*get_power)(struct device *dev);
+	int	(*set_power)(struct device *dev, unsigned int on);
+
 	struct sm501_platdata_gpio_i2c	*gpio_i2c;
 	unsigned int			 gpio_i2c_nr;
 };
-- 
cgit v1.2.3


From f61be273d3699d174bc1438e6804f9f9e52bb932 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-linux@fluff.org>
Date: Fri, 25 Jul 2008 01:45:59 -0700
Subject: sm501: add gpiolib support

Add support for exporting the GPIOs on the SM501 via gpiolib.

Signed-off-by: Ben Dooks <ben-linux@fluff.org>
Cc: Arnaud Patard <apatard@mandriva.com>
Cc: David Brownell <david-b@pacbell.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/mfd/Kconfig   |   8 ++
 drivers/mfd/sm501.c   | 299 +++++++++++++++++++++++++++++++++++++++++---------
 include/linux/sm501.h |  20 +---
 3 files changed, 257 insertions(+), 70 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index 9f93c29fed35..bac9e973ece0 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -19,6 +19,14 @@ config MFD_SM501
 	  interface. The device may be connected by PCI or local bus with
 	  varying functions enabled.
 
+config MFD_SM501_GPIO
+	bool "Export GPIO via GPIO layer"
+	depends on MFD_SM501 && HAVE_GPIO_LIB
+	 ---help---
+	 This option uses the gpio library layer to export the 64 GPIO
+	 lines on the SM501. The platform data is used to supply the
+	 base number for the first GPIO line to register.
+
 config MFD_ASIC3
 	bool "Support for Compaq ASIC3"
 	depends on GENERIC_HARDIRQS && HAVE_GPIO_LIB && ARM
diff --git a/drivers/mfd/sm501.c b/drivers/mfd/sm501.c
index 9296b2673b52..be8713908125 100644
--- a/drivers/mfd/sm501.c
+++ b/drivers/mfd/sm501.c
@@ -19,6 +19,7 @@
 #include <linux/device.h>
 #include <linux/platform_device.h>
 #include <linux/pci.h>
+#include <linux/gpio.h>
 
 #include <linux/sm501.h>
 #include <linux/sm501-regs.h>
@@ -31,10 +32,29 @@ struct sm501_device {
 	struct platform_device		pdev;
 };
 
+struct sm501_gpio;
+
+struct sm501_gpio_chip {
+	struct gpio_chip	gpio;
+	struct sm501_gpio	*ourgpio;	/* to get back to parent. */
+	void __iomem		*regbase;
+};
+
+struct sm501_gpio {
+	struct sm501_gpio_chip	low;
+	struct sm501_gpio_chip	high;
+	spinlock_t		lock;
+
+	unsigned int		 registered : 1;
+	void __iomem		*regs;
+	struct resource		*regs_res;
+};
+
 struct sm501_devdata {
 	spinlock_t			 reg_lock;
 	struct mutex			 clock_lock;
 	struct list_head		 devices;
+	struct sm501_gpio		 gpio;
 
 	struct device			*dev;
 	struct resource			*io_res;
@@ -42,6 +62,7 @@ struct sm501_devdata {
 	struct resource			*regs_claim;
 	struct sm501_platdata		*platdata;
 
+
 	unsigned int			 in_suspend;
 	unsigned long			 pm_misc;
 
@@ -52,6 +73,7 @@ struct sm501_devdata {
 	unsigned int			 rev;
 };
 
+
 #define MHZ (1000 * 1000)
 
 #ifdef DEBUG
@@ -276,58 +298,6 @@ unsigned long sm501_modify_reg(struct device *dev,
 
 EXPORT_SYMBOL_GPL(sm501_modify_reg);
 
-unsigned long sm501_gpio_get(struct device *dev,
-			     unsigned long gpio)
-{
-	struct sm501_devdata *sm = dev_get_drvdata(dev);
-	unsigned long result;
-	unsigned long reg;
-
-	reg = (gpio > 32) ? SM501_GPIO_DATA_HIGH : SM501_GPIO_DATA_LOW;
-	result = readl(sm->regs + reg);
-
-	result >>= (gpio & 31);
-	return result & 1UL;
-}
-
-EXPORT_SYMBOL_GPL(sm501_gpio_get);
-
-void sm501_gpio_set(struct device *dev,
-		    unsigned long gpio,
-		    unsigned int to,
-		    unsigned int dir)
-{
-	struct sm501_devdata *sm = dev_get_drvdata(dev);
-
-	unsigned long bit = 1 << (gpio & 31);
-	unsigned long base;
-	unsigned long save;
-	unsigned long val;
-
-	base = (gpio > 32) ? SM501_GPIO_DATA_HIGH : SM501_GPIO_DATA_LOW;
-	base += SM501_GPIO;
-
-	spin_lock_irqsave(&sm->reg_lock, save);
-
-	val = readl(sm->regs + base) & ~bit;
-	if (to)
-		val |= bit;
-	writel(val, sm->regs + base);
-
-	val = readl(sm->regs + SM501_GPIO_DDR_LOW) & ~bit;
-	if (dir)
-		val |= bit;
-
-	writel(val, sm->regs + SM501_GPIO_DDR_LOW);
-	sm501_sync_regs(sm);
-
-	spin_unlock_irqrestore(&sm->reg_lock, save);
-
-}
-
-EXPORT_SYMBOL_GPL(sm501_gpio_set);
-
-
 /* sm501_unit_power
  *
  * alters the power active gate to set specific units on or off
@@ -906,6 +876,226 @@ static int sm501_register_display(struct sm501_devdata *sm,
 	return sm501_register_device(sm, pdev);
 }
 
+#ifdef CONFIG_MFD_SM501_GPIO
+
+static inline struct sm501_gpio_chip *to_sm501_gpio(struct gpio_chip *gc)
+{
+	return container_of(gc, struct sm501_gpio_chip, gpio);
+}
+
+static inline struct sm501_devdata *sm501_gpio_to_dev(struct sm501_gpio *gpio)
+{
+	return container_of(gpio, struct sm501_devdata, gpio);
+}
+
+static int sm501_gpio_get(struct gpio_chip *chip, unsigned offset)
+
+{
+	struct sm501_gpio_chip *smgpio = to_sm501_gpio(chip);
+	unsigned long result;
+
+	result = readl(smgpio->regbase + SM501_GPIO_DATA_LOW);
+	result >>= offset;
+
+	return result & 1UL;
+}
+
+static void sm501_gpio_set(struct gpio_chip *chip, unsigned offset, int value)
+
+{
+	struct sm501_gpio_chip *smchip = to_sm501_gpio(chip);
+	struct sm501_gpio *smgpio = smchip->ourgpio;
+	unsigned long bit = 1 << offset;
+	void __iomem *regs = smchip->regbase;
+	unsigned long save;
+	unsigned long val;
+
+	dev_dbg(sm501_gpio_to_dev(smgpio)->dev, "%s(%p,%d)\n",
+		__func__, chip, offset);
+
+	spin_lock_irqsave(&smgpio->lock, save);
+
+	val = readl(regs + SM501_GPIO_DATA_LOW) & ~bit;
+	if (value)
+		val |= bit;
+	writel(val, regs);
+
+	sm501_sync_regs(sm501_gpio_to_dev(smgpio));
+	spin_unlock_irqrestore(&smgpio->lock, save);
+}
+
+static int sm501_gpio_input(struct gpio_chip *chip, unsigned offset)
+{
+	struct sm501_gpio_chip *smchip = to_sm501_gpio(chip);
+	struct sm501_gpio *smgpio = smchip->ourgpio;
+	void __iomem *regs = smchip->regbase;
+	unsigned long bit = 1 << offset;
+	unsigned long save;
+	unsigned long ddr;
+
+	dev_info(sm501_gpio_to_dev(smgpio)->dev, "%s(%p,%d)\n",
+		 __func__, chip, offset);
+
+	spin_lock_irqsave(&smgpio->lock, save);
+
+	ddr = readl(regs + SM501_GPIO_DDR_LOW);
+	writel(ddr & ~bit, regs + SM501_GPIO_DDR_LOW);
+
+	sm501_sync_regs(sm501_gpio_to_dev(smgpio));
+	spin_unlock_irqrestore(&smgpio->lock, save);
+
+	return 0;
+}
+
+static int sm501_gpio_output(struct gpio_chip *chip,
+			     unsigned offset, int value)
+{
+	struct sm501_gpio_chip *smchip = to_sm501_gpio(chip);
+	struct sm501_gpio *smgpio = smchip->ourgpio;
+	unsigned long bit = 1 << offset;
+	void __iomem *regs = smchip->regbase;
+	unsigned long save;
+	unsigned long val;
+	unsigned long ddr;
+
+	dev_dbg(sm501_gpio_to_dev(smgpio)->dev, "%s(%p,%d,%d)\n",
+		__func__, chip, offset, value);
+
+	spin_lock_irqsave(&smgpio->lock, save);
+
+	val = readl(regs + SM501_GPIO_DATA_LOW);
+	if (value)
+		val |= bit;
+	else
+		val &= ~bit;
+	writel(val, regs);
+
+	ddr = readl(regs + SM501_GPIO_DDR_LOW);
+	writel(ddr | bit, regs + SM501_GPIO_DDR_LOW);
+
+	sm501_sync_regs(sm501_gpio_to_dev(smgpio));
+	writel(val, regs + SM501_GPIO_DATA_LOW);
+
+	sm501_sync_regs(sm501_gpio_to_dev(smgpio));
+	spin_unlock_irqrestore(&smgpio->lock, save);
+
+	return 0;
+}
+
+static struct gpio_chip gpio_chip_template = {
+	.ngpio			= 32,
+	.direction_input	= sm501_gpio_input,
+	.direction_output	= sm501_gpio_output,
+	.set			= sm501_gpio_set,
+	.get			= sm501_gpio_get,
+};
+
+static int __devinit sm501_gpio_register_chip(struct sm501_devdata *sm,
+					      struct sm501_gpio *gpio,
+					      struct sm501_gpio_chip *chip)
+{
+	struct sm501_platdata *pdata = sm->platdata;
+	struct gpio_chip *gchip = &chip->gpio;
+	unsigned base = pdata->gpio_base;
+
+	memcpy(chip, &gpio_chip_template, sizeof(struct gpio_chip));
+
+	if (chip == &gpio->high) {
+		base += 32;
+		chip->regbase = gpio->regs + SM501_GPIO_DATA_HIGH;
+		gchip->label  = "SM501-HIGH";
+	} else {
+		chip->regbase = gpio->regs + SM501_GPIO_DATA_LOW;
+		gchip->label  = "SM501-LOW";
+	}
+
+	gchip->base   = base;
+	chip->ourgpio = gpio;
+
+	return gpiochip_add(gchip);
+}
+
+static int sm501_register_gpio(struct sm501_devdata *sm)
+{
+	struct sm501_gpio *gpio = &sm->gpio;
+	resource_size_t iobase = sm->io_res->start + SM501_GPIO;
+	int ret;
+	int tmp;
+
+	dev_dbg(sm->dev, "registering gpio block %08llx\n",
+		(unsigned long long)iobase);
+
+	spin_lock_init(&gpio->lock);
+
+	gpio->regs_res = request_mem_region(iobase, 0x20, "sm501-gpio");
+	if (gpio->regs_res == NULL) {
+		dev_err(sm->dev, "gpio: failed to request region\n");
+		return -ENXIO;
+	}
+
+	gpio->regs = ioremap(iobase, 0x20);
+	if (gpio->regs == NULL) {
+		dev_err(sm->dev, "gpio: failed to remap registers\n");
+		ret = -ENXIO;
+		goto err_mapped;
+	}
+
+	/* Register both our chips. */
+
+	ret = sm501_gpio_register_chip(sm, gpio, &gpio->low);
+	if (ret) {
+		dev_err(sm->dev, "failed to add low chip\n");
+		goto err_mapped;
+	}
+
+	ret = sm501_gpio_register_chip(sm, gpio, &gpio->high);
+	if (ret) {
+		dev_err(sm->dev, "failed to add high chip\n");
+		goto err_low_chip;
+	}
+
+	gpio->registered = 1;
+
+	return 0;
+
+ err_low_chip:
+	tmp = gpiochip_remove(&gpio->low.gpio);
+	if (tmp) {
+		dev_err(sm->dev, "cannot remove low chip, cannot tidy up\n");
+		return ret;
+	}
+
+ err_mapped:
+	release_resource(gpio->regs_res);
+	kfree(gpio->regs_res);
+
+	return ret;
+}
+
+static void sm501_gpio_remove(struct sm501_devdata *sm)
+{
+	int ret;
+
+	ret = gpiochip_remove(&sm->gpio.low.gpio);
+	if (ret)
+		dev_err(sm->dev, "cannot remove low chip, cannot tidy up\n");
+
+	ret = gpiochip_remove(&sm->gpio.high.gpio);
+	if (ret)
+		dev_err(sm->dev, "cannot remove high chip, cannot tidy up\n");
+}
+
+#else
+static int sm501_register_gpio(struct sm501_devdata *sm)
+{
+	return 0;
+}
+
+static void sm501_gpio_remove(struct sm501_devdata *sm)
+{
+}
+#endif
+
 /* sm501_dbg_regs
  *
  * Debug attribute to attach to parent device to show core registers
@@ -1059,6 +1249,8 @@ static int sm501_init_dev(struct sm501_devdata *sm)
 			sm501_register_usbhost(sm, &mem_avail);
 		if (idata->devices & (SM501_USE_UART0 | SM501_USE_UART1))
 			sm501_register_uart(sm, idata->devices);
+		if (idata->devices & SM501_USE_GPIO)
+			sm501_register_gpio(sm);
 	}
 
 	ret = sm501_check_clocks(sm);
@@ -1366,6 +1558,9 @@ static void sm501_dev_remove(struct sm501_devdata *sm)
 		sm501_remove_sub(sm, smdev);
 
 	device_remove_file(sm->dev, &dev_attr_dbg_regs);
+
+	if (sm->gpio.registered)
+		sm501_gpio_remove(sm);
 }
 
 static void sm501_pci_remove(struct pci_dev *dev)
diff --git a/include/linux/sm501.h b/include/linux/sm501.h
index 145405bf9efa..6ea39007c8a3 100644
--- a/include/linux/sm501.h
+++ b/include/linux/sm501.h
@@ -46,24 +46,6 @@ extern unsigned long sm501_modify_reg(struct device *dev,
 				      unsigned long set,
 				      unsigned long clear);
 
-/* sm501_gpio_set
- *
- * set the state of the given GPIO line
-*/
-
-extern void sm501_gpio_set(struct device *dev,
-			   unsigned long gpio,
-			   unsigned int to,
-			   unsigned int dir);
-
-/* sm501_gpio_get
- *
- * get the state of the given GPIO line
-*/
-
-extern unsigned long sm501_gpio_get(struct device *dev,
-				    unsigned long gpio);
-
 
 /* Platform data definitions */
 
@@ -131,6 +113,7 @@ struct sm501_reg_init {
 #define SM501_USE_FBACCEL	(1<<6)
 #define SM501_USE_AC97		(1<<7)
 #define SM501_USE_I2S		(1<<8)
+#define SM501_USE_GPIO		(1<<9)
 
 #define SM501_USE_ALL		(0xffffffff)
 
@@ -173,6 +156,7 @@ struct sm501_platdata {
 	struct sm501_platdata_fb	*fb;
 
 	int				 flags;
+	unsigned			 gpio_base;
 
 	int	(*get_power)(struct device *dev);
 	int	(*set_power)(struct device *dev, unsigned int on);
-- 
cgit v1.2.3


From 60e540d617b40eb3d37f1dd99c97af588ff9b70b Mon Sep 17 00:00:00 2001
From: Arnaud Patard <apatard@mandriva.com>
Date: Fri, 25 Jul 2008 01:46:00 -0700
Subject: sm501: gpio dynamic registration for PCI devices

The SM501 PCI card requires a dyanmic gpio allocation as the number of
cards is not known at compile time.  Fixup the platform data and
registration to deal with this.

Acked-by: Ben Dooks <ben-linux@fluff.org>
Signed-off-by: Arnaud Patard <apatard@mandriva.com>
Cc: David Brownell <david-b@pacbell.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/mfd/sm501.c   | 6 ++++--
 include/linux/sm501.h | 2 +-
 2 files changed, 5 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/sm501.c b/drivers/mfd/sm501.c
index be8713908125..c3e5a48f6148 100644
--- a/drivers/mfd/sm501.c
+++ b/drivers/mfd/sm501.c
@@ -996,12 +996,13 @@ static int __devinit sm501_gpio_register_chip(struct sm501_devdata *sm,
 {
 	struct sm501_platdata *pdata = sm->platdata;
 	struct gpio_chip *gchip = &chip->gpio;
-	unsigned base = pdata->gpio_base;
+	int base = pdata->gpio_base;
 
 	memcpy(chip, &gpio_chip_template, sizeof(struct gpio_chip));
 
 	if (chip == &gpio->high) {
-		base += 32;
+		if (base > 0)
+			base += 32;
 		chip->regbase = gpio->regs + SM501_GPIO_DATA_HIGH;
 		gchip->label  = "SM501-HIGH";
 	} else {
@@ -1452,6 +1453,7 @@ static struct sm501_platdata_fb sm501_fb_pdata = {
 static struct sm501_platdata sm501_pci_platdata = {
 	.init		= &sm501_pci_initdata,
 	.fb		= &sm501_fb_pdata,
+	.gpio_base	= -1,
 };
 
 static int sm501_pci_probe(struct pci_dev *dev,
diff --git a/include/linux/sm501.h b/include/linux/sm501.h
index 6ea39007c8a3..a8d02f36ad32 100644
--- a/include/linux/sm501.h
+++ b/include/linux/sm501.h
@@ -156,7 +156,7 @@ struct sm501_platdata {
 	struct sm501_platdata_fb	*fb;
 
 	int				 flags;
-	unsigned			 gpio_base;
+	int				 gpio_base;
 
 	int	(*get_power)(struct device *dev);
 	int	(*set_power)(struct device *dev, unsigned int on);
-- 
cgit v1.2.3


From 42cd2366fb9b58cdfc1855be32b31a78e40b2079 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-linux@fluff.org>
Date: Fri, 25 Jul 2008 01:46:01 -0700
Subject: sm501: gpio I2C support

Add support for adding the GPIO based I2C resources.

Signed-off-by: Ben Dooks <ben-linux@fluff.org>
Cc: Arnaud Patard <apatard@mandriva.com>
Cc: David Brownell <david-b@pacbell.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/mfd/sm501.c   | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++-
 include/linux/sm501.h | 10 ++++++-
 2 files changed, 84 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/sm501.c b/drivers/mfd/sm501.c
index c3e5a48f6148..107215b28805 100644
--- a/drivers/mfd/sm501.c
+++ b/drivers/mfd/sm501.c
@@ -20,6 +20,7 @@
 #include <linux/platform_device.h>
 #include <linux/pci.h>
 #include <linux/gpio.h>
+#include <linux/i2c-gpio.h>
 
 #include <linux/sm501.h>
 #include <linux/sm501-regs.h>
@@ -1086,6 +1087,11 @@ static void sm501_gpio_remove(struct sm501_devdata *sm)
 		dev_err(sm->dev, "cannot remove high chip, cannot tidy up\n");
 }
 
+static int sm501_gpio_pin2nr(struct sm501_devdata *sm, unsigned int pin)
+{
+	struct sm501_gpio *gpio = &sm->gpio;
+	return pin + (pin < 32) ? gpio->low.gpio.base : gpio->high.gpio.base;
+}
 #else
 static int sm501_register_gpio(struct sm501_devdata *sm)
 {
@@ -1095,8 +1101,66 @@ static int sm501_register_gpio(struct sm501_devdata *sm)
 static void sm501_gpio_remove(struct sm501_devdata *sm)
 {
 }
+
+static int sm501_gpio_pin2nr(struct sm501_devdata *sm, unsigned int pin)
+{
+	return -1;
+}
 #endif
 
+static int sm501_register_gpio_i2c_instance(struct sm501_devdata *sm,
+					    struct sm501_platdata_gpio_i2c *iic)
+{
+	struct i2c_gpio_platform_data *icd;
+	struct platform_device *pdev;
+
+	pdev = sm501_create_subdev(sm, "i2c-gpio", 0,
+				   sizeof(struct i2c_gpio_platform_data));
+	if (!pdev)
+		return -ENOMEM;
+
+	icd = pdev->dev.platform_data;
+
+	/* We keep the pin_sda and pin_scl fields relative in case the
+	 * same platform data is passed to >1 SM501.
+	 */
+
+	icd->sda_pin = sm501_gpio_pin2nr(sm, iic->pin_sda);
+	icd->scl_pin = sm501_gpio_pin2nr(sm, iic->pin_scl);
+	icd->timeout = iic->timeout;
+	icd->udelay = iic->udelay;
+
+	/* note, we can't use either of the pin numbers, as the i2c-gpio
+	 * driver uses the platform.id field to generate the bus number
+	 * to register with the i2c core; The i2c core doesn't have enough
+	 * entries to deal with anything we currently use.
+	*/
+
+	pdev->id = iic->bus_num;
+
+	dev_info(sm->dev, "registering i2c-%d: sda=%d (%d), scl=%d (%d)\n",
+		 iic->bus_num,
+		 icd->sda_pin, iic->pin_sda, icd->scl_pin, iic->pin_scl);
+
+	return sm501_register_device(sm, pdev);
+}
+
+static int sm501_register_gpio_i2c(struct sm501_devdata *sm,
+				   struct sm501_platdata *pdata)
+{
+	struct sm501_platdata_gpio_i2c *iic = pdata->gpio_i2c;
+	int index;
+	int ret;
+
+	for (index = 0; index < pdata->gpio_i2c_nr; index++, iic++) {
+		ret = sm501_register_gpio_i2c_instance(sm, iic);
+		if (ret < 0)
+			return ret;
+	}
+
+	return 0;
+}
+
 /* sm501_dbg_regs
  *
  * Debug attribute to attach to parent device to show core registers
@@ -1204,6 +1268,7 @@ static unsigned int sm501_mem_local[] = {
 static int sm501_init_dev(struct sm501_devdata *sm)
 {
 	struct sm501_initdata *idata;
+	struct sm501_platdata *pdata;
 	resource_size_t mem_avail;
 	unsigned long dramctrl;
 	unsigned long devid;
@@ -1242,7 +1307,9 @@ static int sm501_init_dev(struct sm501_devdata *sm)
 
 	/* check to see if we have some device initialisation */
 
-	idata = sm->platdata ? sm->platdata->init : NULL;
+	pdata = sm->platdata;
+	idata = pdata ? pdata->init : NULL;
+
 	if (idata) {
 		sm501_init_regs(sm, idata);
 
@@ -1254,6 +1321,13 @@ static int sm501_init_dev(struct sm501_devdata *sm)
 			sm501_register_gpio(sm);
 	}
 
+	if (pdata->gpio_i2c != NULL && pdata->gpio_i2c_nr > 0) {
+		if (!sm->gpio.registered)
+			dev_err(sm->dev, "no gpio registered for i2c gpio.\n");
+		else
+			sm501_register_gpio_i2c(sm, pdata);
+	}
+
 	ret = sm501_check_clocks(sm);
 	if (ret) {
 		dev_err(sm->dev, "M1X and M clocks sourced from different "
diff --git a/include/linux/sm501.h b/include/linux/sm501.h
index a8d02f36ad32..214f93209b8c 100644
--- a/include/linux/sm501.h
+++ b/include/linux/sm501.h
@@ -86,11 +86,19 @@ struct sm501_platdata_fb {
 	struct sm501_platdata_fbsub	*fb_pnl;
 };
 
-/* gpio i2c */
+/* gpio i2c
+ *
+ * Note, we have to pass in the bus number, as the number used will be
+ * passed to the i2c-gpio driver's platform_device.id, subsequently used
+ * to register the i2c bus.
+*/
 
 struct sm501_platdata_gpio_i2c {
+	unsigned int		bus_num;
 	unsigned int		pin_sda;
 	unsigned int		pin_scl;
+	int			udelay;
+	int			timeout;
 };
 
 /* sm501_initdata
-- 
cgit v1.2.3


From ef53d9c5e4da147ecaa43c44c5e5945eb83970a2 Mon Sep 17 00:00:00 2001
From: Srinivasa D S <srinivasa@in.ibm.com>
Date: Fri, 25 Jul 2008 01:46:04 -0700
Subject: kprobes: improve kretprobe scalability with hashed locking

Currently list of kretprobe instances are stored in kretprobe object (as
used_instances,free_instances) and in kretprobe hash table.  We have one
global kretprobe lock to serialise the access to these lists.  This causes
only one kretprobe handler to execute at a time.  Hence affects system
performance, particularly on SMP systems and when return probe is set on
lot of functions (like on all systemcalls).

Solution proposed here gives fine-grain locks that performs better on SMP
system compared to present kretprobe implementation.

Solution:

 1) Instead of having one global lock to protect kretprobe instances
    present in kretprobe object and kretprobe hash table.  We will have
    two locks, one lock for protecting kretprobe hash table and another
    lock for kretporbe object.

 2) We hold lock present in kretprobe object while we modify kretprobe
    instance in kretprobe object and we hold per-hash-list lock while
    modifying kretprobe instances present in that hash list.  To prevent
    deadlock, we never grab a per-hash-list lock while holding a kretprobe
    lock.

 3) We can remove used_instances from struct kretprobe, as we can
    track used instances of kretprobe instances using kretprobe hash
    table.

Time duration for kernel compilation ("make -j 8") on a 8-way ppc64 system
with return probes set on all systemcalls looks like this.

cacheline              non-cacheline             Un-patched kernel
aligned patch 	       aligned patch
===============================================================================
real    9m46.784s       9m54.412s                  10m2.450s
user    40m5.715s       40m7.142s                  40m4.273s
sys     2m57.754s       2m58.583s                  3m17.430s
===========================================================

Time duration for kernel compilation ("make -j 8) on the same system, when
kernel is not probed.
=========================
real    9m26.389s
user    40m8.775s
sys     2m7.283s
=========================

Signed-off-by: Srinivasa DS <srinivasa@in.ibm.com>
Signed-off-by: Jim Keniston <jkenisto@us.ibm.com>
Acked-by: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Masami Hiramatsu <mhiramat@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm/kernel/kprobes.c     |   6 +-
 arch/ia64/kernel/kprobes.c    |   6 +-
 arch/powerpc/kernel/kprobes.c |   6 +-
 arch/s390/kernel/kprobes.c    |   6 +-
 arch/sparc64/kernel/kprobes.c |  11 ++--
 arch/x86/kernel/kprobes.c     |   6 +-
 include/linux/kprobes.h       |   7 ++-
 kernel/kprobes.c              | 127 +++++++++++++++++++++++++++++-------------
 8 files changed, 108 insertions(+), 67 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/kernel/kprobes.c b/arch/arm/kernel/kprobes.c
index 5ee39e10c8d1..d28513f14d05 100644
--- a/arch/arm/kernel/kprobes.c
+++ b/arch/arm/kernel/kprobes.c
@@ -296,8 +296,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
 	unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline;
 
 	INIT_HLIST_HEAD(&empty_rp);
-	spin_lock_irqsave(&kretprobe_lock, flags);
-	head = kretprobe_inst_table_head(current);
+	kretprobe_hash_lock(current, &head, &flags);
 
 	/*
 	 * It is possible to have multiple instances associated with a given
@@ -337,7 +336,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
 	}
 
 	kretprobe_assert(ri, orig_ret_address, trampoline_address);
-	spin_unlock_irqrestore(&kretprobe_lock, flags);
+	kretprobe_hash_unlock(current, &flags);
 
 	hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
 		hlist_del(&ri->hlist);
@@ -347,7 +346,6 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
 	return (void *)orig_ret_address;
 }
 
-/* Called with kretprobe_lock held. */
 void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
 				      struct pt_regs *regs)
 {
diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c
index 233434f4f88f..f07688da947c 100644
--- a/arch/ia64/kernel/kprobes.c
+++ b/arch/ia64/kernel/kprobes.c
@@ -429,8 +429,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
 		((struct fnptr *)kretprobe_trampoline)->ip;
 
 	INIT_HLIST_HEAD(&empty_rp);
-	spin_lock_irqsave(&kretprobe_lock, flags);
-	head = kretprobe_inst_table_head(current);
+	kretprobe_hash_lock(current, &head, &flags);
 
 	/*
 	 * It is possible to have multiple instances associated with a given
@@ -485,7 +484,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
 	kretprobe_assert(ri, orig_ret_address, trampoline_address);
 
 	reset_current_kprobe();
-	spin_unlock_irqrestore(&kretprobe_lock, flags);
+	kretprobe_hash_unlock(current, &flags);
 	preempt_enable_no_resched();
 
 	hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
@@ -500,7 +499,6 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
 	return 1;
 }
 
-/* Called with kretprobe_lock held */
 void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
 				      struct pt_regs *regs)
 {
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index 4ba2af125450..de79915452c8 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -144,7 +144,6 @@ static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
 	kcb->kprobe_saved_msr = regs->msr;
 }
 
-/* Called with kretprobe_lock held */
 void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
 				      struct pt_regs *regs)
 {
@@ -312,8 +311,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p,
 	unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline;
 
 	INIT_HLIST_HEAD(&empty_rp);
-	spin_lock_irqsave(&kretprobe_lock, flags);
-	head = kretprobe_inst_table_head(current);
+	kretprobe_hash_lock(current, &head, &flags);
 
 	/*
 	 * It is possible to have multiple instances associated with a given
@@ -352,7 +350,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p,
 	regs->nip = orig_ret_address;
 
 	reset_current_kprobe();
-	spin_unlock_irqrestore(&kretprobe_lock, flags);
+	kretprobe_hash_unlock(current, &flags);
 	preempt_enable_no_resched();
 
 	hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index 288ad490a6dd..4f82e5b5f879 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -270,7 +270,6 @@ static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
 	__ctl_store(kcb->kprobe_saved_ctl, 9, 11);
 }
 
-/* Called with kretprobe_lock held */
 void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
 					struct pt_regs *regs)
 {
@@ -377,8 +376,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p,
 	unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline;
 
 	INIT_HLIST_HEAD(&empty_rp);
-	spin_lock_irqsave(&kretprobe_lock, flags);
-	head = kretprobe_inst_table_head(current);
+	kretprobe_hash_lock(current, &head, &flags);
 
 	/*
 	 * It is possible to have multiple instances associated with a given
@@ -417,7 +415,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p,
 	regs->psw.addr = orig_ret_address | PSW_ADDR_AMODE;
 
 	reset_current_kprobe();
-	spin_unlock_irqrestore(&kretprobe_lock, flags);
+	kretprobe_hash_unlock(current, &flags);
 	preempt_enable_no_resched();
 
 	hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
diff --git a/arch/sparc64/kernel/kprobes.c b/arch/sparc64/kernel/kprobes.c
index f43b5d755354..201a6e547e4a 100644
--- a/arch/sparc64/kernel/kprobes.c
+++ b/arch/sparc64/kernel/kprobes.c
@@ -478,9 +478,9 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
 	return 0;
 }
 
-/* Called with kretprobe_lock held.  The value stored in the return
- * address register is actually 2 instructions before where the
- * callee will return to.  Sequences usually look something like this
+/* The value stored in the return address register is actually 2
+ * instructions before where the callee will return to.
+ * Sequences usually look something like this
  *
  *		call	some_function	<--- return register points here
  *		 nop			<--- call delay slot
@@ -512,8 +512,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
 	unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline;
 
 	INIT_HLIST_HEAD(&empty_rp);
-	spin_lock_irqsave(&kretprobe_lock, flags);
-	head = kretprobe_inst_table_head(current);
+	kretprobe_hash_lock(current, &head, &flags);
 
 	/*
 	 * It is possible to have multiple instances associated with a given
@@ -553,7 +552,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
 	regs->tnpc = orig_ret_address + 4;
 
 	reset_current_kprobe();
-	spin_unlock_irqrestore(&kretprobe_lock, flags);
+	kretprobe_hash_unlock(current, &flags);
 	preempt_enable_no_resched();
 
 	hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 43c019f85f0d..6c27679ec6aa 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -431,7 +431,6 @@ static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
 		regs->ip = (unsigned long)p->ainsn.insn;
 }
 
-/* Called with kretprobe_lock held */
 void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
 				      struct pt_regs *regs)
 {
@@ -682,8 +681,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
 	unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline;
 
 	INIT_HLIST_HEAD(&empty_rp);
-	spin_lock_irqsave(&kretprobe_lock, flags);
-	head = kretprobe_inst_table_head(current);
+	kretprobe_hash_lock(current, &head, &flags);
 	/* fixup registers */
 #ifdef CONFIG_X86_64
 	regs->cs = __KERNEL_CS;
@@ -732,7 +730,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
 
 	kretprobe_assert(ri, orig_ret_address, trampoline_address);
 
-	spin_unlock_irqrestore(&kretprobe_lock, flags);
+	kretprobe_hash_unlock(current, &flags);
 
 	hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
 		hlist_del(&ri->hlist);
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 04a3556bdea6..0be7795655fa 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -157,11 +157,10 @@ struct kretprobe {
 	int nmissed;
 	size_t data_size;
 	struct hlist_head free_instances;
-	struct hlist_head used_instances;
+	spinlock_t lock;
 };
 
 struct kretprobe_instance {
-	struct hlist_node uflist; /* either on free list or used list */
 	struct hlist_node hlist;
 	struct kretprobe *rp;
 	kprobe_opcode_t *ret_addr;
@@ -201,7 +200,6 @@ static inline int init_test_probes(void)
 }
 #endif /* CONFIG_KPROBES_SANITY_TEST */
 
-extern spinlock_t kretprobe_lock;
 extern struct mutex kprobe_mutex;
 extern int arch_prepare_kprobe(struct kprobe *p);
 extern void arch_arm_kprobe(struct kprobe *p);
@@ -214,6 +212,9 @@ extern void kprobes_inc_nmissed_count(struct kprobe *p);
 
 /* Get the kprobe at this addr (if any) - called with preemption disabled */
 struct kprobe *get_kprobe(void *addr);
+void kretprobe_hash_lock(struct task_struct *tsk,
+			 struct hlist_head **head, unsigned long *flags);
+void kretprobe_hash_unlock(struct task_struct *tsk, unsigned long *flags);
 struct hlist_head * kretprobe_inst_table_head(struct task_struct *tsk);
 
 /* kprobe_running() will just return the current_kprobe on this CPU */
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 1485ca8d0e00..cb0b3bde3617 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -62,6 +62,7 @@
 	addr = ((kprobe_opcode_t *)(kallsyms_lookup_name(name)))
 #endif
 
+static int kprobes_initialized;
 static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE];
 static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE];
 
@@ -69,8 +70,15 @@ static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE];
 static bool kprobe_enabled;
 
 DEFINE_MUTEX(kprobe_mutex);		/* Protects kprobe_table */
-DEFINE_SPINLOCK(kretprobe_lock);	/* Protects kretprobe_inst_table */
 static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL;
+static struct {
+	spinlock_t lock ____cacheline_aligned;
+} kretprobe_table_locks[KPROBE_TABLE_SIZE];
+
+static spinlock_t *kretprobe_table_lock_ptr(unsigned long hash)
+{
+	return &(kretprobe_table_locks[hash].lock);
+}
 
 /*
  * Normally, functions that we'd want to prohibit kprobes in, are marked
@@ -368,26 +376,53 @@ void __kprobes kprobes_inc_nmissed_count(struct kprobe *p)
 	return;
 }
 
-/* Called with kretprobe_lock held */
 void __kprobes recycle_rp_inst(struct kretprobe_instance *ri,
 				struct hlist_head *head)
 {
+	struct kretprobe *rp = ri->rp;
+
 	/* remove rp inst off the rprobe_inst_table */
 	hlist_del(&ri->hlist);
-	if (ri->rp) {
-		/* remove rp inst off the used list */
-		hlist_del(&ri->uflist);
-		/* put rp inst back onto the free list */
-		INIT_HLIST_NODE(&ri->uflist);
-		hlist_add_head(&ri->uflist, &ri->rp->free_instances);
+	INIT_HLIST_NODE(&ri->hlist);
+	if (likely(rp)) {
+		spin_lock(&rp->lock);
+		hlist_add_head(&ri->hlist, &rp->free_instances);
+		spin_unlock(&rp->lock);
 	} else
 		/* Unregistering */
 		hlist_add_head(&ri->hlist, head);
 }
 
-struct hlist_head __kprobes *kretprobe_inst_table_head(struct task_struct *tsk)
+void kretprobe_hash_lock(struct task_struct *tsk,
+			 struct hlist_head **head, unsigned long *flags)
+{
+	unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS);
+	spinlock_t *hlist_lock;
+
+	*head = &kretprobe_inst_table[hash];
+	hlist_lock = kretprobe_table_lock_ptr(hash);
+	spin_lock_irqsave(hlist_lock, *flags);
+}
+
+void kretprobe_table_lock(unsigned long hash, unsigned long *flags)
 {
-	return &kretprobe_inst_table[hash_ptr(tsk, KPROBE_HASH_BITS)];
+	spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash);
+	spin_lock_irqsave(hlist_lock, *flags);
+}
+
+void kretprobe_hash_unlock(struct task_struct *tsk, unsigned long *flags)
+{
+	unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS);
+	spinlock_t *hlist_lock;
+
+	hlist_lock = kretprobe_table_lock_ptr(hash);
+	spin_unlock_irqrestore(hlist_lock, *flags);
+}
+
+void kretprobe_table_unlock(unsigned long hash, unsigned long *flags)
+{
+	spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash);
+	spin_unlock_irqrestore(hlist_lock, *flags);
 }
 
 /*
@@ -401,17 +436,21 @@ void __kprobes kprobe_flush_task(struct task_struct *tk)
 	struct kretprobe_instance *ri;
 	struct hlist_head *head, empty_rp;
 	struct hlist_node *node, *tmp;
-	unsigned long flags = 0;
+	unsigned long hash, flags = 0;
 
-	INIT_HLIST_HEAD(&empty_rp);
-	spin_lock_irqsave(&kretprobe_lock, flags);
-	head = kretprobe_inst_table_head(tk);
+	if (unlikely(!kprobes_initialized))
+		/* Early boot.  kretprobe_table_locks not yet initialized. */
+		return;
+
+	hash = hash_ptr(tk, KPROBE_HASH_BITS);
+	head = &kretprobe_inst_table[hash];
+	kretprobe_table_lock(hash, &flags);
 	hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
 		if (ri->task == tk)
 			recycle_rp_inst(ri, &empty_rp);
 	}
-	spin_unlock_irqrestore(&kretprobe_lock, flags);
-
+	kretprobe_table_unlock(hash, &flags);
+	INIT_HLIST_HEAD(&empty_rp);
 	hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
 		hlist_del(&ri->hlist);
 		kfree(ri);
@@ -423,24 +462,29 @@ static inline void free_rp_inst(struct kretprobe *rp)
 	struct kretprobe_instance *ri;
 	struct hlist_node *pos, *next;
 
-	hlist_for_each_entry_safe(ri, pos, next, &rp->free_instances, uflist) {
-		hlist_del(&ri->uflist);
+	hlist_for_each_entry_safe(ri, pos, next, &rp->free_instances, hlist) {
+		hlist_del(&ri->hlist);
 		kfree(ri);
 	}
 }
 
 static void __kprobes cleanup_rp_inst(struct kretprobe *rp)
 {
-	unsigned long flags;
+	unsigned long flags, hash;
 	struct kretprobe_instance *ri;
 	struct hlist_node *pos, *next;
+	struct hlist_head *head;
+
 	/* No race here */
-	spin_lock_irqsave(&kretprobe_lock, flags);
-	hlist_for_each_entry_safe(ri, pos, next, &rp->used_instances, uflist) {
-		ri->rp = NULL;
-		hlist_del(&ri->uflist);
+	for (hash = 0; hash < KPROBE_TABLE_SIZE; hash++) {
+		kretprobe_table_lock(hash, &flags);
+		head = &kretprobe_inst_table[hash];
+		hlist_for_each_entry_safe(ri, pos, next, head, hlist) {
+			if (ri->rp == rp)
+				ri->rp = NULL;
+		}
+		kretprobe_table_unlock(hash, &flags);
 	}
-	spin_unlock_irqrestore(&kretprobe_lock, flags);
 	free_rp_inst(rp);
 }
 
@@ -831,32 +875,37 @@ static int __kprobes pre_handler_kretprobe(struct kprobe *p,
 					   struct pt_regs *regs)
 {
 	struct kretprobe *rp = container_of(p, struct kretprobe, kp);
-	unsigned long flags = 0;
+	unsigned long hash, flags = 0;
+	struct kretprobe_instance *ri;
 
 	/*TODO: consider to only swap the RA after the last pre_handler fired */
-	spin_lock_irqsave(&kretprobe_lock, flags);
+	hash = hash_ptr(current, KPROBE_HASH_BITS);
+	spin_lock_irqsave(&rp->lock, flags);
 	if (!hlist_empty(&rp->free_instances)) {
-		struct kretprobe_instance *ri;
-
 		ri = hlist_entry(rp->free_instances.first,
-				 struct kretprobe_instance, uflist);
+				struct kretprobe_instance, hlist);
+		hlist_del(&ri->hlist);
+		spin_unlock_irqrestore(&rp->lock, flags);
+
 		ri->rp = rp;
 		ri->task = current;
 
 		if (rp->entry_handler && rp->entry_handler(ri, regs)) {
-			spin_unlock_irqrestore(&kretprobe_lock, flags);
+			spin_unlock_irqrestore(&rp->lock, flags);
 			return 0;
 		}
 
 		arch_prepare_kretprobe(ri, regs);
 
 		/* XXX(hch): why is there no hlist_move_head? */
-		hlist_del(&ri->uflist);
-		hlist_add_head(&ri->uflist, &ri->rp->used_instances);
-		hlist_add_head(&ri->hlist, kretprobe_inst_table_head(ri->task));
-	} else
+		INIT_HLIST_NODE(&ri->hlist);
+		kretprobe_table_lock(hash, &flags);
+		hlist_add_head(&ri->hlist, &kretprobe_inst_table[hash]);
+		kretprobe_table_unlock(hash, &flags);
+	} else {
 		rp->nmissed++;
-	spin_unlock_irqrestore(&kretprobe_lock, flags);
+		spin_unlock_irqrestore(&rp->lock, flags);
+	}
 	return 0;
 }
 
@@ -892,7 +941,7 @@ static int __kprobes __register_kretprobe(struct kretprobe *rp,
 		rp->maxactive = NR_CPUS;
 #endif
 	}
-	INIT_HLIST_HEAD(&rp->used_instances);
+	spin_lock_init(&rp->lock);
 	INIT_HLIST_HEAD(&rp->free_instances);
 	for (i = 0; i < rp->maxactive; i++) {
 		inst = kmalloc(sizeof(struct kretprobe_instance) +
@@ -901,8 +950,8 @@ static int __kprobes __register_kretprobe(struct kretprobe *rp,
 			free_rp_inst(rp);
 			return -ENOMEM;
 		}
-		INIT_HLIST_NODE(&inst->uflist);
-		hlist_add_head(&inst->uflist, &rp->free_instances);
+		INIT_HLIST_NODE(&inst->hlist);
+		hlist_add_head(&inst->hlist, &rp->free_instances);
 	}
 
 	rp->nmissed = 0;
@@ -1009,6 +1058,7 @@ static int __init init_kprobes(void)
 	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
 		INIT_HLIST_HEAD(&kprobe_table[i]);
 		INIT_HLIST_HEAD(&kretprobe_inst_table[i]);
+		spin_lock_init(&(kretprobe_table_locks[i].lock));
 	}
 
 	/*
@@ -1050,6 +1100,7 @@ static int __init init_kprobes(void)
 	err = arch_init_kprobes();
 	if (!err)
 		err = register_die_notifier(&kprobe_exceptions_nb);
+	kprobes_initialized = (err == 0);
 
 	if (!err)
 		init_test_probes();
-- 
cgit v1.2.3


From d8f388d8dc8d4f36539dd37c1fff62cc404ea0fc Mon Sep 17 00:00:00 2001
From: David Brownell <dbrownell@users.sourceforge.net>
Date: Fri, 25 Jul 2008 01:46:07 -0700
Subject: gpio: sysfs interface

This adds a simple sysfs interface for GPIOs.

    /sys/class/gpio
    	/export ... asks the kernel to export a GPIO to userspace
    	/unexport ... to return a GPIO to the kernel
        /gpioN ... for each exported GPIO #N
	    /value ... always readable, writes fail for input GPIOs
	    /direction ... r/w as: in, out (default low); write high, low
	/gpiochipN ... for each gpiochip; #N is its first GPIO
	    /base ... (r/o) same as N
	    /label ... (r/o) descriptive, not necessarily unique
	    /ngpio ... (r/o) number of GPIOs; numbered N .. N+(ngpio - 1)

GPIOs claimed by kernel code may be exported by its owner using a new
gpio_export() call, which should be most useful for driver debugging.
Such exports may optionally be done without a "direction" attribute.

Userspace may ask to take over a GPIO by writing to a sysfs control file,
helping to cope with incomplete board support or other "one-off"
requirements that don't merit full kernel support:

  echo 23 > /sys/class/gpio/export
	... will gpio_request(23, "sysfs") and gpio_export(23);
	use /sys/class/gpio/gpio-23/direction to (re)configure it,
	when that GPIO can be used as both input and output.
  echo 23 > /sys/class/gpio/unexport
	... will gpio_free(23), when it was exported as above

The extra D-space footprint is a few hundred bytes, except for the sysfs
resources associated with each exported GPIO.  The additional I-space
footprint is about two thirds of the current size of gpiolib (!).  Since
no /dev node creation is involved, no "udev" support is needed.

Related changes:

  * This adds a device pointer to "struct gpio_chip".  When GPIO
    providers initialize that, sysfs gpio class devices become children of
    that device instead of being "virtual" devices.

  * The (few) gpio_chip providers which have such a device node have
    been updated.

  * Some gpio_chip drivers also needed to update their module "owner"
    field ...  for which missing kerneldoc was added.

  * Some gpio_chips don't support input GPIOs.  Those GPIOs are now
    flagged appropriately when the chip is registered.

Based on previous patches, and discussion both on and off LKML.

A Documentation/ABI/testing/sysfs-gpio update is ready to submit once this
merges to mainline.

[akpm@linux-foundation.org: a few maintenance build fixes]
Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Cc: Guennadi Liakhovetski <g.liakhovetski@pengutronix.de>
Cc: Greg KH <greg@kroah.com>
Cc: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/gpio.txt       | 123 +++++++++-
 arch/arm/plat-omap/gpio.c    |   3 +
 arch/avr32/mach-at32ap/pio.c |   2 +
 drivers/gpio/Kconfig         |  15 ++
 drivers/gpio/gpiolib.c       | 536 +++++++++++++++++++++++++++++++++++++++++--
 drivers/gpio/mcp23s08.c      |   1 +
 drivers/gpio/pca953x.c       |   1 +
 drivers/gpio/pcf857x.c       |   1 +
 drivers/i2c/chips/tps65010.c |   2 +
 drivers/mfd/htc-egpio.c      |   2 +
 include/asm-generic/gpio.h   |  33 ++-
 include/linux/gpio.h         |  13 ++
 12 files changed, 712 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/gpio.txt b/Documentation/gpio.txt
index c35ca9e40d4c..8b69811a9642 100644
--- a/Documentation/gpio.txt
+++ b/Documentation/gpio.txt
@@ -347,15 +347,12 @@ necessarily be nonportable.
 Dynamic definition of GPIOs is not currently standard; for example, as
 a side effect of configuring an add-on board with some GPIO expanders.
 
-These calls are purely for kernel space, but a userspace API could be built
-on top of them.
-
 
 GPIO implementor's framework (OPTIONAL)
 =======================================
 As noted earlier, there is an optional implementation framework making it
 easier for platforms to support different kinds of GPIO controller using
-the same programming interface.
+the same programming interface.  This framework is called "gpiolib".
 
 As a debugging aid, if debugfs is available a /sys/kernel/debug/gpio file
 will be found there.  That will list all the controllers registered through
@@ -439,4 +436,120 @@ becomes available.  That may mean the device should not be registered until
 calls for that GPIO can work.  One way to address such dependencies is for
 such gpio_chip controllers to provide setup() and teardown() callbacks to
 board specific code; those board specific callbacks would register devices
-once all the necessary resources are available.
+once all the necessary resources are available, and remove them later when
+the GPIO controller device becomes unavailable.
+
+
+Sysfs Interface for Userspace (OPTIONAL)
+========================================
+Platforms which use the "gpiolib" implementors framework may choose to
+configure a sysfs user interface to GPIOs.  This is different from the
+debugfs interface, since it provides control over GPIO direction and
+value instead of just showing a gpio state summary.  Plus, it could be
+present on production systems without debugging support.
+
+Given approprate hardware documentation for the system, userspace could
+know for example that GPIO #23 controls the write protect line used to
+protect boot loader segments in flash memory.  System upgrade procedures
+may need to temporarily remove that protection, first importing a GPIO,
+then changing its output state, then updating the code before re-enabling
+the write protection.  In normal use, GPIO #23 would never be touched,
+and the kernel would have no need to know about it.
+
+Again depending on appropriate hardware documentation, on some systems
+userspace GPIO can be used to determine system configuration data that
+standard kernels won't know about.  And for some tasks, simple userspace
+GPIO drivers could be all that the system really needs.
+
+Note that standard kernel drivers exist for common "LEDs and Buttons"
+GPIO tasks:  "leds-gpio" and "gpio_keys", respectively.  Use those
+instead of talking directly to the GPIOs; they integrate with kernel
+frameworks better than your userspace code could.
+
+
+Paths in Sysfs
+--------------
+There are three kinds of entry in /sys/class/gpio:
+
+   -	Control interfaces used to get userspace control over GPIOs;
+
+   -	GPIOs themselves; and
+
+   -	GPIO controllers ("gpio_chip" instances).
+
+That's in addition to standard files including the "device" symlink.
+
+The control interfaces are write-only:
+
+    /sys/class/gpio/
+
+    	"export" ... Userspace may ask the kernel to export control of
+		a GPIO to userspace by writing its number to this file.
+
+		Example:  "echo 19 > export" will create a "gpio19" node
+		for GPIO #19, if that's not requested by kernel code.
+
+    	"unexport" ... Reverses the effect of exporting to userspace.
+
+		Example:  "echo 19 > unexport" will remove a "gpio19"
+		node exported using the "export" file.
+
+GPIO signals have paths like /sys/class/gpio/gpio42/ (for GPIO #42)
+and have the following read/write attributes:
+
+    /sys/class/gpio/gpioN/
+
+	"direction" ... reads as either "in" or "out".  This value may
+		normally be written.  Writing as "out" defaults to
+		initializing the value as low.  To ensure glitch free
+		operation, values "low" and "high" may be written to
+		configure the GPIO as an output with that initial value.
+
+		Note that this attribute *will not exist* if the kernel
+		doesn't support changing the direction of a GPIO, or
+		it was exported by kernel code that didn't explicitly
+		allow userspace to reconfigure this GPIO's direction.
+
+	"value" ... reads as either 0 (low) or 1 (high).  If the GPIO
+		is configured as an output, this value may be written;
+		any nonzero value is treated as high.
+
+GPIO controllers have paths like /sys/class/gpio/chipchip42/ (for the
+controller implementing GPIOs starting at #42) and have the following
+read-only attributes:
+
+    /sys/class/gpio/gpiochipN/
+
+    	"base" ... same as N, the first GPIO managed by this chip
+
+    	"label" ... provided for diagnostics (not always unique)
+
+    	"ngpio" ... how many GPIOs this manges (N to N + ngpio - 1)
+
+Board documentation should in most cases cover what GPIOs are used for
+what purposes.  However, those numbers are not always stable; GPIOs on
+a daughtercard might be different depending on the base board being used,
+or other cards in the stack.  In such cases, you may need to use the
+gpiochip nodes (possibly in conjunction with schematics) to determine
+the correct GPIO number to use for a given signal.
+
+
+Exporting from Kernel code
+--------------------------
+Kernel code can explicitly manage exports of GPIOs which have already been
+requested using gpio_request():
+
+	/* export the GPIO to userspace */
+	int gpio_export(unsigned gpio, bool direction_may_change);
+
+	/* reverse gpio_export() */
+	void gpio_unexport();
+
+After a kernel driver requests a GPIO, it may only be made available in
+the sysfs interface by gpio_export().  The driver can control whether the
+signal direction may change.  This helps drivers prevent userspace code
+from accidentally clobbering important system state.
+
+This explicit exporting can help with debugging (by making some kinds
+of experiments easier), or can provide an always-there interface that's
+suitable for documenting as part of a board support package.
diff --git a/arch/arm/plat-omap/gpio.c b/arch/arm/plat-omap/gpio.c
index 1903a3491ee9..d8e9c2c3f0f6 100644
--- a/arch/arm/plat-omap/gpio.c
+++ b/arch/arm/plat-omap/gpio.c
@@ -1488,6 +1488,9 @@ static int __init _omap_gpio_init(void)
 		bank->chip.set = gpio_set;
 		if (bank_is_mpuio(bank)) {
 			bank->chip.label = "mpuio";
+#ifdef CONFIG_ARCH_OMAP1
+			bank->chip.dev = &omap_mpuio_device.dev;
+#endif
 			bank->chip.base = OMAP_MPUIO(0);
 		} else {
 			bank->chip.label = "gpio";
diff --git a/arch/avr32/mach-at32ap/pio.c b/arch/avr32/mach-at32ap/pio.c
index 60da03ba7117..296294f8ed81 100644
--- a/arch/avr32/mach-at32ap/pio.c
+++ b/arch/avr32/mach-at32ap/pio.c
@@ -360,6 +360,8 @@ static int __init pio_probe(struct platform_device *pdev)
 	pio->chip.label = pio->name;
 	pio->chip.base = pdev->id * 32;
 	pio->chip.ngpio = 32;
+	pio->chip.dev = &pdev->dev;
+	pio->chip.owner = THIS_MODULE;
 
 	pio->chip.direction_input = direction_input;
 	pio->chip.get = gpio_get;
diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index fced1909cbba..6ec0e35b98e3 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -23,6 +23,21 @@ config DEBUG_GPIO
 	  slower.  The diagnostics help catch the type of setup errors
 	  that are most common when setting up new platforms or boards.
 
+config GPIO_SYSFS
+	bool "/sys/class/gpio/... (sysfs interface)"
+	depends on SYSFS && EXPERIMENTAL
+	help
+	  Say Y here to add a sysfs interface for GPIOs.
+
+	  This is mostly useful to work around omissions in a system's
+	  kernel support.  Those are common in custom and semicustom
+	  hardware assembled using standard kernels with a minimum of
+	  custom patches.  In those cases, userspace code may import
+	  a given GPIO from the kernel, if no kernel driver requested it.
+
+	  Kernel drivers may also request that a particular GPIO be
+	  exported to userspace; this can be useful when debugging.
+
 # put expanders in the right section, in alphabetical order
 
 comment "I2C GPIO expanders:"
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index beaf6b3a37dc..8d2940517c99 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -2,8 +2,11 @@
 #include <linux/module.h>
 #include <linux/irq.h>
 #include <linux/spinlock.h>
-
-#include <asm/gpio.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <linux/gpio.h>
 
 
 /* Optional implementation infrastructure for GPIO interfaces.
@@ -44,6 +47,8 @@ struct gpio_desc {
 #define FLAG_REQUESTED	0
 #define FLAG_IS_OUT	1
 #define FLAG_RESERVED	2
+#define FLAG_EXPORT	3	/* protected by sysfs_lock */
+#define FLAG_SYSFS	4	/* exported via /sys/class/gpio/control */
 
 #ifdef CONFIG_DEBUG_FS
 	const char		*label;
@@ -151,6 +156,482 @@ err:
 	return ret;
 }
 
+#ifdef CONFIG_GPIO_SYSFS
+
+/* lock protects against unexport_gpio() being called while
+ * sysfs files are active.
+ */
+static DEFINE_MUTEX(sysfs_lock);
+
+/*
+ * /sys/class/gpio/gpioN... only for GPIOs that are exported
+ *   /direction
+ *      * MAY BE OMITTED if kernel won't allow direction changes
+ *      * is read/write as "in" or "out"
+ *      * may also be written as "high" or "low", initializing
+ *        output value as specified ("out" implies "low")
+ *   /value
+ *      * always readable, subject to hardware behavior
+ *      * may be writable, as zero/nonzero
+ *
+ * REVISIT there will likely be an attribute for configuring async
+ * notifications, e.g. to specify polling interval or IRQ trigger type
+ * that would for example trigger a poll() on the "value".
+ */
+
+static ssize_t gpio_direction_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	const struct gpio_desc	*desc = dev_get_drvdata(dev);
+	ssize_t			status;
+
+	mutex_lock(&sysfs_lock);
+
+	if (!test_bit(FLAG_EXPORT, &desc->flags))
+		status = -EIO;
+	else
+		status = sprintf(buf, "%s\n",
+			test_bit(FLAG_IS_OUT, &desc->flags)
+				? "out" : "in");
+
+	mutex_unlock(&sysfs_lock);
+	return status;
+}
+
+static ssize_t gpio_direction_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t size)
+{
+	const struct gpio_desc	*desc = dev_get_drvdata(dev);
+	unsigned		gpio = desc - gpio_desc;
+	ssize_t			status;
+
+	mutex_lock(&sysfs_lock);
+
+	if (!test_bit(FLAG_EXPORT, &desc->flags))
+		status = -EIO;
+	else if (sysfs_streq(buf, "high"))
+		status = gpio_direction_output(gpio, 1);
+	else if (sysfs_streq(buf, "out") || sysfs_streq(buf, "low"))
+		status = gpio_direction_output(gpio, 0);
+	else if (sysfs_streq(buf, "in"))
+		status = gpio_direction_input(gpio);
+	else
+		status = -EINVAL;
+
+	mutex_unlock(&sysfs_lock);
+	return status ? : size;
+}
+
+static const DEVICE_ATTR(direction, 0644,
+		gpio_direction_show, gpio_direction_store);
+
+static ssize_t gpio_value_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	const struct gpio_desc	*desc = dev_get_drvdata(dev);
+	unsigned		gpio = desc - gpio_desc;
+	ssize_t			status;
+
+	mutex_lock(&sysfs_lock);
+
+	if (!test_bit(FLAG_EXPORT, &desc->flags))
+		status = -EIO;
+	else
+		status = sprintf(buf, "%d\n", gpio_get_value_cansleep(gpio));
+
+	mutex_unlock(&sysfs_lock);
+	return status;
+}
+
+static ssize_t gpio_value_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t size)
+{
+	const struct gpio_desc	*desc = dev_get_drvdata(dev);
+	unsigned		gpio = desc - gpio_desc;
+	ssize_t			status;
+
+	mutex_lock(&sysfs_lock);
+
+	if (!test_bit(FLAG_EXPORT, &desc->flags))
+		status = -EIO;
+	else if (!test_bit(FLAG_IS_OUT, &desc->flags))
+		status = -EPERM;
+	else {
+		long		value;
+
+		status = strict_strtol(buf, 0, &value);
+		if (status == 0) {
+			gpio_set_value_cansleep(gpio, value != 0);
+			status = size;
+		}
+	}
+
+	mutex_unlock(&sysfs_lock);
+	return status;
+}
+
+static /*const*/ DEVICE_ATTR(value, 0644,
+		gpio_value_show, gpio_value_store);
+
+static const struct attribute *gpio_attrs[] = {
+	&dev_attr_direction.attr,
+	&dev_attr_value.attr,
+	NULL,
+};
+
+static const struct attribute_group gpio_attr_group = {
+	.attrs = (struct attribute **) gpio_attrs,
+};
+
+/*
+ * /sys/class/gpio/gpiochipN/
+ *   /base ... matching gpio_chip.base (N)
+ *   /label ... matching gpio_chip.label
+ *   /ngpio ... matching gpio_chip.ngpio
+ */
+
+static ssize_t chip_base_show(struct device *dev,
+			       struct device_attribute *attr, char *buf)
+{
+	const struct gpio_chip	*chip = dev_get_drvdata(dev);
+
+	return sprintf(buf, "%d\n", chip->base);
+}
+static DEVICE_ATTR(base, 0444, chip_base_show, NULL);
+
+static ssize_t chip_label_show(struct device *dev,
+			       struct device_attribute *attr, char *buf)
+{
+	const struct gpio_chip	*chip = dev_get_drvdata(dev);
+
+	return sprintf(buf, "%s\n", chip->label ? : "");
+}
+static DEVICE_ATTR(label, 0444, chip_label_show, NULL);
+
+static ssize_t chip_ngpio_show(struct device *dev,
+			       struct device_attribute *attr, char *buf)
+{
+	const struct gpio_chip	*chip = dev_get_drvdata(dev);
+
+	return sprintf(buf, "%u\n", chip->ngpio);
+}
+static DEVICE_ATTR(ngpio, 0444, chip_ngpio_show, NULL);
+
+static const struct attribute *gpiochip_attrs[] = {
+	&dev_attr_base.attr,
+	&dev_attr_label.attr,
+	&dev_attr_ngpio.attr,
+	NULL,
+};
+
+static const struct attribute_group gpiochip_attr_group = {
+	.attrs = (struct attribute **) gpiochip_attrs,
+};
+
+/*
+ * /sys/class/gpio/export ... write-only
+ *	integer N ... number of GPIO to export (full access)
+ * /sys/class/gpio/unexport ... write-only
+ *	integer N ... number of GPIO to unexport
+ */
+static ssize_t export_store(struct class *class, const char *buf, size_t len)
+{
+	long	gpio;
+	int	status;
+
+	status = strict_strtol(buf, 0, &gpio);
+	if (status < 0)
+		goto done;
+
+	/* No extra locking here; FLAG_SYSFS just signifies that the
+	 * request and export were done by on behalf of userspace, so
+	 * they may be undone on its behalf too.
+	 */
+
+	status = gpio_request(gpio, "sysfs");
+	if (status < 0)
+		goto done;
+
+	status = gpio_export(gpio, true);
+	if (status < 0)
+		gpio_free(gpio);
+	else
+		set_bit(FLAG_SYSFS, &gpio_desc[gpio].flags);
+
+done:
+	if (status)
+		pr_debug("%s: status %d\n", __func__, status);
+	return status ? : len;
+}
+
+static ssize_t unexport_store(struct class *class, const char *buf, size_t len)
+{
+	long	gpio;
+	int	status;
+
+	status = strict_strtol(buf, 0, &gpio);
+	if (status < 0)
+		goto done;
+
+	status = -EINVAL;
+
+	/* reject bogus commands (gpio_unexport ignores them) */
+	if (!gpio_is_valid(gpio))
+		goto done;
+
+	/* No extra locking here; FLAG_SYSFS just signifies that the
+	 * request and export were done by on behalf of userspace, so
+	 * they may be undone on its behalf too.
+	 */
+	if (test_and_clear_bit(FLAG_SYSFS, &gpio_desc[gpio].flags)) {
+		status = 0;
+		gpio_free(gpio);
+	}
+done:
+	if (status)
+		pr_debug("%s: status %d\n", __func__, status);
+	return status ? : len;
+}
+
+static struct class_attribute gpio_class_attrs[] = {
+	__ATTR(export, 0200, NULL, export_store),
+	__ATTR(unexport, 0200, NULL, unexport_store),
+	__ATTR_NULL,
+};
+
+static struct class gpio_class = {
+	.name =		"gpio",
+	.owner =	THIS_MODULE,
+
+	.class_attrs =	gpio_class_attrs,
+};
+
+
+/**
+ * gpio_export - export a GPIO through sysfs
+ * @gpio: gpio to make available, already requested
+ * @direction_may_change: true if userspace may change gpio direction
+ * Context: arch_initcall or later
+ *
+ * When drivers want to make a GPIO accessible to userspace after they
+ * have requested it -- perhaps while debugging, or as part of their
+ * public interface -- they may use this routine.  If the GPIO can
+ * change direction (some can't) and the caller allows it, userspace
+ * will see "direction" sysfs attribute which may be used to change
+ * the gpio's direction.  A "value" attribute will always be provided.
+ *
+ * Returns zero on success, else an error.
+ */
+int gpio_export(unsigned gpio, bool direction_may_change)
+{
+	unsigned long		flags;
+	struct gpio_desc	*desc;
+	int			status = -EINVAL;
+
+	/* can't export until sysfs is available ... */
+	if (!gpio_class.p) {
+		pr_debug("%s: called too early!\n", __func__);
+		return -ENOENT;
+	}
+
+	if (!gpio_is_valid(gpio))
+		goto done;
+
+	mutex_lock(&sysfs_lock);
+
+	spin_lock_irqsave(&gpio_lock, flags);
+	desc = &gpio_desc[gpio];
+	if (test_bit(FLAG_REQUESTED, &desc->flags)
+			&& !test_bit(FLAG_EXPORT, &desc->flags)) {
+		status = 0;
+		if (!desc->chip->direction_input
+				|| !desc->chip->direction_output)
+			direction_may_change = false;
+	}
+	spin_unlock_irqrestore(&gpio_lock, flags);
+
+	if (status == 0) {
+		struct device	*dev;
+
+		dev = device_create(&gpio_class, desc->chip->dev, MKDEV(0, 0),
+					desc, "gpio%d", gpio);
+		if (dev) {
+			if (direction_may_change)
+				status = sysfs_create_group(&dev->kobj,
+						&gpio_attr_group);
+			else
+				status = device_create_file(dev,
+						&dev_attr_value);
+			if (status != 0)
+				device_unregister(dev);
+		} else
+			status = -ENODEV;
+		if (status == 0)
+			set_bit(FLAG_EXPORT, &desc->flags);
+	}
+
+	mutex_unlock(&sysfs_lock);
+
+done:
+	if (status)
+		pr_debug("%s: gpio%d status %d\n", __func__, gpio, status);
+
+	return status;
+}
+EXPORT_SYMBOL_GPL(gpio_export);
+
+static int match_export(struct device *dev, void *data)
+{
+	return dev_get_drvdata(dev) == data;
+}
+
+/**
+ * gpio_unexport - reverse effect of gpio_export()
+ * @gpio: gpio to make unavailable
+ *
+ * This is implicit on gpio_free().
+ */
+void gpio_unexport(unsigned gpio)
+{
+	struct gpio_desc	*desc;
+	int			status = -EINVAL;
+
+	if (!gpio_is_valid(gpio))
+		goto done;
+
+	mutex_lock(&sysfs_lock);
+
+	desc = &gpio_desc[gpio];
+	if (test_bit(FLAG_EXPORT, &desc->flags)) {
+		struct device	*dev = NULL;
+
+		dev = class_find_device(&gpio_class, NULL, desc, match_export);
+		if (dev) {
+			clear_bit(FLAG_EXPORT, &desc->flags);
+			put_device(dev);
+			device_unregister(dev);
+			status = 0;
+		} else
+			status = -ENODEV;
+	}
+
+	mutex_unlock(&sysfs_lock);
+done:
+	if (status)
+		pr_debug("%s: gpio%d status %d\n", __func__, gpio, status);
+}
+EXPORT_SYMBOL_GPL(gpio_unexport);
+
+static int gpiochip_export(struct gpio_chip *chip)
+{
+	int		status;
+	struct device	*dev;
+
+	/* Many systems register gpio chips for SOC support very early,
+	 * before driver model support is available.  In those cases we
+	 * export this later, in gpiolib_sysfs_init() ... here we just
+	 * verify that _some_ field of gpio_class got initialized.
+	 */
+	if (!gpio_class.p)
+		return 0;
+
+	/* use chip->base for the ID; it's already known to be unique */
+	mutex_lock(&sysfs_lock);
+	dev = device_create(&gpio_class, chip->dev, MKDEV(0, 0), chip,
+				"gpiochip%d", chip->base);
+	if (dev) {
+		status = sysfs_create_group(&dev->kobj,
+				&gpiochip_attr_group);
+	} else
+		status = -ENODEV;
+	chip->exported = (status == 0);
+	mutex_unlock(&sysfs_lock);
+
+	if (status) {
+		unsigned long	flags;
+		unsigned	gpio;
+
+		spin_lock_irqsave(&gpio_lock, flags);
+		gpio = chip->base;
+		while (gpio_desc[gpio].chip == chip)
+			gpio_desc[gpio++].chip = NULL;
+		spin_unlock_irqrestore(&gpio_lock, flags);
+
+		pr_debug("%s: chip %s status %d\n", __func__,
+				chip->label, status);
+	}
+
+	return status;
+}
+
+static void gpiochip_unexport(struct gpio_chip *chip)
+{
+	int			status;
+	struct device		*dev;
+
+	mutex_lock(&sysfs_lock);
+	dev = class_find_device(&gpio_class, NULL, chip, match_export);
+	if (dev) {
+		put_device(dev);
+		device_unregister(dev);
+		chip->exported = 0;
+		status = 0;
+	} else
+		status = -ENODEV;
+	mutex_unlock(&sysfs_lock);
+
+	if (status)
+		pr_debug("%s: chip %s status %d\n", __func__,
+				chip->label, status);
+}
+
+static int __init gpiolib_sysfs_init(void)
+{
+	int		status;
+	unsigned long	flags;
+	unsigned	gpio;
+
+	status = class_register(&gpio_class);
+	if (status < 0)
+		return status;
+
+	/* Scan and register the gpio_chips which registered very
+	 * early (e.g. before the class_register above was called).
+	 *
+	 * We run before arch_initcall() so chip->dev nodes can have
+	 * registered, and so arch_initcall() can always gpio_export().
+	 */
+	spin_lock_irqsave(&gpio_lock, flags);
+	for (gpio = 0; gpio < ARCH_NR_GPIOS; gpio++) {
+		struct gpio_chip	*chip;
+
+		chip = gpio_desc[gpio].chip;
+		if (!chip || chip->exported)
+			continue;
+
+		spin_unlock_irqrestore(&gpio_lock, flags);
+		status = gpiochip_export(chip);
+		spin_lock_irqsave(&gpio_lock, flags);
+	}
+	spin_unlock_irqrestore(&gpio_lock, flags);
+
+
+	return status;
+}
+postcore_initcall(gpiolib_sysfs_init);
+
+#else
+static inline int gpiochip_export(struct gpio_chip *chip)
+{
+	return 0;
+}
+
+static inline void gpiochip_unexport(struct gpio_chip *chip)
+{
+}
+
+#endif /* CONFIG_GPIO_SYSFS */
+
 /**
  * gpiochip_add() - register a gpio_chip
  * @chip: the chip to register, with chip->base initialized
@@ -160,6 +641,11 @@ err:
  * because the chip->base is invalid or already associated with a
  * different chip.  Otherwise it returns zero as a success code.
  *
+ * When gpiochip_add() is called very early during boot, so that GPIOs
+ * can be freely used, the chip->dev device must be registered before
+ * the gpio framework's arch_initcall().  Otherwise sysfs initialization
+ * for GPIOs will fail rudely.
+ *
  * If chip->base is negative, this requests dynamic assignment of
  * a range of valid GPIOs.
  */
@@ -182,7 +668,7 @@ int gpiochip_add(struct gpio_chip *chip)
 		base = gpiochip_find_base(chip->ngpio);
 		if (base < 0) {
 			status = base;
-			goto fail_unlock;
+			goto unlock;
 		}
 		chip->base = base;
 	}
@@ -197,12 +683,23 @@ int gpiochip_add(struct gpio_chip *chip)
 	if (status == 0) {
 		for (id = base; id < base + chip->ngpio; id++) {
 			gpio_desc[id].chip = chip;
-			gpio_desc[id].flags = 0;
+
+			/* REVISIT:  most hardware initializes GPIOs as
+			 * inputs (often with pullups enabled) so power
+			 * usage is minimized.  Linux code should set the
+			 * gpio direction first thing; but until it does,
+			 * we may expose the wrong direction in sysfs.
+			 */
+			gpio_desc[id].flags = !chip->direction_input
+				? (1 << FLAG_IS_OUT)
+				: 0;
 		}
 	}
 
-fail_unlock:
+unlock:
 	spin_unlock_irqrestore(&gpio_lock, flags);
+	if (status == 0)
+		status = gpiochip_export(chip);
 fail:
 	/* failures here can mean systems won't boot... */
 	if (status)
@@ -239,6 +736,10 @@ int gpiochip_remove(struct gpio_chip *chip)
 	}
 
 	spin_unlock_irqrestore(&gpio_lock, flags);
+
+	if (status == 0)
+		gpiochip_unexport(chip);
+
 	return status;
 }
 EXPORT_SYMBOL_GPL(gpiochip_remove);
@@ -296,6 +797,8 @@ void gpio_free(unsigned gpio)
 		return;
 	}
 
+	gpio_unexport(gpio);
+
 	spin_lock_irqsave(&gpio_lock, flags);
 
 	desc = &gpio_desc[gpio];
@@ -534,10 +1037,6 @@ EXPORT_SYMBOL_GPL(gpio_set_value_cansleep);
 
 #ifdef CONFIG_DEBUG_FS
 
-#include <linux/debugfs.h>
-#include <linux/seq_file.h>
-
-
 static void gpiolib_dbg_show(struct seq_file *s, struct gpio_chip *chip)
 {
 	unsigned		i;
@@ -614,17 +1113,28 @@ static int gpiolib_show(struct seq_file *s, void *unused)
 	/* REVISIT this isn't locked against gpio_chip removal ... */
 
 	for (gpio = 0; gpio_is_valid(gpio); gpio++) {
+		struct device *dev;
+
 		if (chip == gpio_desc[gpio].chip)
 			continue;
 		chip = gpio_desc[gpio].chip;
 		if (!chip)
 			continue;
 
-		seq_printf(s, "%sGPIOs %d-%d, %s%s:\n",
+		seq_printf(s, "%sGPIOs %d-%d",
 				started ? "\n" : "",
-				chip->base, chip->base + chip->ngpio - 1,
-				chip->label ? : "generic",
-				chip->can_sleep ? ", can sleep" : "");
+				chip->base, chip->base + chip->ngpio - 1);
+		dev = chip->dev;
+		if (dev)
+			seq_printf(s, ", %s/%s",
+				dev->bus ? dev->bus->name : "no-bus",
+				dev->bus_id);
+		if (chip->label)
+			seq_printf(s, ", %s", chip->label);
+		if (chip->can_sleep)
+			seq_printf(s, ", can sleep");
+		seq_printf(s, ":\n");
+
 		started = 1;
 		if (chip->dbg_show)
 			chip->dbg_show(s, chip);
diff --git a/drivers/gpio/mcp23s08.c b/drivers/gpio/mcp23s08.c
index 7f92fdd5f0e2..7efd7d3a81f9 100644
--- a/drivers/gpio/mcp23s08.c
+++ b/drivers/gpio/mcp23s08.c
@@ -239,6 +239,7 @@ static int mcp23s08_probe(struct spi_device *spi)
 	mcp->chip.base = pdata->base;
 	mcp->chip.ngpio = 8;
 	mcp->chip.can_sleep = 1;
+	mcp->chip.dev = &spi->dev;
 	mcp->chip.owner = THIS_MODULE;
 
 	spi_set_drvdata(spi, mcp);
diff --git a/drivers/gpio/pca953x.c b/drivers/gpio/pca953x.c
index a380730b61ab..cc8468692ae0 100644
--- a/drivers/gpio/pca953x.c
+++ b/drivers/gpio/pca953x.c
@@ -188,6 +188,7 @@ static void pca953x_setup_gpio(struct pca953x_chip *chip, int gpios)
 	gc->base = chip->gpio_start;
 	gc->ngpio = gpios;
 	gc->label = chip->client->name;
+	gc->dev = &chip->client->dev;
 	gc->owner = THIS_MODULE;
 }
 
diff --git a/drivers/gpio/pcf857x.c b/drivers/gpio/pcf857x.c
index d25d356c4f20..fc9c6ae739ee 100644
--- a/drivers/gpio/pcf857x.c
+++ b/drivers/gpio/pcf857x.c
@@ -200,6 +200,7 @@ static int pcf857x_probe(struct i2c_client *client,
 
 	gpio->chip.base = pdata->gpio_base;
 	gpio->chip.can_sleep = 1;
+	gpio->chip.dev = &client->dev;
 	gpio->chip.owner = THIS_MODULE;
 
 	/* NOTE:  the OnSemi jlc1562b is also largely compatible with
diff --git a/drivers/i2c/chips/tps65010.c b/drivers/i2c/chips/tps65010.c
index 85949685191b..cf02e8fceb42 100644
--- a/drivers/i2c/chips/tps65010.c
+++ b/drivers/i2c/chips/tps65010.c
@@ -636,6 +636,8 @@ static int tps65010_probe(struct i2c_client *client,
 		tps->outmask = board->outmask;
 
 		tps->chip.label = client->name;
+		tps->chip.dev = &client->dev;
+		tps->chip.owner = THIS_MODULE;
 
 		tps->chip.set = tps65010_gpio_set;
 		tps->chip.direction_output = tps65010_output;
diff --git a/drivers/mfd/htc-egpio.c b/drivers/mfd/htc-egpio.c
index 8872cc077519..6be43172dc65 100644
--- a/drivers/mfd/htc-egpio.c
+++ b/drivers/mfd/htc-egpio.c
@@ -318,6 +318,8 @@ static int __init egpio_probe(struct platform_device *pdev)
 		ei->chip[i].dev = &(pdev->dev);
 		chip = &(ei->chip[i].chip);
 		chip->label           = "htc-egpio";
+		chip->dev             = &pdev->dev;
+		chip->owner           = THIS_MODULE;
 		chip->get             = egpio_get;
 		chip->set             = egpio_set;
 		chip->direction_input = egpio_direction_input;
diff --git a/include/asm-generic/gpio.h b/include/asm-generic/gpio.h
index 6be061d09da9..1beff5166e53 100644
--- a/include/asm-generic/gpio.h
+++ b/include/asm-generic/gpio.h
@@ -32,6 +32,8 @@ struct module;
 /**
  * struct gpio_chip - abstract a GPIO controller
  * @label: for diagnostics
+ * @dev: optional device providing the GPIOs
+ * @owner: helps prevent removal of modules exporting active GPIOs
  * @direction_input: configures signal "offset" as input, or returns error
  * @get: returns value for signal "offset"; for output signals this
  *	returns either the value actually sensed, or zero
@@ -59,6 +61,7 @@ struct module;
  */
 struct gpio_chip {
 	char			*label;
+	struct device		*dev;
 	struct module		*owner;
 
 	int			(*direction_input)(struct gpio_chip *chip,
@@ -74,6 +77,7 @@ struct gpio_chip {
 	int			base;
 	u16			ngpio;
 	unsigned		can_sleep:1;
+	unsigned		exported:1;
 };
 
 extern const char *gpiochip_is_requested(struct gpio_chip *chip,
@@ -108,7 +112,18 @@ extern void __gpio_set_value(unsigned gpio, int value);
 extern int __gpio_cansleep(unsigned gpio);
 
 
-#else
+#ifdef CONFIG_GPIO_SYSFS
+
+/*
+ * A sysfs interface can be exported by individual drivers if they want,
+ * but more typically is configured entirely from userspace.
+ */
+extern int gpio_export(unsigned gpio, bool direction_may_change);
+extern void gpio_unexport(unsigned gpio);
+
+#endif	/* CONFIG_GPIO_SYSFS */
+
+#else	/* !CONFIG_HAVE_GPIO_LIB */
 
 static inline int gpio_is_valid(int number)
 {
@@ -137,6 +152,20 @@ static inline void gpio_set_value_cansleep(unsigned gpio, int value)
 	gpio_set_value(gpio, value);
 }
 
-#endif
+#endif /* !CONFIG_HAVE_GPIO_LIB */
+
+#ifndef CONFIG_GPIO_SYSFS
+
+/* sysfs support is only available with gpiolib, where it's optional */
+
+static inline int gpio_export(unsigned gpio, bool direction_may_change)
+{
+	return -ENOSYS;
+}
+
+static inline void gpio_unexport(unsigned gpio)
+{
+}
+#endif	/* CONFIG_GPIO_SYSFS */
 
 #endif /* _ASM_GENERIC_GPIO_H */
diff --git a/include/linux/gpio.h b/include/linux/gpio.h
index 98be6c5762b9..730a20b83576 100644
--- a/include/linux/gpio.h
+++ b/include/linux/gpio.h
@@ -79,6 +79,19 @@ static inline void gpio_set_value_cansleep(unsigned gpio, int value)
 	WARN_ON(1);
 }
 
+static inline int gpio_export(unsigned gpio, bool direction_may_change)
+{
+	/* GPIO can never have been requested or set as {in,out}put */
+	WARN_ON(1);
+	return -EINVAL;
+}
+
+static inline void gpio_unexport(unsigned gpio)
+{
+	/* GPIO can never have been exported */
+	WARN_ON(1);
+}
+
 static inline int gpio_to_irq(unsigned gpio)
 {
 	/* GPIO can never have been requested or set as input */
-- 
cgit v1.2.3


From 8f1cc3b10e6ee0c5c7c8ed27f8771c4f252b4862 Mon Sep 17 00:00:00 2001
From: David Brownell <david-b@pacbell.net>
Date: Fri, 25 Jul 2008 01:46:09 -0700
Subject: gpio: mcp23s08 handles multiple chips per chipselect

Teach the mcp23s08 driver about a curious feature of these chips: up to
four of them can share the same chipselect, with the SPI signals wired in
parallel, by matching two bits in the first protocol byte against two
address lines on the chip.

This is handled by three software changes:

  * Platform data now holds an array of per-chip structs, not
    just one chip's address and pullup configuration.

  * Probe() and remove() now use another level of structure,
    wrapping an instance of the original structure for each
    mcp23s08 chip sharing that chipselect.

  * The HAEN bit is set, so that the hardware address bits can no
    longer be ignored (boot firmware may not have enabled them).

The "one struct per chip" preserves the guts of the current code,
but platform_data will need minor changes.

    OLD:
	/* incorrect "slave" ID may not have mattered */
	.slave = 3,
	.pullups = BIT(3) | BIT(1) | BIT(0),

    NEW:
	/* slave address _must_ match chip's wiring */
	.chip[3] = {
		.is_present = true,
		.pullups = BIT(3) | BIT(1) | BIT(0),
	},

There's no change in how things _behave_ for spi_device nodes with a
single mcp23s08 chip.  New multi-chip configurations assign GPIOs in
sequence, without holes.  The spi_device just resembles a bigger
controller, but internally it has multiple gpio_chip instances.

Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/gpio/mcp23s08.c      | 133 +++++++++++++++++++++++++++++++++----------
 include/linux/spi/mcp23s08.h |  25 +++++---
 2 files changed, 118 insertions(+), 40 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpio/mcp23s08.c b/drivers/gpio/mcp23s08.c
index 7efd7d3a81f9..8a1b405fefda 100644
--- a/drivers/gpio/mcp23s08.c
+++ b/drivers/gpio/mcp23s08.c
@@ -40,15 +40,26 @@ struct mcp23s08 {
 	struct spi_device	*spi;
 	u8			addr;
 
+	u8			cache[11];
 	/* lock protects the cached values */
 	struct mutex		lock;
-	u8			cache[11];
 
 	struct gpio_chip	chip;
 
 	struct work_struct	work;
 };
 
+/* A given spi_device can represent up to four mcp23s08 chips
+ * sharing the same chipselect but using different addresses
+ * (e.g. chips #0 and #3 might be populated, but not #1 or $2).
+ * Driver data holds all the per-chip data.
+ */
+struct mcp23s08_driver_data {
+	unsigned		ngpio;
+	struct mcp23s08		*mcp[4];
+	struct mcp23s08		chip[];
+};
+
 static int mcp23s08_read(struct mcp23s08 *mcp, unsigned reg)
 {
 	u8	tx[2], rx[1];
@@ -208,25 +219,18 @@ done:
 
 /*----------------------------------------------------------------------*/
 
-static int mcp23s08_probe(struct spi_device *spi)
+static int mcp23s08_probe_one(struct spi_device *spi, unsigned addr,
+		unsigned base, unsigned pullups)
 {
-	struct mcp23s08			*mcp;
-	struct mcp23s08_platform_data	*pdata;
+	struct mcp23s08_driver_data	*data = spi_get_drvdata(spi);
+	struct mcp23s08			*mcp = data->mcp[addr];
 	int				status;
 	int				do_update = 0;
 
-	pdata = spi->dev.platform_data;
-	if (!pdata || pdata->slave > 3 || !pdata->base)
-		return -ENODEV;
-
-	mcp = kzalloc(sizeof *mcp, GFP_KERNEL);
-	if (!mcp)
-		return -ENOMEM;
-
 	mutex_init(&mcp->lock);
 
 	mcp->spi = spi;
-	mcp->addr = 0x40 | (pdata->slave << 1);
+	mcp->addr = 0x40 | (addr << 1);
 
 	mcp->chip.label = "mcp23s08",
 
@@ -236,27 +240,28 @@ static int mcp23s08_probe(struct spi_device *spi)
 	mcp->chip.set = mcp23s08_set;
 	mcp->chip.dbg_show = mcp23s08_dbg_show;
 
-	mcp->chip.base = pdata->base;
+	mcp->chip.base = base;
 	mcp->chip.ngpio = 8;
 	mcp->chip.can_sleep = 1;
 	mcp->chip.dev = &spi->dev;
 	mcp->chip.owner = THIS_MODULE;
 
-	spi_set_drvdata(spi, mcp);
-
-	/* verify MCP_IOCON.SEQOP = 0, so sequential reads work */
+	/* verify MCP_IOCON.SEQOP = 0, so sequential reads work,
+	 * and MCP_IOCON.HAEN = 1, so we work with all chips.
+	 */
 	status = mcp23s08_read(mcp, MCP_IOCON);
 	if (status < 0)
 		goto fail;
-	if (status & IOCON_SEQOP) {
+	if ((status & IOCON_SEQOP) || !(status & IOCON_HAEN)) {
 		status &= ~IOCON_SEQOP;
+		status |= IOCON_HAEN;
 		status = mcp23s08_write(mcp, MCP_IOCON, (u8) status);
 		if (status < 0)
 			goto fail;
 	}
 
 	/* configure ~100K pullups */
-	status = mcp23s08_write(mcp, MCP_GPPU, pdata->pullups);
+	status = mcp23s08_write(mcp, MCP_GPPU, pullups);
 	if (status < 0)
 		goto fail;
 
@@ -283,11 +288,58 @@ static int mcp23s08_probe(struct spi_device *spi)
 		tx[1] = MCP_IPOL;
 		memcpy(&tx[2], &mcp->cache[MCP_IPOL], sizeof(tx) - 2);
 		status = spi_write_then_read(mcp->spi, tx, sizeof tx, NULL, 0);
-
-		/* FIXME check status... */
+		if (status < 0)
+			goto fail;
 	}
 
 	status = gpiochip_add(&mcp->chip);
+fail:
+	if (status < 0)
+		dev_dbg(&spi->dev, "can't setup chip %d, --> %d\n",
+				addr, status);
+	return status;
+}
+
+static int mcp23s08_probe(struct spi_device *spi)
+{
+	struct mcp23s08_platform_data	*pdata;
+	unsigned			addr;
+	unsigned			chips = 0;
+	struct mcp23s08_driver_data	*data;
+	int				status;
+	unsigned			base;
+
+	pdata = spi->dev.platform_data;
+	if (!pdata || !gpio_is_valid(pdata->base))
+		return -ENODEV;
+
+	for (addr = 0; addr < 4; addr++) {
+		if (!pdata->chip[addr].is_present)
+			continue;
+		chips++;
+	}
+	if (!chips)
+		return -ENODEV;
+
+	data = kzalloc(sizeof *data + chips * sizeof(struct mcp23s08),
+			GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+	spi_set_drvdata(spi, data);
+
+	base = pdata->base;
+	for (addr = 0; addr < 4; addr++) {
+		if (!pdata->chip[addr].is_present)
+			continue;
+		chips--;
+		data->mcp[addr] = &data->chip[chips];
+		status = mcp23s08_probe_one(spi, addr, base,
+				pdata->chip[addr].pullups);
+		if (status < 0)
+			goto fail;
+		base += 8;
+	}
+	data->ngpio = base - pdata->base;
 
 	/* NOTE:  these chips have a relatively sane IRQ framework, with
 	 * per-signal masking and level/edge triggering.  It's not yet
@@ -295,8 +347,9 @@ static int mcp23s08_probe(struct spi_device *spi)
 	 */
 
 	if (pdata->setup) {
-		status = pdata->setup(spi, mcp->chip.base,
-				mcp->chip.ngpio, pdata->context);
+		status = pdata->setup(spi,
+				pdata->base, data->ngpio,
+				pdata->context);
 		if (status < 0)
 			dev_dbg(&spi->dev, "setup --> %d\n", status);
 	}
@@ -304,19 +357,29 @@ static int mcp23s08_probe(struct spi_device *spi)
 	return 0;
 
 fail:
-	kfree(mcp);
+	for (addr = 0; addr < 4; addr++) {
+		int tmp;
+
+		if (!data->mcp[addr])
+			continue;
+		tmp = gpiochip_remove(&data->mcp[addr]->chip);
+		if (tmp < 0)
+			dev_err(&spi->dev, "%s --> %d\n", "remove", tmp);
+	}
+	kfree(data);
 	return status;
 }
 
 static int mcp23s08_remove(struct spi_device *spi)
 {
-	struct mcp23s08			*mcp = spi_get_drvdata(spi);
+	struct mcp23s08_driver_data	*data = spi_get_drvdata(spi);
 	struct mcp23s08_platform_data	*pdata = spi->dev.platform_data;
+	unsigned			addr;
 	int				status = 0;
 
 	if (pdata->teardown) {
 		status = pdata->teardown(spi,
-				mcp->chip.base, mcp->chip.ngpio,
+				pdata->base, data->ngpio,
 				pdata->context);
 		if (status < 0) {
 			dev_err(&spi->dev, "%s --> %d\n", "teardown", status);
@@ -324,11 +387,20 @@ static int mcp23s08_remove(struct spi_device *spi)
 		}
 	}
 
-	status = gpiochip_remove(&mcp->chip);
+	for (addr = 0; addr < 4; addr++) {
+		int tmp;
+
+		if (!data->mcp[addr])
+			continue;
+
+		tmp = gpiochip_remove(&data->mcp[addr]->chip);
+		if (tmp < 0) {
+			dev_err(&spi->dev, "%s --> %d\n", "remove", tmp);
+			status = tmp;
+		}
+	}
 	if (status == 0)
-		kfree(mcp);
-	else
-		dev_err(&spi->dev, "%s --> %d\n", "remove", status);
+		kfree(data);
 	return status;
 }
 
@@ -356,4 +428,3 @@ static void __exit mcp23s08_exit(void)
 module_exit(mcp23s08_exit);
 
 MODULE_LICENSE("GPL");
-
diff --git a/include/linux/spi/mcp23s08.h b/include/linux/spi/mcp23s08.h
index 835ddf47d45c..22ef107d7704 100644
--- a/include/linux/spi/mcp23s08.h
+++ b/include/linux/spi/mcp23s08.h
@@ -1,18 +1,25 @@
 
-/* FIXME driver should be able to handle all four slaves that
- * can be hooked up to each chipselect, as well as IRQs...
- */
+/* FIXME driver should be able to handle IRQs...  */
+
+struct mcp23s08_chip_info {
+	bool	is_present;		/* true iff populated */
+	u8	pullups;		/* BIT(x) means enable pullup x */
+};
 
 struct mcp23s08_platform_data {
-	/* four slaves can share one SPI chipselect */
-	u8		slave;
+	/* Four slaves (numbered 0..3) can share one SPI chipselect, and
+	 * will provide 8..32 GPIOs using 1..4 gpio_chip instances.
+	 */
+	struct mcp23s08_chip_info	chip[4];
 
-	/* number assigned to the first GPIO */
+	/* "base" is the number of the first GPIO.  Dynamic assignment is
+	 * not currently supported, and even if there are gaps in chip
+	 * addressing the GPIO numbers are sequential .. so for example
+	 * if only slaves 0 and 3 are present, their GPIOs range from
+	 * base to base+15.
+	 */
 	unsigned	base;
 
-	/* pins with pullups */
-	u8		pullups;
-
 	void		*context;	/* param to setup/teardown */
 
 	int		(*setup)(struct spi_device *spi,
-- 
cgit v1.2.3


From bbcd6d543de335bf81e96477f46a60a8bf51039c Mon Sep 17 00:00:00 2001
From: Eric Miao <eric.miao@marvell.com>
Date: Fri, 25 Jul 2008 01:46:14 -0700
Subject: gpio: max732x driver

This adds a driver supporting a family of I2C port expanders from Maxim,
which includes the MAX7319 and MAX7320-7327 chips.

[dbrownell@users.sourceforge.net: minor fixes]
Signed-off-by: Jack Ren <jack.ren@marvell.com>
Signed-off-by: Eric Miao <eric.miao@marvell.com>
Acked-by: Jean Delvare <khali@linux-fr.org>
Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/gpio/Kconfig        |  19 +++
 drivers/gpio/Makefile       |   1 +
 drivers/gpio/max732x.c      | 385 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/i2c/max732x.h |  19 +++
 4 files changed, 424 insertions(+)
 create mode 100644 drivers/gpio/max732x.c
 create mode 100644 include/linux/i2c/max732x.h

(limited to 'include/linux')

diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index 5a355f829167..dbd42d6c93a7 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -67,6 +67,25 @@ config GPIO_SYSFS
 
 comment "I2C GPIO expanders:"
 
+config GPIO_MAX732X
+	tristate "MAX7319, MAX7320-7327 I2C Port Expanders"
+	depends on I2C
+	help
+	  Say yes here to support the MAX7319, MAX7320-7327 series of I2C
+	  Port Expanders. Each IO port on these chips has a fixed role of
+	  Input (designated by 'I'), Push-Pull Output ('O'), or Open-Drain
+	  Input and Output (designed by 'P'). The combinations are listed
+	  below:
+
+	  8 bits:	max7319 (8I), max7320 (8O), max7321 (8P),
+		  	max7322 (4I4O), max7323 (4P4O)
+
+	  16 bits:	max7324 (8I8O), max7325 (8P8O),
+		  	max7326 (4I12O), max7327 (4P12O)
+
+	  Board setup code must specify the model to use, and the start
+	  number for these GPIOs.
+
 config GPIO_PCA953X
 	tristate "PCA953x, PCA955x, and MAX7310 I/O ports"
 	depends on I2C
diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile
index 8c45948d1fe7..01b4bbde1956 100644
--- a/drivers/gpio/Makefile
+++ b/drivers/gpio/Makefile
@@ -5,6 +5,7 @@ ccflags-$(CONFIG_DEBUG_GPIO)	+= -DDEBUG
 obj-$(CONFIG_GPIOLIB)		+= gpiolib.o
 
 obj-$(CONFIG_GPIO_MAX7301)	+= max7301.o
+obj-$(CONFIG_GPIO_MAX732X)	+= max732x.o
 obj-$(CONFIG_GPIO_MCP23S08)	+= mcp23s08.o
 obj-$(CONFIG_GPIO_PCA953X)	+= pca953x.o
 obj-$(CONFIG_GPIO_PCF857X)	+= pcf857x.o
diff --git a/drivers/gpio/max732x.c b/drivers/gpio/max732x.c
new file mode 100644
index 000000000000..b51c8135ca28
--- /dev/null
+++ b/drivers/gpio/max732x.c
@@ -0,0 +1,385 @@
+/*
+ *  max732x.c - I2C Port Expander with 8/16 I/O
+ *
+ *  Copyright (C) 2007 Marvell International Ltd.
+ *  Copyright (C) 2008 Jack Ren <jack.ren@marvell.com>
+ *  Copyright (C) 2008 Eric Miao <eric.miao@marvell.com>
+ *
+ *  Derived from drivers/gpio/pca953x.c
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; version 2 of the License.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/gpio.h>
+
+#include <linux/i2c.h>
+#include <linux/i2c/max732x.h>
+
+
+/*
+ * Each port of MAX732x (including MAX7319) falls into one of the
+ * following three types:
+ *
+ *   - Push Pull Output
+ *   - Input
+ *   - Open Drain I/O
+ *
+ * designated by 'O', 'I' and 'P' individually according to MAXIM's
+ * datasheets.
+ *
+ * There are two groups of I/O ports, each group usually includes
+ * up to 8 I/O ports, and is accessed by a specific I2C address:
+ *
+ *   - Group A : by I2C address 0b'110xxxx
+ *   - Group B : by I2C address 0b'101xxxx
+ *
+ * where 'xxxx' is decided by the connections of pin AD2/AD0.  The
+ * address used also affects the initial state of output signals.
+ *
+ * Within each group of ports, there are five known combinations of
+ * I/O ports: 4I4O, 4P4O, 8I, 8P, 8O, see the definitions below for
+ * the detailed organization of these ports.
+ *
+ * GPIO numbers start from 'gpio_base + 0' to 'gpio_base + 8/16',
+ * and GPIOs from GROUP_A are numbered before those from GROUP_B
+ * (if there are two groups).
+ *
+ * NOTE: MAX7328/MAX7329 are drop-in replacements for PCF8574/a, so
+ * they are not supported by this driver.
+ */
+
+#define PORT_NONE	0x0	/* '/' No Port */
+#define PORT_OUTPUT	0x1	/* 'O' Push-Pull, Output Only */
+#define PORT_INPUT	0x2	/* 'I' Input Only */
+#define PORT_OPENDRAIN	0x3	/* 'P' Open-Drain, I/O */
+
+#define IO_4I4O		0x5AA5	/* O7 O6 I5 I4 I3 I2 O1 O0 */
+#define IO_4P4O		0x5FF5	/* O7 O6 P5 P4 P3 P2 O1 O0 */
+#define IO_8I		0xAAAA	/* I7 I6 I5 I4 I3 I2 I1 I0 */
+#define IO_8P		0xFFFF	/* P7 P6 P5 P4 P3 P2 P1 P0 */
+#define IO_8O		0x5555	/* O7 O6 O5 O4 O3 O2 O1 O0 */
+
+#define GROUP_A(x)	((x) & 0xffff)	/* I2C Addr: 0b'110xxxx */
+#define GROUP_B(x)	((x) << 16)	/* I2C Addr: 0b'101xxxx */
+
+static const struct i2c_device_id max732x_id[] = {
+	{ "max7319", GROUP_A(IO_8I) },
+	{ "max7320", GROUP_B(IO_8O) },
+	{ "max7321", GROUP_A(IO_8P) },
+	{ "max7322", GROUP_A(IO_4I4O) },
+	{ "max7323", GROUP_A(IO_4P4O) },
+	{ "max7324", GROUP_A(IO_8I) | GROUP_B(IO_8O) },
+	{ "max7325", GROUP_A(IO_8P) | GROUP_B(IO_8O) },
+	{ "max7326", GROUP_A(IO_4I4O) | GROUP_B(IO_8O) },
+	{ "max7327", GROUP_A(IO_4P4O) | GROUP_B(IO_8O) },
+	{ },
+};
+MODULE_DEVICE_TABLE(i2c, max732x_id);
+
+struct max732x_chip {
+	struct gpio_chip gpio_chip;
+
+	struct i2c_client *client;	/* "main" client */
+	struct i2c_client *client_dummy;
+	struct i2c_client *client_group_a;
+	struct i2c_client *client_group_b;
+
+	unsigned int	mask_group_a;
+	unsigned int	dir_input;
+	unsigned int	dir_output;
+
+	struct mutex	lock;
+	uint8_t		reg_out[2];
+};
+
+static int max732x_write(struct max732x_chip *chip, int group_a, uint8_t val)
+{
+	struct i2c_client *client;
+	int ret;
+
+	client = group_a ? chip->client_group_a : chip->client_group_b;
+	ret = i2c_smbus_write_byte(client, val);
+	if (ret < 0) {
+		dev_err(&client->dev, "failed writing\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+static int max732x_read(struct max732x_chip *chip, int group_a, uint8_t *val)
+{
+	struct i2c_client *client;
+	int ret;
+
+	client = group_a ? chip->client_group_a : chip->client_group_b;
+	ret = i2c_smbus_read_byte(client);
+	if (ret < 0) {
+		dev_err(&client->dev, "failed reading\n");
+		return ret;
+	}
+
+	*val = (uint8_t)ret;
+	return 0;
+}
+
+static inline int is_group_a(struct max732x_chip *chip, unsigned off)
+{
+	return (1u << off) & chip->mask_group_a;
+}
+
+static int max732x_gpio_get_value(struct gpio_chip *gc, unsigned off)
+{
+	struct max732x_chip *chip;
+	uint8_t reg_val;
+	int ret;
+
+	chip = container_of(gc, struct max732x_chip, gpio_chip);
+
+	ret = max732x_read(chip, is_group_a(chip, off), &reg_val);
+	if (ret < 0)
+		return 0;
+
+	return reg_val & (1u << (off & 0x7));
+}
+
+static void max732x_gpio_set_value(struct gpio_chip *gc, unsigned off, int val)
+{
+	struct max732x_chip *chip;
+	uint8_t reg_out, mask = 1u << (off & 0x7);
+	int ret;
+
+	chip = container_of(gc, struct max732x_chip, gpio_chip);
+
+	mutex_lock(&chip->lock);
+
+	reg_out = (off > 7) ? chip->reg_out[1] : chip->reg_out[0];
+	reg_out = (val) ? reg_out | mask : reg_out & ~mask;
+
+	ret = max732x_write(chip, is_group_a(chip, off), reg_out);
+	if (ret < 0)
+		goto out;
+
+	/* update the shadow register then */
+	if (off > 7)
+		chip->reg_out[1] = reg_out;
+	else
+		chip->reg_out[0] = reg_out;
+out:
+	mutex_unlock(&chip->lock);
+}
+
+static int max732x_gpio_direction_input(struct gpio_chip *gc, unsigned off)
+{
+	struct max732x_chip *chip;
+	unsigned int mask = 1u << off;
+
+	chip = container_of(gc, struct max732x_chip, gpio_chip);
+
+	if ((mask & chip->dir_input) == 0) {
+		dev_dbg(&chip->client->dev, "%s port %d is output only\n",
+			chip->client->name, off);
+		return -EACCES;
+	}
+
+	return 0;
+}
+
+static int max732x_gpio_direction_output(struct gpio_chip *gc,
+		unsigned off, int val)
+{
+	struct max732x_chip *chip;
+	unsigned int mask = 1u << off;
+
+	chip = container_of(gc, struct max732x_chip, gpio_chip);
+
+	if ((mask & chip->dir_output) == 0) {
+		dev_dbg(&chip->client->dev, "%s port %d is input only\n",
+			chip->client->name, off);
+		return -EACCES;
+	}
+
+	max732x_gpio_set_value(gc, off, val);
+	return 0;
+}
+
+static int __devinit max732x_setup_gpio(struct max732x_chip *chip,
+					const struct i2c_device_id *id,
+					unsigned gpio_start)
+{
+	struct gpio_chip *gc = &chip->gpio_chip;
+	uint32_t id_data = id->driver_data;
+	int i, port = 0;
+
+	for (i = 0; i < 16; i++, id_data >>= 2) {
+		unsigned int mask = 1 << port;
+
+		switch (id_data & 0x3) {
+		case PORT_OUTPUT:
+			chip->dir_output |= mask;
+			break;
+		case PORT_INPUT:
+			chip->dir_input |= mask;
+			break;
+		case PORT_OPENDRAIN:
+			chip->dir_output |= mask;
+			chip->dir_input |= mask;
+			break;
+		default:
+			continue;
+		}
+
+		if (i < 8)
+			chip->mask_group_a |= mask;
+		port++;
+	}
+
+	if (chip->dir_input)
+		gc->direction_input = max732x_gpio_direction_input;
+	if (chip->dir_output) {
+		gc->direction_output = max732x_gpio_direction_output;
+		gc->set = max732x_gpio_set_value;
+	}
+	gc->get = max732x_gpio_get_value;
+	gc->can_sleep = 1;
+
+	gc->base = gpio_start;
+	gc->ngpio = port;
+	gc->label = chip->client->name;
+	gc->owner = THIS_MODULE;
+
+	return port;
+}
+
+static int __devinit max732x_probe(struct i2c_client *client,
+				   const struct i2c_device_id *id)
+{
+	struct max732x_platform_data *pdata;
+	struct max732x_chip *chip;
+	struct i2c_client *c;
+	uint16_t addr_a, addr_b;
+	int ret, nr_port;
+
+	pdata = client->dev.platform_data;
+	if (pdata == NULL)
+		return -ENODEV;
+
+	chip = kzalloc(sizeof(struct max732x_chip), GFP_KERNEL);
+	if (chip == NULL)
+		return -ENOMEM;
+	chip->client = client;
+
+	nr_port = max732x_setup_gpio(chip, id, pdata->gpio_base);
+
+	addr_a = (client->addr & 0x0f) | 0x60;
+	addr_b = (client->addr & 0x0f) | 0x50;
+
+	switch (client->addr & 0x70) {
+	case 0x60:
+		chip->client_group_a = client;
+		if (nr_port > 7) {
+			c = i2c_new_dummy(client->adapter, addr_b);
+			chip->client_group_b = chip->client_dummy = c;
+		}
+		break;
+	case 0x50:
+		chip->client_group_b = client;
+		if (nr_port > 7) {
+			c = i2c_new_dummy(client->adapter, addr_a);
+			chip->client_group_a = chip->client_dummy = c;
+		}
+		break;
+	default:
+		dev_err(&client->dev, "invalid I2C address specified %02x\n",
+				client->addr);
+		ret = -EINVAL;
+		goto out_failed;
+	}
+
+	mutex_init(&chip->lock);
+
+	max732x_read(chip, is_group_a(chip, 0), &chip->reg_out[0]);
+	if (nr_port > 7)
+		max732x_read(chip, is_group_a(chip, 8), &chip->reg_out[1]);
+
+	ret = gpiochip_add(&chip->gpio_chip);
+	if (ret)
+		goto out_failed;
+
+	if (pdata->setup) {
+		ret = pdata->setup(client, chip->gpio_chip.base,
+				chip->gpio_chip.ngpio, pdata->context);
+		if (ret < 0)
+			dev_warn(&client->dev, "setup failed, %d\n", ret);
+	}
+
+	i2c_set_clientdata(client, chip);
+	return 0;
+
+out_failed:
+	kfree(chip);
+	return ret;
+}
+
+static int __devexit max732x_remove(struct i2c_client *client)
+{
+	struct max732x_platform_data *pdata = client->dev.platform_data;
+	struct max732x_chip *chip = i2c_get_clientdata(client);
+	int ret;
+
+	if (pdata->teardown) {
+		ret = pdata->teardown(client, chip->gpio_chip.base,
+				chip->gpio_chip.ngpio, pdata->context);
+		if (ret < 0) {
+			dev_err(&client->dev, "%s failed, %d\n",
+					"teardown", ret);
+			return ret;
+		}
+	}
+
+	ret = gpiochip_remove(&chip->gpio_chip);
+	if (ret) {
+		dev_err(&client->dev, "%s failed, %d\n",
+				"gpiochip_remove()", ret);
+		return ret;
+	}
+
+	/* unregister any dummy i2c_client */
+	if (chip->client_dummy)
+		i2c_unregister_device(chip->client_dummy);
+
+	kfree(chip);
+	return 0;
+}
+
+static struct i2c_driver max732x_driver = {
+	.driver = {
+		.name	= "max732x",
+		.owner	= THIS_MODULE,
+	},
+	.probe		= max732x_probe,
+	.remove		= __devexit_p(max732x_remove),
+	.id_table	= max732x_id,
+};
+
+static int __init max732x_init(void)
+{
+	return i2c_add_driver(&max732x_driver);
+}
+module_init(max732x_init);
+
+static void __exit max732x_exit(void)
+{
+	i2c_del_driver(&max732x_driver);
+}
+module_exit(max732x_exit);
+
+MODULE_AUTHOR("Eric Miao <eric.miao@marvell.com>");
+MODULE_DESCRIPTION("GPIO expander driver for MAX732X");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/i2c/max732x.h b/include/linux/i2c/max732x.h
new file mode 100644
index 000000000000..e10336631c62
--- /dev/null
+++ b/include/linux/i2c/max732x.h
@@ -0,0 +1,19 @@
+#ifndef __LINUX_I2C_MAX732X_H
+#define __LINUX_I2C_MAX732X_H
+
+/* platform data for the MAX732x 8/16-bit I/O expander driver */
+
+struct max732x_platform_data {
+	/* number of the first GPIO */
+	unsigned	gpio_base;
+
+	void		*context;	/* param to setup/teardown */
+
+	int		(*setup)(struct i2c_client *client,
+				unsigned gpio, unsigned ngpio,
+				void *context);
+	int		(*teardown)(struct i2c_client *client,
+				unsigned gpio, unsigned ngpio,
+				void *context);
+};
+#endif /* __LINUX_I2C_MAX732X_H */
-- 
cgit v1.2.3


From 50c33a84db4aa5082e3af8d873b22344ae2ebea8 Mon Sep 17 00:00:00 2001
From: Samuel Thibault <samuel.thibault@ens-lyon.org>
Date: Fri, 25 Jul 2008 01:46:16 -0700
Subject: ext2: fix typo in Hurd part of include/linux/ext2_fs.h

Fix typo in Hurd part of include/linux/ext2_fs.h

The ';' here is redundant or can even pose problem.  This is actually not
used by the Linux kernel, but it is exposed in GNU/Hurd.

Signed-off-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/ext2_fs.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ext2_fs.h b/include/linux/ext2_fs.h
index 84cec2aa9f1e..2efe7b863cff 100644
--- a/include/linux/ext2_fs.h
+++ b/include/linux/ext2_fs.h
@@ -284,8 +284,8 @@ struct ext2_inode {
 
 #ifdef	__hurd__
 #define i_translator	osd1.hurd1.h_i_translator
-#define i_frag		osd2.hurd2.h_i_frag;
-#define i_fsize		osd2.hurd2.h_i_fsize;
+#define i_frag		osd2.hurd2.h_i_frag
+#define i_fsize		osd2.hurd2.h_i_fsize
 #define i_uid_high	osd2.hurd2.h_i_uid_high
 #define i_gid_high	osd2.hurd2.h_i_gid_high
 #define i_author	osd2.hurd2.h_i_author
-- 
cgit v1.2.3


From ae76dd9a6b5bbe5315fb7028e03f68f75b8538f3 Mon Sep 17 00:00:00 2001
From: Duane Griffin <duaneg@dghda.com>
Date: Fri, 25 Jul 2008 01:46:23 -0700
Subject: ext3: handle corrupted orphan list at mount

If the orphan node list includes valid, untruncatable nodes with nlink > 0
the ext3_orphan_cleanup loop which attempts to delete them will not do so,
causing it to loop forever. Fix by checking for such nodes in the
ext3_orphan_get function.

This patch fixes the second case (image hdb.20000009.softlockup.gz)
reported in http://bugzilla.kernel.org/show_bug.cgi?id=10882.

[akpm@linux-foundation.org: coding-style fixes]
[akpm@linux-foundation.org: printk warning fix]
Signed-off-by: Duane Griffin <duaneg@dghda.com>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ext3/ialloc.c        |  9 +++++++++
 fs/ext3/inode.c         | 20 ++++++++++++++------
 include/linux/ext3_fs.h |  1 +
 3 files changed, 24 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index 77126821b2e9..47b678d73e7a 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -669,6 +669,14 @@ struct inode *ext3_orphan_get(struct super_block *sb, unsigned long ino)
 	if (IS_ERR(inode))
 		goto iget_failed;
 
+	/*
+	 * If the orphans has i_nlinks > 0 then it should be able to be
+	 * truncated, otherwise it won't be removed from the orphan list
+	 * during processing and an infinite loop will result.
+	 */
+	if (inode->i_nlink && !ext3_can_truncate(inode))
+		goto bad_orphan;
+
 	if (NEXT_ORPHAN(inode) > max_ino)
 		goto bad_orphan;
 	brelse(bitmap_bh);
@@ -690,6 +698,7 @@ bad_orphan:
 		printk(KERN_NOTICE "NEXT_ORPHAN(inode)=%u\n",
 		       NEXT_ORPHAN(inode));
 		printk(KERN_NOTICE "max_ino=%lu\n", max_ino);
+		printk(KERN_NOTICE "i_nlink=%u\n", inode->i_nlink);
 		/* Avoid freeing blocks if we got a bad deleted inode */
 		if (inode->i_nlink == 0)
 			inode->i_blocks = 0;
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 6ae4ecf3ce40..74b432fa166b 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -2253,6 +2253,19 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode,
 	}
 }
 
+int ext3_can_truncate(struct inode *inode)
+{
+	if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
+		return 0;
+	if (S_ISREG(inode->i_mode))
+		return 1;
+	if (S_ISDIR(inode->i_mode))
+		return 1;
+	if (S_ISLNK(inode->i_mode))
+		return !ext3_inode_is_fast_symlink(inode);
+	return 0;
+}
+
 /*
  * ext3_truncate()
  *
@@ -2297,12 +2310,7 @@ void ext3_truncate(struct inode *inode)
 	unsigned blocksize = inode->i_sb->s_blocksize;
 	struct page *page;
 
-	if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
-	    S_ISLNK(inode->i_mode)))
-		return;
-	if (ext3_inode_is_fast_symlink(inode))
-		return;
-	if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
+	if (!ext3_can_truncate(inode))
 		return;
 
 	/*
diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h
index 36c540396377..80171ee89a22 100644
--- a/include/linux/ext3_fs.h
+++ b/include/linux/ext3_fs.h
@@ -832,6 +832,7 @@ extern void ext3_discard_reservation (struct inode *);
 extern void ext3_dirty_inode(struct inode *);
 extern int ext3_change_inode_journal_flag(struct inode *, int);
 extern int ext3_get_inode_loc(struct inode *, struct ext3_iloc *);
+extern int ext3_can_truncate(struct inode *inode);
 extern void ext3_truncate (struct inode *);
 extern void ext3_set_inode_flags(struct inode *);
 extern void ext3_get_inode_flags(struct ext3_inode_info *);
-- 
cgit v1.2.3


From de0ca06a99c33df8333955642843331ab6b6e7ff Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Fri, 25 Jul 2008 01:46:34 -0700
Subject: coda: remove CODA_FS_OLD_API

While fixing CONFIG_ leakages to the userspace kernel headers I ran into
CODA_FS_OLD_API.

After five years, are there still people using the old API left?
Especially considering that you have to choose at compile time which API
to support in the kernel (and distributions tend to offer the new API for
some time).

Jan: "The old API can definitely go.  Around the time the new
      interface went in there were some non-Coda userspace file system
      implementations that took a while longer to convert to the new API,
      but by now they all switched to the new interface or in some cases
      to a FUSE-based solution."

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Acked-by: Jan Harkes <jaharkes@cs.cmu.edu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/Kconfig           | 14 --------------
 fs/coda/coda_linux.c |  6 ++----
 fs/coda/psdev.c      |  4 ----
 fs/coda/upcall.c     | 15 +--------------
 include/linux/coda.h | 43 -------------------------------------------
 5 files changed, 3 insertions(+), 79 deletions(-)

(limited to 'include/linux')

diff --git a/fs/Kconfig b/fs/Kconfig
index 37db79a2ff95..ed563b9e352a 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -2093,20 +2093,6 @@ config CODA_FS
 	  To compile the coda client support as a module, choose M here: the
 	  module will be called coda.
 
-config CODA_FS_OLD_API
-	bool "Use 96-bit Coda file identifiers"
-	depends on CODA_FS
-	help
-	  A new kernel-userspace API had to be introduced for Coda v6.0
-	  to support larger 128-bit file identifiers as needed by the
-	  new realms implementation.
-
-	  However this new API is not backward compatible with older
-	  clients. If you really need to run the old Coda userspace
-	  cache manager then say Y.
-
-	  For most cases you probably want to say N.
-
 config AFS_FS
 	tristate "Andrew File System support (AFS) (EXPERIMENTAL)"
 	depends on INET && EXPERIMENTAL
diff --git a/fs/coda/coda_linux.c b/fs/coda/coda_linux.c
index e1c854890f94..bf4a3fd3c8e3 100644
--- a/fs/coda/coda_linux.c
+++ b/fs/coda/coda_linux.c
@@ -28,11 +28,9 @@ int coda_fake_statfs;
 char * coda_f2s(struct CodaFid *f)
 {
 	static char s[60];
-#ifdef CONFIG_CODA_FS_OLD_API
- 	sprintf(s, "(%08x.%08x.%08x)", f->opaque[0], f->opaque[1], f->opaque[2]);
-#else
+
  	sprintf(s, "(%08x.%08x.%08x.%08x)", f->opaque[0], f->opaque[1], f->opaque[2], f->opaque[3]);
-#endif
+
 	return s;
 }
 
diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c
index 40c36f7352a6..0d9b80ec689c 100644
--- a/fs/coda/psdev.c
+++ b/fs/coda/psdev.c
@@ -378,11 +378,7 @@ MODULE_AUTHOR("Jan Harkes, Peter J. Braam");
 MODULE_DESCRIPTION("Coda Distributed File System VFS interface");
 MODULE_ALIAS_CHARDEV_MAJOR(CODA_PSDEV_MAJOR);
 MODULE_LICENSE("GPL");
-#ifdef CONFIG_CODA_FS_OLD_API
-MODULE_VERSION("5.3.21");
-#else
 MODULE_VERSION("6.6");
-#endif
 
 static int __init init_coda(void)
 {
diff --git a/fs/coda/upcall.c b/fs/coda/upcall.c
index 359e531094dd..ce432bca95d1 100644
--- a/fs/coda/upcall.c
+++ b/fs/coda/upcall.c
@@ -52,12 +52,8 @@ static void *alloc_upcall(int opcode, int size)
         inp->ih.opcode = opcode;
 	inp->ih.pid = current->pid;
 	inp->ih.pgid = task_pgrp_nr(current);
-#ifdef CONFIG_CODA_FS_OLD_API
-	memset(&inp->ih.cred, 0, sizeof(struct coda_cred));
-	inp->ih.cred.cr_fsuid = current->fsuid;
-#else
 	inp->ih.uid = current->fsuid;
-#endif
+
 	return (void*)inp;
 }
 
@@ -166,20 +162,11 @@ int venus_close(struct super_block *sb, struct CodaFid *fid, int flags,
 	union inputArgs *inp;
 	union outputArgs *outp;
 	int insize, outsize, error;
-#ifdef CONFIG_CODA_FS_OLD_API
-	struct coda_cred cred = { 0, };
-	cred.cr_fsuid = uid;
-#endif
 	
 	insize = SIZE(release);
 	UPARG(CODA_CLOSE);
 	
-#ifdef CONFIG_CODA_FS_OLD_API
-	memcpy(&(inp->ih.cred), &cred, sizeof(cred));
-#else
 	inp->ih.uid = uid;
-#endif
-	
         inp->coda_close.VFid = *fid;
         inp->coda_close.flags = flags;
 
diff --git a/include/linux/coda.h b/include/linux/coda.h
index b5cf0780c51a..96c87693800b 100644
--- a/include/linux/coda.h
+++ b/include/linux/coda.h
@@ -199,28 +199,6 @@ typedef u_int32_t vuid_t;
 typedef u_int32_t vgid_t;
 #endif /*_VUID_T_ */
 
-#ifdef CONFIG_CODA_FS_OLD_API
-struct CodaFid {
-	u_int32_t opaque[3];
-};
-
-static __inline__ ino_t  coda_f2i(struct CodaFid *fid)
-{
-	if ( ! fid ) 
-		return 0; 
-	if (fid->opaque[1] == 0xfffffffe || fid->opaque[1] == 0xffffffff)
-		return ((fid->opaque[0] << 20) | (fid->opaque[2] & 0xfffff));
-	else
-		return (fid->opaque[2] + (fid->opaque[1]<<10) + (fid->opaque[0]<<20));
-}
-
-struct coda_cred {
-    vuid_t cr_uid, cr_euid, cr_suid, cr_fsuid; /* Real, efftve, set, fs uid*/
-    vgid_t cr_groupid, cr_egid, cr_sgid, cr_fsgid; /* same for groups */
-};
-
-#else /* not defined(CONFIG_CODA_FS_OLD_API) */
-
 struct CodaFid {
 	u_int32_t opaque[4];
 };
@@ -228,8 +206,6 @@ struct CodaFid {
 #define coda_f2i(fid)\
 	(fid ? (fid->opaque[3] ^ (fid->opaque[2]<<10) ^ (fid->opaque[1]<<20) ^ fid->opaque[0]) : 0)
 
-#endif
-
 #ifndef _VENUS_VATTR_T_
 #define _VENUS_VATTR_T_
 /*
@@ -313,15 +289,7 @@ struct coda_statfs {
 
 #define CIOC_KERNEL_VERSION _IOWR('c', 10, size_t)
 
-#if 0
-#define CODA_KERNEL_VERSION 0 /* don't care about kernel version number */
-#define CODA_KERNEL_VERSION 1 /* The old venus 4.6 compatible interface */
-#endif
-#ifdef CONFIG_CODA_FS_OLD_API
-#define CODA_KERNEL_VERSION 2 /* venus_lookup got an extra parameter */
-#else
 #define CODA_KERNEL_VERSION 3 /* 128-bit file identifiers */
-#endif
 
 /*
  *        Venus <-> Coda  RPC arguments
@@ -329,16 +297,9 @@ struct coda_statfs {
 struct coda_in_hdr {
     u_int32_t opcode;
     u_int32_t unique;	    /* Keep multiple outstanding msgs distinct */
-#ifdef CONFIG_CODA_FS_OLD_API
-    u_int16_t pid;	    /* Common to all */
-    u_int16_t pgid;	    /* Common to all */
-    u_int16_t sid;          /* Common to all */
-    struct coda_cred cred;  /* Common to all */
-#else
     pid_t pid;
     pid_t pgid;
     vuid_t uid;
-#endif
 };
 
 /* Really important that opcode and unique are 1st two fields! */
@@ -613,11 +574,7 @@ struct coda_vget_out {
 /* CODA_PURGEUSER is a venus->kernel call */
 struct coda_purgeuser_out {
     struct coda_out_hdr oh;
-#ifdef CONFIG_CODA_FS_OLD_API
-    struct coda_cred cred;
-#else
     vuid_t uid;
-#endif
 };
 
 /* coda_zapfile: */
-- 
cgit v1.2.3


From f68215c4640a38d66429014e524a627bf572d26a Mon Sep 17 00:00:00 2001
From: Jeff Mahoney <jeffm@suse.com>
Date: Fri, 25 Jul 2008 01:46:38 -0700
Subject: reiserfs: convert j_lock to mutex

j_lock is a semaphore but uses it as if it were a mutex.  This patch converts
it to a mutex.

Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Cc: Matthew Wilcox <matthew@wil.cx>
Cc: Chris Mason <chris.mason@oracle.com>
Cc: Edward Shishkin <edward.shishkin@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/reiserfs/journal.c          | 6 +++---
 include/linux/reiserfs_fs_sb.h | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index e396b2fa4743..0f7b1e807e60 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -558,13 +558,13 @@ static inline void insert_journal_hash(struct reiserfs_journal_cnode **table,
 static inline void lock_journal(struct super_block *p_s_sb)
 {
 	PROC_INFO_INC(p_s_sb, journal.lock_journal);
-	down(&SB_JOURNAL(p_s_sb)->j_lock);
+	mutex_lock(&SB_JOURNAL(p_s_sb)->j_mutex);
 }
 
 /* unlock the current transaction */
 static inline void unlock_journal(struct super_block *p_s_sb)
 {
-	up(&SB_JOURNAL(p_s_sb)->j_lock);
+	mutex_unlock(&SB_JOURNAL(p_s_sb)->j_mutex);
 }
 
 static inline void get_journal_list(struct reiserfs_journal_list *jl)
@@ -2837,7 +2837,7 @@ int journal_init(struct super_block *p_s_sb, const char *j_dev_name,
 	journal->j_last = NULL;
 	journal->j_first = NULL;
 	init_waitqueue_head(&(journal->j_join_wait));
-	sema_init(&journal->j_lock, 1);
+	mutex_init(&journal->j_mutex);
 	sema_init(&journal->j_flush_sem, 1);
 
 	journal->j_trans_id = 10;
diff --git a/include/linux/reiserfs_fs_sb.h b/include/linux/reiserfs_fs_sb.h
index 336ee43ed7d8..49b639b88bac 100644
--- a/include/linux/reiserfs_fs_sb.h
+++ b/include/linux/reiserfs_fs_sb.h
@@ -193,7 +193,7 @@ struct reiserfs_journal {
 	struct buffer_head *j_header_bh;
 
 	time_t j_trans_start_time;	/* time this transaction started */
-	struct semaphore j_lock;
+	struct mutex j_mutex;
 	struct semaphore j_flush_sem;
 	wait_queue_head_t j_join_wait;	/* wait for current transaction to finish before starting new one */
 	atomic_t j_jlock;	/* lock for j_join_wait */
-- 
cgit v1.2.3


From afe70259076fff0446001eaa1a287f615241a357 Mon Sep 17 00:00:00 2001
From: Jeff Mahoney <jeffm@suse.com>
Date: Fri, 25 Jul 2008 01:46:39 -0700
Subject: reiserfs: convert j_flush_sem to mutex

j_flush_sem is a semaphore but uses it as if it were a mutex.  This patch
converts it to a mutex.

[akpm@linux-foundation.org: fix mutex_trylock retval treatment]
Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Cc: Matthew Wilcox <matthew@wil.cx>
Cc: Chris Mason <chris.mason@oracle.com>
Cc: Edward Shishkin <edward.shishkin@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/reiserfs/journal.c          | 14 +++++++-------
 include/linux/reiserfs_fs_sb.h |  2 +-
 2 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 0f7b1e807e60..3cb4a562030e 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -1411,8 +1411,8 @@ static int flush_journal_list(struct super_block *s,
 
 	/* if flushall == 0, the lock is already held */
 	if (flushall) {
-		down(&journal->j_flush_sem);
-	} else if (!down_trylock(&journal->j_flush_sem)) {
+		mutex_lock(&journal->j_flush_mutex);
+	} else if (mutex_trylock(&journal->j_flush_mutex)) {
 		BUG();
 	}
 
@@ -1642,7 +1642,7 @@ static int flush_journal_list(struct super_block *s,
 	jl->j_state = 0;
 	put_journal_list(s, jl);
 	if (flushall)
-		up(&journal->j_flush_sem);
+		mutex_unlock(&journal->j_flush_mutex);
 	put_fs_excl();
 	return err;
 }
@@ -1772,12 +1772,12 @@ static int kupdate_transactions(struct super_block *s,
 	struct reiserfs_journal *journal = SB_JOURNAL(s);
 	chunk.nr = 0;
 
-	down(&journal->j_flush_sem);
+	mutex_lock(&journal->j_flush_mutex);
 	if (!journal_list_still_alive(s, orig_trans_id)) {
 		goto done;
 	}
 
-	/* we've got j_flush_sem held, nobody is going to delete any
+	/* we've got j_flush_mutex held, nobody is going to delete any
 	 * of these lists out from underneath us
 	 */
 	while ((num_trans && transactions_flushed < num_trans) ||
@@ -1812,7 +1812,7 @@ static int kupdate_transactions(struct super_block *s,
 	}
 
       done:
-	up(&journal->j_flush_sem);
+	mutex_unlock(&journal->j_flush_mutex);
 	return ret;
 }
 
@@ -2838,7 +2838,7 @@ int journal_init(struct super_block *p_s_sb, const char *j_dev_name,
 	journal->j_first = NULL;
 	init_waitqueue_head(&(journal->j_join_wait));
 	mutex_init(&journal->j_mutex);
-	sema_init(&journal->j_flush_sem, 1);
+	mutex_init(&journal->j_flush_mutex);
 
 	journal->j_trans_id = 10;
 	journal->j_mount_id = 10;
diff --git a/include/linux/reiserfs_fs_sb.h b/include/linux/reiserfs_fs_sb.h
index 49b639b88bac..c0751724ee64 100644
--- a/include/linux/reiserfs_fs_sb.h
+++ b/include/linux/reiserfs_fs_sb.h
@@ -194,7 +194,7 @@ struct reiserfs_journal {
 
 	time_t j_trans_start_time;	/* time this transaction started */
 	struct mutex j_mutex;
-	struct semaphore j_flush_sem;
+	struct mutex j_flush_mutex;
 	wait_queue_head_t j_join_wait;	/* wait for current transaction to finish before starting new one */
 	atomic_t j_jlock;	/* lock for j_join_wait */
 	int j_list_bitmap_index;	/* number of next list bitmap to use */
-- 
cgit v1.2.3


From 90415deac75a761a25239af6f56381546f8d2201 Mon Sep 17 00:00:00 2001
From: Jeff Mahoney <jeffm@suse.com>
Date: Fri, 25 Jul 2008 01:46:40 -0700
Subject: reiserfs: convert j_commit_lock to mutex

j_commit_lock is a semaphore but uses it as if it were a mutex.  This patch
converts it to a mutex.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Cc: Matthew Wilcox <matthew@wil.cx>
Cc: Chris Mason <chris.mason@oracle.com>
Cc: Edward Shishkin <edward.shishkin@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/reiserfs/journal.c          | 22 ++++++++++------------
 include/linux/reiserfs_fs_sb.h |  2 +-
 2 files changed, 11 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 3cb4a562030e..c8f60ee183b5 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -34,15 +34,10 @@
 **		        from within kupdate, it will ignore the immediate flag
 */
 
-#include <asm/uaccess.h>
-#include <asm/system.h>
-
 #include <linux/time.h>
 #include <linux/semaphore.h>
-
 #include <linux/vmalloc.h>
 #include <linux/reiserfs_fs.h>
-
 #include <linux/kernel.h>
 #include <linux/errno.h>
 #include <linux/fcntl.h>
@@ -54,6 +49,9 @@
 #include <linux/writeback.h>
 #include <linux/blkdev.h>
 #include <linux/backing-dev.h>
+#include <linux/uaccess.h>
+
+#include <asm/system.h>
 
 /* gets a struct reiserfs_journal_list * from a list head */
 #define JOURNAL_LIST_ENTRY(h) (list_entry((h), struct reiserfs_journal_list, \
@@ -1045,9 +1043,9 @@ static int flush_commit_list(struct super_block *s,
 	}
 
 	/* make sure nobody is trying to flush this one at the same time */
-	down(&jl->j_commit_lock);
+	mutex_lock(&jl->j_commit_mutex);
 	if (!journal_list_still_alive(s, trans_id)) {
-		up(&jl->j_commit_lock);
+		mutex_unlock(&jl->j_commit_mutex);
 		goto put_jl;
 	}
 	BUG_ON(jl->j_trans_id == 0);
@@ -1057,7 +1055,7 @@ static int flush_commit_list(struct super_block *s,
 		if (flushall) {
 			atomic_set(&(jl->j_older_commits_done), 1);
 		}
-		up(&jl->j_commit_lock);
+		mutex_unlock(&jl->j_commit_mutex);
 		goto put_jl;
 	}
 
@@ -1181,7 +1179,7 @@ static int flush_commit_list(struct super_block *s,
 	if (flushall) {
 		atomic_set(&(jl->j_older_commits_done), 1);
 	}
-	up(&jl->j_commit_lock);
+	mutex_unlock(&jl->j_commit_mutex);
       put_jl:
 	put_journal_list(s, jl);
 
@@ -2556,7 +2554,7 @@ static struct reiserfs_journal_list *alloc_journal_list(struct super_block *s)
 	INIT_LIST_HEAD(&jl->j_working_list);
 	INIT_LIST_HEAD(&jl->j_tail_bh_list);
 	INIT_LIST_HEAD(&jl->j_bh_list);
-	sema_init(&jl->j_commit_lock, 1);
+	mutex_init(&jl->j_commit_mutex);
 	SB_JOURNAL(s)->j_num_lists++;
 	get_journal_list(jl);
 	return jl;
@@ -4030,7 +4028,7 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
 	 * the new transaction is fully setup, and we've already flushed the
 	 * ordered bh list
 	 */
-	down(&jl->j_commit_lock);
+	mutex_lock(&jl->j_commit_mutex);
 
 	/* save the transaction id in case we need to commit it later */
 	commit_trans_id = jl->j_trans_id;
@@ -4196,7 +4194,7 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
 		lock_kernel();
 	}
 	BUG_ON(!list_empty(&jl->j_tail_bh_list));
-	up(&jl->j_commit_lock);
+	mutex_unlock(&jl->j_commit_mutex);
 
 	/* honor the flush wishes from the caller, simple commits can
 	 ** be done outside the journal lock, they are done below
diff --git a/include/linux/reiserfs_fs_sb.h b/include/linux/reiserfs_fs_sb.h
index c0751724ee64..315517e8bfa1 100644
--- a/include/linux/reiserfs_fs_sb.h
+++ b/include/linux/reiserfs_fs_sb.h
@@ -152,7 +152,7 @@ struct reiserfs_journal_list {
 	atomic_t j_nonzerolen;
 	atomic_t j_commit_left;
 	atomic_t j_older_commits_done;	/* all commits older than this on disk */
-	struct semaphore j_commit_lock;
+	struct mutex j_commit_mutex;
 	unsigned long j_trans_id;
 	time_t j_timestamp;
 	struct reiserfs_list_bitmap *j_list_bitmap;
-- 
cgit v1.2.3


From 4596c8aaf96e8634ca755c9f34b91420a39bebd4 Mon Sep 17 00:00:00 2001
From: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Date: Fri, 25 Jul 2008 01:46:42 -0700
Subject: fat: fix VFAT_IOCTL_READDIR_xxx and cleanup for userland

"struct dirent" is a kernel type here, but is a **different type** in
userspace!  This means both the structure and the IOCTL number is wrong!

So, this adds new "struct __fat_dirent" to generate correct IOCTL number.
And kernel stuff moves to under __KERNEL__.

Signed-off-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/msdos_fs.h | 47 +++++++++++++++++++++++++++--------------------
 1 file changed, 27 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/msdos_fs.h b/include/linux/msdos_fs.h
index 81cd36b735b0..5161394c7894 100644
--- a/include/linux/msdos_fs.h
+++ b/include/linux/msdos_fs.h
@@ -2,11 +2,11 @@
 #define _LINUX_MSDOS_FS_H
 
 #include <linux/magic.h>
+#include <asm/byteorder.h>
 
 /*
  * The MS-DOS filesystem constants/structures
  */
-#include <asm/byteorder.h>
 
 #define SECTOR_SIZE	512		/* sector size (bytes) */
 #define SECTOR_BITS	9		/* log2(SECTOR_SIZE) */
@@ -89,24 +89,22 @@
 #define IS_FSINFO(x)	(le32_to_cpu((x)->signature1) == FAT_FSINFO_SIG1 \
 			 && le32_to_cpu((x)->signature2) == FAT_FSINFO_SIG2)
 
+struct __fat_dirent {
+	long		d_ino;
+	__kernel_off_t	d_off;
+	unsigned short	d_reclen;
+	char		d_name[256]; /* We must not include limits.h! */
+};
+
 /*
  * ioctl commands
  */
-#define VFAT_IOCTL_READDIR_BOTH		_IOR('r', 1, struct dirent [2])
-#define VFAT_IOCTL_READDIR_SHORT	_IOR('r', 2, struct dirent [2])
+#define VFAT_IOCTL_READDIR_BOTH		_IOR('r', 1, struct __fat_dirent[2])
+#define VFAT_IOCTL_READDIR_SHORT	_IOR('r', 2, struct __fat_dirent[2])
 /* <linux/videotext.h> has used 0x72 ('r') in collision, so skip a few */
 #define FAT_IOCTL_GET_ATTRIBUTES	_IOR('r', 0x10, __u32)
 #define FAT_IOCTL_SET_ATTRIBUTES	_IOW('r', 0x11, __u32)
 
-/*
- * vfat shortname flags
- */
-#define VFAT_SFN_DISPLAY_LOWER	0x0001 /* convert to lowercase for display */
-#define VFAT_SFN_DISPLAY_WIN95	0x0002 /* emulate win95 rule for display */
-#define VFAT_SFN_DISPLAY_WINNT	0x0004 /* emulate winnt rule for display */
-#define VFAT_SFN_CREATE_WIN95	0x0100 /* emulate win95 rule for create */
-#define VFAT_SFN_CREATE_WINNT	0x0200 /* emulate winnt rule for create */
-
 struct fat_boot_sector {
 	__u8	ignored[3];	/* Boot strap short or near jump */
 	__u8	system_id[8];	/* Name - can be used to special case
@@ -168,14 +166,6 @@ struct msdos_dir_slot {
 	__u8    name11_12[4];	/* last 2 characters in name */
 };
 
-struct fat_slot_info {
-	loff_t i_pos;		/* on-disk position of directory entry */
-	loff_t slot_off;	/* offset for slot or de start */
-	int nr_slots;		/* number of slots + 1(de) in filename */
-	struct msdos_dir_entry *de;
-	struct buffer_head *bh;
-};
-
 #ifdef __KERNEL__
 
 #include <linux/buffer_head.h>
@@ -184,6 +174,15 @@ struct fat_slot_info {
 #include <linux/fs.h>
 #include <linux/mutex.h>
 
+/*
+ * vfat shortname flags
+ */
+#define VFAT_SFN_DISPLAY_LOWER	0x0001 /* convert to lowercase for display */
+#define VFAT_SFN_DISPLAY_WIN95	0x0002 /* emulate win95 rule for display */
+#define VFAT_SFN_DISPLAY_WINNT	0x0004 /* emulate winnt rule for display */
+#define VFAT_SFN_CREATE_WIN95	0x0100 /* emulate win95 rule for create */
+#define VFAT_SFN_CREATE_WINNT	0x0200 /* emulate winnt rule for create */
+
 struct fat_mount_options {
 	uid_t fs_uid;
 	gid_t fs_gid;
@@ -267,6 +266,14 @@ struct msdos_inode_info {
 	struct inode vfs_inode;
 };
 
+struct fat_slot_info {
+	loff_t i_pos;		/* on-disk position of directory entry */
+	loff_t slot_off;	/* offset for slot or de start */
+	int nr_slots;		/* number of slots + 1(de) in filename */
+	struct msdos_dir_entry *de;
+	struct buffer_head *bh;
+};
+
 static inline struct msdos_sb_info *MSDOS_SB(struct super_block *sb)
 {
 	return sb->s_fs_info;
-- 
cgit v1.2.3


From 7557bc66be629d19a402e752673708bfbb8b5e86 Mon Sep 17 00:00:00 2001
From: Rene Scharfe <rene.scharfe@lsrfire.ath.cx>
Date: Fri, 25 Jul 2008 01:46:45 -0700
Subject: msdos fs: remove unsettable atari option

It has been impossible to set the option 'atari' of the MSDOS filesystem
for several years.  Since nobody seems to have missed it, let's remove its
remains.

Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx>
Acked-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/msdos/namei.c         | 18 ++++++------------
 include/linux/msdos_fs.h |  1 -
 2 files changed, 6 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/fs/msdos/namei.c b/fs/msdos/namei.c
index 1f7f2956412a..e4ad6c6b753e 100644
--- a/fs/msdos/namei.c
+++ b/fs/msdos/namei.c
@@ -14,12 +14,7 @@
 
 /* Characters that are undesirable in an MS-DOS file name */
 static unsigned char bad_chars[] = "*?<>|\"";
-static unsigned char bad_if_strict_pc[] = "+=,; ";
-/* GEMDOS is less restrictive */
-static unsigned char bad_if_strict_atari[] = " ";
-
-#define bad_if_strict(opts) \
-	((opts)->atari ? bad_if_strict_atari : bad_if_strict_pc)
+static unsigned char bad_if_strict[] = "+=,; ";
 
 /***** Formats an MS-DOS file name. Rejects invalid names. */
 static int msdos_format_name(const unsigned char *name, int len,
@@ -40,21 +35,20 @@ static int msdos_format_name(const unsigned char *name, int len,
 			/* Get rid of dot - test for it elsewhere */
 			name++;
 			len--;
-		} else if (!opts->atari)
+		} else
 			return -EINVAL;
 	}
 	/*
-	 * disallow names that _really_ start with a dot for MS-DOS,
-	 * GEMDOS does not care
+	 * disallow names that _really_ start with a dot
 	 */
-	space = !opts->atari;
+	space = 1;
 	c = 0;
 	for (walk = res; len && walk - res < 8; walk++) {
 		c = *name++;
 		len--;
 		if (opts->name_check != 'r' && strchr(bad_chars, c))
 			return -EINVAL;
-		if (opts->name_check == 's' && strchr(bad_if_strict(opts), c))
+		if (opts->name_check == 's' && strchr(bad_if_strict, c))
 			return -EINVAL;
 		if (c >= 'A' && c <= 'Z' && opts->name_check == 's')
 			return -EINVAL;
@@ -94,7 +88,7 @@ static int msdos_format_name(const unsigned char *name, int len,
 			if (opts->name_check != 'r' && strchr(bad_chars, c))
 				return -EINVAL;
 			if (opts->name_check == 's' &&
-			    strchr(bad_if_strict(opts), c))
+			    strchr(bad_if_strict, c))
 				return -EINVAL;
 			if (c < ' ' || c == ':' || c == '\\')
 				return -EINVAL;
diff --git a/include/linux/msdos_fs.h b/include/linux/msdos_fs.h
index 5161394c7894..3346c9c8f17a 100644
--- a/include/linux/msdos_fs.h
+++ b/include/linux/msdos_fs.h
@@ -201,7 +201,6 @@ struct fat_mount_options {
 		 utf8:1,	  /* Use of UTF-8 character set (Default) */
 		 unicode_xlate:1, /* create escape sequences for unhandled Unicode */
 		 numtail:1,       /* Does first alias have a numeric '~1' type tail? */
-		 atari:1,         /* Use Atari GEMDOS variation of MS-DOS fs */
 		 flush:1,	  /* write things quickly */
 		 nocase:1,	  /* Does this need case conversion? 0=need case conversion*/
 		 usefree:1;	  /* Use free_clusters for FAT32 */
-- 
cgit v1.2.3


From cf6ae8b50e0ee3f764392dadd1970e3f03c40773 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Fri, 25 Jul 2008 01:46:46 -0700
Subject: remove the in-kernel struct dirent{,64}

The kernel struct dirent{,64} were different from the ones in
userspace.

Even worse, we exported the kernel ones to userspace.

But after the fat usages are fixed we can remove the conflicting
kernel versions.

Reviewed-by: H. Peter Anvin <hpa@kernel.org>
Signed-off-by: Adrian Bunk <bunk@kernel.org>
Cc: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/Kbuild   |  1 -
 include/linux/dirent.h | 20 --------------------
 2 files changed, 21 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index 71d70d1fbce2..a18008ce7aba 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -189,7 +189,6 @@ unifdef-y += connector.h
 unifdef-y += cuda.h
 unifdef-y += cyclades.h
 unifdef-y += dccp.h
-unifdef-y += dirent.h
 unifdef-y += dlm.h
 unifdef-y += dlm_plock.h
 unifdef-y += edd.h
diff --git a/include/linux/dirent.h b/include/linux/dirent.h
index 5d6023b87800..f072fb8d10a3 100644
--- a/include/linux/dirent.h
+++ b/include/linux/dirent.h
@@ -1,23 +1,6 @@
 #ifndef _LINUX_DIRENT_H
 #define _LINUX_DIRENT_H
 
-struct dirent {
-	long		d_ino;
-	__kernel_off_t	d_off;
-	unsigned short	d_reclen;
-	char		d_name[256]; /* We must not include limits.h! */
-};
-
-struct dirent64 {
-	__u64		d_ino;
-	__s64		d_off;
-	unsigned short	d_reclen;
-	unsigned char	d_type;
-	char		d_name[256];
-};
-
-#ifdef __KERNEL__
-
 struct linux_dirent64 {
 	u64		d_ino;
 	s64		d_off;
@@ -26,7 +9,4 @@ struct linux_dirent64 {
 	char		d_name[0];
 };
 
-#endif	/* __KERNEL__ */
-
-
 #endif
-- 
cgit v1.2.3


From e8938a62a85d1f487e02c3b01955b47c9598f6d2 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Fri, 25 Jul 2008 01:46:46 -0700
Subject: remove unused #include <linux/dirent.h>'s

Remove some unused #include <linux/dirent.h>'s.

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Cc: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/mips/kernel/linux32.c | 1 -
 fs/compat_ioctl.c          | 1 -
 fs/smbfs/cache.c           | 1 -
 fs/smbfs/proc.c            | 1 -
 include/linux/nfsd/nfsd.h  | 1 -
 5 files changed, 5 deletions(-)

(limited to 'include/linux')

diff --git a/arch/mips/kernel/linux32.c b/arch/mips/kernel/linux32.c
index c266211ed653..2fefb14414b7 100644
--- a/arch/mips/kernel/linux32.c
+++ b/arch/mips/kernel/linux32.c
@@ -11,7 +11,6 @@
 #include <linux/file.h>
 #include <linux/smp_lock.h>
 #include <linux/highuid.h>
-#include <linux/dirent.h>
 #include <linux/resource.h>
 #include <linux/highmem.h>
 #include <linux/time.h>
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 18e2c548161d..5235c67e7594 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -25,7 +25,6 @@
 #include <linux/slab.h>
 #include <linux/raid/md.h>
 #include <linux/kd.h>
-#include <linux/dirent.h>
 #include <linux/route.h>
 #include <linux/in6.h>
 #include <linux/ipv6_route.h>
diff --git a/fs/smbfs/cache.c b/fs/smbfs/cache.c
index 8182f0542a21..8c177eb7e344 100644
--- a/fs/smbfs/cache.c
+++ b/fs/smbfs/cache.c
@@ -13,7 +13,6 @@
 #include <linux/errno.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
-#include <linux/dirent.h>
 #include <linux/smb_fs.h>
 #include <linux/pagemap.h>
 #include <linux/net.h>
diff --git a/fs/smbfs/proc.c b/fs/smbfs/proc.c
index d517a27b7f4b..ee536e8a649a 100644
--- a/fs/smbfs/proc.c
+++ b/fs/smbfs/proc.c
@@ -16,7 +16,6 @@
 #include <linux/stat.h>
 #include <linux/fcntl.h>
 #include <linux/dcache.h>
-#include <linux/dirent.h>
 #include <linux/nls.h>
 #include <linux/smp_lock.h>
 #include <linux/net.h>
diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h
index a2861d95ecc3..108f47e5fd95 100644
--- a/include/linux/nfsd/nfsd.h
+++ b/include/linux/nfsd/nfsd.h
@@ -12,7 +12,6 @@
 
 #include <linux/types.h>
 #include <linux/unistd.h>
-#include <linux/dirent.h>
 #include <linux/fs.h>
 #include <linux/posix_acl.h>
 #include <linux/mount.h>
-- 
cgit v1.2.3


From b271e067c896ad4082b15e96077675d08db40625 Mon Sep 17 00:00:00 2001
From: Joe Peterson <joe@skyrush.com>
Date: Fri, 25 Jul 2008 01:46:47 -0700
Subject: fatfs: add UTC timestamp option

Provide a new mount option ("tz=UTC") for DOS (vfat/msdos) filesystems,
allowing timestamps to be in coordinated universal time (UTC) rather than
local time in applications where doing this is advantageous.

In particular, portable devices that use fat/vfat (such as digital
cameras) can benefit from using UTC in their internal clocks, thus
avoiding daylight saving time errors and general time ambiguity issues.
The user of the device does not have to worry about changing the time when
moving from place or when daylight saving changes.

The new mount option, when set, disables the counter-adjustment that Linux
currently makes to FAT timestamp info in anticipation of the normal
userspace time zone correction.  When used in this new mode, all daylight
saving time and time zone handling is done in userspace as is normal for
many other filesystems (like ext3).  The default mode, which remains
unchanged, is still appropriate when mounting volumes written in Windows
(because of its use of local time).

I originally based this patch on one submitted last year by Paul Collins,
but I updated it to work with current source and changed variable/option
naming.  Ogawa Hirofumi (who maintains these filesystems) and I discussed
this patch at length on lkml, and he suggested using the option name in
the attached version of the patch.  Barry Bouwsma pointed out a good
addition to the patch as well.

Signed-off-by: Joe Peterson <joe@skyrush.com>
Signed-off-by: Paul Collins <paul@ondioline.org>
Acked-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Cc: Barry Bouwsma <free_beer_for_all@yahoo.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/fat/dir.c             |  2 +-
 fs/fat/inode.c           | 27 ++++++++++++++++++++-------
 fs/fat/misc.c            | 10 ++++++----
 fs/msdos/namei.c         |  3 ++-
 fs/vfat/namei.c          |  2 +-
 include/linux/msdos_fs.h |  8 +++++---
 6 files changed, 35 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index 4c35477bc94c..cd4a0162e10d 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -1101,7 +1101,7 @@ int fat_alloc_new_dir(struct inode *dir, struct timespec *ts)
 		goto error_free;
 	}
 
-	fat_date_unix2dos(ts->tv_sec, &time, &date);
+	fat_date_unix2dos(ts->tv_sec, &time, &date, sbi->options.tz_utc);
 
 	de = (struct msdos_dir_entry *)bhs[0]->b_data;
 	/* filling the new directory slots ("." and ".." entries) */
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 60deb5fd1188..23676f9d79ce 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -382,17 +382,20 @@ static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de)
 	inode->i_blocks = ((inode->i_size + (sbi->cluster_size - 1))
 			   & ~((loff_t)sbi->cluster_size - 1)) >> 9;
 	inode->i_mtime.tv_sec =
-		date_dos2unix(le16_to_cpu(de->time), le16_to_cpu(de->date));
+		date_dos2unix(le16_to_cpu(de->time), le16_to_cpu(de->date),
+			      sbi->options.tz_utc);
 	inode->i_mtime.tv_nsec = 0;
 	if (sbi->options.isvfat) {
 		int secs = de->ctime_cs / 100;
 		int csecs = de->ctime_cs % 100;
 		inode->i_ctime.tv_sec  =
 			date_dos2unix(le16_to_cpu(de->ctime),
-				      le16_to_cpu(de->cdate)) + secs;
+				      le16_to_cpu(de->cdate),
+				      sbi->options.tz_utc) + secs;
 		inode->i_ctime.tv_nsec = csecs * 10000000;
 		inode->i_atime.tv_sec =
-			date_dos2unix(0, le16_to_cpu(de->adate));
+			date_dos2unix(0, le16_to_cpu(de->adate),
+				      sbi->options.tz_utc);
 		inode->i_atime.tv_nsec = 0;
 	} else
 		inode->i_ctime = inode->i_atime = inode->i_mtime;
@@ -591,11 +594,14 @@ retry:
 	raw_entry->attr = fat_attr(inode);
 	raw_entry->start = cpu_to_le16(MSDOS_I(inode)->i_logstart);
 	raw_entry->starthi = cpu_to_le16(MSDOS_I(inode)->i_logstart >> 16);
-	fat_date_unix2dos(inode->i_mtime.tv_sec, &raw_entry->time, &raw_entry->date);
+	fat_date_unix2dos(inode->i_mtime.tv_sec, &raw_entry->time,
+			  &raw_entry->date, sbi->options.tz_utc);
 	if (sbi->options.isvfat) {
 		__le16 atime;
-		fat_date_unix2dos(inode->i_ctime.tv_sec,&raw_entry->ctime,&raw_entry->cdate);
-		fat_date_unix2dos(inode->i_atime.tv_sec,&atime,&raw_entry->adate);
+		fat_date_unix2dos(inode->i_ctime.tv_sec, &raw_entry->ctime,
+				  &raw_entry->cdate, sbi->options.tz_utc);
+		fat_date_unix2dos(inode->i_atime.tv_sec, &atime,
+				  &raw_entry->adate, sbi->options.tz_utc);
 		raw_entry->ctime_cs = (inode->i_ctime.tv_sec & 1) * 100 +
 			inode->i_ctime.tv_nsec / 10000000;
 	}
@@ -836,6 +842,8 @@ static int fat_show_options(struct seq_file *m, struct vfsmount *mnt)
 	}
 	if (sbi->options.flush)
 		seq_puts(m, ",flush");
+	if (opts->tz_utc)
+		seq_puts(m, ",tz=UTC");
 
 	return 0;
 }
@@ -848,7 +856,7 @@ enum {
 	Opt_charset, Opt_shortname_lower, Opt_shortname_win95,
 	Opt_shortname_winnt, Opt_shortname_mixed, Opt_utf8_no, Opt_utf8_yes,
 	Opt_uni_xl_no, Opt_uni_xl_yes, Opt_nonumtail_no, Opt_nonumtail_yes,
-	Opt_obsolate, Opt_flush, Opt_err,
+	Opt_obsolate, Opt_flush, Opt_tz_utc, Opt_err,
 };
 
 static match_table_t fat_tokens = {
@@ -883,6 +891,7 @@ static match_table_t fat_tokens = {
 	{Opt_obsolate, "cvf_options=%100s"},
 	{Opt_obsolate, "posix"},
 	{Opt_flush, "flush"},
+	{Opt_tz_utc, "tz=UTC"},
 	{Opt_err, NULL},
 };
 static match_table_t msdos_tokens = {
@@ -947,6 +956,7 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug,
 	opts->utf8 = opts->unicode_xlate = 0;
 	opts->numtail = 1;
 	opts->usefree = opts->nocase = 0;
+	opts->tz_utc = 0;
 	*debug = 0;
 
 	if (!options)
@@ -1036,6 +1046,9 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug,
 		case Opt_flush:
 			opts->flush = 1;
 			break;
+		case Opt_tz_utc:
+			opts->tz_utc = 1;
+			break;
 
 		/* msdos specific */
 		case Opt_dots:
diff --git a/fs/fat/misc.c b/fs/fat/misc.c
index 61f23511eacf..79fb98ad36d4 100644
--- a/fs/fat/misc.c
+++ b/fs/fat/misc.c
@@ -142,7 +142,7 @@ static int day_n[] = {
 };
 
 /* Convert a MS-DOS time/date pair to a UNIX date (seconds since 1 1 70). */
-int date_dos2unix(unsigned short time, unsigned short date)
+int date_dos2unix(unsigned short time, unsigned short date, int tz_utc)
 {
 	int month, year, secs;
 
@@ -156,16 +156,18 @@ int date_dos2unix(unsigned short time, unsigned short date)
 	    ((date & 31)-1+day_n[month]+(year/4)+year*365-((year & 3) == 0 &&
 	    month < 2 ? 1 : 0)+3653);
 			/* days since 1.1.70 plus 80's leap day */
-	secs += sys_tz.tz_minuteswest*60;
+	if (!tz_utc)
+		secs += sys_tz.tz_minuteswest*60;
 	return secs;
 }
 
 /* Convert linear UNIX date to a MS-DOS time/date pair. */
-void fat_date_unix2dos(int unix_date, __le16 *time, __le16 *date)
+void fat_date_unix2dos(int unix_date, __le16 *time, __le16 *date, int tz_utc)
 {
 	int day, year, nl_day, month;
 
-	unix_date -= sys_tz.tz_minuteswest*60;
+	if (!tz_utc)
+		unix_date -= sys_tz.tz_minuteswest*60;
 
 	/* Jan 1 GMT 00:00:00 1980. But what about another time zone? */
 	if (unix_date < 315532800)
diff --git a/fs/msdos/namei.c b/fs/msdos/namei.c
index e4ad6c6b753e..e844b9809d27 100644
--- a/fs/msdos/namei.c
+++ b/fs/msdos/namei.c
@@ -237,6 +237,7 @@ static int msdos_add_entry(struct inode *dir, const unsigned char *name,
 			   int is_dir, int is_hid, int cluster,
 			   struct timespec *ts, struct fat_slot_info *sinfo)
 {
+	struct msdos_sb_info *sbi = MSDOS_SB(dir->i_sb);
 	struct msdos_dir_entry de;
 	__le16 time, date;
 	int err;
@@ -246,7 +247,7 @@ static int msdos_add_entry(struct inode *dir, const unsigned char *name,
 	if (is_hid)
 		de.attr |= ATTR_HIDDEN;
 	de.lcase = 0;
-	fat_date_unix2dos(ts->tv_sec, &time, &date);
+	fat_date_unix2dos(ts->tv_sec, &time, &date, sbi->options.tz_utc);
 	de.cdate = de.adate = 0;
 	de.ctime = 0;
 	de.ctime_cs = 0;
diff --git a/fs/vfat/namei.c b/fs/vfat/namei.c
index b546ba69be82..155c10b4adbd 100644
--- a/fs/vfat/namei.c
+++ b/fs/vfat/namei.c
@@ -621,7 +621,7 @@ shortname:
 	memcpy(de->name, msdos_name, MSDOS_NAME);
 	de->attr = is_dir ? ATTR_DIR : ATTR_ARCH;
 	de->lcase = lcase;
-	fat_date_unix2dos(ts->tv_sec, &time, &date);
+	fat_date_unix2dos(ts->tv_sec, &time, &date, sbi->options.tz_utc);
 	de->time = de->ctime = time;
 	de->date = de->cdate = de->adate = date;
 	de->ctime_cs = 0;
diff --git a/include/linux/msdos_fs.h b/include/linux/msdos_fs.h
index 3346c9c8f17a..ba63858056c7 100644
--- a/include/linux/msdos_fs.h
+++ b/include/linux/msdos_fs.h
@@ -203,7 +203,8 @@ struct fat_mount_options {
 		 numtail:1,       /* Does first alias have a numeric '~1' type tail? */
 		 flush:1,	  /* write things quickly */
 		 nocase:1,	  /* Does this need case conversion? 0=need case conversion*/
-		 usefree:1;	  /* Use free_clusters for FAT32 */
+		 usefree:1,	  /* Use free_clusters for FAT32 */
+		 tz_utc:1;	  /* Filesystem timestamps are in UTC */
 };
 
 #define FAT_HASH_BITS	8
@@ -434,8 +435,9 @@ extern int fat_flush_inodes(struct super_block *sb, struct inode *i1,
 extern void fat_fs_panic(struct super_block *s, const char *fmt, ...);
 extern void fat_clusters_flush(struct super_block *sb);
 extern int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster);
-extern int date_dos2unix(unsigned short time, unsigned short date);
-extern void fat_date_unix2dos(int unix_date, __le16 *time, __le16 *date);
+extern int date_dos2unix(unsigned short time, unsigned short date, int tz_utc);
+extern void fat_date_unix2dos(int unix_date, __le16 *time, __le16 *date,
+			      int tz_utc);
 extern int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs);
 
 int fat_cache_init(void);
-- 
cgit v1.2.3


From b85f4b87a511bea86dac68c4f0fabaee2cac6c4c Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Fri, 25 Jul 2008 01:46:50 -0700
Subject: quota: rename quota functions from upper case, make bigger ones
 non-inline

Cleanup quotaops.h: Rename functions from uppercase to lowercase (and
define backward compatibility macros), move larger functions to dquot.c
and make them non-inline.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/dquot.c               |  53 +++++++++++
 include/linux/quotaops.h | 226 ++++++++++++++++++++++-------------------------
 2 files changed, 160 insertions(+), 119 deletions(-)

(limited to 'include/linux')

diff --git a/fs/dquot.c b/fs/dquot.c
index ad88cf6fcbaf..0bcaf970bbb4 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -1153,6 +1153,28 @@ int dquot_drop(struct inode *inode)
 	return 0;
 }
 
+/* Wrapper to remove references to quota structures from inode */
+void vfs_dq_drop(struct inode *inode)
+{
+	/* Here we can get arbitrary inode from clear_inode() so we have
+	 * to be careful. OTOH we don't need locking as quota operations
+	 * are allowed to change only at mount time */
+	if (!IS_NOQUOTA(inode) && inode->i_sb && inode->i_sb->dq_op
+	    && inode->i_sb->dq_op->drop) {
+		int cnt;
+		/* Test before calling to rule out calls from proc and such
+                 * where we are not allowed to block. Note that this is
+		 * actually reliable test even without the lock - the caller
+		 * must assure that nobody can come after the DQUOT_DROP and
+		 * add quota pointers back anyway */
+		for (cnt = 0; cnt < MAXQUOTAS; cnt++)
+			if (inode->i_dquot[cnt] != NODQUOT)
+				break;
+		if (cnt < MAXQUOTAS)
+			inode->i_sb->dq_op->drop(inode);
+	}
+}
+
 /*
  * Following four functions update i_blocks+i_bytes fields and
  * quota information (together with appropriate checks)
@@ -1426,6 +1448,18 @@ warn_put_all:
 	return ret;
 }
 
+/* Wrapper for transferring ownership of an inode */
+int vfs_dq_transfer(struct inode *inode, struct iattr *iattr)
+{
+	if (sb_any_quota_enabled(inode->i_sb) && !IS_NOQUOTA(inode)) {
+		vfs_dq_init(inode);
+		if (inode->i_sb->dq_op->transfer(inode, iattr) == NO_QUOTA)
+			return 1;
+	}
+	return 0;
+}
+
+
 /*
  * Write info of quota file to disk
  */
@@ -1766,6 +1800,22 @@ out:
 	return error;
 }
 
+/* Wrapper to turn on quotas when remounting rw */
+int vfs_dq_quota_on_remount(struct super_block *sb)
+{
+	int cnt;
+	int ret = 0, err;
+
+	if (!sb->s_qcop || !sb->s_qcop->quota_on)
+		return -ENOSYS;
+	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+		err = sb->s_qcop->quota_on(sb, cnt, 0, NULL, 1);
+		if (err < 0 && !ret)
+			ret = err;
+	}
+	return ret;
+}
+
 /* Generic routine for getting common part of quota structure */
 static void do_get_dqblk(struct dquot *dquot, struct if_dqblk *di)
 {
@@ -2101,8 +2151,11 @@ EXPORT_SYMBOL(dquot_release);
 EXPORT_SYMBOL(dquot_mark_dquot_dirty);
 EXPORT_SYMBOL(dquot_initialize);
 EXPORT_SYMBOL(dquot_drop);
+EXPORT_SYMBOL(vfs_dq_drop);
 EXPORT_SYMBOL(dquot_alloc_space);
 EXPORT_SYMBOL(dquot_alloc_inode);
 EXPORT_SYMBOL(dquot_free_space);
 EXPORT_SYMBOL(dquot_free_inode);
 EXPORT_SYMBOL(dquot_transfer);
+EXPORT_SYMBOL(vfs_dq_transfer);
+EXPORT_SYMBOL(vfs_dq_quota_on_remount);
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index f86702053853..0c8f9fe462af 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -19,34 +19,38 @@
 /*
  * declaration of quota_function calls in kernel.
  */
-extern void sync_dquots(struct super_block *sb, int type);
-
-extern int dquot_initialize(struct inode *inode, int type);
-extern int dquot_drop(struct inode *inode);
-
-extern int dquot_alloc_space(struct inode *inode, qsize_t number, int prealloc);
-extern int dquot_alloc_inode(const struct inode *inode, unsigned long number);
-
-extern int dquot_free_space(struct inode *inode, qsize_t number);
-extern int dquot_free_inode(const struct inode *inode, unsigned long number);
-
-extern int dquot_transfer(struct inode *inode, struct iattr *iattr);
-extern int dquot_commit(struct dquot *dquot);
-extern int dquot_acquire(struct dquot *dquot);
-extern int dquot_release(struct dquot *dquot);
-extern int dquot_commit_info(struct super_block *sb, int type);
-extern int dquot_mark_dquot_dirty(struct dquot *dquot);
-
-extern int vfs_quota_on(struct super_block *sb, int type, int format_id,
-		char *path, int remount);
-extern int vfs_quota_on_mount(struct super_block *sb, char *qf_name,
-		int format_id, int type);
-extern int vfs_quota_off(struct super_block *sb, int type, int remount);
-extern int vfs_quota_sync(struct super_block *sb, int type);
-extern int vfs_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii);
-extern int vfs_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii);
-extern int vfs_get_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di);
-extern int vfs_set_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di);
+void sync_dquots(struct super_block *sb, int type);
+
+int dquot_initialize(struct inode *inode, int type);
+int dquot_drop(struct inode *inode);
+
+int dquot_alloc_space(struct inode *inode, qsize_t number, int prealloc);
+int dquot_alloc_inode(const struct inode *inode, unsigned long number);
+
+int dquot_free_space(struct inode *inode, qsize_t number);
+int dquot_free_inode(const struct inode *inode, unsigned long number);
+
+int dquot_transfer(struct inode *inode, struct iattr *iattr);
+int dquot_commit(struct dquot *dquot);
+int dquot_acquire(struct dquot *dquot);
+int dquot_release(struct dquot *dquot);
+int dquot_commit_info(struct super_block *sb, int type);
+int dquot_mark_dquot_dirty(struct dquot *dquot);
+
+int vfs_quota_on(struct super_block *sb, int type, int format_id,
+ 	char *path, int remount);
+int vfs_quota_on_mount(struct super_block *sb, char *qf_name,
+ 	int format_id, int type);
+int vfs_quota_off(struct super_block *sb, int type, int remount);
+int vfs_quota_sync(struct super_block *sb, int type);
+int vfs_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii);
+int vfs_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii);
+int vfs_get_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di);
+int vfs_set_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di);
+
+void vfs_dq_drop(struct inode *inode);
+int vfs_dq_transfer(struct inode *inode, struct iattr *iattr);
+int vfs_dq_quota_on_remount(struct super_block *sb);
 
 /*
  * Operations supported for diskquotas.
@@ -59,38 +63,16 @@ extern struct quotactl_ops vfs_quotactl_ops;
 
 /* It is better to call this function outside of any transaction as it might
  * need a lot of space in journal for dquot structure allocation. */
-static inline void DQUOT_INIT(struct inode *inode)
+static inline void vfs_dq_init(struct inode *inode)
 {
 	BUG_ON(!inode->i_sb);
 	if (sb_any_quota_enabled(inode->i_sb) && !IS_NOQUOTA(inode))
 		inode->i_sb->dq_op->initialize(inode, -1);
 }
 
-/* The same as with DQUOT_INIT */
-static inline void DQUOT_DROP(struct inode *inode)
-{
-	/* Here we can get arbitrary inode from clear_inode() so we have
-	 * to be careful. OTOH we don't need locking as quota operations
-	 * are allowed to change only at mount time */
-	if (!IS_NOQUOTA(inode) && inode->i_sb && inode->i_sb->dq_op
-	    && inode->i_sb->dq_op->drop) {
-		int cnt;
-		/* Test before calling to rule out calls from proc and such
-                 * where we are not allowed to block. Note that this is
-		 * actually reliable test even without the lock - the caller
-		 * must assure that nobody can come after the DQUOT_DROP and
-		 * add quota pointers back anyway */
-		for (cnt = 0; cnt < MAXQUOTAS; cnt++)
-			if (inode->i_dquot[cnt] != NODQUOT)
-				break;
-		if (cnt < MAXQUOTAS)
-			inode->i_sb->dq_op->drop(inode);
-	}
-}
-
 /* The following allocation/freeing/transfer functions *must* be called inside
  * a transaction (deadlocks possible otherwise) */
-static inline int DQUOT_PREALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr)
+static inline int vfs_dq_prealloc_space_nodirty(struct inode *inode, qsize_t nr)
 {
 	if (sb_any_quota_enabled(inode->i_sb)) {
 		/* Used space is updated in alloc_space() */
@@ -102,15 +84,15 @@ static inline int DQUOT_PREALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr)
 	return 0;
 }
 
-static inline int DQUOT_PREALLOC_SPACE(struct inode *inode, qsize_t nr)
+static inline int vfs_dq_prealloc_space(struct inode *inode, qsize_t nr)
 {
 	int ret;
-        if (!(ret =  DQUOT_PREALLOC_SPACE_NODIRTY(inode, nr)))
+        if (!(ret =  vfs_dq_prealloc_space_nodirty(inode, nr)))
 		mark_inode_dirty(inode);
 	return ret;
 }
 
-static inline int DQUOT_ALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr)
+static inline int vfs_dq_alloc_space_nodirty(struct inode *inode, qsize_t nr)
 {
 	if (sb_any_quota_enabled(inode->i_sb)) {
 		/* Used space is updated in alloc_space() */
@@ -122,25 +104,25 @@ static inline int DQUOT_ALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr)
 	return 0;
 }
 
-static inline int DQUOT_ALLOC_SPACE(struct inode *inode, qsize_t nr)
+static inline int vfs_dq_alloc_space(struct inode *inode, qsize_t nr)
 {
 	int ret;
-	if (!(ret = DQUOT_ALLOC_SPACE_NODIRTY(inode, nr)))
+	if (!(ret = vfs_dq_alloc_space_nodirty(inode, nr)))
 		mark_inode_dirty(inode);
 	return ret;
 }
 
-static inline int DQUOT_ALLOC_INODE(struct inode *inode)
+static inline int vfs_dq_alloc_inode(struct inode *inode)
 {
 	if (sb_any_quota_enabled(inode->i_sb)) {
-		DQUOT_INIT(inode);
+		vfs_dq_init(inode);
 		if (inode->i_sb->dq_op->alloc_inode(inode, 1) == NO_QUOTA)
 			return 1;
 	}
 	return 0;
 }
 
-static inline void DQUOT_FREE_SPACE_NODIRTY(struct inode *inode, qsize_t nr)
+static inline void vfs_dq_free_space_nodirty(struct inode *inode, qsize_t nr)
 {
 	if (sb_any_quota_enabled(inode->i_sb))
 		inode->i_sb->dq_op->free_space(inode, nr);
@@ -148,35 +130,25 @@ static inline void DQUOT_FREE_SPACE_NODIRTY(struct inode *inode, qsize_t nr)
 		inode_sub_bytes(inode, nr);
 }
 
-static inline void DQUOT_FREE_SPACE(struct inode *inode, qsize_t nr)
+static inline void vfs_dq_free_space(struct inode *inode, qsize_t nr)
 {
-	DQUOT_FREE_SPACE_NODIRTY(inode, nr);
+	vfs_dq_free_space_nodirty(inode, nr);
 	mark_inode_dirty(inode);
 }
 
-static inline void DQUOT_FREE_INODE(struct inode *inode)
+static inline void vfs_dq_free_inode(struct inode *inode)
 {
 	if (sb_any_quota_enabled(inode->i_sb))
 		inode->i_sb->dq_op->free_inode(inode, 1);
 }
 
-static inline int DQUOT_TRANSFER(struct inode *inode, struct iattr *iattr)
-{
-	if (sb_any_quota_enabled(inode->i_sb) && !IS_NOQUOTA(inode)) {
-		DQUOT_INIT(inode);
-		if (inode->i_sb->dq_op->transfer(inode, iattr) == NO_QUOTA)
-			return 1;
-	}
-	return 0;
-}
-
 /* The following two functions cannot be called inside a transaction */
-static inline void DQUOT_SYNC(struct super_block *sb)
+static inline void vfs_dq_sync(struct super_block *sb)
 {
 	sync_dquots(sb, -1);
 }
 
-static inline int DQUOT_OFF(struct super_block *sb, int remount)
+static inline int vfs_dq_off(struct super_block *sb, int remount)
 {
 	int ret = -ENOSYS;
 
@@ -185,21 +157,6 @@ static inline int DQUOT_OFF(struct super_block *sb, int remount)
 	return ret;
 }
 
-static inline int DQUOT_ON_REMOUNT(struct super_block *sb)
-{
-	int cnt;
-	int ret = 0, err;
-
-	if (!sb->s_qcop || !sb->s_qcop->quota_on)
-		return -ENOSYS;
-	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-		err = sb->s_qcop->quota_on(sb, cnt, 0, NULL, 1);
-		if (err < 0 && !ret)
-			ret = err;
-	}
-	return ret;
-}
-
 #else
 
 /*
@@ -208,113 +165,144 @@ static inline int DQUOT_ON_REMOUNT(struct super_block *sb)
 #define sb_dquot_ops				(NULL)
 #define sb_quotactl_ops				(NULL)
 
-static inline void DQUOT_INIT(struct inode *inode)
+static inline void vfs_dq_init(struct inode *inode)
 {
 }
 
-static inline void DQUOT_DROP(struct inode *inode)
+static inline void vfs_dq_drop(struct inode *inode)
 {
 }
 
-static inline int DQUOT_ALLOC_INODE(struct inode *inode)
+static inline int vfs_dq_alloc_inode(struct inode *inode)
 {
 	return 0;
 }
 
-static inline void DQUOT_FREE_INODE(struct inode *inode)
+static inline void vfs_dq_free_inode(struct inode *inode)
 {
 }
 
-static inline void DQUOT_SYNC(struct super_block *sb)
+static inline void vfs_dq_sync(struct super_block *sb)
 {
 }
 
-static inline int DQUOT_OFF(struct super_block *sb, int remount)
+static inline int vfs_dq_off(struct super_block *sb, int remount)
 {
 	return 0;
 }
 
-static inline int DQUOT_ON_REMOUNT(struct super_block *sb)
+static inline int vfs_dq_quota_on_remount(struct super_block *sb)
 {
 	return 0;
 }
 
-static inline int DQUOT_TRANSFER(struct inode *inode, struct iattr *iattr)
+static inline int vfs_dq_transfer(struct inode *inode, struct iattr *iattr)
 {
 	return 0;
 }
 
-static inline int DQUOT_PREALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr)
+static inline int vfs_dq_prealloc_space_nodirty(struct inode *inode, qsize_t nr)
 {
 	inode_add_bytes(inode, nr);
 	return 0;
 }
 
-static inline int DQUOT_PREALLOC_SPACE(struct inode *inode, qsize_t nr)
+static inline int vfs_dq_prealloc_space(struct inode *inode, qsize_t nr)
 {
-	DQUOT_PREALLOC_SPACE_NODIRTY(inode, nr);
+	vfs_dq_prealloc_space_nodirty(inode, nr);
 	mark_inode_dirty(inode);
 	return 0;
 }
 
-static inline int DQUOT_ALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr)
+static inline int vfs_dq_alloc_space_nodirty(struct inode *inode, qsize_t nr)
 {
 	inode_add_bytes(inode, nr);
 	return 0;
 }
 
-static inline int DQUOT_ALLOC_SPACE(struct inode *inode, qsize_t nr)
+static inline int vfs_dq_alloc_space(struct inode *inode, qsize_t nr)
 {
-	DQUOT_ALLOC_SPACE_NODIRTY(inode, nr);
+	vfs_dq_alloc_space_nodirty(inode, nr);
 	mark_inode_dirty(inode);
 	return 0;
 }
 
-static inline void DQUOT_FREE_SPACE_NODIRTY(struct inode *inode, qsize_t nr)
+static inline void vfs_dq_free_space_nodirty(struct inode *inode, qsize_t nr)
 {
 	inode_sub_bytes(inode, nr);
 }
 
-static inline void DQUOT_FREE_SPACE(struct inode *inode, qsize_t nr)
+static inline void vfs_dq_free_space(struct inode *inode, qsize_t nr)
 {
-	DQUOT_FREE_SPACE_NODIRTY(inode, nr);
+	vfs_dq_free_space_nodirty(inode, nr);
 	mark_inode_dirty(inode);
 }	
 
 #endif /* CONFIG_QUOTA */
 
-static inline int DQUOT_PREALLOC_BLOCK_NODIRTY(struct inode *inode, qsize_t nr)
+static inline int vfs_dq_prealloc_block_nodirty(struct inode *inode, qsize_t nr)
 {
-	return DQUOT_PREALLOC_SPACE_NODIRTY(inode,
+	return vfs_dq_prealloc_space_nodirty(inode,
 			nr << inode->i_sb->s_blocksize_bits);
 }
 
-static inline int DQUOT_PREALLOC_BLOCK(struct inode *inode, qsize_t nr)
+static inline int vfs_dq_prealloc_block(struct inode *inode, qsize_t nr)
 {
-	return DQUOT_PREALLOC_SPACE(inode,
+	return vfs_dq_prealloc_space(inode,
 			nr << inode->i_sb->s_blocksize_bits);
 }
 
-static inline int DQUOT_ALLOC_BLOCK_NODIRTY(struct inode *inode, qsize_t nr)
+static inline int vfs_dq_alloc_block_nodirty(struct inode *inode, qsize_t nr)
 {
-	return DQUOT_ALLOC_SPACE_NODIRTY(inode,
+ 	return vfs_dq_alloc_space_nodirty(inode,
 			nr << inode->i_sb->s_blocksize_bits);
 }
 
-static inline int DQUOT_ALLOC_BLOCK(struct inode *inode, qsize_t nr)
+static inline int vfs_dq_alloc_block(struct inode *inode, qsize_t nr)
 {
-	return DQUOT_ALLOC_SPACE(inode,
+	return vfs_dq_alloc_space(inode,
 			nr << inode->i_sb->s_blocksize_bits);
 }
 
-static inline void DQUOT_FREE_BLOCK_NODIRTY(struct inode *inode, qsize_t nr)
+static inline void vfs_dq_free_block_nodirty(struct inode *inode, qsize_t nr)
 {
-	DQUOT_FREE_SPACE_NODIRTY(inode, nr << inode->i_sb->s_blocksize_bits);
+	vfs_dq_free_space_nodirty(inode, nr << inode->i_sb->s_blocksize_bits);
 }
 
-static inline void DQUOT_FREE_BLOCK(struct inode *inode, qsize_t nr)
+static inline void vfs_dq_free_block(struct inode *inode, qsize_t nr)
 {
-	DQUOT_FREE_SPACE(inode, nr << inode->i_sb->s_blocksize_bits);
+	vfs_dq_free_space(inode, nr << inode->i_sb->s_blocksize_bits);
 }
 
+/*
+ * Define uppercase equivalents for compatibility with old function names
+ * Can go away when we think all users have been converted (15/04/2008)
+ */
+#define DQUOT_INIT(inode) vfs_dq_init(inode)
+#define DQUOT_DROP(inode) vfs_dq_drop(inode)
+#define DQUOT_PREALLOC_SPACE_NODIRTY(inode, nr) \
+				vfs_dq_prealloc_space_nodirty(inode, nr)
+#define DQUOT_PREALLOC_SPACE(inode, nr) vfs_dq_prealloc_space(inode, nr)
+#define DQUOT_ALLOC_SPACE_NODIRTY(inode, nr) \
+				vfs_dq_alloc_space_nodirty(inode, nr)
+#define DQUOT_ALLOC_SPACE(inode, nr) vfs_dq_alloc_space(inode, nr)
+#define DQUOT_PREALLOC_BLOCK_NODIRTY(inode, nr) \
+				vfs_dq_prealloc_block_nodirty(inode, nr)
+#define DQUOT_PREALLOC_BLOCK(inode, nr) vfs_dq_prealloc_block(inode, nr)
+#define DQUOT_ALLOC_BLOCK_NODIRTY(inode, nr) \
+				vfs_dq_alloc_block_nodirty(inode, nr)
+#define DQUOT_ALLOC_BLOCK(inode, nr) vfs_dq_alloc_block(inode, nr)
+#define DQUOT_ALLOC_INODE(inode) vfs_dq_alloc_inode(inode)
+#define DQUOT_FREE_SPACE_NODIRTY(inode, nr) \
+				vfs_dq_free_space_nodirty(inode, nr)
+#define DQUOT_FREE_SPACE(inode, nr) vfs_dq_free_space(inode, nr)
+#define DQUOT_FREE_BLOCK_NODIRTY(inode, nr) \
+				vfs_dq_free_block_nodirty(inode, nr)
+#define DQUOT_FREE_BLOCK(inode, nr) vfs_dq_free_block(inode, nr)
+#define DQUOT_FREE_INODE(inode) vfs_dq_free_inode(inode)
+#define DQUOT_TRANSFER(inode, iattr) vfs_dq_transfer(inode, iattr)
+#define DQUOT_SYNC(sb) vfs_dq_sync(sb)
+#define DQUOT_OFF(sb, remount) vfs_dq_off(sb, remount)
+#define DQUOT_ON_REMOUNT(sb) vfs_dq_quota_on_remount(sb)
+
 #endif /* _LINUX_QUOTAOPS_ */
-- 
cgit v1.2.3


From 02a55ca87185e114e5d298a8d00608501dbabf67 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Fri, 25 Jul 2008 01:46:50 -0700
Subject: quota: cleanup loop in sync_dquots()

Make loop in sync_dquots() checking whether there's something to write
more readable, remove useless variable and macro info_any_dirty() which
is used only in this place.

Signed-off-by: Jan Kara <jack@suse.cz>
Cc: "Vegard Nossum" <vegard.nossum@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/quota.c            | 18 ++++++++++++------
 include/linux/quota.h |  2 --
 2 files changed, 12 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/fs/quota.c b/fs/quota.c
index db1cc9f3c7aa..7f4386ebc23a 100644
--- a/fs/quota.c
+++ b/fs/quota.c
@@ -186,7 +186,7 @@ static void quota_sync_sb(struct super_block *sb, int type)
 
 void sync_dquots(struct super_block *sb, int type)
 {
-	int cnt, dirty;
+	int cnt;
 
 	if (sb) {
 		if (sb->s_qcop->quota_sync)
@@ -198,11 +198,17 @@ void sync_dquots(struct super_block *sb, int type)
 restart:
 	list_for_each_entry(sb, &super_blocks, s_list) {
 		/* This test just improves performance so it needn't be reliable... */
-		for (cnt = 0, dirty = 0; cnt < MAXQUOTAS; cnt++)
-			if ((type == cnt || type == -1) && sb_has_quota_enabled(sb, cnt)
-			    && info_any_dirty(&sb_dqopt(sb)->info[cnt]))
-				dirty = 1;
-		if (!dirty)
+		for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+			if (type != -1 && type != cnt)
+				continue;
+			if (!sb_has_quota_enabled(sb, cnt))
+				continue;
+			if (!info_dirty(&sb_dqopt(sb)->info[cnt]) &&
+			    list_empty(&sb_dqopt(sb)->info[cnt].dqi_dirty_list))
+				continue;
+			break;
+		}
+		if (cnt == MAXQUOTAS)
 			continue;
 		sb->s_count++;
 		spin_unlock(&sb_lock);
diff --git a/include/linux/quota.h b/include/linux/quota.h
index dcddfb200947..6f1d97ddf828 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -224,8 +224,6 @@ struct super_block;
 
 extern void mark_info_dirty(struct super_block *sb, int type);
 #define info_dirty(info) test_bit(DQF_INFO_DIRTY_B, &(info)->dqi_flags)
-#define info_any_dquot_dirty(info) (!list_empty(&(info)->dqi_dirty_list))
-#define info_any_dirty(info) (info_dirty(info) || info_any_dquot_dirty(info))
 
 #define sb_dqopt(sb) (&(sb)->s_dquot)
 #define sb_dqinfo(sb, type) (sb_dqopt(sb)->info+(type))
-- 
cgit v1.2.3


From 74abb9890dafb12a50dc140de215ed477beb1b88 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Fri, 25 Jul 2008 01:46:51 -0700
Subject: quota: move function-macros from quota.h to quotaops.h

Move declarations of some macros, which should be in fact functions to
quotaops.h.  This way they can be later converted to inline functions
because we can now use declarations from quota.h.  Also add necessary
includes of quotaops.h to a few files.

[akpm@linux-foundation.org: fix JFS build]
[akpm@linux-foundation.org: fix UFS build]
[vegard.nossum@gmail.com: fix QUOTA=n build]
Signed-off-by: Jan Kara <jack@suse.cz>
Cc: Vegard Nossum <vegard.nossum@gmail.com>
Cc: Arjen Pool <arjenpool@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ext2/super.c          |  1 +
 fs/jfs/super.c           |  1 +
 fs/quota_v1.c            |  1 +
 fs/quota_v2.c            |  1 +
 fs/reiserfs/super.c      |  1 +
 fs/ufs/super.c           |  1 +
 include/linux/quota.h    | 22 +++-------------------
 include/linux/quotaops.h | 26 ++++++++++++++++++++++++++
 8 files changed, 35 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index ef50cbc792db..31308a3b0b8b 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -31,6 +31,7 @@
 #include <linux/seq_file.h>
 #include <linux/mount.h>
 #include <linux/log2.h>
+#include <linux/quotaops.h>
 #include <asm/uaccess.h>
 #include "ext2.h"
 #include "xattr.h"
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 0288e6d7936a..359c091d8965 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -22,6 +22,7 @@
 #include <linux/parser.h>
 #include <linux/completion.h>
 #include <linux/vfs.h>
+#include <linux/quotaops.h>
 #include <linux/mount.h>
 #include <linux/moduleparam.h>
 #include <linux/kthread.h>
diff --git a/fs/quota_v1.c b/fs/quota_v1.c
index a6cf9269105c..5ae15b13eeb0 100644
--- a/fs/quota_v1.c
+++ b/fs/quota_v1.c
@@ -1,6 +1,7 @@
 #include <linux/errno.h>
 #include <linux/fs.h>
 #include <linux/quota.h>
+#include <linux/quotaops.h>
 #include <linux/dqblk_v1.h>
 #include <linux/quotaio_v1.h>
 #include <linux/kernel.h>
diff --git a/fs/quota_v2.c b/fs/quota_v2.c
index 234ada903633..b53827dc02d9 100644
--- a/fs/quota_v2.c
+++ b/fs/quota_v2.c
@@ -11,6 +11,7 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/slab.h>
+#include <linux/quotaops.h>
 
 #include <asm/byteorder.h>
 
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index a10a6d2a8870..2ec748ba0bd3 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -22,6 +22,7 @@
 #include <linux/blkdev.h>
 #include <linux/buffer_head.h>
 #include <linux/exportfs.h>
+#include <linux/quotaops.h>
 #include <linux/vfs.h>
 #include <linux/mnt_namespace.h>
 #include <linux/mount.h>
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 506f724055c2..227c9d700040 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -76,6 +76,7 @@
 
 #include <linux/errno.h>
 #include <linux/fs.h>
+#include <linux/quotaops.h>
 #include <linux/slab.h>
 #include <linux/time.h>
 #include <linux/stat.h>
diff --git a/include/linux/quota.h b/include/linux/quota.h
index 6f1d97ddf828..f9983ea0ff88 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -41,9 +41,6 @@
 #define __DQUOT_VERSION__	"dquot_6.5.1"
 #define __DQUOT_NUM_VERSION__	6*10000+5*100+1
 
-typedef __kernel_uid32_t qid_t; /* Type in which we store ids in memory */
-typedef __u64 qsize_t;          /* Type in which we store sizes */
-
 /* Size of blocks in which are counted size limits */
 #define QUOTABLOCK_BITS 10
 #define QUOTABLOCK_SIZE (1 << QUOTABLOCK_BITS)
@@ -172,6 +169,9 @@ enum {
 
 #include <asm/atomic.h>
 
+typedef __kernel_uid32_t qid_t; /* Type in which we store ids in memory */
+typedef __u64 qsize_t;          /* Type in which we store sizes */
+
 extern spinlock_t dq_data_lock;
 
 /* Maximal numbers of writes for quota operation (insert/delete/update)
@@ -225,9 +225,6 @@ struct super_block;
 extern void mark_info_dirty(struct super_block *sb, int type);
 #define info_dirty(info) test_bit(DQF_INFO_DIRTY_B, &(info)->dqi_flags)
 
-#define sb_dqopt(sb) (&(sb)->s_dquot)
-#define sb_dqinfo(sb, type) (sb_dqopt(sb)->info+(type))
-
 struct dqstats {
 	int lookups;
 	int drops;
@@ -335,19 +332,6 @@ struct quota_info {
 	struct quota_format_ops *ops[MAXQUOTAS];	/* Operations for each type */
 };
 
-#define sb_has_quota_enabled(sb, type) ((type)==USRQUOTA ? \
-	(sb_dqopt(sb)->flags & DQUOT_USR_ENABLED) : (sb_dqopt(sb)->flags & DQUOT_GRP_ENABLED))
-
-#define sb_any_quota_enabled(sb) (sb_has_quota_enabled(sb, USRQUOTA) | \
-				  sb_has_quota_enabled(sb, GRPQUOTA))
-
-#define sb_has_quota_suspended(sb, type) \
-	((type) == USRQUOTA ? (sb_dqopt(sb)->flags & DQUOT_USR_SUSPENDED) : \
-			      (sb_dqopt(sb)->flags & DQUOT_GRP_SUSPENDED))
-
-#define sb_any_quota_suspended(sb) (sb_has_quota_suspended(sb, USRQUOTA) | \
-				  sb_has_quota_suspended(sb, GRPQUOTA))
-
 int register_quota_format(struct quota_format_type *fmt);
 void unregister_quota_format(struct quota_format_type *fmt);
 
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index 0c8f9fe462af..38218c1334b1 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -14,6 +14,8 @@
 
 #include <linux/fs.h>
 
+#define sb_dqopt(sb) (&(sb)->s_dquot)
+
 #if defined(CONFIG_QUOTA)
 
 /*
@@ -52,6 +54,25 @@ void vfs_dq_drop(struct inode *inode);
 int vfs_dq_transfer(struct inode *inode, struct iattr *iattr);
 int vfs_dq_quota_on_remount(struct super_block *sb);
 
+#define sb_dqinfo(sb, type) (sb_dqopt(sb)->info+(type))
+
+/*
+ * Functions for checking status of quota
+ */
+
+#define sb_has_quota_enabled(sb, type) ((type)==USRQUOTA ? \
+	(sb_dqopt(sb)->flags & DQUOT_USR_ENABLED) : (sb_dqopt(sb)->flags & DQUOT_GRP_ENABLED))
+
+#define sb_any_quota_enabled(sb) (sb_has_quota_enabled(sb, USRQUOTA) | \
+				  sb_has_quota_enabled(sb, GRPQUOTA))
+
+#define sb_has_quota_suspended(sb, type) \
+	((type) == USRQUOTA ? (sb_dqopt(sb)->flags & DQUOT_USR_SUSPENDED) : \
+			      (sb_dqopt(sb)->flags & DQUOT_GRP_SUSPENDED))
+
+#define sb_any_quota_suspended(sb) (sb_has_quota_suspended(sb, USRQUOTA) | \
+				  sb_has_quota_suspended(sb, GRPQUOTA))
+
 /*
  * Operations supported for diskquotas.
  */
@@ -159,6 +180,11 @@ static inline int vfs_dq_off(struct super_block *sb, int remount)
 
 #else
 
+#define sb_has_quota_enabled(sb, type) 0
+#define sb_any_quota_enabled(sb) 0
+#define sb_has_quota_suspended(sb, type) 0
+#define sb_any_quota_suspended(sb) 0
+
 /*
  * NO-OP when quota not configured.
  */
-- 
cgit v1.2.3


From 03b063436ca1076301de58d9d628f610ab5404ad Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Fri, 25 Jul 2008 01:46:52 -0700
Subject: quota: convert macros to inline functions

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/quota.h    |  5 +++-
 include/linux/quotaops.h | 65 ++++++++++++++++++++++++++++++++++++------------
 2 files changed, 53 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/quota.h b/include/linux/quota.h
index f9983ea0ff88..4e004fef8134 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -223,7 +223,10 @@ struct super_block;
 #define DQF_INFO_DIRTY (1 << DQF_INFO_DIRTY_B)	/* Is info dirty? */
 
 extern void mark_info_dirty(struct super_block *sb, int type);
-#define info_dirty(info) test_bit(DQF_INFO_DIRTY_B, &(info)->dqi_flags)
+static inline int info_dirty(struct mem_dqinfo *info)
+{
+	return test_bit(DQF_INFO_DIRTY_B, &info->dqi_flags);
+}
 
 struct dqstats {
 	int lookups;
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index 38218c1334b1..742187f7a05c 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -11,10 +11,12 @@
 #define _LINUX_QUOTAOPS_
 
 #include <linux/smp_lock.h>
-
 #include <linux/fs.h>
 
-#define sb_dqopt(sb) (&(sb)->s_dquot)
+static inline struct quota_info *sb_dqopt(struct super_block *sb)
+{
+	return &sb->s_dquot;
+}
 
 #if defined(CONFIG_QUOTA)
 
@@ -54,24 +56,40 @@ void vfs_dq_drop(struct inode *inode);
 int vfs_dq_transfer(struct inode *inode, struct iattr *iattr);
 int vfs_dq_quota_on_remount(struct super_block *sb);
 
-#define sb_dqinfo(sb, type) (sb_dqopt(sb)->info+(type))
+static inline struct mem_dqinfo *sb_dqinfo(struct super_block *sb, int type)
+{
+	return sb_dqopt(sb)->info + type;
+}
 
 /*
  * Functions for checking status of quota
  */
 
-#define sb_has_quota_enabled(sb, type) ((type)==USRQUOTA ? \
-	(sb_dqopt(sb)->flags & DQUOT_USR_ENABLED) : (sb_dqopt(sb)->flags & DQUOT_GRP_ENABLED))
+static inline int sb_has_quota_enabled(struct super_block *sb, int type)
+{
+	if (type == USRQUOTA)
+		return sb_dqopt(sb)->flags & DQUOT_USR_ENABLED;
+	return sb_dqopt(sb)->flags & DQUOT_GRP_ENABLED;
+}
 
-#define sb_any_quota_enabled(sb) (sb_has_quota_enabled(sb, USRQUOTA) | \
-				  sb_has_quota_enabled(sb, GRPQUOTA))
+static inline int sb_any_quota_enabled(struct super_block *sb)
+{
+	return sb_has_quota_enabled(sb, USRQUOTA) ||
+		sb_has_quota_enabled(sb, GRPQUOTA);
+}
 
-#define sb_has_quota_suspended(sb, type) \
-	((type) == USRQUOTA ? (sb_dqopt(sb)->flags & DQUOT_USR_SUSPENDED) : \
-			      (sb_dqopt(sb)->flags & DQUOT_GRP_SUSPENDED))
+static inline int sb_has_quota_suspended(struct super_block *sb, int type)
+{
+	if (type == USRQUOTA)
+		return sb_dqopt(sb)->flags & DQUOT_USR_SUSPENDED;
+	return sb_dqopt(sb)->flags & DQUOT_GRP_SUSPENDED;
+}
 
-#define sb_any_quota_suspended(sb) (sb_has_quota_suspended(sb, USRQUOTA) | \
-				  sb_has_quota_suspended(sb, GRPQUOTA))
+static inline int sb_any_quota_suspended(struct super_block *sb)
+{
+	return sb_has_quota_suspended(sb, USRQUOTA) ||
+		sb_has_quota_suspended(sb, GRPQUOTA);
+}
 
 /*
  * Operations supported for diskquotas.
@@ -180,10 +198,25 @@ static inline int vfs_dq_off(struct super_block *sb, int remount)
 
 #else
 
-#define sb_has_quota_enabled(sb, type) 0
-#define sb_any_quota_enabled(sb) 0
-#define sb_has_quota_suspended(sb, type) 0
-#define sb_any_quota_suspended(sb) 0
+static inline int sb_has_quota_enabled(struct super_block *sb, int type)
+{
+	return 0;
+}
+
+static inline int sb_any_quota_enabled(struct super_block *sb)
+{
+	return 0;
+}
+
+static inline int sb_has_quota_suspended(struct super_block *sb, int type)
+{
+	return 0;
+}
+
+static inline int sb_any_quota_suspended(struct super_block *sb)
+{
+	return 0;
+}
 
 /*
  * NO-OP when quota not configured.
-- 
cgit v1.2.3


From 657d3bfa98e542271b449f8cd84c7501ae2b2255 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Fri, 25 Jul 2008 01:46:52 -0700
Subject: quota: implement sending information via netlink about user below
 quota

Sometimes it may be useful for userspace to know (e.g.  for some hosting
guys) that some user stopped exceeding his hardlimit or softlimit in
quotas.  Implement sending of such events to userspace via quota netlink
protocol so that they don't have to poll for such events.  Based on idea
and initial implementation by Vladislav Bogdanov.

Cc: Vladislav Bogdanov <slava@nsys.by>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/dquot.c            | 60 +++++++++++++++++++++++++++++++++++++++++++++------
 include/linux/quota.h |  4 ++++
 2 files changed, 58 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/fs/dquot.c b/fs/dquot.c
index 0bcaf970bbb4..1346eebe74ce 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -889,7 +889,10 @@ static void print_warning(struct dquot *dquot, const int warntype)
 	char *msg = NULL;
 	struct tty_struct *tty;
 
-	if (!need_print_warning(dquot))
+	if (warntype == QUOTA_NL_IHARDBELOW ||
+	    warntype == QUOTA_NL_ISOFTBELOW ||
+	    warntype == QUOTA_NL_BHARDBELOW ||
+	    warntype == QUOTA_NL_BSOFTBELOW || !need_print_warning(dquot))
 		return;
 
 	mutex_lock(&tty_mutex);
@@ -1097,6 +1100,35 @@ static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *war
 	return QUOTA_OK;
 }
 
+static int info_idq_free(struct dquot *dquot, ulong inodes)
+{
+	if (test_bit(DQ_FAKE_B, &dquot->dq_flags) ||
+	    dquot->dq_dqb.dqb_curinodes <= dquot->dq_dqb.dqb_isoftlimit)
+		return QUOTA_NL_NOWARN;
+
+	if (dquot->dq_dqb.dqb_curinodes - inodes <= dquot->dq_dqb.dqb_isoftlimit)
+		return QUOTA_NL_ISOFTBELOW;
+	if (dquot->dq_dqb.dqb_curinodes >= dquot->dq_dqb.dqb_ihardlimit &&
+	    dquot->dq_dqb.dqb_curinodes - inodes < dquot->dq_dqb.dqb_ihardlimit)
+		return QUOTA_NL_IHARDBELOW;
+	return QUOTA_NL_NOWARN;
+}
+
+static int info_bdq_free(struct dquot *dquot, qsize_t space)
+{
+	if (test_bit(DQ_FAKE_B, &dquot->dq_flags) ||
+	    toqb(dquot->dq_dqb.dqb_curspace) <= dquot->dq_dqb.dqb_bsoftlimit)
+		return QUOTA_NL_NOWARN;
+
+	if (toqb(dquot->dq_dqb.dqb_curspace - space) <=
+	    dquot->dq_dqb.dqb_bsoftlimit)
+		return QUOTA_NL_BSOFTBELOW;
+	if (toqb(dquot->dq_dqb.dqb_curspace) >= dquot->dq_dqb.dqb_bhardlimit &&
+	    toqb(dquot->dq_dqb.dqb_curspace - space) <
+						dquot->dq_dqb.dqb_bhardlimit)
+		return QUOTA_NL_BHARDBELOW;
+	return QUOTA_NL_NOWARN;
+}
 /*
  *	Initialize quota pointers in inode
  *	Transaction must be started at entry
@@ -1284,6 +1316,7 @@ warn_put_all:
 int dquot_free_space(struct inode *inode, qsize_t number)
 {
 	unsigned int cnt;
+	char warntype[MAXQUOTAS];
 
 	/* First test before acquiring mutex - solves deadlocks when we
          * re-enter the quota code and are already holding the mutex */
@@ -1292,6 +1325,7 @@ out_sub:
 		inode_sub_bytes(inode, number);
 		return QUOTA_OK;
 	}
+
 	down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
 	/* Now recheck reliably when holding dqptr_sem */
 	if (IS_NOQUOTA(inode)) {
@@ -1302,6 +1336,7 @@ out_sub:
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
 		if (inode->i_dquot[cnt] == NODQUOT)
 			continue;
+		warntype[cnt] = info_bdq_free(inode->i_dquot[cnt], number);
 		dquot_decr_space(inode->i_dquot[cnt], number);
 	}
 	inode_sub_bytes(inode, number);
@@ -1310,6 +1345,7 @@ out_sub:
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
 		if (inode->i_dquot[cnt])
 			mark_dquot_dirty(inode->i_dquot[cnt]);
+	flush_warnings(inode->i_dquot, warntype);
 	up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
 	return QUOTA_OK;
 }
@@ -1320,11 +1356,13 @@ out_sub:
 int dquot_free_inode(const struct inode *inode, unsigned long number)
 {
 	unsigned int cnt;
+	char warntype[MAXQUOTAS];
 
 	/* First test before acquiring mutex - solves deadlocks when we
          * re-enter the quota code and are already holding the mutex */
 	if (IS_NOQUOTA(inode))
 		return QUOTA_OK;
+
 	down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
 	/* Now recheck reliably when holding dqptr_sem */
 	if (IS_NOQUOTA(inode)) {
@@ -1335,6 +1373,7 @@ int dquot_free_inode(const struct inode *inode, unsigned long number)
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
 		if (inode->i_dquot[cnt] == NODQUOT)
 			continue;
+		warntype[cnt] = info_idq_free(inode->i_dquot[cnt], number);
 		dquot_decr_inodes(inode->i_dquot[cnt], number);
 	}
 	spin_unlock(&dq_data_lock);
@@ -1342,6 +1381,7 @@ int dquot_free_inode(const struct inode *inode, unsigned long number)
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
 		if (inode->i_dquot[cnt])
 			mark_dquot_dirty(inode->i_dquot[cnt]);
+	flush_warnings(inode->i_dquot, warntype);
 	up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
 	return QUOTA_OK;
 }
@@ -1359,7 +1399,8 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
 	struct dquot *transfer_to[MAXQUOTAS];
 	int cnt, ret = NO_QUOTA, chuid = (iattr->ia_valid & ATTR_UID) && inode->i_uid != iattr->ia_uid,
 	    chgid = (iattr->ia_valid & ATTR_GID) && inode->i_gid != iattr->ia_gid;
-	char warntype[MAXQUOTAS];
+	char warntype_to[MAXQUOTAS];
+	char warntype_from_inodes[MAXQUOTAS], warntype_from_space[MAXQUOTAS];
 
 	/* First test before acquiring mutex - solves deadlocks when we
          * re-enter the quota code and are already holding the mutex */
@@ -1368,7 +1409,7 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
 	/* Clear the arrays */
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
 		transfer_to[cnt] = transfer_from[cnt] = NODQUOT;
-		warntype[cnt] = QUOTA_NL_NOWARN;
+		warntype_to[cnt] = QUOTA_NL_NOWARN;
 	}
 	down_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
 	/* Now recheck reliably when holding dqptr_sem */
@@ -1400,8 +1441,9 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
 		if (transfer_to[cnt] == NODQUOT)
 			continue;
 		transfer_from[cnt] = inode->i_dquot[cnt];
-		if (check_idq(transfer_to[cnt], 1, warntype+cnt) == NO_QUOTA ||
-		    check_bdq(transfer_to[cnt], space, 0, warntype+cnt) == NO_QUOTA)
+		if (check_idq(transfer_to[cnt], 1, warntype_to + cnt) ==
+		    NO_QUOTA || check_bdq(transfer_to[cnt], space, 0,
+		    warntype_to + cnt) == NO_QUOTA)
 			goto warn_put_all;
 	}
 
@@ -1417,6 +1459,10 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
 
 		/* Due to IO error we might not have transfer_from[] structure */
 		if (transfer_from[cnt]) {
+			warntype_from_inodes[cnt] =
+				info_idq_free(transfer_from[cnt], 1);
+			warntype_from_space[cnt] =
+				info_bdq_free(transfer_from[cnt], space);
 			dquot_decr_inodes(transfer_from[cnt], 1);
 			dquot_decr_space(transfer_from[cnt], space);
 		}
@@ -1436,7 +1482,9 @@ warn_put_all:
 		if (transfer_to[cnt])
 			mark_dquot_dirty(transfer_to[cnt]);
 	}
-	flush_warnings(transfer_to, warntype);
+	flush_warnings(transfer_to, warntype_to);
+	flush_warnings(transfer_from, warntype_from_inodes);
+	flush_warnings(transfer_from, warntype_from_space);
 	
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
 		if (ret == QUOTA_OK && transfer_from[cnt] != NODQUOT)
diff --git a/include/linux/quota.h b/include/linux/quota.h
index 4e004fef8134..376a05048bc5 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -135,6 +135,10 @@ struct if_dqinfo {
 #define QUOTA_NL_BHARDWARN 4		/* Block hardlimit reached */
 #define QUOTA_NL_BSOFTLONGWARN 5	/* Block grace time expired */
 #define QUOTA_NL_BSOFTWARN 6		/* Block softlimit reached */
+#define QUOTA_NL_IHARDBELOW 7		/* Usage got below inode hardlimit */
+#define QUOTA_NL_ISOFTBELOW 8		/* Usage got below inode softlimit */
+#define QUOTA_NL_BHARDBELOW 9		/* Usage got below block hardlimit */
+#define QUOTA_NL_BSOFTBELOW 10		/* Usage got below block softlimit */
 
 enum {
 	QUOTA_NL_C_UNSPEC,
-- 
cgit v1.2.3


From f2992db2a4f7ae10f61d5bc68c7c1528cec639e2 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Fri, 25 Jul 2008 01:46:55 -0700
Subject: Mark res_counter_charge(_locked) with __must_check

Ignoring their return values may result in counter underflow in the future -
when the value charged will be uncharged (or in "leaks" - when the value is
not uncharged).

This also prevents from using charging routines to decrement the
counter value (i.e. uncharge it) ;)

(Current code works OK with res_counter, however :) )

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Paul Menage <menage@google.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/res_counter.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h
index 6d9e1fca098c..125660e7793f 100644
--- a/include/linux/res_counter.h
+++ b/include/linux/res_counter.h
@@ -95,8 +95,10 @@ void res_counter_init(struct res_counter *counter);
  * counter->limit _locked call expects the counter->lock to be taken
  */
 
-int res_counter_charge_locked(struct res_counter *counter, unsigned long val);
-int res_counter_charge(struct res_counter *counter, unsigned long val);
+int __must_check res_counter_charge_locked(struct res_counter *counter,
+		unsigned long val);
+int __must_check res_counter_charge(struct res_counter *counter,
+		unsigned long val);
 
 /*
  * uncharge - tell that some portion of the resource is released
-- 
cgit v1.2.3


From ce16b49d37e748574f7fabc2726268d542d0aa1a Mon Sep 17 00:00:00 2001
From: Paul Menage <menage@google.com>
Date: Fri, 25 Jul 2008 01:46:57 -0700
Subject: cgroup files: clean up whitespace in struct cftype

This patch removes some extraneous spaces from method declarations in
struct cftype, to fit in with conventional kernel style.

Signed-off-by: Paul Menage <menage@google.com>
Cc: Paul Jackson <pj@sgi.com>
Cc: Pavel Emelyanov <xemul@openvz.org>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Serge Hallyn <serue@us.ibm.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/cgroup.h | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index e155aa78d859..88a734edccbc 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -205,48 +205,48 @@ struct cftype {
 	 * subsystem, followed by a period */
 	char name[MAX_CFTYPE_NAME];
 	int private;
-	int (*open) (struct inode *inode, struct file *file);
-	ssize_t (*read) (struct cgroup *cgrp, struct cftype *cft,
-			 struct file *file,
-			 char __user *buf, size_t nbytes, loff_t *ppos);
+	int (*open)(struct inode *inode, struct file *file);
+	ssize_t (*read)(struct cgroup *cgrp, struct cftype *cft,
+			struct file *file,
+			char __user *buf, size_t nbytes, loff_t *ppos);
 	/*
 	 * read_u64() is a shortcut for the common case of returning a
 	 * single integer. Use it in place of read()
 	 */
-	u64 (*read_u64) (struct cgroup *cgrp, struct cftype *cft);
+	u64 (*read_u64)(struct cgroup *cgrp, struct cftype *cft);
 	/*
 	 * read_s64() is a signed version of read_u64()
 	 */
-	s64 (*read_s64) (struct cgroup *cgrp, struct cftype *cft);
+	s64 (*read_s64)(struct cgroup *cgrp, struct cftype *cft);
 	/*
 	 * read_map() is used for defining a map of key/value
 	 * pairs. It should call cb->fill(cb, key, value) for each
 	 * entry. The key/value pairs (and their ordering) should not
 	 * change between reboots.
 	 */
-	int (*read_map) (struct cgroup *cont, struct cftype *cft,
-			 struct cgroup_map_cb *cb);
+	int (*read_map)(struct cgroup *cont, struct cftype *cft,
+			struct cgroup_map_cb *cb);
 	/*
 	 * read_seq_string() is used for outputting a simple sequence
 	 * using seqfile.
 	 */
-	int (*read_seq_string) (struct cgroup *cont, struct cftype *cft,
-			 struct seq_file *m);
+	int (*read_seq_string)(struct cgroup *cont, struct cftype *cft,
+			       struct seq_file *m);
 
-	ssize_t (*write) (struct cgroup *cgrp, struct cftype *cft,
-			  struct file *file,
-			  const char __user *buf, size_t nbytes, loff_t *ppos);
+	ssize_t (*write)(struct cgroup *cgrp, struct cftype *cft,
+			 struct file *file,
+			 const char __user *buf, size_t nbytes, loff_t *ppos);
 
 	/*
 	 * write_u64() is a shortcut for the common case of accepting
 	 * a single integer (as parsed by simple_strtoull) from
 	 * userspace. Use in place of write(); return 0 or error.
 	 */
-	int (*write_u64) (struct cgroup *cgrp, struct cftype *cft, u64 val);
+	int (*write_u64)(struct cgroup *cgrp, struct cftype *cft, u64 val);
 	/*
 	 * write_s64() is a signed version of write_u64()
 	 */
-	int (*write_s64) (struct cgroup *cgrp, struct cftype *cft, s64 val);
+	int (*write_s64)(struct cgroup *cgrp, struct cftype *cft, s64 val);
 
 	/*
 	 * trigger() callback can be used to get some kick from the
@@ -256,7 +256,7 @@ struct cftype {
 	 */
 	int (*trigger)(struct cgroup *cgrp, unsigned int event);
 
-	int (*release) (struct inode *inode, struct file *file);
+	int (*release)(struct inode *inode, struct file *file);
 };
 
 struct cgroup_scanner {
-- 
cgit v1.2.3


From db3b14978abc02041046ed8353f0899cb58ffffc Mon Sep 17 00:00:00 2001
From: Paul Menage <menage@google.com>
Date: Fri, 25 Jul 2008 01:46:58 -0700
Subject: cgroup files: add write_string cgroup control file method

This patch adds a write_string() method for cgroups control files. The
semantics are that a buffer is copied from userspace to kernelspace
and the handler function invoked on that buffer.  The buffer is
guaranteed to be nul-terminated, and no longer than max_write_len
(defaulting to 64 bytes if unspecified). Later patches will convert
existing raw file write handlers in control group subsystems to use
this method.

Signed-off-by: Paul Menage <menage@google.com>
Cc: Paul Jackson <pj@sgi.com>
Cc: Pavel Emelyanov <xemul@openvz.org>
Acked-by: Balbir Singh <balbir@in.ibm.com>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/cgroup.h | 14 ++++++++++++++
 kernel/cgroup.c        | 35 +++++++++++++++++++++++++++++++++++
 2 files changed, 49 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 88a734edccbc..f5379455bb59 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -205,6 +205,13 @@ struct cftype {
 	 * subsystem, followed by a period */
 	char name[MAX_CFTYPE_NAME];
 	int private;
+
+	/*
+	 * If non-zero, defines the maximum length of string that can
+	 * be passed to write_string; defaults to 64
+	 */
+	size_t max_write_len;
+
 	int (*open)(struct inode *inode, struct file *file);
 	ssize_t (*read)(struct cgroup *cgrp, struct cftype *cft,
 			struct file *file,
@@ -248,6 +255,13 @@ struct cftype {
 	 */
 	int (*write_s64)(struct cgroup *cgrp, struct cftype *cft, s64 val);
 
+	/*
+	 * write_string() is passed a nul-terminated kernelspace
+	 * buffer of maximum length determined by max_write_len.
+	 * Returns 0 or -ve error code.
+	 */
+	int (*write_string)(struct cgroup *cgrp, struct cftype *cft,
+			    const char *buffer);
 	/*
 	 * trigger() callback can be used to get some kick from the
 	 * userspace, when the actual string written is not important
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 70d083c6fb6b..3a99cc2df860 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1363,6 +1363,39 @@ static ssize_t cgroup_write_X64(struct cgroup *cgrp, struct cftype *cft,
 	return retval;
 }
 
+static ssize_t cgroup_write_string(struct cgroup *cgrp, struct cftype *cft,
+				   struct file *file,
+				   const char __user *userbuf,
+				   size_t nbytes, loff_t *unused_ppos)
+{
+	char local_buffer[64];
+	int retval = 0;
+	size_t max_bytes = cft->max_write_len;
+	char *buffer = local_buffer;
+
+	if (!max_bytes)
+		max_bytes = sizeof(local_buffer) - 1;
+	if (nbytes >= max_bytes)
+		return -E2BIG;
+	/* Allocate a dynamic buffer if we need one */
+	if (nbytes >= sizeof(local_buffer)) {
+		buffer = kmalloc(nbytes + 1, GFP_KERNEL);
+		if (buffer == NULL)
+			return -ENOMEM;
+	}
+	if (nbytes && copy_from_user(buffer, userbuf, nbytes))
+		return -EFAULT;
+
+	buffer[nbytes] = 0;     /* nul-terminate */
+	strstrip(buffer);
+	retval = cft->write_string(cgrp, cft, buffer);
+	if (!retval)
+		retval = nbytes;
+	if (buffer != local_buffer)
+		kfree(buffer);
+	return retval;
+}
+
 static ssize_t cgroup_common_file_write(struct cgroup *cgrp,
 					   struct cftype *cft,
 					   struct file *file,
@@ -1440,6 +1473,8 @@ static ssize_t cgroup_file_write(struct file *file, const char __user *buf,
 		return cft->write(cgrp, cft, file, buf, nbytes, ppos);
 	if (cft->write_u64 || cft->write_s64)
 		return cgroup_write_X64(cgrp, cft, file, buf, nbytes, ppos);
+	if (cft->write_string)
+		return cgroup_write_string(cgrp, cft, file, buf, nbytes, ppos);
 	if (cft->trigger) {
 		int ret = cft->trigger(cgrp, (unsigned int)cft->private);
 		return ret ? ret : nbytes;
-- 
cgit v1.2.3


From e788e066c651b1bbf4a927dc95395c1aa13be436 Mon Sep 17 00:00:00 2001
From: Paul Menage <menage@google.com>
Date: Fri, 25 Jul 2008 01:46:59 -0700
Subject: cgroup files: move the release_agent file to use typed handlers

Adds cgroup_release_agent_write() and cgroup_release_agent_show()
methods to handle writing/reading the path to a cgroup hierarchy's
release agent. As a result, cgroup_common_file_read() is now unnecessary.

As part of the change, a previously-tolerated race in
cgroup_release_agent() is avoided by copying the current
release_agent_path prior to calling call_usermode_helper().

Signed-off-by: Paul Menage <menage@google.com>
Cc: Paul Jackson <pj@sgi.com>
Cc: Pavel Emelyanov <xemul@openvz.org>
Cc: Balbir Singh <balbir@in.ibm.com>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/cgroup.h |   2 +
 kernel/cgroup.c        | 125 ++++++++++++++++++++++---------------------------
 2 files changed, 59 insertions(+), 68 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index f5379455bb59..e78377a91a74 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -295,6 +295,8 @@ int cgroup_add_files(struct cgroup *cgrp,
 
 int cgroup_is_removed(const struct cgroup *cgrp);
 
+int cgroup_lock_live_group(struct cgroup *cgrp);
+
 int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen);
 
 int cgroup_task_count(const struct cgroup *cgrp);
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 3a99cc2df860..0120b5d67a73 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -89,11 +89,7 @@ struct cgroupfs_root {
 	/* Hierarchy-specific flags */
 	unsigned long flags;
 
-	/* The path to use for release notifications. No locking
-	 * between setting and use - so if userspace updates this
-	 * while child cgroups exist, you could miss a
-	 * notification. We ensure that it's always a valid
-	 * NUL-terminated string */
+	/* The path to use for release notifications. */
 	char release_agent_path[PATH_MAX];
 };
 
@@ -1329,6 +1325,45 @@ enum cgroup_filetype {
 	FILE_RELEASE_AGENT,
 };
 
+/**
+ * cgroup_lock_live_group - take cgroup_mutex and check that cgrp is alive.
+ * @cgrp: the cgroup to be checked for liveness
+ *
+ * Returns true (with lock held) on success, or false (with no lock
+ * held) on failure.
+ */
+int cgroup_lock_live_group(struct cgroup *cgrp)
+{
+	mutex_lock(&cgroup_mutex);
+	if (cgroup_is_removed(cgrp)) {
+		mutex_unlock(&cgroup_mutex);
+		return false;
+	}
+	return true;
+}
+
+static int cgroup_release_agent_write(struct cgroup *cgrp, struct cftype *cft,
+				      const char *buffer)
+{
+	BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX);
+	if (!cgroup_lock_live_group(cgrp))
+		return -ENODEV;
+	strcpy(cgrp->root->release_agent_path, buffer);
+	mutex_unlock(&cgroup_mutex);
+	return 0;
+}
+
+static int cgroup_release_agent_show(struct cgroup *cgrp, struct cftype *cft,
+				     struct seq_file *seq)
+{
+	if (!cgroup_lock_live_group(cgrp))
+		return -ENODEV;
+	seq_puts(seq, cgrp->root->release_agent_path);
+	seq_putc(seq, '\n');
+	mutex_unlock(&cgroup_mutex);
+	return 0;
+}
+
 static ssize_t cgroup_write_X64(struct cgroup *cgrp, struct cftype *cft,
 				struct file *file,
 				const char __user *userbuf,
@@ -1443,10 +1478,6 @@ static ssize_t cgroup_common_file_write(struct cgroup *cgrp,
 		else
 			clear_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
 		break;
-	case FILE_RELEASE_AGENT:
-		BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX);
-		strcpy(cgrp->root->release_agent_path, buffer);
-		break;
 	default:
 		retval = -EINVAL;
 		goto out2;
@@ -1506,49 +1537,6 @@ static ssize_t cgroup_read_s64(struct cgroup *cgrp, struct cftype *cft,
 	return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
 }
 
-static ssize_t cgroup_common_file_read(struct cgroup *cgrp,
-					  struct cftype *cft,
-					  struct file *file,
-					  char __user *buf,
-					  size_t nbytes, loff_t *ppos)
-{
-	enum cgroup_filetype type = cft->private;
-	char *page;
-	ssize_t retval = 0;
-	char *s;
-
-	if (!(page = (char *)__get_free_page(GFP_KERNEL)))
-		return -ENOMEM;
-
-	s = page;
-
-	switch (type) {
-	case FILE_RELEASE_AGENT:
-	{
-		struct cgroupfs_root *root;
-		size_t n;
-		mutex_lock(&cgroup_mutex);
-		root = cgrp->root;
-		n = strnlen(root->release_agent_path,
-			    sizeof(root->release_agent_path));
-		n = min(n, (size_t) PAGE_SIZE);
-		strncpy(s, root->release_agent_path, n);
-		mutex_unlock(&cgroup_mutex);
-		s += n;
-		break;
-	}
-	default:
-		retval = -EINVAL;
-		goto out;
-	}
-	*s++ = '\n';
-
-	retval = simple_read_from_buffer(buf, nbytes, ppos, page, s - page);
-out:
-	free_page((unsigned long)page);
-	return retval;
-}
-
 static ssize_t cgroup_file_read(struct file *file, char __user *buf,
 				   size_t nbytes, loff_t *ppos)
 {
@@ -1606,6 +1594,7 @@ int cgroup_seqfile_release(struct inode *inode, struct file *file)
 
 static struct file_operations cgroup_seqfile_operations = {
 	.read = seq_read,
+	.write = cgroup_file_write,
 	.llseek = seq_lseek,
 	.release = cgroup_seqfile_release,
 };
@@ -2283,8 +2272,9 @@ static struct cftype files[] = {
 
 static struct cftype cft_release_agent = {
 	.name = "release_agent",
-	.read = cgroup_common_file_read,
-	.write = cgroup_common_file_write,
+	.read_seq_string = cgroup_release_agent_show,
+	.write_string = cgroup_release_agent_write,
+	.max_write_len = PATH_MAX,
 	.private = FILE_RELEASE_AGENT,
 };
 
@@ -3111,27 +3101,24 @@ static void cgroup_release_agent(struct work_struct *work)
 	while (!list_empty(&release_list)) {
 		char *argv[3], *envp[3];
 		int i;
-		char *pathbuf;
+		char *pathbuf = NULL, *agentbuf = NULL;
 		struct cgroup *cgrp = list_entry(release_list.next,
 						    struct cgroup,
 						    release_list);
 		list_del_init(&cgrp->release_list);
 		spin_unlock(&release_list_lock);
 		pathbuf = kmalloc(PAGE_SIZE, GFP_KERNEL);
-		if (!pathbuf) {
-			spin_lock(&release_list_lock);
-			continue;
-		}
-
-		if (cgroup_path(cgrp, pathbuf, PAGE_SIZE) < 0) {
-			kfree(pathbuf);
-			spin_lock(&release_list_lock);
-			continue;
-		}
+		if (!pathbuf)
+			goto continue_free;
+		if (cgroup_path(cgrp, pathbuf, PAGE_SIZE) < 0)
+			goto continue_free;
+		agentbuf = kstrdup(cgrp->root->release_agent_path, GFP_KERNEL);
+		if (!agentbuf)
+			goto continue_free;
 
 		i = 0;
-		argv[i++] = cgrp->root->release_agent_path;
-		argv[i++] = (char *)pathbuf;
+		argv[i++] = agentbuf;
+		argv[i++] = pathbuf;
 		argv[i] = NULL;
 
 		i = 0;
@@ -3145,8 +3132,10 @@ static void cgroup_release_agent(struct work_struct *work)
 		 * be a slow process */
 		mutex_unlock(&cgroup_mutex);
 		call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
-		kfree(pathbuf);
 		mutex_lock(&cgroup_mutex);
+ continue_free:
+		kfree(pathbuf);
+		kfree(agentbuf);
 		spin_lock(&release_list_lock);
 	}
 	spin_unlock(&release_list_lock);
-- 
cgit v1.2.3


From 84eea842886ac35020be6043e04748ed22014359 Mon Sep 17 00:00:00 2001
From: Paul Menage <menage@google.com>
Date: Fri, 25 Jul 2008 01:47:00 -0700
Subject: cgroups: misc cleanups to write_string patchset

This patch contains cleanups suggested by reviewers for the recent
write_string() patchset:

- pair cgroup_lock_live_group() with cgroup_unlock() in cgroup.c for
  clarity, rather than directly unlocking cgroup_mutex.

- make the return type of cgroup_lock_live_group() a bool

- use a #define'd constant for the local buffer size in read/write functions

Signed-off-by: Paul Menage <menage@google.com>
Cc: Paul Jackson <pj@sgi.com>
Cc: Pavel Emelyanov <xemul@openvz.org>
Cc: Balbir Singh <balbir@in.ibm.com>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/cgroup.h |  4 ++--
 kernel/cgroup.c        | 21 ++++++++++++---------
 2 files changed, 14 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index e78377a91a74..cc59d3a21d87 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -21,11 +21,13 @@
 struct cgroupfs_root;
 struct cgroup_subsys;
 struct inode;
+struct cgroup;
 
 extern int cgroup_init_early(void);
 extern int cgroup_init(void);
 extern void cgroup_init_smp(void);
 extern void cgroup_lock(void);
+extern bool cgroup_lock_live_group(struct cgroup *cgrp);
 extern void cgroup_unlock(void);
 extern void cgroup_fork(struct task_struct *p);
 extern void cgroup_fork_callbacks(struct task_struct *p);
@@ -295,8 +297,6 @@ int cgroup_add_files(struct cgroup *cgrp,
 
 int cgroup_is_removed(const struct cgroup *cgrp);
 
-int cgroup_lock_live_group(struct cgroup *cgrp);
-
 int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen);
 
 int cgroup_task_count(const struct cgroup *cgrp);
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 0120b5d67a73..a14122ecaa5e 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1329,10 +1329,10 @@ enum cgroup_filetype {
  * cgroup_lock_live_group - take cgroup_mutex and check that cgrp is alive.
  * @cgrp: the cgroup to be checked for liveness
  *
- * Returns true (with lock held) on success, or false (with no lock
- * held) on failure.
+ * On success, returns true; the lock should be later released with
+ * cgroup_unlock(). On failure returns false with no lock held.
  */
-int cgroup_lock_live_group(struct cgroup *cgrp)
+bool cgroup_lock_live_group(struct cgroup *cgrp)
 {
 	mutex_lock(&cgroup_mutex);
 	if (cgroup_is_removed(cgrp)) {
@@ -1349,7 +1349,7 @@ static int cgroup_release_agent_write(struct cgroup *cgrp, struct cftype *cft,
 	if (!cgroup_lock_live_group(cgrp))
 		return -ENODEV;
 	strcpy(cgrp->root->release_agent_path, buffer);
-	mutex_unlock(&cgroup_mutex);
+	cgroup_unlock();
 	return 0;
 }
 
@@ -1360,16 +1360,19 @@ static int cgroup_release_agent_show(struct cgroup *cgrp, struct cftype *cft,
 		return -ENODEV;
 	seq_puts(seq, cgrp->root->release_agent_path);
 	seq_putc(seq, '\n');
-	mutex_unlock(&cgroup_mutex);
+	cgroup_unlock();
 	return 0;
 }
 
+/* A buffer size big enough for numbers or short strings */
+#define CGROUP_LOCAL_BUFFER_SIZE 64
+
 static ssize_t cgroup_write_X64(struct cgroup *cgrp, struct cftype *cft,
 				struct file *file,
 				const char __user *userbuf,
 				size_t nbytes, loff_t *unused_ppos)
 {
-	char buffer[64];
+	char buffer[CGROUP_LOCAL_BUFFER_SIZE];
 	int retval = 0;
 	char *end;
 
@@ -1403,7 +1406,7 @@ static ssize_t cgroup_write_string(struct cgroup *cgrp, struct cftype *cft,
 				   const char __user *userbuf,
 				   size_t nbytes, loff_t *unused_ppos)
 {
-	char local_buffer[64];
+	char local_buffer[CGROUP_LOCAL_BUFFER_SIZE];
 	int retval = 0;
 	size_t max_bytes = cft->max_write_len;
 	char *buffer = local_buffer;
@@ -1518,7 +1521,7 @@ static ssize_t cgroup_read_u64(struct cgroup *cgrp, struct cftype *cft,
 			       char __user *buf, size_t nbytes,
 			       loff_t *ppos)
 {
-	char tmp[64];
+	char tmp[CGROUP_LOCAL_BUFFER_SIZE];
 	u64 val = cft->read_u64(cgrp, cft);
 	int len = sprintf(tmp, "%llu\n", (unsigned long long) val);
 
@@ -1530,7 +1533,7 @@ static ssize_t cgroup_read_s64(struct cgroup *cgrp, struct cftype *cft,
 			       char __user *buf, size_t nbytes,
 			       loff_t *ppos)
 {
-	char tmp[64];
+	char tmp[CGROUP_LOCAL_BUFFER_SIZE];
 	s64 val = cft->read_s64(cgrp, cft);
 	int len = sprintf(tmp, "%lld\n", (long long) val);
 
-- 
cgit v1.2.3


From 856c13aa1ff6136c1968414fdea5938ea9d5ebf2 Mon Sep 17 00:00:00 2001
From: Paul Menage <menage@google.com>
Date: Fri, 25 Jul 2008 01:47:04 -0700
Subject: cgroup files: convert res_counter_write() to be a cgroups
 write_string() handler

Currently res_counter_write() is a raw file handler even though it's
ultimately taking a number, since in some cases it wants to
pre-process the string when converting it to a number.

This patch converts res_counter_write() from a raw file handler to a
write_string() handler; this allows some of the boilerplate
copying/locking/checking to be removed, and simplies the cleanup path,
since these functions are now performed by the cgroups framework.

[lizf@cn.fujitsu.com: build fix]
Signed-off-by: Paul Menage <menage@google.com>
Cc: Paul Jackson <pj@sgi.com>
Cc: Pavel Emelyanov <xemul@openvz.org>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Serge Hallyn <serue@us.ibm.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/res_counter.h | 11 ++++++++---
 kernel/res_counter.c        | 48 ++++++++++++++++++++-------------------------
 mm/memcontrol.c             | 24 +++++------------------
 3 files changed, 34 insertions(+), 49 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h
index 125660e7793f..290205dfe094 100644
--- a/include/linux/res_counter.h
+++ b/include/linux/res_counter.h
@@ -63,9 +63,14 @@ u64 res_counter_read_u64(struct res_counter *counter, int member);
 ssize_t res_counter_read(struct res_counter *counter, int member,
 		const char __user *buf, size_t nbytes, loff_t *pos,
 		int (*read_strategy)(unsigned long long val, char *s));
-ssize_t res_counter_write(struct res_counter *counter, int member,
-		const char __user *buf, size_t nbytes, loff_t *pos,
-		int (*write_strategy)(char *buf, unsigned long long *val));
+
+typedef int (*write_strategy_fn)(const char *buf, unsigned long long *val);
+
+int res_counter_memparse_write_strategy(const char *buf,
+					unsigned long long *res);
+
+int res_counter_write(struct res_counter *counter, int member,
+		      const char *buffer, write_strategy_fn write_strategy);
 
 /*
  * the field descriptors. one for each member of res_counter
diff --git a/kernel/res_counter.c b/kernel/res_counter.c
index d3c61b4ebef2..f275c8eca772 100644
--- a/kernel/res_counter.c
+++ b/kernel/res_counter.c
@@ -13,6 +13,7 @@
 #include <linux/slab.h>
 #include <linux/res_counter.h>
 #include <linux/uaccess.h>
+#include <linux/mm.h>
 
 void res_counter_init(struct res_counter *counter)
 {
@@ -102,44 +103,37 @@ u64 res_counter_read_u64(struct res_counter *counter, int member)
 	return *res_counter_member(counter, member);
 }
 
-ssize_t res_counter_write(struct res_counter *counter, int member,
-		const char __user *userbuf, size_t nbytes, loff_t *pos,
-		int (*write_strategy)(char *st_buf, unsigned long long *val))
+int res_counter_memparse_write_strategy(const char *buf,
+					unsigned long long *res)
 {
-	int ret;
-	char *buf, *end;
-	unsigned long flags;
-	unsigned long long tmp, *val;
-
-	buf = kmalloc(nbytes + 1, GFP_KERNEL);
-	ret = -ENOMEM;
-	if (buf == NULL)
-		goto out;
+	char *end;
+	/* FIXME - make memparse() take const char* args */
+	*res = memparse((char *)buf, &end);
+	if (*end != '\0')
+		return -EINVAL;
 
-	buf[nbytes] = '\0';
-	ret = -EFAULT;
-	if (copy_from_user(buf, userbuf, nbytes))
-		goto out_free;
+	*res = PAGE_ALIGN(*res);
+	return 0;
+}
 
-	ret = -EINVAL;
+int res_counter_write(struct res_counter *counter, int member,
+		      const char *buf, write_strategy_fn write_strategy)
+{
+	char *end;
+	unsigned long flags;
+	unsigned long long tmp, *val;
 
-	strstrip(buf);
 	if (write_strategy) {
-		if (write_strategy(buf, &tmp)) {
-			goto out_free;
-		}
+		if (write_strategy(buf, &tmp))
+			return -EINVAL;
 	} else {
 		tmp = simple_strtoull(buf, &end, 10);
 		if (*end != '\0')
-			goto out_free;
+			return -EINVAL;
 	}
 	spin_lock_irqsave(&counter->lock, flags);
 	val = res_counter_member(counter, member);
 	*val = tmp;
 	spin_unlock_irqrestore(&counter->lock, flags);
-	ret = nbytes;
-out_free:
-	kfree(buf);
-out:
-	return ret;
+	return 0;
 }
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index e46451e1d9b7..7385d58fb061 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -838,32 +838,18 @@ out:
 	return ret;
 }
 
-static int mem_cgroup_write_strategy(char *buf, unsigned long long *tmp)
-{
-	*tmp = memparse(buf, &buf);
-	if (*buf != '\0')
-		return -EINVAL;
-
-	/*
-	 * Round up the value to the closest page size
-	 */
-	*tmp = ((*tmp + PAGE_SIZE - 1) >> PAGE_SHIFT) << PAGE_SHIFT;
-	return 0;
-}
-
 static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft)
 {
 	return res_counter_read_u64(&mem_cgroup_from_cont(cont)->res,
 				    cft->private);
 }
 
-static ssize_t mem_cgroup_write(struct cgroup *cont, struct cftype *cft,
-				struct file *file, const char __user *userbuf,
-				size_t nbytes, loff_t *ppos)
+static int mem_cgroup_write(struct cgroup *cont, struct cftype *cft,
+			    const char *buffer)
 {
 	return res_counter_write(&mem_cgroup_from_cont(cont)->res,
-				cft->private, userbuf, nbytes, ppos,
-				mem_cgroup_write_strategy);
+				 cft->private, buffer,
+				 res_counter_memparse_write_strategy);
 }
 
 static int mem_cgroup_reset(struct cgroup *cont, unsigned int event)
@@ -940,7 +926,7 @@ static struct cftype mem_cgroup_files[] = {
 	{
 		.name = "limit_in_bytes",
 		.private = RES_LIMIT,
-		.write = mem_cgroup_write,
+		.write_string = mem_cgroup_write,
 		.read_u64 = mem_cgroup_read,
 	},
 	{
-- 
cgit v1.2.3


From e885dcde75685e09f23cffae1f6d5169c105b8a0 Mon Sep 17 00:00:00 2001
From: "Serge E. Hallyn" <serue@us.ibm.com>
Date: Fri, 25 Jul 2008 01:47:06 -0700
Subject: cgroup_clone: use pid of newly created task for new cgroup

cgroup_clone creates a new cgroup with the pid of the task.  This works
correctly for unshare, but for clone cgroup_clone is called from
copy_namespaces inside copy_process, which happens before the new pid is
created.  As a result, the new cgroup was created with current's pid.
This patch:

	1. Moves the call inside copy_process to after the new pid
	   is created
	2. Passes the struct pid into ns_cgroup_clone (as it is not
	   yet attached to the task)
	3. Passes a name from ns_cgroup_clone() into cgroup_clone()
	   so as to keep cgroup_clone() itself simpler
	4. Uses pid_vnr() to get the process id value, so that the
	   pid used to name the new cgroup is always the pid as it
	   would be known to the task which did the cloning or
	   unsharing.  I think that is the most intuitive thing to
	   do.  This way, task t1 does clone(CLONE_NEWPID) to get
	   t2, which does clone(CLONE_NEWPID) to get t3, then the
	   cgroup for t3 will be named for the pid by which t2 knows
	   t3.

(Thanks to Dan Smith for finding the main bug)

Changelog:
	June 11: Incorporate Paul Menage's feedback:  don't pass
	         NULL to ns_cgroup_clone from unshare, and reduce
		 patch size by using 'nodename' in cgroup_clone.
	June 10: Original version

[akpm@linux-foundation.org: build fix]
[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Serge Hallyn <serge@us.ibm.com>
Acked-by: Paul Menage <menage@google.com>
Tested-by: Dan Smith <danms@us.ibm.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/cgroup.h  | 3 ++-
 include/linux/nsproxy.h | 7 +++++--
 kernel/cgroup.c         | 7 +++----
 kernel/fork.c           | 6 ++++++
 kernel/ns_cgroup.c      | 8 ++++++--
 kernel/nsproxy.c        | 8 +-------
 6 files changed, 23 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index cc59d3a21d87..c98dd7cb7076 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -364,7 +364,8 @@ static inline struct cgroup* task_cgroup(struct task_struct *task,
 	return task_subsys_state(task, subsys_id)->cgroup;
 }
 
-int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *ss);
+int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *ss,
+							char *nodename);
 
 /* A cgroup_iter should be treated as an opaque object */
 struct cgroup_iter {
diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h
index 0e66b57631fc..c8a768e59640 100644
--- a/include/linux/nsproxy.h
+++ b/include/linux/nsproxy.h
@@ -82,9 +82,12 @@ static inline void get_nsproxy(struct nsproxy *ns)
 }
 
 #ifdef CONFIG_CGROUP_NS
-int ns_cgroup_clone(struct task_struct *tsk);
+int ns_cgroup_clone(struct task_struct *tsk, struct pid *pid);
 #else
-static inline int ns_cgroup_clone(struct task_struct *tsk) { return 0; }
+static inline int ns_cgroup_clone(struct task_struct *tsk, struct pid *pid)
+{
+	return 0;
+}
 #endif
 
 #endif
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 86b71e714e13..66ec9fd21e0c 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2848,16 +2848,17 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks)
  * cgroup_clone - clone the cgroup the given subsystem is attached to
  * @tsk: the task to be moved
  * @subsys: the given subsystem
+ * @nodename: the name for the new cgroup
  *
  * Duplicate the current cgroup in the hierarchy that the given
  * subsystem is attached to, and move this task into the new
  * child.
  */
-int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys)
+int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys,
+							char *nodename)
 {
 	struct dentry *dentry;
 	int ret = 0;
-	char nodename[MAX_CGROUP_TYPE_NAMELEN];
 	struct cgroup *parent, *child;
 	struct inode *inode;
 	struct css_set *cg;
@@ -2882,8 +2883,6 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys)
 	cg = tsk->cgroups;
 	parent = task_cgroup(tsk, subsys->subsys_id);
 
-	snprintf(nodename, MAX_CGROUP_TYPE_NAMELEN, "%d", tsk->pid);
-
 	/* Pin the hierarchy */
 	atomic_inc(&parent->root->sb->s_active);
 
diff --git a/kernel/fork.c b/kernel/fork.c
index 5a5d6fef341d..228f80c9155a 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1107,6 +1107,12 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	if (clone_flags & CLONE_THREAD)
 		p->tgid = current->tgid;
 
+	if (current->nsproxy != p->nsproxy) {
+		retval = ns_cgroup_clone(p, pid);
+		if (retval)
+			goto bad_fork_free_pid;
+	}
+
 	p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
 	/*
 	 * Clear TID on mm_release()?
diff --git a/kernel/ns_cgroup.c b/kernel/ns_cgroup.c
index 48d7ed6fc3a4..43c2111cd54d 100644
--- a/kernel/ns_cgroup.c
+++ b/kernel/ns_cgroup.c
@@ -7,6 +7,7 @@
 #include <linux/module.h>
 #include <linux/cgroup.h>
 #include <linux/fs.h>
+#include <linux/proc_fs.h>
 #include <linux/slab.h>
 #include <linux/nsproxy.h>
 
@@ -24,9 +25,12 @@ static inline struct ns_cgroup *cgroup_to_ns(
 			    struct ns_cgroup, css);
 }
 
-int ns_cgroup_clone(struct task_struct *task)
+int ns_cgroup_clone(struct task_struct *task, struct pid *pid)
 {
-	return cgroup_clone(task, &ns_subsys);
+	char name[PROC_NUMBUF];
+
+	snprintf(name, PROC_NUMBUF, "%d", pid_vnr(pid));
+	return cgroup_clone(task, &ns_subsys, name);
 }
 
 /*
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index adc785146a1c..21575fc46d05 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -157,12 +157,6 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk)
 		goto out;
 	}
 
-	err = ns_cgroup_clone(tsk);
-	if (err) {
-		put_nsproxy(new_ns);
-		goto out;
-	}
-
 	tsk->nsproxy = new_ns;
 
 out:
@@ -209,7 +203,7 @@ int unshare_nsproxy_namespaces(unsigned long unshare_flags,
 		goto out;
 	}
 
-	err = ns_cgroup_clone(current);
+	err = ns_cgroup_clone(current, task_pid(current));
 	if (err)
 		put_nsproxy(*new_nsp);
 
-- 
cgit v1.2.3


From e8589cc189f96b87348ae83ea4db38eaac624135 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Fri, 25 Jul 2008 01:47:10 -0700
Subject: memcg: better migration handling

This patch changes page migration under memory controller to use a
different algorithm.  (thanks to Christoph for new idea.)

Before:
 - page_cgroup is migrated from an old page to a new page.
After:
 - a new page is accounted , no reuse of page_cgroup.

Pros:

 - We can avoid compliated lock depndencies and races in migration.

Cons:

 - new param to mem_cgroup_charge_common().

 - mem_cgroup_getref() is added for handling ref_cnt ping-pong.

This version simplifies complicated lock dependency in page migraiton
under memory resource controller.

  new refcnt sequence is following.

a mapped page:
  prepage_migration() ..... +1 to NEW page
  try_to_unmap()      ..... all refs to OLD page is gone.
  move_pages()        ..... +1 to NEW page if page cache.
  remap...            ..... all refs from *map* is added to NEW one.
  end_migration()     ..... -1 to New page.

  page's mapcount + (page_is_cache) refs are added to NEW one.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Pavel Emelyanov <xemul@openvz.org>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: Christoph Lameter <cl@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h |  11 ++--
 mm/memcontrol.c            | 128 +++++++++++++++++++++++----------------------
 mm/migrate.c               |  22 +++++---
 3 files changed, 86 insertions(+), 75 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index e6608776bc96..84ead2aa6f18 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -50,9 +50,10 @@ extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
 #define mm_match_cgroup(mm, cgroup)	\
 	((cgroup) == mem_cgroup_from_task((mm)->owner))
 
-extern int mem_cgroup_prepare_migration(struct page *page);
+extern int
+mem_cgroup_prepare_migration(struct page *page, struct page *newpage);
 extern void mem_cgroup_end_migration(struct page *page);
-extern void mem_cgroup_page_migration(struct page *page, struct page *newpage);
+extern int mem_cgroup_getref(struct page *page);
 
 /*
  * For memory reclaim.
@@ -112,7 +113,8 @@ static inline int task_in_mem_cgroup(struct task_struct *task,
 	return 1;
 }
 
-static inline int mem_cgroup_prepare_migration(struct page *page)
+static inline int
+mem_cgroup_prepare_migration(struct page *page, struct page *newpage)
 {
 	return 0;
 }
@@ -121,8 +123,7 @@ static inline void mem_cgroup_end_migration(struct page *page)
 {
 }
 
-static inline void
-mem_cgroup_page_migration(struct page *page, struct page *newpage)
+static inline void mem_cgroup_getref(struct page *page)
 {
 }
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 90ccc1326356..da5912b84551 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -524,7 +524,8 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
  * < 0 if the cgroup is over its limit
  */
 static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
-				gfp_t gfp_mask, enum charge_type ctype)
+				gfp_t gfp_mask, enum charge_type ctype,
+				struct mem_cgroup *memcg)
 {
 	struct mem_cgroup *mem;
 	struct page_cgroup *pc;
@@ -569,16 +570,21 @@ retry:
 	 * thread group leader migrates. It's possible that mm is not
 	 * set, if so charge the init_mm (happens for pagecache usage).
 	 */
-	if (!mm)
-		mm = &init_mm;
+	if (!memcg) {
+		if (!mm)
+			mm = &init_mm;
 
-	rcu_read_lock();
-	mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
-	/*
-	 * For every charge from the cgroup, increment reference count
-	 */
-	css_get(&mem->css);
-	rcu_read_unlock();
+		rcu_read_lock();
+		mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
+		/*
+		 * For every charge from the cgroup, increment reference count
+		 */
+		css_get(&mem->css);
+		rcu_read_unlock();
+	} else {
+		mem = memcg;
+		css_get(&memcg->css);
+	}
 
 	while (res_counter_charge(&mem->res, PAGE_SIZE)) {
 		if (!(gfp_mask & __GFP_WAIT))
@@ -648,7 +654,7 @@ err:
 int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask)
 {
 	return mem_cgroup_charge_common(page, mm, gfp_mask,
-				MEM_CGROUP_CHARGE_TYPE_MAPPED);
+				MEM_CGROUP_CHARGE_TYPE_MAPPED, NULL);
 }
 
 int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
@@ -657,7 +663,22 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
 	if (!mm)
 		mm = &init_mm;
 	return mem_cgroup_charge_common(page, mm, gfp_mask,
-				MEM_CGROUP_CHARGE_TYPE_CACHE);
+				MEM_CGROUP_CHARGE_TYPE_CACHE, NULL);
+}
+
+int mem_cgroup_getref(struct page *page)
+{
+	struct page_cgroup *pc;
+
+	if (mem_cgroup_subsys.disabled)
+		return 0;
+
+	lock_page_cgroup(page);
+	pc = page_get_page_cgroup(page);
+	VM_BUG_ON(!pc);
+	pc->ref_cnt++;
+	unlock_page_cgroup(page);
+	return 0;
 }
 
 /*
@@ -707,65 +728,39 @@ unlock:
 }
 
 /*
- * Returns non-zero if a page (under migration) has valid page_cgroup member.
- * Refcnt of page_cgroup is incremented.
+ * Before starting migration, account against new page.
  */
-int mem_cgroup_prepare_migration(struct page *page)
+int mem_cgroup_prepare_migration(struct page *page, struct page *newpage)
 {
 	struct page_cgroup *pc;
+	struct mem_cgroup *mem = NULL;
+	enum charge_type ctype = MEM_CGROUP_CHARGE_TYPE_MAPPED;
+	int ret = 0;
 
 	if (mem_cgroup_subsys.disabled)
 		return 0;
 
 	lock_page_cgroup(page);
 	pc = page_get_page_cgroup(page);
-	if (pc)
-		pc->ref_cnt++;
+	if (pc) {
+		mem = pc->mem_cgroup;
+		css_get(&mem->css);
+		if (pc->flags & PAGE_CGROUP_FLAG_CACHE)
+			ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
+	}
 	unlock_page_cgroup(page);
-	return pc != NULL;
-}
-
-void mem_cgroup_end_migration(struct page *page)
-{
-	mem_cgroup_uncharge_page(page);
+	if (mem) {
+		ret = mem_cgroup_charge_common(newpage, NULL, GFP_KERNEL,
+			ctype, mem);
+		css_put(&mem->css);
+	}
+	return ret;
 }
 
-/*
- * We know both *page* and *newpage* are now not-on-LRU and PG_locked.
- * And no race with uncharge() routines because page_cgroup for *page*
- * has extra one reference by mem_cgroup_prepare_migration.
- */
-void mem_cgroup_page_migration(struct page *page, struct page *newpage)
+/* remove redundant charge */
+void mem_cgroup_end_migration(struct page *newpage)
 {
-	struct page_cgroup *pc;
-	struct mem_cgroup_per_zone *mz;
-	unsigned long flags;
-
-	lock_page_cgroup(page);
-	pc = page_get_page_cgroup(page);
-	if (!pc) {
-		unlock_page_cgroup(page);
-		return;
-	}
-
-	mz = page_cgroup_zoneinfo(pc);
-	spin_lock_irqsave(&mz->lru_lock, flags);
-	__mem_cgroup_remove_list(mz, pc);
-	spin_unlock_irqrestore(&mz->lru_lock, flags);
-
-	page_assign_page_cgroup(page, NULL);
-	unlock_page_cgroup(page);
-
-	pc->page = newpage;
-	lock_page_cgroup(newpage);
-	page_assign_page_cgroup(newpage, pc);
-
-	mz = page_cgroup_zoneinfo(pc);
-	spin_lock_irqsave(&mz->lru_lock, flags);
-	__mem_cgroup_add_list(mz, pc);
-	spin_unlock_irqrestore(&mz->lru_lock, flags);
-
-	unlock_page_cgroup(newpage);
+	mem_cgroup_uncharge_page(newpage);
 }
 
 /*
@@ -795,12 +790,19 @@ static void mem_cgroup_force_empty_list(struct mem_cgroup *mem,
 		page = pc->page;
 		get_page(page);
 		spin_unlock_irqrestore(&mz->lru_lock, flags);
-		mem_cgroup_uncharge_page(page);
-		put_page(page);
-		if (--count <= 0) {
-			count = FORCE_UNCHARGE_BATCH;
+		/*
+		 * Check if this page is on LRU. !LRU page can be found
+		 * if it's under page migration.
+		 */
+		if (PageLRU(page)) {
+			mem_cgroup_uncharge_page(page);
+			put_page(page);
+			if (--count <= 0) {
+				count = FORCE_UNCHARGE_BATCH;
+				cond_resched();
+			}
+		} else
 			cond_resched();
-		}
 		spin_lock_irqsave(&mz->lru_lock, flags);
 	}
 	spin_unlock_irqrestore(&mz->lru_lock, flags);
diff --git a/mm/migrate.c b/mm/migrate.c
index 376cceba82f9..f6d7f8efd1a8 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -358,6 +358,10 @@ static int migrate_page_move_mapping(struct address_space *mapping,
 	__inc_zone_page_state(newpage, NR_FILE_PAGES);
 
 	write_unlock_irq(&mapping->tree_lock);
+	if (!PageSwapCache(newpage)) {
+		mem_cgroup_uncharge_page(page);
+		mem_cgroup_getref(newpage);
+	}
 
 	return 0;
 }
@@ -611,7 +615,6 @@ static int move_to_new_page(struct page *newpage, struct page *page)
 		rc = fallback_migrate_page(mapping, newpage, page);
 
 	if (!rc) {
-		mem_cgroup_page_migration(page, newpage);
 		remove_migration_ptes(page, newpage);
 	} else
 		newpage->mapping = NULL;
@@ -641,6 +644,14 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
 		/* page was freed from under us. So we are done. */
 		goto move_newpage;
 
+	charge = mem_cgroup_prepare_migration(page, newpage);
+	if (charge == -ENOMEM) {
+		rc = -ENOMEM;
+		goto move_newpage;
+	}
+	/* prepare cgroup just returns 0 or -ENOMEM */
+	BUG_ON(charge);
+
 	rc = -EAGAIN;
 	if (TestSetPageLocked(page)) {
 		if (!force)
@@ -692,19 +703,14 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
 		goto rcu_unlock;
 	}
 
-	charge = mem_cgroup_prepare_migration(page);
 	/* Establish migration ptes or remove ptes */
 	try_to_unmap(page, 1);
 
 	if (!page_mapped(page))
 		rc = move_to_new_page(newpage, page);
 
-	if (rc) {
+	if (rc)
 		remove_migration_ptes(page, page);
-		if (charge)
-			mem_cgroup_end_migration(page);
-	} else if (charge)
- 		mem_cgroup_end_migration(newpage);
 rcu_unlock:
 	if (rcu_locked)
 		rcu_read_unlock();
@@ -725,6 +731,8 @@ unlock:
 	}
 
 move_newpage:
+	if (!charge)
+		mem_cgroup_end_migration(newpage);
 	/*
 	 * Move the new page to the LRU. If migration was not successful
 	 * then this will free the page.
-- 
cgit v1.2.3


From 69029cd550284e32de13d6dd2f77b723c8a0e444 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Fri, 25 Jul 2008 01:47:14 -0700
Subject: memcg: remove refcnt from page_cgroup

memcg: performance improvements

Patch Description
 1/5 ... remove refcnt fron page_cgroup patch (shmem handling is fixed)
 2/5 ... swapcache handling patch
 3/5 ... add helper function for shmem's memory reclaim patch
 4/5 ... optimize by likely/unlikely ppatch
 5/5 ... remove redundunt check patch (shmem handling is fixed.)

Unix bench result.

== 2.6.26-rc2-mm1 + memory resource controller
Execl Throughput                           2915.4 lps   (29.6 secs, 3 samples)
C Compiler Throughput                      1019.3 lpm   (60.0 secs, 3 samples)
Shell Scripts (1 concurrent)               5796.0 lpm   (60.0 secs, 3 samples)
Shell Scripts (8 concurrent)               1097.7 lpm   (60.0 secs, 3 samples)
Shell Scripts (16 concurrent)               565.3 lpm   (60.0 secs, 3 samples)
File Read 1024 bufsize 2000 maxblocks    1022128.0 KBps  (30.0 secs, 3 samples)
File Write 1024 bufsize 2000 maxblocks   544057.0 KBps  (30.0 secs, 3 samples)
File Copy 1024 bufsize 2000 maxblocks    346481.0 KBps  (30.0 secs, 3 samples)
File Read 256 bufsize 500 maxblocks      319325.0 KBps  (30.0 secs, 3 samples)
File Write 256 bufsize 500 maxblocks     148788.0 KBps  (30.0 secs, 3 samples)
File Copy 256 bufsize 500 maxblocks       99051.0 KBps  (30.0 secs, 3 samples)
File Read 4096 bufsize 8000 maxblocks    2058917.0 KBps  (30.0 secs, 3 samples)
File Write 4096 bufsize 8000 maxblocks   1606109.0 KBps  (30.0 secs, 3 samples)
File Copy 4096 bufsize 8000 maxblocks    854789.0 KBps  (30.0 secs, 3 samples)
Dc: sqrt(2) to 99 decimal places         126145.2 lpm   (30.0 secs, 3 samples)

                     INDEX VALUES
TEST                                        BASELINE     RESULT      INDEX

Execl Throughput                                43.0     2915.4      678.0
File Copy 1024 bufsize 2000 maxblocks         3960.0   346481.0      875.0
File Copy 256 bufsize 500 maxblocks           1655.0    99051.0      598.5
File Copy 4096 bufsize 8000 maxblocks         5800.0   854789.0     1473.8
Shell Scripts (8 concurrent)                     6.0     1097.7     1829.5
                                                                 =========
     FINAL SCORE                                                     991.3

== 2.6.26-rc2-mm1 + this set ==
Execl Throughput                           3012.9 lps   (29.9 secs, 3 samples)
C Compiler Throughput                       981.0 lpm   (60.0 secs, 3 samples)
Shell Scripts (1 concurrent)               5872.0 lpm   (60.0 secs, 3 samples)
Shell Scripts (8 concurrent)               1120.3 lpm   (60.0 secs, 3 samples)
Shell Scripts (16 concurrent)               578.0 lpm   (60.0 secs, 3 samples)
File Read 1024 bufsize 2000 maxblocks    1003993.0 KBps  (30.0 secs, 3 samples)
File Write 1024 bufsize 2000 maxblocks   550452.0 KBps  (30.0 secs, 3 samples)
File Copy 1024 bufsize 2000 maxblocks    347159.0 KBps  (30.0 secs, 3 samples)
File Read 256 bufsize 500 maxblocks      314644.0 KBps  (30.0 secs, 3 samples)
File Write 256 bufsize 500 maxblocks     151852.0 KBps  (30.0 secs, 3 samples)
File Copy 256 bufsize 500 maxblocks      101000.0 KBps  (30.0 secs, 3 samples)
File Read 4096 bufsize 8000 maxblocks    2033256.0 KBps  (30.0 secs, 3 samples)
File Write 4096 bufsize 8000 maxblocks   1611814.0 KBps  (30.0 secs, 3 samples)
File Copy 4096 bufsize 8000 maxblocks    847979.0 KBps  (30.0 secs, 3 samples)
Dc: sqrt(2) to 99 decimal places         128148.7 lpm   (30.0 secs, 3 samples)

                     INDEX VALUES
TEST                                        BASELINE     RESULT      INDEX

Execl Throughput                                43.0     3012.9      700.7
File Copy 1024 bufsize 2000 maxblocks         3960.0   347159.0      876.7
File Copy 256 bufsize 500 maxblocks           1655.0   101000.0      610.3
File Copy 4096 bufsize 8000 maxblocks         5800.0   847979.0     1462.0
Shell Scripts (8 concurrent)                     6.0     1120.3     1867.2
                                                                 =========
     FINAL SCORE                                                    1004.6

This patch:

Remove refcnt from page_cgroup().

After this,

 * A page is charged only when !page_mapped() && no page_cgroup is assigned.
	* Anon page is newly mapped.
	* File page is added to mapping->tree.

 * A page is uncharged only when
	* Anon page is fully unmapped.
	* File page is removed from LRU.

There is no change in behavior from user's view.

This patch also removes unnecessary calls in rmap.c which was used only for
refcnt mangement.

[akpm@linux-foundation.org: fix warning]
[hugh@veritas.com: fix shmem_unuse_inode charging]
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Pavel Emelyanov <xemul@openvz.org>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
Cc: Paul Menage <menage@google.com>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h |  10 ++---
 mm/filemap.c               |   6 +--
 mm/memcontrol.c            | 109 ++++++++++++++++++++++++++-------------------
 mm/migrate.c               |   3 +-
 mm/rmap.c                  |  14 +-----
 mm/shmem.c                 |  35 ++++++++++-----
 6 files changed, 97 insertions(+), 80 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 84ead2aa6f18..b4980b8f048e 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -35,6 +35,7 @@ extern int mem_cgroup_charge(struct page *page, struct mm_struct *mm,
 extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
 					gfp_t gfp_mask);
 extern void mem_cgroup_uncharge_page(struct page *page);
+extern void mem_cgroup_uncharge_cache_page(struct page *page);
 extern void mem_cgroup_move_lists(struct page *page, bool active);
 extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
 					struct list_head *dst,
@@ -53,7 +54,6 @@ extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
 extern int
 mem_cgroup_prepare_migration(struct page *page, struct page *newpage);
 extern void mem_cgroup_end_migration(struct page *page);
-extern int mem_cgroup_getref(struct page *page);
 
 /*
  * For memory reclaim.
@@ -98,6 +98,10 @@ static inline void mem_cgroup_uncharge_page(struct page *page)
 {
 }
 
+static inline void mem_cgroup_uncharge_cache_page(struct page *page)
+{
+}
+
 static inline void mem_cgroup_move_lists(struct page *page, bool active)
 {
 }
@@ -123,10 +127,6 @@ static inline void mem_cgroup_end_migration(struct page *page)
 {
 }
 
-static inline void mem_cgroup_getref(struct page *page)
-{
-}
-
 static inline int mem_cgroup_calc_mapped_ratio(struct mem_cgroup *mem)
 {
 	return 0;
diff --git a/mm/filemap.c b/mm/filemap.c
index 5d4c880d7cd9..2d3ec1ffc66e 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -115,7 +115,7 @@ void __remove_from_page_cache(struct page *page)
 {
 	struct address_space *mapping = page->mapping;
 
-	mem_cgroup_uncharge_page(page);
+	mem_cgroup_uncharge_cache_page(page);
 	radix_tree_delete(&mapping->page_tree, page->index);
 	page->mapping = NULL;
 	mapping->nrpages--;
@@ -474,12 +474,12 @@ int add_to_page_cache(struct page *page, struct address_space *mapping,
 			mapping->nrpages++;
 			__inc_zone_page_state(page, NR_FILE_PAGES);
 		} else
-			mem_cgroup_uncharge_page(page);
+			mem_cgroup_uncharge_cache_page(page);
 
 		write_unlock_irq(&mapping->tree_lock);
 		radix_tree_preload_end();
 	} else
-		mem_cgroup_uncharge_page(page);
+		mem_cgroup_uncharge_cache_page(page);
 out:
 	return error;
 }
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index da5912b84551..a61706193c31 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -166,7 +166,6 @@ struct page_cgroup {
 	struct list_head lru;		/* per cgroup LRU list */
 	struct page *page;
 	struct mem_cgroup *mem_cgroup;
-	int ref_cnt;			/* cached, mapped, migrating */
 	int flags;
 };
 #define PAGE_CGROUP_FLAG_CACHE	(0x1)	/* charged as cache */
@@ -185,6 +184,7 @@ static enum zone_type page_cgroup_zid(struct page_cgroup *pc)
 enum charge_type {
 	MEM_CGROUP_CHARGE_TYPE_CACHE = 0,
 	MEM_CGROUP_CHARGE_TYPE_MAPPED,
+	MEM_CGROUP_CHARGE_TYPE_FORCE,	/* used by force_empty */
 };
 
 /*
@@ -552,9 +552,7 @@ retry:
 	 */
 	if (pc) {
 		VM_BUG_ON(pc->page != page);
-		VM_BUG_ON(pc->ref_cnt <= 0);
-
-		pc->ref_cnt++;
+		VM_BUG_ON(!pc->mem_cgroup);
 		unlock_page_cgroup(page);
 		goto done;
 	}
@@ -570,10 +568,7 @@ retry:
 	 * thread group leader migrates. It's possible that mm is not
 	 * set, if so charge the init_mm (happens for pagecache usage).
 	 */
-	if (!memcg) {
-		if (!mm)
-			mm = &init_mm;
-
+	if (likely(!memcg)) {
 		rcu_read_lock();
 		mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
 		/*
@@ -609,7 +604,6 @@ retry:
 		}
 	}
 
-	pc->ref_cnt = 1;
 	pc->mem_cgroup = mem;
 	pc->page = page;
 	/*
@@ -653,6 +647,17 @@ err:
 
 int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask)
 {
+	/*
+	 * If already mapped, we don't have to account.
+	 * If page cache, page->mapping has address_space.
+	 * But page->mapping may have out-of-use anon_vma pointer,
+	 * detecit it by PageAnon() check. newly-mapped-anon's page->mapping
+	 * is NULL.
+  	 */
+	if (page_mapped(page) || (page->mapping && !PageAnon(page)))
+		return 0;
+	if (unlikely(!mm))
+		mm = &init_mm;
 	return mem_cgroup_charge_common(page, mm, gfp_mask,
 				MEM_CGROUP_CHARGE_TYPE_MAPPED, NULL);
 }
@@ -660,32 +665,17 @@ int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask)
 int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
 				gfp_t gfp_mask)
 {
-	if (!mm)
+	if (unlikely(!mm))
 		mm = &init_mm;
 	return mem_cgroup_charge_common(page, mm, gfp_mask,
 				MEM_CGROUP_CHARGE_TYPE_CACHE, NULL);
 }
 
-int mem_cgroup_getref(struct page *page)
-{
-	struct page_cgroup *pc;
-
-	if (mem_cgroup_subsys.disabled)
-		return 0;
-
-	lock_page_cgroup(page);
-	pc = page_get_page_cgroup(page);
-	VM_BUG_ON(!pc);
-	pc->ref_cnt++;
-	unlock_page_cgroup(page);
-	return 0;
-}
-
 /*
- * Uncharging is always a welcome operation, we never complain, simply
- * uncharge.
+ * uncharge if !page_mapped(page)
  */
-void mem_cgroup_uncharge_page(struct page *page)
+static void
+__mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
 {
 	struct page_cgroup *pc;
 	struct mem_cgroup *mem;
@@ -704,29 +694,41 @@ void mem_cgroup_uncharge_page(struct page *page)
 		goto unlock;
 
 	VM_BUG_ON(pc->page != page);
-	VM_BUG_ON(pc->ref_cnt <= 0);
 
-	if (--(pc->ref_cnt) == 0) {
-		mz = page_cgroup_zoneinfo(pc);
-		spin_lock_irqsave(&mz->lru_lock, flags);
-		__mem_cgroup_remove_list(mz, pc);
-		spin_unlock_irqrestore(&mz->lru_lock, flags);
+	if ((ctype == MEM_CGROUP_CHARGE_TYPE_MAPPED)
+	    && ((pc->flags & PAGE_CGROUP_FLAG_CACHE)
+		|| page_mapped(page)))
+		goto unlock;
 
-		page_assign_page_cgroup(page, NULL);
-		unlock_page_cgroup(page);
+	mz = page_cgroup_zoneinfo(pc);
+	spin_lock_irqsave(&mz->lru_lock, flags);
+	__mem_cgroup_remove_list(mz, pc);
+	spin_unlock_irqrestore(&mz->lru_lock, flags);
 
-		mem = pc->mem_cgroup;
-		res_counter_uncharge(&mem->res, PAGE_SIZE);
-		css_put(&mem->css);
+	page_assign_page_cgroup(page, NULL);
+	unlock_page_cgroup(page);
 
-		kmem_cache_free(page_cgroup_cache, pc);
-		return;
-	}
+	mem = pc->mem_cgroup;
+	res_counter_uncharge(&mem->res, PAGE_SIZE);
+	css_put(&mem->css);
 
+	kmem_cache_free(page_cgroup_cache, pc);
+	return;
 unlock:
 	unlock_page_cgroup(page);
 }
 
+void mem_cgroup_uncharge_page(struct page *page)
+{
+	__mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_MAPPED);
+}
+
+void mem_cgroup_uncharge_cache_page(struct page *page)
+{
+	VM_BUG_ON(page_mapped(page));
+	__mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE);
+}
+
 /*
  * Before starting migration, account against new page.
  */
@@ -757,15 +759,29 @@ int mem_cgroup_prepare_migration(struct page *page, struct page *newpage)
 	return ret;
 }
 
-/* remove redundant charge */
+/* remove redundant charge if migration failed*/
 void mem_cgroup_end_migration(struct page *newpage)
 {
-	mem_cgroup_uncharge_page(newpage);
+	/*
+	 * At success, page->mapping is not NULL.
+	 * special rollback care is necessary when
+	 * 1. at migration failure. (newpage->mapping is cleared in this case)
+	 * 2. the newpage was moved but not remapped again because the task
+	 *    exits and the newpage is obsolete. In this case, the new page
+	 *    may be a swapcache. So, we just call mem_cgroup_uncharge_page()
+	 *    always for avoiding mess. The  page_cgroup will be removed if
+	 *    unnecessary. File cache pages is still on radix-tree. Don't
+	 *    care it.
+	 */
+	if (!newpage->mapping)
+		__mem_cgroup_uncharge_common(newpage,
+					 MEM_CGROUP_CHARGE_TYPE_FORCE);
+	else if (PageAnon(newpage))
+		mem_cgroup_uncharge_page(newpage);
 }
 
 /*
  * This routine traverse page_cgroup in given list and drop them all.
- * This routine ignores page_cgroup->ref_cnt.
  * *And* this routine doesn't reclaim page itself, just removes page_cgroup.
  */
 #define FORCE_UNCHARGE_BATCH	(128)
@@ -795,7 +811,8 @@ static void mem_cgroup_force_empty_list(struct mem_cgroup *mem,
 		 * if it's under page migration.
 		 */
 		if (PageLRU(page)) {
-			mem_cgroup_uncharge_page(page);
+			__mem_cgroup_uncharge_common(page,
+					MEM_CGROUP_CHARGE_TYPE_FORCE);
 			put_page(page);
 			if (--count <= 0) {
 				count = FORCE_UNCHARGE_BATCH;
diff --git a/mm/migrate.c b/mm/migrate.c
index f6d7f8efd1a8..d8c65a65c61d 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -359,8 +359,7 @@ static int migrate_page_move_mapping(struct address_space *mapping,
 
 	write_unlock_irq(&mapping->tree_lock);
 	if (!PageSwapCache(newpage)) {
-		mem_cgroup_uncharge_page(page);
-		mem_cgroup_getref(newpage);
+		mem_cgroup_uncharge_cache_page(page);
 	}
 
 	return 0;
diff --git a/mm/rmap.c b/mm/rmap.c
index bf0a5b7cfb8e..abbd29f7c43f 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -576,14 +576,8 @@ void page_add_anon_rmap(struct page *page,
 	VM_BUG_ON(address < vma->vm_start || address >= vma->vm_end);
 	if (atomic_inc_and_test(&page->_mapcount))
 		__page_set_anon_rmap(page, vma, address);
-	else {
+	else
 		__page_check_anon_rmap(page, vma, address);
-		/*
-		 * We unconditionally charged during prepare, we uncharge here
-		 * This takes care of balancing the reference counts
-		 */
-		mem_cgroup_uncharge_page(page);
-	}
 }
 
 /**
@@ -614,12 +608,6 @@ void page_add_file_rmap(struct page *page)
 {
 	if (atomic_inc_and_test(&page->_mapcount))
 		__inc_zone_page_state(page, NR_FILE_MAPPED);
-	else
-		/*
-		 * We unconditionally charged during prepare, we uncharge here
-		 * This takes care of balancing the reference counts
-		 */
-		mem_cgroup_uncharge_page(page);
 }
 
 #ifdef CONFIG_DEBUG_VM
diff --git a/mm/shmem.c b/mm/shmem.c
index 9ffbea9b79e1..d58305e8a484 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -922,20 +922,26 @@ found:
 	error = 1;
 	if (!inode)
 		goto out;
-	/* Precharge page while we can wait, compensate afterwards */
+	/* Precharge page using GFP_KERNEL while we can wait */
 	error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL);
 	if (error)
 		goto out;
 	error = radix_tree_preload(GFP_KERNEL);
-	if (error)
-		goto uncharge;
+	if (error) {
+		mem_cgroup_uncharge_cache_page(page);
+		goto out;
+	}
 	error = 1;
 
 	spin_lock(&info->lock);
 	ptr = shmem_swp_entry(info, idx, NULL);
-	if (ptr && ptr->val == entry.val)
+	if (ptr && ptr->val == entry.val) {
 		error = add_to_page_cache(page, inode->i_mapping,
 						idx, GFP_NOWAIT);
+		/* does mem_cgroup_uncharge_cache_page on error */
+	} else	/* we must compensate for our precharge above */
+		mem_cgroup_uncharge_cache_page(page);
+
 	if (error == -EEXIST) {
 		struct page *filepage = find_get_page(inode->i_mapping, idx);
 		error = 1;
@@ -961,8 +967,6 @@ found:
 		shmem_swp_unmap(ptr);
 	spin_unlock(&info->lock);
 	radix_tree_preload_end();
-uncharge:
-	mem_cgroup_uncharge_page(page);
 out:
 	unlock_page(page);
 	page_cache_release(page);
@@ -1319,7 +1323,7 @@ repeat:
 					page_cache_release(swappage);
 					goto failed;
 				}
-				mem_cgroup_uncharge_page(swappage);
+				mem_cgroup_uncharge_cache_page(swappage);
 			}
 			page_cache_release(swappage);
 			goto repeat;
@@ -1358,6 +1362,8 @@ repeat:
 		}
 
 		if (!filepage) {
+			int ret;
+
 			spin_unlock(&info->lock);
 			filepage = shmem_alloc_page(gfp, info, idx);
 			if (!filepage) {
@@ -1386,10 +1392,18 @@ repeat:
 				swap = *entry;
 				shmem_swp_unmap(entry);
 			}
-			if (error || swap.val || 0 != add_to_page_cache_lru(
-					filepage, mapping, idx, GFP_NOWAIT)) {
+			ret = error || swap.val;
+			if (ret)
+				mem_cgroup_uncharge_cache_page(filepage);
+			else
+				ret = add_to_page_cache_lru(filepage, mapping,
+						idx, GFP_NOWAIT);
+			/*
+			 * At add_to_page_cache_lru() failure, uncharge will
+			 * be done automatically.
+			 */
+			if (ret) {
 				spin_unlock(&info->lock);
-				mem_cgroup_uncharge_page(filepage);
 				page_cache_release(filepage);
 				shmem_unacct_blocks(info->flags, 1);
 				shmem_free_blocks(inode, 1);
@@ -1398,7 +1412,6 @@ repeat:
 					goto failed;
 				goto repeat;
 			}
-			mem_cgroup_uncharge_page(filepage);
 			info->flags |= SHMEM_PAGEIN;
 		}
 
-- 
cgit v1.2.3


From c9b0ed51483cc2fc42bb801b6675c4231b0e4634 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Fri, 25 Jul 2008 01:47:15 -0700
Subject: memcg: helper function for relcaim from shmem.

A new call, mem_cgroup_shrink_usage() is added for shmem handling and
relacing non-standard usage of mem_cgroup_charge/uncharge.

Now, shmem calls mem_cgroup_charge() just for reclaim some pages from
mem_cgroup.  In general, shmem is used by some process group and not for
global resource (like file caches).  So, it's reasonable to reclaim pages
from mem_cgroup where shmem is mainly used.

[hugh@veritas.com: shmem_getpage release page sooner]
[hugh@veritas.com: mem_cgroup_shrink_usage css_put]
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Pavel Emelyanov <xemul@openvz.org>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
Cc: Paul Menage <menage@google.com>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h |  7 +++++++
 mm/memcontrol.c            | 26 ++++++++++++++++++++++++++
 mm/shmem.c                 | 11 ++++-------
 3 files changed, 37 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index b4980b8f048e..fdf3967e1397 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -37,6 +37,8 @@ extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
 extern void mem_cgroup_uncharge_page(struct page *page);
 extern void mem_cgroup_uncharge_cache_page(struct page *page);
 extern void mem_cgroup_move_lists(struct page *page, bool active);
+extern int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask);
+
 extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
 					struct list_head *dst,
 					unsigned long *scanned, int order,
@@ -102,6 +104,11 @@ static inline void mem_cgroup_uncharge_cache_page(struct page *page)
 {
 }
 
+static inline int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask)
+{
+	return 0;
+}
+
 static inline void mem_cgroup_move_lists(struct page *page, bool active)
 {
 }
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index a61706193c31..f46b8615de6c 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -780,6 +780,32 @@ void mem_cgroup_end_migration(struct page *newpage)
 		mem_cgroup_uncharge_page(newpage);
 }
 
+/*
+ * A call to try to shrink memory usage under specified resource controller.
+ * This is typically used for page reclaiming for shmem for reducing side
+ * effect of page allocation from shmem, which is used by some mem_cgroup.
+ */
+int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask)
+{
+	struct mem_cgroup *mem;
+	int progress = 0;
+	int retry = MEM_CGROUP_RECLAIM_RETRIES;
+
+	rcu_read_lock();
+	mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
+	css_get(&mem->css);
+	rcu_read_unlock();
+
+	do {
+		progress = try_to_free_mem_cgroup_pages(mem, gfp_mask);
+	} while (!progress && --retry);
+
+	css_put(&mem->css);
+	if (!retry)
+		return -ENOMEM;
+	return 0;
+}
+
 /*
  * This routine traverse page_cgroup in given list and drop them all.
  * *And* this routine doesn't reclaim page itself, just removes page_cgroup.
diff --git a/mm/shmem.c b/mm/shmem.c
index d58305e8a484..f92fea94d037 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1315,17 +1315,14 @@ repeat:
 			shmem_swp_unmap(entry);
 			spin_unlock(&info->lock);
 			unlock_page(swappage);
+			page_cache_release(swappage);
 			if (error == -ENOMEM) {
 				/* allow reclaim from this memory cgroup */
-				error = mem_cgroup_cache_charge(swappage,
-					current->mm, gfp & ~__GFP_HIGHMEM);
-				if (error) {
-					page_cache_release(swappage);
+				error = mem_cgroup_shrink_usage(current->mm,
+								gfp);
+				if (error)
 					goto failed;
-				}
-				mem_cgroup_uncharge_cache_page(swappage);
 			}
-			page_cache_release(swappage);
 			goto repeat;
 		}
 	} else if (sgp == SGP_READ && !filepage) {
-- 
cgit v1.2.3


From 12b9804419cfb1c1bdac413f6c373af3b88d154b Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Fri, 25 Jul 2008 01:47:19 -0700
Subject: res_counter: limit change support ebusy

Add an interface to set limit.  This is necessary to memory resource
controller because it shrinks usage at set limit.

Other controllers may not need this interface to shrink usage because
shrinking is not necessary or impossible.

Acked-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Acked-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Paul Menage <menage@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/res_counter.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h
index 290205dfe094..fdeadd9740dc 100644
--- a/include/linux/res_counter.h
+++ b/include/linux/res_counter.h
@@ -158,4 +158,20 @@ static inline void res_counter_reset_failcnt(struct res_counter *cnt)
 	cnt->failcnt = 0;
 	spin_unlock_irqrestore(&cnt->lock, flags);
 }
+
+static inline int res_counter_set_limit(struct res_counter *cnt,
+		unsigned long long limit)
+{
+	unsigned long flags;
+	int ret = -EBUSY;
+
+	spin_lock_irqsave(&cnt->lock, flags);
+	if (cnt->usage < limit) {
+		cnt->limit = limit;
+		ret = 0;
+	}
+	spin_unlock_irqrestore(&cnt->lock, flags);
+	return ret;
+}
+
 #endif
-- 
cgit v1.2.3


From 364d3c13c17f45da6d638011078d4c4d3070d719 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Fri, 25 Jul 2008 01:47:36 -0700
Subject: ptrace: give more respect to SIGKILL

ptrace_stop() has some complicated checks to prevent the scheduling in the
TASK_TRACED state with the pending SIGKILL, but these checks are racy, and
they depend on arch_ptrace_stop_needed().

This patch assumes that the traced task should die asap if it was killed by
SIGKILL, in that case schedule()->signal_pending_state() has no reason to
ignore the TASK_WAKEKILL part of TASK_TRACED, and we can kill this nasty
special case.

Note: do_exit()->ptrace_notify() is special, the killed task can already
dequeue SIGKILL at this point. Another indication that fatal_signal_pending()
is not exactly right.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Matthew Wilcox <matthew@wil.cx>
Cc: Roland McGrath <roland@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6aca4a16e377..79e749dbf81e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2054,9 +2054,6 @@ static inline int signal_pending_state(long state, struct task_struct *p)
 	if (!signal_pending(p))
 		return 0;
 
-	if (state & (__TASK_STOPPED | __TASK_TRACED))
-		return 0;
-
 	return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p);
 }
 
-- 
cgit v1.2.3


From 7b34e4283c685f5cc6ba6d30e939906eee0d4bcf Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Fri, 25 Jul 2008 01:47:37 -0700
Subject: introduce PF_KTHREAD flag

Introduce the new PF_KTHREAD flag to mark the kernel threads.  It is set
by INIT_TASK() and copied to the forked childs (we could set it in
kthreadd() along with PF_NOFREEZE instead).

daemonize() was changed as well.  In that case testing of PF_KTHREAD is
racy, but daemonize() is hopeless anyway.

This flag is cleared in do_execve(), before search_binary_handler().
Probably not the best place, we can do this in exec_mmap() or in
start_thread(), or clear it along with PF_FORKNOEXEC.  But I think this
doesn't matter in practice, and if do_execve() fails kthread should die
soon.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Cc: Roland McGrath <roland@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/exec.c                 | 1 +
 include/linux/init_task.h | 2 +-
 include/linux/sched.h     | 1 +
 kernel/exit.c             | 2 +-
 4 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/fs/exec.c b/fs/exec.c
index af249af4ccab..cd2e8c9b1249 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1326,6 +1326,7 @@ int do_execve(char * filename,
 	if (retval < 0)
 		goto out;
 
+	current->flags &= ~PF_KTHREAD;
 	retval = search_binary_handler(bprm,regs);
 	if (retval >= 0) {
 		/* execve success */
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 93c45acf249a..021d8e720c79 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -122,7 +122,7 @@ extern struct group_info init_groups;
 	.state		= 0,						\
 	.stack		= &init_thread_info,				\
 	.usage		= ATOMIC_INIT(2),				\
-	.flags		= 0,						\
+	.flags		= PF_KTHREAD,					\
 	.lock_depth	= -1,						\
 	.prio		= MAX_PRIO-20,					\
 	.static_prio	= MAX_PRIO-20,					\
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 79e749dbf81e..eec64a4adb9d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1483,6 +1483,7 @@ static inline void put_task_struct(struct task_struct *t)
 #define PF_EXITING	0x00000004	/* getting shut down */
 #define PF_EXITPIDONE	0x00000008	/* pi exit done on shut down */
 #define PF_VCPU		0x00000010	/* I'm a virtual CPU */
+#define PF_KTHREAD	0x00000020	/* I am a kernel thread */
 #define PF_FORKNOEXEC	0x00000040	/* forked but didn't exec */
 #define PF_SUPERPRIV	0x00000100	/* used super-user privileges */
 #define PF_DUMPCORE	0x00000200	/* dumped core */
diff --git a/kernel/exit.c b/kernel/exit.c
index a7799d8a6404..28a44a2612dc 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -430,7 +430,7 @@ void daemonize(const char *name, ...)
 	 * We don't want to have TIF_FREEZE set if the system-wide hibernation
 	 * or suspend transition begins right now.
 	 */
-	current->flags |= PF_NOFREEZE;
+	current->flags |= (PF_NOFREEZE | PF_KTHREAD);
 
 	if (current->nsproxy != &init_nsproxy) {
 		get_nsproxy(&init_nsproxy);
-- 
cgit v1.2.3


From 246bb0b1deb29726990620d8b5e55ca29f331362 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Fri, 25 Jul 2008 01:47:38 -0700
Subject: kill PF_BORROWED_MM in favour of PF_KTHREAD

Kill PF_BORROWED_MM.  Change use_mm/unuse_mm to not play with ->flags, and
do s/PF_BORROWED_MM/PF_KTHREAD/ for a couple of other users.

No functional changes yet.  But this allows us to do further
fixes/cleanups.

oom_kill/ptrace/etc often check "p->mm != NULL" to filter out the
kthreads, this is wrong because of use_mm().  The problem with
PF_BORROWED_MM is that we need task_lock() to avoid races.  With this
patch we can check PF_KTHREAD directly, or use a simple lockless helper:

	/* The result must not be dereferenced !!! */
	struct mm_struct *__get_task_mm(struct task_struct *tsk)
	{
		if (tsk->flags & PF_KTHREAD)
			return NULL;
		return tsk->mm;
	}

Note also ecard_task().  It runs with ->mm != NULL, but it's the kernel
thread without PF_BORROWED_MM.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Cc: Roland McGrath <roland@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/aio.c              | 2 --
 include/linux/sched.h | 3 +--
 kernel/fork.c         | 4 ++--
 3 files changed, 3 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/fs/aio.c b/fs/aio.c
index 0fb3117ddd93..0051fd94b44e 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -586,7 +586,6 @@ static void use_mm(struct mm_struct *mm)
 	struct task_struct *tsk = current;
 
 	task_lock(tsk);
-	tsk->flags |= PF_BORROWED_MM;
 	active_mm = tsk->active_mm;
 	atomic_inc(&mm->mm_count);
 	tsk->mm = mm;
@@ -610,7 +609,6 @@ static void unuse_mm(struct mm_struct *mm)
 	struct task_struct *tsk = current;
 
 	task_lock(tsk);
-	tsk->flags &= ~PF_BORROWED_MM;
 	tsk->mm = NULL;
 	/* active_mm is still 'mm' */
 	enter_lazy_tlb(mm, tsk);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index eec64a4adb9d..0560999eb1db 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1483,7 +1483,6 @@ static inline void put_task_struct(struct task_struct *t)
 #define PF_EXITING	0x00000004	/* getting shut down */
 #define PF_EXITPIDONE	0x00000008	/* pi exit done on shut down */
 #define PF_VCPU		0x00000010	/* I'm a virtual CPU */
-#define PF_KTHREAD	0x00000020	/* I am a kernel thread */
 #define PF_FORKNOEXEC	0x00000040	/* forked but didn't exec */
 #define PF_SUPERPRIV	0x00000100	/* used super-user privileges */
 #define PF_DUMPCORE	0x00000200	/* dumped core */
@@ -1497,7 +1496,7 @@ static inline void put_task_struct(struct task_struct *t)
 #define PF_KSWAPD	0x00040000	/* I am kswapd */
 #define PF_SWAPOFF	0x00080000	/* I am in swapoff */
 #define PF_LESS_THROTTLE 0x00100000	/* Throttle me less: I clean memory */
-#define PF_BORROWED_MM	0x00200000	/* I am a kthread doing use_mm */
+#define PF_KTHREAD	0x00200000	/* I am a kernel thread */
 #define PF_RANDOMIZE	0x00400000	/* randomize virtual address space */
 #define PF_SWAPWRITE	0x00800000	/* Allowed to write to swap */
 #define PF_SPREAD_PAGE	0x01000000	/* Spread page cache over cpuset */
diff --git a/kernel/fork.c b/kernel/fork.c
index 228f80c9155a..eeaec6893b0d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -474,7 +474,7 @@ EXPORT_SYMBOL_GPL(mmput);
 /**
  * get_task_mm - acquire a reference to the task's mm
  *
- * Returns %NULL if the task has no mm.  Checks PF_BORROWED_MM (meaning
+ * Returns %NULL if the task has no mm.  Checks PF_KTHREAD (meaning
  * this kernel workthread has transiently adopted a user mm with use_mm,
  * to do its AIO) is not set and if so returns a reference to it, after
  * bumping up the use count.  User must release the mm via mmput()
@@ -487,7 +487,7 @@ struct mm_struct *get_task_mm(struct task_struct *task)
 	task_lock(task);
 	mm = task->mm;
 	if (mm) {
-		if (task->flags & PF_BORROWED_MM)
+		if (task->flags & PF_KTHREAD)
 			mm = NULL;
 		else
 			atomic_inc(&mm->mm_users);
-- 
cgit v1.2.3


From 32ecb1f26dd50eeaac4e3f4dea4541c97848e459 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Fri, 25 Jul 2008 01:47:41 -0700
Subject: coredump: turn mm->core_startup_done into the pointer to struct
 core_state

mm->core_startup_done points to "struct completion startup_done" allocated
on the coredump_wait()'s stack.  Introduce the new structure, core_state,
which holds this "struct completion".  This way we can add more info
visible to the threads participating in coredump without enlarging
mm_struct.

No changes in affected .o files.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Cc: Roland McGrath <roland@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/exec.c                | 8 ++++----
 include/linux/mm_types.h | 7 ++++++-
 kernel/exit.c            | 2 +-
 3 files changed, 11 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/fs/exec.c b/fs/exec.c
index e347e6ed1617..71734568f018 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1597,13 +1597,13 @@ static int coredump_wait(int exit_code)
 {
 	struct task_struct *tsk = current;
 	struct mm_struct *mm = tsk->mm;
-	struct completion startup_done;
+	struct core_state core_state;
 	struct completion *vfork_done;
 	int core_waiters;
 
 	init_completion(&mm->core_done);
-	init_completion(&startup_done);
-	mm->core_startup_done = &startup_done;
+	init_completion(&core_state.startup);
+	mm->core_state = &core_state;
 
 	core_waiters = zap_threads(tsk, mm, exit_code);
 	up_write(&mm->mmap_sem);
@@ -1622,7 +1622,7 @@ static int coredump_wait(int exit_code)
 	}
 
 	if (core_waiters)
-		wait_for_completion(&startup_done);
+		wait_for_completion(&core_state.startup);
 fail:
 	BUG_ON(mm->core_waiters);
 	return core_waiters;
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 02a27ae78539..97819efd2333 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -159,6 +159,10 @@ struct vm_area_struct {
 #endif
 };
 
+struct core_state {
+	struct completion startup;
+};
+
 struct mm_struct {
 	struct vm_area_struct * mmap;		/* list of VMAs */
 	struct rb_root mm_rb;
@@ -220,7 +224,8 @@ struct mm_struct {
 	unsigned long flags; /* Must use atomic bitops to access the bits */
 
 	/* coredumping support */
-	struct completion *core_startup_done, core_done;
+	struct core_state *core_state;
+	struct completion core_done;
 
 	/* aio bits */
 	rwlock_t		ioctx_list_lock;	/* aio lock */
diff --git a/kernel/exit.c b/kernel/exit.c
index 28a44a2612dc..f7fa21dbced4 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -680,7 +680,7 @@ static void exit_mm(struct task_struct * tsk)
 		up_read(&mm->mmap_sem);
 		down_write(&mm->mmap_sem);
 		if (!--mm->core_waiters)
-			complete(mm->core_startup_done);
+			complete(&mm->core_state->startup);
 		up_write(&mm->mmap_sem);
 
 		wait_for_completion(&mm->core_done);
-- 
cgit v1.2.3


From 999d9fc1670bc082928b93b11d1f2e0e417d973c Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Fri, 25 Jul 2008 01:47:41 -0700
Subject: coredump: move mm->core_waiters into struct core_state

Move mm->core_waiters into "struct core_state" allocated on stack.  This
shrinks mm_struct a little bit and allows further changes.

This patch mostly does s/core_waiters/core_state.  The only essential
change is that coredump_wait() must clear mm->core_state before return.

The coredump_wait()'s path is uglified and .text grows by 30 bytes, this
is fixed by the next patch.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Cc: Roland McGrath <roland@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/exec.c                | 21 +++++++++++----------
 include/linux/mm_types.h |  2 +-
 kernel/exit.c            |  8 ++++----
 kernel/fork.c            |  2 +-
 kernel/signal.c          |  4 ++--
 5 files changed, 19 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/fs/exec.c b/fs/exec.c
index 71734568f018..50de3aaff4d0 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -722,12 +722,10 @@ static int exec_mmap(struct mm_struct *mm)
 		 * Make sure that if there is a core dump in progress
 		 * for the old mm, we get out and die instead of going
 		 * through with the exec.  We must hold mmap_sem around
-		 * checking core_waiters and changing tsk->mm.  The
-		 * core-inducing thread will increment core_waiters for
-		 * each thread whose ->mm == old_mm.
+		 * checking core_state and changing tsk->mm.
 		 */
 		down_read(&old_mm->mmap_sem);
-		if (unlikely(old_mm->core_waiters)) {
+		if (unlikely(old_mm->core_state)) {
 			up_read(&old_mm->mmap_sem);
 			return -EINTR;
 		}
@@ -1514,7 +1512,7 @@ static void zap_process(struct task_struct *start)
 	t = start;
 	do {
 		if (t != current && t->mm) {
-			t->mm->core_waiters++;
+			t->mm->core_state->nr_threads++;
 			sigaddset(&t->pending.signal, SIGKILL);
 			signal_wake_up(t, 1);
 		}
@@ -1538,11 +1536,11 @@ static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
 	if (err)
 		return err;
 
-	if (atomic_read(&mm->mm_users) == mm->core_waiters + 1)
+	if (atomic_read(&mm->mm_users) == mm->core_state->nr_threads + 1)
 		goto done;
 	/*
 	 * We should find and kill all tasks which use this mm, and we should
-	 * count them correctly into mm->core_waiters. We don't take tasklist
+	 * count them correctly into ->nr_threads. We don't take tasklist
 	 * lock, but this is safe wrt:
 	 *
 	 * fork:
@@ -1590,7 +1588,7 @@ static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
 	}
 	rcu_read_unlock();
 done:
-	return mm->core_waiters;
+	return mm->core_state->nr_threads;
 }
 
 static int coredump_wait(int exit_code)
@@ -1603,9 +1601,12 @@ static int coredump_wait(int exit_code)
 
 	init_completion(&mm->core_done);
 	init_completion(&core_state.startup);
+	core_state.nr_threads = 0;
 	mm->core_state = &core_state;
 
 	core_waiters = zap_threads(tsk, mm, exit_code);
+	if (core_waiters < 0)
+		mm->core_state = NULL;
 	up_write(&mm->mmap_sem);
 
 	if (unlikely(core_waiters < 0))
@@ -1623,8 +1624,8 @@ static int coredump_wait(int exit_code)
 
 	if (core_waiters)
 		wait_for_completion(&core_state.startup);
+	mm->core_state = NULL;
 fail:
-	BUG_ON(mm->core_waiters);
 	return core_waiters;
 }
 
@@ -1702,7 +1703,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
 	/*
 	 * If another thread got here first, or we are not dumpable, bail out.
 	 */
-	if (mm->core_waiters || !get_dumpable(mm)) {
+	if (mm->core_state || !get_dumpable(mm)) {
 		up_write(&mm->mmap_sem);
 		goto fail;
 	}
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 97819efd2333..c0b1747b61a5 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -160,6 +160,7 @@ struct vm_area_struct {
 };
 
 struct core_state {
+	int nr_threads;
 	struct completion startup;
 };
 
@@ -179,7 +180,6 @@ struct mm_struct {
 	atomic_t mm_users;			/* How many users with user space? */
 	atomic_t mm_count;			/* How many references to "struct mm_struct" (users count as 1) */
 	int map_count;				/* number of VMAs */
-	int core_waiters;
 	struct rw_semaphore mmap_sem;
 	spinlock_t page_table_lock;		/* Protects page tables and some counters */
 
diff --git a/kernel/exit.c b/kernel/exit.c
index f7fa21dbced4..988e232254e9 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -670,16 +670,16 @@ static void exit_mm(struct task_struct * tsk)
 		return;
 	/*
 	 * Serialize with any possible pending coredump.
-	 * We must hold mmap_sem around checking core_waiters
+	 * We must hold mmap_sem around checking core_state
 	 * and clearing tsk->mm.  The core-inducing thread
-	 * will increment core_waiters for each thread in the
+	 * will increment ->nr_threads for each thread in the
 	 * group with ->mm != NULL.
 	 */
 	down_read(&mm->mmap_sem);
-	if (mm->core_waiters) {
+	if (mm->core_state) {
 		up_read(&mm->mmap_sem);
 		down_write(&mm->mmap_sem);
-		if (!--mm->core_waiters)
+		if (!--mm->core_state->nr_threads)
 			complete(&mm->core_state->startup);
 		up_write(&mm->mmap_sem);
 
diff --git a/kernel/fork.c b/kernel/fork.c
index eeaec6893b0d..813d5c89b9d5 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -400,7 +400,7 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
 	INIT_LIST_HEAD(&mm->mmlist);
 	mm->flags = (current->mm) ? current->mm->flags
 				  : MMF_DUMP_FILTER_DEFAULT;
-	mm->core_waiters = 0;
+	mm->core_state = NULL;
 	mm->nr_ptes = 0;
 	set_mm_counter(mm, file_rss, 0);
 	set_mm_counter(mm, anon_rss, 0);
diff --git a/kernel/signal.c b/kernel/signal.c
index 39c1706edf03..5c7b7eaa0dc6 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1480,10 +1480,10 @@ static inline int may_ptrace_stop(void)
 	 * is a deadlock situation, and pointless because our tracer
 	 * is dead so don't allow us to stop.
 	 * If SIGKILL was already sent before the caller unlocked
-	 * ->siglock we must see ->core_waiters != 0. Otherwise it
+	 * ->siglock we must see ->core_state != NULL. Otherwise it
 	 * is safe to enter schedule().
 	 */
-	if (unlikely(current->mm->core_waiters) &&
+	if (unlikely(current->mm->core_state) &&
 	    unlikely(current->mm == current->parent->mm))
 		return 0;
 
-- 
cgit v1.2.3


From c5f1cc8c1828486a61ab3e575da6e2c62b34d399 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Fri, 25 Jul 2008 01:47:42 -0700
Subject: coredump: turn core_state->nr_threads into atomic_t

Turn core_state->nr_threads into atomic_t and kill now unneeded
down_write(&mm->mmap_sem) in exit_mm().

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Cc: Roland McGrath <roland@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/exec.c                | 2 +-
 include/linux/mm_types.h | 2 +-
 kernel/exit.c            | 5 ++---
 3 files changed, 4 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/fs/exec.c b/fs/exec.c
index c74bb34eeeff..15d493fe8aa3 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1591,7 +1591,7 @@ static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
 	}
 	rcu_read_unlock();
 done:
-	core_state->nr_threads = nr;
+	atomic_set(&core_state->nr_threads, nr);
 	return nr;
 }
 
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index c0b1747b61a5..ae99a28ba6ae 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -160,7 +160,7 @@ struct vm_area_struct {
 };
 
 struct core_state {
-	int nr_threads;
+	atomic_t nr_threads;
 	struct completion startup;
 };
 
diff --git a/kernel/exit.c b/kernel/exit.c
index 988e232254e9..63d82957baae 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -678,10 +678,9 @@ static void exit_mm(struct task_struct * tsk)
 	down_read(&mm->mmap_sem);
 	if (mm->core_state) {
 		up_read(&mm->mmap_sem);
-		down_write(&mm->mmap_sem);
-		if (!--mm->core_state->nr_threads)
+
+		if (atomic_dec_and_test(&mm->core_state->nr_threads))
 			complete(&mm->core_state->startup);
-		up_write(&mm->mmap_sem);
 
 		wait_for_completion(&mm->core_done);
 		down_read(&mm->mmap_sem);
-- 
cgit v1.2.3


From b564daf806d492dd4f7afe9b6c83b8d35d137669 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Fri, 25 Jul 2008 01:47:44 -0700
Subject: coredump: construct the list of coredumping threads at startup time

binfmt->core_dump() has to iterate over the all threads in system in order
to find the coredumping threads and construct the list using the
GFP_ATOMIC allocations.

With this patch each thread allocates the list node on exit_mm()'s stack and
adds itself to the list.

This allows us to do further changes:

	- simplify ->core_dump()

	- change exit_mm() to clear ->mm first, then wait for ->core_done.
	  this makes the coredumping process visible to oom_kill

	- kill mm->core_done

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Acked-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/exec.c                |  2 ++
 include/linux/mm_types.h |  6 ++++++
 kernel/exit.c            | 15 ++++++++++++---
 3 files changed, 20 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/fs/exec.c b/fs/exec.c
index b8ee842d93cd..fe2873b8037f 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1604,6 +1604,8 @@ static int coredump_wait(int exit_code, struct core_state *core_state)
 
 	init_completion(&mm->core_done);
 	init_completion(&core_state->startup);
+	core_state->dumper.task = tsk;
+	core_state->dumper.next = NULL;
 	core_waiters = zap_threads(tsk, mm, core_state, exit_code);
 	up_write(&mm->mmap_sem);
 
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index ae99a28ba6ae..4d0d0abc79fe 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -159,8 +159,14 @@ struct vm_area_struct {
 #endif
 };
 
+struct core_thread {
+	struct task_struct *task;
+	struct core_thread *next;
+};
+
 struct core_state {
 	atomic_t nr_threads;
+	struct core_thread dumper;
 	struct completion startup;
 };
 
diff --git a/kernel/exit.c b/kernel/exit.c
index 63d82957baae..b66f0d55c791 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -664,6 +664,7 @@ assign_new_owner:
 static void exit_mm(struct task_struct * tsk)
 {
 	struct mm_struct *mm = tsk->mm;
+	struct core_state *core_state;
 
 	mm_release(tsk, mm);
 	if (!mm)
@@ -676,11 +677,19 @@ static void exit_mm(struct task_struct * tsk)
 	 * group with ->mm != NULL.
 	 */
 	down_read(&mm->mmap_sem);
-	if (mm->core_state) {
+	core_state = mm->core_state;
+	if (core_state) {
+		struct core_thread self;
 		up_read(&mm->mmap_sem);
 
-		if (atomic_dec_and_test(&mm->core_state->nr_threads))
-			complete(&mm->core_state->startup);
+		self.task = tsk;
+		self.next = xchg(&core_state->dumper.next, &self);
+		/*
+		 * Implies mb(), the result of xchg() must be visible
+		 * to core_state->dumper.
+		 */
+		if (atomic_dec_and_test(&core_state->nr_threads))
+			complete(&core_state->startup);
 
 		wait_for_completion(&mm->core_done);
 		down_read(&mm->mmap_sem);
-- 
cgit v1.2.3


From a94e2d408eaedbd85aae259621d46fafc10479a2 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Fri, 25 Jul 2008 01:47:46 -0700
Subject: coredump: kill mm->core_done

Now that we have core_state->dumper list we can use it to wake up the
sub-threads waiting for the coredump completion.

This uglifies the code and .text grows by 47 bytes, but otoh mm_struct
lessens by sizeof(struct completion).  Also, with this change we can
decouple exit_mm() from the coredumping code.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Cc: Roland McGrath <roland@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/exec.c                | 25 ++++++++++++++++++++++---
 include/linux/mm_types.h |  4 +---
 kernel/exit.c            |  8 +++++++-
 3 files changed, 30 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/fs/exec.c b/fs/exec.c
index fe2873b8037f..bff43aeb235e 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1602,7 +1602,6 @@ static int coredump_wait(int exit_code, struct core_state *core_state)
 	struct completion *vfork_done;
 	int core_waiters;
 
-	init_completion(&mm->core_done);
 	init_completion(&core_state->startup);
 	core_state->dumper.task = tsk;
 	core_state->dumper.next = NULL;
@@ -1628,6 +1627,27 @@ fail:
 	return core_waiters;
 }
 
+static void coredump_finish(struct mm_struct *mm)
+{
+	struct core_thread *curr, *next;
+	struct task_struct *task;
+
+	next = mm->core_state->dumper.next;
+	while ((curr = next) != NULL) {
+		next = curr->next;
+		task = curr->task;
+		/*
+		 * see exit_mm(), curr->task must not see
+		 * ->task == NULL before we read ->next.
+		 */
+		smp_mb();
+		curr->task = NULL;
+		wake_up_process(task);
+	}
+
+	mm->core_state = NULL;
+}
+
 /*
  * set_dumpable converts traditional three-value dumpable to two flags and
  * stores them into mm->flags.  It modifies lower two bits of mm->flags, but
@@ -1812,8 +1832,7 @@ fail_unlock:
 		argv_free(helper_argv);
 
 	current->fsuid = fsuid;
-	complete_all(&mm->core_done);
-	mm->core_state = NULL;
+	coredump_finish(mm);
 fail:
 	return retval;
 }
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 4d0d0abc79fe..746f975b58ef 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -229,9 +229,7 @@ struct mm_struct {
 
 	unsigned long flags; /* Must use atomic bitops to access the bits */
 
-	/* coredumping support */
-	struct core_state *core_state;
-	struct completion core_done;
+	struct core_state *core_state; /* coredumping support */
 
 	/* aio bits */
 	rwlock_t		ioctx_list_lock;	/* aio lock */
diff --git a/kernel/exit.c b/kernel/exit.c
index b66f0d55c791..8a4d4d12e294 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -691,7 +691,13 @@ static void exit_mm(struct task_struct * tsk)
 		if (atomic_dec_and_test(&core_state->nr_threads))
 			complete(&core_state->startup);
 
-		wait_for_completion(&mm->core_done);
+		for (;;) {
+			set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+			if (!self.task) /* see coredump_finish() */
+				break;
+			schedule();
+		}
+		__set_task_state(tsk, TASK_RUNNING);
 		down_read(&mm->mmap_sem);
 	}
 	atomic_inc(&mm->mm_count);
-- 
cgit v1.2.3


From db700897224b5ebdf852f2d38920ce428940d059 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Fri, 25 Jul 2008 01:47:49 -0700
Subject: workqueues: implement flush_work()

Most of users of flush_workqueue() can be changed to use cancel_work_sync(),
but sometimes we really need to wait for the completion and cancelling is not
an option. schedule_on_each_cpu() is good example.

Add the new helper, flush_work(work), which waits for the completion of the
specific work_struct. More precisely, it "flushes" the result of of the last
queue_work() which is visible to the caller.

For example, this code

	queue_work(wq, work);
	/* WINDOW */
	queue_work(wq, work);

	flush_work(work);

doesn't necessary work "as expected". What can happen in the WINDOW above is

	- wq starts the execution of work->func()

	- the caller migrates to another CPU

now, after the 2nd queue_work() this work is active on the previous CPU, and
at the same time it is queued on another. In this case flush_work(work) may
return before the first work->func() completes.

It is trivial to add another helper

	int flush_work_sync(struct work_struct *work)
	{
		return flush_work(work) || wait_on_work(work);
	}

which works "more correctly", but it has to iterate over all CPUs and thus
it much slower than flush_work().

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Acked-by: Max Krasnyansky <maxk@qualcomm.com>
Acked-by: Jarek Poplawski <jarkao2@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/workqueue.h |  2 ++
 kernel/workqueue.c        | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 48 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 14d47120682b..5c158c477ac7 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -201,6 +201,8 @@ extern int keventd_up(void);
 extern void init_workqueues(void);
 int execute_in_process_context(work_func_t fn, struct execute_work *);
 
+extern int flush_work(struct work_struct *work);
+
 extern int cancel_work_sync(struct work_struct *work);
 
 /*
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index d9a2d65cc63e..ee41cf857d55 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -423,6 +423,52 @@ void flush_workqueue(struct workqueue_struct *wq)
 }
 EXPORT_SYMBOL_GPL(flush_workqueue);
 
+/**
+ * flush_work - block until a work_struct's callback has terminated
+ * @work: the work which is to be flushed
+ *
+ * It is expected that, prior to calling flush_work(), the caller has
+ * arranged for the work to not be requeued, otherwise it doesn't make
+ * sense to use this function.
+ */
+int flush_work(struct work_struct *work)
+{
+	struct cpu_workqueue_struct *cwq;
+	struct list_head *prev;
+	struct wq_barrier barr;
+
+	might_sleep();
+	cwq = get_wq_data(work);
+	if (!cwq)
+		return 0;
+
+	prev = NULL;
+	spin_lock_irq(&cwq->lock);
+	if (!list_empty(&work->entry)) {
+		/*
+		 * See the comment near try_to_grab_pending()->smp_rmb().
+		 * If it was re-queued under us we are not going to wait.
+		 */
+		smp_rmb();
+		if (unlikely(cwq != get_wq_data(work)))
+			goto out;
+		prev = &work->entry;
+	} else {
+		if (cwq->current_work != work)
+			goto out;
+		prev = &cwq->worklist;
+	}
+	insert_wq_barrier(cwq, &barr, prev->next);
+out:
+	spin_unlock_irq(&cwq->lock);
+	if (!prev)
+		return 0;
+
+	wait_for_completion(&barr.done);
+	return 1;
+}
+EXPORT_SYMBOL_GPL(flush_work);
+
 /*
  * Upon a successful return (>= 0), the caller "owns" WORK_STRUCT_PENDING bit,
  * so this work can't be re-armed in any way.
-- 
cgit v1.2.3


From 3da1c84c00c7e5fa8348336bd8c342f9128b0f14 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Fri, 25 Jul 2008 01:47:50 -0700
Subject: workqueues: make get_online_cpus() useable for work->func()

workqueue_cpu_callback(CPU_DEAD) flushes cwq->thread under
cpu_maps_update_begin().  This means that the multithreaded workqueues
can't use get_online_cpus() due to the possible deadlock, very bad and
very old problem.

Introduce the new state, CPU_POST_DEAD, which is called after
cpu_hotplug_done() but before cpu_maps_update_done().

Change workqueue_cpu_callback() to use CPU_POST_DEAD instead of CPU_DEAD.
This means that create/destroy functions can't rely on get_online_cpus()
any longer and should take cpu_add_remove_lock instead.

[akpm@linux-foundation.org: fix CONFIG_SMP=n]
Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Acked-by: Gautham R Shenoy <ego@in.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Max Krasnyansky <maxk@qualcomm.com>
Cc: Paul Jackson <pj@sgi.com>
Cc: Paul Menage <menage@google.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Vegard Nossum <vegard.nossum@gmail.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/cpu.h      | 15 +++++++++++----
 include/linux/notifier.h |  2 ++
 kernel/cpu.c             |  5 +++++
 kernel/workqueue.c       | 18 +++++++++---------
 4 files changed, 27 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 7464ba3b4333..d7faf8808497 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -69,10 +69,11 @@ static inline void unregister_cpu_notifier(struct notifier_block *nb)
 #endif
 
 int cpu_up(unsigned int cpu);
-
 extern void cpu_hotplug_init(void);
+extern void cpu_maps_update_begin(void);
+extern void cpu_maps_update_done(void);
 
-#else
+#else	/* CONFIG_SMP */
 
 static inline int register_cpu_notifier(struct notifier_block *nb)
 {
@@ -87,10 +88,16 @@ static inline void cpu_hotplug_init(void)
 {
 }
 
+static inline void cpu_maps_update_begin(void)
+{
+}
+
+static inline void cpu_maps_update_done(void)
+{
+}
+
 #endif /* CONFIG_SMP */
 extern struct sysdev_class cpu_sysdev_class;
-extern void cpu_maps_update_begin(void);
-extern void cpu_maps_update_done(void);
 
 #ifdef CONFIG_HOTPLUG_CPU
 /* Stop CPUs going up and down. */
diff --git a/include/linux/notifier.h b/include/linux/notifier.h
index bd3d72ddf333..da2698b0fdd1 100644
--- a/include/linux/notifier.h
+++ b/include/linux/notifier.h
@@ -214,6 +214,8 @@ static inline int notifier_to_errno(int ret)
 #define CPU_DEAD		0x0007 /* CPU (unsigned)v dead */
 #define CPU_DYING		0x0008 /* CPU (unsigned)v not running any task,
 				        * not handling interrupts, soon dead */
+#define CPU_POST_DEAD		0x0009 /* CPU (unsigned)v dead, cpu_hotplug
+					* lock is dropped */
 
 /* Used for CPU hotplug events occuring while tasks are frozen due to a suspend
  * operation in progress
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 2cc409ce0a8f..10ba5f1004a5 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -285,6 +285,11 @@ out_allowed:
 	set_cpus_allowed_ptr(current, &old_allowed);
 out_release:
 	cpu_hotplug_done();
+	if (!err) {
+		if (raw_notifier_call_chain(&cpu_chain, CPU_POST_DEAD | mod,
+					    hcpu) == NOTIFY_BAD)
+			BUG();
+	}
 	return err;
 }
 
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 5fbffd302eb5..828e58230cbc 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -828,7 +828,7 @@ struct workqueue_struct *__create_workqueue_key(const char *name,
 		err = create_workqueue_thread(cwq, singlethread_cpu);
 		start_workqueue_thread(cwq, -1);
 	} else {
-		get_online_cpus();
+		cpu_maps_update_begin();
 		spin_lock(&workqueue_lock);
 		list_add(&wq->list, &workqueues);
 		spin_unlock(&workqueue_lock);
@@ -840,7 +840,7 @@ struct workqueue_struct *__create_workqueue_key(const char *name,
 			err = create_workqueue_thread(cwq, cpu);
 			start_workqueue_thread(cwq, cpu);
 		}
-		put_online_cpus();
+		cpu_maps_update_done();
 	}
 
 	if (err) {
@@ -854,8 +854,8 @@ EXPORT_SYMBOL_GPL(__create_workqueue_key);
 static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq)
 {
 	/*
-	 * Our caller is either destroy_workqueue() or CPU_DEAD,
-	 * get_online_cpus() protects cwq->thread.
+	 * Our caller is either destroy_workqueue() or CPU_POST_DEAD,
+	 * cpu_add_remove_lock protects cwq->thread.
 	 */
 	if (cwq->thread == NULL)
 		return;
@@ -865,7 +865,7 @@ static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq)
 
 	flush_cpu_workqueue(cwq);
 	/*
-	 * If the caller is CPU_DEAD and cwq->worklist was not empty,
+	 * If the caller is CPU_POST_DEAD and cwq->worklist was not empty,
 	 * a concurrent flush_workqueue() can insert a barrier after us.
 	 * However, in that case run_workqueue() won't return and check
 	 * kthread_should_stop() until it flushes all work_struct's.
@@ -889,14 +889,14 @@ void destroy_workqueue(struct workqueue_struct *wq)
 	const cpumask_t *cpu_map = wq_cpu_map(wq);
 	int cpu;
 
-	get_online_cpus();
+	cpu_maps_update_begin();
 	spin_lock(&workqueue_lock);
 	list_del(&wq->list);
 	spin_unlock(&workqueue_lock);
 
 	for_each_cpu_mask_nr(cpu, *cpu_map)
 		cleanup_workqueue_thread(per_cpu_ptr(wq->cpu_wq, cpu));
-	put_online_cpus();
+ 	cpu_maps_update_done();
 
 	free_percpu(wq->cpu_wq);
 	kfree(wq);
@@ -935,7 +935,7 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
 
 		case CPU_UP_CANCELED:
 			start_workqueue_thread(cwq, -1);
-		case CPU_DEAD:
+		case CPU_POST_DEAD:
 			cleanup_workqueue_thread(cwq);
 			break;
 		}
@@ -943,7 +943,7 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
 
 	switch (action) {
 	case CPU_UP_CANCELED:
-	case CPU_DEAD:
+	case CPU_POST_DEAD:
 		cpu_clear(cpu, cpu_populated_map);
 	}
 
-- 
cgit v1.2.3


From 95b68dec0d52c7b8fea3698b3938cf3ab936436b Mon Sep 17 00:00:00 2001
From: Chandru <chandru@in.ibm.com>
Date: Fri, 25 Jul 2008 01:47:55 -0700
Subject: calgary iommu: use the first kernels TCE tables in kdump

kdump kernel fails to boot with calgary iommu and aacraid driver on a x366
box.  The ongoing dma's of aacraid from the first kernel continue to exist
until the driver is loaded in the kdump kernel.  Calgary is initialized
prior to aacraid and creation of new tce tables causes wrong dma's to
occur.  Here we try to get the tce tables of the first kernel in kdump
kernel and use them.  While in the kdump kernel we do not allocate new tce
tables but instead read the base address register contents of calgary
iommu and use the tables that the registers point to.  With these changes
the kdump kernel and hence aacraid now boots normally.

Signed-off-by: Chandru Siddalingappa <chandru@in.ibm.com>
Acked-by: Muli Ben-Yehuda <muli@il.ibm.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/kernel/pci-calgary_64.c | 85 +++++++++++++++++++++++++++++++++++++---
 include/linux/crash_dump.h       |  8 ++++
 2 files changed, 87 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index 151f2d171f7c..19e7fc7c2c4f 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -29,6 +29,7 @@
 #include <linux/mm.h>
 #include <linux/spinlock.h>
 #include <linux/string.h>
+#include <linux/crash_dump.h>
 #include <linux/dma-mapping.h>
 #include <linux/bitops.h>
 #include <linux/pci_ids.h>
@@ -167,6 +168,8 @@ static void calgary_dump_error_regs(struct iommu_table *tbl);
 static void calioc2_handle_quirks(struct iommu_table *tbl, struct pci_dev *dev);
 static void calioc2_tce_cache_blast(struct iommu_table *tbl);
 static void calioc2_dump_error_regs(struct iommu_table *tbl);
+static void calgary_init_bitmap_from_tce_table(struct iommu_table *tbl);
+static void get_tce_space_from_tar(void);
 
 static struct cal_chipset_ops calgary_chip_ops = {
 	.handle_quirks = calgary_handle_quirks,
@@ -830,7 +833,11 @@ static int __init calgary_setup_tar(struct pci_dev *dev, void __iomem *bbar)
 
 	tbl = pci_iommu(dev->bus);
 	tbl->it_base = (unsigned long)bus_info[dev->bus->number].tce_space;
-	tce_free(tbl, 0, tbl->it_size);
+
+	if (is_kdump_kernel())
+		calgary_init_bitmap_from_tce_table(tbl);
+	else
+		tce_free(tbl, 0, tbl->it_size);
 
 	if (is_calgary(dev->device))
 		tbl->chip_ops = &calgary_chip_ops;
@@ -1209,6 +1216,10 @@ static int __init calgary_init(void)
 	if (ret)
 		return ret;
 
+	/* Purely for kdump kernel case */
+	if (is_kdump_kernel())
+		get_tce_space_from_tar();
+
 	do {
 		dev = pci_get_device(PCI_VENDOR_ID_IBM, PCI_ANY_ID, dev);
 		if (!dev)
@@ -1339,6 +1350,61 @@ static int __init calgary_bus_has_devices(int bus, unsigned short pci_dev)
 	return (val != 0xffffffff);
 }
 
+/*
+ * calgary_init_bitmap_from_tce_table():
+ * Funtion for kdump case. In the second/kdump kernel initialize
+ * the bitmap based on the tce table entries obtained from first kernel
+ */
+static void calgary_init_bitmap_from_tce_table(struct iommu_table *tbl)
+{
+	u64 *tp;
+	unsigned int index;
+	tp = ((u64 *)tbl->it_base);
+	for (index = 0 ; index < tbl->it_size; index++) {
+		if (*tp != 0x0)
+			set_bit(index, tbl->it_map);
+		tp++;
+	}
+}
+
+/*
+ * get_tce_space_from_tar():
+ * Function for kdump case. Get the tce tables from first kernel
+ * by reading the contents of the base adress register of calgary iommu
+ */
+static void get_tce_space_from_tar()
+{
+	int bus;
+	void __iomem *target;
+	unsigned long tce_space;
+
+	for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) {
+		struct calgary_bus_info *info = &bus_info[bus];
+		unsigned short pci_device;
+		u32 val;
+
+		val = read_pci_config(bus, 0, 0, 0);
+		pci_device = (val & 0xFFFF0000) >> 16;
+
+		if (!is_cal_pci_dev(pci_device))
+			continue;
+		if (info->translation_disabled)
+			continue;
+
+		if (calgary_bus_has_devices(bus, pci_device) ||
+						translate_empty_slots) {
+			target = calgary_reg(bus_info[bus].bbar,
+						tar_offset(bus));
+			tce_space = be64_to_cpu(readq(target));
+			tce_space = tce_space & TAR_SW_BITS;
+
+			tce_space = tce_space & (~specified_table_size);
+			info->tce_space = (u64 *)__va(tce_space);
+		}
+	}
+	return;
+}
+
 void __init detect_calgary(void)
 {
 	int bus;
@@ -1394,7 +1460,8 @@ void __init detect_calgary(void)
 		return;
 	}
 
-	specified_table_size = determine_tce_table_size(max_pfn * PAGE_SIZE);
+	specified_table_size = determine_tce_table_size((is_kdump_kernel() ?
+					saved_max_pfn : max_pfn) * PAGE_SIZE);
 
 	for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) {
 		struct calgary_bus_info *info = &bus_info[bus];
@@ -1412,10 +1479,16 @@ void __init detect_calgary(void)
 
 		if (calgary_bus_has_devices(bus, pci_device) ||
 		    translate_empty_slots) {
-			tbl = alloc_tce_table();
-			if (!tbl)
-				goto cleanup;
-			info->tce_space = tbl;
+			/*
+			 * If it is kdump kernel, find and use tce tables
+			 * from first kernel, else allocate tce tables here
+			 */
+			if (!is_kdump_kernel()) {
+				tbl = alloc_tce_table();
+				if (!tbl)
+					goto cleanup;
+				info->tce_space = tbl;
+			}
 			calgary_found = 1;
 		}
 	}
diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h
index 22c7ac5cd80c..6cd39a927e1f 100644
--- a/include/linux/crash_dump.h
+++ b/include/linux/crash_dump.h
@@ -22,5 +22,13 @@ extern struct proc_dir_entry *proc_vmcore;
 
 #define vmcore_elf_check_arch(x) (elf_check_arch(x) || vmcore_elf_check_arch_cross(x))
 
+static inline int is_kdump_kernel(void)
+{
+	return (elfcorehdr_addr != ELFCORE_ADDR_MAX) ? 1 : 0;
+}
+#else /* !CONFIG_CRASH_DUMP */
+static inline int is_kdump_kernel(void) { return 0; }
 #endif /* CONFIG_CRASH_DUMP */
+
+extern unsigned long saved_max_pfn;
 #endif /* LINUX_CRASHDUMP_H */
-- 
cgit v1.2.3


From 2027d1abc25ff770cc3bc936abd33570ce85d85a Mon Sep 17 00:00:00 2001
From: Nadia Derbey <Nadia.Derbey@bull.net>
Date: Fri, 25 Jul 2008 01:47:57 -0700
Subject: idr: change the idr structure

After scalability problems have been detected when using the sysV ipcs, I have
proposed to use an RCU based implementation of the IDR api instead (see
threads http://lkml.org/lkml/2008/4/11/212 and
http://lkml.org/lkml/2008/4/29/295).

This resulted in many people asking to convert the idr API and make it rcu
safe (because most of the code was duplicated and thus unmaintanable and
unreviewable).

So here is a first attempt.

The important change wrt to the idr API itself is during idr removes: idr
layers are freed after a grace period, instead of being moved to the free
list.

The important change wrt to ipcs, is that idr_find() can now be called
locklessly inside a rcu read critical section.

Here are the results I've got for the pmsg test sent by Manfred:

   2.6.25-rc3-mm1   2.6.25-rc3-mm1+   2.6.25-mm1   Patched 2.6.25-mm1
1         1168441           1064021       876000               947488
2         1094264            921059      1549592              1730685
3         2082520           1738165      1694370              2324880
4         2079929           1695521       404553              2400408
5         2898758            406566       391283              3246580
6         2921417            261275       263249              3752148
7         3308761            126056       191742              4243142
8         3329456            100129       141722              4275780

1st column: stock 2.6.25-rc3-mm1
2nd column: 2.6.25-rc3-mm1 + ipc patches (store ipcs into idrs)
3nd column: stock 2.6.25-mm1
4th column: 2.6.25-mm1 + this pacth series.

This patch:

Add an rcu_head to the idr_layer structure in order to free it after a grace
period.

Signed-off-by: Nadia Derbey <Nadia.Derbey@bull.net>
Reviewed-by: "Paul E. McKenney" <paulmck@us.ibm.com>
Cc: Manfred Spraul <manfred@colorfullife.com>
Cc: Jim Houston <jim.houston@comcast.net>
Cc: Pierre Peiffer <peifferp@gmail.com>
Acked-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/idr.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/idr.h b/include/linux/idr.h
index 9a2d762124de..1af61d23be36 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -15,6 +15,7 @@
 #include <linux/types.h>
 #include <linux/bitops.h>
 #include <linux/init.h>
+#include <linux/rcupdate.h>
 
 #if BITS_PER_LONG == 32
 # define IDR_BITS 5
@@ -51,6 +52,7 @@ struct idr_layer {
 	unsigned long		 bitmap; /* A zero bit means "space here" */
 	struct idr_layer	*ary[1<<IDR_BITS];
 	int			 count;	 /* When zero, we can release it */
+	struct rcu_head		 rcu_head;
 };
 
 struct idr {
-- 
cgit v1.2.3


From 944ca05c7b4972f2ebf37262e0f4933d178ad6db Mon Sep 17 00:00:00 2001
From: Nadia Derbey <Nadia.Derbey@bull.net>
Date: Fri, 25 Jul 2008 01:47:59 -0700
Subject: idr: error checking factorization

Do some code factorization in the return code analysis.

Signed-off-by: Nadia Derbey <Nadia.Derbey@bull.net>
Cc: "Paul E. McKenney" <paulmck@us.ibm.com>
Cc: Manfred Spraul <manfred@colorfullife.com>
Cc: Jim Houston <jim.houston@comcast.net>
Cc: Pierre Peiffer <peifferp@gmail.com>
Acked-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/idr.h |  6 ++++++
 lib/idr.c           | 30 +++++++++---------------------
 2 files changed, 15 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/idr.h b/include/linux/idr.h
index 1af61d23be36..762c3f2c631d 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -73,6 +73,12 @@ struct idr {
 }
 #define DEFINE_IDR(name)	struct idr name = IDR_INIT(name)
 
+/* Actions to be taken after a call to _idr_sub_alloc */
+#define IDR_NEED_TO_GROW -2
+#define IDR_NOMORE_SPACE -3
+
+#define _idr_rc_to_errno(rc) ((rc) == -1 ? -EAGAIN : -ENOSPC)
+
 /*
  * This is what we export.
  */
diff --git a/lib/idr.c b/lib/idr.c
index 9d905b131ecb..80ba06f29d36 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -143,7 +143,7 @@ static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa)
 			/* if already at the top layer, we need to grow */
 			if (!(p = pa[l])) {
 				*starting_id = id;
-				return -2;
+				return IDR_NEED_TO_GROW;
 			}
 
 			/* If we need to go up one layer, continue the
@@ -160,7 +160,7 @@ static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa)
 			id = ((id >> sh) ^ n ^ m) << sh;
 		}
 		if ((id >= MAX_ID_BIT) || (id < 0))
-			return -3;
+			return IDR_NOMORE_SPACE;
 		if (l == 0)
 			break;
 		/*
@@ -229,7 +229,7 @@ build_up:
 	idp->top = p;
 	idp->layers = layers;
 	v = sub_alloc(idp, &id, pa);
-	if (v == -2)
+	if (v == IDR_NEED_TO_GROW)
 		goto build_up;
 	return(v);
 }
@@ -278,12 +278,8 @@ int idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id)
 	 * This is a cheap hack until the IDR code can be fixed to
 	 * return proper error values.
 	 */
-	if (rv < 0) {
-		if (rv == -1)
-			return -EAGAIN;
-		else /* Will be -3 */
-			return -ENOSPC;
-	}
+	if (rv < 0)
+		return _idr_rc_to_errno(rv);
 	*id = rv;
 	return 0;
 }
@@ -313,12 +309,8 @@ int idr_get_new(struct idr *idp, void *ptr, int *id)
 	 * This is a cheap hack until the IDR code can be fixed to
 	 * return proper error values.
 	 */
-	if (rv < 0) {
-		if (rv == -1)
-			return -EAGAIN;
-		else /* Will be -3 */
-			return -ENOSPC;
-	}
+	if (rv < 0)
+		return _idr_rc_to_errno(rv);
 	*id = rv;
 	return 0;
 }
@@ -696,12 +688,8 @@ int ida_get_new_above(struct ida *ida, int starting_id, int *p_id)
  restart:
 	/* get vacant slot */
 	t = idr_get_empty_slot(&ida->idr, idr_id, pa);
-	if (t < 0) {
-		if (t == -1)
-			return -EAGAIN;
-		else /* will be -3 */
-			return -ENOSPC;
-	}
+	if (t < 0)
+		return _idr_rc_to_errno(t);
 
 	if (t * IDA_BITMAP_BITS >= MAX_ID_BIT)
 		return -ENOSPC;
-- 
cgit v1.2.3


From f9c46d6ea5ce138a886c3a0f10a46130afab75f5 Mon Sep 17 00:00:00 2001
From: Nadia Derbey <Nadia.Derbey@bull.net>
Date: Fri, 25 Jul 2008 01:48:01 -0700
Subject: idr: make idr_find rcu-safe

Make idr_find rcu-safe: it can now be called inside an rcu_read critical
section.

Signed-off-by: Nadia Derbey <Nadia.Derbey@bull.net>
Reviewed-by: "Paul E. McKenney" <paulmck@us.ibm.com>
Cc: Manfred Spraul <manfred@colorfullife.com>
Cc: Jim Houston <jim.houston@comcast.net>
Cc: Pierre Peiffer <peifferp@gmail.com>
Acked-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/idr.h | 16 ++++++++++++++++
 lib/idr.c           | 11 ++++++-----
 2 files changed, 22 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/idr.h b/include/linux/idr.h
index 762c3f2c631d..fa035f96f2a3 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -79,6 +79,22 @@ struct idr {
 
 #define _idr_rc_to_errno(rc) ((rc) == -1 ? -EAGAIN : -ENOSPC)
 
+/**
+ * idr synchronization (stolen from radix-tree.h)
+ *
+ * idr_find() is able to be called locklessly, using RCU. The caller must
+ * ensure calls to this function are made within rcu_read_lock() regions.
+ * Other readers (lock-free or otherwise) and modifications may be running
+ * concurrently.
+ *
+ * It is still required that the caller manage the synchronization and
+ * lifetimes of the items. So if RCU lock-free lookups are used, typically
+ * this would mean that the items have their own locks, or are amenable to
+ * lock-free access; and that the items are freed by RCU (or only freed after
+ * having been deleted from the idr tree *and* a synchronize_rcu() grace
+ * period).
+ */
+
 /*
  * This is what we export.
  */
diff --git a/lib/idr.c b/lib/idr.c
index 44ab3b2a4eba..21e12af1f231 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -456,7 +456,8 @@ EXPORT_SYMBOL(idr_destroy);
  * return indicates that @id is not valid or you passed %NULL in
  * idr_get_new().
  *
- * The caller must serialize idr_find() vs idr_get_new() and idr_remove().
+ * This function can be called under rcu_read_lock(), given that the leaf
+ * pointers lifetimes are correctly managed.
  */
 void *idr_find(struct idr *idp, int id)
 {
@@ -464,7 +465,7 @@ void *idr_find(struct idr *idp, int id)
 	struct idr_layer *p;
 
 	n = idp->layers * IDR_BITS;
-	p = idp->top;
+	p = rcu_dereference(idp->top);
 
 	/* Mask off upper bits we don't use for the search. */
 	id &= MAX_ID_MASK;
@@ -474,7 +475,7 @@ void *idr_find(struct idr *idp, int id)
 
 	while (n > 0 && p) {
 		n -= IDR_BITS;
-		p = p->ary[(id >> n) & IDR_MASK];
+		p = rcu_dereference(p->ary[(id >> n) & IDR_MASK]);
 	}
 	return((void *)p);
 }
@@ -507,7 +508,7 @@ int idr_for_each(struct idr *idp,
 	struct idr_layer **paa = &pa[0];
 
 	n = idp->layers * IDR_BITS;
-	p = idp->top;
+	p = rcu_dereference(idp->top);
 	max = 1 << n;
 
 	id = 0;
@@ -515,7 +516,7 @@ int idr_for_each(struct idr *idp,
 		while (n > 0 && p) {
 			n -= IDR_BITS;
 			*paa++ = p;
-			p = p->ary[(id >> n) & IDR_MASK];
+			p = rcu_dereference(p->ary[(id >> n) & IDR_MASK]);
 		}
 
 		if (p) {
-- 
cgit v1.2.3


From 4daa28f6d8f5cda8ea0f55048e3c8811c384cbdd Mon Sep 17 00:00:00 2001
From: Manfred Spraul <manfred@colorfullife.com>
Date: Fri, 25 Jul 2008 01:48:04 -0700
Subject: ipc/sem.c: convert undo structures to struct list_head

The undo structures contain two linked lists, the attached patch replaces
them with generic struct list_head lists.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Manfred Spraul <manfred@colorfullife.com>
Cc: Nadia Derbey <Nadia.Derbey@bull.net>
Cc: Pierre Peiffer <peifferp@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sem.h |  12 ++--
 ipc/sem.c           | 163 ++++++++++++++++++++++++++++------------------------
 2 files changed, 95 insertions(+), 80 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sem.h b/include/linux/sem.h
index c8eaad9e4b72..6a1af1b49a13 100644
--- a/include/linux/sem.h
+++ b/include/linux/sem.h
@@ -95,7 +95,7 @@ struct sem_array {
 	struct sem		*sem_base;	/* ptr to first semaphore in array */
 	struct sem_queue	*sem_pending;	/* pending operations to be processed */
 	struct sem_queue	**sem_pending_last; /* last pending operation */
-	struct sem_undo		*undo;		/* undo requests on this array */
+	struct list_head	list_id;	/* undo requests on this array */
 	unsigned long		sem_nsems;	/* no. of semaphores in array */
 };
 
@@ -118,8 +118,8 @@ struct sem_queue {
  * when the process exits.
  */
 struct sem_undo {
-	struct sem_undo *	proc_next;	/* next entry on this process */
-	struct sem_undo *	id_next;	/* next entry on this semaphore set */
+	struct list_head	list_proc;	/* per-process list: all undos from one process */
+	struct list_head	list_id;	/* per semaphore array list: all undos for one array */
 	int			semid;		/* semaphore set identifier */
 	short *			semadj;		/* array of adjustments, one per semaphore */
 };
@@ -128,9 +128,9 @@ struct sem_undo {
  * that may be shared among all a CLONE_SYSVSEM task group.
  */ 
 struct sem_undo_list {
-	atomic_t	refcnt;
-	spinlock_t	lock;
-	struct sem_undo	*proc_list;
+	atomic_t		refcnt;
+	spinlock_t		lock;
+	struct list_head	list_proc;
 };
 
 struct sysv_sem {
diff --git a/ipc/sem.c b/ipc/sem.c
index e9418df5ff3e..4f26c7157356 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -274,7 +274,7 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params)
 	sma->sem_base = (struct sem *) &sma[1];
 	/* sma->sem_pending = NULL; */
 	sma->sem_pending_last = &sma->sem_pending;
-	/* sma->undo = NULL; */
+	INIT_LIST_HEAD(&sma->list_id);
 	sma->sem_nsems = nsems;
 	sma->sem_ctime = get_seconds();
 	sem_unlock(sma);
@@ -536,7 +536,8 @@ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
 	 * (They will be freed without any further action in exit_sem()
 	 * or during the next semop.)
 	 */
-	for (un = sma->undo; un; un = un->id_next)
+	assert_spin_locked(&sma->sem_perm.lock);
+	list_for_each_entry(un, &sma->list_id, list_id)
 		un->semid = -1;
 
 	/* Wake up all pending processes and let them fail with EIDRM. */
@@ -763,9 +764,12 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 
 		for (i = 0; i < nsems; i++)
 			sma->sem_base[i].semval = sem_io[i];
-		for (un = sma->undo; un; un = un->id_next)
+
+		assert_spin_locked(&sma->sem_perm.lock);
+		list_for_each_entry(un, &sma->list_id, list_id) {
 			for (i = 0; i < nsems; i++)
 				un->semadj[i] = 0;
+		}
 		sma->sem_ctime = get_seconds();
 		/* maybe some queued-up processes were waiting for this */
 		update_queue(sma);
@@ -797,12 +801,15 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 	{
 		int val = arg.val;
 		struct sem_undo *un;
+
 		err = -ERANGE;
 		if (val > SEMVMX || val < 0)
 			goto out_unlock;
 
-		for (un = sma->undo; un; un = un->id_next)
+		assert_spin_locked(&sma->sem_perm.lock);
+		list_for_each_entry(un, &sma->list_id, list_id)
 			un->semadj[semnum] = 0;
+
 		curr->semval = val;
 		curr->sempid = task_tgid_vnr(current);
 		sma->sem_ctime = get_seconds();
@@ -952,6 +959,8 @@ static inline int get_undo_list(struct sem_undo_list **undo_listp)
 			return -ENOMEM;
 		spin_lock_init(&undo_list->lock);
 		atomic_set(&undo_list->refcnt, 1);
+		INIT_LIST_HEAD(&undo_list->list_proc);
+
 		current->sysvsem.undo_list = undo_list;
 	}
 	*undo_listp = undo_list;
@@ -960,25 +969,30 @@ static inline int get_undo_list(struct sem_undo_list **undo_listp)
 
 static struct sem_undo *lookup_undo(struct sem_undo_list *ulp, int semid)
 {
-	struct sem_undo **last, *un;
-
-	last = &ulp->proc_list;
-	un = *last;
-	while(un != NULL) {
-		if(un->semid==semid)
-			break;
-		if(un->semid==-1) {
-			*last=un->proc_next;
-			kfree(un);
-		} else {
-			last=&un->proc_next;
+	struct sem_undo *walk, *tmp;
+
+	assert_spin_locked(&ulp->lock);
+	list_for_each_entry_safe(walk, tmp, &ulp->list_proc, list_proc) {
+		if (walk->semid == semid)
+			return walk;
+		if (walk->semid == -1) {
+			list_del(&walk->list_proc);
+			kfree(walk);
 		}
-		un=*last;
 	}
-	return un;
+	return NULL;
 }
 
-static struct sem_undo *find_undo(struct ipc_namespace *ns, int semid)
+/**
+ * find_alloc_undo - Lookup (and if not present create) undo array
+ * @ns: namespace
+ * @semid: semaphore array id
+ *
+ * The function looks up (and if not present creates) the undo structure.
+ * The size of the undo structure depends on the size of the semaphore
+ * array, thus the alloc path is not that straightforward.
+ */
+static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
 {
 	struct sem_array *sma;
 	struct sem_undo_list *ulp;
@@ -997,6 +1011,7 @@ static struct sem_undo *find_undo(struct ipc_namespace *ns, int semid)
 		goto out;
 
 	/* no undo structure around - allocate one. */
+	/* step 1: figure out the size of the semaphore array */
 	sma = sem_lock_check(ns, semid);
 	if (IS_ERR(sma))
 		return ERR_PTR(PTR_ERR(sma));
@@ -1004,15 +1019,19 @@ static struct sem_undo *find_undo(struct ipc_namespace *ns, int semid)
 	nsems = sma->sem_nsems;
 	sem_getref_and_unlock(sma);
 
+	/* step 2: allocate new undo structure */
 	new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL);
 	if (!new) {
 		sem_putref(sma);
 		return ERR_PTR(-ENOMEM);
 	}
-	new->semadj = (short *) &new[1];
-	new->semid = semid;
 
+	/* step 3: Acquire the lock on the undo list pointer */
 	spin_lock(&ulp->lock);
+
+	/* step 4: check for races: someone else allocated the undo struct,
+	 *         semaphore array was destroyed.
+	 */
 	un = lookup_undo(ulp, semid);
 	if (un) {
 		spin_unlock(&ulp->lock);
@@ -1028,13 +1047,17 @@ static struct sem_undo *find_undo(struct ipc_namespace *ns, int semid)
 		un = ERR_PTR(-EIDRM);
 		goto out;
 	}
-	new->proc_next = ulp->proc_list;
-	ulp->proc_list = new;
-	new->id_next = sma->undo;
-	sma->undo = new;
+	/* step 5: initialize & link new undo structure */
+	new->semadj = (short *) &new[1];
+	new->semid = semid;
+	assert_spin_locked(&ulp->lock);
+	list_add(&new->list_proc, &ulp->list_proc);
+	assert_spin_locked(&sma->sem_perm.lock);
+	list_add(&new->list_id, &sma->list_id);
+
 	sem_unlock(sma);
-	un = new;
 	spin_unlock(&ulp->lock);
+	un = new;
 out:
 	return un;
 }
@@ -1090,9 +1113,8 @@ asmlinkage long sys_semtimedop(int semid, struct sembuf __user *tsops,
 			alter = 1;
 	}
 
-retry_undos:
 	if (undos) {
-		un = find_undo(ns, semid);
+		un = find_alloc_undo(ns, semid);
 		if (IS_ERR(un)) {
 			error = PTR_ERR(un);
 			goto out_free;
@@ -1107,14 +1129,14 @@ retry_undos:
 	}
 
 	/*
-	 * semid identifiers are not unique - find_undo may have
+	 * semid identifiers are not unique - find_alloc_undo may have
 	 * allocated an undo structure, it was invalidated by an RMID
-	 * and now a new array with received the same id. Check and retry.
+	 * and now a new array with received the same id. Check and fail.
 	 */
-	if (un && un->semid == -1) {
-		sem_unlock(sma);
-		goto retry_undos;
-	}
+	error = -EIDRM;
+	if (un && un->semid == -1)
+		goto out_unlock_free;
+
 	error = -EFBIG;
 	if (max >= sma->sem_nsems)
 		goto out_unlock_free;
@@ -1243,56 +1265,44 @@ int copy_semundo(unsigned long clone_flags, struct task_struct *tsk)
  */
 void exit_sem(struct task_struct *tsk)
 {
-	struct sem_undo_list *undo_list;
-	struct sem_undo *u, **up;
-	struct ipc_namespace *ns;
+	struct sem_undo_list *ulp;
+	struct sem_undo *un, *tmp;
 
-	undo_list = tsk->sysvsem.undo_list;
-	if (!undo_list)
+	ulp = tsk->sysvsem.undo_list;
+	if (!ulp)
 		return;
 	tsk->sysvsem.undo_list = NULL;
 
-	if (!atomic_dec_and_test(&undo_list->refcnt))
+	if (!atomic_dec_and_test(&ulp->refcnt))
 		return;
 
-	ns = tsk->nsproxy->ipc_ns;
-	/* There's no need to hold the semundo list lock, as current
-         * is the last task exiting for this undo list.
-	 */
-	for (up = &undo_list->proc_list; (u = *up); *up = u->proc_next, kfree(u)) {
+	spin_lock(&ulp->lock);
+
+	list_for_each_entry_safe(un, tmp, &ulp->list_proc, list_proc) {
 		struct sem_array *sma;
-		int nsems, i;
-		struct sem_undo *un, **unp;
-		int semid;
-	       
-		semid = u->semid;
-
-		if(semid == -1)
-			continue;
-		sma = sem_lock(ns, semid);
+		int i;
+
+		if (un->semid == -1)
+			goto free;
+
+		sma = sem_lock(tsk->nsproxy->ipc_ns, un->semid);
 		if (IS_ERR(sma))
-			continue;
+			goto free;
 
-		if (u->semid == -1)
-			goto next_entry;
+		if (un->semid == -1)
+			goto unlock_free;
 
-		BUG_ON(sem_checkid(sma, u->semid));
+		BUG_ON(sem_checkid(sma, un->semid));
 
-		/* remove u from the sma->undo list */
-		for (unp = &sma->undo; (un = *unp); unp = &un->id_next) {
-			if (u == un)
-				goto found;
-		}
-		printk ("exit_sem undo list error id=%d\n", u->semid);
-		goto next_entry;
-found:
-		*unp = un->id_next;
-		/* perform adjustments registered in u */
-		nsems = sma->sem_nsems;
-		for (i = 0; i < nsems; i++) {
+		/* remove un from sma->list_id */
+		assert_spin_locked(&sma->sem_perm.lock);
+		list_del(&un->list_id);
+
+		/* perform adjustments registered in un */
+		for (i = 0; i < sma->sem_nsems; i++) {
 			struct sem * semaphore = &sma->sem_base[i];
-			if (u->semadj[i]) {
-				semaphore->semval += u->semadj[i];
+			if (un->semadj[i]) {
+				semaphore->semval += un->semadj[i];
 				/*
 				 * Range checks of the new semaphore value,
 				 * not defined by sus:
@@ -1316,10 +1326,15 @@ found:
 		sma->sem_otime = get_seconds();
 		/* maybe some queued-up processes were waiting for this */
 		update_queue(sma);
-next_entry:
+unlock_free:
 		sem_unlock(sma);
+free:
+		assert_spin_locked(&ulp->lock);
+		list_del(&un->list_proc);
+		kfree(un);
 	}
-	kfree(undo_list);
+	spin_unlock(&ulp->lock);
+	kfree(ulp);
 }
 
 #ifdef CONFIG_PROC_FS
-- 
cgit v1.2.3


From 2c0c29d414087f3b021059673c20a7088f5f1fff Mon Sep 17 00:00:00 2001
From: Manfred Spraul <manfred@colorfullife.com>
Date: Fri, 25 Jul 2008 01:48:05 -0700
Subject: ipc/sem.c: remove unused entries from struct sem_queue

sem_queue.sma and sem_queue.id were never used, the attached patch removes
them.

Signed-off-by: Manfred Spraul <manfred@colorfullife.com>
Reviewed-by: Nadia Derbey <Nadia.Derbey@bull.net>
Cc: Pierre Peiffer <peifferp@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sem.h | 2 --
 ipc/sem.c           | 2 --
 2 files changed, 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sem.h b/include/linux/sem.h
index 6a1af1b49a13..87756ef1198e 100644
--- a/include/linux/sem.h
+++ b/include/linux/sem.h
@@ -107,8 +107,6 @@ struct sem_queue {
 	struct sem_undo *	undo;	 /* undo structure */
 	int    			pid;	 /* process id of requesting process */
 	int    			status;	 /* completion status of operation */
-	struct sem_array *	sma;	 /* semaphore array for operations */
-	int			id;	 /* internal sem id */
 	struct sembuf *		sops;	 /* array of pending operations */
 	int			nsops;	 /* number of operations */
 	int			alter;   /* does the operation alter the array? */
diff --git a/ipc/sem.c b/ipc/sem.c
index 4f26c7157356..d5ce4000ca17 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -1160,12 +1160,10 @@ asmlinkage long sys_semtimedop(int semid, struct sembuf __user *tsops,
 	 * task into the pending queue and go to sleep.
 	 */
 		
-	queue.sma = sma;
 	queue.sops = sops;
 	queue.nsops = nsops;
 	queue.undo = un;
 	queue.pid = task_tgid_vnr(current);
-	queue.id = semid;
 	queue.alter = alter;
 	if (alter)
 		append_to_queue(sma ,&queue);
-- 
cgit v1.2.3


From a1193f8ec091cd8fd309cc2982abe4499f6f2b4d Mon Sep 17 00:00:00 2001
From: Manfred Spraul <manfred@colorfullife.com>
Date: Fri, 25 Jul 2008 01:48:06 -0700
Subject: ipc/sem.c: convert sem_array.sem_pending to struct list_head

sem_array.sem_pending is a double linked list, the attached patch converts
it to struct list_head.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Manfred Spraul <manfred@colorfullife.com>
Reviewed-by: Nadia Derbey <Nadia.Derbey@bull.net>
Cc: Pierre Peiffer <peifferp@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sem.h | 12 +++----
 ipc/sem.c           | 92 ++++++++++++++++++++---------------------------------
 2 files changed, 40 insertions(+), 64 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sem.h b/include/linux/sem.h
index 87756ef1198e..d42599395d79 100644
--- a/include/linux/sem.h
+++ b/include/linux/sem.h
@@ -93,21 +93,19 @@ struct sem_array {
 	time_t			sem_otime;	/* last semop time */
 	time_t			sem_ctime;	/* last change time */
 	struct sem		*sem_base;	/* ptr to first semaphore in array */
-	struct sem_queue	*sem_pending;	/* pending operations to be processed */
-	struct sem_queue	**sem_pending_last; /* last pending operation */
+	struct list_head	sem_pending;	/* pending operations to be processed */
 	struct list_head	list_id;	/* undo requests on this array */
 	unsigned long		sem_nsems;	/* no. of semaphores in array */
 };
 
 /* One queue for each sleeping process in the system. */
 struct sem_queue {
-	struct sem_queue *	next;	 /* next entry in the queue */
-	struct sem_queue **	prev;	 /* previous entry in the queue, *(q->prev) == q */
-	struct task_struct*	sleeper; /* this process */
-	struct sem_undo *	undo;	 /* undo structure */
+	struct list_head	list;	 /* queue of pending operations */
+	struct task_struct	*sleeper; /* this process */
+	struct sem_undo		*undo;	 /* undo structure */
 	int    			pid;	 /* process id of requesting process */
 	int    			status;	 /* completion status of operation */
-	struct sembuf *		sops;	 /* array of pending operations */
+	struct sembuf		*sops;	 /* array of pending operations */
 	int			nsops;	 /* number of operations */
 	int			alter;   /* does the operation alter the array? */
 };
diff --git a/ipc/sem.c b/ipc/sem.c
index d5ce4000ca17..3ca232736b31 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -272,8 +272,7 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params)
 	ns->used_sems += nsems;
 
 	sma->sem_base = (struct sem *) &sma[1];
-	/* sma->sem_pending = NULL; */
-	sma->sem_pending_last = &sma->sem_pending;
+	INIT_LIST_HEAD(&sma->sem_pending);
 	INIT_LIST_HEAD(&sma->list_id);
 	sma->sem_nsems = nsems;
 	sma->sem_ctime = get_seconds();
@@ -331,38 +330,6 @@ asmlinkage long sys_semget(key_t key, int nsems, int semflg)
 	return ipcget(ns, &sem_ids(ns), &sem_ops, &sem_params);
 }
 
-/* Manage the doubly linked list sma->sem_pending as a FIFO:
- * insert new queue elements at the tail sma->sem_pending_last.
- */
-static inline void append_to_queue (struct sem_array * sma,
-				    struct sem_queue * q)
-{
-	*(q->prev = sma->sem_pending_last) = q;
-	*(sma->sem_pending_last = &q->next) = NULL;
-}
-
-static inline void prepend_to_queue (struct sem_array * sma,
-				     struct sem_queue * q)
-{
-	q->next = sma->sem_pending;
-	*(q->prev = &sma->sem_pending) = q;
-	if (q->next)
-		q->next->prev = &q->next;
-	else /* sma->sem_pending_last == &sma->sem_pending */
-		sma->sem_pending_last = &q->next;
-}
-
-static inline void remove_from_queue (struct sem_array * sma,
-				      struct sem_queue * q)
-{
-	*(q->prev) = q->next;
-	if (q->next)
-		q->next->prev = q->prev;
-	else /* sma->sem_pending_last == &q->next */
-		sma->sem_pending_last = q->prev;
-	q->prev = NULL; /* mark as removed */
-}
-
 /*
  * Determine whether a sequence of semaphore operations would succeed
  * all at once. Return 0 if yes, 1 if need to sleep, else return error code.
@@ -438,16 +405,15 @@ static void update_queue (struct sem_array * sma)
 	int error;
 	struct sem_queue * q;
 
-	q = sma->sem_pending;
-	while(q) {
+	q = list_entry(sma->sem_pending.next, struct sem_queue, list);
+	while (&q->list != &sma->sem_pending) {
 		error = try_atomic_semop(sma, q->sops, q->nsops,
 					 q->undo, q->pid);
 
 		/* Does q->sleeper still need to sleep? */
 		if (error <= 0) {
 			struct sem_queue *n;
-			remove_from_queue(sma,q);
-			q->status = IN_WAKEUP;
+
 			/*
 			 * Continue scanning. The next operation
 			 * that must be checked depends on the type of the
@@ -458,11 +424,26 @@ static void update_queue (struct sem_array * sma)
 			 *   for semaphore values to become 0.
 			 * - if the operation didn't modify the array,
 			 *   then just continue.
+			 * The order of list_del() and reading ->next
+			 * is crucial: In the former case, the list_del()
+			 * must be done first [because we might be the
+			 * first entry in ->sem_pending], in the latter
+			 * case the list_del() must be done last
+			 * [because the list is invalid after the list_del()]
 			 */
-			if (q->alter)
-				n = sma->sem_pending;
-			else
-				n = q->next;
+			if (q->alter) {
+				list_del(&q->list);
+				n = list_entry(sma->sem_pending.next,
+						struct sem_queue, list);
+			} else {
+				n = list_entry(q->list.next, struct sem_queue,
+						list);
+				list_del(&q->list);
+			}
+
+			/* wake up the waiting thread */
+			q->status = IN_WAKEUP;
+
 			wake_up_process(q->sleeper);
 			/* hands-off: q will disappear immediately after
 			 * writing q->status.
@@ -471,7 +452,7 @@ static void update_queue (struct sem_array * sma)
 			q->status = error;
 			q = n;
 		} else {
-			q = q->next;
+			q = list_entry(q->list.next, struct sem_queue, list);
 		}
 	}
 }
@@ -491,7 +472,7 @@ static int count_semncnt (struct sem_array * sma, ushort semnum)
 	struct sem_queue * q;
 
 	semncnt = 0;
-	for (q = sma->sem_pending; q; q = q->next) {
+	list_for_each_entry(q, &sma->sem_pending, list) {
 		struct sembuf * sops = q->sops;
 		int nsops = q->nsops;
 		int i;
@@ -503,13 +484,14 @@ static int count_semncnt (struct sem_array * sma, ushort semnum)
 	}
 	return semncnt;
 }
+
 static int count_semzcnt (struct sem_array * sma, ushort semnum)
 {
 	int semzcnt;
 	struct sem_queue * q;
 
 	semzcnt = 0;
-	for (q = sma->sem_pending; q; q = q->next) {
+	list_for_each_entry(q, &sma->sem_pending, list) {
 		struct sembuf * sops = q->sops;
 		int nsops = q->nsops;
 		int i;
@@ -529,7 +511,7 @@ static int count_semzcnt (struct sem_array * sma, ushort semnum)
 static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
 {
 	struct sem_undo *un;
-	struct sem_queue *q;
+	struct sem_queue *q, *t;
 	struct sem_array *sma = container_of(ipcp, struct sem_array, sem_perm);
 
 	/* Invalidate the existing undo structures for this semaphore set.
@@ -541,17 +523,14 @@ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
 		un->semid = -1;
 
 	/* Wake up all pending processes and let them fail with EIDRM. */
-	q = sma->sem_pending;
-	while(q) {
-		struct sem_queue *n;
-		/* lazy remove_from_queue: we are killing the whole queue */
-		q->prev = NULL;
-		n = q->next;
+
+	list_for_each_entry_safe(q, t, &sma->sem_pending, list) {
+		list_del(&q->list);
+
 		q->status = IN_WAKEUP;
 		wake_up_process(q->sleeper); /* doesn't sleep */
 		smp_wmb();
 		q->status = -EIDRM;	/* hands-off q */
-		q = n;
 	}
 
 	/* Remove the semaphore set from the IDR */
@@ -1166,9 +1145,9 @@ asmlinkage long sys_semtimedop(int semid, struct sembuf __user *tsops,
 	queue.pid = task_tgid_vnr(current);
 	queue.alter = alter;
 	if (alter)
-		append_to_queue(sma ,&queue);
+		list_add_tail(&queue.list, &sma->sem_pending);
 	else
-		prepend_to_queue(sma ,&queue);
+		list_add(&queue.list, &sma->sem_pending);
 
 	queue.status = -EINTR;
 	queue.sleeper = current;
@@ -1194,7 +1173,6 @@ asmlinkage long sys_semtimedop(int semid, struct sembuf __user *tsops,
 
 	sma = sem_lock(ns, semid);
 	if (IS_ERR(sma)) {
-		BUG_ON(queue.prev != NULL);
 		error = -EIDRM;
 		goto out_free;
 	}
@@ -1212,7 +1190,7 @@ asmlinkage long sys_semtimedop(int semid, struct sembuf __user *tsops,
 	 */
 	if (timeout && jiffies_left == 0)
 		error = -EAGAIN;
-	remove_from_queue(sma,&queue);
+	list_del(&queue.list);
 	goto out_unlock_free;
 
 out_unlock_free:
-- 
cgit v1.2.3


From 380af1b33b3ff92df5cda96329b58f5d1b6b5a53 Mon Sep 17 00:00:00 2001
From: Manfred Spraul <manfred@colorfullife.com>
Date: Fri, 25 Jul 2008 01:48:06 -0700
Subject: ipc/sem.c: rewrite undo list locking

The attached patch:
- reverses the locking order of ulp->lock and sem_lock:
  Previously, it was first ulp->lock, then inside sem_lock.
  Now it's the other way around.
- converts the undo structure to rcu.

Benefits:
- With the old locking order, IPC_RMID could not kfree the undo structures.
  The stale entries remained in the linked lists and were released later.
- The patch fixes a a race in semtimedop(): if both IPC_RMID and a semget() that
  recreates exactly the same id happen between find_alloc_undo() and sem_lock,
  then semtimedop() would access already kfree'd memory.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Manfred Spraul <manfred@colorfullife.com>
Reviewed-by: Nadia Derbey <Nadia.Derbey@bull.net>
Cc: Pierre Peiffer <peifferp@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sem.h |   6 ++-
 ipc/sem.c           | 147 +++++++++++++++++++++++++++++++++-------------------
 2 files changed, 98 insertions(+), 55 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sem.h b/include/linux/sem.h
index d42599395d79..1b191c176bcd 100644
--- a/include/linux/sem.h
+++ b/include/linux/sem.h
@@ -78,6 +78,7 @@ struct  seminfo {
 
 #ifdef __KERNEL__
 #include <asm/atomic.h>
+#include <linux/rcupdate.h>
 
 struct task_struct;
 
@@ -114,7 +115,10 @@ struct sem_queue {
  * when the process exits.
  */
 struct sem_undo {
-	struct list_head	list_proc;	/* per-process list: all undos from one process */
+	struct list_head	list_proc;	/* per-process list: all undos from one process. */
+						/* rcu protected */
+	struct rcu_head		rcu;		/* rcu struct for sem_undo() */
+	struct sem_undo_list	*ulp;		/* sem_undo_list for the process */
 	struct list_head	list_id;	/* per semaphore array list: all undos for one array */
 	int			semid;		/* semaphore set identifier */
 	short *			semadj;		/* array of adjustments, one per semaphore */
diff --git a/ipc/sem.c b/ipc/sem.c
index 3ca232736b31..bf1bc36cb7ee 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -504,27 +504,35 @@ static int count_semzcnt (struct sem_array * sma, ushort semnum)
 	return semzcnt;
 }
 
+void free_un(struct rcu_head *head)
+{
+	struct sem_undo *un = container_of(head, struct sem_undo, rcu);
+	kfree(un);
+}
+
 /* Free a semaphore set. freeary() is called with sem_ids.rw_mutex locked
  * as a writer and the spinlock for this semaphore set hold. sem_ids.rw_mutex
  * remains locked on exit.
  */
 static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
 {
-	struct sem_undo *un;
-	struct sem_queue *q, *t;
+	struct sem_undo *un, *tu;
+	struct sem_queue *q, *tq;
 	struct sem_array *sma = container_of(ipcp, struct sem_array, sem_perm);
 
-	/* Invalidate the existing undo structures for this semaphore set.
-	 * (They will be freed without any further action in exit_sem()
-	 * or during the next semop.)
-	 */
+	/* Free the existing undo structures for this semaphore set.  */
 	assert_spin_locked(&sma->sem_perm.lock);
-	list_for_each_entry(un, &sma->list_id, list_id)
+	list_for_each_entry_safe(un, tu, &sma->list_id, list_id) {
+		list_del(&un->list_id);
+		spin_lock(&un->ulp->lock);
 		un->semid = -1;
+		list_del_rcu(&un->list_proc);
+		spin_unlock(&un->ulp->lock);
+		call_rcu(&un->rcu, free_un);
+	}
 
 	/* Wake up all pending processes and let them fail with EIDRM. */
-
-	list_for_each_entry_safe(q, t, &sma->sem_pending, list) {
+	list_for_each_entry_safe(q, tq, &sma->sem_pending, list) {
 		list_del(&q->list);
 
 		q->status = IN_WAKEUP;
@@ -948,16 +956,11 @@ static inline int get_undo_list(struct sem_undo_list **undo_listp)
 
 static struct sem_undo *lookup_undo(struct sem_undo_list *ulp, int semid)
 {
-	struct sem_undo *walk, *tmp;
+	struct sem_undo *walk;
 
-	assert_spin_locked(&ulp->lock);
-	list_for_each_entry_safe(walk, tmp, &ulp->list_proc, list_proc) {
+	list_for_each_entry_rcu(walk, &ulp->list_proc, list_proc) {
 		if (walk->semid == semid)
 			return walk;
-		if (walk->semid == -1) {
-			list_del(&walk->list_proc);
-			kfree(walk);
-		}
 	}
 	return NULL;
 }
@@ -970,6 +973,8 @@ static struct sem_undo *lookup_undo(struct sem_undo_list *ulp, int semid)
  * The function looks up (and if not present creates) the undo structure.
  * The size of the undo structure depends on the size of the semaphore
  * array, thus the alloc path is not that straightforward.
+ * Lifetime-rules: sem_undo is rcu-protected, on success, the function
+ * performs a rcu_read_lock().
  */
 static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
 {
@@ -983,11 +988,13 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
 	if (error)
 		return ERR_PTR(error);
 
+	rcu_read_lock();
 	spin_lock(&ulp->lock);
 	un = lookup_undo(ulp, semid);
 	spin_unlock(&ulp->lock);
 	if (likely(un!=NULL))
 		goto out;
+	rcu_read_unlock();
 
 	/* no undo structure around - allocate one. */
 	/* step 1: figure out the size of the semaphore array */
@@ -1005,38 +1012,38 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
 		return ERR_PTR(-ENOMEM);
 	}
 
-	/* step 3: Acquire the lock on the undo list pointer */
-	spin_lock(&ulp->lock);
-
-	/* step 4: check for races: someone else allocated the undo struct,
-	 *         semaphore array was destroyed.
-	 */
-	un = lookup_undo(ulp, semid);
-	if (un) {
-		spin_unlock(&ulp->lock);
-		kfree(new);
-		sem_putref(sma);
-		goto out;
-	}
+	/* step 3: Acquire the lock on semaphore array */
 	sem_lock_and_putref(sma);
 	if (sma->sem_perm.deleted) {
 		sem_unlock(sma);
-		spin_unlock(&ulp->lock);
 		kfree(new);
 		un = ERR_PTR(-EIDRM);
 		goto out;
 	}
+	spin_lock(&ulp->lock);
+
+	/*
+	 * step 4: check for races: did someone else allocate the undo struct?
+	 */
+	un = lookup_undo(ulp, semid);
+	if (un) {
+		kfree(new);
+		goto success;
+	}
 	/* step 5: initialize & link new undo structure */
 	new->semadj = (short *) &new[1];
+	new->ulp = ulp;
 	new->semid = semid;
 	assert_spin_locked(&ulp->lock);
-	list_add(&new->list_proc, &ulp->list_proc);
+	list_add_rcu(&new->list_proc, &ulp->list_proc);
 	assert_spin_locked(&sma->sem_perm.lock);
 	list_add(&new->list_id, &sma->list_id);
+	un = new;
 
-	sem_unlock(sma);
+success:
 	spin_unlock(&ulp->lock);
-	un = new;
+	rcu_read_lock();
+	sem_unlock(sma);
 out:
 	return un;
 }
@@ -1103,6 +1110,8 @@ asmlinkage long sys_semtimedop(int semid, struct sembuf __user *tsops,
 
 	sma = sem_lock_check(ns, semid);
 	if (IS_ERR(sma)) {
+		if (un)
+			rcu_read_unlock();
 		error = PTR_ERR(sma);
 		goto out_free;
 	}
@@ -1111,10 +1120,26 @@ asmlinkage long sys_semtimedop(int semid, struct sembuf __user *tsops,
 	 * semid identifiers are not unique - find_alloc_undo may have
 	 * allocated an undo structure, it was invalidated by an RMID
 	 * and now a new array with received the same id. Check and fail.
+	 * This case can be detected checking un->semid. The existance of
+	 * "un" itself is guaranteed by rcu.
 	 */
 	error = -EIDRM;
-	if (un && un->semid == -1)
-		goto out_unlock_free;
+	if (un) {
+		if (un->semid == -1) {
+			rcu_read_unlock();
+			goto out_unlock_free;
+		} else {
+			/*
+			 * rcu lock can be released, "un" cannot disappear:
+			 * - sem_lock is acquired, thus IPC_RMID is
+			 *   impossible.
+			 * - exit_sem is impossible, it always operates on
+			 *   current (or a dead task).
+			 */
+
+			rcu_read_unlock();
+		}
+	}
 
 	error = -EFBIG;
 	if (max >= sma->sem_nsems)
@@ -1242,7 +1267,6 @@ int copy_semundo(unsigned long clone_flags, struct task_struct *tsk)
 void exit_sem(struct task_struct *tsk)
 {
 	struct sem_undo_list *ulp;
-	struct sem_undo *un, *tmp;
 
 	ulp = tsk->sysvsem.undo_list;
 	if (!ulp)
@@ -1252,28 +1276,47 @@ void exit_sem(struct task_struct *tsk)
 	if (!atomic_dec_and_test(&ulp->refcnt))
 		return;
 
-	spin_lock(&ulp->lock);
-
-	list_for_each_entry_safe(un, tmp, &ulp->list_proc, list_proc) {
+	for (;;) {
 		struct sem_array *sma;
+		struct sem_undo *un;
+		int semid;
 		int i;
 
-		if (un->semid == -1)
-			goto free;
+		rcu_read_lock();
+		un = list_entry(rcu_dereference(ulp->list_proc.next),
+					struct sem_undo, list_proc);
+		if (&un->list_proc == &ulp->list_proc)
+			semid = -1;
+		 else
+			semid = un->semid;
+		rcu_read_unlock();
 
-		sma = sem_lock(tsk->nsproxy->ipc_ns, un->semid);
-		if (IS_ERR(sma))
-			goto free;
+		if (semid == -1)
+			break;
 
-		if (un->semid == -1)
-			goto unlock_free;
+		sma = sem_lock_check(tsk->nsproxy->ipc_ns, un->semid);
 
-		BUG_ON(sem_checkid(sma, un->semid));
+		/* exit_sem raced with IPC_RMID, nothing to do */
+		if (IS_ERR(sma))
+			continue;
 
-		/* remove un from sma->list_id */
+		un = lookup_undo(ulp, semid);
+		if (un == NULL) {
+			/* exit_sem raced with IPC_RMID+semget() that created
+			 * exactly the same semid. Nothing to do.
+			 */
+			sem_unlock(sma);
+			continue;
+		}
+
+		/* remove un from the linked lists */
 		assert_spin_locked(&sma->sem_perm.lock);
 		list_del(&un->list_id);
 
+		spin_lock(&ulp->lock);
+		list_del_rcu(&un->list_proc);
+		spin_unlock(&ulp->lock);
+
 		/* perform adjustments registered in un */
 		for (i = 0; i < sma->sem_nsems; i++) {
 			struct sem * semaphore = &sma->sem_base[i];
@@ -1302,14 +1345,10 @@ void exit_sem(struct task_struct *tsk)
 		sma->sem_otime = get_seconds();
 		/* maybe some queued-up processes were waiting for this */
 		update_queue(sma);
-unlock_free:
 		sem_unlock(sma);
-free:
-		assert_spin_locked(&ulp->lock);
-		list_del(&un->list_proc);
-		kfree(un);
+
+		call_rcu(&un->rcu, free_un);
 	}
-	spin_unlock(&ulp->lock);
 	kfree(ulp);
 }
 
-- 
cgit v1.2.3


From 9eefe520c814f6f62c5d36a2ddcd3fb99dfdb30e Mon Sep 17 00:00:00 2001
From: Nadia Derbey <Nadia.Derbey@bull.net>
Date: Fri, 25 Jul 2008 01:48:08 -0700
Subject: ipc: do not use a negative value to re-enable msgmni automatic
 recomputing

This patch proposes an alternative to the "magical
positive-versus-negative number trick" Andrew complained about last week
in http://lkml.org/lkml/2008/6/24/418.

This had been introduced with the patches that scale msgmni to the amount
of lowmem.  With these patches, msgmni has a registered notification
routine that recomputes msgmni value upon memory add/remove or ipc
namespace creation/ removal.

When msgmni is changed from user space (i.e.  value written to the proc
file), that notification routine is unregistered, and the way to make it
registered back is to write a negative value into the proc file.  This is
the "magical positive-versus-negative number trick".

To fix this, a new proc file is introduced: /proc/sys/kernel/auto_msgmni.
This file acts as ON/OFF for msgmni automatic recomputing.

With this patch, the process is the following:
1) kernel boots in "automatic recomputing mode"
   /proc/sys/kernel/msgmni contains the value that has been computed (depends
                           on lowmem)
   /proc/sys/kernel/automatic_msgmni contains "1"

2) echo <val> > /proc/sys/kernel/msgmni
   . sets msg_ctlmni to <val>
   . de-activates automatic recomputing (i.e. if, say, some memory is added
     msgmni won't be recomputed anymore)
   . /proc/sys/kernel/automatic_msgmni now contains "0"

3) echo "0" > /proc/sys/kernel/automatic_msgmni
   . de-activates msgmni automatic recomputing
     this has the same effect as 2) except that msg_ctlmni's value stays
     blocked at its current value)

3) echo "1" > /proc/sys/kernel/automatic_msgmni
   . recomputes msgmni's value based on the current available memory size
     and number of ipc namespaces
   . re-activates automatic recomputing for msgmni.

Signed-off-by: Nadia Derbey <Nadia.Derbey@bull.net>
Cc: Solofo Ramangalahy <Solofo.Ramangalahy@bull.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/ipc_namespace.h |  3 +-
 ipc/ipc_sysctl.c              | 72 +++++++++++++++++++++++++++++++++++--------
 ipc/ipcns_notifier.c          | 20 +++++++++---
 3 files changed, 76 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index ea6c18a8b0d4..ea330f9e7100 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -36,6 +36,7 @@ struct ipc_namespace {
 	int		msg_ctlmni;
 	atomic_t	msg_bytes;
 	atomic_t	msg_hdrs;
+	int		auto_msgmni;
 
 	size_t		shm_ctlmax;
 	size_t		shm_ctlall;
@@ -53,7 +54,7 @@ extern atomic_t nr_ipc_ns;
 
 extern int register_ipcns_notifier(struct ipc_namespace *);
 extern int cond_register_ipcns_notifier(struct ipc_namespace *);
-extern int unregister_ipcns_notifier(struct ipc_namespace *);
+extern void unregister_ipcns_notifier(struct ipc_namespace *);
 extern int ipcns_notify(unsigned long);
 
 #else /* CONFIG_SYSVIPC */
diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c
index d3497465cc0a..69bc85978ba0 100644
--- a/ipc/ipc_sysctl.c
+++ b/ipc/ipc_sysctl.c
@@ -27,15 +27,17 @@ static void *get_ipc(ctl_table *table)
 }
 
 /*
- * Routine that is called when a tunable has successfully been changed by
- * hand and it has a callback routine registered on the ipc namespace notifier
- * chain: we don't want such tunables to be recomputed anymore upon memory
- * add/remove or ipc namespace creation/removal.
- * They can come back to a recomputable state by being set to a <0 value.
+ * Routine that is called when the file "auto_msgmni" has successfully been
+ * written.
+ * Two values are allowed:
+ * 0: unregister msgmni's callback routine from the ipc namespace notifier
+ *    chain. This means that msgmni won't be recomputed anymore upon memory
+ *    add/remove or ipc namespace creation/removal.
+ * 1: register back the callback routine.
  */
-static void tunable_set_callback(int val)
+static void ipc_auto_callback(int val)
 {
-	if (val >= 0)
+	if (!val)
 		unregister_ipcns_notifier(current->nsproxy->ipc_ns);
 	else {
 		/*
@@ -71,7 +73,12 @@ static int proc_ipc_callback_dointvec(ctl_table *table, int write,
 	rc = proc_dointvec(&ipc_table, write, filp, buffer, lenp, ppos);
 
 	if (write && !rc && lenp_bef == *lenp)
-		tunable_set_callback(*((int *)(ipc_table.data)));
+		/*
+		 * Tunable has successfully been changed by hand. Disable its
+		 * automatic adjustment. This simply requires unregistering
+		 * the notifiers that trigger recalculation.
+		 */
+		unregister_ipcns_notifier(current->nsproxy->ipc_ns);
 
 	return rc;
 }
@@ -87,10 +94,39 @@ static int proc_ipc_doulongvec_minmax(ctl_table *table, int write,
 					lenp, ppos);
 }
 
+static int proc_ipcauto_dointvec_minmax(ctl_table *table, int write,
+	struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	struct ctl_table ipc_table;
+	size_t lenp_bef = *lenp;
+	int oldval;
+	int rc;
+
+	memcpy(&ipc_table, table, sizeof(ipc_table));
+	ipc_table.data = get_ipc(table);
+	oldval = *((int *)(ipc_table.data));
+
+	rc = proc_dointvec_minmax(&ipc_table, write, filp, buffer, lenp, ppos);
+
+	if (write && !rc && lenp_bef == *lenp) {
+		int newval = *((int *)(ipc_table.data));
+		/*
+		 * The file "auto_msgmni" has correctly been set.
+		 * React by (un)registering the corresponding tunable, if the
+		 * value has changed.
+		 */
+		if (newval != oldval)
+			ipc_auto_callback(newval);
+	}
+
+	return rc;
+}
+
 #else
 #define proc_ipc_doulongvec_minmax NULL
 #define proc_ipc_dointvec	   NULL
 #define proc_ipc_callback_dointvec NULL
+#define proc_ipcauto_dointvec_minmax NULL
 #endif
 
 #ifdef CONFIG_SYSCTL_SYSCALL
@@ -142,14 +178,11 @@ static int sysctl_ipc_registered_data(ctl_table *table, int __user *name,
 	rc = sysctl_ipc_data(table, name, nlen, oldval, oldlenp, newval,
 		newlen);
 
-	if (newval && newlen && rc > 0) {
+	if (newval && newlen && rc > 0)
 		/*
 		 * Tunable has successfully been changed from userland
 		 */
-		int *data = get_ipc(table);
-
-		tunable_set_callback(*data);
-	}
+		unregister_ipcns_notifier(current->nsproxy->ipc_ns);
 
 	return rc;
 }
@@ -158,6 +191,9 @@ static int sysctl_ipc_registered_data(ctl_table *table, int __user *name,
 #define sysctl_ipc_registered_data NULL
 #endif
 
+static int zero;
+static int one = 1;
+
 static struct ctl_table ipc_kern_table[] = {
 	{
 		.ctl_name	= KERN_SHMMAX,
@@ -222,6 +258,16 @@ static struct ctl_table ipc_kern_table[] = {
 		.proc_handler	= proc_ipc_dointvec,
 		.strategy	= sysctl_ipc_data,
 	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "auto_msgmni",
+		.data		= &init_ipc_ns.auto_msgmni,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_ipcauto_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &one,
+	},
 	{}
 };
 
diff --git a/ipc/ipcns_notifier.c b/ipc/ipcns_notifier.c
index 70ff09183f7b..b9b31a4f77e1 100644
--- a/ipc/ipcns_notifier.c
+++ b/ipc/ipcns_notifier.c
@@ -55,25 +55,35 @@ static int ipcns_callback(struct notifier_block *self,
 
 int register_ipcns_notifier(struct ipc_namespace *ns)
 {
+	int rc;
+
 	memset(&ns->ipcns_nb, 0, sizeof(ns->ipcns_nb));
 	ns->ipcns_nb.notifier_call = ipcns_callback;
 	ns->ipcns_nb.priority = IPCNS_CALLBACK_PRI;
-	return blocking_notifier_chain_register(&ipcns_chain, &ns->ipcns_nb);
+	rc = blocking_notifier_chain_register(&ipcns_chain, &ns->ipcns_nb);
+	if (!rc)
+		ns->auto_msgmni = 1;
+	return rc;
 }
 
 int cond_register_ipcns_notifier(struct ipc_namespace *ns)
 {
+	int rc;
+
 	memset(&ns->ipcns_nb, 0, sizeof(ns->ipcns_nb));
 	ns->ipcns_nb.notifier_call = ipcns_callback;
 	ns->ipcns_nb.priority = IPCNS_CALLBACK_PRI;
-	return blocking_notifier_chain_cond_register(&ipcns_chain,
+	rc = blocking_notifier_chain_cond_register(&ipcns_chain,
 							&ns->ipcns_nb);
+	if (!rc)
+		ns->auto_msgmni = 1;
+	return rc;
 }
 
-int unregister_ipcns_notifier(struct ipc_namespace *ns)
+void unregister_ipcns_notifier(struct ipc_namespace *ns)
 {
-	return blocking_notifier_chain_unregister(&ipcns_chain,
-						&ns->ipcns_nb);
+	blocking_notifier_chain_unregister(&ipcns_chain, &ns->ipcns_nb);
+	ns->auto_msgmni = 0;
 }
 
 int ipcns_notify(unsigned long val)
-- 
cgit v1.2.3


From d805dda412346225a50af2d399d958a4bc676c38 Mon Sep 17 00:00:00 2001
From: Abdel Benamrouche <draconux@gmail.com>
Date: Fri, 25 Jul 2008 01:48:25 -0700
Subject: fs/partition/check.c: fix return value warning

fs/partitions/check.c:381: warning: ignoring return value of ___device_add___,
  declared with attribute warn_unused_result

[akpm@linux-foundation.org: multiple-return-statements-per-function are evil]
Signed-off-by: Abdel Benamrouche <draconux@gmail.com>
Cc: Jens Axboe <jens.axboe@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/partitions/check.c | 28 ++++++++++++++++++++++------
 include/linux/genhd.h |  2 +-
 2 files changed, 23 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index efef715135d3..2e6413fbd2d8 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -344,18 +344,18 @@ static ssize_t whole_disk_show(struct device *dev,
 static DEVICE_ATTR(whole_disk, S_IRUSR | S_IRGRP | S_IROTH,
 		   whole_disk_show, NULL);
 
-void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len, int flags)
+int add_partition(struct gendisk *disk, int part, sector_t start, sector_t len, int flags)
 {
 	struct hd_struct *p;
 	int err;
 
 	p = kzalloc(sizeof(*p), GFP_KERNEL);
 	if (!p)
-		return;
+		return -ENOMEM;
 
 	if (!init_part_stats(p)) {
-		kfree(p);
-		return;
+		err = -ENOMEM;
+		goto out0;
 	}
 	p->start_sect = start;
 	p->nr_sects = len;
@@ -378,15 +378,31 @@ void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len,
 
 	/* delay uevent until 'holders' subdir is created */
 	p->dev.uevent_suppress = 1;
-	device_add(&p->dev);
+	err = device_add(&p->dev);
+	if (err)
+		goto out1;
 	partition_sysfs_add_subdir(p);
 	p->dev.uevent_suppress = 0;
-	if (flags & ADDPART_FLAG_WHOLEDISK)
+	if (flags & ADDPART_FLAG_WHOLEDISK) {
 		err = device_create_file(&p->dev, &dev_attr_whole_disk);
+		if (err)
+			goto out2;
+	}
 
 	/* suppress uevent if the disk supresses it */
 	if (!disk->dev.uevent_suppress)
 		kobject_uevent(&p->dev.kobj, KOBJ_ADD);
+
+	return 0;
+
+out2:
+	device_del(&p->dev);
+out1:
+	put_device(&p->dev);
+	free_part_stats(p);
+out0:
+	kfree(p);
+	return err;
 }
 
 /* Not exported, helper to add_disk(). */
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index e8787417f65a..118216f1bd3c 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -541,7 +541,7 @@ extern dev_t blk_lookup_devt(const char *name, int part);
 extern char *disk_name (struct gendisk *hd, int part, char *buf);
 
 extern int rescan_partitions(struct gendisk *disk, struct block_device *bdev);
-extern void add_partition(struct gendisk *, int, sector_t, sector_t, int);
+extern int __must_check add_partition(struct gendisk *, int, sector_t, sector_t, int);
 extern void delete_partition(struct gendisk *, int);
 extern void printk_all_partitions(void);
 
-- 
cgit v1.2.3


From 6e644c3126149b65460610fe5a00d8a162092abe Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Fri, 25 Jul 2008 01:48:28 -0700
Subject: move proc_kmsg_operations to fs/proc/internal.h

This patch moves the extern of struct proc_kmsg_operations to
fs/proc/internal.h and adds an #include "internal.h" to fs/proc/kmsg.c
so that the latter sees the former.

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/internal.h      | 1 +
 fs/proc/kmsg.c          | 2 ++
 include/linux/proc_fs.h | 1 -
 3 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 28cbca805905..8d67616e7bb0 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -63,6 +63,7 @@ extern const struct file_operations proc_smaps_operations;
 extern const struct file_operations proc_clear_refs_operations;
 extern const struct file_operations proc_pagemap_operations;
 extern const struct file_operations proc_net_operations;
+extern const struct file_operations proc_kmsg_operations;
 extern const struct inode_operations proc_net_inode_operations;
 
 void free_proc_entry(struct proc_dir_entry *de);
diff --git a/fs/proc/kmsg.c b/fs/proc/kmsg.c
index ff3b90b56e9d..9fd5df3f40ce 100644
--- a/fs/proc/kmsg.c
+++ b/fs/proc/kmsg.c
@@ -15,6 +15,8 @@
 #include <asm/uaccess.h>
 #include <asm/io.h>
 
+#include "internal.h"
+
 extern wait_queue_head_t log_wait;
 
 extern int do_syslog(int type, char __user *bug, int count);
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 15a9eaf4a802..cdabc2fc02f7 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -138,7 +138,6 @@ extern int proc_readdir(struct file *, void *, filldir_t);
 extern struct dentry *proc_lookup(struct inode *, struct dentry *, struct nameidata *);
 
 extern const struct file_operations proc_kcore_operations;
-extern const struct file_operations proc_kmsg_operations;
 extern const struct file_operations ppc_htab_operations;
 
 extern int pid_ns_prepare_proc(struct pid_namespace *ns);
-- 
cgit v1.2.3


From 881adb85358309ea9c6f707394002719982ec607 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Fri, 25 Jul 2008 01:48:29 -0700
Subject: proc: always do ->release

Current two-stage scheme of removing PDE emphasizes one bug in proc:

		open
				rmmod
				remove_proc_entry
		close

->release won't be called because ->proc_fops were cleared.  In simple
cases it's small memory leak.

For every ->open, ->release has to be done.  List of openers is introduced
which is traversed at remove_proc_entry() if neeeded.

Discussions with Al long ago (sigh).

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/generic.c       | 14 ++++++++++
 fs/proc/inode.c         | 74 ++++++++++++++++++++++++++++++++++++++++++++++---
 fs/proc/internal.h      |  7 +++++
 include/linux/proc_fs.h |  1 +
 4 files changed, 92 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 43e54e86cefd..bc0a0dd2d844 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -597,6 +597,7 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent,
 	ent->pde_users = 0;
 	spin_lock_init(&ent->pde_unload_lock);
 	ent->pde_unload_completion = NULL;
+	INIT_LIST_HEAD(&ent->pde_openers);
  out:
 	return ent;
 }
@@ -789,6 +790,19 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
 	spin_unlock(&de->pde_unload_lock);
 
 continue_removing:
+	spin_lock(&de->pde_unload_lock);
+	while (!list_empty(&de->pde_openers)) {
+		struct pde_opener *pdeo;
+
+		pdeo = list_first_entry(&de->pde_openers, struct pde_opener, lh);
+		list_del(&pdeo->lh);
+		spin_unlock(&de->pde_unload_lock);
+		pdeo->release(pdeo->inode, pdeo->file);
+		kfree(pdeo);
+		spin_lock(&de->pde_unload_lock);
+	}
+	spin_unlock(&de->pde_unload_lock);
+
 	if (S_ISDIR(de->mode))
 		parent->nlink--;
 	de->nlink = 0;
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index b08d10017911..354c08485825 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -126,12 +126,17 @@ static const struct super_operations proc_sops = {
 	.remount_fs	= proc_remount,
 };
 
-static void pde_users_dec(struct proc_dir_entry *pde)
+static void __pde_users_dec(struct proc_dir_entry *pde)
 {
-	spin_lock(&pde->pde_unload_lock);
 	pde->pde_users--;
 	if (pde->pde_unload_completion && pde->pde_users == 0)
 		complete(pde->pde_unload_completion);
+}
+
+static void pde_users_dec(struct proc_dir_entry *pde)
+{
+	spin_lock(&pde->pde_unload_lock);
+	__pde_users_dec(pde);
 	spin_unlock(&pde->pde_unload_lock);
 }
 
@@ -318,36 +323,97 @@ static int proc_reg_open(struct inode *inode, struct file *file)
 	struct proc_dir_entry *pde = PDE(inode);
 	int rv = 0;
 	int (*open)(struct inode *, struct file *);
+	int (*release)(struct inode *, struct file *);
+	struct pde_opener *pdeo;
+
+	/*
+	 * What for, you ask? Well, we can have open, rmmod, remove_proc_entry
+	 * sequence. ->release won't be called because ->proc_fops will be
+	 * cleared. Depending on complexity of ->release, consequences vary.
+	 *
+	 * We can't wait for mercy when close will be done for real, it's
+	 * deadlockable: rmmod foo </proc/foo . So, we're going to do ->release
+	 * by hand in remove_proc_entry(). For this, save opener's credentials
+	 * for later.
+	 */
+	pdeo = kmalloc(sizeof(struct pde_opener), GFP_KERNEL);
+	if (!pdeo)
+		return -ENOMEM;
 
 	spin_lock(&pde->pde_unload_lock);
 	if (!pde->proc_fops) {
 		spin_unlock(&pde->pde_unload_lock);
+		kfree(pdeo);
 		return rv;
 	}
 	pde->pde_users++;
 	open = pde->proc_fops->open;
+	release = pde->proc_fops->release;
 	spin_unlock(&pde->pde_unload_lock);
 
 	if (open)
 		rv = open(inode, file);
 
-	pde_users_dec(pde);
+	spin_lock(&pde->pde_unload_lock);
+	if (rv == 0 && release) {
+		/* To know what to release. */
+		pdeo->inode = inode;
+		pdeo->file = file;
+		/* Strictly for "too late" ->release in proc_reg_release(). */
+		pdeo->release = release;
+		list_add(&pdeo->lh, &pde->pde_openers);
+	} else
+		kfree(pdeo);
+	__pde_users_dec(pde);
+	spin_unlock(&pde->pde_unload_lock);
 	return rv;
 }
 
+static struct pde_opener *find_pde_opener(struct proc_dir_entry *pde,
+					struct inode *inode, struct file *file)
+{
+	struct pde_opener *pdeo;
+
+	list_for_each_entry(pdeo, &pde->pde_openers, lh) {
+		if (pdeo->inode == inode && pdeo->file == file)
+			return pdeo;
+	}
+	return NULL;
+}
+
 static int proc_reg_release(struct inode *inode, struct file *file)
 {
 	struct proc_dir_entry *pde = PDE(inode);
 	int rv = 0;
 	int (*release)(struct inode *, struct file *);
+	struct pde_opener *pdeo;
 
 	spin_lock(&pde->pde_unload_lock);
+	pdeo = find_pde_opener(pde, inode, file);
 	if (!pde->proc_fops) {
-		spin_unlock(&pde->pde_unload_lock);
+		/*
+		 * Can't simply exit, __fput() will think that everything is OK,
+		 * and move on to freeing struct file. remove_proc_entry() will
+		 * find slacker in opener's list and will try to do non-trivial
+		 * things with struct file. Therefore, remove opener from list.
+		 *
+		 * But if opener is removed from list, who will ->release it?
+		 */
+		if (pdeo) {
+			list_del(&pdeo->lh);
+			spin_unlock(&pde->pde_unload_lock);
+			rv = pdeo->release(inode, file);
+			kfree(pdeo);
+		} else
+			spin_unlock(&pde->pde_unload_lock);
 		return rv;
 	}
 	pde->pde_users++;
 	release = pde->proc_fops->release;
+	if (pdeo) {
+		list_del(&pdeo->lh);
+		kfree(pdeo);
+	}
 	spin_unlock(&pde->pde_unload_lock);
 
 	if (release)
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 8d67616e7bb0..442202314d53 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -89,3 +89,10 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *ino,
 		struct dentry *dentry);
 int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent,
 		filldir_t filldir);
+
+struct pde_opener {
+	struct inode *inode;
+	struct file *file;
+	int (*release)(struct inode *, struct file *);
+	struct list_head lh;
+};
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index cdabc2fc02f7..f560d1705afe 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -79,6 +79,7 @@ struct proc_dir_entry {
 	int pde_users;	/* number of callers into module in progress */
 	spinlock_t pde_unload_lock; /* proc_fops checks and pde_users bumps */
 	struct completion *pde_unload_completion;
+	struct list_head pde_openers;	/* who did ->open, but not ->release */
 };
 
 struct kcore_list {
-- 
cgit v1.2.3


From 3ae4eed34be0177a8e003411a84e4ee212adbced Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Fri, 25 Jul 2008 01:48:34 -0700
Subject: proper pid{hash,map}_init() prototypes

This patch adds proper prototypes for pid{hash,map}_init() in
include/linux/pid_namespace.h

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/pid_namespace.h | 3 +++
 init/main.c                   | 2 --
 2 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index caff5283d15c..1a49ab5ec7b9 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -85,4 +85,7 @@ static inline struct task_struct *task_child_reaper(struct task_struct *tsk)
 	return tsk->nsproxy->pid_ns->child_reaper;
 }
 
+void pidhash_init(void);
+void pidmap_init(void);
+
 #endif /* _LINUX_PID_NS_H */
diff --git a/init/main.c b/init/main.c
index 2769dc031c62..0604cbcaf1e4 100644
--- a/init/main.c
+++ b/init/main.c
@@ -87,8 +87,6 @@ extern void init_IRQ(void);
 extern void fork_init(unsigned long);
 extern void mca_init(void);
 extern void sbus_init(void);
-extern void pidhash_init(void);
-extern void pidmap_init(void);
 extern void prio_tree_init(void);
 extern void radix_tree_init(void);
 extern void free_initmem(void);
-- 
cgit v1.2.3


From 33166b1ffca5e1945246bcaa77d72a22b0d3e531 Mon Sep 17 00:00:00 2001
From: Richard Kennedy <richard@rsk.demon.co.uk>
Date: Fri, 25 Jul 2008 01:48:35 -0700
Subject: shrink struct pid by removing padding on 64 bit builds

When struct pid is built on a 64 bit platform gcc has to insert padding to
maintain the correct alignment, by simply reordering its members the
memory usage shrinks from 88 bytes to 80.

I've successfully run with this patch on my desktop AMD64 machine.

There are no significant kernel size changes to a default config.X86_64
on the latest git v2.6.26-rc1

   text    data     bss     dec     hex filename
5404828  976760  734280 7115868  6c945c vmlinux
5404811  976760  734280 7115851  6c944b vmlinux.pid-patch

Acked-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/pid.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/pid.h b/include/linux/pid.h
index c21c7e8124a7..6f084b9e2c40 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -57,10 +57,10 @@ struct upid {
 struct pid
 {
 	atomic_t count;
+	unsigned int level;
 	/* lists of tasks that use this pid */
 	struct hlist_head tasks[PIDTYPE_MAX];
 	struct rcu_head rcu;
-	unsigned int level;
 	struct upid numbers[1];
 };
 
-- 
cgit v1.2.3


From 19b0cfcca41dd772065671ad0584e1cea0f3fd13 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Fri, 25 Jul 2008 01:48:35 -0700
Subject: pidns: remove now unused kill_proc function

This function operated on a pid_t to kill a task, which is no longer valid
in a containerized system.

It has finally lost all its users and we can safely remove it from the
tree.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Cc: Oleg Nesterov <oleg@tv-sign.ru>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched.h |  1 -
 kernel/signal.c       | 12 ------------
 2 files changed, 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 0560999eb1db..134cb5cb506c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1800,7 +1800,6 @@ extern void force_sig(int, struct task_struct *);
 extern void force_sig_specific(int, struct task_struct *);
 extern int send_sig(int, struct task_struct *, int);
 extern void zap_other_threads(struct task_struct *p);
-extern int kill_proc(pid_t, int, int);
 extern struct sigqueue *sigqueue_alloc(void);
 extern void sigqueue_free(struct sigqueue *);
 extern int send_sigqueue(struct sigqueue *,  struct task_struct *, int group);
diff --git a/kernel/signal.c b/kernel/signal.c
index 5c7b7eaa0dc6..82c3545596c5 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1228,17 +1228,6 @@ int kill_pid(struct pid *pid, int sig, int priv)
 }
 EXPORT_SYMBOL(kill_pid);
 
-int
-kill_proc(pid_t pid, int sig, int priv)
-{
-	int ret;
-
-	rcu_read_lock();
-	ret = kill_pid_info(sig, __si_special(priv), find_pid(pid));
-	rcu_read_unlock();
-	return ret;
-}
-
 /*
  * These functions support sending signals using preallocated sigqueue
  * structures.  This is needed "because realtime applications cannot
@@ -1906,7 +1895,6 @@ EXPORT_SYMBOL(recalc_sigpending);
 EXPORT_SYMBOL_GPL(dequeue_signal);
 EXPORT_SYMBOL(flush_signals);
 EXPORT_SYMBOL(force_sig);
-EXPORT_SYMBOL(kill_proc);
 EXPORT_SYMBOL(ptrace_notify);
 EXPORT_SYMBOL(send_sig);
 EXPORT_SYMBOL(send_sig_info);
-- 
cgit v1.2.3


From e49859e71e0318b564de1546bdc30fab738f9deb Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Fri, 25 Jul 2008 01:48:36 -0700
Subject: pidns: remove now unused find_pid function.

This one had the only users so far - the kill_proc, which is removed, so
drop this (invalid in namespaced world) call too.

And of course - erase all references on it from comments.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Cc: Oleg Nesterov <oleg@tv-sign.ru>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/pid.h   | 4 +---
 include/linux/sched.h | 2 +-
 kernel/pid.c          | 8 +-------
 3 files changed, 3 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pid.h b/include/linux/pid.h
index 6f084b9e2c40..ff1b2a5814d4 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -48,7 +48,7 @@ enum pid_type
  */
 
 struct upid {
-	/* Try to keep pid_chain in the same cacheline as nr for find_pid */
+	/* Try to keep pid_chain in the same cacheline as nr for find_vpid */
 	int nr;
 	struct pid_namespace *ns;
 	struct hlist_node pid_chain;
@@ -105,14 +105,12 @@ extern struct pid_namespace init_pid_ns;
  * or rcu_read_lock() held.
  *
  * find_pid_ns() finds the pid in the namespace specified
- * find_pid() find the pid by its global id, i.e. in the init namespace
  * find_vpid() finr the pid by its virtual id, i.e. in the current namespace
  *
  * see also find_task_by_pid() set in include/linux/sched.h
  */
 extern struct pid *find_pid_ns(int nr, struct pid_namespace *ns);
 extern struct pid *find_vpid(int nr);
-extern struct pid *find_pid(int nr);
 
 /*
  * Lookup a PID in the hash table, and return with it's count elevated.
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 134cb5cb506c..182da1550fad 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1718,7 +1718,7 @@ extern struct pid_namespace init_pid_ns;
  * find_task_by_pid():
  *      finds a task by its global pid
  *
- * see also find_pid() etc in include/linux/pid.h
+ * see also find_vpid() etc in include/linux/pid.h
  */
 
 extern struct task_struct *find_task_by_pid_type_ns(int type, int pid,
diff --git a/kernel/pid.c b/kernel/pid.c
index 753fd90d9ec1..064e76afa507 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -309,12 +309,6 @@ struct pid *find_vpid(int nr)
 }
 EXPORT_SYMBOL_GPL(find_vpid);
 
-struct pid *find_pid(int nr)
-{
-	return find_pid_ns(nr, &init_pid_ns);
-}
-EXPORT_SYMBOL_GPL(find_pid);
-
 /*
  * attach_pid() must be called with the tasklist_lock write-held.
  */
@@ -483,7 +477,7 @@ EXPORT_SYMBOL(task_session_nr_ns);
 /*
  * Used by proc to find the first pid that is greater then or equal to nr.
  *
- * If there is a pid at nr this function is exactly the same as find_pid.
+ * If there is a pid at nr this function is exactly the same as find_pid_ns.
  */
 struct pid *find_ge_pid(int nr, struct pid_namespace *ns)
 {
-- 
cgit v1.2.3


From dbda0de52618d13d1b927c7ba7bb839cfddc4e8c Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Fri, 25 Jul 2008 01:48:37 -0700
Subject: pidns: remove find_task_by_pid, unused for a long time

It seems to me that it was a mistake marking this function as deprecated
and scheduling it for removal, rather than resolutely removing it after
the last caller's death.

Anyway - better late, then never.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Cc: Oleg Nesterov <oleg@tv-sign.ru>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/feature-removal-schedule.txt | 18 ------------------
 include/linux/pid.h                        |  2 +-
 include/linux/sched.h                      |  6 ------
 3 files changed, 1 insertion(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index 09c4a1efb8e3..721c71b86e06 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -138,24 +138,6 @@ Who:	Kay Sievers <kay.sievers@suse.de>
 
 ---------------------------
 
-What:	find_task_by_pid
-When:	2.6.26
-Why:	With pid namespaces, calling this funciton will return the
-	wrong task when called from inside a namespace.
-
-	The best way to save a task pid and find a task by this
-	pid later, is to find this task's struct pid pointer (or get
-	it directly from the task) and call pid_task() later.
-
-	If someone really needs to get a task by its pid_t, then
-	he most likely needs the find_task_by_vpid() to get the
-	task from the same namespace as the current task is in, but
-	this may be not so in general.
-
-Who:	Pavel Emelyanov <xemul@openvz.org>
-
----------------------------
-
 What:	ACPI procfs interface
 When:	July 2008
 Why:	ACPI sysfs conversion should be finished by January 2008.
diff --git a/include/linux/pid.h b/include/linux/pid.h
index ff1b2a5814d4..22921ac4cfd9 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -107,7 +107,7 @@ extern struct pid_namespace init_pid_ns;
  * find_pid_ns() finds the pid in the namespace specified
  * find_vpid() finr the pid by its virtual id, i.e. in the current namespace
  *
- * see also find_task_by_pid() set in include/linux/sched.h
+ * see also find_task_by_vpid() set in include/linux/sched.h
  */
 extern struct pid *find_pid_ns(int nr, struct pid_namespace *ns);
 extern struct pid *find_vpid(int nr);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 182da1550fad..354ef478a80d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1715,8 +1715,6 @@ extern struct pid_namespace init_pid_ns;
  *      finds a task by its pid in the specified namespace
  * find_task_by_vpid():
  *      finds a task by its virtual pid
- * find_task_by_pid():
- *      finds a task by its global pid
  *
  * see also find_vpid() etc in include/linux/pid.h
  */
@@ -1724,10 +1722,6 @@ extern struct pid_namespace init_pid_ns;
 extern struct task_struct *find_task_by_pid_type_ns(int type, int pid,
 		struct pid_namespace *ns);
 
-static inline struct task_struct *__deprecated find_task_by_pid(pid_t nr)
-{
-	return find_task_by_pid_type_ns(PIDTYPE_PID, nr, &init_pid_ns);
-}
 extern struct task_struct *find_task_by_vpid(pid_t nr);
 extern struct task_struct *find_task_by_pid_ns(pid_t nr,
 		struct pid_namespace *ns);
-- 
cgit v1.2.3


From 49b5cf34727a6c1be1568ab28e89a2d9a6bf51e0 Mon Sep 17 00:00:00 2001
From: Jonathan Lim <jlim@sgi.com>
Date: Fri, 25 Jul 2008 01:48:40 -0700
Subject: accounting: account for user time when updating memory integrals

Adapt acct_update_integrals() to include user time when calculating the time
difference.  The units of acct_rss_mem1 and acct_vm_mem1 are also changed from
pages-jiffies to pages-usecs to avoid calling jiffies_to_usecs() in
xacct_add_tsk() which might overflow.

Signed-off-by: Jonathan Lim <jlim@sgi.com>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched.h |  2 +-
 kernel/sched.c        |  2 ++
 kernel/tsacct.c       | 21 ++++++++++++++-------
 3 files changed, 17 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 354ef478a80d..af780f299c7c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1257,7 +1257,7 @@ struct task_struct {
 #if defined(CONFIG_TASK_XACCT)
 	u64 acct_rss_mem1;	/* accumulated rss usage */
 	u64 acct_vm_mem1;	/* accumulated virtual memory usage */
-	cputime_t acct_stimexpd;/* stime since last update */
+	cputime_t acct_timexpd;	/* stime + utime since last update */
 #endif
 #ifdef CONFIG_CPUSETS
 	nodemask_t mems_allowed;
diff --git a/kernel/sched.c b/kernel/sched.c
index 6acf749d3336..0047bd9b96aa 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4046,6 +4046,8 @@ void account_user_time(struct task_struct *p, cputime_t cputime)
 		cpustat->nice = cputime64_add(cpustat->nice, tmp);
 	else
 		cpustat->user = cputime64_add(cpustat->user, tmp);
+	/* Account for user time used */
+	acct_update_integrals(p);
 }
 
 /*
diff --git a/kernel/tsacct.c b/kernel/tsacct.c
index 4ab1b584961b..1da6990af8e0 100644
--- a/kernel/tsacct.c
+++ b/kernel/tsacct.c
@@ -84,9 +84,9 @@ void xacct_add_tsk(struct taskstats *stats, struct task_struct *p)
 {
 	struct mm_struct *mm;
 
-	/* convert pages-jiffies to Mbyte-usec */
-	stats->coremem = jiffies_to_usecs(p->acct_rss_mem1) * PAGE_SIZE / MB;
-	stats->virtmem = jiffies_to_usecs(p->acct_vm_mem1) * PAGE_SIZE / MB;
+	/* convert pages-usec to Mbyte-usec */
+	stats->coremem = p->acct_rss_mem1 * PAGE_SIZE / MB;
+	stats->virtmem = p->acct_vm_mem1 * PAGE_SIZE / MB;
 	mm = get_task_mm(p);
 	if (mm) {
 		/* adjust to KB unit */
@@ -118,12 +118,19 @@ void xacct_add_tsk(struct taskstats *stats, struct task_struct *p)
 void acct_update_integrals(struct task_struct *tsk)
 {
 	if (likely(tsk->mm)) {
-		long delta = cputime_to_jiffies(
-			cputime_sub(tsk->stime, tsk->acct_stimexpd));
+		cputime_t time, dtime;
+		struct timeval value;
+		u64 delta;
+
+		time = tsk->stime + tsk->utime;
+		dtime = cputime_sub(time, tsk->acct_timexpd);
+		jiffies_to_timeval(cputime_to_jiffies(dtime), &value);
+		delta = value.tv_sec;
+		delta = delta * USEC_PER_SEC + value.tv_usec;
 
 		if (delta == 0)
 			return;
-		tsk->acct_stimexpd = tsk->stime;
+		tsk->acct_timexpd = time;
 		tsk->acct_rss_mem1 += delta * get_mm_rss(tsk->mm);
 		tsk->acct_vm_mem1 += delta * tsk->mm->total_vm;
 	}
@@ -135,7 +142,7 @@ void acct_update_integrals(struct task_struct *tsk)
  */
 void acct_clear_integrals(struct task_struct *tsk)
 {
-	tsk->acct_stimexpd = 0;
+	tsk->acct_timexpd = 0;
 	tsk->acct_rss_mem1 = 0;
 	tsk->acct_vm_mem1 = 0;
 }
-- 
cgit v1.2.3


From 20fad13ac66ac001c19220d3d08b4de5b6cca6e1 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Fri, 25 Jul 2008 01:48:43 -0700
Subject: pidns: add the struct bsd_acct_struct pointer on pid_namespace struct

All the bsdacct-related info will be stored in the area, pointer by this
one.

It will be NULL automatically for all new namespaces.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/pid_namespace.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index 1a49ab5ec7b9..1af82c4e17d4 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -14,6 +14,8 @@ struct pidmap {
 
 #define PIDMAP_ENTRIES         ((PID_MAX_LIMIT + 8*PAGE_SIZE - 1)/PAGE_SIZE/8)
 
+struct bsd_acct_struct;
+
 struct pid_namespace {
 	struct kref kref;
 	struct pidmap pidmap[PIDMAP_ENTRIES];
@@ -25,6 +27,9 @@ struct pid_namespace {
 #ifdef CONFIG_PROC_FS
 	struct vfsmount *proc_mnt;
 #endif
+#ifdef CONFIG_BSD_PROCESS_ACCT
+	struct bsd_acct_struct *bacct;
+#endif
 };
 
 extern struct pid_namespace init_pid_ns;
-- 
cgit v1.2.3


From 0b6b030fc30d169bb406b34b4fc60d99dde4a9c6 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Fri, 25 Jul 2008 01:48:47 -0700
Subject: bsdacct: switch from global bsd_acct_struct instance to per-pidns one

Allocate the structure on the first call to sys_acct().  After this each
namespace, that ordered the accounting, will live with this structure till
its own death.

Two notes
- routines, that close the accounting on fs umount time use
  the init_pid_ns's acct by now;
- accounting routine accounts to dying task's namespace
  (also by now).

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/acct.h   |  3 ++
 kernel/acct.c          | 84 +++++++++++++++++++++++++++++++++++++++-----------
 kernel/pid_namespace.c |  2 ++
 3 files changed, 71 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/acct.h b/include/linux/acct.h
index e8cae54e8d88..882dc7248766 100644
--- a/include/linux/acct.h
+++ b/include/linux/acct.h
@@ -120,17 +120,20 @@ struct acct_v3
 struct vfsmount;
 struct super_block;
 struct pacct_struct;
+struct pid_namespace;
 extern void acct_auto_close_mnt(struct vfsmount *m);
 extern void acct_auto_close(struct super_block *sb);
 extern void acct_init_pacct(struct pacct_struct *pacct);
 extern void acct_collect(long exitcode, int group_dead);
 extern void acct_process(void);
+extern void acct_exit_ns(struct pid_namespace *);
 #else
 #define acct_auto_close_mnt(x)	do { } while (0)
 #define acct_auto_close(x)	do { } while (0)
 #define acct_init_pacct(x)	do { } while (0)
 #define acct_collect(x,y)	do { } while (0)
 #define acct_process()		do { } while (0)
+#define acct_exit_ns(ns)	do { } while (0)
 #endif
 
 /*
diff --git a/kernel/acct.c b/kernel/acct.c
index 72d4760c8da8..febbbc67157e 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -93,8 +93,6 @@ struct bsd_acct_struct {
 
 static DEFINE_SPINLOCK(acct_lock);
 
-static struct bsd_acct_struct acct_globals __cacheline_aligned;
-
 /*
  * Called whenever the timer says to check the free space.
  */
@@ -176,7 +174,8 @@ out:
  *
  * NOTE: acct_lock MUST be held on entry and exit.
  */
-static void acct_file_reopen(struct bsd_acct_struct *acct, struct file *file)
+static void acct_file_reopen(struct bsd_acct_struct *acct, struct file *file,
+		struct pid_namespace *ns)
 {
 	struct file *old_acct = NULL;
 	struct pid_namespace *old_ns = NULL;
@@ -188,10 +187,11 @@ static void acct_file_reopen(struct bsd_acct_struct *acct, struct file *file)
 		acct->active = 0;
 		acct->needcheck = 0;
 		acct->file = NULL;
+		acct->ns = NULL;
 	}
 	if (file) {
 		acct->file = file;
-		acct->ns = get_pid_ns(task_active_pid_ns(current));
+		acct->ns = ns;
 		acct->needcheck = 0;
 		acct->active = 1;
 		/* It's been deleted if it was used before so this is safe */
@@ -204,7 +204,6 @@ static void acct_file_reopen(struct bsd_acct_struct *acct, struct file *file)
 		spin_unlock(&acct_lock);
 		do_acct_process(acct, old_ns, old_acct);
 		filp_close(old_acct, NULL);
-		put_pid_ns(old_ns);
 		spin_lock(&acct_lock);
 	}
 }
@@ -213,6 +212,8 @@ static int acct_on(char *name)
 {
 	struct file *file;
 	int error;
+	struct pid_namespace *ns;
+	struct bsd_acct_struct *acct = NULL;
 
 	/* Difference from BSD - they don't do O_APPEND */
 	file = filp_open(name, O_WRONLY|O_APPEND|O_LARGEFILE, 0);
@@ -229,18 +230,34 @@ static int acct_on(char *name)
 		return -EIO;
 	}
 
+	ns = task_active_pid_ns(current);
+	if (ns->bacct == NULL) {
+		acct = kzalloc(sizeof(struct bsd_acct_struct), GFP_KERNEL);
+		if (acct == NULL) {
+			filp_close(file, NULL);
+			return -ENOMEM;
+		}
+	}
+
 	error = security_acct(file);
 	if (error) {
+		kfree(acct);
 		filp_close(file, NULL);
 		return error;
 	}
 
 	spin_lock(&acct_lock);
+	if (ns->bacct == NULL) {
+		ns->bacct = acct;
+		acct = NULL;
+	}
+
 	mnt_pin(file->f_path.mnt);
-	acct_file_reopen(&acct_globals, file);
+	acct_file_reopen(ns->bacct, file, ns);
 	spin_unlock(&acct_lock);
 
 	mntput(file->f_path.mnt); /* it's pinned, now give up active reference */
+	kfree(acct);
 
 	return 0;
 }
@@ -270,10 +287,16 @@ asmlinkage long sys_acct(const char __user *name)
 		error = acct_on(tmp);
 		putname(tmp);
 	} else {
+		struct bsd_acct_struct *acct;
+
+		acct = task_active_pid_ns(current)->bacct;
+		if (acct == NULL)
+			return 0;
+
 		error = security_acct(NULL);
 		if (!error) {
 			spin_lock(&acct_lock);
-			acct_file_reopen(&acct_globals, NULL);
+			acct_file_reopen(acct, NULL, NULL);
 			spin_unlock(&acct_lock);
 		}
 	}
@@ -289,9 +312,15 @@ asmlinkage long sys_acct(const char __user *name)
  */
 void acct_auto_close_mnt(struct vfsmount *m)
 {
+	struct bsd_acct_struct *acct;
+
+	acct = init_pid_ns.bacct;
+	if (acct == NULL)
+		return;
+
 	spin_lock(&acct_lock);
-	if (acct_globals.file && acct_globals.file->f_path.mnt == m)
-		acct_file_reopen(&acct_globals, NULL);
+	if (acct->file && acct->file->f_path.mnt == m)
+		acct_file_reopen(acct, NULL, NULL);
 	spin_unlock(&acct_lock);
 }
 
@@ -304,10 +333,29 @@ void acct_auto_close_mnt(struct vfsmount *m)
  */
 void acct_auto_close(struct super_block *sb)
 {
+	struct bsd_acct_struct *acct;
+
+	acct = init_pid_ns.bacct;
+	if (acct == NULL)
+		return;
+
 	spin_lock(&acct_lock);
-	if (acct_globals.file &&
-	    acct_globals.file->f_path.mnt->mnt_sb == sb) {
-		acct_file_reopen(&acct_globals, NULL);
+	if (acct->file && acct->file->f_path.mnt->mnt_sb == sb)
+		acct_file_reopen(acct, NULL, NULL);
+	spin_unlock(&acct_lock);
+}
+
+void acct_exit_ns(struct pid_namespace *ns)
+{
+	struct bsd_acct_struct *acct;
+
+	spin_lock(&acct_lock);
+	acct = ns->bacct;
+	if (acct != NULL) {
+		if (acct->file != NULL)
+			acct_file_reopen(acct, NULL, NULL);
+
+		kfree(acct);
 	}
 	spin_unlock(&acct_lock);
 }
@@ -587,25 +635,25 @@ void acct_collect(long exitcode, int group_dead)
 void acct_process(void)
 {
 	struct file *file = NULL;
-	struct pid_namespace *ns;
+	struct pid_namespace *ns = task_active_pid_ns(current);
+	struct bsd_acct_struct *acct;
 
+	acct = ns->bacct;
 	/*
 	 * accelerate the common fastpath:
 	 */
-	if (!acct_globals.file)
+	if (!acct || !acct->file)
 		return;
 
 	spin_lock(&acct_lock);
-	file = acct_globals.file;
+	file = acct->file;
 	if (unlikely(!file)) {
 		spin_unlock(&acct_lock);
 		return;
 	}
 	get_file(file);
-	ns = get_pid_ns(acct_globals.ns);
 	spin_unlock(&acct_lock);
 
-	do_acct_process(&acct_globals, ns, file);
+	do_acct_process(acct, ns, file);
 	fput(file);
-	put_pid_ns(ns);
 }
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index 06331cc1c3f5..ea567b78d1aa 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -12,6 +12,7 @@
 #include <linux/pid_namespace.h>
 #include <linux/syscalls.h>
 #include <linux/err.h>
+#include <linux/acct.h>
 
 #define BITS_PER_PAGE		(PAGE_SIZE*8)
 
@@ -181,6 +182,7 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
 
 	/* Child reaper for the pid namespace is going away */
 	pid_ns->child_reaper = NULL;
+	acct_exit_ns(pid_ns);
 	return;
 }
 
-- 
cgit v1.2.3


From 297c5d92634c809cef23d73e7b2556f2528ff7e2 Mon Sep 17 00:00:00 2001
From: Andrea Righi <righi.andrea@gmail.com>
Date: Fri, 25 Jul 2008 01:48:49 -0700
Subject: task IO accounting: provide distinct tgid/tid I/O statistics

Report per-thread I/O statistics in /proc/pid/task/tid/io and aggregate
parent I/O statistics in /proc/pid/io.  This approach follows the same
model used to account per-process and per-thread CPU times.

As a practial application, this allows for example to quickly find the top
I/O consumer when a process spawns many child threads that perform the
actual I/O work, because the aggregated I/O statistics can always be found
in /proc/pid/io.

[ Oleg Nesterov points out that we should check that the task is still
  alive before we iterate over the threads, but also says that we can do
  that fixup on top of this later.  - Linus ]

Acked-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Signed-off-by: Andrea Righi <righi.andrea@gmail.com>
Cc: Matt Heaton <matt@hostmonster.com>
Cc: Shailabh Nagar <nagar@watson.ibm.com>
Acked-by-with-comments: Oleg Nesterov <oleg@tv-sign.ru>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/base.c        | 86 ++++++++++++++++++++++++++++++++++++++++++---------
 include/linux/sched.h |  4 +++
 kernel/exit.c         | 27 ++++++++++++++++
 kernel/fork.c         |  6 ++++
 4 files changed, 108 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 58c3e6a8e15e..a891fe4cb43b 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2376,29 +2376,82 @@ static int proc_base_fill_cache(struct file *filp, void *dirent,
 }
 
 #ifdef CONFIG_TASK_IO_ACCOUNTING
-static int proc_pid_io_accounting(struct task_struct *task, char *buffer)
-{
+static int do_io_accounting(struct task_struct *task, char *buffer, int whole)
+{
+	u64 rchar, wchar, syscr, syscw;
+	struct task_io_accounting ioac;
+
+	if (!whole) {
+		rchar = task->rchar;
+		wchar = task->wchar;
+		syscr = task->syscr;
+		syscw = task->syscw;
+		memcpy(&ioac, &task->ioac, sizeof(ioac));
+	} else {
+		unsigned long flags;
+		struct task_struct *t = task;
+		rchar = wchar = syscr = syscw = 0;
+		memset(&ioac, 0, sizeof(ioac));
+
+		rcu_read_lock();
+		do {
+			rchar += t->rchar;
+			wchar += t->wchar;
+			syscr += t->syscr;
+			syscw += t->syscw;
+
+			ioac.read_bytes += t->ioac.read_bytes;
+			ioac.write_bytes += t->ioac.write_bytes;
+			ioac.cancelled_write_bytes +=
+					t->ioac.cancelled_write_bytes;
+			t = next_thread(t);
+		} while (t != task);
+		rcu_read_unlock();
+
+		if (lock_task_sighand(task, &flags)) {
+			struct signal_struct *sig = task->signal;
+
+			rchar += sig->rchar;
+			wchar += sig->wchar;
+			syscr += sig->syscr;
+			syscw += sig->syscw;
+
+			ioac.read_bytes += sig->ioac.read_bytes;
+			ioac.write_bytes += sig->ioac.write_bytes;
+			ioac.cancelled_write_bytes +=
+					sig->ioac.cancelled_write_bytes;
+
+			unlock_task_sighand(task, &flags);
+		}
+	}
+
 	return sprintf(buffer,
-#ifdef CONFIG_TASK_XACCT
 			"rchar: %llu\n"
 			"wchar: %llu\n"
 			"syscr: %llu\n"
 			"syscw: %llu\n"
-#endif
 			"read_bytes: %llu\n"
 			"write_bytes: %llu\n"
 			"cancelled_write_bytes: %llu\n",
-#ifdef CONFIG_TASK_XACCT
-			(unsigned long long)task->rchar,
-			(unsigned long long)task->wchar,
-			(unsigned long long)task->syscr,
-			(unsigned long long)task->syscw,
-#endif
-			(unsigned long long)task->ioac.read_bytes,
-			(unsigned long long)task->ioac.write_bytes,
-			(unsigned long long)task->ioac.cancelled_write_bytes);
+			(unsigned long long)rchar,
+			(unsigned long long)wchar,
+			(unsigned long long)syscr,
+			(unsigned long long)syscw,
+			(unsigned long long)ioac.read_bytes,
+			(unsigned long long)ioac.write_bytes,
+			(unsigned long long)ioac.cancelled_write_bytes);
+}
+
+static int proc_tid_io_accounting(struct task_struct *task, char *buffer)
+{
+	return do_io_accounting(task, buffer, 0);
 }
-#endif
+
+static int proc_tgid_io_accounting(struct task_struct *task, char *buffer)
+{
+	return do_io_accounting(task, buffer, 1);
+}
+#endif /* CONFIG_TASK_IO_ACCOUNTING */
 
 /*
  * Thread groups
@@ -2470,7 +2523,7 @@ static const struct pid_entry tgid_base_stuff[] = {
 	REG("coredump_filter", S_IRUGO|S_IWUSR, coredump_filter),
 #endif
 #ifdef CONFIG_TASK_IO_ACCOUNTING
-	INF("io",	S_IRUGO, pid_io_accounting),
+	INF("io",	S_IRUGO, tgid_io_accounting),
 #endif
 };
 
@@ -2797,6 +2850,9 @@ static const struct pid_entry tid_base_stuff[] = {
 #ifdef CONFIG_FAULT_INJECTION
 	REG("make-it-fail", S_IRUGO|S_IWUSR, fault_inject),
 #endif
+#ifdef CONFIG_TASK_IO_ACCOUNTING
+	INF("io",	S_IRUGO, tid_io_accounting),
+#endif
 };
 
 static int proc_tid_base_readdir(struct file * filp,
diff --git a/include/linux/sched.h b/include/linux/sched.h
index af780f299c7c..d22ffe06d0eb 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -506,6 +506,10 @@ struct signal_struct {
 	unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw;
 	unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt;
 	unsigned long inblock, oublock, cinblock, coublock;
+#ifdef CONFIG_TASK_XACCT
+	u64 rchar, wchar, syscr, syscw;
+#endif
+	struct task_io_accounting ioac;
 
 	/*
 	 * Cumulative ns of scheduled CPU time for dead threads in the
diff --git a/kernel/exit.c b/kernel/exit.c
index 8a4d4d12e294..ad933bb29ec7 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -120,6 +120,18 @@ static void __exit_signal(struct task_struct *tsk)
 		sig->nivcsw += tsk->nivcsw;
 		sig->inblock += task_io_get_inblock(tsk);
 		sig->oublock += task_io_get_oublock(tsk);
+#ifdef CONFIG_TASK_XACCT
+		sig->rchar += tsk->rchar;
+		sig->wchar += tsk->wchar;
+		sig->syscr += tsk->syscr;
+		sig->syscw += tsk->syscw;
+#endif /* CONFIG_TASK_XACCT */
+#ifdef CONFIG_TASK_IO_ACCOUNTING
+		sig->ioac.read_bytes += tsk->ioac.read_bytes;
+		sig->ioac.write_bytes += tsk->ioac.write_bytes;
+		sig->ioac.cancelled_write_bytes +=
+					tsk->ioac.cancelled_write_bytes;
+#endif /* CONFIG_TASK_IO_ACCOUNTING */
 		sig->sum_sched_runtime += tsk->se.sum_exec_runtime;
 		sig = NULL; /* Marker for below. */
 	}
@@ -1366,6 +1378,21 @@ static int wait_task_zombie(struct task_struct *p, int options,
 		psig->coublock +=
 			task_io_get_oublock(p) +
 			sig->oublock + sig->coublock;
+#ifdef CONFIG_TASK_XACCT
+		psig->rchar += p->rchar + sig->rchar;
+		psig->wchar += p->wchar + sig->wchar;
+		psig->syscr += p->syscr + sig->syscr;
+		psig->syscw += p->syscw + sig->syscw;
+#endif /* CONFIG_TASK_XACCT */
+#ifdef CONFIG_TASK_IO_ACCOUNTING
+		psig->ioac.read_bytes +=
+			p->ioac.read_bytes + sig->ioac.read_bytes;
+		psig->ioac.write_bytes +=
+			p->ioac.write_bytes + sig->ioac.write_bytes;
+		psig->ioac.cancelled_write_bytes +=
+				p->ioac.cancelled_write_bytes +
+				sig->ioac.cancelled_write_bytes;
+#endif /* CONFIG_TASK_IO_ACCOUNTING */
 		spin_unlock_irq(&p->parent->sighand->siglock);
 	}
 
diff --git a/kernel/fork.c b/kernel/fork.c
index 813d5c89b9d5..b99d73e971a4 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -812,6 +812,12 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 	sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0;
 	sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;
 	sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0;
+#ifdef CONFIG_TASK_XACCT
+	sig->rchar = sig->wchar = sig->syscr = sig->syscw = 0;
+#endif
+#ifdef CONFIG_TASK_IO_ACCOUNTING
+	memset(&sig->ioac, 0, sizeof(sig->ioac));
+#endif
 	sig->sum_sched_runtime = 0;
 	INIT_LIST_HEAD(&sig->cpu_timers[0]);
 	INIT_LIST_HEAD(&sig->cpu_timers[1]);
-- 
cgit v1.2.3


From 873b47717732c2f33a4b14de02571a4295a02f0c Mon Sep 17 00:00:00 2001
From: Keika Kobayashi <kobayashi.kk@ncos.nec.co.jp>
Date: Fri, 25 Jul 2008 01:48:52 -0700
Subject: per-task-delay-accounting: add memory reclaim delay

Sometimes, application responses become bad under heavy memory load.
Applications take a bit time to reclaim memory.  The statistics, how long
memory reclaim takes, will be useful to measure memory usage.

This patch adds accounting memory reclaim to per-task-delay-accounting for
accounting the time of do_try_to_free_pages().

<i.e>

- When System is under low memory load,
  memory reclaim may not occur.

$ free
             total       used       free     shared    buffers     cached
Mem:       8197800    1577300    6620500          0       4808    1516724
-/+ buffers/cache:      55768    8142032
Swap:     16386292          0   16386292

$ vmstat 1
procs -----------memory---------- ---swap-- -----io---- -system-- ----cpu----
 r  b   swpd   free   buff  cache   si   so    bi    bo   in   cs us sy id wa
 0  0      0 5069748  10612 3014060    0    0     0     0    3   26  0  0 100  0
 0  0      0 5069748  10612 3014060    0    0     0     0    4   22  0  0 100  0
 0  0      0 5069748  10612 3014060    0    0     0     0    3   18  0  0 100  0

Measure the time of tar command.

$ ls -s test.dat
1501472 test.dat

$ time tar cvf test.tar test.dat
real    0m13.388s
user    0m0.116s
sys     0m5.304s

$ ./delayget -d -p <pid>
CPU             count     real total  virtual total    delay total
                  428     5528345500     5477116080       62749891
IO              count    delay total
                  338     8078977189
SWAP            count    delay total
                    0              0
RECLAIM         count    delay total
                    0              0

- When system is under heavy memory load
  memory reclaim may occur.

$ vmstat 1
procs -----------memory---------- ---swap-- -----io---- -system-- ----cpu----
 r  b   swpd   free   buff  cache   si   so    bi    bo   in   cs us sy id wa
 0  0 7159032  49724   1812   3012    0    0     0     0    3   24  0  0 100  0
 0  0 7159032  49724   1812   3012    0    0     0     0    4   24  0  0 100  0
 0  0 7159032  49848   1812   3012    0    0     0     0    3   22  0  0 100  0

In this case, one process uses more 8G memory
by execution of malloc() and memset().

$ time tar cvf test.tar test.dat
real    1m38.563s        <-  increased by 85 sec
user    0m0.140s
sys     0m7.060s

$ ./delayget -d -p <pid>
CPU             count     real total  virtual total    delay total
                 9021     7140446250     7315277975      923201824
IO              count    delay total
                 8965    90466349669
SWAP            count    delay total
                    3       21036367
RECLAIM         count    delay total
                  740    61011951153

In the later case, the value of RECLAIM is increasing.
So, taskstats can show how much memory reclaim influences TAT.

Signed-off-by: Keika Kobayashi <kobayashi.kk@ncos.nec.co.jp>
Acked-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Acked-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujistu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/delayacct.h | 19 +++++++++++++++++++
 include/linux/sched.h     |  4 ++++
 kernel/delayacct.c        | 13 +++++++++++++
 mm/vmscan.c               |  5 +++++
 4 files changed, 41 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h
index ab94bc083558..f352f06fa063 100644
--- a/include/linux/delayacct.h
+++ b/include/linux/delayacct.h
@@ -39,6 +39,8 @@ extern void __delayacct_blkio_start(void);
 extern void __delayacct_blkio_end(void);
 extern int __delayacct_add_tsk(struct taskstats *, struct task_struct *);
 extern __u64 __delayacct_blkio_ticks(struct task_struct *);
+extern void __delayacct_freepages_start(void);
+extern void __delayacct_freepages_end(void);
 
 static inline int delayacct_is_task_waiting_on_io(struct task_struct *p)
 {
@@ -107,6 +109,18 @@ static inline __u64 delayacct_blkio_ticks(struct task_struct *tsk)
 	return 0;
 }
 
+static inline void delayacct_freepages_start(void)
+{
+	if (current->delays)
+		__delayacct_freepages_start();
+}
+
+static inline void delayacct_freepages_end(void)
+{
+	if (current->delays)
+		__delayacct_freepages_end();
+}
+
 #else
 static inline void delayacct_set_flag(int flag)
 {}
@@ -129,6 +143,11 @@ static inline __u64 delayacct_blkio_ticks(struct task_struct *tsk)
 { return 0; }
 static inline int delayacct_is_task_waiting_on_io(struct task_struct *p)
 { return 0; }
+static inline void delayacct_freepages_start(void)
+{}
+static inline void delayacct_freepages_end(void)
+{}
+
 #endif /* CONFIG_TASK_DELAY_ACCT */
 
 #endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d22ffe06d0eb..42036ffe6b00 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -672,6 +672,10 @@ struct task_delay_info {
 				/* io operations performed */
 	u32 swapin_count;	/* total count of the number of swapin block */
 				/* io operations performed */
+
+	struct timespec freepages_start, freepages_end;
+	u64 freepages_delay;	/* wait for memory reclaim */
+	u32 freepages_count;	/* total count of memory reclaim */
 };
 #endif	/* CONFIG_TASK_DELAY_ACCT */
 
diff --git a/kernel/delayacct.c b/kernel/delayacct.c
index 10e43fd8b721..84b6782a2ce4 100644
--- a/kernel/delayacct.c
+++ b/kernel/delayacct.c
@@ -165,3 +165,16 @@ __u64 __delayacct_blkio_ticks(struct task_struct *tsk)
 	return ret;
 }
 
+void __delayacct_freepages_start(void)
+{
+	delayacct_start(&current->delays->freepages_start);
+}
+
+void __delayacct_freepages_end(void)
+{
+	delayacct_end(&current->delays->freepages_start,
+			&current->delays->freepages_end,
+			&current->delays->freepages_delay,
+			&current->delays->freepages_count);
+}
+
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 967d30ccd92b..26672c6cd3ce 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -38,6 +38,7 @@
 #include <linux/kthread.h>
 #include <linux/freezer.h>
 #include <linux/memcontrol.h>
+#include <linux/delayacct.h>
 
 #include <asm/tlbflush.h>
 #include <asm/div64.h>
@@ -1316,6 +1317,8 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
 	struct zone *zone;
 	enum zone_type high_zoneidx = gfp_zone(sc->gfp_mask);
 
+	delayacct_freepages_start();
+
 	if (scan_global_lru(sc))
 		count_vm_event(ALLOCSTALL);
 	/*
@@ -1396,6 +1399,8 @@ out:
 	} else
 		mem_cgroup_record_reclaim_priority(sc->mem_cgroup, priority);
 
+	delayacct_freepages_end();
+
 	return ret;
 }
 
-- 
cgit v1.2.3


From 016ae219b920c4e606088761d3d6070cdf8ba706 Mon Sep 17 00:00:00 2001
From: Keika Kobayashi <kobayashi.kk@ncos.nec.co.jp>
Date: Fri, 25 Jul 2008 01:48:53 -0700
Subject: per-task-delay-accounting: update taskstats for memory reclaim delay

Add members for memory reclaim delay to taskstats, and accumulate them in
__delayacct_add_tsk() .

Signed-off-by: Keika Kobayashi <kobayashi.kk@ncos.nec.co.jp>
Cc: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/accounting/taskstats-struct.txt | 7 +++++++
 include/linux/taskstats.h                     | 6 +++++-
 kernel/delayacct.c                            | 3 +++
 3 files changed, 15 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/Documentation/accounting/taskstats-struct.txt b/Documentation/accounting/taskstats-struct.txt
index cd784f46bf8a..b988d110db59 100644
--- a/Documentation/accounting/taskstats-struct.txt
+++ b/Documentation/accounting/taskstats-struct.txt
@@ -26,6 +26,8 @@ There are three different groups of fields in the struct taskstats:
 
 5) Time accounting for SMT machines
 
+6) Extended delay accounting fields for memory reclaim
+
 Future extension should add fields to the end of the taskstats struct, and
 should not change the relative position of each field within the struct.
 
@@ -170,4 +172,9 @@ struct taskstats {
 	__u64	ac_utimescaled;		/* utime scaled on frequency etc */
 	__u64	ac_stimescaled;		/* stime scaled on frequency etc */
 	__u64	cpu_scaled_run_real_total; /* scaled cpu_run_real_total */
+
+6) Extended delay accounting fields for memory reclaim
+	/* Delay waiting for memory reclaim */
+	__u64	freepages_count;
+	__u64	freepages_delay_total;
 }
diff --git a/include/linux/taskstats.h b/include/linux/taskstats.h
index 5d69c0744fff..18269e956a71 100644
--- a/include/linux/taskstats.h
+++ b/include/linux/taskstats.h
@@ -31,7 +31,7 @@
  */
 
 
-#define TASKSTATS_VERSION	6
+#define TASKSTATS_VERSION	7
 #define TS_COMM_LEN		32	/* should be >= TASK_COMM_LEN
 					 * in linux/sched.h */
 
@@ -157,6 +157,10 @@ struct taskstats {
 	__u64	ac_utimescaled;		/* utime scaled on frequency etc */
 	__u64	ac_stimescaled;		/* stime scaled on frequency etc */
 	__u64	cpu_scaled_run_real_total; /* scaled cpu_run_real_total */
+
+	/* Delay waiting for memory reclaim */
+	__u64	freepages_count;
+	__u64	freepages_delay_total;
 };
 
 
diff --git a/kernel/delayacct.c b/kernel/delayacct.c
index 84b6782a2ce4..b3179dad71be 100644
--- a/kernel/delayacct.c
+++ b/kernel/delayacct.c
@@ -145,8 +145,11 @@ int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
 	d->blkio_delay_total = (tmp < d->blkio_delay_total) ? 0 : tmp;
 	tmp = d->swapin_delay_total + tsk->delays->swapin_delay;
 	d->swapin_delay_total = (tmp < d->swapin_delay_total) ? 0 : tmp;
+	tmp = d->freepages_delay_total + tsk->delays->freepages_delay;
+	d->freepages_delay_total = (tmp < d->freepages_delay_total) ? 0 : tmp;
 	d->blkio_count += tsk->delays->blkio_count;
 	d->swapin_count += tsk->delays->swapin_count;
+	d->freepages_count += tsk->delays->freepages_count;
 	spin_unlock_irqrestore(&tsk->delays->lock, flags);
 
 done:
-- 
cgit v1.2.3


From bde74e4bc64415b142e556a34d295a52a1b7da9d Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Fri, 25 Jul 2008 01:48:57 -0700
Subject: locks: add special return value for asynchronous locks

Use a special error value FILE_LOCK_DEFERRED to mean that a locking
operation returned asynchronously.  This is returned by

  posix_lock_file() for sleeping locks to mean that the lock has been
  queued on the block list, and will be woken up when it might become
  available and needs to be retried (either fl_lmops->fl_notify() is
  called or fl_wait is woken up).

  f_op->lock() to mean either the above, or that the filesystem will
  call back with fl_lmops->fl_grant() when the result of the locking
  operation is known.  The filesystem can do this for sleeping as well
  as non-sleeping locks.

This is to make sure, that return values of -EAGAIN and -EINPROGRESS by
filesystems are not mistaken to mean an asynchronous locking.

This also makes error handling in fs/locks.c and lockd/svclock.c slightly
cleaner.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Cc: Trond Myklebust <trond.myklebust@fys.uio.no>
Cc: "J. Bruce Fields" <bfields@fieldses.org>
Cc: Matthew Wilcox <matthew@wil.cx>
Cc: David Teigland <teigland@redhat.com>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/dlm/plock.c     |  2 +-
 fs/lockd/svclock.c | 13 ++++---------
 fs/locks.c         | 28 ++++++++++++++--------------
 include/linux/fs.h |  6 ++++++
 4 files changed, 25 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c
index 78878c5781ca..eba87ff3177b 100644
--- a/fs/dlm/plock.c
+++ b/fs/dlm/plock.c
@@ -116,7 +116,7 @@ int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
 	if (xop->callback == NULL)
 		wait_event(recv_wq, (op->done != 0));
 	else {
-		rv = -EINPROGRESS;
+		rv = FILE_LOCK_DEFERRED;
 		goto out;
 	}
 
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 821b9acdfb66..cf0d5c2c318d 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -418,8 +418,8 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
 			goto out;
 		case -EAGAIN:
 			ret = nlm_lck_denied;
-			break;
-		case -EINPROGRESS:
+			goto out;
+		case FILE_LOCK_DEFERRED:
 			if (wait)
 				break;
 			/* Filesystem lock operation is in progress
@@ -434,10 +434,6 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
 			goto out;
 	}
 
-	ret = nlm_lck_denied;
-	if (!wait)
-		goto out;
-
 	ret = nlm_lck_blocked;
 
 	/* Append to list of blocked */
@@ -507,7 +503,7 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
 	}
 
 	error = vfs_test_lock(file->f_file, &lock->fl);
-	if (error == -EINPROGRESS) {
+	if (error == FILE_LOCK_DEFERRED) {
 		ret = nlmsvc_defer_lock_rqst(rqstp, block);
 		goto out;
 	}
@@ -731,8 +727,7 @@ nlmsvc_grant_blocked(struct nlm_block *block)
 	switch (error) {
 	case 0:
 		break;
-	case -EAGAIN:
-	case -EINPROGRESS:
+	case FILE_LOCK_DEFERRED:
 		dprintk("lockd: lock still blocked error %d\n", error);
 		nlmsvc_insert_block(block, NLM_NEVER);
 		nlmsvc_release_block(block);
diff --git a/fs/locks.c b/fs/locks.c
index dce8c747371c..1ce57b4b362c 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -779,8 +779,10 @@ find_conflict:
 		if (!flock_locks_conflict(request, fl))
 			continue;
 		error = -EAGAIN;
-		if (request->fl_flags & FL_SLEEP)
-			locks_insert_block(fl, request);
+		if (!(request->fl_flags & FL_SLEEP))
+			goto out;
+		error = FILE_LOCK_DEFERRED;
+		locks_insert_block(fl, request);
 		goto out;
 	}
 	if (request->fl_flags & FL_ACCESS)
@@ -836,7 +838,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
 			error = -EDEADLK;
 			if (posix_locks_deadlock(request, fl))
 				goto out;
-			error = -EAGAIN;
+			error = FILE_LOCK_DEFERRED;
 			locks_insert_block(fl, request);
 			goto out;
   		}
@@ -1035,7 +1037,7 @@ int posix_lock_file_wait(struct file *filp, struct file_lock *fl)
 	might_sleep ();
 	for (;;) {
 		error = posix_lock_file(filp, fl, NULL);
-		if ((error != -EAGAIN) || !(fl->fl_flags & FL_SLEEP))
+		if (error != FILE_LOCK_DEFERRED)
 			break;
 		error = wait_event_interruptible(fl->fl_wait, !fl->fl_next);
 		if (!error)
@@ -1107,9 +1109,7 @@ int locks_mandatory_area(int read_write, struct inode *inode,
 
 	for (;;) {
 		error = __posix_lock_file(inode, &fl, NULL);
-		if (error != -EAGAIN)
-			break;
-		if (!(fl.fl_flags & FL_SLEEP))
+		if (error != FILE_LOCK_DEFERRED)
 			break;
 		error = wait_event_interruptible(fl.fl_wait, !fl.fl_next);
 		if (!error) {
@@ -1531,7 +1531,7 @@ int flock_lock_file_wait(struct file *filp, struct file_lock *fl)
 	might_sleep();
 	for (;;) {
 		error = flock_lock_file(filp, fl);
-		if ((error != -EAGAIN) || !(fl->fl_flags & FL_SLEEP))
+		if (error != FILE_LOCK_DEFERRED)
 			break;
 		error = wait_event_interruptible(fl->fl_wait, !fl->fl_next);
 		if (!error)
@@ -1716,17 +1716,17 @@ out:
  * fl_grant is set. Callers expecting ->lock() to return asynchronously
  * will only use F_SETLK, not F_SETLKW; they will set FL_SLEEP if (and only if)
  * the request is for a blocking lock. When ->lock() does return asynchronously,
- * it must return -EINPROGRESS, and call ->fl_grant() when the lock
+ * it must return FILE_LOCK_DEFERRED, and call ->fl_grant() when the lock
  * request completes.
  * If the request is for non-blocking lock the file system should return
- * -EINPROGRESS then try to get the lock and call the callback routine with
- * the result. If the request timed out the callback routine will return a
+ * FILE_LOCK_DEFERRED then try to get the lock and call the callback routine
+ * with the result. If the request timed out the callback routine will return a
  * nonzero return code and the file system should release the lock. The file
  * system is also responsible to keep a corresponding posix lock when it
  * grants a lock so the VFS can find out which locks are locally held and do
  * the correct lock cleanup when required.
  * The underlying filesystem must not drop the kernel lock or call
- * ->fl_grant() before returning to the caller with a -EINPROGRESS
+ * ->fl_grant() before returning to the caller with a FILE_LOCK_DEFERRED
  * return code.
  */
 int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl, struct file_lock *conf)
@@ -1804,7 +1804,7 @@ again:
 	else {
 		for (;;) {
 			error = posix_lock_file(filp, file_lock, NULL);
-			if (error != -EAGAIN || cmd == F_SETLK)
+			if (error != FILE_LOCK_DEFERRED)
 				break;
 			error = wait_event_interruptible(file_lock->fl_wait,
 					!file_lock->fl_next);
@@ -1941,7 +1941,7 @@ again:
 	else {
 		for (;;) {
 			error = posix_lock_file(filp, file_lock, NULL);
-			if (error != -EAGAIN || cmd == F_SETLK64)
+			if (error != FILE_LOCK_DEFERRED)
 				break;
 			error = wait_event_interruptible(file_lock->fl_wait,
 					!file_lock->fl_next);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 4b86f806014c..49d8eb7a71be 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -885,6 +885,12 @@ static inline int file_check_writeable(struct file *filp)
 #define FL_CLOSE	64	/* unlock on close */
 #define FL_SLEEP	128	/* A blocking lock */
 
+/*
+ * Special return value from posix_lock_file() and vfs_lock_file() for
+ * asynchronous locking.
+ */
+#define FILE_LOCK_DEFERRED 1
+
 /*
  * The POSIX file lock owner is determined by
  * the "struct files_struct" in the thread group
-- 
cgit v1.2.3


From 33670fa296860283f04a7975b8c790f101e43a6e Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Fri, 25 Jul 2008 01:49:02 -0700
Subject: fuse: nfs export special lookups

Implement the get_parent export operation by sending a LOOKUP request with
".." as the name.

Implement looking up an inode by node ID after it has been evicted from
the cache.  This is done by seding a LOOKUP request with "." as the name
(for all file types, not just directories).

The filesystem can set the FUSE_EXPORT_SUPPORT flag in the INIT reply, to
indicate that it supports these special lookups.

Thanks to John Muir for the original implementation of this feature.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Cc: "J. Bruce Fields" <bfields@fieldses.org>
Cc: Trond Myklebust <trond.myklebust@fys.uio.no>
Cc: Matthew Wilcox <matthew@wil.cx>
Cc: David Teigland <teigland@redhat.com>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/fuse/fuse_i.h     |  6 +++++
 fs/fuse/inode.c      | 66 +++++++++++++++++++++++++++++++++++++++++++++++++---
 include/linux/fuse.h |  3 +++
 3 files changed, 72 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 5d3146da64e6..3a876076bdd1 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -363,6 +363,9 @@ struct fuse_conn {
 	/** Do not send separate SETATTR request before open(O_TRUNC)  */
 	unsigned atomic_o_trunc : 1;
 
+	/** Filesystem supports NFS exporting.  Only set in INIT */
+	unsigned export_support : 1;
+
 	/*
 	 * The following bitfields are only for optimization purposes
 	 * and hence races in setting them will not cause malfunction
@@ -473,6 +476,9 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
 			int generation, struct fuse_attr *attr,
 			u64 attr_valid, u64 attr_version);
 
+int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
+		     struct fuse_entry_out *outarg, struct inode **inode);
+
 /**
  * Send FORGET command
  */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 71fa76a48a31..7d2f7d6e22e2 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -562,6 +562,7 @@ struct fuse_inode_handle
 static struct dentry *fuse_get_dentry(struct super_block *sb,
 				      struct fuse_inode_handle *handle)
 {
+	struct fuse_conn *fc = get_fuse_conn_super(sb);
 	struct inode *inode;
 	struct dentry *entry;
 	int err = -ESTALE;
@@ -570,8 +571,27 @@ static struct dentry *fuse_get_dentry(struct super_block *sb,
 		goto out_err;
 
 	inode = ilookup5(sb, handle->nodeid, fuse_inode_eq, &handle->nodeid);
-	if (!inode)
-		goto out_err;
+	if (!inode) {
+		struct fuse_entry_out outarg;
+		struct qstr name;
+
+		if (!fc->export_support)
+			goto out_err;
+
+		name.len = 1;
+		name.name = ".";
+		err = fuse_lookup_name(sb, handle->nodeid, &name, &outarg,
+				       &inode);
+		if (err && err != -ENOENT)
+			goto out_err;
+		if (err || !inode) {
+			err = -ESTALE;
+			goto out_err;
+		}
+		err = -EIO;
+		if (get_node_id(inode) != handle->nodeid)
+			goto out_iput;
+	}
 	err = -ESTALE;
 	if (inode->i_generation != handle->generation)
 		goto out_iput;
@@ -659,11 +679,46 @@ static struct dentry *fuse_fh_to_parent(struct super_block *sb,
 	return fuse_get_dentry(sb, &parent);
 }
 
+static struct dentry *fuse_get_parent(struct dentry *child)
+{
+	struct inode *child_inode = child->d_inode;
+	struct fuse_conn *fc = get_fuse_conn(child_inode);
+	struct inode *inode;
+	struct dentry *parent;
+	struct fuse_entry_out outarg;
+	struct qstr name;
+	int err;
+
+	if (!fc->export_support)
+		return ERR_PTR(-ESTALE);
+
+	name.len = 2;
+	name.name = "..";
+	err = fuse_lookup_name(child_inode->i_sb, get_node_id(child_inode),
+			       &name, &outarg, &inode);
+	if (err && err != -ENOENT)
+		return ERR_PTR(err);
+	if (err || !inode)
+		return ERR_PTR(-ESTALE);
+
+	parent = d_alloc_anon(inode);
+	if (!parent) {
+		iput(inode);
+		return ERR_PTR(-ENOMEM);
+	}
+	if (get_node_id(inode) != FUSE_ROOT_ID) {
+		parent->d_op = &fuse_dentry_operations;
+		fuse_invalidate_entry_cache(parent);
+	}
+
+	return parent;
+}
 
 static const struct export_operations fuse_export_operations = {
 	.fh_to_dentry	= fuse_fh_to_dentry,
 	.fh_to_parent	= fuse_fh_to_parent,
 	.encode_fh	= fuse_encode_fh,
+	.get_parent	= fuse_get_parent,
 };
 
 static const struct super_operations fuse_super_operations = {
@@ -695,6 +750,11 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
 				fc->no_lock = 1;
 			if (arg->flags & FUSE_ATOMIC_O_TRUNC)
 				fc->atomic_o_trunc = 1;
+			if (arg->minor >= 9) {
+				/* LOOKUP has dependency on proto version */
+				if (arg->flags & FUSE_EXPORT_SUPPORT)
+					fc->export_support = 1;
+			}
 			if (arg->flags & FUSE_BIG_WRITES)
 				fc->big_writes = 1;
 		} else {
@@ -721,7 +781,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
 	arg->minor = FUSE_KERNEL_MINOR_VERSION;
 	arg->max_readahead = fc->bdi.ra_pages * PAGE_CACHE_SIZE;
 	arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC |
-		FUSE_BIG_WRITES;
+		FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES;
 	req->in.h.opcode = FUSE_INIT;
 	req->in.numargs = 1;
 	req->in.args[0].size = sizeof(*arg);
diff --git a/include/linux/fuse.h b/include/linux/fuse.h
index d48282197696..265635dc9908 100644
--- a/include/linux/fuse.h
+++ b/include/linux/fuse.h
@@ -104,11 +104,14 @@ struct fuse_file_lock {
 
 /**
  * INIT request/reply flags
+ *
+ * FUSE_EXPORT_SUPPORT: filesystem handles lookups of "." and ".."
  */
 #define FUSE_ASYNC_READ		(1 << 0)
 #define FUSE_POSIX_LOCKS	(1 << 1)
 #define FUSE_FILE_OPS		(1 << 2)
 #define FUSE_ATOMIC_O_TRUNC	(1 << 3)
+#define FUSE_EXPORT_SUPPORT	(1 << 4)
 #define FUSE_BIG_WRITES		(1 << 5)
 
 /**
-- 
cgit v1.2.3


From 8f421c595a9145959d8aab09172743132abdffdb Mon Sep 17 00:00:00 2001
From: Arthur Jones <ajones@riverbed.com>
Date: Fri, 25 Jul 2008 01:49:04 -0700
Subject: edac: i5100 new intel chipset driver

Preliminary support for the Intel 5100 MCH.  CE and UE errors are reported
along with the current DIMM label information and other memory parameters.

Reasons why this is preliminary:

1) This chip has 2 independent memory controllers which, for best
   perforance, use interleaved accesses to the DDR2 memory.  This
   architecture does not map very well to the current edac data structures
   which depend on symmetric channel access to the interleaved data.
   Without core changes, the best I could do for now is to map both memory
   controllers to different csrows (first all ranks of controller 0, then
   all ranks of controller 1).  Someone much more familiar with the edac
   core than I will probably need to come up with a more general data
   structure to handle the interleaving and de-interleaving of the two
   memory controllers.

2) I have not yet tackled the de-interleaving of the rank/controller
   address space into the physical address space of the CPU.  There is
   nothing fundamentally missing, it is just ending up to be a lot of
   code, and I'd rather keep it separate for now, esp since it doesn't
   work yet...

3) The code depends on a particular i5100 chip select to DIMM mainboard
   chip select mapping.  This mapping seems obvious to me in order to
   support dual and single ranked memory, but it is not unique and DIMM
   labels could be wrong on other mainboards.  There is no way to query
   this mapping that I know of.

4) The code requires that the i5100 is in 32GB mode.  Only 4 ranks per
   controller, 2 ranks per DIMM are supported.  I do not have hardware
   (nor do I expect to have hardware anytime soon) for the 48GB (6 ranks
   per controller) mode.

5) The serial presence detect code should be broken out into a "real"
   i2c driver so that decode-dimms.pl can work.

Signed-off-by: Arthur Jones <ajones@riverbed.com>
Signed-off-by: Doug Thompson <dougthompson@xmission.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/edac/Kconfig      |   7 +
 drivers/edac/Makefile     |   1 +
 drivers/edac/i5100_edac.c | 827 ++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/pci_ids.h   |   3 +
 4 files changed, 838 insertions(+)
 create mode 100644 drivers/edac/i5100_edac.c

(limited to 'include/linux')

diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index 6e6c3c4aea6b..5a11e3cbcae2 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -123,6 +123,13 @@ config EDAC_I5000
 	  Support for error detection and correction the Intel
 	  Greekcreek/Blackford chipsets.
 
+config EDAC_I5100
+	tristate "Intel San Clemente MCH"
+	depends on EDAC_MM_EDAC && X86 && PCI
+	help
+	  Support for error detection and correction the Intel
+	  San Clemente MCH.
+
 config EDAC_MPC85XX
 	tristate "Freescale MPC85xx"
 	depends on EDAC_MM_EDAC && FSL_SOC && MPC85xx
diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
index 83807731d4a9..e5e9104b5520 100644
--- a/drivers/edac/Makefile
+++ b/drivers/edac/Makefile
@@ -19,6 +19,7 @@ endif
 
 obj-$(CONFIG_EDAC_AMD76X)		+= amd76x_edac.o
 obj-$(CONFIG_EDAC_I5000)		+= i5000_edac.o
+obj-$(CONFIG_EDAC_I5100)		+= i5100_edac.o
 obj-$(CONFIG_EDAC_E7XXX)		+= e7xxx_edac.o
 obj-$(CONFIG_EDAC_E752X)		+= e752x_edac.o
 obj-$(CONFIG_EDAC_I82443BXGX)		+= i82443bxgx_edac.o
diff --git a/drivers/edac/i5100_edac.c b/drivers/edac/i5100_edac.c
new file mode 100644
index 000000000000..43430bf70181
--- /dev/null
+++ b/drivers/edac/i5100_edac.c
@@ -0,0 +1,827 @@
+/*
+ * Intel 5100 Memory Controllers kernel module
+ *
+ * This file may be distributed under the terms of the
+ * GNU General Public License.
+ *
+ * This module is based on the following document:
+ *
+ * Intel 5100X Chipset Memory Controller Hub (MCH) - Datasheet
+ *      http://download.intel.com/design/chipsets/datashts/318378.pdf
+ *
+ */
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/pci_ids.h>
+#include <linux/slab.h>
+#include <linux/edac.h>
+#include <linux/delay.h>
+#include <linux/mmzone.h>
+
+#include "edac_core.h"
+
+/* register addresses and bit field accessors... */
+
+/* device 16, func 1 */
+#define I5100_MS		0x44	/* Memory Status Register */
+#define I5100_SPDDATA		0x48	/* Serial Presence Detect Status Reg */
+#define		I5100_SPDDATA_RDO(a)	((a) >> 15 & 1)
+#define		I5100_SPDDATA_SBE(a)	((a) >> 13 & 1)
+#define		I5100_SPDDATA_BUSY(a)	((a) >> 12 & 1)
+#define		I5100_SPDDATA_DATA(a)	((a)       & ((1 << 8) - 1))
+#define I5100_SPDCMD		0x4c	/* Serial Presence Detect Command Reg */
+#define		I5100_SPDCMD_DTI(a)	(((a) & ((1 << 4) - 1)) << 28)
+#define		I5100_SPDCMD_CKOVRD(a)	(((a) & 1)              << 27)
+#define		I5100_SPDCMD_SA(a)	(((a) & ((1 << 3) - 1)) << 24)
+#define		I5100_SPDCMD_BA(a)	(((a) & ((1 << 8) - 1)) << 16)
+#define		I5100_SPDCMD_DATA(a)	(((a) & ((1 << 8) - 1)) <<  8)
+#define		I5100_SPDCMD_CMD(a)	((a) & 1)
+#define I5100_TOLM		0x6c	/* Top of Low Memory */
+#define		I5100_TOLM_TOLM(a)	((a) >> 12 & ((1 << 4) - 1))
+#define I5100_MIR0		0x80	/* Memory Interleave Range 0 */
+#define I5100_MIR1		0x84	/* Memory Interleave Range 1 */
+#define I5100_AMIR_0		0x8c	/* Adjusted Memory Interleave Range 0 */
+#define I5100_AMIR_1		0x90	/* Adjusted Memory Interleave Range 1 */
+#define		I5100_MIR_LIMIT(a)	((a) >> 4 & ((1 << 12) - 1))
+#define		I5100_MIR_WAY1(a)	((a) >> 1 & 1)
+#define		I5100_MIR_WAY0(a)	((a)      & 1)
+#define I5100_FERR_NF_MEM	0xa0	/* MC First Non Fatal Errors */
+#define		I5100_FERR_NF_MEM_CHAN_INDX(a)	((a) >> 28 & 1)
+#define		I5100_FERR_NF_MEM_SPD_MASK	(1 << 18)
+#define		I5100_FERR_NF_MEM_M16ERR_MASK	(1 << 16)
+#define		I5100_FERR_NF_MEM_M15ERR_MASK	(1 << 15)
+#define		I5100_FERR_NF_MEM_M14ERR_MASK	(1 << 14)
+#define		I5100_FERR_NF_MEM_
+#define		I5100_FERR_NF_MEM_
+#define		I5100_FERR_NF_MEM_ANY_MASK	\
+			(I5100_FERR_NF_MEM_M16ERR_MASK | \
+			I5100_FERR_NF_MEM_M15ERR_MASK | \
+			I5100_FERR_NF_MEM_M14ERR_MASK)
+#define		I5100_FERR_NF_MEM_ANY(a)  ((a) & I5100_FERR_NF_MEM_ANY_MASK)
+#define	I5100_NERR_NF_MEM	0xa4	/* MC Next Non-Fatal Errors */
+#define		I5100_NERR_NF_MEM_ANY(a)  I5100_FERR_NF_MEM_ANY(a)
+
+/* device 21 and 22, func 0 */
+#define I5100_MTR_0	0x154	/* Memory Technology Registers 0-3 */
+#define I5100_DMIR	0x15c	/* DIMM Interleave Range */
+#define		I5100_DMIR_LIMIT(a)	((a) >> 16 & ((1 << 11) - 1))
+#define		I5100_DMIR_RANK(a, i)	((a) >> (4 * i) & ((1 <<  2) - 1))
+#define I5100_MTR_4	0x1b0	/* Memory Technology Registers 4,5 */
+#define		I5100_MTR_PRESENT(a)	((a) >> 10 & 1)
+#define		I5100_MTR_ETHROTTLE(a)	((a) >>  9 & 1)
+#define		I5100_MTR_WIDTH(a)	((a) >>  8 & 1)
+#define		I5100_MTR_NUMBANK(a)	((a) >>  6 & 1)
+#define		I5100_MTR_NUMROW(a)	((a) >>  2 & ((1 << 2) - 1))
+#define		I5100_MTR_NUMCOL(a)	((a)       & ((1 << 2) - 1))
+#define	I5100_VALIDLOG	0x18c	/* Valid Log Markers */
+#define		I5100_VALIDLOG_REDMEMVALID(a)	((a) >> 2 & 1)
+#define		I5100_VALIDLOG_RECMEMVALID(a)	((a) >> 1 & 1)
+#define		I5100_VALIDLOG_NRECMEMVALID(a)	((a)      & 1)
+#define	I5100_NRECMEMA	0x190	/* Non-Recoverable Memory Error Log Reg A */
+#define		I5100_NRECMEMA_MERR(a)		((a) >> 15 & ((1 << 5) - 1))
+#define		I5100_NRECMEMA_BANK(a)		((a) >> 12 & ((1 << 3) - 1))
+#define		I5100_NRECMEMA_RANK(a)		((a) >>  8 & ((1 << 3) - 1))
+#define		I5100_NRECMEMA_DM_BUF_ID(a)	((a)       & ((1 << 8) - 1))
+#define	I5100_NRECMEMB	0x194	/* Non-Recoverable Memory Error Log Reg B */
+#define		I5100_NRECMEMB_CAS(a)		((a) >> 16 & ((1 << 13) - 1))
+#define		I5100_NRECMEMB_RAS(a)		((a)       & ((1 << 16) - 1))
+#define	I5100_REDMEMA	0x198	/* Recoverable Memory Data Error Log Reg A */
+#define		I5100_REDMEMA_SYNDROME(a)	(a)
+#define	I5100_REDMEMB	0x19c	/* Recoverable Memory Data Error Log Reg B */
+#define		I5100_REDMEMB_ECC_LOCATOR(a)	((a) & ((1 << 18) - 1))
+#define	I5100_RECMEMA	0x1a0	/* Recoverable Memory Error Log Reg A */
+#define		I5100_RECMEMA_MERR(a)		I5100_NRECMEMA_MERR(a)
+#define		I5100_RECMEMA_BANK(a)		I5100_NRECMEMA_BANK(a)
+#define		I5100_RECMEMA_RANK(a)		I5100_NRECMEMA_RANK(a)
+#define		I5100_RECMEMA_DM_BUF_ID(a)	I5100_NRECMEMA_DM_BUF_ID(a)
+#define	I5100_RECMEMB	0x1a4	/* Recoverable Memory Error Log Reg B */
+#define		I5100_RECMEMB_CAS(a)		I5100_NRECMEMB_CAS(a)
+#define		I5100_RECMEMB_RAS(a)		I5100_NRECMEMB_RAS(a)
+
+/* some generic limits */
+#define I5100_MAX_RANKS_PER_CTLR	6
+#define I5100_MAX_CTLRS			2
+#define I5100_MAX_RANKS_PER_DIMM	4
+#define I5100_DIMM_ADDR_LINES		(6 - 3)	/* 64 bits / 8 bits per byte */
+#define I5100_MAX_DIMM_SLOTS_PER_CTLR	4
+#define I5100_MAX_RANK_INTERLEAVE	4
+#define I5100_MAX_DMIRS			5
+
+struct i5100_priv {
+	/* ranks on each dimm -- 0 maps to not present -- obtained via SPD */
+	int dimm_numrank[I5100_MAX_CTLRS][I5100_MAX_DIMM_SLOTS_PER_CTLR];
+
+	/*
+	 * mainboard chip select map -- maps i5100 chip selects to
+	 * DIMM slot chip selects.  In the case of only 4 ranks per
+	 * controller, the mapping is fairly obvious but not unique.
+	 * we map -1 -> NC and assume both controllers use the same
+	 * map...
+	 *
+	 */
+	int dimm_csmap[I5100_MAX_DIMM_SLOTS_PER_CTLR][I5100_MAX_RANKS_PER_DIMM];
+
+	/* memory interleave range */
+	struct {
+		u64	 limit;
+		unsigned way[2];
+	} mir[I5100_MAX_CTLRS];
+
+	/* adjusted memory interleave range register */
+	unsigned amir[I5100_MAX_CTLRS];
+
+	/* dimm interleave range */
+	struct {
+		unsigned rank[I5100_MAX_RANK_INTERLEAVE];
+		u64	 limit;
+	} dmir[I5100_MAX_CTLRS][I5100_MAX_DMIRS];
+
+	/* memory technology registers... */
+	struct {
+		unsigned present;	/* 0 or 1 */
+		unsigned ethrottle;	/* 0 or 1 */
+		unsigned width;		/* 4 or 8 bits  */
+		unsigned numbank;	/* 2 or 3 lines */
+		unsigned numrow;	/* 13 .. 16 lines */
+		unsigned numcol;	/* 11 .. 12 lines */
+	} mtr[I5100_MAX_CTLRS][I5100_MAX_RANKS_PER_CTLR];
+
+	u64 tolm;		/* top of low memory in bytes */
+	unsigned ranksperctlr;	/* number of ranks per controller */
+
+	struct pci_dev *mc;	/* device 16 func 1 */
+	struct pci_dev *ch0mm;	/* device 21 func 0 */
+	struct pci_dev *ch1mm;	/* device 22 func 0 */
+};
+
+/* map a rank/ctlr to a slot number on the mainboard */
+static int i5100_rank_to_slot(const struct mem_ctl_info *mci,
+			      int ctlr, int rank)
+{
+	const struct i5100_priv *priv = mci->pvt_info;
+	int i;
+
+	for (i = 0; i < I5100_MAX_DIMM_SLOTS_PER_CTLR; i++) {
+		int j;
+		const int numrank = priv->dimm_numrank[ctlr][i];
+
+		for (j = 0; j < numrank; j++)
+			if (priv->dimm_csmap[i][j] == rank)
+				return i * 2 + ctlr;
+	}
+
+	return -1;
+}
+
+/*
+ * The processor bus memory addresses are broken into three
+ * pieces, whereas the controller addresses are contiguous.
+ *
+ * here we map from the controller address space to the
+ * processor address space:
+ *
+ *    Processor Address Space
+ * +-----------------------------+
+ * |                             |
+ * |  "high" memory addresses    |
+ * |                             |
+ * +-----------------------------+ <- 4GB on the i5100
+ * |                             |
+ * |  other non-memory addresses |
+ * |                             |
+ * +-----------------------------+ <- top of low memory
+ * |                             |
+ * | "low" memory addresses      |
+ * |                             |
+ * +-----------------------------+
+ */
+static unsigned long i5100_ctl_page_to_phys(struct mem_ctl_info *mci,
+					    unsigned long cntlr_addr)
+{
+	const struct i5100_priv *priv = mci->pvt_info;
+
+	if (cntlr_addr < priv->tolm)
+		return cntlr_addr;
+
+	return (1ULL << 32) + (cntlr_addr - priv->tolm);
+}
+
+static const char *i5100_err_msg(unsigned err)
+{
+	const char *merrs[] = {
+		"unknown", /* 0 */
+		"uncorrectable data ECC on replay", /* 1 */
+		"unknown", /* 2 */
+		"unknown", /* 3 */
+		"aliased uncorrectable demand data ECC", /* 4 */
+		"aliased uncorrectable spare-copy data ECC", /* 5 */
+		"aliased uncorrectable patrol data ECC", /* 6 */
+		"unknown", /* 7 */
+		"unknown", /* 8 */
+		"unknown", /* 9 */
+		"non-aliased uncorrectable demand data ECC", /* 10 */
+		"non-aliased uncorrectable spare-copy data ECC", /* 11 */
+		"non-aliased uncorrectable patrol data ECC", /* 12 */
+		"unknown", /* 13 */
+		"correctable demand data ECC", /* 14 */
+		"correctable spare-copy data ECC", /* 15 */
+		"correctable patrol data ECC", /* 16 */
+		"unknown", /* 17 */
+		"SPD protocol error", /* 18 */
+		"unknown", /* 19 */
+		"spare copy initiated", /* 20 */
+		"spare copy completed", /* 21 */
+	};
+	unsigned i;
+
+	for (i = 0; i < ARRAY_SIZE(merrs); i++)
+		if (1 << i & err)
+			return merrs[i];
+
+	return "none";
+}
+
+/* convert csrow index into a rank (per controller -- 0..5) */
+static int i5100_csrow_to_rank(const struct mem_ctl_info *mci, int csrow)
+{
+	const struct i5100_priv *priv = mci->pvt_info;
+
+	return csrow % priv->ranksperctlr;
+}
+
+/* convert csrow index into a controller (0..1) */
+static int i5100_csrow_to_cntlr(const struct mem_ctl_info *mci, int csrow)
+{
+	const struct i5100_priv *priv = mci->pvt_info;
+
+	return csrow / priv->ranksperctlr;
+}
+
+static unsigned i5100_rank_to_csrow(const struct mem_ctl_info *mci,
+				    int ctlr, int rank)
+{
+	const struct i5100_priv *priv = mci->pvt_info;
+
+	return ctlr * priv->ranksperctlr + rank;
+}
+
+static void i5100_handle_ce(struct mem_ctl_info *mci,
+			    int ctlr,
+			    unsigned bank,
+			    unsigned rank,
+			    unsigned long syndrome,
+			    unsigned cas,
+			    unsigned ras,
+			    const char *msg)
+{
+	const int csrow = i5100_rank_to_csrow(mci, ctlr, rank);
+
+	printk(KERN_ERR
+		"CE ctlr %d, bank %u, rank %u, syndrome 0x%lx, "
+		"cas %u, ras %u, csrow %u, label \"%s\": %s\n",
+		ctlr, bank, rank, syndrome, cas, ras,
+		csrow, mci->csrows[csrow].channels[0].label, msg);
+
+	mci->ce_count++;
+	mci->csrows[csrow].ce_count++;
+	mci->csrows[csrow].channels[0].ce_count++;
+}
+
+static void i5100_handle_ue(struct mem_ctl_info *mci,
+			    int ctlr,
+			    unsigned bank,
+			    unsigned rank,
+			    unsigned long syndrome,
+			    unsigned cas,
+			    unsigned ras,
+			    const char *msg)
+{
+	const int csrow = i5100_rank_to_csrow(mci, ctlr, rank);
+
+	printk(KERN_ERR
+		"UE ctlr %d, bank %u, rank %u, syndrome 0x%lx, "
+		"cas %u, ras %u, csrow %u, label \"%s\": %s\n",
+		ctlr, bank, rank, syndrome, cas, ras,
+		csrow, mci->csrows[csrow].channels[0].label, msg);
+
+	mci->ue_count++;
+	mci->csrows[csrow].ue_count++;
+}
+
+static void i5100_read_log(struct mem_ctl_info *mci, int ctlr,
+			   u32 ferr, u32 nerr)
+{
+	struct i5100_priv *priv = mci->pvt_info;
+	struct pci_dev *pdev = (ctlr) ? priv->ch1mm : priv->ch0mm;
+	u32 dw;
+	u32 dw2;
+	unsigned syndrome = 0;
+	unsigned ecc_loc = 0;
+	unsigned merr;
+	unsigned bank;
+	unsigned rank;
+	unsigned cas;
+	unsigned ras;
+
+	pci_read_config_dword(pdev, I5100_VALIDLOG, &dw);
+
+	if (I5100_VALIDLOG_REDMEMVALID(dw)) {
+		pci_read_config_dword(pdev, I5100_REDMEMA, &dw2);
+		syndrome = I5100_REDMEMA_SYNDROME(dw2);
+		pci_read_config_dword(pdev, I5100_REDMEMB, &dw2);
+		ecc_loc = I5100_REDMEMB_ECC_LOCATOR(dw2);
+	}
+
+	if (I5100_VALIDLOG_RECMEMVALID(dw)) {
+		const char *msg;
+
+		pci_read_config_dword(pdev, I5100_RECMEMA, &dw2);
+		merr = I5100_RECMEMA_MERR(dw2);
+		bank = I5100_RECMEMA_BANK(dw2);
+		rank = I5100_RECMEMA_RANK(dw2);
+
+		pci_read_config_dword(pdev, I5100_RECMEMB, &dw2);
+		cas = I5100_RECMEMB_CAS(dw2);
+		ras = I5100_RECMEMB_RAS(dw2);
+
+		/* FIXME:  not really sure if this is what merr is...
+		 */
+		if (!merr)
+			msg = i5100_err_msg(ferr);
+		else
+			msg = i5100_err_msg(nerr);
+
+		i5100_handle_ce(mci, ctlr, bank, rank, syndrome, cas, ras, msg);
+	}
+
+	if (I5100_VALIDLOG_NRECMEMVALID(dw)) {
+		const char *msg;
+
+		pci_read_config_dword(pdev, I5100_NRECMEMA, &dw2);
+		merr = I5100_NRECMEMA_MERR(dw2);
+		bank = I5100_NRECMEMA_BANK(dw2);
+		rank = I5100_NRECMEMA_RANK(dw2);
+
+		pci_read_config_dword(pdev, I5100_NRECMEMB, &dw2);
+		cas = I5100_NRECMEMB_CAS(dw2);
+		ras = I5100_NRECMEMB_RAS(dw2);
+
+		/* FIXME:  not really sure if this is what merr is...
+		 */
+		if (!merr)
+			msg = i5100_err_msg(ferr);
+		else
+			msg = i5100_err_msg(nerr);
+
+		i5100_handle_ue(mci, ctlr, bank, rank, syndrome, cas, ras, msg);
+	}
+
+	pci_write_config_dword(pdev, I5100_VALIDLOG, dw);
+}
+
+static void i5100_check_error(struct mem_ctl_info *mci)
+{
+	struct i5100_priv *priv = mci->pvt_info;
+	u32 dw;
+
+
+	pci_read_config_dword(priv->mc, I5100_FERR_NF_MEM, &dw);
+	if (I5100_FERR_NF_MEM_ANY(dw)) {
+		u32 dw2;
+
+		pci_read_config_dword(priv->mc, I5100_NERR_NF_MEM, &dw2);
+		if (dw2)
+			pci_write_config_dword(priv->mc, I5100_NERR_NF_MEM,
+					       dw2);
+		pci_write_config_dword(priv->mc, I5100_FERR_NF_MEM, dw);
+
+		i5100_read_log(mci, I5100_FERR_NF_MEM_CHAN_INDX(dw),
+			       I5100_FERR_NF_MEM_ANY(dw),
+			       I5100_NERR_NF_MEM_ANY(dw2));
+	}
+}
+
+static struct pci_dev *pci_get_device_func(unsigned vendor,
+					   unsigned device,
+					   unsigned func)
+{
+	struct pci_dev *ret = NULL;
+
+	while (1) {
+		ret = pci_get_device(vendor, device, ret);
+
+		if (!ret)
+			break;
+
+		if (PCI_FUNC(ret->devfn) == func)
+			break;
+	}
+
+	return ret;
+}
+
+static unsigned long __devinit i5100_npages(struct mem_ctl_info *mci,
+					    int csrow)
+{
+	struct i5100_priv *priv = mci->pvt_info;
+	const unsigned ctlr_rank = i5100_csrow_to_rank(mci, csrow);
+	const unsigned ctlr = i5100_csrow_to_cntlr(mci, csrow);
+	unsigned addr_lines;
+
+	/* dimm present? */
+	if (!priv->mtr[ctlr][ctlr_rank].present)
+		return 0ULL;
+
+	addr_lines =
+		I5100_DIMM_ADDR_LINES +
+		priv->mtr[ctlr][ctlr_rank].numcol +
+		priv->mtr[ctlr][ctlr_rank].numrow +
+		priv->mtr[ctlr][ctlr_rank].numbank;
+
+	return (unsigned long)
+		((unsigned long long) (1ULL << addr_lines) / PAGE_SIZE);
+}
+
+static void __devinit i5100_init_mtr(struct mem_ctl_info *mci)
+{
+	struct i5100_priv *priv = mci->pvt_info;
+	struct pci_dev *mms[2] = { priv->ch0mm, priv->ch1mm };
+	int i;
+
+	for (i = 0; i < I5100_MAX_CTLRS; i++) {
+		int j;
+		struct pci_dev *pdev = mms[i];
+
+		for (j = 0; j < I5100_MAX_RANKS_PER_CTLR; j++) {
+			const unsigned addr =
+				(j < 4) ? I5100_MTR_0 + j * 2 :
+					  I5100_MTR_4 + (j - 4) * 2;
+			u16 w;
+
+			pci_read_config_word(pdev, addr, &w);
+
+			priv->mtr[i][j].present = I5100_MTR_PRESENT(w);
+			priv->mtr[i][j].ethrottle = I5100_MTR_ETHROTTLE(w);
+			priv->mtr[i][j].width = 4 + 4 * I5100_MTR_WIDTH(w);
+			priv->mtr[i][j].numbank = 2 + I5100_MTR_NUMBANK(w);
+			priv->mtr[i][j].numrow = 13 + I5100_MTR_NUMROW(w);
+			priv->mtr[i][j].numcol = 10 + I5100_MTR_NUMCOL(w);
+		}
+	}
+}
+
+/*
+ * FIXME: make this into a real i2c adapter (so that dimm-decode
+ * will work)?
+ */
+static int i5100_read_spd_byte(const struct mem_ctl_info *mci,
+			       u8 ch, u8 slot, u8 addr, u8 *byte)
+{
+	struct i5100_priv *priv = mci->pvt_info;
+	u16 w;
+	u32 dw;
+	unsigned long et;
+
+	pci_read_config_word(priv->mc, I5100_SPDDATA, &w);
+	if (I5100_SPDDATA_BUSY(w))
+		return -1;
+
+	dw =	I5100_SPDCMD_DTI(0xa) |
+		I5100_SPDCMD_CKOVRD(1) |
+		I5100_SPDCMD_SA(ch * 4 + slot) |
+		I5100_SPDCMD_BA(addr) |
+		I5100_SPDCMD_DATA(0) |
+		I5100_SPDCMD_CMD(0);
+	pci_write_config_dword(priv->mc, I5100_SPDCMD, dw);
+
+	/* wait up to 100ms */
+	et = jiffies + HZ / 10;
+	udelay(100);
+	while (1) {
+		pci_read_config_word(priv->mc, I5100_SPDDATA, &w);
+		if (!I5100_SPDDATA_BUSY(w))
+			break;
+		udelay(100);
+	}
+
+	if (!I5100_SPDDATA_RDO(w) || I5100_SPDDATA_SBE(w))
+		return -1;
+
+	*byte = I5100_SPDDATA_DATA(w);
+
+	return 0;
+}
+
+/*
+ * fill dimm chip select map
+ *
+ * FIXME:
+ *   o only valid for 4 ranks per controller
+ *   o not the only way to may chip selects to dimm slots
+ *   o investigate if there is some way to obtain this map from the bios
+ */
+static void __devinit i5100_init_dimm_csmap(struct mem_ctl_info *mci)
+{
+	struct i5100_priv *priv = mci->pvt_info;
+	int i;
+
+	WARN_ON(priv->ranksperctlr != 4);
+
+	for (i = 0; i < I5100_MAX_DIMM_SLOTS_PER_CTLR; i++) {
+		int j;
+
+		for (j = 0; j < I5100_MAX_RANKS_PER_DIMM; j++)
+			priv->dimm_csmap[i][j] = -1; /* default NC */
+	}
+
+	/* only 2 chip selects per slot... */
+	priv->dimm_csmap[0][0] = 0;
+	priv->dimm_csmap[0][1] = 3;
+	priv->dimm_csmap[1][0] = 1;
+	priv->dimm_csmap[1][1] = 2;
+	priv->dimm_csmap[2][0] = 2;
+	priv->dimm_csmap[3][0] = 3;
+}
+
+static void __devinit i5100_init_dimm_layout(struct pci_dev *pdev,
+					     struct mem_ctl_info *mci)
+{
+	struct i5100_priv *priv = mci->pvt_info;
+	int i;
+
+	for (i = 0; i < I5100_MAX_CTLRS; i++) {
+		int j;
+
+		for (j = 0; j < I5100_MAX_DIMM_SLOTS_PER_CTLR; j++) {
+			u8 rank;
+
+			if (i5100_read_spd_byte(mci, i, j, 5, &rank) < 0)
+				priv->dimm_numrank[i][j] = 0;
+			else
+				priv->dimm_numrank[i][j] = (rank & 3) + 1;
+		}
+	}
+
+	i5100_init_dimm_csmap(mci);
+}
+
+static void __devinit i5100_init_interleaving(struct pci_dev *pdev,
+					      struct mem_ctl_info *mci)
+{
+	u16 w;
+	u32 dw;
+	struct i5100_priv *priv = mci->pvt_info;
+	struct pci_dev *mms[2] = { priv->ch0mm, priv->ch1mm };
+	int i;
+
+	pci_read_config_word(pdev, I5100_TOLM, &w);
+	priv->tolm = (u64) I5100_TOLM_TOLM(w) * 256 * 1024 * 1024;
+
+	pci_read_config_word(pdev, I5100_MIR0, &w);
+	priv->mir[0].limit = (u64) I5100_MIR_LIMIT(w) << 28;
+	priv->mir[0].way[1] = I5100_MIR_WAY1(w);
+	priv->mir[0].way[0] = I5100_MIR_WAY0(w);
+
+	pci_read_config_word(pdev, I5100_MIR1, &w);
+	priv->mir[1].limit = (u64) I5100_MIR_LIMIT(w) << 28;
+	priv->mir[1].way[1] = I5100_MIR_WAY1(w);
+	priv->mir[1].way[0] = I5100_MIR_WAY0(w);
+
+	pci_read_config_word(pdev, I5100_AMIR_0, &w);
+	priv->amir[0] = w;
+	pci_read_config_word(pdev, I5100_AMIR_1, &w);
+	priv->amir[1] = w;
+
+	for (i = 0; i < I5100_MAX_CTLRS; i++) {
+		int j;
+
+		for (j = 0; j < 5; j++) {
+			int k;
+
+			pci_read_config_dword(mms[i], I5100_DMIR + j * 4, &dw);
+
+			priv->dmir[i][j].limit =
+				(u64) I5100_DMIR_LIMIT(dw) << 28;
+			for (k = 0; k < I5100_MAX_RANKS_PER_DIMM; k++)
+				priv->dmir[i][j].rank[k] =
+					I5100_DMIR_RANK(dw, k);
+		}
+	}
+
+	i5100_init_mtr(mci);
+}
+
+static void __devinit i5100_init_csrows(struct mem_ctl_info *mci)
+{
+	int i;
+	unsigned long total_pages = 0UL;
+	struct i5100_priv *priv = mci->pvt_info;
+
+	for (i = 0; i < mci->nr_csrows; i++) {
+		const unsigned long npages = i5100_npages(mci, i);
+		const unsigned cntlr = i5100_csrow_to_cntlr(mci, i);
+		const unsigned rank = i5100_csrow_to_rank(mci, i);
+
+		if (!npages)
+			continue;
+
+		/*
+		 * FIXME: these two are totally bogus -- I don't see how to
+		 * map them correctly to this structure...
+		 */
+		mci->csrows[i].first_page = total_pages;
+		mci->csrows[i].last_page = total_pages + npages - 1;
+		mci->csrows[i].page_mask = 0UL;
+
+		mci->csrows[i].nr_pages = npages;
+		mci->csrows[i].grain = 32;
+		mci->csrows[i].csrow_idx = i;
+		mci->csrows[i].dtype =
+			(priv->mtr[cntlr][rank].width == 4) ? DEV_X4 : DEV_X8;
+		mci->csrows[i].ue_count = 0;
+		mci->csrows[i].ce_count = 0;
+		mci->csrows[i].mtype = MEM_RDDR2;
+		mci->csrows[i].edac_mode = EDAC_SECDED;
+		mci->csrows[i].mci = mci;
+		mci->csrows[i].nr_channels = 1;
+		mci->csrows[i].channels[0].chan_idx = 0;
+		mci->csrows[i].channels[0].ce_count = 0;
+		mci->csrows[i].channels[0].csrow = mci->csrows + i;
+		snprintf(mci->csrows[i].channels[0].label,
+			 sizeof(mci->csrows[i].channels[0].label),
+			 "DIMM%u", i5100_rank_to_slot(mci, cntlr, rank));
+
+		total_pages += npages;
+	}
+}
+
+static int __devinit i5100_init_one(struct pci_dev *pdev,
+				    const struct pci_device_id *id)
+{
+	int rc;
+	struct mem_ctl_info *mci;
+	struct i5100_priv *priv;
+	struct pci_dev *ch0mm, *ch1mm;
+	int ret = 0;
+	u32 dw;
+	int ranksperch;
+
+	if (PCI_FUNC(pdev->devfn) != 1)
+		return -ENODEV;
+
+	rc = pci_enable_device(pdev);
+	if (rc < 0) {
+		ret = rc;
+		goto bail;
+	}
+
+	/* figure out how many ranks, from strapped state of 48GB_Mode input */
+	pci_read_config_dword(pdev, I5100_MS, &dw);
+	ranksperch = !!(dw & (1 << 8)) * 2 + 4;
+
+	if (ranksperch != 4) {
+		/* FIXME: get 6 ranks / controller to work - need hw... */
+		printk(KERN_INFO "i5100_edac: unsupported configuration.\n");
+		ret = -ENODEV;
+		goto bail;
+	}
+
+	/* device 21, func 0, Channel 0 Memory Map, Error Flag/Mask, etc... */
+	ch0mm = pci_get_device_func(PCI_VENDOR_ID_INTEL,
+				    PCI_DEVICE_ID_INTEL_5100_21, 0);
+	if (!ch0mm)
+		return -ENODEV;
+
+	rc = pci_enable_device(ch0mm);
+	if (rc < 0) {
+		ret = rc;
+		goto bail_ch0;
+	}
+
+	/* device 22, func 0, Channel 1 Memory Map, Error Flag/Mask, etc... */
+	ch1mm = pci_get_device_func(PCI_VENDOR_ID_INTEL,
+				    PCI_DEVICE_ID_INTEL_5100_22, 0);
+	if (!ch1mm) {
+		ret = -ENODEV;
+		goto bail_ch0;
+	}
+
+	rc = pci_enable_device(ch1mm);
+	if (rc < 0) {
+		ret = rc;
+		goto bail_ch1;
+	}
+
+	mci = edac_mc_alloc(sizeof(*priv), ranksperch * 2, 1, 0);
+	if (!mci) {
+		ret = -ENOMEM;
+		goto bail_ch1;
+	}
+
+	mci->dev = &pdev->dev;
+
+	priv = mci->pvt_info;
+	priv->ranksperctlr = ranksperch;
+	priv->mc = pdev;
+	priv->ch0mm = ch0mm;
+	priv->ch1mm = ch1mm;
+
+	i5100_init_dimm_layout(pdev, mci);
+	i5100_init_interleaving(pdev, mci);
+
+	mci->mtype_cap = MEM_FLAG_FB_DDR2;
+	mci->edac_ctl_cap = EDAC_FLAG_SECDED;
+	mci->edac_cap = EDAC_FLAG_SECDED;
+	mci->mod_name = "i5100_edac.c";
+	mci->mod_ver = "not versioned";
+	mci->ctl_name = "i5100";
+	mci->dev_name = pci_name(pdev);
+	mci->ctl_page_to_phys = i5100_ctl_page_to_phys;
+
+	mci->edac_check = i5100_check_error;
+
+	i5100_init_csrows(mci);
+
+	/* this strange construction seems to be in every driver, dunno why */
+	switch (edac_op_state) {
+	case EDAC_OPSTATE_POLL:
+	case EDAC_OPSTATE_NMI:
+		break;
+	default:
+		edac_op_state = EDAC_OPSTATE_POLL;
+		break;
+	}
+
+	if (edac_mc_add_mc(mci)) {
+		ret = -ENODEV;
+		goto bail_mc;
+	}
+
+	goto bail;
+
+bail_mc:
+	edac_mc_free(mci);
+
+bail_ch1:
+	pci_dev_put(ch1mm);
+
+bail_ch0:
+	pci_dev_put(ch0mm);
+
+bail:
+	return ret;
+}
+
+static void __devexit i5100_remove_one(struct pci_dev *pdev)
+{
+	struct mem_ctl_info *mci;
+	struct i5100_priv *priv;
+
+	mci = edac_mc_del_mc(&pdev->dev);
+
+	if (!mci)
+		return;
+
+	priv = mci->pvt_info;
+	pci_dev_put(priv->ch0mm);
+	pci_dev_put(priv->ch1mm);
+
+	edac_mc_free(mci);
+}
+
+static const struct pci_device_id i5100_pci_tbl[] __devinitdata = {
+	/* Device 16, Function 0, Channel 0 Memory Map, Error Flag/Mask, ... */
+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_5100_16) },
+	{ 0, }
+};
+MODULE_DEVICE_TABLE(pci, i5100_pci_tbl);
+
+static struct pci_driver i5100_driver = {
+	.name = KBUILD_BASENAME,
+	.probe = i5100_init_one,
+	.remove = __devexit_p(i5100_remove_one),
+	.id_table = i5100_pci_tbl,
+};
+
+static int __init i5100_init(void)
+{
+	int pci_rc;
+
+	pci_rc = pci_register_driver(&i5100_driver);
+
+	return (pci_rc < 0) ? pci_rc : 0;
+}
+
+static void __exit i5100_exit(void)
+{
+	pci_unregister_driver(&i5100_driver);
+}
+
+module_init(i5100_init);
+module_exit(i5100_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR
+    ("Arthur Jones <ajones@riverbed.com>");
+MODULE_DESCRIPTION("MC Driver for Intel I5100 memory controllers");
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 119ae7b8f028..c3b1761aba26 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2400,6 +2400,9 @@
 #define PCI_DEVICE_ID_INTEL_ICH10_4	0x3a30
 #define PCI_DEVICE_ID_INTEL_ICH10_5	0x3a60
 #define PCI_DEVICE_ID_INTEL_IOAT_SNB	0x402f
+#define PCI_DEVICE_ID_INTEL_5100_16	0x65f0
+#define PCI_DEVICE_ID_INTEL_5100_21	0x65f5
+#define PCI_DEVICE_ID_INTEL_5100_22	0x65f6
 #define PCI_DEVICE_ID_INTEL_5400_ERR	0x4030
 #define PCI_DEVICE_ID_INTEL_5400_FBD0	0x4035
 #define PCI_DEVICE_ID_INTEL_5400_FBD1	0x4036
-- 
cgit v1.2.3


From 7dcf2a9fced59e58e4694cdcf15850c01fdba89b Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Tue, 1 Jul 2008 19:27:16 +0300
Subject: remove dummy asm/kvm.h files

This patch removes the dummy asm/kvm.h files on architectures not (yet)
supporting KVM and uses the same conditional headers installation as
already used for a.out.h .

Also removed are superfluous install rules in the s390 and x86 Kbuild
files (they are already in Kbuild.asm).

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Acked-by: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 include/asm-alpha/kvm.h        | 6 ------
 include/asm-arm/kvm.h          | 6 ------
 include/asm-avr32/kvm.h        | 6 ------
 include/asm-blackfin/kvm.h     | 6 ------
 include/asm-cris/kvm.h         | 6 ------
 include/asm-frv/kvm.h          | 6 ------
 include/asm-generic/Kbuild.asm | 2 ++
 include/asm-h8300/kvm.h        | 6 ------
 include/asm-m32r/kvm.h         | 6 ------
 include/asm-m68k/kvm.h         | 6 ------
 include/asm-m68knommu/kvm.h    | 6 ------
 include/asm-mips/kvm.h         | 6 ------
 include/asm-mn10300/kvm.h      | 6 ------
 include/asm-parisc/kvm.h       | 6 ------
 include/asm-s390/Kbuild        | 1 -
 include/asm-sh/kvm.h           | 6 ------
 include/asm-sparc/kvm.h        | 6 ------
 include/asm-sparc64/kvm.h      | 1 -
 include/asm-um/kvm.h           | 6 ------
 include/asm-x86/Kbuild         | 1 -
 include/asm-xtensa/kvm.h       | 6 ------
 include/linux/Kbuild           | 2 ++
 22 files changed, 4 insertions(+), 105 deletions(-)
 delete mode 100644 include/asm-alpha/kvm.h
 delete mode 100644 include/asm-arm/kvm.h
 delete mode 100644 include/asm-avr32/kvm.h
 delete mode 100644 include/asm-blackfin/kvm.h
 delete mode 100644 include/asm-cris/kvm.h
 delete mode 100644 include/asm-frv/kvm.h
 delete mode 100644 include/asm-h8300/kvm.h
 delete mode 100644 include/asm-m32r/kvm.h
 delete mode 100644 include/asm-m68k/kvm.h
 delete mode 100644 include/asm-m68knommu/kvm.h
 delete mode 100644 include/asm-mips/kvm.h
 delete mode 100644 include/asm-mn10300/kvm.h
 delete mode 100644 include/asm-parisc/kvm.h
 delete mode 100644 include/asm-sh/kvm.h
 delete mode 100644 include/asm-sparc/kvm.h
 delete mode 100644 include/asm-sparc64/kvm.h
 delete mode 100644 include/asm-um/kvm.h
 delete mode 100644 include/asm-xtensa/kvm.h

(limited to 'include/linux')

diff --git a/include/asm-alpha/kvm.h b/include/asm-alpha/kvm.h
deleted file mode 100644
index b9daec429689..000000000000
--- a/include/asm-alpha/kvm.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __LINUX_KVM_ALPHA_H
-#define __LINUX_KVM_ALPHA_H
-
-/* alpha does not support KVM */
-
-#endif
diff --git a/include/asm-arm/kvm.h b/include/asm-arm/kvm.h
deleted file mode 100644
index cb3c08cbcb9e..000000000000
--- a/include/asm-arm/kvm.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __LINUX_KVM_ARM_H
-#define __LINUX_KVM_ARM_H
-
-/* arm does not support KVM */
-
-#endif
diff --git a/include/asm-avr32/kvm.h b/include/asm-avr32/kvm.h
deleted file mode 100644
index 8c5777020e2c..000000000000
--- a/include/asm-avr32/kvm.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __LINUX_KVM_AVR32_H
-#define __LINUX_KVM_AVR32_H
-
-/* avr32 does not support KVM */
-
-#endif
diff --git a/include/asm-blackfin/kvm.h b/include/asm-blackfin/kvm.h
deleted file mode 100644
index e3477d77c014..000000000000
--- a/include/asm-blackfin/kvm.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __LINUX_KVM_BLACKFIN_H
-#define __LINUX_KVM_BLACKFIN_H
-
-/* blackfin does not support KVM */
-
-#endif
diff --git a/include/asm-cris/kvm.h b/include/asm-cris/kvm.h
deleted file mode 100644
index c860f51149f0..000000000000
--- a/include/asm-cris/kvm.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __LINUX_KVM_CRIS_H
-#define __LINUX_KVM_CRIS_H
-
-/* cris does not support KVM */
-
-#endif
diff --git a/include/asm-frv/kvm.h b/include/asm-frv/kvm.h
deleted file mode 100644
index 9c8a4f08d0a9..000000000000
--- a/include/asm-frv/kvm.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __LINUX_KVM_FRV_H
-#define __LINUX_KVM_FRV_H
-
-/* frv does not support KVM */
-
-#endif
diff --git a/include/asm-generic/Kbuild.asm b/include/asm-generic/Kbuild.asm
index 7cd25b8e7c9a..1170dc60e638 100644
--- a/include/asm-generic/Kbuild.asm
+++ b/include/asm-generic/Kbuild.asm
@@ -1,4 +1,6 @@
+ifneq ($(wildcard $(srctree)/include/asm-$(SRCARCH)/kvm.h),)
 header-y  += kvm.h
+endif
 
 ifneq ($(wildcard $(srctree)/include/asm-$(SRCARCH)/a.out.h),)
 unifdef-y += a.out.h
diff --git a/include/asm-h8300/kvm.h b/include/asm-h8300/kvm.h
deleted file mode 100644
index bdbed7b987e1..000000000000
--- a/include/asm-h8300/kvm.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __LINUX_KVM_H8300_H
-#define __LINUX_KVM_H8300_H
-
-/* h8300 does not support KVM */
-
-#endif
diff --git a/include/asm-m32r/kvm.h b/include/asm-m32r/kvm.h
deleted file mode 100644
index 99a40515b77e..000000000000
--- a/include/asm-m32r/kvm.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __LINUX_KVM_M32R_H
-#define __LINUX_KVM_M32R_H
-
-/* m32r does not support KVM */
-
-#endif
diff --git a/include/asm-m68k/kvm.h b/include/asm-m68k/kvm.h
deleted file mode 100644
index 7ed27fce5240..000000000000
--- a/include/asm-m68k/kvm.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __LINUX_KVM_M68K_H
-#define __LINUX_KVM_M68K_H
-
-/* m68k does not support KVM */
-
-#endif
diff --git a/include/asm-m68knommu/kvm.h b/include/asm-m68knommu/kvm.h
deleted file mode 100644
index b49d4258dabb..000000000000
--- a/include/asm-m68knommu/kvm.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __LINUX_KVM_M68KNOMMU_H
-#define __LINUX_KVM_M68KNOMMU_H
-
-/* m68knommu does not support KVM */
-
-#endif
diff --git a/include/asm-mips/kvm.h b/include/asm-mips/kvm.h
deleted file mode 100644
index 093a5b7f796b..000000000000
--- a/include/asm-mips/kvm.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __LINUX_KVM_MIPS_H
-#define __LINUX_KVM_MIPS_H
-
-/* mips does not support KVM */
-
-#endif
diff --git a/include/asm-mn10300/kvm.h b/include/asm-mn10300/kvm.h
deleted file mode 100644
index f6b609ff4a57..000000000000
--- a/include/asm-mn10300/kvm.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __LINUX_KVM_MN10300_H
-#define __LINUX_KVM_MN10300_H
-
-/* mn10300 does not support KVM */
-
-#endif
diff --git a/include/asm-parisc/kvm.h b/include/asm-parisc/kvm.h
deleted file mode 100644
index 00cc45812547..000000000000
--- a/include/asm-parisc/kvm.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __LINUX_KVM_PARISC_H
-#define __LINUX_KVM_PARISC_H
-
-/* parisc does not support KVM */
-
-#endif
diff --git a/include/asm-s390/Kbuild b/include/asm-s390/Kbuild
index bb5e9edb9825..63a23415fba6 100644
--- a/include/asm-s390/Kbuild
+++ b/include/asm-s390/Kbuild
@@ -7,7 +7,6 @@ header-y += tape390.h
 header-y += ucontext.h
 header-y += vtoc.h
 header-y += zcrypt.h
-header-y += kvm.h
 header-y += chsc.h
 
 unifdef-y += cmb.h
diff --git a/include/asm-sh/kvm.h b/include/asm-sh/kvm.h
deleted file mode 100644
index 6af51dbab2d0..000000000000
--- a/include/asm-sh/kvm.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __LINUX_KVM_SH_H
-#define __LINUX_KVM_SH_H
-
-/* sh does not support KVM */
-
-#endif
diff --git a/include/asm-sparc/kvm.h b/include/asm-sparc/kvm.h
deleted file mode 100644
index 2e5478da3819..000000000000
--- a/include/asm-sparc/kvm.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __LINUX_KVM_SPARC_H
-#define __LINUX_KVM_SPARC_H
-
-/* sparc does not support KVM */
-
-#endif
diff --git a/include/asm-sparc64/kvm.h b/include/asm-sparc64/kvm.h
deleted file mode 100644
index 53564ad86b15..000000000000
--- a/include/asm-sparc64/kvm.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-sparc/kvm.h>
diff --git a/include/asm-um/kvm.h b/include/asm-um/kvm.h
deleted file mode 100644
index 66aa77094551..000000000000
--- a/include/asm-um/kvm.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __LINUX_KVM_UM_H
-#define __LINUX_KVM_UM_H
-
-/* um does not support KVM */
-
-#endif
diff --git a/include/asm-x86/Kbuild b/include/asm-x86/Kbuild
index 1e3554596f72..811e9828ccb3 100644
--- a/include/asm-x86/Kbuild
+++ b/include/asm-x86/Kbuild
@@ -3,7 +3,6 @@ include include/asm-generic/Kbuild.asm
 header-y += boot.h
 header-y += bootparam.h
 header-y += debugreg.h
-header-y += kvm.h
 header-y += ldt.h
 header-y += msr-index.h
 header-y += prctl.h
diff --git a/include/asm-xtensa/kvm.h b/include/asm-xtensa/kvm.h
deleted file mode 100644
index bda4e331e98c..000000000000
--- a/include/asm-xtensa/kvm.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __LINUX_KVM_XTENSA_H
-#define __LINUX_KVM_XTENSA_H
-
-/* xtensa does not support KVM */
-
-#endif
diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index 71d70d1fbce2..402c8f55d713 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -256,7 +256,9 @@ unifdef-y += kd.h
 unifdef-y += kernelcapi.h
 unifdef-y += kernel.h
 unifdef-y += keyboard.h
+ifneq ($(wildcard $(srctree)/include/asm-$(SRCARCH)/kvm.h),)
 unifdef-y += kvm.h
+endif
 unifdef-y += llc.h
 unifdef-y += loop.h
 unifdef-y += lp.h
-- 
cgit v1.2.3


From c6af5e9f8a57467df2e55e428316a43480174521 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@saeurebad.de>
Date: Fri, 25 Jul 2008 15:48:04 +0200
Subject: bootmem: Move node allocation macros back to !HAVE_ARCH_BOOTMEM_NODE

These got unintentionally moved, put them back as x86 provides its own
versions.

Signed-off-by: Johannes Weiner <hannes@saeurebad.de>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bootmem.h | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index 4ddf2922fc8d..652470b687c9 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -103,17 +103,16 @@ extern void *__alloc_bootmem_low_node(pg_data_t *pgdat,
 	__alloc_bootmem(x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS))
 #define alloc_bootmem_low_pages(x) \
 	__alloc_bootmem_low(x, PAGE_SIZE, 0)
-#endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */
-
-extern int reserve_bootmem_generic(unsigned long addr, unsigned long size,
-				   int flags);
-
 #define alloc_bootmem_node(pgdat, x) \
 	__alloc_bootmem_node(pgdat, x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
 #define alloc_bootmem_pages_node(pgdat, x) \
 	__alloc_bootmem_node(pgdat, x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS))
 #define alloc_bootmem_low_pages_node(pgdat, x) \
 	__alloc_bootmem_low_node(pgdat, x, PAGE_SIZE, 0)
+#endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */
+
+extern int reserve_bootmem_generic(unsigned long addr, unsigned long size,
+				   int flags);
 
 extern void *alloc_bootmem_section(unsigned long size,
 				   unsigned long section_nr);
-- 
cgit v1.2.3


From b4615e69b6c6353878b734a8202b65efbc554df4 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Fri, 25 Jul 2008 13:19:22 -0700
Subject: sys_paccept definition missing __user annotation

Introduced by commit aaca0bdca573f3f51ea03139f9c7289541e7bca3 ("flag
parameters: paccept"):

  net/socket.c:1515:17: error: symbol 'sys_paccept' redeclared with different type (originally declared at include/linux/syscalls.h:413) - incompatible argument 4 (different address spaces)

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/syscalls.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 06f2bf76c030..d6ff145919ca 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -411,7 +411,7 @@ asmlinkage long sys_bind(int, struct sockaddr __user *, int);
 asmlinkage long sys_connect(int, struct sockaddr __user *, int);
 asmlinkage long sys_accept(int, struct sockaddr __user *, int __user *);
 asmlinkage long sys_paccept(int, struct sockaddr __user *, int __user *,
-			    const sigset_t *, size_t, int);
+			    const __user sigset_t *, size_t, int);
 asmlinkage long sys_getsockname(int, struct sockaddr __user *, int __user *);
 asmlinkage long sys_getpeername(int, struct sockaddr __user *, int __user *);
 asmlinkage long sys_send(int, void __user *, size_t, unsigned);
-- 
cgit v1.2.3