| author | David Mosberger <davidm@tiger.hpl.hp.com> | 2003-07-15 00:39:40 -0700 |
|---|---|---|
| committer | David Mosberger <davidm@tiger.hpl.hp.com> | 2003-07-15 00:39:40 -0700 |
| commit | 1eaad053df9105aaeaaa09a3a536fdd4669f0d25 (patch) | |
| tree | 27d24a63fa4ee14c11d3054519f0d9064b99c731 | |
| parent | ec26ea398bbad198c27cfa4fbff5bd728a6c2b2b (diff) | |
ia64: Change per-CPU implementation so that __get_cpu_var() returns the
canonical address (l-value). To get the virtually mapped
alias (which is more efficient), use __ia64_per_cpu_var(). The
latter is safe only if the address of the l-value is never passed
to another CPU (i.e., not stored in any global place).
For extremely efficient, portable per-CPU variables, there is
now a new local.h API, introduced by Rusty Russell. To use it,
declare a variable of type local_t as a per-CPU variable and
then use {__,}cpu_local_FOO() to manipulate such variables.
This patch also updates the atomic interface with a 64-bit
counter type (atomic64_t).
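
For reference, here is a minimal usage sketch of the local.h API described above. It is not part of this commit; the counter name and helper functions are invented for illustration and only assume the interfaces added by this patch.

```c
/* Hypothetical example -- not from this patch. */
#include <linux/percpu.h>
#include <linux/threads.h>
#include <asm/local.h>		/* new header added by this commit */

/* A per-CPU event counter declared as a per-CPU local_t variable. */
static DEFINE_PER_CPU(local_t, nr_events) = LOCAL_INIT(0);

void note_event (void)
{
	/* Atomic read-modify-write; safe against interrupts on this CPU. */
	cpu_local_inc(nr_events);
}

void note_event_fast (void)
{
	/*
	 * Non-atomic variant: assumes preemption is disabled and that the
	 * counter is never modified from interrupt context.
	 */
	__cpu_local_inc(nr_events);
}

long read_all_events (void)
{
	/* Cross-CPU reads go through the canonical per_cpu() accessor. */
	long sum = 0;
	int cpu;

	for (cpu = 0; cpu < NR_CPUS; ++cpu)
		sum += local_read(&per_cpu(nr_events, cpu));
	return sum;
}
```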
| Mode | Path | Lines changed |
|---|---|---|
| -rw-r--r-- | arch/ia64/kernel/ia64_ksyms.c | 3 |
| -rw-r--r-- | arch/ia64/kernel/perfmon.c | 2 |
| -rw-r--r-- | arch/ia64/kernel/setup.c | 14 |
| -rw-r--r-- | arch/ia64/kernel/smp.c | 4 |
| -rw-r--r-- | include/asm-ia64/local.h | 50 |
| -rw-r--r-- | include/asm-ia64/atomic.h | 77 |
| -rw-r--r-- | include/asm-ia64/mmu_context.h | 4 |
| -rw-r--r-- | include/asm-ia64/percpu.h | 55 |
| -rw-r--r-- | include/asm-ia64/processor.h | 6 |
| -rw-r--r-- | include/asm-ia64/tlb.h | 2 |
10 files changed, 184 insertions, 33 deletions
diff --git a/arch/ia64/kernel/ia64_ksyms.c b/arch/ia64/kernel/ia64_ksyms.c
index d06ed2f76894..e98440c847e9 100644
--- a/arch/ia64/kernel/ia64_ksyms.c
+++ b/arch/ia64/kernel/ia64_ksyms.c
@@ -64,9 +64,10 @@ EXPORT_SYMBOL(ia64_pfn_valid);
 #endif
 
 #include <asm/processor.h>
-EXPORT_SYMBOL(cpu_info__per_cpu);
+EXPORT_SYMBOL(per_cpu__cpu_info);
 #ifdef CONFIG_SMP
 EXPORT_SYMBOL(__per_cpu_offset);
+EXPORT_SYMBOL(per_cpu__local_per_cpu_offset);
 #endif
 
 EXPORT_SYMBOL(kernel_thread);
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index 979e659fcef1..88b591be8faa 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -566,7 +566,7 @@ static struct vm_operations_struct pfm_vm_ops={
 #define pfm_wait_task_inactive(t)	wait_task_inactive(t)
-#define pfm_get_cpu_var(v)		__get_cpu_var(v)
+#define pfm_get_cpu_var(v)		__ia64_per_cpu_var(v)
 #define pfm_get_cpu_data(a,b)		per_cpu(a, b)
 typedef	irqreturn_t	pfm_irq_handler_t;
 #define PFM_IRQ_HANDLER_RET(v)	do {  \
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index 02fa6ce85268..484e8f451be3 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -56,6 +56,7 @@ unsigned long __per_cpu_offset[NR_CPUS];
 #endif
 
 DEFINE_PER_CPU(struct cpuinfo_ia64, cpu_info);
+DEFINE_PER_CPU(unsigned long, local_per_cpu_offset);
 DEFINE_PER_CPU(unsigned long, ia64_phys_stacked_size_p8);
 unsigned long ia64_cycles_per_usec;
 struct ia64_boot_param *ia64_boot_param;
@@ -709,6 +710,8 @@ cpu_init (void)
 			memcpy(cpu_data, __phys_per_cpu_start, __per_cpu_end - __per_cpu_start);
 			__per_cpu_offset[cpu] = (char *) cpu_data - __per_cpu_start;
 			cpu_data += PERCPU_PAGE_SIZE;
+
+			per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu];
 		}
 	}
 	cpu_data = __per_cpu_start + __per_cpu_offset[smp_processor_id()];
@@ -716,19 +719,18 @@ cpu_init (void)
 	cpu_data = __phys_per_cpu_start;
 #endif /* !CONFIG_SMP */
 
-	cpu_info = cpu_data + ((char *) &__get_cpu_var(cpu_info) - __per_cpu_start);
-#ifdef CONFIG_NUMA
-	cpu_info->node_data = get_node_data_ptr();
-#endif
-
 	get_max_cacheline_size();
 
 	/*
 	 * We can't pass "local_cpu_data" to identify_cpu() because we haven't called
 	 * ia64_mmu_init() yet.  And we can't call ia64_mmu_init() first because it
 	 * depends on the data returned by identify_cpu().  We break the dependency by
-	 * accessing cpu_data() the old way, through identity mapped space.
+	 * accessing cpu_data() through the canonical per-CPU address.
 	 */
+	cpu_info = cpu_data + ((char *) &__ia64_per_cpu_var(cpu_info) - __per_cpu_start);
+#ifdef CONFIG_NUMA
+	cpu_info->node_data = get_node_data_ptr();
+#endif
 	identify_cpu(cpu_info);
 
 #ifdef CONFIG_MCKINLEY
diff --git a/arch/ia64/kernel/smp.c b/arch/ia64/kernel/smp.c
index 9727272ffb85..61cf1097dd82 100644
--- a/arch/ia64/kernel/smp.c
+++ b/arch/ia64/kernel/smp.c
@@ -72,7 +72,7 @@ static volatile struct call_data_struct *call_data;
 #define IPI_CPU_STOP		1
 
 /* This needs to be cacheline aligned because it is written to by *other* CPUs.  */
-static DEFINE_PER_CPU(__u64, ipi_operation) ____cacheline_aligned;
+static DEFINE_PER_CPU(u64, ipi_operation) ____cacheline_aligned;
 
 static void
 stop_this_cpu (void)
@@ -91,7 +91,7 @@ irqreturn_t
 handle_IPI (int irq, void *dev_id, struct pt_regs *regs)
 {
 	int this_cpu = get_cpu();
-	unsigned long *pending_ipis = &__get_cpu_var(ipi_operation);
+	unsigned long *pending_ipis = &__ia64_per_cpu_var(ipi_operation);
 	unsigned long ops;
 
 	/* Count this now; we may make a call that never returns.  */
diff --git a/include/asm-ia64/local.h b/include/asm-ia64/local.h
new file mode 100644
index 000000000000..1dbd584ad851
--- /dev/null
+++ b/include/asm-ia64/local.h
@@ -0,0 +1,50 @@
+#ifndef _ASM_IA64_LOCAL_H
+#define _ASM_IA64_LOCAL_H
+
+/*
+ * Copyright (C) 2003 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+#include <linux/percpu.h>
+
+typedef struct {
+	atomic64_t val;
+} local_t;
+
+#define LOCAL_INIT(i)	((local_t) { { (i) } })
+#define local_read(l)	atomic64_read(&(l)->val)
+#define local_set(l, i)	atomic64_set(&(l)->val, i)
+#define local_inc(l)	atomic64_inc(&(l)->val)
+#define local_dec(l)	atomic64_dec(&(l)->val)
+#define local_add(i, l)	atomic64_add((i), &(l)->val)
+#define local_sub(i, l)	atomic64_sub((i), &(l)->val)
+
+/* Non-atomic variants, i.e., preemption disabled and won't be touched in interrupt, etc.  */
+
+#define __local_inc(l)		(++(l)->val.counter)
+#define __local_dec(l)		(--(l)->val.counter)
+#define __local_add(i,l)	((l)->val.counter += (i))
+#define __local_sub(i,l)	((l)->val.counter -= (i))
+
+/*
+ * Use these for per-cpu local_t variables.  Note they take a variable (eg. mystruct.foo),
+ * not an address.
+ */
+#define cpu_local_read(v)	local_read(&__ia64_per_cpu_var(v))
+#define cpu_local_set(v, i)	local_set(&__ia64_per_cpu_var(v), (i))
+#define cpu_local_inc(v)	local_inc(&__ia64_per_cpu_var(v))
+#define cpu_local_dec(v)	local_dec(&__ia64_per_cpu_var(v))
+#define cpu_local_add(i, v)	local_add((i), &__ia64_per_cpu_var(v))
+#define cpu_local_sub(i, v)	local_sub((i), &__ia64_per_cpu_var(v))
+
+/*
+ * Non-atomic increments, i.e., preemption disabled and won't be touched in interrupt,
+ * etc.
+ */
+#define __cpu_local_inc(v)	__local_inc(&__ia64_per_cpu_var(v))
+#define __cpu_local_dec(v)	__local_dec(&__ia64_per_cpu_var(v))
+#define __cpu_local_add(i, v)	__local_add((i), &__ia64_per_cpu_var(v))
+#define __cpu_local_sub(i, v)	__local_sub((i), &__ia64_per_cpu_var(v))
+
+#endif /* _ASM_IA64_LOCAL_H */
diff --git a/include/asm-ia64/atomic.h b/include/asm-ia64/atomic.h
index 37029e9f75c4..93d47187a650 100644
--- a/include/asm-ia64/atomic.h
+++ b/include/asm-ia64/atomic.h
@@ -9,7 +9,7 @@
  * "int" types were carefully placed so as to ensure proper operation
  * of the macros.
  *
- * Copyright (C) 1998, 1999, 2002 Hewlett-Packard Co
+ * Copyright (C) 1998, 1999, 2002-2003 Hewlett-Packard Co
  *	David Mosberger-Tang <davidm@hpl.hp.com>
  */
 #include <linux/types.h>
@@ -21,11 +21,16 @@
  * memory accesses are ordered.
  */
 typedef struct { volatile __s32 counter; } atomic_t;
+typedef struct { volatile __s64 counter; } atomic64_t;
 
 #define ATOMIC_INIT(i)		((atomic_t) { (i) })
+#define ATOMIC64_INIT(i)	((atomic64_t) { (i) })
 
 #define atomic_read(v)		((v)->counter)
+#define atomic64_read(v)	((v)->counter)
+
 #define atomic_set(v,i)		(((v)->counter) = (i))
+#define atomic64_set(v,i)	(((v)->counter) = (i))
 
 static __inline__ int
 ia64_atomic_add (int i, atomic_t *v)
@@ -37,7 +42,21 @@ ia64_atomic_add (int i, atomic_t *v)
 		CMPXCHG_BUGCHECK(v);
 		old = atomic_read(v);
 		new = old + i;
-	} while (ia64_cmpxchg("acq", v, old, old + i, sizeof(atomic_t)) != old);
+	} while (ia64_cmpxchg("acq", v, old, new, sizeof(atomic_t)) != old);
+	return new;
+}
+
+static __inline__ int
+ia64_atomic64_add (int i, atomic64_t *v)
+{
+	__s64 old, new;
+	CMPXCHG_BUGCHECK_DECL
+
+	do {
+		CMPXCHG_BUGCHECK(v);
+		old = atomic_read(v);
+		new = old + i;
+	} while (ia64_cmpxchg("acq", v, old, new, sizeof(atomic_t)) != old);
 	return new;
 }
 
@@ -55,6 +74,20 @@ ia64_atomic_sub (int i, atomic_t *v)
 	return new;
 }
 
+static __inline__ int
+ia64_atomic64_sub (int i, atomic64_t *v)
+{
+	__s64 old, new;
+	CMPXCHG_BUGCHECK_DECL
+
+	do {
+		CMPXCHG_BUGCHECK(v);
+		old = atomic_read(v);
+		new = old - i;
+	} while (ia64_cmpxchg("acq", v, old, new, sizeof(atomic_t)) != old);
+	return new;
+}
+
 #define atomic_add_return(i,v)						\
 ({									\
 	int __ia64_aar_i = (i);						\
@@ -67,6 +100,18 @@ ia64_atomic_sub (int i, atomic_t *v)
 	 : ia64_atomic_add(__ia64_aar_i, v);				\
 })
 
+#define atomic64_add_return(i,v)					\
+({									\
+	long __ia64_aar_i = (i);					\
+	(__builtin_constant_p(i)					\
+	 && (   (__ia64_aar_i ==  1) || (__ia64_aar_i ==   4)		\
+	     || (__ia64_aar_i ==  8) || (__ia64_aar_i ==  16)		\
+	     || (__ia64_aar_i == -1) || (__ia64_aar_i ==  -4)		\
+	     || (__ia64_aar_i == -8) || (__ia64_aar_i == -16)))		\
+		? ia64_fetch_and_add(__ia64_aar_i, &(v)->counter)	\
+		: ia64_atomic64_add(__ia64_aar_i, v);			\
+})
+
 /*
  * Atomically add I to V and return TRUE if the resulting value is
  * negative.
@@ -77,6 +122,12 @@ atomic_add_negative (int i, atomic_t *v)
 	return atomic_add_return(i, v) < 0;
 }
 
+static __inline__ int
+atomic64_add_negative (int i, atomic64_t *v)
+{
+	return atomic64_add_return(i, v) < 0;
+}
+
 #define atomic_sub_return(i,v)						\
 ({									\
 	int __ia64_asr_i = (i);						\
@@ -89,18 +140,40 @@ atomic_add_negative (int i, atomic_t *v)
 	 : ia64_atomic_sub(__ia64_asr_i, v);				\
 })
 
+#define atomic64_sub_return(i,v)					\
+({									\
+	long __ia64_asr_i = (i);					\
+	(__builtin_constant_p(i)					\
+	 && (   (__ia64_asr_i ==  1) || (__ia64_asr_i ==   4)		\
+	     || (__ia64_asr_i ==  8) || (__ia64_asr_i ==  16)		\
+	     || (__ia64_asr_i == -1) || (__ia64_asr_i ==  -4)		\
+	     || (__ia64_asr_i == -8) || (__ia64_asr_i == -16)))		\
+		? ia64_fetch_and_add(-__ia64_asr_i, &(v)->counter)	\
+		: ia64_atomic64_sub(__ia64_asr_i, v);			\
+})
+
 #define atomic_dec_return(v)		atomic_sub_return(1, (v))
 #define atomic_inc_return(v)		atomic_add_return(1, (v))
+#define atomic64_dec_return(v)		atomic64_sub_return(1, (v))
+#define atomic64_inc_return(v)		atomic64_add_return(1, (v))
 
 #define atomic_sub_and_test(i,v)	(atomic_sub_return((i), (v)) == 0)
 #define atomic_dec_and_test(v)		(atomic_sub_return(1, (v)) == 0)
 #define atomic_inc_and_test(v)		(atomic_add_return(1, (v)) != 0)
+#define atomic64_sub_and_test(i,v)	(atomic64_sub_return((i), (v)) == 0)
+#define atomic64_dec_and_test(v)	(atomic64_sub_return(1, (v)) == 0)
+#define atomic64_inc_and_test(v)	(atomic64_add_return(1, (v)) != 0)
 
 #define atomic_add(i,v)			atomic_add_return((i), (v))
 #define atomic_sub(i,v)			atomic_sub_return((i), (v))
 #define atomic_inc(v)			atomic_add(1, (v))
 #define atomic_dec(v)			atomic_sub(1, (v))
 
+#define atomic64_add(i,v)		atomic64_add_return((i), (v))
+#define atomic64_sub(i,v)		atomic64_sub_return((i), (v))
+#define atomic64_inc(v)			atomic64_add(1, (v))
+#define atomic64_dec(v)			atomic64_sub(1, (v))
+
 /* Atomic operations are already serializing */
 #define smp_mb__before_atomic_dec()	barrier()
 #define smp_mb__after_atomic_dec()	barrier()
diff --git a/include/asm-ia64/mmu_context.h b/include/asm-ia64/mmu_context.h
index dee1cd007f5a..95e786212982 100644
--- a/include/asm-ia64/mmu_context.h
+++ b/include/asm-ia64/mmu_context.h
@@ -86,9 +86,9 @@ delayed_tlb_flush (void)
 {
 	extern void local_flush_tlb_all (void);
 
-	if (unlikely(__get_cpu_var(ia64_need_tlb_flush))) {
+	if (unlikely(__ia64_per_cpu_var(ia64_need_tlb_flush))) {
 		local_flush_tlb_all();
-		__get_cpu_var(ia64_need_tlb_flush) = 0;
+		__ia64_per_cpu_var(ia64_need_tlb_flush) = 0;
 	}
 }
diff --git a/include/asm-ia64/percpu.h b/include/asm-ia64/percpu.h
index cd4a79d645a0..947feb5ce17e 100644
--- a/include/asm-ia64/percpu.h
+++ b/include/asm-ia64/percpu.h
@@ -1,43 +1,66 @@
 #ifndef _ASM_IA64_PERCPU_H
 #define _ASM_IA64_PERCPU_H
 
-#include <linux/config.h>
-#include <linux/compiler.h>
-
 /*
  * Copyright (C) 2002-2003 Hewlett-Packard Co
  *	David Mosberger-Tang <davidm@hpl.hp.com>
  */
+
 #define PERCPU_ENOUGH_ROOM PERCPU_PAGE_SIZE
 
 #ifdef __ASSEMBLY__
-
-#define THIS_CPU(var)	(var##__per_cpu)  /* use this to mark accesses to per-CPU variables... */
-
+# define THIS_CPU(var)	(per_cpu__##var)  /* use this to mark accesses to per-CPU variables... */
 #else /* !__ASSEMBLY__ */
 
+#include <linux/config.h>
+
 #include <linux/threads.h>
 
+#define DECLARE_PER_CPU(type, name) extern __typeof__(type) per_cpu__##name
+
+/*
+ * Pretty much a literal copy of asm-generic/percpu.h, except that percpu_modcopy() is an
+ * external routine, to avoid include-hell.
+ */
+#ifdef CONFIG_SMP
+
 extern unsigned long __per_cpu_offset[NR_CPUS];
+
+/* Equal to __per_cpu_offset[smp_processor_id()], but faster to access: */
+DECLARE_PER_CPU(unsigned long, local_per_cpu_offset);
+
+/* Separate out the type, so (int[3], foo) works.  */
 #define DEFINE_PER_CPU(type, name) \
-    __attribute__((__section__(".data.percpu"))) __typeof__(type) name##__per_cpu
-#define DECLARE_PER_CPU(type, name) extern __typeof__(type) name##__per_cpu
+	__attribute__((__section__(".data.percpu"))) __typeof__(type) per_cpu__##name
 
-#define __get_cpu_var(var)	(var##__per_cpu)
-#ifdef CONFIG_SMP
-# define per_cpu(var, cpu)	(*RELOC_HIDE(&var##__per_cpu, __per_cpu_offset[cpu]))
+#define per_cpu(var, cpu)  (*RELOC_HIDE(&per_cpu__##var, __per_cpu_offset[cpu]))
+#define __get_cpu_var(var) (*RELOC_HIDE(&per_cpu__##var, __ia64_per_cpu_var(local_per_cpu_offset)))
+
 extern void percpu_modcopy(void *pcpudst, const void *src, unsigned long size);
-#else
-# define per_cpu(var, cpu)			((void)cpu, __get_cpu_var(var))
-#endif
 
-#define EXPORT_PER_CPU_SYMBOL(var)		EXPORT_SYMBOL(var##__per_cpu)
-#define EXPORT_PER_CPU_SYMBOL_GPL(var)		EXPORT_SYMBOL_GPL(var##__per_cpu)
+#else /* ! SMP */
+
+#define DEFINE_PER_CPU(type, name) __typeof__(type) per_cpu__##name
+
+#define per_cpu(var, cpu)			((void)cpu, per_cpu__##var)
+#define __get_cpu_var(var)			per_cpu__##var
+
+#endif	/* SMP */
+
+#define EXPORT_PER_CPU_SYMBOL(var)		EXPORT_SYMBOL(per_cpu__##var)
+#define EXPORT_PER_CPU_SYMBOL_GPL(var)		EXPORT_SYMBOL_GPL(per_cpu__##var)
+
+/* ia64-specific part: */
 
 extern void setup_per_cpu_areas (void);
 
+/*
+ * Be extremely careful when taking the address of this variable!  Due to virtual
+ * remapping, it is different from the canonical address returned by __get_cpu_var(var)!
+ * On the positive side, using __ia64_per_cpu_var() instead of __get_cpu_var() is slightly
+ * more efficient.
+ */
+#define __ia64_per_cpu_var(var)	(per_cpu__##var)
+
 #endif /* !__ASSEMBLY__ */
 
 #endif /* _ASM_IA64_PERCPU_H */
diff --git a/include/asm-ia64/processor.h b/include/asm-ia64/processor.h
index 56e55b0a5061..669e44bf8012 100644
--- a/include/asm-ia64/processor.h
+++ b/include/asm-ia64/processor.h
@@ -191,10 +191,12 @@ struct cpuinfo_ia64 {
 DECLARE_PER_CPU(struct cpuinfo_ia64, cpu_info);
 
 /*
- * The "local" data pointer.  It points to the per-CPU data of the currently executing
+ * The "local" data variable.  It refers to the per-CPU data of the currently executing
  * CPU, much like "current" points to the per-task data of the currently executing task.
+ * Do not use the address of local_cpu_data, since it will be different from
+ * cpu_data(smp_processor_id())!
  */
-#define local_cpu_data		(&__get_cpu_var(cpu_info))
+#define local_cpu_data		(&__ia64_per_cpu_var(cpu_info))
 #define cpu_data(cpu)		(&per_cpu(cpu_info, cpu))
 
 extern void identify_cpu (struct cpuinfo_ia64 *);
diff --git a/include/asm-ia64/tlb.h b/include/asm-ia64/tlb.h
index 3d0141eb9eaa..ec51cacaf232 100644
--- a/include/asm-ia64/tlb.h
+++ b/include/asm-ia64/tlb.h
@@ -126,7 +126,7 @@ ia64_tlb_flush_mmu (struct mmu_gather *tlb, unsigned long start, unsigned long e
 static inline struct mmu_gather *
 tlb_gather_mmu (struct mm_struct *mm, unsigned int full_mm_flush)
 {
-	struct mmu_gather *tlb = &per_cpu(mmu_gathers, smp_processor_id());
+	struct mmu_gather *tlb = &__get_cpu_var(mmu_gathers);
 
 	tlb->mm = mm;
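
To make the distinction drawn in the commit message concrete, here is a rough sketch of how the two accessors differ in use. It is not from this patch; the structure, array, and function names are invented for illustration and only assume the __get_cpu_var()/__ia64_per_cpu_var() semantics introduced above.

```c
/* Hypothetical illustration -- not part of this patch. */
#include <linux/percpu.h>
#include <linux/smp.h>
#include <linux/threads.h>

struct pkt_stats {				/* made-up per-CPU statistics */
	unsigned long rx;
};

static DEFINE_PER_CPU(struct pkt_stats, pkt_stats);
static struct pkt_stats *stats_table[NR_CPUS];	/* global registry */

void publish_my_stats (void)
{
	int cpu = get_cpu();

	/*
	 * __get_cpu_var() now returns the canonical l-value, so its address
	 * may be stored globally and later dereferenced by any CPU.
	 */
	stats_table[cpu] = &__get_cpu_var(pkt_stats);
	put_cpu();
}

void count_rx_packet (void)
{
	/*
	 * __ia64_per_cpu_var() goes through the virtually mapped alias:
	 * cheaper to access, but its address must never be handed to
	 * another CPU or stored in a global place.  (Caller is assumed
	 * to run with preemption disabled.)
	 */
	__ia64_per_cpu_var(pkt_stats).rx++;
}
```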
