| author | David Mosberger <davidm@tiger.hpl.hp.com> | 2003-07-15 00:39:40 -0700 |
|---|---|---|
| committer | David Mosberger <davidm@tiger.hpl.hp.com> | 2003-07-15 00:39:40 -0700 |
| commit | 1eaad053df9105aaeaaa09a3a536fdd4669f0d25 (patch) | |
| tree | 27d24a63fa4ee14c11d3054519f0d9064b99c731 | |
| parent | ec26ea398bbad198c27cfa4fbff5bd728a6c2b2b (diff) | |
ia64: Change per-CPU implementation so that __get_cpu_var() returns the
canonical address (l-value). To get the virtually mapped
alias (which is more efficient), use __ia64_per_cpu_var(). The
latter is safe only if the address of the l-value is never passed
to another CPU (i.e., not stored in any global place).
For extremely efficient, portable per-CPU variables, there is
now a new local.h API, introduced by Rusty Russell. To use it,
declare a variable of type local_t as a per-CPU variable and
then use {__,}cpu_local_FOO() to manipulate such variables.
This patch also updates the atomic interface with a 64-bit
counter type (atomic64_t).
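
For reference, here is a minimal usage sketch of the local.h API described above. It is not part of this commit; the counter name and helper functions are invented for illustration and only assume the interfaces added by this patch.

```c
/* Hypothetical example -- not from this patch. */
#include <linux/percpu.h>
#include <linux/threads.h>
#include <asm/local.h>		/* new header added by this commit */

/* A per-CPU event counter declared as a per-CPU local_t variable. */
static DEFINE_PER_CPU(local_t, nr_events) = LOCAL_INIT(0);

void note_event (void)
{
	/* Atomic read-modify-write; safe against interrupts on this CPU. */
	cpu_local_inc(nr_events);
}

void note_event_fast (void)
{
	/*
	 * Non-atomic variant: assumes preemption is disabled and that the
	 * counter is never modified from interrupt context.
	 */
	__cpu_local_inc(nr_events);
}

long read_all_events (void)
{
	/* Cross-CPU reads go through the canonical per_cpu() accessor. */
	long sum = 0;
	int cpu;

	for (cpu = 0; cpu < NR_CPUS; ++cpu)
		sum += local_read(&per_cpu(nr_events, cpu));
	return sum;
}
```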
| Mode | Path | Lines changed |
|---|---|---|
| -rw-r--r-- | arch/ia64/kernel/ia64_ksyms.c | 3 |
| -rw-r--r-- | arch/ia64/kernel/perfmon.c | 2 |
| -rw-r--r-- | arch/ia64/kernel/setup.c | 14 |
| -rw-r--r-- | arch/ia64/kernel/smp.c | 4 |
| -rw-r--r-- | include/asm-ia64/local.h | 50 |
| -rw-r--r-- | include/asm-ia64/atomic.h | 77 |
| -rw-r--r-- | include/asm-ia64/mmu_context.h | 4 |
| -rw-r--r-- | include/asm-ia64/percpu.h | 55 |
| -rw-r--r-- | include/asm-ia64/processor.h | 6 |
| -rw-r--r-- | include/asm-ia64/tlb.h | 2 |
10 files changed, 184 insertions, 33 deletions
diff --git a/arch/ia64/kernel/ia64_ksyms.c b/arch/ia64/kernel/ia64_ksyms.c
index d06ed2f76894..e98440c847e9 100644
--- a/arch/ia64/kernel/ia64_ksyms.c
+++ b/arch/ia64/kernel/ia64_ksyms.c
@@ -64,9 +64,10 @@ EXPORT_SYMBOL(ia64_pfn_valid);
 #endif
 
 #include <asm/processor.h>
-EXPORT_SYMBOL(cpu_info__per_cpu);
+EXPORT_SYMBOL(per_cpu__cpu_info);
 #ifdef CONFIG_SMP
 EXPORT_SYMBOL(__per_cpu_offset);
+EXPORT_SYMBOL(per_cpu__local_per_cpu_offset);
 #endif
 
 EXPORT_SYMBOL(kernel_thread);
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index 979e659fcef1..88b591be8faa 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -566,7 +566,7 @@ static struct vm_operations_struct pfm_vm_ops={
 #define pfm_wait_task_inactive(t)	wait_task_inactive(t)
-#define pfm_get_cpu_var(v)		__get_cpu_var(v)
+#define pfm_get_cpu_var(v)		__ia64_per_cpu_var(v)
 #define pfm_get_cpu_data(a,b)		per_cpu(a, b)
 typedef	irqreturn_t	pfm_irq_handler_t;
 #define PFM_IRQ_HANDLER_RET(v)	do {  \
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index 02fa6ce85268..484e8f451be3 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -56,6 +56,7 @@ unsigned long __per_cpu_offset[NR_CPUS];
 #endif
 
 DEFINE_PER_CPU(struct cpuinfo_ia64, cpu_info);
+DEFINE_PER_CPU(unsigned long, local_per_cpu_offset);
 DEFINE_PER_CPU(unsigned long, ia64_phys_stacked_size_p8);
 unsigned long ia64_cycles_per_usec;
 struct ia64_boot_param *ia64_boot_param;
@@ -709,6 +710,8 @@ cpu_init (void)
 			memcpy(cpu_data, __phys_per_cpu_start, __per_cpu_end - __per_cpu_start);
 			__per_cpu_offset[cpu] = (char *) cpu_data - __per_cpu_start;
 			cpu_data += PERCPU_PAGE_SIZE;
+
+			per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu];
 		}
 	}
 	cpu_data = __per_cpu_start + __per_cpu_offset[smp_processor_id()];
@@ -716,19 +719,18 @@ cpu_init (void)
 	cpu_data = __phys_per_cpu_start;
 #endif /* !CONFIG_SMP */
 
-	cpu_info = cpu_data + ((char *) &__get_cpu_var(cpu_info) - __per_cpu_start);
-#ifdef CONFIG_NUMA
-	cpu_info->node_data = get_node_data_ptr();
-#endif
-
 	get_max_cacheline_size();
 
 	/*
 	 * We can't pass "local_cpu_data" to identify_cpu() because we haven't called
 	 * ia64_mmu_init() yet.  And we can't call ia64_mmu_init() first because it
 	 * depends on the data returned by identify_cpu().  We break the dependency by
-	 * accessing cpu_data() the old way, through identity mapped space.
+	 * accessing cpu_data() through the canonical per-CPU address.
 	 */
+	cpu_info = cpu_data + ((char *) &__ia64_per_cpu_var(cpu_info) - __per_cpu_start);
+#ifdef CONFIG_NUMA
+	cpu_info->node_data = get_node_data_ptr();
+#endif
 	identify_cpu(cpu_info);
 
 #ifdef CONFIG_MCKINLEY
diff --git a/arch/ia64/kernel/smp.c b/arch/ia64/kernel/smp.c
index 9727272ffb85..61cf1097dd82 100644
--- a/arch/ia64/kernel/smp.c
+++ b/arch/ia64/kernel/smp.c
@@ -72,7 +72,7 @@ static volatile struct call_data_struct *call_data;
 #define IPI_CPU_STOP		1
 
 /* This needs to be cacheline aligned because it is written to by *other* CPUs.  */
-static DEFINE_PER_CPU(__u64, ipi_operation) ____cacheline_aligned;
+static DEFINE_PER_CPU(u64, ipi_operation) ____cacheline_aligned;
 
 static void
 stop_this_cpu (void)
@@ -91,7 +91,7 @@ irqreturn_t
 handle_IPI (int irq, void *dev_id, struct pt_regs *regs)
 {
 	int this_cpu = get_cpu();
-	unsigned long *pending_ipis = &__get_cpu_var(ipi_operation);
+	unsigned long *pending_ipis = &__ia64_per_cpu_var(ipi_operation);
 	unsigned long ops;
 
 	/* Count this now; we may make a call that never returns.  */
diff --git a/include/asm-ia64/local.h b/include/asm-ia64/local.h
new file mode 100644
index 000000000000..1dbd584ad851
--- /dev/null
+++ b/include/asm-ia64/local.h
@@ -0,0 +1,50 @@
+#ifndef _ASM_IA64_LOCAL_H
+#define _ASM_IA64_LOCAL_H
+
+/*
+ * Copyright (C) 2003 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+#include <linux/percpu.h>
+
+typedef struct {
+	atomic64_t val;
+} local_t;
+
+#define LOCAL_INIT(i)	((local_t) { { (i) } })
+#define local_read(l)	atomic64_read(&(l)->val)
+#define local_set(l, i)	atomic64_set(&(l)->val, i)
+#define local_inc(l)	atomic64_inc(&(l)->val)
+#define local_dec(l)	atomic64_dec(&(l)->val)
+#define local_add(i, l)	atomic64_add((i), &(l)->val)
+#define local_sub(i, l)	atomic64_sub((i), &(l)->val)
+
+/* Non-atomic variants, i.e., preemption disabled and won't be touched in interrupt, etc.  */
+
+#define __local_inc(l)		(++(l)->val.counter)
+#define __local_dec(l)		(--(l)->val.counter)
+#define __local_add(i,l)	((l)->val.counter += (i))
+#define __local_sub(i,l)	((l)->val.counter -= (i))
+
+/*
+ * Use these for per-cpu local_t variables.  Note they take a variable (eg. mystruct.foo),
+ * not an address.
+ */
+#define cpu_local_read(v)	local_read(&__ia64_per_cpu_var(v))
+#define cpu_local_set(v, i)	local_set(&__ia64_per_cpu_var(v), (i))
+#define cpu_local_inc(v)	local_inc(&__ia64_per_cpu_var(v))
+#define cpu_local_dec(v)	local_dec(&__ia64_per_cpu_var(v))
+#define cpu_local_add(i, v)	local_add((i), &__ia64_per_cpu_var(v))
+#define cpu_local_sub(i, v)	local_sub((i), &__ia64_per_cpu_var(v))
+
+/*
+ * Non-atomic increments, i.e., preemption disabled and won't be touched in interrupt,
+ * etc.
+ */
+#define __cpu_local_inc(v)	__local_inc(&__ia64_per_cpu_var(v))
+#define __cpu_local_dec(v)	__local_dec(&__ia64_per_cpu_var(v))
+#define __cpu_local_add(i, v)	__local_add((i), &__ia64_per_cpu_var(v))
+#define __cpu_local_sub(i, v)	__local_sub((i), &__ia64_per_cpu_var(v))
+
+#endif /* _ASM_IA64_LOCAL_H */
diff --git a/include/asm-ia64/atomic.h b/include/asm-ia64/atomic.h
index 37029e9f75c4..93d47187a650 100644
--- a/include/asm-ia64/atomic.h
+++ b/include/asm-ia64/atomic.h
@@ -9,7 +9,7 @@
  * "int" types were carefully placed so as to ensure proper operation
  * of the macros.
  *
- * Copyright (C) 1998, 1999, 2002 Hewlett-Packard Co
+ * Copyright (C) 1998, 1999, 2002-2003 Hewlett-Packard Co
  *	David Mosberger-Tang <davidm@hpl.hp.com>
  */
 #include <linux/types.h>
@@ -21,11 +21,16 @@
  * memory accesses are ordered.
  */
 typedef struct { volatile __s32 counter; } atomic_t;
+typedef struct { volatile __s64 counter; } atomic64_t;
 
 #define ATOMIC_INIT(i)		((atomic_t) { (i) })
+#define ATOMIC64_INIT(i)	((atomic64_t) { (i) })
 
 #define atomic_read(v)		((v)->counter)
+#define atomic64_read(v)	((v)->counter)
+
 #define atomic_set(v,i)		(((v)->counter) = (i))
+#define atomic64_set(v,i)	(((v)->counter) = (i))
 
 static __inline__ int
 ia64_atomic_add (int i, atomic_t *v)
@@ -37,7 +42,21 @@ ia64_atomic_add (int i, atomic_t *v)
 		CMPXCHG_BUGCHECK(v);
 		old = atomic_read(v);
 		new = old + i;
-	} while (ia64_cmpxchg("acq", v, old, old + i, sizeof(atomic_t)) != old);
+	} while (ia64_cmpxchg("acq", v, old, new, sizeof(atomic_t)) != old);
+	return new;
+}
+
+static __inline__ int
+ia64_atomic64_add (int i, atomic64_t *v)
+{
+	__s64 old, new;
+	CMPXCHG_BUGCHECK_DECL
+
+	do {
+		CMPXCHG_BUGCHECK(v);
+		old = atomic_read(v);
+		new = old + i;
+	} while (ia64_cmpxchg("acq", v, old, new, sizeof(atomic_t)) != old);
 	return new;
 }
 
@@ -55,6 +74,20 @@ ia64_atomic_sub (int i, atomic_t *v)
 	return new;
 }
 
+static __inline__ int
+ia64_atomic64_sub (int i, atomic64_t *v)
+{
+	__s64 old, new;
+	CMPXCHG_BUGCHECK_DECL
+
+	do {
+		CMPXCHG_BUGCHECK(v);
+		old = atomic_read(v);
+		new = old - i;
+	} while (ia64_cmpxchg("acq", v, old, new, sizeof(atomic_t)) != old);
+	return new;
+}
+
 #define atomic_add_return(i,v)						\
 ({									\
 	int __ia64_aar_i = (i);						\
@@ -67,6 +100,18 @@ ia64_atomic_sub (int i, atomic_t *v)
 	 : ia64_atomic_add(__ia64_aar_i, v);				\
 })
 
+#define atomic64_add_return(i,v)					\
+({									\
+	long __ia64_aar_i = (i);					\
+	(__builtin_constant_p(i)					\
+	 && (   (__ia64_aar_i ==  1) || (__ia64_aar_i ==   4)		\
+	     || (__ia64_aar_i ==  8) || (__ia64_aar_i ==  16)		\
+	     || (__ia64_aar_i == -1) || (__ia64_aar_i ==  -4)		\
+	     || (__ia64_aar_i == -8) || (__ia64_aar_i == -16)))		\
+		? ia64_fetch_and_add(__ia64_aar_i, &(v)->counter)	\
+		: ia64_atomic64_add(__ia64_aar_i, v);			\
+})
+
 /*
  * Atomically add I to V and return TRUE if the resulting value is
  * negative.
@@ -77,6 +122,12 @@ atomic_add_negative (int i, atomic_t *v)
 	return atomic_add_return(i, v) < 0;
 }
 
+static __inline__ int
+atomic64_add_negative (int i, atomic64_t *v)
+{
+	return atomic64_add_return(i, v) < 0;
+}
+
 #define atomic_sub_return(i,v)						\
 ({									\
 	int __ia64_asr_i = (i);						\
@@ -89,18 +140,40 @@ atomic_add_negative (int i, atomic_t *v)
 	 : ia64_atomic_sub(__ia64_asr_i, v);				\
 })
 
+#define atomic64_sub_return(i,v)					\
+({									\
+	long __ia64_asr_i = (i);					\
+	(__builtin_constant_p(i)					\
+	 && (   (__ia64_asr_i ==  1) || (__ia64_asr_i ==   4)		\
+	     || (__ia64_asr_i ==  8) || (__ia64_asr_i ==  16)		\
+	     || (__ia64_asr_i == -1) || (__ia64_asr_i ==  -4)		\
+	     || (__ia64_asr_i == -8) || (__ia64_asr_i == -16)))		\
+		? ia64_fetch_and_add(-__ia64_asr_i, &(v)->counter)	\
+		: ia64_atomic64_sub(__ia64_asr_i, v);			\
+})
+
 #define atomic_dec_return(v)		atomic_sub_return(1, (v))
 #define atomic_inc_return(v)		atomic_add_return(1, (v))
+#define atomic64_dec_return(v)		atomic64_sub_return(1, (v))
+#define atomic64_inc_return(v)		atomic64_add_return(1, (v))
 
 #define atomic_sub_and_test(i,v)	(atomic_sub_return((i), (v)) == 0)
 #define atomic_dec_and_test(v)		(atomic_sub_return(1, (v)) == 0)
 #define atomic_inc_and_test(v)		(atomic_add_return(1, (v)) != 0)
+#define atomic64_sub_and_test(i,v)	(atomic64_sub_return((i), (v)) == 0)
+#define atomic64_dec_and_test(v)	(atomic64_sub_return(1, (v)) == 0)
+#define atomic64_inc_and_test(v)	(atomic64_add_return(1, (v)) != 0)
 
 #define atomic_add(i,v)			atomic_add_return((i), (v))
 #define atomic_sub(i,v)			atomic_sub_return((i), (v))
 #define atomic_inc(v)			atomic_add(1, (v))
 #define atomic_dec(v)			atomic_sub(1, (v))
 
+#define atomic64_add(i,v)		atomic64_add_return((i), (v))
+#define atomic64_sub(i,v)		atomic64_sub_return((i), (v))
+#define atomic64_inc(v)			atomic64_add(1, (v))
+#define atomic64_dec(v)			atomic64_sub(1, (v))
+
 /* Atomic operations are already serializing */
 #define smp_mb__before_atomic_dec()	barrier()
 #define smp_mb__after_atomic_dec()	barrier()
diff --git a/include/asm-ia64/mmu_context.h b/include/asm-ia64/mmu_context.h
index dee1cd007f5a..95e786212982 100644
--- a/include/asm-ia64/mmu_context.h
+++ b/include/asm-ia64/mmu_context.h
@@ -86,9 +86,9 @@ delayed_tlb_flush (void)
 {
 	extern void local_flush_tlb_all (void);
 
-	if (unlikely(__get_cpu_var(ia64_need_tlb_flush))) {
+	if (unlikely(__ia64_per_cpu_var(ia64_need_tlb_flush))) {
 		local_flush_tlb_all();
-		__get_cpu_var(ia64_need_tlb_flush) = 0;
+		__ia64_per_cpu_var(ia64_need_tlb_flush) = 0;
 	}
 }
diff --git a/include/asm-ia64/percpu.h b/include/asm-ia64/percpu.h
index cd4a79d645a0..947feb5ce17e 100644
--- a/include/asm-ia64/percpu.h
+++ b/include/asm-ia64/percpu.h
@@ -1,43 +1,66 @@
 #ifndef _ASM_IA64_PERCPU_H
 #define _ASM_IA64_PERCPU_H
 
-#include <linux/config.h>
-#include <linux/compiler.h>
-
 /*
  * Copyright (C) 2002-2003 Hewlett-Packard Co
  *	David Mosberger-Tang <davidm@hpl.hp.com>
  */
+
 #define PERCPU_ENOUGH_ROOM PERCPU_PAGE_SIZE
 
 #ifdef __ASSEMBLY__
-
-#define THIS_CPU(var)	(var##__per_cpu)  /* use this to mark accesses to per-CPU variables... */
-
+# define THIS_CPU(var)	(per_cpu__##var)  /* use this to mark accesses to per-CPU variables... */
 #else /* !__ASSEMBLY__ */
 
+#include <linux/config.h>
+
 #include <linux/threads.h>
 
+#define DECLARE_PER_CPU(type, name) extern __typeof__(type) per_cpu__##name
+
+/*
+ * Pretty much a literal copy of asm-generic/percpu.h, except that percpu_modcopy() is an
+ * external routine, to avoid include-hell.
+ */
+#ifdef CONFIG_SMP
+
 extern unsigned long __per_cpu_offset[NR_CPUS];
+
+/* Equal to __per_cpu_offset[smp_processor_id()], but faster to access: */
+DECLARE_PER_CPU(unsigned long, local_per_cpu_offset);
+
+/* Separate out the type, so (int[3], foo) works.  */
 #define DEFINE_PER_CPU(type, name) \
-    __attribute__((__section__(".data.percpu"))) __typeof__(type) name##__per_cpu
-#define DECLARE_PER_CPU(type, name) extern __typeof__(type) name##__per_cpu
+	__attribute__((__section__(".data.percpu"))) __typeof__(type) per_cpu__##name
 
-#define __get_cpu_var(var)	(var##__per_cpu)
-#ifdef CONFIG_SMP
-# define per_cpu(var, cpu)	(*RELOC_HIDE(&var##__per_cpu, __per_cpu_offset[cpu]))
+#define per_cpu(var, cpu)  (*RELOC_HIDE(&per_cpu__##var, __per_cpu_offset[cpu]))
+#define __get_cpu_var(var) (*RELOC_HIDE(&per_cpu__##var, __ia64_per_cpu_var(local_per_cpu_offset)))
+
 extern void percpu_modcopy(void *pcpudst, const void *src, unsigned long size);
-#else
-# define per_cpu(var, cpu)			((void)cpu, __get_cpu_var(var))
-#endif
 
-#define EXPORT_PER_CPU_SYMBOL(var)		EXPORT_SYMBOL(var##__per_cpu)
-#define EXPORT_PER_CPU_SYMBOL_GPL(var)		EXPORT_SYMBOL_GPL(var##__per_cpu)
+#else /* ! SMP */
+
+#define DEFINE_PER_CPU(type, name) __typeof__(type) per_cpu__##name
+
+#define per_cpu(var, cpu)			((void)cpu, per_cpu__##var)
+#define __get_cpu_var(var)			per_cpu__##var
+
+#endif	/* SMP */
+
+#define EXPORT_PER_CPU_SYMBOL(var)		EXPORT_SYMBOL(per_cpu__##var)
+#define EXPORT_PER_CPU_SYMBOL_GPL(var)		EXPORT_SYMBOL_GPL(per_cpu__##var)
+
+/* ia64-specific part: */
 
 extern void setup_per_cpu_areas (void);
 
+/*
+ * Be extremely careful when taking the address of this variable!  Due to virtual
+ * remapping, it is different from the canonical address returned by __get_cpu_var(var)!
+ * On the positive side, using __ia64_per_cpu_var() instead of __get_cpu_var() is slightly
+ * more efficient.
+ */
+#define __ia64_per_cpu_var(var)	(per_cpu__##var)
+
 #endif /* !__ASSEMBLY__ */
 
 #endif /* _ASM_IA64_PERCPU_H */
diff --git a/include/asm-ia64/processor.h b/include/asm-ia64/processor.h
index 56e55b0a5061..669e44bf8012 100644
--- a/include/asm-ia64/processor.h
+++ b/include/asm-ia64/processor.h
@@ -191,10 +191,12 @@ struct cpuinfo_ia64 {
 DECLARE_PER_CPU(struct cpuinfo_ia64, cpu_info);
 
 /*
- * The "local" data pointer.  It points to the per-CPU data of the currently executing
+ * The "local" data variable.  It refers to the per-CPU data of the currently executing
  * CPU, much like "current" points to the per-task data of the currently executing task.
+ * Do not use the address of local_cpu_data, since it will be different from
+ * cpu_data(smp_processor_id())!
  */
-#define local_cpu_data		(&__get_cpu_var(cpu_info))
+#define local_cpu_data		(&__ia64_per_cpu_var(cpu_info))
 #define cpu_data(cpu)		(&per_cpu(cpu_info, cpu))
 
 extern void identify_cpu (struct cpuinfo_ia64 *);
diff --git a/include/asm-ia64/tlb.h b/include/asm-ia64/tlb.h
index 3d0141eb9eaa..ec51cacaf232 100644
--- a/include/asm-ia64/tlb.h
+++ b/include/asm-ia64/tlb.h
@@ -126,7 +126,7 @@ ia64_tlb_flush_mmu (struct mmu_gather *tlb, unsigned long start, unsigned long e
 static inline struct mmu_gather *
 tlb_gather_mmu (struct mm_struct *mm, unsigned int full_mm_flush)
 {
-	struct mmu_gather *tlb = &per_cpu(mmu_gathers, smp_processor_id());
+	struct mmu_gather *tlb = &__get_cpu_var(mmu_gathers);
 
 	tlb->mm = mm;
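
To make the distinction drawn in the commit message concrete, here is a rough sketch of how the two accessors differ in use. It is not from this patch; the structure, array, and function names are invented for illustration and only assume the __get_cpu_var()/__ia64_per_cpu_var() semantics introduced above.

```c
/* Hypothetical illustration -- not part of this patch. */
#include <linux/percpu.h>
#include <linux/smp.h>
#include <linux/threads.h>

struct pkt_stats {				/* made-up per-CPU statistics */
	unsigned long rx;
};

static DEFINE_PER_CPU(struct pkt_stats, pkt_stats);
static struct pkt_stats *stats_table[NR_CPUS];	/* global registry */

void publish_my_stats (void)
{
	int cpu = get_cpu();

	/*
	 * __get_cpu_var() now returns the canonical l-value, so its address
	 * may be stored globally and later dereferenced by any CPU.
	 */
	stats_table[cpu] = &__get_cpu_var(pkt_stats);
	put_cpu();
}

void count_rx_packet (void)
{
	/*
	 * __ia64_per_cpu_var() goes through the virtually mapped alias:
	 * cheaper to access, but its address must never be handed to
	 * another CPU or stored in a global place.  (Caller is assumed
	 * to run with preemption disabled.)
	 */
	__ia64_per_cpu_var(pkt_stats).rx++;
}
```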
