diff options
| author | Andrew Morton <akpm@digeo.com> | 2003-04-12 12:59:51 -0700 |
|---|---|---|
| committer | James Bottomley <jejb@raven.il.steeleye.com> | 2003-04-12 12:59:51 -0700 |
| commit | ba8e8755393fd032e448b3cfa35cb01743807699 (patch) | |
| tree | 75a18c8515be1213c8d0eea312aedb8e39713aaa | |
| parent | f688c084bc58323c8a7ca19090884ac5da7b6c04 (diff) | |
[PATCH] percpu_counters: approximate but scalable counters
Several places in ext2 and ext3 are using filesystem-wide counters which use
global locking, mainly for the Orlov allocator's heuristics.
To solve the contention which this causes we can trade off accuracy against
speed.
This patch introduces a "percpu_counter" library type in which the counts are
per-cpu and are periodically spilled into a global counter. Readers only
read the global counter.
These objects are *large*. On a 32-CPU P4, they are 4 kbytes. On a 4-way
P3, 128 bytes.
| -rw-r--r-- | include/linux/percpu_counter.h | 100 | ||||
| -rw-r--r-- | kernel/ksyms.c | 2 | ||||
| -rw-r--r-- | lib/Makefile | 1 | ||||
| -rw-r--r-- | lib/percpu_counter.c | 18 |
4 files changed, 121 insertions, 0 deletions
diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h new file mode 100644 index 000000000000..69d0a66b662e --- /dev/null +++ b/include/linux/percpu_counter.h @@ -0,0 +1,100 @@ +/* + * A simple "approximate counter" for use in ext2 and ext3 superblocks. + * + * WARNING: these things are HUGE. 4 kbytes per counter on 32-way P4. + */ + +#include <linux/config.h> +#include <linux/spinlock.h> +#include <linux/smp.h> +#include <linux/preempt.h> + +#ifdef CONFIG_SMP + +struct __percpu_counter { + long count; +} ____cacheline_aligned; /* one cacheline per CPU: avoids false sharing between CPUs */ + +struct percpu_counter { + spinlock_t lock; /* protects the global ->count during spills */ + long count; + struct __percpu_counter counters[NR_CPUS]; +}; + +#if NR_CPUS >= 16 +#define FBC_BATCH (NR_CPUS*2) /* spill threshold; larger batch on big SMP cuts lock traffic */ +#else +#define FBC_BATCH (NR_CPUS*4) +#endif + +static inline void percpu_counter_init(struct percpu_counter *fbc) +{ + int i; + + spin_lock_init(&fbc->lock); + fbc->count = 0; + for (i = 0; i < NR_CPUS; i++) + fbc->counters[i].count = 0; +} + +void percpu_counter_mod(struct percpu_counter *fbc, long amount); + +static inline long percpu_counter_read(struct percpu_counter *fbc) +{ + return fbc->count; /* approximate: unspilled per-cpu deltas (up to NR_CPUS*FBC_BATCH) are not included */ +} + +/* + * It is possible for the percpu_counter_read() to return a small negative + * number for some counter which should never be negative. 
+ */ +static inline long percpu_counter_read_positive(struct percpu_counter *fbc) +{ + long ret = fbc->count; + + barrier(); /* Prevent reloads of fbc->count */ + if (ret > 0) + return ret; + return 1; /* clamp: report 1, not the (possibly small negative) true value */ +} + +#else + +/* UP variant: a plain long; preempt disable makes the update atomic enough */ +struct percpu_counter { + long count; +}; + +static inline void percpu_counter_init(struct percpu_counter *fbc) +{ + fbc->count = 0; +} + +static inline void +percpu_counter_mod(struct percpu_counter *fbc, long amount) +{ + preempt_disable(); + fbc->count += amount; + preempt_enable(); +} + +static inline long percpu_counter_read(struct percpu_counter *fbc) +{ + return fbc->count; +} + +static inline long percpu_counter_read_positive(struct percpu_counter *fbc) +{ + return fbc->count; +} + +#endif /* CONFIG_SMP */ + +static inline void percpu_counter_inc(struct percpu_counter *fbc) +{ + percpu_counter_mod(fbc, 1); +} + +static inline void percpu_counter_dec(struct percpu_counter *fbc) +{ + percpu_counter_mod(fbc, -1); +} diff --git a/kernel/ksyms.c b/kernel/ksyms.c index f7ef45020896..5d31affa36ce 100644 --- a/kernel/ksyms.c +++ b/kernel/ksyms.c @@ -58,6 +58,7 @@ #include <linux/ptrace.h> #include <linux/time.h> #include <linux/backing-dev.h> +#include <linux/percpu_counter.h> #include <asm/checksum.h> #if defined(CONFIG_PROC_FS) @@ -100,6 +101,7 @@ EXPORT_SYMBOL(kfree); #ifdef CONFIG_SMP EXPORT_SYMBOL(kmalloc_percpu); EXPORT_SYMBOL(kfree_percpu); +EXPORT_SYMBOL(percpu_counter_mod); #endif EXPORT_SYMBOL(vfree); EXPORT_SYMBOL(__vmalloc); diff --git a/lib/Makefile b/lib/Makefile index 24e6b3adc098..6ab94d3cb906 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -14,6 +14,7 @@ obj-y := errno.o ctype.o string.o vsprintf.o brlock.o cmdline.o \ obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o +obj-$(CONFIG_SMP) += percpu_counter.o ifneq ($(CONFIG_HAVE_DEC_LOCK),y) obj-y += dec_and_lock.o diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c new file mode 100644 index 000000000000..73f99d99f9ac --- 
/dev/null +++ b/lib/percpu_counter.c @@ -0,0 +1,18 @@ + +#include <linux/percpu_counter.h> + +void percpu_counter_mod(struct percpu_counter *fbc, long amount) +{ + int cpu = get_cpu(); /* pin to this CPU so counters[cpu] stays ours */ + long count = fbc->counters[cpu].count; + + count += amount; + if (count >= FBC_BATCH || count <= -FBC_BATCH) { /* batched delta grew big enough: spill into the global count */ + spin_lock(&fbc->lock); + fbc->count += count; + spin_unlock(&fbc->lock); + count = 0; + } + fbc->counters[cpu].count = count; + put_cpu(); +} |
