author    Andrew Morton <akpm@digeo.com>    2003-04-12 12:59:51 -0700
committer James Bottomley <jejb@raven.il.steeleye.com>    2003-04-12 12:59:51 -0700
commit    ba8e8755393fd032e448b3cfa35cb01743807699 (patch)
tree      75a18c8515be1213c8d0eea312aedb8e39713aaa
parent    f688c084bc58323c8a7ca19090884ac5da7b6c04 (diff)
[PATCH] percpu_counters: approximate but scalable counters
Several places in ext2 and ext3 are using filesystem-wide counters which use global locking, mainly for the Orlov allocator's heuristics. To solve the contention which this causes we can trade off accuracy against speed.

This patch introduces a "percpu_counter" library type in which the counts are per-CPU and are periodically spilled into a global counter. Readers only read the global counter.

These objects are *large*: on a 32-CPU P4 they are 4 kbytes; on a 4-way P3, 128 bytes.
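The batching scheme is easy to model outside the kernel. Below is a minimal single-threaded userspace sketch of the idea (illustrative only, not the patch's code): each per-CPU slot accumulates deltas locally and folds them into the shared count only once the local value reaches a batch threshold, so the global count trails the true value by at most the sum of the unspilled per-CPU remainders.

/* Userspace model of the patch's batching idea -- illustrative only. */
#include <stdio.h>

#define NCPUS     4
#define FBC_BATCH (NCPUS * 4)	/* the patch uses NR_CPUS*4 below 16 CPUs */

struct model_counter {
	long count;		/* the shared, approximate value */
	long local[NCPUS];	/* per-CPU accumulators */
};

static void model_mod(struct model_counter *c, int cpu, long amount)
{
	long v = c->local[cpu] + amount;

	if (v >= FBC_BATCH || v <= -FBC_BATCH) {
		c->count += v;	/* spill: the only write to the shared count */
		v = 0;
	}
	c->local[cpu] = v;
}

int main(void)
{
	struct model_counter c = { 0 };
	int i;

	for (i = 0; i < 100; i++)
		model_mod(&c, i % NCPUS, 1);

	/* 100 true increments; the shared count reflects whole batches only */
	printf("global=%ld (true value 100)\n", c.count);
	return 0;
}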
-rw-r--r--  include/linux/percpu_counter.h  100
-rw-r--r--  kernel/ksyms.c                    2
-rw-r--r--  lib/Makefile                      1
-rw-r--r--  lib/percpu_counter.c             18
4 files changed, 121 insertions, 0 deletions
diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h
new file mode 100644
index 000000000000..69d0a66b662e
--- /dev/null
+++ b/include/linux/percpu_counter.h
@@ -0,0 +1,100 @@
+/*
+ * A simple "approximate counter" for use in ext2 and ext3 superblocks.
+ *
+ * WARNING: these things are HUGE. 4 kbytes per counter on 32-way P4.
+ */
+
+#include <linux/config.h>
+#include <linux/spinlock.h>
+#include <linux/smp.h>
+#include <linux/preempt.h>
+
+#ifdef CONFIG_SMP
+
+struct __percpu_counter {
+ long count;
+} ____cacheline_aligned;
+
+struct percpu_counter {
+ spinlock_t lock;
+ long count;
+ struct __percpu_counter counters[NR_CPUS];
+};
+
+#if NR_CPUS >= 16
+#define FBC_BATCH (NR_CPUS*2)
+#else
+#define FBC_BATCH (NR_CPUS*4)
+#endif
+
+static inline void percpu_counter_init(struct percpu_counter *fbc)
+{
+ int i;
+
+ spin_lock_init(&fbc->lock);
+ fbc->count = 0;
+ for (i = 0; i < NR_CPUS; i++)
+ fbc->counters[i].count = 0;
+}
+
+void percpu_counter_mod(struct percpu_counter *fbc, long amount);
+
+static inline long percpu_counter_read(struct percpu_counter *fbc)
+{
+ return fbc->count;
+}
+
+/*
+ * It is possible for the percpu_counter_read() to return a small negative
+ * number for some counter which should never be negative.
+ */
+static inline long percpu_counter_read_positive(struct percpu_counter *fbc)
+{
+ long ret = fbc->count;
+
+ barrier(); /* Prevent reloads of fbc->count */
+ if (ret > 0)
+ return ret;
+ return 1;
+}
+
+#else
+
+struct percpu_counter {
+ long count;
+};
+
+static inline void percpu_counter_init(struct percpu_counter *fbc)
+{
+ fbc->count = 0;
+}
+
+static inline void
+percpu_counter_mod(struct percpu_counter *fbc, long amount)
+{
+ preempt_disable();
+ fbc->count += amount;
+ preempt_enable();
+}
+
+static inline long percpu_counter_read(struct percpu_counter *fbc)
+{
+ return fbc->count;
+}
+
+static inline long percpu_counter_read_positive(struct percpu_counter *fbc)
+{
+ return fbc->count;
+}
+
+#endif /* CONFIG_SMP */
+
+static inline void percpu_counter_inc(struct percpu_counter *fbc)
+{
+ percpu_counter_mod(fbc, 1);
+}
+
+static inline void percpu_counter_dec(struct percpu_counter *fbc)
+{
+ percpu_counter_mod(fbc, -1);
+}
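For context on how a filesystem would consume this interface, here is a sketch modelled on the ext2/ext3 use case the changelog describes; the structure and function names are hypothetical, only the percpu_counter_* calls come from the patch above.

#include <linux/percpu_counter.h>

/* Hypothetical superblock-private info, in the style of ext2/ext3. */
struct example_sb_info {
	struct percpu_counter free_blocks;
};

static void example_fill_super(struct example_sb_info *sbi, long nr_free)
{
	percpu_counter_init(&sbi->free_blocks);
	percpu_counter_mod(&sbi->free_blocks, nr_free);
}

/* Hot path: a cheap per-CPU update, no global lock in the common case. */
static void example_alloc_block(struct example_sb_info *sbi)
{
	percpu_counter_dec(&sbi->free_blocks);
}

/* Heuristics (e.g. Orlov) read the approximate global value lock-free. */
static long example_free_blocks(struct example_sb_info *sbi)
{
	return percpu_counter_read_positive(&sbi->free_blocks);
}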
diff --git a/kernel/ksyms.c b/kernel/ksyms.c
index f7ef45020896..5d31affa36ce 100644
--- a/kernel/ksyms.c
+++ b/kernel/ksyms.c
@@ -58,6 +58,7 @@
 #include <linux/ptrace.h>
 #include <linux/time.h>
 #include <linux/backing-dev.h>
+#include <linux/percpu_counter.h>
 #include <asm/checksum.h>
 
 #if defined(CONFIG_PROC_FS)
@@ -100,6 +101,7 @@ EXPORT_SYMBOL(kfree);
 #ifdef CONFIG_SMP
 EXPORT_SYMBOL(kmalloc_percpu);
 EXPORT_SYMBOL(kfree_percpu);
+EXPORT_SYMBOL(percpu_counter_mod);
 #endif
 EXPORT_SYMBOL(vfree);
 EXPORT_SYMBOL(__vmalloc);
diff --git a/lib/Makefile b/lib/Makefile
index 24e6b3adc098..6ab94d3cb906 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -14,6 +14,7 @@ obj-y := errno.o ctype.o string.o vsprintf.o brlock.o cmdline.o \
 obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
 obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
+obj-$(CONFIG_SMP) += percpu_counter.o
 
 ifneq ($(CONFIG_HAVE_DEC_LOCK),y)
 obj-y += dec_and_lock.o
diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c
new file mode 100644
index 000000000000..73f99d99f9ac
--- /dev/null
+++ b/lib/percpu_counter.c
@@ -0,0 +1,18 @@
+
+#include <linux/percpu_counter.h>
+
+void percpu_counter_mod(struct percpu_counter *fbc, long amount)
+{
+ int cpu = get_cpu();
+ long count = fbc->counters[cpu].count;
+
+ count += amount;
+ if (count >= FBC_BATCH || count <= -FBC_BATCH) {
+ spin_lock(&fbc->lock);
+ fbc->count += count;
+ spin_unlock(&fbc->lock);
+ count = 0;
+ }
+ fbc->counters[cpu].count = count;
+ put_cpu();
+}
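One consequence of the spill threshold is a bounded error: a CPU's local count resets on spill and so never exceeds FBC_BATCH - 1 in magnitude after percpu_counter_mod() returns, meaning percpu_counter_read() can lag the true value by at most NR_CPUS * (FBC_BATCH - 1). A small userspace check of that bound under the patch's two batch rules (the CPU counts are assumed example values, nothing kernel-specific):

/* Worst-case drift of the approximate counter for a given CPU count. */
#include <stdio.h>

static long fbc_batch(int ncpus)
{
	return ncpus >= 16 ? ncpus * 2 : ncpus * 4;	/* mirrors FBC_BATCH */
}

int main(void)
{
	int sizes[] = { 4, 32 };
	int i;

	for (i = 0; i < 2; i++) {
		int n = sizes[i];
		long batch = fbc_batch(n);

		/* each CPU can sit at +/-(batch - 1) without spilling */
		printf("%2d CPUs: batch %3ld, max drift %ld\n",
		       n, batch, (long)n * (batch - 1));
	}
	return 0;
}

For the 32-CPU case above that works out to 32 * 63 = 2016, which is why the changelog is comfortable trading this accuracy away for the free-block and free-inode heuristics.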