author     Andrew Morton <akpm@osdl.org>             2004-06-23 18:49:33 -0700
committer  Linus Torvalds <torvalds@ppc970.osdl.org> 2004-06-23 18:49:33 -0700
commit     5c60169a01af712b0b1aa1f5db3fcb8776b22d9f (patch)
tree       a6c9a380b2decc4f07dd4847d40087637c4e0805 /include
parent     b884e83821944633fb02295fd0470398090ac782 (diff)
[PATCH] rcu lock update: Add per-cpu batch counter
From: Manfred Spraul <manfred@colorfullife.com>

Below is one of the patches from my rcu lock update. Jack Steiner tested the first one on a 512p and it resolved the rcu cache line thrashing. All were tested on OSDL with STP.

Step one for reducing cacheline thrashing within rcupdate.c:

The current code uses the rcu_cpu_mask bitmap both for keeping track of the cpus that haven't gone through a quiescent state and for checking whether a cpu should look for quiescent states. The bitmap is frequently changed and the check is done by polling; together this causes cache line thrashing.

If it's cheaper to access a (mostly) read-only cacheline than a cacheline that is frequently dirtied, then the thrashing can be reduced by splitting the data into two cachelines: the patch adds a generation counter and moves it into a separate cacheline. This makes it possible to remove all accesses to rcu_cpu_mask (in the read-write cacheline) from rcu_pending and at least 50% of the accesses from rcu_check_quiescent_state. rcu_pending, and all but one call per cpu to rcu_check_quiescent_state, access only the read-mostly cacheline. Probably not enough for 512p, but it's a start, for just 128 bytes more memory and without slowing down rcu grace periods. Obviously the read-mostly cacheline is not really read-only: it is written once per grace period to indicate that a new grace period is running.

Tests on an 8-way Pentium III with reaim showed some improvement.

oprofile hits:

Reference: http://khack.osdl.org/stp/293075/
   Hits       %
   23741   0.0994   rcu_pending
   19057   0.0798   rcu_check_quiescent_state
    6530   0.0273   rcu_check_callbacks

Patched:   http://khack.osdl.org/stp/293076/
    8291   0.0579   rcu_pending
    5475   0.0382   rcu_check_quiescent_state
    3604   0.0252   rcu_check_callbacks

The total runtime differs between the two runs, so the percentages must be compared: around 50% faster. I've uninlined rcu_pending for the test. Tested with reaim and kernbench.

Description:

- Per-cpu quiescbatch and qs_pending fields introduced: quiescbatch contains the number of the last quiescent period that the cpu has seen and qs_pending is set if the cpu has not yet reported the quiescent state for the current period. With these two fields a cpu can test whether it should report a quiescent state without having to look at the frequently written rcu_cpu_mask bitmap (see the sketch after the sign-offs below).

- curbatch split into two fields: rcu_ctrlblk.batch.completed and rcu_ctrlblk.batch.cur. This makes it possible to figure out whether a grace period is running (completed != cur) without accessing the rcu_cpu_mask bitmap.

- rcu_ctrlblk.maxbatch removed and replaced with a true/false next_pending flag: next_pending=1 means that another grace period should be started immediately after the end of the current one. Previously this was achieved with maxbatch: curbatch == maxbatch meant don't start, curbatch != maxbatch meant start. A flag improves readability, since the only possible values for maxbatch were curbatch and curbatch+1. (A sketch of the update-side use of the batch counters and next_pending follows the diffstat below.)

- rcu_ctrlblk split into two cachelines for better performance.

- Common code from rcu_offline_cpu and rcu_check_quiescent_state merged into cpu_quiet.

- rcu_offline_cpu: replace spin_lock_irq with spin_lock_bh; there are no accesses from irq context (but the spinlock is accessed with interrupts enabled from tasklet context).

- rcu_restart_cpu introduced; s390 should call it after changing nohz: theoretically the global batch counter could wrap around and end up at RCU_quiescbatch(cpu). Then the cpu would not look for a quiescent state and rcu would lock up.
Signed-off-by: Manfred Spraul <manfred@colorfullife.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
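The per-cpu quiescbatch/qs_pending check described above is implemented in kernel/rcupdate.c, which is not part of this header-only diff. As an illustration only (a simplified sketch, not the patch itself: cpu_quiet() is the helper named in the description, everything else uses the macros and fields added in rcupdate.h below), the idea is roughly:

static void rcu_check_quiescent_state(int cpu)
{
	/* Only the read-mostly batch cacheline is touched here. */
	if (RCU_quiescbatch(cpu) != rcu_ctrlblk.batch.cur) {
		/* A new grace period has started: start watching for a
		 * quiescent state on this cpu. */
		RCU_qs_pending(cpu) = 1;
		RCU_last_qsctr(cpu) = RCU_qsctr(cpu);
		RCU_quiescbatch(cpu) = rcu_ctrlblk.batch.cur;
		return;
	}

	/* Already reported, or no grace period pending for this cpu. */
	if (!RCU_qs_pending(cpu))
		return;

	/* Wait until the cpu has actually passed through a quiescent
	 * state (user mode, idle loop, ...). */
	if (RCU_qsctr(cpu) == RCU_last_qsctr(cpu))
		return;
	RCU_qs_pending(cpu) = 0;

	/* This is the single access per cpu and grace period to the
	 * frequently written state cacheline. */
	spin_lock(&rcu_ctrlblk.state.mutex);
	if (RCU_quiescbatch(cpu) == rcu_ctrlblk.batch.cur)
		cpu_quiet(cpu);	/* clear cpu's bit in rcu_cpu_mask */
	spin_unlock(&rcu_ctrlblk.state.mutex);
}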
Diffstat (limited to 'include')
-rw-r--r--  include/linux/rcupdate.h  48
1 file changed, 34 insertions, 14 deletions
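Before the header diff itself, here is an update-side sketch of how the split batch counters and the next_pending flag are meant to be used (again illustrative only; the real rcu_start_batch() lives in kernel/rcupdate.c, is not part of this diff, and additionally excludes nohz cpus from the bitmap):

/*
 * Start a new grace period if none is running and one has been
 * requested.  Sketch only; must be called with rcu_ctrlblk.state.mutex
 * held, names follow the header below and the description above.
 */
static void rcu_start_batch(int next_pending)
{
	if (next_pending)
		rcu_ctrlblk.state.next_pending = 1;

	/* completed != cur means a grace period is still in progress. */
	if (rcu_ctrlblk.state.next_pending &&
	    rcu_ctrlblk.batch.completed == rcu_ctrlblk.batch.cur) {
		rcu_ctrlblk.state.next_pending = 0;
		/* Every online cpu must report a quiescent state. */
		rcu_ctrlblk.state.rcu_cpu_mask = cpu_online_map;
		/* Written once per grace period: the only write to the
		 * read-mostly batch cacheline. */
		rcu_ctrlblk.batch.cur++;
	}
}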
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 58048abd7446..f9981251d542 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -65,11 +65,18 @@ struct rcu_head {
/* Control variables for rcupdate callback mechanism. */
struct rcu_ctrlblk {
- spinlock_t mutex; /* Guard this struct */
- long curbatch; /* Current batch number. */
- long maxbatch; /* Max requested batch number. */
- cpumask_t rcu_cpu_mask; /* CPUs that need to switch in order */
- /* for current batch to proceed. */
+ /* "const" members: only changed when starting/ending a grace period */
+ struct {
+ long cur; /* Current batch number. */
+ long completed; /* Number of the last completed batch */
+ } batch ____cacheline_maxaligned_in_smp;
+ /* remaining members: bookkeeping of the progress of the grace period */
+ struct {
+ spinlock_t mutex; /* Guard this struct */
+ int next_pending; /* Is the next batch already waiting? */
+ cpumask_t rcu_cpu_mask; /* CPUs that need to switch */
+ /* in order for current batch to proceed. */
+ } state ____cacheline_maxaligned_in_smp;
};
/* Is batch a before batch b ? */
@@ -90,9 +97,14 @@ static inline int rcu_batch_after(long a, long b)
* curlist - current batch for which quiescent cycle started if any
*/
struct rcu_data {
+ /* 1) quiescent state handling : */
+ long quiescbatch; /* Batch # for grace period */
long qsctr; /* User-mode/idle loop etc. */
long last_qsctr; /* value of qsctr at beginning */
/* of rcu grace period */
+ int qs_pending; /* core waits for quiesc state */
+
+ /* 2) batch handling */
long batch; /* Batch # for current RCU batch */
struct list_head nxtlist;
struct list_head curlist;
@@ -101,24 +113,31 @@ struct rcu_data {
DECLARE_PER_CPU(struct rcu_data, rcu_data);
extern struct rcu_ctrlblk rcu_ctrlblk;
+#define RCU_quiescbatch(cpu) (per_cpu(rcu_data, (cpu)).quiescbatch)
#define RCU_qsctr(cpu) (per_cpu(rcu_data, (cpu)).qsctr)
#define RCU_last_qsctr(cpu) (per_cpu(rcu_data, (cpu)).last_qsctr)
+#define RCU_qs_pending(cpu) (per_cpu(rcu_data, (cpu)).qs_pending)
#define RCU_batch(cpu) (per_cpu(rcu_data, (cpu)).batch)
#define RCU_nxtlist(cpu) (per_cpu(rcu_data, (cpu)).nxtlist)
#define RCU_curlist(cpu) (per_cpu(rcu_data, (cpu)).curlist)
-#define RCU_QSCTR_INVALID 0
-
static inline int rcu_pending(int cpu)
{
- if ((!list_empty(&RCU_curlist(cpu)) &&
- rcu_batch_before(RCU_batch(cpu), rcu_ctrlblk.curbatch)) ||
- (list_empty(&RCU_curlist(cpu)) &&
- !list_empty(&RCU_nxtlist(cpu))) ||
- cpu_isset(cpu, rcu_ctrlblk.rcu_cpu_mask))
+ /* This cpu has pending rcu entries and the grace period
+ * for them has completed.
+ */
+ if (!list_empty(&RCU_curlist(cpu)) &&
+ !rcu_batch_before(rcu_ctrlblk.batch.completed,RCU_batch(cpu)))
+ return 1;
+ /* This cpu has no pending entries, but there are new entries */
+ if (list_empty(&RCU_curlist(cpu)) &&
+ !list_empty(&RCU_nxtlist(cpu)))
+ return 1;
+ /* The rcu core waits for a quiescent state from the cpu */
+ if (RCU_quiescbatch(cpu) != rcu_ctrlblk.batch.cur || RCU_qs_pending(cpu))
return 1;
- else
- return 0;
+ /* nothing to do */
+ return 0;
}
#define rcu_read_lock() preempt_disable()
@@ -126,6 +145,7 @@ static inline int rcu_pending(int cpu)
extern void rcu_init(void);
extern void rcu_check_callbacks(int cpu, int user);
+extern void rcu_restart_cpu(int cpu);
/* Exported interfaces */
extern void FASTCALL(call_rcu(struct rcu_head *head,