| author | Andrew Morton <akpm@osdl.org> | 2004-06-23 18:49:33 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@ppc970.osdl.org> | 2004-06-23 18:49:33 -0700 |
| commit | 5c60169a01af712b0b1aa1f5db3fcb8776b22d9f | |
| tree | a6c9a380b2decc4f07dd4847d40087637c4e0805 | /include/linux |
| parent | b884e83821944633fb02295fd0470398090ac782 | |
[PATCH] rcu lock update: Add per-cpu batch counter
From: Manfred Spraul <manfred@colorfullife.com>
Below is one of the patches from my rcu lock update. Jack Steiner tested
the first one on a 512p system and it resolved the rcu cache line thrashing.
All patches were tested on OSDL with STP.
Step one for reducing cacheline thrashing within rcupdate.c:
The current code uses the rcu_cpu_mask bitmap both for keeping track of the
cpus that haven't gone through a quiescent state and for checking if a cpu
should look for quiescent states. The bitmap is frequently changed and the
check is done by polling - together this causes cache line thrashing.
If it's cheaper to access a (mostly) read-only cacheline than a cacheline that
is frequently dirtied, then it's possible to reduce the thrashing by splitting
rcu_ctrlblk into two cachelines:
The patch adds a generation counter and moves it into a separate cacheline.
This makes it possible to remove all accesses to rcu_cpu_mask (in the
read-write cacheline) from rcu_pending and at least 50% of the accesses from
rcu_check_quiescent_state. rcu_pending and all but one call per cpu to
rcu_check_quiescent_state access only the read-only cacheline. Probably not
enough for 512p, but it's a start, at a cost of just 128 bytes more memory,
without slowing down rcu grace periods. Obviously the read-only cacheline is
not really read-only: it's written once per grace period to indicate that a
new grace period is running.
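A minimal sketch of the resulting per-cpu fast path, assuming the field and
macro names this patch introduces (the real rcu_check_quiescent_state lives in
kernel/rcupdate.c and is not part of the header diff below, so this body is an
illustration of the description above, not code from the patch):

```c
/* Illustrative sketch only: reconstructs the described logic from the
 * names the patch adds (quiescbatch, qs_pending, batch.cur) plus the
 * cpu_quiet() helper described in the change list further down.
 */
static void rcu_check_quiescent_state(int cpu)
{
	/* Read-mostly cacheline: batch.cur changes only when a new
	 * grace period starts. */
	if (RCU_quiescbatch(cpu) != rcu_ctrlblk.batch.cur) {
		/* New grace period: start watching for a quiescent
		 * state and remember which period we are in. */
		RCU_qs_pending(cpu) = 1;
		RCU_last_qsctr(cpu) = RCU_qsctr(cpu);
		RCU_quiescbatch(cpu) = rcu_ctrlblk.batch.cur;
		return;
	}
	/* Not waiting, or no quiescent state passed yet: return without
	 * ever touching the read-write cacheline. */
	if (!RCU_qs_pending(cpu) || RCU_qsctr(cpu) == RCU_last_qsctr(cpu))
		return;
	/* Only this one call per cpu and grace period dirties the
	 * read-write cacheline (rcu_cpu_mask under state.mutex). */
	RCU_qs_pending(cpu) = 0;
	spin_lock(&rcu_ctrlblk.state.mutex);
	if (RCU_quiescbatch(cpu) == rcu_ctrlblk.batch.cur)
		cpu_quiet(cpu);	/* clear this cpu's bit in rcu_cpu_mask */
	spin_unlock(&rcu_ctrlblk.state.mutex);
}
```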
Tests on an 8-way Pentium III with reaim showed some improvement:
oprofile hits (reference run: http://khack.osdl.org/stp/293075/, patched run: http://khack.osdl.org/stp/293076/):

| Function | Reference hits | Reference % | Patched hits | Patched % |
|---|---|---|---|---|
| rcu_pending | 23741 | 0.0994 | 8291 | 0.0579 |
| rcu_check_quiescent_state | 19057 | 0.0798 | 5475 | 0.0382 |
| rcu_check_callbacks | 6530 | 0.0273 | 3604 | 0.0252 |
The total runtime differs between the two runs, so the percentages rather than
the raw hit counts must be compared: roughly a 50% improvement. I uninlined
rcu_pending for the test.
Tested with reaim and kernbench.
Description:
- per-cpu quiescbatch and qs_pending fields introduced: quiescbatch contains
the number of the last quiescent period that the cpu has seen and qs_pending
is set if the cpu has not yet reported the quiescent state for the current
period. With these two fields a cpu can test if it should report a
quiescent state without having to look at the frequently written
rcu_cpu_mask bitmap.
- curbatch split into two fields: rcu_ctrlblk.batch.completed and
rcu_ctrlblk.batch.cur. This makes it possible to figure out if a grace
period is running (completed != cur) without accessing the rcu_cpu_mask
bitmap.
- rcu_ctrlblk.maxbatch removed and replaced with a true/false next_pending
  flag: next_pending=1 means that another grace period should be started
  immediately after the end of the current one. Previously this was encoded
  in maxbatch: curbatch == maxbatch meant don't start, curbatch != maxbatch
  meant start. A flag improves readability, since the only possible values
  for maxbatch were curbatch and curbatch+1.
- rcu_ctrlblk split into two cachelines for better performance.
- common code from rcu_offline_cpu and rcu_check_quiescent_state merged into
  cpu_quiet (sketched after this list).
- rcu_offline_cpu: spin_lock_irq replaced with spin_lock_bh; there are no
  accesses from irq context, and the spinlock is taken with interrupts
  enabled from tasklet context.
- rcu_restart_cpu introduced; s390 should call it after changing nohz:
  theoretically the global batch counter could wrap around and end up exactly
  at RCU_quiescbatch(cpu). The cpu would then never look for a quiescent
  state and rcu would lock up (sketched after this list).
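The helpers named in this list live in kernel/rcupdate.c, which is not part of
the header diff below. As a hedged illustration of how the completed/cur
counters, the next_pending flag and cpu_quiet could fit together (the bodies,
the rcu_start_batch helper name, and the nohz exclusion via nohz_cpu_mask are
reconstructions from this description, not code from the patch):

```c
/* Illustrative reconstruction; caller holds rcu_ctrlblk.state.mutex. */
static void rcu_start_batch(int next_pending)
{
	if (next_pending)
		rcu_ctrlblk.state.next_pending = 1;

	/* Start a new period only if one was requested and none is
	 * running (running <=> completed != cur). */
	if (rcu_ctrlblk.state.next_pending &&
	    rcu_ctrlblk.batch.completed == rcu_ctrlblk.batch.cur) {
		rcu_ctrlblk.state.next_pending = 0;
		/* The single per-grace-period write to the otherwise
		 * read-mostly cacheline. */
		rcu_ctrlblk.batch.cur++;
		/* Every cpu in the mask must report a quiescent state;
		 * nohz cpus are assumed to be excluded here. */
		cpus_andnot(rcu_ctrlblk.state.rcu_cpu_mask,
			    cpu_online_map, nohz_cpu_mask);
	}
}

/* Common path for rcu_check_quiescent_state() and rcu_offline_cpu():
 * @cpu has passed (or no longer needs) a quiescent state. Caller holds
 * rcu_ctrlblk.state.mutex. */
static void cpu_quiet(int cpu)
{
	cpu_clear(cpu, rcu_ctrlblk.state.rcu_cpu_mask);
	if (cpus_empty(rcu_ctrlblk.state.rcu_cpu_mask)) {
		/* Last outstanding cpu: the grace period is over. */
		rcu_ctrlblk.batch.completed = rcu_ctrlblk.batch.cur;
		rcu_start_batch(0);
	}
}
```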
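Only the declaration of the new rcu_restart_cpu appears in the header diff
below; a hedged sketch of the behaviour described in the last bullet
(illustrative, not taken from the patch): resetting the per-cpu quiescbatch to
the current completed counter re-synchronizes a cpu that slept through many
grace periods, so a wrapped batch counter can never be mistaken for "already
seen".

```c
/* Illustrative sketch; only the extern declaration is in this diff. */
void rcu_restart_cpu(int cpu)
{
	spin_lock_bh(&rcu_ctrlblk.state.mutex);
	/* Forget any stale period number: the next call to
	 * rcu_check_quiescent_state() will notice that batch.cur !=
	 * quiescbatch and start watching for a quiescent state again. */
	RCU_quiescbatch(cpu) = rcu_ctrlblk.batch.completed;
	RCU_qs_pending(cpu) = 0;
	spin_unlock_bh(&rcu_ctrlblk.state.mutex);
}
```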
Signed-off-by: Manfred Spraul <manfred@colorfullife.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'include/linux')
| -rw-r--r-- | include/linux/rcupdate.h | 48 |
1 file changed, 34 insertions(+), 14 deletions(-)
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 58048abd7446..f9981251d542 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -65,11 +65,18 @@ struct rcu_head {
 
 /* Control variables for rcupdate callback mechanism. */
 struct rcu_ctrlblk {
-	spinlock_t	mutex;		/* Guard this struct */
-	long		curbatch;	/* Current batch number. */
-	long		maxbatch;	/* Max requested batch number. */
-	cpumask_t	rcu_cpu_mask;	/* CPUs that need to switch in order */
-					/* for current batch to proceed. */
+	/* "const" members: only changed when starting/ending a grace period */
+	struct {
+		long	cur;		/* Current batch number. */
+		long	completed;	/* Number of the last completed batch */
+	} batch ____cacheline_maxaligned_in_smp;
+	/* remaining members: bookkeeping of the progress of the grace period */
+	struct {
+		spinlock_t	mutex;		/* Guard this struct */
+		int		next_pending;	/* Is the next batch already waiting? */
+		cpumask_t	rcu_cpu_mask;	/* CPUs that need to switch */
+						/* in order for current batch to proceed. */
+	} state ____cacheline_maxaligned_in_smp;
 };
 
 /* Is batch a before batch b ? */
@@ -90,9 +97,14 @@ static inline int rcu_batch_after(long a, long b)
  *	curlist - current batch for which quiescent cycle started if any
  */
 struct rcu_data {
+	/* 1) quiescent state handling : */
+	long		quiescbatch;	/* Batch # for grace period */
 	long		qsctr;		/* User-mode/idle loop etc. */
 	long		last_qsctr;	/* value of qsctr at beginning */
 					/* of rcu grace period */
+	int		qs_pending;	/* core waits for quiesc state */
+
+	/* 2) batch handling */
 	long		batch;		/* Batch # for current RCU batch */
 	struct list_head nxtlist;
 	struct list_head curlist;
@@ -101,24 +113,31 @@ struct rcu_data {
 DECLARE_PER_CPU(struct rcu_data, rcu_data);
 
 extern struct rcu_ctrlblk rcu_ctrlblk;
 
+#define RCU_quiescbatch(cpu)	(per_cpu(rcu_data, (cpu)).quiescbatch)
 #define RCU_qsctr(cpu)		(per_cpu(rcu_data, (cpu)).qsctr)
 #define RCU_last_qsctr(cpu)	(per_cpu(rcu_data, (cpu)).last_qsctr)
+#define RCU_qs_pending(cpu)	(per_cpu(rcu_data, (cpu)).qs_pending)
 #define RCU_batch(cpu)		(per_cpu(rcu_data, (cpu)).batch)
 #define RCU_nxtlist(cpu)	(per_cpu(rcu_data, (cpu)).nxtlist)
 #define RCU_curlist(cpu)	(per_cpu(rcu_data, (cpu)).curlist)
 
-#define RCU_QSCTR_INVALID	0
-
 static inline int rcu_pending(int cpu)
 {
-	if ((!list_empty(&RCU_curlist(cpu)) &&
-	     rcu_batch_before(RCU_batch(cpu), rcu_ctrlblk.curbatch)) ||
-	    (list_empty(&RCU_curlist(cpu)) &&
-	     !list_empty(&RCU_nxtlist(cpu))) ||
-	    cpu_isset(cpu, rcu_ctrlblk.rcu_cpu_mask))
+	/* This cpu has pending rcu entries and the grace period
+	 * for them has completed.
+	 */
+	if (!list_empty(&RCU_curlist(cpu)) &&
+	    !rcu_batch_before(rcu_ctrlblk.batch.completed, RCU_batch(cpu)))
+		return 1;
+	/* This cpu has no pending entries, but there are new entries */
+	if (list_empty(&RCU_curlist(cpu)) &&
+	    !list_empty(&RCU_nxtlist(cpu)))
+		return 1;
+	/* The rcu core waits for a quiescent state from the cpu */
+	if (RCU_quiescbatch(cpu) != rcu_ctrlblk.batch.cur || RCU_qs_pending(cpu))
 		return 1;
-	else
-		return 0;
+	/* nothing to do */
+	return 0;
 }
 
 #define rcu_read_lock()		preempt_disable()
@@ -126,6 +145,7 @@ static inline int rcu_pending(int cpu)
 
 extern void rcu_init(void);
 extern void rcu_check_callbacks(int cpu, int user);
+extern void rcu_restart_cpu(int cpu);
 
 /* Exported interfaces */
 extern void FASTCALL(call_rcu(struct rcu_head *head,
