From 5c1eabe68501d1e1b1586c7f4c46cc531828c4ab Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 22 Oct 2012 19:37:47 -0400 Subject: percpu-rw-semaphores: use light/heavy barriers This patch introduces new barrier pair light_mb() and heavy_mb() for percpu rw semaphores. This patch fixes a bug in percpu-rw-semaphores where a barrier was missing in percpu_up_write. This patch improves performance on the read path of percpu-rw-semaphores: on non-x86 cpus, there was a smp_mb() in percpu_up_read. This patch changes it to a compiler barrier and removes the "#if defined(X86) ..." condition. From: Lai Jiangshan Signed-off-by: Mikulas Patocka Signed-off-by: Linus Torvalds --- include/linux/percpu-rwsem.h | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) (limited to 'include/linux/percpu-rwsem.h') diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h index cf80f7e5277f..18f35b54286c 100644 --- a/include/linux/percpu-rwsem.h +++ b/include/linux/percpu-rwsem.h @@ -12,6 +12,9 @@ struct percpu_rw_semaphore { struct mutex mtx; }; +#define light_mb() barrier() +#define heavy_mb() synchronize_sched() + static inline void percpu_down_read(struct percpu_rw_semaphore *p) { rcu_read_lock(); @@ -24,22 +27,12 @@ static inline void percpu_down_read(struct percpu_rw_semaphore *p) } this_cpu_inc(*p->counters); rcu_read_unlock(); + light_mb(); /* A, between read of p->locked and read of data, paired with D */ } static inline void percpu_up_read(struct percpu_rw_semaphore *p) { - /* - * On X86, write operation in this_cpu_dec serves as a memory unlock - * barrier (i.e. memory accesses may be moved before the write, but - * no memory accesses are moved past the write). - * On other architectures this may not be the case, so we need smp_mb() - * there. 
- */ -#if defined(CONFIG_X86) && (!defined(CONFIG_X86_PPRO_FENCE) && !defined(CONFIG_X86_OOSTORE)) - barrier(); -#else - smp_mb(); -#endif + light_mb(); /* B, between read of the data and write to p->counter, paired with C */ this_cpu_dec(*p->counters); } @@ -61,11 +54,12 @@ static inline void percpu_down_write(struct percpu_rw_semaphore *p) synchronize_rcu(); while (__percpu_count(p->counters)) msleep(1); - smp_rmb(); /* paired with smp_mb() in percpu_sem_up_read() */ + heavy_mb(); /* C, between read of p->counter and write to data, paired with B */ } static inline void percpu_up_write(struct percpu_rw_semaphore *p) { + heavy_mb(); /* D, between write to data and write to p->locked, paired with A */ p->locked = false; mutex_unlock(&p->mtx); } -- cgit v1.2.3 From 1bf11c53535ab87e3bf14ecdf6747bf46f601c5d Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 22 Oct 2012 19:39:16 -0400 Subject: percpu-rw-semaphores: use rcu_read_lock_sched Use rcu_read_lock_sched / rcu_read_unlock_sched / synchronize_sched instead of rcu_read_lock / rcu_read_unlock / synchronize_rcu. This is an optimization. The RCU-protected region is very small, so there will be no latency problems if we disable preempt in this region. So we use rcu_read_lock_sched / rcu_read_unlock_sched that translates to preempt_disable / preempt_enable. It is smaller (and supposedly faster) than preemptible rcu_read_lock / rcu_read_unlock. 
Signed-off-by: Mikulas Patocka Signed-off-by: Linus Torvalds --- include/linux/percpu-rwsem.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux/percpu-rwsem.h') diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h index 18f35b54286c..250a4acddb2b 100644 --- a/include/linux/percpu-rwsem.h +++ b/include/linux/percpu-rwsem.h @@ -17,16 +17,16 @@ struct percpu_rw_semaphore { static inline void percpu_down_read(struct percpu_rw_semaphore *p) { - rcu_read_lock(); + rcu_read_lock_sched(); if (unlikely(p->locked)) { - rcu_read_unlock(); + rcu_read_unlock_sched(); mutex_lock(&p->mtx); this_cpu_inc(*p->counters); mutex_unlock(&p->mtx); return; } this_cpu_inc(*p->counters); - rcu_read_unlock(); + rcu_read_unlock_sched(); light_mb(); /* A, between read of p->locked and read of data, paired with D */ } @@ -51,7 +51,7 @@ static inline void percpu_down_write(struct percpu_rw_semaphore *p) { mutex_lock(&p->mtx); p->locked = true; - synchronize_rcu(); + synchronize_sched(); /* make sure that all readers exit the rcu_read_lock_sched region */ while (__percpu_count(p->counters)) msleep(1); heavy_mb(); /* C, between read of p->counter and write to data, paired with B */ -- cgit v1.2.3