From eddee5ba34eb6c9890ef106f19ead2b370e5342f Mon Sep 17 00:00:00 2001
From: Herbert Xu
Date: Sat, 14 Mar 2015 13:57:20 +1100
Subject: rhashtable: Fix walker behaviour during rehash

Previously, whenever the walker encountered a resize it simply
snapped back to the beginning and started again.  However, this only
works if the rehash started and completed while the walker was idle.
If the walker attempts to restart while the rehash is still ongoing,
we may miss objects that we shouldn't.

This patch fixes this by making the walker walk the old table
followed by the new table, just like all other readers.  If a rehash
is detected we will still signal the fact to our caller so they can
prepare for duplicates, but we will simply continue the walk onto
the new table after the old one is finished, either by us or by the
rehasher.

Signed-off-by: Herbert Xu
Signed-off-by: David S. Miller
---
 include/linux/rhashtable.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index c93ff8ac474a..4192682c1d5c 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -53,6 +53,7 @@ struct rhash_head {
  * @shift: Current size (1 << shift)
  * @locks_mask: Mask to apply before accessing locks[]
  * @locks: Array of spinlocks protecting individual buckets
+ * @walkers: List of active walkers
  * @buckets: size * hash buckets
  */
 struct bucket_table {
@@ -61,6 +62,7 @@ struct bucket_table {
 	u32 shift;
 	unsigned int locks_mask;
 	spinlock_t *locks;
+	struct list_head walkers;
 
 	struct rhash_head __rcu *buckets[] ____cacheline_aligned_in_smp;
 };
@@ -104,7 +106,6 @@ struct rhashtable_params {
  * @p: Configuration parameters
  * @run_work: Deferred worker to expand/shrink asynchronously
  * @mutex: Mutex to protect current/future table swapping
- * @walkers: List of active walkers
  * @being_destroyed: True if table is set up for destruction
  */
 struct rhashtable {
@@ -115,17 +116,16 @@ struct rhashtable {
 	struct rhashtable_params p;
 	struct work_struct run_work;
 	struct mutex mutex;
-	struct list_head walkers;
 };
 
 /**
  * struct rhashtable_walker - Hash table walker
  * @list: List entry on list of walkers
- * @resize: Resize event occured
+ * @tbl: The table that we were walking over
  */
 struct rhashtable_walker {
 	struct list_head list;
-	bool resize;
+	struct bucket_table *tbl;
 };
 
 /**
--
cgit v1.2.3
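A minimal sketch of the caller-side contract after this change, using the
walker API of this series (rhashtable_walk_init/start/next/stop/exit);
struct test_obj, its fields and walk_all() are hypothetical names, not part
of the patch:

#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/rhashtable.h>

struct test_obj {
	int value;
	struct rhash_head node;
};

static void walk_all(struct rhashtable *ht)
{
	struct rhashtable_iter iter;
	struct test_obj *obj;
	int err;

	err = rhashtable_walk_init(ht, &iter);
	if (err)
		return;

	err = rhashtable_walk_start(&iter);
	if (err && err != -EAGAIN)
		goto exit;

	while ((obj = rhashtable_walk_next(&iter)) != NULL) {
		if (IS_ERR(obj)) {
			/* -EAGAIN now only means "a rehash was seen, expect
			 * duplicates"; the walk continues onto the new table
			 * instead of snapping back to the beginning. */
			if (PTR_ERR(obj) == -EAGAIN)
				continue;
			break;
		}
		pr_info("value=%d\n", obj->value);
	}

	rhashtable_walk_stop(&iter);
exit:
	rhashtable_walk_exit(&iter);
}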
From 9d901bc05153bbf33b5da2cd6266865e531f0545 Mon Sep 17 00:00:00 2001
From: Herbert Xu
Date: Sat, 14 Mar 2015 13:57:23 +1100
Subject: rhashtable: Free bucket tables asynchronously after rehash

There is in fact no need to wait for an RCU grace period in the
rehash function, since all insertions are guaranteed to go into the
new table through spin locks.

This patch uses call_rcu to free the old/rehashed table at our
leisure.

Signed-off-by: Herbert Xu
Signed-off-by: David S. Miller
---
 include/linux/rhashtable.h | 2 ++
 lib/rhashtable.c           | 9 ++++++---
 2 files changed, 8 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index 4192682c1d5c..a0abddd226b3 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -54,6 +54,7 @@ struct rhash_head {
  * @locks_mask: Mask to apply before accessing locks[]
  * @locks: Array of spinlocks protecting individual buckets
  * @walkers: List of active walkers
+ * @rcu: RCU structure for freeing the table
  * @buckets: size * hash buckets
  */
 struct bucket_table {
@@ -63,6 +64,7 @@ struct bucket_table {
 	unsigned int locks_mask;
 	spinlock_t *locks;
 	struct list_head walkers;
+	struct rcu_head rcu;
 
 	struct rhash_head __rcu *buckets[] ____cacheline_aligned_in_smp;
 };

diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index e55bbc84c449..36fb0910bec2 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -141,6 +141,11 @@ static void bucket_table_free(const struct bucket_table *tbl)
 	kvfree(tbl);
 }
 
+static void bucket_table_free_rcu(struct rcu_head *head)
+{
+	bucket_table_free(container_of(head, struct bucket_table, rcu));
+}
+
 static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
 					       size_t nbuckets)
 {
@@ -288,9 +293,7 @@ static void rhashtable_rehash(struct rhashtable *ht,
 	 * table, and thus no references to the old table will
 	 * remain.
 	 */
-	synchronize_rcu();
-
-	bucket_table_free(old_tbl);
+	call_rcu(&old_tbl->rcu, bucket_table_free_rcu);
 }
 
 /**
--
cgit v1.2.3
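The container_of() pattern that bucket_table_free_rcu() relies on can be
shown standalone: the RCU callback receives only the embedded rcu_head and
recovers the enclosing table from it.  This is a userspace sketch with a
hand-rolled rcu_head stand-in; in the kernel the callback is queued with
call_rcu() and runs only after a grace period:

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct rcu_head {
	void (*func)(struct rcu_head *head);
};

struct bucket_table {
	unsigned int size;
	struct rcu_head rcu;	/* embedded; the free callback gets only this */
};

static void bucket_table_free_rcu(struct rcu_head *head)
{
	/* Recover the enclosing table from the embedded rcu_head. */
	struct bucket_table *tbl = container_of(head, struct bucket_table, rcu);

	printf("freeing table with %u buckets\n", tbl->size);
	free(tbl);
}

int main(void)
{
	struct bucket_table *tbl = calloc(1, sizeof(*tbl));

	if (!tbl)
		return 1;
	tbl->size = 16;
	tbl->rcu.func = bucket_table_free_rcu;

	/* call_rcu(&tbl->rcu, bucket_table_free_rcu) would defer this until
	 * all pre-existing readers are done; here we invoke it directly. */
	tbl->rcu.func(&tbl->rcu);
	return 0;
}

Deferring the free this way means readers still traversing the old table
never see it disappear underneath them, while the rehasher no longer has to
block in synchronize_rcu().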
From 63d512d0cffcae40505d9448abd509972465e846 Mon Sep 17 00:00:00 2001
From: Herbert Xu
Date: Sat, 14 Mar 2015 13:57:24 +1100
Subject: rhashtable: Add rehash counter to bucket_table

This patch adds a rehash counter to bucket_table to indicate the
last bucket that has been rehashed.  This serves two purposes:

1. Any bucket that has been rehashed can never gain a new object.
2. If the rehash counter reaches the size of the table, the table
   will forever remain empty.

This patch also downsizes bucket_table->size to an unsigned int
since we do not support sizes greater than 32 bits yet.

Signed-off-by: Herbert Xu
Signed-off-by: David S. Miller
---
 include/linux/rhashtable.h | 4 +++-
 lib/rhashtable.c           | 1 +
 lib/test_rhashtable.c      | 2 +-
 3 files changed, 5 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index a0abddd226b3..ed7562ad4ca0 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -49,6 +49,7 @@ struct rhash_head {
 /**
  * struct bucket_table - Table of hash buckets
  * @size: Number of hash buckets
+ * @rehash: Current bucket being rehashed
  * @hash_rnd: Random seed to fold into hash
  * @shift: Current size (1 << shift)
  * @locks_mask: Mask to apply before accessing locks[]
@@ -58,7 +59,8 @@ struct rhash_head {
  * @buckets: size * hash buckets
  */
 struct bucket_table {
-	size_t size;
+	unsigned int size;
+	unsigned int rehash;
 	u32 hash_rnd;
 	u32 shift;
 	unsigned int locks_mask;

diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 36fb0910bec2..ff4ea1704546 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -260,6 +260,7 @@ static void rhashtable_rehash_chain(struct rhashtable *ht, unsigned old_hash)
 	spin_lock_bh(old_bucket_lock);
 	while (!rhashtable_rehash_one(ht, old_hash))
 		;
+	old_tbl->rehash++;
 	spin_unlock_bh(old_bucket_lock);
 }

diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c
index 67c7593d1dd6..16974fd89e4e 100644
--- a/lib/test_rhashtable.c
+++ b/lib/test_rhashtable.c
@@ -80,7 +80,7 @@ static void test_bucket_stats(struct rhashtable *ht, bool quiet)
 		rcu_cnt = cnt = 0;
 
 		if (!quiet)
-			pr_info(" [%#4x/%u]", i, tbl->size);
+			pr_info(" [%#4x/%u]", i, tbl->size);
 
 		rht_for_each_entry_rcu(obj, pos, tbl, i, node) {
 			cnt++;
--
cgit v1.2.3
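The counter's invariant can be modelled in a few lines of userspace C:
buckets below rehash have already been emptied into the new table and can
never gain objects, and once rehash reaches size the table stays empty
forever.  All names here are hypothetical illustrations, not kernel code:

#include <stdbool.h>
#include <stdio.h>

struct bucket_table {
	unsigned int size;	/* number of buckets */
	unsigned int rehash;	/* first bucket not yet rehashed */
};

/* Buckets below ->rehash have been moved to the new table. */
static bool bucket_may_hold_objects(const struct bucket_table *tbl,
				    unsigned int hash)
{
	return hash >= tbl->rehash;
}

static void rehash_chain(struct bucket_table *tbl)
{
	/* ... move every entry of bucket tbl->rehash, then advance ... */
	tbl->rehash++;
}

int main(void)
{
	struct bucket_table tbl = { .size = 4, .rehash = 0 };

	while (tbl.rehash < tbl.size)	/* the liveness test walk_stop uses */
		rehash_chain(&tbl);

	/* rehash == size: the table can never hold objects again. */
	printf("bucket 0 may hold objects: %s\n",
	       bucket_may_hold_objects(&tbl, 0) ? "yes" : "no");
	return 0;
}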
From c4db8848af6af92f90462258603be844baeab44d Mon Sep 17 00:00:00 2001
From: Herbert Xu
Date: Sat, 14 Mar 2015 13:57:25 +1100
Subject: rhashtable: Move future_tbl into struct bucket_table

This patch moves future_tbl into struct bucket_table to open up the
possibility of having multiple rehashes on the same table.

Signed-off-by: Herbert Xu
Signed-off-by: David S. Miller
---
 include/linux/rhashtable.h |  5 +++--
 lib/rhashtable.c           | 27 +++++++++++----------------
 2 files changed, 14 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index ed7562ad4ca0..1695378b3c5b 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -56,6 +56,7 @@ struct rhash_head {
  * @locks: Array of spinlocks protecting individual buckets
  * @walkers: List of active walkers
  * @rcu: RCU structure for freeing the table
+ * @future_tbl: Table under construction during rehashing
  * @buckets: size * hash buckets
  */
 struct bucket_table {
@@ -68,6 +69,8 @@ struct bucket_table {
 	struct list_head walkers;
 	struct rcu_head rcu;
 
+	struct bucket_table __rcu *future_tbl;
+
 	struct rhash_head __rcu *buckets[] ____cacheline_aligned_in_smp;
 };
 
@@ -105,7 +108,6 @@ struct rhashtable_params {
 /**
  * struct rhashtable - Hash table handle
  * @tbl: Bucket table
- * @future_tbl: Table under construction during expansion/shrinking
  * @nelems: Number of elements in table
  * @p: Configuration parameters
  * @run_work: Deferred worker to expand/shrink asynchronously
@@ -114,7 +116,6 @@ struct rhashtable_params {
  */
 struct rhashtable {
 	struct bucket_table __rcu *tbl;
-	struct bucket_table __rcu *future_tbl;
 	atomic_t nelems;
 	bool being_destroyed;
 	struct rhashtable_params p;

diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index ff4ea1704546..9d53a46dcca9 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -207,8 +207,9 @@ static bool rht_shrink_below_30(const struct rhashtable *ht,
 
 static int rhashtable_rehash_one(struct rhashtable *ht, unsigned old_hash)
 {
-	struct bucket_table *new_tbl = rht_dereference(ht->future_tbl, ht);
 	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
+	struct bucket_table *new_tbl =
+		rht_dereference(old_tbl->future_tbl, ht) ?: old_tbl;
 	struct rhash_head __rcu **pprev = &old_tbl->buckets[old_hash];
 	int err = -ENOENT;
 	struct rhash_head *head, *next, *entry;
@@ -273,10 +274,8 @@ static void rhashtable_rehash(struct rhashtable *ht,
 
 	/* Make insertions go into the new, empty table right away. Deletions
 	 * and lookups will be attempted in both tables until we synchronize.
-	 * The synchronize_rcu() guarantees for the new table to be picked up
-	 * so no new additions go into the old table while we relink.
 	 */
-	rcu_assign_pointer(ht->future_tbl, new_tbl);
+	rcu_assign_pointer(old_tbl->future_tbl, new_tbl);
 
 	/* Ensure the new table is visible to readers. */
 	smp_wmb();
@@ -400,7 +399,7 @@ static bool __rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj,
 	 * also grab the bucket lock in old_tbl because until the
 	 * rehash completes ht->tbl won't be changed.
 	 */
-	tbl = rht_dereference_rcu(ht->future_tbl, ht);
+	tbl = rht_dereference_rcu(old_tbl->future_tbl, ht) ?: old_tbl;
 	if (tbl != old_tbl) {
 		hash = head_hashfn(ht, tbl, obj);
 		spin_lock_nested(bucket_lock(tbl, hash), SINGLE_DEPTH_NESTING);
@@ -525,7 +524,7 @@ bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *obj)
 	 * visible then that guarantees the entry to still be in
 	 * old_tbl if it exists.
 	 */
-	tbl = rht_dereference_rcu(ht->future_tbl, ht);
+	tbl = rht_dereference_rcu(old_tbl->future_tbl, ht) ?: old_tbl;
 	if (!ret && old_tbl != tbl)
 		ret = __rhashtable_remove(ht, tbl, obj);
 
@@ -599,7 +598,7 @@ EXPORT_SYMBOL_GPL(rhashtable_lookup);
 void *rhashtable_lookup_compare(struct rhashtable *ht, const void *key,
 				bool (*compare)(void *, void *), void *arg)
 {
-	const struct bucket_table *tbl, *old_tbl;
+	const struct bucket_table *tbl;
 	struct rhash_head *he;
 	u32 hash;
 
@@ -618,9 +617,8 @@ restart:
 	/* Ensure we see any new tables. */
 	smp_rmb();
 
-	old_tbl = tbl;
-	tbl = rht_dereference_rcu(ht->future_tbl, ht);
-	if (unlikely(tbl != old_tbl))
+	tbl = rht_dereference_rcu(tbl->future_tbl, ht);
+	if (unlikely(tbl))
 		goto restart;
 	rcu_read_unlock();
 
@@ -830,14 +828,13 @@ next:
 		iter->skip = 0;
 	}
 
-	iter->walker->tbl = rht_dereference_rcu(ht->future_tbl, ht);
-	if (iter->walker->tbl != tbl) {
+	iter->walker->tbl = rht_dereference_rcu(tbl->future_tbl, ht);
+	if (iter->walker->tbl) {
 		iter->slot = 0;
 		iter->skip = 0;
 		return ERR_PTR(-EAGAIN);
 	}
 
-	iter->walker->tbl = NULL;
 	iter->p = NULL;
 
 out:
@@ -865,8 +862,7 @@ void rhashtable_walk_stop(struct rhashtable_iter *iter)
 	ht = iter->ht;
 	mutex_lock(&ht->mutex);
 
-	if (rht_dereference(ht->tbl, ht) == tbl ||
-	    rht_dereference(ht->future_tbl, ht) == tbl)
+	if (tbl->rehash < tbl->size)
 		list_add(&iter->walker->list, &tbl->walkers);
 	else
 		iter->walker->tbl = NULL;
@@ -961,7 +957,6 @@ int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params)
 	atomic_set(&ht->nelems, 0);
 
 	RCU_INIT_POINTER(ht->tbl, tbl);
-	RCU_INIT_POINTER(ht->future_tbl, tbl);
 
 	INIT_WORK(&ht->run_work, rht_deferred_worker);
--
cgit v1.2.3
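With future_tbl chained through the tables themselves, a reader simply
follows the chain from its own table until it runs out.  A standalone C
sketch of that traversal, mirroring the restart loop in
rhashtable_lookup_compare() (all names here are illustrative only):

#include <stdio.h>

/* Each table points at its successor; NULL means no rehash in flight.
 * Multiple concurrent rehashes simply extend the chain. */
struct bucket_table {
	struct bucket_table *future_tbl;
	int id;
};

/* A reader searches every table in the chain, oldest first. */
static void lookup_all_tables(const struct bucket_table *tbl)
{
	for (; tbl; tbl = tbl->future_tbl)
		printf("searching table %d\n", tbl->id);
}

int main(void)
{
	struct bucket_table third  = { NULL,    3 };
	struct bucket_table second = { &third,  2 };
	struct bucket_table first  = { &second, 1 };

	lookup_all_tables(&first);	/* visits tables 1, 2, 3 */
	return 0;
}

Because readers reach the newest table through their own table's pointer
rather than a single ht->future_tbl slot, a second rehash can begin before
the first one's table has been torn down.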