Diffstat (limited to 'fs/bcachefs/rcu_pending.c')
-rw-r--r-- | fs/bcachefs/rcu_pending.c | 666
1 file changed, 0 insertions(+), 666 deletions(-)
diff --git a/fs/bcachefs/rcu_pending.c b/fs/bcachefs/rcu_pending.c
deleted file mode 100644
index b1438be9d690..000000000000
--- a/fs/bcachefs/rcu_pending.c
+++ /dev/null
@@ -1,666 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#define pr_fmt(fmt) "%s() " fmt "\n", __func__
-
-#include <linux/generic-radix-tree.h>
-#include <linux/mm.h>
-#include <linux/percpu.h>
-#include <linux/slab.h>
-#include <linux/srcu.h>
-#include <linux/vmalloc.h>
-
-#include "rcu_pending.h"
-#include "darray.h"
-#include "util.h"
-
-#define static_array_for_each(_a, _i)                   \
-        for (typeof(&(_a)[0]) _i = _a;                  \
-             _i < (_a) + ARRAY_SIZE(_a);                \
-             _i++)
-
-enum rcu_pending_special {
-        RCU_PENDING_KVFREE      = 1,
-        RCU_PENDING_CALL_RCU    = 2,
-};
-
-#define RCU_PENDING_KVFREE_FN           ((rcu_pending_process_fn) (ulong) RCU_PENDING_KVFREE)
-#define RCU_PENDING_CALL_RCU_FN         ((rcu_pending_process_fn) (ulong) RCU_PENDING_CALL_RCU)
-
-#ifdef __KERNEL__
-typedef unsigned long rcu_gp_poll_state_t;
-
-static inline bool rcu_gp_poll_cookie_eq(rcu_gp_poll_state_t l, rcu_gp_poll_state_t r)
-{
-        return l == r;
-}
-#else
-typedef struct urcu_gp_poll_state rcu_gp_poll_state_t;
-
-static inline bool rcu_gp_poll_cookie_eq(rcu_gp_poll_state_t l, rcu_gp_poll_state_t r)
-{
-        return l.grace_period_id == r.grace_period_id;
-}
-#endif
-
-static inline rcu_gp_poll_state_t __get_state_synchronize_rcu(struct srcu_struct *ssp)
-{
-        return ssp
-                ? get_state_synchronize_srcu(ssp)
-                : get_state_synchronize_rcu();
-}
-
-static inline rcu_gp_poll_state_t __start_poll_synchronize_rcu(struct srcu_struct *ssp)
-{
-        return ssp
-                ? start_poll_synchronize_srcu(ssp)
-                : start_poll_synchronize_rcu();
-}
-
-static inline bool __poll_state_synchronize_rcu(struct srcu_struct *ssp, rcu_gp_poll_state_t cookie)
-{
-        return ssp
-                ? poll_state_synchronize_srcu(ssp, cookie)
-                : poll_state_synchronize_rcu(cookie);
-}
-
-static inline void __rcu_barrier(struct srcu_struct *ssp)
-{
-        return ssp
-                ? srcu_barrier(ssp)
-                : rcu_barrier();
-}
-
-static inline void __call_rcu(struct srcu_struct *ssp, struct rcu_head *rhp,
-                              rcu_callback_t func)
-{
-        if (ssp)
-                call_srcu(ssp, rhp, func);
-        else
-                call_rcu(rhp, func);
-}
-
-struct rcu_pending_seq {
-        /*
-         * We're using a radix tree like a vector - we're just pushing elements
-         * onto the end; we're using a radix tree instead of an actual vector to
-         * avoid reallocation overhead
-         */
-        GENRADIX(struct rcu_head *)     objs;
-        size_t                          nr;
-        struct rcu_head                 **cursor;
-        rcu_gp_poll_state_t             seq;
-};
-
-struct rcu_pending_list {
-        struct rcu_head         *head;
-        struct rcu_head         *tail;
-        rcu_gp_poll_state_t     seq;
-};
-
-struct rcu_pending_pcpu {
-        struct rcu_pending      *parent;
-        spinlock_t              lock;
-        int                     cpu;
-
-        /*
-         * We can't bound the number of unprocessed gp sequence numbers, and we
-         * can't efficiently merge radix trees for expired grace periods, so we
-         * need darray/vector:
-         */
-        DARRAY_PREALLOCATED(struct rcu_pending_seq, 4) objs;
-
-        /* Third entry is for expired objects: */
-        struct rcu_pending_list lists[NUM_ACTIVE_RCU_POLL_OLDSTATE + 1];
-
-        struct rcu_head         cb;
-        bool                    cb_armed;
-        struct work_struct      work;
-};
-
-static bool __rcu_pending_has_pending(struct rcu_pending_pcpu *p)
-{
-        if (p->objs.nr)
-                return true;
-
-        static_array_for_each(p->lists, i)
-                if (i->head)
-                        return true;
-
-        return false;
-}
-
-static void rcu_pending_list_merge(struct rcu_pending_list *l1,
-                                   struct rcu_pending_list *l2)
-{
-#ifdef __KERNEL__
-        if (!l1->head)
-                l1->head = l2->head;
-        else
-                l1->tail->next = l2->head;
-#else
-        if (!l1->head)
-                l1->head = l2->head;
-        else
-                l1->tail->next.next = (void *) l2->head;
-#endif
-
-        l1->tail = l2->tail;
-        l2->head = l2->tail = NULL;
-}
-
-static void rcu_pending_list_add(struct rcu_pending_list *l,
-                                 struct rcu_head *n)
-{
-#ifdef __KERNEL__
-        if (!l->head)
-                l->head = n;
-        else
-                l->tail->next = n;
-        l->tail = n;
-        n->next = NULL;
-#else
-        if (!l->head)
-                l->head = n;
-        else
-                l->tail->next.next = (void *) n;
-        l->tail = n;
-        n->next.next = NULL;
-#endif
-}
-
-static void merge_expired_lists(struct rcu_pending_pcpu *p)
-{
-        struct rcu_pending_list *expired = &p->lists[NUM_ACTIVE_RCU_POLL_OLDSTATE];
-
-        for (struct rcu_pending_list *i = p->lists; i < expired; i++)
-                if (i->head && __poll_state_synchronize_rcu(p->parent->srcu, i->seq))
-                        rcu_pending_list_merge(expired, i);
-}
-
-#ifndef __KERNEL__
-static inline void kfree_bulk(size_t nr, void **p)
-{
-        while (nr--)
-                kfree(*p);
-}
-#endif
-
-static noinline void __process_finished_items(struct rcu_pending *pending,
-                                              struct rcu_pending_pcpu *p,
-                                              unsigned long flags)
-{
-        struct rcu_pending_list *expired = &p->lists[NUM_ACTIVE_RCU_POLL_OLDSTATE];
-        struct rcu_pending_seq objs = {};
-        struct rcu_head *list = NULL;
-
-        if (p->objs.nr &&
-            __poll_state_synchronize_rcu(pending->srcu, p->objs.data[0].seq)) {
-                objs = p->objs.data[0];
-                darray_remove_item(&p->objs, p->objs.data);
-        }
-
-        merge_expired_lists(p);
-
-        list = expired->head;
-        expired->head = expired->tail = NULL;
-
-        spin_unlock_irqrestore(&p->lock, flags);
-
-        switch ((ulong) pending->process) {
-        case RCU_PENDING_KVFREE:
-                for (size_t i = 0; i < objs.nr; ) {
-                        size_t nr_this_node = min(GENRADIX_NODE_SIZE / sizeof(void *), objs.nr - i);
-
-                        kfree_bulk(nr_this_node, (void **) genradix_ptr(&objs.objs, i));
-                        i += nr_this_node;
-                }
-                genradix_free(&objs.objs);
-
-                while (list) {
-                        struct rcu_head *obj = list;
-#ifdef __KERNEL__
-                        list = obj->next;
-#else
-                        list = (void *) obj->next.next;
-#endif
-
-                        /*
-                         * low bit of pointer indicates whether rcu_head needs
-                         * to be freed - kvfree_rcu_mightsleep()
-                         */
-                        BUILD_BUG_ON(ARCH_SLAB_MINALIGN == 0);
-
-                        void *ptr = (void *)(((unsigned long) obj->func) & ~1UL);
-                        bool free_head = ((unsigned long) obj->func) & 1UL;
-
-                        kvfree(ptr);
-                        if (free_head)
-                                kfree(obj);
-                }
-
-                break;
-
-        case RCU_PENDING_CALL_RCU:
-                for (size_t i = 0; i < objs.nr; i++) {
-                        struct rcu_head *obj = *genradix_ptr(&objs.objs, i);
-                        obj->func(obj);
-                }
-                genradix_free(&objs.objs);
-
-                while (list) {
-                        struct rcu_head *obj = list;
-#ifdef __KERNEL__
-                        list = obj->next;
-#else
-                        list = (void *) obj->next.next;
-#endif
-                        obj->func(obj);
-                }
-                break;
-
-        default:
-                for (size_t i = 0; i < objs.nr; i++)
-                        pending->process(pending, *genradix_ptr(&objs.objs, i));
-                genradix_free(&objs.objs);
-
-                while (list) {
-                        struct rcu_head *obj = list;
-#ifdef __KERNEL__
-                        list = obj->next;
-#else
-                        list = (void *) obj->next.next;
-#endif
-                        pending->process(pending, obj);
-                }
-                break;
-        }
-}
-
-static bool process_finished_items(struct rcu_pending *pending,
-                                   struct rcu_pending_pcpu *p,
-                                   unsigned long flags)
-{
-        /*
-         * XXX: we should grab the gp seq once and avoid multiple function
-         * calls, this is called from __rcu_pending_enqueue() fastpath in
-         * may_sleep==true mode
-         */
-        if ((p->objs.nr && __poll_state_synchronize_rcu(pending->srcu, p->objs.data[0].seq)) ||
-            (p->lists[0].head && __poll_state_synchronize_rcu(pending->srcu, p->lists[0].seq)) ||
-            (p->lists[1].head && __poll_state_synchronize_rcu(pending->srcu, p->lists[1].seq)) ||
-            p->lists[2].head) {
-                __process_finished_items(pending, p, flags);
-                return true;
-        }
-
-        return false;
-}
-
-static void rcu_pending_work(struct work_struct *work)
-{
-        struct rcu_pending_pcpu *p =
-                container_of(work, struct rcu_pending_pcpu, work);
-        struct rcu_pending *pending = p->parent;
-        unsigned long flags;
-
-        do {
-                spin_lock_irqsave(&p->lock, flags);
-        } while (process_finished_items(pending, p, flags));
-
-        spin_unlock_irqrestore(&p->lock, flags);
-}
-
-static void rcu_pending_rcu_cb(struct rcu_head *rcu)
-{
-        struct rcu_pending_pcpu *p = container_of(rcu, struct rcu_pending_pcpu, cb);
-
-        schedule_work_on(p->cpu, &p->work);
-
-        unsigned long flags;
-        spin_lock_irqsave(&p->lock, flags);
-        if (__rcu_pending_has_pending(p)) {
-                spin_unlock_irqrestore(&p->lock, flags);
-                __call_rcu(p->parent->srcu, &p->cb, rcu_pending_rcu_cb);
-        } else {
-                p->cb_armed = false;
-                spin_unlock_irqrestore(&p->lock, flags);
-        }
-}
-
-static __always_inline struct rcu_pending_seq *
-get_object_radix(struct rcu_pending_pcpu *p, rcu_gp_poll_state_t seq)
-{
-        darray_for_each_reverse(p->objs, objs)
-                if (rcu_gp_poll_cookie_eq(objs->seq, seq))
-                        return objs;
-
-        if (darray_push_gfp(&p->objs, ((struct rcu_pending_seq) { .seq = seq }), GFP_ATOMIC))
-                return NULL;
-
-        return &darray_last(p->objs);
-}
-
-static noinline bool
-rcu_pending_enqueue_list(struct rcu_pending_pcpu *p, rcu_gp_poll_state_t seq,
-                         struct rcu_head *head, void *ptr,
-                         unsigned long *flags)
-{
-        if (ptr) {
-                if (!head) {
-                        /*
-                         * kvfree_rcu_mightsleep(): we weren't passed an
-                         * rcu_head, but we need one: use the low bit of the
-                         * pointer to free to flag that the head needs to be
-                         * freed as well:
-                         */
-                        ptr = (void *)(((unsigned long) ptr)|1UL);
-                        head = kmalloc(sizeof(*head), __GFP_NOWARN);
-                        if (!head) {
-                                spin_unlock_irqrestore(&p->lock, *flags);
-                                head = kmalloc(sizeof(*head), GFP_KERNEL|__GFP_NOFAIL);
-                                /*
-                                 * dropped lock, did GFP_KERNEL allocation,
-                                 * check for gp expiration
-                                 */
-                                if (unlikely(__poll_state_synchronize_rcu(p->parent->srcu, seq))) {
-                                        kvfree(--ptr);
-                                        kfree(head);
-                                        spin_lock_irqsave(&p->lock, *flags);
-                                        return false;
-                                }
-                        }
-                }
-
-                head->func = ptr;
-        }
-again:
-        for (struct rcu_pending_list *i = p->lists;
-             i < p->lists + NUM_ACTIVE_RCU_POLL_OLDSTATE; i++) {
-                if (rcu_gp_poll_cookie_eq(i->seq, seq)) {
-                        rcu_pending_list_add(i, head);
-                        return false;
-                }
-        }
-
-        for (struct rcu_pending_list *i = p->lists;
-             i < p->lists + NUM_ACTIVE_RCU_POLL_OLDSTATE; i++) {
-                if (!i->head) {
-                        i->seq = seq;
-                        rcu_pending_list_add(i, head);
-                        return true;
-                }
-        }
-
-        merge_expired_lists(p);
-        goto again;
-}
-
-/*
- * __rcu_pending_enqueue: enqueue a pending RCU item, to be processed (via
- * pending->process) once a grace period elapses.
- *
- * Attempt to enqueue items onto a radix tree; if memory allocation fails, fall
- * back to a linked list.
- *
- * - If @ptr is NULL, we're enqueuing an item for a generic @pending with a
- *   process callback
- *
- * - If @ptr and @head are both not NULL, we're kvfree_rcu()
- *
- * - If @ptr is not NULL and @head is, we're kvfree_rcu_mightsleep()
- *
- * - If @may_sleep is true, will do GFP_KERNEL memory allocations and process
- *   expired items.
- */
-static __always_inline void
-__rcu_pending_enqueue(struct rcu_pending *pending, struct rcu_head *head,
-                      void *ptr, bool may_sleep)
-{
-        struct rcu_pending_pcpu *p;
-        struct rcu_pending_seq *objs;
-        struct genradix_node *new_node = NULL;
-        unsigned long flags;
-        bool start_gp = false;
-
-        BUG_ON((ptr != NULL) != (pending->process == RCU_PENDING_KVFREE_FN));
-
-        /* We could technically be scheduled before taking the lock and end up
-         * using a different cpu's rcu_pending_pcpu: that's ok, it needs a lock
-         * anyways
-         *
-         * And we have to do it this way to avoid breaking PREEMPT_RT, which
-         * redefines how spinlocks work:
-         */
-        p = raw_cpu_ptr(pending->p);
-        spin_lock_irqsave(&p->lock, flags);
-        rcu_gp_poll_state_t seq = __get_state_synchronize_rcu(pending->srcu);
-restart:
-        if (may_sleep &&
-            unlikely(process_finished_items(pending, p, flags)))
-                goto check_expired;
-
-        /*
-         * In kvfree_rcu() mode, the radix tree is only for slab pointers so
-         * that we can do kfree_bulk() - vmalloc pointers always use the linked
-         * list:
-         */
-        if (ptr && unlikely(is_vmalloc_addr(ptr)))
-                goto list_add;
-
-        objs = get_object_radix(p, seq);
-        if (unlikely(!objs))
-                goto list_add;
-
-        if (unlikely(!objs->cursor)) {
-                /*
-                 * New radix tree nodes must be added under @p->lock because the
-                 * tree root is in a darray that can be resized (typically,
-                 * genradix supports concurrent unlocked allocation of new
-                 * nodes) - hence preallocation and the retry loop:
-                 */
-                objs->cursor = genradix_ptr_alloc_preallocated_inlined(&objs->objs,
-                                                objs->nr, &new_node, GFP_ATOMIC|__GFP_NOWARN);
-                if (unlikely(!objs->cursor)) {
-                        if (may_sleep) {
-                                spin_unlock_irqrestore(&p->lock, flags);
-
-                                gfp_t gfp = GFP_KERNEL;
-                                if (!head)
-                                        gfp |= __GFP_NOFAIL;
-
-                                new_node = genradix_alloc_node(gfp);
-                                if (!new_node)
-                                        may_sleep = false;
-                                goto check_expired;
-                        }
-list_add:
-                        start_gp = rcu_pending_enqueue_list(p, seq, head, ptr, &flags);
-                        goto start_gp;
-                }
-        }
-
-        *objs->cursor++ = ptr ?: head;
-        /* zero cursor if we hit the end of a radix tree node: */
-        if (!(((ulong) objs->cursor) & (GENRADIX_NODE_SIZE - 1)))
-                objs->cursor = NULL;
-        start_gp = !objs->nr;
-        objs->nr++;
-start_gp:
-        if (unlikely(start_gp)) {
-                /*
-                 * We only have one callback (ideally, we would have one for
-                 * every outstanding grace period) - so if our callback is
-                 * already in flight, we may still have to start a grace period
-                 * (since we used get_state() above, not start_poll())
-                 */
-                if (!p->cb_armed) {
-                        p->cb_armed = true;
-                        __call_rcu(pending->srcu, &p->cb, rcu_pending_rcu_cb);
-                } else {
-                        __start_poll_synchronize_rcu(pending->srcu);
-                }
-        }
-        spin_unlock_irqrestore(&p->lock, flags);
-free_node:
-        if (new_node)
-                genradix_free_node(new_node);
-        return;
-check_expired:
-        if (unlikely(__poll_state_synchronize_rcu(pending->srcu, seq))) {
-                switch ((ulong) pending->process) {
-                case RCU_PENDING_KVFREE:
-                        kvfree(ptr);
-                        break;
-                case RCU_PENDING_CALL_RCU:
-                        head->func(head);
-                        break;
-                default:
-                        pending->process(pending, head);
-                        break;
-                }
-                goto free_node;
-        }
-
-        p = raw_cpu_ptr(pending->p);
-        spin_lock_irqsave(&p->lock, flags);
-        goto restart;
-}
-
-void rcu_pending_enqueue(struct rcu_pending *pending, struct rcu_head *obj)
-{
-        __rcu_pending_enqueue(pending, obj, NULL, true);
-}
-
-static struct rcu_head *rcu_pending_pcpu_dequeue(struct rcu_pending_pcpu *p)
-{
-        struct rcu_head *ret = NULL;
-
-        spin_lock_irq(&p->lock);
-        darray_for_each(p->objs, objs)
-                if (objs->nr) {
-                        ret = *genradix_ptr(&objs->objs, --objs->nr);
-                        objs->cursor = NULL;
-                        if (!objs->nr)
-                                genradix_free(&objs->objs);
-                        goto out;
-                }
-
-        static_array_for_each(p->lists, i)
-                if (i->head) {
-                        ret = i->head;
-#ifdef __KERNEL__
-                        i->head = ret->next;
-#else
-                        i->head = (void *) ret->next.next;
-#endif
-                        if (!i->head)
-                                i->tail = NULL;
-                        goto out;
-                }
-out:
-        spin_unlock_irq(&p->lock);
-
-        return ret;
-}
-
-struct rcu_head *rcu_pending_dequeue(struct rcu_pending *pending)
-{
-        return rcu_pending_pcpu_dequeue(raw_cpu_ptr(pending->p));
-}
-
-struct rcu_head *rcu_pending_dequeue_from_all(struct rcu_pending *pending)
-{
-        struct rcu_head *ret = rcu_pending_dequeue(pending);
-
-        if (ret)
-                return ret;
-
-        int cpu;
-        for_each_possible_cpu(cpu) {
-                ret = rcu_pending_pcpu_dequeue(per_cpu_ptr(pending->p, cpu));
-                if (ret)
-                        break;
-        }
-        return ret;
-}
-
-static bool rcu_pending_has_pending_or_armed(struct rcu_pending *pending)
-{
-        int cpu;
-        for_each_possible_cpu(cpu) {
-                struct rcu_pending_pcpu *p = per_cpu_ptr(pending->p, cpu);
-                spin_lock_irq(&p->lock);
-                if (__rcu_pending_has_pending(p) || p->cb_armed) {
-                        spin_unlock_irq(&p->lock);
-                        return true;
-                }
-                spin_unlock_irq(&p->lock);
-        }
-
-        return false;
-}
-
-void rcu_pending_exit(struct rcu_pending *pending)
-{
-        int cpu;
-
-        if (!pending->p)
-                return;
-
-        while (rcu_pending_has_pending_or_armed(pending)) {
-                __rcu_barrier(pending->srcu);
-
-                for_each_possible_cpu(cpu) {
-                        struct rcu_pending_pcpu *p = per_cpu_ptr(pending->p, cpu);
-                        flush_work(&p->work);
-                }
-        }
-
-        for_each_possible_cpu(cpu) {
-                struct rcu_pending_pcpu *p = per_cpu_ptr(pending->p, cpu);
-                flush_work(&p->work);
-        }
-
-        for_each_possible_cpu(cpu) {
-                struct rcu_pending_pcpu *p = per_cpu_ptr(pending->p, cpu);
-
-                static_array_for_each(p->lists, i)
-                        WARN_ON(i->head);
-                WARN_ON(p->objs.nr);
-                darray_exit(&p->objs);
-        }
-        free_percpu(pending->p);
-}
-
-/**
- * rcu_pending_init - initialize a rcu_pending
- *
- * @pending:    Object to init
- * @srcu:       May optionally be used with an srcu_struct; if NULL, uses normal
- *              RCU flavor
- * @process:    Callback function invoked on objects once their RCU grace
- *              periods have elapsed; if NULL, kvfree() is used.
- */
-int rcu_pending_init(struct rcu_pending *pending,
-                     struct srcu_struct *srcu,
-                     rcu_pending_process_fn process)
-{
-        pending->p = alloc_percpu(struct rcu_pending_pcpu);
-        if (!pending->p)
-                return -ENOMEM;
-
-        int cpu;
-        for_each_possible_cpu(cpu) {
-                struct rcu_pending_pcpu *p = per_cpu_ptr(pending->p, cpu);
-                p->parent       = pending;
-                p->cpu          = cpu;
-                spin_lock_init(&p->lock);
-                darray_init(&p->objs);
-                INIT_WORK(&p->work, rcu_pending_work);
-        }
-
-        pending->srcu = srcu;
-        pending->process = process;
-
-        return 0;
-}
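
The __get_state_synchronize_rcu()/__poll_state_synchronize_rcu() wrappers near the top of the file dispatch between the RCU and SRCU flavors of the kernel's polled grace-period interface: take a cookie now, then later test, without blocking, whether a full grace period has elapsed since the cookie was taken. A minimal sketch of that pattern using the plain-RCU flavor only (the function name is illustrative; the RCU calls are the real kernel API):

/* Sketch: poll a cookie from get_state_synchronize_rcu(); if the grace
 * period hasn't elapsed, make sure one is in flight and re-poll later.
 * This is what the cb_armed/start_gp logic in the file arranges. */
static bool gp_expired_or_start(unsigned long cookie)
{
        if (poll_state_synchronize_rcu(cookie))
                return true;            /* safe to free objects tagged @cookie */

        start_poll_synchronize_rcu();   /* ensure a grace period is running */
        return false;                   /* caller re-polls later */
}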
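struct rcu_pending_seq uses a generic radix tree as an append-only vector: genradix_ptr_alloc() grows the tree in place rather than reallocating and copying as a flat vector would, which is also why the code can cache a cursor into the current node. A minimal sketch of that push pattern; everything except the genradix API itself is invented for illustration:

#include <linux/generic-radix-tree.h>

/* Illustrative append-only "vector" in the style of rcu_pending_seq: */
static GENRADIX(struct rcu_head *) pushed;
static size_t pushed_nr;

static int push_head(struct rcu_head *head, gfp_t gfp)
{
        struct rcu_head **slot = genradix_ptr_alloc(&pushed, pushed_nr, gfp);

        if (!slot)
                return -ENOMEM;

        *slot = head;           /* elements are only ever appended at the end */
        pushed_nr++;
        return 0;
}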
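The kvfree_rcu_mightsleep() path stores a "free the rcu_head too" flag in bit 0 of the pointer stashed in head->func, which is valid because slab allocations are at least ARCH_SLAB_MINALIGN-aligned (hence the BUILD_BUG_ON). The same tagging trick as a standalone, userspace C demo:

#include <assert.h>
#include <stdint.h>
#include <stdlib.h>

/* Tag bit 0 of a pointer; valid because malloc() results are aligned. */
static void *tag_ptr(void *p)    { return (void *)((uintptr_t) p | 1UL); }
static void *untag_ptr(void *p)  { return (void *)((uintptr_t) p & ~1UL); }
static int   is_tagged(void *p)  { return (uintptr_t) p & 1UL; }

int main(void)
{
        void *p = malloc(16);
        void *t = tag_ptr(p);

        assert(is_tagged(t));           /* flag travels inside the pointer */
        assert(untag_ptr(t) == p);      /* original pointer is recoverable */
        free(untag_ptr(t));
        return 0;
}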
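For context, a hypothetical caller of the API this file provided, pieced together from the signatures above (rcu_pending_init(), rcu_pending_enqueue(), rcu_pending_exit()). struct my_obj and the my_* functions are invented for illustration, and rcu_pending.h (not shown in this diff) is assumed to declare struct rcu_pending and rcu_pending_process_fn:

#include <linux/slab.h>
#include "rcu_pending.h"

struct my_obj {
        struct rcu_head rcu;
        int             data;
};

/* Invoked once a grace period has elapsed for each enqueued object: */
static void my_obj_process(struct rcu_pending *pending, struct rcu_head *rcu)
{
        struct my_obj *obj = container_of(rcu, struct my_obj, rcu);

        kfree(obj);
}

static struct rcu_pending my_pending;

static int my_init(void)
{
        /* NULL srcu_struct: use the normal RCU flavor: */
        return rcu_pending_init(&my_pending, NULL, my_obj_process);
}

static void my_free_obj(struct my_obj *obj)
{
        /* Hand off obj->rcu; my_obj_process() runs after a grace period: */
        rcu_pending_enqueue(&my_pending, &obj->rcu);
}

static void my_exit(void)
{
        /* Waits for outstanding grace periods and flushes work items: */
        rcu_pending_exit(&my_pending);
}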