From 5b9cce4c7eb0696558dfd4946074ae1fb9d8f05d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 26 Aug 2019 09:06:52 -0700 Subject: writeback: Generalize and expose wb_completion wb_completion is used to track writeback completions. We want to use it from memcg side for foreign inode flushes. This patch updates it to remember the target waitq instead of assuming bdi->wb_waitq and exposes it outside of fs-writeback.c. Reviewed-by: Jan Kara Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/backing-dev.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux/backing-dev.h') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 35b31d176f74..02650b1253a2 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -44,6 +44,8 @@ void wb_start_background_writeback(struct bdi_writeback *wb); void wb_workfn(struct work_struct *work); void wb_wakeup_delayed(struct bdi_writeback *wb); +void wb_wait_for_completion(struct wb_completion *done); + extern spinlock_t bdi_lock; extern struct list_head bdi_list; -- cgit v1.2.3 From 34f8fe501f0624de115d087680c84000b5d9abc9 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 26 Aug 2019 09:06:53 -0700 Subject: bdi: Add bdi->id There currently is no way to universally identify and look up a bdi without holding a reference and pointer to it. This patch adds a non-recycling bdi->id and implements bdi_get_by_id() which looks up bdis by their ids. This will be used by memcg foreign inode flushing. I left bdi_list alone for simplicity and because while rb_tree does support rcu assignment it doesn't seem to guarantee lossless walk when walk is racing against tree rebalance operations. 
Reviewed-by: Jan Kara Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/backing-dev-defs.h | 2 ++ include/linux/backing-dev.h | 1 + mm/backing-dev.c | 65 ++++++++++++++++++++++++++++++++++++++-- 3 files changed, 66 insertions(+), 2 deletions(-) (limited to 'include/linux/backing-dev.h') diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index 8fb740178d5d..1075f2552cfc 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -185,6 +185,8 @@ struct bdi_writeback { }; struct backing_dev_info { + u64 id; + struct rb_node rb_node; /* keyed by ->id */ struct list_head bdi_list; unsigned long ra_pages; /* max readahead in PAGE_SIZE units */ unsigned long io_pages; /* max allowed IO size */ diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 02650b1253a2..84cdcfbc763f 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -24,6 +24,7 @@ static inline struct backing_dev_info *bdi_get(struct backing_dev_info *bdi) return bdi; } +struct backing_dev_info *bdi_get_by_id(u64 id); void bdi_put(struct backing_dev_info *bdi); __printf(2, 3) diff --git a/mm/backing-dev.c b/mm/backing-dev.c index e8e89158adec..612aa7c5ddbd 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only #include +#include #include #include #include @@ -22,10 +23,12 @@ EXPORT_SYMBOL_GPL(noop_backing_dev_info); static struct class *bdi_class; /* - * bdi_lock protects updates to bdi_list. bdi_list has RCU reader side - * locking. + * bdi_lock protects bdi_tree and updates to bdi_list. bdi_list has RCU + * reader side locking. 
*/ DEFINE_SPINLOCK(bdi_lock); +static u64 bdi_id_cursor; +static struct rb_root bdi_tree = RB_ROOT; LIST_HEAD(bdi_list); /* bdi_wq serves all asynchronous writeback tasks */ @@ -859,9 +862,58 @@ struct backing_dev_info *bdi_alloc_node(gfp_t gfp_mask, int node_id) } EXPORT_SYMBOL(bdi_alloc_node); +static struct rb_node **bdi_lookup_rb_node(u64 id, struct rb_node **parentp) +{ + struct rb_node **p = &bdi_tree.rb_node; + struct rb_node *parent = NULL; + struct backing_dev_info *bdi; + + lockdep_assert_held(&bdi_lock); + + while (*p) { + parent = *p; + bdi = rb_entry(parent, struct backing_dev_info, rb_node); + + if (bdi->id > id) + p = &(*p)->rb_left; + else if (bdi->id < id) + p = &(*p)->rb_right; + else + break; + } + + if (parentp) + *parentp = parent; + return p; +} + +/** + * bdi_get_by_id - lookup and get bdi from its id + * @id: bdi id to lookup + * + * Find bdi matching @id and get it. Returns NULL if the matching bdi + * doesn't exist or is already unregistered. + */ +struct backing_dev_info *bdi_get_by_id(u64 id) +{ + struct backing_dev_info *bdi = NULL; + struct rb_node **p; + + spin_lock_bh(&bdi_lock); + p = bdi_lookup_rb_node(id, NULL); + if (*p) { + bdi = rb_entry(*p, struct backing_dev_info, rb_node); + bdi_get(bdi); + } + spin_unlock_bh(&bdi_lock); + + return bdi; +} + int bdi_register_va(struct backing_dev_info *bdi, const char *fmt, va_list args) { struct device *dev; + struct rb_node *parent, **p; if (bdi->dev) /* The driver needs to use separate queues per device */ return 0; @@ -877,7 +929,15 @@ int bdi_register_va(struct backing_dev_info *bdi, const char *fmt, va_list args) set_bit(WB_registered, &bdi->wb.state); spin_lock_bh(&bdi_lock); + + bdi->id = ++bdi_id_cursor; + + p = bdi_lookup_rb_node(bdi->id, &parent); + rb_link_node(&bdi->rb_node, parent, p); + rb_insert_color(&bdi->rb_node, &bdi_tree); + list_add_tail_rcu(&bdi->bdi_list, &bdi_list); + spin_unlock_bh(&bdi_lock); trace_writeback_bdi_register(bdi); @@ -918,6 +978,7 @@ 
EXPORT_SYMBOL(bdi_register_owner); static void bdi_remove_from_list(struct backing_dev_info *bdi) { spin_lock_bh(&bdi_lock); + rb_erase(&bdi->rb_node, &bdi_tree); list_del_rcu(&bdi->bdi_list); spin_unlock_bh(&bdi_lock); -- cgit v1.2.3 From ed288dc0d4aa29f65bd25b31b5cb866aa5664ff9 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 26 Aug 2019 09:06:54 -0700 Subject: writeback: Separate out wb_get_lookup() from wb_get_create() Separate out wb_get_lookup() which doesn't try to create one if there isn't already one from wb_get_create(). This will be used by later patches. Reviewed-by: Jan Kara Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/backing-dev.h | 2 ++ mm/backing-dev.c | 55 ++++++++++++++++++++++++++++++--------------- 2 files changed, 39 insertions(+), 18 deletions(-) (limited to 'include/linux/backing-dev.h') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 84cdcfbc763f..97967ce06de3 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -230,6 +230,8 @@ static inline int bdi_sched_wait(void *word) struct bdi_writeback_congested * wb_congested_get_create(struct backing_dev_info *bdi, int blkcg_id, gfp_t gfp); void wb_congested_put(struct bdi_writeback_congested *congested); +struct bdi_writeback *wb_get_lookup(struct backing_dev_info *bdi, + struct cgroup_subsys_state *memcg_css); struct bdi_writeback *wb_get_create(struct backing_dev_info *bdi, struct cgroup_subsys_state *memcg_css, gfp_t gfp); diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 612aa7c5ddbd..d9daa3e422d0 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -618,13 +618,12 @@ out_put: } /** - * wb_get_create - get wb for a given memcg, create if necessary + * wb_get_lookup - get wb for a given memcg * @bdi: target bdi * @memcg_css: cgroup_subsys_state of the target memcg (must have positive ref) - * @gfp: allocation mask to use * - * Try to get the wb for @memcg_css on @bdi. 
If it doesn't exist, try to - * create one. The returned wb has its refcount incremented. + * Try to get the wb for @memcg_css on @bdi. The returned wb has its + * refcount incremented. * * This function uses css_get() on @memcg_css and thus expects its refcnt * to be positive on invocation. IOW, rcu_read_lock() protection on @@ -641,6 +640,39 @@ out_put: * each lookup. On mismatch, the existing wb is discarded and a new one is * created. */ +struct bdi_writeback *wb_get_lookup(struct backing_dev_info *bdi, + struct cgroup_subsys_state *memcg_css) +{ + struct bdi_writeback *wb; + + if (!memcg_css->parent) + return &bdi->wb; + + rcu_read_lock(); + wb = radix_tree_lookup(&bdi->cgwb_tree, memcg_css->id); + if (wb) { + struct cgroup_subsys_state *blkcg_css; + + /* see whether the blkcg association has changed */ + blkcg_css = cgroup_get_e_css(memcg_css->cgroup, &io_cgrp_subsys); + if (unlikely(wb->blkcg_css != blkcg_css || !wb_tryget(wb))) + wb = NULL; + css_put(blkcg_css); + } + rcu_read_unlock(); + + return wb; +} + +/** + * wb_get_create - get wb for a given memcg, create if necessary + * @bdi: target bdi + * @memcg_css: cgroup_subsys_state of the target memcg (must have positive ref) + * @gfp: allocation mask to use + * + * Try to get the wb for @memcg_css on @bdi. If it doesn't exist, try to + * create one. See wb_get_lookup() for more details. 
+ */ struct bdi_writeback *wb_get_create(struct backing_dev_info *bdi, struct cgroup_subsys_state *memcg_css, gfp_t gfp) @@ -653,20 +685,7 @@ struct bdi_writeback *wb_get_create(struct backing_dev_info *bdi, return &bdi->wb; do { - rcu_read_lock(); - wb = radix_tree_lookup(&bdi->cgwb_tree, memcg_css->id); - if (wb) { - struct cgroup_subsys_state *blkcg_css; - - /* see whether the blkcg association has changed */ - blkcg_css = cgroup_get_e_css(memcg_css->cgroup, - &io_cgrp_subsys); - if (unlikely(wb->blkcg_css != blkcg_css || - !wb_tryget(wb))) - wb = NULL; - css_put(blkcg_css); - } - rcu_read_unlock(); + wb = wb_get_lookup(bdi, memcg_css); } while (!wb && !cgwb_create(bdi, memcg_css, gfp)); return wb; -- cgit v1.2.3