From e1b849cfa6b61f1c866a908c9e8dd9b5aaab820b Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 9 Apr 2025 17:12:59 +0200 Subject: writeback: Avoid contention on wb->list_lock when switching inodes There can be multiple inode switch works that are trying to switch inodes to / from the same wb. This can happen in particular if some cgroup exits which owns many (thousands of) inodes and we need to switch them all. In this case several inode_switch_wbs_work_fn() instances will be just spinning on the same wb->list_lock while only one of them makes forward progress. This wastes CPU cycles and quickly leads to softlockup reports and an unusable system. Instead of running several inode_switch_wbs_work_fn() instances in parallel switching to the same wb and contending on wb->list_lock, run just one work item per wb and manage a queue of isw items switching to this wb. Acked-by: Tejun Heo Signed-off-by: Jan Kara --- include/linux/backing-dev-defs.h | 4 ++++ include/linux/writeback.h | 2 ++ 2 files changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index 2ad261082bba..c5c9d89c73ed 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -152,6 +152,10 @@ struct bdi_writeback { struct list_head blkcg_node; /* anchored at blkcg->cgwb_list */ struct list_head b_attached; /* attached inodes, protected by list_lock */ struct list_head offline_node; /* anchored at offline_cgwbs */ + struct work_struct switch_work; /* work used to perform inode switching + * to this wb */ + struct llist_head switch_wbs_ctxs; /* queued contexts for + * writeback switching */ union { struct work_struct release_work; diff --git a/include/linux/writeback.h b/include/linux/writeback.h index a2848d731a46..15a4bc4ab819 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -265,6 +265,8 @@ static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio) 
bio_associate_blkg_from_css(bio, wbc->wb->blkcg_css); } +void inode_switch_wbs_work_fn(struct work_struct *work); + #else /* CONFIG_CGROUP_WRITEBACK */ static inline void inode_attach_wb(struct inode *inode, struct folio *folio) -- cgit v1.2.3 From 0cee64c547e3c9cda646af3e075a64f445ee8148 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 12 Sep 2025 12:38:38 +0200 Subject: writeback: Add tracepoint to track pending inode switches Add trace_inode_switch_wbs_queue tracepoint to allow insight into how many inodes are queued to switch their bdi_writeback structure. Acked-by: Tejun Heo Signed-off-by: Jan Kara Signed-off-by: Christian Brauner --- fs/fs-writeback.c | 2 ++ include/trace/events/writeback.h | 29 +++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+) (limited to 'include') diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index af5f396449f1..52129267e3bd 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -667,6 +667,7 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id) isw->inodes[0] = inode; + trace_inode_switch_wbs_queue(inode->i_wb, new_wb, 1); wb_queue_isw(new_wb, isw); return; @@ -752,6 +753,7 @@ bool cleanup_offline_cgwb(struct bdi_writeback *wb) return restart; } + trace_inode_switch_wbs_queue(wb, new_wb, nr); wb_queue_isw(new_wb, isw); return restart; diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index 1e23919c0da9..c08aff044e80 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -213,6 +213,35 @@ TRACE_EVENT(inode_foreign_history, ) ); +TRACE_EVENT(inode_switch_wbs_queue, + + TP_PROTO(struct bdi_writeback *old_wb, struct bdi_writeback *new_wb, + unsigned int count), + + TP_ARGS(old_wb, new_wb, count), + + TP_STRUCT__entry( + __array(char, name, 32) + __field(ino_t, old_cgroup_ino) + __field(ino_t, new_cgroup_ino) + __field(unsigned int, count) + ), + + TP_fast_assign( + strscpy_pad(__entry->name, bdi_dev_name(old_wb->bdi), 32); + 
__entry->old_cgroup_ino = __trace_wb_assign_cgroup(old_wb); + __entry->new_cgroup_ino = __trace_wb_assign_cgroup(new_wb); + __entry->count = count; + ), + + TP_printk("bdi %s: old_cgroup_ino=%lu new_cgroup_ino=%lu count=%u", + __entry->name, + (unsigned long)__entry->old_cgroup_ino, + (unsigned long)__entry->new_cgroup_ino, + __entry->count + ) +); + TRACE_EVENT(inode_switch_wbs, TP_PROTO(struct inode *inode, struct bdi_writeback *old_wb, -- cgit v1.2.3