From 8c9ae56dc73b5ae48a14000b96292bd4f2aeb710 Mon Sep 17 00:00:00 2001
From: Kefeng Wang
Date: Thu, 21 Sep 2023 15:44:17 +0800
Subject: sched/numa, mm: make numa migrate functions to take a folio

The cpupid (or access time) is stored in the head page for THP, so it
is safe to make should_numa_migrate_memory() and
numa_hint_fault_latency() take a folio.  This is in preparation for
large folio numa balancing.

Link: https://lkml.kernel.org/r/20230921074417.24004-7-wangkefeng.wang@huawei.com
Signed-off-by: Kefeng Wang
Cc: David Hildenbrand
Cc: "Huang, Ying"
Cc: Hugh Dickins
Cc: Matthew Wilcox (Oracle)
Cc: Mike Kravetz
Cc: Zi Yan
Signed-off-by: Andrew Morton
---
 kernel/sched/fair.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'kernel/sched')

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index cb225921bbca..42aefe7e6fdc 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1722,12 +1722,12 @@ static bool pgdat_free_space_enough(struct pglist_data *pgdat)
  * The smaller the hint page fault latency, the higher the possibility
  * for the page to be hot.
  */
-static int numa_hint_fault_latency(struct page *page)
+static int numa_hint_fault_latency(struct folio *folio)
 {
 	int last_time, time;
 
 	time = jiffies_to_msecs(jiffies);
-	last_time = xchg_page_access_time(page, time);
+	last_time = xchg_page_access_time(&folio->page, time);
 
 	return (time - last_time) & PAGE_ACCESS_TIME_MASK;
 }
@@ -1784,7 +1784,7 @@ static void numa_promotion_adjust_threshold(struct pglist_data *pgdat,
 	}
 }
 
-bool should_numa_migrate_memory(struct task_struct *p, struct page * page,
+bool should_numa_migrate_memory(struct task_struct *p, struct folio *folio,
 				int src_nid, int dst_cpu)
 {
 	struct numa_group *ng = deref_curr_numa_group(p);
@@ -1814,16 +1814,16 @@ bool should_numa_migrate_memory(struct task_struct *p, struct page * page,
 		numa_promotion_adjust_threshold(pgdat, rate_limit, def_th);
 
 		th = pgdat->nbp_threshold ? : def_th;
-		latency = numa_hint_fault_latency(page);
+		latency = numa_hint_fault_latency(folio);
 		if (latency >= th)
 			return false;
 
 		return !numa_promotion_rate_limit(pgdat, rate_limit,
-						  thp_nr_pages(page));
+						  folio_nr_pages(folio));
 	}
 
 	this_cpupid = cpu_pid_to_cpupid(dst_cpu, current->pid);
-	last_cpupid = page_cpupid_xchg_last(page, this_cpupid);
+	last_cpupid = page_cpupid_xchg_last(&folio->page, this_cpupid);
 
 	if (!(sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING) &&
 	    !node_is_toptier(src_nid) && !cpupid_valid(last_cpupid))
--
cgit v1.2.3
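To make the head-page argument above concrete, here is a minimal userspace sketch. It is not kernel code: the struct layouts and helper bodies are simplified stand-ins for the real struct page/struct folio machinery. The point it illustrates is that a folio is by definition a head page, so a folio-taking accessor cannot be handed a tail page, while a page-taking accessor must remember to redirect to the head itself.

/* gcc -o folio_sketch folio_sketch.c && ./folio_sketch */
#include <stdio.h>

/* Toy model: the access-time stamp lives only in the head page of a
 * compound page (THP); tail pages just point at their head. */
struct page {
	int access_time;	/* meaningful only in the head page */
	struct page *head;	/* points to self for the head page */
};

struct folio {
	struct page page;	/* a folio always wraps the head page */
};

/* page-based API: every caller depends on the head-page redirect. */
static int xchg_page_access_time(struct page *page, int time)
{
	struct page *head = page->head;
	int old = head->access_time;

	head->access_time = time;
	return old;
}

/* folio-based API: no head-page lookup can be missed by callers. */
static int folio_xchg_access_time(struct folio *folio, int time)
{
	int old = folio->page.access_time;

	folio->page.access_time = time;
	return old;
}

int main(void)
{
	struct folio thp = { .page = { .access_time = 100 } };
	struct page tail = { .head = &thp.page };

	thp.page.head = &thp.page;

	/* Both calls update the same head-page stamp. */
	printf("old (via tail page): %d\n", xchg_page_access_time(&tail, 200));
	printf("old (via folio):     %d\n", folio_xchg_access_time(&thp, 300));
	return 0;
}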
From 37acade0ce8938f00d6979bd02b8043b5b7089ae Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)"
Date: Tue, 10 Oct 2023 04:58:29 +0100
Subject: sched: remove wait bookmarks

There are no users of wait bookmarks left, so simplify the wait
code by removing them.

Link: https://lkml.kernel.org/r/20231010035829.544242-2-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle)
Acked-by: Ingo Molnar
Cc: Benjamin Segall
Cc: Bin Lai
Cc: Daniel Bristot de Oliveira
Cc: Dietmar Eggemann
Cc: Ingo Molnar
Cc: Juri Lelli
Cc: Mel Gorman
Cc: Peter Zijlstra
Cc: Steven Rostedt (Google)
Cc: Valentin Schneider
Cc: Vincent Guittot
Signed-off-by: Andrew Morton
---
 include/linux/wait.h |  9 +++-----
 kernel/sched/wait.c  | 60 +++++++++-------------------------------------------
 2 files changed, 13 insertions(+), 56 deletions(-)

(limited to 'kernel/sched')

diff --git a/include/linux/wait.h b/include/linux/wait.h
index 5ec7739400f4..3473b663176f 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -19,10 +19,9 @@ int default_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int
 /* wait_queue_entry::flags */
 #define WQ_FLAG_EXCLUSIVE	0x01
 #define WQ_FLAG_WOKEN		0x02
-#define WQ_FLAG_BOOKMARK	0x04
-#define WQ_FLAG_CUSTOM		0x08
-#define WQ_FLAG_DONE		0x10
-#define WQ_FLAG_PRIORITY	0x20
+#define WQ_FLAG_CUSTOM		0x04
+#define WQ_FLAG_DONE		0x08
+#define WQ_FLAG_PRIORITY	0x10
 
 /*
  * A single wait-queue entry structure:
@@ -212,8 +211,6 @@ __remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq
 int __wake_up(struct wait_queue_head *wq_head, unsigned int mode, int nr, void *key);
 void __wake_up_on_current_cpu(struct wait_queue_head *wq_head, unsigned int mode, void *key);
 void __wake_up_locked_key(struct wait_queue_head *wq_head, unsigned int mode, void *key);
-void __wake_up_locked_key_bookmark(struct wait_queue_head *wq_head,
-		unsigned int mode, void *key, wait_queue_entry_t *bookmark);
 void __wake_up_sync_key(struct wait_queue_head *wq_head, unsigned int mode, void *key);
 void __wake_up_locked_sync_key(struct wait_queue_head *wq_head, unsigned int mode, void *key);
 void __wake_up_locked(struct wait_queue_head *wq_head, unsigned int mode, int nr);
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index 802d98cf2de3..51e38f5f4701 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -57,13 +57,6 @@ void remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry
 }
 EXPORT_SYMBOL(remove_wait_queue);
 
-/*
- * Scan threshold to break wait queue walk.
- * This allows a waker to take a break from holding the
- * wait queue lock during the wait queue walk.
- */
-#define WAITQUEUE_WALK_BREAK_CNT 64
-
 /*
  * The core wakeup function. Non-exclusive wakeups (nr_exclusive == 0) just
  * wake everything up. If it's an exclusive wakeup (nr_exclusive == small +ve
@@ -78,21 +71,13 @@ EXPORT_SYMBOL(remove_wait_queue);
  * zero in this (rare) case, and we handle it by continuing to scan the queue.
  */
 static int __wake_up_common(struct wait_queue_head *wq_head, unsigned int mode,
-			int nr_exclusive, int wake_flags, void *key,
-			wait_queue_entry_t *bookmark)
+			int nr_exclusive, int wake_flags, void *key)
 {
 	wait_queue_entry_t *curr, *next;
-	int cnt = 0;
 
 	lockdep_assert_held(&wq_head->lock);
 
-	if (bookmark && (bookmark->flags & WQ_FLAG_BOOKMARK)) {
-		curr = list_next_entry(bookmark, entry);
-
-		list_del(&bookmark->entry);
-		bookmark->flags = 0;
-	} else
-		curr = list_first_entry(&wq_head->head, wait_queue_entry_t, entry);
+	curr = list_first_entry(&wq_head->head, wait_queue_entry_t, entry);
 
 	if (&curr->entry == &wq_head->head)
 		return nr_exclusive;
@@ -101,21 +86,11 @@ static int __wake_up_common(struct wait_queue_head *wq_head, unsigned int mode,
 		unsigned flags = curr->flags;
 		int ret;
 
-		if (flags & WQ_FLAG_BOOKMARK)
-			continue;
-
 		ret = curr->func(curr, mode, wake_flags, key);
 		if (ret < 0)
 			break;
 		if (ret && (flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
 			break;
-
-		if (bookmark && (++cnt > WAITQUEUE_WALK_BREAK_CNT) &&
-				(&next->entry != &wq_head->head)) {
-			bookmark->flags = WQ_FLAG_BOOKMARK;
-			list_add_tail(&bookmark->entry, &next->entry);
-			break;
-		}
 	}
 
 	return nr_exclusive;
@@ -125,20 +100,12 @@ static int __wake_up_common_lock(struct wait_queue_head *wq_head, unsigned int m
 			int nr_exclusive, int wake_flags, void *key)
 {
 	unsigned long flags;
-	wait_queue_entry_t bookmark;
-	int remaining = nr_exclusive;
+	int remaining;
 
-	bookmark.flags = 0;
-	bookmark.private = NULL;
-	bookmark.func = NULL;
-	INIT_LIST_HEAD(&bookmark.entry);
-
-	do {
-		spin_lock_irqsave(&wq_head->lock, flags);
-		remaining = __wake_up_common(wq_head, mode, remaining,
-						wake_flags, key, &bookmark);
-		spin_unlock_irqrestore(&wq_head->lock, flags);
-	} while (bookmark.flags & WQ_FLAG_BOOKMARK);
+	spin_lock_irqsave(&wq_head->lock, flags);
+	remaining = __wake_up_common(wq_head, mode, nr_exclusive, wake_flags,
+				     key);
+	spin_unlock_irqrestore(&wq_head->lock, flags);
 
 	return nr_exclusive - remaining;
 }
@@ -171,23 +138,16 @@ void __wake_up_on_current_cpu(struct wait_queue_head *wq_head, unsigned int mode
  */
 void __wake_up_locked(struct wait_queue_head *wq_head, unsigned int mode, int nr)
 {
-	__wake_up_common(wq_head, mode, nr, 0, NULL, NULL);
+	__wake_up_common(wq_head, mode, nr, 0, NULL);
 }
 EXPORT_SYMBOL_GPL(__wake_up_locked);
 
 void __wake_up_locked_key(struct wait_queue_head *wq_head, unsigned int mode, void *key)
 {
-	__wake_up_common(wq_head, mode, 1, 0, key, NULL);
+	__wake_up_common(wq_head, mode, 1, 0, key);
 }
 EXPORT_SYMBOL_GPL(__wake_up_locked_key);
 
-void __wake_up_locked_key_bookmark(struct wait_queue_head *wq_head,
-		unsigned int mode, void *key, wait_queue_entry_t *bookmark)
-{
-	__wake_up_common(wq_head, mode, 1, 0, key, bookmark);
-}
-EXPORT_SYMBOL_GPL(__wake_up_locked_key_bookmark);
-
 /**
  * __wake_up_sync_key - wake up threads blocked on a waitqueue.
  * @wq_head:		the waitqueue
@@ -233,7 +193,7 @@ EXPORT_SYMBOL_GPL(__wake_up_sync_key);
 void __wake_up_locked_sync_key(struct wait_queue_head *wq_head,
 			       unsigned int mode, void *key)
 {
-	__wake_up_common(wq_head, mode, 1, WF_SYNC, key, NULL);
+	__wake_up_common(wq_head, mode, 1, WF_SYNC, key);
 }
 EXPORT_SYMBOL_GPL(__wake_up_locked_sync_key);
--
cgit v1.2.3
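The simplification is easiest to see outside the kernel. Below is a userspace model of the post-patch __wake_up_common() walk: a plain array stands in for the kernel's locked wait list, and setting a flag stands in for invoking curr->func(). Only the nr_exclusive accounting is carried over faithfully, including the pre-decrement trick by which nr_exclusive == 0 wakes every waiter.

/* gcc -o wake_sketch wake_sketch.c && ./wake_sketch */
#include <stdio.h>

#define NWAITERS 5

/* Toy waiter: "exclusive" mimics WQ_FLAG_EXCLUSIVE. */
struct waiter {
	int id;
	int exclusive;
	int woken;
};

/*
 * Same shape as the post-patch loop: one straight walk with no
 * bookmark save/restore, decrementing nr_exclusive only for the
 * exclusive waiters actually woken.  With nr_exclusive == 0 the
 * pre-decrement never hits zero, so everybody wakes.
 */
static int wake_up_common(struct waiter *q, int n, int nr_exclusive)
{
	for (int i = 0; i < n; i++) {
		q[i].woken = 1;	/* stands in for curr->func() returning 1 */
		if (q[i].exclusive && !--nr_exclusive)
			break;
	}
	return nr_exclusive;
}

int main(void)
{
	struct waiter q[NWAITERS] = {
		{ 0, 0 }, { 1, 1 }, { 2, 1 }, { 3, 1 }, { 4, 0 },
	};

	wake_up_common(q, NWAITERS, 2);	/* wake at most two exclusive waiters */
	for (int i = 0; i < NWAITERS; i++)
		printf("waiter %d: %s\n", q[i].id,
		       q[i].woken ? "woken" : "still waiting");
	return 0;
}

The non-exclusive waiter 0 and the first two exclusive waiters wake; waiters 3 and 4 stay queued, matching the "nr_exclusive == small +ve number" behavior described in the kernel comment.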
From 0b201c3624ae9f58ebfff8484f304f3008fb01b8 Mon Sep 17 00:00:00 2001
From: Kefeng Wang
Date: Wed, 18 Oct 2023 22:07:55 +0800
Subject: sched/fair: use folio_xchg_access_time() in numa_hint_fault_latency()

Convert to use folio_xchg_access_time() in numa_hint_fault_latency().

Link: https://lkml.kernel.org/r/20231018140806.2783514-9-wangkefeng.wang@huawei.com
Signed-off-by: Kefeng Wang
Cc: David Hildenbrand
Cc: Huang Ying
Cc: Ingo Molnar
Cc: Juri Lelli
Cc: Matthew Wilcox (Oracle)
Cc: Peter Zijlstra
Cc: Vincent Guittot
Cc: Zi Yan
Signed-off-by: Andrew Morton
---
 kernel/sched/fair.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel/sched')

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 42aefe7e6fdc..3ee9d3564c20 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1727,7 +1727,7 @@ static int numa_hint_fault_latency(struct folio *folio)
 	int last_time, time;
 
 	time = jiffies_to_msecs(jiffies);
-	last_time = xchg_page_access_time(&folio->page, time);
+	last_time = folio_xchg_access_time(folio, time);
 
 	return (time - last_time) & PAGE_ACCESS_TIME_MASK;
 }
--
cgit v1.2.3
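For reference, the latency this function returns is a masked subtraction over a truncated millisecond timestamp, which stays correct even after the truncated counter wraps. The userspace sketch below demonstrates that property; the 16-bit mask width is an assumption chosen for the demo, not the kernel's actual PAGE_ACCESS_TIME_MASK width (which comes from the page-flags layout).

/* gcc -o latency_sketch latency_sketch.c && ./latency_sketch */
#include <stdio.h>

#define ACCESS_TIME_BITS 16	/* illustrative width, not the kernel's */
#define ACCESS_TIME_MASK ((1u << ACCESS_TIME_BITS) - 1)

/* Same shape as (time - last_time) & PAGE_ACCESS_TIME_MASK. */
static unsigned int latency(unsigned int now, unsigned int last)
{
	return (now - last) & ACCESS_TIME_MASK;
}

int main(void)
{
	/* last stamped just below the wrap point, now just past it. */
	unsigned int last = 0xFFF0;
	unsigned int now  = 0x10005 & ACCESS_TIME_MASK;	/* wrapped to 0x0005 */

	printf("naive  now - last: %d\n", (int)(now - last));	/* bogus */
	printf("masked latency   : %u ms\n", latency(now, last)); /* 21 */
	return 0;
}

The unsigned subtraction wraps modulo 2^32 and the mask reduces it modulo the field width, so the 21 ms gap survives the counter rollover that breaks the naive difference.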
From 1b143cc77f2074dd43b610d6bfffc822d20b6e16 Mon Sep 17 00:00:00 2001
From: Kefeng Wang
Date: Wed, 18 Oct 2023 22:08:00 +0800
Subject: sched/fair: use folio_xchg_last_cpupid() in should_numa_migrate_memory()

Convert to use folio_xchg_last_cpupid() in should_numa_migrate_memory().

Link: https://lkml.kernel.org/r/20231018140806.2783514-14-wangkefeng.wang@huawei.com
Signed-off-by: Kefeng Wang
Cc: David Hildenbrand
Cc: Huang Ying
Cc: Ingo Molnar
Cc: Juri Lelli
Cc: Matthew Wilcox (Oracle)
Cc: Peter Zijlstra
Cc: Vincent Guittot
Cc: Zi Yan
Signed-off-by: Andrew Morton
---
 kernel/sched/fair.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel/sched')

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 3ee9d3564c20..d1a765cdf6e4 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1823,7 +1823,7 @@ bool should_numa_migrate_memory(struct task_struct *p, struct folio *folio,
 	}
 
 	this_cpupid = cpu_pid_to_cpupid(dst_cpu, current->pid);
-	last_cpupid = page_cpupid_xchg_last(&folio->page, this_cpupid);
+	last_cpupid = folio_xchg_last_cpupid(folio, this_cpupid);
 
 	if (!(sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING) &&
 	    !node_is_toptier(src_nid) && !cpupid_valid(last_cpupid))
--
cgit v1.2.3
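The cpupid value exchanged above packs the last accessor's CPU and PID into a single word so that should_numa_migrate_memory() can compare them in one shot. The userspace sketch below illustrates the pack/unpack pattern; the 8-bit PID field and the helper bodies are illustrative assumptions, not the kernel's real layout, which is derived from NR_CPUS and the page-flags bit budget and truncates the PID accordingly.

/* gcc -o cpupid_sketch cpupid_sketch.c && ./cpupid_sketch */
#include <stdio.h>

#define PID_BITS 8	/* illustrative width, not the kernel's */
#define PID_MASK ((1u << PID_BITS) - 1)

/* Pack a (cpu, pid) pair into one word, pid in the low bits. */
static unsigned int cpu_pid_to_cpupid(unsigned int cpu, unsigned int pid)
{
	return (cpu << PID_BITS) | (pid & PID_MASK);
}

static unsigned int cpupid_to_cpu(unsigned int cpupid)
{
	return cpupid >> PID_BITS;
}

static unsigned int cpupid_to_pid(unsigned int cpupid)
{
	return cpupid & PID_MASK;
}

int main(void)
{
	/* The PID is deliberately truncated to its low bits, so the
	 * stored value is a hint, not an exact identity -- which is
	 * why the kernel treats cpupid comparisons heuristically. */
	unsigned int last = cpu_pid_to_cpupid(3, 4242);

	printf("cpupid=0x%x -> cpu=%u pid(truncated)=%u\n",
	       last, cpupid_to_cpu(last), cpupid_to_pid(last));
	return 0;
}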