diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2026-02-09 19:57:30 -0800 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2026-02-09 19:57:30 -0800 |
| commit | d16738a4e79e55b2c3c9ff4fb7b74a4a24723515 (patch) | |
| tree | 694b05e5b5f00ad2e70f243f84ad921b79cd8dc9 /include/linux | |
| parent | 0506158ac7363a70f0deb49f71d26ccb57e55990 (diff) | |
| parent | fa39ec4f89f2637ed1cdbcde3656825951787668 (diff) | |
Merge tag 'kthread-for-7.0' of git://git.kernel.org/pub/scm/linux/kernel/git/frederic/linux-dynticks
Pull kthread updates from Frederic Weisbecker:
"The kthread code provides an infrastructure which manages the
preferred affinity of unbound kthreads (node or custom cpumask)
against housekeeping (CPU isolation) constraints and CPU hotplug
events.
One crucial missing piece is the handling of cpuset: when an isolated
partition is created, deleted, or its CPUs updated, all the unbound
kthreads in the top cpuset become indifferently affine to _all_ the
non-isolated CPUs, possibly breaking their preferred affinity along
the way.
Solve this with performing the kthreads affinity update from cpuset to
the kthreads consolidated relevant code instead so that preferred
affinities are honoured and applied against the updated cpuset
isolated partitions.
The dispatch of the new isolated cpumasks to timers, workqueues and
kthreads is performed by housekeeping, as per the nice Tejun's
suggestion.
As a welcome side effect, HK_TYPE_DOMAIN then integrates both the set
from boot defined domain isolation (through isolcpus=) and cpuset
isolated partitions. Housekeeping cpumasks are now modifiable with a
specific RCU based synchronization. A big step toward making
nohz_full= also mutable through cpuset in the future"
* tag 'kthread-for-7.0' of git://git.kernel.org/pub/scm/linux/kernel/git/frederic/linux-dynticks: (33 commits)
doc: Add housekeeping documentation
kthread: Document kthread_affine_preferred()
kthread: Comment on the purpose and placement of kthread_affine_node() call
kthread: Honour kthreads preferred affinity after cpuset changes
sched/arm64: Move fallback task cpumask to HK_TYPE_DOMAIN
sched: Switch the fallback task allowed cpumask to HK_TYPE_DOMAIN
kthread: Rely on HK_TYPE_DOMAIN for preferred affinity management
kthread: Include kthreadd to the managed affinity list
kthread: Include unbound kthreads in the managed affinity list
kthread: Refine naming of affinity related fields
PCI: Remove superfluous HK_TYPE_WQ check
sched/isolation: Remove HK_TYPE_TICK test from cpu_is_isolated()
cpuset: Remove cpuset_cpu_is_isolated()
timers/migration: Remove superfluous cpuset isolation test
cpuset: Propagate cpuset isolation update to timers through housekeeping
cpuset: Propagate cpuset isolation update to workqueue through housekeeping
PCI: Flush PCI probe workqueue on cpuset isolated partition change
sched/isolation: Flush vmstat workqueues on cpuset isolated partition change
sched/isolation: Flush memcg workqueues on cpuset isolated partition change
cpuset: Update HK_TYPE_DOMAIN cpumask from cpuset
...
Diffstat (limited to 'include/linux')
| -rw-r--r-- | include/linux/cpuhplock.h | 1 | ||||
| -rw-r--r-- | include/linux/cpuset.h | 8 | ||||
| -rw-r--r-- | include/linux/kthread.h | 1 | ||||
| -rw-r--r-- | include/linux/memcontrol.h | 4 | ||||
| -rw-r--r-- | include/linux/mmu_context.h | 2 | ||||
| -rw-r--r-- | include/linux/pci.h | 3 | ||||
| -rw-r--r-- | include/linux/percpu-rwsem.h | 1 | ||||
| -rw-r--r-- | include/linux/sched/isolation.h | 16 | ||||
| -rw-r--r-- | include/linux/vmstat.h | 2 | ||||
| -rw-r--r-- | include/linux/workqueue.h | 2 |
10 files changed, 28 insertions, 12 deletions
diff --git a/include/linux/cpuhplock.h b/include/linux/cpuhplock.h index f7aa20f62b87..286b3ab92e15 100644 --- a/include/linux/cpuhplock.h +++ b/include/linux/cpuhplock.h @@ -13,6 +13,7 @@ struct device; extern int lockdep_is_cpus_held(void); +extern int lockdep_is_cpus_write_held(void); #ifdef CONFIG_HOTPLUG_CPU void cpus_write_lock(void); diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index a98d3330385c..a4aa2f1767d0 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -18,6 +18,8 @@ #include <linux/mmu_context.h> #include <linux/jump_label.h> +extern bool lockdep_is_cpuset_held(void); + #ifdef CONFIG_CPUSETS /* @@ -77,7 +79,6 @@ extern void cpuset_unlock(void); extern void cpuset_cpus_allowed_locked(struct task_struct *p, struct cpumask *mask); extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask); extern bool cpuset_cpus_allowed_fallback(struct task_struct *p); -extern bool cpuset_cpu_is_isolated(int cpu); extern nodemask_t cpuset_mems_allowed(struct task_struct *p); #define cpuset_current_mems_allowed (current->mems_allowed) void cpuset_init_current_mems_allowed(void); @@ -213,11 +214,6 @@ static inline bool cpuset_cpus_allowed_fallback(struct task_struct *p) return false; } -static inline bool cpuset_cpu_is_isolated(int cpu) -{ - return false; -} - static inline nodemask_t cpuset_mems_allowed(struct task_struct *p) { return node_possible_map; diff --git a/include/linux/kthread.h b/include/linux/kthread.h index 8d27403888ce..c92c1149ee6e 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -100,6 +100,7 @@ void kthread_unpark(struct task_struct *k); void kthread_parkme(void); void kthread_exit(long result) __noreturn; void kthread_complete_and_exit(struct completion *, long) __noreturn; +int kthreads_update_housekeeping(void); int kthreadd(void *unused); extern struct task_struct *kthreadd_task; diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 0651865a4564..5b004b95648b 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -1037,6 +1037,8 @@ static inline u64 cgroup_id_from_mm(struct mm_struct *mm) return id; } +void mem_cgroup_flush_workqueue(void); + extern int mem_cgroup_init(void); #else /* CONFIG_MEMCG */ @@ -1436,6 +1438,8 @@ static inline u64 cgroup_id_from_mm(struct mm_struct *mm) return 0; } +static inline void mem_cgroup_flush_workqueue(void) { } + static inline int mem_cgroup_init(void) { return 0; } #endif /* CONFIG_MEMCG */ diff --git a/include/linux/mmu_context.h b/include/linux/mmu_context.h index ac01dc4eb2ce..ed3dd0f3fe19 100644 --- a/include/linux/mmu_context.h +++ b/include/linux/mmu_context.h @@ -24,7 +24,7 @@ static inline void leave_mm(void) { } #ifndef task_cpu_possible_mask # define task_cpu_possible_mask(p) cpu_possible_mask # define task_cpu_possible(cpu, p) true -# define task_cpu_fallback_mask(p) housekeeping_cpumask(HK_TYPE_TICK) +# define task_cpu_fallback_mask(p) housekeeping_cpumask(HK_TYPE_DOMAIN) #else # define task_cpu_possible(cpu, p) cpumask_test_cpu((cpu), task_cpu_possible_mask(p)) #endif diff --git a/include/linux/pci.h b/include/linux/pci.h index b5cc0c2b9906..9357e9b00e1c 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1206,6 +1206,7 @@ struct pci_bus *pci_create_root_bus(struct device *parent, int bus, struct pci_ops *ops, void *sysdata, struct list_head *resources); int pci_host_probe(struct pci_host_bridge *bridge); +void pci_probe_flush_workqueue(void); int pci_bus_insert_busn_res(struct pci_bus *b, int bus, int busmax); int pci_bus_update_busn_res_end(struct pci_bus *b, int busmax); void pci_bus_release_busn_res(struct pci_bus *b); @@ -2079,6 +2080,8 @@ static inline int pci_has_flag(int flag) { return 0; } _PCI_NOP_ALL(read, *) _PCI_NOP_ALL(write,) +static inline void pci_probe_flush_workqueue(void) { } + static inline struct pci_dev *pci_get_device(unsigned int vendor, unsigned int device, struct pci_dev *from) diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h index 288f5235649a..c8cb010d655e 100644 --- a/include/linux/percpu-rwsem.h +++ b/include/linux/percpu-rwsem.h @@ -161,6 +161,7 @@ extern void percpu_free_rwsem(struct percpu_rw_semaphore *); __percpu_init_rwsem(sem, #sem, &rwsem_key); \ }) +#define percpu_rwsem_is_write_held(sem) lockdep_is_held_type(sem, 0) #define percpu_rwsem_is_held(sem) lockdep_is_held(sem) #define percpu_rwsem_assert_held(sem) lockdep_assert_held(sem) diff --git a/include/linux/sched/isolation.h b/include/linux/sched/isolation.h index d8501f4709b5..dc3975ff1b2e 100644 --- a/include/linux/sched/isolation.h +++ b/include/linux/sched/isolation.h @@ -2,13 +2,21 @@ #define _LINUX_SCHED_ISOLATION_H #include <linux/cpumask.h> -#include <linux/cpuset.h> #include <linux/init.h> #include <linux/tick.h> enum hk_type { + /* Inverse of boot-time isolcpus= argument */ + HK_TYPE_DOMAIN_BOOT, + /* + * Same as HK_TYPE_DOMAIN_BOOT but also includes the + * inverse of cpuset isolated partitions. As such it + * is always a subset of HK_TYPE_DOMAIN_BOOT. + */ HK_TYPE_DOMAIN, + /* Inverse of boot-time isolcpus=managed_irq argument */ HK_TYPE_MANAGED_IRQ, + /* Inverse of boot-time nohz_full= or isolcpus=nohz arguments */ HK_TYPE_KERNEL_NOISE, HK_TYPE_MAX, @@ -31,6 +39,7 @@ extern const struct cpumask *housekeeping_cpumask(enum hk_type type); extern bool housekeeping_enabled(enum hk_type type); extern void housekeeping_affine(struct task_struct *t, enum hk_type type); extern bool housekeeping_test_cpu(int cpu, enum hk_type type); +extern int housekeeping_update(struct cpumask *isol_mask); extern void __init housekeeping_init(void); #else @@ -58,6 +67,7 @@ static inline bool housekeeping_test_cpu(int cpu, enum hk_type type) return true; } +static inline int housekeeping_update(struct cpumask *isol_mask) { return 0; } static inline void housekeeping_init(void) { } #endif /* CONFIG_CPU_ISOLATION */ @@ -72,9 +82,7 @@ static inline bool housekeeping_cpu(int cpu, enum hk_type type) static inline bool cpu_is_isolated(int cpu) { - return !housekeeping_test_cpu(cpu, HK_TYPE_DOMAIN) || - !housekeeping_test_cpu(cpu, HK_TYPE_TICK) || - cpuset_cpu_is_isolated(cpu); + return !housekeeping_test_cpu(cpu, HK_TYPE_DOMAIN); } #endif /* _LINUX_SCHED_ISOLATION_H */ diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 3398a345bda8..1909b945b3ea 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -303,6 +303,7 @@ int calculate_pressure_threshold(struct zone *zone); int calculate_normal_threshold(struct zone *zone); void set_pgdat_percpu_threshold(pg_data_t *pgdat, int (*calculate_pressure)(struct zone *)); +void vmstat_flush_workqueue(void); #else /* CONFIG_SMP */ /* @@ -403,6 +404,7 @@ static inline void __dec_node_page_state(struct page *page, static inline void refresh_zone_stat_thresholds(void) { } static inline void cpu_vm_stats_fold(int cpu) { } static inline void quiet_vmstat(void) { } +static inline void vmstat_flush_workqueue(void) { } static inline void drain_zonestat(struct zone *zone, struct per_cpu_zonestat *pzstats) { } diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index dabc351cc127..a4749f56398f 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -588,7 +588,7 @@ struct workqueue_attrs *alloc_workqueue_attrs_noprof(void); void free_workqueue_attrs(struct workqueue_attrs *attrs); int apply_workqueue_attrs(struct workqueue_struct *wq, const struct workqueue_attrs *attrs); -extern int workqueue_unbound_exclude_cpumask(cpumask_var_t cpumask); +extern int workqueue_unbound_housekeeping_update(const struct cpumask *hk); extern bool queue_work_on(int cpu, struct workqueue_struct *wq, struct work_struct *work); |
