| author | Linus Torvalds <torvalds@linux-foundation.org> | 2026-02-09 19:57:30 -0800 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2026-02-09 19:57:30 -0800 |
| commit | d16738a4e79e55b2c3c9ff4fb7b74a4a24723515 | |
| tree | 694b05e5b5f00ad2e70f243f84ad921b79cd8dc9 /drivers | |
| parent | 0506158ac7363a70f0deb49f71d26ccb57e55990 | |
| parent | fa39ec4f89f2637ed1cdbcde3656825951787668 | |
Merge tag 'kthread-for-7.0' of git://git.kernel.org/pub/scm/linux/kernel/git/frederic/linux-dynticks
Pull kthread updates from Frederic Weisbecker:
"The kthread code provides an infrastructure which manages the
preferred affinity of unbound kthreads (node or custom cpumask)
against housekeeping (CPU isolation) constraints and CPU hotplug
events.
One crucial missing piece is the handling of cpuset: when an isolated
partition is created, deleted, or has its CPUs updated, all the unbound
kthreads in the top cpuset are indiscriminately made affine to _all_
the non-isolated CPUs, possibly breaking their preferred affinity along
the way.
Solve this by moving the kthread affinity update out of cpuset and into
the consolidated kthread affinity code, so that preferred affinities
are honoured and applied against the updated cpuset isolated
partitions.
The dispatch of the new isolated cpumasks to timers, workqueues and
kthreads is performed by the housekeeping code, as per Tejun's nice
suggestion.
As a welcome side effect, HK_TYPE_DOMAIN now integrates both the
boot-time domain isolation set (defined through isolcpus=) and the
cpuset isolated partitions. Housekeeping cpumasks are now modifiable,
with a dedicated RCU-based synchronization. This is a big step toward
also making nohz_full= mutable through cpuset in the future"
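
For readers unfamiliar with the preferred-affinity infrastructure mentioned above, here is a minimal, hypothetical usage sketch. It is not code from this series; it only assumes the kthread_affine_preferred() API (documented by one of the commits below), and example_thread_fn / example_task / example_start are made-up names:

```c
#include <linux/cpumask.h>
#include <linux/err.h>
#include <linux/kthread.h>
#include <linux/sched.h>

/* Hypothetical worker loop; the body is irrelevant to the affinity handling. */
static int example_thread_fn(void *data)
{
	while (!kthread_should_stop()) {
		set_current_state(TASK_INTERRUPTIBLE);
		schedule();
	}
	__set_current_state(TASK_RUNNING);
	return 0;
}

static struct task_struct *example_task;

static int example_start(const struct cpumask *preferred)
{
	example_task = kthread_create(example_thread_fn, NULL, "example");
	if (IS_ERR(example_task))
		return PTR_ERR(example_task);

	/*
	 * Record the preferred cpumask before the first wakeup. The kthread
	 * core then reapplies it across CPU hotplug and, with this series,
	 * cpuset isolated partition updates, falling back to the housekeeping
	 * CPUs whenever the preferred set is entirely isolated or offline.
	 */
	kthread_affine_preferred(example_task, preferred);
	wake_up_process(example_task);
	return 0;
}
```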
* tag 'kthread-for-7.0' of git://git.kernel.org/pub/scm/linux/kernel/git/frederic/linux-dynticks: (33 commits)
doc: Add housekeeping documentation
kthread: Document kthread_affine_preferred()
kthread: Comment on the purpose and placement of kthread_affine_node() call
kthread: Honour kthreads preferred affinity after cpuset changes
sched/arm64: Move fallback task cpumask to HK_TYPE_DOMAIN
sched: Switch the fallback task allowed cpumask to HK_TYPE_DOMAIN
kthread: Rely on HK_TYPE_DOMAIN for preferred affinity management
kthread: Include kthreadd to the managed affinity list
kthread: Include unbound kthreads in the managed affinity list
kthread: Refine naming of affinity related fields
PCI: Remove superfluous HK_TYPE_WQ check
sched/isolation: Remove HK_TYPE_TICK test from cpu_is_isolated()
cpuset: Remove cpuset_cpu_is_isolated()
timers/migration: Remove superfluous cpuset isolation test
cpuset: Propagate cpuset isolation update to timers through housekeeping
cpuset: Propagate cpuset isolation update to workqueue through housekeeping
PCI: Flush PCI probe workqueue on cpuset isolated partition change
sched/isolation: Flush vmstat workqueues on cpuset isolated partition change
sched/isolation: Flush memcg workqueues on cpuset isolated partition change
cpuset: Update HK_TYPE_DOMAIN cpumask from cpuset
...
Diffstat (limited to 'drivers')
| -rw-r--r-- | drivers/base/cpu.c | 2 |
| -rw-r--r-- | drivers/pci/pci-driver.c | 71 |
2 files changed, 53 insertions, 20 deletions
diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index c6c57b6f61c6..3e3fa031e605 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -291,7 +291,7 @@ static ssize_t print_cpus_isolated(struct device *dev,
 		return -ENOMEM;
 
 	cpumask_andnot(isolated, cpu_possible_mask,
-		       housekeeping_cpumask(HK_TYPE_DOMAIN));
+		       housekeeping_cpumask(HK_TYPE_DOMAIN_BOOT));
 	len = sysfs_emit(buf, "%*pbl\n", cpumask_pr_args(isolated));
 
 	free_cpumask_var(isolated);
diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index 7c2d9d596258..a9590601835a 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -302,9 +302,8 @@ struct drv_dev_and_id {
 	const struct pci_device_id *id;
 };
 
-static long local_pci_probe(void *_ddi)
+static int local_pci_probe(struct drv_dev_and_id *ddi)
 {
-	struct drv_dev_and_id *ddi = _ddi;
 	struct pci_dev *pci_dev = ddi->dev;
 	struct pci_driver *pci_drv = ddi->drv;
 	struct device *dev = &pci_dev->dev;
@@ -338,6 +337,21 @@ static long local_pci_probe(void *_ddi)
 	return 0;
 }
 
+static struct workqueue_struct *pci_probe_wq;
+
+struct pci_probe_arg {
+	struct drv_dev_and_id *ddi;
+	struct work_struct work;
+	int ret;
+};
+
+static void local_pci_probe_callback(struct work_struct *work)
+{
+	struct pci_probe_arg *arg = container_of(work, struct pci_probe_arg, work);
+
+	arg->ret = local_pci_probe(arg->ddi);
+}
+
 static bool pci_physfn_is_probed(struct pci_dev *dev)
 {
 #ifdef CONFIG_PCI_IOV
@@ -362,40 +376,55 @@ static int pci_call_probe(struct pci_driver *drv, struct pci_dev *dev,
 	dev->is_probed = 1;
 
 	cpu_hotplug_disable();
-
 	/*
 	 * Prevent nesting work_on_cpu() for the case where a Virtual Function
 	 * device is probed from work_on_cpu() of the Physical device.
	 */
 	if (node < 0 || node >= MAX_NUMNODES || !node_online(node) ||
 	    pci_physfn_is_probed(dev)) {
-		cpu = nr_cpu_ids;
+		error = local_pci_probe(&ddi);
 	} else {
-		cpumask_var_t wq_domain_mask;
+		struct pci_probe_arg arg = { .ddi = &ddi };
 
-		if (!zalloc_cpumask_var(&wq_domain_mask, GFP_KERNEL)) {
-			error = -ENOMEM;
-			goto out;
+		INIT_WORK_ONSTACK(&arg.work, local_pci_probe_callback);
+		/*
+		 * The target election and the enqueue of the work must be within
+		 * the same RCU read side section so that when the workqueue pool
+		 * is flushed after a housekeeping cpumask update, further readers
+		 * are guaranteed to queue the probing work to the appropriate
+		 * targets.
+		 */
+		rcu_read_lock();
+		cpu = cpumask_any_and(cpumask_of_node(node),
+				      housekeeping_cpumask(HK_TYPE_DOMAIN));
+
+		if (cpu < nr_cpu_ids) {
+			struct workqueue_struct *wq = pci_probe_wq;
+
+			if (WARN_ON_ONCE(!wq))
+				wq = system_percpu_wq;
+			queue_work_on(cpu, wq, &arg.work);
+			rcu_read_unlock();
+			flush_work(&arg.work);
+			error = arg.ret;
+		} else {
+			rcu_read_unlock();
+			error = local_pci_probe(&ddi);
 		}
-		cpumask_and(wq_domain_mask,
-			    housekeeping_cpumask(HK_TYPE_WQ),
-			    housekeeping_cpumask(HK_TYPE_DOMAIN));
-		cpu = cpumask_any_and(cpumask_of_node(node),
-				      wq_domain_mask);
-		free_cpumask_var(wq_domain_mask);
+		destroy_work_on_stack(&arg.work);
 	}
 
-	if (cpu < nr_cpu_ids)
-		error = work_on_cpu(cpu, local_pci_probe, &ddi);
-	else
-		error = local_pci_probe(&ddi);
-out:
 	dev->is_probed = 0;
 	cpu_hotplug_enable();
 	return error;
 }
 
+void pci_probe_flush_workqueue(void)
+{
+	flush_workqueue(pci_probe_wq);
+}
+
 /**
  * __pci_device_probe - check if a driver wants to claim a specific PCI device
  * @drv: driver to call to check if it wants the PCI device
@@ -1733,6 +1762,10 @@ static int __init pci_driver_init(void)
 {
 	int ret;
 
+	pci_probe_wq = alloc_workqueue("sync_wq", WQ_PERCPU, 0);
+	if (!pci_probe_wq)
+		return -ENOMEM;
+
 	ret = bus_register(&pci_bus_type);
 	if (ret)
 		return ret;
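
A rough sketch of the flush side that the RCU comment above relies on. This is not necessarily how the series wires it up: example_publish_hk_domain() and example_isolation_update() are hypothetical stand-ins for the cpuset/housekeeping update path; only synchronize_rcu(), pci_probe_flush_workqueue() and the ordering between them reflect the contract described in the hunk:

```c
#include <linux/cpumask.h>
#include <linux/rcupdate.h>

/* Hypothetical publisher of the new HK_TYPE_DOMAIN mask (stubbed for the sketch). */
static void example_publish_hk_domain(const struct cpumask *new_domain_mask)
{
	/* In the series this is handled by the housekeeping/cpuset code. */
}

void pci_probe_flush_workqueue(void);	/* provided by the hunk above */

static void example_isolation_update(const struct cpumask *new_domain_mask)
{
	/* 1. Publish the new housekeeping mask so that later probes see it. */
	example_publish_hk_domain(new_domain_mask);

	/*
	 * 2. Wait for a grace period: any pci_call_probe() that sampled the
	 *    old mask inside its RCU read side section has finished queueing
	 *    its work by now.
	 */
	synchronize_rcu();

	/* 3. Drain probe work that may still sit on a newly isolated CPU. */
	pci_probe_flush_workqueue();
}
```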
