-rw-r--r--  MAINTAINERS                       |  8
-rw-r--r--  arch/riscv/include/asm/pgtable.h  | 17
-rw-r--r--  arch/x86/Kconfig                  |  2
-rw-r--r--  arch/x86/include/asm/topology.h   | 10
-rw-r--r--  arch/x86/kernel/cpu/topology.c    | 13
-rw-r--r--  include/linux/sched.h             |  2
-rw-r--r--  kernel/sched/deadline.c           | 35
-rw-r--r--  kernel/sched/fair.c               |  7
-rw-r--r--  kernel/sched/sched.h              | 37
9 files changed, 80 insertions, 51 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index cfffa6c3bdf1..35dac69ec0d5 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1772,7 +1772,7 @@ F: drivers/staging/iio/*/ad*
 X: drivers/iio/*/adjd*
 
 ANALOGBITS PLL LIBRARIES
-M: Paul Walmsley <paul.walmsley@sifive.com>
+M: Paul Walmsley <pjw@kernel.org>
 M: Samuel Holland <samuel.holland@sifive.com>
 S: Supported
 F: drivers/clk/analogbits/*
@@ -19296,7 +19296,7 @@ S: Maintained
 F: drivers/pci/controller/dwc/*layerscape*
 
 PCI DRIVER FOR FU740
-M: Paul Walmsley <paul.walmsley@sifive.com>
+M: Paul Walmsley <pjw@kernel.org>
 M: Greentime Hu <greentime.hu@sifive.com>
 M: Samuel Holland <samuel.holland@sifive.com>
 L: linux-pci@vger.kernel.org
@@ -21656,7 +21656,7 @@ F: Documentation/devicetree/bindings/timer/andestech,plmt0.yaml
 F: arch/riscv/boot/dts/andes/
 
 RISC-V ARCHITECTURE
-M: Paul Walmsley <paul.walmsley@sifive.com>
+M: Paul Walmsley <pjw@kernel.org>
 M: Palmer Dabbelt <palmer@dabbelt.com>
 M: Albert Ou <aou@eecs.berkeley.edu>
 R: Alexandre Ghiti <alex@ghiti.fr>
@@ -23108,7 +23108,7 @@ S: Maintained
 F: drivers/watchdog/simatic-ipc-wdt.c
 
 SIFIVE DRIVERS
-M: Paul Walmsley <paul.walmsley@sifive.com>
+M: Paul Walmsley <pjw@kernel.org>
 M: Samuel Holland <samuel.holland@sifive.com>
 L: linux-riscv@lists.infradead.org
 S: Supported
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 91697fbf1f90..815067742939 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -942,6 +942,23 @@ static inline int pudp_test_and_clear_young(struct vm_area_struct *vma,
         return ptep_test_and_clear_young(vma, address, (pte_t *)pudp);
 }
 
+#define __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR
+static inline pud_t pudp_huge_get_and_clear(struct mm_struct *mm,
+                                            unsigned long address, pud_t *pudp)
+{
+#ifdef CONFIG_SMP
+        pud_t pud = __pud(xchg(&pudp->pud, 0));
+#else
+        pud_t pud = *pudp;
+
+        pud_clear(pudp);
+#endif
+
+        page_table_check_pud_clear(mm, pud);
+
+        return pud;
+}
+
 static inline int pud_young(pud_t pud)
 {
         return pte_young(pud_pte(pud));
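A note on the pgtable.h hunk above: on SMP the new pudp_huge_get_and_clear() must read and clear the huge-PUD entry in a single atomic xchg(), because lockless walkers such as GUP-fast can race with the teardown and must observe either the old valid entry or pud_none(), never a half-cleared value; on UP a plain read followed by pud_clear() suffices. A minimal caller sketch under those assumptions (the function below and its name are illustrative only, not part of this patch):

static void zap_huge_pud_sketch(struct vm_area_struct *vma,
                                unsigned long addr, pud_t *pudp)
{
        pud_t old;

        /* Atomically (on SMP) read and clear the entry; the helper also
         * feeds the old value to page_table_check_pud_clear(). */
        old = pudp_huge_get_and_clear(vma->vm_mm, addr, pudp);

        /* The returned entry still carries the pfn/prot bits, so the TLB
         * flush can cover exactly the torn-down 1G mapping. */
        if (pud_present(old))
                flush_pud_tlb_range(vma, addr, addr + PUD_SIZE);
}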
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 52c8910ba2ef..05880301212e 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -26,7 +26,6 @@ config X86_64
         depends on 64BIT
         # Options that are inherently 64-bit kernel only:
         select ARCH_HAS_GIGANTIC_PAGE
-        select ARCH_HAS_PTDUMP
         select ARCH_SUPPORTS_MSEAL_SYSTEM_MAPPINGS
         select ARCH_SUPPORTS_INT128 if CC_HAS_INT128
         select ARCH_SUPPORTS_PER_VMA_LOCK
@@ -99,6 +98,7 @@ config X86
         select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
         select ARCH_HAS_PMEM_API if X86_64
         select ARCH_HAS_PREEMPT_LAZY
+        select ARCH_HAS_PTDUMP
         select ARCH_HAS_PTE_SPECIAL
         select ARCH_HAS_HW_PTE_YOUNG
         select ARCH_HAS_NONLEAF_PMD_YOUNG if PGTABLE_LEVELS > 2
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 6c79ee7c0957..21041898157a 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -231,6 +231,16 @@ static inline bool topology_is_primary_thread(unsigned int cpu)
 }
 #define topology_is_primary_thread topology_is_primary_thread
 
+int topology_get_primary_thread(unsigned int cpu);
+
+static inline bool topology_is_core_online(unsigned int cpu)
+{
+        int pcpu = topology_get_primary_thread(cpu);
+
+        return pcpu >= 0 ? cpu_online(pcpu) : false;
+}
+#define topology_is_core_online topology_is_core_online
+
 #else /* CONFIG_SMP */
 static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; }
 static inline int topology_max_smt_threads(void) { return 1; }
diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c
index e35ccdc84910..6073a16628f9 100644
--- a/arch/x86/kernel/cpu/topology.c
+++ b/arch/x86/kernel/cpu/topology.c
@@ -372,6 +372,19 @@ unsigned int topology_unit_count(u32 apicid, enum x86_topology_domains which_uni
         return topo_unit_count(lvlid, at_level, apic_maps[which_units].map);
 }
 
+#ifdef CONFIG_SMP
+int topology_get_primary_thread(unsigned int cpu)
+{
+        u32 apic_id = cpuid_to_apicid[cpu];
+
+        /*
+         * Get the core domain level APIC id, which is the primary thread
+         * and return the CPU number assigned to it.
+         */
+        return topo_lookup_cpuid(topo_apicid(apic_id, TOPO_CORE_DOMAIN));
+}
+#endif
+
 #ifdef CONFIG_ACPI_HOTPLUG_CPU
 /**
  * topology_hotplug_apic - Handle a physical hotplugged APIC after boot
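The two x86 topology hunks pair a lookup, topology_get_primary_thread(), which maps any CPU to the CPU number of its core's primary thread via the core-domain APIC id, with a predicate built on top of it, topology_is_core_online(). A hedged sketch of the kind of hotplug-side check this enables (the function below is illustrative; this diff adds no such call site):

static bool cpu_bootable_sketch(unsigned int cpu)
{
        /* Primary threads carry the core state and may always boot. */
        if (topology_is_primary_thread(cpu))
                return true;

        /* A secondary SMT sibling is bootable only once its core,
         * i.e. the primary thread, is online. */
        return topology_is_core_online(cpu);
}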
diff --git a/include/linux/sched.h b/include/linux/sched.h
index f8188b833350..e4ce0a76831e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -706,7 +706,6 @@ struct sched_dl_entity {
         unsigned int dl_defer         : 1;
         unsigned int dl_defer_armed   : 1;
         unsigned int dl_defer_running : 1;
-        unsigned int dl_server_idle   : 1;
 
         /*
          * Bandwidth enforcement timer. Each -deadline task has its
@@ -733,7 +732,6 @@ struct sched_dl_entity {
          * runnable task.
          */
         struct rq *rq;
-        dl_server_has_tasks_f server_has_tasks;
         dl_server_pick_f server_pick_task;
 
 #ifdef CONFIG_RT_MUTEXES
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index f25301267e47..72c1f72463c7 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -875,7 +875,7 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se)
          */
         if (dl_se->dl_defer && !dl_se->dl_defer_running &&
             dl_time_before(rq_clock(dl_se->rq), dl_se->deadline - dl_se->runtime)) {
-                if (!is_dl_boosted(dl_se) && dl_se->server_has_tasks(dl_se)) {
+                if (!is_dl_boosted(dl_se)) {
 
                         /*
                          * Set dl_se->dl_defer_armed and dl_throttled variables to
@@ -1152,8 +1152,6 @@ static void __push_dl_task(struct rq *rq, struct rq_flags *rf)
 /* a defer timer will not be reset if the runtime consumed was < dl_server_min_res */
 static const u64 dl_server_min_res = 1 * NSEC_PER_MSEC;
 
-static bool dl_server_stopped(struct sched_dl_entity *dl_se);
-
 static enum hrtimer_restart dl_server_timer(struct hrtimer *timer, struct sched_dl_entity *dl_se)
 {
         struct rq *rq = rq_of_dl_se(dl_se);
@@ -1171,12 +1169,6 @@ static enum hrtimer_restart dl_server_timer(struct hrtimer *timer, struct sched_
         if (!dl_se->dl_runtime)
                 return HRTIMER_NORESTART;
 
-        if (!dl_se->server_has_tasks(dl_se)) {
-                replenish_dl_entity(dl_se);
-                dl_server_stopped(dl_se);
-                return HRTIMER_NORESTART;
-        }
-
         if (dl_se->dl_defer_armed) {
                 /*
                  * First check if the server could consume runtime in background.
@@ -1579,10 +1571,8 @@ void dl_server_update_idle_time(struct rq *rq, struct task_struct *p)
 void dl_server_update(struct sched_dl_entity *dl_se, s64 delta_exec)
 {
         /* 0 runtime = fair server disabled */
-        if (dl_se->dl_runtime) {
-                dl_se->dl_server_idle = 0;
+        if (dl_se->dl_runtime)
                 update_curr_dl_se(dl_se->rq, dl_se, delta_exec);
-        }
 }
 
 void dl_server_start(struct sched_dl_entity *dl_se)
@@ -1610,26 +1600,10 @@ void dl_server_stop(struct sched_dl_entity *dl_se)
         dl_se->dl_server_active = 0;
 }
 
-static bool dl_server_stopped(struct sched_dl_entity *dl_se)
-{
-        if (!dl_se->dl_server_active)
-                return true;
-
-        if (dl_se->dl_server_idle) {
-                dl_server_stop(dl_se);
-                return true;
-        }
-
-        dl_se->dl_server_idle = 1;
-        return false;
-}
-
 void dl_server_init(struct sched_dl_entity *dl_se, struct rq *rq,
-                    dl_server_has_tasks_f has_tasks,
                     dl_server_pick_f pick_task)
 {
         dl_se->rq = rq;
-        dl_se->server_has_tasks = has_tasks;
         dl_se->server_pick_task = pick_task;
 }
 
@@ -2394,10 +2368,7 @@ again:
         if (dl_server(dl_se)) {
                 p = dl_se->server_pick_task(dl_se);
                 if (!p) {
-                        if (!dl_server_stopped(dl_se)) {
-                                dl_se->dl_yielded = 1;
-                                update_curr_dl_se(rq, dl_se, 0);
-                        }
+                        dl_server_stop(dl_se);
                         goto again;
                 }
                 rq->dl_server = dl_se;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index b173a059315c..8ce56a8d507f 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -8859,11 +8859,6 @@ static struct task_struct *__pick_next_task_fair(struct rq *rq, struct task_stru
         return pick_next_task_fair(rq, prev, NULL);
 }
 
-static bool fair_server_has_tasks(struct sched_dl_entity *dl_se)
-{
-        return !!dl_se->rq->cfs.nr_queued;
-}
-
 static struct task_struct *fair_server_pick_task(struct sched_dl_entity *dl_se)
 {
         return pick_task_fair(dl_se->rq);
@@ -8875,7 +8870,7 @@ void fair_server_init(struct rq *rq)
 
         init_dl_entity(dl_se);
 
-        dl_server_init(dl_se, rq, fair_server_has_tasks, fair_server_pick_task);
+        dl_server_init(dl_se, rq, fair_server_pick_task);
 }
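The deadline.c and fair.c hunks above delete the server_has_tasks() callback and the lazy dl_server_stopped() idle detection: a server now signals idleness solely by returning NULL from server_pick_task(), and the pick path (the @@ -2394 hunk) stops it immediately via dl_server_stop(). That leaves dl_server_stop() as a forced stop, used for example around parameter updates. A simplified sketch of such an update-side caller (hypothetical helper; a real update path would also have to handle bandwidth accounting):

static void dl_server_set_params_sketch(struct sched_dl_entity *dl_se,
                                        u64 runtime, u64 period)
{
        /* Force the server off the runqueue before changing parameters. */
        dl_server_stop(dl_se);

        dl_se->dl_runtime  = runtime;
        dl_se->dl_deadline = period;
        dl_se->dl_period   = period;

        /* Restart; the server stops itself again once pick returns NULL. */
        dl_server_start(dl_se);
}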
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index be9745d104f7..cf2109b67f9a 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -365,25 +365,50 @@ extern s64 dl_scaled_delta_exec(struct rq *rq, struct sched_dl_entity *dl_se, s6
  *
  * dl_se::rq -- runqueue we belong to.
  *
- * dl_se::server_has_tasks() -- used on bandwidth enforcement; we 'stop' the
- *                              server when it runs out of tasks to run.
- *
  * dl_se::server_pick() -- nested pick_next_task(); we yield the period if this
  *                         returns NULL.
  *
  * dl_server_update() -- called from update_curr_common(), propagates runtime
  *                       to the server.
  *
- * dl_server_start()
- * dl_server_stop() -- start/stop the server when it has (no) tasks.
+ * dl_server_start() -- start the server when it has tasks; it will stop
+ *                      automatically when there are no more tasks, per
+ *                      dl_se::server_pick() returning NULL.
+ *
+ * dl_server_stop() -- (force) stop the server; use when updating
+ *                     parameters.
 *
 * dl_server_init() -- initializes the server.
+ *
+ * When started the dl_server will (per dl_defer) schedule a timer for its
+ * zero-laxity point -- that is, unlike regular EDF tasks which run ASAP, a
+ * server will run at the very end of its period.
+ *
+ * This is done such that any runtime from the target class can be accounted
+ * against the server -- through dl_server_update() above -- such that when it
+ * becomes time to run, it might already be out of runtime and get deferred
+ * until the next period. In this case dl_server_timer() will alternate
+ * between defer and replenish but never actually enqueue the server.
+ *
+ * Only when the target class does not manage to exhaust the server's runtime
+ * (there's actually starvation in the given period), will the dl_server get on
+ * the runqueue. Once queued it will pick tasks from the target class and run
+ * them until either its runtime is exhausted, at which point it's back to
+ * dl_server_timer(), or until there are no more tasks to run, at which point
+ * the dl_server stops itself.
+ *
+ * By stopping at this point the dl_server retains bandwidth, which, if a new
+ * task wakes up imminently (starting the server again), can be used --
+ * subject to CBS wakeup rules -- without having to wait for the next period.
+ *
+ * Additionally, because of the dl_defer behaviour the start/stop behaviour is
+ * naturally throttled to once per period, avoiding high context switch
+ * workloads from spamming the hrtimer program/cancel paths.
  */
 extern void dl_server_update(struct sched_dl_entity *dl_se, s64 delta_exec);
 extern void dl_server_start(struct sched_dl_entity *dl_se);
 extern void dl_server_stop(struct sched_dl_entity *dl_se);
 extern void dl_server_init(struct sched_dl_entity *dl_se, struct rq *rq,
-                           dl_server_has_tasks_f has_tasks,
                            dl_server_pick_f pick_task);
 
 extern void sched_init_dl_servers(void);
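Distilled from the rewritten comment above, the lifecycle a hosting class wires up is small; a condensed sketch following the fair class (the my_* names are placeholders, not patch content):

/* Placeholder pick callback: returning NULL is now the stop signal. */
static struct task_struct *my_server_pick(struct sched_dl_entity *dl_se);

static void my_server_setup(struct rq *rq, struct sched_dl_entity *dl_se)
{
        init_dl_entity(dl_se);
        /* New signature: no has_tasks callback any more. */
        dl_server_init(dl_se, rq, my_server_pick);
}

static void my_class_wakeup(struct sched_dl_entity *dl_se)
{
        /*
         * Start (or restart) the server on wakeup. Per dl_defer it first
         * arms its zero-laxity timer instead of preempting, and it stops
         * itself once my_server_pick() returns NULL, retaining CBS
         * bandwidth for an imminent wakeup.
         */
        dl_server_start(dl_se);
}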