Diffstat (limited to 'arch/um/kernel')
-rw-r--r--  arch/um/kernel/Makefile        |   1
-rw-r--r--  arch/um/kernel/asm-offsets.c   |  44
-rw-r--r--  arch/um/kernel/irq.c           |  27
-rw-r--r--  arch/um/kernel/kmsg_dump.c     |   2
-rw-r--r--  arch/um/kernel/ksyms.c         |   2
-rw-r--r--  arch/um/kernel/mem.c           | 111
-rw-r--r--  arch/um/kernel/physmem.c       |  71
-rw-r--r--  arch/um/kernel/process.c       |  18
-rw-r--r--  arch/um/kernel/skas/mmu.c      |  33
-rw-r--r--  arch/um/kernel/skas/process.c  |  19
-rw-r--r--  arch/um/kernel/smp.c           | 242
-rw-r--r--  arch/um/kernel/time.c          |  58
-rw-r--r--  arch/um/kernel/tlb.c           |   5
-rw-r--r--  arch/um/kernel/trap.c          |   2
-rw-r--r--  arch/um/kernel/um_arch.c       |  49
15 files changed, 445 insertions, 239 deletions
diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile
index b8f4e9281599..be60bc451b3f 100644
--- a/arch/um/kernel/Makefile
+++ b/arch/um/kernel/Makefile
@@ -25,6 +25,7 @@ obj-$(CONFIG_GPROF) += gprof_syms.o
obj-$(CONFIG_OF) += dtb.o
obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
+obj-$(CONFIG_SMP) += smp.o
USER_OBJS := config.o
diff --git a/arch/um/kernel/asm-offsets.c b/arch/um/kernel/asm-offsets.c
index a69873aa697f..d38447e39d5e 100644
--- a/arch/um/kernel/asm-offsets.c
+++ b/arch/um/kernel/asm-offsets.c
@@ -1,3 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#define COMPILE_OFFSETS
-#include <sysdep/kernel-offsets.h>
+#include <linux/stddef.h>
+#include <linux/sched.h>
+#include <linux/elf.h>
+#include <linux/crypto.h>
+#include <linux/kbuild.h>
+#include <linux/audit.h>
+#include <linux/fs.h>
+#include <asm/mman.h>
+#include <asm/seccomp.h>
+/* workaround for a warning with -Wmissing-prototypes */
+void foo(void);
+
+void foo(void)
+{
+ DEFINE(KERNEL_MADV_REMOVE, MADV_REMOVE);
+
+ DEFINE(UM_KERN_PAGE_SIZE, PAGE_SIZE);
+ DEFINE(UM_KERN_PAGE_MASK, PAGE_MASK);
+ DEFINE(UM_KERN_PAGE_SHIFT, PAGE_SHIFT);
+
+ DEFINE(UM_GFP_KERNEL, GFP_KERNEL);
+ DEFINE(UM_GFP_ATOMIC, GFP_ATOMIC);
+
+ DEFINE(UM_THREAD_SIZE, THREAD_SIZE);
+
+ DEFINE(UM_NSEC_PER_SEC, NSEC_PER_SEC);
+ DEFINE(UM_NSEC_PER_USEC, NSEC_PER_USEC);
+
+ DEFINE(UM_KERN_GDT_ENTRY_TLS_ENTRIES, GDT_ENTRY_TLS_ENTRIES);
+
+ DEFINE(UM_SECCOMP_ARCH_NATIVE, SECCOMP_ARCH_NATIVE);
+
+ DEFINE(HOSTFS_ATTR_MODE, ATTR_MODE);
+ DEFINE(HOSTFS_ATTR_UID, ATTR_UID);
+ DEFINE(HOSTFS_ATTR_GID, ATTR_GID);
+ DEFINE(HOSTFS_ATTR_SIZE, ATTR_SIZE);
+ DEFINE(HOSTFS_ATTR_ATIME, ATTR_ATIME);
+ DEFINE(HOSTFS_ATTR_MTIME, ATTR_MTIME);
+ DEFINE(HOSTFS_ATTR_CTIME, ATTR_CTIME);
+ DEFINE(HOSTFS_ATTR_ATIME_SET, ATTR_ATIME_SET);
+ DEFINE(HOSTFS_ATTR_MTIME_SET, ATTR_MTIME_SET);
+}
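
The DEFINE() entries above come from <linux/kbuild.h>: each one embeds the constant in the compiler's assembly output, which Kbuild post-processes into include/generated/asm-offsets.h. A simplified sketch of the mechanism (the in-tree macro is essentially this):

    #define DEFINE(sym, val) \
            asm volatile("\n.ascii \"->" #sym " %0 " #val "\"" : : "i" (val))

    /* Kbuild scans the generated .s file for the "->" markers and emits
     * lines such as:
     *     #define UM_KERN_PAGE_SIZE 4096
     * which UML's userspace-side code can include without pulling in
     * kernel headers. */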
diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c
index d69d137a0334..f4b13f15a9c1 100644
--- a/arch/um/kernel/irq.c
+++ b/arch/um/kernel/irq.c
@@ -22,6 +22,9 @@
#include <irq_kern.h>
#include <linux/time-internal.h>
+DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
+
+#define irq_stats(x) (&per_cpu(irq_stat, x))
/* When epoll triggers we do not know why it did so
* we can also have different IRQs for read and write.
@@ -683,7 +686,7 @@ void __init init_IRQ(void)
{
int i;
- irq_set_chip_and_handler(TIMER_IRQ, &alarm_irq_type, handle_edge_irq);
+ irq_set_chip_and_handler(TIMER_IRQ, &alarm_irq_type, handle_percpu_irq);
for (i = 1; i < UM_LAST_SIGNAL_IRQ; i++)
irq_set_chip_and_handler(i, &normal_irq_type, handle_edge_irq);
@@ -701,3 +704,25 @@ void sigchld_handler(int sig, struct siginfo *unused_si,
{
do_IRQ(SIGCHLD_IRQ, regs);
}
+
+/*
+ * /proc/interrupts printing for arch specific interrupts
+ */
+int arch_show_interrupts(struct seq_file *p, int prec)
+{
+#if IS_ENABLED(CONFIG_SMP)
+ int cpu;
+
+ seq_printf(p, "%*s: ", prec, "RES");
+ for_each_online_cpu(cpu)
+ seq_printf(p, "%10u ", irq_stats(cpu)->irq_resched_count);
+ seq_puts(p, " Rescheduling interrupts\n");
+
+ seq_printf(p, "%*s: ", prec, "CAL");
+ for_each_online_cpu(cpu)
+ seq_printf(p, "%10u ", irq_stats(cpu)->irq_call_count);
+ seq_puts(p, " Function call interrupts\n");
+#endif
+
+ return 0;
+}
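
With arch_show_interrupts() in place, /proc/interrupts grows the usual SMP rows. Illustrative output (the counts are made up):

                   CPU0       CPU1
        RES:        214        187    Rescheduling interrupts
        CAL:         96        102    Function call interrupts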
diff --git a/arch/um/kernel/kmsg_dump.c b/arch/um/kernel/kmsg_dump.c
index 419021175272..fc0f543d1d8e 100644
--- a/arch/um/kernel/kmsg_dump.c
+++ b/arch/um/kernel/kmsg_dump.c
@@ -31,7 +31,7 @@ static void kmsg_dumper_stdout(struct kmsg_dumper *dumper,
* expected to output the crash information.
*/
if (strcmp(con->name, "ttynull") != 0 &&
- (console_srcu_read_flags(con) & CON_ENABLED)) {
+ console_is_usable(con, console_srcu_read_flags(con), true)) {
break;
}
}
diff --git a/arch/um/kernel/ksyms.c b/arch/um/kernel/ksyms.c
index f2fb77da08cf..96314c31e61c 100644
--- a/arch/um/kernel/ksyms.c
+++ b/arch/um/kernel/ksyms.c
@@ -6,8 +6,8 @@
#include <linux/module.h>
#include <os.h>
+EXPORT_SYMBOL(um_get_signals);
EXPORT_SYMBOL(um_set_signals);
-EXPORT_SYMBOL(signals_enabled);
EXPORT_SYMBOL(os_stat_fd);
EXPORT_SYMBOL(os_stat_file);
diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
index 32e3b1972dc1..39c4a7e21c6f 100644
--- a/arch/um/kernel/mem.c
+++ b/arch/um/kernel/mem.c
@@ -71,7 +71,7 @@ void __init arch_mm_preinit(void)
/* Map in the area just after the brk now that kmalloc is about
* to be turned on.
*/
- brk_end = (unsigned long) UML_ROUND_UP(sbrk(0));
+ brk_end = PAGE_ALIGN((unsigned long) sbrk(0));
map_memory(brk_end, __pa(brk_end), uml_reserved - brk_end, 1, 1, 0);
memblock_free((void *)brk_end, uml_reserved - brk_end);
uml_reserved = brk_end;
@@ -84,109 +84,6 @@ void __init mem_init(void)
kmalloc_ok = 1;
}
-#if IS_ENABLED(CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA)
-/*
- * Create a page table and place a pointer to it in a middle page
- * directory entry.
- */
-static void __init one_page_table_init(pmd_t *pmd)
-{
- if (pmd_none(*pmd)) {
- pte_t *pte = (pte_t *) memblock_alloc_low(PAGE_SIZE,
- PAGE_SIZE);
- if (!pte)
- panic("%s: Failed to allocate %lu bytes align=%lx\n",
- __func__, PAGE_SIZE, PAGE_SIZE);
-
- set_pmd(pmd, __pmd(_KERNPG_TABLE +
- (unsigned long) __pa(pte)));
- BUG_ON(pte != pte_offset_kernel(pmd, 0));
- }
-}
-
-static void __init one_md_table_init(pud_t *pud)
-{
-#if CONFIG_PGTABLE_LEVELS > 2
- pmd_t *pmd_table = (pmd_t *) memblock_alloc_low(PAGE_SIZE, PAGE_SIZE);
- if (!pmd_table)
- panic("%s: Failed to allocate %lu bytes align=%lx\n",
- __func__, PAGE_SIZE, PAGE_SIZE);
-
- set_pud(pud, __pud(_KERNPG_TABLE + (unsigned long) __pa(pmd_table)));
- BUG_ON(pmd_table != pmd_offset(pud, 0));
-#endif
-}
-
-static void __init one_ud_table_init(p4d_t *p4d)
-{
-#if CONFIG_PGTABLE_LEVELS > 3
- pud_t *pud_table = (pud_t *) memblock_alloc_low(PAGE_SIZE, PAGE_SIZE);
- if (!pud_table)
- panic("%s: Failed to allocate %lu bytes align=%lx\n",
- __func__, PAGE_SIZE, PAGE_SIZE);
-
- set_p4d(p4d, __p4d(_KERNPG_TABLE + (unsigned long) __pa(pud_table)));
- BUG_ON(pud_table != pud_offset(p4d, 0));
-#endif
-}
-
-static void __init fixrange_init(unsigned long start, unsigned long end,
- pgd_t *pgd_base)
-{
- pgd_t *pgd;
- p4d_t *p4d;
- pud_t *pud;
- pmd_t *pmd;
- int i, j;
- unsigned long vaddr;
-
- vaddr = start;
- i = pgd_index(vaddr);
- j = pmd_index(vaddr);
- pgd = pgd_base + i;
-
- for ( ; (i < PTRS_PER_PGD) && (vaddr < end); pgd++, i++) {
- p4d = p4d_offset(pgd, vaddr);
- if (p4d_none(*p4d))
- one_ud_table_init(p4d);
- pud = pud_offset(p4d, vaddr);
- if (pud_none(*pud))
- one_md_table_init(pud);
- pmd = pmd_offset(pud, vaddr);
- for (; (j < PTRS_PER_PMD) && (vaddr < end); pmd++, j++) {
- one_page_table_init(pmd);
- vaddr += PMD_SIZE;
- }
- j = 0;
- }
-}
-
-static void __init fixaddr_user_init( void)
-{
- long size = FIXADDR_USER_END - FIXADDR_USER_START;
- pte_t *pte;
- phys_t p;
- unsigned long v, vaddr = FIXADDR_USER_START;
-
- if (!size)
- return;
-
- fixrange_init( FIXADDR_USER_START, FIXADDR_USER_END, swapper_pg_dir);
- v = (unsigned long) memblock_alloc_low(size, PAGE_SIZE);
- if (!v)
- panic("%s: Failed to allocate %lu bytes align=%lx\n",
- __func__, size, PAGE_SIZE);
-
- memcpy((void *) v , (void *) FIXADDR_USER_START, size);
- p = __pa(v);
- for ( ; size > 0; size -= PAGE_SIZE, vaddr += PAGE_SIZE,
- p += PAGE_SIZE) {
- pte = virt_to_kpte(vaddr);
- pte_set_val(*pte, p, PAGE_READONLY);
- }
-}
-#endif
-
void __init paging_init(void)
{
unsigned long max_zone_pfn[MAX_NR_ZONES] = { 0 };
@@ -197,12 +94,8 @@ void __init paging_init(void)
panic("%s: Failed to allocate %lu bytes align=%lx\n",
__func__, PAGE_SIZE, PAGE_SIZE);
- max_zone_pfn[ZONE_NORMAL] = end_iomem >> PAGE_SHIFT;
+ max_zone_pfn[ZONE_NORMAL] = high_physmem >> PAGE_SHIFT;
free_area_init(max_zone_pfn);
-
-#if IS_ENABLED(CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA)
- fixaddr_user_init();
-#endif
}
/*
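
The UML_ROUND_UP() call replaced in arch_mm_preinit() above (and the ones replaced in um_arch.c further down) give way to the generic PAGE_ALIGN() helper, which in simplified form is:

    /* Round an address up to the next page boundary (simplified form
     * of the generic kernel helper): */
    #define PAGE_ALIGN(addr)  (((addr) + PAGE_SIZE - 1) & PAGE_MASK)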
diff --git a/arch/um/kernel/physmem.c b/arch/um/kernel/physmem.c
index af02b5f9911d..ae6ca373c261 100644
--- a/arch/um/kernel/physmem.c
+++ b/arch/um/kernel/physmem.c
@@ -105,19 +105,6 @@ int phys_mapping(unsigned long phys, unsigned long long *offset_out)
fd = physmem_fd;
*offset_out = phys;
}
- else if (phys < __pa(end_iomem)) {
- struct iomem_region *region = iomem_regions;
-
- while (region != NULL) {
- if ((phys >= region->phys) &&
- (phys < region->phys + region->size)) {
- fd = region->fd;
- *offset_out = phys - region->phys;
- break;
- }
- region = region->next;
- }
- }
return fd;
}
@@ -140,61 +127,3 @@ __uml_setup("mem=", uml_mem_setup,
" be more, and the excess, if it's ever used, will just be swapped out.\n"
" Example: mem=64M\n\n"
);
-
-__uml_setup("iomem=", parse_iomem,
-"iomem=<name>,<file>\n"
-" Configure <file> as an IO memory region named <name>.\n\n"
-);
-
-/*
- * This list is constructed in parse_iomem and addresses filled in
- * setup_iomem, both of which run during early boot. Afterwards, it's
- * unchanged.
- */
-struct iomem_region *iomem_regions;
-
-/* Initialized in parse_iomem and unchanged thereafter */
-int iomem_size;
-
-unsigned long find_iomem(char *driver, unsigned long *len_out)
-{
- struct iomem_region *region = iomem_regions;
-
- while (region != NULL) {
- if (!strcmp(region->driver, driver)) {
- *len_out = region->size;
- return region->virt;
- }
-
- region = region->next;
- }
-
- return 0;
-}
-EXPORT_SYMBOL(find_iomem);
-
-static int setup_iomem(void)
-{
- struct iomem_region *region = iomem_regions;
- unsigned long iomem_start = high_physmem + PAGE_SIZE;
- int err;
-
- while (region != NULL) {
- err = os_map_memory((void *) iomem_start, region->fd, 0,
- region->size, 1, 1, 0);
- if (err)
- printk(KERN_ERR "Mapping iomem region for driver '%s' "
- "failed, errno = %d\n", region->driver, -err);
- else {
- region->virt = iomem_start;
- region->phys = __pa(region->virt);
- }
-
- iomem_start += region->size + PAGE_SIZE;
- region = region->next;
- }
-
- return 0;
-}
-
-__initcall(setup_iomem);
diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c
index 9c9c66dc45f0..63b38a3f73f7 100644
--- a/arch/um/kernel/process.c
+++ b/arch/um/kernel/process.c
@@ -43,7 +43,9 @@
* cares about its entry, so it's OK if another processor is modifying its
* entry.
*/
-struct task_struct *cpu_tasks[NR_CPUS];
+struct task_struct *cpu_tasks[NR_CPUS] = {
+ [0 ... NR_CPUS - 1] = &init_task,
+};
EXPORT_SYMBOL(cpu_tasks);
void free_stack(unsigned long stack, int order)
@@ -185,11 +187,7 @@ int copy_thread(struct task_struct * p, const struct kernel_clone_args *args)
void initial_thread_cb(void (*proc)(void *), void *arg)
{
- int save_kmalloc_ok = kmalloc_ok;
-
- kmalloc_ok = 0;
initial_thread_cb_skas(proc, arg);
- kmalloc_ok = save_kmalloc_ok;
}
int arch_dup_task_struct(struct task_struct *dst,
@@ -220,11 +218,21 @@ void arch_cpu_idle(void)
um_idle_sleep();
}
+void arch_cpu_idle_prepare(void)
+{
+ os_idle_prepare();
+}
+
int __uml_cant_sleep(void) {
return in_atomic() || irqs_disabled() || in_interrupt();
/* Is in_interrupt() really needed? */
}
+int uml_need_resched(void)
+{
+ return need_resched();
+}
+
extern exitcall_t __uml_exitcall_begin, __uml_exitcall_end;
void do_uml_exitcalls(void)
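
The cpu_tasks[] change at the top of this file uses the GNU C range-designator extension so every slot starts out pointing at init_task. A minimal standalone example of the syntax:

    /* GNU C extension: [first ... last] initializes a whole range. */
    static int pending[4] = { [0 ... 3] = -1 };   /* { -1, -1, -1, -1 } */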
diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c
index afe9a2f251ef..00957788591b 100644
--- a/arch/um/kernel/skas/mmu.c
+++ b/arch/um/kernel/skas/mmu.c
@@ -23,17 +23,36 @@ static_assert(sizeof(struct stub_data) == STUB_DATA_PAGES * UM_KERN_PAGE_SIZE);
static spinlock_t mm_list_lock;
static struct list_head mm_list;
+void enter_turnstile(struct mm_id *mm_id) __acquires(turnstile)
+{
+ struct mm_context *ctx = container_of(mm_id, struct mm_context, id);
+
+ mutex_lock(&ctx->turnstile);
+}
+
+void exit_turnstile(struct mm_id *mm_id) __releases(turnstile)
+{
+ struct mm_context *ctx = container_of(mm_id, struct mm_context, id);
+
+ mutex_unlock(&ctx->turnstile);
+}
+
int init_new_context(struct task_struct *task, struct mm_struct *mm)
{
struct mm_id *new_id = &mm->context.id;
unsigned long stack = 0;
int ret = -ENOMEM;
+ mutex_init(&mm->context.turnstile);
+ spin_lock_init(&mm->context.sync_tlb_lock);
+
stack = __get_free_pages(GFP_KERNEL | __GFP_ZERO, ilog2(STUB_DATA_PAGES));
if (stack == 0)
goto out;
new_id->stack = stack;
+ new_id->syscall_data_len = 0;
+ new_id->syscall_fd_num = 0;
scoped_guard(spinlock_irqsave, &mm_list_lock) {
/* Insert into list, used for lookups when the child dies */
@@ -73,6 +92,9 @@ void destroy_context(struct mm_struct *mm)
return;
}
+ scoped_guard(spinlock_irqsave, &mm_list_lock)
+ list_del(&mm->context.list);
+
if (mmu->id.pid > 0) {
os_kill_ptraced_process(mmu->id.pid, 1);
mmu->id.pid = -1;
@@ -82,10 +104,6 @@ void destroy_context(struct mm_struct *mm)
os_close_file(mmu->id.sock);
free_pages(mmu->id.stack, ilog2(STUB_DATA_PAGES));
-
- guard(spinlock_irqsave)(&mm_list_lock);
-
- list_del(&mm->context.list);
}
static irqreturn_t mm_sigchld_irq(int irq, void* dev)
@@ -110,12 +128,11 @@ static irqreturn_t mm_sigchld_irq(int irq, void* dev)
/* Marks the MM as dead */
mm_context->id.pid = -1;
- /*
- * NOTE: If SMP is implemented, a futex_wake
- * needs to be added here.
- */
stub_data = (void *)mm_context->id.stack;
stub_data->futex = FUTEX_IN_KERN;
+#if IS_ENABLED(CONFIG_SMP)
+ os_futex_wake(&stub_data->futex);
+#endif
/*
* NOTE: Currently executing syscalls by
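
enter_turnstile()/exit_turnstile() recover the enclosing mm_context from the embedded mm_id via container_of(). Simplified (the in-tree macro adds type checking):

    /* Step back from a member pointer to the structure embedding it. */
    #define container_of(ptr, type, member) \
            ((type *)((char *)(ptr) - offsetof(type, member)))

Given struct mm_context { ...; struct mm_id id; ... }, container_of(mm_id, struct mm_context, id) turns &ctx->id back into ctx.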
diff --git a/arch/um/kernel/skas/process.c b/arch/um/kernel/skas/process.c
index 5881b17eb987..4a7673b0261a 100644
--- a/arch/um/kernel/skas/process.c
+++ b/arch/um/kernel/skas/process.c
@@ -7,6 +7,7 @@
#include <linux/sched/mm.h>
#include <linux/sched/task_stack.h>
#include <linux/sched/task.h>
+#include <linux/smp-internal.h>
#include <asm/tlbflush.h>
@@ -26,12 +27,12 @@ static int __init start_kernel_proc(void *unused)
return 0;
}
-static char cpu0_irqstack[THREAD_SIZE] __aligned(THREAD_SIZE);
+char cpu_irqstacks[NR_CPUS][THREAD_SIZE] __aligned(THREAD_SIZE);
int __init start_uml(void)
{
- stack_protections((unsigned long) &cpu0_irqstack);
- set_sigstack(cpu0_irqstack, THREAD_SIZE);
+ stack_protections((unsigned long) &cpu_irqstacks[0]);
+ set_sigstack(cpu_irqstacks[0], THREAD_SIZE);
init_new_thread_signals();
@@ -64,3 +65,15 @@ void current_mm_sync(void)
um_tlb_sync(current->mm);
}
+
+static DEFINE_SPINLOCK(initial_jmpbuf_spinlock);
+
+void initial_jmpbuf_lock(void)
+{
+ spin_lock_irq(&initial_jmpbuf_spinlock);
+}
+
+void initial_jmpbuf_unlock(void)
+{
+ spin_unlock_irq(&initial_jmpbuf_spinlock);
+}
diff --git a/arch/um/kernel/smp.c b/arch/um/kernel/smp.c
new file mode 100644
index 000000000000..f1e52b7348fb
--- /dev/null
+++ b/arch/um/kernel/smp.c
@@ -0,0 +1,242 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2025 Ant Group
+ * Author: Tiwei Bie <tiwei.btw@antgroup.com>
+ *
+ * Based on the previous implementation in TT mode
+ * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ */
+
+#include <linux/sched.h>
+#include <linux/sched/task.h>
+#include <linux/sched/task_stack.h>
+#include <linux/module.h>
+#include <linux/processor.h>
+#include <linux/threads.h>
+#include <linux/cpu.h>
+#include <linux/hardirq.h>
+#include <linux/smp.h>
+#include <linux/smp-internal.h>
+#include <init.h>
+#include <kern.h>
+#include <os.h>
+#include <smp.h>
+
+enum {
+ UML_IPI_RES = 0,
+ UML_IPI_CALL_SINGLE,
+ UML_IPI_CALL,
+ UML_IPI_STOP,
+};
+
+void arch_smp_send_reschedule(int cpu)
+{
+ os_send_ipi(cpu, UML_IPI_RES);
+}
+
+void arch_send_call_function_single_ipi(int cpu)
+{
+ os_send_ipi(cpu, UML_IPI_CALL_SINGLE);
+}
+
+void arch_send_call_function_ipi_mask(const struct cpumask *mask)
+{
+ int cpu;
+
+ for_each_cpu(cpu, mask)
+ os_send_ipi(cpu, UML_IPI_CALL);
+}
+
+void smp_send_stop(void)
+{
+ int cpu, me = smp_processor_id();
+
+ for_each_online_cpu(cpu) {
+ if (cpu == me)
+ continue;
+ os_send_ipi(cpu, UML_IPI_STOP);
+ }
+}
+
+static void ipi_handler(int vector, struct uml_pt_regs *regs)
+{
+ struct pt_regs *old_regs = set_irq_regs((struct pt_regs *)regs);
+ int cpu = raw_smp_processor_id();
+
+ irq_enter();
+
+ if (current->mm)
+ os_alarm_process(current->mm->context.id.pid);
+
+ switch (vector) {
+ case UML_IPI_RES:
+ inc_irq_stat(irq_resched_count);
+ scheduler_ipi();
+ break;
+
+ case UML_IPI_CALL_SINGLE:
+ inc_irq_stat(irq_call_count);
+ generic_smp_call_function_single_interrupt();
+ break;
+
+ case UML_IPI_CALL:
+ inc_irq_stat(irq_call_count);
+ generic_smp_call_function_interrupt();
+ break;
+
+ case UML_IPI_STOP:
+ set_cpu_online(cpu, false);
+ while (1)
+ pause();
+ break;
+
+ default:
+ pr_err("CPU#%d received unknown IPI (vector=%d)!\n", cpu, vector);
+ break;
+ }
+
+ irq_exit();
+ set_irq_regs(old_regs);
+}
+
+void uml_ipi_handler(int vector)
+{
+ struct uml_pt_regs r = { .is_user = 0 };
+
+ preempt_disable();
+ ipi_handler(vector, &r);
+ preempt_enable();
+}
+
+/* AP states used only during CPU startup */
+enum {
+ UML_CPU_PAUSED = 0,
+ UML_CPU_RUNNING,
+};
+
+static int cpu_states[NR_CPUS];
+
+static int start_secondary(void *unused)
+{
+ int err, cpu = raw_smp_processor_id();
+
+ notify_cpu_starting(cpu);
+ set_cpu_online(cpu, true);
+
+ err = um_setup_timer();
+ if (err)
+ panic("CPU#%d failed to setup timer, err = %d", cpu, err);
+
+ local_irq_enable();
+
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
+
+ return 0;
+}
+
+void uml_start_secondary(void *opaque)
+{
+ int cpu = raw_smp_processor_id();
+ struct mm_struct *mm = &init_mm;
+ struct task_struct *idle;
+
+ stack_protections((unsigned long) &cpu_irqstacks[cpu]);
+ set_sigstack(&cpu_irqstacks[cpu], THREAD_SIZE);
+
+ set_cpu_present(cpu, true);
+ os_futex_wait(&cpu_states[cpu], UML_CPU_PAUSED);
+
+ smp_rmb(); /* paired with smp_wmb() in __cpu_up() */
+
+ idle = cpu_tasks[cpu];
+ idle->thread_info.cpu = cpu;
+
+ mmgrab(mm);
+ idle->active_mm = mm;
+
+ idle->thread.request.thread.proc = start_secondary;
+ idle->thread.request.thread.arg = NULL;
+
+ new_thread(task_stack_page(idle), &idle->thread.switch_buf,
+ new_thread_handler);
+ os_start_secondary(opaque, &idle->thread.switch_buf);
+}
+
+void __init smp_prepare_cpus(unsigned int max_cpus)
+{
+ int err, cpu, me = smp_processor_id();
+ unsigned long deadline;
+
+ os_init_smp();
+
+ for_each_possible_cpu(cpu) {
+ if (cpu == me)
+ continue;
+
+ pr_debug("Booting processor %d...\n", cpu);
+ err = os_start_cpu_thread(cpu);
+ if (err) {
+ pr_crit("CPU#%d failed to start cpu thread, err = %d",
+ cpu, err);
+ continue;
+ }
+
+ deadline = jiffies + msecs_to_jiffies(1000);
+ spin_until_cond(cpu_present(cpu) ||
+ time_is_before_jiffies(deadline));
+
+ if (!cpu_present(cpu))
+ pr_crit("CPU#%d failed to boot\n", cpu);
+ }
+}
+
+int __cpu_up(unsigned int cpu, struct task_struct *tidle)
+{
+ cpu_tasks[cpu] = tidle;
+ smp_wmb(); /* paired with smp_rmb() in uml_start_secondary() */
+ cpu_states[cpu] = UML_CPU_RUNNING;
+ os_futex_wake(&cpu_states[cpu]);
+ spin_until_cond(cpu_online(cpu));
+
+ return 0;
+}
+
+void __init smp_cpus_done(unsigned int max_cpus)
+{
+}
+
+/* Set in uml_ncpus_setup */
+int uml_ncpus = 1;
+
+void __init prefill_possible_map(void)
+{
+ int cpu;
+
+ for (cpu = 0; cpu < uml_ncpus; cpu++)
+ set_cpu_possible(cpu, true);
+ for (; cpu < NR_CPUS; cpu++)
+ set_cpu_possible(cpu, false);
+}
+
+static int __init uml_ncpus_setup(char *line, int *add)
+{
+ *add = 0;
+
+ if (kstrtoint(line, 10, &uml_ncpus)) {
+ os_warn("%s: Couldn't parse '%s'\n", __func__, line);
+ return -1;
+ }
+
+ uml_ncpus = clamp(uml_ncpus, 1, NR_CPUS);
+
+ return 0;
+}
+
+__uml_setup("ncpus=", uml_ncpus_setup,
+"ncpus=<# of desired CPUs>\n"
+" This tells UML how many virtual processors to start. The maximum\n"
+" number of supported virtual processors can be obtained by querying\n"
+" the CONFIG_NR_CPUS option using --showconfig.\n\n"
+);
+
+EXPORT_SYMBOL(uml_curr_cpu);
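
An SMP instance is then requested on the UML command line via the new ncpus= switch; an illustrative invocation (the mem= and ubd0= arguments are placeholders):

    $ ./linux mem=512M ncpus=4 ubd0=rootfs.img root=/dev/ubda

Values outside [1, NR_CPUS] are clamped by uml_ncpus_setup().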
diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c
index 17da0a870650..b344a36b44eb 100644
--- a/arch/um/kernel/time.c
+++ b/arch/um/kernel/time.c
@@ -625,9 +625,10 @@ void time_travel_sleep(void)
* controller application.
*/
unsigned long long next = S64_MAX;
+ int cpu = raw_smp_processor_id();
if (time_travel_mode == TT_MODE_BASIC)
- os_timer_disable();
+ os_timer_disable(cpu);
time_travel_update_time(next, true);
@@ -638,9 +639,9 @@ void time_travel_sleep(void)
* This is somewhat wrong - we should get the first
* one sooner like the os_timer_one_shot() below...
*/
- os_timer_set_interval(time_travel_timer_interval);
+ os_timer_set_interval(cpu, time_travel_timer_interval);
} else {
- os_timer_one_shot(time_travel_timer_event.time - next);
+ os_timer_one_shot(cpu, time_travel_timer_event.time - next);
}
}
}
@@ -758,6 +759,8 @@ extern u64 time_travel_ext_req(u32 op, u64 time);
#define time_travel_del_event(e) do { } while (0)
#endif
+static struct clock_event_device timer_clockevent[NR_CPUS];
+
void timer_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
{
unsigned long flags;
@@ -780,12 +783,14 @@ void timer_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
static int itimer_shutdown(struct clock_event_device *evt)
{
+ int cpu = evt - &timer_clockevent[0];
+
if (time_travel_mode != TT_MODE_OFF)
time_travel_del_event(&time_travel_timer_event);
if (time_travel_mode != TT_MODE_INFCPU &&
time_travel_mode != TT_MODE_EXTERNAL)
- os_timer_disable();
+ os_timer_disable(cpu);
return 0;
}
@@ -793,6 +798,7 @@ static int itimer_shutdown(struct clock_event_device *evt)
static int itimer_set_periodic(struct clock_event_device *evt)
{
unsigned long long interval = NSEC_PER_SEC / HZ;
+ int cpu = evt - &timer_clockevent[0];
if (time_travel_mode != TT_MODE_OFF) {
time_travel_del_event(&time_travel_timer_event);
@@ -805,7 +811,7 @@ static int itimer_set_periodic(struct clock_event_device *evt)
if (time_travel_mode != TT_MODE_INFCPU &&
time_travel_mode != TT_MODE_EXTERNAL)
- os_timer_set_interval(interval);
+ os_timer_set_interval(cpu, interval);
return 0;
}
@@ -825,7 +831,7 @@ static int itimer_next_event(unsigned long delta,
if (time_travel_mode != TT_MODE_INFCPU &&
time_travel_mode != TT_MODE_EXTERNAL)
- return os_timer_one_shot(delta);
+ return os_timer_one_shot(raw_smp_processor_id(), delta);
return 0;
}
@@ -835,10 +841,9 @@ static int itimer_one_shot(struct clock_event_device *evt)
return itimer_next_event(0, evt);
}
-static struct clock_event_device timer_clockevent = {
+static struct clock_event_device _timer_clockevent = {
.name = "posix-timer",
.rating = 250,
- .cpumask = cpu_possible_mask,
.features = CLOCK_EVT_FEAT_PERIODIC |
CLOCK_EVT_FEAT_ONESHOT,
.set_state_shutdown = itimer_shutdown,
@@ -856,6 +861,9 @@ static struct clock_event_device timer_clockevent = {
static irqreturn_t um_timer(int irq, void *dev)
{
+ int cpu = raw_smp_processor_id();
+ struct clock_event_device *evt = &timer_clockevent[cpu];
+
/*
* Interrupt the (possibly) running userspace process, technically this
* should only happen if userspace is currently executing.
@@ -867,7 +875,7 @@ static irqreturn_t um_timer(int irq, void *dev)
get_current()->mm)
os_alarm_process(get_current()->mm->context.id.pid);
- (*timer_clockevent.event_handler)(&timer_clockevent);
+ evt->event_handler(evt);
return IRQ_HANDLED;
}
@@ -904,7 +912,24 @@ static struct clocksource timer_clocksource = {
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
};
-static void __init um_timer_setup(void)
+int um_setup_timer(void)
+{
+ int cpu = raw_smp_processor_id();
+ struct clock_event_device *evt = &timer_clockevent[cpu];
+ int err;
+
+ err = os_timer_create();
+ if (err)
+ return err;
+
+ memcpy(evt, &_timer_clockevent, sizeof(*evt));
+ evt->cpumask = cpumask_of(cpu);
+ clockevents_register_device(evt);
+
+ return 0;
+}
+
+static void __init um_timer_init(void)
{
int err;
@@ -913,8 +938,8 @@ static void __init um_timer_setup(void)
printk(KERN_ERR "register_timer : request_irq failed - "
"errno = %d\n", -err);
- err = os_timer_create();
- if (err != 0) {
+ err = um_setup_timer();
+ if (err) {
printk(KERN_ERR "creation of timer failed - errno = %d\n", -err);
return;
}
@@ -924,7 +949,6 @@ static void __init um_timer_setup(void)
printk(KERN_ERR "clocksource_register_hz returned %d\n", err);
return;
}
- clockevents_register_device(&timer_clockevent);
}
void read_persistent_clock64(struct timespec64 *ts)
@@ -945,7 +969,7 @@ void read_persistent_clock64(struct timespec64 *ts)
void __init time_init(void)
{
timer_set_signal_handler();
- late_time_init = um_timer_setup;
+ late_time_init = um_timer_init;
}
#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
@@ -961,21 +985,21 @@ static int setup_time_travel(char *str)
{
if (strcmp(str, "=inf-cpu") == 0) {
time_travel_mode = TT_MODE_INFCPU;
- timer_clockevent.name = "time-travel-timer-infcpu";
+ _timer_clockevent.name = "time-travel-timer-infcpu";
timer_clocksource.name = "time-travel-clock";
return 1;
}
if (strncmp(str, "=ext:", 5) == 0) {
time_travel_mode = TT_MODE_EXTERNAL;
- timer_clockevent.name = "time-travel-timer-external";
+ _timer_clockevent.name = "time-travel-timer-external";
timer_clocksource.name = "time-travel-clock-external";
return time_travel_connect_external(str + 5);
}
if (!*str) {
time_travel_mode = TT_MODE_BASIC;
- timer_clockevent.name = "time-travel-timer";
+ _timer_clockevent.name = "time-travel-timer";
timer_clocksource.name = "time-travel-clock";
return 1;
}
diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c
index cf7e0d4407f2..39608cccf2c6 100644
--- a/arch/um/kernel/tlb.c
+++ b/arch/um/kernel/tlb.c
@@ -162,9 +162,11 @@ int um_tlb_sync(struct mm_struct *mm)
{
pgd_t *pgd;
struct vm_ops ops;
- unsigned long addr = mm->context.sync_tlb_range_from, next;
+ unsigned long addr, next;
int ret = 0;
+ guard(spinlock_irqsave)(&mm->context.sync_tlb_lock);
+
if (mm->context.sync_tlb_range_to == 0)
return 0;
@@ -177,6 +179,7 @@ int um_tlb_sync(struct mm_struct *mm)
ops.unmap = unmap;
}
+ addr = mm->context.sync_tlb_range_from;
pgd = pgd_offset(mm, addr);
do {
next = pgd_addr_end(addr, mm->context.sync_tlb_range_to);
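
The scoped guard taken at the top of um_tlb_sync() comes from <linux/cleanup.h>; conceptually it is equivalent to the open-coded form below, with the unlock run automatically on every exit path:

    unsigned long flags;

    spin_lock_irqsave(&mm->context.sync_tlb_lock, flags);
    /* ... body of um_tlb_sync() ... */
    spin_unlock_irqrestore(&mm->context.sync_tlb_lock, flags);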
diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
index 5b80a3a89c20..177615820a4c 100644
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -316,7 +316,7 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user,
if (!is_user && regs)
current->thread.segv_regs = container_of(regs, struct pt_regs, regs);
- if (!is_user && init_mm.context.sync_tlb_range_to) {
+ if (!is_user && address >= start_vm && address < end_vm) {
/*
* Kernel has pending updates from set_ptes that were not
* flushed yet. Syncing them should fix the pagefault (if not
diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c
index cfbbbf8500c3..e2b24e1ecfa6 100644
--- a/arch/um/kernel/um_arch.c
+++ b/arch/um/kernel/um_arch.c
@@ -19,6 +19,7 @@
#include <linux/kmsg_dump.h>
#include <linux/suspend.h>
#include <linux/random.h>
+#include <linux/smp-internal.h>
#include <asm/processor.h>
#include <asm/cpufeature.h>
@@ -71,6 +72,12 @@ static int show_cpuinfo(struct seq_file *m, void *v)
{
int i = 0;
+#if IS_ENABLED(CONFIG_SMP)
+ i = (uintptr_t) v - 1;
+ if (!cpu_online(i))
+ return 0;
+#endif
+
seq_printf(m, "processor\t: %d\n", i);
seq_printf(m, "vendor_id\t: User Mode Linux\n");
seq_printf(m, "model name\t: UML\n");
@@ -87,13 +94,14 @@ static int show_cpuinfo(struct seq_file *m, void *v)
loops_per_jiffy/(500000/HZ),
(loops_per_jiffy/(5000/HZ)) % 100);
-
return 0;
}
static void *c_start(struct seq_file *m, loff_t *pos)
{
- return *pos < nr_cpu_ids ? &boot_cpu_data + *pos : NULL;
+ if (*pos < nr_cpu_ids)
+ return (void *)(uintptr_t)(*pos + 1);
+ return NULL;
}
static void *c_next(struct seq_file *m, void *v, loff_t *pos)
@@ -239,8 +247,6 @@ static struct notifier_block panic_exit_notifier = {
void uml_finishsetup(void)
{
- cpu_tasks[0] = &init_task;
-
atomic_notifier_chain_register(&panic_notifier_list,
&panic_exit_notifier);
@@ -254,11 +260,7 @@ unsigned long stub_start;
unsigned long task_size;
EXPORT_SYMBOL(task_size);
-unsigned long host_task_size;
-
unsigned long brk_start;
-unsigned long end_iomem;
-EXPORT_SYMBOL(end_iomem);
#define MIN_VMALLOC (32 * 1024 * 1024)
@@ -298,16 +300,14 @@ static unsigned long __init get_top_address(char **envp)
top_addr = (unsigned long) envp[i];
}
- top_addr &= ~(UM_KERN_PAGE_SIZE - 1);
- top_addr += UM_KERN_PAGE_SIZE;
-
- return top_addr;
+ return PAGE_ALIGN(top_addr + 1);
}
int __init linux_main(int argc, char **argv, char **envp)
{
unsigned long avail, diff;
unsigned long virtmem_size, max_physmem;
+ unsigned long host_task_size;
unsigned long stack;
unsigned int i;
int add;
@@ -354,12 +354,11 @@ int __init linux_main(int argc, char **argv, char **envp)
* so they actually get what they asked for. This should
* add zero for non-exec shield users
*/
-
- diff = UML_ROUND_UP(brk_start) - UML_ROUND_UP(&_end);
+ diff = PAGE_ALIGN(brk_start) - PAGE_ALIGN((unsigned long) &_end);
if (diff > 1024 * 1024) {
os_info("Adding %ld bytes to physical memory to account for "
"exec-shield gap\n", diff);
- physmem_size += UML_ROUND_UP(brk_start) - UML_ROUND_UP(&_end);
+ physmem_size += diff;
}
uml_physmem = (unsigned long) __binary_start & PAGE_MASK;
@@ -369,10 +368,8 @@ int __init linux_main(int argc, char **argv, char **envp)
setup_machinename(init_utsname()->machine);
- physmem_size = (physmem_size + PAGE_SIZE - 1) & PAGE_MASK;
- iomem_size = (iomem_size + PAGE_SIZE - 1) & PAGE_MASK;
-
- max_physmem = TASK_SIZE - uml_physmem - iomem_size - MIN_VMALLOC;
+ physmem_size = PAGE_ALIGN(physmem_size);
+ max_physmem = TASK_SIZE - uml_physmem - MIN_VMALLOC;
if (physmem_size > max_physmem) {
physmem_size = max_physmem;
os_info("Physical memory size shrunk to %llu bytes\n",
@@ -380,7 +377,6 @@ int __init linux_main(int argc, char **argv, char **envp)
}
high_physmem = uml_physmem + physmem_size;
- end_iomem = high_physmem + iomem_size;
start_vm = VMALLOC_START;
@@ -421,6 +417,7 @@ void __init setup_arch(char **cmdline_p)
strscpy(boot_command_line, command_line, COMMAND_LINE_SIZE);
*cmdline_p = command_line;
setup_hostinfo(host_info, sizeof host_info);
+ prefill_possible_map();
if (os_getrandom(rng_seed, sizeof(rng_seed), 0) == sizeof(rng_seed)) {
add_bootloader_randomness(rng_seed, sizeof(rng_seed));
@@ -455,6 +452,18 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
{
}
+#if IS_ENABLED(CONFIG_SMP)
+void alternatives_smp_module_add(struct module *mod, char *name,
+ void *locks, void *locks_end,
+ void *text, void *text_end)
+{
+}
+
+void alternatives_smp_module_del(struct module *mod)
+{
+}
+#endif
+
void *text_poke(void *addr, const void *opcode, size_t len)
{
/*