From 9e124fe16ff24746d6de5a2ad685266d7bce0e08 Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Mon, 26 May 2008 23:31:07 +0100 Subject: xen: Enable console tty by default in domU if it's not a dummy Without console= arguments on the kernel command line, the first console to register becomes enabled and the preferred console (the one behind /dev/console). This is normally tty (assuming CONFIG_VT_CONSOLE is enabled, which it commonly is). This is okay as long tty is a useful console. But unless we have the PV framebuffer, and it is enabled for this domain, tty0 in domU is merely a dummy. In that case, we want the preferred console to be the Xen console hvc0, and we want it without having to fiddle with the kernel command line. Commit b8c2d3dfbc117dff26058fbac316b8acfc2cb5f7 did that for us. Since we now have the PV framebuffer, we want to enable and prefer tty again, but only when PVFB is enabled. But even then we still want to enable the Xen console as well. Problem: when tty registers, we can't yet know whether the PVFB is enabled. By the time we can know (xenstore is up), the console setup game is over. Solution: enable console tty by default, but keep hvc as the preferred console. Change the preferred console to tty when PVFB probes successfully, unless we've been given console kernel parameters. Signed-off-by: Markus Armbruster Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Thomas Gleixner --- kernel/printk.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'kernel') diff --git a/kernel/printk.c b/kernel/printk.c index 8fb01c32aa3b..028ed75d4864 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -121,6 +121,8 @@ struct console_cmdline static struct console_cmdline console_cmdline[MAX_CMDLINECONSOLES]; static int selected_console = -1; static int preferred_console = -1; +int console_set_on_cmdline; +EXPORT_SYMBOL(console_set_on_cmdline); /* Flag: console code may call schedule() */ static int console_may_schedule; @@ -890,6 +892,7 @@ static int __init console_setup(char *str) *s = 0; __add_preferred_console(buf, idx, options, brl_options); + console_set_on_cmdline = 1; return 1; } __setup("console=", console_setup); -- cgit v1.2.3 From aa276e1cafb3ce9d01d1e837bcd67e92616013ac Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 9 Jun 2008 19:15:00 +0200 Subject: x86, clockevents: add C1E aware idle function C1E on AMD machines is like C3 but without control from the OS. Up to now we disabled the local apic timer for those machines as it stops when the CPU goes into C1E. This excludes those machines from high resolution timers / dynamic ticks, which hurts especially X2 based laptops. The current boot time C1E detection has another, more serious flaw as well: some BIOSes do not enable C1E until the ACPI processor module is loaded. This causes systems to stop working after that point. To work nicely with C1E enabled machines we use a separate idle function, which checks on idle entry whether C1E was enabled in the Interrupt Pending Message MSR. This allows us to do timer broadcasting for C1E and covers the late enablement of C1E as well. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 5 ++-- arch/x86/kernel/apic_64.c | 26 +---------------- arch/x86/kernel/cpu/amd.c | 30 -------------------- arch/x86/kernel/cpu/amd_64.c | 25 ----------------- arch/x86/kernel/process.c | 66 ++++++++++++++++++++++++++++++++++++++++++++ include/asm-x86/apic.h | 3 -- kernel/time/tick-broadcast.c | 6 ++-- 7 files changed, 73 insertions(+), 88 deletions(-) (limited to 'kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 4b99b1bdeb6c..c44206e731d4 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -64,9 +64,8 @@ static int enable_local_apic __initdata; /* Local APIC timer verification ok */ static int local_apic_timer_verify_ok; -/* Disable local APIC timer from the kernel commandline or via dmi quirk - or using CPU MSR check */ -int local_apic_timer_disabled; +/* Disable local APIC timer from the kernel commandline or via dmi quirk */ +static int local_apic_timer_disabled; /* Local APIC timer works in C2 */ int local_apic_timer_c2_ok; EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok); diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 0633cfd0dc29..a5cc8447cf4d 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -43,7 +43,7 @@ #include #include -int disable_apic_timer __cpuinitdata; +static int disable_apic_timer __cpuinitdata; static int apic_calibrate_pmtmr __initdata; int disable_apic; @@ -422,32 +422,8 @@ void __init setup_boot_APIC_clock(void) setup_APIC_timer(); } -/* - * AMD C1E enabled CPUs have a real nasty problem: Some BIOSes set the - * C1E flag only in the secondary CPU, so when we detect the wreckage - * we already have enabled the boot CPU local apic timer. Check, if - * disable_apic_timer is set and the DUMMY flag is cleared. If yes, - * set the DUMMY flag again and force the broadcast mode in the - * clockevents layer. - */ -static void __cpuinit check_boot_apic_timer_broadcast(void) -{ - if (!disable_apic_timer || - (lapic_clockevent.features & CLOCK_EVT_FEAT_DUMMY)) - return; - - printk(KERN_INFO "AMD C1E detected late. Force timer broadcast.\n"); - lapic_clockevent.features |= CLOCK_EVT_FEAT_DUMMY; - - local_irq_enable(); - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE, - &boot_cpu_physical_apicid); - local_irq_disable(); -} - void __cpuinit setup_secondary_APIC_clock(void) { - check_boot_apic_timer_broadcast(); setup_APIC_timer(); } diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index e76b49e7a916..acc891ae5901 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -24,31 +24,6 @@ extern void vide(void); __asm__(".align 4\nvide: ret"); -#ifdef CONFIG_X86_LOCAL_APIC - -/* AMD systems with C1E don't have a working lAPIC timer. Check for that. */ -static __cpuinit int amd_apic_timer_broken(struct cpuinfo_x86 *c) -{ - u32 lo, hi; - - if (c->x86 < 0x0F) - return 0; - - /* Family 0x0f models < rev F do not have this MSR */ - if (c->x86 == 0x0f && c->x86_model < 0x40) - return 0; - - rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi); - if (lo & K8_INTP_C1E_ACTIVE_MASK) { - if (smp_processor_id() != boot_cpu_physical_apicid) - printk(KERN_INFO "AMD C1E detected late. " - "Force timer broadcast.\n"); - return 1; - } - return 0; -} -#endif - int force_mwait __cpuinitdata; static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) @@ -285,11 +260,6 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) num_cache_leaves = 3; } -#ifdef CONFIG_X86_LOCAL_APIC - if (amd_apic_timer_broken(c)) - local_apic_timer_disabled = 1; -#endif - /* K6s reports MCEs but don't actually have all the MSRs */ if (c->x86 < 6) clear_cpu_cap(c, X86_FEATURE_MCE); diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c index f5fc161d8f2a..f8d20588bde9 100644 --- a/arch/x86/kernel/cpu/amd_64.c +++ b/arch/x86/kernel/cpu/amd_64.c @@ -110,28 +110,6 @@ static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c) #endif } -/* AMD systems with C1E don't have a working lAPIC timer. Check for that. */ -static __cpuinit int amd_apic_timer_broken(struct cpuinfo_x86 *c) -{ - u32 lo, hi; - - if (c->x86 < 0x0F) - return 0; - - /* Family 0x0f models < rev F do not have this MSR */ - if (c->x86 == 0x0f && c->x86_model < 0x40) - return 0; - - rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi); - if (lo & K8_INTP_C1E_ACTIVE_MASK) { - if (smp_processor_id() != boot_cpu_physical_apicid) - printk(KERN_INFO "AMD C1E detected late. " - "Force timer broadcast.\n"); - return 1; - } - return 0; -} - void __cpuinit early_init_amd(struct cpuinfo_x86 *c) { early_init_amd_mc(c); @@ -212,9 +190,6 @@ void __cpuinit init_amd(struct cpuinfo_x86 *c) if (c->x86 == 0x10) amd_enable_pci_ext_cfg(c); - if (amd_apic_timer_broken(c)) - disable_apic_timer = 1; - if (c == &boot_cpu_data && c->x86 >= 0xf && c->x86 <= 0x11) { unsigned long long tseg; diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 9fea14607dfe..68ad3539b143 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -6,6 +6,7 @@ #include #include #include +#include struct kmem_cache *task_xstate_cachep; @@ -219,6 +220,68 @@ static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c) return (edx & MWAIT_EDX_C1); } +/* + * Check for AMD CPUs, which have potentially C1E support + */ +static int __cpuinit check_c1e_idle(const struct cpuinfo_x86 *c) +{ + if (c->x86_vendor != X86_VENDOR_AMD) + return 0; + + if (c->x86 < 0x0F) + return 0; + + /* Family 0x0f models < rev F do not have C1E */ + if (c->x86 == 0x0f && c->x86_model < 0x40) + return 0; + + return 1; +} + +/* + * C1E aware idle routine. We check for C1E active in the interrupt + * pending message MSR. If we detect C1E, then we handle it the same + * way as C3 power states (local apic timer and TSC stop) + */ +static void c1e_idle(void) +{ + static cpumask_t c1e_mask = CPU_MASK_NONE; + static int c1e_detected; + + if (need_resched()) + return; + + if (!c1e_detected) { + u32 lo, hi; + + rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi); + if (lo & K8_INTP_C1E_ACTIVE_MASK) { + c1e_detected = 1; + mark_tsc_unstable("TSC halt in C1E"); + printk(KERN_INFO "System has C1E enabled\n"); + } + } + + if (c1e_detected) { + int cpu = smp_processor_id(); + + if (!cpu_isset(cpu, c1e_mask)) { + cpu_set(cpu, c1e_mask); + /* Force broadcast so ACPI can not interfere */ + clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE, + &cpu); + printk(KERN_INFO "Switch to broadcast mode on CPU%d\n", + cpu); + } + clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu); + default_idle(); + local_irq_disable(); + clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu); + local_irq_enable(); + } else + default_idle(); +} + void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) { #ifdef CONFIG_X86_SMP @@ -236,6 +299,9 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) */ printk(KERN_INFO "using mwait in idle threads.\n"); pm_idle = mwait_idle; + } else if (check_c1e_idle(c)) { + printk(KERN_INFO "using C1E aware idle routine\n"); + pm_idle = c1e_idle; } else pm_idle = default_idle; } diff --git a/include/asm-x86/apic.h b/include/asm-x86/apic.h index be9639a9a186..3c387cda95fa 100644 --- a/include/asm-x86/apic.h +++ b/include/asm-x86/apic.h @@ -38,12 +38,9 @@ extern void generic_apic_probe(void); extern int apic_verbosity; extern int timer_over_8254; extern int local_apic_timer_c2_ok; -extern int local_apic_timer_disabled; -extern int apic_runs_main_timer; extern int ioapic_force; extern int disable_apic; -extern int disable_apic_timer; /* * Basic functions accessing APICs. diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 57a1f02e5ec0..67f80c261709 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -30,6 +30,7 @@ struct tick_device tick_broadcast_device; static cpumask_t tick_broadcast_mask; static DEFINE_SPINLOCK(tick_broadcast_lock); +static int tick_broadcast_force; #ifdef CONFIG_TICK_ONESHOT static void tick_broadcast_clear_oneshot(int cpu); @@ -232,10 +233,11 @@ static void tick_do_broadcast_on_off(void *why) CLOCK_EVT_MODE_SHUTDOWN); } if (*reason == CLOCK_EVT_NOTIFY_BROADCAST_FORCE) - dev->features |= CLOCK_EVT_FEAT_DUMMY; + tick_broadcast_force = 1; break; case CLOCK_EVT_NOTIFY_BROADCAST_OFF: - if (cpu_isset(cpu, tick_broadcast_mask)) { + if (!tick_broadcast_force && + cpu_isset(cpu, tick_broadcast_mask)) { cpu_clear(cpu, tick_broadcast_mask); if (td->mode == TICKDEV_MODE_PERIODIC) tick_setup_periodic(dev, 0); -- cgit v1.2.3 From 076ac2af86c3b7f89ac31bc50a7508d3e035b786 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Mon, 12 May 2008 21:21:12 +0200 Subject: sched, numa: replace MAX_NUMNODES with nr_node_ids in kernel/sched.c * Replace usages of MAX_NUMNODES with nr_node_ids in kernel/sched.c, where appropriate. This saves some allocated space as well as many wasted cycles going through node entries that are non-existent. Signed-off-by: Mike Travis Signed-off-by: Ingo Molnar --- kernel/sched.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index 94ead43eda62..bcc22b569ee9 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -6538,9 +6538,9 @@ static int find_next_best_node(int node, nodemask_t *used_nodes) min_val = INT_MAX; - for (i = 0; i < MAX_NUMNODES; i++) { + for (i = 0; i < nr_node_ids; i++) { /* Start at @node */ - n = (node + i) % MAX_NUMNODES; + n = (node + i) % nr_node_ids; if (!nr_cpus_node(n)) continue; @@ -6734,7 +6734,7 @@ static void free_sched_groups(const cpumask_t *cpu_map, cpumask_t *nodemask) if (!sched_group_nodes) continue; - for (i = 0; i < MAX_NUMNODES; i++) { + for (i = 0; i < nr_node_ids; i++) { struct sched_group *oldsg, *sg = sched_group_nodes[i]; *nodemask = node_to_cpumask(i); @@ -6927,7 +6927,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map, /* * Allocate the per-node list of sched groups */ - sched_group_nodes = kcalloc(MAX_NUMNODES, sizeof(struct sched_group *), + sched_group_nodes = kcalloc(nr_node_ids, sizeof(struct sched_group *), GFP_KERNEL); if (!sched_group_nodes) { printk(KERN_WARNING "Can not alloc sched group node list\n"); @@ -7066,7 +7066,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map, #endif /* Set up physical groups */ - for (i = 0; i < MAX_NUMNODES; i++) { + for (i = 0; i < nr_node_ids; i++) { SCHED_CPUMASK_VAR(nodemask, allmasks); SCHED_CPUMASK_VAR(send_covered, allmasks); @@ -7090,7 +7090,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map, send_covered, tmpmask); } - for (i = 0; i < MAX_NUMNODES; i++) { + for (i = 0; i < nr_node_ids; i++) { /* Set up node groups */ struct sched_group *sg, *prev; SCHED_CPUMASK_VAR(nodemask, allmasks); @@ -7129,9 +7129,9 @@ static int __build_sched_domains(const cpumask_t *cpu_map, cpus_or(*covered, *covered, *nodemask); prev = sg; - for (j = 0; j < MAX_NUMNODES; j++) { + for (j = 0; j < nr_node_ids; j++) { SCHED_CPUMASK_VAR(notcovered, allmasks); - int n = (i + j) % MAX_NUMNODES; + int n = (i + j) % nr_node_ids; node_to_cpumask_ptr(pnodemask, n); cpus_complement(*notcovered, *covered); @@ -7184,7 +7184,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map, } #ifdef CONFIG_NUMA - for (i = 0; i < MAX_NUMNODES; i++) + for (i = 0; i < nr_node_ids; i++) init_numa_sched_groups_power(sched_group_nodes[i]); if (sd_allnodes) { -- cgit v1.2.3 From a29d1cfe9e9337aedeed505afddc8465ac709b87 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 2 Jun 2008 13:19:08 +0200 Subject: printk: export console_drivers this symbol is needed by drivers/video/xen-fbfront.ko. [ cherry-picked from tip/core/printk ] Signed-off-by: Ingo Molnar --- kernel/printk.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel') diff --git a/kernel/printk.c b/kernel/printk.c index 028ed75d4864..1fb1382009f3 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -75,6 +75,8 @@ EXPORT_SYMBOL(oops_in_progress); static DECLARE_MUTEX(console_sem); static DECLARE_MUTEX(secondary_console_sem); struct console *console_drivers; +EXPORT_SYMBOL_GPL(console_drivers); + /* * This is used for debugging the mess that is the VT code by * keeping track if we have the console semaphore held. It's -- cgit v1.2.3