From 565c2f249a0cb549d419f4c92fb8642b404d42b5 Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Sun, 12 May 2013 07:26:23 +0800 Subject: powerpc: Use patch_exception to update the debug exception handler Signed-off-by: Kevin Hao Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/setup_64.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 4085aaa9478f..5760b9bea936 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -520,9 +520,6 @@ static void __init irqstack_early_init(void) #ifdef CONFIG_PPC_BOOK3E static void __init exc_lvl_early_init(void) { - extern unsigned int interrupt_base_book3e; - extern unsigned int exc_debug_debug_book3e; - unsigned int i; for_each_possible_cpu(i) { @@ -535,8 +532,7 @@ static void __init exc_lvl_early_init(void) } if (cpu_has_feature(CPU_FTR_DEBUG_LVL_EXC)) - patch_branch(&interrupt_base_book3e + (0x040 / 4) + 1, - (unsigned long)&exc_debug_debug_book3e, 0); + patch_exception(0x040, exc_debug_debug_book3e); } #else #define exc_lvl_early_init() -- cgit v1.2.3 From dfee0efe3ec8d4099c69e8234e4e4306619b9ba6 Mon Sep 17 00:00:00 2001 From: Chen Gang Date: Mon, 22 Jul 2013 14:40:20 +0800 Subject: powerpc: kernel: remove useless code which related with 'max_cpus' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since not need 'max_cpus' after the related commit, the related code are useless too, need be removed. The related commit: c1aa687 powerpc: Clean up obsolete code relating to decrementer and timebase The related warning: arch/powerpc/kernel/smp.c:323:43: warning: parameter ‘max_cpus’ set but not used [-Wunused-but-set-parameter] Signed-off-by: Chen Gang Reviewed-by: Srivatsa S. Bhat Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/smp.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index a3b64f3bf9a2..19d654b0150a 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -369,13 +369,8 @@ void __init smp_prepare_cpus(unsigned int max_cpus) cpumask_set_cpu(boot_cpuid, cpu_sibling_mask(boot_cpuid)); cpumask_set_cpu(boot_cpuid, cpu_core_mask(boot_cpuid)); - if (smp_ops) - if (smp_ops->probe) - max_cpus = smp_ops->probe(); - else - max_cpus = NR_CPUS; - else - max_cpus = 1; + if (smp_ops && smp_ops->probe) + smp_ops->probe(); } void smp_prepare_boot_cpu(void) -- cgit v1.2.3 From 0ce636700c5bad54eda0e62903a1803f6d67b31d Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Thu, 22 Aug 2013 09:30:35 +0800 Subject: powerpc: purge all the prefetched instructions for the coherent icache flush As Benjamin Herrenschmidt has indicated, we still need a dummy icbi to purge all the prefetched instructions from the ifetch buffers for the snooping icache. We also need a sync before the icbi to order the actual stores to memory that might have modified instructions with the icbi. 
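As a C-level illustration of the ordering requirement described above, the sequence this patch encodes in the PURGE_PREFETCHED_INS assembler macro could be written with inline assembly roughly as below. This is only a hedged sketch for readers following the reasoning in C; the helper name is hypothetical, and the actual kernel change is the assembler macro in the diff that follows.

	static inline void purge_prefetched_insns(const void *addr)
	{
		/*
		 * sync  - order the stores that modified instructions before the icbi
		 * icbi  - purge any copies already sitting in the ifetch buffers
		 * sync  - make sure the icbi has performed
		 * isync - discard instructions already fetched into the pipeline
		 */
		asm volatile("sync; icbi 0,%0; sync; isync" : : "r" (addr) : "memory");
	}
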
Signed-off-by: Kevin Hao Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/cache.h | 14 +++++++++++++- arch/powerpc/kernel/misc_32.S | 4 +++- arch/powerpc/kernel/misc_64.S | 6 ++++++ 3 files changed, 22 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/cache.h b/arch/powerpc/include/asm/cache.h index 9e495c9a6a88..ed0afc1e44a4 100644 --- a/arch/powerpc/include/asm/cache.h +++ b/arch/powerpc/include/asm/cache.h @@ -41,8 +41,20 @@ struct ppc64_caches { extern struct ppc64_caches ppc64_caches; #endif /* __powerpc64__ && ! __ASSEMBLY__ */ -#if !defined(__ASSEMBLY__) +#if defined(__ASSEMBLY__) +/* + * For a snooping icache, we still need a dummy icbi to purge all the + * prefetched instructions from the ifetch buffers. We also need a sync + * before the icbi to order the the actual stores to memory that might + * have modified instructions with the icbi. + */ +#define PURGE_PREFETCHED_INS \ + sync; \ + icbi 0,r3; \ + sync; \ + isync +#else #define __read_mostly __attribute__((__section__(".data..read_mostly"))) #ifdef CONFIG_6xx diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index e47d268727a4..879f09620f83 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -344,7 +344,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_UNIFIED_ID_CACHE) */ _KPROBE(flush_icache_range) BEGIN_FTR_SECTION - isync + PURGE_PREFETCHED_INS blr /* for 601, do nothing */ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) li r5,L1_CACHE_BYTES-1 @@ -448,6 +448,7 @@ _GLOBAL(invalidate_dcache_range) */ _GLOBAL(__flush_dcache_icache) BEGIN_FTR_SECTION + PURGE_PREFETCHED_INS blr END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) rlwinm r3,r3,0,0,31-PAGE_SHIFT /* Get page base address */ @@ -489,6 +490,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_44x) */ _GLOBAL(__flush_dcache_icache_phys) BEGIN_FTR_SECTION + PURGE_PREFETCHED_INS blr /* for 601, do nothing */ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) mfmsr r10 diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index e59caf874d05..a9f7a79a3a40 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -67,6 +67,7 @@ PPC64_CACHES: _KPROBE(flush_icache_range) BEGIN_FTR_SECTION + PURGE_PREFETCHED_INS blr END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) /* @@ -211,6 +212,11 @@ _GLOBAL(__flush_dcache_icache) * Different systems have different cache line sizes */ +BEGIN_FTR_SECTION + PURGE_PREFETCHED_INS + blr +END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) + /* Flush the dcache */ ld r7,PPC64_CACHES@toc(r2) clrrdi r3,r3,PAGE_SHIFT /* Page align */ -- cgit v1.2.3 From c041cfa2af1ccb8d0346dc576144a1085e9b4d4b Mon Sep 17 00:00:00 2001 From: "fan.du" Date: Wed, 23 Jan 2013 16:06:11 +0800 Subject: powerpc: Make irq_stat.timers_irqs counting more specific Current irq_stat.timers_irqs counting doesn't discriminate timer event handler and other timer interrupt(like arch_irq_work_raise). Sometimes we need to know exactly how much interrupts timer event handler fired, so let's be more specific on this. 
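In condensed form, the split means only decrementer interrupts that actually run the clockevent handler are counted as timer events, while decrementer interrupts raised for other reasons (such as arch_irq_work_raise()) land in a separate counter. Below is a hedged sketch of the accounting the patch adds inside timer_interrupt(); the enclosing condition is paraphrased from the surrounding function, which is not part of the diff.

	if (now >= *next_tb) {
		*next_tb = ~(u64)0;
		if (evt->event_handler)
			evt->event_handler(evt);
		__get_cpu_var(irq_stat).timer_irqs_event++;	/* real timer event */
	} else {
		/* decrementer fired for some other reason, e.g. irq work */
		now = *next_tb - now;
		if (now <= DECREMENTER_MAX)
			set_dec((int)now);
		__get_cpu_var(irq_stat).timer_irqs_others++;
	}
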
Signed-off-by: Fan Du Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/hardirq.h | 3 ++- arch/powerpc/kernel/irq.c | 12 +++++++++--- arch/powerpc/kernel/time.c | 3 ++- 3 files changed, 13 insertions(+), 5 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/hardirq.h b/arch/powerpc/include/asm/hardirq.h index 3bdcfce2c42a..418fb654370d 100644 --- a/arch/powerpc/include/asm/hardirq.h +++ b/arch/powerpc/include/asm/hardirq.h @@ -6,7 +6,8 @@ typedef struct { unsigned int __softirq_pending; - unsigned int timer_irqs; + unsigned int timer_irqs_event; + unsigned int timer_irqs_others; unsigned int pmu_irqs; unsigned int mce_exceptions; unsigned int spurious_irqs; diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index ba0165615215..9729b23bfb0a 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -354,8 +354,13 @@ int arch_show_interrupts(struct seq_file *p, int prec) seq_printf(p, "%*s: ", prec, "LOC"); for_each_online_cpu(j) - seq_printf(p, "%10u ", per_cpu(irq_stat, j).timer_irqs); - seq_printf(p, " Local timer interrupts\n"); + seq_printf(p, "%10u ", per_cpu(irq_stat, j).timer_irqs_event); + seq_printf(p, " Local timer interrupts for timer event device\n"); + + seq_printf(p, "%*s: ", prec, "LOC"); + for_each_online_cpu(j) + seq_printf(p, "%10u ", per_cpu(irq_stat, j).timer_irqs_others); + seq_printf(p, " Local timer interrupts for others\n"); seq_printf(p, "%*s: ", prec, "SPU"); for_each_online_cpu(j) @@ -389,11 +394,12 @@ int arch_show_interrupts(struct seq_file *p, int prec) */ u64 arch_irq_stat_cpu(unsigned int cpu) { - u64 sum = per_cpu(irq_stat, cpu).timer_irqs; + u64 sum = per_cpu(irq_stat, cpu).timer_irqs_event; sum += per_cpu(irq_stat, cpu).pmu_irqs; sum += per_cpu(irq_stat, cpu).mce_exceptions; sum += per_cpu(irq_stat, cpu).spurious_irqs; + sum += per_cpu(irq_stat, cpu).timer_irqs_others; #ifdef CONFIG_PPC_DOORBELL sum += per_cpu(irq_stat, cpu).doorbell_irqs; #endif diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index b3b144121cc9..afb1b56ef4fa 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -510,7 +510,6 @@ void timer_interrupt(struct pt_regs * regs) */ may_hard_irq_enable(); - __get_cpu_var(irq_stat).timer_irqs++; #if defined(CONFIG_PPC32) && defined(CONFIG_PMAC) if (atomic_read(&ppc_n_lost_interrupts) != 0) @@ -532,10 +531,12 @@ void timer_interrupt(struct pt_regs * regs) *next_tb = ~(u64)0; if (evt->event_handler) evt->event_handler(evt); + __get_cpu_var(irq_stat).timer_irqs_event++; } else { now = *next_tb - now; if (now <= DECREMENTER_MAX) set_dec((int)now); + __get_cpu_var(irq_stat).timer_irqs_others++; } #ifdef CONFIG_PPC64 -- cgit v1.2.3 From fd7e42960d7bdf825fedc665727baab4cec44164 Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Thu, 3 Oct 2013 14:57:35 +0530 Subject: powerpc/kernel/sysfs: Cleanup set up macros for PMC/non-PMC SPRs Currently PMC (Performance Monitor Counter) setup macros are used for other SPRs. Since not all SPRs are PMC related, this patch modifies the exisiting macro and uses it to setup both PMC and non PMC SPRs accordingly. 
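To make the effect of the split concrete, here is how the two wrappers are intended to behave at a call site. This is a hedged illustration using registers that appear in the diff below; the expansion comments are paraphrases of the macro body, not generated code.

	/* PMC-related SPR: the write path enables the performance monitor first */
	SYSFS_PMCSETUP(mmcra, SPRN_MMCRA);
	/* write_mmcra(val): ppc_enable_pmcs(); mtspr(SPRN_MMCRA, *(unsigned long *)val); */

	/* Plain SPR: EXTRA is empty, so the write path just writes the register */
	SYSFS_SPRSETUP(purr, SPRN_PURR);
	/* write_purr(val): mtspr(SPRN_PURR, *(unsigned long *)val); */
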
Signed-off-by: Madhavan Srinivasan Acked-by: Olof Johansson Acked-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/sysfs.c | 72 ++++++++++++++++++++++++--------------------- 1 file changed, 38 insertions(+), 34 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index b4e667663d9b..cad777eb613a 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -108,14 +108,14 @@ void ppc_enable_pmcs(void) } EXPORT_SYMBOL(ppc_enable_pmcs); -#define SYSFS_PMCSETUP(NAME, ADDRESS) \ +#define __SYSFS_SPRSETUP(NAME, ADDRESS, EXTRA) \ static void read_##NAME(void *val) \ { \ *(unsigned long *)val = mfspr(ADDRESS); \ } \ static void write_##NAME(void *val) \ { \ - ppc_enable_pmcs(); \ + EXTRA; \ mtspr(ADDRESS, *(unsigned long *)val); \ } \ static ssize_t show_##NAME(struct device *dev, \ @@ -140,6 +140,10 @@ static ssize_t __used \ return count; \ } +#define SYSFS_PMCSETUP(NAME, ADDRESS) \ + __SYSFS_SPRSETUP(NAME, ADDRESS, ppc_enable_pmcs()) +#define SYSFS_SPRSETUP(NAME, ADDRESS) \ + __SYSFS_SPRSETUP(NAME, ADDRESS, ) /* Let's define all possible registers, we'll only hook up the ones * that are implemented on the current processor @@ -175,10 +179,10 @@ SYSFS_PMCSETUP(pmc7, SPRN_PMC7); SYSFS_PMCSETUP(pmc8, SPRN_PMC8); SYSFS_PMCSETUP(mmcra, SPRN_MMCRA); -SYSFS_PMCSETUP(purr, SPRN_PURR); -SYSFS_PMCSETUP(spurr, SPRN_SPURR); -SYSFS_PMCSETUP(dscr, SPRN_DSCR); -SYSFS_PMCSETUP(pir, SPRN_PIR); +SYSFS_SPRSETUP(purr, SPRN_PURR); +SYSFS_SPRSETUP(spurr, SPRN_SPURR); +SYSFS_SPRSETUP(dscr, SPRN_DSCR); +SYSFS_SPRSETUP(pir, SPRN_PIR); /* Lets only enable read for phyp resources and @@ -249,34 +253,34 @@ SYSFS_PMCSETUP(pa6t_pmc3, SPRN_PA6T_PMC3); SYSFS_PMCSETUP(pa6t_pmc4, SPRN_PA6T_PMC4); SYSFS_PMCSETUP(pa6t_pmc5, SPRN_PA6T_PMC5); #ifdef CONFIG_DEBUG_KERNEL -SYSFS_PMCSETUP(hid0, SPRN_HID0); -SYSFS_PMCSETUP(hid1, SPRN_HID1); -SYSFS_PMCSETUP(hid4, SPRN_HID4); -SYSFS_PMCSETUP(hid5, SPRN_HID5); -SYSFS_PMCSETUP(ima0, SPRN_PA6T_IMA0); -SYSFS_PMCSETUP(ima1, SPRN_PA6T_IMA1); -SYSFS_PMCSETUP(ima2, SPRN_PA6T_IMA2); -SYSFS_PMCSETUP(ima3, SPRN_PA6T_IMA3); -SYSFS_PMCSETUP(ima4, SPRN_PA6T_IMA4); -SYSFS_PMCSETUP(ima5, SPRN_PA6T_IMA5); -SYSFS_PMCSETUP(ima6, SPRN_PA6T_IMA6); -SYSFS_PMCSETUP(ima7, SPRN_PA6T_IMA7); -SYSFS_PMCSETUP(ima8, SPRN_PA6T_IMA8); -SYSFS_PMCSETUP(ima9, SPRN_PA6T_IMA9); -SYSFS_PMCSETUP(imaat, SPRN_PA6T_IMAAT); -SYSFS_PMCSETUP(btcr, SPRN_PA6T_BTCR); -SYSFS_PMCSETUP(pccr, SPRN_PA6T_PCCR); -SYSFS_PMCSETUP(rpccr, SPRN_PA6T_RPCCR); -SYSFS_PMCSETUP(der, SPRN_PA6T_DER); -SYSFS_PMCSETUP(mer, SPRN_PA6T_MER); -SYSFS_PMCSETUP(ber, SPRN_PA6T_BER); -SYSFS_PMCSETUP(ier, SPRN_PA6T_IER); -SYSFS_PMCSETUP(sier, SPRN_PA6T_SIER); -SYSFS_PMCSETUP(siar, SPRN_PA6T_SIAR); -SYSFS_PMCSETUP(tsr0, SPRN_PA6T_TSR0); -SYSFS_PMCSETUP(tsr1, SPRN_PA6T_TSR1); -SYSFS_PMCSETUP(tsr2, SPRN_PA6T_TSR2); -SYSFS_PMCSETUP(tsr3, SPRN_PA6T_TSR3); +SYSFS_SPRSETUP(hid0, SPRN_HID0); +SYSFS_SPRSETUP(hid1, SPRN_HID1); +SYSFS_SPRSETUP(hid4, SPRN_HID4); +SYSFS_SPRSETUP(hid5, SPRN_HID5); +SYSFS_SPRSETUP(ima0, SPRN_PA6T_IMA0); +SYSFS_SPRSETUP(ima1, SPRN_PA6T_IMA1); +SYSFS_SPRSETUP(ima2, SPRN_PA6T_IMA2); +SYSFS_SPRSETUP(ima3, SPRN_PA6T_IMA3); +SYSFS_SPRSETUP(ima4, SPRN_PA6T_IMA4); +SYSFS_SPRSETUP(ima5, SPRN_PA6T_IMA5); +SYSFS_SPRSETUP(ima6, SPRN_PA6T_IMA6); +SYSFS_SPRSETUP(ima7, SPRN_PA6T_IMA7); +SYSFS_SPRSETUP(ima8, SPRN_PA6T_IMA8); +SYSFS_SPRSETUP(ima9, SPRN_PA6T_IMA9); +SYSFS_SPRSETUP(imaat, SPRN_PA6T_IMAAT); +SYSFS_SPRSETUP(btcr, SPRN_PA6T_BTCR); 
+SYSFS_SPRSETUP(pccr, SPRN_PA6T_PCCR); +SYSFS_SPRSETUP(rpccr, SPRN_PA6T_RPCCR); +SYSFS_SPRSETUP(der, SPRN_PA6T_DER); +SYSFS_SPRSETUP(mer, SPRN_PA6T_MER); +SYSFS_SPRSETUP(ber, SPRN_PA6T_BER); +SYSFS_SPRSETUP(ier, SPRN_PA6T_IER); +SYSFS_SPRSETUP(sier, SPRN_PA6T_SIER); +SYSFS_SPRSETUP(siar, SPRN_PA6T_SIAR); +SYSFS_SPRSETUP(tsr0, SPRN_PA6T_TSR0); +SYSFS_SPRSETUP(tsr1, SPRN_PA6T_TSR1); +SYSFS_SPRSETUP(tsr2, SPRN_PA6T_TSR2); +SYSFS_SPRSETUP(tsr3, SPRN_PA6T_TSR3); #endif /* CONFIG_DEBUG_KERNEL */ #endif /* HAS_PPC_PMC_PA6T */ -- cgit v1.2.3 From 729b0f715371ce1e7636b4958fc45d6882442456 Mon Sep 17 00:00:00 2001 From: Mahesh Salgaonkar Date: Wed, 30 Oct 2013 20:04:00 +0530 Subject: powerpc/book3s: Introduce exclusive emergency stack for machine check exception. This patch introduces exclusive emergency stack for machine check exception. We use emergency stack to handle machine check exception so that we can save MCE information (srr1, srr0, dar and dsisr) before turning on ME bit and be ready for re-entrancy. This helps us to prevent clobbering of MCE information in case of nested machine checks. The reason for using emergency stack over normal kernel stack is that the machine check might occur in the middle of setting up a stack frame which may result into improper use of kernel stack. Signed-off-by: Mahesh Salgaonkar Acked-by: Paul Mackerras Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/paca.h | 9 +++++++++ arch/powerpc/kernel/setup_64.c | 10 +++++++++- arch/powerpc/xmon/xmon.c | 4 ++++ 3 files changed, 22 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index b6ea9e068c13..c3523d1dda58 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -152,6 +152,15 @@ struct paca_struct { */ struct opal_machine_check_event *opal_mc_evt; #endif +#ifdef CONFIG_PPC_BOOK3S_64 + /* Exclusive emergency stack pointer for machine check exception. */ + void *mc_emergency_sp; + /* + * Flag to check whether we are in machine check early handler + * and already using emergency stack. + */ + u16 in_mce; +#endif /* Stuff for accurate time accounting */ u64 user_time; /* accumulated usermode TB ticks */ diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 5760b9bea936..2232aff66059 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -540,7 +540,8 @@ static void __init exc_lvl_early_init(void) /* * Stack space used when we detect a bad kernel stack pointer, and - * early in SMP boots before relocation is enabled. + * early in SMP boots before relocation is enabled. Exclusive emergency + * stack for machine checks. */ static void __init emergency_stack_init(void) { @@ -563,6 +564,13 @@ static void __init emergency_stack_init(void) sp = memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit); sp += THREAD_SIZE; paca[i].emergency_sp = __va(sp); + +#ifdef CONFIG_PPC_BOOK3S_64 + /* emergency stack for machine check exception handling. 
*/ + sp = memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit); + sp += THREAD_SIZE; + paca[i].mc_emergency_sp = __va(sp); +#endif } } diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index af9d3469fb99..a90731b3d44a 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -2051,6 +2051,10 @@ static void dump_one_paca(int cpu) DUMP(p, stab_addr, "lx"); #endif DUMP(p, emergency_sp, "p"); +#ifdef CONFIG_PPC_BOOK3S_64 + DUMP(p, mc_emergency_sp, "p"); + DUMP(p, in_mce, "x"); +#endif DUMP(p, data_offset, "lx"); DUMP(p, hw_cpu_id, "x"); DUMP(p, cpu_start, "x"); -- cgit v1.2.3 From 1e9b4507ed98457edb8a892934282b8f63e17246 Mon Sep 17 00:00:00 2001 From: Mahesh Salgaonkar Date: Wed, 30 Oct 2013 20:04:08 +0530 Subject: powerpc/book3s: handle machine check in Linux host. Move machine check entry point into Linux. So far we were dependent on firmware to decode MCE error details and handover the high level info to OS. This patch introduces early machine check routine that saves the MCE information (srr1, srr0, dar and dsisr) to the emergency stack. We allocate stack frame on emergency stack and set the r1 accordingly. This allows us to be prepared to take another exception without loosing context. One thing to note here that, if we get another machine check while ME bit is off then we risk a checkstop. Hence we restrict ourselves to save only MCE information and register saved on PACA_EXMC save are before we turn the ME bit on. We use paca->in_mce flag to differentiate between first entry and nested machine check entry which helps proper use of emergency stack. We increment paca->in_mce every time we enter in early machine check handler and decrement it while leaving. When we enter machine check early handler first time (paca->in_mce == 0), we are sure nobody is using MC emergency stack and allocate a stack frame at the start of the emergency stack. During subsequent entry (paca->in_mce > 0), we know that r1 points inside emergency stack and we allocate separate stack frame accordingly. This prevents us from clobbering MCE information during nested machine checks. The early machine check handler changes are placed under CPU_FTR_HVMODE section. This makes sure that the early machine check handler will get executed only in hypervisor kernel. This is the code flow: Machine Check Interrupt | V 0x200 vector ME=0, IR=0, DR=0 | V +-----------------------------------------------+ |machine_check_pSeries_early: | ME=0, IR=0, DR=0 | Alloc frame on emergency stack | | Save srr1, srr0, dar and dsisr on stack | +-----------------------------------------------+ | (ME=1, IR=0, DR=0, RFID) | V machine_check_handle_early ME=1, IR=0, DR=0 | V +-----------------------------------------------+ | machine_check_early (r3=pt_regs) | ME=1, IR=0, DR=0 | Things to do: (in next patches) | | Flush SLB for SLB errors | | Flush TLB for TLB errors | | Decode and save MCE info | +-----------------------------------------------+ | (Fall through existing exception handler routine.) | V machine_check_pSerie ME=1, IR=0, DR=0 | (ME=1, IR=1, DR=1, RFID) | V machine_check_common ME=1, IR=1, DR=1 . . . 
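Expressed as C-style pseudocode, the stack-selection and nesting logic that the new machine_check_pSeries_early entry implements looks roughly like the sketch below. This is a hedged illustration only; the helper name is invented, and the real code is the assembly added in the diff that follows (which also saves SRR0, SRR1, DAR and DSISR into the frame before turning ME on).

	/* decide where the MCE frame goes, tracking nesting via paca->in_mce */
	static void *mce_pick_stack_frame(struct paca_struct *paca, void *r1)
	{
		void *sp;

		if (paca->in_mce == 0)
			sp = paca->mc_emergency_sp;	/* first entry: top of MC emergency stack */
		else
			sp = r1;			/* nested MCE: r1 already points into it */

		sp -= INT_FRAME_SIZE;			/* allocate a fresh interrupt frame */
		paca->in_mce++;				/* decremented again on the way out */
		return sp;
	}
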
Signed-off-by: Mahesh Salgaonkar Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/asm-offsets.c | 4 ++ arch/powerpc/kernel/exceptions-64s.S | 111 +++++++++++++++++++++++++++++++++++ arch/powerpc/kernel/traps.c | 12 ++++ 3 files changed, 127 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 2ea5cc033ec8..41a283956a29 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -232,6 +232,10 @@ int main(void) DEFINE(PACA_DTL_RIDX, offsetof(struct paca_struct, dtl_ridx)); #endif /* CONFIG_PPC_STD_MMU_64 */ DEFINE(PACAEMERGSP, offsetof(struct paca_struct, emergency_sp)); +#ifdef CONFIG_PPC_BOOK3S_64 + DEFINE(PACAMCEMERGSP, offsetof(struct paca_struct, mc_emergency_sp)); + DEFINE(PACA_IN_MCE, offsetof(struct paca_struct, in_mce)); +#endif DEFINE(PACAHWCPUID, offsetof(struct paca_struct, hw_cpu_id)); DEFINE(PACAKEXECSTATE, offsetof(struct paca_struct, kexec_state)); DEFINE(PACA_STARTTIME, offsetof(struct paca_struct, starttime)); diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 9f905e40922e..4034dfb2a530 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -156,7 +156,11 @@ machine_check_pSeries_1: HMT_MEDIUM_PPR_DISCARD SET_SCRATCH0(r13) /* save r13 */ EXCEPTION_PROLOG_0(PACA_EXMC) +BEGIN_FTR_SECTION + b machine_check_pSeries_early +FTR_SECTION_ELSE b machine_check_pSeries_0 +ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE) . = 0x300 .globl data_access_pSeries @@ -405,6 +409,64 @@ denorm_exception_hv: .align 7 /* moved from 0x200 */ +machine_check_pSeries_early: +BEGIN_FTR_SECTION + EXCEPTION_PROLOG_1(PACA_EXMC, NOTEST, 0x200) + /* + * Register contents: + * R13 = PACA + * R9 = CR + * Original R9 to R13 is saved on PACA_EXMC + * + * Switch to mc_emergency stack and handle re-entrancy (though we + * currently don't test for overflow). Save MCE registers srr1, + * srr0, dar and dsisr and then set ME=1 + * + * We use paca->in_mce to check whether this is the first entry or + * nested machine check. We increment paca->in_mce to track nested + * machine checks. + * + * If this is the first entry then set stack pointer to + * paca->mc_emergency_sp, otherwise r1 is already pointing to + * stack frame on mc_emergency stack. + * + * NOTE: We are here with MSR_ME=0 (off), which means we risk a + * checkstop if we get another machine check exception before we do + * rfid with MSR_ME=1. + */ + mr r11,r1 /* Save r1 */ + lhz r10,PACA_IN_MCE(r13) + cmpwi r10,0 /* Are we in nested machine check */ + bne 0f /* Yes, we are. */ + /* First machine check entry */ + ld r1,PACAMCEMERGSP(r13) /* Use MC emergency stack */ +0: subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */ + addi r10,r10,1 /* increment paca->in_mce */ + sth r10,PACA_IN_MCE(r13) + std r11,GPR1(r1) /* Save r1 on the stack. */ + std r11,0(r1) /* make stack chain pointer */ + mfspr r11,SPRN_SRR0 /* Save SRR0 */ + std r11,_NIP(r1) + mfspr r11,SPRN_SRR1 /* Save SRR1 */ + std r11,_MSR(r1) + mfspr r11,SPRN_DAR /* Save DAR */ + std r11,_DAR(r1) + mfspr r11,SPRN_DSISR /* Save DSISR */ + std r11,_DSISR(r1) + std r9,_CCR(r1) /* Save CR in stackframe */ + /* Save r9 through r13 from EXMC save area to stack frame. 
*/ + EXCEPTION_PROLOG_COMMON_2(PACA_EXMC) + mfmsr r11 /* get MSR value */ + ori r11,r11,MSR_ME /* turn on ME bit */ + ori r11,r11,MSR_RI /* turn on RI bit */ + ld r12,PACAKBASE(r13) /* get high part of &label */ + LOAD_HANDLER(r12, machine_check_handle_early) + mtspr SPRN_SRR0,r12 + mtspr SPRN_SRR1,r11 + rfid + b . /* prevent speculative execution */ +END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) + machine_check_pSeries: .globl machine_check_fwnmi machine_check_fwnmi: @@ -712,6 +774,55 @@ machine_check_common: bl .machine_check_exception b .ret_from_except +#define MACHINE_CHECK_HANDLER_WINDUP \ + /* Clear MSR_RI before setting SRR0 and SRR1. */\ + li r0,MSR_RI; \ + mfmsr r9; /* get MSR value */ \ + andc r9,r9,r0; \ + mtmsrd r9,1; /* Clear MSR_RI */ \ + /* Move original SRR0 and SRR1 into the respective regs */ \ + ld r9,_MSR(r1); \ + mtspr SPRN_SRR1,r9; \ + ld r3,_NIP(r1); \ + mtspr SPRN_SRR0,r3; \ + ld r9,_CTR(r1); \ + mtctr r9; \ + ld r9,_XER(r1); \ + mtxer r9; \ + ld r9,_LINK(r1); \ + mtlr r9; \ + REST_GPR(0, r1); \ + REST_8GPRS(2, r1); \ + REST_GPR(10, r1); \ + ld r11,_CCR(r1); \ + mtcr r11; \ + /* Decrement paca->in_mce. */ \ + lhz r12,PACA_IN_MCE(r13); \ + subi r12,r12,1; \ + sth r12,PACA_IN_MCE(r13); \ + REST_GPR(11, r1); \ + REST_2GPRS(12, r1); \ + /* restore original r1. */ \ + ld r1,GPR1(r1) + + /* + * Handle machine check early in real mode. We come here with + * ME=1, MMU (IR=0 and DR=0) off and using MC emergency stack. + */ + .align 7 + .globl machine_check_handle_early +machine_check_handle_early: +BEGIN_FTR_SECTION + std r0,GPR0(r1) /* Save r0 */ + EXCEPTION_PROLOG_COMMON_3(0x200) + bl .save_nvgprs + addi r3,r1,STACK_FRAME_OVERHEAD + bl .machine_check_early + /* Deliver the machine check to host kernel in V mode. */ + MACHINE_CHECK_HANDLER_WINDUP + b machine_check_pSeries +END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) + STD_EXCEPTION_COMMON_ASYNC(0x500, hardware_interrupt, do_IRQ) STD_EXCEPTION_COMMON_ASYNC(0x900, decrementer, .timer_interrupt) STD_EXCEPTION_COMMON(0x980, hdecrementer, .hdec_interrupt) diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 907a472f9a9e..97a1ce72e130 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -285,6 +285,18 @@ void system_reset_exception(struct pt_regs *regs) /* What should we do here? We could issue a shutdown or hard reset. */ } + +/* + * This function is called in real mode. Strictly no printk's please. + * + * regs->nip and regs->msr contains srr0 and ssr1. + */ +long machine_check_early(struct pt_regs *regs) +{ + /* TODO: handle/decode machine check reason */ + return 0; +} + #endif /* -- cgit v1.2.3 From 1c51089f777bf357487668be9621292cfed752bd Mon Sep 17 00:00:00 2001 From: Mahesh Salgaonkar Date: Wed, 30 Oct 2013 20:04:31 +0530 Subject: powerpc/book3s: Return from interrupt if coming from evil context. We can get machine checks from any context. We need to make sure that we handle all of them correctly. If we are coming from hypervisor user-space, we can continue in host kernel in virtual mode to deliver the MC event. If we got woken up from power-saving mode then we may come in with one of the following state: a. No state loss b. Supervisor state loss c. Hypervisor state loss For (a) and (b), we go back to nap again. State (c) is fatal, keep spinning. For all other context which we not sure of queue up the MCE event and return from the interrupt. 
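The context checks described above can be summarised in C-style pseudocode. Every helper name here is invented for illustration; the actual checks are the SRR1/MSR bit tests added around machine_check_handle_early in the diff that follows.

	if (was_napping(srr1)) {			/* woken from power saving? */
		if (hv_state_lost(srr1))
			for (;;)			/* (c) fatal: stay stuck here */
				;
		if (supervisor_state_lost(srr1))
			paca->nap_state_lost = 1;	/* (b) */
		go_back_to_nap();			/* (a) and (b) */
	} else if (in_hv_mode(srr1) && from_userspace(srr1)) {
		deliver_mce_in_virtual_mode();		/* continue in host kernel */
	} else if (in_guest()) {
		fall_through_to_kvm_interrupt();	/* KVM delivers it to the guest */
	} else {
		/* unsure of the context: queue the event (TODO) and return */
		if (!msr_ri_set(srr1))
			unrecoverable_exception();	/* stay on emergency stack and panic */
		return_from_interrupt();		/* windup + rfid */
	}
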
Signed-off-by: Mahesh Salgaonkar Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/exceptions-64s.S | 82 ++++++++++++++++++++++++++++++++++++ arch/powerpc/kernel/idle_power7.S | 1 + 2 files changed, 83 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 4034dfb2a530..1aec3025eeee 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -155,6 +155,24 @@ machine_check_pSeries_1: */ HMT_MEDIUM_PPR_DISCARD SET_SCRATCH0(r13) /* save r13 */ +#ifdef CONFIG_PPC_P7_NAP +BEGIN_FTR_SECTION + /* Running native on arch 2.06 or later, check if we are + * waking up from nap. We only handle no state loss and + * supervisor state loss. We do -not- handle hypervisor + * state loss at this time. + */ + mfspr r13,SPRN_SRR1 + rlwinm. r13,r13,47-31,30,31 + beq 9f + + /* waking up from powersave (nap) state */ + cmpwi cr1,r13,2 + /* Total loss of HV state is fatal. let's just stay stuck here */ + bgt cr1,. +9: +END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) +#endif /* CONFIG_PPC_P7_NAP */ EXCEPTION_PROLOG_0(PACA_EXMC) BEGIN_FTR_SECTION b machine_check_pSeries_early @@ -818,6 +836,70 @@ BEGIN_FTR_SECTION bl .save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD bl .machine_check_early + ld r12,_MSR(r1) +#ifdef CONFIG_PPC_P7_NAP + /* + * Check if thread was in power saving mode. We come here when any + * of the following is true: + * a. thread wasn't in power saving mode + * b. thread was in power saving mode with no state loss or + * supervisor state loss + * + * Go back to nap again if (b) is true. + */ + rlwinm. r11,r12,47-31,30,31 /* Was it in power saving mode? */ + beq 4f /* No, it wasn;t */ + /* Thread was in power saving mode. Go back to nap again. */ + cmpwi r11,2 + bne 3f + /* Supervisor state loss */ + li r0,1 + stb r0,PACA_NAPSTATELOST(r13) +3: MACHINE_CHECK_HANDLER_WINDUP + GET_PACA(r13) + ld r1,PACAR1(r13) + b .power7_enter_nap_mode +4: +#endif + /* + * Check if we are coming from hypervisor userspace. If yes then we + * continue in host kernel in V mode to deliver the MC event. + */ + rldicl. r11,r12,4,63 /* See if MC hit while in HV mode. */ + beq 5f + andi. r11,r12,MSR_PR /* See if coming from user. */ + bne 9f /* continue in V mode if we are. */ + +5: +#ifdef CONFIG_KVM_BOOK3S_64_HV + /* + * We are coming from kernel context. Check if we are coming from + * guest. if yes, then we can continue. We will fall through + * do_kvm_200->kvmppc_interrupt to deliver the MC event to guest. + */ + lbz r11,HSTATE_IN_GUEST(r13) + cmpwi r11,0 /* Check if coming from guest */ + bne 9f /* continue if we are. */ +#endif + /* + * At this point we are not sure about what context we come from. + * Queue up the MCE event and return from the interrupt. + * But before that, check if this is an un-recoverable exception. + * If yes, then stay on emergency stack and panic. + */ + andi. r11,r12,MSR_RI + bne 2f +1: addi r3,r1,STACK_FRAME_OVERHEAD + bl .unrecoverable_exception + b 1b +2: + /* + * Return from MC interrupt. + * TODO: Queue up the MCE event so that we can log it later. + */ + MACHINE_CHECK_HANDLER_WINDUP + rfid +9: /* Deliver the machine check to host kernel in V mode. 
*/ MACHINE_CHECK_HANDLER_WINDUP b machine_check_pSeries diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S index 847e40e62fce..3fdef0f0c67f 100644 --- a/arch/powerpc/kernel/idle_power7.S +++ b/arch/powerpc/kernel/idle_power7.S @@ -84,6 +84,7 @@ _GLOBAL(power7_nap) std r9,_MSR(r1) std r1,PACAR1(r13) +_GLOBAL(power7_enter_nap_mode) #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE /* Tell KVM we're napping */ li r4,KVM_HWTHREAD_IN_NAP -- cgit v1.2.3 From 4c703416efc0a23f83a282b9240bb92fbd9e0be9 Mon Sep 17 00:00:00 2001 From: Mahesh Salgaonkar Date: Wed, 30 Oct 2013 20:04:40 +0530 Subject: powerpc/book3s: Introduce a early machine check hook in cpu_spec. This patch adds the early machine check function pointer in cputable for CPU specific early machine check handling. The early machine handle routine will be called in real mode to handle SLB and TLB errors. We can not reuse the existing machine_check hook because it is always invoked in kernel virtual mode and we would already be in trouble if we get SLB or TLB errors. This patch just sets up a mechanism to invoke CPU specific handler. The subsequent patches will populate the function pointer. Signed-off-by: Mahesh Salgaonkar Acked-by: Paul Mackerras Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/cputable.h | 7 +++++++ arch/powerpc/kernel/traps.c | 7 +++++-- 2 files changed, 12 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index 0d4939ba48e7..c5b107afca4d 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -90,6 +90,13 @@ struct cpu_spec { * if the error is fatal, 1 if it was fully recovered and 0 to * pass up (not CPU originated) */ int (*machine_check)(struct pt_regs *regs); + + /* + * Processor specific early machine check handler which is + * called in real mode to handle SLB and TLB errors. + */ + long (*machine_check_early)(struct pt_regs *regs); + }; extern struct cpu_spec *cur_cpu_spec; diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 97a1ce72e130..330841766b09 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -293,8 +293,11 @@ void system_reset_exception(struct pt_regs *regs) */ long machine_check_early(struct pt_regs *regs) { - /* TODO: handle/decode machine check reason */ - return 0; + long handled = 0; + + if (cur_cpu_spec && cur_cpu_spec->machine_check_early) + handled = cur_cpu_spec->machine_check_early(regs); + return handled; } #endif -- cgit v1.2.3 From 0440705049b041d84268ea57f6e90e2f16618897 Mon Sep 17 00:00:00 2001 From: Mahesh Salgaonkar Date: Wed, 30 Oct 2013 20:04:56 +0530 Subject: powerpc/book3s: Add flush_tlb operation in cpu_spec. This patch introduces flush_tlb operation in cpu_spec structure. This will help us to invoke appropriate CPU-side flush tlb routine. This patch adds the foundation to invoke CPU specific flush routine for respective architectures. Currently this patch introduce flush_tlb for p7 and p8. 
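A hedged example of how the new hook is intended to be used by callers; it mirrors what the follow-up SLB/TLB machine check recovery patches do, and the invalidation selector constant is taken from those later patches, so treat the exact value as illustrative here.

	/* e.g. on a TLB multi-hit machine check, flush the whole TLB */
	if (cur_cpu_spec && cur_cpu_spec->flush_tlb)
		cur_cpu_spec->flush_tlb(TLBIEL_INVAL_SET_LPID);
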
Signed-off-by: Mahesh Salgaonkar Acked-by: Paul Mackerras Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/cputable.h | 5 +++++ arch/powerpc/kernel/cpu_setup_power.S | 38 +++++++++++++++++++++++++---------- arch/powerpc/kernel/cputable.c | 8 ++++++++ arch/powerpc/kvm/book3s_hv_ras.c | 18 ++++------------- 4 files changed, 44 insertions(+), 25 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index c5b107afca4d..617cc767c076 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -97,6 +97,11 @@ struct cpu_spec { */ long (*machine_check_early)(struct pt_regs *regs); + /* + * Processor specific routine to flush tlbs. + */ + void (*flush_tlb)(unsigned long inval_selector); + }; extern struct cpu_spec *cur_cpu_spec; diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S index 18b5b9cf8e37..37d1bb002aa9 100644 --- a/arch/powerpc/kernel/cpu_setup_power.S +++ b/arch/powerpc/kernel/cpu_setup_power.S @@ -29,7 +29,7 @@ _GLOBAL(__setup_cpu_power7) mtspr SPRN_LPID,r0 mfspr r3,SPRN_LPCR bl __init_LPCR - bl __init_TLB + bl __init_tlb_power7 mtlr r11 blr @@ -42,7 +42,7 @@ _GLOBAL(__restore_cpu_power7) mtspr SPRN_LPID,r0 mfspr r3,SPRN_LPCR bl __init_LPCR - bl __init_TLB + bl __init_tlb_power7 mtlr r11 blr @@ -59,7 +59,7 @@ _GLOBAL(__setup_cpu_power8) oris r3, r3, LPCR_AIL_3@h bl __init_LPCR bl __init_HFSCR - bl __init_TLB + bl __init_tlb_power8 bl __init_PMU_HV mtlr r11 blr @@ -78,7 +78,7 @@ _GLOBAL(__restore_cpu_power8) oris r3, r3, LPCR_AIL_3@h bl __init_LPCR bl __init_HFSCR - bl __init_TLB + bl __init_tlb_power8 bl __init_PMU_HV mtlr r11 blr @@ -134,15 +134,31 @@ __init_HFSCR: mtspr SPRN_HFSCR,r3 blr -__init_TLB: - /* - * Clear the TLB using the "IS 3" form of tlbiel instruction - * (invalidate by congruence class). P7 has 128 CCs, P8 has 512 - * so we just always do 512 - */ +/* + * Clear the TLB using the specified IS form of tlbiel instruction + * (invalidate by congruence class). P7 has 128 CCs., P8 has 512. + * + * r3 = IS field + */ +__init_tlb_power7: + li r3,0xc00 /* IS field = 0b11 */ +_GLOBAL(__flush_tlb_power7) + li r6,128 + mtctr r6 + mr r7,r3 /* IS field */ + ptesync +2: tlbiel r7 + addi r7,r7,0x1000 + bdnz 2b + ptesync +1: blr + +__init_tlb_power8: + li r3,0xc00 /* IS field = 0b11 */ +_GLOBAL(__flush_tlb_power8) li r6,512 mtctr r6 - li r7,0xc00 /* IS field = 0b11 */ + mr r7,r3 /* IS field */ ptesync 2: tlbiel r7 addi r7,r7,0x1000 diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 597d954e5860..55d0f9c282b8 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -71,6 +71,8 @@ extern void __restore_cpu_power7(void); extern void __setup_cpu_power8(unsigned long offset, struct cpu_spec* spec); extern void __restore_cpu_power8(void); extern void __restore_cpu_a2(void); +extern void __flush_tlb_power7(unsigned long inval_selector); +extern void __flush_tlb_power8(unsigned long inval_selector); #endif /* CONFIG_PPC64 */ #if defined(CONFIG_E500) extern void __setup_cpu_e5500(unsigned long offset, struct cpu_spec* spec); @@ -440,6 +442,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .oprofile_cpu_type = "ppc64/ibm-compat-v1", .cpu_setup = __setup_cpu_power7, .cpu_restore = __restore_cpu_power7, + .flush_tlb = __flush_tlb_power7, .platform = "power7", }, { /* 2.07-compliant processor, i.e. 
Power8 "architected" mode */ @@ -456,6 +459,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .oprofile_cpu_type = "ppc64/ibm-compat-v1", .cpu_setup = __setup_cpu_power8, .cpu_restore = __restore_cpu_power8, + .flush_tlb = __flush_tlb_power8, .platform = "power8", }, { /* Power7 */ @@ -474,6 +478,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .oprofile_type = PPC_OPROFILE_POWER4, .cpu_setup = __setup_cpu_power7, .cpu_restore = __restore_cpu_power7, + .flush_tlb = __flush_tlb_power7, .platform = "power7", }, { /* Power7+ */ @@ -492,6 +497,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .oprofile_type = PPC_OPROFILE_POWER4, .cpu_setup = __setup_cpu_power7, .cpu_restore = __restore_cpu_power7, + .flush_tlb = __flush_tlb_power7, .platform = "power7+", }, { /* Power8E */ @@ -510,6 +516,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .oprofile_type = PPC_OPROFILE_INVALID, .cpu_setup = __setup_cpu_power8, .cpu_restore = __restore_cpu_power8, + .flush_tlb = __flush_tlb_power8, .platform = "power8", }, { /* Power8 */ @@ -528,6 +535,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .oprofile_type = PPC_OPROFILE_INVALID, .cpu_setup = __setup_cpu_power8, .cpu_restore = __restore_cpu_power8, + .flush_tlb = __flush_tlb_power8, .platform = "power8", }, { /* Cell Broadband Engine */ diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c index a353c485808c..5c427b41a2f5 100644 --- a/arch/powerpc/kvm/book3s_hv_ras.c +++ b/arch/powerpc/kvm/book3s_hv_ras.c @@ -58,18 +58,6 @@ static void reload_slb(struct kvm_vcpu *vcpu) } } -/* POWER7 TLB flush */ -static void flush_tlb_power7(struct kvm_vcpu *vcpu) -{ - unsigned long i, rb; - - rb = TLBIEL_INVAL_SET_LPID; - for (i = 0; i < POWER7_TLB_SETS; ++i) { - asm volatile("tlbiel %0" : : "r" (rb)); - rb += 1 << TLBIEL_INVAL_SET_SHIFT; - } -} - /* * On POWER7, see if we can handle a machine check that occurred inside * the guest in real mode, without switching to the host partition. @@ -96,7 +84,8 @@ static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu) DSISR_MC_SLB_PARITY | DSISR_MC_DERAT_MULTI); } if (dsisr & DSISR_MC_TLB_MULTI) { - flush_tlb_power7(vcpu); + if (cur_cpu_spec && cur_cpu_spec->flush_tlb) + cur_cpu_spec->flush_tlb(TLBIEL_INVAL_SET_LPID); dsisr &= ~DSISR_MC_TLB_MULTI; } /* Any other errors we don't understand? */ @@ -113,7 +102,8 @@ static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu) reload_slb(vcpu); break; case SRR1_MC_IFETCH_TLBMULTI: - flush_tlb_power7(vcpu); + if (cur_cpu_spec && cur_cpu_spec->flush_tlb) + cur_cpu_spec->flush_tlb(TLBIEL_INVAL_SET_LPID); break; default: handled = 0; -- cgit v1.2.3 From e22a22740c1ac23aaa10835f026b3549ee3e4e75 Mon Sep 17 00:00:00 2001 From: Mahesh Salgaonkar Date: Wed, 30 Oct 2013 20:05:11 +0530 Subject: powerpc/book3s: Flush SLB/TLBs if we get SLB/TLB machine check errors on power7. If we get a machine check exception due to SLB or TLB errors, then flush SLBs/TLBs and reload SLBs to recover. We do this in real mode before turning on MMU. Otherwise we would run into nested machine checks. If we get a machine check when we are in guest, then just flush the SLBs and continue. This patch handles errors for power7. 
The next patch will handle errors for power8 Signed-off-by: Mahesh Salgaonkar Signed-off-by: Paul Mackerras Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/bitops.h | 5 ++ arch/powerpc/include/asm/mce.h | 67 +++++++++++++++++ arch/powerpc/kernel/Makefile | 1 + arch/powerpc/kernel/cputable.c | 4 + arch/powerpc/kernel/mce_power.c | 150 ++++++++++++++++++++++++++++++++++++++ 5 files changed, 227 insertions(+) create mode 100644 arch/powerpc/include/asm/mce.h create mode 100644 arch/powerpc/kernel/mce_power.c (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/bitops.h b/arch/powerpc/include/asm/bitops.h index 910194e9a1e2..a5e9a7d494d8 100644 --- a/arch/powerpc/include/asm/bitops.h +++ b/arch/powerpc/include/asm/bitops.h @@ -46,6 +46,11 @@ #include #include +/* PPC bit number conversion */ +#define PPC_BITLSHIFT(be) (BITS_PER_LONG - 1 - (be)) +#define PPC_BIT(bit) (1UL << PPC_BITLSHIFT(bit)) +#define PPC_BITMASK(bs, be) ((PPC_BIT(bs) - PPC_BIT(be)) | PPC_BIT(bs)) + /* * clear_bit doesn't imply a memory barrier */ diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h new file mode 100644 index 000000000000..8157d4eaead6 --- /dev/null +++ b/arch/powerpc/include/asm/mce.h @@ -0,0 +1,67 @@ +/* + * Machine check exception header file. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ * + * Copyright 2013 IBM Corporation + * Author: Mahesh Salgaonkar + */ + +#ifndef __ASM_PPC64_MCE_H__ +#define __ASM_PPC64_MCE_H__ + +#include + +/* + * Machine Check bits on power7 and power8 + */ +#define P7_SRR1_MC_LOADSTORE(srr1) ((srr1) & PPC_BIT(42)) /* P8 too */ + +/* SRR1 bits for machine check (On Power7 and Power8) */ +#define P7_SRR1_MC_IFETCH(srr1) ((srr1) & PPC_BITMASK(43, 45)) /* P8 too */ + +#define P7_SRR1_MC_IFETCH_UE (0x1 << PPC_BITLSHIFT(45)) /* P8 too */ +#define P7_SRR1_MC_IFETCH_SLB_PARITY (0x2 << PPC_BITLSHIFT(45)) /* P8 too */ +#define P7_SRR1_MC_IFETCH_SLB_MULTIHIT (0x3 << PPC_BITLSHIFT(45)) /* P8 too */ +#define P7_SRR1_MC_IFETCH_SLB_BOTH (0x4 << PPC_BITLSHIFT(45)) +#define P7_SRR1_MC_IFETCH_TLB_MULTIHIT (0x5 << PPC_BITLSHIFT(45)) /* P8 too */ +#define P7_SRR1_MC_IFETCH_UE_TLB_RELOAD (0x6 << PPC_BITLSHIFT(45)) /* P8 too */ +#define P7_SRR1_MC_IFETCH_UE_IFU_INTERNAL (0x7 << PPC_BITLSHIFT(45)) + +/* SRR1 bits for machine check (On Power8) */ +#define P8_SRR1_MC_IFETCH_ERAT_MULTIHIT (0x4 << PPC_BITLSHIFT(45)) + +/* DSISR bits for machine check (On Power7 and Power8) */ +#define P7_DSISR_MC_UE (PPC_BIT(48)) /* P8 too */ +#define P7_DSISR_MC_UE_TABLEWALK (PPC_BIT(49)) /* P8 too */ +#define P7_DSISR_MC_ERAT_MULTIHIT (PPC_BIT(52)) /* P8 too */ +#define P7_DSISR_MC_TLB_MULTIHIT_MFTLB (PPC_BIT(53)) /* P8 too */ +#define P7_DSISR_MC_SLB_PARITY_MFSLB (PPC_BIT(55)) /* P8 too */ +#define P7_DSISR_MC_SLB_MULTIHIT (PPC_BIT(56)) /* P8 too */ +#define P7_DSISR_MC_SLB_MULTIHIT_PARITY (PPC_BIT(57)) /* P8 too */ + +/* + * DSISR bits for machine check (Power8) in addition to above. + * Secondary DERAT Multihit + */ +#define P8_DSISR_MC_ERAT_MULTIHIT_SEC (PPC_BIT(54)) + +/* SLB error bits */ +#define P7_DSISR_MC_SLB_ERRORS (P7_DSISR_MC_ERAT_MULTIHIT | \ + P7_DSISR_MC_SLB_PARITY_MFSLB | \ + P7_DSISR_MC_SLB_MULTIHIT | \ + P7_DSISR_MC_SLB_MULTIHIT_PARITY) + +#endif /* __ASM_PPC64_MCE_H__ */ diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 445cb6e39d5b..07c63d0aa759 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -39,6 +39,7 @@ obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o \ obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_ppc970.o cpu_setup_pa6t.o obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o +obj-$(CONFIG_PPC_BOOK3S_64) += mce_power.o obj64-$(CONFIG_RELOCATABLE) += reloc_64.o obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o idle_book3e.o obj-$(CONFIG_PPC_A2) += cpu_setup_a2.o diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 55d0f9c282b8..c54188bcdd9e 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -73,6 +73,7 @@ extern void __restore_cpu_power8(void); extern void __restore_cpu_a2(void); extern void __flush_tlb_power7(unsigned long inval_selector); extern void __flush_tlb_power8(unsigned long inval_selector); +extern long __machine_check_early_realmode_p7(struct pt_regs *regs); #endif /* CONFIG_PPC64 */ #if defined(CONFIG_E500) extern void __setup_cpu_e5500(unsigned long offset, struct cpu_spec* spec); @@ -443,6 +444,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_setup = __setup_cpu_power7, .cpu_restore = __restore_cpu_power7, .flush_tlb = __flush_tlb_power7, + .machine_check_early = __machine_check_early_realmode_p7, .platform = "power7", }, { /* 2.07-compliant processor, i.e. 
Power8 "architected" mode */ @@ -479,6 +481,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_setup = __setup_cpu_power7, .cpu_restore = __restore_cpu_power7, .flush_tlb = __flush_tlb_power7, + .machine_check_early = __machine_check_early_realmode_p7, .platform = "power7", }, { /* Power7+ */ @@ -498,6 +501,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_setup = __setup_cpu_power7, .cpu_restore = __restore_cpu_power7, .flush_tlb = __flush_tlb_power7, + .machine_check_early = __machine_check_early_realmode_p7, .platform = "power7+", }, { /* Power8E */ diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c new file mode 100644 index 000000000000..690547319b03 --- /dev/null +++ b/arch/powerpc/kernel/mce_power.c @@ -0,0 +1,150 @@ +/* + * Machine check exception handling CPU-side for power7 and power8 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright 2013 IBM Corporation + * Author: Mahesh Salgaonkar + */ + +#undef DEBUG +#define pr_fmt(fmt) "mce_power: " fmt + +#include +#include +#include +#include + +/* flush SLBs and reload */ +static void flush_and_reload_slb(void) +{ + struct slb_shadow *slb; + unsigned long i, n; + + /* Invalidate all SLBs */ + asm volatile("slbmte %0,%0; slbia" : : "r" (0)); + +#ifdef CONFIG_KVM_BOOK3S_HANDLER + /* + * If machine check is hit when in guest or in transition, we will + * only flush the SLBs and continue. + */ + if (get_paca()->kvm_hstate.in_guest) + return; +#endif + + /* For host kernel, reload the SLBs from shadow SLB buffer. */ + slb = get_slb_shadow(); + if (!slb) + return; + + n = min_t(u32, slb->persistent, SLB_MIN_SIZE); + + /* Load up the SLB entries from shadow SLB */ + for (i = 0; i < n; i++) { + unsigned long rb = slb->save_area[i].esid; + unsigned long rs = slb->save_area[i].vsid; + + rb = (rb & ~0xFFFul) | i; + asm volatile("slbmte %0,%1" : : "r" (rs), "r" (rb)); + } +} + +static long mce_handle_derror(uint64_t dsisr, uint64_t slb_error_bits) +{ + long handled = 1; + + /* + * flush and reload SLBs for SLB errors and flush TLBs for TLB errors. + * reset the error bits whenever we handle them so that at the end + * we can check whether we handled all of them or not. + * */ + if (dsisr & slb_error_bits) { + flush_and_reload_slb(); + /* reset error bits */ + dsisr &= ~(slb_error_bits); + } + if (dsisr & P7_DSISR_MC_TLB_MULTIHIT_MFTLB) { + if (cur_cpu_spec && cur_cpu_spec->flush_tlb) + cur_cpu_spec->flush_tlb(TLBIEL_INVAL_PAGE); + /* reset error bits */ + dsisr &= ~P7_DSISR_MC_TLB_MULTIHIT_MFTLB; + } + /* Any other errors we don't understand? 
*/ + if (dsisr & 0xffffffffUL) + handled = 0; + + return handled; +} + +static long mce_handle_derror_p7(uint64_t dsisr) +{ + return mce_handle_derror(dsisr, P7_DSISR_MC_SLB_ERRORS); +} + +static long mce_handle_common_ierror(uint64_t srr1) +{ + long handled = 0; + + switch (P7_SRR1_MC_IFETCH(srr1)) { + case 0: + break; + case P7_SRR1_MC_IFETCH_SLB_PARITY: + case P7_SRR1_MC_IFETCH_SLB_MULTIHIT: + /* flush and reload SLBs for SLB errors. */ + flush_and_reload_slb(); + handled = 1; + break; + case P7_SRR1_MC_IFETCH_TLB_MULTIHIT: + if (cur_cpu_spec && cur_cpu_spec->flush_tlb) { + cur_cpu_spec->flush_tlb(TLBIEL_INVAL_PAGE); + handled = 1; + } + break; + default: + break; + } + + return handled; +} + +static long mce_handle_ierror_p7(uint64_t srr1) +{ + long handled = 0; + + handled = mce_handle_common_ierror(srr1); + + if (P7_SRR1_MC_IFETCH(srr1) == P7_SRR1_MC_IFETCH_SLB_BOTH) { + flush_and_reload_slb(); + handled = 1; + } + return handled; +} + +long __machine_check_early_realmode_p7(struct pt_regs *regs) +{ + uint64_t srr1; + long handled = 1; + + srr1 = regs->msr; + + if (P7_SRR1_MC_LOADSTORE(srr1)) + handled = mce_handle_derror_p7(regs->dsisr); + else + handled = mce_handle_ierror_p7(srr1); + + /* TODO: Decode machine check reason. */ + return handled; +} -- cgit v1.2.3 From ae744f3432d3872c51298d922728e13c24ccc068 Mon Sep 17 00:00:00 2001 From: Mahesh Salgaonkar Date: Wed, 30 Oct 2013 20:05:26 +0530 Subject: powerpc/book3s: Flush SLB/TLBs if we get SLB/TLB machine check errors on power8. This patch handles the memory errors on power8. If we get a machine check exception due to SLB or TLB errors, then flush SLBs/TLBs and reload SLBs to recover. Signed-off-by: Mahesh Salgaonkar Acked-by: Paul Mackerras Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/mce.h | 3 +++ arch/powerpc/kernel/cputable.c | 4 ++++ arch/powerpc/kernel/mce_power.c | 34 ++++++++++++++++++++++++++++++++++ 3 files changed, 41 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h index 8157d4eaead6..e3ffa825b970 100644 --- a/arch/powerpc/include/asm/mce.h +++ b/arch/powerpc/include/asm/mce.h @@ -64,4 +64,7 @@ P7_DSISR_MC_SLB_MULTIHIT | \ P7_DSISR_MC_SLB_MULTIHIT_PARITY) +#define P8_DSISR_MC_SLB_ERRORS (P7_DSISR_MC_SLB_ERRORS | \ + P8_DSISR_MC_ERAT_MULTIHIT_SEC) + #endif /* __ASM_PPC64_MCE_H__ */ diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index c54188bcdd9e..6c8dd5da4de5 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -74,6 +74,7 @@ extern void __restore_cpu_a2(void); extern void __flush_tlb_power7(unsigned long inval_selector); extern void __flush_tlb_power8(unsigned long inval_selector); extern long __machine_check_early_realmode_p7(struct pt_regs *regs); +extern long __machine_check_early_realmode_p8(struct pt_regs *regs); #endif /* CONFIG_PPC64 */ #if defined(CONFIG_E500) extern void __setup_cpu_e5500(unsigned long offset, struct cpu_spec* spec); @@ -462,6 +463,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_setup = __setup_cpu_power8, .cpu_restore = __restore_cpu_power8, .flush_tlb = __flush_tlb_power8, + .machine_check_early = __machine_check_early_realmode_p8, .platform = "power8", }, { /* Power7 */ @@ -521,6 +523,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_setup = __setup_cpu_power8, .cpu_restore = __restore_cpu_power8, .flush_tlb = __flush_tlb_power8, + .machine_check_early = __machine_check_early_realmode_p8, .platform = "power8", 
}, { /* Power8 */ @@ -540,6 +543,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_setup = __setup_cpu_power8, .cpu_restore = __restore_cpu_power8, .flush_tlb = __flush_tlb_power8, + .machine_check_early = __machine_check_early_realmode_p8, .platform = "power8", }, { /* Cell Broadband Engine */ diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c index 690547319b03..60a217f11275 100644 --- a/arch/powerpc/kernel/mce_power.c +++ b/arch/powerpc/kernel/mce_power.c @@ -148,3 +148,37 @@ long __machine_check_early_realmode_p7(struct pt_regs *regs) /* TODO: Decode machine check reason. */ return handled; } + +static long mce_handle_ierror_p8(uint64_t srr1) +{ + long handled = 0; + + handled = mce_handle_common_ierror(srr1); + + if (P7_SRR1_MC_IFETCH(srr1) == P8_SRR1_MC_IFETCH_ERAT_MULTIHIT) { + flush_and_reload_slb(); + handled = 1; + } + return handled; +} + +static long mce_handle_derror_p8(uint64_t dsisr) +{ + return mce_handle_derror(dsisr, P8_DSISR_MC_SLB_ERRORS); +} + +long __machine_check_early_realmode_p8(struct pt_regs *regs) +{ + uint64_t srr1; + long handled = 1; + + srr1 = regs->msr; + + if (P7_SRR1_MC_LOADSTORE(srr1)) + handled = mce_handle_derror_p8(regs->dsisr); + else + handled = mce_handle_ierror_p8(srr1); + + /* TODO: Decode machine check reason. */ + return handled; +} -- cgit v1.2.3 From 36df96f8acaf51992177645eb2d781f766ce97dc Mon Sep 17 00:00:00 2001 From: Mahesh Salgaonkar Date: Wed, 30 Oct 2013 20:05:40 +0530 Subject: powerpc/book3s: Decode and save machine check event. Now that we handle machine check in linux, the MCE decoding should also take place in linux host. This info is crucial to log before we go down in case we can not handle the machine check errors. This patch decodes and populates a machine check event which contain high level meaning full MCE information. We do this in real mode C code with ME bit on. The MCE information is still available on emergency stack (in pt_regs structure format). Even if we take another exception at this point the MCE early handler will allocate a new stack frame on top of current one. So when we return back here we still have our MCE information safe on current stack. We use per cpu buffer to save high level MCE information. Each per cpu buffer is an array of machine check event structure indexed by per cpu counter mce_nest_count. The mce_nest_count is incremented every time we enter machine check early handler in real mode to get the current free slot (index = mce_nest_count - 1). The mce_nest_count is decremented once the MCE info is consumed by virtual mode machine exception handler. This patch provides save_mce_event(), get_mce_event() and release_mce_event() generic routines that can be used by machine check handlers to populate and retrieve the event. The routine release_mce_event() will free the event slot so that it can be reused. Caller can invoke get_mce_event() with a release flag either to release the event slot immediately OR keep it so that it can be fetched again. The event slot can be also released anytime by invoking release_mce_event(). This patch also updates kvm code to invoke get_mce_event to retrieve generic mce event rather than paca->opal_mce_evt. The KVM code always calls get_mce_event() with release flags set to false so that event is available for linus host machine If machine check occurs while we are in guest, KVM tries to handle the error. If KVM is able to handle MC error successfully, it enters the guest and delivers the machine check to guest. 
If KVM is not able to handle MC error, it exists the guest and passes the control to linux host machine check handler which then logs MC event and decides how to handle it in linux host. In failure case, KVM needs to make sure that the MC event is available for linux host to consume. Hence KVM always calls get_mce_event() with release flags set to false and later it invokes release_mce_event() only if it succeeds to handle error. Signed-off-by: Mahesh Salgaonkar Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/mce.h | 124 +++++++++++++++++++++++++ arch/powerpc/kernel/Makefile | 2 +- arch/powerpc/kernel/mce.c | 164 ++++++++++++++++++++++++++++++++++ arch/powerpc/kernel/mce_power.c | 116 ++++++++++++++++++++++-- arch/powerpc/kvm/book3s_hv_ras.c | 32 ++++--- arch/powerpc/platforms/powernv/opal.c | 35 ++++---- 6 files changed, 434 insertions(+), 39 deletions(-) create mode 100644 arch/powerpc/kernel/mce.c (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h index e3ffa825b970..87cad2a808c2 100644 --- a/arch/powerpc/include/asm/mce.h +++ b/arch/powerpc/include/asm/mce.h @@ -66,5 +66,129 @@ #define P8_DSISR_MC_SLB_ERRORS (P7_DSISR_MC_SLB_ERRORS | \ P8_DSISR_MC_ERAT_MULTIHIT_SEC) +enum MCE_Version { + MCE_V1 = 1, +}; + +enum MCE_Severity { + MCE_SEV_NO_ERROR = 0, + MCE_SEV_WARNING = 1, + MCE_SEV_ERROR_SYNC = 2, + MCE_SEV_FATAL = 3, +}; + +enum MCE_Disposition { + MCE_DISPOSITION_RECOVERED = 0, + MCE_DISPOSITION_NOT_RECOVERED = 1, +}; + +enum MCE_Initiator { + MCE_INITIATOR_UNKNOWN = 0, + MCE_INITIATOR_CPU = 1, +}; + +enum MCE_ErrorType { + MCE_ERROR_TYPE_UNKNOWN = 0, + MCE_ERROR_TYPE_UE = 1, + MCE_ERROR_TYPE_SLB = 2, + MCE_ERROR_TYPE_ERAT = 3, + MCE_ERROR_TYPE_TLB = 4, +}; + +enum MCE_UeErrorType { + MCE_UE_ERROR_INDETERMINATE = 0, + MCE_UE_ERROR_IFETCH = 1, + MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH = 2, + MCE_UE_ERROR_LOAD_STORE = 3, + MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE = 4, +}; + +enum MCE_SlbErrorType { + MCE_SLB_ERROR_INDETERMINATE = 0, + MCE_SLB_ERROR_PARITY = 1, + MCE_SLB_ERROR_MULTIHIT = 2, +}; + +enum MCE_EratErrorType { + MCE_ERAT_ERROR_INDETERMINATE = 0, + MCE_ERAT_ERROR_PARITY = 1, + MCE_ERAT_ERROR_MULTIHIT = 2, +}; + +enum MCE_TlbErrorType { + MCE_TLB_ERROR_INDETERMINATE = 0, + MCE_TLB_ERROR_PARITY = 1, + MCE_TLB_ERROR_MULTIHIT = 2, +}; + +struct machine_check_event { + enum MCE_Version version:8; /* 0x00 */ + uint8_t in_use; /* 0x01 */ + enum MCE_Severity severity:8; /* 0x02 */ + enum MCE_Initiator initiator:8; /* 0x03 */ + enum MCE_ErrorType error_type:8; /* 0x04 */ + enum MCE_Disposition disposition:8; /* 0x05 */ + uint8_t reserved_1[2]; /* 0x06 */ + uint64_t gpr3; /* 0x08 */ + uint64_t srr0; /* 0x10 */ + uint64_t srr1; /* 0x18 */ + union { /* 0x20 */ + struct { + enum MCE_UeErrorType ue_error_type:8; + uint8_t effective_address_provided; + uint8_t physical_address_provided; + uint8_t reserved_1[5]; + uint64_t effective_address; + uint64_t physical_address; + uint8_t reserved_2[8]; + } ue_error; + + struct { + enum MCE_SlbErrorType slb_error_type:8; + uint8_t effective_address_provided; + uint8_t reserved_1[6]; + uint64_t effective_address; + uint8_t reserved_2[16]; + } slb_error; + + struct { + enum MCE_EratErrorType erat_error_type:8; + uint8_t effective_address_provided; + uint8_t reserved_1[6]; + uint64_t effective_address; + uint8_t reserved_2[16]; + } erat_error; + + struct { + enum MCE_TlbErrorType tlb_error_type:8; + uint8_t effective_address_provided; + uint8_t reserved_1[6]; + uint64_t 
effective_address; + uint8_t reserved_2[16]; + } tlb_error; + } u; +}; + +struct mce_error_info { + enum MCE_ErrorType error_type:8; + union { + enum MCE_UeErrorType ue_error_type:8; + enum MCE_SlbErrorType slb_error_type:8; + enum MCE_EratErrorType erat_error_type:8; + enum MCE_TlbErrorType tlb_error_type:8; + } u; + uint8_t reserved[2]; +}; + +#define MAX_MC_EVT 100 + +/* Release flags for get_mce_event() */ +#define MCE_EVENT_RELEASE true +#define MCE_EVENT_DONTRELEASE false + +extern void save_mce_event(struct pt_regs *regs, long handled, + struct mce_error_info *mce_err, uint64_t addr); +extern int get_mce_event(struct machine_check_event *mce, bool release); +extern void release_mce_event(void); #endif /* __ASM_PPC64_MCE_H__ */ diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 07c63d0aa759..904d713366ff 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -39,7 +39,7 @@ obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o \ obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_ppc970.o cpu_setup_pa6t.o obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o -obj-$(CONFIG_PPC_BOOK3S_64) += mce_power.o +obj-$(CONFIG_PPC_BOOK3S_64) += mce.o mce_power.o obj64-$(CONFIG_RELOCATABLE) += reloc_64.o obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o idle_book3e.o obj-$(CONFIG_PPC_A2) += cpu_setup_a2.o diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c new file mode 100644 index 000000000000..aeecdf1ba897 --- /dev/null +++ b/arch/powerpc/kernel/mce.c @@ -0,0 +1,164 @@ +/* + * Machine check exception handling. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright 2013 IBM Corporation + * Author: Mahesh Salgaonkar + */ + +#undef DEBUG +#define pr_fmt(fmt) "mce: " fmt + +#include +#include +#include +#include +#include + +static DEFINE_PER_CPU(int, mce_nest_count); +static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event); + +static void mce_set_error_info(struct machine_check_event *mce, + struct mce_error_info *mce_err) +{ + mce->error_type = mce_err->error_type; + switch (mce_err->error_type) { + case MCE_ERROR_TYPE_UE: + mce->u.ue_error.ue_error_type = mce_err->u.ue_error_type; + break; + case MCE_ERROR_TYPE_SLB: + mce->u.slb_error.slb_error_type = mce_err->u.slb_error_type; + break; + case MCE_ERROR_TYPE_ERAT: + mce->u.erat_error.erat_error_type = mce_err->u.erat_error_type; + break; + case MCE_ERROR_TYPE_TLB: + mce->u.tlb_error.tlb_error_type = mce_err->u.tlb_error_type; + break; + case MCE_ERROR_TYPE_UNKNOWN: + default: + break; + } +} + +/* + * Decode and save high level MCE information into per cpu buffer which + * is an array of machine_check_event structure. 
+ */ +void save_mce_event(struct pt_regs *regs, long handled, + struct mce_error_info *mce_err, + uint64_t addr) +{ + uint64_t srr1; + int index = __get_cpu_var(mce_nest_count)++; + struct machine_check_event *mce = &__get_cpu_var(mce_event[index]); + + /* + * Return if we don't have enough space to log mce event. + * mce_nest_count may go beyond MAX_MC_EVT but that's ok, + * the check below will stop buffer overrun. + */ + if (index >= MAX_MC_EVT) + return; + + /* Populate generic machine check info */ + mce->version = MCE_V1; + mce->srr0 = regs->nip; + mce->srr1 = regs->msr; + mce->gpr3 = regs->gpr[3]; + mce->in_use = 1; + + mce->initiator = MCE_INITIATOR_CPU; + if (handled) + mce->disposition = MCE_DISPOSITION_RECOVERED; + else + mce->disposition = MCE_DISPOSITION_NOT_RECOVERED; + mce->severity = MCE_SEV_ERROR_SYNC; + + srr1 = regs->msr; + + /* + * Populate the mce error_type and type-specific error_type. + */ + mce_set_error_info(mce, mce_err); + + if (!addr) + return; + + if (mce->error_type == MCE_ERROR_TYPE_TLB) { + mce->u.tlb_error.effective_address_provided = true; + mce->u.tlb_error.effective_address = addr; + } else if (mce->error_type == MCE_ERROR_TYPE_SLB) { + mce->u.slb_error.effective_address_provided = true; + mce->u.slb_error.effective_address = addr; + } else if (mce->error_type == MCE_ERROR_TYPE_ERAT) { + mce->u.erat_error.effective_address_provided = true; + mce->u.erat_error.effective_address = addr; + } else if (mce->error_type == MCE_ERROR_TYPE_UE) { + mce->u.ue_error.effective_address_provided = true; + mce->u.ue_error.effective_address = addr; + } + return; +} + +/* + * get_mce_event: + * mce Pointer to machine_check_event structure to be filled. + * release Flag to indicate whether to free the event slot or not. + * 0 <= do not release the mce event. Caller will invoke + * release_mce_event() once event has been consumed. + * 1 <= release the slot. + * + * return 1 = success + * 0 = failure + * + * get_mce_event() will be called by platform specific machine check + * handle routine and in KVM. + * When we call get_mce_event(), we are still in interrupt context and + * preemption will not be scheduled until ret_from_expect() routine + * is called. + */ +int get_mce_event(struct machine_check_event *mce, bool release) +{ + int index = __get_cpu_var(mce_nest_count) - 1; + struct machine_check_event *mc_evt; + int ret = 0; + + /* Sanity check */ + if (index < 0) + return ret; + + /* Check if we have MCE info to process. */ + if (index < MAX_MC_EVT) { + mc_evt = &__get_cpu_var(mce_event[index]); + /* Copy the event structure and release the original */ + if (mce) + *mce = *mc_evt; + if (release) + mc_evt->in_use = 0; + ret = 1; + } + /* Decrement the count to free the slot. 
*/ + if (release) + __get_cpu_var(mce_nest_count)--; + + return ret; +} + +void release_mce_event(void) +{ + get_mce_event(NULL, true); +} diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c index 60a217f11275..b36e777a734f 100644 --- a/arch/powerpc/kernel/mce_power.c +++ b/arch/powerpc/kernel/mce_power.c @@ -133,22 +133,116 @@ static long mce_handle_ierror_p7(uint64_t srr1) return handled; } +static void mce_get_common_ierror(struct mce_error_info *mce_err, uint64_t srr1) +{ + switch (P7_SRR1_MC_IFETCH(srr1)) { + case P7_SRR1_MC_IFETCH_SLB_PARITY: + mce_err->error_type = MCE_ERROR_TYPE_SLB; + mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY; + break; + case P7_SRR1_MC_IFETCH_SLB_MULTIHIT: + mce_err->error_type = MCE_ERROR_TYPE_SLB; + mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT; + break; + case P7_SRR1_MC_IFETCH_TLB_MULTIHIT: + mce_err->error_type = MCE_ERROR_TYPE_TLB; + mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT; + break; + case P7_SRR1_MC_IFETCH_UE: + case P7_SRR1_MC_IFETCH_UE_IFU_INTERNAL: + mce_err->error_type = MCE_ERROR_TYPE_UE; + mce_err->u.ue_error_type = MCE_UE_ERROR_IFETCH; + break; + case P7_SRR1_MC_IFETCH_UE_TLB_RELOAD: + mce_err->error_type = MCE_ERROR_TYPE_UE; + mce_err->u.ue_error_type = + MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH; + break; + } +} + +static void mce_get_ierror_p7(struct mce_error_info *mce_err, uint64_t srr1) +{ + mce_get_common_ierror(mce_err, srr1); + if (P7_SRR1_MC_IFETCH(srr1) == P7_SRR1_MC_IFETCH_SLB_BOTH) { + mce_err->error_type = MCE_ERROR_TYPE_SLB; + mce_err->u.slb_error_type = MCE_SLB_ERROR_INDETERMINATE; + } +} + +static void mce_get_derror_p7(struct mce_error_info *mce_err, uint64_t dsisr) +{ + if (dsisr & P7_DSISR_MC_UE) { + mce_err->error_type = MCE_ERROR_TYPE_UE; + mce_err->u.ue_error_type = MCE_UE_ERROR_LOAD_STORE; + } else if (dsisr & P7_DSISR_MC_UE_TABLEWALK) { + mce_err->error_type = MCE_ERROR_TYPE_UE; + mce_err->u.ue_error_type = + MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE; + } else if (dsisr & P7_DSISR_MC_ERAT_MULTIHIT) { + mce_err->error_type = MCE_ERROR_TYPE_ERAT; + mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT; + } else if (dsisr & P7_DSISR_MC_SLB_MULTIHIT) { + mce_err->error_type = MCE_ERROR_TYPE_SLB; + mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT; + } else if (dsisr & P7_DSISR_MC_SLB_PARITY_MFSLB) { + mce_err->error_type = MCE_ERROR_TYPE_SLB; + mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY; + } else if (dsisr & P7_DSISR_MC_TLB_MULTIHIT_MFTLB) { + mce_err->error_type = MCE_ERROR_TYPE_TLB; + mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT; + } else if (dsisr & P7_DSISR_MC_SLB_MULTIHIT_PARITY) { + mce_err->error_type = MCE_ERROR_TYPE_SLB; + mce_err->u.slb_error_type = MCE_SLB_ERROR_INDETERMINATE; + } +} + long __machine_check_early_realmode_p7(struct pt_regs *regs) { - uint64_t srr1; + uint64_t srr1, addr; long handled = 1; + struct mce_error_info mce_error_info = { 0 }; srr1 = regs->msr; - if (P7_SRR1_MC_LOADSTORE(srr1)) + /* + * Handle memory errors depending whether this was a load/store or + * ifetch exception. Also, populate the mce error_type and + * type-specific error_type from either SRR1 or DSISR, depending + * whether this was a load/store or ifetch exception + */ + if (P7_SRR1_MC_LOADSTORE(srr1)) { handled = mce_handle_derror_p7(regs->dsisr); - else + mce_get_derror_p7(&mce_error_info, regs->dsisr); + addr = regs->dar; + } else { handled = mce_handle_ierror_p7(srr1); + mce_get_ierror_p7(&mce_error_info, srr1); + addr = regs->nip; + } - /* TODO: Decode machine check reason. 
*/ + save_mce_event(regs, handled, &mce_error_info, addr); return handled; } +static void mce_get_ierror_p8(struct mce_error_info *mce_err, uint64_t srr1) +{ + mce_get_common_ierror(mce_err, srr1); + if (P7_SRR1_MC_IFETCH(srr1) == P8_SRR1_MC_IFETCH_ERAT_MULTIHIT) { + mce_err->error_type = MCE_ERROR_TYPE_ERAT; + mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT; + } +} + +static void mce_get_derror_p8(struct mce_error_info *mce_err, uint64_t dsisr) +{ + mce_get_derror_p7(mce_err, dsisr); + if (dsisr & P8_DSISR_MC_ERAT_MULTIHIT_SEC) { + mce_err->error_type = MCE_ERROR_TYPE_ERAT; + mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT; + } +} + static long mce_handle_ierror_p8(uint64_t srr1) { long handled = 0; @@ -169,16 +263,22 @@ static long mce_handle_derror_p8(uint64_t dsisr) long __machine_check_early_realmode_p8(struct pt_regs *regs) { - uint64_t srr1; + uint64_t srr1, addr; long handled = 1; + struct mce_error_info mce_error_info = { 0 }; srr1 = regs->msr; - if (P7_SRR1_MC_LOADSTORE(srr1)) + if (P7_SRR1_MC_LOADSTORE(srr1)) { handled = mce_handle_derror_p8(regs->dsisr); - else + mce_get_derror_p8(&mce_error_info, regs->dsisr); + addr = regs->dar; + } else { handled = mce_handle_ierror_p8(srr1); + mce_get_ierror_p8(&mce_error_info, srr1); + addr = regs->nip; + } - /* TODO: Decode machine check reason. */ + save_mce_event(regs, handled, &mce_error_info, addr); return handled; } diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c index 5c427b41a2f5..768a9f977c00 100644 --- a/arch/powerpc/kvm/book3s_hv_ras.c +++ b/arch/powerpc/kvm/book3s_hv_ras.c @@ -12,6 +12,7 @@ #include #include #include +#include /* SRR1 bits for machine check on POWER7 */ #define SRR1_MC_LDSTERR (1ul << (63-42)) @@ -67,9 +68,7 @@ static void reload_slb(struct kvm_vcpu *vcpu) static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu) { unsigned long srr1 = vcpu->arch.shregs.msr; -#ifdef CONFIG_PPC_POWERNV - struct opal_machine_check_event *opal_evt; -#endif + struct machine_check_event mce_evt; long handled = 1; if (srr1 & SRR1_MC_LDSTERR) { @@ -109,22 +108,31 @@ static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu) handled = 0; } -#ifdef CONFIG_PPC_POWERNV /* - * See if OPAL has already handled the condition. - * We assume that if the condition is recovered then OPAL + * See if we have already handled the condition in the linux host. + * We assume that if the condition is recovered then linux host * will have generated an error log event that we will pick * up and log later. + * Don't release mce event now. In case if condition is not + * recovered we do guest exit and go back to linux host machine + * check handler. Hence we need make sure that current mce event + * is available for linux host to consume. */ - opal_evt = local_paca->opal_mc_evt; - if (opal_evt->version == OpalMCE_V1 && - (opal_evt->severity == OpalMCE_SEV_NO_ERROR || - opal_evt->disposition == OpalMCE_DISPOSITION_RECOVERED)) + if (!get_mce_event(&mce_evt, MCE_EVENT_DONTRELEASE)) + goto out; + + if (mce_evt.version == MCE_V1 && + (mce_evt.severity == MCE_SEV_NO_ERROR || + mce_evt.disposition == MCE_DISPOSITION_RECOVERED)) handled = 1; +out: + /* + * If we have handled the error, then release the mce event because + * we will be delivering machine check to guest. 
+ */ if (handled) - opal_evt->in_use = 0; -#endif + release_mce_event(); return handled; } diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 1c798cd55372..c5e71d773f47 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -21,6 +21,7 @@ #include #include #include +#include #include "powernv.h" @@ -256,8 +257,7 @@ int opal_put_chars(uint32_t vtermno, const char *data, int total_len) int opal_machine_check(struct pt_regs *regs) { - struct opal_machine_check_event *opal_evt = get_paca()->opal_mc_evt; - struct opal_machine_check_event evt; + struct machine_check_event evt; const char *level, *sevstr, *subtype; static const char *opal_mc_ue_types[] = { "Indeterminate", @@ -282,30 +282,29 @@ int opal_machine_check(struct pt_regs *regs) "Multihit", }; - /* Copy the event structure and release the original */ - evt = *opal_evt; - opal_evt->in_use = 0; + if (!get_mce_event(&evt, MCE_EVENT_RELEASE)) + return 0; /* Print things out */ - if (evt.version != OpalMCE_V1) { + if (evt.version != MCE_V1) { pr_err("Machine Check Exception, Unknown event version %d !\n", evt.version); return 0; } switch(evt.severity) { - case OpalMCE_SEV_NO_ERROR: + case MCE_SEV_NO_ERROR: level = KERN_INFO; sevstr = "Harmless"; break; - case OpalMCE_SEV_WARNING: + case MCE_SEV_WARNING: level = KERN_WARNING; sevstr = ""; break; - case OpalMCE_SEV_ERROR_SYNC: + case MCE_SEV_ERROR_SYNC: level = KERN_ERR; sevstr = "Severe"; break; - case OpalMCE_SEV_FATAL: + case MCE_SEV_FATAL: default: level = KERN_ERR; sevstr = "Fatal"; @@ -313,12 +312,12 @@ int opal_machine_check(struct pt_regs *regs) } printk("%s%s Machine check interrupt [%s]\n", level, sevstr, - evt.disposition == OpalMCE_DISPOSITION_RECOVERED ? + evt.disposition == MCE_DISPOSITION_RECOVERED ? "Recovered" : "[Not recovered"); printk("%s Initiator: %s\n", level, - evt.initiator == OpalMCE_INITIATOR_CPU ? "CPU" : "Unknown"); + evt.initiator == MCE_INITIATOR_CPU ? "CPU" : "Unknown"); switch(evt.error_type) { - case OpalMCE_ERROR_TYPE_UE: + case MCE_ERROR_TYPE_UE: subtype = evt.u.ue_error.ue_error_type < ARRAY_SIZE(opal_mc_ue_types) ? opal_mc_ue_types[evt.u.ue_error.ue_error_type] @@ -331,7 +330,7 @@ int opal_machine_check(struct pt_regs *regs) printk("%s Physial address: %016llx\n", level, evt.u.ue_error.physical_address); break; - case OpalMCE_ERROR_TYPE_SLB: + case MCE_ERROR_TYPE_SLB: subtype = evt.u.slb_error.slb_error_type < ARRAY_SIZE(opal_mc_slb_types) ? opal_mc_slb_types[evt.u.slb_error.slb_error_type] @@ -341,7 +340,7 @@ int opal_machine_check(struct pt_regs *regs) printk("%s Effective address: %016llx\n", level, evt.u.slb_error.effective_address); break; - case OpalMCE_ERROR_TYPE_ERAT: + case MCE_ERROR_TYPE_ERAT: subtype = evt.u.erat_error.erat_error_type < ARRAY_SIZE(opal_mc_erat_types) ? opal_mc_erat_types[evt.u.erat_error.erat_error_type] @@ -351,7 +350,7 @@ int opal_machine_check(struct pt_regs *regs) printk("%s Effective address: %016llx\n", level, evt.u.erat_error.effective_address); break; - case OpalMCE_ERROR_TYPE_TLB: + case MCE_ERROR_TYPE_TLB: subtype = evt.u.tlb_error.tlb_error_type < ARRAY_SIZE(opal_mc_tlb_types) ? opal_mc_tlb_types[evt.u.tlb_error.tlb_error_type] @@ -362,11 +361,11 @@ int opal_machine_check(struct pt_regs *regs) level, evt.u.tlb_error.effective_address); break; default: - case OpalMCE_ERROR_TYPE_UNKNOWN: + case MCE_ERROR_TYPE_UNKNOWN: printk("%s Error type: Unknown\n", level); break; } - return evt.severity == OpalMCE_SEV_FATAL ? 
0 : 1; + return evt.severity == MCE_SEV_FATAL ? 0 : 1; } static irqreturn_t opal_interrupt(int irq, void *data) -- cgit v1.2.3 From b5ff4211a8294be2ddbaf963fa3666fa042292a8 Mon Sep 17 00:00:00 2001 From: Mahesh Salgaonkar Date: Wed, 30 Oct 2013 20:05:49 +0530 Subject: powerpc/book3s: Queue up and process delayed MCE events. When machine check real mode handler can not continue into host kernel in V mode, it returns from the interrupt and we loose MCE event which never gets logged. In such a situation queue up the MCE event so that we can log it later when we get back into host kernel with r1 pointing to kernel stack e.g. during syscall exit. Signed-off-by: Mahesh Salgaonkar Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/mce.h | 3 + arch/powerpc/kernel/entry_64.S | 5 ++ arch/powerpc/kernel/exceptions-64s.S | 7 +- arch/powerpc/kernel/mce.c | 154 ++++++++++++++++++++++++++++++++++ arch/powerpc/platforms/powernv/opal.c | 97 +-------------------- 5 files changed, 168 insertions(+), 98 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h index 87cad2a808c2..3276b409299c 100644 --- a/arch/powerpc/include/asm/mce.h +++ b/arch/powerpc/include/asm/mce.h @@ -190,5 +190,8 @@ extern void save_mce_event(struct pt_regs *regs, long handled, struct mce_error_info *mce_err, uint64_t addr); extern int get_mce_event(struct machine_check_event *mce, bool release); extern void release_mce_event(void); +extern void machine_check_queue_event(void); +extern void machine_check_process_queued_event(void); +extern void machine_check_print_event_info(struct machine_check_event *evt); #endif /* __ASM_PPC64_MCE_H__ */ diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index bbfb0294b354..770d6d65c47b 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -183,6 +183,11 @@ syscall_exit: #ifdef SHOW_SYSCALLS bl .do_show_syscall_exit ld r3,RESULT(r1) +#endif +#ifdef CONFIG_PPC_BOOK3S_64 +BEGIN_FTR_SECTION + bl .machine_check_process_queued_event +END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) #endif CURRENT_THREAD_INFO(r12, r1) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 1aec3025eeee..862b9dd4a9db 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -855,7 +855,8 @@ BEGIN_FTR_SECTION /* Supervisor state loss */ li r0,1 stb r0,PACA_NAPSTATELOST(r13) -3: MACHINE_CHECK_HANDLER_WINDUP +3: bl .machine_check_queue_event + MACHINE_CHECK_HANDLER_WINDUP GET_PACA(r13) ld r1,PACAR1(r13) b .power7_enter_nap_mode @@ -895,8 +896,10 @@ BEGIN_FTR_SECTION 2: /* * Return from MC interrupt. - * TODO: Queue up the MCE event so that we can log it later. + * Queue up the MCE event so that we can log it later, while + * returning from kernel or opal call. */ + bl .machine_check_queue_event MACHINE_CHECK_HANDLER_WINDUP rfid 9: diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index aeecdf1ba897..1c6d15701c56 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -31,6 +31,10 @@ static DEFINE_PER_CPU(int, mce_nest_count); static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event); +/* Queue for delayed MCE events. 
*/ +static DEFINE_PER_CPU(int, mce_queue_count); +static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event_queue); + static void mce_set_error_info(struct machine_check_event *mce, struct mce_error_info *mce_err) { @@ -162,3 +166,153 @@ void release_mce_event(void) { get_mce_event(NULL, true); } + +/* + * Queue up the MCE event which then can be handled later. + */ +void machine_check_queue_event(void) +{ + int index; + struct machine_check_event evt; + + if (!get_mce_event(&evt, MCE_EVENT_RELEASE)) + return; + + index = __get_cpu_var(mce_queue_count)++; + /* If queue is full, just return for now. */ + if (index >= MAX_MC_EVT) { + __get_cpu_var(mce_queue_count)--; + return; + } + __get_cpu_var(mce_event_queue[index]) = evt; +} + +/* + * process pending MCE event from the mce event queue. This function will be + * called during syscall exit. + */ +void machine_check_process_queued_event(void) +{ + int index; + + preempt_disable(); + /* + * For now just print it to console. + * TODO: log this error event to FSP or nvram. + */ + while (__get_cpu_var(mce_queue_count) > 0) { + index = __get_cpu_var(mce_queue_count) - 1; + machine_check_print_event_info( + &__get_cpu_var(mce_event_queue[index])); + __get_cpu_var(mce_queue_count)--; + } + preempt_enable(); +} + +void machine_check_print_event_info(struct machine_check_event *evt) +{ + const char *level, *sevstr, *subtype; + static const char *mc_ue_types[] = { + "Indeterminate", + "Instruction fetch", + "Page table walk ifetch", + "Load/Store", + "Page table walk Load/Store", + }; + static const char *mc_slb_types[] = { + "Indeterminate", + "Parity", + "Multihit", + }; + static const char *mc_erat_types[] = { + "Indeterminate", + "Parity", + "Multihit", + }; + static const char *mc_tlb_types[] = { + "Indeterminate", + "Parity", + "Multihit", + }; + + /* Print things out */ + if (evt->version != MCE_V1) { + pr_err("Machine Check Exception, Unknown event version %d !\n", + evt->version); + return; + } + switch (evt->severity) { + case MCE_SEV_NO_ERROR: + level = KERN_INFO; + sevstr = "Harmless"; + break; + case MCE_SEV_WARNING: + level = KERN_WARNING; + sevstr = ""; + break; + case MCE_SEV_ERROR_SYNC: + level = KERN_ERR; + sevstr = "Severe"; + break; + case MCE_SEV_FATAL: + default: + level = KERN_ERR; + sevstr = "Fatal"; + break; + } + + printk("%s%s Machine check interrupt [%s]\n", level, sevstr, + evt->disposition == MCE_DISPOSITION_RECOVERED ? + "Recovered" : "[Not recovered"); + printk("%s Initiator: %s\n", level, + evt->initiator == MCE_INITIATOR_CPU ? "CPU" : "Unknown"); + switch (evt->error_type) { + case MCE_ERROR_TYPE_UE: + subtype = evt->u.ue_error.ue_error_type < + ARRAY_SIZE(mc_ue_types) ? + mc_ue_types[evt->u.ue_error.ue_error_type] + : "Unknown"; + printk("%s Error type: UE [%s]\n", level, subtype); + if (evt->u.ue_error.effective_address_provided) + printk("%s Effective address: %016llx\n", + level, evt->u.ue_error.effective_address); + if (evt->u.ue_error.physical_address_provided) + printk("%s Physial address: %016llx\n", + level, evt->u.ue_error.physical_address); + break; + case MCE_ERROR_TYPE_SLB: + subtype = evt->u.slb_error.slb_error_type < + ARRAY_SIZE(mc_slb_types) ? 
+ mc_slb_types[evt->u.slb_error.slb_error_type] + : "Unknown"; + printk("%s Error type: SLB [%s]\n", level, subtype); + if (evt->u.slb_error.effective_address_provided) + printk("%s Effective address: %016llx\n", + level, evt->u.slb_error.effective_address); + break; + case MCE_ERROR_TYPE_ERAT: + subtype = evt->u.erat_error.erat_error_type < + ARRAY_SIZE(mc_erat_types) ? + mc_erat_types[evt->u.erat_error.erat_error_type] + : "Unknown"; + printk("%s Error type: ERAT [%s]\n", level, subtype); + if (evt->u.erat_error.effective_address_provided) + printk("%s Effective address: %016llx\n", + level, evt->u.erat_error.effective_address); + break; + case MCE_ERROR_TYPE_TLB: + subtype = evt->u.tlb_error.tlb_error_type < + ARRAY_SIZE(mc_tlb_types) ? + mc_tlb_types[evt->u.tlb_error.tlb_error_type] + : "Unknown"; + printk("%s Error type: TLB [%s]\n", level, subtype); + if (evt->u.tlb_error.effective_address_provided) + printk("%s Effective address: %016llx\n", + level, evt->u.tlb_error.effective_address); + break; + default: + case MCE_ERROR_TYPE_UNKNOWN: + printk("%s Error type: Unknown\n", level); + break; + } +} diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index c5e71d773f47..245096f90437 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -258,29 +258,6 @@ int opal_put_chars(uint32_t vtermno, const char *data, int total_len) int opal_machine_check(struct pt_regs *regs) { struct machine_check_event evt; - const char *level, *sevstr, *subtype; - static const char *opal_mc_ue_types[] = { - "Indeterminate", - "Instruction fetch", - "Page table walk ifetch", - "Load/Store", - "Page table walk Load/Store", - }; - static const char *opal_mc_slb_types[] = { - "Indeterminate", - "Parity", - "Multihit", - }; - static const char *opal_mc_erat_types[] = { - "Indeterminate", - "Parity", - "Multihit", - }; - static const char *opal_mc_tlb_types[] = { - "Indeterminate", - "Parity", - "Multihit", - }; if (!get_mce_event(&evt, MCE_EVENT_RELEASE)) return 0; @@ -291,80 +268,8 @@ int opal_machine_check(struct pt_regs *regs) evt.version); return 0; } - switch(evt.severity) { - case MCE_SEV_NO_ERROR: - level = KERN_INFO; - sevstr = "Harmless"; - break; - case MCE_SEV_WARNING: - level = KERN_WARNING; - sevstr = ""; - break; - case MCE_SEV_ERROR_SYNC: - level = KERN_ERR; - sevstr = "Severe"; - break; - case MCE_SEV_FATAL: - default: - level = KERN_ERR; - sevstr = "Fatal"; - break; - } + machine_check_print_event_info(&evt); - printk("%s%s Machine check interrupt [%s]\n", level, sevstr, - evt.disposition == MCE_DISPOSITION_RECOVERED ? - "Recovered" : "[Not recovered"); - printk("%s Initiator: %s\n", level, - evt.initiator == MCE_INITIATOR_CPU ? "CPU" : "Unknown"); - switch(evt.error_type) { - case MCE_ERROR_TYPE_UE: - subtype = evt.u.ue_error.ue_error_type < - ARRAY_SIZE(opal_mc_ue_types) ? - opal_mc_ue_types[evt.u.ue_error.ue_error_type] - : "Unknown"; - printk("%s Error type: UE [%s]\n", level, subtype); - if (evt.u.ue_error.effective_address_provided) - printk("%s Effective address: %016llx\n", - level, evt.u.ue_error.effective_address); - if (evt.u.ue_error.physical_address_provided) - printk("%s Physial address: %016llx\n", - level, evt.u.ue_error.physical_address); - break; - case MCE_ERROR_TYPE_SLB: - subtype = evt.u.slb_error.slb_error_type < - ARRAY_SIZE(opal_mc_slb_types) ? 
- opal_mc_slb_types[evt.u.slb_error.slb_error_type] - : "Unknown"; - printk("%s Error type: SLB [%s]\n", level, subtype); - if (evt.u.slb_error.effective_address_provided) - printk("%s Effective address: %016llx\n", - level, evt.u.slb_error.effective_address); - break; - case MCE_ERROR_TYPE_ERAT: - subtype = evt.u.erat_error.erat_error_type < - ARRAY_SIZE(opal_mc_erat_types) ? - opal_mc_erat_types[evt.u.erat_error.erat_error_type] - : "Unknown"; - printk("%s Error type: ERAT [%s]\n", level, subtype); - if (evt.u.erat_error.effective_address_provided) - printk("%s Effective address: %016llx\n", - level, evt.u.erat_error.effective_address); - break; - case MCE_ERROR_TYPE_TLB: - subtype = evt.u.tlb_error.tlb_error_type < - ARRAY_SIZE(opal_mc_tlb_types) ? - opal_mc_tlb_types[evt.u.tlb_error.tlb_error_type] - : "Unknown"; - printk("%s Error type: TLB [%s]\n", level, subtype); - if (evt.u.tlb_error.effective_address_provided) - printk("%s Effective address: %016llx\n", - level, evt.u.tlb_error.effective_address); - break; - default: - case MCE_ERROR_TYPE_UNKNOWN: - printk("%s Error type: Unknown\n", level); - break; - } return evt.severity == MCE_SEV_FATAL ? 0 : 1; } -- cgit v1.2.3 From b63a0ffe35de7e5f9b907bbc2c783e702f7e15af Mon Sep 17 00:00:00 2001 From: Mahesh Salgaonkar Date: Wed, 30 Oct 2013 20:06:13 +0530 Subject: powerpc/powernv: Machine check exception handling. Add basic error handling in the machine check exception handler. - If MSR_RI isn't set, we cannot recover. - Check if the disposition is set to OpalMCE_DISPOSITION_RECOVERED. - Check if the address at fault is inside the kernel address space; if not, send SIGBUS to the process if we hit the exception while in userspace. - If the address at fault is not provided and we get a synchronous machine check while in userspace, kill the task.
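The policy above is implemented by the new opal_recover_mce() helper added in the hunk below. As a reading aid, here is a condensed sketch of that decision logic (the same checks as in the patch, written with early returns instead of the recovered flag):

    static int opal_recover_mce(struct pt_regs *regs,
                                struct machine_check_event *evt)
    {
            uint64_t ea = get_mce_fault_addr(evt);

            if (!(regs->msr & MSR_RI))
                    return 0;       /* interrupt state unreliable, cannot recover */
            if (evt->disposition == MCE_DISPOSITION_RECOVERED)
                    return 1;       /* platform corrected itself */
            if (ea && !is_kernel_addr(ea) &&
                user_mode(regs) && !is_global_init(current)) {
                    /* fault address is in user space: kill the task */
                    _exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
                    return 1;
            }
            if (user_mode(regs) && !is_global_init(current) &&
                evt->severity == MCE_SEV_ERROR_SYNC) {
                    /* no usable address, but synchronous error in user space */
                    _exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
                    return 1;
            }
            return 0;               /* not recovered */
    }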
Signed-off-by: Mahesh Salgaonkar Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/mce.h | 1 + arch/powerpc/kernel/mce.c | 27 ++++++++++++++++++++++ arch/powerpc/platforms/powernv/opal.c | 43 ++++++++++++++++++++++++++++++++++- 3 files changed, 70 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h index 3276b409299c..a2b8c7b35fba 100644 --- a/arch/powerpc/include/asm/mce.h +++ b/arch/powerpc/include/asm/mce.h @@ -193,5 +193,6 @@ extern void release_mce_event(void); extern void machine_check_queue_event(void); extern void machine_check_process_queued_event(void); extern void machine_check_print_event_info(struct machine_check_event *evt); +extern uint64_t get_mce_fault_addr(struct machine_check_event *evt); #endif /* __ASM_PPC64_MCE_H__ */ diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index 1c6d15701c56..c0c52ec1fca7 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -316,3 +316,30 @@ void machine_check_print_event_info(struct machine_check_event *evt) break; } } + +uint64_t get_mce_fault_addr(struct machine_check_event *evt) +{ + switch (evt->error_type) { + case MCE_ERROR_TYPE_UE: + if (evt->u.ue_error.effective_address_provided) + return evt->u.ue_error.effective_address; + break; + case MCE_ERROR_TYPE_SLB: + if (evt->u.slb_error.effective_address_provided) + return evt->u.slb_error.effective_address; + break; + case MCE_ERROR_TYPE_ERAT: + if (evt->u.erat_error.effective_address_provided) + return evt->u.erat_error.effective_address; + break; + case MCE_ERROR_TYPE_TLB: + if (evt->u.tlb_error.effective_address_provided) + return evt->u.tlb_error.effective_address; + break; + default: + case MCE_ERROR_TYPE_UNKNOWN: + break; + } + return 0; +} +EXPORT_SYMBOL(get_mce_fault_addr); diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index f348bd49487c..01e74cbc67e9 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -251,6 +252,44 @@ int opal_put_chars(uint32_t vtermno, const char *data, int total_len) return written; } +static int opal_recover_mce(struct pt_regs *regs, + struct machine_check_event *evt) +{ + int recovered = 0; + uint64_t ea = get_mce_fault_addr(evt); + + if (!(regs->msr & MSR_RI)) { + /* If MSR_RI isn't set, we cannot recover */ + recovered = 0; + } else if (evt->disposition == MCE_DISPOSITION_RECOVERED) { + /* Platform corrected itself */ + recovered = 1; + } else if (ea && !is_kernel_addr(ea)) { + /* + * Faulting address is not in kernel text. We should be fine. + * We need to find which process uses this address. + * For now, kill the task if we have received exception when + * in userspace. + * + * TODO: Queue up this address for hwpoisioning later. + */ + if (user_mode(regs) && !is_global_init(current)) { + _exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip); + recovered = 1; + } else + recovered = 0; + } else if (user_mode(regs) && !is_global_init(current) && + evt->severity == MCE_SEV_ERROR_SYNC) { + /* + * If we have received a synchronous error when in userspace + * kill the task. 
+ */ + _exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip); + recovered = 1; + } + return recovered; +} + int opal_machine_check(struct pt_regs *regs) { struct machine_check_event evt; @@ -266,7 +305,9 @@ int opal_machine_check(struct pt_regs *regs) } machine_check_print_event_info(&evt); - return evt.severity == MCE_SEV_FATAL ? 0 : 1; + if (opal_recover_mce(regs, &evt)) + return 1; + return 0; } static irqreturn_t opal_interrupt(int irq, void *data) -- cgit v1.2.3 From d905c5df9aef38d63df268f6f5e7b13894f626d3 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Thu, 21 Nov 2013 17:43:14 +1100 Subject: PPC: POWERNV: move iommu_add_device earlier The current implementation of IOMMU on sPAPR does not use iommu_ops and therefore does not call IOMMU API's bus_set_iommu() which 1) sets iommu_ops for a bus 2) registers a bus notifier Instead, PCI devices are added to IOMMU groups from subsys_initcall_sync(tce_iommu_init) which does basically the same thing without using iommu_ops callbacks. However Freescale PAMU driver (https://lkml.org/lkml/2013/7/1/158) implements iommu_ops and when tce_iommu_init is called, every PCI device is already added to some group so there is a conflict. This patch does 2 things: 1. removes the loop in which PCI devices were added to groups and adds explicit iommu_add_device() calls to add devices as soon as they get the iommu_table pointer assigned to them. 2. moves a bus notifier to powernv code in order to avoid conflict with the notifier from Freescale driver. iommu_add_device() and iommu_del_device() are public now. Signed-off-by: Alexey Kardashevskiy Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/iommu.h | 26 ++++++++++++++++ arch/powerpc/kernel/iommu.c | 48 +++-------------------------- arch/powerpc/platforms/powernv/pci-ioda.c | 8 ++--- arch/powerpc/platforms/powernv/pci-p5ioc2.c | 2 +- arch/powerpc/platforms/powernv/pci.c | 33 +++++++++++++++++++- arch/powerpc/platforms/pseries/iommu.c | 8 +++-- 6 files changed, 72 insertions(+), 53 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index c34656a8925e..774fa2776907 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -101,8 +101,34 @@ extern void iommu_free_table(struct iommu_table *tbl, const char *node_name); */ extern struct iommu_table *iommu_init_table(struct iommu_table * tbl, int nid); +#ifdef CONFIG_IOMMU_API extern void iommu_register_group(struct iommu_table *tbl, int pci_domain_number, unsigned long pe_num); +extern int iommu_add_device(struct device *dev); +extern void iommu_del_device(struct device *dev); +#else +static inline void iommu_register_group(struct iommu_table *tbl, + int pci_domain_number, + unsigned long pe_num) +{ +} + +static inline int iommu_add_device(struct device *dev) +{ + return 0; +} + +static inline void iommu_del_device(struct device *dev) +{ +} +#endif /* !CONFIG_IOMMU_API */ + +static inline void set_iommu_table_base_and_group(struct device *dev, + void *base) +{ + set_iommu_table_base(dev, base); + iommu_add_device(dev); +} extern int iommu_map_sg(struct device *dev, struct iommu_table *tbl, struct scatterlist *sglist, int nelems, diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 572bb5b95f35..d22abe0b08e3 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -1105,7 +1105,7 @@ void iommu_release_ownership(struct iommu_table *tbl) } EXPORT_SYMBOL_GPL(iommu_release_ownership); -static 
int iommu_add_device(struct device *dev) +int iommu_add_device(struct device *dev) { struct iommu_table *tbl; int ret = 0; @@ -1134,52 +1134,12 @@ static int iommu_add_device(struct device *dev) return ret; } +EXPORT_SYMBOL_GPL(iommu_add_device); -static void iommu_del_device(struct device *dev) +void iommu_del_device(struct device *dev) { iommu_group_remove_device(dev); } - -static int iommu_bus_notifier(struct notifier_block *nb, - unsigned long action, void *data) -{ - struct device *dev = data; - - switch (action) { - case BUS_NOTIFY_ADD_DEVICE: - return iommu_add_device(dev); - case BUS_NOTIFY_DEL_DEVICE: - iommu_del_device(dev); - return 0; - default: - return 0; - } -} - -static struct notifier_block tce_iommu_bus_nb = { - .notifier_call = iommu_bus_notifier, -}; - -static int __init tce_iommu_init(void) -{ - struct pci_dev *pdev = NULL; - - BUILD_BUG_ON(PAGE_SIZE < IOMMU_PAGE_SIZE); - - for_each_pci_dev(pdev) - iommu_add_device(&pdev->dev); - - bus_register_notifier(&pci_bus_type, &tce_iommu_bus_nb); - return 0; -} - -subsys_initcall_sync(tce_iommu_init); - -#else - -void iommu_register_group(struct iommu_table *tbl, - int pci_domain_number, unsigned long pe_num) -{ -} +EXPORT_SYMBOL_GPL(iommu_del_device); #endif /* CONFIG_IOMMU_API */ diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 084cdfa40682..614356cac466 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -460,7 +460,7 @@ static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev return; pe = &phb->ioda.pe_array[pdn->pe_number]; - set_iommu_table_base(&pdev->dev, &pe->tce32_table); + set_iommu_table_base_and_group(&pdev->dev, &pe->tce32_table); } static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_bus *bus) @@ -468,7 +468,7 @@ static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_bus *bus) struct pci_dev *dev; list_for_each_entry(dev, &bus->devices, bus_list) { - set_iommu_table_base(&dev->dev, &pe->tce32_table); + set_iommu_table_base_and_group(&dev->dev, &pe->tce32_table); if (dev->subordinate) pnv_ioda_setup_bus_dma(pe, dev->subordinate); } @@ -644,7 +644,7 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, iommu_register_group(tbl, pci_domain_nr(pe->pbus), pe->pe_number); if (pe->pdev) - set_iommu_table_base(&pe->pdev->dev, tbl); + set_iommu_table_base_and_group(&pe->pdev->dev, tbl); else pnv_ioda_setup_bus_dma(pe, pe->pbus); @@ -722,7 +722,7 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, iommu_init_table(tbl, phb->hose->node); if (pe->pdev) - set_iommu_table_base(&pe->pdev->dev, tbl); + set_iommu_table_base_and_group(&pe->pdev->dev, tbl); else pnv_ioda_setup_bus_dma(pe, pe->pbus); diff --git a/arch/powerpc/platforms/powernv/pci-p5ioc2.c b/arch/powerpc/platforms/powernv/pci-p5ioc2.c index f8b4bd8afb2e..e3807d69393e 100644 --- a/arch/powerpc/platforms/powernv/pci-p5ioc2.c +++ b/arch/powerpc/platforms/powernv/pci-p5ioc2.c @@ -92,7 +92,7 @@ static void pnv_pci_p5ioc2_dma_dev_setup(struct pnv_phb *phb, pci_domain_nr(phb->hose->bus), phb->opal_id); } - set_iommu_table_base(&pdev->dev, &phb->p5ioc2.iommu_table); + set_iommu_table_base_and_group(&pdev->dev, &phb->p5ioc2.iommu_table); } static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np, u64 hub_id, diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 4eb33a9ed532..6f3d49c18d64 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ 
b/arch/powerpc/platforms/powernv/pci.c @@ -536,7 +536,7 @@ static void pnv_pci_dma_fallback_setup(struct pci_controller *hose, pdn->iommu_table = pnv_pci_setup_bml_iommu(hose); if (!pdn->iommu_table) return; - set_iommu_table_base(&pdev->dev, pdn->iommu_table); + set_iommu_table_base_and_group(&pdev->dev, pdn->iommu_table); } static void pnv_pci_dma_dev_setup(struct pci_dev *pdev) @@ -657,3 +657,34 @@ void __init pnv_pci_init(void) ppc_md.teardown_msi_irqs = pnv_teardown_msi_irqs; #endif } + +static int tce_iommu_bus_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct device *dev = data; + + switch (action) { + case BUS_NOTIFY_ADD_DEVICE: + return iommu_add_device(dev); + case BUS_NOTIFY_DEL_DEVICE: + if (dev->iommu_group) + iommu_del_device(dev); + return 0; + default: + return 0; + } +} + +static struct notifier_block tce_iommu_bus_nb = { + .notifier_call = tce_iommu_bus_notifier, +}; + +static int __init tce_iommu_bus_notifier_init(void) +{ + BUILD_BUG_ON(PAGE_SIZE < IOMMU_PAGE_SIZE); + + bus_register_notifier(&pci_bus_type, &tce_iommu_bus_nb); + return 0; +} + +subsys_initcall_sync(tce_iommu_bus_notifier_init); diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index f253361552ae..a80af6c20cba 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -687,7 +687,8 @@ static void pci_dma_dev_setup_pSeries(struct pci_dev *dev) iommu_table_setparms(phb, dn, tbl); PCI_DN(dn)->iommu_table = iommu_init_table(tbl, phb->node); iommu_register_group(tbl, pci_domain_nr(phb->bus), 0); - set_iommu_table_base(&dev->dev, PCI_DN(dn)->iommu_table); + set_iommu_table_base_and_group(&dev->dev, + PCI_DN(dn)->iommu_table); return; } @@ -699,7 +700,8 @@ static void pci_dma_dev_setup_pSeries(struct pci_dev *dev) dn = dn->parent; if (dn && PCI_DN(dn)) - set_iommu_table_base(&dev->dev, PCI_DN(dn)->iommu_table); + set_iommu_table_base_and_group(&dev->dev, + PCI_DN(dn)->iommu_table); else printk(KERN_WARNING "iommu: Device %s has no iommu table\n", pci_name(dev)); @@ -1193,7 +1195,7 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) pr_debug(" found DMA window, table: %p\n", pci->iommu_table); } - set_iommu_table_base(&dev->dev, pci->iommu_table); + set_iommu_table_base_and_group(&dev->dev, pci->iommu_table); } static int dma_set_mask_pSeriesLP(struct device *dev, u64 dma_mask) -- cgit v1.2.3 From fb48dc22824daaa60ff1d6a6c9e22c79112dfb8e Mon Sep 17 00:00:00 2001 From: Brian King Date: Mon, 25 Nov 2013 16:27:54 -0600 Subject: powerpc: Increase EEH recovery timeout for SR-IOV In order to support concurrent adapter firmware download to SR-IOV adapters on pSeries, each VF will see an EEH event where the slot will remain in the unavailable state for the duration of the adapter firmware update, which can take as long as 5 minutes. Extend the EEH recovery timeout to account for this. 
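For reference, the two timeouts touched by this patch end up as follows (values taken from the hunks below; both 5*60*1000 ms and 300 s correspond to the 5 minute firmware update window described above):

    /* arch/powerpc/kernel/eeh.c */
    /* Time to wait for a PCI slot to report status, in milliseconds */
    #define PCI_BUS_RESET_WAIT_MSEC (5*60*1000)     /* was 60*1000 */

    /* arch/powerpc/kernel/eeh_driver.c */
    /* The longest amount of time to wait for a pci device
     * to come back on line, in seconds. */
    #define MAX_WAIT_FOR_RECOVERY 300               /* was 150 */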
Signed-off-by: Brian King Acked-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/eeh.c | 2 +- arch/powerpc/kernel/eeh_driver.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 4bd687d5e7aa..f4b7a227f183 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -84,7 +84,7 @@ #define EEH_MAX_FAILS 2100000 /* Time to wait for a PCI slot to report status, in milliseconds */ -#define PCI_BUS_RESET_WAIT_MSEC (60*1000) +#define PCI_BUS_RESET_WAIT_MSEC (5*60*1000) /* Platform dependent EEH operations */ struct eeh_ops *eeh_ops = NULL; diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 36bed5a12750..4ef59c33777f 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -468,7 +468,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus) /* The longest amount of time to wait for a pci device * to come back on line, in seconds. */ -#define MAX_WAIT_FOR_RECOVERY 150 +#define MAX_WAIT_FOR_RECOVERY 300 static void eeh_handle_normal_event(struct eeh_pe *pe) { -- cgit v1.2.3 From 1a8f6f97ea4dbaaa21b05cae2dacea47e4aea37b Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Thu, 5 Dec 2013 11:31:08 +0800 Subject: powerpc: Make slb_shadow a local The only external user of slb_shadow is the pseries lpar code, and it can access through the paca array instead. Signed-off-by: Jeremy Kerr Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/lppaca.h | 2 -- arch/powerpc/kernel/paca.c | 2 +- arch/powerpc/platforms/pseries/lpar.c | 2 +- 3 files changed, 2 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/lppaca.h b/arch/powerpc/include/asm/lppaca.h index 844c28de7ec0..d0a2a2f99564 100644 --- a/arch/powerpc/include/asm/lppaca.h +++ b/arch/powerpc/include/asm/lppaca.h @@ -132,8 +132,6 @@ struct slb_shadow { } save_area[SLB_NUM_BOLTED]; } ____cacheline_aligned; -extern struct slb_shadow slb_shadow[]; - /* * Layout of entries in the hypervisor's dispatch trace log buffer. */ diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index 0620eaaaad45..9095a6f7ac2c 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -99,7 +99,7 @@ static inline void free_lppacas(void) { } * 3 persistent SLBs are registered here. The buffer will be zero * initially, hence will all be invaild until we actually write them. */ -struct slb_shadow slb_shadow[] __cacheline_aligned = { +static struct slb_shadow slb_shadow[] __cacheline_aligned = { [0 ... (NR_CPUS-1)] = { .persistent = cpu_to_be32(SLB_NUM_BOLTED), .buffer_length = cpu_to_be32(sizeof(struct slb_shadow)), diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 4fca3def9db9..28cf0f33c5be 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -92,7 +92,7 @@ void vpa_init(int cpu) * PAPR says this feature is SLB-Buffer but firmware never * reports that. All SPLPAR support SLB shadow buffer. 
*/ - addr = __pa(&slb_shadow[cpu]); + addr = __pa(paca[cpu].slb_shadow_ptr); if (firmware_has_feature(FW_FEATURE_SPLPAR)) { ret = register_slb_shadow(hwcpu, addr); if (ret) -- cgit v1.2.3 From 6f4441ef7009b9ec063678d906eb762318689494 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Thu, 5 Dec 2013 11:42:40 +0800 Subject: powerpc: Dynamically allocate slb_shadow from memblock Currently, the slb_shadow buffer is our largest symbol: [jk@pablo linux]$ nm --size-sort -r -S obj/vmlinux | head -1 c000000000da0000 0000000000040000 d slb_shadow - we allocate 128 bytes per cpu; so 256k with NR_CPUS=2048. As we have constant initialisers, it's allocated in .data, causing a larger vmlinux image. We may also allocate unnecessary slb_shadow buffers (> no. pacas), since we use the build-time NR_CPUS rather than the run-time nr_cpu_ids. We could move this to the bss, but then we still have the NR_CPUS vs nr_cpu_ids potential for overallocation. This change dynamically allocates the slb_shadow array during allocate_pacas(). At a cost of 104 bytes of text, we save 256k of data: [jk@pablo linux]$ size obj/vmlinux{.orig,} text data bss dec hex filename 9202795 5244676 1169576 15617047 ee4c17 obj/vmlinux.orig 9202899 4982532 1169576 15355007 ea4c7f obj/vmlinux Tested on pseries. Signed-off-by: Jeremy Kerr Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/paca.c | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index 9095a6f7ac2c..623c356fe34f 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -99,12 +99,28 @@ static inline void free_lppacas(void) { } * 3 persistent SLBs are registered here. The buffer will be zero * initially, hence will all be invaild until we actually write them. */ -static struct slb_shadow slb_shadow[] __cacheline_aligned = { - [0 ...
(NR_CPUS-1)] = { - .persistent = cpu_to_be32(SLB_NUM_BOLTED), - .buffer_length = cpu_to_be32(sizeof(struct slb_shadow)), - }, -}; +static struct slb_shadow *slb_shadow; + +static void __init allocate_slb_shadows(int nr_cpus, int limit) +{ + int size = PAGE_ALIGN(sizeof(struct slb_shadow) * nr_cpus); + slb_shadow = __va(memblock_alloc_base(size, PAGE_SIZE, limit)); + memset(slb_shadow, 0, size); +} + +static struct slb_shadow * __init init_slb_shadow(int cpu) +{ + struct slb_shadow *s = &slb_shadow[cpu]; + + s->persistent = cpu_to_be32(SLB_NUM_BOLTED); + s->buffer_length = cpu_to_be32(sizeof(*s)); + + return s; +} + +#else /* CONFIG_PPC_STD_MMU_64 */ + +static void __init allocate_slb_shadows(int nr_cpus, int limit) { } #endif /* CONFIG_PPC_STD_MMU_64 */ @@ -142,7 +158,7 @@ void __init initialise_paca(struct paca_struct *new_paca, int cpu) new_paca->__current = &init_task; new_paca->data_offset = 0xfeeeeeeeeeeeeeeeULL; #ifdef CONFIG_PPC_STD_MMU_64 - new_paca->slb_shadow_ptr = &slb_shadow[cpu]; + new_paca->slb_shadow_ptr = init_slb_shadow(cpu); #endif /* CONFIG_PPC_STD_MMU_64 */ } @@ -190,6 +206,8 @@ void __init allocate_pacas(void) allocate_lppacas(nr_cpu_ids, limit); + allocate_slb_shadows(nr_cpu_ids, limit); + /* Can't use for_each_*_cpu, as they aren't functional yet */ for (cpu = 0; cpu < nr_cpu_ids; cpu++) initialise_paca(&paca[cpu], cpu); -- cgit v1.2.3 From fc2798502f860b18f3c7121e4dc659d3d9d28d74 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 9 Dec 2013 22:54:40 -0800 Subject: PCI: Convert pcibios_resource_to_bus() to take a pci_bus, not a pci_dev These interfaces: pcibios_resource_to_bus(struct pci_dev *dev, *bus_region, *resource) pcibios_bus_to_resource(struct pci_dev *dev, *resource, *bus_region) took a pci_dev, but they really depend only on the pci_bus. And we want to use them in resource allocation paths where we have the bus but not a device, so this patch converts them to take the pci_bus instead of the pci_dev: pcibios_resource_to_bus(struct pci_bus *bus, *bus_region, *resource) pcibios_bus_to_resource(struct pci_bus *bus, *resource, *bus_region) In fact, with standard PCI-PCI bridges, they only depend on the host bridge, because that's the only place address translation occurs, but we aren't going that far yet. [bhelgaas: changelog] Signed-off-by: Yinghai Lu Signed-off-by: Bjorn Helgaas --- arch/alpha/kernel/pci-sysfs.c | 4 ++-- arch/powerpc/kernel/pci-common.c | 4 ++-- arch/powerpc/kernel/pci_of_scan.c | 4 ++-- arch/sparc/kernel/pci.c | 6 +++--- drivers/pci/host-bridge.c | 19 ++++++++----------- drivers/pci/probe.c | 18 +++++++++--------- drivers/pci/quirks.c | 2 +- drivers/pci/rom.c | 2 +- drivers/pci/setup-bus.c | 16 ++++++++-------- drivers/pci/setup-res.c | 2 +- drivers/pcmcia/i82092.c | 2 +- drivers/pcmcia/yenta_socket.c | 6 +++--- drivers/scsi/sym53c8xx_2/sym_glue.c | 5 +++-- drivers/video/arkfb.c | 2 +- drivers/video/s3fb.c | 2 +- drivers/video/vt8623fb.c | 2 +- include/linux/pci.h | 4 ++-- 17 files changed, 49 insertions(+), 51 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/alpha/kernel/pci-sysfs.c b/arch/alpha/kernel/pci-sysfs.c index 2b183b0d3207..99e8d4796c96 100644 --- a/arch/alpha/kernel/pci-sysfs.c +++ b/arch/alpha/kernel/pci-sysfs.c @@ -83,7 +83,7 @@ static int pci_mmap_resource(struct kobject *kobj, if (iomem_is_exclusive(res->start)) return -EINVAL; - pcibios_resource_to_bus(pdev, &bar, res); + pcibios_resource_to_bus(pdev->bus, &bar, res); vma->vm_pgoff += bar.start >> (PAGE_SHIFT - (sparse ? 
5 : 0)); mmap_type = res->flags & IORESOURCE_MEM ? pci_mmap_mem : pci_mmap_io; @@ -139,7 +139,7 @@ static int sparse_mem_mmap_fits(struct pci_dev *pdev, int num) long dense_offset; unsigned long sparse_size; - pcibios_resource_to_bus(pdev, &bar, &pdev->resource[num]); + pcibios_resource_to_bus(pdev->bus, &bar, &pdev->resource[num]); /* All core logic chips have 4G sparse address space, except CIA which has 16G (see xxx_SPARSE_MEM and xxx_DENSE_MEM diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index a1e3e40ca3fd..d9476c1fc959 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -835,7 +835,7 @@ static void pcibios_fixup_resources(struct pci_dev *dev) * at 0 as unset as well, except if PCI_PROBE_ONLY is also set * since in that case, we don't want to re-assign anything */ - pcibios_resource_to_bus(dev, ®, res); + pcibios_resource_to_bus(dev->bus, ®, res); if (pci_has_flag(PCI_REASSIGN_ALL_RSRC) || (reg.start == 0 && !pci_has_flag(PCI_PROBE_ONLY))) { /* Only print message if not re-assigning */ @@ -886,7 +886,7 @@ static int pcibios_uninitialized_bridge_resource(struct pci_bus *bus, /* Job is a bit different between memory and IO */ if (res->flags & IORESOURCE_MEM) { - pcibios_resource_to_bus(dev, ®ion, res); + pcibios_resource_to_bus(dev->bus, ®ion, res); /* If the BAR is non-0 then it's probably been initialized */ if (region.start != 0) diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c index ac0b034f9ae0..83c26d829991 100644 --- a/arch/powerpc/kernel/pci_of_scan.c +++ b/arch/powerpc/kernel/pci_of_scan.c @@ -111,7 +111,7 @@ static void of_pci_parse_addrs(struct device_node *node, struct pci_dev *dev) res->name = pci_name(dev); region.start = base; region.end = base + size - 1; - pcibios_bus_to_resource(dev, res, ®ion); + pcibios_bus_to_resource(dev->bus, res, ®ion); } } @@ -280,7 +280,7 @@ void of_scan_pci_bridge(struct pci_dev *dev) res->flags = flags; region.start = of_read_number(&ranges[1], 2); region.end = region.start + size - 1; - pcibios_bus_to_resource(dev, res, ®ion); + pcibios_bus_to_resource(dev->bus, res, ®ion); } sprintf(bus->name, "PCI Bus %04x:%02x", pci_domain_nr(bus), bus->number); diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c index cb021453de2a..7de8d1f590b7 100644 --- a/arch/sparc/kernel/pci.c +++ b/arch/sparc/kernel/pci.c @@ -392,7 +392,7 @@ static void apb_fake_ranges(struct pci_dev *dev, res->flags = IORESOURCE_IO; region.start = (first << 21); region.end = (last << 21) + ((1 << 21) - 1); - pcibios_bus_to_resource(dev, res, ®ion); + pcibios_bus_to_resource(dev->bus, res, ®ion); pci_read_config_byte(dev, APB_MEM_ADDRESS_MAP, &map); apb_calc_first_last(map, &first, &last); @@ -400,7 +400,7 @@ static void apb_fake_ranges(struct pci_dev *dev, res->flags = IORESOURCE_MEM; region.start = (first << 29); region.end = (last << 29) + ((1 << 29) - 1); - pcibios_bus_to_resource(dev, res, ®ion); + pcibios_bus_to_resource(dev->bus, res, ®ion); } static void pci_of_scan_bus(struct pci_pbm_info *pbm, @@ -491,7 +491,7 @@ static void of_scan_pci_bridge(struct pci_pbm_info *pbm, res->flags = flags; region.start = GET_64BIT(ranges, 1); region.end = region.start + size - 1; - pcibios_bus_to_resource(dev, res, ®ion); + pcibios_bus_to_resource(dev->bus, res, ®ion); } after_ranges: sprintf(bus->name, "PCI Bus %04x:%02x", pci_domain_nr(bus), diff --git a/drivers/pci/host-bridge.c b/drivers/pci/host-bridge.c index a68dc613a5be..06ace6248c61 100644 --- 
a/drivers/pci/host-bridge.c +++ b/drivers/pci/host-bridge.c @@ -9,22 +9,19 @@ #include "pci.h" -static struct pci_bus *find_pci_root_bus(struct pci_dev *dev) +static struct pci_bus *find_pci_root_bus(struct pci_bus *bus) { - struct pci_bus *bus; - - bus = dev->bus; while (bus->parent) bus = bus->parent; return bus; } -static struct pci_host_bridge *find_pci_host_bridge(struct pci_dev *dev) +static struct pci_host_bridge *find_pci_host_bridge(struct pci_bus *bus) { - struct pci_bus *bus = find_pci_root_bus(dev); + struct pci_bus *root_bus = find_pci_root_bus(bus); - return to_pci_host_bridge(bus->bridge); + return to_pci_host_bridge(root_bus->bridge); } void pci_set_host_bridge_release(struct pci_host_bridge *bridge, @@ -40,10 +37,10 @@ static bool resource_contains(struct resource *res1, struct resource *res2) return res1->start <= res2->start && res1->end >= res2->end; } -void pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region, +void pcibios_resource_to_bus(struct pci_bus *bus, struct pci_bus_region *region, struct resource *res) { - struct pci_host_bridge *bridge = find_pci_host_bridge(dev); + struct pci_host_bridge *bridge = find_pci_host_bridge(bus); struct pci_host_bridge_window *window; resource_size_t offset = 0; @@ -68,10 +65,10 @@ static bool region_contains(struct pci_bus_region *region1, return region1->start <= region2->start && region1->end >= region2->end; } -void pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res, +void pcibios_bus_to_resource(struct pci_bus *bus, struct resource *res, struct pci_bus_region *region) { - struct pci_host_bridge *bridge = find_pci_host_bridge(dev); + struct pci_host_bridge *bridge = find_pci_host_bridge(bus); struct pci_host_bridge_window *window; resource_size_t offset = 0; diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 38e403dddf6e..f049e3f53fcc 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -269,8 +269,8 @@ int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type, region.end = l + sz; } - pcibios_bus_to_resource(dev, res, ®ion); - pcibios_resource_to_bus(dev, &inverted_region, res); + pcibios_bus_to_resource(dev->bus, res, ®ion); + pcibios_resource_to_bus(dev->bus, &inverted_region, res); /* * If "A" is a BAR value (a bus address), "bus_to_resource(A)" is @@ -364,7 +364,7 @@ static void pci_read_bridge_io(struct pci_bus *child) res->flags = (io_base_lo & PCI_IO_RANGE_TYPE_MASK) | IORESOURCE_IO; region.start = base; region.end = limit + io_granularity - 1; - pcibios_bus_to_resource(dev, res, ®ion); + pcibios_bus_to_resource(dev->bus, res, ®ion); dev_printk(KERN_DEBUG, &dev->dev, " bridge window %pR\n", res); } } @@ -386,7 +386,7 @@ static void pci_read_bridge_mmio(struct pci_bus *child) res->flags = (mem_base_lo & PCI_MEMORY_RANGE_TYPE_MASK) | IORESOURCE_MEM; region.start = base; region.end = limit + 0xfffff; - pcibios_bus_to_resource(dev, res, ®ion); + pcibios_bus_to_resource(dev->bus, res, ®ion); dev_printk(KERN_DEBUG, &dev->dev, " bridge window %pR\n", res); } } @@ -436,7 +436,7 @@ static void pci_read_bridge_mmio_pref(struct pci_bus *child) res->flags |= IORESOURCE_MEM_64; region.start = base; region.end = limit + 0xfffff; - pcibios_bus_to_resource(dev, res, ®ion); + pcibios_bus_to_resource(dev->bus, res, ®ion); dev_printk(KERN_DEBUG, &dev->dev, " bridge window %pR\n", res); } } @@ -1084,24 +1084,24 @@ int pci_setup_device(struct pci_dev *dev) region.end = 0x1F7; res = &dev->resource[0]; res->flags = LEGACY_IO_RESOURCE; - pcibios_bus_to_resource(dev, res, ®ion); + 
pcibios_bus_to_resource(dev->bus, res, ®ion); region.start = 0x3F6; region.end = 0x3F6; res = &dev->resource[1]; res->flags = LEGACY_IO_RESOURCE; - pcibios_bus_to_resource(dev, res, ®ion); + pcibios_bus_to_resource(dev->bus, res, ®ion); } if ((progif & 4) == 0) { region.start = 0x170; region.end = 0x177; res = &dev->resource[2]; res->flags = LEGACY_IO_RESOURCE; - pcibios_bus_to_resource(dev, res, ®ion); + pcibios_bus_to_resource(dev->bus, res, ®ion); region.start = 0x376; region.end = 0x376; res = &dev->resource[3]; res->flags = LEGACY_IO_RESOURCE; - pcibios_bus_to_resource(dev, res, ®ion); + pcibios_bus_to_resource(dev->bus, res, ®ion); } } break; diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index b3b1b9aa8863..4ad6bf6c107b 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -343,7 +343,7 @@ static void quirk_io_region(struct pci_dev *dev, int port, /* Convert from PCI bus to resource space */ bus_region.start = region; bus_region.end = region + size - 1; - pcibios_bus_to_resource(dev, res, &bus_region); + pcibios_bus_to_resource(dev->bus, res, &bus_region); if (!pci_claim_resource(dev, nr)) dev_info(&dev->dev, "quirk: %pR claimed by %s\n", res, name); diff --git a/drivers/pci/rom.c b/drivers/pci/rom.c index c5d0a08a8747..5d595724e5f4 100644 --- a/drivers/pci/rom.c +++ b/drivers/pci/rom.c @@ -31,7 +31,7 @@ int pci_enable_rom(struct pci_dev *pdev) if (!res->flags) return -1; - pcibios_resource_to_bus(pdev, ®ion, res); + pcibios_resource_to_bus(pdev->bus, ®ion, res); pci_read_config_dword(pdev, pdev->rom_base_reg, &rom_addr); rom_addr &= ~PCI_ROM_ADDRESS_MASK; rom_addr |= region.start | PCI_ROM_ADDRESS_ENABLE; diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c index 219a4106480a..79339822d80e 100644 --- a/drivers/pci/setup-bus.c +++ b/drivers/pci/setup-bus.c @@ -475,7 +475,7 @@ void pci_setup_cardbus(struct pci_bus *bus) &bus->busn_res); res = bus->resource[0]; - pcibios_resource_to_bus(bridge, ®ion, res); + pcibios_resource_to_bus(bridge->bus, ®ion, res); if (res->flags & IORESOURCE_IO) { /* * The IO resource is allocated a range twice as large as it @@ -489,7 +489,7 @@ void pci_setup_cardbus(struct pci_bus *bus) } res = bus->resource[1]; - pcibios_resource_to_bus(bridge, ®ion, res); + pcibios_resource_to_bus(bridge->bus, ®ion, res); if (res->flags & IORESOURCE_IO) { dev_info(&bridge->dev, " bridge window %pR\n", res); pci_write_config_dword(bridge, PCI_CB_IO_BASE_1, @@ -499,7 +499,7 @@ void pci_setup_cardbus(struct pci_bus *bus) } res = bus->resource[2]; - pcibios_resource_to_bus(bridge, ®ion, res); + pcibios_resource_to_bus(bridge->bus, ®ion, res); if (res->flags & IORESOURCE_MEM) { dev_info(&bridge->dev, " bridge window %pR\n", res); pci_write_config_dword(bridge, PCI_CB_MEMORY_BASE_0, @@ -509,7 +509,7 @@ void pci_setup_cardbus(struct pci_bus *bus) } res = bus->resource[3]; - pcibios_resource_to_bus(bridge, ®ion, res); + pcibios_resource_to_bus(bridge->bus, ®ion, res); if (res->flags & IORESOURCE_MEM) { dev_info(&bridge->dev, " bridge window %pR\n", res); pci_write_config_dword(bridge, PCI_CB_MEMORY_BASE_1, @@ -546,7 +546,7 @@ static void pci_setup_bridge_io(struct pci_bus *bus) /* Set up the top and bottom of the PCI I/O segment for this bus. 
*/ res = bus->resource[0]; - pcibios_resource_to_bus(bridge, ®ion, res); + pcibios_resource_to_bus(bridge->bus, ®ion, res); if (res->flags & IORESOURCE_IO) { pci_read_config_dword(bridge, PCI_IO_BASE, &l); l &= 0xffff0000; @@ -578,7 +578,7 @@ static void pci_setup_bridge_mmio(struct pci_bus *bus) /* Set up the top and bottom of the PCI Memory segment for this bus. */ res = bus->resource[1]; - pcibios_resource_to_bus(bridge, ®ion, res); + pcibios_resource_to_bus(bridge->bus, ®ion, res); if (res->flags & IORESOURCE_MEM) { l = (region.start >> 16) & 0xfff0; l |= region.end & 0xfff00000; @@ -604,7 +604,7 @@ static void pci_setup_bridge_mmio_pref(struct pci_bus *bus) /* Set up PREF base/limit. */ bu = lu = 0; res = bus->resource[2]; - pcibios_resource_to_bus(bridge, ®ion, res); + pcibios_resource_to_bus(bridge->bus, ®ion, res); if (res->flags & IORESOURCE_PREFETCH) { l = (region.start >> 16) & 0xfff0; l |= region.end & 0xfff00000; @@ -1422,7 +1422,7 @@ static int iov_resources_unassigned(struct pci_dev *dev, void *data) if (!r->flags) continue; - pcibios_resource_to_bus(dev, ®ion, r); + pcibios_resource_to_bus(dev->bus, ®ion, r); if (!region.start) { *unassigned = true; return 1; /* return early from pci_walk_bus() */ diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c index 83c4d3bc47ab..5c060b152ce6 100644 --- a/drivers/pci/setup-res.c +++ b/drivers/pci/setup-res.c @@ -52,7 +52,7 @@ void pci_update_resource(struct pci_dev *dev, int resno) if (res->flags & IORESOURCE_PCI_FIXED) return; - pcibios_resource_to_bus(dev, ®ion, res); + pcibios_resource_to_bus(dev->bus, ®ion, res); new = region.start | (res->flags & PCI_REGION_FLAG_MASK); if (res->flags & IORESOURCE_IO) diff --git a/drivers/pcmcia/i82092.c b/drivers/pcmcia/i82092.c index 519c4d6003a6..7d47456429a1 100644 --- a/drivers/pcmcia/i82092.c +++ b/drivers/pcmcia/i82092.c @@ -608,7 +608,7 @@ static int i82092aa_set_mem_map(struct pcmcia_socket *socket, struct pccard_mem_ enter("i82092aa_set_mem_map"); - pcibios_resource_to_bus(sock_info->dev, ®ion, mem->res); + pcibios_resource_to_bus(sock_info->dev->bus, ®ion, mem->res); map = mem->map; if (map > 4) { diff --git a/drivers/pcmcia/yenta_socket.c b/drivers/pcmcia/yenta_socket.c index dc18a3a5e010..8485761e76af 100644 --- a/drivers/pcmcia/yenta_socket.c +++ b/drivers/pcmcia/yenta_socket.c @@ -445,7 +445,7 @@ static int yenta_set_mem_map(struct pcmcia_socket *sock, struct pccard_mem_map * unsigned int start, stop, card_start; unsigned short word; - pcibios_resource_to_bus(socket->dev, ®ion, mem->res); + pcibios_resource_to_bus(socket->dev->bus, ®ion, mem->res); map = mem->map; start = region.start; @@ -709,7 +709,7 @@ static int yenta_allocate_res(struct yenta_socket *socket, int nr, unsigned type region.start = config_readl(socket, addr_start) & mask; region.end = config_readl(socket, addr_end) | ~mask; if (region.start && region.end > region.start && !override_bios) { - pcibios_bus_to_resource(dev, res, ®ion); + pcibios_bus_to_resource(dev->bus, res, ®ion); if (pci_claim_resource(dev, PCI_BRIDGE_RESOURCES + nr) == 0) return 0; dev_printk(KERN_INFO, &dev->dev, @@ -1033,7 +1033,7 @@ static void yenta_config_init(struct yenta_socket *socket) struct pci_dev *dev = socket->dev; struct pci_bus_region region; - pcibios_resource_to_bus(socket->dev, ®ion, &dev->resource[0]); + pcibios_resource_to_bus(socket->dev->bus, ®ion, &dev->resource[0]); config_writel(socket, CB_LEGACY_MODE_BASE, 0); config_writel(socket, PCI_BASE_ADDRESS_0, region.start); diff --git 
a/drivers/scsi/sym53c8xx_2/sym_glue.c b/drivers/scsi/sym53c8xx_2/sym_glue.c index bac55f7f69f9..6d3ee1ab6362 100644 --- a/drivers/scsi/sym53c8xx_2/sym_glue.c +++ b/drivers/scsi/sym53c8xx_2/sym_glue.c @@ -1531,7 +1531,7 @@ static int sym_iomap_device(struct sym_device *device) struct pci_bus_region bus_addr; int i = 2; - pcibios_resource_to_bus(pdev, &bus_addr, &pdev->resource[1]); + pcibios_resource_to_bus(pdev->bus, &bus_addr, &pdev->resource[1]); device->mmio_base = bus_addr.start; if (device->chip.features & FE_RAM) { @@ -1541,7 +1541,8 @@ static int sym_iomap_device(struct sym_device *device) */ if (!pdev->resource[i].flags) i++; - pcibios_resource_to_bus(pdev, &bus_addr, &pdev->resource[i]); + pcibios_resource_to_bus(pdev->bus, &bus_addr, + &pdev->resource[i]); device->ram_base = bus_addr.start; } diff --git a/drivers/video/arkfb.c b/drivers/video/arkfb.c index a6b29bd4a12a..adc4ea2cc5a0 100644 --- a/drivers/video/arkfb.c +++ b/drivers/video/arkfb.c @@ -1014,7 +1014,7 @@ static int ark_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) vga_res.flags = IORESOURCE_IO; - pcibios_bus_to_resource(dev, &vga_res, &bus_reg); + pcibios_bus_to_resource(dev->bus, &vga_res, &bus_reg); par->state.vgabase = (void __iomem *) vga_res.start; diff --git a/drivers/video/s3fb.c b/drivers/video/s3fb.c index 968b2997175a..9a3f8f1c6aab 100644 --- a/drivers/video/s3fb.c +++ b/drivers/video/s3fb.c @@ -1180,7 +1180,7 @@ static int s3_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) vga_res.flags = IORESOURCE_IO; - pcibios_bus_to_resource(dev, &vga_res, &bus_reg); + pcibios_bus_to_resource(dev->bus, &vga_res, &bus_reg); par->state.vgabase = (void __iomem *) vga_res.start; diff --git a/drivers/video/vt8623fb.c b/drivers/video/vt8623fb.c index 8bc6e0958a09..5c7cbc6c6236 100644 --- a/drivers/video/vt8623fb.c +++ b/drivers/video/vt8623fb.c @@ -729,7 +729,7 @@ static int vt8623_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) vga_res.flags = IORESOURCE_IO; - pcibios_bus_to_resource(dev, &vga_res, &bus_reg); + pcibios_bus_to_resource(dev->bus, &vga_res, &bus_reg); par->state.vgabase = (void __iomem *) vga_res.start; diff --git a/include/linux/pci.h b/include/linux/pci.h index 7d5555270491..bf32412704a7 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -737,9 +737,9 @@ void pci_fixup_cardbus(struct pci_bus *); /* Generic PCI functions used internally */ -void pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region, +void pcibios_resource_to_bus(struct pci_bus *bus, struct pci_bus_region *region, struct resource *res); -void pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res, +void pcibios_bus_to_resource(struct pci_bus *bus, struct resource *res, struct pci_bus_region *region); void pcibios_scan_specific_bus(int busn); struct pci_bus *pci_find_bus(int domain, int busnr); -- cgit v1.2.3 From 4e243b79b0002cd57cf894ddcfb8d4eb51078041 Mon Sep 17 00:00:00 2001 From: Mahesh Salgaonkar Date: Tue, 10 Dec 2013 00:40:15 +0530 Subject: powerpc: Fix "attempt to move .org backwards" error With recent machine check patch series changes, The exception vectors starting from 0x4300 are now overflowing with allyesconfig. Fix that by moving machine_check_common and machine_check_handle_early code out of that region to make enough room for exception vector area. 
Fixes this build error reportes by Stephen: arch/powerpc/kernel/exceptions-64s.S: Assembler messages: arch/powerpc/kernel/exceptions-64s.S:958: Error: attempt to move .org backwards arch/powerpc/kernel/exceptions-64s.S:959: Error: attempt to move .org backwards arch/powerpc/kernel/exceptions-64s.S:983: Error: attempt to move .org backwards arch/powerpc/kernel/exceptions-64s.S:984: Error: attempt to move .org backwards arch/powerpc/kernel/exceptions-64s.S:1003: Error: attempt to move .org backwards arch/powerpc/kernel/exceptions-64s.S:1013: Error: attempt to move .org backwards arch/powerpc/kernel/exceptions-64s.S:1014: Error: attempt to move .org backwards arch/powerpc/kernel/exceptions-64s.S:1015: Error: attempt to move .org backwards arch/powerpc/kernel/exceptions-64s.S:1016: Error: attempt to move .org backwards arch/powerpc/kernel/exceptions-64s.S:1017: Error: attempt to move .org backwards arch/powerpc/kernel/exceptions-64s.S:1018: Error: attempt to move .org backwards [Moved the code further down as it introduced link errors due to too long relative branches to the masked interrupts handlers from the exception prologs. Also removed the useless feature section --BenH ] Signed-off-by: Mahesh Salgaonkar Reported-by: Stephen Rothwell Tested-by: Stephen Rothwell Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/exceptions-64s.S | 278 +++++++++++++++++------------------ 1 file changed, 138 insertions(+), 140 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 862b9dd4a9db..38d507306a11 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -768,146 +768,6 @@ kvmppc_skip_Hinterrupt: STD_EXCEPTION_COMMON(0x100, system_reset, .system_reset_exception) - /* - * Machine check is different because we use a different - * save area: PACA_EXMC instead of PACA_EXGEN. - */ - .align 7 - .globl machine_check_common -machine_check_common: - - mfspr r10,SPRN_DAR - std r10,PACA_EXGEN+EX_DAR(r13) - mfspr r10,SPRN_DSISR - stw r10,PACA_EXGEN+EX_DSISR(r13) - EXCEPTION_PROLOG_COMMON(0x200, PACA_EXMC) - FINISH_NAP - DISABLE_INTS - ld r3,PACA_EXGEN+EX_DAR(r13) - lwz r4,PACA_EXGEN+EX_DSISR(r13) - std r3,_DAR(r1) - std r4,_DSISR(r1) - bl .save_nvgprs - addi r3,r1,STACK_FRAME_OVERHEAD - bl .machine_check_exception - b .ret_from_except - -#define MACHINE_CHECK_HANDLER_WINDUP \ - /* Clear MSR_RI before setting SRR0 and SRR1. */\ - li r0,MSR_RI; \ - mfmsr r9; /* get MSR value */ \ - andc r9,r9,r0; \ - mtmsrd r9,1; /* Clear MSR_RI */ \ - /* Move original SRR0 and SRR1 into the respective regs */ \ - ld r9,_MSR(r1); \ - mtspr SPRN_SRR1,r9; \ - ld r3,_NIP(r1); \ - mtspr SPRN_SRR0,r3; \ - ld r9,_CTR(r1); \ - mtctr r9; \ - ld r9,_XER(r1); \ - mtxer r9; \ - ld r9,_LINK(r1); \ - mtlr r9; \ - REST_GPR(0, r1); \ - REST_8GPRS(2, r1); \ - REST_GPR(10, r1); \ - ld r11,_CCR(r1); \ - mtcr r11; \ - /* Decrement paca->in_mce. */ \ - lhz r12,PACA_IN_MCE(r13); \ - subi r12,r12,1; \ - sth r12,PACA_IN_MCE(r13); \ - REST_GPR(11, r1); \ - REST_2GPRS(12, r1); \ - /* restore original r1. */ \ - ld r1,GPR1(r1) - - /* - * Handle machine check early in real mode. We come here with - * ME=1, MMU (IR=0 and DR=0) off and using MC emergency stack. 
- */ - .align 7 - .globl machine_check_handle_early -machine_check_handle_early: -BEGIN_FTR_SECTION - std r0,GPR0(r1) /* Save r0 */ - EXCEPTION_PROLOG_COMMON_3(0x200) - bl .save_nvgprs - addi r3,r1,STACK_FRAME_OVERHEAD - bl .machine_check_early - ld r12,_MSR(r1) -#ifdef CONFIG_PPC_P7_NAP - /* - * Check if thread was in power saving mode. We come here when any - * of the following is true: - * a. thread wasn't in power saving mode - * b. thread was in power saving mode with no state loss or - * supervisor state loss - * - * Go back to nap again if (b) is true. - */ - rlwinm. r11,r12,47-31,30,31 /* Was it in power saving mode? */ - beq 4f /* No, it wasn;t */ - /* Thread was in power saving mode. Go back to nap again. */ - cmpwi r11,2 - bne 3f - /* Supervisor state loss */ - li r0,1 - stb r0,PACA_NAPSTATELOST(r13) -3: bl .machine_check_queue_event - MACHINE_CHECK_HANDLER_WINDUP - GET_PACA(r13) - ld r1,PACAR1(r13) - b .power7_enter_nap_mode -4: -#endif - /* - * Check if we are coming from hypervisor userspace. If yes then we - * continue in host kernel in V mode to deliver the MC event. - */ - rldicl. r11,r12,4,63 /* See if MC hit while in HV mode. */ - beq 5f - andi. r11,r12,MSR_PR /* See if coming from user. */ - bne 9f /* continue in V mode if we are. */ - -5: -#ifdef CONFIG_KVM_BOOK3S_64_HV - /* - * We are coming from kernel context. Check if we are coming from - * guest. if yes, then we can continue. We will fall through - * do_kvm_200->kvmppc_interrupt to deliver the MC event to guest. - */ - lbz r11,HSTATE_IN_GUEST(r13) - cmpwi r11,0 /* Check if coming from guest */ - bne 9f /* continue if we are. */ -#endif - /* - * At this point we are not sure about what context we come from. - * Queue up the MCE event and return from the interrupt. - * But before that, check if this is an un-recoverable exception. - * If yes, then stay on emergency stack and panic. - */ - andi. r11,r12,MSR_RI - bne 2f -1: addi r3,r1,STACK_FRAME_OVERHEAD - bl .unrecoverable_exception - b 1b -2: - /* - * Return from MC interrupt. - * Queue up the MCE event so that we can log it later, while - * returning from kernel or opal call. - */ - bl .machine_check_queue_event - MACHINE_CHECK_HANDLER_WINDUP - rfid -9: - /* Deliver the machine check to host kernel in V mode. */ - MACHINE_CHECK_HANDLER_WINDUP - b machine_check_pSeries -END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) - STD_EXCEPTION_COMMON_ASYNC(0x500, hardware_interrupt, do_IRQ) STD_EXCEPTION_COMMON_ASYNC(0x900, decrementer, .timer_interrupt) STD_EXCEPTION_COMMON(0x980, hdecrementer, .hdec_interrupt) @@ -1276,6 +1136,30 @@ unrecov_user_slb: #endif /* __DISABLED__ */ + /* + * Machine check is different because we use a different + * save area: PACA_EXMC instead of PACA_EXGEN. + */ + .align 7 + .globl machine_check_common +machine_check_common: + + mfspr r10,SPRN_DAR + std r10,PACA_EXGEN+EX_DAR(r13) + mfspr r10,SPRN_DSISR + stw r10,PACA_EXGEN+EX_DSISR(r13) + EXCEPTION_PROLOG_COMMON(0x200, PACA_EXMC) + FINISH_NAP + DISABLE_INTS + ld r3,PACA_EXGEN+EX_DAR(r13) + lwz r4,PACA_EXGEN+EX_DSISR(r13) + std r3,_DAR(r1) + std r4,_DSISR(r1) + bl .save_nvgprs + addi r3,r1,STACK_FRAME_OVERHEAD + bl .machine_check_exception + b .ret_from_except + .align 7 .globl alignment_common alignment_common: @@ -1459,6 +1343,120 @@ _GLOBAL(opal_mc_secondary_handler) #endif /* CONFIG_PPC_POWERNV */ +#define MACHINE_CHECK_HANDLER_WINDUP \ + /* Clear MSR_RI before setting SRR0 and SRR1. 
*/\ + li r0,MSR_RI; \ + mfmsr r9; /* get MSR value */ \ + andc r9,r9,r0; \ + mtmsrd r9,1; /* Clear MSR_RI */ \ + /* Move original SRR0 and SRR1 into the respective regs */ \ + ld r9,_MSR(r1); \ + mtspr SPRN_SRR1,r9; \ + ld r3,_NIP(r1); \ + mtspr SPRN_SRR0,r3; \ + ld r9,_CTR(r1); \ + mtctr r9; \ + ld r9,_XER(r1); \ + mtxer r9; \ + ld r9,_LINK(r1); \ + mtlr r9; \ + REST_GPR(0, r1); \ + REST_8GPRS(2, r1); \ + REST_GPR(10, r1); \ + ld r11,_CCR(r1); \ + mtcr r11; \ + /* Decrement paca->in_mce. */ \ + lhz r12,PACA_IN_MCE(r13); \ + subi r12,r12,1; \ + sth r12,PACA_IN_MCE(r13); \ + REST_GPR(11, r1); \ + REST_2GPRS(12, r1); \ + /* restore original r1. */ \ + ld r1,GPR1(r1) + + /* + * Handle machine check early in real mode. We come here with + * ME=1, MMU (IR=0 and DR=0) off and using MC emergency stack. + */ + .align 7 + .globl machine_check_handle_early +machine_check_handle_early: + std r0,GPR0(r1) /* Save r0 */ + EXCEPTION_PROLOG_COMMON_3(0x200) + bl .save_nvgprs + addi r3,r1,STACK_FRAME_OVERHEAD + bl .machine_check_early + ld r12,_MSR(r1) +#ifdef CONFIG_PPC_P7_NAP + /* + * Check if thread was in power saving mode. We come here when any + * of the following is true: + * a. thread wasn't in power saving mode + * b. thread was in power saving mode with no state loss or + * supervisor state loss + * + * Go back to nap again if (b) is true. + */ + rlwinm. r11,r12,47-31,30,31 /* Was it in power saving mode? */ + beq 4f /* No, it wasn;t */ + /* Thread was in power saving mode. Go back to nap again. */ + cmpwi r11,2 + bne 3f + /* Supervisor state loss */ + li r0,1 + stb r0,PACA_NAPSTATELOST(r13) +3: bl .machine_check_queue_event + MACHINE_CHECK_HANDLER_WINDUP + GET_PACA(r13) + ld r1,PACAR1(r13) + b .power7_enter_nap_mode +4: +#endif + /* + * Check if we are coming from hypervisor userspace. If yes then we + * continue in host kernel in V mode to deliver the MC event. + */ + rldicl. r11,r12,4,63 /* See if MC hit while in HV mode. */ + beq 5f + andi. r11,r12,MSR_PR /* See if coming from user. */ + bne 9f /* continue in V mode if we are. */ + +5: +#ifdef CONFIG_KVM_BOOK3S_64_HV + /* + * We are coming from kernel context. Check if we are coming from + * guest. if yes, then we can continue. We will fall through + * do_kvm_200->kvmppc_interrupt to deliver the MC event to guest. + */ + lbz r11,HSTATE_IN_GUEST(r13) + cmpwi r11,0 /* Check if coming from guest */ + bne 9f /* continue if we are. */ +#endif + /* + * At this point we are not sure about what context we come from. + * Queue up the MCE event and return from the interrupt. + * But before that, check if this is an un-recoverable exception. + * If yes, then stay on emergency stack and panic. + */ + andi. r11,r12,MSR_RI + bne 2f +1: addi r3,r1,STACK_FRAME_OVERHEAD + bl .unrecoverable_exception + b 1b +2: + /* + * Return from MC interrupt. + * Queue up the MCE event so that we can log it later, while + * returning from kernel or opal call. + */ + bl .machine_check_queue_event + MACHINE_CHECK_HANDLER_WINDUP + rfid +9: + /* Deliver the machine check to host kernel in V mode. */ + MACHINE_CHECK_HANDLER_WINDUP + b machine_check_pSeries + /* * r13 points to the PACA, r9 contains the saved CR, * r12 contain the saved SRR1, SRR0 is still ready for return -- cgit v1.2.3 From e589a4404fa06730355de204d3d136ed9bbc7dea Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Mon, 9 Dec 2013 18:17:01 +1100 Subject: powerpc/iommu: Update constant names to reflect their hardcoded page size The powerpc iommu uses a hardcoded page size of 4K. 
This patch changes the name of the IOMMU_PAGE_* macros to reflect the hardcoded values. A future patch will use the existing names to support dynamic page sizes. Signed-off-by: Alistair Popple Signed-off-by: Alexey Kardashevskiy Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/iommu.h | 10 ++--- arch/powerpc/kernel/dma-iommu.c | 4 +- arch/powerpc/kernel/iommu.c | 78 +++++++++++++++++----------------- arch/powerpc/kernel/vio.c | 19 +++++---- arch/powerpc/platforms/cell/iommu.c | 12 +++--- arch/powerpc/platforms/powernv/pci.c | 4 +- arch/powerpc/platforms/pseries/iommu.c | 8 ++-- arch/powerpc/platforms/pseries/setup.c | 4 +- arch/powerpc/platforms/wsp/wsp_pci.c | 10 ++--- drivers/net/ethernet/ibm/ibmveth.c | 9 ++-- drivers/vfio/vfio_iommu_spapr_tce.c | 28 ++++++------ 11 files changed, 94 insertions(+), 92 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index 774fa2776907..0869c7e74421 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -30,10 +30,10 @@ #include #include -#define IOMMU_PAGE_SHIFT 12 -#define IOMMU_PAGE_SIZE (ASM_CONST(1) << IOMMU_PAGE_SHIFT) -#define IOMMU_PAGE_MASK (~((1 << IOMMU_PAGE_SHIFT) - 1)) -#define IOMMU_PAGE_ALIGN(addr) _ALIGN_UP(addr, IOMMU_PAGE_SIZE) +#define IOMMU_PAGE_SHIFT_4K 12 +#define IOMMU_PAGE_SIZE_4K (ASM_CONST(1) << IOMMU_PAGE_SHIFT_4K) +#define IOMMU_PAGE_MASK_4K (~((1 << IOMMU_PAGE_SHIFT_4K) - 1)) +#define IOMMU_PAGE_ALIGN_4K(addr) _ALIGN_UP(addr, IOMMU_PAGE_SIZE_4K) /* Boot time flags */ extern int iommu_is_off; @@ -42,7 +42,7 @@ extern int iommu_force_on; /* Pure 2^n version of get_order */ static __inline__ __attribute_const__ int get_iommu_order(unsigned long size) { - return __ilog2((size - 1) >> IOMMU_PAGE_SHIFT) + 1; + return __ilog2((size - 1) >> IOMMU_PAGE_SHIFT_4K) + 1; } diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c index e4897523de41..5cfe3dbfc4a0 100644 --- a/arch/powerpc/kernel/dma-iommu.c +++ b/arch/powerpc/kernel/dma-iommu.c @@ -83,10 +83,10 @@ static int dma_iommu_dma_supported(struct device *dev, u64 mask) return 0; } - if (tbl->it_offset > (mask >> IOMMU_PAGE_SHIFT)) { + if (tbl->it_offset > (mask >> IOMMU_PAGE_SHIFT_4K)) { dev_info(dev, "Warning: IOMMU offset too big for device mask\n"); dev_info(dev, "mask: 0x%08llx, table offset: 0x%08lx\n", - mask, tbl->it_offset << IOMMU_PAGE_SHIFT); + mask, tbl->it_offset << IOMMU_PAGE_SHIFT_4K); return 0; } else return 1; diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index d22abe0b08e3..df4a7f1b7444 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -251,14 +251,14 @@ again: if (dev) boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, - 1 << IOMMU_PAGE_SHIFT); + 1 << IOMMU_PAGE_SHIFT_4K); else - boundary_size = ALIGN(1UL << 32, 1 << IOMMU_PAGE_SHIFT); + boundary_size = ALIGN(1UL << 32, 1 << IOMMU_PAGE_SHIFT_4K); /* 4GB boundary for iseries_hv_alloc and iseries_hv_map */ n = iommu_area_alloc(tbl->it_map, limit, start, npages, - tbl->it_offset, boundary_size >> IOMMU_PAGE_SHIFT, - align_mask); + tbl->it_offset, boundary_size >> IOMMU_PAGE_SHIFT_4K, + align_mask); if (n == -1) { if (likely(pass == 0)) { /* First try the pool from the start */ @@ -320,12 +320,12 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl, return DMA_ERROR_CODE; entry += tbl->it_offset; /* Offset into real TCE table */ - ret = entry << IOMMU_PAGE_SHIFT; /* Set the return dma 
address */ + ret = entry << IOMMU_PAGE_SHIFT_4K; /* Set the return dma address */ /* Put the TCEs in the HW table */ build_fail = ppc_md.tce_build(tbl, entry, npages, - (unsigned long)page & IOMMU_PAGE_MASK, - direction, attrs); + (unsigned long)page & IOMMU_PAGE_MASK_4K, + direction, attrs); /* ppc_md.tce_build() only returns non-zero for transient errors. * Clean up the table bitmap in this case and return @@ -352,7 +352,7 @@ static bool iommu_free_check(struct iommu_table *tbl, dma_addr_t dma_addr, { unsigned long entry, free_entry; - entry = dma_addr >> IOMMU_PAGE_SHIFT; + entry = dma_addr >> IOMMU_PAGE_SHIFT_4K; free_entry = entry - tbl->it_offset; if (((free_entry + npages) > tbl->it_size) || @@ -401,7 +401,7 @@ static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, unsigned long flags; struct iommu_pool *pool; - entry = dma_addr >> IOMMU_PAGE_SHIFT; + entry = dma_addr >> IOMMU_PAGE_SHIFT_4K; free_entry = entry - tbl->it_offset; pool = get_pool(tbl, free_entry); @@ -468,13 +468,13 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl, } /* Allocate iommu entries for that segment */ vaddr = (unsigned long) sg_virt(s); - npages = iommu_num_pages(vaddr, slen, IOMMU_PAGE_SIZE); + npages = iommu_num_pages(vaddr, slen, IOMMU_PAGE_SIZE_4K); align = 0; - if (IOMMU_PAGE_SHIFT < PAGE_SHIFT && slen >= PAGE_SIZE && + if (IOMMU_PAGE_SHIFT_4K < PAGE_SHIFT && slen >= PAGE_SIZE && (vaddr & ~PAGE_MASK) == 0) - align = PAGE_SHIFT - IOMMU_PAGE_SHIFT; + align = PAGE_SHIFT - IOMMU_PAGE_SHIFT_4K; entry = iommu_range_alloc(dev, tbl, npages, &handle, - mask >> IOMMU_PAGE_SHIFT, align); + mask >> IOMMU_PAGE_SHIFT_4K, align); DBG(" - vaddr: %lx, size: %lx\n", vaddr, slen); @@ -489,16 +489,16 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl, /* Convert entry to a dma_addr_t */ entry += tbl->it_offset; - dma_addr = entry << IOMMU_PAGE_SHIFT; - dma_addr |= (s->offset & ~IOMMU_PAGE_MASK); + dma_addr = entry << IOMMU_PAGE_SHIFT_4K; + dma_addr |= (s->offset & ~IOMMU_PAGE_MASK_4K); DBG(" - %lu pages, entry: %lx, dma_addr: %lx\n", npages, entry, dma_addr); /* Insert into HW table */ build_fail = ppc_md.tce_build(tbl, entry, npages, - vaddr & IOMMU_PAGE_MASK, - direction, attrs); + vaddr & IOMMU_PAGE_MASK_4K, + direction, attrs); if(unlikely(build_fail)) goto failure; @@ -559,9 +559,9 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl, if (s->dma_length != 0) { unsigned long vaddr, npages; - vaddr = s->dma_address & IOMMU_PAGE_MASK; + vaddr = s->dma_address & IOMMU_PAGE_MASK_4K; npages = iommu_num_pages(s->dma_address, s->dma_length, - IOMMU_PAGE_SIZE); + IOMMU_PAGE_SIZE_4K); __iommu_free(tbl, vaddr, npages); s->dma_address = DMA_ERROR_CODE; s->dma_length = 0; @@ -592,7 +592,7 @@ void iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist, if (sg->dma_length == 0) break; npages = iommu_num_pages(dma_handle, sg->dma_length, - IOMMU_PAGE_SIZE); + IOMMU_PAGE_SIZE_4K); __iommu_free(tbl, dma_handle, npages); sg = sg_next(sg); } @@ -676,7 +676,7 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid) set_bit(0, tbl->it_map); /* We only split the IOMMU table if we have 1GB or more of space */ - if ((tbl->it_size << IOMMU_PAGE_SHIFT) >= (1UL * 1024 * 1024 * 1024)) + if ((tbl->it_size << IOMMU_PAGE_SHIFT_4K) >= (1UL * 1024 * 1024 * 1024)) tbl->nr_pools = IOMMU_NR_POOLS; else tbl->nr_pools = 1; @@ -768,16 +768,16 @@ dma_addr_t iommu_map_page(struct device *dev, struct iommu_table *tbl, vaddr = page_address(page) + offset; uaddr = (unsigned 
long)vaddr; - npages = iommu_num_pages(uaddr, size, IOMMU_PAGE_SIZE); + npages = iommu_num_pages(uaddr, size, IOMMU_PAGE_SIZE_4K); if (tbl) { align = 0; - if (IOMMU_PAGE_SHIFT < PAGE_SHIFT && size >= PAGE_SIZE && + if (IOMMU_PAGE_SHIFT_4K < PAGE_SHIFT && size >= PAGE_SIZE && ((unsigned long)vaddr & ~PAGE_MASK) == 0) - align = PAGE_SHIFT - IOMMU_PAGE_SHIFT; + align = PAGE_SHIFT - IOMMU_PAGE_SHIFT_4K; dma_handle = iommu_alloc(dev, tbl, vaddr, npages, direction, - mask >> IOMMU_PAGE_SHIFT, align, + mask >> IOMMU_PAGE_SHIFT_4K, align, attrs); if (dma_handle == DMA_ERROR_CODE) { if (printk_ratelimit()) { @@ -786,7 +786,7 @@ dma_addr_t iommu_map_page(struct device *dev, struct iommu_table *tbl, npages); } } else - dma_handle |= (uaddr & ~IOMMU_PAGE_MASK); + dma_handle |= (uaddr & ~IOMMU_PAGE_MASK_4K); } return dma_handle; @@ -801,7 +801,7 @@ void iommu_unmap_page(struct iommu_table *tbl, dma_addr_t dma_handle, BUG_ON(direction == DMA_NONE); if (tbl) { - npages = iommu_num_pages(dma_handle, size, IOMMU_PAGE_SIZE); + npages = iommu_num_pages(dma_handle, size, IOMMU_PAGE_SIZE_4K); iommu_free(tbl, dma_handle, npages); } } @@ -845,10 +845,10 @@ void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl, memset(ret, 0, size); /* Set up tces to cover the allocated range */ - nio_pages = size >> IOMMU_PAGE_SHIFT; + nio_pages = size >> IOMMU_PAGE_SHIFT_4K; io_order = get_iommu_order(size); mapping = iommu_alloc(dev, tbl, ret, nio_pages, DMA_BIDIRECTIONAL, - mask >> IOMMU_PAGE_SHIFT, io_order, NULL); + mask >> IOMMU_PAGE_SHIFT_4K, io_order, NULL); if (mapping == DMA_ERROR_CODE) { free_pages((unsigned long)ret, order); return NULL; @@ -864,7 +864,7 @@ void iommu_free_coherent(struct iommu_table *tbl, size_t size, unsigned int nio_pages; size = PAGE_ALIGN(size); - nio_pages = size >> IOMMU_PAGE_SHIFT; + nio_pages = size >> IOMMU_PAGE_SHIFT_4K; iommu_free(tbl, dma_handle, nio_pages); size = PAGE_ALIGN(size); free_pages((unsigned long)vaddr, get_order(size)); @@ -935,10 +935,10 @@ int iommu_tce_clear_param_check(struct iommu_table *tbl, if (tce_value) return -EINVAL; - if (ioba & ~IOMMU_PAGE_MASK) + if (ioba & ~IOMMU_PAGE_MASK_4K) return -EINVAL; - ioba >>= IOMMU_PAGE_SHIFT; + ioba >>= IOMMU_PAGE_SHIFT_4K; if (ioba < tbl->it_offset) return -EINVAL; @@ -955,13 +955,13 @@ int iommu_tce_put_param_check(struct iommu_table *tbl, if (!(tce & (TCE_PCI_WRITE | TCE_PCI_READ))) return -EINVAL; - if (tce & ~(IOMMU_PAGE_MASK | TCE_PCI_WRITE | TCE_PCI_READ)) + if (tce & ~(IOMMU_PAGE_MASK_4K | TCE_PCI_WRITE | TCE_PCI_READ)) return -EINVAL; - if (ioba & ~IOMMU_PAGE_MASK) + if (ioba & ~IOMMU_PAGE_MASK_4K) return -EINVAL; - ioba >>= IOMMU_PAGE_SHIFT; + ioba >>= IOMMU_PAGE_SHIFT_4K; if (ioba < tbl->it_offset) return -EINVAL; @@ -1037,7 +1037,7 @@ int iommu_tce_build(struct iommu_table *tbl, unsigned long entry, /* if (unlikely(ret)) pr_err("iommu_tce: %s failed on hwaddr=%lx ioba=%lx kva=%lx ret=%d\n", - __func__, hwaddr, entry << IOMMU_PAGE_SHIFT, + __func__, hwaddr, entry << IOMMU_PAGE_SHIFT_4K, hwaddr, ret); */ return ret; @@ -1049,14 +1049,14 @@ int iommu_put_tce_user_mode(struct iommu_table *tbl, unsigned long entry, { int ret; struct page *page = NULL; - unsigned long hwaddr, offset = tce & IOMMU_PAGE_MASK & ~PAGE_MASK; + unsigned long hwaddr, offset = tce & IOMMU_PAGE_MASK_4K & ~PAGE_MASK; enum dma_data_direction direction = iommu_tce_direction(tce); ret = get_user_pages_fast(tce & PAGE_MASK, 1, direction != DMA_TO_DEVICE, &page); if (unlikely(ret != 1)) { /* pr_err("iommu_tce: get_user_pages_fast failed 
tce=%lx ioba=%lx ret=%d\n", - tce, entry << IOMMU_PAGE_SHIFT, ret); */ + tce, entry << IOMMU_PAGE_SHIFT_4K, ret); */ return -EFAULT; } hwaddr = (unsigned long) page_address(page) + offset; @@ -1067,7 +1067,7 @@ int iommu_put_tce_user_mode(struct iommu_table *tbl, unsigned long entry, if (ret < 0) pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%d\n", - __func__, entry << IOMMU_PAGE_SHIFT, tce, ret); + __func__, entry << IOMMU_PAGE_SHIFT_4K, tce, ret); return ret; } diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c index 76a64821f4a2..2e89fa350763 100644 --- a/arch/powerpc/kernel/vio.c +++ b/arch/powerpc/kernel/vio.c @@ -520,14 +520,14 @@ static dma_addr_t vio_dma_iommu_map_page(struct device *dev, struct page *page, struct vio_dev *viodev = to_vio_dev(dev); dma_addr_t ret = DMA_ERROR_CODE; - if (vio_cmo_alloc(viodev, roundup(size, IOMMU_PAGE_SIZE))) { + if (vio_cmo_alloc(viodev, roundup(size, IOMMU_PAGE_SIZE_4K))) { atomic_inc(&viodev->cmo.allocs_failed); return ret; } ret = dma_iommu_ops.map_page(dev, page, offset, size, direction, attrs); if (unlikely(dma_mapping_error(dev, ret))) { - vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE)); + vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE_4K)); atomic_inc(&viodev->cmo.allocs_failed); } @@ -543,7 +543,7 @@ static void vio_dma_iommu_unmap_page(struct device *dev, dma_addr_t dma_handle, dma_iommu_ops.unmap_page(dev, dma_handle, size, direction, attrs); - vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE)); + vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE_4K)); } static int vio_dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist, @@ -556,7 +556,7 @@ static int vio_dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist, size_t alloc_size = 0; for (sgl = sglist; count < nelems; count++, sgl++) - alloc_size += roundup(sgl->length, IOMMU_PAGE_SIZE); + alloc_size += roundup(sgl->length, IOMMU_PAGE_SIZE_4K); if (vio_cmo_alloc(viodev, alloc_size)) { atomic_inc(&viodev->cmo.allocs_failed); @@ -572,7 +572,7 @@ static int vio_dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist, } for (sgl = sglist, count = 0; count < ret; count++, sgl++) - alloc_size -= roundup(sgl->dma_length, IOMMU_PAGE_SIZE); + alloc_size -= roundup(sgl->dma_length, IOMMU_PAGE_SIZE_4K); if (alloc_size) vio_cmo_dealloc(viodev, alloc_size); @@ -590,7 +590,7 @@ static void vio_dma_iommu_unmap_sg(struct device *dev, int count = 0; for (sgl = sglist; count < nelems; count++, sgl++) - alloc_size += roundup(sgl->dma_length, IOMMU_PAGE_SIZE); + alloc_size += roundup(sgl->dma_length, IOMMU_PAGE_SIZE_4K); dma_iommu_ops.unmap_sg(dev, sglist, nelems, direction, attrs); @@ -736,7 +736,8 @@ static int vio_cmo_bus_probe(struct vio_dev *viodev) return -EINVAL; } - viodev->cmo.desired = IOMMU_PAGE_ALIGN(viodrv->get_desired_dma(viodev)); + viodev->cmo.desired = + IOMMU_PAGE_ALIGN_4K(viodrv->get_desired_dma(viodev)); if (viodev->cmo.desired < VIO_CMO_MIN_ENT) viodev->cmo.desired = VIO_CMO_MIN_ENT; size = VIO_CMO_MIN_ENT; @@ -1176,9 +1177,9 @@ static struct iommu_table *vio_build_iommu_table(struct vio_dev *dev) &tbl->it_index, &offset, &size); /* TCE table size - measured in tce entries */ - tbl->it_size = size >> IOMMU_PAGE_SHIFT; + tbl->it_size = size >> IOMMU_PAGE_SHIFT_4K; /* offset for VIO should always be 0 */ - tbl->it_offset = offset >> IOMMU_PAGE_SHIFT; + tbl->it_offset = offset >> IOMMU_PAGE_SHIFT_4K; tbl->it_busno = 0; tbl->it_type = TCE_VB; tbl->it_blocksize = 16; diff --git a/arch/powerpc/platforms/cell/iommu.c 
b/arch/powerpc/platforms/cell/iommu.c index b53560660b72..fc61b908eaf0 100644 --- a/arch/powerpc/platforms/cell/iommu.c +++ b/arch/powerpc/platforms/cell/iommu.c @@ -197,7 +197,7 @@ static int tce_build_cell(struct iommu_table *tbl, long index, long npages, io_pte = (unsigned long *)tbl->it_base + (index - tbl->it_offset); - for (i = 0; i < npages; i++, uaddr += IOMMU_PAGE_SIZE) + for (i = 0; i < npages; i++, uaddr += IOMMU_PAGE_SIZE_4K) io_pte[i] = base_pte | (__pa(uaddr) & CBE_IOPTE_RPN_Mask); mb(); @@ -430,7 +430,7 @@ static void cell_iommu_setup_hardware(struct cbe_iommu *iommu, { cell_iommu_setup_stab(iommu, base, size, 0, 0); iommu->ptab = cell_iommu_alloc_ptab(iommu, base, size, 0, 0, - IOMMU_PAGE_SHIFT); + IOMMU_PAGE_SHIFT_4K); cell_iommu_enable_hardware(iommu); } @@ -487,8 +487,8 @@ cell_iommu_setup_window(struct cbe_iommu *iommu, struct device_node *np, window->table.it_blocksize = 16; window->table.it_base = (unsigned long)iommu->ptab; window->table.it_index = iommu->nid; - window->table.it_offset = (offset >> IOMMU_PAGE_SHIFT) + pte_offset; - window->table.it_size = size >> IOMMU_PAGE_SHIFT; + window->table.it_offset = (offset >> IOMMU_PAGE_SHIFT_4K) + pte_offset; + window->table.it_size = size >> IOMMU_PAGE_SHIFT_4K; iommu_init_table(&window->table, iommu->nid); @@ -773,7 +773,7 @@ static void __init cell_iommu_init_one(struct device_node *np, /* Setup the iommu_table */ cell_iommu_setup_window(iommu, np, base, size, - offset >> IOMMU_PAGE_SHIFT); + offset >> IOMMU_PAGE_SHIFT_4K); } static void __init cell_disable_iommus(void) @@ -1122,7 +1122,7 @@ static int __init cell_iommu_fixed_mapping_init(void) cell_iommu_setup_stab(iommu, dbase, dsize, fbase, fsize); iommu->ptab = cell_iommu_alloc_ptab(iommu, dbase, dsize, 0, 0, - IOMMU_PAGE_SHIFT); + IOMMU_PAGE_SHIFT_4K); cell_iommu_setup_fixed_ptab(iommu, np, dbase, dsize, fbase, fsize); cell_iommu_enable_hardware(iommu); diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index bac289aac7cc..7f4d857668a9 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -564,7 +564,7 @@ void pnv_pci_setup_iommu_table(struct iommu_table *tbl, { tbl->it_blocksize = 16; tbl->it_base = (unsigned long)tce_mem; - tbl->it_offset = dma_offset >> IOMMU_PAGE_SHIFT; + tbl->it_offset = dma_offset >> IOMMU_PAGE_SHIFT_4K; tbl->it_index = 0; tbl->it_size = tce_size >> 3; tbl->it_busno = 0; @@ -761,7 +761,7 @@ static struct notifier_block tce_iommu_bus_nb = { static int __init tce_iommu_bus_notifier_init(void) { - BUILD_BUG_ON(PAGE_SIZE < IOMMU_PAGE_SIZE); + BUILD_BUG_ON(PAGE_SIZE < IOMMU_PAGE_SIZE_4K); bus_register_notifier(&pci_bus_type, &tce_iommu_bus_nb); return 0; diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index a80af6c20cba..1b7531ce0c0c 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -488,7 +488,7 @@ static void iommu_table_setparms(struct pci_controller *phb, tbl->it_busno = phb->bus->number; /* Units of tce entries */ - tbl->it_offset = phb->dma_window_base_cur >> IOMMU_PAGE_SHIFT; + tbl->it_offset = phb->dma_window_base_cur >> IOMMU_PAGE_SHIFT_4K; /* Test if we are going over 2GB of DMA space */ if (phb->dma_window_base_cur + phb->dma_window_size > 0x80000000ul) { @@ -499,7 +499,7 @@ static void iommu_table_setparms(struct pci_controller *phb, phb->dma_window_base_cur += phb->dma_window_size; /* Set the tce table size - measured in entries */ - tbl->it_size = 
phb->dma_window_size >> IOMMU_PAGE_SHIFT; + tbl->it_size = phb->dma_window_size >> IOMMU_PAGE_SHIFT_4K; tbl->it_index = 0; tbl->it_blocksize = 16; @@ -540,8 +540,8 @@ static void iommu_table_setparms_lpar(struct pci_controller *phb, tbl->it_base = 0; tbl->it_blocksize = 16; tbl->it_type = TCE_PCI; - tbl->it_offset = offset >> IOMMU_PAGE_SHIFT; - tbl->it_size = size >> IOMMU_PAGE_SHIFT; + tbl->it_offset = offset >> IOMMU_PAGE_SHIFT_4K; + tbl->it_size = size >> IOMMU_PAGE_SHIFT_4K; } static void pci_dma_bus_setup_pSeries(struct pci_bus *bus) diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index c1f190858701..49cd16e6b450 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -72,7 +72,7 @@ int CMO_PrPSP = -1; int CMO_SecPSP = -1; -unsigned long CMO_PageSize = (ASM_CONST(1) << IOMMU_PAGE_SHIFT); +unsigned long CMO_PageSize = (ASM_CONST(1) << IOMMU_PAGE_SHIFT_4K); EXPORT_SYMBOL(CMO_PageSize); int fwnmi_active; /* TRUE if an FWNMI handler is present */ @@ -569,7 +569,7 @@ void pSeries_cmo_feature_init(void) { char *ptr, *key, *value, *end; int call_status; - int page_order = IOMMU_PAGE_SHIFT; + int page_order = IOMMU_PAGE_SHIFT_4K; pr_debug(" -> fw_cmo_feature_init()\n"); spin_lock(&rtas_data_buf_lock); diff --git a/arch/powerpc/platforms/wsp/wsp_pci.c b/arch/powerpc/platforms/wsp/wsp_pci.c index 62cb527493e7..8a589618551e 100644 --- a/arch/powerpc/platforms/wsp/wsp_pci.c +++ b/arch/powerpc/platforms/wsp/wsp_pci.c @@ -260,7 +260,7 @@ static int tce_build_wsp(struct iommu_table *tbl, long index, long npages, *tcep = proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT; dma_debug("[DMA] TCE %p set to 0x%016llx (dma addr: 0x%lx)\n", - tcep, *tcep, (tbl->it_offset + index) << IOMMU_PAGE_SHIFT); + tcep, *tcep, (tbl->it_offset + index) << IOMMU_PAGE_SHIFT_4K); uaddr += TCE_PAGE_SIZE; index++; @@ -381,8 +381,8 @@ static struct wsp_dma_table *wsp_pci_create_dma32_table(struct wsp_phb *phb, /* Init bits and pieces */ tbl->table.it_blocksize = 16; - tbl->table.it_offset = addr >> IOMMU_PAGE_SHIFT; - tbl->table.it_size = size >> IOMMU_PAGE_SHIFT; + tbl->table.it_offset = addr >> IOMMU_PAGE_SHIFT_4K; + tbl->table.it_size = size >> IOMMU_PAGE_SHIFT_4K; /* * It's already blank but we clear it anyway. 
@@ -449,8 +449,8 @@ static void wsp_pci_dma_dev_setup(struct pci_dev *pdev) if (table) { pr_info("%s: Setup iommu: 32-bit DMA region 0x%08lx..0x%08lx\n", pci_name(pdev), - table->table.it_offset << IOMMU_PAGE_SHIFT, - (table->table.it_offset << IOMMU_PAGE_SHIFT) + table->table.it_offset << IOMMU_PAGE_SHIFT_4K, + (table->table.it_offset << IOMMU_PAGE_SHIFT_4K) + phb->dma32_region_size - 1); archdata->dma_data.iommu_table_base = &table->table; return; diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c index 952d795230a4..f7d7538b6bd9 100644 --- a/drivers/net/ethernet/ibm/ibmveth.c +++ b/drivers/net/ethernet/ibm/ibmveth.c @@ -1282,24 +1282,25 @@ static unsigned long ibmveth_get_desired_dma(struct vio_dev *vdev) /* netdev inits at probe time along with the structures we need below*/ if (netdev == NULL) - return IOMMU_PAGE_ALIGN(IBMVETH_IO_ENTITLEMENT_DEFAULT); + return IOMMU_PAGE_ALIGN_4K(IBMVETH_IO_ENTITLEMENT_DEFAULT); adapter = netdev_priv(netdev); ret = IBMVETH_BUFF_LIST_SIZE + IBMVETH_FILT_LIST_SIZE; - ret += IOMMU_PAGE_ALIGN(netdev->mtu); + ret += IOMMU_PAGE_ALIGN_4K(netdev->mtu); for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) { /* add the size of the active receive buffers */ if (adapter->rx_buff_pool[i].active) ret += adapter->rx_buff_pool[i].size * - IOMMU_PAGE_ALIGN(adapter->rx_buff_pool[i]. + IOMMU_PAGE_ALIGN_4K(adapter->rx_buff_pool[i]. buff_size); rxqentries += adapter->rx_buff_pool[i].size; } /* add the size of the receive queue entries */ - ret += IOMMU_PAGE_ALIGN(rxqentries * sizeof(struct ibmveth_rx_q_entry)); + ret += IOMMU_PAGE_ALIGN_4K( + rxqentries * sizeof(struct ibmveth_rx_q_entry)); return ret; } diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index bdae7a04af75..a84788ba662c 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -81,7 +81,7 @@ static int tce_iommu_enable(struct tce_container *container) * enforcing the limit based on the max that the guest can map. 
*/ down_write(¤t->mm->mmap_sem); - npages = (tbl->it_size << IOMMU_PAGE_SHIFT) >> PAGE_SHIFT; + npages = (tbl->it_size << IOMMU_PAGE_SHIFT_4K) >> PAGE_SHIFT; locked = current->mm->locked_vm + npages; lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; if (locked > lock_limit && !capable(CAP_IPC_LOCK)) { @@ -110,7 +110,7 @@ static void tce_iommu_disable(struct tce_container *container) down_write(¤t->mm->mmap_sem); current->mm->locked_vm -= (container->tbl->it_size << - IOMMU_PAGE_SHIFT) >> PAGE_SHIFT; + IOMMU_PAGE_SHIFT_4K) >> PAGE_SHIFT; up_write(¤t->mm->mmap_sem); } @@ -174,8 +174,8 @@ static long tce_iommu_ioctl(void *iommu_data, if (info.argsz < minsz) return -EINVAL; - info.dma32_window_start = tbl->it_offset << IOMMU_PAGE_SHIFT; - info.dma32_window_size = tbl->it_size << IOMMU_PAGE_SHIFT; + info.dma32_window_start = tbl->it_offset << IOMMU_PAGE_SHIFT_4K; + info.dma32_window_size = tbl->it_size << IOMMU_PAGE_SHIFT_4K; info.flags = 0; if (copy_to_user((void __user *)arg, &info, minsz)) @@ -205,8 +205,8 @@ static long tce_iommu_ioctl(void *iommu_data, VFIO_DMA_MAP_FLAG_WRITE)) return -EINVAL; - if ((param.size & ~IOMMU_PAGE_MASK) || - (param.vaddr & ~IOMMU_PAGE_MASK)) + if ((param.size & ~IOMMU_PAGE_MASK_4K) || + (param.vaddr & ~IOMMU_PAGE_MASK_4K)) return -EINVAL; /* iova is checked by the IOMMU API */ @@ -220,17 +220,17 @@ static long tce_iommu_ioctl(void *iommu_data, if (ret) return ret; - for (i = 0; i < (param.size >> IOMMU_PAGE_SHIFT); ++i) { + for (i = 0; i < (param.size >> IOMMU_PAGE_SHIFT_4K); ++i) { ret = iommu_put_tce_user_mode(tbl, - (param.iova >> IOMMU_PAGE_SHIFT) + i, + (param.iova >> IOMMU_PAGE_SHIFT_4K) + i, tce); if (ret) break; - tce += IOMMU_PAGE_SIZE; + tce += IOMMU_PAGE_SIZE_4K; } if (ret) iommu_clear_tces_and_put_pages(tbl, - param.iova >> IOMMU_PAGE_SHIFT, i); + param.iova >> IOMMU_PAGE_SHIFT_4K, i); iommu_flush_tce(tbl); @@ -256,17 +256,17 @@ static long tce_iommu_ioctl(void *iommu_data, if (param.flags) return -EINVAL; - if (param.size & ~IOMMU_PAGE_MASK) + if (param.size & ~IOMMU_PAGE_MASK_4K) return -EINVAL; ret = iommu_tce_clear_param_check(tbl, param.iova, 0, - param.size >> IOMMU_PAGE_SHIFT); + param.size >> IOMMU_PAGE_SHIFT_4K); if (ret) return ret; ret = iommu_clear_tces_and_put_pages(tbl, - param.iova >> IOMMU_PAGE_SHIFT, - param.size >> IOMMU_PAGE_SHIFT); + param.iova >> IOMMU_PAGE_SHIFT_4K, + param.size >> IOMMU_PAGE_SHIFT_4K); iommu_flush_tce(tbl); return ret; -- cgit v1.2.3 From 3a553170d35d69bea3877bffa508489dfa6f133d Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Mon, 9 Dec 2013 18:17:02 +1100 Subject: powerpc/iommu: Add it_page_shift field to determine iommu page size This patch adds a it_page_shift field to struct iommu_table and initiliases it to 4K for all platforms. 
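
For orientation, here is a minimal stand-alone sketch of the arithmetic this field enables (ordinary userspace C, not kernel code; the struct below is a trimmed, hypothetical stand-in for struct iommu_table, and the window values are made up for illustration):

/* Illustrative only: shows the byte-to-TCE-entry conversion the patch
 * introduces.  Field and constant names mirror the kernel patch; the
 * struct is reduced to the members relevant to the example. */
#include <stdio.h>

#define IOMMU_PAGE_SHIFT_4K 12

struct iommu_table_example {
	unsigned long it_offset;     /* first usable entry, in TCE entries */
	unsigned long it_size;       /* window size, in TCE entries */
	unsigned long it_page_shift; /* log2 of the IOMMU page size */
};

int main(void)
{
	unsigned long offset = 0x80000000UL; /* window start, in bytes (example) */
	unsigned long size   = 0x40000000UL; /* window size, in bytes (example)  */
	struct iommu_table_example tbl;

	tbl.it_page_shift = IOMMU_PAGE_SHIFT_4K;      /* 4K default on all platforms */
	tbl.it_offset = offset >> tbl.it_page_shift;  /* bytes -> entries */
	tbl.it_size   = size   >> tbl.it_page_shift;  /* bytes -> entries */

	printf("offset = %lu entries, size = %lu entries\n",
	       tbl.it_offset, tbl.it_size);
	return 0;
}

The point is simply that window offsets and sizes expressed in bytes become TCE-entry counts by shifting with the table's own page shift rather than a compile-time constant, which is what the following patch relies on.
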
Signed-off-by: Alistair Popple Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/iommu.h | 1 + arch/powerpc/kernel/vio.c | 5 +++-- arch/powerpc/platforms/cell/iommu.c | 8 +++++--- arch/powerpc/platforms/pasemi/iommu.c | 5 ++++- arch/powerpc/platforms/powernv/pci.c | 3 ++- arch/powerpc/platforms/pseries/iommu.c | 10 ++++++---- arch/powerpc/platforms/wsp/wsp_pci.c | 5 +++-- 7 files changed, 24 insertions(+), 13 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index 0869c7e74421..7c928342f3f5 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -76,6 +76,7 @@ struct iommu_table { struct iommu_pool large_pool; struct iommu_pool pools[IOMMU_NR_POOLS]; unsigned long *it_map; /* A simple allocation bitmap for now */ + unsigned long it_page_shift;/* table iommu page size */ #ifdef CONFIG_IOMMU_API struct iommu_group *it_group; #endif diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c index 2e89fa350763..170ac24a0106 100644 --- a/arch/powerpc/kernel/vio.c +++ b/arch/powerpc/kernel/vio.c @@ -1177,9 +1177,10 @@ static struct iommu_table *vio_build_iommu_table(struct vio_dev *dev) &tbl->it_index, &offset, &size); /* TCE table size - measured in tce entries */ - tbl->it_size = size >> IOMMU_PAGE_SHIFT_4K; + tbl->it_page_shift = IOMMU_PAGE_SHIFT_4K; + tbl->it_size = size >> tbl->it_page_shift; /* offset for VIO should always be 0 */ - tbl->it_offset = offset >> IOMMU_PAGE_SHIFT_4K; + tbl->it_offset = offset >> tbl->it_page_shift; tbl->it_busno = 0; tbl->it_type = TCE_VB; tbl->it_blocksize = 16; diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c index fc61b908eaf0..2b90ff8a93be 100644 --- a/arch/powerpc/platforms/cell/iommu.c +++ b/arch/powerpc/platforms/cell/iommu.c @@ -197,7 +197,7 @@ static int tce_build_cell(struct iommu_table *tbl, long index, long npages, io_pte = (unsigned long *)tbl->it_base + (index - tbl->it_offset); - for (i = 0; i < npages; i++, uaddr += IOMMU_PAGE_SIZE_4K) + for (i = 0; i < npages; i++, uaddr += tbl->it_page_shift) io_pte[i] = base_pte | (__pa(uaddr) & CBE_IOPTE_RPN_Mask); mb(); @@ -487,8 +487,10 @@ cell_iommu_setup_window(struct cbe_iommu *iommu, struct device_node *np, window->table.it_blocksize = 16; window->table.it_base = (unsigned long)iommu->ptab; window->table.it_index = iommu->nid; - window->table.it_offset = (offset >> IOMMU_PAGE_SHIFT_4K) + pte_offset; - window->table.it_size = size >> IOMMU_PAGE_SHIFT_4K; + window->table.it_page_shift = IOMMU_PAGE_SHIFT_4K; + window->table.it_offset = + (offset >> window->table.it_page_shift) + pte_offset; + window->table.it_size = size >> window->table.it_page_shift; iommu_init_table(&window->table, iommu->nid); diff --git a/arch/powerpc/platforms/pasemi/iommu.c b/arch/powerpc/platforms/pasemi/iommu.c index 7d2d036754b5..2e576f2ae442 100644 --- a/arch/powerpc/platforms/pasemi/iommu.c +++ b/arch/powerpc/platforms/pasemi/iommu.c @@ -138,8 +138,11 @@ static void iommu_table_iobmap_setup(void) pr_debug(" -> %s\n", __func__); iommu_table_iobmap.it_busno = 0; iommu_table_iobmap.it_offset = 0; + iommu_table_iobmap.it_page_shift = IOBMAP_PAGE_SHIFT; + /* it_size is in number of entries */ - iommu_table_iobmap.it_size = 0x80000000 >> IOBMAP_PAGE_SHIFT; + iommu_table_iobmap.it_size = + 0x80000000 >> iommu_table_iobmap.it_page_shift; /* Initialize the common IOMMU code */ iommu_table_iobmap.it_base = (unsigned long)iob_l2_base; diff --git 
a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 7f4d857668a9..569b46423a93 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -564,7 +564,8 @@ void pnv_pci_setup_iommu_table(struct iommu_table *tbl, { tbl->it_blocksize = 16; tbl->it_base = (unsigned long)tce_mem; - tbl->it_offset = dma_offset >> IOMMU_PAGE_SHIFT_4K; + tbl->it_page_shift = IOMMU_PAGE_SHIFT_4K; + tbl->it_offset = dma_offset >> tbl->it_page_shift; tbl->it_index = 0; tbl->it_size = tce_size >> 3; tbl->it_busno = 0; diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 1b7531ce0c0c..e0299183ae54 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -486,9 +486,10 @@ static void iommu_table_setparms(struct pci_controller *phb, memset((void *)tbl->it_base, 0, *sizep); tbl->it_busno = phb->bus->number; + tbl->it_page_shift = IOMMU_PAGE_SHIFT_4K; /* Units of tce entries */ - tbl->it_offset = phb->dma_window_base_cur >> IOMMU_PAGE_SHIFT_4K; + tbl->it_offset = phb->dma_window_base_cur >> tbl->it_page_shift; /* Test if we are going over 2GB of DMA space */ if (phb->dma_window_base_cur + phb->dma_window_size > 0x80000000ul) { @@ -499,7 +500,7 @@ static void iommu_table_setparms(struct pci_controller *phb, phb->dma_window_base_cur += phb->dma_window_size; /* Set the tce table size - measured in entries */ - tbl->it_size = phb->dma_window_size >> IOMMU_PAGE_SHIFT_4K; + tbl->it_size = phb->dma_window_size >> tbl->it_page_shift; tbl->it_index = 0; tbl->it_blocksize = 16; @@ -537,11 +538,12 @@ static void iommu_table_setparms_lpar(struct pci_controller *phb, of_parse_dma_window(dn, dma_window, &tbl->it_index, &offset, &size); tbl->it_busno = phb->bus->number; + tbl->it_page_shift = IOMMU_PAGE_SHIFT_4K; tbl->it_base = 0; tbl->it_blocksize = 16; tbl->it_type = TCE_PCI; - tbl->it_offset = offset >> IOMMU_PAGE_SHIFT_4K; - tbl->it_size = size >> IOMMU_PAGE_SHIFT_4K; + tbl->it_offset = offset >> tbl->it_page_shift; + tbl->it_size = size >> tbl->it_page_shift; } static void pci_dma_bus_setup_pSeries(struct pci_bus *bus) diff --git a/arch/powerpc/platforms/wsp/wsp_pci.c b/arch/powerpc/platforms/wsp/wsp_pci.c index 8a589618551e..9a15e5b39bb8 100644 --- a/arch/powerpc/platforms/wsp/wsp_pci.c +++ b/arch/powerpc/platforms/wsp/wsp_pci.c @@ -381,8 +381,9 @@ static struct wsp_dma_table *wsp_pci_create_dma32_table(struct wsp_phb *phb, /* Init bits and pieces */ tbl->table.it_blocksize = 16; - tbl->table.it_offset = addr >> IOMMU_PAGE_SHIFT_4K; - tbl->table.it_size = size >> IOMMU_PAGE_SHIFT_4K; + tbl->table.it_page_shift = IOMMU_PAGE_SHIFT_4K; + tbl->table.it_offset = addr >> tbl->table.it_page_shift; + tbl->table.it_size = size >> tbl->table.it_page_shift; /* * It's already blank but we clear it anyway. -- cgit v1.2.3 From d084775738b746648d4102337163a04534a02982 Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Mon, 9 Dec 2013 18:17:03 +1100 Subject: powerpc/iommu: Update the generic code to use dynamic iommu page sizes This patch updates the generic iommu backend code to use the it_page_shift field to determine the iommu page size instead of using hardcoded values. 
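
A rough feel for the per-table macros this change switches the generic code over to (again plain userspace C; the macro bodies are simplified stand-ins for the kernel definitions, and the 64K shift is just an example value, not something set by this patch):

/* Illustrative only: per-table page-size helpers in the style of the
 * IOMMU_PAGE_SIZE(tbl)/IOMMU_PAGE_MASK(tbl) macros added by the series. */
#include <stdio.h>

struct tbl_example {
	unsigned long it_page_shift;
};

#define IOMMU_PAGE_SIZE(tblptr) (1UL << (tblptr)->it_page_shift)
#define IOMMU_PAGE_MASK(tblptr) (~(IOMMU_PAGE_SIZE(tblptr) - 1))

int main(void)
{
	struct tbl_example tbl = { .it_page_shift = 16 }; /* e.g. a 64K-page table */
	unsigned long dma_addr = 0x12345678UL;            /* example DMA address */

	printf("page size = %lu bytes\n", IOMMU_PAGE_SIZE(&tbl));
	printf("page base = 0x%lx\n", dma_addr & IOMMU_PAGE_MASK(&tbl));
	printf("entry     = %lu\n", dma_addr >> tbl.it_page_shift);
	return 0;
}

With the page size carried in the table itself, callers such as iommu_alloc() and the iommu_num_pages() users in the diff below no longer assume 4K, which is what lets platforms use IOMMU page sizes other than 4K later in the series.
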
Signed-off-by: Alistair Popple Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/iommu.h | 19 +++++--- arch/powerpc/kernel/dma-iommu.c | 4 +- arch/powerpc/kernel/iommu.c | 88 +++++++++++++++++++----------------- arch/powerpc/kernel/vio.c | 25 +++++++--- arch/powerpc/platforms/powernv/pci.c | 2 - drivers/net/ethernet/ibm/ibmveth.c | 15 +++--- 6 files changed, 88 insertions(+), 65 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index 7c928342f3f5..f7a8036579b5 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -35,17 +35,14 @@ #define IOMMU_PAGE_MASK_4K (~((1 << IOMMU_PAGE_SHIFT_4K) - 1)) #define IOMMU_PAGE_ALIGN_4K(addr) _ALIGN_UP(addr, IOMMU_PAGE_SIZE_4K) +#define IOMMU_PAGE_SIZE(tblptr) (ASM_CONST(1) << (tblptr)->it_page_shift) +#define IOMMU_PAGE_MASK(tblptr) (~((1 << (tblptr)->it_page_shift) - 1)) +#define IOMMU_PAGE_ALIGN(addr, tblptr) _ALIGN_UP(addr, IOMMU_PAGE_SIZE(tblptr)) + /* Boot time flags */ extern int iommu_is_off; extern int iommu_force_on; -/* Pure 2^n version of get_order */ -static __inline__ __attribute_const__ int get_iommu_order(unsigned long size) -{ - return __ilog2((size - 1) >> IOMMU_PAGE_SHIFT_4K) + 1; -} - - /* * IOMAP_MAX_ORDER defines the largest contiguous block * of dma space we can get. IOMAP_MAX_ORDER = 13 @@ -82,6 +79,14 @@ struct iommu_table { #endif }; +/* Pure 2^n version of get_order */ +static inline __attribute_const__ +int get_iommu_order(unsigned long size, struct iommu_table *tbl) +{ + return __ilog2((size - 1) >> tbl->it_page_shift) + 1; +} + + struct scatterlist; static inline void set_iommu_table_base(struct device *dev, void *base) diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c index 5cfe3dbfc4a0..54d0116256f7 100644 --- a/arch/powerpc/kernel/dma-iommu.c +++ b/arch/powerpc/kernel/dma-iommu.c @@ -83,10 +83,10 @@ static int dma_iommu_dma_supported(struct device *dev, u64 mask) return 0; } - if (tbl->it_offset > (mask >> IOMMU_PAGE_SHIFT_4K)) { + if (tbl->it_offset > (mask >> tbl->it_page_shift)) { dev_info(dev, "Warning: IOMMU offset too big for device mask\n"); dev_info(dev, "mask: 0x%08llx, table offset: 0x%08lx\n", - mask, tbl->it_offset << IOMMU_PAGE_SHIFT_4K); + mask, tbl->it_offset << tbl->it_page_shift); return 0; } else return 1; diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index df4a7f1b7444..f58d8135aab2 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -251,14 +251,13 @@ again: if (dev) boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, - 1 << IOMMU_PAGE_SHIFT_4K); + 1 << tbl->it_page_shift); else - boundary_size = ALIGN(1UL << 32, 1 << IOMMU_PAGE_SHIFT_4K); + boundary_size = ALIGN(1UL << 32, 1 << tbl->it_page_shift); /* 4GB boundary for iseries_hv_alloc and iseries_hv_map */ - n = iommu_area_alloc(tbl->it_map, limit, start, npages, - tbl->it_offset, boundary_size >> IOMMU_PAGE_SHIFT_4K, - align_mask); + n = iommu_area_alloc(tbl->it_map, limit, start, npages, tbl->it_offset, + boundary_size >> tbl->it_page_shift, align_mask); if (n == -1) { if (likely(pass == 0)) { /* First try the pool from the start */ @@ -320,12 +319,12 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl, return DMA_ERROR_CODE; entry += tbl->it_offset; /* Offset into real TCE table */ - ret = entry << IOMMU_PAGE_SHIFT_4K; /* Set the return dma address */ + ret = entry << tbl->it_page_shift; /* Set the return dma address */ 
/* Put the TCEs in the HW table */ build_fail = ppc_md.tce_build(tbl, entry, npages, - (unsigned long)page & IOMMU_PAGE_MASK_4K, - direction, attrs); + (unsigned long)page & + IOMMU_PAGE_MASK(tbl), direction, attrs); /* ppc_md.tce_build() only returns non-zero for transient errors. * Clean up the table bitmap in this case and return @@ -352,7 +351,7 @@ static bool iommu_free_check(struct iommu_table *tbl, dma_addr_t dma_addr, { unsigned long entry, free_entry; - entry = dma_addr >> IOMMU_PAGE_SHIFT_4K; + entry = dma_addr >> tbl->it_page_shift; free_entry = entry - tbl->it_offset; if (((free_entry + npages) > tbl->it_size) || @@ -401,7 +400,7 @@ static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, unsigned long flags; struct iommu_pool *pool; - entry = dma_addr >> IOMMU_PAGE_SHIFT_4K; + entry = dma_addr >> tbl->it_page_shift; free_entry = entry - tbl->it_offset; pool = get_pool(tbl, free_entry); @@ -468,13 +467,13 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl, } /* Allocate iommu entries for that segment */ vaddr = (unsigned long) sg_virt(s); - npages = iommu_num_pages(vaddr, slen, IOMMU_PAGE_SIZE_4K); + npages = iommu_num_pages(vaddr, slen, IOMMU_PAGE_SIZE(tbl)); align = 0; - if (IOMMU_PAGE_SHIFT_4K < PAGE_SHIFT && slen >= PAGE_SIZE && + if (tbl->it_page_shift < PAGE_SHIFT && slen >= PAGE_SIZE && (vaddr & ~PAGE_MASK) == 0) - align = PAGE_SHIFT - IOMMU_PAGE_SHIFT_4K; + align = PAGE_SHIFT - tbl->it_page_shift; entry = iommu_range_alloc(dev, tbl, npages, &handle, - mask >> IOMMU_PAGE_SHIFT_4K, align); + mask >> tbl->it_page_shift, align); DBG(" - vaddr: %lx, size: %lx\n", vaddr, slen); @@ -489,16 +488,16 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl, /* Convert entry to a dma_addr_t */ entry += tbl->it_offset; - dma_addr = entry << IOMMU_PAGE_SHIFT_4K; - dma_addr |= (s->offset & ~IOMMU_PAGE_MASK_4K); + dma_addr = entry << tbl->it_page_shift; + dma_addr |= (s->offset & ~IOMMU_PAGE_MASK(tbl)); DBG(" - %lu pages, entry: %lx, dma_addr: %lx\n", npages, entry, dma_addr); /* Insert into HW table */ build_fail = ppc_md.tce_build(tbl, entry, npages, - vaddr & IOMMU_PAGE_MASK_4K, - direction, attrs); + vaddr & IOMMU_PAGE_MASK(tbl), + direction, attrs); if(unlikely(build_fail)) goto failure; @@ -559,9 +558,9 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl, if (s->dma_length != 0) { unsigned long vaddr, npages; - vaddr = s->dma_address & IOMMU_PAGE_MASK_4K; + vaddr = s->dma_address & IOMMU_PAGE_MASK(tbl); npages = iommu_num_pages(s->dma_address, s->dma_length, - IOMMU_PAGE_SIZE_4K); + IOMMU_PAGE_SIZE(tbl)); __iommu_free(tbl, vaddr, npages); s->dma_address = DMA_ERROR_CODE; s->dma_length = 0; @@ -592,7 +591,7 @@ void iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist, if (sg->dma_length == 0) break; npages = iommu_num_pages(dma_handle, sg->dma_length, - IOMMU_PAGE_SIZE_4K); + IOMMU_PAGE_SIZE(tbl)); __iommu_free(tbl, dma_handle, npages); sg = sg_next(sg); } @@ -676,7 +675,7 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid) set_bit(0, tbl->it_map); /* We only split the IOMMU table if we have 1GB or more of space */ - if ((tbl->it_size << IOMMU_PAGE_SHIFT_4K) >= (1UL * 1024 * 1024 * 1024)) + if ((tbl->it_size << tbl->it_page_shift) >= (1UL * 1024 * 1024 * 1024)) tbl->nr_pools = IOMMU_NR_POOLS; else tbl->nr_pools = 1; @@ -768,16 +767,16 @@ dma_addr_t iommu_map_page(struct device *dev, struct iommu_table *tbl, vaddr = page_address(page) + offset; uaddr = (unsigned long)vaddr; - npages = 
iommu_num_pages(uaddr, size, IOMMU_PAGE_SIZE_4K); + npages = iommu_num_pages(uaddr, size, IOMMU_PAGE_SIZE(tbl)); if (tbl) { align = 0; - if (IOMMU_PAGE_SHIFT_4K < PAGE_SHIFT && size >= PAGE_SIZE && + if (tbl->it_page_shift < PAGE_SHIFT && size >= PAGE_SIZE && ((unsigned long)vaddr & ~PAGE_MASK) == 0) - align = PAGE_SHIFT - IOMMU_PAGE_SHIFT_4K; + align = PAGE_SHIFT - tbl->it_page_shift; dma_handle = iommu_alloc(dev, tbl, vaddr, npages, direction, - mask >> IOMMU_PAGE_SHIFT_4K, align, + mask >> tbl->it_page_shift, align, attrs); if (dma_handle == DMA_ERROR_CODE) { if (printk_ratelimit()) { @@ -786,7 +785,7 @@ dma_addr_t iommu_map_page(struct device *dev, struct iommu_table *tbl, npages); } } else - dma_handle |= (uaddr & ~IOMMU_PAGE_MASK_4K); + dma_handle |= (uaddr & ~IOMMU_PAGE_MASK(tbl)); } return dma_handle; @@ -801,7 +800,8 @@ void iommu_unmap_page(struct iommu_table *tbl, dma_addr_t dma_handle, BUG_ON(direction == DMA_NONE); if (tbl) { - npages = iommu_num_pages(dma_handle, size, IOMMU_PAGE_SIZE_4K); + npages = iommu_num_pages(dma_handle, size, + IOMMU_PAGE_SIZE(tbl)); iommu_free(tbl, dma_handle, npages); } } @@ -845,10 +845,10 @@ void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl, memset(ret, 0, size); /* Set up tces to cover the allocated range */ - nio_pages = size >> IOMMU_PAGE_SHIFT_4K; - io_order = get_iommu_order(size); + nio_pages = size >> tbl->it_page_shift; + io_order = get_iommu_order(size, tbl); mapping = iommu_alloc(dev, tbl, ret, nio_pages, DMA_BIDIRECTIONAL, - mask >> IOMMU_PAGE_SHIFT_4K, io_order, NULL); + mask >> tbl->it_page_shift, io_order, NULL); if (mapping == DMA_ERROR_CODE) { free_pages((unsigned long)ret, order); return NULL; @@ -864,7 +864,7 @@ void iommu_free_coherent(struct iommu_table *tbl, size_t size, unsigned int nio_pages; size = PAGE_ALIGN(size); - nio_pages = size >> IOMMU_PAGE_SHIFT_4K; + nio_pages = size >> tbl->it_page_shift; iommu_free(tbl, dma_handle, nio_pages); size = PAGE_ALIGN(size); free_pages((unsigned long)vaddr, get_order(size)); @@ -935,10 +935,10 @@ int iommu_tce_clear_param_check(struct iommu_table *tbl, if (tce_value) return -EINVAL; - if (ioba & ~IOMMU_PAGE_MASK_4K) + if (ioba & ~IOMMU_PAGE_MASK(tbl)) return -EINVAL; - ioba >>= IOMMU_PAGE_SHIFT_4K; + ioba >>= tbl->it_page_shift; if (ioba < tbl->it_offset) return -EINVAL; @@ -955,13 +955,13 @@ int iommu_tce_put_param_check(struct iommu_table *tbl, if (!(tce & (TCE_PCI_WRITE | TCE_PCI_READ))) return -EINVAL; - if (tce & ~(IOMMU_PAGE_MASK_4K | TCE_PCI_WRITE | TCE_PCI_READ)) + if (tce & ~(IOMMU_PAGE_MASK(tbl) | TCE_PCI_WRITE | TCE_PCI_READ)) return -EINVAL; - if (ioba & ~IOMMU_PAGE_MASK_4K) + if (ioba & ~IOMMU_PAGE_MASK(tbl)) return -EINVAL; - ioba >>= IOMMU_PAGE_SHIFT_4K; + ioba >>= tbl->it_page_shift; if (ioba < tbl->it_offset) return -EINVAL; @@ -1037,7 +1037,7 @@ int iommu_tce_build(struct iommu_table *tbl, unsigned long entry, /* if (unlikely(ret)) pr_err("iommu_tce: %s failed on hwaddr=%lx ioba=%lx kva=%lx ret=%d\n", - __func__, hwaddr, entry << IOMMU_PAGE_SHIFT_4K, + __func__, hwaddr, entry << IOMMU_PAGE_SHIFT(tbl), hwaddr, ret); */ return ret; @@ -1049,14 +1049,14 @@ int iommu_put_tce_user_mode(struct iommu_table *tbl, unsigned long entry, { int ret; struct page *page = NULL; - unsigned long hwaddr, offset = tce & IOMMU_PAGE_MASK_4K & ~PAGE_MASK; + unsigned long hwaddr, offset = tce & IOMMU_PAGE_MASK(tbl) & ~PAGE_MASK; enum dma_data_direction direction = iommu_tce_direction(tce); ret = get_user_pages_fast(tce & PAGE_MASK, 1, direction != DMA_TO_DEVICE, &page); 
if (unlikely(ret != 1)) { /* pr_err("iommu_tce: get_user_pages_fast failed tce=%lx ioba=%lx ret=%d\n", - tce, entry << IOMMU_PAGE_SHIFT_4K, ret); */ + tce, entry << IOMMU_PAGE_SHIFT(tbl), ret); */ return -EFAULT; } hwaddr = (unsigned long) page_address(page) + offset; @@ -1067,7 +1067,7 @@ int iommu_put_tce_user_mode(struct iommu_table *tbl, unsigned long entry, if (ret < 0) pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%d\n", - __func__, entry << IOMMU_PAGE_SHIFT_4K, tce, ret); + __func__, entry << tbl->it_page_shift, tce, ret); return ret; } @@ -1127,6 +1127,12 @@ int iommu_add_device(struct device *dev) pr_debug("iommu_tce: adding %s to iommu group %d\n", dev_name(dev), iommu_group_id(tbl->it_group)); + if (PAGE_SIZE < IOMMU_PAGE_SIZE(tbl)) { + pr_err("iommu_tce: unsupported iommu page size."); + pr_err("%s has not been added\n", dev_name(dev)); + return -EINVAL; + } + ret = iommu_group_add_device(tbl->it_group, dev); if (ret < 0) pr_err("iommu_tce: %s has not been added, ret=%d\n", diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c index 170ac24a0106..826d8bd9e522 100644 --- a/arch/powerpc/kernel/vio.c +++ b/arch/powerpc/kernel/vio.c @@ -518,16 +518,18 @@ static dma_addr_t vio_dma_iommu_map_page(struct device *dev, struct page *page, struct dma_attrs *attrs) { struct vio_dev *viodev = to_vio_dev(dev); + struct iommu_table *tbl; dma_addr_t ret = DMA_ERROR_CODE; - if (vio_cmo_alloc(viodev, roundup(size, IOMMU_PAGE_SIZE_4K))) { + tbl = get_iommu_table_base(dev); + if (vio_cmo_alloc(viodev, roundup(size, IOMMU_PAGE_SIZE(tbl)))) { atomic_inc(&viodev->cmo.allocs_failed); return ret; } ret = dma_iommu_ops.map_page(dev, page, offset, size, direction, attrs); if (unlikely(dma_mapping_error(dev, ret))) { - vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE_4K)); + vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE(tbl))); atomic_inc(&viodev->cmo.allocs_failed); } @@ -540,10 +542,12 @@ static void vio_dma_iommu_unmap_page(struct device *dev, dma_addr_t dma_handle, struct dma_attrs *attrs) { struct vio_dev *viodev = to_vio_dev(dev); + struct iommu_table *tbl; + tbl = get_iommu_table_base(dev); dma_iommu_ops.unmap_page(dev, dma_handle, size, direction, attrs); - vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE_4K)); + vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE(tbl))); } static int vio_dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist, @@ -551,12 +555,14 @@ static int vio_dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist, struct dma_attrs *attrs) { struct vio_dev *viodev = to_vio_dev(dev); + struct iommu_table *tbl; struct scatterlist *sgl; int ret, count = 0; size_t alloc_size = 0; + tbl = get_iommu_table_base(dev); for (sgl = sglist; count < nelems; count++, sgl++) - alloc_size += roundup(sgl->length, IOMMU_PAGE_SIZE_4K); + alloc_size += roundup(sgl->length, IOMMU_PAGE_SIZE(tbl)); if (vio_cmo_alloc(viodev, alloc_size)) { atomic_inc(&viodev->cmo.allocs_failed); @@ -572,7 +578,7 @@ static int vio_dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist, } for (sgl = sglist, count = 0; count < ret; count++, sgl++) - alloc_size -= roundup(sgl->dma_length, IOMMU_PAGE_SIZE_4K); + alloc_size -= roundup(sgl->dma_length, IOMMU_PAGE_SIZE(tbl)); if (alloc_size) vio_cmo_dealloc(viodev, alloc_size); @@ -585,12 +591,14 @@ static void vio_dma_iommu_unmap_sg(struct device *dev, struct dma_attrs *attrs) { struct vio_dev *viodev = to_vio_dev(dev); + struct iommu_table *tbl; struct scatterlist *sgl; size_t alloc_size = 0; int count = 0; + 
tbl = get_iommu_table_base(dev); for (sgl = sglist; count < nelems; count++, sgl++) - alloc_size += roundup(sgl->dma_length, IOMMU_PAGE_SIZE_4K); + alloc_size += roundup(sgl->dma_length, IOMMU_PAGE_SIZE(tbl)); dma_iommu_ops.unmap_sg(dev, sglist, nelems, direction, attrs); @@ -706,11 +714,14 @@ static int vio_cmo_bus_probe(struct vio_dev *viodev) { struct vio_cmo_dev_entry *dev_ent; struct device *dev = &viodev->dev; + struct iommu_table *tbl; struct vio_driver *viodrv = to_vio_driver(dev->driver); unsigned long flags; size_t size; bool dma_capable = false; + tbl = get_iommu_table_base(dev); + /* A device requires entitlement if it has a DMA window property */ switch (viodev->family) { case VDEVICE: @@ -737,7 +748,7 @@ static int vio_cmo_bus_probe(struct vio_dev *viodev) } viodev->cmo.desired = - IOMMU_PAGE_ALIGN_4K(viodrv->get_desired_dma(viodev)); + IOMMU_PAGE_ALIGN(viodrv->get_desired_dma(viodev), tbl); if (viodev->cmo.desired < VIO_CMO_MIN_ENT) viodev->cmo.desired = VIO_CMO_MIN_ENT; size = VIO_CMO_MIN_ENT; diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 569b46423a93..b555ebc57ef5 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -762,8 +762,6 @@ static struct notifier_block tce_iommu_bus_nb = { static int __init tce_iommu_bus_notifier_init(void) { - BUILD_BUG_ON(PAGE_SIZE < IOMMU_PAGE_SIZE_4K); - bus_register_notifier(&pci_bus_type, &tce_iommu_bus_nb); return 0; } diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c index f7d7538b6bd9..d04dbaba83dd 100644 --- a/drivers/net/ethernet/ibm/ibmveth.c +++ b/drivers/net/ethernet/ibm/ibmveth.c @@ -1276,31 +1276,34 @@ static unsigned long ibmveth_get_desired_dma(struct vio_dev *vdev) { struct net_device *netdev = dev_get_drvdata(&vdev->dev); struct ibmveth_adapter *adapter; + struct iommu_table *tbl; unsigned long ret; int i; int rxqentries = 1; + tbl = get_iommu_table_base(&vdev->dev); + /* netdev inits at probe time along with the structures we need below*/ if (netdev == NULL) - return IOMMU_PAGE_ALIGN_4K(IBMVETH_IO_ENTITLEMENT_DEFAULT); + return IOMMU_PAGE_ALIGN(IBMVETH_IO_ENTITLEMENT_DEFAULT, tbl); adapter = netdev_priv(netdev); ret = IBMVETH_BUFF_LIST_SIZE + IBMVETH_FILT_LIST_SIZE; - ret += IOMMU_PAGE_ALIGN_4K(netdev->mtu); + ret += IOMMU_PAGE_ALIGN(netdev->mtu, tbl); for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) { /* add the size of the active receive buffers */ if (adapter->rx_buff_pool[i].active) ret += adapter->rx_buff_pool[i].size * - IOMMU_PAGE_ALIGN_4K(adapter->rx_buff_pool[i]. - buff_size); + IOMMU_PAGE_ALIGN(adapter->rx_buff_pool[i]. + buff_size, tbl); rxqentries += adapter->rx_buff_pool[i].size; } /* add the size of the receive queue entries */ - ret += IOMMU_PAGE_ALIGN_4K( - rxqentries * sizeof(struct ibmveth_rx_q_entry)); + ret += IOMMU_PAGE_ALIGN( + rxqentries * sizeof(struct ibmveth_rx_q_entry), tbl); return ret; } -- cgit v1.2.3 From a68c33f3592eef63304a5f5ab68466539ccac56c Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 16 Dec 2013 10:47:54 +1100 Subject: powerpc: Fix endian issues in power7/8 machine check handler The SLB save area is shared with the hypervisor and is defined as big endian, so we need to byte swap on little endian builds. 
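As a rough userspace sketch of the access pattern this implies (the struct below is a simplified stand-in for the real slb_shadow layout, and glibc's be32toh()/be64toh() stand in for the kernel's be32_to_cpu()/be64_to_cpu()):

    #include <stdint.h>
    #include <stdio.h>
    #include <endian.h>   /* be32toh()/be64toh(); the kernel uses be32_to_cpu()/be64_to_cpu() */

    /* Simplified stand-in for the hypervisor-shared SLB shadow buffer. */
    struct slb_shadow_example {
        uint32_t persistent;        /* big-endian: number of valid entries */
        struct {
            uint64_t esid;          /* big-endian */
            uint64_t vsid;          /* big-endian */
        } save_area[2];
    };

    static void load_slb_entries(const struct slb_shadow_example *slb)
    {
        uint32_t n = be32toh(slb->persistent);

        for (uint32_t i = 0; i < n && i < 2; i++) {
            uint64_t rb = be64toh(slb->save_area[i].esid);
            uint64_t rs = be64toh(slb->save_area[i].vsid);

            rb = (rb & ~0xFFFull) | i;   /* SLB index lives in the low bits of rb */
            printf("slbmte rs=%#llx rb=%#llx\n",
                   (unsigned long long)rs, (unsigned long long)rb);
        }
    }

    int main(void)
    {
        struct slb_shadow_example slb = {
            .persistent = htobe32(1),
            .save_area = {{ .esid = htobe64(0xc000000000000000ull),
                            .vsid = htobe64(0x400ull) }},
        };

        load_slb_entries(&slb);
        return 0;
    }

The point is simply that every load from the shared area goes through an explicit big-endian accessor, so the same code produces correct values on both byte orders.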
Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/mce_power.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c index b36e777a734f..27c93f41166f 100644 --- a/arch/powerpc/kernel/mce_power.c +++ b/arch/powerpc/kernel/mce_power.c @@ -50,12 +50,12 @@ static void flush_and_reload_slb(void) if (!slb) return; - n = min_t(u32, slb->persistent, SLB_MIN_SIZE); + n = min_t(u32, be32_to_cpu(slb->persistent), SLB_MIN_SIZE); /* Load up the SLB entries from shadow SLB */ for (i = 0; i < n; i++) { - unsigned long rb = slb->save_area[i].esid; - unsigned long rs = slb->save_area[i].vsid; + unsigned long rb = be64_to_cpu(slb->save_area[i].esid); + unsigned long rs = be64_to_cpu(slb->save_area[i].vsid); rb = (rb & ~0xFFFul) | i; asm volatile("slbmte %0,%1" : : "r" (rs), "r" (rb)); -- cgit v1.2.3 From 228b1a473037c89d524e03a569c688a22241b4ea Mon Sep 17 00:00:00 2001 From: Mihai Caraman Date: Thu, 8 Aug 2013 15:56:09 +0300 Subject: powerpc/booke64: Add LRAT error exception handler LRAT (Logical to Real Address Translation) present in MMU v2 provides hardware translation from a logical page number (LPN) to a real page number (RPN) when tlbwe is executed by a guest or when a page table translation occurs from a guest virtual address. Add LRAT error exception handler to Booke3E 64-bit kernel and the basic KVM handler to avoid build breakage. This is a prerequisite for KVM LRAT support that will follow. Signed-off-by: Mihai Caraman Signed-off-by: Scott Wood --- arch/powerpc/include/asm/kvm_asm.h | 1 + arch/powerpc/include/asm/reg_booke.h | 1 + arch/powerpc/kernel/cpu_setup_fsl_booke.S | 12 ++++++++++++ arch/powerpc/kernel/exceptions-64e.S | 17 +++++++++++++++++ arch/powerpc/kvm/bookehv_interrupts.S | 2 ++ 5 files changed, 33 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h index 1bd92fd43cfb..1503d8c7c41b 100644 --- a/arch/powerpc/include/asm/kvm_asm.h +++ b/arch/powerpc/include/asm/kvm_asm.h @@ -74,6 +74,7 @@ #define BOOKE_INTERRUPT_GUEST_DBELL_CRIT 39 #define BOOKE_INTERRUPT_HV_SYSCALL 40 #define BOOKE_INTERRUPT_HV_PRIV 41 +#define BOOKE_INTERRUPT_LRAT_ERROR 42 /* book3s */ diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h index 2e31aacd8acc..1f7134dd0946 100644 --- a/arch/powerpc/include/asm/reg_booke.h +++ b/arch/powerpc/include/asm/reg_booke.h @@ -101,6 +101,7 @@ #define SPRN_IVOR39 0x1B1 /* Interrupt Vector Offset Register 39 */ #define SPRN_IVOR40 0x1B2 /* Interrupt Vector Offset Register 40 */ #define SPRN_IVOR41 0x1B3 /* Interrupt Vector Offset Register 41 */ +#define SPRN_IVOR42 0x1B4 /* Interrupt Vector Offset Register 42 */ #define SPRN_GIVOR2 0x1B8 /* Guest IVOR2 */ #define SPRN_GIVOR3 0x1B9 /* Guest IVOR3 */ #define SPRN_GIVOR4 0x1BA /* Guest IVOR4 */ diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_fsl_booke.S index bfb18c7290b7..fa6862db8a02 100644 --- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S +++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S @@ -57,6 +57,12 @@ _GLOBAL(__setup_cpu_e6500) mflr r6 #ifdef CONFIG_PPC64 bl .setup_altivec_ivors + /* Touch IVOR42 only if the CPU supports E.HV category */ + mfspr r10,SPRN_MMUCFG + rlwinm. 
r10,r10,0,MMUCFG_LPIDSIZE + beq 1f + bl .setup_lrat_ivor +1: #endif bl __setup_cpu_e5500 mtlr r6 @@ -119,6 +125,12 @@ _GLOBAL(__setup_cpu_e5500) _GLOBAL(__restore_cpu_e6500) mflr r5 bl .setup_altivec_ivors + /* Touch IVOR42 only if the CPU supports E.HV category */ + mfspr r10,SPRN_MMUCFG + rlwinm. r10,r10,0,MMUCFG_LPIDSIZE + beq 1f + bl .setup_lrat_ivor +1: bl __restore_cpu_e5500 mtlr r5 blr diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index e7751561fd1d..4d5a0b1034e8 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -308,6 +308,7 @@ interrupt_base_book3e: /* fake trap */ EXCEPTION_STUB(0x2e0, guest_doorbell_crit) EXCEPTION_STUB(0x300, hypercall) EXCEPTION_STUB(0x320, ehpriv) + EXCEPTION_STUB(0x340, lrat_error) .globl interrupt_end_book3e interrupt_end_book3e: @@ -677,6 +678,17 @@ kernel_dbg_exc: bl .unknown_exception b .ret_from_except +/* LRAT Error interrupt */ + START_EXCEPTION(lrat_error); + NORMAL_EXCEPTION_PROLOG(0x340, BOOKE_INTERRUPT_LRAT_ERROR, + PROLOG_ADDITION_NONE) + EXCEPTION_COMMON(0x340, PACA_EXGEN, INTS_KEEP) + addi r3,r1,STACK_FRAME_OVERHEAD + bl .save_nvgprs + INTS_RESTORE_HARD + bl .unknown_exception + b .ret_from_except + /* * An interrupt came in while soft-disabled; We mark paca->irq_happened * accordingly and if the interrupt is level sensitive, we hard disable @@ -859,6 +871,7 @@ BAD_STACK_TRAMPOLINE(0x2e0) BAD_STACK_TRAMPOLINE(0x300) BAD_STACK_TRAMPOLINE(0x310) BAD_STACK_TRAMPOLINE(0x320) +BAD_STACK_TRAMPOLINE(0x340) BAD_STACK_TRAMPOLINE(0x400) BAD_STACK_TRAMPOLINE(0x500) BAD_STACK_TRAMPOLINE(0x600) @@ -1414,3 +1427,7 @@ _GLOBAL(setup_ehv_ivors) SET_IVOR(38, 0x2c0) /* Guest Processor Doorbell */ SET_IVOR(39, 0x2e0) /* Guest Processor Doorbell Crit/MC */ blr + +_GLOBAL(setup_lrat_ivor) + SET_IVOR(42, 0x340) /* LRAT Error */ + blr diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S index e8ed7d659c55..a0d6929d8678 100644 --- a/arch/powerpc/kvm/bookehv_interrupts.S +++ b/arch/powerpc/kvm/bookehv_interrupts.S @@ -319,6 +319,8 @@ kvm_handler BOOKE_INTERRUPT_DEBUG, EX_PARAMS(DBG), \ SPRN_DSRR0, SPRN_DSRR1, 0 kvm_handler BOOKE_INTERRUPT_DEBUG, EX_PARAMS(CRIT), \ SPRN_CSRR0, SPRN_CSRR1, 0 +kvm_handler BOOKE_INTERRUPT_LRAT_ERROR, EX_PARAMS(GEN), \ + SPRN_SRR0, SPRN_SRR1, (NEED_EMU | NEED_DEAR | NEED_ESR) #else /* * For input register values, see arch/powerpc/include/asm/kvm_booke_hv_asm.h -- cgit v1.2.3 From 640e922501103aaf2e0abb4cf4de5d49fa8342f7 Mon Sep 17 00:00:00 2001 From: Joseph Myers Date: Tue, 10 Dec 2013 23:07:45 +0000 Subject: powerpc: fix exception clearing in e500 SPE float emulation The e500 SPE floating-point emulation code clears existing exceptions (__FPU_FPSCR &= ~FP_EX_MASK;) before ORing in the exceptions from the emulated operation. However, these exception bits are the "sticky", cumulative exception bits, and should only be cleared by the user program setting SPEFSCR, not implicitly by any floating-point instruction (whether executed purely by the hardware or emulated). The spurious clearing of these bits shows up as missing exceptions in glibc testing. Fixing this, however, is not as simple as just not clearing the bits, because while the bits may be from previous floating-point operations (in which case they should not be cleared), the processor can also set the sticky bits itself before the interrupt for an exception occurs, and this can happen in cases when IEEE 754 semantics are that the sticky bit should not be set. 
Specifically, the "invalid" sticky bit is set in various cases with non-finite operands, where IEEE 754 semantics do not involve raising such an exception, and the "underflow" sticky bit is set in cases of exact underflow, whereas IEEE 754 semantics are that this flag is set only for inexact underflow. Thus, for correct emulation the kernel needs to know the setting of these two sticky bits before the instruction being emulated. When a floating-point operation raises an exception, the kernel can note the state of the sticky bits immediately afterwards. Some functions that affect the state of these bits, such as fesetenv and feholdexcept, need to use prctl with PR_GET_FPEXC and PR_SET_FPEXC anyway, and so it is natural to record the state of those bits during that call into the kernel and so avoid any need for a separate call into the kernel to inform it of a change to those bits. Thus, the interface I chose to use (in this patch and the glibc port) is that one of those prctl calls must be made after any userspace change to those sticky bits, other than through a floating-point operation that traps into the kernel anyway. feclearexcept and fesetexceptflag duly make those calls, which would not be required were it not for this issue. The previous EGLIBC port, and the uClibc code copied from it, is fundamentally broken as regards any use of prctl for floating-point exceptions because it didn't use the PR_FP_EXC_SW_ENABLE bit in its prctl calls (and did various worse things, such as passing a pointer when prctl expected an integer). If you avoid anything where prctl is used, the clearing of sticky bits still means it will never give anything approximating correct exception semantics with existing kernels. I don't believe the patch makes things any worse for existing code that doesn't try to inform the kernel of changes to sticky bits - such code may get incorrect exceptions in some cases, but it would have done so anyway in other cases. 
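To make the resulting update rule concrete, here is a minimal standalone sketch of the masking applied in math_efp.c; the EX_* values are illustrative placeholders only (the real code uses the FP_EX_* masks and current->thread.spefscr_last):

    #include <stdint.h>
    #include <stdio.h>

    /* Illustrative bit values only; the real masks are the FP_EX_* and
     * SPEFSCR_* definitions, not these. */
    #define EX_INVALID     0x1
    #define EX_UNDERFLOW   0x2
    #define EX_OVERFLOW    0x4
    #define EX_MASK        (EX_INVALID | EX_UNDERFLOW | EX_OVERFLOW)

    /*
     * Mirrors the update in math_efp.c: only "invalid" and "underflow" may
     * be cleared, and only if they were not already sticky at the last
     * prctl call or trap return (spefscr_last); everything the emulated
     * instruction raised is then ORed back in.
     */
    static uint32_t new_fpscr(uint32_t hw_fpscr, uint32_t spefscr_last,
                              uint32_t cur_exceptions)
    {
        uint32_t v = hw_fpscr;

        v &= ~(EX_INVALID | EX_UNDERFLOW) | spefscr_last;
        v |= cur_exceptions & EX_MASK;
        return v;
    }

    int main(void)
    {
        /* Hardware spuriously set "invalid" for a non-finite operand, but
         * userspace had not set it before: it gets cleared.  Prints 0x4. */
        printf("%#x\n", new_fpscr(EX_INVALID, 0, EX_OVERFLOW));
        /* Userspace had already set "invalid": it is preserved.  Prints 0x1. */
        printf("%#x\n", new_fpscr(EX_INVALID, EX_INVALID, 0));
        return 0;
    }
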
Signed-off-by: Joseph Myers Signed-off-by: Scott Wood --- arch/powerpc/include/asm/processor.h | 6 +++++- arch/powerpc/kernel/process.c | 30 ++++++++++++++++++++++++++++-- arch/powerpc/math-emu/math_efp.c | 20 +++++++++++++++++++- 3 files changed, 52 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index fc14a38c7ccf..91441d9cbaae 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -256,6 +256,8 @@ struct thread_struct { unsigned long evr[32]; /* upper 32-bits of SPE regs */ u64 acc; /* Accumulator */ unsigned long spefscr; /* SPE & eFP status */ + unsigned long spefscr_last; /* SPEFSCR value on last prctl + call or trap return */ int used_spe; /* set if process has used spe */ #endif /* CONFIG_SPE */ #ifdef CONFIG_PPC_TRANSACTIONAL_MEM @@ -317,7 +319,9 @@ struct thread_struct { (_ALIGN_UP(sizeof(init_thread_info), 16) + (unsigned long) &init_stack) #ifdef CONFIG_SPE -#define SPEFSCR_INIT .spefscr = SPEFSCR_FINVE | SPEFSCR_FDBZE | SPEFSCR_FUNFE | SPEFSCR_FOVFE, +#define SPEFSCR_INIT \ + .spefscr = SPEFSCR_FINVE | SPEFSCR_FDBZE | SPEFSCR_FUNFE | SPEFSCR_FOVFE, \ + .spefscr_last = SPEFSCR_FINVE | SPEFSCR_FDBZE | SPEFSCR_FUNFE | SPEFSCR_FOVFE, #else #define SPEFSCR_INIT #endif diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 3386d8ab7eb0..b08c0d03530f 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1175,6 +1175,19 @@ int set_fpexc_mode(struct task_struct *tsk, unsigned int val) if (val & PR_FP_EXC_SW_ENABLE) { #ifdef CONFIG_SPE if (cpu_has_feature(CPU_FTR_SPE)) { + /* + * When the sticky exception bits are set + * directly by userspace, it must call prctl + * with PR_GET_FPEXC (with PR_FP_EXC_SW_ENABLE + * in the existing prctl settings) or + * PR_SET_FPEXC (with PR_FP_EXC_SW_ENABLE in + * the bits being set). functions + * saving and restoring the whole + * floating-point environment need to do so + * anyway to restore the prctl settings from + * the saved environment. + */ + tsk->thread.spefscr_last = mfspr(SPRN_SPEFSCR); tsk->thread.fpexc_mode = val & (PR_FP_EXC_SW_ENABLE | PR_FP_ALL_EXCEPT); return 0; @@ -1206,9 +1219,22 @@ int get_fpexc_mode(struct task_struct *tsk, unsigned long adr) if (tsk->thread.fpexc_mode & PR_FP_EXC_SW_ENABLE) #ifdef CONFIG_SPE - if (cpu_has_feature(CPU_FTR_SPE)) + if (cpu_has_feature(CPU_FTR_SPE)) { + /* + * When the sticky exception bits are set + * directly by userspace, it must call prctl + * with PR_GET_FPEXC (with PR_FP_EXC_SW_ENABLE + * in the existing prctl settings) or + * PR_SET_FPEXC (with PR_FP_EXC_SW_ENABLE in + * the bits being set). functions + * saving and restoring the whole + * floating-point environment need to do so + * anyway to restore the prctl settings from + * the saved environment. + */ + tsk->thread.spefscr_last = mfspr(SPRN_SPEFSCR); val = tsk->thread.fpexc_mode; - else + } else return -EINVAL; #else return -EINVAL; diff --git a/arch/powerpc/math-emu/math_efp.c b/arch/powerpc/math-emu/math_efp.c index a73f0884d358..59835c625dc6 100644 --- a/arch/powerpc/math-emu/math_efp.c +++ b/arch/powerpc/math-emu/math_efp.c @@ -630,9 +630,27 @@ update_ccr: regs->ccr |= (IR << ((7 - ((speinsn >> 23) & 0x7)) << 2)); update_regs: - __FPU_FPSCR &= ~FP_EX_MASK; + /* + * If the "invalid" exception sticky bit was set by the + * processor for non-finite input, but was not set before the + * instruction being emulated, clear it. 
Likewise for the + * "underflow" bit, which may have been set by the processor + * for exact underflow, not just inexact underflow when the + * flag should be set for IEEE 754 semantics. Other sticky + * exceptions will only be set by the processor when they are + * correct according to IEEE 754 semantics, and we must not + * clear sticky bits that were already set before the emulated + * instruction as they represent the user-visible sticky + * exception status. "inexact" traps to kernel are not + * required for IEEE semantics and are not enabled by default, + * so the "inexact" sticky bit may have been set by a previous + * instruction without the kernel being aware of it. + */ + __FPU_FPSCR + &= ~(FP_EX_INVALID | FP_EX_UNDERFLOW) | current->thread.spefscr_last; __FPU_FPSCR |= (FP_CUR_EXCEPTIONS & FP_EX_MASK); mtspr(SPRN_SPEFSCR, __FPU_FPSCR); + current->thread.spefscr_last = __FPU_FPSCR; current->thread.evr[fc] = vc.wp[0]; regs->gpr[fc] = vc.wp[1]; -- cgit v1.2.3 From b58a7bd6df7b61446b833a7c72f8a1f11066e0b0 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Thu, 2 Jan 2014 16:37:50 -0600 Subject: powerpc/fsl-booke: Use SPRN_SPRGn rather than mfsprg/mtsprg This fixes a build break that was probably introduced with the removal of -Wa,-me500 (commit f49596a4cf4753d13951608f24f939a59fdcc653), where the assembler refuses to recognize SPRG4-7 with a generic PPC target. Signed-off-by: Scott Wood Cc: Dongsheng Wang Cc: Anton Vorontsov Reviewed-by: Wang Dongsheng Tested-by: Wang Dongsheng --- arch/powerpc/kernel/swsusp_booke.S | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/swsusp_booke.S b/arch/powerpc/kernel/swsusp_booke.S index 0f204053e5b5..553c1405ee05 100644 --- a/arch/powerpc/kernel/swsusp_booke.S +++ b/arch/powerpc/kernel/swsusp_booke.S @@ -74,21 +74,21 @@ _GLOBAL(swsusp_arch_suspend) bne 1b /* Save SPRGs */ - mfsprg r4,0 + mfspr r4,SPRN_SPRG0 stw r4,SL_SPRG0(r11) - mfsprg r4,1 + mfspr r4,SPRN_SPRG1 stw r4,SL_SPRG1(r11) - mfsprg r4,2 + mfspr r4,SPRN_SPRG2 stw r4,SL_SPRG2(r11) - mfsprg r4,3 + mfspr r4,SPRN_SPRG3 stw r4,SL_SPRG3(r11) - mfsprg r4,4 + mfspr r4,SPRN_SPRG4 stw r4,SL_SPRG4(r11) - mfsprg r4,5 + mfspr r4,SPRN_SPRG5 stw r4,SL_SPRG5(r11) - mfsprg r4,6 + mfspr r4,SPRN_SPRG6 stw r4,SL_SPRG6(r11) - mfsprg r4,7 + mfspr r4,SPRN_SPRG7 stw r4,SL_SPRG7(r11) /* Call the low level suspend stuff (we should probably have made @@ -150,21 +150,21 @@ _GLOBAL(swsusp_arch_resume) bl _tlbil_all lwz r4,SL_SPRG0(r11) - mtsprg 0,r4 + mtspr SPRN_SPRG0,r4 lwz r4,SL_SPRG1(r11) - mtsprg 1,r4 + mtspr SPRN_SPRG1,r4 lwz r4,SL_SPRG2(r11) - mtsprg 2,r4 + mtspr SPRN_SPRG2,r4 lwz r4,SL_SPRG3(r11) - mtsprg 3,r4 + mtspr SPRN_SPRG3,r4 lwz r4,SL_SPRG4(r11) - mtsprg 4,r4 + mtspr SPRN_SPRG4,r4 lwz r4,SL_SPRG5(r11) - mtsprg 5,r4 + mtspr SPRN_SPRG5,r4 lwz r4,SL_SPRG6(r11) - mtsprg 6,r4 + mtspr SPRN_SPRG6,r4 lwz r4,SL_SPRG7(r11) - mtsprg 7,r4 + mtspr SPRN_SPRG7,r4 /* restore the MSR */ lwz r3,SL_MSR(r11) -- cgit v1.2.3 From 202e059ce34d5c5e3ff8a542866c280d575ccb17 Mon Sep 17 00:00:00 2001 From: Wang Dongsheng Date: Tue, 17 Dec 2013 16:17:00 +0800 Subject: powerpc/85xx: add hardware automatically enter altivec idle state Each core's AltiVec unit may be placed into a power savings mode by turning off power to the unit. Core hardware will automatically power down the AltiVec unit after no AltiVec instructions have executed in N cycles. The AltiVec power-control is triggered by hardware. 
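The patch below does this with an mfspr/oris/rlwimi/mtspr sequence; as a rough C illustration of the same read-modify-write, with made-up bit positions standing in for the real PWRMGTCR0_AV_IDLE_PD_EN and PWRMGTCR0_AV_IDLE_CNT definitions from asm/reg_booke.h:

    #include <stdint.h>
    #include <stdio.h>

    /* Placeholder bit layout -- only stand-ins for the real PWRMGTCR0 fields. */
    #define AV_IDLE_PD_EN       (1u << 22)
    #define AV_IDLE_CNT_SHIFT   16
    #define AV_IDLE_CNT_MASK    (0x3fu << AV_IDLE_CNT_SHIFT)

    /*
     * C equivalent of the mfspr/oris/rlwimi/mtspr sequence: read-modify-write
     * the power management control register, enabling AltiVec idle power-down
     * and inserting the wait count into its field.
     */
    static uint32_t set_altivec_idle(uint32_t pwrmgtcr0, uint32_t wait_count)
    {
        pwrmgtcr0 |= AV_IDLE_PD_EN;
        pwrmgtcr0 = (pwrmgtcr0 & ~AV_IDLE_CNT_MASK) |
                    ((wait_count << AV_IDLE_CNT_SHIFT) & AV_IDLE_CNT_MASK);
        return pwrmgtcr0;
    }

    int main(void)
    {
        printf("PWRMGTCR0 = %#x\n", set_altivec_idle(0, 50));
        return 0;
    }

The rlwimi instruction performs exactly this masked field insert in a single operation, which is why the assembly needs only one scratch register for the count.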
Signed-off-by: Wang Dongsheng Signed-off-by: Scott Wood --- arch/powerpc/kernel/cpu_setup_fsl_booke.S | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_fsl_booke.S index fa6862db8a02..26c09db2ec20 100644 --- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S +++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S @@ -53,6 +53,25 @@ _GLOBAL(__e500_dcache_setup) isync blr +/* + * FIXME - we haven't yet done testing to determine a reasonable default + * value for AV_WAIT_IDLE_BIT. + */ +#define AV_WAIT_IDLE_BIT 50 /* 1ms, TB frequency is 41.66MHZ */ +_GLOBAL(setup_altivec_idle) + mfspr r3, SPRN_PWRMGTCR0 + + /* Enable Altivec Idle */ + oris r3, r3, PWRMGTCR0_AV_IDLE_PD_EN@h + li r11, AV_WAIT_IDLE_BIT + + /* Set Automatic AltiVec Idle Count */ + rlwimi r3, r11, PWRMGTCR0_AV_IDLE_CNT_SHIFT, PWRMGTCR0_AV_IDLE_CNT + + mtspr SPRN_PWRMGTCR0, r3 + + blr + _GLOBAL(__setup_cpu_e6500) mflr r6 #ifdef CONFIG_PPC64 @@ -64,6 +83,7 @@ _GLOBAL(__setup_cpu_e6500) bl .setup_lrat_ivor 1: #endif + bl setup_altivec_idle bl __setup_cpu_e5500 mtlr r6 blr @@ -131,6 +151,7 @@ _GLOBAL(__restore_cpu_e6500) beq 1f bl .setup_lrat_ivor 1: + bl .setup_altivec_idle bl __restore_cpu_e5500 mtlr r5 blr -- cgit v1.2.3 From 1d47ddf7c3725e889763b1fffa70a04e1061940b Mon Sep 17 00:00:00 2001 From: Wang Dongsheng Date: Tue, 17 Dec 2013 16:17:01 +0800 Subject: powerpc/85xx: add hardware automatically enter pw20 state Using hardware features make core automatically enter PW20 state. Set a TB count to hardware, the effective count begins when PW10 is entered. When the effective period has expired, the core will proceed from PW10 to PW20 if no exit conditions have occurred during the period. Signed-off-by: Wang Dongsheng Signed-off-by: Scott Wood --- arch/powerpc/kernel/cpu_setup_fsl_booke.S | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_fsl_booke.S index 26c09db2ec20..cc2d8962e090 100644 --- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S +++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S @@ -53,6 +53,25 @@ _GLOBAL(__e500_dcache_setup) isync blr +/* + * FIXME - we haven't yet done testing to determine a reasonable default + * value for PW20_WAIT_IDLE_BIT. + */ +#define PW20_WAIT_IDLE_BIT 50 /* 1ms, TB frequency is 41.66MHZ */ +_GLOBAL(setup_pw20_idle) + mfspr r3, SPRN_PWRMGTCR0 + + /* Set PW20_WAIT bit, enable pw20 state*/ + ori r3, r3, PWRMGTCR0_PW20_WAIT + li r11, PW20_WAIT_IDLE_BIT + + /* Set Automatic PW20 Core Idle Count */ + rlwimi r3, r11, PWRMGTCR0_PW20_ENT_SHIFT, PWRMGTCR0_PW20_ENT + + mtspr SPRN_PWRMGTCR0, r3 + + blr + /* * FIXME - we haven't yet done testing to determine a reasonable default * value for AV_WAIT_IDLE_BIT. @@ -83,6 +102,7 @@ _GLOBAL(__setup_cpu_e6500) bl .setup_lrat_ivor 1: #endif + bl setup_pw20_idle bl setup_altivec_idle bl __setup_cpu_e5500 mtlr r6 @@ -151,6 +171,7 @@ _GLOBAL(__restore_cpu_e6500) beq 1f bl .setup_lrat_ivor 1: + bl .setup_pw20_idle bl .setup_altivec_idle bl __restore_cpu_e5500 mtlr r5 -- cgit v1.2.3 From 1820a8d2163554dcd0272cc095338499db4a4dc3 Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Tue, 8 Oct 2013 09:32:19 +0530 Subject: kvm/powerpc: rename kvm_hypercall() to epapr_hypercall() kvm_hypercall() have nothing KVM specific, so renamed to epapr_hypercall(). 
Also this in moved to arch/powerpc/include/asm/epapr_hcalls.h Signed-off-by: Bharat Bhushan Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/epapr_hcalls.h | 46 +++++++++++++++++++++++++++++++++ arch/powerpc/include/asm/kvm_para.h | 23 +++++------------ arch/powerpc/kernel/kvm.c | 41 ++--------------------------- 3 files changed, 54 insertions(+), 56 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/epapr_hcalls.h b/arch/powerpc/include/asm/epapr_hcalls.h index 86b0ac79990c..eec87f9494bc 100644 --- a/arch/powerpc/include/asm/epapr_hcalls.h +++ b/arch/powerpc/include/asm/epapr_hcalls.h @@ -460,5 +460,51 @@ static inline unsigned int ev_idle(void) return r3; } + +#ifdef CONFIG_EPAPR_PARAVIRT +static inline unsigned long epapr_hypercall(unsigned long *in, + unsigned long *out, + unsigned long nr) +{ + unsigned long register r0 asm("r0"); + unsigned long register r3 asm("r3") = in[0]; + unsigned long register r4 asm("r4") = in[1]; + unsigned long register r5 asm("r5") = in[2]; + unsigned long register r6 asm("r6") = in[3]; + unsigned long register r7 asm("r7") = in[4]; + unsigned long register r8 asm("r8") = in[5]; + unsigned long register r9 asm("r9") = in[6]; + unsigned long register r10 asm("r10") = in[7]; + unsigned long register r11 asm("r11") = nr; + unsigned long register r12 asm("r12"); + + asm volatile("bl epapr_hypercall_start" + : "=r"(r0), "=r"(r3), "=r"(r4), "=r"(r5), "=r"(r6), + "=r"(r7), "=r"(r8), "=r"(r9), "=r"(r10), "=r"(r11), + "=r"(r12) + : "r"(r3), "r"(r4), "r"(r5), "r"(r6), "r"(r7), "r"(r8), + "r"(r9), "r"(r10), "r"(r11) + : "memory", "cc", "xer", "ctr", "lr"); + + out[0] = r4; + out[1] = r5; + out[2] = r6; + out[3] = r7; + out[4] = r8; + out[5] = r9; + out[6] = r10; + out[7] = r11; + + return r3; +} +#else +static unsigned long epapr_hypercall(unsigned long *in, + unsigned long *out, + unsigned long nr) +{ + return EV_UNIMPLEMENTED; +} +#endif + #endif /* !__ASSEMBLY__ */ #endif /* _EPAPR_HCALLS_H */ diff --git a/arch/powerpc/include/asm/kvm_para.h b/arch/powerpc/include/asm/kvm_para.h index 2b119654b4c1..c18660ef26d8 100644 --- a/arch/powerpc/include/asm/kvm_para.h +++ b/arch/powerpc/include/asm/kvm_para.h @@ -39,10 +39,6 @@ static inline int kvm_para_available(void) return 1; } -extern unsigned long kvm_hypercall(unsigned long *in, - unsigned long *out, - unsigned long nr); - #else static inline int kvm_para_available(void) @@ -50,13 +46,6 @@ static inline int kvm_para_available(void) return 0; } -static unsigned long kvm_hypercall(unsigned long *in, - unsigned long *out, - unsigned long nr) -{ - return EV_UNIMPLEMENTED; -} - #endif static inline long kvm_hypercall0_1(unsigned int nr, unsigned long *r2) @@ -65,7 +54,7 @@ static inline long kvm_hypercall0_1(unsigned int nr, unsigned long *r2) unsigned long out[8]; unsigned long r; - r = kvm_hypercall(in, out, KVM_HCALL_TOKEN(nr)); + r = epapr_hypercall(in, out, KVM_HCALL_TOKEN(nr)); *r2 = out[0]; return r; @@ -76,7 +65,7 @@ static inline long kvm_hypercall0(unsigned int nr) unsigned long in[8]; unsigned long out[8]; - return kvm_hypercall(in, out, KVM_HCALL_TOKEN(nr)); + return epapr_hypercall(in, out, KVM_HCALL_TOKEN(nr)); } static inline long kvm_hypercall1(unsigned int nr, unsigned long p1) @@ -85,7 +74,7 @@ static inline long kvm_hypercall1(unsigned int nr, unsigned long p1) unsigned long out[8]; in[0] = p1; - return kvm_hypercall(in, out, KVM_HCALL_TOKEN(nr)); + return epapr_hypercall(in, out, KVM_HCALL_TOKEN(nr)); } static inline long kvm_hypercall2(unsigned int nr, 
unsigned long p1, @@ -96,7 +85,7 @@ static inline long kvm_hypercall2(unsigned int nr, unsigned long p1, in[0] = p1; in[1] = p2; - return kvm_hypercall(in, out, KVM_HCALL_TOKEN(nr)); + return epapr_hypercall(in, out, KVM_HCALL_TOKEN(nr)); } static inline long kvm_hypercall3(unsigned int nr, unsigned long p1, @@ -108,7 +97,7 @@ static inline long kvm_hypercall3(unsigned int nr, unsigned long p1, in[0] = p1; in[1] = p2; in[2] = p3; - return kvm_hypercall(in, out, KVM_HCALL_TOKEN(nr)); + return epapr_hypercall(in, out, KVM_HCALL_TOKEN(nr)); } static inline long kvm_hypercall4(unsigned int nr, unsigned long p1, @@ -122,7 +111,7 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1, in[1] = p2; in[2] = p3; in[3] = p4; - return kvm_hypercall(in, out, KVM_HCALL_TOKEN(nr)); + return epapr_hypercall(in, out, KVM_HCALL_TOKEN(nr)); } diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c index db28032e320e..6a0175297b0d 100644 --- a/arch/powerpc/kernel/kvm.c +++ b/arch/powerpc/kernel/kvm.c @@ -413,13 +413,13 @@ static void kvm_map_magic_page(void *data) { u32 *features = data; - ulong in[8]; + ulong in[8] = {0}; ulong out[8]; in[0] = KVM_MAGIC_PAGE; in[1] = KVM_MAGIC_PAGE; - kvm_hypercall(in, out, KVM_HCALL_TOKEN(KVM_HC_PPC_MAP_MAGIC_PAGE)); + epapr_hypercall(in, out, KVM_HCALL_TOKEN(KVM_HC_PPC_MAP_MAGIC_PAGE)); *features = out[0]; } @@ -711,43 +711,6 @@ static void kvm_use_magic_page(void) kvm_patching_worked ? "worked" : "failed"); } -unsigned long kvm_hypercall(unsigned long *in, - unsigned long *out, - unsigned long nr) -{ - unsigned long register r0 asm("r0"); - unsigned long register r3 asm("r3") = in[0]; - unsigned long register r4 asm("r4") = in[1]; - unsigned long register r5 asm("r5") = in[2]; - unsigned long register r6 asm("r6") = in[3]; - unsigned long register r7 asm("r7") = in[4]; - unsigned long register r8 asm("r8") = in[5]; - unsigned long register r9 asm("r9") = in[6]; - unsigned long register r10 asm("r10") = in[7]; - unsigned long register r11 asm("r11") = nr; - unsigned long register r12 asm("r12"); - - asm volatile("bl epapr_hypercall_start" - : "=r"(r0), "=r"(r3), "=r"(r4), "=r"(r5), "=r"(r6), - "=r"(r7), "=r"(r8), "=r"(r9), "=r"(r10), "=r"(r11), - "=r"(r12) - : "r"(r3), "r"(r4), "r"(r5), "r"(r6), "r"(r7), "r"(r8), - "r"(r9), "r"(r10), "r"(r11) - : "memory", "cc", "xer", "ctr", "lr"); - - out[0] = r4; - out[1] = r5; - out[2] = r6; - out[3] = r7; - out[4] = r8; - out[5] = r9; - out[6] = r10; - out[7] = r11; - - return r3; -} -EXPORT_SYMBOL_GPL(kvm_hypercall); - static __init void kvm_free_tmp(void) { free_reserved_area(&kvm_tmp[kvm_tmp_index], -- cgit v1.2.3 From efff19122315f1431f6b02cd2983b15f5d3957bd Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 15 Oct 2013 20:43:02 +1100 Subject: KVM: PPC: Store FP/VSX/VMX state in thread_fp/vr_state structures This uses struct thread_fp_state and struct thread_vr_state to store the floating-point, VMX/Altivec and VSX state, rather than flat arrays. This makes transferring the state to/from the thread_struct simpler and allows us to unify the get/set_one_reg implementations for the VSX registers. 
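A simplified sketch of why the unification falls out naturally: with the registers held as 32 pairs of 64-bit doublewords (as thread_fp_state does), FP register i and VSX register i index the same array, so one layout serves both accessors. The struct and TS_FPROFFSET value below are stand-ins, not the kernel definitions:

    #include <stdint.h>
    #include <stdio.h>

    /* Simplified stand-in for struct thread_fp_state: 32 registers of two
     * 64-bit doublewords each, so FP register i and VSX register i share
     * storage (the real TS_FPROFFSET handles endian placement). */
    struct fp_state_example {
        uint64_t fpr[32][2];
        uint64_t fpscr;
    };

    #define TS_FPROFFSET   0    /* placeholder; the kernel picks it by endianness */
    #define FPR(s, i)      ((s)->fpr[i][TS_FPROFFSET])

    static uint64_t get_fpr(const struct fp_state_example *s, int i)
    {
        return FPR(s, i);
    }

    static void get_vsr(const struct fp_state_example *s, int i, uint64_t out[2])
    {
        out[0] = s->fpr[i][0];
        out[1] = s->fpr[i][1];
    }

    int main(void)
    {
        struct fp_state_example s = { 0 };
        uint64_t vsr0[2];

        s.fpr[0][0] = 0x1122334455667788ull;
        s.fpr[0][1] = 0x99aabbccddeeff00ull;

        get_vsr(&s, 0, vsr0);
        printf("FPR0 = %#llx, VSR0 = %#llx:%#llx\n",
               (unsigned long long)get_fpr(&s, 0),
               (unsigned long long)vsr0[0],
               (unsigned long long)vsr0[1]);
        return 0;
    }
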
Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_host.h | 12 +-- arch/powerpc/kernel/asm-offsets.c | 11 +- arch/powerpc/kvm/book3s.c | 38 +++++-- arch/powerpc/kvm/book3s_hv.c | 42 -------- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 4 +- arch/powerpc/kvm/book3s_paired_singles.c | 169 +++++++++++++++---------------- arch/powerpc/kvm/book3s_pr.c | 63 +----------- arch/powerpc/kvm/booke.c | 8 +- arch/powerpc/kvm/powerpc.c | 4 +- 9 files changed, 131 insertions(+), 220 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 237d1d25b448..3ca0b430eaee 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -410,8 +410,7 @@ struct kvm_vcpu_arch { ulong gpr[32]; - u64 fpr[32]; - u64 fpscr; + struct thread_fp_state fp; #ifdef CONFIG_SPE ulong evr[32]; @@ -420,12 +419,7 @@ struct kvm_vcpu_arch { u64 acc; #endif #ifdef CONFIG_ALTIVEC - vector128 vr[32]; - vector128 vscr; -#endif - -#ifdef CONFIG_VSX - u64 vsr[64]; + struct thread_vr_state vr; #endif #ifdef CONFIG_KVM_BOOKE_HV @@ -619,6 +613,8 @@ struct kvm_vcpu_arch { #endif }; +#define VCPU_FPR(vcpu, i) (vcpu)->arch.fp.fpr[i][TS_FPROFFSET] + /* Values for vcpu->arch.state */ #define KVMPPC_VCPU_NOTREADY 0 #define KVMPPC_VCPU_RUNNABLE 1 diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 2ea5cc033ec8..8403f9031d93 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -425,14 +425,11 @@ int main(void) DEFINE(VCPU_GUEST_PID, offsetof(struct kvm_vcpu, arch.pid)); DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr)); DEFINE(VCPU_VRSAVE, offsetof(struct kvm_vcpu, arch.vrsave)); - DEFINE(VCPU_FPRS, offsetof(struct kvm_vcpu, arch.fpr)); - DEFINE(VCPU_FPSCR, offsetof(struct kvm_vcpu, arch.fpscr)); + DEFINE(VCPU_FPRS, offsetof(struct kvm_vcpu, arch.fp.fpr)); + DEFINE(VCPU_FPSCR, offsetof(struct kvm_vcpu, arch.fp.fpscr)); #ifdef CONFIG_ALTIVEC - DEFINE(VCPU_VRS, offsetof(struct kvm_vcpu, arch.vr)); - DEFINE(VCPU_VSCR, offsetof(struct kvm_vcpu, arch.vscr)); -#endif -#ifdef CONFIG_VSX - DEFINE(VCPU_VSRS, offsetof(struct kvm_vcpu, arch.vsr)); + DEFINE(VCPU_VRS, offsetof(struct kvm_vcpu, arch.vr.vr)); + DEFINE(VCPU_VSCR, offsetof(struct kvm_vcpu, arch.vr.vscr)); #endif DEFINE(VCPU_XER, offsetof(struct kvm_vcpu, arch.xer)); DEFINE(VCPU_CTR, offsetof(struct kvm_vcpu, arch.ctr)); diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 48cf91bc862f..94e597e6f15c 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -577,10 +577,10 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) break; case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31: i = reg->id - KVM_REG_PPC_FPR0; - val = get_reg_val(reg->id, vcpu->arch.fpr[i]); + val = get_reg_val(reg->id, VCPU_FPR(vcpu, i)); break; case KVM_REG_PPC_FPSCR: - val = get_reg_val(reg->id, vcpu->arch.fpscr); + val = get_reg_val(reg->id, vcpu->arch.fp.fpscr); break; #ifdef CONFIG_ALTIVEC case KVM_REG_PPC_VR0 ... 
KVM_REG_PPC_VR31: @@ -588,19 +588,30 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) r = -ENXIO; break; } - val.vval = vcpu->arch.vr[reg->id - KVM_REG_PPC_VR0]; + val.vval = vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0]; break; case KVM_REG_PPC_VSCR: if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { r = -ENXIO; break; } - val = get_reg_val(reg->id, vcpu->arch.vscr.u[3]); + val = get_reg_val(reg->id, vcpu->arch.vr.vscr.u[3]); break; case KVM_REG_PPC_VRSAVE: val = get_reg_val(reg->id, vcpu->arch.vrsave); break; #endif /* CONFIG_ALTIVEC */ +#ifdef CONFIG_VSX + case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31: + if (cpu_has_feature(CPU_FTR_VSX)) { + long int i = reg->id - KVM_REG_PPC_VSR0; + val.vsxval[0] = vcpu->arch.fp.fpr[i][0]; + val.vsxval[1] = vcpu->arch.fp.fpr[i][1]; + } else { + r = -ENXIO; + } + break; +#endif /* CONFIG_VSX */ case KVM_REG_PPC_DEBUG_INST: { u32 opcode = INS_TW; r = copy_to_user((u32 __user *)(long)reg->addr, @@ -656,10 +667,10 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) break; case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31: i = reg->id - KVM_REG_PPC_FPR0; - vcpu->arch.fpr[i] = set_reg_val(reg->id, val); + VCPU_FPR(vcpu, i) = set_reg_val(reg->id, val); break; case KVM_REG_PPC_FPSCR: - vcpu->arch.fpscr = set_reg_val(reg->id, val); + vcpu->arch.fp.fpscr = set_reg_val(reg->id, val); break; #ifdef CONFIG_ALTIVEC case KVM_REG_PPC_VR0 ... KVM_REG_PPC_VR31: @@ -667,14 +678,14 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) r = -ENXIO; break; } - vcpu->arch.vr[reg->id - KVM_REG_PPC_VR0] = val.vval; + vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0] = val.vval; break; case KVM_REG_PPC_VSCR: if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { r = -ENXIO; break; } - vcpu->arch.vscr.u[3] = set_reg_val(reg->id, val); + vcpu->arch.vr.vscr.u[3] = set_reg_val(reg->id, val); break; case KVM_REG_PPC_VRSAVE: if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { @@ -684,6 +695,17 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) vcpu->arch.vrsave = set_reg_val(reg->id, val); break; #endif /* CONFIG_ALTIVEC */ +#ifdef CONFIG_VSX + case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31: + if (cpu_has_feature(CPU_FTR_VSX)) { + long int i = reg->id - KVM_REG_PPC_VSR0; + vcpu->arch.fp.fpr[i][0] = val.vsxval[0]; + vcpu->arch.fp.fpr[i][1] = val.vsxval[1]; + } else { + r = -ENXIO; + } + break; +#endif /* CONFIG_VSX */ #ifdef CONFIG_KVM_XICS case KVM_REG_PPC_ICP_STATE: if (!vcpu->arch.icp) { diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 088a6e54c998..461f55566167 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -811,27 +811,6 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_SDAR: *val = get_reg_val(id, vcpu->arch.sdar); break; -#ifdef CONFIG_VSX - case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31: - if (cpu_has_feature(CPU_FTR_VSX)) { - /* VSX => FP reg i is stored in arch.vsr[2*i] */ - long int i = id - KVM_REG_PPC_FPR0; - *val = get_reg_val(id, vcpu->arch.vsr[2 * i]); - } else { - /* let generic code handle it */ - r = -EINVAL; - } - break; - case KVM_REG_PPC_VSR0 ... 
KVM_REG_PPC_VSR31: - if (cpu_has_feature(CPU_FTR_VSX)) { - long int i = id - KVM_REG_PPC_VSR0; - val->vsxval[0] = vcpu->arch.vsr[2 * i]; - val->vsxval[1] = vcpu->arch.vsr[2 * i + 1]; - } else { - r = -ENXIO; - } - break; -#endif /* CONFIG_VSX */ case KVM_REG_PPC_VPA_ADDR: spin_lock(&vcpu->arch.vpa_update_lock); *val = get_reg_val(id, vcpu->arch.vpa.next_gpa); @@ -914,27 +893,6 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_SDAR: vcpu->arch.sdar = set_reg_val(id, *val); break; -#ifdef CONFIG_VSX - case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31: - if (cpu_has_feature(CPU_FTR_VSX)) { - /* VSX => FP reg i is stored in arch.vsr[2*i] */ - long int i = id - KVM_REG_PPC_FPR0; - vcpu->arch.vsr[2 * i] = set_reg_val(id, *val); - } else { - /* let generic code handle it */ - r = -EINVAL; - } - break; - case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31: - if (cpu_has_feature(CPU_FTR_VSX)) { - long int i = id - KVM_REG_PPC_VSR0; - vcpu->arch.vsr[2 * i] = val->vsxval[0]; - vcpu->arch.vsr[2 * i + 1] = val->vsxval[1]; - } else { - r = -ENXIO; - } - break; -#endif /* CONFIG_VSX */ case KVM_REG_PPC_VPA_ADDR: addr = set_reg_val(id, *val); r = -EINVAL; diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index d5ddc2d10748..47fd536fb13b 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -1889,7 +1889,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) BEGIN_FTR_SECTION reg = 0 .rept 32 - li r6,reg*16+VCPU_VSRS + li r6,reg*16+VCPU_FPRS STXVD2X(reg,R6,R3) reg = reg + 1 .endr @@ -1951,7 +1951,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) BEGIN_FTR_SECTION reg = 0 .rept 32 - li r7,reg*16+VCPU_VSRS + li r7,reg*16+VCPU_FPRS LXVD2X(reg,R7,R4) reg = reg + 1 .endr diff --git a/arch/powerpc/kvm/book3s_paired_singles.c b/arch/powerpc/kvm/book3s_paired_singles.c index a59a25a13218..c1abd95063f4 100644 --- a/arch/powerpc/kvm/book3s_paired_singles.c +++ b/arch/powerpc/kvm/book3s_paired_singles.c @@ -160,7 +160,7 @@ static inline void kvmppc_sync_qpr(struct kvm_vcpu *vcpu, int rt) { - kvm_cvt_df(&vcpu->arch.fpr[rt], &vcpu->arch.qpr[rt]); + kvm_cvt_df(&VCPU_FPR(vcpu, rt), &vcpu->arch.qpr[rt]); } static void kvmppc_inject_pf(struct kvm_vcpu *vcpu, ulong eaddr, bool is_store) @@ -207,11 +207,11 @@ static int kvmppc_emulate_fpr_load(struct kvm_run *run, struct kvm_vcpu *vcpu, /* put in registers */ switch (ls_type) { case FPU_LS_SINGLE: - kvm_cvt_fd((u32*)tmp, &vcpu->arch.fpr[rs]); + kvm_cvt_fd((u32*)tmp, &VCPU_FPR(vcpu, rs)); vcpu->arch.qpr[rs] = *((u32*)tmp); break; case FPU_LS_DOUBLE: - vcpu->arch.fpr[rs] = *((u64*)tmp); + VCPU_FPR(vcpu, rs) = *((u64*)tmp); break; } @@ -233,18 +233,18 @@ static int kvmppc_emulate_fpr_store(struct kvm_run *run, struct kvm_vcpu *vcpu, switch (ls_type) { case FPU_LS_SINGLE: - kvm_cvt_df(&vcpu->arch.fpr[rs], (u32*)tmp); + kvm_cvt_df(&VCPU_FPR(vcpu, rs), (u32*)tmp); val = *((u32*)tmp); len = sizeof(u32); break; case FPU_LS_SINGLE_LOW: - *((u32*)tmp) = vcpu->arch.fpr[rs]; - val = vcpu->arch.fpr[rs] & 0xffffffff; + *((u32*)tmp) = VCPU_FPR(vcpu, rs); + val = VCPU_FPR(vcpu, rs) & 0xffffffff; len = sizeof(u32); break; case FPU_LS_DOUBLE: - *((u64*)tmp) = vcpu->arch.fpr[rs]; - val = vcpu->arch.fpr[rs]; + *((u64*)tmp) = VCPU_FPR(vcpu, rs); + val = VCPU_FPR(vcpu, rs); len = sizeof(u64); break; default: @@ -301,7 +301,7 @@ static int kvmppc_emulate_psq_load(struct kvm_run *run, struct kvm_vcpu *vcpu, emulated = EMULATE_DONE; /* put in registers */ - kvm_cvt_fd(&tmp[0], &vcpu->arch.fpr[rs]); + 
kvm_cvt_fd(&tmp[0], &VCPU_FPR(vcpu, rs)); vcpu->arch.qpr[rs] = tmp[1]; dprintk(KERN_INFO "KVM: PSQ_LD [0x%x, 0x%x] at 0x%lx (%d)\n", tmp[0], @@ -319,7 +319,7 @@ static int kvmppc_emulate_psq_store(struct kvm_run *run, struct kvm_vcpu *vcpu, u32 tmp[2]; int len = w ? sizeof(u32) : sizeof(u64); - kvm_cvt_df(&vcpu->arch.fpr[rs], &tmp[0]); + kvm_cvt_df(&VCPU_FPR(vcpu, rs), &tmp[0]); tmp[1] = vcpu->arch.qpr[rs]; r = kvmppc_st(vcpu, &addr, len, tmp, true); @@ -512,7 +512,6 @@ static int kvmppc_ps_three_in(struct kvm_vcpu *vcpu, bool rc, u32 *src2, u32 *src3)) { u32 *qpr = vcpu->arch.qpr; - u64 *fpr = vcpu->arch.fpr; u32 ps0_out; u32 ps0_in1, ps0_in2, ps0_in3; u32 ps1_in1, ps1_in2, ps1_in3; @@ -521,20 +520,20 @@ static int kvmppc_ps_three_in(struct kvm_vcpu *vcpu, bool rc, WARN_ON(rc); /* PS0 */ - kvm_cvt_df(&fpr[reg_in1], &ps0_in1); - kvm_cvt_df(&fpr[reg_in2], &ps0_in2); - kvm_cvt_df(&fpr[reg_in3], &ps0_in3); + kvm_cvt_df(&VCPU_FPR(vcpu, reg_in1), &ps0_in1); + kvm_cvt_df(&VCPU_FPR(vcpu, reg_in2), &ps0_in2); + kvm_cvt_df(&VCPU_FPR(vcpu, reg_in3), &ps0_in3); if (scalar & SCALAR_LOW) ps0_in2 = qpr[reg_in2]; - func(&vcpu->arch.fpscr, &ps0_out, &ps0_in1, &ps0_in2, &ps0_in3); + func(&vcpu->arch.fp.fpscr, &ps0_out, &ps0_in1, &ps0_in2, &ps0_in3); dprintk(KERN_INFO "PS3 ps0 -> f(0x%x, 0x%x, 0x%x) = 0x%x\n", ps0_in1, ps0_in2, ps0_in3, ps0_out); if (!(scalar & SCALAR_NO_PS0)) - kvm_cvt_fd(&ps0_out, &fpr[reg_out]); + kvm_cvt_fd(&ps0_out, &VCPU_FPR(vcpu, reg_out)); /* PS1 */ ps1_in1 = qpr[reg_in1]; @@ -545,7 +544,7 @@ static int kvmppc_ps_three_in(struct kvm_vcpu *vcpu, bool rc, ps1_in2 = ps0_in2; if (!(scalar & SCALAR_NO_PS1)) - func(&vcpu->arch.fpscr, &qpr[reg_out], &ps1_in1, &ps1_in2, &ps1_in3); + func(&vcpu->arch.fp.fpscr, &qpr[reg_out], &ps1_in1, &ps1_in2, &ps1_in3); dprintk(KERN_INFO "PS3 ps1 -> f(0x%x, 0x%x, 0x%x) = 0x%x\n", ps1_in1, ps1_in2, ps1_in3, qpr[reg_out]); @@ -561,7 +560,6 @@ static int kvmppc_ps_two_in(struct kvm_vcpu *vcpu, bool rc, u32 *src2)) { u32 *qpr = vcpu->arch.qpr; - u64 *fpr = vcpu->arch.fpr; u32 ps0_out; u32 ps0_in1, ps0_in2; u32 ps1_out; @@ -571,20 +569,20 @@ static int kvmppc_ps_two_in(struct kvm_vcpu *vcpu, bool rc, WARN_ON(rc); /* PS0 */ - kvm_cvt_df(&fpr[reg_in1], &ps0_in1); + kvm_cvt_df(&VCPU_FPR(vcpu, reg_in1), &ps0_in1); if (scalar & SCALAR_LOW) ps0_in2 = qpr[reg_in2]; else - kvm_cvt_df(&fpr[reg_in2], &ps0_in2); + kvm_cvt_df(&VCPU_FPR(vcpu, reg_in2), &ps0_in2); - func(&vcpu->arch.fpscr, &ps0_out, &ps0_in1, &ps0_in2); + func(&vcpu->arch.fp.fpscr, &ps0_out, &ps0_in1, &ps0_in2); if (!(scalar & SCALAR_NO_PS0)) { dprintk(KERN_INFO "PS2 ps0 -> f(0x%x, 0x%x) = 0x%x\n", ps0_in1, ps0_in2, ps0_out); - kvm_cvt_fd(&ps0_out, &fpr[reg_out]); + kvm_cvt_fd(&ps0_out, &VCPU_FPR(vcpu, reg_out)); } /* PS1 */ @@ -594,7 +592,7 @@ static int kvmppc_ps_two_in(struct kvm_vcpu *vcpu, bool rc, if (scalar & SCALAR_HIGH) ps1_in2 = ps0_in2; - func(&vcpu->arch.fpscr, &ps1_out, &ps1_in1, &ps1_in2); + func(&vcpu->arch.fp.fpscr, &ps1_out, &ps1_in1, &ps1_in2); if (!(scalar & SCALAR_NO_PS1)) { qpr[reg_out] = ps1_out; @@ -612,7 +610,6 @@ static int kvmppc_ps_one_in(struct kvm_vcpu *vcpu, bool rc, u32 *dst, u32 *src1)) { u32 *qpr = vcpu->arch.qpr; - u64 *fpr = vcpu->arch.fpr; u32 ps0_out, ps0_in; u32 ps1_in; @@ -620,17 +617,17 @@ static int kvmppc_ps_one_in(struct kvm_vcpu *vcpu, bool rc, WARN_ON(rc); /* PS0 */ - kvm_cvt_df(&fpr[reg_in], &ps0_in); - func(&vcpu->arch.fpscr, &ps0_out, &ps0_in); + kvm_cvt_df(&VCPU_FPR(vcpu, reg_in), &ps0_in); + func(&vcpu->arch.fp.fpscr, &ps0_out, &ps0_in); dprintk(KERN_INFO 
"PS1 ps0 -> f(0x%x) = 0x%x\n", ps0_in, ps0_out); - kvm_cvt_fd(&ps0_out, &fpr[reg_out]); + kvm_cvt_fd(&ps0_out, &VCPU_FPR(vcpu, reg_out)); /* PS1 */ ps1_in = qpr[reg_in]; - func(&vcpu->arch.fpscr, &qpr[reg_out], &ps1_in); + func(&vcpu->arch.fp.fpscr, &qpr[reg_out], &ps1_in); dprintk(KERN_INFO "PS1 ps1 -> f(0x%x) = 0x%x\n", ps1_in, qpr[reg_out]); @@ -649,10 +646,10 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) int ax_rc = inst_get_field(inst, 21, 25); short full_d = inst_get_field(inst, 16, 31); - u64 *fpr_d = &vcpu->arch.fpr[ax_rd]; - u64 *fpr_a = &vcpu->arch.fpr[ax_ra]; - u64 *fpr_b = &vcpu->arch.fpr[ax_rb]; - u64 *fpr_c = &vcpu->arch.fpr[ax_rc]; + u64 *fpr_d = &VCPU_FPR(vcpu, ax_rd); + u64 *fpr_a = &VCPU_FPR(vcpu, ax_ra); + u64 *fpr_b = &VCPU_FPR(vcpu, ax_rb); + u64 *fpr_c = &VCPU_FPR(vcpu, ax_rc); bool rcomp = (inst & 1) ? true : false; u32 cr = kvmppc_get_cr(vcpu); @@ -674,11 +671,11 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) /* Do we need to clear FE0 / FE1 here? Don't think so. */ #ifdef DEBUG - for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) { + for (i = 0; i < ARRAY_SIZE(vcpu->arch.fp.fpr); i++) { u32 f; - kvm_cvt_df(&vcpu->arch.fpr[i], &f); + kvm_cvt_df(&VCPU_FPR(vcpu, i), &f); dprintk(KERN_INFO "FPR[%d] = 0x%x / 0x%llx QPR[%d] = 0x%x\n", - i, f, vcpu->arch.fpr[i], i, vcpu->arch.qpr[i]); + i, f, VCPU_FPR(vcpu, i), i, vcpu->arch.qpr[i]); } #endif @@ -764,8 +761,8 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) break; } case OP_4X_PS_NEG: - vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_rb]; - vcpu->arch.fpr[ax_rd] ^= 0x8000000000000000ULL; + VCPU_FPR(vcpu, ax_rd) = VCPU_FPR(vcpu, ax_rb); + VCPU_FPR(vcpu, ax_rd) ^= 0x8000000000000000ULL; vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb]; vcpu->arch.qpr[ax_rd] ^= 0x80000000; break; @@ -775,7 +772,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) break; case OP_4X_PS_MR: WARN_ON(rcomp); - vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_rb]; + VCPU_FPR(vcpu, ax_rd) = VCPU_FPR(vcpu, ax_rb); vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb]; break; case OP_4X_PS_CMPO1: @@ -784,44 +781,44 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) break; case OP_4X_PS_NABS: WARN_ON(rcomp); - vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_rb]; - vcpu->arch.fpr[ax_rd] |= 0x8000000000000000ULL; + VCPU_FPR(vcpu, ax_rd) = VCPU_FPR(vcpu, ax_rb); + VCPU_FPR(vcpu, ax_rd) |= 0x8000000000000000ULL; vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb]; vcpu->arch.qpr[ax_rd] |= 0x80000000; break; case OP_4X_PS_ABS: WARN_ON(rcomp); - vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_rb]; - vcpu->arch.fpr[ax_rd] &= ~0x8000000000000000ULL; + VCPU_FPR(vcpu, ax_rd) = VCPU_FPR(vcpu, ax_rb); + VCPU_FPR(vcpu, ax_rd) &= ~0x8000000000000000ULL; vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb]; vcpu->arch.qpr[ax_rd] &= ~0x80000000; break; case OP_4X_PS_MERGE00: WARN_ON(rcomp); - vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_ra]; - /* vcpu->arch.qpr[ax_rd] = vcpu->arch.fpr[ax_rb]; */ - kvm_cvt_df(&vcpu->arch.fpr[ax_rb], + VCPU_FPR(vcpu, ax_rd) = VCPU_FPR(vcpu, ax_ra); + /* vcpu->arch.qpr[ax_rd] = VCPU_FPR(vcpu, ax_rb); */ + kvm_cvt_df(&VCPU_FPR(vcpu, ax_rb), &vcpu->arch.qpr[ax_rd]); break; case OP_4X_PS_MERGE01: WARN_ON(rcomp); - vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_ra]; + VCPU_FPR(vcpu, ax_rd) = VCPU_FPR(vcpu, ax_ra); vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb]; break; case OP_4X_PS_MERGE10: WARN_ON(rcomp); - /* vcpu->arch.fpr[ax_rd] = 
vcpu->arch.qpr[ax_ra]; */ + /* VCPU_FPR(vcpu, ax_rd) = vcpu->arch.qpr[ax_ra]; */ kvm_cvt_fd(&vcpu->arch.qpr[ax_ra], - &vcpu->arch.fpr[ax_rd]); - /* vcpu->arch.qpr[ax_rd] = vcpu->arch.fpr[ax_rb]; */ - kvm_cvt_df(&vcpu->arch.fpr[ax_rb], + &VCPU_FPR(vcpu, ax_rd)); + /* vcpu->arch.qpr[ax_rd] = VCPU_FPR(vcpu, ax_rb); */ + kvm_cvt_df(&VCPU_FPR(vcpu, ax_rb), &vcpu->arch.qpr[ax_rd]); break; case OP_4X_PS_MERGE11: WARN_ON(rcomp); - /* vcpu->arch.fpr[ax_rd] = vcpu->arch.qpr[ax_ra]; */ + /* VCPU_FPR(vcpu, ax_rd) = vcpu->arch.qpr[ax_ra]; */ kvm_cvt_fd(&vcpu->arch.qpr[ax_ra], - &vcpu->arch.fpr[ax_rd]); + &VCPU_FPR(vcpu, ax_rd)); vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb]; break; } @@ -856,7 +853,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) case OP_4A_PS_SUM1: emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd, ax_rb, ax_ra, SCALAR_NO_PS0 | SCALAR_HIGH, fps_fadds); - vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_rc]; + VCPU_FPR(vcpu, ax_rd) = VCPU_FPR(vcpu, ax_rc); break; case OP_4A_PS_SUM0: emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd, @@ -1106,45 +1103,45 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) case 59: switch (inst_get_field(inst, 21, 30)) { case OP_59_FADDS: - fpd_fadds(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b); + fpd_fadds(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_b); kvmppc_sync_qpr(vcpu, ax_rd); break; case OP_59_FSUBS: - fpd_fsubs(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b); + fpd_fsubs(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_b); kvmppc_sync_qpr(vcpu, ax_rd); break; case OP_59_FDIVS: - fpd_fdivs(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b); + fpd_fdivs(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_b); kvmppc_sync_qpr(vcpu, ax_rd); break; case OP_59_FRES: - fpd_fres(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b); + fpd_fres(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_b); kvmppc_sync_qpr(vcpu, ax_rd); break; case OP_59_FRSQRTES: - fpd_frsqrtes(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b); + fpd_frsqrtes(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_b); kvmppc_sync_qpr(vcpu, ax_rd); break; } switch (inst_get_field(inst, 26, 30)) { case OP_59_FMULS: - fpd_fmuls(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c); + fpd_fmuls(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_c); kvmppc_sync_qpr(vcpu, ax_rd); break; case OP_59_FMSUBS: - fpd_fmsubs(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); + fpd_fmsubs(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); kvmppc_sync_qpr(vcpu, ax_rd); break; case OP_59_FMADDS: - fpd_fmadds(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); + fpd_fmadds(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); kvmppc_sync_qpr(vcpu, ax_rd); break; case OP_59_FNMSUBS: - fpd_fnmsubs(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); + fpd_fnmsubs(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); kvmppc_sync_qpr(vcpu, ax_rd); break; case OP_59_FNMADDS: - fpd_fnmadds(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); + fpd_fnmadds(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); kvmppc_sync_qpr(vcpu, ax_rd); break; } @@ -1159,12 +1156,12 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) break; case OP_63_MFFS: /* XXX missing CR */ - *fpr_d = vcpu->arch.fpscr; + *fpr_d = vcpu->arch.fp.fpscr; break; case OP_63_MTFSF: /* XXX missing fm bits */ /* XXX missing CR */ - vcpu->arch.fpscr = *fpr_b; + vcpu->arch.fp.fpscr = *fpr_b; break; case OP_63_FCMPU: { @@ -1172,7 +1169,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) u32 cr0_mask = 0xf0000000; 
u32 cr_shift = inst_get_field(inst, 6, 8) * 4; - fpd_fcmpu(&vcpu->arch.fpscr, &tmp_cr, fpr_a, fpr_b); + fpd_fcmpu(&vcpu->arch.fp.fpscr, &tmp_cr, fpr_a, fpr_b); cr &= ~(cr0_mask >> cr_shift); cr |= (cr & cr0_mask) >> cr_shift; break; @@ -1183,40 +1180,40 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) u32 cr0_mask = 0xf0000000; u32 cr_shift = inst_get_field(inst, 6, 8) * 4; - fpd_fcmpo(&vcpu->arch.fpscr, &tmp_cr, fpr_a, fpr_b); + fpd_fcmpo(&vcpu->arch.fp.fpscr, &tmp_cr, fpr_a, fpr_b); cr &= ~(cr0_mask >> cr_shift); cr |= (cr & cr0_mask) >> cr_shift; break; } case OP_63_FNEG: - fpd_fneg(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b); + fpd_fneg(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_b); break; case OP_63_FMR: *fpr_d = *fpr_b; break; case OP_63_FABS: - fpd_fabs(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b); + fpd_fabs(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_b); break; case OP_63_FCPSGN: - fpd_fcpsgn(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b); + fpd_fcpsgn(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_b); break; case OP_63_FDIV: - fpd_fdiv(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b); + fpd_fdiv(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_b); break; case OP_63_FADD: - fpd_fadd(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b); + fpd_fadd(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_b); break; case OP_63_FSUB: - fpd_fsub(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b); + fpd_fsub(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_b); break; case OP_63_FCTIW: - fpd_fctiw(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b); + fpd_fctiw(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_b); break; case OP_63_FCTIWZ: - fpd_fctiwz(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b); + fpd_fctiwz(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_b); break; case OP_63_FRSP: - fpd_frsp(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b); + fpd_frsp(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_b); kvmppc_sync_qpr(vcpu, ax_rd); break; case OP_63_FRSQRTE: @@ -1224,39 +1221,39 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) double one = 1.0f; /* fD = sqrt(fB) */ - fpd_fsqrt(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b); + fpd_fsqrt(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_b); /* fD = 1.0f / fD */ - fpd_fdiv(&vcpu->arch.fpscr, &cr, fpr_d, (u64*)&one, fpr_d); + fpd_fdiv(&vcpu->arch.fp.fpscr, &cr, fpr_d, (u64*)&one, fpr_d); break; } } switch (inst_get_field(inst, 26, 30)) { case OP_63_FMUL: - fpd_fmul(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c); + fpd_fmul(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_c); break; case OP_63_FSEL: - fpd_fsel(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); + fpd_fsel(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); break; case OP_63_FMSUB: - fpd_fmsub(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); + fpd_fmsub(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); break; case OP_63_FMADD: - fpd_fmadd(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); + fpd_fmadd(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); break; case OP_63_FNMSUB: - fpd_fnmsub(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); + fpd_fnmsub(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); break; case OP_63_FNMADD: - fpd_fnmadd(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); + fpd_fnmadd(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); break; } break; } #ifdef DEBUG - for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) { + for (i = 0; i < ARRAY_SIZE(vcpu->arch.fp.fpr); i++) { u32 f; - kvm_cvt_df(&vcpu->arch.fpr[i], &f); + kvm_cvt_df(&VCPU_FPR(vcpu, i), &f); dprintk(KERN_INFO "FPR[%d] = 0x%x\n", i, f); } #endif diff 
--git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index d63a91f825d3..2bb425b22461 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -545,12 +545,6 @@ static inline int get_fpr_index(int i) void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr) { struct thread_struct *t = ¤t->thread; - u64 *vcpu_fpr = vcpu->arch.fpr; -#ifdef CONFIG_VSX - u64 *vcpu_vsx = vcpu->arch.vsr; -#endif - u64 *thread_fpr = &t->fp_state.fpr[0][0]; - int i; /* * VSX instructions can access FP and vector registers, so if @@ -575,24 +569,14 @@ void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr) */ if (current->thread.regs->msr & MSR_FP) giveup_fpu(current); - for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) - vcpu_fpr[i] = thread_fpr[get_fpr_index(i)]; - - vcpu->arch.fpscr = t->fp_state.fpscr; - -#ifdef CONFIG_VSX - if (cpu_has_feature(CPU_FTR_VSX)) - for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr) / 2; i++) - vcpu_vsx[i] = thread_fpr[get_fpr_index(i) + 1]; -#endif + vcpu->arch.fp = t->fp_state; } #ifdef CONFIG_ALTIVEC if (msr & MSR_VEC) { if (current->thread.regs->msr & MSR_VEC) giveup_altivec(current); - memcpy(vcpu->arch.vr, t->vr_state.vr, sizeof(vcpu->arch.vr)); - vcpu->arch.vscr = t->vr_state.vscr; + vcpu->arch.vr = t->vr_state; } #endif @@ -640,12 +624,6 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr, ulong msr) { struct thread_struct *t = ¤t->thread; - u64 *vcpu_fpr = vcpu->arch.fpr; -#ifdef CONFIG_VSX - u64 *vcpu_vsx = vcpu->arch.vsr; -#endif - u64 *thread_fpr = &t->fp_state.fpr[0][0]; - int i; /* When we have paired singles, we emulate in software */ if (vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE) @@ -683,13 +661,7 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr, #endif if (msr & MSR_FP) { - for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) - thread_fpr[get_fpr_index(i)] = vcpu_fpr[i]; -#ifdef CONFIG_VSX - for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr) / 2; i++) - thread_fpr[get_fpr_index(i) + 1] = vcpu_vsx[i]; -#endif - t->fp_state.fpscr = vcpu->arch.fpscr; + t->fp_state = vcpu->arch.fp; t->fpexc_mode = 0; enable_kernel_fp(); load_fp_state(&t->fp_state); @@ -697,8 +669,7 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr, if (msr & MSR_VEC) { #ifdef CONFIG_ALTIVEC - memcpy(t->vr_state.vr, vcpu->arch.vr, sizeof(vcpu->arch.vr)); - t->vr_state.vscr = vcpu->arch.vscr; + t->vr_state = vcpu->arch.vr; t->vrsave = -1; enable_kernel_altivec(); load_vr_state(&t->vr_state); @@ -1118,19 +1089,6 @@ static int kvmppc_get_one_reg_pr(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_HIOR: *val = get_reg_val(id, to_book3s(vcpu)->hior); break; -#ifdef CONFIG_VSX - case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31: { - long int i = id - KVM_REG_PPC_VSR0; - - if (!cpu_has_feature(CPU_FTR_VSX)) { - r = -ENXIO; - break; - } - val->vsxval[0] = vcpu->arch.fpr[i]; - val->vsxval[1] = vcpu->arch.vsr[i]; - break; - } -#endif /* CONFIG_VSX */ default: r = -EINVAL; break; @@ -1149,19 +1107,6 @@ static int kvmppc_set_one_reg_pr(struct kvm_vcpu *vcpu, u64 id, to_book3s(vcpu)->hior = set_reg_val(id, *val); to_book3s(vcpu)->hior_explicit = true; break; -#ifdef CONFIG_VSX - case KVM_REG_PPC_VSR0 ... 
KVM_REG_PPC_VSR31: { - long int i = id - KVM_REG_PPC_VSR0; - - if (!cpu_has_feature(CPU_FTR_VSX)) { - r = -ENXIO; - break; - } - vcpu->arch.fpr[i] = val->vsxval[0]; - vcpu->arch.vsr[i] = val->vsxval[1]; - break; - } -#endif /* CONFIG_VSX */ default: r = -EINVAL; break; diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 53e65a210b9a..0033465ecc3f 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -707,9 +707,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) fpexc_mode = current->thread.fpexc_mode; /* Restore guest FPU state to thread */ - memcpy(current->thread.fp_state.fpr, vcpu->arch.fpr, - sizeof(vcpu->arch.fpr)); - current->thread.fp_state.fpscr = vcpu->arch.fpscr; + current->thread.fp_state = vcpu->arch.fp; /* * Since we can't trap on MSR_FP in GS-mode, we consider the guest @@ -745,9 +743,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) vcpu->fpu_active = 0; /* Save guest FPU state from thread */ - memcpy(vcpu->arch.fpr, current->thread.fp_state.fpr, - sizeof(vcpu->arch.fpr)); - vcpu->arch.fpscr = current->thread.fp_state.fpscr; + vcpu->arch.fp = current->thread.fp_state; /* Restore userspace FPU state from stack */ current->thread.fp_state = fp; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 9ae97686e9f4..7ca9e0a80499 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -656,14 +656,14 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, gpr); break; case KVM_MMIO_REG_FPR: - vcpu->arch.fpr[vcpu->arch.io_gpr & KVM_MMIO_REG_MASK] = gpr; + VCPU_FPR(vcpu, vcpu->arch.io_gpr & KVM_MMIO_REG_MASK) = gpr; break; #ifdef CONFIG_PPC_BOOK3S case KVM_MMIO_REG_QPR: vcpu->arch.qpr[vcpu->arch.io_gpr & KVM_MMIO_REG_MASK] = gpr; break; case KVM_MMIO_REG_FQPR: - vcpu->arch.fpr[vcpu->arch.io_gpr & KVM_MMIO_REG_MASK] = gpr; + VCPU_FPR(vcpu, vcpu->arch.io_gpr & KVM_MMIO_REG_MASK) = gpr; vcpu->arch.qpr[vcpu->arch.io_gpr & KVM_MMIO_REG_MASK] = gpr; break; #endif -- cgit v1.2.3 From 595e4f7e697e2e6fb252f6be6a83d5b9460b59a3 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 15 Oct 2013 20:43:04 +1100 Subject: KVM: PPC: Book3S HV: Use load/store_fp_state functions in HV guest entry/exit This modifies kvmppc_load_fp and kvmppc_save_fp to use the generic FP/VSX and VMX load/store functions instead of open-coding the FP/VSX/VMX load/store instructions. Since kvmppc_load/save_fp don't follow C calling conventions, we make them private symbols within book3s_hv_rmhandlers.S. 
Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/kernel/asm-offsets.c | 2 - arch/powerpc/kvm/book3s_hv_rmhandlers.S | 86 +++++++++------------------------ 2 files changed, 22 insertions(+), 66 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 8403f9031d93..5e64c3d2149f 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -426,10 +426,8 @@ int main(void) DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr)); DEFINE(VCPU_VRSAVE, offsetof(struct kvm_vcpu, arch.vrsave)); DEFINE(VCPU_FPRS, offsetof(struct kvm_vcpu, arch.fp.fpr)); - DEFINE(VCPU_FPSCR, offsetof(struct kvm_vcpu, arch.fp.fpscr)); #ifdef CONFIG_ALTIVEC DEFINE(VCPU_VRS, offsetof(struct kvm_vcpu, arch.vr.vr)); - DEFINE(VCPU_VSCR, offsetof(struct kvm_vcpu, arch.vr.vscr)); #endif DEFINE(VCPU_XER, offsetof(struct kvm_vcpu, arch.xer)); DEFINE(VCPU_CTR, offsetof(struct kvm_vcpu, arch.ctr)); diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 47fd536fb13b..acbd1d6afba0 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -1261,7 +1261,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) /* save FP state */ mr r3, r9 - bl .kvmppc_save_fp + bl kvmppc_save_fp /* Increment yield count if they have a VPA */ ld r8, VCPU_VPA(r9) /* do they have a VPA? */ @@ -1691,7 +1691,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206) std r31, VCPU_GPR(R31)(r3) /* save FP state */ - bl .kvmppc_save_fp + bl kvmppc_save_fp /* * Take a nap until a decrementer or external interrupt occurs, @@ -1869,8 +1869,12 @@ kvmppc_read_intr: /* * Save away FP, VMX and VSX registers. * r3 = vcpu pointer + * N.B. r30 and r31 are volatile across this function, + * thus it is not callable from C. */ -_GLOBAL(kvmppc_save_fp) +kvmppc_save_fp: + mflr r30 + mr r31,r3 mfmsr r5 ori r8,r5,MSR_FP #ifdef CONFIG_ALTIVEC @@ -1885,42 +1889,17 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) #endif mtmsrd r8 isync -#ifdef CONFIG_VSX -BEGIN_FTR_SECTION - reg = 0 - .rept 32 - li r6,reg*16+VCPU_FPRS - STXVD2X(reg,R6,R3) - reg = reg + 1 - .endr -FTR_SECTION_ELSE -#endif - reg = 0 - .rept 32 - stfd reg,reg*8+VCPU_FPRS(r3) - reg = reg + 1 - .endr -#ifdef CONFIG_VSX -ALT_FTR_SECTION_END_IFSET(CPU_FTR_VSX) -#endif - mffs fr0 - stfd fr0,VCPU_FPSCR(r3) - + addi r3,r3,VCPU_FPRS + bl .store_fp_state #ifdef CONFIG_ALTIVEC BEGIN_FTR_SECTION - reg = 0 - .rept 32 - li r6,reg*16+VCPU_VRS - stvx reg,r6,r3 - reg = reg + 1 - .endr - mfvscr vr0 - li r6,VCPU_VSCR - stvx vr0,r6,r3 + addi r3,r31,VCPU_VRS + bl .store_vr_state END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #endif mfspr r6,SPRN_VRSAVE stw r6,VCPU_VRSAVE(r3) + mtlr r30 mtmsrd r5 isync blr @@ -1928,9 +1907,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) /* * Load up FP, VMX and VSX registers * r4 = vcpu pointer + * N.B. r30 and r31 are volatile across this function, + * thus it is not callable from C. 
*/ - .globl kvmppc_load_fp kvmppc_load_fp: + mflr r30 + mr r31,r4 mfmsr r9 ori r8,r9,MSR_FP #ifdef CONFIG_ALTIVEC @@ -1945,42 +1927,18 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) #endif mtmsrd r8 isync - lfd fr0,VCPU_FPSCR(r4) - MTFSF_L(fr0) -#ifdef CONFIG_VSX -BEGIN_FTR_SECTION - reg = 0 - .rept 32 - li r7,reg*16+VCPU_FPRS - LXVD2X(reg,R7,R4) - reg = reg + 1 - .endr -FTR_SECTION_ELSE -#endif - reg = 0 - .rept 32 - lfd reg,reg*8+VCPU_FPRS(r4) - reg = reg + 1 - .endr -#ifdef CONFIG_VSX -ALT_FTR_SECTION_END_IFSET(CPU_FTR_VSX) -#endif - + addi r3,r4,VCPU_FPRS + bl .load_fp_state #ifdef CONFIG_ALTIVEC BEGIN_FTR_SECTION - li r7,VCPU_VSCR - lvx vr0,r7,r4 - mtvscr vr0 - reg = 0 - .rept 32 - li r7,reg*16+VCPU_VRS - lvx reg,r7,r4 - reg = reg + 1 - .endr + addi r3,r31,VCPU_VRS + bl .load_vr_state END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #endif lwz r7,VCPU_VRSAVE(r4) mtspr SPRN_VRSAVE,r7 + mtlr r30 + mr r4,r31 blr /* -- cgit v1.2.3 From a7189483f03d4c4b93219ff27a2e0a01716abd21 Mon Sep 17 00:00:00 2001 From: Wang Dongsheng Date: Tue, 17 Dec 2013 16:17:02 +0800 Subject: powerpc/85xx: add sysfs for pw20 state and altivec idle Add a sys interface to enable/diable pw20 state or altivec idle, and control the wait entry time. Enable/Disable interface: 0, disable. 1, enable. /sys/devices/system/cpu/cpuX/pw20_state /sys/devices/system/cpu/cpuX/altivec_idle Set wait time interface:(Nanosecond) /sys/devices/system/cpu/cpuX/pw20_wait_time /sys/devices/system/cpu/cpuX/altivec_idle_wait_time Example: Base on TBfreq is 41MHZ. 1~48(ns): TB[63] 49~97(ns): TB[62] 98~195(ns): TB[61] 196~390(ns): TB[60] 391~780(ns): TB[59] 781~1560(ns): TB[58] ... Signed-off-by: Wang Dongsheng [scottwood@freescale.com: change ifdef] Signed-off-by: Scott Wood --- arch/powerpc/kernel/sysfs.c | 316 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 316 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index cad777eb613a..d4a43e64a6a9 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -86,6 +86,304 @@ __setup("smt-snooze-delay=", setup_smt_snooze_delay); #endif /* CONFIG_PPC64 */ +#ifdef CONFIG_PPC_FSL_BOOK3E +#define MAX_BIT 63 + +static u64 pw20_wt; +static u64 altivec_idle_wt; + +static unsigned int get_idle_ticks_bit(u64 ns) +{ + u64 cycle; + + if (ns >= 10000) + cycle = div_u64(ns + 500, 1000) * tb_ticks_per_usec; + else + cycle = div_u64(ns * tb_ticks_per_usec, 1000); + + if (!cycle) + return 0; + + return ilog2(cycle); +} + +static void do_show_pwrmgtcr0(void *val) +{ + u32 *value = val; + + *value = mfspr(SPRN_PWRMGTCR0); +} + +static ssize_t show_pw20_state(struct device *dev, + struct device_attribute *attr, char *buf) +{ + u32 value; + unsigned int cpu = dev->id; + + smp_call_function_single(cpu, do_show_pwrmgtcr0, &value, 1); + + value &= PWRMGTCR0_PW20_WAIT; + + return sprintf(buf, "%u\n", value ? 
1 : 0); +} + +static void do_store_pw20_state(void *val) +{ + u32 *value = val; + u32 pw20_state; + + pw20_state = mfspr(SPRN_PWRMGTCR0); + + if (*value) + pw20_state |= PWRMGTCR0_PW20_WAIT; + else + pw20_state &= ~PWRMGTCR0_PW20_WAIT; + + mtspr(SPRN_PWRMGTCR0, pw20_state); +} + +static ssize_t store_pw20_state(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + u32 value; + unsigned int cpu = dev->id; + + if (kstrtou32(buf, 0, &value)) + return -EINVAL; + + if (value > 1) + return -EINVAL; + + smp_call_function_single(cpu, do_store_pw20_state, &value, 1); + + return count; +} + +static ssize_t show_pw20_wait_time(struct device *dev, + struct device_attribute *attr, char *buf) +{ + u32 value; + u64 tb_cycle = 1; + u64 time; + + unsigned int cpu = dev->id; + + if (!pw20_wt) { + smp_call_function_single(cpu, do_show_pwrmgtcr0, &value, 1); + value = (value & PWRMGTCR0_PW20_ENT) >> + PWRMGTCR0_PW20_ENT_SHIFT; + + tb_cycle = (tb_cycle << (MAX_BIT - value + 1)); + /* convert ms to ns */ + if (tb_ticks_per_usec > 1000) { + time = div_u64(tb_cycle, tb_ticks_per_usec / 1000); + } else { + u32 rem_us; + + time = div_u64_rem(tb_cycle, tb_ticks_per_usec, + &rem_us); + time = time * 1000 + rem_us * 1000 / tb_ticks_per_usec; + } + } else { + time = pw20_wt; + } + + return sprintf(buf, "%llu\n", time > 0 ? time : 0); +} + +static void set_pw20_wait_entry_bit(void *val) +{ + u32 *value = val; + u32 pw20_idle; + + pw20_idle = mfspr(SPRN_PWRMGTCR0); + + /* Set Automatic PW20 Core Idle Count */ + /* clear count */ + pw20_idle &= ~PWRMGTCR0_PW20_ENT; + + /* set count */ + pw20_idle |= ((MAX_BIT - *value) << PWRMGTCR0_PW20_ENT_SHIFT); + + mtspr(SPRN_PWRMGTCR0, pw20_idle); +} + +static ssize_t store_pw20_wait_time(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + u32 entry_bit; + u64 value; + + unsigned int cpu = dev->id; + + if (kstrtou64(buf, 0, &value)) + return -EINVAL; + + if (!value) + return -EINVAL; + + entry_bit = get_idle_ticks_bit(value); + if (entry_bit > MAX_BIT) + return -EINVAL; + + pw20_wt = value; + + smp_call_function_single(cpu, set_pw20_wait_entry_bit, + &entry_bit, 1); + + return count; +} + +static ssize_t show_altivec_idle(struct device *dev, + struct device_attribute *attr, char *buf) +{ + u32 value; + unsigned int cpu = dev->id; + + smp_call_function_single(cpu, do_show_pwrmgtcr0, &value, 1); + + value &= PWRMGTCR0_AV_IDLE_PD_EN; + + return sprintf(buf, "%u\n", value ? 
1 : 0); +} + +static void do_store_altivec_idle(void *val) +{ + u32 *value = val; + u32 altivec_idle; + + altivec_idle = mfspr(SPRN_PWRMGTCR0); + + if (*value) + altivec_idle |= PWRMGTCR0_AV_IDLE_PD_EN; + else + altivec_idle &= ~PWRMGTCR0_AV_IDLE_PD_EN; + + mtspr(SPRN_PWRMGTCR0, altivec_idle); +} + +static ssize_t store_altivec_idle(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + u32 value; + unsigned int cpu = dev->id; + + if (kstrtou32(buf, 0, &value)) + return -EINVAL; + + if (value > 1) + return -EINVAL; + + smp_call_function_single(cpu, do_store_altivec_idle, &value, 1); + + return count; +} + +static ssize_t show_altivec_idle_wait_time(struct device *dev, + struct device_attribute *attr, char *buf) +{ + u32 value; + u64 tb_cycle = 1; + u64 time; + + unsigned int cpu = dev->id; + + if (!altivec_idle_wt) { + smp_call_function_single(cpu, do_show_pwrmgtcr0, &value, 1); + value = (value & PWRMGTCR0_AV_IDLE_CNT) >> + PWRMGTCR0_AV_IDLE_CNT_SHIFT; + + tb_cycle = (tb_cycle << (MAX_BIT - value + 1)); + /* convert ms to ns */ + if (tb_ticks_per_usec > 1000) { + time = div_u64(tb_cycle, tb_ticks_per_usec / 1000); + } else { + u32 rem_us; + + time = div_u64_rem(tb_cycle, tb_ticks_per_usec, + &rem_us); + time = time * 1000 + rem_us * 1000 / tb_ticks_per_usec; + } + } else { + time = altivec_idle_wt; + } + + return sprintf(buf, "%llu\n", time > 0 ? time : 0); +} + +static void set_altivec_idle_wait_entry_bit(void *val) +{ + u32 *value = val; + u32 altivec_idle; + + altivec_idle = mfspr(SPRN_PWRMGTCR0); + + /* Set Automatic AltiVec Idle Count */ + /* clear count */ + altivec_idle &= ~PWRMGTCR0_AV_IDLE_CNT; + + /* set count */ + altivec_idle |= ((MAX_BIT - *value) << PWRMGTCR0_AV_IDLE_CNT_SHIFT); + + mtspr(SPRN_PWRMGTCR0, altivec_idle); +} + +static ssize_t store_altivec_idle_wait_time(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + u32 entry_bit; + u64 value; + + unsigned int cpu = dev->id; + + if (kstrtou64(buf, 0, &value)) + return -EINVAL; + + if (!value) + return -EINVAL; + + entry_bit = get_idle_ticks_bit(value); + if (entry_bit > MAX_BIT) + return -EINVAL; + + altivec_idle_wt = value; + + smp_call_function_single(cpu, set_altivec_idle_wait_entry_bit, + &entry_bit, 1); + + return count; +} + +/* + * Enable/Disable interface: + * 0, disable. 1, enable. + */ +static DEVICE_ATTR(pw20_state, 0600, show_pw20_state, store_pw20_state); +static DEVICE_ATTR(altivec_idle, 0600, show_altivec_idle, store_altivec_idle); + +/* + * Set wait time interface:(Nanosecond) + * Example: Base on TBfreq is 41MHZ. + * 1~48(ns): TB[63] + * 49~97(ns): TB[62] + * 98~195(ns): TB[61] + * 196~390(ns): TB[60] + * 391~780(ns): TB[59] + * 781~1560(ns): TB[58] + * ... + */ +static DEVICE_ATTR(pw20_wait_time, 0600, + show_pw20_wait_time, + store_pw20_wait_time); +static DEVICE_ATTR(altivec_idle_wait_time, 0600, + show_altivec_idle_wait_time, + store_altivec_idle_wait_time); +#endif + /* * Enabling PMCs will slow partition context switch times so we only do * it the first time we write to the PMCs. 
@@ -425,6 +723,15 @@ static void register_cpu_online(unsigned int cpu) device_create_file(s, &dev_attr_pir); #endif /* CONFIG_PPC64 */ +#ifdef CONFIG_PPC_FSL_BOOK3E + if (PVR_VER(cur_cpu_spec->pvr_value) == PVR_VER_E6500) { + device_create_file(s, &dev_attr_pw20_state); + device_create_file(s, &dev_attr_pw20_wait_time); + + device_create_file(s, &dev_attr_altivec_idle); + device_create_file(s, &dev_attr_altivec_idle_wait_time); + } +#endif cacheinfo_cpu_online(cpu); } @@ -497,6 +804,15 @@ static void unregister_cpu_online(unsigned int cpu) device_remove_file(s, &dev_attr_pir); #endif /* CONFIG_PPC64 */ +#ifdef CONFIG_PPC_FSL_BOOK3E + if (PVR_VER(cur_cpu_spec->pvr_value) == PVR_VER_E6500) { + device_remove_file(s, &dev_attr_pw20_state); + device_remove_file(s, &dev_attr_pw20_wait_time); + + device_remove_file(s, &dev_attr_altivec_idle); + device_remove_file(s, &dev_attr_altivec_idle_wait_time); + } +#endif cacheinfo_cpu_offline(cpu); } -- cgit v1.2.3 From 7c732cba3d9312882e82d91d5948261dfd5c8fe6 Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Tue, 24 Dec 2013 15:12:03 +0800 Subject: powerpc/fsl_booke: protect the access to MAS7 The e500v1 doesn't implement the MAS7, so we should avoid to access this register on that implementations. In the current kernel, the access to MAS7 are protected by either CONFIG_PHYS_64BIT or MMU_FTR_BIG_PHYS. Since some code are executed before the code patching, we have to use CONFIG_PHYS_64BIT in these cases. Signed-off-by: Kevin Hao Signed-off-by: Scott Wood --- arch/powerpc/kernel/head_fsl_booke.S | 2 ++ arch/powerpc/mm/hugetlbpage-book3e.c | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index f45726a1d963..09921a5197c6 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -82,7 +82,9 @@ _ENTRY(_start); and r19,r3,r18 /* r19 = page offset */ andc r31,r20,r18 /* r31 = page base */ or r31,r31,r19 /* r31 = devtree phys addr */ +#ifdef CONFIG_PHYS_64BIT mfspr r30,SPRN_MAS7 +#endif li r25,0 /* phys kernel start (low) */ li r24,0 /* CPU number */ diff --git a/arch/powerpc/mm/hugetlbpage-book3e.c b/arch/powerpc/mm/hugetlbpage-book3e.c index 74551b5e41e5..646c4bffaeba 100644 --- a/arch/powerpc/mm/hugetlbpage-book3e.c +++ b/arch/powerpc/mm/hugetlbpage-book3e.c @@ -103,7 +103,8 @@ void book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea, if (mmu_has_feature(MMU_FTR_USE_PAIRED_MAS)) { mtspr(SPRN_MAS7_MAS3, mas7_3); } else { - mtspr(SPRN_MAS7, upper_32_bits(mas7_3)); + if (mmu_has_feature(MMU_FTR_BIG_PHYS)) + mtspr(SPRN_MAS7, upper_32_bits(mas7_3)); mtspr(SPRN_MAS3, lower_32_bits(mas7_3)); } -- cgit v1.2.3 From 99739611e816716d912ae89a4354237fc39745a6 Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Tue, 24 Dec 2013 15:12:04 +0800 Subject: powerpc/fsl_booke: introduce get_phys_addr function Move the codes which translate a effective address to physical address to a separate function. So it can be reused by other code. 
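[Editor's note, not part of the patch] The address math that get_phys_addr performs after a successful tlbsx can be written as a small C sketch. The tsize, mas3 and mas7 parameters below stand in for the values read back from the MAS registers and are assumptions made purely for illustration; the page-size shift mirrors the "li r10,1024 / slw" sequence in the assembly that follows, and MAS7 supplies the upper 32 bits only on CONFIG_PHYS_64BIT.

/* Illustration only -- the MAS values are assumed to come from a tlbsx hit. */
static unsigned long long ea_to_phys(unsigned int ea, unsigned int tsize,
                                     unsigned int mas3, unsigned int mas7)
{
        unsigned int page_size = 1024u << tsize;        /* page size as the asm computes it */
        unsigned int offset = ea & (page_size - 1);     /* page offset within the mapping */
        unsigned int base = mas3 & ~(page_size - 1);    /* page base from MAS3 */

        return ((unsigned long long)mas7 << 32) | (base | offset);
}
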
Signed-off-by: Kevin Hao Signed-off-by: Scott Wood --- arch/powerpc/kernel/head_fsl_booke.S | 50 +++++++++++++++++++++--------------- 1 file changed, 30 insertions(+), 20 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 09921a5197c6..196950f29c00 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -65,26 +65,9 @@ _ENTRY(_start); nop /* Translate device tree address to physical, save in r30/r31 */ - mfmsr r16 - mfspr r17,SPRN_PID - rlwinm r17,r17,16,0x3fff0000 /* turn PID into MAS6[SPID] */ - rlwimi r17,r16,28,0x00000001 /* turn MSR[DS] into MAS6[SAS] */ - mtspr SPRN_MAS6,r17 - - tlbsx 0,r3 /* must succeed */ - - mfspr r16,SPRN_MAS1 - mfspr r20,SPRN_MAS3 - rlwinm r17,r16,25,0x1f /* r17 = log2(page size) */ - li r18,1024 - slw r18,r18,r17 /* r18 = page size */ - addi r18,r18,-1 - and r19,r3,r18 /* r19 = page offset */ - andc r31,r20,r18 /* r31 = page base */ - or r31,r31,r19 /* r31 = devtree phys addr */ -#ifdef CONFIG_PHYS_64BIT - mfspr r30,SPRN_MAS7 -#endif + bl get_phys_addr + mr r30,r3 + mr r31,r4 li r25,0 /* phys kernel start (low) */ li r24,0 /* CPU number */ @@ -857,6 +840,33 @@ KernelSPE: #endif /* CONFIG_SPE */ +/* + * Translate the effec addr in r3 to phys addr. The phys addr will be put + * into r3(higher 32bit) and r4(lower 32bit) + */ +get_phys_addr: + mfmsr r8 + mfspr r9,SPRN_PID + rlwinm r9,r9,16,0x3fff0000 /* turn PID into MAS6[SPID] */ + rlwimi r9,r8,28,0x00000001 /* turn MSR[DS] into MAS6[SAS] */ + mtspr SPRN_MAS6,r9 + + tlbsx 0,r3 /* must succeed */ + + mfspr r8,SPRN_MAS1 + mfspr r12,SPRN_MAS3 + rlwinm r9,r8,25,0x1f /* r9 = log2(page size) */ + li r10,1024 + slw r10,r10,r9 /* r10 = page size */ + addi r10,r10,-1 + and r11,r3,r10 /* r11 = page offset */ + andc r4,r12,r10 /* r4 = page base */ + or r4,r4,r11 /* r4 = devtree phys addr */ +#ifdef CONFIG_PHYS_64BIT + mfspr r3,SPRN_MAS7 +#endif + blr + /* * Global functions */ -- cgit v1.2.3 From dd189692d40948d6445bbaeb8cb9bf9d15f54dc6 Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Tue, 24 Dec 2013 15:12:06 +0800 Subject: powerpc: enable the relocatable support for the fsl booke 32bit kernel This is based on the codes in the head_44x.S. The difference is that the init tlb size we used is 64M. With this patch we can only load the kernel at address between memstart_addr ~ memstart_addr + 64M. We will fix this restriction in the following patches. 
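[Editor's note, not part of the patch] A rough C rendering of the 64M-page offset calculation done before the relocate call; the real code is the assembly added to head_fsl_booke.S in the diff that follows, this is only a sketch of the arithmetic.

/*
 * Illustration only: given where the kernel was actually loaded
 * (phys_start) and where it was linked (KERNELBASE), compute the
 * virtual address to relocate to within the initial 64M mapping.
 */
unsigned long reloc_virt_addr(unsigned long kernelbase, unsigned long phys_start)
{
        unsigned long phys_off = phys_start & 0x3ffffff;   /* PHYS_START % 64M */
        unsigned long virt_off = kernelbase & 0x3ffffff;   /* KERNELBASE % 64M */

        return kernelbase + (phys_off - virt_off);
}
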
Signed-off-by: Kevin Hao Signed-off-by: Scott Wood --- arch/powerpc/Kconfig | 2 +- arch/powerpc/kernel/fsl_booke_entry_mapping.S | 2 ++ arch/powerpc/kernel/head_fsl_booke.S | 34 +++++++++++++++++++++++++++ arch/powerpc/mm/fsl_booke_mmu.c | 28 ++++++++++++++++++++++ 4 files changed, 65 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index f0a893142cee..4bb52e6488ea 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -884,7 +884,7 @@ config DYNAMIC_MEMSTART config RELOCATABLE bool "Build a relocatable kernel" - depends on ADVANCED_OPTIONS && FLATMEM && 44x + depends on ADVANCED_OPTIONS && FLATMEM && (44x || FSL_BOOKE) select NONSTATIC_KERNEL help This builds a kernel image that is capable of running at the diff --git a/arch/powerpc/kernel/fsl_booke_entry_mapping.S b/arch/powerpc/kernel/fsl_booke_entry_mapping.S index a92c79be2728..f22e7e44fbf3 100644 --- a/arch/powerpc/kernel/fsl_booke_entry_mapping.S +++ b/arch/powerpc/kernel/fsl_booke_entry_mapping.S @@ -176,6 +176,8 @@ skpinv: addi r6,r6,1 /* Increment */ /* 7. Jump to KERNELBASE mapping */ lis r6,(KERNELBASE & ~0xfff)@h ori r6,r6,(KERNELBASE & ~0xfff)@l + rlwinm r7,r25,0,0x03ffffff + add r6,r7,r6 #elif defined(ENTRY_MAPPING_KEXEC_SETUP) /* diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 196950f29c00..19bd574bda9d 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -73,6 +73,30 @@ _ENTRY(_start); li r24,0 /* CPU number */ li r23,0 /* phys kernel start (high) */ +#ifdef CONFIG_RELOCATABLE + LOAD_REG_ADDR_PIC(r3, _stext) /* Get our current runtime base */ + + /* Translate _stext address to physical, save in r23/r25 */ + bl get_phys_addr + mr r23,r3 + mr r25,r4 + + /* + * We have the runtime (virutal) address of our base. + * We calculate our shift of offset from a 64M page. + * We could map the 64M page we belong to at PAGE_OFFSET and + * get going from there. + */ + lis r4,KERNELBASE@h + ori r4,r4,KERNELBASE@l + rlwinm r6,r25,0,0x3ffffff /* r6 = PHYS_START % 64M */ + rlwinm r5,r4,0,0x3ffffff /* r5 = KERNELBASE % 64M */ + subf r3,r5,r6 /* r3 = r6 - r5 */ + add r3,r4,r3 /* Required Virtual Address */ + + bl relocate +#endif + /* We try to not make any assumptions about how the boot loader * setup or used the TLBs. We invalidate all mappings from the * boot loader and load a single entry in TLB1[0] to map the @@ -182,6 +206,16 @@ _ENTRY(__early_start) bl early_init +#ifdef CONFIG_RELOCATABLE +#ifdef CONFIG_PHYS_64BIT + mr r3,r23 + mr r4,r25 +#else + mr r3,r25 +#endif + bl relocate_init +#endif + #ifdef CONFIG_DYNAMIC_MEMSTART lis r3,kernstart_addr@ha la r3,kernstart_addr@l(r3) diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c index 07ba45b0f07c..ce4a1163ddd3 100644 --- a/arch/powerpc/mm/fsl_booke_mmu.c +++ b/arch/powerpc/mm/fsl_booke_mmu.c @@ -241,4 +241,32 @@ void setup_initial_memory_limit(phys_addr_t first_memblock_base, /* 64M mapped initially according to head_fsl_booke.S */ memblock_set_current_limit(min_t(u64, limit, 0x04000000)); } + +#ifdef CONFIG_RELOCATABLE +notrace void __init relocate_init(phys_addr_t start) +{ + unsigned long base = KERNELBASE; + + /* + * Relocatable kernel support based on processing of dynamic + * relocation entries. 
+ * Compute the virt_phys_offset : + * virt_phys_offset = stext.run - kernstart_addr + * + * stext.run = (KERNELBASE & ~0x3ffffff) + (kernstart_addr & 0x3ffffff) + * When we relocate, we have : + * + * (kernstart_addr & 0x3ffffff) = (stext.run & 0x3ffffff) + * + * hence: + * virt_phys_offset = (KERNELBASE & ~0x3ffffff) - + * (kernstart_addr & ~0x3ffffff) + * + */ + kernstart_addr = start; + start &= ~0x3ffffff; + base &= ~0x3ffffff; + virt_phys_offset = base - start; +} +#endif #endif -- cgit v1.2.3 From 78a235efdc42ff363de81fdbc171385e8b86b69b Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Tue, 24 Dec 2013 15:12:07 +0800 Subject: powerpc/fsl_booke: set the tlb entry for the kernel address in AS1 We use the tlb1 entries to map low mem to the kernel space. In the current code, it assumes that the first tlb entry would cover the kernel image. But this is not true for some special cases, such as when we run a relocatable kernel above the 64M or set CONFIG_KERNEL_START above 64M. So we choose to switch to address space 1 before setting these tlb entries. Signed-off-by: Kevin Hao Signed-off-by: Scott Wood --- arch/powerpc/kernel/head_fsl_booke.S | 81 ++++++++++++++++++++++++++++++++++++ arch/powerpc/mm/fsl_booke_mmu.c | 2 + arch/powerpc/mm/mmu_decl.h | 2 + 3 files changed, 85 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 19bd574bda9d..75f0223e6d0d 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -1156,6 +1156,87 @@ __secondary_hold_acknowledge: .long -1 #endif +/* + * Create a tlb entry with the same effective and physical address as + * the tlb entry used by the current running code. But set the TS to 1. + * Then switch to the address space 1. It will return with the r3 set to + * the ESEL of the new created tlb. + */ +_GLOBAL(switch_to_as1) + mflr r5 + + /* Find a entry not used */ + mfspr r3,SPRN_TLB1CFG + andi. r3,r3,0xfff + mfspr r4,SPRN_PID + rlwinm r4,r4,16,0x3fff0000 /* turn PID into MAS6[SPID] */ + mtspr SPRN_MAS6,r4 +1: lis r4,0x1000 /* Set MAS0(TLBSEL) = 1 */ + addi r3,r3,-1 + rlwimi r4,r3,16,4,15 /* Setup MAS0 = TLBSEL | ESEL(r3) */ + mtspr SPRN_MAS0,r4 + tlbre + mfspr r4,SPRN_MAS1 + andis. r4,r4,MAS1_VALID@h + bne 1b + + /* Get the tlb entry used by the current running code */ + bl 0f +0: mflr r4 + tlbsx 0,r4 + + mfspr r4,SPRN_MAS1 + ori r4,r4,MAS1_TS /* Set the TS = 1 */ + mtspr SPRN_MAS1,r4 + + mfspr r4,SPRN_MAS0 + rlwinm r4,r4,0,~MAS0_ESEL_MASK + rlwimi r4,r3,16,4,15 /* Setup MAS0 = TLBSEL | ESEL(r3) */ + mtspr SPRN_MAS0,r4 + tlbwe + isync + sync + + mfmsr r4 + ori r4,r4,MSR_IS | MSR_DS + mtspr SPRN_SRR0,r5 + mtspr SPRN_SRR1,r4 + sync + rfi + +/* + * Restore to the address space 0 and also invalidate the tlb entry created + * by switch_to_as1. +*/ +_GLOBAL(restore_to_as0) + mflr r0 + + bl 0f +0: mflr r9 + addi r9,r9,1f - 0b + + mfmsr r7 + li r8,(MSR_IS | MSR_DS) + andc r7,r7,r8 + + mtspr SPRN_SRR0,r9 + mtspr SPRN_SRR1,r7 + sync + rfi + + /* Invalidate the temporary tlb entry for AS1 */ +1: lis r9,0x1000 /* Set MAS0(TLBSEL) = 1 */ + rlwimi r9,r3,16,4,15 /* Setup MAS0 = TLBSEL | ESEL(r3) */ + mtspr SPRN_MAS0,r9 + tlbre + mfspr r9,SPRN_MAS1 + rlwinm r9,r9,0,2,31 /* Clear MAS1 Valid and IPPROT */ + mtspr SPRN_MAS1,r9 + tlbwe + isync + mtlr r0 + blr + /* * We put a few things here that have to be page-aligned. This stuff * goes at the beginning of the data segment, which is page-aligned. 
diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c index ce4a1163ddd3..1d54f6d35e71 100644 --- a/arch/powerpc/mm/fsl_booke_mmu.c +++ b/arch/powerpc/mm/fsl_booke_mmu.c @@ -222,7 +222,9 @@ void __init adjust_total_lowmem(void) /* adjust lowmem size to __max_low_memory */ ram = min((phys_addr_t)__max_low_memory, (phys_addr_t)total_lowmem); + i = switch_to_as1(); __max_low_memory = map_mem_in_cams(ram, CONFIG_LOWMEM_CAM_NUM); + restore_to_as0(i); pr_info("Memory CAM mapping: "); for (i = 0; i < tlbcam_index - 1; i++) diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index 83eb5d5f53d5..eefbf7bb4331 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -148,6 +148,8 @@ extern unsigned long calc_cam_sz(unsigned long ram, unsigned long virt, extern void MMU_init_hw(void); extern unsigned long mmu_mapin_ram(unsigned long top); extern void adjust_total_lowmem(void); +extern int switch_to_as1(void); +extern void restore_to_as0(int esel); #endif extern void loadcam_entry(unsigned int index); -- cgit v1.2.3 From b27652dd2174df1a7e0a7c5f00d1c8e3ed9287a7 Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Tue, 24 Dec 2013 15:12:08 +0800 Subject: powerpc: introduce early_get_first_memblock_info For a relocatable kernel since it can be loaded at any place, there is no any relation between the kernel start addr and the memstart_addr. So we can't calculate the memstart_addr from kernel start addr. And also we can't wait to do the relocation after we get the real memstart_addr from device tree because it is so late. So introduce a new function we can use to get the first memblock address and size in a very early stage (before machine_init). Signed-off-by: Kevin Hao Signed-off-by: Scott Wood --- arch/powerpc/kernel/prom.c | 41 ++++++++++++++++++++++++++++++++++++++++- include/linux/of_fdt.h | 1 + 2 files changed, 41 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index fa0ad8aafbcc..f58c0d3aaeb4 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -523,6 +523,20 @@ static int __init early_init_dt_scan_memory_ppc(unsigned long node, return early_init_dt_scan_memory(node, uname, depth, data); } +/* + * For a relocatable kernel, we need to get the memstart_addr first, + * then use it to calculate the virtual kernel start address. This has + * to happen at a very early stage (before machine_init). In this case, + * we just want to get the memstart_address and would not like to mess the + * memblock at this stage. So introduce a variable to skip the memblock_add() + * for this reason. + */ +#ifdef CONFIG_RELOCATABLE +static int add_mem_to_memblock = 1; +#else +#define add_mem_to_memblock 1 +#endif + void __init early_init_dt_add_memory_arch(u64 base, u64 size) { #ifdef CONFIG_PPC64 @@ -543,7 +557,8 @@ void __init early_init_dt_add_memory_arch(u64 base, u64 size) } /* Add the chunk to the MEMBLOCK list */ - memblock_add(base, size); + if (add_mem_to_memblock) + memblock_add(base, size); } static void __init early_reserve_mem_dt(void) @@ -740,6 +755,30 @@ void __init early_init_devtree(void *params) DBG(" <- early_init_devtree()\n"); } +#ifdef CONFIG_RELOCATABLE +/* + * This function run before early_init_devtree, so we have to init + * initial_boot_params. 
+ */ +void __init early_get_first_memblock_info(void *params, phys_addr_t *size) +{ + /* Setup flat device-tree pointer */ + initial_boot_params = params; + + /* + * Scan the memory nodes and set add_mem_to_memblock to 0 to avoid + * mess the memblock. + */ + add_mem_to_memblock = 0; + of_scan_flat_dt(early_init_dt_scan_root, NULL); + of_scan_flat_dt(early_init_dt_scan_memory_ppc, NULL); + add_mem_to_memblock = 1; + + if (size) + *size = first_memblock_size; +} +#endif + /******* * * New implementation of the OF "find" APIs, return a refcounted diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h index 0beaee9dac1f..2b77058a7335 100644 --- a/include/linux/of_fdt.h +++ b/include/linux/of_fdt.h @@ -116,6 +116,7 @@ extern const void *of_flat_dt_match_machine(const void *default_match, extern void unflatten_device_tree(void); extern void unflatten_and_copy_device_tree(void); extern void early_init_devtree(void *); +extern void early_get_first_memblock_info(void *, phys_addr_t *); #else /* CONFIG_OF_FLATTREE */ static inline const char *of_flat_dt_get_machine_name(void) { return NULL; } static inline void unflatten_device_tree(void) {} -- cgit v1.2.3 From 7d2471f9fa85089beb1cb9436ffc28f9e11e518d Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Tue, 24 Dec 2013 15:12:10 +0800 Subject: powerpc/fsl_booke: make sure PAGE_OFFSET map to memstart_addr for relocatable kernel This is always true for a non-relocatable kernel. Otherwise the kernel would get stuck. But for a relocatable kernel, it seems a little complicated. When booting a relocatable kernel, we just align the kernel start addr to 64M and map the PAGE_OFFSET from there. The relocation will base on this virtual address. But if this address is not the same as the memstart_addr, we will have to change the map of PAGE_OFFSET to the real memstart_addr and do another relocation again. Signed-off-by: Kevin Hao [scottwood@freescale.com: make offset long and non-negative in simple case] Signed-off-by: Scott Wood --- arch/powerpc/kernel/head_fsl_booke.S | 74 +++++++++++++++++++++++++++++++++--- arch/powerpc/mm/fsl_booke_mmu.c | 42 +++++++++++++++++--- arch/powerpc/mm/mmu_decl.h | 2 +- 3 files changed, 106 insertions(+), 12 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 75f0223e6d0d..b1f7edc3c360 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -81,6 +81,39 @@ _ENTRY(_start); mr r23,r3 mr r25,r4 + bl 0f +0: mflr r8 + addis r3,r8,(is_second_reloc - 0b)@ha + lwz r19,(is_second_reloc - 0b)@l(r3) + + /* Check if this is the second relocation. */ + cmpwi r19,1 + bne 1f + + /* + * For the second relocation, we already get the real memstart_addr + * from device tree. So we will map PAGE_OFFSET to memstart_addr, + * then the virtual address of start kernel should be: + * PAGE_OFFSET + (kernstart_addr - memstart_addr) + * Since the offset between kernstart_addr and memstart_addr should + * never be beyond 1G, so we can just use the lower 32bit of them + * for the calculation. + */ + lis r3,PAGE_OFFSET@h + + addis r4,r8,(kernstart_addr - 0b)@ha + addi r4,r4,(kernstart_addr - 0b)@l + lwz r5,4(r4) + + addis r6,r8,(memstart_addr - 0b)@ha + addi r6,r6,(memstart_addr - 0b)@l + lwz r7,4(r6) + + subf r5,r7,r5 + add r3,r3,r5 + b 2f + +1: /* * We have the runtime (virutal) address of our base. * We calculate our shift of offset from a 64M page. 
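[Editor's note, not part of the patch] For illustration, the virtual start address used for the second relocation reduces to the following sketch; as the commit message notes, the offset between kernstart_addr and memstart_addr is assumed to stay below 1G, so 32-bit arithmetic on the low words is sufficient, which is exactly what the assembly below relies on.

/* Illustration only: virtual kernel start for the second relocation. */
unsigned long second_reloc_start(unsigned long page_offset,
                                 unsigned long long kernstart_addr,
                                 unsigned long long memstart_addr)
{
        /* low 32 bits are enough because the offset is below 1G */
        unsigned long delta = (unsigned long)kernstart_addr -
                              (unsigned long)memstart_addr;

        return page_offset + delta;
}
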
@@ -94,7 +127,14 @@ _ENTRY(_start); subf r3,r5,r6 /* r3 = r6 - r5 */ add r3,r4,r3 /* Required Virtual Address */ - bl relocate +2: bl relocate + + /* + * For the second relocation, we already set the right tlb entries + * for the kernel space, so skip the code in fsl_booke_entry_mapping.S + */ + cmpwi r19,1 + beq set_ivor #endif /* We try to not make any assumptions about how the boot loader @@ -122,6 +162,7 @@ _ENTRY(__early_start) #include "fsl_booke_entry_mapping.S" #undef ENTRY_MAPPING_BOOT_SETUP +set_ivor: /* Establish the interrupt vector offsets */ SET_IVOR(0, CriticalInput); SET_IVOR(1, MachineCheck); @@ -207,11 +248,13 @@ _ENTRY(__early_start) bl early_init #ifdef CONFIG_RELOCATABLE + mr r3,r30 + mr r4,r31 #ifdef CONFIG_PHYS_64BIT - mr r3,r23 - mr r4,r25 + mr r5,r23 + mr r6,r25 #else - mr r3,r25 + mr r5,r25 #endif bl relocate_init #endif @@ -1207,6 +1250,9 @@ _GLOBAL(switch_to_as1) /* * Restore to the address space 0 and also invalidate the tlb entry created * by switch_to_as1. + * r3 - the tlb entry which should be invalidated + * r4 - __pa(PAGE_OFFSET in AS1) - __pa(PAGE_OFFSET in AS0) + * r5 - device tree virtual address. If r4 is 0, r5 is ignored. */ _GLOBAL(restore_to_as0) mflr r0 @@ -1215,7 +1261,15 @@ _GLOBAL(restore_to_as0) 0: mflr r9 addi r9,r9,1f - 0b - mfmsr r7 + /* + * We may map the PAGE_OFFSET in AS0 to a different physical address, + * so we need calculate the right jump and device tree address based + * on the offset passed by r4. + */ + add r9,r9,r4 + add r5,r5,r4 + +2: mfmsr r7 li r8,(MSR_IS | MSR_DS) andc r7,r7,r8 @@ -1234,9 +1288,19 @@ _GLOBAL(restore_to_as0) mtspr SPRN_MAS1,r9 tlbwe isync + + cmpwi r4,0 + bne 3f mtlr r0 blr + /* + * The PAGE_OFFSET will map to a different physical address, + * jump to _start to do another relocation again. + */ +3: mr r3,r5 + bl _start + /* * We put a few things here that have to be page-aligned. This stuff * goes at the beginning of the data segment, which is page-aligned. diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c index ca956c83e3a2..95deb9fdf92f 100644 --- a/arch/powerpc/mm/fsl_booke_mmu.c +++ b/arch/powerpc/mm/fsl_booke_mmu.c @@ -231,7 +231,7 @@ void __init adjust_total_lowmem(void) i = switch_to_as1(); __max_low_memory = map_mem_in_cams(ram, CONFIG_LOWMEM_CAM_NUM); - restore_to_as0(i); + restore_to_as0(i, 0, 0); pr_info("Memory CAM mapping: "); for (i = 0; i < tlbcam_index - 1; i++) @@ -252,17 +252,25 @@ void setup_initial_memory_limit(phys_addr_t first_memblock_base, } #ifdef CONFIG_RELOCATABLE -notrace void __init relocate_init(phys_addr_t start) +int __initdata is_second_reloc; +notrace void __init relocate_init(u64 dt_ptr, phys_addr_t start) { unsigned long base = KERNELBASE; + kernstart_addr = start; + if (is_second_reloc) { + virt_phys_offset = PAGE_OFFSET - memstart_addr; + return; + } + /* * Relocatable kernel support based on processing of dynamic - * relocation entries. - * Compute the virt_phys_offset : + * relocation entries. 
Before we get the real memstart_addr, + * We will compute the virt_phys_offset like this: * virt_phys_offset = stext.run - kernstart_addr * - * stext.run = (KERNELBASE & ~0x3ffffff) + (kernstart_addr & 0x3ffffff) + * stext.run = (KERNELBASE & ~0x3ffffff) + + * (kernstart_addr & 0x3ffffff) * When we relocate, we have : * * (kernstart_addr & 0x3ffffff) = (stext.run & 0x3ffffff) @@ -272,10 +280,32 @@ notrace void __init relocate_init(phys_addr_t start) * (kernstart_addr & ~0x3ffffff) * */ - kernstart_addr = start; start &= ~0x3ffffff; base &= ~0x3ffffff; virt_phys_offset = base - start; + early_get_first_memblock_info(__va(dt_ptr), NULL); + /* + * We now get the memstart_addr, then we should check if this + * address is the same as what the PAGE_OFFSET map to now. If + * not we have to change the map of PAGE_OFFSET to memstart_addr + * and do a second relocation. + */ + if (start != memstart_addr) { + int n; + long offset = start - memstart_addr; + + is_second_reloc = 1; + n = switch_to_as1(); + /* map a 64M area for the second relocation */ + if (memstart_addr > start) + map_mem_in_cams(0x4000000, CONFIG_LOWMEM_CAM_NUM); + else + map_mem_in_cams_addr(start, PAGE_OFFSET + offset, + 0x4000000, CONFIG_LOWMEM_CAM_NUM); + restore_to_as0(n, offset, __va(dt_ptr)); + /* We should never reach here */ + panic("Relocation error"); + } } #endif #endif diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index eefbf7bb4331..91da910210cb 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -149,7 +149,7 @@ extern void MMU_init_hw(void); extern unsigned long mmu_mapin_ram(unsigned long top); extern void adjust_total_lowmem(void); extern int switch_to_as1(void); -extern void restore_to_as0(int esel); +extern void restore_to_as0(int esel, int offset, void *dt_ptr); #endif extern void loadcam_entry(unsigned int index); -- cgit v1.2.3 From 0be7d969b0efef085ed6497d462ba16a875ca737 Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Tue, 24 Dec 2013 15:12:11 +0800 Subject: powerpc/fsl_booke: smp support for booting a relocatable kernel above 64M When booting above the 64M for a secondary cpu, we also face the same issue as the boot cpu that the PAGE_OFFSET map two different physical address for the init tlb and the final map. So we have to use switch_to_as1/restore_to_as0 between the conversion of these two maps. When restoring to as0 for a secondary cpu, we only need to return to the caller. So add a new parameter for function restore_to_as0 for this purpose. Use LOAD_REG_ADDR_PIC to get the address of variables which may be used before we set the final map in cams for the secondary cpu. Move the setting of cams a bit earlier in order to avoid the unnecessary using of LOAD_REG_ADDR_PIC. 
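[Editor's note, not part of the patch] As a sketch of the arithmetic only, the offset a secondary cpu hands to restore_to_as0 in the new __secondary_start code amounts to the following, assuming the physical kernel start is aligned down to the 64M page it lives in (the rlwinm/subf sequence in the diff below).

/* Illustration only: offset passed by a secondary cpu to restore_to_as0(). */
long secondary_as0_offset(unsigned long memstart_addr,
                          unsigned long phys_kernel_start)
{
        unsigned long aligned = phys_kernel_start & ~0x3ffffffUL;  /* 64M-aligned kernel start */

        return (long)(memstart_addr - aligned);
}
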
Signed-off-by: Kevin Hao Signed-off-by: Scott Wood --- arch/powerpc/kernel/head_fsl_booke.S | 41 ++++++++++++++++++++++++------------ arch/powerpc/mm/fsl_booke_mmu.c | 4 ++-- arch/powerpc/mm/mmu_decl.h | 2 +- arch/powerpc/mm/tlb_nohash_low.S | 4 +++- 4 files changed, 34 insertions(+), 17 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index b1f7edc3c360..b497188a94a1 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -216,8 +216,7 @@ set_ivor: /* Check to see if we're the second processor, and jump * to the secondary_start code if so */ - lis r24, boot_cpuid@h - ori r24, r24, boot_cpuid@l + LOAD_REG_ADDR_PIC(r24, boot_cpuid) lwz r24, 0(r24) cmpwi r24, -1 mfspr r24,SPRN_PIR @@ -1146,24 +1145,36 @@ _GLOBAL(__flush_disable_L1) /* When we get here, r24 needs to hold the CPU # */ .globl __secondary_start __secondary_start: - lis r3,__secondary_hold_acknowledge@h - ori r3,r3,__secondary_hold_acknowledge@l - stw r24,0(r3) - - li r3,0 - mr r4,r24 /* Why? */ - bl call_setup_cpu - - lis r3,tlbcam_index@ha - lwz r3,tlbcam_index@l(r3) + LOAD_REG_ADDR_PIC(r3, tlbcam_index) + lwz r3,0(r3) mtctr r3 li r26,0 /* r26 safe? */ + bl switch_to_as1 + mr r27,r3 /* tlb entry */ /* Load each CAM entry */ 1: mr r3,r26 bl loadcam_entry addi r26,r26,1 bdnz 1b + mr r3,r27 /* tlb entry */ + LOAD_REG_ADDR_PIC(r4, memstart_addr) + lwz r4,0(r4) + mr r5,r25 /* phys kernel start */ + rlwinm r5,r5,0,~0x3ffffff /* aligned 64M */ + subf r4,r5,r4 /* memstart_addr - phys kernel start */ + li r5,0 /* no device tree */ + li r6,0 /* not boot cpu */ + bl restore_to_as0 + + + lis r3,__secondary_hold_acknowledge@h + ori r3,r3,__secondary_hold_acknowledge@l + stw r24,0(r3) + + li r3,0 + mr r4,r24 /* Why? */ + bl call_setup_cpu /* get current_thread_info and current */ lis r1,secondary_ti@ha @@ -1253,6 +1264,7 @@ _GLOBAL(switch_to_as1) * r3 - the tlb entry which should be invalidated * r4 - __pa(PAGE_OFFSET in AS1) - __pa(PAGE_OFFSET in AS0) * r5 - device tree virtual address. If r4 is 0, r5 is ignored. 
+ * r6 - boot cpu */ _GLOBAL(restore_to_as0) mflr r0 @@ -1268,6 +1280,7 @@ _GLOBAL(restore_to_as0) */ add r9,r9,r4 add r5,r5,r4 + add r0,r0,r4 2: mfmsr r7 li r8,(MSR_IS | MSR_DS) @@ -1290,7 +1303,9 @@ _GLOBAL(restore_to_as0) isync cmpwi r4,0 - bne 3f + cmpwi cr1,r6,0 + cror eq,4*cr1+eq,eq + bne 3f /* offset != 0 && is_boot_cpu */ mtlr r0 blr diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c index 95deb9fdf92f..a68671c18ad4 100644 --- a/arch/powerpc/mm/fsl_booke_mmu.c +++ b/arch/powerpc/mm/fsl_booke_mmu.c @@ -231,7 +231,7 @@ void __init adjust_total_lowmem(void) i = switch_to_as1(); __max_low_memory = map_mem_in_cams(ram, CONFIG_LOWMEM_CAM_NUM); - restore_to_as0(i, 0, 0); + restore_to_as0(i, 0, 0, 1); pr_info("Memory CAM mapping: "); for (i = 0; i < tlbcam_index - 1; i++) @@ -302,7 +302,7 @@ notrace void __init relocate_init(u64 dt_ptr, phys_addr_t start) else map_mem_in_cams_addr(start, PAGE_OFFSET + offset, 0x4000000, CONFIG_LOWMEM_CAM_NUM); - restore_to_as0(n, offset, __va(dt_ptr)); + restore_to_as0(n, offset, __va(dt_ptr), 1); /* We should never reach here */ panic("Relocation error"); } diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index 91da910210cb..9615d82919b8 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -149,7 +149,7 @@ extern void MMU_init_hw(void); extern unsigned long mmu_mapin_ram(unsigned long top); extern void adjust_total_lowmem(void); extern int switch_to_as1(void); -extern void restore_to_as0(int esel, int offset, void *dt_ptr); +extern void restore_to_as0(int esel, int offset, void *dt_ptr, int bootcpu); #endif extern void loadcam_entry(unsigned int index); diff --git a/arch/powerpc/mm/tlb_nohash_low.S b/arch/powerpc/mm/tlb_nohash_low.S index 626ad081639f..43ff3c797fbf 100644 --- a/arch/powerpc/mm/tlb_nohash_low.S +++ b/arch/powerpc/mm/tlb_nohash_low.S @@ -402,7 +402,9 @@ _GLOBAL(set_context) * Load TLBCAM[index] entry in to the L2 CAM MMU */ _GLOBAL(loadcam_entry) - LOAD_REG_ADDR(r4, TLBCAM) + mflr r5 + LOAD_REG_ADDR_PIC(r4, TLBCAM) + mtlr r5 mulli r5,r3,TLBCAM_SIZE add r3,r5,r4 lwz r4,TLBCAM_MAS0(r3) -- cgit v1.2.3 From 28efc35fe68dacbddc4b12c2fa8f2df1593a4ad3 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Fri, 11 Oct 2013 19:22:38 -0500 Subject: powerpc/e6500: TLB miss handler with hardware tablewalk support There are a few things that make the existing hw tablewalk handlers unsuitable for e6500: - Indirect entries go in TLB1 (though the resulting direct entries go in TLB0). - It has threads, but no "tlbsrx." -- so we need a spinlock and a normal "tlbsx". Because we need this lock, hardware tablewalk is mandatory on e6500 unless we want to add spinlock+tlbsx to the normal bolted TLB miss handler. - TLB1 has no HES (nor next-victim hint) so we need software round robin (TODO: integrate this round robin data with hugetlb/KVM) - The existing tablewalk handlers map half of a page table at a time, because IBM hardware has a fixed 1MiB indirect page size. e6500 has variable size indirect entries, with a minimum of 2MiB. So we can't do the half-page indirect mapping, and even if we could it would be less efficient than mapping the full page. - Like on e5500, the linear mapping is bolted, so we don't need the overhead of supporting nested tlb misses. Note that hardware tablewalk does not work in rev1 of e6500. We do not expect to support e6500 rev1 in mainline Linux. 
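[Editor's note, not part of the patch] To make the round-robin scheme concrete, here is a hedged C sketch of how the handler picks a TLB1 victim entry using the new tlb_core_data fields; the per-core lbarx/stbcx. lock that guards it is omitted, the struct is renamed to avoid implying it is the real definition, and the logic simply mirrors the esel_next/esel_max/esel_first handling in the assembly added below.

/* Illustration only: software round-robin victim selection for TLB1. */
struct tlb_core_data_sketch {
        unsigned char esel_next, esel_max, esel_first;
        unsigned char lock;
};

static unsigned char pick_victim_esel(struct tlb_core_data_sketch *tcd)
{
        unsigned char esel = tcd->esel_next;

        /* advance, wrapping back to the first replaceable entry */
        tcd->esel_next = (esel + 1 == tcd->esel_max) ? tcd->esel_first
                                                     : esel + 1;
        return esel;
}
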
Signed-off-by: Scott Wood Cc: Mihai Caraman --- arch/powerpc/include/asm/mmu-book3e.h | 13 +++ arch/powerpc/include/asm/mmu.h | 21 +++-- arch/powerpc/include/asm/paca.h | 6 ++ arch/powerpc/kernel/asm-offsets.c | 9 ++ arch/powerpc/kernel/paca.c | 5 + arch/powerpc/kernel/setup_64.c | 31 ++++++ arch/powerpc/mm/fsl_booke_mmu.c | 7 ++ arch/powerpc/mm/mem.c | 6 ++ arch/powerpc/mm/tlb_low_64e.S | 171 ++++++++++++++++++++++++++++++++++ arch/powerpc/mm/tlb_nohash.c | 93 ++++++++++++------ 10 files changed, 326 insertions(+), 36 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/mmu-book3e.h b/arch/powerpc/include/asm/mmu-book3e.h index 936db360790a..89b785d16846 100644 --- a/arch/powerpc/include/asm/mmu-book3e.h +++ b/arch/powerpc/include/asm/mmu-book3e.h @@ -286,8 +286,21 @@ static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize) extern int mmu_linear_psize; extern int mmu_vmemmap_psize; +struct tlb_core_data { + /* For software way selection, as on Freescale TLB1 */ + u8 esel_next, esel_max, esel_first; + + /* Per-core spinlock for e6500 TLB handlers (no tlbsrx.) */ + u8 lock; +}; + #ifdef CONFIG_PPC64 extern unsigned long linear_map_top; +extern int book3e_htw_mode; + +#define PPC_HTW_NONE 0 +#define PPC_HTW_IBM 1 +#define PPC_HTW_E6500 2 /* * 64-bit booke platforms don't load the tlb in the tlb miss handler code. diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index 691fd8aca939..f8d1d6dcf7db 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -180,16 +180,17 @@ static inline void assert_pte_locked(struct mm_struct *mm, unsigned long addr) #define MMU_PAGE_64K_AP 3 /* "Admixed pages" (hash64 only) */ #define MMU_PAGE_256K 4 #define MMU_PAGE_1M 5 -#define MMU_PAGE_4M 6 -#define MMU_PAGE_8M 7 -#define MMU_PAGE_16M 8 -#define MMU_PAGE_64M 9 -#define MMU_PAGE_256M 10 -#define MMU_PAGE_1G 11 -#define MMU_PAGE_16G 12 -#define MMU_PAGE_64G 13 - -#define MMU_PAGE_COUNT 14 +#define MMU_PAGE_2M 6 +#define MMU_PAGE_4M 7 +#define MMU_PAGE_8M 8 +#define MMU_PAGE_16M 9 +#define MMU_PAGE_64M 10 +#define MMU_PAGE_256M 11 +#define MMU_PAGE_1G 12 +#define MMU_PAGE_16G 13 +#define MMU_PAGE_64G 14 + +#define MMU_PAGE_COUNT 15 #if defined(CONFIG_PPC_STD_MMU_64) /* 64-bit classic hash table MMU */ diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index c3523d1dda58..e81731c62a7f 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -113,6 +113,10 @@ struct paca_struct { /* Keep pgd in the same cacheline as the start of extlb */ pgd_t *pgd __attribute__((aligned(0x80))); /* Current PGD */ pgd_t *kernel_pgd; /* Kernel PGD */ + + /* Shared by all threads of a core -- points to tcd of first thread */ + struct tlb_core_data *tcd_ptr; + /* We can have up to 3 levels of reentrancy in the TLB miss handler */ u64 extlb[3][EX_TLB_SIZE / sizeof(u64)]; u64 exmc[8]; /* used for machine checks */ @@ -123,6 +127,8 @@ struct paca_struct { void *mc_kstack; void *crit_kstack; void *dbg_kstack; + + struct tlb_core_data tcd; #endif /* CONFIG_PPC_BOOK3E */ mm_context_t context; diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 41a283956a29..ed8d68ce71f3 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -203,6 +203,15 @@ int main(void) DEFINE(PACA_MC_STACK, offsetof(struct paca_struct, mc_kstack)); DEFINE(PACA_CRIT_STACK, offsetof(struct paca_struct, crit_kstack)); DEFINE(PACA_DBG_STACK, 
offsetof(struct paca_struct, dbg_kstack)); + DEFINE(PACA_TCD_PTR, offsetof(struct paca_struct, tcd_ptr)); + + DEFINE(TCD_ESEL_NEXT, + offsetof(struct tlb_core_data, esel_next)); + DEFINE(TCD_ESEL_MAX, + offsetof(struct tlb_core_data, esel_max)); + DEFINE(TCD_ESEL_FIRST, + offsetof(struct tlb_core_data, esel_first)); + DEFINE(TCD_LOCK, offsetof(struct tlb_core_data, lock)); #endif /* CONFIG_PPC_BOOK3E */ #ifdef CONFIG_PPC_STD_MMU_64 diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index 623c356fe34f..bf0aada02fe4 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -160,6 +160,11 @@ void __init initialise_paca(struct paca_struct *new_paca, int cpu) #ifdef CONFIG_PPC_STD_MMU_64 new_paca->slb_shadow_ptr = init_slb_shadow(cpu); #endif /* CONFIG_PPC_STD_MMU_64 */ + +#ifdef CONFIG_PPC_BOOK3E + /* For now -- if we have threads this will be adjusted later */ + new_paca->tcd_ptr = &new_paca->tcd; +#endif } /* Put the paca pointer into r13 and SPRG_PACA */ diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 2232aff66059..1ce9b87d7df8 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -97,6 +97,36 @@ int dcache_bsize; int icache_bsize; int ucache_bsize; +#if defined(CONFIG_PPC_BOOK3E) && defined(CONFIG_SMP) +static void setup_tlb_core_data(void) +{ + int cpu; + + for_each_possible_cpu(cpu) { + int first = cpu_first_thread_sibling(cpu); + + paca[cpu].tcd_ptr = &paca[first].tcd; + + /* + * If we have threads, we need either tlbsrx. + * or e6500 tablewalk mode, or else TLB handlers + * will be racy and could produce duplicate entries. + */ + if (smt_enabled_at_boot >= 2 && + !mmu_has_feature(MMU_FTR_USE_TLBRSRV) && + book3e_htw_mode != PPC_HTW_E6500) { + /* Should we panic instead? */ + WARN_ONCE("%s: unsupported MMU configuration -- expect problems\n", + __func__); + } + } +} +#else +static void setup_tlb_core_data(void) +{ +} +#endif + #ifdef CONFIG_SMP static char *smt_enabled_cmdline; @@ -445,6 +475,7 @@ void __init setup_system(void) smp_setup_cpu_maps(); check_smt_enabled(); + setup_tlb_core_data(); #ifdef CONFIG_SMP /* Release secondary cpus out of their spinloops at 0x60 now that diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c index a68671c18ad4..94cd728166d3 100644 --- a/arch/powerpc/mm/fsl_booke_mmu.c +++ b/arch/powerpc/mm/fsl_booke_mmu.c @@ -52,6 +52,7 @@ #include #include #include +#include #include "mmu_decl.h" @@ -191,6 +192,12 @@ static unsigned long map_mem_in_cams_addr(phys_addr_t phys, unsigned long virt, } tlbcam_index = i; +#ifdef CONFIG_PPC64 + get_paca()->tcd.esel_next = i; + get_paca()->tcd.esel_max = mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY; + get_paca()->tcd.esel_first = i; +#endif + return amount_mapped; } diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 3fa93dc7fe75..94448cd4b444 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -307,6 +307,12 @@ static void __init register_page_bootmem_info(void) void __init mem_init(void) { + /* + * book3s is limited to 16 page sizes due to encoding this in + * a 4-bit field for slices. 
+ */ + BUILD_BUG_ON(MMU_PAGE_COUNT > 16); + #ifdef CONFIG_SWIOTLB swiotlb_init(0); #endif diff --git a/arch/powerpc/mm/tlb_low_64e.S b/arch/powerpc/mm/tlb_low_64e.S index b4113bf86353..75f5d2777f61 100644 --- a/arch/powerpc/mm/tlb_low_64e.S +++ b/arch/powerpc/mm/tlb_low_64e.S @@ -239,6 +239,177 @@ itlb_miss_fault_bolted: beq tlb_miss_common_bolted b itlb_miss_kernel_bolted +/* + * TLB miss handling for e6500 and derivatives, using hardware tablewalk. + * + * Linear mapping is bolted: no virtual page table or nested TLB misses + * Indirect entries in TLB1, hardware loads resulting direct entries + * into TLB0 + * No HES or NV hint on TLB1, so we need to do software round-robin + * No tlbsrx. so we need a spinlock, and we have to deal + * with MAS-damage caused by tlbsx + * 4K pages only + */ + + START_EXCEPTION(instruction_tlb_miss_e6500) + tlb_prolog_bolted BOOKE_INTERRUPT_ITLB_MISS SPRN_SRR0 + + ld r11,PACA_TCD_PTR(r13) + srdi. r15,r16,60 /* get region */ + ori r16,r16,1 + + TLB_MISS_STATS_SAVE_INFO_BOLTED + bne tlb_miss_kernel_e6500 /* user/kernel test */ + + b tlb_miss_common_e6500 + + START_EXCEPTION(data_tlb_miss_e6500) + tlb_prolog_bolted BOOKE_INTERRUPT_DTLB_MISS SPRN_DEAR + + ld r11,PACA_TCD_PTR(r13) + srdi. r15,r16,60 /* get region */ + rldicr r16,r16,0,62 + + TLB_MISS_STATS_SAVE_INFO_BOLTED + bne tlb_miss_kernel_e6500 /* user vs kernel check */ + +/* + * This is the guts of the TLB miss handler for e6500 and derivatives. + * We are entered with: + * + * r16 = page of faulting address (low bit 0 if data, 1 if instruction) + * r15 = crap (free to use) + * r14 = page table base + * r13 = PACA + * r11 = tlb_per_core ptr + * r10 = crap (free to use) + */ +tlb_miss_common_e6500: + /* + * Search if we already have an indirect entry for that virtual + * address, and if we do, bail out. + * + * MAS6:IND should be already set based on MAS4 + */ + addi r10,r11,TCD_LOCK +1: lbarx r15,0,r10 + cmpdi r15,0 + bne 2f + li r15,1 + stbcx. r15,0,r10 + bne 1b + .subsection 1 +2: lbz r15,0(r10) + cmpdi r15,0 + bne 2b + b 1b + .previous + + mfspr r15,SPRN_MAS2 + + tlbsx 0,r16 + mfspr r10,SPRN_MAS1 + andis. r10,r10,MAS1_VALID@h + bne tlb_miss_done_e6500 + + /* Undo MAS-damage from the tlbsx */ + mfspr r10,SPRN_MAS1 + oris r10,r10,MAS1_VALID@h + mtspr SPRN_MAS1,r10 + mtspr SPRN_MAS2,r15 + + /* Now, we need to walk the page tables. First check if we are in + * range. + */ + rldicl. 
r10,r16,64-PGTABLE_EADDR_SIZE,PGTABLE_EADDR_SIZE+4 + bne- tlb_miss_fault_e6500 + + rldicl r15,r16,64-PGDIR_SHIFT+3,64-PGD_INDEX_SIZE-3 + cmpldi cr0,r14,0 + clrrdi r15,r15,3 + beq- tlb_miss_fault_e6500 /* No PGDIR, bail */ + ldx r14,r14,r15 /* grab pgd entry */ + + rldicl r15,r16,64-PUD_SHIFT+3,64-PUD_INDEX_SIZE-3 + clrrdi r15,r15,3 + cmpdi cr0,r14,0 + bge tlb_miss_fault_e6500 /* Bad pgd entry or hugepage; bail */ + ldx r14,r14,r15 /* grab pud entry */ + + rldicl r15,r16,64-PMD_SHIFT+3,64-PMD_INDEX_SIZE-3 + clrrdi r15,r15,3 + cmpdi cr0,r14,0 + bge tlb_miss_fault_e6500 + ldx r14,r14,r15 /* Grab pmd entry */ + + mfspr r10,SPRN_MAS0 + cmpdi cr0,r14,0 + bge tlb_miss_fault_e6500 + + /* Now we build the MAS for a 2M indirect page: + * + * MAS 0 : ESEL needs to be filled by software round-robin + * MAS 1 : Fully set up + * - PID already updated by caller if necessary + * - TSIZE for now is base ind page size always + * - TID already cleared if necessary + * MAS 2 : Default not 2M-aligned, need to be redone + * MAS 3+7 : Needs to be done + */ + + ori r14,r14,(BOOK3E_PAGESZ_4K << MAS3_SPSIZE_SHIFT) + mtspr SPRN_MAS7_MAS3,r14 + + clrrdi r15,r16,21 /* make EA 2M-aligned */ + mtspr SPRN_MAS2,r15 + + lbz r15,TCD_ESEL_NEXT(r11) + lbz r16,TCD_ESEL_MAX(r11) + lbz r14,TCD_ESEL_FIRST(r11) + rlwimi r10,r15,16,0x00ff0000 /* insert esel_next into MAS0 */ + addi r15,r15,1 /* increment esel_next */ + mtspr SPRN_MAS0,r10 + cmpw r15,r16 + iseleq r15,r14,r15 /* if next == last use first */ + stb r15,TCD_ESEL_NEXT(r11) + + tlbwe + +tlb_miss_done_e6500: + .macro tlb_unlock_e6500 + li r15,0 + isync + stb r15,TCD_LOCK(r11) + .endm + + tlb_unlock_e6500 + TLB_MISS_STATS_X(MMSTAT_TLB_MISS_NORM_OK) + tlb_epilog_bolted + rfi + +tlb_miss_kernel_e6500: + mfspr r10,SPRN_MAS1 + ld r14,PACA_KERNELPGD(r13) + cmpldi cr0,r15,8 /* Check for vmalloc region */ + rlwinm r10,r10,0,16,1 /* Clear TID */ + mtspr SPRN_MAS1,r10 + beq+ tlb_miss_common_e6500 + +tlb_miss_fault_e6500: + tlb_unlock_e6500 + /* We need to check if it was an instruction miss */ + andi. r16,r16,1 + bne itlb_miss_fault_e6500 +dtlb_miss_fault_e6500: + TLB_MISS_STATS_D(MMSTAT_TLB_MISS_NORM_FAULT) + tlb_epilog_bolted + b exc_data_storage_book3e +itlb_miss_fault_e6500: + TLB_MISS_STATS_I(MMSTAT_TLB_MISS_NORM_FAULT) + tlb_epilog_bolted + b exc_instruction_storage_book3e + + /********************************************************************** * * * TLB miss handling for Book3E with TLB reservation and HES support * diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c index 8805b7b87dc6..735839b74dc5 100644 --- a/arch/powerpc/mm/tlb_nohash.c +++ b/arch/powerpc/mm/tlb_nohash.c @@ -43,6 +43,7 @@ #include #include #include +#include #include "mmu_decl.h" @@ -58,6 +59,10 @@ struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = { .shift = 12, .enc = BOOK3E_PAGESZ_4K, }, + [MMU_PAGE_2M] = { + .shift = 21, + .enc = BOOK3E_PAGESZ_2M, + }, [MMU_PAGE_4M] = { .shift = 22, .enc = BOOK3E_PAGESZ_4M, @@ -136,7 +141,7 @@ static inline int mmu_get_tsize(int psize) int mmu_linear_psize; /* Page size used for the linear mapping */ int mmu_pte_psize; /* Page size used for PTE pages */ int mmu_vmemmap_psize; /* Page size used for the virtual mem map */ -int book3e_htw_enabled; /* Is HW tablewalk enabled ? */ +int book3e_htw_mode; /* HW tablewalk? 
Value is PPC_HTW_* */ unsigned long linear_map_top; /* Top of linear mapping */ #endif /* CONFIG_PPC64 */ @@ -377,7 +382,7 @@ void tlb_flush_pgtable(struct mmu_gather *tlb, unsigned long address) { int tsize = mmu_psize_defs[mmu_pte_psize].enc; - if (book3e_htw_enabled) { + if (book3e_htw_mode != PPC_HTW_NONE) { unsigned long start = address & PMD_MASK; unsigned long end = address + PMD_SIZE; unsigned long size = 1UL << mmu_psize_defs[mmu_pte_psize].shift; @@ -430,7 +435,7 @@ static void setup_page_sizes(void) def = &mmu_psize_defs[psize]; shift = def->shift; - if (shift == 0) + if (shift == 0 || shift & 1) continue; /* adjust to be in terms of 4^shift Kb */ @@ -440,21 +445,40 @@ static void setup_page_sizes(void) def->flags |= MMU_PAGE_SIZE_DIRECT; } - goto no_indirect; + goto out; } if (fsl_mmu && (mmucfg & MMUCFG_MAVN) == MMUCFG_MAVN_V2) { - u32 tlb1ps = mfspr(SPRN_TLB1PS); + u32 tlb1cfg, tlb1ps; + + tlb0cfg = mfspr(SPRN_TLB0CFG); + tlb1cfg = mfspr(SPRN_TLB1CFG); + tlb1ps = mfspr(SPRN_TLB1PS); + eptcfg = mfspr(SPRN_EPTCFG); + + if ((tlb1cfg & TLBnCFG_IND) && (tlb0cfg & TLBnCFG_PT)) + book3e_htw_mode = PPC_HTW_E6500; + + /* + * We expect 4K subpage size and unrestricted indirect size. + * The lack of a restriction on indirect size is a Freescale + * extension, indicated by PSn = 0 but SPSn != 0. + */ + if (eptcfg != 2) + book3e_htw_mode = PPC_HTW_NONE; for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) { struct mmu_psize_def *def = &mmu_psize_defs[psize]; if (tlb1ps & (1U << (def->shift - 10))) { def->flags |= MMU_PAGE_SIZE_DIRECT; + + if (book3e_htw_mode && psize == MMU_PAGE_2M) + def->flags |= MMU_PAGE_SIZE_INDIRECT; } } - goto no_indirect; + goto out; } #endif @@ -471,8 +495,11 @@ static void setup_page_sizes(void) } /* Indirect page sizes supported ? */ - if ((tlb0cfg & TLBnCFG_IND) == 0) - goto no_indirect; + if ((tlb0cfg & TLBnCFG_IND) == 0 || + (tlb0cfg & TLBnCFG_PT) == 0) + goto out; + + book3e_htw_mode = PPC_HTW_IBM; /* Now, we only deal with one IND page size for each * direct size. Hopefully all implementations today are @@ -497,8 +524,8 @@ static void setup_page_sizes(void) def->ind = ps + 10; } } - no_indirect: +out: /* Cleanup array and print summary */ pr_info("MMU: Supported page sizes\n"); for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) { @@ -520,23 +547,23 @@ static void setup_page_sizes(void) static void setup_mmu_htw(void) { - /* Check if HW tablewalk is present, and if yes, enable it by: - * - * - patching the TLB miss handlers to branch to the - * one dedicates to it - * - * - setting the global book3e_htw_enabled - */ - unsigned int tlb0cfg = mfspr(SPRN_TLB0CFG); + /* + * If we want to use HW tablewalk, enable it by patching the TLB miss + * handlers to branch to the one dedicated to it. + */ - if ((tlb0cfg & TLBnCFG_IND) && - (tlb0cfg & TLBnCFG_PT)) { + switch (book3e_htw_mode) { + case PPC_HTW_IBM: patch_exception(0x1c0, exc_data_tlb_miss_htw_book3e); patch_exception(0x1e0, exc_instruction_tlb_miss_htw_book3e); - book3e_htw_enabled = 1; + break; + case PPC_HTW_E6500: + patch_exception(0x1c0, exc_data_tlb_miss_e6500_book3e); + patch_exception(0x1e0, exc_instruction_tlb_miss_e6500_book3e); + break; } pr_info("MMU: Book3E HW tablewalk %s\n", - book3e_htw_enabled ? "enabled" : "not supported"); + book3e_htw_mode != PPC_HTW_NONE ? 
"enabled" : "not supported"); } /* @@ -576,8 +603,16 @@ static void __early_init_mmu(int boot_cpu) /* Set MAS4 based on page table setting */ mas4 = 0x4 << MAS4_WIMGED_SHIFT; - if (book3e_htw_enabled) { - mas4 |= mas4 | MAS4_INDD; + switch (book3e_htw_mode) { + case PPC_HTW_E6500: + mas4 |= MAS4_INDD; + mas4 |= BOOK3E_PAGESZ_2M << MAS4_TSIZED_SHIFT; + mas4 |= MAS4_TLBSELD(1); + mmu_pte_psize = MMU_PAGE_2M; + break; + + case PPC_HTW_IBM: + mas4 |= MAS4_INDD; #ifdef CONFIG_PPC_64K_PAGES mas4 |= BOOK3E_PAGESZ_256M << MAS4_TSIZED_SHIFT; mmu_pte_psize = MMU_PAGE_256M; @@ -585,13 +620,16 @@ static void __early_init_mmu(int boot_cpu) mas4 |= BOOK3E_PAGESZ_1M << MAS4_TSIZED_SHIFT; mmu_pte_psize = MMU_PAGE_1M; #endif - } else { + break; + + case PPC_HTW_NONE: #ifdef CONFIG_PPC_64K_PAGES mas4 |= BOOK3E_PAGESZ_64K << MAS4_TSIZED_SHIFT; #else mas4 |= BOOK3E_PAGESZ_4K << MAS4_TSIZED_SHIFT; #endif mmu_pte_psize = mmu_virtual_psize; + break; } mtspr(SPRN_MAS4, mas4); @@ -611,8 +649,11 @@ static void __early_init_mmu(int boot_cpu) /* limit memory so we dont have linear faults */ memblock_enforce_memory_limit(linear_map_top); - patch_exception(0x1c0, exc_data_tlb_miss_bolted_book3e); - patch_exception(0x1e0, exc_instruction_tlb_miss_bolted_book3e); + if (book3e_htw_mode == PPC_HTW_NONE) { + patch_exception(0x1c0, exc_data_tlb_miss_bolted_book3e); + patch_exception(0x1e0, + exc_instruction_tlb_miss_bolted_book3e); + } } #endif -- cgit v1.2.3 From ed2ddc56e758d516c5699260ada4d68434dfe1dc Mon Sep 17 00:00:00 2001 From: Diana Craciun Date: Thu, 14 Mar 2013 16:55:11 +0200 Subject: powerpc: Replaced tlbilx with tlbwe in the initialization code On Freescale e6500 cores EPCR[DGTMI] controls whether guest supervisor state can execute TLB management instructions. If EPCR[DGTMI]=0 tlbwe and tlbilx are allowed to execute normally in the guest state. A hypervisor may choose to virtualize TLB1 and for this purpose it may use IPROT to protect the entries for being invalidated by the guest. However, because tlbwe and tlbilx execution in the guest state are sharing the same bit, it is not possible to have a scenario where tlbwe is allowed to be executed in guest state and tlbilx traps. When guest TLB management instructions are allowed to be executed in guest state the guest cannot use tlbilx to invalidate TLB1 guest entries. Linux is using tlbilx in the boot code to invalidate the temporary entries it creates when initializing the MMU. The patch is replacing the usage of tlbilx in initialization code with tlbwe with VALID bit cleared. Linux is also using tlbilx in other contexts (like huge pages or indirect entries) but removing the tlbilx from the initialization code offers the possibility to have scenarios under hypervisor which are not using huge pages or indirect entries. 
Signed-off-by: Diana Craciun Signed-off-by: Scott Wood --- arch/powerpc/kernel/exceptions-64e.S | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 4d5a0b1034e8..063b65dd4f27 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -1068,12 +1068,9 @@ skpinv: addi r6,r6,1 /* Increment */ mtspr SPRN_MAS0,r3 tlbre mfspr r6,SPRN_MAS1 - rlwinm r6,r6,0,2,0 /* clear IPROT */ + rlwinm r6,r6,0,2,31 /* clear IPROT and VALID */ mtspr SPRN_MAS1,r6 tlbwe - - /* Invalidate TLB1 */ - PPC_TLBILX_ALL(0,R0) sync isync @@ -1127,12 +1124,9 @@ skpinv: addi r6,r6,1 /* Increment */ mtspr SPRN_MAS0,r4 tlbre mfspr r5,SPRN_MAS1 - rlwinm r5,r5,0,2,0 /* clear IPROT */ + rlwinm r5,r5,0,2,31 /* clear IPROT and VALID */ mtspr SPRN_MAS1,r5 tlbwe - - /* Invalidate TLB1 */ - PPC_TLBILX_ALL(0,R0) sync isync -- cgit v1.2.3 From 7d71d5b2e80623242b0c0823ce6cb635d089c8b2 Mon Sep 17 00:00:00 2001 From: Gerhard Sittig Date: Sat, 30 Nov 2013 23:51:27 +0100 Subject: clk: mpc5xxx: switch to COMMON_CLK, retire PPC_CLOCK the setup before the change was - arch/powerpc/Kconfig had the PPC_CLOCK option, off by default - depending on the PPC_CLOCK option the arch/powerpc/kernel/clock.c file was built, which implements the clk.h API but always returns -ENOSYS unless a platform registers specific callbacks - the MPC52xx platform selected PPC_CLOCK but did not register any callbacks, thus all clk.h API calls keep resulting in -ENOSYS errors (which is OK, all peripheral drivers deal with the situation) - the MPC512x platform selected PPC_CLOCK and registered specific callbacks implemented in arch/powerpc/platforms/512x/clock.c, thus provided real support for the clock API - no other powerpc platform did select PPC_CLOCK the situation after the change is - the MPC512x platform implements the COMMON_CLK interface, and thus the PPC_CLOCK approach in arch/powerpc/platforms/512x/clock.c has become obsolete - the MPC52xx platform still lacks genuine support for the clk.h API while this is not a change against the previous situation (the error code returned from COMMON_CLK stubs differs but every call still results in an error) - with all references gone, the arch/powerpc/kernel/clock.c wrapper and the PPC_CLOCK option have become obsolete, as did the clk_interface.h header file the switch from PPC_CLOCK to COMMON_CLK is done for all platforms within the same commit such that multiplatform kernels (the combination of 512x and 52xx within one executable) keep working Cc: Mike Turquette Cc: Anatolij Gustschin Cc: linux-arm-kernel@lists.infradead.org Cc: linuxppc-dev@lists.ozlabs.org Signed-off-by: Gerhard Sittig Signed-off-by: Anatolij Gustschin --- arch/powerpc/Kconfig | 5 - arch/powerpc/include/asm/clk_interface.h | 20 - arch/powerpc/kernel/Makefile | 1 - arch/powerpc/kernel/clock.c | 82 ---- arch/powerpc/platforms/512x/Kconfig | 2 +- arch/powerpc/platforms/512x/Makefile | 1 - arch/powerpc/platforms/512x/clock.c | 754 ------------------------------- arch/powerpc/platforms/52xx/Kconfig | 2 +- 8 files changed, 2 insertions(+), 865 deletions(-) delete mode 100644 arch/powerpc/include/asm/clk_interface.h delete mode 100644 arch/powerpc/kernel/clock.c delete mode 100644 arch/powerpc/platforms/512x/clock.c (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index f0a893142cee..b131b26ca45a 100644 --- a/arch/powerpc/Kconfig +++ 
b/arch/powerpc/Kconfig @@ -1040,11 +1040,6 @@ config KEYS_COMPAT source "crypto/Kconfig" -config PPC_CLOCK - bool - default n - select HAVE_CLK - config PPC_LIB_RHEAP bool diff --git a/arch/powerpc/include/asm/clk_interface.h b/arch/powerpc/include/asm/clk_interface.h deleted file mode 100644 index ab1882c1e176..000000000000 --- a/arch/powerpc/include/asm/clk_interface.h +++ /dev/null @@ -1,20 +0,0 @@ -#ifndef __ASM_POWERPC_CLK_INTERFACE_H -#define __ASM_POWERPC_CLK_INTERFACE_H - -#include - -struct clk_interface { - struct clk* (*clk_get) (struct device *dev, const char *id); - int (*clk_enable) (struct clk *clk); - void (*clk_disable) (struct clk *clk); - unsigned long (*clk_get_rate) (struct clk *clk); - void (*clk_put) (struct clk *clk); - long (*clk_round_rate) (struct clk *clk, unsigned long rate); - int (*clk_set_rate) (struct clk *clk, unsigned long rate); - int (*clk_set_parent) (struct clk *clk, struct clk *parent); - struct clk* (*clk_get_parent) (struct clk *clk); -}; - -extern struct clk_interface clk_functions; - -#endif /* __ASM_POWERPC_CLK_INTERFACE_H */ diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 904d713366ff..fcc9a89a4695 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -48,7 +48,6 @@ obj-$(CONFIG_ALTIVEC) += vecemu.o obj-$(CONFIG_PPC_970_NAP) += idle_power4.o obj-$(CONFIG_PPC_P7_NAP) += idle_power7.o obj-$(CONFIG_PPC_OF) += of_platform.o prom_parse.o -obj-$(CONFIG_PPC_CLOCK) += clock.o procfs-y := proc_powerpc.o obj-$(CONFIG_PROC_FS) += $(procfs-y) rtaspci-$(CONFIG_PPC64)-$(CONFIG_PCI) := rtas_pci.o diff --git a/arch/powerpc/kernel/clock.c b/arch/powerpc/kernel/clock.c deleted file mode 100644 index a764b47791e8..000000000000 --- a/arch/powerpc/kernel/clock.c +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Dummy clk implementations for powerpc. - * These need to be overridden in platform code. 
- */ - -#include -#include -#include -#include -#include - -struct clk_interface clk_functions; - -struct clk *clk_get(struct device *dev, const char *id) -{ - if (clk_functions.clk_get) - return clk_functions.clk_get(dev, id); - return ERR_PTR(-ENOSYS); -} -EXPORT_SYMBOL(clk_get); - -void clk_put(struct clk *clk) -{ - if (clk_functions.clk_put) - clk_functions.clk_put(clk); -} -EXPORT_SYMBOL(clk_put); - -int clk_enable(struct clk *clk) -{ - if (clk_functions.clk_enable) - return clk_functions.clk_enable(clk); - return -ENOSYS; -} -EXPORT_SYMBOL(clk_enable); - -void clk_disable(struct clk *clk) -{ - if (clk_functions.clk_disable) - clk_functions.clk_disable(clk); -} -EXPORT_SYMBOL(clk_disable); - -unsigned long clk_get_rate(struct clk *clk) -{ - if (clk_functions.clk_get_rate) - return clk_functions.clk_get_rate(clk); - return 0; -} -EXPORT_SYMBOL(clk_get_rate); - -long clk_round_rate(struct clk *clk, unsigned long rate) -{ - if (clk_functions.clk_round_rate) - return clk_functions.clk_round_rate(clk, rate); - return -ENOSYS; -} -EXPORT_SYMBOL(clk_round_rate); - -int clk_set_rate(struct clk *clk, unsigned long rate) -{ - if (clk_functions.clk_set_rate) - return clk_functions.clk_set_rate(clk, rate); - return -ENOSYS; -} -EXPORT_SYMBOL(clk_set_rate); - -struct clk *clk_get_parent(struct clk *clk) -{ - if (clk_functions.clk_get_parent) - return clk_functions.clk_get_parent(clk); - return ERR_PTR(-ENOSYS); -} -EXPORT_SYMBOL(clk_get_parent); - -int clk_set_parent(struct clk *clk, struct clk *parent) -{ - if (clk_functions.clk_set_parent) - return clk_functions.clk_set_parent(clk, parent); - return -ENOSYS; -} -EXPORT_SYMBOL(clk_set_parent); diff --git a/arch/powerpc/platforms/512x/Kconfig b/arch/powerpc/platforms/512x/Kconfig index fc9c1cbfcb1d..5aa3f4b5332c 100644 --- a/arch/powerpc/platforms/512x/Kconfig +++ b/arch/powerpc/platforms/512x/Kconfig @@ -1,9 +1,9 @@ config PPC_MPC512x bool "512x-based boards" depends on 6xx + select COMMON_CLK select FSL_SOC select IPIC - select PPC_CLOCK select PPC_PCI_CHOICE select FSL_PCI if PCI select ARCH_WANT_OPTIONAL_GPIOLIB diff --git a/arch/powerpc/platforms/512x/Makefile b/arch/powerpc/platforms/512x/Makefile index 1e05f9def8a4..01693121a2b1 100644 --- a/arch/powerpc/platforms/512x/Makefile +++ b/arch/powerpc/platforms/512x/Makefile @@ -1,7 +1,6 @@ # # Makefile for the Freescale PowerPC 512x linux kernel. # -obj-$(CONFIG_PPC_CLOCK) += clock.o obj-$(CONFIG_COMMON_CLK) += clock-commonclk.o obj-y += mpc512x_shared.o obj-$(CONFIG_MPC5121_ADS) += mpc5121_ads.o mpc5121_ads_cpld.o diff --git a/arch/powerpc/platforms/512x/clock.c b/arch/powerpc/platforms/512x/clock.c deleted file mode 100644 index fd8a37653417..000000000000 --- a/arch/powerpc/platforms/512x/clock.c +++ /dev/null @@ -1,754 +0,0 @@ -/* - * Copyright (C) 2007,2008 Freescale Semiconductor, Inc. All rights reserved. - * - * Author: John Rigby - * - * Implements the clk api defined in include/linux/clk.h - * - * Original based on linux/arch/arm/mach-integrator/clock.c - * - * Copyright (C) 2004 ARM Limited. - * Written by Deep Blue Solutions Limited. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
- */ -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include "mpc512x.h" - -#undef CLK_DEBUG - -static int clocks_initialized; - -#define CLK_HAS_RATE 0x1 /* has rate in MHz */ -#define CLK_HAS_CTRL 0x2 /* has control reg and bit */ - -struct clk { - struct list_head node; - char name[32]; - int flags; - struct device *dev; - unsigned long rate; - struct module *owner; - void (*calc) (struct clk *); - struct clk *parent; - int reg, bit; /* CLK_HAS_CTRL */ - int div_shift; /* only used by generic_div_clk_calc */ -}; - -static LIST_HEAD(clocks); -static DEFINE_MUTEX(clocks_mutex); - -static struct clk *mpc5121_clk_get(struct device *dev, const char *id) -{ - struct clk *p, *clk = ERR_PTR(-ENOENT); - int dev_match; - int id_match; - - if (dev == NULL || id == NULL) - return clk; - - mutex_lock(&clocks_mutex); - list_for_each_entry(p, &clocks, node) { - dev_match = id_match = 0; - - if (dev == p->dev) - dev_match++; - if (strcmp(id, p->name) == 0) - id_match++; - if ((dev_match || id_match) && try_module_get(p->owner)) { - clk = p; - break; - } - } - mutex_unlock(&clocks_mutex); - - return clk; -} - -#ifdef CLK_DEBUG -static void dump_clocks(void) -{ - struct clk *p; - - mutex_lock(&clocks_mutex); - printk(KERN_INFO "CLOCKS:\n"); - list_for_each_entry(p, &clocks, node) { - pr_info(" %s=%ld", p->name, p->rate); - if (p->parent) - pr_cont(" %s=%ld", p->parent->name, - p->parent->rate); - if (p->flags & CLK_HAS_CTRL) - pr_cont(" reg/bit=%d/%d", p->reg, p->bit); - pr_cont("\n"); - } - mutex_unlock(&clocks_mutex); -} -#define DEBUG_CLK_DUMP() dump_clocks() -#else -#define DEBUG_CLK_DUMP() -#endif - - -static void mpc5121_clk_put(struct clk *clk) -{ - module_put(clk->owner); -} - -#define NRPSC 12 - -struct mpc512x_clockctl { - u32 spmr; /* System PLL Mode Reg */ - u32 sccr[2]; /* System Clk Ctrl Reg 1 & 2 */ - u32 scfr1; /* System Clk Freq Reg 1 */ - u32 scfr2; /* System Clk Freq Reg 2 */ - u32 reserved; - u32 bcr; /* Bread Crumb Reg */ - u32 pccr[NRPSC]; /* PSC Clk Ctrl Reg 0-11 */ - u32 spccr; /* SPDIF Clk Ctrl Reg */ - u32 cccr; /* CFM Clk Ctrl Reg */ - u32 dccr; /* DIU Clk Cnfg Reg */ -}; - -static struct mpc512x_clockctl __iomem *clockctl; - -static int mpc5121_clk_enable(struct clk *clk) -{ - unsigned int mask; - - if (clk->flags & CLK_HAS_CTRL) { - mask = in_be32(&clockctl->sccr[clk->reg]); - mask |= 1 << clk->bit; - out_be32(&clockctl->sccr[clk->reg], mask); - } - return 0; -} - -static void mpc5121_clk_disable(struct clk *clk) -{ - unsigned int mask; - - if (clk->flags & CLK_HAS_CTRL) { - mask = in_be32(&clockctl->sccr[clk->reg]); - mask &= ~(1 << clk->bit); - out_be32(&clockctl->sccr[clk->reg], mask); - } -} - -static unsigned long mpc5121_clk_get_rate(struct clk *clk) -{ - if (clk->flags & CLK_HAS_RATE) - return clk->rate; - else - return 0; -} - -static long mpc5121_clk_round_rate(struct clk *clk, unsigned long rate) -{ - return rate; -} - -static int mpc5121_clk_set_rate(struct clk *clk, unsigned long rate) -{ - return 0; -} - -static int clk_register(struct clk *clk) -{ - mutex_lock(&clocks_mutex); - list_add(&clk->node, &clocks); - mutex_unlock(&clocks_mutex); - return 0; -} - -static unsigned long spmf_mult(void) -{ - /* - * Convert spmf to multiplier - */ - static int spmf_to_mult[] = { - 68, 1, 12, 16, - 20, 24, 28, 32, - 36, 40, 44, 48, - 52, 56, 60, 64 - }; - int spmf = (in_be32(&clockctl->spmr) >> 24) & 0xf; - return spmf_to_mult[spmf]; -} - -static unsigned long 
sysdiv_div_x_2(void) -{ - /* - * Convert sysdiv to divisor x 2 - * Some divisors have fractional parts so - * multiply by 2 then divide by this value - */ - static int sysdiv_to_div_x_2[] = { - 4, 5, 6, 7, - 8, 9, 10, 14, - 12, 16, 18, 22, - 20, 24, 26, 30, - 28, 32, 34, 38, - 36, 40, 42, 46, - 44, 48, 50, 54, - 52, 56, 58, 62, - 60, 64, 66, - }; - int sysdiv = (in_be32(&clockctl->scfr2) >> 26) & 0x3f; - return sysdiv_to_div_x_2[sysdiv]; -} - -static unsigned long ref_to_sys(unsigned long rate) -{ - rate *= spmf_mult(); - rate *= 2; - rate /= sysdiv_div_x_2(); - - return rate; -} - -static unsigned long sys_to_ref(unsigned long rate) -{ - rate *= sysdiv_div_x_2(); - rate /= 2; - rate /= spmf_mult(); - - return rate; -} - -static long ips_to_ref(unsigned long rate) -{ - int ips_div = (in_be32(&clockctl->scfr1) >> 23) & 0x7; - - rate *= ips_div; /* csb_clk = ips_clk * ips_div */ - rate *= 2; /* sys_clk = csb_clk * 2 */ - return sys_to_ref(rate); -} - -static unsigned long devtree_getfreq(char *clockname) -{ - struct device_node *np; - const unsigned int *prop; - unsigned int val = 0; - - np = of_find_compatible_node(NULL, NULL, "fsl,mpc5121-immr"); - if (np) { - prop = of_get_property(np, clockname, NULL); - if (prop) - val = *prop; - of_node_put(np); - } - return val; -} - -static void ref_clk_calc(struct clk *clk) -{ - unsigned long rate; - - rate = devtree_getfreq("bus-frequency"); - if (rate == 0) { - printk(KERN_ERR "No bus-frequency in dev tree\n"); - clk->rate = 0; - return; - } - clk->rate = ips_to_ref(rate); -} - -static struct clk ref_clk = { - .name = "ref_clk", - .calc = ref_clk_calc, -}; - - -static void sys_clk_calc(struct clk *clk) -{ - clk->rate = ref_to_sys(ref_clk.rate); -} - -static struct clk sys_clk = { - .name = "sys_clk", - .calc = sys_clk_calc, -}; - -static void diu_clk_calc(struct clk *clk) -{ - int diudiv_x_2 = in_be32(&clockctl->scfr1) & 0xff; - unsigned long rate; - - rate = sys_clk.rate; - - rate *= 2; - rate /= diudiv_x_2; - - clk->rate = rate; -} - -static void viu_clk_calc(struct clk *clk) -{ - unsigned long rate; - - rate = sys_clk.rate; - rate /= 2; - clk->rate = rate; -} - -static void half_clk_calc(struct clk *clk) -{ - clk->rate = clk->parent->rate / 2; -} - -static void generic_div_clk_calc(struct clk *clk) -{ - int div = (in_be32(&clockctl->scfr1) >> clk->div_shift) & 0x7; - - clk->rate = clk->parent->rate / div; -} - -static void unity_clk_calc(struct clk *clk) -{ - clk->rate = clk->parent->rate; -} - -static struct clk csb_clk = { - .name = "csb_clk", - .calc = half_clk_calc, - .parent = &sys_clk, -}; - -static void e300_clk_calc(struct clk *clk) -{ - int spmf = (in_be32(&clockctl->spmr) >> 16) & 0xf; - int ratex2 = clk->parent->rate * spmf; - - clk->rate = ratex2 / 2; -} - -static struct clk e300_clk = { - .name = "e300_clk", - .calc = e300_clk_calc, - .parent = &csb_clk, -}; - -static struct clk ips_clk = { - .name = "ips_clk", - .calc = generic_div_clk_calc, - .parent = &csb_clk, - .div_shift = 23, -}; - -/* - * Clocks controlled by SCCR1 (.reg = 0) - */ -static struct clk lpc_clk = { - .name = "lpc_clk", - .flags = CLK_HAS_CTRL, - .reg = 0, - .bit = 30, - .calc = generic_div_clk_calc, - .parent = &ips_clk, - .div_shift = 11, -}; - -static struct clk nfc_clk = { - .name = "nfc_clk", - .flags = CLK_HAS_CTRL, - .reg = 0, - .bit = 29, - .calc = generic_div_clk_calc, - .parent = &ips_clk, - .div_shift = 8, -}; - -static struct clk pata_clk = { - .name = "pata_clk", - .flags = CLK_HAS_CTRL, - .reg = 0, - .bit = 28, - .calc = unity_clk_calc, - .parent = 
&ips_clk, -}; - -/* - * PSC clocks (bits 27 - 16) - * are setup elsewhere - */ - -static struct clk sata_clk = { - .name = "sata_clk", - .flags = CLK_HAS_CTRL, - .reg = 0, - .bit = 14, - .calc = unity_clk_calc, - .parent = &ips_clk, -}; - -static struct clk fec_clk = { - .name = "fec_clk", - .flags = CLK_HAS_CTRL, - .reg = 0, - .bit = 13, - .calc = unity_clk_calc, - .parent = &ips_clk, -}; - -static struct clk pci_clk = { - .name = "pci_clk", - .flags = CLK_HAS_CTRL, - .reg = 0, - .bit = 11, - .calc = generic_div_clk_calc, - .parent = &csb_clk, - .div_shift = 20, -}; - -/* - * Clocks controlled by SCCR2 (.reg = 1) - */ -static struct clk diu_clk = { - .name = "diu_clk", - .flags = CLK_HAS_CTRL, - .reg = 1, - .bit = 31, - .calc = diu_clk_calc, -}; - -static struct clk viu_clk = { - .name = "viu_clk", - .flags = CLK_HAS_CTRL, - .reg = 1, - .bit = 18, - .calc = viu_clk_calc, -}; - -static struct clk axe_clk = { - .name = "axe_clk", - .flags = CLK_HAS_CTRL, - .reg = 1, - .bit = 30, - .calc = unity_clk_calc, - .parent = &csb_clk, -}; - -static struct clk usb1_clk = { - .name = "usb1_clk", - .flags = CLK_HAS_CTRL, - .reg = 1, - .bit = 28, - .calc = unity_clk_calc, - .parent = &csb_clk, -}; - -static struct clk usb2_clk = { - .name = "usb2_clk", - .flags = CLK_HAS_CTRL, - .reg = 1, - .bit = 27, - .calc = unity_clk_calc, - .parent = &csb_clk, -}; - -static struct clk i2c_clk = { - .name = "i2c_clk", - .flags = CLK_HAS_CTRL, - .reg = 1, - .bit = 26, - .calc = unity_clk_calc, - .parent = &ips_clk, -}; - -static struct clk mscan_clk = { - .name = "mscan_clk", - .flags = CLK_HAS_CTRL, - .reg = 1, - .bit = 25, - .calc = unity_clk_calc, - .parent = &ips_clk, -}; - -static struct clk sdhc_clk = { - .name = "sdhc_clk", - .flags = CLK_HAS_CTRL, - .reg = 1, - .bit = 24, - .calc = unity_clk_calc, - .parent = &ips_clk, -}; - -static struct clk mbx_bus_clk = { - .name = "mbx_bus_clk", - .flags = CLK_HAS_CTRL, - .reg = 1, - .bit = 22, - .calc = half_clk_calc, - .parent = &csb_clk, -}; - -static struct clk mbx_clk = { - .name = "mbx_clk", - .flags = CLK_HAS_CTRL, - .reg = 1, - .bit = 21, - .calc = unity_clk_calc, - .parent = &csb_clk, -}; - -static struct clk mbx_3d_clk = { - .name = "mbx_3d_clk", - .flags = CLK_HAS_CTRL, - .reg = 1, - .bit = 20, - .calc = generic_div_clk_calc, - .parent = &mbx_bus_clk, - .div_shift = 14, -}; - -static void psc_mclk_in_calc(struct clk *clk) -{ - clk->rate = devtree_getfreq("psc_mclk_in"); - if (!clk->rate) - clk->rate = 25000000; -} - -static struct clk psc_mclk_in = { - .name = "psc_mclk_in", - .calc = psc_mclk_in_calc, -}; - -static struct clk spdif_txclk = { - .name = "spdif_txclk", - .flags = CLK_HAS_CTRL, - .reg = 1, - .bit = 23, -}; - -static struct clk spdif_rxclk = { - .name = "spdif_rxclk", - .flags = CLK_HAS_CTRL, - .reg = 1, - .bit = 23, -}; - -static void ac97_clk_calc(struct clk *clk) -{ - /* ac97 bit clock is always 24.567 MHz */ - clk->rate = 24567000; -} - -static struct clk ac97_clk = { - .name = "ac97_clk_in", - .calc = ac97_clk_calc, -}; - -static struct clk *rate_clks[] = { - &ref_clk, - &sys_clk, - &diu_clk, - &viu_clk, - &csb_clk, - &e300_clk, - &ips_clk, - &fec_clk, - &sata_clk, - &pata_clk, - &nfc_clk, - &lpc_clk, - &mbx_bus_clk, - &mbx_clk, - &mbx_3d_clk, - &axe_clk, - &usb1_clk, - &usb2_clk, - &i2c_clk, - &mscan_clk, - &sdhc_clk, - &pci_clk, - &psc_mclk_in, - &spdif_txclk, - &spdif_rxclk, - &ac97_clk, - NULL -}; - -static void rate_clk_init(struct clk *clk) -{ - if (clk->calc) { - clk->calc(clk); - clk->flags |= CLK_HAS_RATE; - clk_register(clk); - } 
else { - printk(KERN_WARNING - "Could not initialize clk %s without a calc routine\n", - clk->name); - } -} - -static void rate_clks_init(void) -{ - struct clk **cpp, *clk; - - cpp = rate_clks; - while ((clk = *cpp++)) - rate_clk_init(clk); -} - -/* - * There are two clk enable registers with 32 enable bits each - * psc clocks and device clocks are all stored in dev_clks - */ -static struct clk dev_clks[2][32]; - -/* - * Given a psc number return the dev_clk - * associated with it - */ -static struct clk *psc_dev_clk(int pscnum) -{ - int reg, bit; - struct clk *clk; - - reg = 0; - bit = 27 - pscnum; - - clk = &dev_clks[reg][bit]; - clk->reg = 0; - clk->bit = bit; - return clk; -} - -/* - * PSC clock rate calculation - */ -static void psc_calc_rate(struct clk *clk, int pscnum, struct device_node *np) -{ - unsigned long mclk_src = sys_clk.rate; - unsigned long mclk_div; - - /* - * Can only change value of mclk divider - * when the divider is disabled. - * - * Zero is not a valid divider so minimum - * divider is 1 - * - * disable/set divider/enable - */ - out_be32(&clockctl->pccr[pscnum], 0); - out_be32(&clockctl->pccr[pscnum], 0x00020000); - out_be32(&clockctl->pccr[pscnum], 0x00030000); - - if (in_be32(&clockctl->pccr[pscnum]) & 0x80) { - clk->rate = spdif_rxclk.rate; - return; - } - - switch ((in_be32(&clockctl->pccr[pscnum]) >> 14) & 0x3) { - case 0: - mclk_src = sys_clk.rate; - break; - case 1: - mclk_src = ref_clk.rate; - break; - case 2: - mclk_src = psc_mclk_in.rate; - break; - case 3: - mclk_src = spdif_txclk.rate; - break; - } - - mclk_div = ((in_be32(&clockctl->pccr[pscnum]) >> 17) & 0x7fff) + 1; - clk->rate = mclk_src / mclk_div; -} - -/* - * Find all psc nodes in device tree and assign a clock - * with name "psc%d_mclk" and dev pointing at the device - * returned from of_find_device_by_node - */ -static void psc_clks_init(void) -{ - struct device_node *np; - struct platform_device *ofdev; - u32 reg; - const char *psc_compat; - - psc_compat = mpc512x_select_psc_compat(); - if (!psc_compat) - return; - - for_each_compatible_node(np, NULL, psc_compat) { - if (!of_property_read_u32(np, "reg", ®)) { - int pscnum = (reg & 0xf00) >> 8; - struct clk *clk = psc_dev_clk(pscnum); - - clk->flags = CLK_HAS_RATE | CLK_HAS_CTRL; - ofdev = of_find_device_by_node(np); - clk->dev = &ofdev->dev; - /* - * AC97 is special rate clock does - * not go through normal path - */ - if (of_device_is_compatible(np, "fsl,mpc5121-psc-ac97")) - clk->rate = ac97_clk.rate; - else - psc_calc_rate(clk, pscnum, np); - sprintf(clk->name, "psc%d_mclk", pscnum); - clk_register(clk); - clk_enable(clk); - } - } -} - -static struct clk_interface mpc5121_clk_functions = { - .clk_get = mpc5121_clk_get, - .clk_enable = mpc5121_clk_enable, - .clk_disable = mpc5121_clk_disable, - .clk_get_rate = mpc5121_clk_get_rate, - .clk_put = mpc5121_clk_put, - .clk_round_rate = mpc5121_clk_round_rate, - .clk_set_rate = mpc5121_clk_set_rate, - .clk_set_parent = NULL, - .clk_get_parent = NULL, -}; - -int __init mpc5121_clk_init(void) -{ - struct device_node *np; - - np = of_find_compatible_node(NULL, NULL, "fsl,mpc5121-clock"); - if (np) { - clockctl = of_iomap(np, 0); - of_node_put(np); - } - - if (!clockctl) { - printk(KERN_ERR "Could not map clock control registers\n"); - return 0; - } - - rate_clks_init(); - psc_clks_init(); - - /* leave clockctl mapped forever */ - /*iounmap(clockctl); */ - DEBUG_CLK_DUMP(); - clocks_initialized++; - clk_functions = mpc5121_clk_functions; - return 0; -} diff --git a/arch/powerpc/platforms/52xx/Kconfig 
b/arch/powerpc/platforms/52xx/Kconfig index af54174801f7..b625a2c6f4f2 100644 --- a/arch/powerpc/platforms/52xx/Kconfig +++ b/arch/powerpc/platforms/52xx/Kconfig @@ -1,7 +1,7 @@ config PPC_MPC52xx bool "52xx-based boards" depends on 6xx - select PPC_CLOCK + select COMMON_CLK select PPC_PCI_CHOICE config PPC_MPC5200_SIMPLE -- cgit v1.2.3 From c141611fb1ee2cfc374cf9be5327e97f361c4bed Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Thu, 9 Jan 2014 00:44:29 -0500 Subject: powerpc: Delete non-required instances of include None of these files are actually using any __init type directives and hence don't need to include . Most are just a left over from __devinit and __cpuinit removal, or simply due to code getting copied from one driver to the next. The one instance where we add an include for init.h covers off a case where that file was implicitly getting it from another header which itself didn't need it. Signed-off-by: Paul Gortmaker Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/paca.h | 1 - arch/powerpc/include/asm/ppc_asm.h | 1 - arch/powerpc/include/asm/ps3.h | 1 - arch/powerpc/include/asm/vio.h | 1 - arch/powerpc/kernel/cacheinfo.c | 1 - arch/powerpc/kernel/crash.c | 1 - arch/powerpc/kernel/eeh_pe.c | 1 - arch/powerpc/kernel/head_64.S | 1 + arch/powerpc/kernel/hw_breakpoint.c | 1 - arch/powerpc/kernel/iomap.c | 1 - arch/powerpc/kernel/kgdb.c | 1 - arch/powerpc/kernel/process.c | 1 - arch/powerpc/kernel/smp-tbsync.c | 1 - arch/powerpc/kernel/syscalls.c | 1 - arch/powerpc/kernel/vdso32/vdso32_wrapper.S | 1 - arch/powerpc/kernel/vdso64/vdso64_wrapper.S | 1 - arch/powerpc/mm/pgtable.c | 1 - arch/powerpc/mm/pgtable_64.c | 1 - arch/powerpc/mm/tlb_hash64.c | 1 - arch/powerpc/oprofile/op_model_7450.c | 1 - arch/powerpc/oprofile/op_model_cell.c | 1 - arch/powerpc/oprofile/op_model_fsl_emb.c | 1 - arch/powerpc/oprofile/op_model_pa6t.c | 1 - arch/powerpc/oprofile/op_model_power4.c | 1 - arch/powerpc/oprofile/op_model_rs64.c | 1 - arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c | 1 - arch/powerpc/platforms/83xx/suspend.c | 1 - arch/powerpc/platforms/85xx/sgy_cts1000.c | 1 - arch/powerpc/platforms/chrp/smp.c | 1 - arch/powerpc/platforms/embedded6xx/hlwd-pic.c | 1 - arch/powerpc/platforms/pasemi/dma_lib.c | 1 - arch/powerpc/platforms/powermac/pfunc_core.c | 1 - arch/powerpc/platforms/powernv/eeh-ioda.c | 1 - arch/powerpc/platforms/pseries/cmm.c | 1 - arch/powerpc/platforms/pseries/dtl.c | 1 - arch/powerpc/sysdev/cpm2_pic.c | 1 - arch/powerpc/sysdev/fsl_ifc.c | 1 - arch/powerpc/sysdev/ge/ge_pic.h | 1 - arch/powerpc/sysdev/i8259.c | 1 - arch/powerpc/sysdev/mpc8xx_pic.c | 1 - arch/powerpc/sysdev/qe_lib/qe_io.c | 1 - arch/powerpc/sysdev/qe_lib/ucc.c | 1 - arch/powerpc/sysdev/qe_lib/ucc_fast.c | 1 - arch/powerpc/sysdev/qe_lib/ucc_slow.c | 1 - arch/powerpc/sysdev/udbg_memcons.c | 1 - arch/powerpc/sysdev/xics/icp-hv.c | 1 - 46 files changed, 1 insertion(+), 45 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index c3523d1dda58..68bee4636045 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -16,7 +16,6 @@ #ifdef CONFIG_PPC64 -#include #include #include #include diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index f595b98079ee..7689e0e98d33 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -4,7 +4,6 @@ #ifndef _ASM_POWERPC_PPC_ASM_H #define _ASM_POWERPC_PPC_ASM_H -#include #include #include 
#include diff --git a/arch/powerpc/include/asm/ps3.h b/arch/powerpc/include/asm/ps3.h index 678a7c1d9cb8..a1bc7e758422 100644 --- a/arch/powerpc/include/asm/ps3.h +++ b/arch/powerpc/include/asm/ps3.h @@ -21,7 +21,6 @@ #if !defined(_ASM_POWERPC_PS3_H) #define _ASM_POWERPC_PS3_H -#include #include #include #include diff --git a/arch/powerpc/include/asm/vio.h b/arch/powerpc/include/asm/vio.h index 68d0cc998b1b..4f9b7ca0710f 100644 --- a/arch/powerpc/include/asm/vio.h +++ b/arch/powerpc/include/asm/vio.h @@ -15,7 +15,6 @@ #define _ASM_POWERPC_VIO_H #ifdef __KERNEL__ -#include #include #include #include diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c index 654932727873..abfa011344d9 100644 --- a/arch/powerpc/kernel/cacheinfo.c +++ b/arch/powerpc/kernel/cacheinfo.c @@ -12,7 +12,6 @@ #include #include -#include #include #include #include diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c index fdcd8f551aff..18d7c80ddeb9 100644 --- a/arch/powerpc/kernel/crash.c +++ b/arch/powerpc/kernel/crash.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index f9450537e335..1feccd64f955 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -25,7 +25,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 4f0946de2d5c..b7363bd42452 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -23,6 +23,7 @@ */ #include +#include #include #include #include diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index f0b47d1a6b0e..b0a1792279bb 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -28,7 +28,6 @@ #include #include #include -#include #include #include diff --git a/arch/powerpc/kernel/iomap.c b/arch/powerpc/kernel/iomap.c index 97a3715ac8bd..b82227e7e21b 100644 --- a/arch/powerpc/kernel/iomap.c +++ b/arch/powerpc/kernel/iomap.c @@ -3,7 +3,6 @@ * * (C) Copyright 2004 Linus Torvalds */ -#include #include #include #include diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index 83e89d310734..8504657379f1 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -15,7 +15,6 @@ */ #include -#include #include #include #include diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 3386d8ab7eb0..bf8e136316e2 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -25,7 +25,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/kernel/smp-tbsync.c b/arch/powerpc/kernel/smp-tbsync.c index e68fd1ae727a..7a37ecd3afa3 100644 --- a/arch/powerpc/kernel/smp-tbsync.c +++ b/arch/powerpc/kernel/smp-tbsync.c @@ -9,7 +9,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c index 4e3cc47f26b9..cd9be9aa016d 100644 --- a/arch/powerpc/kernel/syscalls.c +++ b/arch/powerpc/kernel/syscalls.c @@ -34,7 +34,6 @@ #include #include #include -#include #include #include diff --git a/arch/powerpc/kernel/vdso32/vdso32_wrapper.S b/arch/powerpc/kernel/vdso32/vdso32_wrapper.S index 6e8f507ed32b..79683d0393f5 100644 --- a/arch/powerpc/kernel/vdso32/vdso32_wrapper.S +++ b/arch/powerpc/kernel/vdso32/vdso32_wrapper.S @@ -1,4 +1,3 @@ -#include #include #include diff --git 
a/arch/powerpc/kernel/vdso64/vdso64_wrapper.S b/arch/powerpc/kernel/vdso64/vdso64_wrapper.S index b8553d62b792..8df9e2463007 100644 --- a/arch/powerpc/kernel/vdso64/vdso64_wrapper.S +++ b/arch/powerpc/kernel/vdso64/vdso64_wrapper.S @@ -1,4 +1,3 @@ -#include #include #include diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index ad90429bbd8b..c695943a513c 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -24,7 +24,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index 02e8681fb865..7f0e872ab83d 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -33,7 +33,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c index 36e44b4260eb..c99f6510a0b2 100644 --- a/arch/powerpc/mm/tlb_hash64.c +++ b/arch/powerpc/mm/tlb_hash64.c @@ -23,7 +23,6 @@ #include #include -#include #include #include #include diff --git a/arch/powerpc/oprofile/op_model_7450.c b/arch/powerpc/oprofile/op_model_7450.c index ff617246d128..d29b6e4e5e72 100644 --- a/arch/powerpc/oprofile/op_model_7450.c +++ b/arch/powerpc/oprofile/op_model_7450.c @@ -16,7 +16,6 @@ */ #include -#include #include #include #include diff --git a/arch/powerpc/oprofile/op_model_cell.c b/arch/powerpc/oprofile/op_model_cell.c index b9589c19ccda..1f0ebdeea5f7 100644 --- a/arch/powerpc/oprofile/op_model_cell.c +++ b/arch/powerpc/oprofile/op_model_cell.c @@ -16,7 +16,6 @@ #include #include -#include #include #include #include diff --git a/arch/powerpc/oprofile/op_model_fsl_emb.c b/arch/powerpc/oprofile/op_model_fsl_emb.c index 2a82d3ed464d..14cf86fdddab 100644 --- a/arch/powerpc/oprofile/op_model_fsl_emb.c +++ b/arch/powerpc/oprofile/op_model_fsl_emb.c @@ -14,7 +14,6 @@ */ #include -#include #include #include #include diff --git a/arch/powerpc/oprofile/op_model_pa6t.c b/arch/powerpc/oprofile/op_model_pa6t.c index 42f778dff919..a114a7c22d40 100644 --- a/arch/powerpc/oprofile/op_model_pa6t.c +++ b/arch/powerpc/oprofile/op_model_pa6t.c @@ -22,7 +22,6 @@ */ #include -#include #include #include #include diff --git a/arch/powerpc/oprofile/op_model_power4.c b/arch/powerpc/oprofile/op_model_power4.c index f444b94935f5..962fe7b3e3fb 100644 --- a/arch/powerpc/oprofile/op_model_power4.c +++ b/arch/powerpc/oprofile/op_model_power4.c @@ -10,7 +10,6 @@ */ #include -#include #include #include #include diff --git a/arch/powerpc/oprofile/op_model_rs64.c b/arch/powerpc/oprofile/op_model_rs64.c index 9b801b8c8c5a..7e5b8ed3a1b7 100644 --- a/arch/powerpc/oprofile/op_model_rs64.c +++ b/arch/powerpc/oprofile/op_model_rs64.c @@ -8,7 +8,6 @@ */ #include -#include #include #include #include diff --git a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c index fd71cfdf2380..e238b6a55b15 100644 --- a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c +++ b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c @@ -11,7 +11,6 @@ * (at your option) any later version. */ -#include #include #include #include diff --git a/arch/powerpc/platforms/83xx/suspend.c b/arch/powerpc/platforms/83xx/suspend.c index 3d9716ccd327..4b4c081df94d 100644 --- a/arch/powerpc/platforms/83xx/suspend.c +++ b/arch/powerpc/platforms/83xx/suspend.c @@ -10,7 +10,6 @@ * by the Free Software Foundation. 
*/ -#include #include #include #include diff --git a/arch/powerpc/platforms/85xx/sgy_cts1000.c b/arch/powerpc/platforms/85xx/sgy_cts1000.c index b9197cea1854..bb75add67084 100644 --- a/arch/powerpc/platforms/85xx/sgy_cts1000.c +++ b/arch/powerpc/platforms/85xx/sgy_cts1000.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/platforms/chrp/smp.c b/arch/powerpc/platforms/chrp/smp.c index dead91b177b9..b6c9a0dcc924 100644 --- a/arch/powerpc/platforms/chrp/smp.c +++ b/arch/powerpc/platforms/chrp/smp.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include diff --git a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c index 6c03034dbbd3..c269caee58f9 100644 --- a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c +++ b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c @@ -15,7 +15,6 @@ #define pr_fmt(fmt) DRV_MODULE_NAME ": " fmt #include -#include #include #include #include diff --git a/arch/powerpc/platforms/pasemi/dma_lib.c b/arch/powerpc/platforms/pasemi/dma_lib.c index f3defd8a2806..aafa01ba062f 100644 --- a/arch/powerpc/platforms/pasemi/dma_lib.c +++ b/arch/powerpc/platforms/pasemi/dma_lib.c @@ -18,7 +18,6 @@ */ #include -#include #include #include #include diff --git a/arch/powerpc/platforms/powermac/pfunc_core.c b/arch/powerpc/platforms/powermac/pfunc_core.c index d588e48dff74..43075081721f 100644 --- a/arch/powerpc/platforms/powermac/pfunc_core.c +++ b/arch/powerpc/platforms/powermac/pfunc_core.c @@ -5,7 +5,6 @@ * FIXME: LOCKING !!! */ -#include #include #include #include diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c index 9b1db254898a..007ac4989841 100644 --- a/arch/powerpc/platforms/powernv/eeh-ioda.c +++ b/arch/powerpc/platforms/powernv/eeh-ioda.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c index 1e561bef459b..2d8bf15879fd 100644 --- a/arch/powerpc/platforms/pseries/cmm.c +++ b/arch/powerpc/platforms/pseries/cmm.c @@ -25,7 +25,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c index 5db66f1fbc26..7d61498e45c0 100644 --- a/arch/powerpc/platforms/pseries/dtl.c +++ b/arch/powerpc/platforms/pseries/dtl.c @@ -20,7 +20,6 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ -#include #include #include #include diff --git a/arch/powerpc/sysdev/cpm2_pic.c b/arch/powerpc/sysdev/cpm2_pic.c index 10386b676d87..a11bd1d433ad 100644 --- a/arch/powerpc/sysdev/cpm2_pic.c +++ b/arch/powerpc/sysdev/cpm2_pic.c @@ -27,7 +27,6 @@ */ #include -#include #include #include #include diff --git a/arch/powerpc/sysdev/fsl_ifc.c b/arch/powerpc/sysdev/fsl_ifc.c index d7fc72239144..fbc885b31946 100644 --- a/arch/powerpc/sysdev/fsl_ifc.c +++ b/arch/powerpc/sysdev/fsl_ifc.c @@ -19,7 +19,6 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ -#include #include #include #include diff --git a/arch/powerpc/sysdev/ge/ge_pic.h b/arch/powerpc/sysdev/ge/ge_pic.h index 6149916da3f4..908dbd9826b6 100644 --- a/arch/powerpc/sysdev/ge/ge_pic.h +++ b/arch/powerpc/sysdev/ge/ge_pic.h @@ -1,7 +1,6 @@ #ifndef __GEF_PIC_H__ #define __GEF_PIC_H__ -#include void gef_pic_cascade(unsigned int, struct irq_desc *); unsigned int gef_pic_get_irq(void); diff --git a/arch/powerpc/sysdev/i8259.c b/arch/powerpc/sysdev/i8259.c index 997df6a7ab5d..45598da0b321 100644 --- a/arch/powerpc/sysdev/i8259.c +++ b/arch/powerpc/sysdev/i8259.c @@ -8,7 +8,6 @@ */ #undef DEBUG -#include #include #include #include diff --git a/arch/powerpc/sysdev/mpc8xx_pic.c b/arch/powerpc/sysdev/mpc8xx_pic.c index b724622c3a0b..c4828c0be5bd 100644 --- a/arch/powerpc/sysdev/mpc8xx_pic.c +++ b/arch/powerpc/sysdev/mpc8xx_pic.c @@ -1,6 +1,5 @@ #include #include -#include #include #include #include diff --git a/arch/powerpc/sysdev/qe_lib/qe_io.c b/arch/powerpc/sysdev/qe_lib/qe_io.c index a88807b3dd57..d09994164daf 100644 --- a/arch/powerpc/sysdev/qe_lib/qe_io.c +++ b/arch/powerpc/sysdev/qe_lib/qe_io.c @@ -16,7 +16,6 @@ #include #include -#include #include #include #include diff --git a/arch/powerpc/sysdev/qe_lib/ucc.c b/arch/powerpc/sysdev/qe_lib/ucc.c index 134b07d29435..621575b7e84a 100644 --- a/arch/powerpc/sysdev/qe_lib/ucc.c +++ b/arch/powerpc/sysdev/qe_lib/ucc.c @@ -14,7 +14,6 @@ * option) any later version. */ #include -#include #include #include #include diff --git a/arch/powerpc/sysdev/qe_lib/ucc_fast.c b/arch/powerpc/sysdev/qe_lib/ucc_fast.c index cceb2e366738..65aaf15032ae 100644 --- a/arch/powerpc/sysdev/qe_lib/ucc_fast.c +++ b/arch/powerpc/sysdev/qe_lib/ucc_fast.c @@ -13,7 +13,6 @@ * option) any later version. */ #include -#include #include #include #include diff --git a/arch/powerpc/sysdev/qe_lib/ucc_slow.c b/arch/powerpc/sysdev/qe_lib/ucc_slow.c index 1c062f48f1ac..befaf1123f7f 100644 --- a/arch/powerpc/sysdev/qe_lib/ucc_slow.c +++ b/arch/powerpc/sysdev/qe_lib/ucc_slow.c @@ -13,7 +13,6 @@ * option) any later version. */ #include -#include #include #include #include diff --git a/arch/powerpc/sysdev/udbg_memcons.c b/arch/powerpc/sysdev/udbg_memcons.c index ce5a7b489e4b..9998c0de12d0 100644 --- a/arch/powerpc/sysdev/udbg_memcons.c +++ b/arch/powerpc/sysdev/udbg_memcons.c @@ -18,7 +18,6 @@ * 2 of the License, or (at your option) any later version. */ -#include #include #include #include diff --git a/arch/powerpc/sysdev/xics/icp-hv.c b/arch/powerpc/sysdev/xics/icp-hv.c index df0fc5821469..c1917cf67c3d 100644 --- a/arch/powerpc/sysdev/xics/icp-hv.c +++ b/arch/powerpc/sysdev/xics/icp-hv.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include -- cgit v1.2.3 From 1d350544d5bf17d835d2850004c64ca51235c771 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 3 Jan 2014 17:47:12 +0800 Subject: powerpc/eeh: Add restore_config operation After reset on the specific PE or PHB, we never configure AER correctly on PowerNV platform. We needn't care it on pSeries platform. The patch introduces additional EEH operation eeh_ops:: restore_config() so that we have chance to configure AER correctly for PowerNV platform. 
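As a rough sketch of how a platform wires up the new hook (the field and its signature come from the patch; the example_ names and the callback body are invented for illustration):

/* Hypothetical platform implementation of the new operation. */
static int example_restore_config(struct device_node *dn)
{
        /* Re-apply AER and related config-space state for the device
         * described by @dn after a PE or PHB reset. */
        return 0;
}

static struct eeh_ops example_eeh_ops = {
        /* ... the existing callbacks (reset, read_config, ...) ... */
        .restore_config = example_restore_config,
};

The hook is optional: the core only calls it when it is non-NULL (as the eeh_pe.c hunk below does after restoring the BARs), so platforms such as pSeries can leave it set to NULL.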
Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/eeh.h | 1 + arch/powerpc/kernel/eeh_pe.c | 3 +++ arch/powerpc/platforms/powernv/eeh-powernv.c | 3 ++- arch/powerpc/platforms/pseries/eeh_pseries.c | 4 +++- 4 files changed, 9 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index d3e5e9bc8f94..7f8adc848cd6 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -157,6 +157,7 @@ struct eeh_ops { int (*read_config)(struct device_node *dn, int where, int size, u32 *val); int (*write_config)(struct device_node *dn, int where, int size, u32 val); int (*next_error)(struct eeh_pe **pe); + int (*restore_config)(struct device_node *dn); }; extern struct eeh_ops *eeh_ops; diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index 1feccd64f955..f0c353fa655a 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -736,6 +736,9 @@ static void *eeh_restore_one_device_bars(void *data, void *flag) else eeh_restore_device_bars(edev, dn); + if (eeh_ops->restore_config) + eeh_ops->restore_config(dn); + return NULL; } diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index 73b981438cc5..ab91e6a34afa 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -359,7 +359,8 @@ static struct eeh_ops powernv_eeh_ops = { .configure_bridge = powernv_eeh_configure_bridge, .read_config = pnv_pci_cfg_read, .write_config = pnv_pci_cfg_write, - .next_error = powernv_eeh_next_error + .next_error = powernv_eeh_next_error, + .restore_config = NULL }; /** diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index ccb633e077b1..9ef3cc8ebc11 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -689,7 +689,9 @@ static struct eeh_ops pseries_eeh_ops = { .get_log = pseries_eeh_get_log, .configure_bridge = pseries_eeh_configure_bridge, .read_config = pseries_eeh_read_config, - .write_config = pseries_eeh_write_config + .write_config = pseries_eeh_write_config, + .next_error = NULL, + .restore_config = NULL }; /** -- cgit v1.2.3 From f26c7a035b7f2f1a7505ce42e4ba946b12f7df91 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Sun, 12 Jan 2014 14:13:45 +0800 Subject: powerpc/eeh: Hotplug improvement When EEH error comes to one specific PCI device before its driver is loaded, we will apply hotplug to recover the error. During the plug time, the PCI device will be probed and its driver is loaded. Then we wrongly calls to the error handlers if the driver supports EEH explicitly. The patch intends to fix by introducing flag EEH_DEV_NO_HANDLER and set it before we remove the PCI device. In turn, we can avoid wrongly calls the error handlers of the PCI device after its driver loaded. 
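The life cycle of the new flag can be modelled with a small self-contained sketch (the example_ names are invented; the real tests are open-coded in eeh.c and eeh_driver.c as the hunks below show):

#include <stdbool.h>

#define EEH_DEV_NO_HANDLER      (1 << 8)        /* value used by the patch */

struct example_eeh_dev {
        int mode;
};

/* Set when the device is removed from the PCI core, so a later
 * re-probe is known to be hotplug recovery rather than a driver that
 * saw the original error. */
static void example_mark_removed(struct example_eeh_dev *edev)
{
        edev->mode |= EEH_DEV_NO_HANDLER;
}

/* Each eeh_report_*() step skips the driver callback while the flag
 * is set (or when the driver has no handler at all). */
static bool example_skip_handler(struct example_eeh_dev *edev,
                                 bool driver_has_handler)
{
        return !driver_has_handler || (edev->mode & EEH_DEV_NO_HANDLER);
}

/* The resume step clears the flag so the next error on this device is
 * handled normally again. */
static void example_resume(struct example_eeh_dev *edev)
{
        edev->mode &= ~EEH_DEV_NO_HANDLER;
}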
Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/eeh.h | 3 ++- arch/powerpc/kernel/eeh.c | 15 +++++++++++++++ arch/powerpc/kernel/eeh_driver.c | 10 +++++++--- 3 files changed, 24 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 7f8adc848cd6..8b4b8e4a5c32 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -90,7 +90,8 @@ struct eeh_pe { #define EEH_DEV_IRQ_DISABLED (1 << 3) /* Interrupt disabled */ #define EEH_DEV_DISCONNECTED (1 << 4) /* Removing from PE */ -#define EEH_DEV_SYSFS (1 << 8) /* Sysfs created */ +#define EEH_DEV_NO_HANDLER (1 << 8) /* No error handler */ +#define EEH_DEV_SYSFS (1 << 9) /* Sysfs created */ struct eeh_dev { int mode; /* EEH mode */ diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index f4b7a227f183..148db72a8c43 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -921,6 +921,13 @@ void eeh_add_device_late(struct pci_dev *dev) eeh_sysfs_remove_device(edev->pdev); edev->mode &= ~EEH_DEV_SYSFS; + /* + * We definitely should have the PCI device removed + * though it wasn't correctly. So we needn't call + * into error handler afterwards. + */ + edev->mode |= EEH_DEV_NO_HANDLER; + edev->pdev = NULL; dev->dev.archdata.edev = NULL; } @@ -1023,6 +1030,14 @@ void eeh_remove_device(struct pci_dev *dev) else edev->mode |= EEH_DEV_DISCONNECTED; + /* + * We're removing from the PCI subsystem, that means + * the PCI device driver can't support EEH or not + * well. So we rely on hotplug completely to do recovery + * for the specific PCI device. + */ + edev->mode |= EEH_DEV_NO_HANDLER; + eeh_addr_cache_rmv_dev(dev); eeh_sysfs_remove_device(dev); edev->mode &= ~EEH_DEV_SYSFS; diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 4ef59c33777f..7db39203a073 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -217,7 +217,8 @@ static void *eeh_report_mmio_enabled(void *data, void *userdata) if (!driver) return NULL; if (!driver->err_handler || - !driver->err_handler->mmio_enabled) { + !driver->err_handler->mmio_enabled || + (edev->mode & EEH_DEV_NO_HANDLER)) { eeh_pcid_put(dev); return NULL; } @@ -258,7 +259,8 @@ static void *eeh_report_reset(void *data, void *userdata) eeh_enable_irq(dev); if (!driver->err_handler || - !driver->err_handler->slot_reset) { + !driver->err_handler->slot_reset || + (edev->mode & EEH_DEV_NO_HANDLER)) { eeh_pcid_put(dev); return NULL; } @@ -297,7 +299,9 @@ static void *eeh_report_resume(void *data, void *userdata) eeh_enable_irq(dev); if (!driver->err_handler || - !driver->err_handler->resume) { + !driver->err_handler->resume || + (edev->mode & EEH_DEV_NO_HANDLER)) { + edev->mode &= ~EEH_DEV_NO_HANDLER; eeh_pcid_put(dev); return NULL; } -- cgit v1.2.3 From 0c4b9e27b09eeb4da84451c038a587b92ce93ff5 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Mon, 13 Jan 2014 11:36:22 +0800 Subject: powerpc/iommu: Don't detach device without IOMMU group Some devices, for example PCI root port, don't have IOMMU table and group. We needn't detach them from their IOMMU group. 
Otherwise, it can cause a kernel crash by dereferencing a NULL IOMMU group, as the following backtrace indicates: .iommu_group_remove_device+0x74/0x1b0 .iommu_bus_notifier+0x94/0xb4 .notifier_call_chain+0x78/0xe8 .__blocking_notifier_call_chain+0x7c/0xbc .blocking_notifier_call_chain+0x38/0x48 .device_del+0x50/0x234 .pci_remove_bus_device+0x88/0x138 .pci_stop_and_remove_bus_device+0x2c/0x40 .pcibios_remove_pci_devices+0xcc/0xfc .pcibios_remove_pci_devices+0x3c/0xfc Signed-off-by: Gavin Shan Reviewed-by: Alexey Kardashevskiy Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/iommu.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index f58d8135aab2..d773dd440a45 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -1144,6 +1144,17 @@ EXPORT_SYMBOL_GPL(iommu_add_device); void iommu_del_device(struct device *dev) { + /* + * Some devices might not have IOMMU table and group + * and we needn't detach them from the associated + * IOMMU groups + */ + if (!dev->iommu_group) { + pr_debug("iommu_tce: skipping device %s with no tbl\n", + dev_name(dev)); + return; + } + iommu_group_remove_device(dev); } EXPORT_SYMBOL_GPL(iommu_del_device); -- cgit v1.2.3 From 30c826358d10c1d6f8147de3310b97488daec830 Mon Sep 17 00:00:00 2001 From: Mahesh Salgaonkar Date: Tue, 14 Jan 2014 15:45:09 +0530 Subject: Move processing of MCE queued event out of syscall exit path. Hugh Dickins reported an issue that b5ff4211a829 "powerpc/book3s: Queue up and process delayed MCE events" breaks the PowerMac G5 boot. This patch fixes it by moving the MCE event processing away from syscall exit, which was the wrong place to do it in the first place, and using the irq_work framework to delay processing of the MCE event.
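The irq_work usage boils down to the pattern below; this is a condensed sketch of the change, with the full hunks following:

	static void machine_check_process_queued_event(struct irq_work *work);

	struct irq_work mce_event_process_work = {
		.func = machine_check_process_queued_event,
	};

	void machine_check_queue_event(void)
	{
		/* ... save the event into the per-cpu queue ... */

		/* Queue irq work to process this event later. */
		irq_work_queue(&mce_event_process_work);
	}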
Reported-by: Hugh Dickins Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/mce.h | 1 - arch/powerpc/kernel/entry_64.S | 5 ----- arch/powerpc/kernel/mce.c | 13 ++++++++++--- 3 files changed, 10 insertions(+), 9 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h index a2b8c7b35fba..8e99edf6d966 100644 --- a/arch/powerpc/include/asm/mce.h +++ b/arch/powerpc/include/asm/mce.h @@ -191,7 +191,6 @@ extern void save_mce_event(struct pt_regs *regs, long handled, extern int get_mce_event(struct machine_check_event *mce, bool release); extern void release_mce_event(void); extern void machine_check_queue_event(void); -extern void machine_check_process_queued_event(void); extern void machine_check_print_event_info(struct machine_check_event *evt); extern uint64_t get_mce_fault_addr(struct machine_check_event *evt); diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 770d6d65c47b..bbfb0294b354 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -183,11 +183,6 @@ syscall_exit: #ifdef SHOW_SYSCALLS bl .do_show_syscall_exit ld r3,RESULT(r1) -#endif -#ifdef CONFIG_PPC_BOOK3S_64 -BEGIN_FTR_SECTION - bl .machine_check_process_queued_event -END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) #endif CURRENT_THREAD_INFO(r12, r1) diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index c0c52ec1fca7..cadef7e64e42 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -26,6 +26,7 @@ #include #include #include +#include #include static DEFINE_PER_CPU(int, mce_nest_count); @@ -35,6 +36,11 @@ static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event); static DEFINE_PER_CPU(int, mce_queue_count); static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event_queue); +static void machine_check_process_queued_event(struct irq_work *work); +struct irq_work mce_event_process_work = { + .func = machine_check_process_queued_event, +}; + static void mce_set_error_info(struct machine_check_event *mce, struct mce_error_info *mce_err) { @@ -185,17 +191,19 @@ void machine_check_queue_event(void) return; } __get_cpu_var(mce_event_queue[index]) = evt; + + /* Queue irq work to process this event later. */ + irq_work_queue(&mce_event_process_work); } /* * process pending MCE event from the mce event queue. This function will be * called during syscall exit. */ -void machine_check_process_queued_event(void) +static void machine_check_process_queued_event(struct irq_work *work) { int index; - preempt_disable(); /* * For now just print it to console. * TODO: log this error event to FSP or nvram. @@ -206,7 +214,6 @@ void machine_check_process_queued_event(void) &__get_cpu_var(mce_event_queue[index])); __get_cpu_var(mce_queue_count)--; } - preempt_enable(); } void machine_check_print_event_info(struct machine_check_event *evt) -- cgit v1.2.3 From 0215f7d8c53fb192cd4491ede0ece5cca6b5db57 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 14 Jan 2014 17:11:39 +1100 Subject: powerpc: Fix races with irq_work If we set irq_work on a processor and immediately afterward, before the irq work has a chance to be processed, we change the decrementer value, we can seriously delay the handling of that irq_work. Fix it by checking in a few places for pending irq work, first before changing the decrementer in decrementer_set_next_event() and after changing it in the same function and in timer_interrupt(). 
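In decrementer_set_next_event() the check pattern is roughly the following sketch, condensed from the hunks below:

	/* Don't adjust the decrementer if some irq work is pending */
	if (test_irq_work_pending())
		return 0;
	__get_cpu_var(decrementers_next_tb) = get_tb_or_rtc() + evt;
	set_dec(evt);

	/* We may have raced with new irq work */
	if (test_irq_work_pending())
		set_dec(1);

The same "re-check and force an almost-immediate decrementer interrupt" step is applied at the end of timer_interrupt().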
Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/time.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index afb1b56ef4fa..b3dab20acf34 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -536,6 +536,9 @@ void timer_interrupt(struct pt_regs * regs) now = *next_tb - now; if (now <= DECREMENTER_MAX) set_dec((int)now); + /* We may have raced with new irq work */ + if (test_irq_work_pending()) + set_dec(1); __get_cpu_var(irq_stat).timer_irqs_others++; } @@ -802,8 +805,16 @@ static void __init clocksource_init(void) static int decrementer_set_next_event(unsigned long evt, struct clock_event_device *dev) { + /* Don't adjust the decrementer if some irq work is pending */ + if (test_irq_work_pending()) + return 0; __get_cpu_var(decrementers_next_tb) = get_tb_or_rtc() + evt; set_dec(evt); + + /* We may have raced with new irq work */ + if (test_irq_work_pending()) + set_dec(1); + return 0; } -- cgit v1.2.3 From d31626f70b6103f4d9153b75d07e0e8795728cc9 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 13 Jan 2014 15:56:29 +1100 Subject: powerpc: Don't corrupt transactional state when using FP/VMX in kernel Currently, when we have a process using the transactional memory facilities on POWER8 (that is, the processor is in transactional or suspended state), and the process enters the kernel and the kernel then uses the floating-point or vector (VMX/Altivec) facility, we end up corrupting the user-visible FP/VMX/VSX state. This happens, for example, if a page fault causes a copy-on-write operation, because the copy_page function will use VMX to do the copy on POWER8. The test program below demonstrates the bug. The bug happens because when FP/VMX state for a transactional process is stored in the thread_struct, we store the checkpointed state in .fp_state/.vr_state and the transactional (current) state in .transact_fp/.transact_vr. However, when the kernel wants to use FP/VMX, it calls enable_kernel_fp() or enable_kernel_altivec(), which saves the current state in .fp_state/.vr_state. Furthermore, when we return to the user process we return with FP/VMX/VSX disabled. The next time the process uses FP/VMX/VSX, we don't know which set of state (the current register values, .fp_state/.vr_state, or .transact_fp/.transact_vr) we should be using, since we have no way to tell if we are still in the same transaction, and if not, whether the previous transaction succeeded or failed. Thus it is necessary to strictly adhere to the rule that if FP has been enabled at any point in a transaction, we must keep FP enabled for the user process with the current transactional state in the FP registers, until we detect that it is no longer in a transaction. Similarly for VMX; once enabled it must stay enabled until the process is no longer transactional. In order to keep this rule, we add a new thread_info flag which we test when returning from the kernel to userspace, called TIF_RESTORE_TM. This flag indicates that there is FP/VMX/VSX state to be restored before entering userspace, and when it is set the .tm_orig_msr field in the thread_struct indicates what state needs to be restored. The restoration is done by restore_tm_state(). The TIF_RESTORE_TM bit is set by new giveup_fpu/altivec_maybe_transactional helpers, which are called from enable_kernel_fp/altivec, giveup_vsx, and flush_fp/altivec_to_thread instead of giveup_fpu/altivec. 
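Condensed, the new helpers latch the pre-giveup MSR and set the flag before falling through to the existing giveup routines; a sketch of the logic (see the process.c hunks below for the full version):

	void giveup_fpu_maybe_transactional(struct task_struct *tsk)
	{
		/* If saving the current thread's regs while transactional,
		 * remember to restore the state before returning to userspace.
		 */
		if (tsk == current && tsk->thread.regs &&
		    MSR_TM_ACTIVE(tsk->thread.regs->msr) &&
		    !test_thread_flag(TIF_RESTORE_TM)) {
			tsk->thread.tm_orig_msr = tsk->thread.regs->msr;
			set_thread_flag(TIF_RESTORE_TM);
		}

		giveup_fpu(tsk);
	}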
The other thing to be done is to get the transactional FP/VMX/VSX state from .fp_state/.vr_state when doing reclaim, if that state has been saved there by giveup_fpu/altivec_maybe_transactional. Having done this, we set the FP/VMX bit in the thread's MSR after reclaim to indicate that that part of the state is now valid (having been reclaimed from the processor's checkpointed state). Finally, in the signal handling code, we move the clearing of the transactional state bits in the thread's MSR a bit earlier, before calling flush_fp_to_thread(), so that we don't unnecessarily set the TIF_RESTORE_TM bit. This is the test program: /* Michael Neuling 4/12/2013 * * See if the altivec state is leaked out of an aborted transaction due to * kernel vmx copy loops. * * gcc -m64 htm_vmxcopy.c -o htm_vmxcopy * */ /* We don't use all of these, but for reference: */ int main(int argc, char *argv[]) { long double vecin = 1.3; long double vecout; unsigned long pgsize = getpagesize(); int i; int fd; int size = pgsize*16; char tmpfile[] = "/tmp/page_faultXXXXXX"; char buf[pgsize]; char *a; uint64_t aborted = 0; fd = mkstemp(tmpfile); assert(fd >= 0); memset(buf, 0, pgsize); for (i = 0; i < size; i += pgsize) assert(write(fd, buf, pgsize) == pgsize); unlink(tmpfile); a = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0); assert(a != MAP_FAILED); asm __volatile__( "lxvd2x 40,0,%[vecinptr] ; " // set 40 to initial value TBEGIN "beq 3f ;" TSUSPEND "xxlxor 40,40,40 ; " // set 40 to 0 "std 5, 0(%[map]) ;" // cause kernel vmx copy page TABORT TRESUME TEND "li %[res], 0 ;" "b 5f ;" "3: ;" // Abort handler "li %[res], 1 ;" "5: ;" "stxvd2x 40,0,%[vecoutptr] ; " : [res]"=r"(aborted) : [vecinptr]"r"(&vecin), [vecoutptr]"r"(&vecout), [map]"r"(a) : "memory", "r0", "r3", "r4", "r5", "r6", "r7"); if (aborted && (vecin != vecout)){ printf("FAILED: vector state leaked on abort %f != %f\n", (double)vecin, (double)vecout); exit(1); } munmap(a, size); close(fd); printf("PASSED!\n"); return 0; } Signed-off-by: Paul Mackerras Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/processor.h | 2 + arch/powerpc/include/asm/thread_info.h | 5 +- arch/powerpc/include/asm/tm.h | 1 + arch/powerpc/kernel/entry_64.S | 12 ++- arch/powerpc/kernel/fpu.S | 16 ++++ arch/powerpc/kernel/process.c | 146 ++++++++++++++++++++++++++++++--- arch/powerpc/kernel/signal.c | 3 +- arch/powerpc/kernel/signal_32.c | 21 ++--- arch/powerpc/kernel/signal_64.c | 14 ++-- arch/powerpc/kernel/traps.c | 12 +-- arch/powerpc/kernel/vector.S | 10 +++ 11 files changed, 195 insertions(+), 47 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index fc14a38c7ccf..232a2fa5b483 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -373,6 +373,8 @@ extern int set_endian(struct task_struct *tsk, unsigned int val); extern int get_unalign_ctl(struct task_struct *tsk, unsigned long adr); extern int set_unalign_ctl(struct task_struct *tsk, unsigned int val); +extern void fp_enable(void); +extern void vec_enable(void); extern void load_fp_state(struct thread_fp_state *fp); extern void store_fp_state(struct thread_fp_state *fp); extern void load_vr_state(struct thread_vr_state *vr); diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index fc2bf414c584..b034ecdb7c74 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -91,6 +91,7 @@ static inline 
struct thread_info *current_thread_info(void) #define TIF_POLLING_NRFLAG 3 /* true if poll_idle() is polling TIF_NEED_RESCHED */ #define TIF_32BIT 4 /* 32 bit binary */ +#define TIF_RESTORE_TM 5 /* need to restore TM FP/VEC/VSX */ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SINGLESTEP 8 /* singlestepping active */ #define TIF_NOHZ 9 /* in adaptive nohz mode */ @@ -113,6 +114,7 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_NEED_RESCHED (1<thread.regs && + MSR_TM_ACTIVE(tsk->thread.regs->msr) && + !test_thread_flag(TIF_RESTORE_TM)) { + tsk->thread.tm_orig_msr = tsk->thread.regs->msr; + set_thread_flag(TIF_RESTORE_TM); + } + + giveup_fpu(tsk); +} + +void giveup_altivec_maybe_transactional(struct task_struct *tsk) +{ + /* + * If we are saving the current thread's registers, and the + * thread is in a transactional state, set the TIF_RESTORE_TM + * bit so that we know to restore the registers before + * returning to userspace. + */ + if (tsk == current && tsk->thread.regs && + MSR_TM_ACTIVE(tsk->thread.regs->msr) && + !test_thread_flag(TIF_RESTORE_TM)) { + tsk->thread.tm_orig_msr = tsk->thread.regs->msr; + set_thread_flag(TIF_RESTORE_TM); + } + + giveup_altivec(tsk); +} + +#else +#define giveup_fpu_maybe_transactional(tsk) giveup_fpu(tsk) +#define giveup_altivec_maybe_transactional(tsk) giveup_altivec(tsk) +#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ + #ifdef CONFIG_PPC_FPU /* * Make sure the floating-point register state in the @@ -101,13 +143,13 @@ void flush_fp_to_thread(struct task_struct *tsk) */ BUG_ON(tsk != current); #endif - giveup_fpu(tsk); + giveup_fpu_maybe_transactional(tsk); } preempt_enable(); } } EXPORT_SYMBOL_GPL(flush_fp_to_thread); -#endif +#endif /* CONFIG_PPC_FPU */ void enable_kernel_fp(void) { @@ -115,11 +157,11 @@ void enable_kernel_fp(void) #ifdef CONFIG_SMP if (current->thread.regs && (current->thread.regs->msr & MSR_FP)) - giveup_fpu(current); + giveup_fpu_maybe_transactional(current); else giveup_fpu(NULL); /* just enables FP for kernel */ #else - giveup_fpu(last_task_used_math); + giveup_fpu_maybe_transactional(last_task_used_math); #endif /* CONFIG_SMP */ } EXPORT_SYMBOL(enable_kernel_fp); @@ -131,11 +173,11 @@ void enable_kernel_altivec(void) #ifdef CONFIG_SMP if (current->thread.regs && (current->thread.regs->msr & MSR_VEC)) - giveup_altivec(current); + giveup_altivec_maybe_transactional(current); else giveup_altivec_notask(); #else - giveup_altivec(last_task_used_altivec); + giveup_altivec_maybe_transactional(last_task_used_altivec); #endif /* CONFIG_SMP */ } EXPORT_SYMBOL(enable_kernel_altivec); @@ -152,7 +194,7 @@ void flush_altivec_to_thread(struct task_struct *tsk) #ifdef CONFIG_SMP BUG_ON(tsk != current); #endif - giveup_altivec(tsk); + giveup_altivec_maybe_transactional(tsk); } preempt_enable(); } @@ -181,8 +223,8 @@ EXPORT_SYMBOL(enable_kernel_vsx); void giveup_vsx(struct task_struct *tsk) { - giveup_fpu(tsk); - giveup_altivec(tsk); + giveup_fpu_maybe_transactional(tsk); + giveup_altivec_maybe_transactional(tsk); __giveup_vsx(tsk); } @@ -478,7 +520,48 @@ static inline bool hw_brk_match(struct arch_hw_breakpoint *a, return false; return true; } + #ifdef CONFIG_PPC_TRANSACTIONAL_MEM +static void tm_reclaim_thread(struct thread_struct *thr, + struct thread_info *ti, uint8_t cause) +{ + unsigned long msr_diff = 0; + + /* + * If FP/VSX registers have been already saved to the + * thread_struct, move them to the transact_fp array. 
+ * We clear the TIF_RESTORE_TM bit since after the reclaim + * the thread will no longer be transactional. + */ + if (test_ti_thread_flag(ti, TIF_RESTORE_TM)) { + msr_diff = thr->tm_orig_msr & ~thr->regs->msr; + if (msr_diff & MSR_FP) + memcpy(&thr->transact_fp, &thr->fp_state, + sizeof(struct thread_fp_state)); + if (msr_diff & MSR_VEC) + memcpy(&thr->transact_vr, &thr->vr_state, + sizeof(struct thread_vr_state)); + clear_ti_thread_flag(ti, TIF_RESTORE_TM); + msr_diff &= MSR_FP | MSR_VEC | MSR_VSX | MSR_FE0 | MSR_FE1; + } + + tm_reclaim(thr, thr->regs->msr, cause); + + /* Having done the reclaim, we now have the checkpointed + * FP/VSX values in the registers. These might be valid + * even if we have previously called enable_kernel_fp() or + * flush_fp_to_thread(), so update thr->regs->msr to + * indicate their current validity. + */ + thr->regs->msr |= msr_diff; +} + +void tm_reclaim_current(uint8_t cause) +{ + tm_enable(); + tm_reclaim_thread(¤t->thread, current_thread_info(), cause); +} + static inline void tm_reclaim_task(struct task_struct *tsk) { /* We have to work out if we're switching from/to a task that's in the @@ -501,9 +584,11 @@ static inline void tm_reclaim_task(struct task_struct *tsk) /* Stash the original thread MSR, as giveup_fpu et al will * modify it. We hold onto it to see whether the task used - * FP & vector regs. + * FP & vector regs. If the TIF_RESTORE_TM flag is set, + * tm_orig_msr is already set. */ - thr->tm_orig_msr = thr->regs->msr; + if (!test_ti_thread_flag(task_thread_info(tsk), TIF_RESTORE_TM)) + thr->tm_orig_msr = thr->regs->msr; TM_DEBUG("--- tm_reclaim on pid %d (NIP=%lx, " "ccr=%lx, msr=%lx, trap=%lx)\n", @@ -511,7 +596,7 @@ static inline void tm_reclaim_task(struct task_struct *tsk) thr->regs->ccr, thr->regs->msr, thr->regs->trap); - tm_reclaim(thr, thr->regs->msr, TM_CAUSE_RESCHED); + tm_reclaim_thread(thr, task_thread_info(tsk), TM_CAUSE_RESCHED); TM_DEBUG("--- tm_reclaim on pid %d complete\n", tsk->pid); @@ -587,6 +672,43 @@ static inline void __switch_to_tm(struct task_struct *prev) tm_reclaim_task(prev); } } + +/* + * This is called if we are on the way out to userspace and the + * TIF_RESTORE_TM flag is set. It checks if we need to reload + * FP and/or vector state and does so if necessary. + * If userspace is inside a transaction (whether active or + * suspended) and FP/VMX/VSX instructions have ever been enabled + * inside that transaction, then we have to keep them enabled + * and keep the FP/VMX/VSX state loaded while ever the transaction + * continues. The reason is that if we didn't, and subsequently + * got a FP/VMX/VSX unavailable interrupt inside a transaction, + * we don't know whether it's the same transaction, and thus we + * don't know which of the checkpointed state and the transactional + * state to use. 
+ */ +void restore_tm_state(struct pt_regs *regs) +{ + unsigned long msr_diff; + + clear_thread_flag(TIF_RESTORE_TM); + if (!MSR_TM_ACTIVE(regs->msr)) + return; + + msr_diff = current->thread.tm_orig_msr & ~regs->msr; + msr_diff &= MSR_FP | MSR_VEC | MSR_VSX; + if (msr_diff & MSR_FP) { + fp_enable(); + load_fp_state(¤t->thread.fp_state); + regs->msr |= current->thread.fpexc_mode; + } + if (msr_diff & MSR_VEC) { + vec_enable(); + load_vr_state(¤t->thread.vr_state); + } + regs->msr |= msr_diff; +} + #else #define tm_recheckpoint_new_task(new) #define __switch_to_tm(prev) diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index 457e97aa2945..8fc4177ed65a 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -203,8 +203,7 @@ unsigned long get_tm_stackpointer(struct pt_regs *regs) #ifdef CONFIG_PPC_TRANSACTIONAL_MEM if (MSR_TM_ACTIVE(regs->msr)) { - tm_enable(); - tm_reclaim(¤t->thread, regs->msr, TM_CAUSE_SIGNAL); + tm_reclaim_current(TM_CAUSE_SIGNAL); if (MSR_TM_TRANSACTIONAL(regs->msr)) return current->thread.ckpt_regs.gpr[1]; } diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 68027bfa5f8e..6ce69e6f1fcb 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -519,6 +519,13 @@ static int save_tm_user_regs(struct pt_regs *regs, { unsigned long msr = regs->msr; + /* Remove TM bits from thread's MSR. The MSR in the sigcontext + * just indicates to userland that we were doing a transaction, but we + * don't want to return in transactional state. This also ensures + * that flush_fp_to_thread won't set TIF_RESTORE_TM again. + */ + regs->msr &= ~MSR_TS_MASK; + /* Make sure floating point registers are stored in regs */ flush_fp_to_thread(current); @@ -1056,13 +1063,6 @@ int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka, /* enter the signal handler in native-endian mode */ regs->msr &= ~MSR_LE; regs->msr |= (MSR_KERNEL & MSR_LE); -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - /* Remove TM bits from thread's MSR. The MSR in the sigcontext - * just indicates to userland that we were doing a transaction, but we - * don't want to return in transactional state: - */ - regs->msr &= ~MSR_TS_MASK; -#endif return 1; badframe: @@ -1484,13 +1484,6 @@ int handle_signal32(unsigned long sig, struct k_sigaction *ka, regs->nip = (unsigned long) ka->sa.sa_handler; /* enter the signal handler in big-endian mode */ regs->msr &= ~MSR_LE; -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - /* Remove TM bits from thread's MSR. The MSR in the sigcontext - * just indicates to userland that we were doing a transaction, but we - * don't want to return in transactional state: - */ - regs->msr &= ~MSR_TS_MASK; -#endif return 1; badframe: diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 42991045349f..e35bf773df7a 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -192,6 +192,13 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc, BUG_ON(!MSR_TM_ACTIVE(regs->msr)); + /* Remove TM bits from thread's MSR. The MSR in the sigcontext + * just indicates to userland that we were doing a transaction, but we + * don't want to return in transactional state. This also ensures + * that flush_fp_to_thread won't set TIF_RESTORE_TM again. 
+ */ + regs->msr &= ~MSR_TS_MASK; + flush_fp_to_thread(current); #ifdef CONFIG_ALTIVEC @@ -749,13 +756,6 @@ int handle_rt_signal64(int signr, struct k_sigaction *ka, siginfo_t *info, /* Make sure signal handler doesn't get spurious FP exceptions */ current->thread.fp_state.fpscr = 0; -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - /* Remove TM bits from thread's MSR. The MSR in the sigcontext - * just indicates to userland that we were doing a transaction, but we - * don't want to return in transactional state: - */ - regs->msr &= ~MSR_TS_MASK; -#endif /* Set up to return from userspace. */ if (vdso64_rt_sigtramp && current->mm->context.vdso_base) { diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 330841766b09..26e1358ff0bc 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -1399,7 +1399,6 @@ void fp_unavailable_tm(struct pt_regs *regs) TM_DEBUG("FP Unavailable trap whilst transactional at 0x%lx, MSR=%lx\n", regs->nip, regs->msr); - tm_enable(); /* We can only have got here if the task started using FP after * beginning the transaction. So, the transactional regs are just a @@ -1408,8 +1407,7 @@ void fp_unavailable_tm(struct pt_regs *regs) * transaction, and probably retry but now with FP enabled. So the * checkpointed FP registers need to be loaded. */ - tm_reclaim(¤t->thread, current->thread.regs->msr, - TM_CAUSE_FAC_UNAV); + tm_reclaim_current(TM_CAUSE_FAC_UNAV); /* Reclaim didn't save out any FPRs to transact_fprs. */ /* Enable FP for the task: */ @@ -1432,9 +1430,7 @@ void altivec_unavailable_tm(struct pt_regs *regs) TM_DEBUG("Vector Unavailable trap whilst transactional at 0x%lx," "MSR=%lx\n", regs->nip, regs->msr); - tm_enable(); - tm_reclaim(¤t->thread, current->thread.regs->msr, - TM_CAUSE_FAC_UNAV); + tm_reclaim_current(TM_CAUSE_FAC_UNAV); regs->msr |= MSR_VEC; tm_recheckpoint(¤t->thread, regs->msr); current->thread.used_vr = 1; @@ -1455,10 +1451,8 @@ void vsx_unavailable_tm(struct pt_regs *regs) "MSR=%lx\n", regs->nip, regs->msr); - tm_enable(); /* This reclaims FP and/or VR regs if they're already enabled */ - tm_reclaim(¤t->thread, current->thread.regs->msr, - TM_CAUSE_FAC_UNAV); + tm_reclaim_current(TM_CAUSE_FAC_UNAV); regs->msr |= MSR_VEC | MSR_FP | current->thread.fpexc_mode | MSR_VSX; diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S index 0458a9aaba9d..74f8050518d6 100644 --- a/arch/powerpc/kernel/vector.S +++ b/arch/powerpc/kernel/vector.S @@ -36,6 +36,16 @@ _GLOBAL(do_load_up_transact_altivec) blr #endif +/* + * Enable use of VMX/Altivec for the caller. + */ +_GLOBAL(vec_enable) + mfmsr r3 + oris r3,r3,MSR_VEC@h + MTMSRD(r3) + isync + blr + /* * Load state from memory into VMX registers including VSCR. * Assumes the caller has enabled VMX in the MSR. -- cgit v1.2.3 From 3ac8ff1c475bda7174fce63230c0932454287cd5 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 13 Jan 2014 15:56:30 +1100 Subject: powerpc: Fix transactional FP/VMX/VSX unavailable handlers Currently, if a process starts a transaction and then takes an exception because the FPU, VMX or VSX unit is unavailable to it, we end up corrupting any FP/VMX/VSX state that was valid before the interrupt. For example, if the process starts a transaction with the FPU available to it but VMX unavailable, and then does a VMX instruction inside the transaction, the FP state gets corrupted. Loading up the desired state generally involves doing a reclaim and a recheckpoint. 
To avoid corrupting already-valid state, we have to be careful not to reload that state from the thread_struct between the reclaim and the recheckpoint (since the thread_struct values are stale by now), and we have to reload that state from the transact_fp/vr arrays after the recheckpoint to get back the current transactional values saved there by the reclaim. Signed-off-by: Paul Mackerras Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/traps.c | 45 ++++++++++++++++++++++++++++++++++++--------- 1 file changed, 36 insertions(+), 9 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 26e1358ff0bc..33cd7a0b8e73 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -1416,11 +1416,19 @@ void fp_unavailable_tm(struct pt_regs *regs) /* This loads and recheckpoints the FP registers from * thread.fpr[]. They will remain in registers after the * checkpoint so we don't need to reload them after. + * If VMX is in use, the VRs now hold checkpointed values, + * so we don't want to load the VRs from the thread_struct. */ - tm_recheckpoint(¤t->thread, regs->msr); + tm_recheckpoint(¤t->thread, MSR_FP); + + /* If VMX is in use, get the transactional values back */ + if (regs->msr & MSR_VEC) { + do_load_up_transact_altivec(¤t->thread); + /* At this point all the VSX state is loaded, so enable it */ + regs->msr |= MSR_VSX; + } } -#ifdef CONFIG_ALTIVEC void altivec_unavailable_tm(struct pt_regs *regs) { /* See the comments in fp_unavailable_tm(). This function operates @@ -1432,14 +1440,19 @@ void altivec_unavailable_tm(struct pt_regs *regs) regs->nip, regs->msr); tm_reclaim_current(TM_CAUSE_FAC_UNAV); regs->msr |= MSR_VEC; - tm_recheckpoint(¤t->thread, regs->msr); + tm_recheckpoint(¤t->thread, MSR_VEC); current->thread.used_vr = 1; + + if (regs->msr & MSR_FP) { + do_load_up_transact_fpu(¤t->thread); + regs->msr |= MSR_VSX; + } } -#endif -#ifdef CONFIG_VSX void vsx_unavailable_tm(struct pt_regs *regs) { + unsigned long orig_msr = regs->msr; + /* See the comments in fp_unavailable_tm(). This works similarly, * though we're loading both FP and VEC registers in here. * @@ -1451,16 +1464,30 @@ void vsx_unavailable_tm(struct pt_regs *regs) "MSR=%lx\n", regs->nip, regs->msr); + current->thread.used_vsr = 1; + + /* If FP and VMX are already loaded, we have all the state we need */ + if ((orig_msr & (MSR_FP | MSR_VEC)) == (MSR_FP | MSR_VEC)) { + regs->msr |= MSR_VSX; + return; + } + /* This reclaims FP and/or VR regs if they're already enabled */ tm_reclaim_current(TM_CAUSE_FAC_UNAV); regs->msr |= MSR_VEC | MSR_FP | current->thread.fpexc_mode | MSR_VSX; - /* This loads & recheckpoints FP and VRs. */ - tm_recheckpoint(¤t->thread, regs->msr); - current->thread.used_vsr = 1; + + /* This loads & recheckpoints FP and VRs; but we have + * to be sure not to overwrite previously-valid state. + */ + tm_recheckpoint(¤t->thread, regs->msr & ~orig_msr); + + if (orig_msr & MSR_FP) + do_load_up_transact_fpu(¤t->thread); + if (orig_msr & MSR_VEC) + do_load_up_transact_altivec(¤t->thread); } -#endif #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ void performance_monitor_exception(struct pt_regs *regs) -- cgit v1.2.3 From 7e4e7867b1e551b7b8f326da3604c47332972bc6 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Wed, 15 Jan 2014 13:16:11 +0800 Subject: powerpc/eeh: Handle multiple EEH errors For one PCI error relevant OPAL event, we possibly have multiple EEH errors for that. 
For example, multiple frozen PEs detected on different PHBs. Unfortunately, we didn't cover the case. The patch enumarates the return value from eeh_ops::next_error() and change eeh_handle_special_event() and eeh_ops::next_error() to handle all existing EEH errors. As Ben pointed out, we needn't list_for_each_entry_safe() since we are not deleting any PHB from the hose_list and the EEH serialized lock should be held while purging EEH events. The patch covers those suggestions as well. Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/eeh.h | 10 ++ arch/powerpc/kernel/eeh_driver.c | 150 ++++++++++++++++-------------- arch/powerpc/platforms/powernv/eeh-ioda.c | 39 +++++--- 3 files changed, 112 insertions(+), 87 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 8b4b8e4a5c32..9e39ceb1d19f 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -118,6 +118,16 @@ static inline struct pci_dev *eeh_dev_to_pci_dev(struct eeh_dev *edev) return edev ? edev->pdev : NULL; } +/* Return values from eeh_ops::next_error */ +enum { + EEH_NEXT_ERR_NONE = 0, + EEH_NEXT_ERR_INF, + EEH_NEXT_ERR_FROZEN_PE, + EEH_NEXT_ERR_FENCED_PHB, + EEH_NEXT_ERR_DEAD_PHB, + EEH_NEXT_ERR_DEAD_IOC +}; + /* * The struct is used to trace the registered EEH operation * callback functions. Actually, those operation callback diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 7db39203a073..baa3b06e6dab 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -630,84 +630,90 @@ static void eeh_handle_special_event(void) { struct eeh_pe *pe, *phb_pe; struct pci_bus *bus; - struct pci_controller *hose, *tmp; + struct pci_controller *hose; unsigned long flags; - int rc = 0; + int rc; - /* - * The return value from next_error() has been classified as follows. - * It might be good to enumerate them. However, next_error() is only - * supported by PowerNV platform for now. 
So it would be fine to use - * integer directly: - * - * 4 - Dead IOC 3 - Dead PHB - * 2 - Fenced PHB 1 - Frozen PE - * 0 - No error found - * - */ - rc = eeh_ops->next_error(&pe); - if (rc <= 0) - return; - switch (rc) { - case 4: - /* Mark all PHBs in dead state */ - eeh_serialize_lock(&flags); - list_for_each_entry_safe(hose, tmp, - &hose_list, list_node) { - phb_pe = eeh_phb_pe_get(hose); - if (!phb_pe) continue; - - eeh_pe_state_mark(phb_pe, - EEH_PE_ISOLATED | EEH_PE_PHB_DEAD); + do { + rc = eeh_ops->next_error(&pe); + + switch (rc) { + case EEH_NEXT_ERR_DEAD_IOC: + /* Mark all PHBs in dead state */ + eeh_serialize_lock(&flags); + + /* Purge all events */ + eeh_remove_event(NULL); + + list_for_each_entry(hose, &hose_list, list_node) { + phb_pe = eeh_phb_pe_get(hose); + if (!phb_pe) continue; + + eeh_pe_state_mark(phb_pe, + EEH_PE_ISOLATED | EEH_PE_PHB_DEAD); + } + + eeh_serialize_unlock(flags); + + break; + case EEH_NEXT_ERR_FROZEN_PE: + case EEH_NEXT_ERR_FENCED_PHB: + case EEH_NEXT_ERR_DEAD_PHB: + /* Mark the PE in fenced state */ + eeh_serialize_lock(&flags); + + /* Purge all events of the PHB */ + eeh_remove_event(pe); + + if (rc == EEH_NEXT_ERR_DEAD_PHB) + eeh_pe_state_mark(pe, + EEH_PE_ISOLATED | EEH_PE_PHB_DEAD); + else + eeh_pe_state_mark(pe, + EEH_PE_ISOLATED | EEH_PE_RECOVERING); + + eeh_serialize_unlock(flags); + + break; + case EEH_NEXT_ERR_NONE: + return; + default: + pr_warn("%s: Invalid value %d from next_error()\n", + __func__, rc); + return; } - eeh_serialize_unlock(flags); - - /* Purge all events */ - eeh_remove_event(NULL); - break; - case 3: - case 2: - case 1: - /* Mark the PE in fenced state */ - eeh_serialize_lock(&flags); - if (rc == 3) - eeh_pe_state_mark(pe, - EEH_PE_ISOLATED | EEH_PE_PHB_DEAD); - else - eeh_pe_state_mark(pe, - EEH_PE_ISOLATED | EEH_PE_RECOVERING); - eeh_serialize_unlock(flags); - - /* Purge all events of the PHB */ - eeh_remove_event(pe); - break; - default: - pr_err("%s: Invalid value %d from next_error()\n", - __func__, rc); - return; - } - /* - * For fenced PHB and frozen PE, it's handled as normal - * event. We have to remove the affected PHBs for dead - * PHB and IOC - */ - if (rc == 2 || rc == 1) - eeh_handle_normal_event(pe); - else { - list_for_each_entry_safe(hose, tmp, - &hose_list, list_node) { - phb_pe = eeh_phb_pe_get(hose); - if (!phb_pe || !(phb_pe->state & EEH_PE_PHB_DEAD)) - continue; - - bus = eeh_pe_bus_get(phb_pe); - /* Notify all devices that they're about to go down. */ - eeh_pe_dev_traverse(pe, eeh_report_failure, NULL); - pcibios_remove_pci_devices(bus); + /* + * For fenced PHB and frozen PE, it's handled as normal + * event. 
We have to remove the affected PHBs for dead + * PHB and IOC + */ + if (rc == EEH_NEXT_ERR_FROZEN_PE || + rc == EEH_NEXT_ERR_FENCED_PHB) { + eeh_handle_normal_event(pe); + } else { + list_for_each_entry(hose, &hose_list, list_node) { + phb_pe = eeh_phb_pe_get(hose); + if (!phb_pe || + !(phb_pe->state & EEH_PE_PHB_DEAD)) + continue; + + /* Notify all devices to be down */ + bus = eeh_pe_bus_get(phb_pe); + eeh_pe_dev_traverse(pe, + eeh_report_failure, NULL); + pcibios_remove_pci_devices(bus); + } } - } + + /* + * If we have detected dead IOC, we needn't proceed + * any more since all PHBs would have been removed + */ + if (rc == EEH_NEXT_ERR_DEAD_IOC) + break; + } while (rc != EEH_NEXT_ERR_NONE); } /** diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c index 007ac4989841..8dd16f4675a2 100644 --- a/arch/powerpc/platforms/powernv/eeh-ioda.c +++ b/arch/powerpc/platforms/powernv/eeh-ioda.c @@ -735,12 +735,12 @@ static int ioda_eeh_get_pe(struct pci_controller *hose, */ static int ioda_eeh_next_error(struct eeh_pe **pe) { - struct pci_controller *hose, *tmp; + struct pci_controller *hose; struct pnv_phb *phb; u64 frozen_pe_no; u16 err_type, severity; long rc; - int ret = 1; + int ret = EEH_NEXT_ERR_NONE; /* * While running here, it's safe to purge the event queue. @@ -750,7 +750,7 @@ static int ioda_eeh_next_error(struct eeh_pe **pe) eeh_remove_event(NULL); opal_notifier_update_evt(OPAL_EVENT_PCI_ERROR, 0x0ul); - list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { + list_for_each_entry(hose, &hose_list, list_node) { /* * If the subordinate PCI buses of the PHB has been * removed, we needn't take care of it any more. @@ -789,19 +789,19 @@ static int ioda_eeh_next_error(struct eeh_pe **pe) switch (err_type) { case OPAL_EEH_IOC_ERROR: if (severity == OPAL_EEH_SEV_IOC_DEAD) { - list_for_each_entry_safe(hose, tmp, - &hose_list, list_node) { + list_for_each_entry(hose, &hose_list, + list_node) { phb = hose->private_data; phb->eeh_state |= PNV_EEH_STATE_REMOVED; } pr_err("EEH: dead IOC detected\n"); - ret = 4; - goto out; + ret = EEH_NEXT_ERR_DEAD_IOC; } else if (severity == OPAL_EEH_SEV_INF) { pr_info("EEH: IOC informative error " "detected\n"); ioda_eeh_hub_diag(hose); + ret = EEH_NEXT_ERR_NONE; } break; @@ -813,21 +813,20 @@ static int ioda_eeh_next_error(struct eeh_pe **pe) pr_err("EEH: dead PHB#%x detected\n", hose->global_number); phb->eeh_state |= PNV_EEH_STATE_REMOVED; - ret = 3; - goto out; + ret = EEH_NEXT_ERR_DEAD_PHB; } else if (severity == OPAL_EEH_SEV_PHB_FENCED) { if (ioda_eeh_get_phb_pe(hose, pe)) break; pr_err("EEH: fenced PHB#%x detected\n", hose->global_number); - ret = 2; - goto out; + ret = EEH_NEXT_ERR_FENCED_PHB; } else if (severity == OPAL_EEH_SEV_INF) { pr_info("EEH: PHB#%x informative error " "detected\n", hose->global_number); ioda_eeh_phb_diag(hose); + ret = EEH_NEXT_ERR_NONE; } break; @@ -837,13 +836,23 @@ static int ioda_eeh_next_error(struct eeh_pe **pe) pr_err("EEH: Frozen PE#%x on PHB#%x detected\n", (*pe)->addr, (*pe)->phb->global_number); - ret = 1; - goto out; + ret = EEH_NEXT_ERR_FROZEN_PE; + break; + default: + pr_warn("%s: Unexpected error type %d\n", + __func__, err_type); } + + /* + * If we have no errors on the specific PHB or only + * informative error there, we continue poking it. + * Otherwise, we need actions to be taken by upper + * layer. 
+ */ + if (ret > EEH_NEXT_ERR_INF) + break; } - ret = 0; -out: return ret; } -- cgit v1.2.3 From 1c2042c83aa7af10579b71a1fe5e22bbae69f08c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 15 Jan 2014 14:33:20 +0100 Subject: powerpc/eeh: Use global PCI rescan-remove locking Race conditions are theoretically possible between the PCI device addition and removal in the PPC64 PCI error recovery driver and the generic PCI bus rescan and device removal that can be triggered via sysfs. To avoid those race conditions make PPC64 PCI error recovery driver use global PCI rescan-remove locking around PCI device addition and removal. Signed-off-by: Rafael J. Wysocki Signed-off-by: Bjorn Helgaas --- arch/powerpc/kernel/eeh_driver.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 36bed5a12750..c17f90d0f73c 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -369,7 +369,9 @@ static void *eeh_rmv_device(void *data, void *userdata) edev->mode |= EEH_DEV_DISCONNECTED; (*removed)++; + pci_lock_rescan_remove(); pci_stop_and_remove_bus_device(dev); + pci_unlock_rescan_remove(); return NULL; } @@ -416,10 +418,13 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus) * into pcibios_add_pci_devices(). */ eeh_pe_state_mark(pe, EEH_PE_KEEP); - if (bus) + if (bus) { + pci_lock_rescan_remove(); pcibios_remove_pci_devices(bus); - else if (frozen_bus) + pci_unlock_rescan_remove(); + } else if (frozen_bus) { eeh_pe_dev_traverse(pe, eeh_rmv_device, &removed); + } /* Reset the pci controller. (Asserts RST#; resets config space). * Reconfigure bridges and devices. Don't try to bring the system @@ -429,6 +434,8 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus) if (rc) return rc; + pci_lock_rescan_remove(); + /* Restore PE */ eeh_ops->configure_bridge(pe); eeh_pe_restore_bars(pe); @@ -462,6 +469,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus) pe->tstamp = tstamp; pe->freeze_count = cnt; + pci_unlock_rescan_remove(); return 0; } @@ -618,8 +626,11 @@ perm_error: eeh_pe_dev_traverse(pe, eeh_report_failure, NULL); /* Shut down the device drivers for good. */ - if (frozen_bus) + if (frozen_bus) { + pci_lock_rescan_remove(); pcibios_remove_pci_devices(frozen_bus); + pci_unlock_rescan_remove(); + } } static void eeh_handle_special_event(void) @@ -692,6 +703,7 @@ static void eeh_handle_special_event(void) if (rc == 2 || rc == 1) eeh_handle_normal_event(pe); else { + pci_lock_rescan_remove(); list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { phb_pe = eeh_phb_pe_get(hose); @@ -703,6 +715,7 @@ static void eeh_handle_special_event(void) eeh_pe_dev_traverse(pe, eeh_report_failure, NULL); pcibios_remove_pci_devices(bus); } + pci_unlock_rescan_remove(); } } -- cgit v1.2.3 From e0b7ec058c0eb7ba8d5d937d81de2bd16db6970e Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 8 Jan 2014 21:25:20 +1100 Subject: KVM: PPC: Book3S HV: Align physical and virtual CPU thread numbers On a threaded processor such as POWER7, we group VCPUs into virtual cores and arrange that the VCPUs in a virtual core run on the same physical core. Currently we don't enforce any correspondence between virtual thread numbers within a virtual core and physical thread numbers. Physical threads are allocated starting at 0 on a first-come first-served basis to runnable virtual threads (VCPUs). 
POWER8 implements a new "msgsndp" instruction which guest kernels can use to interrupt other threads in the same core or sub-core. Since the instruction takes the destination physical thread ID as a parameter, it becomes necessary to align the physical thread IDs with the virtual thread IDs, that is, to make sure virtual thread N within a virtual core always runs on physical thread N. This means that it's possible that thread 0, which is where we call __kvmppc_vcore_entry, may end up running some other vcpu than the one whose task called kvmppc_run_core(), or it may end up running no vcpu at all, if for example thread 0 of the virtual core is currently executing in userspace. However, we do need thread 0 to be responsible for switching the MMU -- a previous version of this patch that had other threads switching the MMU was found to be responsible for occasional memory corruption and machine check interrupts in the guest on POWER7 machines. To accommodate this, we no longer pass the vcpu pointer to __kvmppc_vcore_entry, but instead let the assembly code load it from the PACA. Since the assembly code will need to know the kvm pointer and the thread ID for threads which don't have a vcpu, we move the thread ID into the PACA and we add a kvm pointer to the virtual core structure. In the case where thread 0 has no vcpu to run, it still calls into kvmppc_hv_entry in order to do the MMU switch, and then naps until either its vcpu is ready to run in the guest, or some other thread needs to exit the guest. In the latter case, thread 0 jumps to the code that switches the MMU back to the host. This control flow means that now we switch the MMU before loading any guest vcpu state. Similarly, on guest exit we now save all the guest vcpu state before switching the MMU back to the host. This has required substantial code movement, making the diff rather large. 
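The fixed mapping itself is small; roughly, using the names from the patch below:

	/* at vcore creation */
	vcore->first_vcpuid = core * threads_per_core;

	/* at vcpu creation: virtual thread N always runs on physical thread N */
	vcpu->arch.ptid = vcpu->vcpu_id - vcore->first_vcpuid;

	/*
	 * When starting a thread, stash the id in the PACA so the real-mode
	 * assembly can find its vcpu (or the absence of one) without being
	 * passed a vcpu pointer in r4.
	 */
	tpaca->kvm_hstate.ptid = vcpu->arch.ptid;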
Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_book3s_asm.h | 1 + arch/powerpc/include/asm/kvm_host.h | 2 + arch/powerpc/kernel/asm-offsets.c | 3 +- arch/powerpc/kvm/book3s_hv.c | 46 +- arch/powerpc/kvm/book3s_hv_interrupts.S | 6 +- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 676 +++++++++++++++++------------- 6 files changed, 397 insertions(+), 337 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h index 0bd9348a4db9..490b34f5d6bf 100644 --- a/arch/powerpc/include/asm/kvm_book3s_asm.h +++ b/arch/powerpc/include/asm/kvm_book3s_asm.h @@ -87,6 +87,7 @@ struct kvmppc_host_state { u8 hwthread_req; u8 hwthread_state; u8 host_ipi; + u8 ptid; struct kvm_vcpu *kvm_vcpu; struct kvmppc_vcore *kvm_vcore; unsigned long xics_phys; diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 2c2ca5faf7f2..b850544dbc3f 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -288,6 +288,7 @@ struct kvmppc_vcore { int n_woken; int nap_count; int napping_threads; + int first_vcpuid; u16 pcpu; u16 last_cpu; u8 vcore_state; @@ -298,6 +299,7 @@ struct kvmppc_vcore { u64 stolen_tb; u64 preempt_tb; struct kvm_vcpu *runner; + struct kvm *kvm; u64 tb_offset; /* guest timebase - host timebase */ ulong lpcr; u32 arch_compat; diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 5e64c3d2149f..332ae66883e4 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -506,7 +506,6 @@ int main(void) DEFINE(VCPU_FAULT_DAR, offsetof(struct kvm_vcpu, arch.fault_dar)); DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst)); DEFINE(VCPU_TRAP, offsetof(struct kvm_vcpu, arch.trap)); - DEFINE(VCPU_PTID, offsetof(struct kvm_vcpu, arch.ptid)); DEFINE(VCPU_CFAR, offsetof(struct kvm_vcpu, arch.cfar)); DEFINE(VCPU_PPR, offsetof(struct kvm_vcpu, arch.ppr)); DEFINE(VCPU_SHADOW_SRR1, offsetof(struct kvm_vcpu, arch.shadow_srr1)); @@ -514,6 +513,7 @@ int main(void) DEFINE(VCORE_NAP_COUNT, offsetof(struct kvmppc_vcore, nap_count)); DEFINE(VCORE_IN_GUEST, offsetof(struct kvmppc_vcore, in_guest)); DEFINE(VCORE_NAPPING_THREADS, offsetof(struct kvmppc_vcore, napping_threads)); + DEFINE(VCORE_KVM, offsetof(struct kvmppc_vcore, kvm)); DEFINE(VCORE_TB_OFFSET, offsetof(struct kvmppc_vcore, tb_offset)); DEFINE(VCORE_LPCR, offsetof(struct kvmppc_vcore, lpcr)); DEFINE(VCORE_PCR, offsetof(struct kvmppc_vcore, pcr)); @@ -583,6 +583,7 @@ int main(void) HSTATE_FIELD(HSTATE_XICS_PHYS, xics_phys); HSTATE_FIELD(HSTATE_SAVED_XIRR, saved_xirr); HSTATE_FIELD(HSTATE_HOST_IPI, host_ipi); + HSTATE_FIELD(HSTATE_PTID, ptid); HSTATE_FIELD(HSTATE_MMCR, host_mmcr); HSTATE_FIELD(HSTATE_PMC, host_pmc); HSTATE_FIELD(HSTATE_PURR, host_purr); diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 7e1813ceabc1..7da53cd215db 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -990,6 +990,8 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm, init_waitqueue_head(&vcore->wq); vcore->preempt_tb = TB_NIL; vcore->lpcr = kvm->arch.lpcr; + vcore->first_vcpuid = core * threads_per_core; + vcore->kvm = kvm; } kvm->arch.vcores[core] = vcore; kvm->arch.online_vcores++; @@ -1003,6 +1005,7 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm, ++vcore->num_threads; spin_unlock(&vcore->lock); vcpu->arch.vcore = vcore; + 
vcpu->arch.ptid = vcpu->vcpu_id - vcore->first_vcpuid; vcpu->arch.cpu_type = KVM_CPU_3S_64; kvmppc_sanity_check(vcpu); @@ -1066,7 +1069,7 @@ static void kvmppc_end_cede(struct kvm_vcpu *vcpu) } } -extern int __kvmppc_vcore_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); +extern void __kvmppc_vcore_entry(void); static void kvmppc_remove_runnable(struct kvmppc_vcore *vc, struct kvm_vcpu *vcpu) @@ -1140,15 +1143,16 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu) tpaca = &paca[cpu]; tpaca->kvm_hstate.kvm_vcpu = vcpu; tpaca->kvm_hstate.kvm_vcore = vc; - tpaca->kvm_hstate.napping = 0; + tpaca->kvm_hstate.ptid = vcpu->arch.ptid; vcpu->cpu = vc->pcpu; smp_wmb(); #if defined(CONFIG_PPC_ICP_NATIVE) && defined(CONFIG_SMP) - if (vcpu->arch.ptid) { + if (cpu != smp_processor_id()) { #ifdef CONFIG_KVM_XICS xics_wake_cpu(cpu); #endif - ++vc->n_woken; + if (vcpu->arch.ptid) + ++vc->n_woken; } #endif } @@ -1205,10 +1209,10 @@ static int on_primary_thread(void) */ static void kvmppc_run_core(struct kvmppc_vcore *vc) { - struct kvm_vcpu *vcpu, *vcpu0, *vnext; + struct kvm_vcpu *vcpu, *vnext; long ret; u64 now; - int ptid, i, need_vpa_update; + int i, need_vpa_update; int srcu_idx; struct kvm_vcpu *vcpus_to_update[threads_per_core]; @@ -1245,25 +1249,6 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc) spin_lock(&vc->lock); } - /* - * Assign physical thread IDs, first to non-ceded vcpus - * and then to ceded ones. - */ - ptid = 0; - vcpu0 = NULL; - list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) { - if (!vcpu->arch.ceded) { - if (!ptid) - vcpu0 = vcpu; - vcpu->arch.ptid = ptid++; - } - } - if (!vcpu0) - goto out; /* nothing to run; should never happen */ - list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) - if (vcpu->arch.ceded) - vcpu->arch.ptid = ptid++; - /* * Make sure we are running on thread 0, and that * secondary threads are offline. 
@@ -1280,15 +1265,19 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc) kvmppc_create_dtl_entry(vcpu, vc); } + /* Set this explicitly in case thread 0 doesn't have a vcpu */ + get_paca()->kvm_hstate.kvm_vcore = vc; + get_paca()->kvm_hstate.ptid = 0; + vc->vcore_state = VCORE_RUNNING; preempt_disable(); spin_unlock(&vc->lock); kvm_guest_enter(); - srcu_idx = srcu_read_lock(&vcpu0->kvm->srcu); + srcu_idx = srcu_read_lock(&vc->kvm->srcu); - __kvmppc_vcore_entry(NULL, vcpu0); + __kvmppc_vcore_entry(); spin_lock(&vc->lock); /* disable sending of IPIs on virtual external irqs */ @@ -1303,7 +1292,7 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc) vc->vcore_state = VCORE_EXITING; spin_unlock(&vc->lock); - srcu_read_unlock(&vcpu0->kvm->srcu, srcu_idx); + srcu_read_unlock(&vc->kvm->srcu, srcu_idx); /* make sure updates to secondary vcpu structs are visible now */ smp_mb(); @@ -1411,7 +1400,6 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) if (!signal_pending(current)) { if (vc->vcore_state == VCORE_RUNNING && VCORE_EXIT_COUNT(vc) == 0) { - vcpu->arch.ptid = vc->n_runnable - 1; kvmppc_create_dtl_entry(vcpu, vc); kvmppc_start_thread(vcpu); } else if (vc->vcore_state == VCORE_SLEEPING) { diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S index 00b7ed41ea17..e873796b1a29 100644 --- a/arch/powerpc/kvm/book3s_hv_interrupts.S +++ b/arch/powerpc/kvm/book3s_hv_interrupts.S @@ -35,7 +35,7 @@ ****************************************************************************/ /* Registers: - * r4: vcpu pointer + * none */ _GLOBAL(__kvmppc_vcore_entry) @@ -71,7 +71,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) mtmsrd r10,1 /* Save host PMU registers */ - /* R4 is live here (vcpu pointer) but not r3 or r5 */ li r3, 1 sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */ mfspr r7, SPRN_MMCR0 /* save MMCR0 */ @@ -136,16 +135,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) * enters the guest with interrupts enabled. */ BEGIN_FTR_SECTION + ld r4, HSTATE_KVM_VCPU(r13) ld r0, VCPU_PENDING_EXC(r4) li r7, (1 << BOOK3S_IRQPRIO_EXTERNAL) oris r7, r7, (1 << BOOK3S_IRQPRIO_EXTERNAL_LEVEL)@h and. r0, r0, r7 beq 32f - mr r31, r4 lhz r3, PACAPACAINDEX(r13) bl smp_send_reschedule nop - mr r4, r31 32: END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) #endif /* CONFIG_SMP */ diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 66db71c9156a..8bbe91bdb6da 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -33,6 +33,10 @@ #error Need to fix lppaca and SLB shadow accesses in little endian mode #endif +/* Values in HSTATE_NAPPING(r13) */ +#define NAPPING_CEDE 1 +#define NAPPING_NOVCPU 2 + /* * Call kvmppc_hv_entry in real mode. * Must be called with interrupts hard-disabled. @@ -57,6 +61,7 @@ _GLOBAL(kvmppc_hv_entry_trampoline) RFI kvmppc_call_hv_entry: + ld r4, HSTATE_KVM_VCPU(r13) bl kvmppc_hv_entry /* Back from guest - restore host state and return to caller */ @@ -73,15 +78,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) ld r3,PACA_SPRG3(r13) mtspr SPRN_SPRG3,r3 - /* - * Reload DEC. HDEC interrupts were disabled when - * we reloaded the host's LPCR value. - */ - ld r3, HSTATE_DECEXP(r13) - mftb r4 - subf r4, r4, r3 - mtspr SPRN_DEC, r4 - /* Reload the host's PMU registers */ ld r3, PACALPPACAPTR(r13) /* is the host using the PMU? */ lbz r4, LPPACA_PMCINUSE(r3) @@ -116,6 +112,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) isync 23: + /* + * Reload DEC. 
HDEC interrupts were disabled when + * we reloaded the host's LPCR value. + */ + ld r3, HSTATE_DECEXP(r13) + mftb r4 + subf r4, r4, r3 + mtspr SPRN_DEC, r4 + /* * For external and machine check interrupts, we need * to call the Linux handler to process the interrupt. @@ -156,15 +161,82 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) 13: b machine_check_fwnmi +kvmppc_primary_no_guest: + /* We handle this much like a ceded vcpu */ + /* set our bit in napping_threads */ + ld r5, HSTATE_KVM_VCORE(r13) + lbz r7, HSTATE_PTID(r13) + li r0, 1 + sld r0, r0, r7 + addi r6, r5, VCORE_NAPPING_THREADS +1: lwarx r3, 0, r6 + or r3, r3, r0 + stwcx. r3, 0, r6 + bne 1b + /* order napping_threads update vs testing entry_exit_count */ + isync + li r12, 0 + lwz r7, VCORE_ENTRY_EXIT(r5) + cmpwi r7, 0x100 + bge kvm_novcpu_exit /* another thread already exiting */ + li r3, NAPPING_NOVCPU + stb r3, HSTATE_NAPPING(r13) + li r3, 1 + stb r3, HSTATE_HWTHREAD_REQ(r13) + + b kvm_do_nap + +kvm_novcpu_wakeup: + ld r1, HSTATE_HOST_R1(r13) + ld r5, HSTATE_KVM_VCORE(r13) + li r0, 0 + stb r0, HSTATE_NAPPING(r13) + stb r0, HSTATE_HWTHREAD_REQ(r13) + + /* see if any other thread is already exiting */ + li r12, 0 + lwz r0, VCORE_ENTRY_EXIT(r5) + cmpwi r0, 0x100 + bge kvm_novcpu_exit + + /* clear our bit in napping_threads */ + lbz r7, HSTATE_PTID(r13) + li r0, 1 + sld r0, r0, r7 + addi r6, r5, VCORE_NAPPING_THREADS +4: lwarx r3, 0, r6 + andc r3, r3, r0 + stwcx. r3, 0, r6 + bne 4b + + /* Check the wake reason in SRR1 to see why we got here */ + mfspr r3, SPRN_SRR1 + rlwinm r3, r3, 44-31, 0x7 /* extract wake reason field */ + cmpwi r3, 4 /* was it an external interrupt? */ + bne kvm_novcpu_exit /* if not, exit the guest */ + + /* extern interrupt - read and handle it */ + li r12, BOOK3S_INTERRUPT_EXTERNAL + bl kvmppc_read_intr + cmpdi r3, 0 + bge kvm_novcpu_exit + li r12, 0 + + /* Got an IPI but other vcpus aren't yet exiting, must be a latecomer */ + ld r4, HSTATE_KVM_VCPU(r13) + cmpdi r4, 0 + bne kvmppc_got_guest + +kvm_novcpu_exit: + b hdec_soon + /* - * We come in here when wakened from nap mode on a secondary hw thread. + * We come in here when wakened from nap mode. * Relocation is off and most register values are lost. * r13 points to the PACA. */ .globl kvm_start_guest kvm_start_guest: - ld r1,PACAEMERGSP(r13) - subi r1,r1,STACK_FRAME_OVERHEAD ld r2,PACATOC(r13) li r0,KVM_HWTHREAD_IN_KVM @@ -176,8 +248,13 @@ kvm_start_guest: /* were we napping due to cede? 
*/ lbz r0,HSTATE_NAPPING(r13) - cmpwi r0,0 - bne kvm_end_cede + cmpwi r0,NAPPING_CEDE + beq kvm_end_cede + cmpwi r0,NAPPING_NOVCPU + beq kvm_novcpu_wakeup + + ld r1,PACAEMERGSP(r13) + subi r1,r1,STACK_FRAME_OVERHEAD /* * We weren't napping due to cede, so this must be a secondary @@ -220,7 +297,13 @@ kvm_start_guest: stw r8,HSTATE_SAVED_XIRR(r13) b kvm_no_guest -30: bl kvmppc_hv_entry +30: + /* Set HSTATE_DSCR(r13) to something sensible */ + LOAD_REG_ADDR(r6, dscr_default) + ld r6, 0(r6) + std r6, HSTATE_DSCR(r13) + + bl kvmppc_hv_entry /* Back from the guest, go back to nap */ /* Clear our vcpu pointer so we don't come back in early */ @@ -252,6 +335,7 @@ kvm_start_guest: kvm_no_guest: li r0, KVM_HWTHREAD_IN_NAP stb r0, HSTATE_HWTHREAD_STATE(r13) +kvm_do_nap: li r3, LPCR_PECE0 mfspr r4, SPRN_LPCR rlwimi r4, r3, 0, LPCR_PECE0 | LPCR_PECE1 @@ -276,7 +360,7 @@ kvmppc_hv_entry: /* Required state: * - * R4 = vcpu pointer + * R4 = vcpu pointer (or NULL) * MSR = ~IR|DR * R13 = PACA * R1 = host R1 @@ -286,124 +370,12 @@ kvmppc_hv_entry: std r0, PPC_LR_STKOFF(r1) stdu r1, -112(r1) -BEGIN_FTR_SECTION - /* Set partition DABR */ - /* Do this before re-enabling PMU to avoid P7 DABR corruption bug */ - li r5,3 - ld r6,VCPU_DABR(r4) - mtspr SPRN_DABRX,r5 - mtspr SPRN_DABR,r6 - BEGIN_FTR_SECTION_NESTED(89) - isync - END_FTR_SECTION_NESTED(CPU_FTR_ARCH_206, CPU_FTR_ARCH_206, 89) -END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) - - /* Load guest PMU registers */ - /* R4 is live here (vcpu pointer) */ - li r3, 1 - sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */ - mtspr SPRN_MMCR0, r3 /* freeze all counters, disable ints */ - isync - lwz r3, VCPU_PMC(r4) /* always load up guest PMU registers */ - lwz r5, VCPU_PMC + 4(r4) /* to prevent information leak */ - lwz r6, VCPU_PMC + 8(r4) - lwz r7, VCPU_PMC + 12(r4) - lwz r8, VCPU_PMC + 16(r4) - lwz r9, VCPU_PMC + 20(r4) -BEGIN_FTR_SECTION - lwz r10, VCPU_PMC + 24(r4) - lwz r11, VCPU_PMC + 28(r4) -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) - mtspr SPRN_PMC1, r3 - mtspr SPRN_PMC2, r5 - mtspr SPRN_PMC3, r6 - mtspr SPRN_PMC4, r7 - mtspr SPRN_PMC5, r8 - mtspr SPRN_PMC6, r9 -BEGIN_FTR_SECTION - mtspr SPRN_PMC7, r10 - mtspr SPRN_PMC8, r11 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) - ld r3, VCPU_MMCR(r4) - ld r5, VCPU_MMCR + 8(r4) - ld r6, VCPU_MMCR + 16(r4) - ld r7, VCPU_SIAR(r4) - ld r8, VCPU_SDAR(r4) - mtspr SPRN_MMCR1, r5 - mtspr SPRN_MMCRA, r6 - mtspr SPRN_SIAR, r7 - mtspr SPRN_SDAR, r8 - mtspr SPRN_MMCR0, r3 - isync - - /* Load up FP, VMX and VSX registers */ - bl kvmppc_load_fp - - ld r14, VCPU_GPR(R14)(r4) - ld r15, VCPU_GPR(R15)(r4) - ld r16, VCPU_GPR(R16)(r4) - ld r17, VCPU_GPR(R17)(r4) - ld r18, VCPU_GPR(R18)(r4) - ld r19, VCPU_GPR(R19)(r4) - ld r20, VCPU_GPR(R20)(r4) - ld r21, VCPU_GPR(R21)(r4) - ld r22, VCPU_GPR(R22)(r4) - ld r23, VCPU_GPR(R23)(r4) - ld r24, VCPU_GPR(R24)(r4) - ld r25, VCPU_GPR(R25)(r4) - ld r26, VCPU_GPR(R26)(r4) - ld r27, VCPU_GPR(R27)(r4) - ld r28, VCPU_GPR(R28)(r4) - ld r29, VCPU_GPR(R29)(r4) - ld r30, VCPU_GPR(R30)(r4) - ld r31, VCPU_GPR(R31)(r4) - -BEGIN_FTR_SECTION - /* Switch DSCR to guest value */ - ld r5, VCPU_DSCR(r4) - mtspr SPRN_DSCR, r5 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) - - /* - * Set the decrementer to the guest decrementer. 
- */ - ld r8,VCPU_DEC_EXPIRES(r4) - mftb r7 - subf r3,r7,r8 - mtspr SPRN_DEC,r3 - stw r3,VCPU_DEC(r4) - - ld r5, VCPU_SPRG0(r4) - ld r6, VCPU_SPRG1(r4) - ld r7, VCPU_SPRG2(r4) - ld r8, VCPU_SPRG3(r4) - mtspr SPRN_SPRG0, r5 - mtspr SPRN_SPRG1, r6 - mtspr SPRN_SPRG2, r7 - mtspr SPRN_SPRG3, r8 - /* Save R1 in the PACA */ std r1, HSTATE_HOST_R1(r13) - /* Load up DAR and DSISR */ - ld r5, VCPU_DAR(r4) - lwz r6, VCPU_DSISR(r4) - mtspr SPRN_DAR, r5 - mtspr SPRN_DSISR, r6 - li r6, KVM_GUEST_MODE_HOST_HV stb r6, HSTATE_IN_GUEST(r13) -BEGIN_FTR_SECTION - /* Restore AMR and UAMOR, set AMOR to all 1s */ - ld r5,VCPU_AMR(r4) - ld r6,VCPU_UAMOR(r4) - li r7,-1 - mtspr SPRN_AMR,r5 - mtspr SPRN_UAMOR,r6 - mtspr SPRN_AMOR,r7 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) - /* Clear out SLB */ li r6,0 slbmte r6,r6 @@ -429,8 +401,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) bne 21b /* Primary thread switches to guest partition. */ - ld r9,VCPU_KVM(r4) /* pointer to struct kvm */ - lwz r6,VCPU_PTID(r4) + ld r9,VCORE_KVM(r5) /* pointer to struct kvm */ + lbz r6,HSTATE_PTID(r13) cmpwi r6,0 bne 20f ld r6,KVM_SDR1(r9) @@ -504,32 +476,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) mtspr SPRN_RMOR,r8 isync - /* Increment yield count if they have a VPA */ - ld r3, VCPU_VPA(r4) - cmpdi r3, 0 - beq 25f - lwz r5, LPPACA_YIELDCOUNT(r3) - addi r5, r5, 1 - stw r5, LPPACA_YIELDCOUNT(r3) - li r6, 1 - stb r6, VCPU_VPA_DIRTY(r4) -25: /* Check if HDEC expires soon */ mfspr r3,SPRN_HDEC - cmpwi r3,10 + cmpwi r3,512 /* 1 microsecond */ li r12,BOOK3S_INTERRUPT_HV_DECREMENTER - mr r9,r4 blt hdec_soon - - /* Save purr/spurr */ - mfspr r5,SPRN_PURR - mfspr r6,SPRN_SPURR - std r5,HSTATE_PURR(r13) - std r6,HSTATE_SPURR(r13) - ld r7,VCPU_PURR(r4) - ld r8,VCPU_SPURR(r4) - mtspr SPRN_PURR,r7 - mtspr SPRN_SPURR,r8 b 31f /* @@ -540,7 +491,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) * We also have to invalidate the TLB since its * entries aren't tagged with the LPID. */ -30: ld r9,VCPU_KVM(r4) /* pointer to struct kvm */ +30: ld r5,HSTATE_KVM_VCORE(r13) + ld r9,VCORE_KVM(r5) /* pointer to struct kvm */ /* first take native_tlbie_lock */ .section ".toc","aw" @@ -605,7 +557,6 @@ toc_tlbie_lock: mfspr r3,SPRN_HDEC cmpwi r3,10 li r12,BOOK3S_INTERRUPT_HV_DECREMENTER - mr r9,r4 blt hdec_soon /* Enable HDEC interrupts */ @@ -620,9 +571,14 @@ toc_tlbie_lock: mfspr r0,SPRN_HID0 mfspr r0,SPRN_HID0 mfspr r0,SPRN_HID0 +31: + /* Do we have a guest vcpu to run? 
*/ + cmpdi r4, 0 + beq kvmppc_primary_no_guest +kvmppc_got_guest: /* Load up guest SLB entries */ -31: lwz r5,VCPU_SLB_MAX(r4) + lwz r5,VCPU_SLB_MAX(r4) cmpwi r5,0 beq 9f mtctr r5 @@ -633,6 +589,140 @@ toc_tlbie_lock: addi r6,r6,VCPU_SLB_SIZE bdnz 1b 9: + /* Increment yield count if they have a VPA */ + ld r3, VCPU_VPA(r4) + cmpdi r3, 0 + beq 25f + lwz r5, LPPACA_YIELDCOUNT(r3) + addi r5, r5, 1 + stw r5, LPPACA_YIELDCOUNT(r3) + li r6, 1 + stb r6, VCPU_VPA_DIRTY(r4) +25: + +BEGIN_FTR_SECTION + /* Save purr/spurr */ + mfspr r5,SPRN_PURR + mfspr r6,SPRN_SPURR + std r5,HSTATE_PURR(r13) + std r6,HSTATE_SPURR(r13) + ld r7,VCPU_PURR(r4) + ld r8,VCPU_SPURR(r4) + mtspr SPRN_PURR,r7 + mtspr SPRN_SPURR,r8 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) + +BEGIN_FTR_SECTION + /* Set partition DABR */ + /* Do this before re-enabling PMU to avoid P7 DABR corruption bug */ + li r5,3 + ld r6,VCPU_DABR(r4) + mtspr SPRN_DABRX,r5 + mtspr SPRN_DABR,r6 + BEGIN_FTR_SECTION_NESTED(89) + isync + END_FTR_SECTION_NESTED(CPU_FTR_ARCH_206, CPU_FTR_ARCH_206, 89) +END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) + + /* Load guest PMU registers */ + /* R4 is live here (vcpu pointer) */ + li r3, 1 + sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */ + mtspr SPRN_MMCR0, r3 /* freeze all counters, disable ints */ + isync + lwz r3, VCPU_PMC(r4) /* always load up guest PMU registers */ + lwz r5, VCPU_PMC + 4(r4) /* to prevent information leak */ + lwz r6, VCPU_PMC + 8(r4) + lwz r7, VCPU_PMC + 12(r4) + lwz r8, VCPU_PMC + 16(r4) + lwz r9, VCPU_PMC + 20(r4) +BEGIN_FTR_SECTION + lwz r10, VCPU_PMC + 24(r4) + lwz r11, VCPU_PMC + 28(r4) +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) + mtspr SPRN_PMC1, r3 + mtspr SPRN_PMC2, r5 + mtspr SPRN_PMC3, r6 + mtspr SPRN_PMC4, r7 + mtspr SPRN_PMC5, r8 + mtspr SPRN_PMC6, r9 +BEGIN_FTR_SECTION + mtspr SPRN_PMC7, r10 + mtspr SPRN_PMC8, r11 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) + ld r3, VCPU_MMCR(r4) + ld r5, VCPU_MMCR + 8(r4) + ld r6, VCPU_MMCR + 16(r4) + ld r7, VCPU_SIAR(r4) + ld r8, VCPU_SDAR(r4) + mtspr SPRN_MMCR1, r5 + mtspr SPRN_MMCRA, r6 + mtspr SPRN_SIAR, r7 + mtspr SPRN_SDAR, r8 + mtspr SPRN_MMCR0, r3 + isync + + /* Load up FP, VMX and VSX registers */ + bl kvmppc_load_fp + + ld r14, VCPU_GPR(R14)(r4) + ld r15, VCPU_GPR(R15)(r4) + ld r16, VCPU_GPR(R16)(r4) + ld r17, VCPU_GPR(R17)(r4) + ld r18, VCPU_GPR(R18)(r4) + ld r19, VCPU_GPR(R19)(r4) + ld r20, VCPU_GPR(R20)(r4) + ld r21, VCPU_GPR(R21)(r4) + ld r22, VCPU_GPR(R22)(r4) + ld r23, VCPU_GPR(R23)(r4) + ld r24, VCPU_GPR(R24)(r4) + ld r25, VCPU_GPR(R25)(r4) + ld r26, VCPU_GPR(R26)(r4) + ld r27, VCPU_GPR(R27)(r4) + ld r28, VCPU_GPR(R28)(r4) + ld r29, VCPU_GPR(R29)(r4) + ld r30, VCPU_GPR(R30)(r4) + ld r31, VCPU_GPR(R31)(r4) + +BEGIN_FTR_SECTION + /* Switch DSCR to guest value */ + ld r5, VCPU_DSCR(r4) + mtspr SPRN_DSCR, r5 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) + + /* + * Set the decrementer to the guest decrementer. 
+ */ + ld r8,VCPU_DEC_EXPIRES(r4) + mftb r7 + subf r3,r7,r8 + mtspr SPRN_DEC,r3 + stw r3,VCPU_DEC(r4) + + ld r5, VCPU_SPRG0(r4) + ld r6, VCPU_SPRG1(r4) + ld r7, VCPU_SPRG2(r4) + ld r8, VCPU_SPRG3(r4) + mtspr SPRN_SPRG0, r5 + mtspr SPRN_SPRG1, r6 + mtspr SPRN_SPRG2, r7 + mtspr SPRN_SPRG3, r8 + + /* Load up DAR and DSISR */ + ld r5, VCPU_DAR(r4) + lwz r6, VCPU_DSISR(r4) + mtspr SPRN_DAR, r5 + mtspr SPRN_DSISR, r6 + +BEGIN_FTR_SECTION + /* Restore AMR and UAMOR, set AMOR to all 1s */ + ld r5,VCPU_AMR(r4) + ld r6,VCPU_UAMOR(r4) + li r7,-1 + mtspr SPRN_AMR,r5 + mtspr SPRN_UAMOR,r6 + mtspr SPRN_AMOR,r7 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) /* Restore state of CTRL run bit; assume 1 on entry */ lwz r5,VCPU_CTRL(r4) @@ -984,13 +1074,130 @@ BEGIN_FTR_SECTION mtspr SPRN_SPURR,r4 END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_201) + /* Save DEC */ + mfspr r5,SPRN_DEC + mftb r6 + extsw r5,r5 + add r5,r5,r6 + std r5,VCPU_DEC_EXPIRES(r9) + + /* Save and reset AMR and UAMOR before turning on the MMU */ +BEGIN_FTR_SECTION + mfspr r5,SPRN_AMR + mfspr r6,SPRN_UAMOR + std r5,VCPU_AMR(r9) + std r6,VCPU_UAMOR(r9) + li r6,0 + mtspr SPRN_AMR,r6 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) + + /* Switch DSCR back to host value */ +BEGIN_FTR_SECTION + mfspr r8, SPRN_DSCR + ld r7, HSTATE_DSCR(r13) + std r8, VCPU_DSCR(r9) + mtspr SPRN_DSCR, r7 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) + + /* Save non-volatile GPRs */ + std r14, VCPU_GPR(R14)(r9) + std r15, VCPU_GPR(R15)(r9) + std r16, VCPU_GPR(R16)(r9) + std r17, VCPU_GPR(R17)(r9) + std r18, VCPU_GPR(R18)(r9) + std r19, VCPU_GPR(R19)(r9) + std r20, VCPU_GPR(R20)(r9) + std r21, VCPU_GPR(R21)(r9) + std r22, VCPU_GPR(R22)(r9) + std r23, VCPU_GPR(R23)(r9) + std r24, VCPU_GPR(R24)(r9) + std r25, VCPU_GPR(R25)(r9) + std r26, VCPU_GPR(R26)(r9) + std r27, VCPU_GPR(R27)(r9) + std r28, VCPU_GPR(R28)(r9) + std r29, VCPU_GPR(R29)(r9) + std r30, VCPU_GPR(R30)(r9) + std r31, VCPU_GPR(R31)(r9) + + /* Save SPRGs */ + mfspr r3, SPRN_SPRG0 + mfspr r4, SPRN_SPRG1 + mfspr r5, SPRN_SPRG2 + mfspr r6, SPRN_SPRG3 + std r3, VCPU_SPRG0(r9) + std r4, VCPU_SPRG1(r9) + std r5, VCPU_SPRG2(r9) + std r6, VCPU_SPRG3(r9) + + /* save FP state */ + mr r3, r9 + bl kvmppc_save_fp + + /* Increment yield count if they have a VPA */ + ld r8, VCPU_VPA(r9) /* do they have a VPA? */ + cmpdi r8, 0 + beq 25f + lwz r3, LPPACA_YIELDCOUNT(r8) + addi r3, r3, 1 + stw r3, LPPACA_YIELDCOUNT(r8) + li r3, 1 + stb r3, VCPU_VPA_DIRTY(r9) +25: + /* Save PMU registers if requested */ + /* r8 and cr0.eq are live here */ + li r3, 1 + sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */ + mfspr r4, SPRN_MMCR0 /* save MMCR0 */ + mtspr SPRN_MMCR0, r3 /* freeze all counters, disable ints */ + mfspr r6, SPRN_MMCRA +BEGIN_FTR_SECTION + /* On P7, clear MMCRA in order to disable SDAR updates */ + li r7, 0 + mtspr SPRN_MMCRA, r7 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) + isync + beq 21f /* if no VPA, save PMU stuff anyway */ + lbz r7, LPPACA_PMCINUSE(r8) + cmpwi r7, 0 /* did they ask for PMU stuff to be saved? 
*/ + bne 21f + std r3, VCPU_MMCR(r9) /* if not, set saved MMCR0 to FC */ + b 22f +21: mfspr r5, SPRN_MMCR1 + mfspr r7, SPRN_SIAR + mfspr r8, SPRN_SDAR + std r4, VCPU_MMCR(r9) + std r5, VCPU_MMCR + 8(r9) + std r6, VCPU_MMCR + 16(r9) + std r7, VCPU_SIAR(r9) + std r8, VCPU_SDAR(r9) + mfspr r3, SPRN_PMC1 + mfspr r4, SPRN_PMC2 + mfspr r5, SPRN_PMC3 + mfspr r6, SPRN_PMC4 + mfspr r7, SPRN_PMC5 + mfspr r8, SPRN_PMC6 +BEGIN_FTR_SECTION + mfspr r10, SPRN_PMC7 + mfspr r11, SPRN_PMC8 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) + stw r3, VCPU_PMC(r9) + stw r4, VCPU_PMC + 4(r9) + stw r5, VCPU_PMC + 8(r9) + stw r6, VCPU_PMC + 12(r9) + stw r7, VCPU_PMC + 16(r9) + stw r8, VCPU_PMC + 20(r9) +BEGIN_FTR_SECTION + stw r10, VCPU_PMC + 24(r9) + stw r11, VCPU_PMC + 28(r9) +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) +22: /* Clear out SLB */ li r5,0 slbmte r5,r5 slbia ptesync -hdec_soon: /* r9 = vcpu, r12 = trap, r13 = paca */ +hdec_soon: /* r12 = trap, r13 = paca */ BEGIN_FTR_SECTION b 32f END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) @@ -1024,8 +1231,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) */ cmpwi r3,0x100 /* Are we the first here? */ bge 43f - cmpwi r3,1 /* Are any other threads in the guest? */ - ble 43f cmpwi r12,BOOK3S_INTERRUPT_HV_DECREMENTER beq 40f li r0,0 @@ -1036,7 +1241,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) * doesn't wake CPUs up from nap. */ lwz r3,VCORE_NAPPING_THREADS(r5) - lwz r4,VCPU_PTID(r9) + lbz r4,HSTATE_PTID(r13) li r0,1 sld r0,r0,r4 andc. r3,r3,r0 /* no sense IPI'ing ourselves */ @@ -1053,10 +1258,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) addi r6,r6,PACA_SIZE bne 42b +secondary_too_late: /* Secondary threads wait for primary to do partition switch */ -43: ld r4,VCPU_KVM(r9) /* pointer to struct kvm */ - ld r5,HSTATE_KVM_VCORE(r13) - lwz r3,VCPU_PTID(r9) +43: ld r5,HSTATE_KVM_VCORE(r13) + ld r4,VCORE_KVM(r5) /* pointer to struct kvm */ + lbz r3,HSTATE_PTID(r13) cmpwi r3,0 beq 15f HMT_LOW @@ -1121,7 +1327,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) * We have to lock against concurrent tlbies, and * we have to flush the whole TLB. 
*/ -32: ld r4,VCPU_KVM(r9) /* pointer to struct kvm */ +32: ld r5,HSTATE_KVM_VCORE(r13) + ld r4,VCORE_KVM(r5) /* pointer to struct kvm */ /* Take the guest's tlbie_lock */ #ifdef __BIG_ENDIAN__ @@ -1204,151 +1411,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) 1: addi r8,r8,16 .endr - /* Save DEC */ - mfspr r5,SPRN_DEC - mftb r6 - extsw r5,r5 - add r5,r5,r6 - std r5,VCPU_DEC_EXPIRES(r9) - - /* Save and reset AMR and UAMOR before turning on the MMU */ -BEGIN_FTR_SECTION - mfspr r5,SPRN_AMR - mfspr r6,SPRN_UAMOR - std r5,VCPU_AMR(r9) - std r6,VCPU_UAMOR(r9) - li r6,0 - mtspr SPRN_AMR,r6 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) - /* Unset guest mode */ li r0, KVM_GUEST_MODE_NONE stb r0, HSTATE_IN_GUEST(r13) - /* Switch DSCR back to host value */ -BEGIN_FTR_SECTION - mfspr r8, SPRN_DSCR - ld r7, HSTATE_DSCR(r13) - std r8, VCPU_DSCR(r9) - mtspr SPRN_DSCR, r7 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) - - /* Save non-volatile GPRs */ - std r14, VCPU_GPR(R14)(r9) - std r15, VCPU_GPR(R15)(r9) - std r16, VCPU_GPR(R16)(r9) - std r17, VCPU_GPR(R17)(r9) - std r18, VCPU_GPR(R18)(r9) - std r19, VCPU_GPR(R19)(r9) - std r20, VCPU_GPR(R20)(r9) - std r21, VCPU_GPR(R21)(r9) - std r22, VCPU_GPR(R22)(r9) - std r23, VCPU_GPR(R23)(r9) - std r24, VCPU_GPR(R24)(r9) - std r25, VCPU_GPR(R25)(r9) - std r26, VCPU_GPR(R26)(r9) - std r27, VCPU_GPR(R27)(r9) - std r28, VCPU_GPR(R28)(r9) - std r29, VCPU_GPR(R29)(r9) - std r30, VCPU_GPR(R30)(r9) - std r31, VCPU_GPR(R31)(r9) - - /* Save SPRGs */ - mfspr r3, SPRN_SPRG0 - mfspr r4, SPRN_SPRG1 - mfspr r5, SPRN_SPRG2 - mfspr r6, SPRN_SPRG3 - std r3, VCPU_SPRG0(r9) - std r4, VCPU_SPRG1(r9) - std r5, VCPU_SPRG2(r9) - std r6, VCPU_SPRG3(r9) - - /* save FP state */ - mr r3, r9 - bl kvmppc_save_fp - - /* Increment yield count if they have a VPA */ - ld r8, VCPU_VPA(r9) /* do they have a VPA? */ - cmpdi r8, 0 - beq 25f - lwz r3, LPPACA_YIELDCOUNT(r8) - addi r3, r3, 1 - stw r3, LPPACA_YIELDCOUNT(r8) - li r3, 1 - stb r3, VCPU_VPA_DIRTY(r9) -25: - /* Save PMU registers if requested */ - /* r8 and cr0.eq are live here */ - li r3, 1 - sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */ - mfspr r4, SPRN_MMCR0 /* save MMCR0 */ - mtspr SPRN_MMCR0, r3 /* freeze all counters, disable ints */ - mfspr r6, SPRN_MMCRA -BEGIN_FTR_SECTION - /* On P7, clear MMCRA in order to disable SDAR updates */ - li r7, 0 - mtspr SPRN_MMCRA, r7 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) - isync - beq 21f /* if no VPA, save PMU stuff anyway */ - lbz r7, LPPACA_PMCINUSE(r8) - cmpwi r7, 0 /* did they ask for PMU stuff to be saved? 
*/ - bne 21f - std r3, VCPU_MMCR(r9) /* if not, set saved MMCR0 to FC */ - b 22f -21: mfspr r5, SPRN_MMCR1 - mfspr r7, SPRN_SIAR - mfspr r8, SPRN_SDAR - std r4, VCPU_MMCR(r9) - std r5, VCPU_MMCR + 8(r9) - std r6, VCPU_MMCR + 16(r9) - std r7, VCPU_SIAR(r9) - std r8, VCPU_SDAR(r9) - mfspr r3, SPRN_PMC1 - mfspr r4, SPRN_PMC2 - mfspr r5, SPRN_PMC3 - mfspr r6, SPRN_PMC4 - mfspr r7, SPRN_PMC5 - mfspr r8, SPRN_PMC6 -BEGIN_FTR_SECTION - mfspr r10, SPRN_PMC7 - mfspr r11, SPRN_PMC8 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) - stw r3, VCPU_PMC(r9) - stw r4, VCPU_PMC + 4(r9) - stw r5, VCPU_PMC + 8(r9) - stw r6, VCPU_PMC + 12(r9) - stw r7, VCPU_PMC + 16(r9) - stw r8, VCPU_PMC + 20(r9) -BEGIN_FTR_SECTION - stw r10, VCPU_PMC + 24(r9) - stw r11, VCPU_PMC + 28(r9) -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) -22: ld r0, 112+PPC_LR_STKOFF(r1) addi r1, r1, 112 mtlr r0 blr -secondary_too_late: - ld r5,HSTATE_KVM_VCORE(r13) - HMT_LOW -13: lbz r3,VCORE_IN_GUEST(r5) - cmpwi r3,0 - bne 13b - HMT_MEDIUM - li r0, KVM_GUEST_MODE_NONE - stb r0, HSTATE_IN_GUEST(r13) - ld r11,PACA_SLBSHADOWPTR(r13) - - .rept SLB_NUM_BOLTED - ld r5,SLBSHADOW_SAVEAREA(r11) - ld r6,SLBSHADOW_SAVEAREA+8(r11) - andis. r7,r5,SLB_ESID_V@h - beq 1f - slbmte r6,r5 -1: addi r11,r11,16 - .endr - b 22b /* * Check whether an HDSI is an HPTE not found fault or something else. @@ -1649,7 +1719,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206) * up to the host. */ ld r5,HSTATE_KVM_VCORE(r13) - lwz r6,VCPU_PTID(r3) + lbz r6,HSTATE_PTID(r13) lwz r8,VCORE_ENTRY_EXIT(r5) clrldi r8,r8,56 li r0,1 @@ -1662,7 +1732,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206) bge kvm_cede_exit stwcx. r4,0,r6 bne 31b - li r0,1 + li r0,NAPPING_CEDE stb r0,HSTATE_NAPPING(r13) /* order napping_threads update vs testing entry_exit_count */ lwsync @@ -1751,7 +1821,7 @@ kvm_end_cede: /* clear our bit in vcore->napping_threads */ 33: ld r5,HSTATE_KVM_VCORE(r13) - lwz r3,VCPU_PTID(r4) + lbz r3,HSTATE_PTID(r13) li r0,1 sld r0,r0,r3 addi r6,r5,VCORE_NAPPING_THREADS -- cgit v1.2.3 From b005255e12a311d2c87ea70a7c7b192b2187c22c Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Wed, 8 Jan 2014 21:25:21 +1100 Subject: KVM: PPC: Book3S HV: Context-switch new POWER8 SPRs This adds fields to the struct kvm_vcpu_arch to store the new guest-accessible SPRs on POWER8, adds code to the get/set_one_reg functions to allow userspace to access this state, and adds code to the guest entry and exit to context-switch these SPRs between host and guest. Note that DPDES (Directed Privileged Doorbell Exception State) is shared between threads on a core; hence we store it in struct kvmppc_vcore and have the master thread save and restore it. 
Signed-off-by: Michael Neuling Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_host.h | 25 +++++- arch/powerpc/include/asm/reg.h | 17 ++++ arch/powerpc/include/uapi/asm/kvm.h | 1 + arch/powerpc/kernel/asm-offsets.c | 23 +++++ arch/powerpc/kvm/book3s_hv.c | 153 +++++++++++++++++++++++++++++++- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 145 ++++++++++++++++++++++++++++++ 6 files changed, 361 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index b850544dbc3f..81c92d1d7978 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -304,6 +304,7 @@ struct kvmppc_vcore { ulong lpcr; u32 arch_compat; ulong pcr; + ulong dpdes; /* doorbell state (POWER8) */ }; #define VCORE_ENTRY_COUNT(vc) ((vc)->entry_exit_count & 0xff) @@ -448,6 +449,7 @@ struct kvm_vcpu_arch { ulong pc; ulong ctr; ulong lr; + ulong tar; ulong xer; u32 cr; @@ -457,13 +459,32 @@ struct kvm_vcpu_arch { ulong guest_owned_ext; ulong purr; ulong spurr; + ulong ic; + ulong vtb; ulong dscr; ulong amr; ulong uamor; + ulong iamr; u32 ctrl; ulong dabr; + ulong dawr; + ulong dawrx; + ulong ciabr; ulong cfar; ulong ppr; + ulong pspb; + ulong fscr; + ulong tfhar; + ulong tfiar; + ulong texasr; + ulong ebbhr; + ulong ebbrr; + ulong bescr; + ulong csigr; + ulong tacr; + ulong tcscr; + ulong acop; + ulong wort; ulong shadow_srr1; #endif u32 vrsave; /* also USPRG0 */ @@ -498,10 +519,12 @@ struct kvm_vcpu_arch { u32 ccr1; u32 dbsr; - u64 mmcr[3]; + u64 mmcr[5]; u32 pmc[8]; + u32 spmc[2]; u64 siar; u64 sdar; + u64 sier; #ifdef CONFIG_KVM_EXIT_TIMING struct mutex exit_timing_lock; diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 5c45787d551e..2f41e6475648 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -223,6 +223,11 @@ #define CTRL_TE 0x00c00000 /* thread enable */ #define CTRL_RUNLATCH 0x1 #define SPRN_DAWR 0xB4 +#define SPRN_CIABR 0xBB +#define CIABR_PRIV 0x3 +#define CIABR_PRIV_USER 1 +#define CIABR_PRIV_SUPER 2 +#define CIABR_PRIV_HYPER 3 #define SPRN_DAWRX 0xBC #define DAWRX_USER (1UL << 0) #define DAWRX_KERNEL (1UL << 1) @@ -260,6 +265,8 @@ #define SPRN_HRMOR 0x139 /* Real mode offset register */ #define SPRN_HSRR0 0x13A /* Hypervisor Save/Restore 0 */ #define SPRN_HSRR1 0x13B /* Hypervisor Save/Restore 1 */ +#define SPRN_IC 0x350 /* Virtual Instruction Count */ +#define SPRN_VTB 0x351 /* Virtual Time Base */ /* HFSCR and FSCR bit numbers are the same */ #define FSCR_TAR_LG 8 /* Enable Target Address Register */ #define FSCR_EBB_LG 7 /* Enable Event Based Branching */ @@ -368,6 +375,8 @@ #define DER_EBRKE 0x00000002 /* External Breakpoint Interrupt */ #define DER_DPIE 0x00000001 /* Dev. Port Nonmaskable Request */ #define SPRN_DMISS 0x3D0 /* Data TLB Miss Register */ +#define SPRN_DHDES 0x0B1 /* Directed Hyp. Doorbell Exc. State */ +#define SPRN_DPDES 0x0B0 /* Directed Priv. Doorbell Exc. State */ #define SPRN_EAR 0x11A /* External Address Register */ #define SPRN_HASH1 0x3D2 /* Primary Hash Address Register */ #define SPRN_HASH2 0x3D3 /* Secondary Hash Address Resgister */ @@ -427,6 +436,7 @@ #define SPRN_IABR 0x3F2 /* Instruction Address Breakpoint Register */ #define SPRN_IABR2 0x3FA /* 83xx */ #define SPRN_IBCR 0x135 /* 83xx Insn Breakpoint Control Reg */ +#define SPRN_IAMR 0x03D /* Instr. 
Authority Mask Reg */ #define SPRN_HID4 0x3F4 /* 970 HID4 */ #define HID4_LPES0 (1ul << (63-0)) /* LPAR env. sel. bit 0 */ #define HID4_RMLS2_SH (63 - 2) /* Real mode limit bottom 2 bits */ @@ -541,6 +551,7 @@ #define SPRN_PIR 0x3FF /* Processor Identification Register */ #endif #define SPRN_TIR 0x1BE /* Thread Identification Register */ +#define SPRN_PSPB 0x09F /* Problem State Priority Boost reg */ #define SPRN_PTEHI 0x3D5 /* 981 7450 PTE HI word (S/W TLB load) */ #define SPRN_PTELO 0x3D6 /* 982 7450 PTE LO word (S/W TLB load) */ #define SPRN_PURR 0x135 /* Processor Utilization of Resources Reg */ @@ -682,6 +693,7 @@ #define SPRN_EBBHR 804 /* Event based branch handler register */ #define SPRN_EBBRR 805 /* Event based branch return register */ #define SPRN_BESCR 806 /* Branch event status and control register */ +#define SPRN_WORT 895 /* Workload optimization register - thread */ #define SPRN_PMC1 787 #define SPRN_PMC2 788 @@ -698,6 +710,11 @@ #define SIER_SIHV 0x1000000 /* Sampled MSR_HV */ #define SIER_SIAR_VALID 0x0400000 /* SIAR contents valid */ #define SIER_SDAR_VALID 0x0200000 /* SDAR contents valid */ +#define SPRN_TACR 888 +#define SPRN_TCSCR 889 +#define SPRN_CSIGR 890 +#define SPRN_SPMC1 892 +#define SPRN_SPMC2 893 /* When EBB is enabled, some of MMCR0/MMCR2/SIER are user accessible */ #define MMCR0_USER_MASK (MMCR0_FC | MMCR0_PMXE | MMCR0_PMAO) diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index 6836ec79a830..a586fb9b77bd 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -545,6 +545,7 @@ struct kvm_get_htab_header { #define KVM_REG_PPC_TCSCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb1) #define KVM_REG_PPC_PID (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb2) #define KVM_REG_PPC_ACOP (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb3) +#define KVM_REG_PPC_WORT (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb4) #define KVM_REG_PPC_VRSAVE (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb4) #define KVM_REG_PPC_LPCR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb5) diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 332ae66883e4..043900abbbb0 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -432,6 +432,7 @@ int main(void) DEFINE(VCPU_XER, offsetof(struct kvm_vcpu, arch.xer)); DEFINE(VCPU_CTR, offsetof(struct kvm_vcpu, arch.ctr)); DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr)); + DEFINE(VCPU_TAR, offsetof(struct kvm_vcpu, arch.tar)); DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr)); DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.pc)); #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE @@ -484,11 +485,17 @@ int main(void) DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id)); DEFINE(VCPU_PURR, offsetof(struct kvm_vcpu, arch.purr)); DEFINE(VCPU_SPURR, offsetof(struct kvm_vcpu, arch.spurr)); + DEFINE(VCPU_IC, offsetof(struct kvm_vcpu, arch.ic)); + DEFINE(VCPU_VTB, offsetof(struct kvm_vcpu, arch.vtb)); DEFINE(VCPU_DSCR, offsetof(struct kvm_vcpu, arch.dscr)); DEFINE(VCPU_AMR, offsetof(struct kvm_vcpu, arch.amr)); DEFINE(VCPU_UAMOR, offsetof(struct kvm_vcpu, arch.uamor)); + DEFINE(VCPU_IAMR, offsetof(struct kvm_vcpu, arch.iamr)); DEFINE(VCPU_CTRL, offsetof(struct kvm_vcpu, arch.ctrl)); DEFINE(VCPU_DABR, offsetof(struct kvm_vcpu, arch.dabr)); + DEFINE(VCPU_DAWR, offsetof(struct kvm_vcpu, arch.dawr)); + DEFINE(VCPU_DAWRX, offsetof(struct kvm_vcpu, arch.dawrx)); + DEFINE(VCPU_CIABR, offsetof(struct kvm_vcpu, arch.ciabr)); DEFINE(VCPU_HFLAGS, offsetof(struct kvm_vcpu, arch.hflags)); 
DEFINE(VCPU_DEC, offsetof(struct kvm_vcpu, arch.dec)); DEFINE(VCPU_DEC_EXPIRES, offsetof(struct kvm_vcpu, arch.dec_expires)); @@ -497,8 +504,10 @@ int main(void) DEFINE(VCPU_PRODDED, offsetof(struct kvm_vcpu, arch.prodded)); DEFINE(VCPU_MMCR, offsetof(struct kvm_vcpu, arch.mmcr)); DEFINE(VCPU_PMC, offsetof(struct kvm_vcpu, arch.pmc)); + DEFINE(VCPU_SPMC, offsetof(struct kvm_vcpu, arch.spmc)); DEFINE(VCPU_SIAR, offsetof(struct kvm_vcpu, arch.siar)); DEFINE(VCPU_SDAR, offsetof(struct kvm_vcpu, arch.sdar)); + DEFINE(VCPU_SIER, offsetof(struct kvm_vcpu, arch.sier)); DEFINE(VCPU_SLB, offsetof(struct kvm_vcpu, arch.slb)); DEFINE(VCPU_SLB_MAX, offsetof(struct kvm_vcpu, arch.slb_max)); DEFINE(VCPU_SLB_NR, offsetof(struct kvm_vcpu, arch.slb_nr)); @@ -508,6 +517,19 @@ int main(void) DEFINE(VCPU_TRAP, offsetof(struct kvm_vcpu, arch.trap)); DEFINE(VCPU_CFAR, offsetof(struct kvm_vcpu, arch.cfar)); DEFINE(VCPU_PPR, offsetof(struct kvm_vcpu, arch.ppr)); + DEFINE(VCPU_FSCR, offsetof(struct kvm_vcpu, arch.fscr)); + DEFINE(VCPU_PSPB, offsetof(struct kvm_vcpu, arch.pspb)); + DEFINE(VCPU_TFHAR, offsetof(struct kvm_vcpu, arch.tfhar)); + DEFINE(VCPU_TFIAR, offsetof(struct kvm_vcpu, arch.tfiar)); + DEFINE(VCPU_TEXASR, offsetof(struct kvm_vcpu, arch.texasr)); + DEFINE(VCPU_EBBHR, offsetof(struct kvm_vcpu, arch.ebbhr)); + DEFINE(VCPU_EBBRR, offsetof(struct kvm_vcpu, arch.ebbrr)); + DEFINE(VCPU_BESCR, offsetof(struct kvm_vcpu, arch.bescr)); + DEFINE(VCPU_CSIGR, offsetof(struct kvm_vcpu, arch.csigr)); + DEFINE(VCPU_TACR, offsetof(struct kvm_vcpu, arch.tacr)); + DEFINE(VCPU_TCSCR, offsetof(struct kvm_vcpu, arch.tcscr)); + DEFINE(VCPU_ACOP, offsetof(struct kvm_vcpu, arch.acop)); + DEFINE(VCPU_WORT, offsetof(struct kvm_vcpu, arch.wort)); DEFINE(VCPU_SHADOW_SRR1, offsetof(struct kvm_vcpu, arch.shadow_srr1)); DEFINE(VCORE_ENTRY_EXIT, offsetof(struct kvmppc_vcore, entry_exit_count)); DEFINE(VCORE_NAP_COUNT, offsetof(struct kvmppc_vcore, nap_count)); @@ -517,6 +539,7 @@ int main(void) DEFINE(VCORE_TB_OFFSET, offsetof(struct kvmppc_vcore, tb_offset)); DEFINE(VCORE_LPCR, offsetof(struct kvmppc_vcore, lpcr)); DEFINE(VCORE_PCR, offsetof(struct kvmppc_vcore, pcr)); + DEFINE(VCORE_DPDES, offsetof(struct kvmppc_vcore, dpdes)); DEFINE(VCPU_SLB_E, offsetof(struct kvmppc_slb, orige)); DEFINE(VCPU_SLB_V, offsetof(struct kvmppc_slb, origv)); DEFINE(VCPU_SLB_SIZE, sizeof(struct kvmppc_slb)); diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 7da53cd215db..5b08ddf91d2d 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -800,7 +800,7 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_UAMOR: *val = get_reg_val(id, vcpu->arch.uamor); break; - case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCRA: + case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCRS: i = id - KVM_REG_PPC_MMCR0; *val = get_reg_val(id, vcpu->arch.mmcr[i]); break; @@ -808,12 +808,85 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, i = id - KVM_REG_PPC_PMC1; *val = get_reg_val(id, vcpu->arch.pmc[i]); break; + case KVM_REG_PPC_SPMC1 ... 
KVM_REG_PPC_SPMC2: + i = id - KVM_REG_PPC_SPMC1; + *val = get_reg_val(id, vcpu->arch.spmc[i]); + break; case KVM_REG_PPC_SIAR: *val = get_reg_val(id, vcpu->arch.siar); break; case KVM_REG_PPC_SDAR: *val = get_reg_val(id, vcpu->arch.sdar); break; + case KVM_REG_PPC_SIER: + *val = get_reg_val(id, vcpu->arch.sier); + break; + case KVM_REG_PPC_IAMR: + *val = get_reg_val(id, vcpu->arch.iamr); + break; + case KVM_REG_PPC_TFHAR: + *val = get_reg_val(id, vcpu->arch.tfhar); + break; + case KVM_REG_PPC_TFIAR: + *val = get_reg_val(id, vcpu->arch.tfiar); + break; + case KVM_REG_PPC_TEXASR: + *val = get_reg_val(id, vcpu->arch.texasr); + break; + case KVM_REG_PPC_FSCR: + *val = get_reg_val(id, vcpu->arch.fscr); + break; + case KVM_REG_PPC_PSPB: + *val = get_reg_val(id, vcpu->arch.pspb); + break; + case KVM_REG_PPC_EBBHR: + *val = get_reg_val(id, vcpu->arch.ebbhr); + break; + case KVM_REG_PPC_EBBRR: + *val = get_reg_val(id, vcpu->arch.ebbrr); + break; + case KVM_REG_PPC_BESCR: + *val = get_reg_val(id, vcpu->arch.bescr); + break; + case KVM_REG_PPC_TAR: + *val = get_reg_val(id, vcpu->arch.tar); + break; + case KVM_REG_PPC_DPDES: + *val = get_reg_val(id, vcpu->arch.vcore->dpdes); + break; + case KVM_REG_PPC_DAWR: + *val = get_reg_val(id, vcpu->arch.dawr); + break; + case KVM_REG_PPC_DAWRX: + *val = get_reg_val(id, vcpu->arch.dawrx); + break; + case KVM_REG_PPC_CIABR: + *val = get_reg_val(id, vcpu->arch.ciabr); + break; + case KVM_REG_PPC_IC: + *val = get_reg_val(id, vcpu->arch.ic); + break; + case KVM_REG_PPC_VTB: + *val = get_reg_val(id, vcpu->arch.vtb); + break; + case KVM_REG_PPC_CSIGR: + *val = get_reg_val(id, vcpu->arch.csigr); + break; + case KVM_REG_PPC_TACR: + *val = get_reg_val(id, vcpu->arch.tacr); + break; + case KVM_REG_PPC_TCSCR: + *val = get_reg_val(id, vcpu->arch.tcscr); + break; + case KVM_REG_PPC_PID: + *val = get_reg_val(id, vcpu->arch.pid); + break; + case KVM_REG_PPC_ACOP: + *val = get_reg_val(id, vcpu->arch.acop); + break; + case KVM_REG_PPC_WORT: + *val = get_reg_val(id, vcpu->arch.wort); + break; case KVM_REG_PPC_VPA_ADDR: spin_lock(&vcpu->arch.vpa_update_lock); *val = get_reg_val(id, vcpu->arch.vpa.next_gpa); @@ -882,7 +955,7 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_UAMOR: vcpu->arch.uamor = set_reg_val(id, *val); break; - case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCRA: + case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCRS: i = id - KVM_REG_PPC_MMCR0; vcpu->arch.mmcr[i] = set_reg_val(id, *val); break; @@ -890,12 +963,88 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, i = id - KVM_REG_PPC_PMC1; vcpu->arch.pmc[i] = set_reg_val(id, *val); break; + case KVM_REG_PPC_SPMC1 ... 
KVM_REG_PPC_SPMC2: + i = id - KVM_REG_PPC_SPMC1; + vcpu->arch.spmc[i] = set_reg_val(id, *val); + break; case KVM_REG_PPC_SIAR: vcpu->arch.siar = set_reg_val(id, *val); break; case KVM_REG_PPC_SDAR: vcpu->arch.sdar = set_reg_val(id, *val); break; + case KVM_REG_PPC_SIER: + vcpu->arch.sier = set_reg_val(id, *val); + break; + case KVM_REG_PPC_IAMR: + vcpu->arch.iamr = set_reg_val(id, *val); + break; + case KVM_REG_PPC_TFHAR: + vcpu->arch.tfhar = set_reg_val(id, *val); + break; + case KVM_REG_PPC_TFIAR: + vcpu->arch.tfiar = set_reg_val(id, *val); + break; + case KVM_REG_PPC_TEXASR: + vcpu->arch.texasr = set_reg_val(id, *val); + break; + case KVM_REG_PPC_FSCR: + vcpu->arch.fscr = set_reg_val(id, *val); + break; + case KVM_REG_PPC_PSPB: + vcpu->arch.pspb = set_reg_val(id, *val); + break; + case KVM_REG_PPC_EBBHR: + vcpu->arch.ebbhr = set_reg_val(id, *val); + break; + case KVM_REG_PPC_EBBRR: + vcpu->arch.ebbrr = set_reg_val(id, *val); + break; + case KVM_REG_PPC_BESCR: + vcpu->arch.bescr = set_reg_val(id, *val); + break; + case KVM_REG_PPC_TAR: + vcpu->arch.tar = set_reg_val(id, *val); + break; + case KVM_REG_PPC_DPDES: + vcpu->arch.vcore->dpdes = set_reg_val(id, *val); + break; + case KVM_REG_PPC_DAWR: + vcpu->arch.dawr = set_reg_val(id, *val); + break; + case KVM_REG_PPC_DAWRX: + vcpu->arch.dawrx = set_reg_val(id, *val) & ~DAWRX_HYP; + break; + case KVM_REG_PPC_CIABR: + vcpu->arch.ciabr = set_reg_val(id, *val); + /* Don't allow setting breakpoints in hypervisor code */ + if ((vcpu->arch.ciabr & CIABR_PRIV) == CIABR_PRIV_HYPER) + vcpu->arch.ciabr &= ~CIABR_PRIV; /* disable */ + break; + case KVM_REG_PPC_IC: + vcpu->arch.ic = set_reg_val(id, *val); + break; + case KVM_REG_PPC_VTB: + vcpu->arch.vtb = set_reg_val(id, *val); + break; + case KVM_REG_PPC_CSIGR: + vcpu->arch.csigr = set_reg_val(id, *val); + break; + case KVM_REG_PPC_TACR: + vcpu->arch.tacr = set_reg_val(id, *val); + break; + case KVM_REG_PPC_TCSCR: + vcpu->arch.tcscr = set_reg_val(id, *val); + break; + case KVM_REG_PPC_PID: + vcpu->arch.pid = set_reg_val(id, *val); + break; + case KVM_REG_PPC_ACOP: + vcpu->arch.acop = set_reg_val(id, *val); + break; + case KVM_REG_PPC_WORT: + vcpu->arch.wort = set_reg_val(id, *val); + break; case KVM_REG_PPC_VPA_ADDR: addr = set_reg_val(id, *val); r = -EINVAL; diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 8bbe91bdb6da..691dd1ef555b 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -460,6 +460,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) beq 38f mtspr SPRN_PCR, r7 38: + +BEGIN_FTR_SECTION + /* DPDES is shared between threads */ + ld r8, VCORE_DPDES(r5) + mtspr SPRN_DPDES, r8 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) + li r0,1 stb r0,VCORE_IN_GUEST(r5) /* signal secondaries to continue */ b 10f @@ -659,6 +666,18 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) mtspr SPRN_MMCRA, r6 mtspr SPRN_SIAR, r7 mtspr SPRN_SDAR, r8 +BEGIN_FTR_SECTION + ld r5, VCPU_MMCR + 24(r4) + ld r6, VCPU_SIER(r4) + lwz r7, VCPU_PMC + 24(r4) + lwz r8, VCPU_PMC + 28(r4) + ld r9, VCPU_MMCR + 32(r4) + mtspr SPRN_MMCR2, r5 + mtspr SPRN_SIER, r6 + mtspr SPRN_SPMC1, r7 + mtspr SPRN_SPMC2, r8 + mtspr SPRN_MMCRS, r9 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) mtspr SPRN_MMCR0, r3 isync @@ -690,6 +709,61 @@ BEGIN_FTR_SECTION mtspr SPRN_DSCR, r5 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) +BEGIN_FTR_SECTION + /* Skip next section on POWER7 or PPC970 */ + b 8f +END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) + /* Turn on TM so we can access 
TFHAR/TFIAR/TEXASR */ + mfmsr r8 + li r0, 1 + rldimi r8, r0, MSR_TM_LG, 63-MSR_TM_LG + mtmsrd r8 + + /* Load up POWER8-specific registers */ + ld r5, VCPU_IAMR(r4) + lwz r6, VCPU_PSPB(r4) + ld r7, VCPU_FSCR(r4) + mtspr SPRN_IAMR, r5 + mtspr SPRN_PSPB, r6 + mtspr SPRN_FSCR, r7 + ld r5, VCPU_DAWR(r4) + ld r6, VCPU_DAWRX(r4) + ld r7, VCPU_CIABR(r4) + ld r8, VCPU_TAR(r4) + mtspr SPRN_DAWR, r5 + mtspr SPRN_DAWRX, r6 + mtspr SPRN_CIABR, r7 + mtspr SPRN_TAR, r8 + ld r5, VCPU_IC(r4) + ld r6, VCPU_VTB(r4) + mtspr SPRN_IC, r5 + mtspr SPRN_VTB, r6 + ld r5, VCPU_TFHAR(r4) + ld r6, VCPU_TFIAR(r4) + ld r7, VCPU_TEXASR(r4) + ld r8, VCPU_EBBHR(r4) + mtspr SPRN_TFHAR, r5 + mtspr SPRN_TFIAR, r6 + mtspr SPRN_TEXASR, r7 + mtspr SPRN_EBBHR, r8 + ld r5, VCPU_EBBRR(r4) + ld r6, VCPU_BESCR(r4) + ld r7, VCPU_CSIGR(r4) + ld r8, VCPU_TACR(r4) + mtspr SPRN_EBBRR, r5 + mtspr SPRN_BESCR, r6 + mtspr SPRN_CSIGR, r7 + mtspr SPRN_TACR, r8 + ld r5, VCPU_TCSCR(r4) + ld r6, VCPU_ACOP(r4) + lwz r7, VCPU_GUEST_PID(r4) + ld r8, VCPU_WORT(r4) + mtspr SPRN_TCSCR, r5 + mtspr SPRN_ACOP, r6 + mtspr SPRN_PID, r7 + mtspr SPRN_WORT, r8 +8: + /* * Set the decrementer to the guest decrementer. */ @@ -1081,6 +1155,54 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_201) add r5,r5,r6 std r5,VCPU_DEC_EXPIRES(r9) +BEGIN_FTR_SECTION + b 8f +END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) + /* Turn on TM so we can access TFHAR/TFIAR/TEXASR */ + mfmsr r8 + li r0, 1 + rldimi r8, r0, MSR_TM_LG, 63-MSR_TM_LG + mtmsrd r8 + + /* Save POWER8-specific registers */ + mfspr r5, SPRN_IAMR + mfspr r6, SPRN_PSPB + mfspr r7, SPRN_FSCR + std r5, VCPU_IAMR(r9) + stw r6, VCPU_PSPB(r9) + std r7, VCPU_FSCR(r9) + mfspr r5, SPRN_IC + mfspr r6, SPRN_VTB + mfspr r7, SPRN_TAR + std r5, VCPU_IC(r9) + std r6, VCPU_VTB(r9) + std r7, VCPU_TAR(r9) + mfspr r5, SPRN_TFHAR + mfspr r6, SPRN_TFIAR + mfspr r7, SPRN_TEXASR + mfspr r8, SPRN_EBBHR + std r5, VCPU_TFHAR(r9) + std r6, VCPU_TFIAR(r9) + std r7, VCPU_TEXASR(r9) + std r8, VCPU_EBBHR(r9) + mfspr r5, SPRN_EBBRR + mfspr r6, SPRN_BESCR + mfspr r7, SPRN_CSIGR + mfspr r8, SPRN_TACR + std r5, VCPU_EBBRR(r9) + std r6, VCPU_BESCR(r9) + std r7, VCPU_CSIGR(r9) + std r8, VCPU_TACR(r9) + mfspr r5, SPRN_TCSCR + mfspr r6, SPRN_ACOP + mfspr r7, SPRN_PID + mfspr r8, SPRN_WORT + std r5, VCPU_TCSCR(r9) + std r6, VCPU_ACOP(r9) + stw r7, VCPU_GUEST_PID(r9) + std r8, VCPU_WORT(r9) +8: + /* Save and reset AMR and UAMOR before turning on the MMU */ BEGIN_FTR_SECTION mfspr r5,SPRN_AMR @@ -1190,6 +1312,20 @@ BEGIN_FTR_SECTION stw r10, VCPU_PMC + 24(r9) stw r11, VCPU_PMC + 28(r9) END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) +BEGIN_FTR_SECTION + mfspr r4, SPRN_MMCR2 + mfspr r5, SPRN_SIER + mfspr r6, SPRN_SPMC1 + mfspr r7, SPRN_SPMC2 + mfspr r8, SPRN_MMCRS + std r4, VCPU_MMCR + 24(r9) + std r5, VCPU_SIER(r9) + stw r6, VCPU_PMC + 24(r9) + stw r7, VCPU_PMC + 28(r9) + std r8, VCPU_MMCR + 32(r9) + lis r4, 0x8000 + mtspr SPRN_MMCRS, r4 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) 22: /* Clear out SLB */ li r5,0 @@ -1290,6 +1426,15 @@ secondary_too_late: mtspr SPRN_LPID,r7 isync +BEGIN_FTR_SECTION + /* DPDES is shared between threads */ + mfspr r7, SPRN_DPDES + std r7, VCORE_DPDES(r5) + /* clear DPDES so we don't get guest doorbells in the host */ + li r8, 0 + mtspr SPRN_DPDES, r8 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) + /* Subtract timebase offset from timebase */ ld r8,VCORE_TB_OFFSET(r5) cmpdi r8,0 -- cgit v1.2.3 From 8563bf52d509213e746295341ab52896b562ca5e Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 8 Jan 2014 21:25:29 +1100 Subject: KVM: PPC: Book3S HV: Add 
support for DABRX register on POWER7 The DABRX (DABR extension) register on POWER7 processors provides finer control over which accesses cause a data breakpoint interrupt. It contains 3 bits which indicate whether to enable accesses in user, kernel and hypervisor modes respectively to cause data breakpoint interrupts, plus one bit that enables both real mode and virtual mode accesses to cause interrupts. Currently, KVM sets DABRX to allow both kernel and user accesses to cause interrupts while in the guest. This adds support for the guest to specify other values for DABRX. PAPR defines a H_SET_XDABR hcall to allow the guest to set both DABR and DABRX with one call. This adds a real-mode implementation of H_SET_XDABR, which shares most of its code with the existing H_SET_DABR implementation. To support this, we add a per-vcpu field to store the DABRX value plus code to get and set it via the ONE_REG interface. For Linux guests to use this new hcall, userspace needs to add "hcall-xdabr" to the set of strings in the /chosen/hypertas-functions property in the device tree. If userspace does this and then migrates the guest to a host where the kernel doesn't include this patch, then userspace will need to implement H_SET_XDABR by writing the specified DABR value to the DABR using the ONE_REG interface. In that case, the old kernel will set DABRX to DABRX_USER | DABRX_KERNEL. That should still work correctly, at least for Linux guests, since Linux guests cope with getting data breakpoint interrupts in modes that weren't requested by just ignoring the interrupt, and Linux guests never set DABRX_BTI. The other thing this does is to make H_SET_DABR and H_SET_XDABR work on POWER8, which has the DAWR and DAWRX instead of DABR/X. Guests that know about POWER8 should use H_SET_MODE rather than H_SET_[X]DABR, but guests running in POWER7 compatibility mode will still use H_SET_[X]DABR. For them, this adds the logic to convert DABR/X values into DAWR/X values on POWER8. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- Documentation/virtual/kvm/api.txt | 1 + arch/powerpc/include/asm/kvm_host.h | 1 + arch/powerpc/include/asm/reg.h | 18 +++++++++++------- arch/powerpc/include/uapi/asm/kvm.h | 2 ++ arch/powerpc/kernel/asm-offsets.c | 1 + arch/powerpc/kvm/book3s_hv.c | 6 ++++++ arch/powerpc/kvm/book3s_hv_rmhandlers.S | 32 ++++++++++++++++++++++++++++++-- 7 files changed, 52 insertions(+), 9 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index a30035dd4c26..5056baf574f4 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1838,6 +1838,7 @@ registers, find a list below: PPC | KVM_REG_PPC_LPCR | 64 PPC | KVM_REG_PPC_PPR | 64 PPC | KVM_REG_PPC_ARCH_COMPAT 32 + PPC | KVM_REG_PPC_DABRX | 32 PPC | KVM_REG_PPC_TM_GPR0 | 64 ... 
PPC | KVM_REG_PPC_TM_GPR31 | 64 diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 81c92d1d7978..d161bc09153b 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -466,6 +466,7 @@ struct kvm_vcpu_arch { ulong uamor; ulong iamr; u32 ctrl; + u32 dabrx; ulong dabr; ulong dawr; ulong dawrx; diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 05ecb072540c..adf644a80a3e 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -229,16 +229,20 @@ #define CIABR_PRIV_SUPER 2 #define CIABR_PRIV_HYPER 3 #define SPRN_DAWRX 0xBC -#define DAWRX_USER (1UL << 0) -#define DAWRX_KERNEL (1UL << 1) -#define DAWRX_HYP (1UL << 2) +#define DAWRX_USER __MASK(0) +#define DAWRX_KERNEL __MASK(1) +#define DAWRX_HYP __MASK(2) +#define DAWRX_WTI __MASK(3) +#define DAWRX_WT __MASK(4) +#define DAWRX_DR __MASK(5) +#define DAWRX_DW __MASK(6) #define SPRN_DABR 0x3F5 /* Data Address Breakpoint Register */ #define SPRN_DABR2 0x13D /* e300 */ #define SPRN_DABRX 0x3F7 /* Data Address Breakpoint Register Extension */ -#define DABRX_USER (1UL << 0) -#define DABRX_KERNEL (1UL << 1) -#define DABRX_HYP (1UL << 2) -#define DABRX_BTI (1UL << 3) +#define DABRX_USER __MASK(0) +#define DABRX_KERNEL __MASK(1) +#define DABRX_HYP __MASK(2) +#define DABRX_BTI __MASK(3) #define DABRX_ALL (DABRX_BTI | DABRX_HYP | DABRX_KERNEL | DABRX_USER) #define SPRN_DAR 0x013 /* Data Address Register */ #define SPRN_DBCR 0x136 /* e300 Data Breakpoint Control Reg */ diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index a586fb9b77bd..a6665be4f3ab 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -554,6 +554,8 @@ struct kvm_get_htab_header { /* Architecture compatibility level */ #define KVM_REG_PPC_ARCH_COMPAT (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb7) +#define KVM_REG_PPC_DABRX (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb8) + /* Transactional Memory checkpointed state: * This is all GPRs, all VSX regs and a subset of SPRs */ diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 043900abbbb0..239a857f1141 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -493,6 +493,7 @@ int main(void) DEFINE(VCPU_IAMR, offsetof(struct kvm_vcpu, arch.iamr)); DEFINE(VCPU_CTRL, offsetof(struct kvm_vcpu, arch.ctrl)); DEFINE(VCPU_DABR, offsetof(struct kvm_vcpu, arch.dabr)); + DEFINE(VCPU_DABRX, offsetof(struct kvm_vcpu, arch.dabrx)); DEFINE(VCPU_DAWR, offsetof(struct kvm_vcpu, arch.dawr)); DEFINE(VCPU_DAWRX, offsetof(struct kvm_vcpu, arch.dawrx)); DEFINE(VCPU_CIABR, offsetof(struct kvm_vcpu, arch.ciabr)); diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 216049ff7368..eb4eed4a7173 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -812,6 +812,9 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_DABR: *val = get_reg_val(id, vcpu->arch.dabr); break; + case KVM_REG_PPC_DABRX: + *val = get_reg_val(id, vcpu->arch.dabrx); + break; case KVM_REG_PPC_DSCR: *val = get_reg_val(id, vcpu->arch.dscr); break; @@ -967,6 +970,9 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_DABR: vcpu->arch.dabr = set_reg_val(id, *val); break; + case KVM_REG_PPC_DABRX: + vcpu->arch.dabrx = set_reg_val(id, *val) & ~DABRX_HYP; + break; case KVM_REG_PPC_DSCR: vcpu->arch.dscr = set_reg_val(id, *val); break; 
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index eae4ab9b9135..56299349e94b 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -585,7 +585,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) BEGIN_FTR_SECTION /* Set partition DABR */ /* Do this before re-enabling PMU to avoid P7 DABR corruption bug */ - li r5,3 + lwz r5,VCPU_DABRX(r4) ld r6,VCPU_DABR(r4) mtspr SPRN_DABRX,r5 mtspr SPRN_DABR,r6 @@ -1763,24 +1763,52 @@ hcall_real_table: .long 0 /* 0x11c */ .long 0 /* 0x120 */ .long .kvmppc_h_bulk_remove - hcall_real_table + .long 0 /* 0x128 */ + .long 0 /* 0x12c */ + .long 0 /* 0x130 */ + .long .kvmppc_h_set_xdabr - hcall_real_table hcall_real_table_end: ignore_hdec: mr r4,r9 b fast_guest_return +_GLOBAL(kvmppc_h_set_xdabr) + andi. r0, r5, DABRX_USER | DABRX_KERNEL + beq 6f + li r0, DABRX_USER | DABRX_KERNEL | DABRX_BTI + andc. r0, r5, r0 + beq 3f +6: li r3, H_PARAMETER + blr + _GLOBAL(kvmppc_h_set_dabr) + li r5, DABRX_USER | DABRX_KERNEL +3: BEGIN_FTR_SECTION b 2f END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) std r4,VCPU_DABR(r3) + stw r5, VCPU_DABRX(r3) + mtspr SPRN_DABRX, r5 /* Work around P7 bug where DABR can get corrupted on mtspr */ 1: mtspr SPRN_DABR,r4 mfspr r5, SPRN_DABR cmpd r4, r5 bne 1b isync -2: li r3,0 + li r3,0 + blr + + /* Emulate H_SET_DABR/X on P8 for the sake of compat mode guests */ +2: rlwimi r5, r4, 5, DAWRX_DR | DAWRX_DW + rlwimi r5, r4, 1, DAWRX_WT + clrrdi r4, r4, 3 + std r4, VCPU_DAWR(r3) + std r5, VCPU_DAWRX(r3) + mtspr SPRN_DAWR, r4 + mtspr SPRN_DAWRX, r5 + li r3, 0 blr _GLOBAL(kvmppc_h_cede) -- cgit v1.2.3 From d682916a381ac7c8eb965c10ab64bc7cc2f18647 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 8 Jan 2014 21:25:30 +1100 Subject: KVM: PPC: Book3S HV: Basic little-endian guest support We create a guest MSR from scratch when delivering exceptions in a few places. Instead of extracting LPCR[ILE] and inserting it into MSR_LE each time, we simply create a new variable intr_msr which contains the entire MSR to use. For a little-endian guest, userspace needs to set the ILE (interrupt little-endian) bit in the LPCR for each vcpu (or at least one vcpu in each virtual core). [paulus@samba.org - removed H_SET_MODE implementation from original version of the patch, and made kvmppc_set_lpcr update vcpu->arch.intr_msr.] 
Signed-off-by: Anton Blanchard Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_host.h | 1 + arch/powerpc/kernel/asm-offsets.c | 1 + arch/powerpc/kvm/book3s_64_mmu_hv.c | 2 +- arch/powerpc/kvm/book3s_hv.c | 22 ++++++++++++++++++++++ arch/powerpc/kvm/book3s_hv_rmhandlers.S | 15 +++++---------- 5 files changed, 30 insertions(+), 11 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index d161bc09153b..7726a3bc8ff0 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -636,6 +636,7 @@ struct kvm_vcpu_arch { spinlock_t tbacct_lock; u64 busy_stolen; u64 busy_preempt; + unsigned long intr_msr; #endif }; diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 239a857f1141..a60a2fdae5bd 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -480,6 +480,7 @@ int main(void) DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar)); DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa.pinned_addr)); DEFINE(VCPU_VPA_DIRTY, offsetof(struct kvm_vcpu, arch.vpa.dirty)); + DEFINE(VCPU_INTR_MSR, offsetof(struct kvm_vcpu, arch.intr_msr)); #endif #ifdef CONFIG_PPC_BOOK3S DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id)); diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index efb8aa544876..22cc895333e6 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -262,7 +262,7 @@ int kvmppc_mmu_hv_init(void) static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu) { - kvmppc_set_msr(vcpu, MSR_SF | MSR_ME); + kvmppc_set_msr(vcpu, vcpu->arch.intr_msr); } /* diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index eb4eed4a7173..3195e4f8a2ed 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -787,6 +787,27 @@ static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr) u64 mask; spin_lock(&vc->lock); + /* + * If ILE (interrupt little-endian) has changed, update the + * MSR_LE bit in the intr_msr for each vcpu in this vcore. + */ + if ((new_lpcr & LPCR_ILE) != (vc->lpcr & LPCR_ILE)) { + struct kvm *kvm = vcpu->kvm; + struct kvm_vcpu *vcpu; + int i; + + mutex_lock(&kvm->lock); + kvm_for_each_vcpu(i, vcpu, kvm) { + if (vcpu->arch.vcore != vc) + continue; + if (new_lpcr & LPCR_ILE) + vcpu->arch.intr_msr |= MSR_LE; + else + vcpu->arch.intr_msr &= ~MSR_LE; + } + mutex_unlock(&kvm->lock); + } + /* * Userspace can only modify DPFD (default prefetch depth), * ILE (interrupt little-endian) and TC (translation control). 
@@ -1155,6 +1176,7 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm, spin_lock_init(&vcpu->arch.vpa_update_lock); spin_lock_init(&vcpu->arch.tbacct_lock); vcpu->arch.busy_preempt = TB_NIL; + vcpu->arch.intr_msr = MSR_SF | MSR_ME; kvmppc_mmu_book3s_hv_init(vcpu); diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 56299349e94b..ecb76357c9d0 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -812,8 +812,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) 12: mtspr SPRN_SRR0, r10 mr r10,r0 mtspr SPRN_SRR1, r11 - li r11,(MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */ - rotldi r11,r11,63 + ld r11, VCPU_INTR_MSR(r4) 5: /* @@ -1551,8 +1550,7 @@ kvmppc_hdsi: mtspr SPRN_SRR0, r10 mtspr SPRN_SRR1, r11 li r10, BOOK3S_INTERRUPT_DATA_STORAGE - li r11, (MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */ - rotldi r11, r11, 63 + ld r11, VCPU_INTR_MSR(r9) fast_interrupt_c_return: 6: ld r7, VCPU_CTR(r9) lwz r8, VCPU_XER(r9) @@ -1621,8 +1619,7 @@ kvmppc_hisi: 1: mtspr SPRN_SRR0, r10 mtspr SPRN_SRR1, r11 li r10, BOOK3S_INTERRUPT_INST_STORAGE - li r11, (MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */ - rotldi r11, r11, 63 + ld r11, VCPU_INTR_MSR(r9) b fast_interrupt_c_return 3: ld r6, VCPU_KVM(r9) /* not relocated, use VRMA */ @@ -1665,8 +1662,7 @@ sc_1_fast_return: mtspr SPRN_SRR0,r10 mtspr SPRN_SRR1,r11 li r10, BOOK3S_INTERRUPT_SYSCALL - li r11, (MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */ - rotldi r11, r11, 63 + ld r11, VCPU_INTR_MSR(r9) mr r4,r9 b fast_guest_return @@ -1994,8 +1990,7 @@ machine_check_realmode: beq mc_cont /* If not, deliver a machine check. SRR0/1 are already set */ li r10, BOOK3S_INTERRUPT_MACHINE_CHECK - li r11, (MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */ - rotldi r11, r11, 63 + ld r11, VCPU_INTR_MSR(r9) b fast_interrupt_c_return /* -- cgit v1.2.3 From 7b490411c37f7ab7965cbdfe5e3ec28eadb6db5b Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Wed, 8 Jan 2014 21:25:32 +1100 Subject: KVM: PPC: Book3S HV: Add new state for transactional memory Add new state for transactional memory (TM) to kvm_vcpu_arch. Also add asm-offset bits that are going to be required. This also moves the existing TFHAR, TFIAR and TEXASR SPRs into a CONFIG_PPC_TRANSACTIONAL_MEM section. This requires some code changes to ensure we still compile with CONFIG_PPC_TRANSACTIONAL_MEM=N. Many of the added #ifdefs are removed in a later patch when the bulk of the TM code is added.
Signed-off-by: Michael Neuling Signed-off-by: Paul Mackerras [agraf: fix merge conflict] Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_host.h | 24 +++++++++-- arch/powerpc/kernel/asm-offsets.c | 19 +++++++-- arch/powerpc/kvm/book3s_hv.c | 4 ++ arch/powerpc/kvm/book3s_hv_rmhandlers.S | 75 ++++++++++++++++++++++++++++++++- 4 files changed, 114 insertions(+), 8 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 7726a3bc8ff0..1eaea2dea174 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -475,9 +475,6 @@ struct kvm_vcpu_arch { ulong ppr; ulong pspb; ulong fscr; - ulong tfhar; - ulong tfiar; - ulong texasr; ulong ebbhr; ulong ebbrr; ulong bescr; @@ -526,6 +523,27 @@ struct kvm_vcpu_arch { u64 siar; u64 sdar; u64 sier; +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + u64 tfhar; + u64 texasr; + u64 tfiar; + + u32 cr_tm; + u64 lr_tm; + u64 ctr_tm; + u64 amr_tm; + u64 ppr_tm; + u64 dscr_tm; + u64 tar_tm; + + ulong gpr_tm[32]; + + struct thread_fp_state fp_tm; + + struct thread_vr_state vr_tm; + u32 vrsave_tm; /* also USPRG0 */ + +#endif #ifdef CONFIG_KVM_EXIT_TIMING struct mutex exit_timing_lock; diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index a60a2fdae5bd..687f2ebf0cce 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -521,9 +521,6 @@ int main(void) DEFINE(VCPU_PPR, offsetof(struct kvm_vcpu, arch.ppr)); DEFINE(VCPU_FSCR, offsetof(struct kvm_vcpu, arch.fscr)); DEFINE(VCPU_PSPB, offsetof(struct kvm_vcpu, arch.pspb)); - DEFINE(VCPU_TFHAR, offsetof(struct kvm_vcpu, arch.tfhar)); - DEFINE(VCPU_TFIAR, offsetof(struct kvm_vcpu, arch.tfiar)); - DEFINE(VCPU_TEXASR, offsetof(struct kvm_vcpu, arch.texasr)); DEFINE(VCPU_EBBHR, offsetof(struct kvm_vcpu, arch.ebbhr)); DEFINE(VCPU_EBBRR, offsetof(struct kvm_vcpu, arch.ebbrr)); DEFINE(VCPU_BESCR, offsetof(struct kvm_vcpu, arch.bescr)); @@ -545,6 +542,22 @@ int main(void) DEFINE(VCPU_SLB_E, offsetof(struct kvmppc_slb, orige)); DEFINE(VCPU_SLB_V, offsetof(struct kvmppc_slb, origv)); DEFINE(VCPU_SLB_SIZE, sizeof(struct kvmppc_slb)); +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + DEFINE(VCPU_TFHAR, offsetof(struct kvm_vcpu, arch.tfhar)); + DEFINE(VCPU_TFIAR, offsetof(struct kvm_vcpu, arch.tfiar)); + DEFINE(VCPU_TEXASR, offsetof(struct kvm_vcpu, arch.texasr)); + DEFINE(VCPU_GPR_TM, offsetof(struct kvm_vcpu, arch.gpr_tm)); + DEFINE(VCPU_FPRS_TM, offsetof(struct kvm_vcpu, arch.fp_tm.fpr)); + DEFINE(VCPU_VRS_TM, offsetof(struct kvm_vcpu, arch.vr_tm.vr)); + DEFINE(VCPU_VRSAVE_TM, offsetof(struct kvm_vcpu, arch.vrsave_tm)); + DEFINE(VCPU_CR_TM, offsetof(struct kvm_vcpu, arch.cr_tm)); + DEFINE(VCPU_LR_TM, offsetof(struct kvm_vcpu, arch.lr_tm)); + DEFINE(VCPU_CTR_TM, offsetof(struct kvm_vcpu, arch.ctr_tm)); + DEFINE(VCPU_AMR_TM, offsetof(struct kvm_vcpu, arch.amr_tm)); + DEFINE(VCPU_PPR_TM, offsetof(struct kvm_vcpu, arch.ppr_tm)); + DEFINE(VCPU_DSCR_TM, offsetof(struct kvm_vcpu, arch.dscr_tm)); + DEFINE(VCPU_TAR_TM, offsetof(struct kvm_vcpu, arch.tar_tm)); +#endif #ifdef CONFIG_PPC_BOOK3S_64 #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 3195e4f8a2ed..f4a4c5c82fb2 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -875,6 +875,7 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_IAMR: *val = get_reg_val(id, vcpu->arch.iamr); break; 
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM case KVM_REG_PPC_TFHAR: *val = get_reg_val(id, vcpu->arch.tfhar); break; @@ -884,6 +885,7 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_TEXASR: *val = get_reg_val(id, vcpu->arch.texasr); break; +#endif case KVM_REG_PPC_FSCR: *val = get_reg_val(id, vcpu->arch.fscr); break; @@ -1033,6 +1035,7 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_IAMR: vcpu->arch.iamr = set_reg_val(id, *val); break; +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM case KVM_REG_PPC_TFHAR: vcpu->arch.tfhar = set_reg_val(id, *val); break; @@ -1042,6 +1045,7 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_TEXASR: vcpu->arch.texasr = set_reg_val(id, *val); break; +#endif case KVM_REG_PPC_FSCR: vcpu->arch.fscr = set_reg_val(id, *val); break; diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index ecb76357c9d0..dfa144cb199b 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -701,13 +701,15 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) ld r6, VCPU_VTB(r4) mtspr SPRN_IC, r5 mtspr SPRN_VTB, r6 +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM ld r5, VCPU_TFHAR(r4) ld r6, VCPU_TFIAR(r4) ld r7, VCPU_TEXASR(r4) - ld r8, VCPU_EBBHR(r4) mtspr SPRN_TFHAR, r5 mtspr SPRN_TFIAR, r6 mtspr SPRN_TEXASR, r7 +#endif + ld r8, VCPU_EBBHR(r4) mtspr SPRN_EBBHR, r8 ld r5, VCPU_EBBRR(r4) ld r6, VCPU_BESCR(r4) @@ -1118,13 +1120,15 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) std r5, VCPU_IC(r9) std r6, VCPU_VTB(r9) std r7, VCPU_TAR(r9) +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM mfspr r5, SPRN_TFHAR mfspr r6, SPRN_TFIAR mfspr r7, SPRN_TEXASR - mfspr r8, SPRN_EBBHR std r5, VCPU_TFHAR(r9) std r6, VCPU_TFIAR(r9) std r7, VCPU_TEXASR(r9) +#endif + mfspr r8, SPRN_EBBHR std r8, VCPU_EBBHR(r9) mfspr r5, SPRN_EBBRR mfspr r6, SPRN_BESCR @@ -1497,6 +1501,73 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) 1: addi r8,r8,16 .endr + /* Save DEC */ + mfspr r5,SPRN_DEC + mftb r6 + extsw r5,r5 + add r5,r5,r6 + std r5,VCPU_DEC_EXPIRES(r9) + +BEGIN_FTR_SECTION + b 8f +END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) + /* Turn on TM so we can access TFHAR/TFIAR/TEXASR */ + mfmsr r8 + li r0, 1 + rldimi r8, r0, MSR_TM_LG, 63-MSR_TM_LG + mtmsrd r8 + + /* Save POWER8-specific registers */ + mfspr r5, SPRN_IAMR + mfspr r6, SPRN_PSPB + mfspr r7, SPRN_FSCR + std r5, VCPU_IAMR(r9) + stw r6, VCPU_PSPB(r9) + std r7, VCPU_FSCR(r9) + mfspr r5, SPRN_IC + mfspr r6, SPRN_VTB + mfspr r7, SPRN_TAR + std r5, VCPU_IC(r9) + std r6, VCPU_VTB(r9) + std r7, VCPU_TAR(r9) +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + mfspr r5, SPRN_TFHAR + mfspr r6, SPRN_TFIAR + mfspr r7, SPRN_TEXASR + std r5, VCPU_TFHAR(r9) + std r6, VCPU_TFIAR(r9) + std r7, VCPU_TEXASR(r9) +#endif + mfspr r8, SPRN_EBBHR + std r8, VCPU_EBBHR(r9) + mfspr r5, SPRN_EBBRR + mfspr r6, SPRN_BESCR + mfspr r7, SPRN_CSIGR + mfspr r8, SPRN_TACR + std r5, VCPU_EBBRR(r9) + std r6, VCPU_BESCR(r9) + std r7, VCPU_CSIGR(r9) + std r8, VCPU_TACR(r9) + mfspr r5, SPRN_TCSCR + mfspr r6, SPRN_ACOP + mfspr r7, SPRN_PID + mfspr r8, SPRN_WORT + std r5, VCPU_TCSCR(r9) + std r6, VCPU_ACOP(r9) + stw r7, VCPU_GUEST_PID(r9) + std r8, VCPU_WORT(r9) +8: + + /* Save and reset AMR and UAMOR before turning on the MMU */ +BEGIN_FTR_SECTION + mfspr r5,SPRN_AMR + mfspr r6,SPRN_UAMOR + std r5,VCPU_AMR(r9) + std r6,VCPU_UAMOR(r9) + li r6,0 + mtspr SPRN_AMR,r6 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) + /* Unset guest mode */ li r0, KVM_GUEST_MODE_NONE stb r0, 
HSTATE_IN_GUEST(r13) -- cgit v1.2.3 From 1c430c06d0ce871f36b2af504d45a07356e44800 Mon Sep 17 00:00:00 2001 From: Andreas Schwab Date: Tue, 21 Jan 2014 23:24:02 +0100 Subject: powerpc: Fix hw breakpoints on !HAVE_HW_BREAKPOINT configurations This fixes a logic error that caused a failure to update the hw breakpoint registers when not using the hw-breakpoint interface. Signed-off-by: Andreas Schwab Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/process.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 64b7a6e61dd1..8d4c247f1738 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -811,7 +811,7 @@ struct task_struct *__switch_to(struct task_struct *prev, * schedule DABR */ #ifndef CONFIG_HAVE_HW_BREAKPOINT - if (unlikely(hw_brk_match(&__get_cpu_var(current_brk), &new->thread.hw_brk))) + if (unlikely(!hw_brk_match(&__get_cpu_var(current_brk), &new->thread.hw_brk))) set_breakpoint(&new->thread.hw_brk); #endif /* CONFIG_HAVE_HW_BREAKPOINT */ #endif -- cgit v1.2.3 From d765ff23e3181413fb1bed090c8d702165448a84 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 29 Jan 2014 16:33:56 +1100 Subject: powerpc: Fix 32-bit frames for signals delivered when transactional Commit d31626f70b61 ("powerpc: Don't corrupt transactional state when using FP/VMX in kernel") introduced a bug where the uc_link and uc_regs fields of the ucontext_t that is created to hold the transactional values of the registers in a 32-bit signal frame didn't get set correctly. The reason is that we now clear the MSR_TS bits in the MSR in save_tm_user_regs(), before the code that sets uc_link and uc_regs. To fix this, we move the setting of uc_link and uc_regs into the same if statement that selects whether to call save_tm_user_regs() or save_user_regs(). 
Signed-off-by: Paul Mackerras
Signed-off-by: Benjamin Herrenschmidt
---
 arch/powerpc/kernel/signal_32.c | 19 +++++++------------
 1 file changed, 7 insertions(+), 12 deletions(-)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index 6ce69e6f1fcb..a67e00aa3caa 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -1022,29 +1022,24 @@ int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka,
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 	tm_frame = &rt_sf->uc_transact.uc_mcontext;
 	if (MSR_TM_ACTIVE(regs->msr)) {
+		if (__put_user((unsigned long)&rt_sf->uc_transact,
+			       &rt_sf->uc.uc_link) ||
+		    __put_user((unsigned long)tm_frame,
+			       &rt_sf->uc_transact.uc_regs))
+			goto badframe;
 		if (save_tm_user_regs(regs, frame, tm_frame, sigret))
 			goto badframe;
 	}
 	else
 #endif
 	{
+		if (__put_user(0, &rt_sf->uc.uc_link))
+			goto badframe;
 		if (save_user_regs(regs, frame, tm_frame, sigret, 1))
 			goto badframe;
 	}
 	regs->link = tramp;

-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-	if (MSR_TM_ACTIVE(regs->msr)) {
-		if (__put_user((unsigned long)&rt_sf->uc_transact,
-			       &rt_sf->uc.uc_link)
-		    || __put_user((unsigned long)tm_frame, &rt_sf->uc_transact.uc_regs))
-			goto badframe;
-	}
-	else
-#endif
-	if (__put_user(0, &rt_sf->uc.uc_link))
-		goto badframe;
-
 	current->thread.fp_state.fpscr = 0;	/* turn off all fp exceptions */

 	/* create a stack frame for the caller of the handler */
-- 
cgit v1.2.3


From 3fa8cad82b94d0bed002571bd246f2299ffc876b Mon Sep 17 00:00:00 2001
From: Deepthi Dharwar
Date: Tue, 14 Jan 2014 16:26:38 +0530
Subject: powerpc/pseries/cpuidle: smt-snooze-delay cleanup.

smt-snooze-delay was designed to disable the NAP state, or to delay entry
into the NAP state, before the cpuidle framework was adopted. It is a
per-cpu variable. With the cpuidle framework in place, states can be
disabled on a per-cpu basis using the cpuidle/enable sysfs entry. Also,
with a cpuidle driver, each state's target residency is per-driver,
whereas it was previously per-device. The per-cpu smt-snooze-delay sysfs
file, which sets the target residency of the idle state on a particular
cpu, therefore only confuses users: we cannot have a different
smt-snooze-delay (target residency) value for each cpu. In the current
code, the smt-snooze-delay functionality is completely broken anyway.

It makes sense to remove smt-snooze-delay from the idle driver now that
the cpuidle framework is in use. However, the sysfs files are retained
because ppc64_util currently uses them. Once ppc64_util is fixed, we
propose to clean up the kernel code.
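For reference, a minimal sketch of the two cpuidle mechanisms that make smt-snooze-delay redundant, built from the same fields the removed update_smt_snooze_delay() helper touched (state index 1 is the Nap entry in the dedicated-CPU table; cpu and residency stand in for the caller's values):

	struct cpuidle_driver *drv = cpuidle_get_driver();
	struct cpuidle_device *dev = per_cpu(cpuidle_devices, cpu);

	/* Target residency is a driver-wide property: one value shared by
	 * every CPU, so a per-cpu knob cannot be honoured. */
	if (drv)
		drv->states[1].target_residency = residency;

	/* Disabling a state, by contrast, is tracked per device (per CPU),
	 * which is what the per-state cpuidle sysfs interface toggles. */
	if (dev)
		dev->states_usage[1].disable = 1;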
Signed-off-by: Deepthi Dharwar Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/processor.h | 7 ------- arch/powerpc/kernel/sysfs.c | 2 -- drivers/cpuidle/cpuidle-pseries.c | 17 ----------------- 3 files changed, 26 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index c2c0f4478be3..b62de43ae5f3 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -450,13 +450,6 @@ enum idle_boot_override {IDLE_NO_OVERRIDE = 0, IDLE_POWERSAVE_OFF}; extern int powersave_nap; /* set if nap mode can be used in idle loop */ extern void power7_nap(void); - -#ifdef CONFIG_PSERIES_CPUIDLE -extern void update_smt_snooze_delay(int cpu, int residency); -#else -static inline void update_smt_snooze_delay(int cpu, int residency) {} -#endif - extern void flush_instruction_cache(void); extern void hard_reset_now(void); extern void poweroff_now(void); diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index d4a43e64a6a9..97e1dc917683 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -51,8 +51,6 @@ static ssize_t store_smt_snooze_delay(struct device *dev, return -EINVAL; per_cpu(smt_snooze_delay, cpu->dev.id) = snooze; - update_smt_snooze_delay(cpu->dev.id, snooze); - return count; } diff --git a/drivers/cpuidle/cpuidle-pseries.c b/drivers/cpuidle/cpuidle-pseries.c index bb56091685d3..7ab564aa0b1c 100644 --- a/drivers/cpuidle/cpuidle-pseries.c +++ b/drivers/cpuidle/cpuidle-pseries.c @@ -162,23 +162,6 @@ static struct cpuidle_state shared_states[] = { .enter = &shared_cede_loop }, }; -void update_smt_snooze_delay(int cpu, int residency) -{ - struct cpuidle_driver *drv = cpuidle_get_driver(); - struct cpuidle_device *dev = per_cpu(cpuidle_devices, cpu); - - if (cpuidle_state_table != dedicated_states) - return; - - if (residency < 0) { - /* Disable the Nap state on that cpu */ - if (dev) - dev->states_usage[1].disable = 1; - } else - if (drv) - drv->states[1].target_residency = residency; -} - static int pseries_cpuidle_add_cpu_notifier(struct notifier_block *n, unsigned long action, void *hcpu) { -- cgit v1.2.3 From 91b973f90c1220d71923e7efe1e61f5329806380 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Sat, 18 Jan 2014 21:14:47 +1100 Subject: powerpc: Make sure "cache" directory is removed when offlining cpu The code in remove_cache_dir() is supposed to remove the "cache" subdirectory from the sysfs directory for a CPU when that CPU is being offlined. It tries to do this by calling kobject_put() on the kobject for the subdirectory. However, the subdirectory only gets removed once the last reference goes away, and the reference being put here may well not be the last reference. That means that the "cache" subdirectory may still exist when the offlining operation has finished. If the same CPU subsequently gets onlined, the code tries to add a new "cache" subdirectory. If the old subdirectory has not yet been removed, we get a WARN_ON in the sysfs code, with stack trace, and an error message printed on the console. Further, we ultimately end up with an online cpu with no "cache" subdirectory. This fixes it by doing an explicit kobject_del() at the point where we want the subdirectory to go away. kobject_del() removes the sysfs directory even though the object still exists in memory. The object will get freed at some point in the future. 
A subsequent onlining operation can create a new sysfs directory, even if the old object still exists in memory, without causing any problems. Cc: stable@vger.kernel.org # v3.0+ Signed-off-by: Paul Mackerras Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/cacheinfo.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c index abfa011344d9..2912b8787aa4 100644 --- a/arch/powerpc/kernel/cacheinfo.c +++ b/arch/powerpc/kernel/cacheinfo.c @@ -793,6 +793,9 @@ static void remove_cache_dir(struct cache_dir *cache_dir) { remove_index_dirs(cache_dir); + /* Remove cache dir from sysfs */ + kobject_del(cache_dir->kobj); + kobject_put(cache_dir->kobj); kfree(cache_dir); -- cgit v1.2.3
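The kobject rule the cacheinfo fix relies on, shown as a brief illustrative sketch rather than the literal patch:

	/* Old behaviour: only drop our reference.  If anything else still
	 * holds one, release() does not run and the "cache" directory
	 * silently survives the CPU offline. */
	kobject_put(cache_dir->kobj);

	/* New behaviour: delete first, then put.  kobject_del() tears down
	 * the sysfs entry immediately, regardless of outstanding references;
	 * the memory is reclaimed later, once the final reference is dropped
	 * and release() runs. */
	kobject_del(cache_dir->kobj);
	kobject_put(cache_dir->kobj);

A later online of the same CPU can then recreate the directory without hitting the duplicate-name WARN_ON in sysfs.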