-rw-r--r--  arch/ppc64/kernel/Makefile          2
-rw-r--r--  arch/ppc64/kernel/htab.c         1081
-rw-r--r--  arch/ppc64/kernel/pSeries_htab.c  470
-rw-r--r--  arch/ppc64/kernel/pSeries_lpar.c  673
-rw-r--r--  arch/ppc64/mm/init.c               95
-rw-r--r--  include/asm-ppc64/machdep.h        39
-rw-r--r--  include/asm-ppc64/mmu.h            88
-rw-r--r--  include/asm-ppc64/pgtable.h        28
8 files changed, 1129 insertions(+), 1347 deletions(-)
diff --git a/arch/ppc64/kernel/Makefile b/arch/ppc64/kernel/Makefile
index 99c830bdcc86..768b0f992795 100644
--- a/arch/ppc64/kernel/Makefile
+++ b/arch/ppc64/kernel/Makefile
@@ -27,7 +27,7 @@ obj-y := ppc_ksyms.o setup.o entry.o traps.o irq.o idle.o \
pmc.o mf_proc.o proc_pmc.o iSeries_setup.o \
ItLpQueue.o hvCall.o mf.o HvLpEvent.o ras.o \
iSeries_proc.o HvCall.o HvLpConfig.o \
- rtc.o init_task.o
+ rtc.o init_task.o pSeries_htab.o
obj-$(CONFIG_PCI) += pci.o pci_dn.o pci_dma.o
obj-$(CONFIG_PPC_EEH) += eeh.o
diff --git a/arch/ppc64/kernel/htab.c b/arch/ppc64/kernel/htab.c
index fac9a4e37256..292c3cf9775d 100644
--- a/arch/ppc64/kernel/htab.c
+++ b/arch/ppc64/kernel/htab.c
@@ -1,10 +1,11 @@
/*
- *
- *
* PowerPC64 port by Mike Corrigan and Dave Engebretsen
* {mikejc|engebret}@us.ibm.com
*
* Copyright (c) 2000 Mike Corrigan <mikejc@us.ibm.com>
+ *
+ * SMP scalability work:
+ * Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM
*
* Module name: htab.c
*
@@ -18,17 +19,7 @@
*/
#include <linux/config.h>
-#include <asm/processor.h>
-#include <asm/pgtable.h>
-#include <asm/mmu.h>
-#include <asm/mmu_context.h>
-#include <asm/page.h>
-#include <asm/types.h>
-#include <asm/init.h>
-#include <asm/system.h>
-#include <asm/iSeries/LparData.h>
#include <linux/spinlock.h>
-#include <asm/ppcdebug.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/proc_fs.h>
@@ -36,52 +27,42 @@
#include <linux/sysctl.h>
#include <linux/ctype.h>
#include <linux/cache.h>
+
+#include <asm/ppcdebug.h>
+#include <asm/processor.h>
+#include <asm/pgtable.h>
+#include <asm/mmu.h>
+#include <asm/mmu_context.h>
+#include <asm/page.h>
+#include <asm/types.h>
+#include <asm/init.h>
+#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/Naca.h>
#include <asm/system.h>
#include <asm/pmc.h>
#include <asm/machdep.h>
#include <asm/lmb.h>
+#include <asm/abs_addr.h>
#ifdef CONFIG_PPC_EEH
#include <asm/eeh.h>
#endif
-/* For iSeries */
-#include <asm/iSeries/HvCallHpt.h>
-
-/* Note: pte --> Linux PTE
+/*
+ * Note: pte --> Linux PTE
* HPTE --> PowerPC Hashed Page Table Entry
*/
HTAB htab_data = {NULL, 0, 0, 0, 0};
-int proc_dol2crvec(ctl_table *table, int write, struct file *filp,
- void *buffer, size_t *lenp);
-
-void htab_initialize(void);
-void make_pte_LPAR(HPTE *htab,
- unsigned long va, unsigned long pa, int mode,
- unsigned long hash_mask, int large);
-
-extern unsigned long reloc_offset(void);
-extern unsigned long get_kernel_vsid( unsigned long ea );
-extern void cacheable_memzero( void *, unsigned int );
-
extern unsigned long _SDR1;
extern unsigned long klimit;
-extern struct Naca *naca;
-
-extern char _stext[], _etext[], __start_naca[], __end_stab[];
-
-static spinlock_t hash_table_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
+extern unsigned long reloc_offset(void);
#define PTRRELOC(x) ((typeof(x))((unsigned long)(x) - offset))
#define PTRUNRELOC(x) ((typeof(x))((unsigned long)(x) + offset))
#define RELOC(x) (*PTRRELOC(&(x)))
-extern unsigned long htab_size( unsigned long );
-unsigned long hpte_getword0_iSeries( unsigned long slot );
-
#define KB (1024)
#define MB (1024*KB)
static inline void
@@ -90,7 +71,7 @@ create_pte_mapping(unsigned long start, unsigned long end,
{
unsigned long addr, offset = reloc_offset();
HTAB *_htab_data = PTRRELOC(&htab_data);
- HPTE *htab = (HPTE *)__v2a(_htab_data->htab);
+ HPTE *htab = (HPTE *)__v2a(_htab_data->htab);
unsigned int step;
if (large)
@@ -101,8 +82,12 @@ create_pte_mapping(unsigned long start, unsigned long end,
for (addr = start; addr < end; addr += step) {
unsigned long vsid = get_kernel_vsid(addr);
unsigned long va = (vsid << 28) | (addr & 0xfffffff);
- make_pte(htab, va, (unsigned long)__v2a(addr), mode, mask,
- large);
+ if (_machine == _MACH_pSeriesLP)
+ pSeries_lpar_make_pte(htab, va,
+ (unsigned long)__v2a(addr), mode, mask, large);
+ else
+ pSeries_make_pte(htab, va,
+ (unsigned long)__v2a(addr), mode, mask, large);
}
}
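
For reference, the virtual address arithmetic used by this loop: the top of the VA comes from the VSID and the low 28 bits are the offset within the 256 MB segment. A worked sketch (the address is illustrative; LARGE_PAGE_SHIFT = 24 per this patch):

	unsigned long ea   = KERNELBASE + 0x123000;	/* hypothetical */
	unsigned long va   = (get_kernel_vsid(ea) << 28) | (ea & 0x0fffffff);
	unsigned long vpn  = va >> PAGE_SHIFT;		/* 4 KB page number */
	unsigned long lvpn = va >> LARGE_PAGE_SHIFT;	/* 16 MB page number */
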
@@ -111,7 +96,7 @@ htab_initialize(void)
{
unsigned long table, htab_size_bytes;
unsigned long pteg_count;
- unsigned long mode_ro, mode_rw, mask;
+ unsigned long mode_rw, mask;
unsigned long offset = reloc_offset();
struct Naca *_naca = RELOC(naca);
HTAB *_htab_data = PTRRELOC(&htab_data);
@@ -132,7 +117,7 @@ htab_initialize(void)
_htab_data->htab_num_ptegs = pteg_count;
_htab_data->htab_hash_mask = pteg_count - 1;
- if(_machine == _MACH_pSeries) {
+ if (_machine == _MACH_pSeries) {
/* Find storage for the HPT. Must be contiguous in
* the absolute address space.
*/
@@ -151,734 +136,91 @@ htab_initialize(void)
RELOC(_SDR1) = 0;
}
- mode_ro = _PAGE_ACCESSED | _PAGE_COHERENT | PP_RXRX;
mode_rw = _PAGE_ACCESSED | _PAGE_COHERENT | PP_RWXX;
mask = pteg_count-1;
- /* Create PTE's for the kernel text and data sections plus
- * the HPT and HPTX arrays. Make the assumption that
- * (addr & KERNELBASE) == 0 (ie they are disjoint).
- * We also assume that the va is <= 64 bits.
- */
-#if 0
- create_pte_mapping((unsigned long)_stext, (unsigned long)__start_naca, mode_ro, mask);
- create_pte_mapping((unsigned long)__start_naca, (unsigned long)__end_stab, mode_rw, mask);
- create_pte_mapping((unsigned long)__end_stab, (unsigned long)_etext, mode_ro, mask);
- create_pte_mapping((unsigned long)_etext, RELOC(klimit), mode_rw, mask);
- create_pte_mapping((unsigned long)__a2v(table), (unsigned long)__a2v(table+htab_size_bytes), mode_rw, mask);
-#else
-#ifndef CONFIG_PPC_ISERIES
+ /* XXX we currently map kernel text rw, should fix this */
if (__is_processor(PV_POWER4) && _naca->physicalMemorySize > 256*MB) {
create_pte_mapping((unsigned long)KERNELBASE,
KERNELBASE + 256*MB, mode_rw, mask, 0);
create_pte_mapping((unsigned long)KERNELBASE + 256*MB,
KERNELBASE + (_naca->physicalMemorySize),
mode_rw, mask, 1);
- return;
+ } else {
+ create_pte_mapping((unsigned long)KERNELBASE,
+ KERNELBASE+(_naca->physicalMemorySize),
+ mode_rw, mask, 0);
}
-#endif
- create_pte_mapping((unsigned long)KERNELBASE,
- KERNELBASE+(_naca->physicalMemorySize),
- mode_rw, mask, 0);
-#endif
}
#undef KB
#undef MB
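
The POWER4 branch above bolts the first 256 MB with 4 KB pages and everything beyond it with large pages, which keeps the bolted portion of the hash table small. Illustrative arithmetic for a 1 GB machine (assuming 4 KB small and 16 MB large pages, as this patch uses):

	1 GB   / 4 KB  = 262144 bolted HPTEs (small pages only)
	256 MB / 4 KB  =  65536 bolted HPTEs (first 256 MB)
	768 MB / 16 MB =     48 bolted HPTEs (remainder, large pages)
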
/*
- * Create a pte. Used during initialization only.
- * We assume the PTE will fit in the primary PTEG.
- */
-void make_pte(HPTE *htab,
- unsigned long va, unsigned long pa, int mode,
- unsigned long hash_mask, int large)
-{
- HPTE *hptep;
- unsigned long hash, i;
- volatile unsigned long x = 1;
- unsigned long vpn;
-
-#ifdef CONFIG_PPC_PSERIES
- if(_machine == _MACH_pSeriesLP) {
- make_pte_LPAR(htab, va, pa, mode, hash_mask, large);
- return;
- }
-#endif
-
- if (large)
- vpn = va >> 24;
- else
- vpn = va >> 12;
-
- hash = hpt_hash(vpn, large);
-
- hptep = htab + ((hash & hash_mask)*HPTES_PER_GROUP);
-
- for (i = 0; i < 8; ++i, ++hptep) {
- if ( hptep->dw0.dw0.v == 0 ) { /* !valid */
- hptep->dw1.dword1 = pa | mode;
- hptep->dw0.dword0 = 0;
- hptep->dw0.dw0.avpn = va >> 23;
- hptep->dw0.dw0.bolted = 1; /* bolted */
- hptep->dw0.dw0.v = 1; /* make valid */
- return;
- }
- }
-
- /* We should _never_ get here and too early to call xmon. */
- for(;x;x|=1);
-}
-
-/* Functions to invalidate a HPTE */
-static void hpte_invalidate_iSeries( unsigned long slot )
-{
- HvCallHpt_invalidateSetSwBitsGet( slot, 0, 0 );
-}
-
-static void hpte_invalidate_pSeries( unsigned long slot )
-{
- /* Local copy of the first doubleword of the HPTE */
- union {
- unsigned long d;
- Hpte_dword0 h;
- } hpte_dw0;
-
- /* Locate the HPTE */
- HPTE * hptep = htab_data.htab + slot;
-
- /* Get the first doubleword of the HPTE */
- hpte_dw0.d = hptep->dw0.dword0;
-
- /* Invalidate the hpte */
- hptep->dw0.dword0 = 0;
-
- /* Invalidate the tlb */
- {
- unsigned long vsid, group, pi, pi_high;
-
- vsid = hpte_dw0.h.avpn >> 5;
- group = slot >> 3;
- if(hpte_dw0.h.h) {
- group = ~group;
- }
- pi = (vsid ^ group) & 0x7ff;
- pi_high = (hpte_dw0.h.avpn & 0x1f) << 11;
- pi |= pi_high;
- _tlbie(pi << 12);
- }
-}
-
-
-/* Select an available HPT slot for a new HPTE
- * return slot index (if in primary group)
- * return -slot index (if in secondary group)
- */
-static long hpte_selectslot_iSeries( unsigned long vpn )
-{
- HPTE hpte;
- long ret_slot, orig_slot;
- unsigned long primary_hash;
- unsigned long hpteg_slot;
- unsigned long slot;
- unsigned i, k;
- union {
- unsigned long d;
- Hpte_dword0 h;
- } hpte_dw0;
-
- ret_slot = orig_slot = HvCallHpt_findValid( &hpte, vpn );
- if ( hpte.dw0.dw0.v ) { /* If valid ...what do we do now? */
- udbg_printf( "hpte_selectslot_iSeries: vpn 0x%016lx already valid at slot 0x%016lx\n", vpn, ret_slot );
- udbg_printf( "hpte_selectslot_iSeries: returned hpte 0x%016lx 0x%016lx\n", hpte.dw0.dword0, hpte.dw1.dword1 );
- panic("select_hpte_slot found entry already valid\n");
- }
- if ( ret_slot == -1 ) { /* -1 indicates no available slots */
-
- /* No available entry found in secondary group */
-
- PMC_SW_SYSTEM(htab_capacity_castouts);
-
- primary_hash = hpt_hash(vpn, 0);
- hpteg_slot = ( primary_hash & htab_data.htab_hash_mask ) * HPTES_PER_GROUP;
- k = htab_data.next_round_robin++ & 0x7;
-
- for ( i=0; i<HPTES_PER_GROUP; ++i ) {
- if ( k == HPTES_PER_GROUP )
- k = 0;
- slot = hpteg_slot + k;
- hpte_dw0.d = hpte_getword0_iSeries( slot );
- if ( !hpte_dw0.h.bolted ) {
- hpte_invalidate_iSeries( slot );
- ret_slot = slot;
- }
- ++k;
- }
- } else {
- if ( ret_slot < 0 ) {
- PMC_SW_SYSTEM(htab_primary_overflows);
- ret_slot &= 0x7fffffffffffffff;
- ret_slot = -ret_slot;
- }
- }
- if ( ret_slot == -1 ) {
- /* No non-bolted entry found in primary group - time to panic */
- udbg_printf("hpte_selectslot_pSeries - No non-bolted HPTE in group 0x%lx! \n", hpteg_slot/HPTES_PER_GROUP);
- panic("No non-bolted HPTE in group %lx", (unsigned long)hpteg_slot/HPTES_PER_GROUP);
- }
- PPCDBG(PPCDBG_MM, "hpte_selectslot_iSeries: vpn=0x%016lx, orig_slot=0x%016lx, ret_slot=0x%016lx \n",
- vpn, orig_slot, ret_slot );
- return ret_slot;
-}
-
-static long hpte_selectslot_pSeries(unsigned long vpn)
-{
- HPTE * hptep;
- unsigned long primary_hash;
- unsigned long hpteg_slot;
- unsigned i, k;
-
- /* Search the primary group for an available slot */
-
- primary_hash = hpt_hash(vpn, 0);
- hpteg_slot = ( primary_hash & htab_data.htab_hash_mask ) * HPTES_PER_GROUP;
- hptep = htab_data.htab + hpteg_slot;
-
- for (i=0; i<HPTES_PER_GROUP; ++i) {
- if ( hptep->dw0.dw0.v == 0 ) {
- /* If an available slot found, return it */
- return hpteg_slot + i;
- }
- hptep++;
- }
-
- /* No available entry found in primary group */
-
- PMC_SW_SYSTEM(htab_primary_overflows);
-
- /* Search the secondary group */
-
- hpteg_slot = ( ~primary_hash & htab_data.htab_hash_mask ) * HPTES_PER_GROUP;
- hptep = htab_data.htab + hpteg_slot;
-
- for (i=0; i<HPTES_PER_GROUP; ++i) {
- if ( hptep->dw0.dw0.v == 0 ) {
- /* If an available slot found, return it */
- return -(hpteg_slot + i);
- }
- hptep++;
- }
-
- /* No available entry found in secondary group */
-
- PMC_SW_SYSTEM(htab_capacity_castouts);
-
- /* Select an entry in the primary group to replace */
-
- hpteg_slot = ( primary_hash & htab_data.htab_hash_mask ) * HPTES_PER_GROUP;
- hptep = htab_data.htab + hpteg_slot;
- k = htab_data.next_round_robin++ & 0x7;
-
- for (i=0; i<HPTES_PER_GROUP; ++i) {
- if (k == HPTES_PER_GROUP)
- k = 0;
-
- if (!hptep[k].dw0.dw0.bolted) {
- hpteg_slot += k;
- /* Invalidate the current entry */
- ppc_md.hpte_invalidate(hpteg_slot);
- return hpteg_slot;
- }
- ++k;
- }
-
- /* No non-bolted entry found in primary group - time to panic */
- udbg_printf("hpte_selectslot_pSeries - No non-bolted HPTE in group 0x%lx! \n", hpteg_slot/HPTES_PER_GROUP);
- /* xmon(0); */
- panic("No non-bolted HPTE in group %lx", (unsigned long)hpteg_slot/HPTES_PER_GROUP);
-
- /* keep the compiler happy */
- return 0;
-}
-
-unsigned long hpte_getword0_iSeries( unsigned long slot )
-{
- unsigned long dword0;
-
- HPTE hpte;
- HvCallHpt_get( &hpte, slot );
- dword0 = hpte.dw0.dword0;
-
- return dword0;
-}
-
-unsigned long hpte_getword0_pSeries( unsigned long slot )
-{
- unsigned long dword0;
- HPTE * hptep = htab_data.htab + slot;
-
- dword0 = hptep->dw0.dword0;
- return dword0;
-}
-
-static long hpte_find_iSeries(unsigned long vpn)
-{
- HPTE hpte;
- long slot;
-
- slot = HvCallHpt_findValid( &hpte, vpn );
- if ( hpte.dw0.dw0.v ) {
- if ( slot < 0 ) {
- slot &= 0x7fffffffffffffff;
- slot = -slot;
- }
- } else
- slot = -1;
- return slot;
-}
-
-static long hpte_find_pSeries(unsigned long vpn)
-{
- union {
- unsigned long d;
- Hpte_dword0 h;
- } hpte_dw0;
- long slot;
- unsigned long hash;
- unsigned long i,j;
-
- hash = hpt_hash(vpn, 0);
- for ( j=0; j<2; ++j ) {
- slot = (hash & htab_data.htab_hash_mask) * HPTES_PER_GROUP;
- for ( i=0; i<HPTES_PER_GROUP; ++i ) {
- hpte_dw0.d = hpte_getword0_pSeries( slot );
- if ( ( hpte_dw0.h.avpn == ( vpn >> 11 ) ) &&
- ( hpte_dw0.h.v ) &&
- ( hpte_dw0.h.h == j ) ) {
- /* HPTE matches */
- if ( j )
- slot = -slot;
- return slot;
- }
- ++slot;
- }
- hash = ~hash;
- }
- return -1;
-}
-
-/* This function is called by iSeries setup when initializing the hpt */
-void build_valid_hpte( unsigned long vsid, unsigned long ea, unsigned long pa,
- pte_t * ptep, unsigned hpteflags, unsigned bolted )
-{
- unsigned long vpn, flags;
- long hpte_slot;
- unsigned hash;
- pte_t pte;
-
- vpn = ((vsid << 28) | ( ea & 0xffff000 )) >> 12;
-
- spin_lock_irqsave( &hash_table_lock, flags );
-
- hpte_slot = ppc_md.hpte_selectslot( vpn );
- hash = 0;
- if ( hpte_slot < 0 ) {
- hash = 1;
- hpte_slot = -hpte_slot;
- }
- ppc_md.hpte_create_valid( hpte_slot, vpn, pa >> 12, hash, ptep,
- hpteflags, bolted );
-
- if ( ptep ) {
- /* Get existing pte flags */
- pte = *ptep;
- pte_val(pte) &= ~_PAGE_HPTEFLAGS;
-
- /* Add in the has hpte flag */
- pte_val(pte) |= _PAGE_HASHPTE;
-
- /* Add in the _PAGE_SECONDARY flag */
- pte_val(pte) |= hash << 15;
-
- /* Add in the hpte slot */
- pte_val(pte) |= (hpte_slot << 12) & _PAGE_GROUP_IX;
-
- /* Save the new pte. */
- *ptep = pte;
-
- }
- spin_unlock_irqrestore( &hash_table_lock, flags );
-}
-
-
-/* Create an HPTE and validate it
- * It is assumed that the HPT slot currently is invalid.
- * The HPTE is set with the vpn, rpn (converted to absolute)
- * and flags
- */
-static void hpte_create_valid_iSeries(unsigned long slot, unsigned long vpn,
- unsigned long prpn, unsigned hash,
- void * ptep, unsigned hpteflags,
- unsigned bolted )
-{
- /* Local copy of HPTE */
- struct {
- /* Local copy of first doubleword of HPTE */
- union {
- unsigned long d;
- Hpte_dword0 h;
- } dw0;
- /* Local copy of second doubleword of HPTE */
- union {
- unsigned long d;
- Hpte_dword1 h;
- Hpte_dword1_flags f;
- } dw1;
- } lhpte;
-
- unsigned long avpn = vpn >> 11;
- unsigned long arpn = physRpn_to_absRpn( prpn );
-
- /* Fill in the local HPTE with absolute rpn, avpn and flags */
- lhpte.dw1.d = 0;
- lhpte.dw1.h.rpn = arpn;
- lhpte.dw1.f.flags = hpteflags;
-
- lhpte.dw0.d = 0;
- lhpte.dw0.h.avpn = avpn;
- lhpte.dw0.h.h = hash;
- lhpte.dw0.h.bolted = bolted;
- lhpte.dw0.h.v = 1;
-
- /* Now fill in the actual HPTE */
- HvCallHpt_addValidate( slot, hash, (HPTE *)&lhpte );
-}
-
-static void hpte_create_valid_pSeries(unsigned long slot, unsigned long vpn,
- unsigned long prpn, unsigned hash,
- void * ptep, unsigned hpteflags,
- unsigned bolted)
-{
- /* Local copy of HPTE */
- struct {
- /* Local copy of first doubleword of HPTE */
- union {
- unsigned long d;
- Hpte_dword0 h;
- } dw0;
- /* Local copy of second doubleword of HPTE */
- union {
- unsigned long d;
- Hpte_dword1 h;
- Hpte_dword1_flags f;
- } dw1;
- } lhpte;
-
- unsigned long avpn = vpn >> 11;
- unsigned long arpn = physRpn_to_absRpn( prpn );
-
- HPTE *hptep;
-
- /* Fill in the local HPTE with absolute rpn, avpn and flags */
- lhpte.dw1.d = 0;
- lhpte.dw1.h.rpn = arpn;
- lhpte.dw1.f.flags = hpteflags;
-
- lhpte.dw0.d = 0;
- lhpte.dw0.h.avpn = avpn;
- lhpte.dw0.h.h = hash;
- lhpte.dw0.h.bolted = bolted;
- lhpte.dw0.h.v = 1;
-
- /* Now fill in the actual HPTE */
- hptep = htab_data.htab + slot;
-
- /* Set the second dword first so that the valid bit
- * is the last thing set
- */
-
- hptep->dw1.dword1 = lhpte.dw1.d;
-
- /* Guarantee the second dword is visible before
- * the valid bit
- */
-
- __asm__ __volatile__ ("eieio" : : : "memory");
-
- /* Now set the first dword including the valid bit */
- hptep->dw0.dword0 = lhpte.dw0.d;
-
- __asm__ __volatile__ ("ptesync" : : : "memory");
-}
-
-/* find_linux_pte returns the address of a linux pte for a given
+ * find_linux_pte returns the address of a linux pte for a given
* effective address and directory. If not found, it returns zero.
*/
-
-pte_t * find_linux_pte( pgd_t * pgdir, unsigned long ea )
+pte_t *find_linux_pte(pgd_t *pgdir, unsigned long ea)
{
pgd_t *pg;
pmd_t *pm;
pte_t *pt = NULL;
pte_t pte;
- pg = pgdir + pgd_index( ea );
- if ( ! pgd_none( *pg ) ) {
- pm = pmd_offset( pg, ea );
- if ( ! pmd_none( *pm ) ) {
- pt = pte_offset_kernel( pm, ea );
+ pg = pgdir + pgd_index(ea);
+ if (!pgd_none(*pg)) {
+
+ pm = pmd_offset(pg, ea);
+ if (!pmd_none(*pm)) {
+ pt = pte_offset_kernel(pm, ea);
pte = *pt;
- if ( ! pte_present( pte ) )
+ if (!pte_present(pte))
pt = NULL;
}
}
return pt;
-
-}
-
-static inline unsigned long computeHptePP( unsigned long pte )
-{
- return ( pte & _PAGE_USER ) |
- ( ( ( pte & _PAGE_USER ) >> 1 ) &
- ( ( ~( ( pte >> 2 ) & /* _PAGE_RW */
- ( pte >> 7 ) ) ) & /* _PAGE_DIRTY */
- 1 ) );
}
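
A minimal usage sketch for find_linux_pte(): the caller must hold the mm's page_table_lock so the returned pte can neither be freed nor modified underneath it (hash_page() below follows exactly this pattern):

	pte_t *ptep;

	spin_lock(&mm->page_table_lock);
	ptep = find_linux_pte(mm->pgd, ea);
	if (ptep) {
		/* *ptep is stable until the lock is dropped */
	}
	spin_unlock(&mm->page_table_lock);
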
-static void hpte_updatepp_iSeries(long slot, unsigned long newpp, unsigned long va)
+static inline unsigned long computeHptePP(unsigned long pte)
{
- HvCallHpt_setPp( slot, newpp );
-}
-
-static void hpte_updatepp_pSeries(long slot, unsigned long newpp, unsigned long va)
-{
- /* Local copy of first doubleword of HPTE */
- union {
- unsigned long d;
- Hpte_dword0 h;
- } hpte_dw0;
-
- /* Local copy of second doubleword of HPTE */
- union {
- unsigned long d;
- Hpte_dword1 h;
- Hpte_dword1_flags f;
- } hpte_dw1;
-
- HPTE * hptep = htab_data.htab + slot;
-
- /* Turn off valid bit in HPTE */
- hpte_dw0.d = hptep->dw0.dword0;
- hpte_dw0.h.v = 0;
- hptep->dw0.dword0 = hpte_dw0.d;
-
- /* Ensure it is out of the tlb too */
- _tlbie( va );
-
- /* Insert the new pp bits into the HPTE */
- hpte_dw1.d = hptep->dw1.dword1;
- hpte_dw1.h.pp = newpp;
- hptep->dw1.dword1 = hpte_dw1.d;
-
- /* Ensure it is visible before validating */
- __asm__ __volatile__ ("eieio" : : : "memory");
-
- /* Turn the valid bit back on in HPTE */
- hpte_dw0.h.v = 1;
- hptep->dw0.dword0 = hpte_dw0.d;
-
- __asm__ __volatile__ ("ptesync" : : : "memory");
-}
-
-/*
- * Update the page protection bits. Intended to be used to create
- * guard pages for kernel data structures on pages which are bolted
- * in the HPT. Assumes pages being operated on will not be stolen.
- */
-void hpte_updateboltedpp_iSeries(unsigned long newpp, unsigned long ea )
-{
- unsigned long vsid,va,vpn;
- long slot;
-
- vsid = get_kernel_vsid( ea );
- va = ( vsid << 28 ) | ( ea & 0x0fffffff );
- vpn = va >> PAGE_SHIFT;
-
- slot = ppc_md.hpte_find( vpn );
- HvCallHpt_setPp( slot, newpp );
-}
-
-
-static __inline__ void set_pp_bit(unsigned long pp, HPTE *addr)
-{
- unsigned long old;
- unsigned long *p = (unsigned long *)(&(addr->dw1));
-
- __asm__ __volatile__(
- "1: ldarx %0,0,%3\n\
- rldimi %0,%2,0,62\n\
- stdcx. %0,0,%3\n\
- bne 1b"
- : "=&r" (old), "=m" (*p)
- : "r" (pp), "r" (p), "m" (*p)
- : "cc");
-}
-
-/*
- * Update the page protection bits. Intended to be used to create
- * guard pages for kernel data structures on pages which are bolted
- * in the HPT. Assumes pages being operated on will not be stolen.
- */
-void hpte_updateboltedpp_pSeries(unsigned long newpp, unsigned long ea)
-{
- unsigned long vsid,va,vpn,flags;
- long slot;
- HPTE *hptep;
-
- vsid = get_kernel_vsid( ea );
- va = ( vsid << 28 ) | ( ea & 0x0fffffff );
- vpn = va >> PAGE_SHIFT;
-
- slot = ppc_md.hpte_find( vpn );
- hptep = htab_data.htab + slot;
-
- set_pp_bit(newpp , hptep);
-
- /* Ensure it is out of the tlb too */
- spin_lock_irqsave( &hash_table_lock, flags );
- _tlbie( va );
- spin_unlock_irqrestore( &hash_table_lock, flags );
-}
-
-
-
-/* This is called very early. */
-void hpte_init_iSeries(void)
-{
- ppc_md.hpte_invalidate = hpte_invalidate_iSeries;
- ppc_md.hpte_updatepp = hpte_updatepp_iSeries;
- ppc_md.hpte_updateboltedpp = hpte_updateboltedpp_iSeries;
- ppc_md.hpte_getword0 = hpte_getword0_iSeries;
- ppc_md.hpte_selectslot = hpte_selectslot_iSeries;
- ppc_md.hpte_create_valid = hpte_create_valid_iSeries;
- ppc_md.hpte_find = hpte_find_iSeries;
-}
-void hpte_init_pSeries(void)
-{
- ppc_md.hpte_invalidate = hpte_invalidate_pSeries;
- ppc_md.hpte_updatepp = hpte_updatepp_pSeries;
- ppc_md.hpte_updateboltedpp = hpte_updateboltedpp_pSeries;
- ppc_md.hpte_getword0 = hpte_getword0_pSeries;
- ppc_md.hpte_selectslot = hpte_selectslot_pSeries;
- ppc_md.hpte_create_valid = hpte_create_valid_pSeries;
- ppc_md.hpte_find = hpte_find_pSeries;
+ return (pte & _PAGE_USER) |
+ (((pte & _PAGE_USER) >> 1) &
+ ((~((pte >> 2) & /* _PAGE_RW */
+ (pte >> 7))) & /* _PAGE_DIRTY */
+ 1));
}
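
The bit-twiddling in computeHptePP() encodes a simple policy. An equivalent readable form, assuming the bit assignments _PAGE_USER = 0x002, _PAGE_RW = 0x004 and _PAGE_DIRTY = 0x080 implied by the shift comments above (treat the exact values as assumptions; pgtable.h is authoritative):

	/* hypothetical readable equivalent of computeHptePP() */
	static unsigned long compute_hpte_pp_readable(unsigned long pte)
	{
		int user  = (pte & _PAGE_USER)  != 0;
		int rw    = (pte & _PAGE_RW)    != 0;
		int dirty = (pte & _PAGE_DIRTY) != 0;

		if (!user)
			return 0;	/* PP=00: kernel rw, no user access */
		if (rw && dirty)
			return 2;	/* PP=10: user read/write */
		return 3;		/* PP=11: user read-only */
	}
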
/*
* Handle a fault by adding an HPTE. If the address can't be determined
* to be valid via Linux page tables, return 1. If handled return 0
*/
-int hash_page(unsigned long ea, unsigned long access)
+int __hash_page(unsigned long ea, unsigned long access, unsigned long vsid,
+ pte_t *ptep)
{
- void *pgdir;
- unsigned long va, vsid, vpn;
- unsigned long newpp, hash_ind, prpn;
+ unsigned long va, vpn;
+ unsigned long newpp, prpn;
unsigned long hpteflags;
long slot;
- struct mm_struct *mm;
- pte_t old_pte, new_pte, *ptep;
-
- /* Check for invalid addresses. */
- if (!IS_VALID_EA(ea))
- return 1;
-
- switch (REGION_ID(ea)) {
- case USER_REGION_ID:
- mm = current->mm;
- if (mm == NULL)
- return 1;
-
- vsid = get_vsid(mm->context, ea);
- break;
- case IO_REGION_ID:
- mm = &ioremap_mm;
- vsid = get_kernel_vsid(ea);
- break;
- case VMALLOC_REGION_ID:
- mm = &init_mm;
- vsid = get_kernel_vsid(ea);
- break;
-#ifdef CONFIG_PPC_EEH
- case IO_UNMAPPED_REGION_ID:
- udbg_printf("EEH Error ea = 0x%lx\n", ea);
- PPCDBG_ENTER_DEBUGGER();
- panic("EEH Error ea = 0x%lx\n", ea);
- break;
-#endif
- case KERNEL_REGION_ID:
- /*
- * As htab_initialize is now, we shouldn't ever get here since
- * we're bolting the entire 0xC0... region.
- */
- udbg_printf("Little faulted on kernel address 0x%lx\n", ea);
- PPCDBG_ENTER_DEBUGGER();
- panic("Little faulted on kernel address 0x%lx\n", ea);
- break;
- default:
- /* Not a valid range, send the problem up to do_page_fault */
- return 1;
- break;
- }
+ pte_t old_pte, new_pte;
/* Search the Linux page table for a match with va */
va = (vsid << 28) | (ea & 0x0fffffff);
vpn = va >> PAGE_SHIFT;
- pgdir = mm->pgd;
-
- if (pgdir == NULL)
- return 1;
-
- /*
- * Lock the Linux page table to prevent mmap and kswapd
- * from modifying entries while we search and update
- */
- spin_lock(&mm->page_table_lock);
-
- ptep = find_linux_pte(pgdir, ea);
/*
* If no pte found or not present, send the problem up to
* do_page_fault
*/
- if (!ptep || !pte_present(*ptep)) {
- spin_unlock(&mm->page_table_lock);
+ if (!ptep || !pte_present(*ptep))
return 1;
- }
/*
* Check the user's access rights to the page. If access should be
* prevented then send the problem up to do_page_fault.
*/
access |= _PAGE_PRESENT;
- if (access & ~(pte_val(*ptep))) {
- spin_unlock(&mm->page_table_lock);
+ if (access & ~(pte_val(*ptep)))
return 1;
- }
-
- /*
- * Acquire the hash table lock to guarantee that the linux
- * pte we fetch will not change
- */
- spin_lock(&hash_table_lock);
-
- /*
- * At this point we have found a pte (which was present).
- * The spinlocks prevent this status from changing
- * The hash_table_lock prevents the _PAGE_HASHPTE status
- * from changing (RPN, DIRTY and ACCESSED too)
- * The page_table_lock prevents the pte from being
- * invalidated or modified
- */
/*
* At this point, we have a pte (old_pte) which can be used to build
@@ -906,86 +248,151 @@ int hash_page(unsigned long ea, unsigned long access)
if (pte_val(old_pte) & _PAGE_HASHPTE) {
/* There MIGHT be an HPTE for this pte */
unsigned long hash, slot, secondary;
- /* Local copy of first doubleword of HPTE */
- union {
- unsigned long d;
- Hpte_dword0 h;
- } hpte_dw0;
+ /* XXX fix large pte flag */
hash = hpt_hash(vpn, 0);
secondary = (pte_val(old_pte) & _PAGE_SECONDARY) >> 15;
if (secondary)
hash = ~hash;
slot = (hash & htab_data.htab_hash_mask) * HPTES_PER_GROUP;
slot += (pte_val(old_pte) & _PAGE_GROUP_IX) >> 12;
- /* If there is an HPTE for this page it is indexed by slot */
- hpte_dw0.d = ppc_md.hpte_getword0(slot);
- if ((hpte_dw0.h.avpn == (vpn >> 11)) &&
- (hpte_dw0.h.v) &&
- (hpte_dw0.h.h == secondary)){
- /* HPTE matches */
- ppc_md.hpte_updatepp(slot, newpp, va);
+
+ udbg_printf("updatepp cpu %d ea %lx vsid should be %lx\n", smp_processor_id(), ea, vsid);
+
+ /* XXX fix large pte flag */
+ if (ppc_md.hpte_updatepp(slot, newpp, va, 0) == -1)
+ pte_val(old_pte) &= ~_PAGE_HPTEFLAGS;
+ else
if (!pte_same(old_pte, new_pte))
*ptep = new_pte;
- } else {
- /* HPTE is not for this pte */
- pte_val(old_pte) &= ~_PAGE_HPTEFLAGS;
- }
}
if (!(pte_val(old_pte) & _PAGE_HASHPTE)) {
- /*
- * Case 1
- * For these cases we need to create a new
- * HPTE and update the linux pte
- */
-
- /* Find an available HPTE slot */
- slot = ppc_md.hpte_selectslot(vpn);
-
- hash_ind = 0;
- if (slot < 0) {
- slot = -slot;
- hash_ind = 1;
- }
-
- /* Set the physical address */
+ /* XXX fix large pte flag */
+ unsigned long hash = hpt_hash(vpn, 0);
+ unsigned long hpte_group;
prpn = pte_val(old_pte) >> PTE_SHIFT;
+repeat:
+ hpte_group = ((hash & htab_data.htab_hash_mask) *
+ HPTES_PER_GROUP) & ~0x7UL;
+
/* Update the linux pte with the HPTE slot */
pte_val(new_pte) &= ~_PAGE_HPTEFLAGS;
- pte_val(new_pte) |= hash_ind << 15;
- pte_val(new_pte) |= (slot<<12) & _PAGE_GROUP_IX;
pte_val(new_pte) |= _PAGE_HASHPTE;
- /*
+ /* copy appropriate flags from linux pte */
+ hpteflags = (pte_val(new_pte) & 0x1f8) | newpp;
+
+ /* XXX fix large pte flag */
+ slot = ppc_md.insert_hpte(hpte_group, vpn, prpn, 0,
+ hpteflags, 0, 0);
+
+ /* Primary is full, try the secondary */
+ if (slot == -1) {
+ pte_val(new_pte) |= 1 << 15;
+ hpte_group = ((~hash & htab_data.htab_hash_mask) *
+ HPTES_PER_GROUP) & ~0x7UL;
+ /* XXX fix large pte flag */
+ slot = ppc_md.insert_hpte(hpte_group, vpn, prpn,
+ 1, hpteflags, 0, 0);
+ if (slot == -1) {
+ if (mftb() & 0x1)
+ hpte_group = ((hash & htab_data.htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL;
+
+ ppc_md.remove_hpte(hpte_group);
+ goto repeat;
+ }
+ }
+
+ pte_val(new_pte) |= (slot<<12) & _PAGE_GROUP_IX;
+
+ /*
* No need to use ldarx/stdcx here because all who
* might be updating the pte will hold the
* page_table_lock or the hash_table_lock
* (we hold both)
*/
*ptep = new_pte;
+ }
- /* copy appropriate flags from linux pte */
- hpteflags = (pte_val(new_pte) & 0x1f8) | newpp;
+ return 0;
+}
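
The repeat loop above is the heart of the new insertion policy: try the primary PTEG, then the secondary, and if both are full evict a pseudo-randomly chosen group and try again. The same control flow, condensed into a sketch (the _PAGE_SECONDARY bookkeeping is elided):

	for (;;) {
		group = (hash & htab_data.htab_hash_mask) * HPTES_PER_GROUP;
		slot = ppc_md.insert_hpte(group, vpn, prpn, 0, hpteflags, 0, 0);
		if (slot != -1)
			break;

		group = (~hash & htab_data.htab_hash_mask) * HPTES_PER_GROUP;
		slot = ppc_md.insert_hpte(group, vpn, prpn, 1, hpteflags, 0, 0);
		if (slot != -1)
			break;

		/* both groups full: evict from primary or secondary */
		if (mftb() & 0x1)
			group = (hash & htab_data.htab_hash_mask) *
				HPTES_PER_GROUP;
		ppc_md.remove_hpte(group);
	}
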
- /* Create the HPTE */
- ppc_md.hpte_create_valid(slot, vpn, prpn, hash_ind, ptep,
- hpteflags, 0);
+int hash_page(unsigned long ea, unsigned long access)
+{
+ void *pgdir;
+ unsigned long vsid;
+ struct mm_struct *mm;
+ pte_t *ptep;
+ int ret;
+
+ /* Check for invalid addresses. */
+ if (!IS_VALID_EA(ea))
+ return 1;
+
+ switch (REGION_ID(ea)) {
+ case USER_REGION_ID:
+ mm = current->mm;
+ if (mm == NULL)
+ return 1;
+
+ vsid = get_vsid(mm->context, ea);
+ break;
+ case IO_REGION_ID:
+ mm = &ioremap_mm;
+ vsid = get_kernel_vsid(ea);
+ break;
+ case VMALLOC_REGION_ID:
+ mm = &init_mm;
+ vsid = get_kernel_vsid(ea);
+ break;
+#ifdef CONFIG_PPC_EEH
+ case IO_UNMAPPED_REGION_ID:
+ udbg_printf("EEH Error ea = 0x%lx\n", ea);
+ PPCDBG_ENTER_DEBUGGER();
+ panic("EEH Error ea = 0x%lx\n", ea);
+ break;
+#endif
+ case KERNEL_REGION_ID:
+ /*
+ * As htab_initialize is now, we shouldn't ever get here since
+ * we're bolting the entire 0xC0... region.
+ */
+ udbg_printf("Little faulted on kernel address 0x%lx\n", ea);
+ PPCDBG_ENTER_DEBUGGER();
+ panic("Little faulted on kernel address 0x%lx\n", ea);
+ break;
+ default:
+ /* Not a valid range, send the problem up to do_page_fault */
+ return 1;
+ break;
}
- spin_unlock(&hash_table_lock);
+ pgdir = mm->pgd;
+
+ if (pgdir == NULL)
+ return 1;
+
+ /*
+ * Lock the Linux page table to prevent mmap and kswapd
+ * from modifying entries while we search and update
+ */
+ spin_lock(&mm->page_table_lock);
+ ptep = find_linux_pte(pgdir, ea);
+ ret = __hash_page(ea, access, vsid, ptep);
spin_unlock(&mm->page_table_lock);
- return 0;
+
+ return ret;
}
-void flush_hash_page(unsigned long context, unsigned long ea, pte_t pte)
+void flush_hash_page(unsigned long context, unsigned long ea, pte_t pte,
+ int local)
{
- unsigned long vsid, vpn, va, hash, secondary, slot, flags;
- union {
- unsigned long d;
- Hpte_dword0 h;
- } hpte_dw0;
+ unsigned long vsid, vpn, va, hash, secondary, slot;
+
+ /* XXX fix for large ptes */
+ unsigned long large = 0;
if ((ea >= USER_START) && (ea <= USER_END))
vsid = get_vsid(context, ea);
@@ -993,156 +400,32 @@ void flush_hash_page(unsigned long context, unsigned long ea, pte_t pte)
vsid = get_kernel_vsid(ea);
va = (vsid << 28) | (ea & 0x0fffffff);
- vpn = va >> PAGE_SHIFT;
- hash = hpt_hash(vpn, 0);
+ if (large)
+ vpn = va >> LARGE_PAGE_SHIFT;
+ else
+ vpn = va >> PAGE_SHIFT;
+ hash = hpt_hash(vpn, large);
secondary = (pte_val(pte) & _PAGE_SECONDARY) >> 15;
if (secondary)
hash = ~hash;
slot = (hash & htab_data.htab_hash_mask) * HPTES_PER_GROUP;
slot += (pte_val(pte) & _PAGE_GROUP_IX) >> 12;
- spin_lock_irqsave(&hash_table_lock, flags);
- /*
- * Id prefer to flush even if our hpte was stolen, but the new
- * entry could be bolted - Anton
- */
- hpte_dw0.d = ppc_md.hpte_getword0(slot);
- if ((hpte_dw0.h.avpn == (vpn >> 11)) &&
- (hpte_dw0.h.v) &&
- (hpte_dw0.h.h == secondary)){
- /* HPTE matches */
- ppc_md.hpte_invalidate(slot);
- }
-
- spin_unlock_irqrestore(&hash_table_lock, flags);
+ ppc_md.hpte_invalidate(slot, va, large, local);
}
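
flush_hash_page() can go straight to the slot because __hash_page() caches it in the software bits of the Linux pte. The recovery, pulled out as a helper-style sketch (bit positions _PAGE_SECONDARY = 1 << 15 and _PAGE_GROUP_IX at bits 12-14 are taken from the masks used in this patch):

	static unsigned long pte_to_hpte_slot(pte_t pte, unsigned long vpn,
					      int large)
	{
		unsigned long hash = hpt_hash(vpn, large);
		unsigned long slot;

		if (pte_val(pte) & _PAGE_SECONDARY)
			hash = ~hash;
		slot = (hash & htab_data.htab_hash_mask) * HPTES_PER_GROUP;
		slot += (pte_val(pte) & _PAGE_GROUP_IX) >> 12;
		return slot;
	}
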
-int proc_dol2crvec(ctl_table *table, int write, struct file *filp,
- void *buffer, size_t *lenp)
+void flush_hash_range(unsigned long context, unsigned long number, int local)
{
- int vleft, first=1, len, left, val;
-#define TMPBUFLEN 256
- char buf[TMPBUFLEN], *p;
- static const char *sizestrings[4] = {
- "2MB", "256KB", "512KB", "1MB"
- };
- static const char *clockstrings[8] = {
- "clock disabled", "+1 clock", "+1.5 clock", "reserved(3)",
- "+2 clock", "+2.5 clock", "+3 clock", "reserved(7)"
- };
- static const char *typestrings[4] = {
- "flow-through burst SRAM", "reserved SRAM",
- "pipelined burst SRAM", "pipelined late-write SRAM"
- };
- static const char *holdstrings[4] = {
- "0.5", "1.0", "(reserved2)", "(reserved3)"
- };
-
- if ( ((_get_PVR() >> 16) != 8) && ((_get_PVR() >> 16) != 12))
- return -EFAULT;
-
- if ( /*!table->maxlen ||*/ (filp->f_pos && !write)) {
- *lenp = 0;
- return 0;
- }
-
- vleft = table->maxlen / sizeof(int);
- left = *lenp;
-
- for (; left /*&& vleft--*/; first=0) {
- if (write) {
- while (left) {
- char c;
- if(get_user(c,(char *) buffer))
- return -EFAULT;
- if (!isspace(c))
- break;
- left--;
- ((char *) buffer)++;
- }
- if (!left)
- break;
- len = left;
- if (len > TMPBUFLEN-1)
- len = TMPBUFLEN-1;
- if(copy_from_user(buf, buffer, len))
- return -EFAULT;
- buf[len] = 0;
- p = buf;
- if (*p < '0' || *p > '9')
- break;
- val = simple_strtoul(p, &p, 0);
- len = p-buf;
- if ((len < left) && *p && !isspace(*p))
- break;
- buffer += len;
- left -= len;
-#if 0
- /* DRENG need a def */
- _set_L2CR(0);
- _set_L2CR(val);
- while ( _get_L2CR() & 0x1 )
- /* wait for invalidate to finish */;
-#endif
-
- } else {
- p = buf;
- if (!first)
- *p++ = '\t';
-#if 0
- /* DRENG need a def */
- val = _get_L2CR();
-#endif
- p += sprintf(p, "0x%08x: ", val);
- p += sprintf(p, " %s", (val >> 31) & 1 ? "enabled" :
- "disabled");
- p += sprintf(p, ", %sparity", (val>>30)&1 ? "" : "no ");
- p += sprintf(p, ", %s", sizestrings[(val >> 28) & 3]);
- p += sprintf(p, ", %s", clockstrings[(val >> 25) & 7]);
- p += sprintf(p, ", %s", typestrings[(val >> 23) & 2]);
- p += sprintf(p, "%s", (val>>22)&1 ? ", data only" : "");
- p += sprintf(p, "%s", (val>>20)&1 ? ", ZZ enabled": "");
- p += sprintf(p, ", %s", (val>>19)&1 ? "write-through" :
- "copy-back");
- p += sprintf(p, "%s", (val>>18)&1 ? ", testing" : "");
- p += sprintf(p, ", %sns hold",holdstrings[(val>>16)&3]);
- p += sprintf(p, "%s", (val>>15)&1 ? ", DLL slow" : "");
- p += sprintf(p, "%s", (val>>14)&1 ? ", diff clock" :"");
- p += sprintf(p, "%s", (val>>13)&1 ? ", DLL bypass" :"");
-
- p += sprintf(p,"\n");
-
- len = strlen(buf);
- if (len > left)
- len = left;
- if(copy_to_user(buffer, buf, len))
- return -EFAULT;
- left -= len;
- buffer += len;
- break;
- }
- }
+ if (ppc_md.flush_hash_range) {
+ ppc_md.flush_hash_range(context, number, local);
+ } else {
+ int i;
+ struct tlb_batch_data *ptes =
+ &tlb_batch_array[smp_processor_id()][0];
- if (!write && !first && left) {
- if(put_user('\n', (char *) buffer))
- return -EFAULT;
- left--, buffer++;
- }
- if (write) {
- p = (char *) buffer;
- while (left) {
- char c;
- if(get_user(c, p++))
- return -EFAULT;
- if (!isspace(c))
- break;
- left--;
+ for (i = 0; i < number; i++) {
+ flush_hash_page(context, ptes->addr, ptes->pte, local);
+ ptes++;
}
}
- if (write && first)
- return -EINVAL;
- *lenp -= left;
- filp->f_pos += *lenp;
- return 0;
}
-
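
The generic flush_hash_range() fallback above consumes a per-cpu batch that is filled elsewhere in this patch series. Its entries are used as addr/pte pairs; an assumed shape, since the real definition is not in this hunk:

	struct tlb_batch_data {
		unsigned long addr;	/* effective address to flush */
		pte_t pte;		/* old pte, carries the HPTE slot hints */
	};
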
diff --git a/arch/ppc64/kernel/pSeries_htab.c b/arch/ppc64/kernel/pSeries_htab.c
new file mode 100644
index 000000000000..7880a385fc96
--- /dev/null
+++ b/arch/ppc64/kernel/pSeries_htab.c
@@ -0,0 +1,470 @@
+/*
+ * pSeries hashtable management.
+ *
+ * SMP scalability work:
+ * Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/spinlock.h>
+#include <linux/bitops.h>
+#include <linux/threads.h>
+#include <linux/smp.h>
+
+#include <asm/abs_addr.h>
+#include <asm/machdep.h>
+#include <asm/mmu.h>
+#include <asm/mmu_context.h>
+#include <asm/pgtable.h>
+
+/*
+ * Create a pte. Used during initialization only.
+ * We assume the PTE will fit in the primary PTEG.
+ */
+void pSeries_make_pte(HPTE *htab, unsigned long va, unsigned long pa,
+ int mode, unsigned long hash_mask, int large)
+{
+ HPTE *hptep;
+ unsigned long hash, i;
+ unsigned long vpn;
+
+ if (large)
+ vpn = va >> LARGE_PAGE_SHIFT;
+ else
+ vpn = va >> PAGE_SHIFT;
+
+ hash = hpt_hash(vpn, large);
+
+ hptep = htab + ((hash & hash_mask)*HPTES_PER_GROUP);
+
+ for (i = 0; i < 8; ++i, ++hptep) {
+ if (hptep->dw0.dw0.v == 0) { /* !valid */
+ hptep->dw1.dword1 = pa | mode;
+ hptep->dw0.dword0 = 0;
+ hptep->dw0.dw0.avpn = va >> 23;
+ hptep->dw0.dw0.bolted = 1; /* bolted */
+ if (large) {
+ hptep->dw0.dw0.l = 1;
+ hptep->dw0.dw0.avpn &= ~0x1UL;
+ }
+ hptep->dw0.dw0.v = 1; /* make valid */
+ return;
+ }
+ }
+
+	/* We should _never_ get here, and it is too early to call xmon. */
+ while(1)
+ ;
+}
+
+#define HPTE_LOCK_BIT 3
+
+static inline void pSeries_lock_hpte(HPTE *hptep)
+{
+ unsigned long *word = &hptep->dw0.dword0;
+
+ while (1) {
+ if (!test_and_set_bit(HPTE_LOCK_BIT, word))
+ break;
+ while(test_bit(HPTE_LOCK_BIT, word))
+ barrier();
+ }
+}
+
+static inline void pSeries_unlock_hpte(HPTE *hptep)
+{
+ unsigned long *word = &hptep->dw0.dword0;
+
+ asm volatile("lwsync":::"memory");
+ clear_bit(HPTE_LOCK_BIT, word);
+}
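
HPTE_LOCK_BIT repurposes a software-available bit of dword0 as a per-entry spinlock, so two cpus updating different HPTEs no longer serialize on a single global hash_table_lock. Callers follow a check, lock, re-check pattern; an illustrative helper (not part of the patch):

	/* claim an invalid slot while racing other cpus */
	static int try_claim_invalid_slot(HPTE *hptep)
	{
		if (hptep->dw0.dw0.v)
			return 0;		/* cheap unlocked test */

		pSeries_lock_hpte(hptep);
		if (hptep->dw0.dw0.v) {
			pSeries_unlock_hpte(hptep);
			return 0;		/* lost the race */
		}
		return 1;	/* locked and still invalid: ours to fill */
	}
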
+
+static spinlock_t pSeries_tlbie_lock = SPIN_LOCK_UNLOCKED;
+
+static long pSeries_insert_hpte(unsigned long hpte_group, unsigned long vpn,
+ unsigned long prpn, int secondary,
+ unsigned long hpteflags, int bolted, int large)
+{
+ unsigned long avpn = vpn >> 11;
+ unsigned long arpn = physRpn_to_absRpn(prpn);
+ HPTE *hptep = htab_data.htab + hpte_group;
+ Hpte_dword0 dw0;
+ HPTE lhpte;
+ int i;
+
+ for (i = 0; i < HPTES_PER_GROUP; i++) {
+ dw0 = hptep->dw0.dw0;
+
+ if (!dw0.v) {
+ /* retry with lock held */
+ pSeries_lock_hpte(hptep);
+ dw0 = hptep->dw0.dw0;
+ if (!dw0.v)
+ break;
+ pSeries_unlock_hpte(hptep);
+ }
+
+ hptep++;
+ }
+
+ if (i == HPTES_PER_GROUP)
+ return -1;
+
+ lhpte.dw1.dword1 = 0;
+ lhpte.dw1.dw1.rpn = arpn;
+ lhpte.dw1.flags.flags = hpteflags;
+
+ lhpte.dw0.dword0 = 0;
+ lhpte.dw0.dw0.avpn = avpn;
+ lhpte.dw0.dw0.h = secondary;
+ lhpte.dw0.dw0.bolted = bolted;
+ lhpte.dw0.dw0.v = 1;
+
+ if (large)
+ lhpte.dw0.dw0.l = 1;
+
+ hptep->dw1.dword1 = lhpte.dw1.dword1;
+
+ /* Guarantee the second dword is visible before the valid bit */
+ __asm__ __volatile__ ("eieio" : : : "memory");
+
+ /*
+ * Now set the first dword including the valid bit
+ * NOTE: this also unlocks the hpte
+ */
+ hptep->dw0.dword0 = lhpte.dw0.dword0;
+
+ __asm__ __volatile__ ("ptesync" : : : "memory");
+
+ return i;
+}
+
+static long pSeries_remove_hpte(unsigned long hpte_group)
+{
+ HPTE *hptep;
+ Hpte_dword0 dw0;
+ int i;
+ int slot_offset;
+ unsigned long vsid, group, pi, pi_high;
+ unsigned long slot;
+ unsigned long flags;
+ int large;
+ unsigned long va;
+
+ /* pick a random slot to start at */
+ slot_offset = mftb() & 0x7;
+
+ udbg_printf("remove_hpte in %d\n", slot_offset);
+
+ for (i = 0; i < HPTES_PER_GROUP; i++) {
+ hptep = htab_data.htab + hpte_group + slot_offset;
+ dw0 = hptep->dw0.dw0;
+
+ if (dw0.v && !dw0.bolted) {
+ /* retry with lock held */
+ pSeries_lock_hpte(hptep);
+ dw0 = hptep->dw0.dw0;
+ if (dw0.v && !dw0.bolted)
+ break;
+ pSeries_unlock_hpte(hptep);
+ }
+
+ slot_offset++;
+ slot_offset &= 0x7;
+ }
+
+ if (i == HPTES_PER_GROUP)
+ return -1;
+
+ large = dw0.l;
+
+ /* Invalidate the hpte. NOTE: this also unlocks it */
+ hptep->dw0.dword0 = 0;
+
+ /* Invalidate the tlb */
+ vsid = dw0.avpn >> 5;
+ slot = hptep - htab_data.htab;
+ group = slot >> 3;
+ if (dw0.h)
+ group = ~group;
+ pi = (vsid ^ group) & 0x7ff;
+ pi_high = (dw0.avpn & 0x1f) << 11;
+ pi |= pi_high;
+
+ if (large)
+ va = pi << LARGE_PAGE_SHIFT;
+ else
+ va = pi << PAGE_SHIFT;
+
+ spin_lock_irqsave(&pSeries_tlbie_lock, flags);
+ _tlbie(va, large);
+ spin_unlock_irqrestore(&pSeries_tlbie_lock, flags);
+
+ return i;
+}
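
The va reconstruction above inverts the primary hash: for 4 KB pages hpt_hash() is essentially vsid XOR page-index, so the group number gives back the low bits of the page index and the AVPN supplies the rest. Derivation sketch (assuming the hpt_hash() definition in asm-ppc64/mmu.h):

	hash  = vsid ^ pi                  hpt_hash() for small pages
	group = hash & htab_hash_mask      the PTEG the entry hashed to
	pi    = (vsid ^ group) & 0x7ff     low 11 bits of the page index
	pi   |= (avpn & 0x1f) << 11        high 5 bits live in the AVPN
	va    = pi << PAGE_SHIFT           (LARGE_PAGE_SHIFT for large pages)
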
+
+static inline void set_pp_bit(unsigned long pp, HPTE *addr)
+{
+ unsigned long old;
+ unsigned long *p = &addr->dw1.dword1;
+
+ __asm__ __volatile__(
+ "1: ldarx %0,0,%3\n\
+ rldimi %0,%2,0,62\n\
+ stdcx. %0,0,%3\n\
+ bne 1b"
+ : "=&r" (old), "=m" (*p)
+ : "r" (pp), "r" (p), "m" (*p)
+ : "cc");
+}
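
set_pp_bit() folds the two pp bits into the low bits of dword1 atomically; rldimi %0,%2,0,62 inserts bits 62-63 of the source into the destination. A hypothetical portable rendering of the same loop, for illustration only (the kernel uses the ldarx/stdcx. asm above):

	static void set_pp_bit_cas(unsigned long pp, HPTE *addr)
	{
		unsigned long *p = &addr->dw1.dword1;
		unsigned long old, new;

		do {
			old = *p;
			new = (old & ~0x3UL) | (pp & 0x3UL);
		} while (!__sync_bool_compare_and_swap(p, old, new));
	}
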
+
+/*
+ * Only works on small pages. Yes it's ugly to have to check each slot in
+ * the group but we only use this during bootup.
+ */
+static long pSeries_hpte_find(unsigned long vpn)
+{
+ HPTE *hptep;
+ unsigned long hash;
+ unsigned long i, j;
+ long slot;
+ Hpte_dword0 dw0;
+
+ hash = hpt_hash(vpn, 0);
+
+ for (j = 0; j < 2; j++) {
+ slot = (hash & htab_data.htab_hash_mask) * HPTES_PER_GROUP;
+ for (i = 0; i < HPTES_PER_GROUP; i++) {
+ hptep = htab_data.htab + slot;
+ dw0 = hptep->dw0.dw0;
+
+ if ((dw0.avpn == (vpn >> 11)) && dw0.v &&
+ (dw0.h == j)) {
+ /* HPTE matches */
+ if (j)
+ slot = -slot;
+ return slot;
+ }
+ ++slot;
+ }
+ hash = ~hash;
+ }
+
+ return -1;
+}
+
+static long pSeries_hpte_updatepp(unsigned long slot, unsigned long newpp,
+ unsigned long va, int large)
+{
+ HPTE *hptep = htab_data.htab + slot;
+ Hpte_dword0 dw0;
+ unsigned long vpn, avpn;
+ unsigned long flags;
+
+ udbg_printf("updatepp\n");
+
+ if (large)
+ vpn = va >> LARGE_PAGE_SHIFT;
+ else
+ vpn = va >> PAGE_SHIFT;
+
+ avpn = vpn >> 11;
+
+ pSeries_lock_hpte(hptep);
+
+ dw0 = hptep->dw0.dw0;
+
+ if ((dw0.avpn != avpn) || !dw0.v) {
+ pSeries_unlock_hpte(hptep);
+ udbg_printf("updatepp missed\n");
+ return -1;
+ }
+
+ set_pp_bit(newpp, hptep);
+
+ pSeries_unlock_hpte(hptep);
+
+ /* Ensure it is out of the tlb too */
+ /* XXX use tlbiel where possible */
+ spin_lock_irqsave(&pSeries_tlbie_lock, flags);
+ _tlbie(va, large);
+ spin_unlock_irqrestore(&pSeries_tlbie_lock, flags);
+
+ return 0;
+}
+
+/*
+ * Update the page protection bits. Intended to be used to create
+ * guard pages for kernel data structures on pages which are bolted
+ * in the HPT. Assumes pages being operated on will not be stolen.
+ * Does not work on large pages.
+ *
+ * No need to lock here because we should be the only user.
+ */
+static void pSeries_hpte_updateboltedpp(unsigned long newpp, unsigned long ea)
+{
+ unsigned long vsid, va, vpn, flags;
+ long slot;
+ HPTE *hptep;
+
+ vsid = get_kernel_vsid(ea);
+ va = (vsid << 28) | (ea & 0x0fffffff);
+ vpn = va >> PAGE_SHIFT;
+
+ slot = pSeries_hpte_find(vpn);
+ if (slot == -1)
+ panic("could not find page to bolt\n");
+ hptep = htab_data.htab + slot;
+
+ set_pp_bit(newpp, hptep);
+
+ /* Ensure it is out of the tlb too */
+ /* XXX use tlbiel where possible */
+ spin_lock_irqsave(&pSeries_tlbie_lock, flags);
+ _tlbie(va, 0);
+ spin_unlock_irqrestore(&pSeries_tlbie_lock, flags);
+}
+
+static void pSeries_hpte_invalidate(unsigned long slot, unsigned long va,
+ int large, int local)
+{
+ HPTE *hptep = htab_data.htab + slot;
+ Hpte_dword0 dw0;
+ unsigned long vpn, avpn;
+ unsigned long flags;
+
+ if (large)
+ vpn = va >> LARGE_PAGE_SHIFT;
+ else
+ vpn = va >> PAGE_SHIFT;
+
+ avpn = vpn >> 11;
+
+ pSeries_lock_hpte(hptep);
+
+ dw0 = hptep->dw0.dw0;
+
+ if ((dw0.avpn != avpn) || !dw0.v) {
+ pSeries_unlock_hpte(hptep);
+ udbg_printf("invalidate missed\n");
+ return;
+ }
+
+ /* Invalidate the hpte. NOTE: this also unlocks it */
+ hptep->dw0.dword0 = 0;
+
+ /* Invalidate the tlb */
+ if (!large && local && __is_processor(PV_POWER4)) {
+ _tlbiel(va, large);
+ } else {
+ spin_lock_irqsave(&pSeries_tlbie_lock, flags);
+ _tlbie(va, large);
+ spin_unlock_irqrestore(&pSeries_tlbie_lock, flags);
+ }
+}
+
+static void pSeries_flush_hash_range(unsigned long context,
+ unsigned long number, int local)
+{
+ unsigned long vsid, vpn, va, hash, secondary, slot, flags, avpn;
+ int i, j;
+ unsigned long va_array[MAX_BATCH_FLUSH];
+ HPTE *hptep;
+ Hpte_dword0 dw0;
+ struct tlb_batch_data *ptes = &tlb_batch_array[smp_processor_id()][0];
+ /* XXX fix for large ptes */
+ unsigned long large = 0;
+ j = 0;
+ for (i = 0; i < number; i++) {
+ if ((ptes->addr >= USER_START) && (ptes->addr <= USER_END))
+ vsid = get_vsid(context, ptes->addr);
+ else
+ vsid = get_kernel_vsid(ptes->addr);
+
+ va = (vsid << 28) | (ptes->addr & 0x0fffffff);
+ va_array[j] = va;
+ if (large)
+ vpn = va >> LARGE_PAGE_SHIFT;
+ else
+ vpn = va >> PAGE_SHIFT;
+ hash = hpt_hash(vpn, large);
+ secondary = (pte_val(ptes->pte) & _PAGE_SECONDARY) >> 15;
+ if (secondary)
+ hash = ~hash;
+ slot = (hash & htab_data.htab_hash_mask) * HPTES_PER_GROUP;
+ slot += (pte_val(ptes->pte) & _PAGE_GROUP_IX) >> 12;
+
+ hptep = htab_data.htab + slot;
+ avpn = vpn >> 11;
+
+ pSeries_lock_hpte(hptep);
+
+ dw0 = hptep->dw0.dw0;
+
+ ptes++;
+
+ if ((dw0.avpn != avpn) || !dw0.v) {
+ pSeries_unlock_hpte(hptep);
+ udbg_printf("invalidate missed\n");
+ continue;
+ }
+
+ j++;
+
+ /* Invalidate the hpte. NOTE: this also unlocks it */
+ hptep->dw0.dword0 = 0;
+ }
+
+ if (!large && local && __is_processor(PV_POWER4)) {
+ asm volatile("ptesync":::"memory");
+
+ for (i = 0; i < j; i++) {
+ asm volatile("\n\
+ clrldi %0,%0,16\n\
+ tlbiel %0"
+ : : "r" (va_array[i]) : "memory" );
+ }
+
+ asm volatile("ptesync":::"memory");
+ } else {
+ /* XXX double check that it is safe to take this late */
+ spin_lock_irqsave(&pSeries_tlbie_lock, flags);
+
+ asm volatile("ptesync":::"memory");
+
+ for (i = 0; i < j; i++) {
+ asm volatile("\n\
+ clrldi %0,%0,16\n\
+ tlbie %0"
+ : : "r" (va_array[i]) : "memory" );
+ }
+
+ asm volatile("eieio; tlbsync; ptesync":::"memory");
+
+ spin_unlock_irqrestore(&pSeries_tlbie_lock, flags);
+ }
+}
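
Batching pays off here: one ptesync/tlbsync bracket, and a single acquisition of the global tlbie lock, covers the whole batch instead of one per page. The invalidation sequence, pulled out as a sketch (clrldi keeps the low 48 bits of the va; the global path assumes the caller already holds pSeries_tlbie_lock):

	static void flush_va_batch(unsigned long *va, int n, int local)
	{
		unsigned long v;
		int i;

		asm volatile("ptesync" : : : "memory");
		for (i = 0; i < n; i++) {
			v = va[i] & 0x0000ffffffffffffUL; /* clrldi ..,16 */
			if (local)
				asm volatile("tlbiel %0" : : "r" (v) : "memory");
			else
				asm volatile("tlbie %0" : : "r" (v) : "memory");
		}
		if (local)
			asm volatile("ptesync" : : : "memory");
		else
			asm volatile("eieio; tlbsync; ptesync" : : : "memory");
	}
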
+
+void hpte_init_pSeries(void)
+{
+ struct device_node *root;
+ const char *model;
+
+ ppc_md.hpte_invalidate = pSeries_hpte_invalidate;
+ ppc_md.hpte_updatepp = pSeries_hpte_updatepp;
+ ppc_md.hpte_updateboltedpp = pSeries_hpte_updateboltedpp;
+ ppc_md.insert_hpte = pSeries_insert_hpte;
+ ppc_md.remove_hpte = pSeries_remove_hpte;
+ ppc_md.make_pte = pSeries_make_pte;
+
+ /* Disable TLB batching on nighthawk */
+ root = find_path_device("/");
+ if (root) {
+ model = get_property(root, "model", NULL);
+		if (model && strcmp(model, "CHRP IBM,9076-N81"))
+ ppc_md.flush_hash_range = pSeries_flush_hash_range;
+ }
+}
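
For orientation, the ppc_md hooks wired up above (and by pSeries_lpar_mm_init() below) have roughly the following shapes. The real declarations live in include/asm-ppc64/machdep.h, which this patch also touches; the prototypes here are inferred from the call sites in this file:

	void (*hpte_invalidate)(unsigned long slot, unsigned long va,
				int large, int local);
	long (*hpte_updatepp)(unsigned long slot, unsigned long newpp,
			      unsigned long va, int large);
	void (*hpte_updateboltedpp)(unsigned long newpp, unsigned long ea);
	long (*insert_hpte)(unsigned long hpte_group, unsigned long vpn,
			    unsigned long prpn, int secondary,
			    unsigned long hpteflags, int bolted, int large);
	long (*remove_hpte)(unsigned long hpte_group);
	void (*make_pte)(HPTE *htab, unsigned long va, unsigned long pa,
			 int mode, unsigned long hash_mask, int large);
	void (*flush_hash_range)(unsigned long context, unsigned long number,
				 int local);
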
diff --git a/arch/ppc64/kernel/pSeries_lpar.c b/arch/ppc64/kernel/pSeries_lpar.c
index 20adcefafeea..23d1dd029128 100644
--- a/arch/ppc64/kernel/pSeries_lpar.c
+++ b/arch/ppc64/kernel/pSeries_lpar.c
@@ -214,358 +214,6 @@ long plpar_xirr(unsigned long *xirr_ret)
xirr_ret, &dummy, &dummy);
}
-/*
- * The following section contains code that ultimately should
- * be put in the relavent file (htab.c, xics.c, etc). It has
- * been put here for the time being in order to ease maintainence
- * of the pSeries LPAR code until it can all be put into CVS.
- */
-static void hpte_invalidate_pSeriesLP(unsigned long slot)
-{
- HPTE old_pte;
- unsigned long lpar_rc;
- unsigned long flags = 0;
-
- lpar_rc = plpar_pte_remove(flags,
- slot,
- 0,
- &old_pte.dw0.dword0,
- &old_pte.dw1.dword1);
- if (lpar_rc != H_Success) BUG();
-}
-
-/* NOTE: for updatepp ops we are fortunate that the linux "newpp" bits and
- * the low 3 bits of flags happen to line up. So no transform is needed.
- * We can probably optimize here and assume the high bits of newpp are
- * already zero. For now I am paranoid.
- */
-static void hpte_updatepp_pSeriesLP(long slot, unsigned long newpp, unsigned long va)
-{
- unsigned long lpar_rc;
- unsigned long flags;
- flags = newpp & 3;
- lpar_rc = plpar_pte_protect( flags,
- slot,
- 0);
- if (lpar_rc != H_Success) {
- udbg_printf( " bad return code from pte protect rc = %lx \n", lpar_rc);
- for (;;);
- }
-}
-
-static void hpte_updateboltedpp_pSeriesLP(unsigned long newpp, unsigned long ea)
-{
- unsigned long lpar_rc;
- unsigned long vsid,va,vpn,flags;
- long slot;
-
- vsid = get_kernel_vsid( ea );
- va = ( vsid << 28 ) | ( ea & 0x0fffffff );
- vpn = va >> PAGE_SHIFT;
-
- slot = ppc_md.hpte_find( vpn );
- flags = newpp & 3;
- lpar_rc = plpar_pte_protect( flags,
- slot,
- 0);
- if (lpar_rc != H_Success) {
- udbg_printf( " bad return code from pte bolted protect rc = %lx \n", lpar_rc);
- for (;;);
- }
-}
-
-
-static unsigned long hpte_getword0_pSeriesLP(unsigned long slot)
-{
- unsigned long dword0;
- unsigned long lpar_rc;
- unsigned long dummy_word1;
- unsigned long flags;
- /* Read 1 pte at a time */
- /* Do not need RPN to logical page translation */
- /* No cross CEC PFT access */
- flags = 0;
-
- lpar_rc = plpar_pte_read(flags,
- slot,
- &dword0, &dummy_word1);
- if (lpar_rc != H_Success) {
- udbg_printf(" error on pte read in get_hpte0 rc = %lx \n", lpar_rc);
- for (;;);
- }
-
- return(dword0);
-}
-
-static long hpte_selectslot_pSeriesLP(unsigned long vpn)
-{
- unsigned long primary_hash;
- unsigned long hpteg_slot;
- unsigned i, k;
- unsigned long flags;
- HPTE pte_read;
- unsigned long lpar_rc;
-
- /* Search the primary group for an available slot */
- primary_hash = hpt_hash(vpn, 0);
-
- hpteg_slot = ( primary_hash & htab_data.htab_hash_mask ) * HPTES_PER_GROUP;
-
- /* Read 1 pte at a time */
- /* Do not need RPN to logical page translation */
- /* No cross CEC PFT access */
- flags = 0;
- for (i=0; i<HPTES_PER_GROUP; ++i) {
- /* read the hpte entry from the slot */
- lpar_rc = plpar_pte_read(flags,
- hpteg_slot + i,
- &pte_read.dw0.dword0, &pte_read.dw1.dword1);
- if (lpar_rc != H_Success) {
- udbg_printf(" read of hardware page table failed rc = %lx \n", lpar_rc);
- for (;;);
- }
- if ( pte_read.dw0.dw0.v == 0 ) {
- /* If an available slot found, return it */
- return hpteg_slot + i;
- }
-
- }
-
-
- /* Search the secondary group for an available slot */
- hpteg_slot = ( ~primary_hash & htab_data.htab_hash_mask ) * HPTES_PER_GROUP;
-
-
- for (i=0; i<HPTES_PER_GROUP; ++i) {
- /* read the hpte entry from the slot */
- lpar_rc = plpar_pte_read(flags,
- hpteg_slot + i,
- &pte_read.dw0.dword0, &pte_read.dw1.dword1);
- if (lpar_rc != H_Success) {
- udbg_printf(" read of hardware page table failed2 rc = %lx \n", lpar_rc);
- for (;;);
- }
- if ( pte_read.dw0.dw0.v == 0 ) {
- /* If an available slot found, return it */
- return hpteg_slot + i;
- }
-
- }
-
- /* No available entry found in secondary group */
-
-
- /* Select an entry in the primary group to replace */
-
- hpteg_slot = ( primary_hash & htab_data.htab_hash_mask ) * HPTES_PER_GROUP;
-
- k = htab_data.next_round_robin++ & 0x7;
-
- for (i=0; i<HPTES_PER_GROUP; ++i) {
- if (k == HPTES_PER_GROUP)
- k = 0;
-
- lpar_rc = plpar_pte_read(flags,
- hpteg_slot + k,
- &pte_read.dw0.dword0, &pte_read.dw1.dword1);
- if (lpar_rc != H_Success) {
- udbg_printf( " pte read failed - rc = %lx", lpar_rc);
- for (;;);
- }
- if ( ! pte_read.dw0.dw0.bolted)
- {
- hpteg_slot += k;
- /* Invalidate the current entry */
- ppc_md.hpte_invalidate(hpteg_slot);
- return hpteg_slot;
- }
- ++k;
- }
-
- /* No non-bolted entry found in primary group - time to panic */
- udbg_printf("select_hpte_slot - No non-bolted HPTE in group 0x%lx! \n", hpteg_slot/HPTES_PER_GROUP);
- udbg_printf("No non-bolted HPTE in group %lx", (unsigned long)hpteg_slot/HPTES_PER_GROUP);
- for (;;);
-
- /* never executes - avoid compiler errors */
- return 0;
-}
-
-
-static void hpte_create_valid_pSeriesLP(unsigned long slot, unsigned long vpn,
- unsigned long prpn, unsigned hash,
- void *ptep, unsigned hpteflags,
- unsigned bolted)
-{
- /* Local copy of HPTE */
- struct {
- /* Local copy of first doubleword of HPTE */
- union {
- unsigned long d;
- Hpte_dword0 h;
- } dw0;
- /* Local copy of second doubleword of HPTE */
- union {
- unsigned long d;
- Hpte_dword1 h;
- Hpte_dword1_flags f;
- } dw1;
- } lhpte;
-
- unsigned long avpn = vpn >> 11;
- unsigned long arpn = physRpn_to_absRpn( prpn );
-
- unsigned long lpar_rc;
- unsigned long flags;
- HPTE ret_hpte;
-
- /* Fill in the local HPTE with absolute rpn, avpn and flags */
- lhpte.dw1.d = 0;
- lhpte.dw1.h.rpn = arpn;
- lhpte.dw1.f.flags = hpteflags;
-
- lhpte.dw0.d = 0;
- lhpte.dw0.h.avpn = avpn;
- lhpte.dw0.h.h = hash;
- lhpte.dw0.h.bolted = bolted;
- lhpte.dw0.h.v = 1;
-
- /* Now fill in the actual HPTE */
- /* Set CEC cookie to 0 */
- /* Large page = 0 */
- /* Zero page = 0 */
- /* I-cache Invalidate = 0 */
- /* I-cache synchronize = 0 */
- /* Exact = 1 - only modify exact entry */
- flags = H_EXACT;
-
- if (hpteflags & (_PAGE_GUARDED|_PAGE_NO_CACHE))
- lhpte.dw1.f.flags &= ~_PAGE_COHERENT;
-#if 1
- __asm__ __volatile__ (
- H_ENTER_r3
- "mr 4, %1\n"
- "mr 5, %2\n"
- "mr 6, %3\n"
- "mr 7, %4\n"
- HSC
- "mr %0, 3\n"
- : "=r" (lpar_rc)
- : "r" (flags), "r" (slot), "r" (lhpte.dw0.d), "r" (lhpte.dw1.d)
- : "r3", "r4", "r5", "r6", "r7", "cc");
-#else
- lpar_rc = plpar_pte_enter(flags,
- slot,
- lhpte.dw0.d,
- lhpte.dw1.d,
- &ret_hpte.dw0.dword0,
- &ret_hpte.dw1.dword1);
-#endif
- if (lpar_rc != H_Success) {
- udbg_printf("error on pte enter lapar rc = %ld\n",lpar_rc);
- udbg_printf("ent: s=%lx, dw0=%lx, dw1=%lx\n", slot, lhpte.dw0.d, lhpte.dw1.d);
- /* xmon_backtrace("backtrace"); */
- for (;;);
- }
-}
-
-static long hpte_find_pSeriesLP(unsigned long vpn)
-{
- union {
- unsigned long d;
- Hpte_dword0 h;
- } hpte_dw0;
- long slot;
- unsigned long hash;
- unsigned long i,j;
-
- hash = hpt_hash(vpn, 0);
- for ( j=0; j<2; ++j ) {
- slot = (hash & htab_data.htab_hash_mask) * HPTES_PER_GROUP;
- for ( i=0; i<HPTES_PER_GROUP; ++i ) {
- hpte_dw0.d = hpte_getword0_pSeriesLP( slot );
- if ( ( hpte_dw0.h.avpn == ( vpn >> 11 ) ) &&
- ( hpte_dw0.h.v ) &&
- ( hpte_dw0.h.h == j ) ) {
- /* HPTE matches */
- if ( j )
- slot = -slot;
- return slot;
- }
- ++slot;
- }
- hash = ~hash;
- }
- return -1;
-}
-
-/*
- * Create a pte - LPAR . Used during initialization only.
- * We assume the PTE will fit in the primary PTEG.
- */
-void make_pte_LPAR(HPTE *htab,
- unsigned long va, unsigned long pa, int mode,
- unsigned long hash_mask, int large)
-{
- HPTE local_hpte, ret_hpte;
- unsigned long hash, slot, flags,lpar_rc, vpn;
-
- if (large)
- vpn = va >> 24;
- else
- vpn = va >> 12;
-
- hash = hpt_hash(vpn, large);
-
- slot = ((hash & hash_mask)*HPTES_PER_GROUP);
-
- local_hpte.dw1.dword1 = pa | mode;
- local_hpte.dw0.dword0 = 0;
- local_hpte.dw0.dw0.avpn = va >> 23;
- local_hpte.dw0.dw0.bolted = 1; /* bolted */
- if (large)
- local_hpte.dw0.dw0.l = 1; /* large page */
- local_hpte.dw0.dw0.v = 1;
-
- /* Set CEC cookie to 0 */
- /* Zero page = 0 */
- /* I-cache Invalidate = 0 */
- /* I-cache synchronize = 0 */
- /* Exact = 0 - modify any entry in group */
- flags = 0;
-#if 1
- __asm__ __volatile__ (
- H_ENTER_r3
- "mr 4, %1\n"
- "mr 5, %2\n"
- "mr 6, %3\n"
- "mr 7, %4\n"
- HSC
- "mr %0, 3\n"
- : "=r" (lpar_rc)
- : "r" (flags), "r" (slot), "r" (local_hpte.dw0.dword0), "r" (local_hpte.dw1.dword1)
- : "r3", "r4", "r5", "r6", "r7", "cc");
-#else
- lpar_rc = plpar_pte_enter(flags,
- slot,
- local_hpte.dw0.dword0,
- local_hpte.dw1.dword1,
- &ret_hpte.dw0.dword0,
- &ret_hpte.dw1.dword1);
-#endif
-#if 0 /* NOTE: we explicitly do not check return status here because it is
- * "normal" for early boot code to map io regions for which a partition
- * has no access. However, we will die if we actually fault on these
- * "permission denied" pages.
- */
- if (lpar_rc != H_Success) {
- /* pSeriesLP_init_early(); */
- udbg_printf("flags=%lx, slot=%lx, dword0=%lx, dword1=%lx, rc=%d\n", flags, slot, local_hpte.dw0.dword0,local_hpte.dw1.dword1, lpar_rc);
- BUG();
- }
-#endif
-}
-
static void tce_build_pSeriesLP(struct TceTable *tbl, long tcenum,
unsigned long uaddr, int direction )
{
@@ -786,19 +434,14 @@ static unsigned char udbg_getcLP(void)
}
}
+void pSeries_lpar_mm_init(void);
/* This is called early in setup.c.
* Use it to setup page table ppc_md stuff as well as udbg.
*/
void pSeriesLP_init_early(void)
{
- ppc_md.hpte_invalidate = hpte_invalidate_pSeriesLP;
- ppc_md.hpte_updatepp = hpte_updatepp_pSeriesLP;
- ppc_md.hpte_updateboltedpp = hpte_updateboltedpp_pSeriesLP;
- ppc_md.hpte_getword0 = hpte_getword0_pSeriesLP;
- ppc_md.hpte_selectslot = hpte_selectslot_pSeriesLP;
- ppc_md.hpte_create_valid = hpte_create_valid_pSeriesLP;
- ppc_md.hpte_find = hpte_find_pSeriesLP;
+ pSeries_lpar_mm_init();
ppc_md.tce_build = tce_build_pSeriesLP;
ppc_md.tce_free = tce_free_pSeriesLP;
@@ -892,3 +535,315 @@ int hvc_count(int *start_termno)
}
return 0;
}
+
+
+
+
+
+
+/*
+ * Create a pte - LPAR. Used during initialization only.
+ * We assume the PTE will fit in the primary PTEG.
+ */
+void pSeries_lpar_make_pte(HPTE *htab, unsigned long va, unsigned long pa,
+ int mode, unsigned long hash_mask, int large)
+{
+ HPTE local_hpte;
+ unsigned long hash, slot, flags, lpar_rc, vpn;
+ unsigned long dummy1, dummy2;
+
+ if (large)
+ vpn = va >> LARGE_PAGE_SHIFT;
+ else
+ vpn = va >> PAGE_SHIFT;
+
+ hash = hpt_hash(vpn, large);
+
+ slot = ((hash & hash_mask)*HPTES_PER_GROUP);
+
+ local_hpte.dw1.dword1 = pa | mode;
+ local_hpte.dw0.dword0 = 0;
+ local_hpte.dw0.dw0.avpn = va >> 23;
+ local_hpte.dw0.dw0.bolted = 1; /* bolted */
+ if (large) {
+ local_hpte.dw0.dw0.l = 1; /* large page */
+ local_hpte.dw0.dw0.avpn &= ~0x1UL;
+ }
+ local_hpte.dw0.dw0.v = 1;
+
+ /* Set CEC cookie to 0 */
+ /* Zero page = 0 */
+ /* I-cache Invalidate = 0 */
+ /* I-cache synchronize = 0 */
+ /* Exact = 0 - modify any entry in group */
+ flags = 0;
+ lpar_rc = plpar_pte_enter(flags, slot, local_hpte.dw0.dword0,
+ local_hpte.dw1.dword1, &dummy1, &dummy2);
+
+#if 0
+ /*
+ * NOTE: we explicitly do not check return status here because it is
+ * "normal" for early boot code to map io regions for which a partition
+ * has no access. However, we will die if we actually fault on these
+ * "permission denied" pages.
+ */
+ if (lpar_rc != H_Success) {
+ udbg_printf("flags=%lx, slot=%lx, dword0=%lx, dword1=%lx, rc=%d\n", flags, slot, local_hpte.dw0.dword0,local_hpte.dw1.dword1, lpar_rc);
+ BUG();
+ }
+#endif
+}
+
+static long pSeries_lpar_insert_hpte(unsigned long hpte_group,
+ unsigned long vpn, unsigned long prpn,
+ int secondary, unsigned long hpteflags,
+ int bolted, int large)
+{
+ unsigned long avpn = vpn >> 11;
+ unsigned long arpn = physRpn_to_absRpn(prpn);
+ unsigned long lpar_rc;
+ unsigned long flags;
+ unsigned long slot;
+ HPTE lhpte;
+
+ /* Fill in the local HPTE with absolute rpn, avpn and flags */
+ lhpte.dw1.dword1 = 0;
+ lhpte.dw1.dw1.rpn = arpn;
+ lhpte.dw1.flags.flags = hpteflags;
+
+ lhpte.dw0.dword0 = 0;
+ lhpte.dw0.dw0.avpn = avpn;
+ lhpte.dw0.dw0.h = secondary;
+ lhpte.dw0.dw0.bolted = bolted;
+ lhpte.dw0.dw0.v = 1;
+
+ if (large)
+ lhpte.dw0.dw0.l = 1;
+
+ /* Now fill in the actual HPTE */
+ /* Set CEC cookie to 0 */
+ /* Large page = 0 */
+ /* Zero page = 0 */
+ /* I-cache Invalidate = 0 */
+ /* I-cache synchronize = 0 */
+ /* Exact = 0 */
+ flags = 0;
+
+ /* XXX why is this here? - Anton */
+ if (hpteflags & (_PAGE_GUARDED|_PAGE_NO_CACHE))
+ lhpte.dw1.flags.flags &= ~_PAGE_COHERENT;
+
+ __asm__ __volatile__ (
+ H_ENTER_r3
+ "mr 4, %2\n"
+ "mr 5, %3\n"
+ "mr 6, %4\n"
+ "mr 7, %5\n"
+ HSC
+ "mr %0, 3\n"
+ "mr %1, 4\n"
+ : "=r" (lpar_rc), "=r" (slot)
+ : "r" (flags), "r" (hpte_group), "r" (lhpte.dw0.dword0),
+ "r" (lhpte.dw1.dword1)
+ : "r3", "r4", "r5", "r6", "r7", "cc");
+
+ if (lpar_rc == H_PTEG_Full)
+ return -1;
+
+ if (lpar_rc != H_Success) {
+		udbg_printf("error on pte enter lpar rc = %ld\n", lpar_rc);
+ udbg_printf("ent: s=%lx, dw0=%lx, dw1=%lx\n", slot,
+ lhpte.dw0.dword0, lhpte.dw1.dword1);
+
+ PPCDBG_ENTER_DEBUGGER();
+ panic("error on pte enter");
+ }
+
+ return slot;
+}
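
The inline asm above open-codes the H_ENTER hcall: H_ENTER_r3 loads the hcall number into r3, r4 through r7 carry the flags, group, dword0 and dword1 arguments, and on return r3 holds the status while r4 holds the slot the hypervisor chose. A hypothetical C-shaped wrapper with the same calling shape; hcall_4in_1out() is an assumed primitive and is not part of this patch:

	/* Hypothetical: issue H_ENTER, return status, chosen slot via *slot.
	 * hcall_4in_1out() is assumed: four inputs, status returned in r3,
	 * one output returned in r4. */
	static long lpar_enter_hpte(unsigned long flags, unsigned long group,
				    unsigned long dw0, unsigned long dw1,
				    unsigned long *slot)
	{
		return hcall_4in_1out(H_ENTER, flags, group, dw0, dw1, slot);
	}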
+
+static spinlock_t pSeries_lpar_tlbie_lock = SPIN_LOCK_UNLOCKED;
+
+static long pSeries_lpar_remove_hpte(unsigned long hpte_group)
+{
+ /* XXX take spinlock */
+ panic("pSeries_lpar_remove_hpte");
+}
+
+/* NOTE: for updatepp ops we are fortunate that the linux "newpp" bits and
+ * the low 3 bits of flags happen to line up. So no transform is needed.
+ * We can probably optimize here and assume the high bits of newpp are
+ * already zero. For now I am paranoid.
+ */
+static long pSeries_lpar_hpte_updatepp(unsigned long slot, unsigned long newpp,
+ unsigned long va, int large)
+{
+	unsigned long lpar_rc;
+	unsigned long flags = (newpp & 3) | H_AVPN;
+	unsigned long vpn = va >> PAGE_SHIFT;
+
+ udbg_printf("updatepp\n");
+
+ lpar_rc = plpar_pte_protect(flags, slot, (vpn >> 4) & ~0x7fUL);
+
+ if (lpar_rc == H_Not_Found) {
+ udbg_printf("updatepp missed\n");
+ return -1;
+ }
+
+ if (lpar_rc != H_Success) {
+ udbg_printf("bad return code from pte protect rc = %lx\n",
+ lpar_rc);
+ for (;;);
+ }
+
+ return 0;
+}
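
The mask-only update works because the Linux protection bits and the pp field of the hcall flags occupy the same low bit positions, so no shift or reassembly is needed; H_AVPN additionally asks the hypervisor to verify the AVPN before modifying the entry. As a sketch:

	static inline unsigned long pp_to_hcall_flags(unsigned long newpp)
	{
		/* low protection bits pass straight through; H_AVPN makes
		 * the hypervisor match the AVPN before updating */
		return (newpp & 3) | H_AVPN;
	}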
+
+static unsigned long pSeries_lpar_hpte_getword0(unsigned long slot)
+{
+ unsigned long dword0;
+ unsigned long lpar_rc;
+ unsigned long dummy_word1;
+ unsigned long flags;
+
+ /* Read 1 pte at a time */
+ /* Do not need RPN to logical page translation */
+ /* No cross CEC PFT access */
+ flags = 0;
+
+ lpar_rc = plpar_pte_read(flags, slot, &dword0, &dummy_word1);
+
+ if (lpar_rc != H_Success) {
+ udbg_printf("error on pte read in get_hpte0 rc = %lx\n",
+ lpar_rc);
+ for (;;);
+ }
+
+ return dword0;
+}
+
+static long pSeries_lpar_hpte_find(unsigned long vpn)
+{
+ unsigned long hash;
+ unsigned long i, j;
+ long slot;
+ union {
+ unsigned long dword0;
+ Hpte_dword0 dw0;
+ } hpte_dw0;
+ Hpte_dword0 dw0;
+
+ hash = hpt_hash(vpn, 0);
+
+ for (j = 0; j < 2; j++) {
+ slot = (hash & htab_data.htab_hash_mask) * HPTES_PER_GROUP;
+ for (i = 0; i < HPTES_PER_GROUP; i++) {
+ hpte_dw0.dword0 = pSeries_lpar_hpte_getword0(slot);
+ dw0 = hpte_dw0.dw0;
+
+ if ((dw0.avpn == (vpn >> 11)) && dw0.v &&
+ (dw0.h == j)) {
+ /* HPTE matches */
+ if (j)
+ slot = -slot;
+ return slot;
+ }
+ ++slot;
+ }
+ hash = ~hash;
+ }
+
+ return -1;
+}
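
pSeries_lpar_hpte_find() encodes which hash function matched in the sign of the returned slot: non-negative means the primary hash, negative means the secondary. A caller would decode it roughly as follows (sketch, not part of the patch):

	long slot = pSeries_lpar_hpte_find(vpn);
	int secondary = 0;

	if (slot < 0) {		/* matched under the secondary hash */
		slot = -slot;
		secondary = 1;
	}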
+
+static void pSeries_lpar_hpte_updateboltedpp(unsigned long newpp,
+ unsigned long ea)
+{
+ unsigned long lpar_rc;
+ unsigned long vsid, va, vpn, flags;
+ long slot;
+
+ vsid = get_kernel_vsid(ea);
+ va = (vsid << 28) | (ea & 0x0fffffff);
+ vpn = va >> PAGE_SHIFT;
+
+ slot = pSeries_lpar_hpte_find(vpn);
+ if (slot == -1)
+		panic("could not find page to bolt\n");
+
+ flags = newpp & 3;
+ lpar_rc = plpar_pte_protect(flags, slot, 0);
+
+ if (lpar_rc != H_Success) {
+ udbg_printf("bad return code from pte bolted protect rc = %lx\n", lpar_rc);
+ for (;;);
+ }
+}
+
+/*
+ * Take a spinlock around flushes to avoid bouncing the hypervisor tlbie
+ * lock.
+ */
+static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long va,
+ int large, int local)
+{
+ unsigned long vpn, avpn;
+ unsigned long lpar_rc;
+ unsigned long flags;
+ unsigned long dummy1, dummy2;
+
+ if (large)
+ vpn = va >> LARGE_PAGE_SHIFT;
+ else
+ vpn = va >> PAGE_SHIFT;
+
+ avpn = vpn >> 11;
+
+ spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags);
+ lpar_rc = plpar_pte_remove(H_AVPN, slot, (vpn >> 4) & ~0x7fUL, &dummy1,
+ &dummy2);
+ spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags);
+
+ if (lpar_rc == H_Not_Found) {
+ udbg_printf("invalidate missed\n");
+ return;
+ }
+
+ if (lpar_rc != H_Success) {
+ udbg_printf("bad return code from invalidate rc = %lx\n",
+ lpar_rc);
+ for (;;);
+ }
+}
+
+/*
+ * Take a spinlock around flushes to avoid bouncing the hypervisor tlbie
+ * lock.
+ */
+void pSeries_lpar_flush_hash_range(unsigned long context, unsigned long number,
+ int local)
+{
+ int i;
+ struct tlb_batch_data *ptes =
+ &tlb_batch_array[smp_processor_id()][0];
+ unsigned long flags;
+
+ spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags);
+ for (i = 0; i < number; i++) {
+ flush_hash_page(context, ptes->addr, ptes->pte, local);
+ ptes++;
+ }
+ spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags);
+}
+
+void pSeries_lpar_mm_init(void)
+{
+ ppc_md.hpte_invalidate = pSeries_lpar_hpte_invalidate;
+ ppc_md.hpte_updatepp = pSeries_lpar_hpte_updatepp;
+ ppc_md.hpte_updateboltedpp = pSeries_lpar_hpte_updateboltedpp;
+ ppc_md.insert_hpte = pSeries_lpar_insert_hpte;
+ ppc_md.remove_hpte = pSeries_lpar_remove_hpte;
+ ppc_md.make_pte = pSeries_lpar_make_pte;
+}
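
With pSeries_lpar_mm_init() filling in the machdep vector, callers stay platform-agnostic and reach the LPAR implementations through indirect calls, for example (sketch; the surrounding context is assumed):

	/* resolves to pSeries_lpar_hpte_invalidate() on LPAR */
	ppc_md.hpte_invalidate(slot, va, 0 /* !large */, local);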
diff --git a/arch/ppc64/mm/init.c b/arch/ppc64/mm/init.c
index ece7186a18c5..37a8a3cbe077 100644
--- a/arch/ppc64/mm/init.c
+++ b/arch/ppc64/mm/init.c
@@ -253,7 +253,7 @@ static void map_io_page(unsigned long ea, unsigned long pa, int flags)
* entry in the hardware page table.
*/
vsid = get_kernel_vsid(ea);
- make_pte(htab_data.htab,
+ ppc_md.make_pte(htab_data.htab,
(vsid << 28) | (ea & 0xFFFFFFF), // va (NOT the ea)
pa,
_PAGE_NO_CACHE | _PAGE_GUARDED | PP_RWXX,
@@ -262,29 +262,23 @@ static void map_io_page(unsigned long ea, unsigned long pa, int flags)
}
void
-local_flush_tlb_all(void)
-{
- /* Implemented to just flush the vmalloc area.
- * vmalloc is the only user of flush_tlb_all.
- */
- local_flush_tlb_range( NULL, VMALLOC_START, VMALLOC_END );
-}
-
-void
local_flush_tlb_mm(struct mm_struct *mm)
{
- if ( mm->map_count ) {
+ if (mm->map_count) {
struct vm_area_struct *mp;
- for ( mp = mm->mmap; mp != NULL; mp = mp->vm_next )
- local_flush_tlb_range( mm, mp->vm_start, mp->vm_end );
- }
- else /* MIKEC: It is not clear why this is needed */
+ for (mp = mm->mmap; mp != NULL; mp = mp->vm_next)
+ local_flush_tlb_range(mm, mp->vm_start, mp->vm_end);
+ } else {
+ /* MIKEC: It is not clear why this is needed */
/* paulus: it is needed to clear out stale HPTEs
* when an address space (represented by an mm_struct)
* is being destroyed. */
- local_flush_tlb_range( mm, USER_START, USER_END );
-}
+ local_flush_tlb_range(mm, USER_START, USER_END);
+ }
+ /* XXX are there races with checking cpu_vm_mask? - Anton */
+ mm->cpu_vm_mask = 0;
+}
/*
* Callers should hold the mm->page_table_lock
@@ -297,7 +291,9 @@ local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
pmd_t *pmd;
pte_t *ptep;
pte_t pte;
-
+ unsigned long flags;
+ int local = 0;
+
switch( REGION_ID(vmaddr) ) {
case VMALLOC_REGION_ID:
pgd = pgd_offset_k( vmaddr );
@@ -308,13 +304,17 @@ local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
case USER_REGION_ID:
pgd = pgd_offset( vma->vm_mm, vmaddr );
context = vma->vm_mm->context;
+
+ /* XXX are there races with checking cpu_vm_mask? - Anton */
+ if (vma->vm_mm->cpu_vm_mask == (1 << smp_processor_id()))
+ local = 1;
+
break;
default:
panic("local_flush_tlb_page: invalid region 0x%016lx", vmaddr);
}
-
if (!pgd_none(*pgd)) {
pmd = pmd_offset(pgd, vmaddr);
if (!pmd_none(*pmd)) {
@@ -322,12 +322,14 @@ local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
/* Check if HPTE might exist and flush it if so */
pte = __pte(pte_update(ptep, _PAGE_HPTEFLAGS, 0));
if ( pte_val(pte) & _PAGE_HASHPTE ) {
- flush_hash_page(context, vmaddr, pte);
+ flush_hash_page(context, vmaddr, pte, local);
}
}
}
}
+struct tlb_batch_data tlb_batch_array[NR_CPUS][MAX_BATCH_FLUSH];
+
void
local_flush_tlb_range(struct mm_struct *mm, unsigned long start, unsigned long end)
{
@@ -337,6 +339,10 @@ local_flush_tlb_range(struct mm_struct *mm, unsigned long start, unsigned long e
pte_t pte;
unsigned long pgd_end, pmd_end;
unsigned long context;
+ unsigned long flags;
+ int i = 0;
+ struct tlb_batch_data *ptes = &tlb_batch_array[smp_processor_id()][0];
+ int local = 0;
if ( start >= end )
panic("flush_tlb_range: start (%016lx) greater than end (%016lx)\n", start, end );
@@ -356,6 +362,12 @@ local_flush_tlb_range(struct mm_struct *mm, unsigned long start, unsigned long e
case USER_REGION_ID:
pgd = pgd_offset( mm, start );
context = mm->context;
+
+ /* XXX are there races with checking cpu_vm_mask? - Anton */
+ if (mm->cpu_vm_mask == (1 << smp_processor_id())) {
+ local = 1;
+ }
+
break;
default:
panic("flush_tlb_range: invalid region for start (%016lx) and end (%016lx)\n", start, end);
@@ -377,8 +389,17 @@ local_flush_tlb_range(struct mm_struct *mm, unsigned long start, unsigned long e
do {
if ( pte_val(*ptep) & _PAGE_HASHPTE ) {
pte = __pte(pte_update(ptep, _PAGE_HPTEFLAGS, 0));
- if ( pte_val(pte) & _PAGE_HASHPTE )
- flush_hash_page( context, start, pte );
+ if ( pte_val(pte) & _PAGE_HASHPTE ) {
+ ptes->pte = pte;
+ ptes->addr = start;
+ ptes++;
+ i++;
+ if (i == MAX_BATCH_FLUSH) {
+ flush_hash_range(context, MAX_BATCH_FLUSH, local);
+ i = 0;
+ ptes = &tlb_batch_array[smp_processor_id()][0];
+ }
+ }
}
start += PAGE_SIZE;
++ptep;
@@ -393,6 +414,9 @@ local_flush_tlb_range(struct mm_struct *mm, unsigned long start, unsigned long e
start = pgd_end;
++pgd;
} while ( start < end );
+
+ if (i)
+ flush_hash_range(context, i, local);
}
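
The loop above is the new TLB flush batching: candidate PTEs are staged in the per-cpu tlb_batch_array and handed to flush_hash_range() at most MAX_BATCH_FLUSH at a time, so an LPAR back end can take the hypervisor tlbie lock once per batch instead of once per page. The idiom in isolation (sketch):

	struct tlb_batch_data *batch = &tlb_batch_array[smp_processor_id()][0];
	int n = 0;

	/* ... for each pte with _PAGE_HASHPTE set ... */
	batch[n].pte = pte;
	batch[n].addr = addr;
	if (++n == MAX_BATCH_FLUSH) {
		flush_hash_range(context, MAX_BATCH_FLUSH, local);
		n = 0;
	}

	/* ... after the walk, flush any partial batch ... */
	if (n)
		flush_hash_range(context, n, local);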
@@ -643,3 +667,30 @@ void flush_icache_user_range(struct vm_area_struct *vma, struct page *page,
maddr = (unsigned long)page_address(page) + (addr & ~PAGE_MASK);
flush_icache_range(maddr, maddr + len);
}
+
+/*
+ * This is called at the end of handling a user page fault, when the
+ * fault has been handled by updating a PTE in the linux page tables.
+ * We use it to preload an HPTE into the hash table corresponding to
+ * the updated linux PTE.
+ */
+void update_mmu_cache(struct vm_area_struct *vma, unsigned long ea,
+ pte_t pte)
+{
+ unsigned long vsid;
+ void *pgdir;
+ pte_t *ptep;
+
+ /* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */
+ if (!pte_young(pte))
+ return;
+
+ pgdir = vma->vm_mm->pgd;
+ if (pgdir == NULL)
+ return;
+
+ ptep = find_linux_pte(pgdir, ea);
+ vsid = get_vsid(vma->vm_mm->context, ea);
+
+ __hash_page(ea, pte_val(pte) & (_PAGE_USER|_PAGE_RW), vsid, ptep);
+}
diff --git a/include/asm-ppc64/machdep.h b/include/asm-ppc64/machdep.h
index 304670e2177b..8b66663f5345 100644
--- a/include/asm-ppc64/machdep.h
+++ b/include/asm-ppc64/machdep.h
@@ -24,26 +24,33 @@ struct machdep_calls {
/* High use functions in the first cachelines, low use functions
* follow. DRENG collect profile data.
*/
- void (*hpte_invalidate)(unsigned long slot);
-
- void (*hpte_updatepp)(long slot,
+ void (*hpte_invalidate)(unsigned long slot,
+ unsigned long va,
+ int large,
+ int local);
+ long (*hpte_updatepp)(unsigned long slot,
unsigned long newpp,
- unsigned long va);
+ unsigned long va,
+ int large);
void (*hpte_updateboltedpp)(unsigned long newpp,
unsigned long ea);
- unsigned long (*hpte_getword0)(unsigned long slot);
-
- long (*hpte_find)( unsigned long vpn );
-
- long (*hpte_selectslot)(unsigned long vpn);
+ long (*insert_hpte)(unsigned long hpte_group,
+ unsigned long vpn,
+ unsigned long prpn,
+ int secondary,
+ unsigned long hpteflags,
+ int bolted,
+ int large);
+ long (*remove_hpte)(unsigned long hpte_group);
+ void (*flush_hash_range)(unsigned long context,
+ unsigned long number,
+ int local);
+ void (*make_pte)(void *htab, unsigned long va,
+ unsigned long pa,
+ int mode,
+ unsigned long hash_mask,
+ int large);
- void (*hpte_create_valid)(unsigned long slot,
- unsigned long vpn,
- unsigned long prpn,
- unsigned hash,
- void * ptep,
- unsigned hpteflags,
- unsigned bolted);
void (*tce_build)(struct TceTable * tbl,
long tcenum,
unsigned long uaddr,
diff --git a/include/asm-ppc64/mmu.h b/include/asm-ppc64/mmu.h
index df830a68e927..a0e55d9d023a 100644
--- a/include/asm-ppc64/mmu.h
+++ b/include/asm-ppc64/mmu.h
@@ -77,7 +77,7 @@ typedef struct {
unsigned long resv0: 7; /* Padding to a 64b boundary */
} slb_dword1;
-typedef struct _SLBE {
+typedef struct {
union {
unsigned long dword0;
slb_dword0 dw0;
@@ -107,26 +107,13 @@ typedef struct {
unsigned long avpn:57; /* vsid | api == avpn */
unsigned long : 2; /* Software use */
unsigned long bolted: 1; /* HPTE is "bolted" */
- unsigned long : 1; /* Software use */
+ unsigned long lock: 1; /* lock on pSeries SMP */
unsigned long l: 1; /* Virtual page is large (L=1) or 4 KB (L=0) */
unsigned long h: 1; /* Hash function identifier */
unsigned long v: 1; /* Valid (v=1) or invalid (v=0) */
} Hpte_dword0;
typedef struct {
- unsigned long : 6; /* unused - padding */
- unsigned long ac: 1; /* Address compare */
- unsigned long r: 1; /* Referenced */
- unsigned long c: 1; /* Changed */
- unsigned long w: 1; /* Write-thru cache mode */
- unsigned long i: 1; /* Cache inhibited */
- unsigned long m: 1; /* Memory coherence required */
- unsigned long g: 1; /* Guarded */
- unsigned long n: 1; /* No-execute */
- unsigned long pp: 2; /* Page protection bits 1:2 */
-} Hpte_flags;
-
-typedef struct {
unsigned long pp0: 1; /* Page protection bit 0 */
unsigned long : 1; /* Reserved */
unsigned long rpn: 50; /* Real page number */
@@ -134,12 +121,12 @@ typedef struct {
unsigned long ac: 1; /* Address compare */
unsigned long r: 1; /* Referenced */
unsigned long c: 1; /* Changed */
- unsigned long w: 1; /* Write-thru cache mode */
- unsigned long i: 1; /* Cache inhibited */
- unsigned long m: 1; /* Memory coherence required */
- unsigned long g: 1; /* Guarded */
- unsigned long n: 1; /* No-execute */
- unsigned long pp: 2; /* Page protection bits 1:2 */
+ unsigned long w: 1; /* Write-thru cache mode */
+ unsigned long i: 1; /* Cache inhibited */
+ unsigned long m: 1; /* Memory coherence required */
+ unsigned long g: 1; /* Guarded */
+ unsigned long n: 1; /* No-execute */
+ unsigned long pp: 2; /* Page protection bits 1:2 */
} Hpte_dword1;
typedef struct {
@@ -148,7 +135,7 @@ typedef struct {
unsigned long flags: 10; /* HPTE flags */
} Hpte_dword1_flags;
-typedef struct _HPTE {
+typedef struct {
union {
unsigned long dword0;
Hpte_dword0 dw0;
@@ -156,21 +143,8 @@ typedef struct _HPTE {
union {
unsigned long dword1;
- struct {
- unsigned long pp0: 1; /* Page protection bit 0 */
- unsigned long ts: 1; /* Tag set bit */
- unsigned long rpn: 50; /* Real page number */
- unsigned long : 2; /* Unused */
- unsigned long ac: 1; /* Address compare bit */
- unsigned long r: 1; /* Referenced */
- unsigned long c: 1; /* Changed */
- unsigned long w: 1; /* Write-thru cache mode */
- unsigned long i: 1; /* Cache inhibited */
- unsigned long m: 1; /* Memory coherence */
- unsigned long g: 1; /* Guarded */
- unsigned long n: 1; /* No-execute page if N=1 */
- unsigned long pp: 2; /* Page protection bit 1:2 */
- } dw1;
+ Hpte_dword1 dw1;
+ Hpte_dword1_flags flags;
} dw1;
} HPTE;
@@ -204,6 +178,8 @@ void create_valid_hpte( unsigned long slot, unsigned long vpn,
#define PT_SHIFT (12) /* Page Table */
#define PT_MASK 0x02FF
+#define LARGE_PAGE_SHIFT 24
+
static inline unsigned long hpt_hash(unsigned long vpn, int large)
{
unsigned long vsid;
@@ -220,20 +196,36 @@ static inline unsigned long hpt_hash(unsigned long vpn, int large)
return (vsid & 0x7fffffffff) ^ page;
}
-#define PG_SHIFT (12) /* Page Entry */
+static inline void _tlbie(unsigned long va, int large)
+{
+ asm volatile("ptesync": : :"memory");
+
+ if (large) {
+ asm volatile("clrldi %0,%0,16\n\
+ tlbie %0,1" : : "r"(va) : "memory");
+ } else {
+ asm volatile("clrldi %0,%0,16\n\
+ tlbie %0,0" : : "r"(va) : "memory");
+ }
-extern __inline__ void _tlbie( unsigned long va )
+ asm volatile("eieio; tlbsync; ptesync": : :"memory");
+}
+
+static inline void _tlbiel(unsigned long va, int large)
{
- __asm__ __volatile__ ( " \n\
- clrldi %0,%0,16 \n\
- ptesync \n\
- tlbie %0 \n\
- eieio \n\
- tlbsync \n\
- ptesync"
- : : "r" (va) : "memory" );
+ asm volatile("ptesync": : :"memory");
+
+ if (large) {
+ asm volatile("clrldi %0,%0,16\n\
+ tlbiel %0,1" : : "r"(va) : "memory");
+ } else {
+ asm volatile("clrldi %0,%0,16\n\
+ tlbiel %0,0" : : "r"(va) : "memory");
+ }
+
+ asm volatile("ptesync": : :"memory");
}
-
+
#endif /* __ASSEMBLY__ */
/* Block size masks */
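
_tlbie broadcasts the invalidation to every processor, which is why it needs the eieio/tlbsync/ptesync tail, while _tlbiel only invalidates the local TLB and gets away with a single ptesync. This is what the "local" flag threaded through the flush paths ultimately selects; a native (non-LPAR) flush path would pick between them roughly like this (sketch):

	if (local)
		_tlbiel(va, large);	/* cpu-local, ptesync only */
	else
		_tlbie(va, large);	/* broadcast, needs tlbsync */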
diff --git a/include/asm-ppc64/pgtable.h b/include/asm-ppc64/pgtable.h
index 32668b8e59f1..c81ad14b6b5e 100644
--- a/include/asm-ppc64/pgtable.h
+++ b/include/asm-ppc64/pgtable.h
@@ -7,6 +7,7 @@
*/
#ifndef __ASSEMBLY__
+#include <linux/threads.h>
#include <asm/processor.h> /* For TASK_SIZE */
#include <asm/mmu.h>
#include <asm/page.h>
@@ -93,13 +94,15 @@
#define _PAGE_WRITETHRU 0x040UL /* W: cache write-through */
#define _PAGE_DIRTY 0x080UL /* C: page changed */
#define _PAGE_ACCESSED 0x100UL /* R: page referenced */
+#if 0
#define _PAGE_HPTENOIX 0x200UL /* software: pte HPTE slot unknown */
+#endif
#define _PAGE_HASHPTE 0x400UL /* software: pte has an associated HPTE */
#define _PAGE_EXEC 0x800UL /* software: i-cache coherence required */
#define _PAGE_SECONDARY 0x8000UL /* software: HPTE is in secondary group */
#define _PAGE_GROUP_IX 0x7000UL /* software: HPTE index within group */
/* Bits 0x7000 identify the index within an HPT Group */
-#define _PAGE_HPTEFLAGS (_PAGE_HASHPTE | _PAGE_HPTENOIX | _PAGE_SECONDARY | _PAGE_GROUP_IX)
+#define _PAGE_HPTEFLAGS (_PAGE_HASHPTE | _PAGE_SECONDARY | _PAGE_GROUP_IX)
/* PAGE_MASK gives the right answer below, but only by accident */
/* It should be preserving the high 48 bits and then specifically */
/* preserving _PAGE_SECONDARY | _PAGE_GROUP_IX */
@@ -397,6 +400,7 @@ extern void paging_init(void);
* as entries are faulted into the hash table by the low-level
* data/instruction access exception handlers.
*/
+#if 0
/*
* We won't be able to use update_mmu_cache to update the
* hardware page table because we need to update the pte
@@ -404,9 +408,29 @@ extern void paging_init(void);
* its value.
*/
#define update_mmu_cache(vma, addr, pte) do { } while (0)
+#else
+/*
+ * This gets called at the end of handling a page fault, when
+ * the kernel has put a new PTE into the page table for the process.
+ * We use it to put a corresponding HPTE into the hash table
+ * ahead of time, instead of waiting for the inevitable extra
+ * hash-table miss exception.
+ */
+extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t);
+#endif
extern void flush_hash_segments(unsigned low_vsid, unsigned high_vsid);
-extern void flush_hash_page(unsigned long context, unsigned long ea, pte_t pte);
+extern void flush_hash_page(unsigned long context, unsigned long ea, pte_t pte,
+ int local);
+void flush_hash_range(unsigned long context, unsigned long number, int local);
+
+/* TLB flush batching */
+#define MAX_BATCH_FLUSH 128
+struct tlb_batch_data {
+ pte_t pte;
+ unsigned long addr;
+};
+extern struct tlb_batch_data tlb_batch_array[NR_CPUS][MAX_BATCH_FLUSH];
/* Encode and de-code a swap entry */
#define SWP_TYPE(entry) (((entry).val >> 1) & 0x3f)