Diffstat:

| -rw-r--r-- | arch/ppc64/kernel/Makefile       |    2 |
| -rw-r--r-- | arch/ppc64/kernel/htab.c         | 1081 |
| -rw-r--r-- | arch/ppc64/kernel/pSeries_htab.c |  470 |
| -rw-r--r-- | arch/ppc64/kernel/pSeries_lpar.c |  673 |
| -rw-r--r-- | arch/ppc64/mm/init.c             |   95 |
| -rw-r--r-- | include/asm-ppc64/machdep.h      |   39 |
| -rw-r--r-- | include/asm-ppc64/mmu.h          |   88 |
| -rw-r--r-- | include/asm-ppc64/pgtable.h      |   28 |
8 files changed, 1129 insertions, 1347 deletions
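
The patch below reworks the ppc64 hash-table (HPT) management for SMP scalability: the old ppc_md.hpte_selectslot/hpte_create_valid hooks are replaced by insert_hpte/remove_hpte, the bare-metal pSeries code moves into its own pSeries_htab.c, and the LPAR variants move into pSeries_lpar.c. Every path in the diff locates HPTEs with the same arithmetic, so a small sketch of it may help when reading the hunks. This is a reading aid only: hpt_hash() and HPTES_PER_GROUP are the kernel's, but hpte_group_of() is a made-up helper name and the constants are hard-coded for small (4K) pages.

/*
 * Illustration, not part of the patch: how __hash_page() and
 * flush_hash_page() turn a faulting address into an HPTE group.
 */
#define PAGE_SHIFT	12	/* small pages assumed */
#define HPTES_PER_GROUP	8

extern unsigned long hpt_hash(unsigned long vpn, int large);

static unsigned long hpte_group_of(unsigned long vsid, unsigned long ea,
				   unsigned long hash_mask, int secondary)
{
	/* 64-bit virtual address: VSID plus 28-bit segment offset */
	unsigned long va  = (vsid << 28) | (ea & 0x0fffffff);
	unsigned long vpn = va >> PAGE_SHIFT;
	unsigned long hash = hpt_hash(vpn, 0);

	if (secondary)		/* _PAGE_SECONDARY set in the linux pte */
		hash = ~hash;

	/* eight slots per group; mask to the group boundary */
	return ((hash & hash_mask) * HPTES_PER_GROUP) & ~0x7UL;
}

In __hash_page() below, insertion is tried in the primary group first; if it is full, the secondary group (~hash) is tried, and if both are full a pseudo-randomly chosen group has a non-bolted entry evicted via ppc_md.remove_hpte() before the insert is retried.
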
diff --git a/arch/ppc64/kernel/Makefile b/arch/ppc64/kernel/Makefile index 99c830bdcc86..768b0f992795 100644 --- a/arch/ppc64/kernel/Makefile +++ b/arch/ppc64/kernel/Makefile @@ -27,7 +27,7 @@ obj-y := ppc_ksyms.o setup.o entry.o traps.o irq.o idle.o \ pmc.o mf_proc.o proc_pmc.o iSeries_setup.o \ ItLpQueue.o hvCall.o mf.o HvLpEvent.o ras.o \ iSeries_proc.o HvCall.o HvLpConfig.o \ - rtc.o init_task.o + rtc.o init_task.o pSeries_htab.o obj-$(CONFIG_PCI) += pci.o pci_dn.o pci_dma.o obj-$(CONFIG_PPC_EEH) += eeh.o diff --git a/arch/ppc64/kernel/htab.c b/arch/ppc64/kernel/htab.c index fac9a4e37256..292c3cf9775d 100644 --- a/arch/ppc64/kernel/htab.c +++ b/arch/ppc64/kernel/htab.c @@ -1,10 +1,11 @@ /* - * - * * PowerPC64 port by Mike Corrigan and Dave Engebretsen * {mikejc|engebret}@us.ibm.com * * Copyright (c) 2000 Mike Corrigan <mikejc@us.ibm.com> + * + * SMP scalability work: + * Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM * * Module name: htab.c * @@ -18,17 +19,7 @@ */ #include <linux/config.h> -#include <asm/processor.h> -#include <asm/pgtable.h> -#include <asm/mmu.h> -#include <asm/mmu_context.h> -#include <asm/page.h> -#include <asm/types.h> -#include <asm/init.h> -#include <asm/system.h> -#include <asm/iSeries/LparData.h> #include <linux/spinlock.h> -#include <asm/ppcdebug.h> #include <linux/errno.h> #include <linux/sched.h> #include <linux/proc_fs.h> @@ -36,52 +27,42 @@ #include <linux/sysctl.h> #include <linux/ctype.h> #include <linux/cache.h> + +#include <asm/ppcdebug.h> +#include <asm/processor.h> +#include <asm/pgtable.h> +#include <asm/mmu.h> +#include <asm/mmu_context.h> +#include <asm/page.h> +#include <asm/types.h> +#include <asm/init.h> +#include <asm/system.h> #include <asm/uaccess.h> #include <asm/Naca.h> #include <asm/system.h> #include <asm/pmc.h> #include <asm/machdep.h> #include <asm/lmb.h> +#include <asm/abs_addr.h> #ifdef CONFIG_PPC_EEH #include <asm/eeh.h> #endif -/* For iSeries */ -#include <asm/iSeries/HvCallHpt.h> - -/* Note: pte --> Linux PTE +/* + * Note: pte --> Linux PTE * HPTE --> PowerPC Hashed Page Table Entry */ HTAB htab_data = {NULL, 0, 0, 0, 0}; -int proc_dol2crvec(ctl_table *table, int write, struct file *filp, - void *buffer, size_t *lenp); - -void htab_initialize(void); -void make_pte_LPAR(HPTE *htab, - unsigned long va, unsigned long pa, int mode, - unsigned long hash_mask, int large); - -extern unsigned long reloc_offset(void); -extern unsigned long get_kernel_vsid( unsigned long ea ); -extern void cacheable_memzero( void *, unsigned int ); - extern unsigned long _SDR1; extern unsigned long klimit; -extern struct Naca *naca; - -extern char _stext[], _etext[], __start_naca[], __end_stab[]; - -static spinlock_t hash_table_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED; +extern unsigned long reloc_offset(void); #define PTRRELOC(x) ((typeof(x))((unsigned long)(x) - offset)) #define PTRUNRELOC(x) ((typeof(x))((unsigned long)(x) + offset)) #define RELOC(x) (*PTRRELOC(&(x))) -extern unsigned long htab_size( unsigned long ); -unsigned long hpte_getword0_iSeries( unsigned long slot ); - #define KB (1024) #define MB (1024*KB) static inline void @@ -90,7 +71,7 @@ create_pte_mapping(unsigned long start, unsigned long end, { unsigned long addr, offset = reloc_offset(); HTAB *_htab_data = PTRRELOC(&htab_data); - HPTE *htab = (HPTE *)__v2a(_htab_data->htab); + HPTE *htab = (HPTE *)__v2a(_htab_data->htab); unsigned int step; if (large) @@ -101,8 +82,12 @@ create_pte_mapping(unsigned long start, unsigned long end, for (addr = start; addr < end; 
addr += step) { unsigned long vsid = get_kernel_vsid(addr); unsigned long va = (vsid << 28) | (addr & 0xfffffff); - make_pte(htab, va, (unsigned long)__v2a(addr), mode, mask, - large); + if (_machine == _MACH_pSeriesLP) + pSeries_lpar_make_pte(htab, va, + (unsigned long)__v2a(addr), mode, mask, large); + else + pSeries_make_pte(htab, va, + (unsigned long)__v2a(addr), mode, mask, large); } } @@ -111,7 +96,7 @@ htab_initialize(void) { unsigned long table, htab_size_bytes; unsigned long pteg_count; - unsigned long mode_ro, mode_rw, mask; + unsigned long mode_rw, mask; unsigned long offset = reloc_offset(); struct Naca *_naca = RELOC(naca); HTAB *_htab_data = PTRRELOC(&htab_data); @@ -132,7 +117,7 @@ htab_initialize(void) _htab_data->htab_num_ptegs = pteg_count; _htab_data->htab_hash_mask = pteg_count - 1; - if(_machine == _MACH_pSeries) { + if (_machine == _MACH_pSeries) { /* Find storage for the HPT. Must be contiguous in * the absolute address space. */ @@ -151,734 +136,91 @@ htab_initialize(void) RELOC(_SDR1) = 0; } - mode_ro = _PAGE_ACCESSED | _PAGE_COHERENT | PP_RXRX; mode_rw = _PAGE_ACCESSED | _PAGE_COHERENT | PP_RWXX; mask = pteg_count-1; - /* Create PTE's for the kernel text and data sections plus - * the HPT and HPTX arrays. Make the assumption that - * (addr & KERNELBASE) == 0 (ie they are disjoint). - * We also assume that the va is <= 64 bits. - */ -#if 0 - create_pte_mapping((unsigned long)_stext, (unsigned long)__start_naca, mode_ro, mask); - create_pte_mapping((unsigned long)__start_naca, (unsigned long)__end_stab, mode_rw, mask); - create_pte_mapping((unsigned long)__end_stab, (unsigned long)_etext, mode_ro, mask); - create_pte_mapping((unsigned long)_etext, RELOC(klimit), mode_rw, mask); - create_pte_mapping((unsigned long)__a2v(table), (unsigned long)__a2v(table+htab_size_bytes), mode_rw, mask); -#else -#ifndef CONFIG_PPC_ISERIES + /* XXX we currently map kernel text rw, should fix this */ if (__is_processor(PV_POWER4) && _naca->physicalMemorySize > 256*MB) { create_pte_mapping((unsigned long)KERNELBASE, KERNELBASE + 256*MB, mode_rw, mask, 0); create_pte_mapping((unsigned long)KERNELBASE + 256*MB, KERNELBASE + (_naca->physicalMemorySize), mode_rw, mask, 1); - return; + } else { + create_pte_mapping((unsigned long)KERNELBASE, + KERNELBASE+(_naca->physicalMemorySize), + mode_rw, mask, 0); } -#endif - create_pte_mapping((unsigned long)KERNELBASE, - KERNELBASE+(_naca->physicalMemorySize), - mode_rw, mask, 0); -#endif } #undef KB #undef MB /* - * Create a pte. Used during initialization only. - * We assume the PTE will fit in the primary PTEG. - */ -void make_pte(HPTE *htab, - unsigned long va, unsigned long pa, int mode, - unsigned long hash_mask, int large) -{ - HPTE *hptep; - unsigned long hash, i; - volatile unsigned long x = 1; - unsigned long vpn; - -#ifdef CONFIG_PPC_PSERIES - if(_machine == _MACH_pSeriesLP) { - make_pte_LPAR(htab, va, pa, mode, hash_mask, large); - return; - } -#endif - - if (large) - vpn = va >> 24; - else - vpn = va >> 12; - - hash = hpt_hash(vpn, large); - - hptep = htab + ((hash & hash_mask)*HPTES_PER_GROUP); - - for (i = 0; i < 8; ++i, ++hptep) { - if ( hptep->dw0.dw0.v == 0 ) { /* !valid */ - hptep->dw1.dword1 = pa | mode; - hptep->dw0.dword0 = 0; - hptep->dw0.dw0.avpn = va >> 23; - hptep->dw0.dw0.bolted = 1; /* bolted */ - hptep->dw0.dw0.v = 1; /* make valid */ - return; - } - } - - /* We should _never_ get here and too early to call xmon. 
*/ - for(;x;x|=1); -} - -/* Functions to invalidate a HPTE */ -static void hpte_invalidate_iSeries( unsigned long slot ) -{ - HvCallHpt_invalidateSetSwBitsGet( slot, 0, 0 ); -} - -static void hpte_invalidate_pSeries( unsigned long slot ) -{ - /* Local copy of the first doubleword of the HPTE */ - union { - unsigned long d; - Hpte_dword0 h; - } hpte_dw0; - - /* Locate the HPTE */ - HPTE * hptep = htab_data.htab + slot; - - /* Get the first doubleword of the HPTE */ - hpte_dw0.d = hptep->dw0.dword0; - - /* Invalidate the hpte */ - hptep->dw0.dword0 = 0; - - /* Invalidate the tlb */ - { - unsigned long vsid, group, pi, pi_high; - - vsid = hpte_dw0.h.avpn >> 5; - group = slot >> 3; - if(hpte_dw0.h.h) { - group = ~group; - } - pi = (vsid ^ group) & 0x7ff; - pi_high = (hpte_dw0.h.avpn & 0x1f) << 11; - pi |= pi_high; - _tlbie(pi << 12); - } -} - - -/* Select an available HPT slot for a new HPTE - * return slot index (if in primary group) - * return -slot index (if in secondary group) - */ -static long hpte_selectslot_iSeries( unsigned long vpn ) -{ - HPTE hpte; - long ret_slot, orig_slot; - unsigned long primary_hash; - unsigned long hpteg_slot; - unsigned long slot; - unsigned i, k; - union { - unsigned long d; - Hpte_dword0 h; - } hpte_dw0; - - ret_slot = orig_slot = HvCallHpt_findValid( &hpte, vpn ); - if ( hpte.dw0.dw0.v ) { /* If valid ...what do we do now? */ - udbg_printf( "hpte_selectslot_iSeries: vpn 0x%016lx already valid at slot 0x%016lx\n", vpn, ret_slot ); - udbg_printf( "hpte_selectslot_iSeries: returned hpte 0x%016lx 0x%016lx\n", hpte.dw0.dword0, hpte.dw1.dword1 ); - panic("select_hpte_slot found entry already valid\n"); - } - if ( ret_slot == -1 ) { /* -1 indicates no available slots */ - - /* No available entry found in secondary group */ - - PMC_SW_SYSTEM(htab_capacity_castouts); - - primary_hash = hpt_hash(vpn, 0); - hpteg_slot = ( primary_hash & htab_data.htab_hash_mask ) * HPTES_PER_GROUP; - k = htab_data.next_round_robin++ & 0x7; - - for ( i=0; i<HPTES_PER_GROUP; ++i ) { - if ( k == HPTES_PER_GROUP ) - k = 0; - slot = hpteg_slot + k; - hpte_dw0.d = hpte_getword0_iSeries( slot ); - if ( !hpte_dw0.h.bolted ) { - hpte_invalidate_iSeries( slot ); - ret_slot = slot; - } - ++k; - } - } else { - if ( ret_slot < 0 ) { - PMC_SW_SYSTEM(htab_primary_overflows); - ret_slot &= 0x7fffffffffffffff; - ret_slot = -ret_slot; - } - } - if ( ret_slot == -1 ) { - /* No non-bolted entry found in primary group - time to panic */ - udbg_printf("hpte_selectslot_pSeries - No non-bolted HPTE in group 0x%lx! 
\n", hpteg_slot/HPTES_PER_GROUP); - panic("No non-bolted HPTE in group %lx", (unsigned long)hpteg_slot/HPTES_PER_GROUP); - } - PPCDBG(PPCDBG_MM, "hpte_selectslot_iSeries: vpn=0x%016lx, orig_slot=0x%016lx, ret_slot=0x%016lx \n", - vpn, orig_slot, ret_slot ); - return ret_slot; -} - -static long hpte_selectslot_pSeries(unsigned long vpn) -{ - HPTE * hptep; - unsigned long primary_hash; - unsigned long hpteg_slot; - unsigned i, k; - - /* Search the primary group for an available slot */ - - primary_hash = hpt_hash(vpn, 0); - hpteg_slot = ( primary_hash & htab_data.htab_hash_mask ) * HPTES_PER_GROUP; - hptep = htab_data.htab + hpteg_slot; - - for (i=0; i<HPTES_PER_GROUP; ++i) { - if ( hptep->dw0.dw0.v == 0 ) { - /* If an available slot found, return it */ - return hpteg_slot + i; - } - hptep++; - } - - /* No available entry found in primary group */ - - PMC_SW_SYSTEM(htab_primary_overflows); - - /* Search the secondary group */ - - hpteg_slot = ( ~primary_hash & htab_data.htab_hash_mask ) * HPTES_PER_GROUP; - hptep = htab_data.htab + hpteg_slot; - - for (i=0; i<HPTES_PER_GROUP; ++i) { - if ( hptep->dw0.dw0.v == 0 ) { - /* If an available slot found, return it */ - return -(hpteg_slot + i); - } - hptep++; - } - - /* No available entry found in secondary group */ - - PMC_SW_SYSTEM(htab_capacity_castouts); - - /* Select an entry in the primary group to replace */ - - hpteg_slot = ( primary_hash & htab_data.htab_hash_mask ) * HPTES_PER_GROUP; - hptep = htab_data.htab + hpteg_slot; - k = htab_data.next_round_robin++ & 0x7; - - for (i=0; i<HPTES_PER_GROUP; ++i) { - if (k == HPTES_PER_GROUP) - k = 0; - - if (!hptep[k].dw0.dw0.bolted) { - hpteg_slot += k; - /* Invalidate the current entry */ - ppc_md.hpte_invalidate(hpteg_slot); - return hpteg_slot; - } - ++k; - } - - /* No non-bolted entry found in primary group - time to panic */ - udbg_printf("hpte_selectslot_pSeries - No non-bolted HPTE in group 0x%lx! 
\n", hpteg_slot/HPTES_PER_GROUP); - /* xmon(0); */ - panic("No non-bolted HPTE in group %lx", (unsigned long)hpteg_slot/HPTES_PER_GROUP); - - /* keep the compiler happy */ - return 0; -} - -unsigned long hpte_getword0_iSeries( unsigned long slot ) -{ - unsigned long dword0; - - HPTE hpte; - HvCallHpt_get( &hpte, slot ); - dword0 = hpte.dw0.dword0; - - return dword0; -} - -unsigned long hpte_getword0_pSeries( unsigned long slot ) -{ - unsigned long dword0; - HPTE * hptep = htab_data.htab + slot; - - dword0 = hptep->dw0.dword0; - return dword0; -} - -static long hpte_find_iSeries(unsigned long vpn) -{ - HPTE hpte; - long slot; - - slot = HvCallHpt_findValid( &hpte, vpn ); - if ( hpte.dw0.dw0.v ) { - if ( slot < 0 ) { - slot &= 0x7fffffffffffffff; - slot = -slot; - } - } else - slot = -1; - return slot; -} - -static long hpte_find_pSeries(unsigned long vpn) -{ - union { - unsigned long d; - Hpte_dword0 h; - } hpte_dw0; - long slot; - unsigned long hash; - unsigned long i,j; - - hash = hpt_hash(vpn, 0); - for ( j=0; j<2; ++j ) { - slot = (hash & htab_data.htab_hash_mask) * HPTES_PER_GROUP; - for ( i=0; i<HPTES_PER_GROUP; ++i ) { - hpte_dw0.d = hpte_getword0_pSeries( slot ); - if ( ( hpte_dw0.h.avpn == ( vpn >> 11 ) ) && - ( hpte_dw0.h.v ) && - ( hpte_dw0.h.h == j ) ) { - /* HPTE matches */ - if ( j ) - slot = -slot; - return slot; - } - ++slot; - } - hash = ~hash; - } - return -1; -} - -/* This function is called by iSeries setup when initializing the hpt */ -void build_valid_hpte( unsigned long vsid, unsigned long ea, unsigned long pa, - pte_t * ptep, unsigned hpteflags, unsigned bolted ) -{ - unsigned long vpn, flags; - long hpte_slot; - unsigned hash; - pte_t pte; - - vpn = ((vsid << 28) | ( ea & 0xffff000 )) >> 12; - - spin_lock_irqsave( &hash_table_lock, flags ); - - hpte_slot = ppc_md.hpte_selectslot( vpn ); - hash = 0; - if ( hpte_slot < 0 ) { - hash = 1; - hpte_slot = -hpte_slot; - } - ppc_md.hpte_create_valid( hpte_slot, vpn, pa >> 12, hash, ptep, - hpteflags, bolted ); - - if ( ptep ) { - /* Get existing pte flags */ - pte = *ptep; - pte_val(pte) &= ~_PAGE_HPTEFLAGS; - - /* Add in the has hpte flag */ - pte_val(pte) |= _PAGE_HASHPTE; - - /* Add in the _PAGE_SECONDARY flag */ - pte_val(pte) |= hash << 15; - - /* Add in the hpte slot */ - pte_val(pte) |= (hpte_slot << 12) & _PAGE_GROUP_IX; - - /* Save the new pte. */ - *ptep = pte; - - } - spin_unlock_irqrestore( &hash_table_lock, flags ); -} - - -/* Create an HPTE and validate it - * It is assumed that the HPT slot currently is invalid. 
- * The HPTE is set with the vpn, rpn (converted to absolute) - * and flags - */ -static void hpte_create_valid_iSeries(unsigned long slot, unsigned long vpn, - unsigned long prpn, unsigned hash, - void * ptep, unsigned hpteflags, - unsigned bolted ) -{ - /* Local copy of HPTE */ - struct { - /* Local copy of first doubleword of HPTE */ - union { - unsigned long d; - Hpte_dword0 h; - } dw0; - /* Local copy of second doubleword of HPTE */ - union { - unsigned long d; - Hpte_dword1 h; - Hpte_dword1_flags f; - } dw1; - } lhpte; - - unsigned long avpn = vpn >> 11; - unsigned long arpn = physRpn_to_absRpn( prpn ); - - /* Fill in the local HPTE with absolute rpn, avpn and flags */ - lhpte.dw1.d = 0; - lhpte.dw1.h.rpn = arpn; - lhpte.dw1.f.flags = hpteflags; - - lhpte.dw0.d = 0; - lhpte.dw0.h.avpn = avpn; - lhpte.dw0.h.h = hash; - lhpte.dw0.h.bolted = bolted; - lhpte.dw0.h.v = 1; - - /* Now fill in the actual HPTE */ - HvCallHpt_addValidate( slot, hash, (HPTE *)&lhpte ); -} - -static void hpte_create_valid_pSeries(unsigned long slot, unsigned long vpn, - unsigned long prpn, unsigned hash, - void * ptep, unsigned hpteflags, - unsigned bolted) -{ - /* Local copy of HPTE */ - struct { - /* Local copy of first doubleword of HPTE */ - union { - unsigned long d; - Hpte_dword0 h; - } dw0; - /* Local copy of second doubleword of HPTE */ - union { - unsigned long d; - Hpte_dword1 h; - Hpte_dword1_flags f; - } dw1; - } lhpte; - - unsigned long avpn = vpn >> 11; - unsigned long arpn = physRpn_to_absRpn( prpn ); - - HPTE *hptep; - - /* Fill in the local HPTE with absolute rpn, avpn and flags */ - lhpte.dw1.d = 0; - lhpte.dw1.h.rpn = arpn; - lhpte.dw1.f.flags = hpteflags; - - lhpte.dw0.d = 0; - lhpte.dw0.h.avpn = avpn; - lhpte.dw0.h.h = hash; - lhpte.dw0.h.bolted = bolted; - lhpte.dw0.h.v = 1; - - /* Now fill in the actual HPTE */ - hptep = htab_data.htab + slot; - - /* Set the second dword first so that the valid bit - * is the last thing set - */ - - hptep->dw1.dword1 = lhpte.dw1.d; - - /* Guarantee the second dword is visible before - * the valid bit - */ - - __asm__ __volatile__ ("eieio" : : : "memory"); - - /* Now set the first dword including the valid bit */ - hptep->dw0.dword0 = lhpte.dw0.d; - - __asm__ __volatile__ ("ptesync" : : : "memory"); -} - -/* find_linux_pte returns the address of a linux pte for a given + * find_linux_pte returns the address of a linux pte for a given * effective address and directory. If not found, it returns zero. */ - -pte_t * find_linux_pte( pgd_t * pgdir, unsigned long ea ) +pte_t *find_linux_pte(pgd_t *pgdir, unsigned long ea) { pgd_t *pg; pmd_t *pm; pte_t *pt = NULL; pte_t pte; - pg = pgdir + pgd_index( ea ); - if ( ! pgd_none( *pg ) ) { - pm = pmd_offset( pg, ea ); - if ( ! pmd_none( *pm ) ) { - pt = pte_offset_kernel( pm, ea ); + pg = pgdir + pgd_index(ea); + if (!pgd_none(*pg)) { + + pm = pmd_offset(pg, ea); + if (!pmd_none(*pm)) { + pt = pte_offset_kernel(pm, ea); pte = *pt; - if ( ! 
pte_present( pte ) ) + if (!pte_present(pte)) pt = NULL; } } return pt; - -} - -static inline unsigned long computeHptePP( unsigned long pte ) -{ - return ( pte & _PAGE_USER ) | - ( ( ( pte & _PAGE_USER ) >> 1 ) & - ( ( ~( ( pte >> 2 ) & /* _PAGE_RW */ - ( pte >> 7 ) ) ) & /* _PAGE_DIRTY */ - 1 ) ); } -static void hpte_updatepp_iSeries(long slot, unsigned long newpp, unsigned long va) +static inline unsigned long computeHptePP(unsigned long pte) { - HvCallHpt_setPp( slot, newpp ); -} - -static void hpte_updatepp_pSeries(long slot, unsigned long newpp, unsigned long va) -{ - /* Local copy of first doubleword of HPTE */ - union { - unsigned long d; - Hpte_dword0 h; - } hpte_dw0; - - /* Local copy of second doubleword of HPTE */ - union { - unsigned long d; - Hpte_dword1 h; - Hpte_dword1_flags f; - } hpte_dw1; - - HPTE * hptep = htab_data.htab + slot; - - /* Turn off valid bit in HPTE */ - hpte_dw0.d = hptep->dw0.dword0; - hpte_dw0.h.v = 0; - hptep->dw0.dword0 = hpte_dw0.d; - - /* Ensure it is out of the tlb too */ - _tlbie( va ); - - /* Insert the new pp bits into the HPTE */ - hpte_dw1.d = hptep->dw1.dword1; - hpte_dw1.h.pp = newpp; - hptep->dw1.dword1 = hpte_dw1.d; - - /* Ensure it is visible before validating */ - __asm__ __volatile__ ("eieio" : : : "memory"); - - /* Turn the valid bit back on in HPTE */ - hpte_dw0.h.v = 1; - hptep->dw0.dword0 = hpte_dw0.d; - - __asm__ __volatile__ ("ptesync" : : : "memory"); -} - -/* - * Update the page protection bits. Intended to be used to create - * guard pages for kernel data structures on pages which are bolted - * in the HPT. Assumes pages being operated on will not be stolen. - */ -void hpte_updateboltedpp_iSeries(unsigned long newpp, unsigned long ea ) -{ - unsigned long vsid,va,vpn; - long slot; - - vsid = get_kernel_vsid( ea ); - va = ( vsid << 28 ) | ( ea & 0x0fffffff ); - vpn = va >> PAGE_SHIFT; - - slot = ppc_md.hpte_find( vpn ); - HvCallHpt_setPp( slot, newpp ); -} - - -static __inline__ void set_pp_bit(unsigned long pp, HPTE *addr) -{ - unsigned long old; - unsigned long *p = (unsigned long *)(&(addr->dw1)); - - __asm__ __volatile__( - "1: ldarx %0,0,%3\n\ - rldimi %0,%2,0,62\n\ - stdcx. %0,0,%3\n\ - bne 1b" - : "=&r" (old), "=m" (*p) - : "r" (pp), "r" (p), "m" (*p) - : "cc"); -} - -/* - * Update the page protection bits. Intended to be used to create - * guard pages for kernel data structures on pages which are bolted - * in the HPT. Assumes pages being operated on will not be stolen. - */ -void hpte_updateboltedpp_pSeries(unsigned long newpp, unsigned long ea) -{ - unsigned long vsid,va,vpn,flags; - long slot; - HPTE *hptep; - - vsid = get_kernel_vsid( ea ); - va = ( vsid << 28 ) | ( ea & 0x0fffffff ); - vpn = va >> PAGE_SHIFT; - - slot = ppc_md.hpte_find( vpn ); - hptep = htab_data.htab + slot; - - set_pp_bit(newpp , hptep); - - /* Ensure it is out of the tlb too */ - spin_lock_irqsave( &hash_table_lock, flags ); - _tlbie( va ); - spin_unlock_irqrestore( &hash_table_lock, flags ); -} - - - -/* This is called very early. 
*/ -void hpte_init_iSeries(void) -{ - ppc_md.hpte_invalidate = hpte_invalidate_iSeries; - ppc_md.hpte_updatepp = hpte_updatepp_iSeries; - ppc_md.hpte_updateboltedpp = hpte_updateboltedpp_iSeries; - ppc_md.hpte_getword0 = hpte_getword0_iSeries; - ppc_md.hpte_selectslot = hpte_selectslot_iSeries; - ppc_md.hpte_create_valid = hpte_create_valid_iSeries; - ppc_md.hpte_find = hpte_find_iSeries; -} -void hpte_init_pSeries(void) -{ - ppc_md.hpte_invalidate = hpte_invalidate_pSeries; - ppc_md.hpte_updatepp = hpte_updatepp_pSeries; - ppc_md.hpte_updateboltedpp = hpte_updateboltedpp_pSeries; - ppc_md.hpte_getword0 = hpte_getword0_pSeries; - ppc_md.hpte_selectslot = hpte_selectslot_pSeries; - ppc_md.hpte_create_valid = hpte_create_valid_pSeries; - ppc_md.hpte_find = hpte_find_pSeries; + return (pte & _PAGE_USER) | + (((pte & _PAGE_USER) >> 1) & + ((~((pte >> 2) & /* _PAGE_RW */ + (pte >> 7))) & /* _PAGE_DIRTY */ + 1)); } /* * Handle a fault by adding an HPTE. If the address can't be determined * to be valid via Linux page tables, return 1. If handled return 0 */ -int hash_page(unsigned long ea, unsigned long access) +int __hash_page(unsigned long ea, unsigned long access, unsigned long vsid, + pte_t *ptep) { - void *pgdir; - unsigned long va, vsid, vpn; - unsigned long newpp, hash_ind, prpn; + unsigned long va, vpn; + unsigned long newpp, prpn; unsigned long hpteflags; long slot; - struct mm_struct *mm; - pte_t old_pte, new_pte, *ptep; - - /* Check for invalid addresses. */ - if (!IS_VALID_EA(ea)) - return 1; - - switch (REGION_ID(ea)) { - case USER_REGION_ID: - mm = current->mm; - if (mm == NULL) - return 1; - - vsid = get_vsid(mm->context, ea); - break; - case IO_REGION_ID: - mm = &ioremap_mm; - vsid = get_kernel_vsid(ea); - break; - case VMALLOC_REGION_ID: - mm = &init_mm; - vsid = get_kernel_vsid(ea); - break; -#ifdef CONFIG_PPC_EEH - case IO_UNMAPPED_REGION_ID: - udbg_printf("EEH Error ea = 0x%lx\n", ea); - PPCDBG_ENTER_DEBUGGER(); - panic("EEH Error ea = 0x%lx\n", ea); - break; -#endif - case KERNEL_REGION_ID: - /* - * As htab_initialize is now, we shouldn't ever get here since - * we're bolting the entire 0xC0... region. - */ - udbg_printf("Little faulted on kernel address 0x%lx\n", ea); - PPCDBG_ENTER_DEBUGGER(); - panic("Little faulted on kernel address 0x%lx\n", ea); - break; - default: - /* Not a valid range, send the problem up to do_page_fault */ - return 1; - break; - } + pte_t old_pte, new_pte; /* Search the Linux page table for a match with va */ va = (vsid << 28) | (ea & 0x0fffffff); vpn = va >> PAGE_SHIFT; - pgdir = mm->pgd; - - if (pgdir == NULL) - return 1; - - /* - * Lock the Linux page table to prevent mmap and kswapd - * from modifying entries while we search and update - */ - spin_lock(&mm->page_table_lock); - - ptep = find_linux_pte(pgdir, ea); /* * If no pte found or not present, send the problem up to * do_page_fault */ - if (!ptep || !pte_present(*ptep)) { - spin_unlock(&mm->page_table_lock); + if (!ptep || !pte_present(*ptep)) return 1; - } /* * Check the user's access rights to the page. If access should be * prevented then send the problem up to do_page_fault. */ access |= _PAGE_PRESENT; - if (access & ~(pte_val(*ptep))) { - spin_unlock(&mm->page_table_lock); + if (access & ~(pte_val(*ptep))) return 1; - } - - /* - * Acquire the hash table lock to guarantee that the linux - * pte we fetch will not change - */ - spin_lock(&hash_table_lock); - - /* - * At this point we have found a pte (which was present). 
- * The spinlocks prevent this status from changing - * The hash_table_lock prevents the _PAGE_HASHPTE status - * from changing (RPN, DIRTY and ACCESSED too) - * The page_table_lock prevents the pte from being - * invalidated or modified - */ /* * At this point, we have a pte (old_pte) which can be used to build @@ -906,86 +248,151 @@ int hash_page(unsigned long ea, unsigned long access) if (pte_val(old_pte) & _PAGE_HASHPTE) { /* There MIGHT be an HPTE for this pte */ unsigned long hash, slot, secondary; - /* Local copy of first doubleword of HPTE */ - union { - unsigned long d; - Hpte_dword0 h; - } hpte_dw0; + /* XXX fix large pte flag */ hash = hpt_hash(vpn, 0); secondary = (pte_val(old_pte) & _PAGE_SECONDARY) >> 15; if (secondary) hash = ~hash; slot = (hash & htab_data.htab_hash_mask) * HPTES_PER_GROUP; slot += (pte_val(old_pte) & _PAGE_GROUP_IX) >> 12; - /* If there is an HPTE for this page it is indexed by slot */ - hpte_dw0.d = ppc_md.hpte_getword0(slot); - if ((hpte_dw0.h.avpn == (vpn >> 11)) && - (hpte_dw0.h.v) && - (hpte_dw0.h.h == secondary)){ - /* HPTE matches */ - ppc_md.hpte_updatepp(slot, newpp, va); + + udbg_printf("updatepp cpu %d ea %lx vsid should be %lx\n", smp_processor_id(), ea, vsid); + + /* XXX fix large pte flag */ + if (ppc_md.hpte_updatepp(slot, newpp, va, 0) == -1) + pte_val(old_pte) &= ~_PAGE_HPTEFLAGS; + else if (!pte_same(old_pte, new_pte)) *ptep = new_pte; - } else { - /* HPTE is not for this pte */ - pte_val(old_pte) &= ~_PAGE_HPTEFLAGS; - } } if (!(pte_val(old_pte) & _PAGE_HASHPTE)) { - /* - * Case 1 - * For these cases we need to create a new - * HPTE and update the linux pte - */ - - /* Find an available HPTE slot */ - slot = ppc_md.hpte_selectslot(vpn); - - hash_ind = 0; - if (slot < 0) { - slot = -slot; - hash_ind = 1; - } - - /* Set the physical address */ + /* XXX fix large pte flag */ + unsigned long hash = hpt_hash(vpn, 0); + unsigned long hpte_group; prpn = pte_val(old_pte) >> PTE_SHIFT; +repeat: + hpte_group = ((hash & htab_data.htab_hash_mask) * + HPTES_PER_GROUP) & ~0x7UL; + /* Update the linux pte with the HPTE slot */ pte_val(new_pte) &= ~_PAGE_HPTEFLAGS; - pte_val(new_pte) |= hash_ind << 15; - pte_val(new_pte) |= (slot<<12) & _PAGE_GROUP_IX; pte_val(new_pte) |= _PAGE_HASHPTE; - /* + /* copy appropriate flags from linux pte */ + hpteflags = (pte_val(new_pte) & 0x1f8) | newpp; + + /* XXX fix large pte flag */ + slot = ppc_md.insert_hpte(hpte_group, vpn, prpn, 0, + hpteflags, 0, 0); + + /* Primary is full, try the secondary */ + if (slot == -1) { + pte_val(new_pte) |= 1 << 15; + hpte_group = ((~hash & htab_data.htab_hash_mask) * + HPTES_PER_GROUP) & ~0x7UL; + /* XXX fix large pte flag */ + slot = ppc_md.insert_hpte(hpte_group, vpn, prpn, + 1, hpteflags, 0, 0); + if (slot == -1) { + if (mftb() & 0x1) + hpte_group = ((hash & htab_data.htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; + + ppc_md.remove_hpte(hpte_group); + goto repeat; + } + } + + pte_val(new_pte) |= (slot<<12) & _PAGE_GROUP_IX; + + /* * No need to use ldarx/stdcx here because all who * might be updating the pte will hold the * page_table_lock or the hash_table_lock * (we hold both) */ *ptep = new_pte; + } - /* copy appropriate flags from linux pte */ - hpteflags = (pte_val(new_pte) & 0x1f8) | newpp; + return 0; +} - /* Create the HPTE */ - ppc_md.hpte_create_valid(slot, vpn, prpn, hash_ind, ptep, - hpteflags, 0); +int hash_page(unsigned long ea, unsigned long access) +{ + void *pgdir; + unsigned long vsid; + struct mm_struct *mm; + pte_t *ptep; + int ret; + + /* Check for invalid 
addresses. */ + if (!IS_VALID_EA(ea)) + return 1; + + switch (REGION_ID(ea)) { + case USER_REGION_ID: + mm = current->mm; + if (mm == NULL) + return 1; + + vsid = get_vsid(mm->context, ea); + break; + case IO_REGION_ID: + mm = &ioremap_mm; + vsid = get_kernel_vsid(ea); + break; + case VMALLOC_REGION_ID: + mm = &init_mm; + vsid = get_kernel_vsid(ea); + break; +#ifdef CONFIG_PPC_EEH + case IO_UNMAPPED_REGION_ID: + udbg_printf("EEH Error ea = 0x%lx\n", ea); + PPCDBG_ENTER_DEBUGGER(); + panic("EEH Error ea = 0x%lx\n", ea); + break; +#endif + case KERNEL_REGION_ID: + /* + * As htab_initialize is now, we shouldn't ever get here since + * we're bolting the entire 0xC0... region. + */ + udbg_printf("Little faulted on kernel address 0x%lx\n", ea); + PPCDBG_ENTER_DEBUGGER(); + panic("Little faulted on kernel address 0x%lx\n", ea); + break; + default: + /* Not a valid range, send the problem up to do_page_fault */ + return 1; + break; } - spin_unlock(&hash_table_lock); + pgdir = mm->pgd; + + if (pgdir == NULL) + return 1; + + /* + * Lock the Linux page table to prevent mmap and kswapd + * from modifying entries while we search and update + */ + spin_lock(&mm->page_table_lock); + ptep = find_linux_pte(pgdir, ea); + ret = __hash_page(ea, access, vsid, ptep); spin_unlock(&mm->page_table_lock); - return 0; + + return ret; } -void flush_hash_page(unsigned long context, unsigned long ea, pte_t pte) +void flush_hash_page(unsigned long context, unsigned long ea, pte_t pte, + int local) { - unsigned long vsid, vpn, va, hash, secondary, slot, flags; - union { - unsigned long d; - Hpte_dword0 h; - } hpte_dw0; + unsigned long vsid, vpn, va, hash, secondary, slot; + + /* XXX fix for large ptes */ + unsigned long large = 0; if ((ea >= USER_START) && (ea <= USER_END)) vsid = get_vsid(context, ea); @@ -993,156 +400,32 @@ void flush_hash_page(unsigned long context, unsigned long ea, pte_t pte) vsid = get_kernel_vsid(ea); va = (vsid << 28) | (ea & 0x0fffffff); - vpn = va >> PAGE_SHIFT; - hash = hpt_hash(vpn, 0); + if (large) + vpn = va >> LARGE_PAGE_SHIFT; + else + vpn = va >> PAGE_SHIFT; + hash = hpt_hash(vpn, large); secondary = (pte_val(pte) & _PAGE_SECONDARY) >> 15; if (secondary) hash = ~hash; slot = (hash & htab_data.htab_hash_mask) * HPTES_PER_GROUP; slot += (pte_val(pte) & _PAGE_GROUP_IX) >> 12; - spin_lock_irqsave(&hash_table_lock, flags); - /* - * Id prefer to flush even if our hpte was stolen, but the new - * entry could be bolted - Anton - */ - hpte_dw0.d = ppc_md.hpte_getword0(slot); - if ((hpte_dw0.h.avpn == (vpn >> 11)) && - (hpte_dw0.h.v) && - (hpte_dw0.h.h == secondary)){ - /* HPTE matches */ - ppc_md.hpte_invalidate(slot); - } - - spin_unlock_irqrestore(&hash_table_lock, flags); + ppc_md.hpte_invalidate(slot, va, large, local); } -int proc_dol2crvec(ctl_table *table, int write, struct file *filp, - void *buffer, size_t *lenp) +void flush_hash_range(unsigned long context, unsigned long number, int local) { - int vleft, first=1, len, left, val; -#define TMPBUFLEN 256 - char buf[TMPBUFLEN], *p; - static const char *sizestrings[4] = { - "2MB", "256KB", "512KB", "1MB" - }; - static const char *clockstrings[8] = { - "clock disabled", "+1 clock", "+1.5 clock", "reserved(3)", - "+2 clock", "+2.5 clock", "+3 clock", "reserved(7)" - }; - static const char *typestrings[4] = { - "flow-through burst SRAM", "reserved SRAM", - "pipelined burst SRAM", "pipelined late-write SRAM" - }; - static const char *holdstrings[4] = { - "0.5", "1.0", "(reserved2)", "(reserved3)" - }; - - if ( ((_get_PVR() >> 16) != 8) && 
((_get_PVR() >> 16) != 12)) - return -EFAULT; - - if ( /*!table->maxlen ||*/ (filp->f_pos && !write)) { - *lenp = 0; - return 0; - } - - vleft = table->maxlen / sizeof(int); - left = *lenp; - - for (; left /*&& vleft--*/; first=0) { - if (write) { - while (left) { - char c; - if(get_user(c,(char *) buffer)) - return -EFAULT; - if (!isspace(c)) - break; - left--; - ((char *) buffer)++; - } - if (!left) - break; - len = left; - if (len > TMPBUFLEN-1) - len = TMPBUFLEN-1; - if(copy_from_user(buf, buffer, len)) - return -EFAULT; - buf[len] = 0; - p = buf; - if (*p < '0' || *p > '9') - break; - val = simple_strtoul(p, &p, 0); - len = p-buf; - if ((len < left) && *p && !isspace(*p)) - break; - buffer += len; - left -= len; -#if 0 - /* DRENG need a def */ - _set_L2CR(0); - _set_L2CR(val); - while ( _get_L2CR() & 0x1 ) - /* wait for invalidate to finish */; -#endif - - } else { - p = buf; - if (!first) - *p++ = '\t'; -#if 0 - /* DRENG need a def */ - val = _get_L2CR(); -#endif - p += sprintf(p, "0x%08x: ", val); - p += sprintf(p, " %s", (val >> 31) & 1 ? "enabled" : - "disabled"); - p += sprintf(p, ", %sparity", (val>>30)&1 ? "" : "no "); - p += sprintf(p, ", %s", sizestrings[(val >> 28) & 3]); - p += sprintf(p, ", %s", clockstrings[(val >> 25) & 7]); - p += sprintf(p, ", %s", typestrings[(val >> 23) & 2]); - p += sprintf(p, "%s", (val>>22)&1 ? ", data only" : ""); - p += sprintf(p, "%s", (val>>20)&1 ? ", ZZ enabled": ""); - p += sprintf(p, ", %s", (val>>19)&1 ? "write-through" : - "copy-back"); - p += sprintf(p, "%s", (val>>18)&1 ? ", testing" : ""); - p += sprintf(p, ", %sns hold",holdstrings[(val>>16)&3]); - p += sprintf(p, "%s", (val>>15)&1 ? ", DLL slow" : ""); - p += sprintf(p, "%s", (val>>14)&1 ? ", diff clock" :""); - p += sprintf(p, "%s", (val>>13)&1 ? ", DLL bypass" :""); - - p += sprintf(p,"\n"); - - len = strlen(buf); - if (len > left) - len = left; - if(copy_to_user(buffer, buf, len)) - return -EFAULT; - left -= len; - buffer += len; - break; - } - } + if (ppc_md.flush_hash_range) { + ppc_md.flush_hash_range(context, number, local); + } else { + int i; + struct tlb_batch_data *ptes = + &tlb_batch_array[smp_processor_id()][0]; - if (!write && !first && left) { - if(put_user('\n', (char *) buffer)) - return -EFAULT; - left--, buffer++; - } - if (write) { - p = (char *) buffer; - while (left) { - char c; - if(get_user(c, p++)) - return -EFAULT; - if (!isspace(c)) - break; - left--; + for (i = 0; i < number; i++) { + flush_hash_page(context, ptes->addr, ptes->pte, local); + ptes++; } } - if (write && first) - return -EINVAL; - *lenp -= left; - filp->f_pos += *lenp; - return 0; } - diff --git a/arch/ppc64/kernel/pSeries_htab.c b/arch/ppc64/kernel/pSeries_htab.c new file mode 100644 index 000000000000..7880a385fc96 --- /dev/null +++ b/arch/ppc64/kernel/pSeries_htab.c @@ -0,0 +1,470 @@ +/* + * pSeries hashtable management. + * + * SMP scalability work: + * Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <linux/spinlock.h> +#include <linux/bitops.h> +#include <linux/threads.h> +#include <linux/smp.h> + +#include <asm/abs_addr.h> +#include <asm/machdep.h> +#include <asm/mmu.h> +#include <asm/mmu_context.h> +#include <asm/pgtable.h> + +/* + * Create a pte. Used during initialization only. 
+ * We assume the PTE will fit in the primary PTEG. + */ +void pSeries_make_pte(HPTE *htab, unsigned long va, unsigned long pa, + int mode, unsigned long hash_mask, int large) +{ + HPTE *hptep; + unsigned long hash, i; + unsigned long vpn; + + if (large) + vpn = va >> LARGE_PAGE_SHIFT; + else + vpn = va >> PAGE_SHIFT; + + hash = hpt_hash(vpn, large); + + hptep = htab + ((hash & hash_mask)*HPTES_PER_GROUP); + + for (i = 0; i < 8; ++i, ++hptep) { + if (hptep->dw0.dw0.v == 0) { /* !valid */ + hptep->dw1.dword1 = pa | mode; + hptep->dw0.dword0 = 0; + hptep->dw0.dw0.avpn = va >> 23; + hptep->dw0.dw0.bolted = 1; /* bolted */ + if (large) { + hptep->dw0.dw0.l = 1; + hptep->dw0.dw0.avpn &= ~0x1UL; + } + hptep->dw0.dw0.v = 1; /* make valid */ + return; + } + } + + /* We should _never_ get here and too early to call xmon. */ + while(1) + ; +} + +#define HPTE_LOCK_BIT 3 + +static inline void pSeries_lock_hpte(HPTE *hptep) +{ + unsigned long *word = &hptep->dw0.dword0; + + while (1) { + if (!test_and_set_bit(HPTE_LOCK_BIT, word)) + break; + while(test_bit(HPTE_LOCK_BIT, word)) + barrier(); + } +} + +static inline void pSeries_unlock_hpte(HPTE *hptep) +{ + unsigned long *word = &hptep->dw0.dword0; + + asm volatile("lwsync":::"memory"); + clear_bit(HPTE_LOCK_BIT, word); +} + +static spinlock_t pSeries_tlbie_lock = SPIN_LOCK_UNLOCKED; + +static long pSeries_insert_hpte(unsigned long hpte_group, unsigned long vpn, + unsigned long prpn, int secondary, + unsigned long hpteflags, int bolted, int large) +{ + unsigned long avpn = vpn >> 11; + unsigned long arpn = physRpn_to_absRpn(prpn); + HPTE *hptep = htab_data.htab + hpte_group; + Hpte_dword0 dw0; + HPTE lhpte; + int i; + + for (i = 0; i < HPTES_PER_GROUP; i++) { + dw0 = hptep->dw0.dw0; + + if (!dw0.v) { + /* retry with lock held */ + pSeries_lock_hpte(hptep); + dw0 = hptep->dw0.dw0; + if (!dw0.v) + break; + pSeries_unlock_hpte(hptep); + } + + hptep++; + } + + if (i == HPTES_PER_GROUP) + return -1; + + lhpte.dw1.dword1 = 0; + lhpte.dw1.dw1.rpn = arpn; + lhpte.dw1.flags.flags = hpteflags; + + lhpte.dw0.dword0 = 0; + lhpte.dw0.dw0.avpn = avpn; + lhpte.dw0.dw0.h = secondary; + lhpte.dw0.dw0.bolted = bolted; + lhpte.dw0.dw0.v = 1; + + if (large) + lhpte.dw0.dw0.l = 1; + + hptep->dw1.dword1 = lhpte.dw1.dword1; + + /* Guarantee the second dword is visible before the valid bit */ + __asm__ __volatile__ ("eieio" : : : "memory"); + + /* + * Now set the first dword including the valid bit + * NOTE: this also unlocks the hpte + */ + hptep->dw0.dword0 = lhpte.dw0.dword0; + + __asm__ __volatile__ ("ptesync" : : : "memory"); + + return i; +} + +static long pSeries_remove_hpte(unsigned long hpte_group) +{ + HPTE *hptep; + Hpte_dword0 dw0; + int i; + int slot_offset; + unsigned long vsid, group, pi, pi_high; + unsigned long slot; + unsigned long flags; + int large; + unsigned long va; + + /* pick a random slot to start at */ + slot_offset = mftb() & 0x7; + + udbg_printf("remove_hpte in %d\n", slot_offset); + + for (i = 0; i < HPTES_PER_GROUP; i++) { + hptep = htab_data.htab + hpte_group + slot_offset; + dw0 = hptep->dw0.dw0; + + if (dw0.v && !dw0.bolted) { + /* retry with lock held */ + pSeries_lock_hpte(hptep); + dw0 = hptep->dw0.dw0; + if (dw0.v && !dw0.bolted) + break; + pSeries_unlock_hpte(hptep); + } + + slot_offset++; + slot_offset &= 0x7; + } + + if (i == HPTES_PER_GROUP) + return -1; + + large = dw0.l; + + /* Invalidate the hpte. 
NOTE: this also unlocks it */ + hptep->dw0.dword0 = 0; + + /* Invalidate the tlb */ + vsid = dw0.avpn >> 5; + slot = hptep - htab_data.htab; + group = slot >> 3; + if (dw0.h) + group = ~group; + pi = (vsid ^ group) & 0x7ff; + pi_high = (dw0.avpn & 0x1f) << 11; + pi |= pi_high; + + if (large) + va = pi << LARGE_PAGE_SHIFT; + else + va = pi << PAGE_SHIFT; + + spin_lock_irqsave(&pSeries_tlbie_lock, flags); + _tlbie(va, large); + spin_unlock_irqrestore(&pSeries_tlbie_lock, flags); + + return i; +} + +static inline void set_pp_bit(unsigned long pp, HPTE *addr) +{ + unsigned long old; + unsigned long *p = &addr->dw1.dword1; + + __asm__ __volatile__( + "1: ldarx %0,0,%3\n\ + rldimi %0,%2,0,62\n\ + stdcx. %0,0,%3\n\ + bne 1b" + : "=&r" (old), "=m" (*p) + : "r" (pp), "r" (p), "m" (*p) + : "cc"); +} + +/* + * Only works on small pages. Yes its ugly to have to check each slot in + * the group but we only use this during bootup. + */ +static long pSeries_hpte_find(unsigned long vpn) +{ + HPTE *hptep; + unsigned long hash; + unsigned long i, j; + long slot; + Hpte_dword0 dw0; + + hash = hpt_hash(vpn, 0); + + for (j = 0; j < 2; j++) { + slot = (hash & htab_data.htab_hash_mask) * HPTES_PER_GROUP; + for (i = 0; i < HPTES_PER_GROUP; i++) { + hptep = htab_data.htab + slot; + dw0 = hptep->dw0.dw0; + + if ((dw0.avpn == (vpn >> 11)) && dw0.v && + (dw0.h == j)) { + /* HPTE matches */ + if (j) + slot = -slot; + return slot; + } + ++slot; + } + hash = ~hash; + } + + return -1; +} + +static long pSeries_hpte_updatepp(unsigned long slot, unsigned long newpp, + unsigned long va, int large) +{ + HPTE *hptep = htab_data.htab + slot; + Hpte_dword0 dw0; + unsigned long vpn, avpn; + unsigned long flags; + + udbg_printf("updatepp\n"); + + if (large) + vpn = va >> LARGE_PAGE_SHIFT; + else + vpn = va >> PAGE_SHIFT; + + avpn = vpn >> 11; + + pSeries_lock_hpte(hptep); + + dw0 = hptep->dw0.dw0; + + if ((dw0.avpn != avpn) || !dw0.v) { + pSeries_unlock_hpte(hptep); + udbg_printf("updatepp missed\n"); + return -1; + } + + set_pp_bit(newpp, hptep); + + pSeries_unlock_hpte(hptep); + + /* Ensure it is out of the tlb too */ + /* XXX use tlbiel where possible */ + spin_lock_irqsave(&pSeries_tlbie_lock, flags); + _tlbie(va, large); + spin_unlock_irqrestore(&pSeries_tlbie_lock, flags); + + return 0; +} + +/* + * Update the page protection bits. Intended to be used to create + * guard pages for kernel data structures on pages which are bolted + * in the HPT. Assumes pages being operated on will not be stolen. + * Does not work on large pages. + * + * No need to lock here because we should be the only user. 
+ */ +static void pSeries_hpte_updateboltedpp(unsigned long newpp, unsigned long ea) +{ + unsigned long vsid, va, vpn, flags; + long slot; + HPTE *hptep; + + vsid = get_kernel_vsid(ea); + va = (vsid << 28) | (ea & 0x0fffffff); + vpn = va >> PAGE_SHIFT; + + slot = pSeries_hpte_find(vpn); + if (slot == -1) + panic("could not find page to bolt\n"); + hptep = htab_data.htab + slot; + + set_pp_bit(newpp, hptep); + + /* Ensure it is out of the tlb too */ + /* XXX use tlbiel where possible */ + spin_lock_irqsave(&pSeries_tlbie_lock, flags); + _tlbie(va, 0); + spin_unlock_irqrestore(&pSeries_tlbie_lock, flags); +} + +static void pSeries_hpte_invalidate(unsigned long slot, unsigned long va, + int large, int local) +{ + HPTE *hptep = htab_data.htab + slot; + Hpte_dword0 dw0; + unsigned long vpn, avpn; + unsigned long flags; + + if (large) + vpn = va >> LARGE_PAGE_SHIFT; + else + vpn = va >> PAGE_SHIFT; + + avpn = vpn >> 11; + + pSeries_lock_hpte(hptep); + + dw0 = hptep->dw0.dw0; + + if ((dw0.avpn != avpn) || !dw0.v) { + pSeries_unlock_hpte(hptep); + udbg_printf("invalidate missed\n"); + return; + } + + /* Invalidate the hpte. NOTE: this also unlocks it */ + hptep->dw0.dword0 = 0; + + /* Invalidate the tlb */ + if (!large && local && __is_processor(PV_POWER4)) { + _tlbiel(va, large); + } else { + spin_lock_irqsave(&pSeries_tlbie_lock, flags); + _tlbie(va, large); + spin_unlock_irqrestore(&pSeries_tlbie_lock, flags); + } +} + +static void pSeries_flush_hash_range(unsigned long context, + unsigned long number, int local) +{ + unsigned long vsid, vpn, va, hash, secondary, slot, flags, avpn; + int i, j; + unsigned long va_array[MAX_BATCH_FLUSH]; + HPTE *hptep; + Hpte_dword0 dw0; + struct tlb_batch_data *ptes = &tlb_batch_array[smp_processor_id()][0]; + /* XXX fix for large ptes */ + unsigned long large = 0; + j = 0; + for (i = 0; i < number; i++) { + if ((ptes->addr >= USER_START) && (ptes->addr <= USER_END)) + vsid = get_vsid(context, ptes->addr); + else + vsid = get_kernel_vsid(ptes->addr); + + va = (vsid << 28) | (ptes->addr & 0x0fffffff); + va_array[j] = va; + if (large) + vpn = va >> LARGE_PAGE_SHIFT; + else + vpn = va >> PAGE_SHIFT; + hash = hpt_hash(vpn, large); + secondary = (pte_val(ptes->pte) & _PAGE_SECONDARY) >> 15; + if (secondary) + hash = ~hash; + slot = (hash & htab_data.htab_hash_mask) * HPTES_PER_GROUP; + slot += (pte_val(ptes->pte) & _PAGE_GROUP_IX) >> 12; + + hptep = htab_data.htab + slot; + avpn = vpn >> 11; + + pSeries_lock_hpte(hptep); + + dw0 = hptep->dw0.dw0; + + ptes++; + + if ((dw0.avpn != avpn) || !dw0.v) { + pSeries_unlock_hpte(hptep); + udbg_printf("invalidate missed\n"); + continue; + } + + j++; + + /* Invalidate the hpte. 
NOTE: this also unlocks it */ + hptep->dw0.dword0 = 0; + } + + if (!large && local && __is_processor(PV_POWER4)) { + asm volatile("ptesync":::"memory"); + + for (i = 0; i < j; i++) { + asm volatile("\n\ + clrldi %0,%0,16\n\ + tlbiel %0" + : : "r" (va_array[i]) : "memory" ); + } + + asm volatile("ptesync":::"memory"); + } else { + /* XXX double check that it is safe to take this late */ + spin_lock_irqsave(&pSeries_tlbie_lock, flags); + + asm volatile("ptesync":::"memory"); + + for (i = 0; i < j; i++) { + asm volatile("\n\ + clrldi %0,%0,16\n\ + tlbie %0" + : : "r" (va_array[i]) : "memory" ); + } + + asm volatile("eieio; tlbsync; ptesync":::"memory"); + + spin_unlock_irqrestore(&pSeries_tlbie_lock, flags); + } +} + +void hpte_init_pSeries(void) +{ + struct device_node *root; + const char *model; + + ppc_md.hpte_invalidate = pSeries_hpte_invalidate; + ppc_md.hpte_updatepp = pSeries_hpte_updatepp; + ppc_md.hpte_updateboltedpp = pSeries_hpte_updateboltedpp; + ppc_md.insert_hpte = pSeries_insert_hpte; + ppc_md.remove_hpte = pSeries_remove_hpte; + ppc_md.make_pte = pSeries_make_pte; + + /* Disable TLB batching on nighthawk */ + root = find_path_device("/"); + if (root) { + model = get_property(root, "model", NULL); + if (strcmp(model, "CHRP IBM,9076-N81")) + ppc_md.flush_hash_range = pSeries_flush_hash_range; + } +} diff --git a/arch/ppc64/kernel/pSeries_lpar.c b/arch/ppc64/kernel/pSeries_lpar.c index 20adcefafeea..23d1dd029128 100644 --- a/arch/ppc64/kernel/pSeries_lpar.c +++ b/arch/ppc64/kernel/pSeries_lpar.c @@ -214,358 +214,6 @@ long plpar_xirr(unsigned long *xirr_ret) xirr_ret, &dummy, &dummy); } -/* - * The following section contains code that ultimately should - * be put in the relavent file (htab.c, xics.c, etc). It has - * been put here for the time being in order to ease maintainence - * of the pSeries LPAR code until it can all be put into CVS. - */ -static void hpte_invalidate_pSeriesLP(unsigned long slot) -{ - HPTE old_pte; - unsigned long lpar_rc; - unsigned long flags = 0; - - lpar_rc = plpar_pte_remove(flags, - slot, - 0, - &old_pte.dw0.dword0, - &old_pte.dw1.dword1); - if (lpar_rc != H_Success) BUG(); -} - -/* NOTE: for updatepp ops we are fortunate that the linux "newpp" bits and - * the low 3 bits of flags happen to line up. So no transform is needed. - * We can probably optimize here and assume the high bits of newpp are - * already zero. For now I am paranoid. 
- */ -static void hpte_updatepp_pSeriesLP(long slot, unsigned long newpp, unsigned long va) -{ - unsigned long lpar_rc; - unsigned long flags; - flags = newpp & 3; - lpar_rc = plpar_pte_protect( flags, - slot, - 0); - if (lpar_rc != H_Success) { - udbg_printf( " bad return code from pte protect rc = %lx \n", lpar_rc); - for (;;); - } -} - -static void hpte_updateboltedpp_pSeriesLP(unsigned long newpp, unsigned long ea) -{ - unsigned long lpar_rc; - unsigned long vsid,va,vpn,flags; - long slot; - - vsid = get_kernel_vsid( ea ); - va = ( vsid << 28 ) | ( ea & 0x0fffffff ); - vpn = va >> PAGE_SHIFT; - - slot = ppc_md.hpte_find( vpn ); - flags = newpp & 3; - lpar_rc = plpar_pte_protect( flags, - slot, - 0); - if (lpar_rc != H_Success) { - udbg_printf( " bad return code from pte bolted protect rc = %lx \n", lpar_rc); - for (;;); - } -} - - -static unsigned long hpte_getword0_pSeriesLP(unsigned long slot) -{ - unsigned long dword0; - unsigned long lpar_rc; - unsigned long dummy_word1; - unsigned long flags; - /* Read 1 pte at a time */ - /* Do not need RPN to logical page translation */ - /* No cross CEC PFT access */ - flags = 0; - - lpar_rc = plpar_pte_read(flags, - slot, - &dword0, &dummy_word1); - if (lpar_rc != H_Success) { - udbg_printf(" error on pte read in get_hpte0 rc = %lx \n", lpar_rc); - for (;;); - } - - return(dword0); -} - -static long hpte_selectslot_pSeriesLP(unsigned long vpn) -{ - unsigned long primary_hash; - unsigned long hpteg_slot; - unsigned i, k; - unsigned long flags; - HPTE pte_read; - unsigned long lpar_rc; - - /* Search the primary group for an available slot */ - primary_hash = hpt_hash(vpn, 0); - - hpteg_slot = ( primary_hash & htab_data.htab_hash_mask ) * HPTES_PER_GROUP; - - /* Read 1 pte at a time */ - /* Do not need RPN to logical page translation */ - /* No cross CEC PFT access */ - flags = 0; - for (i=0; i<HPTES_PER_GROUP; ++i) { - /* read the hpte entry from the slot */ - lpar_rc = plpar_pte_read(flags, - hpteg_slot + i, - &pte_read.dw0.dword0, &pte_read.dw1.dword1); - if (lpar_rc != H_Success) { - udbg_printf(" read of hardware page table failed rc = %lx \n", lpar_rc); - for (;;); - } - if ( pte_read.dw0.dw0.v == 0 ) { - /* If an available slot found, return it */ - return hpteg_slot + i; - } - - } - - - /* Search the secondary group for an available slot */ - hpteg_slot = ( ~primary_hash & htab_data.htab_hash_mask ) * HPTES_PER_GROUP; - - - for (i=0; i<HPTES_PER_GROUP; ++i) { - /* read the hpte entry from the slot */ - lpar_rc = plpar_pte_read(flags, - hpteg_slot + i, - &pte_read.dw0.dword0, &pte_read.dw1.dword1); - if (lpar_rc != H_Success) { - udbg_printf(" read of hardware page table failed2 rc = %lx \n", lpar_rc); - for (;;); - } - if ( pte_read.dw0.dw0.v == 0 ) { - /* If an available slot found, return it */ - return hpteg_slot + i; - } - - } - - /* No available entry found in secondary group */ - - - /* Select an entry in the primary group to replace */ - - hpteg_slot = ( primary_hash & htab_data.htab_hash_mask ) * HPTES_PER_GROUP; - - k = htab_data.next_round_robin++ & 0x7; - - for (i=0; i<HPTES_PER_GROUP; ++i) { - if (k == HPTES_PER_GROUP) - k = 0; - - lpar_rc = plpar_pte_read(flags, - hpteg_slot + k, - &pte_read.dw0.dword0, &pte_read.dw1.dword1); - if (lpar_rc != H_Success) { - udbg_printf( " pte read failed - rc = %lx", lpar_rc); - for (;;); - } - if ( ! 
pte_read.dw0.dw0.bolted) - { - hpteg_slot += k; - /* Invalidate the current entry */ - ppc_md.hpte_invalidate(hpteg_slot); - return hpteg_slot; - } - ++k; - } - - /* No non-bolted entry found in primary group - time to panic */ - udbg_printf("select_hpte_slot - No non-bolted HPTE in group 0x%lx! \n", hpteg_slot/HPTES_PER_GROUP); - udbg_printf("No non-bolted HPTE in group %lx", (unsigned long)hpteg_slot/HPTES_PER_GROUP); - for (;;); - - /* never executes - avoid compiler errors */ - return 0; -} - - -static void hpte_create_valid_pSeriesLP(unsigned long slot, unsigned long vpn, - unsigned long prpn, unsigned hash, - void *ptep, unsigned hpteflags, - unsigned bolted) -{ - /* Local copy of HPTE */ - struct { - /* Local copy of first doubleword of HPTE */ - union { - unsigned long d; - Hpte_dword0 h; - } dw0; - /* Local copy of second doubleword of HPTE */ - union { - unsigned long d; - Hpte_dword1 h; - Hpte_dword1_flags f; - } dw1; - } lhpte; - - unsigned long avpn = vpn >> 11; - unsigned long arpn = physRpn_to_absRpn( prpn ); - - unsigned long lpar_rc; - unsigned long flags; - HPTE ret_hpte; - - /* Fill in the local HPTE with absolute rpn, avpn and flags */ - lhpte.dw1.d = 0; - lhpte.dw1.h.rpn = arpn; - lhpte.dw1.f.flags = hpteflags; - - lhpte.dw0.d = 0; - lhpte.dw0.h.avpn = avpn; - lhpte.dw0.h.h = hash; - lhpte.dw0.h.bolted = bolted; - lhpte.dw0.h.v = 1; - - /* Now fill in the actual HPTE */ - /* Set CEC cookie to 0 */ - /* Large page = 0 */ - /* Zero page = 0 */ - /* I-cache Invalidate = 0 */ - /* I-cache synchronize = 0 */ - /* Exact = 1 - only modify exact entry */ - flags = H_EXACT; - - if (hpteflags & (_PAGE_GUARDED|_PAGE_NO_CACHE)) - lhpte.dw1.f.flags &= ~_PAGE_COHERENT; -#if 1 - __asm__ __volatile__ ( - H_ENTER_r3 - "mr 4, %1\n" - "mr 5, %2\n" - "mr 6, %3\n" - "mr 7, %4\n" - HSC - "mr %0, 3\n" - : "=r" (lpar_rc) - : "r" (flags), "r" (slot), "r" (lhpte.dw0.d), "r" (lhpte.dw1.d) - : "r3", "r4", "r5", "r6", "r7", "cc"); -#else - lpar_rc = plpar_pte_enter(flags, - slot, - lhpte.dw0.d, - lhpte.dw1.d, - &ret_hpte.dw0.dword0, - &ret_hpte.dw1.dword1); -#endif - if (lpar_rc != H_Success) { - udbg_printf("error on pte enter lapar rc = %ld\n",lpar_rc); - udbg_printf("ent: s=%lx, dw0=%lx, dw1=%lx\n", slot, lhpte.dw0.d, lhpte.dw1.d); - /* xmon_backtrace("backtrace"); */ - for (;;); - } -} - -static long hpte_find_pSeriesLP(unsigned long vpn) -{ - union { - unsigned long d; - Hpte_dword0 h; - } hpte_dw0; - long slot; - unsigned long hash; - unsigned long i,j; - - hash = hpt_hash(vpn, 0); - for ( j=0; j<2; ++j ) { - slot = (hash & htab_data.htab_hash_mask) * HPTES_PER_GROUP; - for ( i=0; i<HPTES_PER_GROUP; ++i ) { - hpte_dw0.d = hpte_getword0_pSeriesLP( slot ); - if ( ( hpte_dw0.h.avpn == ( vpn >> 11 ) ) && - ( hpte_dw0.h.v ) && - ( hpte_dw0.h.h == j ) ) { - /* HPTE matches */ - if ( j ) - slot = -slot; - return slot; - } - ++slot; - } - hash = ~hash; - } - return -1; -} - -/* - * Create a pte - LPAR . Used during initialization only. - * We assume the PTE will fit in the primary PTEG. 
- */ -void make_pte_LPAR(HPTE *htab, - unsigned long va, unsigned long pa, int mode, - unsigned long hash_mask, int large) -{ - HPTE local_hpte, ret_hpte; - unsigned long hash, slot, flags,lpar_rc, vpn; - - if (large) - vpn = va >> 24; - else - vpn = va >> 12; - - hash = hpt_hash(vpn, large); - - slot = ((hash & hash_mask)*HPTES_PER_GROUP); - - local_hpte.dw1.dword1 = pa | mode; - local_hpte.dw0.dword0 = 0; - local_hpte.dw0.dw0.avpn = va >> 23; - local_hpte.dw0.dw0.bolted = 1; /* bolted */ - if (large) - local_hpte.dw0.dw0.l = 1; /* large page */ - local_hpte.dw0.dw0.v = 1; - - /* Set CEC cookie to 0 */ - /* Zero page = 0 */ - /* I-cache Invalidate = 0 */ - /* I-cache synchronize = 0 */ - /* Exact = 0 - modify any entry in group */ - flags = 0; -#if 1 - __asm__ __volatile__ ( - H_ENTER_r3 - "mr 4, %1\n" - "mr 5, %2\n" - "mr 6, %3\n" - "mr 7, %4\n" - HSC - "mr %0, 3\n" - : "=r" (lpar_rc) - : "r" (flags), "r" (slot), "r" (local_hpte.dw0.dword0), "r" (local_hpte.dw1.dword1) - : "r3", "r4", "r5", "r6", "r7", "cc"); -#else - lpar_rc = plpar_pte_enter(flags, - slot, - local_hpte.dw0.dword0, - local_hpte.dw1.dword1, - &ret_hpte.dw0.dword0, - &ret_hpte.dw1.dword1); -#endif -#if 0 /* NOTE: we explicitly do not check return status here because it is - * "normal" for early boot code to map io regions for which a partition - * has no access. However, we will die if we actually fault on these - * "permission denied" pages. - */ - if (lpar_rc != H_Success) { - /* pSeriesLP_init_early(); */ - udbg_printf("flags=%lx, slot=%lx, dword0=%lx, dword1=%lx, rc=%d\n", flags, slot, local_hpte.dw0.dword0,local_hpte.dw1.dword1, lpar_rc); - BUG(); - } -#endif -} - static void tce_build_pSeriesLP(struct TceTable *tbl, long tcenum, unsigned long uaddr, int direction ) { @@ -786,19 +434,14 @@ static unsigned char udbg_getcLP(void) } } +void pSeries_lpar_mm_init(void); /* This is called early in setup.c. * Use it to setup page table ppc_md stuff as well as udbg. */ void pSeriesLP_init_early(void) { - ppc_md.hpte_invalidate = hpte_invalidate_pSeriesLP; - ppc_md.hpte_updatepp = hpte_updatepp_pSeriesLP; - ppc_md.hpte_updateboltedpp = hpte_updateboltedpp_pSeriesLP; - ppc_md.hpte_getword0 = hpte_getword0_pSeriesLP; - ppc_md.hpte_selectslot = hpte_selectslot_pSeriesLP; - ppc_md.hpte_create_valid = hpte_create_valid_pSeriesLP; - ppc_md.hpte_find = hpte_find_pSeriesLP; + pSeries_lpar_mm_init(); ppc_md.tce_build = tce_build_pSeriesLP; ppc_md.tce_free = tce_free_pSeriesLP; @@ -892,3 +535,315 @@ int hvc_count(int *start_termno) } return 0; } + + + + + + +/* + * Create a pte - LPAR . Used during initialization only. + * We assume the PTE will fit in the primary PTEG. 
+ */ +void pSeries_lpar_make_pte(HPTE *htab, unsigned long va, unsigned long pa, + int mode, unsigned long hash_mask, int large) +{ + HPTE local_hpte; + unsigned long hash, slot, flags, lpar_rc, vpn; + unsigned long dummy1, dummy2; + + if (large) + vpn = va >> LARGE_PAGE_SHIFT; + else + vpn = va >> PAGE_SHIFT; + + hash = hpt_hash(vpn, large); + + slot = ((hash & hash_mask)*HPTES_PER_GROUP); + + local_hpte.dw1.dword1 = pa | mode; + local_hpte.dw0.dword0 = 0; + local_hpte.dw0.dw0.avpn = va >> 23; + local_hpte.dw0.dw0.bolted = 1; /* bolted */ + if (large) { + local_hpte.dw0.dw0.l = 1; /* large page */ + local_hpte.dw0.dw0.avpn &= ~0x1UL; + } + local_hpte.dw0.dw0.v = 1; + + /* Set CEC cookie to 0 */ + /* Zero page = 0 */ + /* I-cache Invalidate = 0 */ + /* I-cache synchronize = 0 */ + /* Exact = 0 - modify any entry in group */ + flags = 0; + lpar_rc = plpar_pte_enter(flags, slot, local_hpte.dw0.dword0, + local_hpte.dw1.dword1, &dummy1, &dummy2); + +#if 0 + /* + * NOTE: we explicitly do not check return status here because it is + * "normal" for early boot code to map io regions for which a partition + * has no access. However, we will die if we actually fault on these + * "permission denied" pages. + */ + if (lpar_rc != H_Success) { + udbg_printf("flags=%lx, slot=%lx, dword0=%lx, dword1=%lx, rc=%d\n", flags, slot, local_hpte.dw0.dword0,local_hpte.dw1.dword1, lpar_rc); + BUG(); + } +#endif +} + +static long pSeries_lpar_insert_hpte(unsigned long hpte_group, + unsigned long vpn, unsigned long prpn, + int secondary, unsigned long hpteflags, + int bolted, int large) +{ + unsigned long avpn = vpn >> 11; + unsigned long arpn = physRpn_to_absRpn(prpn); + unsigned long lpar_rc; + unsigned long flags; + unsigned long slot; + HPTE lhpte; + + /* Fill in the local HPTE with absolute rpn, avpn and flags */ + lhpte.dw1.dword1 = 0; + lhpte.dw1.dw1.rpn = arpn; + lhpte.dw1.flags.flags = hpteflags; + + lhpte.dw0.dword0 = 0; + lhpte.dw0.dw0.avpn = avpn; + lhpte.dw0.dw0.h = secondary; + lhpte.dw0.dw0.bolted = bolted; + lhpte.dw0.dw0.v = 1; + + if (large) + lhpte.dw0.dw0.l = 1; + + /* Now fill in the actual HPTE */ + /* Set CEC cookie to 0 */ + /* Large page = 0 */ + /* Zero page = 0 */ + /* I-cache Invalidate = 0 */ + /* I-cache synchronize = 0 */ + /* Exact = 0 */ + flags = 0; + + /* XXX why is this here? - Anton */ + if (hpteflags & (_PAGE_GUARDED|_PAGE_NO_CACHE)) + lhpte.dw1.flags.flags &= ~_PAGE_COHERENT; + + __asm__ __volatile__ ( + H_ENTER_r3 + "mr 4, %2\n" + "mr 5, %3\n" + "mr 6, %4\n" + "mr 7, %5\n" + HSC + "mr %0, 3\n" + "mr %1, 4\n" + : "=r" (lpar_rc), "=r" (slot) + : "r" (flags), "r" (hpte_group), "r" (lhpte.dw0.dword0), + "r" (lhpte.dw1.dword1) + : "r3", "r4", "r5", "r6", "r7", "cc"); + + if (lpar_rc == H_PTEG_Full) + return -1; + + if (lpar_rc != H_Success) { + udbg_printf("error on pte enter lpar rc = %ld\n",lpar_rc); + udbg_printf("ent: s=%lx, dw0=%lx, dw1=%lx\n", slot, + lhpte.dw0.dword0, lhpte.dw1.dword1); + + PPCDBG_ENTER_DEBUGGER(); + panic("error on pte enter"); + } + + return slot; +} + +static spinlock_t pSeries_lpar_tlbie_lock = SPIN_LOCK_UNLOCKED; + +static long pSeries_lpar_remove_hpte(unsigned long hpte_group) +{ + /* XXX take spinlock */ + panic("pSeries_lpar_remove_hpte"); +} + +/* NOTE: for updatepp ops we are fortunate that the linux "newpp" bits and + * the low 3 bits of flags happen to line up. So no transform is needed. + * We can probably optimize here and assume the high bits of newpp are + * already zero. For now I am paranoid. 
+/* NOTE: for updatepp ops we are fortunate that the linux "newpp" bits and
+ * the low 3 bits of flags happen to line up. So no transform is needed.
+ * We can probably optimize here and assume the high bits of newpp are
+ * already zero. For now I am paranoid.
+ */
+static long pSeries_lpar_hpte_updatepp(unsigned long slot, unsigned long newpp,
+ unsigned long va, int large)
+{
+ unsigned long lpar_rc;
+ unsigned long vpn = va >> PAGE_SHIFT;
+ unsigned long flags = (newpp & 3) | H_AVPN;
+
+ udbg_printf("updatepp\n");
+
+ lpar_rc = plpar_pte_protect(flags, slot, (vpn >> 4) & ~0x7fUL);
+
+ if (lpar_rc == H_Not_Found) {
+ udbg_printf("updatepp missed\n");
+ return -1;
+ }
+
+ if (lpar_rc != H_Success) {
+ udbg_printf("bad return code from pte protect rc = %lx\n",
+ lpar_rc);
+ for (;;);
+ }
+
+ return 0;
+}
+
+static unsigned long pSeries_lpar_hpte_getword0(unsigned long slot)
+{
+ unsigned long dword0;
+ unsigned long lpar_rc;
+ unsigned long dummy_word1;
+ unsigned long flags;
+
+ /* Read 1 pte at a time */
+ /* Do not need RPN to logical page translation */
+ /* No cross CEC PFT access */
+ flags = 0;
+
+ lpar_rc = plpar_pte_read(flags, slot, &dword0, &dummy_word1);
+
+ if (lpar_rc != H_Success) {
+ udbg_printf("error on pte read in get_hpte0 rc = %lx\n",
+ lpar_rc);
+ for (;;);
+ }
+
+ return dword0;
+}
+
+static long pSeries_lpar_hpte_find(unsigned long vpn)
+{
+ unsigned long hash;
+ unsigned long i, j;
+ long slot;
+ union {
+ unsigned long dword0;
+ Hpte_dword0 dw0;
+ } hpte_dw0;
+ Hpte_dword0 dw0;
+
+ hash = hpt_hash(vpn, 0);
+
+ for (j = 0; j < 2; j++) {
+ slot = (hash & htab_data.htab_hash_mask) * HPTES_PER_GROUP;
+ for (i = 0; i < HPTES_PER_GROUP; i++) {
+ hpte_dw0.dword0 = pSeries_lpar_hpte_getword0(slot);
+ dw0 = hpte_dw0.dw0;
+
+ if ((dw0.avpn == (vpn >> 11)) && dw0.v &&
+ (dw0.h == j)) {
+ /* HPTE matches */
+ if (j)
+ slot = -slot;
+ return slot;
+ }
+ ++slot;
+ }
+ hash = ~hash;
+ }
+
+ return -1;
+}
+
+static void pSeries_lpar_hpte_updateboltedpp(unsigned long newpp,
+ unsigned long ea)
+{
+ unsigned long lpar_rc;
+ unsigned long vsid, va, vpn, flags;
+ long slot;
+
+ vsid = get_kernel_vsid(ea);
+ va = (vsid << 28) | (ea & 0x0fffffff);
+ vpn = va >> PAGE_SHIFT;
+
+ slot = pSeries_lpar_hpte_find(vpn);
+ if (slot == -1)
+ panic("could not find page to bolt\n");
+
+ flags = newpp & 3;
+ lpar_rc = plpar_pte_protect(flags, slot, 0);
+
+ if (lpar_rc != H_Success) {
+ udbg_printf("bad return code from pte bolted protect rc = %lx\n",
+ lpar_rc);
+ for (;;);
+ }
+}
+
+/*
+ * Take a spinlock around flushes to avoid bouncing the hypervisor tlbie
+ * lock.
+ */
+static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long va,
+ int large, int local)
+{
+ unsigned long vpn, avpn;
+ unsigned long lpar_rc;
+ unsigned long flags;
+ unsigned long dummy1, dummy2;
+
+ if (large)
+ vpn = va >> LARGE_PAGE_SHIFT;
+ else
+ vpn = va >> PAGE_SHIFT;
+
+ avpn = vpn >> 11;
+
+ spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags);
+ lpar_rc = plpar_pte_remove(H_AVPN, slot, (vpn >> 4) & ~0x7fUL, &dummy1,
+ &dummy2);
+ spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags);
+
+ if (lpar_rc == H_Not_Found) {
+ udbg_printf("invalidate missed\n");
+ return;
+ }
+
+ if (lpar_rc != H_Success) {
+ udbg_printf("bad return code from invalidate rc = %lx\n",
+ lpar_rc);
+ for (;;);
+ }
+}
+
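pSeries_lpar_hpte_find() above encodes two conventions worth calling out: the secondary group lives at the complement of the hash, and a hit there is reported as a negated slot number so the caller can tell which hash matched. A stand-alone toy model (the 16-entry table and the planted entry are invented for illustration):

/*
 * Editorial sketch, not patch content: the two-pass search used by
 * hpte_find() above, matching on AVPN, V and H, negating the slot on
 * a secondary-group hit.
 */
#include <stdio.h>

#define HPTES_PER_GROUP 8

struct toy_hpte {
	unsigned long avpn;
	int v, h;
};

static struct toy_hpte toy_htab[2 * HPTES_PER_GROUP];

static long toy_find(unsigned long vpn, unsigned long hash, unsigned long mask)
{
	unsigned long i, j;
	long slot;

	for (j = 0; j < 2; j++) {
		slot = (hash & mask) * HPTES_PER_GROUP;
		for (i = 0; i < HPTES_PER_GROUP; i++, slot++) {
			struct toy_hpte *e = &toy_htab[slot];
			if (e->v && e->h == (int)j && e->avpn == (vpn >> 11))
				return j ? -slot : slot;	/* negative => secondary */
		}
		hash = ~hash;	/* secondary group on the second pass */
	}
	return -1;
}

int main(void)
{
	unsigned long vpn = 0x123456UL;

	/* Plant a secondary-group entry: ~0UL & 1 == 1, so group 1. */
	toy_htab[1 * HPTES_PER_GROUP + 2] =
		(struct toy_hpte){ .avpn = vpn >> 11, .v = 1, .h = 1 };
	printf("slot %ld (negative => secondary group)\n",
	       toy_find(vpn, 0, 0x1UL));
	return 0;
}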
+/*
+ * Take a spinlock around flushes to avoid bouncing the hypervisor tlbie
+ * lock.
+ */
+void pSeries_lpar_flush_hash_range(unsigned long context, unsigned long number,
+ int local)
+{
+ unsigned long i;
+ struct tlb_batch_data *ptes =
+ &tlb_batch_array[smp_processor_id()][0];
+ unsigned long flags;
+
+ spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags);
+ for (i = 0; i < number; i++) {
+ flush_hash_page(context, ptes->addr, ptes->pte, local);
+ ptes++;
+ }
+ spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags);
+}
+
+void pSeries_lpar_mm_init(void)
+{
+ ppc_md.hpte_invalidate = pSeries_lpar_hpte_invalidate;
+ ppc_md.hpte_updatepp = pSeries_lpar_hpte_updatepp;
+ ppc_md.hpte_updateboltedpp = pSeries_lpar_hpte_updateboltedpp;
+ ppc_md.insert_hpte = pSeries_lpar_insert_hpte;
+ ppc_md.remove_hpte = pSeries_lpar_remove_hpte;
+ ppc_md.make_pte = pSeries_lpar_make_pte;
+}
diff --git a/arch/ppc64/mm/init.c b/arch/ppc64/mm/init.c
index ece7186a18c5..37a8a3cbe077 100644
--- a/arch/ppc64/mm/init.c
+++ b/arch/ppc64/mm/init.c
@@ -253,7 +253,7 @@ static void map_io_page(unsigned long ea, unsigned long pa, int flags)
 * entry in the hardware page table.
 */
 vsid = get_kernel_vsid(ea);
- make_pte(htab_data.htab,
+ ppc_md.make_pte(htab_data.htab,
 (vsid << 28) | (ea & 0xFFFFFFF), // va (NOT the ea)
 pa,
 _PAGE_NO_CACHE | _PAGE_GUARDED | PP_RWXX,
@@ -262,29 +262,23 @@ static void map_io_page(unsigned long ea, unsigned long pa, int flags)
 }
 
 void
-local_flush_tlb_all(void)
-{
- /* Implemented to just flush the vmalloc area.
- * vmalloc is the only user of flush_tlb_all.
- */
- local_flush_tlb_range( NULL, VMALLOC_START, VMALLOC_END );
-}
-
-void
 local_flush_tlb_mm(struct mm_struct *mm)
 {
- if ( mm->map_count ) {
+ if (mm->map_count) {
 struct vm_area_struct *mp;
- for ( mp = mm->mmap; mp != NULL; mp = mp->vm_next )
- local_flush_tlb_range( mm, mp->vm_start, mp->vm_end );
- }
- else /* MIKEC: It is not clear why this is needed */
+ for (mp = mm->mmap; mp != NULL; mp = mp->vm_next)
+ local_flush_tlb_range(mm, mp->vm_start, mp->vm_end);
+ } else {
+ /* MIKEC: It is not clear why this is needed */
 /* paulus: it is needed to clear out stale HPTEs
 * when an address space (represented by an mm_struct)
 * is being destroyed.
 */
- local_flush_tlb_range( mm, USER_START, USER_END );
-}
+ local_flush_tlb_range(mm, USER_START, USER_END);
+ }
+ /* XXX are there races with checking cpu_vm_mask? - Anton */
+ mm->cpu_vm_mask = 0;
+}
 
 /*
 * Callers should hold the mm->page_table_lock
@@ -297,7 +291,9 @@ local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
 pgd_t *pgd;
 pmd_t *pmd;
 pte_t *ptep;
 pte_t pte;
-
+ unsigned long flags;
+ int local = 0;
+
 switch( REGION_ID(vmaddr) ) {
 case VMALLOC_REGION_ID:
 pgd = pgd_offset_k( vmaddr );
@@ -308,13 +304,17 @@ local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
 case USER_REGION_ID:
 pgd = pgd_offset( vma->vm_mm, vmaddr );
 context = vma->vm_mm->context;
+
+ /* XXX are there races with checking cpu_vm_mask? 
- Anton */ + if (vma->vm_mm->cpu_vm_mask == (1 << smp_processor_id())) + local = 1; + break; default: panic("local_flush_tlb_page: invalid region 0x%016lx", vmaddr); } - if (!pgd_none(*pgd)) { pmd = pmd_offset(pgd, vmaddr); if (!pmd_none(*pmd)) { @@ -322,12 +322,14 @@ local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) /* Check if HPTE might exist and flush it if so */ pte = __pte(pte_update(ptep, _PAGE_HPTEFLAGS, 0)); if ( pte_val(pte) & _PAGE_HASHPTE ) { - flush_hash_page(context, vmaddr, pte); + flush_hash_page(context, vmaddr, pte, local); } } } } +struct tlb_batch_data tlb_batch_array[NR_CPUS][MAX_BATCH_FLUSH]; + void local_flush_tlb_range(struct mm_struct *mm, unsigned long start, unsigned long end) { @@ -337,6 +339,10 @@ local_flush_tlb_range(struct mm_struct *mm, unsigned long start, unsigned long e pte_t pte; unsigned long pgd_end, pmd_end; unsigned long context; + unsigned long flags; + int i = 0; + struct tlb_batch_data *ptes = &tlb_batch_array[smp_processor_id()][0]; + int local = 0; if ( start >= end ) panic("flush_tlb_range: start (%016lx) greater than end (%016lx)\n", start, end ); @@ -356,6 +362,12 @@ local_flush_tlb_range(struct mm_struct *mm, unsigned long start, unsigned long e case USER_REGION_ID: pgd = pgd_offset( mm, start ); context = mm->context; + + /* XXX are there races with checking cpu_vm_mask? - Anton */ + if (mm->cpu_vm_mask == (1 << smp_processor_id())) { + local = 1; + } + break; default: panic("flush_tlb_range: invalid region for start (%016lx) and end (%016lx)\n", start, end); @@ -377,8 +389,17 @@ local_flush_tlb_range(struct mm_struct *mm, unsigned long start, unsigned long e do { if ( pte_val(*ptep) & _PAGE_HASHPTE ) { pte = __pte(pte_update(ptep, _PAGE_HPTEFLAGS, 0)); - if ( pte_val(pte) & _PAGE_HASHPTE ) - flush_hash_page( context, start, pte ); + if ( pte_val(pte) & _PAGE_HASHPTE ) { + ptes->pte = pte; + ptes->addr = start; + ptes++; + i++; + if (i == MAX_BATCH_FLUSH) { + flush_hash_range(context, MAX_BATCH_FLUSH, local); + i = 0; + ptes = &tlb_batch_array[smp_processor_id()][0]; + } + } } start += PAGE_SIZE; ++ptep; @@ -393,6 +414,9 @@ local_flush_tlb_range(struct mm_struct *mm, unsigned long start, unsigned long e start = pgd_end; ++pgd; } while ( start < end ); + + if (i) + flush_hash_range(context, i, local); } @@ -643,3 +667,30 @@ void flush_icache_user_range(struct vm_area_struct *vma, struct page *page, maddr = (unsigned long)page_address(page) + (addr & ~PAGE_MASK); flush_icache_range(maddr, maddr + len); } + +/* + * This is called at the end of handling a user page fault, when the + * fault has been handled by updating a PTE in the linux page tables. + * We use it to preload an HPTE into the hash table corresponding to + * the updated linux PTE. + */ +void update_mmu_cache(struct vm_area_struct *vma, unsigned long ea, + pte_t pte) +{ + unsigned long vsid; + void *pgdir; + pte_t *ptep; + + /* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */ + if (!pte_young(pte)) + return; + + pgdir = vma->vm_mm->pgd; + if (pgdir == NULL) + return; + + ptep = find_linux_pte(pgdir, ea); + vsid = get_vsid(vma->vm_mm->context, ea); + + __hash_page(ea, pte_val(pte) & (_PAGE_USER|_PAGE_RW), vsid, ptep); +} diff --git a/include/asm-ppc64/machdep.h b/include/asm-ppc64/machdep.h index 304670e2177b..8b66663f5345 100644 --- a/include/asm-ppc64/machdep.h +++ b/include/asm-ppc64/machdep.h @@ -24,26 +24,33 @@ struct machdep_calls { /* High use functions in the first cachelines, low use functions * follow. DRENG collect profile data. 
*/ - void (*hpte_invalidate)(unsigned long slot); - - void (*hpte_updatepp)(long slot, + void (*hpte_invalidate)(unsigned long slot, + unsigned long va, + int large, + int local); + long (*hpte_updatepp)(unsigned long slot, unsigned long newpp, - unsigned long va); + unsigned long va, + int large); void (*hpte_updateboltedpp)(unsigned long newpp, unsigned long ea); - unsigned long (*hpte_getword0)(unsigned long slot); - - long (*hpte_find)( unsigned long vpn ); - - long (*hpte_selectslot)(unsigned long vpn); + long (*insert_hpte)(unsigned long hpte_group, + unsigned long vpn, + unsigned long prpn, + int secondary, + unsigned long hpteflags, + int bolted, + int large); + long (*remove_hpte)(unsigned long hpte_group); + void (*flush_hash_range)(unsigned long context, + unsigned long number, + int local); + void (*make_pte)(void *htab, unsigned long va, + unsigned long pa, + int mode, + unsigned long hash_mask, + int large); - void (*hpte_create_valid)(unsigned long slot, - unsigned long vpn, - unsigned long prpn, - unsigned hash, - void * ptep, - unsigned hpteflags, - unsigned bolted); void (*tce_build)(struct TceTable * tbl, long tcenum, unsigned long uaddr, diff --git a/include/asm-ppc64/mmu.h b/include/asm-ppc64/mmu.h index df830a68e927..a0e55d9d023a 100644 --- a/include/asm-ppc64/mmu.h +++ b/include/asm-ppc64/mmu.h @@ -77,7 +77,7 @@ typedef struct { unsigned long resv0: 7; /* Padding to a 64b boundary */ } slb_dword1; -typedef struct _SLBE { +typedef struct { union { unsigned long dword0; slb_dword0 dw0; @@ -107,26 +107,13 @@ typedef struct { unsigned long avpn:57; /* vsid | api == avpn */ unsigned long : 2; /* Software use */ unsigned long bolted: 1; /* HPTE is "bolted" */ - unsigned long : 1; /* Software use */ + unsigned long lock: 1; /* lock on pSeries SMP */ unsigned long l: 1; /* Virtual page is large (L=1) or 4 KB (L=0) */ unsigned long h: 1; /* Hash function identifier */ unsigned long v: 1; /* Valid (v=1) or invalid (v=0) */ } Hpte_dword0; typedef struct { - unsigned long : 6; /* unused - padding */ - unsigned long ac: 1; /* Address compare */ - unsigned long r: 1; /* Referenced */ - unsigned long c: 1; /* Changed */ - unsigned long w: 1; /* Write-thru cache mode */ - unsigned long i: 1; /* Cache inhibited */ - unsigned long m: 1; /* Memory coherence required */ - unsigned long g: 1; /* Guarded */ - unsigned long n: 1; /* No-execute */ - unsigned long pp: 2; /* Page protection bits 1:2 */ -} Hpte_flags; - -typedef struct { unsigned long pp0: 1; /* Page protection bit 0 */ unsigned long : 1; /* Reserved */ unsigned long rpn: 50; /* Real page number */ @@ -134,12 +121,12 @@ typedef struct { unsigned long ac: 1; /* Address compare */ unsigned long r: 1; /* Referenced */ unsigned long c: 1; /* Changed */ - unsigned long w: 1; /* Write-thru cache mode */ - unsigned long i: 1; /* Cache inhibited */ - unsigned long m: 1; /* Memory coherence required */ - unsigned long g: 1; /* Guarded */ - unsigned long n: 1; /* No-execute */ - unsigned long pp: 2; /* Page protection bits 1:2 */ + unsigned long w: 1; /* Write-thru cache mode */ + unsigned long i: 1; /* Cache inhibited */ + unsigned long m: 1; /* Memory coherence required */ + unsigned long g: 1; /* Guarded */ + unsigned long n: 1; /* No-execute */ + unsigned long pp: 2; /* Page protection bits 1:2 */ } Hpte_dword1; typedef struct { @@ -148,7 +135,7 @@ typedef struct { unsigned long flags: 10; /* HPTE flags */ } Hpte_dword1_flags; -typedef struct _HPTE { +typedef struct { union { unsigned long dword0; Hpte_dword0 dw0; @@ -156,21 +143,8 
@@ typedef struct _HPTE { union { unsigned long dword1; - struct { - unsigned long pp0: 1; /* Page protection bit 0 */ - unsigned long ts: 1; /* Tag set bit */ - unsigned long rpn: 50; /* Real page number */ - unsigned long : 2; /* Unused */ - unsigned long ac: 1; /* Address compare bit */ - unsigned long r: 1; /* Referenced */ - unsigned long c: 1; /* Changed */ - unsigned long w: 1; /* Write-thru cache mode */ - unsigned long i: 1; /* Cache inhibited */ - unsigned long m: 1; /* Memory coherence */ - unsigned long g: 1; /* Guarded */ - unsigned long n: 1; /* No-execute page if N=1 */ - unsigned long pp: 2; /* Page protection bit 1:2 */ - } dw1; + Hpte_dword1 dw1; + Hpte_dword1_flags flags; } dw1; } HPTE; @@ -204,6 +178,8 @@ void create_valid_hpte( unsigned long slot, unsigned long vpn, #define PT_SHIFT (12) /* Page Table */ #define PT_MASK 0x02FF +#define LARGE_PAGE_SHIFT 24 + static inline unsigned long hpt_hash(unsigned long vpn, int large) { unsigned long vsid; @@ -220,20 +196,36 @@ static inline unsigned long hpt_hash(unsigned long vpn, int large) return (vsid & 0x7fffffffff) ^ page; } -#define PG_SHIFT (12) /* Page Entry */ +static inline void _tlbie(unsigned long va, int large) +{ + asm volatile("ptesync": : :"memory"); + + if (large) { + asm volatile("clrldi %0,%0,16\n\ + tlbie %0,1" : : "r"(va) : "memory"); + } else { + asm volatile("clrldi %0,%0,16\n\ + tlbie %0,0" : : "r"(va) : "memory"); + } -extern __inline__ void _tlbie( unsigned long va ) + asm volatile("eieio; tlbsync; ptesync": : :"memory"); +} + +static inline void _tlbiel(unsigned long va, int large) { - __asm__ __volatile__ ( " \n\ - clrldi %0,%0,16 \n\ - ptesync \n\ - tlbie %0 \n\ - eieio \n\ - tlbsync \n\ - ptesync" - : : "r" (va) : "memory" ); + asm volatile("ptesync": : :"memory"); + + if (large) { + asm volatile("clrldi %0,%0,16\n\ + tlbiel %0,1" : : "r"(va) : "memory"); + } else { + asm volatile("clrldi %0,%0,16\n\ + tlbiel %0,0" : : "r"(va) : "memory"); + } + + asm volatile("ptesync": : :"memory"); } - + #endif /* __ASSEMBLY__ */ /* Block size masks */ diff --git a/include/asm-ppc64/pgtable.h b/include/asm-ppc64/pgtable.h index 32668b8e59f1..c81ad14b6b5e 100644 --- a/include/asm-ppc64/pgtable.h +++ b/include/asm-ppc64/pgtable.h @@ -7,6 +7,7 @@ */ #ifndef __ASSEMBLY__ +#include <linux/threads.h> #include <asm/processor.h> /* For TASK_SIZE */ #include <asm/mmu.h> #include <asm/page.h> @@ -93,13 +94,15 @@ #define _PAGE_WRITETHRU 0x040UL /* W: cache write-through */ #define _PAGE_DIRTY 0x080UL /* C: page changed */ #define _PAGE_ACCESSED 0x100UL /* R: page referenced */ +#if 0 #define _PAGE_HPTENOIX 0x200UL /* software: pte HPTE slot unknown */ +#endif #define _PAGE_HASHPTE 0x400UL /* software: pte has an associated HPTE */ #define _PAGE_EXEC 0x800UL /* software: i-cache coherence required */ #define _PAGE_SECONDARY 0x8000UL /* software: HPTE is in secondary group */ #define _PAGE_GROUP_IX 0x7000UL /* software: HPTE index within group */ /* Bits 0x7000 identify the index within an HPT Group */ -#define _PAGE_HPTEFLAGS (_PAGE_HASHPTE | _PAGE_HPTENOIX | _PAGE_SECONDARY | _PAGE_GROUP_IX) +#define _PAGE_HPTEFLAGS (_PAGE_HASHPTE | _PAGE_SECONDARY | _PAGE_GROUP_IX) /* PAGE_MASK gives the right answer below, but only by accident */ /* It should be preserving the high 48 bits and then specifically */ /* preserving _PAGE_SECONDARY | _PAGE_GROUP_IX */ @@ -397,6 +400,7 @@ extern void paging_init(void); * as entries are faulted into the hash table by the low-level * data/instruction access exception handlers. 
 */
+#if 0
 /*
 * We won't be able to use update_mmu_cache to update the
 * hardware page table because we need to update the pte
@@ -404,9 +408,29 @@ extern void paging_init(void);
 * its value.
 */
 #define update_mmu_cache(vma, addr, pte) do { } while (0)
+#else
+/*
+ * This gets called at the end of handling a page fault, when
+ * the kernel has put a new PTE into the page table for the process.
+ * We use it to put a corresponding HPTE into the hash table
+ * ahead of time, instead of waiting for the inevitable extra
+ * hash-table miss exception.
+ */
+extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t);
+#endif
 
 extern void flush_hash_segments(unsigned low_vsid, unsigned high_vsid);
-extern void flush_hash_page(unsigned long context, unsigned long ea, pte_t pte);
+extern void flush_hash_page(unsigned long context, unsigned long ea, pte_t pte,
+ int local);
+void flush_hash_range(unsigned long context, unsigned long number, int local);
+
+/* TLB flush batching */
+#define MAX_BATCH_FLUSH 128
+struct tlb_batch_data {
+ pte_t pte;
+ unsigned long addr;
+};
+extern struct tlb_batch_data tlb_batch_array[NR_CPUS][MAX_BATCH_FLUSH];
 
 /* Encode and de-code a swap entry */
 #define SWP_TYPE(entry) (((entry).val >> 1) & 0x3f)
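The MAX_BATCH_FLUSH and struct tlb_batch_data declarations above back the batching added to local_flush_tlb_range() in mm/init.c: dirty PTEs are queued in a per-cpu array and handed to flush_hash_range() in blocks, so the hypervisor tlbie lock is taken once per batch rather than once per page. A minimal sketch of that control flow, with a hypothetical do_flush() standing in for the locked flush:

/*
 * Editorial sketch, not patch content: batch-and-drain flushing in
 * the shape of local_flush_tlb_range() above.
 */
#include <stdio.h>

#define MAX_BATCH_FLUSH 128

struct batch_ent { unsigned long addr; };

static struct batch_ent batch[MAX_BATCH_FLUSH];

static void do_flush(struct batch_ent *ents, int n)
{
	/* Real code: take the tlbie lock once, flush n pages, drop it. */
	printf("flushing %d page(s), first addr %#lx\n", n, ents[0].addr);
}

static void flush_range(unsigned long start, unsigned long end)
{
	int i = 0;
	unsigned long addr;

	for (addr = start; addr < end; addr += 0x1000) {
		batch[i++].addr = addr;
		if (i == MAX_BATCH_FLUSH) {	/* full batch: flush now */
			do_flush(batch, i);
			i = 0;
		}
	}
	if (i)					/* drain the partial batch */
		do_flush(batch, i);
}

int main(void)
{
	/* 130 pages: one batch of 128, then a partial batch of 2. */
	flush_range(0x10000000UL, 0x10000000UL + 130 * 0x1000UL);
	return 0;
}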

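One more piece of context for the pgtable.h hunk: the software bits _PAGE_SECONDARY and _PAGE_GROUP_IX record, in the Linux PTE itself, where the backing HPTE was placed, so a later flush can go straight to the slot instead of searching both groups. A sketch of that encoding using the bit values defined above; the encode()/slot_of() helpers are hypothetical, not kernel functions:

/*
 * Editorial sketch, not patch content: recovering an HPTE slot from
 * the software bits kept in a Linux PTE.
 */
#include <stdio.h>

#define _PAGE_HASHPTE   0x400UL
#define _PAGE_GROUP_IX  0x7000UL	/* index within the 8-entry group */
#define _PAGE_SECONDARY 0x8000UL	/* which hash function was used */
#define HPTES_PER_GROUP 8

static unsigned long encode(unsigned long pte, int secondary, int group_ix)
{
	pte |= _PAGE_HASHPTE;
	if (secondary)
		pte |= _PAGE_SECONDARY;
	pte |= ((unsigned long)group_ix << 12) & _PAGE_GROUP_IX;
	return pte;
}

static long slot_of(unsigned long pte, unsigned long hash, unsigned long mask)
{
	if (!(pte & _PAGE_HASHPTE))
		return -1;			/* no HPTE behind this PTE */
	if (pte & _PAGE_SECONDARY)
		hash = ~hash;			/* secondary group */
	return (hash & mask) * HPTES_PER_GROUP
		+ ((pte & _PAGE_GROUP_IX) >> 12);
}

int main(void)
{
	unsigned long pte = encode(0, 1, 5);	/* secondary group, index 5 */

	printf("pte %#lx -> slot %ld\n", pte, slot_of(pte, 0x0UL, 0x1UL));
	return 0;
}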