 arch/ppc64/kernel/head.S        |  25
 arch/ppc64/mm/hash_utils.c      |  10
 arch/ppc64/mm/slb_low.S         |  53
 arch/ppc64/mm/stab.c            |   6
 include/asm-ppc64/mmu.h         |  45
 include/asm-ppc64/mmu_context.h | 113
 include/asm-ppc64/page.h        |  11
 include/asm-ppc64/pgtable.h     |  14
 8 files changed, 161 insertions(+), 116 deletions(-)
diff --git a/arch/ppc64/kernel/head.S b/arch/ppc64/kernel/head.S
index 48ccde53a8ac..c089755a9684 100644
--- a/arch/ppc64/kernel/head.S
+++ b/arch/ppc64/kernel/head.S
@@ -548,15 +548,15 @@ __end_systemcfg:
.llong 0 /* Reserved */
.llong 0 /* Reserved */
.llong 0 /* Reserved */
- .llong 0xc00000000 /* KERNELBASE ESID */
- .llong 0x6a99b4b14 /* KERNELBASE VSID */
+ .llong (KERNELBASE>>SID_SHIFT)
+ .llong 0x40bffffd5 /* KERNELBASE VSID */
/* We have to list the bolted VMALLOC segment here, too, so that it
* will be restored on shared processor switch */
- .llong 0xd00000000 /* VMALLOCBASE ESID */
- .llong 0x08d12e6ab /* VMALLOCBASE VSID */
+ .llong (VMALLOCBASE>>SID_SHIFT)
+ .llong 0xb0cffffd1 /* VMALLOCBASE VSID */
.llong 8192 /* # pages to map (32 MB) */
.llong 0 /* Offset from start of loadarea to start of map */
- .llong 0x0006a99b4b140000 /* VPN of first page to map */
+ .llong 0x40bffffd50000 /* VPN of first page to map */
. = 0x6100
@@ -1064,18 +1064,9 @@ _GLOBAL(do_stab_bolted)
rldimi r10,r11,7,52 /* r10 = first ste of the group */
/* Calculate VSID */
- /* (((ea >> 28) & 0x1fff) << 15) | (ea >> 60) */
- rldic r11,r11,15,36
- ori r11,r11,0xc
-
- /* VSID_RANDOMIZER */
- li r9,9
- sldi r9,r9,32
- oris r9,r9,58231
- ori r9,r9,39831
-
- mulld r9,r11,r9
- rldic r9,r9,12,16 /* r9 = vsid << 12 */
+ /* This is a kernel address, so protovsid = ESID */
+ ASM_VSID_SCRAMBLE(r11, r9)
+ rldic r9,r11,12,16 /* r9 = vsid << 12 */
/* Search the primary group for a free entry */
1: ld r11,0(r10) /* Test valid bit of the current ste */
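
The hunks above replace the hand-computed iSeries bolted-segment VSIDs with values derived from the new scramble. The following stand-alone sketch (not part of the patch) reproduces those constants on any 64-bit host, using the same multiplier and fold that the patch adds as vsid_scramble() in mmu_context.h:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define VSID_MULTIPLIER	268435399ULL		/* largest 28-bit prime */
#define VSID_BITS	36
#define VSID_MODULUS	((1ULL << VSID_BITS) - 1)
#define SID_SHIFT	28			/* 256MB segments */

/* same fold as the vsid_scramble() added to mmu_context.h */
static uint64_t scramble(uint64_t protovsid)
{
	uint64_t x = protovsid * VSID_MULTIPLIER;	/* fits in 64 bits */
	x = (x >> VSID_BITS) + (x & VSID_MODULUS);
	return (x + ((x + 1) >> VSID_BITS)) & VSID_MODULUS;
}

int main(void)
{
	/* kernel addresses use proto-VSID = ESID = EA >> SID_SHIFT */
	assert(scramble(0xC000000000000000ULL >> SID_SHIFT) == 0x40bffffd5ULL);
	assert(scramble(0xD000000000000000ULL >> SID_SHIFT) == 0xb0cffffd1ULL);
	printf("bolted KERNELBASE/VMALLOCBASE VSIDs match head.S\n");
	return 0;
}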
diff --git a/arch/ppc64/mm/hash_utils.c b/arch/ppc64/mm/hash_utils.c
index 842602152e9c..cf2fe76430ee 100644
--- a/arch/ppc64/mm/hash_utils.c
+++ b/arch/ppc64/mm/hash_utils.c
@@ -253,24 +253,24 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
int local = 0;
cpumask_t tmp;
- /* Check for invalid addresses. */
- if (!IS_VALID_EA(ea))
- return 1;
-
switch (REGION_ID(ea)) {
case USER_REGION_ID:
user_region = 1;
mm = current->mm;
- if (mm == NULL)
+ if ((ea > USER_END) || (! mm))
return 1;
vsid = get_vsid(mm->context.id, ea);
break;
case IO_REGION_ID:
+ if (ea > IMALLOC_END)
+ return 1;
mm = &ioremap_mm;
vsid = get_kernel_vsid(ea);
break;
case VMALLOC_REGION_ID:
+ if (ea > VMALLOC_END)
+ return 1;
mm = &init_mm;
vsid = get_kernel_vsid(ea);
break;
diff --git a/arch/ppc64/mm/slb_low.S b/arch/ppc64/mm/slb_low.S
index 8b061af1d8ee..8379d678f70f 100644
--- a/arch/ppc64/mm/slb_low.S
+++ b/arch/ppc64/mm/slb_low.S
@@ -68,19 +68,19 @@ _GLOBAL(slb_allocate)
srdi r3,r3,28 /* get esid */
cmpldi cr7,r9,0xc /* cmp KERNELBASE for later use */
- /* r9 = region, r3 = esid, cr7 = <>KERNELBASE */
-
- rldicr. r11,r3,32,16
- bne- 8f /* invalid ea bits set */
- addi r11,r9,-1
- cmpldi r11,0xb
- blt- 8f /* invalid region */
+ rldimi r10,r3,28,0 /* r10= ESID<<28 | entry */
+ oris r10,r10,SLB_ESID_V@h /* r10 |= SLB_ESID_V */
- /* r9 = region, r3 = esid, r10 = entry, cr7 = <>KERNELBASE */
+ /* r3 = esid, r10 = esid_data, cr7 = <>KERNELBASE */
blt cr7,0f /* user or kernel? */
- /* kernel address */
+ /* kernel address: proto-VSID = ESID */
+ /* WARNING - MAGIC: we don't use the VSID 0xfffffffff, but
+ * this code will generate the protoVSID 0xfffffffff for the
+ * top segment. That's ok, the scramble below will translate
+ * it to VSID 0, which is reserved as a bad VSID - one which
+ * will never have any pages in it. */
li r11,SLB_VSID_KERNEL
BEGIN_FTR_SECTION
bne cr7,9f
@@ -88,8 +88,12 @@ BEGIN_FTR_SECTION
END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE)
b 9f
-0: /* user address */
+0: /* user address: proto-VSID = context<<15 | ESID */
li r11,SLB_VSID_USER
+
+ srdi. r9,r3,13
+ bne- 8f /* invalid ea bits set */
+
#ifdef CONFIG_HUGETLB_PAGE
BEGIN_FTR_SECTION
/* check against the hugepage ranges */
@@ -111,33 +115,18 @@ END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE)
#endif /* CONFIG_HUGETLB_PAGE */
6: ld r9,PACACONTEXTID(r13)
+ rldimi r3,r9,USER_ESID_BITS,0
-9: /* r9 = "context", r3 = esid, r11 = flags, r10 = entry */
-
- rldimi r9,r3,15,0 /* r9= VSID ordinal */
-
-7: rldimi r10,r3,28,0 /* r10= ESID<<28 | entry */
- oris r10,r10,SLB_ESID_V@h /* r10 |= SLB_ESID_V */
-
- /* r9 = ordinal, r3 = esid, r11 = flags, r10 = esid_data */
-
- li r3,VSID_RANDOMIZER@higher
- sldi r3,r3,32
- oris r3,r3,VSID_RANDOMIZER@h
- ori r3,r3,VSID_RANDOMIZER@l
-
- mulld r9,r3,r9 /* r9 = ordinal * VSID_RANDOMIZER */
- clrldi r9,r9,28 /* r9 &= VSID_MASK */
- sldi r9,r9,SLB_VSID_SHIFT /* r9 <<= SLB_VSID_SHIFT */
- or r9,r9,r11 /* r9 |= flags */
+9: /* r3 = protovsid, r11 = flags, r10 = esid_data, cr7 = <>KERNELBASE */
+ ASM_VSID_SCRAMBLE(r3,r9)
- /* r9 = vsid_data, r10 = esid_data, cr7 = <>KERNELBASE */
+ rldimi r11,r3,SLB_VSID_SHIFT,16 /* combine VSID and flags */
/*
* No need for an isync before or after this slbmte. The exception
* we enter with and the rfid we exit with are context synchronizing.
*/
- slbmte r9,r10
+ slbmte r11,r10
bgelr cr7 /* we're done for kernel addresses */
@@ -160,6 +149,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE)
blr
8: /* invalid EA */
- li r9,0 /* 0 VSID ordinal -> BAD_VSID */
+ li r3,0 /* BAD_VSID */
li r11,SLB_VSID_USER /* flags don't much matter */
- b 7b
+ b 9b
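
In C terms, the rewritten slb_allocate() now builds the two slbmte operands as sketched below. This is only an illustration of the assembly above: SLB_ESID_V as bit 27 and SLB_VSID_SHIFT = 12 are assumptions inferred from the surrounding code (do_stab_bolted's "vsid << 12"), and flags stands for the SLB_VSID_* bits.

#include <stdint.h>

#define SID_SHIFT	28
#define USER_ESID_BITS	15
#define SLB_VSID_SHIFT	12			/* assumption */
#define SLB_ESID_V	(1ULL << 27)		/* assumption: SLB valid bit */

static uint64_t vsid_scramble(uint64_t protovsid)	/* as added to mmu_context.h */
{
	uint64_t x = protovsid * 268435399ULL;		/* VSID_MULTIPLIER */
	x = (x >> 36) + (x & ((1ULL << 36) - 1));
	return (x + ((x + 1) >> 36)) & ((1ULL << 36) - 1);
}

struct slb_entry {
	uint64_t vsid_data;	/* RS operand of slbmte */
	uint64_t esid_data;	/* RB operand of slbmte */
};

static struct slb_entry make_slb_entry(uint64_t ea, uint64_t context,
				       uint64_t flags, unsigned entry)
{
	uint64_t esid = ea >> SID_SHIFT;
	/* kernel: proto-VSID = ESID;  user: context<<15 | ESID */
	uint64_t proto = (ea >= 0xC000000000000000ULL)
		? esid
		: ((context << USER_ESID_BITS) | esid);
	struct slb_entry e;

	e.esid_data = (esid << SID_SHIFT) | SLB_ESID_V | entry;		/* rldimi + oris */
	e.vsid_data = (vsid_scramble(proto) << SLB_VSID_SHIFT) | flags;	/* rldimi */
	return e;
}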
diff --git a/arch/ppc64/mm/stab.c b/arch/ppc64/mm/stab.c
index 99330c4c6f8d..9b4fe8ac115a 100644
--- a/arch/ppc64/mm/stab.c
+++ b/arch/ppc64/mm/stab.c
@@ -115,15 +115,11 @@ static int __ste_allocate(unsigned long ea, struct mm_struct *mm)
unsigned char stab_entry;
unsigned long offset;
- /* Check for invalid effective addresses. */
- if (!IS_VALID_EA(ea))
- return 1;
-
/* Kernel or user address? */
if (ea >= KERNELBASE) {
vsid = get_kernel_vsid(ea);
} else {
- if (! mm)
+ if ((ea >= TASK_SIZE_USER64) || (! mm))
return 1;
vsid = get_vsid(mm->context.id, ea);
diff --git a/include/asm-ppc64/mmu.h b/include/asm-ppc64/mmu.h
index d7ebf8797f94..0d116091d651 100644
--- a/include/asm-ppc64/mmu.h
+++ b/include/asm-ppc64/mmu.h
@@ -15,6 +15,7 @@
#include <linux/config.h>
#include <asm/page.h>
+#include <linux/stringify.h>
#ifndef __ASSEMBLY__
@@ -215,12 +216,44 @@ extern void htab_finish_init(void);
#define SLB_VSID_KERNEL (SLB_VSID_KP|SLB_VSID_C)
#define SLB_VSID_USER (SLB_VSID_KP|SLB_VSID_KS)
-#define VSID_RANDOMIZER ASM_CONST(42470972311)
-#define VSID_MASK 0xfffffffffUL
-/* Because we never access addresses below KERNELBASE as kernel
- * addresses, this VSID is never used for anything real, and will
- * never have pages hashed into it */
-#define BAD_VSID ASM_CONST(0)
+#define VSID_MULTIPLIER ASM_CONST(268435399) /* largest 28-bit prime */
+#define VSID_BITS 36
+#define VSID_MODULUS ((1UL<<VSID_BITS)-1)
+
+#define CONTEXT_BITS 20
+#define USER_ESID_BITS 15
+
+/*
+ * This macro generates asm code to compute the VSID scramble
+ * function. Used in slb_allocate() and do_stab_bolted. The function
+ * computed is: (protovsid*VSID_MULTIPLIER) % VSID_MODULUS
+ *
+ * rt = register continaing the proto-VSID and into which the
+ * VSID will be stored
+ * rx = scratch register (clobbered)
+ *
+ * - rt and rx must be different registers
+ * - The answer will end up in the low 36 bits of rt. The higher
+ * bits may contain other garbage, so you may need to mask the
+ * result.
+ */
+#define ASM_VSID_SCRAMBLE(rt, rx) \
+ lis rx,VSID_MULTIPLIER@h; \
+ ori rx,rx,VSID_MULTIPLIER@l; \
+ mulld rt,rt,rx; /* rt = rt * MULTIPLIER */ \
+ \
+ srdi rx,rt,VSID_BITS; \
+ clrldi rt,rt,(64-VSID_BITS); \
+ add rt,rt,rx; /* add high and low bits */ \
+ /* Now, rt == VSID (mod 2^36-1), and lies between 0 and \
+ * 2^36-1+2^28-1. That in particular means that if rt >= \
+ * 2^36-1, then rt+1 has the 2^36 bit set. So, if rt+1 has \
+ * the bit clear, rt already has the answer we want, if it \
+ * doesn't, the answer is the low 36 bits of rt+1. So in all \
+ * cases the answer is the low 36 bits of (rt + ((rt+1) >> 36))*/\
+ addi rx,rt,1; \
+ srdi rx,rx,VSID_BITS; /* extract 2^36 bit */ \
+ add rt,rt,rx
/* Block size masks */
#define BL_128K 0x000
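
The fold in ASM_VSID_SCRAMBLE avoids a divide by exploiting 2^36 == 1 (mod 2^36-1). A quick user-space check (a sketch, not part of the patch) that it agrees with a plain modulo for proto-VSIDs below 2^36-1:

#include <assert.h>
#include <stdint.h>

#define VSID_MULTIPLIER	268435399ULL
#define VSID_BITS	36
#define VSID_MODULUS	((1ULL << VSID_BITS) - 1)

static uint64_t fold(uint64_t proto)
{
	uint64_t x = proto * VSID_MULTIPLIER;		/* always < 2^64, no overflow */
	x = (x >> VSID_BITS) + (x & VSID_MODULUS);	/* congruent mod 2^36-1 */
	return (x + ((x + 1) >> VSID_BITS)) & VSID_MODULUS;
}

int main(void)
{
	uint64_t proto;

	/* sample the proto-VSID space and compare against a real modulo */
	for (proto = 0; proto < VSID_MODULUS; proto += 0x10000007ULL)
		assert(fold(proto) == (proto * VSID_MULTIPLIER) % VSID_MODULUS);
	return 0;
}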
diff --git a/include/asm-ppc64/mmu_context.h b/include/asm-ppc64/mmu_context.h
index 773ce3af4635..d08d9a9e4342 100644
--- a/include/asm-ppc64/mmu_context.h
+++ b/include/asm-ppc64/mmu_context.h
@@ -34,7 +34,7 @@ static inline int sched_find_first_bit(unsigned long *b)
}
#define NO_CONTEXT 0
-#define FIRST_USER_CONTEXT 0x10 /* First 16 reserved for kernel */
+#define FIRST_USER_CONTEXT 1
#define LAST_USER_CONTEXT 0x8000 /* Same as PID_MAX for now... */
#define NUM_USER_CONTEXT (LAST_USER_CONTEXT-FIRST_USER_CONTEXT)
@@ -181,46 +181,87 @@ static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next)
local_irq_restore(flags);
}
-/* This is only valid for kernel (including vmalloc, imalloc and bolted) EA's
+/* VSID allocation
+ * ===============
+ *
+ * We first generate a 36-bit "proto-VSID". For kernel addresses this
+ * is equal to the ESID, for user addresses it is:
+ * (context << 15) | (esid & 0x7fff)
+ *
+ * The two forms are distinguishable because the top bit is 0 for user
+ * addresses, whereas the top two bits are 1 for kernel addresses.
+ * Proto-VSIDs with the top two bits equal to 0b10 are reserved for
+ * now.
+ *
+ * The proto-VSIDs are then scrambled into real VSIDs with the
+ * multiplicative hash:
+ *
+ * VSID = (proto-VSID * VSID_MULTIPLIER) % VSID_MODULUS
+ * where VSID_MULTIPLIER = 268435399 = 0xFFFFFC7
+ * VSID_MODULUS = 2^36-1 = 0xFFFFFFFFF
+ *
+ * This scramble is only well defined for proto-VSIDs below
+ * 0xFFFFFFFFF, so both proto-VSID and actual VSID 0xFFFFFFFFF are
+ * reserved. VSID_MULTIPLIER is prime (the largest 28-bit prime, in
+ * fact), so in particular it is co-prime to VSID_MODULUS, making this
+ * a 1:1 scrambling function. Because the modulus is 2^n-1 we can
+ * compute it efficiently without a divide or extra multiply (see
+ * below).
+ *
+ * This scheme has several advantages over older methods:
+ *
+ * - We have VSIDs allocated for every kernel address
+ * (i.e. everything above 0xC000000000000000), except the very top
+ * segment, which simplifies several things.
+ *
+ * - We allow for 15 significant bits of ESID and 20 bits of
+ * context for user addresses. i.e. 8T (43 bits) of address space for
+ * up to 1M contexts (although the page table structure and context
+ * allocation will need changes to take advantage of this).
+ *
+ * - The scramble function gives robust scattering in the hash
+ * table (at least based on some initial results). The previous
+ * method was more susceptible to pathological cases giving excessive
+ * hash collisions.
*/
-static inline unsigned long
-get_kernel_vsid( unsigned long ea )
-{
- unsigned long ordinal, vsid;
-
- ordinal = (((ea >> 28) & 0x1fff) * LAST_USER_CONTEXT) | (ea >> 60);
- vsid = (ordinal * VSID_RANDOMIZER) & VSID_MASK;
-
-#ifdef HTABSTRESS
- /* For debug, this path creates a very poor vsid distribuition.
- * A user program can access virtual addresses in the form
- * 0x0yyyyxxxx000 where yyyy = xxxx to cause multiple mappings
- * to hash to the same page table group.
- */
- ordinal = ((ea >> 28) & 0x1fff) | (ea >> 44);
- vsid = ordinal & VSID_MASK;
-#endif /* HTABSTRESS */
-
- return vsid;
-}
-
-/* This is only valid for user EA's (user EA's do not exceed 2^41 (EADDR_SIZE))
+
+/*
+ * WARNING - If you change these you must make sure the asm
+ * implementations in slb_allocate(), do_stab_bolted and mmu.h
+ * (ASM_VSID_SCRAMBLE macro) are changed accordingly.
+ *
+ * You'll also need to change the precomputed VSID values in head.S
+ * which are used by the iSeries firmware.
*/
-static inline unsigned long
-get_vsid( unsigned long context, unsigned long ea )
-{
- unsigned long ordinal, vsid;
- ordinal = (((ea >> 28) & 0x1fff) * LAST_USER_CONTEXT) | context;
- vsid = (ordinal * VSID_RANDOMIZER) & VSID_MASK;
+static inline unsigned long vsid_scramble(unsigned long protovsid)
+{
+#if 0
+ /* The code below is equivalent to this function for arguments
+ * < 2^VSID_BITS, which is all this should ever be called
+ * with. However gcc is not clever enough to compute the
+ * modulus (2^n-1) without a second multiply. */
+ return ((protovsid * VSID_MULTIPLIER) % VSID_MODULUS);
+#else /* 1 */
+ unsigned long x;
+
+ x = protovsid * VSID_MULTIPLIER;
+ x = (x >> VSID_BITS) + (x & VSID_MODULUS);
+ return (x + ((x+1) >> VSID_BITS)) & VSID_MODULUS;
+#endif /* 1 */
+}
-#ifdef HTABSTRESS
- /* See comment above. */
- ordinal = ((ea >> 28) & 0x1fff) | (context << 16);
- vsid = ordinal & VSID_MASK;
-#endif /* HTABSTRESS */
+/* This is only valid for addresses >= KERNELBASE */
+static inline unsigned long get_kernel_vsid(unsigned long ea)
+{
+ return vsid_scramble(ea >> SID_SHIFT);
+}
- return vsid;
+/* This is only valid for user addresses (which are below 2^41) */
+static inline unsigned long get_vsid(unsigned long context, unsigned long ea)
+{
+ return vsid_scramble((context << USER_ESID_BITS)
+ | (ea >> SID_SHIFT));
}
#endif /* __PPC64_MMU_CONTEXT_H */
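
A small check of the encoding described in the comment block above: user proto-VSIDs (context<<15 | esid) stay below 2^35, so their top bit is clear, while kernel ESIDs at 0xC... and above have the top two bits set. This is a sketch, not part of the patch:

#include <assert.h>
#include <stdint.h>

#define SID_SHIFT	28
#define USER_ESID_BITS	15
#define CONTEXT_BITS	20

int main(void)
{
	/* largest possible user proto-VSID: max context, max user ESID */
	uint64_t uproto = (((1ULL << CONTEXT_BITS) - 1) << USER_ESID_BITS)
			| ((1ULL << USER_ESID_BITS) - 1);
	assert((uproto >> 35) == 0);		/* top bit clear: user */

	/* kernel proto-VSID = ESID; KERNELBASE and above start at 0xC... */
	uint64_t kproto = 0xC000000000000000ULL >> SID_SHIFT;
	assert((kproto >> 34) == 0x3);		/* top two bits set: kernel */
	return 0;
}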
diff --git a/include/asm-ppc64/page.h b/include/asm-ppc64/page.h
index af8219b8e775..6ad9c212cc71 100644
--- a/include/asm-ppc64/page.h
+++ b/include/asm-ppc64/page.h
@@ -212,17 +212,6 @@ extern int page_is_ram(unsigned long pfn);
#define USER_REGION_ID (0UL)
#define REGION_ID(X) (((unsigned long)(X))>>REGION_SHIFT)
-/*
- * Define valid/invalid EA bits (for all ranges)
- */
-#define VALID_EA_BITS (0x000001ffffffffffUL)
-#define INVALID_EA_BITS (~(REGION_MASK|VALID_EA_BITS))
-
-#define IS_VALID_REGION_ID(x) \
- (((x) == USER_REGION_ID) || ((x) >= KERNEL_REGION_ID))
-#define IS_VALID_EA(x) \
- ((!((x) & INVALID_EA_BITS)) && IS_VALID_REGION_ID(REGION_ID(x)))
-
#define __bpn_to_ba(x) ((((unsigned long)(x))<<PAGE_SHIFT) + KERNELBASE)
#define __ba_to_bpn(x) ((((unsigned long)(x)) & ~REGION_MASK) >> PAGE_SHIFT)
diff --git a/include/asm-ppc64/pgtable.h b/include/asm-ppc64/pgtable.h
index 397758e24c0f..d759cad47745 100644
--- a/include/asm-ppc64/pgtable.h
+++ b/include/asm-ppc64/pgtable.h
@@ -45,10 +45,16 @@
PGD_INDEX_SIZE + PAGE_SHIFT)
/*
+ * Size of EA range mapped by our pagetables.
+ */
+#define PGTABLE_EA_BITS 41
+#define PGTABLE_EA_MASK ((1UL<<PGTABLE_EA_BITS)-1)
+
+/*
* Define the address range of the vmalloc VM area.
*/
#define VMALLOC_START (0xD000000000000000ul)
-#define VMALLOC_END (VMALLOC_START + VALID_EA_BITS)
+#define VMALLOC_END (VMALLOC_START + PGTABLE_EA_MASK)
/*
* Define the address range of the imalloc VM area.
@@ -58,19 +64,19 @@
#define IMALLOC_VMADDR(x) ((unsigned long)(x))
#define PHBS_IO_BASE (0xE000000000000000ul) /* Reserve 2 gigs for PHBs */
#define IMALLOC_BASE (0xE000000080000000ul)
-#define IMALLOC_END (IMALLOC_BASE + VALID_EA_BITS)
+#define IMALLOC_END (IMALLOC_BASE + PGTABLE_EA_MASK)
/*
* Define the address range mapped virt <-> physical
*/
#define KRANGE_START KERNELBASE
-#define KRANGE_END (KRANGE_START + VALID_EA_BITS)
+#define KRANGE_END (KRANGE_START + PGTABLE_EA_MASK)
/*
* Define the user address range
*/
#define USER_START (0UL)
-#define USER_END (USER_START + VALID_EA_BITS)
+#define USER_END (USER_START + PGTABLE_EA_MASK)
/*