author     Andrew Morton <akpm@osdl.org>              2004-05-31 18:48:03 -0700
committer  Linus Torvalds <torvalds@ppc970.osdl.org>  2004-05-31 18:48:03 -0700
commit     bfbc3b5f996db1b88c52faec33f33bfa16ac0273 (patch)
tree       cb7a0ceb81ade6dcdcc6519d8d83cacb14e4610b
parent     9e039f5ef0d4e8b95f281822163eeb91262f6aa3 (diff)
[PATCH] ppc64: bolt first vmalloc segment into SLB
From: Anton Blanchard <anton@samba.org>

Based on some profiles we noticed the first vmalloc region was being
continually cast out and replaced. All modules end up there so it is one
of our hottest segments. This patch bolts the vmalloc region into the
second segment.

SLB misses on an NFS benchmark were reduced by about 10% with this patch.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--  arch/ppc64/kernel/head.S         |  2
-rw-r--r--  arch/ppc64/kernel/stab.c         | 45
-rw-r--r--  include/asm-ppc64/mmu_context.h  | 10
3 files changed, 50 insertions(+), 7 deletions(-)
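For context, a minimal sketch of what "bolting" means here, using only the helpers that
appear in this patch (GET_ESID, get_kernel_vsid, make_slbe); the real call sites are in
stab_initialize() and flush_slb() below, this is just an illustrative summary:

	/* Sketch only: reserve an SLB slot for the first vmalloc segment so the
	 * round-robin castout (which now starts at slot 2) never evicts it. */
	esid = GET_ESID(VMALLOCBASE);         /* effective segment id of the vmalloc base */
	vsid = get_kernel_vsid(VMALLOCBASE);  /* matching virtual segment id */
	make_slbe(esid, vsid, 0, 1);          /* no large pages; last flag matches the other
	                                       * bolted kernel entry (assumed kernel-segment flag) */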
diff --git a/arch/ppc64/kernel/head.S b/arch/ppc64/kernel/head.S
index 892c957ad0ed..54b053c6851b 100644
--- a/arch/ppc64/kernel/head.S
+++ b/arch/ppc64/kernel/head.S
@@ -1129,7 +1129,7 @@ SLB_NUM_ENTRIES = 64
addi r21,r22,1
cmpdi r21,SLB_NUM_ENTRIES
blt+ 2f
- li r21,1 /* dont touch bolted slot 0 */
+ li r21,2 /* dont touch slot 0 or 1 */
2: std r21,PACASTABRR(r20)
/* r20 = paca, r22 = entry */
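The assembly above maintains the per-cpu round-robin castout index (PACASTABRR in the
paca, next_round_robin on the C side). A rough C equivalent of the updated wrap logic,
a sketch only, with names taken from the surrounding code:

	/* Advance the round-robin pointer; when it runs past the end of the SLB,
	 * wrap to slot 2 so that slot 0 (first kernel segment) and slot 1 (first
	 * vmalloc segment, bolted by this patch) are never chosen for castout. */
	next = entry + 1;
	if (next >= SLB_NUM_ENTRIES)
		next = 2;
	paca->xStab_data.next_round_robin = next;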
diff --git a/arch/ppc64/kernel/stab.c b/arch/ppc64/kernel/stab.c
index b3ac3ac628d1..6261cc12dc40 100644
--- a/arch/ppc64/kernel/stab.c
+++ b/arch/ppc64/kernel/stab.c
@@ -44,6 +44,11 @@ void stab_initialize(unsigned long stab)
/* Invalidate the entire SLB & all the ERATS */
#ifdef CONFIG_PPC_ISERIES
asm volatile("isync; slbia; isync":::"memory");
+ /*
+ * The hypervisor loads SLB entry 0, but we need to increment
+ * next_round_robin to avoid overwriting it
+ */
+ get_paca()->xStab_data.next_round_robin = 1;
#else
asm volatile("isync":::"memory");
asm volatile("slbmte %0,%0"::"r" (0) : "memory");
@@ -51,6 +56,14 @@ void stab_initialize(unsigned long stab)
make_slbe(esid, vsid, seg0_largepages, 1);
asm volatile("isync":::"memory");
#endif
+
+ /*
+ * Bolt in the first vmalloc segment. Since modules end
+ * up there it gets hit very heavily.
+ */
+ esid = GET_ESID(VMALLOCBASE);
+ vsid = get_kernel_vsid(VMALLOCBASE);
+ make_slbe(esid, vsid, 0, 1);
} else {
asm volatile("isync; slbia; isync":::"memory");
make_ste(stab, esid, vsid);
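Taken together, the initialization path above is intended to leave the SLB laid out
roughly as follows (a summary of the patch, not literal code):

	/*
	 * slot 0: first kernel segment (loaded by make_slbe() here, or by the
	 *         hypervisor on iSeries)
	 * slot 1: first vmalloc segment, bolted by this patch
	 * slot 2 .. slb_size-1: normal entries, recycled round-robin
	 */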
@@ -317,6 +330,7 @@ static void make_slbe(unsigned long esid, unsigned long vsid, int large,
unsigned long word0;
slb_dword1 data;
} vsid_data;
+ struct paca_struct *lpaca = get_paca();
/*
* We take the next entry, round robin. Previously we tried
@@ -330,18 +344,25 @@ static void make_slbe(unsigned long esid, unsigned long vsid, int large,
* for the kernel stack during the first part of exception exit
* which gets invalidated due to a tlbie from another cpu at a
* non recoverable point (after setting srr0/1) - Anton
+ *
+ * paca Ksave is always valid (even when on the interrupt stack)
+ * so we use that.
*/
- castout_entry = get_paca()->xStab_data.next_round_robin;
+ castout_entry = lpaca->xStab_data.next_round_robin;
do {
entry = castout_entry;
castout_entry++;
+ /*
+ * We bolt in the first kernel segment and the first
+ * vmalloc segment.
+ */
if (castout_entry >= naca->slb_size)
- castout_entry = 1;
+ castout_entry = 2;
asm volatile("slbmfee %0,%1" : "=r" (esid_data) : "r" (entry));
} while (esid_data.data.v &&
- esid_data.data.esid == GET_ESID(__get_SP()));
+ esid_data.data.esid == GET_ESID(lpaca->xKsave));
- get_paca()->xStab_data.next_round_robin = castout_entry;
+ lpaca->xStab_data.next_round_robin = castout_entry;
/* slbie not needed as the previous mapping is still valid. */
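Reassembled without the diff markers, the castout selection in make_slbe() now reads
roughly as below (a sketch that assumes the unchanged context lines are as in the
pre-patch file):

	castout_entry = lpaca->xStab_data.next_round_robin;
	do {
		entry = castout_entry;
		castout_entry++;
		/* Never recycle slot 0 (kernel) or slot 1 (vmalloc): both are bolted. */
		if (castout_entry >= naca->slb_size)
			castout_entry = 2;
		asm volatile("slbmfee %0,%1" : "=r" (esid_data) : "r" (entry));
		/* Retry if we picked the segment holding the kernel stack save area
		 * (paca xKsave), which must stay resident across exception exit. */
	} while (esid_data.data.v &&
		 esid_data.data.esid == GET_ESID(lpaca->xKsave));

	lpaca->xStab_data.next_round_robin = castout_entry;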
@@ -422,6 +443,8 @@ int slb_allocate(unsigned long ea)
}
esid = GET_ESID(ea);
+
+ BUG_ON((esid << SID_SHIFT) == VMALLOCBASE);
__slb_allocate(esid, vsid, context);
return 0;
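The BUG_ON added above relies on shifting the ESID back up to recover the segment base
address, so it fires if the bolted vmalloc segment ever reaches the normal allocation
path. Illustration only; that GET_ESID() is essentially "ea >> SID_SHIFT" (with 256MB
segments) is an assumption, not something this patch spells out:

	unsigned long esid = GET_ESID(ea);      /* assumed: ea >> SID_SHIFT */
	if ((esid << SID_SHIFT) == VMALLOCBASE)
		BUG();  /* the segment is bolted; an SLB miss on it means the bolting was lost */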
@@ -478,7 +501,9 @@ void flush_slb(struct task_struct *tsk, struct mm_struct *mm)
unsigned long word0;
slb_dword0 data;
} esid_data;
+ unsigned long esid, vsid;
+ WARN_ON(!irqs_disabled());
if (offset <= NR_STAB_CACHE_ENTRIES) {
int i;
@@ -486,11 +511,23 @@ void flush_slb(struct task_struct *tsk, struct mm_struct *mm)
for (i = 0; i < offset; i++) {
esid_data.word0 = 0;
esid_data.data.esid = __get_cpu_var(stab_cache[i]);
+ BUG_ON(esid_data.data.esid == GET_ESID(VMALLOCBASE));
asm volatile("slbie %0" : : "r" (esid_data));
}
asm volatile("isync" : : : "memory");
} else {
asm volatile("isync; slbia; isync" : : : "memory");
+
+ /*
+ * Bolt in the first vmalloc segment. Since modules end
+ * up there it gets hit very heavily. We must not touch
+ * the vmalloc region between the slbia and here, thats
+ * why we require interrupts off.
+ */
+ esid = GET_ESID(VMALLOCBASE);
+ vsid = get_kernel_vsid(VMALLOCBASE);
+ get_paca()->xStab_data.next_round_robin = 1;
+ make_slbe(esid, vsid, 0, 1);
}
/* Workaround POWER5 < DD2.1 issue */
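Putting the flush_slb() hunks back together, the flush path now distinguishes two
cases; a condensed sketch, again assuming the unchanged context lines from the
pre-patch file:

	WARN_ON(!irqs_disabled());

	if (offset <= NR_STAB_CACHE_ENTRIES) {
		int i;

		/* Only a few user segments were touched: invalidate them one by
		 * one.  None of them may ever be the bolted vmalloc segment. */
		for (i = 0; i < offset; i++) {
			esid_data.word0 = 0;
			esid_data.data.esid = __get_cpu_var(stab_cache[i]);
			BUG_ON(esid_data.data.esid == GET_ESID(VMALLOCBASE));
			asm volatile("slbie %0" : : "r" (esid_data));
		}
		asm volatile("isync" : : : "memory");
	} else {
		/* Too many entries: wipe the whole SLB, then immediately re-bolt
		 * the first vmalloc segment.  Interrupts stay off so nothing can
		 * touch vmalloc space in between. */
		asm volatile("isync; slbia; isync" : : : "memory");
		esid = GET_ESID(VMALLOCBASE);
		vsid = get_kernel_vsid(VMALLOCBASE);
		/* Point the round robin at slot 1 so make_slbe() installs the
		 * bolted entry there and then advances past it. */
		get_paca()->xStab_data.next_round_robin = 1;
		make_slbe(esid, vsid, 0, 1);
	}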
diff --git a/include/asm-ppc64/mmu_context.h b/include/asm-ppc64/mmu_context.h
index 51196458b8d9..505ad13b358e 100644
--- a/include/asm-ppc64/mmu_context.h
+++ b/include/asm-ppc64/mmu_context.h
@@ -172,8 +172,14 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
* After we have set current->mm to a new value, this activates
* the context for the new mm so we see the new mappings.
*/
-#define activate_mm(active_mm, mm) \
- switch_mm(active_mm, mm, current);
+static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ switch_mm(prev, next, current);
+ local_irq_restore(flags);
+}
#define VSID_RANDOMIZER 42470972311UL
#define VSID_MASK 0xfffffffffUL
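Finally, activate_mm() is converted from a macro to an inline that disables interrupts
around switch_mm(); the WARN_ON(!irqs_disabled()) added in flush_slb() documents why.
A hedged usage sketch follows; the caller shown is hypothetical (modelled loosely on
exec_mmap()) and is not part of this patch:

	/* Hypothetical caller, for illustration only. */
	static void install_new_mm(struct mm_struct *old_mm, struct mm_struct *new_mm)
	{
		current->mm = new_mm;
		current->active_mm = new_mm;
		/* No explicit local_irq_save() is needed here any more:
		 * activate_mm() disables interrupts itself before calling
		 * switch_mm(), which may end up in flush_slb() and re-bolt
		 * the vmalloc SLB entry. */
		activate_mm(old_mm, new_mm);
	}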