From 512b5b89b9ef90e7a6475513fa9402e55b0e831c Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Mon, 16 Nov 2015 13:52:07 +0530 Subject: ARC: Abstract out ISA specific SLEEP args No semantical changes, prepares for ARCv2 specific change in next commit Signed-off-by: Vineet Gupta --- arch/arc/kernel/process.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'arch/arc/kernel') diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c index 91d5a0f1f3f7..a3f750e76b68 100644 --- a/arch/arc/kernel/process.c +++ b/arch/arc/kernel/process.c @@ -44,11 +44,10 @@ SYSCALL_DEFINE0(arc_gettls) void arch_cpu_idle(void) { /* sleep, but enable all interrupts before committing */ - if (is_isa_arcompact()) { - __asm__("sleep 0x3"); - } else { - __asm__("sleep 0x10"); - } + __asm__ __volatile__( + "sleep %0 \n" + : + :"I"(ISA_SLEEP_ARG)); /* can't be "r" has to be embedded const */ } asmlinkage void ret_from_fork(void); -- cgit v1.2.3 From e81b75f7b206d4c44afd20354a66825239490109 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 17 Nov 2015 17:46:48 +0530 Subject: ARC: remove SYNC from __switch_to() SYNC in __switch_to() is a historic relic and not needed at all. - In UP context it is obviously useless, why would we want to stall the core for all updates to stack memory of t0 to complete before loading kernel mode callee registers from t1 stack's memory. - In SMP, there could be potential race in which outgoing task could be concurrently picked for running on a different core, thus writes to stack here need to be visible before the reads from stack on other core. Peter confirmed that generic schedular already has needed barriers (by way of rq lock) so there is no need for additional arch barrier. This came up when Noam was trying to replace this SYNC with EZChip specific hardware thread scheduling instruction for their platform support. Link: http://lkml.kernel.org/r/20151102092654.GM17308@twins.programming.kicks-ass.net Cc: Peter Zijlstra Cc: linux-kernel@vger.kernel.org Cc: Noam Camus Signed-off-by: Vineet Gupta --- arch/arc/kernel/ctx_sw.c | 2 -- arch/arc/kernel/ctx_sw_asm.S | 3 --- 2 files changed, 5 deletions(-) (limited to 'arch/arc/kernel') diff --git a/arch/arc/kernel/ctx_sw.c b/arch/arc/kernel/ctx_sw.c index c14a5bea0c76..5d446df2c413 100644 --- a/arch/arc/kernel/ctx_sw.c +++ b/arch/arc/kernel/ctx_sw.c @@ -58,8 +58,6 @@ __switch_to(struct task_struct *prev_task, struct task_struct *next_task) "st sp, [r24] \n\t" #endif - "sync \n\t" - /* * setup _current_task with incoming tsk. * optionally, set r25 to that as well diff --git a/arch/arc/kernel/ctx_sw_asm.S b/arch/arc/kernel/ctx_sw_asm.S index e248594097e7..e6890b1f8650 100644 --- a/arch/arc/kernel/ctx_sw_asm.S +++ b/arch/arc/kernel/ctx_sw_asm.S @@ -44,9 +44,6 @@ __switch_to: * don't need to do anything special to return it */ - /* hardware memory barrier */ - sync - /* * switch to new task, contained in r1 * Temp reg r3 is required to get the ptr to store val -- cgit v1.2.3 From 2e22502c080f27afeab5e6f11e618fb7bc7aea53 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Mon, 23 Nov 2015 19:32:51 +0530 Subject: ARC: dw2 unwind: Remove falllback linear search thru FDE entries Fixes STAR 9000953410: "perf callgraph profiling causing RCU stalls" | perf record -g -c 15000 -e cycles /sbin/hackbench | | INFO: rcu_preempt self-detected stall on CPU | 1: (1 GPs behind) idle=609/140000000000002/0 softirq=2914/2915 fqs=603 | Task dump for CPU 1: in-kernel dwarf unwinder has a fast binary lookup and a fallback linear search (which iterates thru each of ~11K entries) thus takes 2 orders of magnitude longer (~3 million cycles vs. 2000). Routines written in hand assembler lack dwarf info (as we don't support assembler CFI pseudo-ops yet) fail the unwinder binary lookup, hit linear search, failing nevertheless in the end. However the linear search is pointless as binary lookup tables are created from it in first place. It is impossible to have binary lookup fail while succeed the linear search. It is pure waste of cycles thus removed by this patch. This manifested as RCU stalls / NMI watchdog splat when running hackbench under perf with callgraph profiling. The triggering condition was perf counter overflowing in routine lacking dwarf info (like memset) leading to patheic 3 million cycle unwinder slow path and by the time it returned new interrupts were already pending (Timer, IPI) and taken rightaway. The original memset didn't make forward progress, system kept accruing more interrupts and more unwinder delayes in a vicious feedback loop, ultimately triggering the NMI diagnostic. Cc: stable@vger.kernel.org Signed-off-by: Vineet Gupta --- arch/arc/kernel/unwind.c | 37 ++++--------------------------------- 1 file changed, 4 insertions(+), 33 deletions(-) (limited to 'arch/arc/kernel') diff --git a/arch/arc/kernel/unwind.c b/arch/arc/kernel/unwind.c index 93c6ea52b671..7352475451f6 100644 --- a/arch/arc/kernel/unwind.c +++ b/arch/arc/kernel/unwind.c @@ -986,42 +986,13 @@ int arc_unwind(struct unwind_frame_info *frame) (const u8 *)(fde + 1) + *fde, ptrType); - if (pc >= endLoc) + if (pc >= endLoc) { fde = NULL; - } else - fde = NULL; - } - if (fde == NULL) { - for (fde = table->address, tableSize = table->size; - cie = NULL, tableSize > sizeof(*fde) - && tableSize - sizeof(*fde) >= *fde; - tableSize -= sizeof(*fde) + *fde, - fde += 1 + *fde / sizeof(*fde)) { - cie = cie_for_fde(fde, table); - if (cie == &bad_cie) { cie = NULL; - break; } - if (cie == NULL - || cie == ¬_fde - || (ptrType = fde_pointer_type(cie)) < 0) - continue; - ptr = (const u8 *)(fde + 2); - startLoc = read_pointer(&ptr, - (const u8 *)(fde + 1) + - *fde, ptrType); - if (!startLoc) - continue; - if (!(ptrType & DW_EH_PE_indirect)) - ptrType &= - DW_EH_PE_FORM | DW_EH_PE_signed; - endLoc = - startLoc + read_pointer(&ptr, - (const u8 *)(fde + - 1) + - *fde, ptrType); - if (pc >= startLoc && pc < endLoc) - break; + } else { + fde = NULL; + cie = NULL; } } } -- cgit v1.2.3