From b0da6d44157aa6e652de7634343708251ba64146 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Fri, 29 Apr 2016 22:29:26 +0100 Subject: asm-generic: Drop renameat syscall from default list The newer renameat2 syscall provides all the functionality provided by the renameat syscall and adds flags, so future architectures won't need to include renameat. Therefore drop the renameat syscall from the generic syscall list unless __ARCH_WANT_RENAMEAT is defined by the architecture's unistd.h prior to including asm-generic/unistd.h, and adjust all architectures using the generic syscall list to define it so that no in-tree architectures are affected. Signed-off-by: James Hogan Acked-by: Vineet Gupta Cc: linux-arch@vger.kernel.org Cc: linux-snps-arc@lists.infradead.org Cc: Catalin Marinas Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: Mark Salter Cc: Aurelien Jacquiot Cc: linux-c6x-dev@linux-c6x.org Cc: Richard Kuo Cc: linux-hexagon@vger.kernel.org Cc: linux-metag@vger.kernel.org Cc: Jonas Bonn Cc: linux@lists.openrisc.net Cc: Chen Liqin Cc: Lennox Wu Cc: Chris Metcalf Cc: Guan Xuetao Cc: Ley Foon Tan Cc: nios2-dev@lists.rocketboards.org Cc: Yoshinori Sato Cc: uclinux-h8-devel@lists.sourceforge.jp Signed-off-by: Arnd Bergmann --- arch/arc/include/uapi/asm/unistd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/arc/include') diff --git a/arch/arc/include/uapi/asm/unistd.h b/arch/arc/include/uapi/asm/unistd.h index 39e58d1cdf90..41fa2ec9e02c 100644 --- a/arch/arc/include/uapi/asm/unistd.h +++ b/arch/arc/include/uapi/asm/unistd.h @@ -15,6 +15,7 @@ #if !defined(_UAPI_ASM_ARC_UNISTD_H) || defined(__SYSCALL) #define _UAPI_ASM_ARC_UNISTD_H +#define __ARCH_WANT_RENAMEAT #define __ARCH_WANT_SYS_EXECVE #define __ARCH_WANT_SYS_CLONE #define __ARCH_WANT_SYS_VFORK -- cgit v1.2.3 From 569579401ae1c9b9f317f38261e32135b153e9b3 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 28 Jan 2016 12:56:03 +0530 Subject: ARC: opencode arc_request_percpu_irq - The idea is to remove the API usage since it has a subtle design flaw - relies on being called on cpu0 first. This is true for some early per cpu irqs such as TIMER/IPI, but not for late probed per cpu peripherals such as perf.
And its usage in perf has already bitten us once: see c6317bc7c5ab ("ARCv2: perf: Ensure perf intr gets enabled on all cores") where we ended up open coding it anyway - The seeming duplication will go away once we start using cpu notifier for timer setup Signed-off-by: Vineet Gupta --- arch/arc/include/asm/irq.h | 3 --- arch/arc/kernel/irq.c | 29 ----------------------------- arch/arc/kernel/smp.c | 15 ++++++++++++++- arch/arc/kernel/time.c | 14 +++++++++++--- 4 files changed, 25 insertions(+), 36 deletions(-) (limited to 'arch/arc/include') diff --git a/arch/arc/include/asm/irq.h b/arch/arc/include/asm/irq.h index 49014f0ef36d..f9c735ede4fc 100644 --- a/arch/arc/include/asm/irq.h +++ b/arch/arc/include/asm/irq.h @@ -26,8 +26,5 @@ extern void arc_init_IRQ(void); void arc_local_timer_setup(void); -void arc_request_percpu_irq(int irq, int cpu, - irqreturn_t (*isr)(int irq, void *dev), - const char *irq_nm, void *percpu_dev); #endif diff --git a/arch/arc/kernel/irq.c b/arch/arc/kernel/irq.c index 88074b50456b..fb6dede9d05f 100644 --- a/arch/arc/kernel/irq.c +++ b/arch/arc/kernel/irq.c @@ -50,32 +50,3 @@ void arch_do_IRQ(unsigned int irq, struct pt_regs *regs) irq_exit(); set_irq_regs(old_regs); } - -/* - * API called for requesting percpu interrupts - called by each CPU - * - For boot CPU, actually request the IRQ with genirq core + enables - * - For subsequent callers only enable called locally - * - * Relies on being called by boot cpu first (i.e. request called ahead) of - * any enable as expected by genirq. Hence Suitable only for TIMER, IPI - * which are guaranteed to be setup on boot core first. - * Late probed peripherals such as perf can't use this as there no guarantee - * of being called on boot CPU first. - */ - -void arc_request_percpu_irq(int irq, int cpu, - irqreturn_t (*isr)(int irq, void *dev), - const char *irq_nm, - void *percpu_dev) -{ - /* Boot cpu calls request, all call enable */ - if (!cpu) { - int rc; - - rc = request_percpu_irq(irq, isr, irq_nm, percpu_dev); - if (rc) - panic("Percpu IRQ request failed for %d\n", irq); - } - - enable_percpu_irq(irq, 0); -} diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c index 4cb3add77c75..ca83ebe15a64 100644 --- a/arch/arc/kernel/smp.c +++ b/arch/arc/kernel/smp.c @@ -346,6 +346,10 @@ irqreturn_t do_IPI(int irq, void *dev_id) /* * API called by platform code to hookup arch-common ISR to their IPI IRQ + * + * Note: If IPI is provided by platform (vs.
say ARC MCIP), their intc setup/map + * function needs to call call irq_set_percpu_devid() for IPI IRQ, otherwise + * request_percpu_irq() below will fail */ static DEFINE_PER_CPU(int, ipi_dev); @@ -353,7 +357,16 @@ int smp_ipi_irq_setup(int cpu, int irq) { int *dev = per_cpu_ptr(&ipi_dev, cpu); - arc_request_percpu_irq(irq, cpu, do_IPI, "IPI Interrupt", dev); + /* Boot cpu calls request, all call enable */ + if (!cpu) { + int rc; + + rc = request_percpu_irq(irq, do_IPI, "IPI Interrupt", dev); + if (rc) + panic("Percpu IRQ request failed for %d\n", irq); + } + + enable_percpu_irq(irq, 0); return 0; } diff --git a/arch/arc/kernel/time.c b/arch/arc/kernel/time.c index 7d9a736fc7e5..146da3cbcc99 100644 --- a/arch/arc/kernel/time.c +++ b/arch/arc/kernel/time.c @@ -251,14 +251,22 @@ void arc_local_timer_setup() { struct clock_event_device *evt = this_cpu_ptr(&arc_clockevent_device); int cpu = smp_processor_id(); + int irq = TIMER0_IRQ; evt->cpumask = cpumask_of(cpu); clockevents_config_and_register(evt, arc_get_core_freq(), 0, ARC_TIMER_MAX); - /* setup the per-cpu timer IRQ handler - for all cpus */ - arc_request_percpu_irq(TIMER0_IRQ, cpu, timer_irq_handler, - "Timer0 (per-cpu-tick)", evt); + if (!cpu) { + int rc; + + rc = request_percpu_irq(irq, timer_irq_handler, + "Timer0 (per-cpu-tick)", evt); + if (rc) + panic("Percpu IRQ request failed for TIMER\n"); + } + + enable_percpu_irq(irq, 0); } /* -- cgit v1.2.3 From eec3c58efa271d7dfa30c978dda2c88280212634 Mon Sep 17 00:00:00 2001 From: Noam Camus Date: Fri, 1 Jan 2016 15:48:49 +0530 Subject: ARC: clockevent: switch to cpu notifier for clockevent setup ARC Timers so far have been handled as "legacy" w/o explicit description in DT. This poses a challenge for newer platforms wanting to use them. This series will eventually help move timers over to DT. This patch does a small change of using a CPU notifier to set clockevent on non-boot CPUs.
So explicit setup is done only on boot CPU (which will later be done by DT) Signed-off-by: Noam Camus [vgupta: broken off from a bigger patch] Signed-off-by: Vineet Gupta --- arch/arc/include/asm/irq.h | 1 - arch/arc/kernel/smp.c | 2 -- arch/arc/kernel/time.c | 62 ++++++++++++++++++++++++++++++---------------- 3 files changed, 41 insertions(+), 24 deletions(-) (limited to 'arch/arc/include') diff --git a/arch/arc/include/asm/irq.h b/arch/arc/include/asm/irq.h index f9c735ede4fc..5c0b5abda67a 100644 --- a/arch/arc/include/asm/irq.h +++ b/arch/arc/include/asm/irq.h @@ -25,6 +25,5 @@ #include extern void arc_init_IRQ(void); -void arc_local_timer_setup(void); #endif diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c index ca83ebe15a64..6b1813456336 100644 --- a/arch/arc/kernel/smp.c +++ b/arch/arc/kernel/smp.c @@ -138,8 +138,6 @@ void start_kernel_secondary(void) if (machine_desc->init_per_cpu) machine_desc->init_per_cpu(cpu); - arc_local_timer_setup(); - local_irq_enable(); preempt_disable(); cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); diff --git a/arch/arc/kernel/time.c b/arch/arc/kernel/time.c index 146da3cbcc99..e97be743d47b 100644 --- a/arch/arc/kernel/time.c +++ b/arch/arc/kernel/time.c @@ -29,17 +29,14 @@ * which however is currently broken */ -#include #include #include -#include #include #include #include -#include -#include #include #include +#include #include #include #include @@ -183,6 +180,8 @@ static struct clocksource arc_counter = { /********** Clock Event Device *********/ +static int arc_timer_irq = TIMER0_IRQ; + /* * Arm the timer to interrupt after @cycles * The distinction for oneshot/periodic is done in arc_event_timer_ack() below @@ -218,7 +217,6 @@ static DEFINE_PER_CPU(struct clock_event_device, arc_clockevent_device) = { .features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_PERIODIC, .rating = 300, - .irq = TIMER0_IRQ, /* hardwired, no need for resources */ .set_next_event = arc_clkevent_set_next_event, .set_state_periodic = arc_clkevent_set_periodic, }; @@ -244,29 +242,52 @@ static irqreturn_t timer_irq_handler(int irq, void *dev_id) return IRQ_HANDLED; } +static int arc_timer_cpu_notify(struct notifier_block *self, + unsigned long action, void *hcpu) +{ + struct clock_event_device *evt = this_cpu_ptr(&arc_clockevent_device); + + evt->cpumask = cpumask_of(smp_processor_id()); + + switch (action & ~CPU_TASKS_FROZEN) { + case CPU_STARTING: + clockevents_config_and_register(evt, arc_get_core_freq(), + 0, ULONG_MAX); + enable_percpu_irq(arc_timer_irq, 0); + break; + case CPU_DYING: + disable_percpu_irq(arc_timer_irq); + break; + } + + return NOTIFY_OK; +} + +static struct notifier_block arc_timer_cpu_nb = { + .notifier_call = arc_timer_cpu_notify, +}; + /* - * Setup the local event timer for @cpu + * clockevent setup for boot CPU */ -void arc_local_timer_setup() +static void __init arc_clockevent_setup(void) { struct clock_event_device *evt = this_cpu_ptr(&arc_clockevent_device); - int cpu = smp_processor_id(); - int irq = TIMER0_IRQ; + int ret; - evt->cpumask = cpumask_of(cpu); + register_cpu_notifier(&arc_timer_cpu_nb); + + evt->cpumask = cpumask_of(smp_processor_id()); clockevents_config_and_register(evt, arc_get_core_freq(), 0, ARC_TIMER_MAX); - if (!cpu) { - int rc; - - rc = request_percpu_irq(irq, timer_irq_handler, - "Timer0 (per-cpu-tick)", evt); - if (rc) - panic("Percpu IRQ request failed for TIMER\n"); - } + /* Needs apriori irq_set_percpu_devid() done in intc map function */ + ret = request_percpu_irq(arc_timer_irq, timer_irq_handler, + "Timer0 
(per-cpu-tick)", evt); + if (ret) + pr_err("Unable to register interrupt\n"); - enable_percpu_irq(irq, 0); + enable_percpu_irq(arc_timer_irq, 0); } /* @@ -291,6 +312,5 @@ void __init time_init(void) */ clocksource_register_hz(&arc_counter, arc_get_core_freq()); - /* sets up the periodic event timer */ - arc_local_timer_setup(); + arc_clockevent_setup(); } -- cgit v1.2.3 From 77c8d0d6b3f4ea0989b9ca42fb368cc2aac02495 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Fri, 1 Jan 2016 17:58:45 +0530 Subject: ARC: clockevent: DT based probe - timer frequency is derived from DT (no longer rely on top level DT "clock-frequency" probed early and exported by asm/clk.h) - TIMER0_IRQ need not be exported across arch code, confined to intc as it is property of same - Any failures in clockevent setup are considered pedantic and system panic()'s as there is no generic fallback (unlike clocksource where a jiffies based soft clocksource always exists) Acked-by: Daniel Lezcano Signed-off-by: Vineet Gupta --- arch/arc/include/asm/irq.h | 9 ------- arch/arc/kernel/intc-compact.c | 2 ++ arch/arc/kernel/time.c | 59 ++++++++++++++++++++++++++++++------------ 3 files changed, 45 insertions(+), 25 deletions(-) (limited to 'arch/arc/include') diff --git a/arch/arc/include/asm/irq.h b/arch/arc/include/asm/irq.h index 5c0b5abda67a..a6ac89dc228f 100644 --- a/arch/arc/include/asm/irq.h +++ b/arch/arc/include/asm/irq.h @@ -12,15 +12,6 @@ #define NR_CPU_IRQS 32 /* number of interrupt lines of ARC770 CPU */ #define NR_IRQS 128 /* allow some CPU external IRQ handling */ -/* Platform Independent IRQs */ -#ifdef CONFIG_ISA_ARCOMPACT -#define TIMER0_IRQ 3 -#define TIMER1_IRQ 4 -#else -#define TIMER0_IRQ 16 -#define TIMER1_IRQ 17 -#endif - #include #include diff --git a/arch/arc/kernel/intc-compact.c b/arch/arc/kernel/intc-compact.c index 4195eedeb6d1..d31bc647146d 100644 --- a/arch/arc/kernel/intc-compact.c +++ b/arch/arc/kernel/intc-compact.c @@ -14,6 +14,8 @@ #include #include +#define TIMER0_IRQ 3 /* Fixed by ISA */ + /* * Early Hardware specific Interrupt setup * -Platform independent, needed for each CPU (not foldable into init_IRQ) diff --git a/arch/arc/kernel/time.c b/arch/arc/kernel/time.c index 848353a27ac8..01ec30d83590 100644 --- a/arch/arc/kernel/time.c +++ b/arch/arc/kernel/time.c @@ -30,19 +30,15 @@ */ #include -#include -#include -#include -#include #include #include #include #include #include +#include +#include #include #include -#include -#include #include @@ -59,6 +55,30 @@ #define ARC_TIMER_MAX 0xFFFFFFFF +static unsigned long arc_timer_freq; + +static int noinline arc_get_timer_clk(struct device_node *node) +{ + struct clk *clk; + int ret; + + clk = of_clk_get(node, 0); + if (IS_ERR(clk)) { + pr_err("timer missing clk"); + return PTR_ERR(clk); + } + + ret = clk_prepare_enable(clk); + if (ret) { + pr_err("Couldn't enable parent clk\n"); + return ret; + } + + arc_timer_freq = clk_get_rate(clk); + + return 0; +} + /********** Clock Source Device *********/ #ifdef CONFIG_ARC_HAS_GFRC @@ -182,7 +202,7 @@ static struct clocksource arc_counter = { /********** Clock Event Device *********/ -static int arc_timer_irq = TIMER0_IRQ; +static int arc_timer_irq; /* * Arm the timer to interrupt after @cycles @@ -210,7 +230,7 @@ static int arc_clkevent_set_periodic(struct clock_event_device *dev) * At X Hz, 1 sec = 1000ms -> X cycles; * 10ms -> X / 100 cycles */ - arc_timer_event_setup(arc_get_core_freq() / HZ); + arc_timer_event_setup(arc_timer_freq / HZ); return 0; } @@ -253,7 +273,7 @@ static int 
arc_timer_cpu_notify(struct notifier_block *self, switch (action & ~CPU_TASKS_FROZEN) { case CPU_STARTING: - clockevents_config_and_register(evt, arc_get_core_freq(), + clockevents_config_and_register(evt, arc_timer_freq, 0, ULONG_MAX); enable_percpu_irq(arc_timer_irq, 0); break; @@ -272,25 +292,35 @@ static struct notifier_block arc_timer_cpu_nb = { /* * clockevent setup for boot CPU */ -static void __init arc_clockevent_setup(void) +static void __init arc_clockevent_setup(struct device_node *node) { struct clock_event_device *evt = this_cpu_ptr(&arc_clockevent_device); int ret; register_cpu_notifier(&arc_timer_cpu_nb); + arc_timer_irq = irq_of_parse_and_map(node, 0); + if (arc_timer_irq <= 0) + panic("clockevent: missing irq"); + + ret = arc_get_timer_clk(node); + if (ret) + panic("clockevent: missing clk"); + + evt->irq = arc_timer_irq; evt->cpumask = cpumask_of(smp_processor_id()); - clockevents_config_and_register(evt, arc_get_core_freq(), + clockevents_config_and_register(evt, arc_timer_freq, 0, ARC_TIMER_MAX); /* Needs apriori irq_set_percpu_devid() done in intc map function */ ret = request_percpu_irq(arc_timer_irq, timer_irq_handler, "Timer0 (per-cpu-tick)", evt); if (ret) - pr_err("Unable to register interrupt\n"); + panic("clockevent: unable to request irq\n"); enable_percpu_irq(arc_timer_irq, 0); } +CLOCKSOURCE_OF_DECLARE(arc_clkevt, "snps,arc-timer", arc_clockevent_setup); /* * Called from start_kernel() - boot CPU only @@ -299,7 +329,6 @@ static void __init arc_clockevent_setup(void) * -Also sets up any global state needed for timer subsystem: * - for "counting" timer, registers a clocksource, usable across CPUs * (provided that underlying counter h/w is synchronized across cores) - * - for "event" timer, sets up TIMER0 IRQ (as that is platform agnostic) */ void __init time_init(void) { @@ -315,7 +344,5 @@ void __init time_init(void) * CLK upto 4.29 GHz can be safely represented in 32 bits * because Max 32 bit number is 4,294,967,295 */ - clocksource_register_hz(&arc_counter, arc_get_core_freq()); - - arc_clockevent_setup(); + clocksource_register_hz(&arc_counter, arc_timer_freq); } -- cgit v1.2.3 From 88555cc584d93e658722f090d3f8fb84d5fa15a5 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Wed, 30 Mar 2016 18:35:28 +0530 Subject: ARC: irq: export some IRQs again This will be needed for switching to linear irq domain as irq_create_mapping() called by intr code needs the IRQ numbers in addition to existing usage in mcip.c for requesting the irq Signed-off-by: Vineet Gupta --- arch/arc/include/asm/irq.h | 6 ++++++ arch/arc/kernel/mcip.c | 3 --- 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'arch/arc/include') diff --git a/arch/arc/include/asm/irq.h b/arch/arc/include/asm/irq.h index a6ac89dc228f..c0fa0d2de400 100644 --- a/arch/arc/include/asm/irq.h +++ b/arch/arc/include/asm/irq.h @@ -12,6 +12,12 @@ #define NR_CPU_IRQS 32 /* number of interrupt lines of ARC770 CPU */ #define NR_IRQS 128 /* allow some CPU external IRQ handling */ +/* Platform Independent IRQs */ +#ifdef CONFIG_ISA_ARCV2 +#define IPI_IRQ 19 +#define SOFTIRQ_IRQ 21 +#endif + #include #include diff --git a/arch/arc/kernel/mcip.c b/arch/arc/kernel/mcip.c index 262d9c3771e6..72f9179b1a24 100644 --- a/arch/arc/kernel/mcip.c +++ b/arch/arc/kernel/mcip.c @@ -15,9 +15,6 @@ #include #include -#define IPI_IRQ 19 -#define SOFTIRQ_IRQ 21 - static char smp_cpuinfo_buf[128]; static int idu_detected; -- cgit v1.2.3 From 6e9318d1be83714e004a0ac795a936df4d2bed3e Mon Sep 17 00:00:00 2001 From: Alexey Brodkin Date: 
Mon, 1 Feb 2016 17:30:17 +0300 Subject: ARC: RIP arc_{get|set}_core_freq() clk API There are no more users of this - so RIP! Signed-off-by: Alexey Brodkin [vgupta: update changelog] Signed-off-by: Vineet Gupta --- arch/arc/include/asm/clk.h | 22 ---------------------- arch/arc/kernel/Makefile | 2 +- arch/arc/kernel/clk.c | 21 --------------------- arch/arc/kernel/devtree.c | 7 ------- 4 files changed, 1 insertion(+), 51 deletions(-) delete mode 100644 arch/arc/include/asm/clk.h delete mode 100644 arch/arc/kernel/clk.c (limited to 'arch/arc/include') diff --git a/arch/arc/include/asm/clk.h b/arch/arc/include/asm/clk.h deleted file mode 100644 index bf9d29f5bd53..000000000000 --- a/arch/arc/include/asm/clk.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright (C) 2012 Synopsys, Inc. (www.synopsys.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#ifndef _ASM_ARC_CLK_H -#define _ASM_ARC_CLK_H - -/* Although we can't really hide core_freq, the accessor is still better way */ -extern unsigned long core_freq; - -static inline unsigned long arc_get_core_freq(void) -{ - return core_freq; -} - -extern int arc_set_core_freq(unsigned long); - -#endif diff --git a/arch/arc/kernel/Makefile b/arch/arc/kernel/Makefile index 1bc2036b19d7..cfcdedf52ff8 100644 --- a/arch/arc/kernel/Makefile +++ b/arch/arc/kernel/Makefile @@ -9,7 +9,7 @@ CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"' obj-y := arcksyms.o setup.o irq.o time.o reset.o ptrace.o process.o devtree.o -obj-y += signal.o traps.o sys.o troubleshoot.o stacktrace.o disasm.o clk.o +obj-y += signal.o traps.o sys.o troubleshoot.o stacktrace.o disasm.o obj-$(CONFIG_ISA_ARCOMPACT) += entry-compact.o intc-compact.o obj-$(CONFIG_ISA_ARCV2) += entry-arcv2.o intc-arcv2.o obj-$(CONFIG_PCI) += pcibios.o diff --git a/arch/arc/kernel/clk.c b/arch/arc/kernel/clk.c deleted file mode 100644 index 10c7b0b5a079..000000000000 --- a/arch/arc/kernel/clk.c +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Copyright (C) 2012 Synopsys, Inc. (www.synopsys.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
- */ - -#include - -unsigned long core_freq = 80000000; - -/* - * As of now we default to device-tree provided clock - * In future we can determine this in early boot - */ -int arc_set_core_freq(unsigned long freq) -{ - core_freq = freq; - return 0; -} diff --git a/arch/arc/kernel/devtree.c b/arch/arc/kernel/devtree.c index fecbc96df828..f1e07c2344f8 100644 --- a/arch/arc/kernel/devtree.c +++ b/arch/arc/kernel/devtree.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #ifdef CONFIG_SERIAL_EARLYCON @@ -63,8 +62,6 @@ const struct machine_desc * __init setup_machine_fdt(void *dt) { const struct machine_desc *mdesc; unsigned long dt_root; - const void *clk; - int len; if (!early_init_dt_scan(dt)) return NULL; @@ -74,10 +71,6 @@ const struct machine_desc * __init setup_machine_fdt(void *dt) machine_halt(); dt_root = of_get_flat_dt_root(); - clk = of_get_flat_dt_prop(dt_root, "clock-frequency", &len); - if (clk) - arc_set_core_freq(of_read_ulong(clk, len/4)); - arc_set_early_base_baud(dt_root); return mdesc; -- cgit v1.2.3 From 4bb40c6d6c011dc616d6d7fbad2bce21794f97a0 Mon Sep 17 00:00:00 2001 From: Noam Camus Date: Wed, 13 Jan 2016 01:58:38 +0000 Subject: ARC: clean out UAPI byteorder.h Clean off the Kconfig symbol: a UAPI header should not use Kconfig items. Use __BIG_ENDIAN__, defined as a compiler intrinsic, instead. Signed-off-by: Noam Camus [vgupta: fix changelog] Signed-off-by: Vineet Gupta --- arch/arc/include/uapi/asm/byteorder.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/arc/include') diff --git a/arch/arc/include/uapi/asm/byteorder.h b/arch/arc/include/uapi/asm/byteorder.h index 9da71d415c38..ea5ca444c7e3 100644 --- a/arch/arc/include/uapi/asm/byteorder.h +++ b/arch/arc/include/uapi/asm/byteorder.h @@ -9,7 +9,7 @@ #ifndef __ASM_ARC_BYTEORDER_H #define __ASM_ARC_BYTEORDER_H -#ifdef CONFIG_CPU_BIG_ENDIAN +#ifdef __BIG_ENDIAN__ #include #else #include -- cgit v1.2.3 From 15ca68a993d10767c37793e6a0a780b0a7e395dd Mon Sep 17 00:00:00 2001 From: Noam Camus Date: Sun, 7 Sep 2014 22:52:33 +0300 Subject: ARC: Make vmalloc size configurable On ARC, lower 2G of address space is translated and used for - user vaddr space (region 0 to 5) - unused kernel-user gutter (region 6) - kernel vaddr space (region 7) where each region simply represents 256MB of address space. The kernel vaddr space of 256MB is used to implement vmalloc and modules. So far this was enough, but not on an EZchip system with 4K CPUs (given that the per-cpu mechanism uses vmalloc for allocating chunks). So allow VMALLOC_SIZE to be configurable by expanding down into the unused kernel-user gutter region, which at the default 256M was excessive anyway. Also use _BITUL() to fix a build error since PGDIR_SIZE cannot use "1UL" as called from assembly code in mm/tlbex.S Signed-off-by: Noam Camus [vgupta: rewrote changelog, debugged bootup crash due to int vs.
hex] Acked-by: Vineet Gupta --- arch/arc/Kconfig | 11 +++++++++++ arch/arc/include/asm/pgtable.h | 2 +- arch/arc/include/asm/processor.h | 19 +++++++++---------- arch/arc/mm/tlb.c | 5 +++++ 4 files changed, 26 insertions(+), 11 deletions(-) (limited to 'arch/arc/include') diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 4f1482aa8823..30ad63413660 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -492,6 +492,17 @@ config ARCH_DMA_ADDR_T_64BIT config ARC_PLAT_NEEDS_PHYS_TO_DMA bool +config ARC_KVADDR_SIZE + int "Kernel Virtaul Address Space size (MB)" + range 0 512 + default "256" + help + The kernel address space is carved out of 256MB of translated address + space for catering to vmalloc, modules, pkmap, fixmap. This however may + not suffice vmalloc requirements of a 4K CPU EZChip system. So allow + this to be stretched to 512 MB (by extending into the reserved + kernel-user gutter) + config ARC_CURR_IN_REG bool "Dedicate Register r25 for current_task pointer" default y diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h index 10d4b8b8e545..034bbdc0ff61 100644 --- a/arch/arc/include/asm/pgtable.h +++ b/arch/arc/include/asm/pgtable.h @@ -217,7 +217,7 @@ #define BITS_FOR_PTE (PGDIR_SHIFT - PAGE_SHIFT) #define BITS_FOR_PGD (32 - PGDIR_SHIFT) -#define PGDIR_SIZE (1UL << PGDIR_SHIFT) /* vaddr span, not PDG sz */ +#define PGDIR_SIZE _BITUL(PGDIR_SHIFT) /* vaddr span, not PDG sz */ #define PGDIR_MASK (~(PGDIR_SIZE-1)) #define PTRS_PER_PTE _BITUL(BITS_FOR_PTE) diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h index 1d694c1ef6d6..d0a9211ec769 100644 --- a/arch/arc/include/asm/processor.h +++ b/arch/arc/include/asm/processor.h @@ -97,7 +97,7 @@ extern unsigned int get_wchan(struct task_struct *p); #endif /* !__ASSEMBLY__ */ /* - * System Memory Map on ARC + * Default System Memory Map on ARC * * ---------------------------- (lower 2G, Translated) ------------------------- * 0x0000_0000 0x5FFF_FFFF (user vaddr: TASK_SIZE) @@ -109,18 +109,17 @@ extern unsigned int get_wchan(struct task_struct *p); * 0xC000_0000 0xFFFF_FFFF (peripheral uncached space) * ----------------------------------------------------------------------------- */ -#define VMALLOC_START 0x70000000 -/* - * 1 PGDIR_SIZE each for fixmap/pkmap, 2 PGDIR_SIZE gutter - * See asm/highmem.h for details - */ -#define VMALLOC_SIZE (PAGE_OFFSET - VMALLOC_START - PGDIR_SIZE * 4) -#define VMALLOC_END (VMALLOC_START + VMALLOC_SIZE) +#define TASK_SIZE 0x60000000 -#define USER_KERNEL_GUTTER 0x10000000 +#define VMALLOC_START (PAGE_OFFSET - (CONFIG_ARC_KVADDR_SIZE << 20)) + +/* 1 PGDIR_SIZE each for fixmap/pkmap, 2 PGDIR_SIZE gutter (see asm/highmem.h) */ +#define VMALLOC_SIZE ((CONFIG_ARC_KVADDR_SIZE << 20) - PGDIR_SIZE * 4) + +#define VMALLOC_END (VMALLOC_START + VMALLOC_SIZE) -#define TASK_SIZE (VMALLOC_START - USER_KERNEL_GUTTER) +#define USER_KERNEL_GUTTER (VMALLOC_START - TASK_SIZE) #define STACK_TOP TASK_SIZE #define STACK_TOP_MAX STACK_TOP diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c index 7046c12c58ed..745a9aeb2d96 100644 --- a/arch/arc/mm/tlb.c +++ b/arch/arc/mm/tlb.c @@ -814,6 +814,11 @@ void arc_mmu_init(void) printk(arc_mmu_mumbojumbo(0, str, sizeof(str))); + /* + * Can't be done in processor.h due to header include depenedencies + */ + BUILD_BUG_ON(!IS_ALIGNED((CONFIG_ARC_KVADDR_SIZE << 20), PMD_SIZE)); + /* For efficiency sake, kernel is compile time built for a MMU ver * This must match the hardware it is running on. 
* Linux built for MMU V2, if run on MMU V1 will break down because V1 -- cgit v1.2.3 From 2a1021fce85cb9867f3655c58a9c826a3612fae9 Mon Sep 17 00:00:00 2001 From: Noam Camus Date: Tue, 9 Jun 2015 14:05:50 +0300 Subject: ARC: rwlock: disable interrupts in !LLSC variant If we hold a rwlock and an interrupt occurs, we may end up spinning on it forever during softirq. Note that this lock is an internal lock and since the lock is free to be used from any context, the lock needs to be IRQ-safe. Below is an example of an interrupt we got while nl_table_lock was holding its rw->lock_mutex; we spun on it forever. The concept for the fix was taken from SPARC. [2015-05-12 19:16:12] Stack Trace: [2015-05-12 19:16:12] arc_unwind_core+0xb8/0x11c [2015-05-12 19:16:12] dump_stack+0x68/0xac [2015-05-12 19:16:12] _raw_read_lock+0xa8/0xac [2015-05-12 19:16:12] netlink_broadcast_filtered+0x56/0x35c [2015-05-12 19:16:12] nlmsg_notify+0x42/0xa4 [2015-05-12 19:16:13] neigh_update+0x1fe/0x44c [2015-05-12 19:16:13] neigh_event_ns+0x40/0xa4 [2015-05-12 19:16:13] arp_process+0x46e/0x5a8 [2015-05-12 19:16:13] __netif_receive_skb_core+0x358/0x500 [2015-05-12 19:16:13] process_backlog+0x92/0x154 [2015-05-12 19:16:13] net_rx_action+0xb8/0x188 [2015-05-12 19:16:13] __do_softirq+0xda/0x1d8 [2015-05-12 19:16:14] irq_exit+0x8a/0x8c [2015-05-12 19:16:14] arch_do_IRQ+0x6c/0xa8 [2015-05-12 19:16:14] handle_interrupt_level1+0xe4/0xf0 Signed-off-by: Noam Camus Acked-by: Peter Zijlstra --- arch/arc/include/asm/spinlock.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'arch/arc/include') diff --git a/arch/arc/include/asm/spinlock.h b/arch/arc/include/asm/spinlock.h index db8c59d1eaeb..800e7c430ca5 100644 --- a/arch/arc/include/asm/spinlock.h +++ b/arch/arc/include/asm/spinlock.h @@ -610,7 +610,9 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock) static inline int arch_read_trylock(arch_rwlock_t *rw) { int ret = 0; + unsigned long flags; + local_irq_save(flags); arch_spin_lock(&(rw->lock_mutex)); /* @@ -623,6 +625,7 @@ static inline int arch_read_trylock(arch_rwlock_t *rw) } arch_spin_unlock(&(rw->lock_mutex)); + local_irq_restore(flags); smp_mb(); return ret; @@ -632,7 +635,9 @@ static inline int arch_read_trylock(arch_rwlock_t *rw) static inline int arch_write_trylock(arch_rwlock_t *rw) { int ret = 0; + unsigned long flags; + local_irq_save(flags); arch_spin_lock(&(rw->lock_mutex)); /* @@ -646,6 +651,7 @@ static inline int arch_write_trylock(arch_rwlock_t *rw) ret = 1; } arch_spin_unlock(&(rw->lock_mutex)); + local_irq_restore(flags); return ret; } @@ -664,16 +670,24 @@ static inline void arch_write_lock(arch_rwlock_t *rw) static inline void arch_read_unlock(arch_rwlock_t *rw) { + unsigned long flags; + + local_irq_save(flags); arch_spin_lock(&(rw->lock_mutex)); rw->counter++; arch_spin_unlock(&(rw->lock_mutex)); + local_irq_restore(flags); } static inline void arch_write_unlock(arch_rwlock_t *rw) { + unsigned long flags; + + local_irq_save(flags); arch_spin_lock(&(rw->lock_mutex)); rw->counter = __ARCH_RW_LOCK_UNLOCKED__; arch_spin_unlock(&(rw->lock_mutex)); + local_irq_restore(flags); } #endif -- cgit v1.2.3 From 8bcf2c48f32e22f923b69f779c95b1348308d5b1 Mon Sep 17 00:00:00 2001 From: Noam Camus Date: Sun, 6 Dec 2015 15:40:55 +0200 Subject: ARC: [plat-eznps] Use dedicated user stack top NPS uses a special mapping right below TASK_SIZE. Hence we need to lower STACK_TOP so that the user stack won't overlap the NPS special mapping.
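For illustration, a minimal sketch (not taken from the patch) of the constraint at work; the addresses are the ones quoted in the comment below, nps_layout_check() is a hypothetical helper, and the alignment check mirrors the BUILD_BUG_ON() this patch adds to arc_mmu_init():

#include <linux/bug.h>
#include <linux/kernel.h>

#define NPS_CMEM_BASE	0x57f00000UL	/* 1M of closely coupled memory */
#define NPS_FMT_BASE	0x58000000UL	/* 16 x 8M fixed-map huge pages */
#define NPS_STACK_TOP	0x57e00000UL	/* the lowered STACK_TOP */

static inline void nps_layout_check(void)
{
	/* user stack must end below the un-translated NPS window ... */
	BUILD_BUG_ON(NPS_STACK_TOP > NPS_CMEM_BASE);

	/* ... and sit on a PMD (21 bit, i.e. 2M) boundary */
	BUILD_BUG_ON(!IS_ALIGNED(NPS_STACK_TOP, 0x200000UL));
}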
Signed-off-by: Noam Camus Acked-by: Vineet Gupta --- arch/arc/include/asm/processor.h | 18 ++++++++++++++++++ arch/arc/mm/tlb.c | 6 ++++++ 2 files changed, 24 insertions(+) (limited to 'arch/arc/include') diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h index d0a9211ec769..194a09fce198 100644 --- a/arch/arc/include/asm/processor.h +++ b/arch/arc/include/asm/processor.h @@ -121,7 +121,25 @@ extern unsigned int get_wchan(struct task_struct *p); #define USER_KERNEL_GUTTER (VMALLOC_START - TASK_SIZE) +#ifdef CONFIG_ARC_PLAT_EZNPS +/* NPS architecture defines special window of 129M in user address space for + * special memory areas, when accessing this window the MMU do not use TLB. + * Instead MMU direct the access to: + * 0x57f00000:0x57ffffff -- 1M of closely coupled memory (aka CMEM) + * 0x58000000:0x5fffffff -- 16 huge pages, 8M each, with fixed map (aka FMTs) + * + * CMEM - is the fastest memory we got and its size is 16K. + * FMT - is used to map either to internal/external memory. + * Internal memory is the second fast memory and its size is 16M + * External memory is the biggest memory (16G) and also the slowest. + * + * STACK_TOP need to be PMD align (21bit) that is why we supply 0x57e00000. + */ +#define STACK_TOP 0x57e00000 +#else #define STACK_TOP TASK_SIZE +#endif + #define STACK_TOP_MAX STACK_TOP /* This decides where the kernel will search for a free chunk of vm diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c index 745a9aeb2d96..ec868a9081a1 100644 --- a/arch/arc/mm/tlb.c +++ b/arch/arc/mm/tlb.c @@ -819,6 +819,12 @@ void arc_mmu_init(void) */ BUILD_BUG_ON(!IS_ALIGNED((CONFIG_ARC_KVADDR_SIZE << 20), PMD_SIZE)); + /* + * stack top size sanity check, + * Can't be done in processor.h due to header include depenedencies + */ + BUILD_BUG_ON(!IS_ALIGNED(STACK_TOP, PMD_SIZE)); + /* For efficiency sake, kernel is compile time built for a MMU ver * This must match the hardware it is running on. * Linux built for MMU V2, if run on MMU V1 will break down because V1 -- cgit v1.2.3 From a5a10d99a946602cf4ae50eadc65c2480dbd2e56 Mon Sep 17 00:00:00 2001 From: Noam Camus Date: Sat, 16 May 2015 17:49:35 +0300 Subject: ARC: [plat-eznps] Use dedicated atomic/bitops/cmpxchg We need our own implementations since we lack LLSC support. Our extended ISA provides an optimized solution for all the 32-bit operations we see in these three headers.
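As a usage-level sketch (illustrative, not part of the patch): callers keep the generic atomic_t API and only the implementation underneath changes, e.g. a hypothetical refcount drop:

#include <linux/atomic.h>

static inline int put_ref(atomic_t *ref)
{
	/* on EZNPS this expands to atomic_add_return(-1, ref), i.e. the
	 * CTOP_INST_AADD_DI_R2_R2_R3 based variant defined below */
	return atomic_sub_return(1, ref) == 0;	/* true for the last user */
}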
Signed-off-by: Noam Camus --- arch/arc/include/asm/atomic.h | 83 ++++++++++++++++++++++++++++++++++++++++-- arch/arc/include/asm/bitops.h | 60 ++++++++++++++++++++++++++++-- arch/arc/include/asm/cmpxchg.h | 76 ++++++++++++++++++++++++++++++++++---- 3 files changed, 205 insertions(+), 14 deletions(-) (limited to 'arch/arc/include') diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h index 7730d302cadb..5f3dcbbc0cc9 100644 --- a/arch/arc/include/asm/atomic.h +++ b/arch/arc/include/asm/atomic.h @@ -17,6 +17,8 @@ #include #include +#ifndef CONFIG_ARC_PLAT_EZNPS + #define atomic_read(v) READ_ONCE((v)->counter) #ifdef CONFIG_ARC_HAS_LLSC @@ -180,13 +182,88 @@ ATOMIC_OP(andnot, &= ~, bic) ATOMIC_OP(or, |=, or) ATOMIC_OP(xor, ^=, xor) -#undef ATOMIC_OPS -#undef ATOMIC_OP_RETURN -#undef ATOMIC_OP #undef SCOND_FAIL_RETRY_VAR_DEF #undef SCOND_FAIL_RETRY_ASM #undef SCOND_FAIL_RETRY_VARS +#else /* CONFIG_ARC_PLAT_EZNPS */ + +static inline int atomic_read(const atomic_t *v) +{ + int temp; + + __asm__ __volatile__( + " ld.di %0, [%1]" + : "=r"(temp) + : "r"(&v->counter) + : "memory"); + return temp; +} + +static inline void atomic_set(atomic_t *v, int i) +{ + __asm__ __volatile__( + " st.di %0,[%1]" + : + : "r"(i), "r"(&v->counter) + : "memory"); +} + +#define ATOMIC_OP(op, c_op, asm_op) \ +static inline void atomic_##op(int i, atomic_t *v) \ +{ \ + __asm__ __volatile__( \ + " mov r2, %0\n" \ + " mov r3, %1\n" \ + " .word %2\n" \ + : \ + : "r"(i), "r"(&v->counter), "i"(asm_op) \ + : "r2", "r3", "memory"); \ +} \ + +#define ATOMIC_OP_RETURN(op, c_op, asm_op) \ +static inline int atomic_##op##_return(int i, atomic_t *v) \ +{ \ + unsigned int temp = i; \ + \ + /* Explicit full memory barrier needed before/after */ \ + smp_mb(); \ + \ + __asm__ __volatile__( \ + " mov r2, %0\n" \ + " mov r3, %1\n" \ + " .word %2\n" \ + " mov %0, r2" \ + : "+r"(temp) \ + : "r"(&v->counter), "i"(asm_op) \ + : "r2", "r3", "memory"); \ + \ + smp_mb(); \ + \ + temp c_op i; \ + \ + return temp; \ +} + +#define ATOMIC_OPS(op, c_op, asm_op) \ + ATOMIC_OP(op, c_op, asm_op) \ + ATOMIC_OP_RETURN(op, c_op, asm_op) + +ATOMIC_OPS(add, +=, CTOP_INST_AADD_DI_R2_R2_R3) +#define atomic_sub(i, v) atomic_add(-(i), (v)) +#define atomic_sub_return(i, v) atomic_add_return(-(i), (v)) + +ATOMIC_OP(and, &=, CTOP_INST_AAND_DI_R2_R2_R3) +#define atomic_andnot(mask, v) atomic_and(~(mask), (v)) +ATOMIC_OP(or, |=, CTOP_INST_AOR_DI_R2_R2_R3) +ATOMIC_OP(xor, ^=, CTOP_INST_AXOR_DI_R2_R2_R3) + +#endif /* CONFIG_ARC_PLAT_EZNPS */ + +#undef ATOMIC_OPS +#undef ATOMIC_OP_RETURN +#undef ATOMIC_OP + /** * __atomic_add_unless - add unless the number is a given value * @v: pointer of type atomic_t diff --git a/arch/arc/include/asm/bitops.h b/arch/arc/include/asm/bitops.h index 0352fb8d21b9..8da87feec59a 100644 --- a/arch/arc/include/asm/bitops.h +++ b/arch/arc/include/asm/bitops.h @@ -22,7 +22,7 @@ #include #endif -#if defined(CONFIG_ARC_HAS_LLSC) +#ifdef CONFIG_ARC_HAS_LLSC /* * Hardware assisted Atomic-R-M-W @@ -88,7 +88,7 @@ static inline int test_and_##op##_bit(unsigned long nr, volatile unsigned long * return (old & (1 << nr)) != 0; \ } -#else /* !CONFIG_ARC_HAS_LLSC */ +#elif !defined(CONFIG_ARC_PLAT_EZNPS) /* * Non hardware assisted Atomic-R-M-W @@ -139,7 +139,55 @@ static inline int test_and_##op##_bit(unsigned long nr, volatile unsigned long * return (old & (1UL << (nr & 0x1f))) != 0; \ } -#endif /* CONFIG_ARC_HAS_LLSC */ +#else /* CONFIG_ARC_PLAT_EZNPS */ + +#define BIT_OP(op, c_op, asm_op) \ +static inline void op##_bit(unsigned long 
nr, volatile unsigned long *m)\ +{ \ + m += nr >> 5; \ + \ + nr = (1UL << (nr & 0x1f)); \ + if (asm_op == CTOP_INST_AAND_DI_R2_R2_R3) \ + nr = ~nr; \ + \ + __asm__ __volatile__( \ + " mov r2, %0\n" \ + " mov r3, %1\n" \ + " .word %2\n" \ + : \ + : "r"(nr), "r"(m), "i"(asm_op) \ + : "r2", "r3", "memory"); \ +} + +#define TEST_N_BIT_OP(op, c_op, asm_op) \ +static inline int test_and_##op##_bit(unsigned long nr, volatile unsigned long *m)\ +{ \ + unsigned long old; \ + \ + m += nr >> 5; \ + \ + nr = old = (1UL << (nr & 0x1f)); \ + if (asm_op == CTOP_INST_AAND_DI_R2_R2_R3) \ + old = ~old; \ + \ + /* Explicit full memory barrier needed before/after */ \ + smp_mb(); \ + \ + __asm__ __volatile__( \ + " mov r2, %0\n" \ + " mov r3, %1\n" \ + " .word %2\n" \ + " mov %0, r2" \ + : "+r"(old) \ + : "r"(m), "i"(asm_op) \ + : "r2", "r3", "memory"); \ + \ + smp_mb(); \ + \ + return (old & nr) != 0; \ +} + +#endif /* CONFIG_ARC_PLAT_EZNPS */ /*************************************** * Non atomic variants @@ -181,9 +229,15 @@ static inline int __test_and_##op##_bit(unsigned long nr, volatile unsigned long /* __test_and_set_bit(), __test_and_clear_bit(), __test_and_change_bit() */\ __TEST_N_BIT_OP(op, c_op, asm_op) +#ifndef CONFIG_ARC_PLAT_EZNPS BIT_OPS(set, |, bset) BIT_OPS(clear, & ~, bclr) BIT_OPS(change, ^, bxor) +#else +BIT_OPS(set, |, CTOP_INST_AOR_DI_R2_R2_R3) +BIT_OPS(clear, & ~, CTOP_INST_AAND_DI_R2_R2_R3) +BIT_OPS(change, ^, CTOP_INST_AXOR_DI_R2_R2_R3) +#endif /* * This routine doesn't need to be atomic. diff --git a/arch/arc/include/asm/cmpxchg.h b/arch/arc/include/asm/cmpxchg.h index a444be67cd53..d819de1c5d10 100644 --- a/arch/arc/include/asm/cmpxchg.h +++ b/arch/arc/include/asm/cmpxchg.h @@ -44,7 +44,7 @@ __cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new) return prev; } -#else +#elif !defined(CONFIG_ARC_PLAT_EZNPS) static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new) @@ -64,23 +64,48 @@ __cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new) return prev; } +#else /* CONFIG_ARC_PLAT_EZNPS */ + +static inline unsigned long +__cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new) +{ + /* + * Explicit full memory barrier needed before/after + */ + smp_mb(); + + write_aux_reg(CTOP_AUX_GPA1, expected); + + __asm__ __volatile__( + " mov r2, %0\n" + " mov r3, %1\n" + " .word %2\n" + " mov %0, r2" + : "+r"(new) + : "r"(ptr), "i"(CTOP_INST_EXC_DI_R2_R2_R3) + : "r2", "r3", "memory"); + + smp_mb(); + + return new; +} + #endif /* CONFIG_ARC_HAS_LLSC */ #define cmpxchg(ptr, o, n) ((typeof(*(ptr)))__cmpxchg((ptr), \ (unsigned long)(o), (unsigned long)(n))) /* - * Since not supported natively, ARC cmpxchg() uses atomic_ops_lock (UP/SMP) - * just to gaurantee semantics. - * atomic_cmpxchg() needs to use the same locks as it's other atomic siblings - * which also happens to be atomic_ops_lock. - * - * Thus despite semantically being different, implementation of atomic_cmpxchg() - * is same as cmpxchg(). 
+ * atomic_cmpxchg is same as cmpxchg + * LLSC: only different in data-type, semantics are exactly same + * !LLSC: cmpxchg() has to use an external lock atomic_ops_lock to guarantee + * semantics, and this lock also happens to be used by atomic_*() */ #define atomic_cmpxchg(v, o, n) ((int)cmpxchg(&((v)->counter), (o), (n))) +#ifndef CONFIG_ARC_PLAT_EZNPS + /* * xchg (reg with memory) based on "Native atomic" EX insn */ @@ -143,6 +168,41 @@ static inline unsigned long __xchg(unsigned long val, volatile void *ptr, #endif +#else /* CONFIG_ARC_PLAT_EZNPS */ + +static inline unsigned long __xchg(unsigned long val, volatile void *ptr, + int size) +{ + extern unsigned long __xchg_bad_pointer(void); + + switch (size) { + case 4: + /* + * Explicit full memory barrier needed before/after + */ + smp_mb(); + + __asm__ __volatile__( + " mov r2, %0\n" + " mov r3, %1\n" + " .word %2\n" + " mov %0, r2\n" + : "+r"(val) + : "r"(ptr), "i"(CTOP_INST_XEX_DI_R2_R2_R3) + : "r2", "r3", "memory"); + + smp_mb(); + + return val; + } + return __xchg_bad_pointer(); +} + +#define xchg(ptr, with) ((typeof(*(ptr)))__xchg((unsigned long)(with), (ptr), \ + sizeof(*(ptr)))) + +#endif /* CONFIG_ARC_PLAT_EZNPS */ + /* * "atomic" variant of xchg() * REQ: It needs to follow the same serialization rules as other atomic_xxx() -- cgit v1.2.3 From b1f2f6f3cf5e37f0418f6cebf365cff7c3abf6d7 Mon Sep 17 00:00:00 2001 From: Noam Camus Date: Fri, 10 Apr 2015 21:28:50 +0300 Subject: ARC: [plat-eznps] Use dedicated SMP barriers NPS device got 256 cores and each got 16 HW threads (SMT). We use EZchip dedicated ISA to trigger HW scheduler of the core that current HW thread belongs to. This scheduling makes sure that data beyond barrier is available to all HW threads in core and by that to all in device (4K). Signed-off-by: Noam Camus Cc: Peter Zijlstra --- arch/arc/include/asm/barrier.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'arch/arc/include') diff --git a/arch/arc/include/asm/barrier.h b/arch/arc/include/asm/barrier.h index a7209983ee64..b1e327495c7d 100644 --- a/arch/arc/include/asm/barrier.h +++ b/arch/arc/include/asm/barrier.h @@ -30,9 +30,7 @@ #define rmb() asm volatile("dmb 1\n" : : : "memory") #define wmb() asm volatile("dmb 2\n" : : : "memory") -#endif - -#ifdef CONFIG_ISA_ARCOMPACT +#elif !defined(CONFIG_ARC_PLAT_EZNPS) /* CONFIG_ISA_ARCOMPACT */ /* * ARCompact based cores (ARC700) only have SYNC instruction which is super @@ -41,6 +39,14 @@ */ #define mb() asm volatile("sync\n" : : : "memory") + +#else /* CONFIG_ARC_PLAT_EZNPS */ + +#include + +#define mb() asm volatile (".word %0" : : "i"(CTOP_INST_SCHD_RW) : "memory") +#define rmb() asm volatile (".word %0" : : "i"(CTOP_INST_SCHD_RD) : "memory") + #endif #include -- cgit v1.2.3 From 86c25466f7414d6396f1aaa13e4b34f36ec272d5 Mon Sep 17 00:00:00 2001 From: Noam Camus Date: Mon, 3 Jun 2013 15:17:25 +0300 Subject: ARC: [plat-eznps] Use dedicated identity auxiliary register. With generic "identity" num of CPUs is limited to 256 (8 bit). We use our alternative AUX register GLOBAL_ID (12 bit). Now we can support up to 4096 CPUs. 
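A hedged C rendering of the new CPU-id read (illustrative only: read_aux_reg() is the existing ARC register accessor, and the shift mirrors the GET_CPU_ID assembly macro this patch adds to plat/ctop.h):

static inline unsigned int nps_cpu_id(void)
{
	unsigned int id = read_aux_reg(CTOP_AUX_LOGIC_GLOBAL_ID);

#ifndef CONFIG_EZNPS_MTM_EXT
	id >>= 4;	/* fold away the 16 HW threads per core */
#endif
	return id;	/* 12 significant bits -> up to 4096 CPUs */
}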
Signed-off-by: Noam Camus --- arch/arc/include/asm/entry-compact.h | 6 ++++++ arch/arc/kernel/ctx_sw.c | 13 +++++++++++++ arch/arc/plat-eznps/include/plat/ctop.h | 9 +++++++++ 3 files changed, 28 insertions(+) (limited to 'arch/arc/include') diff --git a/arch/arc/include/asm/entry-compact.h b/arch/arc/include/asm/entry-compact.h index 1d8f57cd6057..e0e1faf03c50 100644 --- a/arch/arc/include/asm/entry-compact.h +++ b/arch/arc/include/asm/entry-compact.h @@ -36,6 +36,10 @@ #include #include /* For THREAD_SIZE */ +#ifdef CONFIG_ARC_PLAT_EZNPS +#include +#endif + /*-------------------------------------------------------------- * Switch to Kernel Mode stack if SP points to User Mode stack * @@ -296,11 +300,13 @@ bic \reg, sp, (THREAD_SIZE - 1) .endm +#ifndef CONFIG_ARC_PLAT_EZNPS /* Get CPU-ID of this core */ .macro GET_CPU_ID reg lr \reg, [identity] lsr \reg, \reg, 8 bmsk \reg, \reg, 7 .endm +#endif #endif /* __ASM_ARC_ENTRY_COMPACT_H */ diff --git a/arch/arc/kernel/ctx_sw.c b/arch/arc/kernel/ctx_sw.c index 5d446df2c413..6f4cb0dab1b9 100644 --- a/arch/arc/kernel/ctx_sw.c +++ b/arch/arc/kernel/ctx_sw.c @@ -16,6 +16,9 @@ #include #include +#ifdef CONFIG_ARC_PLAT_EZNPS +#include +#endif #define KSP_WORD_OFF ((TASK_THREAD + THREAD_KSP) / 4) @@ -66,10 +69,17 @@ __switch_to(struct task_struct *prev_task, struct task_struct *next_task) */ #ifndef CONFIG_SMP "st %2, [@_current_task] \n\t" +#else +#ifdef CONFIG_ARC_PLAT_EZNPS + "lr r24, [%4] \n\t" +#ifndef CONFIG_EZNPS_MTM_EXT + "lsr r24, r24, 4 \n\t" +#endif #else "lr r24, [identity] \n\t" "lsr r24, r24, 8 \n\t" "bmsk r24, r24, 7 \n\t" +#endif "add2 r24, @_current_task, r24 \n\t" "st %2, [r24] \n\t" #endif @@ -107,6 +117,9 @@ __switch_to(struct task_struct *prev_task, struct task_struct *next_task) : "=r"(tmp) : "n"(KSP_WORD_OFF), "r"(next), "r"(prev) +#ifdef CONFIG_ARC_PLAT_EZNPS + , "i"(CTOP_AUX_LOGIC_GLOBAL_ID) +#endif : "blink" ); diff --git a/arch/arc/plat-eznps/include/plat/ctop.h b/arch/arc/plat-eznps/include/plat/ctop.h index 91b25f3dd39a..9d6718c1a199 100644 --- a/arch/arc/plat-eznps/include/plat/ctop.h +++ b/arch/arc/plat-eznps/include/plat/ctop.h @@ -195,6 +195,15 @@ struct nps_host_reg_aux_lpc { #define REG_GIM_P_INT_DST_25 nps_host_reg_non_cl(NPS_GIM_BLKID, 0x149) #define REG_GIM_P_INT_DST_26 nps_host_reg_non_cl(NPS_GIM_BLKID, 0x14A) +#else + +.macro GET_CPU_ID reg + lr \reg, [CTOP_AUX_LOGIC_GLOBAL_ID] +#ifndef CONFIG_EZNPS_MTM_EXT + lsr \reg, \reg, 4 +#endif +.endm + #endif /* __ASSEMBLY__ */ #endif /* _PLAT_EZNPS_CTOP_H */ -- cgit v1.2.3 From 46c3e6b8768643d9bc7325324d17e37781b7bbf8 Mon Sep 17 00:00:00 2001 From: Tal Zilcer Date: Mon, 9 Mar 2015 16:58:39 +0200 Subject: ARC: [plat-eznps] Use dedicated cpu_relax() Since the CTOP is SMT hardware multi-threaded, we need to hint the HW that now would be a very good time to do a hardware thread context switch. This is done by issuing the schd.rw instruction (binary coded here so as not to require a specific revision of GCC to build the kernel). schd.rw means that the thread becomes eligible for execution by the threads scheduler after all pending read/write transactions have completed. cpu_relax_lowlatency() is implemented with barrier(), since with the current semantics of cpu_relax() it may take a while until the yielded CPU gets back.
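A minimal spin-wait sketch (illustrative only) of why the two variants now differ:

static void wait_for_flag(volatile int *flag)
{
	while (!*flag)
		cpu_relax();	/* on EZNPS MTM: schd.rw yields this HW thread */
}

A latency-sensitive waiter would use cpu_relax_lowlatency() instead, which stays a plain compiler barrier so the spinning HW thread is not descheduled.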
Signed-off-by: Noam Camus Cc: Peter Zijlstra Acked-by: Vineet Gupta --- arch/arc/include/asm/processor.h | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'arch/arc/include') diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h index 194a09fce198..f9048994b22f 100644 --- a/arch/arc/include/asm/processor.h +++ b/arch/arc/include/asm/processor.h @@ -57,9 +57,19 @@ struct task_struct; * A lot of busy-wait loops in SMP are based off of non-volatile data otherwise * get optimised away by gcc */ -#define cpu_relax() __asm__ __volatile__ ("" : : : "memory") +#ifndef CONFIG_EZNPS_MTM_EXT -#define cpu_relax_lowlatency() cpu_relax() +#define cpu_relax() barrier() +#define cpu_relax_lowlatency() cpu_relax() + +#else + +#define cpu_relax() \ + __asm__ __volatile__ (".word %0" : : "i"(CTOP_INST_SCHD_RW) : "memory") + +#define cpu_relax_lowlatency() barrier() + +#endif #define copy_segments(tsk, mm) do { } while (0) #define release_segments(mm) do { } while (0) -- cgit v1.2.3 From 085572f3cc88cf223f0dfdee2dcbb0672e938527 Mon Sep 17 00:00:00 2001 From: Noam Camus Date: Fri, 29 May 2015 11:40:02 +0300 Subject: ARC: [plat-eznps] Use dedicated COMMAND_LINE_SIZE The default 256 bytes sometimes is just not enough. We usually provide earlycon=... and console=... and ip=... All this and more may need more room. Signed-off-by: Noam Camus Acked-by: Vineet Gupta --- arch/arc/include/asm/setup.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/arc/include') diff --git a/arch/arc/include/asm/setup.h b/arch/arc/include/asm/setup.h index 307846691be6..48b37c693db3 100644 --- a/arch/arc/include/asm/setup.h +++ b/arch/arc/include/asm/setup.h @@ -12,7 +12,11 @@ #include #include +#ifdef CONFIG_ARC_PLAT_EZNPS +#define COMMAND_LINE_SIZE 2048 +#else #define COMMAND_LINE_SIZE 256 +#endif /* * Data structure to map a ID to string -- cgit v1.2.3 From 5035cd5b666b7378e32e1985225aae5746a59ac9 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 3 May 2016 14:53:40 +0530 Subject: ARC: pae: STRICT_MM_TYPECHECKS was broken Signed-off-by: Vineet Gupta --- arch/arc/include/asm/page.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/arc/include') diff --git a/arch/arc/include/asm/page.h b/arch/arc/include/asm/page.h index 0d53854884d0..296c3426a6ad 100644 --- a/arch/arc/include/asm/page.h +++ b/arch/arc/include/asm/page.h @@ -31,7 +31,11 @@ void clear_user_page(void *to, unsigned long u_vaddr, struct page *page); * These are used to make use of C type-checking.. */ typedef struct { +#ifdef CONFIG_ARC_HAS_PAE40 + unsigned long long pte; +#else unsigned long pte; +#endif } pte_t; typedef struct { unsigned long pgd; -- cgit v1.2.3 From fd8cfd3000191cb7f5b9ea8640bd46181f6b4b74 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Thu, 19 May 2016 17:13:00 -0700 Subject: arch: fix has_transparent_hugepage() I've just discovered that the useful-sounding has_transparent_hugepage() is actually an architecture-dependent minefield: on some arches it only builds if CONFIG_TRANSPARENT_HUGEPAGE=y, on others it's also there when not, but on some of those (arm and arm64) it then gives the wrong answer; and on mips alone it's marked __init, which would crash if called later (but so far it has not been called later). 
Straighten this out: make it available to all configs, with a sensible default in asm-generic/pgtable.h, removing its definitions from those arches (arc, arm, arm64, sparc, tile) which are served by the default, adding #define has_transparent_hugepage has_transparent_hugepage to those (mips, powerpc, s390, x86) which need to override the default at runtime, and removing the __init from mips (but maybe that kind of code should be avoided after init: set a static variable the first time it's called). Signed-off-by: Hugh Dickins Cc: "Kirill A. Shutemov" Cc: Andrea Arcangeli Cc: Andres Lagar-Cavilla Cc: Yang Shi Cc: Ning Qu Cc: Mel Gorman Cc: Konstantin Khlebnikov Acked-by: David S. Miller Acked-by: Vineet Gupta [arch/arc] Acked-by: Gerald Schaefer [arch/s390] Acked-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arc/include/asm/hugepage.h | 2 -- arch/arm/include/asm/pgtable-3level.h | 5 ----- arch/arm64/include/asm/pgtable.h | 5 ----- arch/mips/include/asm/pgtable.h | 1 + arch/mips/mm/tlb-r4k.c | 21 +++++++++++---------- arch/powerpc/include/asm/book3s/64/pgtable.h | 1 + arch/powerpc/include/asm/pgtable.h | 1 - arch/s390/include/asm/pgtable.h | 1 + arch/sparc/include/asm/pgtable_64.h | 2 -- arch/tile/include/asm/pgtable.h | 1 - arch/x86/include/asm/pgtable.h | 1 + include/asm-generic/pgtable.h | 8 ++++++++ 12 files changed, 23 insertions(+), 26 deletions(-) (limited to 'arch/arc/include') diff --git a/arch/arc/include/asm/hugepage.h b/arch/arc/include/asm/hugepage.h index 7afe3356b770..317ff773e1ca 100644 --- a/arch/arc/include/asm/hugepage.h +++ b/arch/arc/include/asm/hugepage.h @@ -61,8 +61,6 @@ static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, extern void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmd); -#define has_transparent_hugepage() 1 - /* Generic variants assume pgtable_t is struct page *, hence need for these */ #define __HAVE_ARCH_PGTABLE_DEPOSIT extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h index dc46398bc3a5..fa70db7c714b 100644 --- a/arch/arm/include/asm/pgtable-3level.h +++ b/arch/arm/include/asm/pgtable-3level.h @@ -281,11 +281,6 @@ static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, flush_pmd_entry(pmdp); } -static inline int has_transparent_hugepage(void) -{ - return 1; -} - #endif /* __ASSEMBLY__ */ #endif /* _ASM_PGTABLE_3LEVEL_H */ diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 2da46ae9c991..a7ac45a03dd0 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -314,11 +314,6 @@ static inline int pmd_protnone(pmd_t pmd) #define set_pmd_at(mm, addr, pmdp, pmd) set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd)) -static inline int has_transparent_hugepage(void) -{ - return 1; -} - #define __pgprot_modify(prot,mask,bits) \ __pgprot((pgprot_val(prot) & ~(mask)) | (bits)) diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h index 9a4fe0133ff1..f53a7e3a4dd9 100644 --- a/arch/mips/include/asm/pgtable.h +++ b/arch/mips/include/asm/pgtable.h @@ -468,6 +468,7 @@ static inline int io_remap_pfn_range(struct vm_area_struct *vma, #ifdef CONFIG_TRANSPARENT_HUGEPAGE +#define has_transparent_hugepage has_transparent_hugepage extern int has_transparent_hugepage(void); static inline int pmd_trans_huge(pmd_t pmd) diff --git a/arch/mips/mm/tlb-r4k.c b/arch/mips/mm/tlb-r4k.c 
index c17d7627f872..2d93b63cf830 100644 --- a/arch/mips/mm/tlb-r4k.c +++ b/arch/mips/mm/tlb-r4k.c @@ -400,19 +400,20 @@ void add_wired_entry(unsigned long entrylo0, unsigned long entrylo1, #ifdef CONFIG_TRANSPARENT_HUGEPAGE -int __init has_transparent_hugepage(void) +int has_transparent_hugepage(void) { - unsigned int mask; - unsigned long flags; - - local_irq_save(flags); - write_c0_pagemask(PM_HUGE_MASK); - back_to_back_c0_hazard(); - mask = read_c0_pagemask(); - write_c0_pagemask(PM_DEFAULT_MASK); + static unsigned int mask = -1; - local_irq_restore(flags); + if (mask == -1) { /* first call comes during __init */ + unsigned long flags; + local_irq_save(flags); + write_c0_pagemask(PM_HUGE_MASK); + back_to_back_c0_hazard(); + mask = read_c0_pagemask(); + write_c0_pagemask(PM_DEFAULT_MASK); + local_irq_restore(flags); + } return mask == PM_HUGE_MASK; } diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 77d3ce05798e..8fe6f6b48aa5 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -219,6 +219,7 @@ extern void set_pmd_at(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t pmd); extern void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmd); +#define has_transparent_hugepage has_transparent_hugepage extern int has_transparent_hugepage(void); #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index 47897a30982d..ee09e99097f0 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -65,7 +65,6 @@ extern int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, struct page **pages, int *nr); #ifndef CONFIG_TRANSPARENT_HUGEPAGE #define pmd_large(pmd) 0 -#define has_transparent_hugepage() 0 #endif pte_t *__find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, bool *is_thp, unsigned *shift); diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 2f66645587a2..18d2beb89340 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -1223,6 +1223,7 @@ static inline int pmd_trans_huge(pmd_t pmd) return pmd_val(pmd) & _SEGMENT_ENTRY_LARGE; } +#define has_transparent_hugepage has_transparent_hugepage static inline int has_transparent_hugepage(void) { return MACHINE_HAS_HPAGE ? 
1 : 0; diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index f089cfa249f3..93ce0ada3c63 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -681,8 +681,6 @@ static inline unsigned long pmd_trans_huge(pmd_t pmd) return pte_val(pte) & _PAGE_PMD_HUGE; } -#define has_transparent_hugepage() 1 - static inline pmd_t pmd_mkold(pmd_t pmd) { pte_t pte = __pte(pmd_val(pmd)); diff --git a/arch/tile/include/asm/pgtable.h b/arch/tile/include/asm/pgtable.h index 96cecf55522e..2a26cc4fefc2 100644 --- a/arch/tile/include/asm/pgtable.h +++ b/arch/tile/include/asm/pgtable.h @@ -487,7 +487,6 @@ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) } #ifdef CONFIG_TRANSPARENT_HUGEPAGE -#define has_transparent_hugepage() 1 #define pmd_trans_huge pmd_huge_page #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index f86491a7bc9d..1a27396b6ea0 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -181,6 +181,7 @@ static inline int pmd_trans_huge(pmd_t pmd) return (pmd_val(pmd) & (_PAGE_PSE|_PAGE_DEVMAP)) == _PAGE_PSE; } +#define has_transparent_hugepage has_transparent_hugepage static inline int has_transparent_hugepage(void) { return boot_cpu_has(X86_FEATURE_PSE); diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 9401f4819891..d4458b6dbfb4 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -806,4 +806,12 @@ static inline int pmd_clear_huge(pmd_t *pmd) #define io_remap_pfn_range remap_pfn_range #endif +#ifndef has_transparent_hugepage +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#define has_transparent_hugepage() 1 +#else +#define has_transparent_hugepage() 0 +#endif +#endif + #endif /* _ASM_GENERIC_PGTABLE_H */ -- cgit v1.2.3 From 2547476a5e4061f6addb88d5fc837d3a950f54c4 Mon Sep 17 00:00:00 2001 From: Andrea Gelmini Date: Sat, 21 May 2016 13:45:35 +0200 Subject: Fix typos Signed-off-by: Andrea Gelmini Signed-off-by: Vineet Gupta --- arch/arc/Makefile | 2 +- arch/arc/include/asm/entry-compact.h | 4 ++-- arch/arc/include/asm/mmu_context.h | 2 +- arch/arc/include/asm/pgtable.h | 2 +- arch/arc/include/asm/processor.h | 2 +- arch/arc/include/asm/smp.h | 2 +- arch/arc/include/asm/thread_info.h | 2 +- arch/arc/include/asm/uaccess.h | 2 +- arch/arc/include/uapi/asm/swab.h | 2 +- arch/arc/kernel/perf_event.c | 2 +- arch/arc/kernel/setup.c | 2 +- arch/arc/kernel/signal.c | 2 +- arch/arc/kernel/troubleshoot.c | 2 +- arch/arc/mm/cache.c | 6 +++--- arch/arc/mm/dma.c | 2 +- 15 files changed, 18 insertions(+), 18 deletions(-) (limited to 'arch/arc/include') diff --git a/arch/arc/Makefile b/arch/arc/Makefile index 02fabef2891c..d4df6be66d58 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -127,7 +127,7 @@ libs-y += arch/arc/lib/ $(LIBGCC) boot := arch/arc/boot -#default target for make without any arguements. +#default target for make without any arguments. KBUILD_IMAGE := bootpImage all: $(KBUILD_IMAGE) diff --git a/arch/arc/include/asm/entry-compact.h b/arch/arc/include/asm/entry-compact.h index e0e1faf03c50..14c310f2e0b1 100644 --- a/arch/arc/include/asm/entry-compact.h +++ b/arch/arc/include/asm/entry-compact.h @@ -76,8 +76,8 @@ * We need to be a bit more cautious here. What if a kernel bug in * L1 ISR, caused SP to go whaco (some small value which looks like * USER stk) and then we take L2 ISR. 
- * Above brlo alone would treat it as a valid L1-L2 sceanrio - * instead of shouting alound + * Above brlo alone would treat it as a valid L1-L2 scenario + * instead of shouting around * The only feasible way is to make sure this L2 happened in * L1 prelogue ONLY i.e. ilink2 is less than a pre-set marker in * L1 ISR before it switches stack diff --git a/arch/arc/include/asm/mmu_context.h b/arch/arc/include/asm/mmu_context.h index 1fd467ef658f..b0b87f2447f5 100644 --- a/arch/arc/include/asm/mmu_context.h +++ b/arch/arc/include/asm/mmu_context.h @@ -83,7 +83,7 @@ static inline void get_new_mmu_context(struct mm_struct *mm) local_flush_tlb_all(); /* - * Above checke for rollover of 8 bit ASID in 32 bit container. + * Above check for rollover of 8 bit ASID in 32 bit container. * If the container itself wrapped around, set it to a non zero * "generation" to distinguish from no context */ diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h index 034bbdc0ff61..858f98ef7f1b 100644 --- a/arch/arc/include/asm/pgtable.h +++ b/arch/arc/include/asm/pgtable.h @@ -47,7 +47,7 @@ * Page Tables are purely for Linux VM's consumption and the bits below are * suited to that (uniqueness). Hence some are not implemented in the TLB and * some have different value in TLB. - * e.g. MMU v2: K_READ bit is 8 and so is GLOBAL (possible becoz they live in + * e.g. MMU v2: K_READ bit is 8 and so is GLOBAL (possible because they live in * seperate PD0 and PD1, which combined forms a translation entry) * while for PTE perspective, they are 8 and 9 respectively * with MMU v3: Most bits (except SHARED) represent the exact hardware pos diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h index f9048994b22f..16b630fbeb6a 100644 --- a/arch/arc/include/asm/processor.h +++ b/arch/arc/include/asm/processor.h @@ -78,7 +78,7 @@ struct task_struct; #define KSTK_ESP(tsk) (task_pt_regs(tsk)->sp) /* - * Where abouts of Task's sp, fp, blink when it was last seen in kernel mode. + * Where about of Task's sp, fp, blink when it was last seen in kernel mode. * Look in process.c for details of kernel stack layout */ #define TSK_K_ESP(tsk) (tsk->thread.ksp) diff --git a/arch/arc/include/asm/smp.h b/arch/arc/include/asm/smp.h index 991380438d6b..89fdd1b0a76e 100644 --- a/arch/arc/include/asm/smp.h +++ b/arch/arc/include/asm/smp.h @@ -86,7 +86,7 @@ static inline const char *arc_platform_smp_cpuinfo(void) * (1) These insn were introduced only in 4.10 release. So for older released * support needed. * - * (2) In a SMP setup, the LLOCK/SCOND atomiticity across CPUs needs to be + * (2) In a SMP setup, the LLOCK/SCOND atomicity across CPUs needs to be * gaurantted by the platform (not something which core handles). * Assuming a platform won't, SMP Linux needs to use spinlocks + local IRQ * disabling for atomicity. diff --git a/arch/arc/include/asm/thread_info.h b/arch/arc/include/asm/thread_info.h index 3af67455659a..2d79e527fa50 100644 --- a/arch/arc/include/asm/thread_info.h +++ b/arch/arc/include/asm/thread_info.h @@ -103,7 +103,7 @@ static inline __attribute_const__ struct thread_info *current_thread_info(void) /* * _TIF_ALLWORK_MASK includes SYSCALL_TRACE, but we don't need it. 
- * SYSCALL_TRACE is anways seperately/unconditionally tested right after a + * SYSCALL_TRACE is anyway seperately/unconditionally tested right after a * syscall, so all that reamins to be tested is _TIF_WORK_MASK */ diff --git a/arch/arc/include/asm/uaccess.h b/arch/arc/include/asm/uaccess.h index d1da6032b715..a78d5670884f 100644 --- a/arch/arc/include/asm/uaccess.h +++ b/arch/arc/include/asm/uaccess.h @@ -32,7 +32,7 @@ #define __kernel_ok (segment_eq(get_fs(), KERNEL_DS)) /* - * Algorthmically, for __user_ok() we want do: + * Algorithmically, for __user_ok() we want do: * (start < TASK_SIZE) && (start+len < TASK_SIZE) * where TASK_SIZE could either be retrieved from thread_info->addr_limit or * emitted directly in code. diff --git a/arch/arc/include/uapi/asm/swab.h b/arch/arc/include/uapi/asm/swab.h index 095599a73195..71f3918b0fc3 100644 --- a/arch/arc/include/uapi/asm/swab.h +++ b/arch/arc/include/uapi/asm/swab.h @@ -74,7 +74,7 @@ __tmp ^ __in; \ }) -#elif (ARC_BSWAP_TYPE == 2) /* Custom single cycle bwap instruction */ +#elif (ARC_BSWAP_TYPE == 2) /* Custom single cycle bswap instruction */ #define __arch_swab32(x) \ ({ \ diff --git a/arch/arc/kernel/perf_event.c b/arch/arc/kernel/perf_event.c index 6fd48021324b..08f03d9b5b3e 100644 --- a/arch/arc/kernel/perf_event.c +++ b/arch/arc/kernel/perf_event.c @@ -108,7 +108,7 @@ static void arc_perf_event_update(struct perf_event *event, int64_t delta = new_raw_count - prev_raw_count; /* - * We don't afaraid of hwc->prev_count changing beneath our feet + * We aren't afraid of hwc->prev_count changing beneath our feet * because there's no way for us to re-enter this function anytime. */ local64_set(&hwc->prev_count, new_raw_count); diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index f63b8bfefb0c..2ee7a4d758a8 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -392,7 +392,7 @@ void __init setup_arch(char **cmdline_p) /* * If we are here, it is established that @uboot_arg didn't * point to DT blob. Instead if u-boot says it is cmdline, - * Appent to embedded DT cmdline. + * append to embedded DT cmdline. * setup_machine_fdt() would have populated @boot_command_line */ if (uboot_tag == 1) { diff --git a/arch/arc/kernel/signal.c b/arch/arc/kernel/signal.c index 004b7f0bc76c..6cb3736b6b83 100644 --- a/arch/arc/kernel/signal.c +++ b/arch/arc/kernel/signal.c @@ -34,7 +34,7 @@ * -ViXS were still seeing crashes when using insmod to load drivers. 
* It turned out that the code to change Execute permssions for TLB entries * of user was not guarded for interrupts (mod_tlb_permission) - * This was cauing TLB entries to be overwritten on unrelated indexes + * This was causing TLB entries to be overwritten on unrelated indexes * * Vineetg: July 15th 2008: Bug #94183 * -Exception happens in Delay slot of a JMP, and before user space resumes, diff --git a/arch/arc/kernel/troubleshoot.c b/arch/arc/kernel/troubleshoot.c index a6f91e88ce36..934150e7ac48 100644 --- a/arch/arc/kernel/troubleshoot.c +++ b/arch/arc/kernel/troubleshoot.c @@ -276,7 +276,7 @@ static int tlb_stats_open(struct inode *inode, struct file *file) return 0; } -/* called on user read(): display the couters */ +/* called on user read(): display the counters */ static ssize_t tlb_stats_output(struct file *file, /* file descriptor */ char __user *user_buf, /* user buffer */ size_t len, /* length of buffer */ diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c index 9e5eddbb856f..5a294b2c3cb3 100644 --- a/arch/arc/mm/cache.c +++ b/arch/arc/mm/cache.c @@ -215,7 +215,7 @@ slc_chk: * ------------------ * This ver of MMU supports variable page sizes (1k-16k): although Linux will * only support 8k (default), 16k and 4k. - * However from hardware perspective, smaller page sizes aggrevate aliasing + * However from hardware perspective, smaller page sizes aggravate aliasing * meaning more vaddr bits needed to disambiguate the cache-line-op ; * the existing scheme of piggybacking won't work for certain configurations. * Two new registers IC_PTAG and DC_PTAG inttoduced. @@ -302,7 +302,7 @@ void __cache_line_loop_v3(phys_addr_t paddr, unsigned long vaddr, /* * This is technically for MMU v4, using the MMU v3 programming model - * Special work for HS38 aliasing I-cache configuratino with PAE40 + * Special work for HS38 aliasing I-cache configuration with PAE40 * - upper 8 bits of paddr need to be written into PTAG_HI * - (and needs to be written before the lower 32 bits) * Note that PTAG_HI is hoisted outside the line loop @@ -936,7 +936,7 @@ void arc_cache_init(void) ic->ver, CONFIG_ARC_MMU_VER); /* - * In MMU v4 (HS38x) the alising icache config uses IVIL/PTAG + * In MMU v4 (HS38x) the aliasing icache config uses IVIL/PTAG * pair to provide vaddr/paddr respectively, just as in MMU v3 */ if (is_isa_arcv2() && ic->alias) diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c index 8c8e36fa5659..73d7e4c75b7d 100644 --- a/arch/arc/mm/dma.c +++ b/arch/arc/mm/dma.c @@ -10,7 +10,7 @@ * DMA Coherent API Notes * * I/O is inherently non-coherent on ARC. So a coherent DMA buffer is - * implemented by accessintg it using a kernel virtual address, with + * implemented by accessing it using a kernel virtual address, with * Cache bit off in the TLB entry. * * The default DMA address == Phy address which is 0x8000_0000 based. -- cgit v1.2.3 From 42316a201a60be38b07db1ebc3a1633107ed7209 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 31 May 2016 16:31:33 +0530 Subject: Revert "ARCv2: spinlock/rwlock/atomics: reduce 1 instruction in exponential backoff" This reverts commit 10971638701dedadb58c88ce4d31c9375b224ed6. The issue was fixed in hardware in HS2.1C release and there are no known external users of affected RTL - so revert the whole delayed retry series !
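Since all three reverts touch the same mechanism, it helps to spell out what this first one undoes: the optimized code had collapsed the two-instruction delay update (shift, then overflow fixup) into a single rotate. A minimal C rendering of both variants follows; the function names are mine and this is an illustration, not kernel code. It relies on delay holding a single set bit, which is guaranteed because the backoff only ever doubles an initial value of 1. The hunks that follow restore the two-instruction form, purely so the series can be unwound commit by commit:

#include <stdint.h>

/* Restored by this revert: double the delay, then fix up the case where
 * the set bit has been shifted out of the register entirely */
uint32_t delay_next_two_insn(uint32_t delay)
{
	delay <<= 1;				/* asl.f %[delay], %[delay], 1 */
	if (delay == 0)				/* Z flag set by the shift ... */
		delay = 1;			/* ... mov.z %[delay], 1 */
	return delay;
}

/* The reverted one-instruction form: a rotate-left, so the single set
 * bit wraps from bit 31 back to bit 0 instead of vanishing */
uint32_t delay_next_rol(uint32_t delay)
{
	return (delay << 1) | (delay >> 31);	/* rol %[delay], %[delay] */
}

Both variants keep the delay non-zero forever; with the livelock fixed in HS2.1C silicon the whole mechanism became dead weight, which is why the series is reverted rather than re-tuned.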
Signed-off-by: Vineet Gupta --- arch/arc/include/asm/atomic.h | 3 ++- arch/arc/include/asm/spinlock.h | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/arc/include') diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h index 5f3dcbbc0cc9..75c8226317f4 100644 --- a/arch/arc/include/asm/atomic.h +++ b/arch/arc/include/asm/atomic.h @@ -36,7 +36,8 @@ " mov %[tmp], %[delay] \n" /* tmp = delay */ \ "2: brne.d %[tmp], 0, 2b \n" /* while (tmp != 0) */ \ " sub %[tmp], %[tmp], 1 \n" /* tmp-- */ \ - " rol %[delay], %[delay] \n" /* delay *= 2 */ \ + " asl.f %[delay], %[delay], 1 \n" /* delay *= 2 */ \ + " mov.z %[delay], 1 \n" /* handle overflow */ \ " b 1b \n" /* start over */ \ "4: ; --- success --- \n" \ diff --git a/arch/arc/include/asm/spinlock.h b/arch/arc/include/asm/spinlock.h index 800e7c430ca5..a86cb84fad2a 100644 --- a/arch/arc/include/asm/spinlock.h +++ b/arch/arc/include/asm/spinlock.h @@ -260,7 +260,8 @@ static inline void arch_write_unlock(arch_rwlock_t *rw) " mov %[tmp], %[delay] \n" /* tmp = delay */ \ "2: brne.d %[tmp], 0, 2b \n" /* while (tmp != 0) */ \ " sub %[tmp], %[tmp], 1 \n" /* tmp-- */ \ - " rol %[delay], %[delay] \n" /* delay *= 2 */ \ + " asl.f %[delay], %[delay], 1 \n" /* delay *= 2 */ \ + " mov.z %[delay], 1 \n" /* handle overflow */ \ " b 1b \n" /* start over */ \ " \n" \ "4: ; --- done --- \n" \ -- cgit v1.2.3 From 819f3602dcbd6b021cd50e18f5d05da30bca5b07 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 31 May 2016 16:33:29 +0530 Subject: Revert "ARCv2: spinlock/rwlock: Reset retry delay when starting a new spin-wait cycle" This reverts commit b89aa12c177477e34caa722818536fb5d0bffd76. The issue was fixed in hardware in HS2.1C release and there are no known external users of affected RTL so revert the whole delayed retry series ! 
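The change in this second revert is a single branch target per lock primitive, so it is easy to misread: label 0: re-initializes %[delay] to 1, while label 1: is the llock itself. The commit being reverted spun back to 0b, restarting the backoff each time the lock was observed held; the hunks that follow restore 1b, which keeps the delay accumulated by earlier scond failures. A rough C model of the control flow, with the caveat that scond_cas() is a hypothetical stand-in approximating the llock/scond pair as a compare-and-swap (via a GCC builtin) and LOCKED mirrors __ARCH_SPIN_LOCK_LOCKED__:

#include <stdbool.h>

#define LOCKED 1u	/* stand-in for __ARCH_SPIN_LOCK_LOCKED__ */

/* hypothetical model of an llock/scond pair as one compare-and-swap */
static bool scond_cas(volatile unsigned int *a, unsigned int old, unsigned int nv)
{
	return __sync_bool_compare_and_swap(a, old, nv);
}

void spin_lock_sketch(volatile unsigned int *slock, bool reset_per_cycle)
{
	unsigned int delay, val;

reset:						/* asm label "0:" */
	delay = 1;
retry:						/* asm label "1:" */
	val = *slock;				/* llock */
	if (val == LOCKED) {			/* lock held by someone else */
		if (reset_per_cycle)
			goto reset;		/* reverted commit: breq ... 0b */
		goto retry;			/* restored here:   breq ... 1b */
	}
	if (scond_cas(slock, val, LOCKED))
		return;				/* bz 4f: acquired */
	for (volatile unsigned int tmp = delay; tmp != 0; tmp--)
		;				/* scond failed: wait delay loops */
	delay <<= 1;				/* asl.f: delay *= 2 */
	if (delay == 0)
		delay = 1;			/* mov.z: handle overflow */
	goto retry;				/* b 1b: start over */
}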
Signed-off-by: Vineet Gupta --- arch/arc/include/asm/spinlock.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/arc/include') diff --git a/arch/arc/include/asm/spinlock.h b/arch/arc/include/asm/spinlock.h index a86cb84fad2a..5e01bdf968ea 100644 --- a/arch/arc/include/asm/spinlock.h +++ b/arch/arc/include/asm/spinlock.h @@ -279,7 +279,7 @@ static inline void arch_spin_lock(arch_spinlock_t *lock) __asm__ __volatile__( "0: mov %[delay], 1 \n" "1: llock %[val], [%[slock]] \n" - " breq %[val], %[LOCKED], 0b \n" /* spin while LOCKED */ + " breq %[val], %[LOCKED], 1b \n" /* spin while LOCKED */ " scond %[LOCKED], [%[slock]] \n" /* acquire */ " bz 4f \n" /* done */ " \n" @@ -358,7 +358,7 @@ static inline void arch_read_lock(arch_rwlock_t *rw) __asm__ __volatile__( "0: mov %[delay], 1 \n" "1: llock %[val], [%[rwlock]] \n" - " brls %[val], %[WR_LOCKED], 0b\n" /* <= 0: spin while write locked */ + " brls %[val], %[WR_LOCKED], 1b\n" /* <= 0: spin while write locked */ " sub %[val], %[val], 1 \n" /* reader lock */ " scond %[val], [%[rwlock]] \n" " bz 4f \n" /* done */ @@ -427,7 +427,7 @@ static inline void arch_write_lock(arch_rwlock_t *rw) __asm__ __volatile__( "0: mov %[delay], 1 \n" "1: llock %[val], [%[rwlock]] \n" - " brne %[val], %[UNLOCKED], 0b \n" /* while !UNLOCKED spin */ + " brne %[val], %[UNLOCKED], 1b \n" /* while !UNLOCKED spin */ " mov %[val], %[WR_LOCKED] \n" " scond %[val], [%[rwlock]] \n" " bz 4f \n" -- cgit v1.2.3 From ed6aefed726a305bd36344e230d2a9e9301226fc Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 31 May 2016 16:35:09 +0530 Subject: Revert "ARCv2: spinlock/rwlock/atomics: Delayed retry of failed SCOND with exponential backoff" This reverts commit e78fdfef84be13a5c2b8276e12203cdf24778596. The issue was fixed in hardware in HS2.1C release and there are no known external users of affected RTL so revert the whole delayed retry series ! 
Signed-off-by: Vineet Gupta --- arch/arc/Kconfig | 5 - arch/arc/include/asm/atomic.h | 46 +------ arch/arc/include/asm/spinlock.h | 293 ---------------------------------------- 3 files changed, 4 insertions(+), 340 deletions(-) (limited to 'arch/arc/include') diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index be9d0b5ae0cc..0d3e59f56974 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -389,11 +389,6 @@ config ARC_HAS_LLSC default y depends on !ARC_CANT_LLSC -config ARC_STAR_9000923308 - bool "Workaround for llock/scond livelock" - default n - depends on ISA_ARCV2 && SMP && ARC_HAS_LLSC - config ARC_HAS_SWAPE bool "Insn: SWAPE (endian-swap)" default y diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h index 75c8226317f4..dd683995bc9d 100644 --- a/arch/arc/include/asm/atomic.h +++ b/arch/arc/include/asm/atomic.h @@ -25,51 +25,17 @@ #define atomic_set(v, i) WRITE_ONCE(((v)->counter), (i)) -#ifdef CONFIG_ARC_STAR_9000923308 - -#define SCOND_FAIL_RETRY_VAR_DEF \ - unsigned int delay = 1, tmp; \ - -#define SCOND_FAIL_RETRY_ASM \ - " bz 4f \n" \ - " ; --- scond fail delay --- \n" \ - " mov %[tmp], %[delay] \n" /* tmp = delay */ \ - "2: brne.d %[tmp], 0, 2b \n" /* while (tmp != 0) */ \ - " sub %[tmp], %[tmp], 1 \n" /* tmp-- */ \ - " asl.f %[delay], %[delay], 1 \n" /* delay *= 2 */ \ - " mov.z %[delay], 1 \n" /* handle overflow */ \ - " b 1b \n" /* start over */ \ - "4: ; --- success --- \n" \ - -#define SCOND_FAIL_RETRY_VARS \ - ,[delay] "+&r" (delay),[tmp] "=&r" (tmp) \ - -#else /* !CONFIG_ARC_STAR_9000923308 */ - -#define SCOND_FAIL_RETRY_VAR_DEF - -#define SCOND_FAIL_RETRY_ASM \ - " bnz 1b \n" \ - -#define SCOND_FAIL_RETRY_VARS - -#endif - #define ATOMIC_OP(op, c_op, asm_op) \ static inline void atomic_##op(int i, atomic_t *v) \ { \ - unsigned int val; \ - SCOND_FAIL_RETRY_VAR_DEF \ + unsigned int val; \ \ __asm__ __volatile__( \ "1: llock %[val], [%[ctr]] \n" \ " " #asm_op " %[val], %[val], %[i] \n" \ " scond %[val], [%[ctr]] \n" \ - " \n" \ - SCOND_FAIL_RETRY_ASM \ - \ + " bnz 1b \n" \ : [val] "=&r" (val) /* Early clobber to prevent reg reuse */ \ - SCOND_FAIL_RETRY_VARS \ : [ctr] "r" (&v->counter), /* Not "m": llock only supports reg direct addr mode */ \ [i] "ir" (i) \ : "cc"); \ @@ -78,8 +44,7 @@ static inline void atomic_##op(int i, atomic_t *v) \ #define ATOMIC_OP_RETURN(op, c_op, asm_op) \ static inline int atomic_##op##_return(int i, atomic_t *v) \ { \ - unsigned int val; \ - SCOND_FAIL_RETRY_VAR_DEF \ + unsigned int val; \ \ /* \ * Explicit full memory barrier needed before/after as \ @@ -91,11 +56,8 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ "1: llock %[val], [%[ctr]] \n" \ " " #asm_op " %[val], %[val], %[i] \n" \ " scond %[val], [%[ctr]] \n" \ - " \n" \ - SCOND_FAIL_RETRY_ASM \ - \ + " bnz 1b \n" \ : [val] "=&r" (val) \ - SCOND_FAIL_RETRY_VARS \ : [ctr] "r" (&v->counter), \ [i] "ir" (i) \ : "cc"); \ diff --git a/arch/arc/include/asm/spinlock.h b/arch/arc/include/asm/spinlock.h index 5e01bdf968ea..cded4a9b5438 100644 --- a/arch/arc/include/asm/spinlock.h +++ b/arch/arc/include/asm/spinlock.h @@ -20,11 +20,6 @@ #ifdef CONFIG_ARC_HAS_LLSC -/* - * A normal LLOCK/SCOND based system, w/o need for livelock workaround - */ -#ifndef CONFIG_ARC_STAR_9000923308 - static inline void arch_spin_lock(arch_spinlock_t *lock) { unsigned int val; @@ -238,294 +233,6 @@ static inline void arch_write_unlock(arch_rwlock_t *rw) smp_mb(); } -#else /* CONFIG_ARC_STAR_9000923308 */ - -/* - * HS38x4 could get into a LLOCK/SCOND livelock in case of multiple 
overlapping - * coherency transactions in the SCU. The exclusive line state keeps rotating - * among contenting cores leading to a never ending cycle. So break the cycle - * by deferring the retry of failed exclusive access (SCOND). The actual delay - * needed is function of number of contending cores as well as the unrelated - * coherency traffic from other cores. To keep the code simple, start off with - * small delay of 1 which would suffice most cases and in case of contention - * double the delay. Eventually the delay is sufficient such that the coherency - * pipeline is drained, thus a subsequent exclusive access would succeed. - */ - -#define SCOND_FAIL_RETRY_VAR_DEF \ - unsigned int delay, tmp; \ - -#define SCOND_FAIL_RETRY_ASM \ - " ; --- scond fail delay --- \n" \ - " mov %[tmp], %[delay] \n" /* tmp = delay */ \ - "2: brne.d %[tmp], 0, 2b \n" /* while (tmp != 0) */ \ - " sub %[tmp], %[tmp], 1 \n" /* tmp-- */ \ - " asl.f %[delay], %[delay], 1 \n" /* delay *= 2 */ \ - " mov.z %[delay], 1 \n" /* handle overflow */ \ - " b 1b \n" /* start over */ \ - " \n" \ - "4: ; --- done --- \n" \ - -#define SCOND_FAIL_RETRY_VARS \ - ,[delay] "=&r" (delay), [tmp] "=&r" (tmp) \ - -static inline void arch_spin_lock(arch_spinlock_t *lock) -{ - unsigned int val; - SCOND_FAIL_RETRY_VAR_DEF; - - smp_mb(); - - __asm__ __volatile__( - "0: mov %[delay], 1 \n" - "1: llock %[val], [%[slock]] \n" - " breq %[val], %[LOCKED], 1b \n" /* spin while LOCKED */ - " scond %[LOCKED], [%[slock]] \n" /* acquire */ - " bz 4f \n" /* done */ - " \n" - SCOND_FAIL_RETRY_ASM - - : [val] "=&r" (val) - SCOND_FAIL_RETRY_VARS - : [slock] "r" (&(lock->slock)), - [LOCKED] "r" (__ARCH_SPIN_LOCK_LOCKED__) - : "memory", "cc"); - - smp_mb(); -} - -/* 1 - lock taken successfully */ -static inline int arch_spin_trylock(arch_spinlock_t *lock) -{ - unsigned int val, got_it = 0; - SCOND_FAIL_RETRY_VAR_DEF; - - smp_mb(); - - __asm__ __volatile__( - "0: mov %[delay], 1 \n" - "1: llock %[val], [%[slock]] \n" - " breq %[val], %[LOCKED], 4f \n" /* already LOCKED, just bail */ - " scond %[LOCKED], [%[slock]] \n" /* acquire */ - " bz.d 4f \n" - " mov.z %[got_it], 1 \n" /* got it */ - " \n" - SCOND_FAIL_RETRY_ASM - - : [val] "=&r" (val), - [got_it] "+&r" (got_it) - SCOND_FAIL_RETRY_VARS - : [slock] "r" (&(lock->slock)), - [LOCKED] "r" (__ARCH_SPIN_LOCK_LOCKED__) - : "memory", "cc"); - - smp_mb(); - - return got_it; -} - -static inline void arch_spin_unlock(arch_spinlock_t *lock) -{ - smp_mb(); - - lock->slock = __ARCH_SPIN_LOCK_UNLOCKED__; - - smp_mb(); -} - -/* - * Read-write spinlocks, allowing multiple readers but only one writer. - * Unfair locking as Writers could be starved indefinitely by Reader(s) - */ - -static inline void arch_read_lock(arch_rwlock_t *rw) -{ - unsigned int val; - SCOND_FAIL_RETRY_VAR_DEF; - - smp_mb(); - - /* - * zero means writer holds the lock exclusively, deny Reader. 
- * Otherwise grant lock to first/subseq reader - * - * if (rw->counter > 0) { - * rw->counter--; - * ret = 1; - * } - */ - - __asm__ __volatile__( - "0: mov %[delay], 1 \n" - "1: llock %[val], [%[rwlock]] \n" - " brls %[val], %[WR_LOCKED], 1b\n" /* <= 0: spin while write locked */ - " sub %[val], %[val], 1 \n" /* reader lock */ - " scond %[val], [%[rwlock]] \n" - " bz 4f \n" /* done */ - " \n" - SCOND_FAIL_RETRY_ASM - - : [val] "=&r" (val) - SCOND_FAIL_RETRY_VARS - : [rwlock] "r" (&(rw->counter)), - [WR_LOCKED] "ir" (0) - : "memory", "cc"); - - smp_mb(); -} - -/* 1 - lock taken successfully */ -static inline int arch_read_trylock(arch_rwlock_t *rw) -{ - unsigned int val, got_it = 0; - SCOND_FAIL_RETRY_VAR_DEF; - - smp_mb(); - - __asm__ __volatile__( - "0: mov %[delay], 1 \n" - "1: llock %[val], [%[rwlock]] \n" - " brls %[val], %[WR_LOCKED], 4f\n" /* <= 0: already write locked, bail */ - " sub %[val], %[val], 1 \n" /* counter-- */ - " scond %[val], [%[rwlock]] \n" - " bz.d 4f \n" - " mov.z %[got_it], 1 \n" /* got it */ - " \n" - SCOND_FAIL_RETRY_ASM - - : [val] "=&r" (val), - [got_it] "+&r" (got_it) - SCOND_FAIL_RETRY_VARS - : [rwlock] "r" (&(rw->counter)), - [WR_LOCKED] "ir" (0) - : "memory", "cc"); - - smp_mb(); - - return got_it; -} - -static inline void arch_write_lock(arch_rwlock_t *rw) -{ - unsigned int val; - SCOND_FAIL_RETRY_VAR_DEF; - - smp_mb(); - - /* - * If reader(s) hold lock (lock < __ARCH_RW_LOCK_UNLOCKED__), - * deny writer. Otherwise if unlocked grant to writer - * Hence the claim that Linux rwlocks are unfair to writers. - * (can be starved for an indefinite time by readers). - * - * if (rw->counter == __ARCH_RW_LOCK_UNLOCKED__) { - * rw->counter = 0; - * ret = 1; - * } - */ - - __asm__ __volatile__( - "0: mov %[delay], 1 \n" - "1: llock %[val], [%[rwlock]] \n" - " brne %[val], %[UNLOCKED], 1b \n" /* while !UNLOCKED spin */ - " mov %[val], %[WR_LOCKED] \n" - " scond %[val], [%[rwlock]] \n" - " bz 4f \n" - " \n" - SCOND_FAIL_RETRY_ASM - - : [val] "=&r" (val) - SCOND_FAIL_RETRY_VARS - : [rwlock] "r" (&(rw->counter)), - [UNLOCKED] "ir" (__ARCH_RW_LOCK_UNLOCKED__), - [WR_LOCKED] "ir" (0) - : "memory", "cc"); - - smp_mb(); -} - -/* 1 - lock taken successfully */ -static inline int arch_write_trylock(arch_rwlock_t *rw) -{ - unsigned int val, got_it = 0; - SCOND_FAIL_RETRY_VAR_DEF; - - smp_mb(); - - __asm__ __volatile__( - "0: mov %[delay], 1 \n" - "1: llock %[val], [%[rwlock]] \n" - " brne %[val], %[UNLOCKED], 4f \n" /* !UNLOCKED, bail */ - " mov %[val], %[WR_LOCKED] \n" - " scond %[val], [%[rwlock]] \n" - " bz.d 4f \n" - " mov.z %[got_it], 1 \n" /* got it */ - " \n" - SCOND_FAIL_RETRY_ASM - - : [val] "=&r" (val), - [got_it] "+&r" (got_it) - SCOND_FAIL_RETRY_VARS - : [rwlock] "r" (&(rw->counter)), - [UNLOCKED] "ir" (__ARCH_RW_LOCK_UNLOCKED__), - [WR_LOCKED] "ir" (0) - : "memory", "cc"); - - smp_mb(); - - return got_it; -} - -static inline void arch_read_unlock(arch_rwlock_t *rw) -{ - unsigned int val; - - smp_mb(); - - /* - * rw->counter++; - */ - __asm__ __volatile__( - "1: llock %[val], [%[rwlock]] \n" - " add %[val], %[val], 1 \n" - " scond %[val], [%[rwlock]] \n" - " bnz 1b \n" - " \n" - : [val] "=&r" (val) - : [rwlock] "r" (&(rw->counter)) - : "memory", "cc"); - - smp_mb(); -} - -static inline void arch_write_unlock(arch_rwlock_t *rw) -{ - unsigned int val; - - smp_mb(); - - /* - * rw->counter = __ARCH_RW_LOCK_UNLOCKED__; - */ - __asm__ __volatile__( - "1: llock %[val], [%[rwlock]] \n" - " scond %[UNLOCKED], [%[rwlock]]\n" - " bnz 1b \n" - " \n" - : [val] "=&r" (val) - : 
[rwlock] "r" (&(rw->counter)), - [UNLOCKED] "r" (__ARCH_RW_LOCK_UNLOCKED__) - : "memory", "cc"); - - smp_mb(); -} - -#undef SCOND_FAIL_RETRY_VAR_DEF -#undef SCOND_FAIL_RETRY_ASM -#undef SCOND_FAIL_RETRY_VARS - -#endif /* CONFIG_ARC_STAR_9000923308 */ - #else /* !CONFIG_ARC_HAS_LLSC */ static inline void arch_spin_lock(arch_spinlock_t *lock) -- cgit v1.2.3 From 54d87d600adbe9889bccaff38420cec02250993b Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 24 Jun 2016 14:48:58 -0700 Subject: arc: get rid of superfluous __GFP_REPEAT __GFP_REPEAT has a rather weak semantic but since it has been introduced around 2.6.12 it has been ignored for low order allocations. pte_alloc_one_kernel uses __get_order_pte but this is obviously always zero because BITS_FOR_PTE is not larger than 9 yet the page size is always larger than 4K. This means that this flag has never been actually useful here because it has always been used only for PAGE_ALLOC_COSTLY requests. Link: http://lkml.kernel.org/r/1464599699-30131-7-git-send-email-mhocko@kernel.org Signed-off-by: Michal Hocko Acked-by: Vineet Gupta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arc/include/asm/pgalloc.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/arc/include') diff --git a/arch/arc/include/asm/pgalloc.h b/arch/arc/include/asm/pgalloc.h index 86ed671286df..3749234b7419 100644 --- a/arch/arc/include/asm/pgalloc.h +++ b/arch/arc/include/asm/pgalloc.h @@ -95,7 +95,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, { pte_t *pte; - pte = (pte_t *) __get_free_pages(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO, + pte = (pte_t *) __get_free_pages(GFP_KERNEL | __GFP_ZERO, __get_order_pte()); return pte; @@ -107,7 +107,7 @@ pte_alloc_one(struct mm_struct *mm, unsigned long address) pgtable_t pte_pg; struct page *page; - pte_pg = (pgtable_t)__get_free_pages(GFP_KERNEL | __GFP_REPEAT, __get_order_pte()); + pte_pg = (pgtable_t)__get_free_pages(GFP_KERNEL, __get_order_pte()); if (!pte_pg) return 0; memzero((void *)pte_pg, PTRS_PER_PTE * sizeof(pte_t)); -- cgit v1.2.3
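As a coda, the "obviously always zero" claim above can be made concrete with a little arithmetic. The program below is an illustration built on assumed values (8K default page size, BITS_FOR_PTE of 8, a 4-byte pte_t; the argument only needs BITS_FOR_PTE <= 9), with a simplified stand-in for the kernel's get_order(). An order-0 allocation sits well below PAGE_ALLOC_COSTLY_ORDER (3), the threshold beneath which __GFP_REPEAT has long been ignored, so dropping the flag changes nothing:

#include <stdio.h>

#define PAGE_SHIFT	13			/* 8K pages, the ARC default */
#define PAGE_SIZE	(1UL << PAGE_SHIFT)
#define BITS_FOR_PTE	8			/* example config, never > 9 */
#define PTRS_PER_PTE	(1UL << BITS_FOR_PTE)

/* simplified stand-in for the kernel's get_order() */
static int get_order(unsigned long size)
{
	int order = 0;
	unsigned long pages = (size + PAGE_SIZE - 1) / PAGE_SIZE;

	while ((1UL << order) < pages)
		order++;
	return order;
}

int main(void)
{
	unsigned long table = PTRS_PER_PTE * 4;	/* assuming 4-byte pte_t */

	/* prints "1024 bytes -> order 0", i.e. a single page */
	printf("%lu bytes -> order %d\n", table, get_order(table));
	return 0;
}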