From ff7b437f36b026dcd7351f86a90a0424c891dc06 Mon Sep 17 00:00:00 2001 From: Brendan Higgins Date: Mon, 23 Sep 2019 02:02:44 -0700 Subject: kunit: defconfig: add defconfigs for building KUnit tests Add defconfig for UML and a fragment that can be used to configure other architectures for building KUnit tests. Add option to kunit_tool to use a defconfig to create the kunitconfig. Signed-off-by: Brendan Higgins Reviewed-by: Greg Kroah-Hartman Reviewed-by: Logan Gunthorpe Reviewed-by: Stephen Boyd Signed-off-by: Shuah Khan --- arch/um/configs/kunit_defconfig | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 arch/um/configs/kunit_defconfig (limited to 'arch') diff --git a/arch/um/configs/kunit_defconfig b/arch/um/configs/kunit_defconfig new file mode 100644 index 000000000000..9235b7d42d38 --- /dev/null +++ b/arch/um/configs/kunit_defconfig @@ -0,0 +1,3 @@ +CONFIG_KUNIT=y +CONFIG_KUNIT_TEST=y +CONFIG_KUNIT_EXAMPLE_TEST=y -- cgit v1.2.3 From 2b730952066cd022d1f46e801f06ca6ca9878823 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sat, 28 Sep 2019 16:53:56 +0200 Subject: x86/microcode/amd: Fix two -Wunused-but-set-variable warnings The dummy variable is the high part of the microcode revision MSR which is defined as reserved. Mark it unused so that W=1 builds don't trigger the above warning. No functional changes. Signed-off-by: Borislav Petkov Cc: x86@kernel.org Link: https://lkml.kernel.org/r/20190928162559.26294-1-bp@alien8.de --- arch/x86/kernel/cpu/microcode/amd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index a0e52bd00ecc..3f6b137ef4e6 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -567,7 +567,7 @@ int __init save_microcode_in_initrd_amd(unsigned int cpuid_1_eax) void reload_ucode_amd(void) { struct microcode_amd *mc; - u32 rev, dummy; + u32 rev, dummy __always_unused; mc = (struct microcode_amd *)amd_ucode_patch; @@ -673,7 +673,7 @@ static enum ucode_state apply_microcode_amd(int cpu) struct ucode_cpu_info *uci; struct ucode_patch *p; enum ucode_state ret; - u32 rev, dummy; + u32 rev, dummy __always_unused; BUG_ON(raw_smp_processor_id() != cpu); -- cgit v1.2.3 From 47cd84e98f512eac5aad988f08baff432aea35ba Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sat, 28 Sep 2019 19:02:29 +0200 Subject: x86/mce/amd: Make disable_err_thresholding() static No functional changes. Signed-off-by: Borislav Petkov Cc: x86@kernel.org Link: https://lkml.kernel.org/r/20190928170539.2729-1-bp@alien8.de --- arch/x86/kernel/cpu/mce/amd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index 6ea7fdc82f3c..5167bd2bb6b1 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -583,7 +583,7 @@ bool amd_filter_mce(struct mce *m) * - Prevent possible spurious interrupts from the IF bank on Family 0x17 * Models 0x10-0x2F due to Erratum #1114. */ -void disable_err_thresholding(struct cpuinfo_x86 *c, unsigned int bank) +static void disable_err_thresholding(struct cpuinfo_x86 *c, unsigned int bank) { int i, num_msrs; u64 hwcr; -- cgit v1.2.3 From 6e898d2bf67a82df0aa0c955adc9278faba9a635 Mon Sep 17 00:00:00 2001 From: Tony W Wang-oc Date: Wed, 18 Sep 2019 14:19:30 +0800 Subject: x86/mce: Add Zhaoxin MCE support All newer Zhaoxin CPUs are compatible with Intel's Machine-Check Architecture, so add support for them. 
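[ ed: An aside on the -Wunused-but-set-variable fix further up, not on
  this Zhaoxin patch. The rdmsr() macro expands to two output lvalues
  for the EDX:EAX halves of the MSR, so a throwaway variable is needed
  for the reserved high half even though nothing reads it afterwards.
  A minimal sketch of the pattern, assuming the usual kernel MSR
  headers are in scope:

        u32 rev, dummy __always_unused;

        /* rdmsr() must be given two lvalues; the high half is reserved */
        rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);

  __always_unused expands to __attribute__((__unused__)), telling the
  compiler the unused assignment is intentional, which is what keeps
  W=1 builds quiet. ]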
[ bp: Reflow comment in vendor_disable_error_reporting() and massage commit message. ] Signed-off-by: Tony W Wang-oc Signed-off-by: Borislav Petkov Cc: CooperYan@zhaoxin.com Cc: DavidWang@zhaoxin.com Cc: HerryYang@zhaoxin.com Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: linux-edac Cc: QiyuanWang@zhaoxin.com Cc: Thomas Gleixner Cc: Tony Luck Cc: x86-ml Link: https://lkml.kernel.org/r/1568787573-1297-2-git-send-email-TonyWWang-oc@zhaoxin.com --- arch/x86/kernel/cpu/mce/core.c | 44 +++++++++++++++++++++++++++++------------- 1 file changed, 31 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 743370ee4983..a780fe02aa47 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -488,8 +488,9 @@ int mce_usable_address(struct mce *m) if (!(m->status & MCI_STATUS_ADDRV)) return 0; - /* Checks after this one are Intel-specific: */ - if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) + /* Checks after this one are Intel/Zhaoxin-specific: */ + if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL && + boot_cpu_data.x86_vendor != X86_VENDOR_ZHAOXIN) return 1; if (!(m->status & MCI_STATUS_MISCV)) @@ -507,10 +508,13 @@ EXPORT_SYMBOL_GPL(mce_usable_address); bool mce_is_memory_error(struct mce *m) { - if (m->cpuvendor == X86_VENDOR_AMD || - m->cpuvendor == X86_VENDOR_HYGON) { + switch (m->cpuvendor) { + case X86_VENDOR_AMD: + case X86_VENDOR_HYGON: return amd_mce_is_memory_error(m); - } else if (m->cpuvendor == X86_VENDOR_INTEL) { + + case X86_VENDOR_INTEL: + case X86_VENDOR_ZHAOXIN: /* * Intel SDM Volume 3B - 15.9.2 Compound Error Codes * @@ -527,9 +531,10 @@ bool mce_is_memory_error(struct mce *m) return (m->status & 0xef80) == BIT(7) || (m->status & 0xef00) == BIT(8) || (m->status & 0xeffc) == 0xc; - } - return false; + default: + return false; + } } EXPORT_SYMBOL_GPL(mce_is_memory_error); @@ -1697,6 +1702,18 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) if (c->x86 == 6 && c->x86_model == 45) quirk_no_way_out = quirk_sandybridge_ifu; } + + if (c->x86_vendor == X86_VENDOR_ZHAOXIN) { + /* + * All newer Zhaoxin CPUs support MCE broadcasting. Enable + * synchronization with a one second timeout. + */ + if (c->x86 > 6 || (c->x86_model == 0x19 || c->x86_model == 0x1f)) { + if (cfg->monarch_timeout < 0) + cfg->monarch_timeout = USEC_PER_SEC; + } + } + if (cfg->monarch_timeout < 0) cfg->monarch_timeout = 0; if (cfg->bootlog != 0) @@ -2014,15 +2031,16 @@ static void mce_disable_error_reporting(void) static void vendor_disable_error_reporting(void) { /* - * Don't clear on Intel or AMD or Hygon CPUs. Some of these MSRs - * are socket-wide. - * Disabling them for just a single offlined CPU is bad, since it will - * inhibit reporting for all shared resources on the socket like the - * last level cache (LLC), the integrated memory controller (iMC), etc. + * Don't clear on Intel or AMD or Hygon or Zhaoxin CPUs. Some of these + * MSRs are socket-wide. Disabling them for just a single offlined CPU + * is bad, since it will inhibit reporting for all shared resources on + * the socket like the last level cache (LLC), the integrated memory + * controller (iMC), etc. 
*/ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL || boot_cpu_data.x86_vendor == X86_VENDOR_HYGON || - boot_cpu_data.x86_vendor == X86_VENDOR_AMD) + boot_cpu_data.x86_vendor == X86_VENDOR_AMD || + boot_cpu_data.x86_vendor == X86_VENDOR_ZHAOXIN) return; mce_disable_error_reporting(); -- cgit v1.2.3 From 5a3d56a034be9e8e87a6cb9ed3f2928184db1417 Mon Sep 17 00:00:00 2001 From: Tony W Wang-oc Date: Wed, 18 Sep 2019 14:19:32 +0800 Subject: x86/mce: Add Zhaoxin CMCI support All newer Zhaoxin CPUs support CMCI and are compatible with Intel's Machine-Check Architecture. Add that support for Zhaoxin CPUs. [ bp: Massage comments and export intel_init_cmci(). ] Signed-off-by: Tony W Wang-oc Signed-off-by: Borislav Petkov Cc: CooperYan@zhaoxin.com Cc: DavidWang@zhaoxin.com Cc: HerryYang@zhaoxin.com Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: linux-edac Cc: QiyuanWang@zhaoxin.com Cc: Thomas Gleixner Cc: Tony Luck Cc: x86-ml Link: https://lkml.kernel.org/r/1568787573-1297-4-git-send-email-TonyWWang-oc@zhaoxin.com --- arch/x86/kernel/cpu/mce/core.c | 27 +++++++++++++++++++++++++++ arch/x86/kernel/cpu/mce/intel.c | 6 ++++-- arch/x86/kernel/cpu/mce/internal.h | 2 ++ 3 files changed, 33 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index a780fe02aa47..1e6b8a478d59 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -1777,6 +1777,29 @@ static void mce_centaur_feature_init(struct cpuinfo_x86 *c) } } +static void mce_zhaoxin_feature_init(struct cpuinfo_x86 *c) +{ + struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array); + + /* + * These CPUs have MCA bank 8 which reports only one error type called + * SVAD (System View Address Decoder). The reporting of that error is + * controlled by IA32_MC8.CTL.0. + * + * If enabled, prefetching on these CPUs will cause SVAD MCE when + * virtual machines start and result in a system panic. Always disable + * bank 8 SVAD error by default. + */ + if ((c->x86 == 7 && c->x86_model == 0x1b) || + (c->x86_model == 0x19 || c->x86_model == 0x1f)) { + if (this_cpu_read(mce_num_banks) > 8) + mce_banks[8].ctl = 0; + } + + intel_init_cmci(); + mce_adjust_timer = cmci_intel_adjust_timer; +} + static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c) { switch (c->x86_vendor) { @@ -1798,6 +1821,10 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c) mce_centaur_feature_init(c); break; + case X86_VENDOR_ZHAOXIN: + mce_zhaoxin_feature_init(c); + break; + default: break; } diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c index 88cd9598fa57..fb6e990b5a77 100644 --- a/arch/x86/kernel/cpu/mce/intel.c +++ b/arch/x86/kernel/cpu/mce/intel.c @@ -85,8 +85,10 @@ static int cmci_supported(int *banks) * initialization is vendor keyed and this * makes sure none of the backdoors are entered otherwise. 
*/ - if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) + if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL && + boot_cpu_data.x86_vendor != X86_VENDOR_ZHAOXIN) return 0; + if (!boot_cpu_has(X86_FEATURE_APIC) || lapic_get_maxlvt() < 6) return 0; rdmsrl(MSR_IA32_MCG_CAP, cap); @@ -423,7 +425,7 @@ void cmci_disable_bank(int bank) raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); } -static void intel_init_cmci(void) +void intel_init_cmci(void) { int banks; diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h index 43031db429d2..a7ee23045b9e 100644 --- a/arch/x86/kernel/cpu/mce/internal.h +++ b/arch/x86/kernel/cpu/mce/internal.h @@ -45,11 +45,13 @@ unsigned long cmci_intel_adjust_timer(unsigned long interval); bool mce_intel_cmci_poll(void); void mce_intel_hcpu_update(unsigned long cpu); void cmci_disable_bank(int bank); +void intel_init_cmci(void); #else # define cmci_intel_adjust_timer mce_adjust_timer_default static inline bool mce_intel_cmci_poll(void) { return false; } static inline void mce_intel_hcpu_update(unsigned long cpu) { } static inline void cmci_disable_bank(int bank) { } +static inline void intel_init_cmci(void) { } #endif void mce_timer_kick(unsigned long interval); -- cgit v1.2.3 From 70f0c230031dfef3c9b3e37b2a8c18d3f7186fb2 Mon Sep 17 00:00:00 2001 From: Tony W Wang-oc Date: Wed, 18 Sep 2019 14:19:33 +0800 Subject: x86/mce: Add Zhaoxin LMCE support Newer Zhaoxin CPUs support LMCE compatible with Intel. Add support for that. [ bp: Export functions and massage. ] Signed-off-by: Tony W Wang-oc Signed-off-by: Borislav Petkov Cc: CooperYan@zhaoxin.com Cc: DavidWang@zhaoxin.com Cc: HerryYang@zhaoxin.com Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: linux-edac Cc: QiyuanWang@zhaoxin.com Cc: Thomas Gleixner Cc: Tony Luck Cc: x86-ml Link: https://lkml.kernel.org/r/1568787573-1297-5-git-send-email-TonyWWang-oc@zhaoxin.com --- arch/x86/kernel/cpu/mce/core.c | 22 ++++++++++++++++++++-- arch/x86/kernel/cpu/mce/intel.c | 4 ++-- arch/x86/kernel/cpu/mce/internal.h | 4 ++++ 3 files changed, 26 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 1e6b8a478d59..5f42f25bac8f 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -1132,6 +1132,12 @@ static bool __mc_check_crashing_cpu(int cpu) u64 mcgstatus; mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS); + + if (boot_cpu_data.x86_vendor == X86_VENDOR_ZHAOXIN) { + if (mcgstatus & MCG_STATUS_LMCES) + return false; + } + if (mcgstatus & MCG_STATUS_RIPV) { mce_wrmsrl(MSR_IA32_MCG_STATUS, 0); return true; @@ -1282,9 +1288,10 @@ void do_machine_check(struct pt_regs *regs, long error_code) /* * Check if this MCE is signaled to only this logical processor, - * on Intel only. + * on Intel, Zhaoxin only. 
*/ - if (m.cpuvendor == X86_VENDOR_INTEL) + if (m.cpuvendor == X86_VENDOR_INTEL || + m.cpuvendor == X86_VENDOR_ZHAOXIN) lmce = m.mcgstatus & MCG_STATUS_LMCES; /* @@ -1797,9 +1804,15 @@ static void mce_zhaoxin_feature_init(struct cpuinfo_x86 *c) } intel_init_cmci(); + intel_init_lmce(); mce_adjust_timer = cmci_intel_adjust_timer; } +static void mce_zhaoxin_feature_clear(struct cpuinfo_x86 *c) +{ + intel_clear_lmce(); +} + static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c) { switch (c->x86_vendor) { @@ -1836,6 +1849,11 @@ static void __mcheck_cpu_clear_vendor(struct cpuinfo_x86 *c) case X86_VENDOR_INTEL: mce_intel_feature_clear(c); break; + + case X86_VENDOR_ZHAOXIN: + mce_zhaoxin_feature_clear(c); + break; + default: break; } diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c index fb6e990b5a77..68a1d25c971e 100644 --- a/arch/x86/kernel/cpu/mce/intel.c +++ b/arch/x86/kernel/cpu/mce/intel.c @@ -444,7 +444,7 @@ void intel_init_cmci(void) cmci_recheck(); } -static void intel_init_lmce(void) +void intel_init_lmce(void) { u64 val; @@ -457,7 +457,7 @@ static void intel_init_lmce(void) wrmsrl(MSR_IA32_MCG_EXT_CTL, val | MCG_EXT_CTL_LMCE_EN); } -static void intel_clear_lmce(void) +void intel_clear_lmce(void) { u64 val; diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h index a7ee23045b9e..842b273bce31 100644 --- a/arch/x86/kernel/cpu/mce/internal.h +++ b/arch/x86/kernel/cpu/mce/internal.h @@ -46,12 +46,16 @@ bool mce_intel_cmci_poll(void); void mce_intel_hcpu_update(unsigned long cpu); void cmci_disable_bank(int bank); void intel_init_cmci(void); +void intel_init_lmce(void); +void intel_clear_lmce(void); #else # define cmci_intel_adjust_timer mce_adjust_timer_default static inline bool mce_intel_cmci_poll(void) { return false; } static inline void mce_intel_hcpu_update(unsigned long cpu) { } static inline void cmci_disable_bank(int bank) { } static inline void intel_init_cmci(void) { } +static inline void intel_init_lmce(void) { } +static inline void intel_clear_lmce(void) { } #endif void mce_timer_kick(unsigned long interval); -- cgit v1.2.3 From 93946a33b5693a6bbcf917a170198ff4afaa7a31 Mon Sep 17 00:00:00 2001 From: Ashok Raj Date: Thu, 22 Aug 2019 23:43:47 +0300 Subject: x86/microcode: Update late microcode in parallel Microcode update was changed to be serialized due to restrictions after Spectre days. Updating serially on a large multi-socket system can be painful since it is being done on one CPU at a time. Cloud customers have expressed discontent as services disappear for a prolonged time. The restriction is that only one core (or only one thread of a core in the case of an SMT system) goes through the update while other cores (or respectively, SMT threads) are quiesced. Do the microcode update only on the first thread of each core while other siblings simply wait for this to complete. [ bp: Simplify, massage, cleanup comments. ] Signed-off-by: Ashok Raj Signed-off-by: Mihai Carabas Signed-off-by: Borislav Petkov Cc: Boris Ostrovsky Cc: "H. 
Peter Anvin" Cc: Ingo Molnar Cc: Jon Grimm Cc: kanth.ghatraju@oracle.com Cc: konrad.wilk@oracle.com Cc: patrick.colp@oracle.com Cc: Thomas Gleixner Cc: Tom Lendacky Cc: x86-ml Link: https://lkml.kernel.org/r/1566506627-16536-2-git-send-email-mihai.carabas@oracle.com --- arch/x86/kernel/cpu/microcode/core.c | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index cb0fdcaf1415..7019d4b2df0c 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -63,11 +63,6 @@ LIST_HEAD(microcode_cache); */ static DEFINE_MUTEX(microcode_mutex); -/* - * Serialize late loading so that CPUs get updated one-by-one. - */ -static DEFINE_RAW_SPINLOCK(update_lock); - struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; struct cpu_info_ctx { @@ -566,11 +561,18 @@ static int __reload_late(void *info) if (__wait_for_cpus(&late_cpus_in, NSEC_PER_SEC)) return -1; - raw_spin_lock(&update_lock); - apply_microcode_local(&err); - raw_spin_unlock(&update_lock); + /* + * On an SMT system, it suffices to load the microcode on one sibling of + * the core because the microcode engine is shared between the threads. + * Synchronization still needs to take place so that no concurrent + * loading attempts happen on multiple threads of an SMT core. See + * below. + */ + if (cpumask_first(topology_sibling_cpumask(cpu)) == cpu) + apply_microcode_local(&err); + else + goto wait_for_siblings; - /* siblings return UCODE_OK because their engine got updated already */ if (err > UCODE_NFOUND) { pr_warn("Error reloading microcode on CPU %d\n", cpu); ret = -1; @@ -578,14 +580,18 @@ static int __reload_late(void *info) ret = 1; } +wait_for_siblings: + if (__wait_for_cpus(&late_cpus_out, NSEC_PER_SEC)) + panic("Timeout during microcode update!\n"); + /* - * Increase the wait timeout to a safe value here since we're - * serializing the microcode update and that could take a while on a - * large number of CPUs. And that is fine as the *actual* timeout will - * be determined by the last CPU finished updating and thus cut short. + * At least one thread has completed update on each core. + * For others, simply call the update to make sure the + * per-cpu cpuinfo can be updated with right microcode + * revision. */ - if (__wait_for_cpus(&late_cpus_out, NSEC_PER_SEC * num_online_cpus())) - panic("Timeout during microcode update!\n"); + if (cpumask_first(topology_sibling_cpumask(cpu)) != cpu) + apply_microcode_local(&err); return ret; } -- cgit v1.2.3 From 811ae8ba6dca6b91a3ceccf9d40b98818cc4f400 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sat, 24 Aug 2019 10:01:53 +0200 Subject: x86/microcode/intel: Issue the revision updated message only on the BSP ... in order to not pollute dmesg with a line for each updated microcode engine. Signed-off-by: Borislav Petkov Cc: Ashok Raj Cc: Boris Ostrovsky Cc: "H. 
Peter Anvin" Cc: Ingo Molnar Cc: Jon Grimm Cc: kanth.ghatraju@oracle.com Cc: konrad.wilk@oracle.com Cc: Mihai Carabas Cc: patrick.colp@oracle.com Cc: Thomas Gleixner Cc: Tom Lendacky Cc: x86-ml Link: https://lkml.kernel.org/r/20190824085341.GC16813@zn.tnic --- arch/x86/kernel/cpu/microcode/intel.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index ce799cfe9434..6a99535d7f37 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -791,6 +791,7 @@ static enum ucode_state apply_microcode_intel(int cpu) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu; struct cpuinfo_x86 *c = &cpu_data(cpu); + bool bsp = c->cpu_index == boot_cpu_data.cpu_index; struct microcode_intel *mc; enum ucode_state ret; static int prev_rev; @@ -836,7 +837,7 @@ static enum ucode_state apply_microcode_intel(int cpu) return UCODE_ERROR; } - if (rev != prev_rev) { + if (bsp && rev != prev_rev) { pr_info("updated to revision 0x%x, date = %04x-%02x-%02x\n", rev, mc->hdr.date & 0xffff, @@ -852,7 +853,7 @@ out: c->microcode = rev; /* Update boot_cpu_data's revision too, if we're on the BSP: */ - if (c->cpu_index == boot_cpu_data.cpu_index) + if (bsp) boot_cpu_data.microcode = rev; return ret; -- cgit v1.2.3 From 7775cbaa11153ec5489cfa31de95aa1b5f29310b Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Thu, 12 Sep 2019 09:02:50 +0200 Subject: KVM: s390: Remove unused parameter from __inject_sigp_restart() It's not required, so drop it to make it clear that this interrupt does not have any extra parameters. Signed-off-by: Thomas Huth Link: https://lore.kernel.org/kvm/20190912070250.15131-1-thuth@redhat.com Reviewed-by: Janosch Frank Reviewed-by: David Hildenbrand Signed-off-by: Christian Borntraeger --- arch/s390/kvm/interrupt.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index d1ccc168c071..165dea4c7f19 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -1477,8 +1477,7 @@ static int __inject_sigp_stop(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) return 0; } -static int __inject_sigp_restart(struct kvm_vcpu *vcpu, - struct kvm_s390_irq *irq) +static int __inject_sigp_restart(struct kvm_vcpu *vcpu) { struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; @@ -2007,7 +2006,7 @@ static int do_inject_vcpu(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) rc = __inject_sigp_stop(vcpu, irq); break; case KVM_S390_RESTART: - rc = __inject_sigp_restart(vcpu, irq); + rc = __inject_sigp_restart(vcpu); break; case KVM_S390_INT_CLOCK_COMP: rc = __inject_ckc(vcpu); -- cgit v1.2.3 From f76f6371643b563a7168a6ba5713ce93caa36ecc Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Wed, 2 Oct 2019 03:56:27 -0400 Subject: KVM: s390: Cleanup kvm_arch_init error path Both kvm_s390_gib_destroy and debug_unregister test if the needed pointers are not NULL and hence can be called unconditionally. 
Signed-off-by: Janosch Frank Link: https://lore.kernel.org/kvm/20191002075627.3582-1-frankja@linux.ibm.com Reviewed-by: David Hildenbrand Reviewed-by: Thomas Huth Signed-off-by: Christian Borntraeger --- arch/s390/kvm/kvm-s390.c | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index f6db0f1bc867..40af442b2e15 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -453,16 +453,14 @@ static void kvm_s390_cpu_feat_init(void) int kvm_arch_init(void *opaque) { - int rc; + int rc = -ENOMEM; kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long)); if (!kvm_s390_dbf) return -ENOMEM; - if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) { - rc = -ENOMEM; - goto out_debug_unreg; - } + if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) + goto out; kvm_s390_cpu_feat_init(); @@ -470,19 +468,17 @@ int kvm_arch_init(void *opaque) rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC); if (rc) { pr_err("A FLIC registration call failed with rc=%d\n", rc); - goto out_debug_unreg; + goto out; } rc = kvm_s390_gib_init(GAL_ISC); if (rc) - goto out_gib_destroy; + goto out; return 0; -out_gib_destroy: - kvm_s390_gib_destroy(); -out_debug_unreg: - debug_unregister(kvm_s390_dbf); +out: + kvm_arch_exit(); return rc; } -- cgit v1.2.3 From 11031c0d7d6e9bca0df233a8acfd6708d2b89470 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 11 Sep 2019 00:19:00 +0100 Subject: crypto: arm64/gcm-ce - implement 4 way interleave To improve performance on cores with deep pipelines such as ThunderX2, reimplement gcm(aes) using a 4-way interleave rather than the 2-way interleave we use currently. This comes down to a complete rewrite of the GCM part of the combined GCM/GHASH driver, and instead of interleaving two invocations of AES with the GHASH handling at the instruction level, the new version uses a more coarse grained approach where each chunk of 64 bytes is encrypted first and then ghashed (or ghashed and then decrypted in the converse case). The core NEON routine is now able to consume inputs of any size, and tail blocks of less than 64 bytes are handled using overlapping loads and stores, and processed by the same 4-way encryption and hashing routines. This gets rid of most of the branches, and avoids having to return to the C code to handle the tail block using a stack buffer. The table below compares the performance of the old driver and the new one on various micro-architectures and running in various modes. 
| AES-128 | AES-192 | AES-256 | #bytes | 512 | 1500 | 4k | 512 | 1500 | 4k | 512 | 1500 | 4k | -------+-----+------+-----+-----+------+-----+-----+------+-----+ TX2 | 35% | 23% | 11% | 34% | 20% | 9% | 38% | 25% | 16% | EMAG | 11% | 6% | 3% | 12% | 4% | 2% | 11% | 4% | 2% | A72 | 8% | 5% | -4% | 9% | 4% | -5% | 7% | 4% | -5% | A53 | 11% | 6% | -1% | 10% | 8% | -1% | 10% | 8% | -2% | Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm64/crypto/ghash-ce-core.S | 501 ++++++++++++++++++++++++++------------ arch/arm64/crypto/ghash-ce-glue.c | 293 ++++++++++------------ 2 files changed, 467 insertions(+), 327 deletions(-) (limited to 'arch') diff --git a/arch/arm64/crypto/ghash-ce-core.S b/arch/arm64/crypto/ghash-ce-core.S index 410e8afcf5a7..a791c4adf8e6 100644 --- a/arch/arm64/crypto/ghash-ce-core.S +++ b/arch/arm64/crypto/ghash-ce-core.S @@ -13,8 +13,8 @@ T1 .req v2 T2 .req v3 MASK .req v4 - XL .req v5 - XM .req v6 + XM .req v5 + XL .req v6 XH .req v7 IN1 .req v7 @@ -358,20 +358,37 @@ ENTRY(pmull_ghash_update_p8) __pmull_ghash p8 ENDPROC(pmull_ghash_update_p8) - KS0 .req v12 - KS1 .req v13 - INP0 .req v14 - INP1 .req v15 - - .macro load_round_keys, rounds, rk - cmp \rounds, #12 - blo 2222f /* 128 bits */ - beq 1111f /* 192 bits */ - ld1 {v17.4s-v18.4s}, [\rk], #32 -1111: ld1 {v19.4s-v20.4s}, [\rk], #32 -2222: ld1 {v21.4s-v24.4s}, [\rk], #64 - ld1 {v25.4s-v28.4s}, [\rk], #64 - ld1 {v29.4s-v31.4s}, [\rk] + KS0 .req v8 + KS1 .req v9 + KS2 .req v10 + KS3 .req v11 + + INP0 .req v21 + INP1 .req v22 + INP2 .req v23 + INP3 .req v24 + + K0 .req v25 + K1 .req v26 + K2 .req v27 + K3 .req v28 + K4 .req v12 + K5 .req v13 + K6 .req v4 + K7 .req v5 + K8 .req v14 + K9 .req v15 + KK .req v29 + KL .req v30 + KM .req v31 + + .macro load_round_keys, rounds, rk, tmp + add \tmp, \rk, #64 + ld1 {K0.4s-K3.4s}, [\rk] + ld1 {K4.4s-K5.4s}, [\tmp] + add \tmp, \rk, \rounds, lsl #4 + sub \tmp, \tmp, #32 + ld1 {KK.4s-KM.4s}, [\tmp] .endm .macro enc_round, state, key @@ -379,197 +396,367 @@ ENDPROC(pmull_ghash_update_p8) aesmc \state\().16b, \state\().16b .endm - .macro enc_block, state, rounds - cmp \rounds, #12 - b.lo 2222f /* 128 bits */ - b.eq 1111f /* 192 bits */ - enc_round \state, v17 - enc_round \state, v18 -1111: enc_round \state, v19 - enc_round \state, v20 -2222: .irp key, v21, v22, v23, v24, v25, v26, v27, v28, v29 + .macro enc_qround, s0, s1, s2, s3, key + enc_round \s0, \key + enc_round \s1, \key + enc_round \s2, \key + enc_round \s3, \key + .endm + + .macro enc_block, state, rounds, rk, tmp + add \tmp, \rk, #96 + ld1 {K6.4s-K7.4s}, [\tmp], #32 + .irp key, K0, K1, K2, K3, K4 K5 enc_round \state, \key .endr - aese \state\().16b, v30.16b - eor \state\().16b, \state\().16b, v31.16b + + tbnz \rounds, #2, .Lnot128_\@ +.Lout256_\@: + enc_round \state, K6 + enc_round \state, K7 + +.Lout192_\@: + enc_round \state, KK + aese \state\().16b, KL.16b + eor \state\().16b, \state\().16b, KM.16b + + .subsection 1 +.Lnot128_\@: + ld1 {K8.4s-K9.4s}, [\tmp], #32 + enc_round \state, K6 + enc_round \state, K7 + ld1 {K6.4s-K7.4s}, [\tmp] + enc_round \state, K8 + enc_round \state, K9 + tbz \rounds, #1, .Lout192_\@ + b .Lout256_\@ + .previous .endm + .align 6 .macro pmull_gcm_do_crypt, enc - ld1 {SHASH.2d}, [x4], #16 - ld1 {HH.2d}, [x4] - ld1 {XL.2d}, [x1] - ldr x8, [x5, #8] // load lower counter + stp x29, x30, [sp, #-32]! 
+ mov x29, sp + str x19, [sp, #24] + + load_round_keys x7, x6, x8 + + ld1 {SHASH.2d}, [x3], #16 + ld1 {HH.2d-HH4.2d}, [x3] - movi MASK.16b, #0xe1 trn1 SHASH2.2d, SHASH.2d, HH.2d trn2 T1.2d, SHASH.2d, HH.2d -CPU_LE( rev x8, x8 ) - shl MASK.2d, MASK.2d, #57 eor SHASH2.16b, SHASH2.16b, T1.16b - .if \enc == 1 - ldr x10, [sp] - ld1 {KS0.16b-KS1.16b}, [x10] - .endif + trn1 HH34.2d, HH3.2d, HH4.2d + trn2 T1.2d, HH3.2d, HH4.2d + eor HH34.16b, HH34.16b, T1.16b - cbnz x6, 4f + ld1 {XL.2d}, [x4] -0: ld1 {INP0.16b-INP1.16b}, [x3], #32 + cbz x0, 3f // tag only? - rev x9, x8 - add x11, x8, #1 - add x8, x8, #2 + ldr w8, [x5, #12] // load lower counter +CPU_LE( rev w8, w8 ) - .if \enc == 1 - eor INP0.16b, INP0.16b, KS0.16b // encrypt input - eor INP1.16b, INP1.16b, KS1.16b +0: mov w9, #4 // max blocks per round + add x10, x0, #0xf + lsr x10, x10, #4 // remaining blocks + + subs x0, x0, #64 + csel w9, w10, w9, mi + add w8, w8, w9 + + bmi 1f + ld1 {INP0.16b-INP3.16b}, [x2], #64 + .subsection 1 + /* + * Populate the four input registers right to left with up to 63 bytes + * of data, using overlapping loads to avoid branches. + * + * INP0 INP1 INP2 INP3 + * 1 byte | | | |x | + * 16 bytes | | | |xxxxxxxx| + * 17 bytes | | |xxxxxxxx|x | + * 47 bytes | |xxxxxxxx|xxxxxxxx|xxxxxxx | + * etc etc + * + * Note that this code may read up to 15 bytes before the start of + * the input. It is up to the calling code to ensure this is safe if + * this happens in the first iteration of the loop (i.e., when the + * input size is < 16 bytes) + */ +1: mov x15, #16 + ands x19, x0, #0xf + csel x19, x19, x15, ne + adr_l x17, .Lpermute_table + 16 + + sub x11, x15, x19 + add x12, x17, x11 + sub x17, x17, x11 + ld1 {T1.16b}, [x12] + sub x10, x1, x11 + sub x11, x2, x11 + + cmp x0, #-16 + csel x14, x15, xzr, gt + cmp x0, #-32 + csel x15, x15, xzr, gt + cmp x0, #-48 + csel x16, x19, xzr, gt + csel x1, x1, x10, gt + csel x2, x2, x11, gt + + ld1 {INP0.16b}, [x2], x14 + ld1 {INP1.16b}, [x2], x15 + ld1 {INP2.16b}, [x2], x16 + ld1 {INP3.16b}, [x2] + tbl INP3.16b, {INP3.16b}, T1.16b + b 2f + .previous + +2: .if \enc == 0 + bl pmull_gcm_ghash_4x .endif - ld1 {KS0.8b}, [x5] // load upper counter - rev x11, x11 - sub w0, w0, #2 - mov KS1.8b, KS0.8b - ins KS0.d[1], x9 // set lower counter - ins KS1.d[1], x11 + bl pmull_gcm_enc_4x - rev64 T1.16b, INP1.16b + tbnz x0, #63, 6f + st1 {INP0.16b-INP3.16b}, [x1], #64 + .if \enc == 1 + bl pmull_gcm_ghash_4x + .endif + bne 0b - cmp w7, #12 - b.ge 2f // AES-192/256? +3: ldp x19, x10, [sp, #24] + cbz x10, 5f // output tag? 
-1: enc_round KS0, v21 - ext IN1.16b, T1.16b, T1.16b, #8 + ld1 {INP3.16b}, [x10] // load lengths[] + mov w9, #1 + bl pmull_gcm_ghash_4x - enc_round KS1, v21 - pmull2 XH2.1q, SHASH.2d, IN1.2d // a1 * b1 + mov w11, #(0x1 << 24) // BE '1U' + ld1 {KS0.16b}, [x5] + mov KS0.s[3], w11 - enc_round KS0, v22 - eor T1.16b, T1.16b, IN1.16b + enc_block KS0, x7, x6, x12 - enc_round KS1, v22 - pmull XL2.1q, SHASH.1d, IN1.1d // a0 * b0 + ext XL.16b, XL.16b, XL.16b, #8 + rev64 XL.16b, XL.16b + eor XL.16b, XL.16b, KS0.16b + st1 {XL.16b}, [x10] // store tag - enc_round KS0, v23 - pmull XM2.1q, SHASH2.1d, T1.1d // (a1 + a0)(b1 + b0) +4: ldp x29, x30, [sp], #32 + ret - enc_round KS1, v23 - rev64 T1.16b, INP0.16b - ext T2.16b, XL.16b, XL.16b, #8 +5: +CPU_LE( rev w8, w8 ) + str w8, [x5, #12] // store lower counter + st1 {XL.2d}, [x4] + b 4b + +6: ld1 {T1.16b-T2.16b}, [x17], #32 // permute vectors + sub x17, x17, x19, lsl #1 + + cmp w9, #1 + beq 7f + .subsection 1 +7: ld1 {INP2.16b}, [x1] + tbx INP2.16b, {INP3.16b}, T1.16b + mov INP3.16b, INP2.16b + b 8f + .previous + + st1 {INP0.16b}, [x1], x14 + st1 {INP1.16b}, [x1], x15 + st1 {INP2.16b}, [x1], x16 + tbl INP3.16b, {INP3.16b}, T1.16b + tbx INP3.16b, {INP2.16b}, T2.16b +8: st1 {INP3.16b}, [x1] - enc_round KS0, v24 - ext IN1.16b, T1.16b, T1.16b, #8 - eor T1.16b, T1.16b, T2.16b + .if \enc == 1 + ld1 {T1.16b}, [x17] + tbl INP3.16b, {INP3.16b}, T1.16b // clear non-data bits + bl pmull_gcm_ghash_4x + .endif + b 3b + .endm - enc_round KS1, v24 - eor XL.16b, XL.16b, IN1.16b + /* + * void pmull_gcm_encrypt(int blocks, u8 dst[], const u8 src[], + * struct ghash_key const *k, u64 dg[], u8 ctr[], + * int rounds, u8 tag) + */ +ENTRY(pmull_gcm_encrypt) + pmull_gcm_do_crypt 1 +ENDPROC(pmull_gcm_encrypt) - enc_round KS0, v25 - eor T1.16b, T1.16b, XL.16b + /* + * void pmull_gcm_decrypt(int blocks, u8 dst[], const u8 src[], + * struct ghash_key const *k, u64 dg[], u8 ctr[], + * int rounds, u8 tag) + */ +ENTRY(pmull_gcm_decrypt) + pmull_gcm_do_crypt 0 +ENDPROC(pmull_gcm_decrypt) - enc_round KS1, v25 - pmull2 XH.1q, HH.2d, XL.2d // a1 * b1 +pmull_gcm_ghash_4x: + movi MASK.16b, #0xe1 + shl MASK.2d, MASK.2d, #57 - enc_round KS0, v26 - pmull XL.1q, HH.1d, XL.1d // a0 * b0 + rev64 T1.16b, INP0.16b + rev64 T2.16b, INP1.16b + rev64 TT3.16b, INP2.16b + rev64 TT4.16b, INP3.16b - enc_round KS1, v26 - pmull2 XM.1q, SHASH2.2d, T1.2d // (a1 + a0)(b1 + b0) + ext XL.16b, XL.16b, XL.16b, #8 - enc_round KS0, v27 - eor XL.16b, XL.16b, XL2.16b - eor XH.16b, XH.16b, XH2.16b + tbz w9, #2, 0f // <4 blocks? + .subsection 1 +0: movi XH2.16b, #0 + movi XM2.16b, #0 + movi XL2.16b, #0 - enc_round KS1, v27 - eor XM.16b, XM.16b, XM2.16b - ext T1.16b, XL.16b, XH.16b, #8 + tbz w9, #0, 1f // 2 blocks? + tbz w9, #1, 2f // 1 block? 
- enc_round KS0, v28 - eor T2.16b, XL.16b, XH.16b - eor XM.16b, XM.16b, T1.16b + eor T2.16b, T2.16b, XL.16b + ext T1.16b, T2.16b, T2.16b, #8 + b .Lgh3 - enc_round KS1, v28 - eor XM.16b, XM.16b, T2.16b +1: eor TT3.16b, TT3.16b, XL.16b + ext T2.16b, TT3.16b, TT3.16b, #8 + b .Lgh2 - enc_round KS0, v29 - pmull T2.1q, XL.1d, MASK.1d +2: eor TT4.16b, TT4.16b, XL.16b + ext IN1.16b, TT4.16b, TT4.16b, #8 + b .Lgh1 + .previous - enc_round KS1, v29 - mov XH.d[0], XM.d[1] - mov XM.d[1], XL.d[0] + eor T1.16b, T1.16b, XL.16b + ext IN1.16b, T1.16b, T1.16b, #8 - aese KS0.16b, v30.16b - eor XL.16b, XM.16b, T2.16b + pmull2 XH2.1q, HH4.2d, IN1.2d // a1 * b1 + eor T1.16b, T1.16b, IN1.16b + pmull XL2.1q, HH4.1d, IN1.1d // a0 * b0 + pmull2 XM2.1q, HH34.2d, T1.2d // (a1 + a0)(b1 + b0) - aese KS1.16b, v30.16b - ext T2.16b, XL.16b, XL.16b, #8 + ext T1.16b, T2.16b, T2.16b, #8 +.Lgh3: eor T2.16b, T2.16b, T1.16b + pmull2 XH.1q, HH3.2d, T1.2d // a1 * b1 + pmull XL.1q, HH3.1d, T1.1d // a0 * b0 + pmull XM.1q, HH34.1d, T2.1d // (a1 + a0)(b1 + b0) - eor KS0.16b, KS0.16b, v31.16b - pmull XL.1q, XL.1d, MASK.1d - eor T2.16b, T2.16b, XH.16b + eor XH2.16b, XH2.16b, XH.16b + eor XL2.16b, XL2.16b, XL.16b + eor XM2.16b, XM2.16b, XM.16b - eor KS1.16b, KS1.16b, v31.16b - eor XL.16b, XL.16b, T2.16b + ext T2.16b, TT3.16b, TT3.16b, #8 +.Lgh2: eor TT3.16b, TT3.16b, T2.16b + pmull2 XH.1q, HH.2d, T2.2d // a1 * b1 + pmull XL.1q, HH.1d, T2.1d // a0 * b0 + pmull2 XM.1q, SHASH2.2d, TT3.2d // (a1 + a0)(b1 + b0) - .if \enc == 0 - eor INP0.16b, INP0.16b, KS0.16b - eor INP1.16b, INP1.16b, KS1.16b - .endif + eor XH2.16b, XH2.16b, XH.16b + eor XL2.16b, XL2.16b, XL.16b + eor XM2.16b, XM2.16b, XM.16b - st1 {INP0.16b-INP1.16b}, [x2], #32 + ext IN1.16b, TT4.16b, TT4.16b, #8 +.Lgh1: eor TT4.16b, TT4.16b, IN1.16b + pmull XL.1q, SHASH.1d, IN1.1d // a0 * b0 + pmull2 XH.1q, SHASH.2d, IN1.2d // a1 * b1 + pmull XM.1q, SHASH2.1d, TT4.1d // (a1 + a0)(b1 + b0) - cbnz w0, 0b + eor XH.16b, XH.16b, XH2.16b + eor XL.16b, XL.16b, XL2.16b + eor XM.16b, XM.16b, XM2.16b -CPU_LE( rev x8, x8 ) - st1 {XL.2d}, [x1] - str x8, [x5, #8] // store lower counter + eor T2.16b, XL.16b, XH.16b + ext T1.16b, XL.16b, XH.16b, #8 + eor XM.16b, XM.16b, T2.16b - .if \enc == 1 - st1 {KS0.16b-KS1.16b}, [x10] - .endif + __pmull_reduce_p64 + + eor T2.16b, T2.16b, XH.16b + eor XL.16b, XL.16b, T2.16b ret +ENDPROC(pmull_gcm_ghash_4x) + +pmull_gcm_enc_4x: + ld1 {KS0.16b}, [x5] // load upper counter + sub w10, w8, #4 + sub w11, w8, #3 + sub w12, w8, #2 + sub w13, w8, #1 + rev w10, w10 + rev w11, w11 + rev w12, w12 + rev w13, w13 + mov KS1.16b, KS0.16b + mov KS2.16b, KS0.16b + mov KS3.16b, KS0.16b + ins KS0.s[3], w10 // set lower counter + ins KS1.s[3], w11 + ins KS2.s[3], w12 + ins KS3.s[3], w13 + + add x10, x6, #96 // round key pointer + ld1 {K6.4s-K7.4s}, [x10], #32 + .irp key, K0, K1, K2, K3, K4, K5 + enc_qround KS0, KS1, KS2, KS3, \key + .endr -2: b.eq 3f // AES-192? 
- enc_round KS0, v17 - enc_round KS1, v17 - enc_round KS0, v18 - enc_round KS1, v18 -3: enc_round KS0, v19 - enc_round KS1, v19 - enc_round KS0, v20 - enc_round KS1, v20 - b 1b + tbnz x7, #2, .Lnot128 + .subsection 1 +.Lnot128: + ld1 {K8.4s-K9.4s}, [x10], #32 + .irp key, K6, K7 + enc_qround KS0, KS1, KS2, KS3, \key + .endr + ld1 {K6.4s-K7.4s}, [x10] + .irp key, K8, K9 + enc_qround KS0, KS1, KS2, KS3, \key + .endr + tbz x7, #1, .Lout192 + b .Lout256 + .previous -4: load_round_keys w7, x6 - b 0b - .endm +.Lout256: + .irp key, K6, K7 + enc_qround KS0, KS1, KS2, KS3, \key + .endr - /* - * void pmull_gcm_encrypt(int blocks, u64 dg[], u8 dst[], const u8 src[], - * struct ghash_key const *k, u8 ctr[], - * int rounds, u8 ks[]) - */ -ENTRY(pmull_gcm_encrypt) - pmull_gcm_do_crypt 1 -ENDPROC(pmull_gcm_encrypt) +.Lout192: + enc_qround KS0, KS1, KS2, KS3, KK - /* - * void pmull_gcm_decrypt(int blocks, u64 dg[], u8 dst[], const u8 src[], - * struct ghash_key const *k, u8 ctr[], - * int rounds) - */ -ENTRY(pmull_gcm_decrypt) - pmull_gcm_do_crypt 0 -ENDPROC(pmull_gcm_decrypt) + aese KS0.16b, KL.16b + aese KS1.16b, KL.16b + aese KS2.16b, KL.16b + aese KS3.16b, KL.16b + + eor KS0.16b, KS0.16b, KM.16b + eor KS1.16b, KS1.16b, KM.16b + eor KS2.16b, KS2.16b, KM.16b + eor KS3.16b, KS3.16b, KM.16b + + eor INP0.16b, INP0.16b, KS0.16b + eor INP1.16b, INP1.16b, KS1.16b + eor INP2.16b, INP2.16b, KS2.16b + eor INP3.16b, INP3.16b, KS3.16b - /* - * void pmull_gcm_encrypt_block(u8 dst[], u8 src[], u8 rk[], int rounds) - */ -ENTRY(pmull_gcm_encrypt_block) - cbz x2, 0f - load_round_keys w3, x2 -0: ld1 {v0.16b}, [x1] - enc_block v0, w3 - st1 {v0.16b}, [x0] ret -ENDPROC(pmull_gcm_encrypt_block) +ENDPROC(pmull_gcm_enc_4x) + + .section ".rodata", "a" + .align 6 +.Lpermute_table: + .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff + .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff + .byte 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7 + .byte 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf + .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff + .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff + .byte 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7 + .byte 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf + .previous diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c index 70b1469783f9..522cf004ce65 100644 --- a/arch/arm64/crypto/ghash-ce-glue.c +++ b/arch/arm64/crypto/ghash-ce-glue.c @@ -58,17 +58,15 @@ asmlinkage void pmull_ghash_update_p8(int blocks, u64 dg[], const char *src, struct ghash_key const *k, const char *head); -asmlinkage void pmull_gcm_encrypt(int blocks, u64 dg[], u8 dst[], - const u8 src[], struct ghash_key const *k, +asmlinkage void pmull_gcm_encrypt(int bytes, u8 dst[], const u8 src[], + struct ghash_key const *k, u64 dg[], u8 ctr[], u32 const rk[], int rounds, - u8 ks[]); + u8 tag[]); -asmlinkage void pmull_gcm_decrypt(int blocks, u64 dg[], u8 dst[], - const u8 src[], struct ghash_key const *k, - u8 ctr[], u32 const rk[], int rounds); - -asmlinkage void pmull_gcm_encrypt_block(u8 dst[], u8 const src[], - u32 const rk[], int rounds); +asmlinkage void pmull_gcm_decrypt(int bytes, u8 dst[], const u8 src[], + struct ghash_key const *k, u64 dg[], + u8 ctr[], u32 const rk[], int rounds, + u8 tag[]); static int ghash_init(struct shash_desc *desc) { @@ -85,7 +83,7 @@ static void ghash_do_update(int blocks, u64 dg[], const char *src, struct ghash_key const *k, const char *head)) { - if (likely(crypto_simd_usable())) { + if (likely(crypto_simd_usable() && simd_update)) { kernel_neon_begin(); simd_update(blocks, 
dg, src, key, head); kernel_neon_end(); @@ -398,136 +396,112 @@ static void gcm_calculate_auth_mac(struct aead_request *req, u64 dg[]) } } -static void gcm_final(struct aead_request *req, struct gcm_aes_ctx *ctx, - u64 dg[], u8 tag[], int cryptlen) -{ - u8 mac[AES_BLOCK_SIZE]; - u128 lengths; - - lengths.a = cpu_to_be64(req->assoclen * 8); - lengths.b = cpu_to_be64(cryptlen * 8); - - ghash_do_update(1, dg, (void *)&lengths, &ctx->ghash_key, NULL, - pmull_ghash_update_p64); - - put_unaligned_be64(dg[1], mac); - put_unaligned_be64(dg[0], mac + 8); - - crypto_xor(tag, mac, AES_BLOCK_SIZE); -} - static int gcm_encrypt(struct aead_request *req) { struct crypto_aead *aead = crypto_aead_reqtfm(req); struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead); + int nrounds = num_rounds(&ctx->aes_key); struct skcipher_walk walk; + u8 buf[AES_BLOCK_SIZE]; u8 iv[AES_BLOCK_SIZE]; - u8 ks[2 * AES_BLOCK_SIZE]; - u8 tag[AES_BLOCK_SIZE]; u64 dg[2] = {}; - int nrounds = num_rounds(&ctx->aes_key); + u128 lengths; + u8 *tag; int err; + lengths.a = cpu_to_be64(req->assoclen * 8); + lengths.b = cpu_to_be64(req->cryptlen * 8); + if (req->assoclen) gcm_calculate_auth_mac(req, dg); memcpy(iv, req->iv, GCM_IV_SIZE); - put_unaligned_be32(1, iv + GCM_IV_SIZE); + put_unaligned_be32(2, iv + GCM_IV_SIZE); err = skcipher_walk_aead_encrypt(&walk, req, false); - if (likely(crypto_simd_usable() && walk.total >= 2 * AES_BLOCK_SIZE)) { - u32 const *rk = NULL; - - kernel_neon_begin(); - pmull_gcm_encrypt_block(tag, iv, ctx->aes_key.key_enc, nrounds); - put_unaligned_be32(2, iv + GCM_IV_SIZE); - pmull_gcm_encrypt_block(ks, iv, NULL, nrounds); - put_unaligned_be32(3, iv + GCM_IV_SIZE); - pmull_gcm_encrypt_block(ks + AES_BLOCK_SIZE, iv, NULL, nrounds); - put_unaligned_be32(4, iv + GCM_IV_SIZE); - + if (likely(crypto_simd_usable())) { do { - int blocks = walk.nbytes / (2 * AES_BLOCK_SIZE) * 2; + const u8 *src = walk.src.virt.addr; + u8 *dst = walk.dst.virt.addr; + int nbytes = walk.nbytes; + + tag = (u8 *)&lengths; - if (rk) - kernel_neon_begin(); + if (unlikely(nbytes > 0 && nbytes < AES_BLOCK_SIZE)) { + src = dst = memcpy(buf + sizeof(buf) - nbytes, + src, nbytes); + } else if (nbytes < walk.total) { + nbytes &= ~(AES_BLOCK_SIZE - 1); + tag = NULL; + } - pmull_gcm_encrypt(blocks, dg, walk.dst.virt.addr, - walk.src.virt.addr, &ctx->ghash_key, - iv, rk, nrounds, ks); + kernel_neon_begin(); + pmull_gcm_encrypt(nbytes, dst, src, &ctx->ghash_key, dg, + iv, ctx->aes_key.key_enc, nrounds, + tag); kernel_neon_end(); - err = skcipher_walk_done(&walk, - walk.nbytes % (2 * AES_BLOCK_SIZE)); + if (unlikely(!nbytes)) + break; - rk = ctx->aes_key.key_enc; - } while (walk.nbytes >= 2 * AES_BLOCK_SIZE); - } else { - aes_encrypt(&ctx->aes_key, tag, iv); - put_unaligned_be32(2, iv + GCM_IV_SIZE); + if (unlikely(nbytes > 0 && nbytes < AES_BLOCK_SIZE)) + memcpy(walk.dst.virt.addr, + buf + sizeof(buf) - nbytes, nbytes); - while (walk.nbytes >= (2 * AES_BLOCK_SIZE)) { - const int blocks = - walk.nbytes / (2 * AES_BLOCK_SIZE) * 2; + err = skcipher_walk_done(&walk, walk.nbytes - nbytes); + } while (walk.nbytes); + } else { + while (walk.nbytes >= AES_BLOCK_SIZE) { + int blocks = walk.nbytes / AES_BLOCK_SIZE; + const u8 *src = walk.src.virt.addr; u8 *dst = walk.dst.virt.addr; - u8 *src = walk.src.virt.addr; int remaining = blocks; do { - aes_encrypt(&ctx->aes_key, ks, iv); - crypto_xor_cpy(dst, src, ks, AES_BLOCK_SIZE); + aes_encrypt(&ctx->aes_key, buf, iv); + crypto_xor_cpy(dst, src, buf, AES_BLOCK_SIZE); crypto_inc(iv, AES_BLOCK_SIZE); dst += AES_BLOCK_SIZE; 
src += AES_BLOCK_SIZE; } while (--remaining > 0); - ghash_do_update(blocks, dg, - walk.dst.virt.addr, &ctx->ghash_key, - NULL, pmull_ghash_update_p64); + ghash_do_update(blocks, dg, walk.dst.virt.addr, + &ctx->ghash_key, NULL, NULL); err = skcipher_walk_done(&walk, - walk.nbytes % (2 * AES_BLOCK_SIZE)); - } - if (walk.nbytes) { - aes_encrypt(&ctx->aes_key, ks, iv); - if (walk.nbytes > AES_BLOCK_SIZE) { - crypto_inc(iv, AES_BLOCK_SIZE); - aes_encrypt(&ctx->aes_key, ks + AES_BLOCK_SIZE, iv); - } + walk.nbytes % AES_BLOCK_SIZE); } - } - /* handle the tail */ - if (walk.nbytes) { - u8 buf[GHASH_BLOCK_SIZE]; - unsigned int nbytes = walk.nbytes; - u8 *dst = walk.dst.virt.addr; - u8 *head = NULL; + /* handle the tail */ + if (walk.nbytes) { + aes_encrypt(&ctx->aes_key, buf, iv); - crypto_xor_cpy(walk.dst.virt.addr, walk.src.virt.addr, ks, - walk.nbytes); + crypto_xor_cpy(walk.dst.virt.addr, walk.src.virt.addr, + buf, walk.nbytes); - if (walk.nbytes > GHASH_BLOCK_SIZE) { - head = dst; - dst += GHASH_BLOCK_SIZE; - nbytes %= GHASH_BLOCK_SIZE; + memcpy(buf, walk.dst.virt.addr, walk.nbytes); + memset(buf + walk.nbytes, 0, sizeof(buf) - walk.nbytes); } - memcpy(buf, dst, nbytes); - memset(buf + nbytes, 0, GHASH_BLOCK_SIZE - nbytes); - ghash_do_update(!!nbytes, dg, buf, &ctx->ghash_key, head, - pmull_ghash_update_p64); + tag = (u8 *)&lengths; + ghash_do_update(1, dg, tag, &ctx->ghash_key, + walk.nbytes ? buf : NULL, NULL); - err = skcipher_walk_done(&walk, 0); + if (walk.nbytes) + err = skcipher_walk_done(&walk, 0); + + put_unaligned_be64(dg[1], tag); + put_unaligned_be64(dg[0], tag + 8); + put_unaligned_be32(1, iv + GCM_IV_SIZE); + aes_encrypt(&ctx->aes_key, iv, iv); + crypto_xor(tag, iv, AES_BLOCK_SIZE); } if (err) return err; - gcm_final(req, ctx, dg, tag, req->cryptlen); - /* copy authtag to end of dst */ scatterwalk_map_and_copy(tag, req->dst, req->assoclen + req->cryptlen, crypto_aead_authsize(aead), 1); @@ -540,75 +514,65 @@ static int gcm_decrypt(struct aead_request *req) struct crypto_aead *aead = crypto_aead_reqtfm(req); struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead); unsigned int authsize = crypto_aead_authsize(aead); + int nrounds = num_rounds(&ctx->aes_key); struct skcipher_walk walk; - u8 iv[2 * AES_BLOCK_SIZE]; - u8 tag[AES_BLOCK_SIZE]; - u8 buf[2 * GHASH_BLOCK_SIZE]; + u8 buf[AES_BLOCK_SIZE]; + u8 iv[AES_BLOCK_SIZE]; u64 dg[2] = {}; - int nrounds = num_rounds(&ctx->aes_key); + u128 lengths; + u8 *tag; int err; + lengths.a = cpu_to_be64(req->assoclen * 8); + lengths.b = cpu_to_be64((req->cryptlen - authsize) * 8); + if (req->assoclen) gcm_calculate_auth_mac(req, dg); memcpy(iv, req->iv, GCM_IV_SIZE); - put_unaligned_be32(1, iv + GCM_IV_SIZE); + put_unaligned_be32(2, iv + GCM_IV_SIZE); err = skcipher_walk_aead_decrypt(&walk, req, false); - if (likely(crypto_simd_usable() && walk.total >= 2 * AES_BLOCK_SIZE)) { - u32 const *rk = NULL; - - kernel_neon_begin(); - pmull_gcm_encrypt_block(tag, iv, ctx->aes_key.key_enc, nrounds); - put_unaligned_be32(2, iv + GCM_IV_SIZE); - + if (likely(crypto_simd_usable())) { do { - int blocks = walk.nbytes / (2 * AES_BLOCK_SIZE) * 2; - int rem = walk.total - blocks * AES_BLOCK_SIZE; - - if (rk) - kernel_neon_begin(); - - pmull_gcm_decrypt(blocks, dg, walk.dst.virt.addr, - walk.src.virt.addr, &ctx->ghash_key, - iv, rk, nrounds); - - /* check if this is the final iteration of the loop */ - if (rem < (2 * AES_BLOCK_SIZE)) { - u8 *iv2 = iv + AES_BLOCK_SIZE; - - if (rem > AES_BLOCK_SIZE) { - memcpy(iv2, iv, AES_BLOCK_SIZE); - crypto_inc(iv2, AES_BLOCK_SIZE); - } 
+ const u8 *src = walk.src.virt.addr; + u8 *dst = walk.dst.virt.addr; + int nbytes = walk.nbytes; - pmull_gcm_encrypt_block(iv, iv, NULL, nrounds); + tag = (u8 *)&lengths; - if (rem > AES_BLOCK_SIZE) - pmull_gcm_encrypt_block(iv2, iv2, NULL, - nrounds); + if (unlikely(nbytes > 0 && nbytes < AES_BLOCK_SIZE)) { + src = dst = memcpy(buf + sizeof(buf) - nbytes, + src, nbytes); + } else if (nbytes < walk.total) { + nbytes &= ~(AES_BLOCK_SIZE - 1); + tag = NULL; } + kernel_neon_begin(); + pmull_gcm_decrypt(nbytes, dst, src, &ctx->ghash_key, dg, + iv, ctx->aes_key.key_enc, nrounds, + tag); kernel_neon_end(); - err = skcipher_walk_done(&walk, - walk.nbytes % (2 * AES_BLOCK_SIZE)); + if (unlikely(!nbytes)) + break; - rk = ctx->aes_key.key_enc; - } while (walk.nbytes >= 2 * AES_BLOCK_SIZE); - } else { - aes_encrypt(&ctx->aes_key, tag, iv); - put_unaligned_be32(2, iv + GCM_IV_SIZE); + if (unlikely(nbytes > 0 && nbytes < AES_BLOCK_SIZE)) + memcpy(walk.dst.virt.addr, + buf + sizeof(buf) - nbytes, nbytes); - while (walk.nbytes >= (2 * AES_BLOCK_SIZE)) { - int blocks = walk.nbytes / (2 * AES_BLOCK_SIZE) * 2; + err = skcipher_walk_done(&walk, walk.nbytes - nbytes); + } while (walk.nbytes); + } else { + while (walk.nbytes >= AES_BLOCK_SIZE) { + int blocks = walk.nbytes / AES_BLOCK_SIZE; + const u8 *src = walk.src.virt.addr; u8 *dst = walk.dst.virt.addr; - u8 *src = walk.src.virt.addr; ghash_do_update(blocks, dg, walk.src.virt.addr, - &ctx->ghash_key, NULL, - pmull_ghash_update_p64); + &ctx->ghash_key, NULL, NULL); do { aes_encrypt(&ctx->aes_key, buf, iv); @@ -620,49 +584,38 @@ static int gcm_decrypt(struct aead_request *req) } while (--blocks > 0); err = skcipher_walk_done(&walk, - walk.nbytes % (2 * AES_BLOCK_SIZE)); + walk.nbytes % AES_BLOCK_SIZE); } - if (walk.nbytes) { - if (walk.nbytes > AES_BLOCK_SIZE) { - u8 *iv2 = iv + AES_BLOCK_SIZE; - - memcpy(iv2, iv, AES_BLOCK_SIZE); - crypto_inc(iv2, AES_BLOCK_SIZE); - aes_encrypt(&ctx->aes_key, iv2, iv2); - } - aes_encrypt(&ctx->aes_key, iv, iv); + /* handle the tail */ + if (walk.nbytes) { + memcpy(buf, walk.src.virt.addr, walk.nbytes); + memset(buf + walk.nbytes, 0, sizeof(buf) - walk.nbytes); } - } - /* handle the tail */ - if (walk.nbytes) { - const u8 *src = walk.src.virt.addr; - const u8 *head = NULL; - unsigned int nbytes = walk.nbytes; + tag = (u8 *)&lengths; + ghash_do_update(1, dg, tag, &ctx->ghash_key, + walk.nbytes ? 
buf : NULL, NULL); - if (walk.nbytes > GHASH_BLOCK_SIZE) { - head = src; - src += GHASH_BLOCK_SIZE; - nbytes %= GHASH_BLOCK_SIZE; - } + if (walk.nbytes) { + aes_encrypt(&ctx->aes_key, buf, iv); - memcpy(buf, src, nbytes); - memset(buf + nbytes, 0, GHASH_BLOCK_SIZE - nbytes); - ghash_do_update(!!nbytes, dg, buf, &ctx->ghash_key, head, - pmull_ghash_update_p64); + crypto_xor_cpy(walk.dst.virt.addr, walk.src.virt.addr, + buf, walk.nbytes); - crypto_xor_cpy(walk.dst.virt.addr, walk.src.virt.addr, iv, - walk.nbytes); + err = skcipher_walk_done(&walk, 0); + } - err = skcipher_walk_done(&walk, 0); + put_unaligned_be64(dg[1], tag); + put_unaligned_be64(dg[0], tag + 8); + put_unaligned_be32(1, iv + GCM_IV_SIZE); + aes_encrypt(&ctx->aes_key, iv, iv); + crypto_xor(tag, iv, AES_BLOCK_SIZE); } if (err) return err; - gcm_final(req, ctx, dg, tag, req->cryptlen - authsize); - /* compare calculated auth tag with the stored one */ scatterwalk_map_and_copy(buf, req->src, req->assoclen + req->cryptlen - authsize, @@ -675,7 +628,7 @@ static int gcm_decrypt(struct aead_request *req) static struct aead_alg gcm_aes_alg = { .ivsize = GCM_IV_SIZE, - .chunksize = 2 * AES_BLOCK_SIZE, + .chunksize = AES_BLOCK_SIZE, .maxauthsize = AES_BLOCK_SIZE, .setkey = gcm_setkey, .setauthsize = gcm_setauthsize, -- cgit v1.2.3 From 7fb61afb7b5b4389e0f6e78c3a822d5991d4edef Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Sat, 14 Sep 2019 14:02:54 -0700 Subject: ARM: OMAP2+: Check omap3-rom-rng for GP device instead of HS device MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In general we should check for GP device instead of HS device unless the other options such as EMU are also checked. Otherwise omap3-rom-rng won't probe on few of the old n900 macro boards still in service in automated build and boot test systems. Cc: Aaro Koskinen Cc: Adam Ford Cc: Pali Rohár Cc: Sebastian Reichel Cc: Tero Kristo Signed-off-by: Tony Lindgren Signed-off-by: Herbert Xu --- arch/arm/mach-omap2/pdata-quirks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/mach-omap2/pdata-quirks.c b/arch/arm/mach-omap2/pdata-quirks.c index d942a3357090..b49ec3fbee4c 100644 --- a/arch/arm/mach-omap2/pdata-quirks.c +++ b/arch/arm/mach-omap2/pdata-quirks.c @@ -276,7 +276,7 @@ static void __init nokia_n900_legacy_init(void) mmc_pdata[0].name = "external"; mmc_pdata[1].name = "internal"; - if (omap_type() == OMAP2_DEVICE_TYPE_SEC) { + if (omap_type() != OMAP2_DEVICE_TYPE_GP) { if (IS_ENABLED(CONFIG_ARM_ERRATA_430973)) { pr_info("RX-51: Enabling ARM errata 430973 workaround\n"); /* set IBE to 1 */ -- cgit v1.2.3 From 0c0ef9ea6f3f0d5979dc7b094b0a184c1a94716b Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Sat, 14 Sep 2019 14:02:55 -0700 Subject: hwrng: omap3-rom - Fix missing clock by probing with device tree MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 0ed266d7ae5e ("clk: ti: omap3: cleanup unnecessary clock aliases") removed old omap3 clock framework aliases but caused omap3-rom-rng to stop working with clock not found error. Based on discussions on the mailing list it was requested by Tero Kristo that it would be best to fix this issue by probing omap3-rom-rng using device tree to provide a proper clk property. The other option would be to add back the missing clock alias, but that does not help moving things forward with removing old legacy platform_data. 
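[ ed: Mechanically, the fix works because the driver's clock lookup can
  now resolve through the device tree "clocks"/"clock-names" properties
  instead of the deleted clkdev alias. A hedged sketch of such a lookup
  in probe(), not necessarily the literal driver code:

        struct clk *rng_clk;

        /* matches clock-names = "ick" in the new rom_rng node */
        rng_clk = devm_clk_get(&pdev->dev, "ick");
        if (IS_ERR(rng_clk))
                return PTR_ERR(rng_clk);

  ]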
Let's also add a proper device tree binding and keep it together with the fix. Cc: devicetree@vger.kernel.org Cc: Aaro Koskinen Cc: Adam Ford Cc: Pali Rohár Cc: Rob Herring Cc: Sebastian Reichel Cc: Tero Kristo Fixes: 0ed266d7ae5e ("clk: ti: omap3: cleanup unnecessary clock aliases") Reported-by: Aaro Koskinen Signed-off-by: Tony Lindgren Acked-by: Rob Herring Signed-off-by: Herbert Xu --- .../devicetree/bindings/rng/omap3_rom_rng.txt | 27 ++++++++++++++++++++++ arch/arm/boot/dts/omap3-n900.dts | 6 +++++ arch/arm/mach-omap2/pdata-quirks.c | 12 +--------- drivers/char/hw_random/omap3-rom-rng.c | 17 ++++++++++++-- 4 files changed, 49 insertions(+), 13 deletions(-) create mode 100644 Documentation/devicetree/bindings/rng/omap3_rom_rng.txt (limited to 'arch') diff --git a/Documentation/devicetree/bindings/rng/omap3_rom_rng.txt b/Documentation/devicetree/bindings/rng/omap3_rom_rng.txt new file mode 100644 index 000000000000..f315c9723bd2 --- /dev/null +++ b/Documentation/devicetree/bindings/rng/omap3_rom_rng.txt @@ -0,0 +1,27 @@ +OMAP ROM RNG driver binding + +Secure SoCs may provide RNG via secure ROM calls like Nokia N900 does. The +implementation can depend on the SoC secure ROM used. + +- compatible: + Usage: required + Value type: + Definition: must be "nokia,n900-rom-rng" + +- clocks: + Usage: required + Value type: + Definition: reference to the the RNG interface clock + +- clock-names: + Usage: required + Value type: + Definition: must be "ick" + +Example: + + rom_rng: rng { + compatible = "nokia,n900-rom-rng"; + clocks = <&rng_ick>; + clock-names = "ick"; + }; diff --git a/arch/arm/boot/dts/omap3-n900.dts b/arch/arm/boot/dts/omap3-n900.dts index 84a5ade1e865..63659880eeb3 100644 --- a/arch/arm/boot/dts/omap3-n900.dts +++ b/arch/arm/boot/dts/omap3-n900.dts @@ -155,6 +155,12 @@ pwms = <&pwm9 0 26316 0>; /* 38000 Hz */ }; + rom_rng: rng { + compatible = "nokia,n900-rom-rng"; + clocks = <&rng_ick>; + clock-names = "ick"; + }; + /* controlled (enabled/disabled) directly by bcm2048 and wl1251 */ vctcxo: vctcxo { compatible = "fixed-clock"; diff --git a/arch/arm/mach-omap2/pdata-quirks.c b/arch/arm/mach-omap2/pdata-quirks.c index b49ec3fbee4c..62cc90722848 100644 --- a/arch/arm/mach-omap2/pdata-quirks.c +++ b/arch/arm/mach-omap2/pdata-quirks.c @@ -262,14 +262,6 @@ static void __init am3517_evm_legacy_init(void) am35xx_emac_reset(); } -static struct platform_device omap3_rom_rng_device = { - .name = "omap3-rom-rng", - .id = -1, - .dev = { - .platform_data = rx51_secure_rng_call, - }, -}; - static void __init nokia_n900_legacy_init(void) { hsmmc2_internal_input_clk(); @@ -285,9 +277,6 @@ static void __init nokia_n900_legacy_init(void) pr_warn("RX-51: Not enabling ARM errata 430973 workaround\n"); pr_warn("Thumb binaries may crash randomly without this workaround\n"); } - - pr_info("RX-51: Registering OMAP3 HWRNG device\n"); - platform_device_register(&omap3_rom_rng_device); } } @@ -627,6 +616,7 @@ static struct of_dev_auxdata omap_auxdata_lookup[] = { OF_DEV_AUXDATA("ti,davinci_mdio", 0x5c030000, "davinci_mdio.0", NULL), OF_DEV_AUXDATA("ti,am3517-emac", 0x5c000000, "davinci_emac.0", &am35xx_emac_pdata), + OF_DEV_AUXDATA("nokia,n900-rom-rng", 0, NULL, rx51_secure_rng_call), /* McBSP modules with sidetone core */ #if IS_ENABLED(CONFIG_SND_SOC_OMAP_MCBSP) OF_DEV_AUXDATA("ti,omap3-mcbsp", 0x49022000, "49022000.mcbsp", &mcbsp_pdata), diff --git a/drivers/char/hw_random/omap3-rom-rng.c b/drivers/char/hw_random/omap3-rom-rng.c index 38b719017186..34e10f05545a 100644 --- 
a/drivers/char/hw_random/omap3-rom-rng.c +++ b/drivers/char/hw_random/omap3-rom-rng.c @@ -20,6 +20,8 @@ #include #include #include +#include +#include #include #define RNG_RESET 0x01 @@ -86,14 +88,18 @@ static int omap3_rom_rng_read(struct hwrng *rng, void *data, size_t max, bool w) static struct hwrng omap3_rom_rng_ops = { .name = "omap3-rom", - .read = omap3_rom_rng_read, }; static int omap3_rom_rng_probe(struct platform_device *pdev) { int ret = 0; - pr_info("initializing\n"); + omap3_rom_rng_ops.read = of_device_get_match_data(&pdev->dev); + if (!omap3_rom_rng_ops.read) { + dev_err(&pdev->dev, "missing rom code handler\n"); + + return -ENODEV; + } omap3_rom_rng_call = pdev->dev.platform_data; if (!omap3_rom_rng_call) { @@ -125,9 +131,16 @@ static int omap3_rom_rng_remove(struct platform_device *pdev) return 0; } +static const struct of_device_id omap_rom_rng_match[] = { + { .compatible = "nokia,n900-rom-rng", .data = omap3_rom_rng_read, }, + { /* sentinel */ }, +}; +MODULE_DEVICE_TABLE(of, omap_rom_rng_match); + static struct platform_driver omap3_rom_rng_driver = { .driver = { .name = "omap3-rom-rng", + .of_match_table = omap_rom_rng_match, }, .probe = omap3_rom_rng_probe, .remove = omap3_rom_rng_remove, -- cgit v1.2.3 From 38f51c07054ff4796e473dba3bff2e648378002c Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 3 Oct 2019 01:45:11 +0200 Subject: bpf, x86: Small optimization in comparing against imm0 Replace 'cmp reg, 0' with 'test reg, reg' for comparisons against zero. Saves 1 byte of instruction encoding per occurrence. The flag results of test 'reg, reg' are identical to 'cmp reg, 0' in all cases except for AF which we don't use/care about. In terms of macro-fusibility in combination with a subsequent conditional jump instruction, both have the same properties for the jumps used in the JIT translation. For example, same JITed Cilium program can shrink a bit from e.g. 12,455 to 12,317 bytes as tests with 0 are used quite frequently. Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Acked-by: John Fastabend --- arch/x86/net/bpf_jit_comp.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'arch') diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 991549a1c5f3..3ad2ba1ad855 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -909,6 +909,16 @@ xadd: if (is_imm8(insn->off)) case BPF_JMP32 | BPF_JSLT | BPF_K: case BPF_JMP32 | BPF_JSGE | BPF_K: case BPF_JMP32 | BPF_JSLE | BPF_K: + /* test dst_reg, dst_reg to save one extra byte */ + if (imm32 == 0) { + if (BPF_CLASS(insn->code) == BPF_JMP) + EMIT1(add_2mod(0x48, dst_reg, dst_reg)); + else if (is_ereg(dst_reg)) + EMIT1(add_2mod(0x40, dst_reg, dst_reg)); + EMIT2(0x85, add_2reg(0xC0, dst_reg, dst_reg)); + goto emit_cond_jmp; + } + /* cmp dst_reg, imm8/32 */ if (BPF_CLASS(insn->code) == BPF_JMP) EMIT1(add_1mod(0x48, dst_reg)); -- cgit v1.2.3 From 46a73e9e6ccc77619838885439873af41a5ad1c1 Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Thu, 3 Oct 2019 14:27:23 +0200 Subject: MIPS: SGI-IP27: remove not used stuff inherited from IRIX Most of the SN/SN0 header files are inherited from IRIX header files, but not all of that stuff is useful for Linux. Remove not used parts. 
Signed-off-by: Thomas Bogendoerfer Signed-off-by: Paul Burton Cc: Ralf Baechle Cc: James Hogan Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org --- arch/mips/include/asm/sn/arch.h | 8 -------- arch/mips/include/asm/sn/sn0/arch.h | 2 -- arch/mips/include/asm/sn/types.h | 3 --- 3 files changed, 13 deletions(-) (limited to 'arch') diff --git a/arch/mips/include/asm/sn/arch.h b/arch/mips/include/asm/sn/arch.h index 3f1fb1454749..97fa2b6440a1 100644 --- a/arch/mips/include/asm/sn/arch.h +++ b/arch/mips/include/asm/sn/arch.h @@ -19,8 +19,6 @@ #define cputonasid(cpu) (sn_cpu_info[(cpu)].p_nasid) #define cputoslice(cpu) (sn_cpu_info[(cpu)].p_slice) -#define makespnum(_nasid, _slice) \ - (((_nasid) << CPUS_PER_NODE_SHFT) | (_slice)) #define INVALID_NASID (nasid_t)-1 #define INVALID_CNODEID (cnodeid_t)-1 @@ -47,12 +45,6 @@ extern nasid_t compact_to_nasid_node[MAX_COMPACT_NODES]; * will continue to work. Don't use the arrays above directly. */ -#define NASID_TO_REGION(nnode) \ - ((nnode) >> \ - (is_fine_dirmode() ? NASID_TO_FINEREG_SHFT : NASID_TO_COARSEREG_SHFT)) - -extern cnodeid_t nasid_to_compact_node[MAX_NASIDS]; -extern nasid_t compact_to_nasid_node[MAX_COMPACT_NODES]; extern cnodeid_t cpuid_to_compact_node[MAXCPUS]; #define NASID_TO_COMPACT_NODEID(nnode) (nasid_to_compact_node[nnode]) diff --git a/arch/mips/include/asm/sn/sn0/arch.h b/arch/mips/include/asm/sn/sn0/arch.h index 425a67e6a947..ea8a6983f6a4 100644 --- a/arch/mips/include/asm/sn/sn0/arch.h +++ b/arch/mips/include/asm/sn/sn0/arch.h @@ -66,7 +66,5 @@ #define SLOT_MIN_MEM_SIZE (32*1024*1024) #define CPUS_PER_NODE 2 /* CPUs on a single hub */ -#define CPUS_PER_NODE_SHFT 1 /* Bits to shift in the node number */ -#define CPUS_PER_SUBNODE 2 /* CPUs on a single hub PI */ #endif /* _ASM_SN_SN0_ARCH_H */ diff --git a/arch/mips/include/asm/sn/types.h b/arch/mips/include/asm/sn/types.h index 6d24d4e8b9ed..f9d0bc2007c3 100644 --- a/arch/mips/include/asm/sn/types.h +++ b/arch/mips/include/asm/sn/types.h @@ -12,13 +12,10 @@ #include typedef unsigned long cpuid_t; -typedef unsigned long cnodemask_t; typedef signed short nasid_t; /* node id in numa-as-id space */ typedef signed short cnodeid_t; /* node id in compact-id space */ typedef signed char partid_t; /* partition ID type */ typedef signed short moduleid_t; /* user-visible module number type */ -typedef signed short cmoduleid_t; /* kernel compact module id type */ -typedef unsigned char clusterid_t; /* Clusterid of the cell */ typedef dev_t vertex_hdl_t; /* hardware graph vertex handle */ -- cgit v1.2.3 From 4bf841ebf17aaa0f7712623896c699b44fa92f44 Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Thu, 3 Oct 2019 14:27:24 +0200 Subject: MIPS: SGI-IP27: get rid of compact node ids Node ids don't need to be contiguous in Linux, so the concept to use compact node ids to make them contiguous isn't needed at all. This patchset therefore removes it. 
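[ ed: As a minimal sketch of the indirection being removed, before the
  conversions below. The macro and typedef names are the kernel's own; the
  numeric values and the wrapper function are made up for illustration:

	/* Old scheme: each sparse hardware node id (nasid) was mapped to
	 * a dense "compact" id via lookup tables, and back again at use.
	 */
	static void compact_id_example(int cpu)
	{
		nasid_t nasid = 4;	/* sparse hardware id, value made up */
		cnodeid_t cnode = NASID_TO_COMPACT_NODEID(nasid); /* e.g. 1 */
		nasid_t back = COMPACT_TO_NASID_NODEID(cnode);	/* 4 again */

		/* After this patch the nasid is carried directly, and
		 * cpu_to_node() returns a nasid rather than a cnodeid_t:
		 */
		nasid_t node = cpu_to_node(cpu);
	}
]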
Signed-off-by: Thomas Bogendoerfer Signed-off-by: Paul Burton Cc: Ralf Baechle Cc: James Hogan Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org --- arch/mips/include/asm/mach-ip27/mmzone.h | 2 +- arch/mips/include/asm/mach-ip27/topology.h | 3 +- arch/mips/include/asm/sn/agent.h | 2 +- arch/mips/include/asm/sn/arch.h | 23 ---------- arch/mips/include/asm/sn/hub.h | 4 +- arch/mips/include/asm/sn/mapped_kernel.h | 4 +- arch/mips/include/asm/sn/sn_private.h | 5 +-- arch/mips/include/asm/sn/types.h | 1 - arch/mips/pci/pci-xtalk-bridge.c | 2 +- arch/mips/sgi-ip27/ip27-hubio.c | 10 ++--- arch/mips/sgi-ip27/ip27-init.c | 33 ++++---------- arch/mips/sgi-ip27/ip27-irq.c | 4 +- arch/mips/sgi-ip27/ip27-klconfig.c | 14 +----- arch/mips/sgi-ip27/ip27-klnuma.c | 21 ++++----- arch/mips/sgi-ip27/ip27-memory.c | 57 ++++++++--------------- arch/mips/sgi-ip27/ip27-nmi.c | 16 ++----- arch/mips/sgi-ip27/ip27-reset.c | 6 +-- arch/mips/sgi-ip27/ip27-smp.c | 72 +++++++----------------------- arch/mips/sgi-ip27/ip27-timer.c | 4 +- arch/mips/sgi-ip27/ip27-xtalk.c | 10 ++--- 20 files changed, 81 insertions(+), 212 deletions(-) (limited to 'arch') diff --git a/arch/mips/include/asm/mach-ip27/mmzone.h b/arch/mips/include/asm/mach-ip27/mmzone.h index 1cd6a23a84f2..f463826515df 100644 --- a/arch/mips/include/asm/mach-ip27/mmzone.h +++ b/arch/mips/include/asm/mach-ip27/mmzone.h @@ -6,7 +6,7 @@ #include #include -#define pa_to_nid(addr) NASID_TO_COMPACT_NODEID(NASID_GET(addr)) +#define pa_to_nid(addr) NASID_GET(addr) struct hub_data { kern_vars_t kern_vars; diff --git a/arch/mips/include/asm/mach-ip27/topology.h b/arch/mips/include/asm/mach-ip27/topology.h index 965f0793a5f9..a717af9177ff 100644 --- a/arch/mips/include/asm/mach-ip27/topology.h +++ b/arch/mips/include/asm/mach-ip27/topology.h @@ -7,14 +7,13 @@ #include struct cpuinfo_ip27 { - cnodeid_t p_nodeid; /* my node ID in compact-id-space */ nasid_t p_nasid; /* my node ID in numa-as-id-space */ unsigned char p_slice; /* Physical position on node board */ }; extern struct cpuinfo_ip27 sn_cpu_info[NR_CPUS]; -#define cpu_to_node(cpu) (sn_cpu_info[(cpu)].p_nodeid) +#define cpu_to_node(cpu) (cputonasid(cpu)) #define cpumask_of_node(node) ((node) == -1 ? \ cpu_all_mask : \ &hub_data(node)->h_cpus) diff --git a/arch/mips/include/asm/sn/agent.h b/arch/mips/include/asm/sn/agent.h index e33d09293019..7e9b3271737a 100644 --- a/arch/mips/include/asm/sn/agent.h +++ b/arch/mips/include/asm/sn/agent.h @@ -26,7 +26,7 @@ #if defined(CONFIG_SGI_IP27) #define HUB_NIC_ADDR(_cpuid) \ - REMOTE_HUB_ADDR(COMPACT_TO_NASID_NODEID(cpu_to_node(_cpuid)), \ + REMOTE_HUB_ADDR(cpu_to_node(_cpuid), \ MD_MLAN_CTL) #endif diff --git a/arch/mips/include/asm/sn/arch.h b/arch/mips/include/asm/sn/arch.h index 97fa2b6440a1..f7d3273d9599 100644 --- a/arch/mips/include/asm/sn/arch.h +++ b/arch/mips/include/asm/sn/arch.h @@ -21,34 +21,11 @@ #define cputoslice(cpu) (sn_cpu_info[(cpu)].p_slice) #define INVALID_NASID (nasid_t)-1 -#define INVALID_CNODEID (cnodeid_t)-1 #define INVALID_PNODEID (pnodeid_t)-1 #define INVALID_MODULE (moduleid_t)-1 #define INVALID_PARTID (partid_t)-1 extern nasid_t get_nasid(void); -extern cnodeid_t get_cpu_cnode(cpuid_t); extern int get_cpu_slice(cpuid_t); -/* - * NO ONE should access these arrays directly. The only reason we refer to - * them here is to avoid the procedure call that would be required in the - * macros below. 
(Really want private data members here :-) - */ -extern cnodeid_t nasid_to_compact_node[MAX_NASIDS]; -extern nasid_t compact_to_nasid_node[MAX_COMPACT_NODES]; - -/* - * These macros are used by various parts of the kernel to convert - * between the three different kinds of node numbering. At least some - * of them may change to procedure calls in the future, but the macros - * will continue to work. Don't use the arrays above directly. - */ - -extern cnodeid_t cpuid_to_compact_node[MAXCPUS]; - -#define NASID_TO_COMPACT_NODEID(nnode) (nasid_to_compact_node[nnode]) -#define COMPACT_TO_NASID_NODEID(cnode) (compact_to_nasid_node[cnode]) -#define CPUID_TO_COMPACT_NODEID(cpu) (cpuid_to_compact_node[(cpu)]) - #endif /* _ASM_SN_ARCH_H */ diff --git a/arch/mips/include/asm/sn/hub.h b/arch/mips/include/asm/sn/hub.h index 338f7eed74f1..45878fbefbae 100644 --- a/arch/mips/include/asm/sn/hub.h +++ b/arch/mips/include/asm/sn/hub.h @@ -10,8 +10,8 @@ #include /* ip27-hubio.c */ -extern unsigned long hub_pio_map(cnodeid_t cnode, xwidgetnum_t widget, +extern unsigned long hub_pio_map(nasid_t nasid, xwidgetnum_t widget, unsigned long xtalk_addr, size_t size); -extern void hub_pio_init(cnodeid_t cnode); +extern void hub_pio_init(nasid_t nasid); #endif /* __ASM_SN_HUB_H */ diff --git a/arch/mips/include/asm/sn/mapped_kernel.h b/arch/mips/include/asm/sn/mapped_kernel.h index 2f3efa91c16e..3f1049807018 100644 --- a/arch/mips/include/asm/sn/mapped_kernel.h +++ b/arch/mips/include/asm/sn/mapped_kernel.h @@ -37,10 +37,10 @@ #define MAPPED_KERN_RO_TO_PHYS(x) \ ((unsigned long)MAPPED_ADDR_RO_TO_PHYS(x) | \ - MAPPED_KERN_RO_PHYSBASE(get_compact_nodeid())) + MAPPED_KERN_RO_PHYSBASE(get_nasid())) #define MAPPED_KERN_RW_TO_PHYS(x) \ ((unsigned long)MAPPED_ADDR_RW_TO_PHYS(x) | \ - MAPPED_KERN_RW_PHYSBASE(get_compact_nodeid())) + MAPPED_KERN_RW_PHYSBASE(get_nasid())) #else /* CONFIG_MAPPED_KERNEL */ diff --git a/arch/mips/include/asm/sn/sn_private.h b/arch/mips/include/asm/sn/sn_private.h index f09ba846c644..63a2c30d81c6 100644 --- a/arch/mips/include/asm/sn/sn_private.h +++ b/arch/mips/include/asm/sn/sn_private.h @@ -7,14 +7,13 @@ extern nasid_t master_nasid; extern void cpu_node_probe(void); -extern cnodeid_t get_compact_nodeid(void); -extern void hub_rtc_init(cnodeid_t); +extern void hub_rtc_init(nasid_t nasid); extern void cpu_time_init(void); extern void per_cpu_init(void); extern void install_cpu_nmi_handler(int slice); extern void install_ipi(void); extern void setup_replication_mask(void); extern void replicate_kernel_text(void); -extern unsigned long node_getfirstfree(cnodeid_t); +extern unsigned long node_getfirstfree(nasid_t nasid); #endif /* __ASM_SN_SN_PRIVATE_H */ diff --git a/arch/mips/include/asm/sn/types.h b/arch/mips/include/asm/sn/types.h index f9d0bc2007c3..203c927e84d1 100644 --- a/arch/mips/include/asm/sn/types.h +++ b/arch/mips/include/asm/sn/types.h @@ -13,7 +13,6 @@ typedef unsigned long cpuid_t; typedef signed short nasid_t; /* node id in numa-as-id space */ -typedef signed short cnodeid_t; /* node id in compact-id space */ typedef signed char partid_t; /* partition ID type */ typedef signed short moduleid_t; /* user-visible module number type */ diff --git a/arch/mips/pci/pci-xtalk-bridge.c b/arch/mips/pci/pci-xtalk-bridge.c index 7b4d40354ee7..4bb5e326305e 100644 --- a/arch/mips/pci/pci-xtalk-bridge.c +++ b/arch/mips/pci/pci-xtalk-bridge.c @@ -285,7 +285,7 @@ static int bridge_set_affinity(struct irq_data *d, const struct cpumask *mask, ret = irq_chip_set_affinity_parent(d, mask, force); if (ret 
>= 0) { cpu = cpumask_first_and(mask, cpu_online_mask); - nasid = COMPACT_TO_NASID_NODEID(cpu_to_node(cpu)); + nasid = cpu_to_node(cpu); bridge_write(data->bc, b_int_addr[pin].addr, (((data->bc->intr_addr >> 30) & 0x30000) | bit | (nasid << 8))); diff --git a/arch/mips/sgi-ip27/ip27-hubio.c b/arch/mips/sgi-ip27/ip27-hubio.c index 6ebb8845a77c..a538d0ceb61d 100644 --- a/arch/mips/sgi-ip27/ip27-hubio.c +++ b/arch/mips/sgi-ip27/ip27-hubio.c @@ -25,10 +25,9 @@ static int force_fire_and_forget = 1; * @size: size of the PIO mapping * **/ -unsigned long hub_pio_map(cnodeid_t cnode, xwidgetnum_t widget, +unsigned long hub_pio_map(nasid_t nasid, xwidgetnum_t widget, unsigned long xtalk_addr, size_t size) { - nasid_t nasid = COMPACT_TO_NASID_NODEID(cnode); unsigned i; /* use small-window mapping if possible */ @@ -44,7 +43,7 @@ unsigned long hub_pio_map(cnodeid_t cnode, xwidgetnum_t widget, xtalk_addr &= ~(BWIN_SIZE-1); for (i = 0; i < HUB_NUM_BIG_WINDOW; i++) { - if (test_and_set_bit(i, hub_data(cnode)->h_bigwin_used)) + if (test_and_set_bit(i, hub_data(nasid)->h_bigwin_used)) continue; /* @@ -171,13 +170,12 @@ static void hub_set_piomode(nasid_t nasid) * * @hub: hubinfo structure for our hub */ -void hub_pio_init(cnodeid_t cnode) +void hub_pio_init(nasid_t nasid) { - nasid_t nasid = COMPACT_TO_NASID_NODEID(cnode); unsigned i; /* initialize big window piomaps for this hub */ - bitmap_zero(hub_data(cnode)->h_bigwin_used, HUB_NUM_BIG_WINDOW); + bitmap_zero(hub_data(nasid)->h_bigwin_used, HUB_NUM_BIG_WINDOW); for (i = 0; i < HUB_NUM_BIG_WINDOW; i++) IIO_ITTE_DISABLE(nasid, i); diff --git a/arch/mips/sgi-ip27/ip27-init.c b/arch/mips/sgi-ip27/ip27-init.c index 59d5375c9021..1dad799758c4 100644 --- a/arch/mips/sgi-ip27/ip27-init.c +++ b/arch/mips/sgi-ip27/ip27-init.c @@ -41,25 +41,16 @@ static DECLARE_BITMAP(hub_init_mask, MAX_COMPACT_NODES); nasid_t master_nasid = INVALID_NASID; -cnodeid_t nasid_to_compact_node[MAX_NASIDS]; -nasid_t compact_to_nasid_node[MAX_COMPACT_NODES]; -cnodeid_t cpuid_to_compact_node[MAXCPUS]; - -EXPORT_SYMBOL(nasid_to_compact_node); - struct cpuinfo_ip27 sn_cpu_info[NR_CPUS]; EXPORT_SYMBOL_GPL(sn_cpu_info); -extern void pcibr_setup(cnodeid_t); - -static void per_hub_init(cnodeid_t cnode) +static void per_hub_init(nasid_t nasid) { - struct hub_data *hub = hub_data(cnode); - nasid_t nasid = COMPACT_TO_NASID_NODEID(cnode); + struct hub_data *hub = hub_data(nasid); cpumask_set_cpu(smp_processor_id(), &hub->h_cpus); - if (test_and_set_bit(cnode, hub_init_mask)) + if (test_and_set_bit(nasid, hub_init_mask)) return; /* * Set CRB timeout at 5ms, (< PI timeout of 10ms) @@ -67,14 +58,14 @@ static void per_hub_init(cnodeid_t cnode) REMOTE_HUB_S(nasid, IIO_ICTP, 0x800); REMOTE_HUB_S(nasid, IIO_ICTO, 0xff); - hub_rtc_init(cnode); + hub_rtc_init(nasid); #ifdef CONFIG_REPLICATE_EXHANDLERS /* * If this is not a headless node initialization, * copy over the caliased exception handlers. 
*/ - if (get_compact_nodeid() == cnode) { + if (get_nasid() == nasid) { extern char except_vec2_generic, except_vec3_generic; extern void build_tlb_refill_handler(void); @@ -92,15 +83,15 @@ void per_cpu_init(void) { int cpu = smp_processor_id(); int slice = LOCAL_HUB_L(PI_CPU_NUM); - cnodeid_t cnode = get_compact_nodeid(); - struct hub_data *hub = hub_data(cnode); + nasid_t nasid = get_nasid(); + struct hub_data *hub = hub_data(nasid); if (test_and_set_bit(slice, &hub->slice_map)) return; clear_c0_status(ST0_IM); - per_hub_init(cnode); + per_hub_init(nasid); cpu_time_init(); install_ipi(); @@ -122,14 +113,6 @@ get_nasid(void) >> NSRI_NODEID_SHFT); } -/* - * Map the physical node id to a virtual node id (virtual node ids are contiguous). - */ -cnodeid_t get_compact_nodeid(void) -{ - return NASID_TO_COMPACT_NODEID(get_nasid()); -} - extern void ip27_reboot_setup(void); void __init plat_mem_setup(void) diff --git a/arch/mips/sgi-ip27/ip27-irq.c b/arch/mips/sgi-ip27/ip27-irq.c index 37be04975831..5aef06e28a5b 100644 --- a/arch/mips/sgi-ip27/ip27-irq.c +++ b/arch/mips/sgi-ip27/ip27-irq.c @@ -73,7 +73,7 @@ static void setup_hub_mask(struct hub_irq_data *hd, const struct cpumask *mask) int cpu; cpu = cpumask_first_and(mask, cpu_online_mask); - nasid = COMPACT_TO_NASID_NODEID(cpu_to_node(cpu)); + nasid = cpu_to_node(cpu); hd->cpu = cpu; if (!cputoslice(cpu)) { hd->irq_mask[0] = REMOTE_HUB_PTR(nasid, PI_INT_MASK0_A); @@ -137,7 +137,7 @@ static int hub_domain_alloc(struct irq_domain *domain, unsigned int virq, handle_level_irq, NULL, NULL); /* use CPU connected to nearest hub */ - hub = hub_data(NASID_TO_COMPACT_NODEID(info->nasid)); + hub = hub_data(info->nasid); setup_hub_mask(hd, &hub->h_cpus); /* Make sure it's not already pending when we connect it. */ diff --git a/arch/mips/sgi-ip27/ip27-klconfig.c b/arch/mips/sgi-ip27/ip27-klconfig.c index 41171ff0c75e..6cb2160e7689 100644 --- a/arch/mips/sgi-ip27/ip27-klconfig.c +++ b/arch/mips/sgi-ip27/ip27-klconfig.c @@ -73,11 +73,6 @@ lboard_t *find_lboard_class(lboard_t *start, unsigned char brd_type) return (lboard_t *)NULL; } -cnodeid_t get_cpu_cnode(cpuid_t cpu) -{ - return CPUID_TO_COMPACT_NODEID(cpu); -} - klcpu_t *nasid_slice_to_cpuinfo(nasid_t nasid, int slice) { lboard_t *brd; @@ -102,19 +97,14 @@ klcpu_t *sn_get_cpuinfo(cpuid_t cpu) nasid_t nasid; int slice; klcpu_t *acpu; - gda_t *gdap = GDA; - cnodeid_t cnode; if (!(cpu < MAXCPUS)) { printk("sn_get_cpuinfo: illegal cpuid 0x%lx\n", cpu); return NULL; } - cnode = get_cpu_cnode(cpu); - if (cnode == INVALID_CNODEID) - return NULL; - - if ((nasid = gdap->g_nasidtable[cnode]) == INVALID_NASID) + nasid = cputonasid(cpu); + if (nasid == INVALID_NASID) return NULL; for (slice = 0; slice < CPUS_PER_NODE; slice++) { diff --git a/arch/mips/sgi-ip27/ip27-klnuma.c b/arch/mips/sgi-ip27/ip27-klnuma.c index a4f01328de62..ee1c6ff4aa00 100644 --- a/arch/mips/sgi-ip27/ip27-klnuma.c +++ b/arch/mips/sgi-ip27/ip27-klnuma.c @@ -38,13 +38,13 @@ void __init setup_replication_mask(void) #error Kernel replication works with mapped kernel support. No calias support. 
#endif { - cnodeid_t cnode; + nasid_t nasid; - for_each_online_node(cnode) { - if (cnode == 0) + for_each_online_node(nasid) { + if (nasid == 0) continue; /* Advertise that we have a copy of the kernel */ - cpumask_set_cpu(cnode, &ktext_repmask); + cpumask_set_cpu(nasid, &ktext_repmask); } } #endif @@ -85,7 +85,6 @@ static __init void copy_kernel(nasid_t dest_nasid) void __init replicate_kernel_text(void) { - cnodeid_t cnode; nasid_t client_nasid; nasid_t server_nasid; @@ -94,13 +93,12 @@ void __init replicate_kernel_text(void) /* Record where the master node should get its kernel text */ set_ktext_source(master_nasid, master_nasid); - for_each_online_node(cnode) { - if (cnode == 0) + for_each_online_node(client_nasid) { + if (client_nasid == 0) continue; - client_nasid = COMPACT_TO_NASID_NODEID(cnode); /* Check if this node should get a copy of the kernel */ - if (cpumask_test_cpu(cnode, &ktext_repmask)) { + if (cpumask_test_cpu(client_nasid, &ktext_repmask)) { server_nasid = client_nasid; copy_kernel(server_nasid); } @@ -115,17 +113,16 @@ void __init replicate_kernel_text(void) * data structures on the first couple of pages of the first slot of each * node. If this is the case, getfirstfree(node) > getslotstart(node, 0). */ -unsigned long node_getfirstfree(cnodeid_t cnode) +unsigned long node_getfirstfree(nasid_t nasid) { unsigned long loadbase = REP_BASE; - nasid_t nasid = COMPACT_TO_NASID_NODEID(cnode); unsigned long offset; #ifdef CONFIG_MAPPED_KERNEL loadbase += 16777216; #endif offset = PAGE_ALIGN((unsigned long)(&_end)) - loadbase; - if ((cnode == 0) || (cpumask_test_cpu(cnode, &ktext_repmask))) + if ((nasid == 0) || (cpumask_test_cpu(nasid, &ktext_repmask))) return TO_NODE(nasid, offset) >> PAGE_SHIFT; else return KDM_TO_PHYS(PAGE_ALIGN(SYMMON_STK_ADDR(nasid, 0))) >> PAGE_SHIFT; diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c index fb077a947575..f63f30b2cdcd 100644 --- a/arch/mips/sgi-ip27/ip27-memory.c +++ b/arch/mips/sgi-ip27/ip27-memory.c @@ -44,23 +44,23 @@ static int is_fine_dirmode(void) return ((LOCAL_HUB_L(NI_STATUS_REV_ID) & NSRI_REGIONSIZE_MASK) >> NSRI_REGIONSIZE_SHFT) & REGIONSIZE_FINE; } -static u64 get_region(cnodeid_t cnode) +static u64 get_region(nasid_t nasid) { if (fine_mode) - return COMPACT_TO_NASID_NODEID(cnode) >> NASID_TO_FINEREG_SHFT; + return nasid >> NASID_TO_FINEREG_SHFT; else - return COMPACT_TO_NASID_NODEID(cnode) >> NASID_TO_COARSEREG_SHFT; + return nasid >> NASID_TO_COARSEREG_SHFT; } static u64 region_mask; static void gen_region_mask(u64 *region_mask) { - cnodeid_t cnode; + nasid_t nasid; (*region_mask) = 0; - for_each_online_node(cnode) { - (*region_mask) |= 1ULL << get_region(cnode); + for_each_online_node(nasid) { + (*region_mask) |= 1ULL << get_region(nasid); } } @@ -111,16 +111,11 @@ static int __init compute_node_distance(nasid_t nasid_a, nasid_t nasid_b) { klrou_t *router, *router_a = NULL, *router_b = NULL; lboard_t *brd, *dest_brd; - cnodeid_t cnode; nasid_t nasid; int port; /* Figure out which routers nodes in question are connected to */ - for_each_online_node(cnode) { - nasid = COMPACT_TO_NASID_NODEID(cnode); - - if (nasid == -1) continue; - + for_each_online_node(nasid) { brd = find_lboard_class((lboard_t *)KL_CONFIG_INFO(nasid), KLTYPE_ROUTER); @@ -176,19 +171,16 @@ static int __init compute_node_distance(nasid_t nasid_a, nasid_t nasid_b) static void __init init_topology_matrix(void) { - nasid_t nasid, nasid2; - cnodeid_t row, col; + nasid_t row, col; for (row = 0; row < MAX_COMPACT_NODES; row++) for 
(col = 0; col < MAX_COMPACT_NODES; col++) __node_distances[row][col] = -1; for_each_online_node(row) { - nasid = COMPACT_TO_NASID_NODEID(row); for_each_online_node(col) { - nasid2 = COMPACT_TO_NASID_NODEID(col); __node_distances[row][col] = - compute_node_distance(nasid, nasid2); + compute_node_distance(row, col); } } } @@ -196,12 +188,11 @@ static void __init init_topology_matrix(void) static void __init dump_topology(void) { nasid_t nasid; - cnodeid_t cnode; lboard_t *brd, *dest_brd; int port; int router_num = 0; klrou_t *router; - cnodeid_t row, col; + nasid_t row, col; pr_info("************** Topology ********************\n"); @@ -216,11 +207,7 @@ static void __init dump_topology(void) pr_cont("\n"); } - for_each_online_node(cnode) { - nasid = COMPACT_TO_NASID_NODEID(cnode); - - if (nasid == -1) continue; - + for_each_online_node(nasid) { brd = find_lboard_class((lboard_t *)KL_CONFIG_INFO(nasid), KLTYPE_ROUTER); @@ -254,21 +241,17 @@ static void __init dump_topology(void) } } -static unsigned long __init slot_getbasepfn(cnodeid_t cnode, int slot) +static unsigned long __init slot_getbasepfn(nasid_t nasid, int slot) { - nasid_t nasid = COMPACT_TO_NASID_NODEID(cnode); - return ((unsigned long)nasid << PFN_NASIDSHFT) | (slot << SLOT_PFNSHIFT); } -static unsigned long __init slot_psize_compute(cnodeid_t node, int slot) +static unsigned long __init slot_psize_compute(nasid_t nasid, int slot) { - nasid_t nasid; lboard_t *brd; klmembnk_t *banks; unsigned long size; - nasid = COMPACT_TO_NASID_NODEID(node); /* Find the node board */ brd = find_lboard((lboard_t *)KL_CONFIG_INFO(nasid), KLTYPE_IP27); if (!brd) @@ -298,7 +281,7 @@ static unsigned long __init slot_psize_compute(cnodeid_t node, int slot) static void __init mlreset(void) { - int i; + nasid_t nasid; master_nasid = get_nasid(); fine_mode = is_fine_dirmode(); @@ -321,11 +304,7 @@ static void __init mlreset(void) /* * Set all nodes' calias sizes to 8k */ - for_each_online_node(i) { - nasid_t nasid; - - nasid = COMPACT_TO_NASID_NODEID(i); - + for_each_online_node(nasid) { /* * Always have node 0 in the region mask, otherwise * CALIAS accesses get exceptions since the hub @@ -354,7 +333,7 @@ static void __init szmem(void) { unsigned long slot_psize, slot0sz = 0, nodebytes; /* Hack to detect problem configs */ int slot; - cnodeid_t node; + nasid_t node; for_each_online_node(node) { nodebytes = 0; @@ -384,7 +363,7 @@ static void __init szmem(void) } } -static void __init node_mem_init(cnodeid_t node) +static void __init node_mem_init(nasid_t node) { unsigned long slot_firstpfn = slot_getbasepfn(node, 0); unsigned long slot_freepfn = node_getfirstfree(node); @@ -431,7 +410,7 @@ static struct node_data null_node = { */ void __init prom_meminit(void) { - cnodeid_t node; + nasid_t node; mlreset(); szmem(); diff --git a/arch/mips/sgi-ip27/ip27-nmi.c b/arch/mips/sgi-ip27/ip27-nmi.c index 3aae388561d9..daf3670d94e7 100644 --- a/arch/mips/sgi-ip27/ip27-nmi.c +++ b/arch/mips/sgi-ip27/ip27-nmi.c @@ -17,8 +17,6 @@ #define NODE_NUM_CPUS(n) CPUS_PER_NODE #endif -#define CNODEID_NONE (cnodeid_t)-1 - typedef unsigned long machreg_t; static arch_spinlock_t nmi_lock = __ARCH_SPIN_LOCK_UNLOCKED; @@ -152,16 +150,10 @@ void nmi_dump_hub_irq(nasid_t nasid, int slice) * Copy the cpu registers which have been saved in the IP27prom format * into the eframe format for the node under consideration. 
*/ -void nmi_node_eframe_save(cnodeid_t cnode) +void nmi_node_eframe_save(nasid_t nasid) { - nasid_t nasid; int slice; - /* Make sure that we have a valid node */ - if (cnode == CNODEID_NONE) - return; - - nasid = COMPACT_TO_NASID_NODEID(cnode); if (nasid == INVALID_NASID) return; @@ -178,10 +170,10 @@ void nmi_node_eframe_save(cnodeid_t cnode) void nmi_eframes_save(void) { - cnodeid_t cnode; + nasid_t nasid; - for_each_online_node(cnode) - nmi_node_eframe_save(cnode); + for_each_online_node(nasid) + nmi_node_eframe_save(nasid); } void diff --git a/arch/mips/sgi-ip27/ip27-reset.c b/arch/mips/sgi-ip27/ip27-reset.c index e44a15d4f573..c90228d0d4c2 100644 --- a/arch/mips/sgi-ip27/ip27-reset.c +++ b/arch/mips/sgi-ip27/ip27-reset.c @@ -45,8 +45,7 @@ static void ip27_machine_restart(char *command) #endif #if 0 for_each_online_node(i) - REMOTE_HUB_S(COMPACT_TO_NASID_NODEID(i), PROMOP_REG, - PROMOP_REBOOT); + REMOTE_HUB_S(i, PROMOP_REG, PROMOP_REBOOT); #else LOCAL_HUB_S(NI_PORT_RESET, NPR_PORTRESET | NPR_LOCALRESET); #endif @@ -61,8 +60,7 @@ static void ip27_machine_halt(void) smp_send_stop(); #endif for_each_online_node(i) - REMOTE_HUB_S(COMPACT_TO_NASID_NODEID(i), PROMOP_REG, - PROMOP_RESTART); + REMOTE_HUB_S(i, PROMOP_REG, PROMOP_RESTART); LOCAL_HUB_S(NI_PORT_RESET, NPR_PORTRESET | NPR_LOCALRESET); noreturn; } diff --git a/arch/mips/sgi-ip27/ip27-smp.c b/arch/mips/sgi-ip27/ip27-smp.c index 20b81209c6b8..386702abe660 100644 --- a/arch/mips/sgi-ip27/ip27-smp.c +++ b/arch/mips/sgi-ip27/ip27-smp.c @@ -31,34 +31,13 @@ * Takes as first input the PROM assigned cpu id, and the kernel * assigned cpu id as the second. */ -static void alloc_cpupda(cpuid_t cpu, int cpunum) +static void alloc_cpupda(nasid_t nasid, cpuid_t cpu, int cpunum) { - cnodeid_t node = get_cpu_cnode(cpu); - nasid_t nasid = COMPACT_TO_NASID_NODEID(node); - cputonasid(cpunum) = nasid; - sn_cpu_info[cpunum].p_nodeid = node; cputoslice(cpunum) = get_cpu_slice(cpu); } -static nasid_t get_actual_nasid(lboard_t *brd) -{ - klhub_t *hub; - - if (!brd) - return INVALID_NASID; - - /* find out if we are a completely disabled brd. 
*/ - hub = (klhub_t *)find_first_component(brd, KLSTRUCT_HUB); - if (!hub) - return INVALID_NASID; - if (!(hub->hub_info.flags & KLINFO_ENABLE)) /* disabled node brd */ - return hub->hub_info.physid; - else - return brd->brd_nasid; -} - -static int do_cpumask(cnodeid_t cnode, nasid_t nasid, int highest) +static int do_cpumask(nasid_t nasid, int highest) { static int tot_cpus_found = 0; lboard_t *brd; @@ -72,16 +51,13 @@ static int do_cpumask(cnodeid_t cnode, nasid_t nasid, int highest) acpu = (klcpu_t *)find_first_component(brd, KLSTRUCT_CPU); while (acpu) { cpuid = acpu->cpu_info.virtid; - /* cnode is not valid for completely disabled brds */ - if (get_actual_nasid(brd) == brd->brd_nasid) - cpuid_to_compact_node[cpuid] = cnode; - if (cpuid > highest) - highest = cpuid; /* Only let it join in if it's marked enabled */ if ((acpu->cpu_info.flags & KLINFO_ENABLE) && (tot_cpus_found != NR_CPUS)) { + if (cpuid > highest) + highest = cpuid; set_cpu_possible(cpuid, true); - alloc_cpupda(cpuid, tot_cpus_found); + alloc_cpupda(nasid, cpuid, tot_cpus_found); cpus_found++; tot_cpus_found++; } @@ -103,16 +79,6 @@ void cpu_node_probe(void) int i, highest = 0; gda_t *gdap = GDA; - /* - * Initialize the arrays to invalid nodeid (-1) - */ - for (i = 0; i < MAX_COMPACT_NODES; i++) - compact_to_nasid_node[i] = INVALID_NASID; - for (i = 0; i < MAX_NASIDS; i++) - nasid_to_compact_node[i] = INVALID_CNODEID; - for (i = 0; i < MAXCPUS; i++) - cpuid_to_compact_node[i] = INVALID_CNODEID; - /* * MCD - this whole "compact node" stuff can probably be dropped, * as we can handle sparse numbering now @@ -122,10 +88,8 @@ void cpu_node_probe(void) nasid_t nasid = gdap->g_nasidtable[i]; if (nasid == INVALID_NASID) break; - compact_to_nasid_node[i] = nasid; - nasid_to_compact_node[nasid] = i; - node_set_online(num_online_nodes()); - highest = do_cpumask(i, nasid, highest); + node_set_online(nasid); + highest = do_cpumask(nasid, highest); } printk("Discovered %d cpus on %d nodes\n", highest + 1, num_online_nodes()); @@ -162,11 +126,10 @@ static void ip27_send_ipi_single(int destid, unsigned int action) irq += cputoslice(destid); /* - * Convert the compact hub number to the NASID to get the correct - * part of the address space. Then set the interrupt bit associated - * with the CPU we want to send the interrupt to. + * Set the interrupt bit associated with the CPU we want to + * send the interrupt to. */ - REMOTE_HUB_SEND_INTR(COMPACT_TO_NASID_NODEID(cpu_to_node(destid)), irq); + REMOTE_HUB_SEND_INTR(cpu_to_node(destid), irq); } static void ip27_send_ipi_mask(const struct cpumask *mask, unsigned int action) @@ -208,23 +171,20 @@ static int ip27_boot_secondary(int cpu, struct task_struct *idle) static void __init ip27_smp_setup(void) { - cnodeid_t cnode; + nasid_t nasid; - for_each_online_node(cnode) { - if (cnode == 0) + for_each_online_node(nasid) { + if (nasid == 0) continue; - intr_clear_all(COMPACT_TO_NASID_NODEID(cnode)); + intr_clear_all(nasid); } replicate_kernel_text(); /* - * Assumption to be fixed: we're always booted on logical / physical - * processor 0. While we're always running on logical processor 0 - * this still means this is physical processor zero; it might for - * example be disabled in the firmware. 
+ * PROM sets up system, that boot cpu is always first CPU on nasid 0 */ - alloc_cpupda(0, 0); + alloc_cpupda(0, 0, 0); } static void __init ip27_prepare_cpus(unsigned int max_cpus) diff --git a/arch/mips/sgi-ip27/ip27-timer.c b/arch/mips/sgi-ip27/ip27-timer.c index 9b4b9ac621a3..a317ea83f216 100644 --- a/arch/mips/sgi-ip27/ip27-timer.c +++ b/arch/mips/sgi-ip27/ip27-timer.c @@ -170,7 +170,7 @@ void cpu_time_init(void) printk("CPU %d clock is %dMHz.\n", smp_processor_id(), cpu->cpu_speed); } -void hub_rtc_init(cnodeid_t cnode) +void hub_rtc_init(nasid_t nasid) { /* @@ -178,7 +178,7 @@ void hub_rtc_init(cnodeid_t cnode) * If this is not the current node then it is a cpuless * node and timeouts will not happen there. */ - if (get_compact_nodeid() == cnode) { + if (get_nasid() == nasid) { LOCAL_HUB_S(PI_RT_EN_A, 1); LOCAL_HUB_S(PI_RT_EN_B, 1); LOCAL_HUB_S(PI_PROF_EN_A, 0); diff --git a/arch/mips/sgi-ip27/ip27-xtalk.c b/arch/mips/sgi-ip27/ip27-xtalk.c index 4a1f0b0c29e2..5602bb113921 100644 --- a/arch/mips/sgi-ip27/ip27-xtalk.c +++ b/arch/mips/sgi-ip27/ip27-xtalk.c @@ -138,14 +138,12 @@ static int xbow_probe(nasid_t nasid) return 0; } -static void xtalk_probe_node(cnodeid_t nid) +static void xtalk_probe_node(nasid_t nasid) { volatile u64 hubreg; - nasid_t nasid; xwidget_part_num_t partnum; widgetreg_t widget_id; - nasid = COMPACT_TO_NASID_NODEID(nid); hubreg = REMOTE_HUB_L(nasid, IIO_LLP_CSR); /* check whether the link is up */ @@ -173,10 +171,10 @@ static void xtalk_probe_node(cnodeid_t nid) static int __init xtalk_init(void) { - cnodeid_t cnode; + nasid_t nasid; - for_each_online_node(cnode) - xtalk_probe_node(cnode); + for_each_online_node(nasid) + xtalk_probe_node(nasid); return 0; } -- cgit v1.2.3 From 071d2f0b5419d3cf80cc8ba73ec8fe2cedc4cc0c Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Tue, 1 Oct 2019 23:04:32 +0000 Subject: MIPS: r4k-bugs64: Limit R4k bug checks to affected systems Only build the checks for R4k errata workarounds if we expect that the kernel might actually run on a system with an R4k CPU - ie. CONFIG_SYS_HAS_CPU_R4X00=y & we're targeting a pre-MIPSr1 ISA revision. Rename cpu-bugs64.c to r4k-bugs64.c to indicate the fact that the code is specific to R4k CPUs. 
Signed-off-by: Paul Burton Cc: linux-mips@vger.kernel.org --- arch/mips/Kconfig | 4 + arch/mips/include/asm/bugs.h | 18 ++- arch/mips/kernel/Makefile | 2 +- arch/mips/kernel/cpu-bugs64.c | 325 ------------------------------------------ arch/mips/kernel/genex.S | 2 +- arch/mips/kernel/r4k-bugs64.c | 325 ++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 339 insertions(+), 337 deletions(-) delete mode 100644 arch/mips/kernel/cpu-bugs64.c create mode 100644 arch/mips/kernel/r4k-bugs64.c (limited to 'arch') diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index a0bd9bdb5f83..8736bf4420bc 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -2554,6 +2554,10 @@ config CPU_R4000_WORKAROUNDS config CPU_R4400_WORKAROUNDS bool +config CPU_R4X00_BUGS64 + bool + default y if SYS_HAS_CPU_R4X00 && 64BIT && (TARGET_ISA_REV < 1) + config MIPS_ASID_SHIFT int default 6 if CPU_R3000 || CPU_TX39XX diff --git a/arch/mips/include/asm/bugs.h b/arch/mips/include/asm/bugs.h index d8ab8b7129b5..d72dc6e1cf3c 100644 --- a/arch/mips/include/asm/bugs.h +++ b/arch/mips/include/asm/bugs.h @@ -26,9 +26,8 @@ extern void check_bugs64(void); static inline void check_bugs_early(void) { -#ifdef CONFIG_64BIT - check_bugs64_early(); -#endif + if (IS_ENABLED(CONFIG_CPU_R4X00_BUGS64)) + check_bugs64_early(); } static inline void check_bugs(void) @@ -37,19 +36,18 @@ static inline void check_bugs(void) cpu_data[cpu].udelay_val = loops_per_jiffy; check_bugs32(); -#ifdef CONFIG_64BIT - check_bugs64(); -#endif + + if (IS_ENABLED(CONFIG_CPU_R4X00_BUGS64)) + check_bugs64(); } static inline int r4k_daddiu_bug(void) { -#ifdef CONFIG_64BIT + if (!IS_ENABLED(CONFIG_CPU_R4X00_BUGS64)) + return 0; + WARN_ON(daddiu_bug < 0); return daddiu_bug != 0; -#else - return 0; -#endif } #endif /* _ASM_BUGS_H */ diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile index 89b07ea8d249..d6e97df51cfb 100644 --- a/arch/mips/kernel/Makefile +++ b/arch/mips/kernel/Makefile @@ -80,7 +80,7 @@ obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_MAGIC_SYSRQ) += sysrq.o -obj-$(CONFIG_64BIT) += cpu-bugs64.o +obj-$(CONFIG_CPU_R4X00_BUGS64) += r4k-bugs64.o obj-$(CONFIG_I8253) += i8253.o diff --git a/arch/mips/kernel/cpu-bugs64.c b/arch/mips/kernel/cpu-bugs64.c deleted file mode 100644 index 6a7afe7ef4d3..000000000000 --- a/arch/mips/kernel/cpu-bugs64.c +++ /dev/null @@ -1,325 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) 2003, 2004, 2007 Maciej W. Rozycki - */ -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -static char bug64hit[] __initdata = - "reliable operation impossible!\n%s"; -static char nowar[] __initdata = - "Please report to ."; -static char r4kwar[] __initdata = - "Enable CPU_R4000_WORKAROUNDS to rectify."; -static char daddiwar[] __initdata = - "Enable CPU_DADDI_WORKAROUNDS to rectify."; - -static __always_inline __init -void align_mod(const int align, const int mod) -{ - asm volatile( - ".set push\n\t" - ".set noreorder\n\t" - ".balign %0\n\t" - ".rept %1\n\t" - "nop\n\t" - ".endr\n\t" - ".set pop" - : - : "n"(align), "n"(mod)); -} - -static __always_inline __init -void mult_sh_align_mod(long *v1, long *v2, long *w, - const int align, const int mod) -{ - unsigned long flags; - int m1, m2; - long p, s, lv1, lv2, lw; - - /* - * We want the multiply and the shift to be isolated from the - * rest of the code to disable gcc optimizations. 
Hence the - * asm statements that execute nothing, but make gcc not know - * what the values of m1, m2 and s are and what lv2 and p are - * used for. - */ - - local_irq_save(flags); - /* - * The following code leads to a wrong result of the first - * dsll32 when executed on R4000 rev. 2.2 or 3.0 (PRId - * 00000422 or 00000430, respectively). - * - * See "MIPS R4000PC/SC Errata, Processor Revision 2.2 and - * 3.0" by MIPS Technologies, Inc., errata #16 and #28 for - * details. I got no permission to duplicate them here, - * sigh... --macro - */ - asm volatile( - "" - : "=r" (m1), "=r" (m2), "=r" (s) - : "0" (5), "1" (8), "2" (5)); - align_mod(align, mod); - /* - * The trailing nop is needed to fulfill the two-instruction - * requirement between reading hi/lo and staring a mult/div. - * Leaving it out may cause gas insert a nop itself breaking - * the desired alignment of the next chunk. - */ - asm volatile( - ".set push\n\t" - ".set noat\n\t" - ".set noreorder\n\t" - ".set nomacro\n\t" - "mult %2, %3\n\t" - "dsll32 %0, %4, %5\n\t" - "mflo $0\n\t" - "dsll32 %1, %4, %5\n\t" - "nop\n\t" - ".set pop" - : "=&r" (lv1), "=r" (lw) - : "r" (m1), "r" (m2), "r" (s), "I" (0) - : "hi", "lo", "$0"); - /* We have to use single integers for m1 and m2 and a double - * one for p to be sure the mulsidi3 gcc's RTL multiplication - * instruction has the workaround applied. Older versions of - * gcc have correct umulsi3 and mulsi3, but other - * multiplication variants lack the workaround. - */ - asm volatile( - "" - : "=r" (m1), "=r" (m2), "=r" (s) - : "0" (m1), "1" (m2), "2" (s)); - align_mod(align, mod); - p = m1 * m2; - lv2 = s << 32; - asm volatile( - "" - : "=r" (lv2) - : "0" (lv2), "r" (p)); - local_irq_restore(flags); - - *v1 = lv1; - *v2 = lv2; - *w = lw; -} - -static __always_inline __init void check_mult_sh(void) -{ - long v1[8], v2[8], w[8]; - int bug, fix, i; - - printk("Checking for the multiply/shift bug... "); - - /* - * Testing discovered false negatives for certain code offsets - * into cache lines. Hence we test all possible offsets for - * the worst assumption of an R4000 I-cache line width of 32 - * bytes. - * - * We can't use a loop as alignment directives need to be - * immediates. - */ - mult_sh_align_mod(&v1[0], &v2[0], &w[0], 32, 0); - mult_sh_align_mod(&v1[1], &v2[1], &w[1], 32, 1); - mult_sh_align_mod(&v1[2], &v2[2], &w[2], 32, 2); - mult_sh_align_mod(&v1[3], &v2[3], &w[3], 32, 3); - mult_sh_align_mod(&v1[4], &v2[4], &w[4], 32, 4); - mult_sh_align_mod(&v1[5], &v2[5], &w[5], 32, 5); - mult_sh_align_mod(&v1[6], &v2[6], &w[6], 32, 6); - mult_sh_align_mod(&v1[7], &v2[7], &w[7], 32, 7); - - bug = 0; - for (i = 0; i < 8; i++) - if (v1[i] != w[i]) - bug = 1; - - if (bug == 0) { - pr_cont("no.\n"); - return; - } - - pr_cont("yes, workaround... "); - - fix = 1; - for (i = 0; i < 8; i++) - if (v2[i] != w[i]) - fix = 0; - - if (fix == 1) { - pr_cont("yes.\n"); - return; - } - - pr_cont("no.\n"); - panic(bug64hit, !R4000_WAR ? r4kwar : nowar); -} - -static volatile int daddi_ov; - -asmlinkage void __init do_daddi_ov(struct pt_regs *regs) -{ - enum ctx_state prev_state; - - prev_state = exception_enter(); - daddi_ov = 1; - regs->cp0_epc += 4; - exception_exit(prev_state); -} - -static __init void check_daddi(void) -{ - extern asmlinkage void handle_daddi_ov(void); - unsigned long flags; - void *handler; - long v, tmp; - - printk("Checking for the daddi bug... 
"); - - local_irq_save(flags); - handler = set_except_vector(EXCCODE_OV, handle_daddi_ov); - /* - * The following code fails to trigger an overflow exception - * when executed on R4000 rev. 2.2 or 3.0 (PRId 00000422 or - * 00000430, respectively). - * - * See "MIPS R4000PC/SC Errata, Processor Revision 2.2 and - * 3.0" by MIPS Technologies, Inc., erratum #23 for details. - * I got no permission to duplicate it here, sigh... --macro - */ - asm volatile( - ".set push\n\t" - ".set noat\n\t" - ".set noreorder\n\t" - ".set nomacro\n\t" - "addiu %1, $0, %2\n\t" - "dsrl %1, %1, 1\n\t" -#ifdef HAVE_AS_SET_DADDI - ".set daddi\n\t" -#endif - "daddi %0, %1, %3\n\t" - ".set pop" - : "=r" (v), "=&r" (tmp) - : "I" (0xffffffffffffdb9aUL), "I" (0x1234)); - set_except_vector(EXCCODE_OV, handler); - local_irq_restore(flags); - - if (daddi_ov) { - pr_cont("no.\n"); - return; - } - - pr_cont("yes, workaround... "); - - local_irq_save(flags); - handler = set_except_vector(EXCCODE_OV, handle_daddi_ov); - asm volatile( - "addiu %1, $0, %2\n\t" - "dsrl %1, %1, 1\n\t" - "daddi %0, %1, %3" - : "=r" (v), "=&r" (tmp) - : "I" (0xffffffffffffdb9aUL), "I" (0x1234)); - set_except_vector(EXCCODE_OV, handler); - local_irq_restore(flags); - - if (daddi_ov) { - pr_cont("yes.\n"); - return; - } - - pr_cont("no.\n"); - panic(bug64hit, !DADDI_WAR ? daddiwar : nowar); -} - -int daddiu_bug = IS_ENABLED(CONFIG_CPU_MIPSR6) ? 0 : -1; - -static __init void check_daddiu(void) -{ - long v, w, tmp; - - printk("Checking for the daddiu bug... "); - - /* - * The following code leads to a wrong result of daddiu when - * executed on R4400 rev. 1.0 (PRId 00000440). - * - * See "MIPS R4400PC/SC Errata, Processor Revision 1.0" by - * MIPS Technologies, Inc., erratum #7 for details. - * - * According to "MIPS R4000PC/SC Errata, Processor Revision - * 2.2 and 3.0" by MIPS Technologies, Inc., erratum #41 this - * problem affects R4000 rev. 2.2 and 3.0 (PRId 00000422 and - * 00000430, respectively), too. Testing failed to trigger it - * so far. - * - * I got no permission to duplicate the errata here, sigh... - * --macro - */ - asm volatile( - ".set push\n\t" - ".set noat\n\t" - ".set noreorder\n\t" - ".set nomacro\n\t" - "addiu %2, $0, %3\n\t" - "dsrl %2, %2, 1\n\t" -#ifdef HAVE_AS_SET_DADDI - ".set daddi\n\t" -#endif - "daddiu %0, %2, %4\n\t" - "addiu %1, $0, %4\n\t" - "daddu %1, %2\n\t" - ".set pop" - : "=&r" (v), "=&r" (w), "=&r" (tmp) - : "I" (0xffffffffffffdb9aUL), "I" (0x1234)); - - daddiu_bug = v != w; - - if (!daddiu_bug) { - pr_cont("no.\n"); - return; - } - - pr_cont("yes, workaround... "); - - asm volatile( - "addiu %2, $0, %3\n\t" - "dsrl %2, %2, 1\n\t" - "daddiu %0, %2, %4\n\t" - "addiu %1, $0, %4\n\t" - "daddu %1, %2" - : "=&r" (v), "=&r" (w), "=&r" (tmp) - : "I" (0xffffffffffffdb9aUL), "I" (0x1234)); - - if (v == w) { - pr_cont("yes.\n"); - return; - } - - pr_cont("no.\n"); - panic(bug64hit, !DADDI_WAR ? daddiwar : nowar); -} - -void __init check_bugs64_early(void) -{ - if (!IS_ENABLED(CONFIG_CPU_MIPSR6)) { - check_mult_sh(); - check_daddiu(); - } -} - -void __init check_bugs64(void) -{ - if (!IS_ENABLED(CONFIG_CPU_MIPSR6)) - check_daddi(); -} diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S index efde27c99414..d586cdac9605 100644 --- a/arch/mips/kernel/genex.S +++ b/arch/mips/kernel/genex.S @@ -657,7 +657,7 @@ isrdhwr: .set pop END(handle_ri_rdhwr) -#ifdef CONFIG_64BIT +#ifdef CONFIG_CPU_R4X00_BUGS64 /* A temporary overflow handler used by check_daddi(). 
*/ __INIT diff --git a/arch/mips/kernel/r4k-bugs64.c b/arch/mips/kernel/r4k-bugs64.c new file mode 100644 index 000000000000..6a7afe7ef4d3 --- /dev/null +++ b/arch/mips/kernel/r4k-bugs64.c @@ -0,0 +1,325 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2003, 2004, 2007 Maciej W. Rozycki + */ +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +static char bug64hit[] __initdata = + "reliable operation impossible!\n%s"; +static char nowar[] __initdata = + "Please report to ."; +static char r4kwar[] __initdata = + "Enable CPU_R4000_WORKAROUNDS to rectify."; +static char daddiwar[] __initdata = + "Enable CPU_DADDI_WORKAROUNDS to rectify."; + +static __always_inline __init +void align_mod(const int align, const int mod) +{ + asm volatile( + ".set push\n\t" + ".set noreorder\n\t" + ".balign %0\n\t" + ".rept %1\n\t" + "nop\n\t" + ".endr\n\t" + ".set pop" + : + : "n"(align), "n"(mod)); +} + +static __always_inline __init +void mult_sh_align_mod(long *v1, long *v2, long *w, + const int align, const int mod) +{ + unsigned long flags; + int m1, m2; + long p, s, lv1, lv2, lw; + + /* + * We want the multiply and the shift to be isolated from the + * rest of the code to disable gcc optimizations. Hence the + * asm statements that execute nothing, but make gcc not know + * what the values of m1, m2 and s are and what lv2 and p are + * used for. + */ + + local_irq_save(flags); + /* + * The following code leads to a wrong result of the first + * dsll32 when executed on R4000 rev. 2.2 or 3.0 (PRId + * 00000422 or 00000430, respectively). + * + * See "MIPS R4000PC/SC Errata, Processor Revision 2.2 and + * 3.0" by MIPS Technologies, Inc., errata #16 and #28 for + * details. I got no permission to duplicate them here, + * sigh... --macro + */ + asm volatile( + "" + : "=r" (m1), "=r" (m2), "=r" (s) + : "0" (5), "1" (8), "2" (5)); + align_mod(align, mod); + /* + * The trailing nop is needed to fulfill the two-instruction + * requirement between reading hi/lo and staring a mult/div. + * Leaving it out may cause gas insert a nop itself breaking + * the desired alignment of the next chunk. + */ + asm volatile( + ".set push\n\t" + ".set noat\n\t" + ".set noreorder\n\t" + ".set nomacro\n\t" + "mult %2, %3\n\t" + "dsll32 %0, %4, %5\n\t" + "mflo $0\n\t" + "dsll32 %1, %4, %5\n\t" + "nop\n\t" + ".set pop" + : "=&r" (lv1), "=r" (lw) + : "r" (m1), "r" (m2), "r" (s), "I" (0) + : "hi", "lo", "$0"); + /* We have to use single integers for m1 and m2 and a double + * one for p to be sure the mulsidi3 gcc's RTL multiplication + * instruction has the workaround applied. Older versions of + * gcc have correct umulsi3 and mulsi3, but other + * multiplication variants lack the workaround. + */ + asm volatile( + "" + : "=r" (m1), "=r" (m2), "=r" (s) + : "0" (m1), "1" (m2), "2" (s)); + align_mod(align, mod); + p = m1 * m2; + lv2 = s << 32; + asm volatile( + "" + : "=r" (lv2) + : "0" (lv2), "r" (p)); + local_irq_restore(flags); + + *v1 = lv1; + *v2 = lv2; + *w = lw; +} + +static __always_inline __init void check_mult_sh(void) +{ + long v1[8], v2[8], w[8]; + int bug, fix, i; + + printk("Checking for the multiply/shift bug... "); + + /* + * Testing discovered false negatives for certain code offsets + * into cache lines. Hence we test all possible offsets for + * the worst assumption of an R4000 I-cache line width of 32 + * bytes. + * + * We can't use a loop as alignment directives need to be + * immediates. 
+ */ + mult_sh_align_mod(&v1[0], &v2[0], &w[0], 32, 0); + mult_sh_align_mod(&v1[1], &v2[1], &w[1], 32, 1); + mult_sh_align_mod(&v1[2], &v2[2], &w[2], 32, 2); + mult_sh_align_mod(&v1[3], &v2[3], &w[3], 32, 3); + mult_sh_align_mod(&v1[4], &v2[4], &w[4], 32, 4); + mult_sh_align_mod(&v1[5], &v2[5], &w[5], 32, 5); + mult_sh_align_mod(&v1[6], &v2[6], &w[6], 32, 6); + mult_sh_align_mod(&v1[7], &v2[7], &w[7], 32, 7); + + bug = 0; + for (i = 0; i < 8; i++) + if (v1[i] != w[i]) + bug = 1; + + if (bug == 0) { + pr_cont("no.\n"); + return; + } + + pr_cont("yes, workaround... "); + + fix = 1; + for (i = 0; i < 8; i++) + if (v2[i] != w[i]) + fix = 0; + + if (fix == 1) { + pr_cont("yes.\n"); + return; + } + + pr_cont("no.\n"); + panic(bug64hit, !R4000_WAR ? r4kwar : nowar); +} + +static volatile int daddi_ov; + +asmlinkage void __init do_daddi_ov(struct pt_regs *regs) +{ + enum ctx_state prev_state; + + prev_state = exception_enter(); + daddi_ov = 1; + regs->cp0_epc += 4; + exception_exit(prev_state); +} + +static __init void check_daddi(void) +{ + extern asmlinkage void handle_daddi_ov(void); + unsigned long flags; + void *handler; + long v, tmp; + + printk("Checking for the daddi bug... "); + + local_irq_save(flags); + handler = set_except_vector(EXCCODE_OV, handle_daddi_ov); + /* + * The following code fails to trigger an overflow exception + * when executed on R4000 rev. 2.2 or 3.0 (PRId 00000422 or + * 00000430, respectively). + * + * See "MIPS R4000PC/SC Errata, Processor Revision 2.2 and + * 3.0" by MIPS Technologies, Inc., erratum #23 for details. + * I got no permission to duplicate it here, sigh... --macro + */ + asm volatile( + ".set push\n\t" + ".set noat\n\t" + ".set noreorder\n\t" + ".set nomacro\n\t" + "addiu %1, $0, %2\n\t" + "dsrl %1, %1, 1\n\t" +#ifdef HAVE_AS_SET_DADDI + ".set daddi\n\t" +#endif + "daddi %0, %1, %3\n\t" + ".set pop" + : "=r" (v), "=&r" (tmp) + : "I" (0xffffffffffffdb9aUL), "I" (0x1234)); + set_except_vector(EXCCODE_OV, handler); + local_irq_restore(flags); + + if (daddi_ov) { + pr_cont("no.\n"); + return; + } + + pr_cont("yes, workaround... "); + + local_irq_save(flags); + handler = set_except_vector(EXCCODE_OV, handle_daddi_ov); + asm volatile( + "addiu %1, $0, %2\n\t" + "dsrl %1, %1, 1\n\t" + "daddi %0, %1, %3" + : "=r" (v), "=&r" (tmp) + : "I" (0xffffffffffffdb9aUL), "I" (0x1234)); + set_except_vector(EXCCODE_OV, handler); + local_irq_restore(flags); + + if (daddi_ov) { + pr_cont("yes.\n"); + return; + } + + pr_cont("no.\n"); + panic(bug64hit, !DADDI_WAR ? daddiwar : nowar); +} + +int daddiu_bug = IS_ENABLED(CONFIG_CPU_MIPSR6) ? 0 : -1; + +static __init void check_daddiu(void) +{ + long v, w, tmp; + + printk("Checking for the daddiu bug... "); + + /* + * The following code leads to a wrong result of daddiu when + * executed on R4400 rev. 1.0 (PRId 00000440). + * + * See "MIPS R4400PC/SC Errata, Processor Revision 1.0" by + * MIPS Technologies, Inc., erratum #7 for details. + * + * According to "MIPS R4000PC/SC Errata, Processor Revision + * 2.2 and 3.0" by MIPS Technologies, Inc., erratum #41 this + * problem affects R4000 rev. 2.2 and 3.0 (PRId 00000422 and + * 00000430, respectively), too. Testing failed to trigger it + * so far. + * + * I got no permission to duplicate the errata here, sigh... 
+ * --macro + */ + asm volatile( + ".set push\n\t" + ".set noat\n\t" + ".set noreorder\n\t" + ".set nomacro\n\t" + "addiu %2, $0, %3\n\t" + "dsrl %2, %2, 1\n\t" +#ifdef HAVE_AS_SET_DADDI + ".set daddi\n\t" +#endif + "daddiu %0, %2, %4\n\t" + "addiu %1, $0, %4\n\t" + "daddu %1, %2\n\t" + ".set pop" + : "=&r" (v), "=&r" (w), "=&r" (tmp) + : "I" (0xffffffffffffdb9aUL), "I" (0x1234)); + + daddiu_bug = v != w; + + if (!daddiu_bug) { + pr_cont("no.\n"); + return; + } + + pr_cont("yes, workaround... "); + + asm volatile( + "addiu %2, $0, %3\n\t" + "dsrl %2, %2, 1\n\t" + "daddiu %0, %2, %4\n\t" + "addiu %1, $0, %4\n\t" + "daddu %1, %2" + : "=&r" (v), "=&r" (w), "=&r" (tmp) + : "I" (0xffffffffffffdb9aUL), "I" (0x1234)); + + if (v == w) { + pr_cont("yes.\n"); + return; + } + + pr_cont("no.\n"); + panic(bug64hit, !DADDI_WAR ? daddiwar : nowar); +} + +void __init check_bugs64_early(void) +{ + if (!IS_ENABLED(CONFIG_CPU_MIPSR6)) { + check_mult_sh(); + check_daddiu(); + } +} + +void __init check_bugs64(void) +{ + if (!IS_ENABLED(CONFIG_CPU_MIPSR6)) + check_daddi(); +} -- cgit v1.2.3 From 5045d06b37361c6fbe05357c765f6b9a6fcaea87 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Tue, 1 Oct 2019 23:04:32 +0000 Subject: MIPS: r4k-bugs64: Drop CONFIG_CPU_MIPSR6 checks The r4k-bugs64 code will no longer be built for MIPSr6 kernel configurations, so there's no need to perform checks for MIPSr6 within the code. Drop those redundant checks. Signed-off-by: Paul Burton Cc: linux-mips@vger.kernel.org --- arch/mips/kernel/r4k-bugs64.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/mips/kernel/r4k-bugs64.c b/arch/mips/kernel/r4k-bugs64.c index 6a7afe7ef4d3..1ff19f1ea5ca 100644 --- a/arch/mips/kernel/r4k-bugs64.c +++ b/arch/mips/kernel/r4k-bugs64.c @@ -242,7 +242,7 @@ static __init void check_daddi(void) panic(bug64hit, !DADDI_WAR ? daddiwar : nowar); } -int daddiu_bug = IS_ENABLED(CONFIG_CPU_MIPSR6) ? 0 : -1; +int daddiu_bug = -1; static __init void check_daddiu(void) { @@ -312,14 +312,11 @@ static __init void check_daddiu(void) void __init check_bugs64_early(void) { - if (!IS_ENABLED(CONFIG_CPU_MIPSR6)) { - check_mult_sh(); - check_daddiu(); - } + check_mult_sh(); + check_daddiu(); } void __init check_bugs64(void) { - if (!IS_ENABLED(CONFIG_CPU_MIPSR6)) - check_daddi(); + check_daddi(); } -- cgit v1.2.3 From cd5f9e4fd8792afeadfec17290ee87943b00421b Mon Sep 17 00:00:00 2001 From: Stefan Roese Date: Mon, 12 Aug 2019 12:36:52 +0200 Subject: MIPS: ralink: mt7628a.dtsi: Add I2C controller DT node This patch adds the I2C controller description to the MT7628A dtsi file. 
Signed-off-by: Stefan Roese Signed-off-by: Paul Burton Cc: Harvey Hunt Cc: John Crispin Cc: linux-mips@vger.kernel.org --- arch/mips/boot/dts/ralink/mt7628a.dtsi | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'arch') diff --git a/arch/mips/boot/dts/ralink/mt7628a.dtsi b/arch/mips/boot/dts/ralink/mt7628a.dtsi index 61f8621e88b3..742bcc1dc2e0 100644 --- a/arch/mips/boot/dts/ralink/mt7628a.dtsi +++ b/arch/mips/boot/dts/ralink/mt7628a.dtsi @@ -199,6 +199,22 @@ status = "disabled"; }; + i2c: i2c@900 { + compatible = "mediatek,mt7621-i2c"; + reg = <0x900 0x100>; + + pinctrl-names = "default"; + pinctrl-0 = <&pinmux_i2c_i2c>; + + resets = <&resetc 16>; + reset-names = "i2c"; + + #address-cells = <1>; + #size-cells = <0>; + + status = "disabled"; + }; + uart0: uartlite@c00 { compatible = "ns16550a"; reg = <0xc00 0x100>; -- cgit v1.2.3 From 376357aca715c27f716844e9825417e12e3b02e7 Mon Sep 17 00:00:00 2001 From: Stefan Roese Date: Mon, 12 Aug 2019 12:36:55 +0200 Subject: MIPS: ralink: Add GARDENA smart Gateway MT7688 board This patch adds support for the GARDENA smart Gateway, which is based on the MediaTek MT7688 SoC. It is equipped with 128 MiB of DDR and 8 MiB of flash (SPI NOR) and additional 128MiB SPI NAND storage. Signed-off-by: Stefan Roese Signed-off-by: Paul Burton Cc: Harvey Hunt Cc: John Crispin Cc: linux-mips@vger.kernel.org --- .../dts/ralink/gardena_smart_gateway_mt7688.dts | 197 +++++++++++++++++++++ 1 file changed, 197 insertions(+) create mode 100644 arch/mips/boot/dts/ralink/gardena_smart_gateway_mt7688.dts (limited to 'arch') diff --git a/arch/mips/boot/dts/ralink/gardena_smart_gateway_mt7688.dts b/arch/mips/boot/dts/ralink/gardena_smart_gateway_mt7688.dts new file mode 100644 index 000000000000..aa5caaa31104 --- /dev/null +++ b/arch/mips/boot/dts/ralink/gardena_smart_gateway_mt7688.dts @@ -0,0 +1,197 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2019 Stefan Roese + */ + +/dts-v1/; + +/include/ "mt7628a.dtsi" + +#include +#include + +/ { + compatible = "gardena,smart-gateway-mt7688", "ralink,mt7688a-soc", + "ralink,mt7628a-soc"; + model = "GARDENA smart Gateway (MT7688)"; + + memory@0 { + device_type = "memory"; + reg = <0x0 0x8000000>; + }; + + gpio-keys { + compatible = "gpio-keys"; + + pinctrl-names = "default"; + pinctrl-0 = <&pinmux_gpio_gpio>; /* GPIO11 */ + + user_btn1 { + label = "USER_BTN1"; + gpios = <&gpio 11 GPIO_ACTIVE_LOW>; + linux,code = ; + }; + }; + + leds { + compatible = "gpio-leds"; + + pinctrl-names = "default"; + pinctrl-0 = <&pinmux_pwm0_gpio>, /* GPIO18 */ + <&pinmux_pwm1_gpio>, /* GPIO19 */ + <&pinmux_sdmode_gpio>, /* GPIO22..29 */ + <&pinmux_p0led_an_gpio>; /* GPIO43 */ + /* + * <&pinmux_i2s_gpio> (covers GPIO0..3) is needed here as + * well for GPIO3. But this is already claimed for uart1 + * (see below). So we can't include it in this LED node. 
+ */ + + power_blue { + label = "smartgw:power:blue"; + gpios = <&gpio 18 GPIO_ACTIVE_HIGH>; + default-state = "off"; + }; + + power_green { + label = "smartgw:power:green"; + gpios = <&gpio 19 GPIO_ACTIVE_HIGH>; + default-state = "off"; + }; + + power_red { + label = "smartgw:power:red"; + gpios = <&gpio 22 GPIO_ACTIVE_HIGH>; + default-state = "off"; + }; + + radio_blue { + label = "smartgw:radio:blue"; + gpios = <&gpio 23 GPIO_ACTIVE_HIGH>; + default-state = "off"; + }; + + radio_green { + label = "smartgw:radio:green"; + gpios = <&gpio 24 GPIO_ACTIVE_HIGH>; + default-state = "off"; + }; + + radio_red { + label = "smartgw:radio:red"; + gpios = <&gpio 25 GPIO_ACTIVE_HIGH>; + default-state = "off"; + }; + + internet_blue { + label = "smartgw:internet:blue"; + gpios = <&gpio 26 GPIO_ACTIVE_HIGH>; + default-state = "off"; + }; + + internet_green { + label = "smartgw:internet:green"; + gpios = <&gpio 27 GPIO_ACTIVE_HIGH>; + default-state = "off"; + }; + + internet_red { + label = "smartgw:internet:red"; + gpios = <&gpio 28 GPIO_ACTIVE_HIGH>; + default-state = "off"; + }; + + ethernet_link { + label = "smartgw:eth:link"; + gpios = <&gpio 3 GPIO_ACTIVE_LOW>; + linux,default-trigger = "netdev"; + }; + + ethernet_activity { + label = "smartgw:eth:act"; + gpios = <&gpio 43 GPIO_ACTIVE_LOW>; + linux,default-trigger = "netdev"; + }; + }; + + aliases { + serial0 = &uart0; + }; +}; + +&i2c { + status = "okay"; +}; + +&spi { + status = "okay"; + + pinctrl-names = "default"; + pinctrl-0 = <&pinmux_spi_spi>, <&pinmux_spi_cs1_cs>; + + m25p80@0 { + compatible = "jedec,spi-nor"; + reg = <0>; + spi-max-frequency = <40000000>; + + partitions { + compatible = "fixed-partitions"; + #address-cells = <1>; + #size-cells = <1>; + + partition@0 { + label = "uboot"; + reg = <0x0 0xa0000>; + read-only; + }; + + partition@a0000 { + label = "uboot_env0"; + reg = <0xa0000 0x10000>; + }; + + partition@b0000 { + label = "uboot_env1"; + reg = <0xb0000 0x10000>; + }; + + factory: partition@c0000 { + label = "factory"; + reg = <0xc0000 0x10000>; + read-only; + }; + }; + }; + + nand_flash@1 { + compatible = "spi-nand"; + linux,mtd-name = "gd5f"; + reg = <1>; + spi-max-frequency = <40000000>; + }; +}; + +&uart1 { + status = "okay"; + + pinctrl-names = "default"; + pinctrl-0 = <&pinmux_i2s_gpio>; /* GPIO0..3 */ + + rts-gpios = <&gpio 1 GPIO_ACTIVE_LOW>; + cts-gpios = <&gpio 2 GPIO_ACTIVE_LOW>; +}; + +&uart2 { + status = "okay"; + + pinctrl-names = "default"; + pinctrl-0 = <&pinmux_p2led_an_gpio>, /* GPIO41 */ + <&pinmux_p3led_an_gpio>; /* GPIO40 */ + + rts-gpios = <&gpio 40 GPIO_ACTIVE_LOW>; + cts-gpios = <&gpio 41 GPIO_ACTIVE_LOW>; +}; + +&watchdog { + status = "okay"; +}; -- cgit v1.2.3 From 878f75c7a2530471844a93b01e887f09d24ed57f Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Tue, 1 Oct 2019 21:53:05 +0000 Subject: MIPS: Unify sc beqz definition We currently duplicate the definition of __scbeqz in asm/atomic.h & asm/cmpxchg.h. Move it to asm/llsc.h & rename it to __SC_BEQZ to fit better with the existing __SC macro provided there. We include a tab in the string in order to avoid the need for users to indent code any further to include whitespace of their own after the instruction mnemonic. 
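[ ed: On the whitespace point: because the macro's replacement text carries
  its own trailing space and callers prefix a tab, plain C string-literal
  concatenation at the use sites in the diff below yields a correctly laid
  out instruction. A tiny illustration (the buffer is hypothetical; the
  macro value matches the patch):

	#define __SC_BEQZ "beqz "	/* non-R10000 definition */

	static const char branch[] = "\t" __SC_BEQZ "%0, 1b\n";
	/* branch now holds "\tbeqz %0, 1b\n", ready for inline asm */
]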
Signed-off-by: Paul Burton Cc: linux-mips@vger.kernel.org Cc: Huacai Chen Cc: Jiaxun Yang Cc: linux-kernel@vger.kernel.org --- arch/mips/include/asm/atomic.h | 28 +++++++++------------------- arch/mips/include/asm/cmpxchg.h | 20 ++++---------------- arch/mips/include/asm/llsc.h | 11 +++++++++++ 3 files changed, 24 insertions(+), 35 deletions(-) (limited to 'arch') diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h index bb8658cc7f12..7578c807ef98 100644 --- a/arch/mips/include/asm/atomic.h +++ b/arch/mips/include/asm/atomic.h @@ -20,19 +20,9 @@ #include #include #include +#include #include -/* - * Using a branch-likely instruction to check the result of an sc instruction - * works around a bug present in R10000 CPUs prior to revision 3.0 that could - * cause ll-sc sequences to execute non-atomically. - */ -#if R10000_LLSC_WAR -# define __scbeqz "beqzl" -#else -# define __scbeqz "beqz" -#endif - #define ATOMIC_INIT(i) { (i) } /* @@ -65,7 +55,7 @@ static __inline__ void atomic_##op(int i, atomic_t * v) \ "1: ll %0, %1 # atomic_" #op " \n" \ " " #asm_op " %0, %2 \n" \ " sc %0, %1 \n" \ - "\t" __scbeqz " %0, 1b \n" \ + "\t" __SC_BEQZ "%0, 1b \n" \ " .set pop \n" \ : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (v->counter) \ : "Ir" (i) : __LLSC_CLOBBER); \ @@ -93,7 +83,7 @@ static __inline__ int atomic_##op##_return_relaxed(int i, atomic_t * v) \ "1: ll %1, %2 # atomic_" #op "_return \n" \ " " #asm_op " %0, %1, %3 \n" \ " sc %0, %2 \n" \ - "\t" __scbeqz " %0, 1b \n" \ + "\t" __SC_BEQZ "%0, 1b \n" \ " " #asm_op " %0, %1, %3 \n" \ " .set pop \n" \ : "=&r" (result), "=&r" (temp), \ @@ -127,7 +117,7 @@ static __inline__ int atomic_fetch_##op##_relaxed(int i, atomic_t * v) \ "1: ll %1, %2 # atomic_fetch_" #op " \n" \ " " #asm_op " %0, %1, %3 \n" \ " sc %0, %2 \n" \ - "\t" __scbeqz " %0, 1b \n" \ + "\t" __SC_BEQZ "%0, 1b \n" \ " .set pop \n" \ " move %0, %1 \n" \ : "=&r" (result), "=&r" (temp), \ @@ -205,7 +195,7 @@ static __inline__ int atomic_sub_if_positive(int i, atomic_t * v) " .set push \n" " .set "MIPS_ISA_LEVEL" \n" " sc %1, %2 \n" - "\t" __scbeqz " %1, 1b \n" + "\t" __SC_BEQZ "%1, 1b \n" "2: \n" " .set pop \n" : "=&r" (result), "=&r" (temp), @@ -267,7 +257,7 @@ static __inline__ void atomic64_##op(s64 i, atomic64_t * v) \ "1: lld %0, %1 # atomic64_" #op " \n" \ " " #asm_op " %0, %2 \n" \ " scd %0, %1 \n" \ - "\t" __scbeqz " %0, 1b \n" \ + "\t" __SC_BEQZ "%0, 1b \n" \ " .set pop \n" \ : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (v->counter) \ : "Ir" (i) : __LLSC_CLOBBER); \ @@ -295,7 +285,7 @@ static __inline__ s64 atomic64_##op##_return_relaxed(s64 i, atomic64_t * v) \ "1: lld %1, %2 # atomic64_" #op "_return\n" \ " " #asm_op " %0, %1, %3 \n" \ " scd %0, %2 \n" \ - "\t" __scbeqz " %0, 1b \n" \ + "\t" __SC_BEQZ "%0, 1b \n" \ " " #asm_op " %0, %1, %3 \n" \ " .set pop \n" \ : "=&r" (result), "=&r" (temp), \ @@ -329,7 +319,7 @@ static __inline__ s64 atomic64_fetch_##op##_relaxed(s64 i, atomic64_t * v) \ "1: lld %1, %2 # atomic64_fetch_" #op "\n" \ " " #asm_op " %0, %1, %3 \n" \ " scd %0, %2 \n" \ - "\t" __scbeqz " %0, 1b \n" \ + "\t" __SC_BEQZ "%0, 1b \n" \ " move %0, %1 \n" \ " .set pop \n" \ : "=&r" (result), "=&r" (temp), \ @@ -404,7 +394,7 @@ static __inline__ s64 atomic64_sub_if_positive(s64 i, atomic64_t * v) " move %1, %0 \n" " bltz %0, 1f \n" " scd %1, %2 \n" - "\t" __scbeqz " %1, 1b \n" + "\t" __SC_BEQZ "%1, 1b \n" "1: \n" " .set pop \n" : "=&r" (result), "=&r" (temp), diff --git a/arch/mips/include/asm/cmpxchg.h b/arch/mips/include/asm/cmpxchg.h index 
79bf34efbc04..5d3f0e3513b4 100644 --- a/arch/mips/include/asm/cmpxchg.h +++ b/arch/mips/include/asm/cmpxchg.h @@ -11,19 +11,9 @@ #include #include #include +#include #include -/* - * Using a branch-likely instruction to check the result of an sc instruction - * works around a bug present in R10000 CPUs prior to revision 3.0 that could - * cause ll-sc sequences to execute non-atomically. - */ -#if R10000_LLSC_WAR -# define __scbeqz "beqzl" -#else -# define __scbeqz "beqz" -#endif - /* * These functions doesn't exist, so if they are called you'll either: * @@ -57,7 +47,7 @@ extern unsigned long __xchg_called_with_bad_pointer(void) " move $1, %z3 \n" \ " .set " MIPS_ISA_ARCH_LEVEL " \n" \ " " st " $1, %1 \n" \ - "\t" __scbeqz " $1, 1b \n" \ + "\t" __SC_BEQZ "$1, 1b \n" \ " .set pop \n" \ : "=&r" (__ret), "=" GCC_OFF_SMALL_ASM() (*m) \ : GCC_OFF_SMALL_ASM() (*m), "Jr" (val) \ @@ -130,7 +120,7 @@ static inline unsigned long __xchg(volatile void *ptr, unsigned long x, " move $1, %z4 \n" \ " .set "MIPS_ISA_ARCH_LEVEL" \n" \ " " st " $1, %1 \n" \ - "\t" __scbeqz " $1, 1b \n" \ + "\t" __SC_BEQZ "$1, 1b \n" \ " .set pop \n" \ "2: \n" \ : "=&r" (__ret), "=" GCC_OFF_SMALL_ASM() (*m) \ @@ -268,7 +258,7 @@ static inline unsigned long __cmpxchg64(volatile void *ptr, /* Attempt to store new at ptr */ " scd %L1, %2 \n" /* If we failed, loop! */ - "\t" __scbeqz " %L1, 1b \n" + "\t" __SC_BEQZ "%L1, 1b \n" " .set pop \n" "2: \n" : "=&r"(ret), @@ -311,6 +301,4 @@ static inline unsigned long __cmpxchg64(volatile void *ptr, # endif /* !CONFIG_SMP */ #endif /* !CONFIG_64BIT */ -#undef __scbeqz - #endif /* __ASM_CMPXCHG_H */ diff --git a/arch/mips/include/asm/llsc.h b/arch/mips/include/asm/llsc.h index c6d17d171147..9b19f38562ac 100644 --- a/arch/mips/include/asm/llsc.h +++ b/arch/mips/include/asm/llsc.h @@ -25,4 +25,15 @@ #define __EXT "dext " #endif +/* + * Using a branch-likely instruction to check the result of an sc instruction + * works around a bug present in R10000 CPUs prior to revision 3.0 that could + * cause ll-sc sequences to execute non-atomically. + */ +#if R10000_LLSC_WAR +# define __SC_BEQZ "beqzl " +#else +# define __SC_BEQZ "beqz " +#endif + #endif /* __ASM_LLSC_H */ -- cgit v1.2.3 From ef85d057a605c36063a15345be87a45e0affba88 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Tue, 1 Oct 2019 21:53:06 +0000 Subject: MIPS: Use compact branch for LL/SC loops on MIPSr6+ When targeting MIPSr6 or higher make use of a compact branch in LL/SC loops, preventing the insertion of a delay slot nop that only serves to waste space. 
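For reference, the selection logic in asm/llsc.h becomes the following
(matching the hunk below; MIPS_ISA_REV is assumed to evaluate to the MIPS
release level the kernel is built for):

  #if R10000_LLSC_WAR
  # define __SC_BEQZ "beqzl "	/* branch-likely: R10000 erratum workaround */
  #elif MIPS_ISA_REV >= 6
  # define __SC_BEQZ "beqzc "	/* compact branch: no delay slot to fill */
  #else
  # define __SC_BEQZ "beqz "	/* classic branch: delay slot gets a nop */
  #endif
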
Signed-off-by: Paul Burton Cc: linux-mips@vger.kernel.org Cc: Huacai Chen Cc: Jiaxun Yang Cc: linux-kernel@vger.kernel.org --- arch/mips/include/asm/llsc.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/mips/include/asm/llsc.h b/arch/mips/include/asm/llsc.h index 9b19f38562ac..d240a4a2d1c4 100644 --- a/arch/mips/include/asm/llsc.h +++ b/arch/mips/include/asm/llsc.h @@ -9,6 +9,8 @@ #ifndef __ASM_LLSC_H #define __ASM_LLSC_H +#include + #if _MIPS_SZLONG == 32 #define SZLONG_LOG 5 #define SZLONG_MASK 31UL @@ -32,6 +34,8 @@ */ #if R10000_LLSC_WAR # define __SC_BEQZ "beqzl " +#elif MIPS_ISA_REV >= 6 +# define __SC_BEQZ "beqzc " #else # define __SC_BEQZ "beqz " #endif -- cgit v1.2.3 From bf92927251b3642c10f8562d4f884a785cdd1855 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Tue, 1 Oct 2019 21:53:07 +0000 Subject: MIPS: barrier: Add __SYNC() infrastructure Introduce an asm/sync.h header which provides infrastructure that can be used to generate sync instructions of various types, and for various reasons. For example if we need a sync instruction that provides a full completion barrier but only on systems which have weak memory ordering, we can generate the appropriate assembly code using: __SYNC(full, weak_ordering) When the kernel is configured to run on systems with weak memory ordering (ie. CONFIG_WEAK_ORDERING is selected) we'll emit a sync instruction. When the kernel is configured to run on systems with strong memory ordering (ie. CONFIG_WEAK_ORDERING is not selected) we'll emit nothing. The caller doesn't need to know which happened - it simply says what it needs & when, with no concern for checking the kernel configuration. There are some scenarios in which we may want to emit code only when we *didn't* emit a sync instruction. For example, some Loongson3 CPUs suffer from a bug that requires us to emit a sync instruction prior to each ll instruction (enabled by CONFIG_CPU_LOONGSON3_WORKAROUNDS). In cases where this bug workaround is enabled, it's wasteful to then have more generic code emit another sync instruction to provide barriers we need in general. A __SYNC_ELSE() macro allows for this, providing an extra argument that contains code to be assembled only in cases where the sync instruction was not emitted. For example if we have a scenario in which we generally want to emit a release barrier but for affected Loongson3 configurations upgrade that to a full completion barrier, we can do that like so: __SYNC_ELSE(full, loongson3_war, __SYNC(rl, always)) The assembly generated by these macros can be used either as inline assembly or in assembly source files. Differing types of sync as provided by MIPSr6 are defined, but currently they all generate a full completion barrier except in kernels configured for Cavium Octeon systems. There the wmb sync-type is used, and rmb syncs are omitted, as has been the case since commit 6b07d38aaa52 ("MIPS: Octeon: Use optimized memory barrier primitives."). Using __SYNC() with the wmb or rmb types will abstract away the Octeon specific behavior and allow us to later clean up asm/barrier.h code that currently includes a plethora of #ifdef's. 
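By way of a short usage sketch (the same examples as above written out as C
inline asm; in C the macros expand to string literals, per the new
asm/sync.h below, so they drop straight into an asm statement):

  /* Emit "sync 0" only on kernels built for weakly ordered systems */
  asm volatile(__SYNC(full, weak_ordering) ::: "memory");

  /* Loongson3 workaround: full barrier if affected, else a release barrier */
  asm volatile(__SYNC_ELSE(full, loongson3_war, __SYNC(rl, always)) ::: "memory");
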
Signed-off-by: Paul Burton Cc: linux-mips@vger.kernel.org Cc: Huacai Chen Cc: Jiaxun Yang Cc: linux-kernel@vger.kernel.org --- arch/mips/include/asm/barrier.h | 113 +--------------------- arch/mips/include/asm/sync.h | 207 ++++++++++++++++++++++++++++++++++++++++ arch/mips/kernel/pm-cps.c | 20 ++-- 3 files changed, 219 insertions(+), 121 deletions(-) create mode 100644 arch/mips/include/asm/sync.h (limited to 'arch') diff --git a/arch/mips/include/asm/barrier.h b/arch/mips/include/asm/barrier.h index 9228f7386220..5ad39bfd3b6d 100644 --- a/arch/mips/include/asm/barrier.h +++ b/arch/mips/include/asm/barrier.h @@ -9,116 +9,7 @@ #define __ASM_BARRIER_H #include - -/* - * Sync types defined by the MIPS architecture (document MD00087 table 6.5) - * These values are used with the sync instruction to perform memory barriers. - * Types of ordering guarantees available through the SYNC instruction: - * - Completion Barriers - * - Ordering Barriers - * As compared to the completion barrier, the ordering barrier is a - * lighter-weight operation as it does not require the specified instructions - * before the SYNC to be already completed. Instead it only requires that those - * specified instructions which are subsequent to the SYNC in the instruction - * stream are never re-ordered for processing ahead of the specified - * instructions which are before the SYNC in the instruction stream. - * This potentially reduces how many cycles the barrier instruction must stall - * before it completes. - * Implementations that do not use any of the non-zero values of stype to define - * different barriers, such as ordering barriers, must make those stype values - * act the same as stype zero. - */ - -/* - * Completion barriers: - * - Every synchronizable specified memory instruction (loads or stores or both) - * that occurs in the instruction stream before the SYNC instruction must be - * already globally performed before any synchronizable specified memory - * instructions that occur after the SYNC are allowed to be performed, with - * respect to any other processor or coherent I/O module. - * - * - The barrier does not guarantee the order in which instruction fetches are - * performed. - * - * - A stype value of zero will always be defined such that it performs the most - * complete set of synchronization operations that are defined.This means - * stype zero always does a completion barrier that affects both loads and - * stores preceding the SYNC instruction and both loads and stores that are - * subsequent to the SYNC instruction. Non-zero values of stype may be defined - * by the architecture or specific implementations to perform synchronization - * behaviors that are less complete than that of stype zero. If an - * implementation does not use one of these non-zero values to define a - * different synchronization behavior, then that non-zero value of stype must - * act the same as stype zero completion barrier. This allows software written - * for an implementation with a lighter-weight barrier to work on another - * implementation which only implements the stype zero completion barrier. - * - * - A completion barrier is required, potentially in conjunction with SSNOP (in - * Release 1 of the Architecture) or EHB (in Release 2 of the Architecture), - * to guarantee that memory reference results are visible across operating - * mode changes. 
For example, a completion barrier is required on some - * implementations on entry to and exit from Debug Mode to guarantee that - * memory effects are handled correctly. - */ - -/* - * stype 0 - A completion barrier that affects preceding loads and stores and - * subsequent loads and stores. - * Older instructions which must reach the load/store ordering point before the - * SYNC instruction completes: Loads, Stores - * Younger instructions which must reach the load/store ordering point only - * after the SYNC instruction completes: Loads, Stores - * Older instructions which must be globally performed when the SYNC instruction - * completes: Loads, Stores - */ -#define STYPE_SYNC 0x0 - -/* - * Ordering barriers: - * - Every synchronizable specified memory instruction (loads or stores or both) - * that occurs in the instruction stream before the SYNC instruction must - * reach a stage in the load/store datapath after which no instruction - * re-ordering is possible before any synchronizable specified memory - * instruction which occurs after the SYNC instruction in the instruction - * stream reaches the same stage in the load/store datapath. - * - * - If any memory instruction before the SYNC instruction in program order, - * generates a memory request to the external memory and any memory - * instruction after the SYNC instruction in program order also generates a - * memory request to external memory, the memory request belonging to the - * older instruction must be globally performed before the time the memory - * request belonging to the younger instruction is globally performed. - * - * - The barrier does not guarantee the order in which instruction fetches are - * performed. - */ - -/* - * stype 0x10 - An ordering barrier that affects preceding loads and stores and - * subsequent loads and stores. - * Older instructions which must reach the load/store ordering point before the - * SYNC instruction completes: Loads, Stores - * Younger instructions which must reach the load/store ordering point only - * after the SYNC instruction completes: Loads, Stores - * Older instructions which must be globally performed when the SYNC instruction - * completes: N/A - */ -#define STYPE_SYNC_MB 0x10 - -/* - * stype 0x14 - A completion barrier specific to global invalidations - * - * When a sync instruction of this type completes any preceding GINVI or GINVT - * operation has been globalized & completed on all coherent CPUs. Anything - * that the GINV* instruction should invalidate will have been invalidated on - * all coherent CPUs when this instruction completes. It is implementation - * specific whether the GINV* instructions themselves will ensure completion, - * or this sync type will. - * - * In systems implementing global invalidates (ie. with Config5.GI == 2 or 3) - * this sync type also requires that previous SYNCI operations have completed. 
- */ -#define STYPE_GINV 0x14 +#include #ifdef CONFIG_CPU_HAS_SYNC #define __sync() \ @@ -286,7 +177,7 @@ static inline void sync_ginv(void) { - asm volatile("sync\t%0" :: "i"(STYPE_GINV)); + asm volatile("sync\t%0" :: "i"(__SYNC_ginv)); } #include diff --git a/arch/mips/include/asm/sync.h b/arch/mips/include/asm/sync.h new file mode 100644 index 000000000000..7c6a1095f556 --- /dev/null +++ b/arch/mips/include/asm/sync.h @@ -0,0 +1,207 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef __MIPS_ASM_SYNC_H__ +#define __MIPS_ASM_SYNC_H__ + +/* + * sync types are defined by the MIPS64 Instruction Set documentation in Volume + * II-A of the MIPS Architecture Reference Manual, which can be found here: + * + * https://www.mips.com/?do-download=the-mips64-instruction-set-v6-06 + * + * Two types of barrier are provided: + * + * 1) Completion barriers, which ensure that a memory operation has actually + * completed & often involve stalling the CPU pipeline to do so. + * + * 2) Ordering barriers, which only ensure that affected memory operations + * won't be reordered in the CPU pipeline in a manner that violates the + * restrictions imposed by the barrier. + * + * Ordering barriers can be more efficient than completion barriers, since: + * + * a) Ordering barriers only require memory access instructions which preceed + * them in program order (older instructions) to reach a point in the + * load/store datapath beyond which reordering is not possible before + * allowing memory access instructions which follow them (younger + * instructions) to be performed. That is, older instructions don't + * actually need to complete - they just need to get far enough that all + * other coherent CPUs will observe their completion before they observe + * the effects of younger instructions. + * + * b) Multiple variants of ordering barrier are provided which allow the + * effects to be restricted to different combinations of older or younger + * loads or stores. By way of example, if we only care that stores older + * than a barrier are observed prior to stores that are younger than a + * barrier & don't care about the ordering of loads then the 'wmb' + * ordering barrier can be used. Limiting the barrier's effects to stores + * allows loads to continue unaffected & potentially allows the CPU to + * make progress faster than if younger loads had to wait for older stores + * to complete. + */ + +/* + * No sync instruction at all; used to allow code to nullify the effect of the + * __SYNC() macro without needing lots of #ifdefery. + */ +#define __SYNC_none -1 + +/* + * A full completion barrier; all memory accesses appearing prior to this sync + * instruction in program order must complete before any memory accesses + * appearing after this sync instruction in program order. + */ +#define __SYNC_full 0x00 + +/* + * For now we use a full completion barrier to implement all sync types, until + * we're satisfied that lightweight ordering barriers defined by MIPSr6 are + * sufficient to uphold our desired memory model. + */ +#define __SYNC_aq __SYNC_full +#define __SYNC_rl __SYNC_full +#define __SYNC_mb __SYNC_full + +/* + * ...except on Cavium Octeon CPUs, which have been using the 'wmb' ordering + * barrier since 2010 & omit 'rmb' barriers because the CPUs don't perform + * speculative reads. 
+ */ +#ifdef CONFIG_CPU_CAVIUM_OCTEON +# define __SYNC_rmb __SYNC_none +# define __SYNC_wmb 0x04 +#else +# define __SYNC_rmb __SYNC_full +# define __SYNC_wmb __SYNC_full +#endif + +/* + * A GINV sync is a little different; it doesn't relate directly to loads or + * stores, but instead causes synchronization of an icache or TLB global + * invalidation operation triggered by the ginvi or ginvt instructions + * respectively. In cases where we need to know that a ginvi or ginvt operation + * has been performed by all coherent CPUs, we must issue a sync instruction of + * this type. Once this instruction graduates all coherent CPUs will have + * observed the invalidation. + */ +#define __SYNC_ginv 0x14 + +/* Trivial; indicate that we always need this sync instruction. */ +#define __SYNC_always (1 << 0) + +/* + * Indicate that we need this sync instruction only on systems with weakly + * ordered memory access. In general this is most MIPS systems, but there are + * exceptions which provide strongly ordered memory. + */ +#ifdef CONFIG_WEAK_ORDERING +# define __SYNC_weak_ordering (1 << 1) +#else +# define __SYNC_weak_ordering 0 +#endif + +/* + * Indicate that we need this sync instruction only on systems where LL/SC + * don't implicitly provide a memory barrier. In general this is most MIPS + * systems. + */ +#ifdef CONFIG_WEAK_REORDERING_BEYOND_LLSC +# define __SYNC_weak_llsc (1 << 2) +#else +# define __SYNC_weak_llsc 0 +#endif + +/* + * Some Loongson 3 CPUs have a bug wherein execution of a memory access (load, + * store or prefetch) in between an LL & SC can cause the SC instruction to + * erroneously succeed, breaking atomicity. Whilst it's unusual to write code + * containing such sequences, this bug bites harder than we might otherwise + * expect due to reordering & speculation: + * + * 1) A memory access appearing prior to the LL in program order may actually + * be executed after the LL - this is the reordering case. + * + * In order to avoid this we need to place a memory barrier (ie. a SYNC + * instruction) prior to every LL instruction, in between it and any earlier + * memory access instructions. + * + * This reordering case is fixed by 3A R2 CPUs, ie. 3A2000 models and later. + * + * 2) If a conditional branch exists between an LL & SC with a target outside + * of the LL-SC loop, for example an exit upon value mismatch in cmpxchg() + * or similar, then misprediction of the branch may allow speculative + * execution of memory accesses from outside of the LL-SC loop. + * + * In order to avoid this we need a memory barrier (ie. a SYNC instruction) + * at each affected branch target. + * + * This case affects all current Loongson 3 CPUs. + * + * The above described cases cause an error in the cache coherence protocol; + * such that the Invalidate of a competing LL-SC goes 'missing' and SC + * erroneously observes its core still has Exclusive state and lets the SC + * proceed. + * + * Therefore the error only occurs on SMP systems. + */ +#ifdef CONFIG_CPU_LOONGSON3_WORKAROUNDS +# define __SYNC_loongson3_war (1 << 31) +#else +# define __SYNC_loongson3_war 0 +#endif + +/* + * Some Cavium Octeon CPUs suffer from a bug that causes a single wmb ordering + * barrier to be ineffective, requiring the use of 2 in sequence to provide an + * effective barrier as noted by commit 6b07d38aaa52 ("MIPS: Octeon: Use + * optimized memory barrier primitives."). Here we specify that the affected + * sync instructions should be emitted twice. 
+ */ +#ifdef CONFIG_CPU_CAVIUM_OCTEON +# define __SYNC_rpt(type) (1 + (type == __SYNC_wmb)) +#else +# define __SYNC_rpt(type) 1 +#endif + +/* + * The main event. Here we actually emit a sync instruction of a given type, if + * reason is non-zero. + * + * In future we have the option of emitting entries in a fixups-style table + * here that would allow us to opportunistically remove some sync instructions + * when we detect at runtime that we're running on a CPU that doesn't need + * them. + */ +#ifdef CONFIG_CPU_HAS_SYNC +# define ____SYNC(_type, _reason, _else) \ + .if (( _type ) != -1) && ( _reason ); \ + .set push; \ + .set MIPS_ISA_LEVEL_RAW; \ + .rept __SYNC_rpt(_type); \ + sync _type; \ + .endr; \ + .set pop; \ + .else; \ + _else; \ + .endif +#else +# define ____SYNC(_type, _reason, _else) +#endif + +/* + * Preprocessor magic to expand macros used as arguments before we insert them + * into assembly code. + */ +#ifdef __ASSEMBLY__ +# define ___SYNC(type, reason, else) \ + ____SYNC(type, reason, else) +#else +# define ___SYNC(type, reason, else) \ + __stringify(____SYNC(type, reason, else)) +#endif + +#define __SYNC(type, reason) \ + ___SYNC(__SYNC_##type, __SYNC_##reason, ) +#define __SYNC_ELSE(type, reason, else) \ + ___SYNC(__SYNC_##type, __SYNC_##reason, else) + +#endif /* __MIPS_ASM_SYNC_H__ */ diff --git a/arch/mips/kernel/pm-cps.c b/arch/mips/kernel/pm-cps.c index a26f40db15d0..9bf60d7d44d3 100644 --- a/arch/mips/kernel/pm-cps.c +++ b/arch/mips/kernel/pm-cps.c @@ -307,7 +307,7 @@ static int cps_gen_flush_fsb(u32 **pp, struct uasm_label **pl, } /* Barrier ensuring previous cache invalidates are complete */ - uasm_i_sync(pp, STYPE_SYNC); + uasm_i_sync(pp, __SYNC_full); uasm_i_ehb(pp); /* Check whether the pipeline stalled due to the FSB being full */ @@ -397,7 +397,7 @@ static void *cps_gen_entry_code(unsigned cpu, enum cps_pm_state state) if (coupled_coherence) { /* Increment ready_count */ - uasm_i_sync(&p, STYPE_SYNC_MB); + uasm_i_sync(&p, __SYNC_mb); uasm_build_label(&l, p, lbl_incready); uasm_i_ll(&p, t1, 0, r_nc_count); uasm_i_addiu(&p, t2, t1, 1); @@ -406,7 +406,7 @@ static void *cps_gen_entry_code(unsigned cpu, enum cps_pm_state state) uasm_i_addiu(&p, t1, t1, 1); /* Barrier ensuring all CPUs see the updated r_nc_count value */ - uasm_i_sync(&p, STYPE_SYNC_MB); + uasm_i_sync(&p, __SYNC_mb); /* * If this is the last VPE to become ready for non-coherence @@ -473,7 +473,7 @@ static void *cps_gen_entry_code(unsigned cpu, enum cps_pm_state state) Index_Writeback_Inv_D, lbl_flushdcache); /* Barrier ensuring previous cache invalidates are complete */ - uasm_i_sync(&p, STYPE_SYNC); + uasm_i_sync(&p, __SYNC_full); uasm_i_ehb(&p); if (mips_cm_revision() < CM_REV_CM3) { @@ -487,7 +487,7 @@ static void *cps_gen_entry_code(unsigned cpu, enum cps_pm_state state) uasm_i_lw(&p, t0, 0, r_pcohctl); /* Barrier to ensure write to coherence control is complete */ - uasm_i_sync(&p, STYPE_SYNC); + uasm_i_sync(&p, __SYNC_full); uasm_i_ehb(&p); } @@ -534,7 +534,7 @@ static void *cps_gen_entry_code(unsigned cpu, enum cps_pm_state state) } /* Barrier to ensure write to CPC command is complete */ - uasm_i_sync(&p, STYPE_SYNC); + uasm_i_sync(&p, __SYNC_full); uasm_i_ehb(&p); } @@ -572,13 +572,13 @@ static void *cps_gen_entry_code(unsigned cpu, enum cps_pm_state state) uasm_i_lw(&p, t0, 0, r_pcohctl); /* Barrier to ensure write to coherence control is complete */ - uasm_i_sync(&p, STYPE_SYNC); + uasm_i_sync(&p, __SYNC_full); uasm_i_ehb(&p); if (coupled_coherence && (state == CPS_PM_NC_WAIT)) { /* 
Decrement ready_count */ uasm_build_label(&l, p, lbl_decready); - uasm_i_sync(&p, STYPE_SYNC_MB); + uasm_i_sync(&p, __SYNC_mb); uasm_i_ll(&p, t1, 0, r_nc_count); uasm_i_addiu(&p, t2, t1, -1); uasm_i_sc(&p, t2, 0, r_nc_count); @@ -586,7 +586,7 @@ static void *cps_gen_entry_code(unsigned cpu, enum cps_pm_state state) uasm_i_andi(&p, v0, t1, (1 << fls(smp_num_siblings)) - 1); /* Barrier ensuring all CPUs see the updated r_nc_count value */ - uasm_i_sync(&p, STYPE_SYNC_MB); + uasm_i_sync(&p, __SYNC_mb); } if (coupled_coherence && (state == CPS_PM_CLOCK_GATED)) { @@ -608,7 +608,7 @@ static void *cps_gen_entry_code(unsigned cpu, enum cps_pm_state state) uasm_build_label(&l, p, lbl_secondary_cont); /* Barrier ensuring all CPUs see the updated r_nc_count value */ - uasm_i_sync(&p, STYPE_SYNC_MB); + uasm_i_sync(&p, __SYNC_mb); } /* The core is coherent, time to return to C code */ -- cgit v1.2.3 From 21e3134b3ec09e722cbcda69788f206adc8db1f4 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Tue, 1 Oct 2019 21:53:08 +0000 Subject: MIPS: barrier: Clean up rmb() & wmb() definitions Simplify our definitions of rmb() & wmb() using the new __SYNC() infrastructure. The fast_rmb() & fast_wmb() macros are removed, since they only provided a level of indirection that made the code less readable & weren't directly used anywhere in the kernel tree. The Octeon #ifdef'ery is removed, since the "syncw" instruction previously used is merely an alias for "sync 4" which __SYNC() will emit for the wmb sync type when the kernel is configured for an Octeon CPU. Similarly __SYNC() will emit nothing for the rmb sync type in Octeon configurations. Signed-off-by: Paul Burton Cc: linux-mips@vger.kernel.org Cc: Huacai Chen Cc: Jiaxun Yang Cc: linux-kernel@vger.kernel.org --- arch/mips/include/asm/barrier.h | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/mips/include/asm/barrier.h b/arch/mips/include/asm/barrier.h index 5ad39bfd3b6d..f36cab87cfde 100644 --- a/arch/mips/include/asm/barrier.h +++ b/arch/mips/include/asm/barrier.h @@ -26,6 +26,18 @@ #define __sync() do { } while(0) #endif +static inline void rmb(void) +{ + asm volatile(__SYNC(rmb, always) ::: "memory"); +} +#define rmb rmb + +static inline void wmb(void) +{ + asm volatile(__SYNC(wmb, always) ::: "memory"); +} +#define wmb wmb + #define __fast_iob() \ __asm__ __volatile__( \ ".set push\n\t" \ @@ -37,16 +49,9 @@ : "m" (*(int *)CKSEG1) \ : "memory") #ifdef CONFIG_CPU_CAVIUM_OCTEON -# define OCTEON_SYNCW_STR ".set push\n.set arch=octeon\nsyncw\nsyncw\n.set pop\n" -# define __syncw() __asm__ __volatile__(OCTEON_SYNCW_STR : : : "memory") - -# define fast_wmb() __syncw() -# define fast_rmb() barrier() # define fast_mb() __sync() # define fast_iob() do { } while (0) #else /* ! 
CONFIG_CPU_CAVIUM_OCTEON */ -# define fast_wmb() __sync() -# define fast_rmb() __sync() # define fast_mb() __sync() # ifdef CONFIG_SGI_IP28 # define fast_iob() \ @@ -83,19 +88,14 @@ #endif /* !CONFIG_CPU_HAS_WB */ -#define wmb() fast_wmb() -#define rmb() fast_rmb() - #if defined(CONFIG_WEAK_ORDERING) # ifdef CONFIG_CPU_CAVIUM_OCTEON # define __smp_mb() __sync() -# define __smp_rmb() barrier() -# define __smp_wmb() __syncw() # else # define __smp_mb() __asm__ __volatile__("sync" : : :"memory") -# define __smp_rmb() __asm__ __volatile__("sync" : : :"memory") -# define __smp_wmb() __asm__ __volatile__("sync" : : :"memory") # endif +# define __smp_rmb() rmb() +# define __smp_wmb() wmb() #else #define __smp_mb() barrier() #define __smp_rmb() barrier() -- cgit v1.2.3 From 05e6da742b5b708057e84487576655e4d7238dd1 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Tue, 1 Oct 2019 21:53:10 +0000 Subject: MIPS: barrier: Clean up __smp_mb() definition We #ifdef on Cavium Octeon CPUs, but emit the same sync instruction in both cases. Remove the #ifdef & simply expand to the __sync() macro. Whilst here indent the strong ordering case definitions to match the indentation of the weak ordering ones, helping readability. Signed-off-by: Paul Burton Cc: linux-mips@vger.kernel.org Cc: Huacai Chen Cc: Jiaxun Yang Cc: linux-kernel@vger.kernel.org --- arch/mips/include/asm/barrier.h | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/mips/include/asm/barrier.h b/arch/mips/include/asm/barrier.h index f36cab87cfde..8a5abc1c85a6 100644 --- a/arch/mips/include/asm/barrier.h +++ b/arch/mips/include/asm/barrier.h @@ -89,17 +89,13 @@ static inline void wmb(void) #endif /* !CONFIG_CPU_HAS_WB */ #if defined(CONFIG_WEAK_ORDERING) -# ifdef CONFIG_CPU_CAVIUM_OCTEON -# define __smp_mb() __sync() -# else -# define __smp_mb() __asm__ __volatile__("sync" : : :"memory") -# endif +# define __smp_mb() __sync() # define __smp_rmb() rmb() # define __smp_wmb() wmb() #else -#define __smp_mb() barrier() -#define __smp_rmb() barrier() -#define __smp_wmb() barrier() +# define __smp_mb() barrier() +# define __smp_rmb() barrier() +# define __smp_wmb() barrier() #endif /* -- cgit v1.2.3 From 5c12a6eff6ae3ed32f1c4d6458e58e6c4e9b2352 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Tue, 1 Oct 2019 21:53:11 +0000 Subject: MIPS: barrier: Remove fast_mb() Octeon #ifdef'ery The definition of fast_mb() is the same in both the Octeon & non-Octeon cases, so remove the duplication & define it only once. Signed-off-by: Paul Burton Cc: linux-mips@vger.kernel.org Cc: Huacai Chen Cc: Jiaxun Yang Cc: linux-kernel@vger.kernel.org --- arch/mips/include/asm/barrier.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/mips/include/asm/barrier.h b/arch/mips/include/asm/barrier.h index 8a5abc1c85a6..657ec01120a4 100644 --- a/arch/mips/include/asm/barrier.h +++ b/arch/mips/include/asm/barrier.h @@ -38,6 +38,8 @@ static inline void wmb(void) } #define wmb wmb +#define fast_mb() __sync() + #define __fast_iob() \ __asm__ __volatile__( \ ".set push\n\t" \ @@ -49,10 +51,8 @@ static inline void wmb(void) : "m" (*(int *)CKSEG1) \ : "memory") #ifdef CONFIG_CPU_CAVIUM_OCTEON -# define fast_mb() __sync() # define fast_iob() do { } while (0) #else /* ! 
CONFIG_CPU_CAVIUM_OCTEON */ -# define fast_mb() __sync() # ifdef CONFIG_SGI_IP28 # define fast_iob() \ __asm__ __volatile__( \ -- cgit v1.2.3 From fe0065e56227a2f6a6ad717c6d8d871263e482a8 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Tue, 1 Oct 2019 21:53:13 +0000 Subject: MIPS: barrier: Clean up __sync() definition Implement __sync() using the new __SYNC() infrastructure, which will take care of not emitting an instruction for old R3k CPUs that don't support it. The only behavioral difference is that __sync() will now provide a compiler barrier on these old CPUs, but that seems like reasonable behavior anyway. Signed-off-by: Paul Burton Cc: linux-mips@vger.kernel.org Cc: Huacai Chen Cc: Jiaxun Yang Cc: linux-kernel@vger.kernel.org --- arch/mips/include/asm/barrier.h | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/mips/include/asm/barrier.h b/arch/mips/include/asm/barrier.h index 657ec01120a4..a117c6d95038 100644 --- a/arch/mips/include/asm/barrier.h +++ b/arch/mips/include/asm/barrier.h @@ -11,20 +11,10 @@ #include #include -#ifdef CONFIG_CPU_HAS_SYNC -#define __sync() \ - __asm__ __volatile__( \ - ".set push\n\t" \ - ".set noreorder\n\t" \ - ".set mips2\n\t" \ - "sync\n\t" \ - ".set pop" \ - : /* no output */ \ - : /* no input */ \ - : "memory") -#else -#define __sync() do { } while(0) -#endif +static inline void __sync(void) +{ + asm volatile(__SYNC(full, always) ::: "memory"); +} static inline void rmb(void) { -- cgit v1.2.3 From 185d7d7a58194e3784e8dc2898756065f974090a Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Tue, 1 Oct 2019 21:53:14 +0000 Subject: MIPS: barrier: Clean up sync_ginv() Use the new __SYNC() infrastructure to implement sync_ginv(), for consistency with much of the rest of the asm/barrier.h. Signed-off-by: Paul Burton Cc: linux-mips@vger.kernel.org Cc: Huacai Chen Cc: Jiaxun Yang Cc: linux-kernel@vger.kernel.org --- arch/mips/include/asm/barrier.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/mips/include/asm/barrier.h b/arch/mips/include/asm/barrier.h index a117c6d95038..c7e05e832da9 100644 --- a/arch/mips/include/asm/barrier.h +++ b/arch/mips/include/asm/barrier.h @@ -163,7 +163,7 @@ static inline void wmb(void) static inline void sync_ginv(void) { - asm volatile("sync\t%0" :: "i"(__SYNC_ginv)); + asm volatile(__SYNC(ginv, always)); } #include -- cgit v1.2.3 From 36d3295c5a0d9169bae1d40f8db92459977c2936 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Tue, 1 Oct 2019 21:53:15 +0000 Subject: MIPS: atomic: Fix whitespace in ATOMIC_OP macros We define macros in asm/atomic.h which end each line with space characters before a backslash to continue on the next line. Remove the space characters leaving tabs as the whitespace used for conformity with coding convention. 
Signed-off-by: Paul Burton Cc: linux-mips@vger.kernel.org Cc: Huacai Chen Cc: Jiaxun Yang Cc: linux-kernel@vger.kernel.org --- arch/mips/include/asm/atomic.h | 184 ++++++++++++++++++++--------------------- 1 file changed, 92 insertions(+), 92 deletions(-) (limited to 'arch') diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h index 7578c807ef98..2d2a8a74c51b 100644 --- a/arch/mips/include/asm/atomic.h +++ b/arch/mips/include/asm/atomic.h @@ -42,102 +42,102 @@ */ #define atomic_set(v, i) WRITE_ONCE((v)->counter, (i)) -#define ATOMIC_OP(op, c_op, asm_op) \ -static __inline__ void atomic_##op(int i, atomic_t * v) \ -{ \ - if (kernel_uses_llsc) { \ - int temp; \ - \ - loongson_llsc_mb(); \ - __asm__ __volatile__( \ - " .set push \n" \ - " .set "MIPS_ISA_LEVEL" \n" \ - "1: ll %0, %1 # atomic_" #op " \n" \ - " " #asm_op " %0, %2 \n" \ - " sc %0, %1 \n" \ - "\t" __SC_BEQZ "%0, 1b \n" \ - " .set pop \n" \ - : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (v->counter) \ - : "Ir" (i) : __LLSC_CLOBBER); \ - } else { \ - unsigned long flags; \ - \ - raw_local_irq_save(flags); \ - v->counter c_op i; \ - raw_local_irq_restore(flags); \ - } \ +#define ATOMIC_OP(op, c_op, asm_op) \ +static __inline__ void atomic_##op(int i, atomic_t * v) \ +{ \ + if (kernel_uses_llsc) { \ + int temp; \ + \ + loongson_llsc_mb(); \ + __asm__ __volatile__( \ + " .set push \n" \ + " .set "MIPS_ISA_LEVEL" \n" \ + "1: ll %0, %1 # atomic_" #op " \n" \ + " " #asm_op " %0, %2 \n" \ + " sc %0, %1 \n" \ + "\t" __SC_BEQZ "%0, 1b \n" \ + " .set pop \n" \ + : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (v->counter) \ + : "Ir" (i) : __LLSC_CLOBBER); \ + } else { \ + unsigned long flags; \ + \ + raw_local_irq_save(flags); \ + v->counter c_op i; \ + raw_local_irq_restore(flags); \ + } \ } -#define ATOMIC_OP_RETURN(op, c_op, asm_op) \ -static __inline__ int atomic_##op##_return_relaxed(int i, atomic_t * v) \ -{ \ - int result; \ - \ - if (kernel_uses_llsc) { \ - int temp; \ - \ - loongson_llsc_mb(); \ - __asm__ __volatile__( \ - " .set push \n" \ - " .set "MIPS_ISA_LEVEL" \n" \ - "1: ll %1, %2 # atomic_" #op "_return \n" \ - " " #asm_op " %0, %1, %3 \n" \ - " sc %0, %2 \n" \ - "\t" __SC_BEQZ "%0, 1b \n" \ - " " #asm_op " %0, %1, %3 \n" \ - " .set pop \n" \ - : "=&r" (result), "=&r" (temp), \ - "+" GCC_OFF_SMALL_ASM() (v->counter) \ - : "Ir" (i) : __LLSC_CLOBBER); \ - } else { \ - unsigned long flags; \ - \ - raw_local_irq_save(flags); \ - result = v->counter; \ - result c_op i; \ - v->counter = result; \ - raw_local_irq_restore(flags); \ - } \ - \ - return result; \ +#define ATOMIC_OP_RETURN(op, c_op, asm_op) \ +static __inline__ int atomic_##op##_return_relaxed(int i, atomic_t * v) \ +{ \ + int result; \ + \ + if (kernel_uses_llsc) { \ + int temp; \ + \ + loongson_llsc_mb(); \ + __asm__ __volatile__( \ + " .set push \n" \ + " .set "MIPS_ISA_LEVEL" \n" \ + "1: ll %1, %2 # atomic_" #op "_return \n" \ + " " #asm_op " %0, %1, %3 \n" \ + " sc %0, %2 \n" \ + "\t" __SC_BEQZ "%0, 1b \n" \ + " " #asm_op " %0, %1, %3 \n" \ + " .set pop \n" \ + : "=&r" (result), "=&r" (temp), \ + "+" GCC_OFF_SMALL_ASM() (v->counter) \ + : "Ir" (i) : __LLSC_CLOBBER); \ + } else { \ + unsigned long flags; \ + \ + raw_local_irq_save(flags); \ + result = v->counter; \ + result c_op i; \ + v->counter = result; \ + raw_local_irq_restore(flags); \ + } \ + \ + return result; \ } -#define ATOMIC_FETCH_OP(op, c_op, asm_op) \ -static __inline__ int atomic_fetch_##op##_relaxed(int i, atomic_t * v) \ -{ \ - int result; \ - \ - if (kernel_uses_llsc) { \ - int temp; \ - 
\ - loongson_llsc_mb(); \ - __asm__ __volatile__( \ - " .set push \n" \ - " .set "MIPS_ISA_LEVEL" \n" \ - "1: ll %1, %2 # atomic_fetch_" #op " \n" \ - " " #asm_op " %0, %1, %3 \n" \ - " sc %0, %2 \n" \ - "\t" __SC_BEQZ "%0, 1b \n" \ - " .set pop \n" \ - " move %0, %1 \n" \ - : "=&r" (result), "=&r" (temp), \ - "+" GCC_OFF_SMALL_ASM() (v->counter) \ - : "Ir" (i) : __LLSC_CLOBBER); \ - } else { \ - unsigned long flags; \ - \ - raw_local_irq_save(flags); \ - result = v->counter; \ - v->counter c_op i; \ - raw_local_irq_restore(flags); \ - } \ - \ - return result; \ +#define ATOMIC_FETCH_OP(op, c_op, asm_op) \ +static __inline__ int atomic_fetch_##op##_relaxed(int i, atomic_t * v) \ +{ \ + int result; \ + \ + if (kernel_uses_llsc) { \ + int temp; \ + \ + loongson_llsc_mb(); \ + __asm__ __volatile__( \ + " .set push \n" \ + " .set "MIPS_ISA_LEVEL" \n" \ + "1: ll %1, %2 # atomic_fetch_" #op " \n" \ + " " #asm_op " %0, %1, %3 \n" \ + " sc %0, %2 \n" \ + "\t" __SC_BEQZ "%0, 1b \n" \ + " .set pop \n" \ + " move %0, %1 \n" \ + : "=&r" (result), "=&r" (temp), \ + "+" GCC_OFF_SMALL_ASM() (v->counter) \ + : "Ir" (i) : __LLSC_CLOBBER); \ + } else { \ + unsigned long flags; \ + \ + raw_local_irq_save(flags); \ + result = v->counter; \ + v->counter c_op i; \ + raw_local_irq_restore(flags); \ + } \ + \ + return result; \ } -#define ATOMIC_OPS(op, c_op, asm_op) \ - ATOMIC_OP(op, c_op, asm_op) \ - ATOMIC_OP_RETURN(op, c_op, asm_op) \ +#define ATOMIC_OPS(op, c_op, asm_op) \ + ATOMIC_OP(op, c_op, asm_op) \ + ATOMIC_OP_RETURN(op, c_op, asm_op) \ ATOMIC_FETCH_OP(op, c_op, asm_op) ATOMIC_OPS(add, +=, addu) @@ -149,8 +149,8 @@ ATOMIC_OPS(sub, -=, subu) #define atomic_fetch_sub_relaxed atomic_fetch_sub_relaxed #undef ATOMIC_OPS -#define ATOMIC_OPS(op, c_op, asm_op) \ - ATOMIC_OP(op, c_op, asm_op) \ +#define ATOMIC_OPS(op, c_op, asm_op) \ + ATOMIC_OP(op, c_op, asm_op) \ ATOMIC_FETCH_OP(op, c_op, asm_op) ATOMIC_OPS(and, &=, and) -- cgit v1.2.3 From 9537db24c65aeb71718916272687b0d00d3e0821 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Tue, 1 Oct 2019 21:53:16 +0000 Subject: MIPS: atomic: Handle !kernel_uses_llsc first Handle the !kernel_uses_llsc path first in our ATOMIC_OP(), ATOMIC_OP_RETURN() & ATOMIC_FETCH_OP() macros & return from within the block. This allows us to de-indent the kernel_uses_llsc path by one level which will be useful when making further changes. 
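Schematically, each generated function changes shape like this (an abridged
sketch rather than the exact macro text; atomic_op() stands in for the
generated atomic_<op>() bodies shown in the hunks below):

  static __inline__ void atomic_op(int i, atomic_t *v)
  {
  	if (!kernel_uses_llsc) {
  		unsigned long flags;

  		/* IRQ-masked fallback for CPUs without LL/SC */
  		raw_local_irq_save(flags);
  		v->counter += i;
  		raw_local_irq_restore(flags);
  		return;
  	}

  	/* The LL/SC loop now sits at one indentation level, not two */
  }
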
Signed-off-by: Paul Burton Cc: linux-mips@vger.kernel.org Cc: Huacai Chen Cc: Jiaxun Yang Cc: linux-kernel@vger.kernel.org --- arch/mips/include/asm/atomic.h | 99 +++++++++++++++++++++--------------------- 1 file changed, 49 insertions(+), 50 deletions(-) (limited to 'arch') diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h index 2d2a8a74c51b..ace2ea005588 100644 --- a/arch/mips/include/asm/atomic.h +++ b/arch/mips/include/asm/atomic.h @@ -45,51 +45,36 @@ #define ATOMIC_OP(op, c_op, asm_op) \ static __inline__ void atomic_##op(int i, atomic_t * v) \ { \ - if (kernel_uses_llsc) { \ - int temp; \ + int temp; \ \ - loongson_llsc_mb(); \ - __asm__ __volatile__( \ - " .set push \n" \ - " .set "MIPS_ISA_LEVEL" \n" \ - "1: ll %0, %1 # atomic_" #op " \n" \ - " " #asm_op " %0, %2 \n" \ - " sc %0, %1 \n" \ - "\t" __SC_BEQZ "%0, 1b \n" \ - " .set pop \n" \ - : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (v->counter) \ - : "Ir" (i) : __LLSC_CLOBBER); \ - } else { \ + if (!kernel_uses_llsc) { \ unsigned long flags; \ \ raw_local_irq_save(flags); \ v->counter c_op i; \ raw_local_irq_restore(flags); \ + return; \ } \ + \ + loongson_llsc_mb(); \ + __asm__ __volatile__( \ + " .set push \n" \ + " .set " MIPS_ISA_LEVEL " \n" \ + "1: ll %0, %1 # atomic_" #op " \n" \ + " " #asm_op " %0, %2 \n" \ + " sc %0, %1 \n" \ + "\t" __SC_BEQZ "%0, 1b \n" \ + " .set pop \n" \ + : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (v->counter) \ + : "Ir" (i) : __LLSC_CLOBBER); \ } #define ATOMIC_OP_RETURN(op, c_op, asm_op) \ static __inline__ int atomic_##op##_return_relaxed(int i, atomic_t * v) \ { \ - int result; \ - \ - if (kernel_uses_llsc) { \ - int temp; \ + int temp, result; \ \ - loongson_llsc_mb(); \ - __asm__ __volatile__( \ - " .set push \n" \ - " .set "MIPS_ISA_LEVEL" \n" \ - "1: ll %1, %2 # atomic_" #op "_return \n" \ - " " #asm_op " %0, %1, %3 \n" \ - " sc %0, %2 \n" \ - "\t" __SC_BEQZ "%0, 1b \n" \ - " " #asm_op " %0, %1, %3 \n" \ - " .set pop \n" \ - : "=&r" (result), "=&r" (temp), \ - "+" GCC_OFF_SMALL_ASM() (v->counter) \ - : "Ir" (i) : __LLSC_CLOBBER); \ - } else { \ + if (!kernel_uses_llsc) { \ unsigned long flags; \ \ raw_local_irq_save(flags); \ @@ -97,41 +82,55 @@ static __inline__ int atomic_##op##_return_relaxed(int i, atomic_t * v) \ result c_op i; \ v->counter = result; \ raw_local_irq_restore(flags); \ + return result; \ } \ \ + loongson_llsc_mb(); \ + __asm__ __volatile__( \ + " .set push \n" \ + " .set " MIPS_ISA_LEVEL " \n" \ + "1: ll %1, %2 # atomic_" #op "_return \n" \ + " " #asm_op " %0, %1, %3 \n" \ + " sc %0, %2 \n" \ + "\t" __SC_BEQZ "%0, 1b \n" \ + " " #asm_op " %0, %1, %3 \n" \ + " .set pop \n" \ + : "=&r" (result), "=&r" (temp), \ + "+" GCC_OFF_SMALL_ASM() (v->counter) \ + : "Ir" (i) : __LLSC_CLOBBER); \ + \ return result; \ } #define ATOMIC_FETCH_OP(op, c_op, asm_op) \ static __inline__ int atomic_fetch_##op##_relaxed(int i, atomic_t * v) \ { \ - int result; \ + int temp, result; \ \ - if (kernel_uses_llsc) { \ - int temp; \ - \ - loongson_llsc_mb(); \ - __asm__ __volatile__( \ - " .set push \n" \ - " .set "MIPS_ISA_LEVEL" \n" \ - "1: ll %1, %2 # atomic_fetch_" #op " \n" \ - " " #asm_op " %0, %1, %3 \n" \ - " sc %0, %2 \n" \ - "\t" __SC_BEQZ "%0, 1b \n" \ - " .set pop \n" \ - " move %0, %1 \n" \ - : "=&r" (result), "=&r" (temp), \ - "+" GCC_OFF_SMALL_ASM() (v->counter) \ - : "Ir" (i) : __LLSC_CLOBBER); \ - } else { \ + if (!kernel_uses_llsc) { \ unsigned long flags; \ \ raw_local_irq_save(flags); \ result = v->counter; \ v->counter c_op i; \ raw_local_irq_restore(flags); \ + 
return result; \ } \ \ + loongson_llsc_mb(); \ + __asm__ __volatile__( \ + " .set push \n" \ + " .set "MIPS_ISA_LEVEL" \n" \ + "1: ll %1, %2 # atomic_fetch_" #op " \n" \ + " " #asm_op " %0, %1, %3 \n" \ + " sc %0, %2 \n" \ + "\t" __SC_BEQZ "%0, 1b \n" \ + " .set pop \n" \ + " move %0, %1 \n" \ + : "=&r" (result), "=&r" (temp), \ + "+" GCC_OFF_SMALL_ASM() (v->counter) \ + : "Ir" (i) : __LLSC_CLOBBER); \ + \ return result; \ } -- cgit v1.2.3 From a38ee6bb14a41b6849576bcf6cbd33cbbe5c3a7d Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Tue, 1 Oct 2019 21:53:18 +0000 Subject: MIPS: atomic: Use one macro to generate 32b & 64b functions Cut down on duplication by generalizing the ATOMIC_OP(), ATOMIC_OP_RETURN() & ATOMIC_FETCH_OP() macros to work for both 32b & 64b atomics, and removing the ATOMIC64_ variants. This ensures consistency between our atomic_* & atomic64_* functions. Signed-off-by: Paul Burton Cc: linux-mips@vger.kernel.org Cc: Huacai Chen Cc: Jiaxun Yang Cc: linux-kernel@vger.kernel.org --- arch/mips/include/asm/atomic.h | 196 ++++++++++------------------------------- 1 file changed, 45 insertions(+), 151 deletions(-) (limited to 'arch') diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h index ace2ea005588..b834af5a7382 100644 --- a/arch/mips/include/asm/atomic.h +++ b/arch/mips/include/asm/atomic.h @@ -42,10 +42,10 @@ */ #define atomic_set(v, i) WRITE_ONCE((v)->counter, (i)) -#define ATOMIC_OP(op, c_op, asm_op) \ -static __inline__ void atomic_##op(int i, atomic_t * v) \ +#define ATOMIC_OP(pfx, op, type, c_op, asm_op, ll, sc) \ +static __inline__ void pfx##_##op(type i, pfx##_t * v) \ { \ - int temp; \ + type temp; \ \ if (!kernel_uses_llsc) { \ unsigned long flags; \ @@ -60,19 +60,19 @@ static __inline__ void atomic_##op(int i, atomic_t * v) \ __asm__ __volatile__( \ " .set push \n" \ " .set " MIPS_ISA_LEVEL " \n" \ - "1: ll %0, %1 # atomic_" #op " \n" \ + "1: " #ll " %0, %1 # " #pfx "_" #op " \n" \ " " #asm_op " %0, %2 \n" \ - " sc %0, %1 \n" \ + " " #sc " %0, %1 \n" \ "\t" __SC_BEQZ "%0, 1b \n" \ " .set pop \n" \ : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (v->counter) \ : "Ir" (i) : __LLSC_CLOBBER); \ } -#define ATOMIC_OP_RETURN(op, c_op, asm_op) \ -static __inline__ int atomic_##op##_return_relaxed(int i, atomic_t * v) \ +#define ATOMIC_OP_RETURN(pfx, op, type, c_op, asm_op, ll, sc) \ +static __inline__ type pfx##_##op##_return_relaxed(type i, pfx##_t * v) \ { \ - int temp, result; \ + type temp, result; \ \ if (!kernel_uses_llsc) { \ unsigned long flags; \ @@ -89,9 +89,9 @@ static __inline__ int atomic_##op##_return_relaxed(int i, atomic_t * v) \ __asm__ __volatile__( \ " .set push \n" \ " .set " MIPS_ISA_LEVEL " \n" \ - "1: ll %1, %2 # atomic_" #op "_return \n" \ + "1: " #ll " %1, %2 # " #pfx "_" #op "_return\n" \ " " #asm_op " %0, %1, %3 \n" \ - " sc %0, %2 \n" \ + " " #sc " %0, %2 \n" \ "\t" __SC_BEQZ "%0, 1b \n" \ " " #asm_op " %0, %1, %3 \n" \ " .set pop \n" \ @@ -102,8 +102,8 @@ static __inline__ int atomic_##op##_return_relaxed(int i, atomic_t * v) \ return result; \ } -#define ATOMIC_FETCH_OP(op, c_op, asm_op) \ -static __inline__ int atomic_fetch_##op##_relaxed(int i, atomic_t * v) \ +#define ATOMIC_FETCH_OP(pfx, op, type, c_op, asm_op, ll, sc) \ +static __inline__ type pfx##_fetch_##op##_relaxed(type i, pfx##_t * v) \ { \ int temp, result; \ \ @@ -120,10 +120,10 @@ static __inline__ int atomic_fetch_##op##_relaxed(int i, atomic_t * v) \ loongson_llsc_mb(); \ __asm__ __volatile__( \ " .set push \n" \ - " .set "MIPS_ISA_LEVEL" \n" \ - "1: ll %1, 
%2 # atomic_fetch_" #op " \n" \ + " .set " MIPS_ISA_LEVEL " \n" \ + "1: " #ll " %1, %2 # " #pfx "_fetch_" #op "\n" \ " " #asm_op " %0, %1, %3 \n" \ - " sc %0, %2 \n" \ + " " #sc " %0, %2 \n" \ "\t" __SC_BEQZ "%0, 1b \n" \ " .set pop \n" \ " move %0, %1 \n" \ @@ -134,32 +134,50 @@ static __inline__ int atomic_fetch_##op##_relaxed(int i, atomic_t * v) \ return result; \ } -#define ATOMIC_OPS(op, c_op, asm_op) \ - ATOMIC_OP(op, c_op, asm_op) \ - ATOMIC_OP_RETURN(op, c_op, asm_op) \ - ATOMIC_FETCH_OP(op, c_op, asm_op) +#define ATOMIC_OPS(pfx, op, type, c_op, asm_op, ll, sc) \ + ATOMIC_OP(pfx, op, type, c_op, asm_op, ll, sc) \ + ATOMIC_OP_RETURN(pfx, op, type, c_op, asm_op, ll, sc) \ + ATOMIC_FETCH_OP(pfx, op, type, c_op, asm_op, ll, sc) -ATOMIC_OPS(add, +=, addu) -ATOMIC_OPS(sub, -=, subu) +ATOMIC_OPS(atomic, add, int, +=, addu, ll, sc) +ATOMIC_OPS(atomic, sub, int, -=, subu, ll, sc) #define atomic_add_return_relaxed atomic_add_return_relaxed #define atomic_sub_return_relaxed atomic_sub_return_relaxed #define atomic_fetch_add_relaxed atomic_fetch_add_relaxed #define atomic_fetch_sub_relaxed atomic_fetch_sub_relaxed +#ifdef CONFIG_64BIT +ATOMIC_OPS(atomic64, add, s64, +=, daddu, lld, scd) +ATOMIC_OPS(atomic64, sub, s64, -=, dsubu, lld, scd) +# define atomic64_add_return_relaxed atomic64_add_return_relaxed +# define atomic64_sub_return_relaxed atomic64_sub_return_relaxed +# define atomic64_fetch_add_relaxed atomic64_fetch_add_relaxed +# define atomic64_fetch_sub_relaxed atomic64_fetch_sub_relaxed +#endif /* CONFIG_64BIT */ + #undef ATOMIC_OPS -#define ATOMIC_OPS(op, c_op, asm_op) \ - ATOMIC_OP(op, c_op, asm_op) \ - ATOMIC_FETCH_OP(op, c_op, asm_op) +#define ATOMIC_OPS(pfx, op, type, c_op, asm_op, ll, sc) \ + ATOMIC_OP(pfx, op, type, c_op, asm_op, ll, sc) \ + ATOMIC_FETCH_OP(pfx, op, type, c_op, asm_op, ll, sc) -ATOMIC_OPS(and, &=, and) -ATOMIC_OPS(or, |=, or) -ATOMIC_OPS(xor, ^=, xor) +ATOMIC_OPS(atomic, and, int, &=, and, ll, sc) +ATOMIC_OPS(atomic, or, int, |=, or, ll, sc) +ATOMIC_OPS(atomic, xor, int, ^=, xor, ll, sc) #define atomic_fetch_and_relaxed atomic_fetch_and_relaxed #define atomic_fetch_or_relaxed atomic_fetch_or_relaxed #define atomic_fetch_xor_relaxed atomic_fetch_xor_relaxed +#ifdef CONFIG_64BIT +ATOMIC_OPS(atomic64, and, s64, &=, and, lld, scd) +ATOMIC_OPS(atomic64, or, s64, |=, or, lld, scd) +ATOMIC_OPS(atomic64, xor, s64, ^=, xor, lld, scd) +# define atomic64_fetch_and_relaxed atomic64_fetch_and_relaxed +# define atomic64_fetch_or_relaxed atomic64_fetch_or_relaxed +# define atomic64_fetch_xor_relaxed atomic64_fetch_xor_relaxed +#endif + #undef ATOMIC_OPS #undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN @@ -243,130 +261,6 @@ static __inline__ int atomic_sub_if_positive(int i, atomic_t * v) */ #define atomic64_set(v, i) WRITE_ONCE((v)->counter, (i)) -#define ATOMIC64_OP(op, c_op, asm_op) \ -static __inline__ void atomic64_##op(s64 i, atomic64_t * v) \ -{ \ - if (kernel_uses_llsc) { \ - s64 temp; \ - \ - loongson_llsc_mb(); \ - __asm__ __volatile__( \ - " .set push \n" \ - " .set "MIPS_ISA_LEVEL" \n" \ - "1: lld %0, %1 # atomic64_" #op " \n" \ - " " #asm_op " %0, %2 \n" \ - " scd %0, %1 \n" \ - "\t" __SC_BEQZ "%0, 1b \n" \ - " .set pop \n" \ - : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (v->counter) \ - : "Ir" (i) : __LLSC_CLOBBER); \ - } else { \ - unsigned long flags; \ - \ - raw_local_irq_save(flags); \ - v->counter c_op i; \ - raw_local_irq_restore(flags); \ - } \ -} - -#define ATOMIC64_OP_RETURN(op, c_op, asm_op) \ -static __inline__ s64 atomic64_##op##_return_relaxed(s64 i, 
atomic64_t * v) \ -{ \ - s64 result; \ - \ - if (kernel_uses_llsc) { \ - s64 temp; \ - \ - loongson_llsc_mb(); \ - __asm__ __volatile__( \ - " .set push \n" \ - " .set "MIPS_ISA_LEVEL" \n" \ - "1: lld %1, %2 # atomic64_" #op "_return\n" \ - " " #asm_op " %0, %1, %3 \n" \ - " scd %0, %2 \n" \ - "\t" __SC_BEQZ "%0, 1b \n" \ - " " #asm_op " %0, %1, %3 \n" \ - " .set pop \n" \ - : "=&r" (result), "=&r" (temp), \ - "+" GCC_OFF_SMALL_ASM() (v->counter) \ - : "Ir" (i) : __LLSC_CLOBBER); \ - } else { \ - unsigned long flags; \ - \ - raw_local_irq_save(flags); \ - result = v->counter; \ - result c_op i; \ - v->counter = result; \ - raw_local_irq_restore(flags); \ - } \ - \ - return result; \ -} - -#define ATOMIC64_FETCH_OP(op, c_op, asm_op) \ -static __inline__ s64 atomic64_fetch_##op##_relaxed(s64 i, atomic64_t * v) \ -{ \ - s64 result; \ - \ - if (kernel_uses_llsc) { \ - s64 temp; \ - \ - loongson_llsc_mb(); \ - __asm__ __volatile__( \ - " .set push \n" \ - " .set "MIPS_ISA_LEVEL" \n" \ - "1: lld %1, %2 # atomic64_fetch_" #op "\n" \ - " " #asm_op " %0, %1, %3 \n" \ - " scd %0, %2 \n" \ - "\t" __SC_BEQZ "%0, 1b \n" \ - " move %0, %1 \n" \ - " .set pop \n" \ - : "=&r" (result), "=&r" (temp), \ - "+" GCC_OFF_SMALL_ASM() (v->counter) \ - : "Ir" (i) : __LLSC_CLOBBER); \ - } else { \ - unsigned long flags; \ - \ - raw_local_irq_save(flags); \ - result = v->counter; \ - v->counter c_op i; \ - raw_local_irq_restore(flags); \ - } \ - \ - return result; \ -} - -#define ATOMIC64_OPS(op, c_op, asm_op) \ - ATOMIC64_OP(op, c_op, asm_op) \ - ATOMIC64_OP_RETURN(op, c_op, asm_op) \ - ATOMIC64_FETCH_OP(op, c_op, asm_op) - -ATOMIC64_OPS(add, +=, daddu) -ATOMIC64_OPS(sub, -=, dsubu) - -#define atomic64_add_return_relaxed atomic64_add_return_relaxed -#define atomic64_sub_return_relaxed atomic64_sub_return_relaxed -#define atomic64_fetch_add_relaxed atomic64_fetch_add_relaxed -#define atomic64_fetch_sub_relaxed atomic64_fetch_sub_relaxed - -#undef ATOMIC64_OPS -#define ATOMIC64_OPS(op, c_op, asm_op) \ - ATOMIC64_OP(op, c_op, asm_op) \ - ATOMIC64_FETCH_OP(op, c_op, asm_op) - -ATOMIC64_OPS(and, &=, and) -ATOMIC64_OPS(or, |=, or) -ATOMIC64_OPS(xor, ^=, xor) - -#define atomic64_fetch_and_relaxed atomic64_fetch_and_relaxed -#define atomic64_fetch_or_relaxed atomic64_fetch_or_relaxed -#define atomic64_fetch_xor_relaxed atomic64_fetch_xor_relaxed - -#undef ATOMIC64_OPS -#undef ATOMIC64_FETCH_OP -#undef ATOMIC64_OP_RETURN -#undef ATOMIC64_OP - /* * atomic64_sub_if_positive - conditionally subtract integer from atomic * variable -- cgit v1.2.3 From 4d1dbfe6cbec34c6398a480c0572bba794e89e11 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Tue, 1 Oct 2019 21:53:20 +0000 Subject: MIPS: atomic: Emit Loongson3 sync workarounds within asm Generate the sync instructions required to workaround Loongson3 LL/SC errata within inline asm blocks, which feels a little safer than doing it from C where strictly speaking the compiler would be well within its rights to insert a memory access between the separate asm statements we previously had, containing sync & ll instructions respectively. 
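Schematically the change is (abridged from the hunks below):

  /* Before: two separate asm statements; strictly speaking the compiler
   * may schedule a memory access between the sync and the ll. */
  loongson_llsc_mb();
  __asm__ __volatile__("1:	ll	%0, %1	\n" /* ... */);

  /* After: the workaround sync is emitted inside the same asm block,
   * immediately before the ll, where nothing can come between them. */
  __asm__ __volatile__(
  	"	" __SYNC(full, loongson3_war) "	\n"
  	"1:	ll	%0, %1	\n" /* ... */);
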
Signed-off-by: Paul Burton Cc: linux-mips@vger.kernel.org Cc: Huacai Chen Cc: Jiaxun Yang Cc: linux-kernel@vger.kernel.org --- arch/mips/include/asm/atomic.h | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h index b834af5a7382..841ff274ada6 100644 --- a/arch/mips/include/asm/atomic.h +++ b/arch/mips/include/asm/atomic.h @@ -21,6 +21,7 @@ #include #include #include +#include #include #define ATOMIC_INIT(i) { (i) } @@ -56,10 +57,10 @@ static __inline__ void pfx##_##op(type i, pfx##_t * v) \ return; \ } \ \ - loongson_llsc_mb(); \ __asm__ __volatile__( \ " .set push \n" \ " .set " MIPS_ISA_LEVEL " \n" \ + " " __SYNC(full, loongson3_war) " \n" \ "1: " #ll " %0, %1 # " #pfx "_" #op " \n" \ " " #asm_op " %0, %2 \n" \ " " #sc " %0, %1 \n" \ @@ -85,10 +86,10 @@ static __inline__ type pfx##_##op##_return_relaxed(type i, pfx##_t * v) \ return result; \ } \ \ - loongson_llsc_mb(); \ __asm__ __volatile__( \ " .set push \n" \ " .set " MIPS_ISA_LEVEL " \n" \ + " " __SYNC(full, loongson3_war) " \n" \ "1: " #ll " %1, %2 # " #pfx "_" #op "_return\n" \ " " #asm_op " %0, %1, %3 \n" \ " " #sc " %0, %2 \n" \ @@ -117,10 +118,10 @@ static __inline__ type pfx##_fetch_##op##_relaxed(type i, pfx##_t * v) \ return result; \ } \ \ - loongson_llsc_mb(); \ __asm__ __volatile__( \ " .set push \n" \ " .set " MIPS_ISA_LEVEL " \n" \ + " " __SYNC(full, loongson3_war) " \n" \ "1: " #ll " %1, %2 # " #pfx "_fetch_" #op "\n" \ " " #asm_op " %0, %1, %3 \n" \ " " #sc " %0, %2 \n" \ @@ -200,10 +201,10 @@ static __inline__ int atomic_sub_if_positive(int i, atomic_t * v) if (kernel_uses_llsc) { int temp; - loongson_llsc_mb(); __asm__ __volatile__( " .set push \n" " .set "MIPS_ISA_LEVEL" \n" + " " __SYNC(full, loongson3_war) " \n" "1: ll %1, %2 # atomic_sub_if_positive\n" " .set pop \n" " subu %0, %1, %3 \n" @@ -213,7 +214,7 @@ static __inline__ int atomic_sub_if_positive(int i, atomic_t * v) " .set "MIPS_ISA_LEVEL" \n" " sc %1, %2 \n" "\t" __SC_BEQZ "%1, 1b \n" - "2: \n" + "2: " __SYNC(full, loongson3_war) " \n" " .set pop \n" : "=&r" (result), "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (v->counter) @@ -229,7 +230,14 @@ static __inline__ int atomic_sub_if_positive(int i, atomic_t * v) raw_local_irq_restore(flags); } - smp_llsc_mb(); + /* + * In the Loongson3 workaround case we already have a completion + * barrier at 2: above, which is needed due to the bltz that can branch + * to code outside of the LL/SC loop. As such, we don't need to emit + * another barrier here. + */ + if (!__SYNC_loongson3_war) + smp_llsc_mb(); return result; } -- cgit v1.2.3 From 77d281b7966e476927a45c5fb272d720aa75bb95 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Tue, 1 Oct 2019 21:53:21 +0000 Subject: MIPS: atomic: Use _atomic barriers in atomic_sub_if_positive() Use smp_mb__before_atomic() & smp_mb__after_atomic() in atomic_sub_if_positive() rather than the equivalent smp_mb__before_llsc() & smp_llsc_mb(). The former are more standard & this preps us for avoiding redundant duplicate barriers on Loongson3 in a later patch. 
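That is (see the two one-line hunks below):

  smp_mb__before_atomic();	/* previously smp_mb__before_llsc() */
  /* ... LL/SC sequence ... */
  smp_mb__after_atomic();	/* previously smp_llsc_mb() */
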
Signed-off-by: Paul Burton Cc: linux-mips@vger.kernel.org Cc: Huacai Chen Cc: Jiaxun Yang Cc: linux-kernel@vger.kernel.org --- arch/mips/include/asm/atomic.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h index 841ff274ada6..24443ef29337 100644 --- a/arch/mips/include/asm/atomic.h +++ b/arch/mips/include/asm/atomic.h @@ -196,7 +196,7 @@ static __inline__ int atomic_sub_if_positive(int i, atomic_t * v) { int result; - smp_mb__before_llsc(); + smp_mb__before_atomic(); if (kernel_uses_llsc) { int temp; @@ -237,7 +237,7 @@ static __inline__ int atomic_sub_if_positive(int i, atomic_t * v) * another barrier here. */ if (!__SYNC_loongson3_war) - smp_llsc_mb(); + smp_mb__after_atomic(); return result; } -- cgit v1.2.3 From 40e784b4d4bc31dee5f1db6a20287777d3aaa4dc Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Tue, 1 Oct 2019 21:53:23 +0000 Subject: MIPS: atomic: Unify 32b & 64b sub_if_positive Unify the definitions of atomic_sub_if_positive() & atomic64_sub_if_positive() using a macro like we do for most other atomic functions. This allows us to share the implementation ensuring consistency between the two. Notably this provides the appropriate loongson3_war barriers in the atomic64_sub_if_positive() case which were previously missing. The code is rearranged a little to handle the !kernel_uses_llsc case first in order to de-indent the LL/SC case & allow us not to go over 80 characters per line. Signed-off-by: Paul Burton Cc: linux-mips@vger.kernel.org Cc: Huacai Chen Cc: Jiaxun Yang Cc: linux-kernel@vger.kernel.org --- arch/mips/include/asm/atomic.h | 164 +++++++++++++++-------------------------- 1 file changed, 58 insertions(+), 106 deletions(-) (limited to 'arch') diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h index 24443ef29337..96ef50fa2817 100644 --- a/arch/mips/include/asm/atomic.h +++ b/arch/mips/include/asm/atomic.h @@ -192,65 +192,71 @@ ATOMIC_OPS(atomic64, xor, s64, ^=, xor, lld, scd) * Atomically test @v and subtract @i if @v is greater or equal than @i. * The function returns the old value of @v minus @i. 
*/ -static __inline__ int atomic_sub_if_positive(int i, atomic_t * v) -{ - int result; - - smp_mb__before_atomic(); - - if (kernel_uses_llsc) { - int temp; - - __asm__ __volatile__( - " .set push \n" - " .set "MIPS_ISA_LEVEL" \n" - " " __SYNC(full, loongson3_war) " \n" - "1: ll %1, %2 # atomic_sub_if_positive\n" - " .set pop \n" - " subu %0, %1, %3 \n" - " move %1, %0 \n" - " bltz %0, 2f \n" - " .set push \n" - " .set "MIPS_ISA_LEVEL" \n" - " sc %1, %2 \n" - "\t" __SC_BEQZ "%1, 1b \n" - "2: " __SYNC(full, loongson3_war) " \n" - " .set pop \n" - : "=&r" (result), "=&r" (temp), - "+" GCC_OFF_SMALL_ASM() (v->counter) - : "Ir" (i) : __LLSC_CLOBBER); - } else { - unsigned long flags; +#define ATOMIC_SIP_OP(pfx, type, op, ll, sc) \ +static __inline__ int pfx##_sub_if_positive(type i, pfx##_t * v) \ +{ \ + type temp, result; \ + \ + smp_mb__before_atomic(); \ + \ + if (!kernel_uses_llsc) { \ + unsigned long flags; \ + \ + raw_local_irq_save(flags); \ + result = v->counter; \ + result -= i; \ + if (result >= 0) \ + v->counter = result; \ + raw_local_irq_restore(flags); \ + smp_mb__after_atomic(); \ + return result; \ + } \ + \ + __asm__ __volatile__( \ + " .set push \n" \ + " .set " MIPS_ISA_LEVEL " \n" \ + " " __SYNC(full, loongson3_war) " \n" \ + "1: " #ll " %1, %2 # atomic_sub_if_positive\n" \ + " .set pop \n" \ + " " #op " %0, %1, %3 \n" \ + " move %1, %0 \n" \ + " bltz %0, 2f \n" \ + " .set push \n" \ + " .set " MIPS_ISA_LEVEL " \n" \ + " " #sc " %1, %2 \n" \ + " " __SC_BEQZ "%1, 1b \n" \ + "2: " __SYNC(full, loongson3_war) " \n" \ + " .set pop \n" \ + : "=&r" (result), "=&r" (temp), \ + "+" GCC_OFF_SMALL_ASM() (v->counter) \ + : "Ir" (i) \ + : __LLSC_CLOBBER); \ + \ + /* \ + * In the Loongson3 workaround case we already have a \ + * completion barrier at 2: above, which is needed due to the \ + * bltz that can branch to code outside of the LL/SC loop. As \ + * such, we don't need to emit another barrier here. \ + */ \ + if (!__SYNC_loongson3_war) \ + smp_mb__after_atomic(); \ + \ + return result; \ +} - raw_local_irq_save(flags); - result = v->counter; - result -= i; - if (result >= 0) - v->counter = result; - raw_local_irq_restore(flags); - } +ATOMIC_SIP_OP(atomic, int, subu, ll, sc) +#define atomic_dec_if_positive(v) atomic_sub_if_positive(1, v) - /* - * In the Loongson3 workaround case we already have a completion - * barrier at 2: above, which is needed due to the bltz that can branch - * to code outside of the LL/SC loop. As such, we don't need to emit - * another barrier here. - */ - if (!__SYNC_loongson3_war) - smp_mb__after_atomic(); +#ifdef CONFIG_64BIT +ATOMIC_SIP_OP(atomic64, s64, dsubu, lld, scd) +#define atomic64_dec_if_positive(v) atomic64_sub_if_positive(1, v) +#endif - return result; -} +#undef ATOMIC_SIP_OP #define atomic_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n))) #define atomic_xchg(v, new) (xchg(&((v)->counter), (new))) -/* - * atomic_dec_if_positive - decrement by 1 if old value positive - * @v: pointer of type atomic_t - */ -#define atomic_dec_if_positive(v) atomic_sub_if_positive(1, v) - #ifdef CONFIG_64BIT #define ATOMIC64_INIT(i) { (i) } @@ -269,64 +275,10 @@ static __inline__ int atomic_sub_if_positive(int i, atomic_t * v) */ #define atomic64_set(v, i) WRITE_ONCE((v)->counter, (i)) -/* - * atomic64_sub_if_positive - conditionally subtract integer from atomic - * variable - * @i: integer value to subtract - * @v: pointer of type atomic64_t - * - * Atomically test @v and subtract @i if @v is greater or equal than @i. 
- * The function returns the old value of @v minus @i. - */ -static __inline__ s64 atomic64_sub_if_positive(s64 i, atomic64_t * v) -{ - s64 result; - - smp_mb__before_llsc(); - - if (kernel_uses_llsc) { - s64 temp; - - __asm__ __volatile__( - " .set push \n" - " .set "MIPS_ISA_LEVEL" \n" - "1: lld %1, %2 # atomic64_sub_if_positive\n" - " dsubu %0, %1, %3 \n" - " move %1, %0 \n" - " bltz %0, 1f \n" - " scd %1, %2 \n" - "\t" __SC_BEQZ "%1, 1b \n" - "1: \n" - " .set pop \n" - : "=&r" (result), "=&r" (temp), - "+" GCC_OFF_SMALL_ASM() (v->counter) - : "Ir" (i)); - } else { - unsigned long flags; - - raw_local_irq_save(flags); - result = v->counter; - result -= i; - if (result >= 0) - v->counter = result; - raw_local_irq_restore(flags); - } - - smp_llsc_mb(); - - return result; -} - #define atomic64_cmpxchg(v, o, n) \ ((__typeof__((v)->counter))cmpxchg(&((v)->counter), (o), (n))) #define atomic64_xchg(v, new) (xchg(&((v)->counter), (new))) -/* - * atomic64_dec_if_positive - decrement by 1 if old value positive - * @v: pointer of type atomic64_t - */ -#define atomic64_dec_if_positive(v) atomic64_sub_if_positive(1, v) - #endif /* CONFIG_64BIT */ #endif /* _ASM_ATOMIC_H */ -- cgit v1.2.3 From 1da7bce8591d58bf2a442b0324659af7390401c2 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Tue, 1 Oct 2019 21:53:24 +0000 Subject: MIPS: atomic: Deduplicate 32b & 64b read, set, xchg, cmpxchg Remove the remaining duplication between 32b & 64b in asm/atomic.h by making use of an ATOMIC_OPS() macro to generate: - atomic_read()/atomic64_read() - atomic_set()/atomic64_set() - atomic_cmpxchg()/atomic64_cmpxchg() - atomic_xchg()/atomic64_xchg() This is consistent with the way all other functions in asm/atomic.h are generated, and ensures consistency between the 32b & 64b functions. Of note is that this results in the above now being static inline functions rather than macros. Signed-off-by: Paul Burton Cc: linux-mips@vger.kernel.org Cc: Huacai Chen Cc: Jiaxun Yang Cc: linux-kernel@vger.kernel.org --- arch/mips/include/asm/atomic.h | 70 ++++++++++++++++-------------------------- 1 file changed, 27 insertions(+), 43 deletions(-) (limited to 'arch') diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h index 96ef50fa2817..e5ac88392d1f 100644 --- a/arch/mips/include/asm/atomic.h +++ b/arch/mips/include/asm/atomic.h @@ -24,24 +24,34 @@ #include #include -#define ATOMIC_INIT(i) { (i) } +#define ATOMIC_OPS(pfx, type) \ +static __always_inline type pfx##_read(const pfx##_t *v) \ +{ \ + return READ_ONCE(v->counter); \ +} \ + \ +static __always_inline void pfx##_set(pfx##_t *v, type i) \ +{ \ + WRITE_ONCE(v->counter, i); \ +} \ + \ +static __always_inline type pfx##_cmpxchg(pfx##_t *v, type o, type n) \ +{ \ + return cmpxchg(&v->counter, o, n); \ +} \ + \ +static __always_inline type pfx##_xchg(pfx##_t *v, type n) \ +{ \ + return xchg(&v->counter, n); \ +} -/* - * atomic_read - read atomic variable - * @v: pointer of type atomic_t - * - * Atomically reads the value of @v. - */ -#define atomic_read(v) READ_ONCE((v)->counter) +#define ATOMIC_INIT(i) { (i) } +ATOMIC_OPS(atomic, int) -/* - * atomic_set - set atomic variable - * @v: pointer of type atomic_t - * @i: required value - * - * Atomically sets the value of @v to @i. 
- */ -#define atomic_set(v, i) WRITE_ONCE((v)->counter, (i)) +#ifdef CONFIG_64BIT +# define ATOMIC64_INIT(i) { (i) } +ATOMIC_OPS(atomic64, s64) +#endif #define ATOMIC_OP(pfx, op, type, c_op, asm_op, ll, sc) \ static __inline__ void pfx##_##op(type i, pfx##_t * v) \ @@ -135,6 +145,7 @@ static __inline__ type pfx##_fetch_##op##_relaxed(type i, pfx##_t * v) \ return result; \ } +#undef ATOMIC_OPS #define ATOMIC_OPS(pfx, op, type, c_op, asm_op, ll, sc) \ ATOMIC_OP(pfx, op, type, c_op, asm_op, ll, sc) \ ATOMIC_OP_RETURN(pfx, op, type, c_op, asm_op, ll, sc) \ @@ -254,31 +265,4 @@ ATOMIC_SIP_OP(atomic64, s64, dsubu, lld, scd) #undef ATOMIC_SIP_OP -#define atomic_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n))) -#define atomic_xchg(v, new) (xchg(&((v)->counter), (new))) - -#ifdef CONFIG_64BIT - -#define ATOMIC64_INIT(i) { (i) } - -/* - * atomic64_read - read atomic variable - * @v: pointer of type atomic64_t - * - */ -#define atomic64_read(v) READ_ONCE((v)->counter) - -/* - * atomic64_set - set atomic variable - * @v: pointer of type atomic64_t - * @i: required value - */ -#define atomic64_set(v, i) WRITE_ONCE((v)->counter, (i)) - -#define atomic64_cmpxchg(v, o, n) \ - ((__typeof__((v)->counter))cmpxchg(&((v)->counter), (o), (n))) -#define atomic64_xchg(v, new) (xchg(&((v)->counter), (new))) - -#endif /* CONFIG_64BIT */ - #endif /* _ASM_ATOMIC_H */ -- cgit v1.2.3 From fe7cd97e68fac186492847f8eda0eff8bcfb0cbc Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Tue, 1 Oct 2019 21:53:25 +0000 Subject: MIPS: bitops: Handle !kernel_uses_llsc first Reorder conditions in our various bitops functions that check kernel_uses_llsc such that they handle the !kernel_uses_llsc case first. This allows us to avoid the need to duplicate the kernel_uses_llsc check in all the other cases. For functions that don't involve barriers common to the various implementations, we switch to returning from within each if block making each case easier to read in isolation. 
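To make the shape of the change concrete, here is a minimal standalone sketch (plain C invented for illustration; in the real code the bodies are LL/SC inline asm and the IRQ-disabling __mips_*() fallbacks):

  #include <stdio.h>
  #include <stdbool.h>

  static bool kernel_uses_llsc = true;  /* stand-in for the real predicate */

  /* Old shape: the common path sits nested inside an if/else chain. */
  static void set_bit_nested(unsigned long nr, unsigned long *addr)
  {
          if (kernel_uses_llsc) {
                  /* (LL/SC loop in the real code) */
                  addr[nr / (8 * sizeof(long))] |= 1UL << (nr % (8 * sizeof(long)));
          } else {
                  /* (IRQ-disabling fallback in the real code) */
          }
  }

  /* New shape: handle !kernel_uses_llsc first & return, de-indenting
   * the LL/SC path that follows. */
  static void set_bit_early_return(unsigned long nr, unsigned long *addr)
  {
          if (!kernel_uses_llsc) {
                  /* (IRQ-disabling fallback in the real code) */
                  return;
          }

          addr[nr / (8 * sizeof(long))] |= 1UL << (nr % (8 * sizeof(long)));
  }

  int main(void)
  {
          unsigned long w[4] = { 0 };

          set_bit_nested(3, w);
          set_bit_early_return(65, w);
          printf("%#lx %#lx\n", w[0], w[1]);
          return 0;
  }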
Signed-off-by: Paul Burton Cc: linux-mips@vger.kernel.org Cc: Huacai Chen Cc: Jiaxun Yang Cc: linux-kernel@vger.kernel.org --- arch/mips/include/asm/bitops.h | 213 ++++++++++++++++++++--------------------- 1 file changed, 105 insertions(+), 108 deletions(-) (limited to 'arch') diff --git a/arch/mips/include/asm/bitops.h b/arch/mips/include/asm/bitops.h index 985d6a02f9ea..e300960717e0 100644 --- a/arch/mips/include/asm/bitops.h +++ b/arch/mips/include/asm/bitops.h @@ -52,11 +52,16 @@ int __mips_test_and_change_bit(unsigned long nr, */ static inline void set_bit(unsigned long nr, volatile unsigned long *addr) { - unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG); + unsigned long *m = ((unsigned long *)addr) + (nr >> SZLONG_LOG); int bit = nr & SZLONG_MASK; unsigned long temp; - if (kernel_uses_llsc && R10000_LLSC_WAR) { + if (!kernel_uses_llsc) { + __mips_set_bit(nr, addr); + return; + } + + if (R10000_LLSC_WAR) { __asm__ __volatile__( " .set push \n" " .set arch=r4000 \n" @@ -68,8 +73,11 @@ static inline void set_bit(unsigned long nr, volatile unsigned long *addr) : "=&r" (temp), "=" GCC_OFF_SMALL_ASM() (*m) : "ir" (1UL << bit), GCC_OFF_SMALL_ASM() (*m) : __LLSC_CLOBBER); + return; + } + #if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6) - } else if (kernel_uses_llsc && __builtin_constant_p(bit)) { + if (__builtin_constant_p(bit)) { loongson_llsc_mb(); do { __asm__ __volatile__( @@ -80,23 +88,23 @@ static inline void set_bit(unsigned long nr, volatile unsigned long *addr) : "ir" (bit), "r" (~0) : __LLSC_CLOBBER); } while (unlikely(!temp)); + return; + } #endif /* CONFIG_CPU_MIPSR2 || CONFIG_CPU_MIPSR6 */ - } else if (kernel_uses_llsc) { - loongson_llsc_mb(); - do { - __asm__ __volatile__( - " .set push \n" - " .set "MIPS_ISA_ARCH_LEVEL" \n" - " " __LL "%0, %1 # set_bit \n" - " or %0, %2 \n" - " " __SC "%0, %1 \n" - " .set pop \n" - : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m) - : "ir" (1UL << bit) - : __LLSC_CLOBBER); - } while (unlikely(!temp)); - } else - __mips_set_bit(nr, addr); + + loongson_llsc_mb(); + do { + __asm__ __volatile__( + " .set push \n" + " .set "MIPS_ISA_ARCH_LEVEL" \n" + " " __LL "%0, %1 # set_bit \n" + " or %0, %2 \n" + " " __SC "%0, %1 \n" + " .set pop \n" + : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m) + : "ir" (1UL << bit) + : __LLSC_CLOBBER); + } while (unlikely(!temp)); } /* @@ -111,11 +119,16 @@ static inline void set_bit(unsigned long nr, volatile unsigned long *addr) */ static inline void clear_bit(unsigned long nr, volatile unsigned long *addr) { - unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG); + unsigned long *m = ((unsigned long *)addr) + (nr >> SZLONG_LOG); int bit = nr & SZLONG_MASK; unsigned long temp; - if (kernel_uses_llsc && R10000_LLSC_WAR) { + if (!kernel_uses_llsc) { + __mips_clear_bit(nr, addr); + return; + } + + if (R10000_LLSC_WAR) { __asm__ __volatile__( " .set push \n" " .set arch=r4000 \n" @@ -127,8 +140,11 @@ static inline void clear_bit(unsigned long nr, volatile unsigned long *addr) : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m) : "ir" (~(1UL << bit)) : __LLSC_CLOBBER); + return; + } + #if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6) - } else if (kernel_uses_llsc && __builtin_constant_p(bit)) { + if (__builtin_constant_p(bit)) { loongson_llsc_mb(); do { __asm__ __volatile__( @@ -139,23 +155,23 @@ static inline void clear_bit(unsigned long nr, volatile unsigned long *addr) : "ir" (bit) : __LLSC_CLOBBER); } while (unlikely(!temp)); + return; + } #endif /* CONFIG_CPU_MIPSR2 || 
CONFIG_CPU_MIPSR6 */ - } else if (kernel_uses_llsc) { - loongson_llsc_mb(); - do { - __asm__ __volatile__( - " .set push \n" - " .set "MIPS_ISA_ARCH_LEVEL" \n" - " " __LL "%0, %1 # clear_bit \n" - " and %0, %2 \n" - " " __SC "%0, %1 \n" - " .set pop \n" - : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m) - : "ir" (~(1UL << bit)) - : __LLSC_CLOBBER); - } while (unlikely(!temp)); - } else - __mips_clear_bit(nr, addr); + + loongson_llsc_mb(); + do { + __asm__ __volatile__( + " .set push \n" + " .set "MIPS_ISA_ARCH_LEVEL" \n" + " " __LL "%0, %1 # clear_bit \n" + " and %0, %2 \n" + " " __SC "%0, %1 \n" + " .set pop \n" + : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m) + : "ir" (~(1UL << bit)) + : __LLSC_CLOBBER); + } while (unlikely(!temp)); } /* @@ -183,12 +199,16 @@ static inline void clear_bit_unlock(unsigned long nr, volatile unsigned long *ad */ static inline void change_bit(unsigned long nr, volatile unsigned long *addr) { + unsigned long *m = ((unsigned long *)addr) + (nr >> SZLONG_LOG); int bit = nr & SZLONG_MASK; + unsigned long temp; - if (kernel_uses_llsc && R10000_LLSC_WAR) { - unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG); - unsigned long temp; + if (!kernel_uses_llsc) { + __mips_change_bit(nr, addr); + return; + } + if (R10000_LLSC_WAR) { __asm__ __volatile__( " .set push \n" " .set arch=r4000 \n" @@ -200,25 +220,22 @@ static inline void change_bit(unsigned long nr, volatile unsigned long *addr) : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m) : "ir" (1UL << bit) : __LLSC_CLOBBER); - } else if (kernel_uses_llsc) { - unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG); - unsigned long temp; + return; + } - loongson_llsc_mb(); - do { - __asm__ __volatile__( - " .set push \n" - " .set "MIPS_ISA_ARCH_LEVEL" \n" - " " __LL "%0, %1 # change_bit \n" - " xor %0, %2 \n" - " " __SC "%0, %1 \n" - " .set pop \n" - : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m) - : "ir" (1UL << bit) - : __LLSC_CLOBBER); - } while (unlikely(!temp)); - } else - __mips_change_bit(nr, addr); + loongson_llsc_mb(); + do { + __asm__ __volatile__( + " .set push \n" + " .set "MIPS_ISA_ARCH_LEVEL" \n" + " " __LL "%0, %1 # change_bit \n" + " xor %0, %2 \n" + " " __SC "%0, %1 \n" + " .set pop \n" + : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m) + : "ir" (1UL << bit) + : __LLSC_CLOBBER); + } while (unlikely(!temp)); } /* @@ -232,15 +249,15 @@ static inline void change_bit(unsigned long nr, volatile unsigned long *addr) static inline int test_and_set_bit(unsigned long nr, volatile unsigned long *addr) { + unsigned long *m = ((unsigned long *)addr) + (nr >> SZLONG_LOG); int bit = nr & SZLONG_MASK; - unsigned long res; + unsigned long res, temp; smp_mb__before_llsc(); - if (kernel_uses_llsc && R10000_LLSC_WAR) { - unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG); - unsigned long temp; - + if (!kernel_uses_llsc) { + res = __mips_test_and_set_bit(nr, addr); + } else if (R10000_LLSC_WAR) { __asm__ __volatile__( " .set push \n" " .set arch=r4000 \n" @@ -253,10 +270,7 @@ static inline int test_and_set_bit(unsigned long nr, : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m), "=&r" (res) : "r" (1UL << bit) : __LLSC_CLOBBER); - } else if (kernel_uses_llsc) { - unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG); - unsigned long temp; - + } else { loongson_llsc_mb(); do { __asm__ __volatile__( @@ -272,8 +286,7 @@ static inline int test_and_set_bit(unsigned long nr, } while (unlikely(!res)); res = temp & (1UL << bit); - } else - res = __mips_test_and_set_bit(nr, addr); + } smp_llsc_mb(); @@ 
-291,13 +304,13 @@ static inline int test_and_set_bit(unsigned long nr, static inline int test_and_set_bit_lock(unsigned long nr, volatile unsigned long *addr) { + unsigned long *m = ((unsigned long *)addr) + (nr >> SZLONG_LOG); int bit = nr & SZLONG_MASK; - unsigned long res; - - if (kernel_uses_llsc && R10000_LLSC_WAR) { - unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG); - unsigned long temp; + unsigned long res, temp; + if (!kernel_uses_llsc) { + res = __mips_test_and_set_bit_lock(nr, addr); + } else if (R10000_LLSC_WAR) { __asm__ __volatile__( " .set push \n" " .set arch=r4000 \n" @@ -310,11 +323,7 @@ static inline int test_and_set_bit_lock(unsigned long nr, : "=&r" (temp), "+m" (*m), "=&r" (res) : "r" (1UL << bit) : __LLSC_CLOBBER); - } else if (kernel_uses_llsc) { - unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG); - unsigned long temp; - - loongson_llsc_mb(); + } else { do { __asm__ __volatile__( " .set push \n" @@ -329,8 +338,7 @@ static inline int test_and_set_bit_lock(unsigned long nr, } while (unlikely(!res)); res = temp & (1UL << bit); - } else - res = __mips_test_and_set_bit_lock(nr, addr); + } smp_llsc_mb(); @@ -347,15 +355,15 @@ static inline int test_and_set_bit_lock(unsigned long nr, static inline int test_and_clear_bit(unsigned long nr, volatile unsigned long *addr) { + unsigned long *m = ((unsigned long *)addr) + (nr >> SZLONG_LOG); int bit = nr & SZLONG_MASK; - unsigned long res; + unsigned long res, temp; smp_mb__before_llsc(); - if (kernel_uses_llsc && R10000_LLSC_WAR) { - unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG); - unsigned long temp; - + if (!kernel_uses_llsc) { + res = __mips_test_and_clear_bit(nr, addr); + } else if (R10000_LLSC_WAR) { __asm__ __volatile__( " .set push \n" " .set arch=r4000 \n" @@ -370,10 +378,7 @@ static inline int test_and_clear_bit(unsigned long nr, : "r" (1UL << bit) : __LLSC_CLOBBER); #if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6) - } else if (kernel_uses_llsc && __builtin_constant_p(nr)) { - unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG); - unsigned long temp; - + } else if (__builtin_constant_p(nr)) { loongson_llsc_mb(); do { __asm__ __volatile__( @@ -386,10 +391,7 @@ static inline int test_and_clear_bit(unsigned long nr, : __LLSC_CLOBBER); } while (unlikely(!temp)); #endif - } else if (kernel_uses_llsc) { - unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG); - unsigned long temp; - + } else { loongson_llsc_mb(); do { __asm__ __volatile__( @@ -406,8 +408,7 @@ static inline int test_and_clear_bit(unsigned long nr, } while (unlikely(!res)); res = temp & (1UL << bit); - } else - res = __mips_test_and_clear_bit(nr, addr); + } smp_llsc_mb(); @@ -425,15 +426,15 @@ static inline int test_and_clear_bit(unsigned long nr, static inline int test_and_change_bit(unsigned long nr, volatile unsigned long *addr) { + unsigned long *m = ((unsigned long *)addr) + (nr >> SZLONG_LOG); int bit = nr & SZLONG_MASK; - unsigned long res; + unsigned long res, temp; smp_mb__before_llsc(); - if (kernel_uses_llsc && R10000_LLSC_WAR) { - unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG); - unsigned long temp; - + if (!kernel_uses_llsc) { + res = __mips_test_and_change_bit(nr, addr); + } else if (R10000_LLSC_WAR) { __asm__ __volatile__( " .set push \n" " .set arch=r4000 \n" @@ -446,10 +447,7 @@ static inline int test_and_change_bit(unsigned long nr, : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m), "=&r" (res) : "r" (1UL << bit) : __LLSC_CLOBBER); - } else if 
(kernel_uses_llsc) { - unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG); - unsigned long temp; - + } else { loongson_llsc_mb(); do { __asm__ __volatile__( @@ -465,8 +463,7 @@ static inline int test_and_change_bit(unsigned long nr, } while (unlikely(!res)); res = temp & (1UL << bit); - } else - res = __mips_test_and_change_bit(nr, addr); + } smp_llsc_mb(); -- cgit v1.2.3 From 3d2920cf4fd41a27730083ef395a0c49d4750474 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Tue, 1 Oct 2019 21:53:26 +0000 Subject: MIPS: bitops: Only use ins for bit 16 or higher set_bit() can set bits 0-15 using an ori instruction, rather than loading the value -1 into a register & then using an ins instruction. That is, rather than the following: li t0, -1 ll t1, 0(t2) ins t1, t0, 4, 1 sc t1, 0(t2) We can have the simpler: ll t1, 0(t2) ori t1, t1, 0x10 sc t1, 0(t2) The or path already allows immediates to be used, so simply restricting the ins path to bits that don't fit in immediates is sufficient to take advantage of this. Signed-off-by: Paul Burton Cc: linux-mips@vger.kernel.org Cc: Huacai Chen Cc: Jiaxun Yang Cc: linux-kernel@vger.kernel.org --- arch/mips/include/asm/bitops.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/mips/include/asm/bitops.h b/arch/mips/include/asm/bitops.h index e300960717e0..1e5739191ddf 100644 --- a/arch/mips/include/asm/bitops.h +++ b/arch/mips/include/asm/bitops.h @@ -77,7 +77,7 @@ static inline void set_bit(unsigned long nr, volatile unsigned long *addr) } #if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6) - if (__builtin_constant_p(bit)) { + if (__builtin_constant_p(bit) && (bit >= 16)) { loongson_llsc_mb(); do { __asm__ __volatile__( -- cgit v1.2.3 From 59361e9975fd567a642f4ee32fd0ea662ffa7040 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Tue, 1 Oct 2019 21:53:27 +0000 Subject: MIPS: bitops: Use MIPS_ISA_REV, not #ifdefs Rather than #ifdef on CONFIG_CPU_* to determine whether the ins instruction is supported we can simply check MIPS_ISA_REV to discover whether we're targeting MIPSr2 or higher. Do so in order to clean up the code. Signed-off-by: Paul Burton Cc: linux-mips@vger.kernel.org Cc: Huacai Chen Cc: Jiaxun Yang Cc: linux-kernel@vger.kernel.org --- arch/mips/include/asm/bitops.h | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/mips/include/asm/bitops.h b/arch/mips/include/asm/bitops.h index 1e5739191ddf..0f5329e32e87 100644 --- a/arch/mips/include/asm/bitops.h +++ b/arch/mips/include/asm/bitops.h @@ -19,6 +19,7 @@ #include /* sigh ... 
*/ #include #include +#include #include #include #include @@ -76,8 +77,7 @@ static inline void set_bit(unsigned long nr, volatile unsigned long *addr) return; } -#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6) - if (__builtin_constant_p(bit) && (bit >= 16)) { + if ((MIPS_ISA_REV >= 2) && __builtin_constant_p(bit) && (bit >= 16)) { loongson_llsc_mb(); do { __asm__ __volatile__( @@ -90,7 +90,6 @@ static inline void set_bit(unsigned long nr, volatile unsigned long *addr) } while (unlikely(!temp)); return; } -#endif /* CONFIG_CPU_MIPSR2 || CONFIG_CPU_MIPSR6 */ loongson_llsc_mb(); do { @@ -143,8 +142,7 @@ static inline void clear_bit(unsigned long nr, volatile unsigned long *addr) return; } -#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6) - if (__builtin_constant_p(bit)) { + if ((MIPS_ISA_REV >= 2) && __builtin_constant_p(bit)) { loongson_llsc_mb(); do { __asm__ __volatile__( @@ -157,7 +155,6 @@ static inline void clear_bit(unsigned long nr, volatile unsigned long *addr) } while (unlikely(!temp)); return; } -#endif /* CONFIG_CPU_MIPSR2 || CONFIG_CPU_MIPSR6 */ loongson_llsc_mb(); do { @@ -377,8 +374,7 @@ static inline int test_and_clear_bit(unsigned long nr, : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m), "=&r" (res) : "r" (1UL << bit) : __LLSC_CLOBBER); -#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6) - } else if (__builtin_constant_p(nr)) { + } else if ((MIPS_ISA_REV >= 2) && __builtin_constant_p(nr)) { loongson_llsc_mb(); do { __asm__ __volatile__( @@ -390,7 +386,6 @@ static inline int test_and_clear_bit(unsigned long nr, : "ir" (bit) : __LLSC_CLOBBER); } while (unlikely(!temp)); -#endif } else { loongson_llsc_mb(); do { -- cgit v1.2.3 From 27aab27259aec1f200cf1f84f02b8192d27abe64 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Tue, 1 Oct 2019 21:53:29 +0000 Subject: MIPS: bitops: ins start position is always an immediate The start position for an ins instruction is always encoded as an immediate, so allowing registers to be used by the inline asm makes no sense. It should never happen anyway since a bit index should always be small enough to be treated as an immediate, but remove the nonsensical "r" for sanity. 
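For reference, the difference between the two GCC inline-asm constraints (a generic sketch, not the MIPS code itself; the asm bodies here are assembler comments only):

  #include <stdio.h>

  int main(void)
  {
          int runtime_val = 42;

          /* "i": the operand must be a compile-time constant; passing
           * runtime_val here would fail to build. */
          asm volatile("# constant operand: %0" : : "i" (4));

          /* "ir": an immediate where possible, otherwise a register,
           * so a runtime value is also accepted. */
          asm volatile("# imm-or-reg operand: %0" : : "ir" (runtime_val));

          puts("constraint demo built & ran");
          return 0;
  }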
Signed-off-by: Paul Burton Cc: linux-mips@vger.kernel.org Cc: Huacai Chen Cc: Jiaxun Yang Cc: linux-kernel@vger.kernel.org --- arch/mips/include/asm/bitops.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/mips/include/asm/bitops.h b/arch/mips/include/asm/bitops.h index 0f5329e32e87..03532ae9f528 100644 --- a/arch/mips/include/asm/bitops.h +++ b/arch/mips/include/asm/bitops.h @@ -85,7 +85,7 @@ static inline void set_bit(unsigned long nr, volatile unsigned long *addr) " " __INS "%0, %3, %2, 1 \n" " " __SC "%0, %1 \n" : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m) - : "ir" (bit), "r" (~0) + : "i" (bit), "r" (~0) : __LLSC_CLOBBER); } while (unlikely(!temp)); return; @@ -150,7 +150,7 @@ static inline void clear_bit(unsigned long nr, volatile unsigned long *addr) " " __INS "%0, $0, %2, 1 \n" " " __SC "%0, %1 \n" : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m) - : "ir" (bit) + : "i" (bit) : __LLSC_CLOBBER); } while (unlikely(!temp)); return; @@ -383,7 +383,7 @@ static inline int test_and_clear_bit(unsigned long nr, " " __INS "%0, $0, %3, 1 \n" " " __SC "%0, %1 \n" : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m), "=&r" (res) - : "ir" (bit) + : "i" (bit) : __LLSC_CLOBBER); } while (unlikely(!temp)); } else { -- cgit v1.2.3 From 6bbe043bd3f4766b089b7b51a80e75745868c038 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Tue, 1 Oct 2019 21:53:30 +0000 Subject: MIPS: bitops: Implement test_and_set_bit() in terms of _lock variant The only difference between test_and_set_bit() & test_and_set_bit_lock() is memory ordering barrier semantics - the former provides a full barrier whilst the latter only provides acquire semantics. We can therefore implement test_and_set_bit() in terms of test_and_set_bit_lock() with the addition of the extra memory barrier. Do this in order to avoid duplicating logic. Signed-off-by: Paul Burton Cc: linux-mips@vger.kernel.org Cc: Huacai Chen Cc: Jiaxun Yang Cc: linux-kernel@vger.kernel.org --- arch/mips/include/asm/bitops.h | 66 +++++++++--------------------------------- arch/mips/lib/bitops.c | 26 ----------------- 2 files changed, 13 insertions(+), 79 deletions(-) (limited to 'arch') diff --git a/arch/mips/include/asm/bitops.h b/arch/mips/include/asm/bitops.h index 03532ae9f528..ea35a2e87b6d 100644 --- a/arch/mips/include/asm/bitops.h +++ b/arch/mips/include/asm/bitops.h @@ -31,8 +31,6 @@ void __mips_set_bit(unsigned long nr, volatile unsigned long *addr); void __mips_clear_bit(unsigned long nr, volatile unsigned long *addr); void __mips_change_bit(unsigned long nr, volatile unsigned long *addr); -int __mips_test_and_set_bit(unsigned long nr, - volatile unsigned long *addr); int __mips_test_and_set_bit_lock(unsigned long nr, volatile unsigned long *addr); int __mips_test_and_clear_bit(unsigned long nr, @@ -236,24 +234,22 @@ static inline void change_bit(unsigned long nr, volatile unsigned long *addr) } /* - * test_and_set_bit - Set a bit and return its old value + * test_and_set_bit_lock - Set a bit and return its old value * @nr: Bit to set * @addr: Address to count from * - * This operation is atomic and cannot be reordered. - * It also implies a memory barrier. + * This operation is atomic and implies acquire ordering semantics + * after the memory operation. 
*/ -static inline int test_and_set_bit(unsigned long nr, +static inline int test_and_set_bit_lock(unsigned long nr, volatile unsigned long *addr) { unsigned long *m = ((unsigned long *)addr) + (nr >> SZLONG_LOG); int bit = nr & SZLONG_MASK; unsigned long res, temp; - smp_mb__before_llsc(); - if (!kernel_uses_llsc) { - res = __mips_test_and_set_bit(nr, addr); + res = __mips_test_and_set_bit_lock(nr, addr); } else if (R10000_LLSC_WAR) { __asm__ __volatile__( " .set push \n" @@ -264,7 +260,7 @@ static inline int test_and_set_bit(unsigned long nr, " beqzl %2, 1b \n" " and %2, %0, %3 \n" " .set pop \n" - : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m), "=&r" (res) + : "=&r" (temp), "+m" (*m), "=&r" (res) : "r" (1UL << bit) : __LLSC_CLOBBER); } else { @@ -291,56 +287,20 @@ static inline int test_and_set_bit(unsigned long nr, } /* - * test_and_set_bit_lock - Set a bit and return its old value + * test_and_set_bit - Set a bit and return its old value * @nr: Bit to set * @addr: Address to count from * - * This operation is atomic and implies acquire ordering semantics - * after the memory operation. + * This operation is atomic and cannot be reordered. + * It also implies a memory barrier. */ -static inline int test_and_set_bit_lock(unsigned long nr, +static inline int test_and_set_bit(unsigned long nr, volatile unsigned long *addr) { - unsigned long *m = ((unsigned long *)addr) + (nr >> SZLONG_LOG); - int bit = nr & SZLONG_MASK; - unsigned long res, temp; - - if (!kernel_uses_llsc) { - res = __mips_test_and_set_bit_lock(nr, addr); - } else if (R10000_LLSC_WAR) { - __asm__ __volatile__( - " .set push \n" - " .set arch=r4000 \n" - "1: " __LL "%0, %1 # test_and_set_bit \n" - " or %2, %0, %3 \n" - " " __SC "%2, %1 \n" - " beqzl %2, 1b \n" - " and %2, %0, %3 \n" - " .set pop \n" - : "=&r" (temp), "+m" (*m), "=&r" (res) - : "r" (1UL << bit) - : __LLSC_CLOBBER); - } else { - do { - __asm__ __volatile__( - " .set push \n" - " .set "MIPS_ISA_ARCH_LEVEL" \n" - " " __LL "%0, %1 # test_and_set_bit \n" - " or %2, %0, %3 \n" - " " __SC "%2, %1 \n" - " .set pop \n" - : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m), "=&r" (res) - : "r" (1UL << bit) - : __LLSC_CLOBBER); - } while (unlikely(!res)); - - res = temp & (1UL << bit); - } - - smp_llsc_mb(); - - return res != 0; + smp_mb__before_llsc(); + return test_and_set_bit_lock(nr, addr); } + /* * test_and_clear_bit - Clear a bit and return its old value * @nr: Bit to clear diff --git a/arch/mips/lib/bitops.c b/arch/mips/lib/bitops.c index 3b2a1e78a543..fba402c0879d 100644 --- a/arch/mips/lib/bitops.c +++ b/arch/mips/lib/bitops.c @@ -77,32 +77,6 @@ void __mips_change_bit(unsigned long nr, volatile unsigned long *addr) EXPORT_SYMBOL(__mips_change_bit); -/** - * __mips_test_and_set_bit - Set a bit and return its old value. This is - * called by test_and_set_bit() if it cannot find a faster solution. - * @nr: Bit to set - * @addr: Address to count from - */ -int __mips_test_and_set_bit(unsigned long nr, - volatile unsigned long *addr) -{ - unsigned long *a = (unsigned long *)addr; - unsigned bit = nr & SZLONG_MASK; - unsigned long mask; - unsigned long flags; - int res; - - a += nr >> SZLONG_LOG; - mask = 1UL << bit; - raw_local_irq_save(flags); - res = (mask & *a) != 0; - *a |= mask; - raw_local_irq_restore(flags); - return res; -} -EXPORT_SYMBOL(__mips_test_and_set_bit); - - /** * __mips_test_and_set_bit_lock - Set a bit and return its old value. This is * called by test_and_set_bit_lock() if it cannot find a faster solution. 
-- cgit v1.2.3 From a2e66b862cc7b8db1a0eb63b71e61943d48c3c8a Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Tue, 1 Oct 2019 21:53:31 +0000 Subject: MIPS: bitops: Allow immediates in test_and_{set,clear,change}_bit The logical operations or & xor used in the test_and_set_bit_lock(), test_and_clear_bit() & test_and_change_bit() functions currently force the value 1UL << bit to be placed in a register even when it could be encoded as an immediate, via the use of the "r" input constraint. Use the "ir" constraint instead so that the compiler may encode the mask as an immediate where the instruction allows it. Signed-off-by: Paul Burton Cc: linux-mips@vger.kernel.org Cc: Huacai Chen Cc: Jiaxun Yang Cc: linux-kernel@vger.kernel.org --- arch/mips/include/asm/bitops.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/mips/include/asm/bitops.h b/arch/mips/include/asm/bitops.h index ea35a2e87b6d..7314ba5a3683 100644 --- a/arch/mips/include/asm/bitops.h +++ b/arch/mips/include/asm/bitops.h @@ -261,7 +261,7 @@ static inline int test_and_set_bit_lock(unsigned long nr, " and %2, %0, %3 \n" " .set pop \n" : "=&r" (temp), "+m" (*m), "=&r" (res) - : "r" (1UL << bit) + : "ir" (1UL << bit) : __LLSC_CLOBBER); } else { loongson_llsc_mb(); @@ -274,7 +274,7 @@ static inline int test_and_set_bit_lock(unsigned long nr, " " __SC "%2, %1 \n" " .set pop \n" : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m), "=&r" (res) - : "r" (1UL << bit) + : "ir" (1UL << bit) : __LLSC_CLOBBER); } while (unlikely(!res)); @@ -332,7 +332,7 @@ static inline int test_and_clear_bit(unsigned long nr, " and %2, %0, %3 \n" " .set pop \n" : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m), "=&r" (res) - : "r" (1UL << bit) + : "ir" (1UL << bit) : __LLSC_CLOBBER); } else if ((MIPS_ISA_REV >= 2) && __builtin_constant_p(nr)) { loongson_llsc_mb(); @@ -358,7 +358,7 @@ static inline int test_and_clear_bit(unsigned long nr, " " __SC "%2, %1 \n" " .set pop \n" : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m), "=&r" (res) - : "r" (1UL << bit) + : "ir" (1UL << bit) : __LLSC_CLOBBER); } while (unlikely(!res)); @@ -400,7 +400,7 @@ static inline int test_and_change_bit(unsigned long nr, " and %2, %0, %3 \n" " .set pop \n" : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m), "=&r" (res) - : "r" (1UL << bit) + : "ir" (1UL << bit) : __LLSC_CLOBBER); } else { loongson_llsc_mb(); @@ -413,7 +413,7 @@ static inline int test_and_change_bit(unsigned long nr, " " __SC "\t%2, %1 \n" " .set pop \n" : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m), "=&r" (res) - : "r" (1UL << bit) + : "ir" (1UL << bit) : __LLSC_CLOBBER); } while (unlikely(!res)); -- cgit v1.2.3 From d6103510e7ccdc992e4eca7031eae366117ae6d4 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Tue, 1 Oct 2019 21:53:31 +0000 Subject: MIPS: bitops: Use the BIT() macro Use the BIT() macro in asm/bitops.h rather than open-coding its equivalent.
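BIT() is the standard helper from linux/bits.h; the equivalence the patch relies on can be checked standalone (macro reproduced here in simplified form):

  #include <stdio.h>

  /* Simplified form of the kernel's BIT() macro from <linux/bits.h>. */
  #define BIT(nr) (1UL << (nr))

  int main(void)
  {
          int bit = 5;

          /* The open-coded shift & the named macro yield the same mask. */
          printf("open-coded: %#lx\n", 1UL << bit);
          printf("BIT():      %#lx\n", BIT(bit));
          return 0;
  }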
Signed-off-by: Paul Burton Cc: linux-mips@vger.kernel.org Cc: Huacai Chen Cc: Jiaxun Yang Cc: linux-kernel@vger.kernel.org --- arch/mips/include/asm/bitops.h | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/mips/include/asm/bitops.h b/arch/mips/include/asm/bitops.h index 7314ba5a3683..0f8ff896e86b 100644 --- a/arch/mips/include/asm/bitops.h +++ b/arch/mips/include/asm/bitops.h @@ -13,6 +13,7 @@ #error only can be included directly #endif +#include #include #include #include @@ -70,7 +71,7 @@ static inline void set_bit(unsigned long nr, volatile unsigned long *addr) " beqzl %0, 1b \n" " .set pop \n" : "=&r" (temp), "=" GCC_OFF_SMALL_ASM() (*m) - : "ir" (1UL << bit), GCC_OFF_SMALL_ASM() (*m) + : "ir" (BIT(bit)), GCC_OFF_SMALL_ASM() (*m) : __LLSC_CLOBBER); return; } @@ -99,7 +100,7 @@ static inline void set_bit(unsigned long nr, volatile unsigned long *addr) " " __SC "%0, %1 \n" " .set pop \n" : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m) - : "ir" (1UL << bit) + : "ir" (BIT(bit)) : __LLSC_CLOBBER); } while (unlikely(!temp)); } @@ -135,7 +136,7 @@ static inline void clear_bit(unsigned long nr, volatile unsigned long *addr) " beqzl %0, 1b \n" " .set pop \n" : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m) - : "ir" (~(1UL << bit)) + : "ir" (~(BIT(bit))) : __LLSC_CLOBBER); return; } @@ -164,7 +165,7 @@ static inline void clear_bit(unsigned long nr, volatile unsigned long *addr) " " __SC "%0, %1 \n" " .set pop \n" : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m) - : "ir" (~(1UL << bit)) + : "ir" (~(BIT(bit))) : __LLSC_CLOBBER); } while (unlikely(!temp)); } @@ -213,7 +214,7 @@ static inline void change_bit(unsigned long nr, volatile unsigned long *addr) " beqzl %0, 1b \n" " .set pop \n" : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m) - : "ir" (1UL << bit) + : "ir" (BIT(bit)) : __LLSC_CLOBBER); return; } @@ -228,7 +229,7 @@ static inline void change_bit(unsigned long nr, volatile unsigned long *addr) " " __SC "%0, %1 \n" " .set pop \n" : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m) - : "ir" (1UL << bit) + : "ir" (BIT(bit)) : __LLSC_CLOBBER); } while (unlikely(!temp)); } @@ -261,7 +262,7 @@ static inline int test_and_set_bit_lock(unsigned long nr, " and %2, %0, %3 \n" " .set pop \n" : "=&r" (temp), "+m" (*m), "=&r" (res) - : "ir" (1UL << bit) + : "ir" (BIT(bit)) : __LLSC_CLOBBER); } else { loongson_llsc_mb(); @@ -274,11 +275,11 @@ static inline int test_and_set_bit_lock(unsigned long nr, " " __SC "%2, %1 \n" " .set pop \n" : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m), "=&r" (res) - : "ir" (1UL << bit) + : "ir" (BIT(bit)) : __LLSC_CLOBBER); } while (unlikely(!res)); - res = temp & (1UL << bit); + res = temp & BIT(bit); } smp_llsc_mb(); @@ -332,7 +333,7 @@ static inline int test_and_clear_bit(unsigned long nr, " and %2, %0, %3 \n" " .set pop \n" : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m), "=&r" (res) - : "ir" (1UL << bit) + : "ir" (BIT(bit)) : __LLSC_CLOBBER); } else if ((MIPS_ISA_REV >= 2) && __builtin_constant_p(nr)) { loongson_llsc_mb(); @@ -358,11 +359,11 @@ static inline int test_and_clear_bit(unsigned long nr, " " __SC "%2, %1 \n" " .set pop \n" : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m), "=&r" (res) - : "ir" (1UL << bit) + : "ir" (BIT(bit)) : __LLSC_CLOBBER); } while (unlikely(!res)); - res = temp & (1UL << bit); + res = temp & BIT(bit); } smp_llsc_mb(); @@ -400,7 +401,7 @@ static inline int test_and_change_bit(unsigned long nr, " and %2, %0, %3 \n" " .set pop \n" : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m), "=&r" (res) - : "ir" 
(1UL << bit) + : "ir" (BIT(bit)) : __LLSC_CLOBBER); } else { loongson_llsc_mb(); @@ -413,11 +414,11 @@ static inline int test_and_change_bit(unsigned long nr, " " __SC "\t%2, %1 \n" " .set pop \n" : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m), "=&r" (res) - : "ir" (1UL << bit) + : "ir" (BIT(bit)) : __LLSC_CLOBBER); } while (unlikely(!res)); - res = temp & (1UL << bit); + res = temp & BIT(bit); } smp_llsc_mb(); -- cgit v1.2.3 From aad028cadb17867d257e8b90078f6a19614775ff Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Tue, 1 Oct 2019 21:53:32 +0000 Subject: MIPS: bitops: Avoid redundant zero-comparison for non-LLSC The IRQ-disabling non-LLSC fallbacks for bitops on UP systems already return a zero or one, so there's no need to perform another comparison against zero. Move these comparisons into the LLSC paths to avoid the redundant work. Signed-off-by: Paul Burton Cc: linux-mips@vger.kernel.org Cc: Huacai Chen Cc: Jiaxun Yang Cc: linux-kernel@vger.kernel.org --- arch/mips/include/asm/bitops.h | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/mips/include/asm/bitops.h b/arch/mips/include/asm/bitops.h index 0f8ff896e86b..7671db2a7b73 100644 --- a/arch/mips/include/asm/bitops.h +++ b/arch/mips/include/asm/bitops.h @@ -264,6 +264,8 @@ static inline int test_and_set_bit_lock(unsigned long nr, : "=&r" (temp), "+m" (*m), "=&r" (res) : "ir" (BIT(bit)) : __LLSC_CLOBBER); + + res = res != 0; } else { loongson_llsc_mb(); do { @@ -279,12 +281,12 @@ static inline int test_and_set_bit_lock(unsigned long nr, : __LLSC_CLOBBER); } while (unlikely(!res)); - res = temp & BIT(bit); + res = (temp & BIT(bit)) != 0; } smp_llsc_mb(); - return res != 0; + return res; } /* @@ -335,6 +337,8 @@ static inline int test_and_clear_bit(unsigned long nr, : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m), "=&r" (res) : "ir" (BIT(bit)) : __LLSC_CLOBBER); + + res = res != 0; } else if ((MIPS_ISA_REV >= 2) && __builtin_constant_p(nr)) { loongson_llsc_mb(); do { @@ -363,12 +367,12 @@ static inline int test_and_clear_bit(unsigned long nr, : __LLSC_CLOBBER); } while (unlikely(!res)); - res = temp & BIT(bit); + res = (temp & BIT(bit)) != 0; } smp_llsc_mb(); - return res != 0; + return res; } /* @@ -403,6 +407,8 @@ static inline int test_and_change_bit(unsigned long nr, : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m), "=&r" (res) : "ir" (BIT(bit)) : __LLSC_CLOBBER); + + res = res != 0; } else { loongson_llsc_mb(); do { @@ -418,12 +424,12 @@ static inline int test_and_change_bit(unsigned long nr, : __LLSC_CLOBBER); } while (unlikely(!res)); - res = temp & BIT(bit); + res = (temp & BIT(bit)) != 0; } smp_llsc_mb(); - return res != 0; + return res; } #include -- cgit v1.2.3 From cc99987c375e499a95572504d69c215591222072 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Tue, 1 Oct 2019 21:53:33 +0000 Subject: MIPS: bitops: Abstract LL/SC loops Introduce __bit_op() & __test_bit_op() macros which abstract away the implementation of LL/SC loops. This cuts down on a lot of duplicate boilerplate code, and also allows R10000_LLSC_WAR to be handled outside of the individual bitop functions. 
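The shape of the abstraction can be sketched with a C11 compare-and-swap loop standing in for the LL/SC sequence (BIT_OP and its operation expressions are invented for this example; the real __bit_op() emits MIPS ll/sc and also absorbs the R10000_LLSC_WAR handling):

  #include <stdatomic.h>
  #include <stdio.h>

  /* Portable stand-in for __bit_op(): retry until the update lands;
   * the caller supplies only the per-operation expression, which may
   * refer to the freshly loaded value as 'old'. */
  #define BIT_OP(mem, expr) do {                                        \
          unsigned long old, upd;                                       \
                                                                        \
          do {                                                          \
                  old = atomic_load(&(mem));                            \
                  upd = (expr);                                         \
          } while (!atomic_compare_exchange_weak(&(mem), &old, upd));   \
  } while (0)

  int main(void)
  {
          _Atomic unsigned long word = 0;

          BIT_OP(word, old | (1UL << 3));         /* set_bit */
          BIT_OP(word, old ^ (1UL << 7));         /* change_bit */
          BIT_OP(word, old & ~(1UL << 3));        /* clear_bit */

          printf("word = %#lx\n", (unsigned long)atomic_load(&word));
          return 0;
  }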
Signed-off-by: Paul Burton Cc: linux-mips@vger.kernel.org Cc: Huacai Chen Cc: Jiaxun Yang Cc: linux-kernel@vger.kernel.org --- arch/mips/include/asm/bitops.h | 267 ++++++++++------------------------------- 1 file changed, 63 insertions(+), 204 deletions(-) (limited to 'arch') diff --git a/arch/mips/include/asm/bitops.h b/arch/mips/include/asm/bitops.h index 7671db2a7b73..fba0a842b98a 100644 --- a/arch/mips/include/asm/bitops.h +++ b/arch/mips/include/asm/bitops.h @@ -25,6 +25,41 @@ #include #include +#define __bit_op(mem, insn, inputs...) do { \ + unsigned long temp; \ + \ + asm volatile( \ + " .set push \n" \ + " .set " MIPS_ISA_LEVEL " \n" \ + "1: " __LL "%0, %1 \n" \ + " " insn " \n" \ + " " __SC "%0, %1 \n" \ + " " __SC_BEQZ "%0, 1b \n" \ + " .set pop \n" \ + : "=&r"(temp), "+" GCC_OFF_SMALL_ASM()(mem) \ + : inputs \ + : __LLSC_CLOBBER); \ +} while (0) + +#define __test_bit_op(mem, ll_dst, insn, inputs...) ({ \ + unsigned long orig, temp; \ + \ + asm volatile( \ + " .set push \n" \ + " .set " MIPS_ISA_LEVEL " \n" \ + "1: " __LL ll_dst ", %2 \n" \ + " " insn " \n" \ + " " __SC "%1, %2 \n" \ + " " __SC_BEQZ "%1, 1b \n" \ + " .set pop \n" \ + : "=&r"(orig), "=&r"(temp), \ + "+" GCC_OFF_SMALL_ASM()(mem) \ + : inputs \ + : __LLSC_CLOBBER); \ + \ + orig; \ +}) + /* * These are the "slower" versions of the functions and are in bitops.c. * These functions call raw_local_irq_{save,restore}(). @@ -54,55 +89,20 @@ static inline void set_bit(unsigned long nr, volatile unsigned long *addr) { unsigned long *m = ((unsigned long *)addr) + (nr >> SZLONG_LOG); int bit = nr & SZLONG_MASK; - unsigned long temp; if (!kernel_uses_llsc) { __mips_set_bit(nr, addr); return; } - if (R10000_LLSC_WAR) { - __asm__ __volatile__( - " .set push \n" - " .set arch=r4000 \n" - "1: " __LL "%0, %1 # set_bit \n" - " or %0, %2 \n" - " " __SC "%0, %1 \n" - " beqzl %0, 1b \n" - " .set pop \n" - : "=&r" (temp), "=" GCC_OFF_SMALL_ASM() (*m) - : "ir" (BIT(bit)), GCC_OFF_SMALL_ASM() (*m) - : __LLSC_CLOBBER); - return; - } - if ((MIPS_ISA_REV >= 2) && __builtin_constant_p(bit) && (bit >= 16)) { loongson_llsc_mb(); - do { - __asm__ __volatile__( - " " __LL "%0, %1 # set_bit \n" - " " __INS "%0, %3, %2, 1 \n" - " " __SC "%0, %1 \n" - : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m) - : "i" (bit), "r" (~0) - : __LLSC_CLOBBER); - } while (unlikely(!temp)); + __bit_op(*m, __INS "%0, %3, %2, 1", "i"(bit), "r"(~0)); return; } loongson_llsc_mb(); - do { - __asm__ __volatile__( - " .set push \n" - " .set "MIPS_ISA_ARCH_LEVEL" \n" - " " __LL "%0, %1 # set_bit \n" - " or %0, %2 \n" - " " __SC "%0, %1 \n" - " .set pop \n" - : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m) - : "ir" (BIT(bit)) - : __LLSC_CLOBBER); - } while (unlikely(!temp)); + __bit_op(*m, "or\t%0, %2", "ir"(BIT(bit))); } /* @@ -119,55 +119,20 @@ static inline void clear_bit(unsigned long nr, volatile unsigned long *addr) { unsigned long *m = ((unsigned long *)addr) + (nr >> SZLONG_LOG); int bit = nr & SZLONG_MASK; - unsigned long temp; if (!kernel_uses_llsc) { __mips_clear_bit(nr, addr); return; } - if (R10000_LLSC_WAR) { - __asm__ __volatile__( - " .set push \n" - " .set arch=r4000 \n" - "1: " __LL "%0, %1 # clear_bit \n" - " and %0, %2 \n" - " " __SC "%0, %1 \n" - " beqzl %0, 1b \n" - " .set pop \n" - : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m) - : "ir" (~(BIT(bit))) - : __LLSC_CLOBBER); - return; - } - if ((MIPS_ISA_REV >= 2) && __builtin_constant_p(bit)) { loongson_llsc_mb(); - do { - __asm__ __volatile__( - " " __LL "%0, %1 # clear_bit \n" - " " __INS "%0, $0, %2, 1 \n" - " " 
__SC "%0, %1 \n" - : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m) - : "i" (bit) - : __LLSC_CLOBBER); - } while (unlikely(!temp)); + __bit_op(*m, __INS "%0, $0, %2, 1", "i"(bit)); return; } loongson_llsc_mb(); - do { - __asm__ __volatile__( - " .set push \n" - " .set "MIPS_ISA_ARCH_LEVEL" \n" - " " __LL "%0, %1 # clear_bit \n" - " and %0, %2 \n" - " " __SC "%0, %1 \n" - " .set pop \n" - : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m) - : "ir" (~(BIT(bit))) - : __LLSC_CLOBBER); - } while (unlikely(!temp)); + __bit_op(*m, "and\t%0, %2", "ir"(~BIT(bit))); } /* @@ -197,41 +162,14 @@ static inline void change_bit(unsigned long nr, volatile unsigned long *addr) { unsigned long *m = ((unsigned long *)addr) + (nr >> SZLONG_LOG); int bit = nr & SZLONG_MASK; - unsigned long temp; if (!kernel_uses_llsc) { __mips_change_bit(nr, addr); return; } - if (R10000_LLSC_WAR) { - __asm__ __volatile__( - " .set push \n" - " .set arch=r4000 \n" - "1: " __LL "%0, %1 # change_bit \n" - " xor %0, %2 \n" - " " __SC "%0, %1 \n" - " beqzl %0, 1b \n" - " .set pop \n" - : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m) - : "ir" (BIT(bit)) - : __LLSC_CLOBBER); - return; - } - loongson_llsc_mb(); - do { - __asm__ __volatile__( - " .set push \n" - " .set "MIPS_ISA_ARCH_LEVEL" \n" - " " __LL "%0, %1 # change_bit \n" - " xor %0, %2 \n" - " " __SC "%0, %1 \n" - " .set pop \n" - : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m) - : "ir" (BIT(bit)) - : __LLSC_CLOBBER); - } while (unlikely(!temp)); + __bit_op(*m, "xor\t%0, %2", "ir"(BIT(bit))); } /* @@ -247,41 +185,16 @@ static inline int test_and_set_bit_lock(unsigned long nr, { unsigned long *m = ((unsigned long *)addr) + (nr >> SZLONG_LOG); int bit = nr & SZLONG_MASK; - unsigned long res, temp; + unsigned long res, orig; if (!kernel_uses_llsc) { res = __mips_test_and_set_bit_lock(nr, addr); - } else if (R10000_LLSC_WAR) { - __asm__ __volatile__( - " .set push \n" - " .set arch=r4000 \n" - "1: " __LL "%0, %1 # test_and_set_bit \n" - " or %2, %0, %3 \n" - " " __SC "%2, %1 \n" - " beqzl %2, 1b \n" - " and %2, %0, %3 \n" - " .set pop \n" - : "=&r" (temp), "+m" (*m), "=&r" (res) - : "ir" (BIT(bit)) - : __LLSC_CLOBBER); - - res = res != 0; } else { loongson_llsc_mb(); - do { - __asm__ __volatile__( - " .set push \n" - " .set "MIPS_ISA_ARCH_LEVEL" \n" - " " __LL "%0, %1 # test_and_set_bit \n" - " or %2, %0, %3 \n" - " " __SC "%2, %1 \n" - " .set pop \n" - : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m), "=&r" (res) - : "ir" (BIT(bit)) - : __LLSC_CLOBBER); - } while (unlikely(!res)); - - res = (temp & BIT(bit)) != 0; + orig = __test_bit_op(*m, "%0", + "or\t%1, %0, %3", + "ir"(BIT(bit))); + res = (orig & BIT(bit)) != 0; } smp_llsc_mb(); @@ -317,57 +230,25 @@ static inline int test_and_clear_bit(unsigned long nr, { unsigned long *m = ((unsigned long *)addr) + (nr >> SZLONG_LOG); int bit = nr & SZLONG_MASK; - unsigned long res, temp; + unsigned long res, orig; smp_mb__before_llsc(); if (!kernel_uses_llsc) { res = __mips_test_and_clear_bit(nr, addr); - } else if (R10000_LLSC_WAR) { - __asm__ __volatile__( - " .set push \n" - " .set arch=r4000 \n" - "1: " __LL "%0, %1 # test_and_clear_bit \n" - " or %2, %0, %3 \n" - " xor %2, %3 \n" - " " __SC "%2, %1 \n" - " beqzl %2, 1b \n" - " and %2, %0, %3 \n" - " .set pop \n" - : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m), "=&r" (res) - : "ir" (BIT(bit)) - : __LLSC_CLOBBER); - - res = res != 0; } else if ((MIPS_ISA_REV >= 2) && __builtin_constant_p(nr)) { loongson_llsc_mb(); - do { - __asm__ __volatile__( - " " __LL "%0, %1 # test_and_clear_bit \n" - " " __EXT "%2, 
%0, %3, 1 \n" - " " __INS "%0, $0, %3, 1 \n" - " " __SC "%0, %1 \n" - : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m), "=&r" (res) - : "i" (bit) - : __LLSC_CLOBBER); - } while (unlikely(!temp)); + res = __test_bit_op(*m, "%1", + __EXT "%0, %1, %3, 1;" + __INS "%1, $0, %3, 1", + "i"(bit)); } else { loongson_llsc_mb(); - do { - __asm__ __volatile__( - " .set push \n" - " .set "MIPS_ISA_ARCH_LEVEL" \n" - " " __LL "%0, %1 # test_and_clear_bit \n" - " or %2, %0, %3 \n" - " xor %2, %3 \n" - " " __SC "%2, %1 \n" - " .set pop \n" - : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m), "=&r" (res) - : "ir" (BIT(bit)) - : __LLSC_CLOBBER); - } while (unlikely(!res)); - - res = (temp & BIT(bit)) != 0; + orig = __test_bit_op(*m, "%0", + "or\t%1, %0, %3;" + "xor\t%1, %1, %3", + "ir"(BIT(bit))); + res = (orig & BIT(bit)) != 0; } smp_llsc_mb(); @@ -388,43 +269,18 @@ static inline int test_and_change_bit(unsigned long nr, { unsigned long *m = ((unsigned long *)addr) + (nr >> SZLONG_LOG); int bit = nr & SZLONG_MASK; - unsigned long res, temp; + unsigned long res, orig; smp_mb__before_llsc(); if (!kernel_uses_llsc) { res = __mips_test_and_change_bit(nr, addr); - } else if (R10000_LLSC_WAR) { - __asm__ __volatile__( - " .set push \n" - " .set arch=r4000 \n" - "1: " __LL "%0, %1 # test_and_change_bit \n" - " xor %2, %0, %3 \n" - " " __SC "%2, %1 \n" - " beqzl %2, 1b \n" - " and %2, %0, %3 \n" - " .set pop \n" - : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m), "=&r" (res) - : "ir" (BIT(bit)) - : __LLSC_CLOBBER); - - res = res != 0; } else { loongson_llsc_mb(); - do { - __asm__ __volatile__( - " .set push \n" - " .set "MIPS_ISA_ARCH_LEVEL" \n" - " " __LL "%0, %1 # test_and_change_bit \n" - " xor %2, %0, %3 \n" - " " __SC "\t%2, %1 \n" - " .set pop \n" - : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m), "=&r" (res) - : "ir" (BIT(bit)) - : __LLSC_CLOBBER); - } while (unlikely(!res)); - - res = (temp & BIT(bit)) != 0; + orig = __test_bit_op(*m, "%0", + "xor\t%1, %0, %3", + "ir"(BIT(bit))); + res = (orig & BIT(bit)) != 0; } smp_llsc_mb(); @@ -432,6 +288,9 @@ static inline int test_and_change_bit(unsigned long nr, return res; } +#undef __bit_op +#undef __test_bit_op + #include /* -- cgit v1.2.3 From c042be02d730534ee93cb446f194a558b5a84adf Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Tue, 1 Oct 2019 21:53:34 +0000 Subject: MIPS: bitops: Use BIT_WORD() & BITS_PER_LONG Rather than using custom SZLONG_LOG & SZLONG_MASK macros to shift & mask a bit index to form word & bit offsets respectively, make use of the standard BIT_WORD() & BITS_PER_LONG macros for the same purpose. volatile is added to the definition of pointers to the long-sized word we'll operate on, in order to prevent the compiler complaining that we cast away the volatile qualifier of the addr argument. This should have no effect on generated code, which in the LL/SC case is inline asm anyway & in the non-LLSC case access is constrained by compiler barriers provided by raw_local_irq_{save,restore}(). 
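The replacement index arithmetic is equivalent to the old shift & mask form; a standalone check (macros reproduced in simplified form):

  #include <stdio.h>

  #define BITS_PER_LONG   (8 * (int)sizeof(long)) /* simplified */
  #define BIT_WORD(nr)    ((nr) / BITS_PER_LONG)

  int main(void)
  {
          unsigned long nr = 131;

          /* With 64-bit longs, SZLONG_LOG is 6 and SZLONG_MASK is 63UL,
           * so both forms compute the same word index & bit offset. */
          printf("word: %lu == %lu\n", nr >> 6, BIT_WORD(nr));
          printf("bit:  %lu == %lu\n", nr & 63UL, nr % BITS_PER_LONG);
          return 0;
  }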
Signed-off-by: Paul Burton Cc: linux-mips@vger.kernel.org Cc: Huacai Chen Cc: Jiaxun Yang Cc: linux-kernel@vger.kernel.org --- arch/mips/include/asm/bitops.h | 24 ++++++++++++------------ arch/mips/include/asm/llsc.h | 4 ---- arch/mips/lib/bitops.c | 31 +++++++++++++------------------ 3 files changed, 25 insertions(+), 34 deletions(-) (limited to 'arch') diff --git a/arch/mips/include/asm/bitops.h b/arch/mips/include/asm/bitops.h index fba0a842b98a..d39fca2def60 100644 --- a/arch/mips/include/asm/bitops.h +++ b/arch/mips/include/asm/bitops.h @@ -87,8 +87,8 @@ int __mips_test_and_change_bit(unsigned long nr, */ static inline void set_bit(unsigned long nr, volatile unsigned long *addr) { - unsigned long *m = ((unsigned long *)addr) + (nr >> SZLONG_LOG); - int bit = nr & SZLONG_MASK; + volatile unsigned long *m = &addr[BIT_WORD(nr)]; + int bit = nr % BITS_PER_LONG; if (!kernel_uses_llsc) { __mips_set_bit(nr, addr); @@ -117,8 +117,8 @@ static inline void set_bit(unsigned long nr, volatile unsigned long *addr) */ static inline void clear_bit(unsigned long nr, volatile unsigned long *addr) { - unsigned long *m = ((unsigned long *)addr) + (nr >> SZLONG_LOG); - int bit = nr & SZLONG_MASK; + volatile unsigned long *m = &addr[BIT_WORD(nr)]; + int bit = nr % BITS_PER_LONG; if (!kernel_uses_llsc) { __mips_clear_bit(nr, addr); @@ -160,8 +160,8 @@ static inline void clear_bit_unlock(unsigned long nr, volatile unsigned long *ad */ static inline void change_bit(unsigned long nr, volatile unsigned long *addr) { - unsigned long *m = ((unsigned long *)addr) + (nr >> SZLONG_LOG); - int bit = nr & SZLONG_MASK; + volatile unsigned long *m = &addr[BIT_WORD(nr)]; + int bit = nr % BITS_PER_LONG; if (!kernel_uses_llsc) { __mips_change_bit(nr, addr); @@ -183,8 +183,8 @@ static inline void change_bit(unsigned long nr, volatile unsigned long *addr) static inline int test_and_set_bit_lock(unsigned long nr, volatile unsigned long *addr) { - unsigned long *m = ((unsigned long *)addr) + (nr >> SZLONG_LOG); - int bit = nr & SZLONG_MASK; + volatile unsigned long *m = &addr[BIT_WORD(nr)]; + int bit = nr % BITS_PER_LONG; unsigned long res, orig; if (!kernel_uses_llsc) { @@ -228,8 +228,8 @@ static inline int test_and_set_bit(unsigned long nr, static inline int test_and_clear_bit(unsigned long nr, volatile unsigned long *addr) { - unsigned long *m = ((unsigned long *)addr) + (nr >> SZLONG_LOG); - int bit = nr & SZLONG_MASK; + volatile unsigned long *m = &addr[BIT_WORD(nr)]; + int bit = nr % BITS_PER_LONG; unsigned long res, orig; smp_mb__before_llsc(); @@ -267,8 +267,8 @@ static inline int test_and_clear_bit(unsigned long nr, static inline int test_and_change_bit(unsigned long nr, volatile unsigned long *addr) { - unsigned long *m = ((unsigned long *)addr) + (nr >> SZLONG_LOG); - int bit = nr & SZLONG_MASK; + volatile unsigned long *m = &addr[BIT_WORD(nr)]; + int bit = nr % BITS_PER_LONG; unsigned long res, orig; smp_mb__before_llsc(); diff --git a/arch/mips/include/asm/llsc.h b/arch/mips/include/asm/llsc.h index d240a4a2d1c4..c49738bc3bda 100644 --- a/arch/mips/include/asm/llsc.h +++ b/arch/mips/include/asm/llsc.h @@ -12,15 +12,11 @@ #include #if _MIPS_SZLONG == 32 -#define SZLONG_LOG 5 -#define SZLONG_MASK 31UL #define __LL "ll " #define __SC "sc " #define __INS "ins " #define __EXT "ext " #elif _MIPS_SZLONG == 64 -#define SZLONG_LOG 6 -#define SZLONG_MASK 63UL #define __LL "lld " #define __SC "scd " #define __INS "dins " diff --git a/arch/mips/lib/bitops.c b/arch/mips/lib/bitops.c index fba402c0879d..116d0bd8b2ae 100644 --- 
a/arch/mips/lib/bitops.c +++ b/arch/mips/lib/bitops.c @@ -7,6 +7,7 @@ * Copyright (c) 1999, 2000 Silicon Graphics, Inc. */ #include +#include #include #include @@ -19,12 +20,11 @@ */ void __mips_set_bit(unsigned long nr, volatile unsigned long *addr) { - unsigned long *a = (unsigned long *)addr; - unsigned bit = nr & SZLONG_MASK; + volatile unsigned long *a = &addr[BIT_WORD(nr)]; + unsigned int bit = nr % BITS_PER_LONG; unsigned long mask; unsigned long flags; - a += nr >> SZLONG_LOG; mask = 1UL << bit; raw_local_irq_save(flags); *a |= mask; @@ -41,12 +41,11 @@ EXPORT_SYMBOL(__mips_set_bit); */ void __mips_clear_bit(unsigned long nr, volatile unsigned long *addr) { - unsigned long *a = (unsigned long *)addr; - unsigned bit = nr & SZLONG_MASK; + volatile unsigned long *a = &addr[BIT_WORD(nr)]; + unsigned int bit = nr % BITS_PER_LONG; unsigned long mask; unsigned long flags; - a += nr >> SZLONG_LOG; mask = 1UL << bit; raw_local_irq_save(flags); *a &= ~mask; @@ -63,12 +62,11 @@ EXPORT_SYMBOL(__mips_clear_bit); */ void __mips_change_bit(unsigned long nr, volatile unsigned long *addr) { - unsigned long *a = (unsigned long *)addr; - unsigned bit = nr & SZLONG_MASK; + volatile unsigned long *a = &addr[BIT_WORD(nr)]; + unsigned int bit = nr % BITS_PER_LONG; unsigned long mask; unsigned long flags; - a += nr >> SZLONG_LOG; mask = 1UL << bit; raw_local_irq_save(flags); *a ^= mask; @@ -86,13 +84,12 @@ EXPORT_SYMBOL(__mips_change_bit); int __mips_test_and_set_bit_lock(unsigned long nr, volatile unsigned long *addr) { - unsigned long *a = (unsigned long *)addr; - unsigned bit = nr & SZLONG_MASK; + volatile unsigned long *a = &addr[BIT_WORD(nr)]; + unsigned int bit = nr % BITS_PER_LONG; unsigned long mask; unsigned long flags; int res; - a += nr >> SZLONG_LOG; mask = 1UL << bit; raw_local_irq_save(flags); res = (mask & *a) != 0; @@ -111,13 +108,12 @@ EXPORT_SYMBOL(__mips_test_and_set_bit_lock); */ int __mips_test_and_clear_bit(unsigned long nr, volatile unsigned long *addr) { - unsigned long *a = (unsigned long *)addr; - unsigned bit = nr & SZLONG_MASK; + volatile unsigned long *a = &addr[BIT_WORD(nr)]; + unsigned int bit = nr % BITS_PER_LONG; unsigned long mask; unsigned long flags; int res; - a += nr >> SZLONG_LOG; mask = 1UL << bit; raw_local_irq_save(flags); res = (mask & *a) != 0; @@ -136,13 +132,12 @@ EXPORT_SYMBOL(__mips_test_and_clear_bit); */ int __mips_test_and_change_bit(unsigned long nr, volatile unsigned long *addr) { - unsigned long *a = (unsigned long *)addr; - unsigned bit = nr & SZLONG_MASK; + volatile unsigned long *a = &addr[BIT_WORD(nr)]; + unsigned int bit = nr % BITS_PER_LONG; unsigned long mask; unsigned long flags; int res; - a += nr >> SZLONG_LOG; mask = 1UL << bit; raw_local_irq_save(flags); res = (mask & *a) != 0; -- cgit v1.2.3 From 5bb29275df7a7aab8b6c29686109cc5cb1015850 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Tue, 1 Oct 2019 21:53:35 +0000 Subject: MIPS: bitops: Emit Loongson3 sync workarounds within asm Generate the sync instructions required to workaround Loongson3 LL/SC errata within inline asm blocks, which feels a little safer than doing it from C where strictly speaking the compiler would be well within its rights to insert a memory access between the separate asm statements we previously had, containing sync & ll instructions respectively. 
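The mechanism is plain string pasting: __SYNC() expands to a possibly-empty instruction string inside the same asm template, leaving no seam between the barrier and the LL into which the compiler could schedule an access. A sketch of the pattern (SYNC_WAR is an invented stand-in; the asm body is a comment for illustration):

  #include <stdio.h>

  #define LOONGSON3_WAR 1                 /* assumed build-time knob */
  #if LOONGSON3_WAR
  # define SYNC_WAR "# sync\n\t"          /* barrier emitted in-template */
  #else
  # define SYNC_WAR ""                    /* pastes to nothing */
  #endif

  int main(void)
  {
          /* Barrier & guarded sequence live in one indivisible asm
           * block; the string literals concatenate at compile time. */
          asm volatile(SYNC_WAR "# ll/sc sequence would follow here"
                       : : : "memory");

          puts("template: " SYNC_WAR "# ll/sc sequence would follow here");
          return 0;
  }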
Signed-off-by: Paul Burton Cc: linux-mips@vger.kernel.org Cc: Huacai Chen Cc: Jiaxun Yang Cc: linux-kernel@vger.kernel.org --- arch/mips/include/asm/bitops.h | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/mips/include/asm/bitops.h b/arch/mips/include/asm/bitops.h index d39fca2def60..c08b6d225f10 100644 --- a/arch/mips/include/asm/bitops.h +++ b/arch/mips/include/asm/bitops.h @@ -31,6 +31,7 @@ asm volatile( \ " .set push \n" \ " .set " MIPS_ISA_LEVEL " \n" \ + " " __SYNC(full, loongson3_war) " \n" \ "1: " __LL "%0, %1 \n" \ " " insn " \n" \ " " __SC "%0, %1 \n" \ @@ -47,6 +48,7 @@ asm volatile( \ " .set push \n" \ " .set " MIPS_ISA_LEVEL " \n" \ + " " __SYNC(full, loongson3_war) " \n" \ "1: " __LL ll_dst ", %2 \n" \ " " insn " \n" \ " " __SC "%1, %2 \n" \ @@ -96,12 +98,10 @@ static inline void set_bit(unsigned long nr, volatile unsigned long *addr) } if ((MIPS_ISA_REV >= 2) && __builtin_constant_p(bit) && (bit >= 16)) { - loongson_llsc_mb(); __bit_op(*m, __INS "%0, %3, %2, 1", "i"(bit), "r"(~0)); return; } - loongson_llsc_mb(); __bit_op(*m, "or\t%0, %2", "ir"(BIT(bit))); } @@ -126,12 +126,10 @@ static inline void clear_bit(unsigned long nr, volatile unsigned long *addr) } if ((MIPS_ISA_REV >= 2) && __builtin_constant_p(bit)) { - loongson_llsc_mb(); __bit_op(*m, __INS "%0, $0, %2, 1", "i"(bit)); return; } - loongson_llsc_mb(); __bit_op(*m, "and\t%0, %2", "ir"(~BIT(bit))); } @@ -168,7 +166,6 @@ static inline void change_bit(unsigned long nr, volatile unsigned long *addr) return; } - loongson_llsc_mb(); __bit_op(*m, "xor\t%0, %2", "ir"(BIT(bit))); } @@ -190,7 +187,6 @@ static inline int test_and_set_bit_lock(unsigned long nr, if (!kernel_uses_llsc) { res = __mips_test_and_set_bit_lock(nr, addr); } else { - loongson_llsc_mb(); orig = __test_bit_op(*m, "%0", "or\t%1, %0, %3", "ir"(BIT(bit))); @@ -237,13 +233,11 @@ static inline int test_and_clear_bit(unsigned long nr, if (!kernel_uses_llsc) { res = __mips_test_and_clear_bit(nr, addr); } else if ((MIPS_ISA_REV >= 2) && __builtin_constant_p(nr)) { - loongson_llsc_mb(); res = __test_bit_op(*m, "%1", __EXT "%0, %1, %3, 1;" __INS "%1, $0, %3, 1", "i"(bit)); } else { - loongson_llsc_mb(); orig = __test_bit_op(*m, "%0", "or\t%1, %0, %3;" "xor\t%1, %1, %3", @@ -276,7 +270,6 @@ static inline int test_and_change_bit(unsigned long nr, if (!kernel_uses_llsc) { res = __mips_test_and_change_bit(nr, addr); } else { - loongson_llsc_mb(); orig = __test_bit_op(*m, "%0", "xor\t%1, %0, %3", "ir"(BIT(bit))); -- cgit v1.2.3 From 9026737703aeee35702a0f990811e9202469c7b4 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Tue, 1 Oct 2019 21:53:36 +0000 Subject: MIPS: bitops: Use smp_mb__before_atomic in test_* ops Use smp_mb__before_atomic() rather than smp_mb__before_llsc() in test_and_set_bit(), test_and_clear_bit() & test_and_change_bit(). The _atomic() versions make semantic sense in these cases, and will allow a later patch to omit redundant barriers for Loongson3 systems that already include a barrier within __test_bit_op(). 
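The two barrier flavours can be restated with C11 atomics for illustration (approximate only; the kernel primitives differ in detail, and the trailing barrier in the real code comes from smp_llsc_mb()):

  #include <stdatomic.h>
  #include <stdbool.h>
  #include <stdio.h>

  static _Atomic unsigned long word;

  /* Acquire-only flavour, like test_and_set_bit_lock(). */
  static bool tas_lock(int bit)
  {
          unsigned long mask = 1UL << bit;

          return atomic_fetch_or_explicit(&word, mask,
                                          memory_order_acquire) & mask;
  }

  /* Fully ordered flavour: a leading full barrier, the analogue of
   * smp_mb__before_atomic(), in front of the same operation. */
  static bool tas(int bit)
  {
          atomic_thread_fence(memory_order_seq_cst);
          return tas_lock(bit);
  }

  int main(void)
  {
          printf("first:  %d\n", tas(3));  /* 0: bit was clear */
          printf("second: %d\n", tas(3));  /* 1: bit was already set */
          return 0;
  }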
Signed-off-by: Paul Burton Cc: linux-mips@vger.kernel.org Cc: Huacai Chen Cc: Jiaxun Yang Cc: linux-kernel@vger.kernel.org --- arch/mips/include/asm/bitops.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/mips/include/asm/bitops.h b/arch/mips/include/asm/bitops.h index c08b6d225f10..a74769940fbd 100644 --- a/arch/mips/include/asm/bitops.h +++ b/arch/mips/include/asm/bitops.h @@ -209,7 +209,7 @@ static inline int test_and_set_bit_lock(unsigned long nr, static inline int test_and_set_bit(unsigned long nr, volatile unsigned long *addr) { - smp_mb__before_llsc(); + smp_mb__before_atomic(); return test_and_set_bit_lock(nr, addr); } @@ -228,7 +228,7 @@ static inline int test_and_clear_bit(unsigned long nr, int bit = nr % BITS_PER_LONG; unsigned long res, orig; - smp_mb__before_llsc(); + smp_mb__before_atomic(); if (!kernel_uses_llsc) { res = __mips_test_and_clear_bit(nr, addr); @@ -265,7 +265,7 @@ static inline int test_and_change_bit(unsigned long nr, int bit = nr % BITS_PER_LONG; unsigned long res, orig; - smp_mb__before_llsc(); + smp_mb__before_atomic(); if (!kernel_uses_llsc) { res = __mips_test_and_change_bit(nr, addr); -- cgit v1.2.3 From 6a57d2d1e7c3ac7f47d8c51bddd9082fe2fb485b Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Tue, 1 Oct 2019 21:53:37 +0000 Subject: MIPS: cmpxchg: Emit Loongson3 sync workarounds within asm Generate the sync instructions required to workaround Loongson3 LL/SC errata within inline asm blocks, which feels a little safer than doing it from C where strictly speaking the compiler would be well within its rights to insert a memory access between the separate asm statements we previously had, containing sync & ll instructions respectively. Signed-off-by: Paul Burton Cc: linux-mips@vger.kernel.org Cc: Huacai Chen Cc: Jiaxun Yang Cc: linux-kernel@vger.kernel.org --- arch/mips/include/asm/cmpxchg.h | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/mips/include/asm/cmpxchg.h b/arch/mips/include/asm/cmpxchg.h index 5d3f0e3513b4..fc121d20a980 100644 --- a/arch/mips/include/asm/cmpxchg.h +++ b/arch/mips/include/asm/cmpxchg.h @@ -12,6 +12,7 @@ #include #include #include +#include #include /* @@ -36,12 +37,12 @@ extern unsigned long __xchg_called_with_bad_pointer(void) __typeof(*(m)) __ret; \ \ if (kernel_uses_llsc) { \ - loongson_llsc_mb(); \ __asm__ __volatile__( \ " .set push \n" \ " .set noat \n" \ " .set push \n" \ " .set " MIPS_ISA_ARCH_LEVEL " \n" \ + " " __SYNC(full, loongson3_war) " \n" \ "1: " ld " %0, %2 # __xchg_asm \n" \ " .set pop \n" \ " move $1, %z3 \n" \ @@ -108,12 +109,12 @@ static inline unsigned long __xchg(volatile void *ptr, unsigned long x, __typeof(*(m)) __ret; \ \ if (kernel_uses_llsc) { \ - loongson_llsc_mb(); \ __asm__ __volatile__( \ " .set push \n" \ " .set noat \n" \ " .set push \n" \ " .set "MIPS_ISA_ARCH_LEVEL" \n" \ + " " __SYNC(full, loongson3_war) " \n" \ "1: " ld " %0, %2 # __cmpxchg_asm \n" \ " bne %0, %z3, 2f \n" \ " .set pop \n" \ @@ -122,11 +123,10 @@ static inline unsigned long __xchg(volatile void *ptr, unsigned long x, " " st " $1, %1 \n" \ "\t" __SC_BEQZ "$1, 1b \n" \ " .set pop \n" \ - "2: \n" \ + "2: " __SYNC(full, loongson3_war) " \n" \ : "=&r" (__ret), "=" GCC_OFF_SMALL_ASM() (*m) \ : GCC_OFF_SMALL_ASM() (*m), "Jr" (old), "Jr" (new) \ : __LLSC_CLOBBER); \ - loongson_llsc_mb(); \ } else { \ unsigned long __flags; \ \ @@ -222,11 +222,11 @@ static inline unsigned long __cmpxchg64(volatile void *ptr, */ local_irq_save(flags); 
- loongson_llsc_mb(); asm volatile( " .set push \n" " .set " MIPS_ISA_ARCH_LEVEL " \n" /* Load 64 bits from ptr */ + " " __SYNC(full, loongson3_war) " \n" "1: lld %L0, %3 # __cmpxchg64 \n" /* * Split the 64 bit value we loaded into the 2 registers that hold the @@ -260,7 +260,7 @@ static inline unsigned long __cmpxchg64(volatile void *ptr, /* If we failed, loop! */ "\t" __SC_BEQZ "%L1, 1b \n" " .set pop \n" - "2: \n" + "2: " __SYNC(full, loongson3_war) " \n" : "=&r"(ret), "=&r"(tmp), "=" GCC_OFF_SMALL_ASM() (*(unsigned long long *)ptr) @@ -268,7 +268,6 @@ static inline unsigned long __cmpxchg64(volatile void *ptr, "r" (old), "r" (new) : "memory"); - loongson_llsc_mb(); local_irq_restore(flags); return ret; -- cgit v1.2.3 From a91f2a1dba44c29cd0d75edd8787f4469092ae8f Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Tue, 1 Oct 2019 21:53:38 +0000 Subject: MIPS: cmpxchg: Omit redundant barriers for Loongson3 When building a kernel configured to support Loongson3 LL/SC workarounds (ie. CONFIG_CPU_LOONGSON3_WORKAROUNDS=y) the inline assembly in __xchg_asm() & __cmpxchg_asm() already emits completion barriers, and as such we don't need to emit extra barriers from the xchg() or cmpxchg() macros. Add compile-time constant checks causing us to omit the redundant memory barriers. Signed-off-by: Paul Burton Cc: linux-mips@vger.kernel.org Cc: Huacai Chen Cc: Jiaxun Yang Cc: linux-kernel@vger.kernel.org --- arch/mips/include/asm/cmpxchg.h | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/mips/include/asm/cmpxchg.h b/arch/mips/include/asm/cmpxchg.h index fc121d20a980..820df68e32e1 100644 --- a/arch/mips/include/asm/cmpxchg.h +++ b/arch/mips/include/asm/cmpxchg.h @@ -94,7 +94,13 @@ static inline unsigned long __xchg(volatile void *ptr, unsigned long x, ({ \ __typeof__(*(ptr)) __res; \ \ - smp_mb__before_llsc(); \ + /* \ + * In the Loongson3 workaround case __xchg_asm() already \ + * contains a completion barrier prior to the LL, so we don't \ + * need to emit an extra one here. \ + */ \ + if (!__SYNC_loongson3_war) \ + smp_mb__before_llsc(); \ \ __res = (__typeof__(*(ptr))) \ __xchg((ptr), (unsigned long)(x), sizeof(*(ptr))); \ @@ -179,9 +185,23 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, ({ \ __typeof__(*(ptr)) __res; \ \ - smp_mb__before_llsc(); \ + /* \ + * In the Loongson3 workaround case __cmpxchg_asm() already \ + * contains a completion barrier prior to the LL, so we don't \ + * need to emit an extra one here. \ + */ \ + if (!__SYNC_loongson3_war) \ + smp_mb__before_llsc(); \ + \ __res = cmpxchg_local((ptr), (old), (new)); \ - smp_llsc_mb(); \ + \ + /* \ + * In the Loongson3 workaround case __cmpxchg_asm() already \ + * contains a completion barrier after the SC, so we don't \ + * need to emit an extra one here. \ + */ \ + if (!__SYNC_loongson3_war) \ + smp_llsc_mb(); \ \ __res; \ }) -- cgit v1.2.3 From 3c1d3f0979721a39dd2980c97466127ce65aa130 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Tue, 1 Oct 2019 21:53:38 +0000 Subject: MIPS: futex: Emit Loongson3 sync workarounds within asm Generate the sync instructions required to workaround Loongson3 LL/SC errata within inline asm blocks, which feels a little safer than doing it from C where strictly speaking the compiler would be well within its rights to insert a memory access between the separate asm statements we previously had, containing sync & ll instructions respectively. 
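[ ed: To make the compiler hazard concrete, a minimal sketch in plain C; illustrative only, not kernel code, and the names are invented. With two adjacent asm statements the compiler may legally schedule an access between them; a single block pins the sync directly against the ll. ]

static int unrelated;	/* hypothetical variable */

static void separate_asm_sketch(volatile int *p)
{
	__asm__ __volatile__("sync" : : : "memory");
	unrelated++;	/* a compiler-scheduled access can land here,
			 * between the sync and the ll below */
	__asm__ __volatile__("ll $0, %0" : : "m" (*p));
}

static void combined_asm_sketch(volatile int *p)
{
	__asm__ __volatile__(
	"	sync		\n"
	"	ll	$0, %0	\n"	/* nothing can intervene */
	: : "m" (*p));
}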
Signed-off-by: Paul Burton Cc: linux-mips@vger.kernel.org Cc: Huacai Chen Cc: Jiaxun Yang Cc: linux-kernel@vger.kernel.org --- arch/mips/include/asm/barrier.h | 13 +++++++------ arch/mips/include/asm/futex.h | 15 +++++++-------- 2 files changed, 14 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/mips/include/asm/barrier.h b/arch/mips/include/asm/barrier.h index c7e05e832da9..133afd565067 100644 --- a/arch/mips/include/asm/barrier.h +++ b/arch/mips/include/asm/barrier.h @@ -95,13 +95,14 @@ static inline void wmb(void) * ordering will be done by smp_llsc_mb() and friends. */ #if defined(CONFIG_WEAK_REORDERING_BEYOND_LLSC) && defined(CONFIG_SMP) -#define __WEAK_LLSC_MB " sync \n" -#define smp_llsc_mb() __asm__ __volatile__(__WEAK_LLSC_MB : : :"memory") -#define __LLSC_CLOBBER +# define __WEAK_LLSC_MB sync +# define smp_llsc_mb() \ + __asm__ __volatile__(__stringify(__WEAK_LLSC_MB) : : :"memory") +# define __LLSC_CLOBBER #else -#define __WEAK_LLSC_MB " \n" -#define smp_llsc_mb() do { } while (0) -#define __LLSC_CLOBBER "memory" +# define __WEAK_LLSC_MB +# define smp_llsc_mb() do { } while (0) +# define __LLSC_CLOBBER "memory" #endif #ifdef CONFIG_CPU_CAVIUM_OCTEON diff --git a/arch/mips/include/asm/futex.h b/arch/mips/include/asm/futex.h index b83b0397462d..54cf20530931 100644 --- a/arch/mips/include/asm/futex.h +++ b/arch/mips/include/asm/futex.h @@ -16,6 +16,7 @@ #include #include #include +#include #include #define __futex_atomic_op(insn, ret, oldval, uaddr, oparg) \ @@ -32,7 +33,7 @@ " .set arch=r4000 \n" \ "2: sc $1, %2 \n" \ " beqzl $1, 1b \n" \ - __WEAK_LLSC_MB \ + __stringify(__WEAK_LLSC_MB) \ "3: \n" \ " .insn \n" \ " .set pop \n" \ @@ -50,19 +51,19 @@ "i" (-EFAULT) \ : "memory"); \ } else if (cpu_has_llsc) { \ - loongson_llsc_mb(); \ __asm__ __volatile__( \ " .set push \n" \ " .set noat \n" \ " .set push \n" \ " .set "MIPS_ISA_ARCH_LEVEL" \n" \ + " " __SYNC(full, loongson3_war) " \n" \ "1: "user_ll("%1", "%4")" # __futex_atomic_op\n" \ " .set pop \n" \ " " insn " \n" \ " .set "MIPS_ISA_ARCH_LEVEL" \n" \ "2: "user_sc("$1", "%2")" \n" \ " beqz $1, 1b \n" \ - __WEAK_LLSC_MB \ + __stringify(__WEAK_LLSC_MB) \ "3: \n" \ " .insn \n" \ " .set pop \n" \ @@ -147,7 +148,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, " .set arch=r4000 \n" "2: sc $1, %2 \n" " beqzl $1, 1b \n" - __WEAK_LLSC_MB + __stringify(__WEAK_LLSC_MB) "3: \n" " .insn \n" " .set pop \n" @@ -164,13 +165,13 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, "i" (-EFAULT) : "memory"); } else if (cpu_has_llsc) { - loongson_llsc_mb(); __asm__ __volatile__( "# futex_atomic_cmpxchg_inatomic \n" " .set push \n" " .set noat \n" " .set push \n" " .set "MIPS_ISA_ARCH_LEVEL" \n" + " " __SYNC(full, loongson3_war) " \n" "1: "user_ll("%1", "%3")" \n" " bne %1, %z4, 3f \n" " .set pop \n" @@ -178,8 +179,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, " .set "MIPS_ISA_ARCH_LEVEL" \n" "2: "user_sc("$1", "%2")" \n" " beqz $1, 1b \n" - __WEAK_LLSC_MB - "3: \n" + "3: " __SYNC_ELSE(full, loongson3_war, __WEAK_LLSC_MB) "\n" " .insn \n" " .set pop \n" " .section .fixup,\"ax\" \n" @@ -194,7 +194,6 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, : GCC_OFF_SMALL_ASM() (*uaddr), "Jr" (oldval), "Jr" (newval), "i" (-EFAULT) : "memory"); - loongson_llsc_mb(); } else return -ENOSYS; -- cgit v1.2.3 From e84957e6ae043bb83ad6ae7e949a1ce97b6bbfef Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Tue, 1 Oct 2019 21:53:39 +0000 Subject: MIPS: syscall: Emit Loongson3 sync workarounds 
within asm Generate the sync instructions required to workaround Loongson3 LL/SC errata within inline asm blocks, which feels a little safer than doing it from C where strictly speaking the compiler would be well within its rights to insert a memory access between the separate asm statements we previously had, containing sync & ll instructions respectively. Signed-off-by: Paul Burton Cc: linux-mips@vger.kernel.org Cc: Huacai Chen Cc: Jiaxun Yang Cc: linux-kernel@vger.kernel.org --- arch/mips/kernel/syscall.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c index 3f16f3823031..c333e5788664 100644 --- a/arch/mips/kernel/syscall.c +++ b/arch/mips/kernel/syscall.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -133,12 +134,12 @@ static inline int mips_atomic_set(unsigned long addr, unsigned long new) [efault] "i" (-EFAULT) : "memory"); } else if (cpu_has_llsc) { - loongson_llsc_mb(); __asm__ __volatile__ ( " .set push \n" " .set "MIPS_ISA_ARCH_LEVEL" \n" " li %[err], 0 \n" "1: \n" + " " __SYNC(full, loongson3_war) " \n" user_ll("%[old]", "(%[addr])") " move %[tmp], %[new] \n" "2: \n" -- cgit v1.2.3 From 7f56b123548142fd48b2c6891977e8fda695a838 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Tue, 1 Oct 2019 21:53:40 +0000 Subject: MIPS: barrier: Remove loongson_llsc_mb() The loongson_llsc_mb() macro is no longer used - instead barriers are emitted as part of inline asm using the __SYNC() macro. Remove the now-defunct loongson_llsc_mb() macro. Signed-off-by: Paul Burton Cc: linux-mips@vger.kernel.org Cc: Huacai Chen Cc: Jiaxun Yang Cc: linux-kernel@vger.kernel.org --- arch/mips/include/asm/barrier.h | 40 ---------------------------------------- arch/mips/loongson64/Platform | 2 +- 2 files changed, 1 insertion(+), 41 deletions(-) (limited to 'arch') diff --git a/arch/mips/include/asm/barrier.h b/arch/mips/include/asm/barrier.h index 133afd565067..6d92d5ccdafa 100644 --- a/arch/mips/include/asm/barrier.h +++ b/arch/mips/include/asm/barrier.h @@ -122,46 +122,6 @@ static inline void wmb(void) #define __smp_mb__before_atomic() __smp_mb__before_llsc() #define __smp_mb__after_atomic() smp_llsc_mb() -/* - * Some Loongson 3 CPUs have a bug wherein execution of a memory access (load, - * store or prefetch) in between an LL & SC can cause the SC instruction to - * erroneously succeed, breaking atomicity. Whilst it's unusual to write code - * containing such sequences, this bug bites harder than we might otherwise - * expect due to reordering & speculation: - * - * 1) A memory access appearing prior to the LL in program order may actually - * be executed after the LL - this is the reordering case. - * - * In order to avoid this we need to place a memory barrier (ie. a SYNC - * instruction) prior to every LL instruction, in between it and any earlier - * memory access instructions. - * - * This reordering case is fixed by 3A R2 CPUs, ie. 3A2000 models and later. - * - * 2) If a conditional branch exists between an LL & SC with a target outside - * of the LL-SC loop, for example an exit upon value mismatch in cmpxchg() - * or similar, then misprediction of the branch may allow speculative - * execution of memory accesses from outside of the LL-SC loop. - * - * In order to avoid this we need a memory barrier (ie. a SYNC instruction) - * at each affected branch target, for which we also use loongson_llsc_mb() - * defined below. - * - * This case affects all current Loongson 3 CPUs. 
- * - * The above described cases cause an error in the cache coherence protocol; - * such that the Invalidate of a competing LL-SC goes 'missing' and SC - * erroneously observes its core still has Exclusive state and lets the SC - * proceed. - * - * Therefore the error only occurs on SMP systems. - */ -#ifdef CONFIG_CPU_LOONGSON3_WORKAROUNDS /* Loongson-3's LLSC workaround */ -#define loongson_llsc_mb() __asm__ __volatile__("sync" : : :"memory") -#else -#define loongson_llsc_mb() do { } while (0) -#endif - static inline void sync_ginv(void) { asm volatile(__SYNC(ginv, always)); diff --git a/arch/mips/loongson64/Platform b/arch/mips/loongson64/Platform index c1a4d4dc4665..28172500f95a 100644 --- a/arch/mips/loongson64/Platform +++ b/arch/mips/loongson64/Platform @@ -27,7 +27,7 @@ cflags-$(CONFIG_CPU_LOONGSON3) += -Wa,--trap # # Some versions of binutils, not currently mainline as of 2019/02/04, support # an -mfix-loongson3-llsc flag which emits a sync prior to each ll instruction -# to work around a CPU bug (see loongson_llsc_mb() in asm/barrier.h for a +# to work around a CPU bug (see __SYNC_loongson3_war in asm/sync.h for a # description). # # We disable this in order to prevent the assembler meddling with the -- cgit v1.2.3 From ae4cd0b1a4756344cb99c0004d156b585cf9e907 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Tue, 1 Oct 2019 21:53:41 +0000 Subject: MIPS: barrier: Make __smp_mb__before_atomic() a no-op for Loongson3 Loongson3 systems with CONFIG_CPU_LOONGSON3_WORKAROUNDS enabled already emit a full completion barrier as part of the inline assembly containing LL/SC loops for atomic operations. As such the barrier emitted by __smp_mb__before_atomic() is redundant, and we can remove it. Signed-off-by: Paul Burton Cc: linux-mips@vger.kernel.org Cc: Huacai Chen Cc: Jiaxun Yang Cc: linux-kernel@vger.kernel.org --- arch/mips/include/asm/barrier.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/mips/include/asm/barrier.h b/arch/mips/include/asm/barrier.h index 6d92d5ccdafa..49ff172a72b9 100644 --- a/arch/mips/include/asm/barrier.h +++ b/arch/mips/include/asm/barrier.h @@ -119,7 +119,17 @@ static inline void wmb(void) #define nudge_writes() mb() #endif -#define __smp_mb__before_atomic() __smp_mb__before_llsc() +/* + * In the Loongson3 LL/SC workaround case, all of our LL/SC loops already have + * a completion barrier immediately preceding the LL instruction. Therefore we + * can skip emitting a barrier from __smp_mb__before_atomic(). + */ +#ifdef CONFIG_CPU_LOONGSON3_WORKAROUNDS +# define __smp_mb__before_atomic() +#else +# define __smp_mb__before_atomic() __smp_mb__before_llsc() +#endif + #define __smp_mb__after_atomic() smp_llsc_mb() static inline void sync_ginv(void) -- cgit v1.2.3 From 12dbb04f2ac1fcbef0d6463abb3071ce8d8fe45f Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Tue, 1 Oct 2019 21:53:42 +0000 Subject: MIPS: genex: Add Loongson3 LL/SC workaround to ejtag_debug_handler In ejtag_debug_handler we use LL & SC instructions to acquire & release an open-coded spinlock. For Loongson3 systems affected by LL/SC errata this requires that we insert a sync instruction prior to the LL in order to ensure correct behavior of the LL/SC loop. 
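[ ed: In C terms, the handler's open-coded lock acquisition amounts to roughly the sketch below. It is not the handler itself, which runs in k0 at exception level; lock_word stands in for ejtag_debug_buffer_spinlock. Note that both retry branches target the sync, which is exactly what the workaround requires. ]

static unsigned long lock_word;	/* stand-in for ejtag_debug_buffer_spinlock */

static void ejtag_lock_sketch(void)
{
	unsigned long tmp;

	__asm__ __volatile__(
	"1:	sync			\n"	/* Loongson3 LL/SC workaround */
	"	ll	%0, %1		\n"	/* load-linked the lock word */
	"	bnez	%0, 1b		\n"	/* spin while the lock is held */
	"	li	%0, 1		\n"	/* value to store */
	"	sc	%0, %1		\n"	/* try to take the lock */
	"	beqz	%0, 1b		\n"	/* retry from the sync if SC failed */
	: "=&r" (tmp), "+m" (lock_word)
	: : "memory");
}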
Signed-off-by: Paul Burton Cc: linux-mips@vger.kernel.org Cc: Huacai Chen Cc: Jiaxun Yang Cc: linux-kernel@vger.kernel.org --- arch/mips/kernel/genex.S | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S index d586cdac9605..637048ec2acb 100644 --- a/arch/mips/kernel/genex.S +++ b/arch/mips/kernel/genex.S @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -353,6 +354,7 @@ NESTED(ejtag_debug_handler, PT_SIZE, sp) #ifdef CONFIG_SMP 1: PTR_LA k0, ejtag_debug_buffer_spinlock + __SYNC(full, loongson3_war) ll k0, 0(k0) bnez k0, 1b PTR_LA k0, ejtag_debug_buffer_spinlock -- cgit v1.2.3 From 4dee90d7b5796692e8da78c7b64cf42d5e4c1b09 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Tue, 1 Oct 2019 21:53:43 +0000 Subject: MIPS: genex: Don't reload address unnecessarily In ejtag_debug_handler() we must reload the address of ejtag_debug_buffer_spinlock if an sc fails, since the address in k0 will have been clobbered by the result of the sc instruction. In the case where we simply load a non-zero value (ie. there's contention for the lock) the address will not be clobbered & we can simply branch back to repeat the load from memory without reloading the address into k0. The primary motivation for this change is that it moves the target of the bnez instruction to an instruction within the LL/SC loop (the LL itself), which we know contains no other memory accesses & therefore isn't affected by Loongson3 LL/SC errata. Signed-off-by: Paul Burton Cc: linux-mips@vger.kernel.org Cc: Huacai Chen Cc: Jiaxun Yang Cc: linux-kernel@vger.kernel.org --- arch/mips/kernel/genex.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S index 637048ec2acb..0a43c9125267 100644 --- a/arch/mips/kernel/genex.S +++ b/arch/mips/kernel/genex.S @@ -355,8 +355,8 @@ NESTED(ejtag_debug_handler, PT_SIZE, sp) #ifdef CONFIG_SMP 1: PTR_LA k0, ejtag_debug_buffer_spinlock __SYNC(full, loongson3_war) - ll k0, 0(k0) - bnez k0, 1b +2: ll k0, 0(k0) + bnez k0, 2b PTR_LA k0, ejtag_debug_buffer_spinlock sc k0, 0(k0) beqz k0, 1b -- cgit v1.2.3 From e4acfbc18fc9e0d75ad15a652864b3971892e423 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Tue, 1 Oct 2019 21:53:44 +0000 Subject: MIPS: Check Loongson3 LL/SC errata workaround correctness When Loongson3 LL/SC errata workarounds are enabled (ie. CONFIG_CPU_LOONGSON3_WORKAROUNDS=y) run a tool to scan through the compiled kernel & ensure that the workaround is applied correctly. That is, ensure that: - Every LL or LLD instruction is preceded by a sync instruction. - Any branches from within an LL/SC loop to outside of that loop target a sync instruction. Reasoning for these conditions can be found by reading the comment above the definition of __SYNC_loongson3_war in arch/mips/include/asm/sync.h. This tool will help ensure that we don't inadvertently introduce code paths that miss the required workarounds. 
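[ ed: A worked example of the decode the tool performs, as a sketch; the 0x30/0x34 constants mirror the OP_LL/OP_LLD definitions in the tool below. The word 0xc0620000 encodes ll v0,0(v1); shifting it right by 26 yields the primary opcode 0x30. ]

#include <stdbool.h>
#include <stdint.h>

static bool insn_is_ll_sketch(uint32_t insn)
{
	uint32_t op = insn >> 26;		/* primary opcode, bits 31:26 */

	return op == 0x30 || op == 0x34;	/* OP_LL or OP_LLD */
}

/* insn_is_ll_sketch(0xc0620000) == true, since 0xc0620000 >> 26 == 0x30 */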
Signed-off-by: Paul Burton Cc: linux-mips@vger.kernel.org Cc: Huacai Chen Cc: Jiaxun Yang Cc: linux-kernel@vger.kernel.org --- arch/mips/Makefile | 3 + arch/mips/Makefile.postlink | 10 +- arch/mips/tools/.gitignore | 1 + arch/mips/tools/Makefile | 5 + arch/mips/tools/loongson3-llsc-check.c | 307 +++++++++++++++++++++++++++++++++ 5 files changed, 325 insertions(+), 1 deletion(-) create mode 100644 arch/mips/tools/loongson3-llsc-check.c (limited to 'arch') diff --git a/arch/mips/Makefile b/arch/mips/Makefile index cdc09b71febe..0a5eab626260 100644 --- a/arch/mips/Makefile +++ b/arch/mips/Makefile @@ -14,6 +14,9 @@ archscripts: scripts_basic $(Q)$(MAKE) $(build)=arch/mips/tools elf-entry +ifeq ($(CONFIG_CPU_LOONGSON3_WORKAROUNDS),y) + $(Q)$(MAKE) $(build)=arch/mips/tools loongson3-llsc-check +endif $(Q)$(MAKE) $(build)=arch/mips/boot/tools relocs KBUILD_DEFCONFIG := 32r2el_defconfig diff --git a/arch/mips/Makefile.postlink b/arch/mips/Makefile.postlink index 4eea4188cb20..f03fdc95143e 100644 --- a/arch/mips/Makefile.postlink +++ b/arch/mips/Makefile.postlink @@ -3,7 +3,8 @@ # Post-link MIPS pass # =========================================================================== # -# 1. Insert relocations into vmlinux +# 1. Check that Loongson3 LL/SC workarounds are applied correctly +# 2. Insert relocations into vmlinux PHONY := __archpost __archpost: @@ -11,6 +12,10 @@ __archpost: -include include/config/auto.conf include scripts/Kbuild.include +CMD_LS3_LLSC = arch/mips/tools/loongson3-llsc-check +quiet_cmd_ls3_llsc = LLSCCHK $@ + cmd_ls3_llsc = $(CMD_LS3_LLSC) $@ + CMD_RELOCS = arch/mips/boot/tools/relocs quiet_cmd_relocs = RELOCS $@ cmd_relocs = $(CMD_RELOCS) $@ @@ -19,6 +24,9 @@ quiet_cmd_relocs = RELOCS $@ vmlinux: FORCE @true +ifeq ($(CONFIG_CPU_LOONGSON3_WORKAROUNDS),y) + $(call if_changed,ls3_llsc) +endif ifeq ($(CONFIG_RELOCATABLE),y) $(call if_changed,relocs) endif diff --git a/arch/mips/tools/.gitignore b/arch/mips/tools/.gitignore index 56d34ccccce4..b0209450d9ff 100644 --- a/arch/mips/tools/.gitignore +++ b/arch/mips/tools/.gitignore @@ -1 +1,2 @@ elf-entry +loongson3-llsc-check diff --git a/arch/mips/tools/Makefile b/arch/mips/tools/Makefile index 3baee4bc6775..aaef688749f5 100644 --- a/arch/mips/tools/Makefile +++ b/arch/mips/tools/Makefile @@ -3,3 +3,8 @@ hostprogs-y := elf-entry PHONY += elf-entry elf-entry: $(obj)/elf-entry @: + +hostprogs-$(CONFIG_CPU_LOONGSON3_WORKAROUNDS) += loongson3-llsc-check +PHONY += loongson3-llsc-check +loongson3-llsc-check: $(obj)/loongson3-llsc-check + @: diff --git a/arch/mips/tools/loongson3-llsc-check.c b/arch/mips/tools/loongson3-llsc-check.c new file mode 100644 index 000000000000..0ebddd0ae46f --- /dev/null +++ b/arch/mips/tools/loongson3-llsc-check.c @@ -0,0 +1,307 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef be32toh +/* If libc provides le{16,32,64}toh() then we'll use them */ +#elif BYTE_ORDER == LITTLE_ENDIAN +# define le16toh(x) (x) +# define le32toh(x) (x) +# define le64toh(x) (x) +#elif BYTE_ORDER == BIG_ENDIAN +# define le16toh(x) bswap_16(x) +# define le32toh(x) bswap_32(x) +# define le64toh(x) bswap_64(x) +#endif + +/* MIPS opcodes, in bits 31:26 of an instruction */ +#define OP_SPECIAL 0x00 +#define OP_REGIMM 0x01 +#define OP_BEQ 0x04 +#define OP_BNE 0x05 +#define OP_BLEZ 0x06 +#define OP_BGTZ 0x07 +#define OP_BEQL 0x14 +#define OP_BNEL 0x15 +#define OP_BLEZL 0x16 +#define OP_BGTZL 0x17 
+#define OP_LL 0x30 +#define OP_LLD 0x34 +#define OP_SC 0x38 +#define OP_SCD 0x3c + +/* Bits 20:16 of OP_REGIMM instructions */ +#define REGIMM_BLTZ 0x00 +#define REGIMM_BGEZ 0x01 +#define REGIMM_BLTZL 0x02 +#define REGIMM_BGEZL 0x03 +#define REGIMM_BLTZAL 0x10 +#define REGIMM_BGEZAL 0x11 +#define REGIMM_BLTZALL 0x12 +#define REGIMM_BGEZALL 0x13 + +/* Bits 5:0 of OP_SPECIAL instructions */ +#define SPECIAL_SYNC 0x0f + +static void usage(FILE *f) +{ + fprintf(f, "Usage: loongson3-llsc-check /path/to/vmlinux\n"); +} + +static int se16(uint16_t x) +{ + return (int16_t)x; +} + +static bool is_ll(uint32_t insn) +{ + switch (insn >> 26) { + case OP_LL: + case OP_LLD: + return true; + + default: + return false; + } +} + +static bool is_sc(uint32_t insn) +{ + switch (insn >> 26) { + case OP_SC: + case OP_SCD: + return true; + + default: + return false; + } +} + +static bool is_sync(uint32_t insn) +{ + /* Bits 31:11 should all be zeroes */ + if (insn >> 11) + return false; + + /* Bits 5:0 specify the SYNC special encoding */ + if ((insn & 0x3f) != SPECIAL_SYNC) + return false; + + return true; +} + +static bool is_branch(uint32_t insn, int *off) +{ + switch (insn >> 26) { + case OP_BEQ: + case OP_BEQL: + case OP_BNE: + case OP_BNEL: + case OP_BGTZ: + case OP_BGTZL: + case OP_BLEZ: + case OP_BLEZL: + *off = se16(insn) + 1; + return true; + + case OP_REGIMM: + switch ((insn >> 16) & 0x1f) { + case REGIMM_BGEZ: + case REGIMM_BGEZL: + case REGIMM_BGEZAL: + case REGIMM_BGEZALL: + case REGIMM_BLTZ: + case REGIMM_BLTZL: + case REGIMM_BLTZAL: + case REGIMM_BLTZALL: + *off = se16(insn) + 1; + return true; + + default: + return false; + } + + default: + return false; + } +} + +static int check_ll(uint64_t pc, uint32_t *code, size_t sz) +{ + ssize_t i, max, sc_pos; + int off; + + /* + * Every LL must be preceded by a sync instruction in order to ensure + * that instruction reordering doesn't allow a prior memory access to + * execute after the LL & cause erroneous results. + */ + if (!is_sync(le32toh(code[-1]))) { + fprintf(stderr, "%" PRIx64 ": LL not preceded by sync\n", pc); + return -EINVAL; + } + + /* Find the matching SC instruction */ + max = sz / 4; + for (sc_pos = 0; sc_pos < max; sc_pos++) { + if (is_sc(le32toh(code[sc_pos]))) + break; + } + if (sc_pos >= max) { + fprintf(stderr, "%" PRIx64 ": LL has no matching SC\n", pc); + return -EINVAL; + } + + /* + * Check branches within the LL/SC loop target sync instructions, + * ensuring that speculative execution can't generate memory accesses + * due to instructions outside of the loop. + */ + for (i = 0; i < sc_pos; i++) { + if (!is_branch(le32toh(code[i]), &off)) + continue; + + /* + * If the branch target is within the LL/SC loop then we don't + * need to worry about it. + */ + if ((off >= -i) && (off <= sc_pos)) + continue; + + /* If the branch targets a sync instruction we're all good... 
*/ + if (is_sync(le32toh(code[i + off]))) + continue; + + /* ...but if not, we have a problem */ + fprintf(stderr, "%" PRIx64 ": Branch target not a sync\n", + pc + (i * 4)); + return -EINVAL; + } + + return 0; +} + +static int check_code(uint64_t pc, uint32_t *code, size_t sz) +{ + int err = 0; + + if (sz % 4) { + fprintf(stderr, "%" PRIx64 ": Section size not a multiple of 4\n", + pc); + err = -EINVAL; + sz -= (sz % 4); + } + + if (is_ll(le32toh(code[0]))) { + fprintf(stderr, "%" PRIx64 ": First instruction in section is an LL\n", + pc); + err = -EINVAL; + } + +#define advance() ( \ + code++, \ + pc += 4, \ + sz -= 4 \ +) + + /* + * Skip the first instruction, allowing check_ll to look backwards + * unconditionally. + */ + advance(); + + /* Now scan through the code looking for LL instructions */ + for (; sz; advance()) { + if (is_ll(le32toh(code[0]))) + err |= check_ll(pc, code, sz); + } + + return err; +} + +int main(int argc, char *argv[]) +{ + int vmlinux_fd, status, err, i; + const char *vmlinux_path; + struct stat st; + Elf64_Ehdr *eh; + Elf64_Shdr *sh; + void *vmlinux; + + status = EXIT_FAILURE; + + if (argc < 2) { + usage(stderr); + goto out_ret; + } + + vmlinux_path = argv[1]; + vmlinux_fd = open(vmlinux_path, O_RDONLY); + if (vmlinux_fd == -1) { + perror("Unable to open vmlinux"); + goto out_ret; + } + + err = fstat(vmlinux_fd, &st); + if (err) { + perror("Unable to stat vmlinux"); + goto out_close; + } + + vmlinux = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, vmlinux_fd, 0); + if (vmlinux == MAP_FAILED) { + perror("Unable to mmap vmlinux"); + goto out_close; + } + + eh = vmlinux; + if (memcmp(eh->e_ident, ELFMAG, SELFMAG)) { + fprintf(stderr, "vmlinux is not an ELF?\n"); + goto out_munmap; + } + + if (eh->e_ident[EI_CLASS] != ELFCLASS64) { + fprintf(stderr, "vmlinux is not 64b?\n"); + goto out_munmap; + } + + if (eh->e_ident[EI_DATA] != ELFDATA2LSB) { + fprintf(stderr, "vmlinux is not little endian?\n"); + goto out_munmap; + } + + for (i = 0; i < le16toh(eh->e_shnum); i++) { + sh = vmlinux + le64toh(eh->e_shoff) + (i * le16toh(eh->e_shentsize)); + + if (sh->sh_type != SHT_PROGBITS) + continue; + if (!(sh->sh_flags & SHF_EXECINSTR)) + continue; + + err = check_code(le64toh(sh->sh_addr), + vmlinux + le64toh(sh->sh_offset), + le64toh(sh->sh_size)); + if (err) + goto out_munmap; + } + + status = EXIT_SUCCESS; +out_munmap: + munmap(vmlinux, st.st_size); +out_close: + close(vmlinux_fd); +out_ret: + return status; +} -- cgit v1.2.3 From 397dc00e249ec64e106374565575dd0eb7e25998 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Mon, 16 Sep 2019 14:13:10 +0300 Subject: mips: sgi-ip27: switch from DISCONTIGMEM to SPARSEMEM The memory initialization of SGI-IP27 is already halfway to supporting SPARSEMEM. It only had free_bootmem_with_active_regions() left-overs interfering with sparse_memory_present_with_active_regions(). Replace these calls with a simpler memblocks_present() call in prom_meminit() and adjust arch/mips/Kconfig to enable SPARSEMEM and SPARSEMEM_EXTREME for SGI-IP27.
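[ ed: For reference, the generic helper the platform switches to walks the memblock regions and marks each one present. In kernels of this era it looked roughly like the following, paraphrased from mm/sparse.c; verify against your tree before relying on it. ]

void __init memblocks_present(void)
{
	struct memblock_region *reg;

	for_each_memblock(memory, reg)
		memory_present(memblock_get_region_node(reg),
			       memblock_region_memory_base_pfn(reg),
			       memblock_region_memory_end_pfn(reg));
}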
Co-developed-by: Thomas Bogendoerfer Signed-off-by: Thomas Bogendoerfer Signed-off-by: Mike Rapoport Signed-off-by: Paul Burton Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org --- arch/mips/Kconfig | 12 ++---------- arch/mips/sgi-ip27/ip27-memory.c | 6 ++---- 2 files changed, 4 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 8736bf4420bc..a4ce359c6ba4 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -674,6 +674,7 @@ config SGI_IP22 config SGI_IP27 bool "SGI IP27 (Origin200/2000)" select ARCH_HAS_PHYS_TO_DMA + select ARCH_SPARSEMEM_ENABLE select FW_ARC select FW_ARC64 select BOOT_ELF64 @@ -2618,18 +2619,9 @@ config ARCH_FLATMEM_ENABLE def_bool y depends on !NUMA && !CPU_LOONGSON2 -config ARCH_DISCONTIGMEM_ENABLE - bool - default y if SGI_IP27 - help - Say Y to support efficient handling of discontiguous physical memory, - for architectures which are either NUMA (Non-Uniform Memory Access) - or have huge holes in the physical address space for other reasons. - See <file:Documentation/vm/numa> for more. - config ARCH_SPARSEMEM_ENABLE bool - select SPARSEMEM_STATIC + select SPARSEMEM_STATIC if !SGI_IP27 config NUMA bool "NUMA Support" diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c index f63f30b2cdcd..3e2f39dfbbf5 100644 --- a/arch/mips/sgi-ip27/ip27-memory.c +++ b/arch/mips/sgi-ip27/ip27-memory.c @@ -385,12 +385,8 @@ static void __init node_mem_init(nasid_t node) slot_freepfn += PFN_UP(sizeof(struct pglist_data) + sizeof(struct hub_data)); - free_bootmem_with_active_regions(node, end_pfn); - memblock_reserve(slot_firstpfn << PAGE_SHIFT, ((slot_freepfn - slot_firstpfn) << PAGE_SHIFT)); - - sparse_memory_present_with_active_regions(node); } /* @@ -423,6 +419,8 @@ void __init prom_meminit(void) } __node_data[node] = &null_node; } + + memblocks_present(); } void __init prom_free_prom_memory(void) -- cgit v1.2.3 From 6a6f9b7dafd50efc1b243fb25c3766ebc78adc7b Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sat, 21 Sep 2019 21:50:26 +0800 Subject: MIPS: Loongson: Add CPUCFG&CSR support Loongson-3A R4+ (Loongson-3A4000 and newer) has CPUCFG (CPU config) and CSR (Control and Status Register) extensions. This patch adds read/write functionality for them.
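[ ed: A usage sketch built only from the helpers and constants this patch introduces; report_csr_features() is an invented name and error reporting is elided. ]

static void report_csr_features(void)
{
	u32 features;

	if (!cpu_has_csr())
		return;	/* PRID is not LOONGSON_64G, or the LCSRP bit is clear */

	features = csr_readl(LOONGSON_CSR_FEATURES);
	if (features & LOONGSON_CSRF_IPI)
		pr_info("Loongson CSR IPI supported\n");
	if (features & LOONGSON_CSRF_TEMP)
		pr_info("Loongson CSR temperature sensor supported\n");
}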
Signed-off-by: Huacai Chen Signed-off-by: Jiaxun Yang Signed-off-by: Paul Burton Cc: Ralf Baechle Cc: James Hogan Cc: linux-mips@linux-mips.org Cc: linux-mips@vger.kernel.org Cc: Fuxin Zhang Cc: Zhangjin Wu Cc: Huacai Chen --- .../include/asm/mach-loongson64/loongson_regs.h | 227 +++++++++++++++++++++ 1 file changed, 227 insertions(+) create mode 100644 arch/mips/include/asm/mach-loongson64/loongson_regs.h (limited to 'arch') diff --git a/arch/mips/include/asm/mach-loongson64/loongson_regs.h b/arch/mips/include/asm/mach-loongson64/loongson_regs.h new file mode 100644 index 000000000000..6e3569ab8936 --- /dev/null +++ b/arch/mips/include/asm/mach-loongson64/loongson_regs.h @@ -0,0 +1,227 @@ +/* + * Read/Write Loongson Extension Registers + */ + +#ifndef _LOONGSON_REGS_H_ +#define _LOONGSON_REGS_H_ + +#include +#include + +#include +#include + +static inline bool cpu_has_cfg(void) +{ + return ((read_c0_prid() & PRID_IMP_MASK) == PRID_IMP_LOONGSON_64G); +} + +static inline u32 read_cpucfg(u32 reg) +{ + u32 __res; + + __asm__ __volatile__( + "parse_r __res,%0\n\t" + "parse_r reg,%1\n\t" + ".insn \n\t" + ".word (0xc8080118 | (reg << 21) | (__res << 11))\n\t" + :"=r"(__res) + :"r"(reg) + : + ); + return __res; +} + +/* Bit Domains for CFG registers */ +#define LOONGSON_CFG0 0x0 +#define LOONGSON_CFG0_PRID GENMASK(31, 0) + +#define LOONGSON_CFG1 0x1 +#define LOONGSON_CFG1_FP BIT(0) +#define LOONGSON_CFG1_FPREV GENMASK(3, 1) +#define LOONGSON_CFG1_MMI BIT(4) +#define LOONGSON_CFG1_MSA1 BIT(5) +#define LOONGSON_CFG1_MSA2 BIT(6) +#define LOONGSON_CFG1_CGP BIT(7) +#define LOONGSON_CFG1_WRP BIT(8) +#define LOONGSON_CFG1_LSX1 BIT(9) +#define LOONGSON_CFG1_LSX2 BIT(10) +#define LOONGSON_CFG1_LASX BIT(11) +#define LOONGSON_CFG1_R6FXP BIT(12) +#define LOONGSON_CFG1_R6CRCP BIT(13) +#define LOONGSON_CFG1_R6FPP BIT(14) +#define LOONGSON_CFG1_CNT64 BIT(15) +#define LOONGSON_CFG1_LSLDR0 BIT(16) +#define LOONGSON_CFG1_LSPREF BIT(17) +#define LOONGSON_CFG1_LSPREFX BIT(18) +#define LOONGSON_CFG1_LSSYNCI BIT(19) +#define LOONGSON_CFG1_LSUCA BIT(20) +#define LOONGSON_CFG1_LLSYNC BIT(21) +#define LOONGSON_CFG1_TGTSYNC BIT(22) +#define LOONGSON_CFG1_LLEXC BIT(23) +#define LOONGSON_CFG1_SCRAND BIT(24) +#define LOONGSON_CFG1_MUALP BIT(25) +#define LOONGSON_CFG1_KMUALEN BIT(26) +#define LOONGSON_CFG1_ITLBT BIT(27) +#define LOONGSON_CFG1_LSUPERF BIT(28) +#define LOONGSON_CFG1_SFBP BIT(29) +#define LOONGSON_CFG1_CDMAP BIT(30) + +#define LOONGSON_CFG2 0x2 +#define LOONGSON_CFG2_LEXT1 BIT(0) +#define LOONGSON_CFG2_LEXT2 BIT(1) +#define LOONGSON_CFG2_LEXT3 BIT(2) +#define LOONGSON_CFG2_LSPW BIT(3) +#define LOONGSON_CFG2_LBT1 BIT(4) +#define LOONGSON_CFG2_LBT2 BIT(5) +#define LOONGSON_CFG2_LBT3 BIT(6) +#define LOONGSON_CFG2_LBTMMU BIT(7) +#define LOONGSON_CFG2_LPMP BIT(8) +#define LOONGSON_CFG2_LPMPREV GENMASK(11, 9) +#define LOONGSON_CFG2_LAMO BIT(12) +#define LOONGSON_CFG2_LPIXU BIT(13) +#define LOONGSON_CFG2_LPIXUN BIT(14) +#define LOONGSON_CFG2_LZVP BIT(15) +#define LOONGSON_CFG2_LZVREV GENMASK(18, 16) +#define LOONGSON_CFG2_LGFTP BIT(19) +#define LOONGSON_CFG2_LGFTPREV GENMASK(22, 20) +#define LOONGSON_CFG2_LLFTP BIT(23) +#define LOONGSON_CFG2_LLFTPREV GENMASK(24, 26) +#define LOONGSON_CFG2_LCSRP BIT(27) +#define LOONGSON_CFG2_LDISBLIKELY BIT(28) + +#define LOONGSON_CFG3 0x3 +#define LOONGSON_CFG3_LCAMP BIT(0) +#define LOONGSON_CFG3_LCAMREV GENMASK(3, 1) +#define LOONGSON_CFG3_LCAMNUM GENMASK(11, 4) +#define LOONGSON_CFG3_LCAMKW GENMASK(19, 12) +#define LOONGSON_CFG3_LCAMVW GENMASK(27, 20) + +#define LOONGSON_CFG4 0x4 
+#define LOONGSON_CFG4_CCFREQ GENMASK(31, 0) + +#define LOONGSON_CFG5 0x5 +#define LOONGSON_CFG5_CFM GENMASK(15, 0) +#define LOONGSON_CFG5_CFD GENMASK(31, 16) + +#define LOONGSON_CFG6 0x6 + +#define LOONGSON_CFG7 0x7 +#define LOONGSON_CFG7_GCCAEQRP BIT(0) +#define LOONGSON_CFG7_UCAWINP BIT(1) + +static inline bool cpu_has_csr(void) +{ + if (cpu_has_cfg()) + return (read_cpucfg(LOONGSON_CFG2) & LOONGSON_CFG2_LCSRP); + + return false; +} + +static inline u32 csr_readl(u32 reg) +{ + u32 __res; + + /* RDCSR reg, val */ + __asm__ __volatile__( + "parse_r __res,%0\n\t" + "parse_r reg,%1\n\t" + ".insn \n\t" + ".word (0xc8000118 | (reg << 21) | (__res << 11))\n\t" + :"=r"(__res) + :"r"(reg) + : + ); + return __res; +} + +static inline u64 csr_readq(u32 reg) +{ + u64 __res; + + /* DWRCSR reg, val */ + __asm__ __volatile__( + "parse_r __res,%0\n\t" + "parse_r reg,%1\n\t" + ".insn \n\t" + ".word (0xc8020118 | (reg << 21) | (__res << 11))\n\t" + :"=r"(__res) + :"r"(reg) + : + ); + return __res; +} + +static inline void csr_writel(u32 val, u32 reg) +{ + /* WRCSR reg, val */ + __asm__ __volatile__( + "parse_r reg,%0\n\t" + "parse_r val,%1\n\t" + ".insn \n\t" + ".word (0xc8010118 | (reg << 21) | (val << 11))\n\t" + : + :"r"(reg),"r"(val) + : + ); +} + +static inline void csr_writeq(u64 val, u32 reg) +{ + /* DWRCSR reg, val */ + __asm__ __volatile__( + "parse_r reg,%0\n\t" + "parse_r val,%1\n\t" + ".insn \n\t" + ".word (0xc8030118 | (reg << 21) | (val << 11))\n\t" + : + :"r"(reg),"r"(val) + : + ); +} + +/* Public CSR Register can also be accessed with regular addresses */ +#define CSR_PUBLIC_MMIO_BASE 0x1fe00000 + +#define MMIO_CSR(x) (void *)TO_UNCAC(CSR_PUBLIC_MMIO_BASE + x) + +#define LOONGSON_CSR_FEATURES 0x8 +#define LOONGSON_CSRF_TEMP BIT(0) +#define LOONGSON_CSRF_NODECNT BIT(1) +#define LOONGSON_CSRF_MSI BIT(2) +#define LOONGSON_CSRF_EXTIOI BIT(3) +#define LOONGSON_CSRF_IPI BIT(4) +#define LOONGSON_CSRF_FREQ BIT(5) + +#define LOONGSON_CSR_VENDOR 0x10 /* Vendor name string, should be "Loongson" */ +#define LOONGSON_CSR_CPUNAME 0x20 /* Processor name string */ +#define LOONGSON_CSR_NODECNT 0x408 +#define LOONGSON_CSR_CPUTEMP 0x428 + +/* PerCore CSR, only accessable by local cores */ +#define LOONGSON_CSR_IPI_STATUS 0x1000 +#define LOONGSON_CSR_IPI_EN 0x1004 +#define LOONGSON_CSR_IPI_SET 0x1008 +#define LOONGSON_CSR_IPI_CLEAR 0x100c +#define LOONGSON_CSR_IPI_SEND 0x1040 +#define CSR_IPI_SEND_IP_SHIFT 0 +#define CSR_IPI_SEND_CPU_SHIFT 16 +#define CSR_IPI_SEND_BLOCK BIT(31) + +static inline u64 drdtime(void) +{ + int rID = 0; + u64 val = 0; + + __asm__ __volatile__( + "parse_r rID,%0\n\t" + "parse_r val,%1\n\t" + ".insn \n\t" + ".word (0xc8090118 | (rID << 21) | (val << 11))\n\t" + :"=r"(rID),"=r"(val) + : + ); + return val; +} + +#endif -- cgit v1.2.3 From 7507445b1993087d2a6ef3e30e3eaeb2da40dbc8 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sat, 21 Sep 2019 21:50:27 +0800 Subject: MIPS: Loongson: Add Loongson-3A R4 basic support All Loongson-3 CPU family: Code-name Brand-name PRId Loongson-3A R1 Loongson-3A1000 0x6305 Loongson-3A R2 Loongson-3A2000 0x6308 Loongson-3A R2.1 Loongson-3A2000 0x630c Loongson-3A R3 Loongson-3A3000 0x6309 Loongson-3A R3.1 Loongson-3A3000 0x630d Loongson-3A R4 Loongson-3A4000 0xc000 Loongson-3B R1 Loongson-3B1000 0x6306 Loongson-3B R2 Loongson-3B1500 0x6307 Features of R4 revision of Loongson-3A: - All R2/R3 features, including SFB, V-Cache, FTLB, RIXI, DSP, etc. - Support variable ASID bits. - Support MSA and VZ extensions. 
- Support CPUCFG (CPU config) and CSR (Control and Status Register) extensions. - 64 entries of VTLB (classic TLB), 2048 entries of FTLB (8-way set-associative). Now 64-bit Loongson processors has three types of PRID.IMP: 0x6300 is the classic one so we call it PRID_IMP_LOONGSON_64C (e.g., Loongson-2E/ 2F/3A1000/3B1000/3B1500/3A2000/3A3000), 0x6100 is for some processors which has reduced capabilities so we call it PRID_IMP_LOONGSON_64R (e.g., Loongson-2K), 0xc000 is supposed to cover all new processors in general (e.g., Loongson-3A4000+) so we call it PRID_IMP_LOONGSON_64G. Signed-off-by: Huacai Chen Signed-off-by: Jiaxun Yang Signed-off-by: Paul Burton Cc: Ralf Baechle Cc: James Hogan Cc: linux-mips@linux-mips.org Cc: linux-mips@vger.kernel.org Cc: Fuxin Zhang Cc: Zhangjin Wu Cc: Huacai Chen --- arch/mips/Kconfig | 2 + arch/mips/include/asm/cpu.h | 4 +- .../asm/mach-loongson64/kernel-entry-init.h | 28 +++++-- arch/mips/kernel/cpu-probe.c | 16 +++- arch/mips/kernel/idle.c | 3 +- arch/mips/loongson64/loongson-3/smp.c | 90 ++++++++++++---------- arch/mips/mm/c-r4k.c | 3 +- drivers/platform/mips/cpu_hwmon.c | 15 ++++ 8 files changed, 111 insertions(+), 50 deletions(-) (limited to 'arch') diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index a4ce359c6ba4..3ef8f8a2a0fd 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -1387,9 +1387,11 @@ config CPU_LOONGSON3 select CPU_SUPPORTS_64BIT_KERNEL select CPU_SUPPORTS_HIGHMEM select CPU_SUPPORTS_HUGEPAGES + select CPU_SUPPORTS_MSA select CPU_HAS_LOAD_STORE_LR select WEAK_ORDERING select WEAK_REORDERING_BEYOND_LLSC + select MIPS_ASID_BITS_VARIABLE select MIPS_PGD_C0_CONTEXT select MIPS_L1_CACHE_SHIFT_6 select GPIOLIB diff --git a/arch/mips/include/asm/cpu.h b/arch/mips/include/asm/cpu.h index 7fddcb8350c6..81ddb575502a 100644 --- a/arch/mips/include/asm/cpu.h +++ b/arch/mips/include/asm/cpu.h @@ -91,7 +91,9 @@ #define PRID_IMP_LOONGSON_32 0x4200 /* Loongson-1 */ #define PRID_IMP_R5432 0x5400 #define PRID_IMP_R5500 0x5500 -#define PRID_IMP_LOONGSON_64 0x6300 /* Loongson-2/3 */ +#define PRID_IMP_LOONGSON_64R 0x6100 /* Reduced Loongson-2 */ +#define PRID_IMP_LOONGSON_64C 0x6300 /* Classic Loongson-2 and Loongson-3 */ +#define PRID_IMP_LOONGSON_64G 0xc000 /* Generic Loongson-2 and Loongson-3 */ #define PRID_IMP_UNKNOWN 0xff00 diff --git a/arch/mips/include/asm/mach-loongson64/kernel-entry-init.h b/arch/mips/include/asm/mach-loongson64/kernel-entry-init.h index b5e288a12dfe..b9687320024d 100644 --- a/arch/mips/include/asm/mach-loongson64/kernel-entry-init.h +++ b/arch/mips/include/asm/mach-loongson64/kernel-entry-init.h @@ -30,13 +30,21 @@ mtc0 t0, CP0_PAGEGRAIN /* Enable STFill Buffer */ mfc0 t0, CP0_PRID + /* Loongson-3A R4+ */ + andi t1, t0, PRID_IMP_MASK + li t2, PRID_IMP_LOONGSON_64G + beq t1, t2, 1f + nop + /* Loongson-3A R2/R3 */ andi t0, (PRID_IMP_MASK | PRID_REV_MASK) - slti t0, (PRID_IMP_LOONGSON_64 | PRID_REV_LOONGSON3A_R2_0) - bnez t0, 1f + slti t0, (PRID_IMP_LOONGSON_64C | PRID_REV_LOONGSON3A_R2_0) + bnez t0, 2f + nop +1: mfc0 t0, CP0_CONFIG6 or t0, 0x100 mtc0 t0, CP0_CONFIG6 -1: +2: _ehb .set pop #endif @@ -59,13 +67,21 @@ mtc0 t0, CP0_PAGEGRAIN /* Enable STFill Buffer */ mfc0 t0, CP0_PRID + /* Loongson-3A R4+ */ + andi t1, t0, PRID_IMP_MASK + li t2, PRID_IMP_LOONGSON_64G + beq t1, t2, 1f + nop + /* Loongson-3A R2/R3 */ andi t0, (PRID_IMP_MASK | PRID_REV_MASK) - slti t0, (PRID_IMP_LOONGSON_64 | PRID_REV_LOONGSON3A_R2_0) - bnez t0, 1f + slti t0, (PRID_IMP_LOONGSON_64C | PRID_REV_LOONGSON3A_R2_0) + bnez t0, 2f + nop +1: mfc0 t0, 
CP0_CONFIG6 or t0, 0x100 mtc0 t0, CP0_CONFIG6 -1: +2: _ehb .set pop #endif diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c index c2eb392597bf..bbfc954615c8 100644 --- a/arch/mips/kernel/cpu-probe.c +++ b/arch/mips/kernel/cpu-probe.c @@ -1526,7 +1526,7 @@ static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu) MIPS_CPU_LLSC | MIPS_CPU_BP_GHIST; c->tlbsize = 64; break; - case PRID_IMP_LOONGSON_64: /* Loongson-2/3 */ + case PRID_IMP_LOONGSON_64C: /* Loongson-2/3 */ switch (c->processor_id & PRID_REV_MASK) { case PRID_REV_LOONGSON2E: c->cputype = CPU_LOONGSON2; @@ -1565,6 +1565,7 @@ static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu) MIPS_CPU_FPU | MIPS_CPU_LLSC | MIPS_CPU_32FPR; c->tlbsize = 64; + set_cpu_asid_mask(c, MIPS_ENTRYHI_ASID); c->writecombine = _CACHE_UNCACHED_ACCELERATED; break; case PRID_IMP_LOONGSON_32: /* Loongson-1 */ @@ -1903,7 +1904,7 @@ platform: static inline void cpu_probe_loongson(struct cpuinfo_mips *c, unsigned int cpu) { switch (c->processor_id & PRID_IMP_MASK) { - case PRID_IMP_LOONGSON_64: /* Loongson-2/3 */ + case PRID_IMP_LOONGSON_64C: /* Loongson-2/3 */ switch (c->processor_id & PRID_REV_MASK) { case PRID_REV_LOONGSON3A_R2_0: case PRID_REV_LOONGSON3A_R2_1: @@ -1921,6 +1922,17 @@ static inline void cpu_probe_loongson(struct cpuinfo_mips *c, unsigned int cpu) break; } + decode_configs(c); + c->options |= MIPS_CPU_FTLB | MIPS_CPU_TLBINV | MIPS_CPU_LDPTE; + c->writecombine = _CACHE_UNCACHED_ACCELERATED; + c->ases |= (MIPS_ASE_LOONGSON_MMI | MIPS_ASE_LOONGSON_CAM | + MIPS_ASE_LOONGSON_EXT | MIPS_ASE_LOONGSON_EXT2); + break; + case PRID_IMP_LOONGSON_64G: + c->cputype = CPU_LOONGSON3; + __cpu_name[cpu] = "ICT Loongson-3"; + set_elf_platform(cpu, "loongson3a"); + set_isa(c, MIPS_CPU_ISA_M64R2); decode_configs(c); c->options |= MIPS_CPU_FTLB | MIPS_CPU_TLBINV | MIPS_CPU_LDPTE; c->writecombine = _CACHE_UNCACHED_ACCELERATED; diff --git a/arch/mips/kernel/idle.c b/arch/mips/kernel/idle.c index eb2afc0b8db1..980d6c39aab3 100644 --- a/arch/mips/kernel/idle.c +++ b/arch/mips/kernel/idle.c @@ -179,7 +179,8 @@ void __init check_wait(void) cpu_wait = r4k_wait; break; case CPU_LOONGSON3: - if ((c->processor_id & PRID_REV_MASK) >= PRID_REV_LOONGSON3A_R2_0) + if ((c->processor_id & (PRID_IMP_MASK | PRID_REV_MASK)) >= + (PRID_IMP_LOONGSON_64C | PRID_REV_LOONGSON3A_R2_0)) cpu_wait = r4k_wait; break; diff --git a/arch/mips/loongson64/loongson-3/smp.c b/arch/mips/loongson64/loongson-3/smp.c index ce68cdaaf33c..e999bb11f065 100644 --- a/arch/mips/loongson64/loongson-3/smp.c +++ b/arch/mips/loongson64/loongson-3/smp.c @@ -450,7 +450,7 @@ static void loongson3_cpu_die(unsigned int cpu) * flush all L1 entries at first. Then, another core (usually Core 0) can * safely disable the clock of the target core. 
loongson3_play_dead() is * called via CKSEG1 (uncached and unmmaped) */ -static void loongson3a_r1_play_dead(int *state_addr) +static void loongson3_type1_play_dead(int *state_addr) { register int val; register long cpuid, core, node, count; @@ -512,7 +512,7 @@ static void loongson3a_r1_play_dead(int *state_addr) : "a1"); } -static void loongson3a_r2r3_play_dead(int *state_addr) +static void loongson3_type2_play_dead(int *state_addr) { register int val; register long cpuid, core, node, count; @@ -532,27 +532,7 @@ static void loongson3a_r2r3_play_dead(int *state_addr) " cache 1, 3(%[addr]) \n" " addiu %[sets], %[sets], -1 \n" " bnez %[sets], 1b \n" - " addiu %[addr], %[addr], 0x40 \n" - " li %[addr], 0x80000000 \n" /* KSEG0 */ - "2: cache 2, 0(%[addr]) \n" /* flush L1 VCache */ - " cache 2, 1(%[addr]) \n" - " cache 2, 2(%[addr]) \n" - " cache 2, 3(%[addr]) \n" - " cache 2, 4(%[addr]) \n" - " cache 2, 5(%[addr]) \n" - " cache 2, 6(%[addr]) \n" - " cache 2, 7(%[addr]) \n" - " cache 2, 8(%[addr]) \n" - " cache 2, 9(%[addr]) \n" - " cache 2, 10(%[addr]) \n" - " cache 2, 11(%[addr]) \n" - " cache 2, 12(%[addr]) \n" - " cache 2, 13(%[addr]) \n" - " cache 2, 14(%[addr]) \n" - " cache 2, 15(%[addr]) \n" - " addiu %[vsets], %[vsets], -1 \n" - " bnez %[vsets], 2b \n" - " addiu %[addr], %[addr], 0x40 \n" + " addiu %[addr], %[addr], 0x20 \n" " li %[val], 0x7 \n" /* *state_addr = CPU_DEAD; */ " sw %[val], (%[state_addr]) \n" " sync \n" @@ -560,8 +540,7 @@ static void loongson3a_r2r3_play_dead(int *state_addr) " .set pop \n" : [addr] "=&r" (addr), [val] "=&r" (val) : [state_addr] "r" (state_addr), - [sets] "r" (cpu_data[smp_processor_id()].dcache.sets), - [vsets] "r" (cpu_data[smp_processor_id()].vcache.sets)); + [sets] "r" (cpu_data[smp_processor_id()].dcache.sets)); __asm__ __volatile__( " .set push \n" @@ -576,6 +555,8 @@ static void loongson3a_r2r3_play_dead(int *state_addr) " andi %[node], %[cpuid], 0xc \n" " dsll %[node], 42 \n" /* get node id */ " or %[base], %[base], %[node] \n" + " dsrl %[node], 30 \n" /* 15:14 */ + " or %[base], %[base], %[node] \n" "1: li %[count], 0x100 \n" /* wait for init loop */ "2: bnez %[count], 2b \n" /* limit mailbox access */ " addiu %[count], -1 \n" @@ -595,7 +576,7 @@ static void loongson3a_r2r3_play_dead(int *state_addr) : "a1"); } -static void loongson3b_play_dead(int *state_addr) +static void loongson3_type3_play_dead(int *state_addr) { register int val; register long cpuid, core, node, count; @@ -615,7 +596,27 @@ static void loongson3b_play_dead(int *state_addr) " cache 1, 3(%[addr]) \n" " addiu %[sets], %[sets], -1 \n" " bnez %[sets], 1b \n" - " addiu %[addr], %[addr], 0x20 \n" + " addiu %[addr], %[addr], 0x40 \n" + " li %[addr], 0x80000000 \n" /* KSEG0 */ + "2: cache 2, 0(%[addr]) \n" /* flush L1 VCache */ + " cache 2, 1(%[addr]) \n" + " cache 2, 2(%[addr]) \n" + " cache 2, 3(%[addr]) \n" + " cache 2, 4(%[addr]) \n" + " cache 2, 5(%[addr]) \n" + " cache 2, 6(%[addr]) \n" + " cache 2, 7(%[addr]) \n" + " cache 2, 8(%[addr]) \n" + " cache 2, 9(%[addr]) \n" + " cache 2, 10(%[addr]) \n" + " cache 2, 11(%[addr]) \n" + " cache 2, 12(%[addr]) \n" + " cache 2, 13(%[addr]) \n" + " cache 2, 14(%[addr]) \n" + " cache 2, 15(%[addr]) \n" + " addiu %[vsets], %[vsets], -1 \n" + " bnez %[vsets], 2b \n" + " addiu %[addr], %[addr], 0x40 \n" " li %[val], 0x7 \n" /* *state_addr = CPU_DEAD; */ " sw %[val], (%[state_addr]) \n" " sync \n" @@ -623,7 +624,8 @@ static void loongson3b_play_dead(int *state_addr) " .set pop \n" : [addr] "=&r" (addr), [val] "=&r" (val) : [state_addr] "r" 
(state_addr), - [sets] "r" (cpu_data[smp_processor_id()].dcache.sets)); + [sets] "r" (cpu_data[smp_processor_id()].dcache.sets), + [vsets] "r" (cpu_data[smp_processor_id()].vcache.sets)); __asm__ __volatile__( " .set push \n" @@ -638,8 +640,6 @@ static void loongson3b_play_dead(int *state_addr) " andi %[node], %[cpuid], 0xc \n" " dsll %[node], 42 \n" /* get node id */ " or %[base], %[base], %[node] \n" - " dsrl %[node], 30 \n" /* 15:14 */ - " or %[base], %[base], %[node] \n" "1: li %[count], 0x100 \n" /* wait for init loop */ "2: bnez %[count], 2b \n" /* limit mailbox access */ " addiu %[count], -1 \n" @@ -661,30 +661,42 @@ static void loongson3b_play_dead(int *state_addr) void play_dead(void) { - int *state_addr; + int prid_imp, prid_rev, *state_addr; unsigned int cpu = smp_processor_id(); void (*play_dead_at_ckseg1)(int *); idle_task_exit(); - switch (read_c0_prid() & PRID_REV_MASK) { + + prid_imp = read_c0_prid() & PRID_IMP_MASK; + prid_rev = read_c0_prid() & PRID_REV_MASK; + + if (prid_imp == PRID_IMP_LOONGSON_64G) { + play_dead_at_ckseg1 = + (void *)CKSEG1ADDR((unsigned long)loongson3_type3_play_dead); + goto out; + } + + switch (prid_rev) { case PRID_REV_LOONGSON3A_R1: default: play_dead_at_ckseg1 = - (void *)CKSEG1ADDR((unsigned long)loongson3a_r1_play_dead); + (void *)CKSEG1ADDR((unsigned long)loongson3_type1_play_dead); + break; + case PRID_REV_LOONGSON3B_R1: + case PRID_REV_LOONGSON3B_R2: + play_dead_at_ckseg1 = + (void *)CKSEG1ADDR((unsigned long)loongson3_type2_play_dead); break; case PRID_REV_LOONGSON3A_R2_0: case PRID_REV_LOONGSON3A_R2_1: case PRID_REV_LOONGSON3A_R3_0: case PRID_REV_LOONGSON3A_R3_1: play_dead_at_ckseg1 = - (void *)CKSEG1ADDR((unsigned long)loongson3a_r2r3_play_dead); - break; - case PRID_REV_LOONGSON3B_R1: - case PRID_REV_LOONGSON3B_R2: - play_dead_at_ckseg1 = - (void *)CKSEG1ADDR((unsigned long)loongson3b_play_dead); + (void *)CKSEG1ADDR((unsigned long)loongson3_type3_play_dead); break; } + +out: state_addr = &per_cpu(cpu_state, cpu); mb(); play_dead_at_ckseg1(state_addr); diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c index 89b9c851d822..4bf990633135 100644 --- a/arch/mips/mm/c-r4k.c +++ b/arch/mips/mm/c-r4k.c @@ -1267,7 +1267,8 @@ static void probe_pcache(void) c->dcache.ways * c->dcache.linesz; c->dcache.waybit = 0; - if ((prid & PRID_REV_MASK) >= PRID_REV_LOONGSON3A_R2_0) + if ((c->processor_id & (PRID_IMP_MASK | PRID_REV_MASK)) >= + (PRID_IMP_LOONGSON_64C | PRID_REV_LOONGSON3A_R2_0)) c->options |= MIPS_CPU_PREFETCH; break; diff --git a/drivers/platform/mips/cpu_hwmon.c b/drivers/platform/mips/cpu_hwmon.c index a7f184bb47e0..1833b51690e5 100644 --- a/drivers/platform/mips/cpu_hwmon.c +++ b/drivers/platform/mips/cpu_hwmon.c @@ -9,6 +9,9 @@ #include #include #include +#include + +static int csr_temp_enable = 0; /* * Loongson-3 series cpu has two sensors inside, @@ -20,8 +23,14 @@ int loongson3_cpu_temp(int cpu) { u32 reg, prid_rev; + if (csr_temp_enable) { + reg = (csr_readl(LOONGSON_CSR_CPUTEMP) & 0xff); + goto out; + } + reg = LOONGSON_CHIPTEMP(cpu); prid_rev = read_c0_prid() & PRID_REV_MASK; + switch (prid_rev) { case PRID_REV_LOONGSON3A_R1: reg = (reg >> 8) & 0xff; @@ -34,9 +43,12 @@ int loongson3_cpu_temp(int cpu) break; case PRID_REV_LOONGSON3A_R3_0: case PRID_REV_LOONGSON3A_R3_1: + default: reg = (reg & 0xffff)*731/0x4000 - 273; break; } + +out: return (int)reg * 1000; } @@ -159,6 +171,9 @@ static int __init loongson_hwmon_init(void) pr_info("Loongson Hwmon Enter...\n"); + if (cpu_has_csr()) + csr_temp_enable = 
csr_readl(LOONGSON_CSR_FEATURES) & LOONGSON_CSRF_TEMP; + cpu_hwmon_dev = hwmon_device_register(NULL); if (IS_ERR(cpu_hwmon_dev)) { ret = -ENOMEM; -- cgit v1.2.3 From ffe59ee36aaa8395b10eb94937f038e532432a6b Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sat, 21 Sep 2019 21:50:28 +0800 Subject: MIPS: Loongson-3: Add CSR IPI support CSR IPI and legacy MMIO use the same infrastructure, but CSR IPI is faster than legacy MMIO IPI. This patch enable CSR IPI if possible (except for MailBox, because CSR IPI is too complicated for MailBox). Signed-off-by: Huacai Chen Signed-off-by: Paul Burton Cc: Ralf Baechle Cc: James Hogan Cc: linux-mips@linux-mips.org Cc: linux-mips@vger.kernel.org Cc: Fuxin Zhang Cc: Zhangjin Wu Cc: Huacai Chen --- arch/mips/loongson64/loongson-3/smp.c | 70 +++++++++++++++++++++++++++++++---- 1 file changed, 62 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/mips/loongson64/loongson-3/smp.c b/arch/mips/loongson64/loongson-3/smp.c index e999bb11f065..de8e0741ce2d 100644 --- a/arch/mips/loongson64/loongson-3/smp.c +++ b/arch/mips/loongson64/loongson-3/smp.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include "smp.h" @@ -48,6 +49,62 @@ static uint32_t core0_c0count[NR_CPUS]; __wbflush(); \ } while (0) +u32 (*ipi_read_clear)(int cpu); +void (*ipi_write_action)(int cpu, u32 action); + +static u32 csr_ipi_read_clear(int cpu) +{ + u32 action; + + /* Load the ipi register to figure out what we're supposed to do */ + action = csr_readl(LOONGSON_CSR_IPI_STATUS); + /* Clear the ipi register to clear the interrupt */ + csr_writel(action, LOONGSON_CSR_IPI_CLEAR); + + return action; +} + +static void csr_ipi_write_action(int cpu, u32 action) +{ + unsigned int irq = 0; + + while ((irq = ffs(action))) { + uint32_t val = CSR_IPI_SEND_BLOCK; + val |= (irq - 1); + val |= (cpu << CSR_IPI_SEND_CPU_SHIFT); + csr_writel(val, LOONGSON_CSR_IPI_SEND); + action &= ~BIT(irq - 1); + } +} + +static u32 legacy_ipi_read_clear(int cpu) +{ + u32 action; + + /* Load the ipi register to figure out what we're supposed to do */ + action = loongson3_ipi_read32(ipi_status0_regs[cpu_logical_map(cpu)]); + /* Clear the ipi register to clear the interrupt */ + loongson3_ipi_write32(action, ipi_clear0_regs[cpu_logical_map(cpu)]); + + return action; +} + +static void legacy_ipi_write_action(int cpu, u32 action) +{ + loongson3_ipi_write32((u32)action, ipi_set0_regs[cpu]); +} + +static void csr_ipi_probe(void) +{ + if (cpu_has_csr() && csr_readl(LOONGSON_CSR_FEATURES) & LOONGSON_CSRF_IPI) { + ipi_read_clear = csr_ipi_read_clear; + ipi_write_action = csr_ipi_write_action; + } else { + ipi_read_clear = legacy_ipi_read_clear; + ipi_write_action = legacy_ipi_write_action; + } +} + static void ipi_set0_regs_init(void) { ipi_set0_regs[0] = (void *) @@ -233,7 +290,7 @@ static void ipi_mailbox_buf_init(void) */ static void loongson3_send_ipi_single(int cpu, unsigned int action) { - loongson3_ipi_write32((u32)action, ipi_set0_regs[cpu_logical_map(cpu)]); + ipi_write_action(cpu_logical_map(cpu), (u32)action); } static void @@ -242,14 +299,14 @@ loongson3_send_ipi_mask(const struct cpumask *mask, unsigned int action) unsigned int i; for_each_cpu(i, mask) - loongson3_ipi_write32((u32)action, ipi_set0_regs[cpu_logical_map(i)]); + ipi_write_action(cpu_logical_map(i), (u32)action); } #define IPI_IRQ_OFFSET 6 void loongson3_send_irq_by_ipi(int cpu, int irqs) { - loongson3_ipi_write32(irqs << IPI_IRQ_OFFSET, ipi_set0_regs[cpu_logical_map(cpu)]); + ipi_write_action(cpu_logical_map(cpu), irqs << 
IPI_IRQ_OFFSET); } void loongson3_ipi_interrupt(struct pt_regs *regs) @@ -257,13 +314,9 @@ void loongson3_ipi_interrupt(struct pt_regs *regs) int i, cpu = smp_processor_id(); unsigned int action, c0count, irqs; - /* Load the ipi register to figure out what we're supposed to do */ - action = loongson3_ipi_read32(ipi_status0_regs[cpu_logical_map(cpu)]); + action = ipi_read_clear(cpu); irqs = action >> IPI_IRQ_OFFSET; - /* Clear the ipi register to clear the interrupt */ - loongson3_ipi_write32((u32)action, ipi_clear0_regs[cpu_logical_map(cpu)]); - if (action & SMP_RESCHEDULE_YOURSELF) scheduler_ipi(); @@ -372,6 +425,7 @@ static void __init loongson3_smp_setup(void) num++; } + csr_ipi_probe(); ipi_set0_regs_init(); ipi_clear0_regs_init(); ipi_status0_regs_init(); -- cgit v1.2.3 From a2ecb233e3e759730269b31b7d8add823cf196ba Mon Sep 17 00:00:00 2001 From: Dmitry Korotin Date: Thu, 12 Sep 2019 22:53:45 +0000 Subject: mips: Kconfig: Add ARCH_HAS_FORTIFY_SOURCE FORTIFY_SOURCE detects various overflows at compile and run time. (6974f0c4555e ("include/linux/string.h: add the option of fortified string.h functions) ARCH_HAS_FORTIFY_SOURCE means that the architecture can be built and run with CONFIG_FORTIFY_SOURCE. Since mips can be built and run with that flag, select ARCH_HAS_FORTIFY_SOURCE as default. Signed-off-by: Dmitry Korotin Signed-off-by: Paul Burton Cc: linux-mips@vger.kernel.org --- arch/mips/Kconfig | 1 + arch/mips/include/asm/string.h | 2 ++ 2 files changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 3ef8f8a2a0fd..41d25179c3ed 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -7,6 +7,7 @@ config MIPS select ARCH_CLOCKSOURCE_DATA select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAS_UBSAN_SANITIZE_ALL + select ARCH_HAS_FORTIFY_SOURCE select ARCH_SUPPORTS_UPROBES select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_CMPXCHG_LOCKREF if 64BIT diff --git a/arch/mips/include/asm/string.h b/arch/mips/include/asm/string.h index 29030cb398ee..4b938c55b397 100644 --- a/arch/mips/include/asm/string.h +++ b/arch/mips/include/asm/string.h @@ -10,6 +10,7 @@ #ifndef _ASM_STRING_H #define _ASM_STRING_H +#if !defined(__OPTIMIZE__) || !defined(CONFIG_FORTIFY_SOURCE) /* * Most of the inline functions are rather naive implementations so I just @@ -130,6 +131,7 @@ strncmp(__const__ char *__cs, __const__ char *__ct, size_t __count) return __res; } #endif /* CONFIG_32BIT */ +#endif /* !defined(__OPTIMIZE__) || !defined(CONFIG_FORTIFY_SOURCE) */ #define __HAVE_ARCH_MEMSET extern void *memset(void *__s, int __c, size_t __count); -- cgit v1.2.3 From f56a040c9faf04b4ca043f8c7bd8170c4e0fedcd Mon Sep 17 00:00:00 2001 From: Alexandre GRIVEAUX Date: Tue, 1 Oct 2019 21:08:40 +0200 Subject: MIPS: JZ4780: DTS: Add I2C nodes Add the devicetree nodes for the I2C core of the JZ4780 SoC, disabled by default. 
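[ ed: These nodes stay inert until a board devicetree flips status to "okay", as the CI20 patch further below does, and a driver matches the compatible string. A hedged sketch of the binding side follows; the real driver lives in drivers/i2c/busses/i2c-jz4780.c and differs in detail, and the probe body here is a placeholder. ]

#include <linux/module.h>
#include <linux/of.h>
#include <linux/platform_device.h>

static int jz4780_i2c_probe_sketch(struct platform_device *pdev)
{
	/* map the 0x1000 register window, grab the SMBx clock and
	 * register an i2c_adapter; all elided in this sketch */
	return 0;
}

static const struct of_device_id jz4780_i2c_matches_sketch[] = {
	{ .compatible = "ingenic,jz4780-i2c" },
	{ /* sentinel */ }
};

static struct platform_driver jz4780_i2c_driver_sketch = {
	.probe	= jz4780_i2c_probe_sketch,
	.driver	= {
		.name		= "jz4780-i2c-sketch",
		.of_match_table	= jz4780_i2c_matches_sketch,
	},
};
module_platform_driver(jz4780_i2c_driver_sketch);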
Signed-off-by: Alexandre GRIVEAUX Signed-off-by: Paul Burton Cc: robh+dt@kernel.org Cc: mark.rutland@arm.com Cc: ralf@linux-mips.org Cc: jhogan@kernel.org Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: devicetree@vger.kernel.org --- arch/mips/boot/dts/ingenic/jz4780.dtsi | 86 ++++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) (limited to 'arch') diff --git a/arch/mips/boot/dts/ingenic/jz4780.dtsi b/arch/mips/boot/dts/ingenic/jz4780.dtsi index c54bd7cfec55..f928329b034b 100644 --- a/arch/mips/boot/dts/ingenic/jz4780.dtsi +++ b/arch/mips/boot/dts/ingenic/jz4780.dtsi @@ -262,6 +262,92 @@ status = "disabled"; }; + i2c0: i2c@10050000 { + compatible = "ingenic,jz4780-i2c"; + #address-cells = <1>; + #size-cells = <0>; + + reg = <0x10050000 0x1000>; + + interrupt-parent = <&intc>; + interrupts = <60>; + + clocks = <&cgu JZ4780_CLK_SMB0>; + clock-frequency = <100000>; + pinctrl-names = "default"; + pinctrl-0 = <&pins_i2c0_data>; + + status = "disabled"; + }; + + i2c1: i2c@10051000 { + compatible = "ingenic,jz4780-i2c"; + #address-cells = <1>; + #size-cells = <0>; + reg = <0x10051000 0x1000>; + + interrupt-parent = <&intc>; + interrupts = <59>; + + clocks = <&cgu JZ4780_CLK_SMB1>; + clock-frequency = <100000>; + pinctrl-names = "default"; + pinctrl-0 = <&pins_i2c1_data>; + + status = "disabled"; + }; + + i2c2: i2c@10052000 { + compatible = "ingenic,jz4780-i2c"; + #address-cells = <1>; + #size-cells = <0>; + reg = <0x10052000 0x1000>; + + interrupt-parent = <&intc>; + interrupts = <58>; + + clocks = <&cgu JZ4780_CLK_SMB2>; + clock-frequency = <100000>; + pinctrl-names = "default"; + pinctrl-0 = <&pins_i2c2_data>; + + status = "disabled"; + }; + + i2c3: i2c@10053000 { + compatible = "ingenic,jz4780-i2c"; + #address-cells = <1>; + #size-cells = <0>; + reg = <0x10053000 0x1000>; + + interrupt-parent = <&intc>; + interrupts = <57>; + + clocks = <&cgu JZ4780_CLK_SMB3>; + clock-frequency = <100000>; + pinctrl-names = "default"; + pinctrl-0 = <&pins_i2c3_data>; + + status = "disabled"; + }; + + i2c4: i2c@10054000 { + compatible = "ingenic,jz4780-i2c"; + #address-cells = <1>; + #size-cells = <0>; + reg = <0x10054000 0x1000>; + + interrupt-parent = <&intc>; + interrupts = <56>; + + clocks = <&cgu JZ4780_CLK_SMB4>; + clock-frequency = <100000>; + pinctrl-names = "default"; + pinctrl-0 = <&pins_i2c4_data>; + + status = "disabled"; + }; + watchdog: watchdog@10002000 { compatible = "ingenic,jz4780-watchdog"; reg = <0x10002000 0x10>; -- cgit v1.2.3 From 73f2b940474d1d493eb13267995b0b6dbfe62f99 Mon Sep 17 00:00:00 2001 From: Alexandre GRIVEAUX Date: Tue, 1 Oct 2019 21:09:00 +0200 Subject: MIPS: CI20: DTS: Add I2C nodes Adding missing I2C nodes and some peripheral: - PMU - RTC Signed-off-by: Alexandre GRIVEAUX Signed-off-by: Paul Burton Cc: robh+dt@kernel.org Cc: mark.rutland@arm.com Cc: ralf@linux-mips.org Cc: jhogan@kernel.org Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: devicetree@vger.kernel.org --- arch/mips/boot/dts/ingenic/ci20.dts | 147 ++++++++++++++++++++++++++++++++++++ 1 file changed, 147 insertions(+) (limited to 'arch') diff --git a/arch/mips/boot/dts/ingenic/ci20.dts b/arch/mips/boot/dts/ingenic/ci20.dts index 2e9952311ecd..4a77fa30a9cd 100644 --- a/arch/mips/boot/dts/ingenic/ci20.dts +++ b/arch/mips/boot/dts/ingenic/ci20.dts @@ -87,6 +87,123 @@ pinctrl-0 = <&pins_uart4>; }; +&i2c0 { + status = "okay"; + + pinctrl-names = "default"; + pinctrl-0 = <&pins_i2c0>; + + clock-frequency = <400000>; + + act8600: act8600@5a { + compatible = 
"active-semi,act8600"; + reg = <0x5a>; + status = "okay"; + + regulators { + vddcore: SUDCDC1 { + regulator-name = "VDDCORE"; + regulator-min-microvolt = <1100000>; + regulator-max-microvolt = <1100000>; + regulator-always-on; + }; + vddmem: SUDCDC2 { + regulator-name = "VDDMEM"; + regulator-min-microvolt = <1500000>; + regulator-max-microvolt = <1500000>; + regulator-always-on; + }; + vcc_33: SUDCDC3 { + regulator-name = "VCC33"; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + regulator-always-on; + }; + vcc_50: SUDCDC4 { + regulator-name = "VCC50"; + regulator-min-microvolt = <5000000>; + regulator-max-microvolt = <5000000>; + regulator-always-on; + }; + vcc_25: LDO_REG5 { + regulator-name = "VCC25"; + regulator-min-microvolt = <2500000>; + regulator-max-microvolt = <2500000>; + regulator-always-on; + }; + wifi_io: LDO_REG6 { + regulator-name = "WIFIIO"; + regulator-min-microvolt = <2500000>; + regulator-max-microvolt = <2500000>; + regulator-always-on; + }; + vcc_28: LDO_REG7 { + regulator-name = "VCC28"; + regulator-min-microvolt = <2800000>; + regulator-max-microvolt = <2800000>; + regulator-always-on; + }; + vcc_15: LDO_REG8 { + regulator-name = "VCC15"; + regulator-min-microvolt = <1500000>; + regulator-max-microvolt = <1500000>; + regulator-always-on; + }; + vcc_18: LDO_REG9 { + regulator-name = "VCC18"; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <1800000>; + regulator-always-on; + }; + vcc_11: LDO_REG10 { + regulator-name = "VCC11"; + regulator-min-microvolt = <1100000>; + regulator-max-microvolt = <1100000>; + regulator-always-on; + }; + }; + }; +}; + +&i2c1 { + status = "okay"; + + pinctrl-names = "default"; + pinctrl-0 = <&pins_i2c1>; + +}; + +&i2c2 { + status = "okay"; + + pinctrl-names = "default"; + pinctrl-0 = <&pins_i2c2>; + +}; + +&i2c3 { + status = "okay"; + + pinctrl-names = "default"; + pinctrl-0 = <&pins_i2c3>; + +}; + +&i2c4 { + status = "okay"; + + pinctrl-names = "default"; + pinctrl-0 = <&pins_i2c4>; + + clock-frequency = <400000>; + + rtc@51 { + compatible = "nxp,pcf8563"; + reg = <0x51>; + interrupts = <110>; + }; +}; + &nemc { status = "okay"; @@ -209,6 +326,36 @@ bias-disable; }; + pins_i2c0: i2c0 { + function = "i2c0"; + groups = "i2c0-data"; + bias-disable; + }; + + pins_i2c1: i2c1 { + function = "i2c1"; + groups = "i2c1-data"; + bias-disable; + }; + + pins_i2c2: i2c2 { + function = "i2c2"; + groups = "i2c2-data"; + bias-disable; + }; + + pins_i2c3: i2c3 { + function = "i2c3"; + groups = "i2c3-data"; + bias-disable; + }; + + pins_i2c4: i2c4 { + function = "i2c4"; + groups = "i2c4-data-e"; + bias-disable; + }; + pins_nemc: nemc { function = "nemc"; groups = "nemc-data", "nemc-cle-ale", "nemc-rd-we", "nemc-frd-fwe"; -- cgit v1.2.3 From 948f2708f945f9354462a1974dd4e351cd329981 Mon Sep 17 00:00:00 2001 From: Alexandre GRIVEAUX Date: Tue, 1 Oct 2019 21:09:14 +0200 Subject: MIPS: CI20: DTS: Add IW8103 Wifi + bluetooth Add IW8103 Wifi + bluetooth module to device tree and related power domain. 
Signed-off-by: Alexandre GRIVEAUX Signed-off-by: Paul Burton Cc: robh+dt@kernel.org Cc: mark.rutland@arm.com Cc: ralf@linux-mips.org Cc: jhogan@kernel.org Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: devicetree@vger.kernel.org --- arch/mips/boot/dts/ingenic/ci20.dts | 39 +++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) (limited to 'arch') diff --git a/arch/mips/boot/dts/ingenic/ci20.dts b/arch/mips/boot/dts/ingenic/ci20.dts index 4a77fa30a9cd..c62c36ae94c2 100644 --- a/arch/mips/boot/dts/ingenic/ci20.dts +++ b/arch/mips/boot/dts/ingenic/ci20.dts @@ -31,6 +31,13 @@ gpio = <&gpb 25 GPIO_ACTIVE_LOW>; enable-active-high; }; + + wlan0_power: fixedregulator@1 { + compatible = "regulator-fixed"; + regulator-name = "wlan0_power"; + gpio = <&gpb 19 GPIO_ACTIVE_LOW>; + enable-active-high; + }; }; &ext { @@ -54,9 +61,18 @@ bus-width = <4>; max-frequency = <50000000>; + non-removable; pinctrl-names = "default"; pinctrl-0 = <&pins_mmc1>; + + brcmf: wifi@1 { +/* reg = <4>;*/ + compatible = "brcm,bcm4330-fmac"; + vcc-supply = <&wlan0_power>; + device-wakeup-gpios = <&gpd 9 GPIO_ACTIVE_HIGH>; + shutdown-gpios = <&gpf 7 GPIO_ACTIVE_LOW>; + }; }; &uart0 { @@ -73,6 +89,23 @@ pinctrl-0 = <&pins_uart1>; }; +&uart2 { + status = "okay"; + + pinctrl-names = "default"; + pinctrl-0 = <&pins_uart2>; + uart-has-rtscts; + + bluetooth { + compatible = "brcm,bcm4330-bt"; + reset-gpios = <&gpf 8 GPIO_ACTIVE_HIGH>; + vcc-supply = <&wlan0_power>; + device-wakeup-gpios = <&gpf 5 GPIO_ACTIVE_HIGH>; + host-wakeup-gpios = <&gpf 6 GPIO_ACTIVE_HIGH>; + shutdown-gpios = <&gpf 4 GPIO_ACTIVE_LOW>; + }; +}; + &uart3 { status = "okay"; @@ -314,6 +347,12 @@ bias-disable; }; + pins_uart2: uart2 { + function = "uart2"; + groups = "uart2-data", "uart2-hwflow"; + bias-disable; + }; + pins_uart3: uart3 { function = "uart3"; groups = "uart3-data", "uart3-hwflow"; -- cgit v1.2.3 From 24b0cb4f883adc92689a336800495838fdfa4763 Mon Sep 17 00:00:00 2001 From: Alexandre GRIVEAUX Date: Tue, 1 Oct 2019 21:09:30 +0200 Subject: MIPS: CI20: DTS: Add Leds Adding leds and related triggers. 
Signed-off-by: Alexandre GRIVEAUX Signed-off-by: Paul Burton Cc: robh+dt@kernel.org Cc: mark.rutland@arm.com Cc: ralf@linux-mips.org Cc: jhogan@kernel.org Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: devicetree@vger.kernel.org --- arch/mips/boot/dts/ingenic/ci20.dts | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'arch') diff --git a/arch/mips/boot/dts/ingenic/ci20.dts b/arch/mips/boot/dts/ingenic/ci20.dts index c62c36ae94c2..37b93166bf22 100644 --- a/arch/mips/boot/dts/ingenic/ci20.dts +++ b/arch/mips/boot/dts/ingenic/ci20.dts @@ -25,6 +25,34 @@ 0x30000000 0x30000000>; }; + leds { + compatible = "gpio-leds"; + + led0 { + label = "ci20:red:led0"; + gpios = <&gpc 3 GPIO_ACTIVE_HIGH>; + linux,default-trigger = "none"; + }; + + led1 { + label = "ci20:red:led1"; + gpios = <&gpc 2 GPIO_ACTIVE_HIGH>; + linux,default-trigger = "nand-disk"; + }; + + led2 { + label = "ci20:red:led2"; + gpios = <&gpc 1 GPIO_ACTIVE_HIGH>; + linux,default-trigger = "cpu1"; + }; + + led3 { + label = "ci20:red:led3"; + gpios = <&gpc 0 GPIO_ACTIVE_HIGH>; + linux,default-trigger = "cpu0"; + }; + }; + eth0_power: fixedregulator@0 { compatible = "regulator-fixed"; regulator-name = "eth0_power"; -- cgit v1.2.3 From 5dc76a96e95ae041c1d8e52714bd77576b35919b Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Thu, 3 Oct 2019 11:52:30 +0200 Subject: MIPS: PCI: use information from 1-wire PROM for IOC3 detection IOC3 chips in SGI systems are connected to a bridge ASIC, which has a 1-wire prom attached with part number information. This changeset uses this information to create PCI subsystem information, which the MFD driver uses for further platform device setup. Signed-off-by: Thomas Bogendoerfer Signed-off-by: Paul Burton Cc: Jonathan Corbet Cc: Ralf Baechle Cc: James Hogan Cc: Lee Jones Cc: David S.
Miller Cc: Srinivas Kandagatla Cc: Alessandro Zummo Cc: Alexandre Belloni Cc: Greg Kroah-Hartman Cc: Jiri Slaby Cc: linux-doc@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-mips@vger.kernel.org Cc: netdev@vger.kernel.org Cc: linux-rtc@vger.kernel.org Cc: linux-serial@vger.kernel.org --- arch/mips/include/asm/pci/bridge.h | 1 + arch/mips/include/asm/sn/ioc3.h | 9 +++ arch/mips/pci/pci-xtalk-bridge.c | 135 ++++++++++++++++++++++++++++++++++++- arch/mips/sgi-ip27/ip27-xtalk.c | 38 +++++++++-- 4 files changed, 175 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/mips/include/asm/pci/bridge.h b/arch/mips/include/asm/pci/bridge.h index a92cd30b48c9..3bc630ff9ad4 100644 --- a/arch/mips/include/asm/pci/bridge.h +++ b/arch/mips/include/asm/pci/bridge.h @@ -807,6 +807,7 @@ struct bridge_controller { unsigned long intr_addr; struct irq_domain *domain; unsigned int pci_int[8]; + u32 ioc3_sid[8]; nasid_t nasid; }; diff --git a/arch/mips/include/asm/sn/ioc3.h b/arch/mips/include/asm/sn/ioc3.h index a947eed48fee..78ef760ddde4 100644 --- a/arch/mips/include/asm/sn/ioc3.h +++ b/arch/mips/include/asm/sn/ioc3.h @@ -590,4 +590,13 @@ struct ioc3_etxd { #define MIDR_DATA_MASK 0x0000ffff +/* subsystem IDs supplied by card detection in pci-xtalk-bridge */ +#define IOC3_SUBSYS_IP27_BASEIO6G 0xc300 +#define IOC3_SUBSYS_IP27_MIO 0xc301 +#define IOC3_SUBSYS_IP27_BASEIO 0xc302 +#define IOC3_SUBSYS_IP29_SYSBOARD 0xc303 +#define IOC3_SUBSYS_IP30_SYSBOARD 0xc304 +#define IOC3_SUBSYS_MENET 0xc305 +#define IOC3_SUBSYS_MENET4 0xc306 + #endif /* MIPS_SN_IOC3_H */ diff --git a/arch/mips/pci/pci-xtalk-bridge.c b/arch/mips/pci/pci-xtalk-bridge.c index 4bb5e326305e..312632171832 100644 --- a/arch/mips/pci/pci-xtalk-bridge.c +++ b/arch/mips/pci/pci-xtalk-bridge.c @@ -11,16 +11,22 @@ #include #include #include +#include +#include #include #include #include +#include + +#define CRC16_INIT 0 +#define CRC16_VALID 0xb001 /* * Most of the IOC3 PCI config register aren't present * we emulate what is needed for a normal PCI enumeration */ -static int ioc3_cfg_rd(void *addr, int where, int size, u32 *value) +static int ioc3_cfg_rd(void *addr, int where, int size, u32 *value, u32 sid) { u32 cf, shift, mask; @@ -30,6 +36,9 @@ static int ioc3_cfg_rd(void *addr, int where, int size, u32 *value) if (get_dbe(cf, (u32 *)addr)) return PCIBIOS_DEVICE_NOT_FOUND; break; + case 0x2c: + cf = sid; + break; case 0x3c: /* emulate sane interrupt pin value */ cf = 0x00000100; @@ -111,7 +120,8 @@ static int pci_conf0_read_config(struct pci_bus *bus, unsigned int devfn, */ if (cf == (PCI_VENDOR_ID_SGI | (PCI_DEVICE_ID_SGI_IOC3 << 16))) { addr = &bridge->b_type0_cfg_dev[slot].f[fn].l[where >> 2]; - return ioc3_cfg_rd(addr, where, size, value); + return ioc3_cfg_rd(addr, where, size, value, + bc->ioc3_sid[slot]); } addr = &bridge->b_type0_cfg_dev[slot].f[fn].c[where ^ (4 - size)]; @@ -149,7 +159,8 @@ static int pci_conf1_read_config(struct pci_bus *bus, unsigned int devfn, */ if (cf == (PCI_VENDOR_ID_SGI | (PCI_DEVICE_ID_SGI_IOC3 << 16))) { addr = &bridge->b_type1_cfg.c[(fn << 8) | (where & ~3)]; - return ioc3_cfg_rd(addr, where, size, value); + return ioc3_cfg_rd(addr, where, size, value, + bc->ioc3_sid[slot]); } addr = &bridge->b_type1_cfg.c[(fn << 8) | (where ^ (4 - size))]; @@ -426,6 +437,117 @@ static int bridge_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) return irq; } +#define IOC3_SID(sid) (PCI_VENDOR_ID_SGI << 16 | (sid)) + +static void bridge_setup_ip27_baseio6g(struct bridge_controller *bc) +{ + bc->ioc3_sid[2] 
= IOC3_SID(IOC3_SUBSYS_IP27_BASEIO6G); + bc->ioc3_sid[6] = IOC3_SID(IOC3_SUBSYS_IP27_MIO); +} + +static void bridge_setup_ip27_baseio(struct bridge_controller *bc) +{ + bc->ioc3_sid[2] = IOC3_SID(IOC3_SUBSYS_IP27_BASEIO); +} + +static void bridge_setup_ip29_baseio(struct bridge_controller *bc) +{ + bc->ioc3_sid[2] = IOC3_SID(IOC3_SUBSYS_IP29_SYSBOARD); +} + +static void bridge_setup_ip30_sysboard(struct bridge_controller *bc) +{ + bc->ioc3_sid[2] = IOC3_SID(IOC3_SUBSYS_IP30_SYSBOARD); +} + +static void bridge_setup_menet(struct bridge_controller *bc) +{ + bc->ioc3_sid[0] = IOC3_SID(IOC3_SUBSYS_MENET); + bc->ioc3_sid[1] = IOC3_SID(IOC3_SUBSYS_MENET); + bc->ioc3_sid[2] = IOC3_SID(IOC3_SUBSYS_MENET); + bc->ioc3_sid[3] = IOC3_SID(IOC3_SUBSYS_MENET4); +} + +#define BRIDGE_BOARD_SETUP(_partno, _setup) \ + { .match = _partno, .setup = _setup } + +static const struct { + char *match; + void (*setup)(struct bridge_controller *bc); +} bridge_ioc3_devid[] = { + BRIDGE_BOARD_SETUP("030-0734-", bridge_setup_ip27_baseio6g), + BRIDGE_BOARD_SETUP("030-0880-", bridge_setup_ip27_baseio6g), + BRIDGE_BOARD_SETUP("030-1023-", bridge_setup_ip27_baseio), + BRIDGE_BOARD_SETUP("030-1124-", bridge_setup_ip27_baseio), + BRIDGE_BOARD_SETUP("030-1025-", bridge_setup_ip29_baseio), + BRIDGE_BOARD_SETUP("030-1244-", bridge_setup_ip29_baseio), + BRIDGE_BOARD_SETUP("030-1389-", bridge_setup_ip29_baseio), + BRIDGE_BOARD_SETUP("030-0887-", bridge_setup_ip30_sysboard), + BRIDGE_BOARD_SETUP("030-1467-", bridge_setup_ip30_sysboard), + BRIDGE_BOARD_SETUP("030-0873-", bridge_setup_menet), +}; + +static void bridge_setup_board(struct bridge_controller *bc, char *partnum) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(bridge_ioc3_devid); i++) + if (!strncmp(partnum, bridge_ioc3_devid[i].match, + strlen(bridge_ioc3_devid[i].match))) { + bridge_ioc3_devid[i].setup(bc); + } +} + +static int bridge_nvmem_match(struct device *dev, const void *data) +{ + const char *name = dev_name(dev); + const char *prefix = data; + + if (strlen(name) < strlen(prefix)) + return 0; + + return memcmp(prefix, dev_name(dev), strlen(prefix)) == 0; +} + +static int bridge_get_partnum(u64 baddr, char *partnum) +{ + struct nvmem_device *nvmem; + char prefix[24]; + u8 prom[64]; + int i, j; + int ret; + + snprintf(prefix, sizeof(prefix), "bridge-%012llx-0b-", baddr); + + nvmem = nvmem_device_find(prefix, bridge_nvmem_match); + if (IS_ERR(nvmem)) + return PTR_ERR(nvmem); + + ret = nvmem_device_read(nvmem, 0, 64, prom); + nvmem_device_put(nvmem); + + if (ret != 64) + return ret; + + if (crc16(CRC16_INIT, prom, 32) != CRC16_VALID || + crc16(CRC16_INIT, prom + 32, 32) != CRC16_VALID) + return -EINVAL; + + /* Assemble part number */ + j = 0; + for (i = 0; i < 19; i++) + if (prom[i + 11] != ' ') + partnum[j++] = prom[i + 11]; + + for (i = 0; i < 6; i++) + if (prom[i + 32] != ' ') + partnum[j++] = prom[i + 32]; + + partnum[j] = 0; + + return 0; +} + static int bridge_probe(struct platform_device *pdev) { struct xtalk_bridge_platform_data *bd = dev_get_platdata(&pdev->dev); @@ -434,9 +556,14 @@ static int bridge_probe(struct platform_device *pdev) struct pci_host_bridge *host; struct irq_domain *domain, *parent; struct fwnode_handle *fn; + char partnum[26]; int slot; int err; + /* get part number from one wire prom */ + if (bridge_get_partnum(virt_to_phys((void *)bd->bridge_addr), partnum)) + return -EPROBE_DEFER; /* not available yet */ + parent = irq_get_default_host(); if (!parent) return -ENODEV; @@ -517,6 +644,8 @@ static int bridge_probe(struct platform_device *pdev) 
} bridge_read(bc, b_wid_tflush); /* wait until Bridge PIO complete */ + bridge_setup_board(bc, partnum); + host->dev.parent = dev; host->sysdata = bc; host->busnr = 0; diff --git a/arch/mips/sgi-ip27/ip27-xtalk.c b/arch/mips/sgi-ip27/ip27-xtalk.c index 5602bb113921..5218b900f855 100644 --- a/arch/mips/sgi-ip27/ip27-xtalk.c +++ b/arch/mips/sgi-ip27/ip27-xtalk.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -26,9 +27,35 @@ static void bridge_platform_create(nasid_t nasid, int widget, int masterwid) { struct xtalk_bridge_platform_data *bd; + struct sgi_w1_platform_data *wd; struct platform_device *pdev; + struct resource w1_res; unsigned long offset; + offset = NODE_OFFSET(nasid); + + wd = kzalloc(sizeof(*wd), GFP_KERNEL); + if (!wd) + goto no_mem; + + snprintf(wd->dev_id, sizeof(wd->dev_id), "bridge-%012lx", + offset + (widget << SWIN_SIZE_BITS)); + + memset(&w1_res, 0, sizeof(w1_res)); + w1_res.start = offset + (widget << SWIN_SIZE_BITS) + + offsetof(struct bridge_regs, b_nic); + w1_res.end = w1_res.start + 3; + w1_res.flags = IORESOURCE_MEM; + + pdev = platform_device_alloc("sgi_w1", PLATFORM_DEVID_AUTO); + if (!pdev) { + kfree(wd); + goto no_mem; + } + platform_device_add_resources(pdev, &w1_res, 1); + platform_device_add_data(pdev, wd, sizeof(*wd)); + platform_device_add(pdev); + bd = kzalloc(sizeof(*bd), GFP_KERNEL); if (!bd) goto no_mem; @@ -38,7 +65,6 @@ static void bridge_platform_create(nasid_t nasid, int widget, int masterwid) goto no_mem; } - offset = NODE_OFFSET(nasid); bd->bridge_addr = RAW_NODE_SWIN_BASE(nasid, widget); bd->intr_addr = BIT_ULL(47) + 0x01800000 + PI_INT_PEND_MOD; @@ -46,14 +72,14 @@ static void bridge_platform_create(nasid_t nasid, int widget, int masterwid) bd->masterwid = masterwid; bd->mem.name = "Bridge PCI MEM"; - bd->mem.start = offset + (widget << SWIN_SIZE_BITS); - bd->mem.end = bd->mem.start + SWIN_SIZE - 1; + bd->mem.start = offset + (widget << SWIN_SIZE_BITS) + BRIDGE_DEVIO0; + bd->mem.end = offset + (widget << SWIN_SIZE_BITS) + SWIN_SIZE - 1; bd->mem.flags = IORESOURCE_MEM; bd->mem_offset = offset; bd->io.name = "Bridge PCI IO"; - bd->io.start = offset + (widget << SWIN_SIZE_BITS); - bd->io.end = bd->io.start + SWIN_SIZE - 1; + bd->io.start = offset + (widget << SWIN_SIZE_BITS) + BRIDGE_DEVIO0; + bd->io.end = offset + (widget << SWIN_SIZE_BITS) + SWIN_SIZE - 1; bd->io.flags = IORESOURCE_IO; bd->io_offset = offset; @@ -81,6 +107,8 @@ static int probe_one_port(nasid_t nasid, int widget, int masterwid) bridge_platform_create(nasid, widget, masterwid); break; default: + pr_info("xtalk:n%d/%d unknown widget (0x%x)\n", + nasid, widget, partnum); break; } -- cgit v1.2.3 From 9662dd752c149a6d50f7b731263c09254c403b21 Mon Sep 17 00:00:00 2001 From: Aurabindo Jayamohanan Date: Fri, 13 Sep 2019 12:02:17 +0000 Subject: mips: check for dsp presence only once before save/restore {save,restore}_dsp() internally checks if the cpu has dsp support. 
Therefore, an explicit check is not required before calling them in {save,restore}_processor_state(). Signed-off-by: Aurabindo Jayamohanan Signed-off-by: Paul Burton Cc: ralf@linux-mips.org Cc: jhogan@kernel.org Cc: alexios.zavras@intel.com Cc: gregkh@linuxfoundation.org Cc: armijn@tjaldur.nl Cc: allison@lohutok.net Cc: tglx@linutronix.de Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org --- arch/mips/power/cpu.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/mips/power/cpu.c b/arch/mips/power/cpu.c index 3340a5530de3..a15e29dfc7b3 100644 --- a/arch/mips/power/cpu.c +++ b/arch/mips/power/cpu.c @@ -19,8 +19,8 @@ void save_processor_state(void) if (is_fpu_owner()) save_fp(current); - if (cpu_has_dsp) - save_dsp(current); + + save_dsp(current); } void restore_processor_state(void) @@ -29,8 +29,8 @@ void restore_processor_state(void) if (is_fpu_owner()) restore_fp(current); - if (cpu_has_dsp) - restore_dsp(current); + + restore_dsp(current); } int pfn_is_nosave(unsigned long pfn) -- cgit v1.2.3 From fd7710cb491f900eb63d2ce5aac0e682003e84e9 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Mon, 7 Oct 2019 12:58:44 -0700 Subject: MIPS: futex: Restore \n after sync instructions Commit 3c1d3f097972 ("MIPS: futex: Emit Loongson3 sync workarounds within asm") inadvertently removed the newlines following __WEAK_LLSC_MB, which causes build failures for configurations in which __WEAK_LLSC_MB expands to a sync instruction: {standard input}: Assembler messages: {standard input}:9346: Error: symbol `sync3' is already defined {standard input}:9380: Error: symbol `sync3' is already defined ... Fix this by restoring the newlines to separate the sync instruction from anything following it (such as the 3: label), preventing inadvertent concatenation.
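The underlying mechanism is plain C string-literal concatenation. A standalone sketch of the failure mode, using simplified stand-ins for the real macros (__WEAK_LLSC_MB comes from asm/barrier.h and __stringify() from linux/stringify.h):

	/* BARRIER stands in for __WEAK_LLSC_MB expanding to a bare sync */
	#define BARRIER sync
	#define __stringify_1(x...) #x
	#define __stringify(x...) __stringify_1(x)

	/* Adjacent literals merge: "sync" "3:\n" becomes "sync3:\n", so
	 * the assembler sees a label named "sync3" at every expansion. */
	static const char broken[] = __stringify(BARRIER) "3:\n";

	/* With the newline restored, the two assembler lines stay apart. */
	static const char fixed[] = __stringify(BARRIER) " \n" "3:\n";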
Signed-off-by: Paul Burton Fixes: 3c1d3f097972 ("MIPS: futex: Emit Loongson3 sync workarounds within asm") --- arch/mips/include/asm/futex.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/mips/include/asm/futex.h b/arch/mips/include/asm/futex.h index 54cf20530931..110220705e97 100644 --- a/arch/mips/include/asm/futex.h +++ b/arch/mips/include/asm/futex.h @@ -33,7 +33,7 @@ " .set arch=r4000 \n" \ "2: sc $1, %2 \n" \ " beqzl $1, 1b \n" \ - __stringify(__WEAK_LLSC_MB) \ + __stringify(__WEAK_LLSC_MB) " \n" \ "3: \n" \ " .insn \n" \ " .set pop \n" \ @@ -63,7 +63,7 @@ " .set "MIPS_ISA_ARCH_LEVEL" \n" \ "2: "user_sc("$1", "%2")" \n" \ " beqz $1, 1b \n" \ - __stringify(__WEAK_LLSC_MB) \ + __stringify(__WEAK_LLSC_MB) " \n" \ "3: \n" \ " .insn \n" \ " .set pop \n" \ @@ -148,7 +148,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, " .set arch=r4000 \n" "2: sc $1, %2 \n" " beqzl $1, 1b \n" - __stringify(__WEAK_LLSC_MB) + __stringify(__WEAK_LLSC_MB) " \n" "3: \n" " .insn \n" " .set pop \n" -- cgit v1.2.3 From a14bf1dc494aa5126e4f23ebd9fa04991133814e Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Wed, 9 Oct 2019 00:15:09 +0800 Subject: MIPS: generic: Use __initconst for const init data Fix the following checkpatch errors: $ ./scripts/checkpatch.pl --no-tree -f arch/mips/generic/init.c ERROR: Use of const init definition must use __initconst #23: FILE: arch/mips/generic/init.c:23: +static __initdata const void *fdt; ERROR: Use of const init definition must use __initconst #24: FILE: arch/mips/generic/init.c:24: +static __initdata const struct mips_machine *mach; ERROR: Use of const init definition must use __initconst #25: FILE: arch/mips/generic/init.c:25: +static __initdata const void *mach_match_data; Fixes: eed0eabd12ef ("MIPS: generic: Introduce generic DT-based board support") Signed-off-by: Tiezhu Yang Signed-off-by: Paul Burton Cc: ralf@linux-mips.org Cc: jhogan@kernel.org Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org --- arch/mips/generic/init.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/mips/generic/init.c b/arch/mips/generic/init.c index d5b8c4717ded..1de215b283d6 100644 --- a/arch/mips/generic/init.c +++ b/arch/mips/generic/init.c @@ -20,9 +20,9 @@ #include #include -static __initdata const void *fdt; -static __initdata const struct mips_machine *mach; -static __initdata const void *mach_match_data; +static __initconst const void *fdt; +static __initconst const struct mips_machine *mach; +static __initconst const void *mach_match_data; void __init prom_init(void) { -- cgit v1.2.3 From 6baaeadae911ba9cedfead881f3bf305a18fd011 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Tue, 8 Oct 2019 18:22:00 +0000 Subject: MIPS: Provide unroll() macro, use it for cache ops Currently we have a lot of duplication in asm/r4kcache.h to handle manually unrolling loops of cache ops for various line sizes, and we have to explicitly handle the difference in cache op immediate width between MIPSr6 & earlier ISA revisions with further duplication. Introduce an unroll() macro in asm/unroll.h which expands to a switch statement which is used to call a function or expand a preprocessor macro a compile-time constant number of times in a row - effectively explicitly unrolling a loop. We make use of this here to remove the cache op duplication & will use it further in later patches. 
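For a feel of the resulting call sites, here is a minimal sketch that uses the macro exactly as introduced below, with a made-up per-byte operation; the post-increment is evaluated once per expanded statement, mirroring the cache_unroll() helper:

	#include <asm/unroll.h>

	#define zero_byte(p) (*(p) = 0)

	static inline void zero_32(unsigned char *buf)
	{
		int i = 0;

		/* expands to 32 zero_byte() statements; no loop remains */
		unroll(32, zero_byte, &buf[i++]);
	}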
A nice side effect of this is that calculating the cache op offset immediate is now the compiler's responsibility, so we're no longer sensitive to the width change of that immediate in MIPSr6 & will be similarly agnostic to immediate width in any future supported ISA. Signed-off-by: Paul Burton Cc: linux-mips@vger.kernel.org --- arch/mips/include/asm/r4kcache.h | 358 +++------------------------------------ arch/mips/include/asm/unroll.h | 76 +++++++++ arch/mips/mm/c-r4k.c | 12 +- 3 files changed, 103 insertions(+), 343 deletions(-) create mode 100644 arch/mips/include/asm/unroll.h (limited to 'arch') diff --git a/arch/mips/include/asm/r4kcache.h b/arch/mips/include/asm/r4kcache.h index 7f4a32d3345a..e73fc9e899d2 100644 --- a/arch/mips/include/asm/r4kcache.h +++ b/arch/mips/include/asm/r4kcache.h @@ -15,12 +15,14 @@ #include #include +#include #include #include #include #include #include #include +#include #include /* for uaccess_kernel() */ extern void (*r4k_blast_dcache)(void); @@ -39,16 +41,19 @@ extern void (*r4k_blast_icache)(void); */ #define INDEX_BASE CKSEG0 -#define cache_op(op,addr) \ +#define _cache_op(insn, op, addr) \ __asm__ __volatile__( \ " .set push \n" \ " .set noreorder \n" \ " .set "MIPS_ISA_ARCH_LEVEL" \n" \ - " cache %0, %1 \n" \ + " " insn("%0", "%1") " \n" \ " .set pop \n" \ : \ : "i" (op), "R" (*(unsigned char *)(addr))) +#define cache_op(op, addr) \ + _cache_op(kernel_cache, op, addr) + static inline void flush_icache_line_indexed(unsigned long addr) { cache_op(Index_Invalidate_I, addr); @@ -193,338 +198,10 @@ static inline void invalidate_tcache_page(unsigned long addr) cache_op(Page_Invalidate_T, addr); } -#ifndef CONFIG_CPU_MIPSR6 -#define cache16_unroll32(base,op) \ - __asm__ __volatile__( \ - " .set push \n" \ - " .set noreorder \n" \ - " .set mips3 \n" \ - " cache %1, 0x000(%0); cache %1, 0x010(%0) \n" \ - " cache %1, 0x020(%0); cache %1, 0x030(%0) \n" \ - " cache %1, 0x040(%0); cache %1, 0x050(%0) \n" \ - " cache %1, 0x060(%0); cache %1, 0x070(%0) \n" \ - " cache %1, 0x080(%0); cache %1, 0x090(%0) \n" \ - " cache %1, 0x0a0(%0); cache %1, 0x0b0(%0) \n" \ - " cache %1, 0x0c0(%0); cache %1, 0x0d0(%0) \n" \ - " cache %1, 0x0e0(%0); cache %1, 0x0f0(%0) \n" \ - " cache %1, 0x100(%0); cache %1, 0x110(%0) \n" \ - " cache %1, 0x120(%0); cache %1, 0x130(%0) \n" \ - " cache %1, 0x140(%0); cache %1, 0x150(%0) \n" \ - " cache %1, 0x160(%0); cache %1, 0x170(%0) \n" \ - " cache %1, 0x180(%0); cache %1, 0x190(%0) \n" \ - " cache %1, 0x1a0(%0); cache %1, 0x1b0(%0) \n" \ - " cache %1, 0x1c0(%0); cache %1, 0x1d0(%0) \n" \ - " cache %1, 0x1e0(%0); cache %1, 0x1f0(%0) \n" \ - " .set pop \n" \ - : \ - : "r" (base), \ - "i" (op)); - -#define cache32_unroll32(base,op) \ - __asm__ __volatile__( \ - " .set push \n" \ - " .set noreorder \n" \ - " .set mips3 \n" \ - " cache %1, 0x000(%0); cache %1, 0x020(%0) \n" \ - " cache %1, 0x040(%0); cache %1, 0x060(%0) \n" \ - " cache %1, 0x080(%0); cache %1, 0x0a0(%0) \n" \ - " cache %1, 0x0c0(%0); cache %1, 0x0e0(%0) \n" \ - " cache %1, 0x100(%0); cache %1, 0x120(%0) \n" \ - " cache %1, 0x140(%0); cache %1, 0x160(%0) \n" \ - " cache %1, 0x180(%0); cache %1, 0x1a0(%0) \n" \ - " cache %1, 0x1c0(%0); cache %1, 0x1e0(%0) \n" \ - " cache %1, 0x200(%0); cache %1, 0x220(%0) \n" \ - " cache %1, 0x240(%0); cache %1, 0x260(%0) \n" \ - " cache %1, 0x280(%0); cache %1, 0x2a0(%0) \n" \ - " cache %1, 0x2c0(%0); cache %1, 0x2e0(%0) \n" \ - " cache %1, 0x300(%0); cache %1, 0x320(%0) \n" \ - " cache %1, 0x340(%0); cache %1, 0x360(%0) \n" \ - " cache %1, 
0x380(%0); cache %1, 0x3a0(%0) \n" \ - " cache %1, 0x3c0(%0); cache %1, 0x3e0(%0) \n" \ - " .set pop \n" \ - : \ - : "r" (base), \ - "i" (op)); - -#define cache64_unroll32(base,op) \ - __asm__ __volatile__( \ - " .set push \n" \ - " .set noreorder \n" \ - " .set mips3 \n" \ - " cache %1, 0x000(%0); cache %1, 0x040(%0) \n" \ - " cache %1, 0x080(%0); cache %1, 0x0c0(%0) \n" \ - " cache %1, 0x100(%0); cache %1, 0x140(%0) \n" \ - " cache %1, 0x180(%0); cache %1, 0x1c0(%0) \n" \ - " cache %1, 0x200(%0); cache %1, 0x240(%0) \n" \ - " cache %1, 0x280(%0); cache %1, 0x2c0(%0) \n" \ - " cache %1, 0x300(%0); cache %1, 0x340(%0) \n" \ - " cache %1, 0x380(%0); cache %1, 0x3c0(%0) \n" \ - " cache %1, 0x400(%0); cache %1, 0x440(%0) \n" \ - " cache %1, 0x480(%0); cache %1, 0x4c0(%0) \n" \ - " cache %1, 0x500(%0); cache %1, 0x540(%0) \n" \ - " cache %1, 0x580(%0); cache %1, 0x5c0(%0) \n" \ - " cache %1, 0x600(%0); cache %1, 0x640(%0) \n" \ - " cache %1, 0x680(%0); cache %1, 0x6c0(%0) \n" \ - " cache %1, 0x700(%0); cache %1, 0x740(%0) \n" \ - " cache %1, 0x780(%0); cache %1, 0x7c0(%0) \n" \ - " .set pop \n" \ - : \ - : "r" (base), \ - "i" (op)); - -#define cache128_unroll32(base,op) \ - __asm__ __volatile__( \ - " .set push \n" \ - " .set noreorder \n" \ - " .set mips3 \n" \ - " cache %1, 0x000(%0); cache %1, 0x080(%0) \n" \ - " cache %1, 0x100(%0); cache %1, 0x180(%0) \n" \ - " cache %1, 0x200(%0); cache %1, 0x280(%0) \n" \ - " cache %1, 0x300(%0); cache %1, 0x380(%0) \n" \ - " cache %1, 0x400(%0); cache %1, 0x480(%0) \n" \ - " cache %1, 0x500(%0); cache %1, 0x580(%0) \n" \ - " cache %1, 0x600(%0); cache %1, 0x680(%0) \n" \ - " cache %1, 0x700(%0); cache %1, 0x780(%0) \n" \ - " cache %1, 0x800(%0); cache %1, 0x880(%0) \n" \ - " cache %1, 0x900(%0); cache %1, 0x980(%0) \n" \ - " cache %1, 0xa00(%0); cache %1, 0xa80(%0) \n" \ - " cache %1, 0xb00(%0); cache %1, 0xb80(%0) \n" \ - " cache %1, 0xc00(%0); cache %1, 0xc80(%0) \n" \ - " cache %1, 0xd00(%0); cache %1, 0xd80(%0) \n" \ - " cache %1, 0xe00(%0); cache %1, 0xe80(%0) \n" \ - " cache %1, 0xf00(%0); cache %1, 0xf80(%0) \n" \ - " .set pop \n" \ - : \ - : "r" (base), \ - "i" (op)); - -#else -/* - * MIPS R6 changed the cache opcode and moved to a 8-bit offset field. 
- * This means we now need to increment the base register before we flush - * more cache lines - */ -#define cache16_unroll32(base,op) \ - __asm__ __volatile__( \ - " .set push\n" \ - " .set noreorder\n" \ - " .set mips64r6\n" \ - " .set noat\n" \ - " cache %1, 0x000(%0); cache %1, 0x010(%0)\n" \ - " cache %1, 0x020(%0); cache %1, 0x030(%0)\n" \ - " cache %1, 0x040(%0); cache %1, 0x050(%0)\n" \ - " cache %1, 0x060(%0); cache %1, 0x070(%0)\n" \ - " cache %1, 0x080(%0); cache %1, 0x090(%0)\n" \ - " cache %1, 0x0a0(%0); cache %1, 0x0b0(%0)\n" \ - " cache %1, 0x0c0(%0); cache %1, 0x0d0(%0)\n" \ - " cache %1, 0x0e0(%0); cache %1, 0x0f0(%0)\n" \ - " "__stringify(LONG_ADDIU)" $1, %0, 0x100 \n" \ - " cache %1, 0x000($1); cache %1, 0x010($1)\n" \ - " cache %1, 0x020($1); cache %1, 0x030($1)\n" \ - " cache %1, 0x040($1); cache %1, 0x050($1)\n" \ - " cache %1, 0x060($1); cache %1, 0x070($1)\n" \ - " cache %1, 0x080($1); cache %1, 0x090($1)\n" \ - " cache %1, 0x0a0($1); cache %1, 0x0b0($1)\n" \ - " cache %1, 0x0c0($1); cache %1, 0x0d0($1)\n" \ - " cache %1, 0x0e0($1); cache %1, 0x0f0($1)\n" \ - " .set pop\n" \ - : \ - : "r" (base), \ - "i" (op)); - -#define cache32_unroll32(base,op) \ - __asm__ __volatile__( \ - " .set push\n" \ - " .set noreorder\n" \ - " .set mips64r6\n" \ - " .set noat\n" \ - " cache %1, 0x000(%0); cache %1, 0x020(%0)\n" \ - " cache %1, 0x040(%0); cache %1, 0x060(%0)\n" \ - " cache %1, 0x080(%0); cache %1, 0x0a0(%0)\n" \ - " cache %1, 0x0c0(%0); cache %1, 0x0e0(%0)\n" \ - " "__stringify(LONG_ADDIU)" $1, %0, 0x100 \n" \ - " cache %1, 0x000($1); cache %1, 0x020($1)\n" \ - " cache %1, 0x040($1); cache %1, 0x060($1)\n" \ - " cache %1, 0x080($1); cache %1, 0x0a0($1)\n" \ - " cache %1, 0x0c0($1); cache %1, 0x0e0($1)\n" \ - " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \ - " cache %1, 0x000($1); cache %1, 0x020($1)\n" \ - " cache %1, 0x040($1); cache %1, 0x060($1)\n" \ - " cache %1, 0x080($1); cache %1, 0x0a0($1)\n" \ - " cache %1, 0x0c0($1); cache %1, 0x0e0($1)\n" \ - " "__stringify(LONG_ADDIU)" $1, $1, 0x100\n" \ - " cache %1, 0x000($1); cache %1, 0x020($1)\n" \ - " cache %1, 0x040($1); cache %1, 0x060($1)\n" \ - " cache %1, 0x080($1); cache %1, 0x0a0($1)\n" \ - " cache %1, 0x0c0($1); cache %1, 0x0e0($1)\n" \ - " .set pop\n" \ - : \ - : "r" (base), \ - "i" (op)); - -#define cache64_unroll32(base,op) \ - __asm__ __volatile__( \ - " .set push\n" \ - " .set noreorder\n" \ - " .set mips64r6\n" \ - " .set noat\n" \ - " cache %1, 0x000(%0); cache %1, 0x040(%0)\n" \ - " cache %1, 0x080(%0); cache %1, 0x0c0(%0)\n" \ - " "__stringify(LONG_ADDIU)" $1, %0, 0x100 \n" \ - " cache %1, 0x000($1); cache %1, 0x040($1)\n" \ - " cache %1, 0x080($1); cache %1, 0x0c0($1)\n" \ - " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \ - " cache %1, 0x000($1); cache %1, 0x040($1)\n" \ - " cache %1, 0x080($1); cache %1, 0x0c0($1)\n" \ - " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \ - " cache %1, 0x000($1); cache %1, 0x040($1)\n" \ - " cache %1, 0x080($1); cache %1, 0x0c0($1)\n" \ - " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \ - " cache %1, 0x000($1); cache %1, 0x040($1)\n" \ - " cache %1, 0x080($1); cache %1, 0x0c0($1)\n" \ - " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \ - " cache %1, 0x000($1); cache %1, 0x040($1)\n" \ - " cache %1, 0x080($1); cache %1, 0x0c0($1)\n" \ - " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \ - " cache %1, 0x000($1); cache %1, 0x040($1)\n" \ - " cache %1, 0x080($1); cache %1, 0x0c0($1)\n" \ - " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \ - " cache %1, 0x000($1); cache %1, 0x040($1)\n" 
\ - " cache %1, 0x080($1); cache %1, 0x0c0($1)\n" \ - " .set pop\n" \ - : \ - : "r" (base), \ - "i" (op)); - -#define cache128_unroll32(base,op) \ - __asm__ __volatile__( \ - " .set push\n" \ - " .set noreorder\n" \ - " .set mips64r6\n" \ - " .set noat\n" \ - " cache %1, 0x000(%0); cache %1, 0x080(%0)\n" \ - " "__stringify(LONG_ADDIU)" $1, %0, 0x100 \n" \ - " cache %1, 0x000($1); cache %1, 0x080($1)\n" \ - " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \ - " cache %1, 0x000($1); cache %1, 0x080($1)\n" \ - " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \ - " cache %1, 0x000($1); cache %1, 0x080($1)\n" \ - " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \ - " cache %1, 0x000($1); cache %1, 0x080($1)\n" \ - " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \ - " cache %1, 0x000($1); cache %1, 0x080($1)\n" \ - " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \ - " cache %1, 0x000($1); cache %1, 0x080($1)\n" \ - " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \ - " cache %1, 0x000($1); cache %1, 0x080($1)\n" \ - " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \ - " cache %1, 0x000($1); cache %1, 0x080($1)\n" \ - " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \ - " cache %1, 0x000($1); cache %1, 0x080($1)\n" \ - " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \ - " cache %1, 0x000($1); cache %1, 0x080($1)\n" \ - " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \ - " cache %1, 0x000($1); cache %1, 0x080($1)\n" \ - " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \ - " cache %1, 0x000($1); cache %1, 0x080($1)\n" \ - " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \ - " cache %1, 0x000($1); cache %1, 0x080($1)\n" \ - " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \ - " cache %1, 0x000($1); cache %1, 0x080($1)\n" \ - " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \ - " cache %1, 0x000($1); cache %1, 0x080($1)\n" \ - " "__stringify(LONG_ADDIU)" $1, $1, 0x100 \n" \ - " cache %1, 0x000($1); cache %1, 0x080($1)\n" \ - " .set pop\n" \ - : \ - : "r" (base), \ - "i" (op)); -#endif /* CONFIG_CPU_MIPSR6 */ - -/* - * Perform the cache operation specified by op using a user mode virtual - * address while in kernel mode. 
- */ -#define cache16_unroll32_user(base,op) \ - __asm__ __volatile__( \ - " .set push \n" \ - " .set noreorder \n" \ - " .set mips0 \n" \ - " .set eva \n" \ - " cachee %1, 0x000(%0); cachee %1, 0x010(%0) \n" \ - " cachee %1, 0x020(%0); cachee %1, 0x030(%0) \n" \ - " cachee %1, 0x040(%0); cachee %1, 0x050(%0) \n" \ - " cachee %1, 0x060(%0); cachee %1, 0x070(%0) \n" \ - " cachee %1, 0x080(%0); cachee %1, 0x090(%0) \n" \ - " cachee %1, 0x0a0(%0); cachee %1, 0x0b0(%0) \n" \ - " cachee %1, 0x0c0(%0); cachee %1, 0x0d0(%0) \n" \ - " cachee %1, 0x0e0(%0); cachee %1, 0x0f0(%0) \n" \ - " cachee %1, 0x100(%0); cachee %1, 0x110(%0) \n" \ - " cachee %1, 0x120(%0); cachee %1, 0x130(%0) \n" \ - " cachee %1, 0x140(%0); cachee %1, 0x150(%0) \n" \ - " cachee %1, 0x160(%0); cachee %1, 0x170(%0) \n" \ - " cachee %1, 0x180(%0); cachee %1, 0x190(%0) \n" \ - " cachee %1, 0x1a0(%0); cachee %1, 0x1b0(%0) \n" \ - " cachee %1, 0x1c0(%0); cachee %1, 0x1d0(%0) \n" \ - " cachee %1, 0x1e0(%0); cachee %1, 0x1f0(%0) \n" \ - " .set pop \n" \ - : \ - : "r" (base), \ - "i" (op)); - -#define cache32_unroll32_user(base, op) \ - __asm__ __volatile__( \ - " .set push \n" \ - " .set noreorder \n" \ - " .set mips0 \n" \ - " .set eva \n" \ - " cachee %1, 0x000(%0); cachee %1, 0x020(%0) \n" \ - " cachee %1, 0x040(%0); cachee %1, 0x060(%0) \n" \ - " cachee %1, 0x080(%0); cachee %1, 0x0a0(%0) \n" \ - " cachee %1, 0x0c0(%0); cachee %1, 0x0e0(%0) \n" \ - " cachee %1, 0x100(%0); cachee %1, 0x120(%0) \n" \ - " cachee %1, 0x140(%0); cachee %1, 0x160(%0) \n" \ - " cachee %1, 0x180(%0); cachee %1, 0x1a0(%0) \n" \ - " cachee %1, 0x1c0(%0); cachee %1, 0x1e0(%0) \n" \ - " cachee %1, 0x200(%0); cachee %1, 0x220(%0) \n" \ - " cachee %1, 0x240(%0); cachee %1, 0x260(%0) \n" \ - " cachee %1, 0x280(%0); cachee %1, 0x2a0(%0) \n" \ - " cachee %1, 0x2c0(%0); cachee %1, 0x2e0(%0) \n" \ - " cachee %1, 0x300(%0); cachee %1, 0x320(%0) \n" \ - " cachee %1, 0x340(%0); cachee %1, 0x360(%0) \n" \ - " cachee %1, 0x380(%0); cachee %1, 0x3a0(%0) \n" \ - " cachee %1, 0x3c0(%0); cachee %1, 0x3e0(%0) \n" \ - " .set pop \n" \ - : \ - : "r" (base), \ - "i" (op)); - -#define cache64_unroll32_user(base, op) \ - __asm__ __volatile__( \ - " .set push \n" \ - " .set noreorder \n" \ - " .set mips0 \n" \ - " .set eva \n" \ - " cachee %1, 0x000(%0); cachee %1, 0x040(%0) \n" \ - " cachee %1, 0x080(%0); cachee %1, 0x0c0(%0) \n" \ - " cachee %1, 0x100(%0); cachee %1, 0x140(%0) \n" \ - " cachee %1, 0x180(%0); cachee %1, 0x1c0(%0) \n" \ - " cachee %1, 0x200(%0); cachee %1, 0x240(%0) \n" \ - " cachee %1, 0x280(%0); cachee %1, 0x2c0(%0) \n" \ - " cachee %1, 0x300(%0); cachee %1, 0x340(%0) \n" \ - " cachee %1, 0x380(%0); cachee %1, 0x3c0(%0) \n" \ - " cachee %1, 0x400(%0); cachee %1, 0x440(%0) \n" \ - " cachee %1, 0x480(%0); cachee %1, 0x4c0(%0) \n" \ - " cachee %1, 0x500(%0); cachee %1, 0x540(%0) \n" \ - " cachee %1, 0x580(%0); cachee %1, 0x5c0(%0) \n" \ - " cachee %1, 0x600(%0); cachee %1, 0x640(%0) \n" \ - " cachee %1, 0x680(%0); cachee %1, 0x6c0(%0) \n" \ - " cachee %1, 0x700(%0); cachee %1, 0x740(%0) \n" \ - " cachee %1, 0x780(%0); cachee %1, 0x7c0(%0) \n" \ - " .set pop \n" \ - : \ - : "r" (base), \ - "i" (op)); +#define cache_unroll(times, insn, op, addr, lsize) do { \ + int i = 0; \ + unroll(times, _cache_op, insn, op, (addr) + (i++ * (lsize))); \ +} while (0) /* build blast_xxx, blast_xxx_page, blast_xxx_page_indexed */ #define __BUILD_BLAST_CACHE(pfx, desc, indexop, hitop, lsize, extra) \ @@ -539,7 +216,8 @@ static inline void extra##blast_##pfx##cache##lsize(void) \ \ for 
(ws = 0; ws < ws_end; ws += ws_inc) \ for (addr = start; addr < end; addr += lsize * 32) \ - cache##lsize##_unroll32(addr|ws, indexop); \ + cache_unroll(32, kernel_cache, indexop, \ + addr | ws, lsize); \ } \ \ static inline void extra##blast_##pfx##cache##lsize##_page(unsigned long page) \ @@ -548,7 +226,7 @@ static inline void extra##blast_##pfx##cache##lsize##_page(unsigned long page) \ unsigned long end = page + PAGE_SIZE; \ \ do { \ - cache##lsize##_unroll32(start, hitop); \ + cache_unroll(32, kernel_cache, hitop, start, lsize); \ start += lsize * 32; \ } while (start < end); \ } \ @@ -565,7 +243,8 @@ static inline void extra##blast_##pfx##cache##lsize##_page_indexed(unsigned long \ for (ws = 0; ws < ws_end; ws += ws_inc) \ for (addr = start; addr < end; addr += lsize * 32) \ - cache##lsize##_unroll32(addr|ws, indexop); \ + cache_unroll(32, kernel_cache, indexop, \ + addr | ws, lsize); \ } __BUILD_BLAST_CACHE(d, dcache, Index_Writeback_Inv_D, Hit_Writeback_Inv_D, 16, ) @@ -596,7 +275,7 @@ static inline void blast_##pfx##cache##lsize##_user_page(unsigned long page) \ unsigned long end = page + PAGE_SIZE; \ \ do { \ - cache##lsize##_unroll32_user(start, hitop); \ + cache_unroll(32, user_cache, hitop, start, lsize); \ start += lsize * 32; \ } while (start < end); \ } @@ -688,7 +367,8 @@ static inline void blast_##pfx##cache##lsize##_node(long node) \ \ for (ws = 0; ws < ws_end; ws += ws_inc) \ for (addr = start; addr < end; addr += lsize * 32) \ - cache##lsize##_unroll32(addr|ws, indexop); \ + cache_unroll(32, kernel_cache, indexop, \ + addr | ws, lsize); \ } __BUILD_BLAST_CACHE_NODE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 16) diff --git a/arch/mips/include/asm/unroll.h b/arch/mips/include/asm/unroll.h new file mode 100644 index 000000000000..df1cdcfc5a47 --- /dev/null +++ b/arch/mips/include/asm/unroll.h @@ -0,0 +1,76 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef __ASM_UNROLL_H__ +#define __ASM_UNROLL_H__ + +/* + * Explicitly unroll a loop, for use in cases where doing so is performance + * critical. + * + * Ideally we'd rely upon the compiler to provide this but there's no commonly + * available means to do so. For example GCC's "#pragma GCC unroll" + * functionality would be ideal but is only available from GCC 8 onwards. Using + * -funroll-loops is an option but GCC tends to make poor choices when + * compiling our string functions. -funroll-all-loops leads to massive code + * bloat, even if only applied to the string functions. + */ +#define unroll(times, fn, ...) do { \ + extern void bad_unroll(void) \ + __compiletime_error("Unsupported unroll"); \ + \ + /* \ + * We can't unroll if the number of iterations isn't \ + * compile-time constant. Unfortunately GCC versions \ + * up until 4.6 tend to miss obvious constants & cause \ + * this check to fail, even though they go on to \ + * generate reasonable code for the switch statement, \ + * so we skip the sanity check for those compilers. 
\ + */ \ + BUILD_BUG_ON(GCC_VERSION >= 40700 && \ + !__builtin_constant_p(times)); \ + \ + switch (times) { \ + case 32: fn(__VA_ARGS__); /* fall through */ \ + case 31: fn(__VA_ARGS__); /* fall through */ \ + case 30: fn(__VA_ARGS__); /* fall through */ \ + case 29: fn(__VA_ARGS__); /* fall through */ \ + case 28: fn(__VA_ARGS__); /* fall through */ \ + case 27: fn(__VA_ARGS__); /* fall through */ \ + case 26: fn(__VA_ARGS__); /* fall through */ \ + case 25: fn(__VA_ARGS__); /* fall through */ \ + case 24: fn(__VA_ARGS__); /* fall through */ \ + case 23: fn(__VA_ARGS__); /* fall through */ \ + case 22: fn(__VA_ARGS__); /* fall through */ \ + case 21: fn(__VA_ARGS__); /* fall through */ \ + case 20: fn(__VA_ARGS__); /* fall through */ \ + case 19: fn(__VA_ARGS__); /* fall through */ \ + case 18: fn(__VA_ARGS__); /* fall through */ \ + case 17: fn(__VA_ARGS__); /* fall through */ \ + case 16: fn(__VA_ARGS__); /* fall through */ \ + case 15: fn(__VA_ARGS__); /* fall through */ \ + case 14: fn(__VA_ARGS__); /* fall through */ \ + case 13: fn(__VA_ARGS__); /* fall through */ \ + case 12: fn(__VA_ARGS__); /* fall through */ \ + case 11: fn(__VA_ARGS__); /* fall through */ \ + case 10: fn(__VA_ARGS__); /* fall through */ \ + case 9: fn(__VA_ARGS__); /* fall through */ \ + case 8: fn(__VA_ARGS__); /* fall through */ \ + case 7: fn(__VA_ARGS__); /* fall through */ \ + case 6: fn(__VA_ARGS__); /* fall through */ \ + case 5: fn(__VA_ARGS__); /* fall through */ \ + case 4: fn(__VA_ARGS__); /* fall through */ \ + case 3: fn(__VA_ARGS__); /* fall through */ \ + case 2: fn(__VA_ARGS__); /* fall through */ \ + case 1: fn(__VA_ARGS__); /* fall through */ \ + case 0: break; \ + \ + default: \ + /* \ + * Either the iteration count is unreasonable \ + * or we need to add more cases above. \ + */ \ + bad_unroll(); \ + break; \ + } \ +} while (0) + +#endif /* __ASM_UNROLL_H__ */ diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c index 4bf990633135..378cbb02dcdd 100644 --- a/arch/mips/mm/c-r4k.c +++ b/arch/mips/mm/c-r4k.c @@ -271,12 +271,14 @@ static inline void tx49_blast_icache32(void) /* I'm in even chunk. blast odd chunks */ for (ws = 0; ws < ws_end; ws += ws_inc) for (addr = start + 0x400; addr < end; addr += 0x400 * 2) - cache32_unroll32(addr|ws, Index_Invalidate_I); + cache_unroll(32, kernel_cache, Index_Invalidate_I, + addr | ws, 32); CACHE32_UNROLL32_ALIGN; /* I'm in odd chunk. blast even chunks */ for (ws = 0; ws < ws_end; ws += ws_inc) for (addr = start; addr < end; addr += 0x400 * 2) - cache32_unroll32(addr|ws, Index_Invalidate_I); + cache_unroll(32, kernel_cache, Index_Invalidate_I, + addr | ws, 32); } static inline void blast_icache32_r4600_v1_page_indexed(unsigned long page) @@ -302,12 +304,14 @@ static inline void tx49_blast_icache32_page_indexed(unsigned long page) /* I'm in even chunk. blast odd chunks */ for (ws = 0; ws < ws_end; ws += ws_inc) for (addr = start + 0x400; addr < end; addr += 0x400 * 2) - cache32_unroll32(addr|ws, Index_Invalidate_I); + cache_unroll(32, kernel_cache, Index_Invalidate_I, + addr | ws, 32); CACHE32_UNROLL32_ALIGN; /* I'm in odd chunk. 
blast even chunks */ for (ws = 0; ws < ws_end; ws += ws_inc) for (addr = start; addr < end; addr += 0x400 * 2) - cache32_unroll32(addr|ws, Index_Invalidate_I); + cache_unroll(32, kernel_cache, Index_Invalidate_I, + addr | ws, 32); } static void (* r4k_blast_icache_page)(unsigned long addr); -- cgit v1.2.3 From 3c0be5849259b729580c23549330973a2dd513a2 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Tue, 8 Oct 2019 19:46:01 +0000 Subject: MIPS: Drop 32-bit asm string functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We have assembly implementations of strcpy(), strncpy(), strcmp() & strncmp() which: - Are simple byte-at-a-time loops with no particular optimizations. As a comment in the code describes, they're "rather naive". - Offer no clear performance advantage over the generic C implementations - in microbenchmarks performed by Alexander Lobakin the asm functions sometimes win & sometimes lose, but generally not by large margins in either direction. - Don't support 64-bit kernels, where we already make use of the generic C implementations. - Tend to bloat kernel code size due to inlining. - Don't support CONFIG_FORTIFY_SOURCE. - Won't support nanoMIPS without rework. For all of these reasons, delete the asm implementations & make use of the generic C implementations for 32-bit kernels just like we already do for 64-bit kernels. Signed-off-by: Paul Burton URL: https://lore.kernel.org/linux-mips/a2a35f1cf58d6db19eb4af9b4ae21e35@dlink.ru/ Cc: Alexander Lobakin Reviewed-by: Philippe Mathieu-Daudé Cc: linux-mips@vger.kernel.org --- arch/mips/include/asm/string.h | 123 ----------------------------------------- 1 file changed, 123 deletions(-) (limited to 'arch') diff --git a/arch/mips/include/asm/string.h b/arch/mips/include/asm/string.h index 4b938c55b397..1de3bbce8e88 100644 --- a/arch/mips/include/asm/string.h +++ b/arch/mips/include/asm/string.h @@ -10,129 +10,6 @@ #ifndef _ASM_STRING_H #define _ASM_STRING_H -#if !defined(__OPTIMIZE__) || !defined(CONFIG_FORTIFY_SOURCE) - -/* - * Most of the inline functions are rather naive implementations so I just - * didn't bother updating them for 64-bit ... 
- */ -#ifdef CONFIG_32BIT - -#ifndef IN_STRING_C - -#define __HAVE_ARCH_STRCPY -static __inline__ char *strcpy(char *__dest, __const__ char *__src) -{ - char *__xdest = __dest; - - __asm__ __volatile__( - ".set\tnoreorder\n\t" - ".set\tnoat\n" - "1:\tlbu\t$1,(%1)\n\t" - "addiu\t%1,1\n\t" - "sb\t$1,(%0)\n\t" - "bnez\t$1,1b\n\t" - "addiu\t%0,1\n\t" - ".set\tat\n\t" - ".set\treorder" - : "=r" (__dest), "=r" (__src) - : "0" (__dest), "1" (__src) - : "memory"); - - return __xdest; -} - -#define __HAVE_ARCH_STRNCPY -static __inline__ char *strncpy(char *__dest, __const__ char *__src, size_t __n) -{ - char *__xdest = __dest; - - if (__n == 0) - return __xdest; - - __asm__ __volatile__( - ".set\tnoreorder\n\t" - ".set\tnoat\n" - "1:\tlbu\t$1,(%1)\n\t" - "subu\t%2,1\n\t" - "sb\t$1,(%0)\n\t" - "beqz\t$1,2f\n\t" - "addiu\t%0,1\n\t" - "bnez\t%2,1b\n\t" - "addiu\t%1,1\n" - "2:\n\t" - ".set\tat\n\t" - ".set\treorder" - : "=r" (__dest), "=r" (__src), "=r" (__n) - : "0" (__dest), "1" (__src), "2" (__n) - : "memory"); - - return __xdest; -} - -#define __HAVE_ARCH_STRCMP -static __inline__ int strcmp(__const__ char *__cs, __const__ char *__ct) -{ - int __res; - - __asm__ __volatile__( - ".set\tnoreorder\n\t" - ".set\tnoat\n\t" - "lbu\t%2,(%0)\n" - "1:\tlbu\t$1,(%1)\n\t" - "addiu\t%0,1\n\t" - "bne\t$1,%2,2f\n\t" - "addiu\t%1,1\n\t" - "bnez\t%2,1b\n\t" - "lbu\t%2,(%0)\n\t" -#if defined(CONFIG_CPU_R3000) - "nop\n\t" -#endif - "move\t%2,$1\n" - "2:\tsubu\t%2,$1\n" - "3:\t.set\tat\n\t" - ".set\treorder" - : "=r" (__cs), "=r" (__ct), "=r" (__res) - : "0" (__cs), "1" (__ct)); - - return __res; -} - -#endif /* !defined(IN_STRING_C) */ - -#define __HAVE_ARCH_STRNCMP -static __inline__ int -strncmp(__const__ char *__cs, __const__ char *__ct, size_t __count) -{ - int __res; - - __asm__ __volatile__( - ".set\tnoreorder\n\t" - ".set\tnoat\n" - "1:\tlbu\t%3,(%0)\n\t" - "beqz\t%2,2f\n\t" - "lbu\t$1,(%1)\n\t" - "subu\t%2,1\n\t" - "bne\t$1,%3,3f\n\t" - "addiu\t%0,1\n\t" - "bnez\t%3,1b\n\t" - "addiu\t%1,1\n" - "2:\n\t" -#if defined(CONFIG_CPU_R3000) - "nop\n\t" -#endif - "move\t%3,$1\n" - "3:\tsubu\t%3,$1\n\t" - ".set\tat\n\t" - ".set\treorder" - : "=r" (__cs), "=r" (__ct), "=r" (__count), "=r" (__res) - : "0" (__cs), "1" (__ct), "2" (__count)); - - return __res; -} -#endif /* CONFIG_32BIT */ -#endif /* !defined(__OPTIMIZE__) || !defined(CONFIG_FORTIFY_SOURCE) */ - #define __HAVE_ARCH_MEMSET extern void *memset(void *__s, int __c, size_t __count); -- cgit v1.2.3 From d11646b5ce930c4d5e933c4d97db5a06a67a211b Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Wed, 9 Oct 2019 15:27:12 +0200 Subject: MIPS: fw: arc: remove unused ARC code Current kernel uses only a few ARC calls. Drop all unused ARC functions. 
Signed-off-by: Thomas Bogendoerfer Signed-off-by: Paul Burton Cc: Ralf Baechle Cc: James Hogan Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org --- arch/mips/fw/arc/Makefile | 2 +- arch/mips/fw/arc/env.c | 6 -- arch/mips/fw/arc/file.c | 49 ---------------- arch/mips/fw/arc/identify.c | 5 ++ arch/mips/fw/arc/misc.c | 59 ------------------- arch/mips/fw/arc/salone.c | 25 -------- arch/mips/fw/arc/time.c | 25 -------- arch/mips/fw/arc/tree.c | 127 ---------------------------------------- arch/mips/include/asm/sgialib.h | 12 ---- 9 files changed, 6 insertions(+), 304 deletions(-) delete mode 100644 arch/mips/fw/arc/salone.c delete mode 100644 arch/mips/fw/arc/time.c delete mode 100644 arch/mips/fw/arc/tree.c (limited to 'arch') diff --git a/arch/mips/fw/arc/Makefile b/arch/mips/fw/arc/Makefile index 31dd7305d643..3cc82d7fe548 100644 --- a/arch/mips/fw/arc/Makefile +++ b/arch/mips/fw/arc/Makefile @@ -4,7 +4,7 @@ # lib-y += cmdline.o env.o file.o identify.o init.o \ - misc.o salone.o time.o tree.o + misc.o lib-$(CONFIG_ARC_MEMORY) += memory.o lib-$(CONFIG_ARC_CONSOLE) += arc_con.o diff --git a/arch/mips/fw/arc/env.c b/arch/mips/fw/arc/env.c index 1118a26b32ee..02407a7bb38e 100644 --- a/arch/mips/fw/arc/env.c +++ b/arch/mips/fw/arc/env.c @@ -19,9 +19,3 @@ ArcGetEnvironmentVariable(CHAR *name) { return (CHAR *) ARC_CALL1(get_evar, name); } - -LONG __init -ArcSetEnvironmentVariable(PCHAR name, PCHAR value) -{ - return ARC_CALL2(set_evar, name, value); -} diff --git a/arch/mips/fw/arc/file.c b/arch/mips/fw/arc/file.c index 49fd3ff13fe5..b0d8535c80cc 100644 --- a/arch/mips/fw/arc/file.c +++ b/arch/mips/fw/arc/file.c @@ -12,63 +12,14 @@ #include #include -LONG -ArcGetDirectoryEntry(ULONG FileID, struct linux_vdirent *Buffer, - ULONG N, ULONG *Count) -{ - return ARC_CALL4(get_vdirent, FileID, Buffer, N, Count); -} - -LONG -ArcOpen(CHAR *Path, enum linux_omode OpenMode, ULONG *FileID) -{ - return ARC_CALL3(open, Path, OpenMode, FileID); -} - -LONG -ArcClose(ULONG FileID) -{ - return ARC_CALL1(close, FileID); -} - LONG ArcRead(ULONG FileID, VOID *Buffer, ULONG N, ULONG *Count) { return ARC_CALL4(read, FileID, Buffer, N, Count); } -LONG -ArcGetReadStatus(ULONG FileID) -{ - return ARC_CALL1(get_rstatus, FileID); -} - LONG ArcWrite(ULONG FileID, PVOID Buffer, ULONG N, PULONG Count) { return ARC_CALL4(write, FileID, Buffer, N, Count); } - -LONG -ArcSeek(ULONG FileID, struct linux_bigint *Position, enum linux_seekmode SeekMode) -{ - return ARC_CALL3(seek, FileID, Position, SeekMode); -} - -LONG -ArcMount(char *name, enum linux_mountops op) -{ - return ARC_CALL2(mount, name, op); -} - -LONG -ArcGetFileInformation(ULONG FileID, struct linux_finfo *Information) -{ - return ARC_CALL2(get_finfo, FileID, Information); -} - -LONG ArcSetFileInformation(ULONG FileID, ULONG AttributeFlags, - ULONG AttributeMask) -{ - return ARC_CALL3(set_finfo, FileID, AttributeFlags, AttributeMask); -} diff --git a/arch/mips/fw/arc/identify.c b/arch/mips/fw/arc/identify.c index f90266c02c9d..7530c7b2fd39 100644 --- a/arch/mips/fw/arc/identify.c +++ b/arch/mips/fw/arc/identify.c @@ -87,6 +87,11 @@ const char *get_system_type(void) return system_type; } +static pcomponent * __init ArcGetChild(pcomponent *Current) +{ + return (pcomponent *) ARC_CALL1(child_component, Current); +} + void __init prom_identify_arch(void) { pcomponent *p; diff --git a/arch/mips/fw/arc/misc.c b/arch/mips/fw/arc/misc.c index 19f710117d97..d5b2d5901324 100644 --- a/arch/mips/fw/arc/misc.c +++ b/arch/mips/fw/arc/misc.c @@ -20,47 +20,6 @@ #include 
#include -VOID __noreturn -ArcHalt(VOID) -{ - bc_disable(); - local_irq_disable(); - ARC_CALL0(halt); - - unreachable(); -} - -VOID __noreturn -ArcPowerDown(VOID) -{ - bc_disable(); - local_irq_disable(); - ARC_CALL0(pdown); - - unreachable(); -} - -/* XXX is this a soft reset basically? XXX */ -VOID __noreturn -ArcRestart(VOID) -{ - bc_disable(); - local_irq_disable(); - ARC_CALL0(restart); - - unreachable(); -} - -VOID __noreturn -ArcReboot(VOID) -{ - bc_disable(); - local_irq_disable(); - ARC_CALL0(reboot); - - unreachable(); -} - VOID __noreturn ArcEnterInteractiveMode(VOID) { @@ -71,24 +30,6 @@ ArcEnterInteractiveMode(VOID) unreachable(); } -LONG -ArcSaveConfiguration(VOID) -{ - return ARC_CALL0(cfg_save); -} - -struct linux_sysid * -ArcGetSystemId(VOID) -{ - return (struct linux_sysid *) ARC_CALL0(get_sysid); -} - -VOID __init -ArcFlushAllCaches(VOID) -{ - ARC_CALL0(cache_flush); -} - DISPLAY_STATUS * __init ArcGetDisplayStatus(ULONG FileID) { return (DISPLAY_STATUS *) ARC_CALL1(GetDisplayStatus, FileID); diff --git a/arch/mips/fw/arc/salone.c b/arch/mips/fw/arc/salone.c deleted file mode 100644 index 2d99f44d5576..000000000000 --- a/arch/mips/fw/arc/salone.c +++ /dev/null @@ -1,25 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Routines to load into memory and execute stand-along program images using - * ARCS PROM firmware. - * - * Copyright (C) 1996 David S. Miller (davem@davemloft.net) - */ -#include -#include - -LONG __init ArcLoad(CHAR *Path, ULONG TopAddr, ULONG *ExecAddr, ULONG *LowAddr) -{ - return ARC_CALL4(load, Path, TopAddr, ExecAddr, LowAddr); -} - -LONG __init ArcInvoke(ULONG ExecAddr, ULONG StackAddr, ULONG Argc, CHAR *Argv[], - CHAR *Envp[]) -{ - return ARC_CALL5(invoke, ExecAddr, StackAddr, Argc, Argv, Envp); -} - -LONG __init ArcExecute(CHAR *Path, LONG Argc, CHAR *Argv[], CHAR *Envp[]) -{ - return ARC_CALL4(exec, Path, Argc, Argv, Envp); -} diff --git a/arch/mips/fw/arc/time.c b/arch/mips/fw/arc/time.c deleted file mode 100644 index 190cdb50b895..000000000000 --- a/arch/mips/fw/arc/time.c +++ /dev/null @@ -1,25 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Extracting time information from ARCS prom. - * - * Copyright (C) 1996 David S. Miller (davem@davemloft.net) - */ -#include - -#include -#include - -struct linux_tinfo * __init -ArcGetTime(VOID) -{ - return (struct linux_tinfo *) ARC_CALL0(get_tinfo); -} - -ULONG __init -ArcGetRelativeTime(VOID) -{ - return ARC_CALL0(get_rtime); -} diff --git a/arch/mips/fw/arc/tree.c b/arch/mips/fw/arc/tree.c deleted file mode 100644 index 924a37dc2569..000000000000 --- a/arch/mips/fw/arc/tree.c +++ /dev/null @@ -1,127 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * PROM component device tree code. - * - * Copyright (C) 1996 David S. Miller (davem@davemloft.net) - * Copyright (C) 1999 Ralf Baechle (ralf@gnu.org) - * Copyright (C) 1999 Silicon Graphics, Inc. 
- */ -#include -#include -#include - -#undef DEBUG_PROM_TREE - -pcomponent * __init -ArcGetPeer(pcomponent *Current) -{ - if (Current == PROM_NULL_COMPONENT) - return PROM_NULL_COMPONENT; - - return (pcomponent *) ARC_CALL1(next_component, Current); -} - -pcomponent * __init -ArcGetChild(pcomponent *Current) -{ - return (pcomponent *) ARC_CALL1(child_component, Current); -} - -pcomponent * __init -ArcGetParent(pcomponent *Current) -{ - if (Current == PROM_NULL_COMPONENT) - return PROM_NULL_COMPONENT; - - return (pcomponent *) ARC_CALL1(parent_component, Current); -} - -LONG __init -ArcGetConfigurationData(VOID *Buffer, pcomponent *Current) -{ - return ARC_CALL2(component_data, Buffer, Current); -} - -pcomponent * __init -ArcAddChild(pcomponent *Current, pcomponent *Template, VOID *ConfigurationData) -{ - return (pcomponent *) - ARC_CALL3(child_add, Current, Template, ConfigurationData); -} - -LONG __init -ArcDeleteComponent(pcomponent *ComponentToDelete) -{ - return ARC_CALL1(comp_del, ComponentToDelete); -} - -pcomponent * __init -ArcGetComponent(CHAR *Path) -{ - return (pcomponent *)ARC_CALL1(component_by_path, Path); -} - -#ifdef DEBUG_PROM_TREE - -static char *classes[] = { - "system", "processor", "cache", "adapter", "controller", "peripheral", - "memory" -}; - -static char *types[] = { - "arc", "cpu", "fpu", "picache", "pdcache", "sicache", "sdcache", - "sccache", "memdev", "eisa adapter", "tc adapter", "scsi adapter", - "dti adapter", "multi-func adapter", "disk controller", - "tp controller", "cdrom controller", "worm controller", - "serial controller", "net controller", "display controller", - "parallel controller", "pointer controller", "keyboard controller", - "audio controller", "misc controller", "disk peripheral", - "floppy peripheral", "tp peripheral", "modem peripheral", - "monitor peripheral", "printer peripheral", "pointer peripheral", - "keyboard peripheral", "terminal peripheral", "line peripheral", - "net peripheral", "misc peripheral", "anonymous" -}; - -static char *iflags[] = { - "bogus", "read only", "removable", "console in", "console out", - "input", "output" -}; - -static void __init -dump_component(pcomponent *p) -{ - printk("[%p]:class<%s>type<%s>flags<%s>ver<%d>rev<%d>", - p, classes[p->class], types[p->type], - iflags[p->iflags], p->vers, p->rev); - printk("key<%08lx>\n\tamask<%08lx>cdsize<%d>ilen<%d>iname<%s>\n", - p->key, p->amask, (int)p->cdsize, (int)p->ilen, p->iname); -} - -static void __init -traverse(pcomponent *p, int op) -{ - dump_component(p); - if(ArcGetChild(p)) - traverse(ArcGetChild(p), 1); - if(ArcGetPeer(p) && op) - traverse(ArcGetPeer(p), 1); -} - -void __init -prom_testtree(void) -{ - pcomponent *p; - - p = ArcGetChild(PROM_NULL_COMPONENT); - dump_component(p); - p = ArcGetChild(p); - while(p) { - dump_component(p); - p = ArcGetPeer(p); - } -} - -#endif /* DEBUG_PROM_TREE */ diff --git a/arch/mips/include/asm/sgialib.h b/arch/mips/include/asm/sgialib.h index 0d9fad5915fe..21d17eb25ed8 100644 --- a/arch/mips/include/asm/sgialib.h +++ b/arch/mips/include/asm/sgialib.h @@ -47,12 +47,6 @@ extern void prom_meminit(void); /* PROM device tree library routines. */ #define PROM_NULL_COMPONENT ((pcomponent *) 0) -/* Get sibling component of THIS. */ -extern pcomponent *ArcGetPeer(pcomponent *this); - -/* Get child component of THIS. 
 */ -extern pcomponent *ArcGetChild(pcomponent *this); - /* This is called at prom_init time to identify the * ARC architecture we are running on */ @@ -60,7 +54,6 @@ extern void prom_identify_arch(void); /* Environment variable routines. */ extern PCHAR ArcGetEnvironmentVariable(PCHAR name); -extern LONG ArcSetEnvironmentVariable(PCHAR name, PCHAR value); /* ARCS command line parsing. */ extern void prom_init_cmdline(void); @@ -70,12 +63,7 @@ extern LONG ArcRead(ULONG fd, PVOID buf, ULONG num, PULONG cnt); extern LONG ArcWrite(ULONG fd, PVOID buf, ULONG num, PULONG cnt); /* Misc. routines. */ -extern VOID ArcHalt(VOID) __noreturn; -extern VOID ArcPowerDown(VOID) __noreturn; -extern VOID ArcRestart(VOID) __noreturn; -extern VOID ArcReboot(VOID) __noreturn; extern VOID ArcEnterInteractiveMode(VOID) __noreturn; -extern VOID ArcFlushAllCaches(VOID); extern DISPLAY_STATUS *ArcGetDisplayStatus(ULONG FileID); #endif /* _ASM_SGIALIB_H */ -- cgit v1.2.3 From 2b730952066cd022d1f46e801f06ca6ca9878823 Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Wed, 9 Oct 2019 15:27:13 +0200 Subject: MIPS: fw: arc: use call_o32 to call ARC prom from 64bit kernel When using a 64bit kernel with the generic spaces setup, the stack is also placed in XKPHYS, which the 32bit PROM can't handle. By using call_o32 for ARC_CALLs, a stack placed in KSEG0 is used when calling the PROM. Signed-off-by: Thomas Bogendoerfer Signed-off-by: Paul Burton Cc: Ralf Baechle Cc: James Hogan Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org --- arch/mips/fw/arc/init.c | 5 ++ arch/mips/include/asm/sgiarcs.h | 103 ++++++++++++---------------------------- 2 files changed, 35 insertions(+), 73 deletions(-) (limited to 'arch') diff --git a/arch/mips/fw/arc/init.c b/arch/mips/fw/arc/init.c index 008555969534..7b663455de6b 100644 --- a/arch/mips/fw/arc/init.c +++ b/arch/mips/fw/arc/init.c @@ -21,6 +21,11 @@ struct linux_romvec *romvec; int prom_argc; LONG *_prom_argv, *_prom_envp; +#if defined(CONFIG_64BIT) && defined(CONFIG_FW_ARC32) +/* stack for calling 32bit ARC prom */ +u64 o32_stk[4096]; +#endif + void __init prom_init(void) { PSYSTEM_PARAMETER_BLOCK pb = PROMBLOCK; diff --git a/arch/mips/include/asm/sgiarcs.h b/arch/mips/include/asm/sgiarcs.h index 105a9479ac5f..e1512cab180b 100644 --- a/arch/mips/include/asm/sgiarcs.h +++ b/arch/mips/include/asm/sgiarcs.h @@ -12,6 +12,8 @@ #ifndef _ASM_SGIARCS_H #define _ASM_SGIARCS_H +#include + #include #include @@ -368,110 +370,65 @@ struct linux_smonblock { #if defined(CONFIG_64BIT) && defined(CONFIG_FW_ARC32) -#define __arc_clobbers \ - "$2", "$3" /* ...
*/, "$8", "$9", "$10", "$11", \ - "$12", "$13", "$14", "$15", "$16", "$24", "$25", "$31" +extern long call_o32(long vec, void *stack, ...); + +extern u64 o32_stk[4096]; +#define O32_STK (&o32_stk[ARRAY_SIZE(o32_stk)]) #define ARC_CALL0(dest) \ ({ long __res; \ long __vec = (long) romvec->dest; \ - __asm__ __volatile__( \ - "dsubu\t$29, 32\n\t" \ - "jalr\t%1\n\t" \ - "daddu\t$29, 32\n\t" \ - "move\t%0, $2" \ - : "=r" (__res), "=r" (__vec) \ - : "1" (__vec) \ - : __arc_clobbers, "$4", "$5", "$6", "$7"); \ - (unsigned long) __res; \ + __res = call_o32(__vec, O32_STK); \ + __res; \ }) #define ARC_CALL1(dest, a1) \ ({ long __res; \ - register signed int __a1 __asm__("$4") = (int) (long) (a1); \ + int __a1 = (int) (long) (a1); \ long __vec = (long) romvec->dest; \ - __asm__ __volatile__( \ - "dsubu\t$29, 32\n\t" \ - "jalr\t%1\n\t" \ - "daddu\t$29, 32\n\t" \ - "move\t%0, $2" \ - : "=r" (__res), "=r" (__vec) \ - : "1" (__vec), "r" (__a1) \ - : __arc_clobbers, "$5", "$6", "$7"); \ - (unsigned long) __res; \ + __res = call_o32(__vec, O32_STK, __a1); \ + __res; \ }) #define ARC_CALL2(dest, a1, a2) \ ({ long __res; \ - register signed int __a1 __asm__("$4") = (int) (long) (a1); \ - register signed int __a2 __asm__("$5") = (int) (long) (a2); \ + int __a1 = (int) (long) (a1); \ + int __a2 = (int) (long) (a2); \ long __vec = (long) romvec->dest; \ - __asm__ __volatile__( \ - "dsubu\t$29, 32\n\t" \ - "jalr\t%1\n\t" \ - "daddu\t$29, 32\n\t" \ - "move\t%0, $2" \ - : "=r" (__res), "=r" (__vec) \ - : "1" (__vec), "r" (__a1), "r" (__a2) \ - : __arc_clobbers, "$6", "$7"); \ + __res = call_o32(__vec, O32_STK, __a1, __a2); \ __res; \ }) #define ARC_CALL3(dest, a1, a2, a3) \ ({ long __res; \ - register signed int __a1 __asm__("$4") = (int) (long) (a1); \ - register signed int __a2 __asm__("$5") = (int) (long) (a2); \ - register signed int __a3 __asm__("$6") = (int) (long) (a3); \ + int __a1 = (int) (long) (a1); \ + int __a2 = (int) (long) (a2); \ + int __a3 = (int) (long) (a3); \ long __vec = (long) romvec->dest; \ - __asm__ __volatile__( \ - "dsubu\t$29, 32\n\t" \ - "jalr\t%1\n\t" \ - "daddu\t$29, 32\n\t" \ - "move\t%0, $2" \ - : "=r" (__res), "=r" (__vec) \ - : "1" (__vec), "r" (__a1), "r" (__a2), "r" (__a3) \ - : __arc_clobbers, "$7"); \ + __res = call_o32(__vec, O32_STK, __a1, __a2, __a3); \ __res; \ }) #define ARC_CALL4(dest, a1, a2, a3, a4) \ ({ long __res; \ - register signed int __a1 __asm__("$4") = (int) (long) (a1); \ - register signed int __a2 __asm__("$5") = (int) (long) (a2); \ - register signed int __a3 __asm__("$6") = (int) (long) (a3); \ - register signed int __a4 __asm__("$7") = (int) (long) (a4); \ + int __a1 = (int) (long) (a1); \ + int __a2 = (int) (long) (a2); \ + int __a3 = (int) (long) (a3); \ + int __a4 = (int) (long) (a4); \ long __vec = (long) romvec->dest; \ - __asm__ __volatile__( \ - "dsubu\t$29, 32\n\t" \ - "jalr\t%1\n\t" \ - "daddu\t$29, 32\n\t" \ - "move\t%0, $2" \ - : "=r" (__res), "=r" (__vec) \ - : "1" (__vec), "r" (__a1), "r" (__a2), "r" (__a3), \ - "r" (__a4) \ - : __arc_clobbers); \ + __res = call_o32(__vec, O32_STK, __a1, __a2, __a3, __a4); \ __res; \ }) -#define ARC_CALL5(dest, a1, a2, a3, a4, a5) \ +#define ARC_CALL5(dest, a1, a2, a3, a4, a5) \ ({ long __res; \ - register signed int __a1 __asm__("$4") = (int) (long) (a1); \ - register signed int __a2 __asm__("$5") = (int) (long) (a2); \ - register signed int __a3 __asm__("$6") = (int) (long) (a3); \ - register signed int __a4 __asm__("$7") = (int) (long) (a4); \ - register signed int __a5 = (int) (long) (a5); \ + int __a1 = 
(int) (long) (a1); \ + int __a2 = (int) (long) (a2); \ + int __a3 = (int) (long) (a3); \ + int __a4 = (int) (long) (a4); \ + int __a5 = (int) (long) (a5); \ long __vec = (long) romvec->dest; \ - __asm__ __volatile__( \ - "dsubu\t$29, 32\n\t" \ - "sw\t%7, 16($29)\n\t" \ - "jalr\t%1\n\t" \ - "daddu\t$29, 32\n\t" \ - "move\t%0, $2" \ - : "=r" (__res), "=r" (__vec) \ - : "1" (__vec), \ - "r" (__a1), "r" (__a2), "r" (__a3), "r" (__a4), \ - "r" (__a5) \ - : __arc_clobbers); \ + __res = call_o32(__vec, O32_STK, __a1, __a2, __a3, __a4, __a5); \ __res; \ }) -- cgit v1.2.3 From 39b2d7565a4736a30c6eeb550901433b44aebf57 Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Wed, 9 Oct 2019 15:27:14 +0200 Subject: MIPS: Kconfig: always select ARC_MEMORY and ARC_PROMLIB for platform Instead of having a default y option with depends simply select options for the platforms where they are needed. Signed-off-by: Thomas Bogendoerfer Signed-off-by: Paul Burton Cc: Ralf Baechle Cc: James Hogan Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org --- arch/mips/Kconfig | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 41d25179c3ed..27244abf560f 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -360,6 +360,8 @@ config MACH_DECSTATION config MACH_JAZZ bool "Jazz family of machines" + select ARC_MEMORY + select ARC_PROMLIB select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_MIGHT_HAVE_PC_SERIO select FW_ARC @@ -632,6 +634,7 @@ config RALINK config SGI_IP22 bool "SGI IP22 (Indy/Indigo2)" + select ARC_PROMLIB select FW_ARC select FW_ARC32 select ARCH_MIGHT_HAVE_PC_SERIO @@ -700,6 +703,7 @@ config SGI_IP27 config SGI_IP28 bool "SGI IP28 (Indigo2 R10k)" + select ARC_PROMLIB select FW_ARC select FW_ARC64 select ARCH_MIGHT_HAVE_PC_SERIO @@ -738,6 +742,8 @@ config SGI_IP28 config SGI_IP32 bool "SGI IP32 (O2)" + select ARC_MEMORY + select ARC_PROMLIB select ARCH_HAS_PHYS_TO_DMA select FW_ARC select FW_ARC32 @@ -845,6 +851,8 @@ config SIBYTE_BIGSUR config SNI_RM bool "SNI RM200/300/400" + select ARC_MEMORY + select ARC_PROMLIB select FW_ARC if CPU_LITTLE_ENDIAN select FW_ARC32 if CPU_LITTLE_ENDIAN select FW_SNIPROM if CPU_BIG_ENDIAN @@ -1361,13 +1369,9 @@ config ARC_CONSOLE config ARC_MEMORY bool - depends on MACH_JAZZ || SNI_RM || SGI_IP32 - default y config ARC_PROMLIB bool - depends on MACH_JAZZ || SNI_RM || SGI_IP22 || SGI_IP28 || SGI_IP32 - default y config FW_ARC64 bool -- cgit v1.2.3 From 351889d356290554bf5d20d684244813a654afbe Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Wed, 9 Oct 2019 15:27:15 +0200 Subject: MIPS: fw: arc: workaround 64bit kernel/32bit ARC problems Pointer arguments for 32bit ARC PROMs must reside in CKSEG0/1. While the initial stack resides in CKSEG0 the first kernel thread stack is already placed at a XKPHYS address, which ARC32 can't handle. The workaround here is to use static variables, which are placed into BSS and linked to a CKSEG0 address. 
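To make the address constraint concrete, here is a hedged illustration (not part of the patch; the addresses are representative only): on a 64-bit kernel, statics live in BSS inside the kernel image mapping, a CKSEG0 address that sign-extends from 32 bits, while later kernel thread stacks live in XKPHYS, which has no 32-bit representation at all:

    /* Sketch: why a static buffer works where a stack buffer does not. */
    static CHAR it;             /* BSS -> CKSEG0, e.g. 0xffffffff88001230 */

    void demo(void)
    {
            CHAR on_stack;      /* thread stack -> XKPHYS, e.g. 0x9800000001234560 */

            /* Only &it survives truncation to the o32 PROM's 32-bit world:
             * the low 32 bits still name the same memory. &on_stack does not. */
    }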
Signed-off-by: Thomas Bogendoerfer Signed-off-by: Paul Burton Cc: Ralf Baechle Cc: James Hogan Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org --- arch/mips/fw/arc/promlib.c | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/mips/fw/arc/promlib.c b/arch/mips/fw/arc/promlib.c index be381307fbb0..5e9e840a9314 100644 --- a/arch/mips/fw/arc/promlib.c +++ b/arch/mips/fw/arc/promlib.c @@ -11,6 +11,21 @@ #include #include +#if defined(CONFIG_64BIT) && defined(CONFIG_FW_ARC32) +/* + * For 64bit kernels working with a 32bit ARC PROM, pointer arguments + * for ARC calls need to reside in CKSEG0/1. But as soon as the kernel + * switches to its first kernel thread, the stack is set to an address + * in XKPHYS, so anything on the stack can't be used anymore. This is + * solved by using a static declaration: variables are put into BSS, + * which is linked to a CKSEG0 address. Since this is only used on UP + * platforms there is no spinlock needed. + */ +#define O32_STATIC static +#else +#define O32_STATIC +#endif + /* * IP22 boardcache is not compatible with board caches. Thus we disable it * during romvec action. Since r4xx0.c is always compiled and linked with your @@ -23,8 +38,10 @@ void prom_putchar(char c) { - ULONG cnt; - CHAR it = c; + O32_STATIC ULONG cnt; + O32_STATIC CHAR it; + + it = c; bc_disable(); ArcWrite(1, &it, 1, &cnt); @@ -33,8 +50,8 @@ void prom_putchar(char c) char prom_getchar(void) { - ULONG cnt; - CHAR c; + O32_STATIC ULONG cnt; + O32_STATIC CHAR c; bc_disable(); ArcRead(0, &c, 1, &cnt); -- cgit v1.2.3 From 931e1bfea4031811b75f7af688a3e17fc2b121fb Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Wed, 9 Oct 2019 15:27:16 +0200 Subject: MIPS: SGI-IP22: set PHYS_OFFSET to memory start IP22 memory starts at physical 0x08000000. To avoid wasting memory for page structs, set PHYS_OFFSET. Signed-off-by: Thomas Bogendoerfer Signed-off-by: Paul Burton Cc: Ralf Baechle Cc: James Hogan Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org --- arch/mips/include/asm/mach-ip22/spaces.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/mips/include/asm/mach-ip22/spaces.h b/arch/mips/include/asm/mach-ip22/spaces.h index 7f9fa6f66059..78d0335f5f2e 100644 --- a/arch/mips/include/asm/mach-ip22/spaces.h +++ b/arch/mips/include/asm/mach-ip22/spaces.h @@ -10,11 +10,10 @@ #ifndef _ASM_MACH_IP22_SPACES_H #define _ASM_MACH_IP22_SPACES_H +#define PHYS_OFFSET _AC(0x08000000, UL) #ifdef CONFIG_64BIT -#define PAGE_OFFSET 0xffffffff80000000UL - #define CAC_BASE 0xffffffff80000000 #define IO_BASE 0xffffffffa0000000 #define UNCAC_BASE 0xffffffffa0000000 -- cgit v1.2.3 From c0de00b286ed1c14064ff94ca70abdfd4e4d11e8 Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Wed, 9 Oct 2019 15:27:17 +0200 Subject: MIPS: SGI-IP22/28: Use PROM for memory detection EARLY_PRINTK uses ArcWrite (via prom_putchar) on IP22/28, which needs to not mess up the PROM's data structures. ARC PROM gives out a list of memory chunks, which are used and which are free. This fixes the problem of early printk not working. By using XKPHYS spaces, more than 256MB of memory on Indigo2 R4k machines works now, too.
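As a hedged sketch of the resulting detection flow (the real code is the memory.c and ip22-mc.c hunks below; memtype_is_free() is an illustrative stand-in for the PROM's memory-type classification):

    struct linux_mdesc *p = PROM_NULL_MDESC;

    /* Walk the PROM's memory descriptor list and register usable chunks. */
    while ((p = ArcGetMemoryDescriptor(p)) != NULL) {
            unsigned long base = p->base << PAGE_SHIFT;   /* descriptors count pages */
            unsigned long size = p->pages << PAGE_SHIFT;

            if (memtype_is_free(p->type))                 /* hypothetical helper */
                    memblock_add(base, size);
            /* firmware-owned chunks stay reserved until
             * prom_free_prom_memory() releases them */
    }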
Signed-off-by: Thomas Bogendoerfer Signed-off-by: Paul Burton Cc: Ralf Baechle Cc: James Hogan Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org --- arch/mips/Kconfig | 20 ++------- arch/mips/fw/arc/memory.c | 9 ++++ arch/mips/include/asm/bootinfo.h | 1 + arch/mips/include/asm/mach-ip22/spaces.h | 9 ---- arch/mips/sgi-ip22/ip22-mc.c | 74 +++++++++----------------------- 5 files changed, 34 insertions(+), 79 deletions(-) (limited to 'arch') diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 27244abf560f..87bfefcbdb06 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -634,6 +634,7 @@ config RALINK config SGI_IP22 bool "SGI IP22 (Indy/Indigo2)" + select ARC_MEMORY select ARC_PROMLIB select FW_ARC select FW_ARC32 @@ -658,14 +659,7 @@ config SGI_IP22 select SWAP_IO_SPACE select SYS_HAS_CPU_R4X00 select SYS_HAS_CPU_R5000 - # - # Disable EARLY_PRINTK for now since it leads to overwritten prom - # memory during early boot on some machines. - # - # See http://www.linux-mips.org/cgi-bin/mesg.cgi?a=linux-mips&i=20091119164009.GA15038%40deprecation.cyrius.com - # for a more details discussion - # - # select SYS_HAS_EARLY_PRINTK + select SYS_HAS_EARLY_PRINTK select SYS_SUPPORTS_32BIT_KERNEL select SYS_SUPPORTS_64BIT_KERNEL select SYS_SUPPORTS_BIG_ENDIAN @@ -703,6 +697,7 @@ config SGI_IP27 config SGI_IP28 bool "SGI IP28 (Indigo2 R10k)" + select ARC_MEMORY select ARC_PROMLIB select FW_ARC select FW_ARC64 @@ -725,14 +720,7 @@ config SGI_IP28 select SGI_HAS_ZILOG select SWAP_IO_SPACE select SYS_HAS_CPU_R10000 - # - # Disable EARLY_PRINTK for now since it leads to overwritten prom - # memory during early boot on some machines. - # - # See http://www.linux-mips.org/cgi-bin/mesg.cgi?a=linux-mips&i=20091119164009.GA15038%40deprecation.cyrius.com - # for a more details discussion - # - # select SYS_HAS_EARLY_PRINTK + select SYS_HAS_EARLY_PRINTK select SYS_SUPPORTS_64BIT_KERNEL select SYS_SUPPORTS_BIG_ENDIAN select MIPS_L1_CACHE_SHIFT_7 diff --git a/arch/mips/fw/arc/memory.c b/arch/mips/fw/arc/memory.c index b4328b3b5288..dbbcddc82823 100644 --- a/arch/mips/fw/arc/memory.c +++ b/arch/mips/fw/arc/memory.c @@ -158,6 +158,10 @@ void __init prom_meminit(void) } } +void __weak __init prom_cleanup(void) +{ +} + void __init prom_free_prom_memory(void) { int i; @@ -169,4 +173,9 @@ void __init prom_free_prom_memory(void) free_init_pages("prom memory", prom_mem_base[i], prom_mem_base[i] + prom_mem_size[i]); } + /* + * at this point it isn't safe to call PROM functions + * give platforms a way to do PROM cleanups + */ + prom_cleanup(); } diff --git a/arch/mips/include/asm/bootinfo.h b/arch/mips/include/asm/bootinfo.h index 34d62229dea5..a9250f5c964f 100644 --- a/arch/mips/include/asm/bootinfo.h +++ b/arch/mips/include/asm/bootinfo.h @@ -99,6 +99,7 @@ extern void detect_memory_region(phys_addr_t start, phys_addr_t sz_min, phys_ad extern void prom_init(void); extern void prom_free_prom_memory(void); +extern void prom_cleanup(void); extern void free_init_pages(const char *what, unsigned long begin, unsigned long end); diff --git a/arch/mips/include/asm/mach-ip22/spaces.h b/arch/mips/include/asm/mach-ip22/spaces.h index 78d0335f5f2e..24fe92cb5313 100644 --- a/arch/mips/include/asm/mach-ip22/spaces.h +++ b/arch/mips/include/asm/mach-ip22/spaces.h @@ -12,15 +12,6 @@ #define PHYS_OFFSET _AC(0x08000000, UL) -#ifdef CONFIG_64BIT - -#define CAC_BASE 0xffffffff80000000 -#define IO_BASE 0xffffffffa0000000 -#define UNCAC_BASE 0xffffffffa0000000 -#define MAP_BASE 0xc000000000000000 - -#endif /* CONFIG_64BIT 
*/ - #include #endif /* __ASM_MACH_IP22_SPACES_H */ diff --git a/arch/mips/sgi-ip22/ip22-mc.c b/arch/mips/sgi-ip22/ip22-mc.c index 1944d41507ef..74e5b9e27d6c 100644 --- a/arch/mips/sgi-ip22/ip22-mc.c +++ b/arch/mips/sgi-ip22/ip22-mc.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -40,70 +41,36 @@ static inline unsigned int get_bank_config(int bank) return bank % 2 ? res & 0xffff : res >> 16; } -struct mem { - unsigned long addr; - unsigned long size; -}; - +#if defined(CONFIG_SGI_IP28) || defined(CONFIG_32BIT) +static void __init probe_memory(void) +{ + /* prom detects all usable memory */ +} +#else /* - * Detect installed memory, do some sanity checks and notify kernel about it + * Detect installed memory, which PROM misses */ static void __init probe_memory(void) { - int i, j, found, cnt = 0; - struct mem bank[4]; - struct mem space[2] = {{SGIMC_SEG0_BADDR, 0}, {SGIMC_SEG1_BADDR, 0}}; + unsigned long addr, size; + int i; printk(KERN_INFO "MC: Probing memory configuration:\n"); - for (i = 0; i < ARRAY_SIZE(bank); i++) { + for (i = 0; i < 4; i++) { unsigned int tmp = get_bank_config(i); if (!(tmp & SGIMC_MCONFIG_BVALID)) continue; - bank[cnt].size = get_bank_size(tmp); - bank[cnt].addr = get_bank_addr(tmp); + size = get_bank_size(tmp); + addr = get_bank_addr(tmp); printk(KERN_INFO " bank%d: %3ldM @ %08lx\n", - i, bank[cnt].size / 1024 / 1024, bank[cnt].addr); - cnt++; - } + i, size / 1024 / 1024, addr); - /* And you thought bubble sort is dead algorithm... */ - do { - unsigned long addr, size; - - found = 0; - for (i = 1; i < cnt; i++) - if (bank[i-1].addr > bank[i].addr) { - addr = bank[i].addr; - size = bank[i].size; - bank[i].addr = bank[i-1].addr; - bank[i].size = bank[i-1].size; - bank[i-1].addr = addr; - bank[i-1].size = size; - found = 1; - } - } while (found); - - /* Figure out how are memory banks mapped into spaces */ - for (i = 0; i < cnt; i++) { - found = 0; - for (j = 0; j < ARRAY_SIZE(space) && !found; j++) - if (space[j].addr + space[j].size == bank[i].addr) { - space[j].size += bank[i].size; - found = 1; - } - /* There is either hole or overlapping memory */ - if (!found) - printk(KERN_CRIT "MC: Memory configuration mismatch " - "(%08lx), expect Bus Error soon\n", - bank[i].addr); + if (addr >= SGIMC_SEG1_BADDR) + memblock_add(addr, size); } - - for (i = 0; i < ARRAY_SIZE(space); i++) - if (space[i].size) - add_memory_region(space[i].addr, space[i].size, - BOOT_MEM_RAM); } +#endif void __init sgimc_init(void) { @@ -205,10 +172,9 @@ void __init sgimc_init(void) probe_memory(); } -void __init prom_meminit(void) {} -void __init prom_free_prom_memory(void) -{ #ifdef CONFIG_SGI_IP28 +void __init prom_cleanup(void) +{ u32 mconfig1; unsigned long flags; spinlock_t lock; @@ -233,5 +199,5 @@ void __init prom_free_prom_memory(void) sgimc->mconfig1 = mconfig1; iob(); spin_unlock_irqrestore(&lock, flags); -#endif } +#endif -- cgit v1.2.3 From c85ac57ce24112a93054d8a9eec8fc4e6b844c43 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Mon, 7 Oct 2019 22:20:02 +0000 Subject: MIPS: cmdline: Remove redundant Kconfig defaults MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CMDLINE, CMDLINE_BOOL & CMDLINE_FORCE all explicitly specify default values that are the same as the default value for their respective types anyway (ie. n for booleans, and the empty string for strings). Remove the redundant defaults. 
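For reference, Kconfig's built-in defaults make the removed lines no-ops: a bool symbol with no default line starts out as n, and a string symbol starts out as the empty string. (On MIPS the force-style option is spelled CMDLINE_OVERRIDE, as the Kconfig.debug hunk below shows.)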
Signed-off-by: Paul Burton Reviewed-by: Philippe Mathieu-Daudé Cc: linux-mips@vger.kernel.org --- arch/mips/Kconfig.debug | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch') diff --git a/arch/mips/Kconfig.debug b/arch/mips/Kconfig.debug index 0c86b2a2adfc..93a2974d2ab7 100644 --- a/arch/mips/Kconfig.debug +++ b/arch/mips/Kconfig.debug @@ -32,7 +32,6 @@ config USE_GENERIC_EARLY_PRINTK_8250 config CMDLINE_BOOL bool "Built-in kernel command line" - default n help For most systems, it is firmware or second stage bootloader that by default specifies the kernel command line options. However, @@ -53,7 +52,6 @@ config CMDLINE_BOOL config CMDLINE string "Default kernel command string" depends on CMDLINE_BOOL - default "" help On some platforms, there is currently no way for the boot loader to pass arguments to the kernel. For these platforms, and for the cases @@ -68,7 +66,6 @@ config CMDLINE config CMDLINE_OVERRIDE bool "Built-in command line overrides firmware arguments" - default n depends on CMDLINE_BOOL help By setting this option to 'Y' you will have your kernel ignore -- cgit v1.2.3 From 5474080a3a0a477fd99f587b9e9ef814bcdfc083 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sun, 6 Oct 2019 16:55:53 +0200 Subject: s390/Kconfig: make use of 'depends on cc-option' Make use of 'depends on cc-option' to only display those Kconfig options for which compiler support is available. Add this for the MARCH and TUNE options which are the only options which may result in compile errors if the selected architecture is not supported by the compiler. Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/Kconfig | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 43a81d0ad507..700ce17e3805 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -246,8 +246,8 @@ choice config MARCH_Z900 bool "IBM zSeries model z800 and z900" - depends on !CC_IS_CLANG select HAVE_MARCH_Z900_FEATURES + depends on $(cc-option,-march=z900) help Select this to enable optimizations for model z800/z900 (2064 and 2066 series). This will enable some optimizations that are not @@ -255,8 +255,8 @@ config MARCH_Z900 config MARCH_Z990 bool "IBM zSeries model z890 and z990" - depends on !CC_IS_CLANG select HAVE_MARCH_Z990_FEATURES + depends on $(cc-option,-march=z990) help Select this to enable optimizations for model z890/z990 (2084 and 2086 series). The kernel will be slightly faster but will not work @@ -264,8 +264,8 @@ config MARCH_Z990 config MARCH_Z9_109 bool "IBM System z9" - depends on !CC_IS_CLANG select HAVE_MARCH_Z9_109_FEATURES + depends on $(cc-option,-march=z9-109) help Select this to enable optimizations for IBM System z9 (2094 and 2096 series). The kernel will be slightly faster but will not work @@ -274,6 +274,7 @@ config MARCH_Z9_109 config MARCH_Z10 bool "IBM System z10" select HAVE_MARCH_Z10_FEATURES + depends on $(cc-option,-march=z10) help Select this to enable optimizations for IBM System z10 (2097 and 2098 series). The kernel will be slightly faster but will not work @@ -282,6 +283,7 @@ config MARCH_Z10 config MARCH_Z196 bool "IBM zEnterprise 114 and 196" select HAVE_MARCH_Z196_FEATURES + depends on $(cc-option,-march=z196) help Select this to enable optimizations for IBM zEnterprise 114 and 196 (2818 and 2817 series). 
The kernel will be slightly faster but will @@ -290,6 +292,7 @@ config MARCH_Z196 config MARCH_ZEC12 bool "IBM zBC12 and zEC12" select HAVE_MARCH_ZEC12_FEATURES + depends on $(cc-option,-march=zEC12) help Select this to enable optimizations for IBM zBC12 and zEC12 (2828 and 2827 series). The kernel will be slightly faster but will not work on @@ -298,6 +301,7 @@ config MARCH_ZEC12 config MARCH_Z13 bool "IBM z13s and z13" select HAVE_MARCH_Z13_FEATURES + depends on $(cc-option,-march=z13) help Select this to enable optimizations for IBM z13s and z13 (2965 and 2964 series). The kernel will be slightly faster but will not work on @@ -306,6 +310,7 @@ config MARCH_Z13 config MARCH_Z14 bool "IBM z14 ZR1 and z14" select HAVE_MARCH_Z14_FEATURES + depends on $(cc-option,-march=z14) help Select this to enable optimizations for IBM z14 ZR1 and z14 (3907 and 3906 series). The kernel will be slightly faster but will not @@ -314,6 +319,7 @@ config MARCH_Z14 config MARCH_Z15 bool "IBM z15" select HAVE_MARCH_Z15_FEATURES + depends on $(cc-option,-march=z15) help Select this to enable optimizations for IBM z15 (8562 and 8561 series). The kernel will be slightly faster but will not @@ -367,33 +373,39 @@ config TUNE_DEFAULT config TUNE_Z900 bool "IBM zSeries model z800 and z900" - depends on !CC_IS_CLANG + depends on $(cc-option,-mtune=z900) config TUNE_Z990 bool "IBM zSeries model z890 and z990" - depends on !CC_IS_CLANG + depends on $(cc-option,-mtune=z990) config TUNE_Z9_109 bool "IBM System z9" - depends on !CC_IS_CLANG + depends on $(cc-option,-mtune=z9-109) config TUNE_Z10 bool "IBM System z10" + depends on $(cc-option,-mtune=z10) config TUNE_Z196 bool "IBM zEnterprise 114 and 196" + depends on $(cc-option,-mtune=z196) config TUNE_ZEC12 bool "IBM zBC12 and zEC12" + depends on $(cc-option,-mtune=zEC12) config TUNE_Z13 bool "IBM z13" + depends on $(cc-option,-mtune=z13) config TUNE_Z14 bool "IBM z14" + depends on $(cc-option,-mtune=z14) config TUNE_Z15 bool "IBM z15" + depends on $(cc-option,-mtune=z15) endchoice -- cgit v1.2.3 From 89d0180a60fcc5368eb2d92faeb1e012f8a591b3 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sun, 6 Oct 2019 17:02:07 +0200 Subject: s390/Kconfig: add z13s and z14 ZR1 to TUNE descriptions The names for the z13s and z14 ZR1 machines are missing for the TUNE_Z13 and TUNE_Z14 descriptions. Just add them. Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 700ce17e3805..f0df9e48e651 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -396,11 +396,11 @@ config TUNE_ZEC12 depends on $(cc-option,-mtune=zEC12) config TUNE_Z13 - bool "IBM z13" + bool "IBM z13s and z13" depends on $(cc-option,-mtune=z13) config TUNE_Z14 - bool "IBM z14" + bool "IBM z14 ZR1 and z14" depends on $(cc-option,-mtune=z14) config TUNE_Z15 -- cgit v1.2.3 From 8474e5cac07e7f21dd8c0b3620640db30115db56 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Fri, 15 Feb 2019 13:47:20 +0100 Subject: KVM: s390: count invalid yields To analyze some performance issues with lock contention and scheduling it is nice to know when diag9c did not result in any action or when no action was tried. 
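Once the debugfs entry added below is registered, the counter surfaces alongside the existing KVM statistics (typically under /sys/kernel/debug/kvm/ on the host), so diag_9c_ignored can be compared against instruction_diag_9c to judge how many directed yields actually took effect.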
Signed-off-by: Christian Borntraeger Reviewed-by: David Hildenbrand Reviewed-by: Cornelia Huck --- arch/s390/include/asm/kvm_host.h | 1 + arch/s390/kvm/diag.c | 18 ++++++++++++++---- arch/s390/kvm/kvm-s390.c | 1 + 3 files changed, 16 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index abe60268335d..02f4c21c57f6 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -392,6 +392,7 @@ struct kvm_vcpu_stat { u64 diagnose_10; u64 diagnose_44; u64 diagnose_9c; + u64 diagnose_9c_ignored; u64 diagnose_258; u64 diagnose_308; u64 diagnose_500; diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index 45634b3d2e0a..609c55df3ce8 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -158,14 +158,24 @@ static int __diag_time_slice_end_directed(struct kvm_vcpu *vcpu) tid = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4]; vcpu->stat.diagnose_9c++; - VCPU_EVENT(vcpu, 5, "diag time slice end directed to %d", tid); + /* yield to self */ if (tid == vcpu->vcpu_id) - return 0; + goto no_yield; + /* yield to invalid */ tcpu = kvm_get_vcpu_by_id(vcpu->kvm, tid); - if (tcpu) - kvm_vcpu_yield_to(tcpu); + if (!tcpu) + goto no_yield; + + if (kvm_vcpu_yield_to(tcpu) <= 0) + goto no_yield; + + VCPU_EVENT(vcpu, 5, "diag time slice end directed to %d: done", tid); + return 0; +no_yield: + VCPU_EVENT(vcpu, 5, "diag time slice end directed to %d: ignored", tid); + vcpu->stat.diagnose_9c_ignored++; return 0; } diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 40af442b2e15..3b5ebf48f802 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -155,6 +155,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "instruction_diag_10", VCPU_STAT(diagnose_10) }, { "instruction_diag_44", VCPU_STAT(diagnose_44) }, { "instruction_diag_9c", VCPU_STAT(diagnose_9c) }, + { "diag_9c_ignored", VCPU_STAT(diagnose_9c_ignored) }, { "instruction_diag_258", VCPU_STAT(diagnose_258) }, { "instruction_diag_308", VCPU_STAT(diagnose_308) }, { "instruction_diag_500", VCPU_STAT(diagnose_500) }, -- cgit v1.2.3 From c7b7de63124645089ccf9900b9e5ea08059ccae0 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Fri, 15 Feb 2019 13:47:20 +0100 Subject: KVM: s390: Do not yield when target is already running If the target is already running we do not need to yield. Signed-off-by: Christian Borntraeger Reviewed-by: David Hildenbrand Reviewed-by: Cornelia Huck --- arch/s390/kvm/diag.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index 609c55df3ce8..3fb54ec2cf3e 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -168,6 +168,10 @@ static int __diag_time_slice_end_directed(struct kvm_vcpu *vcpu) if (!tcpu) goto no_yield; + /* target already running */ + if (READ_ONCE(tcpu->cpu) >= 0) + goto no_yield; + if (kvm_vcpu_yield_to(tcpu) <= 0) goto no_yield; -- cgit v1.2.3 From 7784cac697351f0cc0a4bb619594c0c99348c5aa Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Wed, 9 Oct 2019 23:09:45 +0000 Subject: MIPS: cmdline: Clean up boot_command_line initialization Our current code to initialize boot_command_line is a mess. Some of this is due to the addition of too many options over the years, and some of this is due to workarounds for early_init_dt_scan_chosen() performing actions specific to options from other architectures that probably shouldn't be in generic code. 
Clean this up by introducing a new bootcmdline_init() function that simplifies the initialization somewhat. The major changes are: - Because bootcmdline_init() is a function it can return early in the CONFIG_CMDLINE_OVERRIDE case. - We clear boot_command_line rather than inheriting whatever early_init_dt_scan_chosen() may have left us. This means we no longer need to set boot_command_line to a space character in an attempt to prevent early_init_dt_scan_chosen() from copying CONFIG_CMDLINE into boot_command_line without us knowing about it. - Indirection via USE_PROM_CMDLINE, USE_DTB_CMDLINE, EXTEND_WITH_PROM & BUILTIN_EXTEND_WITH_PROM macros is removed; they seemingly served only to obfuscate the code. - The logic is cleaner, clearer & commented. Two minor drawbacks of this approach are: 1) We call of_scan_flat_dt(), which means we scan through the DT again. The overhead is fairly minimal & shouldn't be noticeable. 2) cmdline_scan_chosen() duplicates a small amount of the logic from early_init_dt_scan_chosen(). Alternatives might be to allow the generic FDT code to keep & expose a copy of the arguments taken from the /chosen node's bootargs property, or to introduce a function like early_init_dt_scan_chosen() that retrieves them without modification to handle CONFIG_CMDLINE. Neither of these sounds particularly cleaner though, and this way we at least keep the extra work in arch/mips. Signed-off-by: Paul Burton Cc: linux-mips@vger.kernel.org --- arch/mips/kernel/setup.c | 125 +++++++++++++++++++++++++++++++---------------- 1 file changed, 83 insertions(+), 42 deletions(-) (limited to 'arch') diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index 5eec13b8d222..4aeba3122972 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -538,11 +538,88 @@ static void __init check_kernel_sections_mem(void) } } -#define USE_PROM_CMDLINE IS_ENABLED(CONFIG_MIPS_CMDLINE_FROM_BOOTLOADER) -#define USE_DTB_CMDLINE IS_ENABLED(CONFIG_MIPS_CMDLINE_FROM_DTB) -#define EXTEND_WITH_PROM IS_ENABLED(CONFIG_MIPS_CMDLINE_DTB_EXTEND) -#define BUILTIN_EXTEND_WITH_PROM \ - IS_ENABLED(CONFIG_MIPS_CMDLINE_BUILTIN_EXTEND) +static void __init bootcmdline_append(const char *s, size_t max) +{ + if (!s[0] || !max) + return; + + if (boot_command_line[0]) + strlcat(boot_command_line, " ", COMMAND_LINE_SIZE); + + strlcat(boot_command_line, s, max); +} + +static int __init bootcmdline_scan_chosen(unsigned long node, const char *uname, + int depth, void *data) +{ + bool *dt_bootargs = data; + const char *p; + int l; + + if (depth != 1 || !data || + (strcmp(uname, "chosen") != 0 && strcmp(uname, "chosen@0") != 0)) + return 0; + + p = of_get_flat_dt_prop(node, "bootargs", &l); + if (p != NULL && l > 0) { + bootcmdline_append(p, min(l, COMMAND_LINE_SIZE)); + *dt_bootargs = true; + } + + return 1; +} + +static void __init bootcmdline_init(char **cmdline_p) +{ + bool dt_bootargs = false; + + /* + * If CMDLINE_OVERRIDE is enabled then initializing the command line is + * trivial - we simply use the built-in command line unconditionally & + * unmodified. + */ + if (IS_ENABLED(CONFIG_CMDLINE_OVERRIDE)) { + strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE); + return; + } + + /* + * If the user specified a built-in command line & + * MIPS_CMDLINE_BUILTIN_EXTEND, then the built-in command line is + * prepended to arguments from the bootloader or DT so we'll copy them + * to the start of boot_command_line here. Otherwise, empty + * boot_command_line to undo anything early_init_dt_scan_chosen() did. 
+ */ + if (IS_ENABLED(CONFIG_MIPS_CMDLINE_BUILTIN_EXTEND)) + strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE); + else + boot_command_line[0] = 0; + + /* + * If we're configured to take boot arguments from DT, look for those + * now. + */ + if (IS_ENABLED(CONFIG_MIPS_CMDLINE_FROM_DTB)) + of_scan_flat_dt(bootcmdline_scan_chosen, &dt_bootargs); + + /* + * If we didn't get any arguments from DT (regardless of whether that's + * because we weren't configured to look for them, or because we looked + * & found none) then we'll take arguments from the bootloader. + * plat_mem_setup() should have filled arcs_cmdline with arguments from + * the bootloader. + */ + if (IS_ENABLED(CONFIG_MIPS_CMDLINE_DTB_EXTEND) || !dt_bootargs) + bootcmdline_append(arcs_cmdline, COMMAND_LINE_SIZE); + + /* + * If the user specified a built-in command line & we didn't already + * prepend it, we append it to boot_command_line here. + */ + if (IS_ENABLED(CONFIG_CMDLINE_BOOL) && + !IS_ENABLED(CONFIG_MIPS_CMDLINE_BUILTIN_EXTEND)) + bootcmdline_append(builtin_cmdline, COMMAND_LINE_SIZE); +} /* * arch_mem_init - initialize memory management subsystem @@ -570,48 +647,12 @@ static void __init arch_mem_init(char **cmdline_p) { extern void plat_mem_setup(void); - /* - * Initialize boot_command_line to an innocuous but non-empty string in - * order to prevent early_init_dt_scan_chosen() from copying - * CONFIG_CMDLINE into it without our knowledge. We handle - * CONFIG_CMDLINE ourselves below & don't want to duplicate its - * content because repeating arguments can be problematic. - */ - strlcpy(boot_command_line, " ", COMMAND_LINE_SIZE); - /* call board setup routine */ plat_mem_setup(); memblock_set_bottom_up(true); -#if defined(CONFIG_CMDLINE_BOOL) && defined(CONFIG_CMDLINE_OVERRIDE) - strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE); -#else - if ((USE_PROM_CMDLINE && arcs_cmdline[0]) || - (USE_DTB_CMDLINE && !boot_command_line[0])) - strlcpy(boot_command_line, arcs_cmdline, COMMAND_LINE_SIZE); - - if (EXTEND_WITH_PROM && arcs_cmdline[0]) { - if (boot_command_line[0]) - strlcat(boot_command_line, " ", COMMAND_LINE_SIZE); - strlcat(boot_command_line, arcs_cmdline, COMMAND_LINE_SIZE); - } - -#if defined(CONFIG_CMDLINE_BOOL) - if (builtin_cmdline[0]) { - if (boot_command_line[0]) - strlcat(boot_command_line, " ", COMMAND_LINE_SIZE); - strlcat(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE); - } - - if (BUILTIN_EXTEND_WITH_PROM && arcs_cmdline[0]) { - if (boot_command_line[0]) - strlcat(boot_command_line, " ", COMMAND_LINE_SIZE); - strlcat(boot_command_line, arcs_cmdline, COMMAND_LINE_SIZE); - } -#endif -#endif + bootcmdline_init(cmdline_p); strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); - *cmdline_p = command_line; parse_early_param(); -- cgit v1.2.3 From df3da04880b45b059b8b064c2dd67289a793109e Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Thu, 10 Oct 2019 13:31:59 -0700 Subject: mips: Fix unroll macro when building with Clang Building with Clang errors after commit 6baaeadae911 ("MIPS: Provide unroll() macro, use it for cache ops") since the GCC_VERSION macro is defined in include/linux/compiler-gcc.h, which is only included in compiler.h when using GCC: In file included from arch/mips/kernel/mips-mt.c:20: ./arch/mips/include/asm/r4kcache.h:254:1: error: use of undeclared identifier 'GCC_VERSION'; did you mean 'S_VERSION'? 
__BUILD_BLAST_CACHE(i, icache, Index_Invalidate_I, Hit_Invalidate_I, 32, ) ^ ./arch/mips/include/asm/r4kcache.h:219:4: note: expanded from macro '__BUILD_BLAST_CACHE' cache_unroll(32, kernel_cache, indexop, ^ ./arch/mips/include/asm/r4kcache.h:203:2: note: expanded from macro 'cache_unroll' unroll(times, _cache_op, insn, op, (addr) + (i++ * (lsize))); ^ ./arch/mips/include/asm/unroll.h:28:15: note: expanded from macro 'unroll' BUILD_BUG_ON(GCC_VERSION >= 40700 && \ ^ Use CONFIG_GCC_VERSION, which will always be set by Kconfig. Additionally, Clang 8 had improvements around __builtin_constant_p so use that as a lower limit for this check with Clang (although MIPS wasn't buildable until Clang 9); building a kernel with Clang 9.0.0 has no issues after this change. Fixes: 6baaeadae911 ("MIPS: Provide unroll() macro, use it for cache ops") Link: https://github.com/ClangBuiltLinux/linux/issues/736 Signed-off-by: Nathan Chancellor Signed-off-by: Paul Burton Cc: Ralf Baechle Cc: James Hogan Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: clang-built-linux@googlegroups.com Cc: Nick Desaulniers --- arch/mips/include/asm/unroll.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/mips/include/asm/unroll.h b/arch/mips/include/asm/unroll.h index df1cdcfc5a47..c628747d4ecd 100644 --- a/arch/mips/include/asm/unroll.h +++ b/arch/mips/include/asm/unroll.h @@ -25,7 +25,8 @@ * generate reasonable code for the switch statement, \ * so we skip the sanity check for those compilers. \ */ \ - BUILD_BUG_ON(GCC_VERSION >= 40700 && \ + BUILD_BUG_ON((CONFIG_GCC_VERSION >= 40700 || \ + CONFIG_CLANG_VERSION >= 80000) && \ !__builtin_constant_p(times)); \ \ switch (times) { \ -- cgit v1.2.3 From b7340422cc16c5deff100812f38114bb5ec81203 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Sat, 12 Oct 2019 20:43:36 +0000 Subject: MIPS: Always define builtin_cmdline Commit 7784cac69735 ("MIPS: cmdline: Clean up boot_command_line initialization") made use of builtin_cmdline conditional upon plain C if statements rather than preprocessor #ifdef's. This caused build failures for configurations with CONFIG_CMDLINE_BOOL=n where builtin_cmdline wasn't defined, for example: arch/mips/kernel/setup.c: In function 'bootcmdline_init': >> arch/mips/kernel/setup.c:582:30: error: 'builtin_cmdline' undeclared (first use in this function); did you mean 'builtin_driver'? strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE); ^~~~~~~~~~~~~~~ builtin_driver arch/mips/kernel/setup.c:582:30: note: each undeclared identifier is reported only once for each function it appears in Fix this by defining builtin_cmdline as an empty string in the affected configurations. All of the paths that use it should be optimized out anyway so the data itself gets optimized away too. 
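The general rule being applied is worth a standalone sketch (illustrative only; CONFIG_FOO and use() are placeholders): the body of a plain C `if (IS_ENABLED(...))` is still parsed in every configuration, unlike an #ifdef block, so every identifier it references must exist even when the option is off:

    extern void use(const char *s);

    #ifdef CONFIG_FOO
    static const char foo_arg[] = "foo";    /* real value when CONFIG_FOO=y */
    #else
    static const char foo_arg[] = "";       /* stub so references still compile */
    #endif

    void bar(void)
    {
            if (IS_ENABLED(CONFIG_FOO))     /* dead code when CONFIG_FOO=n... */
                    use(foo_arg);           /* ...but must still parse and link */
    }

The compiler folds the dead branch away, and with it any use of the stub, which is why an empty string costs nothing here.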
Signed-off-by: Paul Burton Fixes: 7784cac69735 ("MIPS: cmdline: Clean up boot_command_line initialization") Reported-by: kbuild test robot Reported-by: Nathan Chancellor Cc: linux-mips@vger.kernel.org --- arch/mips/kernel/setup.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index 4aeba3122972..119999d31558 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -68,6 +68,8 @@ char __initdata arcs_cmdline[COMMAND_LINE_SIZE]; #ifdef CONFIG_CMDLINE_BOOL static char __initdata builtin_cmdline[COMMAND_LINE_SIZE] = CONFIG_CMDLINE; +#else +static const char builtin_cmdline[] __initconst = ""; #endif /* -- cgit v1.2.3 From 972727766ee4d9e8b455c09e8dcb1e7dc14c4967 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Sat, 12 Oct 2019 20:43:37 +0000 Subject: MIPS: Fix CONFIG_OF_EARLY_FLATTREE=n builds Configurations with CONFIG_OF_EARLY_FLATTREE=n fail to build since commit 7784cac69735 ("MIPS: cmdline: Clean up boot_command_line initialization") because of_scan_flat_dt() & of_get_flat_dt_prop() are not defined in these configurations. Fix this by #ifdef'ing the affected code... Signed-off-by: Paul Burton Fixes: 7784cac69735 ("MIPS: cmdline: Clean up boot_command_line initialization") Reported-by: kbuild test robot Cc: linux-mips@vger.kernel.org --- arch/mips/kernel/setup.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch') diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index 119999d31558..7ccc8a9e1bfe 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -551,6 +551,8 @@ static void __init bootcmdline_append(const char *s, size_t max) strlcat(boot_command_line, s, max); } +#ifdef CONFIG_OF_EARLY_FLATTREE + static int __init bootcmdline_scan_chosen(unsigned long node, const char *uname, int depth, void *data) { @@ -571,6 +573,8 @@ static int __init bootcmdline_scan_chosen(unsigned long node, const char *uname, return 1; } +#endif /* CONFIG_OF_EARLY_FLATTREE */ + static void __init bootcmdline_init(char **cmdline_p) { bool dt_bootargs = false; @@ -597,12 +601,14 @@ static void __init bootcmdline_init(char **cmdline_p) else boot_command_line[0] = 0; +#ifdef CONFIG_OF_EARLY_FLATTREE /* * If we're configured to take boot arguments from DT, look for those * now. */ if (IS_ENABLED(CONFIG_MIPS_CMDLINE_FROM_DTB)) of_scan_flat_dt(bootcmdline_scan_chosen, &dt_bootargs); +#endif /* * If we didn't get any arguments from DT (regardless of whether that's -- cgit v1.2.3 From 9dd422f69777b928f8a12e5392d4aeeb00a55c2b Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Sat, 12 Oct 2019 20:43:38 +0000 Subject: MIPS: Make builtin_cmdline const & variable length We have no need for the builtin_cmdline array to be fixed at the length of COMMAND_LINE_SIZE - we'll only copy out the string it contains up to its NULL terminator anyway, and cap the size at COMMAND_LINE_SIZE when copying into or concatenating with boot_command_line. The string value is also constant, so we can declare it as such to place it in the .init.rodata section.
Signed-off-by: Paul Burton Cc: linux-mips@vger.kernel.org --- arch/mips/kernel/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index 7ccc8a9e1bfe..2af05879772f 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -67,7 +67,7 @@ static char __initdata command_line[COMMAND_LINE_SIZE]; char __initdata arcs_cmdline[COMMAND_LINE_SIZE]; #ifdef CONFIG_CMDLINE_BOOL -static char __initdata builtin_cmdline[COMMAND_LINE_SIZE] = CONFIG_CMDLINE; +static const char builtin_cmdline[] __initconst = CONFIG_CMDLINE; #else static const char builtin_cmdline[] __initconst = ""; #endif -- cgit v1.2.3 From ce87de45b3243d7023e8a4a76ba004002a7ec087 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 13 Sep 2019 13:55:50 +0100 Subject: arm64: simplify syscall wrapper ifdeffery Back in commit: 4378a7d4be30ec69 ("arm64: implement syscall wrappers") ... I implemented the arm64 syscall wrapper glue following the approach taken on x86. While doing so, I also copied across some ifdeffery that isn't necessary on arm64. On arm64 we don't share any of the native wrappers with compat tasks, and unlike x86 we don't have alternative implementations of SYSCALL_DEFINE0(), COND_SYSCALL(), or SYS_NI() defined when AArch32 compat support is enabled. Thus we don't need to prevent multiple definitions of these macros, and can remove the #ifndef ... #endif guards protecting them. If any of these had been previously defined elsewhere, syscalls are unlikely to work correctly, and we'd want the compiler to warn about the multiple definitions. Acked-by: Will Deacon Signed-off-by: Mark Rutland Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/syscall_wrapper.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/syscall_wrapper.h b/arch/arm64/include/asm/syscall_wrapper.h index 06d880b3526c..b383b4802a7b 100644 --- a/arch/arm64/include/asm/syscall_wrapper.h +++ b/arch/arm64/include/asm/syscall_wrapper.h @@ -66,24 +66,18 @@ struct pt_regs; } \ static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) -#ifndef SYSCALL_DEFINE0 #define SYSCALL_DEFINE0(sname) \ SYSCALL_METADATA(_##sname, 0); \ asmlinkage long __arm64_sys_##sname(const struct pt_regs *__unused); \ ALLOW_ERROR_INJECTION(__arm64_sys_##sname, ERRNO); \ asmlinkage long __arm64_sys_##sname(const struct pt_regs *__unused) -#endif -#ifndef COND_SYSCALL #define COND_SYSCALL(name) \ asmlinkage long __weak __arm64_sys_##name(const struct pt_regs *regs) \ { \ return sys_ni_syscall(); \ } -#endif -#ifndef SYS_NI #define SYS_NI(name) SYSCALL_ALIAS(__arm64_sys_##name, sys_ni_posix_timers); -#endif #endif /* __ASM_SYSCALL_WRAPPER_H */ -- cgit v1.2.3 From c98bd29917281a5023f71a3148f538ad2709c0f0 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 9 Aug 2019 14:22:41 +0100 Subject: arm64: errata: use arm_smccc_1_1_get_conduit() Now that we have arm_smccc_1_1_get_conduit(), we can hide the PSCI implementation details from the arm64 cpu errata code, so let's do so. As arm_smccc_1_1_get_conduit() implicitly checks that the SMCCC version is at least SMCCC_VERSION_1_1, we no longer need to check this explicitly where switch statements have a default case, e.g. in has_ssbd_mitigation(). There should be no functional change as a result of this patch. 
Signed-off-by: Mark Rutland Cc: Lorenzo Pieralisi Cc: Will Deacon Cc: Marc Zyngier Cc: Suzuki K Poulose Signed-off-by: Catalin Marinas --- arch/arm64/kernel/cpu_errata.c | 37 ++++++++++++------------------------- 1 file changed, 12 insertions(+), 25 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index f593f4cffc0d..9c0b011eee20 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -6,7 +6,6 @@ */ #include -#include #include #include #include @@ -166,9 +165,7 @@ static void install_bp_hardening_cb(bp_hardening_cb_t fn, } #endif /* CONFIG_KVM_INDIRECT_VECTORS */ -#include #include -#include static void call_smc_arch_workaround_1(void) { @@ -212,11 +209,8 @@ static int detect_harden_bp_fw(void) struct arm_smccc_res res; u32 midr = read_cpuid_id(); - if (psci_ops.smccc_version == SMCCC_VERSION_1_0) - return -1; - - switch (psci_ops.conduit) { - case PSCI_CONDUIT_HVC: + switch (arm_smccc_1_1_get_conduit()) { + case SMCCC_CONDUIT_HVC: arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, ARM_SMCCC_ARCH_WORKAROUND_1, &res); switch ((int)res.a0) { @@ -234,7 +228,7 @@ static int detect_harden_bp_fw(void) } break; - case PSCI_CONDUIT_SMC: + case SMCCC_CONDUIT_SMC: arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, ARM_SMCCC_ARCH_WORKAROUND_1, &res); switch ((int)res.a0) { @@ -308,11 +302,11 @@ void __init arm64_update_smccc_conduit(struct alt_instr *alt, BUG_ON(nr_inst != 1); - switch (psci_ops.conduit) { - case PSCI_CONDUIT_HVC: + switch (arm_smccc_1_1_get_conduit()) { + case SMCCC_CONDUIT_HVC: insn = aarch64_insn_get_hvc_value(); break; - case PSCI_CONDUIT_SMC: + case SMCCC_CONDUIT_SMC: insn = aarch64_insn_get_smc_value(); break; default: @@ -351,12 +345,12 @@ void arm64_set_ssbd_mitigation(bool state) return; } - switch (psci_ops.conduit) { - case PSCI_CONDUIT_HVC: + switch (arm_smccc_1_1_get_conduit()) { + case SMCCC_CONDUIT_HVC: arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_2, state, NULL); break; - case PSCI_CONDUIT_SMC: + case SMCCC_CONDUIT_SMC: arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, state, NULL); break; @@ -390,20 +384,13 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, goto out_printmsg; } - if (psci_ops.smccc_version == SMCCC_VERSION_1_0) { - ssbd_state = ARM64_SSBD_UNKNOWN; - if (!this_cpu_safe) - __ssb_safe = false; - return false; - } - - switch (psci_ops.conduit) { - case PSCI_CONDUIT_HVC: + switch (arm_smccc_1_1_get_conduit()) { + case SMCCC_CONDUIT_HVC: arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, ARM_SMCCC_ARCH_WORKAROUND_2, &res); break; - case PSCI_CONDUIT_SMC: + case SMCCC_CONDUIT_SMC: arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, ARM_SMCCC_ARCH_WORKAROUND_2, &res); break; -- cgit v1.2.3 From 6848253ddeae9fa44680bab6212599283f9d4ef2 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 9 Aug 2019 14:22:42 +0100 Subject: arm: spectre-v2: use arm_smccc_1_1_get_conduit() Now that we have arm_smccc_1_1_get_conduit(), we can hide the PSCI implementation details from the arm spectre-v2 code, so let's do so. As arm_smccc_1_1_get_conduit() implicitly checks that the SMCCC version is at least SMCCC_VERSION_1_1, we no longer need to check this explicitly where switch statements have a default case. There should be no functional change as a result of this patch. 
Signed-off-by: Mark Rutland Cc: Marc Zyngier Cc: Russell King Signed-off-by: Catalin Marinas --- arch/arm/mm/proc-v7-bugs.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/arm/mm/proc-v7-bugs.c b/arch/arm/mm/proc-v7-bugs.c index 9a07916af8dd..54d87506d3b5 100644 --- a/arch/arm/mm/proc-v7-bugs.c +++ b/arch/arm/mm/proc-v7-bugs.c @@ -1,7 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include #include -#include #include #include @@ -75,11 +74,8 @@ static void cpu_v7_spectre_init(void) case ARM_CPU_PART_CORTEX_A72: { struct arm_smccc_res res; - if (psci_ops.smccc_version == SMCCC_VERSION_1_0) - break; - - switch (psci_ops.conduit) { - case PSCI_CONDUIT_HVC: + switch (arm_smccc_1_1_get_conduit()) { + case SMCCC_CONDUIT_HVC: arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, ARM_SMCCC_ARCH_WORKAROUND_1, &res); if ((int)res.a0 != 0) @@ -90,7 +86,7 @@ static void cpu_v7_spectre_init(void) spectre_v2_method = "hypervisor"; break; - case PSCI_CONDUIT_SMC: + case SMCCC_CONDUIT_SMC: arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, ARM_SMCCC_ARCH_WORKAROUND_1, &res); if ((int)res.a0 != 0) -- cgit v1.2.3 From e6ea46511b1ae8c4491904c79411fcd29139af14 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 9 Aug 2019 14:22:44 +0100 Subject: firmware: arm_sdei: use common SMCCC_CONDUIT_* Now that we have common definitions for SMCCC conduits, move the SDEI code over to them, and remove the SDEI-specific definitions. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Acked-by: Lorenzo Pieralisi Acked-by: James Morse Acked-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/kernel/sdei.c | 3 ++- drivers/firmware/arm_sdei.c | 12 ++++++------ include/linux/arm_sdei.h | 6 ------ 3 files changed, 8 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c index ea94cf8f9dc6..d6259dac62b6 100644 --- a/arch/arm64/kernel/sdei.c +++ b/arch/arm64/kernel/sdei.c @@ -2,6 +2,7 @@ // Copyright (C) 2017 Arm Ltd. #define pr_fmt(fmt) "sdei: " fmt +#include #include #include #include @@ -161,7 +162,7 @@ unsigned long sdei_arch_get_entry_point(int conduit) return 0; } - sdei_exit_mode = (conduit == CONDUIT_HVC) ? SDEI_EXIT_HVC : SDEI_EXIT_SMC; + sdei_exit_mode = (conduit == SMCCC_CONDUIT_HVC) ? 
SDEI_EXIT_HVC : SDEI_EXIT_SMC; #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 if (arm64_kernel_unmapped_at_el0()) { diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c index 9cd70d1a5622..a479023fa036 100644 --- a/drivers/firmware/arm_sdei.c +++ b/drivers/firmware/arm_sdei.c @@ -967,29 +967,29 @@ static int sdei_get_conduit(struct platform_device *pdev) if (np) { if (of_property_read_string(np, "method", &method)) { pr_warn("missing \"method\" property\n"); - return CONDUIT_INVALID; + return SMCCC_CONDUIT_NONE; } if (!strcmp("hvc", method)) { sdei_firmware_call = &sdei_smccc_hvc; - return CONDUIT_HVC; + return SMCCC_CONDUIT_HVC; } else if (!strcmp("smc", method)) { sdei_firmware_call = &sdei_smccc_smc; - return CONDUIT_SMC; + return SMCCC_CONDUIT_SMC; } pr_warn("invalid \"method\" property: %s\n", method); } else if (IS_ENABLED(CONFIG_ACPI) && !acpi_disabled) { if (acpi_psci_use_hvc()) { sdei_firmware_call = &sdei_smccc_hvc; - return CONDUIT_HVC; + return SMCCC_CONDUIT_HVC; } else { sdei_firmware_call = &sdei_smccc_smc; - return CONDUIT_SMC; + return SMCCC_CONDUIT_SMC; } } - return CONDUIT_INVALID; + return SMCCC_CONDUIT_NONE; } static int sdei_probe(struct platform_device *pdev) diff --git a/include/linux/arm_sdei.h b/include/linux/arm_sdei.h index 3305ea7f9dc7..0a241c5c911d 100644 --- a/include/linux/arm_sdei.h +++ b/include/linux/arm_sdei.h @@ -5,12 +5,6 @@ #include -enum sdei_conduit_types { - CONDUIT_INVALID = 0, - CONDUIT_SMC, - CONDUIT_HVC, -}; - #include #ifdef CONFIG_ARM_SDE_INTERFACE -- cgit v1.2.3 From ae970dc096b2d39f65f2e18d142e3978dc9ee1c7 Mon Sep 17 00:00:00 2001 From: Nicolas Saenz Julienne Date: Wed, 11 Sep 2019 20:25:43 +0200 Subject: arm64: mm: use arm64_dma_phys_limit instead of calling max_zone_dma_phys() By the time we call zones_sizes_init() arm64_dma_phys_limit already contains the result of max_zone_dma_phys(). We use the variable instead of calling the function directly to save some precious cpu time. Signed-off-by: Nicolas Saenz Julienne Reviewed-by: Catalin Marinas Signed-off-by: Catalin Marinas --- arch/arm64/mm/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 45c00a54909c..098c0f5bedf6 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -187,7 +187,7 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) unsigned long max_zone_pfns[MAX_NR_ZONES] = {0}; #ifdef CONFIG_ZONE_DMA32 - max_zone_pfns[ZONE_DMA32] = PFN_DOWN(max_zone_dma_phys()); + max_zone_pfns[ZONE_DMA32] = PFN_DOWN(arm64_dma_phys_limit); #endif max_zone_pfns[ZONE_NORMAL] = max; -- cgit v1.2.3 From a573cdd7973dedd87e62196c400332896bb236c8 Mon Sep 17 00:00:00 2001 From: Nicolas Saenz Julienne Date: Wed, 11 Sep 2019 20:25:44 +0200 Subject: arm64: rename variables used to calculate ZONE_DMA32's size Let the name indicate that they are used to calculate ZONE_DMA32's size as opposed to ZONE_DMA. 
Signed-off-by: Nicolas Saenz Julienne Reviewed-by: Catalin Marinas Signed-off-by: Catalin Marinas --- arch/arm64/mm/init.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 098c0f5bedf6..8e9bc64c5878 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -56,7 +56,7 @@ EXPORT_SYMBOL(physvirt_offset); struct page *vmemmap __ro_after_init; EXPORT_SYMBOL(vmemmap); -phys_addr_t arm64_dma_phys_limit __ro_after_init; +phys_addr_t arm64_dma32_phys_limit __ro_after_init; #ifdef CONFIG_KEXEC_CORE /* @@ -174,7 +174,7 @@ static void __init reserve_elfcorehdr(void) * currently assumes that for memory starting above 4G, 32-bit devices will * use a DMA offset. */ -static phys_addr_t __init max_zone_dma_phys(void) +static phys_addr_t __init max_zone_dma32_phys(void) { phys_addr_t offset = memblock_start_of_DRAM() & GENMASK_ULL(63, 32); return min(offset + (1ULL << 32), memblock_end_of_DRAM()); @@ -187,7 +187,7 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) unsigned long max_zone_pfns[MAX_NR_ZONES] = {0}; #ifdef CONFIG_ZONE_DMA32 - max_zone_pfns[ZONE_DMA32] = PFN_DOWN(arm64_dma_phys_limit); + max_zone_pfns[ZONE_DMA32] = PFN_DOWN(arm64_dma32_phys_limit); #endif max_zone_pfns[ZONE_NORMAL] = max; @@ -200,16 +200,16 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) { struct memblock_region *reg; unsigned long zone_size[MAX_NR_ZONES], zhole_size[MAX_NR_ZONES]; - unsigned long max_dma = min; + unsigned long max_dma32 = min; memset(zone_size, 0, sizeof(zone_size)); /* 4GB maximum for 32-bit only capable devices */ #ifdef CONFIG_ZONE_DMA32 - max_dma = PFN_DOWN(arm64_dma_phys_limit); - zone_size[ZONE_DMA32] = max_dma - min; + max_dma32 = PFN_DOWN(arm64_dma32_phys_limit); + zone_size[ZONE_DMA32] = max_dma32 - min; #endif - zone_size[ZONE_NORMAL] = max - max_dma; + zone_size[ZONE_NORMAL] = max - max_dma32; memcpy(zhole_size, zone_size, sizeof(zhole_size)); @@ -221,14 +221,14 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) continue; #ifdef CONFIG_ZONE_DMA32 - if (start < max_dma) { - unsigned long dma_end = min(end, max_dma); + if (start < max_dma32) { + unsigned long dma_end = min(end, max_dma32); zhole_size[ZONE_DMA32] -= dma_end - start; } #endif - if (end > max_dma) { + if (end > max_dma32) { unsigned long normal_end = min(end, max); - unsigned long normal_start = max(start, max_dma); + unsigned long normal_start = max(start, max_dma32); zhole_size[ZONE_NORMAL] -= normal_end - normal_start; } } @@ -420,9 +420,9 @@ void __init arm64_memblock_init(void) /* 4GB maximum for 32-bit only capable devices */ if (IS_ENABLED(CONFIG_ZONE_DMA32)) - arm64_dma_phys_limit = max_zone_dma_phys(); + arm64_dma32_phys_limit = max_zone_dma32_phys(); else - arm64_dma_phys_limit = PHYS_MASK + 1; + arm64_dma32_phys_limit = PHYS_MASK + 1; reserve_crashkernel(); @@ -430,7 +430,7 @@ void __init arm64_memblock_init(void) high_memory = __va(memblock_end_of_DRAM() - 1) + 1; - dma_contiguous_reserve(arm64_dma_phys_limit); + dma_contiguous_reserve(arm64_dma32_phys_limit); } void __init bootmem_init(void) @@ -534,7 +534,7 @@ static void __init free_unused_memmap(void) void __init mem_init(void) { if (swiotlb_force == SWIOTLB_FORCE || - max_pfn > (arm64_dma_phys_limit >> PAGE_SHIFT)) + max_pfn > (arm64_dma32_phys_limit >> PAGE_SHIFT)) swiotlb_init(1); else swiotlb_force = SWIOTLB_NO_FORCE; -- cgit v1.2.3 From 
1a8e1cef7603e218339ac63cb3178b25554524e5 Mon Sep 17 00:00:00 2001 From: Nicolas Saenz Julienne Date: Wed, 11 Sep 2019 20:25:45 +0200 Subject: arm64: use both ZONE_DMA and ZONE_DMA32 So far all arm64 devices have supported 32 bit DMA masks for their peripherals. This is not true anymore for the Raspberry Pi 4 as most of its peripherals can only address the first GB of memory out of a total of up to 4 GB. This goes against ZONE_DMA32's intent, as it's expected for ZONE_DMA32 to be addressable with a 32 bit mask. So it was decided to re-introduce ZONE_DMA in arm64. ZONE_DMA will contain the lower 1G of memory, which is currently the memory area addressable by any peripheral on an arm64 device. ZONE_DMA32 will contain the rest of the 32 bit addressable memory. Signed-off-by: Nicolas Saenz Julienne Reviewed-by: Catalin Marinas Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 4 ++++ arch/arm64/include/asm/page.h | 2 ++ arch/arm64/mm/init.c | 54 +++++++++++++++++++++++++++++------------ 3 files changed, 45 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 950a56b71ff0..1b6ea5a9d1a6 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -266,6 +266,10 @@ config GENERIC_CSUM config GENERIC_CALIBRATE_DELAY def_bool y +config ZONE_DMA + bool "Support DMA zone" if EXPERT + default y + config ZONE_DMA32 bool "Support DMA32 zone" if EXPERT default y diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h index d39ddb258a04..7b8c98830101 100644 --- a/arch/arm64/include/asm/page.h +++ b/arch/arm64/include/asm/page.h @@ -38,4 +38,6 @@ extern int pfn_valid(unsigned long); #include +#define ARCH_ZONE_DMA_BITS 30 + #endif diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 8e9bc64c5878..44f07fdf7a59 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -56,6 +56,13 @@ EXPORT_SYMBOL(physvirt_offset); struct page *vmemmap __ro_after_init; EXPORT_SYMBOL(vmemmap); +/* + * We create both ZONE_DMA and ZONE_DMA32. ZONE_DMA covers the first 1G of + * memory as some devices, namely the Raspberry Pi 4, have peripherals with + * this limited view of the memory. ZONE_DMA32 will cover the rest of the 32 + * bit addressable memory area. + */ +phys_addr_t arm64_dma_phys_limit __ro_after_init; phys_addr_t arm64_dma32_phys_limit __ro_after_init; #ifdef CONFIG_KEXEC_CORE @@ -169,15 +176,16 @@ static void __init reserve_elfcorehdr(void) { } #endif /* CONFIG_CRASH_DUMP */ + /* - * Return the maximum physical address for ZONE_DMA32 (DMA_BIT_MASK(32)). It - * currently assumes that for memory starting above 4G, 32-bit devices will - * use a DMA offset. + * Return the maximum physical address for a zone with a given address size + * limit. It currently assumes that for memory starting above 4G, 32-bit + * devices will use a DMA offset. 
*/ -static phys_addr_t __init max_zone_dma32_phys(void) +static phys_addr_t __init max_zone_phys(unsigned int zone_bits) { - phys_addr_t offset = memblock_start_of_DRAM() & GENMASK_ULL(63, 32); - return min(offset + (1ULL << 32), memblock_end_of_DRAM()); + phys_addr_t offset = memblock_start_of_DRAM() & GENMASK_ULL(63, zone_bits); + return min(offset + (1ULL << zone_bits), memblock_end_of_DRAM()); } #ifdef CONFIG_NUMA @@ -186,6 +194,9 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) { unsigned long max_zone_pfns[MAX_NR_ZONES] = {0}; +#ifdef CONFIG_ZONE_DMA + max_zone_pfns[ZONE_DMA] = PFN_DOWN(arm64_dma_phys_limit); +#endif #ifdef CONFIG_ZONE_DMA32 max_zone_pfns[ZONE_DMA32] = PFN_DOWN(arm64_dma32_phys_limit); #endif @@ -201,13 +212,18 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) struct memblock_region *reg; unsigned long zone_size[MAX_NR_ZONES], zhole_size[MAX_NR_ZONES]; unsigned long max_dma32 = min; + unsigned long max_dma = min; memset(zone_size, 0, sizeof(zone_size)); - /* 4GB maximum for 32-bit only capable devices */ +#ifdef CONFIG_ZONE_DMA + max_dma = PFN_DOWN(arm64_dma_phys_limit); + zone_size[ZONE_DMA] = max_dma - min; + max_dma32 = max_dma; +#endif #ifdef CONFIG_ZONE_DMA32 max_dma32 = PFN_DOWN(arm64_dma32_phys_limit); - zone_size[ZONE_DMA32] = max_dma32 - min; + zone_size[ZONE_DMA32] = max_dma32 - max_dma; #endif zone_size[ZONE_NORMAL] = max - max_dma32; @@ -219,11 +235,17 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) if (start >= max) continue; - +#ifdef CONFIG_ZONE_DMA + if (start < max_dma) { + unsigned long dma_end = min_not_zero(end, max_dma); + zhole_size[ZONE_DMA] -= dma_end - start; + } +#endif #ifdef CONFIG_ZONE_DMA32 if (start < max_dma32) { - unsigned long dma_end = min(end, max_dma32); - zhole_size[ZONE_DMA32] -= dma_end - start; + unsigned long dma32_end = min(end, max_dma32); + unsigned long dma32_start = max(start, max_dma); + zhole_size[ZONE_DMA32] -= dma32_end - dma32_start; } #endif if (end > max_dma32) { @@ -418,9 +440,11 @@ void __init arm64_memblock_init(void) early_init_fdt_scan_reserved_mem(); - /* 4GB maximum for 32-bit only capable devices */ + if (IS_ENABLED(CONFIG_ZONE_DMA)) + arm64_dma_phys_limit = max_zone_phys(ARCH_ZONE_DMA_BITS); + if (IS_ENABLED(CONFIG_ZONE_DMA32)) - arm64_dma32_phys_limit = max_zone_dma32_phys(); + arm64_dma32_phys_limit = max_zone_phys(32); else arm64_dma32_phys_limit = PHYS_MASK + 1; @@ -430,7 +454,7 @@ void __init arm64_memblock_init(void) high_memory = __va(memblock_end_of_DRAM() - 1) + 1; - dma_contiguous_reserve(arm64_dma32_phys_limit); + dma_contiguous_reserve(arm64_dma_phys_limit ? : arm64_dma32_phys_limit); } void __init bootmem_init(void) @@ -534,7 +558,7 @@ static void __init free_unused_memmap(void) void __init mem_init(void) { if (swiotlb_force == SWIOTLB_FORCE || - max_pfn > (arm64_dma32_phys_limit >> PAGE_SHIFT)) + max_pfn > PFN_DOWN(arm64_dma_phys_limit ? : arm64_dma32_phys_limit)) swiotlb_init(1); else swiotlb_force = SWIOTLB_NO_FORCE; -- cgit v1.2.3 From f226650494c6aa87526d12135b7de8b8c074f3de Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 2 Oct 2019 10:06:12 +0100 Subject: arm64: Relax ICC_PMR_EL1 accesses when ICC_CTLR_EL1.PMHE is clear The GICv3 architecture specification is incredibly misleading when it comes to PMR and the requirement for a DSB. It turns out that this DSB is only required if the CPU interface sends an Upstream Control message to the redistributor in order to update the RD's view of PMR. 
This message is only sent when ICC_CTLR_EL1.PMHE is set, which isn't the case in Linux. It can still be set from EL3, so some special care is required. But the upshot is that in the (hopefully large) majority of the cases, we can drop the DSB altogether. This relies on a new static key being set if the boot CPU has PMHE set. The drawback is that this static key has to be exported to modules. Cc: Will Deacon Cc: James Morse Cc: Julien Thierry Cc: Suzuki K Poulose Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/barrier.h | 12 ++++++++++++ arch/arm64/include/asm/daifflags.h | 3 ++- arch/arm64/include/asm/irqflags.h | 19 ++++++++++--------- arch/arm64/include/asm/kvm_host.h | 3 +-- arch/arm64/kernel/entry.S | 6 ++++-- arch/arm64/kvm/hyp/switch.c | 4 ++-- drivers/irqchip/irq-gic-v3.c | 20 ++++++++++++++++++++ include/linux/irqchip/arm-gic-v3.h | 2 ++ 8 files changed, 53 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index e0e2b1946f42..7d9cc5ec4971 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -29,6 +29,18 @@ SB_BARRIER_INSN"nop\n", \ ARM64_HAS_SB)) +#ifdef CONFIG_ARM64_PSEUDO_NMI +#define pmr_sync() \ + do { \ + extern struct static_key_false gic_pmr_sync; \ + \ + if (static_branch_unlikely(&gic_pmr_sync)) \ + dsb(sy); \ + } while(0) +#else +#define pmr_sync() do {} while (0) +#endif + #define mb() dsb(sy) #define rmb() dsb(ld) #define wmb() dsb(st) diff --git a/arch/arm64/include/asm/daifflags.h b/arch/arm64/include/asm/daifflags.h index 063c964af705..53cd5fab79a8 100644 --- a/arch/arm64/include/asm/daifflags.h +++ b/arch/arm64/include/asm/daifflags.h @@ -8,6 +8,7 @@ #include #include +#include #include #define DAIF_PROCCTX 0 @@ -65,7 +66,7 @@ static inline void local_daif_restore(unsigned long flags) if (system_uses_irq_prio_masking()) { gic_write_pmr(GIC_PRIO_IRQON); - dsb(sy); + pmr_sync(); } } else if (system_uses_irq_prio_masking()) { u64 pmr; diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h index 1a59f0ed1ae3..aa4b6521ef14 100644 --- a/arch/arm64/include/asm/irqflags.h +++ b/arch/arm64/include/asm/irqflags.h @@ -6,6 +6,7 @@ #define __ASM_IRQFLAGS_H #include +#include #include #include @@ -34,14 +35,14 @@ static inline void arch_local_irq_enable(void) } asm volatile(ALTERNATIVE( - "msr daifclr, #2 // arch_local_irq_enable\n" - "nop", - __msr_s(SYS_ICC_PMR_EL1, "%0") - "dsb sy", + "msr daifclr, #2 // arch_local_irq_enable", + __msr_s(SYS_ICC_PMR_EL1, "%0"), ARM64_HAS_IRQ_PRIO_MASKING) : : "r" ((unsigned long) GIC_PRIO_IRQON) : "memory"); + + pmr_sync(); } static inline void arch_local_irq_disable(void) @@ -116,14 +117,14 @@ static inline unsigned long arch_local_irq_save(void) static inline void arch_local_irq_restore(unsigned long flags) { asm volatile(ALTERNATIVE( - "msr daif, %0\n" - "nop", - __msr_s(SYS_ICC_PMR_EL1, "%0") - "dsb sy", - ARM64_HAS_IRQ_PRIO_MASKING) + "msr daif, %0", + __msr_s(SYS_ICC_PMR_EL1, "%0"), + ARM64_HAS_IRQ_PRIO_MASKING) : : "r" (flags) : "memory"); + + pmr_sync(); } #endif /* __ASM_IRQFLAGS_H */ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index f656169db8c3..5ecb091c8576 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -600,8 +600,7 @@ static inline void kvm_arm_vhe_guest_enter(void) * local_daif_mask() already sets GIC_PRIO_PSR_I_SET, we just need a * dsb to ensure the redistributor is 
forwards EL2 IRQs to the CPU. */ - if (system_uses_irq_prio_masking()) - dsb(sy); + pmr_sync(); } static inline void kvm_arm_vhe_guest_exit(void) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index e304fe04b098..0a44f21bf087 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -269,8 +269,10 @@ alternative_else_nop_endif alternative_if ARM64_HAS_IRQ_PRIO_MASKING ldr x20, [sp, #S_PMR_SAVE] msr_s SYS_ICC_PMR_EL1, x20 - /* Ensure priority change is seen by redistributor */ - dsb sy + mrs_s x21, SYS_ICC_CTLR_EL1 + tbz x21, #6, .L__skip_pmr_sync\@ // Check for ICC_CTLR_EL1.PMHE + dsb sy // Ensure priority change is seen by redistributor +.L__skip_pmr_sync\@: alternative_else_nop_endif ldp x21, x22, [sp, #S_PC] // load ELR, SPSR diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index 3d3815020e36..402f18664f25 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -12,7 +12,7 @@ #include -#include +#include #include #include #include @@ -592,7 +592,7 @@ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu) */ if (system_uses_irq_prio_masking()) { gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); - dsb(sy); + pmr_sync(); } vcpu = kern_hyp_va(vcpu); diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 422664ac5f53..0abc5a13adaa 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -87,6 +87,15 @@ static DEFINE_STATIC_KEY_TRUE(supports_deactivate_key); */ static DEFINE_STATIC_KEY_FALSE(supports_pseudo_nmis); +/* + * Global static key controlling whether an update to PMR allowing more + * interrupts requires to be propagated to the redistributor (DSB SY). + * And this needs to be exported for modules to be able to enable + * interrupts... + */ +DEFINE_STATIC_KEY_FALSE(gic_pmr_sync); +EXPORT_SYMBOL(gic_pmr_sync); + /* ppi_nmi_refs[n] == number of cpus having ppi[n + 16] set as NMI */ static refcount_t *ppi_nmi_refs; @@ -1502,6 +1511,17 @@ static void gic_enable_nmi_support(void) for (i = 0; i < gic_data.ppi_nr; i++) refcount_set(&ppi_nmi_refs[i], 0); + /* + * Linux itself doesn't use 1:N distribution, so has no need to + * set PMHE. The only reason to have it set is if EL3 requires it + * (and we can't change it). + */ + if (gic_read_ctlr() & ICC_CTLR_EL1_PMHE_MASK) + static_branch_enable(&gic_pmr_sync); + + pr_info("%s ICC_PMR_EL1 synchronisation\n", + static_branch_unlikely(&gic_pmr_sync) ? 
"Forcing" : "Relaxing"); + static_branch_enable(&supports_pseudo_nmis); if (static_branch_likely(&supports_deactivate_key)) diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 5cc10cf7cb3e..a0bde9e12efa 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -487,6 +487,8 @@ #define ICC_CTLR_EL1_EOImode_MASK (1 << ICC_CTLR_EL1_EOImode_SHIFT) #define ICC_CTLR_EL1_CBPR_SHIFT 0 #define ICC_CTLR_EL1_CBPR_MASK (1 << ICC_CTLR_EL1_CBPR_SHIFT) +#define ICC_CTLR_EL1_PMHE_SHIFT 6 +#define ICC_CTLR_EL1_PMHE_MASK (1 << ICC_CTLR_EL1_PMHE_SHIFT) #define ICC_CTLR_EL1_PRI_BITS_SHIFT 8 #define ICC_CTLR_EL1_PRI_BITS_MASK (0x7 << ICC_CTLR_EL1_PRI_BITS_SHIFT) #define ICC_CTLR_EL1_ID_BITS_SHIFT 11 -- cgit v1.2.3 From 899ee4afe5eb262236717188ccdaa0192c00dc5a Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Sat, 28 Sep 2019 11:02:26 +0300 Subject: arm64: use generic free_initrd_mem() arm64 calls memblock_free() for the initrd area in its implementation of free_initrd_mem(), but this call has no actual effect that late in the boot process. By the time initrd is freed, all the reserved memory is managed by the page allocator and the memblock.reserved is unused, so the only purpose of the memblock_free() call is to keep track of initrd memory for debugging and accounting. Without the memblock_free() call the only difference between arm64 and the generic versions of free_initrd_mem() is the memory poisoning. Move memblock_free() call to the generic code, enable it there for the architectures that define ARCH_KEEP_MEMBLOCK and use the generic implementation of free_initrd_mem() on arm64. Tested-by: Anshuman Khandual #arm64 Reviewed-by: Anshuman Khandual Acked-by: Will Deacon Signed-off-by: Mike Rapoport Signed-off-by: Catalin Marinas --- arch/arm64/mm/init.c | 12 ------------ init/initramfs.c | 8 ++++++++ 2 files changed, 8 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 45c00a54909c..87a0e3b6c146 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -580,18 +580,6 @@ void free_initmem(void) unmap_kernel_range((u64)__init_begin, (u64)(__init_end - __init_begin)); } -#ifdef CONFIG_BLK_DEV_INITRD -void __init free_initrd_mem(unsigned long start, unsigned long end) -{ - unsigned long aligned_start, aligned_end; - - aligned_start = __virt_to_phys(start) & PAGE_MASK; - aligned_end = PAGE_ALIGN(__virt_to_phys(end)); - memblock_free(aligned_start, aligned_end - aligned_start); - free_reserved_area((void *)start, (void *)end, 0, "initrd"); -} -#endif - /* * Dump out memory limit information on panic. 
*/ diff --git a/init/initramfs.c b/init/initramfs.c index c47dad0884f7..8ec1be4d7d51 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -10,6 +10,7 @@ #include #include #include +#include static ssize_t __init xwrite(int fd, const char *p, size_t count) { @@ -529,6 +530,13 @@ extern unsigned long __initramfs_size; void __weak free_initrd_mem(unsigned long start, unsigned long end) { +#ifdef CONFIG_ARCH_KEEP_MEMBLOCK + unsigned long aligned_start = ALIGN_DOWN(start, PAGE_SIZE); + unsigned long aligned_end = ALIGN(end, PAGE_SIZE); + + memblock_free(__pa(aligned_start), aligned_end - aligned_start); +#endif + free_reserved_area((void *)start, (void *)end, POISON_FREE_INITMEM, "initrd"); } -- cgit v1.2.3 From 6ec939f8b809cb06ba7802e17ef7024d1bc0ee84 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Fri, 4 Oct 2019 09:53:58 +0530 Subject: arm64/mm: Poison initmem while freeing with free_reserved_area() Platform implementation for free_initmem() should poison the memory while freeing it up. Hence pass across POISON_FREE_INITMEM while calling into free_reserved_area(). The same is being followed in the generic fallback for free_initmem() and some other platforms overriding it. Cc: Mark Rutland Cc: linux-kernel@vger.kernel.org Reviewed-by: Steven Price Acked-by: Will Deacon Signed-off-by: Anshuman Khandual Signed-off-by: Catalin Marinas --- arch/arm64/mm/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 87a0e3b6c146..7c225d0132b8 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -571,7 +571,7 @@ void free_initmem(void) { free_reserved_area(lm_alias(__init_begin), lm_alias(__init_end), - 0, "unused kernel"); + POISON_FREE_INITMEM, "unused kernel"); /* * Unmap the __init region but leave the VM area in place. This * prevents the region from being reused for kernel modules, which -- cgit v1.2.3 From 4399d430700d3974ed6c5a1b1380bc6527f17e99 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 16 Oct 2019 07:47:14 -0700 Subject: arm64: mm: Fix unused variable warning in zone_sizes_init When building arm64 allnoconfig, CONFIG_ZONE_DMA and CONFIG_ZONE_DMA32 get disabled so there is a warning about max_dma being unused. ../arch/arm64/mm/init.c:215:16: warning: unused variable 'max_dma' [-Wunused-variable] unsigned long max_dma = min; ^ 1 warning generated. Add __maybe_unused to make this clear to the compiler. Fixes: 1a8e1cef7603 ("arm64: use both ZONE_DMA and ZONE_DMA32") Reviewed-by: Nicolas Saenz Julienne Signed-off-by: Nathan Chancellor Signed-off-by: Catalin Marinas --- arch/arm64/mm/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 44f07fdf7a59..71b45c58218b 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -212,7 +212,7 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) struct memblock_region *reg; unsigned long zone_size[MAX_NR_ZONES], zhole_size[MAX_NR_ZONES]; unsigned long max_dma32 = min; - unsigned long max_dma = min; + unsigned long __maybe_unused max_dma = min; memset(zone_size, 0, sizeof(zone_size)); -- cgit v1.2.3 From 9c3bafaa1fd88e4dd2dba3735a1f1abb0f2c7bb7 Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Wed, 9 Oct 2019 17:54:24 +0200 Subject: x86/mce: Lower throttling MCE messages' priority to warning On modern CPUs it is quite normal that the temperature limits are reached and the CPU is throttled. 
In fact, often the thermal design is not sufficient to cool the CPU at full load and limits can quickly be reached when a burst in load happens. This will even happen with technologies like RAPL limiting the long term power consumption of the package. Also, these limits are "softer", as Srinivas explains: "CPU temperature doesn't have to hit max(TjMax) to get these warnings. OEMs ha[ve] an ability to program a threshold where a thermal interrupt can be generated. In some systems the offset is 20C+ (Read only value). In recent systems, there is another offset on top of it which can be programmed by OS, once some agent can adjust power limits dynamically. By default this is set to low by the firmware, which I guess the prime motivation of Benjamin to submit the patch." So these messages do not usually indicate a hardware issue (e.g. insufficient cooling). Log them as warnings to avoid confusion about their severity. [ bp: Massage commit message. ] Signed-off-by: Benjamin Berg Signed-off-by: Borislav Petkov Reviewed-by: Hans de Goede Tested-by: Christian Kellner Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: linux-edac Cc: Peter Zijlstra Cc: Srinivas Pandruvada Cc: Thomas Gleixner Cc: Tony Luck Cc: x86-ml Link: https://lkml.kernel.org/r/20191009155424.249277-1-bberg@redhat.com --- arch/x86/kernel/cpu/mce/therm_throt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mce/therm_throt.c b/arch/x86/kernel/cpu/mce/therm_throt.c index 6e2becf547c5..bc441d68d060 100644 --- a/arch/x86/kernel/cpu/mce/therm_throt.c +++ b/arch/x86/kernel/cpu/mce/therm_throt.c @@ -188,7 +188,7 @@ static void therm_throt_process(bool new_event, int event, int level) /* if we just entered the thermal event */ if (new_event) { if (event == THERMAL_THROTTLING_EVENT) - pr_crit("CPU%d: %s temperature above threshold, cpu clock throttled (total events = %lu)\n", + pr_warn("CPU%d: %s temperature above threshold, cpu clock throttled (total events = %lu)\n", this_cpu, level == CORE_LEVEL ? "Core" : "Package", state->count); -- cgit v1.2.3 From 3dec541b2e632d630fe7142ed44f0b3702ef1f8c Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 15 Oct 2019 20:25:03 -0700 Subject: bpf: Add support for BTF pointers to x86 JIT Pointer to BTF object is a pointer to kernel object or NULL. Such pointers can only be used by BPF_LDX instructions. The verifier changed their opcode from LDX|MEM|size to LDX|PROBE_MEM|size to make JITing easier. The number of entries in extable is the number of BPF_LDX insns that access kernel memory via "pointer to BTF type". Only these load instructions can fault. Since x86 extable is relative it has to be allocated in the same memory region as JITed code. Allocate it prior to last pass of JITing and let the last pass populate it. Pointer to extable in bpf_prog_aux is necessary to make page fault handling fast. Page fault handling is done in two steps: 1. bpf_prog_kallsyms_find() finds BPF program that page faulted. It's done by walking rb tree. 2. then extable for given bpf program is binary searched. This process is similar to how page faulting is done for kernel modules. The exception handler skips over faulting x86 instruction and initializes destination register with zero. This mimics exact behavior of bpf_probe_read (when probe_kernel_read faults dest is zeroed). JITs for other architectures can add support in similar way. Until then they will reject the unknown opcode and fall back to the interpreter. 
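As a rough sketch of the fixup encoding described above (condensed from the ex_handler_bpf() and do_jit() hunks below; insn_len here is shorthand for the emitted load's length, which the patch computes from the JIT buffer):

	/* Encode: low 8 bits = length of the faulting x86 load,
	 * upper bits = offsetof(struct pt_regs, <dst register>). */
	ex->fixup = insn_len | (reg2pt_regs[dst_reg] << 8);

	/* Decode on a fault: zero the destination, skip the load. */
	u32 reg = x->fixup >> 8;
	*(unsigned long *)((void *)regs + reg) = 0;
	regs->ip += x->fixup & 0xff;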
Since extable should be aligned and placed near JITed code make bpf_jit_binary_alloc() return 4 byte aligned image offset, so that extable aligning formula in bpf_int_jit_compile() doesn't need to rely on internal implementation of bpf_jit_binary_alloc(). On x86 gcc defaults to 16-byte alignment for regular kernel functions due to better performance. JITed code may be aligned to 16 in the future, but it will use 4 in the meantime. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20191016032505.2089704-10-ast@kernel.org --- arch/x86/net/bpf_jit_comp.c | 97 +++++++++++++++++++++++++++++++++++++++++++-- include/linux/bpf.h | 3 ++ include/linux/extable.h | 10 +++++ kernel/bpf/core.c | 20 +++++++++- kernel/bpf/verifier.c | 1 + kernel/extable.c | 2 + 6 files changed, 128 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 3ad2ba1ad855..8cd23d8309bf 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -9,7 +9,7 @@ #include #include #include - +#include #include #include @@ -123,6 +123,19 @@ static const int reg2hex[] = { [AUX_REG] = 3, /* R11 temp register */ }; +static const int reg2pt_regs[] = { + [BPF_REG_0] = offsetof(struct pt_regs, ax), + [BPF_REG_1] = offsetof(struct pt_regs, di), + [BPF_REG_2] = offsetof(struct pt_regs, si), + [BPF_REG_3] = offsetof(struct pt_regs, dx), + [BPF_REG_4] = offsetof(struct pt_regs, cx), + [BPF_REG_5] = offsetof(struct pt_regs, r8), + [BPF_REG_6] = offsetof(struct pt_regs, bx), + [BPF_REG_7] = offsetof(struct pt_regs, r13), + [BPF_REG_8] = offsetof(struct pt_regs, r14), + [BPF_REG_9] = offsetof(struct pt_regs, r15), +}; + /* * is_ereg() == true if BPF register 'reg' maps to x86-64 r8..r15 * which need extra byte of encoding. 
@@ -377,6 +390,19 @@ static void emit_mov_reg(u8 **pprog, bool is64, u32 dst_reg, u32 src_reg) *pprog = prog; } + +static bool ex_handler_bpf(const struct exception_table_entry *x, + struct pt_regs *regs, int trapnr, + unsigned long error_code, unsigned long fault_addr) +{ + u32 reg = x->fixup >> 8; + + /* jump over faulting load and clear dest register */ + *(unsigned long *)((void *)regs + reg) = 0; + regs->ip += x->fixup & 0xff; + return true; +} + static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, int oldproglen, struct jit_context *ctx) { @@ -384,7 +410,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, int insn_cnt = bpf_prog->len; bool seen_exit = false; u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY]; - int i, cnt = 0; + int i, cnt = 0, excnt = 0; int proglen = 0; u8 *prog = temp; @@ -778,14 +804,17 @@ stx: if (is_imm8(insn->off)) /* LDX: dst_reg = *(u8*)(src_reg + off) */ case BPF_LDX | BPF_MEM | BPF_B: + case BPF_LDX | BPF_PROBE_MEM | BPF_B: /* Emit 'movzx rax, byte ptr [rax + off]' */ EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB6); goto ldx; case BPF_LDX | BPF_MEM | BPF_H: + case BPF_LDX | BPF_PROBE_MEM | BPF_H: /* Emit 'movzx rax, word ptr [rax + off]' */ EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB7); goto ldx; case BPF_LDX | BPF_MEM | BPF_W: + case BPF_LDX | BPF_PROBE_MEM | BPF_W: /* Emit 'mov eax, dword ptr [rax+0x14]' */ if (is_ereg(dst_reg) || is_ereg(src_reg)) EMIT2(add_2mod(0x40, src_reg, dst_reg), 0x8B); @@ -793,6 +822,7 @@ stx: if (is_imm8(insn->off)) EMIT1(0x8B); goto ldx; case BPF_LDX | BPF_MEM | BPF_DW: + case BPF_LDX | BPF_PROBE_MEM | BPF_DW: /* Emit 'mov rax, qword ptr [rax+0x14]' */ EMIT2(add_2mod(0x48, src_reg, dst_reg), 0x8B); ldx: /* @@ -805,6 +835,48 @@ ldx: /* else EMIT1_off32(add_2reg(0x80, src_reg, dst_reg), insn->off); + if (BPF_MODE(insn->code) == BPF_PROBE_MEM) { + struct exception_table_entry *ex; + u8 *_insn = image + proglen; + s64 delta; + + if (!bpf_prog->aux->extable) + break; + + if (excnt >= bpf_prog->aux->num_exentries) { + pr_err("ex gen bug\n"); + return -EFAULT; + } + ex = &bpf_prog->aux->extable[excnt++]; + + delta = _insn - (u8 *)&ex->insn; + if (!is_simm32(delta)) { + pr_err("extable->insn doesn't fit into 32-bit\n"); + return -EFAULT; + } + ex->insn = delta; + + delta = (u8 *)ex_handler_bpf - (u8 *)&ex->handler; + if (!is_simm32(delta)) { + pr_err("extable->handler doesn't fit into 32-bit\n"); + return -EFAULT; + } + ex->handler = delta; + + if (dst_reg > BPF_REG_9) { + pr_err("verifier error\n"); + return -EFAULT; + } + /* + * Compute size of x86 insn and its target dest x86 register. + * ex_handler_bpf() will use lower 8 bits to adjust + * pt_regs->ip to jump over this x86 instruction + * and upper bits to figure out which pt_regs to zero out. + * End result: x86 insn "mov rbx, qword ptr [rax+0x14]" + * of 4 bytes will be ignored and rbx will be zero inited. + */ + ex->fixup = (prog - temp) | (reg2pt_regs[dst_reg] << 8); + } break; /* STX XADD: lock *(u32*)(dst_reg + off) += src_reg */ @@ -1058,6 +1130,11 @@ emit_jmp: addrs[i] = proglen; prog = temp; } + + if (image && excnt != bpf_prog->aux->num_exentries) { + pr_err("extable is not populated\n"); + return -EFAULT; + } return proglen; } @@ -1158,12 +1235,24 @@ out_image: break; } if (proglen == oldproglen) { - header = bpf_jit_binary_alloc(proglen, &image, - 1, jit_fill_hole); + /* + * The number of entries in extable is the number of BPF_LDX + * insns that access kernel memory via "pointer to BTF type". 
+ * The verifier changed their opcode from LDX|MEM|size + * to LDX|PROBE_MEM|size to make JITing easier. + */ + u32 align = __alignof__(struct exception_table_entry); + u32 extable_size = prog->aux->num_exentries * + sizeof(struct exception_table_entry); + + /* allocate module memory for x86 insns and extable */ + header = bpf_jit_binary_alloc(roundup(proglen, align) + extable_size, + &image, align, jit_fill_hole); if (!header) { prog = orig_prog; goto out_addrs; } + prog->aux->extable = (void *) image + roundup(proglen, align); } oldproglen = proglen; cond_resched(); diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 028555fcd10d..a7330d75bb94 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -24,6 +24,7 @@ struct sock; struct seq_file; struct btf; struct btf_type; +struct exception_table_entry; extern struct idr btf_idr; extern spinlock_t btf_idr_lock; @@ -423,6 +424,8 @@ struct bpf_prog_aux { * main prog always has linfo_idx == 0 */ u32 linfo_idx; + u32 num_exentries; + struct exception_table_entry *extable; struct bpf_prog_stats __percpu *stats; union { struct work_struct work; diff --git a/include/linux/extable.h b/include/linux/extable.h index 81ecfaa83ad3..4ab9e78f313b 100644 --- a/include/linux/extable.h +++ b/include/linux/extable.h @@ -33,4 +33,14 @@ search_module_extables(unsigned long addr) } #endif /*CONFIG_MODULES*/ +#ifdef CONFIG_BPF_JIT +const struct exception_table_entry *search_bpf_extables(unsigned long addr); +#else +static inline const struct exception_table_entry * +search_bpf_extables(unsigned long addr) +{ + return NULL; +} +#endif + #endif /* _LINUX_EXTABLE_H */ diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 8a765bbd33f0..673f5d40a93e 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -30,7 +30,7 @@ #include #include #include - +#include #include /* Registers */ @@ -712,6 +712,24 @@ bool is_bpf_text_address(unsigned long addr) return ret; } +const struct exception_table_entry *search_bpf_extables(unsigned long addr) +{ + const struct exception_table_entry *e = NULL; + struct bpf_prog *prog; + + rcu_read_lock(); + prog = bpf_prog_kallsyms_find(addr); + if (!prog) + goto out; + if (!prog->aux->num_exentries) + goto out; + + e = search_extable(prog->aux->extable, prog->aux->num_exentries, addr); +out: + rcu_read_unlock(); + return e; +} + int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type, char *sym) { diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index c4b6a2cfcd47..fba9ef6a831b 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -8729,6 +8729,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) return -EINVAL; } insn->code = BPF_LDX | BPF_PROBE_MEM | BPF_SIZE((insn)->code); + env->prog->aux->num_exentries++; continue; default: continue; diff --git a/kernel/extable.c b/kernel/extable.c index f6c9406eec7d..f6920a11e28a 100644 --- a/kernel/extable.c +++ b/kernel/extable.c @@ -56,6 +56,8 @@ const struct exception_table_entry *search_exception_tables(unsigned long addr) e = search_kernel_exception_table(addr); if (!e) e = search_module_extables(addr); + if (!e) + e = search_bpf_extables(addr); return e; } -- cgit v1.2.3 From a7590d68e9ab56c595317457c81e59e74f6671c1 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Fri, 18 Oct 2019 11:18:18 +0800 Subject: alpha: Use pr_warn instead of pr_warning As said in commit f2c2cbcc35d4 ("powerpc: Use pr_warn instead of pr_warning"), removing pr_warning so all logging messages use a consistent _warn style. Let's do it. 
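For context, the two spellings are behaviorally identical; roughly (simplified from include/linux/printk.h):

	#define pr_warn(fmt, ...) \
		printk(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__)

pr_warning() expands to the very same printk(KERN_WARNING ...) call, so the conversions in this series are pure renames that allow the old alias to be removed later.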
Link: http://lkml.kernel.org/r/20191018031850.48498-1-wangkefeng.wang@huawei.com To: linux-kernel@vger.kernel.org Cc: Peter Zijlstra Cc: Ingo Molnar Signed-off-by: Kefeng Wang Reviewed-by: Sergey Senozhatsky Signed-off-by: Petr Mladek --- arch/alpha/kernel/perf_event.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/alpha/kernel/perf_event.c b/arch/alpha/kernel/perf_event.c index 4341ccf5c0c4..e7a59d927d78 100644 --- a/arch/alpha/kernel/perf_event.c +++ b/arch/alpha/kernel/perf_event.c @@ -824,7 +824,7 @@ static void alpha_perf_event_irq_handler(unsigned long la_ptr, if (unlikely(la_ptr >= alpha_pmu->num_pmcs)) { /* This should never occur! */ irq_err_count++; - pr_warning("PMI: silly index %ld\n", la_ptr); + pr_warn("PMI: silly index %ld\n", la_ptr); wrperfmon(PERFMON_CMD_ENABLE, cpuc->idx_mask); return; } @@ -847,7 +847,7 @@ static void alpha_perf_event_irq_handler(unsigned long la_ptr, if (unlikely(!event)) { /* This should never occur! */ irq_err_count++; - pr_warning("PMI: No event at index %d!\n", idx); + pr_warn("PMI: No event at index %d!\n", idx); wrperfmon(PERFMON_CMD_ENABLE, cpuc->idx_mask); return; } -- cgit v1.2.3 From a74ec64af20a2fc043609339b7a0c8aa2a961c8c Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Fri, 18 Oct 2019 11:18:19 +0800 Subject: arm64: Use pr_warn instead of pr_warning As said in commit f2c2cbcc35d4 ("powerpc: Use pr_warn instead of pr_warning"), removing pr_warning so all logging messages use a consistent _warn style. Let's do it. Link: http://lkml.kernel.org/r/20191018031850.48498-2-wangkefeng.wang@huawei.com To: linux-kernel@vger.kernel.org Cc: Mark Rutland Cc: Catalin Marinas Signed-off-by: Kefeng Wang Acked-by: Will Deacon Reviewed-by: Sergey Senozhatsky Signed-off-by: Petr Mladek --- arch/arm64/kernel/hw_breakpoint.c | 8 ++++---- arch/arm64/kernel/smp.c | 11 +++++------ 2 files changed, 9 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index dceb84520948..4f829008f7d8 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -51,7 +51,7 @@ int hw_breakpoint_slots(int type) case TYPE_DATA: return get_num_wrps(); default: - pr_warning("unknown slot type: %d\n", type); + pr_warn("unknown slot type: %d\n", type); return 0; } } @@ -112,7 +112,7 @@ static u64 read_wb_reg(int reg, int n) GEN_READ_WB_REG_CASES(AARCH64_DBG_REG_WVR, AARCH64_DBG_REG_NAME_WVR, val); GEN_READ_WB_REG_CASES(AARCH64_DBG_REG_WCR, AARCH64_DBG_REG_NAME_WCR, val); default: - pr_warning("attempt to read from unknown breakpoint register %d\n", n); + pr_warn("attempt to read from unknown breakpoint register %d\n", n); } return val; @@ -127,7 +127,7 @@ static void write_wb_reg(int reg, int n, u64 val) GEN_WRITE_WB_REG_CASES(AARCH64_DBG_REG_WVR, AARCH64_DBG_REG_NAME_WVR, val); GEN_WRITE_WB_REG_CASES(AARCH64_DBG_REG_WCR, AARCH64_DBG_REG_NAME_WCR, val); default: - pr_warning("attempt to write to unknown breakpoint register %d\n", n); + pr_warn("attempt to write to unknown breakpoint register %d\n", n); } isb(); } @@ -145,7 +145,7 @@ static enum dbg_active_el debug_exception_level(int privilege) case AARCH64_BREAKPOINT_EL1: return DBG_ACTIVE_EL1; default: - pr_warning("invalid breakpoint privilege level %d\n", privilege); + pr_warn("invalid breakpoint privilege level %d\n", privilege); return -EINVAL; } } diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 9286ee6749e8..09cf729edb20 100644 --- 
a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -343,8 +343,7 @@ void __cpu_die(unsigned int cpu) */ err = op_cpu_kill(cpu); if (err) - pr_warn("CPU%d may not have shut down cleanly: %d\n", - cpu, err); + pr_warn("CPU%d may not have shut down cleanly: %d\n", cpu, err); } /* @@ -979,8 +978,8 @@ void smp_send_stop(void) udelay(1); if (num_online_cpus() > 1) - pr_warning("SMP: failed to stop secondary CPUs %*pbl\n", - cpumask_pr_args(cpu_online_mask)); + pr_warn("SMP: failed to stop secondary CPUs %*pbl\n", + cpumask_pr_args(cpu_online_mask)); sdei_mask_local_cpu(); } @@ -1020,8 +1019,8 @@ void crash_smp_send_stop(void) udelay(1); if (atomic_read(&waiting_for_crash_ipi) > 0) - pr_warning("SMP: failed to stop secondary CPUs %*pbl\n", - cpumask_pr_args(&mask)); + pr_warn("SMP: failed to stop secondary CPUs %*pbl\n", + cpumask_pr_args(&mask)); sdei_mask_local_cpu(); } -- cgit v1.2.3 From 94348b81dd441da398dd10bafce3439b600a16ce Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Fri, 18 Oct 2019 11:18:20 +0800 Subject: ia64: Use pr_warn instead of pr_warning As said in commit f2c2cbcc35d4 ("powerpc: Use pr_warn instead of pr_warning"), removing pr_warning so all logging messages use a consistent _warn style. Let's do it. Link: http://lkml.kernel.org/r/20191018031850.48498-3-wangkefeng.wang@huawei.com To: linux-kernel@vger.kernel.org Cc: Tony Luck Cc: Fenghua Yu Signed-off-by: Kefeng Wang Reviewed-by: Sergey Senozhatsky Signed-off-by: Petr Mladek --- arch/ia64/kernel/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index c9cfa760cd57..5e57708c3306 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -286,7 +286,7 @@ static void __init setup_crashkernel(unsigned long total, int *n) } if (!check_crashkernel_memory(base, size)) { - pr_warning("crashkernel: There would be kdump memory " + pr_warn("crashkernel: There would be kdump memory " "at %ld GB but this is unusable because it " "must\nbe below 4 GB. Change the memory " "configuration of the machine.\n", -- cgit v1.2.3 From 04ce8d3f40cd2dfe48d04d94b79e0c0be60b1339 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Fri, 18 Oct 2019 11:18:21 +0800 Subject: riscv: Use pr_warn instead of pr_warning As said in commit f2c2cbcc35d4 ("powerpc: Use pr_warn instead of pr_warning"), removing pr_warning so all logging messages use a consistent _warn style. Let's do it. 
Link: http://lkml.kernel.org/r/20191018031850.48498-4-wangkefeng.wang@huawei.com To: linux-kernel@vger.kernel.org Cc: Paul Walmsley Cc: Palmer Dabbelt Cc: Albert Ou Signed-off-by: Kefeng Wang Acked-by: Palmer Dabbelt Reviewed-by: Sergey Senozhatsky Signed-off-by: Petr Mladek --- arch/riscv/kernel/module.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c index 70bb94ae61c5..b7401858d872 100644 --- a/arch/riscv/kernel/module.c +++ b/arch/riscv/kernel/module.c @@ -315,8 +315,8 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, /* Ignore unresolved weak symbol */ if (ELF_ST_BIND(sym->st_info) == STB_WEAK) continue; - pr_warning("%s: Unknown symbol %s\n", - me->name, strtab + sym->st_name); + pr_warn("%s: Unknown symbol %s\n", + me->name, strtab + sym->st_name); return -ENOENT; } -- cgit v1.2.3 From 47d7b15b88f96a90694cfc607d0717d62dff6c45 Mon Sep 17 00:00:00 2001 From: Jia He Date: Fri, 11 Oct 2019 22:09:36 +0800 Subject: arm64: cpufeature: introduce helper cpu_has_hw_af() We unconditionally set the HW_AFDBM capability and only enable it on CPUs which really have the feature. But sometimes we need to know whether this CPU has the capability of HW AF. So decouple AF from DBM by a new helper cpu_has_hw_af(). If we later notice a potential performance issue on this path, we can turn it into a static label as with other CPU features. Signed-off-by: Jia He Suggested-by: Suzuki Poulose Reviewed-by: Catalin Marinas Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cpufeature.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'arch') diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 9cde5d2e768f..4261d55e8506 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -659,6 +659,20 @@ static inline u32 id_aa64mmfr0_parange_to_phys_shift(int parange) default: return CONFIG_ARM64_PA_BITS; } } + +/* Check whether hardware update of the Access flag is supported */ +static inline bool cpu_has_hw_af(void) +{ + u64 mmfr1; + + if (!IS_ENABLED(CONFIG_ARM64_HW_AFDBM)) + return false; + + mmfr1 = read_cpuid(ID_AA64MMFR1_EL1); + return cpuid_feature_extract_unsigned_field(mmfr1, + ID_AA64MMFR1_HADBS_SHIFT); +} + #endif /* __ASSEMBLY__ */ #endif -- cgit v1.2.3 From 6af31226d0394691f5562eca0134262bb935fa9c Mon Sep 17 00:00:00 2001 From: Jia He Date: Fri, 11 Oct 2019 22:09:37 +0800 Subject: arm64: mm: implement arch_faults_on_old_pte() on arm64 On arm64 without hardware Access Flag, copying from user will fail because the pte is old and cannot be marked young. So we always end up with a zeroed page after fork() + CoW for pfn mappings. We don't always have a hardware-managed Access Flag on arm64. Hence implement arch_faults_on_old_pte on arm64 to indicate that it might cause a page fault when accessing an old pte. 
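A hypothetical caller sketch, not part of the hunks shown here (the mm/ consumer lands separately; vmf follows struct vm_fault and the software-copy fallback is elided):

	/* During CoW, only read through the user mapping directly if
	 * the access cannot fault on an old pte. */
	if (arch_faults_on_old_pte() && !pte_young(vmf->orig_pte))
		entry = pte_mkyoung(vmf->orig_pte); /* set AF in software */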
Signed-off-by: Jia He Reviewed-by: Catalin Marinas Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/pgtable.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'arch') diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 7576df00eb50..e96fb82f62de 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -885,6 +885,20 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, #define phys_to_ttbr(addr) (addr) #endif +/* + * On arm64 without hardware Access Flag, copying from user will fail because + * the pte is old and cannot be marked young. So we always end up with zeroed + * page after fork() + CoW for pfn mappings. We don't always have a + * hardware-managed access flag on arm64. + */ +static inline bool arch_faults_on_old_pte(void) +{ + WARN_ON(preemptible()); + + return !cpu_has_hw_af(); +} +#define arch_faults_on_old_pte arch_faults_on_old_pte + #endif /* !__ASSEMBLY__ */ #endif /* __ASM_PGTABLE_H */ -- cgit v1.2.3 From f2c4e5970cece75a895fcc45f0cd66b5a5ec0819 Mon Sep 17 00:00:00 2001 From: Jia He Date: Fri, 11 Oct 2019 22:09:38 +0800 Subject: x86/mm: implement arch_faults_on_old_pte() stub on x86 arch_faults_on_old_pte() is a helper to indicate that accessing an old pte might cause a page fault. But on x86, the hardware sets the pte access flag by itself. Hence implement an overriding stub which always returns false. Signed-off-by: Jia He Suggested-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/x86/include/asm/pgtable.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch') diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 0bc530c4eb13..ad97dc155195 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -1463,6 +1463,12 @@ static inline bool arch_has_pfn_modify_check(void) return boot_cpu_has_bug(X86_BUG_L1TF); } +#define arch_faults_on_old_pte arch_faults_on_old_pte +static inline bool arch_faults_on_old_pte(void) +{ + return false; +} + #include #endif /* __ASSEMBLY__ */ -- cgit v1.2.3 From f75e2294a4415621b223150065c8d1e823896da5 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 23 Nov 2018 17:25:52 +0000 Subject: arm64: Add ARM64_WORKAROUND_1319367 for all A57 and A72 versions Rework the EL2 vector hardening that is only selected for A57 and A72 so that the table can also be used for ARM64_WORKAROUND_1319367. 
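The hyp code that acts on the new capability is not part of this patch; as a sketch, a consumer would typically gate on it with the usual capability test:

	if (cpus_have_const_cap(ARM64_WORKAROUND_1319367)) {
		/* apply the Cortex-A57/A72 erratum workaround */
	}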
Acked-by: Catalin Marinas Reviewed-by: Suzuki K Poulose Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/cpucaps.h | 3 ++- arch/arm64/kernel/cpu_errata.c | 13 ++++++++++--- 2 files changed, 12 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index f19fe4b9acc4..277e37b2a513 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -52,7 +52,8 @@ #define ARM64_HAS_IRQ_PRIO_MASKING 42 #define ARM64_HAS_DCPODP 43 #define ARM64_WORKAROUND_1463225 44 +#define ARM64_WORKAROUND_1319367 45 -#define ARM64_NCAPS 45 +#define ARM64_NCAPS 46 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index f593f4cffc0d..a7afa6d4a58f 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -623,9 +623,9 @@ check_branch_predictor(const struct arm64_cpu_capabilities *entry, int scope) return (need_wa > 0); } -#ifdef CONFIG_HARDEN_EL2_VECTORS +#if defined(CONFIG_HARDEN_EL2_VECTORS) || defined(CONFIG_ARM64_ERRATUM_1319367) -static const struct midr_range arm64_harden_el2_vectors[] = { +static const struct midr_range ca57_a72[] = { MIDR_ALL_VERSIONS(MIDR_CORTEX_A57), MIDR_ALL_VERSIONS(MIDR_CORTEX_A72), {}, @@ -819,7 +819,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { { .desc = "EL2 vector hardening", .capability = ARM64_HARDEN_EL2_VECTORS, - ERRATA_MIDR_RANGE_LIST(arm64_harden_el2_vectors), + ERRATA_MIDR_RANGE_LIST(ca57_a72), }, #endif { @@ -851,6 +851,13 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, .matches = has_cortex_a76_erratum_1463225, }, +#endif +#ifdef CONFIG_ARM64_ERRATUM_1319367 + { + .desc = "ARM erratum 1319367", + .capability = ARM64_WORKAROUND_1319367, + ERRATA_MIDR_RANGE_LIST(ca57_a72), + }, #endif { } -- cgit v1.2.3 From 6d80f20c5328ad24dde0fddb980f60734a17b911 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Fri, 18 Oct 2019 11:18:22 +0800 Subject: sh: Use pr_warn instead of pr_warning As said in commit f2c2cbcc35d4 ("powerpc: Use pr_warn instead of pr_warning"), removing pr_warning so all logging messages use a consistent _warn style. Let's do it. 
Link: http://lkml.kernel.org/r/20191018031850.48498-5-wangkefeng.wang@huawei.com To: linux-kernel@vger.kernel.org Cc: Stephen Rothwell Cc: Yoshinori Sato Cc: Rich Felker Signed-off-by: Kefeng Wang Reviewed-by: Sergey Senozhatsky Signed-off-by: Petr Mladek --- arch/sh/boards/mach-sdk7786/nmi.c | 2 +- arch/sh/drivers/pci/fixups-sdk7786.c | 2 +- arch/sh/kernel/io_trapped.c | 2 +- arch/sh/kernel/setup.c | 2 +- arch/sh/mm/consistent.c | 5 ++--- 5 files changed, 6 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/sh/boards/mach-sdk7786/nmi.c b/arch/sh/boards/mach-sdk7786/nmi.c index c2e09d798537..afba49679a12 100644 --- a/arch/sh/boards/mach-sdk7786/nmi.c +++ b/arch/sh/boards/mach-sdk7786/nmi.c @@ -37,7 +37,7 @@ static int __init nmi_mode_setup(char *str) nmi_mode = NMI_MODE_ANY; else { nmi_mode = NMI_MODE_UNKNOWN; - pr_warning("Unknown NMI mode %s\n", str); + pr_warn("Unknown NMI mode %s\n", str); } printk("Set NMI mode to %d\n", nmi_mode); diff --git a/arch/sh/drivers/pci/fixups-sdk7786.c b/arch/sh/drivers/pci/fixups-sdk7786.c index 8cbfa5310a4b..6972af7b4e93 100644 --- a/arch/sh/drivers/pci/fixups-sdk7786.c +++ b/arch/sh/drivers/pci/fixups-sdk7786.c @@ -53,7 +53,7 @@ static int __init sdk7786_pci_init(void) /* Warn about forced rerouting if slot#3 is occupied */ if ((data & PCIECR_PRST3) == 0) { - pr_warning("Unreachable card detected in slot#3\n"); + pr_warn("Unreachable card detected in slot#3\n"); return -EBUSY; } } else diff --git a/arch/sh/kernel/io_trapped.c b/arch/sh/kernel/io_trapped.c index bacad6da4fe4..60c828a2b8a2 100644 --- a/arch/sh/kernel/io_trapped.c +++ b/arch/sh/kernel/io_trapped.c @@ -99,7 +99,7 @@ int register_trapped_io(struct trapped_io *tiop) return 0; bad: - pr_warning("unable to install trapped io filter\n"); + pr_warn("unable to install trapped io filter\n"); return -1; } EXPORT_SYMBOL_GPL(register_trapped_io); diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c index 2c0e0f37a318..6ef341f6cfee 100644 --- a/arch/sh/kernel/setup.c +++ b/arch/sh/kernel/setup.c @@ -354,7 +354,7 @@ void __init setup_arch(char **cmdline_p) /* processor boot mode configuration */ int generic_mode_pins(void) { - pr_warning("generic_mode_pins(): missing mode pin configuration\n"); + pr_warn("generic_mode_pins(): missing mode pin configuration\n"); return 0; } diff --git a/arch/sh/mm/consistent.c b/arch/sh/mm/consistent.c index 792f36129062..3169a343a5ab 100644 --- a/arch/sh/mm/consistent.c +++ b/arch/sh/mm/consistent.c @@ -43,8 +43,7 @@ int __init platform_resource_setup_memory(struct platform_device *pdev, r = pdev->resource + pdev->num_resources - 1; if (r->flags) { - pr_warning("%s: unable to find empty space for resource\n", - name); + pr_warn("%s: unable to find empty space for resource\n", name); return -EINVAL; } @@ -54,7 +53,7 @@ int __init platform_resource_setup_memory(struct platform_device *pdev, buf = dma_alloc_coherent(&pdev->dev, memsize, &dma_handle, GFP_KERNEL); if (!buf) { - pr_warning("%s: unable to allocate memory\n", name); + pr_warn("%s: unable to allocate memory\n", name); return -ENOMEM; } -- cgit v1.2.3 From eb1414ec89d31873c912f4073bd22d8b5505797a Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Fri, 18 Oct 2019 11:18:23 +0800 Subject: sparc: Use pr_warn instead of pr_warning As said in commit f2c2cbcc35d4 ("powerpc: Use pr_warn instead of pr_warning"), removing pr_warning so all logging messages use a consistent _warn style. Let's do it. 
Link: http://lkml.kernel.org/r/20191018031850.48498-6-wangkefeng.wang@huawei.com To: linux-kernel@vger.kernel.org Cc: Andrew Morton Signed-off-by: Kefeng Wang Reviewed-by: Sergey Senozhatsky Acked-by: David S. Miller Signed-off-by: Petr Mladek --- arch/sparc/kernel/smp_64.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index a8275fea4b70..9b4506373353 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -1673,9 +1673,9 @@ void __init setup_per_cpu_areas(void) pcpu_alloc_bootmem, pcpu_free_bootmem); if (rc) - pr_warning("PERCPU: %s allocator failed (%d), " - "falling back to page size\n", - pcpu_fc_names[pcpu_chosen_fc], rc); + pr_warn("PERCPU: %s allocator failed (%d), " + "falling back to page size\n", + pcpu_fc_names[pcpu_chosen_fc], rc); } if (rc < 0) rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE, -- cgit v1.2.3 From 8d3bcc441e6cddbb5fe49b59f7766f01f1e2493b Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Fri, 18 Oct 2019 11:18:24 +0800 Subject: x86: Use pr_warn instead of pr_warning As said in commit f2c2cbcc35d4 ("powerpc: Use pr_warn instead of pr_warning"), removing pr_warning so all logging messages use a consistent _warn style. Let's do it. Link: http://lkml.kernel.org/r/20191018031850.48498-7-wangkefeng.wang@huawei.com To: linux-kernel@vger.kernel.org Cc: Ingo Molnar Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: Robert Richter Cc: Darren Hart Cc: Andy Shevchenko Signed-off-by: Kefeng Wang Reviewed-by: Sergey Senozhatsky Signed-off-by: Petr Mladek --- arch/x86/kernel/amd_gart_64.c | 12 +++++----- arch/x86/kernel/apic/apic.c | 41 ++++++++++++++++------------------ arch/x86/kernel/setup_percpu.c | 4 ++-- arch/x86/kernel/tboot.c | 15 ++++++------- arch/x86/kernel/tsc_sync.c | 8 +++---- arch/x86/kernel/umip.c | 6 ++--- arch/x86/mm/kmmio.c | 7 +++--- arch/x86/mm/mmio-mod.c | 6 ++--- arch/x86/mm/numa_emulation.c | 4 ++-- arch/x86/mm/testmmiotrace.c | 6 ++--- arch/x86/oprofile/op_x86_model.h | 6 ++--- arch/x86/platform/olpc/olpc-xo15-sci.c | 2 +- arch/x86/platform/sfi/sfi.c | 3 +-- arch/x86/xen/setup.c | 2 +- 14 files changed, 57 insertions(+), 65 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c index a585ea6f686a..527bc16992e1 100644 --- a/arch/x86/kernel/amd_gart_64.c +++ b/arch/x86/kernel/amd_gart_64.c @@ -510,10 +510,9 @@ static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size) iommu_size -= round_up(a, PMD_PAGE_SIZE) - a; if (iommu_size < 64*1024*1024) { - pr_warning( - "PCI-DMA: Warning: Small IOMMU %luMB." + pr_warn("PCI-DMA: Warning: Small IOMMU %luMB." 
" Consider increasing the AGP aperture in BIOS\n", - iommu_size >> 20); + iommu_size >> 20); } return iommu_size; @@ -665,8 +664,7 @@ static __init int init_amd_gatt(struct agp_kern_info *info) nommu: /* Should not happen anymore */ - pr_warning("PCI-DMA: More than 4GB of RAM and no IOMMU\n" - "falling back to iommu=soft.\n"); + pr_warn("PCI-DMA: More than 4GB of RAM and no IOMMU - falling back to iommu=soft.\n"); return -1; } @@ -730,8 +728,8 @@ int __init gart_iommu_init(void) !gart_iommu_aperture || (no_agp && init_amd_gatt(&info) < 0)) { if (max_pfn > MAX_DMA32_PFN) { - pr_warning("More than 4GB of memory but GART IOMMU not available.\n"); - pr_warning("falling back to iommu=soft.\n"); + pr_warn("More than 4GB of memory but GART IOMMU not available.\n"); + pr_warn("falling back to iommu=soft.\n"); } return 0; } diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 1bd91cb7b320..5be2c3bc9d93 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -780,8 +780,8 @@ calibrate_by_pmtimer(long deltapm, long *delta, long *deltatsc) res = (((u64)deltapm) * mult) >> 22; do_div(res, 1000000); - pr_warning("APIC calibration not consistent " - "with PM-Timer: %ldms instead of 100ms\n",(long)res); + pr_warn("APIC calibration not consistent " + "with PM-Timer: %ldms instead of 100ms\n", (long)res); /* Correct the lapic counter value */ res = (((u64)(*delta)) * pm_100ms); @@ -936,7 +936,7 @@ static int __init calibrate_APIC_clock(void) */ if (lapic_timer_period < (1000000 / HZ)) { local_irq_enable(); - pr_warning("APIC frequency too slow, disabling apic timer\n"); + pr_warn("APIC frequency too slow, disabling apic timer\n"); return -1; } @@ -979,7 +979,7 @@ static int __init calibrate_APIC_clock(void) local_irq_enable(); if (levt->features & CLOCK_EVT_FEAT_DUMMY) { - pr_warning("APIC timer disabled due to verification failure\n"); + pr_warn("APIC timer disabled due to verification failure\n"); return -1; } @@ -1053,8 +1053,8 @@ static void local_apic_timer_interrupt(void) * spurious. */ if (!evt->event_handler) { - pr_warning("Spurious LAPIC timer interrupt on cpu %d\n", - smp_processor_id()); + pr_warn("Spurious LAPIC timer interrupt on cpu %d\n", + smp_processor_id()); /* Switch it off */ lapic_timer_shutdown(evt); return; @@ -1725,11 +1725,11 @@ static int __init setup_nox2apic(char *str) int apicid = native_apic_msr_read(APIC_ID); if (apicid >= 255) { - pr_warning("Apicid: %08x, cannot enforce nox2apic\n", - apicid); + pr_warn("Apicid: %08x, cannot enforce nox2apic\n", + apicid); return 0; } - pr_warning("x2apic already enabled.\n"); + pr_warn("x2apic already enabled.\n"); __x2apic_disable(); } setup_clear_cpu_cap(X86_FEATURE_X2APIC); @@ -1897,7 +1897,7 @@ static int __init apic_verify(void) */ features = cpuid_edx(1); if (!(features & (1 << X86_FEATURE_APIC))) { - pr_warning("Could not enable APIC!\n"); + pr_warn("Could not enable APIC!\n"); return -1; } set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); @@ -2324,9 +2324,8 @@ int generic_processor_info(int apicid, int version) disabled_cpu_apicid == apicid) { int thiscpu = num_processors + disabled_cpus; - pr_warning("APIC: Disabling requested cpu." - " Processor %d/0x%x ignored.\n", - thiscpu, apicid); + pr_warn("APIC: Disabling requested cpu." 
+ " Processor %d/0x%x ignored.\n", thiscpu, apicid); disabled_cpus++; return -ENODEV; @@ -2340,8 +2339,7 @@ int generic_processor_info(int apicid, int version) apicid != boot_cpu_physical_apicid) { int thiscpu = max + disabled_cpus - 1; - pr_warning( - "APIC: NR_CPUS/possible_cpus limit of %i almost" + pr_warn("APIC: NR_CPUS/possible_cpus limit of %i almost" " reached. Keeping one slot for boot cpu." " Processor %d/0x%x ignored.\n", max, thiscpu, apicid); @@ -2352,9 +2350,8 @@ int generic_processor_info(int apicid, int version) if (num_processors >= nr_cpu_ids) { int thiscpu = max + disabled_cpus; - pr_warning("APIC: NR_CPUS/possible_cpus limit of %i " - "reached. Processor %d/0x%x ignored.\n", - max, thiscpu, apicid); + pr_warn("APIC: NR_CPUS/possible_cpus limit of %i reached. " + "Processor %d/0x%x ignored.\n", max, thiscpu, apicid); disabled_cpus++; return -EINVAL; @@ -2384,13 +2381,13 @@ int generic_processor_info(int apicid, int version) * Validate version */ if (version == 0x0) { - pr_warning("BIOS bug: APIC version is 0 for CPU %d/0x%x, fixing up to 0x10\n", - cpu, apicid); + pr_warn("BIOS bug: APIC version is 0 for CPU %d/0x%x, fixing up to 0x10\n", + cpu, apicid); version = 0x10; } if (version != boot_cpu_apic_version) { - pr_warning("BIOS bug: APIC version mismatch, boot CPU: %x, CPU %d: version %x\n", + pr_warn("BIOS bug: APIC version mismatch, boot CPU: %x, CPU %d: version %x\n", boot_cpu_apic_version, cpu, version); } @@ -2759,7 +2756,7 @@ static int __init apic_set_verbosity(char *arg) apic_verbosity = APIC_VERBOSE; #ifdef CONFIG_X86_64 else { - pr_warning("APIC Verbosity level %s not recognised" + pr_warn("APIC Verbosity level %s not recognised" " use apic=verbose or apic=debug\n", arg); return -EINVAL; } diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 86663874ef04..e6d7894ad127 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -207,8 +207,8 @@ void __init setup_per_cpu_areas(void) pcpu_cpu_distance, pcpu_fc_alloc, pcpu_fc_free); if (rc < 0) - pr_warning("%s allocator failed (%d), falling back to page size\n", - pcpu_fc_names[pcpu_chosen_fc], rc); + pr_warn("%s allocator failed (%d), falling back to page size\n", + pcpu_fc_names[pcpu_chosen_fc], rc); } if (rc < 0) rc = pcpu_page_first_chunk(PERCPU_FIRST_CHUNK_RESERVE, diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c index a49fe1dcb47e..4c61f0713832 100644 --- a/arch/x86/kernel/tboot.c +++ b/arch/x86/kernel/tboot.c @@ -57,7 +57,7 @@ void __init tboot_probe(void) */ if (!e820__mapped_any(boot_params.tboot_addr, boot_params.tboot_addr, E820_TYPE_RESERVED)) { - pr_warning("non-0 tboot_addr but it is not of type E820_TYPE_RESERVED\n"); + pr_warn("non-0 tboot_addr but it is not of type E820_TYPE_RESERVED\n"); return; } @@ -65,13 +65,12 @@ void __init tboot_probe(void) set_fixmap(FIX_TBOOT_BASE, boot_params.tboot_addr); tboot = (struct tboot *)fix_to_virt(FIX_TBOOT_BASE); if (memcmp(&tboot_uuid, &tboot->uuid, sizeof(tboot->uuid))) { - pr_warning("tboot at 0x%llx is invalid\n", - boot_params.tboot_addr); + pr_warn("tboot at 0x%llx is invalid\n", boot_params.tboot_addr); tboot = NULL; return; } if (tboot->version < 5) { - pr_warning("tboot version is invalid: %u\n", tboot->version); + pr_warn("tboot version is invalid: %u\n", tboot->version); tboot = NULL; return; } @@ -289,7 +288,7 @@ static int tboot_sleep(u8 sleep_state, u32 pm1a_control, u32 pm1b_control) if (sleep_state >= ACPI_S_STATE_COUNT || acpi_shutdown_map[sleep_state] == -1) { - 
pr_warning("unsupported sleep state 0x%x\n", sleep_state); + pr_warn("unsupported sleep state 0x%x\n", sleep_state); return -1; } @@ -302,7 +301,7 @@ static int tboot_extended_sleep(u8 sleep_state, u32 val_a, u32 val_b) if (!tboot_enabled()) return 0; - pr_warning("tboot is not able to suspend on platforms with reduced hardware sleep (ACPIv5)"); + pr_warn("tboot is not able to suspend on platforms with reduced hardware sleep (ACPIv5)"); return -ENODEV; } @@ -320,7 +319,7 @@ static int tboot_wait_for_aps(int num_aps) } if (timeout) - pr_warning("tboot wait for APs timeout\n"); + pr_warn("tboot wait for APs timeout\n"); return !(atomic_read((atomic_t *)&tboot->num_in_wfs) == num_aps); } @@ -516,7 +515,7 @@ int tboot_force_iommu(void) return 1; if (no_iommu || swiotlb || dmar_disabled) - pr_warning("Forcing Intel-IOMMU to enabled\n"); + pr_warn("Forcing Intel-IOMMU to enabled\n"); dmar_disabled = 0; #ifdef CONFIG_SWIOTLB diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c index ec534f978867..b8acf639abd1 100644 --- a/arch/x86/kernel/tsc_sync.c +++ b/arch/x86/kernel/tsc_sync.c @@ -364,12 +364,12 @@ retry: /* Force it to 0 if random warps brought us here */ atomic_set(&test_runs, 0); - pr_warning("TSC synchronization [CPU#%d -> CPU#%d]:\n", + pr_warn("TSC synchronization [CPU#%d -> CPU#%d]:\n", smp_processor_id(), cpu); - pr_warning("Measured %Ld cycles TSC warp between CPUs, " - "turning off TSC clock.\n", max_warp); + pr_warn("Measured %Ld cycles TSC warp between CPUs, " + "turning off TSC clock.\n", max_warp); if (random_warps) - pr_warning("TSC warped randomly between CPUs\n"); + pr_warn("TSC warped randomly between CPUs\n"); mark_tsc_unstable("check_tsc_sync_source failed"); } diff --git a/arch/x86/kernel/umip.c b/arch/x86/kernel/umip.c index 5b345add550f..de340e47f82d 100644 --- a/arch/x86/kernel/umip.c +++ b/arch/x86/kernel/umip.c @@ -92,7 +92,7 @@ const char * const umip_insns[5] = { #define umip_pr_err(regs, fmt, ...) \ umip_printk(regs, KERN_ERR, fmt, ##__VA_ARGS__) -#define umip_pr_warning(regs, fmt, ...) \ +#define umip_pr_warn(regs, fmt, ...) \ umip_printk(regs, KERN_WARNING, fmt, ##__VA_ARGS__) /** @@ -370,14 +370,14 @@ bool fixup_umip_exception(struct pt_regs *regs) if (umip_inst < 0) return false; - umip_pr_warning(regs, "%s instruction cannot be used by applications.\n", + umip_pr_warn(regs, "%s instruction cannot be used by applications.\n", umip_insns[umip_inst]); /* Do not emulate SLDT, STR or user long mode processes. 
*/ if (umip_inst == UMIP_INST_STR || umip_inst == UMIP_INST_SLDT || user_64bit_mode(regs)) return false; - umip_pr_warning(regs, "For now, expensive software emulation returns the result.\n"); + umip_pr_warn(regs, "For now, expensive software emulation returns the result.\n"); if (emulate_umip_insn(&insn, umip_inst, dummy_data, &dummy_data_size)) return false; diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index 79eb55ce69a9..49d7814b59a9 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -193,8 +193,8 @@ static int arm_kmmio_fault_page(struct kmmio_fault_page *f) int ret; WARN_ONCE(f->armed, KERN_ERR pr_fmt("kmmio page already armed.\n")); if (f->armed) { - pr_warning("double-arm: addr 0x%08lx, ref %d, old %d\n", - f->addr, f->count, !!f->old_presence); + pr_warn("double-arm: addr 0x%08lx, ref %d, old %d\n", + f->addr, f->count, !!f->old_presence); } ret = clear_page_presence(f, true); WARN_ONCE(ret < 0, KERN_ERR pr_fmt("arming at 0x%08lx failed.\n"), @@ -341,8 +341,7 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) * something external causing them (f.e. using a debugger while * mmio tracing enabled), or erroneous behaviour */ - pr_warning("unexpected debug trap on CPU %d.\n", - smp_processor_id()); + pr_warn("unexpected debug trap on CPU %d.\n", smp_processor_id()); goto out; } diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c index b8ef8557d4b3..673de6063345 100644 --- a/arch/x86/mm/mmio-mod.c +++ b/arch/x86/mm/mmio-mod.c @@ -394,7 +394,7 @@ static void enter_uniprocessor(void) } out: if (num_online_cpus() > 1) - pr_warning("multiple CPUs still online, may miss events.\n"); + pr_warn("multiple CPUs still online, may miss events.\n"); } static void leave_uniprocessor(void) @@ -418,8 +418,8 @@ static void leave_uniprocessor(void) static void enter_uniprocessor(void) { if (num_online_cpus() > 1) - pr_warning("multiple CPUs are online, may miss events. " - "Suggest booting with maxcpus=1 kernel argument.\n"); + pr_warn("multiple CPUs are online, may miss events. 
" + "Suggest booting with maxcpus=1 kernel argument.\n"); } static void leave_uniprocessor(void) diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c index abffa0be80da..7f1d2034df1e 100644 --- a/arch/x86/mm/numa_emulation.c +++ b/arch/x86/mm/numa_emulation.c @@ -438,7 +438,7 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt) goto no_emu; if (numa_cleanup_meminfo(&ei) < 0) { - pr_warning("NUMA: Warning: constructed meminfo invalid, disabling emulation\n"); + pr_warn("NUMA: Warning: constructed meminfo invalid, disabling emulation\n"); goto no_emu; } @@ -449,7 +449,7 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt) phys = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped), phys_size, PAGE_SIZE); if (!phys) { - pr_warning("NUMA: Warning: can't allocate copy of distance table, disabling emulation\n"); + pr_warn("NUMA: Warning: can't allocate copy of distance table, disabling emulation\n"); goto no_emu; } memblock_reserve(phys, phys_size); diff --git a/arch/x86/mm/testmmiotrace.c b/arch/x86/mm/testmmiotrace.c index 0881e1ff1e58..afd828a57c33 100644 --- a/arch/x86/mm/testmmiotrace.c +++ b/arch/x86/mm/testmmiotrace.c @@ -122,9 +122,9 @@ static int __init init(void) return -ENXIO; } - pr_warning("WARNING: mapping %lu kB @ 0x%08lx in PCI address space, " - "and writing 16 kB of rubbish in there.\n", - size >> 10, mmio_address); + pr_warn("WARNING: mapping %lu kB @ 0x%08lx in PCI address space, " + "and writing 16 kB of rubbish in there.\n", + size >> 10, mmio_address); do_test(size); do_test_bulk_ioremapping(); pr_info("All done.\n"); diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h index 71e8a67337e2..276cf79b5d24 100644 --- a/arch/x86/oprofile/op_x86_model.h +++ b/arch/x86/oprofile/op_x86_model.h @@ -67,13 +67,13 @@ static inline void op_x86_warn_in_use(int counter) * cannot be monitored by any other counter, contact your * hardware or BIOS vendor. 
*/
- pr_warning("oprofile: counter #%d on cpu #%d may already be used\n",
- counter, smp_processor_id());
+ pr_warn("oprofile: counter #%d on cpu #%d may already be used\n",
+ counter, smp_processor_id());
 }

 static inline void op_x86_warn_reserved(int counter)
 {
- pr_warning("oprofile: counter #%d is already reserved\n", counter);
+ pr_warn("oprofile: counter #%d is already reserved\n", counter);
 }

 extern u64 op_x86_get_ctrl(struct op_x86_model_spec const *model,
diff --git a/arch/x86/platform/olpc/olpc-xo15-sci.c b/arch/x86/platform/olpc/olpc-xo15-sci.c
index 6d193bb36021..089413cd944e 100644
--- a/arch/x86/platform/olpc/olpc-xo15-sci.c
+++ b/arch/x86/platform/olpc/olpc-xo15-sci.c
@@ -39,7 +39,7 @@ static int set_lid_wake_behavior(bool wake_on_close)
 status = acpi_execute_simple_method(NULL, "\\_SB.PCI0.LID.LIDW", wake_on_close);
 if (ACPI_FAILURE(status)) {
- pr_warning(PFX "failed to set lid behavior\n");
+ pr_warn(PFX "failed to set lid behavior\n");
 return 1;
 }
diff --git a/arch/x86/platform/sfi/sfi.c b/arch/x86/platform/sfi/sfi.c
index bf6016f8db4e..6259563760f9 100644
--- a/arch/x86/platform/sfi/sfi.c
+++ b/arch/x86/platform/sfi/sfi.c
@@ -26,8 +26,7 @@ static unsigned long sfi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
 static void __init mp_sfi_register_lapic(u8 id)
 {
 if (MAX_LOCAL_APIC - id <= 0) {
- pr_warning("Processor #%d invalid (max %d)\n",
- id, MAX_LOCAL_APIC);
+ pr_warn("Processor #%d invalid (max %d)\n", id, MAX_LOCAL_APIC);
 return;
 }
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 548d1e0a5ba1..33b0e20df7fc 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -412,7 +412,7 @@ static unsigned long __init xen_set_identity_and_remap_chunk(
 remap_range_size = xen_find_pfn_range(&remap_pfn);
 if (!remap_range_size) {
- pr_warning("Unable to find available pfn range, not remapping identity pages\n");
+ pr_warn("Unable to find available pfn range, not remapping identity pages\n");
 xen_set_identity_and_release_chunk(cur_pfn, cur_pfn + left, nr_pages);
 break;
-- cgit v1.2.3


From 4f3d957718e7f0ac2b033dbf48c7cddecd0a8dd3 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko
Date: Fri, 18 Oct 2019 13:54:25 +0300
Subject: spi: pxa2xx: No need to keep pointer to platform device

There is no need to keep a pointer to the platform device. Currently
there are no direct users of it, and should one appear in the future,
the platform device can be recovered from the pointer to the struct
device.

Convert all users at the same time.
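As an illustration of why nothing is lost, a minimal sketch (hypothetical
helper, not part of this patch) of recovering the platform device from the
stored struct device, using the standard to_platform_device() wrapper:

	#include <linux/platform_device.h>
	#include <linux/pxa2xx_ssp.h>

	/* Hypothetical: recover the platform device on demand.
	 * to_platform_device() is a container_of() wrapper around
	 * the embedded struct device. */
	static struct platform_device *ssp_to_pdev(struct ssp_device *ssp)
	{
		return to_platform_device(ssp->dev);
	}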
Cc: Russell King Cc: Jaroslav Kysela Cc: Takashi Iwai Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20191018105429.82782-1-andriy.shevchenko@linux.intel.com Signed-off-by: Mark Brown --- arch/arm/plat-pxa/ssp.c | 4 ++-- drivers/spi/spi-pxa2xx.c | 2 +- include/linux/pxa2xx_ssp.h | 2 +- sound/soc/pxa/mmp-sspa.c | 2 +- sound/soc/pxa/pxa-ssp.c | 10 +++++----- 5 files changed, 10 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/arm/plat-pxa/ssp.c b/arch/arm/plat-pxa/ssp.c index 9a6e4923bd69..563440315acd 100644 --- a/arch/arm/plat-pxa/ssp.c +++ b/arch/arm/plat-pxa/ssp.c @@ -89,7 +89,7 @@ void pxa_ssp_free(struct ssp_device *ssp) ssp->use_count--; ssp->label = NULL; } else - dev_err(&ssp->pdev->dev, "device already free\n"); + dev_err(ssp->dev, "device already free\n"); mutex_unlock(&ssp_lock); } EXPORT_SYMBOL(pxa_ssp_free); @@ -118,7 +118,7 @@ static int pxa_ssp_probe(struct platform_device *pdev) if (ssp == NULL) return -ENOMEM; - ssp->pdev = pdev; + ssp->dev = dev; ssp->clk = devm_clk_get(dev, NULL); if (IS_ERR(ssp->clk)) diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c index bb6a14d1ab0f..59d1406a9c96 100644 --- a/drivers/spi/spi-pxa2xx.c +++ b/drivers/spi/spi-pxa2xx.c @@ -1567,7 +1567,7 @@ pxa2xx_spi_init_pdata(struct platform_device *pdev) ssp->clk = devm_clk_get(&pdev->dev, NULL); ssp->irq = platform_get_irq(pdev, 0); ssp->type = type; - ssp->pdev = pdev; + ssp->dev = &pdev->dev; ssp->port_id = pxa2xx_spi_get_port_id(adev); pdata->is_slave = of_property_read_bool(pdev->dev.of_node, "spi-slave"); diff --git a/include/linux/pxa2xx_ssp.h b/include/linux/pxa2xx_ssp.h index a5d1837e4f35..6facf27865f9 100644 --- a/include/linux/pxa2xx_ssp.h +++ b/include/linux/pxa2xx_ssp.h @@ -206,7 +206,7 @@ enum pxa_ssp_type { }; struct ssp_device { - struct platform_device *pdev; + struct device *dev; struct list_head node; struct clk *clk; diff --git a/sound/soc/pxa/mmp-sspa.c b/sound/soc/pxa/mmp-sspa.c index e3e5425b5c62..e701637a9ae9 100644 --- a/sound/soc/pxa/mmp-sspa.c +++ b/sound/soc/pxa/mmp-sspa.c @@ -177,7 +177,7 @@ static int mmp_sspa_set_dai_fmt(struct snd_soc_dai *cpu_dai, /* we can only change the settings if the port is not in use */ if ((mmp_sspa_read_reg(sspa, SSPA_TXSP) & SSPA_SP_S_EN) || (mmp_sspa_read_reg(sspa, SSPA_RXSP) & SSPA_SP_S_EN)) { - dev_err(&sspa->pdev->dev, + dev_err(sspa->dev, "can't change hardware dai format: stream is in use\n"); return -EINVAL; } diff --git a/sound/soc/pxa/pxa-ssp.c b/sound/soc/pxa/pxa-ssp.c index 5fdd1a24c232..6c5201431f6e 100644 --- a/sound/soc/pxa/pxa-ssp.c +++ b/sound/soc/pxa/pxa-ssp.c @@ -52,11 +52,11 @@ struct ssp_priv { static void dump_registers(struct ssp_device *ssp) { - dev_dbg(&ssp->pdev->dev, "SSCR0 0x%08x SSCR1 0x%08x SSTO 0x%08x\n", + dev_dbg(ssp->dev, "SSCR0 0x%08x SSCR1 0x%08x SSTO 0x%08x\n", pxa_ssp_read_reg(ssp, SSCR0), pxa_ssp_read_reg(ssp, SSCR1), pxa_ssp_read_reg(ssp, SSTO)); - dev_dbg(&ssp->pdev->dev, "SSPSP 0x%08x SSSR 0x%08x SSACD 0x%08x\n", + dev_dbg(ssp->dev, "SSPSP 0x%08x SSSR 0x%08x SSACD 0x%08x\n", pxa_ssp_read_reg(ssp, SSPSP), pxa_ssp_read_reg(ssp, SSSR), pxa_ssp_read_reg(ssp, SSACD)); } @@ -223,7 +223,7 @@ static int pxa_ssp_set_dai_sysclk(struct snd_soc_dai *cpu_dai, clk_id = PXA_SSP_CLK_EXT; } - dev_dbg(&ssp->pdev->dev, + dev_dbg(ssp->dev, "pxa_ssp_set_dai_sysclk id: %d, clk_id %d, freq %u\n", cpu_dai->id, clk_id, freq); @@ -316,7 +316,7 @@ static int pxa_ssp_set_pll(struct ssp_priv *priv, unsigned int freq) ssacd |= (0x6 << 4); - dev_dbg(&ssp->pdev->dev, + 
dev_dbg(ssp->dev,
 "Using SSACDD %x to supply %uHz\n",
 val, freq);
 break;
@@ -687,7 +687,7 @@ static int pxa_ssp_hw_params(struct snd_pcm_substream *substream,
 * - complain loudly and fail if they've not been set up yet.
 */
 if ((sscr0 & SSCR0_MOD) && !ttsa) {
- dev_err(&ssp->pdev->dev, "No TDM timeslot configured\n");
+ dev_err(ssp->dev, "No TDM timeslot configured\n");
 return -EINVAL;
 }
-- cgit v1.2.3


From 1a9167a214f560a23c5050ce6dfebae489528f0d Mon Sep 17 00:00:00 2001
From: Fabiano Rosas
Date: Wed, 19 Jun 2019 13:01:27 -0300
Subject: KVM: PPC: Report single stepping capability

When calling the KVM_SET_GUEST_DEBUG ioctl, userspace might request the
next instruction to be single stepped via the KVM_GUESTDBG_SINGLESTEP
control bit of the kvm_guest_debug structure.

This patch adds the KVM_CAP_PPC_GUEST_DEBUG_SSTEP capability in order to
inform userspace about the state of single stepping support.

We currently don't have support for guest single stepping implemented in
Book3S HV, so the capability is only present for Book3S PR and BookE.

Signed-off-by: Fabiano Rosas
Signed-off-by: Paul Mackerras
---
 Documentation/virt/kvm/api.txt | 3 +++
 arch/powerpc/kvm/powerpc.c | 2 ++
 include/uapi/linux/kvm.h | 1 +
 3 files changed, 6 insertions(+)

(limited to 'arch')

diff --git a/Documentation/virt/kvm/api.txt b/Documentation/virt/kvm/api.txt
index 4833904d32a5..f94d06a12d20 100644
--- a/Documentation/virt/kvm/api.txt
+++ b/Documentation/virt/kvm/api.txt
@@ -2982,6 +2982,9 @@ can be determined by querying the KVM_CAP_GUEST_DEBUG_HW_BPS and
KVM_CAP_GUEST_DEBUG_HW_WPS capabilities which return a positive number
indicating the number of supported registers.

+For ppc, the KVM_CAP_PPC_GUEST_DEBUG_SSTEP capability indicates whether
+the single-step debug event (KVM_GUESTDBG_SINGLESTEP) is supported.
+
 When debug events exit the main run loop with the reason
 KVM_EXIT_DEBUG with the kvm_debug_exit_arch part of the kvm_run
 structure containing architecture specific debug information.
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 3a77bb643452..9e085e931d74 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -522,6 +522,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 case KVM_CAP_IMMEDIATE_EXIT:
 r = 1;
 break;
+ case KVM_CAP_PPC_GUEST_DEBUG_SSTEP:
+ /* fall through */
 case KVM_CAP_PPC_PAIRED_SINGLES:
 case KVM_CAP_PPC_OSI:
 case KVM_CAP_PPC_GET_PVINFO:
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 52641d8ca9e8..ce8cfcc51aec 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1000,6 +1000,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_PMU_EVENT_FILTER 173
 #define KVM_CAP_ARM_IRQ_LINE_LAYOUT_2 174
 #define KVM_CAP_HYPERV_DIRECT_TLBFLUSH 175
+#define KVM_CAP_PPC_GUEST_DEBUG_SSTEP 176

 #ifdef KVM_CAP_IRQ_ROUTING
-- cgit v1.2.3


From 7cf78b6b12fd5550545e4b73b35dca18bd46b44c Mon Sep 17 00:00:00 2001
From: Fuqian Huang
Date: Fri, 27 Sep 2019 20:15:44 +0800
Subject: m68k: q40: Fix info-leak in rtc_ioctl

When the option is RTC_PLL_GET, pll will be copied to userland via
copy_to_user. pll is initialized through the mach_get_rtc_pll indirect
call, and mach_get_rtc_pll is only ever assigned the function
q40_get_rtc_pll in arch/m68k/q40/config.c. In q40_get_rtc_pll, the field
pll_ctrl is never initialized, so its stale stack bytes are leaked to
userland. Fix this by zeroing the uninitialized field.
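As a condensed sketch of the leak pattern being fixed (hypothetical code,
not the literal ioctl path):

	#include <linux/rtc.h>
	#include <linux/uaccess.h>
	#include <asm/machdep.h>

	/* Hypothetical: pll lives on the kernel stack, contents indeterminate. */
	static long rtc_pll_get_sketch(void __user *argp)
	{
		struct rtc_pll_info pll;

		mach_get_rtc_pll(&pll);	/* q40_get_rtc_pll() skips pll_ctrl */
		/* Without zeroing pll_ctrl in the getter, the stale stack
		 * bytes in that field reach userland here. */
		return copy_to_user(argp, &pll, sizeof(pll)) ? -EFAULT : 0;
	}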
Signed-off-by: Fuqian Huang
Link: https://lore.kernel.org/r/20190927121544.7650-1-huangfq.daxian@gmail.com
Signed-off-by: Geert Uytterhoeven
---
 arch/m68k/q40/config.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/m68k/q40/config.c b/arch/m68k/q40/config.c
index e63eb5f06999..f31890078197 100644
--- a/arch/m68k/q40/config.c
+++ b/arch/m68k/q40/config.c
@@ -264,6 +264,7 @@ static int q40_get_rtc_pll(struct rtc_pll_info *pll)
 {
 int tmp = Q40_RTC_CTRL;

+ pll->pll_ctrl = 0;
 pll->pll_value = tmp & Q40_RTC_PLL_MASK;
 if (tmp & Q40_RTC_PLL_SIGN)
 pll->pll_value = -pll->pll_value;
-- cgit v1.2.3


From c726200dd106d4c58a281eea7159b8ba28a4ab34 Mon Sep 17 00:00:00 2001
From: Christoffer Dall
Date: Fri, 11 Oct 2019 13:07:05 +0200
Subject: KVM: arm/arm64: Allow reporting non-ISV data aborts to userspace

For a long time, if a guest accessed memory outside of a memslot using
any of the load/store instructions in the architecture which don't supply
decoding information in the ESR_EL2 (the ISV bit is not set), the kernel
would print the following message and terminate the VM as a result of
returning -ENOSYS to userspace:

load/store instruction decoding not implemented

The reason behind this message is that KVM assumes that any access
outside a memslot is an MMIO access which should be handled by userspace,
and we originally expected to eventually implement some sort of decoding
of load/store instructions where the ISV bit was not set.

However, it turns out that many of the instructions which don't provide
decoding information on abort are not safe to use for MMIO accesses, and
the remaining few that would potentially make sense to use on MMIO
accesses, such as those with register writeback, are not used in
practice. It also turns out that fetching an instruction from guest
memory can be a pretty horrible affair, involving stopping all CPUs on
SMP systems, handling multiple corner cases of address translation in
software, and more. It doesn't appear likely that we'll ever implement
this in the kernel.

What is much more common is that a user has misconfigured his/her guest
and is actually not accessing an MMIO region, but just hitting some
random hole in the IPA space. In this scenario, the error message above
is downright misleading and has led to a great deal of confusion over the
years.

It is, nevertheless, ABI to userspace, and we therefore need to introduce
a new capability that userspace explicitly enables to change this
behavior.

This patch introduces KVM_CAP_ARM_NISV_TO_USER (NISV meaning Non-ISV)
which does exactly that, and introduces a new exit reason to report the
event to userspace. User space can then emulate an exception to the
guest, restart the guest, suspend the guest, or take any other
appropriate action as per the policy of the running system.
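A minimal sketch of how a VMM might use the new capability (hypothetical
userspace code; emulate_or_abort() is a stand-in for VMM policy):

	#include <linux/kvm.h>
	#include <sys/ioctl.h>

	/* Hypothetical VMM policy hook, not a KVM API. */
	extern void emulate_or_abort(__u64 esr_iss, __u64 fault_ipa);

	/* Opt in once per VM, at creation time. */
	static int enable_nisv(int vm_fd)
	{
		struct kvm_enable_cap cap = { .cap = KVM_CAP_ARM_NISV_TO_USER };

		return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
	}

	/* Then, in the vcpu run loop: */
	static void handle_exit(struct kvm_run *run)
	{
		if (run->exit_reason == KVM_EXIT_ARM_NISV)
			/* Sanitized ISS bits and the faulting IPA. */
			emulate_or_abort(run->arm_nisv.esr_iss,
					 run->arm_nisv.fault_ipa);
	}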
Reported-by: Heinrich Schuchardt Signed-off-by: Christoffer Dall Reviewed-by: Alexander Graf Signed-off-by: Marc Zyngier --- Documentation/virt/kvm/api.txt | 33 +++++++++++++++++++++++++++++++++ arch/arm/include/asm/kvm_arm.h | 1 + arch/arm/include/asm/kvm_emulate.h | 5 +++++ arch/arm/include/asm/kvm_host.h | 8 ++++++++ arch/arm64/include/asm/kvm_emulate.h | 5 +++++ arch/arm64/include/asm/kvm_host.h | 8 ++++++++ include/uapi/linux/kvm.h | 7 +++++++ virt/kvm/arm/arm.c | 21 +++++++++++++++++++++ virt/kvm/arm/mmio.c | 9 ++++++++- 9 files changed, 96 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/Documentation/virt/kvm/api.txt b/Documentation/virt/kvm/api.txt index 4833904d32a5..7403f15657c2 100644 --- a/Documentation/virt/kvm/api.txt +++ b/Documentation/virt/kvm/api.txt @@ -4468,6 +4468,39 @@ Hyper-V SynIC state change. Notification is used to remap SynIC event/message pages and to enable/disable SynIC messages/events processing in userspace. + /* KVM_EXIT_ARM_NISV */ + struct { + __u64 esr_iss; + __u64 fault_ipa; + } arm_nisv; + +Used on arm and arm64 systems. If a guest accesses memory not in a memslot, +KVM will typically return to userspace and ask it to do MMIO emulation on its +behalf. However, for certain classes of instructions, no instruction decode +(direction, length of memory access) is provided, and fetching and decoding +the instruction from the VM is overly complicated to live in the kernel. + +Historically, when this situation occurred, KVM would print a warning and kill +the VM. KVM assumed that if the guest accessed non-memslot memory, it was +trying to do I/O, which just couldn't be emulated, and the warning message was +phrased accordingly. However, what happened more often was that a guest bug +caused access outside the guest memory areas which should lead to a more +meaningful warning message and an external abort in the guest, if the access +did not fall within an I/O window. + +Userspace implementations can query for KVM_CAP_ARM_NISV_TO_USER, and enable +this capability at VM creation. Once this is done, these types of errors will +instead return to userspace with KVM_EXIT_ARM_NISV, with the valid bits from +the HSR (arm) and ESR_EL2 (arm64) in the esr_iss field, and the faulting IPA +in the fault_ipa field. Userspace can either fix up the access if it's +actually an I/O access by decoding the instruction from guest memory (if it's +very brave) and continue executing the guest, or it can decide to suspend, +dump, or restart the guest. + +Note that KVM does not skip the faulting instruction as it does for +KVM_EXIT_MMIO, but userspace has to emulate any change to the processing state +if it decides to decode and emulate the instruction. + /* Fix the size of the union. 
*/ char padding[256]; }; diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h index 0125aa059d5b..9c04bd810d07 100644 --- a/arch/arm/include/asm/kvm_arm.h +++ b/arch/arm/include/asm/kvm_arm.h @@ -162,6 +162,7 @@ #define HSR_ISV (_AC(1, UL) << HSR_ISV_SHIFT) #define HSR_SRT_SHIFT (16) #define HSR_SRT_MASK (0xf << HSR_SRT_SHIFT) +#define HSR_CM (1 << 8) #define HSR_FSC (0x3f) #define HSR_FSC_TYPE (0x3c) #define HSR_SSE (1 << 21) diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h index 40002416efec..e8ef349c04b4 100644 --- a/arch/arm/include/asm/kvm_emulate.h +++ b/arch/arm/include/asm/kvm_emulate.h @@ -167,6 +167,11 @@ static inline bool kvm_vcpu_dabt_isvalid(struct kvm_vcpu *vcpu) return kvm_vcpu_get_hsr(vcpu) & HSR_ISV; } +static inline unsigned long kvm_vcpu_dabt_iss_nisv_sanitized(const struct kvm_vcpu *vcpu) +{ + return kvm_vcpu_get_hsr(vcpu) & (HSR_CM | HSR_WNR | HSR_FSC); +} + static inline bool kvm_vcpu_dabt_iswrite(struct kvm_vcpu *vcpu) { return kvm_vcpu_get_hsr(vcpu) & HSR_WNR; diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 8a37c8e89777..19a92c49039c 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -76,6 +76,14 @@ struct kvm_arch { /* Mandated version of PSCI */ u32 psci_version; + + /* + * If we encounter a data abort without valid instruction syndrome + * information, report this to user space. User space can (and + * should) opt in to this feature if KVM_CAP_ARM_NISV_TO_USER is + * supported. + */ + bool return_nisv_io_abort_to_user; }; #define KVM_NR_MEM_OBJS 40 diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index d69c1efc63e7..a3c967988e1d 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -258,6 +258,11 @@ static inline bool kvm_vcpu_dabt_isvalid(const struct kvm_vcpu *vcpu) return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_ISV); } +static inline unsigned long kvm_vcpu_dabt_iss_nisv_sanitized(const struct kvm_vcpu *vcpu) +{ + return kvm_vcpu_get_hsr(vcpu) & (ESR_ELx_CM | ESR_ELx_WNR | ESR_ELx_FSC); +} + static inline bool kvm_vcpu_dabt_issext(const struct kvm_vcpu *vcpu) { return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SSE); diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index f656169db8c3..019bc560edc1 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -83,6 +83,14 @@ struct kvm_arch { /* Mandated version of PSCI */ u32 psci_version; + + /* + * If we encounter a data abort without valid instruction syndrome + * information, report this to user space. User space can (and + * should) opt in to this feature if KVM_CAP_ARM_NISV_TO_USER is + * supported. + */ + bool return_nisv_io_abort_to_user; }; #define KVM_NR_MEM_OBJS 40 diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 52641d8ca9e8..7336ee8d98d7 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -235,6 +235,7 @@ struct kvm_hyperv_exit { #define KVM_EXIT_S390_STSI 25 #define KVM_EXIT_IOAPIC_EOI 26 #define KVM_EXIT_HYPERV 27 +#define KVM_EXIT_ARM_NISV 28 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -394,6 +395,11 @@ struct kvm_run { } eoi; /* KVM_EXIT_HYPERV */ struct kvm_hyperv_exit hyperv; + /* KVM_EXIT_ARM_NISV */ + struct { + __u64 esr_iss; + __u64 fault_ipa; + } arm_nisv; /* Fix the size of the union. 
*/
 char padding[256];
 };

@@ -1000,6 +1006,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_PMU_EVENT_FILTER 173
 #define KVM_CAP_ARM_IRQ_LINE_LAYOUT_2 174
 #define KVM_CAP_HYPERV_DIRECT_TLBFLUSH 175
+#define KVM_CAP_ARM_NISV_TO_USER 176

 #ifdef KVM_CAP_IRQ_ROUTING
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index 86c6aa1cb58e..e6d56f60e4b6 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -98,6 +98,26 @@ int kvm_arch_check_processor_compat(void)
 return 0;
 }

+int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
+ struct kvm_enable_cap *cap)
+{
+ int r;
+
+ if (cap->flags)
+ return -EINVAL;
+
+ switch (cap->cap) {
+ case KVM_CAP_ARM_NISV_TO_USER:
+ r = 0;
+ kvm->arch.return_nisv_io_abort_to_user = true;
+ break;
+ default:
+ r = -EINVAL;
+ break;
+ }
+
+ return r;
+}

 /**
 * kvm_arch_init_vm - initializes a VM data structure
@@ -197,6 +217,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 case KVM_CAP_IMMEDIATE_EXIT:
 case KVM_CAP_VCPU_EVENTS:
 case KVM_CAP_ARM_IRQ_LINE_LAYOUT_2:
+ case KVM_CAP_ARM_NISV_TO_USER:
 r = 1;
 break;
 case KVM_CAP_ARM_SET_DEVICE_ADDR:
diff --git a/virt/kvm/arm/mmio.c b/virt/kvm/arm/mmio.c
index 6af5c91337f2..70d3b449692c 100644
--- a/virt/kvm/arm/mmio.c
+++ b/virt/kvm/arm/mmio.c
@@ -167,7 +167,14 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
 if (ret)
 return ret;
 } else {
- kvm_err("load/store instruction decoding not implemented\n");
+ if (vcpu->kvm->arch.return_nisv_io_abort_to_user) {
+ run->exit_reason = KVM_EXIT_ARM_NISV;
+ run->arm_nisv.esr_iss = kvm_vcpu_dabt_iss_nisv_sanitized(vcpu);
+ run->arm_nisv.fault_ipa = fault_ipa;
+ return 0;
+ }
+
+ kvm_pr_unimpl("Data abort outside memslots with no valid syndrome info\n");
 return -ENOSYS;
 }
-- cgit v1.2.3


From da345174ceca052469e4775e4ae263b5f27a9355 Mon Sep 17 00:00:00 2001
From: Christoffer Dall
Date: Fri, 11 Oct 2019 13:07:06 +0200
Subject: KVM: arm/arm64: Allow user injection of external data aborts

In some scenarios, such as a buggy guest or an incorrect configuration of
the VMM and firmware description data, userspace will detect a memory
access to a portion of the IPA space which is not mapped to any MMIO
region. In this case, the appropriate action is to inject an external
abort to the guest.

The kernel already has functionality to inject an external abort, but we
need to wire up a signal from user space that lets user space tell the
kernel to do this. It turns out we already have the set event
functionality, which we can reuse for this purpose.

Signed-off-by: Christoffer Dall
Signed-off-by: Marc Zyngier
---
 Documentation/virt/kvm/api.txt | 22 +++++++++++++++++++++-
 arch/arm/include/uapi/asm/kvm.h | 3 ++-
 arch/arm/kvm/guest.c | 10 ++++++++++
 arch/arm64/include/uapi/asm/kvm.h | 3 ++-
 arch/arm64/kvm/guest.c | 10 ++++++++++
 arch/arm64/kvm/inject_fault.c | 4 ++--
 include/uapi/linux/kvm.h | 1 +
 virt/kvm/arm/arm.c | 1 +
 8 files changed, 49 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/Documentation/virt/kvm/api.txt b/Documentation/virt/kvm/api.txt
index 7403f15657c2..bd29d44af32b 100644
--- a/Documentation/virt/kvm/api.txt
+++ b/Documentation/virt/kvm/api.txt
@@ -1002,12 +1002,18 @@ Specifying exception.has_esr on a system that does not support it will return
-EINVAL. Setting anything other than the lower 24bits of exception.serror_esr
will return -EINVAL.

+It is not possible to read back a pending external abort (injected via
+KVM_SET_VCPU_EVENTS or otherwise) because such an exception is always delivered
+directly to the virtual CPU.
+ + struct kvm_vcpu_events { struct { __u8 serror_pending; __u8 serror_has_esr; + __u8 ext_dabt_pending; /* Align it to 8 bytes */ - __u8 pad[6]; + __u8 pad[5]; __u64 serror_esr; } exception; __u32 reserved[12]; @@ -1051,9 +1057,23 @@ contain a valid state and shall be written into the VCPU. ARM/ARM64: +User space may need to inject several types of events to the guest. + Set the pending SError exception state for this VCPU. It is not possible to 'cancel' an Serror that has been made pending. +If the guest performed an access to I/O memory which could not be handled by +userspace, for example because of missing instruction syndrome decode +information or because there is no device mapped at the accessed IPA, then +userspace can ask the kernel to inject an external abort using the address +from the exiting fault on the VCPU. It is a programming error to set +ext_dabt_pending after an exit which was not either KVM_EXIT_MMIO or +KVM_EXIT_ARM_NISV. This feature is only available if the system supports +KVM_CAP_ARM_INJECT_EXT_DABT. This is a helper which provides commonality in +how userspace reports accesses for the above cases to guests, across different +userspace implementations. Nevertheless, userspace can still emulate all Arm +exceptions by manipulating individual registers using the KVM_SET_ONE_REG API. + See KVM_GET_VCPU_EVENTS for the data structure. diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h index 2769360f195c..03cd7c19a683 100644 --- a/arch/arm/include/uapi/asm/kvm.h +++ b/arch/arm/include/uapi/asm/kvm.h @@ -131,8 +131,9 @@ struct kvm_vcpu_events { struct { __u8 serror_pending; __u8 serror_has_esr; + __u8 ext_dabt_pending; /* Align it to 8 bytes */ - __u8 pad[6]; + __u8 pad[5]; __u64 serror_esr; } exception; __u32 reserved[12]; diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c index 684cf64b4033..735f9b007e58 100644 --- a/arch/arm/kvm/guest.c +++ b/arch/arm/kvm/guest.c @@ -255,6 +255,12 @@ int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu, { events->exception.serror_pending = !!(*vcpu_hcr(vcpu) & HCR_VA); + /* + * We never return a pending ext_dabt here because we deliver it to + * the virtual CPU directly when setting the event and it's no longer + * 'pending' at this point. 
+ */ + return 0; } @@ -263,12 +269,16 @@ int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu, { bool serror_pending = events->exception.serror_pending; bool has_esr = events->exception.serror_has_esr; + bool ext_dabt_pending = events->exception.ext_dabt_pending; if (serror_pending && has_esr) return -EINVAL; else if (serror_pending) kvm_inject_vabt(vcpu); + if (ext_dabt_pending) + kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu)); + return 0; } diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 67c21f9bdbad..d49c17a80491 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -164,8 +164,9 @@ struct kvm_vcpu_events { struct { __u8 serror_pending; __u8 serror_has_esr; + __u8 ext_dabt_pending; /* Align it to 8 bytes */ - __u8 pad[6]; + __u8 pad[5]; __u64 serror_esr; } exception; __u32 reserved[12]; diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index dfd626447482..ca613a44c6ec 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -712,6 +712,12 @@ int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu, if (events->exception.serror_pending && events->exception.serror_has_esr) events->exception.serror_esr = vcpu_get_vsesr(vcpu); + /* + * We never return a pending ext_dabt here because we deliver it to + * the virtual CPU directly when setting the event and it's no longer + * 'pending' at this point. + */ + return 0; } @@ -720,6 +726,7 @@ int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu, { bool serror_pending = events->exception.serror_pending; bool has_esr = events->exception.serror_has_esr; + bool ext_dabt_pending = events->exception.ext_dabt_pending; if (serror_pending && has_esr) { if (!cpus_have_const_cap(ARM64_HAS_RAS_EXTN)) @@ -733,6 +740,9 @@ int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu, kvm_inject_vabt(vcpu); } + if (ext_dabt_pending) + kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu)); + return 0; } diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c index a9d25a305af5..ccdb6a051ab2 100644 --- a/arch/arm64/kvm/inject_fault.c +++ b/arch/arm64/kvm/inject_fault.c @@ -109,7 +109,7 @@ static void inject_undef64(struct kvm_vcpu *vcpu) /** * kvm_inject_dabt - inject a data abort into the guest - * @vcpu: The VCPU to receive the undefined exception + * @vcpu: The VCPU to receive the data abort * @addr: The address to report in the DFAR * * It is assumed that this code is called from the VCPU thread and that the @@ -125,7 +125,7 @@ void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr) /** * kvm_inject_pabt - inject a prefetch abort into the guest - * @vcpu: The VCPU to receive the undefined exception + * @vcpu: The VCPU to receive the prefetch abort * @addr: The address to report in the DFAR * * It is assumed that this code is called from the VCPU thread and that the diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 7336ee8d98d7..65db5a4257ec 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1007,6 +1007,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_ARM_IRQ_LINE_LAYOUT_2 174 #define KVM_CAP_HYPERV_DIRECT_TLBFLUSH 175 #define KVM_CAP_ARM_NISV_TO_USER 176 +#define KVM_CAP_ARM_INJECT_EXT_DABT 177 #ifdef KVM_CAP_IRQ_ROUTING diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index e6d56f60e4b6..12064780f1d8 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -218,6 +218,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_VCPU_EVENTS: case KVM_CAP_ARM_IRQ_LINE_LAYOUT_2: case 
KVM_CAP_ARM_NISV_TO_USER: + case KVM_CAP_ARM_INJECT_EXT_DABT: r = 1; break; case KVM_CAP_ARM_SET_DEVICE_ADDR: -- cgit v1.2.3 From 55009c6ed2d24fc0f5521ab2482f145d269389ea Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Mon, 21 Oct 2019 16:28:15 +0100 Subject: KVM: arm/arm64: Factor out hypercall handling from PSCI code We currently intertwine the KVM PSCI implementation with the general dispatch of hypercall handling, which makes perfect sense because PSCI is the only category of hypercalls we support. However, as we are about to support additional hypercalls, factor out this functionality into a separate hypercall handler file. Signed-off-by: Christoffer Dall [steven.price@arm.com: rebased] Reviewed-by: Andrew Jones Signed-off-by: Steven Price Signed-off-by: Marc Zyngier --- arch/arm/kvm/Makefile | 2 +- arch/arm/kvm/handle_exit.c | 2 +- arch/arm64/kvm/Makefile | 1 + arch/arm64/kvm/handle_exit.c | 4 +-- include/Kbuild | 2 ++ include/kvm/arm_hypercalls.h | 43 +++++++++++++++++++++++ include/kvm/arm_psci.h | 2 +- virt/kvm/arm/hypercalls.c | 59 +++++++++++++++++++++++++++++++ virt/kvm/arm/psci.c | 84 ++------------------------------------------ 9 files changed, 112 insertions(+), 87 deletions(-) create mode 100644 include/kvm/arm_hypercalls.h create mode 100644 virt/kvm/arm/hypercalls.c (limited to 'arch') diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile index b76b75bd9e00..e442d82821df 100644 --- a/arch/arm/kvm/Makefile +++ b/arch/arm/kvm/Makefile @@ -24,7 +24,7 @@ obj-y += kvm-arm.o init.o interrupts.o obj-y += handle_exit.o guest.o emulate.o reset.o obj-y += coproc.o coproc_a15.o coproc_a7.o vgic-v3-coproc.o obj-y += $(KVM)/arm/arm.o $(KVM)/arm/mmu.o $(KVM)/arm/mmio.o -obj-y += $(KVM)/arm/psci.o $(KVM)/arm/perf.o +obj-y += $(KVM)/arm/psci.o $(KVM)/arm/perf.o $(KVM)/arm/hypercalls.o obj-y += $(KVM)/arm/aarch32.o obj-y += $(KVM)/arm/vgic/vgic.o diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c index 2a6a1394d26e..e58a89d2f13f 100644 --- a/arch/arm/kvm/handle_exit.c +++ b/arch/arm/kvm/handle_exit.c @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include "trace.h" diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index 3ac1a64d2fb9..73dce4d47d47 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -13,6 +13,7 @@ obj-$(CONFIG_KVM_ARM_HOST) += hyp/ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o $(KVM)/vfio.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arm.o $(KVM)/arm/mmu.o $(KVM)/arm/mmio.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/psci.o $(KVM)/arm/perf.o +kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hypercalls.o kvm-$(CONFIG_KVM_ARM_HOST) += inject_fault.o regmap.o va_layout.o kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 706cca23f0d2..aacfc55de44c 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -11,8 +11,6 @@ #include #include -#include - #include #include #include @@ -22,6 +20,8 @@ #include #include +#include + #define CREATE_TRACE_POINTS #include "trace.h" diff --git a/include/Kbuild b/include/Kbuild index ffba79483cc5..e8154f8bcac5 100644 --- a/include/Kbuild +++ b/include/Kbuild @@ -67,6 +67,8 @@ header-test- += keys/big_key-type.h header-test- += keys/request_key_auth-type.h header-test- += keys/trusted.h header-test- += kvm/arm_arch_timer.h +header-test-$(CONFIG_ARM) += kvm/arm_hypercalls.h +header-test-$(CONFIG_ARM64) += 
kvm/arm_hypercalls.h header-test- += kvm/arm_pmu.h header-test-$(CONFIG_ARM) += kvm/arm_psci.h header-test-$(CONFIG_ARM64) += kvm/arm_psci.h diff --git a/include/kvm/arm_hypercalls.h b/include/kvm/arm_hypercalls.h new file mode 100644 index 000000000000..0e2509d27910 --- /dev/null +++ b/include/kvm/arm_hypercalls.h @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2019 Arm Ltd. */ + +#ifndef __KVM_ARM_HYPERCALLS_H +#define __KVM_ARM_HYPERCALLS_H + +#include + +int kvm_hvc_call_handler(struct kvm_vcpu *vcpu); + +static inline u32 smccc_get_function(struct kvm_vcpu *vcpu) +{ + return vcpu_get_reg(vcpu, 0); +} + +static inline unsigned long smccc_get_arg1(struct kvm_vcpu *vcpu) +{ + return vcpu_get_reg(vcpu, 1); +} + +static inline unsigned long smccc_get_arg2(struct kvm_vcpu *vcpu) +{ + return vcpu_get_reg(vcpu, 2); +} + +static inline unsigned long smccc_get_arg3(struct kvm_vcpu *vcpu) +{ + return vcpu_get_reg(vcpu, 3); +} + +static inline void smccc_set_retval(struct kvm_vcpu *vcpu, + unsigned long a0, + unsigned long a1, + unsigned long a2, + unsigned long a3) +{ + vcpu_set_reg(vcpu, 0, a0); + vcpu_set_reg(vcpu, 1, a1); + vcpu_set_reg(vcpu, 2, a2); + vcpu_set_reg(vcpu, 3, a3); +} + +#endif diff --git a/include/kvm/arm_psci.h b/include/kvm/arm_psci.h index 632e78bdef4d..5b58bd2fe088 100644 --- a/include/kvm/arm_psci.h +++ b/include/kvm/arm_psci.h @@ -40,7 +40,7 @@ static inline int kvm_psci_version(struct kvm_vcpu *vcpu, struct kvm *kvm) } -int kvm_hvc_call_handler(struct kvm_vcpu *vcpu); +int kvm_psci_call(struct kvm_vcpu *vcpu); struct kvm_one_reg; diff --git a/virt/kvm/arm/hypercalls.c b/virt/kvm/arm/hypercalls.c new file mode 100644 index 000000000000..f875241bd030 --- /dev/null +++ b/virt/kvm/arm/hypercalls.c @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2019 Arm Ltd. 
+ +#include +#include + +#include + +#include +#include + +int kvm_hvc_call_handler(struct kvm_vcpu *vcpu) +{ + u32 func_id = smccc_get_function(vcpu); + u32 val = SMCCC_RET_NOT_SUPPORTED; + u32 feature; + + switch (func_id) { + case ARM_SMCCC_VERSION_FUNC_ID: + val = ARM_SMCCC_VERSION_1_1; + break; + case ARM_SMCCC_ARCH_FEATURES_FUNC_ID: + feature = smccc_get_arg1(vcpu); + switch (feature) { + case ARM_SMCCC_ARCH_WORKAROUND_1: + switch (kvm_arm_harden_branch_predictor()) { + case KVM_BP_HARDEN_UNKNOWN: + break; + case KVM_BP_HARDEN_WA_NEEDED: + val = SMCCC_RET_SUCCESS; + break; + case KVM_BP_HARDEN_NOT_REQUIRED: + val = SMCCC_RET_NOT_REQUIRED; + break; + } + break; + case ARM_SMCCC_ARCH_WORKAROUND_2: + switch (kvm_arm_have_ssbd()) { + case KVM_SSBD_FORCE_DISABLE: + case KVM_SSBD_UNKNOWN: + break; + case KVM_SSBD_KERNEL: + val = SMCCC_RET_SUCCESS; + break; + case KVM_SSBD_FORCE_ENABLE: + case KVM_SSBD_MITIGATED: + val = SMCCC_RET_NOT_REQUIRED; + break; + } + break; + } + break; + default: + return kvm_psci_call(vcpu); + } + + smccc_set_retval(vcpu, val, 0, 0, 0); + return 1; +} diff --git a/virt/kvm/arm/psci.c b/virt/kvm/arm/psci.c index 87927f7e1ee7..17e2bdd4b76f 100644 --- a/virt/kvm/arm/psci.c +++ b/virt/kvm/arm/psci.c @@ -15,6 +15,7 @@ #include #include +#include /* * This is an implementation of the Power State Coordination Interface @@ -23,38 +24,6 @@ #define AFFINITY_MASK(level) ~((0x1UL << ((level) * MPIDR_LEVEL_BITS)) - 1) -static u32 smccc_get_function(struct kvm_vcpu *vcpu) -{ - return vcpu_get_reg(vcpu, 0); -} - -static unsigned long smccc_get_arg1(struct kvm_vcpu *vcpu) -{ - return vcpu_get_reg(vcpu, 1); -} - -static unsigned long smccc_get_arg2(struct kvm_vcpu *vcpu) -{ - return vcpu_get_reg(vcpu, 2); -} - -static unsigned long smccc_get_arg3(struct kvm_vcpu *vcpu) -{ - return vcpu_get_reg(vcpu, 3); -} - -static void smccc_set_retval(struct kvm_vcpu *vcpu, - unsigned long a0, - unsigned long a1, - unsigned long a2, - unsigned long a3) -{ - vcpu_set_reg(vcpu, 0, a0); - vcpu_set_reg(vcpu, 1, a1); - vcpu_set_reg(vcpu, 2, a2); - vcpu_set_reg(vcpu, 3, a3); -} - static unsigned long psci_affinity_mask(unsigned long affinity_level) { if (affinity_level <= 3) @@ -373,7 +342,7 @@ static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu) * Errors: * -EINVAL: Unrecognized PSCI function */ -static int kvm_psci_call(struct kvm_vcpu *vcpu) +int kvm_psci_call(struct kvm_vcpu *vcpu) { switch (kvm_psci_version(vcpu, vcpu->kvm)) { case KVM_ARM_PSCI_1_0: @@ -387,55 +356,6 @@ static int kvm_psci_call(struct kvm_vcpu *vcpu) }; } -int kvm_hvc_call_handler(struct kvm_vcpu *vcpu) -{ - u32 func_id = smccc_get_function(vcpu); - u32 val = SMCCC_RET_NOT_SUPPORTED; - u32 feature; - - switch (func_id) { - case ARM_SMCCC_VERSION_FUNC_ID: - val = ARM_SMCCC_VERSION_1_1; - break; - case ARM_SMCCC_ARCH_FEATURES_FUNC_ID: - feature = smccc_get_arg1(vcpu); - switch(feature) { - case ARM_SMCCC_ARCH_WORKAROUND_1: - switch (kvm_arm_harden_branch_predictor()) { - case KVM_BP_HARDEN_UNKNOWN: - break; - case KVM_BP_HARDEN_WA_NEEDED: - val = SMCCC_RET_SUCCESS; - break; - case KVM_BP_HARDEN_NOT_REQUIRED: - val = SMCCC_RET_NOT_REQUIRED; - break; - } - break; - case ARM_SMCCC_ARCH_WORKAROUND_2: - switch (kvm_arm_have_ssbd()) { - case KVM_SSBD_FORCE_DISABLE: - case KVM_SSBD_UNKNOWN: - break; - case KVM_SSBD_KERNEL: - val = SMCCC_RET_SUCCESS; - break; - case KVM_SSBD_FORCE_ENABLE: - case KVM_SSBD_MITIGATED: - val = SMCCC_RET_NOT_REQUIRED; - break; - } - break; - } - break; - default: - return kvm_psci_call(vcpu); - } - - 
smccc_set_retval(vcpu, val, 0, 0, 0); - return 1; -} - int kvm_arm_get_fw_num_regs(struct kvm_vcpu *vcpu) { return 3; /* PSCI version and two workaround registers */ -- cgit v1.2.3 From b48c1a45a190898103cec28771efc399fd65a05a Mon Sep 17 00:00:00 2001 From: Steven Price Date: Mon, 21 Oct 2019 16:28:16 +0100 Subject: KVM: arm64: Implement PV_TIME_FEATURES call This provides a mechanism for querying which paravirtualized time features are available in this hypervisor. Also add the header file which defines the ABI for the paravirtualized time features we're about to add. Signed-off-by: Steven Price Signed-off-by: Marc Zyngier --- arch/arm/include/asm/kvm_host.h | 6 ++++++ arch/arm64/include/asm/kvm_host.h | 2 ++ arch/arm64/include/asm/pvclock-abi.h | 17 +++++++++++++++++ arch/arm64/kvm/Makefile | 1 + include/linux/arm-smccc.h | 14 ++++++++++++++ virt/kvm/arm/hypercalls.c | 8 +++++++- virt/kvm/arm/pvtime.c | 20 ++++++++++++++++++++ 7 files changed, 67 insertions(+), 1 deletion(-) create mode 100644 arch/arm64/include/asm/pvclock-abi.h create mode 100644 virt/kvm/arm/pvtime.c (limited to 'arch') diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 8a37c8e89777..5a0c3569ebde 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -7,6 +7,7 @@ #ifndef __ARM_KVM_HOST_H__ #define __ARM_KVM_HOST_H__ +#include #include #include #include @@ -323,6 +324,11 @@ static inline int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext) int kvm_perf_init(void); int kvm_perf_teardown(void); +static inline long kvm_hypercall_pv_features(struct kvm_vcpu *vcpu) +{ + return SMCCC_RET_NOT_SUPPORTED; +} + void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot); struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr); diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index f656169db8c3..93b46d9526d0 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -478,6 +478,8 @@ void handle_exit_early(struct kvm_vcpu *vcpu, struct kvm_run *run, int kvm_perf_init(void); int kvm_perf_teardown(void); +long kvm_hypercall_pv_features(struct kvm_vcpu *vcpu); + void kvm_set_sei_esr(struct kvm_vcpu *vcpu, u64 syndrome); struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr); diff --git a/arch/arm64/include/asm/pvclock-abi.h b/arch/arm64/include/asm/pvclock-abi.h new file mode 100644 index 000000000000..c4f1c0a0789c --- /dev/null +++ b/arch/arm64/include/asm/pvclock-abi.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2019 Arm Ltd. 
*/ + +#ifndef __ASM_PVCLOCK_ABI_H +#define __ASM_PVCLOCK_ABI_H + +/* The below structure is defined in ARM DEN0057A */ + +struct pvclock_vcpu_stolen_time { + __le32 revision; + __le32 attributes; + __le64 stolen_time; + /* Structure must be 64 byte aligned, pad to that size */ + u8 padding[48]; +} __packed; + +#endif diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index 73dce4d47d47..5ffbdc39e780 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -14,6 +14,7 @@ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/e kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arm.o $(KVM)/arm/mmu.o $(KVM)/arm/mmio.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/psci.o $(KVM)/arm/perf.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hypercalls.o +kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/pvtime.o kvm-$(CONFIG_KVM_ARM_HOST) += inject_fault.o regmap.o va_layout.o kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h index df01a8579034..92e0046ce7a7 100644 --- a/include/linux/arm-smccc.h +++ b/include/linux/arm-smccc.h @@ -45,6 +45,7 @@ #define ARM_SMCCC_OWNER_SIP 2 #define ARM_SMCCC_OWNER_OEM 3 #define ARM_SMCCC_OWNER_STANDARD 4 +#define ARM_SMCCC_OWNER_STANDARD_HYP 5 #define ARM_SMCCC_OWNER_TRUSTED_APP 48 #define ARM_SMCCC_OWNER_TRUSTED_APP_END 49 #define ARM_SMCCC_OWNER_TRUSTED_OS 50 @@ -318,5 +319,18 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1, #define SMCCC_RET_NOT_SUPPORTED -1 #define SMCCC_RET_NOT_REQUIRED -2 +/* Paravirtualised time calls (defined by ARM DEN0057A) */ +#define ARM_SMCCC_HV_PV_TIME_FEATURES \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_64, \ + ARM_SMCCC_OWNER_STANDARD_HYP, \ + 0x20) + +#define ARM_SMCCC_HV_PV_TIME_ST \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_64, \ + ARM_SMCCC_OWNER_STANDARD_HYP, \ + 0x21) + #endif /*__ASSEMBLY__*/ #endif /*__LINUX_ARM_SMCCC_H*/ diff --git a/virt/kvm/arm/hypercalls.c b/virt/kvm/arm/hypercalls.c index f875241bd030..97ea8b133e77 100644 --- a/virt/kvm/arm/hypercalls.c +++ b/virt/kvm/arm/hypercalls.c @@ -12,7 +12,7 @@ int kvm_hvc_call_handler(struct kvm_vcpu *vcpu) { u32 func_id = smccc_get_function(vcpu); - u32 val = SMCCC_RET_NOT_SUPPORTED; + long val = SMCCC_RET_NOT_SUPPORTED; u32 feature; switch (func_id) { @@ -48,8 +48,14 @@ int kvm_hvc_call_handler(struct kvm_vcpu *vcpu) break; } break; + case ARM_SMCCC_HV_PV_TIME_FEATURES: + val = SMCCC_RET_SUCCESS; + break; } break; + case ARM_SMCCC_HV_PV_TIME_FEATURES: + val = kvm_hypercall_pv_features(vcpu); + break; default: return kvm_psci_call(vcpu); } diff --git a/virt/kvm/arm/pvtime.c b/virt/kvm/arm/pvtime.c new file mode 100644 index 000000000000..9fc69fc2d683 --- /dev/null +++ b/virt/kvm/arm/pvtime.c @@ -0,0 +1,20 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2019 Arm Ltd. 
+ +#include + +#include + +long kvm_hypercall_pv_features(struct kvm_vcpu *vcpu) +{ + u32 feature = smccc_get_arg1(vcpu); + long val = SMCCC_RET_NOT_SUPPORTED; + + switch (feature) { + case ARM_SMCCC_HV_PV_TIME_FEATURES: + val = SMCCC_RET_SUCCESS; + break; + } + + return val; +} -- cgit v1.2.3 From 8564d6372a7d8a6d440441b8ed8020f97f744450 Mon Sep 17 00:00:00 2001 From: Steven Price Date: Mon, 21 Oct 2019 16:28:18 +0100 Subject: KVM: arm64: Support stolen time reporting via shared structure Implement the service call for configuring a shared structure between a VCPU and the hypervisor in which the hypervisor can write the time stolen from the VCPU's execution time by other tasks on the host. User space allocates memory which is placed at an IPA also chosen by user space. The hypervisor then updates the shared structure using kvm_put_guest() to ensure single copy atomicity of the 64-bit value reporting the stolen time in nanoseconds. Whenever stolen time is enabled by the guest, the stolen time counter is reset. The stolen time itself is retrieved from the sched_info structure maintained by the Linux scheduler code. We enable SCHEDSTATS when selecting KVM Kconfig to ensure this value is meaningful. Signed-off-by: Steven Price Signed-off-by: Marc Zyngier --- arch/arm/include/asm/kvm_host.h | 19 ++++++++++++++ arch/arm64/include/asm/kvm_host.h | 20 +++++++++++++++ arch/arm64/kvm/Kconfig | 1 + include/linux/kvm_types.h | 2 ++ virt/kvm/arm/arm.c | 11 +++++++++ virt/kvm/arm/hypercalls.c | 6 +++++ virt/kvm/arm/pvtime.c | 52 +++++++++++++++++++++++++++++++++++++++ 7 files changed, 111 insertions(+) (limited to 'arch') diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 5a0c3569ebde..5a077f85813f 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -39,6 +39,7 @@ KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define KVM_REQ_IRQ_PENDING KVM_ARCH_REQ(1) #define KVM_REQ_VCPU_RESET KVM_ARCH_REQ(2) +#define KVM_REQ_RECORD_STEAL KVM_ARCH_REQ(3) DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use); @@ -329,6 +330,24 @@ static inline long kvm_hypercall_pv_features(struct kvm_vcpu *vcpu) return SMCCC_RET_NOT_SUPPORTED; } +static inline gpa_t kvm_init_stolen_time(struct kvm_vcpu *vcpu) +{ + return GPA_INVALID; +} + +static inline void kvm_update_stolen_time(struct kvm_vcpu *vcpu) +{ +} + +static inline void kvm_arm_pvtime_vcpu_init(struct kvm_vcpu_arch *vcpu_arch) +{ +} + +static inline bool kvm_arm_is_pvtime_enabled(struct kvm_vcpu_arch *vcpu_arch) +{ + return false; +} + void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot); struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr); diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 93b46d9526d0..75ef37f79633 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -44,6 +44,7 @@ KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define KVM_REQ_IRQ_PENDING KVM_ARCH_REQ(1) #define KVM_REQ_VCPU_RESET KVM_ARCH_REQ(2) +#define KVM_REQ_RECORD_STEAL KVM_ARCH_REQ(3) DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use); @@ -338,6 +339,13 @@ struct kvm_vcpu_arch { /* True when deferrable sysregs are loaded on the physical CPU, * see kvm_vcpu_load_sysregs and kvm_vcpu_put_sysregs. 
*/ bool sysregs_loaded_on_cpu; + + /* Guest PV state */ + struct { + u64 steal; + u64 last_steal; + gpa_t base; + } steal; }; /* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */ @@ -479,6 +487,18 @@ int kvm_perf_init(void); int kvm_perf_teardown(void); long kvm_hypercall_pv_features(struct kvm_vcpu *vcpu); +gpa_t kvm_init_stolen_time(struct kvm_vcpu *vcpu); +void kvm_update_stolen_time(struct kvm_vcpu *vcpu); + +static inline void kvm_arm_pvtime_vcpu_init(struct kvm_vcpu_arch *vcpu_arch) +{ + vcpu_arch->steal.base = GPA_INVALID; +} + +static inline bool kvm_arm_is_pvtime_enabled(struct kvm_vcpu_arch *vcpu_arch) +{ + return (vcpu_arch->steal.base != GPA_INVALID); +} void kvm_set_sei_esr(struct kvm_vcpu *vcpu, u64 syndrome); diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index a67121d419a2..d8b88e40d223 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -39,6 +39,7 @@ config KVM select IRQ_BYPASS_MANAGER select HAVE_KVM_IRQ_BYPASS select HAVE_KVM_VCPU_RUN_PID_CHANGE + select SCHEDSTATS ---help--- Support hosting virtualized guest machines. We don't support KVM with 16K page tables yet, due to the multiple diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h index bde5374ae021..1c88e69db3d9 100644 --- a/include/linux/kvm_types.h +++ b/include/linux/kvm_types.h @@ -35,6 +35,8 @@ typedef unsigned long gva_t; typedef u64 gpa_t; typedef u64 gfn_t; +#define GPA_INVALID (~(gpa_t)0) + typedef unsigned long hva_t; typedef u64 hpa_t; typedef u64 hfn_t; diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 86c6aa1cb58e..2aba375dfd13 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -40,6 +40,10 @@ #include #include +#include +#include +#include + #ifdef REQUIRES_VIRT __asm__(".arch_extension virt"); #endif @@ -351,6 +355,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) kvm_arm_reset_debug_ptr(vcpu); + kvm_arm_pvtime_vcpu_init(&vcpu->arch); + return kvm_vgic_vcpu_init(vcpu); } @@ -380,6 +386,8 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) kvm_vcpu_load_sysregs(vcpu); kvm_arch_vcpu_load_fp(vcpu); kvm_vcpu_pmu_restore_guest(vcpu); + if (kvm_arm_is_pvtime_enabled(&vcpu->arch)) + kvm_make_request(KVM_REQ_RECORD_STEAL, vcpu); if (single_task_running()) vcpu_clear_wfe_traps(vcpu); @@ -645,6 +653,9 @@ static void check_vcpu_requests(struct kvm_vcpu *vcpu) * that a VCPU sees new virtual interrupts. 
*/ kvm_check_request(KVM_REQ_IRQ_PENDING, vcpu); + + if (kvm_check_request(KVM_REQ_RECORD_STEAL, vcpu)) + kvm_update_stolen_time(vcpu); } } diff --git a/virt/kvm/arm/hypercalls.c b/virt/kvm/arm/hypercalls.c index 97ea8b133e77..550dfa3e53cd 100644 --- a/virt/kvm/arm/hypercalls.c +++ b/virt/kvm/arm/hypercalls.c @@ -14,6 +14,7 @@ int kvm_hvc_call_handler(struct kvm_vcpu *vcpu) u32 func_id = smccc_get_function(vcpu); long val = SMCCC_RET_NOT_SUPPORTED; u32 feature; + gpa_t gpa; switch (func_id) { case ARM_SMCCC_VERSION_FUNC_ID: @@ -56,6 +57,11 @@ int kvm_hvc_call_handler(struct kvm_vcpu *vcpu) case ARM_SMCCC_HV_PV_TIME_FEATURES: val = kvm_hypercall_pv_features(vcpu); break; + case ARM_SMCCC_HV_PV_TIME_ST: + gpa = kvm_init_stolen_time(vcpu); + if (gpa != GPA_INVALID) + val = gpa; + break; default: return kvm_psci_call(vcpu); } diff --git a/virt/kvm/arm/pvtime.c b/virt/kvm/arm/pvtime.c index 9fc69fc2d683..b90b3a7bea85 100644 --- a/virt/kvm/arm/pvtime.c +++ b/virt/kvm/arm/pvtime.c @@ -3,8 +3,35 @@ #include +#include + #include +void kvm_update_stolen_time(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = vcpu->kvm; + u64 steal; + __le64 steal_le; + u64 offset; + int idx; + u64 base = vcpu->arch.steal.base; + + if (base == GPA_INVALID) + return; + + /* Let's do the local bookkeeping */ + steal = vcpu->arch.steal.steal; + steal += current->sched_info.run_delay - vcpu->arch.steal.last_steal; + vcpu->arch.steal.last_steal = current->sched_info.run_delay; + vcpu->arch.steal.steal = steal; + + steal_le = cpu_to_le64(steal); + idx = srcu_read_lock(&kvm->srcu); + offset = offsetof(struct pvclock_vcpu_stolen_time, stolen_time); + kvm_put_guest(kvm, base + offset, steal_le, u64); + srcu_read_unlock(&kvm->srcu, idx); +} + long kvm_hypercall_pv_features(struct kvm_vcpu *vcpu) { u32 feature = smccc_get_arg1(vcpu); @@ -12,9 +39,34 @@ long kvm_hypercall_pv_features(struct kvm_vcpu *vcpu) switch (feature) { case ARM_SMCCC_HV_PV_TIME_FEATURES: + case ARM_SMCCC_HV_PV_TIME_ST: val = SMCCC_RET_SUCCESS; break; } return val; } + +gpa_t kvm_init_stolen_time(struct kvm_vcpu *vcpu) +{ + struct pvclock_vcpu_stolen_time init_values = {}; + struct kvm *kvm = vcpu->kvm; + u64 base = vcpu->arch.steal.base; + int idx; + + if (base == GPA_INVALID) + return base; + + /* + * Start counting stolen time from the time the guest requests + * the feature enabled. + */ + vcpu->arch.steal.steal = 0; + vcpu->arch.steal.last_steal = current->sched_info.run_delay; + + idx = srcu_read_lock(&kvm->srcu); + kvm_write_guest(kvm, base, &init_values, sizeof(init_values)); + srcu_read_unlock(&kvm->srcu, idx); + + return base; +} -- cgit v1.2.3 From 58772e9a3db72d032eeb12bc011bc5184a3925f4 Mon Sep 17 00:00:00 2001 From: Steven Price Date: Mon, 21 Oct 2019 16:28:20 +0100 Subject: KVM: arm64: Provide VCPU attributes for stolen time Allow user space to inform the KVM host where in the physical memory map the paravirtualized time structures should be located. User space can set an attribute on the VCPU providing the IPA base address of the stolen time structure for that VCPU. This must be repeated for every VCPU in the VM. The address is given in terms of the physical address visible to the guest and must be 64 byte aligned. The guest will discover the address via a hypercall. 
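A minimal sketch of the corresponding userspace call (hypothetical VMM
code; the fd handling and chosen IPA are assumptions):

	#include <linux/kvm.h>
	#include <sys/ioctl.h>

	/* Point this VCPU's stolen-time structure at a 64-byte-aligned IPA.
	 * Must be repeated for every VCPU in the VM. */
	static int set_pvtime_ipa(int vcpu_fd, __u64 ipa)
	{
		struct kvm_device_attr attr = {
			.group = KVM_ARM_VCPU_PVTIME_CTRL,
			.attr  = KVM_ARM_VCPU_PVTIME_IPA,
			.addr  = (__u64)(unsigned long)&ipa,
		};

		return ioctl(vcpu_fd, KVM_SET_DEVICE_ATTR, &attr);
	}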
Signed-off-by: Steven Price Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_host.h | 7 +++++ arch/arm64/include/uapi/asm/kvm.h | 2 ++ arch/arm64/kvm/guest.c | 9 ++++++ include/uapi/linux/kvm.h | 2 ++ virt/kvm/arm/pvtime.c | 59 +++++++++++++++++++++++++++++++++++++++ 5 files changed, 79 insertions(+) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 75ef37f79633..eb1f33af45aa 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -490,6 +490,13 @@ long kvm_hypercall_pv_features(struct kvm_vcpu *vcpu); gpa_t kvm_init_stolen_time(struct kvm_vcpu *vcpu); void kvm_update_stolen_time(struct kvm_vcpu *vcpu); +int kvm_arm_pvtime_set_attr(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr); +int kvm_arm_pvtime_get_attr(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr); +int kvm_arm_pvtime_has_attr(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr); + static inline void kvm_arm_pvtime_vcpu_init(struct kvm_vcpu_arch *vcpu_arch) { vcpu_arch->steal.base = GPA_INVALID; diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 67c21f9bdbad..cff1ba12c768 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -323,6 +323,8 @@ struct kvm_vcpu_events { #define KVM_ARM_VCPU_TIMER_CTRL 1 #define KVM_ARM_VCPU_TIMER_IRQ_VTIMER 0 #define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1 +#define KVM_ARM_VCPU_PVTIME_CTRL 2 +#define KVM_ARM_VCPU_PVTIME_IPA 0 /* KVM_IRQ_LINE irq field index values */ #define KVM_ARM_IRQ_VCPU2_SHIFT 28 diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index dfd626447482..d3ac9d2fd405 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -858,6 +858,9 @@ int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu, case KVM_ARM_VCPU_TIMER_CTRL: ret = kvm_arm_timer_set_attr(vcpu, attr); break; + case KVM_ARM_VCPU_PVTIME_CTRL: + ret = kvm_arm_pvtime_set_attr(vcpu, attr); + break; default: ret = -ENXIO; break; @@ -878,6 +881,9 @@ int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu, case KVM_ARM_VCPU_TIMER_CTRL: ret = kvm_arm_timer_get_attr(vcpu, attr); break; + case KVM_ARM_VCPU_PVTIME_CTRL: + ret = kvm_arm_pvtime_get_attr(vcpu, attr); + break; default: ret = -ENXIO; break; @@ -898,6 +904,9 @@ int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu, case KVM_ARM_VCPU_TIMER_CTRL: ret = kvm_arm_timer_has_attr(vcpu, attr); break; + case KVM_ARM_VCPU_PVTIME_CTRL: + ret = kvm_arm_pvtime_has_attr(vcpu, attr); + break; default: ret = -ENXIO; break; diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 52641d8ca9e8..a540c8357049 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1227,6 +1227,8 @@ enum kvm_device_type { #define KVM_DEV_TYPE_ARM_VGIC_ITS KVM_DEV_TYPE_ARM_VGIC_ITS KVM_DEV_TYPE_XIVE, #define KVM_DEV_TYPE_XIVE KVM_DEV_TYPE_XIVE + KVM_DEV_TYPE_ARM_PV_TIME, +#define KVM_DEV_TYPE_ARM_PV_TIME KVM_DEV_TYPE_ARM_PV_TIME KVM_DEV_TYPE_MAX, }; diff --git a/virt/kvm/arm/pvtime.c b/virt/kvm/arm/pvtime.c index b90b3a7bea85..1e0f4c284888 100644 --- a/virt/kvm/arm/pvtime.c +++ b/virt/kvm/arm/pvtime.c @@ -2,7 +2,9 @@ // Copyright (C) 2019 Arm Ltd. 
#include +#include +#include #include #include @@ -70,3 +72,60 @@ gpa_t kvm_init_stolen_time(struct kvm_vcpu *vcpu) return base; } + +int kvm_arm_pvtime_set_attr(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr) +{ + u64 __user *user = (u64 __user *)attr->addr; + struct kvm *kvm = vcpu->kvm; + u64 ipa; + int ret = 0; + int idx; + + if (attr->attr != KVM_ARM_VCPU_PVTIME_IPA) + return -ENXIO; + + if (get_user(ipa, user)) + return -EFAULT; + if (!IS_ALIGNED(ipa, 64)) + return -EINVAL; + if (vcpu->arch.steal.base != GPA_INVALID) + return -EEXIST; + + /* Check the address is in a valid memslot */ + idx = srcu_read_lock(&kvm->srcu); + if (kvm_is_error_hva(gfn_to_hva(kvm, ipa >> PAGE_SHIFT))) + ret = -EINVAL; + srcu_read_unlock(&kvm->srcu, idx); + + if (!ret) + vcpu->arch.steal.base = ipa; + + return ret; +} + +int kvm_arm_pvtime_get_attr(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr) +{ + u64 __user *user = (u64 __user *)attr->addr; + u64 ipa; + + if (attr->attr != KVM_ARM_VCPU_PVTIME_IPA) + return -ENXIO; + + ipa = vcpu->arch.steal.base; + + if (put_user(ipa, user)) + return -EFAULT; + return 0; +} + +int kvm_arm_pvtime_has_attr(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr) +{ + switch (attr->attr) { + case KVM_ARM_VCPU_PVTIME_IPA: + return 0; + } + return -ENXIO; +} -- cgit v1.2.3 From ce4d5ca2b9dd5d85944eb93c1bbf9eb11b7a907d Mon Sep 17 00:00:00 2001 From: Steven Price Date: Mon, 21 Oct 2019 16:28:22 +0100 Subject: arm/arm64: Make use of the SMCCC 1.1 wrapper Rather than directly choosing which function to use based on psci_ops.conduit, use the new arm_smccc_1_1 wrapper instead. In some cases we still need to do some operations based on the conduit, but the code duplication is removed. No functional change. Signed-off-by: Steven Price Signed-off-by: Marc Zyngier --- arch/arm/mm/proc-v7-bugs.c | 13 +++---- arch/arm64/kernel/cpu_errata.c | 81 +++++++++++++++--------------------------- 2 files changed, 34 insertions(+), 60 deletions(-) (limited to 'arch') diff --git a/arch/arm/mm/proc-v7-bugs.c b/arch/arm/mm/proc-v7-bugs.c index 54d87506d3b5..7c90b4c615a5 100644 --- a/arch/arm/mm/proc-v7-bugs.c +++ b/arch/arm/mm/proc-v7-bugs.c @@ -74,12 +74,13 @@ static void cpu_v7_spectre_init(void) case ARM_CPU_PART_CORTEX_A72: { struct arm_smccc_res res; + arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, + ARM_SMCCC_ARCH_WORKAROUND_1, &res); + if ((int)res.a0 != 0) + return; + switch (arm_smccc_1_1_get_conduit()) { case SMCCC_CONDUIT_HVC: - arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, - ARM_SMCCC_ARCH_WORKAROUND_1, &res); - if ((int)res.a0 != 0) - break; per_cpu(harden_branch_predictor_fn, cpu) = call_hvc_arch_workaround_1; cpu_do_switch_mm = cpu_v7_hvc_switch_mm; @@ -87,10 +88,6 @@ static void cpu_v7_spectre_init(void) break; case SMCCC_CONDUIT_SMC: - arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, - ARM_SMCCC_ARCH_WORKAROUND_1, &res); - if ((int)res.a0 != 0) - break; per_cpu(harden_branch_predictor_fn, cpu) = call_smc_arch_workaround_1; cpu_do_switch_mm = cpu_v7_smc_switch_mm; diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 9c0b011eee20..401246e095e7 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -209,40 +209,31 @@ static int detect_harden_bp_fw(void) struct arm_smccc_res res; u32 midr = read_cpuid_id(); + arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, + ARM_SMCCC_ARCH_WORKAROUND_1, &res); + + switch ((int)res.a0) { + case 1: + /* Firmware says we're just fine */ + return 0; + case 0: + break; + 
default: + return -1; + } + switch (arm_smccc_1_1_get_conduit()) { case SMCCC_CONDUIT_HVC: - arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, - ARM_SMCCC_ARCH_WORKAROUND_1, &res); - switch ((int)res.a0) { - case 1: - /* Firmware says we're just fine */ - return 0; - case 0: - cb = call_hvc_arch_workaround_1; - /* This is a guest, no need to patch KVM vectors */ - smccc_start = NULL; - smccc_end = NULL; - break; - default: - return -1; - } + cb = call_hvc_arch_workaround_1; + /* This is a guest, no need to patch KVM vectors */ + smccc_start = NULL; + smccc_end = NULL; break; case SMCCC_CONDUIT_SMC: - arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, - ARM_SMCCC_ARCH_WORKAROUND_1, &res); - switch ((int)res.a0) { - case 1: - /* Firmware says we're just fine */ - return 0; - case 0: - cb = call_smc_arch_workaround_1; - smccc_start = __smccc_workaround_1_smc_start; - smccc_end = __smccc_workaround_1_smc_end; - break; - default: - return -1; - } + cb = call_smc_arch_workaround_1; + smccc_start = __smccc_workaround_1_smc_start; + smccc_end = __smccc_workaround_1_smc_end; break; default: @@ -332,6 +323,8 @@ void __init arm64_enable_wa2_handling(struct alt_instr *alt, void arm64_set_ssbd_mitigation(bool state) { + int conduit; + if (!IS_ENABLED(CONFIG_ARM64_SSBD)) { pr_info_once("SSBD disabled by kernel configuration\n"); return; @@ -345,19 +338,10 @@ void arm64_set_ssbd_mitigation(bool state) return; } - switch (arm_smccc_1_1_get_conduit()) { - case SMCCC_CONDUIT_HVC: - arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_2, state, NULL); - break; - - case SMCCC_CONDUIT_SMC: - arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, state, NULL); - break; + conduit = arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_WORKAROUND_2, state, + NULL); - default: - WARN_ON_ONCE(1); - break; - } + WARN_ON_ONCE(conduit == SMCCC_CONDUIT_NONE); } static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, @@ -367,6 +351,7 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, bool required = true; s32 val; bool this_cpu_safe = false; + int conduit; WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); @@ -384,18 +369,10 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, goto out_printmsg; } - switch (arm_smccc_1_1_get_conduit()) { - case SMCCC_CONDUIT_HVC: - arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, - ARM_SMCCC_ARCH_WORKAROUND_2, &res); - break; + conduit = arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, + ARM_SMCCC_ARCH_WORKAROUND_2, &res); - case SMCCC_CONDUIT_SMC: - arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, - ARM_SMCCC_ARCH_WORKAROUND_2, &res); - break; - - default: + if (conduit == SMCCC_CONDUIT_NONE) { ssbd_state = ARM64_SSBD_UNKNOWN; if (!this_cpu_safe) __ssb_safe = false; -- cgit v1.2.3 From e0685fa228fdaf386f82ac0d64b2d6f3e0ddd588 Mon Sep 17 00:00:00 2001 From: Steven Price Date: Mon, 21 Oct 2019 16:28:23 +0100 Subject: arm64: Retrieve stolen time as paravirtualized guest Enable paravirtualization features when running under a hypervisor supporting the PV_TIME_ST hypercall. For each (v)CPU, we ask the hypervisor for the location of a shared page which the hypervisor will use to report stolen time to us. We set pv_time_ops to the stolen time function which simply reads the stolen value from the shared page for a VCPU. We guarantee single-copy atomicity using READ_ONCE which means we can also read the stolen time for another VCPU than the currently running one while it is potentially being updated by the hypervisor. 
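Condensed from the diff below into a sketch, the guest-side discovery is two SMCCC calls followed by mapping the returned address (per-CPU bookkeeping and error handling elided):

        struct arm_smccc_res res;

        /* Does the hypervisor implement the stolen time hypercall? */
        arm_smccc_1_1_invoke(ARM_SMCCC_HV_PV_TIME_FEATURES,
                             ARM_SMCCC_HV_PV_TIME_ST, &res);
        if (res.a0 != SMCCC_RET_SUCCESS)
                return false;

        /* Ask for this vCPU's stolen time structure; res.a0 is its IPA */
        arm_smccc_1_1_invoke(ARM_SMCCC_HV_PV_TIME_ST, &res);
        kaddr = memremap(res.a0, sizeof(struct pvclock_vcpu_stolen_time),
                         MEMREMAP_WB);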
Signed-off-by: Steven Price Signed-off-by: Marc Zyngier --- Documentation/admin-guide/kernel-parameters.txt | 6 +- arch/arm64/include/asm/paravirt.h | 9 +- arch/arm64/kernel/paravirt.c | 140 ++++++++++++++++++++++++ arch/arm64/kernel/time.c | 3 + include/linux/cpuhotplug.h | 1 + 5 files changed, 155 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index a84a83f8881e..19f465530e86 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3083,9 +3083,9 @@ [X86,PV_OPS] Disable paravirtualized VMware scheduler clock and use the default one. - no-steal-acc [X86,KVM] Disable paravirtualized steal time accounting. - steal time is computed, but won't influence scheduler - behaviour + no-steal-acc [X86,KVM,ARM64] Disable paravirtualized steal time + accounting. steal time is computed, but won't + influence scheduler behaviour nolapic [X86-32,APIC] Do not enable or use the local APIC. diff --git a/arch/arm64/include/asm/paravirt.h b/arch/arm64/include/asm/paravirt.h index 799d9dd6f7cc..cf3a0fd7c1a7 100644 --- a/arch/arm64/include/asm/paravirt.h +++ b/arch/arm64/include/asm/paravirt.h @@ -21,6 +21,13 @@ static inline u64 paravirt_steal_clock(int cpu) { return pv_ops.time.steal_clock(cpu); } -#endif + +int __init pv_time_init(void); + +#else + +#define pv_time_init() do {} while (0) + +#endif // CONFIG_PARAVIRT #endif diff --git a/arch/arm64/kernel/paravirt.c b/arch/arm64/kernel/paravirt.c index 4cfed91fe256..1ef702b0be2d 100644 --- a/arch/arm64/kernel/paravirt.c +++ b/arch/arm64/kernel/paravirt.c @@ -6,13 +6,153 @@ * Author: Stefano Stabellini */ +#define pr_fmt(fmt) "arm-pv: " fmt + +#include +#include #include +#include #include +#include +#include +#include +#include #include + #include +#include +#include struct static_key paravirt_steal_enabled; struct static_key paravirt_steal_rq_enabled; struct paravirt_patch_template pv_ops; EXPORT_SYMBOL_GPL(pv_ops); + +struct pv_time_stolen_time_region { + struct pvclock_vcpu_stolen_time *kaddr; +}; + +static DEFINE_PER_CPU(struct pv_time_stolen_time_region, stolen_time_region); + +static bool steal_acc = true; +static int __init parse_no_stealacc(char *arg) +{ + steal_acc = false; + return 0; +} + +early_param("no-steal-acc", parse_no_stealacc); + +/* return stolen time in ns by asking the hypervisor */ +static u64 pv_steal_clock(int cpu) +{ + struct pv_time_stolen_time_region *reg; + + reg = per_cpu_ptr(&stolen_time_region, cpu); + if (!reg->kaddr) { + pr_warn_once("stolen time enabled but not configured for cpu %d\n", + cpu); + return 0; + } + + return le64_to_cpu(READ_ONCE(reg->kaddr->stolen_time)); +} + +static int stolen_time_dying_cpu(unsigned int cpu) +{ + struct pv_time_stolen_time_region *reg; + + reg = this_cpu_ptr(&stolen_time_region); + if (!reg->kaddr) + return 0; + + memunmap(reg->kaddr); + memset(reg, 0, sizeof(*reg)); + + return 0; +} + +static int init_stolen_time_cpu(unsigned int cpu) +{ + struct pv_time_stolen_time_region *reg; + struct arm_smccc_res res; + + reg = this_cpu_ptr(&stolen_time_region); + + arm_smccc_1_1_invoke(ARM_SMCCC_HV_PV_TIME_ST, &res); + + if (res.a0 == SMCCC_RET_NOT_SUPPORTED) + return -EINVAL; + + reg->kaddr = memremap(res.a0, + sizeof(struct pvclock_vcpu_stolen_time), + MEMREMAP_WB); + + if (!reg->kaddr) { + pr_warn("Failed to map stolen time data structure\n"); + return -ENOMEM; + } + + if (le32_to_cpu(reg->kaddr->revision) != 0 || + 
le32_to_cpu(reg->kaddr->attributes) != 0) { + pr_warn_once("Unexpected revision or attributes in stolen time data\n"); + return -ENXIO; + } + + return 0; +} + +static int pv_time_init_stolen_time(void) +{ + int ret; + + ret = cpuhp_setup_state(CPUHP_AP_ARM_KVMPV_STARTING, + "hypervisor/arm/pvtime:starting", + init_stolen_time_cpu, stolen_time_dying_cpu); + if (ret < 0) + return ret; + return 0; +} + +static bool has_pv_steal_clock(void) +{ + struct arm_smccc_res res; + + /* To detect the presence of PV time support we require SMCCC 1.1+ */ + if (psci_ops.smccc_version < SMCCC_VERSION_1_1) + return false; + + arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, + ARM_SMCCC_HV_PV_TIME_FEATURES, &res); + + if (res.a0 != SMCCC_RET_SUCCESS) + return false; + + arm_smccc_1_1_invoke(ARM_SMCCC_HV_PV_TIME_FEATURES, + ARM_SMCCC_HV_PV_TIME_ST, &res); + + return (res.a0 == SMCCC_RET_SUCCESS); +} + +int __init pv_time_init(void) +{ + int ret; + + if (!has_pv_steal_clock()) + return 0; + + ret = pv_time_init_stolen_time(); + if (ret) + return ret; + + pv_ops.time.steal_clock = pv_steal_clock; + + static_key_slow_inc(&paravirt_steal_enabled); + if (steal_acc) + static_key_slow_inc(&paravirt_steal_rq_enabled); + + pr_info("using stolen time PV\n"); + + return 0; +} diff --git a/arch/arm64/kernel/time.c b/arch/arm64/kernel/time.c index 0b2946414dc9..73f06d4b3aae 100644 --- a/arch/arm64/kernel/time.c +++ b/arch/arm64/kernel/time.c @@ -30,6 +30,7 @@ #include #include +#include unsigned long profile_pc(struct pt_regs *regs) { @@ -65,4 +66,6 @@ void __init time_init(void) /* Calibrate the delay loop directly */ lpj_fine = arch_timer_rate / HZ; + + pv_time_init(); } diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 068793a619ca..89d75edb5750 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -136,6 +136,7 @@ enum cpuhp_state { /* Must be the last timer callback */ CPUHP_AP_DUMMY_TIMER_STARTING, CPUHP_AP_ARM_XEN_STARTING, + CPUHP_AP_ARM_KVMPV_STARTING, CPUHP_AP_ARM_CORESIGHT_STARTING, CPUHP_AP_ARM64_ISNDEP_STARTING, CPUHP_AP_SMPCFD_DYING, -- cgit v1.2.3 From 258ed7d02843052d127df2264c8b342276ced18a Mon Sep 17 00:00:00 2001 From: Leonardo Bras Date: Mon, 23 Sep 2019 18:30:23 -0300 Subject: KVM: PPC: Reduce calls to get current->mm by storing the value locally Reduce the number of calls to get_current() made to fetch current->mm by reading the value once and storing it locally, since it is not supposed to change within the same process.
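The transformation is mechanical; a sketch of the pattern applied by the diff below:

        struct mm_struct *mm = current->mm;     /* read current once */

        down_read(&mm->mmap_sem);
        vma = find_vma(mm, hva);
        /* ... */
        up_read(&mm->mmap_sem);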
Signed-off-by: Leonardo Bras Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_64_mmu_hv.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 9a75f0e1933b..f2b9aea43216 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -508,6 +508,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, struct vm_area_struct *vma; unsigned long rcbits; long mmio_update; + struct mm_struct *mm; if (kvm_is_radix(kvm)) return kvmppc_book3s_radix_page_fault(run, vcpu, ea, dsisr); @@ -584,6 +585,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, is_ci = false; pfn = 0; page = NULL; + mm = current->mm; pte_size = PAGE_SIZE; writing = (dsisr & DSISR_ISSTORE) != 0; /* If writing != 0, then the HPTE must allow writing, if we get here */ @@ -592,8 +594,8 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, npages = get_user_pages_fast(hva, 1, writing ? FOLL_WRITE : 0, pages); if (npages < 1) { /* Check if it's an I/O mapping */ - down_read(&current->mm->mmap_sem); - vma = find_vma(current->mm, hva); + down_read(&mm->mmap_sem); + vma = find_vma(mm, hva); if (vma && vma->vm_start <= hva && hva + psize <= vma->vm_end && (vma->vm_flags & VM_PFNMAP)) { pfn = vma->vm_pgoff + @@ -602,7 +604,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, is_ci = pte_ci(__pte((pgprot_val(vma->vm_page_prot)))); write_ok = vma->vm_flags & VM_WRITE; } - up_read(&current->mm->mmap_sem); + up_read(&mm->mmap_sem); if (!pfn) goto out_put; } else { @@ -621,8 +623,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, * hugepage split and collapse. */ local_irq_save(flags); - ptep = find_current_mm_pte(current->mm->pgd, - hva, NULL, NULL); + ptep = find_current_mm_pte(mm->pgd, hva, NULL, NULL); if (ptep) { pte = kvmppc_read_update_linux_pte(ptep, 1); if (__pte_write(pte)) -- cgit v1.2.3 From f41c4989c8de1fa70aafe950abaf80c56a8b8712 Mon Sep 17 00:00:00 2001 From: Leonardo Bras Date: Mon, 23 Sep 2019 18:24:08 -0300 Subject: KVM: PPC: E500: Replace current->mm by kvm->mm Given that in kvm_create_vm() there is: kvm->mm = current->mm; And that on every kvm_*_ioctl we have: if (kvm->mm != current->mm) return -EIO; I see no reason to keep using current->mm instead of kvm->mm. By doing so, we reduce the use of 'global' variables in the code, relying more on the contents of the kvm struct.
Signed-off-by: Leonardo Bras Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/e500_mmu_host.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index 321db0fdb9db..425d13806645 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c @@ -355,9 +355,9 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, if (tlbsel == 1) { struct vm_area_struct *vma; - down_read(&current->mm->mmap_sem); + down_read(&kvm->mm->mmap_sem); - vma = find_vma(current->mm, hva); + vma = find_vma(kvm->mm, hva); if (vma && hva >= vma->vm_start && (vma->vm_flags & VM_PFNMAP)) { /* @@ -441,7 +441,7 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, tsize = max(BOOK3E_PAGESZ_4K, tsize & ~1); } - up_read(&current->mm->mmap_sem); + up_read(&kvm->mm->mmap_sem); } if (likely(!pfnmap)) { -- cgit v1.2.3 From e7d71c943040c23f2fd042033d319f56e84f845b Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Fri, 27 Sep 2019 13:53:38 +0200 Subject: KVM: PPC: Book3S HV: XIVE: Set kvm->arch.xive when VPs are allocated MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If we cannot allocate the XIVE VPs in OPAL, the creation of a XIVE or XICS-on-XIVE device is aborted as expected, but we leave kvm->arch.xive set forever since the release method isn't called in this case. Any subsequent attempt to create a XIVE or XICS-on-XIVE device for this VM will thus always fail (DoS). This is a problem for QEMU since it destroys and re-creates these devices when the VM is reset: the VM would be restricted to using the much slower emulated XIVE or XICS forever. As an alternative to adding rollback, do not assign kvm->arch.xive before making sure the XIVE VPs are allocated in OPAL. Cc: stable@vger.kernel.org # v5.2 Fixes: 5422e95103cf ("KVM: PPC: Book3S HV: XIVE: Replace the 'destroy' method by a 'release' method") Signed-off-by: Greg Kurz Reviewed-by: Cédric Le Goater Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_xive.c | 11 +++++------ arch/powerpc/kvm/book3s_xive_native.c | 2 +- 2 files changed, 6 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index a3f9c665bb5b..baa740815b3c 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -2005,6 +2005,10 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type) pr_devel("Creating xive for partition\n"); + /* Already there ? */ + if (kvm->arch.xive) + return -EEXIST; + xive = kvmppc_xive_get_device(kvm, type); if (!xive) return -ENOMEM; @@ -2014,12 +2018,6 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type) xive->kvm = kvm; mutex_init(&xive->lock); - /* Already there ?
*/ - if (kvm->arch.xive) ret = -EEXIST; - else - kvm->arch.xive = xive; - /* We use the default queue size set by the host */ xive->q_order = xive_native_default_eq_shift(); if (xive->q_order < PAGE_SHIFT) @@ -2039,6 +2037,7 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type) if (ret) return ret; + kvm->arch.xive = xive; return 0; } diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c index 78b906ffa0d2..ebb4215baf45 100644 --- a/arch/powerpc/kvm/book3s_xive_native.c +++ b/arch/powerpc/kvm/book3s_xive_native.c @@ -1081,7 +1081,6 @@ static int kvmppc_xive_native_create(struct kvm_device *dev, u32 type) dev->private = xive; xive->dev = dev; xive->kvm = kvm; - kvm->arch.xive = xive; mutex_init(&xive->mapping_lock); mutex_init(&xive->lock); @@ -1102,6 +1101,7 @@ static int kvmppc_xive_native_create(struct kvm_device *dev, u32 type) if (ret) return ret; + kvm->arch.xive = xive; return 0; } -- cgit v1.2.3 From 8a4e7597ba1e41030189b73cd7261f4383588d1d Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Fri, 27 Sep 2019 13:53:49 +0200 Subject: KVM: PPC: Book3S HV: XIVE: Show VP id in debugfs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Print out the VP id of each connected vCPU; this allows one to see: - the VP block base, in which OPAL encodes information that may be useful when debugging - the packed vCPU id, which may differ from the raw vCPU id if the latter is >= KVM_MAX_VCPUS (2048) Signed-off-by: Greg Kurz Reviewed-by: Cédric Le Goater Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_xive.c | 4 ++-- arch/powerpc/kvm/book3s_xive_native.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index baa740815b3c..0b7859e40f66 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -2107,9 +2107,9 @@ static int xive_debug_show(struct seq_file *m, void *private) if (!xc) continue; - seq_printf(m, "cpu server %#x CPPR:%#x HWCPPR:%#x" + seq_printf(m, "cpu server %#x VP:%#x CPPR:%#x HWCPPR:%#x" " MFRR:%#x PEND:%#x h_xirr: R=%lld V=%lld\n", - xc->server_num, xc->cppr, xc->hw_cppr, + xc->server_num, xc->vp_id, xc->cppr, xc->hw_cppr, xc->mfrr, xc->pending, xc->stat_rm_h_xirr, xc->stat_vm_h_xirr); diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c index ebb4215baf45..43a86858390a 100644 --- a/arch/powerpc/kvm/book3s_xive_native.c +++ b/arch/powerpc/kvm/book3s_xive_native.c @@ -1204,8 +1204,8 @@ static int xive_native_debug_show(struct seq_file *m, void *private) if (!xc) continue; - seq_printf(m, "cpu server %#x NSR=%02x CPPR=%02x IBP=%02x PIPR=%02x w01=%016llx w2=%08x\n", - xc->server_num, + seq_printf(m, "cpu server %#x VP=%#x NSR=%02x CPPR=%02x IBP=%02x PIPR=%02x w01=%016llx w2=%08x\n", + xc->server_num, xc->vp_id, vcpu->arch.xive_saved_state.nsr, vcpu->arch.xive_saved_state.cppr, vcpu->arch.xive_saved_state.ipb, -- cgit v1.2.3 From 8db29ea2391cc6f5b73cc9c04b2dee4409b9fc05 Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Fri, 27 Sep 2019 13:53:55 +0200 Subject: KVM: PPC: Book3S HV: XIVE: Compute the VP id in a common helper Reduce code duplication by consolidating the checking of vCPU ids and VP ids into a common helper used by both the legacy and native XIVE KVM devices. And explain the magic with a comment.
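Both connect paths then reduce to the same guarded call, as the diff below shows; sketched:

        mutex_lock(&xive->lock);

        /* -EINVAL if the vCPU id is out of bounds, -EEXIST if the
         * VP id is already in use; *vp_id is set on success.
         */
        r = kvmppc_xive_compute_vp_id(xive, cpu, &vp_id);
        if (r)
                goto bail;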
Signed-off-by: Greg Kurz Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_xive.c | 42 +++++++++++++++++++++++++++-------- arch/powerpc/kvm/book3s_xive.h | 1 + arch/powerpc/kvm/book3s_xive_native.c | 11 ++------- 3 files changed, 36 insertions(+), 18 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index 0b7859e40f66..d84da9f6ee88 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -1211,6 +1211,37 @@ void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu) vcpu->arch.xive_vcpu = NULL; } +static bool kvmppc_xive_vcpu_id_valid(struct kvmppc_xive *xive, u32 cpu) +{ + /* We have a block of KVM_MAX_VCPUS VPs. We just need to check + * raw vCPU ids are below the expected limit for this guest's + * core stride ; kvmppc_pack_vcpu_id() will pack them down to an + * index that can be safely used to compute a VP id that belongs + * to the VP block. + */ + return cpu < KVM_MAX_VCPUS * xive->kvm->arch.emul_smt_mode; +} + +int kvmppc_xive_compute_vp_id(struct kvmppc_xive *xive, u32 cpu, u32 *vp) +{ + u32 vp_id; + + if (!kvmppc_xive_vcpu_id_valid(xive, cpu)) { + pr_devel("Out of bounds !\n"); + return -EINVAL; + } + + vp_id = kvmppc_xive_vp(xive, cpu); + if (kvmppc_xive_vp_in_use(xive->kvm, vp_id)) { + pr_devel("Duplicate !\n"); + return -EEXIST; + } + + *vp = vp_id; + + return 0; +} + int kvmppc_xive_connect_vcpu(struct kvm_device *dev, struct kvm_vcpu *vcpu, u32 cpu) { @@ -1229,20 +1260,13 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev, return -EPERM; if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT) return -EBUSY; - if (cpu >= (KVM_MAX_VCPUS * vcpu->kvm->arch.emul_smt_mode)) { - pr_devel("Out of bounds !\n"); - return -EINVAL; - } /* We need to synchronize with queue provisioning */ mutex_lock(&xive->lock); - vp_id = kvmppc_xive_vp(xive, cpu); - if (kvmppc_xive_vp_in_use(xive->kvm, vp_id)) { - pr_devel("Duplicate !\n"); - r = -EEXIST; + r = kvmppc_xive_compute_vp_id(xive, cpu, &vp_id); + if (r) goto bail; - } xc = kzalloc(sizeof(*xc), GFP_KERNEL); if (!xc) { diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xive.h index fe3ed50e0818..90cf6ec35a68 100644 --- a/arch/powerpc/kvm/book3s_xive.h +++ b/arch/powerpc/kvm/book3s_xive.h @@ -296,6 +296,7 @@ int kvmppc_xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio, struct kvmppc_xive *kvmppc_xive_get_device(struct kvm *kvm, u32 type); void xive_cleanup_single_escalation(struct kvm_vcpu *vcpu, struct kvmppc_xive_vcpu *xc, int irq); +int kvmppc_xive_compute_vp_id(struct kvmppc_xive *xive, u32 cpu, u32 *vp); #endif /* CONFIG_KVM_XICS */ #endif /* _KVM_PPC_BOOK3S_XICS_H */ diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c index 43a86858390a..5bb480b2aafd 100644 --- a/arch/powerpc/kvm/book3s_xive_native.c +++ b/arch/powerpc/kvm/book3s_xive_native.c @@ -118,19 +118,12 @@ int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev, return -EPERM; if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT) return -EBUSY; - if (server_num >= (KVM_MAX_VCPUS * vcpu->kvm->arch.emul_smt_mode)) { - pr_devel("Out of bounds !\n"); - return -EINVAL; - } mutex_lock(&xive->lock); - vp_id = kvmppc_xive_vp(xive, server_num); - if (kvmppc_xive_vp_in_use(xive->kvm, vp_id)) { - pr_devel("Duplicate !\n"); - rc = -EEXIST; + rc = kvmppc_xive_compute_vp_id(xive, server_num, &vp_id); + if (rc) goto bail; - } xc = kzalloc(sizeof(*xc), GFP_KERNEL); if (!xc) { -- cgit v1.2.3 From 062cfab7069fcb55d77ad5552f29e24178728fa2 Mon Sep 17 00:00:00 
2001 From: Greg Kurz Date: Fri, 27 Sep 2019 13:54:01 +0200 Subject: KVM: PPC: Book3S HV: XIVE: Make VP block size configurable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The XIVE VP is an internal structure which allows the XIVE interrupt controller to maintain the interrupt context state of vCPUs not dispatched on HW threads. When a guest is started, the XIVE KVM device allocates a block of XIVE VPs in OPAL, enough to accommodate the highest possible vCPU id KVM_MAX_VCPU_ID (16384) packed down to KVM_MAX_VCPUS (2048). With a guest's core stride of 8 and a threading mode of 1 (QEMU's default), a VM must run at least 256 vCPUs to actually need such a range of VPs. A POWER9 system has a limited XIVE VP space: 512k and KVM is currently wasting this HW resource with large VP allocations, especially since a typical VM likely runs with a lot fewer vCPUs. Make the size of the VP block configurable. Add an nr_servers field to the XIVE structure and a function to set it for this purpose. Split VP allocation out of the device create function. Since the VP block isn't used before the first vCPU connects to the XIVE KVM device, allocation is now performed by kvmppc_xive_connect_vcpu(). This gives the opportunity to set nr_servers in between: kvmppc_xive_create() / kvmppc_xive_native_create() . . kvmppc_xive_set_nr_servers() . . kvmppc_xive_connect_vcpu() / kvmppc_xive_native_connect_vcpu() The connect_vcpu() functions check that the vCPU id is below nr_servers and, if it is the first vCPU, they allocate the VP block. This is protected against a concurrent update of nr_servers by kvmppc_xive_set_nr_servers() with the xive->lock mutex. Also, the block is allocated once for the device lifetime: nr_servers should stay constant, otherwise connect_vcpu() could generate a bogus VP id and likely crash OPAL. It is thus forbidden to update nr_servers once the block is allocated. If the VP allocation fails, return ENOSPC, which seems more appropriate for reporting the depletion of a system-wide HW resource than ENOMEM or ENXIO. A VM using a stride of 8 and 1 thread per core with 32 vCPUs would hence only need 256 VPs instead of 2048. If the stride is set to match the number of threads per core, this goes further down to 32. This will be exposed to userspace by a subsequent patch. Signed-off-by: Greg Kurz Reviewed-by: Cédric Le Goater Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_xive.c | 65 ++++++++++++++++++++++++++++------- arch/powerpc/kvm/book3s_xive.h | 4 +++ arch/powerpc/kvm/book3s_xive_native.c | 18 +++------- 3 files changed, 62 insertions(+), 25 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index d84da9f6ee88..6c35b3d95986 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -1213,13 +1213,13 @@ void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu) static bool kvmppc_xive_vcpu_id_valid(struct kvmppc_xive *xive, u32 cpu) { - /* We have a block of KVM_MAX_VCPUS VPs. We just need to check + /* We have a block of xive->nr_servers VPs. We just need to check * raw vCPU ids are below the expected limit for this guest's * core stride ; kvmppc_pack_vcpu_id() will pack them down to an * index that can be safely used to compute a VP id that belongs * to the VP block.
*/ - return cpu < KVM_MAX_VCPUS * xive->kvm->arch.emul_smt_mode; + return cpu < xive->nr_servers * xive->kvm->arch.emul_smt_mode; } int kvmppc_xive_compute_vp_id(struct kvmppc_xive *xive, u32 cpu, u32 *vp) @@ -1231,6 +1231,14 @@ int kvmppc_xive_compute_vp_id(struct kvmppc_xive *xive, u32 cpu, u32 *vp) return -EINVAL; } + if (xive->vp_base == XIVE_INVALID_VP) { + xive->vp_base = xive_native_alloc_vp_block(xive->nr_servers); + pr_devel("VP_Base=%x nr_servers=%d\n", xive->vp_base, xive->nr_servers); + + if (xive->vp_base == XIVE_INVALID_VP) + return -ENOSPC; + } + vp_id = kvmppc_xive_vp(xive, cpu); if (kvmppc_xive_vp_in_use(xive->kvm, vp_id)) { pr_devel("Duplicate !\n"); @@ -1858,6 +1866,43 @@ int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, return 0; } +int kvmppc_xive_set_nr_servers(struct kvmppc_xive *xive, u64 addr) +{ + u32 __user *ubufp = (u32 __user *) addr; + u32 nr_servers; + int rc = 0; + + if (get_user(nr_servers, ubufp)) + return -EFAULT; + + pr_devel("%s nr_servers=%u\n", __func__, nr_servers); + + if (!nr_servers || nr_servers > KVM_MAX_VCPU_ID) + return -EINVAL; + + mutex_lock(&xive->lock); + if (xive->vp_base != XIVE_INVALID_VP) + /* The VP block is allocated once and freed when the device + * is released. Better not allow to change its size since its + * used by connect_vcpu to validate vCPU ids are valid (eg, + * setting it back to a higher value could allow connect_vcpu + * to come up with a VP id that goes beyond the VP block, which + * is likely to cause a crash in OPAL). + */ + rc = -EBUSY; + else if (nr_servers > KVM_MAX_VCPUS) + /* We don't need more servers. Higher vCPU ids get packed + * down below KVM_MAX_VCPUS by kvmppc_pack_vcpu_id(). + */ + xive->nr_servers = KVM_MAX_VCPUS; + else + xive->nr_servers = nr_servers; + + mutex_unlock(&xive->lock); + + return rc; +} + static int xive_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) { struct kvmppc_xive *xive = dev->private; @@ -2025,7 +2070,6 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type) { struct kvmppc_xive *xive; struct kvm *kvm = dev->kvm; - int ret = 0; pr_devel("Creating xive for partition\n"); @@ -2049,18 +2093,15 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type) else xive->q_page_order = xive->q_order - PAGE_SHIFT; - /* Allocate a bunch of VPs */ - xive->vp_base = xive_native_alloc_vp_block(KVM_MAX_VCPUS); - pr_devel("VP_Base=%x\n", xive->vp_base); - - if (xive->vp_base == XIVE_INVALID_VP) - ret = -ENOMEM; + /* VP allocation is delayed to the first call to connect_vcpu */ + xive->vp_base = XIVE_INVALID_VP; + /* KVM_MAX_VCPUS limits the number of VMs to roughly 64 per sockets + * on a POWER9 system. 
+ */ + xive->nr_servers = KVM_MAX_VCPUS; xive->single_escalation = xive_native_has_single_escalation(); - if (ret) - return ret; - kvm->arch.xive = xive; return 0; } diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xive.h index 90cf6ec35a68..382e3a56e789 100644 --- a/arch/powerpc/kvm/book3s_xive.h +++ b/arch/powerpc/kvm/book3s_xive.h @@ -135,6 +135,9 @@ struct kvmppc_xive { /* Flags */ u8 single_escalation; + /* Number of entries in the VP block */ + u32 nr_servers; + struct kvmppc_xive_ops *ops; struct address_space *mapping; struct mutex mapping_lock; @@ -297,6 +300,7 @@ struct kvmppc_xive *kvmppc_xive_get_device(struct kvm *kvm, u32 type); void xive_cleanup_single_escalation(struct kvm_vcpu *vcpu, struct kvmppc_xive_vcpu *xc, int irq); int kvmppc_xive_compute_vp_id(struct kvmppc_xive *xive, u32 cpu, u32 *vp); +int kvmppc_xive_set_nr_servers(struct kvmppc_xive *xive, u64 addr); #endif /* CONFIG_KVM_XICS */ #endif /* _KVM_PPC_BOOK3S_XICS_H */ diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c index 5bb480b2aafd..8ab333eabeef 100644 --- a/arch/powerpc/kvm/book3s_xive_native.c +++ b/arch/powerpc/kvm/book3s_xive_native.c @@ -1060,7 +1060,6 @@ static int kvmppc_xive_native_create(struct kvm_device *dev, u32 type) { struct kvmppc_xive *xive; struct kvm *kvm = dev->kvm; - int ret = 0; pr_devel("Creating xive native device\n"); @@ -1077,23 +1076,16 @@ static int kvmppc_xive_native_create(struct kvm_device *dev, u32 type) mutex_init(&xive->mapping_lock); mutex_init(&xive->lock); - /* - * Allocate a bunch of VPs. KVM_MAX_VCPUS is a large value for - * a default. Getting the max number of CPUs the VM was - * configured with would improve our usage of the XIVE VP space. + /* VP allocation is delayed to the first call to connect_vcpu */ + xive->vp_base = XIVE_INVALID_VP; + /* KVM_MAX_VCPUS limits the number of VMs to roughly 64 per sockets + * on a POWER9 system. */ - xive->vp_base = xive_native_alloc_vp_block(KVM_MAX_VCPUS); - pr_devel("VP_Base=%x\n", xive->vp_base); - - if (xive->vp_base == XIVE_INVALID_VP) - ret = -ENXIO; + xive->nr_servers = KVM_MAX_VCPUS; xive->single_escalation = xive_native_has_single_escalation(); xive->ops = &kvmppc_xive_native_ops; - if (ret) - return ret; - kvm->arch.xive = xive; return 0; } -- cgit v1.2.3 From efe5ddcae496b7c7307805d31815df23ba69bf7c Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Fri, 27 Sep 2019 13:54:07 +0200 Subject: KVM: PPC: Book3S HV: XIVE: Allow userspace to set the # of VPs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a new attribute to both XIVE and XICS-on-XIVE KVM devices so that userspace can tell how many interrupt servers it needs. If a VM needs less than the current default of KVM_MAX_VCPUS (2048), we can allocate fewer VPs in OPAL. Combined with a core stride (VSMT) that matches the number of guest threads per core, this may substantially increase the number of VMs that can run concurrently with an in-kernel XIVE device. Since the legacy XIVE KVM device is exposed to userspace through the XICS KVM API, a new attribute group is added to it for this purpose. While here, fix the syntax of the existing KVM_DEV_XICS_GRP_SOURCES in the XICS documentation.
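For illustration, a minimal userspace sketch (not part of this patch; xics_fd and max_vcpu_id are assumptions): the VMM sizes the VP block on the XICS-on-XIVE device before any vCPU connects, and the XIVE native device takes KVM_DEV_XIVE_GRP_CTRL / KVM_DEV_XIVE_NR_SERVERS in the same way:

        __u32 nr_servers = max_vcpu_id + 1;     /* highest vcpu id plus one */
        struct kvm_device_attr attr = {
                .group = KVM_DEV_XICS_GRP_CTRL,
                .attr  = KVM_DEV_XICS_NR_SERVERS,
                .addr  = (__u64)&nr_servers,
        };

        /* Must happen before the first vCPU connects, or we get -EBUSY */
        if (ioctl(xics_fd, KVM_SET_DEVICE_ATTR, &attr) < 0)
                err(1, "KVM_SET_DEVICE_ATTR(NR_SERVERS)");      /* <err.h> */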
Signed-off-by: Greg Kurz Reviewed-by: Cédric Le Goater Signed-off-by: Paul Mackerras --- Documentation/virt/kvm/devices/xics.txt | 14 ++++++++++++-- Documentation/virt/kvm/devices/xive.txt | 8 ++++++++ arch/powerpc/include/uapi/asm/kvm.h | 3 +++ arch/powerpc/kvm/book3s_xive.c | 10 ++++++++++ arch/powerpc/kvm/book3s_xive_native.c | 3 +++ 5 files changed, 36 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/Documentation/virt/kvm/devices/xics.txt b/Documentation/virt/kvm/devices/xics.txt index 42864935ac5d..423332dda7bc 100644 --- a/Documentation/virt/kvm/devices/xics.txt +++ b/Documentation/virt/kvm/devices/xics.txt @@ -3,9 +3,19 @@ XICS interrupt controller Device type supported: KVM_DEV_TYPE_XICS Groups: - KVM_DEV_XICS_SOURCES + 1. KVM_DEV_XICS_GRP_SOURCES Attributes: One per interrupt source, indexed by the source number. + 2. KVM_DEV_XICS_GRP_CTRL + Attributes: + 2.1 KVM_DEV_XICS_NR_SERVERS (write only) + The kvm_device_attr.addr points to a __u32 value which is the number of + interrupt server numbers (ie, highest possible vcpu id plus one). + Errors: + -EINVAL: Value greater than KVM_MAX_VCPU_ID. + -EFAULT: Invalid user pointer for attr->addr. + -EBUSY: A vcpu is already connected to the device. + This device emulates the XICS (eXternal Interrupt Controller Specification) defined in PAPR. The XICS has a set of interrupt sources, each identified by a 20-bit source number, and a set of @@ -38,7 +48,7 @@ least-significant end of the word: Each source has 64 bits of state that can be read and written using the KVM_GET_DEVICE_ATTR and KVM_SET_DEVICE_ATTR ioctls, specifying the -KVM_DEV_XICS_SOURCES attribute group, with the attribute number being +KVM_DEV_XICS_GRP_SOURCES attribute group, with the attribute number being the interrupt source number. The 64 bit state word has the following bitfields, starting from the least-significant end of the word: diff --git a/Documentation/virt/kvm/devices/xive.txt b/Documentation/virt/kvm/devices/xive.txt index 9a24a4525253..f5d1d6b5af61 100644 --- a/Documentation/virt/kvm/devices/xive.txt +++ b/Documentation/virt/kvm/devices/xive.txt @@ -78,6 +78,14 @@ the legacy interrupt mode, referred as XICS (POWER7/8). migrating the VM. Errors: none + 1.3 KVM_DEV_XIVE_NR_SERVERS (write only) + The kvm_device_attr.addr points to a __u32 value which is the number of + interrupt server numbers (ie, highest possible vcpu id plus one). + Errors: + -EINVAL: Value greater than KVM_MAX_VCPU_ID. + -EFAULT: Invalid user pointer for attr->addr. + -EBUSY: A vCPU is already connected to the device. + 2. KVM_DEV_XIVE_GRP_SOURCE (write only) Initializes a new source in the XIVE device and mask it. 
Attributes: diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index b0f72dea8b11..264e266a85bf 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -667,6 +667,8 @@ struct kvm_ppc_cpu_char { /* PPC64 eXternal Interrupt Controller Specification */ #define KVM_DEV_XICS_GRP_SOURCES 1 /* 64-bit source attributes */ +#define KVM_DEV_XICS_GRP_CTRL 2 +#define KVM_DEV_XICS_NR_SERVERS 1 /* Layout of 64-bit source attribute values */ #define KVM_XICS_DESTINATION_SHIFT 0 @@ -683,6 +685,7 @@ struct kvm_ppc_cpu_char { #define KVM_DEV_XIVE_GRP_CTRL 1 #define KVM_DEV_XIVE_RESET 1 #define KVM_DEV_XIVE_EQ_SYNC 2 +#define KVM_DEV_XIVE_NR_SERVERS 3 #define KVM_DEV_XIVE_GRP_SOURCE 2 /* 64-bit source identifier */ #define KVM_DEV_XIVE_GRP_SOURCE_CONFIG 3 /* 64-bit source identifier */ #define KVM_DEV_XIVE_GRP_EQ_CONFIG 4 /* 64-bit EQ identifier */ diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index 6c35b3d95986..66858b7d3c6b 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -1911,6 +1911,11 @@ static int xive_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) switch (attr->group) { case KVM_DEV_XICS_GRP_SOURCES: return xive_set_source(xive, attr->attr, attr->addr); + case KVM_DEV_XICS_GRP_CTRL: + switch (attr->attr) { + case KVM_DEV_XICS_NR_SERVERS: + return kvmppc_xive_set_nr_servers(xive, attr->addr); + } } return -ENXIO; } @@ -1936,6 +1941,11 @@ static int xive_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr) attr->attr < KVMPPC_XICS_NR_IRQS) return 0; break; + case KVM_DEV_XICS_GRP_CTRL: + switch (attr->attr) { + case KVM_DEV_XICS_NR_SERVERS: + return 0; + } } return -ENXIO; } diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c index 8ab333eabeef..34bd123fa024 100644 --- a/arch/powerpc/kvm/book3s_xive_native.c +++ b/arch/powerpc/kvm/book3s_xive_native.c @@ -921,6 +921,8 @@ static int kvmppc_xive_native_set_attr(struct kvm_device *dev, return kvmppc_xive_reset(xive); case KVM_DEV_XIVE_EQ_SYNC: return kvmppc_xive_native_eq_sync(xive); + case KVM_DEV_XIVE_NR_SERVERS: + return kvmppc_xive_set_nr_servers(xive, attr->addr); } break; case KVM_DEV_XIVE_GRP_SOURCE: @@ -960,6 +962,7 @@ static int kvmppc_xive_native_has_attr(struct kvm_device *dev, switch (attr->attr) { case KVM_DEV_XIVE_RESET: case KVM_DEV_XIVE_EQ_SYNC: + case KVM_DEV_XIVE_NR_SERVERS: return 0; } break; -- cgit v1.2.3 From 9ee6471eb9d43114ba4f0de3e0f483bf6fb2a906 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 2 Oct 2019 16:00:21 +1000 Subject: KVM: PPC: Book3S: Define and use SRR1_MSR_BITS Acked-by: Paul Mackerras Signed-off-by: Nicholas Piggin Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/reg.h | 12 ++++++++++++ arch/powerpc/kvm/book3s.c | 2 +- arch/powerpc/kvm/book3s_hv_nested.c | 2 +- 3 files changed, 14 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index b3cbb1136bce..75c7e95a321b 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -748,6 +748,18 @@ #define SPRN_USPRG7 0x107 /* SPRG7 userspace read */ #define SPRN_SRR0 0x01A /* Save/Restore Register 0 */ #define SPRN_SRR1 0x01B /* Save/Restore Register 1 */ + +#ifdef CONFIG_PPC_BOOK3S +/* + * Bits loaded from MSR upon interrupt. + * PPC (64-bit) bits 33-36,42-47 are interrupt dependent, the others are + * loaded from MSR. 
The exception is that SRESET and MCE do not always load + * bit 62 (RI) from MSR. Don't use PPC_BITMASK for this because 32-bit uses + * it. + */ +#define SRR1_MSR_BITS (~0x783f0000UL) +#endif + #define SRR1_ISI_NOPT 0x40000000 /* ISI: Not found in hash */ #define SRR1_ISI_N_OR_G 0x10000000 /* ISI: Access is no-exec or G */ #define SRR1_ISI_PROT 0x08000000 /* ISI: Other protection fault */ diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index ec2547cc5ecb..a2336c452905 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -136,7 +136,7 @@ void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags) { kvmppc_unfixup_split_real(vcpu); kvmppc_set_srr0(vcpu, kvmppc_get_pc(vcpu)); - kvmppc_set_srr1(vcpu, (kvmppc_get_msr(vcpu) & ~0x783f0000ul) | flags); + kvmppc_set_srr1(vcpu, (kvmppc_get_msr(vcpu) & SRR1_MSR_BITS) | flags); kvmppc_set_pc(vcpu, kvmppc_interrupt_offset(vcpu) + vec); vcpu->arch.mmu.reset_msr(vcpu); } diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c index cdf30c6eaf54..dc97e5be76f6 100644 --- a/arch/powerpc/kvm/book3s_hv_nested.c +++ b/arch/powerpc/kvm/book3s_hv_nested.c @@ -1186,7 +1186,7 @@ static int kvmhv_translate_addr_nested(struct kvm_vcpu *vcpu, forward_to_l1: vcpu->arch.fault_dsisr = flags; if (vcpu->arch.trap == BOOK3S_INTERRUPT_H_INST_STORAGE) { - vcpu->arch.shregs.msr &= ~0x783f0000ul; + vcpu->arch.shregs.msr &= SRR1_MSR_BITS; vcpu->arch.shregs.msr |= flags; } return RESUME_HOST; -- cgit v1.2.3 From 87a45e07a5abfec4d6b0e8356718f8919d0a3c20 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 2 Oct 2019 16:00:22 +1000 Subject: KVM: PPC: Book3S: Replace reset_msr mmu op with inject_interrupt arch op reset_msr sets the MSR for interrupt injection, but it's cleaner and more flexible to provide a single op to set both MSR and PC for the interrupt. 
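In short, each implementation now provides the op and the common Book3S code reduces to a single indirect call, as the diff below shows:

        /* kvmppc_ops gains: */
        void (*inject_interrupt)(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags);

        /* book3s.c */
        void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags)
        {
                vcpu->kvm->arch.kvm_ops->inject_interrupt(vcpu, vec, flags);
        }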
Signed-off-by: Nicholas Piggin Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/kvm_host.h | 1 - arch/powerpc/include/asm/kvm_ppc.h | 1 + arch/powerpc/kvm/book3s.c | 27 +------------------------ arch/powerpc/kvm/book3s_32_mmu.c | 6 ------ arch/powerpc/kvm/book3s_64_mmu.c | 15 -------------- arch/powerpc/kvm/book3s_64_mmu_hv.c | 13 ------------ arch/powerpc/kvm/book3s_hv.c | 22 ++++++++++++++++++++ arch/powerpc/kvm/book3s_pr.c | 40 ++++++++++++++++++++++++++++++++++++- 8 files changed, 63 insertions(+), 62 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 6fe6ad64cba5..4273e799203d 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -401,7 +401,6 @@ struct kvmppc_mmu { u32 (*mfsrin)(struct kvm_vcpu *vcpu, u32 srnum); int (*xlate)(struct kvm_vcpu *vcpu, gva_t eaddr, struct kvmppc_pte *pte, bool data, bool iswrite); - void (*reset_msr)(struct kvm_vcpu *vcpu); void (*tlbie)(struct kvm_vcpu *vcpu, ulong addr, bool large); int (*esid_to_vsid)(struct kvm_vcpu *vcpu, ulong esid, u64 *vsid); u64 (*ea_to_vp)(struct kvm_vcpu *vcpu, gva_t eaddr, bool data); diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index ee62776e5433..d63f649fe713 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -271,6 +271,7 @@ struct kvmppc_ops { union kvmppc_one_reg *val); void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu); void (*vcpu_put)(struct kvm_vcpu *vcpu); + void (*inject_interrupt)(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags); void (*set_msr)(struct kvm_vcpu *vcpu, u64 msr); int (*vcpu_run)(struct kvm_run *run, struct kvm_vcpu *vcpu); struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned int id); diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index a2336c452905..58a59ee998e2 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -74,27 +74,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { NULL } }; -void kvmppc_unfixup_split_real(struct kvm_vcpu *vcpu) -{ - if (vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) { - ulong pc = kvmppc_get_pc(vcpu); - ulong lr = kvmppc_get_lr(vcpu); - if ((pc & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS) - kvmppc_set_pc(vcpu, pc & ~SPLIT_HACK_MASK); - if ((lr & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS) - kvmppc_set_lr(vcpu, lr & ~SPLIT_HACK_MASK); - vcpu->arch.hflags &= ~BOOK3S_HFLAG_SPLIT_HACK; - } -} -EXPORT_SYMBOL_GPL(kvmppc_unfixup_split_real); - -static inline unsigned long kvmppc_interrupt_offset(struct kvm_vcpu *vcpu) -{ - if (!is_kvmppc_hv_enabled(vcpu->kvm)) - return to_book3s(vcpu)->hior; - return 0; -} - static inline void kvmppc_update_int_pending(struct kvm_vcpu *vcpu, unsigned long pending_now, unsigned long old_pending) { @@ -134,11 +113,7 @@ static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu) void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags) { - kvmppc_unfixup_split_real(vcpu); - kvmppc_set_srr0(vcpu, kvmppc_get_pc(vcpu)); - kvmppc_set_srr1(vcpu, (kvmppc_get_msr(vcpu) & SRR1_MSR_BITS) | flags); - kvmppc_set_pc(vcpu, kvmppc_interrupt_offset(vcpu) + vec); - vcpu->arch.mmu.reset_msr(vcpu); + vcpu->kvm->arch.kvm_ops->inject_interrupt(vcpu, vec, flags); } static int kvmppc_book3s_vec2irqprio(unsigned int vec) diff --git a/arch/powerpc/kvm/book3s_32_mmu.c b/arch/powerpc/kvm/book3s_32_mmu.c index 18f244aad7aa..f21e73492ce3 100644 --- a/arch/powerpc/kvm/book3s_32_mmu.c +++ 
b/arch/powerpc/kvm/book3s_32_mmu.c @@ -90,11 +90,6 @@ static u64 kvmppc_mmu_book3s_32_ea_to_vp(struct kvm_vcpu *vcpu, gva_t eaddr, return (((u64)eaddr >> 12) & 0xffff) | (vsid << 16); } -static void kvmppc_mmu_book3s_32_reset_msr(struct kvm_vcpu *vcpu) -{ - kvmppc_set_msr(vcpu, 0); -} - static hva_t kvmppc_mmu_book3s_32_get_pteg(struct kvm_vcpu *vcpu, u32 sre, gva_t eaddr, bool primary) @@ -406,7 +401,6 @@ void kvmppc_mmu_book3s_32_init(struct kvm_vcpu *vcpu) mmu->mtsrin = kvmppc_mmu_book3s_32_mtsrin; mmu->mfsrin = kvmppc_mmu_book3s_32_mfsrin; mmu->xlate = kvmppc_mmu_book3s_32_xlate; - mmu->reset_msr = kvmppc_mmu_book3s_32_reset_msr; mmu->tlbie = kvmppc_mmu_book3s_32_tlbie; mmu->esid_to_vsid = kvmppc_mmu_book3s_32_esid_to_vsid; mmu->ea_to_vp = kvmppc_mmu_book3s_32_ea_to_vp; diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c index 5f63a5f7f24f..599133256a95 100644 --- a/arch/powerpc/kvm/book3s_64_mmu.c +++ b/arch/powerpc/kvm/book3s_64_mmu.c @@ -24,20 +24,6 @@ #define dprintk(X...) do { } while(0) #endif -static void kvmppc_mmu_book3s_64_reset_msr(struct kvm_vcpu *vcpu) -{ - unsigned long msr = vcpu->arch.intr_msr; - unsigned long cur_msr = kvmppc_get_msr(vcpu); - - /* If transactional, change to suspend mode on IRQ delivery */ - if (MSR_TM_TRANSACTIONAL(cur_msr)) - msr |= MSR_TS_S; - else - msr |= cur_msr & MSR_TS_MASK; - - kvmppc_set_msr(vcpu, msr); -} - static struct kvmppc_slb *kvmppc_mmu_book3s_64_find_slbe( struct kvm_vcpu *vcpu, gva_t eaddr) @@ -676,7 +662,6 @@ void kvmppc_mmu_book3s_64_init(struct kvm_vcpu *vcpu) mmu->slbie = kvmppc_mmu_book3s_64_slbie; mmu->slbia = kvmppc_mmu_book3s_64_slbia; mmu->xlate = kvmppc_mmu_book3s_64_xlate; - mmu->reset_msr = kvmppc_mmu_book3s_64_reset_msr; mmu->tlbie = kvmppc_mmu_book3s_64_tlbie; mmu->esid_to_vsid = kvmppc_mmu_book3s_64_esid_to_vsid; mmu->ea_to_vp = kvmppc_mmu_book3s_64_ea_to_vp; diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index f2b9aea43216..4c37e97c75a1 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -275,18 +275,6 @@ int kvmppc_mmu_hv_init(void) return 0; } -static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu) -{ - unsigned long msr = vcpu->arch.intr_msr; - - /* If transactional, change to suspend mode on IRQ delivery */ - if (MSR_TM_TRANSACTIONAL(vcpu->arch.shregs.msr)) - msr |= MSR_TS_S; - else - msr |= vcpu->arch.shregs.msr & MSR_TS_MASK; - kvmppc_set_msr(vcpu, msr); -} - static long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags, long pte_index, unsigned long pteh, unsigned long ptel, unsigned long *pte_idx_ret) @@ -2162,7 +2150,6 @@ void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu) vcpu->arch.slb_nr = 32; /* POWER7/POWER8 */ mmu->xlate = kvmppc_mmu_book3s_64_hv_xlate; - mmu->reset_msr = kvmppc_mmu_book3s_64_hv_reset_msr; vcpu->arch.hflags |= BOOK3S_HFLAG_SLB; } diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 709cf1fd4cf4..94a0a9911b27 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -338,6 +338,27 @@ static void kvmppc_core_vcpu_put_hv(struct kvm_vcpu *vcpu) spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags); } +static void kvmppc_inject_interrupt_hv(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags) +{ + unsigned long msr, pc, new_msr, new_pc; + + msr = kvmppc_get_msr(vcpu); + pc = kvmppc_get_pc(vcpu); + new_msr = vcpu->arch.intr_msr; + new_pc = vec; + + /* If transactional, change to suspend mode on IRQ delivery */ 
+ if (MSR_TM_TRANSACTIONAL(msr)) + new_msr |= MSR_TS_S; + else + new_msr |= msr & MSR_TS_MASK; + + kvmppc_set_srr0(vcpu, pc); + kvmppc_set_srr1(vcpu, (msr & SRR1_MSR_BITS) | srr1_flags); + kvmppc_set_pc(vcpu, new_pc); + kvmppc_set_msr(vcpu, new_msr); +} + static void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr) { /* @@ -5401,6 +5422,7 @@ static struct kvmppc_ops kvm_ops_hv = { .set_one_reg = kvmppc_set_one_reg_hv, .vcpu_load = kvmppc_core_vcpu_load_hv, .vcpu_put = kvmppc_core_vcpu_put_hv, + .inject_interrupt = kvmppc_inject_interrupt_hv, .set_msr = kvmppc_set_msr_hv, .vcpu_run = kvmppc_vcpu_run_hv, .vcpu_create = kvmppc_core_vcpu_create_hv, diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index cc65af8fe6f7..ce4fcf76e53e 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -90,7 +90,43 @@ static void kvmppc_fixup_split_real(struct kvm_vcpu *vcpu) kvmppc_set_pc(vcpu, pc | SPLIT_HACK_OFFS); } -void kvmppc_unfixup_split_real(struct kvm_vcpu *vcpu); +static void kvmppc_unfixup_split_real(struct kvm_vcpu *vcpu) +{ + if (vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) { + ulong pc = kvmppc_get_pc(vcpu); + ulong lr = kvmppc_get_lr(vcpu); + if ((pc & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS) + kvmppc_set_pc(vcpu, pc & ~SPLIT_HACK_MASK); + if ((lr & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS) + kvmppc_set_lr(vcpu, lr & ~SPLIT_HACK_MASK); + vcpu->arch.hflags &= ~BOOK3S_HFLAG_SPLIT_HACK; + } +} + +static void kvmppc_inject_interrupt_pr(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags) +{ + unsigned long msr, pc, new_msr, new_pc; + + kvmppc_unfixup_split_real(vcpu); + + msr = kvmppc_get_msr(vcpu); + pc = kvmppc_get_pc(vcpu); + new_msr = vcpu->arch.intr_msr; + new_pc = to_book3s(vcpu)->hior + vec; + +#ifdef CONFIG_PPC_BOOK3S_64 + /* If transactional, change to suspend mode on IRQ delivery */ + if (MSR_TM_TRANSACTIONAL(msr)) + new_msr |= MSR_TS_S; + else + new_msr |= msr & MSR_TS_MASK; +#endif + + kvmppc_set_srr0(vcpu, pc); + kvmppc_set_srr1(vcpu, (msr & SRR1_MSR_BITS) | srr1_flags); + kvmppc_set_pc(vcpu, new_pc); + kvmppc_set_msr(vcpu, new_msr); +} static void kvmppc_core_vcpu_load_pr(struct kvm_vcpu *vcpu, int cpu) { @@ -1761,6 +1797,7 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_pr(struct kvm *kvm, #else /* default to book3s_32 (750) */ vcpu->arch.pvr = 0x84202; + vcpu->arch.intr_msr = 0; #endif kvmppc_set_pvr_pr(vcpu, vcpu->arch.pvr); vcpu->arch.slb_nr = 64; @@ -2058,6 +2095,7 @@ static struct kvmppc_ops kvm_ops_pr = { .set_one_reg = kvmppc_set_one_reg_pr, .vcpu_load = kvmppc_core_vcpu_load_pr, .vcpu_put = kvmppc_core_vcpu_put_pr, + .inject_interrupt = kvmppc_inject_interrupt_pr, .set_msr = kvmppc_set_msr_pr, .vcpu_run = kvmppc_vcpu_run_pr, .vcpu_create = kvmppc_core_vcpu_create_pr, -- cgit v1.2.3 From 268f4ef9954cec198cd6772caadf453bcaed3e5a Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 2 Oct 2019 16:00:23 +1000 Subject: KVM: PPC: Book3S HV: Reuse kvmppc_inject_interrupt for async guest delivery This consolidates the HV interrupt delivery logic into one place. 
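Sketching the consolidated flow from the diff below, both delivery paths now share one static helper in book3s_hv_builtin.c:

        /* synchronous injection, e.g. via the inject_interrupt arch op */
        void kvmppc_inject_interrupt_hv(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags)
        {
                inject_interrupt(vcpu, vec, srr1_flags);
                kvmppc_end_cede(vcpu);
        }

        /* asynchronous delivery on guest entry */
        inject_interrupt(vcpu, BOOK3S_INTERRUPT_EXTERNAL, 0);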
Signed-off-by: Nicholas Piggin Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s.h | 3 ++ arch/powerpc/kvm/book3s_hv.c | 43 ----------------------- arch/powerpc/kvm/book3s_hv_builtin.c | 67 ++++++++++++++++++++++++++++-------- 3 files changed, 56 insertions(+), 57 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/book3s.h b/arch/powerpc/kvm/book3s.h index 2ef1311a2a13..3a4613985949 100644 --- a/arch/powerpc/kvm/book3s.h +++ b/arch/powerpc/kvm/book3s.h @@ -32,4 +32,7 @@ extern void kvmppc_emulate_tabort(struct kvm_vcpu *vcpu, int ra_val); static inline void kvmppc_emulate_tabort(struct kvm_vcpu *vcpu, int ra_val) {} #endif +extern void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr); +extern void kvmppc_inject_interrupt_hv(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags); + #endif diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 94a0a9911b27..c340d416dce3 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -133,7 +133,6 @@ static inline bool nesting_enabled(struct kvm *kvm) /* If set, the threads on each CPU core have to be in the same MMU mode */ static bool no_mixing_hpt_and_radix; -static void kvmppc_end_cede(struct kvm_vcpu *vcpu); static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu); /* @@ -338,39 +337,6 @@ static void kvmppc_core_vcpu_put_hv(struct kvm_vcpu *vcpu) spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags); } -static void kvmppc_inject_interrupt_hv(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags) -{ - unsigned long msr, pc, new_msr, new_pc; - - msr = kvmppc_get_msr(vcpu); - pc = kvmppc_get_pc(vcpu); - new_msr = vcpu->arch.intr_msr; - new_pc = vec; - - /* If transactional, change to suspend mode on IRQ delivery */ - if (MSR_TM_TRANSACTIONAL(msr)) - new_msr |= MSR_TS_S; - else - new_msr |= msr & MSR_TS_MASK; - - kvmppc_set_srr0(vcpu, pc); - kvmppc_set_srr1(vcpu, (msr & SRR1_MSR_BITS) | srr1_flags); - kvmppc_set_pc(vcpu, new_pc); - kvmppc_set_msr(vcpu, new_msr); -} - -static void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr) -{ - /* - * Check for illegal transactional state bit combination - * and if we find it, force the TS field to a safe state. - */ - if ((msr & MSR_TS_MASK) == MSR_TS_MASK) - msr &= ~MSR_TS_MASK; - vcpu->arch.shregs.msr = msr; - kvmppc_end_cede(vcpu); -} - static void kvmppc_set_pvr_hv(struct kvm_vcpu *vcpu, u32 pvr) { vcpu->arch.pvr = pvr; @@ -2475,15 +2441,6 @@ static void kvmppc_set_timer(struct kvm_vcpu *vcpu) vcpu->arch.timer_running = 1; } -static void kvmppc_end_cede(struct kvm_vcpu *vcpu) -{ - vcpu->arch.ceded = 0; - if (vcpu->arch.timer_running) { - hrtimer_try_to_cancel(&vcpu->arch.dec_timer); - vcpu->arch.timer_running = 0; - } -} - extern int __kvmppc_vcore_entry(void); static void kvmppc_remove_runnable(struct kvmppc_vcore *vc, diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 7c1909657b55..068bee941a71 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -755,6 +755,56 @@ void kvmhv_p9_restore_lpcr(struct kvm_split_mode *sip) local_paca->kvm_hstate.kvm_split_mode = NULL; } +static void kvmppc_end_cede(struct kvm_vcpu *vcpu) +{ + vcpu->arch.ceded = 0; + if (vcpu->arch.timer_running) { + hrtimer_try_to_cancel(&vcpu->arch.dec_timer); + vcpu->arch.timer_running = 0; + } +} + +void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr) +{ + /* + * Check for illegal transactional state bit combination + * and if we find it, force the TS field to a safe state. 
+ */ + if ((msr & MSR_TS_MASK) == MSR_TS_MASK) + msr &= ~MSR_TS_MASK; + vcpu->arch.shregs.msr = msr; + kvmppc_end_cede(vcpu); +} +EXPORT_SYMBOL_GPL(kvmppc_set_msr_hv); + +static void inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags) +{ + unsigned long msr, pc, new_msr, new_pc; + + msr = kvmppc_get_msr(vcpu); + pc = kvmppc_get_pc(vcpu); + new_msr = vcpu->arch.intr_msr; + new_pc = vec; + + /* If transactional, change to suspend mode on IRQ delivery */ + if (MSR_TM_TRANSACTIONAL(msr)) + new_msr |= MSR_TS_S; + else + new_msr |= msr & MSR_TS_MASK; + + kvmppc_set_srr0(vcpu, pc); + kvmppc_set_srr1(vcpu, (msr & SRR1_MSR_BITS) | srr1_flags); + kvmppc_set_pc(vcpu, new_pc); + vcpu->arch.shregs.msr = new_msr; +} + +void kvmppc_inject_interrupt_hv(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags) +{ + inject_interrupt(vcpu, vec, srr1_flags); + kvmppc_end_cede(vcpu); +} +EXPORT_SYMBOL_GPL(kvmppc_inject_interrupt_hv); + /* * Is there a PRIV_DOORBELL pending for the guest (on POWER9)? * Can we inject a Decrementer or a External interrupt? @@ -762,7 +812,6 @@ void kvmhv_p9_restore_lpcr(struct kvm_split_mode *sip) void kvmppc_guest_entry_inject_int(struct kvm_vcpu *vcpu) { int ext; - unsigned long vec = 0; unsigned long lpcr; /* Insert EXTERNAL bit into LPCR at the MER bit position */ @@ -774,26 +823,16 @@ void kvmppc_guest_entry_inject_int(struct kvm_vcpu *vcpu) if (vcpu->arch.shregs.msr & MSR_EE) { if (ext) { - vec = BOOK3S_INTERRUPT_EXTERNAL; + inject_interrupt(vcpu, BOOK3S_INTERRUPT_EXTERNAL, 0); } else { long int dec = mfspr(SPRN_DEC); if (!(lpcr & LPCR_LD)) dec = (int) dec; if (dec < 0) - vec = BOOK3S_INTERRUPT_DECREMENTER; + inject_interrupt(vcpu, + BOOK3S_INTERRUPT_DECREMENTER, 0); } } - if (vec) { - unsigned long msr, old_msr = vcpu->arch.shregs.msr; - - kvmppc_set_srr0(vcpu, kvmppc_get_pc(vcpu)); - kvmppc_set_srr1(vcpu, old_msr); - kvmppc_set_pc(vcpu, vec); - msr = vcpu->arch.intr_msr; - if (MSR_TM_ACTIVE(old_msr)) - msr |= MSR_TS_S; - vcpu->arch.shregs.msr = msr; - } if (vcpu->arch.doorbell_request) { mtspr(SPRN_DPDES, 1); -- cgit v1.2.3 From 6a13cb0c376abb436d060b989018257963656d0c Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 2 Oct 2019 16:00:24 +1000 Subject: KVM: PPC: Book3S HV: Implement LPCR[AIL]=3 mode for injected interrupts kvmppc_inject_interrupt does not implement LPCR[AIL]!=0 modes, which can result in the guest receiving interrupts as if LPCR[AIL]=0 contrary to the ISA. In practice, Linux guests cope with this deviation, but it should be fixed. Signed-off-by: Nicholas Piggin Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_hv_builtin.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 068bee941a71..7cd3cf3d366b 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -792,6 +792,21 @@ static void inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags) else new_msr |= msr & MSR_TS_MASK; + /* + * Perform MSR and PC adjustment for LPCR[AIL]=3 if it is set and + * applicable. AIL=2 is not supported. + * + * AIL does not apply to SRESET, MCE, or HMI (which is never + * delivered to the guest), and does not apply if IR=0 or DR=0. 
+ */ + if (vec != BOOK3S_INTERRUPT_SYSTEM_RESET && + vec != BOOK3S_INTERRUPT_MACHINE_CHECK && + (vcpu->arch.vcore->lpcr & LPCR_AIL) == LPCR_AIL_3 && + (msr & (MSR_IR|MSR_DR)) == (MSR_IR|MSR_DR) ) { + new_msr |= MSR_IR | MSR_DR; + new_pc += 0xC000000000004000ULL; + } + kvmppc_set_srr0(vcpu, pc); kvmppc_set_srr1(vcpu, (msr & SRR1_MSR_BITS) | srr1_flags); kvmppc_set_pc(vcpu, new_pc); -- cgit v1.2.3 From 55d7004299eb917767761f01a208d50afad4f535 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 2 Oct 2019 16:00:25 +1000 Subject: KVM: PPC: Book3S HV: Reject mflags=2 (LPCR[AIL]=2) ADDR_TRANS_MODE mode AIL=2 mode has no known users, so is not well tested or supported. Disallow guests from selecting this mode because it may become deprecated in future versions of the architecture. This policy decision is not left to QEMU because KVM support is required for AIL=2 (when injecting interrupts). Signed-off-by: Nicholas Piggin Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_hv.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index c340d416dce3..ec5c0379296a 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -779,6 +779,11 @@ static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags, vcpu->arch.dawr = value1; vcpu->arch.dawrx = value2; return H_SUCCESS; + case H_SET_MODE_RESOURCE_ADDR_TRANS_MODE: + /* KVM does not support mflags=2 (AIL=2) */ + if (mflags != 0 && mflags != 3) + return H_UNSUPPORTED_FLAG_START; + return H_TOO_HARD; default: return H_TOO_HARD; } -- cgit v1.2.3 From e3b9a9e147dbe1a8fb9d8398a2faa47d8a6f50de Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 3 Oct 2019 21:17:43 +0000 Subject: KVM: SVM: Serialize access to the SEV ASID bitmap The SEV ASID bitmap currently is not protected against parallel SEV guest startups. This can result in an SEV guest failing to start because another SEV guest could have been assigned the same ASID value. Use a mutex to serialize access to the SEV ASID bitmap. 
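A minimal model of the fix -- plain userspace C with a pthread mutex, the bitmap reduced to a bool array, and all firmware interaction elided -- looks like:

    #include <pthread.h>
    #include <stdbool.h>

    #define MAX_ASID 512

    static pthread_mutex_t asid_lock = PTHREAD_MUTEX_INITIALIZER;
    static bool asid_used[MAX_ASID];   /* stand-in for sev_asid_bitmap */

    /* Returns an ASID in [1, MAX_ASID], or -1 if none are free. */
    static int asid_new(void)
    {
        int pos, ret = -1;

        pthread_mutex_lock(&asid_lock);
        for (pos = 0; pos < MAX_ASID; pos++) {
            if (!asid_used[pos]) {
                asid_used[pos] = true;   /* claim while still holding the lock */
                ret = pos + 1;
                break;
            }
        }
        pthread_mutex_unlock(&asid_lock);
        return ret;
    }

    static void asid_free(int asid)
    {
        pthread_mutex_lock(&asid_lock);
        asid_used[asid - 1] = false;
        pthread_mutex_unlock(&asid_lock);
    }

    int main(void)
    {
        int a = asid_new();
        asid_free(a);
        return a == 1 ? 0 : 1;
    }

Without the lock, two threads can both observe the same zero bit and claim one ASID twice, which is exactly the parallel-startup failure described above.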
Fixes: 1654efcbc431 ("KVM: SVM: Add KVM_SEV_INIT command") Tested-by: David Rientjes Signed-off-by: Tom Lendacky Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index f8ecb6df5106..d371007ab109 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -418,6 +418,7 @@ enum { #define VMCB_AVIC_APIC_BAR_MASK 0xFFFFFFFFFF000ULL +static DEFINE_MUTEX(sev_bitmap_lock); static unsigned int max_sev_asid; static unsigned int min_sev_asid; static unsigned long *sev_asid_bitmap; @@ -1723,25 +1724,22 @@ static int avic_init_backing_page(struct kvm_vcpu *vcpu) return 0; } -static void __sev_asid_free(int asid) +static void sev_asid_free(int asid) { struct svm_cpu_data *sd; int cpu, pos; + mutex_lock(&sev_bitmap_lock); + pos = asid - 1; - clear_bit(pos, sev_asid_bitmap); + __clear_bit(pos, sev_asid_bitmap); for_each_possible_cpu(cpu) { sd = per_cpu(svm_data, cpu); sd->sev_vmcbs[pos] = NULL; } -} - -static void sev_asid_free(struct kvm *kvm) -{ - struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; - __sev_asid_free(sev->asid); + mutex_unlock(&sev_bitmap_lock); } static void sev_unbind_asid(struct kvm *kvm, unsigned int handle) @@ -1910,7 +1908,7 @@ static void sev_vm_destroy(struct kvm *kvm) mutex_unlock(&kvm->lock); sev_unbind_asid(kvm, sev->handle); - sev_asid_free(kvm); + sev_asid_free(sev->asid); } static void avic_vm_destroy(struct kvm *kvm) @@ -6268,14 +6266,21 @@ static int sev_asid_new(void) { int pos; + mutex_lock(&sev_bitmap_lock); + /* * SEV-enabled guest must use asid from min_sev_asid to max_sev_asid. */ pos = find_next_zero_bit(sev_asid_bitmap, max_sev_asid, min_sev_asid - 1); - if (pos >= max_sev_asid) + if (pos >= max_sev_asid) { + mutex_unlock(&sev_bitmap_lock); return -EBUSY; + } + + __set_bit(pos, sev_asid_bitmap); + + mutex_unlock(&sev_bitmap_lock); - set_bit(pos, sev_asid_bitmap); return pos + 1; } @@ -6303,7 +6308,7 @@ static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp) return 0; e_free: - __sev_asid_free(asid); + sev_asid_free(asid); return ret; } -- cgit v1.2.3 From 83af5e65a89547633bab7278564219ca8e68b968 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 3 Oct 2019 21:17:45 +0000 Subject: KVM: SVM: Guard against DEACTIVATE when performing WBINVD/DF_FLUSH The SEV firmware DEACTIVATE command disassociates an SEV guest from an ASID, clears the WBINVD indicator on all threads and indicates that the SEV firmware DF_FLUSH command must be issued before the ASID can be re-used. The SEV firmware DF_FLUSH command will return an error if a WBINVD has not been performed on every thread before it has been invoked. A window exists between the WBINVD and the invocation of the DF_FLUSH command where an SEV firmware DEACTIVATE command could be invoked on another thread, clearing the WBINVD indicator. This will cause the subsequent SEV firmware DF_FLUSH command to fail which, in turn, results in the SEV firmware ACTIVATE command failing for the reclaimed ASID. This results in the SEV guest failing to start. Use a mutex to close the WBINVD/DF_FLUSH window by obtaining the mutex before the DEACTIVATE and releasing it after the DF_FLUSH. This ensures that any DEACTIVATE cannot run before a DF_FLUSH has completed. 
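The closed window can be modeled with one lock held across the whole DEACTIVATE/WBINVD/DF_FLUSH sequence (a sketch; the fw_* helpers are hypothetical stand-ins for the firmware commands and the cache flush):

    #include <pthread.h>
    #include <stdio.h>

    /* Hypothetical stubs for the firmware commands and cache flush. */
    static void fw_deactivate(int handle) { printf("DEACTIVATE %d\n", handle); }
    static void wbinvd_on_all_cpus(void)  { printf("WBINVD\n"); }
    static int  fw_df_flush(void)         { printf("DF_FLUSH\n"); return 0; }

    static pthread_mutex_t deactivate_lock = PTHREAD_MUTEX_INITIALIZER;

    static void unbind_asid(int handle)
    {
        /*
         * No DEACTIVATE may slip between another thread's WBINVD and its
         * DF_FLUSH -- it would clear the WBINVD indicator and fail the
         * flush -- so the whole sequence runs under one lock.
         */
        pthread_mutex_lock(&deactivate_lock);
        fw_deactivate(handle);
        wbinvd_on_all_cpus();
        fw_df_flush();
        pthread_mutex_unlock(&deactivate_lock);
    }

    int main(void) { unbind_asid(1); return 0; }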
Fixes: 59414c989220 ("KVM: SVM: Add support for KVM_SEV_LAUNCH_START command") Tested-by: David Rientjes Signed-off-by: Tom Lendacky Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index d371007ab109..1d217680cf83 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -418,6 +418,7 @@ enum { #define VMCB_AVIC_APIC_BAR_MASK 0xFFFFFFFFFF000ULL +static DEFINE_MUTEX(sev_deactivate_lock); static DEFINE_MUTEX(sev_bitmap_lock); static unsigned int max_sev_asid; static unsigned int min_sev_asid; @@ -1756,10 +1757,20 @@ static void sev_unbind_asid(struct kvm *kvm, unsigned int handle) /* deactivate handle */ data->handle = handle; + + /* + * Guard against a parallel DEACTIVATE command before the DF_FLUSH + * command has completed. + */ + mutex_lock(&sev_deactivate_lock); + sev_guest_deactivate(data, NULL); wbinvd_on_all_cpus(); sev_guest_df_flush(NULL); + + mutex_unlock(&sev_deactivate_lock); + kfree(data); decommission = kzalloc(sizeof(*decommission), GFP_KERNEL); @@ -6318,9 +6329,18 @@ static int sev_bind_asid(struct kvm *kvm, unsigned int handle, int *error) int asid = sev_get_asid(kvm); int ret; + /* + * Guard against a DEACTIVATE command before the DF_FLUSH command + * has completed. + */ + mutex_lock(&sev_deactivate_lock); + wbinvd_on_all_cpus(); ret = sev_guest_df_flush(error); + + mutex_unlock(&sev_deactivate_lock); + if (ret) return ret; -- cgit v1.2.3 From 04f11ef45810da5ae2542dd78cc353f3761bd2cb Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 27 Sep 2019 14:45:16 -0700 Subject: KVM: nVMX: Always write vmcs02.GUEST_CR3 during nested VM-Enter Write the desired L2 CR3 into vmcs02.GUEST_CR3 during nested VM-Enter instead of deferring the VMWRITE until vmx_set_cr3(). If the VMWRITE is deferred, then KVM can consume a stale vmcs02.GUEST_CR3 when it refreshes vmcs12->guest_cr3 during nested_vmx_vmexit() if the emulated VM-Exit occurs without actually entering L2, e.g. if the nested run is squashed because nested VM-Enter (from L1) is putting L2 into HLT. Note, the above scenario can occur regardless of whether L1 is intercepting HLT, e.g. L1 can intercept HLT and then re-enter L2 with vmcs.GUEST_ACTIVITY_STATE=HALTED. But practically speaking, a VMM will likely put a guest into HALTED if and only if it's not intercepting HLT. In an ideal world where EPT *requires* unrestricted guest (and vice versa), VMX could handle CR3 similar to how it handles RSP and RIP, e.g. mark CR3 dirty and conditionally load it at vmx_vcpu_run(). But the unrestricted guest silliness complicates the dirty tracking logic to the point that explicitly handling vmcs02.GUEST_CR3 during nested VM-Enter is a simpler overall implementation. Cc: stable@vger.kernel.org Reported-and-tested-by: Reto Buerki Tested-by: Vitaly Kuznetsov Reviewed-by: Liran Alon Signed-off-by: Sean Christopherson Reviewed-by: Jim Mattson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 10 ++++++++++ arch/x86/kvm/vmx/vmx.c | 10 +++++++--- 2 files changed, 17 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index e76eb4f07f6c..d93ddc79a595 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -2418,6 +2418,16 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, entry_failure_code)) return -EINVAL; + /* + * Immediately write vmcs02.GUEST_CR3. 
It will be propagated to vmcs12 + * on nested VM-Exit, which can occur without actually running L2 and + * thus without hitting vmx_set_cr3(), e.g. if L1 is entering L2 with + * vmcs12.GUEST_ACTIVITYSTATE=HLT, in which case KVM will intercept the + * transition to HLT instead of running L2. + */ + if (enable_ept) + vmcs_writel(GUEST_CR3, vmcs12->guest_cr3); + /* Late preparation of GUEST_PDPTRs now that EFER and CRs are set. */ if (load_guest_pdptrs_vmcs12 && nested_cpu_has_ept(vmcs12) && is_pae_paging(vcpu)) { diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 8f01019295a1..04603f53ca36 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -2989,6 +2989,7 @@ u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa) void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) { struct kvm *kvm = vcpu->kvm; + bool update_guest_cr3 = true; unsigned long guest_cr3; u64 eptp; @@ -3005,15 +3006,18 @@ void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock); } - if (enable_unrestricted_guest || is_paging(vcpu) || - is_guest_mode(vcpu)) + /* Loading vmcs02.GUEST_CR3 is handled by nested VM-Enter. */ + if (is_guest_mode(vcpu)) + update_guest_cr3 = false; + else if (enable_unrestricted_guest || is_paging(vcpu)) guest_cr3 = kvm_read_cr3(vcpu); else guest_cr3 = to_kvm_vmx(kvm)->ept_identity_map_addr; ept_load_pdptrs(vcpu); } - vmcs_writel(GUEST_CR3, guest_cr3); + if (update_guest_cr3) + vmcs_writel(GUEST_CR3, guest_cr3); } int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) -- cgit v1.2.3 From 0fc5deae03a2724a4b18373b2e6a3b585019de1e Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 3 Oct 2019 21:17:46 +0000 Subject: KVM: SVM: Remove unneeded WBINVD and DF_FLUSH when starting SEV guests Performing a WBINVD and DF_FLUSH are expensive operations. The SEV support currently performs this WBINVD/DF_FLUSH combination when an SEV guest is terminated, so there is no need for it to be done before LAUNCH. However, when the SEV firmware transitions the platform from UNINIT state to INIT state, all ASIDs will be marked invalid across all threads. Therefore, as part of transitioning the platform to INIT state, perform a WBINVD/DF_FLUSH after a successful INIT in the PSP/SEV device driver. Since the PSP/SEV device driver is x86 only, it can reference and use the WBINVD related functions directly. Cc: Gary Hook Cc: Herbert Xu Cc: "David S. Miller" Tested-by: David Rientjes Signed-off-by: Tom Lendacky Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 15 --------------- drivers/crypto/ccp/psp-dev.c | 9 +++++++++ 2 files changed, 9 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 1d217680cf83..389dfd7594eb 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -6329,21 +6329,6 @@ static int sev_bind_asid(struct kvm *kvm, unsigned int handle, int *error) int asid = sev_get_asid(kvm); int ret; - /* - * Guard against a DEACTIVATE command before the DF_FLUSH command - * has completed. 
- */ - mutex_lock(&sev_deactivate_lock); - - wbinvd_on_all_cpus(); - - ret = sev_guest_df_flush(error); - - mutex_unlock(&sev_deactivate_lock); - - if (ret) - return ret; - data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT); if (!data) return -ENOMEM; diff --git a/drivers/crypto/ccp/psp-dev.c b/drivers/crypto/ccp/psp-dev.c index 6b17d179ef8a..39fdd0641637 100644 --- a/drivers/crypto/ccp/psp-dev.c +++ b/drivers/crypto/ccp/psp-dev.c @@ -21,6 +21,8 @@ #include #include +#include + #include "sp-dev.h" #include "psp-dev.h" @@ -235,6 +237,13 @@ static int __sev_platform_init_locked(int *error) return rc; psp->sev_state = SEV_STATE_INIT; + + /* Prepare for first SEV guest launch after INIT */ + wbinvd_on_all_cpus(); + rc = __sev_do_cmd_locked(SEV_CMD_DF_FLUSH, NULL, error); + if (rc) + return rc; + dev_dbg(psp->dev, "SEV firmware initialized\n"); return rc; -- cgit v1.2.3 From 33af3a7ef9e6fb6fa5f0168c3c67f51776dafc54 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 3 Oct 2019 21:17:48 +0000 Subject: KVM: SVM: Reduce WBINVD/DF_FLUSH invocations Performing a WBINVD and DF_FLUSH are expensive operations. Currently, a WBINVD/DF_FLUSH is performed every time an SEV guest terminates. However, the WBINVD/DF_FLUSH is only required when an ASID is being re-allocated to a new SEV guest. Also, a single WBINVD/DF_FLUSH can enable all ASIDs that have been disassociated from guests through DEACTIVATE. To reduce the number of WBINVD/DF_FLUSH invocations, introduce a new ASID bitmap to track ASIDs that need to be reclaimed. When an SEV guest is terminated, add its ASID to the reclaim bitmap instead of clearing the bitmap in the existing SEV ASID bitmap. This delays the need to perform a WBINVD/DF_FLUSH invocation when an SEV guest terminates until all of the available SEV ASIDs have been used. At that point, the WBINVD/DF_FLUSH invocation can be performed and all ASIDs in the reclaim bitmap moved to the available ASIDs bitmap. The semaphore around DEACTIVATE can be changed to a read semaphore with the semaphore taken in write mode before performing the WBINVD/DF_FLUSH. 
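A condensed userspace model of the resulting allocator -- bitmaps as bool arrays, the rwsem as a pthread rwlock, WBINVD/DF_FLUSH stubbed out -- shows how the two bitmaps and the two lock modes fit together:

    #include <pthread.h>
    #include <stdbool.h>

    #define MAX_ASID 512

    static pthread_rwlock_t deactivate_lock = PTHREAD_RWLOCK_INITIALIZER;
    static pthread_mutex_t bitmap_lock = PTHREAD_MUTEX_INITIALIZER;
    static bool used[MAX_ASID], reclaim[MAX_ASID];

    static int flush_asids(void)
    {
        /* DEACTIVATE callers hold this as readers; exclude them here. */
        pthread_rwlock_wrlock(&deactivate_lock);
        /* wbinvd_on_all_cpus() + the DF_FLUSH firmware command go here */
        pthread_rwlock_unlock(&deactivate_lock);
        return 0;
    }

    /* Called with bitmap_lock held. */
    static bool recycle_asids(void)
    {
        bool any = false;
        int i;

        for (i = 0; i < MAX_ASID; i++)
            any |= reclaim[i];
        if (!any || flush_asids())
            return false;

        /* One flush re-enables every deactivated ASID at once. */
        for (i = 0; i < MAX_ASID; i++) {
            if (reclaim[i]) {
                used[i] = false;
                reclaim[i] = false;
            }
        }
        return true;
    }

    static int asid_new(void)
    {
        bool retry = true;
        int i;

        pthread_mutex_lock(&bitmap_lock);
    again:
        for (i = 0; i < MAX_ASID; i++) {
            if (!used[i]) {
                used[i] = true;
                pthread_mutex_unlock(&bitmap_lock);
                return i + 1;
            }
        }
        if (retry && recycle_asids()) {
            retry = false;
            goto again;
        }
        pthread_mutex_unlock(&bitmap_lock);
        return -1;   /* -EBUSY */
    }

    static void asid_free(int asid)
    {
        pthread_mutex_lock(&bitmap_lock);
        reclaim[asid - 1] = true;   /* defer the expensive flush */
        pthread_mutex_unlock(&bitmap_lock);
    }

Freeing never touches used[], so a terminated guest's ASID stays unusable until the next allocation failure forces a single batched flush.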
Tested-by: David Rientjes Signed-off-by: Tom Lendacky Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 81 ++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 66 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 389dfd7594eb..62b0938b62ef 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include @@ -418,11 +419,13 @@ enum { #define VMCB_AVIC_APIC_BAR_MASK 0xFFFFFFFFFF000ULL -static DEFINE_MUTEX(sev_deactivate_lock); +static int sev_flush_asids(void); +static DECLARE_RWSEM(sev_deactivate_lock); static DEFINE_MUTEX(sev_bitmap_lock); static unsigned int max_sev_asid; static unsigned int min_sev_asid; static unsigned long *sev_asid_bitmap; +static unsigned long *sev_reclaim_asid_bitmap; #define __sme_page_pa(x) __sme_set(page_to_pfn(x) << PAGE_SHIFT) struct enc_region { @@ -1231,11 +1234,15 @@ static __init int sev_hardware_setup(void) /* Minimum ASID value that should be used for SEV guest */ min_sev_asid = cpuid_edx(0x8000001F); - /* Initialize SEV ASID bitmap */ + /* Initialize SEV ASID bitmaps */ sev_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL); if (!sev_asid_bitmap) return 1; + sev_reclaim_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL); + if (!sev_reclaim_asid_bitmap) + return 1; + status = kmalloc(sizeof(*status), GFP_KERNEL); if (!status) return 1; @@ -1414,8 +1421,12 @@ static __exit void svm_hardware_unsetup(void) { int cpu; - if (svm_sev_enabled()) + if (svm_sev_enabled()) { bitmap_free(sev_asid_bitmap); + bitmap_free(sev_reclaim_asid_bitmap); + + sev_flush_asids(); + } for_each_possible_cpu(cpu) svm_cpu_uninit(cpu); @@ -1733,7 +1744,7 @@ static void sev_asid_free(int asid) mutex_lock(&sev_bitmap_lock); pos = asid - 1; - __clear_bit(pos, sev_asid_bitmap); + __set_bit(pos, sev_reclaim_asid_bitmap); for_each_possible_cpu(cpu) { sd = per_cpu(svm_data, cpu); @@ -1758,18 +1769,10 @@ static void sev_unbind_asid(struct kvm *kvm, unsigned int handle) /* deactivate handle */ data->handle = handle; - /* - * Guard against a parallel DEACTIVATE command before the DF_FLUSH - * command has completed. - */ - mutex_lock(&sev_deactivate_lock); - + /* Guard DEACTIVATE against WBINVD/DF_FLUSH used in ASID recycling */ + down_read(&sev_deactivate_lock); sev_guest_deactivate(data, NULL); - - wbinvd_on_all_cpus(); - sev_guest_df_flush(NULL); - - mutex_unlock(&sev_deactivate_lock); + up_read(&sev_deactivate_lock); kfree(data); @@ -6273,8 +6276,51 @@ static int enable_smi_window(struct kvm_vcpu *vcpu) return 0; } +static int sev_flush_asids(void) +{ + int ret, error; + + /* + * DEACTIVATE will clear the WBINVD indicator causing DF_FLUSH to fail, + * so it must be guarded. 
+ */ + down_write(&sev_deactivate_lock); + + wbinvd_on_all_cpus(); + ret = sev_guest_df_flush(&error); + + up_write(&sev_deactivate_lock); + + if (ret) + pr_err("SEV: DF_FLUSH failed, ret=%d, error=%#x\n", ret, error); + + return ret; +} + +/* Must be called with the sev_bitmap_lock held */ +static bool __sev_recycle_asids(void) +{ + int pos; + + /* Check if there are any ASIDs to reclaim before performing a flush */ + pos = find_next_bit(sev_reclaim_asid_bitmap, + max_sev_asid, min_sev_asid - 1); + if (pos >= max_sev_asid) + return false; + + if (sev_flush_asids()) + return false; + + bitmap_xor(sev_asid_bitmap, sev_asid_bitmap, sev_reclaim_asid_bitmap, + max_sev_asid); + bitmap_zero(sev_reclaim_asid_bitmap, max_sev_asid); + + return true; +} + static int sev_asid_new(void) { + bool retry = true; int pos; mutex_lock(&sev_bitmap_lock); @@ -6282,8 +6328,13 @@ static int sev_asid_new(void) /* * SEV-enabled guest must use asid from min_sev_asid to max_sev_asid. */ +again: pos = find_next_zero_bit(sev_asid_bitmap, max_sev_asid, min_sev_asid - 1); if (pos >= max_sev_asid) { + if (retry && __sev_recycle_asids()) { + retry = false; + goto again; + } mutex_unlock(&sev_bitmap_lock); return -EBUSY; } -- cgit v1.2.3 From b17b7436f2f0c4984f98a0b317b8362fd365700d Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 27 Sep 2019 14:45:17 -0700 Subject: KVM: VMX: Skip GUEST_CR3 VMREAD+VMWRITE if the VMCS is up-to-date Skip the VMWRITE to update GUEST_CR3 if CR3 is not available, i.e. has not been read from the VMCS since the last VM-Enter. If vcpu->arch.cr3 is stale, kvm_read_cr3(vcpu) will refresh vcpu->arch.cr3 from the VMCS, meaning KVM will do a VMREAD and then VMWRITE the value it just pulled from the VMCS. Note, this is a purely theoretical change, no instances of skipping the VMREAD+VMWRITE have been observed with this change. Tested-by: Reto Buerki Tested-by: Vitaly Kuznetsov Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 04603f53ca36..71c7a174bdaa 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -3009,10 +3009,12 @@ void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) /* Loading vmcs02.GUEST_CR3 is handled by nested VM-Enter. */ if (is_guest_mode(vcpu)) update_guest_cr3 = false; - else if (enable_unrestricted_guest || is_paging(vcpu)) - guest_cr3 = kvm_read_cr3(vcpu); - else + else if (!enable_unrestricted_guest && !is_paging(vcpu)) guest_cr3 = to_kvm_vmx(kvm)->ept_identity_map_addr; + else if (test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail)) + guest_cr3 = vcpu->arch.cr3; + else /* vmcs01.GUEST_CR3 is already up-to-date. */ + update_guest_cr3 = false; ept_load_pdptrs(vcpu); } -- cgit v1.2.3 From e7bddc52582d5961dfb782b40a94f54c9e6673a0 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 27 Sep 2019 14:45:18 -0700 Subject: KVM: VMX: Consolidate to_vmx() usage in RFLAGS accessors Capture struct vcpu_vmx in a local variable to improve the readability of vmx_{g,s}et_rflags(). No functional change intended. 
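The change is the standard hoist of a repeated container_of()-style lookup into a local. A generic, compilable illustration of the pattern (toy types, not the KVM ones):

    #include <stddef.h>

    struct vcpu { int id; };
    struct vcpu_vmx { struct vcpu vcpu; unsigned long rflags; int vm86_active; };

    /* to_vmx()-style lookup: recover the container from the embedded member */
    static struct vcpu_vmx *to_vmx(struct vcpu *v)
    {
        return (struct vcpu_vmx *)((char *)v - offsetof(struct vcpu_vmx, vcpu));
    }

    static unsigned long get_rflags(struct vcpu *v)
    {
        struct vcpu_vmx *vmx = to_vmx(v);   /* one lookup... */

        if (vmx->vm86_active)               /* ...reused for each access */
            return vmx->rflags & 0xffff;
        return vmx->rflags;
    }

    int main(void)
    {
        struct vcpu_vmx vmx = { .rflags = 0x2 };
        return (int)get_rflags(&vmx.vcpu) - 2;   /* exits 0 */
    }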
Signed-off-by: Sean Christopherson Reviewed-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 71c7a174bdaa..cc83abc93f6d 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1412,35 +1412,37 @@ static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu); unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu) { + struct vcpu_vmx *vmx = to_vmx(vcpu); unsigned long rflags, save_rflags; if (!test_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail)) { __set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail); rflags = vmcs_readl(GUEST_RFLAGS); - if (to_vmx(vcpu)->rmode.vm86_active) { + if (vmx->rmode.vm86_active) { rflags &= RMODE_GUEST_OWNED_EFLAGS_BITS; - save_rflags = to_vmx(vcpu)->rmode.save_rflags; + save_rflags = vmx->rmode.save_rflags; rflags |= save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS; } - to_vmx(vcpu)->rflags = rflags; + vmx->rflags = rflags; } - return to_vmx(vcpu)->rflags; + return vmx->rflags; } void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) { + struct vcpu_vmx *vmx = to_vmx(vcpu); unsigned long old_rflags = vmx_get_rflags(vcpu); __set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail); - to_vmx(vcpu)->rflags = rflags; - if (to_vmx(vcpu)->rmode.vm86_active) { - to_vmx(vcpu)->rmode.save_rflags = rflags; + vmx->rflags = rflags; + if (vmx->rmode.vm86_active) { + vmx->rmode.save_rflags = rflags; rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM; } vmcs_writel(GUEST_RFLAGS, rflags); - if ((old_rflags ^ to_vmx(vcpu)->rflags) & X86_EFLAGS_VM) - to_vmx(vcpu)->emulation_required = emulation_required(vcpu); + if ((old_rflags ^ vmx->rflags) & X86_EFLAGS_VM) + vmx->emulation_required = emulation_required(vcpu); } u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu) -- cgit v1.2.3 From 491c1ad1ac8d891aa440eb0216d023af6c038346 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 27 Sep 2019 14:45:19 -0700 Subject: KVM: VMX: Optimize vmx_set_rflags() for unrestricted guest Rework vmx_set_rflags() to avoid the extra code need to handle emulation of real mode and invalid state when unrestricted guest is disabled. The primary reason for doing so is to avoid the call to vmx_get_rflags(), which will incur a VMREAD when RFLAGS is not already available. When running nested VMs, the majority of calls to vmx_set_rflags() will occur without an associated vmx_get_rflags(), i.e. when stuffing GUEST_RFLAGS during transitions between vmcs01 and vmcs02. Note, vmx_get_rflags() guarantees RFLAGS is marked available. Signed-off-by: Sean Christopherson [Replace "else" with early "return" in the unrestricted guest branch. 
- Paolo] Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index cc83abc93f6d..9eb35e6cbc3f 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1431,9 +1431,16 @@ unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu) void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) { struct vcpu_vmx *vmx = to_vmx(vcpu); - unsigned long old_rflags = vmx_get_rflags(vcpu); + unsigned long old_rflags; - __set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail); + if (enable_unrestricted_guest) { + __set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail); + vmx->rflags = rflags; + vmcs_writel(GUEST_RFLAGS, rflags); + return; + } + + old_rflags = vmx_get_rflags(vcpu); vmx->rflags = rflags; if (vmx->rmode.vm86_active) { vmx->rmode.save_rflags = rflags; -- cgit v1.2.3 From 489cbcf01d1c9e1bf09b7e371d0f312b3a1f3ef2 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 27 Sep 2019 14:45:20 -0700 Subject: KVM: x86: Add WARNs to detect out-of-bounds register indices Add WARN_ON_ONCE() checks in kvm_register_{read,write}() to detect reg values that would cause KVM to overflow vcpu->arch.regs. Change the reg param to an 'int' to make it clear that the reg index is unverified. Regarding the overhead of WARN_ON_ONCE(), now that all fixed GPR reads and writes use dedicated accessors, e.g. kvm_rax_read(), the overhead is limited to flows where the reg index is generated at runtime. And there is at least one historical bug where KVM has generated an out-of- bounds access to arch.regs (see commit b68f3cc7d9789, "KVM: x86: Always use 32-bit SMRAM save state for 32-bit kernels"). Adding the WARN_ON_ONCE() protection paves the way for additional cleanup related to kvm_reg and kvm_reg_ex. 
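A compilable model of the guarded accessors (WARN_ON_ONCE approximated by a per-site one-shot message):

    #include <stdio.h>
    #include <stdbool.h>

    #define NR_REGS 17
    static unsigned long regs[NR_REGS];

    #define WARN_ONCE_OOB(reg) do {                              \
            static bool warned;                                  \
            if (!warned) {                                       \
                warned = true;                                   \
                fprintf(stderr, "bad reg index %d\n", (reg));    \
            }                                                    \
    } while (0)

    static unsigned long register_read(int reg)
    {
        /* The unsigned cast also catches negative indices. */
        if ((unsigned int)reg >= NR_REGS) {
            WARN_ONCE_OOB(reg);
            return 0;   /* fail safe instead of overflowing the array */
        }
        return regs[reg];
    }

    static void register_write(int reg, unsigned long val)
    {
        if ((unsigned int)reg >= NR_REGS) {
            WARN_ONCE_OOB(reg);
            return;
        }
        regs[reg] = val;
    }

    int main(void)
    {
        register_write(3, 42);
        register_write(99, 1);               /* warns once, writes nothing */
        return (int)register_read(3) - 42;   /* exits 0 */
    }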
Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/kvm_cache_regs.h | 12 ++++++++---- arch/x86/kvm/x86.h | 6 ++---- 2 files changed, 10 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h index 1cc6c47dc77e..807c12c122c0 100644 --- a/arch/x86/kvm/kvm_cache_regs.h +++ b/arch/x86/kvm/kvm_cache_regs.h @@ -37,19 +37,23 @@ BUILD_KVM_GPR_ACCESSORS(r14, R14) BUILD_KVM_GPR_ACCESSORS(r15, R15) #endif -static inline unsigned long kvm_register_read(struct kvm_vcpu *vcpu, - enum kvm_reg reg) +static inline unsigned long kvm_register_read(struct kvm_vcpu *vcpu, int reg) { + if (WARN_ON_ONCE((unsigned int)reg >= NR_VCPU_REGS)) + return 0; + if (!test_bit(reg, (unsigned long *)&vcpu->arch.regs_avail)) kvm_x86_ops->cache_reg(vcpu, reg); return vcpu->arch.regs[reg]; } -static inline void kvm_register_write(struct kvm_vcpu *vcpu, - enum kvm_reg reg, +static inline void kvm_register_write(struct kvm_vcpu *vcpu, int reg, unsigned long val) { + if (WARN_ON_ONCE((unsigned int)reg >= NR_VCPU_REGS)) + return; + vcpu->arch.regs[reg] = val; __set_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty); __set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail); diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index dbf7442a822b..45d82b8277e5 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -238,8 +238,7 @@ static inline bool vcpu_match_mmio_gpa(struct kvm_vcpu *vcpu, gpa_t gpa) return false; } -static inline unsigned long kvm_register_readl(struct kvm_vcpu *vcpu, - enum kvm_reg reg) +static inline unsigned long kvm_register_readl(struct kvm_vcpu *vcpu, int reg) { unsigned long val = kvm_register_read(vcpu, reg); @@ -247,8 +246,7 @@ static inline unsigned long kvm_register_readl(struct kvm_vcpu *vcpu, } static inline void kvm_register_writel(struct kvm_vcpu *vcpu, - enum kvm_reg reg, - unsigned long val) + int reg, unsigned long val) { if (!is_64_bit_mode(vcpu)) val = (u32)val; -- cgit v1.2.3 From f8845541e93c5b41618405de6735edd6f0cc8984 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 27 Sep 2019 14:45:21 -0700 Subject: KVM: x86: Fold 'enum kvm_ex_reg' definitions into 'enum kvm_reg' Now that indexing into arch.regs is either protected by WARN_ON_ONCE or done with hardcoded enums, combine all definitions for registers that are tracked by regs_avail and regs_dirty into 'enum kvm_reg'. Having a single enum type will simplify additional cleanup related to regs_avail and regs_dirty. 
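The shape of the merged enum, reduced to a sketch: once the VCPU_EXREG_* values live in the same enum, helpers such as the cache_reg() callback accept them without casts:

    enum kvm_reg {
        VCPU_REGS_RAX, VCPU_REGS_RCX, /* ... remaining GPRs ... */
        VCPU_REGS_RIP,
        NR_VCPU_REGS,

        /* non-GPR registers also tracked via regs_avail/regs_dirty */
        VCPU_EXREG_PDPTR = NR_VCPU_REGS,
        VCPU_EXREG_CR3,
        VCPU_EXREG_RFLAGS,
    };

    /* Previously required (enum kvm_reg)VCPU_EXREG_PDPTR; now it type-checks. */
    static void cache_reg(enum kvm_reg reg) { (void)reg; }

    int main(void) { cache_reg(VCPU_EXREG_PDPTR); return 0; }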
Signed-off-by: Sean Christopherson Reviewed-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 4 +--- arch/x86/kvm/kvm_cache_regs.h | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 50eb430b0ad8..c86c95a499af 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -156,10 +156,8 @@ enum kvm_reg { VCPU_REGS_R15 = __VCPU_REGS_R15, #endif VCPU_REGS_RIP, - NR_VCPU_REGS -}; + NR_VCPU_REGS, -enum kvm_reg_ex { VCPU_EXREG_PDPTR = NR_VCPU_REGS, VCPU_EXREG_CR3, VCPU_EXREG_RFLAGS, diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h index 807c12c122c0..728f8e19be64 100644 --- a/arch/x86/kvm/kvm_cache_regs.h +++ b/arch/x86/kvm/kvm_cache_regs.h @@ -85,7 +85,7 @@ static inline u64 kvm_pdptr_read(struct kvm_vcpu *vcpu, int index) if (!test_bit(VCPU_EXREG_PDPTR, (unsigned long *)&vcpu->arch.regs_avail)) - kvm_x86_ops->cache_reg(vcpu, (enum kvm_reg)VCPU_EXREG_PDPTR); + kvm_x86_ops->cache_reg(vcpu, VCPU_EXREG_PDPTR); return vcpu->arch.walk_mmu->pdptrs[index]; } -- cgit v1.2.3 From cb3c1e2f3e8d0a77824c05c7c38f03d2cbdeaf9e Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 27 Sep 2019 14:45:22 -0700 Subject: KVM: x86: Add helpers to test/mark reg availability and dirtiness Add helpers to prettify code that tests and/or marks whether or not a register is available and/or dirty. Suggested-by: Vitaly Kuznetsov Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/kvm_cache_regs.h | 35 +++++++++++++++++++++++++++++------ arch/x86/kvm/vmx/nested.c | 4 ++-- arch/x86/kvm/vmx/vmx.c | 29 +++++++++++++---------------- arch/x86/kvm/x86.c | 13 +++++-------- 4 files changed, 49 insertions(+), 32 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h index 728f8e19be64..e85b5ed22371 100644 --- a/arch/x86/kvm/kvm_cache_regs.h +++ b/arch/x86/kvm/kvm_cache_regs.h @@ -37,12 +37,37 @@ BUILD_KVM_GPR_ACCESSORS(r14, R14) BUILD_KVM_GPR_ACCESSORS(r15, R15) #endif +static inline bool kvm_register_is_available(struct kvm_vcpu *vcpu, + enum kvm_reg reg) +{ + return test_bit(reg, (unsigned long *)&vcpu->arch.regs_avail); +} + +static inline bool kvm_register_is_dirty(struct kvm_vcpu *vcpu, + enum kvm_reg reg) +{ + return test_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty); +} + +static inline void kvm_register_mark_available(struct kvm_vcpu *vcpu, + enum kvm_reg reg) +{ + __set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail); +} + +static inline void kvm_register_mark_dirty(struct kvm_vcpu *vcpu, + enum kvm_reg reg) +{ + __set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail); + __set_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty); +} + static inline unsigned long kvm_register_read(struct kvm_vcpu *vcpu, int reg) { if (WARN_ON_ONCE((unsigned int)reg >= NR_VCPU_REGS)) return 0; - if (!test_bit(reg, (unsigned long *)&vcpu->arch.regs_avail)) + if (!kvm_register_is_available(vcpu, reg)) kvm_x86_ops->cache_reg(vcpu, reg); return vcpu->arch.regs[reg]; @@ -55,8 +80,7 @@ static inline void kvm_register_write(struct kvm_vcpu *vcpu, int reg, return; vcpu->arch.regs[reg] = val; - __set_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty); - __set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail); + kvm_register_mark_dirty(vcpu, reg); } static inline unsigned long kvm_rip_read(struct kvm_vcpu *vcpu) @@ -83,8 +107,7 @@ static inline u64 kvm_pdptr_read(struct 
kvm_vcpu *vcpu, int index) { might_sleep(); /* on svm */ - if (!test_bit(VCPU_EXREG_PDPTR, - (unsigned long *)&vcpu->arch.regs_avail)) + if (!kvm_register_is_available(vcpu, VCPU_EXREG_PDPTR)) kvm_x86_ops->cache_reg(vcpu, VCPU_EXREG_PDPTR); return vcpu->arch.walk_mmu->pdptrs[index]; @@ -113,7 +136,7 @@ static inline ulong kvm_read_cr4_bits(struct kvm_vcpu *vcpu, ulong mask) static inline ulong kvm_read_cr3(struct kvm_vcpu *vcpu) { - if (!test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail)) + if (!kvm_register_is_available(vcpu, VCPU_EXREG_CR3)) kvm_x86_ops->decache_cr3(vcpu); return vcpu->arch.cr3; } diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index d93ddc79a595..5e231da00310 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -1012,7 +1012,7 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool ne kvm_mmu_new_cr3(vcpu, cr3, false); vcpu->arch.cr3 = cr3; - __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); + kvm_register_mark_available(vcpu, VCPU_EXREG_CR3); kvm_init_mmu(vcpu, false); @@ -3986,7 +3986,7 @@ static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu) nested_ept_uninit_mmu_context(vcpu); vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); - __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); + kvm_register_mark_available(vcpu, VCPU_EXREG_CR3); /* * Use ept_save_pdptrs(vcpu) to load the MMU's cached PDPTRs diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 9eb35e6cbc3f..48a41abe016b 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -726,8 +726,8 @@ static bool vmx_segment_cache_test_set(struct vcpu_vmx *vmx, unsigned seg, bool ret; u32 mask = 1 << (seg * SEG_FIELD_NR + field); - if (!(vmx->vcpu.arch.regs_avail & (1 << VCPU_EXREG_SEGMENTS))) { - vmx->vcpu.arch.regs_avail |= (1 << VCPU_EXREG_SEGMENTS); + if (!kvm_register_is_available(&vmx->vcpu, VCPU_EXREG_SEGMENTS)) { + kvm_register_mark_available(&vmx->vcpu, VCPU_EXREG_SEGMENTS); vmx->segment_cache.bitmask = 0; } ret = vmx->segment_cache.bitmask & mask; @@ -1415,8 +1415,8 @@ unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu) struct vcpu_vmx *vmx = to_vmx(vcpu); unsigned long rflags, save_rflags; - if (!test_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail)) { - __set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail); + if (!kvm_register_is_available(vcpu, VCPU_EXREG_RFLAGS)) { + kvm_register_mark_available(vcpu, VCPU_EXREG_RFLAGS); rflags = vmcs_readl(GUEST_RFLAGS); if (vmx->rmode.vm86_active) { rflags &= RMODE_GUEST_OWNED_EFLAGS_BITS; @@ -1434,7 +1434,7 @@ void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) unsigned long old_rflags; if (enable_unrestricted_guest) { - __set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail); + kvm_register_mark_available(vcpu, VCPU_EXREG_RFLAGS); vmx->rflags = rflags; vmcs_writel(GUEST_RFLAGS, rflags); return; @@ -2179,7 +2179,8 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) { - __set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail); + kvm_register_mark_available(vcpu, reg); + switch (reg) { case VCPU_REGS_RSP: vcpu->arch.regs[VCPU_REGS_RSP] = vmcs_readl(GUEST_RSP); @@ -2866,7 +2867,7 @@ static void vmx_decache_cr3(struct kvm_vcpu *vcpu) { if (enable_unrestricted_guest || (enable_ept && is_paging(vcpu))) vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); - __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); + kvm_register_mark_available(vcpu, 
VCPU_EXREG_CR3); } static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) @@ -2881,8 +2882,7 @@ static void ept_load_pdptrs(struct kvm_vcpu *vcpu) { struct kvm_mmu *mmu = vcpu->arch.walk_mmu; - if (!test_bit(VCPU_EXREG_PDPTR, - (unsigned long *)&vcpu->arch.regs_dirty)) + if (!kvm_register_is_dirty(vcpu, VCPU_EXREG_PDPTR)) return; if (is_pae_paging(vcpu)) { @@ -2904,10 +2904,7 @@ void ept_save_pdptrs(struct kvm_vcpu *vcpu) mmu->pdptrs[3] = vmcs_read64(GUEST_PDPTR3); } - __set_bit(VCPU_EXREG_PDPTR, - (unsigned long *)&vcpu->arch.regs_avail); - __set_bit(VCPU_EXREG_PDPTR, - (unsigned long *)&vcpu->arch.regs_dirty); + kvm_register_mark_dirty(vcpu, VCPU_EXREG_PDPTR); } static void ept_update_paging_mode_cr0(unsigned long *hw_cr0, @@ -2916,7 +2913,7 @@ static void ept_update_paging_mode_cr0(unsigned long *hw_cr0, { struct vcpu_vmx *vmx = to_vmx(vcpu); - if (!test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail)) + if (!kvm_register_is_available(vcpu, VCPU_EXREG_CR3)) vmx_decache_cr3(vcpu); if (!(cr0 & X86_CR0_PG)) { /* From paging/starting to nonpaging */ @@ -6520,9 +6517,9 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) if (vmx->nested.need_vmcs12_to_shadow_sync) nested_sync_vmcs12_to_shadow(vcpu); - if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty)) + if (kvm_register_is_dirty(vcpu, VCPU_REGS_RSP)) vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]); - if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty)) + if (kvm_register_is_dirty(vcpu, VCPU_REGS_RIP)) vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]); cr3 = __get_current_cr3_fast(); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 5863c38108d9..968f09e029e5 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -708,10 +708,8 @@ int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3) ret = 1; memcpy(mmu->pdptrs, pdpte, sizeof(mmu->pdptrs)); - __set_bit(VCPU_EXREG_PDPTR, - (unsigned long *)&vcpu->arch.regs_avail); - __set_bit(VCPU_EXREG_PDPTR, - (unsigned long *)&vcpu->arch.regs_dirty); + kvm_register_mark_dirty(vcpu, VCPU_EXREG_PDPTR); + out: return ret; @@ -729,8 +727,7 @@ bool pdptrs_changed(struct kvm_vcpu *vcpu) if (!is_pae_paging(vcpu)) return false; - if (!test_bit(VCPU_EXREG_PDPTR, - (unsigned long *)&vcpu->arch.regs_avail)) + if (!kvm_register_is_available(vcpu, VCPU_EXREG_PDPTR)) return true; gfn = (kvm_read_cr3(vcpu) & 0xffffffe0ul) >> PAGE_SHIFT; @@ -983,7 +980,7 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) kvm_mmu_new_cr3(vcpu, cr3, skip_tlb_flush); vcpu->arch.cr3 = cr3; - __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); + kvm_register_mark_available(vcpu, VCPU_EXREG_CR3); return 0; } @@ -8763,7 +8760,7 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) vcpu->arch.cr2 = sregs->cr2; mmu_reset_needed |= kvm_read_cr3(vcpu) != sregs->cr3; vcpu->arch.cr3 = sregs->cr3; - __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); + kvm_register_mark_available(vcpu, VCPU_EXREG_CR3); kvm_set_cr8(vcpu, sregs->cr8); -- cgit v1.2.3 From 34059c2570102870df8d8a31bd42f8d9c19cce87 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 27 Sep 2019 14:45:23 -0700 Subject: KVM: x86: Fold decache_cr3() into cache_reg() Handle caching CR3 (from VMX's VMCS) into struct kvm_vcpu via the common cache_reg() callback and drop the dedicated decache_cr3(). 
The name decache_cr3() is somewhat confusing as the caching behavior of CR3 follows that of GPRs, RFLAGS and PDPTRs, (handled via cache_reg()), and has nothing in common with the caching behavior of CR0/CR4 (whose decache_cr{0,4}_guest_bits() likely provided the 'decache' verbiage). This would effectivel adds a BUG() if KVM attempts to cache CR3 on SVM. Change it to a WARN_ON_ONCE() -- if the cache never requires filling, the value is already in the right place -- and opportunistically add one in VMX to provide an equivalent check. Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 - arch/x86/kvm/kvm_cache_regs.h | 2 +- arch/x86/kvm/svm.c | 7 +------ arch/x86/kvm/vmx/vmx.c | 15 ++++++--------- 4 files changed, 8 insertions(+), 17 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index c86c95a499af..cdde7488430d 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1033,7 +1033,6 @@ struct kvm_x86_ops { struct kvm_segment *var, int seg); void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l); void (*decache_cr0_guest_bits)(struct kvm_vcpu *vcpu); - void (*decache_cr3)(struct kvm_vcpu *vcpu); void (*decache_cr4_guest_bits)(struct kvm_vcpu *vcpu); void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0); void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h index e85b5ed22371..58767020de41 100644 --- a/arch/x86/kvm/kvm_cache_regs.h +++ b/arch/x86/kvm/kvm_cache_regs.h @@ -137,7 +137,7 @@ static inline ulong kvm_read_cr4_bits(struct kvm_vcpu *vcpu, ulong mask) static inline ulong kvm_read_cr3(struct kvm_vcpu *vcpu) { if (!kvm_register_is_available(vcpu, VCPU_EXREG_CR3)) - kvm_x86_ops->decache_cr3(vcpu); + kvm_x86_ops->cache_reg(vcpu, VCPU_EXREG_CR3); return vcpu->arch.cr3; } diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 62b0938b62ef..80711b6e3a59 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -2376,7 +2376,7 @@ static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu)); break; default: - BUG(); + WARN_ON_ONCE(1); } } @@ -2529,10 +2529,6 @@ static void svm_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) { } -static void svm_decache_cr3(struct kvm_vcpu *vcpu) -{ -} - static void svm_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) { } @@ -7269,7 +7265,6 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .get_cpl = svm_get_cpl, .get_cs_db_l_bits = kvm_get_cs_db_l_bits, .decache_cr0_guest_bits = svm_decache_cr0_guest_bits, - .decache_cr3 = svm_decache_cr3, .decache_cr4_guest_bits = svm_decache_cr4_guest_bits, .set_cr0 = svm_set_cr0, .set_cr3 = svm_set_cr3, diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 48a41abe016b..1b022db081cf 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -2192,7 +2192,12 @@ static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) if (enable_ept) ept_save_pdptrs(vcpu); break; + case VCPU_EXREG_CR3: + if (enable_unrestricted_guest || (enable_ept && is_paging(vcpu))) + vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); + break; default: + WARN_ON_ONCE(1); break; } } @@ -2863,13 +2868,6 @@ static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) vcpu->arch.cr0 |= vmcs_readl(GUEST_CR0) & cr0_guest_owned_bits; } -static void vmx_decache_cr3(struct kvm_vcpu *vcpu) -{ - if (enable_unrestricted_guest || 
(enable_ept && is_paging(vcpu))) - vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); - kvm_register_mark_available(vcpu, VCPU_EXREG_CR3); -} - static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) { ulong cr4_guest_owned_bits = vcpu->arch.cr4_guest_owned_bits; @@ -2914,7 +2912,7 @@ static void ept_update_paging_mode_cr0(unsigned long *hw_cr0, struct vcpu_vmx *vmx = to_vmx(vcpu); if (!kvm_register_is_available(vcpu, VCPU_EXREG_CR3)) - vmx_decache_cr3(vcpu); + vmx_cache_reg(vcpu, VCPU_EXREG_CR3); if (!(cr0 & X86_CR0_PG)) { /* From paging/starting to nonpaging */ exec_controls_setbit(vmx, CPU_BASED_CR3_LOAD_EXITING | @@ -7784,7 +7782,6 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { .get_cpl = vmx_get_cpl, .get_cs_db_l_bits = vmx_get_cs_db_l_bits, .decache_cr0_guest_bits = vmx_decache_cr0_guest_bits, - .decache_cr3 = vmx_decache_cr3, .decache_cr4_guest_bits = vmx_decache_cr4_guest_bits, .set_cr0 = vmx_set_cr0, .set_cr3 = vmx_set_cr3, -- cgit v1.2.3 From 2cf9af0b566823de418eb2ff357a2f8233c718e9 Mon Sep 17 00:00:00 2001 From: "Suthikulpanit, Suravee" Date: Fri, 13 Sep 2019 19:00:49 +0000 Subject: kvm: x86: Modify kvm_x86_ops.get_enable_apicv() to use struct kvm parameter Generally, APICv for all vcpus in the VM are enable/disable in the same manner. So, get_enable_apicv() should represent APICv status of the VM instead of each VCPU. Modify kvm_x86_ops.get_enable_apicv() to take struct kvm as parameter instead of struct kvm_vcpu. Reviewed-by: Vitaly Kuznetsov Signed-off-by: Suravee Suthikulpanit Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/svm.c | 4 ++-- arch/x86/kvm/vmx/vmx.c | 2 +- arch/x86/kvm/x86.c | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index cdde7488430d..5d8056ff7390 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1081,7 +1081,7 @@ struct kvm_x86_ops { void (*enable_nmi_window)(struct kvm_vcpu *vcpu); void (*enable_irq_window)(struct kvm_vcpu *vcpu); void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr); - bool (*get_enable_apicv)(struct kvm_vcpu *vcpu); + bool (*get_enable_apicv)(struct kvm *kvm); void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu); void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr); void (*hwapic_isr_update)(struct kvm_vcpu *vcpu, int isr); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 80711b6e3a59..e479ea9bc9da 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -5112,9 +5112,9 @@ static void svm_set_virtual_apic_mode(struct kvm_vcpu *vcpu) return; } -static bool svm_get_enable_apicv(struct kvm_vcpu *vcpu) +static bool svm_get_enable_apicv(struct kvm *kvm) { - return avic && irqchip_split(vcpu->kvm); + return avic && irqchip_split(kvm); } static void svm_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 1b022db081cf..e660e28e9ae0 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -3757,7 +3757,7 @@ void pt_update_intercept_for_msr(struct vcpu_vmx *vmx) } } -static bool vmx_get_enable_apicv(struct kvm_vcpu *vcpu) +static bool vmx_get_enable_apicv(struct kvm *kvm) { return enable_apicv; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 968f09e029e5..368a76648b70 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -9340,7 +9340,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) goto fail_free_pio_data; if 
(irqchip_in_kernel(vcpu->kvm)) { - vcpu->arch.apicv_active = kvm_x86_ops->get_enable_apicv(vcpu); + vcpu->arch.apicv_active = kvm_x86_ops->get_enable_apicv(vcpu->kvm); r = kvm_create_lapic(vcpu, lapic_timer_advance_ns); if (r < 0) goto fail_mmu_destroy; -- cgit v1.2.3 From 30ce89acdfe91eb7a88cc5805d2774f11e1eccb4 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Mon, 21 Oct 2019 10:52:56 +0800 Subject: KVM: remove redundant code in kvm_arch_vm_ioctl If we reach here with r = 0, we will reassign r = 0 unnecesarry, then do the label set_irqchip_out work. If we reach here with r != 0, then we will do the label work directly. So this if statement and r = 0 assignment is redundant. Signed-off-by: Miaohe Lin Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 368a76648b70..38131c834091 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4910,9 +4910,6 @@ set_identity_unlock: if (!irqchip_kernel(kvm)) goto set_irqchip_out; r = kvm_vm_ioctl_set_irqchip(kvm, chip); - if (r) - goto set_irqchip_out; - r = 0; set_irqchip_out: kfree(chip); break; -- cgit v1.2.3 From 4be946728f65c10c9bb1a1580ec47a316f5ee6ac Mon Sep 17 00:00:00 2001 From: Like Xu Date: Mon, 21 Oct 2019 18:55:04 +0800 Subject: KVM: x86/vPMU: Declare kvm_pmu->reprogram_pmi field using DECLARE_BITMAP Replace the explicit declaration of "u64 reprogram_pmi" with the generic macro DECLARE_BITMAP for all possible appropriate number of bits. Suggested-by: Paolo Bonzini Signed-off-by: Like Xu Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/pmu.c | 15 +++++---------- 2 files changed, 6 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 5d8056ff7390..62f32a61c250 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -467,7 +467,7 @@ struct kvm_pmu { struct kvm_pmc gp_counters[INTEL_PMC_MAX_GENERIC]; struct kvm_pmc fixed_counters[INTEL_PMC_MAX_FIXED]; struct irq_work irq_work; - u64 reprogram_pmi; + DECLARE_BITMAP(reprogram_pmi, X86_PMC_IDX_MAX); }; struct kvm_pmu_ops; diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index 46875bbd0419..75e8f9fae031 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -62,8 +62,7 @@ static void kvm_perf_overflow(struct perf_event *perf_event, struct kvm_pmc *pmc = perf_event->overflow_handler_context; struct kvm_pmu *pmu = pmc_to_pmu(pmc); - if (!test_and_set_bit(pmc->idx, - (unsigned long *)&pmu->reprogram_pmi)) { + if (!test_and_set_bit(pmc->idx, pmu->reprogram_pmi)) { __set_bit(pmc->idx, (unsigned long *)&pmu->global_status); kvm_make_request(KVM_REQ_PMU, pmc->vcpu); } @@ -76,8 +75,7 @@ static void kvm_perf_overflow_intr(struct perf_event *perf_event, struct kvm_pmc *pmc = perf_event->overflow_handler_context; struct kvm_pmu *pmu = pmc_to_pmu(pmc); - if (!test_and_set_bit(pmc->idx, - (unsigned long *)&pmu->reprogram_pmi)) { + if (!test_and_set_bit(pmc->idx, pmu->reprogram_pmi)) { __set_bit(pmc->idx, (unsigned long *)&pmu->global_status); kvm_make_request(KVM_REQ_PMU, pmc->vcpu); @@ -137,7 +135,7 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type, } pmc->perf_event = event; - clear_bit(pmc->idx, (unsigned long*)&pmc_to_pmu(pmc)->reprogram_pmi); + clear_bit(pmc->idx, pmc_to_pmu(pmc)->reprogram_pmi); } void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel) @@ -253,16 +251,13 @@ 
EXPORT_SYMBOL_GPL(reprogram_counter); void kvm_pmu_handle_event(struct kvm_vcpu *vcpu) { struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); - u64 bitmask; int bit; - bitmask = pmu->reprogram_pmi; - - for_each_set_bit(bit, (unsigned long *)&bitmask, X86_PMC_IDX_MAX) { + for_each_set_bit(bit, pmu->reprogram_pmi, X86_PMC_IDX_MAX) { struct kvm_pmc *pmc = kvm_x86_ops->pmu_ops->pmc_idx_to_pmc(pmu, bit); if (unlikely(!pmc || !pmc->perf_event)) { - clear_bit(bit, (unsigned long *)&pmu->reprogram_pmi); + clear_bit(bit, pmu->reprogram_pmi); continue; } -- cgit v1.2.3 From 35fbe0d4ef9abb05a8c591481d0196edcb056bcc Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Sun, 20 Oct 2019 17:10:58 +0800 Subject: KVM: VMX: Write VPID to vmcs when creating vcpu Move the code that writes vmx->vpid to vmcs from vmx_vcpu_reset() to vmx_vcpu_setup(), because vmx->vpid is allocated when creating vcpu and never changed. So we don't need to update the vmcs.vpid when resetting vcpu. Signed-off-by: Xiaoyao Li Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index e660e28e9ae0..279f855d892b 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -4252,6 +4252,9 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx) set_cr4_guest_host_mask(vmx); + if (vmx->vpid != 0) + vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid); + if (vmx_xsaves_supported()) vmcs_write64(XSS_EXIT_BITMAP, VMX_XSS_EXIT_BITMAP); @@ -4354,9 +4357,6 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu); - if (vmx->vpid != 0) - vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid); - cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET; vmx->vcpu.arch.cr0 = cr0; vmx_set_cr0(vcpu, cr0); /* enter rmode */ -- cgit v1.2.3 From 3c0f4be1f33b25bd28be1672ecb53627577c0899 Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Sun, 20 Oct 2019 17:10:59 +0800 Subject: KVM: VMX: Remove vmx->hv_deadline_tsc initialization from vmx_vcpu_setup() ... It can be removed here because the same code is called later in vmx_vcpu_reset() as the flow: kvm_arch_vcpu_setup() -> kvm_vcpu_reset() -> vmx_vcpu_reset() Signed-off-by: Xiaoyao Li Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 279f855d892b..ec7c42f57b65 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -4178,7 +4178,6 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx) /* Control */ pin_controls_set(vmx, vmx_pin_based_exec_ctrl(vmx)); - vmx->hv_deadline_tsc = -1; exec_controls_set(vmx, vmx_exec_control(vmx)); -- cgit v1.2.3 From 4be5341026246870818e28b53202b001426a5aec Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Sun, 20 Oct 2019 17:11:00 +0800 Subject: KVM: VMX: Initialize vmx->guest_msrs[] right after allocation Move the initialization of vmx->guest_msrs[] from vmx_vcpu_setup() to vmx_create_vcpu(), and put it right after its allocation. This is also preparation for the next patch.
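The relocated loop is a probe: at vcpu-creation time it asks which of the candidate guest save/restore MSRs the host actually implements and records only those. Conceptually (a userspace sketch; msr_probe() stands in for the rdmsr_safe()/wrmsr_safe() pair, and the MSR numbers are illustrative):

    #include <stdbool.h>
    #include <stdint.h>

    #define CANDIDATES 5
    static const uint32_t msr_index[CANDIDATES] = {
        0xc0000081, 0xc0000082, 0xc0000084, 0xc0000100, 0x00000175,
    };

    struct guest_msr { uint32_t index; uint64_t data, mask; };

    /* Does the host accept reads and writes of this MSR? */
    static bool msr_probe(uint32_t msr) { return msr != 0x00000175; }

    static int setup_guest_msrs(struct guest_msr *msrs)
    {
        int i, n = 0;

        for (i = 0; i < CANDIDATES; i++) {
            if (!msr_probe(msr_index[i]))
                continue;        /* skip MSRs this host lacks */
            msrs[n].index = i;   /* the slot in msr_index[], as vmx->guest_msrs stores */
            msrs[n].data  = 0;
            msrs[n].mask  = ~0ULL;
            n++;
        }
        return n;                /* the vmx->nmsrs count */
    }

    int main(void)
    {
        struct guest_msr m[CANDIDATES];
        return setup_guest_msrs(m) == 4 ? 0 : 1;
    }

Probing once at creation keeps the per-VMCS setup path free of repeated rdmsr/wrmsr faults.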
Signed-off-by: Xiaoyao Li Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index ec7c42f57b65..84c32395d887 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -4166,8 +4166,6 @@ static void ept_set_mmio_spte_mask(void) */ static void vmx_vcpu_setup(struct vcpu_vmx *vmx) { - int i; - if (nested) nested_vmx_vcpu_setup(); @@ -4226,21 +4224,6 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx) if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat); - for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i) { - u32 index = vmx_msr_index[i]; - u32 data_low, data_high; - int j = vmx->nmsrs; - - if (rdmsr_safe(index, &data_low, &data_high) < 0) - continue; - if (wrmsr_safe(index, data_low, data_high) < 0) - continue; - vmx->guest_msrs[j].index = i; - vmx->guest_msrs[j].data = 0; - vmx->guest_msrs[j].mask = -1ull; - ++vmx->nmsrs; - } - vm_exit_controls_set(vmx, vmx_vmexit_ctrl()); /* 22.2.1, 20.8.1 */ @@ -6700,7 +6683,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) int err; struct vcpu_vmx *vmx; unsigned long *msr_bitmap; - int cpu; + int i, cpu; BUILD_BUG_ON_MSG(offsetof(struct vcpu_vmx, vcpu) != 0, "struct kvm_vcpu must be at offset 0 for arch usercopy region"); @@ -6752,6 +6735,21 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) if (!vmx->guest_msrs) goto free_pml; + for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i) { + u32 index = vmx_msr_index[i]; + u32 data_low, data_high; + int j = vmx->nmsrs; + + if (rdmsr_safe(index, &data_low, &data_high) < 0) + continue; + if (wrmsr_safe(index, data_low, data_high) < 0) + continue; + vmx->guest_msrs[j].index = i; + vmx->guest_msrs[j].data = 0; + vmx->guest_msrs[j].mask = -1ull; + ++vmx->nmsrs; + } + err = alloc_loaded_vmcs(&vmx->vmcs01); if (err < 0) goto free_msrs; -- cgit v1.2.3 From 1b84292bea00c042afc2f950c61b2c027bd36ff7 Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Sun, 20 Oct 2019 17:11:01 +0800 Subject: KVM: VMX: Rename {vmx,nested_vmx}_vcpu_setup() Rename {vmx,nested_vmx}_vcpu_setup() to match what they really do. 
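Taken together, the last three patches leave vcpu construction and reset split cleanly; as a comment-level sketch of the resulting flow (function names per the patches above):

    /*
     * vmx_create_vcpu()
     *     allocate vcpu, PML page, guest_msrs[] (probed right away, see above)
     *     init_vmcs()              -- host state and control fields only
     *
     * kvm_arch_vcpu_setup()
     *     kvm_vcpu_reset()
     *         vmx_vcpu_reset()     -- guest-state area of the VMCS
     */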
Signed-off-by: Xiaoyao Li Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 2 +- arch/x86/kvm/vmx/nested.h | 2 +- arch/x86/kvm/vmx/vmx.c | 9 +++++---- 3 files changed, 7 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 5e231da00310..55c5791ac52b 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -5768,7 +5768,7 @@ error_guest_mode: return ret; } -void nested_vmx_vcpu_setup(void) +void nested_vmx_set_vmcs_shadowing_bitmap(void) { if (enable_shadow_vmcs) { vmcs_write64(VMREAD_BITMAP, __pa(vmx_vmread_bitmap)); diff --git a/arch/x86/kvm/vmx/nested.h b/arch/x86/kvm/vmx/nested.h index 187d39bf0bf1..4cf1d40da15f 100644 --- a/arch/x86/kvm/vmx/nested.h +++ b/arch/x86/kvm/vmx/nested.h @@ -11,7 +11,7 @@ void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps, bool apicv); void nested_vmx_hardware_unsetup(void); __init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *)); -void nested_vmx_vcpu_setup(void); +void nested_vmx_set_vmcs_shadowing_bitmap(void); void nested_vmx_free_vcpu(struct kvm_vcpu *vcpu); int nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry); bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason); diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 84c32395d887..4211f72a1a01 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -4162,12 +4162,13 @@ static void ept_set_mmio_spte_mask(void) #define VMX_XSS_EXIT_BITMAP 0 /* - * Sets up the vmcs for emulated real mode. + * Noting that the initialization of Guest-state Area of VMCS is in + * vmx_vcpu_reset(). */ -static void vmx_vcpu_setup(struct vcpu_vmx *vmx) +static void init_vmcs(struct vcpu_vmx *vmx) { if (nested) - nested_vmx_vcpu_setup(); + nested_vmx_set_vmcs_shadowing_bitmap(); if (cpu_has_vmx_msr_bitmap()) vmcs_write64(MSR_BITMAP, __pa(vmx->vmcs01.msr_bitmap)); @@ -6774,7 +6775,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) cpu = get_cpu(); vmx_vcpu_load(&vmx->vcpu, cpu); vmx->vcpu.cpu = cpu; - vmx_vcpu_setup(vmx); + init_vmcs(vmx); vmx_vcpu_put(&vmx->vcpu); put_cpu(); if (cpu_need_virtualize_apic_accesses(&vmx->vcpu)) { -- cgit v1.2.3 From 7204160eb7809345d10c983d9d1dfbd98060a56d Mon Sep 17 00:00:00 2001 From: Aaron Lewis Date: Mon, 21 Oct 2019 16:30:20 -0700 Subject: KVM: x86: Introduce vcpu->arch.xsaves_enabled Cache whether XSAVES is enabled in the guest by adding xsaves_enabled to vcpu->arch. 
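[ Editor's note: the cached flag lets later patches in this series test guest XSAVES support on the hot path without re-deriving it from CPUID; a sketch of the intended use, with host_xss as a snapshot of the host's IA32_XSS:

	if (vcpu->arch.xsaves_enabled && vcpu->arch.ia32_xss != host_xss)
		wrmsrl(MSR_IA32_XSS, vcpu->arch.ia32_xss);
]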
Reviewed-by: Jim Mattson Signed-off-by: Aaron Lewis Change-Id: If4638e0901c28a4494dad2e103e2c075e8ab5d68 Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/svm.c | 3 +++ arch/x86/kvm/vmx/vmx.c | 5 +++++ 3 files changed, 9 insertions(+) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 62f32a61c250..6f6b8886a8eb 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -560,6 +560,7 @@ struct kvm_vcpu_arch { u64 smbase; u64 smi_count; bool tpr_access_reporting; + bool xsaves_enabled; u64 ia32_xss; u64 microcode_version; u64 arch_capabilities; diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index e479ea9bc9da..cf224963e7d1 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -5895,6 +5895,9 @@ static void svm_cpuid_update(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); + vcpu->arch.xsaves_enabled = guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && + boot_cpu_has(X86_FEATURE_XSAVES); + /* Update nrips enabled cache */ svm->nrips_enabled = !!guest_cpuid_has(&svm->vcpu, X86_FEATURE_NRIPS); diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 4211f72a1a01..751765532305 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -4050,6 +4050,8 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx) guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && guest_cpuid_has(vcpu, X86_FEATURE_XSAVES); + vcpu->arch.xsaves_enabled = xsaves_enabled; + if (!xsaves_enabled) exec_control &= ~SECONDARY_EXEC_XSAVES; @@ -7089,6 +7091,9 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); + /* xsaves_enabled is recomputed in vmx_compute_secondary_exec_control(). */ + vcpu->arch.xsaves_enabled = false; + if (cpu_has_secondary_exec_ctrls()) { vmx_compute_secondary_exec_control(vmx); vmcs_set_secondary_exec_control(vmx); -- cgit v1.2.3 From c034f2aa8622e1e436563eb34c0f78ba8aa32329 Mon Sep 17 00:00:00 2001 From: Aaron Lewis Date: Mon, 21 Oct 2019 16:30:21 -0700 Subject: KVM: VMX: Fix conditions for guest IA32_XSS support Volume 4 of the SDM says that IA32_XSS is supported if CPUID(EAX=0DH,ECX=1):EAX.XSS[bit 3] is set, so only the X86_FEATURE_XSAVES check is necessary (X86_FEATURE_XSAVES is the Linux name for CPUID(EAX=0DH,ECX=1):EAX.XSS[bit 3]). 
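[ Editor's note: a minimal sketch of reading the SDM bit in question directly, assuming a raw cpuid_count() probe rather than the cached cpufeature word that X86_FEATURE_XSAVES maps to:

	unsigned int eax, ebx, ecx, edx;
	bool xss_supported;

	cpuid_count(0x0d, 1, &eax, &ebx, &ecx, &edx);
	xss_supported = eax & BIT(3);	/* CPUID.(EAX=0DH,ECX=1):EAX.XSS */
]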
Fixes: 4d763b168e9c5 ("KVM: VMX: check CPUID before allowing read/write of IA32_XSS") Reviewed-by: Jim Mattson Signed-off-by: Aaron Lewis Change-Id: I9059b9f2e3595e4b09a4cdcf14b933b22ebad419 Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 751765532305..e5b09c75e529 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1830,10 +1830,8 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return vmx_get_vmx_msr(&vmx->nested.msrs, msr_info->index, &msr_info->data); case MSR_IA32_XSS: - if (!vmx_xsaves_supported() || - (!msr_info->host_initiated && - !(guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && - guest_cpuid_has(vcpu, X86_FEATURE_XSAVES)))) + if (!msr_info->host_initiated && + !guest_cpuid_has(vcpu, X86_FEATURE_XSAVES)) return 1; msr_info->data = vcpu->arch.ia32_xss; break; @@ -2073,10 +2071,8 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return 1; return vmx_set_vmx_msr(vcpu, msr_index, data); case MSR_IA32_XSS: - if (!vmx_xsaves_supported() || - (!msr_info->host_initiated && - !(guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && - guest_cpuid_has(vcpu, X86_FEATURE_XSAVES)))) + if (!msr_info->host_initiated && + !guest_cpuid_has(vcpu, X86_FEATURE_XSAVES)) return 1; /* * The only supported bit as of Skylake is bit 8, but @@ -2085,11 +2081,13 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (data != 0) return 1; vcpu->arch.ia32_xss = data; - if (vcpu->arch.ia32_xss != host_xss) - add_atomic_switch_msr(vmx, MSR_IA32_XSS, - vcpu->arch.ia32_xss, host_xss, false); - else - clear_atomic_switch_msr(vmx, MSR_IA32_XSS); + if (vcpu->arch.xsaves_enabled) { + if (vcpu->arch.ia32_xss != host_xss) + add_atomic_switch_msr(vmx, MSR_IA32_XSS, + vcpu->arch.ia32_xss, host_xss, false); + else + clear_atomic_switch_msr(vmx, MSR_IA32_XSS); + } break; case MSR_IA32_RTIT_CTL: if ((pt_mode != PT_MODE_HOST_GUEST) || -- cgit v1.2.3 From 78958563d8023db0c6d03a2fe2a64d79b47b4349 Mon Sep 17 00:00:00 2001 From: Aaron Lewis Date: Mon, 21 Oct 2019 16:30:22 -0700 Subject: KVM: x86: Remove unneeded kvm_vcpu variable, guest_xcr0_loaded The kvm_vcpu variable, guest_xcr0_loaded, is a waste of an 'int' and a conditional branch. VMX and SVM are the only users, and both unconditionally pair kvm_load_guest_xcr0() with kvm_put_guest_xcr0() making this check unnecessary. Without this variable, the predicates in kvm_load_guest_xcr0 and kvm_put_guest_xcr0 should match. 
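[ Editor's note: the pairing that makes the flag redundant, as it appears in both vendor run paths (sketch):

	kvm_load_guest_xcr0(vcpu);	/* xsetbv(guest xcr0) iff it differs */
	/* ... VM-entry and VM-exit ... */
	kvm_put_guest_xcr0(vcpu);	/* restore host xcr0 on the way out */

Because load and put always run as a matched pair, no state is ever needed to remember which XCR0 is live. ]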
Suggested-by: Sean Christopherson Reviewed-by: Jim Mattson Signed-off-by: Aaron Lewis Change-Id: I7b1eb9b62969d7bbb2850f27e42f863421641b23 Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 16 +++++----------- include/linux/kvm_host.h | 1 - 2 files changed, 5 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 38131c834091..7141f81141a2 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -811,22 +811,16 @@ EXPORT_SYMBOL_GPL(kvm_lmsw); void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu) { if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE) && - !vcpu->guest_xcr0_loaded) { - /* kvm_set_xcr() also depends on this */ - if (vcpu->arch.xcr0 != host_xcr0) - xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0); - vcpu->guest_xcr0_loaded = 1; - } + vcpu->arch.xcr0 != host_xcr0) + xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0); } EXPORT_SYMBOL_GPL(kvm_load_guest_xcr0); void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu) { - if (vcpu->guest_xcr0_loaded) { - if (vcpu->arch.xcr0 != host_xcr0) - xsetbv(XCR_XFEATURE_ENABLED_MASK, host_xcr0); - vcpu->guest_xcr0_loaded = 0; - } + if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE) && + vcpu->arch.xcr0 != host_xcr0) + xsetbv(XCR_XFEATURE_ENABLED_MASK, host_xcr0); } EXPORT_SYMBOL_GPL(kvm_put_guest_xcr0); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 719fc3e15ea4..d2017302996c 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -278,7 +278,6 @@ struct kvm_vcpu { struct mutex mutex; struct kvm_run *run; - int guest_xcr0_loaded; struct swait_queue_head wq; struct pid __rcu *pid; int sigset_active; -- cgit v1.2.3 From 312a1c87798e6b43ff533393167b3cba33645ead Mon Sep 17 00:00:00 2001 From: Aaron Lewis Date: Mon, 21 Oct 2019 16:30:23 -0700 Subject: KVM: SVM: Use wrmsr for switching between guest and host IA32_XSS on AMD When the guest can execute the XSAVES/XRSTORS instructions, set the hardware IA32_XSS MSR to guest/host values on VM-entry/VM-exit. Note that vcpu->arch.ia32_xss is currently guaranteed to be 0 on AMD, since there is no way to change it. 
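[ Editor's note: a commented sketch of the guard used by both new helpers below; all three conditions must hold before the MSR is touched, so the common case skips the wrmsr entirely:

	/*
	 * Only write IA32_XSS when (a) the guest has CR4.OSXSAVE set,
	 * (b) XSAVES is exposed to the guest (vcpu->arch.xsaves_enabled)
	 * and (c) the guest value actually differs from the host's.
	 */
	if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE) &&
	    vcpu->arch.xsaves_enabled &&
	    vcpu->arch.ia32_xss != host_xss)
		wrmsrl(MSR_IA32_XSS, vcpu->arch.ia32_xss);
]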
Suggested-by: Sean Christopherson Reviewed-by: Jim Mattson Signed-off-by: Aaron Lewis Change-Id: Id51a782462086e6d7a3ab621838e200f1c005afd Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index cf224963e7d1..fa29125193fe 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -116,6 +116,8 @@ MODULE_DEVICE_TABLE(x86cpu, svm_cpu_id); static bool erratum_383_found __read_mostly; +static u64 __read_mostly host_xss; + static const u32 host_save_user_msrs[] = { #ifdef CONFIG_X86_64 MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE, @@ -1409,6 +1411,9 @@ static __init int svm_hardware_setup(void) pr_info("Virtual GIF supported\n"); } + if (boot_cpu_has(X86_FEATURE_XSAVES)) + rdmsrl(MSR_IA32_XSS, host_xss); + return 0; err: @@ -5598,6 +5603,22 @@ static void svm_cancel_injection(struct kvm_vcpu *vcpu) svm_complete_interrupts(svm); } +static void svm_load_guest_xss(struct kvm_vcpu *vcpu) +{ + if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE) && + vcpu->arch.xsaves_enabled && + vcpu->arch.ia32_xss != host_xss) + wrmsrl(MSR_IA32_XSS, vcpu->arch.ia32_xss); +} + +static void svm_load_host_xss(struct kvm_vcpu *vcpu) +{ + if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE) && + vcpu->arch.xsaves_enabled && + vcpu->arch.ia32_xss != host_xss) + wrmsrl(MSR_IA32_XSS, host_xss); +} + static void svm_vcpu_run(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -5637,6 +5658,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) clgi(); kvm_load_guest_xcr0(vcpu); + svm_load_guest_xss(vcpu); if (lapic_in_kernel(vcpu) && vcpu->arch.apic->lapic_timer.timer_advance_ns) @@ -5786,6 +5808,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI)) kvm_before_interrupt(&svm->vcpu); + svm_load_host_xss(vcpu); kvm_put_guest_xcr0(vcpu); stgi(); -- cgit v1.2.3 From 9753d68865c5662eee94eb8808b5ad5eb766f5ea Mon Sep 17 00:00:00 2001 From: Aaron Lewis Date: Mon, 21 Oct 2019 16:30:24 -0700 Subject: KVM: VMX: Use wrmsr for switching between guest and host IA32_XSS on Intel When the guest can execute the XSAVES/XRSTORS instructions, use wrmsr to set the hardware IA32_XSS MSR to guest/host values on VM-entry/VM-exit, rather than the MSR-load areas. By using the same approach as AMD, we will be able to use a common implementation for both (in the next patch). 
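[ Editor's note: the mechanism being replaced versus the one introduced, side by side; add_atomic_switch_msr() is the VMCS MSR-load-area helper that the previous patch wired up and this patch removes:

	/* before: hardware swaps IA32_XSS via the VMCS MSR-load areas */
	add_atomic_switch_msr(vmx, MSR_IA32_XSS,
			      vcpu->arch.ia32_xss, host_xss, false);

	/* after: explicit, guarded wrmsrl() calls around the VM run */
	wrmsrl(MSR_IA32_XSS, vcpu->arch.ia32_xss);	/* before VM-entry */
	wrmsrl(MSR_IA32_XSS, host_xss);			/* after VM-exit   */
]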
Reviewed-by: Jim Mattson Signed-off-by: Aaron Lewis Change-Id: I9447d104b2615c04e39e4af0c911e1e7309bf464 Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index e5b09c75e529..aee6f1a9d252 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -2081,13 +2081,6 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (data != 0) return 1; vcpu->arch.ia32_xss = data; - if (vcpu->arch.xsaves_enabled) { - if (vcpu->arch.ia32_xss != host_xss) - add_atomic_switch_msr(vmx, MSR_IA32_XSS, - vcpu->arch.ia32_xss, host_xss, false); - else - clear_atomic_switch_msr(vmx, MSR_IA32_XSS); - } break; case MSR_IA32_RTIT_CTL: if ((pt_mode != PT_MODE_HOST_GUEST) || @@ -6473,6 +6466,22 @@ void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp) } } +static void vmx_load_guest_xss(struct kvm_vcpu *vcpu) +{ + if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE) && + vcpu->arch.xsaves_enabled && + vcpu->arch.ia32_xss != host_xss) + wrmsrl(MSR_IA32_XSS, vcpu->arch.ia32_xss); +} + +static void vmx_load_host_xss(struct kvm_vcpu *vcpu) +{ + if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE) && + vcpu->arch.xsaves_enabled && + vcpu->arch.ia32_xss != host_xss) + wrmsrl(MSR_IA32_XSS, host_xss); +} + bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, bool launched); static void vmx_vcpu_run(struct kvm_vcpu *vcpu) @@ -6524,6 +6533,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) vmx_set_interrupt_shadow(vcpu, 0); kvm_load_guest_xcr0(vcpu); + vmx_load_guest_xss(vcpu); if (static_cpu_has(X86_FEATURE_PKU) && kvm_read_cr4_bits(vcpu, X86_CR4_PKE) && @@ -6630,6 +6640,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) __write_pkru(vmx->host_pkru); } + vmx_load_host_xss(vcpu); kvm_put_guest_xcr0(vcpu); vmx->nested.nested_run_pending = 0; -- cgit v1.2.3 From 139a12cfe1a040fd881338a7cc042bd37159ea9a Mon Sep 17 00:00:00 2001 From: Aaron Lewis Date: Mon, 21 Oct 2019 16:30:25 -0700 Subject: KVM: x86: Move IA32_XSS-swapping on VM-entry/VM-exit to common x86 code Hoist the vendor-specific code related to loading the hardware IA32_XSS MSR with guest/host values on VM-entry/VM-exit to common x86 code. 
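[ Editor's note: after this patch both vendor run paths reduce to the same bracket; the common helpers swap XCR0 via xsetbv and, when XSAVES is in use, IA32_XSS via wrmsrl (sketch):

	kvm_load_guest_xsave_state(vcpu);
	/* ... VM-entry and VM-exit ... */
	kvm_load_host_xsave_state(vcpu);
]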
Reviewed-by: Jim Mattson Signed-off-by: Aaron Lewis Change-Id: Ic6e3430833955b98eb9b79ae6715cf2a3fdd6d82 Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 27 ++------------------------- arch/x86/kvm/vmx/vmx.c | 27 ++------------------------- arch/x86/kvm/x86.c | 38 ++++++++++++++++++++++++++++---------- arch/x86/kvm/x86.h | 4 ++-- 4 files changed, 34 insertions(+), 62 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index fa29125193fe..77429fa38748 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -116,8 +116,6 @@ MODULE_DEVICE_TABLE(x86cpu, svm_cpu_id); static bool erratum_383_found __read_mostly; -static u64 __read_mostly host_xss; - static const u32 host_save_user_msrs[] = { #ifdef CONFIG_X86_64 MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE, @@ -1411,9 +1409,6 @@ static __init int svm_hardware_setup(void) pr_info("Virtual GIF supported\n"); } - if (boot_cpu_has(X86_FEATURE_XSAVES)) - rdmsrl(MSR_IA32_XSS, host_xss); - return 0; err: @@ -5603,22 +5598,6 @@ static void svm_cancel_injection(struct kvm_vcpu *vcpu) svm_complete_interrupts(svm); } -static void svm_load_guest_xss(struct kvm_vcpu *vcpu) -{ - if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE) && - vcpu->arch.xsaves_enabled && - vcpu->arch.ia32_xss != host_xss) - wrmsrl(MSR_IA32_XSS, vcpu->arch.ia32_xss); -} - -static void svm_load_host_xss(struct kvm_vcpu *vcpu) -{ - if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE) && - vcpu->arch.xsaves_enabled && - vcpu->arch.ia32_xss != host_xss) - wrmsrl(MSR_IA32_XSS, host_xss); -} - static void svm_vcpu_run(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -5657,8 +5636,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) svm->vmcb->save.cr2 = vcpu->arch.cr2; clgi(); - kvm_load_guest_xcr0(vcpu); - svm_load_guest_xss(vcpu); + kvm_load_guest_xsave_state(vcpu); if (lapic_in_kernel(vcpu) && vcpu->arch.apic->lapic_timer.timer_advance_ns) @@ -5808,8 +5786,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI)) kvm_before_interrupt(&svm->vcpu); - svm_load_host_xss(vcpu); - kvm_put_guest_xcr0(vcpu); + kvm_load_host_xsave_state(vcpu); stgi(); /* Any pending NMI will happen here */ diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index aee6f1a9d252..d06140bdb3ad 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -106,8 +106,6 @@ module_param(enable_apicv, bool, S_IRUGO); static bool __read_mostly nested = 1; module_param(nested, bool, S_IRUGO); -static u64 __read_mostly host_xss; - bool __read_mostly enable_pml = 1; module_param_named(pml, enable_pml, bool, S_IRUGO); @@ -6466,22 +6464,6 @@ void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp) } } -static void vmx_load_guest_xss(struct kvm_vcpu *vcpu) -{ - if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE) && - vcpu->arch.xsaves_enabled && - vcpu->arch.ia32_xss != host_xss) - wrmsrl(MSR_IA32_XSS, vcpu->arch.ia32_xss); -} - -static void vmx_load_host_xss(struct kvm_vcpu *vcpu) -{ - if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE) && - vcpu->arch.xsaves_enabled && - vcpu->arch.ia32_xss != host_xss) - wrmsrl(MSR_IA32_XSS, host_xss); -} - bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, bool launched); static void vmx_vcpu_run(struct kvm_vcpu *vcpu) @@ -6532,8 +6514,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) vmx_set_interrupt_shadow(vcpu, 0); - kvm_load_guest_xcr0(vcpu); - vmx_load_guest_xss(vcpu); + 
kvm_load_guest_xsave_state(vcpu); if (static_cpu_has(X86_FEATURE_PKU) && kvm_read_cr4_bits(vcpu, X86_CR4_PKE) && @@ -6640,8 +6621,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) __write_pkru(vmx->host_pkru); } - vmx_load_host_xss(vcpu); - kvm_put_guest_xcr0(vcpu); + kvm_load_host_xsave_state(vcpu); vmx->nested.nested_run_pending = 0; vmx->idt_vectoring_info = 0; @@ -7611,9 +7591,6 @@ static __init int hardware_setup(void) WARN_ONCE(host_bndcfgs, "KVM: BNDCFGS in host will be lost"); } - if (boot_cpu_has(X86_FEATURE_XSAVES)) - rdmsrl(MSR_IA32_XSS, host_xss); - if (!cpu_has_vmx_vpid() || !cpu_has_vmx_invvpid() || !(cpu_has_vmx_invvpid_single() || cpu_has_vmx_invvpid_global())) enable_vpid = 0; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 7141f81141a2..1ebe13493241 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -176,6 +176,8 @@ struct kvm_shared_msrs { static struct kvm_shared_msrs_global __read_mostly shared_msrs_global; static struct kvm_shared_msrs __percpu *shared_msrs; +static u64 __read_mostly host_xss; + struct kvm_stats_debugfs_item debugfs_entries[] = { { "pf_fixed", VCPU_STAT(pf_fixed) }, { "pf_guest", VCPU_STAT(pf_guest) }, @@ -808,21 +810,34 @@ void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw) } EXPORT_SYMBOL_GPL(kvm_lmsw); -void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu) +void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu) { - if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE) && - vcpu->arch.xcr0 != host_xcr0) - xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0); + if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE)) { + + if (vcpu->arch.xcr0 != host_xcr0) + xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0); + + if (vcpu->arch.xsaves_enabled && + vcpu->arch.ia32_xss != host_xss) + wrmsrl(MSR_IA32_XSS, vcpu->arch.ia32_xss); + } } -EXPORT_SYMBOL_GPL(kvm_load_guest_xcr0); +EXPORT_SYMBOL_GPL(kvm_load_guest_xsave_state); -void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu) +void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu) { - if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE) && - vcpu->arch.xcr0 != host_xcr0) - xsetbv(XCR_XFEATURE_ENABLED_MASK, host_xcr0); + if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE)) { + + if (vcpu->arch.xcr0 != host_xcr0) + xsetbv(XCR_XFEATURE_ENABLED_MASK, host_xcr0); + + if (vcpu->arch.xsaves_enabled && + vcpu->arch.ia32_xss != host_xss) + wrmsrl(MSR_IA32_XSS, host_xss); + } + } -EXPORT_SYMBOL_GPL(kvm_put_guest_xcr0); +EXPORT_SYMBOL_GPL(kvm_load_host_xsave_state); static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) { @@ -9278,6 +9293,9 @@ int kvm_arch_hardware_setup(void) kvm_default_tsc_scaling_ratio = 1ULL << kvm_tsc_scaling_ratio_frac_bits; } + if (boot_cpu_has(X86_FEATURE_XSAVES)) + rdmsrl(MSR_IA32_XSS, host_xss); + kvm_init_msr_list(); return 0; } diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 45d82b8277e5..2b0805012e3c 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -364,7 +364,7 @@ static inline bool kvm_pat_valid(u64 data) return (data | ((data & 0x0202020202020202ull) << 1)) == data; } -void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu); -void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu); +void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu); +void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu); #endif -- cgit v1.2.3 From 864e2ab2b46db1ac266c46a7c9cefe6cc893029d Mon Sep 17 00:00:00 2001 From: Aaron Lewis Date: Mon, 21 Oct 2019 16:30:26 -0700 Subject: kvm: x86: Move IA32_XSS to kvm_{get,set}_msr_common Hoist support for RDMSR/WRMSR of IA32_XSS from vmx into common code so that it can be used 
for svm as well. Right now, kvm only allows the guest IA32_XSS to be zero, so the guest's usage of XSAVES will be exactly the same as XSAVEC. Reviewed-by: Jim Mattson Signed-off-by: Aaron Lewis Change-Id: Ie4b0f777d71e428fbee6e82071ac2d7618e9bb40 Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 18 ------------------ arch/x86/kvm/x86.c | 20 ++++++++++++++++++++ 2 files changed, 20 insertions(+), 18 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index d06140bdb3ad..31ce6bc2c371 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1827,12 +1827,6 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return 1; return vmx_get_vmx_msr(&vmx->nested.msrs, msr_info->index, &msr_info->data); - case MSR_IA32_XSS: - if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_XSAVES)) - return 1; - msr_info->data = vcpu->arch.ia32_xss; - break; case MSR_IA32_RTIT_CTL: if (pt_mode != PT_MODE_HOST_GUEST) return 1; @@ -2068,18 +2062,6 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (!nested_vmx_allowed(vcpu)) return 1; return vmx_set_vmx_msr(vcpu, msr_index, data); - case MSR_IA32_XSS: - if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_XSAVES)) - return 1; - /* - * The only supported bit as of Skylake is bit 8, but - * it is not supported on KVM. - */ - if (data != 0) - return 1; - vcpu->arch.ia32_xss = data; - break; case MSR_IA32_RTIT_CTL: if ((pt_mode != PT_MODE_HOST_GUEST) || vmx_rtit_ctl_check(vcpu, data) || diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 1ebe13493241..19a0dc96beca 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2694,6 +2694,20 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_TSC: kvm_write_tsc(vcpu, msr_info); break; + case MSR_IA32_XSS: + if (!msr_info->host_initiated && + !guest_cpuid_has(vcpu, X86_FEATURE_XSAVES)) + return 1; + /* + * We do support PT if kvm_x86_ops->pt_supported(), but we do + * not support IA32_XSS[bit 8]. Guests will have to use + * RDMSR/WRMSR rather than XSAVES/XRSTORS to save/restore PT + * MSRs. + */ + if (data != 0) + return 1; + vcpu->arch.ia32_xss = data; + break; case MSR_SMI_COUNT: if (!msr_info->host_initiated) return 1; @@ -3021,6 +3035,12 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1: return get_msr_mce(vcpu, msr_info->index, &msr_info->data, msr_info->host_initiated); + case MSR_IA32_XSS: + if (!msr_info->host_initiated && + !guest_cpuid_has(vcpu, X86_FEATURE_XSAVES)) + return 1; + msr_info->data = vcpu->arch.ia32_xss; + break; case MSR_K7_CLK_CTL: /* * Provide expected ramp-up count for K7. All other -- cgit v1.2.3 From 52297436199dde85be557ee6bc779f5b96082f74 Mon Sep 17 00:00:00 2001 From: Aaron Lewis Date: Mon, 21 Oct 2019 16:30:27 -0700 Subject: kvm: svm: Update svm_xsaves_supported AMD CPUs now support XSAVES in a limited fashion (they require IA32_XSS to be zero). AMD has no equivalent of Intel's "Enable XSAVES/XRSTORS" VM-execution control. Instead, XSAVES is always available to the guest when supported on the host. 
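[ Editor's note: the "limited fashion" amounts to pinning the guest IA32_XSS at zero, which the previous patch enforces in kvm_set_msr_common() (condensed):

	case MSR_IA32_XSS:
		if (!msr_info->host_initiated &&
		    !guest_cpuid_has(vcpu, X86_FEATURE_XSAVES))
			return 1;
		if (data != 0)	/* only IA32_XSS == 0 is supported */
			return 1;
		vcpu->arch.ia32_xss = data;
		break;

With no supervisor states enabled, XSAVES saves exactly the XCR0-enabled state, i.e. it behaves like XSAVEC plus the modified-state optimization. ]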
Reviewed-by: Jim Mattson Signed-off-by: Aaron Lewis Change-Id: I40dc2c682eb0d38c2208d95d5eb7bbb6c47f6317 Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 77429fa38748..4153ca8cddb7 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -5973,7 +5973,7 @@ static bool svm_mpx_supported(void) static bool svm_xsaves_supported(void) { - return false; + return boot_cpu_has(X86_FEATURE_XSAVES); } static bool svm_umip_emulated(void) -- cgit v1.2.3 From 149487bdacde32f5a9a344a49533ae0772fb9db7 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 21 Oct 2019 15:58:42 -0700 Subject: KVM: Add separate helper for putting borrowed reference to kvm Add a new helper, kvm_put_kvm_no_destroy(), to handle putting a borrowed reference[*] to the VM when installing a new file descriptor fails. KVM expects the refcount to remain valid in this case, as the in-progress ioctl() has an explicit reference to the VM. The primary motivation for the helper is to document that the 'kvm' pointer is still valid after putting the borrowed reference, e.g. to document that doing mutex_lock(&kvm->lock) immediately after putting a ref to kvm isn't broken. [*] When exposing a new object to userspace via a file descriptor, e.g. a new vcpu, KVM grabs a reference to itself (the VM) prior to making the object visible to userspace to avoid prematurely freeing the VM in the scenario where userspace immediately closes the file descriptor. Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/powerpc/kvm/book3s_64_mmu_hv.c | 2 +- arch/powerpc/kvm/book3s_64_vio.c | 2 +- include/linux/kvm_host.h | 1 + virt/kvm/kvm_main.c | 16 ++++++++++++++-- 4 files changed, 17 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 9a75f0e1933b..68678e31c84c 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -2000,7 +2000,7 @@ int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *ghf) ret = anon_inode_getfd("kvm-htab", &kvm_htab_fops, ctx, rwflag | O_CLOEXEC); if (ret < 0) { kfree(ctx); - kvm_put_kvm(kvm); + kvm_put_kvm_no_destroy(kvm); return ret; } diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index 5834db0a54c6..883a66e76638 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -317,7 +317,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, if (ret >= 0) list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables); else - kvm_put_kvm(kvm); + kvm_put_kvm_no_destroy(kvm); mutex_unlock(&kvm->lock); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index d2017302996c..a817e446c9aa 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -621,6 +621,7 @@ void kvm_exit(void); void kvm_get_kvm(struct kvm *kvm); void kvm_put_kvm(struct kvm *kvm); +void kvm_put_kvm_no_destroy(struct kvm *kvm); static inline struct kvm_memslots *__kvm_memslots(struct kvm *kvm, int as_id) { diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 67ef3f2e19e8..b8534c6b8cf6 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -772,6 +772,18 @@ void kvm_put_kvm(struct kvm *kvm) } EXPORT_SYMBOL_GPL(kvm_put_kvm); +/* + * Used to put a reference that was taken on behalf of an object associated + * with a user-visible file descriptor, e.g.
a vcpu or device, if installation + * of the new file descriptor fails and the reference cannot be transferred to + * its final owner. In such cases, the caller is still actively using @kvm and + * will fail miserably if the refcount unexpectedly hits zero. + */ +void kvm_put_kvm_no_destroy(struct kvm *kvm) +{ + WARN_ON(refcount_dec_and_test(&kvm->users_count)); +} +EXPORT_SYMBOL_GPL(kvm_put_kvm_no_destroy); static int kvm_vm_release(struct inode *inode, struct file *filp) { @@ -2679,7 +2691,7 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id) kvm_get_kvm(kvm); r = create_vcpu_fd(vcpu); if (r < 0) { - kvm_put_kvm(kvm); + kvm_put_kvm_no_destroy(kvm); goto unlock_vcpu_destroy; } @@ -3117,7 +3129,7 @@ static int kvm_ioctl_create_device(struct kvm *kvm, kvm_get_kvm(kvm); ret = anon_inode_getfd(ops->name, &kvm_device_fops, dev, O_RDWR | O_CLOEXEC); if (ret < 0) { - kvm_put_kvm(kvm); + kvm_put_kvm_no_destroy(kvm); mutex_lock(&kvm->lock); list_del(&dev->vm_node); mutex_unlock(&kvm->lock); -- cgit v1.2.3 From b4d0c0aad57ac3bd1b5141bac5ab1ab1d5e442b3 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 11 Oct 2019 11:08:00 +0200 Subject: crypto: arm - use Kconfig based compiler checks for crypto opcodes Instead of allowing the Crypto Extensions algorithms to be selected when using a toolchain that does not support them, and complaining about it at build time, use the information we have about the compiler to prevent them from being selected in the first place. Users that are stuck with a GCC version <4.8 are unlikely to care about these routines anyway, and it cleans up the Makefile considerably. While at it, add explicit 'armv8-a' CPU specifiers to the code that uses the 'crypto-neon-fp-armv8' FPU specifier so we don't regress Clang, which will complain about this in version 10 and later.
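[ Editor's note: the pattern used throughout the Kconfig hunk below; the assembler capability that used to be probed in the Makefile becomes a dependency the configuration system evaluates up front:

	config CRYPTO_AES_ARM_CE
		tristate "Accelerated AES using ARMv8 Crypto Extensions"
		depends on KERNEL_MODE_NEON && (CC_IS_CLANG || GCC_VERSION >= 40800)

With the requirement expressed this way, the $(call as-instr,...) probes and their $(warning ...) fallbacks can simply be deleted from the Makefile. ]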
Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm/crypto/Kconfig | 14 ++++++++------ arch/arm/crypto/Makefile | 32 ++++++-------------------------- arch/arm/crypto/aes-ce-core.S | 1 + arch/arm/crypto/crct10dif-ce-core.S | 2 +- arch/arm/crypto/ghash-ce-core.S | 1 + arch/arm/crypto/sha1-ce-core.S | 1 + arch/arm/crypto/sha2-ce-core.S | 1 + 7 files changed, 19 insertions(+), 33 deletions(-) (limited to 'arch') diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig index b24df84a1d7a..9f257c1bf32b 100644 --- a/arch/arm/crypto/Kconfig +++ b/arch/arm/crypto/Kconfig @@ -30,7 +30,7 @@ config CRYPTO_SHA1_ARM_NEON config CRYPTO_SHA1_ARM_CE tristate "SHA1 digest algorithm (ARM v8 Crypto Extensions)" - depends on KERNEL_MODE_NEON + depends on KERNEL_MODE_NEON && (CC_IS_CLANG || GCC_VERSION >= 40800) select CRYPTO_SHA1_ARM select CRYPTO_HASH help @@ -39,7 +39,7 @@ config CRYPTO_SHA1_ARM_CE config CRYPTO_SHA2_ARM_CE tristate "SHA-224/256 digest algorithm (ARM v8 Crypto Extensions)" - depends on KERNEL_MODE_NEON + depends on KERNEL_MODE_NEON && (CC_IS_CLANG || GCC_VERSION >= 40800) select CRYPTO_SHA256_ARM select CRYPTO_HASH help @@ -96,7 +96,7 @@ config CRYPTO_AES_ARM_BS config CRYPTO_AES_ARM_CE tristate "Accelerated AES using ARMv8 Crypto Extensions" - depends on KERNEL_MODE_NEON + depends on KERNEL_MODE_NEON && (CC_IS_CLANG || GCC_VERSION >= 40800) select CRYPTO_BLKCIPHER select CRYPTO_SIMD help @@ -105,7 +105,7 @@ config CRYPTO_AES_ARM_CE config CRYPTO_GHASH_ARM_CE tristate "PMULL-accelerated GHASH using NEON/ARMv8 Crypto Extensions" - depends on KERNEL_MODE_NEON + depends on KERNEL_MODE_NEON && (CC_IS_CLANG || GCC_VERSION >= 40800) select CRYPTO_HASH select CRYPTO_CRYPTD select CRYPTO_GF128MUL @@ -117,12 +117,14 @@ config CRYPTO_GHASH_ARM_CE config CRYPTO_CRCT10DIF_ARM_CE tristate "CRCT10DIF digest algorithm using PMULL instructions" - depends on KERNEL_MODE_NEON && CRC_T10DIF + depends on KERNEL_MODE_NEON && (CC_IS_CLANG || GCC_VERSION >= 40800) + depends on CRC_T10DIF select CRYPTO_HASH config CRYPTO_CRC32_ARM_CE tristate "CRC32(C) digest algorithm using CRC and/or PMULL instructions" - depends on KERNEL_MODE_NEON && CRC32 + depends on KERNEL_MODE_NEON && (CC_IS_CLANG || GCC_VERSION >= 40800) + depends on CRC32 select CRYPTO_HASH config CRYPTO_CHACHA20_NEON diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile index 4180f3a13512..c0d36771a693 100644 --- a/arch/arm/crypto/Makefile +++ b/arch/arm/crypto/Makefile @@ -12,32 +12,12 @@ obj-$(CONFIG_CRYPTO_SHA512_ARM) += sha512-arm.o obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha-neon.o obj-$(CONFIG_CRYPTO_NHPOLY1305_NEON) += nhpoly1305-neon.o -ce-obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o -ce-obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o -ce-obj-$(CONFIG_CRYPTO_SHA2_ARM_CE) += sha2-arm-ce.o -ce-obj-$(CONFIG_CRYPTO_GHASH_ARM_CE) += ghash-arm-ce.o -ce-obj-$(CONFIG_CRYPTO_CRCT10DIF_ARM_CE) += crct10dif-arm-ce.o -crc-obj-$(CONFIG_CRYPTO_CRC32_ARM_CE) += crc32-arm-ce.o - -ifneq ($(crc-obj-y)$(crc-obj-m),) -ifeq ($(call as-instr,.arch armv8-a\n.arch_extension crc,y,n),y) -ce-obj-y += $(crc-obj-y) -ce-obj-m += $(crc-obj-m) -else -$(warning These CRC Extensions modules need binutils 2.23 or higher) -$(warning $(crc-obj-y) $(crc-obj-m)) -endif -endif - -ifneq ($(ce-obj-y)$(ce-obj-m),) -ifeq ($(call as-instr,.fpu crypto-neon-fp-armv8,y,n),y) -obj-y += $(ce-obj-y) -obj-m += $(ce-obj-m) -else -$(warning These ARMv8 Crypto Extensions modules need binutils 2.23 or higher) -$(warning $(ce-obj-y) $(ce-obj-m)) -endif -endif 
+obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o +obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o +obj-$(CONFIG_CRYPTO_SHA2_ARM_CE) += sha2-arm-ce.o +obj-$(CONFIG_CRYPTO_GHASH_ARM_CE) += ghash-arm-ce.o +obj-$(CONFIG_CRYPTO_CRCT10DIF_ARM_CE) += crct10dif-arm-ce.o +obj-$(CONFIG_CRYPTO_CRC32_ARM_CE) += crc32-arm-ce.o aes-arm-y := aes-cipher-core.o aes-cipher-glue.o aes-arm-bs-y := aes-neonbs-core.o aes-neonbs-glue.o diff --git a/arch/arm/crypto/aes-ce-core.S b/arch/arm/crypto/aes-ce-core.S index b978cdf133af..4d1707388d94 100644 --- a/arch/arm/crypto/aes-ce-core.S +++ b/arch/arm/crypto/aes-ce-core.S @@ -9,6 +9,7 @@ #include .text + .arch armv8-a .fpu crypto-neon-fp-armv8 .align 3 diff --git a/arch/arm/crypto/crct10dif-ce-core.S b/arch/arm/crypto/crct10dif-ce-core.S index 86be258a803f..46c02c518a30 100644 --- a/arch/arm/crypto/crct10dif-ce-core.S +++ b/arch/arm/crypto/crct10dif-ce-core.S @@ -72,7 +72,7 @@ #endif .text - .arch armv7-a + .arch armv8-a .fpu crypto-neon-fp-armv8 init_crc .req r0 diff --git a/arch/arm/crypto/ghash-ce-core.S b/arch/arm/crypto/ghash-ce-core.S index c47fe81abcb0..534c9647726d 100644 --- a/arch/arm/crypto/ghash-ce-core.S +++ b/arch/arm/crypto/ghash-ce-core.S @@ -88,6 +88,7 @@ T3_H .req d17 .text + .arch armv8-a .fpu crypto-neon-fp-armv8 .macro __pmull_p64, rd, rn, rm, b1, b2, b3, b4 diff --git a/arch/arm/crypto/sha1-ce-core.S b/arch/arm/crypto/sha1-ce-core.S index 49a74a441aec..8a702e051738 100644 --- a/arch/arm/crypto/sha1-ce-core.S +++ b/arch/arm/crypto/sha1-ce-core.S @@ -10,6 +10,7 @@ #include .text + .arch armv8-a .fpu crypto-neon-fp-armv8 k0 .req q0 diff --git a/arch/arm/crypto/sha2-ce-core.S b/arch/arm/crypto/sha2-ce-core.S index 4ad517577e23..b6369d2440a1 100644 --- a/arch/arm/crypto/sha2-ce-core.S +++ b/arch/arm/crypto/sha2-ce-core.S @@ -10,6 +10,7 @@ #include .text + .arch armv8-a .fpu crypto-neon-fp-armv8 k0 .req q7 -- cgit v1.2.3 From 64db5e7439fb582e394ea413822bd1a43a47bc55 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 11 Oct 2019 21:38:48 -0700 Subject: crypto: sparc/aes - convert to skcipher API Convert the glue code for the SPARC64 AES opcodes implementations of AES-ECB, AES-CBC, and AES-CTR from the deprecated "blkcipher" API to the "skcipher" API. This is needed in order for the blkcipher API to be removed. 
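[ Editor's note: all of the converted modes below share one canonical skcipher walk shape; a condensed sketch of the pattern, using ecb_encrypt() as the example (identifiers as in the hunks):

	err = skcipher_walk_virt(&walk, req, true);
	if (err)
		return err;

	ctx->ops->load_encrypt_keys(&ctx->key[0]);
	while ((nbytes = walk.nbytes) != 0) {
		/* process whole blocks only; skcipher_walk_done() is
		 * told how many bytes remain unprocessed */
		ctx->ops->ecb_encrypt(&ctx->key[0], walk.src.virt.addr,
				      walk.dst.virt.addr,
				      round_down(nbytes, AES_BLOCK_SIZE));
		err = skcipher_walk_done(&walk, nbytes % AES_BLOCK_SIZE);
	}
	fprs_write(0);
	return err;
]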
Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/sparc/crypto/aes_glue.c | 310 ++++++++++++++++++++----------------------- crypto/Kconfig | 3 +- 2 files changed, 144 insertions(+), 169 deletions(-) (limited to 'arch') diff --git a/arch/sparc/crypto/aes_glue.c b/arch/sparc/crypto/aes_glue.c index 7b946b3dee9d..0f5a501c95a9 100644 --- a/arch/sparc/crypto/aes_glue.c +++ b/arch/sparc/crypto/aes_glue.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -197,6 +198,12 @@ static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, return 0; } +static int aes_set_key_skcipher(struct crypto_skcipher *tfm, const u8 *in_key, + unsigned int key_len) +{ + return aes_set_key(crypto_skcipher_tfm(tfm), in_key, key_len); +} + static void crypto_aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) { struct crypto_sparc64_aes_ctx *ctx = crypto_tfm_ctx(tfm); @@ -211,131 +218,108 @@ static void crypto_aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) ctx->ops->decrypt(&ctx->key[0], (const u32 *) src, (u32 *) dst); } -#define AES_BLOCK_MASK (~(AES_BLOCK_SIZE-1)) - -static int ecb_encrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int ecb_encrypt(struct skcipher_request *req) { - struct crypto_sparc64_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - struct blkcipher_walk walk; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + const struct crypto_sparc64_aes_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; int err; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + err = skcipher_walk_virt(&walk, req, true); + if (err) + return err; ctx->ops->load_encrypt_keys(&ctx->key[0]); - while ((nbytes = walk.nbytes)) { - unsigned int block_len = nbytes & AES_BLOCK_MASK; - - if (likely(block_len)) { - ctx->ops->ecb_encrypt(&ctx->key[0], - (const u64 *)walk.src.virt.addr, - (u64 *) walk.dst.virt.addr, - block_len); - } - nbytes &= AES_BLOCK_SIZE - 1; - err = blkcipher_walk_done(desc, &walk, nbytes); + while ((nbytes = walk.nbytes) != 0) { + ctx->ops->ecb_encrypt(&ctx->key[0], walk.src.virt.addr, + walk.dst.virt.addr, + round_down(nbytes, AES_BLOCK_SIZE)); + err = skcipher_walk_done(&walk, nbytes % AES_BLOCK_SIZE); } fprs_write(0); return err; } -static int ecb_decrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int ecb_decrypt(struct skcipher_request *req) { - struct crypto_sparc64_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - struct blkcipher_walk walk; - u64 *key_end; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + const struct crypto_sparc64_aes_ctx *ctx = crypto_skcipher_ctx(tfm); + const u64 *key_end; + struct skcipher_walk walk; + unsigned int nbytes; int err; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + err = skcipher_walk_virt(&walk, req, true); + if (err) + return err; ctx->ops->load_decrypt_keys(&ctx->key[0]); key_end = &ctx->key[ctx->expanded_key_length / sizeof(u64)]; - while ((nbytes = walk.nbytes)) { - unsigned int block_len = nbytes & AES_BLOCK_MASK; - - if (likely(block_len)) { - ctx->ops->ecb_decrypt(key_end, - (const u64 *) walk.src.virt.addr, - (u64 *) walk.dst.virt.addr, block_len); - } - nbytes &= AES_BLOCK_SIZE - 1; - err = blkcipher_walk_done(desc, &walk, nbytes); 
+ while ((nbytes = walk.nbytes) != 0) { + ctx->ops->ecb_decrypt(key_end, walk.src.virt.addr, + walk.dst.virt.addr, + round_down(nbytes, AES_BLOCK_SIZE)); + err = skcipher_walk_done(&walk, nbytes % AES_BLOCK_SIZE); } fprs_write(0); return err; } -static int cbc_encrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int cbc_encrypt(struct skcipher_request *req) { - struct crypto_sparc64_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - struct blkcipher_walk walk; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + const struct crypto_sparc64_aes_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; int err; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + err = skcipher_walk_virt(&walk, req, true); + if (err) + return err; ctx->ops->load_encrypt_keys(&ctx->key[0]); - while ((nbytes = walk.nbytes)) { - unsigned int block_len = nbytes & AES_BLOCK_MASK; - - if (likely(block_len)) { - ctx->ops->cbc_encrypt(&ctx->key[0], - (const u64 *)walk.src.virt.addr, - (u64 *) walk.dst.virt.addr, - block_len, (u64 *) walk.iv); - } - nbytes &= AES_BLOCK_SIZE - 1; - err = blkcipher_walk_done(desc, &walk, nbytes); + while ((nbytes = walk.nbytes) != 0) { + ctx->ops->cbc_encrypt(&ctx->key[0], walk.src.virt.addr, + walk.dst.virt.addr, + round_down(nbytes, AES_BLOCK_SIZE), + walk.iv); + err = skcipher_walk_done(&walk, nbytes % AES_BLOCK_SIZE); } fprs_write(0); return err; } -static int cbc_decrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int cbc_decrypt(struct skcipher_request *req) { - struct crypto_sparc64_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - struct blkcipher_walk walk; - u64 *key_end; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + const struct crypto_sparc64_aes_ctx *ctx = crypto_skcipher_ctx(tfm); + const u64 *key_end; + struct skcipher_walk walk; + unsigned int nbytes; int err; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + err = skcipher_walk_virt(&walk, req, true); + if (err) + return err; ctx->ops->load_decrypt_keys(&ctx->key[0]); key_end = &ctx->key[ctx->expanded_key_length / sizeof(u64)]; - while ((nbytes = walk.nbytes)) { - unsigned int block_len = nbytes & AES_BLOCK_MASK; - - if (likely(block_len)) { - ctx->ops->cbc_decrypt(key_end, - (const u64 *) walk.src.virt.addr, - (u64 *) walk.dst.virt.addr, - block_len, (u64 *) walk.iv); - } - nbytes &= AES_BLOCK_SIZE - 1; - err = blkcipher_walk_done(desc, &walk, nbytes); + while ((nbytes = walk.nbytes) != 0) { + ctx->ops->cbc_decrypt(key_end, walk.src.virt.addr, + walk.dst.virt.addr, + round_down(nbytes, AES_BLOCK_SIZE), + walk.iv); + err = skcipher_walk_done(&walk, nbytes % AES_BLOCK_SIZE); } fprs_write(0); return err; } -static void ctr_crypt_final(struct crypto_sparc64_aes_ctx *ctx, - struct blkcipher_walk *walk) +static void ctr_crypt_final(const struct crypto_sparc64_aes_ctx *ctx, + struct skcipher_walk *walk) { u8 *ctrblk = walk->iv; u64 keystream[AES_BLOCK_SIZE / sizeof(u64)]; @@ -349,40 +333,35 @@ static void ctr_crypt_final(struct crypto_sparc64_aes_ctx *ctx, crypto_inc(ctrblk, AES_BLOCK_SIZE); } -static int ctr_crypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int ctr_crypt(struct skcipher_request *req) { 
- struct crypto_sparc64_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - struct blkcipher_walk walk; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + const struct crypto_sparc64_aes_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; int err; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE); - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + err = skcipher_walk_virt(&walk, req, true); + if (err) + return err; ctx->ops->load_encrypt_keys(&ctx->key[0]); while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) { - unsigned int block_len = nbytes & AES_BLOCK_MASK; - - if (likely(block_len)) { - ctx->ops->ctr_crypt(&ctx->key[0], - (const u64 *)walk.src.virt.addr, - (u64 *) walk.dst.virt.addr, - block_len, (u64 *) walk.iv); - } - nbytes &= AES_BLOCK_SIZE - 1; - err = blkcipher_walk_done(desc, &walk, nbytes); + ctx->ops->ctr_crypt(&ctx->key[0], walk.src.virt.addr, + walk.dst.virt.addr, + round_down(nbytes, AES_BLOCK_SIZE), + walk.iv); + err = skcipher_walk_done(&walk, nbytes % AES_BLOCK_SIZE); } if (walk.nbytes) { ctr_crypt_final(ctx, &walk); - err = blkcipher_walk_done(desc, &walk, 0); + err = skcipher_walk_done(&walk, 0); } fprs_write(0); return err; } -static struct crypto_alg algs[] = { { +static struct crypto_alg cipher_alg = { .cra_name = "aes", .cra_driver_name = "aes-sparc64", .cra_priority = SPARC_CR_OPCODE_PRIORITY, @@ -400,66 +379,53 @@ static struct crypto_alg algs[] = { { .cia_decrypt = crypto_aes_decrypt } } -}, { - .cra_name = "ecb(aes)", - .cra_driver_name = "ecb-aes-sparc64", - .cra_priority = SPARC_CR_OPCODE_PRIORITY, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct crypto_sparc64_aes_ctx), - .cra_alignmask = 7, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .setkey = aes_set_key, - .encrypt = ecb_encrypt, - .decrypt = ecb_decrypt, - }, - }, -}, { - .cra_name = "cbc(aes)", - .cra_driver_name = "cbc-aes-sparc64", - .cra_priority = SPARC_CR_OPCODE_PRIORITY, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct crypto_sparc64_aes_ctx), - .cra_alignmask = 7, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = aes_set_key, - .encrypt = cbc_encrypt, - .decrypt = cbc_decrypt, - }, - }, -}, { - .cra_name = "ctr(aes)", - .cra_driver_name = "ctr-aes-sparc64", - .cra_priority = SPARC_CR_OPCODE_PRIORITY, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct crypto_sparc64_aes_ctx), - .cra_alignmask = 7, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = aes_set_key, - .encrypt = ctr_crypt, - .decrypt = ctr_crypt, - }, - }, -} }; +}; + +static struct skcipher_alg skcipher_algs[] = { + { + .base.cra_name = "ecb(aes)", + .base.cra_driver_name = "ecb-aes-sparc64", + .base.cra_priority = SPARC_CR_OPCODE_PRIORITY, + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct crypto_sparc64_aes_ctx), + .base.cra_alignmask = 7, + .base.cra_module = THIS_MODULE, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize 
= AES_MAX_KEY_SIZE, + .setkey = aes_set_key_skcipher, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, { + .base.cra_name = "cbc(aes)", + .base.cra_driver_name = "cbc-aes-sparc64", + .base.cra_priority = SPARC_CR_OPCODE_PRIORITY, + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct crypto_sparc64_aes_ctx), + .base.cra_alignmask = 7, + .base.cra_module = THIS_MODULE, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = aes_set_key_skcipher, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, { + .base.cra_name = "ctr(aes)", + .base.cra_driver_name = "ctr-aes-sparc64", + .base.cra_priority = SPARC_CR_OPCODE_PRIORITY, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct crypto_sparc64_aes_ctx), + .base.cra_alignmask = 7, + .base.cra_module = THIS_MODULE, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = aes_set_key_skcipher, + .encrypt = ctr_crypt, + .decrypt = ctr_crypt, + .chunksize = AES_BLOCK_SIZE, + } +}; static bool __init sparc64_has_aes_opcode(void) { @@ -477,17 +443,27 @@ static bool __init sparc64_has_aes_opcode(void) static int __init aes_sparc64_mod_init(void) { - if (sparc64_has_aes_opcode()) { - pr_info("Using sparc64 aes opcodes optimized AES implementation\n"); - return crypto_register_algs(algs, ARRAY_SIZE(algs)); + int err; + + if (!sparc64_has_aes_opcode()) { + pr_info("sparc64 aes opcodes not available.\n"); + return -ENODEV; } - pr_info("sparc64 aes opcodes not available.\n"); - return -ENODEV; + pr_info("Using sparc64 aes opcodes optimized AES implementation\n"); + err = crypto_register_alg(&cipher_alg); + if (err) + return err; + err = crypto_register_skciphers(skcipher_algs, + ARRAY_SIZE(skcipher_algs)); + if (err) + crypto_unregister_alg(&cipher_alg); + return err; } static void __exit aes_sparc64_mod_fini(void) { - crypto_unregister_algs(algs, ARRAY_SIZE(algs)); + crypto_unregister_alg(&cipher_alg); + crypto_unregister_skciphers(skcipher_algs, ARRAY_SIZE(skcipher_algs)); } module_init(aes_sparc64_mod_init); diff --git a/crypto/Kconfig b/crypto/Kconfig index 29472fb795f3..728978838578 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1098,8 +1098,7 @@ config CRYPTO_AES_NI_INTEL config CRYPTO_AES_SPARC64 tristate "AES cipher algorithms (SPARC64)" depends on SPARC64 - select CRYPTO_CRYPTD - select CRYPTO_ALGAPI + select CRYPTO_BLKCIPHER help Use SPARC64 crypto opcodes for AES algorithm. -- cgit v1.2.3 From c72a26ef6b259ccdbaa3f866b404d6ce1312ec30 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 11 Oct 2019 21:38:49 -0700 Subject: crypto: sparc/camellia - convert to skcipher API Convert the glue code for the SPARC64 Camellia opcodes implementations of Camellia-ECB and Camellia-CBC from the deprecated "blkcipher" API to the "skcipher" API. This is needed in order for the blkcipher API to be removed. 
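[ Editor's note: the conversion also splits registration in two; the single-block "camellia" cipher stays a crypto_alg while the ECB/CBC modes register as skciphers, and the module init unwinds the first registration if the second fails (condensed from the hunk):

	err = crypto_register_alg(&cipher_alg);
	if (err)
		return err;
	err = crypto_register_skciphers(skcipher_algs,
					ARRAY_SIZE(skcipher_algs));
	if (err)
		crypto_unregister_alg(&cipher_alg);	/* roll back */
	return err;
]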
Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/sparc/crypto/camellia_glue.c | 217 +++++++++++++++++--------------------- crypto/Kconfig | 1 + 2 files changed, 96 insertions(+), 122 deletions(-) (limited to 'arch') diff --git a/arch/sparc/crypto/camellia_glue.c b/arch/sparc/crypto/camellia_glue.c index 3823f9491a72..1700f863748c 100644 --- a/arch/sparc/crypto/camellia_glue.c +++ b/arch/sparc/crypto/camellia_glue.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -52,6 +53,12 @@ static int camellia_set_key(struct crypto_tfm *tfm, const u8 *_in_key, return 0; } +static int camellia_set_key_skcipher(struct crypto_skcipher *tfm, + const u8 *in_key, unsigned int key_len) +{ + return camellia_set_key(crypto_skcipher_tfm(tfm), in_key, key_len); +} + extern void camellia_sparc64_crypt(const u64 *key, const u32 *input, u32 *output, unsigned int key_len); @@ -81,61 +88,46 @@ typedef void ecb_crypt_op(const u64 *input, u64 *output, unsigned int len, extern ecb_crypt_op camellia_sparc64_ecb_crypt_3_grand_rounds; extern ecb_crypt_op camellia_sparc64_ecb_crypt_4_grand_rounds; -#define CAMELLIA_BLOCK_MASK (~(CAMELLIA_BLOCK_SIZE - 1)) - -static int __ecb_crypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes, bool encrypt) +static int __ecb_crypt(struct skcipher_request *req, bool encrypt) { - struct camellia_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - struct blkcipher_walk walk; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + const struct camellia_sparc64_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; ecb_crypt_op *op; const u64 *key; + unsigned int nbytes; int err; op = camellia_sparc64_ecb_crypt_3_grand_rounds; if (ctx->key_len != 16) op = camellia_sparc64_ecb_crypt_4_grand_rounds; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + err = skcipher_walk_virt(&walk, req, true); + if (err) + return err; if (encrypt) key = &ctx->encrypt_key[0]; else key = &ctx->decrypt_key[0]; camellia_sparc64_load_keys(key, ctx->key_len); - while ((nbytes = walk.nbytes)) { - unsigned int block_len = nbytes & CAMELLIA_BLOCK_MASK; - - if (likely(block_len)) { - const u64 *src64; - u64 *dst64; - - src64 = (const u64 *)walk.src.virt.addr; - dst64 = (u64 *) walk.dst.virt.addr; - op(src64, dst64, block_len, key); - } - nbytes &= CAMELLIA_BLOCK_SIZE - 1; - err = blkcipher_walk_done(desc, &walk, nbytes); + while ((nbytes = walk.nbytes) != 0) { + op(walk.src.virt.addr, walk.dst.virt.addr, + round_down(nbytes, CAMELLIA_BLOCK_SIZE), key); + err = skcipher_walk_done(&walk, nbytes % CAMELLIA_BLOCK_SIZE); } fprs_write(0); return err; } -static int ecb_encrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int ecb_encrypt(struct skcipher_request *req) { - return __ecb_crypt(desc, dst, src, nbytes, true); + return __ecb_crypt(req, true); } -static int ecb_decrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int ecb_decrypt(struct skcipher_request *req) { - return __ecb_crypt(desc, dst, src, nbytes, false); + return __ecb_crypt(req, false); } typedef void cbc_crypt_op(const u64 *input, u64 *output, unsigned int len, @@ -146,85 +138,65 @@ extern cbc_crypt_op camellia_sparc64_cbc_encrypt_4_grand_rounds; extern cbc_crypt_op camellia_sparc64_cbc_decrypt_3_grand_rounds; extern cbc_crypt_op 
camellia_sparc64_cbc_decrypt_4_grand_rounds; -static int cbc_encrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int cbc_encrypt(struct skcipher_request *req) { - struct camellia_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - struct blkcipher_walk walk; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + const struct camellia_sparc64_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; cbc_crypt_op *op; const u64 *key; + unsigned int nbytes; int err; op = camellia_sparc64_cbc_encrypt_3_grand_rounds; if (ctx->key_len != 16) op = camellia_sparc64_cbc_encrypt_4_grand_rounds; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + err = skcipher_walk_virt(&walk, req, true); + if (err) + return err; key = &ctx->encrypt_key[0]; camellia_sparc64_load_keys(key, ctx->key_len); - while ((nbytes = walk.nbytes)) { - unsigned int block_len = nbytes & CAMELLIA_BLOCK_MASK; - - if (likely(block_len)) { - const u64 *src64; - u64 *dst64; - - src64 = (const u64 *)walk.src.virt.addr; - dst64 = (u64 *) walk.dst.virt.addr; - op(src64, dst64, block_len, key, - (u64 *) walk.iv); - } - nbytes &= CAMELLIA_BLOCK_SIZE - 1; - err = blkcipher_walk_done(desc, &walk, nbytes); + while ((nbytes = walk.nbytes) != 0) { + op(walk.src.virt.addr, walk.dst.virt.addr, + round_down(nbytes, CAMELLIA_BLOCK_SIZE), key, walk.iv); + err = skcipher_walk_done(&walk, nbytes % CAMELLIA_BLOCK_SIZE); } fprs_write(0); return err; } -static int cbc_decrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int cbc_decrypt(struct skcipher_request *req) { - struct camellia_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - struct blkcipher_walk walk; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + const struct camellia_sparc64_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; cbc_crypt_op *op; const u64 *key; + unsigned int nbytes; int err; op = camellia_sparc64_cbc_decrypt_3_grand_rounds; if (ctx->key_len != 16) op = camellia_sparc64_cbc_decrypt_4_grand_rounds; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + err = skcipher_walk_virt(&walk, req, true); + if (err) + return err; key = &ctx->decrypt_key[0]; camellia_sparc64_load_keys(key, ctx->key_len); - while ((nbytes = walk.nbytes)) { - unsigned int block_len = nbytes & CAMELLIA_BLOCK_MASK; - - if (likely(block_len)) { - const u64 *src64; - u64 *dst64; - - src64 = (const u64 *)walk.src.virt.addr; - dst64 = (u64 *) walk.dst.virt.addr; - op(src64, dst64, block_len, key, - (u64 *) walk.iv); - } - nbytes &= CAMELLIA_BLOCK_SIZE - 1; - err = blkcipher_walk_done(desc, &walk, nbytes); + while ((nbytes = walk.nbytes) != 0) { + op(walk.src.virt.addr, walk.dst.virt.addr, + round_down(nbytes, CAMELLIA_BLOCK_SIZE), key, walk.iv); + err = skcipher_walk_done(&walk, nbytes % CAMELLIA_BLOCK_SIZE); } fprs_write(0); return err; } -static struct crypto_alg algs[] = { { +static struct crypto_alg cipher_alg = { .cra_name = "camellia", .cra_driver_name = "camellia-sparc64", .cra_priority = SPARC_CR_OPCODE_PRIORITY, @@ -242,46 +214,37 @@ static struct crypto_alg algs[] = { { .cia_decrypt = camellia_decrypt } } -}, { - .cra_name = "ecb(camellia)", - .cra_driver_name = "ecb-camellia-sparc64", - .cra_priority = SPARC_CR_OPCODE_PRIORITY, - .cra_flags = 
CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = CAMELLIA_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct camellia_sparc64_ctx), - .cra_alignmask = 7, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE, - .max_keysize = CAMELLIA_MAX_KEY_SIZE, - .setkey = camellia_set_key, - .encrypt = ecb_encrypt, - .decrypt = ecb_decrypt, - }, - }, -}, { - .cra_name = "cbc(camellia)", - .cra_driver_name = "cbc-camellia-sparc64", - .cra_priority = SPARC_CR_OPCODE_PRIORITY, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = CAMELLIA_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct camellia_sparc64_ctx), - .cra_alignmask = 7, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE, - .max_keysize = CAMELLIA_MAX_KEY_SIZE, - .ivsize = CAMELLIA_BLOCK_SIZE, - .setkey = camellia_set_key, - .encrypt = cbc_encrypt, - .decrypt = cbc_decrypt, - }, - }, -} +}; + +static struct skcipher_alg skcipher_algs[] = { + { + .base.cra_name = "ecb(camellia)", + .base.cra_driver_name = "ecb-camellia-sparc64", + .base.cra_priority = SPARC_CR_OPCODE_PRIORITY, + .base.cra_blocksize = CAMELLIA_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct camellia_sparc64_ctx), + .base.cra_alignmask = 7, + .base.cra_module = THIS_MODULE, + .min_keysize = CAMELLIA_MIN_KEY_SIZE, + .max_keysize = CAMELLIA_MAX_KEY_SIZE, + .setkey = camellia_set_key_skcipher, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, { + .base.cra_name = "cbc(camellia)", + .base.cra_driver_name = "cbc-camellia-sparc64", + .base.cra_priority = SPARC_CR_OPCODE_PRIORITY, + .base.cra_blocksize = CAMELLIA_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct camellia_sparc64_ctx), + .base.cra_alignmask = 7, + .base.cra_module = THIS_MODULE, + .min_keysize = CAMELLIA_MIN_KEY_SIZE, + .max_keysize = CAMELLIA_MAX_KEY_SIZE, + .ivsize = CAMELLIA_BLOCK_SIZE, + .setkey = camellia_set_key_skcipher, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + } }; static bool __init sparc64_has_camellia_opcode(void) @@ -300,17 +263,27 @@ static bool __init sparc64_has_camellia_opcode(void) static int __init camellia_sparc64_mod_init(void) { - if (sparc64_has_camellia_opcode()) { - pr_info("Using sparc64 camellia opcodes optimized CAMELLIA implementation\n"); - return crypto_register_algs(algs, ARRAY_SIZE(algs)); + int err; + + if (!sparc64_has_camellia_opcode()) { + pr_info("sparc64 camellia opcodes not available.\n"); + return -ENODEV; } - pr_info("sparc64 camellia opcodes not available.\n"); - return -ENODEV; + pr_info("Using sparc64 camellia opcodes optimized CAMELLIA implementation\n"); + err = crypto_register_alg(&cipher_alg); + if (err) + return err; + err = crypto_register_skciphers(skcipher_algs, + ARRAY_SIZE(skcipher_algs)); + if (err) + crypto_unregister_alg(&cipher_alg); + return err; } static void __exit camellia_sparc64_mod_fini(void) { - crypto_unregister_algs(algs, ARRAY_SIZE(algs)); + crypto_unregister_alg(&cipher_alg); + crypto_unregister_skciphers(skcipher_algs, ARRAY_SIZE(skcipher_algs)); } module_init(camellia_sparc64_mod_init); diff --git a/crypto/Kconfig b/crypto/Kconfig index 728978838578..d331b9e85039 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1275,6 +1275,7 @@ config CRYPTO_CAMELLIA_SPARC64 depends on SPARC64 depends on CRYPTO select CRYPTO_ALGAPI + select CRYPTO_BLKCIPHER help Camellia cipher algorithm module (SPARC64). 
-- cgit v1.2.3 From cd5d2f8457468df573085f91dd7b37ab8350a9af Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 11 Oct 2019 21:38:50 -0700 Subject: crypto: sparc/des - convert to skcipher API Convert the glue code for the SPARC64 DES opcodes implementations of DES-ECB, DES-CBC, 3DES-ECB, and 3DES-CBC from the deprecated "blkcipher" API to the "skcipher" API. This is needed in order for the blkcipher API to be removed. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/sparc/crypto/des_glue.c | 499 ++++++++++++++++++++----------------------- crypto/Kconfig | 1 + 2 files changed, 228 insertions(+), 272 deletions(-) (limited to 'arch') diff --git a/arch/sparc/crypto/des_glue.c b/arch/sparc/crypto/des_glue.c index db6010b4e52e..a499102bf706 100644 --- a/arch/sparc/crypto/des_glue.c +++ b/arch/sparc/crypto/des_glue.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -61,6 +62,12 @@ static int des_set_key(struct crypto_tfm *tfm, const u8 *key, return 0; } +static int des_set_key_skcipher(struct crypto_skcipher *tfm, const u8 *key, + unsigned int keylen) +{ + return des_set_key(crypto_skcipher_tfm(tfm), key, keylen); +} + extern void des_sparc64_crypt(const u64 *key, const u64 *input, u64 *output); @@ -85,113 +92,90 @@ extern void des_sparc64_load_keys(const u64 *key); extern void des_sparc64_ecb_crypt(const u64 *input, u64 *output, unsigned int len); -#define DES_BLOCK_MASK (~(DES_BLOCK_SIZE - 1)) - -static int __ecb_crypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes, bool encrypt) +static int __ecb_crypt(struct skcipher_request *req, bool encrypt) { - struct des_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - struct blkcipher_walk walk; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + const struct des_sparc64_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; int err; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + err = skcipher_walk_virt(&walk, req, true); + if (err) + return err; if (encrypt) des_sparc64_load_keys(&ctx->encrypt_expkey[0]); else des_sparc64_load_keys(&ctx->decrypt_expkey[0]); - while ((nbytes = walk.nbytes)) { - unsigned int block_len = nbytes & DES_BLOCK_MASK; - - if (likely(block_len)) { - des_sparc64_ecb_crypt((const u64 *)walk.src.virt.addr, - (u64 *) walk.dst.virt.addr, - block_len); - } - nbytes &= DES_BLOCK_SIZE - 1; - err = blkcipher_walk_done(desc, &walk, nbytes); + while ((nbytes = walk.nbytes) != 0) { + des_sparc64_ecb_crypt(walk.src.virt.addr, walk.dst.virt.addr, + round_down(nbytes, DES_BLOCK_SIZE)); + err = skcipher_walk_done(&walk, nbytes % DES_BLOCK_SIZE); } fprs_write(0); return err; } -static int ecb_encrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int ecb_encrypt(struct skcipher_request *req) { - return __ecb_crypt(desc, dst, src, nbytes, true); + return __ecb_crypt(req, true); } -static int ecb_decrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int ecb_decrypt(struct skcipher_request *req) { - return __ecb_crypt(desc, dst, src, nbytes, false); + return __ecb_crypt(req, false); } extern void des_sparc64_cbc_encrypt(const u64 *input, u64 *output, unsigned int len, u64 *iv); -static int cbc_encrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - 
unsigned int nbytes) +extern void des_sparc64_cbc_decrypt(const u64 *input, u64 *output, + unsigned int len, u64 *iv); + +static int __cbc_crypt(struct skcipher_request *req, bool encrypt) { - struct des_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - struct blkcipher_walk walk; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + const struct des_sparc64_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; int err; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - - des_sparc64_load_keys(&ctx->encrypt_expkey[0]); - while ((nbytes = walk.nbytes)) { - unsigned int block_len = nbytes & DES_BLOCK_MASK; + err = skcipher_walk_virt(&walk, req, true); + if (err) + return err; - if (likely(block_len)) { - des_sparc64_cbc_encrypt((const u64 *)walk.src.virt.addr, - (u64 *) walk.dst.virt.addr, - block_len, (u64 *) walk.iv); - } - nbytes &= DES_BLOCK_SIZE - 1; - err = blkcipher_walk_done(desc, &walk, nbytes); + if (encrypt) + des_sparc64_load_keys(&ctx->encrypt_expkey[0]); + else + des_sparc64_load_keys(&ctx->decrypt_expkey[0]); + while ((nbytes = walk.nbytes) != 0) { + if (encrypt) + des_sparc64_cbc_encrypt(walk.src.virt.addr, + walk.dst.virt.addr, + round_down(nbytes, + DES_BLOCK_SIZE), + walk.iv); + else + des_sparc64_cbc_decrypt(walk.src.virt.addr, + walk.dst.virt.addr, + round_down(nbytes, + DES_BLOCK_SIZE), + walk.iv); + err = skcipher_walk_done(&walk, nbytes % DES_BLOCK_SIZE); } fprs_write(0); return err; } -extern void des_sparc64_cbc_decrypt(const u64 *input, u64 *output, - unsigned int len, u64 *iv); - -static int cbc_decrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int cbc_encrypt(struct skcipher_request *req) { - struct des_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - struct blkcipher_walk walk; - int err; - - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - - des_sparc64_load_keys(&ctx->decrypt_expkey[0]); - while ((nbytes = walk.nbytes)) { - unsigned int block_len = nbytes & DES_BLOCK_MASK; + return __cbc_crypt(req, true); +} - if (likely(block_len)) { - des_sparc64_cbc_decrypt((const u64 *)walk.src.virt.addr, - (u64 *) walk.dst.virt.addr, - block_len, (u64 *) walk.iv); - } - nbytes &= DES_BLOCK_SIZE - 1; - err = blkcipher_walk_done(desc, &walk, nbytes); - } - fprs_write(0); - return err; +static int cbc_decrypt(struct skcipher_request *req) +{ + return __cbc_crypt(req, false); } static int des3_ede_set_key(struct crypto_tfm *tfm, const u8 *key, @@ -227,6 +211,12 @@ static int des3_ede_set_key(struct crypto_tfm *tfm, const u8 *key, return 0; } +static int des3_ede_set_key_skcipher(struct crypto_skcipher *tfm, const u8 *key, + unsigned int keylen) +{ + return des3_ede_set_key(crypto_skcipher_tfm(tfm), key, keylen); +} + extern void des3_ede_sparc64_crypt(const u64 *key, const u64 *input, u64 *output); @@ -251,241 +241,196 @@ extern void des3_ede_sparc64_load_keys(const u64 *key); extern void des3_ede_sparc64_ecb_crypt(const u64 *expkey, const u64 *input, u64 *output, unsigned int len); -static int __ecb3_crypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes, bool encrypt) +static int __ecb3_crypt(struct skcipher_request *req, bool encrypt) { - struct des3_ede_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - struct blkcipher_walk 
walk; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + const struct des3_ede_sparc64_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; const u64 *K; + unsigned int nbytes; int err; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + err = skcipher_walk_virt(&walk, req, true); + if (err) + return err; if (encrypt) K = &ctx->encrypt_expkey[0]; else K = &ctx->decrypt_expkey[0]; des3_ede_sparc64_load_keys(K); - while ((nbytes = walk.nbytes)) { - unsigned int block_len = nbytes & DES_BLOCK_MASK; - - if (likely(block_len)) { - const u64 *src64 = (const u64 *)walk.src.virt.addr; - des3_ede_sparc64_ecb_crypt(K, src64, - (u64 *) walk.dst.virt.addr, - block_len); - } - nbytes &= DES_BLOCK_SIZE - 1; - err = blkcipher_walk_done(desc, &walk, nbytes); + while ((nbytes = walk.nbytes) != 0) { + des3_ede_sparc64_ecb_crypt(K, walk.src.virt.addr, + walk.dst.virt.addr, + round_down(nbytes, DES_BLOCK_SIZE)); + err = skcipher_walk_done(&walk, nbytes % DES_BLOCK_SIZE); } fprs_write(0); return err; } -static int ecb3_encrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int ecb3_encrypt(struct skcipher_request *req) { - return __ecb3_crypt(desc, dst, src, nbytes, true); + return __ecb3_crypt(req, true); } -static int ecb3_decrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int ecb3_decrypt(struct skcipher_request *req) { - return __ecb3_crypt(desc, dst, src, nbytes, false); + return __ecb3_crypt(req, false); } extern void des3_ede_sparc64_cbc_encrypt(const u64 *expkey, const u64 *input, u64 *output, unsigned int len, u64 *iv); -static int cbc3_encrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) -{ - struct des3_ede_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - struct blkcipher_walk walk; - const u64 *K; - int err; - - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - - K = &ctx->encrypt_expkey[0]; - des3_ede_sparc64_load_keys(K); - while ((nbytes = walk.nbytes)) { - unsigned int block_len = nbytes & DES_BLOCK_MASK; - - if (likely(block_len)) { - const u64 *src64 = (const u64 *)walk.src.virt.addr; - des3_ede_sparc64_cbc_encrypt(K, src64, - (u64 *) walk.dst.virt.addr, - block_len, - (u64 *) walk.iv); - } - nbytes &= DES_BLOCK_SIZE - 1; - err = blkcipher_walk_done(desc, &walk, nbytes); - } - fprs_write(0); - return err; -} - extern void des3_ede_sparc64_cbc_decrypt(const u64 *expkey, const u64 *input, u64 *output, unsigned int len, u64 *iv); -static int cbc3_decrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int __cbc3_crypt(struct skcipher_request *req, bool encrypt) { - struct des3_ede_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - struct blkcipher_walk walk; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + const struct des3_ede_sparc64_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; const u64 *K; + unsigned int nbytes; int err; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + err = skcipher_walk_virt(&walk, req, true); + if (err) + return err; - K = &ctx->decrypt_expkey[0]; + if (encrypt) + K = &ctx->encrypt_expkey[0]; + else + K 
= &ctx->decrypt_expkey[0]; des3_ede_sparc64_load_keys(K); - while ((nbytes = walk.nbytes)) { - unsigned int block_len = nbytes & DES_BLOCK_MASK; - - if (likely(block_len)) { - const u64 *src64 = (const u64 *)walk.src.virt.addr; - des3_ede_sparc64_cbc_decrypt(K, src64, - (u64 *) walk.dst.virt.addr, - block_len, - (u64 *) walk.iv); - } - nbytes &= DES_BLOCK_SIZE - 1; - err = blkcipher_walk_done(desc, &walk, nbytes); + while ((nbytes = walk.nbytes) != 0) { + if (encrypt) + des3_ede_sparc64_cbc_encrypt(K, walk.src.virt.addr, + walk.dst.virt.addr, + round_down(nbytes, + DES_BLOCK_SIZE), + walk.iv); + else + des3_ede_sparc64_cbc_decrypt(K, walk.src.virt.addr, + walk.dst.virt.addr, + round_down(nbytes, + DES_BLOCK_SIZE), + walk.iv); + err = skcipher_walk_done(&walk, nbytes % DES_BLOCK_SIZE); } fprs_write(0); return err; } -static struct crypto_alg algs[] = { { - .cra_name = "des", - .cra_driver_name = "des-sparc64", - .cra_priority = SPARC_CR_OPCODE_PRIORITY, - .cra_flags = CRYPTO_ALG_TYPE_CIPHER, - .cra_blocksize = DES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct des_sparc64_ctx), - .cra_alignmask = 7, - .cra_module = THIS_MODULE, - .cra_u = { - .cipher = { - .cia_min_keysize = DES_KEY_SIZE, - .cia_max_keysize = DES_KEY_SIZE, - .cia_setkey = des_set_key, - .cia_encrypt = sparc_des_encrypt, - .cia_decrypt = sparc_des_decrypt +static int cbc3_encrypt(struct skcipher_request *req) +{ + return __cbc3_crypt(req, true); +} + +static int cbc3_decrypt(struct skcipher_request *req) +{ + return __cbc3_crypt(req, false); +} + +static struct crypto_alg cipher_algs[] = { + { + .cra_name = "des", + .cra_driver_name = "des-sparc64", + .cra_priority = SPARC_CR_OPCODE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_CIPHER, + .cra_blocksize = DES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct des_sparc64_ctx), + .cra_alignmask = 7, + .cra_module = THIS_MODULE, + .cra_u = { + .cipher = { + .cia_min_keysize = DES_KEY_SIZE, + .cia_max_keysize = DES_KEY_SIZE, + .cia_setkey = des_set_key, + .cia_encrypt = sparc_des_encrypt, + .cia_decrypt = sparc_des_decrypt + } } - } -}, { - .cra_name = "ecb(des)", - .cra_driver_name = "ecb-des-sparc64", - .cra_priority = SPARC_CR_OPCODE_PRIORITY, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = DES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct des_sparc64_ctx), - .cra_alignmask = 7, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = DES_KEY_SIZE, - .max_keysize = DES_KEY_SIZE, - .setkey = des_set_key, - .encrypt = ecb_encrypt, - .decrypt = ecb_decrypt, - }, - }, -}, { - .cra_name = "cbc(des)", - .cra_driver_name = "cbc-des-sparc64", - .cra_priority = SPARC_CR_OPCODE_PRIORITY, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = DES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct des_sparc64_ctx), - .cra_alignmask = 7, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = DES_KEY_SIZE, - .max_keysize = DES_KEY_SIZE, - .ivsize = DES_BLOCK_SIZE, - .setkey = des_set_key, - .encrypt = cbc_encrypt, - .decrypt = cbc_decrypt, - }, - }, -}, { - .cra_name = "des3_ede", - .cra_driver_name = "des3_ede-sparc64", - .cra_priority = SPARC_CR_OPCODE_PRIORITY, - .cra_flags = CRYPTO_ALG_TYPE_CIPHER, - .cra_blocksize = DES3_EDE_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct des3_ede_sparc64_ctx), - .cra_alignmask = 7, - .cra_module = THIS_MODULE, - .cra_u = { - .cipher = { - .cia_min_keysize = DES3_EDE_KEY_SIZE, - .cia_max_keysize = DES3_EDE_KEY_SIZE, - .cia_setkey = des3_ede_set_key, - 
.cia_encrypt = sparc_des3_ede_encrypt, - .cia_decrypt = sparc_des3_ede_decrypt + }, { + .cra_name = "des3_ede", + .cra_driver_name = "des3_ede-sparc64", + .cra_priority = SPARC_CR_OPCODE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_CIPHER, + .cra_blocksize = DES3_EDE_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct des3_ede_sparc64_ctx), + .cra_alignmask = 7, + .cra_module = THIS_MODULE, + .cra_u = { + .cipher = { + .cia_min_keysize = DES3_EDE_KEY_SIZE, + .cia_max_keysize = DES3_EDE_KEY_SIZE, + .cia_setkey = des3_ede_set_key, + .cia_encrypt = sparc_des3_ede_encrypt, + .cia_decrypt = sparc_des3_ede_decrypt + } } } -}, { - .cra_name = "ecb(des3_ede)", - .cra_driver_name = "ecb-des3_ede-sparc64", - .cra_priority = SPARC_CR_OPCODE_PRIORITY, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = DES3_EDE_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct des3_ede_sparc64_ctx), - .cra_alignmask = 7, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = DES3_EDE_KEY_SIZE, - .max_keysize = DES3_EDE_KEY_SIZE, - .setkey = des3_ede_set_key, - .encrypt = ecb3_encrypt, - .decrypt = ecb3_decrypt, - }, - }, -}, { - .cra_name = "cbc(des3_ede)", - .cra_driver_name = "cbc-des3_ede-sparc64", - .cra_priority = SPARC_CR_OPCODE_PRIORITY, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = DES3_EDE_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct des3_ede_sparc64_ctx), - .cra_alignmask = 7, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = DES3_EDE_KEY_SIZE, - .max_keysize = DES3_EDE_KEY_SIZE, - .ivsize = DES3_EDE_BLOCK_SIZE, - .setkey = des3_ede_set_key, - .encrypt = cbc3_encrypt, - .decrypt = cbc3_decrypt, - }, - }, -} }; +}; + +static struct skcipher_alg skcipher_algs[] = { + { + .base.cra_name = "ecb(des)", + .base.cra_driver_name = "ecb-des-sparc64", + .base.cra_priority = SPARC_CR_OPCODE_PRIORITY, + .base.cra_blocksize = DES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct des_sparc64_ctx), + .base.cra_alignmask = 7, + .base.cra_module = THIS_MODULE, + .min_keysize = DES_KEY_SIZE, + .max_keysize = DES_KEY_SIZE, + .setkey = des_set_key_skcipher, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, { + .base.cra_name = "cbc(des)", + .base.cra_driver_name = "cbc-des-sparc64", + .base.cra_priority = SPARC_CR_OPCODE_PRIORITY, + .base.cra_blocksize = DES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct des_sparc64_ctx), + .base.cra_alignmask = 7, + .base.cra_module = THIS_MODULE, + .min_keysize = DES_KEY_SIZE, + .max_keysize = DES_KEY_SIZE, + .ivsize = DES_BLOCK_SIZE, + .setkey = des_set_key_skcipher, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, { + .base.cra_name = "ecb(des3_ede)", + .base.cra_driver_name = "ecb-des3_ede-sparc64", + .base.cra_priority = SPARC_CR_OPCODE_PRIORITY, + .base.cra_blocksize = DES3_EDE_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct des3_ede_sparc64_ctx), + .base.cra_alignmask = 7, + .base.cra_module = THIS_MODULE, + .min_keysize = DES3_EDE_KEY_SIZE, + .max_keysize = DES3_EDE_KEY_SIZE, + .setkey = des3_ede_set_key_skcipher, + .encrypt = ecb3_encrypt, + .decrypt = ecb3_decrypt, + }, { + .base.cra_name = "cbc(des3_ede)", + .base.cra_driver_name = "cbc-des3_ede-sparc64", + .base.cra_priority = SPARC_CR_OPCODE_PRIORITY, + .base.cra_blocksize = DES3_EDE_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct des3_ede_sparc64_ctx), + .base.cra_alignmask = 7, + .base.cra_module = THIS_MODULE, + .min_keysize = DES3_EDE_KEY_SIZE, + .max_keysize = DES3_EDE_KEY_SIZE, + .ivsize 
= DES3_EDE_BLOCK_SIZE, + .setkey = des3_ede_set_key_skcipher, + .encrypt = cbc3_encrypt, + .decrypt = cbc3_decrypt, + } +}; static bool __init sparc64_has_des_opcode(void) { @@ -503,17 +448,27 @@ static bool __init sparc64_has_des_opcode(void) static int __init des_sparc64_mod_init(void) { - if (sparc64_has_des_opcode()) { - pr_info("Using sparc64 des opcodes optimized DES implementation\n"); - return crypto_register_algs(algs, ARRAY_SIZE(algs)); + int err; + + if (!sparc64_has_des_opcode()) { + pr_info("sparc64 des opcodes not available.\n"); + return -ENODEV; } - pr_info("sparc64 des opcodes not available.\n"); - return -ENODEV; + pr_info("Using sparc64 des opcodes optimized DES implementation\n"); + err = crypto_register_algs(cipher_algs, ARRAY_SIZE(cipher_algs)); + if (err) + return err; + err = crypto_register_skciphers(skcipher_algs, + ARRAY_SIZE(skcipher_algs)); + if (err) + crypto_unregister_algs(cipher_algs, ARRAY_SIZE(cipher_algs)); + return err; } static void __exit des_sparc64_mod_fini(void) { - crypto_unregister_algs(algs, ARRAY_SIZE(algs)); + crypto_unregister_algs(cipher_algs, ARRAY_SIZE(cipher_algs)); + crypto_unregister_skciphers(skcipher_algs, ARRAY_SIZE(skcipher_algs)); } module_init(des_sparc64_mod_init); diff --git a/crypto/Kconfig b/crypto/Kconfig index d331b9e85039..8c38c2b7f8e7 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1354,6 +1354,7 @@ config CRYPTO_DES_SPARC64 depends on SPARC64 select CRYPTO_ALGAPI select CRYPTO_LIB_DES + select CRYPTO_BLKCIPHER help DES cipher algorithm (FIPS 46-2), and Triple DES EDE (FIPS 46-3), optimized using SPARC64 crypto opcodes. -- cgit v1.2.3 From 7988fb2c03c8cf9936f851ab19a6d21b3c3411c0 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 12 Oct 2019 13:18:07 -0700 Subject: crypto: s390/aes - convert to skcipher API Convert the glue code for the S390 CPACF implementations of AES-ECB, AES-CBC, AES-XTS, and AES-CTR from the deprecated "blkcipher" API to the "skcipher" API. This is needed in order for the blkcipher API to be removed. Note: I made CTR use the same function for encryption and decryption, since CTR encryption and decryption are identical. 
Signed-off-by: Eric Biggers Reviewed-by: Harald Freudenberger Signed-off-by: Herbert Xu --- arch/s390/crypto/aes_s390.c | 609 +++++++++++++++++--------------------------- 1 file changed, 234 insertions(+), 375 deletions(-) (limited to 'arch') diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c index 9803e96d2924..ead0b2c9881d 100644 --- a/arch/s390/crypto/aes_s390.c +++ b/arch/s390/crypto/aes_s390.c @@ -44,7 +44,7 @@ struct s390_aes_ctx { int key_len; unsigned long fc; union { - struct crypto_sync_skcipher *blk; + struct crypto_skcipher *skcipher; struct crypto_cipher *cip; } fallback; }; @@ -54,7 +54,7 @@ struct s390_xts_ctx { u8 pcc_key[32]; int key_len; unsigned long fc; - struct crypto_sync_skcipher *fallback; + struct crypto_skcipher *fallback; }; struct gcm_sg_walk { @@ -178,66 +178,41 @@ static struct crypto_alg aes_alg = { } }; -static int setkey_fallback_blk(struct crypto_tfm *tfm, const u8 *key, - unsigned int len) +static int setkey_fallback_skcipher(struct crypto_skcipher *tfm, const u8 *key, + unsigned int len) { - struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm); - unsigned int ret; - - crypto_sync_skcipher_clear_flags(sctx->fallback.blk, - CRYPTO_TFM_REQ_MASK); - crypto_sync_skcipher_set_flags(sctx->fallback.blk, tfm->crt_flags & - CRYPTO_TFM_REQ_MASK); - - ret = crypto_sync_skcipher_setkey(sctx->fallback.blk, key, len); - - tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK; - tfm->crt_flags |= crypto_sync_skcipher_get_flags(sctx->fallback.blk) & - CRYPTO_TFM_RES_MASK; - - return ret; -} - -static int fallback_blk_dec(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) -{ - unsigned int ret; - struct crypto_blkcipher *tfm = desc->tfm; - struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(tfm); - SYNC_SKCIPHER_REQUEST_ON_STACK(req, sctx->fallback.blk); - - skcipher_request_set_sync_tfm(req, sctx->fallback.blk); - skcipher_request_set_callback(req, desc->flags, NULL, NULL); - skcipher_request_set_crypt(req, src, dst, nbytes, desc->info); - - ret = crypto_skcipher_decrypt(req); + struct s390_aes_ctx *sctx = crypto_skcipher_ctx(tfm); + int ret; - skcipher_request_zero(req); + crypto_skcipher_clear_flags(sctx->fallback.skcipher, + CRYPTO_TFM_REQ_MASK); + crypto_skcipher_set_flags(sctx->fallback.skcipher, + crypto_skcipher_get_flags(tfm) & + CRYPTO_TFM_REQ_MASK); + ret = crypto_skcipher_setkey(sctx->fallback.skcipher, key, len); + crypto_skcipher_set_flags(tfm, + crypto_skcipher_get_flags(sctx->fallback.skcipher) & + CRYPTO_TFM_RES_MASK); return ret; } -static int fallback_blk_enc(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int fallback_skcipher_crypt(struct s390_aes_ctx *sctx, + struct skcipher_request *req, + unsigned long modifier) { - unsigned int ret; - struct crypto_blkcipher *tfm = desc->tfm; - struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(tfm); - SYNC_SKCIPHER_REQUEST_ON_STACK(req, sctx->fallback.blk); - - skcipher_request_set_sync_tfm(req, sctx->fallback.blk); - skcipher_request_set_callback(req, desc->flags, NULL, NULL); - skcipher_request_set_crypt(req, src, dst, nbytes, desc->info); + struct skcipher_request *subreq = skcipher_request_ctx(req); - ret = crypto_skcipher_encrypt(req); - return ret; + *subreq = *req; + skcipher_request_set_tfm(subreq, sctx->fallback.skcipher); + return (modifier & CPACF_DECRYPT) ? 
+ crypto_skcipher_decrypt(subreq) : + crypto_skcipher_encrypt(subreq); } -static int ecb_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, +static int ecb_aes_set_key(struct crypto_skcipher *tfm, const u8 *in_key, unsigned int key_len) { - struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm); + struct s390_aes_ctx *sctx = crypto_skcipher_ctx(tfm); unsigned long fc; /* Pick the correct function code based on the key length */ @@ -248,111 +223,92 @@ static int ecb_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, /* Check if the function code is available */ sctx->fc = (fc && cpacf_test_func(&km_functions, fc)) ? fc : 0; if (!sctx->fc) - return setkey_fallback_blk(tfm, in_key, key_len); + return setkey_fallback_skcipher(tfm, in_key, key_len); sctx->key_len = key_len; memcpy(sctx->key, in_key, key_len); return 0; } -static int ecb_aes_crypt(struct blkcipher_desc *desc, unsigned long modifier, - struct blkcipher_walk *walk) +static int ecb_aes_crypt(struct skcipher_request *req, unsigned long modifier) { - struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct s390_aes_ctx *sctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; unsigned int nbytes, n; int ret; - ret = blkcipher_walk_virt(desc, walk); - while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) { + if (unlikely(!sctx->fc)) + return fallback_skcipher_crypt(sctx, req, modifier); + + ret = skcipher_walk_virt(&walk, req, false); + while ((nbytes = walk.nbytes) != 0) { /* only use complete blocks */ n = nbytes & ~(AES_BLOCK_SIZE - 1); cpacf_km(sctx->fc | modifier, sctx->key, - walk->dst.virt.addr, walk->src.virt.addr, n); - ret = blkcipher_walk_done(desc, walk, nbytes - n); + walk.dst.virt.addr, walk.src.virt.addr, n); + ret = skcipher_walk_done(&walk, nbytes - n); } - return ret; } -static int ecb_aes_encrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int ecb_aes_encrypt(struct skcipher_request *req) { - struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); - struct blkcipher_walk walk; - - if (unlikely(!sctx->fc)) - return fallback_blk_enc(desc, dst, src, nbytes); - - blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_aes_crypt(desc, 0, &walk); + return ecb_aes_crypt(req, 0); } -static int ecb_aes_decrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int ecb_aes_decrypt(struct skcipher_request *req) { - struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); - struct blkcipher_walk walk; - - if (unlikely(!sctx->fc)) - return fallback_blk_dec(desc, dst, src, nbytes); - - blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_aes_crypt(desc, CPACF_DECRYPT, &walk); + return ecb_aes_crypt(req, CPACF_DECRYPT); } -static int fallback_init_blk(struct crypto_tfm *tfm) +static int fallback_init_skcipher(struct crypto_skcipher *tfm) { - const char *name = tfm->__crt_alg->cra_name; - struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm); + const char *name = crypto_tfm_alg_name(&tfm->base); + struct s390_aes_ctx *sctx = crypto_skcipher_ctx(tfm); - sctx->fallback.blk = crypto_alloc_sync_skcipher(name, 0, - CRYPTO_ALG_NEED_FALLBACK); + sctx->fallback.skcipher = crypto_alloc_skcipher(name, 0, + CRYPTO_ALG_NEED_FALLBACK | CRYPTO_ALG_ASYNC); - if (IS_ERR(sctx->fallback.blk)) { + if (IS_ERR(sctx->fallback.skcipher)) { pr_err("Allocating AES fallback algorithm %s failed\n", name); - return PTR_ERR(sctx->fallback.blk); + 
return PTR_ERR(sctx->fallback.skcipher); } + crypto_skcipher_set_reqsize(tfm, sizeof(struct skcipher_request) + + crypto_skcipher_reqsize(sctx->fallback.skcipher)); return 0; } -static void fallback_exit_blk(struct crypto_tfm *tfm) +static void fallback_exit_skcipher(struct crypto_skcipher *tfm) { - struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm); + struct s390_aes_ctx *sctx = crypto_skcipher_ctx(tfm); - crypto_free_sync_skcipher(sctx->fallback.blk); + crypto_free_skcipher(sctx->fallback.skcipher); } -static struct crypto_alg ecb_aes_alg = { - .cra_name = "ecb(aes)", - .cra_driver_name = "ecb-aes-s390", - .cra_priority = 401, /* combo: aes + ecb + 1 */ - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_NEED_FALLBACK, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct s390_aes_ctx), - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = fallback_init_blk, - .cra_exit = fallback_exit_blk, - .cra_u = { - .blkcipher = { - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .setkey = ecb_aes_set_key, - .encrypt = ecb_aes_encrypt, - .decrypt = ecb_aes_decrypt, - } - } +static struct skcipher_alg ecb_aes_alg = { + .base.cra_name = "ecb(aes)", + .base.cra_driver_name = "ecb-aes-s390", + .base.cra_priority = 401, /* combo: aes + ecb + 1 */ + .base.cra_flags = CRYPTO_ALG_NEED_FALLBACK, + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct s390_aes_ctx), + .base.cra_module = THIS_MODULE, + .init = fallback_init_skcipher, + .exit = fallback_exit_skcipher, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .setkey = ecb_aes_set_key, + .encrypt = ecb_aes_encrypt, + .decrypt = ecb_aes_decrypt, }; -static int cbc_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, +static int cbc_aes_set_key(struct crypto_skcipher *tfm, const u8 *in_key, unsigned int key_len) { - struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm); + struct s390_aes_ctx *sctx = crypto_skcipher_ctx(tfm); unsigned long fc; /* Pick the correct function code based on the key length */ @@ -363,17 +319,18 @@ static int cbc_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, /* Check if the function code is available */ sctx->fc = (fc && cpacf_test_func(&kmc_functions, fc)) ? 
fc : 0; if (!sctx->fc) - return setkey_fallback_blk(tfm, in_key, key_len); + return setkey_fallback_skcipher(tfm, in_key, key_len); sctx->key_len = key_len; memcpy(sctx->key, in_key, key_len); return 0; } -static int cbc_aes_crypt(struct blkcipher_desc *desc, unsigned long modifier, - struct blkcipher_walk *walk) +static int cbc_aes_crypt(struct skcipher_request *req, unsigned long modifier) { - struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct s390_aes_ctx *sctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; unsigned int nbytes, n; int ret; struct { @@ -381,134 +338,74 @@ static int cbc_aes_crypt(struct blkcipher_desc *desc, unsigned long modifier, u8 key[AES_MAX_KEY_SIZE]; } param; - ret = blkcipher_walk_virt(desc, walk); - memcpy(param.iv, walk->iv, AES_BLOCK_SIZE); + if (unlikely(!sctx->fc)) + return fallback_skcipher_crypt(sctx, req, modifier); + + ret = skcipher_walk_virt(&walk, req, false); + if (ret) + return ret; + memcpy(param.iv, walk.iv, AES_BLOCK_SIZE); memcpy(param.key, sctx->key, sctx->key_len); - while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) { + while ((nbytes = walk.nbytes) != 0) { /* only use complete blocks */ n = nbytes & ~(AES_BLOCK_SIZE - 1); cpacf_kmc(sctx->fc | modifier, ¶m, - walk->dst.virt.addr, walk->src.virt.addr, n); - ret = blkcipher_walk_done(desc, walk, nbytes - n); + walk.dst.virt.addr, walk.src.virt.addr, n); + memcpy(walk.iv, param.iv, AES_BLOCK_SIZE); + ret = skcipher_walk_done(&walk, nbytes - n); } - memcpy(walk->iv, param.iv, AES_BLOCK_SIZE); return ret; } -static int cbc_aes_encrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int cbc_aes_encrypt(struct skcipher_request *req) { - struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); - struct blkcipher_walk walk; - - if (unlikely(!sctx->fc)) - return fallback_blk_enc(desc, dst, src, nbytes); - - blkcipher_walk_init(&walk, dst, src, nbytes); - return cbc_aes_crypt(desc, 0, &walk); + return cbc_aes_crypt(req, 0); } -static int cbc_aes_decrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int cbc_aes_decrypt(struct skcipher_request *req) { - struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); - struct blkcipher_walk walk; - - if (unlikely(!sctx->fc)) - return fallback_blk_dec(desc, dst, src, nbytes); - - blkcipher_walk_init(&walk, dst, src, nbytes); - return cbc_aes_crypt(desc, CPACF_DECRYPT, &walk); + return cbc_aes_crypt(req, CPACF_DECRYPT); } -static struct crypto_alg cbc_aes_alg = { - .cra_name = "cbc(aes)", - .cra_driver_name = "cbc-aes-s390", - .cra_priority = 402, /* ecb-aes-s390 + 1 */ - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_NEED_FALLBACK, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct s390_aes_ctx), - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = fallback_init_blk, - .cra_exit = fallback_exit_blk, - .cra_u = { - .blkcipher = { - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = cbc_aes_set_key, - .encrypt = cbc_aes_encrypt, - .decrypt = cbc_aes_decrypt, - } - } +static struct skcipher_alg cbc_aes_alg = { + .base.cra_name = "cbc(aes)", + .base.cra_driver_name = "cbc-aes-s390", + .base.cra_priority = 402, /* ecb-aes-s390 + 1 */ + .base.cra_flags = CRYPTO_ALG_NEED_FALLBACK, + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = 
sizeof(struct s390_aes_ctx), + .base.cra_module = THIS_MODULE, + .init = fallback_init_skcipher, + .exit = fallback_exit_skcipher, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = cbc_aes_set_key, + .encrypt = cbc_aes_encrypt, + .decrypt = cbc_aes_decrypt, }; -static int xts_fallback_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int len) -{ - struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm); - unsigned int ret; - - crypto_sync_skcipher_clear_flags(xts_ctx->fallback, - CRYPTO_TFM_REQ_MASK); - crypto_sync_skcipher_set_flags(xts_ctx->fallback, tfm->crt_flags & - CRYPTO_TFM_REQ_MASK); - - ret = crypto_sync_skcipher_setkey(xts_ctx->fallback, key, len); - - tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK; - tfm->crt_flags |= crypto_sync_skcipher_get_flags(xts_ctx->fallback) & - CRYPTO_TFM_RES_MASK; - - return ret; -} - -static int xts_fallback_decrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) -{ - struct crypto_blkcipher *tfm = desc->tfm; - struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(tfm); - SYNC_SKCIPHER_REQUEST_ON_STACK(req, xts_ctx->fallback); - unsigned int ret; - - skcipher_request_set_sync_tfm(req, xts_ctx->fallback); - skcipher_request_set_callback(req, desc->flags, NULL, NULL); - skcipher_request_set_crypt(req, src, dst, nbytes, desc->info); - - ret = crypto_skcipher_decrypt(req); - - skcipher_request_zero(req); - return ret; -} - -static int xts_fallback_encrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int xts_fallback_setkey(struct crypto_skcipher *tfm, const u8 *key, + unsigned int len) { - struct crypto_blkcipher *tfm = desc->tfm; - struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(tfm); - SYNC_SKCIPHER_REQUEST_ON_STACK(req, xts_ctx->fallback); - unsigned int ret; - - skcipher_request_set_sync_tfm(req, xts_ctx->fallback); - skcipher_request_set_callback(req, desc->flags, NULL, NULL); - skcipher_request_set_crypt(req, src, dst, nbytes, desc->info); - - ret = crypto_skcipher_encrypt(req); + struct s390_xts_ctx *xts_ctx = crypto_skcipher_ctx(tfm); + int ret; - skcipher_request_zero(req); + crypto_skcipher_clear_flags(xts_ctx->fallback, CRYPTO_TFM_REQ_MASK); + crypto_skcipher_set_flags(xts_ctx->fallback, + crypto_skcipher_get_flags(tfm) & + CRYPTO_TFM_REQ_MASK); + ret = crypto_skcipher_setkey(xts_ctx->fallback, key, len); + crypto_skcipher_set_flags(tfm, + crypto_skcipher_get_flags(xts_ctx->fallback) & + CRYPTO_TFM_RES_MASK); return ret; } -static int xts_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, +static int xts_aes_set_key(struct crypto_skcipher *tfm, const u8 *in_key, unsigned int key_len) { - struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm); + struct s390_xts_ctx *xts_ctx = crypto_skcipher_ctx(tfm); unsigned long fc; int err; @@ -518,7 +415,7 @@ static int xts_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, /* In fips mode only 128 bit or 256 bit keys are valid */ if (fips_enabled && key_len != 32 && key_len != 64) { - tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); return -EINVAL; } @@ -539,10 +436,11 @@ static int xts_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, return 0; } -static int xts_aes_crypt(struct blkcipher_desc *desc, unsigned long modifier, - struct blkcipher_walk *walk) +static int xts_aes_crypt(struct skcipher_request *req, unsigned long modifier) { - struct s390_xts_ctx 
*xts_ctx = crypto_blkcipher_ctx(desc->tfm); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct s390_xts_ctx *xts_ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; unsigned int offset, nbytes, n; int ret; struct { @@ -557,113 +455,100 @@ static int xts_aes_crypt(struct blkcipher_desc *desc, unsigned long modifier, u8 init[16]; } xts_param; - ret = blkcipher_walk_virt(desc, walk); + if (req->cryptlen < AES_BLOCK_SIZE) + return -EINVAL; + + if (unlikely(!xts_ctx->fc || (req->cryptlen % AES_BLOCK_SIZE) != 0)) { + struct skcipher_request *subreq = skcipher_request_ctx(req); + + *subreq = *req; + skcipher_request_set_tfm(subreq, xts_ctx->fallback); + return (modifier & CPACF_DECRYPT) ? + crypto_skcipher_decrypt(subreq) : + crypto_skcipher_encrypt(subreq); + } + + ret = skcipher_walk_virt(&walk, req, false); + if (ret) + return ret; offset = xts_ctx->key_len & 0x10; memset(pcc_param.block, 0, sizeof(pcc_param.block)); memset(pcc_param.bit, 0, sizeof(pcc_param.bit)); memset(pcc_param.xts, 0, sizeof(pcc_param.xts)); - memcpy(pcc_param.tweak, walk->iv, sizeof(pcc_param.tweak)); + memcpy(pcc_param.tweak, walk.iv, sizeof(pcc_param.tweak)); memcpy(pcc_param.key + offset, xts_ctx->pcc_key, xts_ctx->key_len); cpacf_pcc(xts_ctx->fc, pcc_param.key + offset); memcpy(xts_param.key + offset, xts_ctx->key, xts_ctx->key_len); memcpy(xts_param.init, pcc_param.xts, 16); - while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) { + while ((nbytes = walk.nbytes) != 0) { /* only use complete blocks */ n = nbytes & ~(AES_BLOCK_SIZE - 1); cpacf_km(xts_ctx->fc | modifier, xts_param.key + offset, - walk->dst.virt.addr, walk->src.virt.addr, n); - ret = blkcipher_walk_done(desc, walk, nbytes - n); + walk.dst.virt.addr, walk.src.virt.addr, n); + ret = skcipher_walk_done(&walk, nbytes - n); } return ret; } -static int xts_aes_encrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int xts_aes_encrypt(struct skcipher_request *req) { - struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(desc->tfm); - struct blkcipher_walk walk; - - if (!nbytes) - return -EINVAL; - - if (unlikely(!xts_ctx->fc || (nbytes % XTS_BLOCK_SIZE) != 0)) - return xts_fallback_encrypt(desc, dst, src, nbytes); - - blkcipher_walk_init(&walk, dst, src, nbytes); - return xts_aes_crypt(desc, 0, &walk); + return xts_aes_crypt(req, 0); } -static int xts_aes_decrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int xts_aes_decrypt(struct skcipher_request *req) { - struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(desc->tfm); - struct blkcipher_walk walk; - - if (!nbytes) - return -EINVAL; - - if (unlikely(!xts_ctx->fc || (nbytes % XTS_BLOCK_SIZE) != 0)) - return xts_fallback_decrypt(desc, dst, src, nbytes); - - blkcipher_walk_init(&walk, dst, src, nbytes); - return xts_aes_crypt(desc, CPACF_DECRYPT, &walk); + return xts_aes_crypt(req, CPACF_DECRYPT); } -static int xts_fallback_init(struct crypto_tfm *tfm) +static int xts_fallback_init(struct crypto_skcipher *tfm) { - const char *name = tfm->__crt_alg->cra_name; - struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm); + const char *name = crypto_tfm_alg_name(&tfm->base); + struct s390_xts_ctx *xts_ctx = crypto_skcipher_ctx(tfm); - xts_ctx->fallback = crypto_alloc_sync_skcipher(name, 0, - CRYPTO_ALG_NEED_FALLBACK); + xts_ctx->fallback = crypto_alloc_skcipher(name, 0, + CRYPTO_ALG_NEED_FALLBACK | CRYPTO_ALG_ASYNC); if (IS_ERR(xts_ctx->fallback)) { 
pr_err("Allocating XTS fallback algorithm %s failed\n", name); return PTR_ERR(xts_ctx->fallback); } + crypto_skcipher_set_reqsize(tfm, sizeof(struct skcipher_request) + + crypto_skcipher_reqsize(xts_ctx->fallback)); return 0; } -static void xts_fallback_exit(struct crypto_tfm *tfm) +static void xts_fallback_exit(struct crypto_skcipher *tfm) { - struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm); + struct s390_xts_ctx *xts_ctx = crypto_skcipher_ctx(tfm); - crypto_free_sync_skcipher(xts_ctx->fallback); + crypto_free_skcipher(xts_ctx->fallback); } -static struct crypto_alg xts_aes_alg = { - .cra_name = "xts(aes)", - .cra_driver_name = "xts-aes-s390", - .cra_priority = 402, /* ecb-aes-s390 + 1 */ - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_NEED_FALLBACK, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct s390_xts_ctx), - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = xts_fallback_init, - .cra_exit = xts_fallback_exit, - .cra_u = { - .blkcipher = { - .min_keysize = 2 * AES_MIN_KEY_SIZE, - .max_keysize = 2 * AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = xts_aes_set_key, - .encrypt = xts_aes_encrypt, - .decrypt = xts_aes_decrypt, - } - } +static struct skcipher_alg xts_aes_alg = { + .base.cra_name = "xts(aes)", + .base.cra_driver_name = "xts-aes-s390", + .base.cra_priority = 402, /* ecb-aes-s390 + 1 */ + .base.cra_flags = CRYPTO_ALG_NEED_FALLBACK, + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct s390_xts_ctx), + .base.cra_module = THIS_MODULE, + .init = xts_fallback_init, + .exit = xts_fallback_exit, + .min_keysize = 2 * AES_MIN_KEY_SIZE, + .max_keysize = 2 * AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = xts_aes_set_key, + .encrypt = xts_aes_encrypt, + .decrypt = xts_aes_decrypt, }; -static int ctr_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, +static int ctr_aes_set_key(struct crypto_skcipher *tfm, const u8 *in_key, unsigned int key_len) { - struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm); + struct s390_aes_ctx *sctx = crypto_skcipher_ctx(tfm); unsigned long fc; /* Pick the correct function code based on the key length */ @@ -674,7 +559,7 @@ static int ctr_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, /* Check if the function code is available */ sctx->fc = (fc && cpacf_test_func(&kmctr_functions, fc)) ? 
fc : 0; if (!sctx->fc) - return setkey_fallback_blk(tfm, in_key, key_len); + return setkey_fallback_skcipher(tfm, in_key, key_len); sctx->key_len = key_len; memcpy(sctx->key, in_key, key_len); @@ -696,30 +581,34 @@ static unsigned int __ctrblk_init(u8 *ctrptr, u8 *iv, unsigned int nbytes) return n; } -static int ctr_aes_crypt(struct blkcipher_desc *desc, unsigned long modifier, - struct blkcipher_walk *walk) +static int ctr_aes_crypt(struct skcipher_request *req) { - struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct s390_aes_ctx *sctx = crypto_skcipher_ctx(tfm); u8 buf[AES_BLOCK_SIZE], *ctrptr; + struct skcipher_walk walk; unsigned int n, nbytes; int ret, locked; + if (unlikely(!sctx->fc)) + return fallback_skcipher_crypt(sctx, req, 0); + locked = mutex_trylock(&ctrblk_lock); - ret = blkcipher_walk_virt_block(desc, walk, AES_BLOCK_SIZE); - while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) { + ret = skcipher_walk_virt(&walk, req, false); + while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) { n = AES_BLOCK_SIZE; + if (nbytes >= 2*AES_BLOCK_SIZE && locked) - n = __ctrblk_init(ctrblk, walk->iv, nbytes); - ctrptr = (n > AES_BLOCK_SIZE) ? ctrblk : walk->iv; - cpacf_kmctr(sctx->fc | modifier, sctx->key, - walk->dst.virt.addr, walk->src.virt.addr, - n, ctrptr); + n = __ctrblk_init(ctrblk, walk.iv, nbytes); + ctrptr = (n > AES_BLOCK_SIZE) ? ctrblk : walk.iv; + cpacf_kmctr(sctx->fc, sctx->key, walk.dst.virt.addr, + walk.src.virt.addr, n, ctrptr); if (ctrptr == ctrblk) - memcpy(walk->iv, ctrptr + n - AES_BLOCK_SIZE, + memcpy(walk.iv, ctrptr + n - AES_BLOCK_SIZE, AES_BLOCK_SIZE); - crypto_inc(walk->iv, AES_BLOCK_SIZE); - ret = blkcipher_walk_done(desc, walk, nbytes - n); + crypto_inc(walk.iv, AES_BLOCK_SIZE); + ret = skcipher_walk_done(&walk, nbytes - n); } if (locked) mutex_unlock(&ctrblk_lock); @@ -727,67 +616,33 @@ static int ctr_aes_crypt(struct blkcipher_desc *desc, unsigned long modifier, * final block may be < AES_BLOCK_SIZE, copy only nbytes */ if (nbytes) { - cpacf_kmctr(sctx->fc | modifier, sctx->key, - buf, walk->src.virt.addr, - AES_BLOCK_SIZE, walk->iv); - memcpy(walk->dst.virt.addr, buf, nbytes); - crypto_inc(walk->iv, AES_BLOCK_SIZE); - ret = blkcipher_walk_done(desc, walk, 0); + cpacf_kmctr(sctx->fc, sctx->key, buf, walk.src.virt.addr, + AES_BLOCK_SIZE, walk.iv); + memcpy(walk.dst.virt.addr, buf, nbytes); + crypto_inc(walk.iv, AES_BLOCK_SIZE); + ret = skcipher_walk_done(&walk, 0); } return ret; } -static int ctr_aes_encrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) -{ - struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); - struct blkcipher_walk walk; - - if (unlikely(!sctx->fc)) - return fallback_blk_enc(desc, dst, src, nbytes); - - blkcipher_walk_init(&walk, dst, src, nbytes); - return ctr_aes_crypt(desc, 0, &walk); -} - -static int ctr_aes_decrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) -{ - struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); - struct blkcipher_walk walk; - - if (unlikely(!sctx->fc)) - return fallback_blk_dec(desc, dst, src, nbytes); - - blkcipher_walk_init(&walk, dst, src, nbytes); - return ctr_aes_crypt(desc, CPACF_DECRYPT, &walk); -} - -static struct crypto_alg ctr_aes_alg = { - .cra_name = "ctr(aes)", - .cra_driver_name = "ctr-aes-s390", - .cra_priority = 402, /* ecb-aes-s390 + 1 */ - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - 
CRYPTO_ALG_NEED_FALLBACK, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct s390_aes_ctx), - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = fallback_init_blk, - .cra_exit = fallback_exit_blk, - .cra_u = { - .blkcipher = { - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = ctr_aes_set_key, - .encrypt = ctr_aes_encrypt, - .decrypt = ctr_aes_decrypt, - } - } +static struct skcipher_alg ctr_aes_alg = { + .base.cra_name = "ctr(aes)", + .base.cra_driver_name = "ctr-aes-s390", + .base.cra_priority = 402, /* ecb-aes-s390 + 1 */ + .base.cra_flags = CRYPTO_ALG_NEED_FALLBACK, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct s390_aes_ctx), + .base.cra_module = THIS_MODULE, + .init = fallback_init_skcipher, + .exit = fallback_exit_skcipher, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = ctr_aes_set_key, + .encrypt = ctr_aes_crypt, + .decrypt = ctr_aes_crypt, + .chunksize = AES_BLOCK_SIZE, }; static int gcm_aes_setkey(struct crypto_aead *tfm, const u8 *key, @@ -1116,24 +971,27 @@ static struct aead_alg gcm_aes_aead = { }, }; -static struct crypto_alg *aes_s390_algs_ptr[5]; -static int aes_s390_algs_num; +static struct crypto_alg *aes_s390_alg; +static struct skcipher_alg *aes_s390_skcipher_algs[4]; +static int aes_s390_skciphers_num; static struct aead_alg *aes_s390_aead_alg; -static int aes_s390_register_alg(struct crypto_alg *alg) +static int aes_s390_register_skcipher(struct skcipher_alg *alg) { int ret; - ret = crypto_register_alg(alg); + ret = crypto_register_skcipher(alg); if (!ret) - aes_s390_algs_ptr[aes_s390_algs_num++] = alg; + aes_s390_skcipher_algs[aes_s390_skciphers_num++] = alg; return ret; } static void aes_s390_fini(void) { - while (aes_s390_algs_num--) - crypto_unregister_alg(aes_s390_algs_ptr[aes_s390_algs_num]); + if (aes_s390_alg) + crypto_unregister_alg(aes_s390_alg); + while (aes_s390_skciphers_num--) + crypto_unregister_skcipher(aes_s390_skcipher_algs[aes_s390_skciphers_num]); if (ctrblk) free_page((unsigned long) ctrblk); @@ -1154,10 +1012,11 @@ static int __init aes_s390_init(void) if (cpacf_test_func(&km_functions, CPACF_KM_AES_128) || cpacf_test_func(&km_functions, CPACF_KM_AES_192) || cpacf_test_func(&km_functions, CPACF_KM_AES_256)) { - ret = aes_s390_register_alg(&aes_alg); + ret = crypto_register_alg(&aes_alg); if (ret) goto out_err; - ret = aes_s390_register_alg(&ecb_aes_alg); + aes_s390_alg = &aes_alg; + ret = aes_s390_register_skcipher(&ecb_aes_alg); if (ret) goto out_err; } @@ -1165,14 +1024,14 @@ static int __init aes_s390_init(void) if (cpacf_test_func(&kmc_functions, CPACF_KMC_AES_128) || cpacf_test_func(&kmc_functions, CPACF_KMC_AES_192) || cpacf_test_func(&kmc_functions, CPACF_KMC_AES_256)) { - ret = aes_s390_register_alg(&cbc_aes_alg); + ret = aes_s390_register_skcipher(&cbc_aes_alg); if (ret) goto out_err; } if (cpacf_test_func(&km_functions, CPACF_KM_XTS_128) || cpacf_test_func(&km_functions, CPACF_KM_XTS_256)) { - ret = aes_s390_register_alg(&xts_aes_alg); + ret = aes_s390_register_skcipher(&xts_aes_alg); if (ret) goto out_err; } @@ -1185,7 +1044,7 @@ static int __init aes_s390_init(void) ret = -ENOMEM; goto out_err; } - ret = aes_s390_register_alg(&ctr_aes_alg); + ret = aes_s390_register_skcipher(&ctr_aes_alg); if (ret) goto out_err; } -- cgit v1.2.3 From d00c0639815483d5ae0d93bef4e67759352c7fe4 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 12 Oct 2019 13:18:08 -0700 Subject: 
crypto: s390/paes - convert to skcipher API Convert the glue code for the S390 CPACF protected key implementations of AES-ECB, AES-CBC, AES-XTS, and AES-CTR from the deprecated "blkcipher" API to the "skcipher" API. This is needed in order for the blkcipher API to be removed. Note: I made CTR use the same function for encryption and decryption, since CTR encryption and decryption are identical. Signed-off-by: Eric Biggers Reviewed-by: Harald Freudenberger Signed-off-by: Herbert Xu --- arch/s390/crypto/paes_s390.c | 414 ++++++++++++++++++------------------- 1 file changed, 174 insertions(+), 240 deletions(-) (limited to 'arch') diff --git a/arch/s390/crypto/paes_s390.c b/arch/s390/crypto/paes_s390.c index 6184dceed340..c7119c617b6e 100644 --- a/arch/s390/crypto/paes_s390.c +++ b/arch/s390/crypto/paes_s390.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -123,27 +124,27 @@ static int __paes_set_key(struct s390_paes_ctx *ctx) return ctx->fc ? 0 : -EINVAL; } -static int ecb_paes_init(struct crypto_tfm *tfm) +static int ecb_paes_init(struct crypto_skcipher *tfm) { - struct s390_paes_ctx *ctx = crypto_tfm_ctx(tfm); + struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); ctx->kb.key = NULL; return 0; } -static void ecb_paes_exit(struct crypto_tfm *tfm) +static void ecb_paes_exit(struct crypto_skcipher *tfm) { - struct s390_paes_ctx *ctx = crypto_tfm_ctx(tfm); + struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); _free_kb_keybuf(&ctx->kb); } -static int ecb_paes_set_key(struct crypto_tfm *tfm, const u8 *in_key, +static int ecb_paes_set_key(struct crypto_skcipher *tfm, const u8 *in_key, unsigned int key_len) { int rc; - struct s390_paes_ctx *ctx = crypto_tfm_ctx(tfm); + struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); _free_kb_keybuf(&ctx->kb); rc = _copy_key_to_kb(&ctx->kb, in_key, key_len); @@ -151,91 +152,75 @@ static int ecb_paes_set_key(struct crypto_tfm *tfm, const u8 *in_key, return rc; if (__paes_set_key(ctx)) { - tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); return -EINVAL; } return 0; } -static int ecb_paes_crypt(struct blkcipher_desc *desc, - unsigned long modifier, - struct blkcipher_walk *walk) +static int ecb_paes_crypt(struct skcipher_request *req, unsigned long modifier) { - struct s390_paes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; unsigned int nbytes, n, k; int ret; - ret = blkcipher_walk_virt(desc, walk); - while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) { + ret = skcipher_walk_virt(&walk, req, false); + while ((nbytes = walk.nbytes) != 0) { /* only use complete blocks */ n = nbytes & ~(AES_BLOCK_SIZE - 1); k = cpacf_km(ctx->fc | modifier, ctx->pk.protkey, - walk->dst.virt.addr, walk->src.virt.addr, n); + walk.dst.virt.addr, walk.src.virt.addr, n); if (k) - ret = blkcipher_walk_done(desc, walk, nbytes - k); + ret = skcipher_walk_done(&walk, nbytes - k); if (k < n) { if (__paes_set_key(ctx) != 0) - return blkcipher_walk_done(desc, walk, -EIO); + return skcipher_walk_done(&walk, -EIO); } } return ret; } -static int ecb_paes_encrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int ecb_paes_encrypt(struct skcipher_request *req) { - struct blkcipher_walk walk; - - blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_paes_crypt(desc, CPACF_ENCRYPT, &walk);
+ return ecb_paes_crypt(req, 0); } -static int ecb_paes_decrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int ecb_paes_decrypt(struct skcipher_request *req) { - struct blkcipher_walk walk; - - blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_paes_crypt(desc, CPACF_DECRYPT, &walk); + return ecb_paes_crypt(req, CPACF_DECRYPT); } -static struct crypto_alg ecb_paes_alg = { - .cra_name = "ecb(paes)", - .cra_driver_name = "ecb-paes-s390", - .cra_priority = 401, /* combo: aes + ecb + 1 */ - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct s390_paes_ctx), - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(ecb_paes_alg.cra_list), - .cra_init = ecb_paes_init, - .cra_exit = ecb_paes_exit, - .cra_u = { - .blkcipher = { - .min_keysize = PAES_MIN_KEYSIZE, - .max_keysize = PAES_MAX_KEYSIZE, - .setkey = ecb_paes_set_key, - .encrypt = ecb_paes_encrypt, - .decrypt = ecb_paes_decrypt, - } - } +static struct skcipher_alg ecb_paes_alg = { + .base.cra_name = "ecb(paes)", + .base.cra_driver_name = "ecb-paes-s390", + .base.cra_priority = 401, /* combo: aes + ecb + 1 */ + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct s390_paes_ctx), + .base.cra_module = THIS_MODULE, + .base.cra_list = LIST_HEAD_INIT(ecb_paes_alg.base.cra_list), + .init = ecb_paes_init, + .exit = ecb_paes_exit, + .min_keysize = PAES_MIN_KEYSIZE, + .max_keysize = PAES_MAX_KEYSIZE, + .setkey = ecb_paes_set_key, + .encrypt = ecb_paes_encrypt, + .decrypt = ecb_paes_decrypt, }; -static int cbc_paes_init(struct crypto_tfm *tfm) +static int cbc_paes_init(struct crypto_skcipher *tfm) { - struct s390_paes_ctx *ctx = crypto_tfm_ctx(tfm); + struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); ctx->kb.key = NULL; return 0; } -static void cbc_paes_exit(struct crypto_tfm *tfm) +static void cbc_paes_exit(struct crypto_skcipher *tfm) { - struct s390_paes_ctx *ctx = crypto_tfm_ctx(tfm); + struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); _free_kb_keybuf(&ctx->kb); } @@ -258,11 +243,11 @@ static int __cbc_paes_set_key(struct s390_paes_ctx *ctx) return ctx->fc ? 
0 : -EINVAL; } -static int cbc_paes_set_key(struct crypto_tfm *tfm, const u8 *in_key, +static int cbc_paes_set_key(struct crypto_skcipher *tfm, const u8 *in_key, unsigned int key_len) { int rc; - struct s390_paes_ctx *ctx = crypto_tfm_ctx(tfm); + struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); _free_kb_keybuf(&ctx->kb); rc = _copy_key_to_kb(&ctx->kb, in_key, key_len); @@ -270,16 +255,17 @@ static int cbc_paes_set_key(struct crypto_tfm *tfm, const u8 *in_key, return rc; if (__cbc_paes_set_key(ctx)) { - tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); return -EINVAL; } return 0; } -static int cbc_paes_crypt(struct blkcipher_desc *desc, unsigned long modifier, - struct blkcipher_walk *walk) +static int cbc_paes_crypt(struct skcipher_request *req, unsigned long modifier) { - struct s390_paes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; unsigned int nbytes, n, k; int ret; struct { @@ -287,73 +273,60 @@ static int cbc_paes_crypt(struct blkcipher_desc *desc, unsigned long modifier, u8 key[MAXPROTKEYSIZE]; } param; - ret = blkcipher_walk_virt(desc, walk); - memcpy(param.iv, walk->iv, AES_BLOCK_SIZE); + ret = skcipher_walk_virt(&walk, req, false); + if (ret) + return ret; + memcpy(param.iv, walk.iv, AES_BLOCK_SIZE); memcpy(param.key, ctx->pk.protkey, MAXPROTKEYSIZE); - while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) { + while ((nbytes = walk.nbytes) != 0) { /* only use complete blocks */ n = nbytes & ~(AES_BLOCK_SIZE - 1); k = cpacf_kmc(ctx->fc | modifier, ¶m, - walk->dst.virt.addr, walk->src.virt.addr, n); - if (k) - ret = blkcipher_walk_done(desc, walk, nbytes - k); + walk.dst.virt.addr, walk.src.virt.addr, n); + if (k) { + memcpy(walk.iv, param.iv, AES_BLOCK_SIZE); + ret = skcipher_walk_done(&walk, nbytes - k); + } if (k < n) { if (__cbc_paes_set_key(ctx) != 0) - return blkcipher_walk_done(desc, walk, -EIO); + return skcipher_walk_done(&walk, -EIO); memcpy(param.key, ctx->pk.protkey, MAXPROTKEYSIZE); } } - memcpy(walk->iv, param.iv, AES_BLOCK_SIZE); return ret; } -static int cbc_paes_encrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int cbc_paes_encrypt(struct skcipher_request *req) { - struct blkcipher_walk walk; - - blkcipher_walk_init(&walk, dst, src, nbytes); - return cbc_paes_crypt(desc, 0, &walk); + return cbc_paes_crypt(req, 0); } -static int cbc_paes_decrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int cbc_paes_decrypt(struct skcipher_request *req) { - struct blkcipher_walk walk; - - blkcipher_walk_init(&walk, dst, src, nbytes); - return cbc_paes_crypt(desc, CPACF_DECRYPT, &walk); + return cbc_paes_crypt(req, CPACF_DECRYPT); } -static struct crypto_alg cbc_paes_alg = { - .cra_name = "cbc(paes)", - .cra_driver_name = "cbc-paes-s390", - .cra_priority = 402, /* ecb-paes-s390 + 1 */ - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct s390_paes_ctx), - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(cbc_paes_alg.cra_list), - .cra_init = cbc_paes_init, - .cra_exit = cbc_paes_exit, - .cra_u = { - .blkcipher = { - .min_keysize = PAES_MIN_KEYSIZE, - .max_keysize = PAES_MAX_KEYSIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = cbc_paes_set_key, - .encrypt 
= cbc_paes_encrypt, - .decrypt = cbc_paes_decrypt, - } - } +static struct skcipher_alg cbc_paes_alg = { + .base.cra_name = "cbc(paes)", + .base.cra_driver_name = "cbc-paes-s390", + .base.cra_priority = 402, /* ecb-paes-s390 + 1 */ + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct s390_paes_ctx), + .base.cra_module = THIS_MODULE, + .base.cra_list = LIST_HEAD_INIT(cbc_paes_alg.base.cra_list), + .init = cbc_paes_init, + .exit = cbc_paes_exit, + .min_keysize = PAES_MIN_KEYSIZE, + .max_keysize = PAES_MAX_KEYSIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = cbc_paes_set_key, + .encrypt = cbc_paes_encrypt, + .decrypt = cbc_paes_decrypt, }; -static int xts_paes_init(struct crypto_tfm *tfm) +static int xts_paes_init(struct crypto_skcipher *tfm) { - struct s390_pxts_ctx *ctx = crypto_tfm_ctx(tfm); + struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm); ctx->kb[0].key = NULL; ctx->kb[1].key = NULL; @@ -361,9 +334,9 @@ static int xts_paes_init(struct crypto_tfm *tfm) return 0; } -static void xts_paes_exit(struct crypto_tfm *tfm) +static void xts_paes_exit(struct crypto_skcipher *tfm) { - struct s390_pxts_ctx *ctx = crypto_tfm_ctx(tfm); + struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm); _free_kb_keybuf(&ctx->kb[0]); _free_kb_keybuf(&ctx->kb[1]); @@ -391,11 +364,11 @@ static int __xts_paes_set_key(struct s390_pxts_ctx *ctx) return ctx->fc ? 0 : -EINVAL; } -static int xts_paes_set_key(struct crypto_tfm *tfm, const u8 *in_key, +static int xts_paes_set_key(struct crypto_skcipher *tfm, const u8 *in_key, unsigned int xts_key_len) { int rc; - struct s390_pxts_ctx *ctx = crypto_tfm_ctx(tfm); + struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm); u8 ckey[2 * AES_MAX_KEY_SIZE]; unsigned int ckey_len, key_len; @@ -414,7 +387,7 @@ static int xts_paes_set_key(struct crypto_tfm *tfm, const u8 *in_key, return rc; if (__xts_paes_set_key(ctx)) { - tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); return -EINVAL; } @@ -427,13 +400,14 @@ static int xts_paes_set_key(struct crypto_tfm *tfm, const u8 *in_key, AES_KEYSIZE_128 : AES_KEYSIZE_256; memcpy(ckey, ctx->pk[0].protkey, ckey_len); memcpy(ckey + ckey_len, ctx->pk[1].protkey, ckey_len); - return xts_check_key(tfm, ckey, 2*ckey_len); + return xts_verify_key(tfm, ckey, 2*ckey_len); } -static int xts_paes_crypt(struct blkcipher_desc *desc, unsigned long modifier, - struct blkcipher_walk *walk) +static int xts_paes_crypt(struct skcipher_request *req, unsigned long modifier) { - struct s390_pxts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; unsigned int keylen, offset, nbytes, n, k; int ret; struct { @@ -448,90 +422,76 @@ static int xts_paes_crypt(struct blkcipher_desc *desc, unsigned long modifier, u8 init[16]; } xts_param; - ret = blkcipher_walk_virt(desc, walk); + ret = skcipher_walk_virt(&walk, req, false); + if (ret) + return ret; keylen = (ctx->pk[0].type == PKEY_KEYTYPE_AES_128) ? 48 : 64; offset = (ctx->pk[0].type == PKEY_KEYTYPE_AES_128) ? 
16 : 0; retry: memset(&pcc_param, 0, sizeof(pcc_param)); - memcpy(pcc_param.tweak, walk->iv, sizeof(pcc_param.tweak)); + memcpy(pcc_param.tweak, walk.iv, sizeof(pcc_param.tweak)); memcpy(pcc_param.key + offset, ctx->pk[1].protkey, keylen); cpacf_pcc(ctx->fc, pcc_param.key + offset); memcpy(xts_param.key + offset, ctx->pk[0].protkey, keylen); memcpy(xts_param.init, pcc_param.xts, 16); - while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) { + while ((nbytes = walk.nbytes) != 0) { /* only use complete blocks */ n = nbytes & ~(AES_BLOCK_SIZE - 1); k = cpacf_km(ctx->fc | modifier, xts_param.key + offset, - walk->dst.virt.addr, walk->src.virt.addr, n); + walk.dst.virt.addr, walk.src.virt.addr, n); if (k) - ret = blkcipher_walk_done(desc, walk, nbytes - k); + ret = skcipher_walk_done(&walk, nbytes - k); if (k < n) { if (__xts_paes_set_key(ctx) != 0) - return blkcipher_walk_done(desc, walk, -EIO); + return skcipher_walk_done(&walk, -EIO); goto retry; } } return ret; } -static int xts_paes_encrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int xts_paes_encrypt(struct skcipher_request *req) { - struct blkcipher_walk walk; - - blkcipher_walk_init(&walk, dst, src, nbytes); - return xts_paes_crypt(desc, 0, &walk); + return xts_paes_crypt(req, 0); } -static int xts_paes_decrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int xts_paes_decrypt(struct skcipher_request *req) { - struct blkcipher_walk walk; - - blkcipher_walk_init(&walk, dst, src, nbytes); - return xts_paes_crypt(desc, CPACF_DECRYPT, &walk); + return xts_paes_crypt(req, CPACF_DECRYPT); } -static struct crypto_alg xts_paes_alg = { - .cra_name = "xts(paes)", - .cra_driver_name = "xts-paes-s390", - .cra_priority = 402, /* ecb-paes-s390 + 1 */ - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct s390_pxts_ctx), - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(xts_paes_alg.cra_list), - .cra_init = xts_paes_init, - .cra_exit = xts_paes_exit, - .cra_u = { - .blkcipher = { - .min_keysize = 2 * PAES_MIN_KEYSIZE, - .max_keysize = 2 * PAES_MAX_KEYSIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = xts_paes_set_key, - .encrypt = xts_paes_encrypt, - .decrypt = xts_paes_decrypt, - } - } +static struct skcipher_alg xts_paes_alg = { + .base.cra_name = "xts(paes)", + .base.cra_driver_name = "xts-paes-s390", + .base.cra_priority = 402, /* ecb-paes-s390 + 1 */ + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct s390_pxts_ctx), + .base.cra_module = THIS_MODULE, + .base.cra_list = LIST_HEAD_INIT(xts_paes_alg.base.cra_list), + .init = xts_paes_init, + .exit = xts_paes_exit, + .min_keysize = 2 * PAES_MIN_KEYSIZE, + .max_keysize = 2 * PAES_MAX_KEYSIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = xts_paes_set_key, + .encrypt = xts_paes_encrypt, + .decrypt = xts_paes_decrypt, }; -static int ctr_paes_init(struct crypto_tfm *tfm) +static int ctr_paes_init(struct crypto_skcipher *tfm) { - struct s390_paes_ctx *ctx = crypto_tfm_ctx(tfm); + struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); ctx->kb.key = NULL; return 0; } -static void ctr_paes_exit(struct crypto_tfm *tfm) +static void ctr_paes_exit(struct crypto_skcipher *tfm) { - struct s390_paes_ctx *ctx = crypto_tfm_ctx(tfm); + struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); _free_kb_keybuf(&ctx->kb); } @@ -555,11 +515,11 @@ static int __ctr_paes_set_key(struct 
s390_paes_ctx *ctx) return ctx->fc ? 0 : -EINVAL; } -static int ctr_paes_set_key(struct crypto_tfm *tfm, const u8 *in_key, +static int ctr_paes_set_key(struct crypto_skcipher *tfm, const u8 *in_key, unsigned int key_len) { int rc; - struct s390_paes_ctx *ctx = crypto_tfm_ctx(tfm); + struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); _free_kb_keybuf(&ctx->kb); rc = _copy_key_to_kb(&ctx->kb, in_key, key_len); @@ -567,7 +527,7 @@ static int ctr_paes_set_key(struct crypto_tfm *tfm, const u8 *in_key, return rc; if (__ctr_paes_set_key(ctx)) { - tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); return -EINVAL; } return 0; @@ -588,37 +548,37 @@ static unsigned int __ctrblk_init(u8 *ctrptr, u8 *iv, unsigned int nbytes) return n; } -static int ctr_paes_crypt(struct blkcipher_desc *desc, unsigned long modifier, - struct blkcipher_walk *walk) +static int ctr_paes_crypt(struct skcipher_request *req) { - struct s390_paes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); u8 buf[AES_BLOCK_SIZE], *ctrptr; + struct skcipher_walk walk; unsigned int nbytes, n, k; int ret, locked; locked = spin_trylock(&ctrblk_lock); - ret = blkcipher_walk_virt_block(desc, walk, AES_BLOCK_SIZE); - while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) { + ret = skcipher_walk_virt(&walk, req, false); + while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) { n = AES_BLOCK_SIZE; if (nbytes >= 2*AES_BLOCK_SIZE && locked) - n = __ctrblk_init(ctrblk, walk->iv, nbytes); - ctrptr = (n > AES_BLOCK_SIZE) ? ctrblk : walk->iv; - k = cpacf_kmctr(ctx->fc | modifier, ctx->pk.protkey, - walk->dst.virt.addr, walk->src.virt.addr, - n, ctrptr); + n = __ctrblk_init(ctrblk, walk.iv, nbytes); + ctrptr = (n > AES_BLOCK_SIZE) ? 
ctrblk : walk.iv; + k = cpacf_kmctr(ctx->fc, ctx->pk.protkey, walk.dst.virt.addr, + walk.src.virt.addr, n, ctrptr); if (k) { if (ctrptr == ctrblk) - memcpy(walk->iv, ctrptr + k - AES_BLOCK_SIZE, + memcpy(walk.iv, ctrptr + k - AES_BLOCK_SIZE, AES_BLOCK_SIZE); - crypto_inc(walk->iv, AES_BLOCK_SIZE); - ret = blkcipher_walk_done(desc, walk, nbytes - n); + crypto_inc(walk.iv, AES_BLOCK_SIZE); + ret = skcipher_walk_done(&walk, nbytes - n); } if (k < n) { if (__ctr_paes_set_key(ctx) != 0) { if (locked) spin_unlock(&ctrblk_lock); - return blkcipher_walk_done(desc, walk, -EIO); + return skcipher_walk_done(&walk, -EIO); } } } @@ -629,80 +589,54 @@ static int ctr_paes_crypt(struct blkcipher_desc *desc, unsigned long modifier, */ if (nbytes) { while (1) { - if (cpacf_kmctr(ctx->fc | modifier, - ctx->pk.protkey, buf, - walk->src.virt.addr, AES_BLOCK_SIZE, - walk->iv) == AES_BLOCK_SIZE) + if (cpacf_kmctr(ctx->fc, ctx->pk.protkey, buf, + walk.src.virt.addr, AES_BLOCK_SIZE, + walk.iv) == AES_BLOCK_SIZE) break; if (__ctr_paes_set_key(ctx) != 0) - return blkcipher_walk_done(desc, walk, -EIO); + return skcipher_walk_done(&walk, -EIO); } - memcpy(walk->dst.virt.addr, buf, nbytes); - crypto_inc(walk->iv, AES_BLOCK_SIZE); - ret = blkcipher_walk_done(desc, walk, 0); + memcpy(walk.dst.virt.addr, buf, nbytes); + crypto_inc(walk.iv, AES_BLOCK_SIZE); + ret = skcipher_walk_done(&walk, 0); } return ret; } -static int ctr_paes_encrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) -{ - struct blkcipher_walk walk; - - blkcipher_walk_init(&walk, dst, src, nbytes); - return ctr_paes_crypt(desc, 0, &walk); -} - -static int ctr_paes_decrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) -{ - struct blkcipher_walk walk; - - blkcipher_walk_init(&walk, dst, src, nbytes); - return ctr_paes_crypt(desc, CPACF_DECRYPT, &walk); -} - -static struct crypto_alg ctr_paes_alg = { - .cra_name = "ctr(paes)", - .cra_driver_name = "ctr-paes-s390", - .cra_priority = 402, /* ecb-paes-s390 + 1 */ - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct s390_paes_ctx), - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(ctr_paes_alg.cra_list), - .cra_init = ctr_paes_init, - .cra_exit = ctr_paes_exit, - .cra_u = { - .blkcipher = { - .min_keysize = PAES_MIN_KEYSIZE, - .max_keysize = PAES_MAX_KEYSIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = ctr_paes_set_key, - .encrypt = ctr_paes_encrypt, - .decrypt = ctr_paes_decrypt, - } - } +static struct skcipher_alg ctr_paes_alg = { + .base.cra_name = "ctr(paes)", + .base.cra_driver_name = "ctr-paes-s390", + .base.cra_priority = 402, /* ecb-paes-s390 + 1 */ + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct s390_paes_ctx), + .base.cra_module = THIS_MODULE, + .base.cra_list = LIST_HEAD_INIT(ctr_paes_alg.base.cra_list), + .init = ctr_paes_init, + .exit = ctr_paes_exit, + .min_keysize = PAES_MIN_KEYSIZE, + .max_keysize = PAES_MAX_KEYSIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = ctr_paes_set_key, + .encrypt = ctr_paes_crypt, + .decrypt = ctr_paes_crypt, + .chunksize = AES_BLOCK_SIZE, }; -static inline void __crypto_unregister_alg(struct crypto_alg *alg) +static inline void __crypto_unregister_skcipher(struct skcipher_alg *alg) { - if (!list_empty(&alg->cra_list)) - crypto_unregister_alg(alg); + if (!list_empty(&alg->base.cra_list)) + crypto_unregister_skcipher(alg); } static void paes_s390_fini(void) { if 
(ctrblk) free_page((unsigned long) ctrblk); - __crypto_unregister_alg(&ctr_paes_alg); - __crypto_unregister_alg(&xts_paes_alg); - __crypto_unregister_alg(&cbc_paes_alg); - __crypto_unregister_alg(&ecb_paes_alg); + __crypto_unregister_skcipher(&ctr_paes_alg); + __crypto_unregister_skcipher(&xts_paes_alg); + __crypto_unregister_skcipher(&cbc_paes_alg); + __crypto_unregister_skcipher(&ecb_paes_alg); } static int __init paes_s390_init(void) @@ -717,7 +651,7 @@ static int __init paes_s390_init(void) if (cpacf_test_func(&km_functions, CPACF_KM_PAES_128) || cpacf_test_func(&km_functions, CPACF_KM_PAES_192) || cpacf_test_func(&km_functions, CPACF_KM_PAES_256)) { - ret = crypto_register_alg(&ecb_paes_alg); + ret = crypto_register_skcipher(&ecb_paes_alg); if (ret) goto out_err; } @@ -725,14 +659,14 @@ static int __init paes_s390_init(void) if (cpacf_test_func(&kmc_functions, CPACF_KMC_PAES_128) || cpacf_test_func(&kmc_functions, CPACF_KMC_PAES_192) || cpacf_test_func(&kmc_functions, CPACF_KMC_PAES_256)) { - ret = crypto_register_alg(&cbc_paes_alg); + ret = crypto_register_skcipher(&cbc_paes_alg); if (ret) goto out_err; } if (cpacf_test_func(&km_functions, CPACF_KM_PXTS_128) || cpacf_test_func(&km_functions, CPACF_KM_PXTS_256)) { - ret = crypto_register_alg(&xts_paes_alg); + ret = crypto_register_skcipher(&xts_paes_alg); if (ret) goto out_err; } @@ -740,7 +674,7 @@ static int __init paes_s390_init(void) if (cpacf_test_func(&kmctr_functions, CPACF_KMCTR_PAES_128) || cpacf_test_func(&kmctr_functions, CPACF_KMCTR_PAES_192) || cpacf_test_func(&kmctr_functions, CPACF_KMCTR_PAES_256)) { - ret = crypto_register_alg(&ctr_paes_alg); + ret = crypto_register_skcipher(&ctr_paes_alg); if (ret) goto out_err; ctrblk = (u8 *) __get_free_page(GFP_KERNEL); -- cgit v1.2.3 From fabdf25388df74b7922a69687f52730ead682a0d Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 12 Oct 2019 13:18:09 -0700 Subject: crypto: s390/des - convert to skcipher API Convert the glue code for the S390 CPACF implementations of DES-ECB, DES-CBC, DES-CTR, 3DES-ECB, 3DES-CBC, and 3DES-CTR from the deprecated "blkcipher" API to the "skcipher" API. This is needed in order for the blkcipher API to be removed. Note: I made CTR use the same function for encryption and decryption, since CTR encryption and decryption are identical. 
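For illustration, here is a rough sketch of the glue pattern these skcipher conversions follow; the "xxx" cipher, its 8-byte block size and 24-byte key are placeholders invented for this sketch, and the authoritative code is the diff below.

#include <crypto/internal/skcipher.h>

#define XXX_BLOCK_SIZE	8

struct xxx_ctx {
	u8 key[24];
};

static int xxx_crypt(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct xxx_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct skcipher_walk walk;
	unsigned int nbytes, n;
	int ret;

	/* One on-stack walk per request replaces the blkcipher desc/walk pair. */
	ret = skcipher_walk_virt(&walk, req, false);
	while ((nbytes = walk.nbytes) != 0) {
		/* only use complete blocks */
		n = nbytes & ~(XXX_BLOCK_SIZE - 1);
		/* ... process n bytes from walk.src.virt.addr to
		 * walk.dst.virt.addr using ctx->key here ...
		 */
		ret = skcipher_walk_done(&walk, nbytes - n);
	}
	return ret;
}

Because a CTR keystream is simply XORed with the data, one such function can be wired into both the .encrypt and .decrypt hooks of a struct skcipher_alg, which is what the note above refers to.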
Signed-off-by: Eric Biggers Reviewed-by: Harald Freudenberger Signed-off-by: Herbert Xu --- arch/s390/crypto/des_s390.c | 419 ++++++++++++++++++-------------------- 1 file changed, 172 insertions(+), 247 deletions(-) (limited to 'arch') diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c index 439b100c6f2e..bfbafd35bcbd 100644 --- a/arch/s390/crypto/des_s390.c +++ b/arch/s390/crypto/des_s390.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #define DES3_KEY_SIZE (3 * DES_KEY_SIZE) @@ -45,6 +46,12 @@ static int des_setkey(struct crypto_tfm *tfm, const u8 *key, return 0; } +static int des_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key, + unsigned int key_len) +{ + return des_setkey(crypto_skcipher_tfm(tfm), key, key_len); +} + static void s390_des_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm); @@ -79,28 +86,30 @@ static struct crypto_alg des_alg = { } }; -static int ecb_desall_crypt(struct blkcipher_desc *desc, unsigned long fc, - struct blkcipher_walk *walk) +static int ecb_desall_crypt(struct skcipher_request *req, unsigned long fc) { - struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct s390_des_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; unsigned int nbytes, n; int ret; - ret = blkcipher_walk_virt(desc, walk); - while ((nbytes = walk->nbytes) >= DES_BLOCK_SIZE) { + ret = skcipher_walk_virt(&walk, req, false); + while ((nbytes = walk.nbytes) != 0) { /* only use complete blocks */ n = nbytes & ~(DES_BLOCK_SIZE - 1); - cpacf_km(fc, ctx->key, walk->dst.virt.addr, - walk->src.virt.addr, n); - ret = blkcipher_walk_done(desc, walk, nbytes - n); + cpacf_km(fc, ctx->key, walk.dst.virt.addr, + walk.src.virt.addr, n); + ret = skcipher_walk_done(&walk, nbytes - n); } return ret; } -static int cbc_desall_crypt(struct blkcipher_desc *desc, unsigned long fc, - struct blkcipher_walk *walk) +static int cbc_desall_crypt(struct skcipher_request *req, unsigned long fc) { - struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct s390_des_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; unsigned int nbytes, n; int ret; struct { @@ -108,99 +117,69 @@ static int cbc_desall_crypt(struct blkcipher_desc *desc, unsigned long fc, u8 key[DES3_KEY_SIZE]; } param; - ret = blkcipher_walk_virt(desc, walk); - memcpy(param.iv, walk->iv, DES_BLOCK_SIZE); + ret = skcipher_walk_virt(&walk, req, false); + if (ret) + return ret; + memcpy(param.iv, walk.iv, DES_BLOCK_SIZE); memcpy(param.key, ctx->key, DES3_KEY_SIZE); - while ((nbytes = walk->nbytes) >= DES_BLOCK_SIZE) { + while ((nbytes = walk.nbytes) != 0) { /* only use complete blocks */ n = nbytes & ~(DES_BLOCK_SIZE - 1); - cpacf_kmc(fc, &param, walk->dst.virt.addr, - walk->src.virt.addr, n); - ret = blkcipher_walk_done(desc, walk, nbytes - n); + cpacf_kmc(fc, &param, walk.dst.virt.addr, + walk.src.virt.addr, n); + memcpy(walk.iv, param.iv, DES_BLOCK_SIZE); + ret = skcipher_walk_done(&walk, nbytes - n); } - memcpy(walk->iv, param.iv, DES_BLOCK_SIZE); return ret; } -static int ecb_des_encrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int ecb_des_encrypt(struct skcipher_request *req) { - struct blkcipher_walk walk; - - blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_desall_crypt(desc, CPACF_KM_DEA, &walk); +
return ecb_desall_crypt(req, CPACF_KM_DEA); } -static int ecb_des_decrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int ecb_des_decrypt(struct skcipher_request *req) { - struct blkcipher_walk walk; - - blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_desall_crypt(desc, CPACF_KM_DEA | CPACF_DECRYPT, &walk); + return ecb_desall_crypt(req, CPACF_KM_DEA | CPACF_DECRYPT); } -static struct crypto_alg ecb_des_alg = { - .cra_name = "ecb(des)", - .cra_driver_name = "ecb-des-s390", - .cra_priority = 400, /* combo: des + ecb */ - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = DES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct s390_des_ctx), - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = DES_KEY_SIZE, - .max_keysize = DES_KEY_SIZE, - .setkey = des_setkey, - .encrypt = ecb_des_encrypt, - .decrypt = ecb_des_decrypt, - } - } +static struct skcipher_alg ecb_des_alg = { + .base.cra_name = "ecb(des)", + .base.cra_driver_name = "ecb-des-s390", + .base.cra_priority = 400, /* combo: des + ecb */ + .base.cra_blocksize = DES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct s390_des_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = DES_KEY_SIZE, + .max_keysize = DES_KEY_SIZE, + .setkey = des_setkey_skcipher, + .encrypt = ecb_des_encrypt, + .decrypt = ecb_des_decrypt, }; -static int cbc_des_encrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int cbc_des_encrypt(struct skcipher_request *req) { - struct blkcipher_walk walk; - - blkcipher_walk_init(&walk, dst, src, nbytes); - return cbc_desall_crypt(desc, CPACF_KMC_DEA, &walk); + return cbc_desall_crypt(req, CPACF_KMC_DEA); } -static int cbc_des_decrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int cbc_des_decrypt(struct skcipher_request *req) { - struct blkcipher_walk walk; - - blkcipher_walk_init(&walk, dst, src, nbytes); - return cbc_desall_crypt(desc, CPACF_KMC_DEA | CPACF_DECRYPT, &walk); + return cbc_desall_crypt(req, CPACF_KMC_DEA | CPACF_DECRYPT); } -static struct crypto_alg cbc_des_alg = { - .cra_name = "cbc(des)", - .cra_driver_name = "cbc-des-s390", - .cra_priority = 400, /* combo: des + cbc */ - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = DES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct s390_des_ctx), - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = DES_KEY_SIZE, - .max_keysize = DES_KEY_SIZE, - .ivsize = DES_BLOCK_SIZE, - .setkey = des_setkey, - .encrypt = cbc_des_encrypt, - .decrypt = cbc_des_decrypt, - } - } +static struct skcipher_alg cbc_des_alg = { + .base.cra_name = "cbc(des)", + .base.cra_driver_name = "cbc-des-s390", + .base.cra_priority = 400, /* combo: des + cbc */ + .base.cra_blocksize = DES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct s390_des_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = DES_KEY_SIZE, + .max_keysize = DES_KEY_SIZE, + .ivsize = DES_BLOCK_SIZE, + .setkey = des_setkey_skcipher, + .encrypt = cbc_des_encrypt, + .decrypt = cbc_des_decrypt, }; /* @@ -232,6 +211,12 @@ static int des3_setkey(struct crypto_tfm *tfm, const u8 *key, return 0; } +static int des3_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key, + unsigned int key_len) +{ + return des3_setkey(crypto_skcipher_tfm(tfm), key, key_len); +} + static void des3_encrypt(struct 
crypto_tfm *tfm, u8 *dst, const u8 *src) { struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm); @@ -266,87 +251,53 @@ static struct crypto_alg des3_alg = { } }; -static int ecb_des3_encrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int ecb_des3_encrypt(struct skcipher_request *req) { - struct blkcipher_walk walk; - - blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_desall_crypt(desc, CPACF_KM_TDEA_192, &walk); + return ecb_desall_crypt(req, CPACF_KM_TDEA_192); } -static int ecb_des3_decrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int ecb_des3_decrypt(struct skcipher_request *req) { - struct blkcipher_walk walk; - - blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_desall_crypt(desc, CPACF_KM_TDEA_192 | CPACF_DECRYPT, - &walk); + return ecb_desall_crypt(req, CPACF_KM_TDEA_192 | CPACF_DECRYPT); } -static struct crypto_alg ecb_des3_alg = { - .cra_name = "ecb(des3_ede)", - .cra_driver_name = "ecb-des3_ede-s390", - .cra_priority = 400, /* combo: des3 + ecb */ - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = DES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct s390_des_ctx), - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = DES3_KEY_SIZE, - .max_keysize = DES3_KEY_SIZE, - .setkey = des3_setkey, - .encrypt = ecb_des3_encrypt, - .decrypt = ecb_des3_decrypt, - } - } +static struct skcipher_alg ecb_des3_alg = { + .base.cra_name = "ecb(des3_ede)", + .base.cra_driver_name = "ecb-des3_ede-s390", + .base.cra_priority = 400, /* combo: des3 + ecb */ + .base.cra_blocksize = DES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct s390_des_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = DES3_KEY_SIZE, + .max_keysize = DES3_KEY_SIZE, + .setkey = des3_setkey_skcipher, + .encrypt = ecb_des3_encrypt, + .decrypt = ecb_des3_decrypt, }; -static int cbc_des3_encrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int cbc_des3_encrypt(struct skcipher_request *req) { - struct blkcipher_walk walk; - - blkcipher_walk_init(&walk, dst, src, nbytes); - return cbc_desall_crypt(desc, CPACF_KMC_TDEA_192, &walk); + return cbc_desall_crypt(req, CPACF_KMC_TDEA_192); } -static int cbc_des3_decrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int cbc_des3_decrypt(struct skcipher_request *req) { - struct blkcipher_walk walk; - - blkcipher_walk_init(&walk, dst, src, nbytes); - return cbc_desall_crypt(desc, CPACF_KMC_TDEA_192 | CPACF_DECRYPT, - &walk); + return cbc_desall_crypt(req, CPACF_KMC_TDEA_192 | CPACF_DECRYPT); } -static struct crypto_alg cbc_des3_alg = { - .cra_name = "cbc(des3_ede)", - .cra_driver_name = "cbc-des3_ede-s390", - .cra_priority = 400, /* combo: des3 + cbc */ - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = DES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct s390_des_ctx), - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = DES3_KEY_SIZE, - .max_keysize = DES3_KEY_SIZE, - .ivsize = DES_BLOCK_SIZE, - .setkey = des3_setkey, - .encrypt = cbc_des3_encrypt, - .decrypt = cbc_des3_decrypt, - } - } +static struct skcipher_alg cbc_des3_alg = { + .base.cra_name = "cbc(des3_ede)", + .base.cra_driver_name = "cbc-des3_ede-s390", + .base.cra_priority = 400, /* combo: des3 + cbc */ + .base.cra_blocksize 
= DES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct s390_des_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = DES3_KEY_SIZE, + .max_keysize = DES3_KEY_SIZE, + .ivsize = DES_BLOCK_SIZE, + .setkey = des3_setkey_skcipher, + .encrypt = cbc_des3_encrypt, + .decrypt = cbc_des3_decrypt, }; static unsigned int __ctrblk_init(u8 *ctrptr, u8 *iv, unsigned int nbytes) @@ -364,128 +315,90 @@ static unsigned int __ctrblk_init(u8 *ctrptr, u8 *iv, unsigned int nbytes) return n; } -static int ctr_desall_crypt(struct blkcipher_desc *desc, unsigned long fc, - struct blkcipher_walk *walk) +static int ctr_desall_crypt(struct skcipher_request *req, unsigned long fc) { - struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct s390_des_ctx *ctx = crypto_skcipher_ctx(tfm); u8 buf[DES_BLOCK_SIZE], *ctrptr; + struct skcipher_walk walk; unsigned int n, nbytes; int ret, locked; locked = mutex_trylock(&ctrblk_lock); - ret = blkcipher_walk_virt_block(desc, walk, DES_BLOCK_SIZE); - while ((nbytes = walk->nbytes) >= DES_BLOCK_SIZE) { + ret = skcipher_walk_virt(&walk, req, false); + while ((nbytes = walk.nbytes) >= DES_BLOCK_SIZE) { n = DES_BLOCK_SIZE; if (nbytes >= 2*DES_BLOCK_SIZE && locked) - n = __ctrblk_init(ctrblk, walk->iv, nbytes); - ctrptr = (n > DES_BLOCK_SIZE) ? ctrblk : walk->iv; - cpacf_kmctr(fc, ctx->key, walk->dst.virt.addr, - walk->src.virt.addr, n, ctrptr); + n = __ctrblk_init(ctrblk, walk.iv, nbytes); + ctrptr = (n > DES_BLOCK_SIZE) ? ctrblk : walk.iv; + cpacf_kmctr(fc, ctx->key, walk.dst.virt.addr, + walk.src.virt.addr, n, ctrptr); if (ctrptr == ctrblk) - memcpy(walk->iv, ctrptr + n - DES_BLOCK_SIZE, + memcpy(walk.iv, ctrptr + n - DES_BLOCK_SIZE, DES_BLOCK_SIZE); - crypto_inc(walk->iv, DES_BLOCK_SIZE); - ret = blkcipher_walk_done(desc, walk, nbytes - n); + crypto_inc(walk.iv, DES_BLOCK_SIZE); + ret = skcipher_walk_done(&walk, nbytes - n); } if (locked) mutex_unlock(&ctrblk_lock); /* final block may be < DES_BLOCK_SIZE, copy only nbytes */ if (nbytes) { - cpacf_kmctr(fc, ctx->key, buf, walk->src.virt.addr, - DES_BLOCK_SIZE, walk->iv); - memcpy(walk->dst.virt.addr, buf, nbytes); - crypto_inc(walk->iv, DES_BLOCK_SIZE); - ret = blkcipher_walk_done(desc, walk, 0); + cpacf_kmctr(fc, ctx->key, buf, walk.src.virt.addr, + DES_BLOCK_SIZE, walk.iv); + memcpy(walk.dst.virt.addr, buf, nbytes); + crypto_inc(walk.iv, DES_BLOCK_SIZE); + ret = skcipher_walk_done(&walk, 0); } return ret; } -static int ctr_des_encrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) -{ - struct blkcipher_walk walk; - - blkcipher_walk_init(&walk, dst, src, nbytes); - return ctr_desall_crypt(desc, CPACF_KMCTR_DEA, &walk); -} - -static int ctr_des_decrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int ctr_des_crypt(struct skcipher_request *req) { - struct blkcipher_walk walk; - - blkcipher_walk_init(&walk, dst, src, nbytes); - return ctr_desall_crypt(desc, CPACF_KMCTR_DEA | CPACF_DECRYPT, &walk); + return ctr_desall_crypt(req, CPACF_KMCTR_DEA); } -static struct crypto_alg ctr_des_alg = { - .cra_name = "ctr(des)", - .cra_driver_name = "ctr-des-s390", - .cra_priority = 400, /* combo: des + ctr */ - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct s390_des_ctx), - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = DES_KEY_SIZE, - 
.max_keysize = DES_KEY_SIZE, - .ivsize = DES_BLOCK_SIZE, - .setkey = des_setkey, - .encrypt = ctr_des_encrypt, - .decrypt = ctr_des_decrypt, - } - } +static struct skcipher_alg ctr_des_alg = { + .base.cra_name = "ctr(des)", + .base.cra_driver_name = "ctr-des-s390", + .base.cra_priority = 400, /* combo: des + ctr */ + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct s390_des_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = DES_KEY_SIZE, + .max_keysize = DES_KEY_SIZE, + .ivsize = DES_BLOCK_SIZE, + .setkey = des_setkey_skcipher, + .encrypt = ctr_des_crypt, + .decrypt = ctr_des_crypt, + .chunksize = DES_BLOCK_SIZE, }; -static int ctr_des3_encrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) -{ - struct blkcipher_walk walk; - - blkcipher_walk_init(&walk, dst, src, nbytes); - return ctr_desall_crypt(desc, CPACF_KMCTR_TDEA_192, &walk); -} - -static int ctr_des3_decrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int ctr_des3_crypt(struct skcipher_request *req) { - struct blkcipher_walk walk; - - blkcipher_walk_init(&walk, dst, src, nbytes); - return ctr_desall_crypt(desc, CPACF_KMCTR_TDEA_192 | CPACF_DECRYPT, - &walk); + return ctr_desall_crypt(req, CPACF_KMCTR_TDEA_192); } -static struct crypto_alg ctr_des3_alg = { - .cra_name = "ctr(des3_ede)", - .cra_driver_name = "ctr-des3_ede-s390", - .cra_priority = 400, /* combo: des3 + ede */ - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct s390_des_ctx), - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = DES3_KEY_SIZE, - .max_keysize = DES3_KEY_SIZE, - .ivsize = DES_BLOCK_SIZE, - .setkey = des3_setkey, - .encrypt = ctr_des3_encrypt, - .decrypt = ctr_des3_decrypt, - } - } +static struct skcipher_alg ctr_des3_alg = { + .base.cra_name = "ctr(des3_ede)", + .base.cra_driver_name = "ctr-des3_ede-s390", + .base.cra_priority = 400, /* combo: des3 + ede */ + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct s390_des_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = DES3_KEY_SIZE, + .max_keysize = DES3_KEY_SIZE, + .ivsize = DES_BLOCK_SIZE, + .setkey = des3_setkey_skcipher, + .encrypt = ctr_des3_crypt, + .decrypt = ctr_des3_crypt, + .chunksize = DES_BLOCK_SIZE, }; -static struct crypto_alg *des_s390_algs_ptr[8]; +static struct crypto_alg *des_s390_algs_ptr[2]; static int des_s390_algs_num; +static struct skcipher_alg *des_s390_skciphers_ptr[6]; +static int des_s390_skciphers_num; static int des_s390_register_alg(struct crypto_alg *alg) { @@ -497,10 +410,22 @@ static int des_s390_register_alg(struct crypto_alg *alg) return ret; } +static int des_s390_register_skcipher(struct skcipher_alg *alg) +{ + int ret; + + ret = crypto_register_skcipher(alg); + if (!ret) + des_s390_skciphers_ptr[des_s390_skciphers_num++] = alg; + return ret; +} + static void des_s390_exit(void) { while (des_s390_algs_num--) crypto_unregister_alg(des_s390_algs_ptr[des_s390_algs_num]); + while (des_s390_skciphers_num--) + crypto_unregister_skcipher(des_s390_skciphers_ptr[des_s390_skciphers_num]); if (ctrblk) free_page((unsigned long) ctrblk); } @@ -518,12 +443,12 @@ static int __init des_s390_init(void) ret = des_s390_register_alg(&des_alg); if (ret) goto out_err; - ret = des_s390_register_alg(&ecb_des_alg); + ret = des_s390_register_skcipher(&ecb_des_alg); if (ret) goto out_err; } if (cpacf_test_func(&kmc_functions, 
CPACF_KMC_DEA)) { - ret = des_s390_register_alg(&cbc_des_alg); + ret = des_s390_register_skcipher(&cbc_des_alg); if (ret) goto out_err; } @@ -531,12 +456,12 @@ static int __init des_s390_init(void) ret = des_s390_register_alg(&des3_alg); if (ret) goto out_err; - ret = des_s390_register_alg(&ecb_des3_alg); + ret = des_s390_register_skcipher(&ecb_des3_alg); if (ret) goto out_err; } if (cpacf_test_func(&kmc_functions, CPACF_KMC_TDEA_192)) { - ret = des_s390_register_alg(&cbc_des3_alg); + ret = des_s390_register_skcipher(&cbc_des3_alg); if (ret) goto out_err; } @@ -551,12 +476,12 @@ static int __init des_s390_init(void) } if (cpacf_test_func(&kmctr_functions, CPACF_KMCTR_DEA)) { - ret = des_s390_register_alg(&ctr_des_alg); + ret = des_s390_register_skcipher(&ctr_des_alg); if (ret) goto out_err; } if (cpacf_test_func(&kmctr_functions, CPACF_KMCTR_TDEA_192)) { - ret = des_s390_register_alg(&ctr_des3_alg); + ret = des_s390_register_skcipher(&ctr_des3_alg); if (ret) goto out_err; } -- cgit v1.2.3 From e02d026f08f1fedb8c94d6f659ccc7c6ce1043f3 Mon Sep 17 00:00:00 2001 From: Rikard Falkeborn Date: Tue, 22 Oct 2019 21:25:47 +0200 Subject: MIPS: Loongson: Fix GENMASK misuse Arguments are supposed to be ordered high then low. Fixes: 6a6f9b7dafd50efc1b2 ("MIPS: Loongson: Add CFUCFG&CSR support") Signed-off-by: Rikard Falkeborn Reviewed-by: Huacai Chen Signed-off-by: Paul Burton Cc: chenhuacai@gmail.com Cc: jhogan@kernel.org Cc: jiaxun.yang@flygoat.com Cc: linux-mips@linux-mips.org Cc: linux-mips@vger.kernel.org Cc: paul.burton@mips.com Cc: ralf@linux-mips.org Cc: wuzhangjin@gmail.com Cc: zhangfx@lemote.com --- arch/mips/include/asm/mach-loongson64/loongson_regs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/mips/include/asm/mach-loongson64/loongson_regs.h b/arch/mips/include/asm/mach-loongson64/loongson_regs.h index 6e3569ab8936..363a47a5d26e 100644 --- a/arch/mips/include/asm/mach-loongson64/loongson_regs.h +++ b/arch/mips/include/asm/mach-loongson64/loongson_regs.h @@ -86,7 +86,7 @@ static inline u32 read_cpucfg(u32 reg) #define LOONGSON_CFG2_LGFTP BIT(19) #define LOONGSON_CFG2_LGFTPREV GENMASK(22, 20) #define LOONGSON_CFG2_LLFTP BIT(23) -#define LOONGSON_CFG2_LLFTPREV GENMASK(24, 26) +#define LOONGSON_CFG2_LLFTPREV GENMASK(26, 24) #define LOONGSON_CFG2_LCSRP BIT(27) #define LOONGSON_CFG2_LDISBLIKELY BIT(28) -- cgit v1.2.3 From 249be5633cdb31d8daf01326b3bf02733d7d7e9a Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Tue, 22 Oct 2019 18:13:11 +0200 Subject: MIPS: SGI-IP27: collect externs in new header file IP27 code has a few externs distributed over .c files. Collect them together into one common header file.
Signed-off-by: Thomas Bogendoerfer Signed-off-by: Paul Burton Cc: Ralf Baechle Cc: Paul Burton Cc: James Hogan Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org --- arch/mips/sgi-ip27/ip27-common.h | 9 +++++++++ arch/mips/sgi-ip27/ip27-init.c | 4 ++-- arch/mips/sgi-ip27/ip27-reset.c | 2 ++ arch/mips/sgi-ip27/ip27-smp.c | 4 ++-- arch/mips/sgi-ip27/ip27-timer.c | 2 ++ 5 files changed, 17 insertions(+), 4 deletions(-) create mode 100644 arch/mips/sgi-ip27/ip27-common.h (limited to 'arch') diff --git a/arch/mips/sgi-ip27/ip27-common.h b/arch/mips/sgi-ip27/ip27-common.h new file mode 100644 index 000000000000..e9e9f1dc8c20 --- /dev/null +++ b/arch/mips/sgi-ip27/ip27-common.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef __IP27_COMMON_H +#define __IP27_COMMON_H + +extern void ip27_reboot_setup(void); +extern void hub_rt_clock_event_init(void); + +#endif /* __IP27_COMMON_H */ diff --git a/arch/mips/sgi-ip27/ip27-init.c b/arch/mips/sgi-ip27/ip27-init.c index 1dad799758c4..f48e2b3990f6 100644 --- a/arch/mips/sgi-ip27/ip27-init.c +++ b/arch/mips/sgi-ip27/ip27-init.c @@ -36,6 +36,8 @@ #include #include +#include "ip27-common.h" + #define CPU_NONE (cpuid_t)-1 static DECLARE_BITMAP(hub_init_mask, MAX_COMPACT_NODES); @@ -113,8 +115,6 @@ get_nasid(void) >> NSRI_NODEID_SHFT); } -extern void ip27_reboot_setup(void); - void __init plat_mem_setup(void) { u64 p, e, n_mode; diff --git a/arch/mips/sgi-ip27/ip27-reset.c b/arch/mips/sgi-ip27/ip27-reset.c index c90228d0d4c2..74d078247e49 100644 --- a/arch/mips/sgi-ip27/ip27-reset.c +++ b/arch/mips/sgi-ip27/ip27-reset.c @@ -26,6 +26,8 @@ #include #include +#include "ip27-common.h" + void machine_restart(char *command) __noreturn; void machine_halt(void) __noreturn; void machine_power_off(void) __noreturn; diff --git a/arch/mips/sgi-ip27/ip27-smp.c b/arch/mips/sgi-ip27/ip27-smp.c index 386702abe660..c38df7c62964 100644 --- a/arch/mips/sgi-ip27/ip27-smp.c +++ b/arch/mips/sgi-ip27/ip27-smp.c @@ -27,6 +27,8 @@ #include #include +#include "ip27-common.h" + /* * Takes as first input the PROM assigned cpu id, and the kernel * assigned cpu id as the second. @@ -147,8 +149,6 @@ static void ip27_init_cpu(void) static void ip27_smp_finish(void) { - extern void hub_rt_clock_event_init(void); - hub_rt_clock_event_init(); local_irq_enable(); } diff --git a/arch/mips/sgi-ip27/ip27-timer.c b/arch/mips/sgi-ip27/ip27-timer.c index a317ea83f216..17302bbfa7a6 100644 --- a/arch/mips/sgi-ip27/ip27-timer.c +++ b/arch/mips/sgi-ip27/ip27-timer.c @@ -38,6 +38,8 @@ #include #include +#include "ip27-common.h" + static int rt_next_event(unsigned long delta, struct clock_event_device *evt) { unsigned int cpu = smp_processor_id(); -- cgit v1.2.3 From c823f416097879515a02f3d97aecc1204ffc0773 Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Tue, 22 Oct 2019 18:13:12 +0200 Subject: MIPS: SGI-IP27: move registering of smp ops into IP27 specific code Calling register_smp_ops() in plat_mem_setup() is still early enough. So by doing this we could remove the ugly #ifdef CONFIG_SGI_IP27 in fw/arc/init.c. 
Signed-off-by: Thomas Bogendoerfer Signed-off-by: Paul Burton Cc: Ralf Baechle Cc: Paul Burton Cc: James Hogan Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org --- arch/mips/fw/arc/init.c | 7 ------- arch/mips/sgi-ip27/ip27-common.h | 1 + arch/mips/sgi-ip27/ip27-init.c | 2 ++ 3 files changed, 3 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/mips/fw/arc/init.c b/arch/mips/fw/arc/init.c index 7b663455de6b..4ac6466a8872 100644 --- a/arch/mips/fw/arc/init.c +++ b/arch/mips/fw/arc/init.c @@ -54,11 +54,4 @@ void __init prom_init(void) ArcRead(0, &c, 1, &cnt); ArcEnterInteractiveMode(); #endif -#ifdef CONFIG_SGI_IP27 - { - extern const struct plat_smp_ops ip27_smp_ops; - - register_smp_ops(&ip27_smp_ops); - } -#endif } diff --git a/arch/mips/sgi-ip27/ip27-common.h b/arch/mips/sgi-ip27/ip27-common.h index e9e9f1dc8c20..3ffbcf9bfd41 100644 --- a/arch/mips/sgi-ip27/ip27-common.h +++ b/arch/mips/sgi-ip27/ip27-common.h @@ -5,5 +5,6 @@ extern void ip27_reboot_setup(void); extern void hub_rt_clock_event_init(void); +extern const struct plat_smp_ops ip27_smp_ops; #endif /* __IP27_COMMON_H */ diff --git a/arch/mips/sgi-ip27/ip27-init.c b/arch/mips/sgi-ip27/ip27-init.c index f48e2b3990f6..d160fb219d6d 100644 --- a/arch/mips/sgi-ip27/ip27-init.c +++ b/arch/mips/sgi-ip27/ip27-init.c @@ -120,6 +120,8 @@ void __init plat_mem_setup(void) u64 p, e, n_mode; nasid_t nid; + register_smp_ops(&ip27_smp_ops); + ip27_reboot_setup(); /* -- cgit v1.2.3 From cbd09241dd9db02caceb958b9ceb9e91762a0572 Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Tue, 22 Oct 2019 18:13:13 +0200 Subject: MIPS: arc: remove unused stuff remove unused _prom_envp and prom_argc macro. Signed-off-by: Thomas Bogendoerfer Signed-off-by: Paul Burton Cc: Ralf Baechle Cc: Paul Burton Cc: James Hogan Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org --- arch/mips/fw/arc/init.c | 3 +-- arch/mips/include/asm/sgialib.h | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/mips/fw/arc/init.c b/arch/mips/fw/arc/init.c index 4ac6466a8872..c713292462aa 100644 --- a/arch/mips/fw/arc/init.c +++ b/arch/mips/fw/arc/init.c @@ -19,7 +19,7 @@ /* Master romvec interface. */ struct linux_romvec *romvec; int prom_argc; -LONG *_prom_argv, *_prom_envp; +LONG *_prom_argv; #if defined(CONFIG_64BIT) && defined(CONFIG_FW_ARC32) /* stack for calling 32bit ARC prom */ @@ -34,7 +34,6 @@ void __init prom_init(void) prom_argc = fw_arg0; _prom_argv = (LONG *) fw_arg1; - _prom_envp = (LONG *) fw_arg2; if (pb->magic != 0x53435241) { printk(KERN_CRIT "Aieee, bad prom vector magic %08lx\n", diff --git a/arch/mips/include/asm/sgialib.h b/arch/mips/include/asm/sgialib.h index 21d17eb25ed8..40ab4ef0b1dc 100644 --- a/arch/mips/include/asm/sgialib.h +++ b/arch/mips/include/asm/sgialib.h @@ -17,12 +17,11 @@ extern struct linux_romvec *romvec; extern int prom_argc; -extern LONG *_prom_argv, *_prom_envp; +extern LONG *_prom_argv; /* A 32-bit ARC PROM pass arguments and environment as 32-bit pointer. These macros take care of sign extension. 
*/ #define prom_argv(index) ((char *) (long) _prom_argv[(index)]) -#define prom_argc(index) ((char *) (long) _prom_argc[(index)]) extern int prom_flags; -- cgit v1.2.3 From 7b16831d1eb1d2f2cbca6f01700a20a73aad57c0 Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Tue, 22 Oct 2019 18:13:14 +0200 Subject: MIPS: arc: use function argument for passing argc/argv to prom_init_cmdline prom_argc and prom_argv are only used by prom_init_cmdline(), so we can pass them directly as function arguments. Signed-off-by: Thomas Bogendoerfer Signed-off-by: Paul Burton Cc: Ralf Baechle Cc: Paul Burton Cc: James Hogan Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org --- arch/mips/fw/arc/cmdline.c | 16 +++++++++++----- arch/mips/fw/arc/init.c | 7 +------ arch/mips/include/asm/sgialib.h | 9 +-------- 3 files changed, 13 insertions(+), 19 deletions(-) (limited to 'arch') diff --git a/arch/mips/fw/arc/cmdline.c b/arch/mips/fw/arc/cmdline.c index c0122a1dc587..155c5e911723 100644 --- a/arch/mips/fw/arc/cmdline.c +++ b/arch/mips/fw/arc/cmdline.c @@ -17,6 +17,12 @@ #undef DEBUG_CMDLINE +/* + * A 32-bit ARC PROM passes arguments and environment as 32-bit pointers. + * These macros take care of sign extension. + */ +#define prom_argv(index) ((char *) (long)argv[(index)]) + static char *ignored[] = { "ConsoleIn=", "ConsoleOut=", @@ -32,14 +38,14 @@ static char *used_arc[][2] = { { "OSLoadOptions=", "" } }; -static char * __init move_firmware_args(char* cp) +static char __init *move_firmware_args(int argc, LONG *argv, char *cp) { char *s; int actr, i; actr = 1; /* Always ignore argv[0] */ - while (actr < argc) { wait for(i = 0; i < ARRAY_SIZE(used_arc); i++) { int len = strlen(used_arc[i][0]); @@ -64,7 +70,7 @@ static char * __init move_firmware_args(char* cp) return cp; } -void __init prom_init_cmdline(void) +void __init prom_init_cmdline(int argc, LONG *argv) { char *cp; int actr, i; @@ -76,9 +82,9 @@ void __init prom_init_cmdline(void) * Move ARC variables to the beginning to make sure they can be * overridden by later arguments. */ - cp = move_firmware_args(cp); + cp = move_firmware_args(argc, argv, cp); - while (actr < prom_argc) { + while (actr < argc) { for (i = 0; i < ARRAY_SIZE(ignored); i++) { int len = strlen(ignored[i]); diff --git a/arch/mips/fw/arc/init.c b/arch/mips/fw/arc/init.c index c713292462aa..f9d1dea9b2ca 100644 --- a/arch/mips/fw/arc/init.c +++ b/arch/mips/fw/arc/init.c @@ -18,8 +18,6 @@ /* Master romvec interface. */ struct linux_romvec *romvec; -int prom_argc; -LONG *_prom_argv; #if defined(CONFIG_64BIT) && defined(CONFIG_FW_ARC32) /* stack for calling 32bit ARC prom */ @@ -32,9 +30,6 @@ void __init prom_init(void) romvec = ROMVECTOR; - prom_argc = fw_arg0; - _prom_argv = (LONG *) fw_arg1; - if (pb->magic != 0x53435241) { printk(KERN_CRIT "Aieee, bad prom vector magic %08lx\n", (unsigned long) pb->magic); @@ -42,7 +37,7 @@ void __init prom_init(void) ; } - prom_init_cmdline(); + prom_init_cmdline(fw_arg0, (LONG *)fw_arg1); prom_identify_arch(); printk(KERN_INFO "PROMLIB: ARC firmware Version %d Revision %d\n", pb->ver, pb->rev); diff --git a/arch/mips/include/asm/sgialib.h b/arch/mips/include/asm/sgialib.h index 40ab4ef0b1dc..80f900417f7e 100644 --- a/arch/mips/include/asm/sgialib.h +++ b/arch/mips/include/asm/sgialib.h @@ -15,13 +15,6 @@ #include extern struct linux_romvec *romvec; -extern int prom_argc; - -extern LONG *_prom_argv; - -/* A 32-bit ARC PROM pass arguments and environment as 32-bit pointer. These macros take care of sign extension.
*/ -#define prom_argv(index) ((char *) (long) _prom_argv[(index)]) extern int prom_flags; @@ -55,7 +48,7 @@ extern void prom_identify_arch(void); extern PCHAR ArcGetEnvironmentVariable(PCHAR name); /* ARCS command line parsing. */ -extern void prom_init_cmdline(void); +extern void prom_init_cmdline(int argc, LONG *argv); /* File operations. */ extern LONG ArcRead(ULONG fd, PVOID buf, ULONG num, PULONG cnt); -- cgit v1.2.3 From e942242784d0fe4bae957357dfa873af364c684e Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Tue, 22 Oct 2019 18:13:15 +0200 Subject: MIPS: SGI-IP27: reduce ARC usage to a minimum IP27 uses ARC prom only for parsing prom arguments and has a hack for IP27 to make the ARC code behave. By introducing config symbol ARC_CMDLINE_ONLY IP27 only drags in ARC cmdline parsing and does everything else in IP27 specific code. Signed-off-by: Thomas Bogendoerfer Signed-off-by: Paul Burton Cc: Ralf Baechle Cc: Paul Burton Cc: James Hogan Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org --- arch/mips/Kconfig | 4 ++++ arch/mips/fw/arc/Makefile | 4 ++++ arch/mips/fw/arc/identify.c | 10 ---------- arch/mips/sgi-ip27/ip27-init.c | 14 ++++++++++++++ 4 files changed, 22 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 87bfefcbdb06..f7ec1505013c 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -675,6 +675,7 @@ config SGI_IP27 select ARCH_SPARSEMEM_ENABLE select FW_ARC select FW_ARC64 + select ARC_CMDLINE_ONLY select BOOT_ELF64 select DEFAULT_SGI_PARTITION select SYS_HAS_EARLY_PRINTK @@ -1351,6 +1352,9 @@ config MIPS_L1_CACHE_SHIFT config HAVE_STD_PC_SERIAL_PORT bool +config ARC_CMDLINE_ONLY + bool + config ARC_CONSOLE bool "ARC console support" depends on SGI_IP22 || SGI_IP28 || (SNI_RM && CPU_LITTLE_ENDIAN) diff --git a/arch/mips/fw/arc/Makefile b/arch/mips/fw/arc/Makefile index 3cc82d7fe548..64d685efcc77 100644 --- a/arch/mips/fw/arc/Makefile +++ b/arch/mips/fw/arc/Makefile @@ -3,8 +3,12 @@ # Makefile for the ARC prom monitor library routines under Linux. # +ifdef CONFIG_ARC_CMDLINE_ONLY +lib-y += cmdline.o +else lib-y += cmdline.o env.o file.o identify.o init.o \ misc.o +endif lib-$(CONFIG_ARC_MEMORY) += memory.o lib-$(CONFIG_ARC_CONSOLE) += arc_con.o diff --git a/arch/mips/fw/arc/identify.c b/arch/mips/fw/arc/identify.c index 7530c7b2fd39..5527e0f54079 100644 --- a/arch/mips/fw/arc/identify.c +++ b/arch/mips/fw/arc/identify.c @@ -31,10 +31,6 @@ static struct smatch mach_table[] = { .arcname = "SGI-IP22", .liname = "SGI Indy", .flags = PROM_FLAG_ARCS, - }, { - .arcname = "SGI-IP27", - .liname = "SGI Origin", - .flags = PROM_FLAG_ARCS, }, { .arcname = "SGI-IP28", .liname = "SGI IP28", @@ -103,13 +99,7 @@ void __init prom_identify_arch(void) */ p = ArcGetChild(PROM_NULL_COMPONENT); if (p == NULL) { -#ifdef CONFIG_SGI_IP27 - /* IP27 PROM misbehaves, seems to not implement ARC - GetChild(). So we just assume it's an IP27. 
*/ - iname = "SGI-IP27"; -#else iname = "Unknown"; -#endif } else iname = (char *) (long) p->iname; diff --git a/arch/mips/sgi-ip27/ip27-init.c b/arch/mips/sgi-ip27/ip27-init.c index d160fb219d6d..971aa0d5d534 100644 --- a/arch/mips/sgi-ip27/ip27-init.c +++ b/arch/mips/sgi-ip27/ip27-init.c @@ -13,9 +13,11 @@ #include #include #include +#include #include #include #include +#include #include #include #include @@ -160,3 +162,15 @@ void __init plat_mem_setup(void) ioport_resource.end = ~0UL; set_io_port_base(IO_BASE); } + +const char *get_system_type(void) +{ + return "SGI Origin"; +} + +void __init prom_init(void) +{ + prom_init_cmdline(fw_arg0, (LONG *)fw_arg1); + prom_meminit(); +} + -- cgit v1.2.3 From 2409839ab6bfa28b8451cf9ef7df5a8b0e0a82af Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Tue, 22 Oct 2019 15:09:19 +0200 Subject: MIPS: include: remove unsued header file asm/sgi/sgi.h asm/sgi/sgi.h is unused, time to remove it. Signed-off-by: Thomas Bogendoerfer Signed-off-by: Paul Burton Cc: Ralf Baechle Cc: Paul Burton Cc: James Hogan Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org --- arch/mips/include/asm/sgi/sgi.h | 48 ----------------------------------------- 1 file changed, 48 deletions(-) delete mode 100644 arch/mips/include/asm/sgi/sgi.h (limited to 'arch') diff --git a/arch/mips/include/asm/sgi/sgi.h b/arch/mips/include/asm/sgi/sgi.h deleted file mode 100644 index b61557151e3f..000000000000 --- a/arch/mips/include/asm/sgi/sgi.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * sgi.h: Definitions specific to SGI machines. - * - * Copyright (C) 1996 David S. Miller (dm@sgi.com) - */ -#ifndef _ASM_SGI_SGI_H -#define _ASM_SGI_SGI_H - -/* UP=UniProcessor MP=MultiProcessor(capable) */ -enum sgi_mach { - ip4, /* R2k UP */ - ip5, /* R2k MP */ - ip6, /* R3k UP */ - ip7, /* R3k MP */ - ip9, /* R3k UP */ - ip12, /* R3kA UP, Indigo */ - ip15, /* R3kA MP */ - ip17, /* R4K UP */ - ip19, /* R4K MP */ - ip20, /* R4K UP, Indigo */ - ip21, /* R8k/TFP MP */ - ip22, /* R4x00 UP, Indy, Indigo2 */ - ip25, /* R10k MP */ - ip26, /* R8k/TFP UP, Indigo2 */ - ip27, /* R10k MP, R12k MP, R14k MP, Origin 200/2k, Onyx2 */ - ip28, /* R10k UP, Indigo2 Impact R10k */ - ip30, /* R10k MP, R12k MP, R14k MP, Octane */ - ip32, /* R5k UP, RM5200 UP, RM7k UP, R10k UP, R12k UP, O2 */ - ip35, /* R14k MP, R16k MP, Origin 300/3k, Onyx3, Fuel, Tezro */ -}; - -extern enum sgi_mach sgimach; -extern void sgi_sysinit(void); - -/* Many I/O space registers are byte sized and are contained within - * one byte per word, specifically the MSB, this macro helps out. - */ -#ifdef __MIPSEL__ -#define SGI_MSB(regaddr) (regaddr) -#else -#define SGI_MSB(regaddr) ((regaddr) | 0x3) -#endif - -#endif /* _ASM_SGI_SGI_H */ -- cgit v1.2.3 From c7892db5dd6afe921ead502aff7440a1e450d947 Mon Sep 17 00:00:00 2001 From: Steven Price Date: Thu, 24 Oct 2019 14:31:11 +0100 Subject: KVM: arm64: Select TASK_DELAY_ACCT+TASKSTATS rather than SCHEDSTATS SCHEDSTATS requires DEBUG_KERNEL (and PROC_FS) and therefore isn't a good choice for enabling the scheduling statistics required for stolen time. Instead match the x86 configuration and select TASK_DELAY_ACCT and TASKSTATS. This adds the dependencies of NET && MULTIUSER for arm64 KVM. 
Suggested-by: Marc Zyngier Fixes: 8564d6372a7d ("KVM: arm64: Support stolen time reporting via shared structure") Signed-off-by: Steven Price Signed-off-by: Marc Zyngier --- arch/arm64/kvm/Kconfig | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index d8b88e40d223..a475c68cbfec 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -21,6 +21,8 @@ if VIRTUALIZATION config KVM bool "Kernel-based Virtual Machine (KVM) support" depends on OF + # for TASKSTATS/TASK_DELAY_ACCT: + depends on NET && MULTIUSER select MMU_NOTIFIER select PREEMPT_NOTIFIERS select HAVE_KVM_CPU_RELAX_INTERCEPT @@ -39,7 +41,8 @@ config KVM select IRQ_BYPASS_MANAGER select HAVE_KVM_IRQ_BYPASS select HAVE_KVM_VCPU_RUN_PID_CHANGE - select SCHEDSTATS + select TASKSTATS + select TASK_DELAY_ACCT ---help--- Support hosting virtualized guest machines. We don't support KVM with 16K page tables yet, due to the multiple -- cgit v1.2.3 From 19308a412ec52c0de92d296842be237778753d9b Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Thu, 10 Oct 2019 14:37:25 +0800 Subject: x86/kvm: Fix -Wmissing-prototypes warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We get two warnings when building the kernel with W=1: arch/x86/kernel/kvm.c:872:6: warning: no previous prototype for ‘arch_haltpoll_enable’ [-Wmissing-prototypes] arch/x86/kernel/kvm.c:885:6: warning: no previous prototype for ‘arch_haltpoll_disable’ [-Wmissing-prototypes] Including the missing header file fixes this. Signed-off-by: Yi Wang Signed-off-by: Paolo Bonzini --- arch/x86/kernel/kvm.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index e820568ed4d5..32ef1ee733b7 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -33,6 +33,7 @@ #include #include #include +#include static int kvmapf = 1; -- cgit v1.2.3 From 0d6ecb2e43d6b15699cea1fbd7ce0c981694b9b4 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 14 Oct 2019 19:45:15 -0700 Subject: crypto: powerpc - don't unnecessarily use atomic scatterwalk The PowerPC SPE implementations of AES modes only disable preemption during the actual encryption/decryption, not during the scatterwalk functions. It's therefore unnecessary to request an atomic scatterwalk. So don't do so.
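To make the preemption scope concrete, here is a simplified sketch of the driver's loop (condensed from ppc_ecb_encrypt(); the remaining-byte bookkeeping is omitted for brevity):

	blkcipher_walk_init(&walk, dst, src, nbytes);
	/* CRYPTO_TFM_REQ_MAY_SLEEP is left set: the walk itself may sleep */
	err = blkcipher_walk_virt(desc, &walk);

	while ((nbytes = walk.nbytes)) {
		spe_begin();	/* preemption disabled only for the SPE work */
		ppc_encrypt_ecb(walk.dst.virt.addr, walk.src.virt.addr,
				ctx->key_enc, ctx->rounds, nbytes);
		spe_end();	/* preemption enabled again before walking on */
		err = blkcipher_walk_done(desc, &walk, 0);
	}

Since nothing between blkcipher_walk_virt() and blkcipher_walk_done() runs with preemption disabled except the bracketed cipher call, the walk never needs to be atomic.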
Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/powerpc/crypto/aes-spe-glue.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/crypto/aes-spe-glue.c b/arch/powerpc/crypto/aes-spe-glue.c index 3a4ca7d32477..319f1dbb3a70 100644 --- a/arch/powerpc/crypto/aes-spe-glue.c +++ b/arch/powerpc/crypto/aes-spe-glue.c @@ -186,7 +186,6 @@ static int ppc_ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, unsigned int ubytes; int err; - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; blkcipher_walk_init(&walk, dst, src, nbytes); err = blkcipher_walk_virt(desc, &walk); @@ -214,7 +213,6 @@ static int ppc_ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, unsigned int ubytes; int err; - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; blkcipher_walk_init(&walk, dst, src, nbytes); err = blkcipher_walk_virt(desc, &walk); @@ -242,7 +240,6 @@ static int ppc_cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, unsigned int ubytes; int err; - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; blkcipher_walk_init(&walk, dst, src, nbytes); err = blkcipher_walk_virt(desc, &walk); @@ -270,7 +267,6 @@ static int ppc_cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, unsigned int ubytes; int err; - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; blkcipher_walk_init(&walk, dst, src, nbytes); err = blkcipher_walk_virt(desc, &walk); @@ -298,7 +294,6 @@ static int ppc_ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, unsigned int pbytes, ubytes; int err; - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; blkcipher_walk_init(&walk, dst, src, nbytes); err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE); @@ -329,7 +324,6 @@ static int ppc_xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, int err; u32 *twk; - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; blkcipher_walk_init(&walk, dst, src, nbytes); err = blkcipher_walk_virt(desc, &walk); twk = ctx->key_twk; @@ -360,7 +354,6 @@ static int ppc_xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, int err; u32 *twk; - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; blkcipher_walk_init(&walk, dst, src, nbytes); err = blkcipher_walk_virt(desc, &walk); twk = ctx->key_twk; -- cgit v1.2.3 From 8255e65df961fd0c9b7d86317e915606751562a4 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 14 Oct 2019 19:45:16 -0700 Subject: crypto: powerpc - don't set ivsize for AES-ECB Set the ivsize for the "ecb-ppc-spe" algorithm to 0, since ECB mode doesn't take an IV. 
This fixes a failure in the extra crypto self-tests: alg: skcipher: ivsize for ecb-ppc-spe (16) doesn't match generic impl (0) Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/powerpc/crypto/aes-spe-glue.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/crypto/aes-spe-glue.c b/arch/powerpc/crypto/aes-spe-glue.c index 319f1dbb3a70..4189d2644f74 100644 --- a/arch/powerpc/crypto/aes-spe-glue.c +++ b/arch/powerpc/crypto/aes-spe-glue.c @@ -415,7 +415,6 @@ static struct crypto_alg aes_algs[] = { { .blkcipher = { .min_keysize = AES_MIN_KEY_SIZE, .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, .setkey = ppc_aes_setkey, .encrypt = ppc_ecb_encrypt, .decrypt = ppc_ecb_decrypt, -- cgit v1.2.3 From 7f725f41f62750832817047e44892ce92d65e6aa Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 14 Oct 2019 19:45:17 -0700 Subject: crypto: powerpc - convert SPE AES algorithms to skcipher API Convert the glue code for the PowerPC SPE implementations of AES-ECB, AES-CBC, AES-CTR, and AES-XTS from the deprecated "blkcipher" API to the "skcipher" API. This is needed in order for the blkcipher API to be removed. Tested with: export ARCH=powerpc CROSS_COMPILE=powerpc-linux-gnu- make mpc85xx_defconfig cat >> .config << EOF # CONFIG_MODULES is not set # CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set CONFIG_DEBUG_KERNEL=y CONFIG_CRYPTO_MANAGER_EXTRA_TESTS=y CONFIG_CRYPTO_AES=y CONFIG_CRYPTO_CBC=y CONFIG_CRYPTO_CTR=y CONFIG_CRYPTO_ECB=y CONFIG_CRYPTO_XTS=y CONFIG_CRYPTO_AES_PPC_SPE=y EOF make olddefconfig make -j32 qemu-system-ppc -M mpc8544ds -cpu e500 -nographic \ -kernel arch/powerpc/boot/zImage \ -append cryptomgr.fuzz_iterations=1000 Note that xts-ppc-spe still fails the comparison tests due to the lack of ciphertext stealing support. This is not addressed by this patch. This patch also cleans up the code by making ->encrypt() and ->decrypt() call a common function for each of ECB, CBC, and XTS, and by using a clearer way to compute the length to process at each step. 
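To make the "clearer way" concrete, here is a worked example with made-up numbers (suppose walk.nbytes is 0x1234 and the driver's per-call limit MAX_BYTES is 0x800):

	nbytes = min_t(unsigned int, nbytes, MAX_BYTES);
	/* 0x1234 -> 0x800 */
	nbytes = round_down(nbytes, AES_BLOCK_SIZE);
	/* 0x800 is already a multiple of 16, so it stays 0x800 */

	/* The old code instead computed the bytes to leave unprocessed:
	 *	ubytes = nbytes > MAX_BYTES ?
	 *		 nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
	 * i.e. 0x1234 - 0x800 = 0xa34 left over, so the same 0x800 bytes
	 * were processed, just expressed less directly.
	 */

Both forms hand 0x800 bytes to the cipher core per iteration; the new one states the cap and the block alignment directly instead of deriving them from the leftover count.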
Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/powerpc/crypto/aes-spe-glue.c | 381 ++++++++++++++++--------------------- crypto/Kconfig | 1 + 2 files changed, 166 insertions(+), 216 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/crypto/aes-spe-glue.c b/arch/powerpc/crypto/aes-spe-glue.c index 4189d2644f74..f828f8bcd0c6 100644 --- a/arch/powerpc/crypto/aes-spe-glue.c +++ b/arch/powerpc/crypto/aes-spe-glue.c @@ -17,6 +17,7 @@ #include #include #include +#include #include /* @@ -118,13 +119,19 @@ static int ppc_aes_setkey(struct crypto_tfm *tfm, const u8 *in_key, return 0; } -static int ppc_xts_setkey(struct crypto_tfm *tfm, const u8 *in_key, +static int ppc_aes_setkey_skcipher(struct crypto_skcipher *tfm, + const u8 *in_key, unsigned int key_len) +{ + return ppc_aes_setkey(crypto_skcipher_tfm(tfm), in_key, key_len); +} + +static int ppc_xts_setkey(struct crypto_skcipher *tfm, const u8 *in_key, unsigned int key_len) { - struct ppc_xts_ctx *ctx = crypto_tfm_ctx(tfm); + struct ppc_xts_ctx *ctx = crypto_skcipher_ctx(tfm); int err; - err = xts_check_key(tfm, in_key, key_len); + err = xts_verify_key(tfm, in_key, key_len); if (err) return err; @@ -133,7 +140,7 @@ static int ppc_xts_setkey(struct crypto_tfm *tfm, const u8 *in_key, if (key_len != AES_KEYSIZE_128 && key_len != AES_KEYSIZE_192 && key_len != AES_KEYSIZE_256) { - tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); return -EINVAL; } @@ -178,201 +185,154 @@ static void ppc_aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) spe_end(); } -static int ppc_ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ppc_ecb_crypt(struct skcipher_request *req, bool enc) { - struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - struct blkcipher_walk walk; - unsigned int ubytes; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct ppc_aes_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; int err; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); + err = skcipher_walk_virt(&walk, req, false); - while ((nbytes = walk.nbytes)) { - ubytes = nbytes > MAX_BYTES ? - nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1); - nbytes -= ubytes; + while ((nbytes = walk.nbytes) != 0) { + nbytes = min_t(unsigned int, nbytes, MAX_BYTES); + nbytes = round_down(nbytes, AES_BLOCK_SIZE); spe_begin(); - ppc_encrypt_ecb(walk.dst.virt.addr, walk.src.virt.addr, - ctx->key_enc, ctx->rounds, nbytes); + if (enc) + ppc_encrypt_ecb(walk.dst.virt.addr, walk.src.virt.addr, + ctx->key_enc, ctx->rounds, nbytes); + else + ppc_decrypt_ecb(walk.dst.virt.addr, walk.src.virt.addr, + ctx->key_dec, ctx->rounds, nbytes); spe_end(); - err = blkcipher_walk_done(desc, &walk, ubytes); + err = skcipher_walk_done(&walk, walk.nbytes - nbytes); } return err; } -static int ppc_ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ppc_ecb_encrypt(struct skcipher_request *req) { - struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - struct blkcipher_walk walk; - unsigned int ubytes; - int err; - - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); - - while ((nbytes = walk.nbytes)) { - ubytes = nbytes > MAX_BYTES ? 
- nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1); - nbytes -= ubytes; - - spe_begin(); - ppc_decrypt_ecb(walk.dst.virt.addr, walk.src.virt.addr, - ctx->key_dec, ctx->rounds, nbytes); - spe_end(); - - err = blkcipher_walk_done(desc, &walk, ubytes); - } + return ppc_ecb_crypt(req, true); +} - return err; +static int ppc_ecb_decrypt(struct skcipher_request *req) +{ + return ppc_ecb_crypt(req, false); } -static int ppc_cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ppc_cbc_crypt(struct skcipher_request *req, bool enc) { - struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - struct blkcipher_walk walk; - unsigned int ubytes; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct ppc_aes_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; int err; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); + err = skcipher_walk_virt(&walk, req, false); - while ((nbytes = walk.nbytes)) { - ubytes = nbytes > MAX_BYTES ? - nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1); - nbytes -= ubytes; + while ((nbytes = walk.nbytes) != 0) { + nbytes = min_t(unsigned int, nbytes, MAX_BYTES); + nbytes = round_down(nbytes, AES_BLOCK_SIZE); spe_begin(); - ppc_encrypt_cbc(walk.dst.virt.addr, walk.src.virt.addr, - ctx->key_enc, ctx->rounds, nbytes, walk.iv); + if (enc) + ppc_encrypt_cbc(walk.dst.virt.addr, walk.src.virt.addr, + ctx->key_enc, ctx->rounds, nbytes, + walk.iv); + else + ppc_decrypt_cbc(walk.dst.virt.addr, walk.src.virt.addr, + ctx->key_dec, ctx->rounds, nbytes, + walk.iv); spe_end(); - err = blkcipher_walk_done(desc, &walk, ubytes); + err = skcipher_walk_done(&walk, walk.nbytes - nbytes); } return err; } -static int ppc_cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ppc_cbc_encrypt(struct skcipher_request *req) { - struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - struct blkcipher_walk walk; - unsigned int ubytes; - int err; - - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); - - while ((nbytes = walk.nbytes)) { - ubytes = nbytes > MAX_BYTES ? - nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1); - nbytes -= ubytes; - - spe_begin(); - ppc_decrypt_cbc(walk.dst.virt.addr, walk.src.virt.addr, - ctx->key_dec, ctx->rounds, nbytes, walk.iv); - spe_end(); - - err = blkcipher_walk_done(desc, &walk, ubytes); - } + return ppc_cbc_crypt(req, true); +} - return err; +static int ppc_cbc_decrypt(struct skcipher_request *req) +{ + return ppc_cbc_crypt(req, false); } -static int ppc_ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ppc_ctr_crypt(struct skcipher_request *req) { - struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - struct blkcipher_walk walk; - unsigned int pbytes, ubytes; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct ppc_aes_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; int err; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE); + err = skcipher_walk_virt(&walk, req, false); - while ((pbytes = walk.nbytes)) { - pbytes = pbytes > MAX_BYTES ? MAX_BYTES : pbytes; - pbytes = pbytes == nbytes ? 
- nbytes : pbytes & ~(AES_BLOCK_SIZE - 1); - ubytes = walk.nbytes - pbytes; + while ((nbytes = walk.nbytes) != 0) { + nbytes = min_t(unsigned int, nbytes, MAX_BYTES); + if (nbytes < walk.total) + nbytes = round_down(nbytes, AES_BLOCK_SIZE); spe_begin(); ppc_crypt_ctr(walk.dst.virt.addr, walk.src.virt.addr, - ctx->key_enc, ctx->rounds, pbytes , walk.iv); + ctx->key_enc, ctx->rounds, nbytes, walk.iv); spe_end(); - nbytes -= pbytes; - err = blkcipher_walk_done(desc, &walk, ubytes); + err = skcipher_walk_done(&walk, walk.nbytes - nbytes); } return err; } -static int ppc_xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ppc_xts_crypt(struct skcipher_request *req, bool enc) { - struct ppc_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - struct blkcipher_walk walk; - unsigned int ubytes; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct ppc_xts_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; int err; u32 *twk; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); + err = skcipher_walk_virt(&walk, req, false); twk = ctx->key_twk; - while ((nbytes = walk.nbytes)) { - ubytes = nbytes > MAX_BYTES ? - nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1); - nbytes -= ubytes; + while ((nbytes = walk.nbytes) != 0) { + nbytes = min_t(unsigned int, nbytes, MAX_BYTES); + nbytes = round_down(nbytes, AES_BLOCK_SIZE); spe_begin(); - ppc_encrypt_xts(walk.dst.virt.addr, walk.src.virt.addr, - ctx->key_enc, ctx->rounds, nbytes, walk.iv, twk); + if (enc) + ppc_encrypt_xts(walk.dst.virt.addr, walk.src.virt.addr, + ctx->key_enc, ctx->rounds, nbytes, + walk.iv, twk); + else + ppc_decrypt_xts(walk.dst.virt.addr, walk.src.virt.addr, + ctx->key_dec, ctx->rounds, nbytes, + walk.iv, twk); spe_end(); twk = NULL; - err = blkcipher_walk_done(desc, &walk, ubytes); + err = skcipher_walk_done(&walk, walk.nbytes - nbytes); } return err; } -static int ppc_xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ppc_xts_encrypt(struct skcipher_request *req) { - struct ppc_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - struct blkcipher_walk walk; - unsigned int ubytes; - int err; - u32 *twk; - - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); - twk = ctx->key_twk; - - while ((nbytes = walk.nbytes)) { - ubytes = nbytes > MAX_BYTES ? - nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1); - nbytes -= ubytes; - - spe_begin(); - ppc_decrypt_xts(walk.dst.virt.addr, walk.src.virt.addr, - ctx->key_dec, ctx->rounds, nbytes, walk.iv, twk); - spe_end(); - - twk = NULL; - err = blkcipher_walk_done(desc, &walk, ubytes); - } + return ppc_xts_crypt(req, true); +} - return err; +static int ppc_xts_decrypt(struct skcipher_request *req) +{ + return ppc_xts_crypt(req, false); } /* @@ -381,9 +341,9 @@ static int ppc_xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, * This improves IPsec thoughput by another few percent. 
Additionally we assume * that AES context is always aligned to at least 8 bytes because it is created * with kmalloc() in the crypto infrastructure - * */ -static struct crypto_alg aes_algs[] = { { + +static struct crypto_alg aes_cipher_alg = { .cra_name = "aes", .cra_driver_name = "aes-ppc-spe", .cra_priority = 300, @@ -401,95 +361,84 @@ static struct crypto_alg aes_algs[] = { { .cia_decrypt = ppc_aes_decrypt } } -}, { - .cra_name = "ecb(aes)", - .cra_driver_name = "ecb-ppc-spe", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct ppc_aes_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .setkey = ppc_aes_setkey, - .encrypt = ppc_ecb_encrypt, - .decrypt = ppc_ecb_decrypt, - } - } -}, { - .cra_name = "cbc(aes)", - .cra_driver_name = "cbc-ppc-spe", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct ppc_aes_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = ppc_aes_setkey, - .encrypt = ppc_cbc_encrypt, - .decrypt = ppc_cbc_decrypt, - } - } -}, { - .cra_name = "ctr(aes)", - .cra_driver_name = "ctr-ppc-spe", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct ppc_aes_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = ppc_aes_setkey, - .encrypt = ppc_ctr_crypt, - .decrypt = ppc_ctr_crypt, - } - } -}, { - .cra_name = "xts(aes)", - .cra_driver_name = "xts-ppc-spe", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct ppc_xts_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = AES_MIN_KEY_SIZE * 2, - .max_keysize = AES_MAX_KEY_SIZE * 2, - .ivsize = AES_BLOCK_SIZE, - .setkey = ppc_xts_setkey, - .encrypt = ppc_xts_encrypt, - .decrypt = ppc_xts_decrypt, - } +}; + +static struct skcipher_alg aes_skcipher_algs[] = { + { + .base.cra_name = "ecb(aes)", + .base.cra_driver_name = "ecb-ppc-spe", + .base.cra_priority = 300, + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct ppc_aes_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .setkey = ppc_aes_setkey_skcipher, + .encrypt = ppc_ecb_encrypt, + .decrypt = ppc_ecb_decrypt, + }, { + .base.cra_name = "cbc(aes)", + .base.cra_driver_name = "cbc-ppc-spe", + .base.cra_priority = 300, + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct ppc_aes_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = ppc_aes_setkey_skcipher, + .encrypt = ppc_cbc_encrypt, + .decrypt = ppc_cbc_decrypt, + }, { + .base.cra_name = "ctr(aes)", + .base.cra_driver_name = "ctr-ppc-spe", + .base.cra_priority = 300, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct ppc_aes_ctx), + 
.base.cra_module = THIS_MODULE, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = ppc_aes_setkey_skcipher, + .encrypt = ppc_ctr_crypt, + .decrypt = ppc_ctr_crypt, + .chunksize = AES_BLOCK_SIZE, + }, { + .base.cra_name = "xts(aes)", + .base.cra_driver_name = "xts-ppc-spe", + .base.cra_priority = 300, + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct ppc_xts_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = AES_MIN_KEY_SIZE * 2, + .max_keysize = AES_MAX_KEY_SIZE * 2, + .ivsize = AES_BLOCK_SIZE, + .setkey = ppc_xts_setkey, + .encrypt = ppc_xts_encrypt, + .decrypt = ppc_xts_decrypt, } -} }; +}; static int __init ppc_aes_mod_init(void) { - return crypto_register_algs(aes_algs, ARRAY_SIZE(aes_algs)); + int err; + + err = crypto_register_alg(&aes_cipher_alg); + if (err) + return err; + + err = crypto_register_skciphers(aes_skcipher_algs, + ARRAY_SIZE(aes_skcipher_algs)); + if (err) + crypto_unregister_alg(&aes_cipher_alg); + return err; } static void __exit ppc_aes_mod_fini(void) { - crypto_unregister_algs(aes_algs, ARRAY_SIZE(aes_algs)); + crypto_unregister_alg(&aes_cipher_alg); + crypto_unregister_skciphers(aes_skcipher_algs, + ARRAY_SIZE(aes_skcipher_algs)); } module_init(ppc_aes_mod_init); diff --git a/crypto/Kconfig b/crypto/Kconfig index 8c38c2b7f8e7..320548b4dfa9 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1125,6 +1125,7 @@ config CRYPTO_AES_SPARC64 config CRYPTO_AES_PPC_SPE tristate "AES cipher algorithms (PPC SPE)" depends on PPC && SPE + select CRYPTO_BLKCIPHER help AES cipher algorithms (FIPS-197). Additionally the acceleration for popular block cipher modes ECB, CBC, CTR and XTS is supported. -- cgit v1.2.3 From d0be0720576439da2cefc16e648a61a7aebcf34f Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 15 Oct 2019 10:14:12 +0200 Subject: crypto: powerpc/spe-xts - implement support for ciphertext stealing Add the logic to deal with input sizes that are not a round multiple of the AES block size, as described by the XTS spec. This brings the SPE implementation in line with other kernel drivers that have been updated recently to take this into account. 
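As a worked example of how the tail is split off (hypothetical request size, for illustration only): a 52-byte XTS request gives tail = 52 % 16 = 4 and offset = 52 - 4 - 16 = 32, so the bulk of the request goes through the regular processing path while the last full block plus the 4-byte tail are handled out of line:

	int tail   = req->cryptlen % AES_BLOCK_SIZE;        /* 4 in this example */
	int offset = req->cryptlen - tail - AES_BLOCK_SIZE; /* 32: last full block */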
Cc: Eric Biggers Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/powerpc/crypto/aes-spe-glue.c | 81 +++++++++++++++++++++++++++++++++++++- 1 file changed, 79 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/crypto/aes-spe-glue.c b/arch/powerpc/crypto/aes-spe-glue.c index f828f8bcd0c6..1fad5d4c658d 100644 --- a/arch/powerpc/crypto/aes-spe-glue.c +++ b/arch/powerpc/crypto/aes-spe-glue.c @@ -19,6 +19,8 @@ #include #include #include +#include +#include /* * MAX_BYTES defines the number of bytes that are allowed to be processed @@ -327,12 +329,87 @@ static int ppc_xts_crypt(struct skcipher_request *req, bool enc) static int ppc_xts_encrypt(struct skcipher_request *req) { - return ppc_xts_crypt(req, true); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct ppc_xts_ctx *ctx = crypto_skcipher_ctx(tfm); + int tail = req->cryptlen % AES_BLOCK_SIZE; + int offset = req->cryptlen - tail - AES_BLOCK_SIZE; + struct skcipher_request subreq; + u8 b[2][AES_BLOCK_SIZE]; + int err; + + if (req->cryptlen < AES_BLOCK_SIZE) + return -EINVAL; + + if (tail) { + subreq = *req; + skcipher_request_set_crypt(&subreq, req->src, req->dst, + req->cryptlen - tail, req->iv); + req = &subreq; + } + + err = ppc_xts_crypt(req, true); + if (err || !tail) + return err; + + scatterwalk_map_and_copy(b[0], req->dst, offset, AES_BLOCK_SIZE, 0); + memcpy(b[1], b[0], tail); + scatterwalk_map_and_copy(b[0], req->src, offset + AES_BLOCK_SIZE, tail, 0); + + spe_begin(); + ppc_encrypt_xts(b[0], b[0], ctx->key_enc, ctx->rounds, AES_BLOCK_SIZE, + req->iv, NULL); + spe_end(); + + scatterwalk_map_and_copy(b[0], req->dst, offset, AES_BLOCK_SIZE + tail, 1); + + return 0; } static int ppc_xts_decrypt(struct skcipher_request *req) { - return ppc_xts_crypt(req, false); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct ppc_xts_ctx *ctx = crypto_skcipher_ctx(tfm); + int tail = req->cryptlen % AES_BLOCK_SIZE; + int offset = req->cryptlen - tail - AES_BLOCK_SIZE; + struct skcipher_request subreq; + u8 b[3][AES_BLOCK_SIZE]; + le128 twk; + int err; + + if (req->cryptlen < AES_BLOCK_SIZE) + return -EINVAL; + + if (tail) { + subreq = *req; + skcipher_request_set_crypt(&subreq, req->src, req->dst, + offset, req->iv); + req = &subreq; + } + + err = ppc_xts_crypt(req, false); + if (err || !tail) + return err; + + scatterwalk_map_and_copy(b[1], req->src, offset, AES_BLOCK_SIZE + tail, 0); + + spe_begin(); + if (!offset) + ppc_encrypt_ecb(req->iv, req->iv, ctx->key_twk, ctx->rounds, + AES_BLOCK_SIZE); + + gf128mul_x_ble(&twk, (le128 *)req->iv); + + ppc_decrypt_xts(b[1], b[1], ctx->key_dec, ctx->rounds, AES_BLOCK_SIZE, + (u8 *)&twk, NULL); + memcpy(b[0], b[2], tail); + memcpy(b[0] + tail, b[1] + tail, AES_BLOCK_SIZE - tail); + ppc_decrypt_xts(b[0], b[0], ctx->key_dec, ctx->rounds, AES_BLOCK_SIZE, + req->iv, NULL); + spe_end(); + + scatterwalk_map_and_copy(b[0], req->dst, offset, AES_BLOCK_SIZE + tail, 1); + + return 0; } /* -- cgit v1.2.3 From 7db3e57e6a95435cef5b33f2a90efcac5ce577da Mon Sep 17 00:00:00 2001 From: Shaokun Zhang Date: Fri, 25 Oct 2019 14:32:06 +0800 Subject: arm64: cpufeature: Fix typos in comment Fix up one typo: CTR_E0 -> CTR_EL0 Cc: Will Deacon Acked-by: Suzuki K Poulose Signed-off-by: Shaokun Zhang Signed-off-by: Catalin Marinas --- arch/arm64/kernel/cpuinfo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 05933c065732..56bba746da1c 100644 ---
a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -329,7 +329,7 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) info->reg_cntfrq = arch_timer_get_cntfrq(); /* * Use the effective value of the CTR_EL0 than the raw value - * exposed by the CPU. CTR_E0.IDC field value must be interpreted + * exposed by the CPU. CTR_EL0.IDC field value must be interpreted * with the CLIDR_EL1 fields to avoid triggering false warnings * when there is a mismatch across the CPUs. Keep track of the * effective value of the CTR_EL0 in our internal records for -- cgit v1.2.3 From a5315819c5e7e50b2b457b60aaf2cc61d76888a2 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 24 Oct 2019 13:01:43 +0100 Subject: arm64: pgtable: Correct typo in comment vmmemmap -> vmemmap Signed-off-by: Mark Brown Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/pgtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 7576df00eb50..4438a23f969c 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -17,7 +17,7 @@ * VMALLOC range. * * VMALLOC_START: beginning of the kernel vmalloc space - * VMALLOC_END: extends to the available space below vmmemmap, PCI I/O space + * VMALLOC_END: extends to the available space below vmemmap, PCI I/O space * and fixed mappings */ #define VMALLOC_START (MODULES_END) -- cgit v1.2.3 From bfcef4ab1d7ee8921bc322109b1692036cc6cbe0 Mon Sep 17 00:00:00 2001 From: Yunfeng Ye Date: Mon, 21 Oct 2019 19:31:21 +0800 Subject: arm64: psci: Reduce the waiting time for cpu_psci_cpu_kill() In cases like suspend-to-disk and suspend-to-ram, a large number of CPU cores need to be shut down. At present, the CPU hotplug operation is serialised, and the CPU cores can only be shut down one by one. In this process, if PSCI affinity_info() does not return LEVEL_OFF quickly, cpu_psci_cpu_kill() needs to wait for 10ms. If hundreds of CPU cores need to be shut down, it will take a long time. Normally, there is no need to wait 10ms in cpu_psci_cpu_kill(). So change the wait interval from 10 ms to max 1 ms and use usleep_range() instead of msleep() for more accurate timing. In addition, reducing the time interval will increase the number of messages output, so remove the "Retry ..." message; instead, track the elapsed time and report it in the success message. Signed-off-by: Yunfeng Ye Reviewed-by: Sudeep Holla Signed-off-by: Catalin Marinas --- arch/arm64/kernel/psci.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c index c9f72b2665f1..43ae4e0c968f 100644 --- a/arch/arm64/kernel/psci.c +++ b/arch/arm64/kernel/psci.c @@ -81,7 +81,8 @@ static void cpu_psci_cpu_die(unsigned int cpu) static int cpu_psci_cpu_kill(unsigned int cpu) { - int err, i; + int err; + unsigned long start, end; if (!psci_ops.affinity_info) return 0; @@ -91,16 +92,18 @@ static int cpu_psci_cpu_kill(unsigned int cpu) * while it is dying. So, try again a few times.
*/ - for (i = 0; i < 10; i++) { + start = jiffies; + end = start + msecs_to_jiffies(100); + do { err = psci_ops.affinity_info(cpu_logical_map(cpu), 0); if (err == PSCI_0_2_AFFINITY_LEVEL_OFF) { - pr_info("CPU%d killed.\n", cpu); + pr_info("CPU%d killed (polled %d ms)\n", cpu, + jiffies_to_msecs(jiffies - start)); return 0; } - msleep(10); - pr_info("Retrying again to check for CPU kill\n"); - } + usleep_range(100, 1000); + } while (time_before(jiffies, end)); pr_warn("CPU%d may not have shut down cleanly (AFFINITY_INFO reports %d)\n", cpu, err); -- cgit v1.2.3 From 05460849c3b51180d5ada3373d0449aea19075e4 Mon Sep 17 00:00:00 2001 From: James Morse Date: Thu, 17 Oct 2019 18:42:58 +0100 Subject: arm64: errata: Hide CTR_EL0.DIC on systems affected by Neoverse-N1 #1542419 Cores affected by Neoverse-N1 #1542419 could execute a stale instruction when a branch is updated to point to freshly generated instructions. To work around this issue we need user-space to issue unnecessary icache maintenance that we can trap. Start by hiding CTR_EL0.DIC. Reviewed-by: Suzuki K Poulose Signed-off-by: James Morse Signed-off-by: Catalin Marinas --- Documentation/arm64/silicon-errata.rst | 2 ++ arch/arm64/Kconfig | 16 ++++++++++++++++ arch/arm64/include/asm/cpucaps.h | 3 ++- arch/arm64/kernel/cpu_errata.c | 32 +++++++++++++++++++++++++++++++- arch/arm64/kernel/traps.c | 3 +++ 5 files changed, 54 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst index 17ea3fecddaa..d0d480dac361 100644 --- a/Documentation/arm64/silicon-errata.rst +++ b/Documentation/arm64/silicon-errata.rst @@ -88,6 +88,8 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-N1 | #1349291 | N/A | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Neoverse-N1 | #1542419 | ARM64_ERRATUM_1542419 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | MMU-500 | #841119,826419 | N/A | +----------------+-----------------+-----------------+-----------------------------+ +----------------+-----------------+-----------------+-----------------------------+ diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 950a56b71ff0..7c3e3e334421 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -558,6 +558,22 @@ config ARM64_ERRATUM_1463225 If unsure, say Y. +config ARM64_ERRATUM_1542419 + bool "Neoverse-N1: workaround mis-ordering of instruction fetches" + default y + help + This option adds a workaround for ARM Neoverse-N1 erratum + 1542419. + + Affected Neoverse-N1 cores could execute a stale instruction when + modified by another CPU. The workaround depends on a firmware + counterpart. + + Workaround the issue by hiding the DIC feature from EL0. This + forces user-space to perform cache maintenance. + + If unsure, say Y.
+ config CAVIUM_ERRATUM_22375 bool "Cavium erratum 22375, 24313" default y diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index f19fe4b9acc4..f05afaec18cd 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -52,7 +52,8 @@ #define ARM64_HAS_IRQ_PRIO_MASKING 42 #define ARM64_HAS_DCPODP 43 #define ARM64_WORKAROUND_1463225 44 +#define ARM64_WORKAROUND_1542419 45 -#define ARM64_NCAPS 45 +#define ARM64_NCAPS 46 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index f593f4cffc0d..3ae9b78b6d94 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -87,13 +87,21 @@ has_mismatched_cache_type(const struct arm64_cpu_capabilities *entry, } static void -cpu_enable_trap_ctr_access(const struct arm64_cpu_capabilities *__unused) +cpu_enable_trap_ctr_access(const struct arm64_cpu_capabilities *cap) { u64 mask = arm64_ftr_reg_ctrel0.strict_mask; + bool enable_uct_trap = false; /* Trap CTR_EL0 access on this CPU, only if it has a mismatch */ if ((read_cpuid_cachetype() & mask) != (arm64_ftr_reg_ctrel0.sys_val & mask)) + enable_uct_trap = true; + + /* ... or if the system is affected by an erratum */ + if (cap->capability == ARM64_WORKAROUND_1542419) + enable_uct_trap = true; + + if (enable_uct_trap) sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCT, 0); } @@ -623,6 +631,18 @@ check_branch_predictor(const struct arm64_cpu_capabilities *entry, int scope) return (need_wa > 0); } +static bool __maybe_unused +has_neoverse_n1_erratum_1542419(const struct arm64_cpu_capabilities *entry, + int scope) +{ + u32 midr = read_cpuid_id(); + bool has_dic = read_cpuid_cachetype() & BIT(CTR_DIC_SHIFT); + const struct midr_range range = MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1); + + WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); + return is_midr_in_range(midr, &range) && has_dic; +} + #ifdef CONFIG_HARDEN_EL2_VECTORS static const struct midr_range arm64_harden_el2_vectors[] = { @@ -851,6 +871,16 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, .matches = has_cortex_a76_erratum_1463225, }, +#endif +#ifdef CONFIG_ARM64_ERRATUM_1542419 + { + /* we depend on the firmware portion for correctness */ + .desc = "ARM erratum 1542419 (kernel portion)", + .capability = ARM64_WORKAROUND_1542419, + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, + .matches = has_neoverse_n1_erratum_1542419, + .cpu_enable = cpu_enable_trap_ctr_access, + }, #endif { } diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 34739e80211b..465f0a0f8f0a 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -470,6 +470,9 @@ static void ctr_read_handler(unsigned int esr, struct pt_regs *regs) int rt = ESR_ELx_SYS64_ISS_RT(esr); unsigned long val = arm64_ftr_reg_user_value(&arm64_ftr_reg_ctrel0); + if (cpus_have_const_cap(ARM64_WORKAROUND_1542419)) + val &= ~BIT(CTR_DIC_SHIFT); + pt_regs_write_reg(regs, rt, val); arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE); -- cgit v1.2.3 From ee9d90be9ddace01b7fb126567e4b539fbe1f82f Mon Sep 17 00:00:00 2001 From: James Morse Date: Thu, 17 Oct 2019 18:42:59 +0100 Subject: arm64: Fake the IminLine size on systems affected by Neoverse-N1 #1542419 Systems affected by Neoverse-N1 #1542419 support DIC so do not need to perform icache maintenance once new instructions are cleaned to the PoU. 
For the errata workaround, the kernel hides DIC from user-space, so that the unnecessary cache maintenance can be trapped by firmware. To reduce the number of traps, produce a fake IminLine value based on PAGE_SIZE. Signed-off-by: James Morse Reviewed-by: Suzuki K Poulose Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cache.h | 3 ++- arch/arm64/kernel/traps.c | 8 +++++++- 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h index 43da6dd29592..806e9dc2a852 100644 --- a/arch/arm64/include/asm/cache.h +++ b/arch/arm64/include/asm/cache.h @@ -11,6 +11,7 @@ #define CTR_L1IP_MASK 3 #define CTR_DMINLINE_SHIFT 16 #define CTR_IMINLINE_SHIFT 0 +#define CTR_IMINLINE_MASK 0xf #define CTR_ERG_SHIFT 20 #define CTR_CWG_SHIFT 24 #define CTR_CWG_MASK 15 @@ -18,7 +19,7 @@ #define CTR_DIC_SHIFT 29 #define CTR_CACHE_MINLINE_MASK \ - (0xf << CTR_DMINLINE_SHIFT | 0xf << CTR_IMINLINE_SHIFT) + (0xf << CTR_DMINLINE_SHIFT | CTR_IMINLINE_MASK << CTR_IMINLINE_SHIFT) #define CTR_L1IP(ctr) (((ctr) >> CTR_L1IP_SHIFT) & CTR_L1IP_MASK) diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 465f0a0f8f0a..4e3e9d9c8151 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -470,9 +470,15 @@ static void ctr_read_handler(unsigned int esr, struct pt_regs *regs) int rt = ESR_ELx_SYS64_ISS_RT(esr); unsigned long val = arm64_ftr_reg_user_value(&arm64_ftr_reg_ctrel0); - if (cpus_have_const_cap(ARM64_WORKAROUND_1542419)) + if (cpus_have_const_cap(ARM64_WORKAROUND_1542419)) { + /* Hide DIC so that we can trap the unnecessary maintenance...*/ val &= ~BIT(CTR_DIC_SHIFT); + /* ... and fake IminLine to reduce the number of traps. */ + val &= ~CTR_IMINLINE_MASK; + val |= (PAGE_SHIFT - 2) & CTR_IMINLINE_MASK; + } + pt_regs_write_reg(regs, rt, val); arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE); -- cgit v1.2.3 From 222fc0c8503d98cec3cb2bac2780cdd21a6e31c0 Mon Sep 17 00:00:00 2001 From: James Morse Date: Thu, 17 Oct 2019 18:43:00 +0100 Subject: arm64: compat: Workaround Neoverse-N1 #1542419 for compat user-space Compat user-space is unable to perform ICIMVAU instructions itself. Instead it uses a compat-syscall. Add the workaround for Neoverse-N1 #1542419 to this code path. Signed-off-by: James Morse Signed-off-by: Catalin Marinas --- arch/arm64/kernel/sys_compat.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'arch') diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c index f1cb64959427..c9fb02927d3e 100644 --- a/arch/arm64/kernel/sys_compat.c +++ b/arch/arm64/kernel/sys_compat.c @@ -8,6 +8,7 @@ */ #include +#include #include #include #include @@ -17,6 +18,7 @@ #include #include +#include #include static long @@ -30,6 +32,15 @@ __do_compat_cache_op(unsigned long start, unsigned long end) if (fatal_signal_pending(current)) return 0; + if (cpus_have_const_cap(ARM64_WORKAROUND_1542419)) { + /* + * The workaround requires an inner-shareable tlbi. + * We pick the reserved-ASID to minimise the impact.
+ */ + __tlbi(aside1is, 0); + dsb(ish); + } + ret = __flush_cache_user_range(start, start + chunk); if (ret) return ret; -- cgit v1.2.3 From 1d8cd06af548bb1ba29e16eec78c0862e799a731 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 9 Jan 2019 14:46:23 +0000 Subject: arm64: KVM: Reorder system register restoration and stage-2 activation In order to prepare for handling erratum 1319367, we need to make sure that all system registers (and most importantly the registers configuring the virtual memory) are set before we enable stage-2 translation. This results in a minor reorganisation of the load sequence, without any functional change. Reviewed-by: James Morse Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/switch.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index 3d3815020e36..69e10b29cbd0 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -605,18 +605,23 @@ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu) __sysreg_save_state_nvhe(host_ctxt); - __activate_vm(kern_hyp_va(vcpu->kvm)); - __activate_traps(vcpu); - - __hyp_vgic_restore_state(vcpu); - __timer_enable_traps(vcpu); - /* * We must restore the 32-bit state before the sysregs, thanks * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72). + * + * Also, and in order to be able to deal with erratum #1319537 (A57) + * and #1319367 (A72), we must ensure that all VM-related sysreg are + * restored before we enable S2 translation. */ __sysreg32_restore_state(vcpu); __sysreg_restore_state_nvhe(guest_ctxt); + + __activate_vm(kern_hyp_va(vcpu->kvm)); + __activate_traps(vcpu); + + __hyp_vgic_restore_state(vcpu); + __timer_enable_traps(vcpu); + __debug_switch_to_guest(vcpu); __set_guest_arch_workaround_state(vcpu); -- cgit v1.2.3 From 37553941c670c3ad160b25843e6cdcbee2b3c6eb Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 30 Jul 2019 10:50:38 +0100 Subject: arm64: KVM: Disable EL1 PTW when invalidating S2 TLBs When erratum 1319367 is being worked around, special care must be taken not to allow the page table walker to populate TLBs while we have the stage-2 translation enabled (which would otherwise result in a bizarre mix of the host S1 and the guest S2). We enforce this by setting TCR_EL1.EPD{0,1} before restoring the S2 configuration, and clearing the same bits after having disabled S2. Reviewed-by: James Morse Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/tlb.c | 23 +++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c index eb0efc5557f3..c2bc17ca6430 100644 --- a/arch/arm64/kvm/hyp/tlb.c +++ b/arch/arm64/kvm/hyp/tlb.c @@ -63,6 +63,22 @@ static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm, static void __hyp_text __tlb_switch_to_guest_nvhe(struct kvm *kvm, struct tlb_inv_context *cxt) { + if (cpus_have_const_cap(ARM64_WORKAROUND_1319367)) { + u64 val; + + /* + * For CPUs that are affected by ARM 1319367, we need to + * avoid a host Stage-1 walk while we have the guest's + * VMID set in the VTTBR in order to invalidate TLBs. + * We're guaranteed that the S1 MMU is enabled, so we can + * simply set the EPD bits to avoid any further TLB fill.
+ */ + val = cxt->tcr = read_sysreg_el1(SYS_TCR); + val |= TCR_EPD1_MASK | TCR_EPD0_MASK; + write_sysreg_el1(val, SYS_TCR); + isb(); + } + __load_guest_stage2(kvm); isb(); } @@ -100,6 +116,13 @@ static void __hyp_text __tlb_switch_to_host_nvhe(struct kvm *kvm, struct tlb_inv_context *cxt) { write_sysreg(0, vttbr_el2); + + if (cpus_have_const_cap(ARM64_WORKAROUND_1319367)) { + /* Ensure write of the host VMID */ + isb(); + /* Restore the host's TCR_EL1 */ + write_sysreg_el1(cxt->tcr, SYS_TCR); + } } static void __hyp_text __tlb_switch_to_host(struct kvm *kvm, -- cgit v1.2.3 From bd227553ad5077f21ddb382dcd910ba46181805a Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 30 Jul 2019 11:15:31 +0100 Subject: arm64: KVM: Prevent speculative S1 PTW when restoring vcpu context When handling erratum 1319367, we must ensure that the page table walker cannot parse the S1 page tables while the guest is in an inconsistent state. This is done as follows: On guest entry: - TCR_EL1.EPD{0,1} are set, ensuring that no PTW can occur - all system registers are restored, except for TCR_EL1 and SCTLR_EL1 - stage-2 is restored - SCTLR_EL1 and TCR_EL1 are restored On guest exit: - SCTLR_EL1.M and TCR_EL1.EPD{0,1} are set, ensuring that no PTW can occur - stage-2 is disabled - All host system registers are restored Reviewed-by: James Morse Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/switch.c | 31 +++++++++++++++++++++++++++++++ arch/arm64/kvm/hyp/sysreg-sr.c | 35 +++++++++++++++++++++++++++++++++-- 2 files changed, 64 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index 69e10b29cbd0..5765b17c38c7 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -118,6 +118,20 @@ static void __hyp_text __activate_traps_nvhe(struct kvm_vcpu *vcpu) } write_sysreg(val, cptr_el2); + + if (cpus_have_const_cap(ARM64_WORKAROUND_1319367)) { + struct kvm_cpu_context *ctxt = &vcpu->arch.ctxt; + + isb(); + /* + * At this stage, and thanks to the above isb(), S2 is + * configured and enabled. We can now restore the guest's S1 + * configuration: SCTLR, and only then TCR. + */ + write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], SYS_SCTLR); + isb(); + write_sysreg_el1(ctxt->sys_regs[TCR_EL1], SYS_TCR); + } } static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu) @@ -156,6 +170,23 @@ static void __hyp_text __deactivate_traps_nvhe(void) { u64 mdcr_el2 = read_sysreg(mdcr_el2); + if (cpus_have_const_cap(ARM64_WORKAROUND_1319367)) { + u64 val; + + /* + * Set the TCR and SCTLR registers in the exact opposite + * sequence as __activate_traps_nvhe (first prevent walks, + * then force the MMU on). A generous sprinkling of isb() + * ensure that things happen in this exact order. 
+ */ + val = read_sysreg_el1(SYS_TCR); + write_sysreg_el1(val | TCR_EPD1_MASK | TCR_EPD0_MASK, SYS_TCR); + isb(); + val = read_sysreg_el1(SYS_SCTLR); + write_sysreg_el1(val | SCTLR_ELx_M, SYS_SCTLR); + isb(); + } + __deactivate_traps_common(); mdcr_el2 &= MDCR_EL2_HPMN_MASK; diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c index 7ddbc849b580..22b8128d19f6 100644 --- a/arch/arm64/kvm/hyp/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/sysreg-sr.c @@ -117,12 +117,26 @@ static void __hyp_text __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt) { write_sysreg(ctxt->sys_regs[MPIDR_EL1], vmpidr_el2); write_sysreg(ctxt->sys_regs[CSSELR_EL1], csselr_el1); - write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], SYS_SCTLR); + + if (!cpus_have_const_cap(ARM64_WORKAROUND_1319367)) { + write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], SYS_SCTLR); + write_sysreg_el1(ctxt->sys_regs[TCR_EL1], SYS_TCR); + } else if (!ctxt->__hyp_running_vcpu) { + /* + * Must only be done for guest registers, hence the context + * test. We're coming from the host, so SCTLR.M is already + * set. Pairs with __activate_traps_nvhe(). + */ + write_sysreg_el1((ctxt->sys_regs[TCR_EL1] | + TCR_EPD1_MASK | TCR_EPD0_MASK), + SYS_TCR); + isb(); + } + write_sysreg(ctxt->sys_regs[ACTLR_EL1], actlr_el1); write_sysreg_el1(ctxt->sys_regs[CPACR_EL1], SYS_CPACR); write_sysreg_el1(ctxt->sys_regs[TTBR0_EL1], SYS_TTBR0); write_sysreg_el1(ctxt->sys_regs[TTBR1_EL1], SYS_TTBR1); - write_sysreg_el1(ctxt->sys_regs[TCR_EL1], SYS_TCR); write_sysreg_el1(ctxt->sys_regs[ESR_EL1], SYS_ESR); write_sysreg_el1(ctxt->sys_regs[AFSR0_EL1], SYS_AFSR0); write_sysreg_el1(ctxt->sys_regs[AFSR1_EL1], SYS_AFSR1); @@ -135,6 +149,23 @@ static void __hyp_text __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt) write_sysreg(ctxt->sys_regs[PAR_EL1], par_el1); write_sysreg(ctxt->sys_regs[TPIDR_EL1], tpidr_el1); + if (cpus_have_const_cap(ARM64_WORKAROUND_1319367) && + ctxt->__hyp_running_vcpu) { + /* + * Must only be done for host registers, hence the context + * test. Pairs with __deactivate_traps_nvhe(). + */ + isb(); + /* + * At this stage, and thanks to the above isb(), S2 is + * deconfigured and disabled. We can now restore the host's + * S1 configuration: SCTLR, and only then TCR. + */ + write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], SYS_SCTLR); + isb(); + write_sysreg_el1(ctxt->sys_regs[TCR_EL1], SYS_TCR); + } + write_sysreg(ctxt->gp_regs.sp_el1, sp_el1); write_sysreg_el1(ctxt->gp_regs.elr_el1, SYS_ELR); write_sysreg_el1(ctxt->gp_regs.spsr[KVM_SPSR_EL1],SYS_SPSR); -- cgit v1.2.3 From c2cc62d831863151fd0cb7da7ac9a0c324aab871 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 9 Jan 2019 14:36:34 +0000 Subject: arm64: Enable and document ARM errata 1319367 and 1319537 Now that everything is in place, let's get the ball rolling by allowing the corresponding config option to be selected. Also add the required information to silicon_errata.rst. Acked-by: Catalin Marinas Signed-off-by: Marc Zyngier --- Documentation/arm64/silicon-errata.rst | 4 ++++ arch/arm64/Kconfig | 10 ++++++++++ 2 files changed, 14 insertions(+) (limited to 'arch') diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst index 17ea3fecddaa..0808be134fce 100644 --- a/Documentation/arm64/silicon-errata.rst +++ b/Documentation/arm64/silicon-errata.rst @@ -70,8 +70,12 @@ stable kernels. 
+----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A57 | #834220 | ARM64_ERRATUM_834220 | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A57 | #1319537 | ARM64_ERRATUM_1319367 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A72 | #853709 | N/A | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A72 | #1319367 | ARM64_ERRATUM_1319367 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A73 | #858921 | ARM64_ERRATUM_858921 | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A55 | #1024718 | ARM64_ERRATUM_1024718 | diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 950a56b71ff0..b2877ed09307 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -538,6 +538,16 @@ config ARM64_ERRATUM_1286807 invalidated has been observed by other observers. The workaround repeats the TLBI+DSB operation. +config ARM64_ERRATUM_1319367 + bool "Cortex-A57/A72: Speculative AT instruction using out-of-context translation regime could cause subsequent request to generate an incorrect translation" + default y + help + This option adds work arounds for ARM Cortex-A57 erratum 1319537 + and A72 erratum 1319367 + + Cortex-A57 and A72 cores could end-up with corrupted TLBs by + speculating an AT instruction during a guest context switch. + If unsure, say Y. config ARM64_ERRATUM_1463225 -- cgit v1.2.3 From 27a22fbdeedd6c5c451cf5f830d51782bf50c3a2 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 28 Oct 2019 09:08:34 +0000 Subject: arm64: Silence clang warning on mismatched value/register sizes Clang reports a warning on the __tlbi(aside1is, 0) macro expansion since the value size does not match the register size specified in the inline asm. Construct the ASID value using the __TLBI_VADDR() macro. Fixes: 222fc0c8503d ("arm64: compat: Workaround Neoverse-N1 #1542419 for compat user-space") Reported-by: Nathan Chancellor Cc: James Morse Signed-off-by: Catalin Marinas --- arch/arm64/kernel/sys_compat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c index c9fb02927d3e..3c18c2454089 100644 --- a/arch/arm64/kernel/sys_compat.c +++ b/arch/arm64/kernel/sys_compat.c @@ -37,7 +37,7 @@ __do_compat_cache_op(unsigned long start, unsigned long end) * The workaround requires an inner-shareable tlbi. * We pick the reserved-ASID to minimise the impact. */ - __tlbi(aside1is, 0); + __tlbi(aside1is, __TLBI_VADDR(0, 0)); dsb(ish); } -- cgit v1.2.3 From b6e43c0e3129ffe87e65c85f20fcbdf0eb86fba0 Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 25 Oct 2019 17:42:10 +0100 Subject: arm64: remove __exception annotations Since commit 732674980139 ("arm64: unwind: reference pt_regs via embedded stack frame") arm64 has not used the __exception annotation to dump the pt_regs during stack tracing. in_exception_text() has no callers. This annotation is only used to blacklist kprobes; it means the same as __kprobes. Section annotations like this require the functions to be grouped together between the start/end markers, and placed according to the linker script. For kprobes we also have NOKPROBE_SYMBOL() which logs the symbol address in a section that kprobes parses and blacklists at boot.
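As a minimal sketch of that pattern (hypothetical function name, for illustration), the annotation simply follows the definition it blacklists:

	static int example_handler(struct pt_regs *regs)
	{
		/* must not be instrumented by kprobes */
		return 0;
	}
	NOKPROBE_SYMBOL(example_handler);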
Using NOKPROBE_SYMBOL() instead lets kprobes publish the list of blacklisted symbols, and saves us from having an arm64 specific spelling of __kprobes. do_debug_exception() already has a NOKPROBE_SYMBOL() annotation. Signed-off-by: James Morse Acked-by: Mark Rutland Acked-by: Masami Hiramatsu Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/exception.h | 4 ++-- arch/arm64/include/asm/traps.h | 10 ---------- arch/arm64/kernel/probes/kprobes.c | 4 ---- arch/arm64/kernel/traps.c | 10 +++++++--- arch/arm64/kernel/vmlinux.lds.S | 3 --- arch/arm64/mm/fault.c | 34 +++++++++++++++++----------------- 6 files changed, 26 insertions(+), 39 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h index a17393ff6677..b0b3ba56e919 100644 --- a/arch/arm64/include/asm/exception.h +++ b/arch/arm64/include/asm/exception.h @@ -8,14 +8,14 @@ #define __ASM_EXCEPTION_H #include +#include #include -#define __exception __attribute__((section(".exception.text"))) #ifdef CONFIG_FUNCTION_GRAPH_TRACER #define __exception_irq_entry __irq_entry #else -#define __exception_irq_entry __exception +#define __exception_irq_entry __kprobes #endif static inline u32 disr_to_esr(u64 disr) diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h index 59690613ac31..cee5928e1b7d 100644 --- a/arch/arm64/include/asm/traps.h +++ b/arch/arm64/include/asm/traps.h @@ -42,16 +42,6 @@ static inline int __in_irqentry_text(unsigned long ptr) ptr < (unsigned long)&__irqentry_text_end; } -static inline int in_exception_text(unsigned long ptr) -{ - int in; - - in = ptr >= (unsigned long)&__exception_text_start && - ptr < (unsigned long)&__exception_text_end; - - return in ? : __in_irqentry_text(ptr); -} - static inline int in_entry_text(unsigned long ptr) { return ptr >= (unsigned long)&__entry_text_start && diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index c4452827419b..d1c95dcf1d78 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -455,10 +455,6 @@ int __init arch_populate_kprobe_blacklist(void) (unsigned long)__irqentry_text_end); if (ret) return ret; - ret = kprobe_add_area_blacklist((unsigned long)__exception_text_start, - (unsigned long)__exception_text_end); - if (ret) - return ret; ret = kprobe_add_area_blacklist((unsigned long)__idmap_text_start, (unsigned long)__idmap_text_end); if (ret) diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 34739e80211b..ba1a571a7774 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -393,7 +394,7 @@ void arm64_notify_segfault(unsigned long addr) force_signal_inject(SIGSEGV, code, addr); } -asmlinkage void __exception do_undefinstr(struct pt_regs *regs) +asmlinkage void do_undefinstr(struct pt_regs *regs) { /* check for AArch32 breakpoint instructions */ if (!aarch32_break_handler(regs)) @@ -405,6 +406,7 @@ asmlinkage void __exception do_undefinstr(struct pt_regs *regs) BUG_ON(!user_mode(regs)); force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc); } +NOKPROBE_SYMBOL(do_undefinstr); #define __user_cache_maint(insn, address, res) \ if (address >= user_addr_max()) { \ @@ -667,7 +669,7 @@ static const struct sys64_hook cp15_64_hooks[] = { {}, }; -asmlinkage void __exception do_cp15instr(unsigned int esr, struct pt_regs *regs) +asmlinkage void do_cp15instr(unsigned int esr, struct pt_regs *regs) { const struct 
sys64_hook *hook, *hook_base; @@ -705,9 +707,10 @@ asmlinkage void __exception do_cp15instr(unsigned int esr, struct pt_regs *regs) */ do_undefinstr(regs); } +NOKPROBE_SYMBOL(do_cp15instr); #endif -asmlinkage void __exception do_sysinstr(unsigned int esr, struct pt_regs *regs) +asmlinkage void do_sysinstr(unsigned int esr, struct pt_regs *regs) { const struct sys64_hook *hook; @@ -724,6 +727,7 @@ asmlinkage void __exception do_sysinstr(unsigned int esr, struct pt_regs *regs) */ do_undefinstr(regs); } +NOKPROBE_SYMBOL(do_sysinstr); static const char *esr_class_str[] = { [0 ... ESR_ELx_EC_MAX] = "UNRECOGNIZED EC", diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index aa76f7259668..009057517bdd 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -111,9 +111,6 @@ SECTIONS } .text : { /* Real text segment */ _stext = .; /* Text and read-only data */ - __exception_text_start = .; - *(.exception.text) - __exception_text_end = .; IRQENTRY_TEXT SOFTIRQENTRY_TEXT ENTRY_TEXT diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 855f2a7954e6..844cd2535826 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -732,8 +733,8 @@ static const struct fault_info fault_info[] = { { do_bad, SIGKILL, SI_KERNEL, "unknown 63" }, }; -asmlinkage void __exception do_mem_abort(unsigned long addr, unsigned int esr, - struct pt_regs *regs) +asmlinkage void do_mem_abort(unsigned long addr, unsigned int esr, + struct pt_regs *regs) { const struct fault_info *inf = esr_to_fault_info(esr); @@ -749,16 +750,17 @@ asmlinkage void __exception do_mem_abort(unsigned long addr, unsigned int esr, arm64_notify_die(inf->name, regs, inf->sig, inf->code, (void __user *)addr, esr); } +NOKPROBE_SYMBOL(do_mem_abort); -asmlinkage void __exception do_el0_irq_bp_hardening(void) +asmlinkage void do_el0_irq_bp_hardening(void) { /* PC has already been checked in entry.S */ arm64_apply_bp_hardening(); } +NOKPROBE_SYMBOL(do_el0_irq_bp_hardening); -asmlinkage void __exception do_el0_ia_bp_hardening(unsigned long addr, - unsigned int esr, - struct pt_regs *regs) +asmlinkage void do_el0_ia_bp_hardening(unsigned long addr, unsigned int esr, + struct pt_regs *regs) { /* * We've taken an instruction abort from userspace and not yet @@ -771,11 +773,10 @@ asmlinkage void __exception do_el0_ia_bp_hardening(unsigned long addr, local_daif_restore(DAIF_PROCCTX); do_mem_abort(addr, esr, regs); } +NOKPROBE_SYMBOL(do_el0_ia_bp_hardening); - -asmlinkage void __exception do_sp_pc_abort(unsigned long addr, - unsigned int esr, - struct pt_regs *regs) +asmlinkage void do_sp_pc_abort(unsigned long addr, unsigned int esr, + struct pt_regs *regs) { if (user_mode(regs)) { if (!is_ttbr0_addr(instruction_pointer(regs))) @@ -786,6 +787,7 @@ asmlinkage void __exception do_sp_pc_abort(unsigned long addr, arm64_notify_die("SP/PC alignment exception", regs, SIGBUS, BUS_ADRALN, (void __user *)addr, esr); } +NOKPROBE_SYMBOL(do_sp_pc_abort); int __init early_brk64(unsigned long addr, unsigned int esr, struct pt_regs *regs); @@ -868,8 +870,7 @@ NOKPROBE_SYMBOL(debug_exception_exit); #ifdef CONFIG_ARM64_ERRATUM_1463225 DECLARE_PER_CPU(int, __in_cortex_a76_erratum_1463225_wa); -static int __exception -cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs) +static int cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs) { if (user_mode(regs)) return 0; @@ -888,16 +889,15 @@ 
cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs) return 1; } #else -static int __exception -cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs) +static int cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs) { return 0; } #endif /* CONFIG_ARM64_ERRATUM_1463225 */ +NOKPROBE_SYMBOL(cortex_a76_erratum_1463225_debug_handler); -asmlinkage void __exception do_debug_exception(unsigned long addr_if_watchpoint, - unsigned int esr, - struct pt_regs *regs) +asmlinkage void do_debug_exception(unsigned long addr_if_watchpoint, + unsigned int esr, struct pt_regs *regs) { const struct fault_info *inf = esr_to_debug_fault_info(esr); unsigned long pc = instruction_pointer(regs); -- cgit v1.2.3 From e540e0a7fa1ff889e37ca9af44eb44ec3d2c8a01 Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 25 Oct 2019 17:42:11 +0100 Subject: arm64: Add prototypes for functions called by entry.S Functions that are only called by assembly don't always have a C header file prototype. Add the prototypes before moving the assembly callers to C. Signed-off-by: James Morse Acked-by: Mark Rutland Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/exception.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'arch') diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h index b0b3ba56e919..a9e376623ecf 100644 --- a/arch/arm64/include/asm/exception.h +++ b/arch/arm64/include/asm/exception.h @@ -9,6 +9,7 @@ #include #include +#include #include @@ -31,5 +32,26 @@ static inline u32 disr_to_esr(u64 disr) } asmlinkage void enter_from_user_mode(void); +asmlinkage void do_mem_abort(unsigned long addr, unsigned int esr, + struct pt_regs *regs); +asmlinkage void do_sp_pc_abort(unsigned long addr, unsigned int esr, + struct pt_regs *regs); +asmlinkage void do_undefinstr(struct pt_regs *regs); +asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr); +asmlinkage void do_debug_exception(unsigned long addr_if_watchpoint, + unsigned int esr, struct pt_regs *regs); +asmlinkage void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs); +asmlinkage void do_sve_acc(unsigned int esr, struct pt_regs *regs); +asmlinkage void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs); +asmlinkage void do_sysinstr(unsigned int esr, struct pt_regs *regs); +asmlinkage void do_sp_pc_abort(unsigned long addr, unsigned int esr, + struct pt_regs *regs); +asmlinkage void bad_el0_sync(struct pt_regs *regs, int reason, + unsigned int esr); +asmlinkage void do_cp15instr(unsigned int esr, struct pt_regs *regs); +asmlinkage void el0_svc_handler(struct pt_regs *regs); +asmlinkage void el0_svc_compat_handler(struct pt_regs *regs); +asmlinkage void do_el0_ia_bp_hardening(unsigned long addr, unsigned int esr, + struct pt_regs *regs); #endif /* __ASM_EXCEPTION_H */ -- cgit v1.2.3 From 51077e03b8cef2a24d6582b8c54b718fced6878c Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 25 Oct 2019 17:42:12 +0100 Subject: arm64: add local_daif_inherit() Some synchronous exceptions can be taken from a number of contexts, e.g. where IRQs may or may not be masked. In the entry assembly for these exceptions, we use the inherit_daif assembly macro to ensure that we only mask those exceptions which were masked when the exception was taken. So that we can do the same from C code, this patch adds a new local_daif_inherit() function, following the existing local_daif_*() naming scheme. 
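A minimal sketch of the intended use, mirroring the C sync-exception handlers added later in this series (hypothetical handler name):

	static void notrace el1_example(struct pt_regs *regs, unsigned long esr)
	{
		unsigned long far = read_sysreg(far_el1);

		/* unmask only the DAIF bits that were unmasked at entry */
		local_daif_inherit(regs);
		do_mem_abort(far, esr, regs);
	}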
Signed-off-by: Mark Rutland [moved away from local_daif_restore()] Signed-off-by: James Morse Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/daifflags.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'arch') diff --git a/arch/arm64/include/asm/daifflags.h b/arch/arm64/include/asm/daifflags.h index 063c964af705..9207cd5aa39e 100644 --- a/arch/arm64/include/asm/daifflags.h +++ b/arch/arm64/include/asm/daifflags.h @@ -9,6 +9,7 @@ #include #include +#include #define DAIF_PROCCTX 0 #define DAIF_PROCCTX_NOIRQ PSR_I_BIT @@ -109,4 +110,19 @@ static inline void local_daif_restore(unsigned long flags) trace_hardirqs_off(); } +/* + * Called by synchronous exception handlers to restore the DAIF bits that were + * modified by taking an exception. + */ +static inline void local_daif_inherit(struct pt_regs *regs) +{ + unsigned long flags = regs->pstate & DAIF_MASK; + + /* + * We can't use local_daif_restore(regs->pstate) here as + * system_has_prio_mask_debugging() won't restore the I bit if it can + * use the pmr instead. + */ + write_sysreg(flags, daif); +} #endif -- cgit v1.2.3 From ed3768db588291ddb5dc794daed12cc751373566 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 25 Oct 2019 17:42:13 +0100 Subject: arm64: entry: convert el1_sync to C This patch converts the EL1 sync entry assembly logic to C code. Doing this will allow us to make changes in a slightly more readable way. A case in point is supporting kernel-first RAS. do_sea() should be called on the CPU that took the fault. Largely the assembly code is converted to C in a relatively straightforward manner. Since all sync sites share a common asm entry point, the ASM_BUG() instances are no longer required for effective backtraces back to assembly, and we don't need similar BUG() entries. The ESR_ELx.EC codes for all (supported) debug exceptions are now checked in the el1_sync_handler's switch statement, which renders the check in el1_dbg redundant. This both simplifies the el1_dbg handler, and makes the EL1 exception handling more robust to currently-unallocated ESR_ELx.EC encodings. Signed-off-by: Mark Rutland [split out of a bigger series, added nokprobes, moved prototypes] Signed-off-by: James Morse Reviewed-by: Mark Rutland Cc: Julien Thierry Signed-off-by: Catalin Marinas --- arch/arm64/kernel/Makefile | 6 +-- arch/arm64/kernel/entry-common.c | 98 ++++++++++++++++++++++++++++++++++++++++ arch/arm64/kernel/entry.S | 69 +--------------------------- 3 files changed, 102 insertions(+), 71 deletions(-) create mode 100644 arch/arm64/kernel/entry-common.c (limited to 'arch') diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 478491f07b4f..fc6488660f64 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -13,9 +13,9 @@ CFLAGS_REMOVE_return_address.o = $(CC_FLAGS_FTRACE) # Object file lists. 
obj-y := debug-monitors.o entry.o irq.o fpsimd.o \ - entry-fpsimd.o process.o ptrace.o setup.o signal.o \ - sys.o stacktrace.o time.o traps.o io.o vdso.o \ - hyp-stub.o psci.o cpu_ops.o insn.o \ + entry-common.o entry-fpsimd.o process.o ptrace.o \ + setup.o signal.o sys.o stacktrace.o time.o traps.o \ + io.o vdso.o hyp-stub.o psci.o cpu_ops.o insn.o \ return_address.o cpuinfo.o cpu_errata.o \ cpufeature.o alternative.o cacheinfo.o \ smp.o smp_spin_table.o topology.o smccc-call.o \ diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c new file mode 100644 index 000000000000..e726d1f4b9e9 --- /dev/null +++ b/arch/arm64/kernel/entry-common.c @@ -0,0 +1,98 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Exception handling code + * + * Copyright (C) 2019 ARM Ltd. + */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +static void notrace el1_abort(struct pt_regs *regs, unsigned long esr) +{ + unsigned long far = read_sysreg(far_el1); + + local_daif_inherit(regs); + far = untagged_addr(far); + do_mem_abort(far, esr, regs); +} +NOKPROBE_SYMBOL(el1_abort); + +static void notrace el1_pc(struct pt_regs *regs, unsigned long esr) +{ + unsigned long far = read_sysreg(far_el1); + + local_daif_inherit(regs); + do_sp_pc_abort(far, esr, regs); +} +NOKPROBE_SYMBOL(el1_pc); + +static void el1_undef(struct pt_regs *regs) +{ + local_daif_inherit(regs); + do_undefinstr(regs); +} +NOKPROBE_SYMBOL(el1_undef); + +static void el1_inv(struct pt_regs *regs, unsigned long esr) +{ + local_daif_inherit(regs); + bad_mode(regs, 0, esr); +} +NOKPROBE_SYMBOL(el1_inv); + +static void notrace el1_dbg(struct pt_regs *regs, unsigned long esr) +{ + unsigned long far = read_sysreg(far_el1); + + /* + * The CPU masked interrupts, and we are leaving them masked during + * do_debug_exception(). Update PMR as if we had called + * local_mask_daif(). + */ + if (system_uses_irq_prio_masking()) + gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); + + do_debug_exception(far, esr, regs); +} +NOKPROBE_SYMBOL(el1_dbg); + +asmlinkage void notrace el1_sync_handler(struct pt_regs *regs) +{ + unsigned long esr = read_sysreg(esr_el1); + + switch (ESR_ELx_EC(esr)) { + case ESR_ELx_EC_DABT_CUR: + case ESR_ELx_EC_IABT_CUR: + el1_abort(regs, esr); + break; + /* + * We don't handle ESR_ELx_EC_SP_ALIGN, since we will have hit a + * recursive exception when trying to push the initial pt_regs. 
+ */ + case ESR_ELx_EC_PC_ALIGN: + el1_pc(regs, esr); + break; + case ESR_ELx_EC_SYS64: + case ESR_ELx_EC_UNKNOWN: + el1_undef(regs); + break; + case ESR_ELx_EC_BREAKPT_CUR: + case ESR_ELx_EC_SOFTSTP_CUR: + case ESR_ELx_EC_WATCHPT_CUR: + case ESR_ELx_EC_BRK64: + el1_dbg(regs, esr); + break; + default: + el1_inv(regs, esr); + }; +} +NOKPROBE_SYMBOL(el1_sync_handler); diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index e304fe04b098..5d7f42eb0e89 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -578,76 +578,9 @@ ENDPROC(el1_error_invalid) .align 6 el1_sync: kernel_entry 1 - mrs x1, esr_el1 // read the syndrome register - lsr x24, x1, #ESR_ELx_EC_SHIFT // exception class - cmp x24, #ESR_ELx_EC_DABT_CUR // data abort in EL1 - b.eq el1_da - cmp x24, #ESR_ELx_EC_IABT_CUR // instruction abort in EL1 - b.eq el1_ia - cmp x24, #ESR_ELx_EC_SYS64 // configurable trap - b.eq el1_undef - cmp x24, #ESR_ELx_EC_PC_ALIGN // pc alignment exception - b.eq el1_pc - cmp x24, #ESR_ELx_EC_UNKNOWN // unknown exception in EL1 - b.eq el1_undef - cmp x24, #ESR_ELx_EC_BREAKPT_CUR // debug exception in EL1 - b.ge el1_dbg - b el1_inv - -el1_ia: - /* - * Fall through to the Data abort case - */ -el1_da: - /* - * Data abort handling - */ - mrs x3, far_el1 - inherit_daif pstate=x23, tmp=x2 - clear_address_tag x0, x3 - mov x2, sp // struct pt_regs - bl do_mem_abort - - kernel_exit 1 -el1_pc: - /* - * PC alignment exception handling. We don't handle SP alignment faults, - * since we will have hit a recursive exception when trying to push the - * initial pt_regs. - */ - mrs x0, far_el1 - inherit_daif pstate=x23, tmp=x2 - mov x2, sp - bl do_sp_pc_abort - ASM_BUG() -el1_undef: - /* - * Undefined instruction - */ - inherit_daif pstate=x23, tmp=x2 mov x0, sp - bl do_undefinstr - kernel_exit 1 -el1_dbg: - /* - * Debug exception handling - */ - cmp x24, #ESR_ELx_EC_BRK64 // if BRK64 - cinc x24, x24, eq // set bit '0' - tbz x24, #0, el1_inv // EL1 only - gic_prio_kentry_setup tmp=x3 - mrs x0, far_el1 - mov x2, sp // struct pt_regs - bl do_debug_exception + bl el1_sync_handler kernel_exit 1 -el1_inv: - // TODO: add support for undefined instructions in kernel mode - inherit_daif pstate=x23, tmp=x2 - mov x0, sp - mov x2, x1 - mov x1, #BAD_SYNC - bl bad_mode - ASM_BUG() ENDPROC(el1_sync) .align 6 -- cgit v1.2.3 From 582f95835a8fc812cd38dce0447fe9386b78913e Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 25 Oct 2019 17:42:14 +0100 Subject: arm64: entry: convert el0_sync to C This is largely a 1-1 conversion of asm to C, with a couple of caveats. The el0_sync{_compat} switches explicitly handle all the EL0 debug cases, so el0_dbg doesn't have to try to bail out for unexpected EL1 debug ESR values. This also means that an unexpected vector catch from AArch32 is routed to el0_inv. We *could* merge the native and compat switches, which would make the diffstat negative, but I've tried to stay as close to the existing assembly as possible for the moment. Signed-off-by: Mark Rutland [split out of a bigger series, added nokprobes. removed irq trace calls as the C helpers do this. 
renamed el0_dbg's use of FAR] Signed-off-by: James Morse Reviewed-by: Mark Rutland Cc: Julien Thierry Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/asm-uaccess.h | 10 -- arch/arm64/kernel/entry-common.c | 222 +++++++++++++++++++++++++++++++++++ arch/arm64/kernel/entry.S | 206 +------------------------------- 3 files changed, 227 insertions(+), 211 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/asm-uaccess.h b/arch/arm64/include/asm/asm-uaccess.h index f74909ba29bd..a70575edae8e 100644 --- a/arch/arm64/include/asm/asm-uaccess.h +++ b/arch/arm64/include/asm/asm-uaccess.h @@ -74,14 +74,4 @@ alternative_if ARM64_ALT_PAN_NOT_UAO SET_PSTATE_PAN(0) alternative_else_nop_endif .endm - -/* - * Remove the address tag from a virtual address, if present. - */ - .macro clear_address_tag, dst, addr - tst \addr, #(1 << 55) - bic \dst, \addr, #(0xff << 56) - csel \dst, \dst, \addr, eq - .endm - #endif diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index e726d1f4b9e9..2c318e41d84b 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -96,3 +96,225 @@ asmlinkage void notrace el1_sync_handler(struct pt_regs *regs) }; } NOKPROBE_SYMBOL(el1_sync_handler); + +static void notrace el0_da(struct pt_regs *regs, unsigned long esr) +{ + unsigned long far = read_sysreg(far_el1); + + user_exit_irqoff(); + local_daif_restore(DAIF_PROCCTX); + far = untagged_addr(far); + do_mem_abort(far, esr, regs); +} +NOKPROBE_SYMBOL(el0_da); + +static void notrace el0_ia(struct pt_regs *regs, unsigned long esr) +{ + unsigned long far = read_sysreg(far_el1); + + user_exit_irqoff(); + local_daif_restore(DAIF_PROCCTX_NOIRQ); + do_el0_ia_bp_hardening(far, esr, regs); +} +NOKPROBE_SYMBOL(el0_ia); + +static void notrace el0_fpsimd_acc(struct pt_regs *regs, unsigned long esr) +{ + user_exit_irqoff(); + local_daif_restore(DAIF_PROCCTX); + do_fpsimd_acc(esr, regs); +} +NOKPROBE_SYMBOL(el0_fpsimd_acc); + +static void notrace el0_sve_acc(struct pt_regs *regs, unsigned long esr) +{ + user_exit_irqoff(); + local_daif_restore(DAIF_PROCCTX); + do_sve_acc(esr, regs); +} +NOKPROBE_SYMBOL(el0_sve_acc); + +static void notrace el0_fpsimd_exc(struct pt_regs *regs, unsigned long esr) +{ + user_exit_irqoff(); + local_daif_restore(DAIF_PROCCTX); + do_fpsimd_exc(esr, regs); +} +NOKPROBE_SYMBOL(el0_fpsimd_exc); + +static void notrace el0_sys(struct pt_regs *regs, unsigned long esr) +{ + user_exit_irqoff(); + local_daif_restore(DAIF_PROCCTX); + do_sysinstr(esr, regs); +} +NOKPROBE_SYMBOL(el0_sys); + +static void notrace el0_pc(struct pt_regs *regs, unsigned long esr) +{ + unsigned long far = read_sysreg(far_el1); + + user_exit_irqoff(); + local_daif_restore(DAIF_PROCCTX_NOIRQ); + do_sp_pc_abort(far, esr, regs); +} +NOKPROBE_SYMBOL(el0_pc); + +static void notrace el0_sp(struct pt_regs *regs, unsigned long esr) +{ + user_exit_irqoff(); + local_daif_restore(DAIF_PROCCTX_NOIRQ); + do_sp_pc_abort(regs->sp, esr, regs); +} +NOKPROBE_SYMBOL(el0_sp); + +static void notrace el0_undef(struct pt_regs *regs) +{ + user_exit_irqoff(); + local_daif_restore(DAIF_PROCCTX); + do_undefinstr(regs); +} +NOKPROBE_SYMBOL(el0_undef); + +static void notrace el0_inv(struct pt_regs *regs, unsigned long esr) +{ + user_exit_irqoff(); + local_daif_restore(DAIF_PROCCTX); + bad_el0_sync(regs, 0, esr); +} +NOKPROBE_SYMBOL(el0_inv); + +static void notrace el0_dbg(struct pt_regs *regs, unsigned long esr) +{ + /* Only watchpoints write FAR_EL1, otherwise its UNKNOWN */ + unsigned long far = 
read_sysreg(far_el1); + + if (system_uses_irq_prio_masking()) + gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); + + user_exit_irqoff(); + do_debug_exception(far, esr, regs); + local_daif_restore(DAIF_PROCCTX_NOIRQ); +} +NOKPROBE_SYMBOL(el0_dbg); + +static void notrace el0_svc(struct pt_regs *regs) +{ + if (system_uses_irq_prio_masking()) + gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); + + el0_svc_handler(regs); +} +NOKPROBE_SYMBOL(el0_svc); + +asmlinkage void notrace el0_sync_handler(struct pt_regs *regs) +{ + unsigned long esr = read_sysreg(esr_el1); + + switch (ESR_ELx_EC(esr)) { + case ESR_ELx_EC_SVC64: + el0_svc(regs); + break; + case ESR_ELx_EC_DABT_LOW: + el0_da(regs, esr); + break; + case ESR_ELx_EC_IABT_LOW: + el0_ia(regs, esr); + break; + case ESR_ELx_EC_FP_ASIMD: + el0_fpsimd_acc(regs, esr); + break; + case ESR_ELx_EC_SVE: + el0_sve_acc(regs, esr); + break; + case ESR_ELx_EC_FP_EXC64: + el0_fpsimd_exc(regs, esr); + break; + case ESR_ELx_EC_SYS64: + case ESR_ELx_EC_WFx: + el0_sys(regs, esr); + break; + case ESR_ELx_EC_SP_ALIGN: + el0_sp(regs, esr); + break; + case ESR_ELx_EC_PC_ALIGN: + el0_pc(regs, esr); + break; + case ESR_ELx_EC_UNKNOWN: + el0_undef(regs); + break; + case ESR_ELx_EC_BREAKPT_LOW: + case ESR_ELx_EC_SOFTSTP_LOW: + case ESR_ELx_EC_WATCHPT_LOW: + case ESR_ELx_EC_BRK64: + el0_dbg(regs, esr); + break; + default: + el0_inv(regs, esr); + } +} +NOKPROBE_SYMBOL(el0_sync_handler); + +#ifdef CONFIG_COMPAT +static void notrace el0_cp15(struct pt_regs *regs, unsigned long esr) +{ + user_exit_irqoff(); + local_daif_restore(DAIF_PROCCTX); + do_cp15instr(esr, regs); +} +NOKPROBE_SYMBOL(el0_cp15); + +static void notrace el0_svc_compat(struct pt_regs *regs) +{ + if (system_uses_irq_prio_masking()) + gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); + + el0_svc_compat_handler(regs); +} +NOKPROBE_SYMBOL(el0_svc_compat); + +asmlinkage void notrace el0_sync_compat_handler(struct pt_regs *regs) +{ + unsigned long esr = read_sysreg(esr_el1); + + switch (ESR_ELx_EC(esr)) { + case ESR_ELx_EC_SVC32: + el0_svc_compat(regs); + break; + case ESR_ELx_EC_DABT_LOW: + el0_da(regs, esr); + break; + case ESR_ELx_EC_IABT_LOW: + el0_ia(regs, esr); + break; + case ESR_ELx_EC_FP_ASIMD: + el0_fpsimd_acc(regs, esr); + break; + case ESR_ELx_EC_FP_EXC32: + el0_fpsimd_exc(regs, esr); + break; + case ESR_ELx_EC_PC_ALIGN: + el0_pc(regs, esr); + break; + case ESR_ELx_EC_UNKNOWN: + case ESR_ELx_EC_CP14_MR: + case ESR_ELx_EC_CP14_LS: + case ESR_ELx_EC_CP14_64: + el0_undef(regs); + break; + case ESR_ELx_EC_CP15_32: + case ESR_ELx_EC_CP15_64: + el0_cp15(regs, esr); + break; + case ESR_ELx_EC_BREAKPT_LOW: + case ESR_ELx_EC_SOFTSTP_LOW: + case ESR_ELx_EC_WATCHPT_LOW: + case ESR_ELx_EC_BKPT32: + el0_dbg(regs, esr); + break; + default: + el0_inv(regs, esr); + } +} +NOKPROBE_SYMBOL(el0_sync_compat_handler); +#endif /* CONFIG_COMPAT */ diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 5d7f42eb0e89..15822a0fe37f 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -647,71 +647,18 @@ ENDPROC(el1_irq) .align 6 el0_sync: kernel_entry 0 - mrs x25, esr_el1 // read the syndrome register - lsr x24, x25, #ESR_ELx_EC_SHIFT // exception class - cmp x24, #ESR_ELx_EC_SVC64 // SVC in 64-bit state - b.eq el0_svc - cmp x24, #ESR_ELx_EC_DABT_LOW // data abort in EL0 - b.eq el0_da - cmp x24, #ESR_ELx_EC_IABT_LOW // instruction abort in EL0 - b.eq el0_ia - cmp x24, #ESR_ELx_EC_FP_ASIMD // FP/ASIMD access - b.eq el0_fpsimd_acc - cmp x24, #ESR_ELx_EC_SVE // SVE access - b.eq 
el0_sve_acc - cmp x24, #ESR_ELx_EC_FP_EXC64 // FP/ASIMD exception - b.eq el0_fpsimd_exc - cmp x24, #ESR_ELx_EC_SYS64 // configurable trap - ccmp x24, #ESR_ELx_EC_WFx, #4, ne - b.eq el0_sys - cmp x24, #ESR_ELx_EC_SP_ALIGN // stack alignment exception - b.eq el0_sp - cmp x24, #ESR_ELx_EC_PC_ALIGN // pc alignment exception - b.eq el0_pc - cmp x24, #ESR_ELx_EC_UNKNOWN // unknown exception in EL0 - b.eq el0_undef - cmp x24, #ESR_ELx_EC_BREAKPT_LOW // debug exception in EL0 - b.ge el0_dbg - b el0_inv + mov x0, sp + bl el0_sync_handler + b ret_to_user #ifdef CONFIG_COMPAT .align 6 el0_sync_compat: kernel_entry 0, 32 - mrs x25, esr_el1 // read the syndrome register - lsr x24, x25, #ESR_ELx_EC_SHIFT // exception class - cmp x24, #ESR_ELx_EC_SVC32 // SVC in 32-bit state - b.eq el0_svc_compat - cmp x24, #ESR_ELx_EC_DABT_LOW // data abort in EL0 - b.eq el0_da - cmp x24, #ESR_ELx_EC_IABT_LOW // instruction abort in EL0 - b.eq el0_ia - cmp x24, #ESR_ELx_EC_FP_ASIMD // FP/ASIMD access - b.eq el0_fpsimd_acc - cmp x24, #ESR_ELx_EC_FP_EXC32 // FP/ASIMD exception - b.eq el0_fpsimd_exc - cmp x24, #ESR_ELx_EC_PC_ALIGN // pc alignment exception - b.eq el0_pc - cmp x24, #ESR_ELx_EC_UNKNOWN // unknown exception in EL0 - b.eq el0_undef - cmp x24, #ESR_ELx_EC_CP15_32 // CP15 MRC/MCR trap - b.eq el0_cp15 - cmp x24, #ESR_ELx_EC_CP15_64 // CP15 MRRC/MCRR trap - b.eq el0_cp15 - cmp x24, #ESR_ELx_EC_CP14_MR // CP14 MRC/MCR trap - b.eq el0_undef - cmp x24, #ESR_ELx_EC_CP14_LS // CP14 LDC/STC trap - b.eq el0_undef - cmp x24, #ESR_ELx_EC_CP14_64 // CP14 MRRC/MCRR trap - b.eq el0_undef - cmp x24, #ESR_ELx_EC_BREAKPT_LOW // debug exception in EL0 - b.ge el0_dbg - b el0_inv -el0_svc_compat: - gic_prio_kentry_setup tmp=x1 mov x0, sp - bl el0_svc_compat_handler + bl el0_sync_compat_handler b ret_to_user +ENDPROC(el0_sync) .align 6 el0_irq_compat: @@ -721,139 +668,7 @@ el0_irq_compat: el0_error_compat: kernel_entry 0, 32 b el0_error_naked - -el0_cp15: - /* - * Trapped CP15 (MRC, MCR, MRRC, MCRR) instructions - */ - ct_user_exit_irqoff - enable_daif - mov x0, x25 - mov x1, sp - bl do_cp15instr - b ret_to_user -#endif - -el0_da: - /* - * Data abort handling - */ - mrs x26, far_el1 - ct_user_exit_irqoff - enable_daif - clear_address_tag x0, x26 - mov x1, x25 - mov x2, sp - bl do_mem_abort - b ret_to_user -el0_ia: - /* - * Instruction abort handling - */ - mrs x26, far_el1 - gic_prio_kentry_setup tmp=x0 - ct_user_exit_irqoff - enable_da_f -#ifdef CONFIG_TRACE_IRQFLAGS - bl trace_hardirqs_off -#endif - mov x0, x26 - mov x1, x25 - mov x2, sp - bl do_el0_ia_bp_hardening - b ret_to_user -el0_fpsimd_acc: - /* - * Floating Point or Advanced SIMD access - */ - ct_user_exit_irqoff - enable_daif - mov x0, x25 - mov x1, sp - bl do_fpsimd_acc - b ret_to_user -el0_sve_acc: - /* - * Scalable Vector Extension access - */ - ct_user_exit_irqoff - enable_daif - mov x0, x25 - mov x1, sp - bl do_sve_acc - b ret_to_user -el0_fpsimd_exc: - /* - * Floating Point, Advanced SIMD or SVE exception - */ - ct_user_exit_irqoff - enable_daif - mov x0, x25 - mov x1, sp - bl do_fpsimd_exc - b ret_to_user -el0_sp: - ldr x26, [sp, #S_SP] - b el0_sp_pc -el0_pc: - mrs x26, far_el1 -el0_sp_pc: - /* - * Stack or PC alignment exception handling - */ - gic_prio_kentry_setup tmp=x0 - ct_user_exit_irqoff - enable_da_f -#ifdef CONFIG_TRACE_IRQFLAGS - bl trace_hardirqs_off #endif - mov x0, x26 - mov x1, x25 - mov x2, sp - bl do_sp_pc_abort - b ret_to_user -el0_undef: - /* - * Undefined instruction - */ - ct_user_exit_irqoff - enable_daif - mov x0, sp - bl do_undefinstr - b 
ret_to_user -el0_sys: - /* - * System instructions, for trapped cache maintenance instructions - */ - ct_user_exit_irqoff - enable_daif - mov x0, x25 - mov x1, sp - bl do_sysinstr - b ret_to_user -el0_dbg: - /* - * Debug exception handling - */ - tbnz x24, #0, el0_inv // EL0 only - mrs x24, far_el1 - gic_prio_kentry_setup tmp=x3 - ct_user_exit_irqoff - mov x0, x24 - mov x1, x25 - mov x2, sp - bl do_debug_exception - enable_da_f - b ret_to_user -el0_inv: - ct_user_exit_irqoff - enable_daif - mov x0, sp - mov x1, #BAD_SYNC - mov x2, x25 - bl bad_el0_sync - b ret_to_user -ENDPROC(el0_sync) .align 6 el0_irq: @@ -932,17 +747,6 @@ finish_ret_to_user: kernel_exit 0 ENDPROC(ret_to_user) -/* - * SVC handler. - */ - .align 6 -el0_svc: - gic_prio_kentry_setup tmp=x1 - mov x0, sp - bl el0_svc_handler - b ret_to_user -ENDPROC(el0_svc) - .popsection // .entry.text #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 -- cgit v1.2.3 From afa7c0e5b965cdb945ad8a2e2973c6d7e19969f9 Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 25 Oct 2019 17:42:15 +0100 Subject: arm64: Remove asmlinkage from updated functions Now that the callers of these functions have moved into C, they no longer need the asmlinkage annotation. Remove it. Signed-off-by: James Morse Acked-by: Mark Rutland Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/exception.h | 36 ++++++++++++++++-------------------- arch/arm64/kernel/fpsimd.c | 6 +++--- arch/arm64/kernel/syscall.c | 4 ++-- arch/arm64/kernel/traps.c | 8 ++++---- arch/arm64/mm/fault.c | 16 +++++++--------- 5 files changed, 32 insertions(+), 38 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h index a9e376623ecf..4d5f3b5f50cd 100644 --- a/arch/arm64/include/asm/exception.h +++ b/arch/arm64/include/asm/exception.h @@ -32,26 +32,22 @@ static inline u32 disr_to_esr(u64 disr) } asmlinkage void enter_from_user_mode(void); -asmlinkage void do_mem_abort(unsigned long addr, unsigned int esr, - struct pt_regs *regs); -asmlinkage void do_sp_pc_abort(unsigned long addr, unsigned int esr, - struct pt_regs *regs); -asmlinkage void do_undefinstr(struct pt_regs *regs); +void do_mem_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs); +void do_sp_pc_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs); +void do_undefinstr(struct pt_regs *regs); asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr); -asmlinkage void do_debug_exception(unsigned long addr_if_watchpoint, - unsigned int esr, struct pt_regs *regs); -asmlinkage void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs); -asmlinkage void do_sve_acc(unsigned int esr, struct pt_regs *regs); -asmlinkage void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs); -asmlinkage void do_sysinstr(unsigned int esr, struct pt_regs *regs); -asmlinkage void do_sp_pc_abort(unsigned long addr, unsigned int esr, - struct pt_regs *regs); -asmlinkage void bad_el0_sync(struct pt_regs *regs, int reason, - unsigned int esr); -asmlinkage void do_cp15instr(unsigned int esr, struct pt_regs *regs); -asmlinkage void el0_svc_handler(struct pt_regs *regs); -asmlinkage void el0_svc_compat_handler(struct pt_regs *regs); -asmlinkage void do_el0_ia_bp_hardening(unsigned long addr, unsigned int esr, - struct pt_regs *regs); +void do_debug_exception(unsigned long addr_if_watchpoint, unsigned int esr, + struct pt_regs *regs); +void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs); +void do_sve_acc(unsigned int esr, struct pt_regs *regs); +void 
do_fpsimd_exc(unsigned int esr, struct pt_regs *regs); +void do_sysinstr(unsigned int esr, struct pt_regs *regs); +void do_sp_pc_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs); +void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr); +void do_cp15instr(unsigned int esr, struct pt_regs *regs); +void el0_svc_handler(struct pt_regs *regs); +void el0_svc_compat_handler(struct pt_regs *regs); +void do_el0_ia_bp_hardening(unsigned long addr, unsigned int esr, + struct pt_regs *regs); #endif /* __ASM_EXCEPTION_H */ diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 37d3912cfe06..3eb338f14386 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -920,7 +920,7 @@ void fpsimd_release_task(struct task_struct *dead_task) * would have disabled the SVE access trap for userspace during * ret_to_user, making an SVE access trap impossible in that case. */ -asmlinkage void do_sve_acc(unsigned int esr, struct pt_regs *regs) +void do_sve_acc(unsigned int esr, struct pt_regs *regs) { /* Even if we chose not to use SVE, the hardware could still trap: */ if (unlikely(!system_supports_sve()) || WARN_ON(is_compat_task())) { @@ -947,7 +947,7 @@ asmlinkage void do_sve_acc(unsigned int esr, struct pt_regs *regs) /* * Trapped FP/ASIMD access. */ -asmlinkage void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs) +void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs) { /* TODO: implement lazy context saving/restoring */ WARN_ON(1); @@ -956,7 +956,7 @@ asmlinkage void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs) /* * Raise a SIGFPE for the current process. */ -asmlinkage void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs) +void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs) { unsigned int si_code = FPE_FLTUNK; diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c index 871c739f060a..9a9d98a443fc 100644 --- a/arch/arm64/kernel/syscall.c +++ b/arch/arm64/kernel/syscall.c @@ -154,14 +154,14 @@ static inline void sve_user_discard(void) sve_user_disable(); } -asmlinkage void el0_svc_handler(struct pt_regs *regs) +void el0_svc_handler(struct pt_regs *regs) { sve_user_discard(); el0_svc_common(regs, regs->regs[8], __NR_syscalls, sys_call_table); } #ifdef CONFIG_COMPAT -asmlinkage void el0_svc_compat_handler(struct pt_regs *regs) +void el0_svc_compat_handler(struct pt_regs *regs) { el0_svc_common(regs, regs->regs[7], __NR_compat_syscalls, compat_sys_call_table); diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index ba1a571a7774..54ebe24ef4b1 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -394,7 +394,7 @@ void arm64_notify_segfault(unsigned long addr) force_signal_inject(SIGSEGV, code, addr); } -asmlinkage void do_undefinstr(struct pt_regs *regs) +void do_undefinstr(struct pt_regs *regs) { /* check for AArch32 breakpoint instructions */ if (!aarch32_break_handler(regs)) @@ -669,7 +669,7 @@ static const struct sys64_hook cp15_64_hooks[] = { {}, }; -asmlinkage void do_cp15instr(unsigned int esr, struct pt_regs *regs) +void do_cp15instr(unsigned int esr, struct pt_regs *regs) { const struct sys64_hook *hook, *hook_base; @@ -710,7 +710,7 @@ asmlinkage void do_cp15instr(unsigned int esr, struct pt_regs *regs) NOKPROBE_SYMBOL(do_cp15instr); #endif -asmlinkage void do_sysinstr(unsigned int esr, struct pt_regs *regs) +void do_sysinstr(unsigned int esr, struct pt_regs *regs) { const struct sys64_hook *hook; @@ -797,7 +797,7 @@ asmlinkage void bad_mode(struct pt_regs 
*regs, int reason, unsigned int esr) * bad_el0_sync handles unexpected, but potentially recoverable synchronous * exceptions taken from EL0. Unlike bad_mode, this returns. */ -asmlinkage void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr) +void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr) { void __user *pc = (void __user *)instruction_pointer(regs); diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 844cd2535826..cb13f4daa878 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -733,8 +733,7 @@ static const struct fault_info fault_info[] = { { do_bad, SIGKILL, SI_KERNEL, "unknown 63" }, }; -asmlinkage void do_mem_abort(unsigned long addr, unsigned int esr, - struct pt_regs *regs) +void do_mem_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs) { const struct fault_info *inf = esr_to_fault_info(esr); @@ -752,15 +751,15 @@ asmlinkage void do_mem_abort(unsigned long addr, unsigned int esr, } NOKPROBE_SYMBOL(do_mem_abort); -asmlinkage void do_el0_irq_bp_hardening(void) +void do_el0_irq_bp_hardening(void) { /* PC has already been checked in entry.S */ arm64_apply_bp_hardening(); } NOKPROBE_SYMBOL(do_el0_irq_bp_hardening); -asmlinkage void do_el0_ia_bp_hardening(unsigned long addr, unsigned int esr, - struct pt_regs *regs) +void do_el0_ia_bp_hardening(unsigned long addr, unsigned int esr, + struct pt_regs *regs) { /* * We've taken an instruction abort from userspace and not yet @@ -775,8 +774,7 @@ asmlinkage void do_el0_ia_bp_hardening(unsigned long addr, unsigned int esr, } NOKPROBE_SYMBOL(do_el0_ia_bp_hardening); -asmlinkage void do_sp_pc_abort(unsigned long addr, unsigned int esr, - struct pt_regs *regs) +void do_sp_pc_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs) { if (user_mode(regs)) { if (!is_ttbr0_addr(instruction_pointer(regs))) @@ -896,8 +894,8 @@ static int cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs) #endif /* CONFIG_ARM64_ERRATUM_1463225 */ NOKPROBE_SYMBOL(cortex_a76_erratum_1463225_debug_handler); -asmlinkage void do_debug_exception(unsigned long addr_if_watchpoint, - unsigned int esr, struct pt_regs *regs) +void do_debug_exception(unsigned long addr_if_watchpoint, unsigned int esr, + struct pt_regs *regs) { const struct fault_info *inf = esr_to_debug_fault_info(esr); unsigned long pc = instruction_pointer(regs); -- cgit v1.2.3 From bfe298745afc9548ad9344a9a3f26c81fd1a76c4 Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 25 Oct 2019 17:42:16 +0100 Subject: arm64: entry-common: don't touch daif before bp-hardening The previous patches mechanically transformed the assembly version of entry.S to entry-common.c for synchronous exceptions. The C version of local_daif_restore() doesn't quite do the same thing as the assembly versions if pseudo-NMI is in use. In particular, | local_daif_restore(DAIF_PROCCTX_NOIRQ) will still allow pNMI to be delivered. This is not the behaviour do_el0_ia_bp_hardening() and do_sp_pc_abort() want as it should not be possible for the PMU handler to run as an NMI until the bp-hardening sequence has run. The bp-hardening calls were placed where they are because this was the first C code to run after the relevant exceptions. As we've now moved that point earlier, move the checks and calls earlier too. This makes it clearer that this stuff runs before any kind of exception, and saves modifying PSTATE twice. 
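For illustration, the window being closed looks roughly like this (a hand-written condensation of the old path, not code from the tree; it assumes pseudo-NMI is in use, so the PMU interrupt stays deliverable even while regular IRQs are masked):

static void hazard_sketch(struct pt_regs *regs, unsigned long esr)
{
	unsigned long far = read_sysreg(far_el1);

	local_daif_restore(DAIF_PROCCTX_NOIRQ);	/* pNMI now deliverable */
	/*
	 * A PMU pseudo-NMI taken here runs before the branch predictor
	 * has been sanitised for the kernel address in FAR_EL1.
	 */
	if (!is_ttbr0_addr(far))
		arm64_apply_bp_hardening();	/* too late */
}

Hoisting the arm64_apply_bp_hardening() call in front of any PSTATE change, as the diff below does, closes that window.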
Signed-off-by: James Morse Reviewed-by: Mark Rutland Cc: Julien Thierry Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/processor.h | 14 ++++++++++++++ arch/arm64/kernel/entry-common.c | 18 +++++++++++++++--- arch/arm64/mm/fault.c | 36 +----------------------------------- 3 files changed, 30 insertions(+), 38 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 5623685c7d13..8899d26f73ff 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -26,10 +26,12 @@ #include #include #include +#include #include #include #include +#include #include #include #include @@ -214,6 +216,18 @@ static inline void start_thread(struct pt_regs *regs, unsigned long pc, regs->sp = sp; } +static inline bool is_ttbr0_addr(unsigned long addr) +{ + /* entry assembly clears tags for TTBR0 addrs */ + return addr < TASK_SIZE; +} + +static inline bool is_ttbr1_addr(unsigned long addr) +{ + /* TTBR1 addresses may have a tag if KASAN_SW_TAGS is in use */ + return arch_kasan_reset_tag(addr) >= PAGE_OFFSET; +} + #ifdef CONFIG_COMPAT static inline void compat_start_thread(struct pt_regs *regs, unsigned long pc, unsigned long sp) diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index 2c318e41d84b..5dce5e56995a 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -14,6 +14,7 @@ #include #include #include +#include #include static void notrace el1_abort(struct pt_regs *regs, unsigned long esr) @@ -112,9 +113,17 @@ static void notrace el0_ia(struct pt_regs *regs, unsigned long esr) { unsigned long far = read_sysreg(far_el1); + /* + * We've taken an instruction abort from userspace and not yet + * re-enabled IRQs. If the address is a kernel address, apply + * BP hardening prior to enabling IRQs and pre-emption. 
+ */ + if (!is_ttbr0_addr(far)) + arm64_apply_bp_hardening(); + user_exit_irqoff(); - local_daif_restore(DAIF_PROCCTX_NOIRQ); - do_el0_ia_bp_hardening(far, esr, regs); + local_daif_restore(DAIF_PROCCTX); + do_mem_abort(far, esr, regs); } NOKPROBE_SYMBOL(el0_ia); @@ -154,8 +163,11 @@ static void notrace el0_pc(struct pt_regs *regs, unsigned long esr) { unsigned long far = read_sysreg(far_el1); + if (!is_ttbr0_addr(instruction_pointer(regs))) + arm64_apply_bp_hardening(); + user_exit_irqoff(); - local_daif_restore(DAIF_PROCCTX_NOIRQ); + local_daif_restore(DAIF_PROCCTX); do_sp_pc_abort(far, esr, regs); } NOKPROBE_SYMBOL(el0_pc); diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index cb13f4daa878..1bb2e3737e51 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -32,8 +32,8 @@ #include #include #include -#include #include +#include #include #include #include @@ -102,18 +102,6 @@ static void mem_abort_decode(unsigned int esr) data_abort_decode(esr); } -static inline bool is_ttbr0_addr(unsigned long addr) -{ - /* entry assembly clears tags for TTBR0 addrs */ - return addr < TASK_SIZE; -} - -static inline bool is_ttbr1_addr(unsigned long addr) -{ - /* TTBR1 addresses may have a tag if KASAN_SW_TAGS is in use */ - return arch_kasan_reset_tag(addr) >= PAGE_OFFSET; -} - static inline unsigned long mm_to_pgd_phys(struct mm_struct *mm) { /* Either init_pg_dir or swapper_pg_dir */ @@ -758,30 +746,8 @@ void do_el0_irq_bp_hardening(void) } NOKPROBE_SYMBOL(do_el0_irq_bp_hardening); -void do_el0_ia_bp_hardening(unsigned long addr, unsigned int esr, - struct pt_regs *regs) -{ - /* - * We've taken an instruction abort from userspace and not yet - * re-enabled IRQs. If the address is a kernel address, apply - * BP hardening prior to enabling IRQs and pre-emption. - */ - if (!is_ttbr0_addr(addr)) - arm64_apply_bp_hardening(); - - local_daif_restore(DAIF_PROCCTX); - do_mem_abort(addr, esr, regs); -} -NOKPROBE_SYMBOL(do_el0_ia_bp_hardening); - void do_sp_pc_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs) { - if (user_mode(regs)) { - if (!is_ttbr0_addr(instruction_pointer(regs))) - arm64_apply_bp_hardening(); - local_daif_restore(DAIF_PROCCTX); - } - arm64_notify_die("SP/PC alignment exception", regs, SIGBUS, BUS_ADRALN, (void __user *)addr, esr); } -- cgit v1.2.3 From 01d035d796fec0ab23dc3f3a3a9f58bbe034fc5b Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Sun, 27 Oct 2019 09:19:50 +0100 Subject: KVM: arm/arm64: Show halt poll counters in debugfs ARM/ARM64 has counters halt_successful_poll, halt_attempted_poll, halt_poll_invalid, and halt_wakeup but never exposed those in debugfs. 
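Each added line is plain table data: the VCPU_STAT() macro visible in the hunk context below expands to a name, an offset into struct kvm_vcpu and a type tag, which the generic KVM debugfs code consumes. Expanded by hand, for example:

/* VCPU_STAT(halt_wakeup) becomes roughly: */
{ "halt_wakeup", offsetof(struct kvm_vcpu, stat.halt_wakeup), KVM_STAT_VCPU }

With the entries in place, the counters show up as files under the usual KVM debugfs root (typically /sys/kernel/debug/kvm/), next to the existing exit statistics.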
Signed-off-by: Christian Borntraeger Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/1572164390-5851-1-git-send-email-borntraeger@de.ibm.com --- arch/arm/kvm/guest.c | 4 ++++ arch/arm64/kvm/guest.c | 4 ++++ 2 files changed, 8 insertions(+) (limited to 'arch') diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c index 684cf64b4033..66964642cd42 100644 --- a/arch/arm/kvm/guest.c +++ b/arch/arm/kvm/guest.c @@ -21,6 +21,10 @@ #define VCPU_STAT(x) { #x, offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU } struct kvm_stats_debugfs_item debugfs_entries[] = { + VCPU_STAT(halt_successful_poll), + VCPU_STAT(halt_attempted_poll), + VCPU_STAT(halt_poll_invalid), + VCPU_STAT(halt_wakeup), VCPU_STAT(hvc_exit_stat), VCPU_STAT(wfe_exit_stat), VCPU_STAT(wfi_exit_stat), diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index dfd626447482..260ea3158682 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -34,6 +34,10 @@ #define VCPU_STAT(x) { #x, offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU } struct kvm_stats_debugfs_item debugfs_entries[] = { + VCPU_STAT(halt_successful_poll), + VCPU_STAT(halt_attempted_poll), + VCPU_STAT(halt_poll_invalid), + VCPU_STAT(halt_wakeup), VCPU_STAT(hvc_exit_stat), VCPU_STAT(wfe_exit_stat), VCPU_STAT(wfi_exit_stat), -- cgit v1.2.3 From 5c401308017f256ae9de804b4a1c65be1d390571 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Mon, 28 Oct 2019 14:05:41 +0100 Subject: KVM: arm64: Don't set HCR_EL2.TVM when S2FWB is supported On CPUs that support S2FWB (Armv8.4+), KVM configures the stage 2 page tables to override the memory attributes of memory accesses, regardless of the stage 1 page table configurations, and also when the stage 1 MMU is turned off. This results in all memory accesses to RAM being cacheable, including during early boot of the guest. On CPUs without this feature, memory accesses were non-cacheable during boot until the guest turned on the stage 1 MMU, and we had to detect when the guest turned on the MMU, such that we could invalidate all cache entries and ensure a consistent view of memory with the MMU turned on. When the guest turned on the caches, we would call stage2_flush_vm() from kvm_toggle_cache(). However, stage2_flush_vm() walks all the stage 2 tables, and calls __kvm_flush_dcache_pte, which on a system with S2FWB does ... absolutely nothing. We can avoid that whole song and dance, and simply not set TVM when creating a VM on a system that has S2FWB.
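For reference, the trap being dropped exists only to spot the guest enabling its stage 1 MMU and caches; in SCTLR_EL1 terms that is the M and C bits (a sketch, using the SCTLR_ELx_* bit names from arm64's sysreg definitions):

/* What the TVM trap watches for on non-FWB systems (illustrative). */
static bool guest_enabled_mmu_and_caches(u64 sctlr_el1)
{
	return (sctlr_el1 & SCTLR_ELx_M) && (sctlr_el1 & SCTLR_ELx_C);
}

On S2FWB systems the attributes are forced at stage 2, so there is nothing to detect and the trap is pure overhead.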
Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier Reviewed-by: Mark Rutland Link: https://lore.kernel.org/r/20191028130541.30536-1-christoffer.dall@arm.com --- arch/arm64/include/asm/kvm_arm.h | 3 +-- arch/arm64/include/asm/kvm_emulate.h | 12 +++++++++++- 2 files changed, 12 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index ddf9d762ac62..6e5d839f42b5 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -61,7 +61,6 @@ * RW: 64bit by default, can be overridden for 32bit VMs * TAC: Trap ACTLR * TSC: Trap SMC - * TVM: Trap VM ops (until M+C set in SCTLR_EL1) * TSW: Trap cache operations by set/way * TWE: Trap WFE * TWI: Trap WFI @@ -74,7 +73,7 @@ * SWIO: Turn set/way invalidates into set/way clean+invalidate */ #define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWE | HCR_TWI | HCR_VM | \ - HCR_TVM | HCR_BSU_IS | HCR_FB | HCR_TAC | \ + HCR_BSU_IS | HCR_FB | HCR_TAC | \ HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW | HCR_TLOR | \ HCR_FMO | HCR_IMO) #define HCR_VIRT_EXCP_MASK (HCR_VSE | HCR_VI | HCR_VF) diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index d69c1efc63e7..6e92f6c7b1e4 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -53,8 +53,18 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu) /* trap error record accesses */ vcpu->arch.hcr_el2 |= HCR_TERR; } - if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) + + if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) { vcpu->arch.hcr_el2 |= HCR_FWB; + } else { + /* + * For non-FWB CPUs, we trap VM ops (HCR_EL2.TVM) until M+C + * get set in SCTLR_EL1 such that we can detect when the guest + * MMU gets turned on and do the necessary cache maintenance + * then. + */ + vcpu->arch.hcr_el2 |= HCR_TVM; + } if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features)) vcpu->arch.hcr_el2 &= ~HCR_RW; -- cgit v1.2.3 From 4686da5140c18c84ca01a8ab994571d832c63398 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 28 Oct 2019 16:45:07 +0000 Subject: arm64: Make arm64_dma32_phys_limit static This variable is only used in the arch/arm64/mm/init.c file for ZONE_DMA32 initialisation, no need to expose it. Reported-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/mm/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 71b45c58218b..39fc69873b18 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -63,7 +63,7 @@ EXPORT_SYMBOL(vmemmap); * bit addressable memory area. */ phys_addr_t arm64_dma_phys_limit __ro_after_init; -phys_addr_t arm64_dma32_phys_limit __ro_after_init; +static phys_addr_t arm64_dma32_phys_limit __ro_after_init; #ifdef CONFIG_KEXEC_CORE /* -- cgit v1.2.3 From e44ec4a35dbdf3f3fe772f176fab3b8be7e02b0f Mon Sep 17 00:00:00 2001 From: Xiang Zheng Date: Tue, 29 Oct 2019 20:41:31 +0800 Subject: arm64: print additional fault message when executing non-exec memory When attempting to executing non-executable memory, the fault message shows: Unable to handle kernel read from unreadable memory at virtual address ffff802dac469000 This may confuse someone, so add a new fault message for instruction abort. 
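With the change, executing from non-executable memory reports, for example:

Unable to handle kernel execute from non-executable memory at virtual address ffff802dac469000

matching the existing "write to read-only memory" and "read from unreadable memory" variants selected in __do_kernel_fault().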
Acked-by: Will Deacon Signed-off-by: Xiang Zheng Signed-off-by: Catalin Marinas --- arch/arm64/mm/fault.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 855f2a7954e6..d46a2bb90f54 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -314,6 +314,8 @@ static void __do_kernel_fault(unsigned long addr, unsigned int esr, if (is_el1_permission_fault(addr, esr, regs)) { if (esr & ESR_ELx_WNR) msg = "write to read-only memory"; + else if (is_el1_instruction_abort(esr)) + msg = "execute from non-executable memory"; else msg = "read from unreadable memory"; } else if (addr < PAGE_SIZE) { -- cgit v1.2.3 From 6a3035dac6506bc6da40e391803fba50bb2dce9d Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 18 Oct 2019 13:23:09 +0200 Subject: s390/bug: make use of asm_inline This is the s390 version of commit 32ee8230b2b0 ("x86: bug.h: use asm_inline in _BUG_FLAGS definitions"). See commit eb111869301e ("compiler-types.h: add asm_inline definition") for more details. Just like on x86 the .text section size decreases a bit while the .data section size increases about the same amount (gcc 9.2 with defconfig). Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/bug.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/bug.h b/arch/s390/include/asm/bug.h index 713fc9735ffb..a2b11ac00f60 100644 --- a/arch/s390/include/asm/bug.h +++ b/arch/s390/include/asm/bug.h @@ -9,7 +9,7 @@ #ifdef CONFIG_DEBUG_BUGVERBOSE #define __EMIT_BUG(x) do { \ - asm volatile( \ + asm_inline volatile( \ "0: j 0b+2\n" \ "1:\n" \ ".section .rodata.str,\"aMS\",@progbits,1\n" \ @@ -28,7 +28,7 @@ #else /* CONFIG_DEBUG_BUGVERBOSE */ #define __EMIT_BUG(x) do { \ - asm volatile( \ + asm_inline volatile( \ "0: j 0b+2\n" \ "1:\n" \ ".section __bug_table,\"awM\",@progbits,%1\n" \ -- cgit v1.2.3 From cceb018377a123dd77d3239bbdbfbdf50f0d6a71 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 18 Oct 2019 13:23:16 +0200 Subject: s390/alternatives: make use of asm_inline This is the s390 version of commit 40576e5e63ea ("x86: alternative.h: use asm_inline for all alternative variants"). See commit eb111869301e ("compiler-types.h: add asm_inline definition") for more details. With this change the compiler will not generate many out-of-line versions for the three instruction sized arch_spin_unlock() function anymore. Due to this gcc seems to change a lot of other inline decisions which results in a net 6k text size growth according to bloat-o-meter (gcc 9.2 with defconfig). But that's still better than having many out-of-line versions of arch_spin_unlock(). Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/alternative.h | 4 ++-- arch/s390/include/asm/spinlock.h | 2 +- arch/s390/lib/spinlock.c | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/alternative.h b/arch/s390/include/asm/alternative.h index c2cf7bcdef9b..1c8a38f762a3 100644 --- a/arch/s390/include/asm/alternative.h +++ b/arch/s390/include/asm/alternative.h @@ -139,10 +139,10 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end); * without volatile and memory clobber. 
*/ #define alternative(oldinstr, altinstr, facility) \ - asm volatile(ALTERNATIVE(oldinstr, altinstr, facility) : : : "memory") + asm_inline volatile(ALTERNATIVE(oldinstr, altinstr, facility) : : : "memory") #define alternative_2(oldinstr, altinstr1, facility1, altinstr2, facility2) \ - asm volatile(ALTERNATIVE_2(oldinstr, altinstr1, facility1, \ + asm_inline volatile(ALTERNATIVE_2(oldinstr, altinstr1, facility1, \ altinstr2, facility2) ::: "memory") #endif /* __ASSEMBLY__ */ diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h index c02bff33f6c7..3a37172d5398 100644 --- a/arch/s390/include/asm/spinlock.h +++ b/arch/s390/include/asm/spinlock.h @@ -85,7 +85,7 @@ static inline int arch_spin_trylock(arch_spinlock_t *lp) static inline void arch_spin_unlock(arch_spinlock_t *lp) { typecheck(int, lp->lock); - asm volatile( + asm_inline volatile( ALTERNATIVE("", ".long 0xb2fa0070", 49) /* NIAI 7 */ " sth %1,%0\n" : "=Q" (((unsigned short *) &lp->lock)[1]) diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c index 30a7c8c29964..ce1e4bbe53aa 100644 --- a/arch/s390/lib/spinlock.c +++ b/arch/s390/lib/spinlock.c @@ -74,7 +74,7 @@ static inline int arch_load_niai4(int *lock) { int owner; - asm volatile( + asm_inline volatile( ALTERNATIVE("", ".long 0xb2fa0040", 49) /* NIAI 4 */ " l %0,%1\n" : "=d" (owner) : "Q" (*lock) : "memory"); @@ -85,7 +85,7 @@ static inline int arch_cmpxchg_niai8(int *lock, int old, int new) { int expected = old; - asm volatile( + asm_inline volatile( ALTERNATIVE("", ".long 0xb2fa0080", 49) /* NIAI 8 */ " cs %0,%3,%1\n" : "=d" (old), "=Q" (*lock) -- cgit v1.2.3 From 4f84b383511de8dabc9bed2b98d2918830723527 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Mon, 12 Aug 2019 14:50:34 -0700 Subject: s390/boot: fix section name escaping GCC unescapes escaped string section names while Clang does not. Because __section uses the `#` stringification operator for the section name, it doesn't need to be escaped. This antipattern was found with: $ grep -e __section\(\" -e __section__\(\" -r Reported-by: Sedat Dilek Suggested-by: Josh Poimboeuf Signed-off-by: Nick Desaulniers Message-Id: <20190812215052.71840-1-ndesaulniers@google.com> Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/boot/startup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index 596ca7cc4d7b..1a41545becec 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -46,7 +46,7 @@ struct diag_ops __bootdata_preserved(diag_dma_ops) = { .diag0c = _diag0c_dma, .diag308_reset = _diag308_reset_dma }; -static struct diag210 _diag210_tmp_dma __section(".dma.data"); +static struct diag210 _diag210_tmp_dma __section(.dma.data); struct diag210 *__bootdata_preserved(__diag210_tmp_dma) = &_diag210_tmp_dma; void _swsusp_reset_dma(void); unsigned long __bootdata_preserved(__swsusp_reset_dma) = __pa(_swsusp_reset_dma); -- cgit v1.2.3 From 1917b47dc23f0ace5893971d83642c2cf22f5cb7 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Wed, 23 Oct 2019 10:03:23 +0200 Subject: s390/qdio: move SSQD Sniffer mask definition Put the Sniffer bit next to all the other CHSC AC2 bits. 
Signed-off-by: Julian Wiedmann Reviewed-by: Steffen Maier Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/qdio.h | 1 + drivers/s390/net/qeth_l3.h | 2 -- drivers/s390/net/qeth_l3_sys.c | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h index e3f238e8c611..71e3f0146cda 100644 --- a/arch/s390/include/asm/qdio.h +++ b/arch/s390/include/asm/qdio.h @@ -276,6 +276,7 @@ struct qdio_outbuf_state { #define CHSC_AC2_MULTI_BUFFER_AVAILABLE 0x0080 #define CHSC_AC2_MULTI_BUFFER_ENABLED 0x0040 #define CHSC_AC2_DATA_DIV_AVAILABLE 0x0010 +#define CHSC_AC2_SNIFFER_AVAILABLE 0x0008 #define CHSC_AC2_DATA_DIV_ENABLED 0x0002 #define CHSC_AC3_FORMAT2_CQ_AVAILABLE 0x8000 diff --git a/drivers/s390/net/qeth_l3.h b/drivers/s390/net/qeth_l3.h index 87659cfc9066..2b9302cd0117 100644 --- a/drivers/s390/net/qeth_l3.h +++ b/drivers/s390/net/qeth_l3.h @@ -13,8 +13,6 @@ #include "qeth_core.h" #include -#define QETH_SNIFF_AVAIL 0x0008 - enum qeth_ip_types { QETH_IP_TYPE_NORMAL, QETH_IP_TYPE_VIPA, diff --git a/drivers/s390/net/qeth_l3_sys.c b/drivers/s390/net/qeth_l3_sys.c index 2f73b33c9347..c6204520114e 100644 --- a/drivers/s390/net/qeth_l3_sys.c +++ b/drivers/s390/net/qeth_l3_sys.c @@ -228,7 +228,7 @@ static ssize_t qeth_l3_dev_sniffer_store(struct device *dev, break; case 1: qdio_get_ssqd_desc(CARD_DDEV(card), &card->ssqd); - if (card->ssqd.qdioac2 & QETH_SNIFF_AVAIL) { + if (card->ssqd.qdioac2 & CHSC_AC2_SNIFFER_AVAILABLE) { card->options.sniffer = i; if (card->qdio.init_pool.buf_count != QETH_IN_BUF_COUNT_MAX) -- cgit v1.2.3 From f653e29bc2d94ae6b6f470145c630d1ba0d856d2 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 28 Oct 2019 10:11:41 +0100 Subject: s390/time: remove monotonic_clock() Remove unused monotonic_clock() function. Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/timex.h | 1 - arch/s390/kernel/time.c | 9 --------- 2 files changed, 10 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index 64539c221672..0f5320617cc4 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -179,7 +179,6 @@ static inline cycles_t get_cycles(void) int get_phys_clock(unsigned long *clock); void init_cpu_timer(void); -unsigned long long monotonic_clock(void); extern unsigned char tod_clock_base[16] __aligned(8); diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index e8766beee5ad..f9d070d016e3 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -110,15 +110,6 @@ unsigned long long notrace sched_clock(void) } NOKPROBE_SYMBOL(sched_clock); -/* - * Monotonic_clock - returns # of nanoseconds passed since time_init() - */ -unsigned long long monotonic_clock(void) -{ - return sched_clock(); -} -EXPORT_SYMBOL(monotonic_clock); - static void ext_to_timespec64(unsigned char *clk, struct timespec64 *xt) { unsigned long long high, low, rem, sec, nsec; -- cgit v1.2.3 From d3baaeb5ae0870973bfe0f6aaed887b894d5e560 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Tue, 22 Oct 2019 17:37:51 +0200 Subject: s390: avoid double handling of "noexec" option "noexec" option is already parsed during startup and its value is exposed via noexec_disabled variable. Simply reuse that value during machine facilities detection. 
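For context, the startup-time parsing that remains follows the usual early_param() pattern; a sketch of its shape (illustrative only - the real parser lives in the early boot code and feeds the noexec_disabled flag tested above):

static int __init noexec_sketch(char *str)
{
	bool enabled;
	int rc = kstrtobool(str, &enabled);

	if (!rc && !enabled)
		noexec_disabled = 1;	/* cached; checked during facility detection */
	return rc;
}
early_param("noexec", noexec_sketch);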
Suggested-by: Ilya Leoshkevich Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/early.c | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index b432d63d0b37..2e99e01e4f62 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -238,7 +238,7 @@ static __init void detect_machine_facilities(void) S390_lowcore.machine_flags |= MACHINE_FLAG_VX; __ctl_set_bit(0, 17); } - if (test_facility(130)) { + if (test_facility(130) && !noexec_disabled) { S390_lowcore.machine_flags |= MACHINE_FLAG_NX; __ctl_set_bit(0, 20); } @@ -268,21 +268,6 @@ static int __init disable_vector_extension(char *str) } early_param("novx", disable_vector_extension); -static int __init noexec_setup(char *str) -{ - bool enabled; - int rc; - - rc = kstrtobool(str, &enabled); - if (!rc && !enabled) { - /* Disable no-execute support */ - S390_lowcore.machine_flags &= ~MACHINE_FLAG_NX; - __ctl_clear_bit(0, 20); - } - return rc; -} -early_param("noexec", noexec_setup); - static int __init cad_setup(char *str) { bool enabled; -- cgit v1.2.3 From ea3f6dcfa71afb51753ad3729c29570f90d6abbc Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Wed, 2 Oct 2019 12:56:27 +0200 Subject: s390/unwind: fix get_stack_pointer(NULL, NULL) unwind_for_each_frame(NULL, NULL, 0) does not return any valid frames. The reason is that get_stack_pointer, unlike get_stack_info and show_stack, does not handle NULL argument. Fix by making get_stack_pointer treat NULL as current, like get_stack_info and show_stack do. Reviewed-by: Vasily Gorbik Tested-by: Vasily Gorbik Signed-off-by: Ilya Leoshkevich Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/stacktrace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/include/asm/stacktrace.h b/arch/s390/include/asm/stacktrace.h index 0ae4bbf7779c..fee40212af11 100644 --- a/arch/s390/include/asm/stacktrace.h +++ b/arch/s390/include/asm/stacktrace.h @@ -38,7 +38,7 @@ static inline unsigned long get_stack_pointer(struct task_struct *task, { if (regs) return (unsigned long) kernel_stack_pointer(regs); - if (task == current) + if (!task || task == current) return current_stack_pointer(); return (unsigned long) task->thread.ksp; } -- cgit v1.2.3 From 6756dd9b890fe50c01a6e7546bd498d57ddb98ae Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Mon, 28 Oct 2019 15:17:42 +0100 Subject: s390/process: avoid custom stack unwinding in get_wchan Currently get_wchan uses custom stack unwinding implementation which relies on back_chain presence. Replace it with more abstract stack unwinding api usage. 
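The guard around the walk is worth noting: another task's stack can be freed concurrently, so it must be pinned before unwinding. The general pattern (illustrative sketch, using the same helpers as the diff below):

static unsigned long walk_other_task(struct task_struct *p)
{
	unsigned long ip = 0;

	if (!try_get_task_stack(p))	/* stack already freed? */
		return 0;

	/* ... unwind p's stack here, e.g. with unwind_for_each_frame() ... */

	put_task_stack(p);
	return ip;
}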
Suggested-by: Ilya Leoshkevich Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/process.c | 36 ++++++++++++++++-------------------- 1 file changed, 16 insertions(+), 20 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index b0afec673f77..6ccef5f29761 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -40,6 +40,7 @@ #include #include #include +#include #include "entry.h" asmlinkage void ret_from_fork(void) asm ("ret_from_fork"); @@ -178,9 +179,8 @@ EXPORT_SYMBOL(dump_fpu); unsigned long get_wchan(struct task_struct *p) { - struct stack_frame *sf, *low, *high; - unsigned long return_address; - int count; + struct unwind_state state; + unsigned long ip = 0; if (!p || p == current || p->state == TASK_RUNNING || !task_stack_page(p)) return 0; @@ -188,26 +188,22 @@ unsigned long get_wchan(struct task_struct *p) if (!try_get_task_stack(p)) return 0; - low = task_stack_page(p); - high = (struct stack_frame *) task_pt_regs(p); - sf = (struct stack_frame *) p->thread.ksp; - if (sf <= low || sf > high) { - return_address = 0; - goto out; - } - for (count = 0; count < 16; count++) { - sf = (struct stack_frame *)READ_ONCE_NOCHECK(sf->back_chain); - if (sf <= low || sf > high) { - return_address = 0; - goto out; + unwind_for_each_frame(&state, p, NULL, 0) { + if (state.stack_info.type != STACK_TYPE_TASK) { + ip = 0; + break; } - return_address = READ_ONCE_NOCHECK(sf->gprs[8]); - if (!in_sched_functions(return_address)) - goto out; + + ip = unwind_get_return_address(&state); + if (!ip) + break; + + if (!in_sched_functions(ip)) + break; } -out: + put_task_stack(p); - return return_address; + return ip; } unsigned long arch_align_stack(unsigned long sp) -- cgit v1.2.3 From 011620688a71f2f1fe9901dbc2479a7c01053196 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 29 Oct 2019 14:09:47 +0100 Subject: s390/time: ensure get_clock_monotonic() returns monotonic values The current implementation of get_clock_monotonic() leaves it up to the caller to call the function with preemption disabled. The only core kernel caller (sched_clock) however does not disable preemption. In order to make sure that all callers of this function see monotonic values handle disabling preemption within the function itself. Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/timex.h | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index 0f5320617cc4..6da8885251d6 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -10,8 +10,9 @@ #ifndef _ASM_S390_TIMEX_H #define _ASM_S390_TIMEX_H -#include +#include #include +#include /* The value of the TOD clock for 1.1.1970. */ #define TOD_UNIX_EPOCH 0x7d91048bca000000ULL @@ -185,15 +186,18 @@ extern unsigned char tod_clock_base[16] __aligned(8); /** * get_clock_monotonic - returns current time in clock rate units * - * The caller must ensure that preemption is disabled. * The clock and tod_clock_base get changed via stop_machine. - * Therefore preemption must be disabled when calling this - * function, otherwise the returned value is not guaranteed to - * be monotonic. + * Therefore preemption must be disabled, otherwise the returned + * value is not guaranteed to be monotonic. 
*/ static inline unsigned long long get_tod_clock_monotonic(void) { - return get_tod_clock() - *(unsigned long long *) &tod_clock_base[1]; + unsigned long long tod; + + preempt_disable(); + tod = get_tod_clock() - *(unsigned long long *) &tod_clock_base[1]; + preempt_enable(); + return tod; } /** -- cgit v1.2.3 From 1c27a4bc817b89c0f97914ab93ab0bd74685d2f3 Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Fri, 6 Sep 2019 22:00:50 +0200 Subject: s390/mm: make pmd/pud_bad() report large entries as bad The semantics of pmd/pud_bad() expect that large entries are reported as bad, but we also check large entries for sanity. There is currently no issue with this wrong behaviour, but let's conform to the semantics by reporting large pmd/pud entries as bad, in order to prevent future issues. This was found by testing a patch from Anshuman Khandual, which is currently discussed on LKML ("mm/debug: Add tests validating architecture page table helpers"). Signed-off-by: Gerald Schaefer Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/pgtable.h | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 5ff98d76a66c..a1bf8d7cbe7f 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -266,11 +266,9 @@ static inline int is_module_addr(void *addr) #endif #define _REGION_ENTRY_BITS 0xfffffffffffff22fUL -#define _REGION_ENTRY_BITS_LARGE 0xffffffff8000fe2fUL /* Bits in the segment table entry */ #define _SEGMENT_ENTRY_BITS 0xfffffffffffffe33UL -#define _SEGMENT_ENTRY_BITS_LARGE 0xfffffffffff0ff33UL #define _SEGMENT_ENTRY_HARDWARE_BITS 0xfffffffffffffe30UL #define _SEGMENT_ENTRY_HARDWARE_BITS_LARGE 0xfffffffffff00730UL #define _SEGMENT_ENTRY_ORIGIN_LARGE ~0xfffffUL /* large page address */ @@ -699,10 +697,8 @@ static inline int pmd_large(pmd_t pmd) static inline int pmd_bad(pmd_t pmd) { - if ((pmd_val(pmd) & _SEGMENT_ENTRY_TYPE_MASK) > 0) + if ((pmd_val(pmd) & _SEGMENT_ENTRY_TYPE_MASK) > 0 || pmd_large(pmd)) return 1; - if (pmd_large(pmd)) - return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS_LARGE) != 0; return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS) != 0; } static inline int pud_bad(pud_t pud) { unsigned long type = pud_val(pud) & _REGION_ENTRY_TYPE_MASK; - if (type > _REGION_ENTRY_TYPE_R3) + if (type > _REGION_ENTRY_TYPE_R3 || pud_large(pud)) return 1; if (type < _REGION_ENTRY_TYPE_R3) return 0; - if (pud_large(pud)) - return (pud_val(pud) & ~_REGION_ENTRY_BITS_LARGE) != 0; return (pud_val(pud) & ~_REGION_ENTRY_BITS) != 0; } -- cgit v1.2.3 From 2d1fc1eb9b54564cec2dbe8cb1625e233fe49323 Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Tue, 10 Sep 2019 19:22:09 +0200 Subject: s390/mm: simplify page table helpers for large entries For pmds and puds, there are a couple of page table helper functions that only make sense for large entries, like pxd_(mk)dirty/young/write etc. We currently explicitly check if the entries are large, but in practice those functions must never be used for normal entries, which point to lower level page tables, so the code can be simplified. This also fixes a theoretical bug, where common code could use one of the functions before actually marking a pmd large, like this: pmd = pmd_mkhuge(pmd_mkdirty(pmd)) With the current implementation, the resulting large pmd would not be dirty as requested. This could in theory result in the loss of dirty information, e.g. after collapsing into a transparent hugepage.
Common code currently always marks an entry large before using one of the functions, but there is no hard requirement for this. The only requirement would be that it never uses the functions for normal entries pointing to lower level page tables, but they might be called before marking an entry large during its creation. In order to avoid issues with future common code, and to simplify the page table helpers, remove the checks for large entries and rely on common code never using them for normal entries. This was found by testing a patch from Anshuman Khandual, which is currently discussed on LKML ("mm/debug: Add tests validating architecture page table helpers"). Signed-off-by: Gerald Schaefer Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/pgtable.h | 83 ++++++++++++++--------------------------- 1 file changed, 28 insertions(+), 55 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index a1bf8d7cbe7f..cf19c0060816 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -752,18 +752,12 @@ static inline int pmd_write(pmd_t pmd) static inline int pmd_dirty(pmd_t pmd) { - int dirty = 1; - if (pmd_large(pmd)) - dirty = (pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY) != 0; - return dirty; + return (pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY) != 0; } static inline int pmd_young(pmd_t pmd) { - int young = 1; - if (pmd_large(pmd)) - young = (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) != 0; - return young; + return (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) != 0; } static inline int pte_present(pte_t pte) @@ -1291,29 +1285,23 @@ static inline pmd_t pmd_wrprotect(pmd_t pmd) static inline pmd_t pmd_mkwrite(pmd_t pmd) { pmd_val(pmd) |= _SEGMENT_ENTRY_WRITE; - if (pmd_large(pmd) && !(pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY)) - return pmd; - pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT; + if (pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY) + pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT; return pmd; } static inline pmd_t pmd_mkclean(pmd_t pmd) { - if (pmd_large(pmd)) { - pmd_val(pmd) &= ~_SEGMENT_ENTRY_DIRTY; - pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT; - } + pmd_val(pmd) &= ~_SEGMENT_ENTRY_DIRTY; + pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT; return pmd; } static inline pmd_t pmd_mkdirty(pmd_t pmd) { - if (pmd_large(pmd)) { - pmd_val(pmd) |= _SEGMENT_ENTRY_DIRTY | - _SEGMENT_ENTRY_SOFT_DIRTY; - if (pmd_val(pmd) & _SEGMENT_ENTRY_WRITE) - pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT; - } + pmd_val(pmd) |= _SEGMENT_ENTRY_DIRTY | _SEGMENT_ENTRY_SOFT_DIRTY; + if (pmd_val(pmd) & _SEGMENT_ENTRY_WRITE) + pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT; return pmd; } @@ -1327,29 +1315,23 @@ static inline pud_t pud_wrprotect(pud_t pud) static inline pud_t pud_mkwrite(pud_t pud) { pud_val(pud) |= _REGION3_ENTRY_WRITE; - if (pud_large(pud) && !(pud_val(pud) & _REGION3_ENTRY_DIRTY)) - return pud; - pud_val(pud) &= ~_REGION_ENTRY_PROTECT; + if (pud_val(pud) & _REGION3_ENTRY_DIRTY) + pud_val(pud) &= ~_REGION_ENTRY_PROTECT; return pud; } static inline pud_t pud_mkclean(pud_t pud) { - if (pud_large(pud)) { - pud_val(pud) &= ~_REGION3_ENTRY_DIRTY; - pud_val(pud) |= _REGION_ENTRY_PROTECT; - } + pud_val(pud) &= ~_REGION3_ENTRY_DIRTY; + pud_val(pud) |= _REGION_ENTRY_PROTECT; return pud; } static inline pud_t pud_mkdirty(pud_t pud) { - if (pud_large(pud)) { - pud_val(pud) |= _REGION3_ENTRY_DIRTY | - _REGION3_ENTRY_SOFT_DIRTY; - if (pud_val(pud) & _REGION3_ENTRY_WRITE) - pud_val(pud) &= ~_REGION_ENTRY_PROTECT; - } + pud_val(pud) |= _REGION3_ENTRY_DIRTY | _REGION3_ENTRY_SOFT_DIRTY; + if
(pud_val(pud) & _REGION3_ENTRY_WRITE) + pud_val(pud) &= ~_REGION_ENTRY_PROTECT; return pud; } @@ -1373,38 +1355,29 @@ static inline unsigned long massage_pgprot_pmd(pgprot_t pgprot) static inline pmd_t pmd_mkyoung(pmd_t pmd) { - if (pmd_large(pmd)) { - pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG; - if (pmd_val(pmd) & _SEGMENT_ENTRY_READ) - pmd_val(pmd) &= ~_SEGMENT_ENTRY_INVALID; - } + pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG; + if (pmd_val(pmd) & _SEGMENT_ENTRY_READ) + pmd_val(pmd) &= ~_SEGMENT_ENTRY_INVALID; return pmd; } static inline pmd_t pmd_mkold(pmd_t pmd) { - if (pmd_large(pmd)) { - pmd_val(pmd) &= ~_SEGMENT_ENTRY_YOUNG; - pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID; - } + pmd_val(pmd) &= ~_SEGMENT_ENTRY_YOUNG; + pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID; return pmd; } static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) { - if (pmd_large(pmd)) { - pmd_val(pmd) &= _SEGMENT_ENTRY_ORIGIN_LARGE | - _SEGMENT_ENTRY_DIRTY | _SEGMENT_ENTRY_YOUNG | - _SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_SOFT_DIRTY; - pmd_val(pmd) |= massage_pgprot_pmd(newprot); - if (!(pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY)) - pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT; - if (!(pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG)) - pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID; - return pmd; - } - pmd_val(pmd) &= _SEGMENT_ENTRY_ORIGIN; + pmd_val(pmd) &= _SEGMENT_ENTRY_ORIGIN_LARGE | + _SEGMENT_ENTRY_DIRTY | _SEGMENT_ENTRY_YOUNG | + _SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_SOFT_DIRTY; pmd_val(pmd) |= massage_pgprot_pmd(newprot); + if (!(pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY)) + pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT; + if (!(pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG)) + pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID; return pmd; } -- cgit v1.2.3 From ab874f22d35a8058d8fdee5f13eb69d8867efeae Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Wed, 11 Sep 2019 19:42:23 +0200 Subject: s390/mm: properly clear _PAGE_NOEXEC bit when it is not supported On older HW or under a hypervisor, without the instruction-execution-protection (IEP) facility, and also without EDAT-1, a translation-specification exception may be recognized when bit 55 of a pte is one (_PAGE_NOEXEC). The current code tries to prevent setting _PAGE_NOEXEC in such cases by removing it within set_pte_at(). However, ptep_set_access_flags() will modify a pte directly, without using set_pte_at(). There is at least one scenario where this can result in an active pte with _PAGE_NOEXEC set, which would then lead to a panic due to a translation-specification exception (write to a swapped-out page): do_swap_page pte = mk_pte (with _PAGE_NOEXEC bit) set_pte_at (will remove _PAGE_NOEXEC bit in page table, but keep it in local variable pte) vmf->orig_pte = pte (pte still contains _PAGE_NOEXEC bit) do_wp_page wp_page_reuse entry = vmf->orig_pte (still with _PAGE_NOEXEC bit) ptep_set_access_flags (writes entry with _PAGE_NOEXEC bit) Fix this by clearing _PAGE_NOEXEC already in mk_pte_phys(), where the pgprot value is applied, so that no pte with _PAGE_NOEXEC will ever be visible if the facility is not supported. The check in set_pte_at() can then also be removed.
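[ Editor's note: a small user-space model of the fix described above — stripping the unsupported bit when the pte is constructed rather than in set_pte_at(), so that direct writes such as the ptep_set_access_flags() path can no longer observe it. The bit value, machine_has_nx and the plain-integer pte_t are illustrative placeholders, not the real s390 definitions; the logic mirrors the mk_pte_phys() hunk in the diff below. ]

#include <assert.h>

#define PAGE_NOEXEC (1UL << 55)	/* placeholder: "bit 55 of a pte" from above */

typedef unsigned long pte_t;

static int machine_has_nx;	/* placeholder for MACHINE_HAS_NX */

/* build a pte from a physical page address and protection bits */
static pte_t mk_pte_phys(unsigned long physpage, unsigned long pgprot)
{
	pte_t pte = physpage + pgprot;

	/* strip the bit at construction time, as in the patch below */
	if (!machine_has_nx)
		pte &= ~PAGE_NOEXEC;
	return pte;
}

int main(void)
{
	machine_has_nx = 0;	/* older HW, no IEP facility */

	/* even a pgprot carrying the noexec bit yields a clean pte, so a
	 * later direct write (the ptep_set_access_flags() path) stays safe */
	assert(!(mk_pte_phys(0x1000, PAGE_NOEXEC) & PAGE_NOEXEC));
	return 0;
}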
Cc: # 4.11+ Fixes: 57d7f939e7bd ("s390: add no-execute support") Signed-off-by: Gerald Schaefer Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/pgtable.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index cf19c0060816..7b03037a8475 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -1161,8 +1161,6 @@ void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr); static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t entry) { - if (!MACHINE_HAS_NX) - pte_val(entry) &= ~_PAGE_NOEXEC; if (pte_present(entry)) pte_val(entry) &= ~_PAGE_UNUSED; if (mm_has_pgste(mm)) @@ -1179,6 +1177,8 @@ static inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot) { pte_t __pte; pte_val(__pte) = physpage + pgprot_val(pgprot); + if (!MACHINE_HAS_NX) + pte_val(__pte) &= ~_PAGE_NOEXEC; return pte_mkyoung(__pte); } -- cgit v1.2.3 From 2416cefc504ba8ae9b17e3e6b40afc72708f96be Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Tue, 22 Oct 2019 14:38:08 +0200 Subject: s390/mm: add mm_pxd_folded() checks to pxd_free() Unlike pxd_free_tlb(), the pxd_free() functions do not check for folded page tables. This is not an issue so far, as those functions will actually never be called, since no code will reach them when page tables are folded. In order to avoid future issues, and to make the s390 code more similar to other architectures, add mm_pxd_folded() checks, similar to how it is done in pxd_free_tlb(). This was found by testing a patch from Anshuman Khandual, which is currently being discussed on LKML ("mm/debug: Add tests validating architecture page table helpers"). Signed-off-by: Gerald Schaefer Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/pgalloc.h | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h index bccb8f4a63e2..77606c4acd58 100644 --- a/arch/s390/include/asm/pgalloc.h +++ b/arch/s390/include/asm/pgalloc.h @@ -56,7 +56,12 @@ static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long address) crst_table_init(table, _REGION2_ENTRY_EMPTY); return (p4d_t *) table; } -#define p4d_free(mm, p4d) crst_table_free(mm, (unsigned long *) p4d) + +static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d) +{ + if (!mm_p4d_folded(mm)) + crst_table_free(mm, (unsigned long *) p4d); +} static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address) { @@ -65,7 +70,12 @@ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address) crst_table_init(table, _REGION3_ENTRY_EMPTY); return (pud_t *) table; } -#define pud_free(mm, pud) crst_table_free(mm, (unsigned long *) pud) + +static inline void pud_free(struct mm_struct *mm, pud_t *pud) +{ + if (!mm_pud_folded(mm)) + crst_table_free(mm, (unsigned long *) pud); +} static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long vmaddr) { @@ -83,6 +93,8 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long vmaddr) static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) { + if (mm_pmd_folded(mm)) + return; pgtable_pmd_page_dtor(virt_to_page(pmd)); crst_table_free(mm, (unsigned long *) pmd); } -- cgit v1.2.3 From 265f79dcb5702aeffcd9453c8cbb5fb707652323 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 30 Oct 2019 16:55:56 +0100 Subject: s390: always inline
current_stack_pointer() This function must be inlined, since any caller expects the current stack pointer, which wouldn't be the case if the function weren't inlined. Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/processor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 51a0e4a2dc96..881fc37c11c6 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -206,7 +206,7 @@ unsigned long get_wchan(struct task_struct *p); /* Has task runtime instrumentation enabled ? */ #define is_ri_task(tsk) (!!(tsk)->thread.ri_cb) -static inline unsigned long current_stack_pointer(void) +static __always_inline unsigned long current_stack_pointer(void) { unsigned long sp; -- cgit v1.2.3 From effb83ccc83a97dbbe5214f4c443522719f05f3a Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Wed, 30 Oct 2019 14:20:32 +0100 Subject: s390: add error handling to perf_callchain_kernel perf_callchain_kernel neither stops when it encounters a garbage address nor when it runs out of space. Fix both issues, using the x86 version as inspiration. Signed-off-by: Ilya Leoshkevich Signed-off-by: Vasily Gorbik --- arch/s390/kernel/perf_event.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c index fcb6c2e92b07..1e75cc983546 100644 --- a/arch/s390/kernel/perf_event.c +++ b/arch/s390/kernel/perf_event.c @@ -224,9 +224,13 @@ void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { struct unwind_state state; + unsigned long addr; - unwind_for_each_frame(&state, current, regs, 0) - perf_callchain_store(entry, state.ip); + unwind_for_each_frame(&state, current, regs, 0) { + addr = unwind_get_return_address(&state); + if (!addr || perf_callchain_store(entry, addr)) + return; + } } /* Perf definitions for PMU event attributes in sysfs */ -- cgit v1.2.3 From 268a2d60013049cfd9a0aada77284aa6ea8ad26a Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Sun, 20 Oct 2019 22:43:13 +0800 Subject: MIPS: Loongson64: Rename CPU TYPES CPU_LOONGSON2 -> CPU_LOONGSON2EF CPU_LOONGSON3 -> CPU_LOONGSON64 Newer Loongson-2 products (2G/2H/2K1000) can share the kernel implementation with Loongson-3, while 2E/2F are less similar to other LOONGSON64 products.
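[ Editor's note: an illustrative sketch of how the renamed CPU types group the cores, modeled loosely on the flush_micro_tlb() hunk further down in this patch. The enum values are the renamed kernel identifiers, but the function, strings and main() here are simplified stand-ins for illustration, not kernel code. ]

#include <stdio.h>

enum cpu_type { CPU_LOONGSON2EF, CPU_LOONGSON64, CPU_OTHER };

/* per the flush_micro_tlb() hunk below: 2E/2F flush only the ITLB,
 * while the LOONGSON64-class cores flush both ITLB and DTLB */
static const char *micro_tlb_flush_scope(enum cpu_type t)
{
	switch (t) {
	case CPU_LOONGSON2EF:	/* Loongson 2E/2F */
		return "ITLB";
	case CPU_LOONGSON64:	/* Loongson 3A/3B and the newer 2G/2H/2K1000-class cores */
		return "ITLB+DTLB";
	default:
		return "none";
	}
}

int main(void)
{
	printf("2EF: %s\n", micro_tlb_flush_scope(CPU_LOONGSON2EF));
	printf("64:  %s\n", micro_tlb_flush_scope(CPU_LOONGSON64));
	return 0;
}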
Signed-off-by: Jiaxun Yang Signed-off-by: Paul Burton Cc: linux-mips@vger.kernel.org Cc: chenhc@lemote.com Cc: paul.burton@mips.com --- arch/mips/Kconfig | 36 +++++++++++----------- arch/mips/include/asm/cop2.h | 2 +- arch/mips/include/asm/cpu-type.h | 9 +++--- arch/mips/include/asm/cpu.h | 4 +-- arch/mips/include/asm/hazards.h | 4 +-- arch/mips/include/asm/io.h | 2 +- arch/mips/include/asm/irqflags.h | 2 +- .../asm/mach-loongson64/cpu-feature-overrides.h | 2 +- arch/mips/include/asm/mach-loongson64/irq.h | 2 +- .../asm/mach-loongson64/kernel-entry-init.h | 4 +-- arch/mips/include/asm/mach-loongson64/loongson.h | 2 +- arch/mips/include/asm/mach-loongson64/pci.h | 2 +- arch/mips/include/asm/module.h | 8 ++--- arch/mips/include/asm/processor.h | 2 +- arch/mips/include/asm/r4kcache.h | 4 +-- arch/mips/kernel/cpu-probe.c | 16 +++++----- arch/mips/kernel/idle.c | 2 +- arch/mips/kernel/perf_event_mipsxx.c | 4 +-- arch/mips/kernel/setup.c | 2 +- arch/mips/kernel/traps.c | 2 +- arch/mips/lib/csum_partial.S | 4 +-- arch/mips/loongson64/Kconfig | 2 +- arch/mips/loongson64/Makefile | 2 +- arch/mips/loongson64/Platform | 12 ++++---- arch/mips/loongson64/common/pci.c | 2 +- arch/mips/mm/c-r4k.c | 32 +++++++++---------- arch/mips/mm/page.c | 2 +- arch/mips/mm/tlb-r4k.c | 4 +-- arch/mips/mm/tlbex.c | 6 ++-- arch/mips/oprofile/Makefile | 4 +-- arch/mips/oprofile/common.c | 4 +-- drivers/gpio/Kconfig | 2 +- drivers/gpio/gpio-loongson.c | 2 +- include/drm/drm_cache.h | 2 +- 34 files changed, 95 insertions(+), 96 deletions(-) (limited to 'arch') diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index f7ec1505013c..a4e8c75bc086 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -1377,9 +1377,9 @@ choice prompt "CPU type" default CPU_R4X00 -config CPU_LOONGSON3 - bool "Loongson 3 CPU" - depends on SYS_HAS_CPU_LOONGSON3 +config CPU_LOONGSON64 + bool "Loongson GSx64 CPU" + depends on SYS_HAS_CPU_LOONGSON64 select ARCH_HAS_PHYS_TO_DMA select CPU_SUPPORTS_64BIT_KERNEL select CPU_SUPPORTS_HIGHMEM @@ -1394,19 +1394,19 @@ config CPU_LOONGSON3 select GPIOLIB select SWIOTLB help - The Loongson 3 processor implements the MIPS64R2 instruction - set with many extensions. + The Loongson GSx64 series of processor cores implements the + MIPS64R2 instruction set with many extensions. -config LOONGSON3_ENHANCEMENT - bool "New Loongson 3 CPU Enhancements" +config LOONGSON64_ENHANCEMENT + bool "New Loongson GSx64E CPU Enhancements" default n select CPU_MIPSR2 select CPU_HAS_PREFETCH - depends on CPU_LOONGSON3 + depends on CPU_LOONGSON64 help - New Loongson 3 CPU (since Loongson-3A R2, as opposed to Loongson-3A + New Loongson GSx64E cores (since Loongson-3A R2, as opposed to Loongson-3A R1, Loongson-3B R1 and Loongson-3B R2) has many enhancements, such as - FTLB, L1-VCache, EI/DI/Wait/Prefetch instruction, DSP/DSPv2 ASE, User + FTLB, L1-VCache, EI/DI/Wait/Prefetch instruction, DSP/DSPr2 ASE, User Local register, Read-Inhibit/Execute-Inhibit, SFB (Store Fill Buffer), Fast TLB refill support, etc. @@ -1418,7 +1418,7 @@ config LOONGSON3_ENHANCEMENT config CPU_LOONGSON3_WORKAROUNDS bool "Old Loongson 3 LLSC Workarounds" default y if SMP - depends on CPU_LOONGSON3 + depends on CPU_LOONGSON64 help Loongson 3 processors have the llsc issues which require workarounds. Without workarounds the system may hang unexpectedly. 
@@ -1433,7 +1433,7 @@ config CPU_LOONGSON3_WORKAROUNDS config CPU_LOONGSON2E bool "Loongson 2E" depends on SYS_HAS_CPU_LOONGSON2E - select CPU_LOONGSON2 + select CPU_LOONGSON2EF help The Loongson 2E processor implements the MIPS III instruction set with many extensions. @@ -1444,7 +1444,7 @@ config CPU_LOONGSON2E config CPU_LOONGSON2F bool "Loongson 2F" depends on SYS_HAS_CPU_LOONGSON2F - select CPU_LOONGSON2 + select CPU_LOONGSON2EF select GPIOLIB help The Loongson 2F processor implements the MIPS III instruction set @@ -1857,7 +1857,7 @@ config SYS_SUPPORTS_ZBOOT_UART_PROM bool select SYS_SUPPORTS_ZBOOT -config CPU_LOONGSON2 +config CPU_LOONGSON2EF bool select CPU_SUPPORTS_32BIT_KERNEL select CPU_SUPPORTS_64BIT_KERNEL @@ -1900,7 +1900,7 @@ config CPU_BMIPS5000 select SYS_SUPPORTS_HOTPLUG_CPU select CPU_HAS_RIXI -config SYS_HAS_CPU_LOONGSON3 +config SYS_HAS_CPU_LOONGSON64 bool select CPU_SUPPORTS_CPUFREQ select CPU_HAS_RIXI @@ -2162,7 +2162,7 @@ choice config PAGE_SIZE_4KB bool "4kB" - depends on !CPU_LOONGSON2 && !CPU_LOONGSON3 + depends on !CPU_LOONGSON2EF && !CPU_LOONGSON64 help This option select the standard 4kB Linux page size. On some R3000-family processors this is the only available page size. Using @@ -2616,7 +2616,7 @@ config CPU_SUPPORTS_MSA config ARCH_FLATMEM_ENABLE def_bool y - depends on !NUMA && !CPU_LOONGSON2 + depends on !NUMA && !CPU_LOONGSON2EF config ARCH_SPARSEMEM_ENABLE bool @@ -2697,7 +2697,7 @@ config NODES_SHIFT config HW_PERF_EVENTS bool "Enable hardware performance counter support for perf events" - depends on PERF_EVENTS && !OPROFILE && (CPU_MIPS32 || CPU_MIPS64 || CPU_R10000 || CPU_SB1 || CPU_CAVIUM_OCTEON || CPU_XLP || CPU_LOONGSON3) + depends on PERF_EVENTS && !OPROFILE && (CPU_MIPS32 || CPU_MIPS64 || CPU_R10000 || CPU_SB1 || CPU_CAVIUM_OCTEON || CPU_XLP || CPU_LOONGSON64) default y help Enable hardware performance counter support for perf events. 
If diff --git a/arch/mips/include/asm/cop2.h b/arch/mips/include/asm/cop2.h index 63b3468ede4c..6b7396a6a115 100644 --- a/arch/mips/include/asm/cop2.h +++ b/arch/mips/include/asm/cop2.h @@ -33,7 +33,7 @@ extern void nlm_cop2_restore(struct nlm_cop2_state *); #define cop2_present 1 #define cop2_lazy_restore 0 -#elif defined(CONFIG_CPU_LOONGSON3) +#elif defined(CONFIG_CPU_LOONGSON64) #define cop2_present 1 #define cop2_lazy_restore 1 diff --git a/arch/mips/include/asm/cpu-type.h b/arch/mips/include/asm/cpu-type.h index 7bbb66760a07..5117e9119b87 100644 --- a/arch/mips/include/asm/cpu-type.h +++ b/arch/mips/include/asm/cpu-type.h @@ -15,13 +15,12 @@ static inline int __pure __get_cpu_type(const int cpu_type) { switch (cpu_type) { -#if defined(CONFIG_SYS_HAS_CPU_LOONGSON2E) || \ - defined(CONFIG_SYS_HAS_CPU_LOONGSON2F) - case CPU_LOONGSON2: +#if defined(CONFIG_SYS_HAS_CPU_LOONGSON2EF) + case CPU_LOONGSON2EF: #endif -#ifdef CONFIG_SYS_HAS_CPU_LOONGSON3 - case CPU_LOONGSON3: +#ifdef CONFIG_SYS_HAS_CPU_LOONGSON64 + case CPU_LOONGSON64: #endif #if defined(CONFIG_SYS_HAS_CPU_LOONGSON1B) || \ diff --git a/arch/mips/include/asm/cpu.h b/arch/mips/include/asm/cpu.h index 81ddb575502a..0e3a8d4de09d 100644 --- a/arch/mips/include/asm/cpu.h +++ b/arch/mips/include/asm/cpu.h @@ -319,8 +319,8 @@ enum cpu_type_enum { /* * MIPS64 class processors */ - CPU_5KC, CPU_5KE, CPU_20KC, CPU_25KF, CPU_SB1, CPU_SB1A, CPU_LOONGSON2, - CPU_LOONGSON3, CPU_CAVIUM_OCTEON, CPU_CAVIUM_OCTEON_PLUS, + CPU_5KC, CPU_5KE, CPU_20KC, CPU_25KF, CPU_SB1, CPU_SB1A, CPU_LOONGSON2EF, + CPU_LOONGSON64, CPU_CAVIUM_OCTEON, CPU_CAVIUM_OCTEON_PLUS, CPU_CAVIUM_OCTEON2, CPU_CAVIUM_OCTEON3, CPU_XLR, CPU_XLP, CPU_I6500, CPU_QEMU_GENERIC, diff --git a/arch/mips/include/asm/hazards.h b/arch/mips/include/asm/hazards.h index 0fa27446869a..ea6a8c4b49f3 100644 --- a/arch/mips/include/asm/hazards.h +++ b/arch/mips/include/asm/hazards.h @@ -23,7 +23,7 @@ * TLB hazards */ #if (defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6)) && \ - !defined(CONFIG_CPU_CAVIUM_OCTEON) && !defined(CONFIG_LOONGSON3_ENHANCEMENT) + !defined(CONFIG_CPU_CAVIUM_OCTEON) && !defined(CONFIG_LOONGSON64_ENHANCEMENT) /* * MIPSR2 defines ehb for hazard avoidance @@ -158,7 +158,7 @@ do { \ } while (0) #elif defined(CONFIG_MIPS_ALCHEMY) || defined(CONFIG_CPU_CAVIUM_OCTEON) || \ - defined(CONFIG_CPU_LOONGSON2) || defined(CONFIG_LOONGSON3_ENHANCEMENT) || \ + defined(CONFIG_CPU_LOONGSON2EF) || defined(CONFIG_LOONGSON64_ENHANCEMENT) || \ defined(CONFIG_CPU_R10000) || defined(CONFIG_CPU_R5500) || defined(CONFIG_CPU_XLR) /* diff --git a/arch/mips/include/asm/io.h b/arch/mips/include/asm/io.h index 2b7b56736372..3f6ce74335b4 100644 --- a/arch/mips/include/asm/io.h +++ b/arch/mips/include/asm/io.h @@ -306,7 +306,7 @@ static inline void iounmap(const volatile void __iomem *addr) #undef __IS_KSEG1 } -#if defined(CONFIG_CPU_CAVIUM_OCTEON) || defined(CONFIG_CPU_LOONGSON3) +#if defined(CONFIG_CPU_CAVIUM_OCTEON) || defined(CONFIG_CPU_LOONGSON64) #define war_io_reorder_wmb() wmb() #else #define war_io_reorder_wmb() barrier() diff --git a/arch/mips/include/asm/irqflags.h b/arch/mips/include/asm/irqflags.h index f0b862a83816..4d742acf2be0 100644 --- a/arch/mips/include/asm/irqflags.h +++ b/arch/mips/include/asm/irqflags.h @@ -41,7 +41,7 @@ static inline unsigned long arch_local_irq_save(void) " .set push \n" " .set reorder \n" " .set noat \n" -#if defined(CONFIG_CPU_LOONGSON3) || defined (CONFIG_CPU_LOONGSON1) +#if defined(CONFIG_CPU_LOONGSON64) || defined (CONFIG_CPU_LOONGSON1) " mfc0 %[flags], 
$12 \n" " di \n" #else diff --git a/arch/mips/include/asm/mach-loongson64/cpu-feature-overrides.h b/arch/mips/include/asm/mach-loongson64/cpu-feature-overrides.h index 4aca25f2ff06..83ad90d8005d 100644 --- a/arch/mips/include/asm/mach-loongson64/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-loongson64/cpu-feature-overrides.h @@ -44,7 +44,7 @@ #define cpu_has_vtag_icache 0 #define cpu_has_watch 1 -#ifdef CONFIG_CPU_LOONGSON3 +#ifdef CONFIG_CPU_LOONGSON64 #define cpu_has_wsbh 1 #define cpu_has_ic_fills_f_dc 1 #define cpu_hwrena_impl_bits 0xc0000000 diff --git a/arch/mips/include/asm/mach-loongson64/irq.h b/arch/mips/include/asm/mach-loongson64/irq.h index be9f727a9328..557e069c400c 100644 --- a/arch/mips/include/asm/mach-loongson64/irq.h +++ b/arch/mips/include/asm/mach-loongson64/irq.h @@ -4,7 +4,7 @@ #include -#ifdef CONFIG_CPU_LOONGSON3 +#ifdef CONFIG_CPU_LOONGSON64 /* cpu core interrupt numbers */ #define MIPS_CPU_IRQ_BASE 56 diff --git a/arch/mips/include/asm/mach-loongson64/kernel-entry-init.h b/arch/mips/include/asm/mach-loongson64/kernel-entry-init.h index b9687320024d..28ccb06c8289 100644 --- a/arch/mips/include/asm/mach-loongson64/kernel-entry-init.h +++ b/arch/mips/include/asm/mach-loongson64/kernel-entry-init.h @@ -17,7 +17,7 @@ * Override macros used in arch/mips/kernel/head.S. */ .macro kernel_entry_setup -#ifdef CONFIG_CPU_LOONGSON3 +#ifdef CONFIG_CPU_LOONGSON64 .set push .set mips64 /* Set LPA on LOONGSON3 config3 */ @@ -54,7 +54,7 @@ * Do SMP slave processor setup. */ .macro smp_slave_setup -#ifdef CONFIG_CPU_LOONGSON3 +#ifdef CONFIG_CPU_LOONGSON64 .set push .set mips64 /* Set LPA on LOONGSON3 config3 */ diff --git a/arch/mips/include/asm/mach-loongson64/loongson.h b/arch/mips/include/asm/mach-loongson64/loongson.h index 694a58574ec0..40a24b76b874 100644 --- a/arch/mips/include/asm/mach-loongson64/loongson.h +++ b/arch/mips/include/asm/mach-loongson64/loongson.h @@ -109,7 +109,7 @@ static inline void do_perfcnt_IRQ(void) #define LOONGSON_PCICFG_SIZE 0x00000800 /* 2K */ #define LOONGSON_PCICFG_TOP (LOONGSON_PCICFG_BASE+LOONGSON_PCICFG_SIZE-1) -#ifdef CONFIG_CPU_LOONGSON3 +#ifdef CONFIG_CPU_LOONGSON64 #define LOONGSON_PCIIO_BASE loongson_sysconf.pci_io_base #else #define LOONGSON_PCIIO_BASE 0x1fd00000 diff --git a/arch/mips/include/asm/mach-loongson64/pci.h b/arch/mips/include/asm/mach-loongson64/pci.h index 97f807fb2117..05cc9052772f 100644 --- a/arch/mips/include/asm/mach-loongson64/pci.h +++ b/arch/mips/include/asm/mach-loongson64/pci.h @@ -35,7 +35,7 @@ extern struct pci_ops loongson_pci_ops; #else /* loongson2f/32bit & loongson2e */ /* this pci memory space is mapped by pcimap in pci.c */ -#ifdef CONFIG_CPU_LOONGSON3 +#ifdef CONFIG_CPU_LOONGSON64 #define LOONGSON_PCI_MEM_START 0x40000000UL #define LOONGSON_PCI_MEM_END 0x7effffffUL #else diff --git a/arch/mips/include/asm/module.h b/arch/mips/include/asm/module.h index ed70994fbbec..9fe9515204d6 100644 --- a/arch/mips/include/asm/module.h +++ b/arch/mips/include/asm/module.h @@ -121,10 +121,10 @@ search_module_dbetables(unsigned long addr) #define MODULE_PROC_FAMILY "SB1 " #elif defined CONFIG_CPU_LOONGSON1 #define MODULE_PROC_FAMILY "LOONGSON1 " -#elif defined CONFIG_CPU_LOONGSON2 -#define MODULE_PROC_FAMILY "LOONGSON2 " -#elif defined CONFIG_CPU_LOONGSON3 -#define MODULE_PROC_FAMILY "LOONGSON3 " +#elif defined CONFIG_CPU_LOONGSON2EF +#define MODULE_PROC_FAMILY "LOONGSON2EF " +#elif defined CONFIG_CPU_LOONGSON64 +#define MODULE_PROC_FAMILY "LOONGSON64 " #elif defined CONFIG_CPU_CAVIUM_OCTEON #define 
MODULE_PROC_FAMILY "OCTEON " #elif defined CONFIG_CPU_XLR diff --git a/arch/mips/include/asm/processor.h b/arch/mips/include/asm/processor.h index fba18d4a9190..7619ad319400 100644 --- a/arch/mips/include/asm/processor.h +++ b/arch/mips/include/asm/processor.h @@ -385,7 +385,7 @@ unsigned long get_wchan(struct task_struct *p); #define KSTK_ESP(tsk) (task_pt_regs(tsk)->regs[29]) #define KSTK_STATUS(tsk) (task_pt_regs(tsk)->cp0_status) -#ifdef CONFIG_CPU_LOONGSON3 +#ifdef CONFIG_CPU_LOONGSON64 /* * Loongson-3's SFB (Store-Fill-Buffer) may buffer writes indefinitely when a * tight read loop is executed, because reads take priority over writes & the diff --git a/arch/mips/include/asm/r4kcache.h b/arch/mips/include/asm/r4kcache.h index e73fc9e899d2..15ab16f99f28 100644 --- a/arch/mips/include/asm/r4kcache.h +++ b/arch/mips/include/asm/r4kcache.h @@ -72,7 +72,7 @@ static inline void flush_scache_line_indexed(unsigned long addr) static inline void flush_icache_line(unsigned long addr) { switch (boot_cpu_type()) { - case CPU_LOONGSON2: + case CPU_LOONGSON2EF: cache_op(Hit_Invalidate_I_Loongson2, addr); break; @@ -154,7 +154,7 @@ static inline void flush_scache_line(unsigned long addr) static inline int protected_flush_icache_line(unsigned long addr) { switch (boot_cpu_type()) { - case CPU_LOONGSON2: + case CPU_LOONGSON2EF: return protected_cache_op(Hit_Invalidate_I_Loongson2, addr); default: diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c index bbfc954615c8..a8d49f111cce 100644 --- a/arch/mips/kernel/cpu-probe.c +++ b/arch/mips/kernel/cpu-probe.c @@ -608,7 +608,7 @@ static int set_ftlb_enable(struct cpuinfo_mips *c, enum ftlb_flags flags) if (!(flags & FTLB_EN)) return 1; return 0; - case CPU_LOONGSON3: + case CPU_LOONGSON64: /* Flush ITLB, DTLB, VTLB and FTLB */ write_c0_diag(LOONGSON_DIAG_ITLB | LOONGSON_DIAG_DTLB | LOONGSON_DIAG_VTLB | LOONGSON_DIAG_FTLB); @@ -1529,21 +1529,21 @@ static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu) case PRID_IMP_LOONGSON_64C: /* Loongson-2/3 */ switch (c->processor_id & PRID_REV_MASK) { case PRID_REV_LOONGSON2E: - c->cputype = CPU_LOONGSON2; + c->cputype = CPU_LOONGSON2EF; __cpu_name[cpu] = "ICT Loongson-2"; set_elf_platform(cpu, "loongson2e"); set_isa(c, MIPS_CPU_ISA_III); c->fpu_msk31 |= FPU_CSR_CONDX; break; case PRID_REV_LOONGSON2F: - c->cputype = CPU_LOONGSON2; + c->cputype = CPU_LOONGSON2EF; __cpu_name[cpu] = "ICT Loongson-2"; set_elf_platform(cpu, "loongson2f"); set_isa(c, MIPS_CPU_ISA_III); c->fpu_msk31 |= FPU_CSR_CONDX; break; case PRID_REV_LOONGSON3A_R1: - c->cputype = CPU_LOONGSON3; + c->cputype = CPU_LOONGSON64; __cpu_name[cpu] = "ICT Loongson-3"; set_elf_platform(cpu, "loongson3a"); set_isa(c, MIPS_CPU_ISA_M64R1); @@ -1552,7 +1552,7 @@ static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu) break; case PRID_REV_LOONGSON3B_R1: case PRID_REV_LOONGSON3B_R2: - c->cputype = CPU_LOONGSON3; + c->cputype = CPU_LOONGSON64; __cpu_name[cpu] = "ICT Loongson-3"; set_elf_platform(cpu, "loongson3b"); set_isa(c, MIPS_CPU_ISA_M64R1); @@ -1908,14 +1908,14 @@ static inline void cpu_probe_loongson(struct cpuinfo_mips *c, unsigned int cpu) switch (c->processor_id & PRID_REV_MASK) { case PRID_REV_LOONGSON3A_R2_0: case PRID_REV_LOONGSON3A_R2_1: - c->cputype = CPU_LOONGSON3; + c->cputype = CPU_LOONGSON64; __cpu_name[cpu] = "ICT Loongson-3"; set_elf_platform(cpu, "loongson3a"); set_isa(c, MIPS_CPU_ISA_M64R2); break; case PRID_REV_LOONGSON3A_R3_0: case PRID_REV_LOONGSON3A_R3_1: - c->cputype = 
CPU_LOONGSON3; + c->cputype = CPU_LOONGSON64; __cpu_name[cpu] = "ICT Loongson-3"; set_elf_platform(cpu, "loongson3a"); set_isa(c, MIPS_CPU_ISA_M64R2); @@ -1929,7 +1929,7 @@ static inline void cpu_probe_loongson(struct cpuinfo_mips *c, unsigned int cpu) MIPS_ASE_LOONGSON_EXT | MIPS_ASE_LOONGSON_EXT2); break; case PRID_IMP_LOONGSON_64G: - c->cputype = CPU_LOONGSON3; + c->cputype = CPU_LOONGSON64; __cpu_name[cpu] = "ICT Loongson-3"; set_elf_platform(cpu, "loongson3a"); set_isa(c, MIPS_CPU_ISA_M64R2); diff --git a/arch/mips/kernel/idle.c b/arch/mips/kernel/idle.c index 980d6c39aab3..57dfa6c9edc5 100644 --- a/arch/mips/kernel/idle.c +++ b/arch/mips/kernel/idle.c @@ -178,7 +178,7 @@ void __init check_wait(void) case CPU_XLP: cpu_wait = r4k_wait; break; - case CPU_LOONGSON3: + case CPU_LOONGSON64: if ((c->processor_id & (PRID_IMP_MASK | PRID_REV_MASK)) >= (PRID_IMP_LOONGSON_64C | PRID_REV_LOONGSON3A_R2_0)) cpu_wait = r4k_wait; diff --git a/arch/mips/kernel/perf_event_mipsxx.c b/arch/mips/kernel/perf_event_mipsxx.c index a3e2da8391ea..0af456a94916 100644 --- a/arch/mips/kernel/perf_event_mipsxx.c +++ b/arch/mips/kernel/perf_event_mipsxx.c @@ -1623,7 +1623,7 @@ static const struct mips_perf_event *mipsxx_pmu_map_raw_event(u64 config) raw_event.cntr_mask = raw_id > 127 ? CNTR_ODD : CNTR_EVEN; break; - case CPU_LOONGSON3: + case CPU_LOONGSON64: raw_event.cntr_mask = raw_id > 127 ? CNTR_ODD : CNTR_EVEN; break; } @@ -1769,7 +1769,7 @@ init_hw_perf_events(void) mipspmu.general_event_map = &mipsxxcore_event_map; mipspmu.cache_event_map = &mipsxxcore_cache_map; break; - case CPU_LOONGSON3: + case CPU_LOONGSON64: mipspmu.name = "mips/loongson3"; mipspmu.general_event_map = &loongson3_event_map; mipspmu.cache_event_map = &loongson3_cache_map; diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index 2af05879772f..c3d4212b5f1d 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -287,7 +287,7 @@ static unsigned long __init init_initrd(void) * Initialize the bootmem allocator. It also setup initrd related data * if needed. */ -#if defined(CONFIG_SGI_IP27) || (defined(CONFIG_CPU_LOONGSON3) && defined(CONFIG_NUMA)) +#if defined(CONFIG_SGI_IP27) || (defined(CONFIG_CPU_LOONGSON64) && defined(CONFIG_NUMA)) static void __init bootmem_init(void) { diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 342e41de9d64..0c2570e6fcf6 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -2394,7 +2394,7 @@ void __init trap_init(void) else { if (cpu_has_vtag_icache) set_except_vector(EXCCODE_RI, handle_ri_rdhwr_tlbp); - else if (current_cpu_type() == CPU_LOONGSON3) + else if (current_cpu_type() == CPU_LOONGSON64) set_except_vector(EXCCODE_RI, handle_ri_rdhwr_tlbp); else set_except_vector(EXCCODE_RI, handle_ri_rdhwr); diff --git a/arch/mips/lib/csum_partial.S b/arch/mips/lib/csum_partial.S index 2ff84f4b1717..fda7b57b826e 100644 --- a/arch/mips/lib/csum_partial.S +++ b/arch/mips/lib/csum_partial.S @@ -279,7 +279,7 @@ EXPORT_SYMBOL(csum_partial) #endif /* odd buffer alignment? 
*/ -#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_LOONGSON3) +#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_LOONGSON64) .set push .set arch=mips32r2 wsbh v1, sum @@ -732,7 +732,7 @@ EXPORT_SYMBOL(csum_partial) addu sum, v1 #endif -#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_LOONGSON3) +#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_LOONGSON64) .set push .set arch=mips32r2 wsbh v1, sum diff --git a/arch/mips/loongson64/Kconfig b/arch/mips/loongson64/Kconfig index 4c14a11525f4..d08b20ff2b27 100644 --- a/arch/mips/loongson64/Kconfig +++ b/arch/mips/loongson64/Kconfig @@ -79,7 +79,7 @@ config LOONGSON_MACH3X select I8259 select IRQ_MIPS_CPU select NR_CPUS_DEFAULT_4 - select SYS_HAS_CPU_LOONGSON3 + select SYS_HAS_CPU_LOONGSON64 select SYS_HAS_EARLY_PRINTK select SYS_SUPPORTS_SMP select SYS_SUPPORTS_HOTPLUG_CPU diff --git a/arch/mips/loongson64/Makefile b/arch/mips/loongson64/Makefile index 1a5df773707d..c74bc0251e9d 100644 --- a/arch/mips/loongson64/Makefile +++ b/arch/mips/loongson64/Makefile @@ -21,4 +21,4 @@ obj-$(CONFIG_LEMOTE_MACH2F) += lemote-2f/ # All Loongson-3 family machines # -obj-$(CONFIG_CPU_LOONGSON3) += loongson-3/ +obj-$(CONFIG_CPU_LOONGSON64) += loongson-3/ diff --git a/arch/mips/loongson64/Platform b/arch/mips/loongson64/Platform index 28172500f95a..4da74eea7de8 100644 --- a/arch/mips/loongson64/Platform +++ b/arch/mips/loongson64/Platform @@ -3,7 +3,7 @@ # # Only gcc >= 4.4 have Loongson specific support -cflags-$(CONFIG_CPU_LOONGSON2) += -Wa,--trap +cflags-$(CONFIG_CPU_LOONGSON2EF) += -Wa,--trap cflags-$(CONFIG_CPU_LOONGSON2E) += \ $(call cc-option,-march=loongson2e,-march=r4600) cflags-$(CONFIG_CPU_LOONGSON2F) += \ @@ -22,7 +22,7 @@ ifdef CONFIG_CPU_LOONGSON2F_WORKAROUNDS endif endif -cflags-$(CONFIG_CPU_LOONGSON3) += -Wa,--trap +cflags-$(CONFIG_CPU_LOONGSON64) += -Wa,--trap # # Some versions of binutils, not currently mainline as of 2019/02/04, support @@ -44,7 +44,7 @@ cflags-$(CONFIG_CPU_LOONGSON3) += -Wa,--trap # binutils does not merge support for the flag then we can revisit & remove # this later - for now it ensures vendor toolchains don't cause problems. 
# -cflags-$(CONFIG_CPU_LOONGSON3) += $(call as-option,-Wa$(comma)-mno-fix-loongson3-llsc,) +cflags-$(CONFIG_CPU_LOONGSON64) += $(call as-option,-Wa$(comma)-mno-fix-loongson3-llsc,) # # binutils from v2.25 on and gcc starting from v4.9.0 treat -march=loongson3a @@ -55,14 +55,14 @@ cflags-$(CONFIG_CPU_LOONGSON3) += $(call as-option,-Wa$(comma)-mno-fix-loongson3 # ifeq ($(call cc-ifversion, -ge, 0409, y), y) ifeq ($(call ld-ifversion, -ge, 225000000, y), y) - cflags-$(CONFIG_CPU_LOONGSON3) += \ + cflags-$(CONFIG_CPU_LOONGSON64) += \ $(call cc-option,-march=loongson3a -U_MIPS_ISA -D_MIPS_ISA=_MIPS_ISA_MIPS64) else - cflags-$(CONFIG_CPU_LOONGSON3) += \ + cflags-$(CONFIG_CPU_LOONGSON64) += \ $(call cc-option,-march=mips64r2,-mips64r2 -U_MIPS_ISA -D_MIPS_ISA=_MIPS_ISA_MIPS64) endif else - cflags-$(CONFIG_CPU_LOONGSON3) += \ + cflags-$(CONFIG_CPU_LOONGSON64) += \ $(call cc-option,-march=mips64r2,-mips64r2 -U_MIPS_ISA -D_MIPS_ISA=_MIPS_ISA_MIPS64) endif diff --git a/arch/mips/loongson64/common/pci.c b/arch/mips/loongson64/common/pci.c index c47bb7bf3aa4..2d9755c49524 100644 --- a/arch/mips/loongson64/common/pci.c +++ b/arch/mips/loongson64/common/pci.c @@ -87,7 +87,7 @@ static int __init pcibios_init(void) #endif register_pci_controller(&loongson_pci_controller); -#ifdef CONFIG_CPU_LOONGSON3 +#ifdef CONFIG_CPU_LOONGSON64 sbx00_acpi_init(); #endif diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c index 378cbb02dcdd..9d82cb9ced55 100644 --- a/arch/mips/mm/c-r4k.c +++ b/arch/mips/mm/c-r4k.c @@ -324,7 +324,7 @@ static void r4k_blast_icache_page_setup(void) r4k_blast_icache_page = (void *)cache_noop; else if (ic_lsize == 16) r4k_blast_icache_page = blast_icache16_page; - else if (ic_lsize == 32 && current_cpu_type() == CPU_LOONGSON2) + else if (ic_lsize == 32 && current_cpu_type() == CPU_LOONGSON2EF) r4k_blast_icache_page = loongson2_blast_icache32_page; else if (ic_lsize == 32) r4k_blast_icache_page = blast_icache32_page; @@ -373,7 +373,7 @@ static void r4k_blast_icache_page_indexed_setup(void) else if (TX49XX_ICACHE_INDEX_INV_WAR) r4k_blast_icache_page_indexed = tx49_blast_icache32_page_indexed; - else if (current_cpu_type() == CPU_LOONGSON2) + else if (current_cpu_type() == CPU_LOONGSON2EF) r4k_blast_icache_page_indexed = loongson2_blast_icache32_page_indexed; else @@ -399,7 +399,7 @@ static void r4k_blast_icache_setup(void) r4k_blast_icache = blast_r4600_v1_icache32; else if (TX49XX_ICACHE_INDEX_INV_WAR) r4k_blast_icache = tx49_blast_icache32; - else if (current_cpu_type() == CPU_LOONGSON2) + else if (current_cpu_type() == CPU_LOONGSON2EF) r4k_blast_icache = loongson2_blast_icache32; else r4k_blast_icache = blast_icache32; @@ -469,7 +469,7 @@ static void r4k_blast_scache_node_setup(void) { unsigned long sc_lsize = cpu_scache_line_size(); - if (current_cpu_type() != CPU_LOONGSON3) + if (current_cpu_type() != CPU_LOONGSON64) r4k_blast_scache_node = (void *)cache_noop; else if (sc_lsize == 16) r4k_blast_scache_node = blast_scache16_node; @@ -484,7 +484,7 @@ static void r4k_blast_scache_node_setup(void) static inline void local_r4k___flush_cache_all(void * args) { switch (current_cpu_type()) { - case CPU_LOONGSON2: + case CPU_LOONGSON2EF: case CPU_R4000SC: case CPU_R4000MC: case CPU_R4400SC: @@ -501,7 +501,7 @@ static inline void local_r4k___flush_cache_all(void * args) r4k_blast_scache(); break; - case CPU_LOONGSON3: + case CPU_LOONGSON64: /* Use get_ebase_cpunum() for both NUMA=y/n */ r4k_blast_scache_node(get_ebase_cpunum() >> 2); break; @@ -774,7 +774,7 @@ static inline void 
__local_r4k_flush_icache_range(unsigned long start, r4k_blast_icache(); else { switch (boot_cpu_type()) { - case CPU_LOONGSON2: + case CPU_LOONGSON2EF: protected_loongson2_blast_icache_range(start, end); break; @@ -867,7 +867,7 @@ static void r4k_dma_cache_wback_inv(unsigned long addr, unsigned long size) preempt_disable(); if (cpu_has_inclusive_pcaches) { if (size >= scache_size) { - if (current_cpu_type() != CPU_LOONGSON3) + if (current_cpu_type() != CPU_LOONGSON64) r4k_blast_scache(); else r4k_blast_scache_node(pa_to_nid(addr)); @@ -908,7 +908,7 @@ static void r4k_dma_cache_inv(unsigned long addr, unsigned long size) preempt_disable(); if (cpu_has_inclusive_pcaches) { if (size >= scache_size) { - if (current_cpu_type() != CPU_LOONGSON3) + if (current_cpu_type() != CPU_LOONGSON64) r4k_blast_scache(); else r4k_blast_scache_node(pa_to_nid(addr)); @@ -1228,7 +1228,7 @@ static void probe_pcache(void) c->options |= MIPS_CPU_PREFETCH; break; - case CPU_LOONGSON2: + case CPU_LOONGSON2EF: icache_size = 1 << (12 + ((config & CONF_IC) >> 9)); c->icache.linesz = 16 << ((config & CONF_IB) >> 5); if (prid & 0x3) @@ -1246,7 +1246,7 @@ static void probe_pcache(void) c->dcache.waybit = 0; break; - case CPU_LOONGSON3: + case CPU_LOONGSON64: config1 = read_c0_config1(); lsize = (config1 >> 19) & 7; if (lsize) @@ -1457,7 +1457,7 @@ static void probe_pcache(void) c->dcache.flags &= ~MIPS_CACHE_ALIASES; break; - case CPU_LOONGSON2: + case CPU_LOONGSON2EF: /* * LOONGSON2 has 4 way icache, but when using indexed cache op, * one op will act on all 4 ways @@ -1483,7 +1483,7 @@ static void probe_vcache(void) struct cpuinfo_mips *c = ¤t_cpu_data; unsigned int config2, lsize; - if (current_cpu_type() != CPU_LOONGSON3) + if (current_cpu_type() != CPU_LOONGSON64) return; config2 = read_c0_config2(); @@ -1658,11 +1658,11 @@ static void setup_scache(void) #endif return; - case CPU_LOONGSON2: + case CPU_LOONGSON2EF: loongson2_sc_init(); return; - case CPU_LOONGSON3: + case CPU_LOONGSON64: loongson3_sc_init(); return; @@ -1931,7 +1931,7 @@ void r4k_cache_init(void) /* Optimization: an L2 flush implicitly flushes the L1 */ current_cpu_data.options |= MIPS_CPU_INCLUSIVE_CACHES; break; - case CPU_LOONGSON3: + case CPU_LOONGSON64: /* Loongson-3 maintains cache coherency by hardware */ __flush_cache_all = cache_noop; __flush_cache_vmap = cache_noop; diff --git a/arch/mips/mm/page.c b/arch/mips/mm/page.c index 56e4f8bffd4c..c5578897a4fa 100644 --- a/arch/mips/mm/page.c +++ b/arch/mips/mm/page.c @@ -187,7 +187,7 @@ static void set_prefetch_parameters(void) } break; - case CPU_LOONGSON3: + case CPU_LOONGSON64: /* Loongson-3 only support the Pref_Load/Pref_Store. 
*/ pref_bias_clear_store = 128; pref_bias_copy_load = 128; diff --git a/arch/mips/mm/tlb-r4k.c b/arch/mips/mm/tlb-r4k.c index c13e46ced425..83b450ddbbc2 100644 --- a/arch/mips/mm/tlb-r4k.c +++ b/arch/mips/mm/tlb-r4k.c @@ -35,10 +35,10 @@ extern void build_tlb_refill_handler(void); static inline void flush_micro_tlb(void) { switch (current_cpu_type()) { - case CPU_LOONGSON2: + case CPU_LOONGSON2EF: write_c0_diag(LOONGSON_DIAG_ITLB); break; - case CPU_LOONGSON3: + case CPU_LOONGSON64: write_c0_diag(LOONGSON_DIAG_ITLB | LOONGSON_DIAG_DTLB); break; default: diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c index e01cb33bfa1a..b963209bec02 100644 --- a/arch/mips/mm/tlbex.c +++ b/arch/mips/mm/tlbex.c @@ -571,8 +571,8 @@ void build_tlb_write_entry(u32 **p, struct uasm_label **l, case CPU_BMIPS4350: case CPU_BMIPS4380: case CPU_BMIPS5000: - case CPU_LOONGSON2: - case CPU_LOONGSON3: + case CPU_LOONGSON2EF: + case CPU_LOONGSON64: case CPU_R5500: if (m4kc_tlbp_war()) uasm_i_nop(p); @@ -1370,7 +1370,7 @@ static void build_r4000_tlb_refill_handler(void) switch (boot_cpu_type()) { default: if (sizeof(long) == 4) { - case CPU_LOONGSON2: + case CPU_LOONGSON2EF: /* Loongson2 ebase is different than r4k, we have more space */ if ((p - tlb_handler) > 64) panic("TLB refill handler space exceeded"); diff --git a/arch/mips/oprofile/Makefile b/arch/mips/oprofile/Makefile index 011cf9f891e7..e10f216d0422 100644 --- a/arch/mips/oprofile/Makefile +++ b/arch/mips/oprofile/Makefile @@ -14,5 +14,5 @@ oprofile-$(CONFIG_CPU_MIPS64) += op_model_mipsxx.o oprofile-$(CONFIG_CPU_R10000) += op_model_mipsxx.o oprofile-$(CONFIG_CPU_SB1) += op_model_mipsxx.o oprofile-$(CONFIG_CPU_XLR) += op_model_mipsxx.o -oprofile-$(CONFIG_CPU_LOONGSON2) += op_model_loongson2.o -oprofile-$(CONFIG_CPU_LOONGSON3) += op_model_loongson3.o +oprofile-$(CONFIG_CPU_LOONGSON2EF) += op_model_loongson2.o +oprofile-$(CONFIG_CPU_LOONGSON64) += op_model_loongson3.o diff --git a/arch/mips/oprofile/common.c b/arch/mips/oprofile/common.c index 2f33992f6dff..25cfa70f0ae4 100644 --- a/arch/mips/oprofile/common.c +++ b/arch/mips/oprofile/common.c @@ -104,10 +104,10 @@ int __init oprofile_arch_init(struct oprofile_operations *ops) lmodel = &op_model_mipsxx_ops; break; - case CPU_LOONGSON2: + case CPU_LOONGSON2EF: lmodel = &op_model_loongson2_ops; break; - case CPU_LOONGSON3: + case CPU_LOONGSON64: lmodel = &op_model_loongson3_ops; break; }; diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index 38e096e6925f..92d0ff63b3ea 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -298,7 +298,7 @@ config GPIO_IXP4XX config GPIO_LOONGSON bool "Loongson-2/3 GPIO support" - depends on CPU_LOONGSON2 || CPU_LOONGSON3 + depends on CPU_LOONGSON2EF || CPU_LOONGSON64 help driver for GPIO functionality on Loongson-2F/3A/3B processors. 
diff --git a/drivers/gpio/gpio-loongson.c b/drivers/gpio/gpio-loongson.c index 00943170ce36..a42145873cc9 100644 --- a/drivers/gpio/gpio-loongson.c +++ b/drivers/gpio/gpio-loongson.c @@ -22,7 +22,7 @@ #define STLS2F_N_GPIO 4 #define STLS3A_N_GPIO 16 -#ifdef CONFIG_CPU_LOONGSON3 +#ifdef CONFIG_CPU_LOONGSON64 #define LOONGSON_N_GPIO STLS3A_N_GPIO #else #define LOONGSON_N_GPIO STLS2F_N_GPIO diff --git a/include/drm/drm_cache.h b/include/drm/drm_cache.h index 987ff16b9420..e9ad4863d915 100644 --- a/include/drm/drm_cache.h +++ b/include/drm/drm_cache.h @@ -45,7 +45,7 @@ static inline bool drm_arch_can_wc_memory(void) { #if defined(CONFIG_PPC) && !defined(CONFIG_NOT_COHERENT_CACHE) return false; -#elif defined(CONFIG_MIPS) && defined(CONFIG_CPU_LOONGSON3) +#elif defined(CONFIG_MIPS) && defined(CONFIG_CPU_LOONGSON64) return false; #elif defined(CONFIG_ARM) || defined(CONFIG_ARM64) /* -- cgit v1.2.3 From 9b537997b669c42cec67893538037e8d1c83c91c Mon Sep 17 00:00:00 2001 From: Yunfeng Ye Date: Tue, 22 Oct 2019 16:11:18 +0800 Subject: crypto: arm64/aes-neonbs - add return value of skcipher_walk_done() in __xts_crypt() A warning was found by a static code analysis tool: "Identical condition 'err', second condition is always false" Fix this by assigning the return value of skcipher_walk_done() to err. Fixes: 67cfa5d3b721 ("crypto: arm64/aes-neonbs - implement ciphertext stealing for XTS") Signed-off-by: Yunfeng Ye Acked-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm64/crypto/aes-neonbs-glue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/crypto/aes-neonbs-glue.c b/arch/arm64/crypto/aes-neonbs-glue.c index ea873b8904c4..e3e27349a9fe 100644 --- a/arch/arm64/crypto/aes-neonbs-glue.c +++ b/arch/arm64/crypto/aes-neonbs-glue.c @@ -384,7 +384,7 @@ static int __xts_crypt(struct skcipher_request *req, bool encrypt, goto xts_tail; kernel_neon_end(); - skcipher_walk_done(&walk, nbytes); + err = skcipher_walk_done(&walk, nbytes); } if (err || likely(!tail)) -- cgit v1.2.3 From b95bba5d01141ba919c99ea6fde206727f3b3eb4 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 25 Oct 2019 12:41:13 -0700 Subject: crypto: skcipher - rename the crypto_blkcipher module and kconfig option Now that the blkcipher algorithm type has been removed in favor of skcipher, rename the crypto_blkcipher kernel module to crypto_skcipher, and rename the config options accordingly: CONFIG_CRYPTO_BLKCIPHER => CONFIG_CRYPTO_SKCIPHER CONFIG_CRYPTO_BLKCIPHER2 => CONFIG_CRYPTO_SKCIPHER2 Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/arm/crypto/Kconfig | 6 +-- arch/arm64/crypto/Kconfig | 8 ++-- crypto/Kconfig | 84 ++++++++++++++++++------------------ crypto/Makefile | 6 +-- drivers/crypto/Kconfig | 50 ++++++++++----------- drivers/crypto/allwinner/Kconfig | 6 +-- drivers/crypto/amlogic/Kconfig | 2 +- drivers/crypto/caam/Kconfig | 6 +-- drivers/crypto/cavium/nitrox/Kconfig | 2 +- drivers/crypto/ccp/Kconfig | 2 +- drivers/crypto/hisilicon/Kconfig | 2 +- drivers/crypto/qat/Kconfig | 2 +- drivers/crypto/ux500/Kconfig | 2 +- drivers/crypto/virtio/Kconfig | 2 +- drivers/net/wireless/cisco/Kconfig | 2 +- net/bluetooth/Kconfig | 2 +- net/rxrpc/Kconfig | 2 +- net/xfrm/Kconfig | 2 +- 18 files changed, 94 insertions(+), 94 deletions(-) (limited to 'arch') diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig index 9f257c1bf32b..c618c379449f 100644 --- a/arch/arm/crypto/Kconfig +++ b/arch/arm/crypto/Kconfig @@ -81,7 +81,7 @@ config CRYPTO_AES_ARM config CRYPTO_AES_ARM_BS tristate "Bit
sliced AES using NEON instructions" depends on KERNEL_MODE_NEON - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER select CRYPTO_LIB_AES select CRYPTO_SIMD help @@ -97,7 +97,7 @@ config CRYPTO_AES_ARM_BS config CRYPTO_AES_ARM_CE tristate "Accelerated AES using ARMv8 Crypto Extensions" depends on KERNEL_MODE_NEON && (CC_IS_CLANG || GCC_VERSION >= 40800) - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER select CRYPTO_SIMD help Use an implementation of AES in CBC, CTR and XTS modes that uses @@ -130,7 +130,7 @@ config CRYPTO_CRC32_ARM_CE config CRYPTO_CHACHA20_NEON tristate "NEON accelerated ChaCha stream cipher algorithms" depends on KERNEL_MODE_NEON - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER select CRYPTO_CHACHA20 config CRYPTO_NHPOLY1305_NEON diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig index 4922c4451e7c..286e3514d34c 100644 --- a/arch/arm64/crypto/Kconfig +++ b/arch/arm64/crypto/Kconfig @@ -86,7 +86,7 @@ config CRYPTO_AES_ARM64_CE_CCM config CRYPTO_AES_ARM64_CE_BLK tristate "AES in ECB/CBC/CTR/XTS modes using ARMv8 Crypto Extensions" depends on KERNEL_MODE_NEON - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER select CRYPTO_AES_ARM64_CE select CRYPTO_AES_ARM64 select CRYPTO_SIMD @@ -94,7 +94,7 @@ config CRYPTO_AES_ARM64_CE_BLK config CRYPTO_AES_ARM64_NEON_BLK tristate "AES in ECB/CBC/CTR/XTS modes using NEON instructions" depends on KERNEL_MODE_NEON - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER select CRYPTO_AES_ARM64 select CRYPTO_LIB_AES select CRYPTO_SIMD @@ -102,7 +102,7 @@ config CRYPTO_AES_ARM64_NEON_BLK config CRYPTO_CHACHA20_NEON tristate "ChaCha20, XChaCha20, and XChaCha12 stream ciphers using NEON instructions" depends on KERNEL_MODE_NEON - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER select CRYPTO_CHACHA20 config CRYPTO_NHPOLY1305_NEON @@ -113,7 +113,7 @@ config CRYPTO_NHPOLY1305_NEON config CRYPTO_AES_ARM64_BS tristate "AES in ECB/CBC/CTR/XTS modes using bit-sliced NEON algorithm" depends on KERNEL_MODE_NEON - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER select CRYPTO_AES_ARM64_NEON_BLK select CRYPTO_AES_ARM64 select CRYPTO_LIB_AES diff --git a/crypto/Kconfig b/crypto/Kconfig index 083e9ffc17b0..b021b6374d9e 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -52,12 +52,12 @@ config CRYPTO_AEAD2 select CRYPTO_NULL2 select CRYPTO_RNG2 -config CRYPTO_BLKCIPHER +config CRYPTO_SKCIPHER tristate - select CRYPTO_BLKCIPHER2 + select CRYPTO_SKCIPHER2 select CRYPTO_ALGAPI -config CRYPTO_BLKCIPHER2 +config CRYPTO_SKCIPHER2 tristate select CRYPTO_ALGAPI2 select CRYPTO_RNG2 @@ -123,7 +123,7 @@ config CRYPTO_MANAGER2 def_tristate CRYPTO_MANAGER || (CRYPTO_MANAGER!=n && CRYPTO_ALGAPI=y) select CRYPTO_AEAD2 select CRYPTO_HASH2 - select CRYPTO_BLKCIPHER2 + select CRYPTO_SKCIPHER2 select CRYPTO_AKCIPHER2 select CRYPTO_KPP2 select CRYPTO_ACOMP2 @@ -169,7 +169,7 @@ config CRYPTO_NULL config CRYPTO_NULL2 tristate select CRYPTO_ALGAPI2 - select CRYPTO_BLKCIPHER2 + select CRYPTO_SKCIPHER2 select CRYPTO_HASH2 config CRYPTO_PCRYPT @@ -184,7 +184,7 @@ config CRYPTO_PCRYPT config CRYPTO_CRYPTD tristate "Software async crypto daemon" - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER select CRYPTO_HASH select CRYPTO_MANAGER help @@ -195,7 +195,7 @@ config CRYPTO_CRYPTD config CRYPTO_AUTHENC tristate "Authenc support" select CRYPTO_AEAD - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER select CRYPTO_MANAGER select CRYPTO_HASH select CRYPTO_NULL @@ -217,7 +217,7 @@ config CRYPTO_SIMD config CRYPTO_GLUE_HELPER_X86 tristate depends on X86 - select 
CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER config CRYPTO_ENGINE tristate @@ -323,7 +323,7 @@ config CRYPTO_AEGIS128_AESNI_SSE2 config CRYPTO_SEQIV tristate "Sequence Number IV Generator" select CRYPTO_AEAD - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER select CRYPTO_NULL select CRYPTO_RNG_DEFAULT select CRYPTO_MANAGER @@ -346,7 +346,7 @@ comment "Block modes" config CRYPTO_CBC tristate "CBC support" - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER select CRYPTO_MANAGER help CBC: Cipher Block Chaining mode @@ -354,7 +354,7 @@ config CRYPTO_CBC config CRYPTO_CFB tristate "CFB support" - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER select CRYPTO_MANAGER help CFB: Cipher FeedBack mode @@ -362,7 +362,7 @@ config CRYPTO_CFB config CRYPTO_CTR tristate "CTR support" - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER select CRYPTO_SEQIV select CRYPTO_MANAGER help @@ -371,7 +371,7 @@ config CRYPTO_CTR config CRYPTO_CTS tristate "CTS support" - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER select CRYPTO_MANAGER help CTS: Cipher Text Stealing @@ -386,7 +386,7 @@ config CRYPTO_CTS config CRYPTO_ECB tristate "ECB support" - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER select CRYPTO_MANAGER help ECB: Electronic CodeBook mode @@ -395,7 +395,7 @@ config CRYPTO_ECB config CRYPTO_LRW tristate "LRW support" - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER select CRYPTO_MANAGER select CRYPTO_GF128MUL help @@ -407,7 +407,7 @@ config CRYPTO_LRW config CRYPTO_OFB tristate "OFB support" - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER select CRYPTO_MANAGER help OFB: the Output Feedback mode makes a block cipher into a synchronous @@ -419,7 +419,7 @@ config CRYPTO_OFB config CRYPTO_PCBC tristate "PCBC support" - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER select CRYPTO_MANAGER help PCBC: Propagating Cipher Block Chaining mode @@ -427,7 +427,7 @@ config CRYPTO_PCBC config CRYPTO_XTS tristate "XTS support" - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER select CRYPTO_MANAGER select CRYPTO_ECB help @@ -437,7 +437,7 @@ config CRYPTO_XTS config CRYPTO_KEYWRAP tristate "Key wrapping support" - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER select CRYPTO_MANAGER help Support for key wrapping (NIST SP800-38F / RFC3394) without @@ -1085,7 +1085,7 @@ config CRYPTO_AES_NI_INTEL select CRYPTO_AEAD select CRYPTO_LIB_AES select CRYPTO_ALGAPI - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER select CRYPTO_GLUE_HELPER_X86 if 64BIT select CRYPTO_SIMD help @@ -1115,7 +1115,7 @@ config CRYPTO_AES_NI_INTEL config CRYPTO_AES_SPARC64 tristate "AES cipher algorithms (SPARC64)" depends on SPARC64 - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER help Use SPARC64 crypto opcodes for AES algorithm. @@ -1142,7 +1142,7 @@ config CRYPTO_AES_SPARC64 config CRYPTO_AES_PPC_SPE tristate "AES cipher algorithms (PPC SPE)" depends on PPC && SPE - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER help AES cipher algorithms (FIPS-197). Additionally the acceleration for popular block cipher modes ECB, CBC, CTR and XTS is supported. @@ -1172,7 +1172,7 @@ config CRYPTO_LIB_ARC4 config CRYPTO_ARC4 tristate "ARC4 cipher algorithm" - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER select CRYPTO_LIB_ARC4 help ARC4 cipher algorithm. @@ -1208,7 +1208,7 @@ config CRYPTO_BLOWFISH_COMMON config CRYPTO_BLOWFISH_X86_64 tristate "Blowfish cipher algorithm (x86_64)" depends on X86 && 64BIT - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER select CRYPTO_BLOWFISH_COMMON help Blowfish cipher algorithm (x86_64), by Bruce Schneier. 
@@ -1239,7 +1239,7 @@ config CRYPTO_CAMELLIA_X86_64 tristate "Camellia cipher algorithm (x86_64)" depends on X86 && 64BIT depends on CRYPTO - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER select CRYPTO_GLUE_HELPER_X86 help Camellia cipher algorithm module (x86_64). @@ -1256,7 +1256,7 @@ config CRYPTO_CAMELLIA_AESNI_AVX_X86_64 tristate "Camellia cipher algorithm (x86_64/AES-NI/AVX)" depends on X86 && 64BIT depends on CRYPTO - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER select CRYPTO_CAMELLIA_X86_64 select CRYPTO_GLUE_HELPER_X86 select CRYPTO_SIMD @@ -1293,7 +1293,7 @@ config CRYPTO_CAMELLIA_SPARC64 depends on SPARC64 depends on CRYPTO select CRYPTO_ALGAPI - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER help Camellia cipher algorithm module (SPARC64). @@ -1322,7 +1322,7 @@ config CRYPTO_CAST5 config CRYPTO_CAST5_AVX_X86_64 tristate "CAST5 (CAST-128) cipher algorithm (x86_64/AVX)" depends on X86 && 64BIT - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER select CRYPTO_CAST5 select CRYPTO_CAST_COMMON select CRYPTO_SIMD @@ -1344,7 +1344,7 @@ config CRYPTO_CAST6 config CRYPTO_CAST6_AVX_X86_64 tristate "CAST6 (CAST-256) cipher algorithm (x86_64/AVX)" depends on X86 && 64BIT - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER select CRYPTO_CAST6 select CRYPTO_CAST_COMMON select CRYPTO_GLUE_HELPER_X86 @@ -1372,7 +1372,7 @@ config CRYPTO_DES_SPARC64 depends on SPARC64 select CRYPTO_ALGAPI select CRYPTO_LIB_DES - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER help DES cipher algorithm (FIPS 46-2), and Triple DES EDE (FIPS 46-3), optimized using SPARC64 crypto opcodes. @@ -1380,7 +1380,7 @@ config CRYPTO_DES_SPARC64 config CRYPTO_DES3_EDE_X86_64 tristate "Triple DES EDE cipher algorithm (x86-64)" depends on X86 && 64BIT - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER select CRYPTO_LIB_DES help Triple DES EDE (FIPS 46-3) algorithm. @@ -1393,7 +1393,7 @@ config CRYPTO_DES3_EDE_X86_64 config CRYPTO_FCRYPT tristate "FCrypt cipher algorithm" select CRYPTO_ALGAPI - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER help FCrypt algorithm used by RxRPC. @@ -1412,7 +1412,7 @@ config CRYPTO_KHAZAD config CRYPTO_SALSA20 tristate "Salsa20 stream cipher algorithm" - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER help Salsa20 stream cipher algorithm. @@ -1424,7 +1424,7 @@ config CRYPTO_SALSA20 config CRYPTO_CHACHA20 tristate "ChaCha stream cipher algorithms" - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER help The ChaCha20, XChaCha20, and XChaCha12 stream cipher algorithms. 
@@ -1446,7 +1446,7 @@ config CRYPTO_CHACHA20 config CRYPTO_CHACHA20_X86_64 tristate "ChaCha stream cipher algorithms (x86_64/SSSE3/AVX2/AVX-512VL)" depends on X86 && 64BIT - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER select CRYPTO_CHACHA20 help SSSE3, AVX2, and AVX-512VL optimized implementations of the ChaCha20, @@ -1482,7 +1482,7 @@ config CRYPTO_SERPENT config CRYPTO_SERPENT_SSE2_X86_64 tristate "Serpent cipher algorithm (x86_64/SSE2)" depends on X86 && 64BIT - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER select CRYPTO_GLUE_HELPER_X86 select CRYPTO_SERPENT select CRYPTO_SIMD @@ -1501,7 +1501,7 @@ config CRYPTO_SERPENT_SSE2_X86_64 config CRYPTO_SERPENT_SSE2_586 tristate "Serpent cipher algorithm (i586/SSE2)" depends on X86 && !64BIT - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER select CRYPTO_GLUE_HELPER_X86 select CRYPTO_SERPENT select CRYPTO_SIMD @@ -1520,7 +1520,7 @@ config CRYPTO_SERPENT_SSE2_586 config CRYPTO_SERPENT_AVX_X86_64 tristate "Serpent cipher algorithm (x86_64/AVX)" depends on X86 && 64BIT - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER select CRYPTO_GLUE_HELPER_X86 select CRYPTO_SERPENT select CRYPTO_SIMD @@ -1651,7 +1651,7 @@ config CRYPTO_TWOFISH_X86_64 config CRYPTO_TWOFISH_X86_64_3WAY tristate "Twofish cipher algorithm (x86_64, 3-way parallel)" depends on X86 && 64BIT - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER select CRYPTO_TWOFISH_COMMON select CRYPTO_TWOFISH_X86_64 select CRYPTO_GLUE_HELPER_X86 @@ -1672,7 +1672,7 @@ config CRYPTO_TWOFISH_X86_64_3WAY config CRYPTO_TWOFISH_AVX_X86_64 tristate "Twofish cipher algorithm (x86_64/AVX)" depends on X86 && 64BIT - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER select CRYPTO_GLUE_HELPER_X86 select CRYPTO_SIMD select CRYPTO_TWOFISH_COMMON @@ -1823,7 +1823,7 @@ config CRYPTO_USER_API_HASH config CRYPTO_USER_API_SKCIPHER tristate "User-space interface for symmetric key cipher algorithms" depends on NET - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER select CRYPTO_USER_API help This option enables the user-spaces interface for symmetric @@ -1842,7 +1842,7 @@ config CRYPTO_USER_API_AEAD tristate "User-space interface for AEAD cipher algorithms" depends on NET select CRYPTO_AEAD - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER select CRYPTO_NULL select CRYPTO_USER_API help diff --git a/crypto/Makefile b/crypto/Makefile index 66b56ec37dd4..efe63940b4e9 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -15,9 +15,9 @@ obj-$(CONFIG_CRYPTO_ALGAPI2) += crypto_algapi.o obj-$(CONFIG_CRYPTO_AEAD2) += aead.o -crypto_blkcipher-y := ablkcipher.o -crypto_blkcipher-y += skcipher.o -obj-$(CONFIG_CRYPTO_BLKCIPHER2) += crypto_blkcipher.o +crypto_skcipher-y := ablkcipher.o +crypto_skcipher-y += skcipher.o +obj-$(CONFIG_CRYPTO_SKCIPHER2) += crypto_skcipher.o obj-$(CONFIG_CRYPTO_SEQIV) += seqiv.o obj-$(CONFIG_CRYPTO_ECHAINIV) += echainiv.o diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index 04a2f53e66a3..607d87788694 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -28,7 +28,7 @@ config CRYPTO_DEV_PADLOCK config CRYPTO_DEV_PADLOCK_AES tristate "PadLock driver for AES algorithm" depends on CRYPTO_DEV_PADLOCK - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER select CRYPTO_LIB_AES help Use VIA PadLock for AES algorithm. 
@@ -56,7 +56,7 @@ config CRYPTO_DEV_GEODE tristate "Support for the Geode LX AES engine" depends on X86_32 && PCI select CRYPTO_ALGAPI - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER help Say 'Y' here to use the AMD Geode LX processor on-board AES engine for the CryptoAPI AES algorithm. @@ -109,7 +109,7 @@ config CRYPTO_PAES_S390 depends on ZCRYPT depends on PKEY select CRYPTO_ALGAPI - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER help This is the s390 hardware accelerated implementation of the AES cipher algorithms for use with protected key. @@ -171,7 +171,7 @@ config CRYPTO_DES_S390 tristate "DES and Triple DES cipher algorithms" depends on S390 select CRYPTO_ALGAPI - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER select CRYPTO_LIB_DES help This is the s390 hardware accelerated implementation of the @@ -184,7 +184,7 @@ config CRYPTO_AES_S390 tristate "AES cipher algorithms" depends on S390 select CRYPTO_ALGAPI - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER help This is the s390 hardware accelerated implementation of the AES cipher algorithms (FIPS-197). @@ -238,7 +238,7 @@ config CRYPTO_DEV_MARVELL_CESA depends on PLAT_ORION || ARCH_MVEBU select CRYPTO_LIB_AES select CRYPTO_LIB_DES - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER select CRYPTO_HASH select SRAM help @@ -250,7 +250,7 @@ config CRYPTO_DEV_MARVELL_CESA config CRYPTO_DEV_NIAGARA2 tristate "Niagara2 Stream Processing Unit driver" select CRYPTO_LIB_DES - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER select CRYPTO_HASH select CRYPTO_MD5 select CRYPTO_SHA1 @@ -267,7 +267,7 @@ config CRYPTO_DEV_NIAGARA2 config CRYPTO_DEV_HIFN_795X tristate "Driver HIFN 795x crypto accelerator chips" select CRYPTO_LIB_DES - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER select HW_RANDOM if CRYPTO_DEV_HIFN_795X_RNG depends on PCI depends on !ARCH_DMA_ADDR_T_64BIT @@ -287,7 +287,7 @@ config CRYPTO_DEV_TALITOS tristate "Talitos Freescale Security Engine (SEC)" select CRYPTO_AEAD select CRYPTO_AUTHENC - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER select CRYPTO_HASH select HW_RANDOM depends on FSL_SOC @@ -325,7 +325,7 @@ config CRYPTO_DEV_IXP4XX select CRYPTO_LIB_DES select CRYPTO_AEAD select CRYPTO_AUTHENC - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER help Driver for the IXP4xx NPE crypto engine. @@ -338,7 +338,7 @@ config CRYPTO_DEV_PPC4XX select CRYPTO_CCM select CRYPTO_CTR select CRYPTO_GCM - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER help This option allows you to have support for AMCC crypto acceleration. @@ -375,7 +375,7 @@ config CRYPTO_DEV_OMAP_AES tristate "Support for OMAP AES hw engine" depends on ARCH_OMAP2 || ARCH_OMAP3 || ARCH_OMAP2PLUS select CRYPTO_AES - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER select CRYPTO_ENGINE select CRYPTO_CBC select CRYPTO_ECB @@ -389,7 +389,7 @@ config CRYPTO_DEV_OMAP_DES tristate "Support for OMAP DES/3DES hw engine" depends on ARCH_OMAP2PLUS select CRYPTO_LIB_DES - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER select CRYPTO_ENGINE help OMAP processors have DES/3DES module accelerator. 
Select this if you @@ -405,7 +405,7 @@ config CRYPTO_DEV_PICOXCELL select CRYPTO_AEAD select CRYPTO_AES select CRYPTO_AUTHENC - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER select CRYPTO_LIB_DES select CRYPTO_CBC select CRYPTO_ECB @@ -420,7 +420,7 @@ config CRYPTO_DEV_PICOXCELL config CRYPTO_DEV_SAHARA tristate "Support for SAHARA crypto accelerator" depends on ARCH_MXC && OF - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER select CRYPTO_AES select CRYPTO_ECB help @@ -447,7 +447,7 @@ config CRYPTO_DEV_S5P depends on ARCH_S5PV210 || ARCH_EXYNOS || COMPILE_TEST depends on HAS_IOMEM select CRYPTO_AES - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER help This option allows you to have support for S5P crypto acceleration. Select this to offload Samsung S5PV210 or S5PC110, Exynos from AES @@ -507,7 +507,7 @@ config CRYPTO_DEV_ATMEL_AES depends on ARCH_AT91 || COMPILE_TEST select CRYPTO_AES select CRYPTO_AEAD - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER help Some Atmel processors have AES hw accelerator. Select this if you want to use the Atmel module for @@ -520,7 +520,7 @@ config CRYPTO_DEV_ATMEL_TDES tristate "Support for Atmel DES/TDES hw accelerator" depends on ARCH_AT91 || COMPILE_TEST select CRYPTO_LIB_DES - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER help Some Atmel processors have DES/TDES hw accelerator. Select this if you want to use the Atmel module for @@ -592,7 +592,7 @@ config CRYPTO_DEV_MXS_DCP select CRYPTO_CBC select CRYPTO_ECB select CRYPTO_AES - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER select CRYPTO_HASH help The Freescale i.MX23/i.MX28 has SHA1/SHA256 and AES128 CBC/ECB @@ -622,7 +622,7 @@ config CRYPTO_DEV_QCE select CRYPTO_CBC select CRYPTO_XTS select CRYPTO_CTR - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER help This driver supports Qualcomm crypto engine accelerator hardware. To compile this driver as a module, choose M here. The @@ -668,7 +668,7 @@ config CRYPTO_DEV_ROCKCHIP select CRYPTO_SHA1 select CRYPTO_SHA256 select CRYPTO_HASH - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER help This driver interfaces with the hardware crypto accelerator. @@ -679,7 +679,7 @@ config CRYPTO_DEV_MEDIATEK depends on (ARM && ARCH_MEDIATEK) || COMPILE_TEST select CRYPTO_AES select CRYPTO_AEAD - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER select CRYPTO_CTR select CRYPTO_SHA1 select CRYPTO_SHA256 @@ -717,7 +717,7 @@ config CRYPTO_DEV_SAFEXCEL depends on OF || PCI || COMPILE_TEST select CRYPTO_LIB_AES select CRYPTO_AUTHENC - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER select CRYPTO_LIB_DES select CRYPTO_HASH select CRYPTO_HMAC @@ -741,7 +741,7 @@ config CRYPTO_DEV_ARTPEC6 select CRYPTO_AEAD select CRYPTO_AES select CRYPTO_ALGAPI - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER select CRYPTO_CTR select CRYPTO_HASH select CRYPTO_SHA1 @@ -758,7 +758,7 @@ config CRYPTO_DEV_CCREE depends on CRYPTO && CRYPTO_HW && OF && HAS_DMA default n select CRYPTO_HASH - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER select CRYPTO_LIB_DES select CRYPTO_AEAD select CRYPTO_AUTHENC diff --git a/drivers/crypto/allwinner/Kconfig b/drivers/crypto/allwinner/Kconfig index 675f9984f52c..22c9d6b4b744 100644 --- a/drivers/crypto/allwinner/Kconfig +++ b/drivers/crypto/allwinner/Kconfig @@ -14,7 +14,7 @@ config CRYPTO_DEV_SUN4I_SS select CRYPTO_SHA1 select CRYPTO_AES select CRYPTO_LIB_DES - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER help Some Allwinner SoC have a crypto accelerator named Security System. Select this if you want to use it. 
@@ -34,7 +34,7 @@ config CRYPTO_DEV_SUN4I_SS_PRNG config CRYPTO_DEV_SUN8I_CE tristate "Support for Allwinner Crypto Engine cryptographic offloader" - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER select CRYPTO_ENGINE select CRYPTO_ECB select CRYPTO_CBC @@ -61,7 +61,7 @@ config CRYPTO_DEV_SUN8I_CE_DEBUG config CRYPTO_DEV_SUN8I_SS tristate "Support for Allwinner Security System cryptographic offloader" - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER select CRYPTO_ENGINE select CRYPTO_ECB select CRYPTO_CBC diff --git a/drivers/crypto/amlogic/Kconfig b/drivers/crypto/amlogic/Kconfig index 5c81a4ad0fae..b90850d18965 100644 --- a/drivers/crypto/amlogic/Kconfig +++ b/drivers/crypto/amlogic/Kconfig @@ -1,7 +1,7 @@ config CRYPTO_DEV_AMLOGIC_GXL tristate "Support for amlogic cryptographic offloader" default y if ARCH_MESON - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER select CRYPTO_ENGINE select CRYPTO_ECB select CRYPTO_CBC diff --git a/drivers/crypto/caam/Kconfig b/drivers/crypto/caam/Kconfig index 137ed3df0c74..87053e46c788 100644 --- a/drivers/crypto/caam/Kconfig +++ b/drivers/crypto/caam/Kconfig @@ -97,7 +97,7 @@ config CRYPTO_DEV_FSL_CAAM_CRYPTO_API select CRYPTO_DEV_FSL_CAAM_CRYPTO_API_DESC select CRYPTO_AEAD select CRYPTO_AUTHENC - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER select CRYPTO_LIB_DES help Selecting this will offload crypto for users of the @@ -110,7 +110,7 @@ config CRYPTO_DEV_FSL_CAAM_CRYPTO_API_QI default y select CRYPTO_DEV_FSL_CAAM_CRYPTO_API_DESC select CRYPTO_AUTHENC - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER select CRYPTO_DES help Selecting this will use CAAM Queue Interface (QI) for sending @@ -158,7 +158,7 @@ config CRYPTO_DEV_FSL_DPAA2_CAAM select CRYPTO_DEV_FSL_CAAM_COMMON select CRYPTO_DEV_FSL_CAAM_CRYPTO_API_DESC select CRYPTO_DEV_FSL_CAAM_AHASH_API_DESC - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER select CRYPTO_AUTHENC select CRYPTO_AEAD select CRYPTO_HASH diff --git a/drivers/crypto/cavium/nitrox/Kconfig b/drivers/crypto/cavium/nitrox/Kconfig index 7b1e751bb9cd..7dc008332a81 100644 --- a/drivers/crypto/cavium/nitrox/Kconfig +++ b/drivers/crypto/cavium/nitrox/Kconfig @@ -4,7 +4,7 @@ # config CRYPTO_DEV_NITROX tristate - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER select CRYPTO_AES select CRYPTO_LIB_DES select FW_LOADER diff --git a/drivers/crypto/ccp/Kconfig b/drivers/crypto/ccp/Kconfig index 8fec733f567f..e0a8bd15aa74 100644 --- a/drivers/crypto/ccp/Kconfig +++ b/drivers/crypto/ccp/Kconfig @@ -27,7 +27,7 @@ config CRYPTO_DEV_CCP_CRYPTO depends on CRYPTO_DEV_CCP_DD depends on CRYPTO_DEV_SP_CCP select CRYPTO_HASH - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER select CRYPTO_AUTHENC select CRYPTO_RSA select CRYPTO_LIB_AES diff --git a/drivers/crypto/hisilicon/Kconfig b/drivers/crypto/hisilicon/Kconfig index a71f2bfaf084..1598468286be 100644 --- a/drivers/crypto/hisilicon/Kconfig +++ b/drivers/crypto/hisilicon/Kconfig @@ -2,7 +2,7 @@ config CRYPTO_DEV_HISI_SEC tristate "Support for Hisilicon SEC crypto block cipher accelerator" - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER select CRYPTO_ALGAPI select CRYPTO_LIB_DES select SG_SPLIT diff --git a/drivers/crypto/qat/Kconfig b/drivers/crypto/qat/Kconfig index 6ab7e5a88756..2006322345de 100644 --- a/drivers/crypto/qat/Kconfig +++ b/drivers/crypto/qat/Kconfig @@ -3,7 +3,7 @@ config CRYPTO_DEV_QAT tristate select CRYPTO_AEAD select CRYPTO_AUTHENC - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER select CRYPTO_AKCIPHER select CRYPTO_DH select CRYPTO_HMAC diff --git 
a/drivers/crypto/ux500/Kconfig b/drivers/crypto/ux500/Kconfig index b1c6f739f77b..b731895aa241 100644 --- a/drivers/crypto/ux500/Kconfig +++ b/drivers/crypto/ux500/Kconfig @@ -8,7 +8,7 @@ config CRYPTO_DEV_UX500_CRYP tristate "UX500 crypto driver for CRYP block" depends on CRYPTO_DEV_UX500 select CRYPTO_ALGAPI - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER select CRYPTO_LIB_DES help This selects the crypto driver for the UX500_CRYP hardware. It supports diff --git a/drivers/crypto/virtio/Kconfig b/drivers/crypto/virtio/Kconfig index 01b625e4e5ad..fb294174e408 100644 --- a/drivers/crypto/virtio/Kconfig +++ b/drivers/crypto/virtio/Kconfig @@ -3,7 +3,7 @@ config CRYPTO_DEV_VIRTIO tristate "VirtIO crypto driver" depends on VIRTIO select CRYPTO_AEAD - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER select CRYPTO_ENGINE default m help diff --git a/drivers/net/wireless/cisco/Kconfig b/drivers/net/wireless/cisco/Kconfig index 01e173ede894..7a3b3bb2ce15 100644 --- a/drivers/net/wireless/cisco/Kconfig +++ b/drivers/net/wireless/cisco/Kconfig @@ -17,7 +17,7 @@ config AIRO depends on CFG80211 && ISA_DMA_API && (PCI || BROKEN) select WIRELESS_EXT select CRYPTO - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER select WEXT_SPY select WEXT_PRIV ---help--- diff --git a/net/bluetooth/Kconfig b/net/bluetooth/Kconfig index 3803135c88ff..165148c7c4ce 100644 --- a/net/bluetooth/Kconfig +++ b/net/bluetooth/Kconfig @@ -9,7 +9,7 @@ menuconfig BT depends on RFKILL || !RFKILL select CRC16 select CRYPTO - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER select CRYPTO_LIB_AES imply CRYPTO_AES select CRYPTO_CMAC diff --git a/net/rxrpc/Kconfig b/net/rxrpc/Kconfig index 05610c3a3d25..57ebb29c26ad 100644 --- a/net/rxrpc/Kconfig +++ b/net/rxrpc/Kconfig @@ -49,7 +49,7 @@ config RXKAD depends on AF_RXRPC select CRYPTO select CRYPTO_MANAGER - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER select CRYPTO_PCBC select CRYPTO_FCRYPT help diff --git a/net/xfrm/Kconfig b/net/xfrm/Kconfig index 51bb6018f3bf..3981bc0d9e6c 100644 --- a/net/xfrm/Kconfig +++ b/net/xfrm/Kconfig @@ -16,7 +16,7 @@ config XFRM_ALGO select XFRM select CRYPTO select CRYPTO_HASH - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER if INET config XFRM_USER -- cgit v1.2.3 From 8b5369ea580964dbc982781bfb9fb93459fc5e8d Mon Sep 17 00:00:00 2001 From: Nicolas Saenz Julienne Date: Mon, 14 Oct 2019 20:31:03 +0200 Subject: dma/direct: turn ARCH_ZONE_DMA_BITS into a variable Some architectures, notably ARM, are interested in tweaking this depending on their runtime DMA addressing limitations. 
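With zone_dma_bits in place, dma-direct picks the GFP zone and the
minimum supported DMA mask from a runtime value instead of a
compile-time macro. An architecture that needs a narrower ZONE_DMA now
just assigns the variable during early memory init. A minimal sketch of
the new pattern (the function name is illustrative; arm64 does this in
arm64_memblock_init() in the hunk below, with a 30-bit limit):

	#include <linux/dma-direct.h>

	void __init arch_memblock_init(void)	/* illustrative hook */
	{
		/* Tell dma-direct this platform's DMA addressing limit. */
		if (IS_ENABLED(CONFIG_ZONE_DMA))
			zone_dma_bits = 30;
	}

Architectures that never touch it keep the historical 24-bit (16 MB)
default, which kernel/dma/direct.c now carries in the variable itself.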
Acked-by: Christoph Hellwig Signed-off-by: Nicolas Saenz Julienne Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/page.h | 2 -- arch/arm64/mm/init.c | 9 +++++++-- arch/powerpc/include/asm/page.h | 9 --------- arch/powerpc/mm/mem.c | 20 +++++++++++++++----- arch/s390/include/asm/page.h | 2 -- arch/s390/mm/init.c | 1 + include/linux/dma-direct.h | 2 ++ kernel/dma/direct.c | 13 ++++++------- 8 files changed, 31 insertions(+), 27 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h index 7b8c98830101..d39ddb258a04 100644 --- a/arch/arm64/include/asm/page.h +++ b/arch/arm64/include/asm/page.h @@ -38,6 +38,4 @@ extern int pfn_valid(unsigned long); #include -#define ARCH_ZONE_DMA_BITS 30 - #endif diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 39fc69873b18..35f27b839101 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -41,6 +42,8 @@ #include #include +#define ARM64_ZONE_DMA_BITS 30 + /* * We need to be able to catch inadvertent references to memstart_addr * that occur (potentially in generic code) before arm64_memblock_init() @@ -440,8 +443,10 @@ void __init arm64_memblock_init(void) early_init_fdt_scan_reserved_mem(); - if (IS_ENABLED(CONFIG_ZONE_DMA)) - arm64_dma_phys_limit = max_zone_phys(ARCH_ZONE_DMA_BITS); + if (IS_ENABLED(CONFIG_ZONE_DMA)) { + zone_dma_bits = ARM64_ZONE_DMA_BITS; + arm64_dma_phys_limit = max_zone_phys(ARM64_ZONE_DMA_BITS); + } if (IS_ENABLED(CONFIG_ZONE_DMA32)) arm64_dma32_phys_limit = max_zone_phys(32); diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index c8bb14ff4713..f6c562acc3f8 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -329,13 +329,4 @@ struct vm_area_struct; #endif /* __ASSEMBLY__ */ #include -/* - * Allow 30-bit DMA for very limited Broadcom wifi chips on many powerbooks. - */ -#ifdef CONFIG_PPC32 -#define ARCH_ZONE_DMA_BITS 30 -#else -#define ARCH_ZONE_DMA_BITS 31 -#endif - #endif /* _ASM_POWERPC_PAGE_H */ diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index be941d382c8d..c95b7fe9f298 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -201,10 +202,10 @@ static int __init mark_nonram_nosave(void) * everything else. GFP_DMA32 page allocations automatically fall back to * ZONE_DMA. * - * By using 31-bit unconditionally, we can exploit ARCH_ZONE_DMA_BITS to - * inform the generic DMA mapping code. 32-bit only devices (if not handled - * by an IOMMU anyway) will take a first dip into ZONE_NORMAL and get - * otherwise served by ZONE_DMA. + * By using 31-bit unconditionally, we can exploit zone_dma_bits to inform the + * generic DMA mapping code. 32-bit only devices (if not handled by an IOMMU + * anyway) will take a first dip into ZONE_NORMAL and get otherwise served by + * ZONE_DMA. */ static unsigned long max_zone_pfns[MAX_NR_ZONES]; @@ -237,9 +238,18 @@ void __init paging_init(void) printk(KERN_DEBUG "Memory hole size: %ldMB\n", (long int)((top_of_ram - total_ram) >> 20)); + /* + * Allow 30-bit DMA for very limited Broadcom wifi chips on many + * powerbooks. 
+ */ + if (IS_ENABLED(CONFIG_PPC32)) + zone_dma_bits = 30; + else + zone_dma_bits = 31; + #ifdef CONFIG_ZONE_DMA max_zone_pfns[ZONE_DMA] = min(max_low_pfn, - 1UL << (ARCH_ZONE_DMA_BITS - PAGE_SHIFT)); + 1UL << (zone_dma_bits - PAGE_SHIFT)); #endif max_zone_pfns[ZONE_NORMAL] = max_low_pfn; #ifdef CONFIG_HIGHMEM diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index 823578c6b9e2..a4d38092530a 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -177,8 +177,6 @@ static inline int devmem_is_allowed(unsigned long pfn) #define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | \ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) -#define ARCH_ZONE_DMA_BITS 31 - #include #include diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index a124f19f7b3c..f0ce22220565 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -118,6 +118,7 @@ void __init paging_init(void) sparse_memory_present_with_active_regions(MAX_NUMNODES); sparse_init(); + zone_dma_bits = 31; memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); max_zone_pfns[ZONE_DMA] = PFN_DOWN(MAX_DMA_ADDRESS); max_zone_pfns[ZONE_NORMAL] = max_low_pfn; diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h index adf993a3bd58..d03af3605460 100644 --- a/include/linux/dma-direct.h +++ b/include/linux/dma-direct.h @@ -5,6 +5,8 @@ #include #include +extern unsigned int zone_dma_bits; + #ifdef CONFIG_ARCH_HAS_PHYS_TO_DMA #include #else diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index 8402b29c280f..0b67c04e531b 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -16,12 +16,11 @@ #include /* - * Most architectures use ZONE_DMA for the first 16 Megabytes, but - * some use it for entirely different regions: + * Most architectures use ZONE_DMA for the first 16 Megabytes, but some use it + * it for entirely different regions. In that case the arch code needs to + * override the variable below for dma-direct to work properly. */ -#ifndef ARCH_ZONE_DMA_BITS -#define ARCH_ZONE_DMA_BITS 24 -#endif +unsigned int zone_dma_bits __ro_after_init = 24; static void report_addr(struct device *dev, dma_addr_t dma_addr, size_t size) { @@ -69,7 +68,7 @@ static gfp_t __dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask, * Note that GFP_DMA32 and GFP_DMA are no ops without the corresponding * zones. */ - if (*phys_mask <= DMA_BIT_MASK(ARCH_ZONE_DMA_BITS)) + if (*phys_mask <= DMA_BIT_MASK(zone_dma_bits)) return GFP_DMA; if (*phys_mask <= DMA_BIT_MASK(32)) return GFP_DMA32; @@ -395,7 +394,7 @@ int dma_direct_supported(struct device *dev, u64 mask) u64 min_mask; if (IS_ENABLED(CONFIG_ZONE_DMA)) - min_mask = DMA_BIT_MASK(ARCH_ZONE_DMA_BITS); + min_mask = DMA_BIT_MASK(zone_dma_bits); else min_mask = DMA_BIT_MASK(32); -- cgit v1.2.3 From 9ef8567ccf2eb00473b1280d0911caf3f413dc67 Mon Sep 17 00:00:00 2001 From: Shaokun Zhang Date: Wed, 30 Oct 2019 11:46:17 +0800 Subject: arm64: perf: Simplify the ARMv8 PMUv3 event attributes For each PMU event, there is a ARMV8_EVENT_ATTR(xx, XX) and &armv8_event_attr_xx.attr.attr. Let's redefine the ARMV8_EVENT_ATTR to simplify the armv8_pmuv3_event_attrs. 
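The new ARMV8_EVENT_ATTR() evaluates to a pointer into an anonymous
compound literal (which has static storage duration at file scope), so
the separate armv8_event_attr_* definitions and the matching
&armv8_event_attr_xx.attr.attr references collapse into a single line
per event. As a sketch, one array entry now expands roughly to:

	/* ARMV8_EVENT_ATTR(sw_incr, ARMV8_PMUV3_PERFCTR_SW_INCR) */
	&((struct perf_pmu_events_attr) {
		.attr = __ATTR(sw_incr, 0444, armv8pmu_events_sysfs_show, NULL),
		.id = ARMV8_PMUV3_PERFCTR_SW_INCR,
	}).attr.attr,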
Cc: Will Deacon Cc: Mark Rutland Signed-off-by: Shaokun Zhang [will: Dropped unnecessary array syntax] Signed-off-by: Will Deacon --- arch/arm64/kernel/perf_event.c | 191 ++++++++++++++--------------------------- 1 file changed, 66 insertions(+), 125 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index a0b4f1bca491..e40b65645c86 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -158,133 +158,74 @@ armv8pmu_events_sysfs_show(struct device *dev, return sprintf(page, "event=0x%03llx\n", pmu_attr->id); } -#define ARMV8_EVENT_ATTR(name, config) \ - PMU_EVENT_ATTR(name, armv8_event_attr_##name, \ - config, armv8pmu_events_sysfs_show) - -ARMV8_EVENT_ATTR(sw_incr, ARMV8_PMUV3_PERFCTR_SW_INCR); -ARMV8_EVENT_ATTR(l1i_cache_refill, ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL); -ARMV8_EVENT_ATTR(l1i_tlb_refill, ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL); -ARMV8_EVENT_ATTR(l1d_cache_refill, ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL); -ARMV8_EVENT_ATTR(l1d_cache, ARMV8_PMUV3_PERFCTR_L1D_CACHE); -ARMV8_EVENT_ATTR(l1d_tlb_refill, ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL); -ARMV8_EVENT_ATTR(ld_retired, ARMV8_PMUV3_PERFCTR_LD_RETIRED); -ARMV8_EVENT_ATTR(st_retired, ARMV8_PMUV3_PERFCTR_ST_RETIRED); -ARMV8_EVENT_ATTR(inst_retired, ARMV8_PMUV3_PERFCTR_INST_RETIRED); -ARMV8_EVENT_ATTR(exc_taken, ARMV8_PMUV3_PERFCTR_EXC_TAKEN); -ARMV8_EVENT_ATTR(exc_return, ARMV8_PMUV3_PERFCTR_EXC_RETURN); -ARMV8_EVENT_ATTR(cid_write_retired, ARMV8_PMUV3_PERFCTR_CID_WRITE_RETIRED); -ARMV8_EVENT_ATTR(pc_write_retired, ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED); -ARMV8_EVENT_ATTR(br_immed_retired, ARMV8_PMUV3_PERFCTR_BR_IMMED_RETIRED); -ARMV8_EVENT_ATTR(br_return_retired, ARMV8_PMUV3_PERFCTR_BR_RETURN_RETIRED); -ARMV8_EVENT_ATTR(unaligned_ldst_retired, ARMV8_PMUV3_PERFCTR_UNALIGNED_LDST_RETIRED); -ARMV8_EVENT_ATTR(br_mis_pred, ARMV8_PMUV3_PERFCTR_BR_MIS_PRED); -ARMV8_EVENT_ATTR(cpu_cycles, ARMV8_PMUV3_PERFCTR_CPU_CYCLES); -ARMV8_EVENT_ATTR(br_pred, ARMV8_PMUV3_PERFCTR_BR_PRED); -ARMV8_EVENT_ATTR(mem_access, ARMV8_PMUV3_PERFCTR_MEM_ACCESS); -ARMV8_EVENT_ATTR(l1i_cache, ARMV8_PMUV3_PERFCTR_L1I_CACHE); -ARMV8_EVENT_ATTR(l1d_cache_wb, ARMV8_PMUV3_PERFCTR_L1D_CACHE_WB); -ARMV8_EVENT_ATTR(l2d_cache, ARMV8_PMUV3_PERFCTR_L2D_CACHE); -ARMV8_EVENT_ATTR(l2d_cache_refill, ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL); -ARMV8_EVENT_ATTR(l2d_cache_wb, ARMV8_PMUV3_PERFCTR_L2D_CACHE_WB); -ARMV8_EVENT_ATTR(bus_access, ARMV8_PMUV3_PERFCTR_BUS_ACCESS); -ARMV8_EVENT_ATTR(memory_error, ARMV8_PMUV3_PERFCTR_MEMORY_ERROR); -ARMV8_EVENT_ATTR(inst_spec, ARMV8_PMUV3_PERFCTR_INST_SPEC); -ARMV8_EVENT_ATTR(ttbr_write_retired, ARMV8_PMUV3_PERFCTR_TTBR_WRITE_RETIRED); -ARMV8_EVENT_ATTR(bus_cycles, ARMV8_PMUV3_PERFCTR_BUS_CYCLES); -/* Don't expose the chain event in /sys, since it's useless in isolation */ -ARMV8_EVENT_ATTR(l1d_cache_allocate, ARMV8_PMUV3_PERFCTR_L1D_CACHE_ALLOCATE); -ARMV8_EVENT_ATTR(l2d_cache_allocate, ARMV8_PMUV3_PERFCTR_L2D_CACHE_ALLOCATE); -ARMV8_EVENT_ATTR(br_retired, ARMV8_PMUV3_PERFCTR_BR_RETIRED); -ARMV8_EVENT_ATTR(br_mis_pred_retired, ARMV8_PMUV3_PERFCTR_BR_MIS_PRED_RETIRED); -ARMV8_EVENT_ATTR(stall_frontend, ARMV8_PMUV3_PERFCTR_STALL_FRONTEND); -ARMV8_EVENT_ATTR(stall_backend, ARMV8_PMUV3_PERFCTR_STALL_BACKEND); -ARMV8_EVENT_ATTR(l1d_tlb, ARMV8_PMUV3_PERFCTR_L1D_TLB); -ARMV8_EVENT_ATTR(l1i_tlb, ARMV8_PMUV3_PERFCTR_L1I_TLB); -ARMV8_EVENT_ATTR(l2i_cache, ARMV8_PMUV3_PERFCTR_L2I_CACHE); -ARMV8_EVENT_ATTR(l2i_cache_refill, ARMV8_PMUV3_PERFCTR_L2I_CACHE_REFILL); 
-ARMV8_EVENT_ATTR(l3d_cache_allocate, ARMV8_PMUV3_PERFCTR_L3D_CACHE_ALLOCATE); -ARMV8_EVENT_ATTR(l3d_cache_refill, ARMV8_PMUV3_PERFCTR_L3D_CACHE_REFILL); -ARMV8_EVENT_ATTR(l3d_cache, ARMV8_PMUV3_PERFCTR_L3D_CACHE); -ARMV8_EVENT_ATTR(l3d_cache_wb, ARMV8_PMUV3_PERFCTR_L3D_CACHE_WB); -ARMV8_EVENT_ATTR(l2d_tlb_refill, ARMV8_PMUV3_PERFCTR_L2D_TLB_REFILL); -ARMV8_EVENT_ATTR(l2i_tlb_refill, ARMV8_PMUV3_PERFCTR_L2I_TLB_REFILL); -ARMV8_EVENT_ATTR(l2d_tlb, ARMV8_PMUV3_PERFCTR_L2D_TLB); -ARMV8_EVENT_ATTR(l2i_tlb, ARMV8_PMUV3_PERFCTR_L2I_TLB); -ARMV8_EVENT_ATTR(remote_access, ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS); -ARMV8_EVENT_ATTR(ll_cache, ARMV8_PMUV3_PERFCTR_LL_CACHE); -ARMV8_EVENT_ATTR(ll_cache_miss, ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS); -ARMV8_EVENT_ATTR(dtlb_walk, ARMV8_PMUV3_PERFCTR_DTLB_WALK); -ARMV8_EVENT_ATTR(itlb_walk, ARMV8_PMUV3_PERFCTR_ITLB_WALK); -ARMV8_EVENT_ATTR(ll_cache_rd, ARMV8_PMUV3_PERFCTR_LL_CACHE_RD); -ARMV8_EVENT_ATTR(ll_cache_miss_rd, ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS_RD); -ARMV8_EVENT_ATTR(remote_access_rd, ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS_RD); -ARMV8_EVENT_ATTR(sample_pop, ARMV8_SPE_PERFCTR_SAMPLE_POP); -ARMV8_EVENT_ATTR(sample_feed, ARMV8_SPE_PERFCTR_SAMPLE_FEED); -ARMV8_EVENT_ATTR(sample_filtrate, ARMV8_SPE_PERFCTR_SAMPLE_FILTRATE); -ARMV8_EVENT_ATTR(sample_collision, ARMV8_SPE_PERFCTR_SAMPLE_COLLISION); +#define ARMV8_EVENT_ATTR(name, config) \ + (&((struct perf_pmu_events_attr) { \ + .attr = __ATTR(name, 0444, armv8pmu_events_sysfs_show, NULL), \ + .id = config, \ + }).attr.attr) static struct attribute *armv8_pmuv3_event_attrs[] = { - &armv8_event_attr_sw_incr.attr.attr, - &armv8_event_attr_l1i_cache_refill.attr.attr, - &armv8_event_attr_l1i_tlb_refill.attr.attr, - &armv8_event_attr_l1d_cache_refill.attr.attr, - &armv8_event_attr_l1d_cache.attr.attr, - &armv8_event_attr_l1d_tlb_refill.attr.attr, - &armv8_event_attr_ld_retired.attr.attr, - &armv8_event_attr_st_retired.attr.attr, - &armv8_event_attr_inst_retired.attr.attr, - &armv8_event_attr_exc_taken.attr.attr, - &armv8_event_attr_exc_return.attr.attr, - &armv8_event_attr_cid_write_retired.attr.attr, - &armv8_event_attr_pc_write_retired.attr.attr, - &armv8_event_attr_br_immed_retired.attr.attr, - &armv8_event_attr_br_return_retired.attr.attr, - &armv8_event_attr_unaligned_ldst_retired.attr.attr, - &armv8_event_attr_br_mis_pred.attr.attr, - &armv8_event_attr_cpu_cycles.attr.attr, - &armv8_event_attr_br_pred.attr.attr, - &armv8_event_attr_mem_access.attr.attr, - &armv8_event_attr_l1i_cache.attr.attr, - &armv8_event_attr_l1d_cache_wb.attr.attr, - &armv8_event_attr_l2d_cache.attr.attr, - &armv8_event_attr_l2d_cache_refill.attr.attr, - &armv8_event_attr_l2d_cache_wb.attr.attr, - &armv8_event_attr_bus_access.attr.attr, - &armv8_event_attr_memory_error.attr.attr, - &armv8_event_attr_inst_spec.attr.attr, - &armv8_event_attr_ttbr_write_retired.attr.attr, - &armv8_event_attr_bus_cycles.attr.attr, - &armv8_event_attr_l1d_cache_allocate.attr.attr, - &armv8_event_attr_l2d_cache_allocate.attr.attr, - &armv8_event_attr_br_retired.attr.attr, - &armv8_event_attr_br_mis_pred_retired.attr.attr, - &armv8_event_attr_stall_frontend.attr.attr, - &armv8_event_attr_stall_backend.attr.attr, - &armv8_event_attr_l1d_tlb.attr.attr, - &armv8_event_attr_l1i_tlb.attr.attr, - &armv8_event_attr_l2i_cache.attr.attr, - &armv8_event_attr_l2i_cache_refill.attr.attr, - &armv8_event_attr_l3d_cache_allocate.attr.attr, - &armv8_event_attr_l3d_cache_refill.attr.attr, - &armv8_event_attr_l3d_cache.attr.attr, - &armv8_event_attr_l3d_cache_wb.attr.attr, - 
&armv8_event_attr_l2d_tlb_refill.attr.attr, - &armv8_event_attr_l2i_tlb_refill.attr.attr, - &armv8_event_attr_l2d_tlb.attr.attr, - &armv8_event_attr_l2i_tlb.attr.attr, - &armv8_event_attr_remote_access.attr.attr, - &armv8_event_attr_ll_cache.attr.attr, - &armv8_event_attr_ll_cache_miss.attr.attr, - &armv8_event_attr_dtlb_walk.attr.attr, - &armv8_event_attr_itlb_walk.attr.attr, - &armv8_event_attr_ll_cache_rd.attr.attr, - &armv8_event_attr_ll_cache_miss_rd.attr.attr, - &armv8_event_attr_remote_access_rd.attr.attr, - &armv8_event_attr_sample_pop.attr.attr, - &armv8_event_attr_sample_feed.attr.attr, - &armv8_event_attr_sample_filtrate.attr.attr, - &armv8_event_attr_sample_collision.attr.attr, + ARMV8_EVENT_ATTR(sw_incr, ARMV8_PMUV3_PERFCTR_SW_INCR), + ARMV8_EVENT_ATTR(l1i_cache_refill, ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL), + ARMV8_EVENT_ATTR(l1i_tlb_refill, ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL), + ARMV8_EVENT_ATTR(l1d_cache_refill, ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL), + ARMV8_EVENT_ATTR(l1d_cache, ARMV8_PMUV3_PERFCTR_L1D_CACHE), + ARMV8_EVENT_ATTR(l1d_tlb_refill, ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL), + ARMV8_EVENT_ATTR(ld_retired, ARMV8_PMUV3_PERFCTR_LD_RETIRED), + ARMV8_EVENT_ATTR(st_retired, ARMV8_PMUV3_PERFCTR_ST_RETIRED), + ARMV8_EVENT_ATTR(inst_retired, ARMV8_PMUV3_PERFCTR_INST_RETIRED), + ARMV8_EVENT_ATTR(exc_taken, ARMV8_PMUV3_PERFCTR_EXC_TAKEN), + ARMV8_EVENT_ATTR(exc_return, ARMV8_PMUV3_PERFCTR_EXC_RETURN), + ARMV8_EVENT_ATTR(cid_write_retired, ARMV8_PMUV3_PERFCTR_CID_WRITE_RETIRED), + ARMV8_EVENT_ATTR(pc_write_retired, ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED), + ARMV8_EVENT_ATTR(br_immed_retired, ARMV8_PMUV3_PERFCTR_BR_IMMED_RETIRED), + ARMV8_EVENT_ATTR(br_return_retired, ARMV8_PMUV3_PERFCTR_BR_RETURN_RETIRED), + ARMV8_EVENT_ATTR(unaligned_ldst_retired, ARMV8_PMUV3_PERFCTR_UNALIGNED_LDST_RETIRED), + ARMV8_EVENT_ATTR(br_mis_pred, ARMV8_PMUV3_PERFCTR_BR_MIS_PRED), + ARMV8_EVENT_ATTR(cpu_cycles, ARMV8_PMUV3_PERFCTR_CPU_CYCLES), + ARMV8_EVENT_ATTR(br_pred, ARMV8_PMUV3_PERFCTR_BR_PRED), + ARMV8_EVENT_ATTR(mem_access, ARMV8_PMUV3_PERFCTR_MEM_ACCESS), + ARMV8_EVENT_ATTR(l1i_cache, ARMV8_PMUV3_PERFCTR_L1I_CACHE), + ARMV8_EVENT_ATTR(l1d_cache_wb, ARMV8_PMUV3_PERFCTR_L1D_CACHE_WB), + ARMV8_EVENT_ATTR(l2d_cache, ARMV8_PMUV3_PERFCTR_L2D_CACHE), + ARMV8_EVENT_ATTR(l2d_cache_refill, ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL), + ARMV8_EVENT_ATTR(l2d_cache_wb, ARMV8_PMUV3_PERFCTR_L2D_CACHE_WB), + ARMV8_EVENT_ATTR(bus_access, ARMV8_PMUV3_PERFCTR_BUS_ACCESS), + ARMV8_EVENT_ATTR(memory_error, ARMV8_PMUV3_PERFCTR_MEMORY_ERROR), + ARMV8_EVENT_ATTR(inst_spec, ARMV8_PMUV3_PERFCTR_INST_SPEC), + ARMV8_EVENT_ATTR(ttbr_write_retired, ARMV8_PMUV3_PERFCTR_TTBR_WRITE_RETIRED), + ARMV8_EVENT_ATTR(bus_cycles, ARMV8_PMUV3_PERFCTR_BUS_CYCLES), + /* Don't expose the chain event in /sys, since it's useless in isolation */ + ARMV8_EVENT_ATTR(l1d_cache_allocate, ARMV8_PMUV3_PERFCTR_L1D_CACHE_ALLOCATE), + ARMV8_EVENT_ATTR(l2d_cache_allocate, ARMV8_PMUV3_PERFCTR_L2D_CACHE_ALLOCATE), + ARMV8_EVENT_ATTR(br_retired, ARMV8_PMUV3_PERFCTR_BR_RETIRED), + ARMV8_EVENT_ATTR(br_mis_pred_retired, ARMV8_PMUV3_PERFCTR_BR_MIS_PRED_RETIRED), + ARMV8_EVENT_ATTR(stall_frontend, ARMV8_PMUV3_PERFCTR_STALL_FRONTEND), + ARMV8_EVENT_ATTR(stall_backend, ARMV8_PMUV3_PERFCTR_STALL_BACKEND), + ARMV8_EVENT_ATTR(l1d_tlb, ARMV8_PMUV3_PERFCTR_L1D_TLB), + ARMV8_EVENT_ATTR(l1i_tlb, ARMV8_PMUV3_PERFCTR_L1I_TLB), + ARMV8_EVENT_ATTR(l2i_cache, ARMV8_PMUV3_PERFCTR_L2I_CACHE), + ARMV8_EVENT_ATTR(l2i_cache_refill, ARMV8_PMUV3_PERFCTR_L2I_CACHE_REFILL), + 
ARMV8_EVENT_ATTR(l3d_cache_allocate, ARMV8_PMUV3_PERFCTR_L3D_CACHE_ALLOCATE), + ARMV8_EVENT_ATTR(l3d_cache_refill, ARMV8_PMUV3_PERFCTR_L3D_CACHE_REFILL), + ARMV8_EVENT_ATTR(l3d_cache, ARMV8_PMUV3_PERFCTR_L3D_CACHE), + ARMV8_EVENT_ATTR(l3d_cache_wb, ARMV8_PMUV3_PERFCTR_L3D_CACHE_WB), + ARMV8_EVENT_ATTR(l2d_tlb_refill, ARMV8_PMUV3_PERFCTR_L2D_TLB_REFILL), + ARMV8_EVENT_ATTR(l2i_tlb_refill, ARMV8_PMUV3_PERFCTR_L2I_TLB_REFILL), + ARMV8_EVENT_ATTR(l2d_tlb, ARMV8_PMUV3_PERFCTR_L2D_TLB), + ARMV8_EVENT_ATTR(l2i_tlb, ARMV8_PMUV3_PERFCTR_L2I_TLB), + ARMV8_EVENT_ATTR(remote_access, ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS), + ARMV8_EVENT_ATTR(ll_cache, ARMV8_PMUV3_PERFCTR_LL_CACHE), + ARMV8_EVENT_ATTR(ll_cache_miss, ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS), + ARMV8_EVENT_ATTR(dtlb_walk, ARMV8_PMUV3_PERFCTR_DTLB_WALK), + ARMV8_EVENT_ATTR(itlb_walk, ARMV8_PMUV3_PERFCTR_ITLB_WALK), + ARMV8_EVENT_ATTR(ll_cache_rd, ARMV8_PMUV3_PERFCTR_LL_CACHE_RD), + ARMV8_EVENT_ATTR(ll_cache_miss_rd, ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS_RD), + ARMV8_EVENT_ATTR(remote_access_rd, ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS_RD), + ARMV8_EVENT_ATTR(sample_pop, ARMV8_SPE_PERFCTR_SAMPLE_POP), + ARMV8_EVENT_ATTR(sample_feed, ARMV8_SPE_PERFCTR_SAMPLE_FEED), + ARMV8_EVENT_ATTR(sample_filtrate, ARMV8_SPE_PERFCTR_SAMPLE_FILTRATE), + ARMV8_EVENT_ATTR(sample_collision, ARMV8_SPE_PERFCTR_SAMPLE_COLLISION), NULL, }; -- cgit v1.2.3 From dc6b025de95bcd22ff37c4fabb022ec8a027abf1 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Mon, 28 Oct 2019 09:37:19 -0700 Subject: x86/mce: Add Xeon Icelake to list of CPUs that support PPIN New CPU model, same MSRs to control and read the inventory number. Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: linux-edac Cc: Thomas Gleixner Cc: x86-ml Link: https://lkml.kernel.org/r/20191028163719.19708-1-tony.luck@intel.com --- arch/x86/kernel/cpu/mce/intel.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c index 68a1d25c971e..e270d0770134 100644 --- a/arch/x86/kernel/cpu/mce/intel.c +++ b/arch/x86/kernel/cpu/mce/intel.c @@ -484,6 +484,7 @@ static void intel_ppin_init(struct cpuinfo_x86 *c) case INTEL_FAM6_BROADWELL_D: case INTEL_FAM6_BROADWELL_X: case INTEL_FAM6_SKYLAKE_X: + case INTEL_FAM6_ICELAKE_X: case INTEL_FAM6_XEON_PHI_KNL: case INTEL_FAM6_XEON_PHI_KNM: -- cgit v1.2.3 From 71e2f4dd5a65bd8dbca0b77661e75eea471168f8 Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Sun, 20 Oct 2019 22:43:14 +0800 Subject: MIPS: Fork loongson2ef from loongson64 As later model of GSx64 family processors including 2-series-soc have similar design with initial loongson3a while loongson2e/f seems less identical, we separate loongson2e/f support code out of mach-loongson64 to make our life easier. This patch contains mostly file moving works. 
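In Kconfig terms, the old MACH_LOONGSON64 option is split in two:
MACH_LOONGSON2EF covers the early Loongson-2E/F machines, while
MACH_LOONGSON64 is kept for the GSx64-based Loongson-2/3 family. The
two affected defconfigs (updated below) therefore now carry, in
relevant part:

	# arch/mips/configs/fuloong2e_defconfig
	CONFIG_MACH_LOONGSON2EF=y

	# arch/mips/configs/lemote2f_defconfig
	CONFIG_MACH_LOONGSON2EF=y
	CONFIG_LEMOTE_MACH2F=y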
Signed-off-by: Jiaxun Yang [paulburton@kernel.org: Squash in the MAINTAINERS updates] Signed-off-by: Paul Burton Cc: linux-mips@vger.kernel.org Cc: paul.burton@mips.com --- MAINTAINERS | 10 +- arch/mips/Kbuild.platforms | 1 + arch/mips/Kconfig | 19 +- arch/mips/configs/fuloong2e_defconfig | 2 +- arch/mips/configs/lemote2f_defconfig | 2 +- .../mips/include/asm/mach-loongson2ef/boot_param.h | 221 +++++++++++++ .../asm/mach-loongson2ef/cpu-feature-overrides.h | 53 +++ .../include/asm/mach-loongson2ef/cs5536/cs5536.h | 306 ++++++++++++++++++ .../asm/mach-loongson2ef/cs5536/cs5536_mfgpt.h | 36 +++ .../asm/mach-loongson2ef/cs5536/cs5536_pci.h | 153 +++++++++ .../asm/mach-loongson2ef/cs5536/cs5536_vsm.h | 32 ++ arch/mips/include/asm/mach-loongson2ef/irq.h | 44 +++ .../asm/mach-loongson2ef/kernel-entry-init.h | 90 ++++++ arch/mips/include/asm/mach-loongson2ef/loongson.h | 355 +++++++++++++++++++++ .../include/asm/mach-loongson2ef/loongson_hwmon.h | 56 ++++ .../include/asm/mach-loongson2ef/loongson_regs.h | 227 +++++++++++++ arch/mips/include/asm/mach-loongson2ef/machine.h | 29 ++ .../include/asm/mach-loongson2ef/mc146818rtc.h | 36 +++ arch/mips/include/asm/mach-loongson2ef/mem.h | 37 +++ arch/mips/include/asm/mach-loongson2ef/mmzone.h | 50 +++ arch/mips/include/asm/mach-loongson2ef/pci.h | 50 +++ arch/mips/include/asm/mach-loongson2ef/spaces.h | 10 + arch/mips/include/asm/mach-loongson2ef/topology.h | 23 ++ .../include/asm/mach-loongson2ef/workarounds.h | 8 + arch/mips/loongson2ef/Kconfig | 96 ++++++ arch/mips/loongson2ef/Makefile | 18 ++ arch/mips/loongson2ef/Platform | 32 ++ arch/mips/loongson2ef/common/Makefile | 27 ++ arch/mips/loongson2ef/common/bonito-irq.c | 49 +++ arch/mips/loongson2ef/common/cmdline.c | 44 +++ arch/mips/loongson2ef/common/cs5536/Makefile | 12 + arch/mips/loongson2ef/common/cs5536/cs5536_acc.c | 136 ++++++++ arch/mips/loongson2ef/common/cs5536/cs5536_ehci.c | 156 +++++++++ arch/mips/loongson2ef/common/cs5536/cs5536_ide.c | 188 +++++++++++ arch/mips/loongson2ef/common/cs5536/cs5536_isa.c | 326 +++++++++++++++++++ arch/mips/loongson2ef/common/cs5536/cs5536_mfgpt.c | 207 ++++++++++++ arch/mips/loongson2ef/common/cs5536/cs5536_ohci.c | 145 +++++++++ arch/mips/loongson2ef/common/cs5536/cs5536_pci.c | 84 +++++ arch/mips/loongson2ef/common/early_printk.c | 38 +++ arch/mips/loongson2ef/common/env.c | 212 ++++++++++++ arch/mips/loongson2ef/common/init.c | 56 ++++ arch/mips/loongson2ef/common/irq.c | 63 ++++ arch/mips/loongson2ef/common/machtype.c | 63 ++++ arch/mips/loongson2ef/common/mem.c | 157 +++++++++ arch/mips/loongson2ef/common/pci.c | 97 ++++++ arch/mips/loongson2ef/common/platform.c | 27 ++ arch/mips/loongson2ef/common/pm.c | 157 +++++++++ arch/mips/loongson2ef/common/reset.c | 94 ++++++ arch/mips/loongson2ef/common/rtc.c | 39 +++ arch/mips/loongson2ef/common/serial.c | 117 +++++++ arch/mips/loongson2ef/common/setup.c | 51 +++ arch/mips/loongson2ef/common/time.c | 32 ++ arch/mips/loongson2ef/common/uart_base.c | 46 +++ arch/mips/loongson2ef/fuloong-2e/Makefile | 6 + arch/mips/loongson2ef/fuloong-2e/dma.c | 12 + arch/mips/loongson2ef/fuloong-2e/irq.c | 65 ++++ arch/mips/loongson2ef/fuloong-2e/reset.c | 19 ++ arch/mips/loongson2ef/lemote-2f/Makefile | 12 + arch/mips/loongson2ef/lemote-2f/clock.c | 143 +++++++++ arch/mips/loongson2ef/lemote-2f/dma.c | 14 + arch/mips/loongson2ef/lemote-2f/ec_kb3310b.c | 125 ++++++++ arch/mips/loongson2ef/lemote-2f/ec_kb3310b.h | 184 +++++++++++ arch/mips/loongson2ef/lemote-2f/irq.c | 126 ++++++++ arch/mips/loongson2ef/lemote-2f/machtype.c | 41 
+++ arch/mips/loongson2ef/lemote-2f/pm.c | 145 +++++++++ arch/mips/loongson2ef/lemote-2f/reset.c | 155 +++++++++ arch/mips/loongson64/Kconfig | 75 ----- arch/mips/loongson64/Makefile | 12 - arch/mips/loongson64/Platform | 21 -- arch/mips/loongson64/common/Makefile | 6 - arch/mips/loongson64/common/cs5536/Makefile | 12 - arch/mips/loongson64/common/cs5536/cs5536_acc.c | 136 -------- arch/mips/loongson64/common/cs5536/cs5536_ehci.c | 156 --------- arch/mips/loongson64/common/cs5536/cs5536_ide.c | 188 ----------- arch/mips/loongson64/common/cs5536/cs5536_isa.c | 326 ------------------- arch/mips/loongson64/common/cs5536/cs5536_mfgpt.c | 207 ------------ arch/mips/loongson64/common/cs5536/cs5536_ohci.c | 145 --------- arch/mips/loongson64/common/cs5536/cs5536_pci.c | 84 ----- arch/mips/loongson64/fuloong-2e/Makefile | 6 - arch/mips/loongson64/fuloong-2e/dma.c | 12 - arch/mips/loongson64/fuloong-2e/irq.c | 65 ---- arch/mips/loongson64/fuloong-2e/reset.c | 19 -- arch/mips/loongson64/lemote-2f/Makefile | 12 - arch/mips/loongson64/lemote-2f/clock.c | 143 --------- arch/mips/loongson64/lemote-2f/dma.c | 14 - arch/mips/loongson64/lemote-2f/ec_kb3310b.c | 125 -------- arch/mips/loongson64/lemote-2f/ec_kb3310b.h | 184 ----------- arch/mips/loongson64/lemote-2f/irq.c | 126 -------- arch/mips/loongson64/lemote-2f/machtype.c | 41 --- arch/mips/loongson64/lemote-2f/pm.c | 145 --------- arch/mips/loongson64/lemote-2f/reset.c | 155 --------- drivers/cpufreq/loongson2_cpufreq.c | 2 +- 92 files changed, 5651 insertions(+), 2432 deletions(-) create mode 100644 arch/mips/include/asm/mach-loongson2ef/boot_param.h create mode 100644 arch/mips/include/asm/mach-loongson2ef/cpu-feature-overrides.h create mode 100644 arch/mips/include/asm/mach-loongson2ef/cs5536/cs5536.h create mode 100644 arch/mips/include/asm/mach-loongson2ef/cs5536/cs5536_mfgpt.h create mode 100644 arch/mips/include/asm/mach-loongson2ef/cs5536/cs5536_pci.h create mode 100644 arch/mips/include/asm/mach-loongson2ef/cs5536/cs5536_vsm.h create mode 100644 arch/mips/include/asm/mach-loongson2ef/irq.h create mode 100644 arch/mips/include/asm/mach-loongson2ef/kernel-entry-init.h create mode 100644 arch/mips/include/asm/mach-loongson2ef/loongson.h create mode 100644 arch/mips/include/asm/mach-loongson2ef/loongson_hwmon.h create mode 100644 arch/mips/include/asm/mach-loongson2ef/loongson_regs.h create mode 100644 arch/mips/include/asm/mach-loongson2ef/machine.h create mode 100644 arch/mips/include/asm/mach-loongson2ef/mc146818rtc.h create mode 100644 arch/mips/include/asm/mach-loongson2ef/mem.h create mode 100644 arch/mips/include/asm/mach-loongson2ef/mmzone.h create mode 100644 arch/mips/include/asm/mach-loongson2ef/pci.h create mode 100644 arch/mips/include/asm/mach-loongson2ef/spaces.h create mode 100644 arch/mips/include/asm/mach-loongson2ef/topology.h create mode 100644 arch/mips/include/asm/mach-loongson2ef/workarounds.h create mode 100644 arch/mips/loongson2ef/Kconfig create mode 100644 arch/mips/loongson2ef/Makefile create mode 100644 arch/mips/loongson2ef/Platform create mode 100644 arch/mips/loongson2ef/common/Makefile create mode 100644 arch/mips/loongson2ef/common/bonito-irq.c create mode 100644 arch/mips/loongson2ef/common/cmdline.c create mode 100644 arch/mips/loongson2ef/common/cs5536/Makefile create mode 100644 arch/mips/loongson2ef/common/cs5536/cs5536_acc.c create mode 100644 arch/mips/loongson2ef/common/cs5536/cs5536_ehci.c create mode 100644 arch/mips/loongson2ef/common/cs5536/cs5536_ide.c create mode 100644 
arch/mips/loongson2ef/common/cs5536/cs5536_isa.c create mode 100644 arch/mips/loongson2ef/common/cs5536/cs5536_mfgpt.c create mode 100644 arch/mips/loongson2ef/common/cs5536/cs5536_ohci.c create mode 100644 arch/mips/loongson2ef/common/cs5536/cs5536_pci.c create mode 100644 arch/mips/loongson2ef/common/early_printk.c create mode 100644 arch/mips/loongson2ef/common/env.c create mode 100644 arch/mips/loongson2ef/common/init.c create mode 100644 arch/mips/loongson2ef/common/irq.c create mode 100644 arch/mips/loongson2ef/common/machtype.c create mode 100644 arch/mips/loongson2ef/common/mem.c create mode 100644 arch/mips/loongson2ef/common/pci.c create mode 100644 arch/mips/loongson2ef/common/platform.c create mode 100644 arch/mips/loongson2ef/common/pm.c create mode 100644 arch/mips/loongson2ef/common/reset.c create mode 100644 arch/mips/loongson2ef/common/rtc.c create mode 100644 arch/mips/loongson2ef/common/serial.c create mode 100644 arch/mips/loongson2ef/common/setup.c create mode 100644 arch/mips/loongson2ef/common/time.c create mode 100644 arch/mips/loongson2ef/common/uart_base.c create mode 100644 arch/mips/loongson2ef/fuloong-2e/Makefile create mode 100644 arch/mips/loongson2ef/fuloong-2e/dma.c create mode 100644 arch/mips/loongson2ef/fuloong-2e/irq.c create mode 100644 arch/mips/loongson2ef/fuloong-2e/reset.c create mode 100644 arch/mips/loongson2ef/lemote-2f/Makefile create mode 100644 arch/mips/loongson2ef/lemote-2f/clock.c create mode 100644 arch/mips/loongson2ef/lemote-2f/dma.c create mode 100644 arch/mips/loongson2ef/lemote-2f/ec_kb3310b.c create mode 100644 arch/mips/loongson2ef/lemote-2f/ec_kb3310b.h create mode 100644 arch/mips/loongson2ef/lemote-2f/irq.c create mode 100644 arch/mips/loongson2ef/lemote-2f/machtype.c create mode 100644 arch/mips/loongson2ef/lemote-2f/pm.c create mode 100644 arch/mips/loongson2ef/lemote-2f/reset.c delete mode 100644 arch/mips/loongson64/common/cs5536/Makefile delete mode 100644 arch/mips/loongson64/common/cs5536/cs5536_acc.c delete mode 100644 arch/mips/loongson64/common/cs5536/cs5536_ehci.c delete mode 100644 arch/mips/loongson64/common/cs5536/cs5536_ide.c delete mode 100644 arch/mips/loongson64/common/cs5536/cs5536_isa.c delete mode 100644 arch/mips/loongson64/common/cs5536/cs5536_mfgpt.c delete mode 100644 arch/mips/loongson64/common/cs5536/cs5536_ohci.c delete mode 100644 arch/mips/loongson64/common/cs5536/cs5536_pci.c delete mode 100644 arch/mips/loongson64/fuloong-2e/Makefile delete mode 100644 arch/mips/loongson64/fuloong-2e/dma.c delete mode 100644 arch/mips/loongson64/fuloong-2e/irq.c delete mode 100644 arch/mips/loongson64/fuloong-2e/reset.c delete mode 100644 arch/mips/loongson64/lemote-2f/Makefile delete mode 100644 arch/mips/loongson64/lemote-2f/clock.c delete mode 100644 arch/mips/loongson64/lemote-2f/dma.c delete mode 100644 arch/mips/loongson64/lemote-2f/ec_kb3310b.c delete mode 100644 arch/mips/loongson64/lemote-2f/ec_kb3310b.h delete mode 100644 arch/mips/loongson64/lemote-2f/irq.c delete mode 100644 arch/mips/loongson64/lemote-2f/machtype.c delete mode 100644 arch/mips/loongson64/lemote-2f/pm.c delete mode 100644 arch/mips/loongson64/lemote-2f/reset.c (limited to 'arch') diff --git a/MAINTAINERS b/MAINTAINERS index 55199ef7fa74..6bd0df79d832 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10871,18 +10871,18 @@ F: arch/mips/include/asm/mach-loongson32/ F: drivers/*/*loongson1* F: drivers/*/*/*loongson1* -MIPS/LOONGSON2 ARCHITECTURE +MIPS/LOONGSON2EF ARCHITECTURE M: Jiaxun Yang L: linux-mips@vger.kernel.org S: Maintained -F: 
arch/mips/loongson64/fuloong-2e/ -F: arch/mips/loongson64/lemote-2f/ -F: arch/mips/include/asm/mach-loongson64/ +F: arch/mips/loongson2ef/ +F: arch/mips/include/asm/mach-loongson2ef/ F: drivers/*/*loongson2* F: drivers/*/*/*loongson2* -MIPS/LOONGSON3 ARCHITECTURE +MIPS/LOONGSON64 ARCHITECTURE M: Huacai Chen +M: Jiaxun Yang L: linux-mips@vger.kernel.org S: Maintained F: arch/mips/loongson64/ diff --git a/arch/mips/Kbuild.platforms b/arch/mips/Kbuild.platforms index 0de839882106..7c0d461483ef 100644 --- a/arch/mips/Kbuild.platforms +++ b/arch/mips/Kbuild.platforms @@ -17,6 +17,7 @@ platforms += jazz platforms += jz4740 platforms += lantiq platforms += lasat +platforms += loongson2ef platforms += loongson32 platforms += loongson64 platforms += mti-malta diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index a4e8c75bc086..aa6f8497ddd9 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -453,18 +453,18 @@ config MACH_LOONGSON32 the Institute of Computing Technology (ICT), Chinese Academy of Sciences (CAS). -config MACH_LOONGSON64 - bool "Loongson-2/3 family of machines" +config MACH_LOONGSON2EF + bool "Loongson-2E/F family of machines" select SYS_SUPPORTS_ZBOOT help - This enables the support of Loongson-2/3 family of machines. + This enables the support of early Loongson-2E/F family of machines. - Loongson-2 is a family of single-core CPUs and Loongson-3 is a - family of multi-core CPUs. They are both 64-bit general-purpose - MIPS-compatible CPUs. Loongson-2/3 are developed by the Institute - of Computing Technology (ICT), Chinese Academy of Sciences (CAS) - in the People's Republic of China. The chief architect is Professor - Weiwu Hu. +config MACH_LOONGSON64 + bool "Loongson-2/3 GSx64 family of machines" + select SYS_SUPPORTS_ZBOOT + help + This enables the support of Loongson-2/3 family of processors with + GSx64 microarchitecture. 
config MACH_PISTACHIO bool "IMG Pistachio SoC based boards" @@ -1037,6 +1037,7 @@ source "arch/mips/sibyte/Kconfig" source "arch/mips/txx9/Kconfig" source "arch/mips/vr41xx/Kconfig" source "arch/mips/cavium-octeon/Kconfig" +source "arch/mips/loongson2ef/Kconfig" source "arch/mips/loongson32/Kconfig" source "arch/mips/loongson64/Kconfig" source "arch/mips/netlogic/Kconfig" diff --git a/arch/mips/configs/fuloong2e_defconfig b/arch/mips/configs/fuloong2e_defconfig index 7a7af706e898..1788ae23bff9 100644 --- a/arch/mips/configs/fuloong2e_defconfig +++ b/arch/mips/configs/fuloong2e_defconfig @@ -15,7 +15,7 @@ CONFIG_EXPERT=y # CONFIG_COMPAT_BRK is not set CONFIG_SLAB=y CONFIG_PROFILING=y -CONFIG_MACH_LOONGSON64=y +CONFIG_MACH_LOONGSON2EF=y CONFIG_PCI=y CONFIG_MIPS32_O32=y CONFIG_MIPS32_N32=y diff --git a/arch/mips/configs/lemote2f_defconfig b/arch/mips/configs/lemote2f_defconfig index d44f1469cf64..f9f93427c9bd 100644 --- a/arch/mips/configs/lemote2f_defconfig +++ b/arch/mips/configs/lemote2f_defconfig @@ -12,7 +12,7 @@ CONFIG_LOG_BUF_SHIFT=15 CONFIG_BLK_DEV_INITRD=y CONFIG_EXPERT=y CONFIG_PROFILING=y -CONFIG_MACH_LOONGSON64=y +CONFIG_MACH_LOONGSON2EF=y CONFIG_LEMOTE_MACH2F=y CONFIG_KEXEC=y # CONFIG_SECCOMP is not set diff --git a/arch/mips/include/asm/mach-loongson2ef/boot_param.h b/arch/mips/include/asm/mach-loongson2ef/boot_param.h new file mode 100644 index 000000000000..8c286bedff3e --- /dev/null +++ b/arch/mips/include/asm/mach-loongson2ef/boot_param.h @@ -0,0 +1,221 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_MACH_LOONGSON64_BOOT_PARAM_H_ +#define __ASM_MACH_LOONGSON64_BOOT_PARAM_H_ + +#define SYSTEM_RAM_LOW 1 +#define SYSTEM_RAM_HIGH 2 +#define SYSTEM_RAM_RESERVED 3 +#define PCI_IO 4 +#define PCI_MEM 5 +#define LOONGSON_CFG_REG 6 +#define VIDEO_ROM 7 +#define ADAPTER_ROM 8 +#define ACPI_TABLE 9 +#define SMBIOS_TABLE 10 +#define MAX_MEMORY_TYPE 11 + +#define LOONGSON3_BOOT_MEM_MAP_MAX 128 +struct efi_memory_map_loongson { + u16 vers; /* version of efi_memory_map */ + u32 nr_map; /* number of memory_maps */ + u32 mem_freq; /* memory frequence */ + struct mem_map { + u32 node_id; /* node_id which memory attached to */ + u32 mem_type; /* system memory, pci memory, pci io, etc. */ + u64 mem_start; /* memory map start address */ + u32 mem_size; /* each memory_map size, not the total size */ + } map[LOONGSON3_BOOT_MEM_MAP_MAX]; +} __packed; + +enum loongson_cpu_type { + Legacy_2E = 0x0, + Legacy_2F = 0x1, + Legacy_3A = 0x2, + Legacy_3B = 0x3, + Legacy_1A = 0x4, + Legacy_1B = 0x5, + Legacy_2G = 0x6, + Legacy_2H = 0x7, + Loongson_1A = 0x100, + Loongson_1B = 0x101, + Loongson_2E = 0x200, + Loongson_2F = 0x201, + Loongson_2G = 0x202, + Loongson_2H = 0x203, + Loongson_3A = 0x300, + Loongson_3B = 0x301 +}; + +/* + * Capability and feature descriptor structure for MIPS CPU + */ +struct efi_cpuinfo_loongson { + u16 vers; /* version of efi_cpuinfo_loongson */ + u32 processor_id; /* PRID, e.g. 6305, 6306 */ + u32 cputype; /* Loongson_3A/3B, etc. 
*/ + u32 total_node; /* num of total numa nodes */ + u16 cpu_startup_core_id; /* Boot core id */ + u16 reserved_cores_mask; + u32 cpu_clock_freq; /* cpu_clock */ + u32 nr_cpus; +} __packed; + +#define MAX_UARTS 64 +struct uart_device { + u32 iotype; /* see include/linux/serial_core.h */ + u32 uartclk; + u32 int_offset; + u64 uart_base; +} __packed; + +#define MAX_SENSORS 64 +#define SENSOR_TEMPER 0x00000001 +#define SENSOR_VOLTAGE 0x00000002 +#define SENSOR_FAN 0x00000004 +struct sensor_device { + char name[32]; /* a formal name */ + char label[64]; /* a flexible description */ + u32 type; /* SENSOR_* */ + u32 id; /* instance id of a sensor-class */ + u32 fan_policy; /* see loongson_hwmon.h */ + u32 fan_percent;/* only for constant speed policy */ + u64 base_addr; /* base address of device registers */ +} __packed; + +struct system_loongson { + u16 vers; /* version of system_loongson */ + u32 ccnuma_smp; /* 0: no numa; 1: has numa */ + u32 sing_double_channel; /* 1:single; 2:double */ + u32 nr_uarts; + struct uart_device uarts[MAX_UARTS]; + u32 nr_sensors; + struct sensor_device sensors[MAX_SENSORS]; + char has_ec; + char ec_name[32]; + u64 ec_base_addr; + char has_tcm; + char tcm_name[32]; + u64 tcm_base_addr; + u64 workarounds; /* see workarounds.h */ +} __packed; + +struct irq_source_routing_table { + u16 vers; + u16 size; + u16 rtr_bus; + u16 rtr_devfn; + u32 vendor; + u32 device; + u32 PIC_type; /* conform use HT or PCI to route to CPU-PIC */ + u64 ht_int_bit; /* 3A: 1<<24; 3B: 1<<16 */ + u64 ht_enable; /* irqs used in this PIC */ + u32 node_id; /* node id: 0x0-0; 0x1-1; 0x10-2; 0x11-3 */ + u64 pci_mem_start_addr; + u64 pci_mem_end_addr; + u64 pci_io_start_addr; + u64 pci_io_end_addr; + u64 pci_config_addr; + u32 dma_mask_bits; +} __packed; + +struct interface_info { + u16 vers; /* version of the specificition */ + u16 size; + u8 flag; + char description[64]; +} __packed; + +#define MAX_RESOURCE_NUMBER 128 +struct resource_loongson { + u64 start; /* resource start address */ + u64 end; /* resource end address */ + char name[64]; + u32 flags; +}; + +struct archdev_data {}; /* arch specific additions */ + +struct board_devices { + char name[64]; /* hold the device name */ + u32 num_resources; /* number of device_resource */ + /* for each device's resource */ + struct resource_loongson resource[MAX_RESOURCE_NUMBER]; + /* arch specific additions */ + struct archdev_data archdata; +}; + +struct loongson_special_attribute { + u16 vers; /* version of this special */ + char special_name[64]; /* special_atribute_name */ + u32 loongson_special_type; /* type of special device */ + /* for each device's resource */ + struct resource_loongson resource[MAX_RESOURCE_NUMBER]; +}; + +struct loongson_params { + u64 memory_offset; /* efi_memory_map_loongson struct offset */ + u64 cpu_offset; /* efi_cpuinfo_loongson struct offset */ + u64 system_offset; /* system_loongson struct offset */ + u64 irq_offset; /* irq_source_routing_table struct offset */ + u64 interface_offset; /* interface_info struct offset */ + u64 special_offset; /* loongson_special_attribute struct offset */ + u64 boarddev_table_offset; /* board_devices offset */ +}; + +struct smbios_tables { + u16 vers; /* version of smbios */ + u64 vga_bios; /* vga_bios address */ + struct loongson_params lp; +}; + +struct efi_reset_system_t { + u64 ResetCold; + u64 ResetWarm; + u64 ResetType; + u64 Shutdown; + u64 DoSuspend; /* NULL if not support */ +}; + +struct efi_loongson { + u64 mps; /* MPS table */ + u64 acpi; /* ACPI table (IA64 ext 0.71) */ + 
u64 acpi20; /* ACPI table (ACPI 2.0) */ + struct smbios_tables smbios; /* SM BIOS table */ + u64 sal_systab; /* SAL system table */ + u64 boot_info; /* boot info table */ +}; + +struct boot_params { + struct efi_loongson efi; + struct efi_reset_system_t reset_system; +}; + +struct loongson_system_configuration { + u32 nr_cpus; + u32 nr_nodes; + int cores_per_node; + int cores_per_package; + u16 boot_cpu_id; + u16 reserved_cpus_mask; + enum loongson_cpu_type cputype; + u64 ht_control_base; + u64 pci_mem_start_addr; + u64 pci_mem_end_addr; + u64 pci_io_base; + u64 restart_addr; + u64 poweroff_addr; + u64 suspend_addr; + u64 vgabios_addr; + u32 dma_mask_bits; + char ecname[32]; + u32 nr_uarts; + struct uart_device uarts[MAX_UARTS]; + u32 nr_sensors; + struct sensor_device sensors[MAX_SENSORS]; + u64 workarounds; +}; + +extern struct efi_memory_map_loongson *loongson_memmap; +extern struct loongson_system_configuration loongson_sysconf; + +#endif diff --git a/arch/mips/include/asm/mach-loongson2ef/cpu-feature-overrides.h b/arch/mips/include/asm/mach-loongson2ef/cpu-feature-overrides.h new file mode 100644 index 000000000000..83ad90d8005d --- /dev/null +++ b/arch/mips/include/asm/mach-loongson2ef/cpu-feature-overrides.h @@ -0,0 +1,53 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2009 Wu Zhangjin + * Copyright (C) 2009 Philippe Vachon + * Copyright (C) 2009 Zhang Le + * + * reference: /proc/cpuinfo, + * arch/mips/kernel/cpu-probe.c(cpu_probe_legacy), + * arch/mips/kernel/proc.c(show_cpuinfo), + * loongson2f user manual. + */ + +#ifndef __ASM_MACH_LOONGSON64_CPU_FEATURE_OVERRIDES_H +#define __ASM_MACH_LOONGSON64_CPU_FEATURE_OVERRIDES_H + +#define cpu_has_32fpr 1 +#define cpu_has_3k_cache 0 +#define cpu_has_4k_cache 1 +#define cpu_has_4kex 1 +#define cpu_has_64bits 1 +#define cpu_has_cache_cdex_p 0 +#define cpu_has_cache_cdex_s 0 +#define cpu_has_counter 1 +#define cpu_has_dc_aliases (PAGE_SIZE < 0x4000) +#define cpu_has_divec 0 +#define cpu_has_ejtag 0 +#define cpu_has_inclusive_pcaches 1 +#define cpu_has_llsc 1 +#define cpu_has_mcheck 0 +#define cpu_has_mdmx 0 +#define cpu_has_mips16 0 +#define cpu_has_mips16e2 0 +#define cpu_has_mips3d 0 +#define cpu_has_mipsmt 0 +#define cpu_has_smartmips 0 +#define cpu_has_tlb 1 +#define cpu_has_tx39_cache 0 +#define cpu_has_vce 0 +#define cpu_has_veic 0 +#define cpu_has_vint 0 +#define cpu_has_vtag_icache 0 +#define cpu_has_watch 1 + +#ifdef CONFIG_CPU_LOONGSON64 +#define cpu_has_wsbh 1 +#define cpu_has_ic_fills_f_dc 1 +#define cpu_hwrena_impl_bits 0xc0000000 +#endif + +#endif /* __ASM_MACH_LOONGSON64_CPU_FEATURE_OVERRIDES_H */ diff --git a/arch/mips/include/asm/mach-loongson2ef/cs5536/cs5536.h b/arch/mips/include/asm/mach-loongson2ef/cs5536/cs5536.h new file mode 100644 index 000000000000..9795b3361532 --- /dev/null +++ b/arch/mips/include/asm/mach-loongson2ef/cs5536/cs5536.h @@ -0,0 +1,306 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * The header file of cs5536 south bridge. + * + * Copyright (C) 2007 Lemote, Inc. 
+ * Author : jlliu + */ + +#ifndef _CS5536_H +#define _CS5536_H + +#include + +extern void _rdmsr(u32 msr, u32 *hi, u32 *lo); +extern void _wrmsr(u32 msr, u32 hi, u32 lo); + +/* + * MSR module base + */ +#define CS5536_SB_MSR_BASE (0x00000000) +#define CS5536_GLIU_MSR_BASE (0x10000000) +#define CS5536_ILLEGAL_MSR_BASE (0x20000000) +#define CS5536_USB_MSR_BASE (0x40000000) +#define CS5536_IDE_MSR_BASE (0x60000000) +#define CS5536_DIVIL_MSR_BASE (0x80000000) +#define CS5536_ACC_MSR_BASE (0xa0000000) +#define CS5536_UNUSED_MSR_BASE (0xc0000000) +#define CS5536_GLCP_MSR_BASE (0xe0000000) + +#define SB_MSR_REG(offset) (CS5536_SB_MSR_BASE | (offset)) +#define GLIU_MSR_REG(offset) (CS5536_GLIU_MSR_BASE | (offset)) +#define ILLEGAL_MSR_REG(offset) (CS5536_ILLEGAL_MSR_BASE | (offset)) +#define USB_MSR_REG(offset) (CS5536_USB_MSR_BASE | (offset)) +#define IDE_MSR_REG(offset) (CS5536_IDE_MSR_BASE | (offset)) +#define DIVIL_MSR_REG(offset) (CS5536_DIVIL_MSR_BASE | (offset)) +#define ACC_MSR_REG(offset) (CS5536_ACC_MSR_BASE | (offset)) +#define UNUSED_MSR_REG(offset) (CS5536_UNUSED_MSR_BASE | (offset)) +#define GLCP_MSR_REG(offset) (CS5536_GLCP_MSR_BASE | (offset)) + +/* + * BAR SPACE OF VIRTUAL PCI : + * range for pci probe use, length is the actual size. + */ +/* IO space for all DIVIL modules */ +#define CS5536_IRQ_RANGE 0xffffffe0 /* USERD FOR PCI PROBE */ +#define CS5536_IRQ_LENGTH 0x20 /* THE REGS ACTUAL LENGTH */ +#define CS5536_SMB_RANGE 0xfffffff8 +#define CS5536_SMB_LENGTH 0x08 +#define CS5536_GPIO_RANGE 0xffffff00 +#define CS5536_GPIO_LENGTH 0x100 +#define CS5536_MFGPT_RANGE 0xffffffc0 +#define CS5536_MFGPT_LENGTH 0x40 +#define CS5536_ACPI_RANGE 0xffffffe0 +#define CS5536_ACPI_LENGTH 0x20 +#define CS5536_PMS_RANGE 0xffffff80 +#define CS5536_PMS_LENGTH 0x80 +/* IO space for IDE */ +#define CS5536_IDE_RANGE 0xfffffff0 +#define CS5536_IDE_LENGTH 0x10 +/* IO space for ACC */ +#define CS5536_ACC_RANGE 0xffffff80 +#define CS5536_ACC_LENGTH 0x80 +/* MEM space for ALL USB modules */ +#define CS5536_OHCI_RANGE 0xfffff000 +#define CS5536_OHCI_LENGTH 0x1000 +#define CS5536_EHCI_RANGE 0xfffff000 +#define CS5536_EHCI_LENGTH 0x1000 + +/* + * PCI MSR ACCESS + */ +#define PCI_MSR_CTRL 0xF0 +#define PCI_MSR_ADDR 0xF4 +#define PCI_MSR_DATA_LO 0xF8 +#define PCI_MSR_DATA_HI 0xFC + +/**************** MSR *****************************/ + +/* + * GLIU STANDARD MSR + */ +#define GLIU_CAP 0x00 +#define GLIU_CONFIG 0x01 +#define GLIU_SMI 0x02 +#define GLIU_ERROR 0x03 +#define GLIU_PM 0x04 +#define GLIU_DIAG 0x05 + +/* + * GLIU SPEC. 
MSR + */ +#define GLIU_P2D_BM0 0x20 +#define GLIU_P2D_BM1 0x21 +#define GLIU_P2D_BM2 0x22 +#define GLIU_P2D_BMK0 0x23 +#define GLIU_P2D_BMK1 0x24 +#define GLIU_P2D_BM3 0x25 +#define GLIU_P2D_BM4 0x26 +#define GLIU_COH 0x80 +#define GLIU_PAE 0x81 +#define GLIU_ARB 0x82 +#define GLIU_ASMI 0x83 +#define GLIU_AERR 0x84 +#define GLIU_DEBUG 0x85 +#define GLIU_PHY_CAP 0x86 +#define GLIU_NOUT_RESP 0x87 +#define GLIU_NOUT_WDATA 0x88 +#define GLIU_WHOAMI 0x8B +#define GLIU_SLV_DIS 0x8C +#define GLIU_IOD_BM0 0xE0 +#define GLIU_IOD_BM1 0xE1 +#define GLIU_IOD_BM2 0xE2 +#define GLIU_IOD_BM3 0xE3 +#define GLIU_IOD_BM4 0xE4 +#define GLIU_IOD_BM5 0xE5 +#define GLIU_IOD_BM6 0xE6 +#define GLIU_IOD_BM7 0xE7 +#define GLIU_IOD_BM8 0xE8 +#define GLIU_IOD_BM9 0xE9 +#define GLIU_IOD_SC0 0xEA +#define GLIU_IOD_SC1 0xEB +#define GLIU_IOD_SC2 0xEC +#define GLIU_IOD_SC3 0xED +#define GLIU_IOD_SC4 0xEE +#define GLIU_IOD_SC5 0xEF +#define GLIU_IOD_SC6 0xF0 +#define GLIU_IOD_SC7 0xF1 + +/* + * SB STANDARD + */ +#define SB_CAP 0x00 +#define SB_CONFIG 0x01 +#define SB_SMI 0x02 +#define SB_ERROR 0x03 +#define SB_MAR_ERR_EN 0x00000001 +#define SB_TAR_ERR_EN 0x00000002 +#define SB_RSVD_BIT1 0x00000004 +#define SB_EXCEP_ERR_EN 0x00000008 +#define SB_SYSE_ERR_EN 0x00000010 +#define SB_PARE_ERR_EN 0x00000020 +#define SB_TAS_ERR_EN 0x00000040 +#define SB_MAR_ERR_FLAG 0x00010000 +#define SB_TAR_ERR_FLAG 0x00020000 +#define SB_RSVD_BIT2 0x00040000 +#define SB_EXCEP_ERR_FLAG 0x00080000 +#define SB_SYSE_ERR_FLAG 0x00100000 +#define SB_PARE_ERR_FLAG 0x00200000 +#define SB_TAS_ERR_FLAG 0x00400000 +#define SB_PM 0x04 +#define SB_DIAG 0x05 + +/* + * SB SPEC. + */ +#define SB_CTRL 0x10 +#define SB_R0 0x20 +#define SB_R1 0x21 +#define SB_R2 0x22 +#define SB_R3 0x23 +#define SB_R4 0x24 +#define SB_R5 0x25 +#define SB_R6 0x26 +#define SB_R7 0x27 +#define SB_R8 0x28 +#define SB_R9 0x29 +#define SB_R10 0x2A +#define SB_R11 0x2B +#define SB_R12 0x2C +#define SB_R13 0x2D +#define SB_R14 0x2E +#define SB_R15 0x2F + +/* + * GLCP STANDARD + */ +#define GLCP_CAP 0x00 +#define GLCP_CONFIG 0x01 +#define GLCP_SMI 0x02 +#define GLCP_ERROR 0x03 +#define GLCP_PM 0x04 +#define GLCP_DIAG 0x05 + +/* + * GLCP SPEC. 
+ */ +#define GLCP_CLK_DIS_DELAY 0x08 +#define GLCP_PM_CLK_DISABLE 0x09 +#define GLCP_GLB_PM 0x0B +#define GLCP_DBG_OUT 0x0C +#define GLCP_RSVD1 0x0D +#define GLCP_SOFT_COM 0x0E +#define SOFT_BAR_SMB_FLAG 0x00000001 +#define SOFT_BAR_GPIO_FLAG 0x00000002 +#define SOFT_BAR_MFGPT_FLAG 0x00000004 +#define SOFT_BAR_IRQ_FLAG 0x00000008 +#define SOFT_BAR_PMS_FLAG 0x00000010 +#define SOFT_BAR_ACPI_FLAG 0x00000020 +#define SOFT_BAR_IDE_FLAG 0x00000400 +#define SOFT_BAR_ACC_FLAG 0x00000800 +#define SOFT_BAR_OHCI_FLAG 0x00001000 +#define SOFT_BAR_EHCI_FLAG 0x00002000 +#define GLCP_RSVD2 0x0F +#define GLCP_CLK_OFF 0x10 +#define GLCP_CLK_ACTIVE 0x11 +#define GLCP_CLK_DISABLE 0x12 +#define GLCP_CLK4ACK 0x13 +#define GLCP_SYS_RST 0x14 +#define GLCP_RSVD3 0x15 +#define GLCP_DBG_CLK_CTRL 0x16 +#define GLCP_CHIP_REV_ID 0x17 + +/* PIC */ +#define PIC_YSEL_LOW 0x20 +#define PIC_YSEL_LOW_USB_SHIFT 8 +#define PIC_YSEL_LOW_ACC_SHIFT 16 +#define PIC_YSEL_LOW_FLASH_SHIFT 24 +#define PIC_YSEL_HIGH 0x21 +#define PIC_ZSEL_LOW 0x22 +#define PIC_ZSEL_HIGH 0x23 +#define PIC_IRQM_PRIM 0x24 +#define PIC_IRQM_LPC 0x25 +#define PIC_XIRR_STS_LOW 0x26 +#define PIC_XIRR_STS_HIGH 0x27 +#define PCI_SHDW 0x34 + +/* + * DIVIL STANDARD + */ +#define DIVIL_CAP 0x00 +#define DIVIL_CONFIG 0x01 +#define DIVIL_SMI 0x02 +#define DIVIL_ERROR 0x03 +#define DIVIL_PM 0x04 +#define DIVIL_DIAG 0x05 + +/* + * DIVIL SPEC. + */ +#define DIVIL_LBAR_IRQ 0x08 +#define DIVIL_LBAR_KEL 0x09 +#define DIVIL_LBAR_SMB 0x0B +#define DIVIL_LBAR_GPIO 0x0C +#define DIVIL_LBAR_MFGPT 0x0D +#define DIVIL_LBAR_ACPI 0x0E +#define DIVIL_LBAR_PMS 0x0F +#define DIVIL_LEG_IO 0x14 +#define DIVIL_BALL_OPTS 0x15 +#define DIVIL_SOFT_IRQ 0x16 +#define DIVIL_SOFT_RESET 0x17 + +/* MFGPT */ +#define MFGPT_IRQ 0x28 + +/* + * IDE STANDARD + */ +#define IDE_CAP 0x00 +#define IDE_CONFIG 0x01 +#define IDE_SMI 0x02 +#define IDE_ERROR 0x03 +#define IDE_PM 0x04 +#define IDE_DIAG 0x05 + +/* + * IDE SPEC. + */ +#define IDE_IO_BAR 0x08 +#define IDE_CFG 0x10 +#define IDE_DTC 0x12 +#define IDE_CAST 0x13 +#define IDE_ETC 0x14 +#define IDE_INTERNAL_PM 0x15 + +/* + * ACC STANDARD + */ +#define ACC_CAP 0x00 +#define ACC_CONFIG 0x01 +#define ACC_SMI 0x02 +#define ACC_ERROR 0x03 +#define ACC_PM 0x04 +#define ACC_DIAG 0x05 + +/* + * USB STANDARD + */ +#define USB_CAP 0x00 +#define USB_CONFIG 0x01 +#define USB_SMI 0x02 +#define USB_ERROR 0x03 +#define USB_PM 0x04 +#define USB_DIAG 0x05 + +/* + * USB SPEC. 
+ */ +#define USB_OHCI 0x08 +#define USB_EHCI 0x09 + +/****************** NATIVE ***************************/ +/* GPIO : I/O SPACE; REG : 32BITS */ +#define GPIOL_OUT_VAL 0x00 +#define GPIOL_OUT_EN 0x04 + +#endif /* _CS5536_H */ diff --git a/arch/mips/include/asm/mach-loongson2ef/cs5536/cs5536_mfgpt.h b/arch/mips/include/asm/mach-loongson2ef/cs5536/cs5536_mfgpt.h new file mode 100644 index 000000000000..52e8bb0fc04d --- /dev/null +++ b/arch/mips/include/asm/mach-loongson2ef/cs5536/cs5536_mfgpt.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * cs5536 mfgpt header file + */ + +#ifndef _CS5536_MFGPT_H +#define _CS5536_MFGPT_H + +#include +#include + +#ifdef CONFIG_CS5536_MFGPT +extern void setup_mfgpt0_timer(void); +extern void disable_mfgpt0_counter(void); +extern void enable_mfgpt0_counter(void); +#else +static inline void __maybe_unused setup_mfgpt0_timer(void) +{ +} +static inline void __maybe_unused disable_mfgpt0_counter(void) +{ +} +static inline void __maybe_unused enable_mfgpt0_counter(void) +{ +} +#endif + +#define MFGPT_TICK_RATE 14318000 +#define COMPARE ((MFGPT_TICK_RATE + HZ/2) / HZ) + +#define MFGPT_BASE mfgpt_base +#define MFGPT0_CMP2 (MFGPT_BASE + 2) +#define MFGPT0_CNT (MFGPT_BASE + 4) +#define MFGPT0_SETUP (MFGPT_BASE + 6) + +#endif /*!_CS5536_MFGPT_H */ diff --git a/arch/mips/include/asm/mach-loongson2ef/cs5536/cs5536_pci.h b/arch/mips/include/asm/mach-loongson2ef/cs5536/cs5536_pci.h new file mode 100644 index 000000000000..a0d4b752899e --- /dev/null +++ b/arch/mips/include/asm/mach-loongson2ef/cs5536/cs5536_pci.h @@ -0,0 +1,153 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * the definition file of cs5536 Virtual Support Module(VSM). + * pci configuration space can be accessed through the VSM, so + * there is no need of the MSR read/write now, except the spec. + * MSR registers which are not implemented yet. + * + * Copyright (C) 2007 Lemote Inc. + * Author : jlliu, liujl@lemote.com + */ + +#ifndef _CS5536_PCI_H +#define _CS5536_PCI_H + +#include +#include + +extern void cs5536_pci_conf_write4(int function, int reg, u32 value); +extern u32 cs5536_pci_conf_read4(int function, int reg); + +#define CS5536_ACC_INTR 9 +#define CS5536_IDE_INTR 14 +#define CS5536_USB_INTR 11 +#define CS5536_MFGPT_INTR 5 +#define CS5536_UART1_INTR 4 +#define CS5536_UART2_INTR 3 + +/************** PCI BUS DEVICE FUNCTION ***************/ + +/* + * PCI bus device function + */ +#define PCI_BUS_CS5536 0 +#define PCI_IDSEL_CS5536 14 + +/********** STANDARD PCI-2.2 EXPANSION ****************/ + +/* + * PCI configuration space + * we have to virtualize the PCI configure space head, so we should + * define the necessary IDs and some others. 
+ */
+
+/* CONFIG of PCI VENDOR ID */
+#define CFG_PCI_VENDOR_ID(mod_dev_id, sys_vendor_id) \
+ (((mod_dev_id) << 16) | (sys_vendor_id))
+
+/* VENDOR ID */
+#define CS5536_VENDOR_ID 0x1022
+
+/* DEVICE ID */
+#define CS5536_ISA_DEVICE_ID 0x2090
+#define CS5536_IDE_DEVICE_ID 0x209a
+#define CS5536_ACC_DEVICE_ID 0x2093
+#define CS5536_OHCI_DEVICE_ID 0x2094
+#define CS5536_EHCI_DEVICE_ID 0x2095
+
+/* CLASS CODE : CLASS SUB-CLASS INTERFACE */
+#define CS5536_ISA_CLASS_CODE 0x060100
+#define CS5536_IDE_CLASS_CODE 0x010180
+#define CS5536_ACC_CLASS_CODE 0x040100
+#define CS5536_OHCI_CLASS_CODE 0x0C0310
+#define CS5536_EHCI_CLASS_CODE 0x0C0320
+
+/* BHLC : BIST HEADER-TYPE LATENCY-TIMER CACHE-LINE-SIZE */
+
+#define CFG_PCI_CACHE_LINE_SIZE(header_type, latency_timer) \
+ ((PCI_NONE_BIST << 24) | ((header_type) << 16) \
+ | ((latency_timer) << 8) | PCI_NORMAL_CACHE_LINE_SIZE);
+
+#define PCI_NONE_BIST 0x00 /* RO not implemented yet. */
+#define PCI_BRIDGE_HEADER_TYPE 0x80 /* RO */
+#define PCI_NORMAL_HEADER_TYPE 0x00
+#define PCI_NORMAL_LATENCY_TIMER 0x00
+#define PCI_NORMAL_CACHE_LINE_SIZE 0x08 /* RW */
+
+/* BAR */
+#define PCI_BAR0_REG 0x10
+#define PCI_BAR1_REG 0x14
+#define PCI_BAR2_REG 0x18
+#define PCI_BAR3_REG 0x1c
+#define PCI_BAR4_REG 0x20
+#define PCI_BAR5_REG 0x24
+#define PCI_BAR_RANGE_MASK 0xFFFFFFFF
+
+/* CARDBUS CIS POINTER */
+#define PCI_CARDBUS_CIS_POINTER 0x00000000
+
+/* SUBSYSTEM VENDOR ID */
+#define CS5536_SUB_VENDOR_ID CS5536_VENDOR_ID
+
+/* SUBSYSTEM ID */
+#define CS5536_ISA_SUB_ID CS5536_ISA_DEVICE_ID
+#define CS5536_IDE_SUB_ID CS5536_IDE_DEVICE_ID
+#define CS5536_ACC_SUB_ID CS5536_ACC_DEVICE_ID
+#define CS5536_OHCI_SUB_ID CS5536_OHCI_DEVICE_ID
+#define CS5536_EHCI_SUB_ID CS5536_EHCI_DEVICE_ID
+
+/* EXPANSION ROM BAR */
+#define PCI_EXPANSION_ROM_BAR 0x00000000
+
+/* CAPABILITIES POINTER */
+#define PCI_CAPLIST_POINTER 0x00000000
+#define PCI_CAPLIST_USB_POINTER 0x40
+/* INTERRUPT */
+
+#define CFG_PCI_INTERRUPT_LINE(pin, mod_intr) \
+ ((PCI_MAX_LATENCY << 24) | (PCI_MIN_GRANT << 16) | \
+ ((pin) << 8) | (mod_intr))
+
+#define PCI_MAX_LATENCY 0x40
+#define PCI_MIN_GRANT 0x00
+#define PCI_DEFAULT_PIN 0x01
+
+/*********** EXPANSION PCI REG ************************/
+
+/*
+ * ISA EXPANSION
+ */
+#define PCI_UART1_INT_REG 0x50
+#define PCI_UART2_INT_REG 0x54
+#define PCI_ISA_FIXUP_REG 0x58
+
+/*
+ * IDE EXPANSION
+ */
+#define PCI_IDE_CFG_REG 0x40
+#define CS5536_IDE_FLASH_SIGNATURE 0xDEADBEEF
+#define PCI_IDE_DTC_REG 0x48
+#define PCI_IDE_CAST_REG 0x4C
+#define PCI_IDE_ETC_REG 0x50
+#define PCI_IDE_PM_REG 0x54
+#define PCI_IDE_INT_REG 0x60
+
+/*
+ * ACC EXPANSION
+ */
+#define PCI_ACC_INT_REG 0x50
+
+/*
+ * OHCI EXPANSION : INTERRUPT IS IMPLEMENTED BY THE OHCI
+ */
+#define PCI_OHCI_PM_REG 0x40
+#define PCI_OHCI_INT_REG 0x50
+
+/*
+ * EHCI EXPANSION
+ */
+#define PCI_EHCI_LEGSMIEN_REG 0x50
+#define PCI_EHCI_LEGSMISTS_REG 0x54
+#define PCI_EHCI_FLADJ_REG 0x60
+
+#endif /* _CS5536_PCI_H_ */
diff --git a/arch/mips/include/asm/mach-loongson2ef/cs5536/cs5536_vsm.h b/arch/mips/include/asm/mach-loongson2ef/cs5536/cs5536_vsm.h
new file mode 100644
index 000000000000..70d0153cccc3
--- /dev/null
+++ b/arch/mips/include/asm/mach-loongson2ef/cs5536/cs5536_vsm.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * the read/write interfaces for Virtual Support Module(VSM)
+ *
+ * Copyright (C) 2009 Lemote, Inc.
+ * Author: Wu Zhangjin + */ + +#ifndef _CS5536_VSM_H +#define _CS5536_VSM_H + +#include + +typedef void (*cs5536_pci_vsm_write)(int reg, u32 value); +typedef u32 (*cs5536_pci_vsm_read)(int reg); + +#define DECLARE_CS5536_MODULE(name) \ +extern void pci_##name##_write_reg(int reg, u32 value); \ +extern u32 pci_##name##_read_reg(int reg); + +/* ide module */ +DECLARE_CS5536_MODULE(ide) +/* acc module */ +DECLARE_CS5536_MODULE(acc) +/* ohci module */ +DECLARE_CS5536_MODULE(ohci) +/* isa module */ +DECLARE_CS5536_MODULE(isa) +/* ehci module */ +DECLARE_CS5536_MODULE(ehci) + +#endif /* _CS5536_VSM_H */ diff --git a/arch/mips/include/asm/mach-loongson2ef/irq.h b/arch/mips/include/asm/mach-loongson2ef/irq.h new file mode 100644 index 000000000000..557e069c400c --- /dev/null +++ b/arch/mips/include/asm/mach-loongson2ef/irq.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_MACH_LOONGSON64_IRQ_H_ +#define __ASM_MACH_LOONGSON64_IRQ_H_ + +#include + +#ifdef CONFIG_CPU_LOONGSON64 + +/* cpu core interrupt numbers */ +#define MIPS_CPU_IRQ_BASE 56 + +#define LOONGSON_UART_IRQ (MIPS_CPU_IRQ_BASE + 2) /* UART */ +#define LOONGSON_BRIDGE_IRQ (MIPS_CPU_IRQ_BASE + 3) /* CASCADE */ +#define LOONGSON_TIMER_IRQ (MIPS_CPU_IRQ_BASE + 7) /* CPU Timer */ + +#define LOONGSON_HT1_CFG_BASE loongson_sysconf.ht_control_base +#define LOONGSON_HT1_INT_VECTOR_BASE (LOONGSON_HT1_CFG_BASE + 0x80) +#define LOONGSON_HT1_INT_EN_BASE (LOONGSON_HT1_CFG_BASE + 0xa0) +#define LOONGSON_HT1_INT_VECTOR(n) \ + LOONGSON3_REG32(LOONGSON_HT1_INT_VECTOR_BASE, 4 * (n)) +#define LOONGSON_HT1_INTN_EN(n) \ + LOONGSON3_REG32(LOONGSON_HT1_INT_EN_BASE, 4 * (n)) + +#define LOONGSON_INT_ROUTER_OFFSET 0x1400 +#define LOONGSON_INT_ROUTER_INTEN \ + LOONGSON3_REG32(LOONGSON3_REG_BASE, LOONGSON_INT_ROUTER_OFFSET + 0x24) +#define LOONGSON_INT_ROUTER_INTENSET \ + LOONGSON3_REG32(LOONGSON3_REG_BASE, LOONGSON_INT_ROUTER_OFFSET + 0x28) +#define LOONGSON_INT_ROUTER_INTENCLR \ + LOONGSON3_REG32(LOONGSON3_REG_BASE, LOONGSON_INT_ROUTER_OFFSET + 0x2c) +#define LOONGSON_INT_ROUTER_ENTRY(n) \ + LOONGSON3_REG8(LOONGSON3_REG_BASE, LOONGSON_INT_ROUTER_OFFSET + n) +#define LOONGSON_INT_ROUTER_LPC LOONGSON_INT_ROUTER_ENTRY(0x0a) +#define LOONGSON_INT_ROUTER_HT1(n) LOONGSON_INT_ROUTER_ENTRY(n + 0x18) + +#define LOONGSON_INT_COREx_INTy(x, y) (1<<(x) | 1<<(y+4)) /* route to int y of core x */ + +#endif + +extern void fixup_irqs(void); +extern void loongson3_ipi_interrupt(struct pt_regs *regs); + +#include_next +#endif /* __ASM_MACH_LOONGSON64_IRQ_H_ */ diff --git a/arch/mips/include/asm/mach-loongson2ef/kernel-entry-init.h b/arch/mips/include/asm/mach-loongson2ef/kernel-entry-init.h new file mode 100644 index 000000000000..28ccb06c8289 --- /dev/null +++ b/arch/mips/include/asm/mach-loongson2ef/kernel-entry-init.h @@ -0,0 +1,90 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2005 Embedded Alley Solutions, Inc + * Copyright (C) 2005 Ralf Baechle (ralf@linux-mips.org) + * Copyright (C) 2009 Jiajie Chen (chenjiajie@cse.buaa.edu.cn) + * Copyright (C) 2012 Huacai Chen (chenhc@lemote.com) + */ +#ifndef __ASM_MACH_LOONGSON64_KERNEL_ENTRY_H +#define __ASM_MACH_LOONGSON64_KERNEL_ENTRY_H + +#include + +/* + * Override macros used in arch/mips/kernel/head.S. 
+ */ + .macro kernel_entry_setup +#ifdef CONFIG_CPU_LOONGSON64 + .set push + .set mips64 + /* Set LPA on LOONGSON3 config3 */ + mfc0 t0, CP0_CONFIG3 + or t0, (0x1 << 7) + mtc0 t0, CP0_CONFIG3 + /* Set ELPA on LOONGSON3 pagegrain */ + mfc0 t0, CP0_PAGEGRAIN + or t0, (0x1 << 29) + mtc0 t0, CP0_PAGEGRAIN + /* Enable STFill Buffer */ + mfc0 t0, CP0_PRID + /* Loongson-3A R4+ */ + andi t1, t0, PRID_IMP_MASK + li t2, PRID_IMP_LOONGSON_64G + beq t1, t2, 1f + nop + /* Loongson-3A R2/R3 */ + andi t0, (PRID_IMP_MASK | PRID_REV_MASK) + slti t0, (PRID_IMP_LOONGSON_64C | PRID_REV_LOONGSON3A_R2_0) + bnez t0, 2f + nop +1: + mfc0 t0, CP0_CONFIG6 + or t0, 0x100 + mtc0 t0, CP0_CONFIG6 +2: + _ehb + .set pop +#endif + .endm + +/* + * Do SMP slave processor setup. + */ + .macro smp_slave_setup +#ifdef CONFIG_CPU_LOONGSON64 + .set push + .set mips64 + /* Set LPA on LOONGSON3 config3 */ + mfc0 t0, CP0_CONFIG3 + or t0, (0x1 << 7) + mtc0 t0, CP0_CONFIG3 + /* Set ELPA on LOONGSON3 pagegrain */ + mfc0 t0, CP0_PAGEGRAIN + or t0, (0x1 << 29) + mtc0 t0, CP0_PAGEGRAIN + /* Enable STFill Buffer */ + mfc0 t0, CP0_PRID + /* Loongson-3A R4+ */ + andi t1, t0, PRID_IMP_MASK + li t2, PRID_IMP_LOONGSON_64G + beq t1, t2, 1f + nop + /* Loongson-3A R2/R3 */ + andi t0, (PRID_IMP_MASK | PRID_REV_MASK) + slti t0, (PRID_IMP_LOONGSON_64C | PRID_REV_LOONGSON3A_R2_0) + bnez t0, 2f + nop +1: + mfc0 t0, CP0_CONFIG6 + or t0, 0x100 + mtc0 t0, CP0_CONFIG6 +2: + _ehb + .set pop +#endif + .endm + +#endif /* __ASM_MACH_LOONGSON64_KERNEL_ENTRY_H */ diff --git a/arch/mips/include/asm/mach-loongson2ef/loongson.h b/arch/mips/include/asm/mach-loongson2ef/loongson.h new file mode 100644 index 000000000000..40a24b76b874 --- /dev/null +++ b/arch/mips/include/asm/mach-loongson2ef/loongson.h @@ -0,0 +1,355 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2009 Lemote, Inc. + * Author: Wu Zhangjin + */ + +#ifndef __ASM_MACH_LOONGSON64_LOONGSON_H +#define __ASM_MACH_LOONGSON64_LOONGSON_H + +#include +#include +#include +#include + +/* loongson internal northbridge initialization */ +extern void bonito_irq_init(void); + +/* machine-specific reboot/halt operation */ +extern void mach_prepare_reboot(void); +extern void mach_prepare_shutdown(void); + +/* environment arguments from bootloader */ +extern u32 cpu_clock_freq; +extern u32 memsize, highmemsize; +extern const struct plat_smp_ops loongson3_smp_ops; + +/* loongson-specific command line, env and memory initialization */ +extern void __init prom_init_memory(void); +extern void __init prom_init_cmdline(void); +extern void __init prom_init_machtype(void); +extern void __init prom_init_env(void); +#ifdef CONFIG_LOONGSON_UART_BASE +extern unsigned long _loongson_uart_base[], loongson_uart_base[]; +extern void prom_init_loongson_uart_base(void); +#endif + +static inline void prom_init_uart_base(void) +{ +#ifdef CONFIG_LOONGSON_UART_BASE + prom_init_loongson_uart_base(); +#endif +} + +/* irq operation functions */ +extern void bonito_irqdispatch(void); +extern void __init bonito_irq_init(void); +extern void __init mach_init_irq(void); +extern void mach_irq_dispatch(unsigned int pending); +extern int mach_i8259_irq(void); + +/* We need this in some places... 
*/ +#define delay() ({ \ + int x; \ + for (x = 0; x < 100000; x++) \ + __asm__ __volatile__(""); \ +}) + +#define LOONGSON_REG(x) \ + (*(volatile u32 *)((char *)CKSEG1ADDR(LOONGSON_REG_BASE) + (x))) + +#define LOONGSON3_REG8(base, x) \ + (*(volatile u8 *)((char *)TO_UNCAC(base) + (x))) + +#define LOONGSON3_REG32(base, x) \ + (*(volatile u32 *)((char *)TO_UNCAC(base) + (x))) + +#define LOONGSON_IRQ_BASE 32 +#define LOONGSON2_PERFCNT_IRQ (MIPS_CPU_IRQ_BASE + 6) /* cpu perf counter */ + +#include +static inline void do_perfcnt_IRQ(void) +{ +#if IS_ENABLED(CONFIG_OPROFILE) + do_IRQ(LOONGSON2_PERFCNT_IRQ); +#endif +} + +#define LOONGSON_FLASH_BASE 0x1c000000 +#define LOONGSON_FLASH_SIZE 0x02000000 /* 32M */ +#define LOONGSON_FLASH_TOP (LOONGSON_FLASH_BASE+LOONGSON_FLASH_SIZE-1) + +#define LOONGSON_LIO0_BASE 0x1e000000 +#define LOONGSON_LIO0_SIZE 0x01C00000 /* 28M */ +#define LOONGSON_LIO0_TOP (LOONGSON_LIO0_BASE+LOONGSON_LIO0_SIZE-1) + +#define LOONGSON_BOOT_BASE 0x1fc00000 +#define LOONGSON_BOOT_SIZE 0x00100000 /* 1M */ +#define LOONGSON_BOOT_TOP (LOONGSON_BOOT_BASE+LOONGSON_BOOT_SIZE-1) +#define LOONGSON_REG_BASE 0x1fe00000 +#define LOONGSON_REG_SIZE 0x00100000 /* 256Bytes + 256Bytes + ??? */ +#define LOONGSON_REG_TOP (LOONGSON_REG_BASE+LOONGSON_REG_SIZE-1) +/* Loongson-3 specific registers */ +#define LOONGSON3_REG_BASE 0x3ff00000 +#define LOONGSON3_REG_SIZE 0x00100000 /* 256Bytes + 256Bytes + ??? */ +#define LOONGSON3_REG_TOP (LOONGSON3_REG_BASE+LOONGSON3_REG_SIZE-1) + +#define LOONGSON_LIO1_BASE 0x1ff00000 +#define LOONGSON_LIO1_SIZE 0x00100000 /* 1M */ +#define LOONGSON_LIO1_TOP (LOONGSON_LIO1_BASE+LOONGSON_LIO1_SIZE-1) + +#define LOONGSON_PCILO0_BASE 0x10000000 +#define LOONGSON_PCILO1_BASE 0x14000000 +#define LOONGSON_PCILO2_BASE 0x18000000 +#define LOONGSON_PCILO_BASE LOONGSON_PCILO0_BASE +#define LOONGSON_PCILO_SIZE 0x0c000000 /* 64M * 3 */ +#define LOONGSON_PCILO_TOP (LOONGSON_PCILO0_BASE+LOONGSON_PCILO_SIZE-1) + +#define LOONGSON_PCICFG_BASE 0x1fe80000 +#define LOONGSON_PCICFG_SIZE 0x00000800 /* 2K */ +#define LOONGSON_PCICFG_TOP (LOONGSON_PCICFG_BASE+LOONGSON_PCICFG_SIZE-1) + +#ifdef CONFIG_CPU_LOONGSON64 +#define LOONGSON_PCIIO_BASE loongson_sysconf.pci_io_base +#else +#define LOONGSON_PCIIO_BASE 0x1fd00000 +#endif + +#define LOONGSON_PCIIO_SIZE 0x00100000 /* 1M */ +#define LOONGSON_PCIIO_TOP (LOONGSON_PCIIO_BASE+LOONGSON_PCIIO_SIZE-1) + +/* Loongson Register Bases */ + +#define LOONGSON_PCICONFIGBASE 0x00 +#define LOONGSON_REGBASE 0x100 + +/* PCI Configuration Registers */ + +#define LOONGSON_PCI_REG(x) LOONGSON_REG(LOONGSON_PCICONFIGBASE + (x)) +#define LOONGSON_PCIDID LOONGSON_PCI_REG(0x00) +#define LOONGSON_PCICMD LOONGSON_PCI_REG(0x04) +#define LOONGSON_PCICLASS LOONGSON_PCI_REG(0x08) +#define LOONGSON_PCILTIMER LOONGSON_PCI_REG(0x0c) +#define LOONGSON_PCIBASE0 LOONGSON_PCI_REG(0x10) +#define LOONGSON_PCIBASE1 LOONGSON_PCI_REG(0x14) +#define LOONGSON_PCIBASE2 LOONGSON_PCI_REG(0x18) +#define LOONGSON_PCIBASE3 LOONGSON_PCI_REG(0x1c) +#define LOONGSON_PCIBASE4 LOONGSON_PCI_REG(0x20) +#define LOONGSON_PCIEXPRBASE LOONGSON_PCI_REG(0x30) +#define LOONGSON_PCIINT LOONGSON_PCI_REG(0x3c) + +#define LOONGSON_PCI_ISR4C LOONGSON_PCI_REG(0x4c) + +#define LOONGSON_PCICMD_PERR_CLR 0x80000000 +#define LOONGSON_PCICMD_SERR_CLR 0x40000000 +#define LOONGSON_PCICMD_MABORT_CLR 0x20000000 +#define LOONGSON_PCICMD_MTABORT_CLR 0x10000000 +#define LOONGSON_PCICMD_TABORT_CLR 0x08000000 +#define LOONGSON_PCICMD_MPERR_CLR 0x01000000 +#define LOONGSON_PCICMD_PERRRESPEN 0x00000040 +#define 
LOONGSON_PCICMD_ASTEPEN 0x00000080 +#define LOONGSON_PCICMD_SERREN 0x00000100 +#define LOONGSON_PCILTIMER_BUSLATENCY 0x0000ff00 +#define LOONGSON_PCILTIMER_BUSLATENCY_SHIFT 8 + +/* Loongson h/w Configuration */ + +#define LOONGSON_GENCFG_OFFSET 0x4 +#define LOONGSON_GENCFG LOONGSON_REG(LOONGSON_REGBASE + LOONGSON_GENCFG_OFFSET) + +#define LOONGSON_GENCFG_DEBUGMODE 0x00000001 +#define LOONGSON_GENCFG_SNOOPEN 0x00000002 +#define LOONGSON_GENCFG_CPUSELFRESET 0x00000004 + +#define LOONGSON_GENCFG_FORCE_IRQA 0x00000008 +#define LOONGSON_GENCFG_IRQA_ISOUT 0x00000010 +#define LOONGSON_GENCFG_IRQA_FROM_INT1 0x00000020 +#define LOONGSON_GENCFG_BYTESWAP 0x00000040 + +#define LOONGSON_GENCFG_UNCACHED 0x00000080 +#define LOONGSON_GENCFG_PREFETCHEN 0x00000100 +#define LOONGSON_GENCFG_WBEHINDEN 0x00000200 +#define LOONGSON_GENCFG_CACHEALG 0x00000c00 +#define LOONGSON_GENCFG_CACHEALG_SHIFT 10 +#define LOONGSON_GENCFG_PCIQUEUE 0x00001000 +#define LOONGSON_GENCFG_CACHESTOP 0x00002000 +#define LOONGSON_GENCFG_MSTRBYTESWAP 0x00004000 +#define LOONGSON_GENCFG_BUSERREN 0x00008000 +#define LOONGSON_GENCFG_NORETRYTIMEOUT 0x00010000 +#define LOONGSON_GENCFG_SHORTCOPYTIMEOUT 0x00020000 + +/* PCI address map control */ + +#define LOONGSON_PCIMAP LOONGSON_REG(LOONGSON_REGBASE + 0x10) +#define LOONGSON_PCIMEMBASECFG LOONGSON_REG(LOONGSON_REGBASE + 0x14) +#define LOONGSON_PCIMAP_CFG LOONGSON_REG(LOONGSON_REGBASE + 0x18) + +/* GPIO Regs - r/w */ + +#define LOONGSON_GPIODATA LOONGSON_REG(LOONGSON_REGBASE + 0x1c) +#define LOONGSON_GPIOIE LOONGSON_REG(LOONGSON_REGBASE + 0x20) + +/* ICU Configuration Regs - r/w */ + +#define LOONGSON_INTEDGE LOONGSON_REG(LOONGSON_REGBASE + 0x24) +#define LOONGSON_INTSTEER LOONGSON_REG(LOONGSON_REGBASE + 0x28) +#define LOONGSON_INTPOL LOONGSON_REG(LOONGSON_REGBASE + 0x2c) + +/* ICU Enable Regs - IntEn & IntISR are r/o. 
*/
+
+#define LOONGSON_INTENSET LOONGSON_REG(LOONGSON_REGBASE + 0x30)
+#define LOONGSON_INTENCLR LOONGSON_REG(LOONGSON_REGBASE + 0x34)
+#define LOONGSON_INTEN LOONGSON_REG(LOONGSON_REGBASE + 0x38)
+#define LOONGSON_INTISR LOONGSON_REG(LOONGSON_REGBASE + 0x3c)
+
+/* ICU */
+#define LOONGSON_ICU_MBOXES 0x0000000f
+#define LOONGSON_ICU_MBOXES_SHIFT 0
+#define LOONGSON_ICU_DMARDY 0x00000010
+#define LOONGSON_ICU_DMAEMPTY 0x00000020
+#define LOONGSON_ICU_COPYRDY 0x00000040
+#define LOONGSON_ICU_COPYEMPTY 0x00000080
+#define LOONGSON_ICU_COPYERR 0x00000100
+#define LOONGSON_ICU_PCIIRQ 0x00000200
+#define LOONGSON_ICU_MASTERERR 0x00000400
+#define LOONGSON_ICU_SYSTEMERR 0x00000800
+#define LOONGSON_ICU_DRAMPERR 0x00001000
+#define LOONGSON_ICU_RETRYERR 0x00002000
+#define LOONGSON_ICU_GPIOS 0x01ff0000
+#define LOONGSON_ICU_GPIOS_SHIFT 16
+#define LOONGSON_ICU_GPINS 0x7e000000
+#define LOONGSON_ICU_GPINS_SHIFT 25
+#define LOONGSON_ICU_MBOX(N) (1<<(LOONGSON_ICU_MBOXES_SHIFT+(N)))
+#define LOONGSON_ICU_GPIO(N) (1<<(LOONGSON_ICU_GPIOS_SHIFT+(N)))
+#define LOONGSON_ICU_GPIN(N) (1<<(LOONGSON_ICU_GPINS_SHIFT+(N)))
+
+/* PCI prefetch window base & mask */
+
+#define LOONGSON_MEM_WIN_BASE_L LOONGSON_REG(LOONGSON_REGBASE + 0x40)
+#define LOONGSON_MEM_WIN_BASE_H LOONGSON_REG(LOONGSON_REGBASE + 0x44)
+#define LOONGSON_MEM_WIN_MASK_L LOONGSON_REG(LOONGSON_REGBASE + 0x48)
+#define LOONGSON_MEM_WIN_MASK_H LOONGSON_REG(LOONGSON_REGBASE + 0x4c)
+
+/* PCI_Hit*_Sel_* */
+
+#define LOONGSON_PCI_HIT0_SEL_L LOONGSON_REG(LOONGSON_REGBASE + 0x50)
+#define LOONGSON_PCI_HIT0_SEL_H LOONGSON_REG(LOONGSON_REGBASE + 0x54)
+#define LOONGSON_PCI_HIT1_SEL_L LOONGSON_REG(LOONGSON_REGBASE + 0x58)
+#define LOONGSON_PCI_HIT1_SEL_H LOONGSON_REG(LOONGSON_REGBASE + 0x5c)
+#define LOONGSON_PCI_HIT2_SEL_L LOONGSON_REG(LOONGSON_REGBASE + 0x60)
+#define LOONGSON_PCI_HIT2_SEL_H LOONGSON_REG(LOONGSON_REGBASE + 0x64)
+
+/* PXArb Config & Status */
+
+#define LOONGSON_PXARB_CFG LOONGSON_REG(LOONGSON_REGBASE + 0x68)
+#define LOONGSON_PXARB_STATUS LOONGSON_REG(LOONGSON_REGBASE + 0x6c)
+
+#define MAX_PACKAGES 4
+
+/* Chip Config register of each physical cpu package, PRid >= Loongson-2F */
+extern u64 loongson_chipcfg[MAX_PACKAGES];
+#define LOONGSON_CHIPCFG(id) (*(volatile u32 *)(loongson_chipcfg[id]))
+
+/* Chip Temperature register of each physical cpu package, PRid >= Loongson-3A */
+extern u64 loongson_chiptemp[MAX_PACKAGES];
+#define LOONGSON_CHIPTEMP(id) (*(volatile u32 *)(loongson_chiptemp[id]))
+
+/* Freq Control register of each physical cpu package, PRid >= Loongson-3B */
+extern u64 loongson_freqctrl[MAX_PACKAGES];
+#define LOONGSON_FREQCTRL(id) (*(volatile u32 *)(loongson_freqctrl[id]))
+
+/* pcimap */
+
+#define LOONGSON_PCIMAP_PCIMAP_LO0 0x0000003f
+#define LOONGSON_PCIMAP_PCIMAP_LO0_SHIFT 0
+#define LOONGSON_PCIMAP_PCIMAP_LO1 0x00000fc0
+#define LOONGSON_PCIMAP_PCIMAP_LO1_SHIFT 6
+#define LOONGSON_PCIMAP_PCIMAP_LO2 0x0003f000
+#define LOONGSON_PCIMAP_PCIMAP_LO2_SHIFT 12
+#define LOONGSON_PCIMAP_PCIMAP_2 0x00040000
+#define LOONGSON_PCIMAP_WIN(WIN, ADDR) \
+ ((((ADDR)>>26) & LOONGSON_PCIMAP_PCIMAP_LO0) << ((WIN)*6))
+
+#ifdef CONFIG_CPU_SUPPORTS_CPUFREQ
+#include
+extern struct cpufreq_frequency_table loongson2_clockmod_table[];
+#endif
+
+/*
+ * address windows configuration module
+ *
+ * loongson2e does not have this module
+ */
+#ifdef CONFIG_CPU_SUPPORTS_ADDRWINCFG
+
+/* address window config module base address */
+#define LOONGSON_ADDRWINCFG_BASE 0x3ff00000ul
+#define LOONGSON_ADDRWINCFG_SIZE 0x180
+
+extern unsigned long
_loongson_addrwincfg_base; +#define LOONGSON_ADDRWINCFG(offset) \ + (*(volatile u64 *)(_loongson_addrwincfg_base + (offset))) + +#define CPU_WIN0_BASE LOONGSON_ADDRWINCFG(0x00) +#define CPU_WIN1_BASE LOONGSON_ADDRWINCFG(0x08) +#define CPU_WIN2_BASE LOONGSON_ADDRWINCFG(0x10) +#define CPU_WIN3_BASE LOONGSON_ADDRWINCFG(0x18) + +#define CPU_WIN0_MASK LOONGSON_ADDRWINCFG(0x20) +#define CPU_WIN1_MASK LOONGSON_ADDRWINCFG(0x28) +#define CPU_WIN2_MASK LOONGSON_ADDRWINCFG(0x30) +#define CPU_WIN3_MASK LOONGSON_ADDRWINCFG(0x38) + +#define CPU_WIN0_MMAP LOONGSON_ADDRWINCFG(0x40) +#define CPU_WIN1_MMAP LOONGSON_ADDRWINCFG(0x48) +#define CPU_WIN2_MMAP LOONGSON_ADDRWINCFG(0x50) +#define CPU_WIN3_MMAP LOONGSON_ADDRWINCFG(0x58) + +#define PCIDMA_WIN0_BASE LOONGSON_ADDRWINCFG(0x60) +#define PCIDMA_WIN1_BASE LOONGSON_ADDRWINCFG(0x68) +#define PCIDMA_WIN2_BASE LOONGSON_ADDRWINCFG(0x70) +#define PCIDMA_WIN3_BASE LOONGSON_ADDRWINCFG(0x78) + +#define PCIDMA_WIN0_MASK LOONGSON_ADDRWINCFG(0x80) +#define PCIDMA_WIN1_MASK LOONGSON_ADDRWINCFG(0x88) +#define PCIDMA_WIN2_MASK LOONGSON_ADDRWINCFG(0x90) +#define PCIDMA_WIN3_MASK LOONGSON_ADDRWINCFG(0x98) + +#define PCIDMA_WIN0_MMAP LOONGSON_ADDRWINCFG(0xa0) +#define PCIDMA_WIN1_MMAP LOONGSON_ADDRWINCFG(0xa8) +#define PCIDMA_WIN2_MMAP LOONGSON_ADDRWINCFG(0xb0) +#define PCIDMA_WIN3_MMAP LOONGSON_ADDRWINCFG(0xb8) + +#define ADDRWIN_WIN0 0 +#define ADDRWIN_WIN1 1 +#define ADDRWIN_WIN2 2 +#define ADDRWIN_WIN3 3 + +#define ADDRWIN_MAP_DST_DDR 0 +#define ADDRWIN_MAP_DST_PCI 1 +#define ADDRWIN_MAP_DST_LIO 1 + +/* + * s: CPU, PCIDMA + * d: DDR, PCI, LIO + * win: 0, 1, 2, 3 + * src: map source + * dst: map destination + * size: ~mask + 1 + */ +#define LOONGSON_ADDRWIN_CFG(s, d, w, src, dst, size) do {\ + s##_WIN##w##_BASE = (src); \ + s##_WIN##w##_MMAP = (dst) | ADDRWIN_MAP_DST_##d; \ + s##_WIN##w##_MASK = ~(size-1); \ +} while (0) + +#define LOONGSON_ADDRWIN_CPUTOPCI(win, src, dst, size) \ + LOONGSON_ADDRWIN_CFG(CPU, PCI, win, src, dst, size) +#define LOONGSON_ADDRWIN_CPUTODDR(win, src, dst, size) \ + LOONGSON_ADDRWIN_CFG(CPU, DDR, win, src, dst, size) +#define LOONGSON_ADDRWIN_PCITODDR(win, src, dst, size) \ + LOONGSON_ADDRWIN_CFG(PCIDMA, DDR, win, src, dst, size) + +#endif /* ! CONFIG_CPU_SUPPORTS_ADDRWINCFG */ + +#endif /* __ASM_MACH_LOONGSON64_LOONGSON_H */ diff --git a/arch/mips/include/asm/mach-loongson2ef/loongson_hwmon.h b/arch/mips/include/asm/mach-loongson2ef/loongson_hwmon.h new file mode 100644 index 000000000000..545f91f2ae16 --- /dev/null +++ b/arch/mips/include/asm/mach-loongson2ef/loongson_hwmon.h @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LOONGSON_HWMON_H_ +#define __LOONGSON_HWMON_H_ + +#include + +#define MIN_TEMP 0 +#define MAX_TEMP 255 +#define NOT_VALID_TEMP 999 + +typedef int (*get_temp_fun)(int); +extern int loongson3_cpu_temp(int); + +/* 0:Max speed, 1:Manual, 2:Auto */ +enum fan_control_mode { + FAN_FULL_MODE = 0, + FAN_MANUAL_MODE = 1, + FAN_AUTO_MODE = 2, + FAN_MODE_END +}; + +struct temp_range { + u8 low; + u8 high; + u8 level; +}; + +#define CONSTANT_SPEED_POLICY 0 /* at constant speed */ +#define STEP_SPEED_POLICY 1 /* use up/down arrays to describe policy */ +#define KERNEL_HELPER_POLICY 2 /* kernel as a helper to fan control */ + +#define MAX_STEP_NUM 16 +#define MAX_FAN_LEVEL 255 + +/* loongson_fan_policy works when fan work at FAN_AUTO_MODE */ +struct loongson_fan_policy { + u8 type; + + /* percent only used when type is CONSTANT_SPEED_POLICY */ + u8 percent; + + /* period between two check. 
(Unit: S) */
+ u8 adjust_period;
+
+ /* fan adjustment usually depends on a temperature input */
+ get_temp_fun depend_temp;
+
+ /* up_step/down_step used when type is STEP_SPEED_POLICY */
+ u8 up_step_num;
+ u8 down_step_num;
+ struct temp_range up_step[MAX_STEP_NUM];
+ struct temp_range down_step[MAX_STEP_NUM];
+ struct delayed_work work;
+};
+
+#endif /* __LOONGSON_HWMON_H_ */
diff --git a/arch/mips/include/asm/mach-loongson2ef/loongson_regs.h b/arch/mips/include/asm/mach-loongson2ef/loongson_regs.h
new file mode 100644
index 000000000000..363a47a5d26e
--- /dev/null
+++ b/arch/mips/include/asm/mach-loongson2ef/loongson_regs.h
@@ -0,0 +1,227 @@
+/*
+ * Read/Write Loongson Extension Registers
+ */
+
+#ifndef _LOONGSON_REGS_H_
+#define _LOONGSON_REGS_H_
+
+#include
+#include
+
+#include
+#include
+
+static inline bool cpu_has_cfg(void)
+{
+ return ((read_c0_prid() & PRID_IMP_MASK) == PRID_IMP_LOONGSON_64G);
+}
+
+static inline u32 read_cpucfg(u32 reg)
+{
+ u32 __res;
+
+ __asm__ __volatile__(
+ "parse_r __res,%0\n\t"
+ "parse_r reg,%1\n\t"
+ ".insn \n\t"
+ ".word (0xc8080118 | (reg << 21) | (__res << 11))\n\t"
+ :"=r"(__res)
+ :"r"(reg)
+ :
+ );
+ return __res;
+}
+
+/* Bit Domains for CFG registers */
+#define LOONGSON_CFG0 0x0
+#define LOONGSON_CFG0_PRID GENMASK(31, 0)
+
+#define LOONGSON_CFG1 0x1
+#define LOONGSON_CFG1_FP BIT(0)
+#define LOONGSON_CFG1_FPREV GENMASK(3, 1)
+#define LOONGSON_CFG1_MMI BIT(4)
+#define LOONGSON_CFG1_MSA1 BIT(5)
+#define LOONGSON_CFG1_MSA2 BIT(6)
+#define LOONGSON_CFG1_CGP BIT(7)
+#define LOONGSON_CFG1_WRP BIT(8)
+#define LOONGSON_CFG1_LSX1 BIT(9)
+#define LOONGSON_CFG1_LSX2 BIT(10)
+#define LOONGSON_CFG1_LASX BIT(11)
+#define LOONGSON_CFG1_R6FXP BIT(12)
+#define LOONGSON_CFG1_R6CRCP BIT(13)
+#define LOONGSON_CFG1_R6FPP BIT(14)
+#define LOONGSON_CFG1_CNT64 BIT(15)
+#define LOONGSON_CFG1_LSLDR0 BIT(16)
+#define LOONGSON_CFG1_LSPREF BIT(17)
+#define LOONGSON_CFG1_LSPREFX BIT(18)
+#define LOONGSON_CFG1_LSSYNCI BIT(19)
+#define LOONGSON_CFG1_LSUCA BIT(20)
+#define LOONGSON_CFG1_LLSYNC BIT(21)
+#define LOONGSON_CFG1_TGTSYNC BIT(22)
+#define LOONGSON_CFG1_LLEXC BIT(23)
+#define LOONGSON_CFG1_SCRAND BIT(24)
+#define LOONGSON_CFG1_MUALP BIT(25)
+#define LOONGSON_CFG1_KMUALEN BIT(26)
+#define LOONGSON_CFG1_ITLBT BIT(27)
+#define LOONGSON_CFG1_LSUPERF BIT(28)
+#define LOONGSON_CFG1_SFBP BIT(29)
+#define LOONGSON_CFG1_CDMAP BIT(30)
+
+#define LOONGSON_CFG2 0x2
+#define LOONGSON_CFG2_LEXT1 BIT(0)
+#define LOONGSON_CFG2_LEXT2 BIT(1)
+#define LOONGSON_CFG2_LEXT3 BIT(2)
+#define LOONGSON_CFG2_LSPW BIT(3)
+#define LOONGSON_CFG2_LBT1 BIT(4)
+#define LOONGSON_CFG2_LBT2 BIT(5)
+#define LOONGSON_CFG2_LBT3 BIT(6)
+#define LOONGSON_CFG2_LBTMMU BIT(7)
+#define LOONGSON_CFG2_LPMP BIT(8)
+#define LOONGSON_CFG2_LPMPREV GENMASK(11, 9)
+#define LOONGSON_CFG2_LAMO BIT(12)
+#define LOONGSON_CFG2_LPIXU BIT(13)
+#define LOONGSON_CFG2_LPIXUN BIT(14)
+#define LOONGSON_CFG2_LZVP BIT(15)
+#define LOONGSON_CFG2_LZVREV GENMASK(18, 16)
+#define LOONGSON_CFG2_LGFTP BIT(19)
+#define LOONGSON_CFG2_LGFTPREV GENMASK(22, 20)
+#define LOONGSON_CFG2_LLFTP BIT(23)
+#define LOONGSON_CFG2_LLFTPREV GENMASK(26, 24)
+#define LOONGSON_CFG2_LCSRP BIT(27)
+#define LOONGSON_CFG2_LDISBLIKELY BIT(28)
+
+#define LOONGSON_CFG3 0x3
+#define LOONGSON_CFG3_LCAMP BIT(0)
+#define LOONGSON_CFG3_LCAMREV GENMASK(3, 1)
+#define LOONGSON_CFG3_LCAMNUM GENMASK(11, 4)
+#define LOONGSON_CFG3_LCAMKW GENMASK(19, 12)
+#define LOONGSON_CFG3_LCAMVW GENMASK(27, 20)
+
+#define LOONGSON_CFG4 0x4
+#define LOONGSON_CFG4_CCFREQ GENMASK(31, 0)
+
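+/*
+ * Editor's note: an illustrative sketch, not part of the original
+ * patch. It shows how the raw read_cpucfg() accessor and the CFG1 bit
+ * masks above are meant to be combined when probing an optional
+ * extension, here the LASX vector unit; cpu_probe_lasx() is a
+ * hypothetical helper name.
+ */
+static inline bool cpu_probe_lasx(void)
+{
+ /* CPUCFG itself is only present on PRID_IMP_LOONGSON_64G cores */
+ if (!cpu_has_cfg())
+  return false;
+
+ return !!(read_cpucfg(LOONGSON_CFG1) & LOONGSON_CFG1_LASX);
+}
+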
+#define LOONGSON_CFG5 0x5
+#define LOONGSON_CFG5_CFM GENMASK(15, 0)
+#define LOONGSON_CFG5_CFD GENMASK(31, 16)
+
+#define LOONGSON_CFG6 0x6
+
+#define LOONGSON_CFG7 0x7
+#define LOONGSON_CFG7_GCCAEQRP BIT(0)
+#define LOONGSON_CFG7_UCAWINP BIT(1)
+
+static inline bool cpu_has_csr(void)
+{
+ if (cpu_has_cfg())
+  return (read_cpucfg(LOONGSON_CFG2) & LOONGSON_CFG2_LCSRP);
+
+ return false;
+}
+
+static inline u32 csr_readl(u32 reg)
+{
+ u32 __res;
+
+ /* RDCSR reg, val */
+ __asm__ __volatile__(
+ "parse_r __res,%0\n\t"
+ "parse_r reg,%1\n\t"
+ ".insn \n\t"
+ ".word (0xc8000118 | (reg << 21) | (__res << 11))\n\t"
+ :"=r"(__res)
+ :"r"(reg)
+ :
+ );
+ return __res;
+}
+
+static inline u64 csr_readq(u32 reg)
+{
+ u64 __res;
+
+ /* DRDCSR reg, val */
+ __asm__ __volatile__(
+ "parse_r __res,%0\n\t"
+ "parse_r reg,%1\n\t"
+ ".insn \n\t"
+ ".word (0xc8020118 | (reg << 21) | (__res << 11))\n\t"
+ :"=r"(__res)
+ :"r"(reg)
+ :
+ );
+ return __res;
+}
+
+static inline void csr_writel(u32 val, u32 reg)
+{
+ /* WRCSR reg, val */
+ __asm__ __volatile__(
+ "parse_r reg,%0\n\t"
+ "parse_r val,%1\n\t"
+ ".insn \n\t"
+ ".word (0xc8010118 | (reg << 21) | (val << 11))\n\t"
+ :
+ :"r"(reg),"r"(val)
+ :
+ );
+}
+
+static inline void csr_writeq(u64 val, u32 reg)
+{
+ /* DWRCSR reg, val */
+ __asm__ __volatile__(
+ "parse_r reg,%0\n\t"
+ "parse_r val,%1\n\t"
+ ".insn \n\t"
+ ".word (0xc8030118 | (reg << 21) | (val << 11))\n\t"
+ :
+ :"r"(reg),"r"(val)
+ :
+ );
+}
+
+/* Public CSR Register can also be accessed with regular addresses */
+#define CSR_PUBLIC_MMIO_BASE 0x1fe00000
+
+#define MMIO_CSR(x) (void *)TO_UNCAC(CSR_PUBLIC_MMIO_BASE + x)
+
+#define LOONGSON_CSR_FEATURES 0x8
+#define LOONGSON_CSRF_TEMP BIT(0)
+#define LOONGSON_CSRF_NODECNT BIT(1)
+#define LOONGSON_CSRF_MSI BIT(2)
+#define LOONGSON_CSRF_EXTIOI BIT(3)
+#define LOONGSON_CSRF_IPI BIT(4)
+#define LOONGSON_CSRF_FREQ BIT(5)
+
+#define LOONGSON_CSR_VENDOR 0x10 /* Vendor name string, should be "Loongson" */
+#define LOONGSON_CSR_CPUNAME 0x20 /* Processor name string */
+#define LOONGSON_CSR_NODECNT 0x408
+#define LOONGSON_CSR_CPUTEMP 0x428
+
+/* PerCore CSR, only accessible by local cores */
+#define LOONGSON_CSR_IPI_STATUS 0x1000
+#define LOONGSON_CSR_IPI_EN 0x1004
+#define LOONGSON_CSR_IPI_SET 0x1008
+#define LOONGSON_CSR_IPI_CLEAR 0x100c
+#define LOONGSON_CSR_IPI_SEND 0x1040
+#define CSR_IPI_SEND_IP_SHIFT 0
+#define CSR_IPI_SEND_CPU_SHIFT 16
+#define CSR_IPI_SEND_BLOCK BIT(31)
+
+static inline u64 drdtime(void)
+{
+ int rID = 0;
+ u64 val = 0;
+
+ __asm__ __volatile__(
+ "parse_r rID,%0\n\t"
+ "parse_r val,%1\n\t"
+ ".insn \n\t"
+ ".word (0xc8090118 | (rID << 21) | (val << 11))\n\t"
+ :"=r"(rID),"=r"(val)
+ :
+ );
+ return val;
+}
+
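+/*
+ * Editor's note: an illustrative sketch, not part of the original
+ * patch. Callers are expected to gate every CSR access on
+ * cpu_has_csr() and then test the feature word before using a block
+ * of CSRs; loongson_csr_has_extioi() is a hypothetical helper name.
+ */
+static inline bool loongson_csr_has_extioi(void)
+{
+ if (!cpu_has_csr())
+  return false;
+
+ return !!(csr_readl(LOONGSON_CSR_FEATURES) & LOONGSON_CSRF_EXTIOI);
+}
+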
+#endif
diff --git a/arch/mips/include/asm/mach-loongson2ef/machine.h b/arch/mips/include/asm/mach-loongson2ef/machine.h
new file mode 100644
index 000000000000..8ef7ea94a26d
--- /dev/null
+++ b/arch/mips/include/asm/mach-loongson2ef/machine.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2009 Lemote, Inc.
+ * Author: Wu Zhangjin
+ */
+
+#ifndef __ASM_MACH_LOONGSON64_MACHINE_H
+#define __ASM_MACH_LOONGSON64_MACHINE_H
+
+#ifdef CONFIG_LEMOTE_FULOONG2E
+
+#define LOONGSON_MACHTYPE MACH_LEMOTE_FL2E
+
+#endif
+
+/* use fuloong2f as the default machine of LEMOTE_MACH2F */
+#ifdef CONFIG_LEMOTE_MACH2F
+
+#define LOONGSON_MACHTYPE MACH_LEMOTE_FL2F
+
+#endif
+
+#ifdef CONFIG_LOONGSON_MACH3X
+
+#define LOONGSON_MACHTYPE MACH_LOONGSON_GENERIC
+
+#endif /* CONFIG_LOONGSON_MACH3X */
+
+#endif /* __ASM_MACH_LOONGSON64_MACHINE_H */
diff --git a/arch/mips/include/asm/mach-loongson2ef/mc146818rtc.h b/arch/mips/include/asm/mach-loongson2ef/mc146818rtc.h
new file mode 100644
index 000000000000..ebdccfee50be
--- /dev/null
+++ b/arch/mips/include/asm/mach-loongson2ef/mc146818rtc.h
@@ -0,0 +1,36 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1998, 2001, 03, 07 by Ralf Baechle (ralf@linux-mips.org)
+ *
+ * RTC routines for PC style attached Dallas chip.
+ */
+#ifndef __ASM_MACH_LOONGSON64_MC146818RTC_H
+#define __ASM_MACH_LOONGSON64_MC146818RTC_H
+
+#include
+
+#define RTC_PORT(x) (0x70 + (x))
+#define RTC_IRQ 8
+
+static inline unsigned char CMOS_READ(unsigned long addr)
+{
+ outb_p(addr, RTC_PORT(0));
+ return inb_p(RTC_PORT(1));
+}
+
+static inline void CMOS_WRITE(unsigned char data, unsigned long addr)
+{
+ outb_p(addr, RTC_PORT(0));
+ outb_p(data, RTC_PORT(1));
+}
+
+#define RTC_ALWAYS_BCD 0
+
+#ifndef mc146818_decode_year
+#define mc146818_decode_year(year) ((year) < 70 ? (year) + 2000 : (year) + 1970)
+#endif
+
+#endif /* __ASM_MACH_LOONGSON64_MC146818RTC_H */
diff --git a/arch/mips/include/asm/mach-loongson2ef/mem.h b/arch/mips/include/asm/mach-loongson2ef/mem.h
new file mode 100644
index 000000000000..ce33c174c04d
--- /dev/null
+++ b/arch/mips/include/asm/mach-loongson2ef/mem.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2009 Lemote, Inc.
+ * Author: Wu Zhangjin
+ */
+
+#ifndef __ASM_MACH_LOONGSON64_MEM_H
+#define __ASM_MACH_LOONGSON64_MEM_H
+
+/*
+ * high memory space
+ *
+ * in loongson2e, starts from 512M
+ * in loongson2f, starts from 2G+256M
+ */
+#ifdef CONFIG_CPU_LOONGSON2E
+#define LOONGSON_HIGHMEM_START 0x20000000
+#else
+#define LOONGSON_HIGHMEM_START 0x90000000
+#endif
+
+/*
+ * the peripheral registers (MMIO):
+ *
+ * On the Lemote Loongson 2e system, they reside between 0x1000:0000 and 0x2000:0000.
+ * On the Lemote Loongson 2f system, they reside between 0x1000:0000 and 0x8000:0000.
+ */
+
+#define LOONGSON_MMIO_MEM_START 0x10000000
+
+#ifdef CONFIG_CPU_LOONGSON2E
+#define LOONGSON_MMIO_MEM_END 0x20000000
+#else
+#define LOONGSON_MMIO_MEM_END 0x80000000
+#endif
+
+#endif /* __ASM_MACH_LOONGSON64_MEM_H */
diff --git a/arch/mips/include/asm/mach-loongson2ef/mmzone.h b/arch/mips/include/asm/mach-loongson2ef/mmzone.h
new file mode 100644
index 000000000000..62073d60739f
--- /dev/null
+++ b/arch/mips/include/asm/mach-loongson2ef/mmzone.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2010 Loongson Inc. & Lemote Inc.
& + * Institute of Computing Technology + * Author: Xiang Gao, gaoxiang@ict.ac.cn + * Huacai Chen, chenhc@lemote.com + * Xiaofu Meng, Shuangshuang Zhang + */ +#ifndef _ASM_MACH_MMZONE_H +#define _ASM_MACH_MMZONE_H + +#include +#define NODE_ADDRSPACE_SHIFT 44 +#define NODE0_ADDRSPACE_OFFSET 0x000000000000UL +#define NODE1_ADDRSPACE_OFFSET 0x100000000000UL +#define NODE2_ADDRSPACE_OFFSET 0x200000000000UL +#define NODE3_ADDRSPACE_OFFSET 0x300000000000UL + +#define pa_to_nid(addr) (((addr) & 0xf00000000000) >> NODE_ADDRSPACE_SHIFT) +#define nid_to_addrbase(nid) ((nid) << NODE_ADDRSPACE_SHIFT) + +#define LEVELS_PER_SLICE 128 + +struct slice_data { + unsigned long irq_enable_mask[2]; + int level_to_irq[LEVELS_PER_SLICE]; +}; + +struct hub_data { + cpumask_t h_cpus; + unsigned long slice_map; + unsigned long irq_alloc_mask[2]; + struct slice_data slice[2]; +}; + +struct node_data { + struct pglist_data pglist; + struct hub_data hub; + cpumask_t cpumask; +}; + +extern struct node_data *__node_data[]; + +#define NODE_DATA(n) (&__node_data[(n)]->pglist) +#define hub_data(n) (&__node_data[(n)]->hub) + +extern void setup_zero_pages(void); +extern void __init prom_init_numa_memory(void); + +#endif /* _ASM_MACH_MMZONE_H */ diff --git a/arch/mips/include/asm/mach-loongson2ef/pci.h b/arch/mips/include/asm/mach-loongson2ef/pci.h new file mode 100644 index 000000000000..05cc9052772f --- /dev/null +++ b/arch/mips/include/asm/mach-loongson2ef/pci.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (c) 2008 Zhang Le + * Copyright (c) 2009 Wu Zhangjin + */ + +#ifndef __ASM_MACH_LOONGSON64_PCI_H_ +#define __ASM_MACH_LOONGSON64_PCI_H_ + +extern struct pci_ops loongson_pci_ops; + +/* this is an offset from mips_io_port_base */ +#define LOONGSON_PCI_IO_START 0x00004000UL + +#ifdef CONFIG_CPU_SUPPORTS_ADDRWINCFG + +/* + * we use address window2 to map cpu address space to pci space + * window2: cpu [1G, 2G] -> pci [1G, 2G] + * why not use window 0 & 1? because they are used by cpu when booting. 
+ * window0: cpu [0, 256M] -> ddr [0, 256M] + * window1: cpu [256M, 512M] -> pci [256M, 512M] + */ + +/* the smallest LOONGSON_CPU_MEM_SRC can be 512M */ +#define LOONGSON_CPU_MEM_SRC 0x40000000ul /* 1G */ +#define LOONGSON_PCI_MEM_DST LOONGSON_CPU_MEM_SRC + +#define LOONGSON_PCI_MEM_START LOONGSON_PCI_MEM_DST +#define LOONGSON_PCI_MEM_END (0x80000000ul-1) /* 2G */ + +#define MMAP_CPUTOPCI_SIZE (LOONGSON_PCI_MEM_END - \ + LOONGSON_PCI_MEM_START + 1) + +#else /* loongson2f/32bit & loongson2e */ + +/* this pci memory space is mapped by pcimap in pci.c */ +#ifdef CONFIG_CPU_LOONGSON64 +#define LOONGSON_PCI_MEM_START 0x40000000UL +#define LOONGSON_PCI_MEM_END 0x7effffffUL +#else +#define LOONGSON_PCI_MEM_START LOONGSON_PCILO1_BASE +#define LOONGSON_PCI_MEM_END (LOONGSON_PCILO1_BASE + 0x04000000 * 2) +#endif +/* this is an offset from mips_io_port_base */ +#define LOONGSON_PCI_IO_START 0x00004000UL + +#endif /* !CONFIG_CPU_SUPPORTS_ADDRWINCFG */ + +#endif /* !__ASM_MACH_LOONGSON64_PCI_H_ */ diff --git a/arch/mips/include/asm/mach-loongson2ef/spaces.h b/arch/mips/include/asm/mach-loongson2ef/spaces.h new file mode 100644 index 000000000000..e85bc1d9c4f2 --- /dev/null +++ b/arch/mips/include/asm/mach-loongson2ef/spaces.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_MACH_LOONGSON64_SPACES_H_ +#define __ASM_MACH_LOONGSON64_SPACES_H_ + +#if defined(CONFIG_64BIT) +#define CAC_BASE _AC(0x9800000000000000, UL) +#endif /* CONFIG_64BIT */ + +#include +#endif diff --git a/arch/mips/include/asm/mach-loongson2ef/topology.h b/arch/mips/include/asm/mach-loongson2ef/topology.h new file mode 100644 index 000000000000..7ff819ab308a --- /dev/null +++ b/arch/mips/include/asm/mach-loongson2ef/topology.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_MACH_TOPOLOGY_H +#define _ASM_MACH_TOPOLOGY_H + +#ifdef CONFIG_NUMA + +#define cpu_to_node(cpu) (cpu_logical_map(cpu) >> 2) +#define cpumask_of_node(node) (&__node_data[(node)]->cpumask) + +struct pci_bus; +extern int pcibus_to_node(struct pci_bus *); + +#define cpumask_of_pcibus(bus) (cpu_online_mask) + +extern unsigned char __node_distances[MAX_NUMNODES][MAX_NUMNODES]; + +#define node_distance(from, to) (__node_distances[(from)][(to)]) + +#endif + +#include + +#endif /* _ASM_MACH_TOPOLOGY_H */ diff --git a/arch/mips/include/asm/mach-loongson2ef/workarounds.h b/arch/mips/include/asm/mach-loongson2ef/workarounds.h new file mode 100644 index 000000000000..17b71172a097 --- /dev/null +++ b/arch/mips/include/asm/mach-loongson2ef/workarounds.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_MACH_LOONGSON64_WORKAROUNDS_H_ +#define __ASM_MACH_LOONGSON64_WORKAROUNDS_H_ + +#define WORKAROUND_CPUFREQ 0x00000001 +#define WORKAROUND_CPUHOTPLUG 0x00000002 + +#endif diff --git a/arch/mips/loongson2ef/Kconfig b/arch/mips/loongson2ef/Kconfig new file mode 100644 index 000000000000..007bd023a4e9 --- /dev/null +++ b/arch/mips/loongson2ef/Kconfig @@ -0,0 +1,96 @@ +# SPDX-License-Identifier: GPL-2.0 +if MACH_LOONGSON2EF + +choice + prompt "Machine Type" + +config LEMOTE_FULOONG2E + bool "Lemote Fuloong(2e) mini-PC" + select ARCH_SPARSEMEM_ENABLE + select ARCH_MIGHT_HAVE_PC_PARPORT + select ARCH_MIGHT_HAVE_PC_SERIO + select CEVT_R4K + select CSRC_R4K + select SYS_HAS_CPU_LOONGSON2E + select DMA_NONCOHERENT + select BOOT_ELF32 + select BOARD_SCACHE + select FORCE_PCI + select I8259 + select ISA + select IRQ_MIPS_CPU + select SYS_SUPPORTS_64BIT_KERNEL + select SYS_SUPPORTS_LITTLE_ENDIAN + select SYS_SUPPORTS_HIGHMEM + 
select SYS_HAS_EARLY_PRINTK
+ select GENERIC_ISA_DMA_SUPPORT_BROKEN
+ select CPU_HAS_WB
+ select LOONGSON_MC146818
+ help
+ Lemote Fuloong(2e) mini-PC board based on the Chinese Loongson-2E CPU and
+ an FPGA northbridge.
+
+ The Lemote Fuloong(2e) mini-PC has a VIA686B south bridge.
+
+config LEMOTE_MACH2F
+ bool "Lemote Loongson 2F family machines"
+ select ARCH_SPARSEMEM_ENABLE
+ select ARCH_MIGHT_HAVE_PC_PARPORT
+ select ARCH_MIGHT_HAVE_PC_SERIO
+ select BOARD_SCACHE
+ select BOOT_ELF32
+ select CEVT_R4K if ! MIPS_EXTERNAL_TIMER
+ select CPU_HAS_WB
+ select CS5536
+ select CSRC_R4K if ! MIPS_EXTERNAL_TIMER
+ select DMA_NONCOHERENT
+ select GENERIC_ISA_DMA_SUPPORT_BROKEN
+ select HAVE_CLK
+ select FORCE_PCI
+ select I8259
+ select IRQ_MIPS_CPU
+ select ISA
+ select SYS_HAS_CPU_LOONGSON2F
+ select SYS_HAS_EARLY_PRINTK
+ select SYS_SUPPORTS_64BIT_KERNEL
+ select SYS_SUPPORTS_HIGHMEM
+ select SYS_SUPPORTS_LITTLE_ENDIAN
+ select LOONGSON_MC146818
+ help
+ Lemote Loongson 2F family machines utilize the 2F revision of
+ the Loongson processor and the AMD CS5536 south bridge.
+
+ These family machines include the fuloong2f mini PC, yeeloong2f
+ notebook, LingLoong all-in-one PC, and so forth.
+
+endchoice
+
+config CS5536
+ bool
+
+config CS5536_MFGPT
+ bool "CS5536 MFGPT Timer"
+ depends on CS5536 && !HIGH_RES_TIMERS
+ select MIPS_EXTERNAL_TIMER
+ help
+ This option enables the mfgpt0 timer of the AMD CS5536. With this
+ timer switched on, you cannot use high-resolution timers.
+
+ If you want to enable the Loongson2 CPUFreq driver, enable this
+ option first; otherwise, you will get a wrong system time.
+
+ If unsure, say Yes.
+
+config LOONGSON_UART_BASE
+ bool
+ default y
+ depends on EARLY_PRINTK || SERIAL_8250
+
+config LOONGSON_MC146818
+ bool
+ default n
+
+config LEFI_FIRMWARE_INTERFACE
+ bool
+
+endif # MACH_LOONGSON2EF
diff --git a/arch/mips/loongson2ef/Makefile b/arch/mips/loongson2ef/Makefile
new file mode 100644
index 000000000000..d4af1605cc9b
--- /dev/null
+++ b/arch/mips/loongson2ef/Makefile
@@ -0,0 +1,18 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Common code for all Loongson-based systems
+#
+
+obj-$(CONFIG_MACH_LOONGSON2EF) += common/
+
+#
+# Lemote Fuloong mini-PC (Loongson 2E-based)
+#
+
+obj-$(CONFIG_LEMOTE_FULOONG2E) += fuloong-2e/
+
+#
+# Lemote loongson2f family machines
+#
+
+obj-$(CONFIG_LEMOTE_MACH2F) += lemote-2f/
diff --git a/arch/mips/loongson2ef/Platform b/arch/mips/loongson2ef/Platform
new file mode 100644
index 000000000000..3aca42963f35
--- /dev/null
+++ b/arch/mips/loongson2ef/Platform
@@ -0,0 +1,32 @@
+#
+# Loongson Processors' Support
+#
+
+# Only gcc >= 4.4 has Loongson-specific support
+cflags-$(CONFIG_CPU_LOONGSON2EF) += -Wa,--trap
+cflags-$(CONFIG_CPU_LOONGSON2E) += \
+ $(call cc-option,-march=loongson2e,-march=r4600)
+cflags-$(CONFIG_CPU_LOONGSON2F) += \
+ $(call cc-option,-march=loongson2f,-march=r4600)
+# Enable the workarounds for Loongson2f
+ifdef CONFIG_CPU_LOONGSON2F_WORKAROUNDS
+ ifeq ($(call as-option,-Wa$(comma)-mfix-loongson2f-nop,),)
+ $(error only binutils >= 2.20.2 has the needed option -mfix-loongson2f-nop)
+ else
+ cflags-$(CONFIG_CPU_NOP_WORKAROUNDS) += -Wa$(comma)-mfix-loongson2f-nop
+ endif
+ ifeq ($(call as-option,-Wa$(comma)-mfix-loongson2f-jump,),)
+ $(error only binutils >= 2.20.2 has the needed option -mfix-loongson2f-jump)
+ else
+ cflags-$(CONFIG_CPU_JUMP_WORKAROUNDS) += -Wa$(comma)-mfix-loongson2f-jump
+ endif
+endif
+
+#
+# Loongson Machines' Support
+#
+
+platform-$(CONFIG_MACH_LOONGSON2EF) += loongson2ef/
+cflags-$(CONFIG_MACH_LOONGSON2EF) += -I$(srctree)/arch/mips/include/asm/mach-loongson2ef -mno-branch-likely
+load-$(CONFIG_LEMOTE_FULOONG2E) += 0xffffffff80100000
+load-$(CONFIG_LEMOTE_MACH2F) += 0xffffffff80200000
diff --git a/arch/mips/loongson2ef/common/Makefile b/arch/mips/loongson2ef/common/Makefile
new file mode 100644
index 000000000000..684624f61f5a
--- /dev/null
+++ b/arch/mips/loongson2ef/common/Makefile
@@ -0,0 +1,27 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for Loongson-based machines.
+#
+
+obj-y += setup.o init.o cmdline.o env.o time.o reset.o irq.o \
+ bonito-irq.o mem.o machtype.o platform.o serial.o
+obj-$(CONFIG_PCI) += pci.o
+
+#
+# Serial port support
+#
+obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
+obj-$(CONFIG_LOONGSON_UART_BASE) += uart_base.o
+obj-$(CONFIG_LOONGSON_MC146818) += rtc.o
+
+#
+# Enable the CS5536 Virtual Support Module (VSM) to virtualize the PCI
+# configuration space
+#
+obj-$(CONFIG_CS5536) += cs5536/
+
+#
+# Suspend Support
+#
+
+obj-$(CONFIG_SUSPEND) += pm.o
diff --git a/arch/mips/loongson2ef/common/bonito-irq.c b/arch/mips/loongson2ef/common/bonito-irq.c
new file mode 100644
index 000000000000..82352cc25e4c
--- /dev/null
+++ b/arch/mips/loongson2ef/common/bonito-irq.c
@@ -0,0 +1,49 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2001 MontaVista Software Inc.
+ * Author: Jun Sun, jsun@mvista.com or jsun@junsun.net
+ * Copyright (C) 2000, 2001 Ralf Baechle (ralf@gnu.org)
+ *
+ * Copyright (C) 2007 Lemote Inc. & Institute of Computing Technology
+ * Author: Fuxin Zhang, zhangfx@lemote.com
+ */
+#include
+#include
+
+#include
+
+static inline void bonito_irq_enable(struct irq_data *d)
+{
+ LOONGSON_INTENSET = (1 << (d->irq - LOONGSON_IRQ_BASE));
+ mmiowb();
+}
+
+static inline void bonito_irq_disable(struct irq_data *d)
+{
+ LOONGSON_INTENCLR = (1 << (d->irq - LOONGSON_IRQ_BASE));
+ mmiowb();
+}
+
+static struct irq_chip bonito_irq_type = {
+ .name = "bonito_irq",
+ .irq_mask = bonito_irq_disable,
+ .irq_unmask = bonito_irq_enable,
+};
+
+static struct irqaction __maybe_unused dma_timeout_irqaction = {
+ .handler = no_action,
+ .name = "dma_timeout",
+};
+
+void bonito_irq_init(void)
+{
+ u32 i;
+
+ for (i = LOONGSON_IRQ_BASE; i < LOONGSON_IRQ_BASE + 32; i++)
+  irq_set_chip_and_handler(i, &bonito_irq_type,
+    handle_level_irq);
+
+#ifdef CONFIG_CPU_LOONGSON2E
+ setup_irq(LOONGSON_IRQ_BASE + 10, &dma_timeout_irqaction);
+#endif
+}
diff --git a/arch/mips/loongson2ef/common/cmdline.c b/arch/mips/loongson2ef/common/cmdline.c
new file mode 100644
index 000000000000..a735460682cf
--- /dev/null
+++ b/arch/mips/loongson2ef/common/cmdline.c
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Based on Ocelot Linux port, which is
+ * Copyright 2001 MontaVista Software Inc.
+ * Author: jsun@mvista.com or jsun@junsun.net
+ *
+ * Copyright 2003 ICT CAS
+ * Author: Michael Guo
+ *
+ * Copyright (C) 2007 Lemote Inc. & Institute of Computing Technology
+ * Author: Fuxin Zhang, zhangfx@lemote.com
+ *
+ * Copyright (C) 2009 Lemote Inc.
+ * Author: Wu Zhangjin, wuzhangjin@gmail.com + */ +#include + +#include + +void __init prom_init_cmdline(void) +{ + int prom_argc; + /* pmon passes arguments in 32bit pointers */ + int *_prom_argv; + int i; + long l; + + /* firmware arguments are initialized in head.S */ + prom_argc = fw_arg0; + _prom_argv = (int *)fw_arg1; + + /* arg[0] is "g", the rest is boot parameters */ + arcs_cmdline[0] = '\0'; + for (i = 1; i < prom_argc; i++) { + l = (long)_prom_argv[i]; + if (strlen(arcs_cmdline) + strlen(((char *)l) + 1) + >= sizeof(arcs_cmdline)) + break; + strcat(arcs_cmdline, ((char *)l)); + strcat(arcs_cmdline, " "); + } + + prom_init_machtype(); +} diff --git a/arch/mips/loongson2ef/common/cs5536/Makefile b/arch/mips/loongson2ef/common/cs5536/Makefile new file mode 100644 index 000000000000..b32b29661245 --- /dev/null +++ b/arch/mips/loongson2ef/common/cs5536/Makefile @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Makefile for CS5536 support. +# + +obj-$(CONFIG_CS5536) += cs5536_pci.o cs5536_ide.o cs5536_acc.o cs5536_ohci.o \ + cs5536_isa.o cs5536_ehci.o + +# +# Enable cs5536 mfgpt Timer +# +obj-$(CONFIG_CS5536_MFGPT) += cs5536_mfgpt.o diff --git a/arch/mips/loongson2ef/common/cs5536/cs5536_acc.c b/arch/mips/loongson2ef/common/cs5536/cs5536_acc.c new file mode 100644 index 000000000000..ff50aae72916 --- /dev/null +++ b/arch/mips/loongson2ef/common/cs5536/cs5536_acc.c @@ -0,0 +1,136 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * the ACC Virtual Support Module of AMD CS5536 + * + * Copyright (C) 2007 Lemote, Inc. + * Author : jlliu, liujl@lemote.com + * + * Copyright (C) 2009 Lemote, Inc. + * Author: Wu Zhangjin, wuzhangjin@gmail.com + */ + +#include +#include + +void pci_acc_write_reg(int reg, u32 value) +{ + u32 hi = 0, lo = value; + + switch (reg) { + case PCI_COMMAND: + _rdmsr(GLIU_MSR_REG(GLIU_PAE), &hi, &lo); + if (value & PCI_COMMAND_MASTER) + lo |= (0x03 << 8); + else + lo &= ~(0x03 << 8); + _wrmsr(GLIU_MSR_REG(GLIU_PAE), hi, lo); + break; + case PCI_STATUS: + if (value & PCI_STATUS_PARITY) { + _rdmsr(SB_MSR_REG(SB_ERROR), &hi, &lo); + if (lo & SB_PARE_ERR_FLAG) { + lo = (lo & 0x0000ffff) | SB_PARE_ERR_FLAG; + _wrmsr(SB_MSR_REG(SB_ERROR), hi, lo); + } + } + break; + case PCI_BAR0_REG: + if (value == PCI_BAR_RANGE_MASK) { + _rdmsr(GLCP_MSR_REG(GLCP_SOFT_COM), &hi, &lo); + lo |= SOFT_BAR_ACC_FLAG; + _wrmsr(GLCP_MSR_REG(GLCP_SOFT_COM), hi, lo); + } else if (value & 0x01) { + value &= 0xfffffffc; + hi = 0xA0000000 | ((value & 0x000ff000) >> 12); + lo = 0x000fff80 | ((value & 0x00000fff) << 20); + _wrmsr(GLIU_MSR_REG(GLIU_IOD_BM1), hi, lo); + } + break; + case PCI_ACC_INT_REG: + _rdmsr(DIVIL_MSR_REG(PIC_YSEL_LOW), &hi, &lo); + /* disable all the usb interrupt in PIC */ + lo &= ~(0xf << PIC_YSEL_LOW_ACC_SHIFT); + if (value) /* enable all the acc interrupt in PIC */ + lo |= (CS5536_ACC_INTR << PIC_YSEL_LOW_ACC_SHIFT); + _wrmsr(DIVIL_MSR_REG(PIC_YSEL_LOW), hi, lo); + break; + default: + break; + } +} + +u32 pci_acc_read_reg(int reg) +{ + u32 hi, lo; + u32 conf_data = 0; + + switch (reg) { + case PCI_VENDOR_ID: + conf_data = + CFG_PCI_VENDOR_ID(CS5536_ACC_DEVICE_ID, CS5536_VENDOR_ID); + break; + case PCI_COMMAND: + _rdmsr(GLIU_MSR_REG(GLIU_IOD_BM1), &hi, &lo); + if (((lo & 0xfff00000) || (hi & 0x000000ff)) + && ((hi & 0xf0000000) == 0xa0000000)) + conf_data |= PCI_COMMAND_IO; + _rdmsr(GLIU_MSR_REG(GLIU_PAE), &hi, &lo); + if ((lo & 0x300) == 0x300) + conf_data |= PCI_COMMAND_MASTER; + break; + case PCI_STATUS: + conf_data |= PCI_STATUS_66MHZ; + conf_data |= 
PCI_STATUS_FAST_BACK; + _rdmsr(SB_MSR_REG(SB_ERROR), &hi, &lo); + if (lo & SB_PARE_ERR_FLAG) + conf_data |= PCI_STATUS_PARITY; + conf_data |= PCI_STATUS_DEVSEL_MEDIUM; + break; + case PCI_CLASS_REVISION: + _rdmsr(ACC_MSR_REG(ACC_CAP), &hi, &lo); + conf_data = lo & 0x000000ff; + conf_data |= (CS5536_ACC_CLASS_CODE << 8); + break; + case PCI_CACHE_LINE_SIZE: + conf_data = + CFG_PCI_CACHE_LINE_SIZE(PCI_NORMAL_HEADER_TYPE, + PCI_NORMAL_LATENCY_TIMER); + break; + case PCI_BAR0_REG: + _rdmsr(GLCP_MSR_REG(GLCP_SOFT_COM), &hi, &lo); + if (lo & SOFT_BAR_ACC_FLAG) { + conf_data = CS5536_ACC_RANGE | + PCI_BASE_ADDRESS_SPACE_IO; + lo &= ~SOFT_BAR_ACC_FLAG; + _wrmsr(GLCP_MSR_REG(GLCP_SOFT_COM), hi, lo); + } else { + _rdmsr(GLIU_MSR_REG(GLIU_IOD_BM1), &hi, &lo); + conf_data = (hi & 0x000000ff) << 12; + conf_data |= (lo & 0xfff00000) >> 20; + conf_data |= 0x01; + conf_data &= ~0x02; + } + break; + case PCI_CARDBUS_CIS: + conf_data = PCI_CARDBUS_CIS_POINTER; + break; + case PCI_SUBSYSTEM_VENDOR_ID: + conf_data = + CFG_PCI_VENDOR_ID(CS5536_ACC_SUB_ID, CS5536_SUB_VENDOR_ID); + break; + case PCI_ROM_ADDRESS: + conf_data = PCI_EXPANSION_ROM_BAR; + break; + case PCI_CAPABILITY_LIST: + conf_data = PCI_CAPLIST_USB_POINTER; + break; + case PCI_INTERRUPT_LINE: + conf_data = + CFG_PCI_INTERRUPT_LINE(PCI_DEFAULT_PIN, CS5536_ACC_INTR); + break; + default: + break; + } + + return conf_data; +} diff --git a/arch/mips/loongson2ef/common/cs5536/cs5536_ehci.c b/arch/mips/loongson2ef/common/cs5536/cs5536_ehci.c new file mode 100644 index 000000000000..bd4c39fe6109 --- /dev/null +++ b/arch/mips/loongson2ef/common/cs5536/cs5536_ehci.c @@ -0,0 +1,156 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * the EHCI Virtual Support Module of AMD CS5536 + * + * Copyright (C) 2007 Lemote, Inc. + * Author : jlliu, liujl@lemote.com + * + * Copyright (C) 2009 Lemote, Inc. 
+ * Author: Wu Zhangjin, wuzhangjin@gmail.com + */ + +#include +#include + +void pci_ehci_write_reg(int reg, u32 value) +{ + u32 hi = 0, lo = value; + + switch (reg) { + case PCI_COMMAND: + _rdmsr(USB_MSR_REG(USB_EHCI), &hi, &lo); + if (value & PCI_COMMAND_MASTER) + hi |= PCI_COMMAND_MASTER; + else + hi &= ~PCI_COMMAND_MASTER; + + if (value & PCI_COMMAND_MEMORY) + hi |= PCI_COMMAND_MEMORY; + else + hi &= ~PCI_COMMAND_MEMORY; + _wrmsr(USB_MSR_REG(USB_EHCI), hi, lo); + break; + case PCI_STATUS: + if (value & PCI_STATUS_PARITY) { + _rdmsr(SB_MSR_REG(SB_ERROR), &hi, &lo); + if (lo & SB_PARE_ERR_FLAG) { + lo = (lo & 0x0000ffff) | SB_PARE_ERR_FLAG; + _wrmsr(SB_MSR_REG(SB_ERROR), hi, lo); + } + } + break; + case PCI_BAR0_REG: + if (value == PCI_BAR_RANGE_MASK) { + _rdmsr(GLCP_MSR_REG(GLCP_SOFT_COM), &hi, &lo); + lo |= SOFT_BAR_EHCI_FLAG; + _wrmsr(GLCP_MSR_REG(GLCP_SOFT_COM), hi, lo); + } else if ((value & 0x01) == 0x00) { + _rdmsr(USB_MSR_REG(USB_EHCI), &hi, &lo); + lo = value; + _wrmsr(USB_MSR_REG(USB_EHCI), hi, lo); + + value &= 0xfffffff0; + hi = 0x40000000 | ((value & 0xff000000) >> 24); + lo = 0x000fffff | ((value & 0x00fff000) << 8); + _wrmsr(GLIU_MSR_REG(GLIU_P2D_BM4), hi, lo); + } + break; + case PCI_EHCI_LEGSMIEN_REG: + _rdmsr(USB_MSR_REG(USB_EHCI), &hi, &lo); + hi &= 0x003f0000; + hi |= (value & 0x3f) << 16; + _wrmsr(USB_MSR_REG(USB_EHCI), hi, lo); + break; + case PCI_EHCI_FLADJ_REG: + _rdmsr(USB_MSR_REG(USB_EHCI), &hi, &lo); + hi &= ~0x00003f00; + hi |= value & 0x00003f00; + _wrmsr(USB_MSR_REG(USB_EHCI), hi, lo); + break; + default: + break; + } +} + +u32 pci_ehci_read_reg(int reg) +{ + u32 conf_data = 0; + u32 hi, lo; + + switch (reg) { + case PCI_VENDOR_ID: + conf_data = + CFG_PCI_VENDOR_ID(CS5536_EHCI_DEVICE_ID, CS5536_VENDOR_ID); + break; + case PCI_COMMAND: + _rdmsr(USB_MSR_REG(USB_EHCI), &hi, &lo); + if (hi & PCI_COMMAND_MASTER) + conf_data |= PCI_COMMAND_MASTER; + if (hi & PCI_COMMAND_MEMORY) + conf_data |= PCI_COMMAND_MEMORY; + break; + case PCI_STATUS: + conf_data |= PCI_STATUS_66MHZ; + conf_data |= PCI_STATUS_FAST_BACK; + _rdmsr(SB_MSR_REG(SB_ERROR), &hi, &lo); + if (lo & SB_PARE_ERR_FLAG) + conf_data |= PCI_STATUS_PARITY; + conf_data |= PCI_STATUS_DEVSEL_MEDIUM; + break; + case PCI_CLASS_REVISION: + _rdmsr(USB_MSR_REG(USB_CAP), &hi, &lo); + conf_data = lo & 0x000000ff; + conf_data |= (CS5536_EHCI_CLASS_CODE << 8); + break; + case PCI_CACHE_LINE_SIZE: + conf_data = + CFG_PCI_CACHE_LINE_SIZE(PCI_NORMAL_HEADER_TYPE, + PCI_NORMAL_LATENCY_TIMER); + break; + case PCI_BAR0_REG: + _rdmsr(GLCP_MSR_REG(GLCP_SOFT_COM), &hi, &lo); + if (lo & SOFT_BAR_EHCI_FLAG) { + conf_data = CS5536_EHCI_RANGE | + PCI_BASE_ADDRESS_SPACE_MEMORY; + lo &= ~SOFT_BAR_EHCI_FLAG; + _wrmsr(GLCP_MSR_REG(GLCP_SOFT_COM), hi, lo); + } else { + _rdmsr(USB_MSR_REG(USB_EHCI), &hi, &lo); + conf_data = lo & 0xfffff000; + } + break; + case PCI_CARDBUS_CIS: + conf_data = PCI_CARDBUS_CIS_POINTER; + break; + case PCI_SUBSYSTEM_VENDOR_ID: + conf_data = + CFG_PCI_VENDOR_ID(CS5536_EHCI_SUB_ID, CS5536_SUB_VENDOR_ID); + break; + case PCI_ROM_ADDRESS: + conf_data = PCI_EXPANSION_ROM_BAR; + break; + case PCI_CAPABILITY_LIST: + conf_data = PCI_CAPLIST_USB_POINTER; + break; + case PCI_INTERRUPT_LINE: + conf_data = + CFG_PCI_INTERRUPT_LINE(PCI_DEFAULT_PIN, CS5536_USB_INTR); + break; + case PCI_EHCI_LEGSMIEN_REG: + _rdmsr(USB_MSR_REG(USB_EHCI), &hi, &lo); + conf_data = (hi & 0x003f0000) >> 16; + break; + case PCI_EHCI_LEGSMISTS_REG: + _rdmsr(USB_MSR_REG(USB_EHCI), &hi, &lo); + conf_data = (hi & 0x3f000000) >> 24; + break; + case 
PCI_EHCI_FLADJ_REG: + _rdmsr(USB_MSR_REG(USB_EHCI), &hi, &lo); + conf_data = hi & 0x00003f00; + break; + default: + break; + } + + return conf_data; +} diff --git a/arch/mips/loongson2ef/common/cs5536/cs5536_ide.c b/arch/mips/loongson2ef/common/cs5536/cs5536_ide.c new file mode 100644 index 000000000000..bb933294b092 --- /dev/null +++ b/arch/mips/loongson2ef/common/cs5536/cs5536_ide.c @@ -0,0 +1,188 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * the IDE Virtual Support Module of AMD CS5536 + * + * Copyright (C) 2007 Lemote, Inc. + * Author : jlliu, liujl@lemote.com + * + * Copyright (C) 2009 Lemote, Inc. + * Author: Wu Zhangjin, wuzhangjin@gmail.com + */ + +#include +#include + +void pci_ide_write_reg(int reg, u32 value) +{ + u32 hi = 0, lo = value; + + switch (reg) { + case PCI_COMMAND: + _rdmsr(GLIU_MSR_REG(GLIU_PAE), &hi, &lo); + if (value & PCI_COMMAND_MASTER) + lo |= (0x03 << 4); + else + lo &= ~(0x03 << 4); + _wrmsr(GLIU_MSR_REG(GLIU_PAE), hi, lo); + break; + case PCI_STATUS: + if (value & PCI_STATUS_PARITY) { + _rdmsr(SB_MSR_REG(SB_ERROR), &hi, &lo); + if (lo & SB_PARE_ERR_FLAG) { + lo = (lo & 0x0000ffff) | SB_PARE_ERR_FLAG; + _wrmsr(SB_MSR_REG(SB_ERROR), hi, lo); + } + } + break; + case PCI_CACHE_LINE_SIZE: + value &= 0x0000ff00; + _rdmsr(SB_MSR_REG(SB_CTRL), &hi, &lo); + hi &= 0xffffff00; + hi |= (value >> 8); + _wrmsr(SB_MSR_REG(SB_CTRL), hi, lo); + break; + case PCI_BAR4_REG: + if (value == PCI_BAR_RANGE_MASK) { + _rdmsr(GLCP_MSR_REG(GLCP_SOFT_COM), &hi, &lo); + lo |= SOFT_BAR_IDE_FLAG; + _wrmsr(GLCP_MSR_REG(GLCP_SOFT_COM), hi, lo); + } else if (value & 0x01) { + _rdmsr(IDE_MSR_REG(IDE_IO_BAR), &hi, &lo); + lo = (value & 0xfffffff0) | 0x1; + _wrmsr(IDE_MSR_REG(IDE_IO_BAR), hi, lo); + + value &= 0xfffffffc; + hi = 0x60000000 | ((value & 0x000ff000) >> 12); + lo = 0x000ffff0 | ((value & 0x00000fff) << 20); + _wrmsr(GLIU_MSR_REG(GLIU_IOD_BM2), hi, lo); + } + break; + case PCI_IDE_CFG_REG: + if (value == CS5536_IDE_FLASH_SIGNATURE) { + _rdmsr(DIVIL_MSR_REG(DIVIL_BALL_OPTS), &hi, &lo); + lo |= 0x01; + _wrmsr(DIVIL_MSR_REG(DIVIL_BALL_OPTS), hi, lo); + } else { + _rdmsr(IDE_MSR_REG(IDE_CFG), &hi, &lo); + lo = value; + _wrmsr(IDE_MSR_REG(IDE_CFG), hi, lo); + } + break; + case PCI_IDE_DTC_REG: + _rdmsr(IDE_MSR_REG(IDE_DTC), &hi, &lo); + lo = value; + _wrmsr(IDE_MSR_REG(IDE_DTC), hi, lo); + break; + case PCI_IDE_CAST_REG: + _rdmsr(IDE_MSR_REG(IDE_CAST), &hi, &lo); + lo = value; + _wrmsr(IDE_MSR_REG(IDE_CAST), hi, lo); + break; + case PCI_IDE_ETC_REG: + _rdmsr(IDE_MSR_REG(IDE_ETC), &hi, &lo); + lo = value; + _wrmsr(IDE_MSR_REG(IDE_ETC), hi, lo); + break; + case PCI_IDE_PM_REG: + _rdmsr(IDE_MSR_REG(IDE_INTERNAL_PM), &hi, &lo); + lo = value; + _wrmsr(IDE_MSR_REG(IDE_INTERNAL_PM), hi, lo); + break; + default: + break; + } +} + +u32 pci_ide_read_reg(int reg) +{ + u32 conf_data = 0; + u32 hi, lo; + + switch (reg) { + case PCI_VENDOR_ID: + conf_data = + CFG_PCI_VENDOR_ID(CS5536_IDE_DEVICE_ID, CS5536_VENDOR_ID); + break; + case PCI_COMMAND: + _rdmsr(IDE_MSR_REG(IDE_IO_BAR), &hi, &lo); + if (lo & 0xfffffff0) + conf_data |= PCI_COMMAND_IO; + _rdmsr(GLIU_MSR_REG(GLIU_PAE), &hi, &lo); + if ((lo & 0x30) == 0x30) + conf_data |= PCI_COMMAND_MASTER; + break; + case PCI_STATUS: + conf_data |= PCI_STATUS_66MHZ; + conf_data |= PCI_STATUS_FAST_BACK; + _rdmsr(SB_MSR_REG(SB_ERROR), &hi, &lo); + if (lo & SB_PARE_ERR_FLAG) + conf_data |= PCI_STATUS_PARITY; + conf_data |= PCI_STATUS_DEVSEL_MEDIUM; + break; + case PCI_CLASS_REVISION: + _rdmsr(IDE_MSR_REG(IDE_CAP), &hi, &lo); + conf_data = lo & 
0x000000ff;
+ conf_data |= (CS5536_IDE_CLASS_CODE << 8);
+ break;
+ case PCI_CACHE_LINE_SIZE:
+ _rdmsr(SB_MSR_REG(SB_CTRL), &hi, &lo);
+ hi &= 0x000000f8;
+ conf_data = CFG_PCI_CACHE_LINE_SIZE(PCI_NORMAL_HEADER_TYPE, hi);
+ break;
+ case PCI_BAR4_REG:
+ _rdmsr(GLCP_MSR_REG(GLCP_SOFT_COM), &hi, &lo);
+ if (lo & SOFT_BAR_IDE_FLAG) {
+ conf_data = CS5536_IDE_RANGE |
+ PCI_BASE_ADDRESS_SPACE_IO;
+ lo &= ~SOFT_BAR_IDE_FLAG;
+ _wrmsr(GLCP_MSR_REG(GLCP_SOFT_COM), hi, lo);
+ } else {
+ _rdmsr(IDE_MSR_REG(IDE_IO_BAR), &hi, &lo);
+ conf_data = lo & 0xfffffff0;
+ conf_data |= 0x01;
+ conf_data &= ~0x02;
+ }
+ break;
+ case PCI_CARDBUS_CIS:
+ conf_data = PCI_CARDBUS_CIS_POINTER;
+ break;
+ case PCI_SUBSYSTEM_VENDOR_ID:
+ conf_data =
+ CFG_PCI_VENDOR_ID(CS5536_IDE_SUB_ID, CS5536_SUB_VENDOR_ID);
+ break;
+ case PCI_ROM_ADDRESS:
+ conf_data = PCI_EXPANSION_ROM_BAR;
+ break;
+ case PCI_CAPABILITY_LIST:
+ conf_data = PCI_CAPLIST_POINTER;
+ break;
+ case PCI_INTERRUPT_LINE:
+ conf_data =
+ CFG_PCI_INTERRUPT_LINE(PCI_DEFAULT_PIN, CS5536_IDE_INTR);
+ break;
+ case PCI_IDE_CFG_REG:
+ _rdmsr(IDE_MSR_REG(IDE_CFG), &hi, &lo);
+ conf_data = lo;
+ break;
+ case PCI_IDE_DTC_REG:
+ _rdmsr(IDE_MSR_REG(IDE_DTC), &hi, &lo);
+ conf_data = lo;
+ break;
+ case PCI_IDE_CAST_REG:
+ _rdmsr(IDE_MSR_REG(IDE_CAST), &hi, &lo);
+ conf_data = lo;
+ break;
+ case PCI_IDE_ETC_REG:
+ _rdmsr(IDE_MSR_REG(IDE_ETC), &hi, &lo);
+ conf_data = lo;
+ break;
+ case PCI_IDE_PM_REG:
+ _rdmsr(IDE_MSR_REG(IDE_INTERNAL_PM), &hi, &lo);
+ conf_data = lo;
+ break;
+ default:
+ break;
+ }
+
+ return conf_data;
+}
diff --git a/arch/mips/loongson2ef/common/cs5536/cs5536_isa.c b/arch/mips/loongson2ef/common/cs5536/cs5536_isa.c
new file mode 100644
index 000000000000..5ad38f86ee62
--- /dev/null
+++ b/arch/mips/loongson2ef/common/cs5536/cs5536_isa.c
@@ -0,0 +1,326 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * the ISA Virtual Support Module of AMD CS5536
+ *
+ * Copyright (C) 2007 Lemote, Inc.
+ * Author : jlliu, liujl@lemote.com
+ *
+ * Copyright (C) 2009 Lemote, Inc.
+ * Author: Wu Zhangjin, wuzhangjin@gmail.com
+ */
+
+#include
+#include
+#include
+
+/* common variables for PCI_ISA_READ/WRITE_BAR */
+static const u32 divil_msr_reg[6] = {
+ DIVIL_MSR_REG(DIVIL_LBAR_SMB), DIVIL_MSR_REG(DIVIL_LBAR_GPIO),
+ DIVIL_MSR_REG(DIVIL_LBAR_MFGPT), DIVIL_MSR_REG(DIVIL_LBAR_IRQ),
+ DIVIL_MSR_REG(DIVIL_LBAR_PMS), DIVIL_MSR_REG(DIVIL_LBAR_ACPI),
+};
+
+static const u32 soft_bar_flag[6] = {
+ SOFT_BAR_SMB_FLAG, SOFT_BAR_GPIO_FLAG, SOFT_BAR_MFGPT_FLAG,
+ SOFT_BAR_IRQ_FLAG, SOFT_BAR_PMS_FLAG, SOFT_BAR_ACPI_FLAG,
+};
+
+static const u32 sb_msr_reg[6] = {
+ SB_MSR_REG(SB_R0), SB_MSR_REG(SB_R1), SB_MSR_REG(SB_R2),
+ SB_MSR_REG(SB_R3), SB_MSR_REG(SB_R4), SB_MSR_REG(SB_R5),
+};
+
+static const u32 bar_space_range[6] = {
+ CS5536_SMB_RANGE, CS5536_GPIO_RANGE, CS5536_MFGPT_RANGE,
+ CS5536_IRQ_RANGE, CS5536_PMS_RANGE, CS5536_ACPI_RANGE,
+};
+
+static const int bar_space_len[6] = {
+ CS5536_SMB_LENGTH, CS5536_GPIO_LENGTH, CS5536_MFGPT_LENGTH,
+ CS5536_IRQ_LENGTH, CS5536_PMS_LENGTH, CS5536_ACPI_LENGTH,
+};
+
+/*
+ * enable the divil module bar space.
+ *
+ * For each DIVIL module LBAR, both the DIVIL LBAR reg and the matching
+ * RCONFx (0~5) reg must be programmed before the module can be used.
+ */
+static void divil_lbar_enable(void)
+{
+ u32 hi, lo;
+ int offset;
+
+ /*
+ * The DIVIL IRQ is not used yet, so keep the RCONF0 reserved.
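+ * (The loop below just sets bit 0 -- assumed to be the LBAR enable
+ * bit -- in the high word of each LBAR MSR from DIVIL_LBAR_SMB
+ * through DIVIL_LBAR_PMS; the matching RCONFx range is programmed
+ * separately in pci_isa_write_bar().)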
+ */ + + for (offset = DIVIL_LBAR_SMB; offset <= DIVIL_LBAR_PMS; offset++) { + _rdmsr(DIVIL_MSR_REG(offset), &hi, &lo); + hi |= 0x01; + _wrmsr(DIVIL_MSR_REG(offset), hi, lo); + } +} + +/* + * disable the divil module bar space. + */ +static void divil_lbar_disable(void) +{ + u32 hi, lo; + int offset; + + for (offset = DIVIL_LBAR_SMB; offset <= DIVIL_LBAR_PMS; offset++) { + _rdmsr(DIVIL_MSR_REG(offset), &hi, &lo); + hi &= ~0x01; + _wrmsr(DIVIL_MSR_REG(offset), hi, lo); + } +} + +/* + * BAR write: write value to the n BAR + */ + +void pci_isa_write_bar(int n, u32 value) +{ + u32 hi = 0, lo = value; + + if (value == PCI_BAR_RANGE_MASK) { + _rdmsr(GLCP_MSR_REG(GLCP_SOFT_COM), &hi, &lo); + lo |= soft_bar_flag[n]; + _wrmsr(GLCP_MSR_REG(GLCP_SOFT_COM), hi, lo); + } else if (value & 0x01) { + /* NATIVE reg */ + hi = 0x0000f001; + lo &= bar_space_range[n]; + _wrmsr(divil_msr_reg[n], hi, lo); + + /* RCONFx is 4bytes in units for I/O space */ + hi = ((value & 0x000ffffc) << 12) | + ((bar_space_len[n] - 4) << 12) | 0x01; + lo = ((value & 0x000ffffc) << 12) | 0x01; + _wrmsr(sb_msr_reg[n], hi, lo); + } +} + +/* + * BAR read: read the n BAR + */ + +u32 pci_isa_read_bar(int n) +{ + u32 conf_data = 0; + u32 hi, lo; + + _rdmsr(GLCP_MSR_REG(GLCP_SOFT_COM), &hi, &lo); + if (lo & soft_bar_flag[n]) { + conf_data = bar_space_range[n] | PCI_BASE_ADDRESS_SPACE_IO; + lo &= ~soft_bar_flag[n]; + _wrmsr(GLCP_MSR_REG(GLCP_SOFT_COM), hi, lo); + } else { + _rdmsr(divil_msr_reg[n], &hi, &lo); + conf_data = lo & bar_space_range[n]; + conf_data |= 0x01; + conf_data &= ~0x02; + } + return conf_data; +} + +/* + * isa_write: ISA write transfer + * + * We assume that this is not a bus master transfer. + */ +void pci_isa_write_reg(int reg, u32 value) +{ + u32 hi = 0, lo = value; + u32 temp; + + switch (reg) { + case PCI_COMMAND: + if (value & PCI_COMMAND_IO) + divil_lbar_enable(); + else + divil_lbar_disable(); + break; + case PCI_STATUS: + _rdmsr(SB_MSR_REG(SB_ERROR), &hi, &lo); + temp = lo & 0x0000ffff; + if ((value & PCI_STATUS_SIG_TARGET_ABORT) && + (lo & SB_TAS_ERR_EN)) + temp |= SB_TAS_ERR_FLAG; + + if ((value & PCI_STATUS_REC_TARGET_ABORT) && + (lo & SB_TAR_ERR_EN)) + temp |= SB_TAR_ERR_FLAG; + + if ((value & PCI_STATUS_REC_MASTER_ABORT) + && (lo & SB_MAR_ERR_EN)) + temp |= SB_MAR_ERR_FLAG; + + if ((value & PCI_STATUS_DETECTED_PARITY) + && (lo & SB_PARE_ERR_EN)) + temp |= SB_PARE_ERR_FLAG; + + lo = temp; + _wrmsr(SB_MSR_REG(SB_ERROR), hi, lo); + break; + case PCI_CACHE_LINE_SIZE: + value &= 0x0000ff00; + _rdmsr(SB_MSR_REG(SB_CTRL), &hi, &lo); + hi &= 0xffffff00; + hi |= (value >> 8); + _wrmsr(SB_MSR_REG(SB_CTRL), hi, lo); + break; + case PCI_BAR0_REG: + pci_isa_write_bar(0, value); + break; + case PCI_BAR1_REG: + pci_isa_write_bar(1, value); + break; + case PCI_BAR2_REG: + pci_isa_write_bar(2, value); + break; + case PCI_BAR3_REG: + pci_isa_write_bar(3, value); + break; + case PCI_BAR4_REG: + pci_isa_write_bar(4, value); + break; + case PCI_BAR5_REG: + pci_isa_write_bar(5, value); + break; + case PCI_UART1_INT_REG: + _rdmsr(DIVIL_MSR_REG(PIC_YSEL_HIGH), &hi, &lo); + /* disable uart1 interrupt in PIC */ + lo &= ~(0xf << 24); + if (value) /* enable uart1 interrupt in PIC */ + lo |= (CS5536_UART1_INTR << 24); + _wrmsr(DIVIL_MSR_REG(PIC_YSEL_HIGH), hi, lo); + break; + case PCI_UART2_INT_REG: + _rdmsr(DIVIL_MSR_REG(PIC_YSEL_HIGH), &hi, &lo); + /* disable uart2 interrupt in PIC */ + lo &= ~(0xf << 28); + if (value) /* enable uart2 interrupt in PIC */ + lo |= (CS5536_UART2_INTR << 28); + _wrmsr(DIVIL_MSR_REG(PIC_YSEL_HIGH), hi, 
lo);
+ break;
+ case PCI_ISA_FIXUP_REG:
+ if (value) {
+ /* enable the TARGET ABORT/MASTER ABORT etc. */
+ _rdmsr(SB_MSR_REG(SB_ERROR), &hi, &lo);
+ lo |= 0x00000063;
+ _wrmsr(SB_MSR_REG(SB_ERROR), hi, lo);
+ }
+
+ default:
+ /* ALL OTHER PCI CONFIG SPACE HEADER IS NOT IMPLEMENTED. */
+ break;
+ }
+}
+
+/*
+ * isa_read: ISA read transfers
+ *
+ * We assume that this is not a bus master transfer.
+ */
+u32 pci_isa_read_reg(int reg)
+{
+ u32 conf_data = 0;
+ u32 hi, lo;
+
+ switch (reg) {
+ case PCI_VENDOR_ID:
+ conf_data =
+ CFG_PCI_VENDOR_ID(CS5536_ISA_DEVICE_ID, CS5536_VENDOR_ID);
+ break;
+ case PCI_COMMAND:
+ /* we just check the first LBAR for the IO enable bit; */
+ /* this may need to be changed later. */
+ _rdmsr(DIVIL_MSR_REG(DIVIL_LBAR_SMB), &hi, &lo);
+ if (hi & 0x01)
+ conf_data |= PCI_COMMAND_IO;
+ break;
+ case PCI_STATUS:
+ conf_data |= PCI_STATUS_66MHZ;
+ conf_data |= PCI_STATUS_DEVSEL_MEDIUM;
+ conf_data |= PCI_STATUS_FAST_BACK;
+
+ _rdmsr(SB_MSR_REG(SB_ERROR), &hi, &lo);
+ if (lo & SB_TAS_ERR_FLAG)
+ conf_data |= PCI_STATUS_SIG_TARGET_ABORT;
+ if (lo & SB_TAR_ERR_FLAG)
+ conf_data |= PCI_STATUS_REC_TARGET_ABORT;
+ if (lo & SB_MAR_ERR_FLAG)
+ conf_data |= PCI_STATUS_REC_MASTER_ABORT;
+ if (lo & SB_PARE_ERR_FLAG)
+ conf_data |= PCI_STATUS_DETECTED_PARITY;
+ break;
+ case PCI_CLASS_REVISION:
+ _rdmsr(GLCP_MSR_REG(GLCP_CHIP_REV_ID), &hi, &lo);
+ conf_data = lo & 0x000000ff;
+ conf_data |= (CS5536_ISA_CLASS_CODE << 8);
+ break;
+ case PCI_CACHE_LINE_SIZE:
+ _rdmsr(SB_MSR_REG(SB_CTRL), &hi, &lo);
+ hi &= 0x000000f8;
+ conf_data = CFG_PCI_CACHE_LINE_SIZE(PCI_BRIDGE_HEADER_TYPE, hi);
+ break;
+ /*
+ * we only use the LBAR of DIVIL, no RCONF used.
+ * all of them are IO space.
+ */
+ case PCI_BAR0_REG:
+ return pci_isa_read_bar(0);
+ break;
+ case PCI_BAR1_REG:
+ return pci_isa_read_bar(1);
+ break;
+ case PCI_BAR2_REG:
+ return pci_isa_read_bar(2);
+ break;
+ case PCI_BAR3_REG:
+ break;
+ case PCI_BAR4_REG:
+ return pci_isa_read_bar(4);
+ break;
+ case PCI_BAR5_REG:
+ return pci_isa_read_bar(5);
+ break;
+ case PCI_CARDBUS_CIS:
+ conf_data = PCI_CARDBUS_CIS_POINTER;
+ break;
+ case PCI_SUBSYSTEM_VENDOR_ID:
+ conf_data =
+ CFG_PCI_VENDOR_ID(CS5536_ISA_SUB_ID, CS5536_SUB_VENDOR_ID);
+ break;
+ case PCI_ROM_ADDRESS:
+ conf_data = PCI_EXPANSION_ROM_BAR;
+ break;
+ case PCI_CAPABILITY_LIST:
+ conf_data = PCI_CAPLIST_POINTER;
+ break;
+ case PCI_INTERRUPT_LINE:
+ /* no interrupt used here */
+ conf_data = CFG_PCI_INTERRUPT_LINE(0x00, 0x00);
+ break;
+ default:
+ break;
+ }
+
+ return conf_data;
+}
+
+/*
+ * The mfgpt timer interrupt is running early, so we must keep the south bridge
+ * mmio always enabled. Otherwise we may race with the PCI configuration which
+ * may temporarily disable it. When that happens and the timer interrupt fires,
+ * we are not able to clear it and the system will hang.
+ */
+static void cs5536_isa_mmio_always_on(struct pci_dev *dev)
+{
+ dev->mmio_always_on = 1;
+}
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CS5536_ISA,
+ PCI_CLASS_BRIDGE_ISA, 8, cs5536_isa_mmio_always_on);
diff --git a/arch/mips/loongson2ef/common/cs5536/cs5536_mfgpt.c b/arch/mips/loongson2ef/common/cs5536/cs5536_mfgpt.c
new file mode 100644
index 000000000000..30af1b7c7529
--- /dev/null
+++ b/arch/mips/loongson2ef/common/cs5536/cs5536_mfgpt.c
@@ -0,0 +1,207 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * CS5536 General timer functions
+ *
+ * Copyright (C) 2007 Lemote Inc. & Institute of Computing Technology
+ * Author: Yanhua, yanh@lemote.com
+ *
+ * Copyright (C) 2009 Lemote Inc.
+ * Author: Wu zhangjin, wuzhangjin@gmail.com
+ *
+ * Reference: AMD Geode(TM) CS5536 Companion Device Data Book
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+#include
+
+static DEFINE_RAW_SPINLOCK(mfgpt_lock);
+
+static u32 mfgpt_base;
+
+/*
+ * Initialize the MFGPT timer.
+ *
+ * This is also called after resume to bring the MFGPT into operation again.
+ */
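+
+/*
+ * MFGPT0_SETUP bit usage, as inferred from the code below (a sketch,
+ * not taken from the data book): bit 15 is the counter-enable bit --
+ * disable_mfgpt0_counter() clears it via the 0x7fff mask and
+ * enable_mfgpt0_counter() sets it as part of the 0xe310 value -- and
+ * bit 14 is the comparator-2 event flag that timer_interrupt() writes
+ * back to acknowledge the tick.
+ */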
+
+/* disable counter */
+void disable_mfgpt0_counter(void)
+{
+ outw(inw(MFGPT0_SETUP) & 0x7fff, MFGPT0_SETUP);
+}
+EXPORT_SYMBOL(disable_mfgpt0_counter);
+
+/* enable counter, comparator2 to event mode, 14.318MHz clock */
+void enable_mfgpt0_counter(void)
+{
+ outw(0xe310, MFGPT0_SETUP);
+}
+EXPORT_SYMBOL(enable_mfgpt0_counter);
+
+static int mfgpt_timer_set_periodic(struct clock_event_device *evt)
+{
+ raw_spin_lock(&mfgpt_lock);
+
+ outw(COMPARE, MFGPT0_CMP2); /* set comparator2 */
+ outw(0, MFGPT0_CNT); /* set counter to 0 */
+ enable_mfgpt0_counter();
+
+ raw_spin_unlock(&mfgpt_lock);
+ return 0;
+}
+
+static int mfgpt_timer_shutdown(struct clock_event_device *evt)
+{
+ if (clockevent_state_periodic(evt) || clockevent_state_oneshot(evt)) {
+ raw_spin_lock(&mfgpt_lock);
+ disable_mfgpt0_counter();
+ raw_spin_unlock(&mfgpt_lock);
+ }
+
+ return 0;
+}
+
+static struct clock_event_device mfgpt_clockevent = {
+ .name = "mfgpt",
+ .features = CLOCK_EVT_FEAT_PERIODIC,
+
+ /* The oneshot mode has very high deviation, don't use it! */
+ .set_state_shutdown = mfgpt_timer_shutdown,
+ .set_state_periodic = mfgpt_timer_set_periodic,
+ .irq = CS5536_MFGPT_INTR,
+};
+
+static irqreturn_t timer_interrupt(int irq, void *dev_id)
+{
+ u32 basehi;
+
+ /*
+ * get MFGPT base address
+ *
+ * NOTE: do not remove this; it is needed because the value of
+ * mfgpt_base is variable
+ */
+ _rdmsr(DIVIL_MSR_REG(DIVIL_LBAR_MFGPT), &basehi, &mfgpt_base);
+
+ /* ack */
+ outw(inw(MFGPT0_SETUP) | 0x4000, MFGPT0_SETUP);
+
+ mfgpt_clockevent.event_handler(&mfgpt_clockevent);
+
+ return IRQ_HANDLED;
+}
+
+static struct irqaction irq5 = {
+ .handler = timer_interrupt,
+ .flags = IRQF_NOBALANCING | IRQF_TIMER,
+ .name = "timer"
+};
+
+/*
+ * Initialize the conversion factor and the min/max deltas of the clock event
+ * structure and register the clock event source with the framework.
+ */
+void __init setup_mfgpt0_timer(void)
+{
+ u32 basehi;
+ struct clock_event_device *cd = &mfgpt_clockevent;
+ unsigned int cpu = smp_processor_id();
+
+ cd->cpumask = cpumask_of(cpu);
+ clockevent_set_clock(cd, MFGPT_TICK_RATE);
+ cd->max_delta_ns = clockevent_delta2ns(0xffff, cd);
+ cd->max_delta_ticks = 0xffff;
+ cd->min_delta_ns = clockevent_delta2ns(0xf, cd);
+ cd->min_delta_ticks = 0xf;
+
+ /* Enable MFGPT0 Comparator 2 Output to the Interrupt Mapper */
+ _wrmsr(DIVIL_MSR_REG(MFGPT_IRQ), 0, 0x100);
+
+ /* Enable Interrupt Gate 5 */
+ _wrmsr(DIVIL_MSR_REG(PIC_ZSEL_LOW), 0, 0x50000);
+
+ /* get MFGPT base address */
+ _rdmsr(DIVIL_MSR_REG(DIVIL_LBAR_MFGPT), &basehi, &mfgpt_base);
+
+ clockevents_register_device(cd);
+
+ setup_irq(CS5536_MFGPT_INTR, &irq5);
+}
+
+/*
+ * Since the MFGPT overflows every tick, it's not very useful
+ * to just read by itself.
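+ * (The hardware count wraps at COMPARE once per tick, so a raw read
+ * only shows the position inside the current tick.)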
So use jiffies to emulate a free + * running counter: + */ +static u64 mfgpt_read(struct clocksource *cs) +{ + unsigned long flags; + int count; + u32 jifs; + static int old_count; + static u32 old_jifs; + + raw_spin_lock_irqsave(&mfgpt_lock, flags); + /* + * Although our caller may have the read side of xtime_lock, + * this is now a seqlock, and we are cheating in this routine + * by having side effects on state that we cannot undo if + * there is a collision on the seqlock and our caller has to + * retry. (Namely, old_jifs and old_count.) So we must treat + * jiffies as volatile despite the lock. We read jiffies + * before latching the timer count to guarantee that although + * the jiffies value might be older than the count (that is, + * the counter may underflow between the last point where + * jiffies was incremented and the point where we latch the + * count), it cannot be newer. + */ + jifs = jiffies; + /* read the count */ + count = inw(MFGPT0_CNT); + + /* + * It's possible for count to appear to go the wrong way for this + * reason: + * + * The timer counter underflows, but we haven't handled the resulting + * interrupt and incremented jiffies yet. + * + * Previous attempts to handle these cases intelligently were buggy, so + * we just do the simple thing now. + */ + if (count < old_count && jifs == old_jifs) + count = old_count; + + old_count = count; + old_jifs = jifs; + + raw_spin_unlock_irqrestore(&mfgpt_lock, flags); + + return (u64) (jifs * COMPARE) + count; +} + +static struct clocksource clocksource_mfgpt = { + .name = "mfgpt", + .rating = 120, /* Functional for real use, but not desired */ + .read = mfgpt_read, + .mask = CLOCKSOURCE_MASK(32), +}; + +int __init init_mfgpt_clocksource(void) +{ + if (num_possible_cpus() > 1) /* MFGPT does not scale! */ + return 0; + + return clocksource_register_hz(&clocksource_mfgpt, MFGPT_TICK_RATE); +} + +arch_initcall(init_mfgpt_clocksource); diff --git a/arch/mips/loongson2ef/common/cs5536/cs5536_ohci.c b/arch/mips/loongson2ef/common/cs5536/cs5536_ohci.c new file mode 100644 index 000000000000..71a52b120317 --- /dev/null +++ b/arch/mips/loongson2ef/common/cs5536/cs5536_ohci.c @@ -0,0 +1,145 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * the OHCI Virtual Support Module of AMD CS5536 + * + * Copyright (C) 2007 Lemote, Inc. + * Author : jlliu, liujl@lemote.com + * + * Copyright (C) 2009 Lemote, Inc. 
+ * Author: Wu Zhangjin, wuzhangjin@gmail.com + */ + +#include +#include + +void pci_ohci_write_reg(int reg, u32 value) +{ + u32 hi = 0, lo = value; + + switch (reg) { + case PCI_COMMAND: + _rdmsr(USB_MSR_REG(USB_OHCI), &hi, &lo); + if (value & PCI_COMMAND_MASTER) + hi |= PCI_COMMAND_MASTER; + else + hi &= ~PCI_COMMAND_MASTER; + + if (value & PCI_COMMAND_MEMORY) + hi |= PCI_COMMAND_MEMORY; + else + hi &= ~PCI_COMMAND_MEMORY; + _wrmsr(USB_MSR_REG(USB_OHCI), hi, lo); + break; + case PCI_STATUS: + if (value & PCI_STATUS_PARITY) { + _rdmsr(SB_MSR_REG(SB_ERROR), &hi, &lo); + if (lo & SB_PARE_ERR_FLAG) { + lo = (lo & 0x0000ffff) | SB_PARE_ERR_FLAG; + _wrmsr(SB_MSR_REG(SB_ERROR), hi, lo); + } + } + break; + case PCI_BAR0_REG: + if (value == PCI_BAR_RANGE_MASK) { + _rdmsr(GLCP_MSR_REG(GLCP_SOFT_COM), &hi, &lo); + lo |= SOFT_BAR_OHCI_FLAG; + _wrmsr(GLCP_MSR_REG(GLCP_SOFT_COM), hi, lo); + } else if ((value & 0x01) == 0x00) { + _rdmsr(USB_MSR_REG(USB_OHCI), &hi, &lo); + lo = value; + _wrmsr(USB_MSR_REG(USB_OHCI), hi, lo); + + value &= 0xfffffff0; + hi = 0x40000000 | ((value & 0xff000000) >> 24); + lo = 0x000fffff | ((value & 0x00fff000) << 8); + _wrmsr(GLIU_MSR_REG(GLIU_P2D_BM3), hi, lo); + } + break; + case PCI_OHCI_INT_REG: + _rdmsr(DIVIL_MSR_REG(PIC_YSEL_LOW), &hi, &lo); + lo &= ~(0xf << PIC_YSEL_LOW_USB_SHIFT); + if (value) /* enable all the usb interrupt in PIC */ + lo |= (CS5536_USB_INTR << PIC_YSEL_LOW_USB_SHIFT); + _wrmsr(DIVIL_MSR_REG(PIC_YSEL_LOW), hi, lo); + break; + default: + break; + } +} + +u32 pci_ohci_read_reg(int reg) +{ + u32 conf_data = 0; + u32 hi, lo; + + switch (reg) { + case PCI_VENDOR_ID: + conf_data = + CFG_PCI_VENDOR_ID(CS5536_OHCI_DEVICE_ID, CS5536_VENDOR_ID); + break; + case PCI_COMMAND: + _rdmsr(USB_MSR_REG(USB_OHCI), &hi, &lo); + if (hi & PCI_COMMAND_MASTER) + conf_data |= PCI_COMMAND_MASTER; + if (hi & PCI_COMMAND_MEMORY) + conf_data |= PCI_COMMAND_MEMORY; + break; + case PCI_STATUS: + conf_data |= PCI_STATUS_66MHZ; + conf_data |= PCI_STATUS_FAST_BACK; + _rdmsr(SB_MSR_REG(SB_ERROR), &hi, &lo); + if (lo & SB_PARE_ERR_FLAG) + conf_data |= PCI_STATUS_PARITY; + conf_data |= PCI_STATUS_DEVSEL_MEDIUM; + break; + case PCI_CLASS_REVISION: + _rdmsr(USB_MSR_REG(USB_CAP), &hi, &lo); + conf_data = lo & 0x000000ff; + conf_data |= (CS5536_OHCI_CLASS_CODE << 8); + break; + case PCI_CACHE_LINE_SIZE: + conf_data = + CFG_PCI_CACHE_LINE_SIZE(PCI_NORMAL_HEADER_TYPE, + PCI_NORMAL_LATENCY_TIMER); + break; + case PCI_BAR0_REG: + _rdmsr(GLCP_MSR_REG(GLCP_SOFT_COM), &hi, &lo); + if (lo & SOFT_BAR_OHCI_FLAG) { + conf_data = CS5536_OHCI_RANGE | + PCI_BASE_ADDRESS_SPACE_MEMORY; + lo &= ~SOFT_BAR_OHCI_FLAG; + _wrmsr(GLCP_MSR_REG(GLCP_SOFT_COM), hi, lo); + } else { + _rdmsr(USB_MSR_REG(USB_OHCI), &hi, &lo); + conf_data = lo & 0xffffff00; + conf_data &= ~0x0000000f; /* 32bit mem */ + } + break; + case PCI_CARDBUS_CIS: + conf_data = PCI_CARDBUS_CIS_POINTER; + break; + case PCI_SUBSYSTEM_VENDOR_ID: + conf_data = + CFG_PCI_VENDOR_ID(CS5536_OHCI_SUB_ID, CS5536_SUB_VENDOR_ID); + break; + case PCI_ROM_ADDRESS: + conf_data = PCI_EXPANSION_ROM_BAR; + break; + case PCI_CAPABILITY_LIST: + conf_data = PCI_CAPLIST_USB_POINTER; + break; + case PCI_INTERRUPT_LINE: + conf_data = + CFG_PCI_INTERRUPT_LINE(PCI_DEFAULT_PIN, CS5536_USB_INTR); + break; + case PCI_OHCI_INT_REG: + _rdmsr(DIVIL_MSR_REG(PIC_YSEL_LOW), &hi, &lo); + if (((lo >> PIC_YSEL_LOW_USB_SHIFT) & 0xf) == CS5536_USB_INTR) + conf_data = 1; + break; + default: + break; + } + + return conf_data; +} diff --git 
a/arch/mips/loongson2ef/common/cs5536/cs5536_pci.c b/arch/mips/loongson2ef/common/cs5536/cs5536_pci.c
new file mode 100644
index 000000000000..202c89b568ba
--- /dev/null
+++ b/arch/mips/loongson2ef/common/cs5536/cs5536_pci.c
@@ -0,0 +1,84 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * read/write operation to the PCI config space of CS5536
+ *
+ * Copyright (C) 2007 Lemote, Inc.
+ * Author : jlliu, liujl@lemote.com
+ *
+ * Copyright (C) 2009 Lemote, Inc.
+ * Author: Wu Zhangjin, wuzhangjin@gmail.com
+ *
+ * the Virtual Support Modules (VSM) for virtualizing the PCI
+ * configure space are defined in the cs5536_modulename.c files
+ * respectively;
+ *
+ * after this virtualization, users can access the PCI configure space
+ * directly as on a normal multi-function PCI device which follows
+ * the PCI-2.2 spec.
+ */
+
+#include
+#include
+#include
+
+enum {
+ CS5536_FUNC_START = -1,
+ CS5536_ISA_FUNC,
+ reserved_func,
+ CS5536_IDE_FUNC,
+ CS5536_ACC_FUNC,
+ CS5536_OHCI_FUNC,
+ CS5536_EHCI_FUNC,
+ CS5536_FUNC_END,
+};
+
+static const cs5536_pci_vsm_write vsm_conf_write[] = {
+ [CS5536_ISA_FUNC] = pci_isa_write_reg,
+ [reserved_func] = NULL,
+ [CS5536_IDE_FUNC] = pci_ide_write_reg,
+ [CS5536_ACC_FUNC] = pci_acc_write_reg,
+ [CS5536_OHCI_FUNC] = pci_ohci_write_reg,
+ [CS5536_EHCI_FUNC] = pci_ehci_write_reg,
+};
+
+static const cs5536_pci_vsm_read vsm_conf_read[] = {
+ [CS5536_ISA_FUNC] = pci_isa_read_reg,
+ [reserved_func] = NULL,
+ [CS5536_IDE_FUNC] = pci_ide_read_reg,
+ [CS5536_ACC_FUNC] = pci_acc_read_reg,
+ [CS5536_OHCI_FUNC] = pci_ohci_read_reg,
+ [CS5536_EHCI_FUNC] = pci_ehci_read_reg,
+};
+
+/*
+ * translate a write to PCI config space into the corresponding MSR write.
+ */
+void cs5536_pci_conf_write4(int function, int reg, u32 value)
+{
+ if ((function <= CS5536_FUNC_START) || (function >= CS5536_FUNC_END))
+ return;
+ if ((reg < 0) || (reg > 0x100) || ((reg & 0x03) != 0))
+ return;
+
+ if (vsm_conf_write[function] != NULL)
+ vsm_conf_write[function](reg, value);
+}
+
+/*
+ * translate a read of PCI config space into the corresponding MSR access.
+ */
+u32 cs5536_pci_conf_read4(int function, int reg)
+{
+ u32 data = 0;
+
+ if ((function <= CS5536_FUNC_START) || (function >= CS5536_FUNC_END))
+ return 0;
+ if ((reg < 0) || ((reg & 0x03) != 0))
+ return 0;
+ if (reg > 0x100)
+ return 0xffffffff;
+
+ if (vsm_conf_read[function] != NULL)
+ data = vsm_conf_read[function](reg);
+
+ return data;
+}
diff --git a/arch/mips/loongson2ef/common/early_printk.c b/arch/mips/loongson2ef/common/early_printk.c
new file mode 100644
index 000000000000..5e2a151aa30c
--- /dev/null
+++ b/arch/mips/loongson2ef/common/early_printk.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* early printk support
+ *
+ * Copyright (c) 2009 Philippe Vachon
+ * Copyright (c) 2009 Lemote Inc.
+ * Author: Wu Zhangjin, wuzhangjin@gmail.com + */ +#include +#include + +#include + +#define PORT(base, offset) (u8 *)(base + offset) + +static inline unsigned int serial_in(unsigned char *base, int offset) +{ + return readb(PORT(base, offset)); +} + +static inline void serial_out(unsigned char *base, int offset, int value) +{ + writeb(value, PORT(base, offset)); +} + +void prom_putchar(char c) +{ + int timeout; + unsigned char *uart_base; + + uart_base = (unsigned char *)_loongson_uart_base[0]; + timeout = 1024; + + while (((serial_in(uart_base, UART_LSR) & UART_LSR_THRE) == 0) && + (timeout-- > 0)) + ; + + serial_out(uart_base, UART_TX, c); +} diff --git a/arch/mips/loongson2ef/common/env.c b/arch/mips/loongson2ef/common/env.c new file mode 100644 index 000000000000..09d5cf4676ca --- /dev/null +++ b/arch/mips/loongson2ef/common/env.c @@ -0,0 +1,212 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Based on Ocelot Linux port, which is + * Copyright 2001 MontaVista Software Inc. + * Author: jsun@mvista.com or jsun@junsun.net + * + * Copyright 2003 ICT CAS + * Author: Michael Guo + * + * Copyright (C) 2007 Lemote Inc. & Institute of Computing Technology + * Author: Fuxin Zhang, zhangfx@lemote.com + * + * Copyright (C) 2009 Lemote Inc. + * Author: Wu Zhangjin, wuzhangjin@gmail.com + */ +#include +#include +#include +#include +#include + +u32 cpu_clock_freq; +EXPORT_SYMBOL(cpu_clock_freq); +struct efi_memory_map_loongson *loongson_memmap; +struct loongson_system_configuration loongson_sysconf; + +u64 loongson_chipcfg[MAX_PACKAGES] = {0xffffffffbfc00180}; +u64 loongson_chiptemp[MAX_PACKAGES]; +u64 loongson_freqctrl[MAX_PACKAGES]; + +unsigned long long smp_group[4]; + +#define parse_even_earlier(res, option, p) \ +do { \ + unsigned int tmp __maybe_unused; \ + \ + if (strncmp(option, (char *)p, strlen(option)) == 0) \ + tmp = kstrtou32((char *)p + strlen(option"="), 10, &res); \ +} while (0) + +void __init prom_init_env(void) +{ + /* pmon passes arguments in 32bit pointers */ + unsigned int processor_id; + +#ifndef CONFIG_LEFI_FIRMWARE_INTERFACE + int *_prom_envp; + long l; + + /* firmware arguments are initialized in head.S */ + _prom_envp = (int *)fw_arg2; + + l = (long)*_prom_envp; + while (l != 0) { + parse_even_earlier(cpu_clock_freq, "cpuclock", l); + parse_even_earlier(memsize, "memsize", l); + parse_even_earlier(highmemsize, "highmemsize", l); + _prom_envp++; + l = (long)*_prom_envp; + } + if (memsize == 0) + memsize = 256; + + loongson_sysconf.nr_uarts = 1; + + pr_info("memsize=%u, highmemsize=%u\n", memsize, highmemsize); +#else + struct boot_params *boot_p; + struct loongson_params *loongson_p; + struct system_loongson *esys; + struct efi_cpuinfo_loongson *ecpu; + struct irq_source_routing_table *eirq_source; + + /* firmware arguments are initialized in head.S */ + boot_p = (struct boot_params *)fw_arg2; + loongson_p = &(boot_p->efi.smbios.lp); + + esys = (struct system_loongson *) + ((u64)loongson_p + loongson_p->system_offset); + ecpu = (struct efi_cpuinfo_loongson *) + ((u64)loongson_p + loongson_p->cpu_offset); + eirq_source = (struct irq_source_routing_table *) + ((u64)loongson_p + loongson_p->irq_offset); + loongson_memmap = (struct efi_memory_map_loongson *) + ((u64)loongson_p + loongson_p->memory_offset); + + cpu_clock_freq = ecpu->cpu_clock_freq; + loongson_sysconf.cputype = ecpu->cputype; + switch (ecpu->cputype) { + case Legacy_3A: + case Loongson_3A: + loongson_sysconf.cores_per_node = 4; + loongson_sysconf.cores_per_package = 4; + smp_group[0] = 
0x900000003ff01000; + smp_group[1] = 0x900010003ff01000; + smp_group[2] = 0x900020003ff01000; + smp_group[3] = 0x900030003ff01000; + loongson_chipcfg[0] = 0x900000001fe00180; + loongson_chipcfg[1] = 0x900010001fe00180; + loongson_chipcfg[2] = 0x900020001fe00180; + loongson_chipcfg[3] = 0x900030001fe00180; + loongson_chiptemp[0] = 0x900000001fe0019c; + loongson_chiptemp[1] = 0x900010001fe0019c; + loongson_chiptemp[2] = 0x900020001fe0019c; + loongson_chiptemp[3] = 0x900030001fe0019c; + loongson_freqctrl[0] = 0x900000001fe001d0; + loongson_freqctrl[1] = 0x900010001fe001d0; + loongson_freqctrl[2] = 0x900020001fe001d0; + loongson_freqctrl[3] = 0x900030001fe001d0; + loongson_sysconf.ht_control_base = 0x90000EFDFB000000; + loongson_sysconf.workarounds = WORKAROUND_CPUFREQ; + break; + case Legacy_3B: + case Loongson_3B: + loongson_sysconf.cores_per_node = 4; /* One chip has 2 nodes */ + loongson_sysconf.cores_per_package = 8; + smp_group[0] = 0x900000003ff01000; + smp_group[1] = 0x900010003ff05000; + smp_group[2] = 0x900020003ff09000; + smp_group[3] = 0x900030003ff0d000; + loongson_chipcfg[0] = 0x900000001fe00180; + loongson_chipcfg[1] = 0x900020001fe00180; + loongson_chipcfg[2] = 0x900040001fe00180; + loongson_chipcfg[3] = 0x900060001fe00180; + loongson_chiptemp[0] = 0x900000001fe0019c; + loongson_chiptemp[1] = 0x900020001fe0019c; + loongson_chiptemp[2] = 0x900040001fe0019c; + loongson_chiptemp[3] = 0x900060001fe0019c; + loongson_freqctrl[0] = 0x900000001fe001d0; + loongson_freqctrl[1] = 0x900020001fe001d0; + loongson_freqctrl[2] = 0x900040001fe001d0; + loongson_freqctrl[3] = 0x900060001fe001d0; + loongson_sysconf.ht_control_base = 0x90001EFDFB000000; + loongson_sysconf.workarounds = WORKAROUND_CPUHOTPLUG; + break; + default: + loongson_sysconf.cores_per_node = 1; + loongson_sysconf.cores_per_package = 1; + loongson_chipcfg[0] = 0x900000001fe00180; + } + + loongson_sysconf.nr_cpus = ecpu->nr_cpus; + loongson_sysconf.boot_cpu_id = ecpu->cpu_startup_core_id; + loongson_sysconf.reserved_cpus_mask = ecpu->reserved_cores_mask; + if (ecpu->nr_cpus > NR_CPUS || ecpu->nr_cpus == 0) + loongson_sysconf.nr_cpus = NR_CPUS; + loongson_sysconf.nr_nodes = (loongson_sysconf.nr_cpus + + loongson_sysconf.cores_per_node - 1) / + loongson_sysconf.cores_per_node; + + loongson_sysconf.pci_mem_start_addr = eirq_source->pci_mem_start_addr; + loongson_sysconf.pci_mem_end_addr = eirq_source->pci_mem_end_addr; + loongson_sysconf.pci_io_base = eirq_source->pci_io_start_addr; + loongson_sysconf.dma_mask_bits = eirq_source->dma_mask_bits; + if (loongson_sysconf.dma_mask_bits < 32 || + loongson_sysconf.dma_mask_bits > 64) + loongson_sysconf.dma_mask_bits = 32; + + loongson_sysconf.restart_addr = boot_p->reset_system.ResetWarm; + loongson_sysconf.poweroff_addr = boot_p->reset_system.Shutdown; + loongson_sysconf.suspend_addr = boot_p->reset_system.DoSuspend; + + loongson_sysconf.vgabios_addr = boot_p->efi.smbios.vga_bios; + pr_debug("Shutdown Addr: %llx, Restart Addr: %llx, VBIOS Addr: %llx\n", + loongson_sysconf.poweroff_addr, loongson_sysconf.restart_addr, + loongson_sysconf.vgabios_addr); + + memset(loongson_sysconf.ecname, 0, 32); + if (esys->has_ec) + memcpy(loongson_sysconf.ecname, esys->ec_name, 32); + loongson_sysconf.workarounds |= esys->workarounds; + + loongson_sysconf.nr_uarts = esys->nr_uarts; + if (esys->nr_uarts < 1 || esys->nr_uarts > MAX_UARTS) + loongson_sysconf.nr_uarts = 1; + memcpy(loongson_sysconf.uarts, esys->uarts, + sizeof(struct uart_device) * loongson_sysconf.nr_uarts); + + loongson_sysconf.nr_sensors 
= esys->nr_sensors;
+ if (loongson_sysconf.nr_sensors > MAX_SENSORS)
+ loongson_sysconf.nr_sensors = 0;
+ if (loongson_sysconf.nr_sensors)
+ memcpy(loongson_sysconf.sensors, esys->sensors,
+ sizeof(struct sensor_device) * loongson_sysconf.nr_sensors);
+#endif
+ if (cpu_clock_freq == 0) {
+ processor_id = (&current_cpu_data)->processor_id;
+ switch (processor_id & PRID_REV_MASK) {
+ case PRID_REV_LOONGSON2E:
+ cpu_clock_freq = 533080000;
+ break;
+ case PRID_REV_LOONGSON2F:
+ cpu_clock_freq = 797000000;
+ break;
+ case PRID_REV_LOONGSON3A_R1:
+ case PRID_REV_LOONGSON3A_R2_0:
+ case PRID_REV_LOONGSON3A_R2_1:
+ case PRID_REV_LOONGSON3A_R3_0:
+ case PRID_REV_LOONGSON3A_R3_1:
+ cpu_clock_freq = 900000000;
+ break;
+ case PRID_REV_LOONGSON3B_R1:
+ case PRID_REV_LOONGSON3B_R2:
+ cpu_clock_freq = 1000000000;
+ break;
+ default:
+ cpu_clock_freq = 100000000;
+ break;
+ }
+ }
+ pr_info("CpuClock = %u\n", cpu_clock_freq);
+}
diff --git a/arch/mips/loongson2ef/common/init.c b/arch/mips/loongson2ef/common/init.c
new file mode 100644
index 000000000000..912fe61c4fc7
--- /dev/null
+++ b/arch/mips/loongson2ef/common/init.c
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2009 Lemote Inc.
+ * Author: Wu Zhangjin, wuzhangjin@gmail.com
+ */
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+/* Loongson CPU address windows config space base address */
+unsigned long __maybe_unused _loongson_addrwincfg_base;
+
+static void __init mips_nmi_setup(void)
+{
+ void *base;
+ extern char except_vec_nmi;
+
+ base = (void *)(CAC_BASE + 0x380);
+ memcpy(base, &except_vec_nmi, 0x80);
+ flush_icache_range((unsigned long)base, (unsigned long)base + 0x80);
+}
+
+void __init prom_init(void)
+{
+#ifdef CONFIG_CPU_SUPPORTS_ADDRWINCFG
+ _loongson_addrwincfg_base = (unsigned long)
+ ioremap(LOONGSON_ADDRWINCFG_BASE, LOONGSON_ADDRWINCFG_SIZE);
+#endif
+
+ prom_init_cmdline();
+ prom_init_env();
+
+ /* init base address of io space */
+ set_io_port_base((unsigned long)
+ ioremap(LOONGSON_PCIIO_BASE, LOONGSON_PCIIO_SIZE));
+
+#ifdef CONFIG_NUMA
+ prom_init_numa_memory();
+#else
+ prom_init_memory();
+#endif
+
+ /* init the uart base address */
+ prom_init_uart_base();
+ register_smp_ops(&loongson3_smp_ops);
+ board_nmi_handler_setup = mips_nmi_setup;
+}
+
+void __init prom_free_prom_memory(void)
+{
+}
diff --git a/arch/mips/loongson2ef/common/irq.c b/arch/mips/loongson2ef/common/irq.c
new file mode 100644
index 000000000000..0ea93c1c0a97
--- /dev/null
+++ b/arch/mips/loongson2ef/common/irq.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2007 Lemote Inc. & Institute of Computing Technology
+ * Author: Fuxin Zhang, zhangfx@lemote.com
+ */
+#include
+#include
+
+#include
+/*
+ * the first-level int-handler will jump here if it is a bonito irq
+ */
+void bonito_irqdispatch(void)
+{
+ u32 int_status;
+ int i;
+
+ /* workaround the IO DMA problem: let the CPU loop until DMA finishes */
+ int_status = LOONGSON_INTISR;
+ while (int_status & (1 << 10)) {
+ udelay(1);
+ int_status = LOONGSON_INTISR;
+ }
+
+ /* Get pending sources, masked by current enables */
+ int_status = LOONGSON_INTISR & LOONGSON_INTEN;
+
+ if (int_status) {
+ i = __ffs(int_status);
+ do_IRQ(LOONGSON_IRQ_BASE + i);
+ }
+}
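+
+/*
+ * Dispatch flow, sketched from this file and the board code later in
+ * this series: plat_irq_dispatch() below masks CP0 Cause by CP0 Status
+ * so that only enabled IP lines are considered, then hands the pending
+ * mask to the board's mach_irq_dispatch() (e.g. the fuloong-2e version
+ * routes IP2 to bonito_irqdispatch() above).
+ */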
+
+asmlinkage void plat_irq_dispatch(void)
+{
+ unsigned int pending;
+
+ pending = read_c0_cause() & read_c0_status() & ST0_IM;
+
+ /* machine-specific plat_irq_dispatch */
+ mach_irq_dispatch(pending);
+}
+
+void __init arch_init_irq(void)
+{
+ /*
+ * Clear all of the interrupts while we change the table around a bit.
+ * int-handler is not on bootstrap
+ */
+ clear_c0_status(ST0_IM | ST0_BEV);
+
+ /* no steer */
+ LOONGSON_INTSTEER = 0;
+
+ /*
+ * Mask out all interrupts by writing "1" to all bit positions in
+ * the interrupt reset reg.
+ */
+ LOONGSON_INTENCLR = ~0;
+
+ /* machine specific irq init */
+ mach_init_irq();
+}
diff --git a/arch/mips/loongson2ef/common/machtype.c b/arch/mips/loongson2ef/common/machtype.c
new file mode 100644
index 000000000000..4e42d929f1c7
--- /dev/null
+++ b/arch/mips/loongson2ef/common/machtype.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2009 Lemote Inc.
+ * Author: Wu Zhangjin, wuzhangjin@gmail.com
+ *
+ * Copyright (c) 2009 Zhang Le
+ */
+#include
+#include
+
+#include
+#include
+
+/* please ensure the length of the machtype string is less than 50 */
+#define MACHTYPE_LEN 50
+
+static const char *system_types[] = {
+ [MACH_LOONGSON_UNKNOWN] = "unknown loongson machine",
+ [MACH_LEMOTE_FL2E] = "lemote-fuloong-2e-box",
+ [MACH_LEMOTE_FL2F] = "lemote-fuloong-2f-box",
+ [MACH_LEMOTE_ML2F7] = "lemote-mengloong-2f-7inches",
+ [MACH_LEMOTE_YL2F89] = "lemote-yeeloong-2f-8.9inches",
+ [MACH_DEXXON_GDIUM2F10] = "dexxon-gdium-2f",
+ [MACH_LEMOTE_NAS] = "lemote-nas-2f",
+ [MACH_LEMOTE_LL2F] = "lemote-lynloong-2f",
+ [MACH_LOONGSON_GENERIC] = "generic-loongson-machine",
+ [MACH_LOONGSON_END] = NULL,
+};
+
+const char *get_system_type(void)
+{
+ return system_types[mips_machtype];
+}
+
+void __weak __init mach_prom_init_machtype(void)
+{
+}
+
+void __init prom_init_machtype(void)
+{
+ char *p, str[MACHTYPE_LEN + 1];
+ int machtype = MACH_LEMOTE_FL2E;
+
+ mips_machtype = LOONGSON_MACHTYPE;
+
+ p = strstr(arcs_cmdline, "machtype=");
+ if (!p) {
+ mach_prom_init_machtype();
+ return;
+ }
+ p += strlen("machtype=");
+ strncpy(str, p, MACHTYPE_LEN);
+ str[MACHTYPE_LEN] = '\0';
+ p = strstr(str, " ");
+ if (p)
+ *p = '\0';
+
+ for (; system_types[machtype]; machtype++)
+ if (strstr(system_types[machtype], str)) {
+ mips_machtype = machtype;
+ break;
+ }
+}
diff --git a/arch/mips/loongson2ef/common/mem.c b/arch/mips/loongson2ef/common/mem.c
new file mode 100644
index 000000000000..4254ac4ec616
--- /dev/null
+++ b/arch/mips/loongson2ef/common/mem.c
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ */
+#include
+#include
+#include
+#include
+
+#include
+
+#include
+#include
+#include
+#include
+
+#ifndef CONFIG_LEFI_FIRMWARE_INTERFACE
+
+u32 memsize, highmemsize;
+
+void __init prom_init_memory(void)
+{
+ add_memory_region(0x0, (memsize << 20), BOOT_MEM_RAM);
+
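+ /* Reserve the hole between the top of low RAM and the start of the
+ * PCI memory window so that it is never handed out as normal RAM: */
+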
add_memory_region(memsize << 20, LOONGSON_PCI_MEM_START - (memsize << + 20), BOOT_MEM_RESERVED); + +#ifdef CONFIG_CPU_SUPPORTS_ADDRWINCFG + { + int bit; + + bit = fls(memsize + highmemsize); + if (bit != ffs(memsize + highmemsize)) + bit += 20; + else + bit = bit + 20 - 1; + + /* set cpu window3 to map CPU to DDR: 2G -> 2G */ + LOONGSON_ADDRWIN_CPUTODDR(ADDRWIN_WIN3, 0x80000000ul, + 0x80000000ul, (1 << bit)); + mmiowb(); + } +#endif /* !CONFIG_CPU_SUPPORTS_ADDRWINCFG */ + +#ifdef CONFIG_64BIT + if (highmemsize > 0) + add_memory_region(LOONGSON_HIGHMEM_START, + highmemsize << 20, BOOT_MEM_RAM); + + add_memory_region(LOONGSON_PCI_MEM_END + 1, LOONGSON_HIGHMEM_START - + LOONGSON_PCI_MEM_END - 1, BOOT_MEM_RESERVED); + +#endif /* !CONFIG_64BIT */ +} + +#else /* CONFIG_LEFI_FIRMWARE_INTERFACE */ + +void __init prom_init_memory(void) +{ + int i; + u32 node_id; + u32 mem_type; + + /* parse memory information */ + for (i = 0; i < loongson_memmap->nr_map; i++) { + node_id = loongson_memmap->map[i].node_id; + mem_type = loongson_memmap->map[i].mem_type; + + if (node_id != 0) + continue; + + switch (mem_type) { + case SYSTEM_RAM_LOW: + memblock_add(loongson_memmap->map[i].mem_start, + (u64)loongson_memmap->map[i].mem_size << 20); + break; + case SYSTEM_RAM_HIGH: + memblock_add(loongson_memmap->map[i].mem_start, + (u64)loongson_memmap->map[i].mem_size << 20); + break; + case SYSTEM_RAM_RESERVED: + memblock_reserve(loongson_memmap->map[i].mem_start, + (u64)loongson_memmap->map[i].mem_size << 20); + break; + } + } +} + +#endif /* CONFIG_LEFI_FIRMWARE_INTERFACE */ + +/* override of arch/mips/mm/cache.c: __uncached_access */ +int __uncached_access(struct file *file, unsigned long addr) +{ + if (file->f_flags & O_DSYNC) + return 1; + + return addr >= __pa(high_memory) || + ((addr >= LOONGSON_MMIO_MEM_START) && + (addr < LOONGSON_MMIO_MEM_END)); +} + +#ifdef CONFIG_CPU_SUPPORTS_UNCACHED_ACCELERATED + +#include +#include +#include + +static unsigned long uca_start, uca_end; + +pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, + unsigned long size, pgprot_t vma_prot) +{ + unsigned long offset = pfn << PAGE_SHIFT; + unsigned long end = offset + size; + + if (__uncached_access(file, offset)) { + if (uca_start && (offset >= uca_start) && + (end <= uca_end)) + return __pgprot((pgprot_val(vma_prot) & + ~_CACHE_MASK) | + _CACHE_UNCACHED_ACCELERATED); + else + return pgprot_noncached(vma_prot); + } + return vma_prot; +} + +static int __init find_vga_mem_init(void) +{ + struct pci_dev *dev = 0; + struct resource *r; + int idx; + + if (uca_start) + return 0; + + for_each_pci_dev(dev) { + if ((dev->class >> 16) == PCI_BASE_CLASS_DISPLAY) { + for (idx = 0; idx < PCI_NUM_RESOURCES; idx++) { + r = &dev->resource[idx]; + if (!r->start && r->end) + continue; + if (r->flags & IORESOURCE_IO) + continue; + if (r->flags & IORESOURCE_MEM) { + uca_start = r->start; + uca_end = r->end; + return 0; + } + } + } + } + + return 0; +} + +late_initcall(find_vga_mem_init); +#endif /* !CONFIG_CPU_SUPPORTS_UNCACHED_ACCELERATED */ diff --git a/arch/mips/loongson2ef/common/pci.c b/arch/mips/loongson2ef/common/pci.c new file mode 100644 index 000000000000..2d9755c49524 --- /dev/null +++ b/arch/mips/loongson2ef/common/pci.c @@ -0,0 +1,97 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2007 Lemote, Inc. 
& Institute of Computing Technology + * Author: Fuxin Zhang, zhangfx@lemote.com + */ +#include + +#include +#include +#include + +static struct resource loongson_pci_mem_resource = { + .name = "pci memory space", + .start = LOONGSON_PCI_MEM_START, + .end = LOONGSON_PCI_MEM_END, + .flags = IORESOURCE_MEM, +}; + +static struct resource loongson_pci_io_resource = { + .name = "pci io space", + .start = LOONGSON_PCI_IO_START, + .end = IO_SPACE_LIMIT, + .flags = IORESOURCE_IO, +}; + +static struct pci_controller loongson_pci_controller = { + .pci_ops = &loongson_pci_ops, + .io_resource = &loongson_pci_io_resource, + .mem_resource = &loongson_pci_mem_resource, + .mem_offset = 0x00000000UL, + .io_offset = 0x00000000UL, +}; + +static void __init setup_pcimap(void) +{ + /* + * local to PCI mapping for CPU accessing PCI space + * CPU address space [256M,448M] is window for accessing pci space + * we set pcimap_lo[0,1,2] to map it to pci space[0M,64M], [320M,448M] + * + * pcimap: PCI_MAP2 PCI_Mem_Lo2 PCI_Mem_Lo1 PCI_Mem_Lo0 + * [<2G] [384M,448M] [320M,384M] [0M,64M] + */ + LOONGSON_PCIMAP = LOONGSON_PCIMAP_PCIMAP_2 | + LOONGSON_PCIMAP_WIN(2, LOONGSON_PCILO2_BASE) | + LOONGSON_PCIMAP_WIN(1, LOONGSON_PCILO1_BASE) | + LOONGSON_PCIMAP_WIN(0, 0); + + /* + * PCI-DMA to local mapping: [2G,2G+256M] -> [0M,256M] + */ + LOONGSON_PCIBASE0 = 0x80000000ul; /* base: 2G -> mmap: 0M */ + /* size: 256M, burst transmission, pre-fetch enable, 64bit */ + LOONGSON_PCI_HIT0_SEL_L = 0xc000000cul; + LOONGSON_PCI_HIT0_SEL_H = 0xfffffffful; + LOONGSON_PCI_HIT1_SEL_L = 0x00000006ul; /* set this BAR as invalid */ + LOONGSON_PCI_HIT1_SEL_H = 0x00000000ul; + LOONGSON_PCI_HIT2_SEL_L = 0x00000006ul; /* set this BAR as invalid */ + LOONGSON_PCI_HIT2_SEL_H = 0x00000000ul; + + /* avoid deadlock of PCI reading/writing lock operation */ + LOONGSON_PCI_ISR4C = 0xd2000001ul; + + /* can not change gnt to break pci transfer when device's gnt not + deassert for some broken device */ + LOONGSON_PXARB_CFG = 0x00fe0105ul; + +#ifdef CONFIG_CPU_SUPPORTS_ADDRWINCFG + /* + * set cpu addr window2 to map CPU address space to PCI address space + */ + LOONGSON_ADDRWIN_CPUTOPCI(ADDRWIN_WIN2, LOONGSON_CPU_MEM_SRC, + LOONGSON_PCI_MEM_DST, MMAP_CPUTOPCI_SIZE); +#endif +} + +extern int sbx00_acpi_init(void); + +static int __init pcibios_init(void) +{ + setup_pcimap(); + + loongson_pci_controller.io_map_base = mips_io_port_base; +#ifdef CONFIG_LEFI_FIRMWARE_INTERFACE + loongson_pci_mem_resource.start = loongson_sysconf.pci_mem_start_addr; + loongson_pci_mem_resource.end = loongson_sysconf.pci_mem_end_addr; +#endif + register_pci_controller(&loongson_pci_controller); + +#ifdef CONFIG_CPU_LOONGSON64 + sbx00_acpi_init(); +#endif + + return 0; +} + +arch_initcall(pcibios_init); diff --git a/arch/mips/loongson2ef/common/platform.c b/arch/mips/loongson2ef/common/platform.c new file mode 100644 index 000000000000..0084820cffaa --- /dev/null +++ b/arch/mips/loongson2ef/common/platform.c @@ -0,0 +1,27 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2009 Lemote Inc. 
+ * Author: Wu Zhangjin, wuzhangjin@gmail.com
+ */
+
+#include
+#include
+#include
+
+static struct platform_device loongson2_cpufreq_device = {
+ .name = "loongson2_cpufreq",
+ .id = -1,
+};
+
+static int __init loongson2_cpufreq_init(void)
+{
+ struct cpuinfo_mips *c = &current_cpu_data;
+
+ /* Only the 2F revision and its successors support CPUFreq */
+ if ((c->processor_id & PRID_REV_MASK) >= PRID_REV_LOONGSON2F)
+ return platform_device_register(&loongson2_cpufreq_device);
+
+ return -ENODEV;
+}
+
+arch_initcall(loongson2_cpufreq_init);
diff --git a/arch/mips/loongson2ef/common/pm.c b/arch/mips/loongson2ef/common/pm.c
new file mode 100644
index 000000000000..b8aed878d912
--- /dev/null
+++ b/arch/mips/loongson2ef/common/pm.c
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * loongson-specific suspend support
+ *
+ * Copyright (C) 2009 Lemote Inc.
+ * Author: Wu Zhangjin
+ */
+#include
+#include
+#include
+
+#include
+#include
+
+#include
+
+static unsigned int __maybe_unused cached_master_mask; /* i8259A */
+static unsigned int __maybe_unused cached_slave_mask;
+static unsigned int __maybe_unused cached_bonito_irq_mask; /* bonito */
+
+void arch_suspend_disable_irqs(void)
+{
+ /* disable all mips events */
+ local_irq_disable();
+
+#ifdef CONFIG_I8259
+ /* disable all events of i8259A */
+ cached_slave_mask = inb(PIC_SLAVE_IMR);
+ cached_master_mask = inb(PIC_MASTER_IMR);
+
+ outb(0xff, PIC_SLAVE_IMR);
+ inb(PIC_SLAVE_IMR);
+ outb(0xff, PIC_MASTER_IMR);
+ inb(PIC_MASTER_IMR);
+#endif
+ /* disable all events of bonito */
+ cached_bonito_irq_mask = LOONGSON_INTEN;
+ LOONGSON_INTENCLR = 0xffff;
+ (void)LOONGSON_INTENCLR;
+}
+
+void arch_suspend_enable_irqs(void)
+{
+ /* enable all mips events */
+ local_irq_enable();
+#ifdef CONFIG_I8259
+ /* only enable the cached events of i8259A */
+ outb(cached_slave_mask, PIC_SLAVE_IMR);
+ outb(cached_master_mask, PIC_MASTER_IMR);
+#endif
+ /* enable all cached events of bonito */
+ LOONGSON_INTENSET = cached_bonito_irq_mask;
+ (void)LOONGSON_INTENSET;
+}
+
+/*
+ * Setup the board-specific events for waking up loongson from wait mode
+ */
+void __weak setup_wakeup_events(void)
+{
+}
+
+/*
+ * Check wakeup events
+ */
+int __weak wakeup_loongson(void)
+{
+ return 1;
+}
+
+/*
+ * If the events are really the ones we want to wake up the CPU for,
+ * wake it up; otherwise put the CPU back to sleep.
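+ * (wakeup_loongson() above is the board-specific check; clearing the
+ * low three bits of LOONGSON_CHIPCFG(0) in the loop below is assumed
+ * to re-enter the wait state, mirroring loongson_suspend_enter().)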
+ */ +static void wait_for_wakeup_events(void) +{ + while (!wakeup_loongson()) + LOONGSON_CHIPCFG(0) &= ~0x7; +} + +/* + * Stop all perf counters + * + * $24 is the control register of Loongson perf counter + */ +static inline void stop_perf_counters(void) +{ + __write_64bit_c0_register($24, 0, 0); +} + + +static void loongson_suspend_enter(void) +{ + static unsigned int cached_cpu_freq; + + /* setup wakeup events via enabling the IRQs */ + setup_wakeup_events(); + + stop_perf_counters(); + + cached_cpu_freq = LOONGSON_CHIPCFG(0); + + /* Put CPU into wait mode */ + LOONGSON_CHIPCFG(0) &= ~0x7; + + /* wait for the given events to wakeup cpu from wait mode */ + wait_for_wakeup_events(); + + LOONGSON_CHIPCFG(0) = cached_cpu_freq; + mmiowb(); +} + +void __weak mach_suspend(void) +{ +} + +void __weak mach_resume(void) +{ +} + +static int loongson_pm_enter(suspend_state_t state) +{ + mach_suspend(); + + /* processor specific suspend */ + loongson_suspend_enter(); + + mach_resume(); + + return 0; +} + +static int loongson_pm_valid_state(suspend_state_t state) +{ + switch (state) { + case PM_SUSPEND_ON: + case PM_SUSPEND_STANDBY: + case PM_SUSPEND_MEM: + return 1; + + default: + return 0; + } +} + +static const struct platform_suspend_ops loongson_pm_ops = { + .valid = loongson_pm_valid_state, + .enter = loongson_pm_enter, +}; + +static int __init loongson_pm_init(void) +{ + suspend_set_ops(&loongson_pm_ops); + + return 0; +} +arch_initcall(loongson_pm_init); diff --git a/arch/mips/loongson2ef/common/reset.c b/arch/mips/loongson2ef/common/reset.c new file mode 100644 index 000000000000..ce39e918e4d5 --- /dev/null +++ b/arch/mips/loongson2ef/common/reset.c @@ -0,0 +1,94 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * + * Copyright (C) 2007 Lemote, Inc. & Institute of Computing Technology + * Author: Fuxin Zhang, zhangfx@lemote.com + * Copyright (C) 2009 Lemote, Inc. + * Author: Zhangjin Wu, wuzhangjin@gmail.com + */ +#include +#include + +#include +#include + +#include +#include + +static inline void loongson_reboot(void) +{ +#ifndef CONFIG_CPU_JUMP_WORKAROUNDS + ((void (*)(void))ioremap_nocache(LOONGSON_BOOT_BASE, 4)) (); +#else + void (*func)(void); + + func = (void *)ioremap_nocache(LOONGSON_BOOT_BASE, 4); + + __asm__ __volatile__( + " .set noat \n" + " jr %[func] \n" + " .set at \n" + : /* No outputs */ + : [func] "r" (func)); +#endif +} + +static void loongson_restart(char *command) +{ +#ifndef CONFIG_LEFI_FIRMWARE_INTERFACE + /* do preparation for reboot */ + mach_prepare_reboot(); + + /* reboot via jumping to boot base address */ + loongson_reboot(); +#else + void (*fw_restart)(void) = (void *)loongson_sysconf.restart_addr; + + fw_restart(); + while (1) { + if (cpu_wait) + cpu_wait(); + } +#endif +} + +static void loongson_poweroff(void) +{ +#ifndef CONFIG_LEFI_FIRMWARE_INTERFACE + mach_prepare_shutdown(); + + /* + * It needs a wait loop here, but mips/kernel/reset.c already calls + * a generic delay loop, machine_hang(), so simply return. 
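+ * (In the LEFI branch below, fw_poweroff() is not expected to return;
+ * the cpu_wait loop only idles the core until power actually drops.)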
+ */ + return; +#else + void (*fw_poweroff)(void) = (void *)loongson_sysconf.poweroff_addr; + + fw_poweroff(); + while (1) { + if (cpu_wait) + cpu_wait(); + } +#endif +} + +static void loongson_halt(void) +{ + pr_notice("\n\n** You can safely turn off the power now **\n\n"); + while (1) { + if (cpu_wait) + cpu_wait(); + } +} + +static int __init mips_reboot_setup(void) +{ + _machine_restart = loongson_restart; + _machine_halt = loongson_halt; + pm_power_off = loongson_poweroff; + + return 0; +} + +arch_initcall(mips_reboot_setup); diff --git a/arch/mips/loongson2ef/common/rtc.c b/arch/mips/loongson2ef/common/rtc.c new file mode 100644 index 000000000000..8d7628c0f513 --- /dev/null +++ b/arch/mips/loongson2ef/common/rtc.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Lemote Fuloong platform support + * + * Copyright(c) 2010 Arnaud Patard + */ + +#include +#include +#include +#include + +static struct resource loongson_rtc_resources[] = { + { + .start = RTC_PORT(0), + .end = RTC_PORT(1), + .flags = IORESOURCE_IO, + }, { + .start = RTC_IRQ, + .end = RTC_IRQ, + .flags = IORESOURCE_IRQ, + } +}; + +static struct platform_device loongson_rtc_device = { + .name = "rtc_cmos", + .id = -1, + .resource = loongson_rtc_resources, + .num_resources = ARRAY_SIZE(loongson_rtc_resources), +}; + + +static int __init loongson_rtc_platform_init(void) +{ + platform_device_register(&loongson_rtc_device); + return 0; +} + +device_initcall(loongson_rtc_platform_init); diff --git a/arch/mips/loongson2ef/common/serial.c b/arch/mips/loongson2ef/common/serial.c new file mode 100644 index 000000000000..98c3a7feb10f --- /dev/null +++ b/arch/mips/loongson2ef/common/serial.c @@ -0,0 +1,117 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2007 Ralf Baechle (ralf@linux-mips.org) + * + * Copyright (C) 2009 Lemote, Inc. 
+ * Author: Yan hua (yanhua@lemote.com) + * Author: Wu Zhangjin (wuzhangjin@gmail.com) + */ + +#include +#include +#include + +#include + +#include +#include + +#define PORT(int, clk) \ +{ \ + .irq = int, \ + .uartclk = clk, \ + .iotype = UPIO_PORT, \ + .flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST, \ + .regshift = 0, \ +} + +#define PORT_M(int, clk) \ +{ \ + .irq = MIPS_CPU_IRQ_BASE + (int), \ + .uartclk = clk, \ + .iotype = UPIO_MEM, \ + .membase = (void __iomem *)NULL, \ + .flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST, \ + .regshift = 0, \ +} + +static struct plat_serial8250_port uart8250_data[][MAX_UARTS + 1] = { + [MACH_LOONGSON_UNKNOWN] = {}, + [MACH_LEMOTE_FL2E] = {PORT(4, 1843200), {} }, + [MACH_LEMOTE_FL2F] = {PORT(3, 1843200), {} }, + [MACH_LEMOTE_ML2F7] = {PORT_M(3, 3686400), {} }, + [MACH_LEMOTE_YL2F89] = {PORT_M(3, 3686400), {} }, + [MACH_DEXXON_GDIUM2F10] = {PORT_M(3, 3686400), {} }, + [MACH_LEMOTE_NAS] = {PORT_M(3, 3686400), {} }, + [MACH_LEMOTE_LL2F] = {PORT(3, 1843200), {} }, + [MACH_LOONGSON_GENERIC] = {PORT_M(2, 25000000), {} }, + [MACH_LOONGSON_END] = {}, +}; + +static struct platform_device uart8250_device = { + .name = "serial8250", + .id = PLAT8250_DEV_PLATFORM, +}; + +static int __init serial_init(void) +{ + int i; + unsigned char iotype; + + iotype = uart8250_data[mips_machtype][0].iotype; + + if (UPIO_MEM == iotype) { + uart8250_data[mips_machtype][0].mapbase = + loongson_uart_base[0]; + uart8250_data[mips_machtype][0].membase = + (void __iomem *)_loongson_uart_base[0]; + } + else if (UPIO_PORT == iotype) + uart8250_data[mips_machtype][0].iobase = + loongson_uart_base[0] - LOONGSON_PCIIO_BASE; + + if (loongson_sysconf.uarts[0].uartclk) + uart8250_data[mips_machtype][0].uartclk = + loongson_sysconf.uarts[0].uartclk; + + for (i = 1; i < loongson_sysconf.nr_uarts; i++) { + iotype = loongson_sysconf.uarts[i].iotype; + uart8250_data[mips_machtype][i].iotype = iotype; + loongson_uart_base[i] = loongson_sysconf.uarts[i].uart_base; + + if (UPIO_MEM == iotype) { + uart8250_data[mips_machtype][i].irq = + MIPS_CPU_IRQ_BASE + loongson_sysconf.uarts[i].int_offset; + uart8250_data[mips_machtype][i].mapbase = + loongson_uart_base[i]; + uart8250_data[mips_machtype][i].membase = + ioremap_nocache(loongson_uart_base[i], 8); + } else if (UPIO_PORT == iotype) { + uart8250_data[mips_machtype][i].irq = + loongson_sysconf.uarts[i].int_offset; + uart8250_data[mips_machtype][i].iobase = + loongson_uart_base[i] - LOONGSON_PCIIO_BASE; + } + + uart8250_data[mips_machtype][i].uartclk = + loongson_sysconf.uarts[i].uartclk; + uart8250_data[mips_machtype][i].flags = + UPF_BOOT_AUTOCONF | UPF_SKIP_TEST; + } + + memset(&uart8250_data[mips_machtype][loongson_sysconf.nr_uarts], + 0, sizeof(struct plat_serial8250_port)); + uart8250_device.dev.platform_data = uart8250_data[mips_machtype]; + + return platform_device_register(&uart8250_device); +} +module_init(serial_init); + +static void __exit serial_exit(void) +{ + platform_device_unregister(&uart8250_device); +} +module_exit(serial_exit); diff --git a/arch/mips/loongson2ef/common/setup.c b/arch/mips/loongson2ef/common/setup.c new file mode 100644 index 000000000000..bc2da4c140c4 --- /dev/null +++ b/arch/mips/loongson2ef/common/setup.c @@ -0,0 +1,51 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2007 Lemote Inc. 
& Institute of Computing Technology + * Author: Fuxin Zhang, zhangfx@lemote.com + */ +#include +#include + +#include +#include + +#include + +#ifdef CONFIG_VT +#include +#include +#endif + +static void wbflush_loongson(void) +{ + asm(".set\tpush\n\t" + ".set\tnoreorder\n\t" + ".set mips3\n\t" + "sync\n\t" + "nop\n\t" + ".set\tpop\n\t" + ".set mips0\n\t"); +} + +void (*__wbflush)(void) = wbflush_loongson; +EXPORT_SYMBOL(__wbflush); + +void __init plat_mem_setup(void) +{ +#ifdef CONFIG_VT +#if defined(CONFIG_VGA_CONSOLE) + conswitchp = &vga_con; + + screen_info = (struct screen_info) { + .orig_x = 0, + .orig_y = 25, + .orig_video_cols = 80, + .orig_video_lines = 25, + .orig_video_isVGA = VIDEO_TYPE_VGAC, + .orig_video_points = 16, + }; +#elif defined(CONFIG_DUMMY_CONSOLE) + conswitchp = &dummy_con; +#endif +#endif +} diff --git a/arch/mips/loongson2ef/common/time.c b/arch/mips/loongson2ef/common/time.c new file mode 100644 index 000000000000..e78760ce475b --- /dev/null +++ b/arch/mips/loongson2ef/common/time.c @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2007 Lemote, Inc. & Institute of Computing Technology + * Author: Fuxin Zhang, zhangfx@lemote.com + * + * Copyright (C) 2009 Lemote Inc. + * Author: Wu Zhangjin, wuzhangjin@gmail.com + */ +#include +#include +#include + +#include +#include + +void __init plat_time_init(void) +{ + /* setup mips r4k timer */ + mips_hpt_frequency = cpu_clock_freq / 2; + +#ifdef CONFIG_RS780_HPET + setup_hpet_timer(); +#else + setup_mfgpt0_timer(); +#endif +} + +void read_persistent_clock64(struct timespec64 *ts) +{ + ts->tv_sec = mc146818_get_cmos_time(); + ts->tv_nsec = 0; +} diff --git a/arch/mips/loongson2ef/common/uart_base.c b/arch/mips/loongson2ef/common/uart_base.c new file mode 100644 index 000000000000..e88d937f10fe --- /dev/null +++ b/arch/mips/loongson2ef/common/uart_base.c @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2009 Lemote Inc. + * Author: Wu Zhangjin, wuzhangjin@gmail.com + */ + +#include +#include + +#include + +/* raw */ +unsigned long loongson_uart_base[MAX_UARTS] = {}; +/* ioremapped */ +unsigned long _loongson_uart_base[MAX_UARTS] = {}; + +EXPORT_SYMBOL(loongson_uart_base); +EXPORT_SYMBOL(_loongson_uart_base); + +void prom_init_loongson_uart_base(void) +{ + switch (mips_machtype) { + case MACH_LOONGSON_GENERIC: + /* The CPU provided serial port (CPU) */ + loongson_uart_base[0] = LOONGSON_REG_BASE + 0x1e0; + break; + case MACH_LEMOTE_FL2E: + loongson_uart_base[0] = LOONGSON_PCIIO_BASE + 0x3f8; + break; + case MACH_LEMOTE_FL2F: + case MACH_LEMOTE_LL2F: + loongson_uart_base[0] = LOONGSON_PCIIO_BASE + 0x2f8; + break; + case MACH_LEMOTE_ML2F7: + case MACH_LEMOTE_YL2F89: + case MACH_DEXXON_GDIUM2F10: + case MACH_LEMOTE_NAS: + default: + /* The CPU provided serial port (LPC) */ + loongson_uart_base[0] = LOONGSON_LIO1_BASE + 0x3f8; + break; + } + + _loongson_uart_base[0] = + (unsigned long)ioremap_nocache(loongson_uart_base[0], 8); +} diff --git a/arch/mips/loongson2ef/fuloong-2e/Makefile b/arch/mips/loongson2ef/fuloong-2e/Makefile new file mode 100644 index 000000000000..bb58edb3bea7 --- /dev/null +++ b/arch/mips/loongson2ef/fuloong-2e/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Makefile for Lemote Fuloong2e mini-PC board. 
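+# irq.o and reset.o provide this board's interrupt dispatch and
+# reboot/shutdown hooks; dma.o provides its fixed +0x80000000 DMA
+# address offset.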
+#
+
+obj-y += irq.o reset.o dma.o
diff --git a/arch/mips/loongson2ef/fuloong-2e/dma.c b/arch/mips/loongson2ef/fuloong-2e/dma.c
new file mode 100644
index 000000000000..e122292bf666
--- /dev/null
+++ b/arch/mips/loongson2ef/fuloong-2e/dma.c
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
+#include
+
+dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr)
+{
+ return paddr | 0x80000000;
+}
+
+phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t dma_addr)
+{
+ return dma_addr & 0x7fffffff;
+}
diff --git a/arch/mips/loongson2ef/fuloong-2e/irq.c b/arch/mips/loongson2ef/fuloong-2e/irq.c
new file mode 100644
index 000000000000..32278e7bf85c
--- /dev/null
+++ b/arch/mips/loongson2ef/fuloong-2e/irq.c
@@ -0,0 +1,65 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2007 Lemote Inc. & Institute of Computing Technology
+ * Author: Fuxin Zhang, zhangfx@lemote.com
+ */
+#include
+
+#include
+#include
+
+#include
+
+static void i8259_irqdispatch(void)
+{
+ int irq;
+
+ irq = i8259_irq();
+ if (irq >= 0)
+ do_IRQ(irq);
+ else
+ spurious_interrupt();
+}
+
+asmlinkage void mach_irq_dispatch(unsigned int pending)
+{
+ if (pending & CAUSEF_IP7)
+ do_IRQ(MIPS_CPU_IRQ_BASE + 7);
+ else if (pending & CAUSEF_IP6) /* perf counter overflow */
+ do_perfcnt_IRQ();
+ else if (pending & CAUSEF_IP5)
+ i8259_irqdispatch();
+ else if (pending & CAUSEF_IP2)
+ bonito_irqdispatch();
+ else
+ spurious_interrupt();
+}
+
+static struct irqaction cascade_irqaction = {
+ .handler = no_action,
+ .name = "cascade",
+ .flags = IRQF_NO_THREAD,
+};
+
+void __init mach_init_irq(void)
+{
+ /* init all controllers
+ * 0-15 ------> i8259 interrupt
+ * 16-23 ------> mips cpu interrupt
+ * 32-63 ------> bonito irq
+ */
+
+ /* most bonito irqs should be level triggered */
+ LOONGSON_INTEDGE = LOONGSON_ICU_SYSTEMERR | LOONGSON_ICU_MASTERERR |
+ LOONGSON_ICU_RETRYERR | LOONGSON_ICU_MBOXES;
+
+ /* Sets the first-level interrupt dispatcher. */
+ mips_cpu_irq_init();
+ init_i8259_irqs();
+ bonito_irq_init();
+
+ /* bonito irq at IP2 */
+ setup_irq(MIPS_CPU_IRQ_BASE + 2, &cascade_irqaction);
+ /* 8259 irq at IP5 */
+ setup_irq(MIPS_CPU_IRQ_BASE + 5, &cascade_irqaction);
+}
diff --git a/arch/mips/loongson2ef/fuloong-2e/reset.c b/arch/mips/loongson2ef/fuloong-2e/reset.c
new file mode 100644
index 000000000000..8273de1cf4bb
--- /dev/null
+++ b/arch/mips/loongson2ef/fuloong-2e/reset.c
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Board-specific reboot/shutdown routines
+ * Copyright (c) 2009 Philippe Vachon
+ *
+ * Copyright (C) 2009 Lemote Inc.
+ * Author: Wu Zhangjin, wuzhangjin@gmail.com
+ */
+
+#include
+
+void mach_prepare_reboot(void)
+{
+ LOONGSON_GENCFG &= ~(1 << 2);
+ LOONGSON_GENCFG |= (1 << 2);
+}
+
+void mach_prepare_shutdown(void)
+{
+}
diff --git a/arch/mips/loongson2ef/lemote-2f/Makefile b/arch/mips/loongson2ef/lemote-2f/Makefile
new file mode 100644
index 000000000000..881a0ec06d1f
--- /dev/null
+++ b/arch/mips/loongson2ef/lemote-2f/Makefile
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Makefile for lemote loongson2f family machines
+#
+
+obj-y += clock.o machtype.o irq.o reset.o dma.o ec_kb3310b.o
+
+#
+# Suspend Support
+#
+
+obj-$(CONFIG_SUSPEND) += pm.o
diff --git a/arch/mips/loongson2ef/lemote-2f/clock.c b/arch/mips/loongson2ef/lemote-2f/clock.c
new file mode 100644
index 000000000000..1ced30e7aeef
--- /dev/null
+++ b/arch/mips/loongson2ef/lemote-2f/clock.c
@@ -0,0 +1,143 @@
+/*
+ * Copyright (C) 2006 - 2008 Lemote Inc.
& Institute of Computing Technology + * Author: Yanhua, yanh@lemote.com + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +static LIST_HEAD(clock_list); +static DEFINE_SPINLOCK(clock_lock); +static DEFINE_MUTEX(clock_list_sem); + +/* Minimum CLK support */ +enum { + DC_ZERO, DC_25PT = 2, DC_37PT, DC_50PT, DC_62PT, DC_75PT, + DC_87PT, DC_DISABLE, DC_RESV +}; + +struct cpufreq_frequency_table loongson2_clockmod_table[] = { + {0, DC_RESV, CPUFREQ_ENTRY_INVALID}, + {0, DC_ZERO, CPUFREQ_ENTRY_INVALID}, + {0, DC_25PT, 0}, + {0, DC_37PT, 0}, + {0, DC_50PT, 0}, + {0, DC_62PT, 0}, + {0, DC_75PT, 0}, + {0, DC_87PT, 0}, + {0, DC_DISABLE, 0}, + {0, DC_RESV, CPUFREQ_TABLE_END}, +}; +EXPORT_SYMBOL_GPL(loongson2_clockmod_table); + +static struct clk cpu_clk = { + .name = "cpu_clk", + .flags = CLK_ALWAYS_ENABLED | CLK_RATE_PROPAGATES, + .rate = 800000000, +}; + +struct clk *clk_get(struct device *dev, const char *id) +{ + return &cpu_clk; +} +EXPORT_SYMBOL(clk_get); + +static void propagate_rate(struct clk *clk) +{ + struct clk *clkp; + + list_for_each_entry(clkp, &clock_list, node) { + if (likely(clkp->parent != clk)) + continue; + if (likely(clkp->ops && clkp->ops->recalc)) + clkp->ops->recalc(clkp); + if (unlikely(clkp->flags & CLK_RATE_PROPAGATES)) + propagate_rate(clkp); + } +} + +int clk_enable(struct clk *clk) +{ + return 0; +} +EXPORT_SYMBOL(clk_enable); + +void clk_disable(struct clk *clk) +{ +} +EXPORT_SYMBOL(clk_disable); + +unsigned long clk_get_rate(struct clk *clk) +{ + if (!clk) + return 0; + + return (unsigned long)clk->rate; +} +EXPORT_SYMBOL(clk_get_rate); + +void clk_put(struct clk *clk) +{ +} +EXPORT_SYMBOL(clk_put); + +int clk_set_rate(struct clk *clk, unsigned long rate) +{ + unsigned int rate_khz = rate / 1000; + struct cpufreq_frequency_table *pos; + int ret = 0; + int regval; + + if (likely(clk->ops && clk->ops->set_rate)) { + unsigned long flags; + + spin_lock_irqsave(&clock_lock, flags); + ret = clk->ops->set_rate(clk, rate, 0); + spin_unlock_irqrestore(&clock_lock, flags); + } + + if (unlikely(clk->flags & CLK_RATE_PROPAGATES)) + propagate_rate(clk); + + cpufreq_for_each_valid_entry(pos, loongson2_clockmod_table) + if (rate_khz == pos->frequency) + break; + if (rate_khz != pos->frequency) + return -ENOTSUPP; + + clk->rate = rate; + + regval = LOONGSON_CHIPCFG(0); + regval = (regval & ~0x7) | (pos->driver_data - 1); + LOONGSON_CHIPCFG(0) = regval; + + return ret; +} +EXPORT_SYMBOL_GPL(clk_set_rate); + +long clk_round_rate(struct clk *clk, unsigned long rate) +{ + if (likely(clk->ops && clk->ops->round_rate)) { + unsigned long flags, rounded; + + spin_lock_irqsave(&clock_lock, flags); + rounded = clk->ops->round_rate(clk, rate); + spin_unlock_irqrestore(&clock_lock, flags); + + return rounded; + } + + return rate; +} +EXPORT_SYMBOL_GPL(clk_round_rate); diff --git a/arch/mips/loongson2ef/lemote-2f/dma.c b/arch/mips/loongson2ef/lemote-2f/dma.c new file mode 100644 index 000000000000..abf0e39d7e46 --- /dev/null +++ b/arch/mips/loongson2ef/lemote-2f/dma.c @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0 +#include + +dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr) +{ + return paddr | 0x80000000; +} + +phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t dma_addr) +{ + if (dma_addr > 0x8fffffff) + return dma_addr; + return dma_addr & 
0x0fffffff;
+}
diff --git a/arch/mips/loongson2ef/lemote-2f/ec_kb3310b.c b/arch/mips/loongson2ef/lemote-2f/ec_kb3310b.c
new file mode 100644
index 000000000000..d138220e96a2
--- /dev/null
+++ b/arch/mips/loongson2ef/lemote-2f/ec_kb3310b.c
@@ -0,0 +1,125 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Basic KB3310B Embedded Controller support for the YeeLoong 2F netbook
+ *
+ * Copyright (C) 2008 Lemote Inc.
+ * Author: liujl , 2008-04-20
+ */
+
+#include
+#include
+#include
+#include
+
+#include "ec_kb3310b.h"
+
+static DEFINE_SPINLOCK(index_access_lock);
+static DEFINE_SPINLOCK(port_access_lock);
+
+unsigned char ec_read(unsigned short addr)
+{
+	unsigned char value;
+	unsigned long flags;
+
+	spin_lock_irqsave(&index_access_lock, flags);
+	outb((addr & 0xff00) >> 8, EC_IO_PORT_HIGH);
+	outb((addr & 0x00ff), EC_IO_PORT_LOW);
+	value = inb(EC_IO_PORT_DATA);
+	spin_unlock_irqrestore(&index_access_lock, flags);
+
+	return value;
+}
+EXPORT_SYMBOL_GPL(ec_read);
+
+void ec_write(unsigned short addr, unsigned char val)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&index_access_lock, flags);
+	outb((addr & 0xff00) >> 8, EC_IO_PORT_HIGH);
+	outb((addr & 0x00ff), EC_IO_PORT_LOW);
+	outb(val, EC_IO_PORT_DATA);
+	/* flush the write action */
+	inb(EC_IO_PORT_DATA);
+	spin_unlock_irqrestore(&index_access_lock, flags);
+}
+EXPORT_SYMBOL_GPL(ec_write);
+
+/*
+ * This function is used for EC command writes and the corresponding status
+ * queries.
+ */
+int ec_query_seq(unsigned char cmd)
+{
+	int timeout;
+	unsigned char status;
+	unsigned long flags;
+	int ret = 0;
+
+	spin_lock_irqsave(&port_access_lock, flags);
+
+	/* put the chip into reset mode */
+	udelay(EC_REG_DELAY);
+	outb(cmd, EC_CMD_PORT);
+	udelay(EC_REG_DELAY);
+
+	/* check whether the command has been accepted by the EC */
+	timeout = EC_CMD_TIMEOUT;
+	status = inb(EC_STS_PORT);
+	while (timeout-- && (status & (1 << 1))) {
+		status = inb(EC_STS_PORT);
+		udelay(EC_REG_DELAY);
+	}
+
+	spin_unlock_irqrestore(&port_access_lock, flags);
+
+	if (timeout <= 0) {
+		printk(KERN_ERR "%s: timed out waiting for the EC to accept command %d\n",
+		       __func__, cmd);
+		ret = -EINVAL;
+	} else
+		printk(KERN_INFO
+		       "(%x/%d)ec issued command %d status : 0x%x\n",
+		       timeout, EC_CMD_TIMEOUT - timeout, cmd, status);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(ec_query_seq);
+
+/*
+ * Send a query command to the EC to get the proper event number
+ */
+int ec_query_event_num(void)
+{
+	return ec_query_seq(CMD_GET_EVENT_NUM);
+}
+EXPORT_SYMBOL(ec_query_event_num);
+
+/*
+ * Get the event number from the EC
+ *
+ * NOTE: This routine must be called after ec_query_event_num() in the
+ * interrupt handler.
+ */
+int ec_get_event_num(void)
+{
+	int timeout = 100;
+	unsigned char value;
+	unsigned char status;
+
+	udelay(EC_REG_DELAY);
+	status = inb(EC_STS_PORT);
+	udelay(EC_REG_DELAY);
+	while (timeout-- && !(status & (1 << 0))) {
+		status = inb(EC_STS_PORT);
+		udelay(EC_REG_DELAY);
+	}
+	if (timeout <= 0) {
+		pr_info("%s: timed out waiting for the event number.\n", __func__);
+
+		return -EINVAL;
+	}
+	value = inb(EC_DAT_PORT);
+	udelay(EC_REG_DELAY);
+
+	return value;
+}
+EXPORT_SYMBOL(ec_get_event_num);
diff --git a/arch/mips/loongson2ef/lemote-2f/ec_kb3310b.h b/arch/mips/loongson2ef/lemote-2f/ec_kb3310b.h
new file mode 100644
index 000000000000..aecdbc9c875a
--- /dev/null
+++ b/arch/mips/loongson2ef/lemote-2f/ec_kb3310b.h
@@ -0,0 +1,184 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * KB3310B Embedded Controller
+ *
+ * Copyright (C) 2008 Lemote Inc.
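ec_read()/ec_write() above implement the KB3310B's indexed addressing: the 16-bit register address is written high byte then low byte to the index ports (EC_IO_PORT_HIGH/EC_IO_PORT_LOW) and the payload moves through EC_IO_PORT_DATA, all under index_access_lock so concurrent accesses cannot interleave the three-step sequence. A hedged illustration of a read-modify-write built on them; the helper is hypothetical, but REG_CAMERA_CONTROL and BIT_CAMERA_CONTROL_ON come from ec_kb3310b.h below:

#include "ec_kb3310b.h"

static void ec_set_camera_power(int on)
{
	unsigned char val = ec_read(REG_CAMERA_CONTROL);

	if (on)
		val |= BIT_CAMERA_CONTROL_ON;
	else
		val &= ~BIT_CAMERA_CONTROL_ON;
	ec_write(REG_CAMERA_CONTROL, val);	/* each call re-takes the lock */
}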
+ * Author: liujl , 2008-03-14 + */ + +#ifndef _EC_KB3310B_H +#define _EC_KB3310B_H + +extern unsigned char ec_read(unsigned short addr); +extern void ec_write(unsigned short addr, unsigned char val); +extern int ec_query_seq(unsigned char cmd); +extern int ec_query_event_num(void); +extern int ec_get_event_num(void); + +typedef int (*sci_handler) (int status); +extern sci_handler yeeloong_report_lid_status; + +#define SCI_IRQ_NUM 0x0A + +/* + * The following registers are determined by the EC index configuration. + * 1, fill the PORT_HIGH as EC register high part. + * 2, fill the PORT_LOW as EC register low part. + * 3, fill the PORT_DATA as EC register write data or get the data from it. + */ +#define EC_IO_PORT_HIGH 0x0381 +#define EC_IO_PORT_LOW 0x0382 +#define EC_IO_PORT_DATA 0x0383 + +/* + * EC delay time is 500us for register and status access + */ +#define EC_REG_DELAY 500 /* unit : us */ +#define EC_CMD_TIMEOUT 0x1000 + +/* + * EC access port for SCI communication + */ +#define EC_CMD_PORT 0x66 +#define EC_STS_PORT 0x66 +#define EC_DAT_PORT 0x62 +#define CMD_INIT_IDLE_MODE 0xdd +#define CMD_EXIT_IDLE_MODE 0xdf +#define CMD_INIT_RESET_MODE 0xd8 +#define CMD_REBOOT_SYSTEM 0x8c +#define CMD_GET_EVENT_NUM 0x84 +#define CMD_PROGRAM_PIECE 0xda + +/* temperature & fan registers */ +#define REG_TEMPERATURE_VALUE 0xF458 +#define REG_FAN_AUTO_MAN_SWITCH 0xF459 +#define BIT_FAN_AUTO 0 +#define BIT_FAN_MANUAL 1 +#define REG_FAN_CONTROL 0xF4D2 +#define BIT_FAN_CONTROL_ON (1 << 0) +#define BIT_FAN_CONTROL_OFF (0 << 0) +#define REG_FAN_STATUS 0xF4DA +#define BIT_FAN_STATUS_ON (1 << 0) +#define BIT_FAN_STATUS_OFF (0 << 0) +#define REG_FAN_SPEED_HIGH 0xFE22 +#define REG_FAN_SPEED_LOW 0xFE23 +#define REG_FAN_SPEED_LEVEL 0xF4CC +/* fan speed divider */ +#define FAN_SPEED_DIVIDER 480000 /* (60*1000*1000/62.5/2)*/ + +/* battery registers */ +#define REG_BAT_DESIGN_CAP_HIGH 0xF77D +#define REG_BAT_DESIGN_CAP_LOW 0xF77E +#define REG_BAT_FULLCHG_CAP_HIGH 0xF780 +#define REG_BAT_FULLCHG_CAP_LOW 0xF781 +#define REG_BAT_DESIGN_VOL_HIGH 0xF782 +#define REG_BAT_DESIGN_VOL_LOW 0xF783 +#define REG_BAT_CURRENT_HIGH 0xF784 +#define REG_BAT_CURRENT_LOW 0xF785 +#define REG_BAT_VOLTAGE_HIGH 0xF786 +#define REG_BAT_VOLTAGE_LOW 0xF787 +#define REG_BAT_TEMPERATURE_HIGH 0xF788 +#define REG_BAT_TEMPERATURE_LOW 0xF789 +#define REG_BAT_RELATIVE_CAP_HIGH 0xF492 +#define REG_BAT_RELATIVE_CAP_LOW 0xF493 +#define REG_BAT_VENDOR 0xF4C4 +#define FLAG_BAT_VENDOR_SANYO 0x01 +#define FLAG_BAT_VENDOR_SIMPLO 0x02 +#define REG_BAT_CELL_COUNT 0xF4C6 +#define FLAG_BAT_CELL_3S1P 0x03 +#define FLAG_BAT_CELL_3S2P 0x06 +#define REG_BAT_CHARGE 0xF4A2 +#define FLAG_BAT_CHARGE_DISCHARGE 0x01 +#define FLAG_BAT_CHARGE_CHARGE 0x02 +#define FLAG_BAT_CHARGE_ACPOWER 0x00 +#define REG_BAT_STATUS 0xF4B0 +#define BIT_BAT_STATUS_LOW (1 << 5) +#define BIT_BAT_STATUS_DESTROY (1 << 2) +#define BIT_BAT_STATUS_FULL (1 << 1) +#define BIT_BAT_STATUS_IN (1 << 0) +#define REG_BAT_CHARGE_STATUS 0xF4B1 +#define BIT_BAT_CHARGE_STATUS_OVERTEMP (1 << 2) +#define BIT_BAT_CHARGE_STATUS_PRECHG (1 << 1) +#define REG_BAT_STATE 0xF482 +#define BIT_BAT_STATE_CHARGING (1 << 1) +#define BIT_BAT_STATE_DISCHARGING (1 << 0) +#define REG_BAT_POWER 0xF440 +#define BIT_BAT_POWER_S3 (1 << 2) +#define BIT_BAT_POWER_ON (1 << 1) +#define BIT_BAT_POWER_ACIN (1 << 0) + +/* other registers */ +/* Audio: rd/wr */ +#define REG_AUDIO_VOLUME 0xF46C +#define REG_AUDIO_MUTE 0xF4E7 +#define REG_AUDIO_BEEP 0xF4D0 +/* USB port power or not: rd/wr */ +#define REG_USB0_FLAG 0xF461 +#define 
REG_USB1_FLAG 0xF462 +#define REG_USB2_FLAG 0xF463 +#define BIT_USB_FLAG_ON 1 +#define BIT_USB_FLAG_OFF 0 +/* LID */ +#define REG_LID_DETECT 0xF4BD +#define BIT_LID_DETECT_ON 1 +#define BIT_LID_DETECT_OFF 0 +/* CRT */ +#define REG_CRT_DETECT 0xF4AD +#define BIT_CRT_DETECT_PLUG 1 +#define BIT_CRT_DETECT_UNPLUG 0 +/* LCD backlight brightness adjust: 9 levels */ +#define REG_DISPLAY_BRIGHTNESS 0xF4F5 +/* Black screen Status */ +#define BIT_DISPLAY_LCD_ON 1 +#define BIT_DISPLAY_LCD_OFF 0 +/* LCD backlight control: off/restore */ +#define REG_BACKLIGHT_CTRL 0xF7BD +#define BIT_BACKLIGHT_ON 1 +#define BIT_BACKLIGHT_OFF 0 +/* Reset the machine auto-clear: rd/wr */ +#define REG_RESET 0xF4EC +#define BIT_RESET_ON 1 +/* Light the led: rd/wr */ +#define REG_LED 0xF4C8 +#define BIT_LED_RED_POWER (1 << 0) +#define BIT_LED_ORANGE_POWER (1 << 1) +#define BIT_LED_GREEN_CHARGE (1 << 2) +#define BIT_LED_RED_CHARGE (1 << 3) +#define BIT_LED_NUMLOCK (1 << 4) +/* Test led mode, all led on/off */ +#define REG_LED_TEST 0xF4C2 +#define BIT_LED_TEST_IN 1 +#define BIT_LED_TEST_OUT 0 +/* Camera on/off */ +#define REG_CAMERA_STATUS 0xF46A +#define BIT_CAMERA_STATUS_ON 1 +#define BIT_CAMERA_STATUS_OFF 0 +#define REG_CAMERA_CONTROL 0xF7B7 +#define BIT_CAMERA_CONTROL_OFF 0 +#define BIT_CAMERA_CONTROL_ON 1 +/* Wlan Status */ +#define REG_WLAN 0xF4FA +#define BIT_WLAN_ON 1 +#define BIT_WLAN_OFF 0 +#define REG_DISPLAY_LCD 0xF79F + +/* SCI Event Number from EC */ +enum { + EVENT_LID = 0x23, /* LID open/close */ + EVENT_DISPLAY_TOGGLE, /* Fn+F3 for display switch */ + EVENT_SLEEP, /* Fn+F1 for entering sleep mode */ + EVENT_OVERTEMP, /* Over-temperature happened */ + EVENT_CRT_DETECT, /* CRT is connected */ + EVENT_CAMERA, /* Camera on/off */ + EVENT_USB_OC2, /* USB2 Over Current occurred */ + EVENT_USB_OC0, /* USB0 Over Current occurred */ + EVENT_BLACK_SCREEN, /* Turn on/off backlight */ + EVENT_AUDIO_MUTE, /* Mute on/off */ + EVENT_DISPLAY_BRIGHTNESS,/* LCD backlight brightness adjust */ + EVENT_AC_BAT, /* AC & Battery relative issue */ + EVENT_AUDIO_VOLUME, /* Volume adjust */ + EVENT_WLAN, /* Wlan on/off */ + EVENT_END +}; + +#endif /* !_EC_KB3310B_H */ diff --git a/arch/mips/loongson2ef/lemote-2f/irq.c b/arch/mips/loongson2ef/lemote-2f/irq.c new file mode 100644 index 000000000000..c58a044c6c07 --- /dev/null +++ b/arch/mips/loongson2ef/lemote-2f/irq.c @@ -0,0 +1,126 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2007 Lemote Inc. + * Author: Fuxin Zhang, zhangfx@lemote.com + */ + +#include +#include +#include + +#include +#include +#include + +#include +#include + +#define LOONGSON_TIMER_IRQ (MIPS_CPU_IRQ_BASE + 7) /* cpu timer */ +#define LOONGSON_NORTH_BRIDGE_IRQ (MIPS_CPU_IRQ_BASE + 6) /* bonito */ +#define LOONGSON_UART_IRQ (MIPS_CPU_IRQ_BASE + 3) /* cpu serial port */ +#define LOONGSON_SOUTH_BRIDGE_IRQ (MIPS_CPU_IRQ_BASE + 2) /* i8259 */ + +#define LOONGSON_INT_BIT_INT0 (1 << 11) +#define LOONGSON_INT_BIT_INT1 (1 << 12) + +/* + * The generic i8259_irq() make the kernel hang on booting. Since we cannot + * get the irq via the IRR directly, we access the ISR instead. + */ +int mach_i8259_irq(void) +{ + int irq, isr; + + irq = -1; + + if ((LOONGSON_INTISR & LOONGSON_INTEN) & LOONGSON_INT_BIT_INT0) { + raw_spin_lock(&i8259A_lock); + isr = inb(PIC_MASTER_CMD) & + ~inb(PIC_MASTER_IMR) & ~(1 << PIC_CASCADE_IR); + if (!isr) + isr = (inb(PIC_SLAVE_CMD) & ~inb(PIC_SLAVE_IMR)) << 8; + irq = ffs(isr) - 1; + if (unlikely(irq == 7)) { + /* + * This may be a spurious interrupt. 
+			 *
+			 * Read the interrupt status register (ISR). If the most
+			 * significant bit is not set then there is no valid
+			 * interrupt.
+			 */
+			outb(0x0B, PIC_MASTER_ISR);	/* ISR register */
+			if (~inb(PIC_MASTER_ISR) & 0x80)
+				irq = -1;
+		}
+		raw_spin_unlock(&i8259A_lock);
+	}
+
+	return irq;
+}
+EXPORT_SYMBOL(mach_i8259_irq);
+
+static void i8259_irqdispatch(void)
+{
+	int irq;
+
+	irq = mach_i8259_irq();
+	if (irq >= 0)
+		do_IRQ(irq);
+	else
+		spurious_interrupt();
+}
+
+void mach_irq_dispatch(unsigned int pending)
+{
+	if (pending & CAUSEF_IP7)
+		do_IRQ(LOONGSON_TIMER_IRQ);
+	else if (pending & CAUSEF_IP6) {	/* North Bridge, Perf counter */
+		do_perfcnt_IRQ();
+		bonito_irqdispatch();
+	} else if (pending & CAUSEF_IP3)	/* CPU UART */
+		do_IRQ(LOONGSON_UART_IRQ);
+	else if (pending & CAUSEF_IP2)		/* South Bridge */
+		i8259_irqdispatch();
+	else
+		spurious_interrupt();
+}
+
+static irqreturn_t ip6_action(int cpl, void *dev_id)
+{
+	return IRQ_HANDLED;
+}
+
+static struct irqaction ip6_irqaction = {
+	.handler = ip6_action,
+	.name = "cascade",
+	.flags = IRQF_SHARED | IRQF_NO_THREAD,
+};
+
+static struct irqaction cascade_irqaction = {
+	.handler = no_action,
+	.name = "cascade",
+	.flags = IRQF_NO_THREAD | IRQF_NO_SUSPEND,
+};
+
+void __init mach_init_irq(void)
+{
+	/* init all controllers:
+	 *   0-15  ------> i8259 interrupts
+	 *   16-23 ------> mips cpu interrupts
+	 *   32-63 ------> bonito irqs
+	 */
+
+	/* set up the cs5536 interrupts as high-level triggered */
+	LOONGSON_INTPOL = LOONGSON_INT_BIT_INT0 | LOONGSON_INT_BIT_INT1;
+	LOONGSON_INTEDGE &= ~(LOONGSON_INT_BIT_INT0 | LOONGSON_INT_BIT_INT1);
+
+	/* Sets the first-level interrupt dispatcher. */
+	mips_cpu_irq_init();
+	init_i8259_irqs();
+	bonito_irq_init();
+
+	/* setup north bridge irq (bonito) */
+	setup_irq(LOONGSON_NORTH_BRIDGE_IRQ, &ip6_irqaction);
+	/* setup south bridge irq (i8259) */
+	setup_irq(LOONGSON_SOUTH_BRIDGE_IRQ, &cascade_irqaction);
+}
diff --git a/arch/mips/loongson2ef/lemote-2f/machtype.c b/arch/mips/loongson2ef/lemote-2f/machtype.c
new file mode 100644
index 000000000000..9462a3ab57be
--- /dev/null
+++ b/arch/mips/loongson2ef/lemote-2f/machtype.c
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2009 Lemote Inc.
+ * Author: Wu Zhangjin, wuzhangjin@gmail.com
+ */
+#include
+
+#include
+
+void __init mach_prom_init_machtype(void)
+{
+	/* The Lemote 2F family of machines shares one kernel image, so the
+	 * machtype= kernel command line argument tells the kernel which
+	 * machine it is running on; recent PMON versions pass it
+	 * automatically. Fortunately, the machine type can also be derived
+	 * directly from the PMON_VER= argument on everything except the NAS
+	 * machine, which helps users whose firmware predates machtype=.
+	 *
+	 * If no "machtype=" is passed, get the machine type from "PMON_VER=".
+ * PMON_VER=LM8089 Lemote 8.9'' netbook + * LM8101 Lemote 10.1'' netbook + * (The above two netbooks have the same kernel support) + * LM6XXX Lemote FuLoong(2F) box series + * LM9XXX Lemote LynLoong PC series + */ + if (strstr(arcs_cmdline, "PMON_VER=LM")) { + if (strstr(arcs_cmdline, "PMON_VER=LM8")) + mips_machtype = MACH_LEMOTE_YL2F89; + else if (strstr(arcs_cmdline, "PMON_VER=LM6")) + mips_machtype = MACH_LEMOTE_FL2F; + else if (strstr(arcs_cmdline, "PMON_VER=LM9")) + mips_machtype = MACH_LEMOTE_LL2F; + else + mips_machtype = MACH_LEMOTE_NAS; + + strcat(arcs_cmdline, " machtype="); + strcat(arcs_cmdline, get_system_type()); + strcat(arcs_cmdline, " "); + } +} diff --git a/arch/mips/loongson2ef/lemote-2f/pm.c b/arch/mips/loongson2ef/lemote-2f/pm.c new file mode 100644 index 000000000000..3d0027229e3c --- /dev/null +++ b/arch/mips/loongson2ef/lemote-2f/pm.c @@ -0,0 +1,145 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Lemote loongson2f family machines' specific suspend support + * + * Copyright (C) 2009 Lemote Inc. + * Author: Wu Zhangjin + */ + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +#include +#include "ec_kb3310b.h" + +#define I8042_KBD_IRQ 1 +#define I8042_CTR_KBDINT 0x01 +#define I8042_CTR_KBDDIS 0x10 + +static unsigned char i8042_ctr; + +static int i8042_enable_kbd_port(void) +{ + if (i8042_command(&i8042_ctr, I8042_CMD_CTL_RCTR)) { + pr_err("i8042.c: Can't read CTR while enabling i8042 kbd port." + "\n"); + return -EIO; + } + + i8042_ctr &= ~I8042_CTR_KBDDIS; + i8042_ctr |= I8042_CTR_KBDINT; + + if (i8042_command(&i8042_ctr, I8042_CMD_CTL_WCTR)) { + i8042_ctr &= ~I8042_CTR_KBDINT; + i8042_ctr |= I8042_CTR_KBDDIS; + pr_err("i8042.c: Failed to enable KBD port.\n"); + + return -EIO; + } + + return 0; +} + +void setup_wakeup_events(void) +{ + int irq_mask; + + switch (mips_machtype) { + case MACH_LEMOTE_ML2F7: + case MACH_LEMOTE_YL2F89: + /* open the keyboard irq in i8259A */ + outb((0xff & ~(1 << I8042_KBD_IRQ)), PIC_MASTER_IMR); + irq_mask = inb(PIC_MASTER_IMR); + + /* enable keyboard port */ + i8042_enable_kbd_port(); + + /* Wakeup CPU via SCI lid open event */ + outb(irq_mask & ~(1 << PIC_CASCADE_IR), PIC_MASTER_IMR); + inb(PIC_MASTER_IMR); + outb(0xff & ~(1 << (SCI_IRQ_NUM - 8)), PIC_SLAVE_IMR); + inb(PIC_SLAVE_IMR); + + break; + + default: + break; + } +} + +static struct delayed_work lid_task; +static int initialized; +/* yeeloong_report_lid_status will be implemented in yeeloong_laptop.c */ +sci_handler yeeloong_report_lid_status; +EXPORT_SYMBOL(yeeloong_report_lid_status); +static void yeeloong_lid_update_task(struct work_struct *work) +{ + if (yeeloong_report_lid_status) + yeeloong_report_lid_status(BIT_LID_DETECT_ON); +} + +int wakeup_loongson(void) +{ + int irq; + + /* query the interrupt number */ + irq = mach_i8259_irq(); + if (irq < 0) + return 0; + + printk(KERN_INFO "%s: irq = %d\n", __func__, irq); + + if (irq == I8042_KBD_IRQ) + return 1; + else if (irq == SCI_IRQ_NUM) { + int ret, sci_event; + /* query the event number */ + ret = ec_query_seq(CMD_GET_EVENT_NUM); + if (ret < 0) + return 0; + sci_event = ec_get_event_num(); + if (sci_event < 0) + return 0; + if (sci_event == EVENT_LID) { + int lid_status; + /* check the LID status */ + lid_status = ec_read(REG_LID_DETECT); + /* wakeup cpu when people open the LID */ + if (lid_status == BIT_LID_DETECT_ON) { + /* If we call it directly here, the WARNING + * will be sent out by getnstimeofday + * via "WARN_ON(timekeeping_suspended);" + * because we can 
not schedule in suspend mode.
+				 */
+				if (initialized == 0) {
+					INIT_DELAYED_WORK(&lid_task,
+						yeeloong_lid_update_task);
+					initialized = 1;
+				}
+				schedule_delayed_work(&lid_task, 1);
+				return 1;
+			}
+		}
+	}
+
+	return 0;
+}
+
+void __weak mach_suspend(void)
+{
+	disable_mfgpt0_counter();
+}
+
+void __weak mach_resume(void)
+{
+	enable_mfgpt0_counter();
+}
diff --git a/arch/mips/loongson2ef/lemote-2f/reset.c b/arch/mips/loongson2ef/lemote-2f/reset.c
new file mode 100644
index 000000000000..0db0934302ea
--- /dev/null
+++ b/arch/mips/loongson2ef/lemote-2f/reset.c
@@ -0,0 +1,155 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Board-specific reboot/shutdown routines
+ *
+ * Copyright (c) 2009 Philippe Vachon
+ *
+ * Copyright (C) 2009 Lemote Inc.
+ * Author: Wu Zhangjin, wuzhangjin@gmail.com
+ */
+
+#include
+#include
+#include
+
+#include
+
+#include
+
+#include
+#include "ec_kb3310b.h"
+
+static void reset_cpu(void)
+{
+	/*
+	 * Reset the CPU to full speed; this is needed when enabling CPU
+	 * frequency scaling.
+	 */
+	LOONGSON_CHIPCFG(0) |= 0x7;
+}
+
+/* reset support for fuloong2f */
+
+static void fl2f_reboot(void)
+{
+	reset_cpu();
+
+	/* Send a reset signal to the south bridge.
+	 *
+	 * NOTE: if "Power Management" is enabled in the kernel, the rtl8169
+	 * does not reset properly with this operation and will not work in
+	 * PMON afterwards; typing the halt command and then rebooting works
+	 * around it. The hardware reset logic does not seem to work reliably.
+	 */
+	{
+		u32 hi, lo;
+		_rdmsr(DIVIL_MSR_REG(DIVIL_SOFT_RESET), &hi, &lo);
+		lo |= 0x00000001;
+		_wrmsr(DIVIL_MSR_REG(DIVIL_SOFT_RESET), hi, lo);
+	}
+}
+
+static void fl2f_shutdown(void)
+{
+	u32 hi, lo, val;
+	int gpio_base;
+
+	/* get gpio base */
+	_rdmsr(DIVIL_MSR_REG(DIVIL_LBAR_GPIO), &hi, &lo);
+	gpio_base = lo & 0xff00;
+
+	/* make cs5536 gpio13 output enable */
+	val = inl(gpio_base + GPIOL_OUT_EN);
+	val &= ~(1 << (16 + 13));
+	val |= (1 << 13);
+	outl(val, gpio_base + GPIOL_OUT_EN);
+	mmiowb();
+	/* make cs5536 gpio13 output low level voltage. */
+	val = inl(gpio_base + GPIOL_OUT_VAL) & ~(1 << (13));
+	val |= (1 << (16 + 13));
+	outl(val, gpio_base + GPIOL_OUT_VAL);
+	mmiowb();
+}
+
+/* reset support for yeeloong2f and mengloong2f notebooks */
+
+static void ml2f_reboot(void)
+{
+	reset_cpu();
+
+	/* send a reset signal to the EC (embedded controller) */
+	ec_write(REG_RESET, BIT_RESET_ON);
+}
+
+#define yl2f89_reboot ml2f_reboot
+
+/* the MengLong (7-inch) laptop has different shutdown logic from the
+ * 8.9-inch models */
+#define EC_SHUTDOWN_IO_PORT_HIGH 0xff2d
+#define EC_SHUTDOWN_IO_PORT_LOW 0xff2e
+#define EC_SHUTDOWN_IO_PORT_DATA 0xff2f
+#define REG_SHUTDOWN_HIGH 0xFC
+#define REG_SHUTDOWN_LOW 0x29
+#define BIT_SHUTDOWN_ON (1 << 1)
+
+static void ml2f_shutdown(void)
+{
+	u8 val;
+	u64 i;
+
+	outb(REG_SHUTDOWN_HIGH, EC_SHUTDOWN_IO_PORT_HIGH);
+	outb(REG_SHUTDOWN_LOW, EC_SHUTDOWN_IO_PORT_LOW);
+	mmiowb();
+	val = inb(EC_SHUTDOWN_IO_PORT_DATA);
+	outb(val & (~BIT_SHUTDOWN_ON), EC_SHUTDOWN_IO_PORT_DATA);
+	mmiowb();
+	/* wait long enough here; it is unclear how many microseconds are needed
*/ + for (i = 0; i < 0x10000; i++) + delay(); + outb(val | BIT_SHUTDOWN_ON, EC_SHUTDOWN_IO_PORT_DATA); + mmiowb(); +} + +static void yl2f89_shutdown(void) +{ + /* cpu-gpio0 output low */ + LOONGSON_GPIODATA &= ~0x00000001; + /* cpu-gpio0 as output */ + LOONGSON_GPIOIE &= ~0x00000001; +} + +void mach_prepare_reboot(void) +{ + switch (mips_machtype) { + case MACH_LEMOTE_FL2F: + case MACH_LEMOTE_NAS: + case MACH_LEMOTE_LL2F: + fl2f_reboot(); + break; + case MACH_LEMOTE_ML2F7: + ml2f_reboot(); + break; + case MACH_LEMOTE_YL2F89: + yl2f89_reboot(); + break; + default: + break; + } +} + +void mach_prepare_shutdown(void) +{ + switch (mips_machtype) { + case MACH_LEMOTE_FL2F: + case MACH_LEMOTE_NAS: + case MACH_LEMOTE_LL2F: + fl2f_shutdown(); + break; + case MACH_LEMOTE_ML2F7: + ml2f_shutdown(); + break; + case MACH_LEMOTE_YL2F89: + yl2f89_shutdown(); + break; + default: + break; + } +} diff --git a/arch/mips/loongson64/Kconfig b/arch/mips/loongson64/Kconfig index d08b20ff2b27..0e99a5af6e90 100644 --- a/arch/mips/loongson64/Kconfig +++ b/arch/mips/loongson64/Kconfig @@ -4,65 +4,6 @@ if MACH_LOONGSON64 choice prompt "Machine Type" -config LEMOTE_FULOONG2E - bool "Lemote Fuloong(2e) mini-PC" - select ARCH_SPARSEMEM_ENABLE - select ARCH_MIGHT_HAVE_PC_PARPORT - select ARCH_MIGHT_HAVE_PC_SERIO - select CEVT_R4K - select CSRC_R4K - select SYS_HAS_CPU_LOONGSON2E - select DMA_NONCOHERENT - select BOOT_ELF32 - select BOARD_SCACHE - select HAVE_PCI - select I8259 - select ISA - select IRQ_MIPS_CPU - select SYS_SUPPORTS_64BIT_KERNEL - select SYS_SUPPORTS_LITTLE_ENDIAN - select SYS_SUPPORTS_HIGHMEM - select SYS_HAS_EARLY_PRINTK - select GENERIC_ISA_DMA_SUPPORT_BROKEN - select CPU_HAS_WB - select LOONGSON_MC146818 - help - Lemote Fuloong(2e) mini-PC board based on the Chinese Loongson-2E CPU and - an FPGA northbridge - - Lemote Fuloong(2e) mini PC have a VIA686B south bridge. - -config LEMOTE_MACH2F - bool "Lemote Loongson 2F family machines" - select ARCH_SPARSEMEM_ENABLE - select ARCH_MIGHT_HAVE_PC_PARPORT - select ARCH_MIGHT_HAVE_PC_SERIO - select BOARD_SCACHE - select BOOT_ELF32 - select CEVT_R4K if ! MIPS_EXTERNAL_TIMER - select CPU_HAS_WB - select CS5536 - select CSRC_R4K if ! MIPS_EXTERNAL_TIMER - select DMA_NONCOHERENT - select GENERIC_ISA_DMA_SUPPORT_BROKEN - select HAVE_CLK - select HAVE_PCI - select I8259 - select IRQ_MIPS_CPU - select ISA - select SYS_HAS_CPU_LOONGSON2F - select SYS_HAS_EARLY_PRINTK - select SYS_SUPPORTS_64BIT_KERNEL - select SYS_SUPPORTS_HIGHMEM - select SYS_SUPPORTS_LITTLE_ENDIAN - select LOONGSON_MC146818 - help - Lemote Loongson 2F family machines utilize the 2F revision of - Loongson processor and the AMD CS5536 south bridge. - - These family machines include fuloong2f mini PC, yeeloong2f notebook, - LingLoong allinone PC and so forth. - config LOONGSON_MACH3X bool "Generic Loongson 3 family machines" select ARCH_SPARSEMEM_ENABLE @@ -95,22 +36,6 @@ config LOONGSON_MACH3X of Loongson processor and RS780/SBX00 chipset. endchoice -config CS5536 - bool - -config CS5536_MFGPT - bool "CS5536 MFGPT Timer" - depends on CS5536 && !HIGH_RES_TIMERS - select MIPS_EXTERNAL_TIMER - help - This option enables the mfgpt0 timer of AMD CS5536. With this timer - switched on you can not use high resolution timers. - - If you want to enable the Loongson2 CPUFreq Driver, Please enable - this option at first, otherwise, You will get wrong system time. - - If unsure, say Yes. 
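fl2f_shutdown() above depends on the CS5536 GPIO block's set/clear register convention: in each 32-bit GPIO register, writing a 1 in bit n asserts the function for pin n and writing a 1 in bit n+16 deasserts it, so a single outl() can update one pin without a racy read-modify-write. A sketch under that assumption (the helper name is hypothetical; GPIOL_OUT_EN and GPIOL_OUT_VAL are the offsets used above):

#include <linux/types.h>
#include <linux/io.h>

static void cs5536_gpio_assert(unsigned long gpio_base, int reg,
			       int pin, int assert)
{
	/* bit n asserts pin n's function, bit n+16 deasserts it */
	u32 val = assert ? (1 << pin) : (1 << (pin + 16));

	outl(val, gpio_base + reg);
}

With it, fl2f_shutdown()'s sequence reduces to asserting OUT_EN for pin 13 and then deasserting OUT_VAL for the same pin.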
- config RS780_HPET bool "RS780/SBX00 HPET Timer" depends on LOONGSON_MACH3X diff --git a/arch/mips/loongson64/Makefile b/arch/mips/loongson64/Makefile index c74bc0251e9d..dc16a23c171f 100644 --- a/arch/mips/loongson64/Makefile +++ b/arch/mips/loongson64/Makefile @@ -5,18 +5,6 @@ obj-$(CONFIG_MACH_LOONGSON64) += common/ -# -# Lemote Fuloong mini-PC (Loongson 2E-based) -# - -obj-$(CONFIG_LEMOTE_FULOONG2E) += fuloong-2e/ - -# -# Lemote loongson2f family machines -# - -obj-$(CONFIG_LEMOTE_MACH2F) += lemote-2f/ - # # All Loongson-3 family machines # diff --git a/arch/mips/loongson64/Platform b/arch/mips/loongson64/Platform index 4da74eea7de8..31167e568e46 100644 --- a/arch/mips/loongson64/Platform +++ b/arch/mips/loongson64/Platform @@ -2,25 +2,6 @@ # Loongson Processors' Support # -# Only gcc >= 4.4 have Loongson specific support -cflags-$(CONFIG_CPU_LOONGSON2EF) += -Wa,--trap -cflags-$(CONFIG_CPU_LOONGSON2E) += \ - $(call cc-option,-march=loongson2e,-march=r4600) -cflags-$(CONFIG_CPU_LOONGSON2F) += \ - $(call cc-option,-march=loongson2f,-march=r4600) -# Enable the workarounds for Loongson2f -ifdef CONFIG_CPU_LOONGSON2F_WORKAROUNDS - ifeq ($(call as-option,-Wa$(comma)-mfix-loongson2f-nop,),) - $(error only binutils >= 2.20.2 have needed option -mfix-loongson2f-nop) - else - cflags-$(CONFIG_CPU_NOP_WORKAROUNDS) += -Wa$(comma)-mfix-loongson2f-nop - endif - ifeq ($(call as-option,-Wa$(comma)-mfix-loongson2f-jump,),) - $(error only binutils >= 2.20.2 have needed option -mfix-loongson2f-jump) - else - cflags-$(CONFIG_CPU_JUMP_WORKAROUNDS) += -Wa$(comma)-mfix-loongson2f-jump - endif -endif cflags-$(CONFIG_CPU_LOONGSON64) += -Wa,--trap @@ -72,6 +53,4 @@ endif platform-$(CONFIG_MACH_LOONGSON64) += loongson64/ cflags-$(CONFIG_MACH_LOONGSON64) += -I$(srctree)/arch/mips/include/asm/mach-loongson64 -mno-branch-likely -load-$(CONFIG_LEMOTE_FULOONG2E) += 0xffffffff80100000 -load-$(CONFIG_LEMOTE_MACH2F) += 0xffffffff80200000 load-$(CONFIG_LOONGSON_MACH3X) += 0xffffffff80200000 diff --git a/arch/mips/loongson64/common/Makefile b/arch/mips/loongson64/common/Makefile index 684624f61f5a..85438df80950 100644 --- a/arch/mips/loongson64/common/Makefile +++ b/arch/mips/loongson64/common/Makefile @@ -14,12 +14,6 @@ obj-$(CONFIG_EARLY_PRINTK) += early_printk.o obj-$(CONFIG_LOONGSON_UART_BASE) += uart_base.o obj-$(CONFIG_LOONGSON_MC146818) += rtc.o -# -# Enable CS5536 Virtual Support Module(VSM) to virtulize the PCI configure -# space -# -obj-$(CONFIG_CS5536) += cs5536/ - # # Suspend Support # diff --git a/arch/mips/loongson64/common/cs5536/Makefile b/arch/mips/loongson64/common/cs5536/Makefile deleted file mode 100644 index b32b29661245..000000000000 --- a/arch/mips/loongson64/common/cs5536/Makefile +++ /dev/null @@ -1,12 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -# -# Makefile for CS5536 support. -# - -obj-$(CONFIG_CS5536) += cs5536_pci.o cs5536_ide.o cs5536_acc.o cs5536_ohci.o \ - cs5536_isa.o cs5536_ehci.o - -# -# Enable cs5536 mfgpt Timer -# -obj-$(CONFIG_CS5536_MFGPT) += cs5536_mfgpt.o diff --git a/arch/mips/loongson64/common/cs5536/cs5536_acc.c b/arch/mips/loongson64/common/cs5536/cs5536_acc.c deleted file mode 100644 index ff50aae72916..000000000000 --- a/arch/mips/loongson64/common/cs5536/cs5536_acc.c +++ /dev/null @@ -1,136 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * the ACC Virtual Support Module of AMD CS5536 - * - * Copyright (C) 2007 Lemote, Inc. - * Author : jlliu, liujl@lemote.com - * - * Copyright (C) 2009 Lemote, Inc. 
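The VSM register helpers removed below emulate standard PCI BAR sizing with a scratch flag in the GLCP_SOFT_COM MSR: when a probe writes all ones (PCI_BAR_RANGE_MASK) to a BAR, the write handler latches SOFT_BAR_ACC_FLAG, and the following read returns the fixed range mask instead of the programmed base, clearing the flag again. From the caller's side the emulated handshake is the usual sizing sequence; a sketch using the accessors below (the wrapper function itself is hypothetical):

static u32 acc_bar0_size_mask(void)
{
	u32 base, mask;

	base = pci_acc_read_reg(PCI_BAR0_REG);		/* current base */
	pci_acc_write_reg(PCI_BAR0_REG, PCI_BAR_RANGE_MASK);
	mask = pci_acc_read_reg(PCI_BAR0_REG);		/* range | I/O flag */
	pci_acc_write_reg(PCI_BAR0_REG, base);		/* restore the base */

	return mask;
}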
- * Author: Wu Zhangjin, wuzhangjin@gmail.com - */ - -#include -#include - -void pci_acc_write_reg(int reg, u32 value) -{ - u32 hi = 0, lo = value; - - switch (reg) { - case PCI_COMMAND: - _rdmsr(GLIU_MSR_REG(GLIU_PAE), &hi, &lo); - if (value & PCI_COMMAND_MASTER) - lo |= (0x03 << 8); - else - lo &= ~(0x03 << 8); - _wrmsr(GLIU_MSR_REG(GLIU_PAE), hi, lo); - break; - case PCI_STATUS: - if (value & PCI_STATUS_PARITY) { - _rdmsr(SB_MSR_REG(SB_ERROR), &hi, &lo); - if (lo & SB_PARE_ERR_FLAG) { - lo = (lo & 0x0000ffff) | SB_PARE_ERR_FLAG; - _wrmsr(SB_MSR_REG(SB_ERROR), hi, lo); - } - } - break; - case PCI_BAR0_REG: - if (value == PCI_BAR_RANGE_MASK) { - _rdmsr(GLCP_MSR_REG(GLCP_SOFT_COM), &hi, &lo); - lo |= SOFT_BAR_ACC_FLAG; - _wrmsr(GLCP_MSR_REG(GLCP_SOFT_COM), hi, lo); - } else if (value & 0x01) { - value &= 0xfffffffc; - hi = 0xA0000000 | ((value & 0x000ff000) >> 12); - lo = 0x000fff80 | ((value & 0x00000fff) << 20); - _wrmsr(GLIU_MSR_REG(GLIU_IOD_BM1), hi, lo); - } - break; - case PCI_ACC_INT_REG: - _rdmsr(DIVIL_MSR_REG(PIC_YSEL_LOW), &hi, &lo); - /* disable all the usb interrupt in PIC */ - lo &= ~(0xf << PIC_YSEL_LOW_ACC_SHIFT); - if (value) /* enable all the acc interrupt in PIC */ - lo |= (CS5536_ACC_INTR << PIC_YSEL_LOW_ACC_SHIFT); - _wrmsr(DIVIL_MSR_REG(PIC_YSEL_LOW), hi, lo); - break; - default: - break; - } -} - -u32 pci_acc_read_reg(int reg) -{ - u32 hi, lo; - u32 conf_data = 0; - - switch (reg) { - case PCI_VENDOR_ID: - conf_data = - CFG_PCI_VENDOR_ID(CS5536_ACC_DEVICE_ID, CS5536_VENDOR_ID); - break; - case PCI_COMMAND: - _rdmsr(GLIU_MSR_REG(GLIU_IOD_BM1), &hi, &lo); - if (((lo & 0xfff00000) || (hi & 0x000000ff)) - && ((hi & 0xf0000000) == 0xa0000000)) - conf_data |= PCI_COMMAND_IO; - _rdmsr(GLIU_MSR_REG(GLIU_PAE), &hi, &lo); - if ((lo & 0x300) == 0x300) - conf_data |= PCI_COMMAND_MASTER; - break; - case PCI_STATUS: - conf_data |= PCI_STATUS_66MHZ; - conf_data |= PCI_STATUS_FAST_BACK; - _rdmsr(SB_MSR_REG(SB_ERROR), &hi, &lo); - if (lo & SB_PARE_ERR_FLAG) - conf_data |= PCI_STATUS_PARITY; - conf_data |= PCI_STATUS_DEVSEL_MEDIUM; - break; - case PCI_CLASS_REVISION: - _rdmsr(ACC_MSR_REG(ACC_CAP), &hi, &lo); - conf_data = lo & 0x000000ff; - conf_data |= (CS5536_ACC_CLASS_CODE << 8); - break; - case PCI_CACHE_LINE_SIZE: - conf_data = - CFG_PCI_CACHE_LINE_SIZE(PCI_NORMAL_HEADER_TYPE, - PCI_NORMAL_LATENCY_TIMER); - break; - case PCI_BAR0_REG: - _rdmsr(GLCP_MSR_REG(GLCP_SOFT_COM), &hi, &lo); - if (lo & SOFT_BAR_ACC_FLAG) { - conf_data = CS5536_ACC_RANGE | - PCI_BASE_ADDRESS_SPACE_IO; - lo &= ~SOFT_BAR_ACC_FLAG; - _wrmsr(GLCP_MSR_REG(GLCP_SOFT_COM), hi, lo); - } else { - _rdmsr(GLIU_MSR_REG(GLIU_IOD_BM1), &hi, &lo); - conf_data = (hi & 0x000000ff) << 12; - conf_data |= (lo & 0xfff00000) >> 20; - conf_data |= 0x01; - conf_data &= ~0x02; - } - break; - case PCI_CARDBUS_CIS: - conf_data = PCI_CARDBUS_CIS_POINTER; - break; - case PCI_SUBSYSTEM_VENDOR_ID: - conf_data = - CFG_PCI_VENDOR_ID(CS5536_ACC_SUB_ID, CS5536_SUB_VENDOR_ID); - break; - case PCI_ROM_ADDRESS: - conf_data = PCI_EXPANSION_ROM_BAR; - break; - case PCI_CAPABILITY_LIST: - conf_data = PCI_CAPLIST_USB_POINTER; - break; - case PCI_INTERRUPT_LINE: - conf_data = - CFG_PCI_INTERRUPT_LINE(PCI_DEFAULT_PIN, CS5536_ACC_INTR); - break; - default: - break; - } - - return conf_data; -} diff --git a/arch/mips/loongson64/common/cs5536/cs5536_ehci.c b/arch/mips/loongson64/common/cs5536/cs5536_ehci.c deleted file mode 100644 index bd4c39fe6109..000000000000 --- a/arch/mips/loongson64/common/cs5536/cs5536_ehci.c +++ /dev/null @@ -1,156 +0,0 @@ -// 
SPDX-License-Identifier: GPL-2.0-or-later -/* - * the EHCI Virtual Support Module of AMD CS5536 - * - * Copyright (C) 2007 Lemote, Inc. - * Author : jlliu, liujl@lemote.com - * - * Copyright (C) 2009 Lemote, Inc. - * Author: Wu Zhangjin, wuzhangjin@gmail.com - */ - -#include -#include - -void pci_ehci_write_reg(int reg, u32 value) -{ - u32 hi = 0, lo = value; - - switch (reg) { - case PCI_COMMAND: - _rdmsr(USB_MSR_REG(USB_EHCI), &hi, &lo); - if (value & PCI_COMMAND_MASTER) - hi |= PCI_COMMAND_MASTER; - else - hi &= ~PCI_COMMAND_MASTER; - - if (value & PCI_COMMAND_MEMORY) - hi |= PCI_COMMAND_MEMORY; - else - hi &= ~PCI_COMMAND_MEMORY; - _wrmsr(USB_MSR_REG(USB_EHCI), hi, lo); - break; - case PCI_STATUS: - if (value & PCI_STATUS_PARITY) { - _rdmsr(SB_MSR_REG(SB_ERROR), &hi, &lo); - if (lo & SB_PARE_ERR_FLAG) { - lo = (lo & 0x0000ffff) | SB_PARE_ERR_FLAG; - _wrmsr(SB_MSR_REG(SB_ERROR), hi, lo); - } - } - break; - case PCI_BAR0_REG: - if (value == PCI_BAR_RANGE_MASK) { - _rdmsr(GLCP_MSR_REG(GLCP_SOFT_COM), &hi, &lo); - lo |= SOFT_BAR_EHCI_FLAG; - _wrmsr(GLCP_MSR_REG(GLCP_SOFT_COM), hi, lo); - } else if ((value & 0x01) == 0x00) { - _rdmsr(USB_MSR_REG(USB_EHCI), &hi, &lo); - lo = value; - _wrmsr(USB_MSR_REG(USB_EHCI), hi, lo); - - value &= 0xfffffff0; - hi = 0x40000000 | ((value & 0xff000000) >> 24); - lo = 0x000fffff | ((value & 0x00fff000) << 8); - _wrmsr(GLIU_MSR_REG(GLIU_P2D_BM4), hi, lo); - } - break; - case PCI_EHCI_LEGSMIEN_REG: - _rdmsr(USB_MSR_REG(USB_EHCI), &hi, &lo); - hi &= 0x003f0000; - hi |= (value & 0x3f) << 16; - _wrmsr(USB_MSR_REG(USB_EHCI), hi, lo); - break; - case PCI_EHCI_FLADJ_REG: - _rdmsr(USB_MSR_REG(USB_EHCI), &hi, &lo); - hi &= ~0x00003f00; - hi |= value & 0x00003f00; - _wrmsr(USB_MSR_REG(USB_EHCI), hi, lo); - break; - default: - break; - } -} - -u32 pci_ehci_read_reg(int reg) -{ - u32 conf_data = 0; - u32 hi, lo; - - switch (reg) { - case PCI_VENDOR_ID: - conf_data = - CFG_PCI_VENDOR_ID(CS5536_EHCI_DEVICE_ID, CS5536_VENDOR_ID); - break; - case PCI_COMMAND: - _rdmsr(USB_MSR_REG(USB_EHCI), &hi, &lo); - if (hi & PCI_COMMAND_MASTER) - conf_data |= PCI_COMMAND_MASTER; - if (hi & PCI_COMMAND_MEMORY) - conf_data |= PCI_COMMAND_MEMORY; - break; - case PCI_STATUS: - conf_data |= PCI_STATUS_66MHZ; - conf_data |= PCI_STATUS_FAST_BACK; - _rdmsr(SB_MSR_REG(SB_ERROR), &hi, &lo); - if (lo & SB_PARE_ERR_FLAG) - conf_data |= PCI_STATUS_PARITY; - conf_data |= PCI_STATUS_DEVSEL_MEDIUM; - break; - case PCI_CLASS_REVISION: - _rdmsr(USB_MSR_REG(USB_CAP), &hi, &lo); - conf_data = lo & 0x000000ff; - conf_data |= (CS5536_EHCI_CLASS_CODE << 8); - break; - case PCI_CACHE_LINE_SIZE: - conf_data = - CFG_PCI_CACHE_LINE_SIZE(PCI_NORMAL_HEADER_TYPE, - PCI_NORMAL_LATENCY_TIMER); - break; - case PCI_BAR0_REG: - _rdmsr(GLCP_MSR_REG(GLCP_SOFT_COM), &hi, &lo); - if (lo & SOFT_BAR_EHCI_FLAG) { - conf_data = CS5536_EHCI_RANGE | - PCI_BASE_ADDRESS_SPACE_MEMORY; - lo &= ~SOFT_BAR_EHCI_FLAG; - _wrmsr(GLCP_MSR_REG(GLCP_SOFT_COM), hi, lo); - } else { - _rdmsr(USB_MSR_REG(USB_EHCI), &hi, &lo); - conf_data = lo & 0xfffff000; - } - break; - case PCI_CARDBUS_CIS: - conf_data = PCI_CARDBUS_CIS_POINTER; - break; - case PCI_SUBSYSTEM_VENDOR_ID: - conf_data = - CFG_PCI_VENDOR_ID(CS5536_EHCI_SUB_ID, CS5536_SUB_VENDOR_ID); - break; - case PCI_ROM_ADDRESS: - conf_data = PCI_EXPANSION_ROM_BAR; - break; - case PCI_CAPABILITY_LIST: - conf_data = PCI_CAPLIST_USB_POINTER; - break; - case PCI_INTERRUPT_LINE: - conf_data = - CFG_PCI_INTERRUPT_LINE(PCI_DEFAULT_PIN, CS5536_USB_INTR); - break; - case PCI_EHCI_LEGSMIEN_REG: - 
_rdmsr(USB_MSR_REG(USB_EHCI), &hi, &lo); - conf_data = (hi & 0x003f0000) >> 16; - break; - case PCI_EHCI_LEGSMISTS_REG: - _rdmsr(USB_MSR_REG(USB_EHCI), &hi, &lo); - conf_data = (hi & 0x3f000000) >> 24; - break; - case PCI_EHCI_FLADJ_REG: - _rdmsr(USB_MSR_REG(USB_EHCI), &hi, &lo); - conf_data = hi & 0x00003f00; - break; - default: - break; - } - - return conf_data; -} diff --git a/arch/mips/loongson64/common/cs5536/cs5536_ide.c b/arch/mips/loongson64/common/cs5536/cs5536_ide.c deleted file mode 100644 index bb933294b092..000000000000 --- a/arch/mips/loongson64/common/cs5536/cs5536_ide.c +++ /dev/null @@ -1,188 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * the IDE Virtual Support Module of AMD CS5536 - * - * Copyright (C) 2007 Lemote, Inc. - * Author : jlliu, liujl@lemote.com - * - * Copyright (C) 2009 Lemote, Inc. - * Author: Wu Zhangjin, wuzhangjin@gmail.com - */ - -#include -#include - -void pci_ide_write_reg(int reg, u32 value) -{ - u32 hi = 0, lo = value; - - switch (reg) { - case PCI_COMMAND: - _rdmsr(GLIU_MSR_REG(GLIU_PAE), &hi, &lo); - if (value & PCI_COMMAND_MASTER) - lo |= (0x03 << 4); - else - lo &= ~(0x03 << 4); - _wrmsr(GLIU_MSR_REG(GLIU_PAE), hi, lo); - break; - case PCI_STATUS: - if (value & PCI_STATUS_PARITY) { - _rdmsr(SB_MSR_REG(SB_ERROR), &hi, &lo); - if (lo & SB_PARE_ERR_FLAG) { - lo = (lo & 0x0000ffff) | SB_PARE_ERR_FLAG; - _wrmsr(SB_MSR_REG(SB_ERROR), hi, lo); - } - } - break; - case PCI_CACHE_LINE_SIZE: - value &= 0x0000ff00; - _rdmsr(SB_MSR_REG(SB_CTRL), &hi, &lo); - hi &= 0xffffff00; - hi |= (value >> 8); - _wrmsr(SB_MSR_REG(SB_CTRL), hi, lo); - break; - case PCI_BAR4_REG: - if (value == PCI_BAR_RANGE_MASK) { - _rdmsr(GLCP_MSR_REG(GLCP_SOFT_COM), &hi, &lo); - lo |= SOFT_BAR_IDE_FLAG; - _wrmsr(GLCP_MSR_REG(GLCP_SOFT_COM), hi, lo); - } else if (value & 0x01) { - _rdmsr(IDE_MSR_REG(IDE_IO_BAR), &hi, &lo); - lo = (value & 0xfffffff0) | 0x1; - _wrmsr(IDE_MSR_REG(IDE_IO_BAR), hi, lo); - - value &= 0xfffffffc; - hi = 0x60000000 | ((value & 0x000ff000) >> 12); - lo = 0x000ffff0 | ((value & 0x00000fff) << 20); - _wrmsr(GLIU_MSR_REG(GLIU_IOD_BM2), hi, lo); - } - break; - case PCI_IDE_CFG_REG: - if (value == CS5536_IDE_FLASH_SIGNATURE) { - _rdmsr(DIVIL_MSR_REG(DIVIL_BALL_OPTS), &hi, &lo); - lo |= 0x01; - _wrmsr(DIVIL_MSR_REG(DIVIL_BALL_OPTS), hi, lo); - } else { - _rdmsr(IDE_MSR_REG(IDE_CFG), &hi, &lo); - lo = value; - _wrmsr(IDE_MSR_REG(IDE_CFG), hi, lo); - } - break; - case PCI_IDE_DTC_REG: - _rdmsr(IDE_MSR_REG(IDE_DTC), &hi, &lo); - lo = value; - _wrmsr(IDE_MSR_REG(IDE_DTC), hi, lo); - break; - case PCI_IDE_CAST_REG: - _rdmsr(IDE_MSR_REG(IDE_CAST), &hi, &lo); - lo = value; - _wrmsr(IDE_MSR_REG(IDE_CAST), hi, lo); - break; - case PCI_IDE_ETC_REG: - _rdmsr(IDE_MSR_REG(IDE_ETC), &hi, &lo); - lo = value; - _wrmsr(IDE_MSR_REG(IDE_ETC), hi, lo); - break; - case PCI_IDE_PM_REG: - _rdmsr(IDE_MSR_REG(IDE_INTERNAL_PM), &hi, &lo); - lo = value; - _wrmsr(IDE_MSR_REG(IDE_INTERNAL_PM), hi, lo); - break; - default: - break; - } -} - -u32 pci_ide_read_reg(int reg) -{ - u32 conf_data = 0; - u32 hi, lo; - - switch (reg) { - case PCI_VENDOR_ID: - conf_data = - CFG_PCI_VENDOR_ID(CS5536_IDE_DEVICE_ID, CS5536_VENDOR_ID); - break; - case PCI_COMMAND: - _rdmsr(IDE_MSR_REG(IDE_IO_BAR), &hi, &lo); - if (lo & 0xfffffff0) - conf_data |= PCI_COMMAND_IO; - _rdmsr(GLIU_MSR_REG(GLIU_PAE), &hi, &lo); - if ((lo & 0x30) == 0x30) - conf_data |= PCI_COMMAND_MASTER; - break; - case PCI_STATUS: - conf_data |= PCI_STATUS_66MHZ; - conf_data |= PCI_STATUS_FAST_BACK; - _rdmsr(SB_MSR_REG(SB_ERROR), 
&hi, &lo); - if (lo & SB_PARE_ERR_FLAG) - conf_data |= PCI_STATUS_PARITY; - conf_data |= PCI_STATUS_DEVSEL_MEDIUM; - break; - case PCI_CLASS_REVISION: - _rdmsr(IDE_MSR_REG(IDE_CAP), &hi, &lo); - conf_data = lo & 0x000000ff; - conf_data |= (CS5536_IDE_CLASS_CODE << 8); - break; - case PCI_CACHE_LINE_SIZE: - _rdmsr(SB_MSR_REG(SB_CTRL), &hi, &lo); - hi &= 0x000000f8; - conf_data = CFG_PCI_CACHE_LINE_SIZE(PCI_NORMAL_HEADER_TYPE, hi); - break; - case PCI_BAR4_REG: - _rdmsr(GLCP_MSR_REG(GLCP_SOFT_COM), &hi, &lo); - if (lo & SOFT_BAR_IDE_FLAG) { - conf_data = CS5536_IDE_RANGE | - PCI_BASE_ADDRESS_SPACE_IO; - lo &= ~SOFT_BAR_IDE_FLAG; - _wrmsr(GLCP_MSR_REG(GLCP_SOFT_COM), hi, lo); - } else { - _rdmsr(IDE_MSR_REG(IDE_IO_BAR), &hi, &lo); - conf_data = lo & 0xfffffff0; - conf_data |= 0x01; - conf_data &= ~0x02; - } - break; - case PCI_CARDBUS_CIS: - conf_data = PCI_CARDBUS_CIS_POINTER; - break; - case PCI_SUBSYSTEM_VENDOR_ID: - conf_data = - CFG_PCI_VENDOR_ID(CS5536_IDE_SUB_ID, CS5536_SUB_VENDOR_ID); - break; - case PCI_ROM_ADDRESS: - conf_data = PCI_EXPANSION_ROM_BAR; - break; - case PCI_CAPABILITY_LIST: - conf_data = PCI_CAPLIST_POINTER; - break; - case PCI_INTERRUPT_LINE: - conf_data = - CFG_PCI_INTERRUPT_LINE(PCI_DEFAULT_PIN, CS5536_IDE_INTR); - break; - case PCI_IDE_CFG_REG: - _rdmsr(IDE_MSR_REG(IDE_CFG), &hi, &lo); - conf_data = lo; - break; - case PCI_IDE_DTC_REG: - _rdmsr(IDE_MSR_REG(IDE_DTC), &hi, &lo); - conf_data = lo; - break; - case PCI_IDE_CAST_REG: - _rdmsr(IDE_MSR_REG(IDE_CAST), &hi, &lo); - conf_data = lo; - break; - case PCI_IDE_ETC_REG: - _rdmsr(IDE_MSR_REG(IDE_ETC), &hi, &lo); - conf_data = lo; - break; - case PCI_IDE_PM_REG: - _rdmsr(IDE_MSR_REG(IDE_INTERNAL_PM), &hi, &lo); - conf_data = lo; - break; - default: - break; - } - - return conf_data; -} diff --git a/arch/mips/loongson64/common/cs5536/cs5536_isa.c b/arch/mips/loongson64/common/cs5536/cs5536_isa.c deleted file mode 100644 index 5ad38f86ee62..000000000000 --- a/arch/mips/loongson64/common/cs5536/cs5536_isa.c +++ /dev/null @@ -1,326 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * the ISA Virtual Support Module of AMD CS5536 - * - * Copyright (C) 2007 Lemote, Inc. - * Author : jlliu, liujl@lemote.com - * - * Copyright (C) 2009 Lemote, Inc. - * Author: Wu Zhangjin, wuzhangjin@gmail.com - */ - -#include -#include -#include - -/* common variables for PCI_ISA_READ/WRITE_BAR */ -static const u32 divil_msr_reg[6] = { - DIVIL_MSR_REG(DIVIL_LBAR_SMB), DIVIL_MSR_REG(DIVIL_LBAR_GPIO), - DIVIL_MSR_REG(DIVIL_LBAR_MFGPT), DIVIL_MSR_REG(DIVIL_LBAR_IRQ), - DIVIL_MSR_REG(DIVIL_LBAR_PMS), DIVIL_MSR_REG(DIVIL_LBAR_ACPI), -}; - -static const u32 soft_bar_flag[6] = { - SOFT_BAR_SMB_FLAG, SOFT_BAR_GPIO_FLAG, SOFT_BAR_MFGPT_FLAG, - SOFT_BAR_IRQ_FLAG, SOFT_BAR_PMS_FLAG, SOFT_BAR_ACPI_FLAG, -}; - -static const u32 sb_msr_reg[6] = { - SB_MSR_REG(SB_R0), SB_MSR_REG(SB_R1), SB_MSR_REG(SB_R2), - SB_MSR_REG(SB_R3), SB_MSR_REG(SB_R4), SB_MSR_REG(SB_R5), -}; - -static const u32 bar_space_range[6] = { - CS5536_SMB_RANGE, CS5536_GPIO_RANGE, CS5536_MFGPT_RANGE, - CS5536_IRQ_RANGE, CS5536_PMS_RANGE, CS5536_ACPI_RANGE, -}; - -static const int bar_space_len[6] = { - CS5536_SMB_LENGTH, CS5536_GPIO_LENGTH, CS5536_MFGPT_LENGTH, - CS5536_IRQ_LENGTH, CS5536_PMS_LENGTH, CS5536_ACPI_LENGTH, -}; - -/* - * enable the divil module bar space. - * - * For all the DIVIL module LBAR, you should control the DIVIL LBAR reg - * and the RCONFx(0~5) reg to use the modules. 
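pci_isa_write_bar() below programs both halves of that pairing: the DIVIL LBAR MSR carries the module's I/O base and enable, while the matching RCONFx descriptor opens the same window to I/O cycles. Readers of the base only need the LBAR; the following sketch is the same lookup fl2f_shutdown() performs for the GPIO block (hypothetical helper name):

static unsigned int divil_gpio_io_base(void)
{
	u32 hi, lo;

	_rdmsr(DIVIL_MSR_REG(DIVIL_LBAR_GPIO), &hi, &lo);
	return lo & 0xff00;	/* the low word carries the I/O base */
}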
- */ -static void divil_lbar_enable(void) -{ - u32 hi, lo; - int offset; - - /* - * The DIVIL IRQ is not used yet. and make the RCONF0 reserved. - */ - - for (offset = DIVIL_LBAR_SMB; offset <= DIVIL_LBAR_PMS; offset++) { - _rdmsr(DIVIL_MSR_REG(offset), &hi, &lo); - hi |= 0x01; - _wrmsr(DIVIL_MSR_REG(offset), hi, lo); - } -} - -/* - * disable the divil module bar space. - */ -static void divil_lbar_disable(void) -{ - u32 hi, lo; - int offset; - - for (offset = DIVIL_LBAR_SMB; offset <= DIVIL_LBAR_PMS; offset++) { - _rdmsr(DIVIL_MSR_REG(offset), &hi, &lo); - hi &= ~0x01; - _wrmsr(DIVIL_MSR_REG(offset), hi, lo); - } -} - -/* - * BAR write: write value to the n BAR - */ - -void pci_isa_write_bar(int n, u32 value) -{ - u32 hi = 0, lo = value; - - if (value == PCI_BAR_RANGE_MASK) { - _rdmsr(GLCP_MSR_REG(GLCP_SOFT_COM), &hi, &lo); - lo |= soft_bar_flag[n]; - _wrmsr(GLCP_MSR_REG(GLCP_SOFT_COM), hi, lo); - } else if (value & 0x01) { - /* NATIVE reg */ - hi = 0x0000f001; - lo &= bar_space_range[n]; - _wrmsr(divil_msr_reg[n], hi, lo); - - /* RCONFx is 4bytes in units for I/O space */ - hi = ((value & 0x000ffffc) << 12) | - ((bar_space_len[n] - 4) << 12) | 0x01; - lo = ((value & 0x000ffffc) << 12) | 0x01; - _wrmsr(sb_msr_reg[n], hi, lo); - } -} - -/* - * BAR read: read the n BAR - */ - -u32 pci_isa_read_bar(int n) -{ - u32 conf_data = 0; - u32 hi, lo; - - _rdmsr(GLCP_MSR_REG(GLCP_SOFT_COM), &hi, &lo); - if (lo & soft_bar_flag[n]) { - conf_data = bar_space_range[n] | PCI_BASE_ADDRESS_SPACE_IO; - lo &= ~soft_bar_flag[n]; - _wrmsr(GLCP_MSR_REG(GLCP_SOFT_COM), hi, lo); - } else { - _rdmsr(divil_msr_reg[n], &hi, &lo); - conf_data = lo & bar_space_range[n]; - conf_data |= 0x01; - conf_data &= ~0x02; - } - return conf_data; -} - -/* - * isa_write: ISA write transfer - * - * We assume that this is not a bus master transfer. 
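divil_lbar_enable()/divil_lbar_disable() above treat bit 0 of each LBAR MSR's high word as the decode enable and sweep the whole SMB..PMS range at once. Enabling a single module in isolation would look like this sketch (hypothetical helper, same MSR convention as the code above):

static void divil_lbar_enable_one(int offset)	/* e.g. DIVIL_LBAR_GPIO */
{
	u32 hi, lo;

	_rdmsr(DIVIL_MSR_REG(offset), &hi, &lo);
	hi |= 0x01;	/* bit 0 of the high word enables the LBAR decode */
	_wrmsr(DIVIL_MSR_REG(offset), hi, lo);
}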
- */ -void pci_isa_write_reg(int reg, u32 value) -{ - u32 hi = 0, lo = value; - u32 temp; - - switch (reg) { - case PCI_COMMAND: - if (value & PCI_COMMAND_IO) - divil_lbar_enable(); - else - divil_lbar_disable(); - break; - case PCI_STATUS: - _rdmsr(SB_MSR_REG(SB_ERROR), &hi, &lo); - temp = lo & 0x0000ffff; - if ((value & PCI_STATUS_SIG_TARGET_ABORT) && - (lo & SB_TAS_ERR_EN)) - temp |= SB_TAS_ERR_FLAG; - - if ((value & PCI_STATUS_REC_TARGET_ABORT) && - (lo & SB_TAR_ERR_EN)) - temp |= SB_TAR_ERR_FLAG; - - if ((value & PCI_STATUS_REC_MASTER_ABORT) - && (lo & SB_MAR_ERR_EN)) - temp |= SB_MAR_ERR_FLAG; - - if ((value & PCI_STATUS_DETECTED_PARITY) - && (lo & SB_PARE_ERR_EN)) - temp |= SB_PARE_ERR_FLAG; - - lo = temp; - _wrmsr(SB_MSR_REG(SB_ERROR), hi, lo); - break; - case PCI_CACHE_LINE_SIZE: - value &= 0x0000ff00; - _rdmsr(SB_MSR_REG(SB_CTRL), &hi, &lo); - hi &= 0xffffff00; - hi |= (value >> 8); - _wrmsr(SB_MSR_REG(SB_CTRL), hi, lo); - break; - case PCI_BAR0_REG: - pci_isa_write_bar(0, value); - break; - case PCI_BAR1_REG: - pci_isa_write_bar(1, value); - break; - case PCI_BAR2_REG: - pci_isa_write_bar(2, value); - break; - case PCI_BAR3_REG: - pci_isa_write_bar(3, value); - break; - case PCI_BAR4_REG: - pci_isa_write_bar(4, value); - break; - case PCI_BAR5_REG: - pci_isa_write_bar(5, value); - break; - case PCI_UART1_INT_REG: - _rdmsr(DIVIL_MSR_REG(PIC_YSEL_HIGH), &hi, &lo); - /* disable uart1 interrupt in PIC */ - lo &= ~(0xf << 24); - if (value) /* enable uart1 interrupt in PIC */ - lo |= (CS5536_UART1_INTR << 24); - _wrmsr(DIVIL_MSR_REG(PIC_YSEL_HIGH), hi, lo); - break; - case PCI_UART2_INT_REG: - _rdmsr(DIVIL_MSR_REG(PIC_YSEL_HIGH), &hi, &lo); - /* disable uart2 interrupt in PIC */ - lo &= ~(0xf << 28); - if (value) /* enable uart2 interrupt in PIC */ - lo |= (CS5536_UART2_INTR << 28); - _wrmsr(DIVIL_MSR_REG(PIC_YSEL_HIGH), hi, lo); - break; - case PCI_ISA_FIXUP_REG: - if (value) { - /* enable the TARGET ABORT/MASTER ABORT etc. */ - _rdmsr(SB_MSR_REG(SB_ERROR), &hi, &lo); - lo |= 0x00000063; - _wrmsr(SB_MSR_REG(SB_ERROR), hi, lo); - } - - default: - /* ALL OTHER PCI CONFIG SPACE HEADER IS NOT IMPLEMENTED. */ - break; - } -} - -/* - * isa_read: ISA read transfers - * - * We assume that this is not a bus master transfer. - */ -u32 pci_isa_read_reg(int reg) -{ - u32 conf_data = 0; - u32 hi, lo; - - switch (reg) { - case PCI_VENDOR_ID: - conf_data = - CFG_PCI_VENDOR_ID(CS5536_ISA_DEVICE_ID, CS5536_VENDOR_ID); - break; - case PCI_COMMAND: - /* we just check the first LBAR for the IO enable bit, */ - /* maybe we should changed later. */ - _rdmsr(DIVIL_MSR_REG(DIVIL_LBAR_SMB), &hi, &lo); - if (hi & 0x01) - conf_data |= PCI_COMMAND_IO; - break; - case PCI_STATUS: - conf_data |= PCI_STATUS_66MHZ; - conf_data |= PCI_STATUS_DEVSEL_MEDIUM; - conf_data |= PCI_STATUS_FAST_BACK; - - _rdmsr(SB_MSR_REG(SB_ERROR), &hi, &lo); - if (lo & SB_TAS_ERR_FLAG) - conf_data |= PCI_STATUS_SIG_TARGET_ABORT; - if (lo & SB_TAR_ERR_FLAG) - conf_data |= PCI_STATUS_REC_TARGET_ABORT; - if (lo & SB_MAR_ERR_FLAG) - conf_data |= PCI_STATUS_REC_MASTER_ABORT; - if (lo & SB_PARE_ERR_FLAG) - conf_data |= PCI_STATUS_DETECTED_PARITY; - break; - case PCI_CLASS_REVISION: - _rdmsr(GLCP_MSR_REG(GLCP_CHIP_REV_ID), &hi, &lo); - conf_data = lo & 0x000000ff; - conf_data |= (CS5536_ISA_CLASS_CODE << 8); - break; - case PCI_CACHE_LINE_SIZE: - _rdmsr(SB_MSR_REG(SB_CTRL), &hi, &lo); - hi &= 0x000000f8; - conf_data = CFG_PCI_CACHE_LINE_SIZE(PCI_BRIDGE_HEADER_TYPE, hi); - break; - /* - * we only use the LBAR of DIVIL, no RCONF used. 
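pci_isa_write_reg() above also preserves PCI's write-one-to-clear status convention: a latched error flag in the SB_ERROR MSR is cleared by writing the matching PCI_STATUS bit back, and zero bits leave the flags untouched. A caller-side sketch, assuming that reading of the emulation is right (the wrapper is hypothetical):

static void isa_clear_parity_error(void)
{
	u32 status = pci_isa_read_reg(PCI_STATUS);

	if (status & PCI_STATUS_DETECTED_PARITY)
		pci_isa_write_reg(PCI_STATUS, PCI_STATUS_DETECTED_PARITY);
}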
- * all of them are IO space. - */ - case PCI_BAR0_REG: - return pci_isa_read_bar(0); - break; - case PCI_BAR1_REG: - return pci_isa_read_bar(1); - break; - case PCI_BAR2_REG: - return pci_isa_read_bar(2); - break; - case PCI_BAR3_REG: - break; - case PCI_BAR4_REG: - return pci_isa_read_bar(4); - break; - case PCI_BAR5_REG: - return pci_isa_read_bar(5); - break; - case PCI_CARDBUS_CIS: - conf_data = PCI_CARDBUS_CIS_POINTER; - break; - case PCI_SUBSYSTEM_VENDOR_ID: - conf_data = - CFG_PCI_VENDOR_ID(CS5536_ISA_SUB_ID, CS5536_SUB_VENDOR_ID); - break; - case PCI_ROM_ADDRESS: - conf_data = PCI_EXPANSION_ROM_BAR; - break; - case PCI_CAPABILITY_LIST: - conf_data = PCI_CAPLIST_POINTER; - break; - case PCI_INTERRUPT_LINE: - /* no interrupt used here */ - conf_data = CFG_PCI_INTERRUPT_LINE(0x00, 0x00); - break; - default: - break; - } - - return conf_data; -} - -/* - * The mfgpt timer interrupt is running early, so we must keep the south bridge - * mmio always enabled. Otherwise we may race with the PCI configuration which - * may temporarily disable it. When that happens and the timer interrupt fires, - * we are not able to clear it and the system will hang. - */ -static void cs5536_isa_mmio_always_on(struct pci_dev *dev) -{ - dev->mmio_always_on = 1; -} -DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CS5536_ISA, - PCI_CLASS_BRIDGE_ISA, 8, cs5536_isa_mmio_always_on); diff --git a/arch/mips/loongson64/common/cs5536/cs5536_mfgpt.c b/arch/mips/loongson64/common/cs5536/cs5536_mfgpt.c deleted file mode 100644 index 30af1b7c7529..000000000000 --- a/arch/mips/loongson64/common/cs5536/cs5536_mfgpt.c +++ /dev/null @@ -1,207 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * CS5536 General timer functions - * - * Copyright (C) 2007 Lemote Inc. & Institute of Computing Technology - * Author: Yanhua, yanh@lemote.com - * - * Copyright (C) 2009 Lemote Inc. - * Author: Wu zhangjin, wuzhangjin@gmail.com - * - * Reference: AMD Geode(TM) CS5536 Companion Device Data Book - */ - -#include -#include -#include -#include -#include -#include -#include - -#include - -#include - -static DEFINE_RAW_SPINLOCK(mfgpt_lock); - -static u32 mfgpt_base; - -/* - * Initialize the MFGPT timer. - * - * This is also called after resume to bring the MFGPT into operation again. - */ - -/* disable counter */ -void disable_mfgpt0_counter(void) -{ - outw(inw(MFGPT0_SETUP) & 0x7fff, MFGPT0_SETUP); -} -EXPORT_SYMBOL(disable_mfgpt0_counter); - -/* enable counter, comparator2 to event mode, 14.318MHz clock */ -void enable_mfgpt0_counter(void) -{ - outw(0xe310, MFGPT0_SETUP); -} -EXPORT_SYMBOL(enable_mfgpt0_counter); - -static int mfgpt_timer_set_periodic(struct clock_event_device *evt) -{ - raw_spin_lock(&mfgpt_lock); - - outw(COMPARE, MFGPT0_CMP2); /* set comparator2 */ - outw(0, MFGPT0_CNT); /* set counter to 0 */ - enable_mfgpt0_counter(); - - raw_spin_unlock(&mfgpt_lock); - return 0; -} - -static int mfgpt_timer_shutdown(struct clock_event_device *evt) -{ - if (clockevent_state_periodic(evt) || clockevent_state_oneshot(evt)) { - raw_spin_lock(&mfgpt_lock); - disable_mfgpt0_counter(); - raw_spin_unlock(&mfgpt_lock); - } - - return 0; -} - -static struct clock_event_device mfgpt_clockevent = { - .name = "mfgpt", - .features = CLOCK_EVT_FEAT_PERIODIC, - - /* The oneshot mode have very high deviation, don't use it! 
*/ - .set_state_shutdown = mfgpt_timer_shutdown, - .set_state_periodic = mfgpt_timer_set_periodic, - .irq = CS5536_MFGPT_INTR, -}; - -static irqreturn_t timer_interrupt(int irq, void *dev_id) -{ - u32 basehi; - - /* - * get MFGPT base address - * - * NOTE: do not remove me, it's need for the value of mfgpt_base is - * variable - */ - _rdmsr(DIVIL_MSR_REG(DIVIL_LBAR_MFGPT), &basehi, &mfgpt_base); - - /* ack */ - outw(inw(MFGPT0_SETUP) | 0x4000, MFGPT0_SETUP); - - mfgpt_clockevent.event_handler(&mfgpt_clockevent); - - return IRQ_HANDLED; -} - -static struct irqaction irq5 = { - .handler = timer_interrupt, - .flags = IRQF_NOBALANCING | IRQF_TIMER, - .name = "timer" -}; - -/* - * Initialize the conversion factor and the min/max deltas of the clock event - * structure and register the clock event source with the framework. - */ -void __init setup_mfgpt0_timer(void) -{ - u32 basehi; - struct clock_event_device *cd = &mfgpt_clockevent; - unsigned int cpu = smp_processor_id(); - - cd->cpumask = cpumask_of(cpu); - clockevent_set_clock(cd, MFGPT_TICK_RATE); - cd->max_delta_ns = clockevent_delta2ns(0xffff, cd); - cd->max_delta_ticks = 0xffff; - cd->min_delta_ns = clockevent_delta2ns(0xf, cd); - cd->min_delta_ticks = 0xf; - - /* Enable MFGPT0 Comparator 2 Output to the Interrupt Mapper */ - _wrmsr(DIVIL_MSR_REG(MFGPT_IRQ), 0, 0x100); - - /* Enable Interrupt Gate 5 */ - _wrmsr(DIVIL_MSR_REG(PIC_ZSEL_LOW), 0, 0x50000); - - /* get MFGPT base address */ - _rdmsr(DIVIL_MSR_REG(DIVIL_LBAR_MFGPT), &basehi, &mfgpt_base); - - clockevents_register_device(cd); - - setup_irq(CS5536_MFGPT_INTR, &irq5); -} - -/* - * Since the MFGPT overflows every tick, its not very useful - * to just read by itself. So use jiffies to emulate a free - * running counter: - */ -static u64 mfgpt_read(struct clocksource *cs) -{ - unsigned long flags; - int count; - u32 jifs; - static int old_count; - static u32 old_jifs; - - raw_spin_lock_irqsave(&mfgpt_lock, flags); - /* - * Although our caller may have the read side of xtime_lock, - * this is now a seqlock, and we are cheating in this routine - * by having side effects on state that we cannot undo if - * there is a collision on the seqlock and our caller has to - * retry. (Namely, old_jifs and old_count.) So we must treat - * jiffies as volatile despite the lock. We read jiffies - * before latching the timer count to guarantee that although - * the jiffies value might be older than the count (that is, - * the counter may underflow between the last point where - * jiffies was incremented and the point where we latch the - * count), it cannot be newer. - */ - jifs = jiffies; - /* read the count */ - count = inw(MFGPT0_CNT); - - /* - * It's possible for count to appear to go the wrong way for this - * reason: - * - * The timer counter underflows, but we haven't handled the resulting - * interrupt and incremented jiffies yet. - * - * Previous attempts to handle these cases intelligently were buggy, so - * we just do the simple thing now. - */ - if (count < old_count && jifs == old_jifs) - count = old_count; - - old_count = count; - old_jifs = jifs; - - raw_spin_unlock_irqrestore(&mfgpt_lock, flags); - - return (u64) (jifs * COMPARE) + count; -} - -static struct clocksource clocksource_mfgpt = { - .name = "mfgpt", - .rating = 120, /* Functional for real use, but not desired */ - .read = mfgpt_read, - .mask = CLOCKSOURCE_MASK(32), -}; - -int __init init_mfgpt_clocksource(void) -{ - if (num_possible_cpus() > 1) /* MFGPT does not scale! 
*/ - return 0; - - return clocksource_register_hz(&clocksource_mfgpt, MFGPT_TICK_RATE); -} - -arch_initcall(init_mfgpt_clocksource); diff --git a/arch/mips/loongson64/common/cs5536/cs5536_ohci.c b/arch/mips/loongson64/common/cs5536/cs5536_ohci.c deleted file mode 100644 index 71a52b120317..000000000000 --- a/arch/mips/loongson64/common/cs5536/cs5536_ohci.c +++ /dev/null @@ -1,145 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * the OHCI Virtual Support Module of AMD CS5536 - * - * Copyright (C) 2007 Lemote, Inc. - * Author : jlliu, liujl@lemote.com - * - * Copyright (C) 2009 Lemote, Inc. - * Author: Wu Zhangjin, wuzhangjin@gmail.com - */ - -#include -#include - -void pci_ohci_write_reg(int reg, u32 value) -{ - u32 hi = 0, lo = value; - - switch (reg) { - case PCI_COMMAND: - _rdmsr(USB_MSR_REG(USB_OHCI), &hi, &lo); - if (value & PCI_COMMAND_MASTER) - hi |= PCI_COMMAND_MASTER; - else - hi &= ~PCI_COMMAND_MASTER; - - if (value & PCI_COMMAND_MEMORY) - hi |= PCI_COMMAND_MEMORY; - else - hi &= ~PCI_COMMAND_MEMORY; - _wrmsr(USB_MSR_REG(USB_OHCI), hi, lo); - break; - case PCI_STATUS: - if (value & PCI_STATUS_PARITY) { - _rdmsr(SB_MSR_REG(SB_ERROR), &hi, &lo); - if (lo & SB_PARE_ERR_FLAG) { - lo = (lo & 0x0000ffff) | SB_PARE_ERR_FLAG; - _wrmsr(SB_MSR_REG(SB_ERROR), hi, lo); - } - } - break; - case PCI_BAR0_REG: - if (value == PCI_BAR_RANGE_MASK) { - _rdmsr(GLCP_MSR_REG(GLCP_SOFT_COM), &hi, &lo); - lo |= SOFT_BAR_OHCI_FLAG; - _wrmsr(GLCP_MSR_REG(GLCP_SOFT_COM), hi, lo); - } else if ((value & 0x01) == 0x00) { - _rdmsr(USB_MSR_REG(USB_OHCI), &hi, &lo); - lo = value; - _wrmsr(USB_MSR_REG(USB_OHCI), hi, lo); - - value &= 0xfffffff0; - hi = 0x40000000 | ((value & 0xff000000) >> 24); - lo = 0x000fffff | ((value & 0x00fff000) << 8); - _wrmsr(GLIU_MSR_REG(GLIU_P2D_BM3), hi, lo); - } - break; - case PCI_OHCI_INT_REG: - _rdmsr(DIVIL_MSR_REG(PIC_YSEL_LOW), &hi, &lo); - lo &= ~(0xf << PIC_YSEL_LOW_USB_SHIFT); - if (value) /* enable all the usb interrupt in PIC */ - lo |= (CS5536_USB_INTR << PIC_YSEL_LOW_USB_SHIFT); - _wrmsr(DIVIL_MSR_REG(PIC_YSEL_LOW), hi, lo); - break; - default: - break; - } -} - -u32 pci_ohci_read_reg(int reg) -{ - u32 conf_data = 0; - u32 hi, lo; - - switch (reg) { - case PCI_VENDOR_ID: - conf_data = - CFG_PCI_VENDOR_ID(CS5536_OHCI_DEVICE_ID, CS5536_VENDOR_ID); - break; - case PCI_COMMAND: - _rdmsr(USB_MSR_REG(USB_OHCI), &hi, &lo); - if (hi & PCI_COMMAND_MASTER) - conf_data |= PCI_COMMAND_MASTER; - if (hi & PCI_COMMAND_MEMORY) - conf_data |= PCI_COMMAND_MEMORY; - break; - case PCI_STATUS: - conf_data |= PCI_STATUS_66MHZ; - conf_data |= PCI_STATUS_FAST_BACK; - _rdmsr(SB_MSR_REG(SB_ERROR), &hi, &lo); - if (lo & SB_PARE_ERR_FLAG) - conf_data |= PCI_STATUS_PARITY; - conf_data |= PCI_STATUS_DEVSEL_MEDIUM; - break; - case PCI_CLASS_REVISION: - _rdmsr(USB_MSR_REG(USB_CAP), &hi, &lo); - conf_data = lo & 0x000000ff; - conf_data |= (CS5536_OHCI_CLASS_CODE << 8); - break; - case PCI_CACHE_LINE_SIZE: - conf_data = - CFG_PCI_CACHE_LINE_SIZE(PCI_NORMAL_HEADER_TYPE, - PCI_NORMAL_LATENCY_TIMER); - break; - case PCI_BAR0_REG: - _rdmsr(GLCP_MSR_REG(GLCP_SOFT_COM), &hi, &lo); - if (lo & SOFT_BAR_OHCI_FLAG) { - conf_data = CS5536_OHCI_RANGE | - PCI_BASE_ADDRESS_SPACE_MEMORY; - lo &= ~SOFT_BAR_OHCI_FLAG; - _wrmsr(GLCP_MSR_REG(GLCP_SOFT_COM), hi, lo); - } else { - _rdmsr(USB_MSR_REG(USB_OHCI), &hi, &lo); - conf_data = lo & 0xffffff00; - conf_data &= ~0x0000000f; /* 32bit mem */ - } - break; - case PCI_CARDBUS_CIS: - conf_data = PCI_CARDBUS_CIS_POINTER; - break; - case PCI_SUBSYSTEM_VENDOR_ID: - 
conf_data = - CFG_PCI_VENDOR_ID(CS5536_OHCI_SUB_ID, CS5536_SUB_VENDOR_ID); - break; - case PCI_ROM_ADDRESS: - conf_data = PCI_EXPANSION_ROM_BAR; - break; - case PCI_CAPABILITY_LIST: - conf_data = PCI_CAPLIST_USB_POINTER; - break; - case PCI_INTERRUPT_LINE: - conf_data = - CFG_PCI_INTERRUPT_LINE(PCI_DEFAULT_PIN, CS5536_USB_INTR); - break; - case PCI_OHCI_INT_REG: - _rdmsr(DIVIL_MSR_REG(PIC_YSEL_LOW), &hi, &lo); - if (((lo >> PIC_YSEL_LOW_USB_SHIFT) & 0xf) == CS5536_USB_INTR) - conf_data = 1; - break; - default: - break; - } - - return conf_data; -} diff --git a/arch/mips/loongson64/common/cs5536/cs5536_pci.c b/arch/mips/loongson64/common/cs5536/cs5536_pci.c deleted file mode 100644 index 202c89b568ba..000000000000 --- a/arch/mips/loongson64/common/cs5536/cs5536_pci.c +++ /dev/null @@ -1,84 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * read/write operations to the PCI config space of the CS5536 - * - * Copyright (C) 2007 Lemote, Inc. - * Author : jlliu, liujl@lemote.com - * - * Copyright (C) 2009 Lemote, Inc. - * Author: Wu Zhangjin, wuzhangjin@gmail.com - * - * the Virtual Support Modules (VSM) for virtualizing the PCI - * configuration space are defined in cs5536_modulename.c respectively. - * - * with this virtualization, users can access the PCI configuration - * space directly as a normal multi-function PCI device which follows - * the PCI-2.2 spec. - */ - -#include -#include -#include - -enum { - CS5536_FUNC_START = -1, - CS5536_ISA_FUNC, - reserved_func, - CS5536_IDE_FUNC, - CS5536_ACC_FUNC, - CS5536_OHCI_FUNC, - CS5536_EHCI_FUNC, - CS5536_FUNC_END, -}; - -static const cs5536_pci_vsm_write vsm_conf_write[] = { - [CS5536_ISA_FUNC] = pci_isa_write_reg, - [reserved_func] = NULL, - [CS5536_IDE_FUNC] = pci_ide_write_reg, - [CS5536_ACC_FUNC] = pci_acc_write_reg, - [CS5536_OHCI_FUNC] = pci_ohci_write_reg, - [CS5536_EHCI_FUNC] = pci_ehci_write_reg, -}; - -static const cs5536_pci_vsm_read vsm_conf_read[] = { - [CS5536_ISA_FUNC] = pci_isa_read_reg, - [reserved_func] = NULL, - [CS5536_IDE_FUNC] = pci_ide_read_reg, - [CS5536_ACC_FUNC] = pci_acc_read_reg, - [CS5536_OHCI_FUNC] = pci_ohci_read_reg, - [CS5536_EHCI_FUNC] = pci_ehci_read_reg, -}; - -/* - * write to PCI config space and transfer it to MSR write. - */ -void cs5536_pci_conf_write4(int function, int reg, u32 value) -{ - if ((function <= CS5536_FUNC_START) || (function >= CS5536_FUNC_END)) - return; - if ((reg < 0) || (reg > 0x100) || ((reg & 0x03) != 0)) - return; - - if (vsm_conf_write[function] != NULL) - vsm_conf_write[function](reg, value); -} - -/* - * read PCI config space and transfer it to MSR access. - */ -u32 cs5536_pci_conf_read4(int function, int reg) -{ - u32 data = 0; - - if ((function <= CS5536_FUNC_START) || (function >= CS5536_FUNC_END)) - return 0; - if ((reg < 0) || ((reg & 0x03) != 0)) - return 0; - if (reg > 0x100) - return 0xffffffff; - - if (vsm_conf_read[function] != NULL) - data = vsm_conf_read[function](reg); - - return data; -} diff --git a/arch/mips/loongson64/fuloong-2e/Makefile b/arch/mips/loongson64/fuloong-2e/Makefile deleted file mode 100644 index bb58edb3bea7..000000000000 --- a/arch/mips/loongson64/fuloong-2e/Makefile +++ /dev/null @@ -1,6 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -# -# Makefile for Lemote Fuloong2e mini-PC board.
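-# irq.o provides the i8259/Bonito interrupt dispatch, reset.o the reboot/shutdown hooks and dma.o the DMA/physical address translation for this board.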
-# - -obj-y += irq.o reset.o dma.o diff --git a/arch/mips/loongson64/fuloong-2e/dma.c b/arch/mips/loongson64/fuloong-2e/dma.c deleted file mode 100644 index e122292bf666..000000000000 --- a/arch/mips/loongson64/fuloong-2e/dma.c +++ /dev/null @@ -1,12 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include - -dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr) -{ - return paddr | 0x80000000; -} - -phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t dma_addr) -{ - return dma_addr & 0x7fffffff; -} diff --git a/arch/mips/loongson64/fuloong-2e/irq.c b/arch/mips/loongson64/fuloong-2e/irq.c deleted file mode 100644 index 32278e7bf85c..000000000000 --- a/arch/mips/loongson64/fuloong-2e/irq.c +++ /dev/null @@ -1,65 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) 2007 Lemote Inc. & Institute of Computing Technology - * Author: Fuxin Zhang, zhangfx@lemote.com - */ -#include - -#include -#include - -#include - -static void i8259_irqdispatch(void) -{ - int irq; - - irq = i8259_irq(); - if (irq >= 0) - do_IRQ(irq); - else - spurious_interrupt(); -} - -asmlinkage void mach_irq_dispatch(unsigned int pending) -{ - if (pending & CAUSEF_IP7) - do_IRQ(MIPS_CPU_IRQ_BASE + 7); - else if (pending & CAUSEF_IP6) /* perf counter overflow */ - do_perfcnt_IRQ(); - else if (pending & CAUSEF_IP5) - i8259_irqdispatch(); - else if (pending & CAUSEF_IP2) - bonito_irqdispatch(); - else - spurious_interrupt(); -} - -static struct irqaction cascade_irqaction = { - .handler = no_action, - .name = "cascade", - .flags = IRQF_NO_THREAD, -}; - -void __init mach_init_irq(void) -{ - /* init all controllers: - * 0-15 ------> i8259 interrupt - * 16-23 ------> mips cpu interrupt - * 32-63 ------> bonito irq - */ - - /* most bonito irqs should be level triggered */ - LOONGSON_INTEDGE = LOONGSON_ICU_SYSTEMERR | LOONGSON_ICU_MASTERERR | - LOONGSON_ICU_RETRYERR | LOONGSON_ICU_MBOXES; - - /* Sets the first-level interrupt dispatcher. */ - mips_cpu_irq_init(); - init_i8259_irqs(); - bonito_irq_init(); - - /* bonito irq at IP2 */ - setup_irq(MIPS_CPU_IRQ_BASE + 2, &cascade_irqaction); - /* 8259 irq at IP5 */ - setup_irq(MIPS_CPU_IRQ_BASE + 5, &cascade_irqaction); -} diff --git a/arch/mips/loongson64/fuloong-2e/reset.c b/arch/mips/loongson64/fuloong-2e/reset.c deleted file mode 100644 index 8273de1cf4bb..000000000000 --- a/arch/mips/loongson64/fuloong-2e/reset.c +++ /dev/null @@ -1,19 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* Board-specific reboot/shutdown routines - * Copyright (c) 2009 Philippe Vachon - * - * Copyright (C) 2009 Lemote Inc. - * Author: Wu Zhangjin, wuzhangjin@gmail.com - */ - -#include - -void mach_prepare_reboot(void) -{ - LOONGSON_GENCFG &= ~(1 << 2); - LOONGSON_GENCFG |= (1 << 2); -} - -void mach_prepare_shutdown(void) -{ -} diff --git a/arch/mips/loongson64/lemote-2f/Makefile b/arch/mips/loongson64/lemote-2f/Makefile deleted file mode 100644 index 881a0ec06d1f..000000000000 --- a/arch/mips/loongson64/lemote-2f/Makefile +++ /dev/null @@ -1,12 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -# -# Makefile for lemote loongson2f family machines -# - -obj-y += clock.o machtype.o irq.o reset.o dma.o ec_kb3310b.o - -# -# Suspend Support -# - -obj-$(CONFIG_SUSPEND) += pm.o diff --git a/arch/mips/loongson64/lemote-2f/clock.c b/arch/mips/loongson64/lemote-2f/clock.c deleted file mode 100644 index 8281334df9c8..000000000000 --- a/arch/mips/loongson64/lemote-2f/clock.c +++ /dev/null @@ -1,143 +0,0 @@ -/* - * Copyright (C) 2006 - 2008 Lemote Inc.
& Institute of Computing Technology - * Author: Yanhua, yanh@lemote.com - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - */ -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -static LIST_HEAD(clock_list); -static DEFINE_SPINLOCK(clock_lock); -static DEFINE_MUTEX(clock_list_sem); - -/* Minimum CLK support */ -enum { - DC_ZERO, DC_25PT = 2, DC_37PT, DC_50PT, DC_62PT, DC_75PT, - DC_87PT, DC_DISABLE, DC_RESV -}; - -struct cpufreq_frequency_table loongson2_clockmod_table[] = { - {0, DC_RESV, CPUFREQ_ENTRY_INVALID}, - {0, DC_ZERO, CPUFREQ_ENTRY_INVALID}, - {0, DC_25PT, 0}, - {0, DC_37PT, 0}, - {0, DC_50PT, 0}, - {0, DC_62PT, 0}, - {0, DC_75PT, 0}, - {0, DC_87PT, 0}, - {0, DC_DISABLE, 0}, - {0, DC_RESV, CPUFREQ_TABLE_END}, -}; -EXPORT_SYMBOL_GPL(loongson2_clockmod_table); - -static struct clk cpu_clk = { - .name = "cpu_clk", - .flags = CLK_ALWAYS_ENABLED | CLK_RATE_PROPAGATES, - .rate = 800000000, -}; - -struct clk *clk_get(struct device *dev, const char *id) -{ - return &cpu_clk; -} -EXPORT_SYMBOL(clk_get); - -static void propagate_rate(struct clk *clk) -{ - struct clk *clkp; - - list_for_each_entry(clkp, &clock_list, node) { - if (likely(clkp->parent != clk)) - continue; - if (likely(clkp->ops && clkp->ops->recalc)) - clkp->ops->recalc(clkp); - if (unlikely(clkp->flags & CLK_RATE_PROPAGATES)) - propagate_rate(clkp); - } -} - -int clk_enable(struct clk *clk) -{ - return 0; -} -EXPORT_SYMBOL(clk_enable); - -void clk_disable(struct clk *clk) -{ -} -EXPORT_SYMBOL(clk_disable); - -unsigned long clk_get_rate(struct clk *clk) -{ - if (!clk) - return 0; - - return (unsigned long)clk->rate; -} -EXPORT_SYMBOL(clk_get_rate); - -void clk_put(struct clk *clk) -{ -} -EXPORT_SYMBOL(clk_put); - -int clk_set_rate(struct clk *clk, unsigned long rate) -{ - unsigned int rate_khz = rate / 1000; - struct cpufreq_frequency_table *pos; - int ret = 0; - int regval; - - if (likely(clk->ops && clk->ops->set_rate)) { - unsigned long flags; - - spin_lock_irqsave(&clock_lock, flags); - ret = clk->ops->set_rate(clk, rate, 0); - spin_unlock_irqrestore(&clock_lock, flags); - } - - if (unlikely(clk->flags & CLK_RATE_PROPAGATES)) - propagate_rate(clk); - - cpufreq_for_each_valid_entry(pos, loongson2_clockmod_table) - if (rate_khz == pos->frequency) - break; - if (rate_khz != pos->frequency) - return -ENOTSUPP; - - clk->rate = rate; - - regval = LOONGSON_CHIPCFG(0); - regval = (regval & ~0x7) | (pos->driver_data - 1); - LOONGSON_CHIPCFG(0) = regval; - - return ret; -} -EXPORT_SYMBOL_GPL(clk_set_rate); - -long clk_round_rate(struct clk *clk, unsigned long rate) -{ - if (likely(clk->ops && clk->ops->round_rate)) { - unsigned long flags, rounded; - - spin_lock_irqsave(&clock_lock, flags); - rounded = clk->ops->round_rate(clk, rate); - spin_unlock_irqrestore(&clock_lock, flags); - - return rounded; - } - - return rate; -} -EXPORT_SYMBOL_GPL(clk_round_rate); diff --git a/arch/mips/loongson64/lemote-2f/dma.c b/arch/mips/loongson64/lemote-2f/dma.c deleted file mode 100644 index abf0e39d7e46..000000000000 --- a/arch/mips/loongson64/lemote-2f/dma.c +++ /dev/null @@ -1,14 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include - -dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr) -{ - return paddr | 0x80000000; -} - -phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t dma_addr) -{ - if (dma_addr > 0x8fffffff) - return dma_addr; - return dma_addr & 
0x0fffffff; -} diff --git a/arch/mips/loongson64/lemote-2f/ec_kb3310b.c b/arch/mips/loongson64/lemote-2f/ec_kb3310b.c deleted file mode 100644 index d138220e96a2..000000000000 --- a/arch/mips/loongson64/lemote-2f/ec_kb3310b.c +++ /dev/null @@ -1,125 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Basic KB3310B Embedded Controller support for the YeeLoong 2F netbook - * - * Copyright (C) 2008 Lemote Inc. - * Author: liujl <liujl@lemote.com>, 2008-04-20 - */ - -#include -#include -#include -#include - -#include "ec_kb3310b.h" - -static DEFINE_SPINLOCK(index_access_lock); -static DEFINE_SPINLOCK(port_access_lock); - -unsigned char ec_read(unsigned short addr) -{ - unsigned char value; - unsigned long flags; - - spin_lock_irqsave(&index_access_lock, flags); - outb((addr & 0xff00) >> 8, EC_IO_PORT_HIGH); - outb((addr & 0x00ff), EC_IO_PORT_LOW); - value = inb(EC_IO_PORT_DATA); - spin_unlock_irqrestore(&index_access_lock, flags); - - return value; -} -EXPORT_SYMBOL_GPL(ec_read); - -void ec_write(unsigned short addr, unsigned char val) -{ - unsigned long flags; - - spin_lock_irqsave(&index_access_lock, flags); - outb((addr & 0xff00) >> 8, EC_IO_PORT_HIGH); - outb((addr & 0x00ff), EC_IO_PORT_LOW); - outb(val, EC_IO_PORT_DATA); - /* flush the write action */ - inb(EC_IO_PORT_DATA); - spin_unlock_irqrestore(&index_access_lock, flags); -} -EXPORT_SYMBOL_GPL(ec_write); - -/* - * This function is used for EC command writes and corresponding status queries. - */ -int ec_query_seq(unsigned char cmd) -{ - int timeout; - unsigned char status; - unsigned long flags; - int ret = 0; - - spin_lock_irqsave(&port_access_lock, flags); - - /* put the chip into reset mode */ - udelay(EC_REG_DELAY); - outb(cmd, EC_CMD_PORT); - udelay(EC_REG_DELAY); - - /* check that the command was received by the EC */ - timeout = EC_CMD_TIMEOUT; - status = inb(EC_STS_PORT); - while (timeout-- && (status & (1 << 1))) { - status = inb(EC_STS_PORT); - udelay(EC_REG_DELAY); - } - - spin_unlock_irqrestore(&port_access_lock, flags); - - if (timeout <= 0) { - printk(KERN_ERR "%s: fatal error: timeout...\n", __func__); - ret = -EINVAL; - } else - printk(KERN_INFO - "(%x/%d)ec issued command %d status : 0x%x\n", - timeout, EC_CMD_TIMEOUT - timeout, cmd, status); - - return ret; -} -EXPORT_SYMBOL_GPL(ec_query_seq); - -/* - * Send query command to EC to get the proper event number - */ -int ec_query_event_num(void) -{ - return ec_query_seq(CMD_GET_EVENT_NUM); -} -EXPORT_SYMBOL(ec_query_event_num); - -/* - * Get event number from EC - * - * NOTE: This routine must be called after ec_query_event_num() in the - * interrupt handler. - */ -int ec_get_event_num(void) -{ - int timeout = 100; - unsigned char value; - unsigned char status; - - udelay(EC_REG_DELAY); - status = inb(EC_STS_PORT); - udelay(EC_REG_DELAY); - while (timeout-- && !(status & (1 << 0))) { - status = inb(EC_STS_PORT); - udelay(EC_REG_DELAY); - } - if (timeout <= 0) { - pr_info("%s: get event number timeout.\n", __func__); - - return -EINVAL; - } - value = inb(EC_DAT_PORT); - udelay(EC_REG_DELAY); - - return value; -} -EXPORT_SYMBOL(ec_get_event_num); diff --git a/arch/mips/loongson64/lemote-2f/ec_kb3310b.h b/arch/mips/loongson64/lemote-2f/ec_kb3310b.h deleted file mode 100644 index aecdbc9c875a..000000000000 --- a/arch/mips/loongson64/lemote-2f/ec_kb3310b.h +++ /dev/null @@ -1,184 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * KB3310B Embedded Controller - * - * Copyright (C) 2008 Lemote Inc.
- * Author: liujl , 2008-03-14 - */ - -#ifndef _EC_KB3310B_H -#define _EC_KB3310B_H - -extern unsigned char ec_read(unsigned short addr); -extern void ec_write(unsigned short addr, unsigned char val); -extern int ec_query_seq(unsigned char cmd); -extern int ec_query_event_num(void); -extern int ec_get_event_num(void); - -typedef int (*sci_handler) (int status); -extern sci_handler yeeloong_report_lid_status; - -#define SCI_IRQ_NUM 0x0A - -/* - * The following registers are determined by the EC index configuration. - * 1, fill the PORT_HIGH as EC register high part. - * 2, fill the PORT_LOW as EC register low part. - * 3, fill the PORT_DATA as EC register write data or get the data from it. - */ -#define EC_IO_PORT_HIGH 0x0381 -#define EC_IO_PORT_LOW 0x0382 -#define EC_IO_PORT_DATA 0x0383 - -/* - * EC delay time is 500us for register and status access - */ -#define EC_REG_DELAY 500 /* unit : us */ -#define EC_CMD_TIMEOUT 0x1000 - -/* - * EC access port for SCI communication - */ -#define EC_CMD_PORT 0x66 -#define EC_STS_PORT 0x66 -#define EC_DAT_PORT 0x62 -#define CMD_INIT_IDLE_MODE 0xdd -#define CMD_EXIT_IDLE_MODE 0xdf -#define CMD_INIT_RESET_MODE 0xd8 -#define CMD_REBOOT_SYSTEM 0x8c -#define CMD_GET_EVENT_NUM 0x84 -#define CMD_PROGRAM_PIECE 0xda - -/* temperature & fan registers */ -#define REG_TEMPERATURE_VALUE 0xF458 -#define REG_FAN_AUTO_MAN_SWITCH 0xF459 -#define BIT_FAN_AUTO 0 -#define BIT_FAN_MANUAL 1 -#define REG_FAN_CONTROL 0xF4D2 -#define BIT_FAN_CONTROL_ON (1 << 0) -#define BIT_FAN_CONTROL_OFF (0 << 0) -#define REG_FAN_STATUS 0xF4DA -#define BIT_FAN_STATUS_ON (1 << 0) -#define BIT_FAN_STATUS_OFF (0 << 0) -#define REG_FAN_SPEED_HIGH 0xFE22 -#define REG_FAN_SPEED_LOW 0xFE23 -#define REG_FAN_SPEED_LEVEL 0xF4CC -/* fan speed divider */ -#define FAN_SPEED_DIVIDER 480000 /* (60*1000*1000/62.5/2)*/ - -/* battery registers */ -#define REG_BAT_DESIGN_CAP_HIGH 0xF77D -#define REG_BAT_DESIGN_CAP_LOW 0xF77E -#define REG_BAT_FULLCHG_CAP_HIGH 0xF780 -#define REG_BAT_FULLCHG_CAP_LOW 0xF781 -#define REG_BAT_DESIGN_VOL_HIGH 0xF782 -#define REG_BAT_DESIGN_VOL_LOW 0xF783 -#define REG_BAT_CURRENT_HIGH 0xF784 -#define REG_BAT_CURRENT_LOW 0xF785 -#define REG_BAT_VOLTAGE_HIGH 0xF786 -#define REG_BAT_VOLTAGE_LOW 0xF787 -#define REG_BAT_TEMPERATURE_HIGH 0xF788 -#define REG_BAT_TEMPERATURE_LOW 0xF789 -#define REG_BAT_RELATIVE_CAP_HIGH 0xF492 -#define REG_BAT_RELATIVE_CAP_LOW 0xF493 -#define REG_BAT_VENDOR 0xF4C4 -#define FLAG_BAT_VENDOR_SANYO 0x01 -#define FLAG_BAT_VENDOR_SIMPLO 0x02 -#define REG_BAT_CELL_COUNT 0xF4C6 -#define FLAG_BAT_CELL_3S1P 0x03 -#define FLAG_BAT_CELL_3S2P 0x06 -#define REG_BAT_CHARGE 0xF4A2 -#define FLAG_BAT_CHARGE_DISCHARGE 0x01 -#define FLAG_BAT_CHARGE_CHARGE 0x02 -#define FLAG_BAT_CHARGE_ACPOWER 0x00 -#define REG_BAT_STATUS 0xF4B0 -#define BIT_BAT_STATUS_LOW (1 << 5) -#define BIT_BAT_STATUS_DESTROY (1 << 2) -#define BIT_BAT_STATUS_FULL (1 << 1) -#define BIT_BAT_STATUS_IN (1 << 0) -#define REG_BAT_CHARGE_STATUS 0xF4B1 -#define BIT_BAT_CHARGE_STATUS_OVERTEMP (1 << 2) -#define BIT_BAT_CHARGE_STATUS_PRECHG (1 << 1) -#define REG_BAT_STATE 0xF482 -#define BIT_BAT_STATE_CHARGING (1 << 1) -#define BIT_BAT_STATE_DISCHARGING (1 << 0) -#define REG_BAT_POWER 0xF440 -#define BIT_BAT_POWER_S3 (1 << 2) -#define BIT_BAT_POWER_ON (1 << 1) -#define BIT_BAT_POWER_ACIN (1 << 0) - -/* other registers */ -/* Audio: rd/wr */ -#define REG_AUDIO_VOLUME 0xF46C -#define REG_AUDIO_MUTE 0xF4E7 -#define REG_AUDIO_BEEP 0xF4D0 -/* USB port power or not: rd/wr */ -#define REG_USB0_FLAG 0xF461 -#define 
REG_USB1_FLAG 0xF462 -#define REG_USB2_FLAG 0xF463 -#define BIT_USB_FLAG_ON 1 -#define BIT_USB_FLAG_OFF 0 -/* LID */ -#define REG_LID_DETECT 0xF4BD -#define BIT_LID_DETECT_ON 1 -#define BIT_LID_DETECT_OFF 0 -/* CRT */ -#define REG_CRT_DETECT 0xF4AD -#define BIT_CRT_DETECT_PLUG 1 -#define BIT_CRT_DETECT_UNPLUG 0 -/* LCD backlight brightness adjust: 9 levels */ -#define REG_DISPLAY_BRIGHTNESS 0xF4F5 -/* Black screen Status */ -#define BIT_DISPLAY_LCD_ON 1 -#define BIT_DISPLAY_LCD_OFF 0 -/* LCD backlight control: off/restore */ -#define REG_BACKLIGHT_CTRL 0xF7BD -#define BIT_BACKLIGHT_ON 1 -#define BIT_BACKLIGHT_OFF 0 -/* Reset the machine auto-clear: rd/wr */ -#define REG_RESET 0xF4EC -#define BIT_RESET_ON 1 -/* Light the led: rd/wr */ -#define REG_LED 0xF4C8 -#define BIT_LED_RED_POWER (1 << 0) -#define BIT_LED_ORANGE_POWER (1 << 1) -#define BIT_LED_GREEN_CHARGE (1 << 2) -#define BIT_LED_RED_CHARGE (1 << 3) -#define BIT_LED_NUMLOCK (1 << 4) -/* Test led mode, all led on/off */ -#define REG_LED_TEST 0xF4C2 -#define BIT_LED_TEST_IN 1 -#define BIT_LED_TEST_OUT 0 -/* Camera on/off */ -#define REG_CAMERA_STATUS 0xF46A -#define BIT_CAMERA_STATUS_ON 1 -#define BIT_CAMERA_STATUS_OFF 0 -#define REG_CAMERA_CONTROL 0xF7B7 -#define BIT_CAMERA_CONTROL_OFF 0 -#define BIT_CAMERA_CONTROL_ON 1 -/* Wlan Status */ -#define REG_WLAN 0xF4FA -#define BIT_WLAN_ON 1 -#define BIT_WLAN_OFF 0 -#define REG_DISPLAY_LCD 0xF79F - -/* SCI Event Number from EC */ -enum { - EVENT_LID = 0x23, /* LID open/close */ - EVENT_DISPLAY_TOGGLE, /* Fn+F3 for display switch */ - EVENT_SLEEP, /* Fn+F1 for entering sleep mode */ - EVENT_OVERTEMP, /* Over-temperature happened */ - EVENT_CRT_DETECT, /* CRT is connected */ - EVENT_CAMERA, /* Camera on/off */ - EVENT_USB_OC2, /* USB2 Over Current occurred */ - EVENT_USB_OC0, /* USB0 Over Current occurred */ - EVENT_BLACK_SCREEN, /* Turn on/off backlight */ - EVENT_AUDIO_MUTE, /* Mute on/off */ - EVENT_DISPLAY_BRIGHTNESS,/* LCD backlight brightness adjust */ - EVENT_AC_BAT, /* AC & Battery relative issue */ - EVENT_AUDIO_VOLUME, /* Volume adjust */ - EVENT_WLAN, /* Wlan on/off */ - EVENT_END -}; - -#endif /* !_EC_KB3310B_H */ diff --git a/arch/mips/loongson64/lemote-2f/irq.c b/arch/mips/loongson64/lemote-2f/irq.c deleted file mode 100644 index c58a044c6c07..000000000000 --- a/arch/mips/loongson64/lemote-2f/irq.c +++ /dev/null @@ -1,126 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) 2007 Lemote Inc. - * Author: Fuxin Zhang, zhangfx@lemote.com - */ - -#include -#include -#include - -#include -#include -#include - -#include -#include - -#define LOONGSON_TIMER_IRQ (MIPS_CPU_IRQ_BASE + 7) /* cpu timer */ -#define LOONGSON_NORTH_BRIDGE_IRQ (MIPS_CPU_IRQ_BASE + 6) /* bonito */ -#define LOONGSON_UART_IRQ (MIPS_CPU_IRQ_BASE + 3) /* cpu serial port */ -#define LOONGSON_SOUTH_BRIDGE_IRQ (MIPS_CPU_IRQ_BASE + 2) /* i8259 */ - -#define LOONGSON_INT_BIT_INT0 (1 << 11) -#define LOONGSON_INT_BIT_INT1 (1 << 12) - -/* - * The generic i8259_irq() make the kernel hang on booting. Since we cannot - * get the irq via the IRR directly, we access the ISR instead. - */ -int mach_i8259_irq(void) -{ - int irq, isr; - - irq = -1; - - if ((LOONGSON_INTISR & LOONGSON_INTEN) & LOONGSON_INT_BIT_INT0) { - raw_spin_lock(&i8259A_lock); - isr = inb(PIC_MASTER_CMD) & - ~inb(PIC_MASTER_IMR) & ~(1 << PIC_CASCADE_IR); - if (!isr) - isr = (inb(PIC_SLAVE_CMD) & ~inb(PIC_SLAVE_IMR)) << 8; - irq = ffs(isr) - 1; - if (unlikely(irq == 7)) { - /* - * This may be a spurious interrupt. 
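- * (the master 8259A reports spurious interrupts on its lowest-priority input, IRQ 7, so a reported IRQ 7 must be verified against the ISR)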
- * - * Read the interrupt status register (ISR). If the most - * significant bit is not set then there is no valid - * interrupt. - */ - outb(0x0B, PIC_MASTER_ISR); /* ISR register */ - if (~inb(PIC_MASTER_ISR) & 0x80) - irq = -1; - } - raw_spin_unlock(&i8259A_lock); - } - - return irq; -} -EXPORT_SYMBOL(mach_i8259_irq); - -static void i8259_irqdispatch(void) -{ - int irq; - - irq = mach_i8259_irq(); - if (irq >= 0) - do_IRQ(irq); - else - spurious_interrupt(); -} - -void mach_irq_dispatch(unsigned int pending) -{ - if (pending & CAUSEF_IP7) - do_IRQ(LOONGSON_TIMER_IRQ); - else if (pending & CAUSEF_IP6) { /* North Bridge, Perf counter */ - do_perfcnt_IRQ(); - bonito_irqdispatch(); - } else if (pending & CAUSEF_IP3) /* CPU UART */ - do_IRQ(LOONGSON_UART_IRQ); - else if (pending & CAUSEF_IP2) /* South Bridge */ - i8259_irqdispatch(); - else - spurious_interrupt(); -} - -static irqreturn_t ip6_action(int cpl, void *dev_id) -{ - return IRQ_HANDLED; -} - -static struct irqaction ip6_irqaction = { - .handler = ip6_action, - .name = "cascade", - .flags = IRQF_SHARED | IRQF_NO_THREAD, -}; - -static struct irqaction cascade_irqaction = { - .handler = no_action, - .name = "cascade", - .flags = IRQF_NO_THREAD | IRQF_NO_SUSPEND, -}; - -void __init mach_init_irq(void) -{ - /* init all controllers: - * 0-15 ------> i8259 interrupt - * 16-23 ------> mips cpu interrupt - * 32-63 ------> bonito irq - */ - - /* setup cs5536 as high level trigger */ - LOONGSON_INTPOL = LOONGSON_INT_BIT_INT0 | LOONGSON_INT_BIT_INT1; - LOONGSON_INTEDGE &= ~(LOONGSON_INT_BIT_INT0 | LOONGSON_INT_BIT_INT1); - - /* Sets the first-level interrupt dispatcher. */ - mips_cpu_irq_init(); - init_i8259_irqs(); - bonito_irq_init(); - - /* setup north bridge irq (bonito) */ - setup_irq(LOONGSON_NORTH_BRIDGE_IRQ, &ip6_irqaction); - /* setup south bridge irq (i8259) */ - setup_irq(LOONGSON_SOUTH_BRIDGE_IRQ, &cascade_irqaction); -} diff --git a/arch/mips/loongson64/lemote-2f/machtype.c b/arch/mips/loongson64/lemote-2f/machtype.c deleted file mode 100644 index 9462a3ab57be..000000000000 --- a/arch/mips/loongson64/lemote-2f/machtype.c +++ /dev/null @@ -1,41 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) 2009 Lemote Inc. - * Author: Wu Zhangjin, wuzhangjin@gmail.com - */ -#include - -#include - -void __init mach_prom_init_machtype(void) -{ - /* We share the same kernel image among the Lemote 2F family of - * machines and provide the machtype= kernel command line for users - * to indicate their machine; the latest PMON passes it automatically. - * Fortunately, on everything except the NAS machine we can also get - * the machine type directly from the PMON_VER= command line, which - * helps users of older machines a lot. - * - * If no "machtype=" is passed, get the machine type from "PMON_VER=".
- * PMON_VER=LM8089 Lemote 8.9'' netbook - * LM8101 Lemote 10.1'' netbook - * (The above two netbooks have the same kernel support) - * LM6XXX Lemote FuLoong(2F) box series - * LM9XXX Lemote LynLoong PC series - */ - if (strstr(arcs_cmdline, "PMON_VER=LM")) { - if (strstr(arcs_cmdline, "PMON_VER=LM8")) - mips_machtype = MACH_LEMOTE_YL2F89; - else if (strstr(arcs_cmdline, "PMON_VER=LM6")) - mips_machtype = MACH_LEMOTE_FL2F; - else if (strstr(arcs_cmdline, "PMON_VER=LM9")) - mips_machtype = MACH_LEMOTE_LL2F; - else - mips_machtype = MACH_LEMOTE_NAS; - - strcat(arcs_cmdline, " machtype="); - strcat(arcs_cmdline, get_system_type()); - strcat(arcs_cmdline, " "); - } -} diff --git a/arch/mips/loongson64/lemote-2f/pm.c b/arch/mips/loongson64/lemote-2f/pm.c deleted file mode 100644 index 3d0027229e3c..000000000000 --- a/arch/mips/loongson64/lemote-2f/pm.c +++ /dev/null @@ -1,145 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Lemote loongson2f family machines' specific suspend support - * - * Copyright (C) 2009 Lemote Inc. - * Author: Wu Zhangjin - */ - -#include -#include -#include -#include -#include - -#include -#include -#include - -#include - -#include -#include "ec_kb3310b.h" - -#define I8042_KBD_IRQ 1 -#define I8042_CTR_KBDINT 0x01 -#define I8042_CTR_KBDDIS 0x10 - -static unsigned char i8042_ctr; - -static int i8042_enable_kbd_port(void) -{ - if (i8042_command(&i8042_ctr, I8042_CMD_CTL_RCTR)) { - pr_err("i8042.c: Can't read CTR while enabling i8042 kbd port." - "\n"); - return -EIO; - } - - i8042_ctr &= ~I8042_CTR_KBDDIS; - i8042_ctr |= I8042_CTR_KBDINT; - - if (i8042_command(&i8042_ctr, I8042_CMD_CTL_WCTR)) { - i8042_ctr &= ~I8042_CTR_KBDINT; - i8042_ctr |= I8042_CTR_KBDDIS; - pr_err("i8042.c: Failed to enable KBD port.\n"); - - return -EIO; - } - - return 0; -} - -void setup_wakeup_events(void) -{ - int irq_mask; - - switch (mips_machtype) { - case MACH_LEMOTE_ML2F7: - case MACH_LEMOTE_YL2F89: - /* open the keyboard irq in i8259A */ - outb((0xff & ~(1 << I8042_KBD_IRQ)), PIC_MASTER_IMR); - irq_mask = inb(PIC_MASTER_IMR); - - /* enable keyboard port */ - i8042_enable_kbd_port(); - - /* Wakeup CPU via SCI lid open event */ - outb(irq_mask & ~(1 << PIC_CASCADE_IR), PIC_MASTER_IMR); - inb(PIC_MASTER_IMR); - outb(0xff & ~(1 << (SCI_IRQ_NUM - 8)), PIC_SLAVE_IMR); - inb(PIC_SLAVE_IMR); - - break; - - default: - break; - } -} - -static struct delayed_work lid_task; -static int initialized; -/* yeeloong_report_lid_status will be implemented in yeeloong_laptop.c */ -sci_handler yeeloong_report_lid_status; -EXPORT_SYMBOL(yeeloong_report_lid_status); -static void yeeloong_lid_update_task(struct work_struct *work) -{ - if (yeeloong_report_lid_status) - yeeloong_report_lid_status(BIT_LID_DETECT_ON); -} - -int wakeup_loongson(void) -{ - int irq; - - /* query the interrupt number */ - irq = mach_i8259_irq(); - if (irq < 0) - return 0; - - printk(KERN_INFO "%s: irq = %d\n", __func__, irq); - - if (irq == I8042_KBD_IRQ) - return 1; - else if (irq == SCI_IRQ_NUM) { - int ret, sci_event; - /* query the event number */ - ret = ec_query_seq(CMD_GET_EVENT_NUM); - if (ret < 0) - return 0; - sci_event = ec_get_event_num(); - if (sci_event < 0) - return 0; - if (sci_event == EVENT_LID) { - int lid_status; - /* check the LID status */ - lid_status = ec_read(REG_LID_DETECT); - /* wakeup cpu when people open the LID */ - if (lid_status == BIT_LID_DETECT_ON) { - /* If we call it directly here, the WARNING - * will be sent out by getnstimeofday - * via "WARN_ON(timekeeping_suspended);" - * because we can 
not schedule in suspend mode. - */ - if (initialized == 0) { - INIT_DELAYED_WORK(&lid_task, - yeeloong_lid_update_task); - initialized = 1; - } - schedule_delayed_work(&lid_task, 1); - return 1; - } - } - } - - return 0; -} - -void __weak mach_suspend(void) -{ - disable_mfgpt0_counter(); -} - -void __weak mach_resume(void) -{ - enable_mfgpt0_counter(); -} diff --git a/arch/mips/loongson64/lemote-2f/reset.c b/arch/mips/loongson64/lemote-2f/reset.c deleted file mode 100644 index 0db0934302ea..000000000000 --- a/arch/mips/loongson64/lemote-2f/reset.c +++ /dev/null @@ -1,155 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* Board-specific reboot/shutdown routines - * - * Copyright (c) 2009 Philippe Vachon - * - * Copyright (C) 2009 Lemote Inc. - * Author: Wu Zhangjin, wuzhangjin@gmail.com - */ - -#include -#include -#include - -#include - -#include - -#include -#include "ec_kb3310b.h" - -static void reset_cpu(void) -{ - /* - * reset the cpu to full speed; this is needed when enabling cpu - * frequency scaling - */ - LOONGSON_CHIPCFG(0) |= 0x7; -} - -/* reset support for fuloong2f */ - -static void fl2f_reboot(void) -{ - reset_cpu(); - - /* send a reset signal to south bridge. - * - * NOTE: if "Power Management" is enabled in the kernel, the rtl8169 - * will not reset properly with this reset operation and will not - * work in PMON afterwards; typing the halt command and then rebooting - * works around it, so the hardware reset logic seems not to work - * reliably. - */ - { - u32 hi, lo; - _rdmsr(DIVIL_MSR_REG(DIVIL_SOFT_RESET), &hi, &lo); - lo |= 0x00000001; - _wrmsr(DIVIL_MSR_REG(DIVIL_SOFT_RESET), hi, lo); - } -} - -static void fl2f_shutdown(void) -{ - u32 hi, lo, val; - int gpio_base; - - /* get gpio base */ - _rdmsr(DIVIL_MSR_REG(DIVIL_LBAR_GPIO), &hi, &lo); - gpio_base = lo & 0xff00; - - /* make cs5536 gpio13 output enable */ - val = inl(gpio_base + GPIOL_OUT_EN); - val &= ~(1 << (16 + 13)); - val |= (1 << 13); - outl(val, gpio_base + GPIOL_OUT_EN); - mmiowb(); - /* make cs5536 gpio13 output low level voltage. */ - val = inl(gpio_base + GPIOL_OUT_VAL) & ~(1 << (13)); - val |= (1 << (16 + 13)); - outl(val, gpio_base + GPIOL_OUT_VAL); - mmiowb(); -} - -/* reset support for yeeloong2f and mengloong2f notebook */ - -static void ml2f_reboot(void) -{ - reset_cpu(); - - /* send a reset signal to the EC (embedded controller) */ - ec_write(REG_RESET, BIT_RESET_ON); -} - -#define yl2f89_reboot ml2f_reboot - -/* the menglong (7 inch) laptop has different shutdown logic from the 8.9 inch one */ -#define EC_SHUTDOWN_IO_PORT_HIGH 0xff2d -#define EC_SHUTDOWN_IO_PORT_LOW 0xff2e -#define EC_SHUTDOWN_IO_PORT_DATA 0xff2f -#define REG_SHUTDOWN_HIGH 0xFC -#define REG_SHUTDOWN_LOW 0x29 -#define BIT_SHUTDOWN_ON (1 << 1) - -static void ml2f_shutdown(void) -{ - u8 val; - u64 i; - - outb(REG_SHUTDOWN_HIGH, EC_SHUTDOWN_IO_PORT_HIGH); - outb(REG_SHUTDOWN_LOW, EC_SHUTDOWN_IO_PORT_LOW); - mmiowb(); - val = inb(EC_SHUTDOWN_IO_PORT_DATA); - outb(val & (~BIT_SHUTDOWN_ON), EC_SHUTDOWN_IO_PORT_DATA); - mmiowb(); - /* we need to wait long enough here... but how many microseconds?
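(the loop below busy-waits for 0x10000 delay() iterations as a rough, uncalibrated pause)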
*/ - for (i = 0; i < 0x10000; i++) - delay(); - outb(val | BIT_SHUTDOWN_ON, EC_SHUTDOWN_IO_PORT_DATA); - mmiowb(); -} - -static void yl2f89_shutdown(void) -{ - /* cpu-gpio0 output low */ - LOONGSON_GPIODATA &= ~0x00000001; - /* cpu-gpio0 as output */ - LOONGSON_GPIOIE &= ~0x00000001; -} - -void mach_prepare_reboot(void) -{ - switch (mips_machtype) { - case MACH_LEMOTE_FL2F: - case MACH_LEMOTE_NAS: - case MACH_LEMOTE_LL2F: - fl2f_reboot(); - break; - case MACH_LEMOTE_ML2F7: - ml2f_reboot(); - break; - case MACH_LEMOTE_YL2F89: - yl2f89_reboot(); - break; - default: - break; - } -} - -void mach_prepare_shutdown(void) -{ - switch (mips_machtype) { - case MACH_LEMOTE_FL2F: - case MACH_LEMOTE_NAS: - case MACH_LEMOTE_LL2F: - fl2f_shutdown(); - break; - case MACH_LEMOTE_ML2F7: - ml2f_shutdown(); - break; - case MACH_LEMOTE_YL2F89: - yl2f89_shutdown(); - break; - default: - break; - } -} diff --git a/drivers/cpufreq/loongson2_cpufreq.c b/drivers/cpufreq/loongson2_cpufreq.c index 890813e0bb76..e9caa9586982 100644 --- a/drivers/cpufreq/loongson2_cpufreq.c +++ b/drivers/cpufreq/loongson2_cpufreq.c @@ -23,7 +23,7 @@ #include #include -#include +#include static uint nowait; -- cgit v1.2.3 From 5831fdb099ddeec9849658cd534fdcbb03aa6ff9 Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Sun, 20 Oct 2019 22:43:15 +0800 Subject: MIPS: Loongson2ef: clean up loongson64 related code Remove irrelevant macros, defines and code from the loongson2ef mach. Also rename some defines to match the new naming. Signed-off-by: Jiaxun Yang Signed-off-by: Paul Burton Cc: linux-mips@vger.kernel.org Cc: paul.burton@mips.com --- .../mips/include/asm/mach-loongson2ef/boot_param.h | 221 -------------------- .../asm/mach-loongson2ef/cpu-feature-overrides.h | 15 +- arch/mips/include/asm/mach-loongson2ef/irq.h | 44 ---- .../asm/mach-loongson2ef/kernel-entry-init.h | 90 -------- arch/mips/include/asm/mach-loongson2ef/loongson.h | 37 +--- .../include/asm/mach-loongson2ef/loongson_hwmon.h | 56 ----- .../include/asm/mach-loongson2ef/loongson_regs.h | 227 --------------------- arch/mips/include/asm/mach-loongson2ef/machine.h | 12 +- .../include/asm/mach-loongson2ef/mc146818rtc.h | 6 +- arch/mips/include/asm/mach-loongson2ef/mem.h | 6 +- arch/mips/include/asm/mach-loongson2ef/mmzone.h | 50 ----- arch/mips/include/asm/mach-loongson2ef/pci.h | 12 +- arch/mips/include/asm/mach-loongson2ef/spaces.h | 4 +- arch/mips/include/asm/mach-loongson2ef/topology.h | 23 --- .../include/asm/mach-loongson2ef/workarounds.h | 8 - arch/mips/loongson2ef/Kconfig | 3 - arch/mips/loongson2ef/common/early_printk.c | 2 +- arch/mips/loongson2ef/common/env.c | 141 ------------- arch/mips/loongson2ef/common/init.c | 5 - arch/mips/loongson2ef/common/machtype.c | 1 - arch/mips/loongson2ef/common/mem.c | 37 ---- arch/mips/loongson2ef/common/pci.c | 8 - arch/mips/loongson2ef/common/pm.c | 9 +- arch/mips/loongson2ef/common/reset.c | 21 -- arch/mips/loongson2ef/common/serial.c | 67 ++---- arch/mips/loongson2ef/common/setup.c | 21 -- arch/mips/loongson2ef/common/time.c | 4 - arch/mips/loongson2ef/common/uart_base.c | 17 +- arch/mips/loongson2ef/lemote-2f/clock.c | 4 +- arch/mips/loongson2ef/lemote-2f/reset.c | 2 +- 30 files changed, 56 insertions(+), 1097 deletions(-) delete mode 100644 arch/mips/include/asm/mach-loongson2ef/boot_param.h delete mode 100644 arch/mips/include/asm/mach-loongson2ef/irq.h delete mode 100644 arch/mips/include/asm/mach-loongson2ef/kernel-entry-init.h delete mode 100644 arch/mips/include/asm/mach-loongson2ef/loongson_hwmon.h delete mode 100644
arch/mips/include/asm/mach-loongson2ef/loongson_regs.h delete mode 100644 arch/mips/include/asm/mach-loongson2ef/mmzone.h delete mode 100644 arch/mips/include/asm/mach-loongson2ef/topology.h delete mode 100644 arch/mips/include/asm/mach-loongson2ef/workarounds.h (limited to 'arch') diff --git a/arch/mips/include/asm/mach-loongson2ef/boot_param.h b/arch/mips/include/asm/mach-loongson2ef/boot_param.h deleted file mode 100644 index 8c286bedff3e..000000000000 --- a/arch/mips/include/asm/mach-loongson2ef/boot_param.h +++ /dev/null @@ -1,221 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __ASM_MACH_LOONGSON64_BOOT_PARAM_H_ -#define __ASM_MACH_LOONGSON64_BOOT_PARAM_H_ - -#define SYSTEM_RAM_LOW 1 -#define SYSTEM_RAM_HIGH 2 -#define SYSTEM_RAM_RESERVED 3 -#define PCI_IO 4 -#define PCI_MEM 5 -#define LOONGSON_CFG_REG 6 -#define VIDEO_ROM 7 -#define ADAPTER_ROM 8 -#define ACPI_TABLE 9 -#define SMBIOS_TABLE 10 -#define MAX_MEMORY_TYPE 11 - -#define LOONGSON3_BOOT_MEM_MAP_MAX 128 -struct efi_memory_map_loongson { - u16 vers; /* version of efi_memory_map */ - u32 nr_map; /* number of memory_maps */ - u32 mem_freq; /* memory frequence */ - struct mem_map { - u32 node_id; /* node_id which memory attached to */ - u32 mem_type; /* system memory, pci memory, pci io, etc. */ - u64 mem_start; /* memory map start address */ - u32 mem_size; /* each memory_map size, not the total size */ - } map[LOONGSON3_BOOT_MEM_MAP_MAX]; -} __packed; - -enum loongson_cpu_type { - Legacy_2E = 0x0, - Legacy_2F = 0x1, - Legacy_3A = 0x2, - Legacy_3B = 0x3, - Legacy_1A = 0x4, - Legacy_1B = 0x5, - Legacy_2G = 0x6, - Legacy_2H = 0x7, - Loongson_1A = 0x100, - Loongson_1B = 0x101, - Loongson_2E = 0x200, - Loongson_2F = 0x201, - Loongson_2G = 0x202, - Loongson_2H = 0x203, - Loongson_3A = 0x300, - Loongson_3B = 0x301 -}; - -/* - * Capability and feature descriptor structure for MIPS CPU - */ -struct efi_cpuinfo_loongson { - u16 vers; /* version of efi_cpuinfo_loongson */ - u32 processor_id; /* PRID, e.g. 6305, 6306 */ - u32 cputype; /* Loongson_3A/3B, etc. 
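(values are taken from enum loongson_cpu_type above)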
*/ - u32 total_node; /* num of total numa nodes */ - u16 cpu_startup_core_id; /* Boot core id */ - u16 reserved_cores_mask; - u32 cpu_clock_freq; /* cpu_clock */ - u32 nr_cpus; -} __packed; - -#define MAX_UARTS 64 -struct uart_device { - u32 iotype; /* see include/linux/serial_core.h */ - u32 uartclk; - u32 int_offset; - u64 uart_base; -} __packed; - -#define MAX_SENSORS 64 -#define SENSOR_TEMPER 0x00000001 -#define SENSOR_VOLTAGE 0x00000002 -#define SENSOR_FAN 0x00000004 -struct sensor_device { - char name[32]; /* a formal name */ - char label[64]; /* a flexible description */ - u32 type; /* SENSOR_* */ - u32 id; /* instance id of a sensor-class */ - u32 fan_policy; /* see loongson_hwmon.h */ - u32 fan_percent;/* only for constant speed policy */ - u64 base_addr; /* base address of device registers */ -} __packed; - -struct system_loongson { - u16 vers; /* version of system_loongson */ - u32 ccnuma_smp; /* 0: no numa; 1: has numa */ - u32 sing_double_channel; /* 1:single; 2:double */ - u32 nr_uarts; - struct uart_device uarts[MAX_UARTS]; - u32 nr_sensors; - struct sensor_device sensors[MAX_SENSORS]; - char has_ec; - char ec_name[32]; - u64 ec_base_addr; - char has_tcm; - char tcm_name[32]; - u64 tcm_base_addr; - u64 workarounds; /* see workarounds.h */ -} __packed; - -struct irq_source_routing_table { - u16 vers; - u16 size; - u16 rtr_bus; - u16 rtr_devfn; - u32 vendor; - u32 device; - u32 PIC_type; /* conform use HT or PCI to route to CPU-PIC */ - u64 ht_int_bit; /* 3A: 1<<24; 3B: 1<<16 */ - u64 ht_enable; /* irqs used in this PIC */ - u32 node_id; /* node id: 0x0-0; 0x1-1; 0x10-2; 0x11-3 */ - u64 pci_mem_start_addr; - u64 pci_mem_end_addr; - u64 pci_io_start_addr; - u64 pci_io_end_addr; - u64 pci_config_addr; - u32 dma_mask_bits; -} __packed; - -struct interface_info { - u16 vers; /* version of the specificition */ - u16 size; - u8 flag; - char description[64]; -} __packed; - -#define MAX_RESOURCE_NUMBER 128 -struct resource_loongson { - u64 start; /* resource start address */ - u64 end; /* resource end address */ - char name[64]; - u32 flags; -}; - -struct archdev_data {}; /* arch specific additions */ - -struct board_devices { - char name[64]; /* hold the device name */ - u32 num_resources; /* number of device_resource */ - /* for each device's resource */ - struct resource_loongson resource[MAX_RESOURCE_NUMBER]; - /* arch specific additions */ - struct archdev_data archdata; -}; - -struct loongson_special_attribute { - u16 vers; /* version of this special */ - char special_name[64]; /* special_atribute_name */ - u32 loongson_special_type; /* type of special device */ - /* for each device's resource */ - struct resource_loongson resource[MAX_RESOURCE_NUMBER]; -}; - -struct loongson_params { - u64 memory_offset; /* efi_memory_map_loongson struct offset */ - u64 cpu_offset; /* efi_cpuinfo_loongson struct offset */ - u64 system_offset; /* system_loongson struct offset */ - u64 irq_offset; /* irq_source_routing_table struct offset */ - u64 interface_offset; /* interface_info struct offset */ - u64 special_offset; /* loongson_special_attribute struct offset */ - u64 boarddev_table_offset; /* board_devices offset */ -}; - -struct smbios_tables { - u16 vers; /* version of smbios */ - u64 vga_bios; /* vga_bios address */ - struct loongson_params lp; -}; - -struct efi_reset_system_t { - u64 ResetCold; - u64 ResetWarm; - u64 ResetType; - u64 Shutdown; - u64 DoSuspend; /* NULL if not support */ -}; - -struct efi_loongson { - u64 mps; /* MPS table */ - u64 acpi; /* ACPI table (IA64 ext 0.71) */ - 
u64 acpi20; /* ACPI table (ACPI 2.0) */ - struct smbios_tables smbios; /* SM BIOS table */ - u64 sal_systab; /* SAL system table */ - u64 boot_info; /* boot info table */ -}; - -struct boot_params { - struct efi_loongson efi; - struct efi_reset_system_t reset_system; -}; - -struct loongson_system_configuration { - u32 nr_cpus; - u32 nr_nodes; - int cores_per_node; - int cores_per_package; - u16 boot_cpu_id; - u16 reserved_cpus_mask; - enum loongson_cpu_type cputype; - u64 ht_control_base; - u64 pci_mem_start_addr; - u64 pci_mem_end_addr; - u64 pci_io_base; - u64 restart_addr; - u64 poweroff_addr; - u64 suspend_addr; - u64 vgabios_addr; - u32 dma_mask_bits; - char ecname[32]; - u32 nr_uarts; - struct uart_device uarts[MAX_UARTS]; - u32 nr_sensors; - struct sensor_device sensors[MAX_SENSORS]; - u64 workarounds; -}; - -extern struct efi_memory_map_loongson *loongson_memmap; -extern struct loongson_system_configuration loongson_sysconf; - -#endif diff --git a/arch/mips/include/asm/mach-loongson2ef/cpu-feature-overrides.h b/arch/mips/include/asm/mach-loongson2ef/cpu-feature-overrides.h index 83ad90d8005d..b2ee859ca0b7 100644 --- a/arch/mips/include/asm/mach-loongson2ef/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-loongson2ef/cpu-feature-overrides.h @@ -1,8 +1,5 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * * Copyright (C) 2009 Wu Zhangjin * Copyright (C) 2009 Philippe Vachon * Copyright (C) 2009 Zhang Le @@ -13,8 +10,8 @@ * loongson2f user manual. */ -#ifndef __ASM_MACH_LOONGSON64_CPU_FEATURE_OVERRIDES_H -#define __ASM_MACH_LOONGSON64_CPU_FEATURE_OVERRIDES_H +#ifndef __ASM_MACH_LOONGSON2EF_CPU_FEATURE_OVERRIDES_H +#define __ASM_MACH_LOONGSON2EF_CPU_FEATURE_OVERRIDES_H #define cpu_has_32fpr 1 #define cpu_has_3k_cache 0 @@ -44,10 +41,4 @@ #define cpu_has_vtag_icache 0 #define cpu_has_watch 1 -#ifdef CONFIG_CPU_LOONGSON64 -#define cpu_has_wsbh 1 -#define cpu_has_ic_fills_f_dc 1 -#define cpu_hwrena_impl_bits 0xc0000000 -#endif - #endif /* __ASM_MACH_LOONGSON64_CPU_FEATURE_OVERRIDES_H */ diff --git a/arch/mips/include/asm/mach-loongson2ef/irq.h b/arch/mips/include/asm/mach-loongson2ef/irq.h deleted file mode 100644 index 557e069c400c..000000000000 --- a/arch/mips/include/asm/mach-loongson2ef/irq.h +++ /dev/null @@ -1,44 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __ASM_MACH_LOONGSON64_IRQ_H_ -#define __ASM_MACH_LOONGSON64_IRQ_H_ - -#include - -#ifdef CONFIG_CPU_LOONGSON64 - -/* cpu core interrupt numbers */ -#define MIPS_CPU_IRQ_BASE 56 - -#define LOONGSON_UART_IRQ (MIPS_CPU_IRQ_BASE + 2) /* UART */ -#define LOONGSON_BRIDGE_IRQ (MIPS_CPU_IRQ_BASE + 3) /* CASCADE */ -#define LOONGSON_TIMER_IRQ (MIPS_CPU_IRQ_BASE + 7) /* CPU Timer */ - -#define LOONGSON_HT1_CFG_BASE loongson_sysconf.ht_control_base -#define LOONGSON_HT1_INT_VECTOR_BASE (LOONGSON_HT1_CFG_BASE + 0x80) -#define LOONGSON_HT1_INT_EN_BASE (LOONGSON_HT1_CFG_BASE + 0xa0) -#define LOONGSON_HT1_INT_VECTOR(n) \ - LOONGSON3_REG32(LOONGSON_HT1_INT_VECTOR_BASE, 4 * (n)) -#define LOONGSON_HT1_INTN_EN(n) \ - LOONGSON3_REG32(LOONGSON_HT1_INT_EN_BASE, 4 * (n)) - -#define LOONGSON_INT_ROUTER_OFFSET 0x1400 -#define LOONGSON_INT_ROUTER_INTEN \ - LOONGSON3_REG32(LOONGSON3_REG_BASE, LOONGSON_INT_ROUTER_OFFSET + 0x24) -#define LOONGSON_INT_ROUTER_INTENSET \ - LOONGSON3_REG32(LOONGSON3_REG_BASE, LOONGSON_INT_ROUTER_OFFSET + 0x28) -#define 
LOONGSON_INT_ROUTER_INTENCLR \ - LOONGSON3_REG32(LOONGSON3_REG_BASE, LOONGSON_INT_ROUTER_OFFSET + 0x2c) -#define LOONGSON_INT_ROUTER_ENTRY(n) \ - LOONGSON3_REG8(LOONGSON3_REG_BASE, LOONGSON_INT_ROUTER_OFFSET + n) -#define LOONGSON_INT_ROUTER_LPC LOONGSON_INT_ROUTER_ENTRY(0x0a) -#define LOONGSON_INT_ROUTER_HT1(n) LOONGSON_INT_ROUTER_ENTRY(n + 0x18) - -#define LOONGSON_INT_COREx_INTy(x, y) (1<<(x) | 1<<(y+4)) /* route to int y of core x */ - -#endif - -extern void fixup_irqs(void); -extern void loongson3_ipi_interrupt(struct pt_regs *regs); - -#include_next -#endif /* __ASM_MACH_LOONGSON64_IRQ_H_ */ diff --git a/arch/mips/include/asm/mach-loongson2ef/kernel-entry-init.h b/arch/mips/include/asm/mach-loongson2ef/kernel-entry-init.h deleted file mode 100644 index 28ccb06c8289..000000000000 --- a/arch/mips/include/asm/mach-loongson2ef/kernel-entry-init.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 2005 Embedded Alley Solutions, Inc - * Copyright (C) 2005 Ralf Baechle (ralf@linux-mips.org) - * Copyright (C) 2009 Jiajie Chen (chenjiajie@cse.buaa.edu.cn) - * Copyright (C) 2012 Huacai Chen (chenhc@lemote.com) - */ -#ifndef __ASM_MACH_LOONGSON64_KERNEL_ENTRY_H -#define __ASM_MACH_LOONGSON64_KERNEL_ENTRY_H - -#include - -/* - * Override macros used in arch/mips/kernel/head.S. - */ - .macro kernel_entry_setup -#ifdef CONFIG_CPU_LOONGSON64 - .set push - .set mips64 - /* Set LPA on LOONGSON3 config3 */ - mfc0 t0, CP0_CONFIG3 - or t0, (0x1 << 7) - mtc0 t0, CP0_CONFIG3 - /* Set ELPA on LOONGSON3 pagegrain */ - mfc0 t0, CP0_PAGEGRAIN - or t0, (0x1 << 29) - mtc0 t0, CP0_PAGEGRAIN - /* Enable STFill Buffer */ - mfc0 t0, CP0_PRID - /* Loongson-3A R4+ */ - andi t1, t0, PRID_IMP_MASK - li t2, PRID_IMP_LOONGSON_64G - beq t1, t2, 1f - nop - /* Loongson-3A R2/R3 */ - andi t0, (PRID_IMP_MASK | PRID_REV_MASK) - slti t0, (PRID_IMP_LOONGSON_64C | PRID_REV_LOONGSON3A_R2_0) - bnez t0, 2f - nop -1: - mfc0 t0, CP0_CONFIG6 - or t0, 0x100 - mtc0 t0, CP0_CONFIG6 -2: - _ehb - .set pop -#endif - .endm - -/* - * Do SMP slave processor setup. 
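- * This mirrors kernel_entry_setup above: LPA, ELPA and the STFill buffer have to be enabled on each slave core as well.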
- */ - .macro smp_slave_setup -#ifdef CONFIG_CPU_LOONGSON64 - .set push - .set mips64 - /* Set LPA on LOONGSON3 config3 */ - mfc0 t0, CP0_CONFIG3 - or t0, (0x1 << 7) - mtc0 t0, CP0_CONFIG3 - /* Set ELPA on LOONGSON3 pagegrain */ - mfc0 t0, CP0_PAGEGRAIN - or t0, (0x1 << 29) - mtc0 t0, CP0_PAGEGRAIN - /* Enable STFill Buffer */ - mfc0 t0, CP0_PRID - /* Loongson-3A R4+ */ - andi t1, t0, PRID_IMP_MASK - li t2, PRID_IMP_LOONGSON_64G - beq t1, t2, 1f - nop - /* Loongson-3A R2/R3 */ - andi t0, (PRID_IMP_MASK | PRID_REV_MASK) - slti t0, (PRID_IMP_LOONGSON_64C | PRID_REV_LOONGSON3A_R2_0) - bnez t0, 2f - nop -1: - mfc0 t0, CP0_CONFIG6 - or t0, 0x100 - mtc0 t0, CP0_CONFIG6 -2: - _ehb - .set pop -#endif - .endm - -#endif /* __ASM_MACH_LOONGSON64_KERNEL_ENTRY_H */ diff --git a/arch/mips/include/asm/mach-loongson2ef/loongson.h b/arch/mips/include/asm/mach-loongson2ef/loongson.h index 40a24b76b874..8ed460a64d10 100644 --- a/arch/mips/include/asm/mach-loongson2ef/loongson.h +++ b/arch/mips/include/asm/mach-loongson2ef/loongson.h @@ -4,13 +4,12 @@ * Author: Wu Zhangjin */ -#ifndef __ASM_MACH_LOONGSON64_LOONGSON_H -#define __ASM_MACH_LOONGSON64_LOONGSON_H +#ifndef __ASM_MACH_LOONGSON2EF_LOONGSON_H +#define __ASM_MACH_LOONGSON2EF_LOONGSON_H #include #include #include -#include /* loongson internal northbridge initialization */ extern void bonito_irq_init(void); @@ -30,7 +29,7 @@ extern void __init prom_init_cmdline(void); extern void __init prom_init_machtype(void); extern void __init prom_init_env(void); #ifdef CONFIG_LOONGSON_UART_BASE -extern unsigned long _loongson_uart_base[], loongson_uart_base[]; +extern unsigned long _loongson_uart_base, loongson_uart_base; extern void prom_init_loongson_uart_base(void); #endif @@ -58,12 +57,6 @@ extern int mach_i8259_irq(void); #define LOONGSON_REG(x) \ (*(volatile u32 *)((char *)CKSEG1ADDR(LOONGSON_REG_BASE) + (x))) -#define LOONGSON3_REG8(base, x) \ - (*(volatile u8 *)((char *)TO_UNCAC(base) + (x))) - -#define LOONGSON3_REG32(base, x) \ - (*(volatile u32 *)((char *)TO_UNCAC(base) + (x))) - #define LOONGSON_IRQ_BASE 32 #define LOONGSON2_PERFCNT_IRQ (MIPS_CPU_IRQ_BASE + 6) /* cpu perf counter */ @@ -89,10 +82,6 @@ static inline void do_perfcnt_IRQ(void) #define LOONGSON_REG_BASE 0x1fe00000 #define LOONGSON_REG_SIZE 0x00100000 /* 256Bytes + 256Bytes + ??? */ #define LOONGSON_REG_TOP (LOONGSON_REG_BASE+LOONGSON_REG_SIZE-1) -/* Loongson-3 specific registers */ -#define LOONGSON3_REG_BASE 0x3ff00000 -#define LOONGSON3_REG_SIZE 0x00100000 /* 256Bytes + 256Bytes + ??? 
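(the actual registers are tiny; the window is padded to a full 1MB)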
*/ -#define LOONGSON3_REG_TOP (LOONGSON3_REG_BASE+LOONGSON3_REG_SIZE-1) #define LOONGSON_LIO1_BASE 0x1ff00000 #define LOONGSON_LIO1_SIZE 0x00100000 /* 1M */ @@ -108,12 +97,7 @@ static inline void do_perfcnt_IRQ(void) #define LOONGSON_PCICFG_BASE 0x1fe80000 #define LOONGSON_PCICFG_SIZE 0x00000800 /* 2K */ #define LOONGSON_PCICFG_TOP (LOONGSON_PCICFG_BASE+LOONGSON_PCICFG_SIZE-1) - -#ifdef CONFIG_CPU_LOONGSON64 -#define LOONGSON_PCIIO_BASE loongson_sysconf.pci_io_base -#else #define LOONGSON_PCIIO_BASE 0x1fd00000 -#endif #define LOONGSON_PCIIO_SIZE 0x00100000 /* 1M */ #define LOONGSON_PCIIO_TOP (LOONGSON_PCIIO_BASE+LOONGSON_PCIIO_SIZE-1) @@ -244,19 +228,8 @@ static inline void do_perfcnt_IRQ(void) #define LOONGSON_PXARB_CFG LOONGSON_REG(LOONGSON_REGBASE + 0x68) #define LOONGSON_PXARB_STATUS LOONGSON_REG(LOONGSON_REGBASE + 0x6c) -#define MAX_PACKAGES 4 - /* Chip Config registor of each physical cpu package, PRid >= Loongson-2F */ -extern u64 loongson_chipcfg[MAX_PACKAGES]; -#define LOONGSON_CHIPCFG(id) (*(volatile u32 *)(loongson_chipcfg[id])) - -/* Chip Temperature registor of each physical cpu package, PRid >= Loongson-3A */ -extern u64 loongson_chiptemp[MAX_PACKAGES]; -#define LOONGSON_CHIPTEMP(id) (*(volatile u32 *)(loongson_chiptemp[id])) - -/* Freq Control register of each physical cpu package, PRid >= Loongson-3B */ -extern u64 loongson_freqctrl[MAX_PACKAGES]; -#define LOONGSON_FREQCTRL(id) (*(volatile u32 *)(loongson_freqctrl[id])) +#define LOONGSON_CHIPCFG (void __iomem *)TO_UNCAC(0x1fc00180) /* pcimap */ @@ -352,4 +325,4 @@ extern unsigned long _loongson_addrwincfg_base; #endif /* ! CONFIG_CPU_SUPPORTS_ADDRWINCFG */ -#endif /* __ASM_MACH_LOONGSON64_LOONGSON_H */ +#endif /* __ASM_MACH_LOONGSON2EF_LOONGSON_H */ diff --git a/arch/mips/include/asm/mach-loongson2ef/loongson_hwmon.h b/arch/mips/include/asm/mach-loongson2ef/loongson_hwmon.h deleted file mode 100644 index 545f91f2ae16..000000000000 --- a/arch/mips/include/asm/mach-loongson2ef/loongson_hwmon.h +++ /dev/null @@ -1,56 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __LOONGSON_HWMON_H_ -#define __LOONGSON_HWMON_H_ - -#include - -#define MIN_TEMP 0 -#define MAX_TEMP 255 -#define NOT_VALID_TEMP 999 - -typedef int (*get_temp_fun)(int); -extern int loongson3_cpu_temp(int); - -/* 0:Max speed, 1:Manual, 2:Auto */ -enum fan_control_mode { - FAN_FULL_MODE = 0, - FAN_MANUAL_MODE = 1, - FAN_AUTO_MODE = 2, - FAN_MODE_END -}; - -struct temp_range { - u8 low; - u8 high; - u8 level; -}; - -#define CONSTANT_SPEED_POLICY 0 /* at constant speed */ -#define STEP_SPEED_POLICY 1 /* use up/down arrays to describe policy */ -#define KERNEL_HELPER_POLICY 2 /* kernel as a helper to fan control */ - -#define MAX_STEP_NUM 16 -#define MAX_FAN_LEVEL 255 - -/* loongson_fan_policy works when fan work at FAN_AUTO_MODE */ -struct loongson_fan_policy { - u8 type; - - /* percent only used when type is CONSTANT_SPEED_POLICY */ - u8 percent; - - /* period between two check. 
(Unit: S) */ - u8 adjust_period; - - /* fan adjust usually depend on a temprature input */ - get_temp_fun depend_temp; - - /* up_step/down_step used when type is STEP_SPEED_POLICY */ - u8 up_step_num; - u8 down_step_num; - struct temp_range up_step[MAX_STEP_NUM]; - struct temp_range down_step[MAX_STEP_NUM]; - struct delayed_work work; -}; - -#endif /* __LOONGSON_HWMON_H_*/ diff --git a/arch/mips/include/asm/mach-loongson2ef/loongson_regs.h b/arch/mips/include/asm/mach-loongson2ef/loongson_regs.h deleted file mode 100644 index 363a47a5d26e..000000000000 --- a/arch/mips/include/asm/mach-loongson2ef/loongson_regs.h +++ /dev/null @@ -1,227 +0,0 @@ -/* - * Read/Write Loongson Extension Registers - */ - -#ifndef _LOONGSON_REGS_H_ -#define _LOONGSON_REGS_H_ - -#include -#include - -#include -#include - -static inline bool cpu_has_cfg(void) -{ - return ((read_c0_prid() & PRID_IMP_MASK) == PRID_IMP_LOONGSON_64G); -} - -static inline u32 read_cpucfg(u32 reg) -{ - u32 __res; - - __asm__ __volatile__( - "parse_r __res,%0\n\t" - "parse_r reg,%1\n\t" - ".insn \n\t" - ".word (0xc8080118 | (reg << 21) | (__res << 11))\n\t" - :"=r"(__res) - :"r"(reg) - : - ); - return __res; -} - -/* Bit Domains for CFG registers */ -#define LOONGSON_CFG0 0x0 -#define LOONGSON_CFG0_PRID GENMASK(31, 0) - -#define LOONGSON_CFG1 0x1 -#define LOONGSON_CFG1_FP BIT(0) -#define LOONGSON_CFG1_FPREV GENMASK(3, 1) -#define LOONGSON_CFG1_MMI BIT(4) -#define LOONGSON_CFG1_MSA1 BIT(5) -#define LOONGSON_CFG1_MSA2 BIT(6) -#define LOONGSON_CFG1_CGP BIT(7) -#define LOONGSON_CFG1_WRP BIT(8) -#define LOONGSON_CFG1_LSX1 BIT(9) -#define LOONGSON_CFG1_LSX2 BIT(10) -#define LOONGSON_CFG1_LASX BIT(11) -#define LOONGSON_CFG1_R6FXP BIT(12) -#define LOONGSON_CFG1_R6CRCP BIT(13) -#define LOONGSON_CFG1_R6FPP BIT(14) -#define LOONGSON_CFG1_CNT64 BIT(15) -#define LOONGSON_CFG1_LSLDR0 BIT(16) -#define LOONGSON_CFG1_LSPREF BIT(17) -#define LOONGSON_CFG1_LSPREFX BIT(18) -#define LOONGSON_CFG1_LSSYNCI BIT(19) -#define LOONGSON_CFG1_LSUCA BIT(20) -#define LOONGSON_CFG1_LLSYNC BIT(21) -#define LOONGSON_CFG1_TGTSYNC BIT(22) -#define LOONGSON_CFG1_LLEXC BIT(23) -#define LOONGSON_CFG1_SCRAND BIT(24) -#define LOONGSON_CFG1_MUALP BIT(25) -#define LOONGSON_CFG1_KMUALEN BIT(26) -#define LOONGSON_CFG1_ITLBT BIT(27) -#define LOONGSON_CFG1_LSUPERF BIT(28) -#define LOONGSON_CFG1_SFBP BIT(29) -#define LOONGSON_CFG1_CDMAP BIT(30) - -#define LOONGSON_CFG2 0x2 -#define LOONGSON_CFG2_LEXT1 BIT(0) -#define LOONGSON_CFG2_LEXT2 BIT(1) -#define LOONGSON_CFG2_LEXT3 BIT(2) -#define LOONGSON_CFG2_LSPW BIT(3) -#define LOONGSON_CFG2_LBT1 BIT(4) -#define LOONGSON_CFG2_LBT2 BIT(5) -#define LOONGSON_CFG2_LBT3 BIT(6) -#define LOONGSON_CFG2_LBTMMU BIT(7) -#define LOONGSON_CFG2_LPMP BIT(8) -#define LOONGSON_CFG2_LPMPREV GENMASK(11, 9) -#define LOONGSON_CFG2_LAMO BIT(12) -#define LOONGSON_CFG2_LPIXU BIT(13) -#define LOONGSON_CFG2_LPIXUN BIT(14) -#define LOONGSON_CFG2_LZVP BIT(15) -#define LOONGSON_CFG2_LZVREV GENMASK(18, 16) -#define LOONGSON_CFG2_LGFTP BIT(19) -#define LOONGSON_CFG2_LGFTPREV GENMASK(22, 20) -#define LOONGSON_CFG2_LLFTP BIT(23) -#define LOONGSON_CFG2_LLFTPREV GENMASK(26, 24) -#define LOONGSON_CFG2_LCSRP BIT(27) -#define LOONGSON_CFG2_LDISBLIKELY BIT(28) - -#define LOONGSON_CFG3 0x3 -#define LOONGSON_CFG3_LCAMP BIT(0) -#define LOONGSON_CFG3_LCAMREV GENMASK(3, 1) -#define LOONGSON_CFG3_LCAMNUM GENMASK(11, 4) -#define LOONGSON_CFG3_LCAMKW GENMASK(19, 12) -#define LOONGSON_CFG3_LCAMVW GENMASK(27, 20) - -#define LOONGSON_CFG4 0x4 -#define LOONGSON_CFG4_CCFREQ GENMASK(31, 0) - 
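- /* CFG5 appears to encode the core clock as a multiplier (CFM) / divider (CFD) pair */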
-#define LOONGSON_CFG5 0x5 -#define LOONGSON_CFG5_CFM GENMASK(15, 0) -#define LOONGSON_CFG5_CFD GENMASK(31, 16) - -#define LOONGSON_CFG6 0x6 - -#define LOONGSON_CFG7 0x7 -#define LOONGSON_CFG7_GCCAEQRP BIT(0) -#define LOONGSON_CFG7_UCAWINP BIT(1) - -static inline bool cpu_has_csr(void) -{ - if (cpu_has_cfg()) - return (read_cpucfg(LOONGSON_CFG2) & LOONGSON_CFG2_LCSRP); - - return false; -} - -static inline u32 csr_readl(u32 reg) -{ - u32 __res; - - /* RDCSR reg, val */ - __asm__ __volatile__( - "parse_r __res,%0\n\t" - "parse_r reg,%1\n\t" - ".insn \n\t" - ".word (0xc8000118 | (reg << 21) | (__res << 11))\n\t" - :"=r"(__res) - :"r"(reg) - : - ); - return __res; -} - -static inline u64 csr_readq(u32 reg) -{ - u64 __res; - - /* DWRCSR reg, val */ - __asm__ __volatile__( - "parse_r __res,%0\n\t" - "parse_r reg,%1\n\t" - ".insn \n\t" - ".word (0xc8020118 | (reg << 21) | (__res << 11))\n\t" - :"=r"(__res) - :"r"(reg) - : - ); - return __res; -} - -static inline void csr_writel(u32 val, u32 reg) -{ - /* WRCSR reg, val */ - __asm__ __volatile__( - "parse_r reg,%0\n\t" - "parse_r val,%1\n\t" - ".insn \n\t" - ".word (0xc8010118 | (reg << 21) | (val << 11))\n\t" - : - :"r"(reg),"r"(val) - : - ); -} - -static inline void csr_writeq(u64 val, u32 reg) -{ - /* DWRCSR reg, val */ - __asm__ __volatile__( - "parse_r reg,%0\n\t" - "parse_r val,%1\n\t" - ".insn \n\t" - ".word (0xc8030118 | (reg << 21) | (val << 11))\n\t" - : - :"r"(reg),"r"(val) - : - ); -} - -/* Public CSR Register can also be accessed with regular addresses */ -#define CSR_PUBLIC_MMIO_BASE 0x1fe00000 - -#define MMIO_CSR(x) (void *)TO_UNCAC(CSR_PUBLIC_MMIO_BASE + x) - -#define LOONGSON_CSR_FEATURES 0x8 -#define LOONGSON_CSRF_TEMP BIT(0) -#define LOONGSON_CSRF_NODECNT BIT(1) -#define LOONGSON_CSRF_MSI BIT(2) -#define LOONGSON_CSRF_EXTIOI BIT(3) -#define LOONGSON_CSRF_IPI BIT(4) -#define LOONGSON_CSRF_FREQ BIT(5) - -#define LOONGSON_CSR_VENDOR 0x10 /* Vendor name string, should be "Loongson" */ -#define LOONGSON_CSR_CPUNAME 0x20 /* Processor name string */ -#define LOONGSON_CSR_NODECNT 0x408 -#define LOONGSON_CSR_CPUTEMP 0x428 - -/* PerCore CSR, only accessable by local cores */ -#define LOONGSON_CSR_IPI_STATUS 0x1000 -#define LOONGSON_CSR_IPI_EN 0x1004 -#define LOONGSON_CSR_IPI_SET 0x1008 -#define LOONGSON_CSR_IPI_CLEAR 0x100c -#define LOONGSON_CSR_IPI_SEND 0x1040 -#define CSR_IPI_SEND_IP_SHIFT 0 -#define CSR_IPI_SEND_CPU_SHIFT 16 -#define CSR_IPI_SEND_BLOCK BIT(31) - -static inline u64 drdtime(void) -{ - int rID = 0; - u64 val = 0; - - __asm__ __volatile__( - "parse_r rID,%0\n\t" - "parse_r val,%1\n\t" - ".insn \n\t" - ".word (0xc8090118 | (rID << 21) | (val << 11))\n\t" - :"=r"(rID),"=r"(val) - : - ); - return val; -} - -#endif diff --git a/arch/mips/include/asm/mach-loongson2ef/machine.h b/arch/mips/include/asm/mach-loongson2ef/machine.h index 8ef7ea94a26d..4097267ef186 100644 --- a/arch/mips/include/asm/mach-loongson2ef/machine.h +++ b/arch/mips/include/asm/mach-loongson2ef/machine.h @@ -4,8 +4,8 @@ * Author: Wu Zhangjin */ -#ifndef __ASM_MACH_LOONGSON64_MACHINE_H -#define __ASM_MACH_LOONGSON64_MACHINE_H +#ifndef __ASM_MACH_LOONGSON2EF_MACHINE_H +#define __ASM_MACH_LOONGSON2EF_MACHINE_H #ifdef CONFIG_LEMOTE_FULOONG2E @@ -20,10 +20,4 @@ #endif -#ifdef CONFIG_LOONGSON_MACH3X - -#define LOONGSON_MACHTYPE MACH_LOONGSON_GENERIC - -#endif /* CONFIG_LOONGSON_MACH3X */ - -#endif /* __ASM_MACH_LOONGSON64_MACHINE_H */ +#endif /* __ASM_MACH_LOONGSON2EF_MACHINE_H */ diff --git a/arch/mips/include/asm/mach-loongson2ef/mc146818rtc.h 
b/arch/mips/include/asm/mach-loongson2ef/mc146818rtc.h index ebdccfee50be..00d602629a55 100644 --- a/arch/mips/include/asm/mach-loongson2ef/mc146818rtc.h +++ b/arch/mips/include/asm/mach-loongson2ef/mc146818rtc.h @@ -7,8 +7,8 @@ * * RTC routines for PC style attached Dallas chip. */ -#ifndef __ASM_MACH_LOONGSON64_MC146818RTC_H -#define __ASM_MACH_LOONGSON64_MC146818RTC_H +#ifndef __ASM_MACH_LOONGSON2EF_MC146818RTC_H +#define __ASM_MACH_LOONGSON2EF_MC146818RTC_H #include @@ -33,4 +33,4 @@ static inline void CMOS_WRITE(unsigned char data, unsigned long addr) #define mc146818_decode_year(year) ((year) < 70 ? (year) + 2000 : (year) + 1970) #endif -#endif /* __ASM_MACH_LOONGSON64_MC146818RTC_H */ +#endif /* __ASM_MACH_LOONGSON2EF_MC146818RTC_H */ diff --git a/arch/mips/include/asm/mach-loongson2ef/mem.h b/arch/mips/include/asm/mach-loongson2ef/mem.h index ce33c174c04d..d1d759b8974e 100644 --- a/arch/mips/include/asm/mach-loongson2ef/mem.h +++ b/arch/mips/include/asm/mach-loongson2ef/mem.h @@ -4,8 +4,8 @@ * Author: Wu Zhangjin */ -#ifndef __ASM_MACH_LOONGSON64_MEM_H -#define __ASM_MACH_LOONGSON64_MEM_H +#ifndef __ASM_MACH_LOONGSON2EF_MEM_H +#define __ASM_MACH_LOONGSON2EF_MEM_H /* * high memory space @@ -34,4 +34,4 @@ #define LOONGSON_MMIO_MEM_END 0x80000000 #endif -#endif /* __ASM_MACH_LOONGSON64_MEM_H */ +#endif /* __ASM_MACH_LOONGSON2EF_MEM_H */ diff --git a/arch/mips/include/asm/mach-loongson2ef/mmzone.h b/arch/mips/include/asm/mach-loongson2ef/mmzone.h deleted file mode 100644 index 62073d60739f..000000000000 --- a/arch/mips/include/asm/mach-loongson2ef/mmzone.h +++ /dev/null @@ -1,50 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) 2010 Loongson Inc. & Lemote Inc. & - * Institute of Computing Technology - * Author: Xiang Gao, gaoxiang@ict.ac.cn - * Huacai Chen, chenhc@lemote.com - * Xiaofu Meng, Shuangshuang Zhang - */ -#ifndef _ASM_MACH_MMZONE_H -#define _ASM_MACH_MMZONE_H - -#include -#define NODE_ADDRSPACE_SHIFT 44 -#define NODE0_ADDRSPACE_OFFSET 0x000000000000UL -#define NODE1_ADDRSPACE_OFFSET 0x100000000000UL -#define NODE2_ADDRSPACE_OFFSET 0x200000000000UL -#define NODE3_ADDRSPACE_OFFSET 0x300000000000UL - -#define pa_to_nid(addr) (((addr) & 0xf00000000000) >> NODE_ADDRSPACE_SHIFT) -#define nid_to_addrbase(nid) ((nid) << NODE_ADDRSPACE_SHIFT) - -#define LEVELS_PER_SLICE 128 - -struct slice_data { - unsigned long irq_enable_mask[2]; - int level_to_irq[LEVELS_PER_SLICE]; -}; - -struct hub_data { - cpumask_t h_cpus; - unsigned long slice_map; - unsigned long irq_alloc_mask[2]; - struct slice_data slice[2]; -}; - -struct node_data { - struct pglist_data pglist; - struct hub_data hub; - cpumask_t cpumask; -}; - -extern struct node_data *__node_data[]; - -#define NODE_DATA(n) (&__node_data[(n)]->pglist) -#define hub_data(n) (&__node_data[(n)]->hub) - -extern void setup_zero_pages(void); -extern void __init prom_init_numa_memory(void); - -#endif /* _ASM_MACH_MMZONE_H */ diff --git a/arch/mips/include/asm/mach-loongson2ef/pci.h b/arch/mips/include/asm/mach-loongson2ef/pci.h index 05cc9052772f..5588c5bc5395 100644 --- a/arch/mips/include/asm/mach-loongson2ef/pci.h +++ b/arch/mips/include/asm/mach-loongson2ef/pci.h @@ -4,8 +4,8 @@ * Copyright (c) 2009 Wu Zhangjin */ -#ifndef __ASM_MACH_LOONGSON64_PCI_H_ -#define __ASM_MACH_LOONGSON64_PCI_H_ +#ifndef __ASM_MACH_LOONGSON2EF_PCI_H_ +#define __ASM_MACH_LOONGSON2EF_PCI_H_ extern struct pci_ops loongson_pci_ops; @@ -35,16 +35,12 @@ extern struct pci_ops loongson_pci_ops; #else /* loongson2f/32bit & loongson2e */ /* this 
pci memory space is mapped by pcimap in pci.c */ -#ifdef CONFIG_CPU_LOONGSON64 -#define LOONGSON_PCI_MEM_START 0x40000000UL -#define LOONGSON_PCI_MEM_END 0x7effffffUL -#else #define LOONGSON_PCI_MEM_START LOONGSON_PCILO1_BASE #define LOONGSON_PCI_MEM_END (LOONGSON_PCILO1_BASE + 0x04000000 * 2) -#endif + /* this is an offset from mips_io_port_base */ #define LOONGSON_PCI_IO_START 0x00004000UL #endif /* !CONFIG_CPU_SUPPORTS_ADDRWINCFG */ -#endif /* !__ASM_MACH_LOONGSON64_PCI_H_ */ +#endif /* !__ASM_MACH_LOONGSON2EF_PCI_H_ */ diff --git a/arch/mips/include/asm/mach-loongson2ef/spaces.h b/arch/mips/include/asm/mach-loongson2ef/spaces.h index e85bc1d9c4f2..ba4e8e9b618e 100644 --- a/arch/mips/include/asm/mach-loongson2ef/spaces.h +++ b/arch/mips/include/asm/mach-loongson2ef/spaces.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __ASM_MACH_LOONGSON64_SPACES_H_ -#define __ASM_MACH_LOONGSON64_SPACES_H_ +#ifndef __ASM_MACH_LOONGSON2EF_SPACES_H_ +#define __ASM_MACH_LOONGSON2EF_SPACES_H_ #if defined(CONFIG_64BIT) #define CAC_BASE _AC(0x9800000000000000, UL) diff --git a/arch/mips/include/asm/mach-loongson2ef/topology.h b/arch/mips/include/asm/mach-loongson2ef/topology.h deleted file mode 100644 index 7ff819ab308a..000000000000 --- a/arch/mips/include/asm/mach-loongson2ef/topology.h +++ /dev/null @@ -1,23 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_MACH_TOPOLOGY_H -#define _ASM_MACH_TOPOLOGY_H - -#ifdef CONFIG_NUMA - -#define cpu_to_node(cpu) (cpu_logical_map(cpu) >> 2) -#define cpumask_of_node(node) (&__node_data[(node)]->cpumask) - -struct pci_bus; -extern int pcibus_to_node(struct pci_bus *); - -#define cpumask_of_pcibus(bus) (cpu_online_mask) - -extern unsigned char __node_distances[MAX_NUMNODES][MAX_NUMNODES]; - -#define node_distance(from, to) (__node_distances[(from)][(to)]) - -#endif - -#include - -#endif /* _ASM_MACH_TOPOLOGY_H */ diff --git a/arch/mips/include/asm/mach-loongson2ef/workarounds.h b/arch/mips/include/asm/mach-loongson2ef/workarounds.h deleted file mode 100644 index 17b71172a097..000000000000 --- a/arch/mips/include/asm/mach-loongson2ef/workarounds.h +++ /dev/null @@ -1,8 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __ASM_MACH_LOONGSON64_WORKAROUNDS_H_ -#define __ASM_MACH_LOONGSON64_WORKAROUNDS_H_ - -#define WORKAROUND_CPUFREQ 0x00000001 -#define WORKAROUND_CPUHOTPLUG 0x00000002 - -#endif diff --git a/arch/mips/loongson2ef/Kconfig b/arch/mips/loongson2ef/Kconfig index 007bd023a4e9..66a584a833e5 100644 --- a/arch/mips/loongson2ef/Kconfig +++ b/arch/mips/loongson2ef/Kconfig @@ -90,7 +90,4 @@ config LOONGSON_MC146818 bool default n -config LEFI_FIRMWARE_INTERFACE - bool - endif # MACH_LOONGSON2EF diff --git a/arch/mips/loongson2ef/common/early_printk.c b/arch/mips/loongson2ef/common/early_printk.c index 5e2a151aa30c..d90c5e5a0e78 100644 --- a/arch/mips/loongson2ef/common/early_printk.c +++ b/arch/mips/loongson2ef/common/early_printk.c @@ -27,7 +27,7 @@ void prom_putchar(char c) int timeout; unsigned char *uart_base; - uart_base = (unsigned char *)_loongson_uart_base[0]; + uart_base = (unsigned char *)_loongson_uart_base; timeout = 1024; while (((serial_in(uart_base, UART_LSR) & UART_LSR_THRE) == 0) && diff --git a/arch/mips/loongson2ef/common/env.c b/arch/mips/loongson2ef/common/env.c index 09d5cf4676ca..29c5fecb8282 100644 --- a/arch/mips/loongson2ef/common/env.c +++ b/arch/mips/loongson2ef/common/env.c @@ -16,17 +16,9 @@ #include #include #include -#include -#include u32 cpu_clock_freq; EXPORT_SYMBOL(cpu_clock_freq); -struct 
efi_memory_map_loongson *loongson_memmap; -struct loongson_system_configuration loongson_sysconf; - -u64 loongson_chipcfg[MAX_PACKAGES] = {0xffffffffbfc00180}; -u64 loongson_chiptemp[MAX_PACKAGES]; -u64 loongson_freqctrl[MAX_PACKAGES]; unsigned long long smp_group[4]; @@ -42,8 +34,6 @@ void __init prom_init_env(void) { /* pmon passes arguments in 32bit pointers */ unsigned int processor_id; - -#ifndef CONFIG_LEFI_FIRMWARE_INTERFACE int *_prom_envp; long l; @@ -61,128 +51,8 @@ void __init prom_init_env(void) if (memsize == 0) memsize = 256; - loongson_sysconf.nr_uarts = 1; - pr_info("memsize=%u, highmemsize=%u\n", memsize, highmemsize); -#else - struct boot_params *boot_p; - struct loongson_params *loongson_p; - struct system_loongson *esys; - struct efi_cpuinfo_loongson *ecpu; - struct irq_source_routing_table *eirq_source; - - /* firmware arguments are initialized in head.S */ - boot_p = (struct boot_params *)fw_arg2; - loongson_p = &(boot_p->efi.smbios.lp); - - esys = (struct system_loongson *) - ((u64)loongson_p + loongson_p->system_offset); - ecpu = (struct efi_cpuinfo_loongson *) - ((u64)loongson_p + loongson_p->cpu_offset); - eirq_source = (struct irq_source_routing_table *) - ((u64)loongson_p + loongson_p->irq_offset); - loongson_memmap = (struct efi_memory_map_loongson *) - ((u64)loongson_p + loongson_p->memory_offset); - - cpu_clock_freq = ecpu->cpu_clock_freq; - loongson_sysconf.cputype = ecpu->cputype; - switch (ecpu->cputype) { - case Legacy_3A: - case Loongson_3A: - loongson_sysconf.cores_per_node = 4; - loongson_sysconf.cores_per_package = 4; - smp_group[0] = 0x900000003ff01000; - smp_group[1] = 0x900010003ff01000; - smp_group[2] = 0x900020003ff01000; - smp_group[3] = 0x900030003ff01000; - loongson_chipcfg[0] = 0x900000001fe00180; - loongson_chipcfg[1] = 0x900010001fe00180; - loongson_chipcfg[2] = 0x900020001fe00180; - loongson_chipcfg[3] = 0x900030001fe00180; - loongson_chiptemp[0] = 0x900000001fe0019c; - loongson_chiptemp[1] = 0x900010001fe0019c; - loongson_chiptemp[2] = 0x900020001fe0019c; - loongson_chiptemp[3] = 0x900030001fe0019c; - loongson_freqctrl[0] = 0x900000001fe001d0; - loongson_freqctrl[1] = 0x900010001fe001d0; - loongson_freqctrl[2] = 0x900020001fe001d0; - loongson_freqctrl[3] = 0x900030001fe001d0; - loongson_sysconf.ht_control_base = 0x90000EFDFB000000; - loongson_sysconf.workarounds = WORKAROUND_CPUFREQ; - break; - case Legacy_3B: - case Loongson_3B: - loongson_sysconf.cores_per_node = 4; /* One chip has 2 nodes */ - loongson_sysconf.cores_per_package = 8; - smp_group[0] = 0x900000003ff01000; - smp_group[1] = 0x900010003ff05000; - smp_group[2] = 0x900020003ff09000; - smp_group[3] = 0x900030003ff0d000; - loongson_chipcfg[0] = 0x900000001fe00180; - loongson_chipcfg[1] = 0x900020001fe00180; - loongson_chipcfg[2] = 0x900040001fe00180; - loongson_chipcfg[3] = 0x900060001fe00180; - loongson_chiptemp[0] = 0x900000001fe0019c; - loongson_chiptemp[1] = 0x900020001fe0019c; - loongson_chiptemp[2] = 0x900040001fe0019c; - loongson_chiptemp[3] = 0x900060001fe0019c; - loongson_freqctrl[0] = 0x900000001fe001d0; - loongson_freqctrl[1] = 0x900020001fe001d0; - loongson_freqctrl[2] = 0x900040001fe001d0; - loongson_freqctrl[3] = 0x900060001fe001d0; - loongson_sysconf.ht_control_base = 0x90001EFDFB000000; - loongson_sysconf.workarounds = WORKAROUND_CPUHOTPLUG; - break; - default: - loongson_sysconf.cores_per_node = 1; - loongson_sysconf.cores_per_package = 1; - loongson_chipcfg[0] = 0x900000001fe00180; - } - loongson_sysconf.nr_cpus = ecpu->nr_cpus; - loongson_sysconf.boot_cpu_id = 
ecpu->cpu_startup_core_id; - loongson_sysconf.reserved_cpus_mask = ecpu->reserved_cores_mask; - if (ecpu->nr_cpus > NR_CPUS || ecpu->nr_cpus == 0) - loongson_sysconf.nr_cpus = NR_CPUS; - loongson_sysconf.nr_nodes = (loongson_sysconf.nr_cpus + - loongson_sysconf.cores_per_node - 1) / - loongson_sysconf.cores_per_node; - - loongson_sysconf.pci_mem_start_addr = eirq_source->pci_mem_start_addr; - loongson_sysconf.pci_mem_end_addr = eirq_source->pci_mem_end_addr; - loongson_sysconf.pci_io_base = eirq_source->pci_io_start_addr; - loongson_sysconf.dma_mask_bits = eirq_source->dma_mask_bits; - if (loongson_sysconf.dma_mask_bits < 32 || - loongson_sysconf.dma_mask_bits > 64) - loongson_sysconf.dma_mask_bits = 32; - - loongson_sysconf.restart_addr = boot_p->reset_system.ResetWarm; - loongson_sysconf.poweroff_addr = boot_p->reset_system.Shutdown; - loongson_sysconf.suspend_addr = boot_p->reset_system.DoSuspend; - - loongson_sysconf.vgabios_addr = boot_p->efi.smbios.vga_bios; - pr_debug("Shutdown Addr: %llx, Restart Addr: %llx, VBIOS Addr: %llx\n", - loongson_sysconf.poweroff_addr, loongson_sysconf.restart_addr, - loongson_sysconf.vgabios_addr); - - memset(loongson_sysconf.ecname, 0, 32); - if (esys->has_ec) - memcpy(loongson_sysconf.ecname, esys->ec_name, 32); - loongson_sysconf.workarounds |= esys->workarounds; - - loongson_sysconf.nr_uarts = esys->nr_uarts; - if (esys->nr_uarts < 1 || esys->nr_uarts > MAX_UARTS) - loongson_sysconf.nr_uarts = 1; - memcpy(loongson_sysconf.uarts, esys->uarts, - sizeof(struct uart_device) * loongson_sysconf.nr_uarts); - - loongson_sysconf.nr_sensors = esys->nr_sensors; - if (loongson_sysconf.nr_sensors > MAX_SENSORS) - loongson_sysconf.nr_sensors = 0; - if (loongson_sysconf.nr_sensors) - memcpy(loongson_sysconf.sensors, esys->sensors, - sizeof(struct sensor_device) * loongson_sysconf.nr_sensors); -#endif if (cpu_clock_freq == 0) { processor_id = (¤t_cpu_data)->processor_id; switch (processor_id & PRID_REV_MASK) { @@ -192,17 +62,6 @@ void __init prom_init_env(void) case PRID_REV_LOONGSON2F: cpu_clock_freq = 797000000; break; - case PRID_REV_LOONGSON3A_R1: - case PRID_REV_LOONGSON3A_R2_0: - case PRID_REV_LOONGSON3A_R2_1: - case PRID_REV_LOONGSON3A_R3_0: - case PRID_REV_LOONGSON3A_R3_1: - cpu_clock_freq = 900000000; - break; - case PRID_REV_LOONGSON3B_R1: - case PRID_REV_LOONGSON3B_R2: - cpu_clock_freq = 1000000000; - break; default: cpu_clock_freq = 100000000; break; diff --git a/arch/mips/loongson2ef/common/init.c b/arch/mips/loongson2ef/common/init.c index 912fe61c4fc7..a45430365729 100644 --- a/arch/mips/loongson2ef/common/init.c +++ b/arch/mips/loongson2ef/common/init.c @@ -38,12 +38,7 @@ void __init prom_init(void) /* init base address of io space */ set_io_port_base((unsigned long) ioremap(LOONGSON_PCIIO_BASE, LOONGSON_PCIIO_SIZE)); - -#ifdef CONFIG_NUMA - prom_init_numa_memory(); -#else prom_init_memory(); -#endif /*init the uart base address */ prom_init_uart_base(); diff --git a/arch/mips/loongson2ef/common/machtype.c b/arch/mips/loongson2ef/common/machtype.c index 4e42d929f1c7..82f6de49f20f 100644 --- a/arch/mips/loongson2ef/common/machtype.c +++ b/arch/mips/loongson2ef/common/machtype.c @@ -23,7 +23,6 @@ static const char *system_types[] = { [MACH_DEXXON_GDIUM2F10] = "dexxon-gdium-2f", [MACH_LEMOTE_NAS] = "lemote-nas-2f", [MACH_LEMOTE_LL2F] = "lemote-lynloong-2f", - [MACH_LOONGSON_GENERIC] = "generic-loongson-machine", [MACH_LOONGSON_END] = NULL, }; diff --git a/arch/mips/loongson2ef/common/mem.c b/arch/mips/loongson2ef/common/mem.c index 
4254ac4ec616..11bf6eefb82a 100644 --- a/arch/mips/loongson2ef/common/mem.c +++ b/arch/mips/loongson2ef/common/mem.c @@ -9,11 +9,9 @@ #include #include -#include #include #include -#ifndef CONFIG_LEFI_FIRMWARE_INTERFACE u32 memsize, highmemsize; @@ -52,41 +50,6 @@ void __init prom_init_memory(void) #endif /* !CONFIG_64BIT */ } -#else /* CONFIG_LEFI_FIRMWARE_INTERFACE */ - -void __init prom_init_memory(void) -{ - int i; - u32 node_id; - u32 mem_type; - - /* parse memory information */ - for (i = 0; i < loongson_memmap->nr_map; i++) { - node_id = loongson_memmap->map[i].node_id; - mem_type = loongson_memmap->map[i].mem_type; - - if (node_id != 0) - continue; - - switch (mem_type) { - case SYSTEM_RAM_LOW: - memblock_add(loongson_memmap->map[i].mem_start, - (u64)loongson_memmap->map[i].mem_size << 20); - break; - case SYSTEM_RAM_HIGH: - memblock_add(loongson_memmap->map[i].mem_start, - (u64)loongson_memmap->map[i].mem_size << 20); - break; - case SYSTEM_RAM_RESERVED: - memblock_reserve(loongson_memmap->map[i].mem_start, - (u64)loongson_memmap->map[i].mem_size << 20); - break; - } - } -} - -#endif /* CONFIG_LEFI_FIRMWARE_INTERFACE */ - /* override of arch/mips/mm/cache.c: __uncached_access */ int __uncached_access(struct file *file, unsigned long addr) { diff --git a/arch/mips/loongson2ef/common/pci.c b/arch/mips/loongson2ef/common/pci.c index 2d9755c49524..200916925e95 100644 --- a/arch/mips/loongson2ef/common/pci.c +++ b/arch/mips/loongson2ef/common/pci.c @@ -7,7 +7,6 @@ #include #include -#include static struct resource loongson_pci_mem_resource = { .name = "pci memory space", @@ -81,15 +80,8 @@ static int __init pcibios_init(void) setup_pcimap(); loongson_pci_controller.io_map_base = mips_io_port_base; -#ifdef CONFIG_LEFI_FIRMWARE_INTERFACE - loongson_pci_mem_resource.start = loongson_sysconf.pci_mem_start_addr; - loongson_pci_mem_resource.end = loongson_sysconf.pci_mem_end_addr; -#endif register_pci_controller(&loongson_pci_controller); -#ifdef CONFIG_CPU_LOONGSON64 - sbx00_acpi_init(); -#endif return 0; } diff --git a/arch/mips/loongson2ef/common/pm.c b/arch/mips/loongson2ef/common/pm.c index b8aed878d912..11f4cfd581fb 100644 --- a/arch/mips/loongson2ef/common/pm.c +++ b/arch/mips/loongson2ef/common/pm.c @@ -75,7 +75,7 @@ int __weak wakeup_loongson(void) static void wait_for_wakeup_events(void) { while (!wakeup_loongson()) - LOONGSON_CHIPCFG(0) &= ~0x7; + writel(readl(LOONGSON_CHIPCFG) & ~0x7, LOONGSON_CHIPCFG); } /* @@ -98,15 +98,16 @@ static void loongson_suspend_enter(void) stop_perf_counters(); - cached_cpu_freq = LOONGSON_CHIPCFG(0); + cached_cpu_freq = readl(LOONGSON_CHIPCFG); /* Put CPU into wait mode */ - LOONGSON_CHIPCFG(0) &= ~0x7; + writel(readl(LOONGSON_CHIPCFG) & ~0x7, LOONGSON_CHIPCFG); /* wait for the given events to wakeup cpu from wait mode */ wait_for_wakeup_events(); - LOONGSON_CHIPCFG(0) = cached_cpu_freq; + writel(cached_cpu_freq, LOONGSON_CHIPCFG); + mmiowb(); } diff --git a/arch/mips/loongson2ef/common/reset.c b/arch/mips/loongson2ef/common/reset.c index ce39e918e4d5..e7c87161ce00 100644 --- a/arch/mips/loongson2ef/common/reset.c +++ b/arch/mips/loongson2ef/common/reset.c @@ -13,7 +13,6 @@ #include #include -#include static inline void loongson_reboot(void) { @@ -35,26 +34,15 @@ static inline void loongson_reboot(void) static void loongson_restart(char *command) { -#ifndef CONFIG_LEFI_FIRMWARE_INTERFACE /* do preparation for reboot */ mach_prepare_reboot(); /* reboot via jumping to boot base address */ loongson_reboot(); -#else - void (*fw_restart)(void) = (void 
*)loongson_sysconf.restart_addr; - - fw_restart(); - while (1) { - if (cpu_wait) - cpu_wait(); - } -#endif } static void loongson_poweroff(void) { -#ifndef CONFIG_LEFI_FIRMWARE_INTERFACE mach_prepare_shutdown(); /* @@ -62,15 +50,6 @@ static void loongson_poweroff(void) * a generic delay loop, machine_hang(), so simply return. */ return; -#else - void (*fw_poweroff)(void) = (void *)loongson_sysconf.poweroff_addr; - - fw_poweroff(); - while (1) { - if (cpu_wait) - cpu_wait(); - } -#endif } static void loongson_halt(void) diff --git a/arch/mips/loongson2ef/common/serial.c b/arch/mips/loongson2ef/common/serial.c index 98c3a7feb10f..ac4f6e3ebc3e 100644 --- a/arch/mips/loongson2ef/common/serial.c +++ b/arch/mips/loongson2ef/common/serial.c @@ -38,16 +38,15 @@ .regshift = 0, \ } -static struct plat_serial8250_port uart8250_data[][MAX_UARTS + 1] = { +static struct plat_serial8250_port uart8250_data[MACH_LOONGSON_END + 1] = { [MACH_LOONGSON_UNKNOWN] = {}, - [MACH_LEMOTE_FL2E] = {PORT(4, 1843200), {} }, - [MACH_LEMOTE_FL2F] = {PORT(3, 1843200), {} }, - [MACH_LEMOTE_ML2F7] = {PORT_M(3, 3686400), {} }, - [MACH_LEMOTE_YL2F89] = {PORT_M(3, 3686400), {} }, - [MACH_DEXXON_GDIUM2F10] = {PORT_M(3, 3686400), {} }, - [MACH_LEMOTE_NAS] = {PORT_M(3, 3686400), {} }, - [MACH_LEMOTE_LL2F] = {PORT(3, 1843200), {} }, - [MACH_LOONGSON_GENERIC] = {PORT_M(2, 25000000), {} }, + [MACH_LEMOTE_FL2E] = PORT(4, 1843200), + [MACH_LEMOTE_FL2F] = PORT(3, 1843200), + [MACH_LEMOTE_ML2F7] = PORT_M(3, 3686400), + [MACH_LEMOTE_YL2F89] = PORT_M(3, 3686400), + [MACH_DEXXON_GDIUM2F10] = PORT_M(3, 3686400), + [MACH_LEMOTE_NAS] = PORT_M(3, 3686400), + [MACH_LEMOTE_LL2F] = PORT(3, 1843200), [MACH_LOONGSON_END] = {}, }; @@ -58,53 +57,23 @@ static struct platform_device uart8250_device = { static int __init serial_init(void) { - int i; unsigned char iotype; - iotype = uart8250_data[mips_machtype][0].iotype; + iotype = uart8250_data[mips_machtype].iotype; if (UPIO_MEM == iotype) { - uart8250_data[mips_machtype][0].mapbase = - loongson_uart_base[0]; - uart8250_data[mips_machtype][0].membase = - (void __iomem *)_loongson_uart_base[0]; + uart8250_data[mips_machtype].mapbase = + loongson_uart_base; + uart8250_data[mips_machtype].membase = + (void __iomem *)_loongson_uart_base; } else if (UPIO_PORT == iotype) - uart8250_data[mips_machtype][0].iobase = - loongson_uart_base[0] - LOONGSON_PCIIO_BASE; + uart8250_data[mips_machtype].iobase = + loongson_uart_base - LOONGSON_PCIIO_BASE; - if (loongson_sysconf.uarts[0].uartclk) - uart8250_data[mips_machtype][0].uartclk = - loongson_sysconf.uarts[0].uartclk; - - for (i = 1; i < loongson_sysconf.nr_uarts; i++) { - iotype = loongson_sysconf.uarts[i].iotype; - uart8250_data[mips_machtype][i].iotype = iotype; - loongson_uart_base[i] = loongson_sysconf.uarts[i].uart_base; - - if (UPIO_MEM == iotype) { - uart8250_data[mips_machtype][i].irq = - MIPS_CPU_IRQ_BASE + loongson_sysconf.uarts[i].int_offset; - uart8250_data[mips_machtype][i].mapbase = - loongson_uart_base[i]; - uart8250_data[mips_machtype][i].membase = - ioremap_nocache(loongson_uart_base[i], 8); - } else if (UPIO_PORT == iotype) { - uart8250_data[mips_machtype][i].irq = - loongson_sysconf.uarts[i].int_offset; - uart8250_data[mips_machtype][i].iobase = - loongson_uart_base[i] - LOONGSON_PCIIO_BASE; - } - - uart8250_data[mips_machtype][i].uartclk = - loongson_sysconf.uarts[i].uartclk; - uart8250_data[mips_machtype][i].flags = - UPF_BOOT_AUTOCONF | UPF_SKIP_TEST; - } - - memset(&uart8250_data[mips_machtype][loongson_sysconf.nr_uarts], - 0, sizeof(struct 
plat_serial8250_port)); - uart8250_device.dev.platform_data = uart8250_data[mips_machtype]; + memset(&uart8250_data[mips_machtype + 1], 0, + sizeof(struct plat_serial8250_port)); + uart8250_device.dev.platform_data = &uart8250_data[mips_machtype]; return platform_device_register(&uart8250_device); } diff --git a/arch/mips/loongson2ef/common/setup.c b/arch/mips/loongson2ef/common/setup.c index bc2da4c140c4..4fd27f4f90ed 100644 --- a/arch/mips/loongson2ef/common/setup.c +++ b/arch/mips/loongson2ef/common/setup.c @@ -11,11 +11,6 @@ #include -#ifdef CONFIG_VT -#include -#include -#endif - static void wbflush_loongson(void) { asm(".set\tpush\n\t" @@ -32,20 +27,4 @@ EXPORT_SYMBOL(__wbflush); void __init plat_mem_setup(void) { -#ifdef CONFIG_VT -#if defined(CONFIG_VGA_CONSOLE) - conswitchp = &vga_con; - - screen_info = (struct screen_info) { - .orig_x = 0, - .orig_y = 25, - .orig_video_cols = 80, - .orig_video_lines = 25, - .orig_video_isVGA = VIDEO_TYPE_VGAC, - .orig_video_points = 16, - }; -#elif defined(CONFIG_DUMMY_CONSOLE) - conswitchp = &dummy_con; -#endif -#endif } diff --git a/arch/mips/loongson2ef/common/time.c b/arch/mips/loongson2ef/common/time.c index e78760ce475b..585741af42a9 100644 --- a/arch/mips/loongson2ef/common/time.c +++ b/arch/mips/loongson2ef/common/time.c @@ -18,11 +18,7 @@ void __init plat_time_init(void) /* setup mips r4k timer */ mips_hpt_frequency = cpu_clock_freq / 2; -#ifdef CONFIG_RS780_HPET - setup_hpet_timer(); -#else setup_mfgpt0_timer(); -#endif } void read_persistent_clock64(struct timespec64 *ts) diff --git a/arch/mips/loongson2ef/common/uart_base.c b/arch/mips/loongson2ef/common/uart_base.c index e88d937f10fe..bbfe1095a843 100644 --- a/arch/mips/loongson2ef/common/uart_base.c +++ b/arch/mips/loongson2ef/common/uart_base.c @@ -10,9 +10,9 @@ #include /* raw */ -unsigned long loongson_uart_base[MAX_UARTS] = {}; +unsigned long loongson_uart_base; /* ioremapped */ -unsigned long _loongson_uart_base[MAX_UARTS] = {}; +unsigned long _loongson_uart_base; EXPORT_SYMBOL(loongson_uart_base); EXPORT_SYMBOL(_loongson_uart_base); @@ -20,16 +20,12 @@ EXPORT_SYMBOL(_loongson_uart_base); void prom_init_loongson_uart_base(void) { switch (mips_machtype) { - case MACH_LOONGSON_GENERIC: - /* The CPU provided serial port (CPU) */ - loongson_uart_base[0] = LOONGSON_REG_BASE + 0x1e0; - break; case MACH_LEMOTE_FL2E: - loongson_uart_base[0] = LOONGSON_PCIIO_BASE + 0x3f8; + loongson_uart_base = LOONGSON_PCIIO_BASE + 0x3f8; break; case MACH_LEMOTE_FL2F: case MACH_LEMOTE_LL2F: - loongson_uart_base[0] = LOONGSON_PCIIO_BASE + 0x2f8; + loongson_uart_base = LOONGSON_PCIIO_BASE + 0x2f8; break; case MACH_LEMOTE_ML2F7: case MACH_LEMOTE_YL2F89: @@ -37,10 +33,9 @@ void prom_init_loongson_uart_base(void) case MACH_LEMOTE_NAS: default: /* The CPU provided serial port (LPC) */ - loongson_uart_base[0] = LOONGSON_LIO1_BASE + 0x3f8; + loongson_uart_base = LOONGSON_LIO1_BASE + 0x3f8; break; } - _loongson_uart_base[0] = - (unsigned long)ioremap_nocache(loongson_uart_base[0], 8); + _loongson_uart_base = TO_UNCAC(loongson_uart_base); } diff --git a/arch/mips/loongson2ef/lemote-2f/clock.c b/arch/mips/loongson2ef/lemote-2f/clock.c index 1ced30e7aeef..414f282c8ab5 100644 --- a/arch/mips/loongson2ef/lemote-2f/clock.c +++ b/arch/mips/loongson2ef/lemote-2f/clock.c @@ -118,9 +118,9 @@ int clk_set_rate(struct clk *clk, unsigned long rate) clk->rate = rate; - regval = LOONGSON_CHIPCFG(0); + regval = readl(LOONGSON_CHIPCFG); regval = (regval & ~0x7) | (pos->driver_data - 1); - LOONGSON_CHIPCFG(0) = regval; + 
writel(regval, LOONGSON_CHIPCFG); return ret; } diff --git a/arch/mips/loongson2ef/lemote-2f/reset.c b/arch/mips/loongson2ef/lemote-2f/reset.c index 0db0934302ea..197dae4ffd23 100644 --- a/arch/mips/loongson2ef/lemote-2f/reset.c +++ b/arch/mips/loongson2ef/lemote-2f/reset.c @@ -24,7 +24,7 @@ static void reset_cpu(void) * reset cpu to full speed, this is needed when enabling cpu frequency * scalling */ - LOONGSON_CHIPCFG(0) |= 0x7; + writel(readl(LOONGSON_CHIPCFG) | 0x7, LOONGSON_CHIPCFG); } /* reset support for fuloong2f */ -- cgit v1.2.3 From 1bdb7b76705a38936e9875950587ea91c9ec0a98 Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Sun, 20 Oct 2019 23:01:35 +0800 Subject: MIPS: Loongson64: Cleanup unused code Clean up legacy code after stripping out Loongson2ef code. Signed-off-by: Jiaxun Yang Signed-off-by: Paul Burton Cc: linux-mips@vger.kernel.org Cc: paul.burton@mips.com --- arch/mips/include/asm/bootinfo.h | 3 +- .../asm/mach-loongson64/cpu-feature-overrides.h | 2 - .../include/asm/mach-loongson64/cs5536/cs5536.h | 306 --------------------- .../asm/mach-loongson64/cs5536/cs5536_mfgpt.h | 36 --- .../asm/mach-loongson64/cs5536/cs5536_pci.h | 153 ----------- .../asm/mach-loongson64/cs5536/cs5536_vsm.h | 32 --- arch/mips/include/asm/mach-loongson64/irq.h | 4 - .../asm/mach-loongson64/kernel-entry-init.h | 4 - arch/mips/include/asm/mach-loongson64/loongson.h | 114 -------- arch/mips/include/asm/mach-loongson64/machine.h | 29 -- arch/mips/include/asm/mach-loongson64/mem.h | 37 --- arch/mips/include/asm/mach-loongson64/mmzone.h | 29 +- arch/mips/include/asm/mach-loongson64/pci.h | 31 --- arch/mips/include/asm/mach-loongson64/topology.h | 4 +- arch/mips/loongson64/Kconfig | 10 +- arch/mips/loongson64/Platform | 2 +- arch/mips/loongson64/common/Makefile | 9 +- arch/mips/loongson64/common/bonito-irq.c | 49 ---- arch/mips/loongson64/common/cmdline.c | 2 - arch/mips/loongson64/common/early_printk.c | 38 --- arch/mips/loongson64/common/env.c | 62 +---- arch/mips/loongson64/common/init.c | 17 +- arch/mips/loongson64/common/irq.c | 63 ----- arch/mips/loongson64/common/machtype.c | 63 ----- arch/mips/loongson64/common/mem.c | 157 ----------- arch/mips/loongson64/common/pci.c | 5 +- arch/mips/loongson64/common/platform.c | 27 -- arch/mips/loongson64/common/pm.c | 53 ---- arch/mips/loongson64/common/reset.c | 30 -- arch/mips/loongson64/common/serial.c | 117 -------- arch/mips/loongson64/common/setup.c | 21 -- arch/mips/loongson64/common/time.c | 3 - arch/mips/loongson64/common/uart_base.c | 46 ---- arch/mips/loongson64/loongson-3/irq.c | 8 +- arch/mips/loongson64/loongson-3/numa.c | 11 +- 35 files changed, 34 insertions(+), 1543 deletions(-) delete mode 100644 arch/mips/include/asm/mach-loongson64/cs5536/cs5536.h delete mode 100644 arch/mips/include/asm/mach-loongson64/cs5536/cs5536_mfgpt.h delete mode 100644 arch/mips/include/asm/mach-loongson64/cs5536/cs5536_pci.h delete mode 100644 arch/mips/include/asm/mach-loongson64/cs5536/cs5536_vsm.h delete mode 100644 arch/mips/include/asm/mach-loongson64/machine.h delete mode 100644 arch/mips/include/asm/mach-loongson64/mem.h delete mode 100644 arch/mips/loongson64/common/bonito-irq.c delete mode 100644 arch/mips/loongson64/common/early_printk.c delete mode 100644 arch/mips/loongson64/common/irq.c delete mode 100644 arch/mips/loongson64/common/machtype.c delete mode 100644 arch/mips/loongson64/common/mem.c delete mode 100644 arch/mips/loongson64/common/platform.c delete mode 100644 arch/mips/loongson64/common/serial.c delete mode 100644 
arch/mips/loongson64/common/uart_base.c (limited to 'arch') diff --git a/arch/mips/include/asm/bootinfo.h b/arch/mips/include/asm/bootinfo.h index a9250f5c964f..d41a5057bc69 100644 --- a/arch/mips/include/asm/bootinfo.h +++ b/arch/mips/include/asm/bootinfo.h @@ -61,7 +61,7 @@ /* * Valid machtype for Loongson family */ -enum loongson_machine_type { +enum loongson2ef_machine_type { MACH_LOONGSON_UNKNOWN, MACH_LEMOTE_FL2E, MACH_LEMOTE_FL2F, @@ -70,7 +70,6 @@ enum loongson_machine_type { MACH_DEXXON_GDIUM2F10, MACH_LEMOTE_NAS, MACH_LEMOTE_LL2F, - MACH_LOONGSON_GENERIC, MACH_LOONGSON_END }; diff --git a/arch/mips/include/asm/mach-loongson64/cpu-feature-overrides.h b/arch/mips/include/asm/mach-loongson64/cpu-feature-overrides.h index 83ad90d8005d..895607eb81ca 100644 --- a/arch/mips/include/asm/mach-loongson64/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-loongson64/cpu-feature-overrides.h @@ -44,10 +44,8 @@ #define cpu_has_vtag_icache 0 #define cpu_has_watch 1 -#ifdef CONFIG_CPU_LOONGSON64 #define cpu_has_wsbh 1 #define cpu_has_ic_fills_f_dc 1 #define cpu_hwrena_impl_bits 0xc0000000 -#endif #endif /* __ASM_MACH_LOONGSON64_CPU_FEATURE_OVERRIDES_H */ diff --git a/arch/mips/include/asm/mach-loongson64/cs5536/cs5536.h b/arch/mips/include/asm/mach-loongson64/cs5536/cs5536.h deleted file mode 100644 index 9795b3361532..000000000000 --- a/arch/mips/include/asm/mach-loongson64/cs5536/cs5536.h +++ /dev/null @@ -1,306 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * The header file of cs5536 south bridge. - * - * Copyright (C) 2007 Lemote, Inc. - * Author : jlliu - */ - -#ifndef _CS5536_H -#define _CS5536_H - -#include - -extern void _rdmsr(u32 msr, u32 *hi, u32 *lo); -extern void _wrmsr(u32 msr, u32 hi, u32 lo); - -/* - * MSR module base - */ -#define CS5536_SB_MSR_BASE (0x00000000) -#define CS5536_GLIU_MSR_BASE (0x10000000) -#define CS5536_ILLEGAL_MSR_BASE (0x20000000) -#define CS5536_USB_MSR_BASE (0x40000000) -#define CS5536_IDE_MSR_BASE (0x60000000) -#define CS5536_DIVIL_MSR_BASE (0x80000000) -#define CS5536_ACC_MSR_BASE (0xa0000000) -#define CS5536_UNUSED_MSR_BASE (0xc0000000) -#define CS5536_GLCP_MSR_BASE (0xe0000000) - -#define SB_MSR_REG(offset) (CS5536_SB_MSR_BASE | (offset)) -#define GLIU_MSR_REG(offset) (CS5536_GLIU_MSR_BASE | (offset)) -#define ILLEGAL_MSR_REG(offset) (CS5536_ILLEGAL_MSR_BASE | (offset)) -#define USB_MSR_REG(offset) (CS5536_USB_MSR_BASE | (offset)) -#define IDE_MSR_REG(offset) (CS5536_IDE_MSR_BASE | (offset)) -#define DIVIL_MSR_REG(offset) (CS5536_DIVIL_MSR_BASE | (offset)) -#define ACC_MSR_REG(offset) (CS5536_ACC_MSR_BASE | (offset)) -#define UNUSED_MSR_REG(offset) (CS5536_UNUSED_MSR_BASE | (offset)) -#define GLCP_MSR_REG(offset) (CS5536_GLCP_MSR_BASE | (offset)) - -/* - * BAR SPACE OF VIRTUAL PCI : - * range for pci probe use, length is the actual size. 
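[ ed: Aside, not part of the patch — the CS5536 MSR map above composes a
  register address by OR-ing a module base into a register offset, and
  _rdmsr()/_wrmsr() then move the 64-bit value as a hi/lo pair. A worked
  example under those definitions (DIVIL_LBAR_SMB, 0x0B, is defined further
  down in the same header):

	u32 hi, lo;

	/* DIVIL_MSR_REG(DIVIL_LBAR_SMB) == 0x80000000 | 0x0B */
	_rdmsr(DIVIL_MSR_REG(DIVIL_LBAR_SMB), &hi, &lo);
]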
- */ -/* IO space for all DIVIL modules */ -#define CS5536_IRQ_RANGE 0xffffffe0 /* USERD FOR PCI PROBE */ -#define CS5536_IRQ_LENGTH 0x20 /* THE REGS ACTUAL LENGTH */ -#define CS5536_SMB_RANGE 0xfffffff8 -#define CS5536_SMB_LENGTH 0x08 -#define CS5536_GPIO_RANGE 0xffffff00 -#define CS5536_GPIO_LENGTH 0x100 -#define CS5536_MFGPT_RANGE 0xffffffc0 -#define CS5536_MFGPT_LENGTH 0x40 -#define CS5536_ACPI_RANGE 0xffffffe0 -#define CS5536_ACPI_LENGTH 0x20 -#define CS5536_PMS_RANGE 0xffffff80 -#define CS5536_PMS_LENGTH 0x80 -/* IO space for IDE */ -#define CS5536_IDE_RANGE 0xfffffff0 -#define CS5536_IDE_LENGTH 0x10 -/* IO space for ACC */ -#define CS5536_ACC_RANGE 0xffffff80 -#define CS5536_ACC_LENGTH 0x80 -/* MEM space for ALL USB modules */ -#define CS5536_OHCI_RANGE 0xfffff000 -#define CS5536_OHCI_LENGTH 0x1000 -#define CS5536_EHCI_RANGE 0xfffff000 -#define CS5536_EHCI_LENGTH 0x1000 - -/* - * PCI MSR ACCESS - */ -#define PCI_MSR_CTRL 0xF0 -#define PCI_MSR_ADDR 0xF4 -#define PCI_MSR_DATA_LO 0xF8 -#define PCI_MSR_DATA_HI 0xFC - -/**************** MSR *****************************/ - -/* - * GLIU STANDARD MSR - */ -#define GLIU_CAP 0x00 -#define GLIU_CONFIG 0x01 -#define GLIU_SMI 0x02 -#define GLIU_ERROR 0x03 -#define GLIU_PM 0x04 -#define GLIU_DIAG 0x05 - -/* - * GLIU SPEC. MSR - */ -#define GLIU_P2D_BM0 0x20 -#define GLIU_P2D_BM1 0x21 -#define GLIU_P2D_BM2 0x22 -#define GLIU_P2D_BMK0 0x23 -#define GLIU_P2D_BMK1 0x24 -#define GLIU_P2D_BM3 0x25 -#define GLIU_P2D_BM4 0x26 -#define GLIU_COH 0x80 -#define GLIU_PAE 0x81 -#define GLIU_ARB 0x82 -#define GLIU_ASMI 0x83 -#define GLIU_AERR 0x84 -#define GLIU_DEBUG 0x85 -#define GLIU_PHY_CAP 0x86 -#define GLIU_NOUT_RESP 0x87 -#define GLIU_NOUT_WDATA 0x88 -#define GLIU_WHOAMI 0x8B -#define GLIU_SLV_DIS 0x8C -#define GLIU_IOD_BM0 0xE0 -#define GLIU_IOD_BM1 0xE1 -#define GLIU_IOD_BM2 0xE2 -#define GLIU_IOD_BM3 0xE3 -#define GLIU_IOD_BM4 0xE4 -#define GLIU_IOD_BM5 0xE5 -#define GLIU_IOD_BM6 0xE6 -#define GLIU_IOD_BM7 0xE7 -#define GLIU_IOD_BM8 0xE8 -#define GLIU_IOD_BM9 0xE9 -#define GLIU_IOD_SC0 0xEA -#define GLIU_IOD_SC1 0xEB -#define GLIU_IOD_SC2 0xEC -#define GLIU_IOD_SC3 0xED -#define GLIU_IOD_SC4 0xEE -#define GLIU_IOD_SC5 0xEF -#define GLIU_IOD_SC6 0xF0 -#define GLIU_IOD_SC7 0xF1 - -/* - * SB STANDARD - */ -#define SB_CAP 0x00 -#define SB_CONFIG 0x01 -#define SB_SMI 0x02 -#define SB_ERROR 0x03 -#define SB_MAR_ERR_EN 0x00000001 -#define SB_TAR_ERR_EN 0x00000002 -#define SB_RSVD_BIT1 0x00000004 -#define SB_EXCEP_ERR_EN 0x00000008 -#define SB_SYSE_ERR_EN 0x00000010 -#define SB_PARE_ERR_EN 0x00000020 -#define SB_TAS_ERR_EN 0x00000040 -#define SB_MAR_ERR_FLAG 0x00010000 -#define SB_TAR_ERR_FLAG 0x00020000 -#define SB_RSVD_BIT2 0x00040000 -#define SB_EXCEP_ERR_FLAG 0x00080000 -#define SB_SYSE_ERR_FLAG 0x00100000 -#define SB_PARE_ERR_FLAG 0x00200000 -#define SB_TAS_ERR_FLAG 0x00400000 -#define SB_PM 0x04 -#define SB_DIAG 0x05 - -/* - * SB SPEC. - */ -#define SB_CTRL 0x10 -#define SB_R0 0x20 -#define SB_R1 0x21 -#define SB_R2 0x22 -#define SB_R3 0x23 -#define SB_R4 0x24 -#define SB_R5 0x25 -#define SB_R6 0x26 -#define SB_R7 0x27 -#define SB_R8 0x28 -#define SB_R9 0x29 -#define SB_R10 0x2A -#define SB_R11 0x2B -#define SB_R12 0x2C -#define SB_R13 0x2D -#define SB_R14 0x2E -#define SB_R15 0x2F - -/* - * GLCP STANDARD - */ -#define GLCP_CAP 0x00 -#define GLCP_CONFIG 0x01 -#define GLCP_SMI 0x02 -#define GLCP_ERROR 0x03 -#define GLCP_PM 0x04 -#define GLCP_DIAG 0x05 - -/* - * GLCP SPEC. 
- */ -#define GLCP_CLK_DIS_DELAY 0x08 -#define GLCP_PM_CLK_DISABLE 0x09 -#define GLCP_GLB_PM 0x0B -#define GLCP_DBG_OUT 0x0C -#define GLCP_RSVD1 0x0D -#define GLCP_SOFT_COM 0x0E -#define SOFT_BAR_SMB_FLAG 0x00000001 -#define SOFT_BAR_GPIO_FLAG 0x00000002 -#define SOFT_BAR_MFGPT_FLAG 0x00000004 -#define SOFT_BAR_IRQ_FLAG 0x00000008 -#define SOFT_BAR_PMS_FLAG 0x00000010 -#define SOFT_BAR_ACPI_FLAG 0x00000020 -#define SOFT_BAR_IDE_FLAG 0x00000400 -#define SOFT_BAR_ACC_FLAG 0x00000800 -#define SOFT_BAR_OHCI_FLAG 0x00001000 -#define SOFT_BAR_EHCI_FLAG 0x00002000 -#define GLCP_RSVD2 0x0F -#define GLCP_CLK_OFF 0x10 -#define GLCP_CLK_ACTIVE 0x11 -#define GLCP_CLK_DISABLE 0x12 -#define GLCP_CLK4ACK 0x13 -#define GLCP_SYS_RST 0x14 -#define GLCP_RSVD3 0x15 -#define GLCP_DBG_CLK_CTRL 0x16 -#define GLCP_CHIP_REV_ID 0x17 - -/* PIC */ -#define PIC_YSEL_LOW 0x20 -#define PIC_YSEL_LOW_USB_SHIFT 8 -#define PIC_YSEL_LOW_ACC_SHIFT 16 -#define PIC_YSEL_LOW_FLASH_SHIFT 24 -#define PIC_YSEL_HIGH 0x21 -#define PIC_ZSEL_LOW 0x22 -#define PIC_ZSEL_HIGH 0x23 -#define PIC_IRQM_PRIM 0x24 -#define PIC_IRQM_LPC 0x25 -#define PIC_XIRR_STS_LOW 0x26 -#define PIC_XIRR_STS_HIGH 0x27 -#define PCI_SHDW 0x34 - -/* - * DIVIL STANDARD - */ -#define DIVIL_CAP 0x00 -#define DIVIL_CONFIG 0x01 -#define DIVIL_SMI 0x02 -#define DIVIL_ERROR 0x03 -#define DIVIL_PM 0x04 -#define DIVIL_DIAG 0x05 - -/* - * DIVIL SPEC. - */ -#define DIVIL_LBAR_IRQ 0x08 -#define DIVIL_LBAR_KEL 0x09 -#define DIVIL_LBAR_SMB 0x0B -#define DIVIL_LBAR_GPIO 0x0C -#define DIVIL_LBAR_MFGPT 0x0D -#define DIVIL_LBAR_ACPI 0x0E -#define DIVIL_LBAR_PMS 0x0F -#define DIVIL_LEG_IO 0x14 -#define DIVIL_BALL_OPTS 0x15 -#define DIVIL_SOFT_IRQ 0x16 -#define DIVIL_SOFT_RESET 0x17 - -/* MFGPT */ -#define MFGPT_IRQ 0x28 - -/* - * IDE STANDARD - */ -#define IDE_CAP 0x00 -#define IDE_CONFIG 0x01 -#define IDE_SMI 0x02 -#define IDE_ERROR 0x03 -#define IDE_PM 0x04 -#define IDE_DIAG 0x05 - -/* - * IDE SPEC. - */ -#define IDE_IO_BAR 0x08 -#define IDE_CFG 0x10 -#define IDE_DTC 0x12 -#define IDE_CAST 0x13 -#define IDE_ETC 0x14 -#define IDE_INTERNAL_PM 0x15 - -/* - * ACC STANDARD - */ -#define ACC_CAP 0x00 -#define ACC_CONFIG 0x01 -#define ACC_SMI 0x02 -#define ACC_ERROR 0x03 -#define ACC_PM 0x04 -#define ACC_DIAG 0x05 - -/* - * USB STANDARD - */ -#define USB_CAP 0x00 -#define USB_CONFIG 0x01 -#define USB_SMI 0x02 -#define USB_ERROR 0x03 -#define USB_PM 0x04 -#define USB_DIAG 0x05 - -/* - * USB SPEC. 
- */ -#define USB_OHCI 0x08 -#define USB_EHCI 0x09 - -/****************** NATIVE ***************************/ -/* GPIO : I/O SPACE; REG : 32BITS */ -#define GPIOL_OUT_VAL 0x00 -#define GPIOL_OUT_EN 0x04 - -#endif /* _CS5536_H */ diff --git a/arch/mips/include/asm/mach-loongson64/cs5536/cs5536_mfgpt.h b/arch/mips/include/asm/mach-loongson64/cs5536/cs5536_mfgpt.h deleted file mode 100644 index 52e8bb0fc04d..000000000000 --- a/arch/mips/include/asm/mach-loongson64/cs5536/cs5536_mfgpt.h +++ /dev/null @@ -1,36 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * cs5536 mfgpt header file - */ - -#ifndef _CS5536_MFGPT_H -#define _CS5536_MFGPT_H - -#include -#include - -#ifdef CONFIG_CS5536_MFGPT -extern void setup_mfgpt0_timer(void); -extern void disable_mfgpt0_counter(void); -extern void enable_mfgpt0_counter(void); -#else -static inline void __maybe_unused setup_mfgpt0_timer(void) -{ -} -static inline void __maybe_unused disable_mfgpt0_counter(void) -{ -} -static inline void __maybe_unused enable_mfgpt0_counter(void) -{ -} -#endif - -#define MFGPT_TICK_RATE 14318000 -#define COMPARE ((MFGPT_TICK_RATE + HZ/2) / HZ) - -#define MFGPT_BASE mfgpt_base -#define MFGPT0_CMP2 (MFGPT_BASE + 2) -#define MFGPT0_CNT (MFGPT_BASE + 4) -#define MFGPT0_SETUP (MFGPT_BASE + 6) - -#endif /*!_CS5536_MFGPT_H */ diff --git a/arch/mips/include/asm/mach-loongson64/cs5536/cs5536_pci.h b/arch/mips/include/asm/mach-loongson64/cs5536/cs5536_pci.h deleted file mode 100644 index a0d4b752899e..000000000000 --- a/arch/mips/include/asm/mach-loongson64/cs5536/cs5536_pci.h +++ /dev/null @@ -1,153 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * the definition file of cs5536 Virtual Support Module(VSM). - * pci configuration space can be accessed through the VSM, so - * there is no need of the MSR read/write now, except the spec. - * MSR registers which are not implemented yet. - * - * Copyright (C) 2007 Lemote Inc. - * Author : jlliu, liujl@lemote.com - */ - -#ifndef _CS5536_PCI_H -#define _CS5536_PCI_H - -#include -#include - -extern void cs5536_pci_conf_write4(int function, int reg, u32 value); -extern u32 cs5536_pci_conf_read4(int function, int reg); - -#define CS5536_ACC_INTR 9 -#define CS5536_IDE_INTR 14 -#define CS5536_USB_INTR 11 -#define CS5536_MFGPT_INTR 5 -#define CS5536_UART1_INTR 4 -#define CS5536_UART2_INTR 3 - -/************** PCI BUS DEVICE FUNCTION ***************/ - -/* - * PCI bus device function - */ -#define PCI_BUS_CS5536 0 -#define PCI_IDSEL_CS5536 14 - -/********** STANDARD PCI-2.2 EXPANSION ****************/ - -/* - * PCI configuration space - * we have to virtualize the PCI configure space head, so we should - * define the necessary IDs and some others. 
- */ - -/* CONFIG of PCI VENDOR ID*/ -#define CFG_PCI_VENDOR_ID(mod_dev_id, sys_vendor_id) \ - (((mod_dev_id) << 16) | (sys_vendor_id)) - -/* VENDOR ID */ -#define CS5536_VENDOR_ID 0x1022 - -/* DEVICE ID */ -#define CS5536_ISA_DEVICE_ID 0x2090 -#define CS5536_IDE_DEVICE_ID 0x209a -#define CS5536_ACC_DEVICE_ID 0x2093 -#define CS5536_OHCI_DEVICE_ID 0x2094 -#define CS5536_EHCI_DEVICE_ID 0x2095 - -/* CLASS CODE : CLASS SUB-CLASS INTERFACE */ -#define CS5536_ISA_CLASS_CODE 0x060100 -#define CS5536_IDE_CLASS_CODE 0x010180 -#define CS5536_ACC_CLASS_CODE 0x040100 -#define CS5536_OHCI_CLASS_CODE 0x0C0310 -#define CS5536_EHCI_CLASS_CODE 0x0C0320 - -/* BHLC : BIST HEADER-TYPE LATENCY-TIMER CACHE-LINE-SIZE */ - -#define CFG_PCI_CACHE_LINE_SIZE(header_type, latency_timer) \ - ((PCI_NONE_BIST << 24) | ((header_type) << 16) \ - | ((latency_timer) << 8) | PCI_NORMAL_CACHE_LINE_SIZE); - -#define PCI_NONE_BIST 0x00 /* RO not implemented yet. */ -#define PCI_BRIDGE_HEADER_TYPE 0x80 /* RO */ -#define PCI_NORMAL_HEADER_TYPE 0x00 -#define PCI_NORMAL_LATENCY_TIMER 0x00 -#define PCI_NORMAL_CACHE_LINE_SIZE 0x08 /* RW */ - -/* BAR */ -#define PCI_BAR0_REG 0x10 -#define PCI_BAR1_REG 0x14 -#define PCI_BAR2_REG 0x18 -#define PCI_BAR3_REG 0x1c -#define PCI_BAR4_REG 0x20 -#define PCI_BAR5_REG 0x24 -#define PCI_BAR_RANGE_MASK 0xFFFFFFFF - -/* CARDBUS CIS POINTER */ -#define PCI_CARDBUS_CIS_POINTER 0x00000000 - -/* SUBSYSTEM VENDOR ID */ -#define CS5536_SUB_VENDOR_ID CS5536_VENDOR_ID - -/* SUBSYSTEM ID */ -#define CS5536_ISA_SUB_ID CS5536_ISA_DEVICE_ID -#define CS5536_IDE_SUB_ID CS5536_IDE_DEVICE_ID -#define CS5536_ACC_SUB_ID CS5536_ACC_DEVICE_ID -#define CS5536_OHCI_SUB_ID CS5536_OHCI_DEVICE_ID -#define CS5536_EHCI_SUB_ID CS5536_EHCI_DEVICE_ID - -/* EXPANSION ROM BAR */ -#define PCI_EXPANSION_ROM_BAR 0x00000000 - -/* CAPABILITIES POINTER */ -#define PCI_CAPLIST_POINTER 0x00000000 -#define PCI_CAPLIST_USB_POINTER 0x40 -/* INTERRUPT */ - -#define CFG_PCI_INTERRUPT_LINE(pin, mod_intr) \ - ((PCI_MAX_LATENCY << 24) | (PCI_MIN_GRANT << 16) | \ - ((pin) << 8) | (mod_intr)) - -#define PCI_MAX_LATENCY 0x40 -#define PCI_MIN_GRANT 0x00 -#define PCI_DEFAULT_PIN 0x01 - -/*********** EXPANSION PCI REG ************************/ - -/* - * ISA EXPANSION - */ -#define PCI_UART1_INT_REG 0x50 -#define PCI_UART2_INT_REG 0x54 -#define PCI_ISA_FIXUP_REG 0x58 - -/* - * IDE EXPANSION - */ -#define PCI_IDE_CFG_REG 0x40 -#define CS5536_IDE_FLASH_SIGNATURE 0xDEADBEEF -#define PCI_IDE_DTC_REG 0x48 -#define PCI_IDE_CAST_REG 0x4C -#define PCI_IDE_ETC_REG 0x50 -#define PCI_IDE_PM_REG 0x54 -#define PCI_IDE_INT_REG 0x60 - -/* - * ACC EXPANSION - */ -#define PCI_ACC_INT_REG 0x50 - -/* - * OHCI EXPANSION : INTTERUPT IS IMPLEMENTED BY THE OHCI - */ -#define PCI_OHCI_PM_REG 0x40 -#define PCI_OHCI_INT_REG 0x50 - -/* - * EHCI EXPANSION - */ -#define PCI_EHCI_LEGSMIEN_REG 0x50 -#define PCI_EHCI_LEGSMISTS_REG 0x54 -#define PCI_EHCI_FLADJ_REG 0x60 - -#endif /* _CS5536_PCI_H_ */ diff --git a/arch/mips/include/asm/mach-loongson64/cs5536/cs5536_vsm.h b/arch/mips/include/asm/mach-loongson64/cs5536/cs5536_vsm.h deleted file mode 100644 index 70d0153cccc3..000000000000 --- a/arch/mips/include/asm/mach-loongson64/cs5536/cs5536_vsm.h +++ /dev/null @@ -1,32 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * the read/write interfaces for Virtual Support Module(VSM) - * - * Copyright (C) 2009 Lemote, Inc. 
- * Author: Wu Zhangjin - */ - -#ifndef _CS5536_VSM_H -#define _CS5536_VSM_H - -#include - -typedef void (*cs5536_pci_vsm_write)(int reg, u32 value); -typedef u32 (*cs5536_pci_vsm_read)(int reg); - -#define DECLARE_CS5536_MODULE(name) \ -extern void pci_##name##_write_reg(int reg, u32 value); \ -extern u32 pci_##name##_read_reg(int reg); - -/* ide module */ -DECLARE_CS5536_MODULE(ide) -/* acc module */ -DECLARE_CS5536_MODULE(acc) -/* ohci module */ -DECLARE_CS5536_MODULE(ohci) -/* isa module */ -DECLARE_CS5536_MODULE(isa) -/* ehci module */ -DECLARE_CS5536_MODULE(ehci) - -#endif /* _CS5536_VSM_H */ diff --git a/arch/mips/include/asm/mach-loongson64/irq.h b/arch/mips/include/asm/mach-loongson64/irq.h index 557e069c400c..73a89913dc38 100644 --- a/arch/mips/include/asm/mach-loongson64/irq.h +++ b/arch/mips/include/asm/mach-loongson64/irq.h @@ -4,8 +4,6 @@ #include -#ifdef CONFIG_CPU_LOONGSON64 - /* cpu core interrupt numbers */ #define MIPS_CPU_IRQ_BASE 56 @@ -35,8 +33,6 @@ #define LOONGSON_INT_COREx_INTy(x, y) (1<<(x) | 1<<(y+4)) /* route to int y of core x */ -#endif - extern void fixup_irqs(void); extern void loongson3_ipi_interrupt(struct pt_regs *regs); diff --git a/arch/mips/include/asm/mach-loongson64/kernel-entry-init.h b/arch/mips/include/asm/mach-loongson64/kernel-entry-init.h index 28ccb06c8289..87a5bfbf8cfe 100644 --- a/arch/mips/include/asm/mach-loongson64/kernel-entry-init.h +++ b/arch/mips/include/asm/mach-loongson64/kernel-entry-init.h @@ -17,7 +17,6 @@ * Override macros used in arch/mips/kernel/head.S. */ .macro kernel_entry_setup -#ifdef CONFIG_CPU_LOONGSON64 .set push .set mips64 /* Set LPA on LOONGSON3 config3 */ @@ -47,14 +46,12 @@ 2: _ehb .set pop -#endif .endm /* * Do SMP slave processor setup. */ .macro smp_slave_setup -#ifdef CONFIG_CPU_LOONGSON64 .set push .set mips64 /* Set LPA on LOONGSON3 config3 */ @@ -84,7 +81,6 @@ 2: _ehb .set pop -#endif .endm #endif /* __ASM_MACH_LOONGSON64_KERNEL_ENTRY_H */ diff --git a/arch/mips/include/asm/mach-loongson64/loongson.h b/arch/mips/include/asm/mach-loongson64/loongson.h index 40a24b76b874..bc00c2d88225 100644 --- a/arch/mips/include/asm/mach-loongson64/loongson.h +++ b/arch/mips/include/asm/mach-loongson64/loongson.h @@ -12,8 +12,6 @@ #include #include -/* loongson internal northbridge initialization */ -extern void bonito_irq_init(void); /* machine-specific reboot/halt operation */ extern void mach_prepare_reboot(void); @@ -27,24 +25,9 @@ extern const struct plat_smp_ops loongson3_smp_ops; /* loongson-specific command line, env and memory initialization */ extern void __init prom_init_memory(void); extern void __init prom_init_cmdline(void); -extern void __init prom_init_machtype(void); extern void __init prom_init_env(void); -#ifdef CONFIG_LOONGSON_UART_BASE -extern unsigned long _loongson_uart_base[], loongson_uart_base[]; -extern void prom_init_loongson_uart_base(void); -#endif - -static inline void prom_init_uart_base(void) -{ -#ifdef CONFIG_LOONGSON_UART_BASE - prom_init_loongson_uart_base(); -#endif -} /* irq operation functions */ -extern void bonito_irqdispatch(void); -extern void __init bonito_irq_init(void); -extern void __init mach_init_irq(void); extern void mach_irq_dispatch(unsigned int pending); extern int mach_i8259_irq(void); @@ -64,17 +47,6 @@ extern int mach_i8259_irq(void); #define LOONGSON3_REG32(base, x) \ (*(volatile u32 *)((char *)TO_UNCAC(base) + (x))) -#define LOONGSON_IRQ_BASE 32 -#define LOONGSON2_PERFCNT_IRQ (MIPS_CPU_IRQ_BASE + 6) /* cpu perf counter */ - -#include -static inline void 
do_perfcnt_IRQ(void) -{ -#if IS_ENABLED(CONFIG_OPROFILE) - do_IRQ(LOONGSON2_PERFCNT_IRQ); -#endif -} - #define LOONGSON_FLASH_BASE 0x1c000000 #define LOONGSON_FLASH_SIZE 0x02000000 /* 32M */ #define LOONGSON_FLASH_TOP (LOONGSON_FLASH_BASE+LOONGSON_FLASH_SIZE-1) @@ -109,11 +81,7 @@ static inline void do_perfcnt_IRQ(void) #define LOONGSON_PCICFG_SIZE 0x00000800 /* 2K */ #define LOONGSON_PCICFG_TOP (LOONGSON_PCICFG_BASE+LOONGSON_PCICFG_SIZE-1) -#ifdef CONFIG_CPU_LOONGSON64 #define LOONGSON_PCIIO_BASE loongson_sysconf.pci_io_base -#else -#define LOONGSON_PCIIO_BASE 0x1fd00000 -#endif #define LOONGSON_PCIIO_SIZE 0x00100000 /* 1M */ #define LOONGSON_PCIIO_TOP (LOONGSON_PCIIO_BASE+LOONGSON_PCIIO_SIZE-1) @@ -270,86 +238,4 @@ extern u64 loongson_freqctrl[MAX_PACKAGES]; #define LOONGSON_PCIMAP_WIN(WIN, ADDR) \ ((((ADDR)>>26) & LOONGSON_PCIMAP_PCIMAP_LO0) << ((WIN)*6)) -#ifdef CONFIG_CPU_SUPPORTS_CPUFREQ -#include -extern struct cpufreq_frequency_table loongson2_clockmod_table[]; -#endif - -/* - * address windows configuration module - * - * loongson2e do not have this module - */ -#ifdef CONFIG_CPU_SUPPORTS_ADDRWINCFG - -/* address window config module base address */ -#define LOONGSON_ADDRWINCFG_BASE 0x3ff00000ul -#define LOONGSON_ADDRWINCFG_SIZE 0x180 - -extern unsigned long _loongson_addrwincfg_base; -#define LOONGSON_ADDRWINCFG(offset) \ - (*(volatile u64 *)(_loongson_addrwincfg_base + (offset))) - -#define CPU_WIN0_BASE LOONGSON_ADDRWINCFG(0x00) -#define CPU_WIN1_BASE LOONGSON_ADDRWINCFG(0x08) -#define CPU_WIN2_BASE LOONGSON_ADDRWINCFG(0x10) -#define CPU_WIN3_BASE LOONGSON_ADDRWINCFG(0x18) - -#define CPU_WIN0_MASK LOONGSON_ADDRWINCFG(0x20) -#define CPU_WIN1_MASK LOONGSON_ADDRWINCFG(0x28) -#define CPU_WIN2_MASK LOONGSON_ADDRWINCFG(0x30) -#define CPU_WIN3_MASK LOONGSON_ADDRWINCFG(0x38) - -#define CPU_WIN0_MMAP LOONGSON_ADDRWINCFG(0x40) -#define CPU_WIN1_MMAP LOONGSON_ADDRWINCFG(0x48) -#define CPU_WIN2_MMAP LOONGSON_ADDRWINCFG(0x50) -#define CPU_WIN3_MMAP LOONGSON_ADDRWINCFG(0x58) - -#define PCIDMA_WIN0_BASE LOONGSON_ADDRWINCFG(0x60) -#define PCIDMA_WIN1_BASE LOONGSON_ADDRWINCFG(0x68) -#define PCIDMA_WIN2_BASE LOONGSON_ADDRWINCFG(0x70) -#define PCIDMA_WIN3_BASE LOONGSON_ADDRWINCFG(0x78) - -#define PCIDMA_WIN0_MASK LOONGSON_ADDRWINCFG(0x80) -#define PCIDMA_WIN1_MASK LOONGSON_ADDRWINCFG(0x88) -#define PCIDMA_WIN2_MASK LOONGSON_ADDRWINCFG(0x90) -#define PCIDMA_WIN3_MASK LOONGSON_ADDRWINCFG(0x98) - -#define PCIDMA_WIN0_MMAP LOONGSON_ADDRWINCFG(0xa0) -#define PCIDMA_WIN1_MMAP LOONGSON_ADDRWINCFG(0xa8) -#define PCIDMA_WIN2_MMAP LOONGSON_ADDRWINCFG(0xb0) -#define PCIDMA_WIN3_MMAP LOONGSON_ADDRWINCFG(0xb8) - -#define ADDRWIN_WIN0 0 -#define ADDRWIN_WIN1 1 -#define ADDRWIN_WIN2 2 -#define ADDRWIN_WIN3 3 - -#define ADDRWIN_MAP_DST_DDR 0 -#define ADDRWIN_MAP_DST_PCI 1 -#define ADDRWIN_MAP_DST_LIO 1 - -/* - * s: CPU, PCIDMA - * d: DDR, PCI, LIO - * win: 0, 1, 2, 3 - * src: map source - * dst: map destination - * size: ~mask + 1 - */ -#define LOONGSON_ADDRWIN_CFG(s, d, w, src, dst, size) do {\ - s##_WIN##w##_BASE = (src); \ - s##_WIN##w##_MMAP = (dst) | ADDRWIN_MAP_DST_##d; \ - s##_WIN##w##_MASK = ~(size-1); \ -} while (0) - -#define LOONGSON_ADDRWIN_CPUTOPCI(win, src, dst, size) \ - LOONGSON_ADDRWIN_CFG(CPU, PCI, win, src, dst, size) -#define LOONGSON_ADDRWIN_CPUTODDR(win, src, dst, size) \ - LOONGSON_ADDRWIN_CFG(CPU, DDR, win, src, dst, size) -#define LOONGSON_ADDRWIN_PCITODDR(win, src, dst, size) \ - LOONGSON_ADDRWIN_CFG(PCIDMA, DDR, win, src, dst, size) - -#endif /* ! 
CONFIG_CPU_SUPPORTS_ADDRWINCFG */ - #endif /* __ASM_MACH_LOONGSON64_LOONGSON_H */ diff --git a/arch/mips/include/asm/mach-loongson64/machine.h b/arch/mips/include/asm/mach-loongson64/machine.h deleted file mode 100644 index 8ef7ea94a26d..000000000000 --- a/arch/mips/include/asm/mach-loongson64/machine.h +++ /dev/null @@ -1,29 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) 2009 Lemote, Inc. - * Author: Wu Zhangjin - */ - -#ifndef __ASM_MACH_LOONGSON64_MACHINE_H -#define __ASM_MACH_LOONGSON64_MACHINE_H - -#ifdef CONFIG_LEMOTE_FULOONG2E - -#define LOONGSON_MACHTYPE MACH_LEMOTE_FL2E - -#endif - -/* use fuloong2f as the default machine of LEMOTE_MACH2F */ -#ifdef CONFIG_LEMOTE_MACH2F - -#define LOONGSON_MACHTYPE MACH_LEMOTE_FL2F - -#endif - -#ifdef CONFIG_LOONGSON_MACH3X - -#define LOONGSON_MACHTYPE MACH_LOONGSON_GENERIC - -#endif /* CONFIG_LOONGSON_MACH3X */ - -#endif /* __ASM_MACH_LOONGSON64_MACHINE_H */ diff --git a/arch/mips/include/asm/mach-loongson64/mem.h b/arch/mips/include/asm/mach-loongson64/mem.h deleted file mode 100644 index ce33c174c04d..000000000000 --- a/arch/mips/include/asm/mach-loongson64/mem.h +++ /dev/null @@ -1,37 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) 2009 Lemote, Inc. - * Author: Wu Zhangjin - */ - -#ifndef __ASM_MACH_LOONGSON64_MEM_H -#define __ASM_MACH_LOONGSON64_MEM_H - -/* - * high memory space - * - * in loongson2e, starts from 512M - * in loongson2f, starts from 2G 256M - */ -#ifdef CONFIG_CPU_LOONGSON2E -#define LOONGSON_HIGHMEM_START 0x20000000 -#else -#define LOONGSON_HIGHMEM_START 0x90000000 -#endif - -/* - * the peripheral registers(MMIO): - * - * On the Lemote Loongson 2e system, reside between 0x1000:0000 and 0x2000:0000. - * On the Lemote Loongson 2f system, reside between 0x1000:0000 and 0x8000:0000. 
- */ - -#define LOONGSON_MMIO_MEM_START 0x10000000 - -#ifdef CONFIG_CPU_LOONGSON2E -#define LOONGSON_MMIO_MEM_END 0x20000000 -#else -#define LOONGSON_MMIO_MEM_END 0x80000000 -#endif - -#endif /* __ASM_MACH_LOONGSON64_MEM_H */ diff --git a/arch/mips/include/asm/mach-loongson64/mmzone.h b/arch/mips/include/asm/mach-loongson64/mmzone.h index 62073d60739f..3a25dbd3b3e9 100644 --- a/arch/mips/include/asm/mach-loongson64/mmzone.h +++ b/arch/mips/include/asm/mach-loongson64/mmzone.h @@ -6,8 +6,8 @@ * Huacai Chen, chenhc@lemote.com * Xiaofu Meng, Shuangshuang Zhang */ -#ifndef _ASM_MACH_MMZONE_H -#define _ASM_MACH_MMZONE_H +#ifndef _ASM_MACH_LOONGSON64_MMZONE_H +#define _ASM_MACH_LOONGSON64_MMZONE_H #include #define NODE_ADDRSPACE_SHIFT 44 @@ -19,30 +19,9 @@ #define pa_to_nid(addr) (((addr) & 0xf00000000000) >> NODE_ADDRSPACE_SHIFT) #define nid_to_addrbase(nid) ((nid) << NODE_ADDRSPACE_SHIFT) -#define LEVELS_PER_SLICE 128 +extern struct pglist_data *__node_data[]; -struct slice_data { - unsigned long irq_enable_mask[2]; - int level_to_irq[LEVELS_PER_SLICE]; -}; - -struct hub_data { - cpumask_t h_cpus; - unsigned long slice_map; - unsigned long irq_alloc_mask[2]; - struct slice_data slice[2]; -}; - -struct node_data { - struct pglist_data pglist; - struct hub_data hub; - cpumask_t cpumask; -}; - -extern struct node_data *__node_data[]; - -#define NODE_DATA(n) (&__node_data[(n)]->pglist) -#define hub_data(n) (&__node_data[(n)]->hub) +#define NODE_DATA(n) (__node_data[n]) extern void setup_zero_pages(void); extern void __init prom_init_numa_memory(void); diff --git a/arch/mips/include/asm/mach-loongson64/pci.h b/arch/mips/include/asm/mach-loongson64/pci.h index 05cc9052772f..8b59d64a23e8 100644 --- a/arch/mips/include/asm/mach-loongson64/pci.h +++ b/arch/mips/include/asm/mach-loongson64/pci.h @@ -12,39 +12,8 @@ extern struct pci_ops loongson_pci_ops; /* this is an offset from mips_io_port_base */ #define LOONGSON_PCI_IO_START 0x00004000UL -#ifdef CONFIG_CPU_SUPPORTS_ADDRWINCFG - -/* - * we use address window2 to map cpu address space to pci space - * window2: cpu [1G, 2G] -> pci [1G, 2G] - * why not use window 0 & 1? because they are used by cpu when booting. 
- * window0: cpu [0, 256M] -> ddr [0, 256M] - * window1: cpu [256M, 512M] -> pci [256M, 512M] - */ - -/* the smallest LOONGSON_CPU_MEM_SRC can be 512M */ -#define LOONGSON_CPU_MEM_SRC 0x40000000ul /* 1G */ -#define LOONGSON_PCI_MEM_DST LOONGSON_CPU_MEM_SRC - -#define LOONGSON_PCI_MEM_START LOONGSON_PCI_MEM_DST -#define LOONGSON_PCI_MEM_END (0x80000000ul-1) /* 2G */ - -#define MMAP_CPUTOPCI_SIZE (LOONGSON_PCI_MEM_END - \ - LOONGSON_PCI_MEM_START + 1) - -#else /* loongson2f/32bit & loongson2e */ - -/* this pci memory space is mapped by pcimap in pci.c */ -#ifdef CONFIG_CPU_LOONGSON64 #define LOONGSON_PCI_MEM_START 0x40000000UL #define LOONGSON_PCI_MEM_END 0x7effffffUL -#else -#define LOONGSON_PCI_MEM_START LOONGSON_PCILO1_BASE -#define LOONGSON_PCI_MEM_END (LOONGSON_PCILO1_BASE + 0x04000000 * 2) -#endif -/* this is an offset from mips_io_port_base */ -#define LOONGSON_PCI_IO_START 0x00004000UL -#endif /* !CONFIG_CPU_SUPPORTS_ADDRWINCFG */ #endif /* !__ASM_MACH_LOONGSON64_PCI_H_ */ diff --git a/arch/mips/include/asm/mach-loongson64/topology.h b/arch/mips/include/asm/mach-loongson64/topology.h index 7ff819ab308a..3414a1fd1783 100644 --- a/arch/mips/include/asm/mach-loongson64/topology.h +++ b/arch/mips/include/asm/mach-loongson64/topology.h @@ -5,7 +5,9 @@ #ifdef CONFIG_NUMA #define cpu_to_node(cpu) (cpu_logical_map(cpu) >> 2) -#define cpumask_of_node(node) (&__node_data[(node)]->cpumask) + +extern cpumask_t __node_cpumask[]; +#define cpumask_of_node(node) (&__node_cpumask[node]) struct pci_bus; extern int pcibus_to_node(struct pci_bus *); diff --git a/arch/mips/loongson64/Kconfig b/arch/mips/loongson64/Kconfig index 0e99a5af6e90..3215b768bb88 100644 --- a/arch/mips/loongson64/Kconfig +++ b/arch/mips/loongson64/Kconfig @@ -20,6 +20,7 @@ config LOONGSON_MACH3X select I8259 select IRQ_MIPS_CPU select NR_CPUS_DEFAULT_4 + select USE_GENERIC_EARLY_PRINTK_8250 select SYS_HAS_CPU_LOONGSON64 select SYS_HAS_EARLY_PRINTK select SYS_SUPPORTS_SMP @@ -30,7 +31,7 @@ config LOONGSON_MACH3X select SYS_SUPPORTS_LITTLE_ENDIAN select LOONGSON_MC146818 select ZONE_DMA32 - select LEFI_FIRMWARE_INTERFACE + select NUMA help Generic Loongson 3 family machines utilize the 3A/3B revision of Loongson processor and RS780/SBX00 chipset. @@ -48,16 +49,9 @@ config RS780_HPET If unsure, say Yes. -config LOONGSON_UART_BASE - bool - default y - depends on EARLY_PRINTK || SERIAL_8250 config LOONGSON_MC146818 bool default n -config LEFI_FIRMWARE_INTERFACE - bool - endif # MACH_LOONGSON64 diff --git a/arch/mips/loongson64/Platform b/arch/mips/loongson64/Platform index 31167e568e46..a1efea6ce6bb 100644 --- a/arch/mips/loongson64/Platform +++ b/arch/mips/loongson64/Platform @@ -53,4 +53,4 @@ endif platform-$(CONFIG_MACH_LOONGSON64) += loongson64/ cflags-$(CONFIG_MACH_LOONGSON64) += -I$(srctree)/arch/mips/include/asm/mach-loongson64 -mno-branch-likely -load-$(CONFIG_LOONGSON_MACH3X) += 0xffffffff80200000 +load-$(CONFIG_CPU_LOONGSON64) += 0xffffffff80200000 diff --git a/arch/mips/loongson64/common/Makefile b/arch/mips/loongson64/common/Makefile index 85438df80950..c476401c9b13 100644 --- a/arch/mips/loongson64/common/Makefile +++ b/arch/mips/loongson64/common/Makefile @@ -3,15 +3,10 @@ # Makefile for loongson based machines. 
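[ ed: Aside, not part of the patch — with struct node_data removed by the
  mmzone.h and topology.h hunks above, the two per-node lookups that used
  to share one container become independent flat arrays. Before/after,
  sketched under those definitions:

	/* before: one struct node_data container per node */
	pg_data_t *pgdat = &__node_data[nid]->pglist;
	cpumask_t *cpus  = &__node_data[nid]->cpumask;

	/* after: a bare pglist_data pointer plus a separate cpumask array */
	pg_data_t *pgdat = __node_data[nid];
	cpumask_t *cpus  = &__node_cpumask[nid];
]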
# -obj-y += setup.o init.o cmdline.o env.o time.o reset.o irq.o \ - bonito-irq.o mem.o machtype.o platform.o serial.o +obj-y += setup.o init.o cmdline.o env.o time.o reset.o + obj-$(CONFIG_PCI) += pci.o -# -# Serial port support -# -obj-$(CONFIG_EARLY_PRINTK) += early_printk.o -obj-$(CONFIG_LOONGSON_UART_BASE) += uart_base.o obj-$(CONFIG_LOONGSON_MC146818) += rtc.o # diff --git a/arch/mips/loongson64/common/bonito-irq.c b/arch/mips/loongson64/common/bonito-irq.c deleted file mode 100644 index 82352cc25e4c..000000000000 --- a/arch/mips/loongson64/common/bonito-irq.c +++ /dev/null @@ -1,49 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright 2001 MontaVista Software Inc. - * Author: Jun Sun, jsun@mvista.com or jsun@junsun.net - * Copyright (C) 2000, 2001 Ralf Baechle (ralf@gnu.org) - * - * Copyright (C) 2007 Lemote Inc. & Institute of Computing Technology - * Author: Fuxin Zhang, zhangfx@lemote.com - */ -#include -#include - -#include - -static inline void bonito_irq_enable(struct irq_data *d) -{ - LOONGSON_INTENSET = (1 << (d->irq - LOONGSON_IRQ_BASE)); - mmiowb(); -} - -static inline void bonito_irq_disable(struct irq_data *d) -{ - LOONGSON_INTENCLR = (1 << (d->irq - LOONGSON_IRQ_BASE)); - mmiowb(); -} - -static struct irq_chip bonito_irq_type = { - .name = "bonito_irq", - .irq_mask = bonito_irq_disable, - .irq_unmask = bonito_irq_enable, -}; - -static struct irqaction __maybe_unused dma_timeout_irqaction = { - .handler = no_action, - .name = "dma_timeout", -}; - -void bonito_irq_init(void) -{ - u32 i; - - for (i = LOONGSON_IRQ_BASE; i < LOONGSON_IRQ_BASE + 32; i++) - irq_set_chip_and_handler(i, &bonito_irq_type, - handle_level_irq); - -#ifdef CONFIG_CPU_LOONGSON2E - setup_irq(LOONGSON_IRQ_BASE + 10, &dma_timeout_irqaction); -#endif -} diff --git a/arch/mips/loongson64/common/cmdline.c b/arch/mips/loongson64/common/cmdline.c index a735460682cf..fb1644b01471 100644 --- a/arch/mips/loongson64/common/cmdline.c +++ b/arch/mips/loongson64/common/cmdline.c @@ -39,6 +39,4 @@ void __init prom_init_cmdline(void) strcat(arcs_cmdline, ((char *)l)); strcat(arcs_cmdline, " "); } - - prom_init_machtype(); } diff --git a/arch/mips/loongson64/common/early_printk.c b/arch/mips/loongson64/common/early_printk.c deleted file mode 100644 index 5e2a151aa30c..000000000000 --- a/arch/mips/loongson64/common/early_printk.c +++ /dev/null @@ -1,38 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* early printk support - * - * Copyright (c) 2009 Philippe Vachon - * Copyright (c) 2009 Lemote Inc. 
- * Author: Wu Zhangjin, wuzhangjin@gmail.com - */ -#include -#include - -#include - -#define PORT(base, offset) (u8 *)(base + offset) - -static inline unsigned int serial_in(unsigned char *base, int offset) -{ - return readb(PORT(base, offset)); -} - -static inline void serial_out(unsigned char *base, int offset, int value) -{ - writeb(value, PORT(base, offset)); -} - -void prom_putchar(char c) -{ - int timeout; - unsigned char *uart_base; - - uart_base = (unsigned char *)_loongson_uart_base[0]; - timeout = 1024; - - while (((serial_in(uart_base, UART_LSR) & UART_LSR_THRE) == 0) && - (timeout-- > 0)) - ; - - serial_out(uart_base, UART_TX, c); -} diff --git a/arch/mips/loongson64/common/env.c b/arch/mips/loongson64/common/env.c index 09d5cf4676ca..0daeb7bcf023 100644 --- a/arch/mips/loongson64/common/env.c +++ b/arch/mips/loongson64/common/env.c @@ -30,41 +30,13 @@ u64 loongson_freqctrl[MAX_PACKAGES]; unsigned long long smp_group[4]; -#define parse_even_earlier(res, option, p) \ -do { \ - unsigned int tmp __maybe_unused; \ - \ - if (strncmp(option, (char *)p, strlen(option)) == 0) \ - tmp = kstrtou32((char *)p + strlen(option"="), 10, &res); \ -} while (0) +const char *get_system_type(void) +{ + return "Generic Loongson64 System"; +} void __init prom_init_env(void) { - /* pmon passes arguments in 32bit pointers */ - unsigned int processor_id; - -#ifndef CONFIG_LEFI_FIRMWARE_INTERFACE - int *_prom_envp; - long l; - - /* firmware arguments are initialized in head.S */ - _prom_envp = (int *)fw_arg2; - - l = (long)*_prom_envp; - while (l != 0) { - parse_even_earlier(cpu_clock_freq, "cpuclock", l); - parse_even_earlier(memsize, "memsize", l); - parse_even_earlier(highmemsize, "highmemsize", l); - _prom_envp++; - l = (long)*_prom_envp; - } - if (memsize == 0) - memsize = 256; - - loongson_sysconf.nr_uarts = 1; - - pr_info("memsize=%u, highmemsize=%u\n", memsize, highmemsize); -#else struct boot_params *boot_p; struct loongson_params *loongson_p; struct system_loongson *esys; @@ -182,31 +154,5 @@ void __init prom_init_env(void) if (loongson_sysconf.nr_sensors) memcpy(loongson_sysconf.sensors, esys->sensors, sizeof(struct sensor_device) * loongson_sysconf.nr_sensors); -#endif - if (cpu_clock_freq == 0) { - processor_id = (¤t_cpu_data)->processor_id; - switch (processor_id & PRID_REV_MASK) { - case PRID_REV_LOONGSON2E: - cpu_clock_freq = 533080000; - break; - case PRID_REV_LOONGSON2F: - cpu_clock_freq = 797000000; - break; - case PRID_REV_LOONGSON3A_R1: - case PRID_REV_LOONGSON3A_R2_0: - case PRID_REV_LOONGSON3A_R2_1: - case PRID_REV_LOONGSON3A_R3_0: - case PRID_REV_LOONGSON3A_R3_1: - cpu_clock_freq = 900000000; - break; - case PRID_REV_LOONGSON3B_R1: - case PRID_REV_LOONGSON3B_R2: - cpu_clock_freq = 1000000000; - break; - default: - cpu_clock_freq = 100000000; - break; - } - } pr_info("CpuClock = %u\n", cpu_clock_freq); } diff --git a/arch/mips/loongson64/common/init.c b/arch/mips/loongson64/common/init.c index 912fe61c4fc7..48b44f415059 100644 --- a/arch/mips/loongson64/common/init.c +++ b/arch/mips/loongson64/common/init.c @@ -12,9 +12,6 @@ #include -/* Loongson CPU address windows config space base address */ -unsigned long __maybe_unused _loongson_addrwincfg_base; - static void __init mips_nmi_setup(void) { void *base; @@ -27,11 +24,6 @@ static void __init mips_nmi_setup(void) void __init prom_init(void) { -#ifdef CONFIG_CPU_SUPPORTS_ADDRWINCFG - _loongson_addrwincfg_base = (unsigned long) - ioremap(LOONGSON_ADDRWINCFG_BASE, LOONGSON_ADDRWINCFG_SIZE); -#endif - prom_init_cmdline(); 
prom_init_env(); @@ -39,14 +31,11 @@ void __init prom_init(void) set_io_port_base((unsigned long) ioremap(LOONGSON_PCIIO_BASE, LOONGSON_PCIIO_SIZE)); -#ifdef CONFIG_NUMA prom_init_numa_memory(); -#else - prom_init_memory(); -#endif - /*init the uart base address */ - prom_init_uart_base(); + /* Hardcode to CPU UART 0 */ + setup_8250_early_printk_port(TO_UNCAC(LOONGSON_REG_BASE + 0x1e0), 0, 1024); + register_smp_ops(&loongson3_smp_ops); board_nmi_handler_setup = mips_nmi_setup; } diff --git a/arch/mips/loongson64/common/irq.c b/arch/mips/loongson64/common/irq.c deleted file mode 100644 index 0ea93c1c0a97..000000000000 --- a/arch/mips/loongson64/common/irq.c +++ /dev/null @@ -1,63 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) 2007 Lemote Inc. & Institute of Computing Technology - * Author: Fuxin Zhang, zhangfx@lemote.com - */ -#include -#include - -#include -/* - * the first level int-handler will jump here if it is a bonito irq - */ -void bonito_irqdispatch(void) -{ - u32 int_status; - int i; - - /* workaround the IO dma problem: let cpu looping to allow DMA finish */ - int_status = LOONGSON_INTISR; - while (int_status & (1 << 10)) { - udelay(1); - int_status = LOONGSON_INTISR; - } - - /* Get pending sources, masked by current enables */ - int_status = LOONGSON_INTISR & LOONGSON_INTEN; - - if (int_status) { - i = __ffs(int_status); - do_IRQ(LOONGSON_IRQ_BASE + i); - } -} - -asmlinkage void plat_irq_dispatch(void) -{ - unsigned int pending; - - pending = read_c0_cause() & read_c0_status() & ST0_IM; - - /* machine-specific plat_irq_dispatch */ - mach_irq_dispatch(pending); -} - -void __init arch_init_irq(void) -{ - /* - * Clear all of the interrupts while we change the able around a bit. - * int-handler is not on bootstrap - */ - clear_c0_status(ST0_IM | ST0_BEV); - - /* no steer */ - LOONGSON_INTSTEER = 0; - - /* - * Mask out all interrupt by writing "1" to all bit position in - * the interrupt reset reg. - */ - LOONGSON_INTENCLR = ~0; - - /* machine specific irq init */ - mach_init_irq(); -} diff --git a/arch/mips/loongson64/common/machtype.c b/arch/mips/loongson64/common/machtype.c deleted file mode 100644 index 4e42d929f1c7..000000000000 --- a/arch/mips/loongson64/common/machtype.c +++ /dev/null @@ -1,63 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) 2009 Lemote Inc. 
- * Author: Wu Zhangjin, wuzhangjin@gmail.com - * - * Copyright (c) 2009 Zhang Le - */ -#include -#include - -#include -#include - -/* please ensure the length of the machtype string is less than 50 */ -#define MACHTYPE_LEN 50 - -static const char *system_types[] = { - [MACH_LOONGSON_UNKNOWN] = "unknown loongson machine", - [MACH_LEMOTE_FL2E] = "lemote-fuloong-2e-box", - [MACH_LEMOTE_FL2F] = "lemote-fuloong-2f-box", - [MACH_LEMOTE_ML2F7] = "lemote-mengloong-2f-7inches", - [MACH_LEMOTE_YL2F89] = "lemote-yeeloong-2f-8.9inches", - [MACH_DEXXON_GDIUM2F10] = "dexxon-gdium-2f", - [MACH_LEMOTE_NAS] = "lemote-nas-2f", - [MACH_LEMOTE_LL2F] = "lemote-lynloong-2f", - [MACH_LOONGSON_GENERIC] = "generic-loongson-machine", - [MACH_LOONGSON_END] = NULL, -}; - -const char *get_system_type(void) -{ - return system_types[mips_machtype]; -} - -void __weak __init mach_prom_init_machtype(void) -{ -} - -void __init prom_init_machtype(void) -{ - char *p, str[MACHTYPE_LEN + 1]; - int machtype = MACH_LEMOTE_FL2E; - - mips_machtype = LOONGSON_MACHTYPE; - - p = strstr(arcs_cmdline, "machtype="); - if (!p) { - mach_prom_init_machtype(); - return; - } - p += strlen("machtype="); - strncpy(str, p, MACHTYPE_LEN); - str[MACHTYPE_LEN] = '\0'; - p = strstr(str, " "); - if (p) - *p = '\0'; - - for (; system_types[machtype]; machtype++) - if (strstr(system_types[machtype], str)) { - mips_machtype = machtype; - break; - } -} diff --git a/arch/mips/loongson64/common/mem.c b/arch/mips/loongson64/common/mem.c deleted file mode 100644 index 4254ac4ec616..000000000000 --- a/arch/mips/loongson64/common/mem.c +++ /dev/null @@ -1,157 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - */ -#include -#include -#include -#include - -#include - -#include -#include -#include -#include - -#ifndef CONFIG_LEFI_FIRMWARE_INTERFACE - -u32 memsize, highmemsize; - -void __init prom_init_memory(void) -{ - add_memory_region(0x0, (memsize << 20), BOOT_MEM_RAM); - - add_memory_region(memsize << 20, LOONGSON_PCI_MEM_START - (memsize << - 20), BOOT_MEM_RESERVED); - -#ifdef CONFIG_CPU_SUPPORTS_ADDRWINCFG - { - int bit; - - bit = fls(memsize + highmemsize); - if (bit != ffs(memsize + highmemsize)) - bit += 20; - else - bit = bit + 20 - 1; - - /* set cpu window3 to map CPU to DDR: 2G -> 2G */ - LOONGSON_ADDRWIN_CPUTODDR(ADDRWIN_WIN3, 0x80000000ul, - 0x80000000ul, (1 << bit)); - mmiowb(); - } -#endif /* !CONFIG_CPU_SUPPORTS_ADDRWINCFG */ - -#ifdef CONFIG_64BIT - if (highmemsize > 0) - add_memory_region(LOONGSON_HIGHMEM_START, - highmemsize << 20, BOOT_MEM_RAM); - - add_memory_region(LOONGSON_PCI_MEM_END + 1, LOONGSON_HIGHMEM_START - - LOONGSON_PCI_MEM_END - 1, BOOT_MEM_RESERVED); - -#endif /* !CONFIG_64BIT */ -} - -#else /* CONFIG_LEFI_FIRMWARE_INTERFACE */ - -void __init prom_init_memory(void) -{ - int i; - u32 node_id; - u32 mem_type; - - /* parse memory information */ - for (i = 0; i < loongson_memmap->nr_map; i++) { - node_id = loongson_memmap->map[i].node_id; - mem_type = loongson_memmap->map[i].mem_type; - - if (node_id != 0) - continue; - - switch (mem_type) { - case SYSTEM_RAM_LOW: - memblock_add(loongson_memmap->map[i].mem_start, - (u64)loongson_memmap->map[i].mem_size << 20); - break; - case SYSTEM_RAM_HIGH: - memblock_add(loongson_memmap->map[i].mem_start, - (u64)loongson_memmap->map[i].mem_size << 20); - break; - case SYSTEM_RAM_RESERVED: - memblock_reserve(loongson_memmap->map[i].mem_start, - (u64)loongson_memmap->map[i].mem_size << 20); - break; - } - } -} - -#endif /* CONFIG_LEFI_FIRMWARE_INTERFACE */ - -/* override of 
arch/mips/mm/cache.c: __uncached_access */ -int __uncached_access(struct file *file, unsigned long addr) -{ - if (file->f_flags & O_DSYNC) - return 1; - - return addr >= __pa(high_memory) || - ((addr >= LOONGSON_MMIO_MEM_START) && - (addr < LOONGSON_MMIO_MEM_END)); -} - -#ifdef CONFIG_CPU_SUPPORTS_UNCACHED_ACCELERATED - -#include -#include -#include - -static unsigned long uca_start, uca_end; - -pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, - unsigned long size, pgprot_t vma_prot) -{ - unsigned long offset = pfn << PAGE_SHIFT; - unsigned long end = offset + size; - - if (__uncached_access(file, offset)) { - if (uca_start && (offset >= uca_start) && - (end <= uca_end)) - return __pgprot((pgprot_val(vma_prot) & - ~_CACHE_MASK) | - _CACHE_UNCACHED_ACCELERATED); - else - return pgprot_noncached(vma_prot); - } - return vma_prot; -} - -static int __init find_vga_mem_init(void) -{ - struct pci_dev *dev = 0; - struct resource *r; - int idx; - - if (uca_start) - return 0; - - for_each_pci_dev(dev) { - if ((dev->class >> 16) == PCI_BASE_CLASS_DISPLAY) { - for (idx = 0; idx < PCI_NUM_RESOURCES; idx++) { - r = &dev->resource[idx]; - if (!r->start && r->end) - continue; - if (r->flags & IORESOURCE_IO) - continue; - if (r->flags & IORESOURCE_MEM) { - uca_start = r->start; - uca_end = r->end; - return 0; - } - } - } - } - - return 0; -} - -late_initcall(find_vga_mem_init); -#endif /* !CONFIG_CPU_SUPPORTS_UNCACHED_ACCELERATED */ diff --git a/arch/mips/loongson64/common/pci.c b/arch/mips/loongson64/common/pci.c index 2d9755c49524..7bbe2388f38e 100644 --- a/arch/mips/loongson64/common/pci.c +++ b/arch/mips/loongson64/common/pci.c @@ -81,15 +81,12 @@ static int __init pcibios_init(void) setup_pcimap(); loongson_pci_controller.io_map_base = mips_io_port_base; -#ifdef CONFIG_LEFI_FIRMWARE_INTERFACE loongson_pci_mem_resource.start = loongson_sysconf.pci_mem_start_addr; loongson_pci_mem_resource.end = loongson_sysconf.pci_mem_end_addr; -#endif + register_pci_controller(&loongson_pci_controller); -#ifdef CONFIG_CPU_LOONGSON64 sbx00_acpi_init(); -#endif return 0; } diff --git a/arch/mips/loongson64/common/platform.c b/arch/mips/loongson64/common/platform.c deleted file mode 100644 index 0084820cffaa..000000000000 --- a/arch/mips/loongson64/common/platform.c +++ /dev/null @@ -1,27 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) 2009 Lemote Inc. - * Author: Wu Zhangjin, wuzhangjin@gmail.com - */ - -#include -#include -#include - -static struct platform_device loongson2_cpufreq_device = { - .name = "loongson2_cpufreq", - .id = -1, -}; - -static int __init loongson2_cpufreq_init(void) -{ - struct cpuinfo_mips *c = ¤t_cpu_data; - - /* Only 2F revision and it's successors support CPUFreq */ - if ((c->processor_id & PRID_REV_MASK) >= PRID_REV_LOONGSON2F) - return platform_device_register(&loongson2_cpufreq_device); - - return -ENODEV; -} - -arch_initcall(loongson2_cpufreq_init); diff --git a/arch/mips/loongson64/common/pm.c b/arch/mips/loongson64/common/pm.c index b8aed878d912..7c8556f09781 100644 --- a/arch/mips/loongson64/common/pm.c +++ b/arch/mips/loongson64/common/pm.c @@ -60,56 +60,6 @@ void __weak setup_wakeup_events(void) { } -/* - * Check wakeup events - */ -int __weak wakeup_loongson(void) -{ - return 1; -} - -/* - * If the events are really what we want to wakeup the CPU, wake it up - * otherwise put the CPU asleep again. 
- */ -static void wait_for_wakeup_events(void) -{ - while (!wakeup_loongson()) - LOONGSON_CHIPCFG(0) &= ~0x7; -} - -/* - * Stop all perf counters - * - * $24 is the control register of Loongson perf counter - */ -static inline void stop_perf_counters(void) -{ - __write_64bit_c0_register($24, 0, 0); -} - - -static void loongson_suspend_enter(void) -{ - static unsigned int cached_cpu_freq; - - /* setup wakeup events via enabling the IRQs */ - setup_wakeup_events(); - - stop_perf_counters(); - - cached_cpu_freq = LOONGSON_CHIPCFG(0); - - /* Put CPU into wait mode */ - LOONGSON_CHIPCFG(0) &= ~0x7; - - /* wait for the given events to wakeup cpu from wait mode */ - wait_for_wakeup_events(); - - LOONGSON_CHIPCFG(0) = cached_cpu_freq; - mmiowb(); -} - void __weak mach_suspend(void) { } @@ -122,9 +72,6 @@ static int loongson_pm_enter(suspend_state_t state) { mach_suspend(); - /* processor specific suspend */ - loongson_suspend_enter(); - mach_resume(); return 0; diff --git a/arch/mips/loongson64/common/reset.c b/arch/mips/loongson64/common/reset.c index ce39e918e4d5..88b3bd5fed25 100644 --- a/arch/mips/loongson64/common/reset.c +++ b/arch/mips/loongson64/common/reset.c @@ -17,31 +17,12 @@ static inline void loongson_reboot(void) { -#ifndef CONFIG_CPU_JUMP_WORKAROUNDS ((void (*)(void))ioremap_nocache(LOONGSON_BOOT_BASE, 4)) (); -#else - void (*func)(void); - - func = (void *)ioremap_nocache(LOONGSON_BOOT_BASE, 4); - - __asm__ __volatile__( - " .set noat \n" - " jr %[func] \n" - " .set at \n" - : /* No outputs */ - : [func] "r" (func)); -#endif } static void loongson_restart(char *command) { -#ifndef CONFIG_LEFI_FIRMWARE_INTERFACE - /* do preparation for reboot */ - mach_prepare_reboot(); - /* reboot via jumping to boot base address */ - loongson_reboot(); -#else void (*fw_restart)(void) = (void *)loongson_sysconf.restart_addr; fw_restart(); @@ -49,20 +30,10 @@ static void loongson_restart(char *command) if (cpu_wait) cpu_wait(); } -#endif } static void loongson_poweroff(void) { -#ifndef CONFIG_LEFI_FIRMWARE_INTERFACE - mach_prepare_shutdown(); - - /* - * It needs a wait loop here, but mips/kernel/reset.c already calls - * a generic delay loop, machine_hang(), so simply return. - */ - return; -#else void (*fw_poweroff)(void) = (void *)loongson_sysconf.poweroff_addr; fw_poweroff(); @@ -70,7 +41,6 @@ static void loongson_poweroff(void) if (cpu_wait) cpu_wait(); } -#endif } static void loongson_halt(void) diff --git a/arch/mips/loongson64/common/serial.c b/arch/mips/loongson64/common/serial.c deleted file mode 100644 index 98c3a7feb10f..000000000000 --- a/arch/mips/loongson64/common/serial.c +++ /dev/null @@ -1,117 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 2007 Ralf Baechle (ralf@linux-mips.org) - * - * Copyright (C) 2009 Lemote, Inc. 
- * Author: Yan hua (yanhua@lemote.com) - * Author: Wu Zhangjin (wuzhangjin@gmail.com) - */ - -#include -#include -#include - -#include - -#include -#include - -#define PORT(int, clk) \ -{ \ - .irq = int, \ - .uartclk = clk, \ - .iotype = UPIO_PORT, \ - .flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST, \ - .regshift = 0, \ -} - -#define PORT_M(int, clk) \ -{ \ - .irq = MIPS_CPU_IRQ_BASE + (int), \ - .uartclk = clk, \ - .iotype = UPIO_MEM, \ - .membase = (void __iomem *)NULL, \ - .flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST, \ - .regshift = 0, \ -} - -static struct plat_serial8250_port uart8250_data[][MAX_UARTS + 1] = { - [MACH_LOONGSON_UNKNOWN] = {}, - [MACH_LEMOTE_FL2E] = {PORT(4, 1843200), {} }, - [MACH_LEMOTE_FL2F] = {PORT(3, 1843200), {} }, - [MACH_LEMOTE_ML2F7] = {PORT_M(3, 3686400), {} }, - [MACH_LEMOTE_YL2F89] = {PORT_M(3, 3686400), {} }, - [MACH_DEXXON_GDIUM2F10] = {PORT_M(3, 3686400), {} }, - [MACH_LEMOTE_NAS] = {PORT_M(3, 3686400), {} }, - [MACH_LEMOTE_LL2F] = {PORT(3, 1843200), {} }, - [MACH_LOONGSON_GENERIC] = {PORT_M(2, 25000000), {} }, - [MACH_LOONGSON_END] = {}, -}; - -static struct platform_device uart8250_device = { - .name = "serial8250", - .id = PLAT8250_DEV_PLATFORM, -}; - -static int __init serial_init(void) -{ - int i; - unsigned char iotype; - - iotype = uart8250_data[mips_machtype][0].iotype; - - if (UPIO_MEM == iotype) { - uart8250_data[mips_machtype][0].mapbase = - loongson_uart_base[0]; - uart8250_data[mips_machtype][0].membase = - (void __iomem *)_loongson_uart_base[0]; - } - else if (UPIO_PORT == iotype) - uart8250_data[mips_machtype][0].iobase = - loongson_uart_base[0] - LOONGSON_PCIIO_BASE; - - if (loongson_sysconf.uarts[0].uartclk) - uart8250_data[mips_machtype][0].uartclk = - loongson_sysconf.uarts[0].uartclk; - - for (i = 1; i < loongson_sysconf.nr_uarts; i++) { - iotype = loongson_sysconf.uarts[i].iotype; - uart8250_data[mips_machtype][i].iotype = iotype; - loongson_uart_base[i] = loongson_sysconf.uarts[i].uart_base; - - if (UPIO_MEM == iotype) { - uart8250_data[mips_machtype][i].irq = - MIPS_CPU_IRQ_BASE + loongson_sysconf.uarts[i].int_offset; - uart8250_data[mips_machtype][i].mapbase = - loongson_uart_base[i]; - uart8250_data[mips_machtype][i].membase = - ioremap_nocache(loongson_uart_base[i], 8); - } else if (UPIO_PORT == iotype) { - uart8250_data[mips_machtype][i].irq = - loongson_sysconf.uarts[i].int_offset; - uart8250_data[mips_machtype][i].iobase = - loongson_uart_base[i] - LOONGSON_PCIIO_BASE; - } - - uart8250_data[mips_machtype][i].uartclk = - loongson_sysconf.uarts[i].uartclk; - uart8250_data[mips_machtype][i].flags = - UPF_BOOT_AUTOCONF | UPF_SKIP_TEST; - } - - memset(&uart8250_data[mips_machtype][loongson_sysconf.nr_uarts], - 0, sizeof(struct plat_serial8250_port)); - uart8250_device.dev.platform_data = uart8250_data[mips_machtype]; - - return platform_device_register(&uart8250_device); -} -module_init(serial_init); - -static void __exit serial_exit(void) -{ - platform_device_unregister(&uart8250_device); -} -module_exit(serial_exit); diff --git a/arch/mips/loongson64/common/setup.c b/arch/mips/loongson64/common/setup.c index bc2da4c140c4..4fd27f4f90ed 100644 --- a/arch/mips/loongson64/common/setup.c +++ b/arch/mips/loongson64/common/setup.c @@ -11,11 +11,6 @@ #include -#ifdef CONFIG_VT -#include -#include -#endif - static void wbflush_loongson(void) { asm(".set\tpush\n\t" @@ -32,20 +27,4 @@ EXPORT_SYMBOL(__wbflush); void __init plat_mem_setup(void) { -#ifdef CONFIG_VT -#if defined(CONFIG_VGA_CONSOLE) - conswitchp = &vga_con; - - screen_info = (struct 
screen_info) { - .orig_x = 0, - .orig_y = 25, - .orig_video_cols = 80, - .orig_video_lines = 25, - .orig_video_isVGA = VIDEO_TYPE_VGAC, - .orig_video_points = 16, - }; -#elif defined(CONFIG_DUMMY_CONSOLE) - conswitchp = &dummy_con; -#endif -#endif } diff --git a/arch/mips/loongson64/common/time.c b/arch/mips/loongson64/common/time.c index e78760ce475b..1245f22cec84 100644 --- a/arch/mips/loongson64/common/time.c +++ b/arch/mips/loongson64/common/time.c @@ -11,7 +11,6 @@ #include #include -#include void __init plat_time_init(void) { @@ -20,8 +19,6 @@ void __init plat_time_init(void) #ifdef CONFIG_RS780_HPET setup_hpet_timer(); -#else - setup_mfgpt0_timer(); #endif } diff --git a/arch/mips/loongson64/common/uart_base.c b/arch/mips/loongson64/common/uart_base.c deleted file mode 100644 index e88d937f10fe..000000000000 --- a/arch/mips/loongson64/common/uart_base.c +++ /dev/null @@ -1,46 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) 2009 Lemote Inc. - * Author: Wu Zhangjin, wuzhangjin@gmail.com - */ - -#include -#include - -#include - -/* raw */ -unsigned long loongson_uart_base[MAX_UARTS] = {}; -/* ioremapped */ -unsigned long _loongson_uart_base[MAX_UARTS] = {}; - -EXPORT_SYMBOL(loongson_uart_base); -EXPORT_SYMBOL(_loongson_uart_base); - -void prom_init_loongson_uart_base(void) -{ - switch (mips_machtype) { - case MACH_LOONGSON_GENERIC: - /* The CPU provided serial port (CPU) */ - loongson_uart_base[0] = LOONGSON_REG_BASE + 0x1e0; - break; - case MACH_LEMOTE_FL2E: - loongson_uart_base[0] = LOONGSON_PCIIO_BASE + 0x3f8; - break; - case MACH_LEMOTE_FL2F: - case MACH_LEMOTE_LL2F: - loongson_uart_base[0] = LOONGSON_PCIIO_BASE + 0x2f8; - break; - case MACH_LEMOTE_ML2F7: - case MACH_LEMOTE_YL2F89: - case MACH_DEXXON_GDIUM2F10: - case MACH_LEMOTE_NAS: - default: - /* The CPU provided serial port (LPC) */ - loongson_uart_base[0] = LOONGSON_LIO1_BASE + 0x3f8; - break; - } - - _loongson_uart_base[0] = - (unsigned long)ioremap_nocache(loongson_uart_base[0], 8); -} diff --git a/arch/mips/loongson64/loongson-3/irq.c b/arch/mips/loongson64/loongson-3/irq.c index 5605061f5f98..79ad797497e4 100644 --- a/arch/mips/loongson64/loongson-3/irq.c +++ b/arch/mips/loongson64/loongson-3/irq.c @@ -78,8 +78,12 @@ static void ht_irqdispatch(void) #define UNUSED_IPS (CAUSEF_IP5 | CAUSEF_IP4 | CAUSEF_IP1 | CAUSEF_IP0) -void mach_irq_dispatch(unsigned int pending) +asmlinkage void plat_irq_dispatch(void) { + unsigned int pending; + + pending = read_c0_cause() & read_c0_status() & ST0_IM; + if (pending & CAUSEF_IP7) do_IRQ(LOONGSON_TIMER_IRQ); #if defined(CONFIG_SMP) @@ -127,7 +131,7 @@ void irq_router_init(void) LOONGSON_INT_ROUTER_INTEN | (0xffff << 16) | 0x1 << 10; } -void __init mach_init_irq(void) +void __init arch_init_irq(void) { struct irq_chip *chip; diff --git a/arch/mips/loongson64/loongson-3/numa.c b/arch/mips/loongson64/loongson-3/numa.c index 8f20d2cb3767..ef94a2278561 100644 --- a/arch/mips/loongson64/loongson-3/numa.c +++ b/arch/mips/loongson64/loongson-3/numa.c @@ -26,12 +26,15 @@ #include #include -static struct node_data prealloc__node_data[MAX_NUMNODES]; +static struct pglist_data prealloc__node_data[MAX_NUMNODES]; unsigned char __node_distances[MAX_NUMNODES][MAX_NUMNODES]; EXPORT_SYMBOL(__node_distances); -struct node_data *__node_data[MAX_NUMNODES]; +struct pglist_data *__node_data[MAX_NUMNODES]; EXPORT_SYMBOL(__node_data); +cpumask_t __node_cpumask[MAX_NUMNODES]; +EXPORT_SYMBOL(__node_cpumask); + static void enable_lpa(void) { unsigned long value; @@ -214,7 +217,7 @@ 
static __init void prom_meminit(void) if (node_online(node)) { szmem(node); node_mem_init(node); - cpumask_clear(&__node_data[(node)]->cpumask); + cpumask_clear(&__node_cpumask[node]); } } memblocks_present(); @@ -228,7 +231,7 @@ static __init void prom_meminit(void) if (loongson_sysconf.reserved_cpus_mask & (1<cpumask); + cpumask_set_cpu(active_cpu, &__node_cpumask[node]); pr_info("NUMA: set cpumask cpu %d on node %d\n", active_cpu, node); active_cpu++; -- cgit v1.2.3 From 6fbde6b492dfc761ad60a68fb2cb32b1eb05b786 Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Sun, 20 Oct 2019 23:01:36 +0800 Subject: MIPS: Loongson64: Move files to the top-level directory Current Loongson-3 code can share among all Loongson64 processors. Signed-off-by: Jiaxun Yang Signed-off-by: Paul Burton Cc: linux-mips@vger.kernel.org Cc: chenhc@lemote.com Cc: paul.burton@mips.com --- arch/mips/Kconfig | 26 + arch/mips/configs/loongson3_defconfig | 1 - .../asm/mach-loongson64/cpu-feature-overrides.h | 1 - arch/mips/loongson64/Kconfig | 38 +- arch/mips/loongson64/Makefile | 17 +- arch/mips/loongson64/acpi_init.c | 151 ++++ arch/mips/loongson64/cmdline.c | 42 ++ arch/mips/loongson64/common/Makefile | 16 - arch/mips/loongson64/common/cmdline.c | 42 -- arch/mips/loongson64/common/env.c | 158 ---- arch/mips/loongson64/common/init.c | 45 -- arch/mips/loongson64/common/pci.c | 94 --- arch/mips/loongson64/common/pm.c | 104 --- arch/mips/loongson64/common/reset.c | 64 -- arch/mips/loongson64/common/rtc.c | 39 - arch/mips/loongson64/common/setup.c | 30 - arch/mips/loongson64/common/time.c | 29 - arch/mips/loongson64/cop2-ex.c | 61 ++ arch/mips/loongson64/dma.c | 25 + arch/mips/loongson64/env.c | 158 ++++ arch/mips/loongson64/hpet.c | 289 ++++++++ arch/mips/loongson64/init.c | 45 ++ arch/mips/loongson64/irq.c | 162 ++++ arch/mips/loongson64/loongson-3/Makefile | 11 - arch/mips/loongson64/loongson-3/acpi_init.c | 151 ---- arch/mips/loongson64/loongson-3/cop2-ex.c | 61 -- arch/mips/loongson64/loongson-3/dma.c | 25 - arch/mips/loongson64/loongson-3/hpet.c | 289 -------- arch/mips/loongson64/loongson-3/irq.c | 162 ---- arch/mips/loongson64/loongson-3/numa.c | 273 ------- arch/mips/loongson64/loongson-3/platform.c | 39 - arch/mips/loongson64/loongson-3/smp.c | 813 --------------------- arch/mips/loongson64/loongson-3/smp.h | 31 - arch/mips/loongson64/numa.c | 273 +++++++ arch/mips/loongson64/pci.c | 94 +++ arch/mips/loongson64/platform.c | 39 + arch/mips/loongson64/pm.c | 104 +++ arch/mips/loongson64/reset.c | 64 ++ arch/mips/loongson64/rtc.c | 39 + arch/mips/loongson64/setup.c | 30 + arch/mips/loongson64/smp.c | 813 +++++++++++++++++++++ arch/mips/loongson64/smp.h | 31 + arch/mips/loongson64/time.c | 29 + arch/mips/pci/Makefile | 2 +- drivers/platform/mips/Kconfig | 4 +- 45 files changed, 2488 insertions(+), 2526 deletions(-) create mode 100644 arch/mips/loongson64/acpi_init.c create mode 100644 arch/mips/loongson64/cmdline.c delete mode 100644 arch/mips/loongson64/common/Makefile delete mode 100644 arch/mips/loongson64/common/cmdline.c delete mode 100644 arch/mips/loongson64/common/env.c delete mode 100644 arch/mips/loongson64/common/init.c delete mode 100644 arch/mips/loongson64/common/pci.c delete mode 100644 arch/mips/loongson64/common/pm.c delete mode 100644 arch/mips/loongson64/common/reset.c delete mode 100644 arch/mips/loongson64/common/rtc.c delete mode 100644 arch/mips/loongson64/common/setup.c delete mode 100644 arch/mips/loongson64/common/time.c create mode 100644 arch/mips/loongson64/cop2-ex.c create mode 100644 
arch/mips/loongson64/dma.c create mode 100644 arch/mips/loongson64/env.c create mode 100644 arch/mips/loongson64/hpet.c create mode 100644 arch/mips/loongson64/init.c create mode 100644 arch/mips/loongson64/irq.c delete mode 100644 arch/mips/loongson64/loongson-3/Makefile delete mode 100644 arch/mips/loongson64/loongson-3/acpi_init.c delete mode 100644 arch/mips/loongson64/loongson-3/cop2-ex.c delete mode 100644 arch/mips/loongson64/loongson-3/dma.c delete mode 100644 arch/mips/loongson64/loongson-3/hpet.c delete mode 100644 arch/mips/loongson64/loongson-3/irq.c delete mode 100644 arch/mips/loongson64/loongson-3/numa.c delete mode 100644 arch/mips/loongson64/loongson-3/platform.c delete mode 100644 arch/mips/loongson64/loongson-3/smp.c delete mode 100644 arch/mips/loongson64/loongson-3/smp.h create mode 100644 arch/mips/loongson64/numa.c create mode 100644 arch/mips/loongson64/pci.c create mode 100644 arch/mips/loongson64/platform.c create mode 100644 arch/mips/loongson64/pm.c create mode 100644 arch/mips/loongson64/reset.c create mode 100644 arch/mips/loongson64/rtc.c create mode 100644 arch/mips/loongson64/setup.c create mode 100644 arch/mips/loongson64/smp.c create mode 100644 arch/mips/loongson64/smp.h create mode 100644 arch/mips/loongson64/time.c (limited to 'arch') diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index aa6f8497ddd9..b303b5c2d764 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -461,7 +461,33 @@ config MACH_LOONGSON2EF config MACH_LOONGSON64 bool "Loongson-2/3 GSx64 family of machines" + select ARCH_SPARSEMEM_ENABLE + select ARCH_MIGHT_HAVE_PC_PARPORT + select ARCH_MIGHT_HAVE_PC_SERIO + select GENERIC_ISA_DMA_SUPPORT_BROKEN + select BOOT_ELF32 + select BOARD_SCACHE + select CSRC_R4K + select CEVT_R4K + select CPU_HAS_WB + select FORCE_PCI + select ISA + select I8259 + select IRQ_MIPS_CPU + select NR_CPUS_DEFAULT_4 + select USE_GENERIC_EARLY_PRINTK_8250 + select SYS_HAS_CPU_LOONGSON64 + select SYS_HAS_EARLY_PRINTK + select SYS_SUPPORTS_SMP + select SYS_SUPPORTS_HOTPLUG_CPU + select SYS_SUPPORTS_NUMA + select SYS_SUPPORTS_64BIT_KERNEL + select SYS_SUPPORTS_HIGHMEM + select SYS_SUPPORTS_LITTLE_ENDIAN select SYS_SUPPORTS_ZBOOT + select LOONGSON_MC146818 + select ZONE_DMA32 + select NUMA help This enables the support of Loongson-2/3 family of processors with GSx64 microarchitecture. 
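With the machine selection folded into MACH_LOONGSON64 above, the NUMA layout macros carried by this series (mmzone.h and topology.h earlier in the set) become the single source of truth: physical addresses hold the node id in bits 44..47, and each group of four logical CPUs maps to one node. The stand-alone sketch below replays those macros outside the kernel; the main() harness and the identity cpu_logical_map() are illustrative assumptions, not part of the patch.

#include <stdio.h>
#include <stdint.h>

/* From mach-loongson64/mmzone.h: the node id lives in address bits 44..47 */
#define NODE_ADDRSPACE_SHIFT 44
#define pa_to_nid(addr)      (((addr) & 0xf00000000000ULL) >> NODE_ADDRSPACE_SHIFT)
#define nid_to_addrbase(nid) ((uint64_t)(nid) << NODE_ADDRSPACE_SHIFT)

/* From mach-loongson64/topology.h, assuming an identity cpu_logical_map():
 * four cores per node on Loongson-3 */
#define cpu_to_node(cpu)     ((cpu) >> 2)

int main(void)
{
	uint64_t addr = nid_to_addrbase(2) | 0x1000;

	printf("address %#llx sits on node %llu\n",
	       (unsigned long long)addr,
	       (unsigned long long)pa_to_nid(addr));
	printf("logical cpu 6 belongs to node %d\n", cpu_to_node(6));
	return 0;
}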
diff --git a/arch/mips/configs/loongson3_defconfig b/arch/mips/configs/loongson3_defconfig index 90ee0084d786..caad7bf7902c 100644 --- a/arch/mips/configs/loongson3_defconfig +++ b/arch/mips/configs/loongson3_defconfig @@ -24,7 +24,6 @@ CONFIG_BLK_DEV_INITRD=y CONFIG_SYSCTL_SYSCALL=y CONFIG_EMBEDDED=y CONFIG_MACH_LOONGSON64=y -CONFIG_LOONGSON_MACH3X=y CONFIG_SMP=y CONFIG_HZ_256=y CONFIG_KEXEC=y diff --git a/arch/mips/include/asm/mach-loongson64/cpu-feature-overrides.h b/arch/mips/include/asm/mach-loongson64/cpu-feature-overrides.h index 895607eb81ca..7dc8d75445a9 100644 --- a/arch/mips/include/asm/mach-loongson64/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-loongson64/cpu-feature-overrides.h @@ -43,7 +43,6 @@ #define cpu_has_vint 0 #define cpu_has_vtag_icache 0 #define cpu_has_watch 1 - #define cpu_has_wsbh 1 #define cpu_has_ic_fills_f_dc 1 #define cpu_hwrena_impl_bits 0xc0000000 diff --git a/arch/mips/loongson64/Kconfig b/arch/mips/loongson64/Kconfig index 3215b768bb88..b1aefd06e3f5 100644 --- a/arch/mips/loongson64/Kconfig +++ b/arch/mips/loongson64/Kconfig @@ -1,45 +1,9 @@ # SPDX-License-Identifier: GPL-2.0 if MACH_LOONGSON64 -choice - prompt "Machine Type" - -config LOONGSON_MACH3X - bool "Generic Loongson 3 family machines" - select ARCH_SPARSEMEM_ENABLE - select ARCH_MIGHT_HAVE_PC_PARPORT - select ARCH_MIGHT_HAVE_PC_SERIO - select GENERIC_ISA_DMA_SUPPORT_BROKEN - select BOOT_ELF32 - select BOARD_SCACHE - select CSRC_R4K - select CEVT_R4K - select CPU_HAS_WB - select FORCE_PCI - select ISA - select I8259 - select IRQ_MIPS_CPU - select NR_CPUS_DEFAULT_4 - select USE_GENERIC_EARLY_PRINTK_8250 - select SYS_HAS_CPU_LOONGSON64 - select SYS_HAS_EARLY_PRINTK - select SYS_SUPPORTS_SMP - select SYS_SUPPORTS_HOTPLUG_CPU - select SYS_SUPPORTS_NUMA - select SYS_SUPPORTS_64BIT_KERNEL - select SYS_SUPPORTS_HIGHMEM - select SYS_SUPPORTS_LITTLE_ENDIAN - select LOONGSON_MC146818 - select ZONE_DMA32 - select NUMA - help - Generic Loongson 3 family machines utilize the 3A/3B revision - of Loongson processor and RS780/SBX00 chipset. -endchoice - config RS780_HPET bool "RS780/SBX00 HPET Timer" - depends on LOONGSON_MACH3X + depends on CONFIG_MACH_LOONGSON64 select MIPS_EXTERNAL_TIMER help This option enables the hpet timer of AMD RS780/SBX00. 
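The acpi_init.c moved below drives the SBX00 power-management block through an index/data port pair (PM_INDEX/PM_DATA at 0xCD6/0xCD7): the register number is written to the index port, then the value is exchanged through the data port, the same convention as the classic CMOS RTC ports. The sketch below simulates the two ports with a plain array so the read-modify-write pattern can run anywhere; the real code uses outb()/inb() on the actual I/O ports.

#include <stdio.h>
#include <stdint.h>

static uint8_t pm_regs[256]; /* simulated PM register file          */
static uint8_t pm_index;     /* simulated index latch (port 0xCD6)  */

static void outb_sim(uint8_t value, int port)
{
	if (port == 0xCD6)      /* PM_INDEX: latch the register number */
		pm_index = value;
	else if (port == 0xCD7) /* PM_DATA: write the latched register */
		pm_regs[pm_index] = value;
}

static uint8_t inb_sim(int port)
{
	return (port == 0xCD7) ? pm_regs[pm_index] : 0;
}

/* Same shape as pmio_write_index()/pmio_read_index() in acpi_init.c */
static void pm_iowrite_sim(uint8_t reg, uint8_t value)
{
	outb_sim(reg, 0xCD6);
	outb_sim(value, 0xCD7);
}

static uint8_t pm_ioread_sim(uint8_t reg)
{
	outb_sim(reg, 0xCD6);
	return inb_sim(0xCD7);
}

int main(void)
{
	/* "IO Decode: AcpiDecodeEnable" style read-modify-write */
	pm_iowrite_sim(0x0e, pm_ioread_sim(0x0e) | (1 << 3));
	printf("PM reg 0x0e = %#x\n", pm_ioread_sim(0x0e));
	return 0;
}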
diff --git a/arch/mips/loongson64/Makefile b/arch/mips/loongson64/Makefile index dc16a23c171f..0b3c65b52965 100644 --- a/arch/mips/loongson64/Makefile +++ b/arch/mips/loongson64/Makefile @@ -1,12 +1,13 @@ # SPDX-License-Identifier: GPL-2.0-only # -# Common code for all Loongson based systems +# Makefile for Loongson-3 family machines # +obj-$(CONFIG_MACH_LOONGSON64) += irq.o cop2-ex.o platform.o acpi_init.o dma.o \ + setup.o init.o cmdline.o env.o time.o reset.o \ -obj-$(CONFIG_MACH_LOONGSON64) += common/ - -# -# All Loongson-3 family machines -# - -obj-$(CONFIG_CPU_LOONGSON64) += loongson-3/ +obj-$(CONFIG_SMP) += smp.o +obj-$(CONFIG_NUMA) += numa.o +obj-$(CONFIG_RS780_HPET) += hpet.o +obj-$(CONFIG_PCI) += pci.o +obj-$(CONFIG_LOONGSON_MC146818) += rtc.o +obj-$(CONFIG_SUSPEND) += pm.o diff --git a/arch/mips/loongson64/acpi_init.c b/arch/mips/loongson64/acpi_init.c new file mode 100644 index 000000000000..8d7c119ddf91 --- /dev/null +++ b/arch/mips/loongson64/acpi_init.c @@ -0,0 +1,151 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include + +#define SBX00_ACPI_IO_BASE 0x800 +#define SBX00_ACPI_IO_SIZE 0x100 + +#define ACPI_PM_EVT_BLK (SBX00_ACPI_IO_BASE + 0x00) /* 4 bytes */ +#define ACPI_PM_CNT_BLK (SBX00_ACPI_IO_BASE + 0x04) /* 2 bytes */ +#define ACPI_PMA_CNT_BLK (SBX00_ACPI_IO_BASE + 0x0F) /* 1 byte */ +#define ACPI_PM_TMR_BLK (SBX00_ACPI_IO_BASE + 0x18) /* 4 bytes */ +#define ACPI_GPE0_BLK (SBX00_ACPI_IO_BASE + 0x10) /* 8 bytes */ +#define ACPI_END (SBX00_ACPI_IO_BASE + 0x80) + +#define PM_INDEX 0xCD6 +#define PM_DATA 0xCD7 +#define PM2_INDEX 0xCD0 +#define PM2_DATA 0xCD1 + +/* + * SCI interrupt need acpi space, allocate here + */ + +static int __init register_acpi_resource(void) +{ + request_region(SBX00_ACPI_IO_BASE, SBX00_ACPI_IO_SIZE, "acpi"); + return 0; +} + +static void pmio_write_index(u16 index, u8 reg, u8 value) +{ + outb(reg, index); + outb(value, index + 1); +} + +static u8 pmio_read_index(u16 index, u8 reg) +{ + outb(reg, index); + return inb(index + 1); +} + +void pm_iowrite(u8 reg, u8 value) +{ + pmio_write_index(PM_INDEX, reg, value); +} +EXPORT_SYMBOL(pm_iowrite); + +u8 pm_ioread(u8 reg) +{ + return pmio_read_index(PM_INDEX, reg); +} +EXPORT_SYMBOL(pm_ioread); + +void pm2_iowrite(u8 reg, u8 value) +{ + pmio_write_index(PM2_INDEX, reg, value); +} +EXPORT_SYMBOL(pm2_iowrite); + +u8 pm2_ioread(u8 reg) +{ + return pmio_read_index(PM2_INDEX, reg); +} +EXPORT_SYMBOL(pm2_ioread); + +static void acpi_hw_clear_status(void) +{ + u16 value; + + /* PMStatus: Clear WakeStatus/PwrBtnStatus */ + value = inw(ACPI_PM_EVT_BLK); + value |= (1 << 8 | 1 << 15); + outw(value, ACPI_PM_EVT_BLK); + + /* GPEStatus: Clear all generated events */ + outl(inl(ACPI_GPE0_BLK), ACPI_GPE0_BLK); +} + +void acpi_registers_setup(void) +{ + u32 value; + + /* PM Status Base */ + pm_iowrite(0x20, ACPI_PM_EVT_BLK & 0xff); + pm_iowrite(0x21, ACPI_PM_EVT_BLK >> 8); + + /* PM Control Base */ + pm_iowrite(0x22, ACPI_PM_CNT_BLK & 0xff); + pm_iowrite(0x23, ACPI_PM_CNT_BLK >> 8); + + /* GPM Base */ + pm_iowrite(0x28, ACPI_GPE0_BLK & 0xff); + pm_iowrite(0x29, ACPI_GPE0_BLK >> 8); + + /* ACPI End */ + pm_iowrite(0x2e, ACPI_END & 0xff); + pm_iowrite(0x2f, ACPI_END >> 8); + + /* IO Decode: When AcpiDecodeEnable set, South-Bridge uses the contents + * of the PM registers at index 0x20~0x2B to decode ACPI I/O address. 
*/ + pm_iowrite(0x0e, 1 << 3); + + /* SCI_EN set */ + outw(1, ACPI_PM_CNT_BLK); + + /* Enable to generate SCI */ + pm_iowrite(0x10, pm_ioread(0x10) | 1); + + /* GPM3/GPM9 enable */ + value = inl(ACPI_GPE0_BLK + 4); + outl(value | (1 << 14) | (1 << 22), ACPI_GPE0_BLK + 4); + + /* Set GPM9 as input */ + pm_iowrite(0x8d, pm_ioread(0x8d) & (~(1 << 1))); + + /* Set GPM9 as non-output */ + pm_iowrite(0x94, pm_ioread(0x94) | (1 << 3)); + + /* GPM3 config ACPI trigger SCIOUT */ + pm_iowrite(0x33, pm_ioread(0x33) & (~(3 << 4))); + + /* GPM9 config ACPI trigger SCIOUT */ + pm_iowrite(0x3d, pm_ioread(0x3d) & (~(3 << 2))); + + /* GPM3 config falling edge trigger */ + pm_iowrite(0x37, pm_ioread(0x37) & (~(1 << 6))); + + /* No wait for STPGNT# in ACPI Sx state */ + pm_iowrite(0x7c, pm_ioread(0x7c) | (1 << 6)); + + /* Set GPM3 pull-down enable */ + value = pm2_ioread(0xf6); + value |= ((1 << 7) | (1 << 3)); + pm2_iowrite(0xf6, value); + + /* Set GPM9 pull-down enable */ + value = pm2_ioread(0xf8); + value |= ((1 << 5) | (1 << 1)); + pm2_iowrite(0xf8, value); +} + +int __init sbx00_acpi_init(void) +{ + register_acpi_resource(); + acpi_registers_setup(); + acpi_hw_clear_status(); + + return 0; +} diff --git a/arch/mips/loongson64/cmdline.c b/arch/mips/loongson64/cmdline.c new file mode 100644 index 000000000000..fb1644b01471 --- /dev/null +++ b/arch/mips/loongson64/cmdline.c @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Based on Ocelot Linux port, which is + * Copyright 2001 MontaVista Software Inc. + * Author: jsun@mvista.com or jsun@junsun.net + * + * Copyright 2003 ICT CAS + * Author: Michael Guo + * + * Copyright (C) 2007 Lemote Inc. & Institute of Computing Technology + * Author: Fuxin Zhang, zhangfx@lemote.com + * + * Copyright (C) 2009 Lemote Inc. + * Author: Wu Zhangjin, wuzhangjin@gmail.com + */ +#include + +#include + +void __init prom_init_cmdline(void) +{ + int prom_argc; + /* pmon passes arguments in 32bit pointers */ + int *_prom_argv; + int i; + long l; + + /* firmware arguments are initialized in head.S */ + prom_argc = fw_arg0; + _prom_argv = (int *)fw_arg1; + + /* arg[0] is "g", the rest is boot parameters */ + arcs_cmdline[0] = '\0'; + for (i = 1; i < prom_argc; i++) { + l = (long)_prom_argv[i]; + if (strlen(arcs_cmdline) + strlen(((char *)l) + 1) + >= sizeof(arcs_cmdline)) + break; + strcat(arcs_cmdline, ((char *)l)); + strcat(arcs_cmdline, " "); + } +} diff --git a/arch/mips/loongson64/common/Makefile b/arch/mips/loongson64/common/Makefile deleted file mode 100644 index c476401c9b13..000000000000 --- a/arch/mips/loongson64/common/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -# -# Makefile for loongson based machines. -# - -obj-y += setup.o init.o cmdline.o env.o time.o reset.o - -obj-$(CONFIG_PCI) += pci.o - -obj-$(CONFIG_LOONGSON_MC146818) += rtc.o - -# -# Suspend Support -# - -obj-$(CONFIG_SUSPEND) += pm.o diff --git a/arch/mips/loongson64/common/cmdline.c b/arch/mips/loongson64/common/cmdline.c deleted file mode 100644 index fb1644b01471..000000000000 --- a/arch/mips/loongson64/common/cmdline.c +++ /dev/null @@ -1,42 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Based on Ocelot Linux port, which is - * Copyright 2001 MontaVista Software Inc. - * Author: jsun@mvista.com or jsun@junsun.net - * - * Copyright 2003 ICT CAS - * Author: Michael Guo - * - * Copyright (C) 2007 Lemote Inc. & Institute of Computing Technology - * Author: Fuxin Zhang, zhangfx@lemote.com - * - * Copyright (C) 2009 Lemote Inc. 
- * Author: Wu Zhangjin, wuzhangjin@gmail.com - */ -#include - -#include - -void __init prom_init_cmdline(void) -{ - int prom_argc; - /* pmon passes arguments in 32bit pointers */ - int *_prom_argv; - int i; - long l; - - /* firmware arguments are initialized in head.S */ - prom_argc = fw_arg0; - _prom_argv = (int *)fw_arg1; - - /* arg[0] is "g", the rest is boot parameters */ - arcs_cmdline[0] = '\0'; - for (i = 1; i < prom_argc; i++) { - l = (long)_prom_argv[i]; - if (strlen(arcs_cmdline) + strlen(((char *)l) + 1) - >= sizeof(arcs_cmdline)) - break; - strcat(arcs_cmdline, ((char *)l)); - strcat(arcs_cmdline, " "); - } -} diff --git a/arch/mips/loongson64/common/env.c b/arch/mips/loongson64/common/env.c deleted file mode 100644 index 0daeb7bcf023..000000000000 --- a/arch/mips/loongson64/common/env.c +++ /dev/null @@ -1,158 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Based on Ocelot Linux port, which is - * Copyright 2001 MontaVista Software Inc. - * Author: jsun@mvista.com or jsun@junsun.net - * - * Copyright 2003 ICT CAS - * Author: Michael Guo - * - * Copyright (C) 2007 Lemote Inc. & Institute of Computing Technology - * Author: Fuxin Zhang, zhangfx@lemote.com - * - * Copyright (C) 2009 Lemote Inc. - * Author: Wu Zhangjin, wuzhangjin@gmail.com - */ -#include -#include -#include -#include -#include - -u32 cpu_clock_freq; -EXPORT_SYMBOL(cpu_clock_freq); -struct efi_memory_map_loongson *loongson_memmap; -struct loongson_system_configuration loongson_sysconf; - -u64 loongson_chipcfg[MAX_PACKAGES] = {0xffffffffbfc00180}; -u64 loongson_chiptemp[MAX_PACKAGES]; -u64 loongson_freqctrl[MAX_PACKAGES]; - -unsigned long long smp_group[4]; - -const char *get_system_type(void) -{ - return "Generic Loongson64 System"; -} - -void __init prom_init_env(void) -{ - struct boot_params *boot_p; - struct loongson_params *loongson_p; - struct system_loongson *esys; - struct efi_cpuinfo_loongson *ecpu; - struct irq_source_routing_table *eirq_source; - - /* firmware arguments are initialized in head.S */ - boot_p = (struct boot_params *)fw_arg2; - loongson_p = &(boot_p->efi.smbios.lp); - - esys = (struct system_loongson *) - ((u64)loongson_p + loongson_p->system_offset); - ecpu = (struct efi_cpuinfo_loongson *) - ((u64)loongson_p + loongson_p->cpu_offset); - eirq_source = (struct irq_source_routing_table *) - ((u64)loongson_p + loongson_p->irq_offset); - loongson_memmap = (struct efi_memory_map_loongson *) - ((u64)loongson_p + loongson_p->memory_offset); - - cpu_clock_freq = ecpu->cpu_clock_freq; - loongson_sysconf.cputype = ecpu->cputype; - switch (ecpu->cputype) { - case Legacy_3A: - case Loongson_3A: - loongson_sysconf.cores_per_node = 4; - loongson_sysconf.cores_per_package = 4; - smp_group[0] = 0x900000003ff01000; - smp_group[1] = 0x900010003ff01000; - smp_group[2] = 0x900020003ff01000; - smp_group[3] = 0x900030003ff01000; - loongson_chipcfg[0] = 0x900000001fe00180; - loongson_chipcfg[1] = 0x900010001fe00180; - loongson_chipcfg[2] = 0x900020001fe00180; - loongson_chipcfg[3] = 0x900030001fe00180; - loongson_chiptemp[0] = 0x900000001fe0019c; - loongson_chiptemp[1] = 0x900010001fe0019c; - loongson_chiptemp[2] = 0x900020001fe0019c; - loongson_chiptemp[3] = 0x900030001fe0019c; - loongson_freqctrl[0] = 0x900000001fe001d0; - loongson_freqctrl[1] = 0x900010001fe001d0; - loongson_freqctrl[2] = 0x900020001fe001d0; - loongson_freqctrl[3] = 0x900030001fe001d0; - loongson_sysconf.ht_control_base = 0x90000EFDFB000000; - loongson_sysconf.workarounds = WORKAROUND_CPUFREQ; - break; - case Legacy_3B: - case 
Loongson_3B: - loongson_sysconf.cores_per_node = 4; /* One chip has 2 nodes */ - loongson_sysconf.cores_per_package = 8; - smp_group[0] = 0x900000003ff01000; - smp_group[1] = 0x900010003ff05000; - smp_group[2] = 0x900020003ff09000; - smp_group[3] = 0x900030003ff0d000; - loongson_chipcfg[0] = 0x900000001fe00180; - loongson_chipcfg[1] = 0x900020001fe00180; - loongson_chipcfg[2] = 0x900040001fe00180; - loongson_chipcfg[3] = 0x900060001fe00180; - loongson_chiptemp[0] = 0x900000001fe0019c; - loongson_chiptemp[1] = 0x900020001fe0019c; - loongson_chiptemp[2] = 0x900040001fe0019c; - loongson_chiptemp[3] = 0x900060001fe0019c; - loongson_freqctrl[0] = 0x900000001fe001d0; - loongson_freqctrl[1] = 0x900020001fe001d0; - loongson_freqctrl[2] = 0x900040001fe001d0; - loongson_freqctrl[3] = 0x900060001fe001d0; - loongson_sysconf.ht_control_base = 0x90001EFDFB000000; - loongson_sysconf.workarounds = WORKAROUND_CPUHOTPLUG; - break; - default: - loongson_sysconf.cores_per_node = 1; - loongson_sysconf.cores_per_package = 1; - loongson_chipcfg[0] = 0x900000001fe00180; - } - - loongson_sysconf.nr_cpus = ecpu->nr_cpus; - loongson_sysconf.boot_cpu_id = ecpu->cpu_startup_core_id; - loongson_sysconf.reserved_cpus_mask = ecpu->reserved_cores_mask; - if (ecpu->nr_cpus > NR_CPUS || ecpu->nr_cpus == 0) - loongson_sysconf.nr_cpus = NR_CPUS; - loongson_sysconf.nr_nodes = (loongson_sysconf.nr_cpus + - loongson_sysconf.cores_per_node - 1) / - loongson_sysconf.cores_per_node; - - loongson_sysconf.pci_mem_start_addr = eirq_source->pci_mem_start_addr; - loongson_sysconf.pci_mem_end_addr = eirq_source->pci_mem_end_addr; - loongson_sysconf.pci_io_base = eirq_source->pci_io_start_addr; - loongson_sysconf.dma_mask_bits = eirq_source->dma_mask_bits; - if (loongson_sysconf.dma_mask_bits < 32 || - loongson_sysconf.dma_mask_bits > 64) - loongson_sysconf.dma_mask_bits = 32; - - loongson_sysconf.restart_addr = boot_p->reset_system.ResetWarm; - loongson_sysconf.poweroff_addr = boot_p->reset_system.Shutdown; - loongson_sysconf.suspend_addr = boot_p->reset_system.DoSuspend; - - loongson_sysconf.vgabios_addr = boot_p->efi.smbios.vga_bios; - pr_debug("Shutdown Addr: %llx, Restart Addr: %llx, VBIOS Addr: %llx\n", - loongson_sysconf.poweroff_addr, loongson_sysconf.restart_addr, - loongson_sysconf.vgabios_addr); - - memset(loongson_sysconf.ecname, 0, 32); - if (esys->has_ec) - memcpy(loongson_sysconf.ecname, esys->ec_name, 32); - loongson_sysconf.workarounds |= esys->workarounds; - - loongson_sysconf.nr_uarts = esys->nr_uarts; - if (esys->nr_uarts < 1 || esys->nr_uarts > MAX_UARTS) - loongson_sysconf.nr_uarts = 1; - memcpy(loongson_sysconf.uarts, esys->uarts, - sizeof(struct uart_device) * loongson_sysconf.nr_uarts); - - loongson_sysconf.nr_sensors = esys->nr_sensors; - if (loongson_sysconf.nr_sensors > MAX_SENSORS) - loongson_sysconf.nr_sensors = 0; - if (loongson_sysconf.nr_sensors) - memcpy(loongson_sysconf.sensors, esys->sensors, - sizeof(struct sensor_device) * loongson_sysconf.nr_sensors); - pr_info("CpuClock = %u\n", cpu_clock_freq); -} diff --git a/arch/mips/loongson64/common/init.c b/arch/mips/loongson64/common/init.c deleted file mode 100644 index 48b44f415059..000000000000 --- a/arch/mips/loongson64/common/init.c +++ /dev/null @@ -1,45 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) 2009 Lemote Inc. 
- * Author: Wu Zhangjin, wuzhangjin@gmail.com - */ - -#include -#include -#include -#include -#include - -#include - -static void __init mips_nmi_setup(void) -{ - void *base; - extern char except_vec_nmi; - - base = (void *)(CAC_BASE + 0x380); - memcpy(base, &except_vec_nmi, 0x80); - flush_icache_range((unsigned long)base, (unsigned long)base + 0x80); -} - -void __init prom_init(void) -{ - prom_init_cmdline(); - prom_init_env(); - - /* init base address of io space */ - set_io_port_base((unsigned long) - ioremap(LOONGSON_PCIIO_BASE, LOONGSON_PCIIO_SIZE)); - - prom_init_numa_memory(); - - /* Hardcode to CPU UART 0 */ - setup_8250_early_printk_port(TO_UNCAC(LOONGSON_REG_BASE + 0x1e0), 0, 1024); - - register_smp_ops(&loongson3_smp_ops); - board_nmi_handler_setup = mips_nmi_setup; -} - -void __init prom_free_prom_memory(void) -{ -} diff --git a/arch/mips/loongson64/common/pci.c b/arch/mips/loongson64/common/pci.c deleted file mode 100644 index 7bbe2388f38e..000000000000 --- a/arch/mips/loongson64/common/pci.c +++ /dev/null @@ -1,94 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) 2007 Lemote, Inc. & Institute of Computing Technology - * Author: Fuxin Zhang, zhangfx@lemote.com - */ -#include - -#include -#include -#include - -static struct resource loongson_pci_mem_resource = { - .name = "pci memory space", - .start = LOONGSON_PCI_MEM_START, - .end = LOONGSON_PCI_MEM_END, - .flags = IORESOURCE_MEM, -}; - -static struct resource loongson_pci_io_resource = { - .name = "pci io space", - .start = LOONGSON_PCI_IO_START, - .end = IO_SPACE_LIMIT, - .flags = IORESOURCE_IO, -}; - -static struct pci_controller loongson_pci_controller = { - .pci_ops = &loongson_pci_ops, - .io_resource = &loongson_pci_io_resource, - .mem_resource = &loongson_pci_mem_resource, - .mem_offset = 0x00000000UL, - .io_offset = 0x00000000UL, -}; - -static void __init setup_pcimap(void) -{ - /* - * local to PCI mapping for CPU accessing PCI space - * CPU address space [256M,448M] is window for accessing pci space - * we set pcimap_lo[0,1,2] to map it to pci space[0M,64M], [320M,448M] - * - * pcimap: PCI_MAP2 PCI_Mem_Lo2 PCI_Mem_Lo1 PCI_Mem_Lo0 - * [<2G] [384M,448M] [320M,384M] [0M,64M] - */ - LOONGSON_PCIMAP = LOONGSON_PCIMAP_PCIMAP_2 | - LOONGSON_PCIMAP_WIN(2, LOONGSON_PCILO2_BASE) | - LOONGSON_PCIMAP_WIN(1, LOONGSON_PCILO1_BASE) | - LOONGSON_PCIMAP_WIN(0, 0); - - /* - * PCI-DMA to local mapping: [2G,2G+256M] -> [0M,256M] - */ - LOONGSON_PCIBASE0 = 0x80000000ul; /* base: 2G -> mmap: 0M */ - /* size: 256M, burst transmission, pre-fetch enable, 64bit */ - LOONGSON_PCI_HIT0_SEL_L = 0xc000000cul; - LOONGSON_PCI_HIT0_SEL_H = 0xfffffffful; - LOONGSON_PCI_HIT1_SEL_L = 0x00000006ul; /* set this BAR as invalid */ - LOONGSON_PCI_HIT1_SEL_H = 0x00000000ul; - LOONGSON_PCI_HIT2_SEL_L = 0x00000006ul; /* set this BAR as invalid */ - LOONGSON_PCI_HIT2_SEL_H = 0x00000000ul; - - /* avoid deadlock of PCI reading/writing lock operation */ - LOONGSON_PCI_ISR4C = 0xd2000001ul; - - /* can not change gnt to break pci transfer when device's gnt not - deassert for some broken device */ - LOONGSON_PXARB_CFG = 0x00fe0105ul; - -#ifdef CONFIG_CPU_SUPPORTS_ADDRWINCFG - /* - * set cpu addr window2 to map CPU address space to PCI address space - */ - LOONGSON_ADDRWIN_CPUTOPCI(ADDRWIN_WIN2, LOONGSON_CPU_MEM_SRC, - LOONGSON_PCI_MEM_DST, MMAP_CPUTOPCI_SIZE); -#endif -} - -extern int sbx00_acpi_init(void); - -static int __init pcibios_init(void) -{ - setup_pcimap(); - - loongson_pci_controller.io_map_base = mips_io_port_base; - 
loongson_pci_mem_resource.start = loongson_sysconf.pci_mem_start_addr; - loongson_pci_mem_resource.end = loongson_sysconf.pci_mem_end_addr; - - register_pci_controller(&loongson_pci_controller); - - sbx00_acpi_init(); - - return 0; -} - -arch_initcall(pcibios_init); diff --git a/arch/mips/loongson64/common/pm.c b/arch/mips/loongson64/common/pm.c deleted file mode 100644 index 7c8556f09781..000000000000 --- a/arch/mips/loongson64/common/pm.c +++ /dev/null @@ -1,104 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * loongson-specific suspend support - * - * Copyright (C) 2009 Lemote Inc. - * Author: Wu Zhangjin - */ -#include -#include -#include - -#include -#include - -#include - -static unsigned int __maybe_unused cached_master_mask; /* i8259A */ -static unsigned int __maybe_unused cached_slave_mask; -static unsigned int __maybe_unused cached_bonito_irq_mask; /* bonito */ - -void arch_suspend_disable_irqs(void) -{ - /* disable all mips events */ - local_irq_disable(); - -#ifdef CONFIG_I8259 - /* disable all events of i8259A */ - cached_slave_mask = inb(PIC_SLAVE_IMR); - cached_master_mask = inb(PIC_MASTER_IMR); - - outb(0xff, PIC_SLAVE_IMR); - inb(PIC_SLAVE_IMR); - outb(0xff, PIC_MASTER_IMR); - inb(PIC_MASTER_IMR); -#endif - /* disable all events of bonito */ - cached_bonito_irq_mask = LOONGSON_INTEN; - LOONGSON_INTENCLR = 0xffff; - (void)LOONGSON_INTENCLR; -} - -void arch_suspend_enable_irqs(void) -{ - /* enable all mips events */ - local_irq_enable(); -#ifdef CONFIG_I8259 - /* only enable the cached events of i8259A */ - outb(cached_slave_mask, PIC_SLAVE_IMR); - outb(cached_master_mask, PIC_MASTER_IMR); -#endif - /* enable all cached events of bonito */ - LOONGSON_INTENSET = cached_bonito_irq_mask; - (void)LOONGSON_INTENSET; -} - -/* - * Setup the board-specific events for waking up loongson from wait mode - */ -void __weak setup_wakeup_events(void) -{ -} - -void __weak mach_suspend(void) -{ -} - -void __weak mach_resume(void) -{ -} - -static int loongson_pm_enter(suspend_state_t state) -{ - mach_suspend(); - - mach_resume(); - - return 0; -} - -static int loongson_pm_valid_state(suspend_state_t state) -{ - switch (state) { - case PM_SUSPEND_ON: - case PM_SUSPEND_STANDBY: - case PM_SUSPEND_MEM: - return 1; - - default: - return 0; - } -} - -static const struct platform_suspend_ops loongson_pm_ops = { - .valid = loongson_pm_valid_state, - .enter = loongson_pm_enter, -}; - -static int __init loongson_pm_init(void) -{ - suspend_set_ops(&loongson_pm_ops); - - return 0; -} -arch_initcall(loongson_pm_init); diff --git a/arch/mips/loongson64/common/reset.c b/arch/mips/loongson64/common/reset.c deleted file mode 100644 index 88b3bd5fed25..000000000000 --- a/arch/mips/loongson64/common/reset.c +++ /dev/null @@ -1,64 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * - * Copyright (C) 2007 Lemote, Inc. & Institute of Computing Technology - * Author: Fuxin Zhang, zhangfx@lemote.com - * Copyright (C) 2009 Lemote, Inc. 
- * Author: Zhangjin Wu, wuzhangjin@gmail.com - */ -#include -#include - -#include -#include - -#include -#include - -static inline void loongson_reboot(void) -{ - ((void (*)(void))ioremap_nocache(LOONGSON_BOOT_BASE, 4)) (); -} - -static void loongson_restart(char *command) -{ - - void (*fw_restart)(void) = (void *)loongson_sysconf.restart_addr; - - fw_restart(); - while (1) { - if (cpu_wait) - cpu_wait(); - } -} - -static void loongson_poweroff(void) -{ - void (*fw_poweroff)(void) = (void *)loongson_sysconf.poweroff_addr; - - fw_poweroff(); - while (1) { - if (cpu_wait) - cpu_wait(); - } -} - -static void loongson_halt(void) -{ - pr_notice("\n\n** You can safely turn off the power now **\n\n"); - while (1) { - if (cpu_wait) - cpu_wait(); - } -} - -static int __init mips_reboot_setup(void) -{ - _machine_restart = loongson_restart; - _machine_halt = loongson_halt; - pm_power_off = loongson_poweroff; - - return 0; -} - -arch_initcall(mips_reboot_setup); diff --git a/arch/mips/loongson64/common/rtc.c b/arch/mips/loongson64/common/rtc.c deleted file mode 100644 index 8d7628c0f513..000000000000 --- a/arch/mips/loongson64/common/rtc.c +++ /dev/null @@ -1,39 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Lemote Fuloong platform support - * - * Copyright(c) 2010 Arnaud Patard - */ - -#include -#include -#include -#include - -static struct resource loongson_rtc_resources[] = { - { - .start = RTC_PORT(0), - .end = RTC_PORT(1), - .flags = IORESOURCE_IO, - }, { - .start = RTC_IRQ, - .end = RTC_IRQ, - .flags = IORESOURCE_IRQ, - } -}; - -static struct platform_device loongson_rtc_device = { - .name = "rtc_cmos", - .id = -1, - .resource = loongson_rtc_resources, - .num_resources = ARRAY_SIZE(loongson_rtc_resources), -}; - - -static int __init loongson_rtc_platform_init(void) -{ - platform_device_register(&loongson_rtc_device); - return 0; -} - -device_initcall(loongson_rtc_platform_init); diff --git a/arch/mips/loongson64/common/setup.c b/arch/mips/loongson64/common/setup.c deleted file mode 100644 index 4fd27f4f90ed..000000000000 --- a/arch/mips/loongson64/common/setup.c +++ /dev/null @@ -1,30 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) 2007 Lemote Inc. & Institute of Computing Technology - * Author: Fuxin Zhang, zhangfx@lemote.com - */ -#include -#include - -#include -#include - -#include - -static void wbflush_loongson(void) -{ - asm(".set\tpush\n\t" - ".set\tnoreorder\n\t" - ".set mips3\n\t" - "sync\n\t" - "nop\n\t" - ".set\tpop\n\t" - ".set mips0\n\t"); -} - -void (*__wbflush)(void) = wbflush_loongson; -EXPORT_SYMBOL(__wbflush); - -void __init plat_mem_setup(void) -{ -} diff --git a/arch/mips/loongson64/common/time.c b/arch/mips/loongson64/common/time.c deleted file mode 100644 index 1245f22cec84..000000000000 --- a/arch/mips/loongson64/common/time.c +++ /dev/null @@ -1,29 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) 2007 Lemote, Inc. & Institute of Computing Technology - * Author: Fuxin Zhang, zhangfx@lemote.com - * - * Copyright (C) 2009 Lemote Inc. 
- * Author: Wu Zhangjin, wuzhangjin@gmail.com - */ -#include -#include -#include - -#include - -void __init plat_time_init(void) -{ - /* setup mips r4k timer */ - mips_hpt_frequency = cpu_clock_freq / 2; - -#ifdef CONFIG_RS780_HPET - setup_hpet_timer(); -#endif -} - -void read_persistent_clock64(struct timespec64 *ts) -{ - ts->tv_sec = mc146818_get_cmos_time(); - ts->tv_nsec = 0; -} diff --git a/arch/mips/loongson64/cop2-ex.c b/arch/mips/loongson64/cop2-ex.c new file mode 100644 index 000000000000..9efdfe430ff0 --- /dev/null +++ b/arch/mips/loongson64/cop2-ex.c @@ -0,0 +1,61 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2014 Lemote Corporation. + * written by Huacai Chen + * + * based on arch/mips/cavium-octeon/cpu.c + * Copyright (C) 2009 Wind River Systems, + * written by Ralf Baechle + */ +#include +#include +#include +#include + +#include +#include +#include +#include + +static int loongson_cu2_call(struct notifier_block *nfb, unsigned long action, + void *data) +{ + int fpu_owned; + int fr = !test_thread_flag(TIF_32BIT_FPREGS); + + switch (action) { + case CU2_EXCEPTION: + preempt_disable(); + fpu_owned = __is_fpu_owner(); + if (!fr) + set_c0_status(ST0_CU1 | ST0_CU2); + else + set_c0_status(ST0_CU1 | ST0_CU2 | ST0_FR); + enable_fpu_hazard(); + KSTK_STATUS(current) |= (ST0_CU1 | ST0_CU2); + if (fr) + KSTK_STATUS(current) |= ST0_FR; + else + KSTK_STATUS(current) &= ~ST0_FR; + /* If FPU is owned, we needn't init or restore fp */ + if (!fpu_owned) { + set_thread_flag(TIF_USEDFPU); + init_fp_ctx(current); + _restore_fp(current); + } + preempt_enable(); + + return NOTIFY_STOP; /* Don't call default notifier */ + } + + return NOTIFY_OK; /* Let default notifier send signals */ +} + +static int __init loongson_cu2_setup(void) +{ + return cu2_notifier(loongson_cu2_call, 0); +} +early_initcall(loongson_cu2_setup); diff --git a/arch/mips/loongson64/dma.c b/arch/mips/loongson64/dma.c new file mode 100644 index 000000000000..5e86635f71db --- /dev/null +++ b/arch/mips/loongson64/dma.c @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include + +dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr) +{ + /* We extract 2bit node id (bit 44~47, only bit 44~45 used now) from + * Loongson-3's 48bit address space and embed it into 40bit */ + long nid = (paddr >> 44) & 0x3; + return ((nid << 44) ^ paddr) | (nid << 37); +} + +phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr) +{ + /* We extract 2bit node id (bit 44~47, only bit 44~45 used now) from + * Loongson-3's 48bit address space and embed it into 40bit */ + long nid = (daddr >> 37) & 0x3; + return ((nid << 37) ^ daddr) | (nid << 44); +} + +void __init plat_swiotlb_setup(void) +{ + swiotlb_init(1); +} diff --git a/arch/mips/loongson64/env.c b/arch/mips/loongson64/env.c new file mode 100644 index 000000000000..0daeb7bcf023 --- /dev/null +++ b/arch/mips/loongson64/env.c @@ -0,0 +1,158 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Based on Ocelot Linux port, which is + * Copyright 2001 MontaVista Software Inc. + * Author: jsun@mvista.com or jsun@junsun.net + * + * Copyright 2003 ICT CAS + * Author: Michael Guo + * + * Copyright (C) 2007 Lemote Inc. & Institute of Computing Technology + * Author: Fuxin Zhang, zhangfx@lemote.com + * + * Copyright (C) 2009 Lemote Inc. 
+ * Author: Wu Zhangjin, wuzhangjin@gmail.com + */ +#include +#include +#include +#include +#include + +u32 cpu_clock_freq; +EXPORT_SYMBOL(cpu_clock_freq); +struct efi_memory_map_loongson *loongson_memmap; +struct loongson_system_configuration loongson_sysconf; + +u64 loongson_chipcfg[MAX_PACKAGES] = {0xffffffffbfc00180}; +u64 loongson_chiptemp[MAX_PACKAGES]; +u64 loongson_freqctrl[MAX_PACKAGES]; + +unsigned long long smp_group[4]; + +const char *get_system_type(void) +{ + return "Generic Loongson64 System"; +} + +void __init prom_init_env(void) +{ + struct boot_params *boot_p; + struct loongson_params *loongson_p; + struct system_loongson *esys; + struct efi_cpuinfo_loongson *ecpu; + struct irq_source_routing_table *eirq_source; + + /* firmware arguments are initialized in head.S */ + boot_p = (struct boot_params *)fw_arg2; + loongson_p = &(boot_p->efi.smbios.lp); + + esys = (struct system_loongson *) + ((u64)loongson_p + loongson_p->system_offset); + ecpu = (struct efi_cpuinfo_loongson *) + ((u64)loongson_p + loongson_p->cpu_offset); + eirq_source = (struct irq_source_routing_table *) + ((u64)loongson_p + loongson_p->irq_offset); + loongson_memmap = (struct efi_memory_map_loongson *) + ((u64)loongson_p + loongson_p->memory_offset); + + cpu_clock_freq = ecpu->cpu_clock_freq; + loongson_sysconf.cputype = ecpu->cputype; + switch (ecpu->cputype) { + case Legacy_3A: + case Loongson_3A: + loongson_sysconf.cores_per_node = 4; + loongson_sysconf.cores_per_package = 4; + smp_group[0] = 0x900000003ff01000; + smp_group[1] = 0x900010003ff01000; + smp_group[2] = 0x900020003ff01000; + smp_group[3] = 0x900030003ff01000; + loongson_chipcfg[0] = 0x900000001fe00180; + loongson_chipcfg[1] = 0x900010001fe00180; + loongson_chipcfg[2] = 0x900020001fe00180; + loongson_chipcfg[3] = 0x900030001fe00180; + loongson_chiptemp[0] = 0x900000001fe0019c; + loongson_chiptemp[1] = 0x900010001fe0019c; + loongson_chiptemp[2] = 0x900020001fe0019c; + loongson_chiptemp[3] = 0x900030001fe0019c; + loongson_freqctrl[0] = 0x900000001fe001d0; + loongson_freqctrl[1] = 0x900010001fe001d0; + loongson_freqctrl[2] = 0x900020001fe001d0; + loongson_freqctrl[3] = 0x900030001fe001d0; + loongson_sysconf.ht_control_base = 0x90000EFDFB000000; + loongson_sysconf.workarounds = WORKAROUND_CPUFREQ; + break; + case Legacy_3B: + case Loongson_3B: + loongson_sysconf.cores_per_node = 4; /* One chip has 2 nodes */ + loongson_sysconf.cores_per_package = 8; + smp_group[0] = 0x900000003ff01000; + smp_group[1] = 0x900010003ff05000; + smp_group[2] = 0x900020003ff09000; + smp_group[3] = 0x900030003ff0d000; + loongson_chipcfg[0] = 0x900000001fe00180; + loongson_chipcfg[1] = 0x900020001fe00180; + loongson_chipcfg[2] = 0x900040001fe00180; + loongson_chipcfg[3] = 0x900060001fe00180; + loongson_chiptemp[0] = 0x900000001fe0019c; + loongson_chiptemp[1] = 0x900020001fe0019c; + loongson_chiptemp[2] = 0x900040001fe0019c; + loongson_chiptemp[3] = 0x900060001fe0019c; + loongson_freqctrl[0] = 0x900000001fe001d0; + loongson_freqctrl[1] = 0x900020001fe001d0; + loongson_freqctrl[2] = 0x900040001fe001d0; + loongson_freqctrl[3] = 0x900060001fe001d0; + loongson_sysconf.ht_control_base = 0x90001EFDFB000000; + loongson_sysconf.workarounds = WORKAROUND_CPUHOTPLUG; + break; + default: + loongson_sysconf.cores_per_node = 1; + loongson_sysconf.cores_per_package = 1; + loongson_chipcfg[0] = 0x900000001fe00180; + } + + loongson_sysconf.nr_cpus = ecpu->nr_cpus; + loongson_sysconf.boot_cpu_id = ecpu->cpu_startup_core_id; + loongson_sysconf.reserved_cpus_mask = ecpu->reserved_cores_mask; 
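+ /* The firmware-reported CPU count is untrusted; clamp it before deriving nr_nodes */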
+ if (ecpu->nr_cpus > NR_CPUS || ecpu->nr_cpus == 0) + loongson_sysconf.nr_cpus = NR_CPUS; + loongson_sysconf.nr_nodes = (loongson_sysconf.nr_cpus + + loongson_sysconf.cores_per_node - 1) / + loongson_sysconf.cores_per_node; + + loongson_sysconf.pci_mem_start_addr = eirq_source->pci_mem_start_addr; + loongson_sysconf.pci_mem_end_addr = eirq_source->pci_mem_end_addr; + loongson_sysconf.pci_io_base = eirq_source->pci_io_start_addr; + loongson_sysconf.dma_mask_bits = eirq_source->dma_mask_bits; + if (loongson_sysconf.dma_mask_bits < 32 || + loongson_sysconf.dma_mask_bits > 64) + loongson_sysconf.dma_mask_bits = 32; + + loongson_sysconf.restart_addr = boot_p->reset_system.ResetWarm; + loongson_sysconf.poweroff_addr = boot_p->reset_system.Shutdown; + loongson_sysconf.suspend_addr = boot_p->reset_system.DoSuspend; + + loongson_sysconf.vgabios_addr = boot_p->efi.smbios.vga_bios; + pr_debug("Shutdown Addr: %llx, Restart Addr: %llx, VBIOS Addr: %llx\n", + loongson_sysconf.poweroff_addr, loongson_sysconf.restart_addr, + loongson_sysconf.vgabios_addr); + + memset(loongson_sysconf.ecname, 0, 32); + if (esys->has_ec) + memcpy(loongson_sysconf.ecname, esys->ec_name, 32); + loongson_sysconf.workarounds |= esys->workarounds; + + loongson_sysconf.nr_uarts = esys->nr_uarts; + if (esys->nr_uarts < 1 || esys->nr_uarts > MAX_UARTS) + loongson_sysconf.nr_uarts = 1; + memcpy(loongson_sysconf.uarts, esys->uarts, + sizeof(struct uart_device) * loongson_sysconf.nr_uarts); + + loongson_sysconf.nr_sensors = esys->nr_sensors; + if (loongson_sysconf.nr_sensors > MAX_SENSORS) + loongson_sysconf.nr_sensors = 0; + if (loongson_sysconf.nr_sensors) + memcpy(loongson_sysconf.sensors, esys->sensors, + sizeof(struct sensor_device) * loongson_sysconf.nr_sensors); + pr_info("CpuClock = %u\n", cpu_clock_freq); +} diff --git a/arch/mips/loongson64/hpet.c b/arch/mips/loongson64/hpet.c new file mode 100644 index 000000000000..ed15430ad64f --- /dev/null +++ b/arch/mips/loongson64/hpet.c @@ -0,0 +1,289 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include + +#include +#include + +#define SMBUS_CFG_BASE (loongson_sysconf.ht_control_base + 0x0300a000) +#define SMBUS_PCI_REG40 0x40 +#define SMBUS_PCI_REG64 0x64 +#define SMBUS_PCI_REGB4 0xb4 + +#define HPET_MIN_CYCLES 16 +#define HPET_MIN_PROG_DELTA (HPET_MIN_CYCLES * 12) + +static DEFINE_SPINLOCK(hpet_lock); +DEFINE_PER_CPU(struct clock_event_device, hpet_clockevent_device); + +static unsigned int smbus_read(int offset) +{ + return *(volatile unsigned int *)(SMBUS_CFG_BASE + offset); +} + +static void smbus_write(int offset, int data) +{ + *(volatile unsigned int *)(SMBUS_CFG_BASE + offset) = data; +} + +static void smbus_enable(int offset, int bit) +{ + unsigned int cfg = smbus_read(offset); + + cfg |= bit; + smbus_write(offset, cfg); +} + +static int hpet_read(int offset) +{ + return *(volatile unsigned int *)(HPET_MMIO_ADDR + offset); +} + +static void hpet_write(int offset, int data) +{ + *(volatile unsigned int *)(HPET_MMIO_ADDR + offset) = data; +} + +static void hpet_start_counter(void) +{ + unsigned int cfg = hpet_read(HPET_CFG); + + cfg |= HPET_CFG_ENABLE; + hpet_write(HPET_CFG, cfg); +} + +static void hpet_stop_counter(void) +{ + unsigned int cfg = hpet_read(HPET_CFG); + + cfg &= ~HPET_CFG_ENABLE; + hpet_write(HPET_CFG, cfg); +} + +static void hpet_reset_counter(void) +{ + hpet_write(HPET_COUNTER, 0); + hpet_write(HPET_COUNTER + 4, 0); +} + +static void hpet_restart_counter(void) +{ + hpet_stop_counter(); + hpet_reset_counter(); + 
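/* the counter resumes counting from zero once re-enabled */
+ 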
hpet_start_counter(); +} + +static void hpet_enable_legacy_int(void) +{ + /* Do nothing on Loongson-3 */ +} + +static int hpet_set_state_periodic(struct clock_event_device *evt) +{ + int cfg; + + spin_lock(&hpet_lock); + + pr_info("set clock event to periodic mode!\n"); + /* stop counter */ + hpet_stop_counter(); + + /* enables the timer0 to generate a periodic interrupt */ + cfg = hpet_read(HPET_T0_CFG); + cfg &= ~HPET_TN_LEVEL; + cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL | + HPET_TN_32BIT; + hpet_write(HPET_T0_CFG, cfg); + + /* set the comparator */ + hpet_write(HPET_T0_CMP, HPET_COMPARE_VAL); + udelay(1); + hpet_write(HPET_T0_CMP, HPET_COMPARE_VAL); + + /* start counter */ + hpet_start_counter(); + + spin_unlock(&hpet_lock); + return 0; +} + +static int hpet_set_state_shutdown(struct clock_event_device *evt) +{ + int cfg; + + spin_lock(&hpet_lock); + + cfg = hpet_read(HPET_T0_CFG); + cfg &= ~HPET_TN_ENABLE; + hpet_write(HPET_T0_CFG, cfg); + + spin_unlock(&hpet_lock); + return 0; +} + +static int hpet_set_state_oneshot(struct clock_event_device *evt) +{ + int cfg; + + spin_lock(&hpet_lock); + + pr_info("set clock event to one shot mode!\n"); + cfg = hpet_read(HPET_T0_CFG); + /* + * set timer0 type + * 1 : periodic interrupt + * 0 : non-periodic(oneshot) interrupt + */ + cfg &= ~HPET_TN_PERIODIC; + cfg |= HPET_TN_ENABLE | HPET_TN_32BIT; + hpet_write(HPET_T0_CFG, cfg); + + spin_unlock(&hpet_lock); + return 0; +} + +static int hpet_tick_resume(struct clock_event_device *evt) +{ + spin_lock(&hpet_lock); + hpet_enable_legacy_int(); + spin_unlock(&hpet_lock); + + return 0; +} + +static int hpet_next_event(unsigned long delta, + struct clock_event_device *evt) +{ + u32 cnt; + s32 res; + + cnt = hpet_read(HPET_COUNTER); + cnt += (u32) delta; + hpet_write(HPET_T0_CMP, cnt); + + res = (s32)(cnt - hpet_read(HPET_COUNTER)); + + return res < HPET_MIN_CYCLES ? -ETIME : 0; +} + +static irqreturn_t hpet_irq_handler(int irq, void *data) +{ + int is_irq; + struct clock_event_device *cd; + unsigned int cpu = smp_processor_id(); + + is_irq = hpet_read(HPET_STATUS); + if (is_irq & HPET_T0_IRS) { + /* clear the TIMER0 irq status register */ + hpet_write(HPET_STATUS, HPET_T0_IRS); + cd = &per_cpu(hpet_clockevent_device, cpu); + cd->event_handler(cd); + return IRQ_HANDLED; + } + return IRQ_NONE; +} + +static struct irqaction hpet_irq = { + .handler = hpet_irq_handler, + .flags = IRQF_NOBALANCING | IRQF_TIMER, + .name = "hpet", +}; + +/* + * hpet address assignation and irq setting should be done in bios. + * but pmon don't do this, we just setup here directly. + * The operation under is normal. unfortunately, hpet_setup process + * is before pci initialize. + * + * { + * struct pci_dev *pdev; + * + * pdev = pci_get_device(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS, NULL); + * pci_write_config_word(pdev, SMBUS_PCI_REGB4, HPET_ADDR); + * + * ... 
+ * } + */ +static void hpet_setup(void) +{ + /* set hpet base address */ + smbus_write(SMBUS_PCI_REGB4, HPET_ADDR); + + /* enable decoding of access to HPET MMIO*/ + smbus_enable(SMBUS_PCI_REG40, (1 << 28)); + + /* HPET irq enable */ + smbus_enable(SMBUS_PCI_REG64, (1 << 10)); + + hpet_enable_legacy_int(); +} + +void __init setup_hpet_timer(void) +{ + unsigned int cpu = smp_processor_id(); + struct clock_event_device *cd; + + hpet_setup(); + + cd = &per_cpu(hpet_clockevent_device, cpu); + cd->name = "hpet"; + cd->rating = 100; + cd->features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT; + cd->set_state_shutdown = hpet_set_state_shutdown; + cd->set_state_periodic = hpet_set_state_periodic; + cd->set_state_oneshot = hpet_set_state_oneshot; + cd->tick_resume = hpet_tick_resume; + cd->set_next_event = hpet_next_event; + cd->irq = HPET_T0_IRQ; + cd->cpumask = cpumask_of(cpu); + clockevent_set_clock(cd, HPET_FREQ); + cd->max_delta_ns = clockevent_delta2ns(0x7fffffff, cd); + cd->max_delta_ticks = 0x7fffffff; + cd->min_delta_ns = clockevent_delta2ns(HPET_MIN_PROG_DELTA, cd); + cd->min_delta_ticks = HPET_MIN_PROG_DELTA; + + clockevents_register_device(cd); + setup_irq(HPET_T0_IRQ, &hpet_irq); + pr_info("hpet clock event device register\n"); +} + +static u64 hpet_read_counter(struct clocksource *cs) +{ + return (u64)hpet_read(HPET_COUNTER); +} + +static void hpet_suspend(struct clocksource *cs) +{ +} + +static void hpet_resume(struct clocksource *cs) +{ + hpet_setup(); + hpet_restart_counter(); +} + +static struct clocksource csrc_hpet = { + .name = "hpet", + /* mips clocksource rating is less than 300, so hpet is better. */ + .rating = 300, + .read = hpet_read_counter, + .mask = CLOCKSOURCE_MASK(32), + /* oneshot mode work normal with this flag */ + .flags = CLOCK_SOURCE_IS_CONTINUOUS, + .suspend = hpet_suspend, + .resume = hpet_resume, + .mult = 0, + .shift = 10, +}; + +int __init init_hpet_clocksource(void) +{ + csrc_hpet.mult = clocksource_hz2mult(HPET_FREQ, csrc_hpet.shift); + return clocksource_register_hz(&csrc_hpet, HPET_FREQ); +} + +arch_initcall(init_hpet_clocksource); diff --git a/arch/mips/loongson64/init.c b/arch/mips/loongson64/init.c new file mode 100644 index 000000000000..48b44f415059 --- /dev/null +++ b/arch/mips/loongson64/init.c @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2009 Lemote Inc. 
+ * Author: Wu Zhangjin, wuzhangjin@gmail.com + */ + +#include +#include +#include +#include +#include + +#include + +static void __init mips_nmi_setup(void) +{ + void *base; + extern char except_vec_nmi; + + base = (void *)(CAC_BASE + 0x380); + memcpy(base, &except_vec_nmi, 0x80); + flush_icache_range((unsigned long)base, (unsigned long)base + 0x80); +} + +void __init prom_init(void) +{ + prom_init_cmdline(); + prom_init_env(); + + /* init base address of io space */ + set_io_port_base((unsigned long) + ioremap(LOONGSON_PCIIO_BASE, LOONGSON_PCIIO_SIZE)); + + prom_init_numa_memory(); + + /* Hardcode to CPU UART 0 */ + setup_8250_early_printk_port(TO_UNCAC(LOONGSON_REG_BASE + 0x1e0), 0, 1024); + + register_smp_ops(&loongson3_smp_ops); + board_nmi_handler_setup = mips_nmi_setup; +} + +void __init prom_free_prom_memory(void) +{ +} diff --git a/arch/mips/loongson64/irq.c b/arch/mips/loongson64/irq.c new file mode 100644 index 000000000000..79ad797497e4 --- /dev/null +++ b/arch/mips/loongson64/irq.c @@ -0,0 +1,162 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include + +#include +#include +#include + +#include "smp.h" + +extern void loongson3_send_irq_by_ipi(int cpu, int irqs); + +unsigned int irq_cpu[16] = {[0 ... 15] = -1}; +unsigned int ht_irq[] = {0, 1, 3, 4, 5, 6, 7, 8, 12, 14, 15}; +unsigned int local_irq = 1<<0 | 1<<1 | 1<<2 | 1<<7 | 1<<8 | 1<<12; + +int plat_set_irq_affinity(struct irq_data *d, const struct cpumask *affinity, + bool force) +{ + unsigned int cpu; + struct cpumask new_affinity; + + /* I/O devices are connected on package-0 */ + cpumask_copy(&new_affinity, affinity); + for_each_cpu(cpu, affinity) + if (cpu_data[cpu].package > 0) + cpumask_clear_cpu(cpu, &new_affinity); + + if (cpumask_empty(&new_affinity)) + return -EINVAL; + + cpumask_copy(d->common->affinity, &new_affinity); + + return IRQ_SET_MASK_OK_NOCOPY; +} + +static void ht_irqdispatch(void) +{ + unsigned int i, irq; + struct irq_data *irqd; + struct cpumask affinity; + + irq = LOONGSON_HT1_INT_VECTOR(0); + LOONGSON_HT1_INT_VECTOR(0) = irq; /* Acknowledge the IRQs */ + + for (i = 0; i < ARRAY_SIZE(ht_irq); i++) { + if (!(irq & (0x1 << ht_irq[i]))) + continue; + + /* handled by local core */ + if (local_irq & (0x1 << ht_irq[i])) { + do_IRQ(ht_irq[i]); + continue; + } + + irqd = irq_get_irq_data(ht_irq[i]); + cpumask_and(&affinity, irqd->common->affinity, cpu_active_mask); + if (cpumask_empty(&affinity)) { + do_IRQ(ht_irq[i]); + continue; + } + + irq_cpu[ht_irq[i]] = cpumask_next(irq_cpu[ht_irq[i]], &affinity); + if (irq_cpu[ht_irq[i]] >= nr_cpu_ids) + irq_cpu[ht_irq[i]] = cpumask_first(&affinity); + + if (irq_cpu[ht_irq[i]] == 0) { + do_IRQ(ht_irq[i]); + continue; + } + + /* balanced by other cores */ + loongson3_send_irq_by_ipi(irq_cpu[ht_irq[i]], (0x1 << ht_irq[i])); + } +} + +#define UNUSED_IPS (CAUSEF_IP5 | CAUSEF_IP4 | CAUSEF_IP1 | CAUSEF_IP0) + +asmlinkage void plat_irq_dispatch(void) +{ + unsigned int pending; + + pending = read_c0_cause() & read_c0_status() & ST0_IM; + + if (pending & CAUSEF_IP7) + do_IRQ(LOONGSON_TIMER_IRQ); +#if defined(CONFIG_SMP) + if (pending & CAUSEF_IP6) + loongson3_ipi_interrupt(NULL); +#endif + if (pending & CAUSEF_IP3) + ht_irqdispatch(); + if (pending & CAUSEF_IP2) + do_IRQ(LOONGSON_UART_IRQ); + if (pending & UNUSED_IPS) { + pr_err("%s : spurious interrupt\n", __func__); + spurious_interrupt(); + } +} + +static inline void mask_loongson_irq(struct irq_data *d) { } +static inline void unmask_loongson_irq(struct irq_data *d) { } + + /* For MIPS IRQs which 
shared by all cores */ +static struct irq_chip loongson_irq_chip = { + .name = "Loongson", + .irq_ack = mask_loongson_irq, + .irq_mask = mask_loongson_irq, + .irq_mask_ack = mask_loongson_irq, + .irq_unmask = unmask_loongson_irq, + .irq_eoi = unmask_loongson_irq, +}; + +void irq_router_init(void) +{ + int i; + + /* route LPC int to cpu core0 int 0 */ + LOONGSON_INT_ROUTER_LPC = + LOONGSON_INT_COREx_INTy(loongson_sysconf.boot_cpu_id, 0); + /* route HT1 int0 ~ int7 to cpu core0 INT1*/ + for (i = 0; i < 8; i++) + LOONGSON_INT_ROUTER_HT1(i) = + LOONGSON_INT_COREx_INTy(loongson_sysconf.boot_cpu_id, 1); + /* enable HT1 interrupt */ + LOONGSON_HT1_INTN_EN(0) = 0xffffffff; + /* enable router interrupt intenset */ + LOONGSON_INT_ROUTER_INTENSET = + LOONGSON_INT_ROUTER_INTEN | (0xffff << 16) | 0x1 << 10; +} + +void __init arch_init_irq(void) +{ + struct irq_chip *chip; + + clear_c0_status(ST0_IM | ST0_BEV); + + irq_router_init(); + mips_cpu_irq_init(); + init_i8259_irqs(); + chip = irq_get_chip(I8259A_IRQ_BASE); + chip->irq_set_affinity = plat_set_irq_affinity; + + irq_set_chip_and_handler(LOONGSON_UART_IRQ, + &loongson_irq_chip, handle_percpu_irq); + irq_set_chip_and_handler(LOONGSON_BRIDGE_IRQ, + &loongson_irq_chip, handle_percpu_irq); + + set_c0_status(STATUSF_IP2 | STATUSF_IP3 | STATUSF_IP6); +} + +#ifdef CONFIG_HOTPLUG_CPU + +void fixup_irqs(void) +{ + irq_cpu_offline(); + clear_c0_status(ST0_IM); +} + +#endif diff --git a/arch/mips/loongson64/loongson-3/Makefile b/arch/mips/loongson64/loongson-3/Makefile deleted file mode 100644 index df39598742b2..000000000000 --- a/arch/mips/loongson64/loongson-3/Makefile +++ /dev/null @@ -1,11 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -# -# Makefile for Loongson-3 family machines -# -obj-y += irq.o cop2-ex.o platform.o acpi_init.o dma.o - -obj-$(CONFIG_SMP) += smp.o - -obj-$(CONFIG_NUMA) += numa.o - -obj-$(CONFIG_RS780_HPET) += hpet.o diff --git a/arch/mips/loongson64/loongson-3/acpi_init.c b/arch/mips/loongson64/loongson-3/acpi_init.c deleted file mode 100644 index 8d7c119ddf91..000000000000 --- a/arch/mips/loongson64/loongson-3/acpi_init.c +++ /dev/null @@ -1,151 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include -#include -#include -#include - -#define SBX00_ACPI_IO_BASE 0x800 -#define SBX00_ACPI_IO_SIZE 0x100 - -#define ACPI_PM_EVT_BLK (SBX00_ACPI_IO_BASE + 0x00) /* 4 bytes */ -#define ACPI_PM_CNT_BLK (SBX00_ACPI_IO_BASE + 0x04) /* 2 bytes */ -#define ACPI_PMA_CNT_BLK (SBX00_ACPI_IO_BASE + 0x0F) /* 1 byte */ -#define ACPI_PM_TMR_BLK (SBX00_ACPI_IO_BASE + 0x18) /* 4 bytes */ -#define ACPI_GPE0_BLK (SBX00_ACPI_IO_BASE + 0x10) /* 8 bytes */ -#define ACPI_END (SBX00_ACPI_IO_BASE + 0x80) - -#define PM_INDEX 0xCD6 -#define PM_DATA 0xCD7 -#define PM2_INDEX 0xCD0 -#define PM2_DATA 0xCD1 - -/* - * SCI interrupt need acpi space, allocate here - */ - -static int __init register_acpi_resource(void) -{ - request_region(SBX00_ACPI_IO_BASE, SBX00_ACPI_IO_SIZE, "acpi"); - return 0; -} - -static void pmio_write_index(u16 index, u8 reg, u8 value) -{ - outb(reg, index); - outb(value, index + 1); -} - -static u8 pmio_read_index(u16 index, u8 reg) -{ - outb(reg, index); - return inb(index + 1); -} - -void pm_iowrite(u8 reg, u8 value) -{ - pmio_write_index(PM_INDEX, reg, value); -} -EXPORT_SYMBOL(pm_iowrite); - -u8 pm_ioread(u8 reg) -{ - return pmio_read_index(PM_INDEX, reg); -} -EXPORT_SYMBOL(pm_ioread); - -void pm2_iowrite(u8 reg, u8 value) -{ - pmio_write_index(PM2_INDEX, reg, value); -} -EXPORT_SYMBOL(pm2_iowrite); - -u8 pm2_ioread(u8 reg) -{ - return 
pmio_read_index(PM2_INDEX, reg); -} -EXPORT_SYMBOL(pm2_ioread); - -static void acpi_hw_clear_status(void) -{ - u16 value; - - /* PMStatus: Clear WakeStatus/PwrBtnStatus */ - value = inw(ACPI_PM_EVT_BLK); - value |= (1 << 8 | 1 << 15); - outw(value, ACPI_PM_EVT_BLK); - - /* GPEStatus: Clear all generated events */ - outl(inl(ACPI_GPE0_BLK), ACPI_GPE0_BLK); -} - -void acpi_registers_setup(void) -{ - u32 value; - - /* PM Status Base */ - pm_iowrite(0x20, ACPI_PM_EVT_BLK & 0xff); - pm_iowrite(0x21, ACPI_PM_EVT_BLK >> 8); - - /* PM Control Base */ - pm_iowrite(0x22, ACPI_PM_CNT_BLK & 0xff); - pm_iowrite(0x23, ACPI_PM_CNT_BLK >> 8); - - /* GPM Base */ - pm_iowrite(0x28, ACPI_GPE0_BLK & 0xff); - pm_iowrite(0x29, ACPI_GPE0_BLK >> 8); - - /* ACPI End */ - pm_iowrite(0x2e, ACPI_END & 0xff); - pm_iowrite(0x2f, ACPI_END >> 8); - - /* IO Decode: When AcpiDecodeEnable set, South-Bridge uses the contents - * of the PM registers at index 0x20~0x2B to decode ACPI I/O address. */ - pm_iowrite(0x0e, 1 << 3); - - /* SCI_EN set */ - outw(1, ACPI_PM_CNT_BLK); - - /* Enable to generate SCI */ - pm_iowrite(0x10, pm_ioread(0x10) | 1); - - /* GPM3/GPM9 enable */ - value = inl(ACPI_GPE0_BLK + 4); - outl(value | (1 << 14) | (1 << 22), ACPI_GPE0_BLK + 4); - - /* Set GPM9 as input */ - pm_iowrite(0x8d, pm_ioread(0x8d) & (~(1 << 1))); - - /* Set GPM9 as non-output */ - pm_iowrite(0x94, pm_ioread(0x94) | (1 << 3)); - - /* GPM3 config ACPI trigger SCIOUT */ - pm_iowrite(0x33, pm_ioread(0x33) & (~(3 << 4))); - - /* GPM9 config ACPI trigger SCIOUT */ - pm_iowrite(0x3d, pm_ioread(0x3d) & (~(3 << 2))); - - /* GPM3 config falling edge trigger */ - pm_iowrite(0x37, pm_ioread(0x37) & (~(1 << 6))); - - /* No wait for STPGNT# in ACPI Sx state */ - pm_iowrite(0x7c, pm_ioread(0x7c) | (1 << 6)); - - /* Set GPM3 pull-down enable */ - value = pm2_ioread(0xf6); - value |= ((1 << 7) | (1 << 3)); - pm2_iowrite(0xf6, value); - - /* Set GPM9 pull-down enable */ - value = pm2_ioread(0xf8); - value |= ((1 << 5) | (1 << 1)); - pm2_iowrite(0xf8, value); -} - -int __init sbx00_acpi_init(void) -{ - register_acpi_resource(); - acpi_registers_setup(); - acpi_hw_clear_status(); - - return 0; -} diff --git a/arch/mips/loongson64/loongson-3/cop2-ex.c b/arch/mips/loongson64/loongson-3/cop2-ex.c deleted file mode 100644 index 9efdfe430ff0..000000000000 --- a/arch/mips/loongson64/loongson-3/cop2-ex.c +++ /dev/null @@ -1,61 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 2014 Lemote Corporation. 
- * written by Huacai Chen - * - * based on arch/mips/cavium-octeon/cpu.c - * Copyright (C) 2009 Wind River Systems, - * written by Ralf Baechle - */ -#include -#include -#include -#include - -#include -#include -#include -#include - -static int loongson_cu2_call(struct notifier_block *nfb, unsigned long action, - void *data) -{ - int fpu_owned; - int fr = !test_thread_flag(TIF_32BIT_FPREGS); - - switch (action) { - case CU2_EXCEPTION: - preempt_disable(); - fpu_owned = __is_fpu_owner(); - if (!fr) - set_c0_status(ST0_CU1 | ST0_CU2); - else - set_c0_status(ST0_CU1 | ST0_CU2 | ST0_FR); - enable_fpu_hazard(); - KSTK_STATUS(current) |= (ST0_CU1 | ST0_CU2); - if (fr) - KSTK_STATUS(current) |= ST0_FR; - else - KSTK_STATUS(current) &= ~ST0_FR; - /* If FPU is owned, we needn't init or restore fp */ - if (!fpu_owned) { - set_thread_flag(TIF_USEDFPU); - init_fp_ctx(current); - _restore_fp(current); - } - preempt_enable(); - - return NOTIFY_STOP; /* Don't call default notifier */ - } - - return NOTIFY_OK; /* Let default notifier send signals */ -} - -static int __init loongson_cu2_setup(void) -{ - return cu2_notifier(loongson_cu2_call, 0); -} -early_initcall(loongson_cu2_setup); diff --git a/arch/mips/loongson64/loongson-3/dma.c b/arch/mips/loongson64/loongson-3/dma.c deleted file mode 100644 index 5e86635f71db..000000000000 --- a/arch/mips/loongson64/loongson-3/dma.c +++ /dev/null @@ -1,25 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include -#include -#include - -dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr) -{ - /* We extract 2bit node id (bit 44~47, only bit 44~45 used now) from - * Loongson-3's 48bit address space and embed it into 40bit */ - long nid = (paddr >> 44) & 0x3; - return ((nid << 44) ^ paddr) | (nid << 37); -} - -phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr) -{ - /* We extract 2bit node id (bit 44~47, only bit 44~45 used now) from - * Loongson-3's 48bit address space and embed it into 40bit */ - long nid = (daddr >> 37) & 0x3; - return ((nid << 37) ^ daddr) | (nid << 44); -} - -void __init plat_swiotlb_setup(void) -{ - swiotlb_init(1); -} diff --git a/arch/mips/loongson64/loongson-3/hpet.c b/arch/mips/loongson64/loongson-3/hpet.c deleted file mode 100644 index ed15430ad64f..000000000000 --- a/arch/mips/loongson64/loongson-3/hpet.c +++ /dev/null @@ -1,289 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include -#include -#include -#include -#include -#include - -#include -#include - -#define SMBUS_CFG_BASE (loongson_sysconf.ht_control_base + 0x0300a000) -#define SMBUS_PCI_REG40 0x40 -#define SMBUS_PCI_REG64 0x64 -#define SMBUS_PCI_REGB4 0xb4 - -#define HPET_MIN_CYCLES 16 -#define HPET_MIN_PROG_DELTA (HPET_MIN_CYCLES * 12) - -static DEFINE_SPINLOCK(hpet_lock); -DEFINE_PER_CPU(struct clock_event_device, hpet_clockevent_device); - -static unsigned int smbus_read(int offset) -{ - return *(volatile unsigned int *)(SMBUS_CFG_BASE + offset); -} - -static void smbus_write(int offset, int data) -{ - *(volatile unsigned int *)(SMBUS_CFG_BASE + offset) = data; -} - -static void smbus_enable(int offset, int bit) -{ - unsigned int cfg = smbus_read(offset); - - cfg |= bit; - smbus_write(offset, cfg); -} - -static int hpet_read(int offset) -{ - return *(volatile unsigned int *)(HPET_MMIO_ADDR + offset); -} - -static void hpet_write(int offset, int data) -{ - *(volatile unsigned int *)(HPET_MMIO_ADDR + offset) = data; -} - -static void hpet_start_counter(void) -{ - unsigned int cfg = hpet_read(HPET_CFG); - - cfg |= HPET_CFG_ENABLE; - hpet_write(HPET_CFG, cfg); -} 
- -static void hpet_stop_counter(void) -{ - unsigned int cfg = hpet_read(HPET_CFG); - - cfg &= ~HPET_CFG_ENABLE; - hpet_write(HPET_CFG, cfg); -} - -static void hpet_reset_counter(void) -{ - hpet_write(HPET_COUNTER, 0); - hpet_write(HPET_COUNTER + 4, 0); -} - -static void hpet_restart_counter(void) -{ - hpet_stop_counter(); - hpet_reset_counter(); - hpet_start_counter(); -} - -static void hpet_enable_legacy_int(void) -{ - /* Do nothing on Loongson-3 */ -} - -static int hpet_set_state_periodic(struct clock_event_device *evt) -{ - int cfg; - - spin_lock(&hpet_lock); - - pr_info("set clock event to periodic mode!\n"); - /* stop counter */ - hpet_stop_counter(); - - /* enables the timer0 to generate a periodic interrupt */ - cfg = hpet_read(HPET_T0_CFG); - cfg &= ~HPET_TN_LEVEL; - cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL | - HPET_TN_32BIT; - hpet_write(HPET_T0_CFG, cfg); - - /* set the comparator */ - hpet_write(HPET_T0_CMP, HPET_COMPARE_VAL); - udelay(1); - hpet_write(HPET_T0_CMP, HPET_COMPARE_VAL); - - /* start counter */ - hpet_start_counter(); - - spin_unlock(&hpet_lock); - return 0; -} - -static int hpet_set_state_shutdown(struct clock_event_device *evt) -{ - int cfg; - - spin_lock(&hpet_lock); - - cfg = hpet_read(HPET_T0_CFG); - cfg &= ~HPET_TN_ENABLE; - hpet_write(HPET_T0_CFG, cfg); - - spin_unlock(&hpet_lock); - return 0; -} - -static int hpet_set_state_oneshot(struct clock_event_device *evt) -{ - int cfg; - - spin_lock(&hpet_lock); - - pr_info("set clock event to one shot mode!\n"); - cfg = hpet_read(HPET_T0_CFG); - /* - * set timer0 type - * 1 : periodic interrupt - * 0 : non-periodic(oneshot) interrupt - */ - cfg &= ~HPET_TN_PERIODIC; - cfg |= HPET_TN_ENABLE | HPET_TN_32BIT; - hpet_write(HPET_T0_CFG, cfg); - - spin_unlock(&hpet_lock); - return 0; -} - -static int hpet_tick_resume(struct clock_event_device *evt) -{ - spin_lock(&hpet_lock); - hpet_enable_legacy_int(); - spin_unlock(&hpet_lock); - - return 0; -} - -static int hpet_next_event(unsigned long delta, - struct clock_event_device *evt) -{ - u32 cnt; - s32 res; - - cnt = hpet_read(HPET_COUNTER); - cnt += (u32) delta; - hpet_write(HPET_T0_CMP, cnt); - - res = (s32)(cnt - hpet_read(HPET_COUNTER)); - - return res < HPET_MIN_CYCLES ? -ETIME : 0; -} - -static irqreturn_t hpet_irq_handler(int irq, void *data) -{ - int is_irq; - struct clock_event_device *cd; - unsigned int cpu = smp_processor_id(); - - is_irq = hpet_read(HPET_STATUS); - if (is_irq & HPET_T0_IRS) { - /* clear the TIMER0 irq status register */ - hpet_write(HPET_STATUS, HPET_T0_IRS); - cd = &per_cpu(hpet_clockevent_device, cpu); - cd->event_handler(cd); - return IRQ_HANDLED; - } - return IRQ_NONE; -} - -static struct irqaction hpet_irq = { - .handler = hpet_irq_handler, - .flags = IRQF_NOBALANCING | IRQF_TIMER, - .name = "hpet", -}; - -/* - * hpet address assignation and irq setting should be done in bios. - * but pmon don't do this, we just setup here directly. - * The operation under is normal. unfortunately, hpet_setup process - * is before pci initialize. - * - * { - * struct pci_dev *pdev; - * - * pdev = pci_get_device(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS, NULL); - * pci_write_config_word(pdev, SMBUS_PCI_REGB4, HPET_ADDR); - * - * ... 
- * } - */ -static void hpet_setup(void) -{ - /* set hpet base address */ - smbus_write(SMBUS_PCI_REGB4, HPET_ADDR); - - /* enable decoding of access to HPET MMIO*/ - smbus_enable(SMBUS_PCI_REG40, (1 << 28)); - - /* HPET irq enable */ - smbus_enable(SMBUS_PCI_REG64, (1 << 10)); - - hpet_enable_legacy_int(); -} - -void __init setup_hpet_timer(void) -{ - unsigned int cpu = smp_processor_id(); - struct clock_event_device *cd; - - hpet_setup(); - - cd = &per_cpu(hpet_clockevent_device, cpu); - cd->name = "hpet"; - cd->rating = 100; - cd->features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT; - cd->set_state_shutdown = hpet_set_state_shutdown; - cd->set_state_periodic = hpet_set_state_periodic; - cd->set_state_oneshot = hpet_set_state_oneshot; - cd->tick_resume = hpet_tick_resume; - cd->set_next_event = hpet_next_event; - cd->irq = HPET_T0_IRQ; - cd->cpumask = cpumask_of(cpu); - clockevent_set_clock(cd, HPET_FREQ); - cd->max_delta_ns = clockevent_delta2ns(0x7fffffff, cd); - cd->max_delta_ticks = 0x7fffffff; - cd->min_delta_ns = clockevent_delta2ns(HPET_MIN_PROG_DELTA, cd); - cd->min_delta_ticks = HPET_MIN_PROG_DELTA; - - clockevents_register_device(cd); - setup_irq(HPET_T0_IRQ, &hpet_irq); - pr_info("hpet clock event device register\n"); -} - -static u64 hpet_read_counter(struct clocksource *cs) -{ - return (u64)hpet_read(HPET_COUNTER); -} - -static void hpet_suspend(struct clocksource *cs) -{ -} - -static void hpet_resume(struct clocksource *cs) -{ - hpet_setup(); - hpet_restart_counter(); -} - -static struct clocksource csrc_hpet = { - .name = "hpet", - /* mips clocksource rating is less than 300, so hpet is better. */ - .rating = 300, - .read = hpet_read_counter, - .mask = CLOCKSOURCE_MASK(32), - /* oneshot mode work normal with this flag */ - .flags = CLOCK_SOURCE_IS_CONTINUOUS, - .suspend = hpet_suspend, - .resume = hpet_resume, - .mult = 0, - .shift = 10, -}; - -int __init init_hpet_clocksource(void) -{ - csrc_hpet.mult = clocksource_hz2mult(HPET_FREQ, csrc_hpet.shift); - return clocksource_register_hz(&csrc_hpet, HPET_FREQ); -} - -arch_initcall(init_hpet_clocksource); diff --git a/arch/mips/loongson64/loongson-3/irq.c b/arch/mips/loongson64/loongson-3/irq.c deleted file mode 100644 index 79ad797497e4..000000000000 --- a/arch/mips/loongson64/loongson-3/irq.c +++ /dev/null @@ -1,162 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include -#include -#include -#include - -#include -#include -#include - -#include "smp.h" - -extern void loongson3_send_irq_by_ipi(int cpu, int irqs); - -unsigned int irq_cpu[16] = {[0 ... 
15] = -1}; -unsigned int ht_irq[] = {0, 1, 3, 4, 5, 6, 7, 8, 12, 14, 15}; -unsigned int local_irq = 1<<0 | 1<<1 | 1<<2 | 1<<7 | 1<<8 | 1<<12; - -int plat_set_irq_affinity(struct irq_data *d, const struct cpumask *affinity, - bool force) -{ - unsigned int cpu; - struct cpumask new_affinity; - - /* I/O devices are connected on package-0 */ - cpumask_copy(&new_affinity, affinity); - for_each_cpu(cpu, affinity) - if (cpu_data[cpu].package > 0) - cpumask_clear_cpu(cpu, &new_affinity); - - if (cpumask_empty(&new_affinity)) - return -EINVAL; - - cpumask_copy(d->common->affinity, &new_affinity); - - return IRQ_SET_MASK_OK_NOCOPY; -} - -static void ht_irqdispatch(void) -{ - unsigned int i, irq; - struct irq_data *irqd; - struct cpumask affinity; - - irq = LOONGSON_HT1_INT_VECTOR(0); - LOONGSON_HT1_INT_VECTOR(0) = irq; /* Acknowledge the IRQs */ - - for (i = 0; i < ARRAY_SIZE(ht_irq); i++) { - if (!(irq & (0x1 << ht_irq[i]))) - continue; - - /* handled by local core */ - if (local_irq & (0x1 << ht_irq[i])) { - do_IRQ(ht_irq[i]); - continue; - } - - irqd = irq_get_irq_data(ht_irq[i]); - cpumask_and(&affinity, irqd->common->affinity, cpu_active_mask); - if (cpumask_empty(&affinity)) { - do_IRQ(ht_irq[i]); - continue; - } - - irq_cpu[ht_irq[i]] = cpumask_next(irq_cpu[ht_irq[i]], &affinity); - if (irq_cpu[ht_irq[i]] >= nr_cpu_ids) - irq_cpu[ht_irq[i]] = cpumask_first(&affinity); - - if (irq_cpu[ht_irq[i]] == 0) { - do_IRQ(ht_irq[i]); - continue; - } - - /* balanced by other cores */ - loongson3_send_irq_by_ipi(irq_cpu[ht_irq[i]], (0x1 << ht_irq[i])); - } -} - -#define UNUSED_IPS (CAUSEF_IP5 | CAUSEF_IP4 | CAUSEF_IP1 | CAUSEF_IP0) - -asmlinkage void plat_irq_dispatch(void) -{ - unsigned int pending; - - pending = read_c0_cause() & read_c0_status() & ST0_IM; - - if (pending & CAUSEF_IP7) - do_IRQ(LOONGSON_TIMER_IRQ); -#if defined(CONFIG_SMP) - if (pending & CAUSEF_IP6) - loongson3_ipi_interrupt(NULL); -#endif - if (pending & CAUSEF_IP3) - ht_irqdispatch(); - if (pending & CAUSEF_IP2) - do_IRQ(LOONGSON_UART_IRQ); - if (pending & UNUSED_IPS) { - pr_err("%s : spurious interrupt\n", __func__); - spurious_interrupt(); - } -} - -static inline void mask_loongson_irq(struct irq_data *d) { } -static inline void unmask_loongson_irq(struct irq_data *d) { } - - /* For MIPS IRQs which shared by all cores */ -static struct irq_chip loongson_irq_chip = { - .name = "Loongson", - .irq_ack = mask_loongson_irq, - .irq_mask = mask_loongson_irq, - .irq_mask_ack = mask_loongson_irq, - .irq_unmask = unmask_loongson_irq, - .irq_eoi = unmask_loongson_irq, -}; - -void irq_router_init(void) -{ - int i; - - /* route LPC int to cpu core0 int 0 */ - LOONGSON_INT_ROUTER_LPC = - LOONGSON_INT_COREx_INTy(loongson_sysconf.boot_cpu_id, 0); - /* route HT1 int0 ~ int7 to cpu core0 INT1*/ - for (i = 0; i < 8; i++) - LOONGSON_INT_ROUTER_HT1(i) = - LOONGSON_INT_COREx_INTy(loongson_sysconf.boot_cpu_id, 1); - /* enable HT1 interrupt */ - LOONGSON_HT1_INTN_EN(0) = 0xffffffff; - /* enable router interrupt intenset */ - LOONGSON_INT_ROUTER_INTENSET = - LOONGSON_INT_ROUTER_INTEN | (0xffff << 16) | 0x1 << 10; -} - -void __init arch_init_irq(void) -{ - struct irq_chip *chip; - - clear_c0_status(ST0_IM | ST0_BEV); - - irq_router_init(); - mips_cpu_irq_init(); - init_i8259_irqs(); - chip = irq_get_chip(I8259A_IRQ_BASE); - chip->irq_set_affinity = plat_set_irq_affinity; - - irq_set_chip_and_handler(LOONGSON_UART_IRQ, - &loongson_irq_chip, handle_percpu_irq); - irq_set_chip_and_handler(LOONGSON_BRIDGE_IRQ, - &loongson_irq_chip, handle_percpu_irq); - - 
set_c0_status(STATUSF_IP2 | STATUSF_IP3 | STATUSF_IP6); -} - -#ifdef CONFIG_HOTPLUG_CPU - -void fixup_irqs(void) -{ - irq_cpu_offline(); - clear_c0_status(ST0_IM); -} - -#endif diff --git a/arch/mips/loongson64/loongson-3/numa.c b/arch/mips/loongson64/loongson-3/numa.c deleted file mode 100644 index ef94a2278561..000000000000 --- a/arch/mips/loongson64/loongson-3/numa.c +++ /dev/null @@ -1,273 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) 2010 Loongson Inc. & Lemote Inc. & - * Institute of Computing Technology - * Author: Xiang Gao, gaoxiang@ict.ac.cn - * Huacai Chen, chenhc@lemote.com - * Xiaofu Meng, Shuangshuang Zhang - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static struct pglist_data prealloc__node_data[MAX_NUMNODES]; -unsigned char __node_distances[MAX_NUMNODES][MAX_NUMNODES]; -EXPORT_SYMBOL(__node_distances); -struct pglist_data *__node_data[MAX_NUMNODES]; -EXPORT_SYMBOL(__node_data); - -cpumask_t __node_cpumask[MAX_NUMNODES]; -EXPORT_SYMBOL(__node_cpumask); - -static void enable_lpa(void) -{ - unsigned long value; - - value = __read_32bit_c0_register($16, 3); - value |= 0x00000080; - __write_32bit_c0_register($16, 3, value); - value = __read_32bit_c0_register($16, 3); - pr_info("CP0_Config3: CP0 16.3 (0x%lx)\n", value); - - value = __read_32bit_c0_register($5, 1); - value |= 0x20000000; - __write_32bit_c0_register($5, 1, value); - value = __read_32bit_c0_register($5, 1); - pr_info("CP0_PageGrain: CP0 5.1 (0x%lx)\n", value); -} - -static void cpu_node_probe(void) -{ - int i; - - nodes_clear(node_possible_map); - nodes_clear(node_online_map); - for (i = 0; i < loongson_sysconf.nr_nodes; i++) { - node_set_state(num_online_nodes(), N_POSSIBLE); - node_set_online(num_online_nodes()); - } - - pr_info("NUMA: Discovered %d cpus on %d nodes\n", - loongson_sysconf.nr_cpus, num_online_nodes()); -} - -static int __init compute_node_distance(int row, int col) -{ - int package_row = row * loongson_sysconf.cores_per_node / - loongson_sysconf.cores_per_package; - int package_col = col * loongson_sysconf.cores_per_node / - loongson_sysconf.cores_per_package; - - if (col == row) - return 0; - else if (package_row == package_col) - return 40; - else - return 100; -} - -static void __init init_topology_matrix(void) -{ - int row, col; - - for (row = 0; row < MAX_NUMNODES; row++) - for (col = 0; col < MAX_NUMNODES; col++) - __node_distances[row][col] = -1; - - for_each_online_node(row) { - for_each_online_node(col) { - __node_distances[row][col] = - compute_node_distance(row, col); - } - } -} - -static unsigned long nid_to_addroffset(unsigned int nid) -{ - unsigned long result; - switch (nid) { - case 0: - default: - result = NODE0_ADDRSPACE_OFFSET; - break; - case 1: - result = NODE1_ADDRSPACE_OFFSET; - break; - case 2: - result = NODE2_ADDRSPACE_OFFSET; - break; - case 3: - result = NODE3_ADDRSPACE_OFFSET; - break; - } - return result; -} - -static void __init szmem(unsigned int node) -{ - u32 i, mem_type; - static unsigned long num_physpages = 0; - u64 node_id, node_psize, start_pfn, end_pfn, mem_start, mem_size; - - /* Parse memory information and activate */ - for (i = 0; i < loongson_memmap->nr_map; i++) { - node_id = loongson_memmap->map[i].node_id; - if (node_id != node) - continue; - - mem_type = loongson_memmap->map[i].mem_type; - mem_size = loongson_memmap->map[i].mem_size; - mem_start = 
loongson_memmap->map[i].mem_start; - - switch (mem_type) { - case SYSTEM_RAM_LOW: - start_pfn = ((node_id << 44) + mem_start) >> PAGE_SHIFT; - node_psize = (mem_size << 20) >> PAGE_SHIFT; - end_pfn = start_pfn + node_psize; - num_physpages += node_psize; - pr_info("Node%d: mem_type:%d, mem_start:0x%llx, mem_size:0x%llx MB\n", - (u32)node_id, mem_type, mem_start, mem_size); - pr_info(" start_pfn:0x%llx, end_pfn:0x%llx, num_physpages:0x%lx\n", - start_pfn, end_pfn, num_physpages); - memblock_add_node(PFN_PHYS(start_pfn), - PFN_PHYS(end_pfn - start_pfn), node); - break; - case SYSTEM_RAM_HIGH: - start_pfn = ((node_id << 44) + mem_start) >> PAGE_SHIFT; - node_psize = (mem_size << 20) >> PAGE_SHIFT; - end_pfn = start_pfn + node_psize; - num_physpages += node_psize; - pr_info("Node%d: mem_type:%d, mem_start:0x%llx, mem_size:0x%llx MB\n", - (u32)node_id, mem_type, mem_start, mem_size); - pr_info(" start_pfn:0x%llx, end_pfn:0x%llx, num_physpages:0x%lx\n", - start_pfn, end_pfn, num_physpages); - memblock_add_node(PFN_PHYS(start_pfn), - PFN_PHYS(end_pfn - start_pfn), node); - break; - case SYSTEM_RAM_RESERVED: - pr_info("Node%d: mem_type:%d, mem_start:0x%llx, mem_size:0x%llx MB\n", - (u32)node_id, mem_type, mem_start, mem_size); - memblock_reserve(((node_id << 44) + mem_start), - mem_size << 20); - break; - } - } -} - -static void __init node_mem_init(unsigned int node) -{ - unsigned long node_addrspace_offset; - unsigned long start_pfn, end_pfn; - - node_addrspace_offset = nid_to_addroffset(node); - pr_info("Node%d's addrspace_offset is 0x%lx\n", - node, node_addrspace_offset); - - get_pfn_range_for_nid(node, &start_pfn, &end_pfn); - pr_info("Node%d: start_pfn=0x%lx, end_pfn=0x%lx\n", - node, start_pfn, end_pfn); - - __node_data[node] = prealloc__node_data + node; - - NODE_DATA(node)->node_start_pfn = start_pfn; - NODE_DATA(node)->node_spanned_pages = end_pfn - start_pfn; - - if (node == 0) { - /* kernel end address */ - unsigned long kernel_end_pfn = PFN_UP(__pa_symbol(&_end)); - - /* used by finalize_initrd() */ - max_low_pfn = end_pfn; - - /* Reserve the kernel text/data/bss */ - memblock_reserve(start_pfn << PAGE_SHIFT, - ((kernel_end_pfn - start_pfn) << PAGE_SHIFT)); - - /* Reserve 0xfe000000~0xffffffff for RS780E integrated GPU */ - if (node_end_pfn(0) >= (0xffffffff >> PAGE_SHIFT)) - memblock_reserve((node_addrspace_offset | 0xfe000000), - 32 << 20); - } -} - -static __init void prom_meminit(void) -{ - unsigned int node, cpu, active_cpu = 0; - - cpu_node_probe(); - init_topology_matrix(); - - for (node = 0; node < loongson_sysconf.nr_nodes; node++) { - if (node_online(node)) { - szmem(node); - node_mem_init(node); - cpumask_clear(&__node_cpumask[node]); - } - } - memblocks_present(); - max_low_pfn = PHYS_PFN(memblock_end_of_DRAM()); - - for (cpu = 0; cpu < loongson_sysconf.nr_cpus; cpu++) { - node = cpu / loongson_sysconf.cores_per_node; - if (node >= num_online_nodes()) - node = 0; - - if (loongson_sysconf.reserved_cpus_mask & (1< -#include -#include -#include -#include -#include -#include - -static int __init loongson3_platform_init(void) -{ - int i; - struct platform_device *pdev; - - if (loongson_sysconf.ecname[0] != '\0') - platform_device_register_simple(loongson_sysconf.ecname, -1, NULL, 0); - - for (i = 0; i < loongson_sysconf.nr_sensors; i++) { - if (loongson_sysconf.sensors[i].type > SENSOR_FAN) - continue; - - pdev = kzalloc(sizeof(struct platform_device), GFP_KERNEL); - pdev->name = loongson_sysconf.sensors[i].name; - pdev->id = loongson_sysconf.sensors[i].id; - 
pdev->dev.platform_data = &loongson_sysconf.sensors[i]; - platform_device_register(pdev); - } - - return 0; -} - -arch_initcall(loongson3_platform_init); diff --git a/arch/mips/loongson64/loongson-3/smp.c b/arch/mips/loongson64/loongson-3/smp.c deleted file mode 100644 index de8e0741ce2d..000000000000 --- a/arch/mips/loongson64/loongson-3/smp.c +++ /dev/null @@ -1,813 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) 2010, 2011, 2012, Lemote, Inc. - * Author: Chen Huacai, chenhc@lemote.com - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "smp.h" - -DEFINE_PER_CPU(int, cpu_state); - -static void *ipi_set0_regs[16]; -static void *ipi_clear0_regs[16]; -static void *ipi_status0_regs[16]; -static void *ipi_en0_regs[16]; -static void *ipi_mailbox_buf[16]; -static uint32_t core0_c0count[NR_CPUS]; - -/* read a 32bit value from ipi register */ -#define loongson3_ipi_read32(addr) readl(addr) -/* read a 64bit value from ipi register */ -#define loongson3_ipi_read64(addr) readq(addr) -/* write a 32bit value to ipi register */ -#define loongson3_ipi_write32(action, addr) \ - do { \ - writel(action, addr); \ - __wbflush(); \ - } while (0) -/* write a 64bit value to ipi register */ -#define loongson3_ipi_write64(action, addr) \ - do { \ - writeq(action, addr); \ - __wbflush(); \ - } while (0) - -u32 (*ipi_read_clear)(int cpu); -void (*ipi_write_action)(int cpu, u32 action); - -static u32 csr_ipi_read_clear(int cpu) -{ - u32 action; - - /* Load the ipi register to figure out what we're supposed to do */ - action = csr_readl(LOONGSON_CSR_IPI_STATUS); - /* Clear the ipi register to clear the interrupt */ - csr_writel(action, LOONGSON_CSR_IPI_CLEAR); - - return action; -} - -static void csr_ipi_write_action(int cpu, u32 action) -{ - unsigned int irq = 0; - - while ((irq = ffs(action))) { - uint32_t val = CSR_IPI_SEND_BLOCK; - val |= (irq - 1); - val |= (cpu << CSR_IPI_SEND_CPU_SHIFT); - csr_writel(val, LOONGSON_CSR_IPI_SEND); - action &= ~BIT(irq - 1); - } -} - -static u32 legacy_ipi_read_clear(int cpu) -{ - u32 action; - - /* Load the ipi register to figure out what we're supposed to do */ - action = loongson3_ipi_read32(ipi_status0_regs[cpu_logical_map(cpu)]); - /* Clear the ipi register to clear the interrupt */ - loongson3_ipi_write32(action, ipi_clear0_regs[cpu_logical_map(cpu)]); - - return action; -} - -static void legacy_ipi_write_action(int cpu, u32 action) -{ - loongson3_ipi_write32((u32)action, ipi_set0_regs[cpu]); -} - -static void csr_ipi_probe(void) -{ - if (cpu_has_csr() && csr_readl(LOONGSON_CSR_FEATURES) & LOONGSON_CSRF_IPI) { - ipi_read_clear = csr_ipi_read_clear; - ipi_write_action = csr_ipi_write_action; - } else { - ipi_read_clear = legacy_ipi_read_clear; - ipi_write_action = legacy_ipi_write_action; - } -} - -static void ipi_set0_regs_init(void) -{ - ipi_set0_regs[0] = (void *) - (SMP_CORE_GROUP0_BASE + SMP_CORE0_OFFSET + SET0); - ipi_set0_regs[1] = (void *) - (SMP_CORE_GROUP0_BASE + SMP_CORE1_OFFSET + SET0); - ipi_set0_regs[2] = (void *) - (SMP_CORE_GROUP0_BASE + SMP_CORE2_OFFSET + SET0); - ipi_set0_regs[3] = (void *) - (SMP_CORE_GROUP0_BASE + SMP_CORE3_OFFSET + SET0); - ipi_set0_regs[4] = (void *) - (SMP_CORE_GROUP1_BASE + SMP_CORE0_OFFSET + SET0); - ipi_set0_regs[5] = (void *) - (SMP_CORE_GROUP1_BASE + SMP_CORE1_OFFSET + SET0); - ipi_set0_regs[6] = (void *) - (SMP_CORE_GROUP1_BASE + SMP_CORE2_OFFSET + SET0); - ipi_set0_regs[7] = 
(void *) - (SMP_CORE_GROUP1_BASE + SMP_CORE3_OFFSET + SET0); - ipi_set0_regs[8] = (void *) - (SMP_CORE_GROUP2_BASE + SMP_CORE0_OFFSET + SET0); - ipi_set0_regs[9] = (void *) - (SMP_CORE_GROUP2_BASE + SMP_CORE1_OFFSET + SET0); - ipi_set0_regs[10] = (void *) - (SMP_CORE_GROUP2_BASE + SMP_CORE2_OFFSET + SET0); - ipi_set0_regs[11] = (void *) - (SMP_CORE_GROUP2_BASE + SMP_CORE3_OFFSET + SET0); - ipi_set0_regs[12] = (void *) - (SMP_CORE_GROUP3_BASE + SMP_CORE0_OFFSET + SET0); - ipi_set0_regs[13] = (void *) - (SMP_CORE_GROUP3_BASE + SMP_CORE1_OFFSET + SET0); - ipi_set0_regs[14] = (void *) - (SMP_CORE_GROUP3_BASE + SMP_CORE2_OFFSET + SET0); - ipi_set0_regs[15] = (void *) - (SMP_CORE_GROUP3_BASE + SMP_CORE3_OFFSET + SET0); -} - -static void ipi_clear0_regs_init(void) -{ - ipi_clear0_regs[0] = (void *) - (SMP_CORE_GROUP0_BASE + SMP_CORE0_OFFSET + CLEAR0); - ipi_clear0_regs[1] = (void *) - (SMP_CORE_GROUP0_BASE + SMP_CORE1_OFFSET + CLEAR0); - ipi_clear0_regs[2] = (void *) - (SMP_CORE_GROUP0_BASE + SMP_CORE2_OFFSET + CLEAR0); - ipi_clear0_regs[3] = (void *) - (SMP_CORE_GROUP0_BASE + SMP_CORE3_OFFSET + CLEAR0); - ipi_clear0_regs[4] = (void *) - (SMP_CORE_GROUP1_BASE + SMP_CORE0_OFFSET + CLEAR0); - ipi_clear0_regs[5] = (void *) - (SMP_CORE_GROUP1_BASE + SMP_CORE1_OFFSET + CLEAR0); - ipi_clear0_regs[6] = (void *) - (SMP_CORE_GROUP1_BASE + SMP_CORE2_OFFSET + CLEAR0); - ipi_clear0_regs[7] = (void *) - (SMP_CORE_GROUP1_BASE + SMP_CORE3_OFFSET + CLEAR0); - ipi_clear0_regs[8] = (void *) - (SMP_CORE_GROUP2_BASE + SMP_CORE0_OFFSET + CLEAR0); - ipi_clear0_regs[9] = (void *) - (SMP_CORE_GROUP2_BASE + SMP_CORE1_OFFSET + CLEAR0); - ipi_clear0_regs[10] = (void *) - (SMP_CORE_GROUP2_BASE + SMP_CORE2_OFFSET + CLEAR0); - ipi_clear0_regs[11] = (void *) - (SMP_CORE_GROUP2_BASE + SMP_CORE3_OFFSET + CLEAR0); - ipi_clear0_regs[12] = (void *) - (SMP_CORE_GROUP3_BASE + SMP_CORE0_OFFSET + CLEAR0); - ipi_clear0_regs[13] = (void *) - (SMP_CORE_GROUP3_BASE + SMP_CORE1_OFFSET + CLEAR0); - ipi_clear0_regs[14] = (void *) - (SMP_CORE_GROUP3_BASE + SMP_CORE2_OFFSET + CLEAR0); - ipi_clear0_regs[15] = (void *) - (SMP_CORE_GROUP3_BASE + SMP_CORE3_OFFSET + CLEAR0); -} - -static void ipi_status0_regs_init(void) -{ - ipi_status0_regs[0] = (void *) - (SMP_CORE_GROUP0_BASE + SMP_CORE0_OFFSET + STATUS0); - ipi_status0_regs[1] = (void *) - (SMP_CORE_GROUP0_BASE + SMP_CORE1_OFFSET + STATUS0); - ipi_status0_regs[2] = (void *) - (SMP_CORE_GROUP0_BASE + SMP_CORE2_OFFSET + STATUS0); - ipi_status0_regs[3] = (void *) - (SMP_CORE_GROUP0_BASE + SMP_CORE3_OFFSET + STATUS0); - ipi_status0_regs[4] = (void *) - (SMP_CORE_GROUP1_BASE + SMP_CORE0_OFFSET + STATUS0); - ipi_status0_regs[5] = (void *) - (SMP_CORE_GROUP1_BASE + SMP_CORE1_OFFSET + STATUS0); - ipi_status0_regs[6] = (void *) - (SMP_CORE_GROUP1_BASE + SMP_CORE2_OFFSET + STATUS0); - ipi_status0_regs[7] = (void *) - (SMP_CORE_GROUP1_BASE + SMP_CORE3_OFFSET + STATUS0); - ipi_status0_regs[8] = (void *) - (SMP_CORE_GROUP2_BASE + SMP_CORE0_OFFSET + STATUS0); - ipi_status0_regs[9] = (void *) - (SMP_CORE_GROUP2_BASE + SMP_CORE1_OFFSET + STATUS0); - ipi_status0_regs[10] = (void *) - (SMP_CORE_GROUP2_BASE + SMP_CORE2_OFFSET + STATUS0); - ipi_status0_regs[11] = (void *) - (SMP_CORE_GROUP2_BASE + SMP_CORE3_OFFSET + STATUS0); - ipi_status0_regs[12] = (void *) - (SMP_CORE_GROUP3_BASE + SMP_CORE0_OFFSET + STATUS0); - ipi_status0_regs[13] = (void *) - (SMP_CORE_GROUP3_BASE + SMP_CORE1_OFFSET + STATUS0); - ipi_status0_regs[14] = (void *) - (SMP_CORE_GROUP3_BASE + SMP_CORE2_OFFSET + STATUS0); - ipi_status0_regs[15] = 
(void *) - (SMP_CORE_GROUP3_BASE + SMP_CORE3_OFFSET + STATUS0); -} - -static void ipi_en0_regs_init(void) -{ - ipi_en0_regs[0] = (void *) - (SMP_CORE_GROUP0_BASE + SMP_CORE0_OFFSET + EN0); - ipi_en0_regs[1] = (void *) - (SMP_CORE_GROUP0_BASE + SMP_CORE1_OFFSET + EN0); - ipi_en0_regs[2] = (void *) - (SMP_CORE_GROUP0_BASE + SMP_CORE2_OFFSET + EN0); - ipi_en0_regs[3] = (void *) - (SMP_CORE_GROUP0_BASE + SMP_CORE3_OFFSET + EN0); - ipi_en0_regs[4] = (void *) - (SMP_CORE_GROUP1_BASE + SMP_CORE0_OFFSET + EN0); - ipi_en0_regs[5] = (void *) - (SMP_CORE_GROUP1_BASE + SMP_CORE1_OFFSET + EN0); - ipi_en0_regs[6] = (void *) - (SMP_CORE_GROUP1_BASE + SMP_CORE2_OFFSET + EN0); - ipi_en0_regs[7] = (void *) - (SMP_CORE_GROUP1_BASE + SMP_CORE3_OFFSET + EN0); - ipi_en0_regs[8] = (void *) - (SMP_CORE_GROUP2_BASE + SMP_CORE0_OFFSET + EN0); - ipi_en0_regs[9] = (void *) - (SMP_CORE_GROUP2_BASE + SMP_CORE1_OFFSET + EN0); - ipi_en0_regs[10] = (void *) - (SMP_CORE_GROUP2_BASE + SMP_CORE2_OFFSET + EN0); - ipi_en0_regs[11] = (void *) - (SMP_CORE_GROUP2_BASE + SMP_CORE3_OFFSET + EN0); - ipi_en0_regs[12] = (void *) - (SMP_CORE_GROUP3_BASE + SMP_CORE0_OFFSET + EN0); - ipi_en0_regs[13] = (void *) - (SMP_CORE_GROUP3_BASE + SMP_CORE1_OFFSET + EN0); - ipi_en0_regs[14] = (void *) - (SMP_CORE_GROUP3_BASE + SMP_CORE2_OFFSET + EN0); - ipi_en0_regs[15] = (void *) - (SMP_CORE_GROUP3_BASE + SMP_CORE3_OFFSET + EN0); -} - -static void ipi_mailbox_buf_init(void) -{ - ipi_mailbox_buf[0] = (void *) - (SMP_CORE_GROUP0_BASE + SMP_CORE0_OFFSET + BUF); - ipi_mailbox_buf[1] = (void *) - (SMP_CORE_GROUP0_BASE + SMP_CORE1_OFFSET + BUF); - ipi_mailbox_buf[2] = (void *) - (SMP_CORE_GROUP0_BASE + SMP_CORE2_OFFSET + BUF); - ipi_mailbox_buf[3] = (void *) - (SMP_CORE_GROUP0_BASE + SMP_CORE3_OFFSET + BUF); - ipi_mailbox_buf[4] = (void *) - (SMP_CORE_GROUP1_BASE + SMP_CORE0_OFFSET + BUF); - ipi_mailbox_buf[5] = (void *) - (SMP_CORE_GROUP1_BASE + SMP_CORE1_OFFSET + BUF); - ipi_mailbox_buf[6] = (void *) - (SMP_CORE_GROUP1_BASE + SMP_CORE2_OFFSET + BUF); - ipi_mailbox_buf[7] = (void *) - (SMP_CORE_GROUP1_BASE + SMP_CORE3_OFFSET + BUF); - ipi_mailbox_buf[8] = (void *) - (SMP_CORE_GROUP2_BASE + SMP_CORE0_OFFSET + BUF); - ipi_mailbox_buf[9] = (void *) - (SMP_CORE_GROUP2_BASE + SMP_CORE1_OFFSET + BUF); - ipi_mailbox_buf[10] = (void *) - (SMP_CORE_GROUP2_BASE + SMP_CORE2_OFFSET + BUF); - ipi_mailbox_buf[11] = (void *) - (SMP_CORE_GROUP2_BASE + SMP_CORE3_OFFSET + BUF); - ipi_mailbox_buf[12] = (void *) - (SMP_CORE_GROUP3_BASE + SMP_CORE0_OFFSET + BUF); - ipi_mailbox_buf[13] = (void *) - (SMP_CORE_GROUP3_BASE + SMP_CORE1_OFFSET + BUF); - ipi_mailbox_buf[14] = (void *) - (SMP_CORE_GROUP3_BASE + SMP_CORE2_OFFSET + BUF); - ipi_mailbox_buf[15] = (void *) - (SMP_CORE_GROUP3_BASE + SMP_CORE3_OFFSET + BUF); -} - -/* - * Simple enough, just poke the appropriate ipi register - */ -static void loongson3_send_ipi_single(int cpu, unsigned int action) -{ - ipi_write_action(cpu_logical_map(cpu), (u32)action); -} - -static void -loongson3_send_ipi_mask(const struct cpumask *mask, unsigned int action) -{ - unsigned int i; - - for_each_cpu(i, mask) - ipi_write_action(cpu_logical_map(i), (u32)action); -} - -#define IPI_IRQ_OFFSET 6 - -void loongson3_send_irq_by_ipi(int cpu, int irqs) -{ - ipi_write_action(cpu_logical_map(cpu), irqs << IPI_IRQ_OFFSET); -} - -void loongson3_ipi_interrupt(struct pt_regs *regs) -{ - int i, cpu = smp_processor_id(); - unsigned int action, c0count, irqs; - - action = ipi_read_clear(cpu); - irqs = action >> IPI_IRQ_OFFSET; - - if (action & 
SMP_RESCHEDULE_YOURSELF) - scheduler_ipi(); - - if (action & SMP_CALL_FUNCTION) { - irq_enter(); - generic_smp_call_function_interrupt(); - irq_exit(); - } - - if (action & SMP_ASK_C0COUNT) { - BUG_ON(cpu != 0); - c0count = read_c0_count(); - c0count = c0count ? c0count : 1; - for (i = 1; i < nr_cpu_ids; i++) - core0_c0count[i] = c0count; - __wbflush(); /* Let others see the result ASAP */ - } - - if (irqs) { - int irq; - while ((irq = ffs(irqs))) { - do_IRQ(irq-1); - irqs &= ~(1<<(irq-1)); - } - } -} - -#define MAX_LOOPS 800 -/* - * SMP init and finish on secondary CPUs - */ -static void loongson3_init_secondary(void) -{ - int i; - uint32_t initcount; - unsigned int cpu = smp_processor_id(); - unsigned int imask = STATUSF_IP7 | STATUSF_IP6 | - STATUSF_IP3 | STATUSF_IP2; - - /* Set interrupt mask, but don't enable */ - change_c0_status(ST0_IM, imask); - - for (i = 0; i < num_possible_cpus(); i++) - loongson3_ipi_write32(0xffffffff, ipi_en0_regs[cpu_logical_map(i)]); - - per_cpu(cpu_state, cpu) = CPU_ONLINE; - cpu_set_core(&cpu_data[cpu], - cpu_logical_map(cpu) % loongson_sysconf.cores_per_package); - cpu_data[cpu].package = - cpu_logical_map(cpu) / loongson_sysconf.cores_per_package; - - i = 0; - core0_c0count[cpu] = 0; - loongson3_send_ipi_single(0, SMP_ASK_C0COUNT); - while (!core0_c0count[cpu]) { - i++; - cpu_relax(); - } - - if (i > MAX_LOOPS) - i = MAX_LOOPS; - if (cpu_data[cpu].package) - initcount = core0_c0count[cpu] + i; - else /* Local access is faster for loops */ - initcount = core0_c0count[cpu] + i/2; - - write_c0_count(initcount); -} - -static void loongson3_smp_finish(void) -{ - int cpu = smp_processor_id(); - - write_c0_compare(read_c0_count() + mips_hpt_frequency/HZ); - local_irq_enable(); - loongson3_ipi_write64(0, - ipi_mailbox_buf[cpu_logical_map(cpu)] + 0x0); - pr_info("CPU#%d finished, CP0_ST=%x\n", - smp_processor_id(), read_c0_status()); -} - -static void __init loongson3_smp_setup(void) -{ - int i = 0, num = 0; /* i: physical id, num: logical id */ - - init_cpu_possible(cpu_none_mask); - - /* For unified kernel, NR_CPUS is the maximum possible value, - * loongson_sysconf.nr_cpus is the really present value */ - while (i < loongson_sysconf.nr_cpus) { - if (loongson_sysconf.reserved_cpus_mask & (1< +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static struct pglist_data prealloc__node_data[MAX_NUMNODES]; +unsigned char __node_distances[MAX_NUMNODES][MAX_NUMNODES]; +EXPORT_SYMBOL(__node_distances); +struct pglist_data *__node_data[MAX_NUMNODES]; +EXPORT_SYMBOL(__node_data); + +cpumask_t __node_cpumask[MAX_NUMNODES]; +EXPORT_SYMBOL(__node_cpumask); + +static void enable_lpa(void) +{ + unsigned long value; + + value = __read_32bit_c0_register($16, 3); + value |= 0x00000080; + __write_32bit_c0_register($16, 3, value); + value = __read_32bit_c0_register($16, 3); + pr_info("CP0_Config3: CP0 16.3 (0x%lx)\n", value); + + value = __read_32bit_c0_register($5, 1); + value |= 0x20000000; + __write_32bit_c0_register($5, 1, value); + value = __read_32bit_c0_register($5, 1); + pr_info("CP0_PageGrain: CP0 5.1 (0x%lx)\n", value); +} + +static void cpu_node_probe(void) +{ + int i; + + nodes_clear(node_possible_map); + nodes_clear(node_online_map); + for (i = 0; i < loongson_sysconf.nr_nodes; i++) { + node_set_state(num_online_nodes(), N_POSSIBLE); + node_set_online(num_online_nodes()); + } + + pr_info("NUMA: Discovered %d cpus on %d nodes\n", + 
loongson_sysconf.nr_cpus, num_online_nodes()); +} + +static int __init compute_node_distance(int row, int col) +{ + int package_row = row * loongson_sysconf.cores_per_node / + loongson_sysconf.cores_per_package; + int package_col = col * loongson_sysconf.cores_per_node / + loongson_sysconf.cores_per_package; + + if (col == row) + return 0; + else if (package_row == package_col) + return 40; + else + return 100; +} + +static void __init init_topology_matrix(void) +{ + int row, col; + + for (row = 0; row < MAX_NUMNODES; row++) + for (col = 0; col < MAX_NUMNODES; col++) + __node_distances[row][col] = -1; + + for_each_online_node(row) { + for_each_online_node(col) { + __node_distances[row][col] = + compute_node_distance(row, col); + } + } +} + +static unsigned long nid_to_addroffset(unsigned int nid) +{ + unsigned long result; + switch (nid) { + case 0: + default: + result = NODE0_ADDRSPACE_OFFSET; + break; + case 1: + result = NODE1_ADDRSPACE_OFFSET; + break; + case 2: + result = NODE2_ADDRSPACE_OFFSET; + break; + case 3: + result = NODE3_ADDRSPACE_OFFSET; + break; + } + return result; +} + +static void __init szmem(unsigned int node) +{ + u32 i, mem_type; + static unsigned long num_physpages = 0; + u64 node_id, node_psize, start_pfn, end_pfn, mem_start, mem_size; + + /* Parse memory information and activate */ + for (i = 0; i < loongson_memmap->nr_map; i++) { + node_id = loongson_memmap->map[i].node_id; + if (node_id != node) + continue; + + mem_type = loongson_memmap->map[i].mem_type; + mem_size = loongson_memmap->map[i].mem_size; + mem_start = loongson_memmap->map[i].mem_start; + + switch (mem_type) { + case SYSTEM_RAM_LOW: + start_pfn = ((node_id << 44) + mem_start) >> PAGE_SHIFT; + node_psize = (mem_size << 20) >> PAGE_SHIFT; + end_pfn = start_pfn + node_psize; + num_physpages += node_psize; + pr_info("Node%d: mem_type:%d, mem_start:0x%llx, mem_size:0x%llx MB\n", + (u32)node_id, mem_type, mem_start, mem_size); + pr_info(" start_pfn:0x%llx, end_pfn:0x%llx, num_physpages:0x%lx\n", + start_pfn, end_pfn, num_physpages); + memblock_add_node(PFN_PHYS(start_pfn), + PFN_PHYS(end_pfn - start_pfn), node); + break; + case SYSTEM_RAM_HIGH: + start_pfn = ((node_id << 44) + mem_start) >> PAGE_SHIFT; + node_psize = (mem_size << 20) >> PAGE_SHIFT; + end_pfn = start_pfn + node_psize; + num_physpages += node_psize; + pr_info("Node%d: mem_type:%d, mem_start:0x%llx, mem_size:0x%llx MB\n", + (u32)node_id, mem_type, mem_start, mem_size); + pr_info(" start_pfn:0x%llx, end_pfn:0x%llx, num_physpages:0x%lx\n", + start_pfn, end_pfn, num_physpages); + memblock_add_node(PFN_PHYS(start_pfn), + PFN_PHYS(end_pfn - start_pfn), node); + break; + case SYSTEM_RAM_RESERVED: + pr_info("Node%d: mem_type:%d, mem_start:0x%llx, mem_size:0x%llx MB\n", + (u32)node_id, mem_type, mem_start, mem_size); + memblock_reserve(((node_id << 44) + mem_start), + mem_size << 20); + break; + } + } +} + +static void __init node_mem_init(unsigned int node) +{ + unsigned long node_addrspace_offset; + unsigned long start_pfn, end_pfn; + + node_addrspace_offset = nid_to_addroffset(node); + pr_info("Node%d's addrspace_offset is 0x%lx\n", + node, node_addrspace_offset); + + get_pfn_range_for_nid(node, &start_pfn, &end_pfn); + pr_info("Node%d: start_pfn=0x%lx, end_pfn=0x%lx\n", + node, start_pfn, end_pfn); + + __node_data[node] = prealloc__node_data + node; + + NODE_DATA(node)->node_start_pfn = start_pfn; + NODE_DATA(node)->node_spanned_pages = end_pfn - start_pfn; + + if (node == 0) { + /* kernel end address */ + unsigned long kernel_end_pfn = 
PFN_UP(__pa_symbol(&_end)); + + /* used by finalize_initrd() */ + max_low_pfn = end_pfn; + + /* Reserve the kernel text/data/bss */ + memblock_reserve(start_pfn << PAGE_SHIFT, + ((kernel_end_pfn - start_pfn) << PAGE_SHIFT)); + + /* Reserve 0xfe000000~0xffffffff for RS780E integrated GPU */ + if (node_end_pfn(0) >= (0xffffffff >> PAGE_SHIFT)) + memblock_reserve((node_addrspace_offset | 0xfe000000), + 32 << 20); + } +} + +static __init void prom_meminit(void) +{ + unsigned int node, cpu, active_cpu = 0; + + cpu_node_probe(); + init_topology_matrix(); + + for (node = 0; node < loongson_sysconf.nr_nodes; node++) { + if (node_online(node)) { + szmem(node); + node_mem_init(node); + cpumask_clear(&__node_cpumask[node]); + } + } + memblocks_present(); + max_low_pfn = PHYS_PFN(memblock_end_of_DRAM()); + + for (cpu = 0; cpu < loongson_sysconf.nr_cpus; cpu++) { + node = cpu / loongson_sysconf.cores_per_node; + if (node >= num_online_nodes()) + node = 0; + + if (loongson_sysconf.reserved_cpus_mask & (1< + +#include +#include +#include + +static struct resource loongson_pci_mem_resource = { + .name = "pci memory space", + .start = LOONGSON_PCI_MEM_START, + .end = LOONGSON_PCI_MEM_END, + .flags = IORESOURCE_MEM, +}; + +static struct resource loongson_pci_io_resource = { + .name = "pci io space", + .start = LOONGSON_PCI_IO_START, + .end = IO_SPACE_LIMIT, + .flags = IORESOURCE_IO, +}; + +static struct pci_controller loongson_pci_controller = { + .pci_ops = &loongson_pci_ops, + .io_resource = &loongson_pci_io_resource, + .mem_resource = &loongson_pci_mem_resource, + .mem_offset = 0x00000000UL, + .io_offset = 0x00000000UL, +}; + +static void __init setup_pcimap(void) +{ + /* + * local to PCI mapping for CPU accessing PCI space + * CPU address space [256M,448M] is window for accessing pci space + * we set pcimap_lo[0,1,2] to map it to pci space[0M,64M], [320M,448M] + * + * pcimap: PCI_MAP2 PCI_Mem_Lo2 PCI_Mem_Lo1 PCI_Mem_Lo0 + * [<2G] [384M,448M] [320M,384M] [0M,64M] + */ + LOONGSON_PCIMAP = LOONGSON_PCIMAP_PCIMAP_2 | + LOONGSON_PCIMAP_WIN(2, LOONGSON_PCILO2_BASE) | + LOONGSON_PCIMAP_WIN(1, LOONGSON_PCILO1_BASE) | + LOONGSON_PCIMAP_WIN(0, 0); + + /* + * PCI-DMA to local mapping: [2G,2G+256M] -> [0M,256M] + */ + LOONGSON_PCIBASE0 = 0x80000000ul; /* base: 2G -> mmap: 0M */ + /* size: 256M, burst transmission, pre-fetch enable, 64bit */ + LOONGSON_PCI_HIT0_SEL_L = 0xc000000cul; + LOONGSON_PCI_HIT0_SEL_H = 0xfffffffful; + LOONGSON_PCI_HIT1_SEL_L = 0x00000006ul; /* set this BAR as invalid */ + LOONGSON_PCI_HIT1_SEL_H = 0x00000000ul; + LOONGSON_PCI_HIT2_SEL_L = 0x00000006ul; /* set this BAR as invalid */ + LOONGSON_PCI_HIT2_SEL_H = 0x00000000ul; + + /* avoid deadlock of PCI reading/writing lock operation */ + LOONGSON_PCI_ISR4C = 0xd2000001ul; + + /* can not change gnt to break pci transfer when device's gnt not + deassert for some broken device */ + LOONGSON_PXARB_CFG = 0x00fe0105ul; + +#ifdef CONFIG_CPU_SUPPORTS_ADDRWINCFG + /* + * set cpu addr window2 to map CPU address space to PCI address space + */ + LOONGSON_ADDRWIN_CPUTOPCI(ADDRWIN_WIN2, LOONGSON_CPU_MEM_SRC, + LOONGSON_PCI_MEM_DST, MMAP_CPUTOPCI_SIZE); +#endif +} + +extern int sbx00_acpi_init(void); + +static int __init pcibios_init(void) +{ + setup_pcimap(); + + loongson_pci_controller.io_map_base = mips_io_port_base; + loongson_pci_mem_resource.start = loongson_sysconf.pci_mem_start_addr; + loongson_pci_mem_resource.end = loongson_sysconf.pci_mem_end_addr; + + register_pci_controller(&loongson_pci_controller); + + sbx00_acpi_init(); + + return 0; +} + 
+arch_initcall(pcibios_init); diff --git a/arch/mips/loongson64/platform.c b/arch/mips/loongson64/platform.c new file mode 100644 index 000000000000..13f3404f0030 --- /dev/null +++ b/arch/mips/loongson64/platform.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2009 Lemote Inc. + * Author: Wu Zhangjin, wuzhangjin@gmail.com + * Xiang Yu, xiangy@lemote.com + * Chen Huacai, chenhc@lemote.com + */ + +#include +#include +#include +#include +#include +#include +#include + +static int __init loongson3_platform_init(void) +{ + int i; + struct platform_device *pdev; + + if (loongson_sysconf.ecname[0] != '\0') + platform_device_register_simple(loongson_sysconf.ecname, -1, NULL, 0); + + for (i = 0; i < loongson_sysconf.nr_sensors; i++) { + if (loongson_sysconf.sensors[i].type > SENSOR_FAN) + continue; + + pdev = kzalloc(sizeof(struct platform_device), GFP_KERNEL); + pdev->name = loongson_sysconf.sensors[i].name; + pdev->id = loongson_sysconf.sensors[i].id; + pdev->dev.platform_data = &loongson_sysconf.sensors[i]; + platform_device_register(pdev); + } + + return 0; +} + +arch_initcall(loongson3_platform_init); diff --git a/arch/mips/loongson64/pm.c b/arch/mips/loongson64/pm.c new file mode 100644 index 000000000000..7c8556f09781 --- /dev/null +++ b/arch/mips/loongson64/pm.c @@ -0,0 +1,104 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * loongson-specific suspend support + * + * Copyright (C) 2009 Lemote Inc. + * Author: Wu Zhangjin + */ +#include +#include +#include + +#include +#include + +#include + +static unsigned int __maybe_unused cached_master_mask; /* i8259A */ +static unsigned int __maybe_unused cached_slave_mask; +static unsigned int __maybe_unused cached_bonito_irq_mask; /* bonito */ + +void arch_suspend_disable_irqs(void) +{ + /* disable all mips events */ + local_irq_disable(); + +#ifdef CONFIG_I8259 + /* disable all events of i8259A */ + cached_slave_mask = inb(PIC_SLAVE_IMR); + cached_master_mask = inb(PIC_MASTER_IMR); + + outb(0xff, PIC_SLAVE_IMR); + inb(PIC_SLAVE_IMR); + outb(0xff, PIC_MASTER_IMR); + inb(PIC_MASTER_IMR); +#endif + /* disable all events of bonito */ + cached_bonito_irq_mask = LOONGSON_INTEN; + LOONGSON_INTENCLR = 0xffff; + (void)LOONGSON_INTENCLR; +} + +void arch_suspend_enable_irqs(void) +{ + /* enable all mips events */ + local_irq_enable(); +#ifdef CONFIG_I8259 + /* only enable the cached events of i8259A */ + outb(cached_slave_mask, PIC_SLAVE_IMR); + outb(cached_master_mask, PIC_MASTER_IMR); +#endif + /* enable all cached events of bonito */ + LOONGSON_INTENSET = cached_bonito_irq_mask; + (void)LOONGSON_INTENSET; +} + +/* + * Setup the board-specific events for waking up loongson from wait mode + */ +void __weak setup_wakeup_events(void) +{ +} + +void __weak mach_suspend(void) +{ +} + +void __weak mach_resume(void) +{ +} + +static int loongson_pm_enter(suspend_state_t state) +{ + mach_suspend(); + + mach_resume(); + + return 0; +} + +static int loongson_pm_valid_state(suspend_state_t state) +{ + switch (state) { + case PM_SUSPEND_ON: + case PM_SUSPEND_STANDBY: + case PM_SUSPEND_MEM: + return 1; + + default: + return 0; + } +} + +static const struct platform_suspend_ops loongson_pm_ops = { + .valid = loongson_pm_valid_state, + .enter = loongson_pm_enter, +}; + +static int __init loongson_pm_init(void) +{ + suspend_set_ops(&loongson_pm_ops); + + return 0; +} +arch_initcall(loongson_pm_init); diff --git a/arch/mips/loongson64/reset.c b/arch/mips/loongson64/reset.c new file mode 100644 index 000000000000..88b3bd5fed25 --- 
/dev/null +++ b/arch/mips/loongson64/reset.c @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * + * Copyright (C) 2007 Lemote, Inc. & Institute of Computing Technology + * Author: Fuxin Zhang, zhangfx@lemote.com + * Copyright (C) 2009 Lemote, Inc. + * Author: Zhangjin Wu, wuzhangjin@gmail.com + */ +#include +#include + +#include +#include + +#include +#include + +static inline void loongson_reboot(void) +{ + ((void (*)(void))ioremap_nocache(LOONGSON_BOOT_BASE, 4)) (); +} + +static void loongson_restart(char *command) +{ + + void (*fw_restart)(void) = (void *)loongson_sysconf.restart_addr; + + fw_restart(); + while (1) { + if (cpu_wait) + cpu_wait(); + } +} + +static void loongson_poweroff(void) +{ + void (*fw_poweroff)(void) = (void *)loongson_sysconf.poweroff_addr; + + fw_poweroff(); + while (1) { + if (cpu_wait) + cpu_wait(); + } +} + +static void loongson_halt(void) +{ + pr_notice("\n\n** You can safely turn off the power now **\n\n"); + while (1) { + if (cpu_wait) + cpu_wait(); + } +} + +static int __init mips_reboot_setup(void) +{ + _machine_restart = loongson_restart; + _machine_halt = loongson_halt; + pm_power_off = loongson_poweroff; + + return 0; +} + +arch_initcall(mips_reboot_setup); diff --git a/arch/mips/loongson64/rtc.c b/arch/mips/loongson64/rtc.c new file mode 100644 index 000000000000..8d7628c0f513 --- /dev/null +++ b/arch/mips/loongson64/rtc.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Lemote Fuloong platform support + * + * Copyright(c) 2010 Arnaud Patard + */ + +#include +#include +#include +#include + +static struct resource loongson_rtc_resources[] = { + { + .start = RTC_PORT(0), + .end = RTC_PORT(1), + .flags = IORESOURCE_IO, + }, { + .start = RTC_IRQ, + .end = RTC_IRQ, + .flags = IORESOURCE_IRQ, + } +}; + +static struct platform_device loongson_rtc_device = { + .name = "rtc_cmos", + .id = -1, + .resource = loongson_rtc_resources, + .num_resources = ARRAY_SIZE(loongson_rtc_resources), +}; + + +static int __init loongson_rtc_platform_init(void) +{ + platform_device_register(&loongson_rtc_device); + return 0; +} + +device_initcall(loongson_rtc_platform_init); diff --git a/arch/mips/loongson64/setup.c b/arch/mips/loongson64/setup.c new file mode 100644 index 000000000000..4fd27f4f90ed --- /dev/null +++ b/arch/mips/loongson64/setup.c @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2007 Lemote Inc. & Institute of Computing Technology + * Author: Fuxin Zhang, zhangfx@lemote.com + */ +#include +#include + +#include +#include + +#include + +static void wbflush_loongson(void) +{ + asm(".set\tpush\n\t" + ".set\tnoreorder\n\t" + ".set mips3\n\t" + "sync\n\t" + "nop\n\t" + ".set\tpop\n\t" + ".set mips0\n\t"); +} + +void (*__wbflush)(void) = wbflush_loongson; +EXPORT_SYMBOL(__wbflush); + +void __init plat_mem_setup(void) +{ +} diff --git a/arch/mips/loongson64/smp.c b/arch/mips/loongson64/smp.c new file mode 100644 index 000000000000..de8e0741ce2d --- /dev/null +++ b/arch/mips/loongson64/smp.c @@ -0,0 +1,813 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2010, 2011, 2012, Lemote, Inc. 
+ * Author: Chen Huacai, chenhc@lemote.com + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "smp.h" + +DEFINE_PER_CPU(int, cpu_state); + +static void *ipi_set0_regs[16]; +static void *ipi_clear0_regs[16]; +static void *ipi_status0_regs[16]; +static void *ipi_en0_regs[16]; +static void *ipi_mailbox_buf[16]; +static uint32_t core0_c0count[NR_CPUS]; + +/* read a 32bit value from ipi register */ +#define loongson3_ipi_read32(addr) readl(addr) +/* read a 64bit value from ipi register */ +#define loongson3_ipi_read64(addr) readq(addr) +/* write a 32bit value to ipi register */ +#define loongson3_ipi_write32(action, addr) \ + do { \ + writel(action, addr); \ + __wbflush(); \ + } while (0) +/* write a 64bit value to ipi register */ +#define loongson3_ipi_write64(action, addr) \ + do { \ + writeq(action, addr); \ + __wbflush(); \ + } while (0) + +u32 (*ipi_read_clear)(int cpu); +void (*ipi_write_action)(int cpu, u32 action); + +static u32 csr_ipi_read_clear(int cpu) +{ + u32 action; + + /* Load the ipi register to figure out what we're supposed to do */ + action = csr_readl(LOONGSON_CSR_IPI_STATUS); + /* Clear the ipi register to clear the interrupt */ + csr_writel(action, LOONGSON_CSR_IPI_CLEAR); + + return action; +} + +static void csr_ipi_write_action(int cpu, u32 action) +{ + unsigned int irq = 0; + + while ((irq = ffs(action))) { + uint32_t val = CSR_IPI_SEND_BLOCK; + val |= (irq - 1); + val |= (cpu << CSR_IPI_SEND_CPU_SHIFT); + csr_writel(val, LOONGSON_CSR_IPI_SEND); + action &= ~BIT(irq - 1); + } +} + +static u32 legacy_ipi_read_clear(int cpu) +{ + u32 action; + + /* Load the ipi register to figure out what we're supposed to do */ + action = loongson3_ipi_read32(ipi_status0_regs[cpu_logical_map(cpu)]); + /* Clear the ipi register to clear the interrupt */ + loongson3_ipi_write32(action, ipi_clear0_regs[cpu_logical_map(cpu)]); + + return action; +} + +static void legacy_ipi_write_action(int cpu, u32 action) +{ + loongson3_ipi_write32((u32)action, ipi_set0_regs[cpu]); +} + +static void csr_ipi_probe(void) +{ + if (cpu_has_csr() && csr_readl(LOONGSON_CSR_FEATURES) & LOONGSON_CSRF_IPI) { + ipi_read_clear = csr_ipi_read_clear; + ipi_write_action = csr_ipi_write_action; + } else { + ipi_read_clear = legacy_ipi_read_clear; + ipi_write_action = legacy_ipi_write_action; + } +} + +static void ipi_set0_regs_init(void) +{ + ipi_set0_regs[0] = (void *) + (SMP_CORE_GROUP0_BASE + SMP_CORE0_OFFSET + SET0); + ipi_set0_regs[1] = (void *) + (SMP_CORE_GROUP0_BASE + SMP_CORE1_OFFSET + SET0); + ipi_set0_regs[2] = (void *) + (SMP_CORE_GROUP0_BASE + SMP_CORE2_OFFSET + SET0); + ipi_set0_regs[3] = (void *) + (SMP_CORE_GROUP0_BASE + SMP_CORE3_OFFSET + SET0); + ipi_set0_regs[4] = (void *) + (SMP_CORE_GROUP1_BASE + SMP_CORE0_OFFSET + SET0); + ipi_set0_regs[5] = (void *) + (SMP_CORE_GROUP1_BASE + SMP_CORE1_OFFSET + SET0); + ipi_set0_regs[6] = (void *) + (SMP_CORE_GROUP1_BASE + SMP_CORE2_OFFSET + SET0); + ipi_set0_regs[7] = (void *) + (SMP_CORE_GROUP1_BASE + SMP_CORE3_OFFSET + SET0); + ipi_set0_regs[8] = (void *) + (SMP_CORE_GROUP2_BASE + SMP_CORE0_OFFSET + SET0); + ipi_set0_regs[9] = (void *) + (SMP_CORE_GROUP2_BASE + SMP_CORE1_OFFSET + SET0); + ipi_set0_regs[10] = (void *) + (SMP_CORE_GROUP2_BASE + SMP_CORE2_OFFSET + SET0); + ipi_set0_regs[11] = (void *) + (SMP_CORE_GROUP2_BASE + SMP_CORE3_OFFSET + SET0); + ipi_set0_regs[12] = (void *) + (SMP_CORE_GROUP3_BASE + SMP_CORE0_OFFSET + SET0); + 
ipi_set0_regs[13] = (void *) + (SMP_CORE_GROUP3_BASE + SMP_CORE1_OFFSET + SET0); + ipi_set0_regs[14] = (void *) + (SMP_CORE_GROUP3_BASE + SMP_CORE2_OFFSET + SET0); + ipi_set0_regs[15] = (void *) + (SMP_CORE_GROUP3_BASE + SMP_CORE3_OFFSET + SET0); +} + +static void ipi_clear0_regs_init(void) +{ + ipi_clear0_regs[0] = (void *) + (SMP_CORE_GROUP0_BASE + SMP_CORE0_OFFSET + CLEAR0); + ipi_clear0_regs[1] = (void *) + (SMP_CORE_GROUP0_BASE + SMP_CORE1_OFFSET + CLEAR0); + ipi_clear0_regs[2] = (void *) + (SMP_CORE_GROUP0_BASE + SMP_CORE2_OFFSET + CLEAR0); + ipi_clear0_regs[3] = (void *) + (SMP_CORE_GROUP0_BASE + SMP_CORE3_OFFSET + CLEAR0); + ipi_clear0_regs[4] = (void *) + (SMP_CORE_GROUP1_BASE + SMP_CORE0_OFFSET + CLEAR0); + ipi_clear0_regs[5] = (void *) + (SMP_CORE_GROUP1_BASE + SMP_CORE1_OFFSET + CLEAR0); + ipi_clear0_regs[6] = (void *) + (SMP_CORE_GROUP1_BASE + SMP_CORE2_OFFSET + CLEAR0); + ipi_clear0_regs[7] = (void *) + (SMP_CORE_GROUP1_BASE + SMP_CORE3_OFFSET + CLEAR0); + ipi_clear0_regs[8] = (void *) + (SMP_CORE_GROUP2_BASE + SMP_CORE0_OFFSET + CLEAR0); + ipi_clear0_regs[9] = (void *) + (SMP_CORE_GROUP2_BASE + SMP_CORE1_OFFSET + CLEAR0); + ipi_clear0_regs[10] = (void *) + (SMP_CORE_GROUP2_BASE + SMP_CORE2_OFFSET + CLEAR0); + ipi_clear0_regs[11] = (void *) + (SMP_CORE_GROUP2_BASE + SMP_CORE3_OFFSET + CLEAR0); + ipi_clear0_regs[12] = (void *) + (SMP_CORE_GROUP3_BASE + SMP_CORE0_OFFSET + CLEAR0); + ipi_clear0_regs[13] = (void *) + (SMP_CORE_GROUP3_BASE + SMP_CORE1_OFFSET + CLEAR0); + ipi_clear0_regs[14] = (void *) + (SMP_CORE_GROUP3_BASE + SMP_CORE2_OFFSET + CLEAR0); + ipi_clear0_regs[15] = (void *) + (SMP_CORE_GROUP3_BASE + SMP_CORE3_OFFSET + CLEAR0); +} + +static void ipi_status0_regs_init(void) +{ + ipi_status0_regs[0] = (void *) + (SMP_CORE_GROUP0_BASE + SMP_CORE0_OFFSET + STATUS0); + ipi_status0_regs[1] = (void *) + (SMP_CORE_GROUP0_BASE + SMP_CORE1_OFFSET + STATUS0); + ipi_status0_regs[2] = (void *) + (SMP_CORE_GROUP0_BASE + SMP_CORE2_OFFSET + STATUS0); + ipi_status0_regs[3] = (void *) + (SMP_CORE_GROUP0_BASE + SMP_CORE3_OFFSET + STATUS0); + ipi_status0_regs[4] = (void *) + (SMP_CORE_GROUP1_BASE + SMP_CORE0_OFFSET + STATUS0); + ipi_status0_regs[5] = (void *) + (SMP_CORE_GROUP1_BASE + SMP_CORE1_OFFSET + STATUS0); + ipi_status0_regs[6] = (void *) + (SMP_CORE_GROUP1_BASE + SMP_CORE2_OFFSET + STATUS0); + ipi_status0_regs[7] = (void *) + (SMP_CORE_GROUP1_BASE + SMP_CORE3_OFFSET + STATUS0); + ipi_status0_regs[8] = (void *) + (SMP_CORE_GROUP2_BASE + SMP_CORE0_OFFSET + STATUS0); + ipi_status0_regs[9] = (void *) + (SMP_CORE_GROUP2_BASE + SMP_CORE1_OFFSET + STATUS0); + ipi_status0_regs[10] = (void *) + (SMP_CORE_GROUP2_BASE + SMP_CORE2_OFFSET + STATUS0); + ipi_status0_regs[11] = (void *) + (SMP_CORE_GROUP2_BASE + SMP_CORE3_OFFSET + STATUS0); + ipi_status0_regs[12] = (void *) + (SMP_CORE_GROUP3_BASE + SMP_CORE0_OFFSET + STATUS0); + ipi_status0_regs[13] = (void *) + (SMP_CORE_GROUP3_BASE + SMP_CORE1_OFFSET + STATUS0); + ipi_status0_regs[14] = (void *) + (SMP_CORE_GROUP3_BASE + SMP_CORE2_OFFSET + STATUS0); + ipi_status0_regs[15] = (void *) + (SMP_CORE_GROUP3_BASE + SMP_CORE3_OFFSET + STATUS0); +} + +static void ipi_en0_regs_init(void) +{ + ipi_en0_regs[0] = (void *) + (SMP_CORE_GROUP0_BASE + SMP_CORE0_OFFSET + EN0); + ipi_en0_regs[1] = (void *) + (SMP_CORE_GROUP0_BASE + SMP_CORE1_OFFSET + EN0); + ipi_en0_regs[2] = (void *) + (SMP_CORE_GROUP0_BASE + SMP_CORE2_OFFSET + EN0); + ipi_en0_regs[3] = (void *) + (SMP_CORE_GROUP0_BASE + SMP_CORE3_OFFSET + EN0); + ipi_en0_regs[4] = (void *) + 
(SMP_CORE_GROUP1_BASE + SMP_CORE0_OFFSET + EN0); + ipi_en0_regs[5] = (void *) + (SMP_CORE_GROUP1_BASE + SMP_CORE1_OFFSET + EN0); + ipi_en0_regs[6] = (void *) + (SMP_CORE_GROUP1_BASE + SMP_CORE2_OFFSET + EN0); + ipi_en0_regs[7] = (void *) + (SMP_CORE_GROUP1_BASE + SMP_CORE3_OFFSET + EN0); + ipi_en0_regs[8] = (void *) + (SMP_CORE_GROUP2_BASE + SMP_CORE0_OFFSET + EN0); + ipi_en0_regs[9] = (void *) + (SMP_CORE_GROUP2_BASE + SMP_CORE1_OFFSET + EN0); + ipi_en0_regs[10] = (void *) + (SMP_CORE_GROUP2_BASE + SMP_CORE2_OFFSET + EN0); + ipi_en0_regs[11] = (void *) + (SMP_CORE_GROUP2_BASE + SMP_CORE3_OFFSET + EN0); + ipi_en0_regs[12] = (void *) + (SMP_CORE_GROUP3_BASE + SMP_CORE0_OFFSET + EN0); + ipi_en0_regs[13] = (void *) + (SMP_CORE_GROUP3_BASE + SMP_CORE1_OFFSET + EN0); + ipi_en0_regs[14] = (void *) + (SMP_CORE_GROUP3_BASE + SMP_CORE2_OFFSET + EN0); + ipi_en0_regs[15] = (void *) + (SMP_CORE_GROUP3_BASE + SMP_CORE3_OFFSET + EN0); +} + +static void ipi_mailbox_buf_init(void) +{ + ipi_mailbox_buf[0] = (void *) + (SMP_CORE_GROUP0_BASE + SMP_CORE0_OFFSET + BUF); + ipi_mailbox_buf[1] = (void *) + (SMP_CORE_GROUP0_BASE + SMP_CORE1_OFFSET + BUF); + ipi_mailbox_buf[2] = (void *) + (SMP_CORE_GROUP0_BASE + SMP_CORE2_OFFSET + BUF); + ipi_mailbox_buf[3] = (void *) + (SMP_CORE_GROUP0_BASE + SMP_CORE3_OFFSET + BUF); + ipi_mailbox_buf[4] = (void *) + (SMP_CORE_GROUP1_BASE + SMP_CORE0_OFFSET + BUF); + ipi_mailbox_buf[5] = (void *) + (SMP_CORE_GROUP1_BASE + SMP_CORE1_OFFSET + BUF); + ipi_mailbox_buf[6] = (void *) + (SMP_CORE_GROUP1_BASE + SMP_CORE2_OFFSET + BUF); + ipi_mailbox_buf[7] = (void *) + (SMP_CORE_GROUP1_BASE + SMP_CORE3_OFFSET + BUF); + ipi_mailbox_buf[8] = (void *) + (SMP_CORE_GROUP2_BASE + SMP_CORE0_OFFSET + BUF); + ipi_mailbox_buf[9] = (void *) + (SMP_CORE_GROUP2_BASE + SMP_CORE1_OFFSET + BUF); + ipi_mailbox_buf[10] = (void *) + (SMP_CORE_GROUP2_BASE + SMP_CORE2_OFFSET + BUF); + ipi_mailbox_buf[11] = (void *) + (SMP_CORE_GROUP2_BASE + SMP_CORE3_OFFSET + BUF); + ipi_mailbox_buf[12] = (void *) + (SMP_CORE_GROUP3_BASE + SMP_CORE0_OFFSET + BUF); + ipi_mailbox_buf[13] = (void *) + (SMP_CORE_GROUP3_BASE + SMP_CORE1_OFFSET + BUF); + ipi_mailbox_buf[14] = (void *) + (SMP_CORE_GROUP3_BASE + SMP_CORE2_OFFSET + BUF); + ipi_mailbox_buf[15] = (void *) + (SMP_CORE_GROUP3_BASE + SMP_CORE3_OFFSET + BUF); +} + +/* + * Simple enough, just poke the appropriate ipi register + */ +static void loongson3_send_ipi_single(int cpu, unsigned int action) +{ + ipi_write_action(cpu_logical_map(cpu), (u32)action); +} + +static void +loongson3_send_ipi_mask(const struct cpumask *mask, unsigned int action) +{ + unsigned int i; + + for_each_cpu(i, mask) + ipi_write_action(cpu_logical_map(i), (u32)action); +} + +#define IPI_IRQ_OFFSET 6 + +void loongson3_send_irq_by_ipi(int cpu, int irqs) +{ + ipi_write_action(cpu_logical_map(cpu), irqs << IPI_IRQ_OFFSET); +} + +void loongson3_ipi_interrupt(struct pt_regs *regs) +{ + int i, cpu = smp_processor_id(); + unsigned int action, c0count, irqs; + + action = ipi_read_clear(cpu); + irqs = action >> IPI_IRQ_OFFSET; + + if (action & SMP_RESCHEDULE_YOURSELF) + scheduler_ipi(); + + if (action & SMP_CALL_FUNCTION) { + irq_enter(); + generic_smp_call_function_interrupt(); + irq_exit(); + } + + if (action & SMP_ASK_C0COUNT) { + BUG_ON(cpu != 0); + c0count = read_c0_count(); + c0count = c0count ? 
c0count : 1; + for (i = 1; i < nr_cpu_ids; i++) + core0_c0count[i] = c0count; + __wbflush(); /* Let others see the result ASAP */ + } + + if (irqs) { + int irq; + while ((irq = ffs(irqs))) { + do_IRQ(irq-1); + irqs &= ~(1<<(irq-1)); + } + } +} + +#define MAX_LOOPS 800 +/* + * SMP init and finish on secondary CPUs + */ +static void loongson3_init_secondary(void) +{ + int i; + uint32_t initcount; + unsigned int cpu = smp_processor_id(); + unsigned int imask = STATUSF_IP7 | STATUSF_IP6 | + STATUSF_IP3 | STATUSF_IP2; + + /* Set interrupt mask, but don't enable */ + change_c0_status(ST0_IM, imask); + + for (i = 0; i < num_possible_cpus(); i++) + loongson3_ipi_write32(0xffffffff, ipi_en0_regs[cpu_logical_map(i)]); + + per_cpu(cpu_state, cpu) = CPU_ONLINE; + cpu_set_core(&cpu_data[cpu], + cpu_logical_map(cpu) % loongson_sysconf.cores_per_package); + cpu_data[cpu].package = + cpu_logical_map(cpu) / loongson_sysconf.cores_per_package; + + i = 0; + core0_c0count[cpu] = 0; + loongson3_send_ipi_single(0, SMP_ASK_C0COUNT); + while (!core0_c0count[cpu]) { + i++; + cpu_relax(); + } + + if (i > MAX_LOOPS) + i = MAX_LOOPS; + if (cpu_data[cpu].package) + initcount = core0_c0count[cpu] + i; + else /* Local access is faster for loops */ + initcount = core0_c0count[cpu] + i/2; + + write_c0_count(initcount); +} + +static void loongson3_smp_finish(void) +{ + int cpu = smp_processor_id(); + + write_c0_compare(read_c0_count() + mips_hpt_frequency/HZ); + local_irq_enable(); + loongson3_ipi_write64(0, + ipi_mailbox_buf[cpu_logical_map(cpu)] + 0x0); + pr_info("CPU#%d finished, CP0_ST=%x\n", + smp_processor_id(), read_c0_status()); +} + +static void __init loongson3_smp_setup(void) +{ + int i = 0, num = 0; /* i: physical id, num: logical id */ + + init_cpu_possible(cpu_none_mask); + + /* For unified kernel, NR_CPUS is the maximum possible value, + * loongson_sysconf.nr_cpus is the really present value */ + while (i < loongson_sysconf.nr_cpus) { + if (loongson_sysconf.reserved_cpus_mask & (1< +#include +#include + +#include + +void __init plat_time_init(void) +{ + /* setup mips r4k timer */ + mips_hpt_frequency = cpu_clock_freq / 2; + +#ifdef CONFIG_RS780_HPET + setup_hpet_timer(); +#endif +} + +void read_persistent_clock64(struct timespec64 *ts) +{ + ts->tv_sec = mc146818_get_cmos_time(); + ts->tv_nsec = 0; +} diff --git a/arch/mips/pci/Makefile b/arch/mips/pci/Makefile index d6de4cb2e31c..342ce10ef593 100644 --- a/arch/mips/pci/Makefile +++ b/arch/mips/pci/Makefile @@ -35,7 +35,7 @@ obj-$(CONFIG_LASAT) += pci-lasat.o obj-$(CONFIG_MIPS_COBALT) += fixup-cobalt.o obj-$(CONFIG_LEMOTE_FULOONG2E) += fixup-fuloong2e.o ops-loongson2.o obj-$(CONFIG_LEMOTE_MACH2F) += fixup-lemote2f.o ops-loongson2.o -obj-$(CONFIG_LOONGSON_MACH3X) += fixup-loongson3.o ops-loongson3.o +obj-$(CONFIG_MACH_LOONGSON64) += fixup-loongson3.o ops-loongson3.o obj-$(CONFIG_MIPS_MALTA) += fixup-malta.o pci-malta.o obj-$(CONFIG_PMC_MSP7120_GW) += fixup-pmcmsp.o ops-pmcmsp.o obj-$(CONFIG_PMC_MSP7120_EVAL) += fixup-pmcmsp.o ops-pmcmsp.o diff --git a/drivers/platform/mips/Kconfig b/drivers/platform/mips/Kconfig index 62ea1934fb6a..f4d0a86c00d0 100644 --- a/drivers/platform/mips/Kconfig +++ b/drivers/platform/mips/Kconfig @@ -17,8 +17,8 @@ menuconfig MIPS_PLATFORM_DEVICES if MIPS_PLATFORM_DEVICES config CPU_HWMON - tristate "Loongson CPU HWMon Driver" - depends on LOONGSON_MACH3X + tristate "Loongson-3 CPU HWMon Driver" + depends on MACH_LOONGSON64 select HWMON default y help -- cgit v1.2.3 From 474435a058309cf1a253dbd77cac2ab89c75d4a6 Mon Sep 17 
00:00:00 2001 From: Paul Menzel Date: Mon, 2 Sep 2019 11:55:06 +0200 Subject: mips/cavium-octeon: Fix typo *must* in comment Fixes: 5b3b16880f ("MIPS: Add Cavium OCTEON processor support files to arch/mips/cavium-octeon.") Signed-off-by: Paul Menzel Signed-off-by: Paul Burton Cc: Ralf Baechle Cc: Paul Burton Cc: James Hogan Cc: linux-mips@vger.kernel.org --- arch/mips/cavium-octeon/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/mips/cavium-octeon/setup.c b/arch/mips/cavium-octeon/setup.c index 95034bf5ca83..1f742c32a883 100644 --- a/arch/mips/cavium-octeon/setup.c +++ b/arch/mips/cavium-octeon/setup.c @@ -844,7 +844,7 @@ void __init prom_init(void) * BIST should always be enabled when doing a soft reset. L2 * Cache locking for instance is not cleared unless BIST is * enabled. Unfortunately due to a chip errata G-200 for - * Cn38XX and CN31XX, BIST msut be disabled on these parts. + * Cn38XX and CN31XX, BIST must be disabled on these parts. */ if (OCTEON_IS_MODEL(OCTEON_CN38XX_PASS2) || OCTEON_IS_MODEL(OCTEON_CN31XX)) -- cgit v1.2.3 From c80b48965a3f5908468d0c078a910ca22f5dede3 Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Wed, 30 Oct 2019 11:51:44 +0100 Subject: MIPS: SGI-IP27: replace MAX_COMPACT_NODE with MAX_NUMNODES MAX_COMPACT_NODE is a leftover from the compact node implementation, which is removed now. Use MAX_NUMNODES instead. Signed-off-by: Thomas Bogendoerfer Signed-off-by: Paul Burton Cc: Ralf Baechle Cc: Paul Burton Cc: James Hogan Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org --- arch/mips/include/asm/mach-ip27/topology.h | 2 +- arch/mips/include/asm/sn/gda.h | 4 +--- arch/mips/include/asm/sn/sn0/arch.h | 16 +--------------- arch/mips/sgi-ip27/ip27-init.c | 2 +- arch/mips/sgi-ip27/ip27-memory.c | 10 +++++----- arch/mips/sgi-ip27/ip27-smp.c | 6 +----- 6 files changed, 10 insertions(+), 30 deletions(-) (limited to 'arch') diff --git a/arch/mips/include/asm/mach-ip27/topology.h b/arch/mips/include/asm/mach-ip27/topology.h index a717af9177ff..be61ddcdacab 100644 --- a/arch/mips/include/asm/mach-ip27/topology.h +++ b/arch/mips/include/asm/mach-ip27/topology.h @@ -22,7 +22,7 @@ extern int pcibus_to_node(struct pci_bus *); #define cpumask_of_pcibus(bus) (cpumask_of_node(pcibus_to_node(bus))) -extern unsigned char __node_distances[MAX_COMPACT_NODES][MAX_COMPACT_NODES]; +extern unsigned char __node_distances[MAX_NUMNODES][MAX_NUMNODES]; #define node_distance(from, to) (__node_distances[(from)][(to)]) diff --git a/arch/mips/include/asm/sn/gda.h b/arch/mips/include/asm/sn/gda.h index 85fa1b5f639d..d52f81620661 100644 --- a/arch/mips/include/asm/sn/gda.h +++ b/arch/mips/include/asm/sn/gda.h @@ -60,9 +60,7 @@ typedef struct gda { /* Pointer to a mask of nodes with copies * of the kernel. */ char g_padding[56]; /* pad out to 128 bytes */ - nasid_t g_nasidtable[MAX_COMPACT_NODES]; /* NASID of each node, - * indexed by cnodeid. - */ + nasid_t g_nasidtable[MAX_NUMNODES]; /* NASID of each node */ } gda_t; #define GDA ((gda_t*) GDA_ADDR(get_nasid())) diff --git a/arch/mips/include/asm/sn/sn0/arch.h b/arch/mips/include/asm/sn/sn0/arch.h index ea8a6983f6a4..12f4c4649ff0 100644 --- a/arch/mips/include/asm/sn/sn0/arch.h +++ b/arch/mips/include/asm/sn/sn0/arch.h @@ -12,25 +12,11 @@ #define _ASM_SN_SN0_ARCH_H -#ifndef SN0XXL /* 128 cpu SMP max */ -/* - * This is the maximum number of nodes that can be part of a kernel. - * Effectively, it's the maximum number of compact node ids (cnodeid_t). 
- */ -#define MAX_COMPACT_NODES 64 - /* * MAXCPUS refers to the maximum number of CPUs in a single kernel. * This is not necessarily the same as MAXNODES * CPUS_PER_NODE */ -#define MAXCPUS 128 - -#else /* SN0XXL system */ - -#define MAX_COMPACT_NODES 128 -#define MAXCPUS 256 - -#endif /* SN0XXL */ +#define MAXCPUS (MAX_NUMNODES * CPUS_PER_NODE) /* * This is the maximum number of NASIDS that can be present in a system. diff --git a/arch/mips/sgi-ip27/ip27-init.c b/arch/mips/sgi-ip27/ip27-init.c index 971aa0d5d534..8fd3505e2b9c 100644 --- a/arch/mips/sgi-ip27/ip27-init.c +++ b/arch/mips/sgi-ip27/ip27-init.c @@ -42,7 +42,7 @@ #define CPU_NONE (cpuid_t)-1 -static DECLARE_BITMAP(hub_init_mask, MAX_COMPACT_NODES); +static DECLARE_BITMAP(hub_init_mask, MAX_NUMNODES); nasid_t master_nasid = INVALID_NASID; struct cpuinfo_ip27 sn_cpu_info[NR_CPUS]; diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c index 3e2f39dfbbf5..f610fff592a6 100644 --- a/arch/mips/sgi-ip27/ip27-memory.c +++ b/arch/mips/sgi-ip27/ip27-memory.c @@ -33,7 +33,7 @@ #define SLOT_PFNSHIFT (SLOT_SHIFT - PAGE_SHIFT) #define PFN_NASIDSHFT (NASID_SHFT - PAGE_SHIFT) -struct node_data *__node_data[MAX_COMPACT_NODES]; +struct node_data *__node_data[MAX_NUMNODES]; EXPORT_SYMBOL(__node_data); @@ -104,7 +104,7 @@ static void router_recurse(klrou_t *router_a, klrou_t *router_b, int depth) router_a->rou_rflag = 0; } -unsigned char __node_distances[MAX_COMPACT_NODES][MAX_COMPACT_NODES]; +unsigned char __node_distances[MAX_NUMNODES][MAX_NUMNODES]; EXPORT_SYMBOL(__node_distances); static int __init compute_node_distance(nasid_t nasid_a, nasid_t nasid_b) @@ -173,8 +173,8 @@ static void __init init_topology_matrix(void) { nasid_t row, col; - for (row = 0; row < MAX_COMPACT_NODES; row++) - for (col = 0; col < MAX_COMPACT_NODES; col++) + for (row = 0; row < MAX_NUMNODES; row++) + for (col = 0; col < MAX_NUMNODES; col++) __node_distances[row][col] = -1; for_each_online_node(row) { @@ -412,7 +412,7 @@ void __init prom_meminit(void) szmem(); max_low_pfn = PHYS_PFN(memblock_end_of_DRAM()); - for (node = 0; node < MAX_COMPACT_NODES; node++) { + for (node = 0; node < MAX_NUMNODES; node++) { if (node_online(node)) { node_mem_init(node); continue; diff --git a/arch/mips/sgi-ip27/ip27-smp.c b/arch/mips/sgi-ip27/ip27-smp.c index c38df7c62964..faa0244c8b0c 100644 --- a/arch/mips/sgi-ip27/ip27-smp.c +++ b/arch/mips/sgi-ip27/ip27-smp.c @@ -81,12 +81,8 @@ void cpu_node_probe(void) int i, highest = 0; gda_t *gdap = GDA; - /* - * MCD - this whole "compact node" stuff can probably be dropped, - * as we can handle sparse numbering now - */ nodes_clear(node_online_map); - for (i = 0; i < MAX_COMPACT_NODES; i++) { + for (i = 0; i < MAX_NUMNODES; i++) { nasid_t nasid = gdap->g_nasidtable[i]; if (nasid == INVALID_NASID) break; -- cgit v1.2.3 From 8a5a499871308c093ced3c5a383b72502b96e0d2 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Wed, 16 Oct 2019 18:03:00 +0800 Subject: MIPS: Loongson: Make default kernel log buffer size as 128KB for Loongson3 When updating the kernel with loongson3_defconfig on the Loongson 3A3000 platform and then using the dmesg command to show the kernel ring buffer, the initial kernel messages have already been dropped because the log buffer is too small. It is better to grow the kernel log buffer from 16KB to 128KB, which is enough to hold the boot messages. Since the default LOG_BUF_SHIFT value of 17 already gives a 128KB log buffer, just delete the CONFIG_LOG_BUF_SHIFT line. 
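[ Editor's note: the kernel log buffer size is 2^LOG_BUF_SHIFT bytes, so the
two sizes discussed above work out as shown below. The arithmetic is added
here purely as an illustration and is not part of the patch:

	CONFIG_LOG_BUF_SHIFT=14  ->  1 << 14 = 16384 bytes  (16KB, old defconfig value)
	CONFIG_LOG_BUF_SHIFT=17  ->  1 << 17 = 131072 bytes (128KB, kernel default)

With the line deleted from the defconfig, the default of 17 from init/Kconfig
takes effect automatically. ]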
Signed-off-by: Tiezhu Yang Signed-off-by: Paul Burton Cc: paul.burton@mips.com Cc: ralf@linux-mips.org Cc: jhogan@kernel.org Cc: chenhc@lemote.com Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org --- arch/mips/configs/loongson3_defconfig | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/mips/configs/loongson3_defconfig b/arch/mips/configs/loongson3_defconfig index caad7bf7902c..c16a2330e84d 100644 --- a/arch/mips/configs/loongson3_defconfig +++ b/arch/mips/configs/loongson3_defconfig @@ -12,7 +12,6 @@ CONFIG_TASKSTATS=y CONFIG_TASK_DELAY_ACCT=y CONFIG_TASK_XACCT=y CONFIG_TASK_IO_ACCOUNTING=y -CONFIG_LOG_BUF_SHIFT=14 CONFIG_MEMCG=y CONFIG_MEMCG_SWAP=y CONFIG_BLK_CGROUP=y -- cgit v1.2.3 From b9e9defb5e603a8c0822c9e1f457fc823fab5a15 Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Thu, 24 Oct 2019 12:18:28 +0200 Subject: MIPS: PCI: make phys_to_dma/dma_to_phys for pci-xtalk-bridge common All platforms using pci-xtalk-bridge can share a common phys_to_dma/dma_to_phys implementation, so move it from the ip27-specific file to pci-xtalk-bridge.c. Signed-off-by: Thomas Bogendoerfer Signed-off-by: Paul Burton Cc: Ralf Baechle Cc: Paul Burton Cc: James Hogan Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org --- arch/mips/pci/pci-ip27.c | 13 ------------- arch/mips/pci/pci-xtalk-bridge.c | 16 ++++++++++++++++ 2 files changed, 16 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/mips/pci/pci-ip27.c b/arch/mips/pci/pci-ip27.c index 441eb9383b20..45a0be40c0c3 100644 --- a/arch/mips/pci/pci-ip27.c +++ b/arch/mips/pci/pci-ip27.c @@ -9,19 +9,6 @@ */ #include -dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr) -{ - struct pci_dev *pdev = to_pci_dev(dev); - struct bridge_controller *bc = BRIDGE_CONTROLLER(pdev->bus); - - return bc->baddr + paddr; -} - -phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t dma_addr) -{ - return dma_addr & ~(0xffUL << 56); -} - #ifdef CONFIG_NUMA int pcibus_to_node(struct pci_bus *bus) { diff --git a/arch/mips/pci/pci-xtalk-bridge.c b/arch/mips/pci/pci-xtalk-bridge.c index 312632171832..72e60df505f4 100644 --- a/arch/mips/pci/pci-xtalk-bridge.c +++ b/arch/mips/pci/pci-xtalk-bridge.c @@ -22,6 +22,22 @@ #define CRC16_INIT 0 #define CRC16_VALID 0xb001 +/* + * Common phys<->dma mapping for platforms using pci xtalk bridge + */ +dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct bridge_controller *bc = BRIDGE_CONTROLLER(pdev->bus); + + return bc->baddr + paddr; +} + +phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t dma_addr) +{ + return dma_addr & ~(0xffUL << 56); +} + /* * Most of the IOC3 PCI config registers aren't present; * we emulate what is needed for a normal PCI enumeration -- cgit v1.2.3 From 7505576d1c1ac0cfe85fdf90999433dd8b673012 Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Thu, 24 Oct 2019 12:18:29 +0200 Subject: MIPS: add support for SGI Octane (IP30) This changeset adds support for SGI Octane/Octane2 workstations. 
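[ Editor's note: for readers who want to build a kernel for this new platform,
a minimal configuration fragment might look like the sketch below. The exact
option set is an assumption inferred from the SGI_IP30 Kconfig entry added by
this patch (which selects SYS_HAS_CPU_R10000, SYS_SUPPORTS_64BIT_KERNEL and
SYS_SUPPORTS_SMP), not something the patch itself spells out:

	CONFIG_SGI_IP30=y
	CONFIG_CPU_R10000=y
	CONFIG_64BIT=y
	CONFIG_SMP=y

]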
Signed-off-by: Thomas Bogendoerfer Signed-off-by: Paul Burton Cc: Paul Burton Cc: James Hogan Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org --- arch/mips/Kbuild.platforms | 1 + arch/mips/Kconfig | 27 ++ .../include/asm/mach-ip30/cpu-feature-overrides.h | 83 ++++++ arch/mips/include/asm/mach-ip30/irq.h | 87 ++++++ .../mips/include/asm/mach-ip30/kernel-entry-init.h | 13 + arch/mips/include/asm/mach-ip30/mangle-port.h | 22 ++ arch/mips/include/asm/mach-ip30/spaces.h | 20 ++ arch/mips/include/asm/mach-ip30/war.h | 26 ++ arch/mips/include/asm/sgi/heart.h | 272 +++++++++++++++++ arch/mips/sgi-ip30/Makefile | 9 + arch/mips/sgi-ip30/Platform | 8 + arch/mips/sgi-ip30/ip30-common.h | 9 + arch/mips/sgi-ip30/ip30-console.c | 23 ++ arch/mips/sgi-ip30/ip30-irq.c | 328 +++++++++++++++++++++ arch/mips/sgi-ip30/ip30-power.c | 41 +++ arch/mips/sgi-ip30/ip30-setup.c | 138 +++++++++ arch/mips/sgi-ip30/ip30-smp.c | 149 ++++++++++ arch/mips/sgi-ip30/ip30-timer.c | 63 ++++ arch/mips/sgi-ip30/ip30-xtalk.c | 152 ++++++++++ 19 files changed, 1471 insertions(+) create mode 100644 arch/mips/include/asm/mach-ip30/cpu-feature-overrides.h create mode 100644 arch/mips/include/asm/mach-ip30/irq.h create mode 100644 arch/mips/include/asm/mach-ip30/kernel-entry-init.h create mode 100644 arch/mips/include/asm/mach-ip30/mangle-port.h create mode 100644 arch/mips/include/asm/mach-ip30/spaces.h create mode 100644 arch/mips/include/asm/mach-ip30/war.h create mode 100644 arch/mips/include/asm/sgi/heart.h create mode 100644 arch/mips/sgi-ip30/Makefile create mode 100644 arch/mips/sgi-ip30/Platform create mode 100644 arch/mips/sgi-ip30/ip30-common.h create mode 100644 arch/mips/sgi-ip30/ip30-console.c create mode 100644 arch/mips/sgi-ip30/ip30-irq.c create mode 100644 arch/mips/sgi-ip30/ip30-power.c create mode 100644 arch/mips/sgi-ip30/ip30-setup.c create mode 100644 arch/mips/sgi-ip30/ip30-smp.c create mode 100644 arch/mips/sgi-ip30/ip30-timer.c create mode 100644 arch/mips/sgi-ip30/ip30-xtalk.c (limited to 'arch') diff --git a/arch/mips/Kbuild.platforms b/arch/mips/Kbuild.platforms index 7c0d461483ef..a69b272a3ab0 100644 --- a/arch/mips/Kbuild.platforms +++ b/arch/mips/Kbuild.platforms @@ -31,6 +31,7 @@ platforms += ralink platforms += rb532 platforms += sgi-ip22 platforms += sgi-ip27 +platforms += sgi-ip30 platforms += sgi-ip32 platforms += sibyte platforms += sni diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index b303b5c2d764..7cb894776f44 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -755,6 +755,33 @@ config SGI_IP28 This is the SGI Indigo2 with R10000 processor. To compile a Linux kernel that runs on these, say Y here. +config SGI_IP30 + bool "SGI IP30 (Octane/Octane2)" + select ARCH_HAS_PHYS_TO_DMA + select FW_ARC + select FW_ARC64 + select BOOT_ELF64 + select CEVT_R4K + select CSRC_R4K + select SYNC_R4K if SMP + select ZONE_DMA32 + select HAVE_PCI + select IRQ_MIPS_CPU + select IRQ_DOMAIN_HIERARCHY + select NR_CPUS_DEFAULT_2 + select PCI_DRIVERS_GENERIC + select PCI_XTALK_BRIDGE + select SYS_HAS_EARLY_PRINTK + select SYS_HAS_CPU_R10000 + select SYS_SUPPORTS_64BIT_KERNEL + select SYS_SUPPORTS_BIG_ENDIAN + select SYS_SUPPORTS_SMP + select MIPS_L1_CACHE_SHIFT_7 + select ARC_MEMORY + help + These are the SGI Octane and Octane2 graphics workstations. To + compile a Linux kernel that runs on these, say Y here. 
+ config SGI_IP32 + bool "SGI IP32 (O2)" + select ARC_MEMORY diff --git a/arch/mips/include/asm/mach-ip30/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ip30/cpu-feature-overrides.h new file mode 100644 index 000000000000..cfa02f3d25df --- /dev/null +++ b/arch/mips/include/asm/mach-ip30/cpu-feature-overrides.h @@ -0,0 +1,83 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * IP30/Octane cpu-features overrides. + * + * Copyright (C) 2003 Ralf Baechle + * 2004-2007 Stanislaw Skowronek + * 2009 Johannes Dickgreber + * 2015 Joshua Kinard + * + */ +#ifndef __ASM_MACH_IP30_CPU_FEATURE_OVERRIDES_H +#define __ASM_MACH_IP30_CPU_FEATURE_OVERRIDES_H + +#include + +/* + * IP30 only supports R1[024]000 processors, all using the same config + */ +#define cpu_has_tlb 1 +#define cpu_has_tlbinv 0 +#define cpu_has_segments 0 +#define cpu_has_eva 0 +#define cpu_has_htw 0 +#define cpu_has_rixiex 0 +#define cpu_has_maar 0 +#define cpu_has_rw_llb 0 +#define cpu_has_3kex 0 +#define cpu_has_4kex 1 +#define cpu_has_3k_cache 0 +#define cpu_has_4k_cache 1 +#define cpu_has_6k_cache 0 +#define cpu_has_8k_cache 0 +#define cpu_has_tx39_cache 0 +#define cpu_has_fpu 1 +#define cpu_has_nofpuex 0 +#define cpu_has_32fpr 1 +#define cpu_has_counter 1 +#define cpu_has_watch 1 +#define cpu_has_64bits 1 +#define cpu_has_divec 0 +#define cpu_has_vce 0 +#define cpu_has_cache_cdex_p 0 +#define cpu_has_cache_cdex_s 0 +#define cpu_has_prefetch 1 +#define cpu_has_mcheck 0 +#define cpu_has_ejtag 0 +#define cpu_has_llsc 1 +#define cpu_has_mips16 0 +#define cpu_has_mdmx 0 +#define cpu_has_mips3d 0 +#define cpu_has_smartmips 0 +#define cpu_has_rixi 0 +#define cpu_has_xpa 0 +#define cpu_has_vtag_icache 0 +#define cpu_has_dc_aliases 0 +#define cpu_has_ic_fills_f_dc 0 + +#define cpu_icache_snoops_remote_store 1 + +#define cpu_has_mips32r1 0 +#define cpu_has_mips32r2 0 +#define cpu_has_mips64r1 0 +#define cpu_has_mips64r2 0 +#define cpu_has_mips32r6 0 +#define cpu_has_mips64r6 0 + +#define cpu_has_dsp 0 +#define cpu_has_dsp2 0 +#define cpu_has_mipsmt 0 +#define cpu_has_userlocal 0 +#define cpu_has_inclusive_pcaches 1 +#define cpu_hwrena_impl_bits 0 +#define cpu_has_perf_cntr_intr_bit 0 +#define cpu_has_vz 0 +#define cpu_has_fre 0 +#define cpu_has_cdmm 0 + +#define cpu_dcache_line_size() 32 +#define cpu_icache_line_size() 64 +#define cpu_scache_line_size() 128 + +#endif /* __ASM_MACH_IP30_CPU_FEATURE_OVERRIDES_H */ + diff --git a/arch/mips/include/asm/mach-ip30/irq.h b/arch/mips/include/asm/mach-ip30/irq.h new file mode 100644 index 000000000000..e5c3dd965266 --- /dev/null +++ b/arch/mips/include/asm/mach-ip30/irq.h @@ -0,0 +1,87 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * HEART IRQ defines + * + * Copyright (C) 2009 Johannes Dickgreber + * 2014-2016 Joshua Kinard + * + */ + +#ifndef __ASM_MACH_IP30_IRQ_H +#define __ASM_MACH_IP30_IRQ_H + +/* + * HEART has 64 hardware interrupts, but use 128 to leave room for a few + * software interrupts as well (such as the CPU timer interrupt). + */ +#define NR_IRQS 128 + +extern void __init ip30_install_ipi(void); + +/* + * HEART has 64 interrupt vectors available to it, subdivided into five + * priority levels. They are numbered 0 to 63. + */ +#define HEART_NUM_IRQS 64 + +/* + * These are the five interrupt priority levels and their corresponding + * CPU IPx interrupt pins. + * + * Level 4 - Error Interrupts. + * Level 3 - HEART timer interrupt. + * Level 2 - CPU IPI, CPU debug, power button, general device interrupts. + * Level 1 - General device interrupts. 
+ * Level 0 - General device GFX flow control interrupts. + */ +#define HEART_L4_INT_MASK 0xfff8000000000000ULL /* IP6 */ +#define HEART_L3_INT_MASK 0x0004000000000000ULL /* IP5 */ +#define HEART_L2_INT_MASK 0x0003ffff00000000ULL /* IP4 */ +#define HEART_L1_INT_MASK 0x00000000ffff0000ULL /* IP3 */ +#define HEART_L0_INT_MASK 0x000000000000ffffULL /* IP2 */ + +/* HEART L0 Interrupts (Low Priority) */ +#define HEART_L0_INT_GENERIC 0 +#define HEART_L0_INT_FLOW_CTRL_HWTR_0 1 +#define HEART_L0_INT_FLOW_CTRL_HWTR_1 2 + +/* HEART L2 Interrupts (High Priority) */ +#define HEART_L2_INT_RESCHED_CPU_0 46 +#define HEART_L2_INT_RESCHED_CPU_1 47 +#define HEART_L2_INT_CALL_CPU_0 48 +#define HEART_L2_INT_CALL_CPU_1 49 + +/* HEART L3 Interrupts (Compare/Counter Timer) */ +#define HEART_L3_INT_TIMER 50 + +/* HEART L4 Interrupts (Errors) */ +#define HEART_L4_INT_XWID_ERR_9 51 +#define HEART_L4_INT_XWID_ERR_A 52 +#define HEART_L4_INT_XWID_ERR_B 53 +#define HEART_L4_INT_XWID_ERR_C 54 +#define HEART_L4_INT_XWID_ERR_D 55 +#define HEART_L4_INT_XWID_ERR_E 56 +#define HEART_L4_INT_XWID_ERR_F 57 +#define HEART_L4_INT_XWID_ERR_XBOW 58 +#define HEART_L4_INT_CPU_BUS_ERR_0 59 +#define HEART_L4_INT_CPU_BUS_ERR_1 60 +#define HEART_L4_INT_CPU_BUS_ERR_2 61 +#define HEART_L4_INT_CPU_BUS_ERR_3 62 +#define HEART_L4_INT_HEART_EXCP 63 + +/* + * Power Switch is wired via BaseIO BRIDGE slot #6. + * + * ACFail is wired via BaseIO BRIDGE slot #7. + */ +#define IP30_POWER_IRQ HEART_L2_INT_POWER_BTN + +#include_next + +#define IP30_HEART_L0_IRQ (MIPS_CPU_IRQ_BASE + 2) +#define IP30_HEART_L1_IRQ (MIPS_CPU_IRQ_BASE + 3) +#define IP30_HEART_L2_IRQ (MIPS_CPU_IRQ_BASE + 4) +#define IP30_HEART_TIMER_IRQ (MIPS_CPU_IRQ_BASE + 5) +#define IP30_HEART_ERR_IRQ (MIPS_CPU_IRQ_BASE + 6) + +#endif /* __ASM_MACH_IP30_IRQ_H */ diff --git a/arch/mips/include/asm/mach-ip30/kernel-entry-init.h b/arch/mips/include/asm/mach-ip30/kernel-entry-init.h new file mode 100644 index 000000000000..be0472c977d8 --- /dev/null +++ b/arch/mips/include/asm/mach-ip30/kernel-entry-init.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __ASM_MACH_IP30_KERNEL_ENTRY_H +#define __ASM_MACH_IP30_KERNEL_ENTRY_H + + .macro kernel_entry_setup + .endm + + .macro smp_slave_setup + move gp, a0 + .endm + +#endif /* __ASM_MACH_IP30_KERNEL_ENTRY_H */ diff --git a/arch/mips/include/asm/mach-ip30/mangle-port.h b/arch/mips/include/asm/mach-ip30/mangle-port.h new file mode 100644 index 000000000000..f3e1262a2d5e --- /dev/null +++ b/arch/mips/include/asm/mach-ip30/mangle-port.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2003, 2004 Ralf Baechle + */ +#ifndef __ASM_MACH_IP30_MANGLE_PORT_H +#define __ASM_MACH_IP30_MANGLE_PORT_H + +#define __swizzle_addr_b(port) ((port)^3) +#define __swizzle_addr_w(port) ((port)^2) +#define __swizzle_addr_l(port) (port) +#define __swizzle_addr_q(port) (port) + +#define ioswabb(a, x) (x) +#define __mem_ioswabb(a, x) (x) +#define ioswabw(a, x) (x) +#define __mem_ioswabw(a, x) cpu_to_le16(x) +#define ioswabl(a, x) (x) +#define __mem_ioswabl(a, x) cpu_to_le32(x) +#define ioswabq(a, x) (x) +#define __mem_ioswabq(a, x) cpu_to_le64(x) + +#endif /* __ASM_MACH_IP30_MANGLE_PORT_H */ diff --git a/arch/mips/include/asm/mach-ip30/spaces.h b/arch/mips/include/asm/mach-ip30/spaces.h new file mode 100644 index 000000000000..c8a302dfbe05 --- /dev/null +++ b/arch/mips/include/asm/mach-ip30/spaces.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2016 Joshua Kinard + * + */ +#ifndef 
_ASM_MACH_IP30_SPACES_H +#define _ASM_MACH_IP30_SPACES_H + +/* + * Memory in IP30/Octane is offset 512MB in the physical address space. + */ +#define PHYS_OFFSET _AC(0x20000000, UL) + +#ifdef CONFIG_64BIT +#define CAC_BASE _AC(0xA800000000000000, UL) +#endif + +#include + +#endif /* _ASM_MACH_IP30_SPACES_H */ diff --git a/arch/mips/include/asm/mach-ip30/war.h b/arch/mips/include/asm/mach-ip30/war.h new file mode 100644 index 000000000000..a98ba204f183 --- /dev/null +++ b/arch/mips/include/asm/mach-ip30/war.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2002, 2004, 2007 by Ralf Baechle + */ +#ifndef __ASM_MIPS_MACH_IP30_WAR_H +#define __ASM_MIPS_MACH_IP30_WAR_H + +#define R4600_V1_INDEX_ICACHEOP_WAR 0 +#define R4600_V1_HIT_CACHEOP_WAR 0 +#define R4600_V2_HIT_CACHEOP_WAR 0 +#define MIPS_CACHE_SYNC_WAR 0 +#define BCM1250_M3_WAR 0 +#define SIBYTE_1956_WAR 0 +#define MIPS4K_ICACHE_REFILL_WAR 0 +#define MIPS34K_MISSED_ITLB_WAR 0 +#define R5432_CP0_INTERRUPT_WAR 0 +#define TX49XX_ICACHE_INDEX_INV_WAR 0 +#define ICACHE_REFILLS_WORKAROUND_WAR 0 + +#ifdef CONFIG_CPU_R10000 +#define R10000_LLSC_WAR 1 +#else +#define R10000_LLSC_WAR 0 +#endif + +#endif /* __ASM_MIPS_MACH_IP30_WAR_H */ diff --git a/arch/mips/include/asm/sgi/heart.h b/arch/mips/include/asm/sgi/heart.h new file mode 100644 index 000000000000..c423221b4792 --- /dev/null +++ b/arch/mips/include/asm/sgi/heart.h @@ -0,0 +1,272 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * HEART chip definitions + * + * Copyright (C) 2004-2007 Stanislaw Skowronek + * 2009 Johannes Dickgreber + * 2007-2015 Joshua Kinard + */ +#ifndef __ASM_SGI_HEART_H +#define __ASM_SGI_HEART_H + +#include +#include + +/* + * There are 8 DIMM slots on an IP30 system + * board, which are grouped into four banks + */ +#define HEART_MEMORY_BANKS 4 + +/* HEART can support up to four CPUs */ +#define HEART_MAX_CPUS 4 + +#define HEART_XKPHYS_BASE ((void *)(IO_BASE | 0x000000000ff00000ULL)) + +/** + * struct ip30_heart_regs - struct that maps IP30 HEART registers. + * @mode: HEART_MODE - Purpose Unknown, machine reset called from here. + * @sdram_mode: HEART_SDRAM_MODE - purpose unknown. + * @mem_refresh: HEART_MEM_REF - purpose unknown. + * @mem_req_arb: HEART_MEM_REQ_ARB - purpose unknown. + * @mem_cfg.q: union for 64bit access to HEART_MEMCFG - 4x 64bit registers. + * @mem_cfg.l: union for 32bit access to HEART_MEMCFG - 8x 32bit registers. + * @fc_mode: HEART_FC_MODE - Purpose Unknown, possibly for GFX flow control. + * @fc_timer_limit: HEART_FC_TIMER_LIMIT - purpose unknown. + * @fc_addr: HEART_FC0_ADDR, HEART_FC1_ADDR - purpose unknown. + * @fc_credit_cnt: HEART_FC0_CR_CNT, HEART_FC1_CR_CNT - purpose unknown. + * @fc_timer: HEART_FC0_TIMER, HEART_FC1_TIMER - purpose unknown. + * @status: HEART_STATUS - HEART status information. + * @bus_err_addr: HEART_BERR_ADDR - likely contains addr of recent SIGBUS. + * @bus_err_misc: HEART_BERR_MISC - purpose unknown. + * @mem_err_addr: HEART_MEMERR_ADDR - likely contains addr of recent mem err. + * @mem_err_data: HEART_MEMERR_DATA - purpose unknown. + * @piur_acc_err: HEART_PIUR_ACC_ERR - likely for access err to HEART regs. + * @mlan_clock_div: HEART_MLAN_CLK_DIV - MicroLAN clock divider. + * @mlan_ctrl: HEART_MLAN_CTL - MicroLAN control. + * @__pad0: 0x0f40 bytes of padding -> next HEART register 0x01000. + * @undefined: Undefined/diag register, write to it triggers PIUR_ACC_ERR. + * @__pad1: 0xeff8 bytes of padding -> next HEART register 0x10000. 
+ * @imr: HEART_IMR0 to HEART_IMR3 - per-cpu interrupt mask register. + * @set_isr: HEART_SET_ISR - set interrupt status register. + * @clear_isr: HEART_CLR_ISR - clear interrupt status register. + * @isr: HEART_ISR - interrupt status register (read-only). + * @imsr: HEART_IMSR - purpose unknown. + * @cause: HEART_CAUSE - HEART cause information. + * @__pad2: 0xffb8 bytes of padding -> next HEART register 0x20000. + * @count: HEART_COUNT - 52-bit counter. + * @__pad3: 0xfff8 bytes of padding -> next HEART register 0x30000. + * @compare: HEART_COMPARE - 24-bit compare. + * @__pad4: 0xfff8 bytes of padding -> next HEART register 0x40000. + * @trigger: HEART_TRIGGER - purpose unknown. + * @__pad5: 0xfff8 bytes of padding -> next HEART register 0x50000. + * @cpuid: HEART_PRID - contains CPU ID of CPU currently accessing HEART. + * @__pad6: 0xfff8 bytes of padding -> next HEART register 0x60000. + * @sync: HEART_SYNC - purpose unknown. + * + * HEART is the main system controller ASIC for IP30 system. It incorporates + * a memory controller, interrupt status/cause/set/clear management, basic + * timer with count/compare, and other functionality. For Linux, not all of + * HEART's functions are fully understood. + * + * Implementation note: All HEART registers are 64bits-wide, but the mem_cfg + * register only reports correct values if queried in 32bits. Hence the need + * for a union. Even though mem_cfg.l has 8 array slots, we only ever query + * up to 4 of those. IP30 has 8 DIMM slots arranged into 4 banks, w/ 2 DIMMs + * per bank. Each 32bit read accesses one of these banks. Perhaps HEART was + * designed to address up to 8 banks (16 DIMMs)? We may never know. + */ +struct ip30_heart_regs { /* 0x0ff00000 */ + u64 mode; /* + 0x00000 */ + /* Memory */ + u64 sdram_mode; /* + 0x00008 */ + u64 mem_refresh; /* + 0x00010 */ + u64 mem_req_arb; /* + 0x00018 */ + union { + u64 q[HEART_MEMORY_BANKS]; /* readq() */ + u32 l[HEART_MEMORY_BANKS * 2]; /* readl() */ + } mem_cfg; /* + 0x00020 */ + /* Flow control (gfx?) */ + u64 fc_mode; /* + 0x00040 */ + u64 fc_timer_limit; /* + 0x00048 */ + u64 fc_addr[2]; /* + 0x00050 */ + u64 fc_credit_cnt[2]; /* + 0x00060 */ + u64 fc_timer[2]; /* + 0x00070 */ + /* Status */ + u64 status; /* + 0x00080 */ + /* Bus error */ + u64 bus_err_addr; /* + 0x00088 */ + u64 bus_err_misc; /* + 0x00090 */ + /* Memory error */ + u64 mem_err_addr; /* + 0x00098 */ + u64 mem_err_data; /* + 0x000a0 */ + /* Misc */ + u64 piur_acc_err; /* + 0x000a8 */ + u64 mlan_clock_div; /* + 0x000b0 */ + u64 mlan_ctrl; /* + 0x000b8 */ + u64 __pad0[0x01e8]; /* + 0x000c0 + 0x0f40 */ + /* Undefined */ + u64 undefined; /* + 0x01000 */ + u64 __pad1[0x1dff]; /* + 0x01008 + 0xeff8 */ + /* Interrupts */ + u64 imr[HEART_MAX_CPUS]; /* + 0x10000 */ + u64 set_isr; /* + 0x10020 */ + u64 clear_isr; /* + 0x10028 */ + u64 isr; /* + 0x10030 */ + u64 imsr; /* + 0x10038 */ + u64 cause; /* + 0x10040 */ + u64 __pad2[0x1ff7]; /* + 0x10048 + 0xffb8 */ + /* Timer */ + u64 count; /* + 0x20000 */ + u64 __pad3[0x1fff]; /* + 0x20008 + 0xfff8 */ + u64 compare; /* + 0x30000 */ + u64 __pad4[0x1fff]; /* + 0x30008 + 0xfff8 */ + u64 trigger; /* + 0x40000 */ + u64 __pad5[0x1fff]; /* + 0x40008 + 0xfff8 */ + /* Misc */ + u64 cpuid; /* + 0x50000 */ + u64 __pad6[0x1fff]; /* + 0x50008 + 0xfff8 */ + u64 sync; /* + 0x60000 */ +}; + + +/* For timer-related bits. 
*/ +#define HEART_NS_PER_CYCLE 80 +#define HEART_CYCLES_PER_SEC (NSEC_PER_SEC / HEART_NS_PER_CYCLE) + + +/* + * XXX: Everything below this comment will either go away or be cleaned + * up to fit in better with Linux. A lot of the bit definitions for + * HEART were derived from IRIX's sys/RACER/heart.h header file. + */ + +/* HEART Masks */ +#define HEART_ATK_MASK 0x0007ffffffffffff /* HEART attack mask */ +#define HEART_ACK_ALL_MASK 0xffffffffffffffff /* Ack everything */ +#define HEART_CLR_ALL_MASK 0x0000000000000000 /* Clear all */ +#define HEART_BR_ERR_MASK 0x7ff8000000000000 /* BRIDGE error mask */ +#define HEART_CPU0_ERR_MASK 0x8ff8000000000000 /* CPU0 error mask */ +#define HEART_CPU1_ERR_MASK 0x97f8000000000000 /* CPU1 error mask */ +#define HEART_CPU2_ERR_MASK 0xa7f8000000000000 /* CPU2 error mask */ +#define HEART_CPU3_ERR_MASK 0xc7f8000000000000 /* CPU3 error mask */ +#define HEART_ERR_MASK 0x1ff /* HEART error mask */ +#define HEART_ERR_MASK_START 51 /* HEART error start */ +#define HEART_ERR_MASK_END 63 /* HEART error end */ + +/* Bits in the HEART_MODE register. */ +#define HM_PROC_DISABLE_SHFT 60 +#define HM_PROC_DISABLE_MSK (0xfUL << HM_PROC_DISABLE_SHFT) +#define HM_PROC_DISABLE(x) (0x1UL << (x) + HM_PROC_DISABLE_SHFT) +#define HM_MAX_PSR (0x7UL << 57) +#define HM_MAX_IOSR (0x7UL << 54) +#define HM_MAX_PEND_IOSR (0x7UL << 51) +#define HM_TRIG_SRC_SEL_MSK (0x7UL << 48) +#define HM_TRIG_HEART_EXC (0x0UL << 48) +#define HM_TRIG_REG_BIT (0x1UL << 48) +#define HM_TRIG_SYSCLK (0x2UL << 48) +#define HM_TRIG_MEMCLK_2X (0x3UL << 48) +#define HM_TRIG_MEMCLK (0x4UL << 48) +#define HM_TRIG_IOCLK (0x5UL << 48) +#define HM_PIU_TEST_MODE (0xfUL << 40) +#define HM_GP_FLAG_MSK (0xfUL << 36) +#define HM_GP_FLAG(x) BIT((x) + 36) +#define HM_MAX_PROC_HYST (0xfUL << 32) +#define HM_LLP_WRST_AFTER_RST BIT(28) +#define HM_LLP_LINK_RST BIT(27) +#define HM_LLP_WARM_RST BIT(26) +#define HM_COR_ECC_LCK BIT(25) +#define HM_REDUCED_PWR BIT(24) +#define HM_COLD_RST BIT(23) +#define HM_SW_RST BIT(22) +#define HM_MEM_FORCE_WR BIT(21) +#define HM_DB_ERR_GEN BIT(20) +#define HM_SB_ERR_GEN BIT(19) +#define HM_CACHED_PIO_EN BIT(18) +#define HM_CACHED_PROM_EN BIT(17) +#define HM_PE_SYS_COR_ERE BIT(16) +#define HM_GLOBAL_ECC_EN BIT(15) +#define HM_IO_COH_EN BIT(14) +#define HM_INT_EN BIT(13) +#define HM_DATA_CHK_EN BIT(12) +#define HM_REF_EN BIT(11) +#define HM_BAD_SYSWR_ERE BIT(10) +#define HM_BAD_SYSRD_ERE BIT(9) +#define HM_SYSSTATE_ERE BIT(8) +#define HM_SYSCMD_ERE BIT(7) +#define HM_NCOR_SYS_ERE BIT(6) +#define HM_COR_SYS_ERE BIT(5) +#define HM_DATA_ELMNT_ERE BIT(4) +#define HM_MEM_ADDR_PROC_ERE BIT(3) +#define HM_MEM_ADDR_IO_ERE BIT(2) +#define HM_NCOR_MEM_ERE BIT(1) +#define HM_COR_MEM_ERE BIT(0) + +/* Bits in the HEART_MEM_REF register. */ +#define HEART_MEMREF_REFS(x) ((0xfUL & (x)) << 16) +#define HEART_MEMREF_PERIOD(x) ((0xffffUL & (x))) +#define HEART_MEMREF_REFS_VAL HEART_MEMREF_REFS(8) +#define HEART_MEMREF_PERIOD_VAL HEART_MEMREF_PERIOD(0x4000) +#define HEART_MEMREF_VAL (HEART_MEMREF_REFS_VAL | \ + HEART_MEMREF_PERIOD_VAL) + +/* Bits in the HEART_MEM_REQ_ARB register. */ +#define HEART_MEMARB_IODIS (1 << 20) +#define HEART_MEMARB_MAXPMWRQS (15 << 16) +#define HEART_MEMARB_MAXPMRRQS (15 << 12) +#define HEART_MEMARB_MAXPMRQS (15 << 8) +#define HEART_MEMARB_MAXRRRQS (15 << 4) +#define HEART_MEMARB_MAXGBRRQS (15) + +/* Bits in the HEART_MEMCFG registers. 
*/ +#define HEART_MEMCFG_VALID 0x80000000 /* Bank is valid */ +#define HEART_MEMCFG_DENSITY 0x01c00000 /* Mem density */ +#define HEART_MEMCFG_SIZE_MASK 0x003f0000 /* Mem size mask */ +#define HEART_MEMCFG_ADDR_MASK 0x000001ff /* Base addr mask */ +#define HEART_MEMCFG_SIZE_SHIFT 16 /* Mem size shift */ +#define HEART_MEMCFG_DENSITY_SHIFT 22 /* Density Shift */ +#define HEART_MEMCFG_UNIT_SHIFT 25 /* Unit Shift, 32MB */ + +/* Bits in the HEART_STATUS register */ +#define HEART_STAT_HSTL_SDRV BIT(14) +#define HEART_STAT_FC_CR_OUT(x) BIT((x) + 12) +#define HEART_STAT_DIR_CNNCT BIT(11) +#define HEART_STAT_TRITON BIT(10) +#define HEART_STAT_R4K BIT(9) +#define HEART_STAT_BIG_ENDIAN BIT(8) +#define HEART_STAT_PROC_SHFT 4 +#define HEART_STAT_PROC_MSK (0xfUL << HEART_STAT_PROC_SHFT) +#define HEART_STAT_PROC_ACTIVE(x) (0x1UL << ((x) + HEART_STAT_PROC_SHFT)) +#define HEART_STAT_WIDGET_ID 0xf + +/* Bits in the HEART_CAUSE register */ +#define HC_PE_SYS_COR_ERR_MSK (0xfUL << 60) +#define HC_PE_SYS_COR_ERR(x) BIT((x) + 60) +#define HC_PIOWDB_OFLOW BIT(44) +#define HC_PIORWRB_OFLOW BIT(43) +#define HC_PIUR_ACC_ERR BIT(42) +#define HC_BAD_SYSWR_ERR BIT(41) +#define HC_BAD_SYSRD_ERR BIT(40) +#define HC_SYSSTATE_ERR_MSK (0xfUL << 36) +#define HC_SYSSTATE_ERR(x) BIT((x) + 36) +#define HC_SYSCMD_ERR_MSK (0xfUL << 32) +#define HC_SYSCMD_ERR(x) BIT((x) + 32) +#define HC_NCOR_SYSAD_ERR_MSK (0xfUL << 28) +#define HC_NCOR_SYSAD_ERR(x) BIT((x) + 28) +#define HC_COR_SYSAD_ERR_MSK (0xfUL << 24) +#define HC_COR_SYSAD_ERR(x) BIT((x) + 24) +#define HC_DATA_ELMNT_ERR_MSK (0xfUL << 20) +#define HC_DATA_ELMNT_ERR(x) BIT((x) + 20) +#define HC_WIDGET_ERR BIT(16) +#define HC_MEM_ADDR_ERR_PROC_MSK (0xfUL << 4) +#define HC_MEM_ADDR_ERR_PROC(x) BIT((x) + 4) +#define HC_MEM_ADDR_ERR_IO BIT(2) +#define HC_NCOR_MEM_ERR BIT(1) +#define HC_COR_MEM_ERR BIT(0) + +extern struct ip30_heart_regs __iomem *heart_regs; + +#define heart_read ____raw_readq +#define heart_write ____raw_writeq + +#endif /* __ASM_SGI_HEART_H */ diff --git a/arch/mips/sgi-ip30/Makefile b/arch/mips/sgi-ip30/Makefile new file mode 100644 index 000000000000..18cf561b3d61 --- /dev/null +++ b/arch/mips/sgi-ip30/Makefile @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for the IP30 specific kernel interface routines under Linux. 
+# + +obj-y := ip30-irq.o ip30-power.o ip30-setup.o ip30-timer.o ip30-xtalk.o + +obj-$(CONFIG_EARLY_PRINTK) += ip30-console.o +obj-$(CONFIG_SMP) += ip30-smp.o diff --git a/arch/mips/sgi-ip30/Platform b/arch/mips/sgi-ip30/Platform new file mode 100644 index 000000000000..2b5695c2049a --- /dev/null +++ b/arch/mips/sgi-ip30/Platform @@ -0,0 +1,8 @@ +# +# SGI-IP30 (Octane/Octane2) +# +ifdef CONFIG_SGI_IP30 +platform-$(CONFIG_SGI_IP30) += sgi-ip30/ +cflags-$(CONFIG_SGI_IP30) += -I$(srctree)/arch/mips/include/asm/mach-ip30 +load-$(CONFIG_SGI_IP30) += 0xa800000020004000 +endif diff --git a/arch/mips/sgi-ip30/ip30-common.h b/arch/mips/sgi-ip30/ip30-common.h new file mode 100644 index 000000000000..d2bcaee712f3 --- /dev/null +++ b/arch/mips/sgi-ip30/ip30-common.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef __IP30_COMMON_H +#define __IP30_COMMON_H + +extern struct plat_smp_ops ip30_smp_ops; +extern void __init ip30_per_cpu_init(void); + +#endif /* __IP30_COMMON_H */ diff --git a/arch/mips/sgi-ip30/ip30-console.c b/arch/mips/sgi-ip30/ip30-console.c new file mode 100644 index 000000000000..b91f8c4fdc78 --- /dev/null +++ b/arch/mips/sgi-ip30/ip30-console.c @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include + +#include + +static inline struct ioc3_uartregs *console_uart(void) +{ + struct ioc3 *ioc3; + + ioc3 = (struct ioc3 *)((void *)(0x900000001f600000)); + return &ioc3->sregs.uarta; +} + +void prom_putchar(char c) +{ + struct ioc3_uartregs *uart = console_uart(); + + while ((readb(&uart->iu_lsr) & 0x20) == 0) + cpu_relax(); + + writeb(c, &uart->iu_thr); +} diff --git a/arch/mips/sgi-ip30/ip30-irq.c b/arch/mips/sgi-ip30/ip30-irq.c new file mode 100644 index 000000000000..d46655b914f1 --- /dev/null +++ b/arch/mips/sgi-ip30/ip30-irq.c @@ -0,0 +1,328 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ip30-irq.c: Highlevel interrupt handling for IP30 architecture. + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +struct heart_irq_data { + u64 *irq_mask; + int cpu; +}; + +static DECLARE_BITMAP(heart_irq_map, HEART_NUM_IRQS); + +static DEFINE_PER_CPU(unsigned long, irq_enable_mask); + +static inline int heart_alloc_int(void) +{ + int bit; + +again: + bit = find_first_zero_bit(heart_irq_map, HEART_NUM_IRQS); + if (bit >= HEART_NUM_IRQS) + return -ENOSPC; + + if (test_and_set_bit(bit, heart_irq_map)) + goto again; + + return bit; +} + +static void ip30_error_irq(struct irq_desc *desc) +{ + u64 pending, mask, cause, error_irqs, err_reg; + int cpu = smp_processor_id(); + int i; + + pending = heart_read(&heart_regs->isr); + mask = heart_read(&heart_regs->imr[cpu]); + cause = heart_read(&heart_regs->cause); + error_irqs = (pending & HEART_L4_INT_MASK & mask); + + /* Bail if there's nothing to process (how did we get here, then?) */ + if (unlikely(!error_irqs)) + return; + + /* Prevent any of the error IRQs from firing again. */ + heart_write(mask & ~(pending), &heart_regs->imr[cpu]); + + /* Ack all error IRQs. */ + heart_write(HEART_L4_INT_MASK, &heart_regs->clear_isr); + + /* + * If we also have a cause value, then something happened, so loop + * through the error IRQs and report a "heart attack" for each one + * and print the value of the HEART cause register. This is really + * primitive right now, but it should hopefully work until a more + * robust error handling routine can be put together. + * + * Refer to heart.h for the HC_* macros to work out the cause + * that got us here. 
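+ * (ISR bits 51-63, i.e. HEART_ERR_MASK_START..HEART_ERR_MASK_END, are
+ * the individual error sources reported by the loop below.)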
+ */ + if (cause) { + pr_alert("IP30: CPU%d: HEART ATTACK! ISR = 0x%.16llx, IMR = 0x%.16llx, CAUSE = 0x%.16llx\n", + cpu, pending, mask, cause); + + if (cause & HC_COR_MEM_ERR) { + err_reg = heart_read(&heart_regs->mem_err_addr); + pr_alert(" HEART_MEMERR_ADDR = 0x%.16llx\n", err_reg); + } + + /* i = 63; i >= 51; i-- */ + for (i = HEART_ERR_MASK_END; i >= HEART_ERR_MASK_START; i--) + if ((pending >> i) & 1) + pr_alert(" HEART Error IRQ #%d\n", i); + + /* XXX: Seems possible to loop forever here, so panic(). */ + panic("IP30: Fatal Error !\n"); + } + + /* Unmask the error IRQs. */ + heart_write(mask, &heart_regs->imr[cpu]); +} + +static void ip30_normal_irq(struct irq_desc *desc) +{ + int cpu = smp_processor_id(); + struct irq_domain *domain; + u64 pend, mask; + int irq; + + pend = heart_read(&heart_regs->isr); + mask = (heart_read(&heart_regs->imr[cpu]) & + (HEART_L0_INT_MASK | HEART_L1_INT_MASK | HEART_L2_INT_MASK)); + + pend &= mask; + if (unlikely(!pend)) + return; + +#ifdef CONFIG_SMP + if (pend & BIT_ULL(HEART_L2_INT_RESCHED_CPU_0)) { + heart_write(BIT_ULL(HEART_L2_INT_RESCHED_CPU_0), + &heart_regs->clear_isr); + scheduler_ipi(); + } else if (pend & BIT_ULL(HEART_L2_INT_RESCHED_CPU_1)) { + heart_write(BIT_ULL(HEART_L2_INT_RESCHED_CPU_1), + &heart_regs->clear_isr); + scheduler_ipi(); + } else if (pend & BIT_ULL(HEART_L2_INT_CALL_CPU_0)) { + heart_write(BIT_ULL(HEART_L2_INT_CALL_CPU_0), + &heart_regs->clear_isr); + generic_smp_call_function_interrupt(); + } else if (pend & BIT_ULL(HEART_L2_INT_CALL_CPU_1)) { + heart_write(BIT_ULL(HEART_L2_INT_CALL_CPU_1), + &heart_regs->clear_isr); + generic_smp_call_function_interrupt(); + } else +#endif + { + domain = irq_desc_get_handler_data(desc); + irq = irq_linear_revmap(domain, __ffs(pend)); + if (irq) + generic_handle_irq(irq); + else + spurious_interrupt(); + } +} + +static void ip30_ack_heart_irq(struct irq_data *d) +{ + heart_write(BIT_ULL(d->hwirq), &heart_regs->clear_isr); +} + +static void ip30_mask_heart_irq(struct irq_data *d) +{ + struct heart_irq_data *hd = irq_data_get_irq_chip_data(d); + unsigned long *mask = &per_cpu(irq_enable_mask, hd->cpu); + + clear_bit(d->hwirq, mask); + heart_write(*mask, &heart_regs->imr[hd->cpu]); +} + +static void ip30_mask_and_ack_heart_irq(struct irq_data *d) +{ + struct heart_irq_data *hd = irq_data_get_irq_chip_data(d); + unsigned long *mask = &per_cpu(irq_enable_mask, hd->cpu); + + clear_bit(d->hwirq, mask); + heart_write(*mask, &heart_regs->imr[hd->cpu]); + heart_write(BIT_ULL(d->hwirq), &heart_regs->clear_isr); +} + +static void ip30_unmask_heart_irq(struct irq_data *d) +{ + struct heart_irq_data *hd = irq_data_get_irq_chip_data(d); + unsigned long *mask = &per_cpu(irq_enable_mask, hd->cpu); + + set_bit(d->hwirq, mask); + heart_write(*mask, &heart_regs->imr[hd->cpu]); +} + +static int ip30_set_heart_irq_affinity(struct irq_data *d, + const struct cpumask *mask, bool force) +{ + struct heart_irq_data *hd = irq_data_get_irq_chip_data(d); + + if (!hd) + return -EINVAL; + + if (irqd_is_started(d)) + ip30_mask_and_ack_heart_irq(d); + + hd->cpu = cpumask_first_and(mask, cpu_online_mask); + + if (irqd_is_started(d)) + ip30_unmask_heart_irq(d); + + irq_data_update_effective_affinity(d, cpumask_of(hd->cpu)); + + return 0; +} + +static struct irq_chip heart_irq_chip = { + .name = "HEART", + .irq_ack = ip30_ack_heart_irq, + .irq_mask = ip30_mask_heart_irq, + .irq_mask_ack = ip30_mask_and_ack_heart_irq, + .irq_unmask = ip30_unmask_heart_irq, + .irq_set_affinity = ip30_set_heart_irq_affinity, +}; + +static int 
heart_domain_alloc(struct irq_domain *domain, unsigned int virq,
+ unsigned int nr_irqs, void *arg)
+{
+ struct irq_alloc_info *info = arg;
+ struct heart_irq_data *hd;
+ int hwirq;
+
+ if (nr_irqs > 1 || !info)
+ return -EINVAL;
+
+ hd = kzalloc(sizeof(*hd), GFP_KERNEL);
+ if (!hd)
+ return -ENOMEM;
+
+ hwirq = heart_alloc_int();
+ if (hwirq < 0) {
+ kfree(hd);
+ return -EAGAIN;
+ }
+ irq_domain_set_info(domain, virq, hwirq, &heart_irq_chip, hd,
+ handle_level_irq, NULL, NULL);
+
+ return 0;
+}
+
+static void heart_domain_free(struct irq_domain *domain,
+ unsigned int virq, unsigned int nr_irqs)
+{
+ struct irq_data *irqd;
+
+ if (nr_irqs > 1)
+ return;
+
+ irqd = irq_domain_get_irq_data(domain, virq);
+ /* Validate the pointer before dereferencing it; kfree(NULL) is a no-op. */
+ if (!irqd)
+ return;
+
+ clear_bit(irqd->hwirq, heart_irq_map);
+ kfree(irqd->chip_data);
+}
+
+static const struct irq_domain_ops heart_domain_ops = {
+ .alloc = heart_domain_alloc,
+ .free = heart_domain_free,
+};
+
+void __init ip30_install_ipi(void)
+{
+ int cpu = smp_processor_id();
+ unsigned long *mask = &per_cpu(irq_enable_mask, cpu);
+
+ set_bit(HEART_L2_INT_RESCHED_CPU_0 + cpu, mask);
+ heart_write(BIT_ULL(HEART_L2_INT_RESCHED_CPU_0 + cpu),
+ &heart_regs->clear_isr);
+ set_bit(HEART_L2_INT_CALL_CPU_0 + cpu, mask);
+ heart_write(BIT_ULL(HEART_L2_INT_CALL_CPU_0 + cpu),
+ &heart_regs->clear_isr);
+
+ heart_write(*mask, &heart_regs->imr[cpu]);
+}
+
+void __init arch_init_irq(void)
+{
+ struct irq_domain *domain;
+ struct fwnode_handle *fn;
+ unsigned long *mask;
+ int i;
+
+ mips_cpu_irq_init();
+
+ /* Mask all IRQs. */
+ heart_write(HEART_CLR_ALL_MASK, &heart_regs->imr[0]);
+ heart_write(HEART_CLR_ALL_MASK, &heart_regs->imr[1]);
+ heart_write(HEART_CLR_ALL_MASK, &heart_regs->imr[2]);
+ heart_write(HEART_CLR_ALL_MASK, &heart_regs->imr[3]);
+
+ /* Ack everything. */
+ heart_write(HEART_ACK_ALL_MASK, &heart_regs->clear_isr);
+
+ /* Enable specific HEART error IRQs for each CPU. */
+ mask = &per_cpu(irq_enable_mask, 0);
+ *mask |= HEART_CPU0_ERR_MASK;
+ heart_write(*mask, &heart_regs->imr[0]);
+ mask = &per_cpu(irq_enable_mask, 1);
+ *mask |= HEART_CPU1_ERR_MASK;
+ heart_write(*mask, &heart_regs->imr[1]);
+
+ /*
+ * Some HEART bits are reserved by hardware or by software convention.
+ * Mark these as reserved right away so they won't be accidentally
+ * used later.
+ */
+ set_bit(HEART_L0_INT_GENERIC, heart_irq_map);
+ set_bit(HEART_L0_INT_FLOW_CTRL_HWTR_0, heart_irq_map);
+ set_bit(HEART_L0_INT_FLOW_CTRL_HWTR_1, heart_irq_map);
+ set_bit(HEART_L2_INT_RESCHED_CPU_0, heart_irq_map);
+ set_bit(HEART_L2_INT_RESCHED_CPU_1, heart_irq_map);
+ set_bit(HEART_L2_INT_CALL_CPU_0, heart_irq_map);
+ set_bit(HEART_L2_INT_CALL_CPU_1, heart_irq_map);
+ set_bit(HEART_L3_INT_TIMER, heart_irq_map);
+
+ /* Reserve the error interrupts (#51 to #63).
*/ + for (i = HEART_L4_INT_XWID_ERR_9; i <= HEART_L4_INT_HEART_EXCP; i++) + set_bit(i, heart_irq_map); + + fn = irq_domain_alloc_named_fwnode("HEART"); + WARN_ON(fn == NULL); + if (!fn) + return; + domain = irq_domain_create_linear(fn, HEART_NUM_IRQS, + &heart_domain_ops, NULL); + WARN_ON(domain == NULL); + if (!domain) + return; + + irq_set_default_host(domain); + + irq_set_percpu_devid(IP30_HEART_L0_IRQ); + irq_set_chained_handler_and_data(IP30_HEART_L0_IRQ, ip30_normal_irq, + domain); + irq_set_percpu_devid(IP30_HEART_L1_IRQ); + irq_set_chained_handler_and_data(IP30_HEART_L1_IRQ, ip30_normal_irq, + domain); + irq_set_percpu_devid(IP30_HEART_L2_IRQ); + irq_set_chained_handler_and_data(IP30_HEART_L2_IRQ, ip30_normal_irq, + domain); + irq_set_percpu_devid(IP30_HEART_ERR_IRQ); + irq_set_chained_handler_and_data(IP30_HEART_ERR_IRQ, ip30_error_irq, + domain); +} diff --git a/arch/mips/sgi-ip30/ip30-power.c b/arch/mips/sgi-ip30/ip30-power.c new file mode 100644 index 000000000000..120b3f3d5108 --- /dev/null +++ b/arch/mips/sgi-ip30/ip30-power.c @@ -0,0 +1,41 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ip30-power.c: Software powerdown and reset handling for IP30 architecture. + * + * Copyright (C) 2004-2007 Stanislaw Skowronek + * 2014 Joshua Kinard + * 2009 Johannes Dickgreber + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +static void __noreturn ip30_machine_restart(char *cmd) +{ + /* + * Execute HEART cold reset + * Yes, it's cold-HEARTed! + */ + heart_write((heart_read(&heart_regs->mode) | HM_COLD_RST), + &heart_regs->mode); + unreachable(); +} + +static int __init ip30_reboot_setup(void) +{ + _machine_restart = ip30_machine_restart; + + return 0; +} + +subsys_initcall(ip30_reboot_setup); diff --git a/arch/mips/sgi-ip30/ip30-setup.c b/arch/mips/sgi-ip30/ip30-setup.c new file mode 100644 index 000000000000..44b1607e964d --- /dev/null +++ b/arch/mips/sgi-ip30/ip30-setup.c @@ -0,0 +1,138 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * SGI IP30 miscellaneous setup bits. + * + * Copyright (C) 2004-2007 Stanislaw Skowronek + * 2007 Joshua Kinard + * 2009 Johannes Dickgreber + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "ip30-common.h" + +/* Structure of accessible HEART registers located in XKPHYS space. */ +struct ip30_heart_regs __iomem *heart_regs = HEART_XKPHYS_BASE; + +/* + * ARCS will report up to the first 1GB of + * memory if queried. Anything beyond that + * is marked as reserved. + */ +#define IP30_MAX_PROM_MEMORY _AC(0x40000000, UL) + +/* + * Memory in the Octane starts at 512MB + */ +#define IP30_MEMORY_BASE _AC(0x20000000, UL) + +/* + * If using ARCS to probe for memory, then + * remaining memory will start at this offset. 
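+ * (0x20000000 + 0x40000000 = 0x60000000, the 1.5GB physical mark.)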
+ */ +#define IP30_REAL_MEMORY_START (IP30_MEMORY_BASE + IP30_MAX_PROM_MEMORY) + +#define MEM_SHIFT(x) ((x) >> 20) + +static void __init ip30_mem_init(void) +{ + unsigned long total_mem; + phys_addr_t addr; + phys_addr_t size; + u32 memcfg; + int i; + + total_mem = 0; + for (i = 0; i < HEART_MEMORY_BANKS; i++) { + memcfg = __raw_readl(&heart_regs->mem_cfg.l[i]); + if (!(memcfg & HEART_MEMCFG_VALID)) + continue; + + addr = memcfg & HEART_MEMCFG_ADDR_MASK; + addr <<= HEART_MEMCFG_UNIT_SHIFT; + addr += IP30_MEMORY_BASE; + size = memcfg & HEART_MEMCFG_SIZE_MASK; + size >>= HEART_MEMCFG_SIZE_SHIFT; + size += 1; + size <<= HEART_MEMCFG_UNIT_SHIFT; + + total_mem += size; + + if (addr >= IP30_REAL_MEMORY_START) + memblock_free(addr, size); + else if ((addr + size) > IP30_REAL_MEMORY_START) + memblock_free(IP30_REAL_MEMORY_START, + size - IP30_MAX_PROM_MEMORY); + } + pr_info("Detected %luMB of physical memory.\n", MEM_SHIFT(total_mem)); +} + +/** + * ip30_cpu_time_init - platform time initialization. + */ +static void __init ip30_cpu_time_init(void) +{ + int cpu = smp_processor_id(); + u64 heart_compare; + unsigned int start, end; + int time_diff; + + heart_compare = (heart_read(&heart_regs->count) + + (HEART_CYCLES_PER_SEC / 10)); + start = read_c0_count(); + while ((heart_read(&heart_regs->count) - heart_compare) & 0x800000) + cpu_relax(); + + end = read_c0_count(); + time_diff = (int)end - (int)start; + mips_hpt_frequency = time_diff * 10; + pr_info("IP30: CPU%d: %d MHz CPU detected.\n", cpu, + (mips_hpt_frequency * 2) / 1000000); +} + +void __init ip30_per_cpu_init(void) +{ + /* Disable all interrupts. */ + clear_c0_status(ST0_IM); + + ip30_cpu_time_init(); +#ifdef CONFIG_SMP + ip30_install_ipi(); +#endif + + enable_percpu_irq(IP30_HEART_L0_IRQ, IRQ_TYPE_NONE); + enable_percpu_irq(IP30_HEART_L1_IRQ, IRQ_TYPE_NONE); + enable_percpu_irq(IP30_HEART_L2_IRQ, IRQ_TYPE_NONE); + enable_percpu_irq(IP30_HEART_ERR_IRQ, IRQ_TYPE_NONE); +} + +/** + * plat_mem_setup - despite the name, misc setup happens here. + */ +void __init plat_mem_setup(void) +{ + ip30_mem_init(); + + /* XXX: Hard lock on /sbin/init if this flag isn't specified. */ + prom_flags |= PROM_FLAG_DONT_FREE_TEMP; + +#ifdef CONFIG_SMP + register_smp_ops(&ip30_smp_ops); +#else + ip30_per_cpu_init(); +#endif + + ioport_resource.start = 0; + ioport_resource.end = ~0UL; + set_io_port_base(IO_BASE); +} diff --git a/arch/mips/sgi-ip30/ip30-smp.c b/arch/mips/sgi-ip30/ip30-smp.c new file mode 100644 index 000000000000..4bfe654602b1 --- /dev/null +++ b/arch/mips/sgi-ip30/ip30-smp.c @@ -0,0 +1,149 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ip30-smp.c: SMP on IP30 architecture. + * Based off of the original IP30 SMP code, with inspiration from ip27-smp.c + * and smp-bmips.c. 
+ * + * Copyright (C) 2005-2007 Stanislaw Skowronek + * 2006-2007, 2014-2015 Joshua Kinard + * 2009 Johannes Dickgreber + */ + +#include +#include +#include + +#include +#include + +#include "ip30-common.h" + +#define MPCONF_MAGIC 0xbaddeed2 +#define MPCONF_ADDR 0xa800000000000600L +#define MPCONF_SIZE 0x80 +#define MPCONF(x) (MPCONF_ADDR + (x) * MPCONF_SIZE) + +/* HEART can theoretically do 4 CPUs, but only 2 are physically possible */ +#define MP_NCPU 2 + +struct mpconf { + u32 magic; + u32 prid; + u32 physid; + u32 virtid; + u32 scachesz; + u16 fanloads; + u16 res; + void *launch; + void *rendezvous; + u64 res2[3]; + void *stackaddr; + void *lnch_parm; + void *rndv_parm; + u32 idleflag; +}; + +static void ip30_smp_send_ipi_single(int cpu, u32 action) +{ + int irq; + + switch (action) { + case SMP_RESCHEDULE_YOURSELF: + irq = HEART_L2_INT_RESCHED_CPU_0; + break; + case SMP_CALL_FUNCTION: + irq = HEART_L2_INT_CALL_CPU_0; + break; + default: + panic("IP30: Unknown action value in %s!\n", __func__); + } + + irq += cpu; + + /* Poke the other CPU -- it's got mail! */ + heart_write(BIT_ULL(irq), &heart_regs->set_isr); +} + +static void ip30_smp_send_ipi_mask(const struct cpumask *mask, u32 action) +{ + u32 i; + + for_each_cpu(i, mask) + ip30_smp_send_ipi_single(i, action); +} + +static void __init ip30_smp_setup(void) +{ + int i; + int ncpu = 0; + struct mpconf *mpc; + + init_cpu_possible(cpumask_of(0)); + + /* Scan the MPCONF structure and enumerate available CPUs. */ + for (i = 0; i < MP_NCPU; i++) { + mpc = (struct mpconf *)MPCONF(i); + if (mpc->magic == MPCONF_MAGIC) { + set_cpu_possible(i, true); + __cpu_number_map[i] = ++ncpu; + __cpu_logical_map[ncpu] = i; + pr_info("IP30: Slot: %d, PrID: %.8x, PhyID: %d, VirtID: %d\n", + i, mpc->prid, mpc->physid, mpc->virtid); + } + } + pr_info("IP30: Detected %d CPU(s) present.\n", ncpu); + + /* + * Set the coherency algorithm to '5' (cacheable coherent + * exclusive on write). This is needed on IP30 SMP, especially + * for R14000 CPUs, otherwise, instruction bus errors will + * occur upon reaching userland. + */ + change_c0_config(CONF_CM_CMASK, CONF_CM_CACHABLE_COW); +} + +static void __init ip30_smp_prepare_cpus(unsigned int max_cpus) +{ + /* nothing to do here */ +} + +static int __init ip30_smp_boot_secondary(int cpu, struct task_struct *idle) +{ + struct mpconf *mpc = (struct mpconf *)MPCONF(cpu); + + /* Stack pointer (sp). */ + mpc->stackaddr = (void *)__KSTK_TOS(idle); + + /* Global pointer (gp). */ + mpc->lnch_parm = task_thread_info(idle); + + mb(); /* make sure stack and lparm are written */ + + /* Boot CPUx. 
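+ * (Presumably the secondary CPU's PROM spins polling this launch
+ * vector; once it is written, the CPU enters smp_bootstrap.)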
*/ + mpc->launch = smp_bootstrap; + + /* CPUx now executes smp_bootstrap, then ip30_smp_finish */ + return 0; +} + +static void __init ip30_smp_init_cpu(void) +{ + ip30_per_cpu_init(); +} + +static void __init ip30_smp_finish(void) +{ + enable_percpu_irq(get_c0_compare_int(), IRQ_TYPE_NONE); + local_irq_enable(); +} + +struct plat_smp_ops __read_mostly ip30_smp_ops = { + .send_ipi_single = ip30_smp_send_ipi_single, + .send_ipi_mask = ip30_smp_send_ipi_mask, + .smp_setup = ip30_smp_setup, + .prepare_cpus = ip30_smp_prepare_cpus, + .boot_secondary = ip30_smp_boot_secondary, + .init_secondary = ip30_smp_init_cpu, + .smp_finish = ip30_smp_finish, + .prepare_boot_cpu = ip30_smp_init_cpu, +}; diff --git a/arch/mips/sgi-ip30/ip30-timer.c b/arch/mips/sgi-ip30/ip30-timer.c new file mode 100644 index 000000000000..d13e105478ae --- /dev/null +++ b/arch/mips/sgi-ip30/ip30-timer.c @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ip30-timer.c: Clocksource/clockevent support for the + * HEART chip in SGI Octane (IP30) systems. + * + * Copyright (C) 2004-2007 Stanislaw Skowronek + * Copyright (C) 2009 Johannes Dickgreber + * Copyright (C) 2011 Joshua Kinard + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +static u64 ip30_heart_counter_read(struct clocksource *cs) +{ + return heart_read(&heart_regs->count); +} + +struct clocksource ip30_heart_clocksource = { + .name = "HEART", + .rating = 400, + .read = ip30_heart_counter_read, + .mask = CLOCKSOURCE_MASK(52), + .flags = (CLOCK_SOURCE_IS_CONTINUOUS | CLOCK_SOURCE_VALID_FOR_HRES), +}; + +static u64 notrace ip30_heart_read_sched_clock(void) +{ + return heart_read(&heart_regs->count); +} + +static void __init ip30_heart_clocksource_init(void) +{ + struct clocksource *cs = &ip30_heart_clocksource; + + clocksource_register_hz(cs, HEART_CYCLES_PER_SEC); + + sched_clock_register(ip30_heart_read_sched_clock, 52, + HEART_CYCLES_PER_SEC); +} + +void __init plat_time_init(void) +{ + int irq = get_c0_compare_int(); + + cp0_timer_irq_installed = 1; + c0_compare_irqaction.percpu_dev_id = &mips_clockevent_device; + c0_compare_irqaction.flags &= ~IRQF_SHARED; + irq_set_handler(irq, handle_percpu_devid_irq); + irq_set_percpu_devid(irq); + setup_percpu_irq(irq, &c0_compare_irqaction); + enable_percpu_irq(irq, IRQ_TYPE_NONE); + + ip30_heart_clocksource_init(); +} diff --git a/arch/mips/sgi-ip30/ip30-xtalk.c b/arch/mips/sgi-ip30/ip30-xtalk.c new file mode 100644 index 000000000000..8a2894645529 --- /dev/null +++ b/arch/mips/sgi-ip30/ip30-xtalk.c @@ -0,0 +1,152 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ip30-xtalk.c - Very basic Crosstalk (XIO) detection support. 
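+ * Probes the XBow's widget links and registers a platform device for
+ * each live Bridge/XBridge PCI widget it finds.
+ *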
+ * Copyright (C) 2004-2007 Stanislaw Skowronek + * Copyright (C) 2009 Johannes Dickgreber + * Copyright (C) 2007, 2014-2016 Joshua Kinard + */ + +#include +#include +#include +#include +#include + +#include +#include + +#define IP30_SWIN_BASE(widget) \ + (0x0000000010000000 | (((unsigned long)(widget)) << 24)) + +#define IP30_RAW_SWIN_BASE(widget) (IO_BASE + IP30_SWIN_BASE(widget)) + +#define IP30_SWIN_SIZE (1 << 24) + +#define IP30_WIDGET_XBOW _AC(0x0, UL) /* XBow is always 0 */ +#define IP30_WIDGET_HEART _AC(0x8, UL) /* HEART is always 8 */ +#define IP30_WIDGET_PCI_BASE _AC(0xf, UL) /* BaseIO PCI is always 15 */ + +#define XTALK_NODEV 0xffffffff + +#define XBOW_REG_LINK_STAT_0 0x114 +#define XBOW_REG_LINK_BLK_SIZE 0x40 +#define XBOW_REG_LINK_ALIVE 0x80000000 + +#define HEART_INTR_ADDR 0x00000080 + +#define xtalk_read __raw_readl + +static void bridge_platform_create(int widget, int masterwid) +{ + struct xtalk_bridge_platform_data *bd; + struct sgi_w1_platform_data *wd; + struct platform_device *pdev; + struct resource w1_res; + + wd = kzalloc(sizeof(*wd), GFP_KERNEL); + if (!wd) + goto no_mem; + + snprintf(wd->dev_id, sizeof(wd->dev_id), "bridge-%012lx", + IP30_SWIN_BASE(widget)); + + memset(&w1_res, 0, sizeof(w1_res)); + w1_res.start = IP30_SWIN_BASE(widget) + + offsetof(struct bridge_regs, b_nic); + w1_res.end = w1_res.start + 3; + w1_res.flags = IORESOURCE_MEM; + + pdev = platform_device_alloc("sgi_w1", PLATFORM_DEVID_AUTO); + if (!pdev) { + kfree(wd); + goto no_mem; + } + platform_device_add_resources(pdev, &w1_res, 1); + platform_device_add_data(pdev, wd, sizeof(*wd)); + platform_device_add(pdev); + + bd = kzalloc(sizeof(*bd), GFP_KERNEL); + if (!bd) + goto no_mem; + pdev = platform_device_alloc("xtalk-bridge", PLATFORM_DEVID_AUTO); + if (!pdev) { + kfree(bd); + goto no_mem; + } + + bd->bridge_addr = IP30_RAW_SWIN_BASE(widget); + bd->intr_addr = HEART_INTR_ADDR; + bd->nasid = 0; + bd->masterwid = masterwid; + + bd->mem.name = "Bridge PCI MEM"; + bd->mem.start = IP30_SWIN_BASE(widget) + BRIDGE_DEVIO0; + bd->mem.end = IP30_SWIN_BASE(widget) + IP30_SWIN_SIZE - 1; + bd->mem.flags = IORESOURCE_MEM; + bd->mem_offset = IP30_SWIN_BASE(widget); + + bd->io.name = "Bridge PCI IO"; + bd->io.start = IP30_SWIN_BASE(widget) + BRIDGE_DEVIO0; + bd->io.end = IP30_SWIN_BASE(widget) + IP30_SWIN_SIZE - 1; + bd->io.flags = IORESOURCE_IO; + bd->io_offset = IP30_SWIN_BASE(widget); + + platform_device_add_data(pdev, bd, sizeof(*bd)); + platform_device_add(pdev); + pr_info("xtalk:%x bridge widget\n", widget); + return; + +no_mem: + pr_warn("xtalk:%x bridge create out of memory\n", widget); +} + +static unsigned int __init xbow_widget_active(s8 wid) +{ + unsigned int link_stat; + + link_stat = xtalk_read((void *)(IP30_RAW_SWIN_BASE(IP30_WIDGET_XBOW) + + XBOW_REG_LINK_STAT_0 + + XBOW_REG_LINK_BLK_SIZE * + (wid - 8))); + + return (link_stat & XBOW_REG_LINK_ALIVE) ? 1 : 0; +} + +static void __init xtalk_init_widget(s8 wid, s8 masterwid) +{ + xwidget_part_num_t partnum; + widgetreg_t widget_id; + + if (!xbow_widget_active(wid)) + return; + + widget_id = xtalk_read((void *)(IP30_RAW_SWIN_BASE(wid) + WIDGET_ID)); + + partnum = XWIDGET_PART_NUM(widget_id); + + switch (partnum) { + case BRIDGE_WIDGET_PART_NUM: + case XBRIDGE_WIDGET_PART_NUM: + bridge_platform_create(wid, masterwid); + break; + default: + pr_info("xtalk:%x unknown widget (0x%x)\n", wid, partnum); + break; + } +} + +static int __init ip30_xtalk_init(void) +{ + int i; + + /* + * Walk widget IDs backwards so that BaseIO is probed first. 
This
+ * ensures that the BaseIO IOC3 is always detected as eth0.
+ */
+ for (i = IP30_WIDGET_PCI_BASE; i > IP30_WIDGET_HEART; i--)
+ xtalk_init_widget(i, IP30_WIDGET_HEART);
+
+ return 0;
+}
+
+arch_initcall(ip30_xtalk_init);
--
cgit v1.2.3


From 53fafdbb8b21fa99dfd8376ca056bffde8cafc11 Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti
Date: Mon, 28 Oct 2019 12:36:22 -0200
Subject: KVM: x86: switch KVMCLOCK base to monotonic raw clock

Commit 0bc48bea36d1 ("KVM: x86: update master clock before computing
kvmclock_offset") switches the order of operations to avoid the conversion
TSC (without frequency correction) -> system_timestamp (with frequency
correction), which might cause a time jump.

However, it leaves any other masterclock update unsafe, which includes,
at the moment:

* HV_X64_MSR_REFERENCE_TSC MSR write.
* TSC writes.
* Host suspend/resume.

Avoid the time jump issue by using the frequency-uncorrected
CLOCK_MONOTONIC_RAW clock. It is the guest's timekeeping software's
responsibility to track and correct a reference clock such as UTC.

This fixes a forward time jump (which can result in a failure to bring up
a vCPU) during vCPU hotplug:

Oct 11 14:48:33 storage kernel: CPU2 has been hot-added
Oct 11 14:48:34 storage kernel: CPU3 has been hot-added
Oct 11 14:49:22 storage kernel: smpboot: Booting Node 0 Processor 2 APIC 0x2 <-- time jump of almost 1 minute
Oct 11 14:49:22 storage kernel: smpboot: do_boot_cpu failed(-1) to wakeup CPU#2
Oct 11 14:49:23 storage kernel: smpboot: Booting Node 0 Processor 3 APIC 0x3
Oct 11 14:49:23 storage kernel: kvm-clock: cpu 3, msr 0:7ff640c1, secondary cpu clock

Which happens because:

/*
 * Wait 10s total for a response from AP
 */
boot_error = -1;
timeout = jiffies + 10*HZ;
while (time_before(jiffies, timeout)) {
...
}

Analyzed-by: Igor Mammedov
Signed-off-by: Marcelo Tosatti
Signed-off-by: Paolo Bonzini
---
arch/x86/kvm/x86.c | 59 ++++++++++++++++++++++++++++++++++--------------------
1 file changed, 37 insertions(+), 22 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 19a0dc96beca..89621025577a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1526,20 +1526,25 @@ static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
}

#ifdef CONFIG_X86_64
+struct pvclock_clock {
+ int vclock_mode;
+ u64 cycle_last;
+ u64 mask;
+ u32 mult;
+ u32 shift;
+};
+
struct pvclock_gtod_data {
seqcount_t seq;

- struct { /* extract of a clocksource struct */
- int vclock_mode;
- u64 cycle_last;
- u64 mask;
- u32 mult;
- u32 shift;
- } clock;
+ struct pvclock_clock clock; /* extract of a clocksource struct */
+ struct pvclock_clock raw_clock; /* extract of a clocksource struct */

+ u64 boot_ns_raw;
u64 boot_ns;
u64 nsec_base;
u64 wall_time_sec;
+ u64 monotonic_raw_nsec;
};

static struct pvclock_gtod_data pvclock_gtod_data;
@@ -1547,9 +1552,10 @@ static struct pvclock_gtod_data pvclock_gtod_data;
static void update_pvclock_gtod(struct timekeeper *tk)
{
struct pvclock_gtod_data *vdata = &pvclock_gtod_data;
- u64 boot_ns;
+ u64 boot_ns, boot_ns_raw;

boot_ns = ktime_to_ns(ktime_add(tk->tkr_mono.base, tk->offs_boot));
+ boot_ns_raw = ktime_to_ns(ktime_add(tk->tkr_raw.base, tk->offs_boot));

write_seqcount_begin(&vdata->seq);

@@ -1560,11 +1566,20 @@ static void update_pvclock_gtod(struct timekeeper *tk)
vdata->clock.mult = tk->tkr_mono.mult;
vdata->clock.shift = tk->tkr_mono.shift;

+ vdata->raw_clock.vclock_mode = tk->tkr_raw.clock->archdata.vclock_mode;
+ vdata->raw_clock.cycle_last = tk->tkr_raw.cycle_last;
+ vdata->raw_clock.mask = tk->tkr_raw.mask;
+ vdata->raw_clock.mult = tk->tkr_raw.mult;
+ vdata->raw_clock.shift = tk->tkr_raw.shift;
+
vdata->boot_ns = boot_ns;
vdata->nsec_base = tk->tkr_mono.xtime_nsec;

vdata->wall_time_sec = tk->xtime_sec;

+ vdata->boot_ns_raw = boot_ns_raw;
+ vdata->monotonic_raw_nsec = tk->tkr_raw.xtime_nsec;
+
write_seqcount_end(&vdata->seq);
}
#endif
@@ -1988,21 +2003,21 @@ static u64 read_tsc(void)
return last;
}

-static inline u64 vgettsc(u64 *tsc_timestamp, int *mode)
+static inline u64 vgettsc(struct pvclock_clock *clock, u64 *tsc_timestamp,
+ int *mode)
{
long v;
- struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
u64 tsc_pg_val;

- switch (gtod->clock.vclock_mode) {
+ switch (clock->vclock_mode) {
case VCLOCK_HVCLOCK:
tsc_pg_val = hv_read_tsc_page_tsc(hv_get_tsc_page(),
tsc_timestamp);
if (tsc_pg_val != U64_MAX) {
/* TSC page valid */
*mode = VCLOCK_HVCLOCK;
- v = (tsc_pg_val - gtod->clock.cycle_last) &
- gtod->clock.mask;
+ v = (tsc_pg_val - clock->cycle_last) &
+ clock->mask;
} else {
/* TSC page invalid */
*mode = VCLOCK_NONE;
@@ -2011,8 +2026,8 @@ static inline u64 vgettsc(u64 *tsc_timestamp, int *mode)
case VCLOCK_TSC:
*mode = VCLOCK_TSC;
*tsc_timestamp = read_tsc();
- v = (*tsc_timestamp - gtod->clock.cycle_last) &
- gtod->clock.mask;
+ v = (*tsc_timestamp - clock->cycle_last) &
+ clock->mask;
break;
default:
*mode = VCLOCK_NONE;
@@ -2021,10 +2036,10 @@ static inline u64 vgettsc(u64 *tsc_timestamp, int *mode)
if (*mode == VCLOCK_NONE)
*tsc_timestamp = v = 0;

- return v * gtod->clock.mult;
+ return v * clock->mult;
}

-static int do_monotonic_boot(s64 *t, u64 *tsc_timestamp)
+static int do_monotonic_raw(s64 *t, u64 *tsc_timestamp)
{
struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
unsigned long seq;
@@ -2033,10 +2048,10 @@ static int do_monotonic_boot(s64 *t, u64 *tsc_timestamp)
do {
seq = read_seqcount_begin(&gtod->seq);
- ns = gtod->nsec_base;
- ns += vgettsc(tsc_timestamp, &mode);
+ ns = gtod->monotonic_raw_nsec;
+ ns += vgettsc(&gtod->raw_clock, tsc_timestamp, &mode);
ns >>= gtod->clock.shift;
- ns += gtod->boot_ns;
+ ns += gtod->boot_ns_raw;
} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
*t = ns;

@@ -2054,7 +2069,7 @@ static int do_realtime(struct timespec64 *ts, u64 *tsc_timestamp)
seq = read_seqcount_begin(&gtod->seq);
ts->tv_sec = gtod->wall_time_sec;
ns = gtod->nsec_base;
- ns += vgettsc(tsc_timestamp, &mode);
+ ns += vgettsc(&gtod->clock, tsc_timestamp, &mode);
ns >>= gtod->clock.shift;
} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));

@@ -2071,7 +2086,7 @@ static bool kvm_get_time_and_clockread(s64 *kernel_ns, u64 *tsc_timestamp)
if (!gtod_is_based_on_tsc(pvclock_gtod_data.clock.vclock_mode))
return false;

- return gtod_is_based_on_tsc(do_monotonic_boot(kernel_ns,
+ return gtod_is_based_on_tsc(do_monotonic_raw(kernel_ns,
tsc_timestamp));
}
--
cgit v1.2.3


From 75a1a607bb7e6d918be3aca11ec2214a275392f4 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann
Date: Sat, 2 Nov 2019 00:17:57 +0100
Subject: uaccess: Add strict non-pagefault kernel-space read function

Add two new probe_kernel_read_strict() and strncpy_from_unsafe_strict()
helpers which by default alias to the __probe_kernel_read() and the
__strncpy_from_unsafe(), respectively, but can be overridden by archs
which have non-overlapping address ranges for kernel space and user
space in order to bail out with -EFAULT when attempting to probe user
memory including non-canonical user access addresses [0]:

4-level page tables:
user-space mem: 0x0000000000000000 - 0x00007fffffffffff
non-canonical:
0x0000800000000000 - 0xffff7fffffffffff 5-level page tables: user-space mem: 0x0000000000000000 - 0x00ffffffffffffff non-canonical: 0x0100000000000000 - 0xfeffffffffffffff The idea is that these helpers are complementary to the probe_user_read() and strncpy_from_unsafe_user() which probe user-only memory. Both added helpers here do the same, but for kernel-only addresses. Both set of helpers are going to be used for BPF tracing. They also explicitly avoid throwing the splat for non-canonical user addresses from 00c42373d397 ("x86-64: add warning for non-canonical user access address dereferences"). For compat, the current probe_kernel_read() and strncpy_from_unsafe() are left as-is. [0] Documentation/x86/x86_64/mm.txt Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Cc: Linus Torvalds Cc: Masami Hiramatsu Cc: x86@kernel.org Link: https://lore.kernel.org/bpf/eefeefd769aa5a013531f491a71f0936779e916b.1572649915.git.daniel@iogearbox.net --- arch/x86/mm/Makefile | 2 +- arch/x86/mm/maccess.c | 43 +++++++++++++++++++++++++++++++++++++++++++ include/linux/uaccess.h | 4 ++++ mm/maccess.c | 25 ++++++++++++++++++++++++- 4 files changed, 72 insertions(+), 2 deletions(-) create mode 100644 arch/x86/mm/maccess.c (limited to 'arch') diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 84373dc9b341..bbc68a54795e 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -13,7 +13,7 @@ CFLAGS_REMOVE_mem_encrypt_identity.o = -pg endif obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ - pat.o pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o + pat.o pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o maccess.o # Make sure __phys_addr has no stackprotector nostackp := $(call cc-option, -fno-stack-protector) diff --git a/arch/x86/mm/maccess.c b/arch/x86/mm/maccess.c new file mode 100644 index 000000000000..f5b85bdc0535 --- /dev/null +++ b/arch/x86/mm/maccess.c @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include +#include + +#ifdef CONFIG_X86_64 +static __always_inline u64 canonical_address(u64 vaddr, u8 vaddr_bits) +{ + return ((s64)vaddr << (64 - vaddr_bits)) >> (64 - vaddr_bits); +} + +static __always_inline bool invalid_probe_range(u64 vaddr) +{ + /* + * Range covering the highest possible canonical userspace address + * as well as non-canonical address range. For the canonical range + * we also need to include the userspace guard page. + */ + return vaddr < TASK_SIZE_MAX + PAGE_SIZE || + canonical_address(vaddr, boot_cpu_data.x86_virt_bits) != vaddr; +} +#else +static __always_inline bool invalid_probe_range(u64 vaddr) +{ + return vaddr < TASK_SIZE_MAX; +} +#endif + +long probe_kernel_read_strict(void *dst, const void *src, size_t size) +{ + if (unlikely(invalid_probe_range((unsigned long)src))) + return -EFAULT; + + return __probe_kernel_read(dst, src, size); +} + +long strncpy_from_unsafe_strict(char *dst, const void *unsafe_addr, long count) +{ + if (unlikely(invalid_probe_range((unsigned long)unsafe_addr))) + return -EFAULT; + + return __strncpy_from_unsafe(dst, unsafe_addr, count); +} diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 38555435a64a..67f016010aad 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -311,6 +311,7 @@ copy_struct_from_user(void *dst, size_t ksize, const void __user *src, * happens, handle that and return -EFAULT. 
*/ extern long probe_kernel_read(void *dst, const void *src, size_t size); +extern long probe_kernel_read_strict(void *dst, const void *src, size_t size); extern long __probe_kernel_read(void *dst, const void *src, size_t size); /* @@ -350,6 +351,9 @@ extern long notrace probe_user_write(void __user *dst, const void *src, size_t s extern long notrace __probe_user_write(void __user *dst, const void *src, size_t size); extern long strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count); +extern long strncpy_from_unsafe_strict(char *dst, const void *unsafe_addr, + long count); +extern long __strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count); extern long strncpy_from_unsafe_user(char *dst, const void __user *unsafe_addr, long count); extern long strnlen_unsafe_user(const void __user *unsafe_addr, long count); diff --git a/mm/maccess.c b/mm/maccess.c index 2d3c3d01064c..3ca8d97e5010 100644 --- a/mm/maccess.c +++ b/mm/maccess.c @@ -43,11 +43,20 @@ probe_write_common(void __user *dst, const void *src, size_t size) * do_page_fault() doesn't attempt to take mmap_sem. This makes * probe_kernel_read() suitable for use within regions where the caller * already holds mmap_sem, or other locks which nest inside mmap_sem. + * + * probe_kernel_read_strict() is the same as probe_kernel_read() except for + * the case where architectures have non-overlapping user and kernel address + * ranges: probe_kernel_read_strict() will additionally return -EFAULT for + * probing memory on a user address range where probe_user_read() is supposed + * to be used instead. */ long __weak probe_kernel_read(void *dst, const void *src, size_t size) __attribute__((alias("__probe_kernel_read"))); +long __weak probe_kernel_read_strict(void *dst, const void *src, size_t size) + __attribute__((alias("__probe_kernel_read"))); + long __probe_kernel_read(void *dst, const void *src, size_t size) { long ret; @@ -157,8 +166,22 @@ EXPORT_SYMBOL_GPL(probe_user_write); * * If @count is smaller than the length of the string, copies @count-1 bytes, * sets the last byte of @dst buffer to NUL and returns @count. + * + * strncpy_from_unsafe_strict() is the same as strncpy_from_unsafe() except + * for the case where architectures have non-overlapping user and kernel address + * ranges: strncpy_from_unsafe_strict() will additionally return -EFAULT for + * probing memory on a user address range where strncpy_from_unsafe_user() is + * supposed to be used instead. 
*/ -long strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count) + +long __weak strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count) + __attribute__((alias("__strncpy_from_unsafe"))); + +long __weak strncpy_from_unsafe_strict(char *dst, const void *unsafe_addr, + long count) + __attribute__((alias("__strncpy_from_unsafe"))); + +long __strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count) { mm_segment_t old_fs = get_fs(); const void *src = unsafe_addr; -- cgit v1.2.3 From 84ba838990fcbdf84f87d11ce97779ce28ade5c8 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 1 Oct 2019 09:35:39 +0200 Subject: m68k: defconfig: Update defconfigs for v5.4-rc1 Actual changes: -# CONFIG_BACKLIGHT_CLASS_DEVICE is not set -CONFIG_CRYPTO_AEGIS128L=m -CONFIG_CRYPTO_AEGIS256=m -CONFIG_CRYPTO_MORUS1280=m -CONFIG_CRYPTO_MORUS640=m +CONFIG_DM_CLONE=m +CONFIG_EROFS_FS=m -# CONFIG_LCD_CLASS_DEVICE is not set Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20191001073539.4488-1-geert@linux-m68k.org --- arch/m68k/configs/amiga_defconfig | 8 ++------ arch/m68k/configs/apollo_defconfig | 8 ++------ arch/m68k/configs/atari_defconfig | 8 ++------ arch/m68k/configs/bvme6000_defconfig | 8 ++------ arch/m68k/configs/hp300_defconfig | 8 ++------ arch/m68k/configs/mac_defconfig | 8 ++------ arch/m68k/configs/multi_defconfig | 8 ++------ arch/m68k/configs/mvme147_defconfig | 8 ++------ arch/m68k/configs/mvme16x_defconfig | 8 ++------ arch/m68k/configs/q40_defconfig | 8 ++------ arch/m68k/configs/sun3_defconfig | 8 ++------ arch/m68k/configs/sun3x_defconfig | 8 ++------ 12 files changed, 24 insertions(+), 72 deletions(-) (limited to 'arch') diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig index 9a33c1c006a1..6c9d4e47cf17 100644 --- a/arch/m68k/configs/amiga_defconfig +++ b/arch/m68k/configs/amiga_defconfig @@ -355,6 +355,7 @@ CONFIG_DM_SNAPSHOT=m CONFIG_DM_THIN_PROVISIONING=m CONFIG_DM_WRITECACHE=m CONFIG_DM_ERA=m +CONFIG_DM_CLONE=m CONFIG_DM_MIRROR=m CONFIG_DM_RAID=m CONFIG_DM_ZERO=m @@ -432,8 +433,6 @@ CONFIG_FB_AMIGA_OCS=y CONFIG_FB_AMIGA_ECS=y CONFIG_FB_AMIGA_AGA=y CONFIG_FB_FM2=y -# CONFIG_LCD_CLASS_DEVICE is not set -# CONFIG_BACKLIGHT_CLASS_DEVICE is not set CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y CONFIG_SOUND=m @@ -490,6 +489,7 @@ CONFIG_QNX4FS_FS=m CONFIG_QNX6FS_FS=m CONFIG_SYSV_FS=m CONFIG_UFS_FS=m +CONFIG_EROFS_FS=m CONFIG_NFS_FS=y CONFIG_NFS_V4=m CONFIG_NFS_SWAP=y @@ -560,10 +560,6 @@ CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECRDSA=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m -CONFIG_CRYPTO_AEGIS128L=m -CONFIG_CRYPTO_AEGIS256=m -CONFIG_CRYPTO_MORUS640=m -CONFIG_CRYPTO_MORUS1280=m CONFIG_CRYPTO_CFB=m CONFIG_CRYPTO_CTS=m CONFIG_CRYPTO_LRW=m diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig index 7fdbc797a05d..caa0558abcdb 100644 --- a/arch/m68k/configs/apollo_defconfig +++ b/arch/m68k/configs/apollo_defconfig @@ -334,6 +334,7 @@ CONFIG_DM_SNAPSHOT=m CONFIG_DM_THIN_PROVISIONING=m CONFIG_DM_WRITECACHE=m CONFIG_DM_ERA=m +CONFIG_DM_CLONE=m CONFIG_DM_MIRROR=m CONFIG_DM_RAID=m CONFIG_DM_ZERO=m @@ -393,8 +394,6 @@ CONFIG_PPS_CLIENT_LDISC=m CONFIG_PTP_1588_CLOCK=m # CONFIG_HWMON is not set CONFIG_FB=y -# CONFIG_LCD_CLASS_DEVICE is not set -# CONFIG_BACKLIGHT_CLASS_DEVICE is not set CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y # CONFIG_LOGO_LINUX_VGA16 is not set @@ -450,6 +449,7 @@ CONFIG_QNX4FS_FS=m CONFIG_QNX6FS_FS=m CONFIG_SYSV_FS=m CONFIG_UFS_FS=m +CONFIG_EROFS_FS=m 
CONFIG_NFS_FS=y CONFIG_NFS_V4=m CONFIG_NFS_SWAP=y @@ -520,10 +520,6 @@ CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECRDSA=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m -CONFIG_CRYPTO_AEGIS128L=m -CONFIG_CRYPTO_AEGIS256=m -CONFIG_CRYPTO_MORUS640=m -CONFIG_CRYPTO_MORUS1280=m CONFIG_CRYPTO_CFB=m CONFIG_CRYPTO_CTS=m CONFIG_CRYPTO_LRW=m diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig index f1763405a539..2551c7e9ac54 100644 --- a/arch/m68k/configs/atari_defconfig +++ b/arch/m68k/configs/atari_defconfig @@ -350,6 +350,7 @@ CONFIG_DM_SNAPSHOT=m CONFIG_DM_THIN_PROVISIONING=m CONFIG_DM_WRITECACHE=m CONFIG_DM_ERA=m +CONFIG_DM_CLONE=m CONFIG_DM_MIRROR=m CONFIG_DM_RAID=m CONFIG_DM_ZERO=m @@ -417,8 +418,6 @@ CONFIG_PTP_1588_CLOCK=m # CONFIG_HWMON is not set CONFIG_FB=y CONFIG_FB_ATARI=y -# CONFIG_LCD_CLASS_DEVICE is not set -# CONFIG_BACKLIGHT_CLASS_DEVICE is not set CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y CONFIG_SOUND=m @@ -472,6 +471,7 @@ CONFIG_QNX4FS_FS=m CONFIG_QNX6FS_FS=m CONFIG_SYSV_FS=m CONFIG_UFS_FS=m +CONFIG_EROFS_FS=m CONFIG_NFS_FS=y CONFIG_NFS_V4=m CONFIG_NFS_SWAP=y @@ -542,10 +542,6 @@ CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECRDSA=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m -CONFIG_CRYPTO_AEGIS128L=m -CONFIG_CRYPTO_AEGIS256=m -CONFIG_CRYPTO_MORUS640=m -CONFIG_CRYPTO_MORUS1280=m CONFIG_CRYPTO_CFB=m CONFIG_CRYPTO_CTS=m CONFIG_CRYPTO_LRW=m diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig index 91154d6acb31..4ffc1e5646d5 100644 --- a/arch/m68k/configs/bvme6000_defconfig +++ b/arch/m68k/configs/bvme6000_defconfig @@ -332,6 +332,7 @@ CONFIG_DM_SNAPSHOT=m CONFIG_DM_THIN_PROVISIONING=m CONFIG_DM_WRITECACHE=m CONFIG_DM_ERA=m +CONFIG_DM_CLONE=m CONFIG_DM_MIRROR=m CONFIG_DM_RAID=m CONFIG_DM_ZERO=m @@ -390,8 +391,6 @@ CONFIG_NTP_PPS=y CONFIG_PPS_CLIENT_LDISC=m CONFIG_PTP_1588_CLOCK=m # CONFIG_HWMON is not set -# CONFIG_LCD_CLASS_DEVICE is not set -# CONFIG_BACKLIGHT_CLASS_DEVICE is not set CONFIG_HID=m CONFIG_HIDRAW=y CONFIG_UHID=m @@ -443,6 +442,7 @@ CONFIG_QNX4FS_FS=m CONFIG_QNX6FS_FS=m CONFIG_SYSV_FS=m CONFIG_UFS_FS=m +CONFIG_EROFS_FS=m CONFIG_NFS_FS=y CONFIG_NFS_V4=m CONFIG_NFS_SWAP=y @@ -513,10 +513,6 @@ CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECRDSA=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m -CONFIG_CRYPTO_AEGIS128L=m -CONFIG_CRYPTO_AEGIS256=m -CONFIG_CRYPTO_MORUS640=m -CONFIG_CRYPTO_MORUS1280=m CONFIG_CRYPTO_CFB=m CONFIG_CRYPTO_CTS=m CONFIG_CRYPTO_LRW=m diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig index c398c4a94d95..806da3d97ca4 100644 --- a/arch/m68k/configs/hp300_defconfig +++ b/arch/m68k/configs/hp300_defconfig @@ -333,6 +333,7 @@ CONFIG_DM_SNAPSHOT=m CONFIG_DM_THIN_PROVISIONING=m CONFIG_DM_WRITECACHE=m CONFIG_DM_ERA=m +CONFIG_DM_CLONE=m CONFIG_DM_MIRROR=m CONFIG_DM_RAID=m CONFIG_DM_ZERO=m @@ -395,8 +396,6 @@ CONFIG_PPS_CLIENT_LDISC=m CONFIG_PTP_1588_CLOCK=m # CONFIG_HWMON is not set CONFIG_FB=y -# CONFIG_LCD_CLASS_DEVICE is not set -# CONFIG_BACKLIGHT_CLASS_DEVICE is not set CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y # CONFIG_LOGO_LINUX_MONO is not set @@ -452,6 +451,7 @@ CONFIG_QNX4FS_FS=m CONFIG_QNX6FS_FS=m CONFIG_SYSV_FS=m CONFIG_UFS_FS=m +CONFIG_EROFS_FS=m CONFIG_NFS_FS=y CONFIG_NFS_V4=m CONFIG_NFS_SWAP=y @@ -522,10 +522,6 @@ CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECRDSA=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m -CONFIG_CRYPTO_AEGIS128L=m -CONFIG_CRYPTO_AEGIS256=m -CONFIG_CRYPTO_MORUS640=m -CONFIG_CRYPTO_MORUS1280=m 
CONFIG_CRYPTO_CFB=m CONFIG_CRYPTO_CTS=m CONFIG_CRYPTO_LRW=m diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig index 350d004559be..250da20e291c 100644 --- a/arch/m68k/configs/mac_defconfig +++ b/arch/m68k/configs/mac_defconfig @@ -342,6 +342,7 @@ CONFIG_DM_SNAPSHOT=m CONFIG_DM_THIN_PROVISIONING=m CONFIG_DM_WRITECACHE=m CONFIG_DM_ERA=m +CONFIG_DM_CLONE=m CONFIG_DM_MIRROR=m CONFIG_DM_RAID=m CONFIG_DM_ZERO=m @@ -419,8 +420,6 @@ CONFIG_PTP_1588_CLOCK=m CONFIG_FB=y CONFIG_FB_VALKYRIE=y CONFIG_FB_MAC=y -# CONFIG_LCD_CLASS_DEVICE is not set -# CONFIG_BACKLIGHT_CLASS_DEVICE is not set CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y CONFIG_HID=m @@ -474,6 +473,7 @@ CONFIG_QNX4FS_FS=m CONFIG_QNX6FS_FS=m CONFIG_SYSV_FS=m CONFIG_UFS_FS=m +CONFIG_EROFS_FS=m CONFIG_NFS_FS=y CONFIG_NFS_V4=m CONFIG_NFS_SWAP=y @@ -544,10 +544,6 @@ CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECRDSA=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m -CONFIG_CRYPTO_AEGIS128L=m -CONFIG_CRYPTO_AEGIS256=m -CONFIG_CRYPTO_MORUS640=m -CONFIG_CRYPTO_MORUS1280=m CONFIG_CRYPTO_CFB=m CONFIG_CRYPTO_CTS=m CONFIG_CRYPTO_LRW=m diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig index b838dd820348..45654650f50a 100644 --- a/arch/m68k/configs/multi_defconfig +++ b/arch/m68k/configs/multi_defconfig @@ -386,6 +386,7 @@ CONFIG_DM_SNAPSHOT=m CONFIG_DM_THIN_PROVISIONING=m CONFIG_DM_WRITECACHE=m CONFIG_DM_ERA=m +CONFIG_DM_CLONE=m CONFIG_DM_MIRROR=m CONFIG_DM_RAID=m CONFIG_DM_ZERO=m @@ -495,8 +496,6 @@ CONFIG_FB_FM2=y CONFIG_FB_ATARI=y CONFIG_FB_VALKYRIE=y CONFIG_FB_MAC=y -# CONFIG_LCD_CLASS_DEVICE is not set -# CONFIG_BACKLIGHT_CLASS_DEVICE is not set CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y CONFIG_SOUND=m @@ -556,6 +555,7 @@ CONFIG_QNX4FS_FS=m CONFIG_QNX6FS_FS=m CONFIG_SYSV_FS=m CONFIG_UFS_FS=m +CONFIG_EROFS_FS=m CONFIG_NFS_FS=y CONFIG_NFS_V4=m CONFIG_NFS_SWAP=y @@ -626,10 +626,6 @@ CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECRDSA=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m -CONFIG_CRYPTO_AEGIS128L=m -CONFIG_CRYPTO_AEGIS256=m -CONFIG_CRYPTO_MORUS640=m -CONFIG_CRYPTO_MORUS1280=m CONFIG_CRYPTO_CFB=m CONFIG_CRYPTO_CTS=m CONFIG_CRYPTO_LRW=m diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig index 3f8dd61559cf..7800d3a8d46e 100644 --- a/arch/m68k/configs/mvme147_defconfig +++ b/arch/m68k/configs/mvme147_defconfig @@ -331,6 +331,7 @@ CONFIG_DM_SNAPSHOT=m CONFIG_DM_THIN_PROVISIONING=m CONFIG_DM_WRITECACHE=m CONFIG_DM_ERA=m +CONFIG_DM_CLONE=m CONFIG_DM_MIRROR=m CONFIG_DM_RAID=m CONFIG_DM_ZERO=m @@ -389,8 +390,6 @@ CONFIG_NTP_PPS=y CONFIG_PPS_CLIENT_LDISC=m CONFIG_PTP_1588_CLOCK=m # CONFIG_HWMON is not set -# CONFIG_LCD_CLASS_DEVICE is not set -# CONFIG_BACKLIGHT_CLASS_DEVICE is not set CONFIG_HID=m CONFIG_HIDRAW=y CONFIG_UHID=m @@ -442,6 +441,7 @@ CONFIG_QNX4FS_FS=m CONFIG_QNX6FS_FS=m CONFIG_SYSV_FS=m CONFIG_UFS_FS=m +CONFIG_EROFS_FS=m CONFIG_NFS_FS=y CONFIG_NFS_V4=m CONFIG_NFS_SWAP=y @@ -512,10 +512,6 @@ CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECRDSA=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m -CONFIG_CRYPTO_AEGIS128L=m -CONFIG_CRYPTO_AEGIS256=m -CONFIG_CRYPTO_MORUS640=m -CONFIG_CRYPTO_MORUS1280=m CONFIG_CRYPTO_CFB=m CONFIG_CRYPTO_CTS=m CONFIG_CRYPTO_LRW=m diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig index ae3b2d4f636c..c32dc2d2058d 100644 --- a/arch/m68k/configs/mvme16x_defconfig +++ b/arch/m68k/configs/mvme16x_defconfig @@ -332,6 +332,7 @@ CONFIG_DM_SNAPSHOT=m CONFIG_DM_THIN_PROVISIONING=m 
CONFIG_DM_WRITECACHE=m CONFIG_DM_ERA=m +CONFIG_DM_CLONE=m CONFIG_DM_MIRROR=m CONFIG_DM_RAID=m CONFIG_DM_ZERO=m @@ -390,8 +391,6 @@ CONFIG_NTP_PPS=y CONFIG_PPS_CLIENT_LDISC=m CONFIG_PTP_1588_CLOCK=m # CONFIG_HWMON is not set -# CONFIG_LCD_CLASS_DEVICE is not set -# CONFIG_BACKLIGHT_CLASS_DEVICE is not set CONFIG_HID=m CONFIG_HIDRAW=y CONFIG_UHID=m @@ -443,6 +442,7 @@ CONFIG_QNX4FS_FS=m CONFIG_QNX6FS_FS=m CONFIG_SYSV_FS=m CONFIG_UFS_FS=m +CONFIG_EROFS_FS=m CONFIG_NFS_FS=y CONFIG_NFS_V4=m CONFIG_NFS_SWAP=y @@ -513,10 +513,6 @@ CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECRDSA=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m -CONFIG_CRYPTO_AEGIS128L=m -CONFIG_CRYPTO_AEGIS256=m -CONFIG_CRYPTO_MORUS640=m -CONFIG_CRYPTO_MORUS1280=m CONFIG_CRYPTO_CFB=m CONFIG_CRYPTO_CTS=m CONFIG_CRYPTO_LRW=m diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig index cd61ef14b582..bf0a65ce57e0 100644 --- a/arch/m68k/configs/q40_defconfig +++ b/arch/m68k/configs/q40_defconfig @@ -339,6 +339,7 @@ CONFIG_DM_SNAPSHOT=m CONFIG_DM_THIN_PROVISIONING=m CONFIG_DM_WRITECACHE=m CONFIG_DM_ERA=m +CONFIG_DM_CLONE=m CONFIG_DM_MIRROR=m CONFIG_DM_RAID=m CONFIG_DM_ZERO=m @@ -404,8 +405,6 @@ CONFIG_PPS_CLIENT_PARPORT=m CONFIG_PTP_1588_CLOCK=m # CONFIG_HWMON is not set CONFIG_FB=y -# CONFIG_LCD_CLASS_DEVICE is not set -# CONFIG_BACKLIGHT_CLASS_DEVICE is not set CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y CONFIG_SOUND=m @@ -461,6 +460,7 @@ CONFIG_QNX4FS_FS=m CONFIG_QNX6FS_FS=m CONFIG_SYSV_FS=m CONFIG_UFS_FS=m +CONFIG_EROFS_FS=m CONFIG_NFS_FS=y CONFIG_NFS_V4=m CONFIG_NFS_SWAP=y @@ -531,10 +531,6 @@ CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECRDSA=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m -CONFIG_CRYPTO_AEGIS128L=m -CONFIG_CRYPTO_AEGIS256=m -CONFIG_CRYPTO_MORUS640=m -CONFIG_CRYPTO_MORUS1280=m CONFIG_CRYPTO_CFB=m CONFIG_CRYPTO_CTS=m CONFIG_CRYPTO_LRW=m diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig index 151f5371cd3d..5f3cfa2926d2 100644 --- a/arch/m68k/configs/sun3_defconfig +++ b/arch/m68k/configs/sun3_defconfig @@ -329,6 +329,7 @@ CONFIG_DM_SNAPSHOT=m CONFIG_DM_THIN_PROVISIONING=m CONFIG_DM_WRITECACHE=m CONFIG_DM_ERA=m +CONFIG_DM_CLONE=m CONFIG_DM_MIRROR=m CONFIG_DM_RAID=m CONFIG_DM_ZERO=m @@ -390,8 +391,6 @@ CONFIG_PPS_CLIENT_LDISC=m CONFIG_PTP_1588_CLOCK=m # CONFIG_HWMON is not set CONFIG_FB=y -# CONFIG_LCD_CLASS_DEVICE is not set -# CONFIG_BACKLIGHT_CLASS_DEVICE is not set CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y CONFIG_HID=m @@ -445,6 +444,7 @@ CONFIG_QNX4FS_FS=m CONFIG_QNX6FS_FS=m CONFIG_SYSV_FS=m CONFIG_UFS_FS=m +CONFIG_EROFS_FS=m CONFIG_NFS_FS=y CONFIG_NFS_V4=m CONFIG_NFS_SWAP=y @@ -515,10 +515,6 @@ CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECRDSA=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m -CONFIG_CRYPTO_AEGIS128L=m -CONFIG_CRYPTO_AEGIS256=m -CONFIG_CRYPTO_MORUS640=m -CONFIG_CRYPTO_MORUS1280=m CONFIG_CRYPTO_CFB=m CONFIG_CRYPTO_CTS=m CONFIG_CRYPTO_LRW=m diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig index 1dcb0ee1fe98..58354d2018d5 100644 --- a/arch/m68k/configs/sun3x_defconfig +++ b/arch/m68k/configs/sun3x_defconfig @@ -329,6 +329,7 @@ CONFIG_DM_SNAPSHOT=m CONFIG_DM_THIN_PROVISIONING=m CONFIG_DM_WRITECACHE=m CONFIG_DM_ERA=m +CONFIG_DM_CLONE=m CONFIG_DM_MIRROR=m CONFIG_DM_RAID=m CONFIG_DM_ZERO=m @@ -389,8 +390,6 @@ CONFIG_PPS_CLIENT_LDISC=m CONFIG_PTP_1588_CLOCK=m # CONFIG_HWMON is not set CONFIG_FB=y -# CONFIG_LCD_CLASS_DEVICE is not set -# CONFIG_BACKLIGHT_CLASS_DEVICE is not set CONFIG_FRAMEBUFFER_CONSOLE=y 
CONFIG_LOGO=y CONFIG_HID=m @@ -444,6 +443,7 @@ CONFIG_QNX4FS_FS=m CONFIG_QNX6FS_FS=m CONFIG_SYSV_FS=m CONFIG_UFS_FS=m +CONFIG_EROFS_FS=m CONFIG_NFS_FS=y CONFIG_NFS_V4=m CONFIG_NFS_SWAP=y @@ -514,10 +514,6 @@ CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECRDSA=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m -CONFIG_CRYPTO_AEGIS128L=m -CONFIG_CRYPTO_AEGIS256=m -CONFIG_CRYPTO_MORUS640=m -CONFIG_CRYPTO_MORUS1280=m CONFIG_CRYPTO_CFB=m CONFIG_CRYPTO_CTS=m CONFIG_CRYPTO_LRW=m -- cgit v1.2.3 From 032f128dbd29a376ff8f622150191f3aea8b2e9f Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 21 Oct 2019 09:04:38 +0200 Subject: m68k: defconfig: Enable ICY I2C and LTC2990 on Amiga Enable support for the ICY I2C board for Amiga, which is typically equipped with an LTC2990 hwmon chip, in the Amiga and multi-platform defconfig files. Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20191021070438.10819-1-geert@linux-m68k.org Acked-by: Max Staudt --- arch/m68k/configs/amiga_defconfig | 6 +++++- arch/m68k/configs/multi_defconfig | 6 +++++- 2 files changed, 10 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig index 6c9d4e47cf17..619d30d663a2 100644 --- a/arch/m68k/configs/amiga_defconfig +++ b/arch/m68k/configs/amiga_defconfig @@ -421,11 +421,15 @@ CONFIG_INPUT_M68K_BEEP=m # CONFIG_LEGACY_PTYS is not set CONFIG_PRINTER=m # CONFIG_HW_RANDOM is not set +CONFIG_I2C=m +CONFIG_I2C_CHARDEV=m +CONFIG_I2C_ICY=m CONFIG_NTP_PPS=y CONFIG_PPS_CLIENT_LDISC=m CONFIG_PPS_CLIENT_PARPORT=m CONFIG_PTP_1588_CLOCK=m -# CONFIG_HWMON is not set +CONFIG_HWMON=m +CONFIG_SENSORS_LTC2990=m CONFIG_FB=y CONFIG_FB_CIRRUS=y CONFIG_FB_AMIGA=y diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig index 45654650f50a..b764a0368a56 100644 --- a/arch/m68k/configs/multi_defconfig +++ b/arch/m68k/configs/multi_defconfig @@ -481,11 +481,15 @@ CONFIG_SERIAL_PMACZILOG_TTYS=y CONFIG_SERIAL_PMACZILOG_CONSOLE=y CONFIG_PRINTER=m # CONFIG_HW_RANDOM is not set +CONFIG_I2C=m +CONFIG_I2C_CHARDEV=m +CONFIG_I2C_ICY=m CONFIG_NTP_PPS=y CONFIG_PPS_CLIENT_LDISC=m CONFIG_PPS_CLIENT_PARPORT=m CONFIG_PTP_1588_CLOCK=m -# CONFIG_HWMON is not set +CONFIG_HWMON=m +CONFIG_SENSORS_LTC2990=m CONFIG_FB=y CONFIG_FB_CIRRUS=y CONFIG_FB_AMIGA=y -- cgit v1.2.3 From c2f2093e149d3109f1457deac4909191d9aca323 Mon Sep 17 00:00:00 2001 From: Miroslav Benes Date: Tue, 29 Oct 2019 15:39:02 +0100 Subject: s390/unwind: drop unnecessary code around calling ftrace_graph_ret_addr() The current code around calling ftrace_graph_ret_addr() is ifdeffed and also tests if ftrace redirection is present on stack. ftrace_graph_ret_addr() however performs the test internally and there is a version for !CONFIG_FUNCTION_GRAPH_TRACER as well. The unnecessary code can thus be dropped. 
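(For context: ftrace_graph_ret_addr() can be called unconditionally because
ftrace provides a stub when the graph tracer is compiled out. A paraphrased
sketch of that stub, modelled on include/linux/ftrace.h of this era -- the
exact definition in a given tree may differ:)

    static inline unsigned long
    ftrace_graph_ret_addr(struct task_struct *task, int *idx,
                          unsigned long ret, unsigned long *retp)
    {
            /* No graph tracer: "ret" is already the real return address. */
            return ret;
    }

With CONFIG_FUNCTION_GRAPH_TRACER enabled, the real implementation performs
the return_to_handler comparison internally before decoding the shadow
return stack, which is what makes the open-coded checks below redundant.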
Link: http://lkml.kernel.org/r/20191029143904.24051-2-mbenes@suse.cz
Signed-off-by: Miroslav Benes
Signed-off-by: Heiko Carstens
Signed-off-by: Vasily Gorbik
---
arch/s390/kernel/unwind_bc.c | 14 ++------------
1 file changed, 2 insertions(+), 12 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/kernel/unwind_bc.c b/arch/s390/kernel/unwind_bc.c
index 8fc9daae47a2..5699e820c621 100644
--- a/arch/s390/kernel/unwind_bc.c
+++ b/arch/s390/kernel/unwind_bc.c
@@ -80,12 +80,7 @@ bool unwind_next_frame(struct unwind_state *state)
}
}

-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- /* Decode any ftrace redirection */
- if (ip == (unsigned long) return_to_handler)
- ip = ftrace_graph_ret_addr(state->task, &state->graph_idx,
- ip, (void *) sp);
-#endif
+ ip = ftrace_graph_ret_addr(state->task, &state->graph_idx, ip, (void *) sp);

/* Update unwind state */
state->sp = sp;
@@ -140,12 +135,7 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
reliable = false;
}

-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- /* Decode any ftrace redirection */
- if (ip == (unsigned long) return_to_handler)
- ip = ftrace_graph_ret_addr(state->task, &state->graph_idx,
- ip, NULL);
-#endif
+ ip = ftrace_graph_ret_addr(state->task, &state->graph_idx, ip, NULL);

/* Update unwind state */
state->sp = sp;
--
cgit v1.2.3


From 32d1870877ba7675c642e903e5ef71c82a245325 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada
Date: Sun, 3 Nov 2019 21:35:58 +0900
Subject: arm64: mm: simplify the page end calculation in __create_pgd_mapping()

Calculate the page-aligned end address more simply. The local variable
"length" is unneeded.

Reviewed-by: Mark Rutland
Signed-off-by: Masahiro Yamada
Signed-off-by: Catalin Marinas
---
arch/arm64/mm/mmu.c | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 60c929f3683b..a9f541912289 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -338,7 +338,7 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
phys_addr_t (*pgtable_alloc)(int),
int flags)
{
- unsigned long addr, length, end, next;
+ unsigned long addr, end, next;
pgd_t *pgdp = pgd_offset_raw(pgdir, virt);

/*
@@ -350,9 +350,8 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
phys &= PAGE_MASK;
addr = virt & PAGE_MASK;
- length = PAGE_ALIGN(size + (virt & ~PAGE_MASK));
+ end = PAGE_ALIGN(virt + size);

- end = addr + length;
do {
next = pgd_addr_end(addr, end);
alloc_init_pud(pgdp, addr, next, phys, prot, pgtable_alloc,
--
cgit v1.2.3


From 218564b164ad9d283d3cb3d5367705726123a610 Mon Sep 17 00:00:00 2001
From: Bhupesh Sharma
Date: Tue, 5 Nov 2019 03:26:46 +0530
Subject: arm64: mm: Remove MAX_USER_VA_BITS definition

commit 9b31cf493ffa ("arm64: mm: Introduce MAX_USER_VA_BITS definition")
introduced the MAX_USER_VA_BITS definition, which was used to support
the arm64 mm use-cases where the user-space could use 52-bit virtual
addresses whereas the kernel-space would still be limited to a maximum
of 48 bits of virtual addressing.

But, now with commit b6d00d47e81a ("arm64: mm: Introduce 52-bit Kernel
VAs"), we removed the 52-bit user/48-bit kernel kconfig option and hence
there is no longer any scenario where user VA != kernel VA size (even
with CONFIG_ARM64_FORCE_52BIT enabled, the same is true).

Hence we can do away with the MAX_USER_VA_BITS macro as it is equal to
VA_BITS (maximum VA space size) in all possible use-cases.
Note that even though the 'vabits_actual' value would be 48 for arm64 hardware that doesn't support the ARMv8.2 LVA extension (even when CONFIG_ARM64_VA_BITS_52 is enabled), VA_BITS would still be set to 52. Hence this change would be safe in all possible VA address space combinations. Cc: James Morse Cc: Will Deacon Cc: Steve Capper Cc: Ard Biesheuvel Cc: linux-kernel@vger.kernel.org Cc: kexec@lists.infradead.org Reviewed-by: Mark Rutland Signed-off-by: Bhupesh Sharma Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/memory.h | 6 ------ arch/arm64/include/asm/pgtable-hwdef.h | 2 +- arch/arm64/include/asm/processor.h | 2 +- 3 files changed, 2 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index b61b50bf68b1..4867e58dbc9c 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -69,12 +69,6 @@ #define KERNEL_START _text #define KERNEL_END _end -#ifdef CONFIG_ARM64_VA_BITS_52 -#define MAX_USER_VA_BITS 52 -#else -#define MAX_USER_VA_BITS VA_BITS -#endif - /* * Generic and tag-based KASAN require 1/8th and 1/16th of the kernel virtual * address space for the shadow region respectively. They can bloat the stack diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index 3df60f97da1f..d9fbd433cc17 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -69,7 +69,7 @@ #define PGDIR_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(4 - CONFIG_PGTABLE_LEVELS) #define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT) #define PGDIR_MASK (~(PGDIR_SIZE-1)) -#define PTRS_PER_PGD (1 << (MAX_USER_VA_BITS - PGDIR_SHIFT)) +#define PTRS_PER_PGD (1 << (VA_BITS - PGDIR_SHIFT)) /* * Section address mask and size definitions. diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 5623685c7d13..586fcd4b1965 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -9,7 +9,7 @@ #define __ASM_PROCESSOR_H #define KERNEL_DS UL(-1) -#define USER_DS ((UL(1) << MAX_USER_VA_BITS) - 1) +#define USER_DS ((UL(1) << VA_BITS) - 1) /* * On arm64 systems, unaligned accesses by the CPU are cheap, and so there is -- cgit v1.2.3 From 918e1946c8ac2c0473eefc1dc910780178490e95 Mon Sep 17 00:00:00 2001 From: Rich Wiley Date: Tue, 5 Nov 2019 10:45:10 -0800 Subject: arm64: kpti: Add NVIDIA's Carmel core to the KPTI whitelist NVIDIA Carmel CPUs don't implement ID_AA64PFR0_EL1.CSV3 but aren't susceptible to Meltdown, so add Carmel to kpti_safe_list[].
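For context, kpti_safe_list[] is a MIDR range list consulted when deciding whether KPTI is needed; a simplified sketch of the lookup (assumed shape, based on arch/arm64/kernel/cpufeature.c):

| static const struct midr_range kpti_safe_list[] = {
| 	/* ... existing entries, e.g. MIDR_ALL_VERSIONS(MIDR_CORTEX_A73) ... */
| 	MIDR_ALL_VERSIONS(MIDR_HISI_TSV110),
| 	MIDR_ALL_VERSIONS(MIDR_NVIDIA_CARMEL),	/* new entry */
| 	{ /* sentinel */ }
| };
|
| /* CPUs on the list are treated as Meltdown-safe despite lacking CSV3. */
| if (is_midr_in_range_list(read_cpuid_id(), kpti_safe_list))
| 	return false;	/* no need to unmap the kernel at EL0 */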
Signed-off-by: Rich Wiley Signed-off-by: Catalin Marinas --- arch/arm64/kernel/cpufeature.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index cabebf1a7976..b3eea965c930 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -977,6 +977,7 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry, MIDR_ALL_VERSIONS(MIDR_CORTEX_A72), MIDR_ALL_VERSIONS(MIDR_CORTEX_A73), MIDR_ALL_VERSIONS(MIDR_HISI_TSV110), + MIDR_ALL_VERSIONS(MIDR_NVIDIA_CARMEL), { /* sentinel */ } }; char const *str = "kpti command line option"; -- cgit v1.2.3 From a1326b17ac03a9012cb3d01e434aacb4d67a416c Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 16 Oct 2019 18:17:11 +0100 Subject: module/ftrace: handle patchable-function-entry When using patchable-function-entry, the compiler will record the callsites into a section named "__patchable_function_entries" rather than "__mcount_loc". Let's abstract this difference behind a new FTRACE_CALLSITE_SECTION, so that architectures don't have to handle this explicitly (e.g. with custom module linker scripts). As parisc currently handles this explicitly, it is fixed up accordingly, with its custom linker script removed. Since FTRACE_CALLSITE_SECTION is only defined when DYNAMIC_FTRACE is selected, the parisc module loading code is updated to only use the definition in that case. When DYNAMIC_FTRACE is not selected, modules shouldn't have this section, so this removes some redundant work in that case. To make sure that this is kept up-to-date for modules and the main kernel, a comment is added to vmlinux.lds.h, with the existing ifdeffery simplified for legibility. I built parisc generic-{32,64}bit_defconfig with DYNAMIC_FTRACE enabled, and verified that the section made it into the .ko files for modules. Signed-off-by: Mark Rutland Acked-by: Helge Deller Acked-by: Steven Rostedt (VMware) Reviewed-by: Ard Biesheuvel Reviewed-by: Torsten Duwe Tested-by: Amit Daniel Kachhap Tested-by: Sven Schnelle Tested-by: Torsten Duwe Cc: Ingo Molnar Cc: James E.J.
Bottomley Cc: Jessica Yu Cc: linux-parisc@vger.kernel.org --- arch/parisc/Makefile | 1 - arch/parisc/kernel/module.c | 10 +++++++--- arch/parisc/kernel/module.lds | 7 ------- include/asm-generic/vmlinux.lds.h | 14 +++++++------- include/linux/ftrace.h | 5 +++++ kernel/module.c | 2 +- 6 files changed, 20 insertions(+), 19 deletions(-) delete mode 100644 arch/parisc/kernel/module.lds (limited to 'arch') diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile index 36b834f1c933..dca8f2de8cf5 100644 --- a/arch/parisc/Makefile +++ b/arch/parisc/Makefile @@ -60,7 +60,6 @@ KBUILD_CFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY=1 \ -DFTRACE_PATCHABLE_FUNCTION_SIZE=$(NOP_COUNT) CC_FLAGS_FTRACE := -fpatchable-function-entry=$(NOP_COUNT),$(shell echo $$(($(NOP_COUNT)-1))) -KBUILD_LDS_MODULE += $(srctree)/arch/parisc/kernel/module.lds endif OBJCOPY_FLAGS =-O binary -R .note -R .comment -S diff --git a/arch/parisc/kernel/module.c b/arch/parisc/kernel/module.c index ac5f34993b53..1c50093e2ebe 100644 --- a/arch/parisc/kernel/module.c +++ b/arch/parisc/kernel/module.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #include #include @@ -862,7 +863,7 @@ int module_finalize(const Elf_Ehdr *hdr, const char *strtab = NULL; const Elf_Shdr *s; char *secstrings; - int err, symindex = -1; + int symindex = -1; Elf_Sym *newptr, *oldptr; Elf_Shdr *symhdr = NULL; #ifdef DEBUG @@ -946,11 +947,13 @@ int module_finalize(const Elf_Ehdr *hdr, /* patch .altinstructions */ apply_alternatives(aseg, aseg + s->sh_size, me->name); +#ifdef CONFIG_DYNAMIC_FTRACE /* For 32 bit kernels we're compiling modules with * -ffunction-sections so we must relocate the addresses in the - *__mcount_loc section. + * ftrace callsite section. */ - if (symindex != -1 && !strcmp(secname, "__mcount_loc")) { + if (symindex != -1 && !strcmp(secname, FTRACE_CALLSITE_SECTION)) { + int err; if (s->sh_type == SHT_REL) err = apply_relocate((Elf_Shdr *)sechdrs, strtab, symindex, @@ -962,6 +965,7 @@ int module_finalize(const Elf_Ehdr *hdr, if (err) return err; } +#endif } return 0; } diff --git a/arch/parisc/kernel/module.lds b/arch/parisc/kernel/module.lds deleted file mode 100644 index 1a9a92aca5c8..000000000000 --- a/arch/parisc/kernel/module.lds +++ /dev/null @@ -1,7 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - -SECTIONS { - __mcount_loc : { - *(__patchable_function_entries) - } -} diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index dae64600ccbf..a9c4e4721434 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -110,17 +110,17 @@ #endif #ifdef CONFIG_FTRACE_MCOUNT_RECORD -#ifdef CC_USING_PATCHABLE_FUNCTION_ENTRY -#define MCOUNT_REC() . = ALIGN(8); \ - __start_mcount_loc = .; \ - KEEP(*(__patchable_function_entries)) \ - __stop_mcount_loc = .; -#else +/* + * The ftrace call sites are logged to a section whose name depends on the + * compiler option used. A given kernel image will only use one, AKA + * FTRACE_CALLSITE_SECTION. We capture all of them here to avoid header + * dependencies for FTRACE_CALLSITE_SECTION's definition. + */ #define MCOUNT_REC() . 
= ALIGN(8); \ __start_mcount_loc = .; \ KEEP(*(__mcount_loc)) \ + KEEP(*(__patchable_function_entries)) \ __stop_mcount_loc = .; -#endif #else #define MCOUNT_REC() #endif diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 9867d90d635e..9141f2263286 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -738,6 +738,11 @@ static inline unsigned long get_lock_parent_ip(void) #ifdef CONFIG_FTRACE_MCOUNT_RECORD extern void ftrace_init(void); +#ifdef CC_USING_PATCHABLE_FUNCTION_ENTRY +#define FTRACE_CALLSITE_SECTION "__patchable_function_entries" +#else +#define FTRACE_CALLSITE_SECTION "__mcount_loc" +#endif #else static inline void ftrace_init(void) { } #endif diff --git a/kernel/module.c b/kernel/module.c index ff2d7359a418..acf7962936c4 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -3222,7 +3222,7 @@ static int find_module_sections(struct module *mod, struct load_info *info) #endif #ifdef CONFIG_FTRACE_MCOUNT_RECORD /* sechdrs[0].sh_size is always zero */ - mod->ftrace_callsites = section_objs(info, "__mcount_loc", + mod->ftrace_callsites = section_objs(info, FTRACE_CALLSITE_SECTION, sizeof(*mod->ftrace_callsites), &mod->num_ftrace_callsites); #endif -- cgit v1.2.3 From bd8b21d3dd661658addc1cd4cc869bab11d28596 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 17 Oct 2019 14:03:26 +0100 Subject: arm64: module: rework special section handling When we load a module, we have to perform some special work for a couple of named sections. To do this, we iterate over all of the module's sections, and perform work for each section we recognize. To make it easier to handle the unexpected absence of a section, and to make the section-specific logic easier to read, let's factor the section search into a helper. Something similar is already done in the core module loader and in other architectures (and ideally we'd unify these in future). If we expect a module to have an ftrace trampoline section, but it doesn't have one, we'll now reject loading the module. When ARM64_MODULE_PLTS is selected, any correctly built module should have one (and this is assumed by arm64's ftrace PLT code) and the absence of such a section implies something has gone wrong at build time. Subsequent patches will make use of the new helper.
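For comparison, the core module loader's equivalent helper looks roughly like this (a sketch of find_sec() from kernel/module.c; details may vary by kernel version):

| /* Find a module section: 0 means not found. */
| static unsigned int find_sec(const struct load_info *info, const char *name)
| {
| 	unsigned int i;
|
| 	for (i = 1; i < info->hdr->e_shnum; i++) {
| 		Elf_Shdr *shdr = &info->sechdrs[i];
| 		/* Alloc bit cleared means "ignore it" */
| 		if ((shdr->sh_flags & SHF_ALLOC) &&
| 		    strcmp(info->secstrings + shdr->sh_name, name) == 0)
| 			return i;
| 	}
| 	return 0;
| }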
Signed-off-by: Mark Rutland Reviewed-by: Ard Biesheuvel Reviewed-by: Torsten Duwe Tested-by: Amit Daniel Kachhap Tested-by: Torsten Duwe Cc: Catalin Marinas Cc: James Morse Cc: Will Deacon --- arch/arm64/kernel/module.c | 35 ++++++++++++++++++++++++++--------- 1 file changed, 26 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index 03ff15bffbb6..763a86d52fef 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -470,22 +470,39 @@ overflow: return -ENOEXEC; } -int module_finalize(const Elf_Ehdr *hdr, - const Elf_Shdr *sechdrs, - struct module *me) +static const Elf_Shdr *find_section(const Elf_Ehdr *hdr, + const Elf_Shdr *sechdrs, + const char *name) { const Elf_Shdr *s, *se; const char *secstrs = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; for (s = sechdrs, se = sechdrs + hdr->e_shnum; s < se; s++) { - if (strcmp(".altinstructions", secstrs + s->sh_name) == 0) - apply_alternatives_module((void *)s->sh_addr, s->sh_size); + if (strcmp(name, secstrs + s->sh_name) == 0) + return s; + } + + return NULL; +} + +int module_finalize(const Elf_Ehdr *hdr, + const Elf_Shdr *sechdrs, + struct module *me) +{ + const Elf_Shdr *s; + + s = find_section(hdr, sechdrs, ".altinstructions"); + if (s) + apply_alternatives_module((void *)s->sh_addr, s->sh_size); + #ifdef CONFIG_ARM64_MODULE_PLTS - if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE) && - !strcmp(".text.ftrace_trampoline", secstrs + s->sh_name)) - me->arch.ftrace_trampoline = (void *)s->sh_addr; -#endif + if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE)) { + s = find_section(hdr, sechdrs, ".text.ftrace_trampoline"); + if (!s) + return -ENOEXEC; + me->arch.ftrace_trampoline = (void *)s->sh_addr; } +#endif return 0; } -- cgit v1.2.3 From f1a54ae9af0da4d76239256ed640a93ab3aadac0 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 17 Oct 2019 15:26:38 +0100 Subject: arm64: module/ftrace: initialize PLT at load time Currently we lazily initialize a module's ftrace PLT at runtime when we install the first ftrace call. To do so we have to apply a number of sanity checks, transiently mark the module text as RW, and perform an IPI as part of handling Neoverse-N1 erratum #1542419. We only expect the ftrace trampoline to point at ftrace_caller() (AKA FTRACE_ADDR), so let's simplify all of this by initializing the PLT at module load time, before the module loader marks the module RO and performs the initial I-cache maintenance for the module. Thus we can rely on the module having been correctly initialized, and can simplify the runtime work necessary to install an ftrace call in a module. This will also allow for the removal of module_disable_ro(). Tested by forcing ftrace_make_call() to use the module PLT, and then loading up a module after setting up ftrace with: | echo ":mod:" > set_ftrace_filter; | echo function > current_tracer; | modprobe Since FTRACE_ADDR is only defined when CONFIG_DYNAMIC_FTRACE is selected, we wrap its use along with most of module_init_ftrace_plt() with ifdeffery rather than using IS_ENABLED().
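For reference, the PLT entry being initialized here is a short trampoline; on arm64 of this vintage it is, roughly (a sketch of struct plt_entry from arch/arm64/include/asm/module.h; exact comments and encodings may differ):

| struct plt_entry {
| 	/*
| 	 * A three-instruction veneer; x16 (IP0) is a legitimate
| 	 * scratch register across a branch target per AAPCS64.
| 	 */
| 	__le32	adrp;	/* adrp x16, <target page>       */
| 	__le32	add;	/* add  x16, x16, #<page offset> */
| 	__le32	br;	/* br   x16                      */
| };

get_plt_entry(FTRACE_ADDR, plt) fills those three words so the trampoline branches to ftrace_caller(), and doing this before the module is marked RO is what removes the runtime module_disable_ro() dance.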
Signed-off-by: Mark Rutland Reviewed-by: Amit Daniel Kachhap Reviewed-by: Ard Biesheuvel Reviewed-by: Torsten Duwe Tested-by: Amit Daniel Kachhap Tested-by: Torsten Duwe Cc: Catalin Marinas Cc: James Morse Cc: Peter Zijlstra Cc: Will Deacon --- arch/arm64/kernel/ftrace.c | 55 ++++++++++++---------------------------------- arch/arm64/kernel/module.c | 32 +++++++++++++++++---------- 2 files changed, 35 insertions(+), 52 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index 06e56b470315..822718eafdb4 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -73,9 +73,21 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) if (offset < -SZ_128M || offset >= SZ_128M) { #ifdef CONFIG_ARM64_MODULE_PLTS - struct plt_entry trampoline, *dst; struct module *mod; + /* + * There is only one ftrace trampoline per module. For now, + * this is not a problem since on arm64, all dynamic ftrace + * invocations are routed via ftrace_caller(). This will need + * to be revisited if support for multiple ftrace entry points + * is added in the future, but for now, the pr_err() below + * deals with a theoretical issue only. + */ + if (addr != FTRACE_ADDR) { + pr_err("ftrace: far branches to multiple entry points unsupported inside a single module\n"); + return -EINVAL; + } + /* * On kernels that support module PLTs, the offset between the * branch instruction and its target may legally exceed the @@ -93,46 +105,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) if (WARN_ON(!mod)) return -EINVAL; - /* - * There is only one ftrace trampoline per module. For now, - * this is not a problem since on arm64, all dynamic ftrace - * invocations are routed via ftrace_caller(). This will need - * to be revisited if support for multiple ftrace entry points - * is added in the future, but for now, the pr_err() below - * deals with a theoretical issue only. - * - * Note that PLTs are place relative, and plt_entries_equal() - * checks whether they point to the same target. Here, we need - * to check if the actual opcodes are in fact identical, - * regardless of the offset in memory so use memcmp() instead. - */ - dst = mod->arch.ftrace_trampoline; - trampoline = get_plt_entry(addr, dst); - if (memcmp(dst, &trampoline, sizeof(trampoline))) { - if (plt_entry_is_initialized(dst)) { - pr_err("ftrace: far branches to multiple entry points unsupported inside a single module\n"); - return -EINVAL; - } - - /* point the trampoline to our ftrace entry point */ - module_disable_ro(mod); - *dst = trampoline; - module_enable_ro(mod, true); - - /* - * Ensure updated trampoline is visible to instruction - * fetch before we patch in the branch. Although the - * architecture doesn't require an IPI in this case, - * Neoverse-N1 erratum #1542419 does require one - * if the TLB maintenance in module_enable_ro() is - * skipped due to rodata_enabled. It doesn't seem worth - * it to make it conditional given that this is - * certainly not a fast-path. 
- */ - flush_icache_range((unsigned long)&dst[0], - (unsigned long)&dst[1]); - } - addr = (unsigned long)dst; + addr = (unsigned long)mod->arch.ftrace_trampoline; #else /* CONFIG_ARM64_MODULE_PLTS */ return -EINVAL; #endif /* CONFIG_ARM64_MODULE_PLTS */ diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index 763a86d52fef..d0692ecb99bb 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -9,6 +9,7 @@ #include #include +#include #include #include #include @@ -485,24 +486,33 @@ static const Elf_Shdr *find_section(const Elf_Ehdr *hdr, return NULL; } +static int module_init_ftrace_plt(const Elf_Ehdr *hdr, + const Elf_Shdr *sechdrs, + struct module *mod) +{ +#if defined(CONFIG_ARM64_MODULE_PLTS) && defined(CONFIG_DYNAMIC_FTRACE) + const Elf_Shdr *s; + struct plt_entry *plt; + + s = find_section(hdr, sechdrs, ".text.ftrace_trampoline"); + if (!s) + return -ENOEXEC; + + plt = (void *)s->sh_addr; + *plt = get_plt_entry(FTRACE_ADDR, plt); + mod->arch.ftrace_trampoline = plt; +#endif + return 0; +} + int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *me) { const Elf_Shdr *s; - s = find_section(hdr, sechdrs, ".altinstructions"); if (s) apply_alternatives_module((void *)s->sh_addr, s->sh_size); -#ifdef CONFIG_ARM64_MODULE_PLTS - if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE)) { - s = find_section(hdr, sechdrs, ".text.ftrace_trampoline"); - if (!s) - return -ENOEXEC; - me->arch.ftrace_trampoline = (void *)s->sh_addr; - } -#endif - - return 0; + return module_init_ftrace_plt(hdr, sechdrs, me); } -- cgit v1.2.3 From e3bf8a67f759b498e09999804c3837688e03b304 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 18 Oct 2019 11:25:26 +0100 Subject: arm64: insn: add encoder for MOV (register) For FTRACE_WITH_REGS, we're going to want to generate a MOV (register) instruction as part of the callsite initialization. As MOV (register) is an alias for ORR (shifted register), we can generate this with aarch64_insn_gen_logical_shifted_reg(), but it's somewhat verbose and difficult to read in-context. Add an aarch64_insn_gen_move_reg() wrapper for this case so that we can write callers in a more straightforward way.
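A usage sketch, anticipating the later ftrace patch in this series (register and variant constants as defined in arch/arm64/include/asm/insn.h):

| /* Encode "mov x9, x30", i.e. "orr x9, xzr, x30". */
| u32 insn = aarch64_insn_gen_move_reg(AARCH64_INSN_REG_9,
| 				     AARCH64_INSN_REG_LR,
| 				     AARCH64_INSN_VARIANT_64BIT);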
Signed-off-by: Mark Rutland Reviewed-by: Ard Biesheuvel Reviewed-by: Torsten Duwe Tested-by: Amit Daniel Kachhap Tested-by: Torsten Duwe Cc: Catalin Marinas Cc: Will Deacon --- arch/arm64/include/asm/insn.h | 3 +++ arch/arm64/kernel/insn.c | 13 +++++++++++++ 2 files changed, 16 insertions(+) (limited to 'arch') diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h index 39e7780bedd6..bb313dde58a4 100644 --- a/arch/arm64/include/asm/insn.h +++ b/arch/arm64/include/asm/insn.h @@ -440,6 +440,9 @@ u32 aarch64_insn_gen_logical_shifted_reg(enum aarch64_insn_register dst, int shift, enum aarch64_insn_variant variant, enum aarch64_insn_logic_type type); +u32 aarch64_insn_gen_move_reg(enum aarch64_insn_register dst, + enum aarch64_insn_register src, + enum aarch64_insn_variant variant); u32 aarch64_insn_gen_logical_immediate(enum aarch64_insn_logic_type type, enum aarch64_insn_variant variant, enum aarch64_insn_register Rn, diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c index d801a7094076..513b29c3e735 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c @@ -1268,6 +1268,19 @@ u32 aarch64_insn_gen_logical_shifted_reg(enum aarch64_insn_register dst, return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_6, insn, shift); } +/* + * MOV (register) is architecturally an alias of ORR (shifted register) where + * MOV <*d>, <*m> is equivalent to ORR <*d>, <*ZR>, <*m> + */ +u32 aarch64_insn_gen_move_reg(enum aarch64_insn_register dst, + enum aarch64_insn_register src, + enum aarch64_insn_variant variant) +{ + return aarch64_insn_gen_logical_shifted_reg(dst, AARCH64_INSN_REG_ZR, + src, 0, variant, + AARCH64_INSN_LOGIC_ORR); +} + u32 aarch64_insn_gen_adr(unsigned long pc, unsigned long addr, enum aarch64_insn_register reg, enum aarch64_insn_adr_type type) -- cgit v1.2.3 From 1f377e043b3b8ef68caffe47bdad794f4e2cb030 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 18 Oct 2019 16:37:47 +0100 Subject: arm64: asm-offsets: add S_FP So that assembly code can more easily manipulate the FP (x29) within a pt_regs, add an S_FP asm-offsets definition. Signed-off-by: Mark Rutland Reviewed-by: Ard Biesheuvel Reviewed-by: Torsten Duwe Tested-by: Amit Daniel Kachhap Tested-by: Torsten Duwe Cc: Catalin Marinas Cc: Will Deacon --- arch/arm64/kernel/asm-offsets.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 214685760e1c..a5bdce8af65b 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -56,6 +56,7 @@ int main(void) DEFINE(S_X24, offsetof(struct pt_regs, regs[24])); DEFINE(S_X26, offsetof(struct pt_regs, regs[26])); DEFINE(S_X28, offsetof(struct pt_regs, regs[28])); + DEFINE(S_FP, offsetof(struct pt_regs, regs[29])); DEFINE(S_LR, offsetof(struct pt_regs, regs[30])); DEFINE(S_SP, offsetof(struct pt_regs, sp)); DEFINE(S_PSTATE, offsetof(struct pt_regs, pstate)); -- cgit v1.2.3 From 3b23e4991fb66f6d152f9055ede271a726ef9f21 Mon Sep 17 00:00:00 2001 From: Torsten Duwe Date: Fri, 8 Feb 2019 16:10:19 +0100 Subject: arm64: implement ftrace with regs This patch implements FTRACE_WITH_REGS for arm64, which allows a traced function's arguments (and some other registers) to be captured into a struct pt_regs, allowing these to be inspected and/or modified. This is a building block for live-patching, where a function's arguments may be forwarded to another function. 
This is also necessary to enable ftrace and in-kernel pointer authentication at the same time, as it allows the LR value to be captured and adjusted prior to signing. Using GCC's -fpatchable-function-entry=N option, we can have the compiler insert a configurable number of NOPs between the function entry point and the usual prologue. This also ensures functions are AAPCS compliant (e.g. disabling inter-procedural register allocation). For example, with -fpatchable-function-entry=2, GCC 8.1.0 compiles the following: | unsigned long bar(void); | | unsigned long foo(void) | { | return bar() + 1; | } ... to: | : | nop | nop | stp x29, x30, [sp, #-16]! | mov x29, sp | bl 0 | add x0, x0, #0x1 | ldp x29, x30, [sp], #16 | ret This patch builds the kernel with -fpatchable-function-entry=2, prefixing each function with two NOPs. To trace a function, we replace these NOPs with a sequence that saves the LR into a GPR, then calls an ftrace entry assembly function which saves this and other relevant registers: | mov x9, x30 | bl Since patchable functions are AAPCS compliant (and the kernel does not use x18 as a platform register), x9-x18 can be safely clobbered in the patched sequence and the ftrace entry code. There are now two ftrace entry functions, ftrace_regs_entry (which saves all GPRs), and ftrace_entry (which saves the bare minimum). A PLT is allocated for each within modules. Signed-off-by: Torsten Duwe [Mark: rework asm, comments, PLTs, initialization, commit message] Signed-off-by: Mark Rutland Reviewed-by: Amit Daniel Kachhap Reviewed-by: Ard Biesheuvel Reviewed-by: Torsten Duwe Tested-by: Amit Daniel Kachhap Tested-by: Torsten Duwe Cc: AKASHI Takahiro Cc: Catalin Marinas Cc: Josh Poimboeuf Cc: Julien Thierry Cc: Will Deacon --- arch/arm64/Kconfig | 2 + arch/arm64/Makefile | 5 ++ arch/arm64/include/asm/ftrace.h | 23 +++++++ arch/arm64/include/asm/module.h | 2 +- arch/arm64/kernel/entry-ftrace.S | 140 +++++++++++++++++++++++++++++++++++++-- arch/arm64/kernel/ftrace.c | 84 +++++++++++++++++++---- arch/arm64/kernel/module-plts.c | 3 +- arch/arm64/kernel/module.c | 18 +++-- 8 files changed, 252 insertions(+), 25 deletions(-) (limited to 'arch') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 950a56b71ff0..0ffb8596b8a1 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -143,6 +143,8 @@ config ARM64 select HAVE_DEBUG_KMEMLEAK select HAVE_DMA_CONTIGUOUS select HAVE_DYNAMIC_FTRACE + select HAVE_DYNAMIC_FTRACE_WITH_REGS \ + if $(cc-option,-fpatchable-function-entry=2) select HAVE_EFFICIENT_UNALIGNED_ACCESS select HAVE_FAST_GUP select HAVE_FTRACE_MCOUNT_RECORD diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 2c0238ce0551..1fbe24d4fdb6 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -95,6 +95,11 @@ ifeq ($(CONFIG_ARM64_MODULE_PLTS),y) KBUILD_LDS_MODULE += $(srctree)/arch/arm64/kernel/module.lds endif +ifeq ($(CONFIG_DYNAMIC_FTRACE_WITH_REGS),y) + KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY + CC_FLAGS_FTRACE := -fpatchable-function-entry=2 +endif + # Default value head-y := arch/arm64/kernel/head.o diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h index d48667b04c41..91fa4baa1a93 100644 --- a/arch/arm64/include/asm/ftrace.h +++ b/arch/arm64/include/asm/ftrace.h @@ -11,9 +11,20 @@ #include #define HAVE_FUNCTION_GRAPH_FP_TEST + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS +#define ARCH_SUPPORTS_FTRACE_OPS 1 +#else #define MCOUNT_ADDR ((unsigned long)_mcount) +#endif + +/* The BL at the callsite's adjusted rec->ip */ #define 
MCOUNT_INSN_SIZE AARCH64_INSN_SIZE +#define FTRACE_PLT_IDX 0 +#define FTRACE_REGS_PLT_IDX 1 +#define NR_FTRACE_PLTS 2 + /* * Currently, gcc tends to save the link register after the local variables * on the stack. This causes the max stack tracer to report the function @@ -43,6 +54,12 @@ extern void return_to_handler(void); static inline unsigned long ftrace_call_adjust(unsigned long addr) { + /* + * Adjust addr to point at the BL in the callsite. + * See ftrace_init_nop() for the callsite sequence. + */ + if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS)) + return addr + AARCH64_INSN_SIZE; /* * addr is the address of the mcount call instruction. * recordmcount does the necessary offset calculation. @@ -50,6 +67,12 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr) return addr; } +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS +struct dyn_ftrace; +int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec); +#define ftrace_init_nop ftrace_init_nop +#endif + #define ftrace_return_address(n) return_address(n) /* diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h index f80e13cbf8ec..1e93de68c044 100644 --- a/arch/arm64/include/asm/module.h +++ b/arch/arm64/include/asm/module.h @@ -21,7 +21,7 @@ struct mod_arch_specific { struct mod_plt_sec init; /* for CONFIG_DYNAMIC_FTRACE */ - struct plt_entry *ftrace_trampoline; + struct plt_entry *ftrace_trampolines; }; #endif diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S index 33d003d80121..4fe1514fcbfd 100644 --- a/arch/arm64/kernel/entry-ftrace.S +++ b/arch/arm64/kernel/entry-ftrace.S @@ -7,10 +7,137 @@ */ #include +#include #include #include #include +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS +/* + * Due to -fpatchable-function-entry=2, the compiler has placed two NOPs before + * the regular function prologue. For an enabled callsite, ftrace_init_nop() and + * ftrace_make_call() have patched those NOPs to: + * + * MOV X9, LR + * BL + * + * ... where is either ftrace_caller or ftrace_regs_caller. + * + * Each instrumented function follows the AAPCS, so here x0-x8 and x19-x30 are + * live, and x9-x18 are safe to clobber. + * + * We save the callsite's context into a pt_regs before invoking any ftrace + * callbacks. So that we can get a sensible backtrace, we create a stack record + * for the callsite and the ftrace entry assembly. This is not sufficient for + * reliable stacktrace: until we create the callsite stack record, its caller + * is missing from the LR and existing chain of frame records. 
+ */ + .macro ftrace_regs_entry, allregs=0 + /* Make room for pt_regs, plus a callee frame */ + sub sp, sp, #(S_FRAME_SIZE + 16) + + /* Save function arguments (and x9 for simplicity) */ + stp x0, x1, [sp, #S_X0] + stp x2, x3, [sp, #S_X2] + stp x4, x5, [sp, #S_X4] + stp x6, x7, [sp, #S_X6] + stp x8, x9, [sp, #S_X8] + + /* Optionally save the callee-saved registers, always save the FP */ + .if \allregs == 1 + stp x10, x11, [sp, #S_X10] + stp x12, x13, [sp, #S_X12] + stp x14, x15, [sp, #S_X14] + stp x16, x17, [sp, #S_X16] + stp x18, x19, [sp, #S_X18] + stp x20, x21, [sp, #S_X20] + stp x22, x23, [sp, #S_X22] + stp x24, x25, [sp, #S_X24] + stp x26, x27, [sp, #S_X26] + stp x28, x29, [sp, #S_X28] + .else + str x29, [sp, #S_FP] + .endif + + /* Save the callsite's SP and LR */ + add x10, sp, #(S_FRAME_SIZE + 16) + stp x9, x10, [sp, #S_LR] + + /* Save the PC after the ftrace callsite */ + str x30, [sp, #S_PC] + + /* Create a frame record for the callsite above pt_regs */ + stp x29, x9, [sp, #S_FRAME_SIZE] + add x29, sp, #S_FRAME_SIZE + + /* Create our frame record within pt_regs. */ + stp x29, x30, [sp, #S_STACKFRAME] + add x29, sp, #S_STACKFRAME + .endm + +ENTRY(ftrace_regs_caller) + ftrace_regs_entry 1 + b ftrace_common +ENDPROC(ftrace_regs_caller) + +ENTRY(ftrace_caller) + ftrace_regs_entry 0 + b ftrace_common +ENDPROC(ftrace_caller) + +ENTRY(ftrace_common) + sub x0, x30, #AARCH64_INSN_SIZE // ip (callsite's BL insn) + mov x1, x9 // parent_ip (callsite's LR) + ldr_l x2, function_trace_op // op + mov x3, sp // regs + +GLOBAL(ftrace_call) + bl ftrace_stub + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +GLOBAL(ftrace_graph_call) // ftrace_graph_caller(); + nop // If enabled, this will be replaced + // "b ftrace_graph_caller" +#endif + +/* + * At the callsite x0-x8 and x19-x30 were live. Any C code will have preserved + * x19-x29 per the AAPCS, and we created frame records upon entry, so we need + * to restore x0-x8, x29, and x30. 
+ */ +ftrace_common_return: + /* Restore function arguments */ + ldp x0, x1, [sp] + ldp x2, x3, [sp, #S_X2] + ldp x4, x5, [sp, #S_X4] + ldp x6, x7, [sp, #S_X6] + ldr x8, [sp, #S_X8] + + /* Restore the callsite's FP, LR, PC */ + ldr x29, [sp, #S_FP] + ldr x30, [sp, #S_LR] + ldr x9, [sp, #S_PC] + + /* Restore the callsite's SP */ + add sp, sp, #S_FRAME_SIZE + 16 + + ret x9 +ENDPROC(ftrace_common) + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +ENTRY(ftrace_graph_caller) + ldr x0, [sp, #S_PC] + sub x0, x0, #AARCH64_INSN_SIZE // ip (callsite's BL insn) + add x1, sp, #S_LR // parent_ip (callsite's LR) + ldr x2, [sp, #S_FRAME_SIZE] // parent fp (callsite's FP) + bl prepare_ftrace_return + b ftrace_common_return +ENDPROC(ftrace_graph_caller) +#else +#endif + +#else /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ + /* * Gcc with -pg will put the following code in the beginning of each function: * mov x0, x30 @@ -160,11 +287,6 @@ GLOBAL(ftrace_graph_call) // ftrace_graph_caller(); mcount_exit ENDPROC(ftrace_caller) -#endif /* CONFIG_DYNAMIC_FTRACE */ - -ENTRY(ftrace_stub) - ret -ENDPROC(ftrace_stub) #ifdef CONFIG_FUNCTION_GRAPH_TRACER /* @@ -184,7 +306,15 @@ ENTRY(ftrace_graph_caller) mcount_exit ENDPROC(ftrace_graph_caller) +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ +#endif /* CONFIG_DYNAMIC_FTRACE */ +#endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ + +ENTRY(ftrace_stub) + ret +ENDPROC(ftrace_stub) +#ifdef CONFIG_FUNCTION_GRAPH_TRACER /* * void return_to_handler(void) * diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index 822718eafdb4..aea652c33a38 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -62,6 +62,19 @@ int ftrace_update_ftrace_func(ftrace_func_t func) return ftrace_modify_code(pc, 0, new, false); } +#ifdef CONFIG_ARM64_MODULE_PLTS +static struct plt_entry *get_ftrace_plt(struct module *mod, unsigned long addr) +{ + struct plt_entry *plt = mod->arch.ftrace_trampolines; + + if (addr == FTRACE_ADDR) + return &plt[FTRACE_PLT_IDX]; + if (addr == FTRACE_REGS_ADDR && IS_ENABLED(CONFIG_FTRACE_WITH_REGS)) + return &plt[FTRACE_REGS_PLT_IDX]; + return NULL; +} +#endif + /* * Turn on the call to ftrace_caller() in instrumented function */ @@ -74,19 +87,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) if (offset < -SZ_128M || offset >= SZ_128M) { #ifdef CONFIG_ARM64_MODULE_PLTS struct module *mod; - - /* - * There is only one ftrace trampoline per module. For now, - * this is not a problem since on arm64, all dynamic ftrace - * invocations are routed via ftrace_caller(). This will need - * to be revisited if support for multiple ftrace entry points - * is added in the future, but for now, the pr_err() below - * deals with a theoretical issue only. 
- */ - if (addr != FTRACE_ADDR) { - pr_err("ftrace: far branches to multiple entry points unsupported inside a single module\n"); - return -EINVAL; - } + struct plt_entry *plt; /* * On kernels that support module PLTs, the offset between the @@ -105,7 +106,13 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) if (WARN_ON(!mod)) return -EINVAL; - addr = (unsigned long)mod->arch.ftrace_trampoline; + plt = get_ftrace_plt(mod, addr); + if (!plt) { + pr_err("ftrace: no module PLT for %ps\n", (void *)addr); + return -EINVAL; + } + + addr = (unsigned long)plt; #else /* CONFIG_ARM64_MODULE_PLTS */ return -EINVAL; #endif /* CONFIG_ARM64_MODULE_PLTS */ @@ -117,6 +124,55 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) return ftrace_modify_code(pc, old, new, true); } +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS +int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, + unsigned long addr) +{ + unsigned long pc = rec->ip; + u32 old, new; + + old = aarch64_insn_gen_branch_imm(pc, old_addr, + AARCH64_INSN_BRANCH_LINK); + new = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK); + + return ftrace_modify_code(pc, old, new, true); +} + +/* + * The compiler has inserted two NOPs before the regular function prologue. + * All instrumented functions follow the AAPCS, so x0-x8 and x19-x30 are live, + * and x9-x18 are free for our use. + * + * At runtime we want to be able to swing a single NOP <-> BL to enable or + * disable the ftrace call. The BL requires us to save the original LR value, + * so here we insert a over the first NOP so the instructions + * before the regular prologue are: + * + * | Compiled | Disabled | Enabled | + * +----------+------------+------------+ + * | NOP | MOV X9, LR | MOV X9, LR | + * | NOP | NOP | BL | + * + * The LR value will be recovered by ftrace_regs_entry, and restored into LR + * before returning to the regular function prologue. When a function is not + * being traced, the MOV is not harmful given x9 is not live per the AAPCS. + * + * Note: ftrace_process_locs() has pre-adjusted rec->ip to be the address of + * the BL. 
+ */ +int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec) +{ + unsigned long pc = rec->ip - AARCH64_INSN_SIZE; + u32 old, new; + + old = aarch64_insn_gen_nop(); + new = aarch64_insn_gen_move_reg(AARCH64_INSN_REG_9, + AARCH64_INSN_REG_LR, + AARCH64_INSN_VARIANT_64BIT); + return ftrace_modify_code(pc, old, new, true); +} +#endif + /* * Turn off the call to ftrace_caller() in instrumented function */ diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c index b182442b87a3..65b08a74aec6 100644 --- a/arch/arm64/kernel/module-plts.c +++ b/arch/arm64/kernel/module-plts.c @@ -4,6 +4,7 @@ */ #include +#include #include #include #include @@ -330,7 +331,7 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, tramp->sh_type = SHT_NOBITS; tramp->sh_flags = SHF_EXECINSTR | SHF_ALLOC; tramp->sh_addralign = __alignof__(struct plt_entry); - tramp->sh_size = sizeof(struct plt_entry); + tramp->sh_size = NR_FTRACE_PLTS * sizeof(struct plt_entry); } return 0; diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index d0692ecb99bb..1cd1a4d0ed30 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -486,21 +486,31 @@ static const Elf_Shdr *find_section(const Elf_Ehdr *hdr, return NULL; } +static inline void __init_plt(struct plt_entry *plt, unsigned long addr) +{ + *plt = get_plt_entry(addr, plt); +} + static int module_init_ftrace_plt(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *mod) { #if defined(CONFIG_ARM64_MODULE_PLTS) && defined(CONFIG_DYNAMIC_FTRACE) const Elf_Shdr *s; - struct plt_entry *plt; + struct plt_entry *plts; s = find_section(hdr, sechdrs, ".text.ftrace_trampoline"); if (!s) return -ENOEXEC; - plt = (void *)s->sh_addr; - *plt = get_plt_entry(FTRACE_ADDR, plt); - mod->arch.ftrace_trampoline = plt; + plts = (void *)s->sh_addr; + + __init_plt(&plts[FTRACE_PLT_IDX], FTRACE_ADDR); + + if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS)) + __init_plt(&plts[FTRACE_REGS_PLT_IDX], FTRACE_REGS_ADDR); + + mod->arch.ftrace_trampolines = plts; #endif return 0; } -- cgit v1.2.3 From 7f08ae53a7e3ac2a2f86175226ee19f0117d5b6c Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 21 Oct 2019 15:05:52 +0100 Subject: arm64: ftrace: minimize ifdeffery Now that we no longer refer to mod->arch.ftrace_trampolines in the body of ftrace_make_call(), we can use IS_ENABLED() rather than ifdeffery, and make the code easier to follow. Likewise in ftrace_make_nop(). Let's do so. 
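The underlying pattern: IS_ENABLED() keeps the guarded code visible to the compiler (so it is parsed and type-checked in every configuration) while constant-folding lets dead-code elimination drop it; a generic sketch:

| if (!IS_ENABLED(CONFIG_ARM64_MODULE_PLTS))
| 	return -EINVAL;
| /*
|  * Everything below compiles in every configuration, but is
|  * eliminated as dead code when the option is off -- unlike an
|  * #ifdef block, which hides it from the compiler entirely.
|  */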
Signed-off-by: Mark Rutland Reviewed-by: Ard Biesheuvel Reviewed-by: Torsten Duwe Tested-by: Amit Daniel Kachhap Tested-by: Torsten Duwe Cc: Catalin Marinas Cc: Will Deacon --- arch/arm64/kernel/ftrace.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index aea652c33a38..8618faa82e6d 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -62,18 +62,18 @@ int ftrace_update_ftrace_func(ftrace_func_t func) return ftrace_modify_code(pc, 0, new, false); } -#ifdef CONFIG_ARM64_MODULE_PLTS static struct plt_entry *get_ftrace_plt(struct module *mod, unsigned long addr) { +#ifdef CONFIG_ARM64_MODULE_PLTS struct plt_entry *plt = mod->arch.ftrace_trampolines; if (addr == FTRACE_ADDR) return &plt[FTRACE_PLT_IDX]; if (addr == FTRACE_REGS_ADDR && IS_ENABLED(CONFIG_FTRACE_WITH_REGS)) return &plt[FTRACE_REGS_PLT_IDX]; +#endif return NULL; } -#endif /* * Turn on the call to ftrace_caller() in instrumented function @@ -85,10 +85,12 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) long offset = (long)pc - (long)addr; if (offset < -SZ_128M || offset >= SZ_128M) { -#ifdef CONFIG_ARM64_MODULE_PLTS struct module *mod; struct plt_entry *plt; + if (!IS_ENABLED(CONFIG_ARM64_MODULE_PLTS)) + return -EINVAL; + /* * On kernels that support module PLTs, the offset between the * branch instruction and its target may legally exceed the @@ -113,9 +115,6 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) } addr = (unsigned long)plt; -#else /* CONFIG_ARM64_MODULE_PLTS */ - return -EINVAL; -#endif /* CONFIG_ARM64_MODULE_PLTS */ } old = aarch64_insn_gen_nop(); @@ -185,9 +184,11 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, long offset = (long)pc - (long)addr; if (offset < -SZ_128M || offset >= SZ_128M) { -#ifdef CONFIG_ARM64_MODULE_PLTS u32 replaced; + if (!IS_ENABLED(CONFIG_ARM64_MODULE_PLTS)) + return -EINVAL; + /* * 'mod' is only set at module load time, but if we end up * dealing with an out-of-range condition, we can assume it @@ -218,9 +219,6 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, return -EINVAL; validate = false; -#else /* CONFIG_ARM64_MODULE_PLTS */ - return -EINVAL; -#endif /* CONFIG_ARM64_MODULE_PLTS */ } else { old = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK); -- cgit v1.2.3 From e8d255e4703a820bab46f856460f318a60d42ace Mon Sep 17 00:00:00 2001 From: "Ben Dooks (Codethink)" Date: Tue, 22 Oct 2019 13:50:06 +0100 Subject: xen: mm: include <xen/xen-ops.h> for missing declarations Include <xen/xen-ops.h> for the xen_{create,destroy}_contiguous_region() call declarations. Fixes the following sparse warnings: arch/arm/xen/mm.c:119:5: warning: symbol 'xen_create_contiguous_region' was not declared. Should it be static? arch/arm/xen/mm.c:131:6: warning: symbol 'xen_destroy_contiguous_region' was not declared. Should it be static?
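The general rule behind fixes like this: the file that defines a global symbol should include the header that declares it, so sparse can match the two instead of suggesting the symbol be made static. A minimal sketch with hypothetical names (widget.h and widget_setup() are illustrative only):

| /* widget.h (hypothetical): declaration seen by all users */
| int widget_setup(int level);
|
| /* widget.c: including the header lets sparse check the definition
|  * against the declaration instead of warning it "was not declared". */
| #include "widget.h"
|
| int widget_setup(int level)
| {
| 	return level > 0 ? 0 : -1;
| }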
Signed-off-by: Ben Dooks (Codethink) Reviewed-by: Stefano Stabellini Signed-off-by: Juergen Gross --- arch/arm/xen/mm.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c index 38fa917c8585..6feb6b78b13c 100644 --- a/arch/arm/xen/mm.c +++ b/arch/arm/xen/mm.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include -- cgit v1.2.3 From 88920ddebd2f4afe67eb39a9fe3661275e6e3e2d Mon Sep 17 00:00:00 2001 From: "Ben Dooks (Codethink)" Date: Tue, 22 Oct 2019 13:52:51 +0100 Subject: xen: mm: make xen_mm_init static The xen_mm_init is not exported or used outside of the file it is declared in, so make it static. This fixes the following sparse warning: arch/arm/xen/mm.c:136:12: warning: symbol 'xen_mm_init' was not declared. Should it be static? Signed-off-by: Ben Dooks (Codethink) Reviewed-by: Stefano Stabellini Signed-off-by: Juergen Gross --- arch/arm/xen/mm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c index 6feb6b78b13c..3c7645d7b9b4 100644 --- a/arch/arm/xen/mm.c +++ b/arch/arm/xen/mm.c @@ -134,7 +134,7 @@ void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order) return; } -int __init xen_mm_init(void) +static int __init xen_mm_init(void) { struct gnttab_cache_flush cflush; if (!xen_initial_domain()) -- cgit v1.2.3 From bff3b04460a80f425442fe8e5c6ee8c3ebef611f Mon Sep 17 00:00:00 2001 From: Nicolas Saenz Julienne Date: Thu, 7 Nov 2019 10:56:11 +0100 Subject: arm64: mm: reserve CMA and crashkernel in ZONE_DMA32 With the introduction of ZONE_DMA in arm64 we moved the default CMA and crashkernel reservation into that area. This caused a regression on big machines that need big CMA and crashkernel reservations. Note that ZONE_DMA is only 1GB big. Restore the previous behavior as the wide majority of devices are OK with reserving these in ZONE_DMA32. The ones that need them in ZONE_DMA will configure it explicitly. Fixes: 1a8e1cef7603 ("arm64: use both ZONE_DMA and ZONE_DMA32") Reported-by: Qian Cai Signed-off-by: Nicolas Saenz Julienne Signed-off-by: Catalin Marinas --- arch/arm64/mm/init.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 35f27b839101..d933589c48e8 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -91,7 +91,7 @@ static void __init reserve_crashkernel(void) if (crash_base == 0) { /* Current arm64 boot protocol requires 2MB alignment */ - crash_base = memblock_find_in_range(0, ARCH_LOW_ADDRESS_LIMIT, + crash_base = memblock_find_in_range(0, arm64_dma32_phys_limit, crash_size, SZ_2M); if (crash_base == 0) { pr_warn("cannot allocate crashkernel (size:0x%llx)\n", @@ -459,7 +459,7 @@ void __init arm64_memblock_init(void) high_memory = __va(memblock_end_of_DRAM() - 1) + 1; - dma_contiguous_reserve(arm64_dma_phys_limit ? : arm64_dma32_phys_limit); + dma_contiguous_reserve(arm64_dma32_phys_limit); } void __init bootmem_init(void) -- cgit v1.2.3 From e9f37e0900585bad631fbadbc2bee9af61ba0d0d Mon Sep 17 00:00:00 2001 From: Joe Lawrence Date: Wed, 6 Nov 2019 22:29:58 -0500 Subject: x86/stacktrace: update kconfig help text for reliable unwinders commit 6415b38bae26 ("x86/stacktrace: Enable HAVE_RELIABLE_STACKTRACE for the ORC unwinder") added the ORC unwinder as a "reliable" unwinder. Update the help text to reflect that change: the frame pointer unwinder is no longer the only one that can provide HAVE_RELIABLE_STACKTRACE. 
Link: http://lkml.kernel.org/r/20191107032958.14034-1-joe.lawrence@redhat.com To: linux-kernel@vger.kernel.org To: live-patching@vger.kernel.org Signed-off-by: Joe Lawrence Acked-by: Josh Poimboeuf Acked-by: Miroslav Benes Reviewed-by: Kamalesh Babulal Signed-off-by: Petr Mladek --- arch/x86/Kconfig.debug | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index bf9cd83de777..409c00f74e60 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -316,10 +316,6 @@ config UNWINDER_FRAME_POINTER unwinder, but the kernel text size will grow by ~3% and the kernel's overall performance will degrade by roughly 5-10%. - This option is recommended if you want to use the livepatch - consistency model, as this is currently the only way to get a - reliable stack trace (CONFIG_HAVE_RELIABLE_STACKTRACE). - config UNWINDER_GUESS bool "Guess unwinder" depends on EXPERT -- cgit v1.2.3 From 166f11d11f6f70439830d09bfa5552ec1b368494 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Thu, 7 Nov 2019 15:18:38 +0100 Subject: s390/bpf: Use kvcalloc for addrs array A BPF program may consist of 1m instructions, which means JIT instruction-address mapping can be as large as 4m. s390 has FORCE_MAX_ZONEORDER=9 (for memory hotplug reasons), which means maximum kmalloc size is 1m. This makes it impossible to JIT programs with more than 256k instructions. Fix by using kvcalloc, which falls back to vmalloc for larger allocations. An alternative would be to use a radix tree, but that is not supported by bpf_prog_fill_jited_linfo. Signed-off-by: Ilya Leoshkevich Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20191107141838.92202-1-iii@linux.ibm.com --- arch/s390/net/bpf_jit_comp.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index ce88211b9c6c..c8c16b5eed6b 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -1369,7 +1370,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) } memset(&jit, 0, sizeof(jit)); - jit.addrs = kcalloc(fp->len + 1, sizeof(*jit.addrs), GFP_KERNEL); + jit.addrs = kvcalloc(fp->len + 1, sizeof(*jit.addrs), GFP_KERNEL); if (jit.addrs == NULL) { fp = orig_fp; goto out; @@ -1422,7 +1423,7 @@ skip_init_ctx: if (!fp->is_func || extra_pass) { bpf_prog_fill_jited_linfo(fp, jit.addrs + 1); free_addrs: - kfree(jit.addrs); + kvfree(jit.addrs); kfree(jit_data); fp->aux->jit_data = NULL; } -- cgit v1.2.3 From 6ad2e1a00729f9a27b80f8d9962520b89420280d Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Thu, 7 Nov 2019 12:32:11 +0100 Subject: s390/bpf: Wrap JIT macro parameter usages in parentheses This change does not alter JIT behavior; it only makes it possible to safely invoke JIT macros with complex arguments in the future. 
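Why the parentheses matter: an operator inside a macro argument can otherwise rebind against operators in the macro body. A minimal illustration (hypothetical macro, not taken from the JIT):

| #define SHIFT_BAD(x)	(1 << x * 2)	/* argument not parenthesized */
| #define SHIFT_OK(x)	(1 << (x) * 2)
|
| /* SHIFT_BAD(a + 1) expands to (1 << a + 1 * 2) == 1 << (a + 2): wrong. */
| /* SHIFT_OK(a + 1)  expands to (1 << (a + 1) * 2): as intended.         */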
Signed-off-by: Ilya Leoshkevich Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20191107113211.90105-1-iii@linux.ibm.com --- arch/s390/net/bpf_jit_comp.c | 62 ++++++++++++++++++++++---------------------- 1 file changed, 31 insertions(+), 31 deletions(-) (limited to 'arch') diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index c8c16b5eed6b..35661c2b736e 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -132,13 +132,13 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1) #define _EMIT2(op) \ ({ \ if (jit->prg_buf) \ - *(u16 *) (jit->prg_buf + jit->prg) = op; \ + *(u16 *) (jit->prg_buf + jit->prg) = (op); \ jit->prg += 2; \ }) #define EMIT2(op, b1, b2) \ ({ \ - _EMIT2(op | reg(b1, b2)); \ + _EMIT2((op) | reg(b1, b2)); \ REG_SET_SEEN(b1); \ REG_SET_SEEN(b2); \ }) @@ -146,20 +146,20 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1) #define _EMIT4(op) \ ({ \ if (jit->prg_buf) \ - *(u32 *) (jit->prg_buf + jit->prg) = op; \ + *(u32 *) (jit->prg_buf + jit->prg) = (op); \ jit->prg += 4; \ }) #define EMIT4(op, b1, b2) \ ({ \ - _EMIT4(op | reg(b1, b2)); \ + _EMIT4((op) | reg(b1, b2)); \ REG_SET_SEEN(b1); \ REG_SET_SEEN(b2); \ }) #define EMIT4_RRF(op, b1, b2, b3) \ ({ \ - _EMIT4(op | reg_high(b3) << 8 | reg(b1, b2)); \ + _EMIT4((op) | reg_high(b3) << 8 | reg(b1, b2)); \ REG_SET_SEEN(b1); \ REG_SET_SEEN(b2); \ REG_SET_SEEN(b3); \ @@ -168,13 +168,13 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1) #define _EMIT4_DISP(op, disp) \ ({ \ unsigned int __disp = (disp) & 0xfff; \ - _EMIT4(op | __disp); \ + _EMIT4((op) | __disp); \ }) #define EMIT4_DISP(op, b1, b2, disp) \ ({ \ - _EMIT4_DISP(op | reg_high(b1) << 16 | \ - reg_high(b2) << 8, disp); \ + _EMIT4_DISP((op) | reg_high(b1) << 16 | \ + reg_high(b2) << 8, (disp)); \ REG_SET_SEEN(b1); \ REG_SET_SEEN(b2); \ }) @@ -182,21 +182,21 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1) #define EMIT4_IMM(op, b1, imm) \ ({ \ unsigned int __imm = (imm) & 0xffff; \ - _EMIT4(op | reg_high(b1) << 16 | __imm); \ + _EMIT4((op) | reg_high(b1) << 16 | __imm); \ REG_SET_SEEN(b1); \ }) #define EMIT4_PCREL(op, pcrel) \ ({ \ long __pcrel = ((pcrel) >> 1) & 0xffff; \ - _EMIT4(op | __pcrel); \ + _EMIT4((op) | __pcrel); \ }) #define _EMIT6(op1, op2) \ ({ \ if (jit->prg_buf) { \ - *(u32 *) (jit->prg_buf + jit->prg) = op1; \ - *(u16 *) (jit->prg_buf + jit->prg + 4) = op2; \ + *(u32 *) (jit->prg_buf + jit->prg) = (op1); \ + *(u16 *) (jit->prg_buf + jit->prg + 4) = (op2); \ } \ jit->prg += 6; \ }) @@ -204,20 +204,20 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1) #define _EMIT6_DISP(op1, op2, disp) \ ({ \ unsigned int __disp = (disp) & 0xfff; \ - _EMIT6(op1 | __disp, op2); \ + _EMIT6((op1) | __disp, op2); \ }) #define _EMIT6_DISP_LH(op1, op2, disp) \ ({ \ - u32 _disp = (u32) disp; \ + u32 _disp = (u32) (disp); \ unsigned int __disp_h = _disp & 0xff000; \ unsigned int __disp_l = _disp & 0x00fff; \ - _EMIT6(op1 | __disp_l, op2 | __disp_h >> 4); \ + _EMIT6((op1) | __disp_l, (op2) | __disp_h >> 4); \ }) #define EMIT6_DISP_LH(op1, op2, b1, b2, b3, disp) \ ({ \ - _EMIT6_DISP_LH(op1 | reg(b1, b2) << 16 | \ + _EMIT6_DISP_LH((op1) | reg(b1, b2) << 16 | \ reg_high(b3) << 8, op2, disp); \ REG_SET_SEEN(b1); \ REG_SET_SEEN(b2); \ @@ -227,8 +227,8 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1) #define EMIT6_PCREL_LABEL(op1, op2, b1, b2, label, mask) \ ({ \ int rel = (jit->labels[label] - jit->prg) >> 1; \ - _EMIT6(op1 | reg(b1, b2) << 16 | 
(rel & 0xffff), \ - op2 | mask << 12); \ + _EMIT6((op1) | reg(b1, b2) << 16 | (rel & 0xffff), \ + (op2) | (mask) << 12); \ REG_SET_SEEN(b1); \ REG_SET_SEEN(b2); \ }) @@ -236,43 +236,43 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1) #define EMIT6_PCREL_IMM_LABEL(op1, op2, b1, imm, label, mask) \ ({ \ int rel = (jit->labels[label] - jit->prg) >> 1; \ - _EMIT6(op1 | (reg_high(b1) | mask) << 16 | \ - (rel & 0xffff), op2 | (imm & 0xff) << 8); \ + _EMIT6((op1) | (reg_high(b1) | (mask)) << 16 | \ + (rel & 0xffff), (op2) | ((imm) & 0xff) << 8); \ REG_SET_SEEN(b1); \ - BUILD_BUG_ON(((unsigned long) imm) > 0xff); \ + BUILD_BUG_ON(((unsigned long) (imm)) > 0xff); \ }) #define EMIT6_PCREL(op1, op2, b1, b2, i, off, mask) \ ({ \ /* Branch instruction needs 6 bytes */ \ - int rel = (addrs[i + off + 1] - (addrs[i + 1] - 6)) / 2;\ - _EMIT6(op1 | reg(b1, b2) << 16 | (rel & 0xffff), op2 | mask); \ + int rel = (addrs[(i) + (off) + 1] - (addrs[(i) + 1] - 6)) / 2;\ + _EMIT6((op1) | reg(b1, b2) << 16 | (rel & 0xffff), (op2) | (mask));\ REG_SET_SEEN(b1); \ REG_SET_SEEN(b2); \ }) #define EMIT6_PCREL_RILB(op, b, target) \ ({ \ - int rel = (target - jit->prg) / 2; \ - _EMIT6(op | reg_high(b) << 16 | rel >> 16, rel & 0xffff); \ + int rel = ((target) - jit->prg) / 2; \ + _EMIT6((op) | reg_high(b) << 16 | rel >> 16, rel & 0xffff);\ REG_SET_SEEN(b); \ }) #define EMIT6_PCREL_RIL(op, target) \ ({ \ - int rel = (target - jit->prg) / 2; \ - _EMIT6(op | rel >> 16, rel & 0xffff); \ + int rel = ((target) - jit->prg) / 2; \ + _EMIT6((op) | rel >> 16, rel & 0xffff); \ }) #define _EMIT6_IMM(op, imm) \ ({ \ unsigned int __imm = (imm); \ - _EMIT6(op | (__imm >> 16), __imm & 0xffff); \ + _EMIT6((op) | (__imm >> 16), __imm & 0xffff); \ }) #define EMIT6_IMM(op, b1, imm) \ ({ \ - _EMIT6_IMM(op | reg_high(b1) << 16, imm); \ + _EMIT6_IMM((op) | reg_high(b1) << 16, imm); \ REG_SET_SEEN(b1); \ }) @@ -282,7 +282,7 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1) ret = jit->lit - jit->base_ip; \ jit->seen |= SEEN_LITERAL; \ if (jit->prg_buf) \ - *(u32 *) (jit->prg_buf + jit->lit) = (u32) val; \ + *(u32 *) (jit->prg_buf + jit->lit) = (u32) (val);\ jit->lit += 4; \ ret; \ }) @@ -293,7 +293,7 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1) ret = jit->lit - jit->base_ip; \ jit->seen |= SEEN_LITERAL; \ if (jit->prg_buf) \ - *(u64 *) (jit->prg_buf + jit->lit) = (u64) val; \ + *(u64 *) (jit->prg_buf + jit->lit) = (u64) (val);\ jit->lit += 8; \ ret; \ }) -- cgit v1.2.3 From dab2e9eb187cb53c951c0c556172a73ac7f0e834 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Thu, 7 Nov 2019 12:40:33 +0100 Subject: s390/bpf: Remove unused SEEN_RET0, SEEN_REG_AX and ret0_ip We don't need them since commit e1cf4befa297 ("bpf, s390x: remove ld_abs/ld_ind") and commit a3212b8f15d8 ("bpf, s390x: remove obsolete exception handling from div/mod"). Also, use BIT(n) instead of 1 << n, because checkpatch says so. 
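For reference, BIT() is the standard helper from include/linux/bits.h; it is roughly:

| #define BIT(nr)	(UL(1) << (nr))
|
| /* so, e.g.: */
| #define SEEN_MEM	BIT(0)	/* == 0x1 */
| #define SEEN_LITERAL	BIT(1)	/* == 0x2 */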
Signed-off-by: Ilya Leoshkevich Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20191107114033.90505-1-iii@linux.ibm.com --- arch/s390/net/bpf_jit_comp.c | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 35661c2b736e..1115071c8ff7 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -42,7 +42,6 @@ struct bpf_jit { int lit_start; /* Start of literal pool */ int lit; /* Current position in literal pool */ int base_ip; /* Base address for literal pool */ - int ret0_ip; /* Address of return 0 */ int exit_ip; /* Address of exit */ int r1_thunk_ip; /* Address of expoline thunk for 'br %r1' */ int r14_thunk_ip; /* Address of expoline thunk for 'br %r14' */ @@ -52,12 +51,10 @@ struct bpf_jit { #define BPF_SIZE_MAX 0xffff /* Max size for program (16 bit branches) */ -#define SEEN_MEM (1 << 0) /* use mem[] for temporary storage */ -#define SEEN_RET0 (1 << 1) /* ret0_ip points to a valid return 0 */ -#define SEEN_LITERAL (1 << 2) /* code uses literals */ -#define SEEN_FUNC (1 << 3) /* calls C functions */ -#define SEEN_TAIL_CALL (1 << 4) /* code uses tail calls */ -#define SEEN_REG_AX (1 << 5) /* code uses constant blinding */ +#define SEEN_MEM BIT(0) /* use mem[] for temporary storage */ +#define SEEN_LITERAL BIT(1) /* code uses literals */ +#define SEEN_FUNC BIT(2) /* calls C functions */ +#define SEEN_TAIL_CALL BIT(3) /* code uses tail calls */ #define SEEN_STACK (SEEN_FUNC | SEEN_MEM) /* @@ -447,12 +444,6 @@ static void bpf_jit_prologue(struct bpf_jit *jit, u32 stack_depth) */ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth) { - /* Return 0 */ - if (jit->seen & SEEN_RET0) { - jit->ret0_ip = jit->prg; - /* lghi %b0,0 */ - EMIT4_IMM(0xa7090000, BPF_REG_0, 0); - } jit->exit_ip = jit->prg; /* Load exit code: lgr %r2,%b0 */ EMIT4(0xb9040000, REG_2, BPF_REG_0); @@ -515,8 +506,6 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, s16 off = insn->off; unsigned int mask; - if (dst_reg == BPF_REG_AX || src_reg == BPF_REG_AX) - jit->seen |= SEEN_REG_AX; switch (insn->code) { /* * BPF_MOV */ case BPF_JMP | BPF_EXIT: /* return b0 */ last = (i == fp->len - 1) ? 1 : 0; - if (last && !(jit->seen & SEEN_RET0)) + if (last) break; /* j */ EMIT4_PCREL(0xa7f40000, jit->exit_ip - jit->prg); -- cgit v1.2.3 From ef2e78ddadbb939ce79553b10dee0131d65d8f3e Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 7 Nov 2019 16:04:12 +0000 Subject: KVM: arm64: Opportunistically turn off WFI trapping when using direct LPI injection Just like we do for WFE trapping, it can be useful to turn off WFI trapping when the physical CPU is not oversubscribed (that is, the vcpu is the only runnable process on this CPU) *and* we're using direct injection of interrupts. The conditions are reevaluated on each vcpu_load(), ensuring that we don't switch to this mode on a busy system. On a GICv4 system, this has the effect of reducing the generation of doorbell interrupts to zero when the right conditions are met, which is a huge improvement over the current situation (where the doorbells are screaming if the CPU ever hits a blocking WFI).
Signed-off-by: Marc Zyngier Reviewed-by: Zenghui Yu Reviewed-by: Christoffer Dall Link: https://lore.kernel.org/r/20191107160412.30301-3-maz@kernel.org --- arch/arm/include/asm/kvm_emulate.h | 4 ++-- arch/arm64/include/asm/kvm_emulate.h | 9 +++++++-- virt/kvm/arm/arm.c | 4 ++-- 3 files changed, 11 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h index 40002416efec..023c01cad2b1 100644 --- a/arch/arm/include/asm/kvm_emulate.h +++ b/arch/arm/include/asm/kvm_emulate.h @@ -95,12 +95,12 @@ static inline unsigned long *vcpu_hcr(const struct kvm_vcpu *vcpu) return (unsigned long *)&vcpu->arch.hcr; } -static inline void vcpu_clear_wfe_traps(struct kvm_vcpu *vcpu) +static inline void vcpu_clear_wfx_traps(struct kvm_vcpu *vcpu) { vcpu->arch.hcr &= ~HCR_TWE; } -static inline void vcpu_set_wfe_traps(struct kvm_vcpu *vcpu) +static inline void vcpu_set_wfx_traps(struct kvm_vcpu *vcpu) { vcpu->arch.hcr |= HCR_TWE; } diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 6e92f6c7b1e4..5a542d801f07 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -87,14 +87,19 @@ static inline unsigned long *vcpu_hcr(struct kvm_vcpu *vcpu) return (unsigned long *)&vcpu->arch.hcr_el2; } -static inline void vcpu_clear_wfe_traps(struct kvm_vcpu *vcpu) +static inline void vcpu_clear_wfx_traps(struct kvm_vcpu *vcpu) { vcpu->arch.hcr_el2 &= ~HCR_TWE; + if (atomic_read(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe.vlpi_count)) + vcpu->arch.hcr_el2 &= ~HCR_TWI; + else + vcpu->arch.hcr_el2 |= HCR_TWI; } -static inline void vcpu_set_wfe_traps(struct kvm_vcpu *vcpu) +static inline void vcpu_set_wfx_traps(struct kvm_vcpu *vcpu) { vcpu->arch.hcr_el2 |= HCR_TWE; + vcpu->arch.hcr_el2 |= HCR_TWI; } static inline void vcpu_ptrauth_enable(struct kvm_vcpu *vcpu) diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index bd2afcf9a13f..dac96e355f69 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -386,9 +386,9 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) kvm_vcpu_pmu_restore_guest(vcpu); if (single_task_running()) - vcpu_clear_wfe_traps(vcpu); + vcpu_clear_wfx_traps(vcpu); else - vcpu_set_wfe_traps(vcpu); + vcpu_set_wfx_traps(vcpu); vcpu_ptrauth_setup_lazy(vcpu); } -- cgit v1.2.3 From 294a9ddde6cdbf931a28b8c8c928d3f799b61cb5 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 8 Nov 2019 17:12:43 +0000 Subject: arm64: kaslr: Announce KASLR status on boot Currently the KASLR code is silent at boot unless it forces on KPTI, in which case a message is printed for that. This can lead to users incorrectly believing their system has the feature enabled when it in fact does not, and if they do notice, the lack of any diagnostics makes the problem harder to understand. Add an initcall which prints a message showing the status of KASLR during boot to make the status clear. This is particularly useful in cases where we don't have a seed. It seems to be a relatively common error for system integrators and administrators to enable KASLR in their configuration but not provide the seed at runtime, often due to seed provisioning breaking at some later point after it is initially enabled and verified.
Signed-off-by: Mark Brown Acked-by: Mark Rutland Signed-off-by: Catalin Marinas --- arch/arm64/kernel/kaslr.c | 41 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 38 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c index 416f537bf614..0039dc50e556 100644 --- a/arch/arm64/kernel/kaslr.c +++ b/arch/arm64/kernel/kaslr.c @@ -19,6 +19,14 @@ #include #include +enum kaslr_status { + KASLR_ENABLED, + KASLR_DISABLED_CMDLINE, + KASLR_DISABLED_NO_SEED, + KASLR_DISABLED_FDT_REMAP, +}; + +enum kaslr_status __ro_after_init kaslr_status; u64 __ro_after_init module_alloc_base; u16 __initdata memstart_offset_seed; @@ -91,15 +99,19 @@ u64 __init kaslr_early_init(u64 dt_phys) */ early_fixmap_init(); fdt = fixmap_remap_fdt(dt_phys, &size, PAGE_KERNEL); - if (!fdt) + if (!fdt) { + kaslr_status = KASLR_DISABLED_FDT_REMAP; return 0; + } /* * Retrieve (and wipe) the seed from the FDT */ seed = get_kaslr_seed(fdt); - if (!seed) + if (!seed) { + kaslr_status = KASLR_DISABLED_NO_SEED; return 0; + } /* * Check if 'nokaslr' appears on the command line, and @@ -107,8 +119,10 @@ u64 __init kaslr_early_init(u64 dt_phys) */ cmdline = kaslr_get_cmdline(fdt); str = strstr(cmdline, "nokaslr"); - if (str == cmdline || (str > cmdline && *(str - 1) == ' ')) + if (str == cmdline || (str > cmdline && *(str - 1) == ' ')) { + kaslr_status = KASLR_DISABLED_CMDLINE; return 0; + } /* * OK, so we are proceeding with KASLR enabled. Calculate a suitable @@ -170,3 +184,24 @@ u64 __init kaslr_early_init(u64 dt_phys) return offset; } + +static int __init kaslr_init(void) +{ + switch (kaslr_status) { + case KASLR_ENABLED: + pr_info("KASLR enabled\n"); + break; + case KASLR_DISABLED_CMDLINE: + pr_info("KASLR disabled on command line\n"); + break; + case KASLR_DISABLED_NO_SEED: + pr_warn("KASLR disabled due to lack of seed\n"); + break; + case KASLR_DISABLED_FDT_REMAP: + pr_warn("KASLR disabled due to FDT remapping failure\n"); + break; + } + + return 0; +} +core_initcall(kaslr_init) -- cgit v1.2.3 From 2203e1adb936a92ab2fd8f705e888af322462736 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 8 Nov 2019 17:12:44 +0000 Subject: arm64: kaslr: Check command line before looking for a seed Now that we print diagnostics at boot, the reason why we do not initialise KASLR matters. Currently we check for a seed before we check if the user has explicitly disabled KASLR on the command line, which results in misleading diagnostics, so reverse the order of those checks. We still parse the seed from the DT early so that, if the user has both provided a seed and disabled KASLR on the command line, we still mask the seed on the command line.
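As a side note, the 'nokaslr' detection that both patches keep intact only accepts a match at the very start of the command line or one preceded by a space, so it cannot fire on a substring of another token. A standalone model of the same check (the helper name is hypothetical):

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

/* Same word-boundary test as kaslr_early_init(); illustrative only. */
static bool cmdline_has_nokaslr(const char *cmdline)
{
	const char *str = strstr(cmdline, "nokaslr");

	/* str[-1] is safe: if str is not the start, it lies inside cmdline. */
	return str && (str == cmdline || str[-1] == ' ');
}

int main(void)
{
	printf("%d\n", cmdline_has_nokaslr("console=ttyS0 nokaslr"));	/* 1 */
	printf("%d\n", cmdline_has_nokaslr("foo_nokaslr=1"));		/* 0 */
	return 0;
}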
Signed-off-by: Mark Brown Acked-by: Mark Rutland Signed-off-by: Catalin Marinas --- arch/arm64/kernel/kaslr.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c index 0039dc50e556..2a11a962e571 100644 --- a/arch/arm64/kernel/kaslr.c +++ b/arch/arm64/kernel/kaslr.c @@ -26,7 +26,7 @@ enum kaslr_status { KASLR_DISABLED_FDT_REMAP, }; -enum kaslr_status __ro_after_init kaslr_status; +static enum kaslr_status __initdata kaslr_status; u64 __ro_after_init module_alloc_base; u16 __initdata memstart_offset_seed; @@ -108,10 +108,6 @@ u64 __init kaslr_early_init(u64 dt_phys) * Retrieve (and wipe) the seed from the FDT */ seed = get_kaslr_seed(fdt); - if (!seed) { - kaslr_status = KASLR_DISABLED_NO_SEED; - return 0; - } /* * Check if 'nokaslr' appears on the command line, and @@ -124,6 +120,11 @@ u64 __init kaslr_early_init(u64 dt_phys) return 0; } + if (!seed) { + kaslr_status = KASLR_DISABLED_NO_SEED; + return 0; + } + /* * OK, so we are proceeding with KASLR enabled. Calculate a suitable * kernel image offset from the seed. Let's place the kernel in the -- cgit v1.2.3 From 7f7f0d9c0bcbed864551012e4eb88a631fd376f9 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Fri, 25 Oct 2019 18:54:34 +0800 Subject: KVM: x86: get rid of odd out jump label in pdptrs_changed The odd out jump label is really not needed. Get rid of it by returning true directly when r < 0, as suggested by Paolo. This further leads to the variable 'changed' being unused. Remove it too. Signed-off-by: Miaohe Lin Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 89621025577a..8b3dcaa7985a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -721,7 +721,6 @@ EXPORT_SYMBOL_GPL(load_pdptrs); bool pdptrs_changed(struct kvm_vcpu *vcpu) { u64 pdpte[ARRAY_SIZE(vcpu->arch.walk_mmu->pdptrs)]; - bool changed = true; int offset; gfn_t gfn; int r; @@ -737,11 +736,9 @@ bool pdptrs_changed(struct kvm_vcpu *vcpu) r = kvm_read_nested_guest_page(vcpu, gfn, pdpte, offset, sizeof(pdpte), PFERR_USER_MASK | PFERR_WRITE_MASK); if (r < 0) - goto out; - changed = memcmp(pdpte, vcpu->arch.walk_mmu->pdptrs, sizeof(pdpte)) != 0; -out: + return true; - return changed; + return memcmp(pdpte, vcpu->arch.walk_mmu->pdptrs, sizeof(pdpte)) != 0; } EXPORT_SYMBOL_GPL(pdptrs_changed); -- cgit v1.2.3 From f70c08e46d55126efa6d23e149f8472c178686f2 Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Mon, 11 Nov 2019 09:59:56 +0100 Subject: arm64: Kconfig: make CMDLINE_FORCE depend on CMDLINE When building allmodconfig with KCONFIG_ALLCONFIG=$(pwd)/arch/arm64/configs/defconfig, CONFIG_CMDLINE_FORCE gets enabled, which forces the user to pass the full cmdline in CONFIG_CMDLINE="...". Rework so that CONFIG_CMDLINE_FORCE can be set only if CONFIG_CMDLINE is set to something other than an empty string.
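A rough model of the policy behind CMDLINE/CMDLINE_FORCE may help here: forcing means the built-in string wins even when the boot loader passes arguments, which is only meaningful when the built-in string is non-empty — exactly what the new dependency expresses. The sketch below is illustrative, not the kernel's implementation:

#include <stdio.h>
#include <string.h>

#define CONFIG_CMDLINE "console=ttyS0"	/* built-in default (illustrative) */

/* Model of the CMDLINE_FORCE policy; names and logic are a sketch. */
static const char *pick_cmdline(const char *bootloader_args, int force)
{
	/* Forcing only makes sense when a built-in string exists at all,
	 * which is what 'depends on CMDLINE != ""' now guarantees. */
	if (force && CONFIG_CMDLINE[0] != '\0')
		return CONFIG_CMDLINE;
	if (bootloader_args && bootloader_args[0] != '\0')
		return bootloader_args;
	return CONFIG_CMDLINE;
}

int main(void)
{
	printf("%s\n", pick_cmdline("root=/dev/sda1", 0));	/* bootloader wins */
	printf("%s\n", pick_cmdline("root=/dev/sda1", 1));	/* built-in forced */
	return 0;
}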
Suggested-by: John Garry Acked-by: Will Deacon Signed-off-by: Anders Roxell Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 3e53441ee067..8a0800e5be9d 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1629,6 +1629,7 @@ config CMDLINE config CMDLINE_FORCE bool "Always use the default kernel command string" + depends on CMDLINE != "" help Always use the default kernel command string, even if the boot loader passes other arguments to the kernel. -- cgit v1.2.3 From b2afb64cccd243afd8a4337d8ee4c2f2afbe991d Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Mon, 4 Nov 2019 14:11:20 +0800 Subject: MIPS: Loongson: Rename LOONGSON1 to LOONGSON32 Old Loongson-2E/2F now use LOONGSON2EF (and will be removed in the future), while newer Loongson-2/3 use LOONGSON64. Renaming LOONGSON1 to LOONGSON32 therefore makes the naming style more unified. Signed-off-by: Huacai Chen [paulburton@kernel.org: Fix checkpatch whitespace warning in irqflags.h] Signed-off-by: Paul Burton Cc: Ralf Baechle Cc: James Hogan Cc: linux-mips@linux-mips.org Cc: linux-mips@vger.kernel.org Cc: Fuxin Zhang Cc: Zhangjin Wu Cc: Huacai Chen --- arch/mips/Kconfig | 6 +++--- arch/mips/include/asm/cpu-type.h | 2 +- arch/mips/include/asm/cpu.h | 2 +- arch/mips/include/asm/irqflags.h | 2 +- arch/mips/include/asm/module.h | 4 ++-- arch/mips/kernel/cpu-probe.c | 2 +- arch/mips/kernel/idle.c | 2 +- arch/mips/kernel/perf_event_mipsxx.c | 2 +- arch/mips/kernel/traps.c | 2 +- arch/mips/loongson32/Kconfig | 2 +- arch/mips/loongson32/Platform | 4 ++-- arch/mips/oprofile/common.c | 2 +- arch/mips/oprofile/op_model_mipsxx.c | 2 +- 13 files changed, 17 insertions(+), 17 deletions(-) (limited to 'arch') diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 7cb894776f44..02b869df8ef8 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -1511,7 +1511,7 @@ config CPU_LOONGSON2F config CPU_LOONGSON1B bool "Loongson 1B" depends on SYS_HAS_CPU_LOONGSON1B - select CPU_LOONGSON1 + select CPU_LOONGSON32 select LEDS_GPIO_REGISTER help The Loongson 1B is a 32-bit SoC, which implements the MIPS32 @@ -1521,7 +1521,7 @@ config CPU_LOONGSON1B config CPU_LOONGSON1C bool "Loongson 1C" depends on SYS_HAS_CPU_LOONGSON1C - select CPU_LOONGSON1 + select CPU_LOONGSON32 select LEDS_GPIO_REGISTER help The Loongson 1C is a 32-bit SoC, which implements the MIPS32 @@ -1920,7 +1920,7 @@ config CPU_LOONGSON2EF select ARCH_HAS_PHYS_TO_DMA select CPU_HAS_LOAD_STORE_LR -config CPU_LOONGSON1 +config CPU_LOONGSON32 bool select CPU_MIPS32 select CPU_MIPSR2 diff --git a/arch/mips/include/asm/cpu-type.h b/arch/mips/include/asm/cpu-type.h index 5117e9119b87..c46c59b0f1b4 100644 --- a/arch/mips/include/asm/cpu-type.h +++ b/arch/mips/include/asm/cpu-type.h @@ -25,7 +25,7 @@ static inline int __pure __get_cpu_type(const int cpu_type) #if defined(CONFIG_SYS_HAS_CPU_LOONGSON1B) || \ defined(CONFIG_SYS_HAS_CPU_LOONGSON1C) - case CPU_LOONGSON1: + case CPU_LOONGSON32: #endif #ifdef CONFIG_SYS_HAS_CPU_MIPS32_R1 diff --git a/arch/mips/include/asm/cpu.h b/arch/mips/include/asm/cpu.h index 0e3a8d4de09d..ea830783d663 100644 --- a/arch/mips/include/asm/cpu.h +++ b/arch/mips/include/asm/cpu.h @@ -312,7 +312,7 @@ enum cpu_type_enum { */ CPU_4KC, CPU_4KEC, CPU_4KSC, CPU_24K, CPU_34K, CPU_1004K, CPU_74K, CPU_ALCHEMY, CPU_PR4450, CPU_BMIPS32, CPU_BMIPS3300, CPU_BMIPS4350, - CPU_BMIPS4380, CPU_BMIPS5000, CPU_XBURST, CPU_LOONGSON1, CPU_M14KC, + CPU_BMIPS4380, CPU_BMIPS5000, CPU_XBURST,
CPU_LOONGSON32, CPU_M14KC, CPU_M14KEC, CPU_INTERAPTIV, CPU_P5600, CPU_PROAPTIV, CPU_1074K, CPU_M5150, CPU_I6400, CPU_P6600, CPU_M6250, diff --git a/arch/mips/include/asm/irqflags.h b/arch/mips/include/asm/irqflags.h index 4d742acf2be0..c4728bbdf15b 100644 --- a/arch/mips/include/asm/irqflags.h +++ b/arch/mips/include/asm/irqflags.h @@ -41,7 +41,7 @@ static inline unsigned long arch_local_irq_save(void) " .set push \n" " .set reorder \n" " .set noat \n" -#if defined(CONFIG_CPU_LOONGSON64) || defined (CONFIG_CPU_LOONGSON1) +#if defined(CONFIG_CPU_LOONGSON64) || defined(CONFIG_CPU_LOONGSON32) " mfc0 %[flags], $12 \n" " di \n" #else diff --git a/arch/mips/include/asm/module.h b/arch/mips/include/asm/module.h index 9fe9515204d6..9846047b3d3d 100644 --- a/arch/mips/include/asm/module.h +++ b/arch/mips/include/asm/module.h @@ -119,8 +119,8 @@ search_module_dbetables(unsigned long addr) #define MODULE_PROC_FAMILY "RM7000 " #elif defined CONFIG_CPU_SB1 #define MODULE_PROC_FAMILY "SB1 " -#elif defined CONFIG_CPU_LOONGSON1 -#define MODULE_PROC_FAMILY "LOONGSON1 " +#elif defined CONFIG_CPU_LOONGSON32 +#define MODULE_PROC_FAMILY "LOONGSON32 " #elif defined CONFIG_CPU_LOONGSON2EF #define MODULE_PROC_FAMILY "LOONGSON2EF " #elif defined CONFIG_CPU_LOONGSON64 diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c index c849991548bb..105d89caf256 100644 --- a/arch/mips/kernel/cpu-probe.c +++ b/arch/mips/kernel/cpu-probe.c @@ -1571,7 +1571,7 @@ static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu) case PRID_IMP_LOONGSON_32: /* Loongson-1 */ decode_configs(c); - c->cputype = CPU_LOONGSON1; + c->cputype = CPU_LOONGSON32; switch (c->processor_id & PRID_REV_MASK) { case PRID_REV_LOONGSON1B: diff --git a/arch/mips/kernel/idle.c b/arch/mips/kernel/idle.c index 57dfa6c9edc5..37f8e78e2869 100644 --- a/arch/mips/kernel/idle.c +++ b/arch/mips/kernel/idle.c @@ -173,7 +173,7 @@ void __init check_wait(void) case CPU_CAVIUM_OCTEON2: case CPU_CAVIUM_OCTEON3: case CPU_XBURST: - case CPU_LOONGSON1: + case CPU_LOONGSON32: case CPU_XLR: case CPU_XLP: cpu_wait = r4k_wait; diff --git a/arch/mips/kernel/perf_event_mipsxx.c b/arch/mips/kernel/perf_event_mipsxx.c index 0af456a94916..128fc9999c56 100644 --- a/arch/mips/kernel/perf_event_mipsxx.c +++ b/arch/mips/kernel/perf_event_mipsxx.c @@ -1764,7 +1764,7 @@ init_hw_perf_events(void) mipspmu.general_event_map = &mipsxxcore_event_map; mipspmu.cache_event_map = &mipsxxcore_cache_map; break; - case CPU_LOONGSON1: + case CPU_LOONGSON32: mipspmu.name = "mips/loongson1"; mipspmu.general_event_map = &mipsxxcore_event_map; mipspmu.cache_event_map = &mipsxxcore_cache_map; diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 0c2570e6fcf6..83f2a437d9e2 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -1761,7 +1761,7 @@ static inline void parity_protection_init(void) case CPU_5KC: case CPU_5KE: - case CPU_LOONGSON1: + case CPU_LOONGSON32: write_c0_ecc(0x80000000); back_to_back_c0_hazard(); /* Set the PE bit (bit 31) in the c0_errctl register. 
*/ diff --git a/arch/mips/loongson32/Kconfig b/arch/mips/loongson32/Kconfig index 6dacc1438906..e27879b4813b 100644 --- a/arch/mips/loongson32/Kconfig +++ b/arch/mips/loongson32/Kconfig @@ -38,7 +38,7 @@ endchoice menuconfig CEVT_CSRC_LS1X bool "Use PWM Timer for clockevent/clocksource" select MIPS_EXTERNAL_TIMER - depends on CPU_LOONGSON1 + depends on CPU_LOONGSON32 help This option changes the default clockevent/clocksource to PWM Timer, and is required by Loongson1 CPUFreq support. diff --git a/arch/mips/loongson32/Platform b/arch/mips/loongson32/Platform index 333215593092..7f8e342f1ef5 100644 --- a/arch/mips/loongson32/Platform +++ b/arch/mips/loongson32/Platform @@ -1,4 +1,4 @@ -cflags-$(CONFIG_CPU_LOONGSON1) += -march=mips32r2 -Wa,--trap +cflags-$(CONFIG_CPU_LOONGSON32) += -march=mips32r2 -Wa,--trap platform-$(CONFIG_MACH_LOONGSON32) += loongson32/ cflags-$(CONFIG_MACH_LOONGSON32) += -I$(srctree)/arch/mips/include/asm/mach-loongson32 -load-$(CONFIG_CPU_LOONGSON1) += 0xffffffff80200000 +load-$(CONFIG_CPU_LOONGSON32) += 0xffffffff80200000 diff --git a/arch/mips/oprofile/common.c b/arch/mips/oprofile/common.c index 25cfa70f0ae4..03db268cba5c 100644 --- a/arch/mips/oprofile/common.c +++ b/arch/mips/oprofile/common.c @@ -93,7 +93,7 @@ int __init oprofile_arch_init(struct oprofile_operations *ops) case CPU_P5600: case CPU_I6400: case CPU_M5150: - case CPU_LOONGSON1: + case CPU_LOONGSON32: case CPU_SB1: case CPU_SB1A: case CPU_R10000: diff --git a/arch/mips/oprofile/op_model_mipsxx.c b/arch/mips/oprofile/op_model_mipsxx.c index 96c13a0ab078..a537bf98912c 100644 --- a/arch/mips/oprofile/op_model_mipsxx.c +++ b/arch/mips/oprofile/op_model_mipsxx.c @@ -420,7 +420,7 @@ static int __init mipsxx_init(void) op_model_mipsxx_ops.cpu_type = "mips/sb1"; break; - case CPU_LOONGSON1: + case CPU_LOONGSON32: op_model_mipsxx_ops.cpu_type = "mips/loongson1"; break; -- cgit v1.2.3 From caed1d1b20cbf7ecf7e8be629fd593c96c8ff2d2 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Mon, 4 Nov 2019 14:11:21 +0800 Subject: MIPS: Loongson: Unify LOONGSON3/LOONGSON64 Kconfig usage There are mixed LOONGSON3/LOONGSON64 usages in recent changes, so let's establish some rules: 1. In Kconfig symbols, we only use CPU_LOONGSON64, MACH_LOONGSON64 and SYS_HAS_CPU_LOONGSON64; all other derived symbols use "LOONGSON3", since they are not widely-used symbols and are sometimes not suitable for all 64-bit Loongson processors. E.g., we use the symbols LOONGSON3_ENHANCEMENT, CPU_LOONGSON3_WORKAROUNDS, etc. 2. Hide GSx64/GSx64E in the Kconfig title since it is not useful for general users; the full description, however, is more detailed, e.g. GS264/GS464/GS464E/GS464V. All Kconfig titles and descriptions of Loongson processors and machines have also been updated in this patch for consistency.
Signed-off-by: Huacai Chen Signed-off-by: Paul Burton Cc: Ralf Baechle Cc: James Hogan Cc: Paul Burton Cc: linux-mips@linux-mips.org Cc: linux-mips@vger.kernel.org Cc: Fuxin Zhang Cc: Zhangjin Wu Cc: Huacai Chen --- arch/mips/Kconfig | 35 +++++++++++++++++++++-------------- arch/mips/include/asm/hazards.h | 4 ++-- arch/mips/loongson64/Kconfig | 2 +- 3 files changed, 24 insertions(+), 17 deletions(-) (limited to 'arch') diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 02b869df8ef8..783111156a37 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -444,7 +444,7 @@ config LASAT select SYS_SUPPORTS_LITTLE_ENDIAN config MACH_LOONGSON32 - bool "Loongson-1 family of machines" + bool "Loongson 32-bit family of machines" select SYS_SUPPORTS_ZBOOT help This enables support for the Loongson-1 family of machines. @@ -460,7 +460,7 @@ config MACH_LOONGSON2EF This enables the support of early Loongson-2E/F family of machines. config MACH_LOONGSON64 - bool "Loongson-2/3 GSx64 family of machines" + bool "Loongson 64-bit family of machines" select ARCH_SPARSEMEM_ENABLE select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_MIGHT_HAVE_PC_SERIO @@ -489,8 +489,12 @@ config MACH_LOONGSON64 select ZONE_DMA32 select NUMA help - This enables the support of Loongson-2/3 family of processors with - GSx64 microarchitecture. + This enables the support of Loongson-2/3 family of machines. + + Loongson-2 and Loongson-3 are 64-bit general-purpose processors with + GS264/GS464/GS464E/GS464V microarchitecture (except old Loongson-2E + and Loongson-2F which will be removed), developed by the Institute + of Computing Technology (ICT), Chinese Academy of Sciences (CAS). config MACH_PISTACHIO bool "IMG Pistachio SoC based boards" @@ -1432,7 +1436,7 @@ choice default CPU_R4X00 config CPU_LOONGSON64 - bool "Loongson GSx64 CPU" + bool "Loongson 64-bit CPU" depends on SYS_HAS_CPU_LOONGSON64 select ARCH_HAS_PHYS_TO_DMA select CPU_SUPPORTS_64BIT_KERNEL @@ -1448,17 +1452,20 @@ config CPU_LOONGSON64 select GPIOLIB select SWIOTLB help - The Loongson GSx64 series of processor cores implements the - MIPS64R2 instruction set with many extensions. + The Loongson GSx64(GS264/GS464/GS464E/GS464V) series of processor + cores implements the MIPS64R2 instruction set with many extensions, + including most 64-bit Loongson-2 (2H, 2K) and Loongson-3 (3A1000, + 3B1000, 3B1500, 3A2000, 3A3000 and 3A4000) processors. However, old + Loongson-2E/2F is not covered here and will be removed in future. -config LOONGSON64_ENHANCEMENT - bool "New Loongson GSx64E CPU Enhancements" +config LOONGSON3_ENHANCEMENT + bool "New Loongson-3 CPU Enhancements" default n select CPU_MIPSR2 select CPU_HAS_PREFETCH depends on CPU_LOONGSON64 help - New Loongson GSx64E cores (since Loongson-3A R2, as opposed to Loongson-3A + New Loongson-3 cores (since Loongson-3A R2, as opposed to Loongson-3A R1, Loongson-3B R1 and Loongson-3B R2) has many enhancements, such as FTLB, L1-VCache, EI/DI/Wait/Prefetch instruction, DSP/DSPr2 ASE, User Local register, Read-Inhibit/Execute-Inhibit, SFB (Store Fill Buffer), @@ -1467,17 +1474,17 @@ config LOONGSON64_ENHANCEMENT This option enable those enhancements which are not probed at run time. If you want a generic kernel to run on all Loongson 3 machines, please say 'N' here. If you want a high-performance kernel to run on - new Loongson 3 machines only, please say 'Y' here. + new Loongson-3 machines only, please say 'Y' here. 
config CPU_LOONGSON3_WORKAROUNDS - bool "Old Loongson 3 LLSC Workarounds" + bool "Old Loongson-3 LLSC Workarounds" default y if SMP depends on CPU_LOONGSON64 help - Loongson 3 processors have the llsc issues which require workarounds. + Loongson-3 processors have the llsc issues which require workarounds. Without workarounds the system may hang unexpectedly. - Newer Loongson 3 will fix these issues and no workarounds are needed. + Newer Loongson-3 will fix these issues and no workarounds are needed. The workarounds have no significant side effect on them but may decrease the performance of the system so this option should be disabled unless the kernel is intended to be run on old systems. diff --git a/arch/mips/include/asm/hazards.h b/arch/mips/include/asm/hazards.h index ea6a8c4b49f3..a4f48b0f5541 100644 --- a/arch/mips/include/asm/hazards.h +++ b/arch/mips/include/asm/hazards.h @@ -23,7 +23,7 @@ * TLB hazards */ #if (defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6)) && \ - !defined(CONFIG_CPU_CAVIUM_OCTEON) && !defined(CONFIG_LOONGSON64_ENHANCEMENT) + !defined(CONFIG_CPU_CAVIUM_OCTEON) && !defined(CONFIG_LOONGSON3_ENHANCEMENT) /* * MIPSR2 defines ehb for hazard avoidance @@ -158,7 +158,7 @@ do { \ } while (0) #elif defined(CONFIG_MIPS_ALCHEMY) || defined(CONFIG_CPU_CAVIUM_OCTEON) || \ - defined(CONFIG_CPU_LOONGSON2EF) || defined(CONFIG_LOONGSON64_ENHANCEMENT) || \ + defined(CONFIG_CPU_LOONGSON2EF) || defined(CONFIG_LOONGSON3_ENHANCEMENT) || \ defined(CONFIG_CPU_R10000) || defined(CONFIG_CPU_R5500) || defined(CONFIG_CPU_XLR) /* diff --git a/arch/mips/loongson64/Kconfig b/arch/mips/loongson64/Kconfig index b1aefd06e3f5..48b29c198acf 100644 --- a/arch/mips/loongson64/Kconfig +++ b/arch/mips/loongson64/Kconfig @@ -3,7 +3,7 @@ if MACH_LOONGSON64 config RS780_HPET bool "RS780/SBX00 HPET Timer" - depends on CONFIG_MACH_LOONGSON64 + depends on MACH_LOONGSON64 select MIPS_EXTERNAL_TIMER help This option enables the hpet timer of AMD RS780/SBX00. -- cgit v1.2.3 From 28e6b875fdbb17ef57cc4343d8825e0d5770f427 Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Thu, 7 Nov 2019 12:01:14 +0800 Subject: MIPS: Drop pmon.h There is no code still using pmon callvectors. Signed-off-by: Jiaxun Yang Signed-off-by: Paul Burton Cc: linux-mips@vger.kernel.org Cc: chenhe@lemote.com --- arch/mips/include/asm/pmon.h | 46 -------------------------------------------- arch/mips/kernel/smp-bmips.c | 1 - 2 files changed, 47 deletions(-) delete mode 100644 arch/mips/include/asm/pmon.h (limited to 'arch') diff --git a/arch/mips/include/asm/pmon.h b/arch/mips/include/asm/pmon.h deleted file mode 100644 index 6ad519189ce2..000000000000 --- a/arch/mips/include/asm/pmon.h +++ /dev/null @@ -1,46 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 2004 by Ralf Baechle - * - * The cpustart method is a PMC-Sierra's function to start the secondary CPU. - * Stock PMON 2000 has the smpfork, semlock and semunlock methods instead. 
- */ -#ifndef _ASM_PMON_H -#define _ASM_PMON_H - -struct callvectors { - int (*open) (char*, int, int); - int (*close) (int); - int (*read) (int, void*, int); - int (*write) (int, void*, int); - off_t (*lseek) (int, off_t, int); - int (*printf) (const char*, ...); - void (*cacheflush) (void); - char* (*gets) (char*); - union { - int (*smpfork) (unsigned long cp, char *sp); - int (*cpustart) (long, void (*)(void), void *, long); - } _s; - int (*semlock) (int sem); - void (*semunlock) (int sem); -}; - -extern struct callvectors *debug_vectors; - -#define pmon_open(name, flags, mode) debug_vectors->open(name, flage, mode) -#define pmon_close(fd) debug_vectors->close(fd) -#define pmon_read(fd, buf, count) debug_vectors->read(fd, buf, count) -#define pmon_write(fd, buf, count) debug_vectors->write(fd, buf, count) -#define pmon_lseek(fd, off, whence) debug_vectors->lseek(fd, off, whence) -#define pmon_printf(fmt...) debug_vectors->printf(fmt) -#define pmon_cacheflush() debug_vectors->cacheflush() -#define pmon_gets(s) debug_vectors->gets(s) -#define pmon_cpustart(n, f, sp, gp) debug_vectors->_s.cpustart(n, f, sp, gp) -#define pmon_smpfork(cp, sp) debug_vectors->_s.smpfork(cp, sp) -#define pmon_semlock(sem) debug_vectors->semlock(sem) -#define pmon_semunlock(sem) debug_vectors->semunlock(sem) - -#endif /* _ASM_PMON_H */ diff --git a/arch/mips/kernel/smp-bmips.c b/arch/mips/kernel/smp-bmips.c index 712c15de6ab9..9058e9dcf080 100644 --- a/arch/mips/kernel/smp-bmips.c +++ b/arch/mips/kernel/smp-bmips.c @@ -31,7 +31,6 @@ #include #include #include -#include #include #include #include -- cgit v1.2.3 From 75cac781dca43e735fbb4166d994263a14f0823e Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Thu, 7 Nov 2019 12:01:15 +0800 Subject: MIPS: Loongson{2ef, 32, 64} convert to generic fw cmdline All Loongson firmwares pass the boot cmdline/env in the manner of YAMON/PMON. Thus we can remove the duplicated cmdline initialization code and convert to the generic fw method.
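For context, the prom_init_cmdline() implementations removed by this patch (they are deleted in the hunks below) all did the same thing: walk the firmware-provided argument vector and join it into a single bounded command-line string. A plain C model of that loop, with an illustrative buffer size:

#include <stdio.h>
#include <string.h>

#define CMDLINE_SIZE 256	/* stand-in for sizeof(arcs_cmdline) */

static char cmdline[CMDLINE_SIZE];

/* Join argv[1..argc-1] into cmdline, stopping before the buffer overflows. */
static void init_cmdline(int argc, char **argv)
{
	int i;

	cmdline[0] = '\0';
	for (i = 1; i < argc; i++) {
		/* +1 accounts for the separating space. */
		if (strlen(cmdline) + strlen(argv[i]) + 1 >= sizeof(cmdline))
			break;
		strcat(cmdline, argv[i]);
		strcat(cmdline, " ");
	}
}

int main(int argc, char **argv)
{
	init_cmdline(argc, argv);
	printf("cmdline: %s\n", cmdline);
	return 0;
}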
Signed-off-by: Jiaxun Yang Signed-off-by: Paul Burton Cc: linux-mips@vger.kernel.org Cc: chenhe@lemote.com --- arch/mips/include/asm/mach-loongson2ef/loongson.h | 1 - arch/mips/include/asm/mach-loongson32/prom.h | 20 -------- arch/mips/include/asm/mach-loongson64/loongson.h | 1 - arch/mips/loongson2ef/common/Makefile | 2 +- arch/mips/loongson2ef/common/cmdline.c | 44 ----------------- arch/mips/loongson2ef/common/env.c | 26 ++-------- arch/mips/loongson2ef/common/init.c | 4 +- arch/mips/loongson32/common/prom.c | 59 +++++------------------ arch/mips/loongson32/common/setup.c | 11 ++--- arch/mips/loongson64/Makefile | 2 +- arch/mips/loongson64/cmdline.c | 42 ---------------- arch/mips/loongson64/init.c | 3 +- 12 files changed, 28 insertions(+), 187 deletions(-) delete mode 100644 arch/mips/include/asm/mach-loongson32/prom.h delete mode 100644 arch/mips/loongson2ef/common/cmdline.c delete mode 100644 arch/mips/loongson64/cmdline.c (limited to 'arch') diff --git a/arch/mips/include/asm/mach-loongson2ef/loongson.h b/arch/mips/include/asm/mach-loongson2ef/loongson.h index 8ed460a64d10..622456539add 100644 --- a/arch/mips/include/asm/mach-loongson2ef/loongson.h +++ b/arch/mips/include/asm/mach-loongson2ef/loongson.h @@ -25,7 +25,6 @@ extern const struct plat_smp_ops loongson3_smp_ops; /* loongson-specific command line, env and memory initialization */ extern void __init prom_init_memory(void); -extern void __init prom_init_cmdline(void); extern void __init prom_init_machtype(void); extern void __init prom_init_env(void); #ifdef CONFIG_LOONGSON_UART_BASE diff --git a/arch/mips/include/asm/mach-loongson32/prom.h b/arch/mips/include/asm/mach-loongson32/prom.h deleted file mode 100644 index cb789f18d790..000000000000 --- a/arch/mips/include/asm/mach-loongson32/prom.h +++ /dev/null @@ -1,20 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (c) 2011 Zhang, Keguang - */ - -#ifndef __ASM_MACH_LOONGSON32_PROM_H -#define __ASM_MACH_LOONGSON32_PROM_H - -#include -#include -#include - -/* environment arguments from bootloader */ -extern unsigned long memsize, highmemsize; - -/* loongson-specific command line, env and memory initialization */ -extern char *prom_getenv(char *name); -extern void __init prom_init_cmdline(void); - -#endif /* __ASM_MACH_LOONGSON32_PROM_H */ diff --git a/arch/mips/include/asm/mach-loongson64/loongson.h b/arch/mips/include/asm/mach-loongson64/loongson.h index bc00c2d88225..a8fce112a9b0 100644 --- a/arch/mips/include/asm/mach-loongson64/loongson.h +++ b/arch/mips/include/asm/mach-loongson64/loongson.h @@ -24,7 +24,6 @@ extern const struct plat_smp_ops loongson3_smp_ops; /* loongson-specific command line, env and memory initialization */ extern void __init prom_init_memory(void); -extern void __init prom_init_cmdline(void); extern void __init prom_init_env(void); /* irq operation functions */ diff --git a/arch/mips/loongson2ef/common/Makefile b/arch/mips/loongson2ef/common/Makefile index 684624f61f5a..10dd009a20a0 100644 --- a/arch/mips/loongson2ef/common/Makefile +++ b/arch/mips/loongson2ef/common/Makefile @@ -3,7 +3,7 @@ # Makefile for loongson based machines. 
# -obj-y += setup.o init.o cmdline.o env.o time.o reset.o irq.o \ +obj-y += setup.o init.o env.o time.o reset.o irq.o \ bonito-irq.o mem.o machtype.o platform.o serial.o obj-$(CONFIG_PCI) += pci.o diff --git a/arch/mips/loongson2ef/common/cmdline.c b/arch/mips/loongson2ef/common/cmdline.c deleted file mode 100644 index a735460682cf..000000000000 --- a/arch/mips/loongson2ef/common/cmdline.c +++ /dev/null @@ -1,44 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Based on Ocelot Linux port, which is - * Copyright 2001 MontaVista Software Inc. - * Author: jsun@mvista.com or jsun@junsun.net - * - * Copyright 2003 ICT CAS - * Author: Michael Guo - * - * Copyright (C) 2007 Lemote Inc. & Institute of Computing Technology - * Author: Fuxin Zhang, zhangfx@lemote.com - * - * Copyright (C) 2009 Lemote Inc. - * Author: Wu Zhangjin, wuzhangjin@gmail.com - */ -#include - -#include - -void __init prom_init_cmdline(void) -{ - int prom_argc; - /* pmon passes arguments in 32bit pointers */ - int *_prom_argv; - int i; - long l; - - /* firmware arguments are initialized in head.S */ - prom_argc = fw_arg0; - _prom_argv = (int *)fw_arg1; - - /* arg[0] is "g", the rest is boot parameters */ - arcs_cmdline[0] = '\0'; - for (i = 1; i < prom_argc; i++) { - l = (long)_prom_argv[i]; - if (strlen(arcs_cmdline) + strlen(((char *)l) + 1) - >= sizeof(arcs_cmdline)) - break; - strcat(arcs_cmdline, ((char *)l)); - strcat(arcs_cmdline, " "); - } - - prom_init_machtype(); -} diff --git a/arch/mips/loongson2ef/common/env.c b/arch/mips/loongson2ef/common/env.c index 29c5fecb8282..6f20bdf9b242 100644 --- a/arch/mips/loongson2ef/common/env.c +++ b/arch/mips/loongson2ef/common/env.c @@ -15,39 +15,21 @@ */ #include #include +#include #include u32 cpu_clock_freq; EXPORT_SYMBOL(cpu_clock_freq); -unsigned long long smp_group[4]; - -#define parse_even_earlier(res, option, p) \ -do { \ - unsigned int tmp __maybe_unused; \ - \ - if (strncmp(option, (char *)p, strlen(option)) == 0) \ - tmp = kstrtou32((char *)p + strlen(option"="), 10, &res); \ -} while (0) - void __init prom_init_env(void) { /* pmon passes arguments in 32bit pointers */ unsigned int processor_id; - int *_prom_envp; - long l; - /* firmware arguments are initialized in head.S */ - _prom_envp = (int *)fw_arg2; + cpu_clock_freq = fw_getenvl("cpuclock"); + memsize = fw_getenvl("memsize"); + highmemsize = fw_getenvl("highmemsize"); - l = (long)*_prom_envp; - while (l != 0) { - parse_even_earlier(cpu_clock_freq, "cpuclock", l); - parse_even_earlier(memsize, "memsize", l); - parse_even_earlier(highmemsize, "highmemsize", l); - _prom_envp++; - l = (long)*_prom_envp; - } if (memsize == 0) memsize = 256; diff --git a/arch/mips/loongson2ef/common/init.c b/arch/mips/loongson2ef/common/init.c index a45430365729..dab3ffda8b14 100644 --- a/arch/mips/loongson2ef/common/init.c +++ b/arch/mips/loongson2ef/common/init.c @@ -9,6 +9,7 @@ #include #include #include +#include #include @@ -32,7 +33,8 @@ void __init prom_init(void) ioremap(LOONGSON_ADDRWINCFG_BASE, LOONGSON_ADDRWINCFG_SIZE); #endif - prom_init_cmdline(); + fw_init_cmdline(); + prom_init_machtype(); prom_init_env(); /* init base address of io space */ diff --git a/arch/mips/loongson32/common/prom.c b/arch/mips/loongson32/common/prom.c index c4e043ee53ff..73dd25142484 100644 --- a/arch/mips/loongson32/common/prom.c +++ b/arch/mips/loongson32/common/prom.c @@ -5,63 +5,25 @@ * Modified from arch/mips/pnx833x/common/prom.c. 
*/ +#include +#include #include #include +#include #include -#include -int prom_argc; -char **prom_argv, **prom_envp; -unsigned long memsize, highmemsize; - -char *prom_getenv(char *envname) -{ - char **env = prom_envp; - int i; - - i = strlen(envname); - - while (*env) { - if (strncmp(envname, *env, i) == 0 && *(*env + i) == '=') - return *env + i + 1; - env++; - } - - return 0; -} - -static inline unsigned long env_or_default(char *env, unsigned long dfl) -{ - char *str = prom_getenv(env); - return str ? simple_strtol(str, 0, 0) : dfl; -} - -void __init prom_init_cmdline(void) -{ - char *c = &(arcs_cmdline[0]); - int i; - - for (i = 1; i < prom_argc; i++) { - strcpy(c, prom_argv[i]); - c += strlen(prom_argv[i]); - if (i < prom_argc - 1) - *c++ = ' '; - } - *c = 0; -} +unsigned long memsize; void __init prom_init(void) { void __iomem *uart_base; - prom_argc = fw_arg0; - prom_argv = (char **)fw_arg1; - prom_envp = (char **)fw_arg2; - prom_init_cmdline(); + fw_init_cmdline(); - memsize = env_or_default("memsize", DEFAULT_MEMSIZE); - highmemsize = env_or_default("highmemsize", 0x0); + memsize = fw_getenvl("memsize"); + if(!memsize) + memsize = DEFAULT_MEMSIZE; if (strstr(arcs_cmdline, "console=ttyS3")) uart_base = ioremap_nocache(LS1X_UART3_BASE, 0x0f); @@ -77,3 +39,8 @@ void __init prom_init(void) void __init prom_free_prom_memory(void) { } + +void __init plat_mem_setup(void) +{ + add_memory_region(0x0, (memsize << 20), BOOT_MEM_RAM); +} diff --git a/arch/mips/loongson32/common/setup.c b/arch/mips/loongson32/common/setup.c index 8b03e18fc4d8..4733fe037176 100644 --- a/arch/mips/loongson32/common/setup.c +++ b/arch/mips/loongson32/common/setup.c @@ -3,15 +3,12 @@ * Copyright (c) 2011 Zhang, Keguang */ +#include +#include +#include +#include #include -#include - -void __init plat_mem_setup(void) -{ - add_memory_region(0x0, (memsize << 20), BOOT_MEM_RAM); -} - const char *get_system_type(void) { unsigned int processor_id = (¤t_cpu_data)->processor_id; diff --git a/arch/mips/loongson64/Makefile b/arch/mips/loongson64/Makefile index 0b3c65b52965..7821891bc5d0 100644 --- a/arch/mips/loongson64/Makefile +++ b/arch/mips/loongson64/Makefile @@ -3,7 +3,7 @@ # Makefile for Loongson-3 family machines # obj-$(CONFIG_MACH_LOONGSON64) += irq.o cop2-ex.o platform.o acpi_init.o dma.o \ - setup.o init.o cmdline.o env.o time.o reset.o \ + setup.o init.o env.o time.o reset.o \ obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_NUMA) += numa.o diff --git a/arch/mips/loongson64/cmdline.c b/arch/mips/loongson64/cmdline.c deleted file mode 100644 index fb1644b01471..000000000000 --- a/arch/mips/loongson64/cmdline.c +++ /dev/null @@ -1,42 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Based on Ocelot Linux port, which is - * Copyright 2001 MontaVista Software Inc. - * Author: jsun@mvista.com or jsun@junsun.net - * - * Copyright 2003 ICT CAS - * Author: Michael Guo - * - * Copyright (C) 2007 Lemote Inc. & Institute of Computing Technology - * Author: Fuxin Zhang, zhangfx@lemote.com - * - * Copyright (C) 2009 Lemote Inc. 
- * Author: Wu Zhangjin, wuzhangjin@gmail.com - */ -#include - -#include - -void __init prom_init_cmdline(void) -{ - int prom_argc; - /* pmon passes arguments in 32bit pointers */ - int *_prom_argv; - int i; - long l; - - /* firmware arguments are initialized in head.S */ - prom_argc = fw_arg0; - _prom_argv = (int *)fw_arg1; - - /* arg[0] is "g", the rest is boot parameters */ - arcs_cmdline[0] = '\0'; - for (i = 1; i < prom_argc; i++) { - l = (long)_prom_argv[i]; - if (strlen(arcs_cmdline) + strlen(((char *)l) + 1) - >= sizeof(arcs_cmdline)) - break; - strcat(arcs_cmdline, ((char *)l)); - strcat(arcs_cmdline, " "); - } -} diff --git a/arch/mips/loongson64/init.c b/arch/mips/loongson64/init.c index 48b44f415059..5ac1a0f35ca4 100644 --- a/arch/mips/loongson64/init.c +++ b/arch/mips/loongson64/init.c @@ -9,6 +9,7 @@ #include #include #include +#include #include @@ -24,7 +25,7 @@ static void __init mips_nmi_setup(void) void __init prom_init(void) { - prom_init_cmdline(); + fw_init_cmdline(); prom_init_env(); /* init base address of io space */ -- cgit v1.2.3 From 2a5984360b015929e62c7f77924535dfbf2b2b35 Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Thu, 7 Nov 2019 12:01:16 +0800 Subject: MIPS: Drop CPU_SUPPORTS_UNCACHED_ACCELERATED CPU_SUPPORTS_UNCACHED_ACCELERATED was introduced when the kernel couldn't handle writecombine remapping well. Nowadays drivers can try writecombine remapping by themselves, so this feature is no longer needed. Signed-off-by: Jiaxun Yang Signed-off-by: Paul Burton Cc: linux-mips@vger.kernel.org Cc: chenhe@lemote.com --- arch/mips/Kconfig | 3 -- arch/mips/include/asm/pgtable.h | 11 -------- arch/mips/loongson2ef/common/mem.c | 58 -------------------------------------- 3 files changed, 72 deletions(-) (limited to 'arch') diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 783111156a37..d689aa485d0c 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -1973,7 +1973,6 @@ config SYS_HAS_CPU_LOONGSON2F bool select CPU_SUPPORTS_CPUFREQ select CPU_SUPPORTS_ADDRWINCFG if 64BIT - select CPU_SUPPORTS_UNCACHED_ACCELERATED config SYS_HAS_CPU_LOONGSON1B bool @@ -2150,8 +2149,6 @@ config CPU_SUPPORTS_ADDRWINCFG config CPU_SUPPORTS_HUGEPAGES bool depends on !(32BIT && (ARCH_PHYS_ADDR_T_64BIT || EVA)) -config CPU_SUPPORTS_UNCACHED_ACCELERATED - bool config MIPS_PGD_C0_CONTEXT bool default y if 64BIT && (CPU_MIPSR2 || CPU_MIPSR6) && !CPU_XLP diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h index f85bd5b15f51..91b89aab1787 100644 --- a/arch/mips/include/asm/pgtable.h +++ b/arch/mips/include/asm/pgtable.h @@ -643,17 +643,6 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, #include -/* - * uncached accelerated TLB map for video memory access - */ -#ifdef CONFIG_CPU_SUPPORTS_UNCACHED_ACCELERATED -#define __HAVE_PHYS_MEM_ACCESS_PROT - -struct file; -pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, - unsigned long size, pgprot_t vma_prot); -#endif - /* * We provide our own get_unmapped area to cope with the virtual aliasing * constraints placed on us by the cache architecture.
diff --git a/arch/mips/loongson2ef/common/mem.c b/arch/mips/loongson2ef/common/mem.c index 11bf6eefb82a..ae21f1c62baa 100644 --- a/arch/mips/loongson2ef/common/mem.c +++ b/arch/mips/loongson2ef/common/mem.c @@ -60,61 +60,3 @@ int __uncached_access(struct file *file, unsigned long addr) ((addr >= LOONGSON_MMIO_MEM_START) && (addr < LOONGSON_MMIO_MEM_END)); } - -#ifdef CONFIG_CPU_SUPPORTS_UNCACHED_ACCELERATED - -#include -#include -#include - -static unsigned long uca_start, uca_end; - -pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, - unsigned long size, pgprot_t vma_prot) -{ - unsigned long offset = pfn << PAGE_SHIFT; - unsigned long end = offset + size; - - if (__uncached_access(file, offset)) { - if (uca_start && (offset >= uca_start) && - (end <= uca_end)) - return __pgprot((pgprot_val(vma_prot) & - ~_CACHE_MASK) | - _CACHE_UNCACHED_ACCELERATED); - else - return pgprot_noncached(vma_prot); - } - return vma_prot; -} - -static int __init find_vga_mem_init(void) -{ - struct pci_dev *dev = 0; - struct resource *r; - int idx; - - if (uca_start) - return 0; - - for_each_pci_dev(dev) { - if ((dev->class >> 16) == PCI_BASE_CLASS_DISPLAY) { - for (idx = 0; idx < PCI_NUM_RESOURCES; idx++) { - r = &dev->resource[idx]; - if (!r->start && r->end) - continue; - if (r->flags & IORESOURCE_IO) - continue; - if (r->flags & IORESOURCE_MEM) { - uca_start = r->start; - uca_end = r->end; - return 0; - } - } - } - } - - return 0; -} - -late_initcall(find_vga_mem_init); -#endif /* !CONFIG_CPU_SUPPORTS_UNCACHED_ACCELERATED */ -- cgit v1.2.3 From 574b9a04abfc68c41c3915c2eba2767b78e3e923 Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Thu, 7 Nov 2019 12:01:17 +0800 Subject: MIPS: Loongson2ef: Convert to early_printk_8250 early_printk.c does the same thing as early_printk_8250. Remove the duplicated code.
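The early_printk.c removed by this patch (its body appears in a later hunk) is the classic 8250 early-console loop: poll the Line Status Register until the transmit-holding-register-empty bit is set, then write the byte, with a bounded timeout so a dead UART cannot hang boot. A self-contained model using a fake register block — the register offsets and the THRE bit follow the 8250 convention, while the simulation itself is purely illustrative:

#include <stdio.h>

#define UART_TX		0	/* transmit holding register */
#define UART_LSR	5	/* line status register */
#define UART_LSR_THRE	0x20	/* transmitter holding register empty */

/* Fake 8250 register file standing in for MMIO; always ready to send. */
static unsigned char regs[8] = { [UART_LSR] = UART_LSR_THRE };

static unsigned int serial_in(int offset)
{
	return regs[offset];
}

static void serial_out(int offset, int value)
{
	regs[offset] = (unsigned char)value;
	if (offset == UART_TX)
		putchar(value);	/* simulate the wire */
}

static void early_putchar(char c)
{
	int timeout = 1024;

	/* Busy-wait for THRE, bounded so a dead UART cannot hang boot. */
	while (!(serial_in(UART_LSR) & UART_LSR_THRE) && timeout-- > 0)
		;
	serial_out(UART_TX, c);
}

int main(void)
{
	const char *s = "hello\n";

	while (*s)
		early_putchar(*s++);
	return 0;
}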
Signed-off-by: Jiaxun Yang Signed-off-by: Paul Burton Cc: linux-mips@vger.kernel.org Cc: chenhe@lemote.com --- arch/mips/include/asm/mach-loongson2ef/loongson.h | 1 - arch/mips/loongson2ef/Kconfig | 2 ++ arch/mips/loongson2ef/common/Makefile | 1 - arch/mips/loongson2ef/common/early_printk.c | 38 ----------------------- arch/mips/loongson2ef/common/init.c | 1 - arch/mips/loongson2ef/common/uart_base.c | 2 ++ 6 files changed, 4 insertions(+), 41 deletions(-) delete mode 100644 arch/mips/loongson2ef/common/early_printk.c (limited to 'arch') diff --git a/arch/mips/include/asm/mach-loongson2ef/loongson.h b/arch/mips/include/asm/mach-loongson2ef/loongson.h index 622456539add..5008af0a1a19 100644 --- a/arch/mips/include/asm/mach-loongson2ef/loongson.h +++ b/arch/mips/include/asm/mach-loongson2ef/loongson.h @@ -21,7 +21,6 @@ extern void mach_prepare_shutdown(void); /* environment arguments from bootloader */ extern u32 cpu_clock_freq; extern u32 memsize, highmemsize; -extern const struct plat_smp_ops loongson3_smp_ops; /* loongson-specific command line, env and memory initialization */ extern void __init prom_init_memory(void); diff --git a/arch/mips/loongson2ef/Kconfig b/arch/mips/loongson2ef/Kconfig index 66a584a833e5..595dd48e1e4d 100644 --- a/arch/mips/loongson2ef/Kconfig +++ b/arch/mips/loongson2ef/Kconfig @@ -23,6 +23,7 @@ config LEMOTE_FULOONG2E select SYS_SUPPORTS_LITTLE_ENDIAN select SYS_SUPPORTS_HIGHMEM select SYS_HAS_EARLY_PRINTK + select USE_GENERIC_EARLY_PRINTK_8250 select GENERIC_ISA_DMA_SUPPORT_BROKEN select CPU_HAS_WB select LOONGSON_MC146818 @@ -52,6 +53,7 @@ config LEMOTE_MACH2F select ISA select SYS_HAS_CPU_LOONGSON2F select SYS_HAS_EARLY_PRINTK + select USE_GENERIC_EARLY_PRINTK_8250 select SYS_SUPPORTS_64BIT_KERNEL select SYS_SUPPORTS_HIGHMEM select SYS_SUPPORTS_LITTLE_ENDIAN diff --git a/arch/mips/loongson2ef/common/Makefile b/arch/mips/loongson2ef/common/Makefile index 10dd009a20a0..d5ab3e543ea3 100644 --- a/arch/mips/loongson2ef/common/Makefile +++ b/arch/mips/loongson2ef/common/Makefile @@ -10,7 +10,6 @@ obj-$(CONFIG_PCI) += pci.o # # Serial port support # -obj-$(CONFIG_EARLY_PRINTK) += early_printk.o obj-$(CONFIG_LOONGSON_UART_BASE) += uart_base.o obj-$(CONFIG_LOONGSON_MC146818) += rtc.o diff --git a/arch/mips/loongson2ef/common/early_printk.c b/arch/mips/loongson2ef/common/early_printk.c deleted file mode 100644 index d90c5e5a0e78..000000000000 --- a/arch/mips/loongson2ef/common/early_printk.c +++ /dev/null @@ -1,38 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* early printk support - * - * Copyright (c) 2009 Philippe Vachon - * Copyright (c) 2009 Lemote Inc. 
- * Author: Wu Zhangjin, wuzhangjin@gmail.com - */ -#include -#include - -#include - -#define PORT(base, offset) (u8 *)(base + offset) - -static inline unsigned int serial_in(unsigned char *base, int offset) -{ - return readb(PORT(base, offset)); -} - -static inline void serial_out(unsigned char *base, int offset, int value) -{ - writeb(value, PORT(base, offset)); -} - -void prom_putchar(char c) -{ - int timeout; - unsigned char *uart_base; - - uart_base = (unsigned char *)_loongson_uart_base; - timeout = 1024; - - while (((serial_in(uart_base, UART_LSR) & UART_LSR_THRE) == 0) && - (timeout-- > 0)) - ; - - serial_out(uart_base, UART_TX, c); -} diff --git a/arch/mips/loongson2ef/common/init.c b/arch/mips/loongson2ef/common/init.c index dab3ffda8b14..45512178be77 100644 --- a/arch/mips/loongson2ef/common/init.c +++ b/arch/mips/loongson2ef/common/init.c @@ -44,7 +44,6 @@ void __init prom_init(void) /*init the uart base address */ prom_init_uart_base(); - register_smp_ops(&loongson3_smp_ops); board_nmi_handler_setup = mips_nmi_setup; } diff --git a/arch/mips/loongson2ef/common/uart_base.c b/arch/mips/loongson2ef/common/uart_base.c index bbfe1095a843..522bea6ad7b0 100644 --- a/arch/mips/loongson2ef/common/uart_base.c +++ b/arch/mips/loongson2ef/common/uart_base.c @@ -6,6 +6,7 @@ #include #include +#include #include @@ -38,4 +39,5 @@ void prom_init_loongson_uart_base(void) } _loongson_uart_base = TO_UNCAC(loongson_uart_base); + setup_8250_early_printk_port(_loongson_uart_base, 0, 1024); } -- cgit v1.2.3 From 53949e0a65b72f1551067d13a4dea3cbce184ded Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Thu, 7 Nov 2019 12:01:18 +0800 Subject: MIPS: Loongson64: Drop setup_pcimap setup_pcimap is used to set up address windows for the Loongson-3 built-in PCI-X controller, but this function has never been used in the real world and lacks support in the kernel.
Signed-off-by: Jiaxun Yang Signed-off-by: Paul Burton Cc: linux-mips@vger.kernel.org Cc: chenhe@lemote.com --- arch/mips/loongson64/pci.c | 43 ------------------------------------------- 1 file changed, 43 deletions(-) (limited to 'arch') diff --git a/arch/mips/loongson64/pci.c b/arch/mips/loongson64/pci.c index 7bbe2388f38e..e84ae20c3290 100644 --- a/arch/mips/loongson64/pci.c +++ b/arch/mips/loongson64/pci.c @@ -31,54 +31,11 @@ static struct pci_controller loongson_pci_controller = { .io_offset = 0x00000000UL, }; -static void __init setup_pcimap(void) -{ - /* - * local to PCI mapping for CPU accessing PCI space - * CPU address space [256M,448M] is window for accessing pci space - * we set pcimap_lo[0,1,2] to map it to pci space[0M,64M], [320M,448M] - * - * pcimap: PCI_MAP2 PCI_Mem_Lo2 PCI_Mem_Lo1 PCI_Mem_Lo0 - * [<2G] [384M,448M] [320M,384M] [0M,64M] - */ - LOONGSON_PCIMAP = LOONGSON_PCIMAP_PCIMAP_2 | - LOONGSON_PCIMAP_WIN(2, LOONGSON_PCILO2_BASE) | - LOONGSON_PCIMAP_WIN(1, LOONGSON_PCILO1_BASE) | - LOONGSON_PCIMAP_WIN(0, 0); - - /* - * PCI-DMA to local mapping: [2G,2G+256M] -> [0M,256M] - */ - LOONGSON_PCIBASE0 = 0x80000000ul; /* base: 2G -> mmap: 0M */ - /* size: 256M, burst transmission, pre-fetch enable, 64bit */ - LOONGSON_PCI_HIT0_SEL_L = 0xc000000cul; - LOONGSON_PCI_HIT0_SEL_H = 0xfffffffful; - LOONGSON_PCI_HIT1_SEL_L = 0x00000006ul; /* set this BAR as invalid */ - LOONGSON_PCI_HIT1_SEL_H = 0x00000000ul; - LOONGSON_PCI_HIT2_SEL_L = 0x00000006ul; /* set this BAR as invalid */ - LOONGSON_PCI_HIT2_SEL_H = 0x00000000ul; - - /* avoid deadlock of PCI reading/writing lock operation */ - LOONGSON_PCI_ISR4C = 0xd2000001ul; - - /* can not change gnt to break pci transfer when device's gnt not - deassert for some broken device */ - LOONGSON_PXARB_CFG = 0x00fe0105ul; - -#ifdef CONFIG_CPU_SUPPORTS_ADDRWINCFG - /* - * set cpu addr window2 to map CPU address space to PCI address space - */ - LOONGSON_ADDRWIN_CPUTOPCI(ADDRWIN_WIN2, LOONGSON_CPU_MEM_SRC, - LOONGSON_PCI_MEM_DST, MMAP_CPUTOPCI_SIZE); -#endif -} extern int sbx00_acpi_init(void); static int __init pcibios_init(void) { - setup_pcimap(); loongson_pci_controller.io_map_base = mips_io_port_base; loongson_pci_mem_resource.start = loongson_sysconf.pci_mem_start_addr; -- cgit v1.2.3 From dcf78ee660888d8302a0f0888bf746a164d267fa Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Wed, 6 Nov 2019 20:28:42 +0300 Subject: MIPS: allow building with kcov coverage Add ARCH_HAS_KCOV and HAVE_GCC_PLUGINS to MIPS config. Disable instrumentation of vdso to avoid build failure. Signed-off-by: Alexey Khoroshilov Signed-off-by: Paul Burton Cc: Ralf Baechle Cc: James Hogan Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org --- arch/mips/Kconfig | 2 ++ arch/mips/vdso/Makefile | 1 + 2 files changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index d689aa485d0c..c86be02b6d89 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -87,6 +87,8 @@ config MIPS select SYSCTL_EXCEPTION_TRACE select VIRT_TO_BUS select ARCH_HAS_PTE_SPECIAL if !(32BIT && CPU_HAS_RIXI) + select ARCH_HAS_KCOV + select HAVE_GCC_PLUGINS menu "Machine selection" diff --git a/arch/mips/vdso/Makefile b/arch/mips/vdso/Makefile index 996a934ece7d..e05938997e69 100644 --- a/arch/mips/vdso/Makefile +++ b/arch/mips/vdso/Makefile @@ -75,6 +75,7 @@ CFLAGS_REMOVE_vdso.o = -pg GCOV_PROFILE := n UBSAN_SANITIZE := n +KCOV_INSTRUMENT := n # # Shared build commands. 
-- cgit v1.2.3 From 7d73d572111ff585b953b88be4edaf2769bc017e Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Mon, 11 Nov 2019 10:50:59 -0800 Subject: MIPS: math-emu: Reuse name array in debugfs_fpuemu() The FPU_STAT_CREATE_EX() macro, used 114 times in debugfs_fpuemu(), declares a 32-byte char array to hold the name of a debugfs file. Since each use of the macro declares a new char array out of the scope of all the other uses, we end up with an unnecessarily large stack frame of 3648 bytes (i.e. 114*32) plus the size of 2 pointers (fpuemu_debugfs_base_dir & fpuemu_debugfs_inst_dir). This is enough to trigger the frame size warnings from GCC in common configurations. Avoid the unnecessary stack bloat by using a single name char array which each usage of FPU_STAT_CREATE_EX() will reinitialize via the strcpy() in adjust_instruction_counter_name(). Signed-off-by: Paul Burton Reported-by: kbuild test robot URL: https://lore.kernel.org/linux-mips/201911090929.xvXYuHUz%25lkp@intel.com/ --- arch/mips/math-emu/me-debugfs.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/mips/math-emu/me-debugfs.c b/arch/mips/math-emu/me-debugfs.c index 387724860fa6..d5ad76b2bb67 100644 --- a/arch/mips/math-emu/me-debugfs.c +++ b/arch/mips/math-emu/me-debugfs.c @@ -189,6 +189,7 @@ static int __init debugfs_fpuemu(void) { struct dentry *fpuemu_debugfs_base_dir; struct dentry *fpuemu_debugfs_inst_dir; + char name[32]; fpuemu_debugfs_base_dir = debugfs_create_dir("fpuemustats", mips_debugfs_dir); @@ -225,8 +226,6 @@ do { \ #define FPU_STAT_CREATE_EX(m) \ do { \ - char name[32]; \ - \ adjust_instruction_counter_name(name, #m); \ \ debugfs_create_file(name, 0444, fpuemu_debugfs_inst_dir, \ -- cgit v1.2.3 From 50ec88120ea16cf8b9aabf8422c364166ce3ee17 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 8 Oct 2019 19:20:39 +0300 Subject: can: mcp251x: get rid of legacy platform data Instead of using legacy platform data, switch to using device properties. For the clock frequency we use the well-established clock-frequency property. The two existing users are also converted here.
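The probe-time logic this patch converts to — prefer the clock framework's rate and fall back to a 'clock-frequency' device property, then sanity-check the result — appears in the mcp251x diff below. A hedged userspace model of just that fallback, where the two getters are stand-ins for clk_get_rate() and device_property_read_u32(), not the kernel APIs themselves:

#include <stdint.h>
#include <stdio.h>

/* Stand-ins: here the clock reports no rate and the property is present. */
static uint32_t fake_clk_get_rate(void)
{
	return 0;
}

static int fake_property_read_u32(const char *name, uint32_t *val)
{
	(void)name;
	*val = 16000000;	/* 16 MHz, as in the converted board files */
	return 0;
}

static int probe_freq(uint32_t *freq)
{
	*freq = fake_clk_get_rate();
	if (*freq == 0)
		fake_property_read_u32("clock-frequency", freq);

	/* Sanity check mirroring the driver's accepted oscillator range. */
	if (*freq < 1000000 || *freq > 25000000)
		return -1;
	return 0;
}

int main(void)
{
	uint32_t freq;

	if (probe_freq(&freq) == 0)
		printf("oscillator: %u Hz\n", freq);
	return 0;
}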
Cc: Daniel Mack Cc: Haojian Zhuang Cc: Robert Jarzmik Cc: Russell King Signed-off-by: Andy Shevchenko Signed-off-by: Marc Kleine-Budde --- arch/arm/mach-pxa/icontrol.c | 9 +++++---- arch/arm/mach-pxa/zeus.c | 9 +++++---- drivers/net/can/spi/mcp251x.c | 9 ++++----- include/linux/can/platform/mcp251x.h | 22 ---------------------- 4 files changed, 14 insertions(+), 35 deletions(-) delete mode 100644 include/linux/can/platform/mcp251x.h (limited to 'arch') diff --git a/arch/arm/mach-pxa/icontrol.c b/arch/arm/mach-pxa/icontrol.c index 865b10344ea2..0474a4b1394d 100644 --- a/arch/arm/mach-pxa/icontrol.c +++ b/arch/arm/mach-pxa/icontrol.c @@ -12,6 +12,7 @@ #include #include +#include #include #include @@ -22,7 +23,6 @@ #include #include -#include #include #include "generic.h" @@ -69,8 +69,9 @@ static struct pxa2xx_spi_chip mcp251x_chip_info4 = { .gpio_cs = ICONTROL_MCP251x_nCS4 }; -static struct mcp251x_platform_data mcp251x_info = { - .oscillator_frequency = 16E6, +static const struct property_entry mcp251x_properties[] = { + PROPERTY_ENTRY_U32("clock-frequency", 16000000), + {} }; static struct spi_board_info mcp251x_board_info[] = { @@ -79,7 +80,7 @@ static struct spi_board_info mcp251x_board_info[] = { .max_speed_hz = 6500000, .bus_num = 3, .chip_select = 0, - .platform_data = &mcp251x_info, + .properties = mcp251x_properties, .controller_data = &mcp251x_chip_info1, .irq = PXA_GPIO_TO_IRQ(ICONTROL_MCP251x_nIRQ1) }, diff --git a/arch/arm/mach-pxa/zeus.c b/arch/arm/mach-pxa/zeus.c index da113c8eefbf..b27fc7ac9cea 100644 --- a/arch/arm/mach-pxa/zeus.c +++ b/arch/arm/mach-pxa/zeus.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -27,7 +28,6 @@ #include #include #include -#include #include #include @@ -428,14 +428,15 @@ static struct gpiod_lookup_table can_regulator_gpiod_table = { }, }; -static struct mcp251x_platform_data zeus_mcp2515_pdata = { - .oscillator_frequency = 16*1000*1000, +static const struct property_entry mcp251x_properties[] = { + PROPERTY_ENTRY_U32("clock-frequency", 16000000), + {} }; static struct spi_board_info zeus_spi_board_info[] = { [0] = { .modalias = "mcp2515", - .platform_data = &zeus_mcp2515_pdata, + .properties = mcp251x_properties, .irq = PXA_GPIO_TO_IRQ(ZEUS_CAN_GPIO), .max_speed_hz = 1*1000*1000, .bus_num = 3, diff --git a/drivers/net/can/spi/mcp251x.c b/drivers/net/can/spi/mcp251x.c index bb20a9b75cc6..ee2e97da4e1d 100644 --- a/drivers/net/can/spi/mcp251x.c +++ b/drivers/net/can/spi/mcp251x.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include @@ -986,19 +985,19 @@ MODULE_DEVICE_TABLE(spi, mcp251x_id_table); static int mcp251x_can_probe(struct spi_device *spi) { const void *match = device_get_match_data(&spi->dev); - struct mcp251x_platform_data *pdata = dev_get_platdata(&spi->dev); struct net_device *net; struct mcp251x_priv *priv; struct clk *clk; - int freq, ret; + u32 freq; + int ret; clk = devm_clk_get_optional(&spi->dev, NULL); if (IS_ERR(clk)) return PTR_ERR(clk); freq = clk_get_rate(clk); - if (freq == 0 && pdata) - freq = pdata->oscillator_frequency; + if (freq == 0) + device_property_read_u32(&spi->dev, "clock-frequency", &freq); /* Sanity check */ if (freq < 1000000 || freq > 25000000) diff --git a/include/linux/can/platform/mcp251x.h b/include/linux/can/platform/mcp251x.h deleted file mode 100644 index 9e5ac27fb6c1..000000000000 --- a/include/linux/can/platform/mcp251x.h +++ /dev/null @@ -1,22 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _CAN_PLATFORM_MCP251X_H -#define 
_CAN_PLATFORM_MCP251X_H - -/* - * - * CAN bus driver for Microchip 251x CAN Controller with SPI Interface - * - */ - -#include - -/* - * struct mcp251x_platform_data - MCP251X SPI CAN controller platform data - * @oscillator_frequency: - oscillator frequency in Hz - */ - -struct mcp251x_platform_data { - unsigned long oscillator_frequency; -}; - -#endif /* !_CAN_PLATFORM_MCP251X_H */ -- cgit v1.2.3 From d98b5d0728d6d9f49f370d1846dbac30ec596a42 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Thu, 31 Oct 2019 09:54:31 +0100 Subject: s390/cpum_sf: Use consistent debug print format for sampling Use a consistent debug print format of the form "variable blank value". Also add a leading 0x for all hex values. Signed-off-by: Thomas Richter Signed-off-by: Vasily Gorbik --- arch/s390/kernel/perf_cpum_sf.c | 91 ++++++++++++++++++++--------------------- 1 file changed, 44 insertions(+), 47 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 3d8b12a9a6ff..504dac2be4dd 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -157,7 +157,7 @@ static void free_sampling_buffer(struct sf_buffer *sfb) } debug_sprintf_event(sfdbg, 5, - "free_sampling_buffer: freed sdbt=%p\n", sfb->sdbt); + "free_sampling_buffer: freed sdbt %p\n", sfb->sdbt); memset(sfb, 0, sizeof(*sfb)); } @@ -213,9 +213,9 @@ static int realloc_sampling_buffer(struct sf_buffer *sfb, */ if (sfb->sdbt != get_next_sdbt(tail)) { debug_sprintf_event(sfdbg, 3, "realloc_sampling_buffer: " - "sampling buffer is not linked: origin=%p" - "tail=%p\n", - (void *) sfb->sdbt, (void *) tail); + "sampling buffer is not linked: origin %p" + " tail %p\n", + (void *)sfb->sdbt, (void *)tail); return -EINVAL; } @@ -252,7 +252,7 @@ static int realloc_sampling_buffer(struct sf_buffer *sfb, sfb->tail = tail; debug_sprintf_event(sfdbg, 4, "realloc_sampling_buffer: new buffer" - " settings: sdbt=%lu sdb=%lu\n", + " settings: sdbt %lu sdb %lu\n", sfb->num_sdbt, sfb->num_sdb); return rc; } @@ -293,11 +293,11 @@ static int alloc_sampling_buffer(struct sf_buffer *sfb, unsigned long num_sdb) if (rc) { free_sampling_buffer(sfb); debug_sprintf_event(sfdbg, 4, "alloc_sampling_buffer: " - "realloc_sampling_buffer failed with rc=%i\n", rc); + "realloc_sampling_buffer failed with rc %i\n", rc); } else debug_sprintf_event(sfdbg, 4, - "alloc_sampling_buffer: tear=%p dear=%p\n", - sfb->sdbt, (void *) *sfb->sdbt); + "alloc_sampling_buffer: tear %p dear %p\n", + sfb->sdbt, (void *)*sfb->sdbt); return rc; } @@ -404,8 +404,8 @@ static int allocate_buffers(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc) return 0; debug_sprintf_event(sfdbg, 3, - "allocate_buffers: rate=%lu f=%lu sdb=%lu/%lu" - " sample_size=%lu cpuhw=%p\n", + "allocate_buffers: rate %lu f %lu sdb %lu/%lu" + " sample_size %lu cpuhw %p\n", SAMPL_RATE(hwc), freq, n_sdb, sfb_max_limit(hwc), sample_size, cpuhw); @@ -465,8 +465,8 @@ static void sfb_account_overflows(struct cpu_hw_sf *cpuhw, if (num) sfb_account_allocs(num, hwc); - debug_sprintf_event(sfdbg, 5, "sfb: overflow: overflow=%llu ratio=%lu" - " num=%lu\n", OVERFLOW_REG(hwc), ratio, num); + debug_sprintf_event(sfdbg, 5, "sfb: overflow: overflow %llu ratio %lu" + " num %lu\n", OVERFLOW_REG(hwc), ratio, num); OVERFLOW_REG(hwc) = 0; } @@ -505,11 +505,11 @@ static void extend_sampling_buffer(struct sf_buffer *sfb, rc = realloc_sampling_buffer(sfb, num, GFP_ATOMIC); if (rc) debug_sprintf_event(sfdbg, 5, "sfb: extend: realloc " - "failed with rc=%i\n", rc); + "failed with rc
%i\n", rc); if (sfb_has_pending_allocs(sfb, hwc)) debug_sprintf_event(sfdbg, 5, "sfb: extend: " - "req=%lu alloc=%lu remaining=%lu\n", + "req %lu alloc %lu remaining %lu\n", num, sfb->num_sdb - num_old, sfb_pending_allocs(sfb, hwc)); } @@ -538,20 +538,22 @@ static void setup_pmc_cpu(void *flags) err = sf_disable(); if (err) pr_err("Switching off the sampling facility failed " - "with rc=%i\n", err); + "with rc %i\n", err); debug_sprintf_event(sfdbg, 5, - "setup_pmc_cpu: initialized: cpuhw=%p\n", cpusf); + "setup_pmc_cpu: initialized: cpuhw %p\n", + cpusf); break; case PMC_RELEASE: cpusf->flags &= ~PMU_F_RESERVED; err = sf_disable(); if (err) { pr_err("Switching off the sampling facility failed " - "with rc=%i\n", err); + "with rc %i\n", err); } else deallocate_buffers(cpusf); debug_sprintf_event(sfdbg, 5, - "setup_pmc_cpu: released: cpuhw=%p\n", cpusf); + "setup_pmc_cpu: released: cpuhw %p\n", + cpusf); break; } if (err) @@ -744,7 +746,7 @@ static int __hw_perf_event_init_rate(struct perf_event *event, SAMPL_RATE(hwc) = rate; hw_init_period(hwc, SAMPL_RATE(hwc)); debug_sprintf_event(sfdbg, 4, "__hw_perf_event_init_rate:" - "cpu:%d period:%llx freq:%d,%#lx\n", event->cpu, + "cpu:%d period:%#llx freq:%d,%#lx\n", event->cpu, event->attr.sample_period, event->attr.freq, SAMPLE_FREQ_MODE(hwc)); return 0; @@ -963,7 +965,7 @@ static void cpumsf_pmu_enable(struct pmu *pmu) err = lsctl(&cpuhw->lsctl); if (err) { cpuhw->flags &= ~PMU_F_ENABLED; - pr_err("Loading sampling controls failed: op=%i err=%i\n", + pr_err("Loading sampling controls failed: op %i err %i\n", 1, err); return; } @@ -971,8 +973,8 @@ static void cpumsf_pmu_enable(struct pmu *pmu) /* Load current program parameter */ lpp(&S390_lowcore.lpp); - debug_sprintf_event(sfdbg, 6, "pmu_enable: es=%i cs=%i ed=%i cd=%i " - "interval:%lx tear=%p dear=%p\n", + debug_sprintf_event(sfdbg, 6, "pmu_enable: es %i cs %i ed %i cd %i " + "interval %#lx tear %p dear %p\n", cpuhw->lsctl.es, cpuhw->lsctl.cs, cpuhw->lsctl.ed, cpuhw->lsctl.cd, cpuhw->lsctl.interval, (void *) cpuhw->lsctl.tear, @@ -999,7 +1001,7 @@ static void cpumsf_pmu_disable(struct pmu *pmu) err = lsctl(&inactive); if (err) { - pr_err("Loading sampling controls failed: op=%i err=%i\n", + pr_err("Loading sampling controls failed: op %i err %i\n", 2, err); return; } @@ -1017,7 +1019,7 @@ static void cpumsf_pmu_disable(struct pmu *pmu) } } else debug_sprintf_event(sfdbg, 3, "cpumsf_pmu_disable: " - "qsi() failed with err=%i\n", err); + "qsi() failed with err %i\n", err); cpuhw->flags &= ~PMU_F_ENABLED; } @@ -1130,15 +1132,6 @@ static void perf_event_count_update(struct perf_event *event, u64 count) local64_add(count, &event->count); } -static void debug_sample_entry(struct hws_basic_entry *sample, - struct hws_trailer_entry *te) -{ - debug_sprintf_event(sfdbg, 4, "hw_collect_samples: Found unknown " - "sampling data entry: te->f=%i basic.def=%04x " - "(%p)\n", - te->f, sample->def, sample); -} - /* hw_collect_samples() - Walk through a sample-data-block and collect samples * @event: The perf event * @sdbt: Sample-data-block table @@ -1192,7 +1185,11 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt, /* Count discarded samples */ *overflow += 1; } else { - debug_sample_entry(sample, te); + debug_sprintf_event(sfdbg, 4, + "hw_collect_samples: Found unknown" + " sampling data entry: te->f %i" + " basic.def %#4x (%p)\n", + te->f, sample->def, sample); /* Sample slot is not yet written or other record. 
* * This condition can occur if the buffer was reused @@ -1267,8 +1264,8 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all) sampl_overflow += te->overflow; /* Timestamps are valid for full sample-data-blocks only */ - debug_sprintf_event(sfdbg, 6, "hw_perf_event_update: sdbt=%p " - "overflow=%llu timestamp=%#llx\n", + debug_sprintf_event(sfdbg, 6, "hw_perf_event_update: sdbt %p " + "overflow %llu timestamp %#llx\n", sdbt, te->overflow, (te->f) ? trailer_timestamp(te) : 0ULL); @@ -1314,7 +1311,7 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all) sampl_overflow, 1 + num_sdb); if (sampl_overflow || event_overflow) debug_sprintf_event(sfdbg, 4, "hw_perf_event_update: " - "overflow stats: sample=%llu event=%llu\n", + "overflow stats: sample %llu event %llu\n", sampl_overflow, event_overflow); } @@ -1368,7 +1365,7 @@ static void aux_output_end(struct perf_output_handle *handle) te = aux_sdb_trailer(aux, aux->alert_mark); te->flags &= ~SDB_TE_ALERT_REQ_MASK; - debug_sprintf_event(sfdbg, 6, "aux_output_end: collect %lx SDBs\n", i); + debug_sprintf_event(sfdbg, 6, "aux_output_end: collect %#lx SDBs\n", i); } /* @@ -1428,8 +1425,8 @@ static int aux_output_begin(struct perf_output_handle *handle, debug_sprintf_event(sfdbg, 6, "aux_output_begin: " "head->alert_mark->empty_mark (num_alert, range)" - "[%lx -> %lx -> %lx] (%lx, %lx) " - "tear index %lx, tear %lx dear %lx\n", + "[%#lx -> %#lx -> %#lx] (%#lx, %#lx) " + "tear index %#lx, tear %#lx dear %#lx\n", aux->head, aux->alert_mark, aux->empty_mark, AUX_SDB_NUM_ALERT(aux), range, head / CPUM_SF_SDB_PER_TABLE, @@ -1596,13 +1593,13 @@ static void hw_collect_aux(struct cpu_hw_sf *cpuhw) perf_aux_output_end(&cpuhw->handle, size); pr_err("Sample data caused the AUX buffer with %lu " "pages to overflow\n", num_sdb); - debug_sprintf_event(sfdbg, 1, "head %lx range %lx " - "overflow %llx\n", + debug_sprintf_event(sfdbg, 1, "head %#lx range %#lx " + "overflow %#llx\n", aux->head, range, overflow); } else { size = AUX_SDB_NUM_ALERT(aux) << PAGE_SHIFT; perf_aux_output_end(&cpuhw->handle, size); - debug_sprintf_event(sfdbg, 6, "head %lx alert %lx " + debug_sprintf_event(sfdbg, 6, "head %#lx alert %#lx " "already full, try another\n", aux->head, aux->alert_mark); } @@ -1610,7 +1607,7 @@ static void hw_collect_aux(struct cpu_hw_sf *cpuhw) if (done) debug_sprintf_event(sfdbg, 6, "aux_reset_buffer: " - "[%lx -> %lx -> %lx] (%lx, %lx)\n", + "[%#lx -> %#lx -> %#lx] (%#lx, %#lx)\n", aux->head, aux->alert_mark, aux->empty_mark, AUX_SDB_NUM_ALERT(aux), range); } @@ -1800,7 +1797,7 @@ static int cpumsf_pmu_check_period(struct perf_event *event, u64 value) SAMPL_RATE(&event->hw) = rate; hw_init_period(&event->hw, SAMPL_RATE(&event->hw)); debug_sprintf_event(sfdbg, 4, "cpumsf_pmu_check_period:" - "cpu:%d value:%llx period:%llx freq:%d\n", + "cpu:%d value:%#llx period:%#llx freq:%d\n", event->cpu, value, event->attr.sample_period, do_freq); return 0; @@ -2111,7 +2108,7 @@ static int param_set_sfb_size(const char *val, const struct kernel_param *kp) sfb_set_limits(min, max); pr_info("The sampling buffer limits have changed to: " - "min=%lu max=%lu (diag=x%lu)\n", + "min %lu max %lu (diag %lu)\n", CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB, CPUM_SF_SDB_DIAG_FACTOR); return 0; } @@ -2129,7 +2126,7 @@ static const struct kernel_param_ops param_ops_sfb_size = { static void __init pr_cpumsf_err(unsigned int reason) { pr_err("Sampling facility support for perf is not available: " - "reason=%04x\n", reason); + "reason %#x\n", reason); } 
static int __init init_cpum_sampling_pmu(void) -- cgit v1.2.3 From c18388340c4165c53f82d30569e8a5fc2f8a850e Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Thu, 31 Oct 2019 10:21:05 +0100 Subject: s390/cpum_sf: Replace function name in debug statements Replace hard-coded function names in debug statements with the "%s ...", __func__ construct suggested by the checkpatch.pl script. Signed-off-by: Thomas Richter Signed-off-by: Vasily Gorbik --- arch/s390/kernel/perf_cpum_sf.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 504dac2be4dd..e7587343e272 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -156,8 +156,8 @@ static void free_sampling_buffer(struct sf_buffer *sfb) } } - debug_sprintf_event(sfdbg, 5, - "free_sampling_buffer: freed sdbt %p\n", sfb->sdbt); + debug_sprintf_event(sfdbg, 5, "%s freed sdbt %p\n", __func__, + sfb->sdbt); memset(sfb, 0, sizeof(*sfb)); } @@ -212,9 +212,9 @@ static int realloc_sampling_buffer(struct sf_buffer *sfb, * the sampling buffer origin. */ if (sfb->sdbt != get_next_sdbt(tail)) { - debug_sprintf_event(sfdbg, 3, "realloc_sampling_buffer: " + debug_sprintf_event(sfdbg, 3, "%s: " "sampling buffer is not linked: origin %p" - " tail %p\n", + " tail %p\n", __func__, (void *)sfb->sdbt, (void *)tail); return -EINVAL; } @@ -404,8 +404,8 @@ static int allocate_buffers(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc) return 0; debug_sprintf_event(sfdbg, 3, - "allocate_buffers: rate %lu f %lu sdb %lu/%lu" - " sample_size %lu cpuhw %p\n", + "%s: rate %lu f %lu sdb %lu/%lu" + " sample_size %lu cpuhw %p\n", __func__, SAMPL_RATE(hwc), freq, n_sdb, sfb_max_limit(hwc), sample_size, cpuhw); @@ -540,7 +540,7 @@ static void setup_pmc_cpu(void *flags) pr_err("Switching off the sampling facility failed " "with rc %i\n", err); debug_sprintf_event(sfdbg, 5, - "setup_pmc_cpu: initialized: cpuhw %p\n", + "%s: initialized: cpuhw %p\n", __func__, cpusf); break; case PMC_RELEASE: @@ -552,7 +552,7 @@ static void setup_pmc_cpu(void *flags) } else deallocate_buffers(cpusf); debug_sprintf_event(sfdbg, 5, - "setup_pmc_cpu: released: cpuhw %p\n", + "%s: released: cpuhw %p\n", __func__, cpusf); break; } @@ -1186,9 +1186,9 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt, *overflow += 1; } else { debug_sprintf_event(sfdbg, 4, - "hw_collect_samples: Found unknown" + "%s: Found unknown" " sampling data entry: te->f %i" - " basic.def %#4x (%p)\n", + " basic.def %#4x (%p)\n", __func__, te->f, sample->def, sample); /* Sample slot is not yet written or other record. * @@ -1264,9 +1264,9 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all) sampl_overflow += te->overflow; /* Timestamps are valid for full sample-data-blocks only */ - debug_sprintf_event(sfdbg, 6, "hw_perf_event_update: sdbt %p " + debug_sprintf_event(sfdbg, 6, "%s: sdbt %p " "overflow %llu timestamp %#llx\n", - sdbt, te->overflow, + __func__, sdbt, te->overflow, (te->f) ?
trailer_timestamp(te) : 0ULL); /* Collect all samples from a single sample-data-block and @@ -1310,9 +1310,9 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all) OVERFLOW_REG(hwc) = DIV_ROUND_UP(OVERFLOW_REG(hwc) + sampl_overflow, 1 + num_sdb); if (sampl_overflow || event_overflow) - debug_sprintf_event(sfdbg, 4, "hw_perf_event_update: " + debug_sprintf_event(sfdbg, 4, "%s: " "overflow stats: sample %llu event %llu\n", - sampl_overflow, event_overflow); + __func__, sampl_overflow, event_overflow); } #define AUX_SDB_INDEX(aux, i) ((i) % aux->sfb.num_sdb) @@ -1365,7 +1365,7 @@ static void aux_output_end(struct perf_output_handle *handle) te = aux_sdb_trailer(aux, aux->alert_mark); te->flags &= ~SDB_TE_ALERT_REQ_MASK; - debug_sprintf_event(sfdbg, 6, "aux_output_end: collect %#lx SDBs\n", i); + debug_sprintf_event(sfdbg, 6, "%s: collect %#lx SDBs\n", __func__, i); } /* -- cgit v1.2.3 From 72fbcd057fa1d25da70e33312a0c8401d4295599 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Thu, 31 Oct 2019 10:38:53 +0100 Subject: s390/cpum_sf: Assign error value to err variable When starting the CPU Measurement sampling facility with the qsi() function, this function may return an error value. This error value is referenced in the else part of the if statement to dump its value in a debug statement. Right now this value is always zero because it has not been assigned a value. Signed-off-by: Thomas Richter Signed-off-by: Vasily Gorbik --- arch/s390/kernel/perf_cpum_sf.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index e7587343e272..69506fdbd9a1 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -1007,7 +1007,8 @@ static void cpumsf_pmu_disable(struct pmu *pmu) } /* Save state of TEAR and DEAR register contents */ - if (!qsi(&si)) { + err = qsi(&si); + if (!err) { /* TEAR/DEAR values are valid only if the sampling facility is * enabled. Note that cpumsf_pmu_disable() might be called even * for a disabled sampling facility because cpumsf_pmu_enable() -- cgit v1.2.3 From 544f1d62e3e6c6e6d17a5e56f6139208acb5ff46 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Thu, 31 Oct 2019 18:25:16 +0100 Subject: s390/disassembler: don't hide instruction addresses Due to kptr_restrict, JITted BPF code is now displayed like this: 000000000b6ed1b2: ebdff0800024 stmg %r13,%r15,128(%r15) 000000004cde2ba0: 41d0f040 la %r13,64(%r15) 00000000fbad41b0: a7fbffa0 aghi %r15,-96 Leaking kernel addresses to dmesg is not a concern in this case, because this happens only when JIT debugging is explicitly activated, which only root can do. Use %px in this particular instance, and also to print an instruction address in show_code and PCREL (e.g. brasl) arguments in print_insn. While at present functionally equivalent to %016lx, %px is recommended by Documentation/core-api/printk-formats.rst for such cases.
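As an aside, the practical difference between the two specifiers can be sketched in a few lines of kernel-style C (a minimal illustration assuming a module context; the helper name and message text are hypothetical, not part of the patch):

#include <linux/printk.h>

/* Hypothetical helper: with kptr_restrict in effect, %p prints a
 * per-boot hashed value, while %px prints the raw address that a
 * JIT dump needs in order to be useful.
 */
static void dump_insn_addr(const void *code)
{
	pr_debug("hashed %p raw %px\n", code, code);
}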
Signed-off-by: Ilya Leoshkevich Reviewed-by: Vasily Gorbik Signed-off-by: Vasily Gorbik --- arch/s390/kernel/dis.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c index 7abe6ae261b4..f304802ecf7b 100644 --- a/arch/s390/kernel/dis.c +++ b/arch/s390/kernel/dis.c @@ -461,10 +461,11 @@ static int print_insn(char *buffer, unsigned char *code, unsigned long addr) ptr += sprintf(ptr, "%%c%i", value); else if (operand->flags & OPERAND_VR) ptr += sprintf(ptr, "%%v%i", value); - else if (operand->flags & OPERAND_PCREL) - ptr += sprintf(ptr, "%lx", (signed int) value - + addr); - else if (operand->flags & OPERAND_SIGNED) + else if (operand->flags & OPERAND_PCREL) { + void *pcrel = (void *)((int)value + addr); + + ptr += sprintf(ptr, "%px", pcrel); + } else if (operand->flags & OPERAND_SIGNED) ptr += sprintf(ptr, "%i", value); else ptr += sprintf(ptr, "%u", value); @@ -536,7 +537,7 @@ void show_code(struct pt_regs *regs) else *ptr++ = ' '; addr = regs->psw.addr + start - 32; - ptr += sprintf(ptr, "%016lx: ", addr); + ptr += sprintf(ptr, "%px: ", (void *)addr); if (start + opsize >= end) break; for (i = 0; i < opsize; i++) @@ -564,7 +565,7 @@ void print_fn_code(unsigned char *code, unsigned long len) opsize = insn_length(*code); if (opsize > len) break; - ptr += sprintf(ptr, "%p: ", code); + ptr += sprintf(ptr, "%px: ", code); for (i = 0; i < opsize; i++) ptr += sprintf(ptr, "%02x", code[i]); *ptr++ = '\t'; -- cgit v1.2.3 From f6656208f04e5b3804054008eba4bf7170f4c841 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Mon, 11 Nov 2019 13:43:12 -0800 Subject: x86/mce/therm_throt: Optimize notifications of thermal throttle Some modern systems have very tight thermal tolerances. Because of this they may cross thermal thresholds when running normal workloads (even during boot). The CPU hardware will react by limiting power/frequency and using duty cycles to bring the temperature back into the normal range. Thus users may see a "critical" message about the "temperature above threshold" which is soon followed by "temperature/speed normal". These messages are rate-limited, but still may repeat every few minutes. This issue became worse starting with the Ivy Bridge generation of CPUs because they include a TCC activation offset in the MSR IA32_TEMPERATURE_TARGET. OEMs use this to provide alerts long before critical temperatures are reached. A test run on a laptop with an Intel 8th Gen i5 core for two hours with a workload resulted in 20K+ thermal interrupts per CPU at the core level and another 20K+ interrupts at the package level. The kernel logs were full of throttling messages. The real value of these threshold interrupts is to debug problems with external cooling solutions and performance issues due to excessive throttling. So the solution here is the following: - In the current thermal_throttle folder, show: - the maximum time for one throttling event, and - the total amount of time the system was in the throttling state. - Do not log short excursions. - Log only when, in spite of thermal throttling, the temperature is rising. On the high threshold interrupt, trigger a delayed workqueue that monitors the threshold violation log bit (THERM_STATUS_PROCHOT_LOG). When the log bit is set, this workqueue callback calculates a three-point moving average and logs a warning message when the temperature trend is rising.
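As an aside, the trend detection described above can be modeled by a small standalone C program (the names, thresholds and sample values below are illustrative only, not taken from the patch):

#include <stdio.h>

#define NUM_SAMPLES 3

/*
 * Returns 1 when the new three-sample average is hotter than the
 * previous one. On x86 the reported temperature is an offset below
 * the trip point, so a *smaller* number means *hotter*; "rising"
 * therefore means the previous average is greater than the new one.
 */
static int trend_rising(const unsigned char samples[NUM_SAMPLES],
			unsigned int *last_avg)
{
	unsigned int avg = 0;
	int i, rising;

	for (i = 0; i < NUM_SAMPLES; i++)
		avg += samples[i];
	avg /= NUM_SAMPLES;

	rising = *last_avg > avg;
	*last_avg = avg;
	return rising;
}

int main(void)
{
	const unsigned char s1[NUM_SAMPLES] = { 12, 11, 10 };
	const unsigned char s2[NUM_SAMPLES] = { 8, 7, 6 }; /* closer to PROCHOT */
	unsigned int last_avg = 0;

	trend_rising(s1, &last_avg);		/* seeds the average */
	printf("rising: %d\n", trend_rising(s2, &last_avg));	/* prints 1 */
	return 0;
}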
When this log bit is clear and the temperature is below the threshold temperature, the workqueue callback logs a "Normal" message. Once a high threshold event is logged, the logging is rate-limited. With this patch on the same test laptop, no warnings are printed in the logs, as the maximum time the processor needed to bring the temperature back under control was only 280 ms. This implementation is done with inputs from Alan Cox and Tony Luck. [ bp: Touchups. ] Signed-off-by: Srinivas Pandruvada Signed-off-by: Borislav Petkov Cc: "H. Peter Anvin" Cc: bberg@redhat.com Cc: ckellner@redhat.com Cc: hdegoede@redhat.com Cc: Ingo Molnar Cc: linux-edac Cc: Thomas Gleixner Cc: Tony Luck Cc: x86-ml Link: https://lkml.kernel.org/r/20191111214312.81365-1-srinivas.pandruvada@linux.intel.com --- arch/x86/kernel/cpu/mce/therm_throt.c | 251 ++++++++++++++++++++++++++++++---- 1 file changed, 227 insertions(+), 24 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mce/therm_throt.c b/arch/x86/kernel/cpu/mce/therm_throt.c index bc441d68d060..d01e0da0163a 100644 --- a/arch/x86/kernel/cpu/mce/therm_throt.c +++ b/arch/x86/kernel/cpu/mce/therm_throt.c @@ -40,15 +40,58 @@ #define THERMAL_THROTTLING_EVENT 0 #define POWER_LIMIT_EVENT 1 -/* - * Current thermal event state: +/** + * struct _thermal_state - Represent the current thermal event state + * @next_check: Stores the next timestamp, when it is allowed + * to log the next warning message. + * @last_interrupt_time: Stores the timestamp for the last threshold + * high event. + * @therm_work: Delayed workqueue structure + * @count: Stores the current running count for thermal + * or power threshold interrupts. + * @last_count: Stores the previous running count for thermal + * or power threshold interrupts. + * @max_time_ms: This shows the maximum amount of time CPU was + * in throttled state for a single thermal + * threshold high to low state. + * @total_time_ms: This is a cumulative time during which CPU was + * in the throttled state. + * @rate_control_active: Set when a throttling message is logged. + * This is used for the purpose of rate-control. + * @new_event: Stores the last high/low status of the + * THERM_STATUS_PROCHOT or + * THERM_STATUS_POWER_LIMIT. + * @level: Stores whether this _thermal_state instance is + * for a CORE level or for PACKAGE level. + * @sample_index: Index for storing the next sample in the buffer + * temp_samples[]. + * @sample_count: Total number of samples collected in the buffer + * temp_samples[]. + * @average: The last moving average of temperature samples + * @baseline_temp: Temperature at which thermal threshold high + * interrupt was generated. + * @temp_samples: Storage for temperature samples to calculate + * moving average. + * + * This structure is used to represent data related to thermal state for a CPU. + * There is a separate storage for core and package level for each CPU.
*/ struct _thermal_state { - bool new_event; - int event; u64 next_check; + u64 last_interrupt_time; + struct delayed_work therm_work; unsigned long count; unsigned long last_count; + unsigned long max_time_ms; + unsigned long total_time_ms; + bool rate_control_active; + bool new_event; + u8 level; + u8 sample_index; + u8 sample_count; + u8 average; + u8 baseline_temp; + u8 temp_samples[3]; }; struct thermal_state { @@ -121,8 +164,22 @@ define_therm_throt_device_one_ro(package_throttle_count); define_therm_throt_device_show_func(package_power_limit, count); define_therm_throt_device_one_ro(package_power_limit_count); +define_therm_throt_device_show_func(core_throttle, max_time_ms); +define_therm_throt_device_one_ro(core_throttle_max_time_ms); + +define_therm_throt_device_show_func(package_throttle, max_time_ms); +define_therm_throt_device_one_ro(package_throttle_max_time_ms); + +define_therm_throt_device_show_func(core_throttle, total_time_ms); +define_therm_throt_device_one_ro(core_throttle_total_time_ms); + +define_therm_throt_device_show_func(package_throttle, total_time_ms); +define_therm_throt_device_one_ro(package_throttle_total_time_ms); + static struct attribute *thermal_throttle_attrs[] = { &dev_attr_core_throttle_count.attr, + &dev_attr_core_throttle_max_time_ms.attr, + &dev_attr_core_throttle_total_time_ms.attr, NULL }; @@ -135,6 +192,105 @@ static const struct attribute_group thermal_attr_group = { #define CORE_LEVEL 0 #define PACKAGE_LEVEL 1 +#define THERM_THROT_POLL_INTERVAL HZ +#define THERM_STATUS_PROCHOT_LOG BIT(1) + +static void clear_therm_status_log(int level) +{ + int msr; + u64 msr_val; + + if (level == CORE_LEVEL) + msr = MSR_IA32_THERM_STATUS; + else + msr = MSR_IA32_PACKAGE_THERM_STATUS; + + rdmsrl(msr, msr_val); + wrmsrl(msr, msr_val & ~THERM_STATUS_PROCHOT_LOG); +} + +static void get_therm_status(int level, bool *proc_hot, u8 *temp) +{ + int msr; + u64 msr_val; + + if (level == CORE_LEVEL) + msr = MSR_IA32_THERM_STATUS; + else + msr = MSR_IA32_PACKAGE_THERM_STATUS; + + rdmsrl(msr, msr_val); + if (msr_val & THERM_STATUS_PROCHOT_LOG) + *proc_hot = true; + else + *proc_hot = false; + + *temp = (msr_val >> 16) & 0x7F; +} + +static void throttle_active_work(struct work_struct *work) +{ + struct _thermal_state *state = container_of(to_delayed_work(work), + struct _thermal_state, therm_work); + unsigned int i, avg, this_cpu = smp_processor_id(); + u64 now = get_jiffies_64(); + bool hot; + u8 temp; + + get_therm_status(state->level, &hot, &temp); + /* temperature value is offset from the max so lesser means hotter */ + if (!hot && temp > state->baseline_temp) { + if (state->rate_control_active) + pr_info("CPU%d: %s temperature/speed normal (total events = %lu)\n", + this_cpu, + state->level == CORE_LEVEL ? 
"Core" : "Package", + state->count); + + state->rate_control_active = false; + return; + } + + if (time_before64(now, state->next_check) && + state->rate_control_active) + goto re_arm; + + state->next_check = now + CHECK_INTERVAL; + + if (state->count != state->last_count) { + /* There was one new thermal interrupt */ + state->last_count = state->count; + state->average = 0; + state->sample_count = 0; + state->sample_index = 0; + } + + state->temp_samples[state->sample_index] = temp; + state->sample_count++; + state->sample_index = (state->sample_index + 1) % ARRAY_SIZE(state->temp_samples); + if (state->sample_count < ARRAY_SIZE(state->temp_samples)) + goto re_arm; + + avg = 0; + for (i = 0; i < ARRAY_SIZE(state->temp_samples); ++i) + avg += state->temp_samples[i]; + + avg /= ARRAY_SIZE(state->temp_samples); + + if (state->average > avg) { + pr_warn("CPU%d: %s temperature is above threshold, cpu clock is throttled (total events = %lu)\n", + this_cpu, + state->level == CORE_LEVEL ? "Core" : "Package", + state->count); + state->rate_control_active = true; + } + + state->average = avg; + +re_arm: + clear_therm_status_log(state->level); + schedule_delayed_work_on(this_cpu, &state->therm_work, THERM_THROT_POLL_INTERVAL); +} + /*** * therm_throt_process - Process thermal throttling event from interrupt * @curr: Whether the condition is current or not (boolean), since the @@ -178,27 +334,33 @@ static void therm_throt_process(bool new_event, int event, int level) if (new_event) state->count++; - if (time_before64(now, state->next_check) && - state->count != state->last_count) + if (event != THERMAL_THROTTLING_EVENT) return; - state->next_check = now + CHECK_INTERVAL; - state->last_count = state->count; + if (new_event && !state->last_interrupt_time) { + bool hot; + u8 temp; + + get_therm_status(state->level, &hot, &temp); + /* + * Ignore short temperature spike as the system is not close + * to PROCHOT. 10C offset is large enough to ignore. It is + * already dropped from the high threshold temperature. + */ + if (temp > 10) + return; - /* if we just entered the thermal event */ - if (new_event) { - if (event == THERMAL_THROTTLING_EVENT) - pr_warn("CPU%d: %s temperature above threshold, cpu clock throttled (total events = %lu)\n", - this_cpu, - level == CORE_LEVEL ? "Core" : "Package", - state->count); - return; - } - if (old_event) { - if (event == THERMAL_THROTTLING_EVENT) - pr_info("CPU%d: %s temperature/speed normal\n", this_cpu, - level == CORE_LEVEL ? 
"Core" : "Package"); - return; + state->baseline_temp = temp; + state->last_interrupt_time = now; + schedule_delayed_work_on(this_cpu, &state->therm_work, THERM_THROT_POLL_INTERVAL); + } else if (old_event && state->last_interrupt_time) { + unsigned long throttle_time; + + throttle_time = jiffies_delta_to_msecs(now - state->last_interrupt_time); + if (throttle_time > state->max_time_ms) + state->max_time_ms = throttle_time; + state->total_time_ms += throttle_time; + state->last_interrupt_time = 0; } } @@ -244,20 +406,47 @@ static int thermal_throttle_add_dev(struct device *dev, unsigned int cpu) if (err) return err; - if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable) + if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable) { err = sysfs_add_file_to_group(&dev->kobj, &dev_attr_core_power_limit_count.attr, thermal_attr_group.name); + if (err) + goto del_group; + } + if (cpu_has(c, X86_FEATURE_PTS)) { err = sysfs_add_file_to_group(&dev->kobj, &dev_attr_package_throttle_count.attr, thermal_attr_group.name); - if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable) + if (err) + goto del_group; + + err = sysfs_add_file_to_group(&dev->kobj, + &dev_attr_package_throttle_max_time_ms.attr, + thermal_attr_group.name); + if (err) + goto del_group; + + err = sysfs_add_file_to_group(&dev->kobj, + &dev_attr_package_throttle_total_time_ms.attr, + thermal_attr_group.name); + if (err) + goto del_group; + + if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable) { err = sysfs_add_file_to_group(&dev->kobj, &dev_attr_package_power_limit_count.attr, thermal_attr_group.name); + if (err) + goto del_group; + } } + return 0; + +del_group: + sysfs_remove_group(&dev->kobj, &thermal_attr_group); + return err; } @@ -269,15 +458,29 @@ static void thermal_throttle_remove_dev(struct device *dev) /* Get notified when a cpu comes on/off. Be hotplug friendly. */ static int thermal_throttle_online(unsigned int cpu) { + struct thermal_state *state = &per_cpu(thermal_state, cpu); struct device *dev = get_cpu_device(cpu); + state->package_throttle.level = PACKAGE_LEVEL; + state->core_throttle.level = CORE_LEVEL; + + INIT_DELAYED_WORK(&state->package_throttle.therm_work, throttle_active_work); + INIT_DELAYED_WORK(&state->core_throttle.therm_work, throttle_active_work); + return thermal_throttle_add_dev(dev, cpu); } static int thermal_throttle_offline(unsigned int cpu) { + struct thermal_state *state = &per_cpu(thermal_state, cpu); struct device *dev = get_cpu_device(cpu); + cancel_delayed_work(&state->package_throttle.therm_work); + cancel_delayed_work(&state->core_throttle.therm_work); + + state->package_throttle.rate_control_active = false; + state->core_throttle.rate_control_active = false; + thermal_throttle_remove_dev(dev); return 0; } -- cgit v1.2.3 From 4e3f77d8419b6787f3eb4d4f5178f459d693f9bb Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 11 Nov 2019 15:46:26 +0100 Subject: xen/mcelog: add PPIN to record when available This is to augment commit 3f5a7896a5 ("x86/mce: Include the PPIN in MCE records when available"). I'm also adding "synd" and "ipid" fields to struct xen_mce, in an attempt to keep field offsets in sync with struct mce. These two fields won't get populated for now, though. 
Signed-off-by: Jan Beulich Reviewed-by: Boris Ostrovsky Signed-off-by: Juergen Gross --- arch/x86/include/asm/msr-index.h | 2 ++ drivers/xen/mcelog.c | 5 +++++ include/xen/interface/xen-mca.h | 9 ++++++++- 3 files changed, 15 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 20ce682a2540..6ec319ecb001 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -393,6 +393,8 @@ #define MSR_AMD_PSTATE_DEF_BASE 0xc0010064 #define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140 #define MSR_AMD64_OSVW_STATUS 0xc0010141 +#define MSR_AMD_PPIN_CTL 0xc00102f0 +#define MSR_AMD_PPIN 0xc00102f1 #define MSR_AMD64_LS_CFG 0xc0011020 #define MSR_AMD64_DC_CFG 0xc0011022 #define MSR_AMD64_BU_CFG2 0xc001102a diff --git a/drivers/xen/mcelog.c b/drivers/xen/mcelog.c index c870cdcc9bbf..e9ac3b8c4167 100644 --- a/drivers/xen/mcelog.c +++ b/drivers/xen/mcelog.c @@ -253,6 +253,11 @@ static int convert_log(struct mc_info *mi) case MSR_IA32_MCG_CAP: m.mcgcap = g_physinfo[i].mc_msrvalues[j].value; break; + + case MSR_PPIN: + case MSR_AMD_PPIN: + m.ppin = g_physinfo[i].mc_msrvalues[j].value; + break; } mic = NULL; diff --git a/include/xen/interface/xen-mca.h b/include/xen/interface/xen-mca.h index d7a45f08fb48..7483a78d2425 100644 --- a/include/xen/interface/xen-mca.h +++ b/include/xen/interface/xen-mca.h @@ -331,7 +331,11 @@ struct xen_mc { }; DEFINE_GUEST_HANDLE_STRUCT(xen_mc); -/* Fields are zero when not available */ +/* + * Fields are zero when not available. Also, this struct is shared with + * userspace mcelog and thus must keep existing fields at current offsets. + * Only add new fields to the end of the structure + */ struct xen_mce { __u64 status; __u64 misc; @@ -352,6 +356,9 @@ struct xen_mce { __u32 socketid; /* CPU socket ID */ __u32 apicid; /* CPU initial apic ID */ __u64 mcgcap; /* MCGCAP MSR: machine check capabilities of CPU */ + __u64 synd; /* MCA_SYND MSR: only valid on SMCA systems */ + __u64 ipid; /* MCA_IPID MSR: only valid on SMCA systems */ + __u64 ppin; /* Protected Processor Inventory Number */ }; /* -- cgit v1.2.3 From d8e85e144bbe12e8d82c6b05d690a34da62cc991 Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Wed, 13 Nov 2019 10:26:52 +0100 Subject: arm64: Kconfig: add a choice for endianness When building allmodconfig KCONFIG_ALLCONFIG=$(pwd)/arch/arm64/configs/defconfig CONFIG_CPU_BIG_ENDIAN gets enabled, which tends not to be what most people want. Another concern that has come up is that ACPI isn't built for an allmodconfig kernel today since that also depends on !CPU_BIG_ENDIAN. Rework so that we introduce a 'choice' and default the choice to CPU_LITTLE_ENDIAN. That means that when we build an allmodconfig kernel it will default to CPU_LITTLE_ENDIAN, which is what most people tend to want. Reviewed-by: John Garry Acked-by: Will Deacon Signed-off-by: Anders Roxell Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 8a0800e5be9d..d66a9727344d 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -877,10 +877,26 @@ config ARM64_PA_BITS default 48 if ARM64_PA_BITS_48 default 52 if ARM64_PA_BITS_52 +choice + prompt "Endianness" + default CPU_LITTLE_ENDIAN + help + Select the endianness of data accesses performed by the CPU. Userspace + applications will need to be compiled and linked for the endianness + that is selected here.
+ config CPU_BIG_ENDIAN bool "Build big-endian kernel" help - Say Y if you plan on running a kernel in big-endian mode. + Say Y if you plan on running a kernel with a big-endian userspace. + +config CPU_LITTLE_ENDIAN + bool "Build little-endian kernel" + help + Say Y if you plan on running a kernel with a little-endian userspace. + This is usually the case for distributions targeting arm64. + +endchoice config SCHED_MC bool "Multi-core scheduler support" -- cgit v1.2.3 From f399e60c45f6b6e6ad6dfcedff1dd6386e086b0b Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Mon, 4 Nov 2019 17:59:58 -0500 Subject: KVM: x86: optimize more exit handlers in vmx.c Eliminate the wasteful call/ret in the non-RETPOLINE case and unnecessary fentry dynamic tracing hook points. Signed-off-by: Andrea Arcangeli Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 30 +++++------------------------- 1 file changed, 5 insertions(+), 25 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 31ce6bc2c371..e8c21e330449 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -4667,7 +4667,7 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu) return 0; } -static int handle_external_interrupt(struct kvm_vcpu *vcpu) +static __always_inline int handle_external_interrupt(struct kvm_vcpu *vcpu) { ++vcpu->stat.irq_exits; return 1; @@ -4939,21 +4939,6 @@ static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val) vmcs_writel(GUEST_DR7, val); } -static int handle_cpuid(struct kvm_vcpu *vcpu) -{ - return kvm_emulate_cpuid(vcpu); -} - -static int handle_rdmsr(struct kvm_vcpu *vcpu) -{ - return kvm_emulate_rdmsr(vcpu); -} - -static int handle_wrmsr(struct kvm_vcpu *vcpu) -{ - return kvm_emulate_wrmsr(vcpu); -} - static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu) { kvm_apic_update_ppr(vcpu); @@ -4970,11 +4955,6 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu) return 1; } -static int handle_halt(struct kvm_vcpu *vcpu) -{ - return kvm_emulate_halt(vcpu); -} - static int handle_vmcall(struct kvm_vcpu *vcpu) { return kvm_emulate_hypercall(vcpu); @@ -5522,11 +5502,11 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { [EXIT_REASON_IO_INSTRUCTION] = handle_io, [EXIT_REASON_CR_ACCESS] = handle_cr, [EXIT_REASON_DR_ACCESS] = handle_dr, - [EXIT_REASON_CPUID] = handle_cpuid, - [EXIT_REASON_MSR_READ] = handle_rdmsr, - [EXIT_REASON_MSR_WRITE] = handle_wrmsr, + [EXIT_REASON_CPUID] = kvm_emulate_cpuid, + [EXIT_REASON_MSR_READ] = kvm_emulate_rdmsr, + [EXIT_REASON_MSR_WRITE] = kvm_emulate_wrmsr, [EXIT_REASON_PENDING_INTERRUPT] = handle_interrupt_window, - [EXIT_REASON_HLT] = handle_halt, + [EXIT_REASON_HLT] = kvm_emulate_halt, [EXIT_REASON_INVD] = handle_invd, [EXIT_REASON_INVLPG] = handle_invlpg, [EXIT_REASON_RDPMC] = handle_rdpmc, -- cgit v1.2.3 From 4289d2728664fc1fb49cfc76a6a7d96d913b921f Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Mon, 4 Nov 2019 17:59:59 -0500 Subject: KVM: retpolines: x86: eliminate retpoline from vmx.c exit handlers It's enough to check the exit value and issue a direct call to avoid the retpoline for all the common vmexit reasons. Of course CONFIG_RETPOLINE already forbids gcc from using indirect jumps while compiling all switch() statements, however switch() would still allow the compiler to bisect the case value. It's more efficient to prioritize the most frequent vmexits instead.
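The shape of that optimization can be modeled in plain C, as in the hedged sketch below (the exit reasons and handlers are hypothetical stand-ins, not KVM's):

#include <stdio.h>

enum exit_reason { EXIT_A, EXIT_B, EXIT_C, EXIT_MAX };

static int handle_a(int vcpu) { return vcpu + 1; }
static int handle_b(int vcpu) { return vcpu + 2; }
static int handle_c(int vcpu) { return vcpu + 3; }

static int (*handlers[EXIT_MAX])(int) = {
	[EXIT_A] = handle_a,
	[EXIT_B] = handle_b,
	[EXIT_C] = handle_c,
};

static int dispatch(enum exit_reason reason, int vcpu)
{
	/*
	 * Fast path: direct calls for the most frequent reasons, tested
	 * in frequency order. With retpolines enabled these avoid the
	 * indirect branch entirely.
	 */
	if (reason == EXIT_B)
		return handle_b(vcpu);
	if (reason == EXIT_A)
		return handle_a(vcpu);

	/* Slow path: the generic (indirect, retpolined) table call. */
	return handlers[reason](vcpu);
}

int main(void)
{
	printf("%d\n", dispatch(EXIT_B, 0));	/* direct call, prints 2 */
	return 0;
}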
Halt may be a slow path from the guest's point of view, but not necessarily so from the host's point of view, if the host runs at full CPU capacity and no host CPU is ever left idle. Signed-off-by: Andrea Arcangeli Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index e8c21e330449..55f73d1c1765 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -5879,9 +5879,23 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) } if (exit_reason < kvm_vmx_max_exit_handlers - && kvm_vmx_exit_handlers[exit_reason]) + && kvm_vmx_exit_handlers[exit_reason]) { +#ifdef CONFIG_RETPOLINE + if (exit_reason == EXIT_REASON_MSR_WRITE) + return kvm_emulate_wrmsr(vcpu); + else if (exit_reason == EXIT_REASON_PREEMPTION_TIMER) + return handle_preemption_timer(vcpu); + else if (exit_reason == EXIT_REASON_PENDING_INTERRUPT) + return handle_interrupt_window(vcpu); + else if (exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT) + return handle_external_interrupt(vcpu); + else if (exit_reason == EXIT_REASON_HLT) + return kvm_emulate_halt(vcpu); + else if (exit_reason == EXIT_REASON_EPT_MISCONFIG) + return handle_ept_misconfig(vcpu); +#endif return kvm_vmx_exit_handlers[exit_reason](vcpu); - else { + } else { vcpu_unimpl(vcpu, "vmx: unexpected exit reason 0x%x\n", exit_reason); dump_vmcs(); -- cgit v1.2.3 From 3dcb2a3fa5a0c903fd754bfba2b8defb9f191974 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Mon, 4 Nov 2019 18:00:00 -0500 Subject: KVM: retpolines: x86: eliminate retpoline from svm.c exit handlers It's enough to check the exit value and issue a direct call to avoid the retpoline for all the common vmexit reasons. After this commit is applied, here are the most common retpolines executed under a high resolution timer workload in the guest on an SVM host: [..]
@[ trace_retpoline+1 __trace_retpoline+30 __x86_indirect_thunk_rax+33 ktime_get_update_offsets_now+70 hrtimer_interrupt+131 smp_apic_timer_interrupt+106 apic_timer_interrupt+15 start_sw_timer+359 restart_apic_timer+85 kvm_set_msr_common+1497 msr_interception+142 vcpu_enter_guest+684 kvm_arch_vcpu_ioctl_run+261 kvm_vcpu_ioctl+559 do_vfs_ioctl+164 ksys_ioctl+96 __x64_sys_ioctl+22 do_syscall_64+89 entry_SYSCALL_64_after_hwframe+68 ]: 1940 @[ trace_retpoline+1 __trace_retpoline+30 __x86_indirect_thunk_r12+33 force_qs_rnp+217 rcu_gp_kthread+1270 kthread+268 ret_from_fork+34 ]: 4644 @[]: 25095 @[ trace_retpoline+1 __trace_retpoline+30 __x86_indirect_thunk_rax+33 lapic_next_event+28 clockevents_program_event+148 hrtimer_start_range_ns+528 start_sw_timer+356 restart_apic_timer+85 kvm_set_msr_common+1497 msr_interception+142 vcpu_enter_guest+684 kvm_arch_vcpu_ioctl_run+261 kvm_vcpu_ioctl+559 do_vfs_ioctl+164 ksys_ioctl+96 __x64_sys_ioctl+22 do_syscall_64+89 entry_SYSCALL_64_after_hwframe+68 ]: 41474 @[ trace_retpoline+1 __trace_retpoline+30 __x86_indirect_thunk_rax+33 clockevents_program_event+148 hrtimer_start_range_ns+528 start_sw_timer+356 restart_apic_timer+85 kvm_set_msr_common+1497 msr_interception+142 vcpu_enter_guest+684 kvm_arch_vcpu_ioctl_run+261 kvm_vcpu_ioctl+559 do_vfs_ioctl+164 ksys_ioctl+96 __x64_sys_ioctl+22 do_syscall_64+89 entry_SYSCALL_64_after_hwframe+68 ]: 41474 @[ trace_retpoline+1 __trace_retpoline+30 __x86_indirect_thunk_rax+33 ktime_get+58 clockevents_program_event+84 hrtimer_start_range_ns+528 start_sw_timer+356 restart_apic_timer+85 kvm_set_msr_common+1497 msr_interception+142 vcpu_enter_guest+684 kvm_arch_vcpu_ioctl_run+261 kvm_vcpu_ioctl+559 do_vfs_ioctl+164 ksys_ioctl+96 __x64_sys_ioctl+22 do_syscall_64+89 entry_SYSCALL_64_after_hwframe+68 ]: 41887 @[ trace_retpoline+1 __trace_retpoline+30 __x86_indirect_thunk_rax+33 lapic_next_event+28 clockevents_program_event+148 hrtimer_try_to_cancel+168 hrtimer_cancel+21 kvm_set_lapic_tscdeadline_msr+43 kvm_set_msr_common+1497 msr_interception+142 vcpu_enter_guest+684 kvm_arch_vcpu_ioctl_run+261 kvm_vcpu_ioctl+559 do_vfs_ioctl+164 ksys_ioctl+96 __x64_sys_ioctl+22 do_syscall_64+89 entry_SYSCALL_64_after_hwframe+68 ]: 42723 @[ trace_retpoline+1 __trace_retpoline+30 __x86_indirect_thunk_rax+33 clockevents_program_event+148 hrtimer_try_to_cancel+168 hrtimer_cancel+21 kvm_set_lapic_tscdeadline_msr+43 kvm_set_msr_common+1497 msr_interception+142 vcpu_enter_guest+684 kvm_arch_vcpu_ioctl_run+261 kvm_vcpu_ioctl+559 do_vfs_ioctl+164 ksys_ioctl+96 __x64_sys_ioctl+22 do_syscall_64+89 entry_SYSCALL_64_after_hwframe+68 ]: 42766 @[ trace_retpoline+1 __trace_retpoline+30 __x86_indirect_thunk_rax+33 ktime_get+58 clockevents_program_event+84 hrtimer_try_to_cancel+168 hrtimer_cancel+21 kvm_set_lapic_tscdeadline_msr+43 kvm_set_msr_common+1497 msr_interception+142 vcpu_enter_guest+684 kvm_arch_vcpu_ioctl_run+261 kvm_vcpu_ioctl+559 do_vfs_ioctl+164 ksys_ioctl+96 __x64_sys_ioctl+22 do_syscall_64+89 entry_SYSCALL_64_after_hwframe+68 ]: 42848 @[ trace_retpoline+1 __trace_retpoline+30 __x86_indirect_thunk_rax+33 ktime_get+58 start_sw_timer+279 restart_apic_timer+85 kvm_set_msr_common+1497 msr_interception+142 vcpu_enter_guest+684 kvm_arch_vcpu_ioctl_run+261 kvm_vcpu_ioctl+559 do_vfs_ioctl+164 ksys_ioctl+96 __x64_sys_ioctl+22 do_syscall_64+89 entry_SYSCALL_64_after_hwframe+68 ]: 499845 @total: 1780243 SVM has no TSC based programmable preemption timer so it is invoking ktime_get() frequently. 
Signed-off-by: Andrea Arcangeli Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 4153ca8cddb7..a7b358f20aca 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -4999,6 +4999,18 @@ static int handle_exit(struct kvm_vcpu *vcpu) return 0; } +#ifdef CONFIG_RETPOLINE + if (exit_code == SVM_EXIT_MSR) + return msr_interception(svm); + else if (exit_code == SVM_EXIT_VINTR) + return interrupt_window_interception(svm); + else if (exit_code == SVM_EXIT_INTR) + return intr_interception(svm); + else if (exit_code == SVM_EXIT_HLT) + return halt_interception(svm); + else if (exit_code == SVM_EXIT_NPF) + return npf_interception(svm); +#endif return svm_exit_handlers[exit_code](svm); } -- cgit v1.2.3 From 74c504a6d70ab29b2c28bee62f5f39e3dd847ea2 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Mon, 4 Nov 2019 18:00:01 -0500 Subject: x86: retpolines: eliminate retpoline from msr event handlers It's enough to check the value and issue the direct call. After this commit is applied, here are the most common retpolines executed under a high resolution timer workload in the guest on a VMX host: [..] @[ trace_retpoline+1 __trace_retpoline+30 __x86_indirect_thunk_rax+33 do_syscall_64+89 entry_SYSCALL_64_after_hwframe+68 ]: 267 @[]: 2256 @[ trace_retpoline+1 __trace_retpoline+30 __x86_indirect_thunk_rax+33 __kvm_wait_lapic_expire+284 vmx_vcpu_run.part.97+1091 vcpu_enter_guest+377 kvm_arch_vcpu_ioctl_run+261 kvm_vcpu_ioctl+559 do_vfs_ioctl+164 ksys_ioctl+96 __x64_sys_ioctl+22 do_syscall_64+89 entry_SYSCALL_64_after_hwframe+68 ]: 2390 @[]: 33410 @total: 315707 Note the highest hit above is __delay so probably not worth optimizing even if it would be more frequent than 2k hits per sec. Signed-off-by: Andrea Arcangeli Signed-off-by: Paolo Bonzini --- arch/x86/events/intel/core.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'arch') diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index fcef678c3423..937363b803c1 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3323,8 +3323,19 @@ static int intel_pmu_hw_config(struct perf_event *event) return 0; } +#ifdef CONFIG_RETPOLINE +static struct perf_guest_switch_msr *core_guest_get_msrs(int *nr); +static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr); +#endif + struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr) { +#ifdef CONFIG_RETPOLINE + if (x86_pmu.guest_get_msrs == intel_guest_get_msrs) + return intel_guest_get_msrs(nr); + else if (x86_pmu.guest_get_msrs == core_guest_get_msrs) + return core_guest_get_msrs(nr); +#endif if (x86_pmu.guest_get_msrs) return x86_pmu.guest_get_msrs(nr); *nr = 0; -- cgit v1.2.3 From ff90afa75573502f3ac05acd5a282d6e3d4cef34 Mon Sep 17 00:00:00 2001 From: Liran Alon Date: Mon, 11 Nov 2019 11:16:39 +0200 Subject: KVM: x86: Evaluate latched_init in KVM_SET_VCPU_EVENTS when vCPU not in SMM Commit 4b9852f4f389 ("KVM: x86: Fix INIT signal handling in various CPU states") fixed KVM to also latch a pending LAPIC INIT event when the vCPU is in VMX operation. However, the current KVM_SET_VCPU_EVENTS API defines this field as part of the SMM state and only sets a pending LAPIC INIT event if the vCPU is specified to be in SMM mode (events->smi.smm is set). Change the KVM_SET_VCPU_EVENTS handler to set a pending LAPIC INIT event from the latched_init field regardless of whether the vCPU is in SMM mode or not.
Fixes: 4b9852f4f389 ("KVM: x86: Fix INIT signal handling in various CPU states") Reviewed-by: Mihai Carabas Signed-off-by: Liran Alon Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 8b3dcaa7985a..c5886eed3d57 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3830,12 +3830,13 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK; else vcpu->arch.hflags &= ~HF_SMM_INSIDE_NMI_MASK; - if (lapic_in_kernel(vcpu)) { - if (events->smi.latched_init) - set_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events); - else - clear_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events); - } + } + + if (lapic_in_kernel(vcpu)) { + if (events->smi.latched_init) + set_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events); + else + clear_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events); } } -- cgit v1.2.3 From 27cbe7d61898a1d1d39be32e5acff7d4be6e9d87 Mon Sep 17 00:00:00 2001 From: Liran Alon Date: Mon, 11 Nov 2019 11:16:40 +0200 Subject: KVM: x86: Prevent setting vCPU into INIT/SIPI_RECEIVED state when INIT is latched Commit 4b9852f4f389 ("KVM: x86: Fix INIT signal handling in various CPU states") fixed KVM to also latch a pending LAPIC INIT event when the vCPU is in VMX operation. However, the current KVM_SET_MP_STATE API allows userspace to put the vCPU into KVM_MP_STATE_SIPI_RECEIVED or KVM_MP_STATE_INIT_RECEIVED even when the vCPU is in VMX operation. Fix this by introducing a utility method to check whether the vCPU state latches INIT signals and use it in the KVM_SET_MP_STATE handler. Fixes: 4b9852f4f389 ("KVM: x86: Fix INIT signal handling in various CPU states") Reported-by: Sean Christopherson Reviewed-by: Mihai Carabas Signed-off-by: Liran Alon Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 2 +- arch/x86/kvm/x86.c | 8 ++++++-- arch/x86/kvm/x86.h | 5 +++++ 3 files changed, 12 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 87b0fcc23ef8..cacfe14717d6 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2714,7 +2714,7 @@ void kvm_apic_accept_events(struct kvm_vcpu *vcpu) * KVM_MP_STATE_INIT_RECEIVED state), just eat SIPIs * and leave the INIT pending. */ - if (is_smm(vcpu) || kvm_x86_ops->apic_init_signal_blocked(vcpu)) { + if (kvm_vcpu_latch_init(vcpu)) { WARN_ON_ONCE(vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED); if (test_bit(KVM_APIC_SIPI, &apic->pending_events)) clear_bit(KVM_APIC_SIPI, &apic->pending_events); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c5886eed3d57..34d9048c881e 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8706,8 +8706,12 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, mp_state->mp_state != KVM_MP_STATE_RUNNABLE) goto out; - /* INITs are latched while in SMM */ - if ((is_smm(vcpu) || vcpu->arch.smi_pending) && + /* + * KVM_MP_STATE_INIT_RECEIVED means the processor is in + * INIT state; latched init should be reported using + * KVM_SET_VCPU_EVENTS, so reject it here.
+ */ + if ((kvm_vcpu_latch_init(vcpu) || vcpu->arch.smi_pending) && (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED || mp_state->mp_state == KVM_MP_STATE_INIT_RECEIVED)) goto out; diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 2b0805012e3c..29391af8871d 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -258,6 +258,11 @@ static inline bool kvm_check_has_quirk(struct kvm *kvm, u64 quirk) return !(kvm->arch.disabled_quirks & quirk); } +static inline bool kvm_vcpu_latch_init(struct kvm_vcpu *vcpu) +{ + return is_smm(vcpu) || kvm_x86_ops->apic_init_signal_blocked(vcpu); +} + void kvm_set_pending_timer(struct kvm_vcpu *vcpu); void kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip); -- cgit v1.2.3 From e64a8508234afb17a15d1aa98e8c1434fc207755 Mon Sep 17 00:00:00 2001 From: Liran Alon Date: Mon, 11 Nov 2019 14:16:05 +0200 Subject: KVM: VMX: Consume pending LAPIC INIT event when exit on INIT_SIGNAL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Intel SDM section 25.2 OTHER CAUSES OF VM EXITS specifies the following on INIT signals: "Such exits do not modify register state or clear pending events as they would outside of VMX operation." When commit 4b9852f4f389 ("KVM: x86: Fix INIT signal handling in various CPU states") was applied, I interpreted the above Intel SDM statement to mean that an INIT_SIGNAL exit doesn't consume the pending LAPIC INIT event. However, when Nadav Amit ran the matching kvm-unit-test on a bare-metal machine, it turned out my interpretation was wrong, i.e. an INIT_SIGNAL exit does consume the pending LAPIC INIT event. (See: https://www.spinics.net/lists/kvm/msg196757.html) Therefore, fix the KVM code to behave as observed on bare metal. Fixes: 4b9852f4f389 ("KVM: x86: Fix INIT signal handling in various CPU states") Reported-by: Nadav Amit Reviewed-by: Mihai Carabas Reviewed-by: Joao Martins Signed-off-by: Liran Alon Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 55c5791ac52b..cd8d0b040daa 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -3463,6 +3463,7 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr) test_bit(KVM_APIC_INIT, &apic->pending_events)) { if (block_nested_events) return -EBUSY; + clear_bit(KVM_APIC_INIT, &apic->pending_events); nested_vmx_vmexit(vcpu, EXIT_REASON_INIT_SIGNAL, 0, 0); return 0; } -- cgit v1.2.3 From 5b4ce93a8fe759e2d6b2ee05765cd5a3b4b6a2f1 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Sat, 9 Nov 2019 16:58:54 +0800 Subject: KVM: X86: avoid unused setup_syscalls_segments call when SYSCALL check failed When the SYSCALL/SYSENTER ability check fails, cs and ss are initialized but remain unused. Delay initializing cs and ss until the SYSCALL/SYSENTER ability check has passed.
Signed-off-by: Miaohe Lin Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 698efb8c3897..952d1a4f4d7e 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2770,11 +2770,10 @@ static int em_syscall(struct x86_emulate_ctxt *ctxt) return emulate_ud(ctxt); ops->get_msr(ctxt, MSR_EFER, &efer); - setup_syscalls_segments(ctxt, &cs, &ss); - if (!(efer & EFER_SCE)) return emulate_ud(ctxt); + setup_syscalls_segments(ctxt, &cs, &ss); ops->get_msr(ctxt, MSR_STAR, &msr_data); msr_data >>= 32; cs_sel = (u16)(msr_data & 0xfffc); @@ -2838,12 +2837,11 @@ static int em_sysenter(struct x86_emulate_ctxt *ctxt) if (ctxt->mode == X86EMUL_MODE_PROT64) return X86EMUL_UNHANDLEABLE; - setup_syscalls_segments(ctxt, &cs, &ss); - ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data); if ((msr_data & 0xfffc) == 0x0) return emulate_gp(ctxt, 0); + setup_syscalls_segments(ctxt, &cs, &ss); ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF); cs_sel = (u16)msr_data & ~SEGMENT_RPL_MASK; ss_sel = cs_sel + 8; -- cgit v1.2.3 From 1a686237d94b8a4bab9ce16ffd3e2208370d7695 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Sat, 9 Nov 2019 17:46:49 +0800 Subject: KVM: APIC: add helper func to remove duplicate code in kvm_pv_send_ipi There is some duplicate code in kvm_pv_send_ipi when dealing with the ipi bitmap. Add a helper function to remove it, eliminate the odd 'out' label, get rid of the unnecessary kvm_lapic_irq field init, and so on. Signed-off-by: Miaohe Lin Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 65 +++++++++++++++++++++++----------------------------- 1 file changed, 29 insertions(+), 36 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index cacfe14717d6..60fb21fe7f42 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -562,60 +562,53 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq, irq->level, irq->trig_mode, dest_map); } +static int __pv_send_ipi(unsigned long *ipi_bitmap, struct kvm_apic_map *map, + struct kvm_lapic_irq *irq, u32 min) +{ + int i, count = 0; + struct kvm_vcpu *vcpu; + + if (min > map->max_apic_id) + return 0; + + for_each_set_bit(i, ipi_bitmap, + min((u32)BITS_PER_LONG, (map->max_apic_id - min + 1))) { + if (map->phys_map[min + i]) { + vcpu = map->phys_map[min + i]->vcpu; + count += kvm_apic_set_irq(vcpu, irq, NULL); + } + } + + return count; +} + int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low, unsigned long ipi_bitmap_high, u32 min, unsigned long icr, int op_64_bit) { - int i; struct kvm_apic_map *map; - struct kvm_vcpu *vcpu; struct kvm_lapic_irq irq = {0}; int cluster_size = op_64_bit ?
64 : 32; - int count = 0; + int count; + + if (icr & (APIC_DEST_MASK | APIC_SHORT_MASK)) + return -KVM_EINVAL; irq.vector = icr & APIC_VECTOR_MASK; irq.delivery_mode = icr & APIC_MODE_MASK; irq.level = (icr & APIC_INT_ASSERT) != 0; irq.trig_mode = icr & APIC_INT_LEVELTRIG; - if (icr & APIC_DEST_MASK) - return -KVM_EINVAL; - if (icr & APIC_SHORT_MASK) - return -KVM_EINVAL; - rcu_read_lock(); map = rcu_dereference(kvm->arch.apic_map); - if (unlikely(!map)) { - count = -EOPNOTSUPP; - goto out; + count = -EOPNOTSUPP; + if (likely(map)) { + count = __pv_send_ipi(&ipi_bitmap_low, map, &irq, min); + min += cluster_size; + count += __pv_send_ipi(&ipi_bitmap_high, map, &irq, min); } - if (min > map->max_apic_id) - goto out; - /* Bits above cluster_size are masked in the caller. */ - for_each_set_bit(i, &ipi_bitmap_low, - min((u32)BITS_PER_LONG, (map->max_apic_id - min + 1))) { - if (map->phys_map[min + i]) { - vcpu = map->phys_map[min + i]->vcpu; - count += kvm_apic_set_irq(vcpu, &irq, NULL); - } - } - - min += cluster_size; - - if (min > map->max_apic_id) - goto out; - - for_each_set_bit(i, &ipi_bitmap_high, - min((u32)BITS_PER_LONG, (map->max_apic_id - min + 1))) { - if (map->phys_map[min + i]) { - vcpu = map->phys_map[min + i]->vcpu; - count += kvm_apic_set_irq(vcpu, &irq, NULL); - } - } - -out: rcu_read_unlock(); return count; } -- cgit v1.2.3 From 49d654d85f857d9ca34fe2b4ac6d6cf34677e6c1 Mon Sep 17 00:00:00 2001 From: Liran Alon Date: Mon, 11 Nov 2019 14:26:21 +0200 Subject: KVM: SVM: Remove check if APICv enabled in SVM update_cr8_intercept() handler This check is unnecessary, as the x86 update_cr8_intercept(), which calls this VMX/SVM-specific callback, already performs it. Reviewed-by: Joao Martins Signed-off-by: Liran Alon Reviewed-by: Jim Mattson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index a7b358f20aca..d02a73a48461 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -5106,8 +5106,7 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) { struct vcpu_svm *svm = to_svm(vcpu); - if (svm_nested_virtualize_tpr(vcpu) || - kvm_vcpu_apicv_active(vcpu)) + if (svm_nested_virtualize_tpr(vcpu)) return; clr_cr_intercept(svm, INTERCEPT_CR8_WRITE); -- cgit v1.2.3 From 132f4f7e39fd270c5e3c9c577939081cfd499b16 Mon Sep 17 00:00:00 2001 From: Liran Alon Date: Mon, 11 Nov 2019 14:30:54 +0200 Subject: KVM: VMX: Refactor update_cr8_intercept() No functional changes. Reviewed-by: Joao Martins Signed-off-by: Liran Alon Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 55f73d1c1765..2a64bf3c62b9 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -5981,17 +5981,14 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) { struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + int tpr_threshold; if (is_guest_mode(vcpu) && nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) return; - if (irr == -1 || tpr < irr) { - vmcs_write32(TPR_THRESHOLD, 0); - return; - } - - vmcs_write32(TPR_THRESHOLD, irr); + tpr_threshold = (irr == -1 || tpr < irr) ?
0 : irr; + vmcs_write32(TPR_THRESHOLD, tpr_threshold); } void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu) -- cgit v1.2.3 From 02d496cfb88a4856b9d67ade32317077c510aebc Mon Sep 17 00:00:00 2001 From: Liran Alon Date: Mon, 11 Nov 2019 14:30:55 +0200 Subject: KVM: nVMX: Update vmcs01 TPR_THRESHOLD if L2 changed L1 TPR When L1 doesn't use TPR-Shadow to run L2, L0 configures vmcs02 without TPR-Shadow and installs intercepts on CR8 accesses (load and store). If L1 does not intercept L2 CR8 accesses, L0's intercepts on those accesses will emulate load/store on L1's LAPIC TPR. If in this case L2 lowers TPR such that there is now an injectable interrupt to L1, apic_update_ppr() will request a KVM_REQ_EVENT which will trigger a call to update_cr8_intercept() to update TPR-Threshold to the highest pending IRR priority. However, this update to TPR-Threshold is done while the active vmcs is vmcs02 instead of vmcs01. Thus, when L0 later emulates an exit from L2 to L1, L1 will still run with a high TPR-Threshold. This will result in every VMEntry to L1 immediately exiting on TPR_BELOW_THRESHOLD, and it will continue to do so indefinitely until some condition causes KVM_REQ_EVENT to be set. (Note that the TPR_BELOW_THRESHOLD exit handler does not set KVM_REQ_EVENT until apic_update_ppr() notices a new injectable interrupt for PPR.) To fix this issue, change update_cr8_intercept() such that if L2 lowers L1's TPR in a way that requires lowering L1's TPR-Threshold, the update to TPR-Threshold is saved and applied to vmcs01 when L0 emulates an exit from L2 to L1. Reviewed-by: Joao Martins Signed-off-by: Liran Alon Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 3 +++ arch/x86/kvm/vmx/vmx.c | 5 ++++- arch/x86/kvm/vmx/vmx.h | 3 +++ 3 files changed, 10 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index cd8d0b040daa..bdb9b3028250 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -2073,6 +2073,7 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) exec_control &= ~CPU_BASED_TPR_SHADOW; exec_control |= vmcs12->cpu_based_vm_exec_control; + vmx->nested.l1_tpr_threshold = -1; if (exec_control & CPU_BASED_TPR_SHADOW) vmcs_write32(TPR_THRESHOLD, vmcs12->tpr_threshold); #ifdef CONFIG_X86_64 @@ -4115,6 +4116,8 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr); vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr); vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset); + if (vmx->nested.l1_tpr_threshold != -1) + vmcs_write32(TPR_THRESHOLD, vmx->nested.l1_tpr_threshold); if (kvm_has_tsc_control) decache_tsc_multiplier(vmx); diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 2a64bf3c62b9..765086756177 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -5988,7 +5988,10 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) return; tpr_threshold = (irr == -1 || tpr < irr) ?
0 : irr; - vmcs_write32(TPR_THRESHOLD, tpr_threshold); + if (is_guest_mode(vcpu)) + to_vmx(vcpu)->nested.l1_tpr_threshold = tpr_threshold; + else + vmcs_write32(TPR_THRESHOLD, tpr_threshold); } void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index bee16687dc0b..43331dfafffe 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -167,6 +167,9 @@ struct nested_vmx { u64 vmcs01_debugctl; u64 vmcs01_guest_bndcfgs; + /* to migrate it to L1 if L2 writes to L1's CR8 directly */ + int l1_tpr_threshold; + u16 vpid02; u16 last_vpid; -- cgit v1.2.3 From 98ff80f5b788c1818464022cc61924ef5630d99d Mon Sep 17 00:00:00 2001 From: Like Xu Date: Sun, 27 Oct 2019 18:52:40 +0800 Subject: KVM: x86/vPMU: Rename pmu_ops callbacks from msr_idx to rdpmc_ecx The legacy pmu_ops->msr_idx_to_pmc is only called in kvm_pmu_rdpmc, so this function actually receives the contents of ECX before RDPMC, and translates it to a kvm_pmc. Let's clarify its semantics by renaming the existing msr_idx_to_pmc to rdpmc_ecx_to_pmc, and is_valid_msr_idx to is_valid_rdpmc_ecx; likewise for the wrapper kvm_pmu_is_valid_msr_idx. Suggested-by: Paolo Bonzini Reviewed-by: Jim Mattson Signed-off-by: Like Xu Signed-off-by: Paolo Bonzini --- arch/x86/kvm/pmu.c | 6 +++--- arch/x86/kvm/pmu.h | 8 ++++---- arch/x86/kvm/pmu_amd.c | 9 +++++---- arch/x86/kvm/vmx/pmu_intel.c | 10 +++++----- arch/x86/kvm/x86.c | 2 +- 5 files changed, 18 insertions(+), 17 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index 75e8f9fae031..33f6fe1b5c56 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -266,9 +266,9 @@ void kvm_pmu_handle_event(struct kvm_vcpu *vcpu) } /* check if idx is a valid index to access PMU */ -int kvm_pmu_is_valid_msr_idx(struct kvm_vcpu *vcpu, unsigned idx) +int kvm_pmu_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx) { - return kvm_x86_ops->pmu_ops->is_valid_msr_idx(vcpu, idx); + return kvm_x86_ops->pmu_ops->is_valid_rdpmc_ecx(vcpu, idx); } bool is_vmware_backdoor_pmc(u32 pmc_idx) @@ -318,7 +318,7 @@ int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data) if (is_vmware_backdoor_pmc(idx)) return kvm_pmu_rdpmc_vmware(vcpu, idx, data); - pmc = kvm_x86_ops->pmu_ops->msr_idx_to_pmc(vcpu, idx, &mask); + pmc = kvm_x86_ops->pmu_ops->rdpmc_ecx_to_pmc(vcpu, idx, &mask); if (!pmc) return 1; diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h index 58265f761c3b..c4a80fe285a5 100644 --- a/arch/x86/kvm/pmu.h +++ b/arch/x86/kvm/pmu.h @@ -25,9 +25,9 @@ struct kvm_pmu_ops { unsigned (*find_fixed_event)(int idx); bool (*pmc_is_enabled)(struct kvm_pmc *pmc); struct kvm_pmc *(*pmc_idx_to_pmc)(struct kvm_pmu *pmu, int pmc_idx); - struct kvm_pmc *(*msr_idx_to_pmc)(struct kvm_vcpu *vcpu, unsigned idx, - u64 *mask); - int (*is_valid_msr_idx)(struct kvm_vcpu *vcpu, unsigned idx); + struct kvm_pmc *(*rdpmc_ecx_to_pmc)(struct kvm_vcpu *vcpu, + unsigned int idx, u64 *mask); + int (*is_valid_rdpmc_ecx)(struct kvm_vcpu *vcpu, unsigned int idx); bool (*is_valid_msr)(struct kvm_vcpu *vcpu, u32 msr); int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr, u64 *data); int (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr_info); @@ -110,7 +110,7 @@ void reprogram_counter(struct kvm_pmu *pmu, int pmc_idx); void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu); void kvm_pmu_handle_event(struct kvm_vcpu *vcpu); int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data); -int kvm_pmu_is_valid_msr_idx(struct kvm_vcpu *vcpu, unsigned idx); +int
kvm_pmu_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx); bool kvm_pmu_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr); int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data); int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info); diff --git a/arch/x86/kvm/pmu_amd.c b/arch/x86/kvm/pmu_amd.c index c8388389a3b0..a4a6d8a09f70 100644 --- a/arch/x86/kvm/pmu_amd.c +++ b/arch/x86/kvm/pmu_amd.c @@ -174,7 +174,7 @@ static struct kvm_pmc *amd_pmc_idx_to_pmc(struct kvm_pmu *pmu, int pmc_idx) } /* returns 0 if idx's corresponding MSR exists; otherwise returns 1. */ -static int amd_is_valid_msr_idx(struct kvm_vcpu *vcpu, unsigned idx) +static int amd_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx) { struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); @@ -184,7 +184,8 @@ static int amd_is_valid_msr_idx(struct kvm_vcpu *vcpu, unsigned idx) } /* idx is the ECX register of RDPMC instruction */ -static struct kvm_pmc *amd_msr_idx_to_pmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *mask) +static struct kvm_pmc *amd_rdpmc_ecx_to_pmc(struct kvm_vcpu *vcpu, + unsigned int idx, u64 *mask) { struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); struct kvm_pmc *counters; @@ -306,8 +307,8 @@ struct kvm_pmu_ops amd_pmu_ops = { .find_fixed_event = amd_find_fixed_event, .pmc_is_enabled = amd_pmc_is_enabled, .pmc_idx_to_pmc = amd_pmc_idx_to_pmc, - .msr_idx_to_pmc = amd_msr_idx_to_pmc, - .is_valid_msr_idx = amd_is_valid_msr_idx, + .rdpmc_ecx_to_pmc = amd_rdpmc_ecx_to_pmc, + .is_valid_rdpmc_ecx = amd_is_valid_rdpmc_ecx, .is_valid_msr = amd_is_valid_msr, .get_msr = amd_pmu_get_msr, .set_msr = amd_pmu_set_msr, diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index 3e9c059099e9..7a8067ec19bb 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -111,7 +111,7 @@ static struct kvm_pmc *intel_pmc_idx_to_pmc(struct kvm_pmu *pmu, int pmc_idx) } /* returns 0 if idx's corresponding MSR exists; otherwise returns 1. 
*/ -static int intel_is_valid_msr_idx(struct kvm_vcpu *vcpu, unsigned idx) +static int intel_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx) { struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); bool fixed = idx & (1u << 30); @@ -122,8 +122,8 @@ static int intel_is_valid_msr_idx(struct kvm_vcpu *vcpu, unsigned idx) (fixed && idx >= pmu->nr_arch_fixed_counters); } -static struct kvm_pmc *intel_msr_idx_to_pmc(struct kvm_vcpu *vcpu, - unsigned idx, u64 *mask) +static struct kvm_pmc *intel_rdpmc_ecx_to_pmc(struct kvm_vcpu *vcpu, + unsigned int idx, u64 *mask) { struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); bool fixed = idx & (1u << 30); @@ -366,8 +366,8 @@ struct kvm_pmu_ops intel_pmu_ops = { .find_fixed_event = intel_find_fixed_event, .pmc_is_enabled = intel_pmc_is_enabled, .pmc_idx_to_pmc = intel_pmc_idx_to_pmc, - .msr_idx_to_pmc = intel_msr_idx_to_pmc, - .is_valid_msr_idx = intel_is_valid_msr_idx, + .rdpmc_ecx_to_pmc = intel_rdpmc_ecx_to_pmc, + .is_valid_rdpmc_ecx = intel_is_valid_rdpmc_ecx, .is_valid_msr = intel_is_valid_msr, .get_msr = intel_pmu_get_msr, .set_msr = intel_pmu_set_msr, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 34d9048c881e..72189160bb81 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6144,7 +6144,7 @@ static void emulator_set_smbase(struct x86_emulate_ctxt *ctxt, u64 smbase) static int emulator_check_pmc(struct x86_emulate_ctxt *ctxt, u32 pmc) { - return kvm_pmu_is_valid_msr_idx(emul_to_vcpu(ctxt), pmc); + return kvm_pmu_is_valid_rdpmc_ecx(emul_to_vcpu(ctxt), pmc); } static int emulator_read_pmc(struct x86_emulate_ctxt *ctxt, -- cgit v1.2.3 From c900c156c518302058a48d2efe3ca44e465cad22 Mon Sep 17 00:00:00 2001 From: Like Xu Date: Sun, 27 Oct 2019 18:52:41 +0800 Subject: KVM: x86/vPMU: Introduce a new kvm_pmu_ops->msr_idx_to_pmc callback Introduce a new callback msr_idx_to_pmc that returns a struct kvm_pmc*, and change kvm_pmu_is_valid_msr to return ".msr_idx_to_pmc(vcpu, msr) || .is_valid_msr(vcpu, msr)"; AMD's .is_valid_msr now simply returns false, since all of its PMU MSRs resolve through msr_idx_to_pmc.
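For intuition, the resulting validity check can be sketched as follows (a simplified illustration of the combined predicate, not verbatim kernel code; the wrapper name here is invented):

	/* An MSR is PMU-visible if it maps to a counter, or if the
	 * vendor catch-all recognizes it (e.g. Intel's GLOBAL_CTRL
	 * family); AMD has no such extra MSRs, hence its catch-all
	 * returns false. */
	static bool pmu_msr_is_known(struct kvm_vcpu *vcpu, u32 msr)
	{
		return kvm_x86_ops->pmu_ops->msr_idx_to_pmc(vcpu, msr) ||
		       kvm_x86_ops->pmu_ops->is_valid_msr(vcpu, msr);
	}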
Suggested-by: Paolo Bonzini Reported-by: kbuild test robot Signed-off-by: Like Xu Signed-off-by: Paolo Bonzini --- arch/x86/kvm/pmu.c | 3 ++- arch/x86/kvm/pmu.h | 1 + arch/x86/kvm/pmu_amd.c | 15 +++++++++++---- arch/x86/kvm/vmx/pmu_intel.c | 13 +++++++++++++ 4 files changed, 27 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index 33f6fe1b5c56..472b69b3b6c3 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -334,7 +334,8 @@ void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu) bool kvm_pmu_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr) { - return kvm_x86_ops->pmu_ops->is_valid_msr(vcpu, msr); + return kvm_x86_ops->pmu_ops->msr_idx_to_pmc(vcpu, msr) || + kvm_x86_ops->pmu_ops->is_valid_msr(vcpu, msr); } int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data) diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h index c4a80fe285a5..b253dd5e56cf 100644 --- a/arch/x86/kvm/pmu.h +++ b/arch/x86/kvm/pmu.h @@ -27,6 +27,7 @@ struct kvm_pmu_ops { struct kvm_pmc *(*pmc_idx_to_pmc)(struct kvm_pmu *pmu, int pmc_idx); struct kvm_pmc *(*rdpmc_ecx_to_pmc)(struct kvm_vcpu *vcpu, unsigned int idx, u64 *mask); + struct kvm_pmc *(*msr_idx_to_pmc)(struct kvm_vcpu *vcpu, u32 msr); int (*is_valid_rdpmc_ecx)(struct kvm_vcpu *vcpu, unsigned int idx); bool (*is_valid_msr)(struct kvm_vcpu *vcpu, u32 msr); int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr, u64 *data); diff --git a/arch/x86/kvm/pmu_amd.c b/arch/x86/kvm/pmu_amd.c index a4a6d8a09f70..e8609ccd0b62 100644 --- a/arch/x86/kvm/pmu_amd.c +++ b/arch/x86/kvm/pmu_amd.c @@ -199,14 +199,20 @@ static struct kvm_pmc *amd_rdpmc_ecx_to_pmc(struct kvm_vcpu *vcpu, } static bool amd_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr) +{ + /* All MSRs refer to exactly one PMC, so msr_idx_to_pmc is enough. */ + return false; +} + +static struct kvm_pmc *amd_msr_idx_to_pmc(struct kvm_vcpu *vcpu, u32 msr) { struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); - int ret = false; + struct kvm_pmc *pmc; - ret = get_gp_pmc_amd(pmu, msr, PMU_TYPE_COUNTER) || - get_gp_pmc_amd(pmu, msr, PMU_TYPE_EVNTSEL); + pmc = get_gp_pmc_amd(pmu, msr, PMU_TYPE_COUNTER); + pmc = pmc ? pmc : get_gp_pmc_amd(pmu, msr, PMU_TYPE_EVNTSEL); - return ret; + return pmc; } static int amd_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data) @@ -308,6 +314,7 @@ struct kvm_pmu_ops amd_pmu_ops = { .pmc_is_enabled = amd_pmc_is_enabled, .pmc_idx_to_pmc = amd_pmc_idx_to_pmc, .rdpmc_ecx_to_pmc = amd_rdpmc_ecx_to_pmc, + .msr_idx_to_pmc = amd_msr_idx_to_pmc, .is_valid_rdpmc_ecx = amd_is_valid_rdpmc_ecx, .is_valid_msr = amd_is_valid_msr, .get_msr = amd_pmu_get_msr, diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index 7a8067ec19bb..dcde142327ca 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -162,6 +162,18 @@ static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr) return ret; } +static struct kvm_pmc *intel_msr_idx_to_pmc(struct kvm_vcpu *vcpu, u32 msr) +{ + struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); + struct kvm_pmc *pmc; + + pmc = get_fixed_pmc(pmu, msr); + pmc = pmc ? pmc : get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0); + pmc = pmc ? 
pmc : get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0); + + return pmc; +} + static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data) { struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); @@ -367,6 +379,7 @@ struct kvm_pmu_ops intel_pmu_ops = { .pmc_is_enabled = intel_pmc_is_enabled, .pmc_idx_to_pmc = intel_pmc_idx_to_pmc, .rdpmc_ecx_to_pmc = intel_rdpmc_ecx_to_pmc, + .msr_idx_to_pmc = intel_msr_idx_to_pmc, .is_valid_rdpmc_ecx = intel_is_valid_rdpmc_ecx, .is_valid_msr = intel_is_valid_msr, .get_msr = intel_pmu_get_msr, -- cgit v1.2.3 From a6da0d77e98e94fa66187a5ce3cf7e11fbf95503 Mon Sep 17 00:00:00 2001 From: Like Xu Date: Sun, 27 Oct 2019 18:52:42 +0800 Subject: KVM: x86/vPMU: Reuse perf_event to avoid unnecessary pmc_reprogram_counter The perf_event_create_kernel_counter() in the pmc_reprogram_counter() is a heavyweight and high-frequency operation, especially when the host disables the watchdog (maximum 21000000 ns), which leads to an unacceptable latency of the guest NMI handler. It limits the use of vPMUs in the guest. When a vPMC is fully enabled, the legacy reprogram_*_counter() would stop and release its existing perf_event (if any) every time, EVEN though in most cases almost the same perf_event would be created and configured again. For each vPMC, if the requested config ('u64 eventsel' for gp and 'u8 ctrl' for fixed) is the same as its current config AND a new sample period based on pmc->counter is accepted by the host perf interface, the current event can safely be reused as if it were newly created. Otherwise, release the stale perf_event and reprogram a new one as usual. It's lightweight to call pmc_pause_counter (disable, read and reset event) and pmc_resume_counter (recalibrate period and re-enable event) as the guest expects, instead of release-and-create on every change. Compared to using the filterable event->attr or hw.config, a new 'u64 current_config' field is added to save the last originally programmed config for each vPMC. Based on this implementation, the number of calls to pmc_reprogram_counter is reduced by ~82.5% for a gp sampling event and ~99.9% for a fixed event. In the usage of multiplexing perf sampling mode, the average latency of the guest NMI handler is reduced from 104923 ns to 48393 ns (~2.16x speedup). If the host disables the watchdog, the minimum latency of the guest NMI handler is improved by ~3413x (from 20407603 ns to 5979 ns) and by ~786x on average. Suggested-by: Kan Liang Signed-off-by: Like Xu Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 5 +++++ arch/x86/kvm/pmu.c | 45 +++++++++++++++++++++++++++++++++++++++-- arch/x86/kvm/pmu.h | 12 +++++++++-- arch/x86/kvm/pmu_amd.c | 1 + arch/x86/kvm/vmx/pmu_intel.c | 2 ++ 5 files changed, 61 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 6f6b8886a8eb..a87a6c98adee 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -449,6 +449,11 @@ struct kvm_pmc { u64 eventsel; struct perf_event *perf_event; struct kvm_vcpu *vcpu; + /* + * eventsel value for general purpose counters, + * ctrl value for fixed counters.
+ */ + u64 current_config; }; struct kvm_pmu { diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index 472b69b3b6c3..99565de5410a 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -138,6 +138,35 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type, clear_bit(pmc->idx, pmc_to_pmu(pmc)->reprogram_pmi); } +static void pmc_pause_counter(struct kvm_pmc *pmc) +{ + u64 counter = pmc->counter; + + if (!pmc->perf_event) + return; + + /* update counter, reset event value to avoid redundant accumulation */ + counter += perf_event_pause(pmc->perf_event, true); + pmc->counter = counter & pmc_bitmask(pmc); +} + +static bool pmc_resume_counter(struct kvm_pmc *pmc) +{ + if (!pmc->perf_event) + return false; + + /* recalibrate sample period and check if it's accepted by perf core */ + if (perf_event_period(pmc->perf_event, + (-pmc->counter) & pmc_bitmask(pmc))) + return false; + + /* reuse perf_event to serve as pmc_reprogram_counter() does*/ + perf_event_enable(pmc->perf_event); + + clear_bit(pmc->idx, (unsigned long *)&pmc_to_pmu(pmc)->reprogram_pmi); + return true; +} + void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel) { unsigned config, type = PERF_TYPE_RAW; @@ -152,7 +181,7 @@ void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel) pmc->eventsel = eventsel; - pmc_stop_counter(pmc); + pmc_pause_counter(pmc); if (!(eventsel & ARCH_PERFMON_EVENTSEL_ENABLE) || !pmc_is_enabled(pmc)) return; @@ -191,6 +220,12 @@ void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel) if (type == PERF_TYPE_RAW) config = eventsel & X86_RAW_EVENT_MASK; + if (pmc->current_config == eventsel && pmc_resume_counter(pmc)) + return; + + pmc_release_perf_event(pmc); + + pmc->current_config = eventsel; pmc_reprogram_counter(pmc, type, config, !(eventsel & ARCH_PERFMON_EVENTSEL_USR), !(eventsel & ARCH_PERFMON_EVENTSEL_OS), @@ -207,7 +242,7 @@ void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int idx) struct kvm_pmu_event_filter *filter; struct kvm *kvm = pmc->vcpu->kvm; - pmc_stop_counter(pmc); + pmc_pause_counter(pmc); if (!en_field || !pmc_is_enabled(pmc)) return; @@ -222,6 +257,12 @@ void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int idx) return; } + if (pmc->current_config == (u64)ctrl && pmc_resume_counter(pmc)) + return; + + pmc_release_perf_event(pmc); + + pmc->current_config = (u64)ctrl; pmc_reprogram_counter(pmc, PERF_TYPE_HARDWARE, kvm_x86_ops->pmu_ops->find_fixed_event(idx), !(en_field & 0x2), /* exclude user */ diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h index b253dd5e56cf..7eba298587dc 100644 --- a/arch/x86/kvm/pmu.h +++ b/arch/x86/kvm/pmu.h @@ -56,12 +56,20 @@ static inline u64 pmc_read_counter(struct kvm_pmc *pmc) return counter & pmc_bitmask(pmc); } -static inline void pmc_stop_counter(struct kvm_pmc *pmc) +static inline void pmc_release_perf_event(struct kvm_pmc *pmc) { if (pmc->perf_event) { - pmc->counter = pmc_read_counter(pmc); perf_event_release_kernel(pmc->perf_event); pmc->perf_event = NULL; + pmc->current_config = 0; + } +} + +static inline void pmc_stop_counter(struct kvm_pmc *pmc) +{ + if (pmc->perf_event) { + pmc->counter = pmc_read_counter(pmc); + pmc_release_perf_event(pmc); } } diff --git a/arch/x86/kvm/pmu_amd.c b/arch/x86/kvm/pmu_amd.c index e8609ccd0b62..e87d34136047 100644 --- a/arch/x86/kvm/pmu_amd.c +++ b/arch/x86/kvm/pmu_amd.c @@ -292,6 +292,7 @@ static void amd_pmu_init(struct kvm_vcpu *vcpu) pmu->gp_counters[i].type = KVM_PMC_GP; pmu->gp_counters[i].vcpu = vcpu; pmu->gp_counters[i].idx = i; + 
pmu->gp_counters[i].current_config = 0; } } diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index dcde142327ca..9b1ddc42f604 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -340,12 +340,14 @@ static void intel_pmu_init(struct kvm_vcpu *vcpu) pmu->gp_counters[i].type = KVM_PMC_GP; pmu->gp_counters[i].vcpu = vcpu; pmu->gp_counters[i].idx = i; + pmu->gp_counters[i].current_config = 0; } for (i = 0; i < INTEL_PMC_MAX_FIXED; i++) { pmu->fixed_counters[i].type = KVM_PMC_FIXED; pmu->fixed_counters[i].vcpu = vcpu; pmu->fixed_counters[i].idx = i + INTEL_PMC_IDX_FIXED; + pmu->fixed_counters[i].current_config = 0; } } -- cgit v1.2.3 From b35e5548b41131eb06de041af2f5fb0890d96f96 Mon Sep 17 00:00:00 2001 From: Like Xu Date: Sun, 27 Oct 2019 18:52:43 +0800 Subject: KVM: x86/vPMU: Add lazy mechanism to release perf_event per vPMC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, a host perf_event is created for vPMC functionality emulation. It’s unpredictable to determine if a disabled perf_event will be reused. If perf_events are disabled and are not reused for a considerable period of time, those obsolete perf_events increase host context switch overhead that could have been avoided. If the guest doesn't WRMSR any of the vPMC's MSRs during an entire vcpu sched time slice, and the vPMC's independent enable bit isn't set, we can predict that the guest has finished using this vPMC, so request KVM_REQ_PMU in kvm_arch_sched_in and release those perf_events in the first call of kvm_pmu_handle_event() after the vcpu is scheduled in. This lazy mechanism delays the event release time to the beginning of the next scheduled time slice if the vPMC's MSRs aren't changed during this time slice. If the guest comes back to use this vPMC in the next time slice, a new perf_event is re-created via perf_event_create_kernel_counter() as usual. Suggested-by: Wei Wang Suggested-by: Paolo Bonzini Signed-off-by: Like Xu Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 14 +++++++++++ arch/x86/kvm/pmu.c | 55 +++++++++++++++++++++++++++++++++++++++++ arch/x86/kvm/pmu.h | 2 ++ arch/x86/kvm/pmu_amd.c | 1 + arch/x86/kvm/vmx/pmu_intel.c | 6 +++++ arch/x86/kvm/x86.c | 6 +++++ 6 files changed, 84 insertions(+) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index a87a6c98adee..20bb2fc0883a 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -473,6 +473,20 @@ struct kvm_pmu { struct kvm_pmc fixed_counters[INTEL_PMC_MAX_FIXED]; struct irq_work irq_work; DECLARE_BITMAP(reprogram_pmi, X86_PMC_IDX_MAX); + DECLARE_BITMAP(all_valid_pmc_idx, X86_PMC_IDX_MAX); + DECLARE_BITMAP(pmc_in_use, X86_PMC_IDX_MAX); + + /* + * The gate to release perf_events not marked in + * pmc_in_use only once in a vcpu time slice. + */ + bool need_cleanup; + + /* + * The total number of programmed perf_events and it helps to avoid + * redundant check before cleanup if guest don't use vPMU at all.
+ */ + u8 event_count; }; struct kvm_pmu_ops; diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index 99565de5410a..d5e6d5b3f06f 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -135,6 +135,7 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type, } pmc->perf_event = event; + pmc_to_pmu(pmc)->event_count++; clear_bit(pmc->idx, pmc_to_pmu(pmc)->reprogram_pmi); } @@ -304,6 +305,14 @@ void kvm_pmu_handle_event(struct kvm_vcpu *vcpu) reprogram_counter(pmu, bit); } + + /* + * Unused perf_events are only released if the corresponding MSRs + * weren't accessed during the last vCPU time slice. kvm_arch_sched_in + * triggers KVM_REQ_PMU if cleanup is needed. + */ + if (unlikely(pmu->need_cleanup)) + kvm_pmu_cleanup(vcpu); } /* check if idx is a valid index to access PMU */ @@ -379,6 +388,15 @@ bool kvm_pmu_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr) kvm_x86_ops->pmu_ops->is_valid_msr(vcpu, msr); } +static void kvm_pmu_mark_pmc_in_use(struct kvm_vcpu *vcpu, u32 msr) +{ + struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); + struct kvm_pmc *pmc = kvm_x86_ops->pmu_ops->msr_idx_to_pmc(vcpu, msr); + + if (pmc) + __set_bit(pmc->idx, pmu->pmc_in_use); +} + int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data) { return kvm_x86_ops->pmu_ops->get_msr(vcpu, msr, data); @@ -386,6 +404,7 @@ int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data) int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) { + kvm_pmu_mark_pmc_in_use(vcpu, msr_info->index); return kvm_x86_ops->pmu_ops->set_msr(vcpu, msr_info); } @@ -413,9 +432,45 @@ void kvm_pmu_init(struct kvm_vcpu *vcpu) memset(pmu, 0, sizeof(*pmu)); kvm_x86_ops->pmu_ops->init(vcpu); init_irq_work(&pmu->irq_work, kvm_pmi_trigger_fn); + pmu->event_count = 0; + pmu->need_cleanup = false; kvm_pmu_refresh(vcpu); } +static inline bool pmc_speculative_in_use(struct kvm_pmc *pmc) +{ + struct kvm_pmu *pmu = pmc_to_pmu(pmc); + + if (pmc_is_fixed(pmc)) + return fixed_ctrl_field(pmu->fixed_ctr_ctrl, + pmc->idx - INTEL_PMC_IDX_FIXED) & 0x3; + + return pmc->eventsel & ARCH_PERFMON_EVENTSEL_ENABLE; +} + +/* Release perf_events for vPMCs that have been unused for a full time slice. 
*/ +void kvm_pmu_cleanup(struct kvm_vcpu *vcpu) +{ + struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); + struct kvm_pmc *pmc = NULL; + DECLARE_BITMAP(bitmask, X86_PMC_IDX_MAX); + int i; + + pmu->need_cleanup = false; + + bitmap_andnot(bitmask, pmu->all_valid_pmc_idx, + pmu->pmc_in_use, X86_PMC_IDX_MAX); + + for_each_set_bit(i, bitmask, X86_PMC_IDX_MAX) { + pmc = kvm_x86_ops->pmu_ops->pmc_idx_to_pmc(pmu, i); + + if (pmc && pmc->perf_event && !pmc_speculative_in_use(pmc)) + pmc_stop_counter(pmc); + } + + bitmap_zero(pmu->pmc_in_use, X86_PMC_IDX_MAX); +} + void kvm_pmu_destroy(struct kvm_vcpu *vcpu) { kvm_pmu_reset(vcpu); diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h index 7eba298587dc..b7a625874203 100644 --- a/arch/x86/kvm/pmu.h +++ b/arch/x86/kvm/pmu.h @@ -62,6 +62,7 @@ static inline void pmc_release_perf_event(struct kvm_pmc *pmc) { perf_event_release_kernel(pmc->perf_event); pmc->perf_event = NULL; pmc->current_config = 0; + pmc_to_pmu(pmc)->event_count--; } } @@ -126,6 +127,7 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info); void kvm_pmu_refresh(struct kvm_vcpu *vcpu); void kvm_pmu_reset(struct kvm_vcpu *vcpu); void kvm_pmu_init(struct kvm_vcpu *vcpu); +void kvm_pmu_cleanup(struct kvm_vcpu *vcpu); void kvm_pmu_destroy(struct kvm_vcpu *vcpu); int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp); diff --git a/arch/x86/kvm/pmu_amd.c b/arch/x86/kvm/pmu_amd.c index e87d34136047..ce0b10fe5e2b 100644 --- a/arch/x86/kvm/pmu_amd.c +++ b/arch/x86/kvm/pmu_amd.c @@ -279,6 +279,7 @@ static void amd_pmu_refresh(struct kvm_vcpu *vcpu) pmu->counter_bitmask[KVM_PMC_FIXED] = 0; pmu->nr_arch_fixed_counters = 0; pmu->global_status = 0; + bitmap_set(pmu->all_valid_pmc_idx, 0, pmu->nr_arch_gp_counters); } static void amd_pmu_init(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index 9b1ddc42f604..b5a16379f534 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -46,6 +46,7 @@ static void reprogram_fixed_counters(struct kvm_pmu *pmu, u64 data) if (old_ctrl == new_ctrl) continue; + __set_bit(INTEL_PMC_IDX_FIXED + i, pmu->pmc_in_use); reprogram_fixed_counter(pmc, new_ctrl, i); } @@ -329,6 +330,11 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu) (boot_cpu_has(X86_FEATURE_HLE) || boot_cpu_has(X86_FEATURE_RTM)) && (entry->ebx & (X86_FEATURE_HLE|X86_FEATURE_RTM))) pmu->reserved_bits ^= HSW_IN_TX|HSW_IN_TX_CHECKPOINTED; + + bitmap_set(pmu->all_valid_pmc_idx, + 0, pmu->nr_arch_gp_counters); + bitmap_set(pmu->all_valid_pmc_idx, + INTEL_PMC_MAX_GENERIC, pmu->nr_arch_fixed_counters); } static void intel_pmu_init(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 72189160bb81..8db7275d313f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -9455,7 +9455,13 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) { + struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); + vcpu->arch.l1tf_flush_l1d = true; + if (pmu->version && unlikely(pmu->event_count)) { + pmu->need_cleanup = true; + kvm_make_request(KVM_REQ_PMU, vcpu); + } kvm_x86_ops->sched_in(vcpu, cpu); } -- cgit v1.2.3 From 1924242b2abadfb1144c3c22083fd6f71caadd64 Mon Sep 17 00:00:00 2001 From: Liran Alon Date: Tue, 12 Nov 2019 20:33:00 +0200 Subject: KVM: x86: Optimization: Request TLB flush in fast_cr3_switch() instead of doing it directly When KVM emulates a nested VMEntry (L1->L2 VMEntry), it switches the MMU root page.
If nEPT is used, this will happen from kvm_init_shadow_ept_mmu()->__kvm_mmu_new_cr3(); otherwise, it will happen from nested_vmx_load_cr3()->kvm_mmu_new_cr3(). In either case, __kvm_mmu_new_cr3() will use fast_cr3_switch() in an attempt to switch to a previously cached root page. In case fast_cr3_switch() finds a matching cached root page, it will set it in mmu->root_hpa and request KVM_REQ_LOAD_CR3 such that on the next entry to the guest, KVM will set the root HPA in the appropriate hardware fields (e.g. vmcs->eptp). In addition, fast_cr3_switch() calls kvm_x86_ops->tlb_flush() in order to flush the TLB, as the MMU root page was replaced. This works because mmu->root_hpa, which vmx_flush_tlb() uses, was already replaced in cached_root_available(). However, this may result in unnecessary INVEPT execution because a KVM_REQ_TLB_FLUSH may have already been requested, for example by prepare_vmcs02() when L1 doesn't use VPID. Therefore, change fast_cr3_switch() to just request a TLB flush on the next entry to the guest. Reviewed-by: Bhavesh Davda Signed-off-by: Liran Alon Reviewed-by: Vitaly Kuznetsov Reviewed-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 24c23c66b226..150d982ec1d2 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -4295,7 +4295,7 @@ static bool fast_cr3_switch(struct kvm_vcpu *vcpu, gpa_t new_cr3, kvm_make_request(KVM_REQ_LOAD_CR3, vcpu); if (!skip_tlb_flush) { kvm_make_request(KVM_REQ_MMU_SYNC, vcpu); - kvm_x86_ops->tlb_flush(vcpu, true); + kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); } /* -- cgit v1.2.3 From 9477f4449b0b011ce1d058c09ec450bfcdaab784 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Wed, 13 Nov 2019 16:17:15 -0800 Subject: KVM: VMX: Add helper to check reserved bits in IA32_PERF_GLOBAL_CTRL Create a helper function to check the validity of a proposed value for IA32_PERF_GLOBAL_CTRL from the existing check in intel_pmu_set_msr(). Per Intel's SDM, the reserved bits in IA32_PERF_GLOBAL_CTRL must be cleared for the corresponding host/guest state fields. Suggested-by: Jim Mattson Co-developed-by: Krish Sadhukhan Signed-off-by: Krish Sadhukhan Signed-off-by: Oliver Upton Reviewed-by: Jim Mattson Reviewed-by: Peter Shier Signed-off-by: Paolo Bonzini --- arch/x86/kvm/pmu.h | 6 ++++++ arch/x86/kvm/vmx/pmu_intel.c | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h index b7a625874203..7ebb62326c14 100644 --- a/arch/x86/kvm/pmu.h +++ b/arch/x86/kvm/pmu.h @@ -89,6 +89,12 @@ static inline bool pmc_is_enabled(struct kvm_pmc *pmc) return kvm_x86_ops->pmu_ops->pmc_is_enabled(pmc); } +static inline bool kvm_valid_perf_global_ctrl(struct kvm_pmu *pmu, + u64 data) +{ + return !(pmu->global_ctrl_mask & data); +} + /* returns general purpose PMC with the specified MSR. Note that it can be * used for both PERFCTRn and EVNTSELn; that is why it accepts base as a * paramenter to tell them apart.
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index b5a16379f534..0990a12a76a8 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -236,7 +236,7 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_CORE_PERF_GLOBAL_CTRL: if (pmu->global_ctrl == data) return 0; - if (!(data & pmu->global_ctrl_mask)) { + if (kvm_valid_perf_global_ctrl(pmu, data)) { global_ctrl_changed(pmu, data); return 0; } -- cgit v1.2.3 From bfc6ad6ab3563b4151bbcfe162c612930a3e0854 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Wed, 13 Nov 2019 16:17:16 -0800 Subject: KVM: nVMX: Check GUEST_IA32_PERF_GLOBAL_CTRL on VM-Entry Add condition to nested_vmx_check_guest_state() to check the validity of GUEST_IA32_PERF_GLOBAL_CTRL. Per Intel's SDM Vol 3 26.3.1.1: If the "load IA32_PERF_GLOBAL_CTRL" VM-entry control is 1, bits reserved in the IA32_PERF_GLOBAL_CTRL MSR must be 0 in the field for that register. Suggested-by: Jim Mattson Co-developed-by: Krish Sadhukhan Signed-off-by: Krish Sadhukhan Signed-off-by: Oliver Upton Reviewed-by: Jim Mattson Reviewed-by: Peter Shier Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index bdb9b3028250..9190da3579c4 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -10,6 +10,7 @@ #include "hyperv.h" #include "mmu.h" #include "nested.h" +#include "pmu.h" #include "trace.h" #include "x86.h" @@ -2790,6 +2791,11 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu, return -EINVAL; } + if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) && + CC(!kvm_valid_perf_global_ctrl(vcpu_to_pmu(vcpu), + vmcs12->guest_ia32_perf_global_ctrl))) + return -EINVAL; + /* * If the load IA32_EFER VM-entry control is 1, the following checks * are performed on the field for the IA32_EFER MSR: -- cgit v1.2.3 From c547cb6f78cf5dc8f029459b115ef44c56a2a776 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Wed, 13 Nov 2019 16:17:17 -0800 Subject: KVM: nVMX: Check HOST_IA32_PERF_GLOBAL_CTRL on VM-Entry Add a consistency check on nested vm-entry for host's IA32_PERF_GLOBAL_CTRL from vmcs12. 
Per Intel's SDM Vol 3 26.2.2: If the "load IA32_PERF_GLOBAL_CTRL" VM-exit control is 1, bits reserved in the IA32_PERF_GLOBAL_CTRL MSR must be 0 in the field for that register" Suggested-by: Jim Mattson Co-developed-by: Krish Sadhukhan Signed-off-by: Krish Sadhukhan Signed-off-by: Oliver Upton Reviewed-by: Jim Mattson Reviewed-by: Peter Shier Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 9190da3579c4..ac896e92de23 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -2676,6 +2676,11 @@ static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu, CC(!kvm_pat_valid(vmcs12->host_ia32_pat))) return -EINVAL; + if ((vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) && + CC(!kvm_valid_perf_global_ctrl(vcpu_to_pmu(vcpu), + vmcs12->host_ia32_perf_global_ctrl))) + return -EINVAL; + #ifdef CONFIG_X86_64 ia32e = !!(vcpu->arch.efer & EFER_LMA); #else -- cgit v1.2.3 From 458151f65b4d8acfc7403b59fd9694ca15dbfe2e Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Wed, 13 Nov 2019 16:17:18 -0800 Subject: KVM: nVMX: Use kvm_set_msr to load IA32_PERF_GLOBAL_CTRL on VM-Exit The existing implementation for loading the IA32_PERF_GLOBAL_CTRL MSR on VM-exit was incorrect, as the next call to atomic_switch_perf_msrs() could cause this value to be overwritten. Instead, call kvm_set_msr() which will allow atomic_switch_perf_msrs() to correctly set the values. Define a macro, SET_MSR_OR_WARN(), to set the MSR with kvm_set_msr() and WARN on failure. Suggested-by: Jim Mattson Co-developed-by: Krish Sadhukhan Signed-off-by: Krish Sadhukhan Signed-off-by: Oliver Upton Reviewed-by: Jim Mattson Reviewed-by: Peter Shier Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index ac896e92de23..75b7091e4a88 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -28,6 +28,16 @@ module_param(nested_early_check, bool, S_IRUGO); failed; \ }) +#define SET_MSR_OR_WARN(vcpu, idx, data) \ +({ \ + bool failed = kvm_set_msr(vcpu, idx, data); \ + if (failed) \ + pr_warn_ratelimited( \ + "%s cannot write MSR (0x%x, 0x%llx)\n", \ + __func__, idx, data); \ + failed; \ +}) + /* * Hyper-V requires all of these, so mark them as supported even though * they are just treated the same as all-context. @@ -3879,8 +3889,8 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, vcpu->arch.pat = vmcs12->host_ia32_pat; } if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) - vmcs_write64(GUEST_IA32_PERF_GLOBAL_CTRL, - vmcs12->host_ia32_perf_global_ctrl); + SET_MSR_OR_WARN(vcpu, MSR_CORE_PERF_GLOBAL_CTRL, + vmcs12->host_ia32_perf_global_ctrl); /* Set L1 segment info according to Intel SDM 27.5.2 Loading Host Segment and Descriptor-Table Registers */ -- cgit v1.2.3 From 71f7347025bf10f5c0b48e149898df57b7f3d414 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Wed, 13 Nov 2019 16:17:19 -0800 Subject: KVM: nVMX: Load GUEST_IA32_PERF_GLOBAL_CTRL MSR on VM-Entry Add condition to prepare_vmcs02 which loads IA32_PERF_GLOBAL_CTRL on VM-entry if the "load IA32_PERF_GLOBAL_CTRL" bit on the VM-entry control is set. Use SET_MSR_OR_WARN() rather than directly writing to the field to avoid overwrite by atomic_switch_perf_msrs(). 
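The overwrite the new approach avoids can be sketched roughly as follows (an illustrative sequence, not verbatim kernel code, under the stated assumption that perf later repopulates the autoload list):

	/* Old behavior on emulated VM-exit: raw write of the value */
	vmcs_write64(GUEST_IA32_PERF_GLOBAL_CTRL,
		     vmcs12->host_ia32_perf_global_ctrl);
	/* A later perf update rewrites the MSR autoload slots, and the
	 * raw value above is effectively lost on the next VM-entry: */
	atomic_switch_perf_msrs(vmx);

Routing the load through kvm_set_msr() updates KVM's PMU model first, so the autoload machinery works from a consistent value.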
Suggested-by: Jim Mattson Co-developed-by: Krish Sadhukhan Signed-off-by: Krish Sadhukhan Signed-off-by: Oliver Upton Reviewed-by: Jim Mattson Reviewed-by: Peter Shier Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 75b7091e4a88..abef0dbe94bb 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -2452,6 +2452,11 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, if (!enable_ept) vcpu->arch.walk_mmu->inject_page_fault = vmx_inject_page_fault_nested; + if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) && + SET_MSR_OR_WARN(vcpu, MSR_CORE_PERF_GLOBAL_CTRL, + vmcs12->guest_ia32_perf_global_ctrl)) + return -EINVAL; + kvm_rsp_write(vcpu, vmcs12->guest_rsp); kvm_rip_write(vcpu, vmcs12->guest_rip); return 0; -- cgit v1.2.3 From 03a8871add95213827e2bea84c12133ae5df952e Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Wed, 13 Nov 2019 16:17:20 -0800 Subject: KVM: nVMX: Expose load IA32_PERF_GLOBAL_CTRL VM-{Entry,Exit} control The "load IA32_PERF_GLOBAL_CTRL" bit for VM-entry and VM-exit should only be exposed to the guest if IA32_PERF_GLOBAL_CTRL is a valid MSR. Create a new helper to allow pmu_refresh() to update the VM-Entry and VM-Exit controls to ensure PMU values are initialized when performing the is_valid_msr() check. Suggested-by: Jim Mattson Co-developed-by: Krish Sadhukhan Signed-off-by: Krish Sadhukhan Signed-off-by: Oliver Upton Reviewed-by: Jim Mattson Reviewed-by: Peter Shier Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 21 +++++++++++++++++++++ arch/x86/kvm/vmx/nested.h | 1 + arch/x86/kvm/vmx/pmu_intel.c | 3 +++ 3 files changed, 25 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index abef0dbe94bb..c6f5e5821d4c 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -4359,6 +4359,27 @@ int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification, return 0; } +void nested_vmx_pmu_entry_exit_ctls_update(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx; + + if (!nested_vmx_allowed(vcpu)) + return; + + vmx = to_vmx(vcpu); + if (kvm_x86_ops->pmu_ops->is_valid_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL)) { + vmx->nested.msrs.entry_ctls_high |= + VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL; + vmx->nested.msrs.exit_ctls_high |= + VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL; + } else { + vmx->nested.msrs.entry_ctls_high &= + ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL; + vmx->nested.msrs.exit_ctls_high &= + ~VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL; + } +} + static int nested_vmx_get_vmptr(struct kvm_vcpu *vcpu, gpa_t *vmpointer) { gva_t gva; diff --git a/arch/x86/kvm/vmx/nested.h b/arch/x86/kvm/vmx/nested.h index 4cf1d40da15f..19e6015722a9 100644 --- a/arch/x86/kvm/vmx/nested.h +++ b/arch/x86/kvm/vmx/nested.h @@ -22,6 +22,7 @@ int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); int vmx_get_vmx_msr(struct nested_vmx_msrs *msrs, u32 msr_index, u64 *pdata); int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification, u32 vmx_instruction_info, bool wr, int len, gva_t *ret); +void nested_vmx_pmu_entry_exit_ctls_update(struct kvm_vcpu *vcpu); static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu) { diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index 0990a12a76a8..7023138b1cb0 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@
-15,6 +15,7 @@ #include "x86.h" #include "cpuid.h" #include "lapic.h" +#include "nested.h" #include "pmu.h" static struct kvm_event_hw_type_mapping intel_arch_events[] = { @@ -335,6 +336,8 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu) 0, pmu->nr_arch_gp_counters); bitmap_set(pmu->all_valid_pmc_idx, INTEL_PMC_MAX_GENERIC, pmu->nr_arch_fixed_counters); + + nested_vmx_pmu_entry_exit_ctls_update(vcpu); } static void intel_pmu_init(struct kvm_vcpu *vcpu) -- cgit v1.2.3 From d4069dbeb51e34e1db0458a7455e509daaaa529a Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 15 Nov 2019 11:36:10 +0100 Subject: KVM: nVMX: mark functions in the header as "static inline" Correct a small inaccuracy in the shattering of vmx.c, which becomes visible now that pmu_intel.c includes nested.h. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/nested.h b/arch/x86/kvm/vmx/nested.h index 19e6015722a9..b9e519840f28 100644 --- a/arch/x86/kvm/vmx/nested.h +++ b/arch/x86/kvm/vmx/nested.h @@ -246,7 +246,7 @@ static inline bool fixed_bits_valid(u64 val, u64 fixed0, u64 fixed1) return ((val & fixed1) | fixed0) == val; } -static bool nested_guest_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val) +static inline bool nested_guest_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val) { u64 fixed0 = to_vmx(vcpu)->nested.msrs.cr0_fixed0; u64 fixed1 = to_vmx(vcpu)->nested.msrs.cr0_fixed1; @@ -260,7 +260,7 @@ static bool nested_guest_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val) return fixed_bits_valid(val, fixed0, fixed1); } -static bool nested_host_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val) +static inline bool nested_host_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val) { u64 fixed0 = to_vmx(vcpu)->nested.msrs.cr0_fixed0; u64 fixed1 = to_vmx(vcpu)->nested.msrs.cr0_fixed1; @@ -268,7 +268,7 @@ static bool nested_host_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val) return fixed_bits_valid(val, fixed0, fixed1); } -static bool nested_cr4_valid(struct kvm_vcpu *vcpu, unsigned long val) +static inline bool nested_cr4_valid(struct kvm_vcpu *vcpu, unsigned long val) { u64 fixed0 = to_vmx(vcpu)->nested.msrs.cr4_fixed0; u64 fixed1 = to_vmx(vcpu)->nested.msrs.cr4_fixed1; -- cgit v1.2.3 From 365d3d55d6019233c02d68dbd3d2dfde1b8a1467 Mon Sep 17 00:00:00 2001 From: Aaron Lewis Date: Thu, 7 Nov 2019 21:14:36 -0800 Subject: kvm: nested: Introduce read_and_check_msr_entry() Add the function read_and_check_msr_entry() which just pulls some code out of nested_vmx_store_msr(). This will be useful as reusable code in upcoming patches. 
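A minimal usage sketch for the helper (assuming the same calling convention as in nested_vmx_store_msr() below):

	struct vmx_msr_entry e;
	u32 i;

	for (i = 0; i < count; i++) {
		/* false: unreadable guest memory or reserved bits set */
		if (!read_and_check_msr_entry(vcpu, gpa, i, &e))
			return -EINVAL;
		/* ... consume e.index here ... */
	}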
Reviewed-by: Liran Alon Reviewed-by: Jim Mattson Signed-off-by: Aaron Lewis Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 35 ++++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index c6f5e5821d4c..3ef529cc72fb 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -940,6 +940,26 @@ fail: return i + 1; } +static bool read_and_check_msr_entry(struct kvm_vcpu *vcpu, u64 gpa, int i, + struct vmx_msr_entry *e) +{ + if (kvm_vcpu_read_guest(vcpu, + gpa + i * sizeof(*e), + e, 2 * sizeof(u32))) { + pr_debug_ratelimited( + "%s cannot read MSR entry (%u, 0x%08llx)\n", + __func__, i, gpa + i * sizeof(*e)); + return false; + } + if (nested_vmx_store_msr_check(vcpu, e)) { + pr_debug_ratelimited( + "%s check failed (%u, 0x%x, 0x%x)\n", + __func__, i, e->index, e->reserved); + return false; + } + return true; +} + static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count) { u64 data; @@ -951,20 +971,9 @@ static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count) if (unlikely(i >= max_msr_list_size)) return -EINVAL; - if (kvm_vcpu_read_guest(vcpu, - gpa + i * sizeof(e), - &e, 2 * sizeof(u32))) { - pr_debug_ratelimited( - "%s cannot read MSR entry (%u, 0x%08llx)\n", - __func__, i, gpa + i * sizeof(e)); + if (!read_and_check_msr_entry(vcpu, gpa, i, &e)) return -EINVAL; - } - if (nested_vmx_store_msr_check(vcpu, &e)) { - pr_debug_ratelimited( - "%s check failed (%u, 0x%x, 0x%x)\n", - __func__, i, e.index, e.reserved); - return -EINVAL; - } + if (kvm_get_msr(vcpu, e.index, &data)) { pr_debug_ratelimited( "%s cannot read MSR (%u, 0x%x)\n", -- cgit v1.2.3 From 7cfe0526fd379e4ff9c3dcf933c1966a3a635013 Mon Sep 17 00:00:00 2001 From: Aaron Lewis Date: Thu, 7 Nov 2019 21:14:37 -0800 Subject: kvm: vmx: Rename NR_AUTOLOAD_MSRS to NR_LOADSTORE_MSRS Rename NR_AUTOLOAD_MSRS to NR_LOADSTORE_MSRS. This needs to be done due to the addition of the MSR-autostore area that will be added in a future patch. After that the name AUTOLOAD will no longer make sense. Reviewed-by: Jim Mattson Signed-off-by: Aaron Lewis Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 4 ++-- arch/x86/kvm/vmx/vmx.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 765086756177..ba0124e66db7 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -938,8 +938,8 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, if (!entry_only) j = find_msr(&m->host, msr); - if ((i < 0 && m->guest.nr == NR_AUTOLOAD_MSRS) || - (j < 0 && m->host.nr == NR_AUTOLOAD_MSRS)) { + if ((i < 0 && m->guest.nr == NR_LOADSTORE_MSRS) || + (j < 0 && m->host.nr == NR_LOADSTORE_MSRS)) { printk_once(KERN_WARNING "Not enough msr switch entries. 
" "Can't add msr %x\n", msr); return; diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 43331dfafffe..73ff03091d29 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -22,11 +22,11 @@ extern u32 get_umwait_control_msr(void); #define X2APIC_MSR(r) (APIC_BASE_MSR + ((r) >> 4)) -#define NR_AUTOLOAD_MSRS 8 +#define NR_LOADSTORE_MSRS 8 struct vmx_msrs { unsigned int nr; - struct vmx_msr_entry val[NR_AUTOLOAD_MSRS]; + struct vmx_msr_entry val[NR_LOADSTORE_MSRS]; }; struct shared_msr_entry { -- cgit v1.2.3 From ef0fbcac3f2aadb10d9a6c461eabc7dd01cbed9b Mon Sep 17 00:00:00 2001 From: Aaron Lewis Date: Thu, 7 Nov 2019 21:14:38 -0800 Subject: kvm: vmx: Rename function find_msr() to vmx_find_msr_index() Rename function find_msr() to vmx_find_msr_index() in preparation for an upcoming patch where we export it and use it in nested.c. Reviewed-by: Jim Mattson Signed-off-by: Aaron Lewis Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index ba0124e66db7..7b191963dde1 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -833,7 +833,7 @@ static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx, vm_exit_controls_clearbit(vmx, exit); } -static int find_msr(struct vmx_msrs *m, unsigned int msr) +static int vmx_find_msr_index(struct vmx_msrs *m, u32 msr) { unsigned int i; @@ -867,7 +867,7 @@ static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr) } break; } - i = find_msr(&m->guest, msr); + i = vmx_find_msr_index(&m->guest, msr); if (i < 0) goto skip_guest; --m->guest.nr; @@ -875,7 +875,7 @@ static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr) vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->guest.nr); skip_guest: - i = find_msr(&m->host, msr); + i = vmx_find_msr_index(&m->host, msr); if (i < 0) return; @@ -934,9 +934,9 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, wrmsrl(MSR_IA32_PEBS_ENABLE, 0); } - i = find_msr(&m->guest, msr); + i = vmx_find_msr_index(&m->guest, msr); if (!entry_only) - j = find_msr(&m->host, msr); + j = vmx_find_msr_index(&m->host, msr); if ((i < 0 && m->guest.nr == NR_LOADSTORE_MSRS) || (j < 0 && m->host.nr == NR_LOADSTORE_MSRS)) { -- cgit v1.2.3 From 662f1d1d19317e792ccfc53dee625c02dcefac58 Mon Sep 17 00:00:00 2001 From: Aaron Lewis Date: Thu, 7 Nov 2019 21:14:39 -0800 Subject: KVM: nVMX: Add support for capturing highest observable L2 TSC The L1 hypervisor may include the IA32_TIME_STAMP_COUNTER MSR in the vmcs12 MSR VM-exit MSR-store area as a way of determining the highest TSC value that might have been observed by L2 prior to VM-exit. The current implementation does not capture a very tight bound on this value. To tighten the bound, add the IA32_TIME_STAMP_COUNTER MSR to the vmcs02 VM-exit MSR-store area whenever it appears in the vmcs12 VM-exit MSR-store area. When L0 processes the vmcs12 VM-exit MSR-store area during the emulation of an L2->L1 VM-exit, special-case the IA32_TIME_STAMP_COUNTER MSR, using the value stored in the vmcs02 VM-exit MSR-store area to derive the value to be stored in the vmcs12 VM-exit MSR-store area. 
Reviewed-by: Liran Alon Reviewed-by: Jim Mattson Signed-off-by: Aaron Lewis Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 101 +++++++++++++++++++++++++++++++++++++++++++--- arch/x86/kvm/vmx/vmx.c | 2 +- arch/x86/kvm/vmx/vmx.h | 5 +++ 3 files changed, 101 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 3ef529cc72fb..60d42ce42403 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -940,6 +940,37 @@ fail: return i + 1; } +static bool nested_vmx_get_vmexit_msr_value(struct kvm_vcpu *vcpu, + u32 msr_index, + u64 *data) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + + /* + * If the L0 hypervisor stored a more accurate value for the TSC that + * does not include the time taken for emulation of the L2->L1 + * VM-exit in L0, use the more accurate value. + */ + if (msr_index == MSR_IA32_TSC) { + int index = vmx_find_msr_index(&vmx->msr_autostore.guest, + MSR_IA32_TSC); + + if (index >= 0) { + u64 val = vmx->msr_autostore.guest.val[index].value; + + *data = kvm_read_l1_tsc(vcpu, val); + return true; + } + } + + if (kvm_get_msr(vcpu, msr_index, data)) { + pr_debug_ratelimited("%s cannot read MSR (0x%x)\n", __func__, + msr_index); + return false; + } + return true; +} + static bool read_and_check_msr_entry(struct kvm_vcpu *vcpu, u64 gpa, int i, struct vmx_msr_entry *e) { @@ -974,12 +1005,9 @@ static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count) if (!read_and_check_msr_entry(vcpu, gpa, i, &e)) return -EINVAL; - if (kvm_get_msr(vcpu, e.index, &data)) { - pr_debug_ratelimited( - "%s cannot read MSR (%u, 0x%x)\n", - __func__, i, e.index); + if (!nested_vmx_get_vmexit_msr_value(vcpu, e.index, &data)) return -EINVAL; - } + if (kvm_vcpu_write_guest(vcpu, gpa + i * sizeof(e) + offsetof(struct vmx_msr_entry, value), @@ -993,6 +1021,60 @@ static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count) return 0; } +static bool nested_msr_store_list_has_msr(struct kvm_vcpu *vcpu, u32 msr_index) +{ + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + u32 count = vmcs12->vm_exit_msr_store_count; + u64 gpa = vmcs12->vm_exit_msr_store_addr; + struct vmx_msr_entry e; + u32 i; + + for (i = 0; i < count; i++) { + if (!read_and_check_msr_entry(vcpu, gpa, i, &e)) + return false; + + if (e.index == msr_index) + return true; + } + return false; +} + +static void prepare_vmx_msr_autostore_list(struct kvm_vcpu *vcpu, + u32 msr_index) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + struct vmx_msrs *autostore = &vmx->msr_autostore.guest; + bool in_vmcs12_store_list; + int msr_autostore_index; + bool in_autostore_list; + int last; + + msr_autostore_index = vmx_find_msr_index(autostore, msr_index); + in_autostore_list = msr_autostore_index >= 0; + in_vmcs12_store_list = nested_msr_store_list_has_msr(vcpu, msr_index); + + if (in_vmcs12_store_list && !in_autostore_list) { + if (autostore->nr == NR_LOADSTORE_MSRS) { + /* + * Emulated VMEntry does not fail here. Instead a less + * accurate value will be returned by + * nested_vmx_get_vmexit_msr_value() using kvm_get_msr() + * instead of reading the value from the vmcs02 VMExit + * MSR-store area. + */ + pr_warn_ratelimited( + "Not enough msr entries in msr_autostore. 
Can't add msr %x\n", + msr_index); + return; + } + last = autostore->nr++; + autostore->val[last].index = msr_index; + } else if (!in_vmcs12_store_list && in_autostore_list) { + last = --autostore->nr; + autostore->val[msr_autostore_index] = autostore->val[last]; + } +} + static bool nested_cr3_valid(struct kvm_vcpu *vcpu, unsigned long val) { unsigned long invalid_mask; @@ -2038,7 +2120,7 @@ static void prepare_vmcs02_constant_state(struct vcpu_vmx *vmx) * addresses are constant (for vmcs02), the counts can change based * on L2's behavior, e.g. switching to/from long mode. */ - vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0); + vmcs_write64(VM_EXIT_MSR_STORE_ADDR, __pa(vmx->msr_autostore.guest.val)); vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host.val)); vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest.val)); @@ -2306,6 +2388,13 @@ static void prepare_vmcs02_rare(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) vmcs_write64(EOI_EXIT_BITMAP3, vmcs12->eoi_exit_bitmap3); } + /* + * Make sure the msr_autostore list is up to date before we set the + * count in the vmcs02. + */ + prepare_vmx_msr_autostore_list(&vmx->vcpu, MSR_IA32_TSC); + + vmcs_write32(VM_EXIT_MSR_STORE_COUNT, vmx->msr_autostore.guest.nr); vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr); vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr); diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 7b191963dde1..621142e55e28 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -833,7 +833,7 @@ static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx, vm_exit_controls_clearbit(vmx, exit); } -static int vmx_find_msr_index(struct vmx_msrs *m, u32 msr) +int vmx_find_msr_index(struct vmx_msrs *m, u32 msr) { unsigned int i; diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 73ff03091d29..90b97d9d4f7d 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -233,6 +233,10 @@ struct vcpu_vmx { struct vmx_msrs host; } msr_autoload; + struct msr_autostore { + struct vmx_msrs guest; + } msr_autostore; + struct { int vm86_active; ulong save_rflags; @@ -337,6 +341,7 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu); struct shared_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr); void pt_update_intercept_for_msr(struct vcpu_vmx *vmx); void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp); +int vmx_find_msr_index(struct vmx_msrs *m, u32 msr); #define POSTED_INTR_ON 0 #define POSTED_INTR_SN 1 -- cgit v1.2.3 From 7ee30bc132c683d06a6d9e360e39e483e3990708 Mon Sep 17 00:00:00 2001 From: Nitesh Narayan Lal Date: Thu, 7 Nov 2019 07:53:43 -0500 Subject: KVM: x86: deliver KVM IOAPIC scan request to target vCPUs In IOAPIC fixed delivery mode instead of flushing the scan requests to all vCPUs, we should only send the requests to vCPUs specified within the destination field. This patch introduces kvm_get_dest_vcpus_mask() API which retrieves an array of target vCPUs by using kvm_apic_map_get_dest_lapic() and then based on the vcpus_idx, it sets the bit in a bitmap. However, if the above fails kvm_get_dest_vcpus_mask() finds the target vCPUs by traversing all available vCPUs. Followed by setting the bits in the bitmap. If we had different vCPUs in the previous request for the same redirection table entry then bits corresponding to these vCPUs are also set. This to done to keep ioapic_handled_vectors synchronized. 
This bitmap is then eventually passed on to kvm_make_vcpus_request_mask() to generate a masked request only for the target vCPUs. This reduces the latency overhead on isolated vCPUs caused by the IPIs that would otherwise be sent to all vCPUs to process KVM_REQ_IOAPIC_SCAN. Suggested-by: Marcelo Tosatti Signed-off-by: Nitesh Narayan Lal Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 2 ++ arch/x86/kvm/ioapic.c | 33 +++++++++++++++++++++++++++++-- arch/x86/kvm/lapic.c | 44 +++++++++++++++++++++++++++++++++++++++++ arch/x86/kvm/lapic.h | 3 +++ arch/x86/kvm/x86.c | 14 +++++++++++++ include/linux/kvm_host.h | 2 ++ 6 files changed, 96 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 20bb2fc0883a..898ab9eb4dc8 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1588,6 +1588,8 @@ bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip); void kvm_make_mclock_inprogress_request(struct kvm *kvm); void kvm_make_scan_ioapic_request(struct kvm *kvm); +void kvm_make_scan_ioapic_request_mask(struct kvm *kvm, + unsigned long *vcpu_bitmap); void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, struct kvm_async_pf *work); diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index d859ae8890d0..ce30ef23c86b 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -271,8 +271,9 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) { unsigned index; bool mask_before, mask_after; - int old_remote_irr, old_delivery_status; union kvm_ioapic_redirect_entry *e; + unsigned long vcpu_bitmap; + int old_remote_irr, old_delivery_status, old_dest_id, old_dest_mode; switch (ioapic->ioregsel) { case IOAPIC_REG_VERSION: @@ -296,6 +297,8 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) /* Preserve read-only fields */ old_remote_irr = e->fields.remote_irr; old_delivery_status = e->fields.delivery_status; + old_dest_id = e->fields.dest_id; + old_dest_mode = e->fields.dest_mode; if (ioapic->ioregsel & 1) { e->bits &= 0xffffffff; e->bits |= (u64) val << 32; @@ -321,7 +324,33 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG && ioapic->irr & (1 << index)) ioapic_service(ioapic, index, false); - kvm_make_scan_ioapic_request(ioapic->kvm); + if (e->fields.delivery_mode == APIC_DM_FIXED) { + struct kvm_lapic_irq irq; + + irq.shorthand = 0; + irq.vector = e->fields.vector; + irq.delivery_mode = e->fields.delivery_mode << 8; + irq.dest_id = e->fields.dest_id; + irq.dest_mode = e->fields.dest_mode; + kvm_bitmap_or_dest_vcpus(ioapic->kvm, &irq, + &vcpu_bitmap); + if (old_dest_mode != e->fields.dest_mode || + old_dest_id != e->fields.dest_id) { + /* + * Update vcpu_bitmap with vcpus specified in + * the previous request as well. This is done to + * keep ioapic_handled_vectors synchronized.
+ */ + irq.dest_id = old_dest_id; + irq.dest_mode = old_dest_mode; + kvm_bitmap_or_dest_vcpus(ioapic->kvm, &irq, + &vcpu_bitmap); + } + kvm_make_scan_ioapic_request_mask(ioapic->kvm, + &vcpu_bitmap); + } else { + kvm_make_scan_ioapic_request(ioapic->kvm); + } break; } } diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 60fb21fe7f42..452cedd6382b 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1122,6 +1122,50 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, return result; } +/* + * This routine identifies the destination vcpus mask meant to receive the + * IOAPIC interrupts. It either uses kvm_apic_map_get_dest_lapic() to find + * out the destination vcpus array and set the bitmap or it traverses to + * each available vcpu to identify the same. + */ +void kvm_bitmap_or_dest_vcpus(struct kvm *kvm, struct kvm_lapic_irq *irq, + unsigned long *vcpu_bitmap) +{ + struct kvm_lapic **dest_vcpu = NULL; + struct kvm_lapic *src = NULL; + struct kvm_apic_map *map; + struct kvm_vcpu *vcpu; + unsigned long bitmap; + int i, vcpu_idx; + bool ret; + + rcu_read_lock(); + map = rcu_dereference(kvm->arch.apic_map); + + ret = kvm_apic_map_get_dest_lapic(kvm, &src, irq, map, &dest_vcpu, + &bitmap); + if (ret) { + for_each_set_bit(i, &bitmap, 16) { + if (!dest_vcpu[i]) + continue; + vcpu_idx = dest_vcpu[i]->vcpu->vcpu_idx; + __set_bit(vcpu_idx, vcpu_bitmap); + } + } else { + kvm_for_each_vcpu(i, vcpu, kvm) { + if (!kvm_apic_present(vcpu)) + continue; + if (!kvm_apic_match_dest(vcpu, NULL, + irq->delivery_mode, + irq->dest_id, + irq->dest_mode)) + continue; + __set_bit(i, vcpu_bitmap); + } + } + rcu_read_unlock(); +} + int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2) { return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio; diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 2aad7e226fc0..c1d77436126a 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -226,6 +226,9 @@ bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector); void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu); +void kvm_bitmap_or_dest_vcpus(struct kvm *kvm, struct kvm_lapic_irq *irq, + unsigned long *vcpu_bitmap); + bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq, struct kvm_vcpu **dest_vcpu); int kvm_vector_to_index(u32 vector, u32 dest_vcpus, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 8db7275d313f..991dd01ba08b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7874,6 +7874,20 @@ static void process_smi(struct kvm_vcpu *vcpu) kvm_make_request(KVM_REQ_EVENT, vcpu); } +void kvm_make_scan_ioapic_request_mask(struct kvm *kvm, + unsigned long *vcpu_bitmap) +{ + cpumask_var_t cpus; + bool called; + + zalloc_cpumask_var(&cpus, GFP_ATOMIC); + + called = kvm_make_vcpus_request_mask(kvm, KVM_REQ_SCAN_IOAPIC, + vcpu_bitmap, cpus); + + free_cpumask_var(cpus); +} + void kvm_make_scan_ioapic_request(struct kvm *kvm) { kvm_make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 70b2296fb2ae..bfe6c6729988 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -786,6 +786,8 @@ void kvm_reload_remote_mmus(struct kvm *kvm); bool kvm_make_vcpus_request_mask(struct kvm *kvm, unsigned int req, unsigned long *vcpu_bitmap, cpumask_var_t tmp); bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req); +bool kvm_make_cpus_request_mask(struct kvm *kvm, unsigned int req, + unsigned long *vcpu_bitmap); long 
kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg); -- cgit v1.2.3 From fcf35131396ace1339e2ca89b45a6b12eed17105 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Thu, 14 Nov 2019 16:18:20 +0100 Subject: s390/bpf: Make sure JIT passes do not increase code size The upcoming s390 branch length extension patches rely on "passes do not increase code size" property in order to consistently choose between short and long branches. Currently this property does not hold between the first and the second passes for register save/restore sequences, as well as various code fragments that depend on SEEN_* flags. Generate the code during the first pass conservatively: assume register save/restore sequences have the maximum possible length, and that all SEEN_* flags are set. Also refuse to JIT if this happens anyway (e.g. due to a bug), as this might lead to verifier bypass once long branches are introduced. Signed-off-by: Ilya Leoshkevich Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20191114151820.53222-1-iii@linux.ibm.com --- arch/s390/net/bpf_jit_comp.c | 74 +++++++++++++++++++++++++++++++++++++++----- 1 file changed, 66 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 1115071c8ff7..7bddb27c81e3 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -304,6 +304,24 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1) } \ }) +/* + * Return whether this is the first pass. The first pass is special, since we + * don't know any sizes yet, and thus must be conservative. + */ +static bool is_first_pass(struct bpf_jit *jit) +{ + return jit->size == 0; +} + +/* + * Return whether this is the code generation pass. The code generation pass is + * special, since we should change as little as possible. + */ +static bool is_codegen_pass(struct bpf_jit *jit) +{ + return jit->prg_buf; +} + /* * Fill whole space with illegal instructions */ @@ -381,9 +399,18 @@ static int get_end(struct bpf_jit *jit, int start) */ static void save_restore_regs(struct bpf_jit *jit, int op, u32 stack_depth) { - + const int last = 15, save_restore_size = 6; int re = 6, rs; + if (is_first_pass(jit)) { + /* + * We don't know yet which registers are used. Reserve space + * conservatively. 
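+ * Worst case: %r6-%r15 all need saving/restoring,
+ * i.e. (15 - 6 + 1) = 10 instructions of
+ * save_restore_size == 6 bytes each, 60 bytes total.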
+ */ + jit->prg += (last - re + 1) * save_restore_size; + return; + } + do { rs = get_start(jit, re); if (!rs) @@ -394,7 +421,7 @@ static void save_restore_regs(struct bpf_jit *jit, int op, u32 stack_depth) else restore_regs(jit, rs, re, stack_depth); re++; - } while (re <= 15); + } while (re <= last); } /* @@ -418,21 +445,21 @@ static void bpf_jit_prologue(struct bpf_jit *jit, u32 stack_depth) /* Save registers */ save_restore_regs(jit, REGS_SAVE, stack_depth); /* Setup literal pool */ - if (jit->seen & SEEN_LITERAL) { + if (is_first_pass(jit) || (jit->seen & SEEN_LITERAL)) { /* basr %r13,0 */ EMIT2(0x0d00, REG_L, REG_0); jit->base_ip = jit->prg; } /* Setup stack and backchain */ - if (jit->seen & SEEN_STACK) { - if (jit->seen & SEEN_FUNC) + if (is_first_pass(jit) || (jit->seen & SEEN_STACK)) { + if (is_first_pass(jit) || (jit->seen & SEEN_FUNC)) /* lgr %w1,%r15 (backchain) */ EMIT4(0xb9040000, REG_W1, REG_15); /* la %bfp,STK_160_UNUSED(%r15) (BPF frame pointer) */ EMIT4_DISP(0x41000000, BPF_REG_FP, REG_15, STK_160_UNUSED); /* aghi %r15,-STK_OFF */ EMIT4_IMM(0xa70b0000, REG_15, -(STK_OFF + stack_depth)); - if (jit->seen & SEEN_FUNC) + if (is_first_pass(jit) || (jit->seen & SEEN_FUNC)) /* stg %w1,152(%r15) (backchain) */ EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0, REG_15, 152); @@ -468,7 +495,7 @@ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth) _EMIT2(0x07fe); if (__is_defined(CC_USING_EXPOLINE) && !nospec_disable && - (jit->seen & SEEN_FUNC)) { + (is_first_pass(jit) || (jit->seen & SEEN_FUNC))) { jit->r1_thunk_ip = jit->prg; /* Generate __s390_indirect_jump_r1 thunk */ if (test_facility(35)) { @@ -1275,6 +1302,34 @@ branch_oc: return insn_count; } +/* + * Return whether new i-th instruction address does not violate any invariant + */ +static bool bpf_is_new_addr_sane(struct bpf_jit *jit, int i) +{ + /* On the first pass anything goes */ + if (is_first_pass(jit)) + return true; + + /* The codegen pass must not change anything */ + if (is_codegen_pass(jit)) + return jit->addrs[i] == jit->prg; + + /* Passes in between must not increase code size */ + return jit->addrs[i] >= jit->prg; +} + +/* + * Update the address of i-th instruction + */ +static int bpf_set_addr(struct bpf_jit *jit, int i) +{ + if (!bpf_is_new_addr_sane(jit, i)) + return -1; + jit->addrs[i] = jit->prg; + return 0; +} + /* * Compile eBPF program into s390x code */ @@ -1287,12 +1342,15 @@ static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp, jit->prg = 0; bpf_jit_prologue(jit, fp->aux->stack_depth); + if (bpf_set_addr(jit, 0) < 0) + return -1; for (i = 0; i < fp->len; i += insn_count) { insn_count = bpf_jit_insn(jit, fp, i, extra_pass); if (insn_count < 0) return -1; /* Next instruction address */ - jit->addrs[i + insn_count] = jit->prg; + if (bpf_set_addr(jit, i + insn_count) < 0) + return -1; } bpf_jit_epilogue(jit, fp->aux->stack_depth); -- cgit v1.2.3 From c3d6324f841bab2403be6419986e2b1d1068d423 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 5 Jun 2019 10:48:37 +0200 Subject: x86/alternatives: Teach text_poke_bp() to emulate instructions In preparation for static_call and variable size jump_label support, teach text_poke_bp() to emulate instructions, namely: JMP32, JMP8, CALL, NOP2, NOP_ATOMIC5, INT3 The current text_poke_bp() takes a @handler argument which is used as a jump target when the temporary INT3 is hit by a different CPU. When patching CALL instructions, this doesn't work because we'd miss the PUSH of the return address. 
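A minimal sketch of what CALL emulation has to do, built from the int3_emulate_push()/int3_emulate_jmp() helpers in the text-patching.h hunk below (the exact in-tree helper may differ in detail):

static inline void int3_emulate_call(struct pt_regs *regs, unsigned long func)
{
	/* push the return address: the byte after the 5-byte CALL being patched */
	int3_emulate_push(regs, regs->ip - INT3_INSN_SIZE + CALL_INSN_SIZE);
	/* then redirect control flow, as the CALL itself would */
	int3_emulate_jmp(regs, func);
}

A plain jump target, which is all the old @handler could express, performs only the second half.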
Instead, teach poke_int3_handler() to emulate an instruction, typically the instruction we're patching in. This fits almost all text_poke_bp() users, except arch_unoptimize_kprobe() which restores random text, and for that site we have to build an explicit emulate instruction. Tested-by: Alexei Starovoitov Tested-by: Steven Rostedt (VMware) Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Masami Hiramatsu Reviewed-by: Daniel Bristot de Oliveira Acked-by: Alexei Starovoitov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Steven Rostedt Cc: Thomas Gleixner Link: https://lkml.kernel.org/r/20191111132457.529086974@infradead.org Signed-off-by: Ingo Molnar (cherry picked from commit 8c7eebc10687af45ac8e40ad1bac0cf7893dba9f) Signed-off-by: Alexei Starovoitov --- arch/x86/include/asm/text-patching.h | 24 +++++-- arch/x86/kernel/alternative.c | 132 +++++++++++++++++++++++++++-------- arch/x86/kernel/jump_label.c | 9 +-- arch/x86/kernel/kprobes/opt.c | 11 ++- 4 files changed, 130 insertions(+), 46 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h index 5e8319bb207a..23c626a742e8 100644 --- a/arch/x86/include/asm/text-patching.h +++ b/arch/x86/include/asm/text-patching.h @@ -26,10 +26,11 @@ static inline void apply_paravirt(struct paravirt_patch_site *start, #define POKE_MAX_OPCODE_SIZE 5 struct text_poke_loc { - void *detour; void *addr; - size_t len; - const char opcode[POKE_MAX_OPCODE_SIZE]; + int len; + s32 rel32; + u8 opcode; + const u8 text[POKE_MAX_OPCODE_SIZE]; }; extern void text_poke_early(void *addr, const void *opcode, size_t len); @@ -51,8 +52,10 @@ extern void text_poke_early(void *addr, const void *opcode, size_t len); extern void *text_poke(void *addr, const void *opcode, size_t len); extern void *text_poke_kgdb(void *addr, const void *opcode, size_t len); extern int poke_int3_handler(struct pt_regs *regs); -extern void text_poke_bp(void *addr, const void *opcode, size_t len, void *handler); +extern void text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate); extern void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries); +extern void text_poke_loc_init(struct text_poke_loc *tp, void *addr, + const void *opcode, size_t len, const void *emulate); extern int after_bootmem; extern __ro_after_init struct mm_struct *poking_mm; extern __ro_after_init unsigned long poking_addr; @@ -63,8 +66,17 @@ static inline void int3_emulate_jmp(struct pt_regs *regs, unsigned long ip) regs->ip = ip; } -#define INT3_INSN_SIZE 1 -#define CALL_INSN_SIZE 5 +#define INT3_INSN_SIZE 1 +#define INT3_INSN_OPCODE 0xCC + +#define CALL_INSN_SIZE 5 +#define CALL_INSN_OPCODE 0xE8 + +#define JMP32_INSN_SIZE 5 +#define JMP32_INSN_OPCODE 0xE9 + +#define JMP8_INSN_SIZE 2 +#define JMP8_INSN_OPCODE 0xEB static inline void int3_emulate_push(struct pt_regs *regs, unsigned long val) { diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 9d3a971ea364..9ec463fe96f2 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -956,16 +956,15 @@ NOKPROBE_SYMBOL(patch_cmp); int poke_int3_handler(struct pt_regs *regs) { struct text_poke_loc *tp; - unsigned char int3 = 0xcc; void *ip; /* * Having observed our INT3 instruction, we now must observe * bp_patching.nr_entries. 
* - * nr_entries != 0 INT3 - * WMB RMB - * write INT3 if (nr_entries) + * nr_entries != 0 INT3 + * WMB RMB + * write INT3 if (nr_entries) * * Idem for other elements in bp_patching. */ @@ -978,9 +977,9 @@ int poke_int3_handler(struct pt_regs *regs) return 0; /* - * Discount the sizeof(int3). See text_poke_bp_batch(). + * Discount the INT3. See text_poke_bp_batch(). */ - ip = (void *) regs->ip - sizeof(int3); + ip = (void *) regs->ip - INT3_INSN_SIZE; /* * Skip the binary search if there is a single member in the vector. @@ -997,8 +996,28 @@ int poke_int3_handler(struct pt_regs *regs) return 0; } - /* set up the specified breakpoint detour */ - regs->ip = (unsigned long) tp->detour; + ip += tp->len; + + switch (tp->opcode) { + case INT3_INSN_OPCODE: + /* + * Someone poked an explicit INT3, they'll want to handle it, + * do not consume. + */ + return 0; + + case CALL_INSN_OPCODE: + int3_emulate_call(regs, (long)ip + tp->rel32); + break; + + case JMP32_INSN_OPCODE: + case JMP8_INSN_OPCODE: + int3_emulate_jmp(regs, (long)ip + tp->rel32); + break; + + default: + BUG(); + } return 1; } @@ -1014,7 +1033,7 @@ NOKPROBE_SYMBOL(poke_int3_handler); * synchronization using int3 breakpoint. * * The way it is done: - * - For each entry in the vector: + * - For each entry in the vector: * - add a int3 trap to the address that will be patched * - sync cores * - For each entry in the vector: @@ -1027,9 +1046,9 @@ NOKPROBE_SYMBOL(poke_int3_handler); */ void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries) { - int patched_all_but_first = 0; - unsigned char int3 = 0xcc; + unsigned char int3 = INT3_INSN_OPCODE; unsigned int i; + int do_sync; lockdep_assert_held(&text_mutex); @@ -1053,16 +1072,16 @@ void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries) /* * Second step: update all but the first byte of the patched range. */ - for (i = 0; i < nr_entries; i++) { + for (do_sync = 0, i = 0; i < nr_entries; i++) { if (tp[i].len - sizeof(int3) > 0) { text_poke((char *)tp[i].addr + sizeof(int3), - (const char *)tp[i].opcode + sizeof(int3), + (const char *)tp[i].text + sizeof(int3), tp[i].len - sizeof(int3)); - patched_all_but_first++; + do_sync++; } } - if (patched_all_but_first) { + if (do_sync) { /* * According to Intel, this core syncing is very likely * not necessary and we'd be safe even without it. But @@ -1075,10 +1094,17 @@ void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries) * Third step: replace the first byte (int3) by the first byte of * replacing opcode. */ - for (i = 0; i < nr_entries; i++) - text_poke(tp[i].addr, tp[i].opcode, sizeof(int3)); + for (do_sync = 0, i = 0; i < nr_entries; i++) { + if (tp[i].text[0] == INT3_INSN_OPCODE) + continue; + + text_poke(tp[i].addr, tp[i].text, sizeof(int3)); + do_sync++; + } + + if (do_sync) + on_each_cpu(do_sync_core, NULL, 1); - on_each_cpu(do_sync_core, NULL, 1); /* * sync_core() implies an smp_mb() and orders this store against * the writing of the new instruction. 
@@ -1087,6 +1113,60 @@ void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries) bp_patching.nr_entries = 0; } +void text_poke_loc_init(struct text_poke_loc *tp, void *addr, + const void *opcode, size_t len, const void *emulate) +{ + struct insn insn; + + if (!opcode) + opcode = (void *)tp->text; + else + memcpy((void *)tp->text, opcode, len); + + if (!emulate) + emulate = opcode; + + kernel_insn_init(&insn, emulate, MAX_INSN_SIZE); + insn_get_length(&insn); + + BUG_ON(!insn_complete(&insn)); + BUG_ON(len != insn.length); + + tp->addr = addr; + tp->len = len; + tp->opcode = insn.opcode.bytes[0]; + + switch (tp->opcode) { + case INT3_INSN_OPCODE: + break; + + case CALL_INSN_OPCODE: + case JMP32_INSN_OPCODE: + case JMP8_INSN_OPCODE: + tp->rel32 = insn.immediate.value; + break; + + default: /* assume NOP */ + switch (len) { + case 2: /* NOP2 -- emulate as JMP8+0 */ + BUG_ON(memcmp(emulate, ideal_nops[len], len)); + tp->opcode = JMP8_INSN_OPCODE; + tp->rel32 = 0; + break; + + case 5: /* NOP5 -- emulate as JMP32+0 */ + BUG_ON(memcmp(emulate, ideal_nops[NOP_ATOMIC5], len)); + tp->opcode = JMP32_INSN_OPCODE; + tp->rel32 = 0; + break; + + default: /* unknown instruction */ + BUG(); + } + break; + } +} + /** * text_poke_bp() -- update instructions on live kernel on SMP * @addr: address to patch @@ -1098,20 +1178,10 @@ void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries) * dynamically allocated memory. This function should be used when it is * not possible to allocate memory. */ -void text_poke_bp(void *addr, const void *opcode, size_t len, void *handler) +void text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate) { - struct text_poke_loc tp = { - .detour = handler, - .addr = addr, - .len = len, - }; - - if (len > POKE_MAX_OPCODE_SIZE) { - WARN_ONCE(1, "len is larger than %d\n", POKE_MAX_OPCODE_SIZE); - return; - } - - memcpy((void *)tp.opcode, opcode, len); + struct text_poke_loc tp; + text_poke_loc_init(&tp, addr, opcode, len, emulate); text_poke_bp_batch(&tp, 1); } diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c index 044053235302..c1a8b9e71408 100644 --- a/arch/x86/kernel/jump_label.c +++ b/arch/x86/kernel/jump_label.c @@ -89,8 +89,7 @@ static void __ref __jump_label_transform(struct jump_entry *entry, return; } - text_poke_bp((void *)jump_entry_code(entry), &code, JUMP_LABEL_NOP_SIZE, - (void *)jump_entry_code(entry) + JUMP_LABEL_NOP_SIZE); + text_poke_bp((void *)jump_entry_code(entry), &code, JUMP_LABEL_NOP_SIZE, NULL); } void arch_jump_label_transform(struct jump_entry *entry, @@ -147,11 +146,9 @@ bool arch_jump_label_transform_queue(struct jump_entry *entry, } __jump_label_set_jump_code(entry, type, - (union jump_code_union *) &tp->opcode, 0); + (union jump_code_union *)&tp->text, 0); - tp->addr = entry_code; - tp->detour = entry_code + JUMP_LABEL_NOP_SIZE; - tp->len = JUMP_LABEL_NOP_SIZE; + text_poke_loc_init(tp, entry_code, NULL, JUMP_LABEL_NOP_SIZE, NULL); tp_vec_nr++; diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index b348dd506d58..8900329c28a7 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -437,8 +437,7 @@ void arch_optimize_kprobes(struct list_head *oplist) insn_buff[0] = RELATIVEJUMP_OPCODE; *(s32 *)(&insn_buff[1]) = rel; - text_poke_bp(op->kp.addr, insn_buff, RELATIVEJUMP_SIZE, - op->optinsn.insn); + text_poke_bp(op->kp.addr, insn_buff, RELATIVEJUMP_SIZE, NULL); list_del_init(&op->list); } @@ -448,12 +447,18 @@ void 
arch_optimize_kprobes(struct list_head *oplist) void arch_unoptimize_kprobe(struct optimized_kprobe *op) { u8 insn_buff[RELATIVEJUMP_SIZE]; + u8 emulate_buff[RELATIVEJUMP_SIZE]; /* Set int3 to first byte for kprobes */ insn_buff[0] = BREAKPOINT_INSTRUCTION; memcpy(insn_buff + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE); + + emulate_buff[0] = RELATIVEJUMP_OPCODE; + *(s32 *)(&emulate_buff[1]) = (s32)((long)op->optinsn.insn - + ((long)op->kp.addr + RELATIVEJUMP_SIZE)); + text_poke_bp(op->kp.addr, insn_buff, RELATIVEJUMP_SIZE, - op->optinsn.insn); + emulate_buff); } /* -- cgit v1.2.3 From 3b2744e665206ea82ce7673cb3ec889b2898a267 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 14 Nov 2019 10:57:02 -0800 Subject: bpf: Refactor x86 JIT into helpers Refactor x86 JITing of LDX, STX, CALL instructions into separate helper functions. No functional changes in LDX and STX helpers. There is a minor change in CALL helper. It will populate target address correctly on the first pass of JIT instead of second pass. That won't reduce total number of JIT passes though. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Song Liu Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20191114185720.1641606-3-ast@kernel.org --- arch/x86/net/bpf_jit_comp.c | 152 ++++++++++++++++++++++++++++---------------- 1 file changed, 98 insertions(+), 54 deletions(-) (limited to 'arch') diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 8cd23d8309bf..fb99d976ad6e 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -198,6 +198,8 @@ struct jit_context { /* Maximum number of bytes emitted while JITing one eBPF insn */ #define BPF_MAX_INSN_SIZE 128 #define BPF_INSN_SAFETY 64 +/* number of bytes emit_call() needs to generate call instruction */ +#define X86_CALL_SIZE 5 #define PROLOGUE_SIZE 20 @@ -390,6 +392,99 @@ static void emit_mov_reg(u8 **pprog, bool is64, u32 dst_reg, u32 src_reg) *pprog = prog; } +/* LDX: dst_reg = *(u8*)(src_reg + off) */ +static void emit_ldx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off) +{ + u8 *prog = *pprog; + int cnt = 0; + + switch (size) { + case BPF_B: + /* Emit 'movzx rax, byte ptr [rax + off]' */ + EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB6); + break; + case BPF_H: + /* Emit 'movzx rax, word ptr [rax + off]' */ + EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB7); + break; + case BPF_W: + /* Emit 'mov eax, dword ptr [rax+0x14]' */ + if (is_ereg(dst_reg) || is_ereg(src_reg)) + EMIT2(add_2mod(0x40, src_reg, dst_reg), 0x8B); + else + EMIT1(0x8B); + break; + case BPF_DW: + /* Emit 'mov rax, qword ptr [rax+0x14]' */ + EMIT2(add_2mod(0x48, src_reg, dst_reg), 0x8B); + break; + } + /* + * If insn->off == 0 we can save one extra byte, but + * special case of x86 R13 which always needs an offset + * is not worth the hassle + */ + if (is_imm8(off)) + EMIT2(add_2reg(0x40, src_reg, dst_reg), off); + else + EMIT1_off32(add_2reg(0x80, src_reg, dst_reg), off); + *pprog = prog; +} + +/* STX: *(u8*)(dst_reg + off) = src_reg */ +static void emit_stx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off) +{ + u8 *prog = *pprog; + int cnt = 0; + + switch (size) { + case BPF_B: + /* Emit 'mov byte ptr [rax + off], al' */ + if (is_ereg(dst_reg) || is_ereg(src_reg) || + /* We have to add extra byte for x86 SIL, DIL regs */ + src_reg == BPF_REG_1 || src_reg == BPF_REG_2) + EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x88); + else + EMIT1(0x88); + break; + case BPF_H: + if (is_ereg(dst_reg) || 
is_ereg(src_reg)) + EMIT3(0x66, add_2mod(0x40, dst_reg, src_reg), 0x89); + else + EMIT2(0x66, 0x89); + break; + case BPF_W: + if (is_ereg(dst_reg) || is_ereg(src_reg)) + EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x89); + else + EMIT1(0x89); + break; + case BPF_DW: + EMIT2(add_2mod(0x48, dst_reg, src_reg), 0x89); + break; + } + if (is_imm8(off)) + EMIT2(add_2reg(0x40, dst_reg, src_reg), off); + else + EMIT1_off32(add_2reg(0x80, dst_reg, src_reg), off); + *pprog = prog; +} + +static int emit_call(u8 **pprog, void *func, void *ip) +{ + u8 *prog = *pprog; + int cnt = 0; + s64 offset; + + offset = func - (ip + X86_CALL_SIZE); + if (!is_simm32(offset)) { + pr_err("Target call %p is out of range\n", func); + return -EINVAL; + } + EMIT1_off32(0xE8, offset); + *pprog = prog; + return 0; +} static bool ex_handler_bpf(const struct exception_table_entry *x, struct pt_regs *regs, int trapnr, @@ -773,68 +868,22 @@ st: if (is_imm8(insn->off)) /* STX: *(u8*)(dst_reg + off) = src_reg */ case BPF_STX | BPF_MEM | BPF_B: - /* Emit 'mov byte ptr [rax + off], al' */ - if (is_ereg(dst_reg) || is_ereg(src_reg) || - /* We have to add extra byte for x86 SIL, DIL regs */ - src_reg == BPF_REG_1 || src_reg == BPF_REG_2) - EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x88); - else - EMIT1(0x88); - goto stx; case BPF_STX | BPF_MEM | BPF_H: - if (is_ereg(dst_reg) || is_ereg(src_reg)) - EMIT3(0x66, add_2mod(0x40, dst_reg, src_reg), 0x89); - else - EMIT2(0x66, 0x89); - goto stx; case BPF_STX | BPF_MEM | BPF_W: - if (is_ereg(dst_reg) || is_ereg(src_reg)) - EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x89); - else - EMIT1(0x89); - goto stx; case BPF_STX | BPF_MEM | BPF_DW: - EMIT2(add_2mod(0x48, dst_reg, src_reg), 0x89); -stx: if (is_imm8(insn->off)) - EMIT2(add_2reg(0x40, dst_reg, src_reg), insn->off); - else - EMIT1_off32(add_2reg(0x80, dst_reg, src_reg), - insn->off); + emit_stx(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off); break; /* LDX: dst_reg = *(u8*)(src_reg + off) */ case BPF_LDX | BPF_MEM | BPF_B: case BPF_LDX | BPF_PROBE_MEM | BPF_B: - /* Emit 'movzx rax, byte ptr [rax + off]' */ - EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB6); - goto ldx; case BPF_LDX | BPF_MEM | BPF_H: case BPF_LDX | BPF_PROBE_MEM | BPF_H: - /* Emit 'movzx rax, word ptr [rax + off]' */ - EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB7); - goto ldx; case BPF_LDX | BPF_MEM | BPF_W: case BPF_LDX | BPF_PROBE_MEM | BPF_W: - /* Emit 'mov eax, dword ptr [rax+0x14]' */ - if (is_ereg(dst_reg) || is_ereg(src_reg)) - EMIT2(add_2mod(0x40, src_reg, dst_reg), 0x8B); - else - EMIT1(0x8B); - goto ldx; case BPF_LDX | BPF_MEM | BPF_DW: case BPF_LDX | BPF_PROBE_MEM | BPF_DW: - /* Emit 'mov rax, qword ptr [rax+0x14]' */ - EMIT2(add_2mod(0x48, src_reg, dst_reg), 0x8B); -ldx: /* - * If insn->off == 0 we can save one extra byte, but - * special case of x86 R13 which always needs an offset - * is not worth the hassle - */ - if (is_imm8(insn->off)) - EMIT2(add_2reg(0x40, src_reg, dst_reg), insn->off); - else - EMIT1_off32(add_2reg(0x80, src_reg, dst_reg), - insn->off); + emit_ldx(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off); if (BPF_MODE(insn->code) == BPF_PROBE_MEM) { struct exception_table_entry *ex; u8 *_insn = image + proglen; @@ -899,13 +948,8 @@ xadd: if (is_imm8(insn->off)) /* call */ case BPF_JMP | BPF_CALL: func = (u8 *) __bpf_call_base + imm32; - jmp_offset = func - (image + addrs[i]); - if (!imm32 || !is_simm32(jmp_offset)) { - pr_err("unsupported BPF func %d addr %p image %p\n", - imm32, func, image); + if (!imm32 || emit_call(&prog, 
func, image + addrs[i - 1])) return -EINVAL; - } - EMIT1_off32(0xE8, jmp_offset); break; case BPF_JMP | BPF_TAIL_CALL: -- cgit v1.2.3 From 5964b2000f283ff5df366f718e0f083ebbaae977 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 14 Nov 2019 10:57:03 -0800 Subject: bpf: Add bpf_arch_text_poke() helper Add bpf_arch_text_poke() helper that is used by BPF trampoline logic to patch nops/calls in kernel text into calls into BPF trampoline and to patch calls/nops inside BPF programs too. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Song Liu Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20191114185720.1641606-4-ast@kernel.org --- arch/x86/net/bpf_jit_comp.c | 51 +++++++++++++++++++++++++++++++++++++++++++++ include/linux/bpf.h | 8 +++++++ kernel/bpf/core.c | 6 ++++++ 3 files changed, 65 insertions(+) (limited to 'arch') diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index fb99d976ad6e..254b2889e881 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -9,9 +9,11 @@ #include #include #include +#include #include #include #include +#include static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len) { @@ -486,6 +488,55 @@ static int emit_call(u8 **pprog, void *func, void *ip) return 0; } +int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, + void *old_addr, void *new_addr) +{ + u8 old_insn[X86_CALL_SIZE] = {}; + u8 new_insn[X86_CALL_SIZE] = {}; + u8 *prog; + int ret; + + if (!is_kernel_text((long)ip)) + /* BPF trampoline in modules is not supported */ + return -EINVAL; + + if (old_addr) { + prog = old_insn; + ret = emit_call(&prog, old_addr, (void *)ip); + if (ret) + return ret; + } + if (new_addr) { + prog = new_insn; + ret = emit_call(&prog, new_addr, (void *)ip); + if (ret) + return ret; + } + ret = -EBUSY; + mutex_lock(&text_mutex); + switch (t) { + case BPF_MOD_NOP_TO_CALL: + if (memcmp(ip, ideal_nops[NOP_ATOMIC5], X86_CALL_SIZE)) + goto out; + text_poke_bp(ip, new_insn, X86_CALL_SIZE, NULL); + break; + case BPF_MOD_CALL_TO_CALL: + if (memcmp(ip, old_insn, X86_CALL_SIZE)) + goto out; + text_poke_bp(ip, new_insn, X86_CALL_SIZE, NULL); + break; + case BPF_MOD_CALL_TO_NOP: + if (memcmp(ip, old_insn, X86_CALL_SIZE)) + goto out; + text_poke_bp(ip, ideal_nops[NOP_ATOMIC5], X86_CALL_SIZE, NULL); + break; + } + ret = 0; +out: + mutex_unlock(&text_mutex); + return ret; +} + static bool ex_handler_bpf(const struct exception_table_entry *x, struct pt_regs *regs, int trapnr, unsigned long error_code, unsigned long fault_addr) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 7c7f518811a6..8b90db25348a 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1157,4 +1157,12 @@ static inline u32 bpf_xdp_sock_convert_ctx_access(enum bpf_access_type type, } #endif /* CONFIG_INET */ +enum bpf_text_poke_type { + BPF_MOD_NOP_TO_CALL, + BPF_MOD_CALL_TO_CALL, + BPF_MOD_CALL_TO_NOP, +}; +int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, + void *addr1, void *addr2); + #endif /* _LINUX_BPF_H */ diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 99693f3c4e99..434a0d920153 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -2144,6 +2144,12 @@ int __weak skb_copy_bits(const struct sk_buff *skb, int offset, void *to, return -EFAULT; } +int __weak bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, + void *addr1, void *addr2) +{ + return -ENOTSUPP; +} + DEFINE_STATIC_KEY_FALSE(bpf_stats_enabled_key); EXPORT_SYMBOL(bpf_stats_enabled_key); -- cgit v1.2.3 From 
fec56f5890d93fc2ed74166c397dc186b1c25951 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 14 Nov 2019 10:57:04 -0800 Subject: bpf: Introduce BPF trampoline Introduce the BPF trampoline concept to allow kernel code to call into BPF programs with practically zero overhead. The trampoline generation logic is architecture dependent: it converts the native calling convention into the BPF calling convention. The BPF ISA is 64-bit (even on 32-bit architectures). The registers R1 to R5 are used to pass arguments into BPF functions. The main BPF program accepts only a single argument "ctx" in R1. The native CPU calling conventions differ: x86-64 passes the first 6 arguments in registers and the rest on the stack, x86-32 passes the first 3 arguments in registers, sparc64 passes the first 6 in registers, and so on. Trampolines between BPF and the kernel already exist. The BPF_CALL_x macros in include/linux/filter.h statically compile trampolines from BPF into kernel helpers. They convert up to five u64 arguments into kernel C pointers and integers. On 64-bit architectures these BPF-to-kernel trampolines are nops; on 32-bit architectures they're meaningful. The opposite job, kernel-to-BPF trampolines, is done by the CAST_TO_U64 macros and __bpf_trace_##call() shim functions in include/trace/bpf_probe.h, which convert kernel function arguments into an array of u64s that the BPF program consumes via the R1=ctx pointer. This patch set does the same job as the __bpf_trace_##call() static trampolines, but dynamically for any kernel function. There are ~22k global kernel functions that are attachable via a nop at function entry. The function arguments and types are described in BTF. The job of the btf_distill_func_proto() function is to extract useful information from BTF into a "function model" that architecture dependent trampoline generators use to generate assembly code that casts kernel function arguments into an array of u64s. For example, the kernel function eth_type_trans has two pointer arguments. They will be cast to u64 and stored onto the stack of the generated trampoline; the pointer to that stack space is passed into the BPF program in R1. On x86-64 such a generated trampoline consumes 16 bytes of stack and two stores of %rdi and %rsi into the stack. The verifier makes sure that only two u64s are accessed read-only by the BPF program, recognizes the precise type of the pointers being accessed, and does not allow typecasting of a pointer to a different type within the BPF program. The tracing use case in the datacenter demonstrated that certain key kernel functions (like tcp_retransmit_skb) have 2 or more kprobes that are always active. Other functions have both a kprobe and a kretprobe. So it is essential to keep both kernel code and BPF programs executing at maximum speed. Hence the generated BPF trampoline is re-generated every time a new program is attached or detached, to maintain maximum performance. To avoid the high cost of retpoline the attached BPF programs are called directly. __bpf_prog_enter/exit() are used to support per-program execution stats. In the future this logic will be optimized further by adding support for bpf_stats_enabled_key inside the generated assembly code. The introduction of preemptible and sleepable BPF programs will completely remove the need to call __bpf_prog_enter/exit(). Detaching a BPF program from the trampoline should not fail. To avoid memory allocation in the detach path, half of the page is used as a reserve and flipped after each attach/detach.
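A minimal sketch of that flip, with names as in bpf_trampoline_update() in the hunk below:

	/* two halves of one page; tr->selector picks which half is live */
	void *old_image = tr->image + ((tr->selector + 1) & 1) * PAGE_SIZE / 2;
	void *new_image = tr->image + (tr->selector & 1) * PAGE_SIZE / 2;
	/* generate into new_image, text_poke the call site over to it,
	 * then tr->selector++ turns the old half into the next scratch area
	 */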
2k bytes is enough to call 40+ BPF programs directly which is enough for BPF tracing use cases. This limit can be increased in the future. BPF_TRACE_FENTRY programs have access to raw kernel function arguments while BPF_TRACE_FEXIT programs have access to kernel return value as well. Often kprobe BPF program remembers function arguments in a map while kretprobe fetches arguments from a map and analyzes them together with return value. BPF_TRACE_FEXIT accelerates this typical use case. Recursion prevention for kprobe BPF programs is done via per-cpu bpf_prog_active counter. In practice that turned out to be a mistake. It caused programs to randomly skip execution. The tracing tools missed results they were looking for. Hence BPF trampoline doesn't provide builtin recursion prevention. It's a job of BPF program itself and will be addressed in the follow up patches. BPF trampoline is intended to be used beyond tracing and fentry/fexit use cases in the future. For example to remove retpoline cost from XDP programs. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20191114185720.1641606-5-ast@kernel.org --- arch/x86/net/bpf_jit_comp.c | 211 +++++++++++++++++++++++++++++++++++- include/linux/bpf.h | 105 ++++++++++++++++++ include/uapi/linux/bpf.h | 2 + kernel/bpf/Makefile | 1 + kernel/bpf/btf.c | 77 +++++++++++++- kernel/bpf/core.c | 1 + kernel/bpf/syscall.c | 53 +++++++++- kernel/bpf/trampoline.c | 253 ++++++++++++++++++++++++++++++++++++++++++++ kernel/bpf/verifier.c | 42 ++++++++ 9 files changed, 735 insertions(+), 10 deletions(-) create mode 100644 kernel/bpf/trampoline.c (limited to 'arch') diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 254b2889e881..be2b43a894f6 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -98,6 +98,7 @@ static int bpf_size_to_x86_bytes(int bpf_size) /* Pick a register outside of BPF range for JIT internal work */ #define AUX_REG (MAX_BPF_JIT_REG + 1) +#define X86_REG_R9 (MAX_BPF_JIT_REG + 2) /* * The following table maps BPF registers to x86-64 registers. @@ -106,8 +107,8 @@ static int bpf_size_to_x86_bytes(int bpf_size) * register in load/store instructions, it always needs an * extra byte of encoding and is callee saved. * - * Also x86-64 register R9 is unused. x86-64 register R10 is - * used for blinding (if enabled). + * x86-64 register R9 is not used by BPF programs, but can be used by BPF + * trampoline. x86-64 register R10 is used for blinding (if enabled). */ static const int reg2hex[] = { [BPF_REG_0] = 0, /* RAX */ @@ -123,6 +124,7 @@ static const int reg2hex[] = { [BPF_REG_FP] = 5, /* RBP readonly */ [BPF_REG_AX] = 2, /* R10 temp register */ [AUX_REG] = 3, /* R11 temp register */ + [X86_REG_R9] = 1, /* R9 register, 6th function argument */ }; static const int reg2pt_regs[] = { @@ -150,6 +152,7 @@ static bool is_ereg(u32 reg) BIT(BPF_REG_7) | BIT(BPF_REG_8) | BIT(BPF_REG_9) | + BIT(X86_REG_R9) | BIT(BPF_REG_AX)); } @@ -1233,6 +1236,210 @@ emit_jmp: return proglen; } +static void save_regs(struct btf_func_model *m, u8 **prog, int nr_args, + int stack_size) +{ + int i; + /* Store function arguments to stack. + * For a function that accepts two pointers the sequence will be: + * mov QWORD PTR [rbp-0x10],rdi + * mov QWORD PTR [rbp-0x8],rsi + */ + for (i = 0; i < min(nr_args, 6); i++) + emit_stx(prog, bytes_to_bpf_size(m->arg_size[i]), + BPF_REG_FP, + i == 5 ? 
X86_REG_R9 : BPF_REG_1 + i, + -(stack_size - i * 8)); +} + +static void restore_regs(struct btf_func_model *m, u8 **prog, int nr_args, + int stack_size) +{ + int i; + + /* Restore function arguments from stack. + * For a function that accepts two pointers the sequence will be: + * EMIT4(0x48, 0x8B, 0x7D, 0xF0); mov rdi,QWORD PTR [rbp-0x10] + * EMIT4(0x48, 0x8B, 0x75, 0xF8); mov rsi,QWORD PTR [rbp-0x8] + */ + for (i = 0; i < min(nr_args, 6); i++) + emit_ldx(prog, bytes_to_bpf_size(m->arg_size[i]), + i == 5 ? X86_REG_R9 : BPF_REG_1 + i, + BPF_REG_FP, + -(stack_size - i * 8)); +} + +static int invoke_bpf(struct btf_func_model *m, u8 **pprog, + struct bpf_prog **progs, int prog_cnt, int stack_size) +{ + u8 *prog = *pprog; + int cnt = 0, i; + + for (i = 0; i < prog_cnt; i++) { + if (emit_call(&prog, __bpf_prog_enter, prog)) + return -EINVAL; + /* remember prog start time returned by __bpf_prog_enter */ + emit_mov_reg(&prog, true, BPF_REG_6, BPF_REG_0); + + /* arg1: lea rdi, [rbp - stack_size] */ + EMIT4(0x48, 0x8D, 0x7D, -stack_size); + /* arg2: progs[i]->insnsi for interpreter */ + if (!progs[i]->jited) + emit_mov_imm64(&prog, BPF_REG_2, + (long) progs[i]->insnsi >> 32, + (u32) (long) progs[i]->insnsi); + /* call JITed bpf program or interpreter */ + if (emit_call(&prog, progs[i]->bpf_func, prog)) + return -EINVAL; + + /* arg1: mov rdi, progs[i] */ + emit_mov_imm64(&prog, BPF_REG_1, (long) progs[i] >> 32, + (u32) (long) progs[i]); + /* arg2: mov rsi, rbx <- start time in nsec */ + emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6); + if (emit_call(&prog, __bpf_prog_exit, prog)) + return -EINVAL; + } + *pprog = prog; + return 0; +} + +/* Example: + * __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev); + * its 'struct btf_func_model' will be nr_args=2 + * The assembly code when eth_type_trans is executing after trampoline: + * + * push rbp + * mov rbp, rsp + * sub rsp, 16 // space for skb and dev + * push rbx // temp regs to pass start time + * mov qword ptr [rbp - 16], rdi // save skb pointer to stack + * mov qword ptr [rbp - 8], rsi // save dev pointer to stack + * call __bpf_prog_enter // rcu_read_lock and preempt_disable + * mov rbx, rax // remember start time in bpf stats are enabled + * lea rdi, [rbp - 16] // R1==ctx of bpf prog + * call addr_of_jited_FENTRY_prog + * movabsq rdi, 64bit_addr_of_struct_bpf_prog // unused if bpf stats are off + * mov rsi, rbx // prog start time + * call __bpf_prog_exit // rcu_read_unlock, preempt_enable and stats math + * mov rdi, qword ptr [rbp - 16] // restore skb pointer from stack + * mov rsi, qword ptr [rbp - 8] // restore dev pointer from stack + * pop rbx + * leave + * ret + * + * eth_type_trans has 5 byte nop at the beginning. These 5 bytes will be + * replaced with 'call generated_bpf_trampoline'. When it returns + * eth_type_trans will continue executing with original skb and dev pointers. 
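+ * (The listing above is the flags == BPF_TRAMP_F_RESTORE_REGS case,
+ * i.e. fentry progs only; the listing below is the BPF_TRAMP_F_CALL_ORIG |
+ * BPF_TRAMP_F_SKIP_FRAME case used when fexit progs are attached.)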
+ * + * The assembly code when eth_type_trans is called from trampoline: + * + * push rbp + * mov rbp, rsp + * sub rsp, 24 // space for skb, dev, return value + * push rbx // temp regs to pass start time + * mov qword ptr [rbp - 24], rdi // save skb pointer to stack + * mov qword ptr [rbp - 16], rsi // save dev pointer to stack + * call __bpf_prog_enter // rcu_read_lock and preempt_disable + * mov rbx, rax // remember start time if bpf stats are enabled + * lea rdi, [rbp - 24] // R1==ctx of bpf prog + * call addr_of_jited_FENTRY_prog // bpf prog can access skb and dev + * movabsq rdi, 64bit_addr_of_struct_bpf_prog // unused if bpf stats are off + * mov rsi, rbx // prog start time + * call __bpf_prog_exit // rcu_read_unlock, preempt_enable and stats math + * mov rdi, qword ptr [rbp - 24] // restore skb pointer from stack + * mov rsi, qword ptr [rbp - 16] // restore dev pointer from stack + * call eth_type_trans+5 // execute body of eth_type_trans + * mov qword ptr [rbp - 8], rax // save return value + * call __bpf_prog_enter // rcu_read_lock and preempt_disable + * mov rbx, rax // remember start time in bpf stats are enabled + * lea rdi, [rbp - 24] // R1==ctx of bpf prog + * call addr_of_jited_FEXIT_prog // bpf prog can access skb, dev, return value + * movabsq rdi, 64bit_addr_of_struct_bpf_prog // unused if bpf stats are off + * mov rsi, rbx // prog start time + * call __bpf_prog_exit // rcu_read_unlock, preempt_enable and stats math + * mov rax, qword ptr [rbp - 8] // restore eth_type_trans's return value + * pop rbx + * leave + * add rsp, 8 // skip eth_type_trans's frame + * ret // return to its caller + */ +int arch_prepare_bpf_trampoline(void *image, struct btf_func_model *m, u32 flags, + struct bpf_prog **fentry_progs, int fentry_cnt, + struct bpf_prog **fexit_progs, int fexit_cnt, + void *orig_call) +{ + int cnt = 0, nr_args = m->nr_args; + int stack_size = nr_args * 8; + u8 *prog; + + /* x86-64 supports up to 6 arguments. 7+ can be added in the future */ + if (nr_args > 6) + return -ENOTSUPP; + + if ((flags & BPF_TRAMP_F_RESTORE_REGS) && + (flags & BPF_TRAMP_F_SKIP_FRAME)) + return -EINVAL; + + if (flags & BPF_TRAMP_F_CALL_ORIG) + stack_size += 8; /* room for return value of orig_call */ + + if (flags & BPF_TRAMP_F_SKIP_FRAME) + /* skip patched call instruction and point orig_call to actual + * body of the kernel function. 
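+ * (the patched call is X86_CALL_SIZE == 5 bytes, matching the
+ * nops reserved at the function entry)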
+ */ + orig_call += X86_CALL_SIZE; + + prog = image; + + EMIT1(0x55); /* push rbp */ + EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */ + EMIT4(0x48, 0x83, 0xEC, stack_size); /* sub rsp, stack_size */ + EMIT1(0x53); /* push rbx */ + + save_regs(m, &prog, nr_args, stack_size); + + if (fentry_cnt) + if (invoke_bpf(m, &prog, fentry_progs, fentry_cnt, stack_size)) + return -EINVAL; + + if (flags & BPF_TRAMP_F_CALL_ORIG) { + if (fentry_cnt) + restore_regs(m, &prog, nr_args, stack_size); + + /* call original function */ + if (emit_call(&prog, orig_call, prog)) + return -EINVAL; + /* remember return value in a stack for bpf prog to access */ + emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8); + } + + if (fexit_cnt) + if (invoke_bpf(m, &prog, fexit_progs, fexit_cnt, stack_size)) + return -EINVAL; + + if (flags & BPF_TRAMP_F_RESTORE_REGS) + restore_regs(m, &prog, nr_args, stack_size); + + if (flags & BPF_TRAMP_F_CALL_ORIG) + /* restore original return value back into RAX */ + emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, -8); + + EMIT1(0x5B); /* pop rbx */ + EMIT1(0xC9); /* leave */ + if (flags & BPF_TRAMP_F_SKIP_FRAME) + /* skip our return address and return to parent */ + EMIT4(0x48, 0x83, 0xC4, 8); /* add rsp, 8 */ + EMIT1(0xC3); /* ret */ + /* One half of the page has active running trampoline. + * Another half is an area for next trampoline. + * Make sure the trampoline generation logic doesn't overflow. + */ + if (WARN_ON_ONCE(prog - (u8 *)image > PAGE_SIZE / 2 - BPF_INSN_SAFETY)) + return -EFAULT; + return 0; +} + struct x64_jit_data { struct bpf_binary_header *header; int *addrs; diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 8b90db25348a..0d4c5c224d79 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -14,6 +14,8 @@ #include #include #include +#include +#include struct bpf_verifier_env; struct bpf_verifier_log; @@ -384,6 +386,100 @@ struct bpf_prog_stats { struct u64_stats_sync syncp; } __aligned(2 * sizeof(u64)); +struct btf_func_model { + u8 ret_size; + u8 nr_args; + u8 arg_size[MAX_BPF_FUNC_ARGS]; +}; + +/* Restore arguments before returning from trampoline to let original function + * continue executing. This flag is used for fentry progs when there are no + * fexit progs. + */ +#define BPF_TRAMP_F_RESTORE_REGS BIT(0) +/* Call original function after fentry progs, but before fexit progs. + * Makes sense for fentry/fexit, normal calls and indirect calls. + */ +#define BPF_TRAMP_F_CALL_ORIG BIT(1) +/* Skip current frame and return to parent. Makes sense for fentry/fexit + * programs only. Should not be used with normal calls and indirect calls. + */ +#define BPF_TRAMP_F_SKIP_FRAME BIT(2) + +/* Different use cases for BPF trampoline: + * 1. replace nop at the function entry (kprobe equivalent) + * flags = BPF_TRAMP_F_RESTORE_REGS + * fentry = a set of programs to run before returning from trampoline + * + * 2. replace nop at the function entry (kprobe + kretprobe equivalent) + * flags = BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_SKIP_FRAME + * orig_call = fentry_ip + MCOUNT_INSN_SIZE + * fentry = a set of program to run before calling original function + * fexit = a set of program to run after original function + * + * 3. 
replace direct call instruction anywhere in the function body + * or assign a function pointer for indirect call (like tcp_congestion_ops->cong_avoid) + * With flags = 0 + * fentry = a set of programs to run before returning from trampoline + * With flags = BPF_TRAMP_F_CALL_ORIG + * orig_call = original callback addr or direct function addr + * fentry = a set of program to run before calling original function + * fexit = a set of program to run after original function + */ +int arch_prepare_bpf_trampoline(void *image, struct btf_func_model *m, u32 flags, + struct bpf_prog **fentry_progs, int fentry_cnt, + struct bpf_prog **fexit_progs, int fexit_cnt, + void *orig_call); +/* these two functions are called from generated trampoline */ +u64 notrace __bpf_prog_enter(void); +void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start); + +enum bpf_tramp_prog_type { + BPF_TRAMP_FENTRY, + BPF_TRAMP_FEXIT, + BPF_TRAMP_MAX +}; + +struct bpf_trampoline { + /* hlist for trampoline_table */ + struct hlist_node hlist; + /* serializes access to fields of this trampoline */ + struct mutex mutex; + refcount_t refcnt; + u64 key; + struct { + struct btf_func_model model; + void *addr; + } func; + /* list of BPF programs using this trampoline */ + struct hlist_head progs_hlist[BPF_TRAMP_MAX]; + /* Number of attached programs. A counter per kind. */ + int progs_cnt[BPF_TRAMP_MAX]; + /* Executable image of trampoline */ + void *image; + u64 selector; +}; +#ifdef CONFIG_BPF_JIT +struct bpf_trampoline *bpf_trampoline_lookup(u64 key); +int bpf_trampoline_link_prog(struct bpf_prog *prog); +int bpf_trampoline_unlink_prog(struct bpf_prog *prog); +void bpf_trampoline_put(struct bpf_trampoline *tr); +#else +static inline struct bpf_trampoline *bpf_trampoline_lookup(u64 key) +{ + return NULL; +} +static inline int bpf_trampoline_link_prog(struct bpf_prog *prog) +{ + return -ENOTSUPP; +} +static inline int bpf_trampoline_unlink_prog(struct bpf_prog *prog) +{ + return -ENOTSUPP; +} +static inline void bpf_trampoline_put(struct bpf_trampoline *tr) {} +#endif + struct bpf_prog_aux { atomic_t refcnt; u32 used_map_cnt; @@ -398,6 +494,9 @@ struct bpf_prog_aux { bool verifier_zext; /* Zero extensions has been inserted by verifier. 
*/ bool offload_requested; bool attach_btf_trace; /* true if attaching to BTF-enabled raw tp */ + enum bpf_tramp_prog_type trampoline_prog_type; + struct bpf_trampoline *trampoline; + struct hlist_node tramp_hlist; /* BTF_KIND_FUNC_PROTO for valid attach_btf_id */ const struct btf_type *attach_func_proto; /* function name for valid attach_btf_id */ @@ -784,6 +883,12 @@ int btf_struct_access(struct bpf_verifier_log *log, u32 *next_btf_id); u32 btf_resolve_helper_id(struct bpf_verifier_log *log, void *, int); +int btf_distill_func_proto(struct bpf_verifier_log *log, + struct btf *btf, + const struct btf_type *func_proto, + const char *func_name, + struct btf_func_model *m); + #else /* !CONFIG_BPF_SYSCALL */ static inline struct bpf_prog *bpf_prog_get(u32 ufd) { diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index df6809a76404..69c200e6e696 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -201,6 +201,8 @@ enum bpf_attach_type { BPF_CGROUP_GETSOCKOPT, BPF_CGROUP_SETSOCKOPT, BPF_TRACE_RAW_TP, + BPF_TRACE_FENTRY, + BPF_TRACE_FEXIT, __MAX_BPF_ATTACH_TYPE }; diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index e1d9adb212f9..3f671bf617e8 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -6,6 +6,7 @@ obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o obj-$(CONFIG_BPF_SYSCALL) += disasm.o +obj-$(CONFIG_BPF_JIT) += trampoline.o obj-$(CONFIG_BPF_SYSCALL) += btf.o ifeq ($(CONFIG_NET),y) obj-$(CONFIG_BPF_SYSCALL) += devmap.o diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 4639c4ba9a9b..9e1164e5b429 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -3517,13 +3517,18 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, args++; nr_args--; } - if (arg >= nr_args) { + + if (prog->expected_attach_type == BPF_TRACE_FEXIT && + arg == nr_args) { + /* function return type */ + t = btf_type_by_id(btf_vmlinux, t->type); + } else if (arg >= nr_args) { bpf_log(log, "func '%s' doesn't have %d-th argument\n", - tname, arg); + tname, arg + 1); return false; + } else { + t = btf_type_by_id(btf_vmlinux, args[arg].type); } - - t = btf_type_by_id(btf_vmlinux, args[arg].type); /* skip modifiers */ while (btf_type_is_modifier(t)) t = btf_type_by_id(btf_vmlinux, t->type); @@ -3784,6 +3789,70 @@ u32 btf_resolve_helper_id(struct bpf_verifier_log *log, void *fn, int arg) return btf_id; } +static int __get_type_size(struct btf *btf, u32 btf_id, + const struct btf_type **bad_type) +{ + const struct btf_type *t; + + if (!btf_id) + /* void */ + return 0; + t = btf_type_by_id(btf, btf_id); + while (t && btf_type_is_modifier(t)) + t = btf_type_by_id(btf, t->type); + if (!t) + return -EINVAL; + if (btf_type_is_ptr(t)) + /* kernel size of pointer. Not BPF's size of pointer*/ + return sizeof(void *); + if (btf_type_is_int(t) || btf_type_is_enum(t)) + return t->size; + *bad_type = t; + return -EINVAL; +} + +int btf_distill_func_proto(struct bpf_verifier_log *log, + struct btf *btf, + const struct btf_type *func, + const char *tname, + struct btf_func_model *m) +{ + const struct btf_param *args; + const struct btf_type *t; + u32 i, nargs; + int ret; + + args = (const struct btf_param *)(func + 1); + nargs = btf_type_vlen(func); + if (nargs >= MAX_BPF_FUNC_ARGS) { + bpf_log(log, + "The function %s has %d arguments. 
Too many.\n", + tname, nargs); + return -EINVAL; + } + ret = __get_type_size(btf, func->type, &t); + if (ret < 0) { + bpf_log(log, + "The function %s return type %s is unsupported.\n", + tname, btf_kind_str[BTF_INFO_KIND(t->info)]); + return -EINVAL; + } + m->ret_size = ret; + + for (i = 0; i < nargs; i++) { + ret = __get_type_size(btf, args[i].type, &t); + if (ret < 0) { + bpf_log(log, + "The function %s arg%d type %s is unsupported.\n", + tname, i, btf_kind_str[BTF_INFO_KIND(t->info)]); + return -EINVAL; + } + m->arg_size[i] = ret; + } + m->nr_args = nargs; + return 0; +} + void btf_type_seq_show(const struct btf *btf, u32 type_id, void *obj, struct seq_file *m) { diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 434a0d920153..da5a8b8e278f 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -2015,6 +2015,7 @@ static void bpf_prog_free_deferred(struct work_struct *work) if (aux->prog->has_callchain_buf) put_callchain_buffers(); #endif + bpf_trampoline_put(aux->trampoline); for (i = 0; i < aux->func_cnt; i++) bpf_jit_free(aux->func[i]); if (aux->func_cnt) { diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 6d9ce95e5a8d..e2e37bea86bc 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1799,6 +1799,49 @@ static int bpf_obj_get(const union bpf_attr *attr) attr->file_flags); } +static int bpf_tracing_prog_release(struct inode *inode, struct file *filp) +{ + struct bpf_prog *prog = filp->private_data; + + WARN_ON_ONCE(bpf_trampoline_unlink_prog(prog)); + bpf_prog_put(prog); + return 0; +} + +static const struct file_operations bpf_tracing_prog_fops = { + .release = bpf_tracing_prog_release, + .read = bpf_dummy_read, + .write = bpf_dummy_write, +}; + +static int bpf_tracing_prog_attach(struct bpf_prog *prog) +{ + int tr_fd, err; + + if (prog->expected_attach_type != BPF_TRACE_FENTRY && + prog->expected_attach_type != BPF_TRACE_FEXIT) { + err = -EINVAL; + goto out_put_prog; + } + + err = bpf_trampoline_link_prog(prog); + if (err) + goto out_put_prog; + + tr_fd = anon_inode_getfd("bpf-tracing-prog", &bpf_tracing_prog_fops, + prog, O_CLOEXEC); + if (tr_fd < 0) { + WARN_ON_ONCE(bpf_trampoline_unlink_prog(prog)); + err = tr_fd; + goto out_put_prog; + } + return tr_fd; + +out_put_prog: + bpf_prog_put(prog); + return err; +} + struct bpf_raw_tracepoint { struct bpf_raw_event_map *btp; struct bpf_prog *prog; @@ -1850,14 +1893,16 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr) if (prog->type == BPF_PROG_TYPE_TRACING) { if (attr->raw_tracepoint.name) { - /* raw_tp name should not be specified in raw_tp - * programs that were verified via in-kernel BTF info + /* The attach point for this category of programs + * should be specified via btf_id during program load. */ err = -EINVAL; goto out_put_prog; } - /* raw_tp name is taken from type name instead */ - tp_name = prog->aux->attach_func_name; + if (prog->expected_attach_type == BPF_TRACE_RAW_TP) + tp_name = prog->aux->attach_func_name; + else + return bpf_tracing_prog_attach(prog); } else { if (strncpy_from_user(buf, u64_to_user_ptr(attr->raw_tracepoint.name), diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c new file mode 100644 index 000000000000..10ae59d65f13 --- /dev/null +++ b/kernel/bpf/trampoline.c @@ -0,0 +1,253 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2019 Facebook */ +#include +#include +#include + +/* btf_vmlinux has ~22k attachable functions. 1k htab is enough. 
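+ * (1 << 10 == 1024 buckets, so even if every one of the ~22k
+ * attachable functions had a trampoline that is only ~22
+ * entries per bucket)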
*/ +#define TRAMPOLINE_HASH_BITS 10 +#define TRAMPOLINE_TABLE_SIZE (1 << TRAMPOLINE_HASH_BITS) + +static struct hlist_head trampoline_table[TRAMPOLINE_TABLE_SIZE]; + +/* serializes access to trampoline_table */ +static DEFINE_MUTEX(trampoline_mutex); + +struct bpf_trampoline *bpf_trampoline_lookup(u64 key) +{ + struct bpf_trampoline *tr; + struct hlist_head *head; + void *image; + int i; + + mutex_lock(&trampoline_mutex); + head = &trampoline_table[hash_64(key, TRAMPOLINE_HASH_BITS)]; + hlist_for_each_entry(tr, head, hlist) { + if (tr->key == key) { + refcount_inc(&tr->refcnt); + goto out; + } + } + tr = kzalloc(sizeof(*tr), GFP_KERNEL); + if (!tr) + goto out; + + /* is_root was checked earlier. No need for bpf_jit_charge_modmem() */ + image = bpf_jit_alloc_exec(PAGE_SIZE); + if (!image) { + kfree(tr); + tr = NULL; + goto out; + } + + tr->key = key; + INIT_HLIST_NODE(&tr->hlist); + hlist_add_head(&tr->hlist, head); + refcount_set(&tr->refcnt, 1); + mutex_init(&tr->mutex); + for (i = 0; i < BPF_TRAMP_MAX; i++) + INIT_HLIST_HEAD(&tr->progs_hlist[i]); + + set_vm_flush_reset_perms(image); + /* Keep image as writeable. The alternative is to keep flipping ro/rw + * everytime new program is attached or detached. + */ + set_memory_x((long)image, 1); + tr->image = image; +out: + mutex_unlock(&trampoline_mutex); + return tr; +} + +/* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50 + * bytes on x86. Pick a number to fit into PAGE_SIZE / 2 + */ +#define BPF_MAX_TRAMP_PROGS 40 + +static int bpf_trampoline_update(struct bpf_trampoline *tr) +{ + void *old_image = tr->image + ((tr->selector + 1) & 1) * PAGE_SIZE/2; + void *new_image = tr->image + (tr->selector & 1) * PAGE_SIZE/2; + struct bpf_prog *progs_to_run[BPF_MAX_TRAMP_PROGS]; + int fentry_cnt = tr->progs_cnt[BPF_TRAMP_FENTRY]; + int fexit_cnt = tr->progs_cnt[BPF_TRAMP_FEXIT]; + struct bpf_prog **progs, **fentry, **fexit; + u32 flags = BPF_TRAMP_F_RESTORE_REGS; + struct bpf_prog_aux *aux; + int err; + + if (fentry_cnt + fexit_cnt == 0) { + err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_CALL_TO_NOP, + old_image, NULL); + tr->selector = 0; + goto out; + } + + /* populate fentry progs */ + fentry = progs = progs_to_run; + hlist_for_each_entry(aux, &tr->progs_hlist[BPF_TRAMP_FENTRY], tramp_hlist) + *progs++ = aux->prog; + + /* populate fexit progs */ + fexit = progs; + hlist_for_each_entry(aux, &tr->progs_hlist[BPF_TRAMP_FEXIT], tramp_hlist) + *progs++ = aux->prog; + + if (fexit_cnt) + flags = BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_SKIP_FRAME; + + err = arch_prepare_bpf_trampoline(new_image, &tr->func.model, flags, + fentry, fentry_cnt, + fexit, fexit_cnt, + tr->func.addr); + if (err) + goto out; + + if (tr->selector) + /* progs already running at this address */ + err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_CALL_TO_CALL, + old_image, new_image); + else + /* first time registering */ + err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_NOP_TO_CALL, + NULL, new_image); + if (err) + goto out; + tr->selector++; +out: + return err; +} + +static enum bpf_tramp_prog_type bpf_attach_type_to_tramp(enum bpf_attach_type t) +{ + switch (t) { + case BPF_TRACE_FENTRY: + return BPF_TRAMP_FENTRY; + default: + return BPF_TRAMP_FEXIT; + } +} + +int bpf_trampoline_link_prog(struct bpf_prog *prog) +{ + enum bpf_tramp_prog_type kind; + struct bpf_trampoline *tr; + int err = 0; + + tr = prog->aux->trampoline; + kind = bpf_attach_type_to_tramp(prog->expected_attach_type); + mutex_lock(&tr->mutex); + if (tr->progs_cnt[BPF_TRAMP_FENTRY] + 
tr->progs_cnt[BPF_TRAMP_FEXIT] + >= BPF_MAX_TRAMP_PROGS) { + err = -E2BIG; + goto out; + } + if (!hlist_unhashed(&prog->aux->tramp_hlist)) { + /* prog already linked */ + err = -EBUSY; + goto out; + } + hlist_add_head(&prog->aux->tramp_hlist, &tr->progs_hlist[kind]); + tr->progs_cnt[kind]++; + err = bpf_trampoline_update(prog->aux->trampoline); + if (err) { + hlist_del(&prog->aux->tramp_hlist); + tr->progs_cnt[kind]--; + } +out: + mutex_unlock(&tr->mutex); + return err; +} + +/* bpf_trampoline_unlink_prog() should never fail. */ +int bpf_trampoline_unlink_prog(struct bpf_prog *prog) +{ + enum bpf_tramp_prog_type kind; + struct bpf_trampoline *tr; + int err; + + tr = prog->aux->trampoline; + kind = bpf_attach_type_to_tramp(prog->expected_attach_type); + mutex_lock(&tr->mutex); + hlist_del(&prog->aux->tramp_hlist); + tr->progs_cnt[kind]--; + err = bpf_trampoline_update(prog->aux->trampoline); + mutex_unlock(&tr->mutex); + return err; +} + +void bpf_trampoline_put(struct bpf_trampoline *tr) +{ + if (!tr) + return; + mutex_lock(&trampoline_mutex); + if (!refcount_dec_and_test(&tr->refcnt)) + goto out; + WARN_ON_ONCE(mutex_is_locked(&tr->mutex)); + if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[BPF_TRAMP_FENTRY]))) + goto out; + if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[BPF_TRAMP_FEXIT]))) + goto out; + bpf_jit_free_exec(tr->image); + hlist_del(&tr->hlist); + kfree(tr); +out: + mutex_unlock(&trampoline_mutex); +} + +/* The logic is similar to BPF_PROG_RUN, but with explicit rcu and preempt that + * are needed for trampoline. The macro is split into + * call _bpf_prog_enter + * call prog->bpf_func + * call __bpf_prog_exit + */ +u64 notrace __bpf_prog_enter(void) +{ + u64 start = 0; + + rcu_read_lock(); + preempt_disable(); + if (static_branch_unlikely(&bpf_stats_enabled_key)) + start = sched_clock(); + return start; +} + +void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start) +{ + struct bpf_prog_stats *stats; + + if (static_branch_unlikely(&bpf_stats_enabled_key) && + /* static_key could be enabled in __bpf_prog_enter + * and disabled in __bpf_prog_exit. + * And vice versa. + * Hence check that 'start' is not zero. 
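+ * (start == 0 also means __bpf_prog_enter() never read
+ * sched_clock(), so there is nothing to account)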
+ */ + start) { + stats = this_cpu_ptr(prog->aux->stats); + u64_stats_update_begin(&stats->syncp); + stats->cnt++; + stats->nsecs += sched_clock() - start; + u64_stats_update_end(&stats->syncp); + } + preempt_enable(); + rcu_read_unlock(); +} + +int __weak +arch_prepare_bpf_trampoline(void *image, struct btf_func_model *m, u32 flags, + struct bpf_prog **fentry_progs, int fentry_cnt, + struct bpf_prog **fexit_progs, int fexit_cnt, + void *orig_call) +{ + return -ENOTSUPP; +} + +static int __init init_trampolines(void) +{ + int i; + + for (i = 0; i < TRAMPOLINE_TABLE_SIZE; i++) + INIT_HLIST_HEAD(&trampoline_table[i]); + return 0; +} +late_initcall(init_trampolines); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 2f2374967b36..8f89cfa93e88 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -9382,8 +9382,11 @@ static int check_attach_btf_id(struct bpf_verifier_env *env) struct bpf_prog *prog = env->prog; u32 btf_id = prog->aux->attach_btf_id; const char prefix[] = "btf_trace_"; + struct bpf_trampoline *tr; const struct btf_type *t; const char *tname; + int ret = 0; + long addr; if (prog->type != BPF_PROG_TYPE_TRACING) return 0; @@ -9432,6 +9435,45 @@ static int check_attach_btf_id(struct bpf_verifier_env *env) prog->aux->attach_func_proto = t; prog->aux->attach_btf_trace = true; return 0; + case BPF_TRACE_FENTRY: + case BPF_TRACE_FEXIT: + if (!btf_type_is_func(t)) { + verbose(env, "attach_btf_id %u is not a function\n", + btf_id); + return -EINVAL; + } + t = btf_type_by_id(btf_vmlinux, t->type); + if (!btf_type_is_func_proto(t)) + return -EINVAL; + tr = bpf_trampoline_lookup(btf_id); + if (!tr) + return -ENOMEM; + prog->aux->attach_func_name = tname; + prog->aux->attach_func_proto = t; + mutex_lock(&tr->mutex); + if (tr->func.addr) { + prog->aux->trampoline = tr; + goto out; + } + ret = btf_distill_func_proto(&env->log, btf_vmlinux, t, + tname, &tr->func.model); + if (ret < 0) + goto out; + addr = kallsyms_lookup_name(tname); + if (!addr) { + verbose(env, + "The address of function %s cannot be found\n", + tname); + ret = -ENOENT; + goto out; + } + tr->func.addr = (void *)addr; + prog->aux->trampoline = tr; +out: + mutex_unlock(&tr->mutex); + if (ret) + bpf_trampoline_put(tr); + return ret; default: return -EINVAL; } -- cgit v1.2.3 From 9fd4a39dc7fe734d26eb89ea97e8c91331c6378c Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 14 Nov 2019 10:57:13 -0800 Subject: bpf: Reserve space for BPF trampoline in BPF programs BPF trampoline can be made to work with existing 5 bytes of BPF program prologue, but let's add 5 bytes of NOPs to the beginning of every JITed BPF program to make BPF trampoline job easier. They can be removed in the future. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20191114185720.1641606-14-ast@kernel.org --- arch/x86/net/bpf_jit_comp.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index be2b43a894f6..c06096df9118 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -206,7 +206,7 @@ struct jit_context { /* number of bytes emit_call() needs to generate call instruction */ #define X86_CALL_SIZE 5 -#define PROLOGUE_SIZE 20 +#define PROLOGUE_SIZE 25 /* * Emit x86-64 prologue code for BPF program and check its size. 
@@ -215,8 +215,13 @@ struct jit_context { static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf) { u8 *prog = *pprog; - int cnt = 0; + int cnt = X86_CALL_SIZE; + /* BPF trampoline can be made to work without these nops, + * but let's waste 5 bytes for now and optimize later + */ + memcpy(prog, ideal_nops[NOP_ATOMIC5], cnt); + prog += cnt; EMIT1(0x55); /* push rbp */ EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */ /* sub rsp, rounded_stack_depth */
-- cgit v1.2.3

From 5b92a28aae4dd0f88778d540ecfdcdaec5a41723 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 14 Nov 2019 10:57:17 -0800 Subject: bpf: Support attaching tracing BPF program to other BPF programs

Allow FENTRY/FEXIT BPF programs to attach to other BPF programs of any type, including their subprograms. This feature allows snooping on the input and output packets of XDP and TC programs, including their return values. In order to do that, the verifier needs to track not only the types in vmlinux, but the types of other BPF programs as well. The verifier also needs to translate uapi/linux/bpf.h types used by networking programs into kernel internal BTF types used by FENTRY/FEXIT BPF programs. In some cases LLVM optimizations can remove arguments from BPF subprograms without adjusting the BTF info that the LLVM backend knows about. When the BTF info disagrees with the actual types that the verifier sees, the BPF trampoline has to fall back to a conservative mode and treat all arguments as u64. The FENTRY/FEXIT program can still attach to such subprograms, but it won't be able to recognize pointer types like 'struct sk_buff *' and it won't be able to pass them to bpf_skb_output() for dumping packets to user space. The FENTRY/FEXIT program would need to use bpf_probe_read_kernel() instead. The BPF_PROG_LOAD command is extended with an attach_prog_fd field. When it is set to zero, the attach_btf_id is one of the vmlinux BTF type IDs. When attach_prog_fd points to a previously loaded BPF program, the attach_btf_id is the BTF type id of its main function or one of its subprograms.

Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20191114185720.1641606-18-ast@kernel.org
--- arch/x86/net/bpf_jit_comp.c | 3 +- include/linux/bpf.h | 1 + include/linux/btf.h | 1 + include/uapi/linux/bpf.h | 1 + kernel/bpf/btf.c | 70 +++++++++++++++++++++++++++++++++----- kernel/bpf/core.c | 2 ++ kernel/bpf/syscall.c | 19 ++++++++--- kernel/bpf/verifier.c | 83 +++++++++++++++++++++++++++++++++++++-------- 8 files changed, 152 insertions(+), 28 deletions(-) (limited to 'arch')
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index c06096df9118..2e586f579945 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -504,7 +504,8 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, u8 *prog; int ret; - if (!is_kernel_text((long)ip)) + if (!is_kernel_text((long)ip) && + !is_bpf_text_address((long)ip)) /* BPF trampoline in modules is not supported */ return -EINVAL;
diff --git a/include/linux/bpf.h b/include/linux/bpf.h index c70bf04726b4..5b81cde47314 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -495,6 +495,7 @@ struct bpf_prog_aux { u32 func_cnt; /* used by non-func prog as the number of func progs */ u32 func_idx; /* 0 for non-func prog, the index in func array for func prog */ u32 attach_btf_id; /* in-kernel BTF type id to attach to */ + struct bpf_prog *linked_prog; bool verifier_zext; /* Zero extensions has been inserted by verifier.
*/ bool offload_requested; bool attach_btf_trace; /* true if attaching to BTF-enabled raw tp */ diff --git a/include/linux/btf.h b/include/linux/btf.h index 9dee00859c5f..79d4abc2556a 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -88,6 +88,7 @@ static inline bool btf_type_is_func_proto(const struct btf_type *t) const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id); const char *btf_name_by_offset(const struct btf *btf, u32 offset); struct btf *btf_parse_vmlinux(void); +struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog); #else static inline const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 69c200e6e696..4842a134b202 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -425,6 +425,7 @@ union bpf_attr { __aligned_u64 line_info; /* line info */ __u32 line_info_cnt; /* number of bpf_line_info records */ __u32 attach_btf_id; /* in-kernel BTF type id to attach to */ + __u32 attach_prog_fd; /* 0 to attach to vmlinux */ }; struct { /* anonymous struct used by BPF_OBJ_* commands */ diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 4620267b186e..40efde5eedcb 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -3530,6 +3530,20 @@ btf_get_prog_ctx_type(struct bpf_verifier_log *log, struct btf *btf, return ctx_type; } +static int btf_translate_to_vmlinux(struct bpf_verifier_log *log, + struct btf *btf, + const struct btf_type *t, + enum bpf_prog_type prog_type) +{ + const struct btf_member *prog_ctx_type, *kern_ctx_type; + + prog_ctx_type = btf_get_prog_ctx_type(log, btf, t, prog_type); + if (!prog_ctx_type) + return -ENOENT; + kern_ctx_type = prog_ctx_type + 1; + return kern_ctx_type->type; +} + struct btf *btf_parse_vmlinux(void) { struct btf_verifier_env *env = NULL; @@ -3602,15 +3616,29 @@ errout: return ERR_PTR(err); } +struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog) +{ + struct bpf_prog *tgt_prog = prog->aux->linked_prog; + + if (tgt_prog) { + return tgt_prog->aux->btf; + } else { + return btf_vmlinux; + } +} + bool btf_ctx_access(int off, int size, enum bpf_access_type type, const struct bpf_prog *prog, struct bpf_insn_access_aux *info) { const struct btf_type *t = prog->aux->attach_func_proto; + struct bpf_prog *tgt_prog = prog->aux->linked_prog; + struct btf *btf = bpf_prog_get_target_btf(prog); const char *tname = prog->aux->attach_func_name; struct bpf_verifier_log *log = info->log; const struct btf_param *args; u32 nr_args, arg; + int ret; if (off % 8) { bpf_log(log, "func '%s' offset %d is not multiple of 8\n", @@ -3619,7 +3647,8 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, } arg = off / 8; args = (const struct btf_param *)(t + 1); - nr_args = btf_type_vlen(t); + /* if (t == NULL) Fall back to default BPF prog with 5 u64 arguments */ + nr_args = t ? btf_type_vlen(t) : 5; if (prog->aux->attach_btf_trace) { /* skip first 'void *__data' argument in btf_trace_##name typedef */ args++; @@ -3628,18 +3657,24 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, if (prog->expected_attach_type == BPF_TRACE_FEXIT && arg == nr_args) { + if (!t) + /* Default prog with 5 args. 6th arg is retval. 
*/ + return true; /* function return type */ - t = btf_type_by_id(btf_vmlinux, t->type); + t = btf_type_by_id(btf, t->type); } else if (arg >= nr_args) { bpf_log(log, "func '%s' doesn't have %d-th argument\n", tname, arg + 1); return false; } else { - t = btf_type_by_id(btf_vmlinux, args[arg].type); + if (!t) + /* Default prog with 5 args */ + return true; + t = btf_type_by_id(btf, args[arg].type); } /* skip modifiers */ while (btf_type_is_modifier(t)) - t = btf_type_by_id(btf_vmlinux, t->type); + t = btf_type_by_id(btf, t->type); if (btf_type_is_int(t)) /* accessing a scalar */ return true; @@ -3647,7 +3682,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, bpf_log(log, "func '%s' arg%d '%s' has type %s. Only pointer access is allowed\n", tname, arg, - __btf_name_by_offset(btf_vmlinux, t->name_off), + __btf_name_by_offset(btf, t->name_off), btf_kind_str[BTF_INFO_KIND(t->info)]); return false; } @@ -3662,10 +3697,19 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, info->reg_type = PTR_TO_BTF_ID; info->btf_id = t->type; - t = btf_type_by_id(btf_vmlinux, t->type); + if (tgt_prog) { + ret = btf_translate_to_vmlinux(log, btf, t, tgt_prog->type); + if (ret > 0) { + info->btf_id = ret; + return true; + } else { + return false; + } + } + t = btf_type_by_id(btf, t->type); /* skip modifiers */ while (btf_type_is_modifier(t)) - t = btf_type_by_id(btf_vmlinux, t->type); + t = btf_type_by_id(btf, t->type); if (!btf_type_is_struct(t)) { bpf_log(log, "func '%s' arg%d type %s is not a struct\n", @@ -3674,7 +3718,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, } bpf_log(log, "func '%s' arg%d has btf_id %d type %s '%s'\n", tname, arg, info->btf_id, btf_kind_str[BTF_INFO_KIND(t->info)], - __btf_name_by_offset(btf_vmlinux, t->name_off)); + __btf_name_by_offset(btf, t->name_off)); return true; } @@ -3954,6 +3998,16 @@ int btf_distill_func_proto(struct bpf_verifier_log *log, u32 i, nargs; int ret; + if (!func) { + /* BTF function prototype doesn't match the verifier types. + * Fall back to 5 u64 args. 
+ */ + for (i = 0; i < 5; i++) + m->arg_size[i] = 8; + m->ret_size = 8; + m->nr_args = 5; + return 0; + } args = (const struct btf_param *)(func + 1); nargs = btf_type_vlen(func); if (nargs >= MAX_BPF_FUNC_ARGS) { diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index da5a8b8e278f..b5945c3aaa8e 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -2031,6 +2031,8 @@ void bpf_prog_free(struct bpf_prog *fp) { struct bpf_prog_aux *aux = fp->aux; + if (aux->linked_prog) + bpf_prog_put(aux->linked_prog); INIT_WORK(&aux->work, bpf_prog_free_deferred); schedule_work(&aux->work); } diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 43ba647de720..c88c815c2154 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1577,7 +1577,7 @@ static void bpf_prog_load_fixup_attach_type(union bpf_attr *attr) static int bpf_prog_load_check_attach(enum bpf_prog_type prog_type, enum bpf_attach_type expected_attach_type, - u32 btf_id) + u32 btf_id, u32 prog_fd) { switch (prog_type) { case BPF_PROG_TYPE_TRACING: @@ -1585,7 +1585,7 @@ bpf_prog_load_check_attach(enum bpf_prog_type prog_type, return -EINVAL; break; default: - if (btf_id) + if (btf_id || prog_fd) return -EINVAL; break; } @@ -1636,7 +1636,7 @@ bpf_prog_load_check_attach(enum bpf_prog_type prog_type, } /* last field in 'union bpf_attr' used by this command */ -#define BPF_PROG_LOAD_LAST_FIELD attach_btf_id +#define BPF_PROG_LOAD_LAST_FIELD attach_prog_fd static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr) { @@ -1679,7 +1679,8 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr) bpf_prog_load_fixup_attach_type(attr); if (bpf_prog_load_check_attach(type, attr->expected_attach_type, - attr->attach_btf_id)) + attr->attach_btf_id, + attr->attach_prog_fd)) return -EINVAL; /* plain bpf_prog allocation */ @@ -1689,6 +1690,16 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr) prog->expected_attach_type = attr->expected_attach_type; prog->aux->attach_btf_id = attr->attach_btf_id; + if (attr->attach_prog_fd) { + struct bpf_prog *tgt_prog; + + tgt_prog = bpf_prog_get(attr->attach_prog_fd); + if (IS_ERR(tgt_prog)) { + err = PTR_ERR(tgt_prog); + goto free_prog_nouncharge; + } + prog->aux->linked_prog = tgt_prog; + } prog->aux->offload_requested = !!attr->prog_ifindex; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 11910149ca2f..e9dc95a18d44 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -9390,13 +9390,17 @@ static void print_verification_stats(struct bpf_verifier_env *env) static int check_attach_btf_id(struct bpf_verifier_env *env) { struct bpf_prog *prog = env->prog; + struct bpf_prog *tgt_prog = prog->aux->linked_prog; u32 btf_id = prog->aux->attach_btf_id; const char prefix[] = "btf_trace_"; + int ret = 0, subprog = -1, i; struct bpf_trampoline *tr; const struct btf_type *t; + bool conservative = true; const char *tname; - int ret = 0; + struct btf *btf; long addr; + u64 key; if (prog->type != BPF_PROG_TYPE_TRACING) return 0; @@ -9405,19 +9409,47 @@ static int check_attach_btf_id(struct bpf_verifier_env *env) verbose(env, "Tracing programs must provide btf_id\n"); return -EINVAL; } - t = btf_type_by_id(btf_vmlinux, btf_id); + btf = bpf_prog_get_target_btf(prog); + if (!btf) { + verbose(env, + "FENTRY/FEXIT program can only be attached to another program annotated with BTF\n"); + return -EINVAL; + } + t = btf_type_by_id(btf, btf_id); if (!t) { verbose(env, "attach_btf_id %u is invalid\n", btf_id); return -EINVAL; } - tname 
= btf_name_by_offset(btf_vmlinux, t->name_off); + tname = btf_name_by_offset(btf, t->name_off); if (!tname) { verbose(env, "attach_btf_id %u doesn't have a name\n", btf_id); return -EINVAL; } + if (tgt_prog) { + struct bpf_prog_aux *aux = tgt_prog->aux; + + for (i = 0; i < aux->func_info_cnt; i++) + if (aux->func_info[i].type_id == btf_id) { + subprog = i; + break; + } + if (subprog == -1) { + verbose(env, "Subprog %s doesn't exist\n", tname); + return -EINVAL; + } + conservative = aux->func_info_aux[subprog].unreliable; + key = ((u64)aux->id) << 32 | btf_id; + } else { + key = btf_id; + } switch (prog->expected_attach_type) { case BPF_TRACE_RAW_TP: + if (tgt_prog) { + verbose(env, + "Only FENTRY/FEXIT progs are attachable to another BPF prog\n"); + return -EINVAL; + } if (!btf_type_is_typedef(t)) { verbose(env, "attach_btf_id %u is not a typedef\n", btf_id); @@ -9429,11 +9461,11 @@ static int check_attach_btf_id(struct bpf_verifier_env *env) return -EINVAL; } tname += sizeof(prefix) - 1; - t = btf_type_by_id(btf_vmlinux, t->type); + t = btf_type_by_id(btf, t->type); if (!btf_type_is_ptr(t)) /* should never happen in valid vmlinux build */ return -EINVAL; - t = btf_type_by_id(btf_vmlinux, t->type); + t = btf_type_by_id(btf, t->type); if (!btf_type_is_func_proto(t)) /* should never happen in valid vmlinux build */ return -EINVAL; @@ -9452,30 +9484,51 @@ static int check_attach_btf_id(struct bpf_verifier_env *env) btf_id); return -EINVAL; } - t = btf_type_by_id(btf_vmlinux, t->type); + t = btf_type_by_id(btf, t->type); if (!btf_type_is_func_proto(t)) return -EINVAL; - tr = bpf_trampoline_lookup(btf_id); + tr = bpf_trampoline_lookup(key); if (!tr) return -ENOMEM; prog->aux->attach_func_name = tname; + /* t is either vmlinux type or another program's type */ prog->aux->attach_func_proto = t; mutex_lock(&tr->mutex); if (tr->func.addr) { prog->aux->trampoline = tr; goto out; } - ret = btf_distill_func_proto(&env->log, btf_vmlinux, t, + if (tgt_prog && conservative) { + prog->aux->attach_func_proto = NULL; + t = NULL; + } + ret = btf_distill_func_proto(&env->log, btf, t, tname, &tr->func.model); if (ret < 0) goto out; - addr = kallsyms_lookup_name(tname); - if (!addr) { - verbose(env, - "The address of function %s cannot be found\n", - tname); - ret = -ENOENT; - goto out; + if (tgt_prog) { + if (!tgt_prog->jited) { + /* for now */ + verbose(env, "Can trace only JITed BPF progs\n"); + ret = -EINVAL; + goto out; + } + if (tgt_prog->type == BPF_PROG_TYPE_TRACING) { + /* prevent cycles */ + verbose(env, "Cannot recursively attach\n"); + ret = -EINVAL; + goto out; + } + addr = (long) tgt_prog->aux->func[subprog]->bpf_func; + } else { + addr = kallsyms_lookup_name(tname); + if (!addr) { + verbose(env, + "The address of function %s cannot be found\n", + tname); + ret = -ENOENT; + goto out; + } } tr->func.addr = (void *)addr; prog->aux->trampoline = tr; -- cgit v1.2.3 From 5fb8ef25803ef33e2eb60b626435828b937bed75 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 8 Nov 2019 13:22:08 +0100 Subject: crypto: chacha - move existing library code into lib/crypto Currently, our generic ChaCha implementation consists of a permute function in lib/chacha.c that operates on the 64-byte ChaCha state directly [and which is always included into the core kernel since it is used by the /dev/random driver], and the crypto API plumbing to expose it as a skcipher. 
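For callers such as the /dev/random driver, using that core means operating on the raw state words. A minimal sketch of extracting keystream blocks (not code from the tree; it assumes the standard RFC7539 state layout):

	#include <crypto/chacha.h>

	/* state[0..3] = "expand 32-byte k", state[4..11] = key,
	 * state[12] = block counter, state[13..15] = nonce
	 */
	static void demo_keystream(u32 state[16], u8 *out, unsigned int nblocks)
	{
		while (nblocks--) {
			chacha20_block(state, out);	/* also bumps state[12] */
			out += CHACHA_BLOCK_SIZE;
		}
	}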
In order to support in-kernel users that need the ChaCha streamcipher but have no need [or tolerance] for going through the abstractions of the crypto API, let's expose the streamcipher bits via a library API as well, in a way that permits the implementation to be superseded by an architecture specific one if provided. So move the streamcipher code into a separate module in lib/crypto, and expose the init() and crypt() routines to users of the library. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm/crypto/chacha-neon-glue.c | 2 +- arch/arm64/crypto/chacha-neon-glue.c | 2 +- arch/x86/crypto/chacha_glue.c | 2 +- crypto/Kconfig | 1 + crypto/chacha_generic.c | 60 ++---------------- include/crypto/chacha.h | 77 +++++++++++++++++------ include/crypto/internal/chacha.h | 53 ++++++++++++++++ lib/Makefile | 3 +- lib/chacha.c | 113 ---------------------------------- lib/crypto/Kconfig | 26 ++++++++ lib/crypto/Makefile | 4 ++ lib/crypto/chacha.c | 115 +++++++++++++++++++++++++++++++++++ lib/crypto/libchacha.c | 35 +++++++++++ 13 files changed, 303 insertions(+), 190 deletions(-) create mode 100644 include/crypto/internal/chacha.h delete mode 100644 lib/chacha.c create mode 100644 lib/crypto/chacha.c create mode 100644 lib/crypto/libchacha.c (limited to 'arch') diff --git a/arch/arm/crypto/chacha-neon-glue.c b/arch/arm/crypto/chacha-neon-glue.c index a8e9b534c8da..26576772f18b 100644 --- a/arch/arm/crypto/chacha-neon-glue.c +++ b/arch/arm/crypto/chacha-neon-glue.c @@ -20,7 +20,7 @@ */ #include -#include +#include #include #include #include diff --git a/arch/arm64/crypto/chacha-neon-glue.c b/arch/arm64/crypto/chacha-neon-glue.c index 1495d2b18518..d4cc61bfe79d 100644 --- a/arch/arm64/crypto/chacha-neon-glue.c +++ b/arch/arm64/crypto/chacha-neon-glue.c @@ -20,7 +20,7 @@ */ #include -#include +#include #include #include #include diff --git a/arch/x86/crypto/chacha_glue.c b/arch/x86/crypto/chacha_glue.c index 388f95a4ec24..bc62daa8dafd 100644 --- a/arch/x86/crypto/chacha_glue.c +++ b/arch/x86/crypto/chacha_glue.c @@ -7,7 +7,7 @@ */ #include -#include +#include #include #include #include diff --git a/crypto/Kconfig b/crypto/Kconfig index 9def945e9549..ae4495ae003f 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1412,6 +1412,7 @@ config CRYPTO_SALSA20 config CRYPTO_CHACHA20 tristate "ChaCha stream cipher algorithms" + select CRYPTO_LIB_CHACHA_GENERIC select CRYPTO_SKCIPHER help The ChaCha20, XChaCha20, and XChaCha12 stream cipher algorithms. 
diff --git a/crypto/chacha_generic.c b/crypto/chacha_generic.c index 085d8d219987..ebae6d9d9b32 100644 --- a/crypto/chacha_generic.c +++ b/crypto/chacha_generic.c @@ -8,29 +8,10 @@ #include #include -#include +#include #include #include -static void chacha_docrypt(u32 *state, u8 *dst, const u8 *src, - unsigned int bytes, int nrounds) -{ - /* aligned to potentially speed up crypto_xor() */ - u8 stream[CHACHA_BLOCK_SIZE] __aligned(sizeof(long)); - - while (bytes >= CHACHA_BLOCK_SIZE) { - chacha_block(state, stream, nrounds); - crypto_xor_cpy(dst, src, stream, CHACHA_BLOCK_SIZE); - bytes -= CHACHA_BLOCK_SIZE; - dst += CHACHA_BLOCK_SIZE; - src += CHACHA_BLOCK_SIZE; - } - if (bytes) { - chacha_block(state, stream, nrounds); - crypto_xor_cpy(dst, src, stream, bytes); - } -} - static int chacha_stream_xor(struct skcipher_request *req, const struct chacha_ctx *ctx, const u8 *iv) { @@ -48,8 +29,8 @@ static int chacha_stream_xor(struct skcipher_request *req, if (nbytes < walk.total) nbytes = round_down(nbytes, CHACHA_BLOCK_SIZE); - chacha_docrypt(state, walk.dst.virt.addr, walk.src.virt.addr, - nbytes, ctx->nrounds); + chacha_crypt_generic(state, walk.dst.virt.addr, + walk.src.virt.addr, nbytes, ctx->nrounds); err = skcipher_walk_done(&walk, walk.nbytes - nbytes); } @@ -58,41 +39,10 @@ static int chacha_stream_xor(struct skcipher_request *req, void crypto_chacha_init(u32 *state, const struct chacha_ctx *ctx, const u8 *iv) { - state[0] = 0x61707865; /* "expa" */ - state[1] = 0x3320646e; /* "nd 3" */ - state[2] = 0x79622d32; /* "2-by" */ - state[3] = 0x6b206574; /* "te k" */ - state[4] = ctx->key[0]; - state[5] = ctx->key[1]; - state[6] = ctx->key[2]; - state[7] = ctx->key[3]; - state[8] = ctx->key[4]; - state[9] = ctx->key[5]; - state[10] = ctx->key[6]; - state[11] = ctx->key[7]; - state[12] = get_unaligned_le32(iv + 0); - state[13] = get_unaligned_le32(iv + 4); - state[14] = get_unaligned_le32(iv + 8); - state[15] = get_unaligned_le32(iv + 12); + chacha_init_generic(state, ctx->key, iv); } EXPORT_SYMBOL_GPL(crypto_chacha_init); -static int chacha_setkey(struct crypto_skcipher *tfm, const u8 *key, - unsigned int keysize, int nrounds) -{ - struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); - int i; - - if (keysize != CHACHA_KEY_SIZE) - return -EINVAL; - - for (i = 0; i < ARRAY_SIZE(ctx->key); i++) - ctx->key[i] = get_unaligned_le32(key + i * sizeof(u32)); - - ctx->nrounds = nrounds; - return 0; -} - int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key, unsigned int keysize) { @@ -126,7 +76,7 @@ int crypto_xchacha_crypt(struct skcipher_request *req) /* Compute the subkey given the original key and first 128 nonce bits */ crypto_chacha_init(state, ctx, req->iv); - hchacha_block(state, subctx.key, ctx->nrounds); + hchacha_block_generic(state, subctx.key, ctx->nrounds); subctx.nrounds = ctx->nrounds; /* Build the real IV */ diff --git a/include/crypto/chacha.h b/include/crypto/chacha.h index d1e723c6a37d..5c662f8fecac 100644 --- a/include/crypto/chacha.h +++ b/include/crypto/chacha.h @@ -15,9 +15,8 @@ #ifndef _CRYPTO_CHACHA_H #define _CRYPTO_CHACHA_H -#include +#include #include -#include /* 32-bit stream position, then 96-bit nonce (RFC7539 convention) */ #define CHACHA_IV_SIZE 16 @@ -29,26 +28,70 @@ /* 192-bit nonce, then 64-bit stream position */ #define XCHACHA_IV_SIZE 32 -struct chacha_ctx { - u32 key[8]; - int nrounds; -}; - -void chacha_block(u32 *state, u8 *stream, int nrounds); +void chacha_block_generic(u32 *state, u8 *stream, int nrounds); static inline void chacha20_block(u32 
*state, u8 *stream) { - chacha_block(state, stream, 20); + chacha_block_generic(state, stream, 20); } -void hchacha_block(const u32 *in, u32 *out, int nrounds); -void crypto_chacha_init(u32 *state, const struct chacha_ctx *ctx, const u8 *iv); +void hchacha_block_arch(const u32 *state, u32 *out, int nrounds); +void hchacha_block_generic(const u32 *state, u32 *out, int nrounds); + +static inline void hchacha_block(const u32 *state, u32 *out, int nrounds) +{ + if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CHACHA)) + hchacha_block_arch(state, out, nrounds); + else + hchacha_block_generic(state, out, nrounds); +} -int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key, - unsigned int keysize); -int crypto_chacha12_setkey(struct crypto_skcipher *tfm, const u8 *key, - unsigned int keysize); +void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv); +static inline void chacha_init_generic(u32 *state, const u32 *key, const u8 *iv) +{ + state[0] = 0x61707865; /* "expa" */ + state[1] = 0x3320646e; /* "nd 3" */ + state[2] = 0x79622d32; /* "2-by" */ + state[3] = 0x6b206574; /* "te k" */ + state[4] = key[0]; + state[5] = key[1]; + state[6] = key[2]; + state[7] = key[3]; + state[8] = key[4]; + state[9] = key[5]; + state[10] = key[6]; + state[11] = key[7]; + state[12] = get_unaligned_le32(iv + 0); + state[13] = get_unaligned_le32(iv + 4); + state[14] = get_unaligned_le32(iv + 8); + state[15] = get_unaligned_le32(iv + 12); +} -int crypto_chacha_crypt(struct skcipher_request *req); -int crypto_xchacha_crypt(struct skcipher_request *req); +static inline void chacha_init(u32 *state, const u32 *key, const u8 *iv) +{ + if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CHACHA)) + chacha_init_arch(state, key, iv); + else + chacha_init_generic(state, key, iv); +} + +void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, + unsigned int bytes, int nrounds); +void chacha_crypt_generic(u32 *state, u8 *dst, const u8 *src, + unsigned int bytes, int nrounds); + +static inline void chacha_crypt(u32 *state, u8 *dst, const u8 *src, + unsigned int bytes, int nrounds) +{ + if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CHACHA)) + chacha_crypt_arch(state, dst, src, bytes, nrounds); + else + chacha_crypt_generic(state, dst, src, bytes, nrounds); +} + +static inline void chacha20_crypt(u32 *state, u8 *dst, const u8 *src, + unsigned int bytes) +{ + chacha_crypt(state, dst, src, bytes, 20); +} #endif /* _CRYPTO_CHACHA_H */ diff --git a/include/crypto/internal/chacha.h b/include/crypto/internal/chacha.h new file mode 100644 index 000000000000..c0e40b245431 --- /dev/null +++ b/include/crypto/internal/chacha.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _CRYPTO_INTERNAL_CHACHA_H +#define _CRYPTO_INTERNAL_CHACHA_H + +#include +#include +#include + +struct chacha_ctx { + u32 key[8]; + int nrounds; +}; + +void crypto_chacha_init(u32 *state, const struct chacha_ctx *ctx, const u8 *iv); + +static inline int chacha_setkey(struct crypto_skcipher *tfm, const u8 *key, + unsigned int keysize, int nrounds) +{ + struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); + int i; + + if (keysize != CHACHA_KEY_SIZE) + return -EINVAL; + + for (i = 0; i < ARRAY_SIZE(ctx->key); i++) + ctx->key[i] = get_unaligned_le32(key + i * sizeof(u32)); + + ctx->nrounds = nrounds; + return 0; +} + +static inline int chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key, + unsigned int keysize) +{ + return chacha_setkey(tfm, key, keysize, 20); +} + +static int inline chacha12_setkey(struct crypto_skcipher *tfm, const u8 *key, + 
unsigned int keysize) +{ + return chacha_setkey(tfm, key, keysize, 12); +} + +int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key, + unsigned int keysize); +int crypto_chacha12_setkey(struct crypto_skcipher *tfm, const u8 *key, + unsigned int keysize); + +int crypto_chacha_crypt(struct skcipher_request *req); +int crypto_xchacha_crypt(struct skcipher_request *req); + +#endif /* _CRYPTO_CHACHA_H */ diff --git a/lib/Makefile b/lib/Makefile index c5892807e06f..5af38fd5cc60 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -26,8 +26,7 @@ endif lib-y := ctype.o string.o vsprintf.o cmdline.o \ rbtree.o radix-tree.o timerqueue.o xarray.o \ - idr.o extable.o \ - sha1.o chacha.o irq_regs.o argv_split.o \ + idr.o extable.o sha1.o irq_regs.o argv_split.o \ flex_proportions.o ratelimit.o show_mem.o \ is_single_threaded.o plist.o decompress.o kobject_uevent.o \ earlycpio.o seq_buf.o siphash.o dec_and_lock.o \ diff --git a/lib/chacha.c b/lib/chacha.c deleted file mode 100644 index c7c9826564d3..000000000000 --- a/lib/chacha.c +++ /dev/null @@ -1,113 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * The "hash function" used as the core of the ChaCha stream cipher (RFC7539) - * - * Copyright (C) 2015 Martin Willi - */ - -#include -#include -#include -#include -#include -#include - -static void chacha_permute(u32 *x, int nrounds) -{ - int i; - - /* whitelist the allowed round counts */ - WARN_ON_ONCE(nrounds != 20 && nrounds != 12); - - for (i = 0; i < nrounds; i += 2) { - x[0] += x[4]; x[12] = rol32(x[12] ^ x[0], 16); - x[1] += x[5]; x[13] = rol32(x[13] ^ x[1], 16); - x[2] += x[6]; x[14] = rol32(x[14] ^ x[2], 16); - x[3] += x[7]; x[15] = rol32(x[15] ^ x[3], 16); - - x[8] += x[12]; x[4] = rol32(x[4] ^ x[8], 12); - x[9] += x[13]; x[5] = rol32(x[5] ^ x[9], 12); - x[10] += x[14]; x[6] = rol32(x[6] ^ x[10], 12); - x[11] += x[15]; x[7] = rol32(x[7] ^ x[11], 12); - - x[0] += x[4]; x[12] = rol32(x[12] ^ x[0], 8); - x[1] += x[5]; x[13] = rol32(x[13] ^ x[1], 8); - x[2] += x[6]; x[14] = rol32(x[14] ^ x[2], 8); - x[3] += x[7]; x[15] = rol32(x[15] ^ x[3], 8); - - x[8] += x[12]; x[4] = rol32(x[4] ^ x[8], 7); - x[9] += x[13]; x[5] = rol32(x[5] ^ x[9], 7); - x[10] += x[14]; x[6] = rol32(x[6] ^ x[10], 7); - x[11] += x[15]; x[7] = rol32(x[7] ^ x[11], 7); - - x[0] += x[5]; x[15] = rol32(x[15] ^ x[0], 16); - x[1] += x[6]; x[12] = rol32(x[12] ^ x[1], 16); - x[2] += x[7]; x[13] = rol32(x[13] ^ x[2], 16); - x[3] += x[4]; x[14] = rol32(x[14] ^ x[3], 16); - - x[10] += x[15]; x[5] = rol32(x[5] ^ x[10], 12); - x[11] += x[12]; x[6] = rol32(x[6] ^ x[11], 12); - x[8] += x[13]; x[7] = rol32(x[7] ^ x[8], 12); - x[9] += x[14]; x[4] = rol32(x[4] ^ x[9], 12); - - x[0] += x[5]; x[15] = rol32(x[15] ^ x[0], 8); - x[1] += x[6]; x[12] = rol32(x[12] ^ x[1], 8); - x[2] += x[7]; x[13] = rol32(x[13] ^ x[2], 8); - x[3] += x[4]; x[14] = rol32(x[14] ^ x[3], 8); - - x[10] += x[15]; x[5] = rol32(x[5] ^ x[10], 7); - x[11] += x[12]; x[6] = rol32(x[6] ^ x[11], 7); - x[8] += x[13]; x[7] = rol32(x[7] ^ x[8], 7); - x[9] += x[14]; x[4] = rol32(x[4] ^ x[9], 7); - } -} - -/** - * chacha_block - generate one keystream block and increment block counter - * @state: input state matrix (16 32-bit words) - * @stream: output keystream block (64 bytes) - * @nrounds: number of rounds (20 or 12; 20 is recommended) - * - * This is the ChaCha core, a function from 64-byte strings to 64-byte strings. - * The caller has already converted the endianness of the input. This function - * also handles incrementing the block counter in the input matrix. 
- */ -void chacha_block(u32 *state, u8 *stream, int nrounds) -{ - u32 x[16]; - int i; - - memcpy(x, state, 64); - - chacha_permute(x, nrounds); - - for (i = 0; i < ARRAY_SIZE(x); i++) - put_unaligned_le32(x[i] + state[i], &stream[i * sizeof(u32)]); - - state[12]++; -} -EXPORT_SYMBOL(chacha_block); - -/** - * hchacha_block - abbreviated ChaCha core, for XChaCha - * @in: input state matrix (16 32-bit words) - * @out: output (8 32-bit words) - * @nrounds: number of rounds (20 or 12; 20 is recommended) - * - * HChaCha is the ChaCha equivalent of HSalsa and is an intermediate step - * towards XChaCha (see https://cr.yp.to/snuffle/xsalsa-20081128.pdf). HChaCha - * skips the final addition of the initial state, and outputs only certain words - * of the state. It should not be used for streaming directly. - */ -void hchacha_block(const u32 *in, u32 *out, int nrounds) -{ - u32 x[16]; - - memcpy(x, in, 64); - - chacha_permute(x, nrounds); - - memcpy(&out[0], &x[0], 16); - memcpy(&out[4], &x[12], 16); -} -EXPORT_SYMBOL(hchacha_block); diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig index 261430051595..6a11931ae105 100644 --- a/lib/crypto/Kconfig +++ b/lib/crypto/Kconfig @@ -8,6 +8,32 @@ config CRYPTO_LIB_AES config CRYPTO_LIB_ARC4 tristate +config CRYPTO_ARCH_HAVE_LIB_CHACHA + tristate + help + Declares whether the architecture provides an arch-specific + accelerated implementation of the ChaCha library interface, + either builtin or as a module. + +config CRYPTO_LIB_CHACHA_GENERIC + tristate + select CRYPTO_ALGAPI + help + This symbol can be depended upon by arch implementations of the + ChaCha library interface that require the generic code as a + fallback, e.g., for SIMD implementations. If no arch specific + implementation is enabled, this implementation serves the users + of CRYPTO_LIB_CHACHA. + +config CRYPTO_LIB_CHACHA + tristate "ChaCha library interface" + depends on CRYPTO_ARCH_HAVE_LIB_CHACHA || !CRYPTO_ARCH_HAVE_LIB_CHACHA + select CRYPTO_LIB_CHACHA_GENERIC if CRYPTO_ARCH_HAVE_LIB_CHACHA=n + help + Enable the ChaCha library interface. This interface may be fulfilled + by either the generic implementation or an arch-specific one, if one + is available and enabled. 
+ config CRYPTO_LIB_DES tristate diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile index 63de4cb3fcf8..0ce40604e104 100644 --- a/lib/crypto/Makefile +++ b/lib/crypto/Makefile @@ -1,5 +1,9 @@ # SPDX-License-Identifier: GPL-2.0 +# chacha is used by the /dev/random driver which is always builtin +obj-y += chacha.o +obj-$(CONFIG_CRYPTO_LIB_CHACHA_GENERIC) += libchacha.o + obj-$(CONFIG_CRYPTO_LIB_AES) += libaes.o libaes-y := aes.o diff --git a/lib/crypto/chacha.c b/lib/crypto/chacha.c new file mode 100644 index 000000000000..65ead6b0c7e0 --- /dev/null +++ b/lib/crypto/chacha.c @@ -0,0 +1,115 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * The "hash function" used as the core of the ChaCha stream cipher (RFC7539) + * + * Copyright (C) 2015 Martin Willi + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +static void chacha_permute(u32 *x, int nrounds) +{ + int i; + + /* whitelist the allowed round counts */ + WARN_ON_ONCE(nrounds != 20 && nrounds != 12); + + for (i = 0; i < nrounds; i += 2) { + x[0] += x[4]; x[12] = rol32(x[12] ^ x[0], 16); + x[1] += x[5]; x[13] = rol32(x[13] ^ x[1], 16); + x[2] += x[6]; x[14] = rol32(x[14] ^ x[2], 16); + x[3] += x[7]; x[15] = rol32(x[15] ^ x[3], 16); + + x[8] += x[12]; x[4] = rol32(x[4] ^ x[8], 12); + x[9] += x[13]; x[5] = rol32(x[5] ^ x[9], 12); + x[10] += x[14]; x[6] = rol32(x[6] ^ x[10], 12); + x[11] += x[15]; x[7] = rol32(x[7] ^ x[11], 12); + + x[0] += x[4]; x[12] = rol32(x[12] ^ x[0], 8); + x[1] += x[5]; x[13] = rol32(x[13] ^ x[1], 8); + x[2] += x[6]; x[14] = rol32(x[14] ^ x[2], 8); + x[3] += x[7]; x[15] = rol32(x[15] ^ x[3], 8); + + x[8] += x[12]; x[4] = rol32(x[4] ^ x[8], 7); + x[9] += x[13]; x[5] = rol32(x[5] ^ x[9], 7); + x[10] += x[14]; x[6] = rol32(x[6] ^ x[10], 7); + x[11] += x[15]; x[7] = rol32(x[7] ^ x[11], 7); + + x[0] += x[5]; x[15] = rol32(x[15] ^ x[0], 16); + x[1] += x[6]; x[12] = rol32(x[12] ^ x[1], 16); + x[2] += x[7]; x[13] = rol32(x[13] ^ x[2], 16); + x[3] += x[4]; x[14] = rol32(x[14] ^ x[3], 16); + + x[10] += x[15]; x[5] = rol32(x[5] ^ x[10], 12); + x[11] += x[12]; x[6] = rol32(x[6] ^ x[11], 12); + x[8] += x[13]; x[7] = rol32(x[7] ^ x[8], 12); + x[9] += x[14]; x[4] = rol32(x[4] ^ x[9], 12); + + x[0] += x[5]; x[15] = rol32(x[15] ^ x[0], 8); + x[1] += x[6]; x[12] = rol32(x[12] ^ x[1], 8); + x[2] += x[7]; x[13] = rol32(x[13] ^ x[2], 8); + x[3] += x[4]; x[14] = rol32(x[14] ^ x[3], 8); + + x[10] += x[15]; x[5] = rol32(x[5] ^ x[10], 7); + x[11] += x[12]; x[6] = rol32(x[6] ^ x[11], 7); + x[8] += x[13]; x[7] = rol32(x[7] ^ x[8], 7); + x[9] += x[14]; x[4] = rol32(x[4] ^ x[9], 7); + } +} + +/** + * chacha_block - generate one keystream block and increment block counter + * @state: input state matrix (16 32-bit words) + * @stream: output keystream block (64 bytes) + * @nrounds: number of rounds (20 or 12; 20 is recommended) + * + * This is the ChaCha core, a function from 64-byte strings to 64-byte strings. + * The caller has already converted the endianness of the input. This function + * also handles incrementing the block counter in the input matrix. 
+ */ +void chacha_block_generic(u32 *state, u8 *stream, int nrounds) +{ + u32 x[16]; + int i; + + memcpy(x, state, 64); + + chacha_permute(x, nrounds); + + for (i = 0; i < ARRAY_SIZE(x); i++) + put_unaligned_le32(x[i] + state[i], &stream[i * sizeof(u32)]); + + state[12]++; +} +EXPORT_SYMBOL(chacha_block_generic); + +/** + * hchacha_block_generic - abbreviated ChaCha core, for XChaCha + * @state: input state matrix (16 32-bit words) + * @out: output (8 32-bit words) + * @nrounds: number of rounds (20 or 12; 20 is recommended) + * + * HChaCha is the ChaCha equivalent of HSalsa and is an intermediate step + * towards XChaCha (see https://cr.yp.to/snuffle/xsalsa-20081128.pdf). HChaCha + * skips the final addition of the initial state, and outputs only certain words + * of the state. It should not be used for streaming directly. + */ +void hchacha_block_generic(const u32 *state, u32 *stream, int nrounds) +{ + u32 x[16]; + + memcpy(x, state, 64); + + chacha_permute(x, nrounds); + + memcpy(&stream[0], &x[0], 16); + memcpy(&stream[4], &x[12], 16); +} +EXPORT_SYMBOL(hchacha_block_generic);
diff --git a/lib/crypto/libchacha.c b/lib/crypto/libchacha.c new file mode 100644 index 000000000000..dabc3accae05 --- /dev/null +++ b/lib/crypto/libchacha.c @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * The ChaCha stream cipher (RFC7539) + * + * Copyright (C) 2015 Martin Willi + */ + +#include <linux/kernel.h> +#include <linux/export.h> +#include <linux/module.h> + +#include <crypto/algapi.h> // for crypto_xor_cpy +#include <crypto/chacha.h> + +void chacha_crypt_generic(u32 *state, u8 *dst, const u8 *src, + unsigned int bytes, int nrounds) +{ + /* aligned to potentially speed up crypto_xor() */ + u8 stream[CHACHA_BLOCK_SIZE] __aligned(sizeof(long)); + + while (bytes >= CHACHA_BLOCK_SIZE) { + chacha_block_generic(state, stream, nrounds); + crypto_xor_cpy(dst, src, stream, CHACHA_BLOCK_SIZE); + bytes -= CHACHA_BLOCK_SIZE; + dst += CHACHA_BLOCK_SIZE; + src += CHACHA_BLOCK_SIZE; + } + if (bytes) { + chacha_block_generic(state, stream, nrounds); + crypto_xor_cpy(dst, src, stream, bytes); + } +} +EXPORT_SYMBOL(chacha_crypt_generic); + +MODULE_LICENSE("GPL");
-- cgit v1.2.3

From 28e8d89b1ce8d2e7badfb5f69971dd635acb8863 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 8 Nov 2019 13:22:09 +0100 Subject: crypto: x86/chacha - depend on generic chacha library instead of crypto driver

In preparation for extending the x86 ChaCha driver to also expose the ChaCha library interface, drop the dependency on the chacha_generic crypto driver as a non-SIMD fallback, and depend on the generic ChaCha library directly. This way, we only pull in the code we actually need, without registering a set of ChaCha skciphers that we will never use. Since turning the FPU on and off is cheap these days, simplify the SIMD routine by dropping the per-page yield, which makes for a cleaner switch to the library API as well. This also allows us to invoke the skcipher walk routines in non-atomic mode.
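Concretely, the per-iteration body of the walk loop reduces to the following (condensed from the diff below):

	if (!crypto_simd_usable()) {
		chacha_crypt_generic(state, walk.dst.virt.addr,
				     walk.src.virt.addr, nbytes,
				     ctx->nrounds);
	} else {
		kernel_fpu_begin();
		chacha_dosimd(state, walk.dst.virt.addr,
			      walk.src.virt.addr, nbytes, ctx->nrounds);
		kernel_fpu_end();
	}

so the FPU is held only for the duration of a single walk step, and the generic library code covers contexts where SIMD cannot be used.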
Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/x86/crypto/chacha_glue.c | 90 +++++++++++++++++-------------------------- crypto/Kconfig | 2 +- 2 files changed, 36 insertions(+), 56 deletions(-) (limited to 'arch') diff --git a/arch/x86/crypto/chacha_glue.c b/arch/x86/crypto/chacha_glue.c index bc62daa8dafd..0aabb382edce 100644 --- a/arch/x86/crypto/chacha_glue.c +++ b/arch/x86/crypto/chacha_glue.c @@ -123,37 +123,38 @@ static void chacha_dosimd(u32 *state, u8 *dst, const u8 *src, } } -static int chacha_simd_stream_xor(struct skcipher_walk *walk, +static int chacha_simd_stream_xor(struct skcipher_request *req, const struct chacha_ctx *ctx, const u8 *iv) { u32 *state, state_buf[16 + 2] __aligned(8); - int next_yield = 4096; /* bytes until next FPU yield */ - int err = 0; + struct skcipher_walk walk; + int err; + + err = skcipher_walk_virt(&walk, req, false); BUILD_BUG_ON(CHACHA_STATE_ALIGN != 16); state = PTR_ALIGN(state_buf + 0, CHACHA_STATE_ALIGN); - crypto_chacha_init(state, ctx, iv); + chacha_init_generic(state, ctx->key, iv); - while (walk->nbytes > 0) { - unsigned int nbytes = walk->nbytes; + while (walk.nbytes > 0) { + unsigned int nbytes = walk.nbytes; - if (nbytes < walk->total) { - nbytes = round_down(nbytes, walk->stride); - next_yield -= nbytes; - } + if (nbytes < walk.total) + nbytes = round_down(nbytes, walk.stride); - chacha_dosimd(state, walk->dst.virt.addr, walk->src.virt.addr, - nbytes, ctx->nrounds); - - if (next_yield <= 0) { - /* temporarily allow preemption */ - kernel_fpu_end(); + if (!crypto_simd_usable()) { + chacha_crypt_generic(state, walk.dst.virt.addr, + walk.src.virt.addr, nbytes, + ctx->nrounds); + } else { kernel_fpu_begin(); - next_yield = 4096; + chacha_dosimd(state, walk.dst.virt.addr, + walk.src.virt.addr, nbytes, + ctx->nrounds); + kernel_fpu_end(); } - - err = skcipher_walk_done(walk, walk->nbytes - nbytes); + err = skcipher_walk_done(&walk, walk.nbytes - nbytes); } return err; @@ -163,55 +164,34 @@ static int chacha_simd(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); - struct skcipher_walk walk; - int err; - if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable()) - return crypto_chacha_crypt(req); - - err = skcipher_walk_virt(&walk, req, true); - if (err) - return err; - - kernel_fpu_begin(); - err = chacha_simd_stream_xor(&walk, ctx, req->iv); - kernel_fpu_end(); - return err; + return chacha_simd_stream_xor(req, ctx, req->iv); } static int xchacha_simd(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); - struct skcipher_walk walk; - struct chacha_ctx subctx; u32 *state, state_buf[16 + 2] __aligned(8); + struct chacha_ctx subctx; u8 real_iv[16]; - int err; - - if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable()) - return crypto_xchacha_crypt(req); - - err = skcipher_walk_virt(&walk, req, true); - if (err) - return err; BUILD_BUG_ON(CHACHA_STATE_ALIGN != 16); state = PTR_ALIGN(state_buf + 0, CHACHA_STATE_ALIGN); - crypto_chacha_init(state, ctx, req->iv); - - kernel_fpu_begin(); - - hchacha_block_ssse3(state, subctx.key, ctx->nrounds); + chacha_init_generic(state, ctx->key, req->iv); + + if (req->cryptlen > CHACHA_BLOCK_SIZE && crypto_simd_usable()) { + kernel_fpu_begin(); + hchacha_block_ssse3(state, subctx.key, ctx->nrounds); + kernel_fpu_end(); + } else { + hchacha_block_generic(state, subctx.key, ctx->nrounds); + } subctx.nrounds 
= ctx->nrounds; memcpy(&real_iv[0], req->iv + 24, 8); memcpy(&real_iv[8], req->iv + 16, 8); - err = chacha_simd_stream_xor(&walk, &subctx, real_iv); - - kernel_fpu_end(); - - return err; + return chacha_simd_stream_xor(req, &subctx, real_iv); } static struct skcipher_alg algs[] = { @@ -227,7 +207,7 @@ static struct skcipher_alg algs[] = { .max_keysize = CHACHA_KEY_SIZE, .ivsize = CHACHA_IV_SIZE, .chunksize = CHACHA_BLOCK_SIZE, - .setkey = crypto_chacha20_setkey, + .setkey = chacha20_setkey, .encrypt = chacha_simd, .decrypt = chacha_simd, }, { @@ -242,7 +222,7 @@ static struct skcipher_alg algs[] = { .max_keysize = CHACHA_KEY_SIZE, .ivsize = XCHACHA_IV_SIZE, .chunksize = CHACHA_BLOCK_SIZE, - .setkey = crypto_chacha20_setkey, + .setkey = chacha20_setkey, .encrypt = xchacha_simd, .decrypt = xchacha_simd, }, { @@ -257,7 +237,7 @@ static struct skcipher_alg algs[] = { .max_keysize = CHACHA_KEY_SIZE, .ivsize = XCHACHA_IV_SIZE, .chunksize = CHACHA_BLOCK_SIZE, - .setkey = crypto_chacha12_setkey, + .setkey = chacha12_setkey, .encrypt = xchacha_simd, .decrypt = xchacha_simd, }, diff --git a/crypto/Kconfig b/crypto/Kconfig index ae4495ae003f..1abca30ed6ae 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1436,7 +1436,7 @@ config CRYPTO_CHACHA20_X86_64 tristate "ChaCha stream cipher algorithms (x86_64/SSSE3/AVX2/AVX-512VL)" depends on X86 && 64BIT select CRYPTO_SKCIPHER - select CRYPTO_CHACHA20 + select CRYPTO_LIB_CHACHA_GENERIC help SSSE3, AVX2, and AVX-512VL optimized implementations of the ChaCha20, XChaCha20, and XChaCha12 stream ciphers. -- cgit v1.2.3 From 84e03fa39fbe95a5567d43bff458c6d3b3a23ad1 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 8 Nov 2019 13:22:10 +0100 Subject: crypto: x86/chacha - expose SIMD ChaCha routine as library function Wire the existing x86 SIMD ChaCha code into the new ChaCha library interface, so that users of the library interface will get the accelerated version when available. Given that calls into the library API will always go through the routines in this module if it is enabled, switch to static keys to select the optimal implementation available (which may be none at all, in which case we defer to the generic implementation for all invocations). 
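In code, the selection pattern is roughly the following (abridged from the chacha_crypt_arch() added below; the real function also re-aligns the state pointer first):

	static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_simd);

	/* at module init, once the SSSE3 feature check has passed: */
	static_branch_enable(&chacha_use_simd);

	/* in the library entry point: */
	if (!static_branch_likely(&chacha_use_simd) || !crypto_simd_usable() ||
	    bytes <= CHACHA_BLOCK_SIZE)
		return chacha_crypt_generic(state, dst, src, bytes, nrounds);

	kernel_fpu_begin();
	chacha_dosimd(state, dst, src, bytes, nrounds);
	kernel_fpu_end();

Inputs of at most one block take the generic path, since the kernel_fpu_begin()/kernel_fpu_end() overhead is not worth paying for a single block.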
Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/x86/crypto/chacha_glue.c | 91 +++++++++++++++++++++++++++++++------------ crypto/Kconfig | 1 + include/crypto/chacha.h | 6 +++ 3 files changed, 73 insertions(+), 25 deletions(-) (limited to 'arch') diff --git a/arch/x86/crypto/chacha_glue.c b/arch/x86/crypto/chacha_glue.c index 0aabb382edce..b391e13a9e41 100644 --- a/arch/x86/crypto/chacha_glue.c +++ b/arch/x86/crypto/chacha_glue.c @@ -21,24 +21,24 @@ asmlinkage void chacha_block_xor_ssse3(u32 *state, u8 *dst, const u8 *src, asmlinkage void chacha_4block_xor_ssse3(u32 *state, u8 *dst, const u8 *src, unsigned int len, int nrounds); asmlinkage void hchacha_block_ssse3(const u32 *state, u32 *out, int nrounds); -#ifdef CONFIG_AS_AVX2 + asmlinkage void chacha_2block_xor_avx2(u32 *state, u8 *dst, const u8 *src, unsigned int len, int nrounds); asmlinkage void chacha_4block_xor_avx2(u32 *state, u8 *dst, const u8 *src, unsigned int len, int nrounds); asmlinkage void chacha_8block_xor_avx2(u32 *state, u8 *dst, const u8 *src, unsigned int len, int nrounds); -static bool chacha_use_avx2; -#ifdef CONFIG_AS_AVX512 + asmlinkage void chacha_2block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src, unsigned int len, int nrounds); asmlinkage void chacha_4block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src, unsigned int len, int nrounds); asmlinkage void chacha_8block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src, unsigned int len, int nrounds); -static bool chacha_use_avx512vl; -#endif -#endif + +static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_simd); +static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_avx2); +static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_avx512vl); static unsigned int chacha_advance(unsigned int len, unsigned int maxblocks) { @@ -49,9 +49,8 @@ static unsigned int chacha_advance(unsigned int len, unsigned int maxblocks) static void chacha_dosimd(u32 *state, u8 *dst, const u8 *src, unsigned int bytes, int nrounds) { -#ifdef CONFIG_AS_AVX2 -#ifdef CONFIG_AS_AVX512 - if (chacha_use_avx512vl) { + if (IS_ENABLED(CONFIG_AS_AVX512) && + static_branch_likely(&chacha_use_avx512vl)) { while (bytes >= CHACHA_BLOCK_SIZE * 8) { chacha_8block_xor_avx512vl(state, dst, src, bytes, nrounds); @@ -79,8 +78,9 @@ static void chacha_dosimd(u32 *state, u8 *dst, const u8 *src, return; } } -#endif - if (chacha_use_avx2) { + + if (IS_ENABLED(CONFIG_AS_AVX2) && + static_branch_likely(&chacha_use_avx2)) { while (bytes >= CHACHA_BLOCK_SIZE * 8) { chacha_8block_xor_avx2(state, dst, src, bytes, nrounds); bytes -= CHACHA_BLOCK_SIZE * 8; @@ -104,7 +104,7 @@ static void chacha_dosimd(u32 *state, u8 *dst, const u8 *src, return; } } -#endif + while (bytes >= CHACHA_BLOCK_SIZE * 4) { chacha_4block_xor_ssse3(state, dst, src, bytes, nrounds); bytes -= CHACHA_BLOCK_SIZE * 4; @@ -123,6 +123,43 @@ static void chacha_dosimd(u32 *state, u8 *dst, const u8 *src, } } +void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds) +{ + state = PTR_ALIGN(state, CHACHA_STATE_ALIGN); + + if (!static_branch_likely(&chacha_use_simd) || !crypto_simd_usable()) { + hchacha_block_generic(state, stream, nrounds); + } else { + kernel_fpu_begin(); + hchacha_block_ssse3(state, stream, nrounds); + kernel_fpu_end(); + } +} +EXPORT_SYMBOL(hchacha_block_arch); + +void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv) +{ + state = PTR_ALIGN(state, CHACHA_STATE_ALIGN); + + chacha_init_generic(state, key, iv); +} +EXPORT_SYMBOL(chacha_init_arch); + +void chacha_crypt_arch(u32 *state, u8 *dst, const 
u8 *src, unsigned int bytes, + int nrounds) +{ + state = PTR_ALIGN(state, CHACHA_STATE_ALIGN); + + if (!static_branch_likely(&chacha_use_simd) || !crypto_simd_usable() || + bytes <= CHACHA_BLOCK_SIZE) + return chacha_crypt_generic(state, dst, src, bytes, nrounds); + + kernel_fpu_begin(); + chacha_dosimd(state, dst, src, bytes, nrounds); + kernel_fpu_end(); +} +EXPORT_SYMBOL(chacha_crypt_arch); + static int chacha_simd_stream_xor(struct skcipher_request *req, const struct chacha_ctx *ctx, const u8 *iv) { @@ -143,7 +180,8 @@ static int chacha_simd_stream_xor(struct skcipher_request *req, if (nbytes < walk.total) nbytes = round_down(nbytes, walk.stride); - if (!crypto_simd_usable()) { + if (!static_branch_likely(&chacha_use_simd) || + !crypto_simd_usable()) { chacha_crypt_generic(state, walk.dst.virt.addr, walk.src.virt.addr, nbytes, ctx->nrounds); @@ -246,18 +284,21 @@ static struct skcipher_alg algs[] = { static int __init chacha_simd_mod_init(void) { if (!boot_cpu_has(X86_FEATURE_SSSE3)) - return -ENODEV; - -#ifdef CONFIG_AS_AVX2 - chacha_use_avx2 = boot_cpu_has(X86_FEATURE_AVX) && - boot_cpu_has(X86_FEATURE_AVX2) && - cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL); -#ifdef CONFIG_AS_AVX512 - chacha_use_avx512vl = chacha_use_avx2 && - boot_cpu_has(X86_FEATURE_AVX512VL) && - boot_cpu_has(X86_FEATURE_AVX512BW); /* kmovq */ -#endif -#endif + return 0; + + static_branch_enable(&chacha_use_simd); + + if (IS_ENABLED(CONFIG_AS_AVX2) && + boot_cpu_has(X86_FEATURE_AVX) && + boot_cpu_has(X86_FEATURE_AVX2) && + cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) { + static_branch_enable(&chacha_use_avx2); + + if (IS_ENABLED(CONFIG_AS_AVX512) && + boot_cpu_has(X86_FEATURE_AVX512VL) && + boot_cpu_has(X86_FEATURE_AVX512BW)) /* kmovq */ + static_branch_enable(&chacha_use_avx512vl); + } return crypto_register_skciphers(algs, ARRAY_SIZE(algs)); } diff --git a/crypto/Kconfig b/crypto/Kconfig index 1abca30ed6ae..07762de1237f 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1437,6 +1437,7 @@ config CRYPTO_CHACHA20_X86_64 depends on X86 && 64BIT select CRYPTO_SKCIPHER select CRYPTO_LIB_CHACHA_GENERIC + select CRYPTO_ARCH_HAVE_LIB_CHACHA help SSSE3, AVX2, and AVX-512VL optimized implementations of the ChaCha20, XChaCha20, and XChaCha12 stream ciphers. diff --git a/include/crypto/chacha.h b/include/crypto/chacha.h index 5c662f8fecac..2676f4fbd4c1 100644 --- a/include/crypto/chacha.h +++ b/include/crypto/chacha.h @@ -25,6 +25,12 @@ #define CHACHA_BLOCK_SIZE 64 #define CHACHAPOLY_IV_SIZE 12 +#ifdef CONFIG_X86_64 +#define CHACHA_STATE_WORDS ((CHACHA_BLOCK_SIZE + 12) / sizeof(u32)) +#else +#define CHACHA_STATE_WORDS (CHACHA_BLOCK_SIZE / sizeof(u32)) +#endif + /* 192-bit nonce, then 64-bit stream position */ #define XCHACHA_IV_SIZE 32 -- cgit v1.2.3 From c77da4867cbb7841177275dbb250f5c09679fae4 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 8 Nov 2019 13:22:11 +0100 Subject: crypto: arm64/chacha - depend on generic chacha library instead of crypto driver Depend on the generic ChaCha library routines instead of pulling in the generic ChaCha skcipher driver, which is more than we need, and makes managing the dependencies between the generic library, generic driver, accelerated library and driver more complicated. While at it, drop the logic to prefer the scalar code on short inputs. 
Turning the NEON on and off is cheap these days, and one major use case for ChaCha20 is ChaCha20-Poly1305, which is guaranteed to hit the scalar path upon every invocation (when doing the Poly1305 nonce generation) Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm64/crypto/Kconfig | 2 +- arch/arm64/crypto/chacha-neon-glue.c | 40 ++++++++++++++++++++---------------- 2 files changed, 23 insertions(+), 19 deletions(-) (limited to 'arch') diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig index 286e3514d34c..22c6642ae464 100644 --- a/arch/arm64/crypto/Kconfig +++ b/arch/arm64/crypto/Kconfig @@ -103,7 +103,7 @@ config CRYPTO_CHACHA20_NEON tristate "ChaCha20, XChaCha20, and XChaCha12 stream ciphers using NEON instructions" depends on KERNEL_MODE_NEON select CRYPTO_SKCIPHER - select CRYPTO_CHACHA20 + select CRYPTO_LIB_CHACHA_GENERIC config CRYPTO_NHPOLY1305_NEON tristate "NHPoly1305 hash function using NEON instructions (for Adiantum)" diff --git a/arch/arm64/crypto/chacha-neon-glue.c b/arch/arm64/crypto/chacha-neon-glue.c index d4cc61bfe79d..cae2cb92eca8 100644 --- a/arch/arm64/crypto/chacha-neon-glue.c +++ b/arch/arm64/crypto/chacha-neon-glue.c @@ -68,7 +68,7 @@ static int chacha_neon_stream_xor(struct skcipher_request *req, err = skcipher_walk_virt(&walk, req, false); - crypto_chacha_init(state, ctx, iv); + chacha_init_generic(state, ctx->key, iv); while (walk.nbytes > 0) { unsigned int nbytes = walk.nbytes; @@ -76,10 +76,16 @@ static int chacha_neon_stream_xor(struct skcipher_request *req, if (nbytes < walk.total) nbytes = rounddown(nbytes, walk.stride); - kernel_neon_begin(); - chacha_doneon(state, walk.dst.virt.addr, walk.src.virt.addr, - nbytes, ctx->nrounds); - kernel_neon_end(); + if (!crypto_simd_usable()) { + chacha_crypt_generic(state, walk.dst.virt.addr, + walk.src.virt.addr, nbytes, + ctx->nrounds); + } else { + kernel_neon_begin(); + chacha_doneon(state, walk.dst.virt.addr, + walk.src.virt.addr, nbytes, ctx->nrounds); + kernel_neon_end(); + } err = skcipher_walk_done(&walk, walk.nbytes - nbytes); } @@ -91,9 +97,6 @@ static int chacha_neon(struct skcipher_request *req) struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); - if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable()) - return crypto_chacha_crypt(req); - return chacha_neon_stream_xor(req, ctx, req->iv); } @@ -105,14 +108,15 @@ static int xchacha_neon(struct skcipher_request *req) u32 state[16]; u8 real_iv[16]; - if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable()) - return crypto_xchacha_crypt(req); + chacha_init_generic(state, ctx->key, req->iv); - crypto_chacha_init(state, ctx, req->iv); - - kernel_neon_begin(); - hchacha_block_neon(state, subctx.key, ctx->nrounds); - kernel_neon_end(); + if (crypto_simd_usable()) { + kernel_neon_begin(); + hchacha_block_neon(state, subctx.key, ctx->nrounds); + kernel_neon_end(); + } else { + hchacha_block_generic(state, subctx.key, ctx->nrounds); + } subctx.nrounds = ctx->nrounds; memcpy(&real_iv[0], req->iv + 24, 8); @@ -134,7 +138,7 @@ static struct skcipher_alg algs[] = { .ivsize = CHACHA_IV_SIZE, .chunksize = CHACHA_BLOCK_SIZE, .walksize = 5 * CHACHA_BLOCK_SIZE, - .setkey = crypto_chacha20_setkey, + .setkey = chacha20_setkey, .encrypt = chacha_neon, .decrypt = chacha_neon, }, { @@ -150,7 +154,7 @@ static struct skcipher_alg algs[] = { .ivsize = XCHACHA_IV_SIZE, .chunksize = CHACHA_BLOCK_SIZE, .walksize = 5 * CHACHA_BLOCK_SIZE, - .setkey = crypto_chacha20_setkey, + .setkey 
= chacha20_setkey, .encrypt = xchacha_neon, .decrypt = xchacha_neon, }, { @@ -166,7 +170,7 @@ static struct skcipher_alg algs[] = { .ivsize = XCHACHA_IV_SIZE, .chunksize = CHACHA_BLOCK_SIZE, .walksize = 5 * CHACHA_BLOCK_SIZE, - .setkey = crypto_chacha12_setkey, + .setkey = chacha12_setkey, .encrypt = xchacha_neon, .decrypt = xchacha_neon, } -- cgit v1.2.3 From b3aad5bad26a01a4bd8c49a5c5f52aec665f3b7c Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 8 Nov 2019 13:22:12 +0100 Subject: crypto: arm64/chacha - expose arm64 ChaCha routine as library function Expose the accelerated NEON ChaCha routine directly as a symbol export so that users of the ChaCha library API can use it directly. Given that calls into the library API will always go through the routines in this module if it is enabled, switch to static keys to select the optimal implementation available (which may be none at all, in which case we defer to the generic implementation for all invocations). Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm64/crypto/Kconfig | 1 + arch/arm64/crypto/chacha-neon-glue.c | 53 ++++++++++++++++++++++++++++-------- 2 files changed, 43 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig index 22c6642ae464..ffb827b84d6c 100644 --- a/arch/arm64/crypto/Kconfig +++ b/arch/arm64/crypto/Kconfig @@ -104,6 +104,7 @@ config CRYPTO_CHACHA20_NEON depends on KERNEL_MODE_NEON select CRYPTO_SKCIPHER select CRYPTO_LIB_CHACHA_GENERIC + select CRYPTO_ARCH_HAVE_LIB_CHACHA config CRYPTO_NHPOLY1305_NEON tristate "NHPoly1305 hash function using NEON instructions (for Adiantum)" diff --git a/arch/arm64/crypto/chacha-neon-glue.c b/arch/arm64/crypto/chacha-neon-glue.c index cae2cb92eca8..46cd4297761c 100644 --- a/arch/arm64/crypto/chacha-neon-glue.c +++ b/arch/arm64/crypto/chacha-neon-glue.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -36,6 +37,8 @@ asmlinkage void chacha_4block_xor_neon(u32 *state, u8 *dst, const u8 *src, int nrounds, int bytes); asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds); +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); + static void chacha_doneon(u32 *state, u8 *dst, const u8 *src, int bytes, int nrounds) { @@ -59,6 +62,37 @@ static void chacha_doneon(u32 *state, u8 *dst, const u8 *src, } } +void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds) +{ + if (!static_branch_likely(&have_neon) || !crypto_simd_usable()) { + hchacha_block_generic(state, stream, nrounds); + } else { + kernel_neon_begin(); + hchacha_block_neon(state, stream, nrounds); + kernel_neon_end(); + } +} +EXPORT_SYMBOL(hchacha_block_arch); + +void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv) +{ + chacha_init_generic(state, key, iv); +} +EXPORT_SYMBOL(chacha_init_arch); + +void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes, + int nrounds) +{ + if (!static_branch_likely(&have_neon) || bytes <= CHACHA_BLOCK_SIZE || + !crypto_simd_usable()) + return chacha_crypt_generic(state, dst, src, bytes, nrounds); + + kernel_neon_begin(); + chacha_doneon(state, dst, src, bytes, nrounds); + kernel_neon_end(); +} +EXPORT_SYMBOL(chacha_crypt_arch); + static int chacha_neon_stream_xor(struct skcipher_request *req, const struct chacha_ctx *ctx, const u8 *iv) { @@ -76,7 +110,8 @@ static int chacha_neon_stream_xor(struct skcipher_request *req, if (nbytes < walk.total) nbytes = rounddown(nbytes, walk.stride); - if (!crypto_simd_usable()) { + if 
(!static_branch_likely(&have_neon) || + !crypto_simd_usable()) { chacha_crypt_generic(state, walk.dst.virt.addr, walk.src.virt.addr, nbytes, ctx->nrounds); @@ -109,14 +144,7 @@ static int xchacha_neon(struct skcipher_request *req) u8 real_iv[16]; chacha_init_generic(state, ctx->key, req->iv); - - if (crypto_simd_usable()) { - kernel_neon_begin(); - hchacha_block_neon(state, subctx.key, ctx->nrounds); - kernel_neon_end(); - } else { - hchacha_block_generic(state, subctx.key, ctx->nrounds); - } + hchacha_block_arch(state, subctx.key, ctx->nrounds); subctx.nrounds = ctx->nrounds; memcpy(&real_iv[0], req->iv + 24, 8); @@ -179,14 +207,17 @@ static struct skcipher_alg algs[] = { static int __init chacha_simd_mod_init(void) { if (!cpu_have_named_feature(ASIMD)) - return -ENODEV; + return 0; + + static_branch_enable(&have_neon); return crypto_register_skciphers(algs, ARRAY_SIZE(algs)); } static void __exit chacha_simd_mod_fini(void) { - crypto_unregister_skciphers(algs, ARRAY_SIZE(algs)); + if (cpu_have_named_feature(ASIMD)) + crypto_unregister_skciphers(algs, ARRAY_SIZE(algs)); } module_init(chacha_simd_mod_init); -- cgit v1.2.3 From 29621d099f9c642b22a69dc8e7e20c108473a392 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 8 Nov 2019 13:22:13 +0100 Subject: crypto: arm/chacha - import Eric Biggers's scalar accelerated ChaCha code Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm/crypto/chacha-scalar-core.S | 461 +++++++++++++++++++++++++++++++++++ 1 file changed, 461 insertions(+) create mode 100644 arch/arm/crypto/chacha-scalar-core.S (limited to 'arch') diff --git a/arch/arm/crypto/chacha-scalar-core.S b/arch/arm/crypto/chacha-scalar-core.S new file mode 100644 index 000000000000..2140319b64a0 --- /dev/null +++ b/arch/arm/crypto/chacha-scalar-core.S @@ -0,0 +1,461 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2018 Google, Inc. + */ + +#include +#include + +/* + * Design notes: + * + * 16 registers would be needed to hold the state matrix, but only 14 are + * available because 'sp' and 'pc' cannot be used. So we spill the elements + * (x8, x9) to the stack and swap them out with (x10, x11). This adds one + * 'ldrd' and one 'strd' instruction per round. + * + * All rotates are performed using the implicit rotate operand accepted by the + * 'add' and 'eor' instructions. This is faster than using explicit rotate + * instructions. To make this work, we allow the values in the second and last + * rows of the ChaCha state matrix (rows 'b' and 'd') to temporarily have the + * wrong rotation amount. The rotation amount is then fixed up just in time + * when the values are used. 'brot' is the number of bits the values in row 'b' + * need to be rotated right to arrive at the correct values, and 'drot' + * similarly for row 'd'. (brot, drot) start out as (0, 0) but we make it such + * that they end up as (25, 24) after every round. 
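+ *
+ * For example, the final quarterround step 'b = rol(b, 7)' is not
+ * executed on its own: the unrotated value is kept and brot becomes 25,
+ * so the next use of b, such as 'a += b', is a single instruction:
+ *
+ *	add	a, a, b, ror #brot	// a += ror(b, 25) == a += rol(b, 7)
+ *
+ * which folds the pending rotation into the add for free.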
+ */ + + // ChaCha state registers + X0 .req r0 + X1 .req r1 + X2 .req r2 + X3 .req r3 + X4 .req r4 + X5 .req r5 + X6 .req r6 + X7 .req r7 + X8_X10 .req r8 // shared by x8 and x10 + X9_X11 .req r9 // shared by x9 and x11 + X12 .req r10 + X13 .req r11 + X14 .req r12 + X15 .req r14 + +.Lexpand_32byte_k: + // "expand 32-byte k" + .word 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574 + +#ifdef __thumb2__ +# define adrl adr +#endif + +.macro __rev out, in, t0, t1, t2 +.if __LINUX_ARM_ARCH__ >= 6 + rev \out, \in +.else + lsl \t0, \in, #24 + and \t1, \in, #0xff00 + and \t2, \in, #0xff0000 + orr \out, \t0, \in, lsr #24 + orr \out, \out, \t1, lsl #8 + orr \out, \out, \t2, lsr #8 +.endif +.endm + +.macro _le32_bswap x, t0, t1, t2 +#ifdef __ARMEB__ + __rev \x, \x, \t0, \t1, \t2 +#endif +.endm + +.macro _le32_bswap_4x a, b, c, d, t0, t1, t2 + _le32_bswap \a, \t0, \t1, \t2 + _le32_bswap \b, \t0, \t1, \t2 + _le32_bswap \c, \t0, \t1, \t2 + _le32_bswap \d, \t0, \t1, \t2 +.endm + +.macro __ldrd a, b, src, offset +#if __LINUX_ARM_ARCH__ >= 6 + ldrd \a, \b, [\src, #\offset] +#else + ldr \a, [\src, #\offset] + ldr \b, [\src, #\offset + 4] +#endif +.endm + +.macro __strd a, b, dst, offset +#if __LINUX_ARM_ARCH__ >= 6 + strd \a, \b, [\dst, #\offset] +#else + str \a, [\dst, #\offset] + str \b, [\dst, #\offset + 4] +#endif +.endm + +.macro _halfround a1, b1, c1, d1, a2, b2, c2, d2 + + // a += b; d ^= a; d = rol(d, 16); + add \a1, \a1, \b1, ror #brot + add \a2, \a2, \b2, ror #brot + eor \d1, \a1, \d1, ror #drot + eor \d2, \a2, \d2, ror #drot + // drot == 32 - 16 == 16 + + // c += d; b ^= c; b = rol(b, 12); + add \c1, \c1, \d1, ror #16 + add \c2, \c2, \d2, ror #16 + eor \b1, \c1, \b1, ror #brot + eor \b2, \c2, \b2, ror #brot + // brot == 32 - 12 == 20 + + // a += b; d ^= a; d = rol(d, 8); + add \a1, \a1, \b1, ror #20 + add \a2, \a2, \b2, ror #20 + eor \d1, \a1, \d1, ror #16 + eor \d2, \a2, \d2, ror #16 + // drot == 32 - 8 == 24 + + // c += d; b ^= c; b = rol(b, 7); + add \c1, \c1, \d1, ror #24 + add \c2, \c2, \d2, ror #24 + eor \b1, \c1, \b1, ror #20 + eor \b2, \c2, \b2, ror #20 + // brot == 32 - 7 == 25 +.endm + +.macro _doubleround + + // column round + + // quarterrounds: (x0, x4, x8, x12) and (x1, x5, x9, x13) + _halfround X0, X4, X8_X10, X12, X1, X5, X9_X11, X13 + + // save (x8, x9); restore (x10, x11) + __strd X8_X10, X9_X11, sp, 0 + __ldrd X8_X10, X9_X11, sp, 8 + + // quarterrounds: (x2, x6, x10, x14) and (x3, x7, x11, x15) + _halfround X2, X6, X8_X10, X14, X3, X7, X9_X11, X15 + + .set brot, 25 + .set drot, 24 + + // diagonal round + + // quarterrounds: (x0, x5, x10, x15) and (x1, x6, x11, x12) + _halfround X0, X5, X8_X10, X15, X1, X6, X9_X11, X12 + + // save (x10, x11); restore (x8, x9) + __strd X8_X10, X9_X11, sp, 8 + __ldrd X8_X10, X9_X11, sp, 0 + + // quarterrounds: (x2, x7, x8, x13) and (x3, x4, x9, x14) + _halfround X2, X7, X8_X10, X13, X3, X4, X9_X11, X14 +.endm + +.macro _chacha_permute nrounds + .set brot, 0 + .set drot, 0 + .rept \nrounds / 2 + _doubleround + .endr +.endm + +.macro _chacha nrounds + +.Lnext_block\@: + // Stack: unused0-unused1 x10-x11 x0-x15 OUT IN LEN + // Registers contain x0-x9,x12-x15. + + // Do the core ChaCha permutation to update x0-x15. + _chacha_permute \nrounds + + add sp, #8 + // Stack: x10-x11 orig_x0-orig_x15 OUT IN LEN + // Registers contain x0-x9,x12-x15. + // x4-x7 are rotated by 'brot'; x12-x15 are rotated by 'drot'. + + // Free up some registers (r8-r12,r14) by pushing (x8-x9,x12-x15). + push {X8_X10, X9_X11, X12, X13, X14, X15} + + // Load (OUT, IN, LEN). 
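+	// (offset 96 = 24 bytes of registers just pushed + 8 bytes for the
+	// x10-x11 copy + 64 bytes of saved state, per the layout above)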
+ ldr r14, [sp, #96] + ldr r12, [sp, #100] + ldr r11, [sp, #104] + + orr r10, r14, r12 + + // Use slow path if fewer than 64 bytes remain. + cmp r11, #64 + blt .Lxor_slowpath\@ + + // Use slow path if IN and/or OUT isn't 4-byte aligned. Needed even on + // ARMv6+, since ldmia and stmia (used below) still require alignment. + tst r10, #3 + bne .Lxor_slowpath\@ + + // Fast path: XOR 64 bytes of aligned data. + + // Stack: x8-x9 x12-x15 x10-x11 orig_x0-orig_x15 OUT IN LEN + // Registers: r0-r7 are x0-x7; r8-r11 are free; r12 is IN; r14 is OUT. + // x4-x7 are rotated by 'brot'; x12-x15 are rotated by 'drot'. + + // x0-x3 + __ldrd r8, r9, sp, 32 + __ldrd r10, r11, sp, 40 + add X0, X0, r8 + add X1, X1, r9 + add X2, X2, r10 + add X3, X3, r11 + _le32_bswap_4x X0, X1, X2, X3, r8, r9, r10 + ldmia r12!, {r8-r11} + eor X0, X0, r8 + eor X1, X1, r9 + eor X2, X2, r10 + eor X3, X3, r11 + stmia r14!, {X0-X3} + + // x4-x7 + __ldrd r8, r9, sp, 48 + __ldrd r10, r11, sp, 56 + add X4, r8, X4, ror #brot + add X5, r9, X5, ror #brot + ldmia r12!, {X0-X3} + add X6, r10, X6, ror #brot + add X7, r11, X7, ror #brot + _le32_bswap_4x X4, X5, X6, X7, r8, r9, r10 + eor X4, X4, X0 + eor X5, X5, X1 + eor X6, X6, X2 + eor X7, X7, X3 + stmia r14!, {X4-X7} + + // x8-x15 + pop {r0-r7} // (x8-x9,x12-x15,x10-x11) + __ldrd r8, r9, sp, 32 + __ldrd r10, r11, sp, 40 + add r0, r0, r8 // x8 + add r1, r1, r9 // x9 + add r6, r6, r10 // x10 + add r7, r7, r11 // x11 + _le32_bswap_4x r0, r1, r6, r7, r8, r9, r10 + ldmia r12!, {r8-r11} + eor r0, r0, r8 // x8 + eor r1, r1, r9 // x9 + eor r6, r6, r10 // x10 + eor r7, r7, r11 // x11 + stmia r14!, {r0,r1,r6,r7} + ldmia r12!, {r0,r1,r6,r7} + __ldrd r8, r9, sp, 48 + __ldrd r10, r11, sp, 56 + add r2, r8, r2, ror #drot // x12 + add r3, r9, r3, ror #drot // x13 + add r4, r10, r4, ror #drot // x14 + add r5, r11, r5, ror #drot // x15 + _le32_bswap_4x r2, r3, r4, r5, r9, r10, r11 + ldr r9, [sp, #72] // load LEN + eor r2, r2, r0 // x12 + eor r3, r3, r1 // x13 + eor r4, r4, r6 // x14 + eor r5, r5, r7 // x15 + subs r9, #64 // decrement and check LEN + stmia r14!, {r2-r5} + + beq .Ldone\@ + +.Lprepare_for_next_block\@: + + // Stack: x0-x15 OUT IN LEN + + // Increment block counter (x12) + add r8, #1 + + // Store updated (OUT, IN, LEN) + str r14, [sp, #64] + str r12, [sp, #68] + str r9, [sp, #72] + + mov r14, sp + + // Store updated block counter (x12) + str r8, [sp, #48] + + sub sp, #16 + + // Reload state and do next block + ldmia r14!, {r0-r11} // load x0-x11 + __strd r10, r11, sp, 8 // store x10-x11 before state + ldmia r14, {r10-r12,r14} // load x12-x15 + b .Lnext_block\@ + +.Lxor_slowpath\@: + // Slow path: < 64 bytes remaining, or unaligned input or output buffer. + // We handle it by storing the 64 bytes of keystream to the stack, then + // XOR-ing the needed portion with the data. + + // Allocate keystream buffer + sub sp, #64 + mov r14, sp + + // Stack: ks0-ks15 x8-x9 x12-x15 x10-x11 orig_x0-orig_x15 OUT IN LEN + // Registers: r0-r7 are x0-x7; r8-r11 are free; r12 is IN; r14 is &ks0. + // x4-x7 are rotated by 'brot'; x12-x15 are rotated by 'drot'. 
+ + // Save keystream for x0-x3 + __ldrd r8, r9, sp, 96 + __ldrd r10, r11, sp, 104 + add X0, X0, r8 + add X1, X1, r9 + add X2, X2, r10 + add X3, X3, r11 + _le32_bswap_4x X0, X1, X2, X3, r8, r9, r10 + stmia r14!, {X0-X3} + + // Save keystream for x4-x7 + __ldrd r8, r9, sp, 112 + __ldrd r10, r11, sp, 120 + add X4, r8, X4, ror #brot + add X5, r9, X5, ror #brot + add X6, r10, X6, ror #brot + add X7, r11, X7, ror #brot + _le32_bswap_4x X4, X5, X6, X7, r8, r9, r10 + add r8, sp, #64 + stmia r14!, {X4-X7} + + // Save keystream for x8-x15 + ldm r8, {r0-r7} // (x8-x9,x12-x15,x10-x11) + __ldrd r8, r9, sp, 128 + __ldrd r10, r11, sp, 136 + add r0, r0, r8 // x8 + add r1, r1, r9 // x9 + add r6, r6, r10 // x10 + add r7, r7, r11 // x11 + _le32_bswap_4x r0, r1, r6, r7, r8, r9, r10 + stmia r14!, {r0,r1,r6,r7} + __ldrd r8, r9, sp, 144 + __ldrd r10, r11, sp, 152 + add r2, r8, r2, ror #drot // x12 + add r3, r9, r3, ror #drot // x13 + add r4, r10, r4, ror #drot // x14 + add r5, r11, r5, ror #drot // x15 + _le32_bswap_4x r2, r3, r4, r5, r9, r10, r11 + stmia r14, {r2-r5} + + // Stack: ks0-ks15 unused0-unused7 x0-x15 OUT IN LEN + // Registers: r8 is block counter, r12 is IN. + + ldr r9, [sp, #168] // LEN + ldr r14, [sp, #160] // OUT + cmp r9, #64 + mov r0, sp + movle r1, r9 + movgt r1, #64 + // r1 is number of bytes to XOR, in range [1, 64] + +.if __LINUX_ARM_ARCH__ < 6 + orr r2, r12, r14 + tst r2, #3 // IN or OUT misaligned? + bne .Lxor_next_byte\@ +.endif + + // XOR a word at a time +.rept 16 + subs r1, #4 + blt .Lxor_words_done\@ + ldr r2, [r12], #4 + ldr r3, [r0], #4 + eor r2, r2, r3 + str r2, [r14], #4 +.endr + b .Lxor_slowpath_done\@ +.Lxor_words_done\@: + ands r1, r1, #3 + beq .Lxor_slowpath_done\@ + + // XOR a byte at a time +.Lxor_next_byte\@: + ldrb r2, [r12], #1 + ldrb r3, [r0], #1 + eor r2, r2, r3 + strb r2, [r14], #1 + subs r1, #1 + bne .Lxor_next_byte\@ + +.Lxor_slowpath_done\@: + subs r9, #64 + add sp, #96 + bgt .Lprepare_for_next_block\@ + +.Ldone\@: +.endm // _chacha + +/* + * void chacha20_arm(u8 *out, const u8 *in, size_t len, const u32 key[8], + * const u32 iv[4]); + */ +ENTRY(chacha20_arm) + cmp r2, #0 // len == 0? + reteq lr + + push {r0-r2,r4-r11,lr} + + // Push state x0-x15 onto stack. + // Also store an extra copy of x10-x11 just before the state. 
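+	// (iv is the fifth argument, passed on the stack per the AAPCS;
+	// the 12 registers pushed above it put it at sp + 48)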
+ + ldr r4, [sp, #48] // iv + mov r0, sp + sub sp, #80 + + // iv: x12-x15 + ldm r4, {X12,X13,X14,X15} + stmdb r0!, {X12,X13,X14,X15} + + // key: x4-x11 + __ldrd X8_X10, X9_X11, r3, 24 + __strd X8_X10, X9_X11, sp, 8 + stmdb r0!, {X8_X10, X9_X11} + ldm r3, {X4-X9_X11} + stmdb r0!, {X4-X9_X11} + + // constants: x0-x3 + adrl X3, .Lexpand_32byte_k + ldm X3, {X0-X3} + __strd X0, X1, sp, 16 + __strd X2, X3, sp, 24 + + _chacha 20 + + add sp, #76 + pop {r4-r11, pc} +ENDPROC(chacha20_arm) + +/* + * void hchacha20_arm(const u32 state[16], u32 out[8]); + */ +ENTRY(hchacha20_arm) + push {r1,r4-r11,lr} + + mov r14, r0 + ldmia r14!, {r0-r11} // load x0-x11 + push {r10-r11} // store x10-x11 to stack + ldm r14, {r10-r12,r14} // load x12-x15 + sub sp, #8 + + _chacha_permute 20 + + // Skip over (unused0-unused1, x10-x11) + add sp, #16 + + // Fix up rotations of x12-x15 + ror X12, X12, #drot + ror X13, X13, #drot + pop {r4} // load 'out' + ror X14, X14, #drot + ror X15, X15, #drot + + // Store (x0-x3,x12-x15) to 'out' + stm r4, {X0,X1,X2,X3,X12,X13,X14,X15} + + pop {r4-r11,pc} +ENDPROC(hchacha20_arm) -- cgit v1.2.3 From b36d8c09e710c71f6a9690b6586fea2d1c9e1e27 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 8 Nov 2019 13:22:14 +0100 Subject: crypto: arm/chacha - remove dependency on generic ChaCha driver Instead of falling back to the generic ChaCha skcipher driver for non-SIMD cases, use a fast scalar implementation for ARM authored by Eric Biggers. This removes the module dependency on chacha-generic altogether, which also simplifies things when we expose the ChaCha library interface from this module. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm/crypto/Kconfig | 4 +- arch/arm/crypto/Makefile | 3 +- arch/arm/crypto/chacha-glue.c | 304 +++++++++++++++++++++++++++++++++++ arch/arm/crypto/chacha-neon-glue.c | 202 ----------------------- arch/arm/crypto/chacha-scalar-core.S | 65 ++++---- arch/arm64/crypto/chacha-neon-glue.c | 2 +- 6 files changed, 340 insertions(+), 240 deletions(-) create mode 100644 arch/arm/crypto/chacha-glue.c delete mode 100644 arch/arm/crypto/chacha-neon-glue.c (limited to 'arch') diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig index c618c379449f..43452009ebd4 100644 --- a/arch/arm/crypto/Kconfig +++ b/arch/arm/crypto/Kconfig @@ -128,10 +128,8 @@ config CRYPTO_CRC32_ARM_CE select CRYPTO_HASH config CRYPTO_CHACHA20_NEON - tristate "NEON accelerated ChaCha stream cipher algorithms" - depends on KERNEL_MODE_NEON + tristate "NEON and scalar accelerated ChaCha stream cipher algorithms" select CRYPTO_SKCIPHER - select CRYPTO_CHACHA20 config CRYPTO_NHPOLY1305_NEON tristate "NEON accelerated NHPoly1305 hash function (for Adiantum)" diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile index c0d36771a693..0e550badf8ed 100644 --- a/arch/arm/crypto/Makefile +++ b/arch/arm/crypto/Makefile @@ -33,7 +33,8 @@ aes-arm-ce-y := aes-ce-core.o aes-ce-glue.o ghash-arm-ce-y := ghash-ce-core.o ghash-ce-glue.o crct10dif-arm-ce-y := crct10dif-ce-core.o crct10dif-ce-glue.o crc32-arm-ce-y:= crc32-ce-core.o crc32-ce-glue.o -chacha-neon-y := chacha-neon-core.o chacha-neon-glue.o +chacha-neon-y := chacha-scalar-core.o chacha-glue.o +chacha-neon-$(CONFIG_KERNEL_MODE_NEON) += chacha-neon-core.o nhpoly1305-neon-y := nh-neon-core.o nhpoly1305-neon-glue.o ifdef REGENERATE_ARM_CRYPTO diff --git a/arch/arm/crypto/chacha-glue.c b/arch/arm/crypto/chacha-glue.c new file mode 100644 index 000000000000..eb40efb3eb34 --- /dev/null +++ b/arch/arm/crypto/chacha-glue.c @@ -0,0 
+1,304 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ARM NEON accelerated ChaCha and XChaCha stream ciphers, + * including ChaCha20 (RFC7539) + * + * Copyright (C) 2016-2019 Linaro, Ltd. + * Copyright (C) 2015 Martin Willi + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +asmlinkage void chacha_block_xor_neon(const u32 *state, u8 *dst, const u8 *src, + int nrounds); +asmlinkage void chacha_4block_xor_neon(const u32 *state, u8 *dst, const u8 *src, + int nrounds); +asmlinkage void hchacha_block_arm(const u32 *state, u32 *out, int nrounds); +asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds); + +asmlinkage void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes, + const u32 *state, int nrounds); + +static inline bool neon_usable(void) +{ + return crypto_simd_usable(); +} + +static void chacha_doneon(u32 *state, u8 *dst, const u8 *src, + unsigned int bytes, int nrounds) +{ + u8 buf[CHACHA_BLOCK_SIZE]; + + while (bytes >= CHACHA_BLOCK_SIZE * 4) { + chacha_4block_xor_neon(state, dst, src, nrounds); + bytes -= CHACHA_BLOCK_SIZE * 4; + src += CHACHA_BLOCK_SIZE * 4; + dst += CHACHA_BLOCK_SIZE * 4; + state[12] += 4; + } + while (bytes >= CHACHA_BLOCK_SIZE) { + chacha_block_xor_neon(state, dst, src, nrounds); + bytes -= CHACHA_BLOCK_SIZE; + src += CHACHA_BLOCK_SIZE; + dst += CHACHA_BLOCK_SIZE; + state[12]++; + } + if (bytes) { + memcpy(buf, src, bytes); + chacha_block_xor_neon(state, buf, buf, nrounds); + memcpy(dst, buf, bytes); + } +} + +static int chacha_stream_xor(struct skcipher_request *req, + const struct chacha_ctx *ctx, const u8 *iv, + bool neon) +{ + struct skcipher_walk walk; + u32 state[16]; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + chacha_init_generic(state, ctx->key, iv); + + while (walk.nbytes > 0) { + unsigned int nbytes = walk.nbytes; + + if (nbytes < walk.total) + nbytes = round_down(nbytes, walk.stride); + + if (!neon) { + chacha_doarm(walk.dst.virt.addr, walk.src.virt.addr, + nbytes, state, ctx->nrounds); + state[12] += DIV_ROUND_UP(nbytes, CHACHA_BLOCK_SIZE); + } else { + kernel_neon_begin(); + chacha_doneon(state, walk.dst.virt.addr, + walk.src.virt.addr, nbytes, ctx->nrounds); + kernel_neon_end(); + } + err = skcipher_walk_done(&walk, walk.nbytes - nbytes); + } + + return err; +} + +static int do_chacha(struct skcipher_request *req, bool neon) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); + + return chacha_stream_xor(req, ctx, req->iv, neon); +} + +static int chacha_arm(struct skcipher_request *req) +{ + return do_chacha(req, false); +} + +static int chacha_neon(struct skcipher_request *req) +{ + return do_chacha(req, neon_usable()); +} + +static int do_xchacha(struct skcipher_request *req, bool neon) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); + struct chacha_ctx subctx; + u32 state[16]; + u8 real_iv[16]; + + chacha_init_generic(state, ctx->key, req->iv); + + if (!neon) { + hchacha_block_arm(state, subctx.key, ctx->nrounds); + } else { + kernel_neon_begin(); + hchacha_block_neon(state, subctx.key, ctx->nrounds); + kernel_neon_end(); + } + subctx.nrounds = ctx->nrounds; + + memcpy(&real_iv[0], req->iv + 24, 8); + memcpy(&real_iv[8], req->iv + 16, 8); + return chacha_stream_xor(req, &subctx, real_iv, neon); +} + +static int xchacha_arm(struct skcipher_request *req) +{ + return do_xchacha(req, false); +} + +static int 
xchacha_neon(struct skcipher_request *req) +{ + return do_xchacha(req, neon_usable()); +} + +static struct skcipher_alg arm_algs[] = { + { + .base.cra_name = "chacha20", + .base.cra_driver_name = "chacha20-arm", + .base.cra_priority = 200, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct chacha_ctx), + .base.cra_module = THIS_MODULE, + + .min_keysize = CHACHA_KEY_SIZE, + .max_keysize = CHACHA_KEY_SIZE, + .ivsize = CHACHA_IV_SIZE, + .chunksize = CHACHA_BLOCK_SIZE, + .setkey = chacha20_setkey, + .encrypt = chacha_arm, + .decrypt = chacha_arm, + }, { + .base.cra_name = "xchacha20", + .base.cra_driver_name = "xchacha20-arm", + .base.cra_priority = 200, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct chacha_ctx), + .base.cra_module = THIS_MODULE, + + .min_keysize = CHACHA_KEY_SIZE, + .max_keysize = CHACHA_KEY_SIZE, + .ivsize = XCHACHA_IV_SIZE, + .chunksize = CHACHA_BLOCK_SIZE, + .setkey = chacha20_setkey, + .encrypt = xchacha_arm, + .decrypt = xchacha_arm, + }, { + .base.cra_name = "xchacha12", + .base.cra_driver_name = "xchacha12-arm", + .base.cra_priority = 200, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct chacha_ctx), + .base.cra_module = THIS_MODULE, + + .min_keysize = CHACHA_KEY_SIZE, + .max_keysize = CHACHA_KEY_SIZE, + .ivsize = XCHACHA_IV_SIZE, + .chunksize = CHACHA_BLOCK_SIZE, + .setkey = chacha12_setkey, + .encrypt = xchacha_arm, + .decrypt = xchacha_arm, + }, +}; + +static struct skcipher_alg neon_algs[] = { + { + .base.cra_name = "chacha20", + .base.cra_driver_name = "chacha20-neon", + .base.cra_priority = 300, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct chacha_ctx), + .base.cra_module = THIS_MODULE, + + .min_keysize = CHACHA_KEY_SIZE, + .max_keysize = CHACHA_KEY_SIZE, + .ivsize = CHACHA_IV_SIZE, + .chunksize = CHACHA_BLOCK_SIZE, + .walksize = 4 * CHACHA_BLOCK_SIZE, + .setkey = chacha20_setkey, + .encrypt = chacha_neon, + .decrypt = chacha_neon, + }, { + .base.cra_name = "xchacha20", + .base.cra_driver_name = "xchacha20-neon", + .base.cra_priority = 300, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct chacha_ctx), + .base.cra_module = THIS_MODULE, + + .min_keysize = CHACHA_KEY_SIZE, + .max_keysize = CHACHA_KEY_SIZE, + .ivsize = XCHACHA_IV_SIZE, + .chunksize = CHACHA_BLOCK_SIZE, + .walksize = 4 * CHACHA_BLOCK_SIZE, + .setkey = chacha20_setkey, + .encrypt = xchacha_neon, + .decrypt = xchacha_neon, + }, { + .base.cra_name = "xchacha12", + .base.cra_driver_name = "xchacha12-neon", + .base.cra_priority = 300, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct chacha_ctx), + .base.cra_module = THIS_MODULE, + + .min_keysize = CHACHA_KEY_SIZE, + .max_keysize = CHACHA_KEY_SIZE, + .ivsize = XCHACHA_IV_SIZE, + .chunksize = CHACHA_BLOCK_SIZE, + .walksize = 4 * CHACHA_BLOCK_SIZE, + .setkey = chacha12_setkey, + .encrypt = xchacha_neon, + .decrypt = xchacha_neon, + } +}; + +static int __init chacha_simd_mod_init(void) +{ + int err; + + err = crypto_register_skciphers(arm_algs, ARRAY_SIZE(arm_algs)); + if (err) + return err; + + if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON)) { + int i; + + switch (read_cpuid_part()) { + case ARM_CPU_PART_CORTEX_A7: + case ARM_CPU_PART_CORTEX_A5: + /* + * The Cortex-A7 and Cortex-A5 do not perform well with + * the NEON implementation but do incredibly with the + * scalar one and use less power. 
+ */ + for (i = 0; i < ARRAY_SIZE(neon_algs); i++) + neon_algs[i].base.cra_priority = 0; + break; + } + + err = crypto_register_skciphers(neon_algs, ARRAY_SIZE(neon_algs)); + if (err) + crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs)); + } + return err; +} + +static void __exit chacha_simd_mod_fini(void) +{ + crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs)); + if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON)) + crypto_unregister_skciphers(neon_algs, ARRAY_SIZE(neon_algs)); +} + +module_init(chacha_simd_mod_init); +module_exit(chacha_simd_mod_fini); + +MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (scalar and NEON accelerated)"); +MODULE_AUTHOR("Ard Biesheuvel "); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS_CRYPTO("chacha20"); +MODULE_ALIAS_CRYPTO("chacha20-arm"); +MODULE_ALIAS_CRYPTO("xchacha20"); +MODULE_ALIAS_CRYPTO("xchacha20-arm"); +MODULE_ALIAS_CRYPTO("xchacha12"); +MODULE_ALIAS_CRYPTO("xchacha12-arm"); +#ifdef CONFIG_KERNEL_MODE_NEON +MODULE_ALIAS_CRYPTO("chacha20-neon"); +MODULE_ALIAS_CRYPTO("xchacha20-neon"); +MODULE_ALIAS_CRYPTO("xchacha12-neon"); +#endif diff --git a/arch/arm/crypto/chacha-neon-glue.c b/arch/arm/crypto/chacha-neon-glue.c deleted file mode 100644 index 26576772f18b..000000000000 --- a/arch/arm/crypto/chacha-neon-glue.c +++ /dev/null @@ -1,202 +0,0 @@ -/* - * ARM NEON accelerated ChaCha and XChaCha stream ciphers, - * including ChaCha20 (RFC7539) - * - * Copyright (C) 2016 Linaro, Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * Based on: - * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code - * - * Copyright (C) 2015 Martin Willi - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
- */ - -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -asmlinkage void chacha_block_xor_neon(const u32 *state, u8 *dst, const u8 *src, - int nrounds); -asmlinkage void chacha_4block_xor_neon(const u32 *state, u8 *dst, const u8 *src, - int nrounds); -asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds); - -static void chacha_doneon(u32 *state, u8 *dst, const u8 *src, - unsigned int bytes, int nrounds) -{ - u8 buf[CHACHA_BLOCK_SIZE]; - - while (bytes >= CHACHA_BLOCK_SIZE * 4) { - chacha_4block_xor_neon(state, dst, src, nrounds); - bytes -= CHACHA_BLOCK_SIZE * 4; - src += CHACHA_BLOCK_SIZE * 4; - dst += CHACHA_BLOCK_SIZE * 4; - state[12] += 4; - } - while (bytes >= CHACHA_BLOCK_SIZE) { - chacha_block_xor_neon(state, dst, src, nrounds); - bytes -= CHACHA_BLOCK_SIZE; - src += CHACHA_BLOCK_SIZE; - dst += CHACHA_BLOCK_SIZE; - state[12]++; - } - if (bytes) { - memcpy(buf, src, bytes); - chacha_block_xor_neon(state, buf, buf, nrounds); - memcpy(dst, buf, bytes); - } -} - -static int chacha_neon_stream_xor(struct skcipher_request *req, - const struct chacha_ctx *ctx, const u8 *iv) -{ - struct skcipher_walk walk; - u32 state[16]; - int err; - - err = skcipher_walk_virt(&walk, req, false); - - crypto_chacha_init(state, ctx, iv); - - while (walk.nbytes > 0) { - unsigned int nbytes = walk.nbytes; - - if (nbytes < walk.total) - nbytes = round_down(nbytes, walk.stride); - - kernel_neon_begin(); - chacha_doneon(state, walk.dst.virt.addr, walk.src.virt.addr, - nbytes, ctx->nrounds); - kernel_neon_end(); - err = skcipher_walk_done(&walk, walk.nbytes - nbytes); - } - - return err; -} - -static int chacha_neon(struct skcipher_request *req) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); - - if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable()) - return crypto_chacha_crypt(req); - - return chacha_neon_stream_xor(req, ctx, req->iv); -} - -static int xchacha_neon(struct skcipher_request *req) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); - struct chacha_ctx subctx; - u32 state[16]; - u8 real_iv[16]; - - if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable()) - return crypto_xchacha_crypt(req); - - crypto_chacha_init(state, ctx, req->iv); - - kernel_neon_begin(); - hchacha_block_neon(state, subctx.key, ctx->nrounds); - kernel_neon_end(); - subctx.nrounds = ctx->nrounds; - - memcpy(&real_iv[0], req->iv + 24, 8); - memcpy(&real_iv[8], req->iv + 16, 8); - return chacha_neon_stream_xor(req, &subctx, real_iv); -} - -static struct skcipher_alg algs[] = { - { - .base.cra_name = "chacha20", - .base.cra_driver_name = "chacha20-neon", - .base.cra_priority = 300, - .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct chacha_ctx), - .base.cra_module = THIS_MODULE, - - .min_keysize = CHACHA_KEY_SIZE, - .max_keysize = CHACHA_KEY_SIZE, - .ivsize = CHACHA_IV_SIZE, - .chunksize = CHACHA_BLOCK_SIZE, - .walksize = 4 * CHACHA_BLOCK_SIZE, - .setkey = crypto_chacha20_setkey, - .encrypt = chacha_neon, - .decrypt = chacha_neon, - }, { - .base.cra_name = "xchacha20", - .base.cra_driver_name = "xchacha20-neon", - .base.cra_priority = 300, - .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct chacha_ctx), - .base.cra_module = THIS_MODULE, - - .min_keysize = CHACHA_KEY_SIZE, - .max_keysize = CHACHA_KEY_SIZE, - .ivsize = XCHACHA_IV_SIZE, - .chunksize = CHACHA_BLOCK_SIZE, - .walksize = 4 * 
CHACHA_BLOCK_SIZE, - .setkey = crypto_chacha20_setkey, - .encrypt = xchacha_neon, - .decrypt = xchacha_neon, - }, { - .base.cra_name = "xchacha12", - .base.cra_driver_name = "xchacha12-neon", - .base.cra_priority = 300, - .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct chacha_ctx), - .base.cra_module = THIS_MODULE, - - .min_keysize = CHACHA_KEY_SIZE, - .max_keysize = CHACHA_KEY_SIZE, - .ivsize = XCHACHA_IV_SIZE, - .chunksize = CHACHA_BLOCK_SIZE, - .walksize = 4 * CHACHA_BLOCK_SIZE, - .setkey = crypto_chacha12_setkey, - .encrypt = xchacha_neon, - .decrypt = xchacha_neon, - } -}; - -static int __init chacha_simd_mod_init(void) -{ - if (!(elf_hwcap & HWCAP_NEON)) - return -ENODEV; - - return crypto_register_skciphers(algs, ARRAY_SIZE(algs)); -} - -static void __exit chacha_simd_mod_fini(void) -{ - crypto_unregister_skciphers(algs, ARRAY_SIZE(algs)); -} - -module_init(chacha_simd_mod_init); -module_exit(chacha_simd_mod_fini); - -MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (NEON accelerated)"); -MODULE_AUTHOR("Ard Biesheuvel "); -MODULE_LICENSE("GPL v2"); -MODULE_ALIAS_CRYPTO("chacha20"); -MODULE_ALIAS_CRYPTO("chacha20-neon"); -MODULE_ALIAS_CRYPTO("xchacha20"); -MODULE_ALIAS_CRYPTO("xchacha20-neon"); -MODULE_ALIAS_CRYPTO("xchacha12"); -MODULE_ALIAS_CRYPTO("xchacha12-neon"); diff --git a/arch/arm/crypto/chacha-scalar-core.S b/arch/arm/crypto/chacha-scalar-core.S index 2140319b64a0..2985b80a45b5 100644 --- a/arch/arm/crypto/chacha-scalar-core.S +++ b/arch/arm/crypto/chacha-scalar-core.S @@ -41,14 +41,6 @@ X14 .req r12 X15 .req r14 -.Lexpand_32byte_k: - // "expand 32-byte k" - .word 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574 - -#ifdef __thumb2__ -# define adrl adr -#endif - .macro __rev out, in, t0, t1, t2 .if __LINUX_ARM_ARCH__ >= 6 rev \out, \in @@ -391,61 +383,65 @@ .endm // _chacha /* - * void chacha20_arm(u8 *out, const u8 *in, size_t len, const u32 key[8], - * const u32 iv[4]); + * void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes, + * const u32 *state, int nrounds); */ -ENTRY(chacha20_arm) +ENTRY(chacha_doarm) cmp r2, #0 // len == 0? reteq lr + ldr ip, [sp] + cmp ip, #12 + push {r0-r2,r4-r11,lr} // Push state x0-x15 onto stack. // Also store an extra copy of x10-x11 just before the state. - ldr r4, [sp, #48] // iv - mov r0, sp - sub sp, #80 - - // iv: x12-x15 - ldm r4, {X12,X13,X14,X15} - stmdb r0!, {X12,X13,X14,X15} + add X12, r3, #48 + ldm X12, {X12,X13,X14,X15} + push {X12,X13,X14,X15} + sub sp, sp, #64 - // key: x4-x11 - __ldrd X8_X10, X9_X11, r3, 24 + __ldrd X8_X10, X9_X11, r3, 40 __strd X8_X10, X9_X11, sp, 8 - stmdb r0!, {X8_X10, X9_X11} - ldm r3, {X4-X9_X11} - stmdb r0!, {X4-X9_X11} - - // constants: x0-x3 - adrl X3, .Lexpand_32byte_k - ldm X3, {X0-X3} + __strd X8_X10, X9_X11, sp, 56 + ldm r3, {X0-X9_X11} __strd X0, X1, sp, 16 __strd X2, X3, sp, 24 + __strd X4, X5, sp, 32 + __strd X6, X7, sp, 40 + __strd X8_X10, X9_X11, sp, 48 + beq 1f _chacha 20 - add sp, #76 +0: add sp, #76 pop {r4-r11, pc} -ENDPROC(chacha20_arm) + +1: _chacha 12 + b 0b +ENDPROC(chacha_doarm) /* - * void hchacha20_arm(const u32 state[16], u32 out[8]); + * void hchacha_block_arm(const u32 state[16], u32 out[8], int nrounds); */ -ENTRY(hchacha20_arm) +ENTRY(hchacha_block_arm) push {r1,r4-r11,lr} + cmp r2, #12 // ChaCha12 ? 
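+	// (the flags set here survive the loads and pushes below and are
+	// only consumed by the 'beq 1f' selecting the 12-round permutation)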
+ mov r14, r0 ldmia r14!, {r0-r11} // load x0-x11 push {r10-r11} // store x10-x11 to stack ldm r14, {r10-r12,r14} // load x12-x15 sub sp, #8 + beq 1f _chacha_permute 20 // Skip over (unused0-unused1, x10-x11) - add sp, #16 +0: add sp, #16 // Fix up rotations of x12-x15 ror X12, X12, #drot @@ -458,4 +454,7 @@ ENTRY(hchacha20_arm) stm r4, {X0,X1,X2,X3,X12,X13,X14,X15} pop {r4-r11,pc} -ENDPROC(hchacha20_arm) + +1: _chacha_permute 12 + b 0b +ENDPROC(hchacha_block_arm) diff --git a/arch/arm64/crypto/chacha-neon-glue.c b/arch/arm64/crypto/chacha-neon-glue.c index 46cd4297761c..b08029d7bde6 100644 --- a/arch/arm64/crypto/chacha-neon-glue.c +++ b/arch/arm64/crypto/chacha-neon-glue.c @@ -1,5 +1,5 @@ /* - * ARM NEON accelerated ChaCha and XChaCha stream ciphers, + * ARM NEON and scalar accelerated ChaCha and XChaCha stream ciphers, * including ChaCha20 (RFC7539) * * Copyright (C) 2016 - 2017 Linaro, Ltd. -- cgit v1.2.3 From a44a3430d71bad4ee56788a59fff099b291ea54c Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 8 Nov 2019 13:22:15 +0100 Subject: crypto: arm/chacha - expose ARM ChaCha routine as library function Expose the accelerated NEON ChaCha routine directly as a symbol export so that users of the ChaCha library API can use it directly. Given that calls into the library API will always go through the routines in this module if it is enabled, switch to static keys to select the optimal implementation available (which may be none at all, in which case we defer to the generic implementation for all invocations). Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm/crypto/Kconfig | 1 + arch/arm/crypto/chacha-glue.c | 41 ++++++++++++++++++++++++++++++++++++++++- 2 files changed, 41 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig index 43452009ebd4..4d13b5201796 100644 --- a/arch/arm/crypto/Kconfig +++ b/arch/arm/crypto/Kconfig @@ -130,6 +130,7 @@ config CRYPTO_CRC32_ARM_CE config CRYPTO_CHACHA20_NEON tristate "NEON and scalar accelerated ChaCha stream cipher algorithms" select CRYPTO_SKCIPHER + select CRYPTO_ARCH_HAVE_LIB_CHACHA config CRYPTO_NHPOLY1305_NEON tristate "NEON accelerated NHPoly1305 hash function (for Adiantum)" diff --git a/arch/arm/crypto/chacha-glue.c b/arch/arm/crypto/chacha-glue.c index eb40efb3eb34..3f0c057aa050 100644 --- a/arch/arm/crypto/chacha-glue.c +++ b/arch/arm/crypto/chacha-glue.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -29,9 +30,11 @@ asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds); asmlinkage void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes, const u32 *state, int nrounds); +static __ro_after_init DEFINE_STATIC_KEY_FALSE(use_neon); + static inline bool neon_usable(void) { - return crypto_simd_usable(); + return static_branch_likely(&use_neon) && crypto_simd_usable(); } static void chacha_doneon(u32 *state, u8 *dst, const u8 *src, @@ -60,6 +63,40 @@ static void chacha_doneon(u32 *state, u8 *dst, const u8 *src, } } +void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds) +{ + if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable()) { + hchacha_block_arm(state, stream, nrounds); + } else { + kernel_neon_begin(); + hchacha_block_neon(state, stream, nrounds); + kernel_neon_end(); + } +} +EXPORT_SYMBOL(hchacha_block_arch); + +void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv) +{ + chacha_init_generic(state, key, iv); +} +EXPORT_SYMBOL(chacha_init_arch); + +void chacha_crypt_arch(u32 
*state, u8 *dst, const u8 *src, unsigned int bytes, + int nrounds) +{ + if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable() || + bytes <= CHACHA_BLOCK_SIZE) { + chacha_doarm(dst, src, bytes, state, nrounds); + state[12] += DIV_ROUND_UP(bytes, CHACHA_BLOCK_SIZE); + return; + } + + kernel_neon_begin(); + chacha_doneon(state, dst, src, bytes, nrounds); + kernel_neon_end(); +} +EXPORT_SYMBOL(chacha_crypt_arch); + static int chacha_stream_xor(struct skcipher_request *req, const struct chacha_ctx *ctx, const u8 *iv, bool neon) @@ -269,6 +306,8 @@ static int __init chacha_simd_mod_init(void) for (i = 0; i < ARRAY_SIZE(neon_algs); i++) neon_algs[i].base.cra_priority = 0; break; + default: + static_branch_enable(&use_neon); } err = crypto_register_skciphers(neon_algs, ARRAY_SIZE(neon_algs)); -- cgit v1.2.3 From 49aa7c00eddf8d8f462b0256bd82e81762d7b0c6 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 8 Nov 2019 13:22:16 +0100 Subject: crypto: mips/chacha - import 32r2 ChaCha code from Zinc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This imports the accelerated MIPS 32r2 ChaCha20 implementation from the Zinc patch set. Co-developed-by: René van Dorst Signed-off-by: René van Dorst Signed-off-by: Jason A. Donenfeld Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/mips/crypto/chacha-core.S | 424 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 424 insertions(+) create mode 100644 arch/mips/crypto/chacha-core.S (limited to 'arch') diff --git a/arch/mips/crypto/chacha-core.S b/arch/mips/crypto/chacha-core.S new file mode 100644 index 000000000000..a81e02db95e7 --- /dev/null +++ b/arch/mips/crypto/chacha-core.S @@ -0,0 +1,424 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ +/* + * Copyright (C) 2016-2018 René van Dorst . All Rights Reserved. + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +#define MASK_U32 0x3c +#define CHACHA20_BLOCK_SIZE 64 +#define STACK_SIZE 32 + +#define X0 $t0 +#define X1 $t1 +#define X2 $t2 +#define X3 $t3 +#define X4 $t4 +#define X5 $t5 +#define X6 $t6 +#define X7 $t7 +#define X8 $t8 +#define X9 $t9 +#define X10 $v1 +#define X11 $s6 +#define X12 $s5 +#define X13 $s4 +#define X14 $s3 +#define X15 $s2 +/* Use regs which are overwritten on exit for Tx so we don't leak clear data. */ +#define T0 $s1 +#define T1 $s0 +#define T(n) T ## n +#define X(n) X ## n + +/* Input arguments */ +#define STATE $a0 +#define OUT $a1 +#define IN $a2 +#define BYTES $a3 + +/* Output argument */ +/* NONCE[0] is kept in a register and not in memory. + * We don't want to touch original value in memory. + * Must be incremented every loop iteration. + */ +#define NONCE_0 $v0 + +/* SAVED_X and SAVED_CA are set in the jump table. + * Use regs which are overwritten on exit else we don't leak clear data. + * They are used to handling the last bytes which are not multiple of 4. 
+ */ +#define SAVED_X X15 +#define SAVED_CA $s7 + +#define IS_UNALIGNED $s7 + +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#define MSB 0 +#define LSB 3 +#define ROTx rotl +#define ROTR(n) rotr n, 24 +#define CPU_TO_LE32(n) \ + wsbh n; \ + rotr n, 16; +#else +#define MSB 3 +#define LSB 0 +#define ROTx rotr +#define CPU_TO_LE32(n) +#define ROTR(n) +#endif + +#define FOR_EACH_WORD(x) \ + x( 0); \ + x( 1); \ + x( 2); \ + x( 3); \ + x( 4); \ + x( 5); \ + x( 6); \ + x( 7); \ + x( 8); \ + x( 9); \ + x(10); \ + x(11); \ + x(12); \ + x(13); \ + x(14); \ + x(15); + +#define FOR_EACH_WORD_REV(x) \ + x(15); \ + x(14); \ + x(13); \ + x(12); \ + x(11); \ + x(10); \ + x( 9); \ + x( 8); \ + x( 7); \ + x( 6); \ + x( 5); \ + x( 4); \ + x( 3); \ + x( 2); \ + x( 1); \ + x( 0); + +#define PLUS_ONE_0 1 +#define PLUS_ONE_1 2 +#define PLUS_ONE_2 3 +#define PLUS_ONE_3 4 +#define PLUS_ONE_4 5 +#define PLUS_ONE_5 6 +#define PLUS_ONE_6 7 +#define PLUS_ONE_7 8 +#define PLUS_ONE_8 9 +#define PLUS_ONE_9 10 +#define PLUS_ONE_10 11 +#define PLUS_ONE_11 12 +#define PLUS_ONE_12 13 +#define PLUS_ONE_13 14 +#define PLUS_ONE_14 15 +#define PLUS_ONE_15 16 +#define PLUS_ONE(x) PLUS_ONE_ ## x +#define _CONCAT3(a,b,c) a ## b ## c +#define CONCAT3(a,b,c) _CONCAT3(a,b,c) + +#define STORE_UNALIGNED(x) \ +CONCAT3(.Lchacha20_mips_xor_unaligned_, PLUS_ONE(x), _b: ;) \ + .if (x != 12); \ + lw T0, (x*4)(STATE); \ + .endif; \ + lwl T1, (x*4)+MSB ## (IN); \ + lwr T1, (x*4)+LSB ## (IN); \ + .if (x == 12); \ + addu X ## x, NONCE_0; \ + .else; \ + addu X ## x, T0; \ + .endif; \ + CPU_TO_LE32(X ## x); \ + xor X ## x, T1; \ + swl X ## x, (x*4)+MSB ## (OUT); \ + swr X ## x, (x*4)+LSB ## (OUT); + +#define STORE_ALIGNED(x) \ +CONCAT3(.Lchacha20_mips_xor_aligned_, PLUS_ONE(x), _b: ;) \ + .if (x != 12); \ + lw T0, (x*4)(STATE); \ + .endif; \ + lw T1, (x*4) ## (IN); \ + .if (x == 12); \ + addu X ## x, NONCE_0; \ + .else; \ + addu X ## x, T0; \ + .endif; \ + CPU_TO_LE32(X ## x); \ + xor X ## x, T1; \ + sw X ## x, (x*4) ## (OUT); + +/* Jump table macro. + * Used for setup and handling the last bytes, which are not multiple of 4. + * X15 is free to store Xn + * Every jumptable entry must be equal in size. + */ +#define JMPTBL_ALIGNED(x) \ +.Lchacha20_mips_jmptbl_aligned_ ## x: ; \ + .set noreorder; \ + b .Lchacha20_mips_xor_aligned_ ## x ## _b; \ + .if (x == 12); \ + addu SAVED_X, X ## x, NONCE_0; \ + .else; \ + addu SAVED_X, X ## x, SAVED_CA; \ + .endif; \ + .set reorder + +#define JMPTBL_UNALIGNED(x) \ +.Lchacha20_mips_jmptbl_unaligned_ ## x: ; \ + .set noreorder; \ + b .Lchacha20_mips_xor_unaligned_ ## x ## _b; \ + .if (x == 12); \ + addu SAVED_X, X ## x, NONCE_0; \ + .else; \ + addu SAVED_X, X ## x, SAVED_CA; \ + .endif; \ + .set reorder + +#define AXR(A, B, C, D, K, L, M, N, V, W, Y, Z, S) \ + addu X(A), X(K); \ + addu X(B), X(L); \ + addu X(C), X(M); \ + addu X(D), X(N); \ + xor X(V), X(A); \ + xor X(W), X(B); \ + xor X(Y), X(C); \ + xor X(Z), X(D); \ + rotl X(V), S; \ + rotl X(W), S; \ + rotl X(Y), S; \ + rotl X(Z), S; + +.text +.set reorder +.set noat +.globl chacha20_mips +.ent chacha20_mips +chacha20_mips: + .frame $sp, STACK_SIZE, $ra + + addiu $sp, -STACK_SIZE + + /* Return bytes = 0. */ + beqz BYTES, .Lchacha20_mips_end + + lw NONCE_0, 48(STATE) + + /* Save s0-s7 */ + sw $s0, 0($sp) + sw $s1, 4($sp) + sw $s2, 8($sp) + sw $s3, 12($sp) + sw $s4, 16($sp) + sw $s5, 20($sp) + sw $s6, 24($sp) + sw $s7, 28($sp) + + /* Test IN or OUT is unaligned. 
+ * IS_UNALIGNED = ( IN | OUT ) & 0x00000003 + */ + or IS_UNALIGNED, IN, OUT + andi IS_UNALIGNED, 0x3 + + /* Set number of rounds */ + li $at, 20 + + b .Lchacha20_rounds_start + +.align 4 +.Loop_chacha20_rounds: + addiu IN, CHACHA20_BLOCK_SIZE + addiu OUT, CHACHA20_BLOCK_SIZE + addiu NONCE_0, 1 + +.Lchacha20_rounds_start: + lw X0, 0(STATE) + lw X1, 4(STATE) + lw X2, 8(STATE) + lw X3, 12(STATE) + + lw X4, 16(STATE) + lw X5, 20(STATE) + lw X6, 24(STATE) + lw X7, 28(STATE) + lw X8, 32(STATE) + lw X9, 36(STATE) + lw X10, 40(STATE) + lw X11, 44(STATE) + + move X12, NONCE_0 + lw X13, 52(STATE) + lw X14, 56(STATE) + lw X15, 60(STATE) + +.Loop_chacha20_xor_rounds: + addiu $at, -2 + AXR( 0, 1, 2, 3, 4, 5, 6, 7, 12,13,14,15, 16); + AXR( 8, 9,10,11, 12,13,14,15, 4, 5, 6, 7, 12); + AXR( 0, 1, 2, 3, 4, 5, 6, 7, 12,13,14,15, 8); + AXR( 8, 9,10,11, 12,13,14,15, 4, 5, 6, 7, 7); + AXR( 0, 1, 2, 3, 5, 6, 7, 4, 15,12,13,14, 16); + AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 12); + AXR( 0, 1, 2, 3, 5, 6, 7, 4, 15,12,13,14, 8); + AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 7); + bnez $at, .Loop_chacha20_xor_rounds + + addiu BYTES, -(CHACHA20_BLOCK_SIZE) + + /* Is data src/dst unaligned? Jump */ + bnez IS_UNALIGNED, .Loop_chacha20_unaligned + + /* Set number rounds here to fill delayslot. */ + li $at, 20 + + /* BYTES < 0, it has no full block. */ + bltz BYTES, .Lchacha20_mips_no_full_block_aligned + + FOR_EACH_WORD_REV(STORE_ALIGNED) + + /* BYTES > 0? Loop again. */ + bgtz BYTES, .Loop_chacha20_rounds + + /* Place this here to fill delay slot */ + addiu NONCE_0, 1 + + /* BYTES < 0? Handle last bytes */ + bltz BYTES, .Lchacha20_mips_xor_bytes + +.Lchacha20_mips_xor_done: + /* Restore used registers */ + lw $s0, 0($sp) + lw $s1, 4($sp) + lw $s2, 8($sp) + lw $s3, 12($sp) + lw $s4, 16($sp) + lw $s5, 20($sp) + lw $s6, 24($sp) + lw $s7, 28($sp) + + /* Write NONCE_0 back to right location in state */ + sw NONCE_0, 48(STATE) + +.Lchacha20_mips_end: + addiu $sp, STACK_SIZE + jr $ra + +.Lchacha20_mips_no_full_block_aligned: + /* Restore the offset on BYTES */ + addiu BYTES, CHACHA20_BLOCK_SIZE + + /* Get number of full WORDS */ + andi $at, BYTES, MASK_U32 + + /* Load upper half of jump table addr */ + lui T0, %hi(.Lchacha20_mips_jmptbl_aligned_0) + + /* Calculate lower half jump table offset */ + ins T0, $at, 1, 6 + + /* Add offset to STATE */ + addu T1, STATE, $at + + /* Add lower half jump table addr */ + addiu T0, %lo(.Lchacha20_mips_jmptbl_aligned_0) + + /* Read value from STATE */ + lw SAVED_CA, 0(T1) + + /* Store remaining bytecounter as negative value */ + subu BYTES, $at, BYTES + + jr T0 + + /* Jump table */ + FOR_EACH_WORD(JMPTBL_ALIGNED) + + +.Loop_chacha20_unaligned: + /* Set number rounds here to fill delayslot. */ + li $at, 20 + + /* BYTES > 0, it has no full block. */ + bltz BYTES, .Lchacha20_mips_no_full_block_unaligned + + FOR_EACH_WORD_REV(STORE_UNALIGNED) + + /* BYTES > 0? Loop again. 
*/ + bgtz BYTES, .Loop_chacha20_rounds + + /* Write NONCE_0 back to right location in state */ + sw NONCE_0, 48(STATE) + + .set noreorder + /* Fall through to byte handling */ + bgez BYTES, .Lchacha20_mips_xor_done +.Lchacha20_mips_xor_unaligned_0_b: +.Lchacha20_mips_xor_aligned_0_b: + /* Place this here to fill delay slot */ + addiu NONCE_0, 1 + .set reorder + +.Lchacha20_mips_xor_bytes: + addu IN, $at + addu OUT, $at + /* First byte */ + lbu T1, 0(IN) + addiu $at, BYTES, 1 + CPU_TO_LE32(SAVED_X) + ROTR(SAVED_X) + xor T1, SAVED_X + sb T1, 0(OUT) + beqz $at, .Lchacha20_mips_xor_done + /* Second byte */ + lbu T1, 1(IN) + addiu $at, BYTES, 2 + ROTx SAVED_X, 8 + xor T1, SAVED_X + sb T1, 1(OUT) + beqz $at, .Lchacha20_mips_xor_done + /* Third byte */ + lbu T1, 2(IN) + ROTx SAVED_X, 8 + xor T1, SAVED_X + sb T1, 2(OUT) + b .Lchacha20_mips_xor_done + +.Lchacha20_mips_no_full_block_unaligned: + /* Restore the offset on BYTES */ + addiu BYTES, CHACHA20_BLOCK_SIZE + + /* Get number of full WORDS */ + andi $at, BYTES, MASK_U32 + + /* Load upper half of jump table addr */ + lui T0, %hi(.Lchacha20_mips_jmptbl_unaligned_0) + + /* Calculate lower half jump table offset */ + ins T0, $at, 1, 6 + + /* Add offset to STATE */ + addu T1, STATE, $at + + /* Add lower half jump table addr */ + addiu T0, %lo(.Lchacha20_mips_jmptbl_unaligned_0) + + /* Read value from STATE */ + lw SAVED_CA, 0(T1) + + /* Store remaining bytecounter as negative value */ + subu BYTES, $at, BYTES + + jr T0 + + /* Jump table */ + FOR_EACH_WORD(JMPTBL_UNALIGNED) +.end chacha20_mips +.set at -- cgit v1.2.3 From 3a2f58f3ba4f6f44e33d1a48240d5eadb882cb59 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 8 Nov 2019 13:22:17 +0100 Subject: crypto: mips/chacha - wire up accelerated 32r2 code from Zinc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This integrates the accelerated MIPS 32r2 implementation of ChaCha into both the API and library interfaces of the kernel crypto stack. The significance of this is that, in addition to becoming available as an accelerated library implementation, it can also be used by existing crypto API code such as Adiantum (for block encryption on ultra low performance cores) or IPsec using chacha20poly1305. These are use cases that have already opted into using the abstract crypto API. In order to support Adiantum, the core assembler routine has been adapted to take the round count as a function argument rather than hardcoding it to 20. 
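As an illustrative sketch of what this enables (not part of the patch, and
assuming the chacha_init_generic()/chacha_crypt() library entry points that
the glue code in this series relies on), a caller can now drive the
accelerated routine with either round count:

	#include <crypto/chacha.h>

	/* One-shot encrypt/decrypt of a buffer through the ChaCha library. */
	static void chacha_lib_demo(u8 *dst, const u8 *src, unsigned int len,
				    const u32 key[CHACHA_KEY_SIZE / 4],
				    const u8 iv[CHACHA_IV_SIZE])
	{
		u32 state[16];

		chacha_init_generic(state, key, iv);
		chacha_crypt(state, dst, src, len, 20);	/* 12 for ChaCha12 */
	}

chacha_crypt() dispatches to chacha_crypt_arch() when an accelerated
implementation such as the MIPS routine above is available, and defers to
the generic C code otherwise.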
Co-developed-by: René van Dorst Signed-off-by: René van Dorst Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/mips/Makefile | 2 +- arch/mips/crypto/Makefile | 4 ++ arch/mips/crypto/chacha-core.S | 159 ++++++++++++++++++++++++++++++----------- arch/mips/crypto/chacha-glue.c | 150 ++++++++++++++++++++++++++++++++++++++ crypto/Kconfig | 6 ++ 5 files changed, 277 insertions(+), 44 deletions(-) create mode 100644 arch/mips/crypto/chacha-glue.c (limited to 'arch') diff --git a/arch/mips/Makefile b/arch/mips/Makefile index cdc09b71febe..8584c047ea59 100644 --- a/arch/mips/Makefile +++ b/arch/mips/Makefile @@ -323,7 +323,7 @@ libs-$(CONFIG_MIPS_FP_SUPPORT) += arch/mips/math-emu/ # See arch/mips/Kbuild for content of core part of the kernel core-y += arch/mips/ -drivers-$(CONFIG_MIPS_CRC_SUPPORT) += arch/mips/crypto/ +drivers-y += arch/mips/crypto/ drivers-$(CONFIG_OPROFILE) += arch/mips/oprofile/ # suspend and hibernation support diff --git a/arch/mips/crypto/Makefile b/arch/mips/crypto/Makefile index e07aca572c2e..b528b9d300f1 100644 --- a/arch/mips/crypto/Makefile +++ b/arch/mips/crypto/Makefile @@ -4,3 +4,7 @@ # obj-$(CONFIG_CRYPTO_CRC32_MIPS) += crc32-mips.o + +obj-$(CONFIG_CRYPTO_CHACHA_MIPS) += chacha-mips.o +chacha-mips-y := chacha-core.o chacha-glue.o +AFLAGS_chacha-core.o += -O2 # needed to fill branch delay slots diff --git a/arch/mips/crypto/chacha-core.S b/arch/mips/crypto/chacha-core.S index a81e02db95e7..5755f69cfe00 100644 --- a/arch/mips/crypto/chacha-core.S +++ b/arch/mips/crypto/chacha-core.S @@ -125,7 +125,7 @@ #define CONCAT3(a,b,c) _CONCAT3(a,b,c) #define STORE_UNALIGNED(x) \ -CONCAT3(.Lchacha20_mips_xor_unaligned_, PLUS_ONE(x), _b: ;) \ +CONCAT3(.Lchacha_mips_xor_unaligned_, PLUS_ONE(x), _b: ;) \ .if (x != 12); \ lw T0, (x*4)(STATE); \ .endif; \ @@ -142,7 +142,7 @@ CONCAT3(.Lchacha20_mips_xor_unaligned_, PLUS_ONE(x), _b: ;) \ swr X ## x, (x*4)+LSB ## (OUT); #define STORE_ALIGNED(x) \ -CONCAT3(.Lchacha20_mips_xor_aligned_, PLUS_ONE(x), _b: ;) \ +CONCAT3(.Lchacha_mips_xor_aligned_, PLUS_ONE(x), _b: ;) \ .if (x != 12); \ lw T0, (x*4)(STATE); \ .endif; \ @@ -162,9 +162,9 @@ CONCAT3(.Lchacha20_mips_xor_aligned_, PLUS_ONE(x), _b: ;) \ * Every jumptable entry must be equal in size. */ #define JMPTBL_ALIGNED(x) \ -.Lchacha20_mips_jmptbl_aligned_ ## x: ; \ +.Lchacha_mips_jmptbl_aligned_ ## x: ; \ .set noreorder; \ - b .Lchacha20_mips_xor_aligned_ ## x ## _b; \ + b .Lchacha_mips_xor_aligned_ ## x ## _b; \ .if (x == 12); \ addu SAVED_X, X ## x, NONCE_0; \ .else; \ @@ -173,9 +173,9 @@ CONCAT3(.Lchacha20_mips_xor_aligned_, PLUS_ONE(x), _b: ;) \ .set reorder #define JMPTBL_UNALIGNED(x) \ -.Lchacha20_mips_jmptbl_unaligned_ ## x: ; \ +.Lchacha_mips_jmptbl_unaligned_ ## x: ; \ .set noreorder; \ - b .Lchacha20_mips_xor_unaligned_ ## x ## _b; \ + b .Lchacha_mips_xor_unaligned_ ## x ## _b; \ .if (x == 12); \ addu SAVED_X, X ## x, NONCE_0; \ .else; \ @@ -200,15 +200,18 @@ CONCAT3(.Lchacha20_mips_xor_aligned_, PLUS_ONE(x), _b: ;) \ .text .set reorder .set noat -.globl chacha20_mips -.ent chacha20_mips -chacha20_mips: +.globl chacha_crypt_arch +.ent chacha_crypt_arch +chacha_crypt_arch: .frame $sp, STACK_SIZE, $ra + /* Load number of rounds */ + lw $at, 16($sp) + addiu $sp, -STACK_SIZE /* Return bytes = 0. 
*/ - beqz BYTES, .Lchacha20_mips_end + beqz BYTES, .Lchacha_mips_end lw NONCE_0, 48(STATE) @@ -228,18 +231,15 @@ chacha20_mips: or IS_UNALIGNED, IN, OUT andi IS_UNALIGNED, 0x3 - /* Set number of rounds */ - li $at, 20 - - b .Lchacha20_rounds_start + b .Lchacha_rounds_start .align 4 -.Loop_chacha20_rounds: +.Loop_chacha_rounds: addiu IN, CHACHA20_BLOCK_SIZE addiu OUT, CHACHA20_BLOCK_SIZE addiu NONCE_0, 1 -.Lchacha20_rounds_start: +.Lchacha_rounds_start: lw X0, 0(STATE) lw X1, 4(STATE) lw X2, 8(STATE) @@ -259,7 +259,7 @@ chacha20_mips: lw X14, 56(STATE) lw X15, 60(STATE) -.Loop_chacha20_xor_rounds: +.Loop_chacha_xor_rounds: addiu $at, -2 AXR( 0, 1, 2, 3, 4, 5, 6, 7, 12,13,14,15, 16); AXR( 8, 9,10,11, 12,13,14,15, 4, 5, 6, 7, 12); @@ -269,31 +269,31 @@ chacha20_mips: AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 12); AXR( 0, 1, 2, 3, 5, 6, 7, 4, 15,12,13,14, 8); AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 7); - bnez $at, .Loop_chacha20_xor_rounds + bnez $at, .Loop_chacha_xor_rounds addiu BYTES, -(CHACHA20_BLOCK_SIZE) /* Is data src/dst unaligned? Jump */ - bnez IS_UNALIGNED, .Loop_chacha20_unaligned + bnez IS_UNALIGNED, .Loop_chacha_unaligned /* Set number rounds here to fill delayslot. */ - li $at, 20 + lw $at, (STACK_SIZE+16)($sp) /* BYTES < 0, it has no full block. */ - bltz BYTES, .Lchacha20_mips_no_full_block_aligned + bltz BYTES, .Lchacha_mips_no_full_block_aligned FOR_EACH_WORD_REV(STORE_ALIGNED) /* BYTES > 0? Loop again. */ - bgtz BYTES, .Loop_chacha20_rounds + bgtz BYTES, .Loop_chacha_rounds /* Place this here to fill delay slot */ addiu NONCE_0, 1 /* BYTES < 0? Handle last bytes */ - bltz BYTES, .Lchacha20_mips_xor_bytes + bltz BYTES, .Lchacha_mips_xor_bytes -.Lchacha20_mips_xor_done: +.Lchacha_mips_xor_done: /* Restore used registers */ lw $s0, 0($sp) lw $s1, 4($sp) @@ -307,11 +307,11 @@ chacha20_mips: /* Write NONCE_0 back to right location in state */ sw NONCE_0, 48(STATE) -.Lchacha20_mips_end: +.Lchacha_mips_end: addiu $sp, STACK_SIZE jr $ra -.Lchacha20_mips_no_full_block_aligned: +.Lchacha_mips_no_full_block_aligned: /* Restore the offset on BYTES */ addiu BYTES, CHACHA20_BLOCK_SIZE @@ -319,7 +319,7 @@ chacha20_mips: andi $at, BYTES, MASK_U32 /* Load upper half of jump table addr */ - lui T0, %hi(.Lchacha20_mips_jmptbl_aligned_0) + lui T0, %hi(.Lchacha_mips_jmptbl_aligned_0) /* Calculate lower half jump table offset */ ins T0, $at, 1, 6 @@ -328,7 +328,7 @@ chacha20_mips: addu T1, STATE, $at /* Add lower half jump table addr */ - addiu T0, %lo(.Lchacha20_mips_jmptbl_aligned_0) + addiu T0, %lo(.Lchacha_mips_jmptbl_aligned_0) /* Read value from STATE */ lw SAVED_CA, 0(T1) @@ -342,31 +342,31 @@ chacha20_mips: FOR_EACH_WORD(JMPTBL_ALIGNED) -.Loop_chacha20_unaligned: +.Loop_chacha_unaligned: /* Set number rounds here to fill delayslot. */ - li $at, 20 + lw $at, (STACK_SIZE+16)($sp) /* BYTES > 0, it has no full block. */ - bltz BYTES, .Lchacha20_mips_no_full_block_unaligned + bltz BYTES, .Lchacha_mips_no_full_block_unaligned FOR_EACH_WORD_REV(STORE_UNALIGNED) /* BYTES > 0? Loop again. 
*/ - bgtz BYTES, .Loop_chacha20_rounds + bgtz BYTES, .Loop_chacha_rounds /* Write NONCE_0 back to right location in state */ sw NONCE_0, 48(STATE) .set noreorder /* Fall through to byte handling */ - bgez BYTES, .Lchacha20_mips_xor_done -.Lchacha20_mips_xor_unaligned_0_b: -.Lchacha20_mips_xor_aligned_0_b: + bgez BYTES, .Lchacha_mips_xor_done +.Lchacha_mips_xor_unaligned_0_b: +.Lchacha_mips_xor_aligned_0_b: /* Place this here to fill delay slot */ addiu NONCE_0, 1 .set reorder -.Lchacha20_mips_xor_bytes: +.Lchacha_mips_xor_bytes: addu IN, $at addu OUT, $at /* First byte */ @@ -376,22 +376,22 @@ chacha20_mips: ROTR(SAVED_X) xor T1, SAVED_X sb T1, 0(OUT) - beqz $at, .Lchacha20_mips_xor_done + beqz $at, .Lchacha_mips_xor_done /* Second byte */ lbu T1, 1(IN) addiu $at, BYTES, 2 ROTx SAVED_X, 8 xor T1, SAVED_X sb T1, 1(OUT) - beqz $at, .Lchacha20_mips_xor_done + beqz $at, .Lchacha_mips_xor_done /* Third byte */ lbu T1, 2(IN) ROTx SAVED_X, 8 xor T1, SAVED_X sb T1, 2(OUT) - b .Lchacha20_mips_xor_done + b .Lchacha_mips_xor_done -.Lchacha20_mips_no_full_block_unaligned: +.Lchacha_mips_no_full_block_unaligned: /* Restore the offset on BYTES */ addiu BYTES, CHACHA20_BLOCK_SIZE @@ -399,7 +399,7 @@ chacha20_mips: andi $at, BYTES, MASK_U32 /* Load upper half of jump table addr */ - lui T0, %hi(.Lchacha20_mips_jmptbl_unaligned_0) + lui T0, %hi(.Lchacha_mips_jmptbl_unaligned_0) /* Calculate lower half jump table offset */ ins T0, $at, 1, 6 @@ -408,7 +408,7 @@ chacha20_mips: addu T1, STATE, $at /* Add lower half jump table addr */ - addiu T0, %lo(.Lchacha20_mips_jmptbl_unaligned_0) + addiu T0, %lo(.Lchacha_mips_jmptbl_unaligned_0) /* Read value from STATE */ lw SAVED_CA, 0(T1) @@ -420,5 +420,78 @@ chacha20_mips: /* Jump table */ FOR_EACH_WORD(JMPTBL_UNALIGNED) -.end chacha20_mips +.end chacha_crypt_arch +.set at + +/* Input arguments + * STATE $a0 + * OUT $a1 + * NROUND $a2 + */ + +#undef X12 +#undef X13 +#undef X14 +#undef X15 + +#define X12 $a3 +#define X13 $at +#define X14 $v0 +#define X15 STATE + +.set noat +.globl hchacha_block_arch +.ent hchacha_block_arch +hchacha_block_arch: + .frame $sp, STACK_SIZE, $ra + + addiu $sp, -STACK_SIZE + + /* Save X11(s6) */ + sw X11, 0($sp) + + lw X0, 0(STATE) + lw X1, 4(STATE) + lw X2, 8(STATE) + lw X3, 12(STATE) + lw X4, 16(STATE) + lw X5, 20(STATE) + lw X6, 24(STATE) + lw X7, 28(STATE) + lw X8, 32(STATE) + lw X9, 36(STATE) + lw X10, 40(STATE) + lw X11, 44(STATE) + lw X12, 48(STATE) + lw X13, 52(STATE) + lw X14, 56(STATE) + lw X15, 60(STATE) + +.Loop_hchacha_xor_rounds: + addiu $a2, -2 + AXR( 0, 1, 2, 3, 4, 5, 6, 7, 12,13,14,15, 16); + AXR( 8, 9,10,11, 12,13,14,15, 4, 5, 6, 7, 12); + AXR( 0, 1, 2, 3, 4, 5, 6, 7, 12,13,14,15, 8); + AXR( 8, 9,10,11, 12,13,14,15, 4, 5, 6, 7, 7); + AXR( 0, 1, 2, 3, 5, 6, 7, 4, 15,12,13,14, 16); + AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 12); + AXR( 0, 1, 2, 3, 5, 6, 7, 4, 15,12,13,14, 8); + AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 7); + bnez $a2, .Loop_hchacha_xor_rounds + + /* Restore used register */ + lw X11, 0($sp) + + sw X0, 0(OUT) + sw X1, 4(OUT) + sw X2, 8(OUT) + sw X3, 12(OUT) + sw X12, 16(OUT) + sw X13, 20(OUT) + sw X14, 24(OUT) + sw X15, 28(OUT) + + addiu $sp, STACK_SIZE + jr $ra +.end hchacha_block_arch .set at diff --git a/arch/mips/crypto/chacha-glue.c b/arch/mips/crypto/chacha-glue.c new file mode 100644 index 000000000000..779e399c9bef --- /dev/null +++ b/arch/mips/crypto/chacha-glue.c @@ -0,0 +1,150 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * MIPS accelerated ChaCha and XChaCha stream ciphers, + * including ChaCha20 
(RFC7539) + * + * Copyright (C) 2019 Linaro, Ltd. + */ + +#include +#include +#include +#include +#include +#include + +asmlinkage void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, + unsigned int bytes, int nrounds); +EXPORT_SYMBOL(chacha_crypt_arch); + +asmlinkage void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds); +EXPORT_SYMBOL(hchacha_block_arch); + +void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv) +{ + chacha_init_generic(state, key, iv); +} +EXPORT_SYMBOL(chacha_init_arch); + +static int chacha_mips_stream_xor(struct skcipher_request *req, + const struct chacha_ctx *ctx, const u8 *iv) +{ + struct skcipher_walk walk; + u32 state[16]; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + chacha_init_generic(state, ctx->key, iv); + + while (walk.nbytes > 0) { + unsigned int nbytes = walk.nbytes; + + if (nbytes < walk.total) + nbytes = round_down(nbytes, walk.stride); + + chacha_crypt(state, walk.dst.virt.addr, walk.src.virt.addr, + nbytes, ctx->nrounds); + err = skcipher_walk_done(&walk, walk.nbytes - nbytes); + } + + return err; +} + +static int chacha_mips(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); + + return chacha_mips_stream_xor(req, ctx, req->iv); +} + +static int xchacha_mips(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); + struct chacha_ctx subctx; + u32 state[16]; + u8 real_iv[16]; + + chacha_init_generic(state, ctx->key, req->iv); + + hchacha_block(state, subctx.key, ctx->nrounds); + subctx.nrounds = ctx->nrounds; + + memcpy(&real_iv[0], req->iv + 24, 8); + memcpy(&real_iv[8], req->iv + 16, 8); + return chacha_mips_stream_xor(req, &subctx, real_iv); +} + +static struct skcipher_alg algs[] = { + { + .base.cra_name = "chacha20", + .base.cra_driver_name = "chacha20-mips", + .base.cra_priority = 200, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct chacha_ctx), + .base.cra_module = THIS_MODULE, + + .min_keysize = CHACHA_KEY_SIZE, + .max_keysize = CHACHA_KEY_SIZE, + .ivsize = CHACHA_IV_SIZE, + .chunksize = CHACHA_BLOCK_SIZE, + .setkey = chacha20_setkey, + .encrypt = chacha_mips, + .decrypt = chacha_mips, + }, { + .base.cra_name = "xchacha20", + .base.cra_driver_name = "xchacha20-mips", + .base.cra_priority = 200, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct chacha_ctx), + .base.cra_module = THIS_MODULE, + + .min_keysize = CHACHA_KEY_SIZE, + .max_keysize = CHACHA_KEY_SIZE, + .ivsize = XCHACHA_IV_SIZE, + .chunksize = CHACHA_BLOCK_SIZE, + .setkey = chacha20_setkey, + .encrypt = xchacha_mips, + .decrypt = xchacha_mips, + }, { + .base.cra_name = "xchacha12", + .base.cra_driver_name = "xchacha12-mips", + .base.cra_priority = 200, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct chacha_ctx), + .base.cra_module = THIS_MODULE, + + .min_keysize = CHACHA_KEY_SIZE, + .max_keysize = CHACHA_KEY_SIZE, + .ivsize = XCHACHA_IV_SIZE, + .chunksize = CHACHA_BLOCK_SIZE, + .setkey = chacha12_setkey, + .encrypt = xchacha_mips, + .decrypt = xchacha_mips, + } +}; + +static int __init chacha_simd_mod_init(void) +{ + return crypto_register_skciphers(algs, ARRAY_SIZE(algs)); +} + +static void __exit chacha_simd_mod_fini(void) +{ + crypto_unregister_skciphers(algs, ARRAY_SIZE(algs)); +} + +module_init(chacha_simd_mod_init); +module_exit(chacha_simd_mod_fini); + +MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers 
(MIPS accelerated)"); +MODULE_AUTHOR("Ard Biesheuvel "); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS_CRYPTO("chacha20"); +MODULE_ALIAS_CRYPTO("chacha20-mips"); +MODULE_ALIAS_CRYPTO("xchacha20"); +MODULE_ALIAS_CRYPTO("xchacha20-mips"); +MODULE_ALIAS_CRYPTO("xchacha12"); +MODULE_ALIAS_CRYPTO("xchacha12-mips"); diff --git a/crypto/Kconfig b/crypto/Kconfig index 07762de1237f..34c4938febeb 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1442,6 +1442,12 @@ config CRYPTO_CHACHA20_X86_64 SSSE3, AVX2, and AVX-512VL optimized implementations of the ChaCha20, XChaCha20, and XChaCha12 stream ciphers. +config CRYPTO_CHACHA_MIPS + tristate "ChaCha stream cipher algorithms (MIPS 32r2 optimized)" + depends on CPU_MIPS32_R2 + select CRYPTO_BLKCIPHER + select CRYPTO_ARCH_HAVE_LIB_CHACHA + config CRYPTO_SEED tristate "SEED cipher algorithm" select CRYPTO_ALGAPI -- cgit v1.2.3 From 48ea8c6ebc96bc0990e12ee1c43d0832c23576bb Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 8 Nov 2019 13:22:19 +0100 Subject: crypto: poly1305 - move core routines into a separate library Move the core Poly1305 routines shared between the generic Poly1305 shash driver and the Adiantum and NHPoly1305 drivers into a separate library so that using just this pieces does not pull in the crypto API pieces of the generic Poly1305 routine. In a subsequent patch, we will augment this generic library with init/update/final routines so that Poyl1305 algorithm can be used directly without the need for using the crypto API's shash abstraction. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/x86/crypto/poly1305_glue.c | 2 +- crypto/Kconfig | 5 +- crypto/adiantum.c | 5 +- crypto/nhpoly1305.c | 3 +- crypto/poly1305_generic.c | 195 ++----------------------------------- include/crypto/internal/poly1305.h | 67 +++++++++++++ include/crypto/poly1305.h | 23 ----- lib/crypto/Kconfig | 3 + lib/crypto/Makefile | 3 + lib/crypto/poly1305.c | 158 ++++++++++++++++++++++++++++++ 10 files changed, 248 insertions(+), 216 deletions(-) create mode 100644 include/crypto/internal/poly1305.h create mode 100644 lib/crypto/poly1305.c (limited to 'arch') diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c index 4a1c05dce950..6ccf8eb26324 100644 --- a/arch/x86/crypto/poly1305_glue.c +++ b/arch/x86/crypto/poly1305_glue.c @@ -7,8 +7,8 @@ #include #include +#include #include -#include #include #include #include diff --git a/crypto/Kconfig b/crypto/Kconfig index 34c4938febeb..362dd6ae6aca 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -446,7 +446,7 @@ config CRYPTO_KEYWRAP config CRYPTO_NHPOLY1305 tristate select CRYPTO_HASH - select CRYPTO_POLY1305 + select CRYPTO_LIB_POLY1305_GENERIC config CRYPTO_NHPOLY1305_SSE2 tristate "NHPoly1305 hash function (x86_64 SSE2 implementation)" @@ -467,7 +467,7 @@ config CRYPTO_NHPOLY1305_AVX2 config CRYPTO_ADIANTUM tristate "Adiantum support" select CRYPTO_CHACHA20 - select CRYPTO_POLY1305 + select CRYPTO_LIB_POLY1305_GENERIC select CRYPTO_NHPOLY1305 select CRYPTO_MANAGER help @@ -703,6 +703,7 @@ config CRYPTO_GHASH config CRYPTO_POLY1305 tristate "Poly1305 authenticator algorithm" select CRYPTO_HASH + select CRYPTO_LIB_POLY1305_GENERIC help Poly1305 authenticator algorithm, RFC7539. 
diff --git a/crypto/adiantum.c b/crypto/adiantum.c index 395a3ddd3707..aded26092268 100644 --- a/crypto/adiantum.c +++ b/crypto/adiantum.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -242,11 +243,11 @@ static void adiantum_hash_header(struct skcipher_request *req) BUILD_BUG_ON(sizeof(header) % POLY1305_BLOCK_SIZE != 0); poly1305_core_blocks(&state, &tctx->header_hash_key, - &header, sizeof(header) / POLY1305_BLOCK_SIZE); + &header, sizeof(header) / POLY1305_BLOCK_SIZE, 1); BUILD_BUG_ON(TWEAK_SIZE % POLY1305_BLOCK_SIZE != 0); poly1305_core_blocks(&state, &tctx->header_hash_key, req->iv, - TWEAK_SIZE / POLY1305_BLOCK_SIZE); + TWEAK_SIZE / POLY1305_BLOCK_SIZE, 1); poly1305_core_emit(&state, &rctx->header_hash); } diff --git a/crypto/nhpoly1305.c b/crypto/nhpoly1305.c index 9ab4e07cde4d..f6b6a52092b4 100644 --- a/crypto/nhpoly1305.c +++ b/crypto/nhpoly1305.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -78,7 +79,7 @@ static void process_nh_hash_value(struct nhpoly1305_state *state, BUILD_BUG_ON(NH_HASH_BYTES % POLY1305_BLOCK_SIZE != 0); poly1305_core_blocks(&state->poly_state, &key->poly_key, state->nh_hash, - NH_HASH_BYTES / POLY1305_BLOCK_SIZE); + NH_HASH_BYTES / POLY1305_BLOCK_SIZE, 1); } /* diff --git a/crypto/poly1305_generic.c b/crypto/poly1305_generic.c index adc40298c749..067f493c2504 100644 --- a/crypto/poly1305_generic.c +++ b/crypto/poly1305_generic.c @@ -13,27 +13,12 @@ #include #include -#include +#include #include #include #include #include -static inline u64 mlt(u64 a, u64 b) -{ - return a * b; -} - -static inline u32 sr(u64 v, u_char n) -{ - return v >> n; -} - -static inline u32 and(u32 v, u32 mask) -{ - return v & mask; -} - int crypto_poly1305_init(struct shash_desc *desc) { struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); @@ -47,124 +32,8 @@ int crypto_poly1305_init(struct shash_desc *desc) } EXPORT_SYMBOL_GPL(crypto_poly1305_init); -void poly1305_core_setkey(struct poly1305_key *key, const u8 *raw_key) -{ - /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */ - key->r[0] = (get_unaligned_le32(raw_key + 0) >> 0) & 0x3ffffff; - key->r[1] = (get_unaligned_le32(raw_key + 3) >> 2) & 0x3ffff03; - key->r[2] = (get_unaligned_le32(raw_key + 6) >> 4) & 0x3ffc0ff; - key->r[3] = (get_unaligned_le32(raw_key + 9) >> 6) & 0x3f03fff; - key->r[4] = (get_unaligned_le32(raw_key + 12) >> 8) & 0x00fffff; -} -EXPORT_SYMBOL_GPL(poly1305_core_setkey); - -/* - * Poly1305 requires a unique key for each tag, which implies that we can't set - * it on the tfm that gets accessed by multiple users simultaneously. Instead we - * expect the key as the first 32 bytes in the update() call. 
- */ -unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx, - const u8 *src, unsigned int srclen) -{ - if (!dctx->sset) { - if (!dctx->rset && srclen >= POLY1305_BLOCK_SIZE) { - poly1305_core_setkey(&dctx->r, src); - src += POLY1305_BLOCK_SIZE; - srclen -= POLY1305_BLOCK_SIZE; - dctx->rset = true; - } - if (srclen >= POLY1305_BLOCK_SIZE) { - dctx->s[0] = get_unaligned_le32(src + 0); - dctx->s[1] = get_unaligned_le32(src + 4); - dctx->s[2] = get_unaligned_le32(src + 8); - dctx->s[3] = get_unaligned_le32(src + 12); - src += POLY1305_BLOCK_SIZE; - srclen -= POLY1305_BLOCK_SIZE; - dctx->sset = true; - } - } - return srclen; -} -EXPORT_SYMBOL_GPL(crypto_poly1305_setdesckey); - -static void poly1305_blocks_internal(struct poly1305_state *state, - const struct poly1305_key *key, - const void *src, unsigned int nblocks, - u32 hibit) -{ - u32 r0, r1, r2, r3, r4; - u32 s1, s2, s3, s4; - u32 h0, h1, h2, h3, h4; - u64 d0, d1, d2, d3, d4; - - if (!nblocks) - return; - - r0 = key->r[0]; - r1 = key->r[1]; - r2 = key->r[2]; - r3 = key->r[3]; - r4 = key->r[4]; - - s1 = r1 * 5; - s2 = r2 * 5; - s3 = r3 * 5; - s4 = r4 * 5; - - h0 = state->h[0]; - h1 = state->h[1]; - h2 = state->h[2]; - h3 = state->h[3]; - h4 = state->h[4]; - - do { - /* h += m[i] */ - h0 += (get_unaligned_le32(src + 0) >> 0) & 0x3ffffff; - h1 += (get_unaligned_le32(src + 3) >> 2) & 0x3ffffff; - h2 += (get_unaligned_le32(src + 6) >> 4) & 0x3ffffff; - h3 += (get_unaligned_le32(src + 9) >> 6) & 0x3ffffff; - h4 += (get_unaligned_le32(src + 12) >> 8) | hibit; - - /* h *= r */ - d0 = mlt(h0, r0) + mlt(h1, s4) + mlt(h2, s3) + - mlt(h3, s2) + mlt(h4, s1); - d1 = mlt(h0, r1) + mlt(h1, r0) + mlt(h2, s4) + - mlt(h3, s3) + mlt(h4, s2); - d2 = mlt(h0, r2) + mlt(h1, r1) + mlt(h2, r0) + - mlt(h3, s4) + mlt(h4, s3); - d3 = mlt(h0, r3) + mlt(h1, r2) + mlt(h2, r1) + - mlt(h3, r0) + mlt(h4, s4); - d4 = mlt(h0, r4) + mlt(h1, r3) + mlt(h2, r2) + - mlt(h3, r1) + mlt(h4, r0); - - /* (partial) h %= p */ - d1 += sr(d0, 26); h0 = and(d0, 0x3ffffff); - d2 += sr(d1, 26); h1 = and(d1, 0x3ffffff); - d3 += sr(d2, 26); h2 = and(d2, 0x3ffffff); - d4 += sr(d3, 26); h3 = and(d3, 0x3ffffff); - h0 += sr(d4, 26) * 5; h4 = and(d4, 0x3ffffff); - h1 += h0 >> 26; h0 = h0 & 0x3ffffff; - - src += POLY1305_BLOCK_SIZE; - } while (--nblocks); - - state->h[0] = h0; - state->h[1] = h1; - state->h[2] = h2; - state->h[3] = h3; - state->h[4] = h4; -} - -void poly1305_core_blocks(struct poly1305_state *state, - const struct poly1305_key *key, - const void *src, unsigned int nblocks) -{ - poly1305_blocks_internal(state, key, src, nblocks, 1 << 24); -} -EXPORT_SYMBOL_GPL(poly1305_core_blocks); - -static void poly1305_blocks(struct poly1305_desc_ctx *dctx, - const u8 *src, unsigned int srclen, u32 hibit) +static void poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src, + unsigned int srclen) { unsigned int datalen; @@ -174,8 +43,8 @@ static void poly1305_blocks(struct poly1305_desc_ctx *dctx, srclen = datalen; } - poly1305_blocks_internal(&dctx->h, &dctx->r, - src, srclen / POLY1305_BLOCK_SIZE, hibit); + poly1305_core_blocks(&dctx->h, &dctx->r, src, + srclen / POLY1305_BLOCK_SIZE, 1); } int crypto_poly1305_update(struct shash_desc *desc, @@ -193,13 +62,13 @@ int crypto_poly1305_update(struct shash_desc *desc, if (dctx->buflen == POLY1305_BLOCK_SIZE) { poly1305_blocks(dctx, dctx->buf, - POLY1305_BLOCK_SIZE, 1 << 24); + POLY1305_BLOCK_SIZE); dctx->buflen = 0; } } if (likely(srclen >= POLY1305_BLOCK_SIZE)) { - poly1305_blocks(dctx, src, srclen, 1 << 24); + 
poly1305_blocks(dctx, src, srclen); src += srclen - (srclen % POLY1305_BLOCK_SIZE); srclen %= POLY1305_BLOCK_SIZE; } @@ -213,54 +82,6 @@ int crypto_poly1305_update(struct shash_desc *desc, } EXPORT_SYMBOL_GPL(crypto_poly1305_update); -void poly1305_core_emit(const struct poly1305_state *state, void *dst) -{ - u32 h0, h1, h2, h3, h4; - u32 g0, g1, g2, g3, g4; - u32 mask; - - /* fully carry h */ - h0 = state->h[0]; - h1 = state->h[1]; - h2 = state->h[2]; - h3 = state->h[3]; - h4 = state->h[4]; - - h2 += (h1 >> 26); h1 = h1 & 0x3ffffff; - h3 += (h2 >> 26); h2 = h2 & 0x3ffffff; - h4 += (h3 >> 26); h3 = h3 & 0x3ffffff; - h0 += (h4 >> 26) * 5; h4 = h4 & 0x3ffffff; - h1 += (h0 >> 26); h0 = h0 & 0x3ffffff; - - /* compute h + -p */ - g0 = h0 + 5; - g1 = h1 + (g0 >> 26); g0 &= 0x3ffffff; - g2 = h2 + (g1 >> 26); g1 &= 0x3ffffff; - g3 = h3 + (g2 >> 26); g2 &= 0x3ffffff; - g4 = h4 + (g3 >> 26) - (1 << 26); g3 &= 0x3ffffff; - - /* select h if h < p, or h + -p if h >= p */ - mask = (g4 >> ((sizeof(u32) * 8) - 1)) - 1; - g0 &= mask; - g1 &= mask; - g2 &= mask; - g3 &= mask; - g4 &= mask; - mask = ~mask; - h0 = (h0 & mask) | g0; - h1 = (h1 & mask) | g1; - h2 = (h2 & mask) | g2; - h3 = (h3 & mask) | g3; - h4 = (h4 & mask) | g4; - - /* h = h % (2^128) */ - put_unaligned_le32((h0 >> 0) | (h1 << 26), dst + 0); - put_unaligned_le32((h1 >> 6) | (h2 << 20), dst + 4); - put_unaligned_le32((h2 >> 12) | (h3 << 14), dst + 8); - put_unaligned_le32((h3 >> 18) | (h4 << 8), dst + 12); -} -EXPORT_SYMBOL_GPL(poly1305_core_emit); - int crypto_poly1305_final(struct shash_desc *desc, u8 *dst) { struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); @@ -274,7 +95,7 @@ int crypto_poly1305_final(struct shash_desc *desc, u8 *dst) dctx->buf[dctx->buflen++] = 1; memset(dctx->buf + dctx->buflen, 0, POLY1305_BLOCK_SIZE - dctx->buflen); - poly1305_blocks(dctx, dctx->buf, POLY1305_BLOCK_SIZE, 0); + poly1305_core_blocks(&dctx->h, &dctx->r, dctx->buf, 1, 0); } poly1305_core_emit(&dctx->h, digest); diff --git a/include/crypto/internal/poly1305.h b/include/crypto/internal/poly1305.h new file mode 100644 index 000000000000..cb58e61f73a7 --- /dev/null +++ b/include/crypto/internal/poly1305.h @@ -0,0 +1,67 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Common values for the Poly1305 algorithm + */ + +#ifndef _CRYPTO_INTERNAL_POLY1305_H +#define _CRYPTO_INTERNAL_POLY1305_H + +#include +#include +#include + +struct shash_desc; + +/* + * Poly1305 core functions. These implement the ε-almost-∆-universal hash + * function underlying the Poly1305 MAC, i.e. they don't add an encrypted nonce + * ("s key") at the end. They also only support block-aligned inputs. + */ +void poly1305_core_setkey(struct poly1305_key *key, const u8 *raw_key); +static inline void poly1305_core_init(struct poly1305_state *state) +{ + *state = (struct poly1305_state){}; +} + +void poly1305_core_blocks(struct poly1305_state *state, + const struct poly1305_key *key, const void *src, + unsigned int nblocks, u32 hibit); +void poly1305_core_emit(const struct poly1305_state *state, void *dst); + +/* Crypto API helper functions for the Poly1305 MAC */ +int crypto_poly1305_init(struct shash_desc *desc); + +int crypto_poly1305_update(struct shash_desc *desc, + const u8 *src, unsigned int srclen); +int crypto_poly1305_final(struct shash_desc *desc, u8 *dst); + +/* + * Poly1305 requires a unique key for each tag, which implies that we can't set + * it on the tfm that gets accessed by multiple users simultaneously. 
Instead we + * expect the key as the first 32 bytes in the update() call. + */ +static inline +unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx, + const u8 *src, unsigned int srclen) +{ + if (!dctx->sset) { + if (!dctx->rset && srclen >= POLY1305_BLOCK_SIZE) { + poly1305_core_setkey(&dctx->r, src); + src += POLY1305_BLOCK_SIZE; + srclen -= POLY1305_BLOCK_SIZE; + dctx->rset = true; + } + if (srclen >= POLY1305_BLOCK_SIZE) { + dctx->s[0] = get_unaligned_le32(src + 0); + dctx->s[1] = get_unaligned_le32(src + 4); + dctx->s[2] = get_unaligned_le32(src + 8); + dctx->s[3] = get_unaligned_le32(src + 12); + src += POLY1305_BLOCK_SIZE; + srclen -= POLY1305_BLOCK_SIZE; + dctx->sset = true; + } + } + return srclen; +} + +#endif diff --git a/include/crypto/poly1305.h b/include/crypto/poly1305.h index 34317ed2071e..f5a4319c2a1f 100644 --- a/include/crypto/poly1305.h +++ b/include/crypto/poly1305.h @@ -38,27 +38,4 @@ struct poly1305_desc_ctx { bool sset; }; -/* - * Poly1305 core functions. These implement the ε-almost-∆-universal hash - * function underlying the Poly1305 MAC, i.e. they don't add an encrypted nonce - * ("s key") at the end. They also only support block-aligned inputs. - */ -void poly1305_core_setkey(struct poly1305_key *key, const u8 *raw_key); -static inline void poly1305_core_init(struct poly1305_state *state) -{ - memset(state->h, 0, sizeof(state->h)); -} -void poly1305_core_blocks(struct poly1305_state *state, - const struct poly1305_key *key, - const void *src, unsigned int nblocks); -void poly1305_core_emit(const struct poly1305_state *state, void *dst); - -/* Crypto API helper functions for the Poly1305 MAC */ -int crypto_poly1305_init(struct shash_desc *desc); -unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx, - const u8 *src, unsigned int srclen); -int crypto_poly1305_update(struct shash_desc *desc, - const u8 *src, unsigned int srclen); -int crypto_poly1305_final(struct shash_desc *desc, u8 *dst); - #endif diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig index 6a11931ae105..c4882d29879e 100644 --- a/lib/crypto/Kconfig +++ b/lib/crypto/Kconfig @@ -37,5 +37,8 @@ config CRYPTO_LIB_CHACHA config CRYPTO_LIB_DES tristate +config CRYPTO_LIB_POLY1305_GENERIC + tristate + config CRYPTO_LIB_SHA256 tristate diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile index 0ce40604e104..b58ab6843a9d 100644 --- a/lib/crypto/Makefile +++ b/lib/crypto/Makefile @@ -13,5 +13,8 @@ libarc4-y := arc4.o obj-$(CONFIG_CRYPTO_LIB_DES) += libdes.o libdes-y := des.o +obj-$(CONFIG_CRYPTO_LIB_POLY1305_GENERIC) += libpoly1305.o +libpoly1305-y := poly1305.o + obj-$(CONFIG_CRYPTO_LIB_SHA256) += libsha256.o libsha256-y := sha256.o diff --git a/lib/crypto/poly1305.c b/lib/crypto/poly1305.c new file mode 100644 index 000000000000..f019a57dbc1b --- /dev/null +++ b/lib/crypto/poly1305.c @@ -0,0 +1,158 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Poly1305 authenticator algorithm, RFC7539 + * + * Copyright (C) 2015 Martin Willi + * + * Based on public domain code by Andrew Moon and Daniel J. Bernstein. 
+ */ + +#include +#include +#include +#include + +static inline u64 mlt(u64 a, u64 b) +{ + return a * b; +} + +static inline u32 sr(u64 v, u_char n) +{ + return v >> n; +} + +static inline u32 and(u32 v, u32 mask) +{ + return v & mask; +} + +void poly1305_core_setkey(struct poly1305_key *key, const u8 *raw_key) +{ + /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */ + key->r[0] = (get_unaligned_le32(raw_key + 0) >> 0) & 0x3ffffff; + key->r[1] = (get_unaligned_le32(raw_key + 3) >> 2) & 0x3ffff03; + key->r[2] = (get_unaligned_le32(raw_key + 6) >> 4) & 0x3ffc0ff; + key->r[3] = (get_unaligned_le32(raw_key + 9) >> 6) & 0x3f03fff; + key->r[4] = (get_unaligned_le32(raw_key + 12) >> 8) & 0x00fffff; +} +EXPORT_SYMBOL_GPL(poly1305_core_setkey); + +void poly1305_core_blocks(struct poly1305_state *state, + const struct poly1305_key *key, const void *src, + unsigned int nblocks, u32 hibit) +{ + u32 r0, r1, r2, r3, r4; + u32 s1, s2, s3, s4; + u32 h0, h1, h2, h3, h4; + u64 d0, d1, d2, d3, d4; + + if (!nblocks) + return; + + r0 = key->r[0]; + r1 = key->r[1]; + r2 = key->r[2]; + r3 = key->r[3]; + r4 = key->r[4]; + + s1 = r1 * 5; + s2 = r2 * 5; + s3 = r3 * 5; + s4 = r4 * 5; + + h0 = state->h[0]; + h1 = state->h[1]; + h2 = state->h[2]; + h3 = state->h[3]; + h4 = state->h[4]; + + do { + /* h += m[i] */ + h0 += (get_unaligned_le32(src + 0) >> 0) & 0x3ffffff; + h1 += (get_unaligned_le32(src + 3) >> 2) & 0x3ffffff; + h2 += (get_unaligned_le32(src + 6) >> 4) & 0x3ffffff; + h3 += (get_unaligned_le32(src + 9) >> 6) & 0x3ffffff; + h4 += (get_unaligned_le32(src + 12) >> 8) | (hibit << 24); + + /* h *= r */ + d0 = mlt(h0, r0) + mlt(h1, s4) + mlt(h2, s3) + + mlt(h3, s2) + mlt(h4, s1); + d1 = mlt(h0, r1) + mlt(h1, r0) + mlt(h2, s4) + + mlt(h3, s3) + mlt(h4, s2); + d2 = mlt(h0, r2) + mlt(h1, r1) + mlt(h2, r0) + + mlt(h3, s4) + mlt(h4, s3); + d3 = mlt(h0, r3) + mlt(h1, r2) + mlt(h2, r1) + + mlt(h3, r0) + mlt(h4, s4); + d4 = mlt(h0, r4) + mlt(h1, r3) + mlt(h2, r2) + + mlt(h3, r1) + mlt(h4, r0); + + /* (partial) h %= p */ + d1 += sr(d0, 26); h0 = and(d0, 0x3ffffff); + d2 += sr(d1, 26); h1 = and(d1, 0x3ffffff); + d3 += sr(d2, 26); h2 = and(d2, 0x3ffffff); + d4 += sr(d3, 26); h3 = and(d3, 0x3ffffff); + h0 += sr(d4, 26) * 5; h4 = and(d4, 0x3ffffff); + h1 += h0 >> 26; h0 = h0 & 0x3ffffff; + + src += POLY1305_BLOCK_SIZE; + } while (--nblocks); + + state->h[0] = h0; + state->h[1] = h1; + state->h[2] = h2; + state->h[3] = h3; + state->h[4] = h4; +} +EXPORT_SYMBOL_GPL(poly1305_core_blocks); + +void poly1305_core_emit(const struct poly1305_state *state, void *dst) +{ + u32 h0, h1, h2, h3, h4; + u32 g0, g1, g2, g3, g4; + u32 mask; + + /* fully carry h */ + h0 = state->h[0]; + h1 = state->h[1]; + h2 = state->h[2]; + h3 = state->h[3]; + h4 = state->h[4]; + + h2 += (h1 >> 26); h1 = h1 & 0x3ffffff; + h3 += (h2 >> 26); h2 = h2 & 0x3ffffff; + h4 += (h3 >> 26); h3 = h3 & 0x3ffffff; + h0 += (h4 >> 26) * 5; h4 = h4 & 0x3ffffff; + h1 += (h0 >> 26); h0 = h0 & 0x3ffffff; + + /* compute h + -p */ + g0 = h0 + 5; + g1 = h1 + (g0 >> 26); g0 &= 0x3ffffff; + g2 = h2 + (g1 >> 26); g1 &= 0x3ffffff; + g3 = h3 + (g2 >> 26); g2 &= 0x3ffffff; + g4 = h4 + (g3 >> 26) - (1 << 26); g3 &= 0x3ffffff; + + /* select h if h < p, or h + -p if h >= p */ + mask = (g4 >> ((sizeof(u32) * 8) - 1)) - 1; + g0 &= mask; + g1 &= mask; + g2 &= mask; + g3 &= mask; + g4 &= mask; + mask = ~mask; + h0 = (h0 & mask) | g0; + h1 = (h1 & mask) | g1; + h2 = (h2 & mask) | g2; + h3 = (h3 & mask) | g3; + h4 = (h4 & mask) | g4; + + /* h = h % (2^128) */ + put_unaligned_le32((h0 >> 0) | (h1 << 
26), dst + 0); + put_unaligned_le32((h1 >> 6) | (h2 << 20), dst + 4); + put_unaligned_le32((h2 >> 12) | (h3 << 14), dst + 8); + put_unaligned_le32((h3 >> 18) | (h4 << 8), dst + 12); +} +EXPORT_SYMBOL_GPL(poly1305_core_emit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Martin Willi "); -- cgit v1.2.3 From ad8f5b88383ea685f2b8df2a12ee3e08089a1287 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 8 Nov 2019 13:22:20 +0100 Subject: crypto: x86/poly1305 - unify Poly1305 state struct with generic code In preparation of exposing a Poly1305 library interface directly from the accelerated x86 driver, align the state descriptor of the x86 code with the one used by the generic driver. This is needed to make the library interface unified between all implementations. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/x86/crypto/poly1305_glue.c | 88 +++++++++++++------------------------- crypto/poly1305_generic.c | 6 +-- include/crypto/internal/poly1305.h | 4 +- include/crypto/poly1305.h | 18 ++++---- 4 files changed, 43 insertions(+), 73 deletions(-) (limited to 'arch') diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c index 6ccf8eb26324..b43b93c95e79 100644 --- a/arch/x86/crypto/poly1305_glue.c +++ b/arch/x86/crypto/poly1305_glue.c @@ -14,40 +14,14 @@ #include #include -struct poly1305_simd_desc_ctx { - struct poly1305_desc_ctx base; - /* derived key u set? */ - bool uset; -#ifdef CONFIG_AS_AVX2 - /* derived keys r^3, r^4 set? */ - bool wset; -#endif - /* derived Poly1305 key r^2 */ - u32 u[5]; - /* ... silently appended r^3 and r^4 when using AVX2 */ -}; - asmlinkage void poly1305_block_sse2(u32 *h, const u8 *src, const u32 *r, unsigned int blocks); asmlinkage void poly1305_2block_sse2(u32 *h, const u8 *src, const u32 *r, unsigned int blocks, const u32 *u); -#ifdef CONFIG_AS_AVX2 asmlinkage void poly1305_4block_avx2(u32 *h, const u8 *src, const u32 *r, unsigned int blocks, const u32 *u); -static bool poly1305_use_avx2; -#endif -static int poly1305_simd_init(struct shash_desc *desc) -{ - struct poly1305_simd_desc_ctx *sctx = shash_desc_ctx(desc); - - sctx->uset = false; -#ifdef CONFIG_AS_AVX2 - sctx->wset = false; -#endif - - return crypto_poly1305_init(desc); -} +static bool poly1305_use_avx2 __ro_after_init; static void poly1305_simd_mult(u32 *a, const u32 *b) { @@ -63,53 +37,49 @@ static void poly1305_simd_mult(u32 *a, const u32 *b) static unsigned int poly1305_simd_blocks(struct poly1305_desc_ctx *dctx, const u8 *src, unsigned int srclen) { - struct poly1305_simd_desc_ctx *sctx; unsigned int blocks, datalen; - BUILD_BUG_ON(offsetof(struct poly1305_simd_desc_ctx, base)); - sctx = container_of(dctx, struct poly1305_simd_desc_ctx, base); - if (unlikely(!dctx->sset)) { datalen = crypto_poly1305_setdesckey(dctx, src, srclen); src += srclen - datalen; srclen = datalen; } -#ifdef CONFIG_AS_AVX2 - if (poly1305_use_avx2 && srclen >= POLY1305_BLOCK_SIZE * 4) { - if (unlikely(!sctx->wset)) { - if (!sctx->uset) { - memcpy(sctx->u, dctx->r.r, sizeof(sctx->u)); - poly1305_simd_mult(sctx->u, dctx->r.r); - sctx->uset = true; + if (IS_ENABLED(CONFIG_AS_AVX2) && + poly1305_use_avx2 && + srclen >= POLY1305_BLOCK_SIZE * 4) { + if (unlikely(dctx->rset < 4)) { + if (dctx->rset < 2) { + dctx->r[1] = dctx->r[0]; + poly1305_simd_mult(dctx->r[1].r, dctx->r[0].r); } - memcpy(sctx->u + 5, sctx->u, sizeof(sctx->u)); - poly1305_simd_mult(sctx->u + 5, dctx->r.r); - memcpy(sctx->u + 10, sctx->u + 5, sizeof(sctx->u)); - poly1305_simd_mult(sctx->u + 10, dctx->r.r); - sctx->wset = 
true; + dctx->r[2] = dctx->r[1]; + poly1305_simd_mult(dctx->r[2].r, dctx->r[0].r); + dctx->r[3] = dctx->r[2]; + poly1305_simd_mult(dctx->r[3].r, dctx->r[0].r); + dctx->rset = 4; } blocks = srclen / (POLY1305_BLOCK_SIZE * 4); - poly1305_4block_avx2(dctx->h.h, src, dctx->r.r, blocks, - sctx->u); + poly1305_4block_avx2(dctx->h.h, src, dctx->r[0].r, blocks, + dctx->r[1].r); src += POLY1305_BLOCK_SIZE * 4 * blocks; srclen -= POLY1305_BLOCK_SIZE * 4 * blocks; } -#endif + if (likely(srclen >= POLY1305_BLOCK_SIZE * 2)) { - if (unlikely(!sctx->uset)) { - memcpy(sctx->u, dctx->r.r, sizeof(sctx->u)); - poly1305_simd_mult(sctx->u, dctx->r.r); - sctx->uset = true; + if (unlikely(dctx->rset < 2)) { + dctx->r[1] = dctx->r[0]; + poly1305_simd_mult(dctx->r[1].r, dctx->r[0].r); + dctx->rset = 2; } blocks = srclen / (POLY1305_BLOCK_SIZE * 2); - poly1305_2block_sse2(dctx->h.h, src, dctx->r.r, blocks, - sctx->u); + poly1305_2block_sse2(dctx->h.h, src, dctx->r[0].r, + blocks, dctx->r[1].r); src += POLY1305_BLOCK_SIZE * 2 * blocks; srclen -= POLY1305_BLOCK_SIZE * 2 * blocks; } if (srclen >= POLY1305_BLOCK_SIZE) { - poly1305_block_sse2(dctx->h.h, src, dctx->r.r, 1); + poly1305_block_sse2(dctx->h.h, src, dctx->r[0].r, 1); srclen -= POLY1305_BLOCK_SIZE; } return srclen; @@ -159,10 +129,10 @@ static int poly1305_simd_update(struct shash_desc *desc, static struct shash_alg alg = { .digestsize = POLY1305_DIGEST_SIZE, - .init = poly1305_simd_init, + .init = crypto_poly1305_init, .update = poly1305_simd_update, .final = crypto_poly1305_final, - .descsize = sizeof(struct poly1305_simd_desc_ctx), + .descsize = sizeof(struct poly1305_desc_ctx), .base = { .cra_name = "poly1305", .cra_driver_name = "poly1305-simd", @@ -177,14 +147,14 @@ static int __init poly1305_simd_mod_init(void) if (!boot_cpu_has(X86_FEATURE_XMM2)) return -ENODEV; -#ifdef CONFIG_AS_AVX2 - poly1305_use_avx2 = boot_cpu_has(X86_FEATURE_AVX) && + poly1305_use_avx2 = IS_ENABLED(CONFIG_AS_AVX2) && + boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_AVX2) && cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL); - alg.descsize = sizeof(struct poly1305_simd_desc_ctx); + alg.descsize = sizeof(struct poly1305_desc_ctx) + 5 * sizeof(u32); if (poly1305_use_avx2) alg.descsize += 10 * sizeof(u32); -#endif + return crypto_register_shash(&alg); } diff --git a/crypto/poly1305_generic.c b/crypto/poly1305_generic.c index 067f493c2504..f3fcd9578a47 100644 --- a/crypto/poly1305_generic.c +++ b/crypto/poly1305_generic.c @@ -25,7 +25,7 @@ int crypto_poly1305_init(struct shash_desc *desc) poly1305_core_init(&dctx->h); dctx->buflen = 0; - dctx->rset = false; + dctx->rset = 0; dctx->sset = false; return 0; @@ -43,7 +43,7 @@ static void poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src, srclen = datalen; } - poly1305_core_blocks(&dctx->h, &dctx->r, src, + poly1305_core_blocks(&dctx->h, dctx->r, src, srclen / POLY1305_BLOCK_SIZE, 1); } @@ -95,7 +95,7 @@ int crypto_poly1305_final(struct shash_desc *desc, u8 *dst) dctx->buf[dctx->buflen++] = 1; memset(dctx->buf + dctx->buflen, 0, POLY1305_BLOCK_SIZE - dctx->buflen); - poly1305_core_blocks(&dctx->h, &dctx->r, dctx->buf, 1, 0); + poly1305_core_blocks(&dctx->h, dctx->r, dctx->buf, 1, 0); } poly1305_core_emit(&dctx->h, digest); diff --git a/include/crypto/internal/poly1305.h b/include/crypto/internal/poly1305.h index cb58e61f73a7..04fa269e5534 100644 --- a/include/crypto/internal/poly1305.h +++ b/include/crypto/internal/poly1305.h @@ -46,10 +46,10 @@ unsigned int crypto_poly1305_setdesckey(struct 
poly1305_desc_ctx *dctx, { if (!dctx->sset) { if (!dctx->rset && srclen >= POLY1305_BLOCK_SIZE) { - poly1305_core_setkey(&dctx->r, src); + poly1305_core_setkey(dctx->r, src); src += POLY1305_BLOCK_SIZE; srclen -= POLY1305_BLOCK_SIZE; - dctx->rset = true; + dctx->rset = 1; } if (srclen >= POLY1305_BLOCK_SIZE) { dctx->s[0] = get_unaligned_le32(src + 0); diff --git a/include/crypto/poly1305.h b/include/crypto/poly1305.h index f5a4319c2a1f..36b5886cb50c 100644 --- a/include/crypto/poly1305.h +++ b/include/crypto/poly1305.h @@ -22,20 +22,20 @@ struct poly1305_state { }; struct poly1305_desc_ctx { - /* key */ - struct poly1305_key r; - /* finalize key */ - u32 s[4]; - /* accumulator */ - struct poly1305_state h; /* partial buffer */ u8 buf[POLY1305_BLOCK_SIZE]; /* bytes used in partial buffer */ unsigned int buflen; - /* r key has been set */ - bool rset; - /* s key has been set */ + /* how many keys have been set in r[] */ + unsigned short rset; + /* whether s[] has been set */ bool sset; + /* finalize key */ + u32 s[4]; + /* accumulator */ + struct poly1305_state h; + /* key */ + struct poly1305_key r[1]; }; #endif -- cgit v1.2.3 From 1b2c6a5120489d41c8ea3b8dacd0b4586289b158 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 8 Nov 2019 13:22:22 +0100 Subject: crypto: x86/poly1305 - depend on generic library not generic shash Remove the dependency on the generic Poly1305 driver. Instead, depend on the generic library so that we only reuse code without pulling in the generic skcipher implementation as well. While at it, remove the logic that prefers the non-SIMD path for short inputs - this is no longer necessary after recent FPU handling changes on x86. Since this removes the last remaining user of the routines exported by the generic shash driver, unexport them and make them static. 
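The dispatch idiom this introduces is worth distilling: each bulk-processing call now runs in its own kernel-FPU section, with the scalar library code as the fallback, instead of routing short inputs to the generic shash. A sketch of the pattern, using the functions from the diff below (the wrapper itself is hypothetical; the patch open-codes this at both call sites):

static unsigned int poly1305_do_blocks(struct poly1305_desc_ctx *dctx,
				       const u8 *src, unsigned int srclen)
{
	unsigned int bytes;

	/* SIMD registers may only be touched when the FPU is usable in
	 * this context; otherwise take the integer-only scalar path. */
	if (likely(crypto_simd_usable())) {
		kernel_fpu_begin();
		bytes = poly1305_simd_blocks(dctx, src, srclen);
		kernel_fpu_end();
	} else {
		bytes = poly1305_scalar_blocks(dctx, src, srclen);
	}
	return bytes;	/* leftover bytes, i.e. srclen % block size */
}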
Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/x86/crypto/poly1305_glue.c | 66 +++++++++++++++++++++++++++++++------- crypto/Kconfig | 2 +- crypto/poly1305_generic.c | 11 +++---- include/crypto/internal/poly1305.h | 9 ------ 4 files changed, 60 insertions(+), 28 deletions(-) (limited to 'arch') diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c index b43b93c95e79..a5b3a054604c 100644 --- a/arch/x86/crypto/poly1305_glue.c +++ b/arch/x86/crypto/poly1305_glue.c @@ -34,6 +34,24 @@ static void poly1305_simd_mult(u32 *a, const u32 *b) poly1305_block_sse2(a, m, b, 1); } +static unsigned int poly1305_scalar_blocks(struct poly1305_desc_ctx *dctx, + const u8 *src, unsigned int srclen) +{ + unsigned int datalen; + + if (unlikely(!dctx->sset)) { + datalen = crypto_poly1305_setdesckey(dctx, src, srclen); + src += srclen - datalen; + srclen = datalen; + } + if (srclen >= POLY1305_BLOCK_SIZE) { + poly1305_core_blocks(&dctx->h, dctx->r, src, + srclen / POLY1305_BLOCK_SIZE, 1); + srclen %= POLY1305_BLOCK_SIZE; + } + return srclen; +} + static unsigned int poly1305_simd_blocks(struct poly1305_desc_ctx *dctx, const u8 *src, unsigned int srclen) { @@ -91,12 +109,6 @@ static int poly1305_simd_update(struct shash_desc *desc, struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); unsigned int bytes; - /* kernel_fpu_begin/end is costly, use fallback for small updates */ - if (srclen <= 288 || !crypto_simd_usable()) - return crypto_poly1305_update(desc, src, srclen); - - kernel_fpu_begin(); - if (unlikely(dctx->buflen)) { bytes = min(srclen, POLY1305_BLOCK_SIZE - dctx->buflen); memcpy(dctx->buf + dctx->buflen, src, bytes); @@ -105,25 +117,57 @@ static int poly1305_simd_update(struct shash_desc *desc, dctx->buflen += bytes; if (dctx->buflen == POLY1305_BLOCK_SIZE) { - poly1305_simd_blocks(dctx, dctx->buf, - POLY1305_BLOCK_SIZE); + if (likely(crypto_simd_usable())) { + kernel_fpu_begin(); + poly1305_simd_blocks(dctx, dctx->buf, + POLY1305_BLOCK_SIZE); + kernel_fpu_end(); + } else { + poly1305_scalar_blocks(dctx, dctx->buf, + POLY1305_BLOCK_SIZE); + } dctx->buflen = 0; } } if (likely(srclen >= POLY1305_BLOCK_SIZE)) { - bytes = poly1305_simd_blocks(dctx, src, srclen); + if (likely(crypto_simd_usable())) { + kernel_fpu_begin(); + bytes = poly1305_simd_blocks(dctx, src, srclen); + kernel_fpu_end(); + } else { + bytes = poly1305_scalar_blocks(dctx, src, srclen); + } src += srclen - bytes; srclen = bytes; } - kernel_fpu_end(); - if (unlikely(srclen)) { dctx->buflen = srclen; memcpy(dctx->buf, src, srclen); } +} + +static int crypto_poly1305_init(struct shash_desc *desc) +{ + struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); + + poly1305_core_init(&dctx->h); + dctx->buflen = 0; + dctx->rset = 0; + dctx->sset = false; + + return 0; +} + +static int crypto_poly1305_final(struct shash_desc *desc, u8 *dst) +{ + struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); + + if (unlikely(!dctx->sset)) + return -ENOKEY; + poly1305_final_generic(dctx, dst); return 0; } diff --git a/crypto/Kconfig b/crypto/Kconfig index 362dd6ae6aca..2c7327a5b28e 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -714,7 +714,7 @@ config CRYPTO_POLY1305 config CRYPTO_POLY1305_X86_64 tristate "Poly1305 authenticator algorithm (x86_64/SSE2/AVX2)" depends on X86 && 64BIT - select CRYPTO_POLY1305 + select CRYPTO_LIB_POLY1305_GENERIC help Poly1305 authenticator algorithm, RFC7539. 
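Since the shash entry points become static in the diff below, it is worth recording how the poly1305 shash is driven from the outside: there is no ->setkey(), the 32-byte one-time key simply travels as the first 32 bytes passed to update(), per the setdesckey convention. A hedged sketch (helper name and error handling are illustrative, not from the patch):

#include <crypto/hash.h>
#include <crypto/poly1305.h>

/* Illustrative one-shot MAC via the shash API; the one-time key is the
 * first POLY1305_KEY_SIZE bytes of data, not set via ->setkey(). */
static int poly1305_shash_mac(const u8 key[POLY1305_KEY_SIZE],
			      const u8 *msg, unsigned int len,
			      u8 mac[POLY1305_DIGEST_SIZE])
{
	struct crypto_shash *tfm;
	int err;

	tfm = crypto_alloc_shash("poly1305", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	do {
		SHASH_DESC_ON_STACK(desc, tfm);

		desc->tfm = tfm;
		err = crypto_shash_init(desc) ?:
		      crypto_shash_update(desc, key, POLY1305_KEY_SIZE) ?:
		      crypto_shash_update(desc, msg, len) ?:
		      crypto_shash_final(desc, mac);
	} while (0);

	crypto_free_shash(tfm);
	return err;
}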
diff --git a/crypto/poly1305_generic.c b/crypto/poly1305_generic.c index afe9a9e576dd..21edbd8c99fb 100644 --- a/crypto/poly1305_generic.c +++ b/crypto/poly1305_generic.c @@ -19,7 +19,7 @@ #include #include -int crypto_poly1305_init(struct shash_desc *desc) +static int crypto_poly1305_init(struct shash_desc *desc) { struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); @@ -30,7 +30,6 @@ int crypto_poly1305_init(struct shash_desc *desc) return 0; } -EXPORT_SYMBOL_GPL(crypto_poly1305_init); static void poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src, unsigned int srclen) @@ -47,8 +46,8 @@ static void poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src, srclen / POLY1305_BLOCK_SIZE, 1); } -int crypto_poly1305_update(struct shash_desc *desc, - const u8 *src, unsigned int srclen) +static int crypto_poly1305_update(struct shash_desc *desc, + const u8 *src, unsigned int srclen) { struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); unsigned int bytes; @@ -80,9 +79,8 @@ int crypto_poly1305_update(struct shash_desc *desc, return 0; } -EXPORT_SYMBOL_GPL(crypto_poly1305_update); -int crypto_poly1305_final(struct shash_desc *desc, u8 *dst) +static int crypto_poly1305_final(struct shash_desc *desc, u8 *dst) { struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); @@ -92,7 +90,6 @@ int crypto_poly1305_final(struct shash_desc *desc, u8 *dst) poly1305_final_generic(dctx, dst); return 0; } -EXPORT_SYMBOL_GPL(crypto_poly1305_final); static struct shash_alg poly1305_alg = { .digestsize = POLY1305_DIGEST_SIZE, diff --git a/include/crypto/internal/poly1305.h b/include/crypto/internal/poly1305.h index 04fa269e5534..479b0cab2a1a 100644 --- a/include/crypto/internal/poly1305.h +++ b/include/crypto/internal/poly1305.h @@ -10,8 +10,6 @@ #include #include -struct shash_desc; - /* * Poly1305 core functions. These implement the ε-almost-∆-universal hash * function underlying the Poly1305 MAC, i.e. they don't add an encrypted nonce @@ -28,13 +26,6 @@ void poly1305_core_blocks(struct poly1305_state *state, unsigned int nblocks, u32 hibit); void poly1305_core_emit(const struct poly1305_state *state, void *dst); -/* Crypto API helper functions for the Poly1305 MAC */ -int crypto_poly1305_init(struct shash_desc *desc); - -int crypto_poly1305_update(struct shash_desc *desc, - const u8 *src, unsigned int srclen); -int crypto_poly1305_final(struct shash_desc *desc, u8 *dst); - /* * Poly1305 requires a unique key for each tag, which implies that we can't set * it on the tfm that gets accessed by multiple users simultaneously. Instead we -- cgit v1.2.3 From f0e89bcfbb894e5844cd1bbf6b3cf7c63cb0f5ac Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 8 Nov 2019 13:22:23 +0100 Subject: crypto: x86/poly1305 - expose existing driver as poly1305 library Implement the arch init/update/final Poly1305 library routines in the accelerated SIMD driver for x86 so they are accessible to users of the Poly1305 library interface as well. 
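Once these hooks are exported, a library user needs no crypto API state at all. Assuming the poly1305_init/update/final wrappers that the library-interface patch in this series adds to <crypto/poly1305.h> (they branch to the _arch variants when CONFIG_CRYPTO_ARCH_HAVE_LIB_POLY1305 is enabled, which is not shown in this excerpt), usage reduces to a sketch like:

#include <crypto/poly1305.h>

/* Illustrative: compute tag = Poly1305(key, msg) with a one-time key. */
static void poly1305_lib_mac(const u8 key[POLY1305_KEY_SIZE],
			     const u8 *msg, unsigned int len,
			     u8 tag[POLY1305_DIGEST_SIZE])
{
	struct poly1305_desc_ctx desc;

	poly1305_init(&desc, key);	/* loads clamped r and the s nonce */
	poly1305_update(&desc, msg, len);
	poly1305_final(&desc, tag);	/* adds s, emits the 16-byte tag */
}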
Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/x86/crypto/poly1305_glue.c | 57 +++++++++++++++++++++++++++++------------ crypto/Kconfig | 1 + lib/crypto/Kconfig | 1 + 3 files changed, 43 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c index a5b3a054604c..370cd88068ec 100644 --- a/arch/x86/crypto/poly1305_glue.c +++ b/arch/x86/crypto/poly1305_glue.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -21,7 +22,8 @@ asmlinkage void poly1305_2block_sse2(u32 *h, const u8 *src, const u32 *r, asmlinkage void poly1305_4block_avx2(u32 *h, const u8 *src, const u32 *r, unsigned int blocks, const u32 *u); -static bool poly1305_use_avx2 __ro_after_init; +static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_simd); +static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_avx2); static void poly1305_simd_mult(u32 *a, const u32 *b) { @@ -64,7 +66,7 @@ static unsigned int poly1305_simd_blocks(struct poly1305_desc_ctx *dctx, } if (IS_ENABLED(CONFIG_AS_AVX2) && - poly1305_use_avx2 && + static_branch_likely(&poly1305_use_avx2) && srclen >= POLY1305_BLOCK_SIZE * 4) { if (unlikely(dctx->rset < 4)) { if (dctx->rset < 2) { @@ -103,10 +105,15 @@ static unsigned int poly1305_simd_blocks(struct poly1305_desc_ctx *dctx, return srclen; } -static int poly1305_simd_update(struct shash_desc *desc, - const u8 *src, unsigned int srclen) +void poly1305_init_arch(struct poly1305_desc_ctx *desc, const u8 *key) +{ + poly1305_init_generic(desc, key); +} +EXPORT_SYMBOL(poly1305_init_arch); + +void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src, + unsigned int srclen) { - struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); unsigned int bytes; if (unlikely(dctx->buflen)) { @@ -117,7 +124,8 @@ static int poly1305_simd_update(struct shash_desc *desc, dctx->buflen += bytes; if (dctx->buflen == POLY1305_BLOCK_SIZE) { - if (likely(crypto_simd_usable())) { + if (static_branch_likely(&poly1305_use_simd) && + likely(crypto_simd_usable())) { kernel_fpu_begin(); poly1305_simd_blocks(dctx, dctx->buf, POLY1305_BLOCK_SIZE); @@ -131,7 +139,8 @@ static int poly1305_simd_update(struct shash_desc *desc, } if (likely(srclen >= POLY1305_BLOCK_SIZE)) { - if (likely(crypto_simd_usable())) { + if (static_branch_likely(&poly1305_use_simd) && + likely(crypto_simd_usable())) { kernel_fpu_begin(); bytes = poly1305_simd_blocks(dctx, src, srclen); kernel_fpu_end(); @@ -147,6 +156,13 @@ static int poly1305_simd_update(struct shash_desc *desc, memcpy(dctx->buf, src, srclen); } } +EXPORT_SYMBOL(poly1305_update_arch); + +void poly1305_final_arch(struct poly1305_desc_ctx *desc, u8 *digest) +{ + poly1305_final_generic(desc, digest); +} +EXPORT_SYMBOL(poly1305_final_arch); static int crypto_poly1305_init(struct shash_desc *desc) { @@ -171,6 +187,15 @@ static int crypto_poly1305_final(struct shash_desc *desc, u8 *dst) return 0; } +static int poly1305_simd_update(struct shash_desc *desc, + const u8 *src, unsigned int srclen) +{ + struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); + + poly1305_update_arch(dctx, src, srclen); + return 0; +} + static struct shash_alg alg = { .digestsize = POLY1305_DIGEST_SIZE, .init = crypto_poly1305_init, @@ -189,15 +214,15 @@ static struct shash_alg alg = { static int __init poly1305_simd_mod_init(void) { if (!boot_cpu_has(X86_FEATURE_XMM2)) - return -ENODEV; - - poly1305_use_avx2 = IS_ENABLED(CONFIG_AS_AVX2) && - boot_cpu_has(X86_FEATURE_AVX) && - 
boot_cpu_has(X86_FEATURE_AVX2) && - cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL); - alg.descsize = sizeof(struct poly1305_desc_ctx) + 5 * sizeof(u32); - if (poly1305_use_avx2) - alg.descsize += 10 * sizeof(u32); + return 0; + + static_branch_enable(&poly1305_use_simd); + + if (IS_ENABLED(CONFIG_AS_AVX2) && + boot_cpu_has(X86_FEATURE_AVX) && + boot_cpu_has(X86_FEATURE_AVX2) && + cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) + static_branch_enable(&poly1305_use_avx2); return crypto_register_shash(&alg); } diff --git a/crypto/Kconfig b/crypto/Kconfig index 2c7327a5b28e..7aa4310713cf 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -715,6 +715,7 @@ config CRYPTO_POLY1305_X86_64 tristate "Poly1305 authenticator algorithm (x86_64/SSE2/AVX2)" depends on X86 && 64BIT select CRYPTO_LIB_POLY1305_GENERIC + select CRYPTO_ARCH_HAVE_LIB_POLY1305 help Poly1305 authenticator algorithm, RFC7539. diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig index a731ea36bd5c..181754615f73 100644 --- a/lib/crypto/Kconfig +++ b/lib/crypto/Kconfig @@ -39,6 +39,7 @@ config CRYPTO_LIB_DES config CRYPTO_LIB_POLY1305_RSIZE int + default 4 if X86_64 default 1 config CRYPTO_ARCH_HAVE_LIB_POLY1305 -- cgit v1.2.3 From f569ca16475155013525686d0f73bc379c67e635 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 8 Nov 2019 13:22:24 +0100 Subject: crypto: arm64/poly1305 - incorporate OpenSSL/CRYPTOGAMS NEON implementation This is a straight import of the OpenSSL/CRYPTOGAMS Poly1305 implementation for NEON authored by Andy Polyakov, and contributed by him to the OpenSSL project. The file 'poly1305-armv8.pl' is taken straight from this upstream GitHub repository [0] at commit ec55a08dc0244ce570c4fc7cade330c60798952f, and already contains all the changes required to build it as part of a Linux kernel module. 
[0] https://github.com/dot-asm/cryptogams Co-developed-by: Andy Polyakov Signed-off-by: Andy Polyakov Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm64/crypto/Kconfig | 6 + arch/arm64/crypto/Makefile | 10 +- arch/arm64/crypto/poly1305-armv8.pl | 913 ++++++++++++++++++++++++++++++ arch/arm64/crypto/poly1305-core.S_shipped | 835 +++++++++++++++++++++++++++ arch/arm64/crypto/poly1305-glue.c | 237 ++++++++ lib/crypto/Kconfig | 1 + 6 files changed, 2001 insertions(+), 1 deletion(-) create mode 100644 arch/arm64/crypto/poly1305-armv8.pl create mode 100644 arch/arm64/crypto/poly1305-core.S_shipped create mode 100644 arch/arm64/crypto/poly1305-glue.c (limited to 'arch') diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig index ffb827b84d6c..b8eb0453123d 100644 --- a/arch/arm64/crypto/Kconfig +++ b/arch/arm64/crypto/Kconfig @@ -106,6 +106,12 @@ config CRYPTO_CHACHA20_NEON select CRYPTO_LIB_CHACHA_GENERIC select CRYPTO_ARCH_HAVE_LIB_CHACHA +config CRYPTO_POLY1305_NEON + tristate "Poly1305 hash function using scalar or NEON instructions" + depends on KERNEL_MODE_NEON + select CRYPTO_HASH + select CRYPTO_ARCH_HAVE_LIB_POLY1305 + config CRYPTO_NHPOLY1305_NEON tristate "NHPoly1305 hash function using NEON instructions (for Adiantum)" depends on KERNEL_MODE_NEON diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile index 0435f2a0610e..d0901e610df3 100644 --- a/arch/arm64/crypto/Makefile +++ b/arch/arm64/crypto/Makefile @@ -50,6 +50,10 @@ sha512-arm64-y := sha512-glue.o sha512-core.o obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha-neon.o chacha-neon-y := chacha-neon-core.o chacha-neon-glue.o +obj-$(CONFIG_CRYPTO_POLY1305_NEON) += poly1305-neon.o +poly1305-neon-y := poly1305-core.o poly1305-glue.o +AFLAGS_poly1305-core.o += -Dpoly1305_init=poly1305_init_arm64 + obj-$(CONFIG_CRYPTO_NHPOLY1305_NEON) += nhpoly1305-neon.o nhpoly1305-neon-y := nh-neon-core.o nhpoly1305-neon-glue.o @@ -68,11 +72,15 @@ ifdef REGENERATE_ARM64_CRYPTO quiet_cmd_perlasm = PERLASM $@ cmd_perlasm = $(PERL) $(<) void $(@) +$(src)/poly1305-core.S_shipped: $(src)/poly1305-armv8.pl + $(call cmd,perlasm) + $(src)/sha256-core.S_shipped: $(src)/sha512-armv8.pl $(call cmd,perlasm) $(src)/sha512-core.S_shipped: $(src)/sha512-armv8.pl $(call cmd,perlasm) + endif -clean-files += sha256-core.S sha512-core.S +clean-files += poly1305-core.S sha256-core.S sha512-core.S diff --git a/arch/arm64/crypto/poly1305-armv8.pl b/arch/arm64/crypto/poly1305-armv8.pl new file mode 100644 index 000000000000..6e5576d19af8 --- /dev/null +++ b/arch/arm64/crypto/poly1305-armv8.pl @@ -0,0 +1,913 @@ +#!/usr/bin/env perl +# SPDX-License-Identifier: GPL-1.0+ OR BSD-3-Clause +# +# ==================================================================== +# Written by Andy Polyakov, @dot-asm, initially for the OpenSSL +# project. +# ==================================================================== +# +# This module implements Poly1305 hash for ARMv8. +# +# June 2015 +# +# Numbers are cycles per processed byte with poly1305_blocks alone. +# +# IALU/gcc-4.9 NEON +# +# Apple A7 1.86/+5% 0.72 +# Cortex-A53 2.69/+58% 1.47 +# Cortex-A57 2.70/+7% 1.14 +# Denver 1.64/+50% 1.18(*) +# X-Gene 2.13/+68% 2.27 +# Mongoose 1.77/+75% 1.12 +# Kryo 2.70/+55% 1.13 +# ThunderX2 1.17/+95% 1.36 +# +# (*) estimate based on resources availability is less than 1.0, +# i.e. 
measured result is worse than expected, presumably binary +# translator is not almighty; + +$flavour=shift; +$output=shift; + +if ($flavour && $flavour ne "void") { + $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; + ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or + ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or + die "can't locate arm-xlate.pl"; + + open STDOUT,"| \"$^X\" $xlate $flavour $output"; +} else { + open STDOUT,">$output"; +} + +my ($ctx,$inp,$len,$padbit) = map("x$_",(0..3)); +my ($mac,$nonce)=($inp,$len); + +my ($h0,$h1,$h2,$r0,$r1,$s1,$t0,$t1,$d0,$d1,$d2) = map("x$_",(4..14)); + +$code.=<<___; +#ifndef __KERNEL__ +# include "arm_arch.h" +.extern OPENSSL_armcap_P +#endif + +.text + +// forward "declarations" are required for Apple +.globl poly1305_blocks +.globl poly1305_emit + +.globl poly1305_init +.type poly1305_init,%function +.align 5 +poly1305_init: + cmp $inp,xzr + stp xzr,xzr,[$ctx] // zero hash value + stp xzr,xzr,[$ctx,#16] // [along with is_base2_26] + + csel x0,xzr,x0,eq + b.eq .Lno_key + +#ifndef __KERNEL__ + adrp x17,OPENSSL_armcap_P + ldr w17,[x17,#:lo12:OPENSSL_armcap_P] +#endif + + ldp $r0,$r1,[$inp] // load key + mov $s1,#0xfffffffc0fffffff + movk $s1,#0x0fff,lsl#48 +#ifdef __AARCH64EB__ + rev $r0,$r0 // flip bytes + rev $r1,$r1 +#endif + and $r0,$r0,$s1 // &=0ffffffc0fffffff + and $s1,$s1,#-4 + and $r1,$r1,$s1 // &=0ffffffc0ffffffc + mov w#$s1,#-1 + stp $r0,$r1,[$ctx,#32] // save key value + str w#$s1,[$ctx,#48] // impossible key power value + +#ifndef __KERNEL__ + tst w17,#ARMV7_NEON + + adr $d0,.Lpoly1305_blocks + adr $r0,.Lpoly1305_blocks_neon + adr $d1,.Lpoly1305_emit + + csel $d0,$d0,$r0,eq + +# ifdef __ILP32__ + stp w#$d0,w#$d1,[$len] +# else + stp $d0,$d1,[$len] +# endif +#endif + mov x0,#1 +.Lno_key: + ret +.size poly1305_init,.-poly1305_init + +.type poly1305_blocks,%function +.align 5 +poly1305_blocks: +.Lpoly1305_blocks: + ands $len,$len,#-16 + b.eq .Lno_data + + ldp $h0,$h1,[$ctx] // load hash value + ldp $h2,x17,[$ctx,#16] // [along with is_base2_26] + ldp $r0,$r1,[$ctx,#32] // load key value + +#ifdef __AARCH64EB__ + lsr $d0,$h0,#32 + mov w#$d1,w#$h0 + lsr $d2,$h1,#32 + mov w15,w#$h1 + lsr x16,$h2,#32 +#else + mov w#$d0,w#$h0 + lsr $d1,$h0,#32 + mov w#$d2,w#$h1 + lsr x15,$h1,#32 + mov w16,w#$h2 +#endif + + add $d0,$d0,$d1,lsl#26 // base 2^26 -> base 2^64 + lsr $d1,$d2,#12 + adds $d0,$d0,$d2,lsl#52 + add $d1,$d1,x15,lsl#14 + adc $d1,$d1,xzr + lsr $d2,x16,#24 + adds $d1,$d1,x16,lsl#40 + adc $d2,$d2,xzr + + cmp x17,#0 // is_base2_26? 
+ add $s1,$r1,$r1,lsr#2 // s1 = r1 + (r1 >> 2) + csel $h0,$h0,$d0,eq // choose between radixes + csel $h1,$h1,$d1,eq + csel $h2,$h2,$d2,eq + +.Loop: + ldp $t0,$t1,[$inp],#16 // load input + sub $len,$len,#16 +#ifdef __AARCH64EB__ + rev $t0,$t0 + rev $t1,$t1 +#endif + adds $h0,$h0,$t0 // accumulate input + adcs $h1,$h1,$t1 + + mul $d0,$h0,$r0 // h0*r0 + adc $h2,$h2,$padbit + umulh $d1,$h0,$r0 + + mul $t0,$h1,$s1 // h1*5*r1 + umulh $t1,$h1,$s1 + + adds $d0,$d0,$t0 + mul $t0,$h0,$r1 // h0*r1 + adc $d1,$d1,$t1 + umulh $d2,$h0,$r1 + + adds $d1,$d1,$t0 + mul $t0,$h1,$r0 // h1*r0 + adc $d2,$d2,xzr + umulh $t1,$h1,$r0 + + adds $d1,$d1,$t0 + mul $t0,$h2,$s1 // h2*5*r1 + adc $d2,$d2,$t1 + mul $t1,$h2,$r0 // h2*r0 + + adds $d1,$d1,$t0 + adc $d2,$d2,$t1 + + and $t0,$d2,#-4 // final reduction + and $h2,$d2,#3 + add $t0,$t0,$d2,lsr#2 + adds $h0,$d0,$t0 + adcs $h1,$d1,xzr + adc $h2,$h2,xzr + + cbnz $len,.Loop + + stp $h0,$h1,[$ctx] // store hash value + stp $h2,xzr,[$ctx,#16] // [and clear is_base2_26] + +.Lno_data: + ret +.size poly1305_blocks,.-poly1305_blocks + +.type poly1305_emit,%function +.align 5 +poly1305_emit: +.Lpoly1305_emit: + ldp $h0,$h1,[$ctx] // load hash base 2^64 + ldp $h2,$r0,[$ctx,#16] // [along with is_base2_26] + ldp $t0,$t1,[$nonce] // load nonce + +#ifdef __AARCH64EB__ + lsr $d0,$h0,#32 + mov w#$d1,w#$h0 + lsr $d2,$h1,#32 + mov w15,w#$h1 + lsr x16,$h2,#32 +#else + mov w#$d0,w#$h0 + lsr $d1,$h0,#32 + mov w#$d2,w#$h1 + lsr x15,$h1,#32 + mov w16,w#$h2 +#endif + + add $d0,$d0,$d1,lsl#26 // base 2^26 -> base 2^64 + lsr $d1,$d2,#12 + adds $d0,$d0,$d2,lsl#52 + add $d1,$d1,x15,lsl#14 + adc $d1,$d1,xzr + lsr $d2,x16,#24 + adds $d1,$d1,x16,lsl#40 + adc $d2,$d2,xzr + + cmp $r0,#0 // is_base2_26? + csel $h0,$h0,$d0,eq // choose between radixes + csel $h1,$h1,$d1,eq + csel $h2,$h2,$d2,eq + + adds $d0,$h0,#5 // compare to modulus + adcs $d1,$h1,xzr + adc $d2,$h2,xzr + + tst $d2,#-4 // see if it's carried/borrowed + + csel $h0,$h0,$d0,eq + csel $h1,$h1,$d1,eq + +#ifdef __AARCH64EB__ + ror $t0,$t0,#32 // flip nonce words + ror $t1,$t1,#32 +#endif + adds $h0,$h0,$t0 // accumulate nonce + adc $h1,$h1,$t1 +#ifdef __AARCH64EB__ + rev $h0,$h0 // flip output bytes + rev $h1,$h1 +#endif + stp $h0,$h1,[$mac] // write result + + ret +.size poly1305_emit,.-poly1305_emit +___ +my ($R0,$R1,$S1,$R2,$S2,$R3,$S3,$R4,$S4) = map("v$_.4s",(0..8)); +my ($IN01_0,$IN01_1,$IN01_2,$IN01_3,$IN01_4) = map("v$_.2s",(9..13)); +my ($IN23_0,$IN23_1,$IN23_2,$IN23_3,$IN23_4) = map("v$_.2s",(14..18)); +my ($ACC0,$ACC1,$ACC2,$ACC3,$ACC4) = map("v$_.2d",(19..23)); +my ($H0,$H1,$H2,$H3,$H4) = map("v$_.2s",(24..28)); +my ($T0,$T1,$MASK) = map("v$_",(29..31)); + +my ($in2,$zeros)=("x16","x17"); +my $is_base2_26 = $zeros; # borrow + +$code.=<<___; +.type poly1305_mult,%function +.align 5 +poly1305_mult: + mul $d0,$h0,$r0 // h0*r0 + umulh $d1,$h0,$r0 + + mul $t0,$h1,$s1 // h1*5*r1 + umulh $t1,$h1,$s1 + + adds $d0,$d0,$t0 + mul $t0,$h0,$r1 // h0*r1 + adc $d1,$d1,$t1 + umulh $d2,$h0,$r1 + + adds $d1,$d1,$t0 + mul $t0,$h1,$r0 // h1*r0 + adc $d2,$d2,xzr + umulh $t1,$h1,$r0 + + adds $d1,$d1,$t0 + mul $t0,$h2,$s1 // h2*5*r1 + adc $d2,$d2,$t1 + mul $t1,$h2,$r0 // h2*r0 + + adds $d1,$d1,$t0 + adc $d2,$d2,$t1 + + and $t0,$d2,#-4 // final reduction + and $h2,$d2,#3 + add $t0,$t0,$d2,lsr#2 + adds $h0,$d0,$t0 + adcs $h1,$d1,xzr + adc $h2,$h2,xzr + + ret +.size poly1305_mult,.-poly1305_mult + +.type poly1305_splat,%function +.align 4 +poly1305_splat: + and x12,$h0,#0x03ffffff // base 2^64 -> base 2^26 + ubfx x13,$h0,#26,#26 + extr x14,$h1,$h0,#52 
+ and x14,x14,#0x03ffffff + ubfx x15,$h1,#14,#26 + extr x16,$h2,$h1,#40 + + str w12,[$ctx,#16*0] // r0 + add w12,w13,w13,lsl#2 // r1*5 + str w13,[$ctx,#16*1] // r1 + add w13,w14,w14,lsl#2 // r2*5 + str w12,[$ctx,#16*2] // s1 + str w14,[$ctx,#16*3] // r2 + add w14,w15,w15,lsl#2 // r3*5 + str w13,[$ctx,#16*4] // s2 + str w15,[$ctx,#16*5] // r3 + add w15,w16,w16,lsl#2 // r4*5 + str w14,[$ctx,#16*6] // s3 + str w16,[$ctx,#16*7] // r4 + str w15,[$ctx,#16*8] // s4 + + ret +.size poly1305_splat,.-poly1305_splat + +#ifdef __KERNEL__ +.globl poly1305_blocks_neon +#endif +.type poly1305_blocks_neon,%function +.align 5 +poly1305_blocks_neon: +.Lpoly1305_blocks_neon: + ldr $is_base2_26,[$ctx,#24] + cmp $len,#128 + b.lo .Lpoly1305_blocks + + .inst 0xd503233f // paciasp + stp x29,x30,[sp,#-80]! + add x29,sp,#0 + + stp d8,d9,[sp,#16] // meet ABI requirements + stp d10,d11,[sp,#32] + stp d12,d13,[sp,#48] + stp d14,d15,[sp,#64] + + cbz $is_base2_26,.Lbase2_64_neon + + ldp w10,w11,[$ctx] // load hash value base 2^26 + ldp w12,w13,[$ctx,#8] + ldr w14,[$ctx,#16] + + tst $len,#31 + b.eq .Leven_neon + + ldp $r0,$r1,[$ctx,#32] // load key value + + add $h0,x10,x11,lsl#26 // base 2^26 -> base 2^64 + lsr $h1,x12,#12 + adds $h0,$h0,x12,lsl#52 + add $h1,$h1,x13,lsl#14 + adc $h1,$h1,xzr + lsr $h2,x14,#24 + adds $h1,$h1,x14,lsl#40 + adc $d2,$h2,xzr // can be partially reduced... + + ldp $d0,$d1,[$inp],#16 // load input + sub $len,$len,#16 + add $s1,$r1,$r1,lsr#2 // s1 = r1 + (r1 >> 2) + +#ifdef __AARCH64EB__ + rev $d0,$d0 + rev $d1,$d1 +#endif + adds $h0,$h0,$d0 // accumulate input + adcs $h1,$h1,$d1 + adc $h2,$h2,$padbit + + bl poly1305_mult + + and x10,$h0,#0x03ffffff // base 2^64 -> base 2^26 + ubfx x11,$h0,#26,#26 + extr x12,$h1,$h0,#52 + and x12,x12,#0x03ffffff + ubfx x13,$h1,#14,#26 + extr x14,$h2,$h1,#40 + + b .Leven_neon + +.align 4 +.Lbase2_64_neon: + ldp $r0,$r1,[$ctx,#32] // load key value + + ldp $h0,$h1,[$ctx] // load hash value base 2^64 + ldr $h2,[$ctx,#16] + + tst $len,#31 + b.eq .Linit_neon + + ldp $d0,$d1,[$inp],#16 // load input + sub $len,$len,#16 + add $s1,$r1,$r1,lsr#2 // s1 = r1 + (r1 >> 2) +#ifdef __AARCH64EB__ + rev $d0,$d0 + rev $d1,$d1 +#endif + adds $h0,$h0,$d0 // accumulate input + adcs $h1,$h1,$d1 + adc $h2,$h2,$padbit + + bl poly1305_mult + +.Linit_neon: + ldr w17,[$ctx,#48] // first table element + and x10,$h0,#0x03ffffff // base 2^64 -> base 2^26 + ubfx x11,$h0,#26,#26 + extr x12,$h1,$h0,#52 + and x12,x12,#0x03ffffff + ubfx x13,$h1,#14,#26 + extr x14,$h2,$h1,#40 + + cmp w17,#-1 // is value impossible? 
+ b.ne .Leven_neon + + fmov ${H0},x10 + fmov ${H1},x11 + fmov ${H2},x12 + fmov ${H3},x13 + fmov ${H4},x14 + + ////////////////////////////////// initialize r^n table + mov $h0,$r0 // r^1 + add $s1,$r1,$r1,lsr#2 // s1 = r1 + (r1 >> 2) + mov $h1,$r1 + mov $h2,xzr + add $ctx,$ctx,#48+12 + bl poly1305_splat + + bl poly1305_mult // r^2 + sub $ctx,$ctx,#4 + bl poly1305_splat + + bl poly1305_mult // r^3 + sub $ctx,$ctx,#4 + bl poly1305_splat + + bl poly1305_mult // r^4 + sub $ctx,$ctx,#4 + bl poly1305_splat + sub $ctx,$ctx,#48 // restore original $ctx + b .Ldo_neon + +.align 4 +.Leven_neon: + fmov ${H0},x10 + fmov ${H1},x11 + fmov ${H2},x12 + fmov ${H3},x13 + fmov ${H4},x14 + +.Ldo_neon: + ldp x8,x12,[$inp,#32] // inp[2:3] + subs $len,$len,#64 + ldp x9,x13,[$inp,#48] + add $in2,$inp,#96 + adr $zeros,.Lzeros + + lsl $padbit,$padbit,#24 + add x15,$ctx,#48 + +#ifdef __AARCH64EB__ + rev x8,x8 + rev x12,x12 + rev x9,x9 + rev x13,x13 +#endif + and x4,x8,#0x03ffffff // base 2^64 -> base 2^26 + and x5,x9,#0x03ffffff + ubfx x6,x8,#26,#26 + ubfx x7,x9,#26,#26 + add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32 + extr x8,x12,x8,#52 + extr x9,x13,x9,#52 + add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32 + fmov $IN23_0,x4 + and x8,x8,#0x03ffffff + and x9,x9,#0x03ffffff + ubfx x10,x12,#14,#26 + ubfx x11,x13,#14,#26 + add x12,$padbit,x12,lsr#40 + add x13,$padbit,x13,lsr#40 + add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32 + fmov $IN23_1,x6 + add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32 + add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32 + fmov $IN23_2,x8 + fmov $IN23_3,x10 + fmov $IN23_4,x12 + + ldp x8,x12,[$inp],#16 // inp[0:1] + ldp x9,x13,[$inp],#48 + + ld1 {$R0,$R1,$S1,$R2},[x15],#64 + ld1 {$S2,$R3,$S3,$R4},[x15],#64 + ld1 {$S4},[x15] + +#ifdef __AARCH64EB__ + rev x8,x8 + rev x12,x12 + rev x9,x9 + rev x13,x13 +#endif + and x4,x8,#0x03ffffff // base 2^64 -> base 2^26 + and x5,x9,#0x03ffffff + ubfx x6,x8,#26,#26 + ubfx x7,x9,#26,#26 + add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32 + extr x8,x12,x8,#52 + extr x9,x13,x9,#52 + add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32 + fmov $IN01_0,x4 + and x8,x8,#0x03ffffff + and x9,x9,#0x03ffffff + ubfx x10,x12,#14,#26 + ubfx x11,x13,#14,#26 + add x12,$padbit,x12,lsr#40 + add x13,$padbit,x13,lsr#40 + add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32 + fmov $IN01_1,x6 + add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32 + add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32 + movi $MASK.2d,#-1 + fmov $IN01_2,x8 + fmov $IN01_3,x10 + fmov $IN01_4,x12 + ushr $MASK.2d,$MASK.2d,#38 + + b.ls .Lskip_loop + +.align 4 +.Loop_neon: + //////////////////////////////////////////////////////////////// + // ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2 + // ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^3+inp[7]*r + // \___________________/ + // ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2+inp[8])*r^2 + // ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^4+inp[7]*r^2+inp[9])*r + // \___________________/ \____________________/ + // + // Note that we start with inp[2:3]*r^2. This is because it + // doesn't depend on reduction in previous iteration. 
+ //////////////////////////////////////////////////////////////// + // d4 = h0*r4 + h1*r3 + h2*r2 + h3*r1 + h4*r0 + // d3 = h0*r3 + h1*r2 + h2*r1 + h3*r0 + h4*5*r4 + // d2 = h0*r2 + h1*r1 + h2*r0 + h3*5*r4 + h4*5*r3 + // d1 = h0*r1 + h1*r0 + h2*5*r4 + h3*5*r3 + h4*5*r2 + // d0 = h0*r0 + h1*5*r4 + h2*5*r3 + h3*5*r2 + h4*5*r1 + + subs $len,$len,#64 + umull $ACC4,$IN23_0,${R4}[2] + csel $in2,$zeros,$in2,lo + umull $ACC3,$IN23_0,${R3}[2] + umull $ACC2,$IN23_0,${R2}[2] + ldp x8,x12,[$in2],#16 // inp[2:3] (or zero) + umull $ACC1,$IN23_0,${R1}[2] + ldp x9,x13,[$in2],#48 + umull $ACC0,$IN23_0,${R0}[2] +#ifdef __AARCH64EB__ + rev x8,x8 + rev x12,x12 + rev x9,x9 + rev x13,x13 +#endif + + umlal $ACC4,$IN23_1,${R3}[2] + and x4,x8,#0x03ffffff // base 2^64 -> base 2^26 + umlal $ACC3,$IN23_1,${R2}[2] + and x5,x9,#0x03ffffff + umlal $ACC2,$IN23_1,${R1}[2] + ubfx x6,x8,#26,#26 + umlal $ACC1,$IN23_1,${R0}[2] + ubfx x7,x9,#26,#26 + umlal $ACC0,$IN23_1,${S4}[2] + add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32 + + umlal $ACC4,$IN23_2,${R2}[2] + extr x8,x12,x8,#52 + umlal $ACC3,$IN23_2,${R1}[2] + extr x9,x13,x9,#52 + umlal $ACC2,$IN23_2,${R0}[2] + add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32 + umlal $ACC1,$IN23_2,${S4}[2] + fmov $IN23_0,x4 + umlal $ACC0,$IN23_2,${S3}[2] + and x8,x8,#0x03ffffff + + umlal $ACC4,$IN23_3,${R1}[2] + and x9,x9,#0x03ffffff + umlal $ACC3,$IN23_3,${R0}[2] + ubfx x10,x12,#14,#26 + umlal $ACC2,$IN23_3,${S4}[2] + ubfx x11,x13,#14,#26 + umlal $ACC1,$IN23_3,${S3}[2] + add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32 + umlal $ACC0,$IN23_3,${S2}[2] + fmov $IN23_1,x6 + + add $IN01_2,$IN01_2,$H2 + add x12,$padbit,x12,lsr#40 + umlal $ACC4,$IN23_4,${R0}[2] + add x13,$padbit,x13,lsr#40 + umlal $ACC3,$IN23_4,${S4}[2] + add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32 + umlal $ACC2,$IN23_4,${S3}[2] + add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32 + umlal $ACC1,$IN23_4,${S2}[2] + fmov $IN23_2,x8 + umlal $ACC0,$IN23_4,${S1}[2] + fmov $IN23_3,x10 + + //////////////////////////////////////////////////////////////// + // (hash+inp[0:1])*r^4 and accumulate + + add $IN01_0,$IN01_0,$H0 + fmov $IN23_4,x12 + umlal $ACC3,$IN01_2,${R1}[0] + ldp x8,x12,[$inp],#16 // inp[0:1] + umlal $ACC0,$IN01_2,${S3}[0] + ldp x9,x13,[$inp],#48 + umlal $ACC4,$IN01_2,${R2}[0] + umlal $ACC1,$IN01_2,${S4}[0] + umlal $ACC2,$IN01_2,${R0}[0] +#ifdef __AARCH64EB__ + rev x8,x8 + rev x12,x12 + rev x9,x9 + rev x13,x13 +#endif + + add $IN01_1,$IN01_1,$H1 + umlal $ACC3,$IN01_0,${R3}[0] + umlal $ACC4,$IN01_0,${R4}[0] + and x4,x8,#0x03ffffff // base 2^64 -> base 2^26 + umlal $ACC2,$IN01_0,${R2}[0] + and x5,x9,#0x03ffffff + umlal $ACC0,$IN01_0,${R0}[0] + ubfx x6,x8,#26,#26 + umlal $ACC1,$IN01_0,${R1}[0] + ubfx x7,x9,#26,#26 + + add $IN01_3,$IN01_3,$H3 + add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32 + umlal $ACC3,$IN01_1,${R2}[0] + extr x8,x12,x8,#52 + umlal $ACC4,$IN01_1,${R3}[0] + extr x9,x13,x9,#52 + umlal $ACC0,$IN01_1,${S4}[0] + add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32 + umlal $ACC2,$IN01_1,${R1}[0] + fmov $IN01_0,x4 + umlal $ACC1,$IN01_1,${R0}[0] + and x8,x8,#0x03ffffff + + add $IN01_4,$IN01_4,$H4 + and x9,x9,#0x03ffffff + umlal $ACC3,$IN01_3,${R0}[0] + ubfx x10,x12,#14,#26 + umlal $ACC0,$IN01_3,${S2}[0] + ubfx x11,x13,#14,#26 + umlal $ACC4,$IN01_3,${R1}[0] + add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32 + umlal $ACC1,$IN01_3,${S3}[0] + fmov $IN01_1,x6 + umlal $ACC2,$IN01_3,${S4}[0] + add x12,$padbit,x12,lsr#40 + + umlal $ACC3,$IN01_4,${S4}[0] + add x13,$padbit,x13,lsr#40 + umlal $ACC0,$IN01_4,${S1}[0] + add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32 + umlal 
$ACC4,$IN01_4,${R0}[0] + add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32 + umlal $ACC1,$IN01_4,${S2}[0] + fmov $IN01_2,x8 + umlal $ACC2,$IN01_4,${S3}[0] + fmov $IN01_3,x10 + fmov $IN01_4,x12 + + ///////////////////////////////////////////////////////////////// + // lazy reduction as discussed in "NEON crypto" by D.J. Bernstein + // and P. Schwabe + // + // [see discussion in poly1305-armv4 module] + + ushr $T0.2d,$ACC3,#26 + xtn $H3,$ACC3 + ushr $T1.2d,$ACC0,#26 + and $ACC0,$ACC0,$MASK.2d + add $ACC4,$ACC4,$T0.2d // h3 -> h4 + bic $H3,#0xfc,lsl#24 // &=0x03ffffff + add $ACC1,$ACC1,$T1.2d // h0 -> h1 + + ushr $T0.2d,$ACC4,#26 + xtn $H4,$ACC4 + ushr $T1.2d,$ACC1,#26 + xtn $H1,$ACC1 + bic $H4,#0xfc,lsl#24 + add $ACC2,$ACC2,$T1.2d // h1 -> h2 + + add $ACC0,$ACC0,$T0.2d + shl $T0.2d,$T0.2d,#2 + shrn $T1.2s,$ACC2,#26 + xtn $H2,$ACC2 + add $ACC0,$ACC0,$T0.2d // h4 -> h0 + bic $H1,#0xfc,lsl#24 + add $H3,$H3,$T1.2s // h2 -> h3 + bic $H2,#0xfc,lsl#24 + + shrn $T0.2s,$ACC0,#26 + xtn $H0,$ACC0 + ushr $T1.2s,$H3,#26 + bic $H3,#0xfc,lsl#24 + bic $H0,#0xfc,lsl#24 + add $H1,$H1,$T0.2s // h0 -> h1 + add $H4,$H4,$T1.2s // h3 -> h4 + + b.hi .Loop_neon + +.Lskip_loop: + dup $IN23_2,${IN23_2}[0] + add $IN01_2,$IN01_2,$H2 + + //////////////////////////////////////////////////////////////// + // multiply (inp[0:1]+hash) or inp[2:3] by r^2:r^1 + + adds $len,$len,#32 + b.ne .Long_tail + + dup $IN23_2,${IN01_2}[0] + add $IN23_0,$IN01_0,$H0 + add $IN23_3,$IN01_3,$H3 + add $IN23_1,$IN01_1,$H1 + add $IN23_4,$IN01_4,$H4 + +.Long_tail: + dup $IN23_0,${IN23_0}[0] + umull2 $ACC0,$IN23_2,${S3} + umull2 $ACC3,$IN23_2,${R1} + umull2 $ACC4,$IN23_2,${R2} + umull2 $ACC2,$IN23_2,${R0} + umull2 $ACC1,$IN23_2,${S4} + + dup $IN23_1,${IN23_1}[0] + umlal2 $ACC0,$IN23_0,${R0} + umlal2 $ACC2,$IN23_0,${R2} + umlal2 $ACC3,$IN23_0,${R3} + umlal2 $ACC4,$IN23_0,${R4} + umlal2 $ACC1,$IN23_0,${R1} + + dup $IN23_3,${IN23_3}[0] + umlal2 $ACC0,$IN23_1,${S4} + umlal2 $ACC3,$IN23_1,${R2} + umlal2 $ACC2,$IN23_1,${R1} + umlal2 $ACC4,$IN23_1,${R3} + umlal2 $ACC1,$IN23_1,${R0} + + dup $IN23_4,${IN23_4}[0] + umlal2 $ACC3,$IN23_3,${R0} + umlal2 $ACC4,$IN23_3,${R1} + umlal2 $ACC0,$IN23_3,${S2} + umlal2 $ACC1,$IN23_3,${S3} + umlal2 $ACC2,$IN23_3,${S4} + + umlal2 $ACC3,$IN23_4,${S4} + umlal2 $ACC0,$IN23_4,${S1} + umlal2 $ACC4,$IN23_4,${R0} + umlal2 $ACC1,$IN23_4,${S2} + umlal2 $ACC2,$IN23_4,${S3} + + b.eq .Lshort_tail + + //////////////////////////////////////////////////////////////// + // (hash+inp[0:1])*r^4:r^3 and accumulate + + add $IN01_0,$IN01_0,$H0 + umlal $ACC3,$IN01_2,${R1} + umlal $ACC0,$IN01_2,${S3} + umlal $ACC4,$IN01_2,${R2} + umlal $ACC1,$IN01_2,${S4} + umlal $ACC2,$IN01_2,${R0} + + add $IN01_1,$IN01_1,$H1 + umlal $ACC3,$IN01_0,${R3} + umlal $ACC0,$IN01_0,${R0} + umlal $ACC4,$IN01_0,${R4} + umlal $ACC1,$IN01_0,${R1} + umlal $ACC2,$IN01_0,${R2} + + add $IN01_3,$IN01_3,$H3 + umlal $ACC3,$IN01_1,${R2} + umlal $ACC0,$IN01_1,${S4} + umlal $ACC4,$IN01_1,${R3} + umlal $ACC1,$IN01_1,${R0} + umlal $ACC2,$IN01_1,${R1} + + add $IN01_4,$IN01_4,$H4 + umlal $ACC3,$IN01_3,${R0} + umlal $ACC0,$IN01_3,${S2} + umlal $ACC4,$IN01_3,${R1} + umlal $ACC1,$IN01_3,${S3} + umlal $ACC2,$IN01_3,${S4} + + umlal $ACC3,$IN01_4,${S4} + umlal $ACC0,$IN01_4,${S1} + umlal $ACC4,$IN01_4,${R0} + umlal $ACC1,$IN01_4,${S2} + umlal $ACC2,$IN01_4,${S3} + +.Lshort_tail: + //////////////////////////////////////////////////////////////// + // horizontal add + + addp $ACC3,$ACC3,$ACC3 + ldp d8,d9,[sp,#16] // meet ABI requirements + addp $ACC0,$ACC0,$ACC0 + ldp d10,d11,[sp,#32] + addp 
$ACC4,$ACC4,$ACC4 + ldp d12,d13,[sp,#48] + addp $ACC1,$ACC1,$ACC1 + ldp d14,d15,[sp,#64] + addp $ACC2,$ACC2,$ACC2 + ldr x30,[sp,#8] + .inst 0xd50323bf // autiasp + + //////////////////////////////////////////////////////////////// + // lazy reduction, but without narrowing + + ushr $T0.2d,$ACC3,#26 + and $ACC3,$ACC3,$MASK.2d + ushr $T1.2d,$ACC0,#26 + and $ACC0,$ACC0,$MASK.2d + + add $ACC4,$ACC4,$T0.2d // h3 -> h4 + add $ACC1,$ACC1,$T1.2d // h0 -> h1 + + ushr $T0.2d,$ACC4,#26 + and $ACC4,$ACC4,$MASK.2d + ushr $T1.2d,$ACC1,#26 + and $ACC1,$ACC1,$MASK.2d + add $ACC2,$ACC2,$T1.2d // h1 -> h2 + + add $ACC0,$ACC0,$T0.2d + shl $T0.2d,$T0.2d,#2 + ushr $T1.2d,$ACC2,#26 + and $ACC2,$ACC2,$MASK.2d + add $ACC0,$ACC0,$T0.2d // h4 -> h0 + add $ACC3,$ACC3,$T1.2d // h2 -> h3 + + ushr $T0.2d,$ACC0,#26 + and $ACC0,$ACC0,$MASK.2d + ushr $T1.2d,$ACC3,#26 + and $ACC3,$ACC3,$MASK.2d + add $ACC1,$ACC1,$T0.2d // h0 -> h1 + add $ACC4,$ACC4,$T1.2d // h3 -> h4 + + //////////////////////////////////////////////////////////////// + // write the result, can be partially reduced + + st4 {$ACC0,$ACC1,$ACC2,$ACC3}[0],[$ctx],#16 + mov x4,#1 + st1 {$ACC4}[0],[$ctx] + str x4,[$ctx,#8] // set is_base2_26 + + ldr x29,[sp],#80 + ret +.size poly1305_blocks_neon,.-poly1305_blocks_neon + +.align 5 +.Lzeros: +.long 0,0,0,0,0,0,0,0 +.asciz "Poly1305 for ARMv8, CRYPTOGAMS by \@dot-asm" +.align 2 +#if !defined(__KERNEL__) && !defined(_WIN64) +.comm OPENSSL_armcap_P,4,4 +.hidden OPENSSL_armcap_P +#endif +___ + +foreach (split("\n",$code)) { + s/\b(shrn\s+v[0-9]+)\.[24]d/$1.2s/ or + s/\b(fmov\s+)v([0-9]+)[^,]*,\s*x([0-9]+)/$1d$2,x$3/ or + (m/\bdup\b/ and (s/\.[24]s/.2d/g or 1)) or + (m/\b(eor|and)/ and (s/\.[248][sdh]/.16b/g or 1)) or + (m/\bum(ul|la)l\b/ and (s/\.4s/.2s/g or 1)) or + (m/\bum(ul|la)l2\b/ and (s/\.2s/.4s/g or 1)) or + (m/\bst[1-4]\s+{[^}]+}\[/ and (s/\.[24]d/.s/g or 1)); + + s/\.[124]([sd])\[/.$1\[/; + s/w#x([0-9]+)/w$1/g; + + print $_,"\n"; +} +close STDOUT; diff --git a/arch/arm64/crypto/poly1305-core.S_shipped b/arch/arm64/crypto/poly1305-core.S_shipped new file mode 100644 index 000000000000..8d1c4e420ccd --- /dev/null +++ b/arch/arm64/crypto/poly1305-core.S_shipped @@ -0,0 +1,835 @@ +#ifndef __KERNEL__ +# include "arm_arch.h" +.extern OPENSSL_armcap_P +#endif + +.text + +// forward "declarations" are required for Apple +.globl poly1305_blocks +.globl poly1305_emit + +.globl poly1305_init +.type poly1305_init,%function +.align 5 +poly1305_init: + cmp x1,xzr + stp xzr,xzr,[x0] // zero hash value + stp xzr,xzr,[x0,#16] // [along with is_base2_26] + + csel x0,xzr,x0,eq + b.eq .Lno_key + +#ifndef __KERNEL__ + adrp x17,OPENSSL_armcap_P + ldr w17,[x17,#:lo12:OPENSSL_armcap_P] +#endif + + ldp x7,x8,[x1] // load key + mov x9,#0xfffffffc0fffffff + movk x9,#0x0fff,lsl#48 +#ifdef __AARCH64EB__ + rev x7,x7 // flip bytes + rev x8,x8 +#endif + and x7,x7,x9 // &=0ffffffc0fffffff + and x9,x9,#-4 + and x8,x8,x9 // &=0ffffffc0ffffffc + mov w9,#-1 + stp x7,x8,[x0,#32] // save key value + str w9,[x0,#48] // impossible key power value + +#ifndef __KERNEL__ + tst w17,#ARMV7_NEON + + adr x12,.Lpoly1305_blocks + adr x7,.Lpoly1305_blocks_neon + adr x13,.Lpoly1305_emit + + csel x12,x12,x7,eq + +# ifdef __ILP32__ + stp w12,w13,[x2] +# else + stp x12,x13,[x2] +# endif +#endif + mov x0,#1 +.Lno_key: + ret +.size poly1305_init,.-poly1305_init + +.type poly1305_blocks,%function +.align 5 +poly1305_blocks: +.Lpoly1305_blocks: + ands x2,x2,#-16 + b.eq .Lno_data + + ldp x4,x5,[x0] // load hash value + ldp x6,x17,[x0,#16] // [along with is_base2_26] + 
ldp x7,x8,[x0,#32] // load key value + +#ifdef __AARCH64EB__ + lsr x12,x4,#32 + mov w13,w4 + lsr x14,x5,#32 + mov w15,w5 + lsr x16,x6,#32 +#else + mov w12,w4 + lsr x13,x4,#32 + mov w14,w5 + lsr x15,x5,#32 + mov w16,w6 +#endif + + add x12,x12,x13,lsl#26 // base 2^26 -> base 2^64 + lsr x13,x14,#12 + adds x12,x12,x14,lsl#52 + add x13,x13,x15,lsl#14 + adc x13,x13,xzr + lsr x14,x16,#24 + adds x13,x13,x16,lsl#40 + adc x14,x14,xzr + + cmp x17,#0 // is_base2_26? + add x9,x8,x8,lsr#2 // s1 = r1 + (r1 >> 2) + csel x4,x4,x12,eq // choose between radixes + csel x5,x5,x13,eq + csel x6,x6,x14,eq + +.Loop: + ldp x10,x11,[x1],#16 // load input + sub x2,x2,#16 +#ifdef __AARCH64EB__ + rev x10,x10 + rev x11,x11 +#endif + adds x4,x4,x10 // accumulate input + adcs x5,x5,x11 + + mul x12,x4,x7 // h0*r0 + adc x6,x6,x3 + umulh x13,x4,x7 + + mul x10,x5,x9 // h1*5*r1 + umulh x11,x5,x9 + + adds x12,x12,x10 + mul x10,x4,x8 // h0*r1 + adc x13,x13,x11 + umulh x14,x4,x8 + + adds x13,x13,x10 + mul x10,x5,x7 // h1*r0 + adc x14,x14,xzr + umulh x11,x5,x7 + + adds x13,x13,x10 + mul x10,x6,x9 // h2*5*r1 + adc x14,x14,x11 + mul x11,x6,x7 // h2*r0 + + adds x13,x13,x10 + adc x14,x14,x11 + + and x10,x14,#-4 // final reduction + and x6,x14,#3 + add x10,x10,x14,lsr#2 + adds x4,x12,x10 + adcs x5,x13,xzr + adc x6,x6,xzr + + cbnz x2,.Loop + + stp x4,x5,[x0] // store hash value + stp x6,xzr,[x0,#16] // [and clear is_base2_26] + +.Lno_data: + ret +.size poly1305_blocks,.-poly1305_blocks + +.type poly1305_emit,%function +.align 5 +poly1305_emit: +.Lpoly1305_emit: + ldp x4,x5,[x0] // load hash base 2^64 + ldp x6,x7,[x0,#16] // [along with is_base2_26] + ldp x10,x11,[x2] // load nonce + +#ifdef __AARCH64EB__ + lsr x12,x4,#32 + mov w13,w4 + lsr x14,x5,#32 + mov w15,w5 + lsr x16,x6,#32 +#else + mov w12,w4 + lsr x13,x4,#32 + mov w14,w5 + lsr x15,x5,#32 + mov w16,w6 +#endif + + add x12,x12,x13,lsl#26 // base 2^26 -> base 2^64 + lsr x13,x14,#12 + adds x12,x12,x14,lsl#52 + add x13,x13,x15,lsl#14 + adc x13,x13,xzr + lsr x14,x16,#24 + adds x13,x13,x16,lsl#40 + adc x14,x14,xzr + + cmp x7,#0 // is_base2_26? 
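+ // x7 holds is_base2_26: if the NEON code left the hash in base
+ // 2^26, take the value just converted to base 2^64 in x12-x14,
+ // otherwise keep the stored base 2^64 value in x4-x6.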
+ csel x4,x4,x12,eq // choose between radixes
+ csel x5,x5,x13,eq
+ csel x6,x6,x14,eq
+
+ adds x12,x4,#5 // compare to modulus
+ adcs x13,x5,xzr
+ adc x14,x6,xzr
+
+ tst x14,#-4 // see if it's carried/borrowed
+
+ csel x4,x4,x12,eq
+ csel x5,x5,x13,eq
+
+#ifdef __AARCH64EB__
+ ror x10,x10,#32 // flip nonce words
+ ror x11,x11,#32
+#endif
+ adds x4,x4,x10 // accumulate nonce
+ adc x5,x5,x11
+#ifdef __AARCH64EB__
+ rev x4,x4 // flip output bytes
+ rev x5,x5
+#endif
+ stp x4,x5,[x1] // write result
+
+ ret
+.size poly1305_emit,.-poly1305_emit
+.type poly1305_mult,%function
+.align 5
+poly1305_mult:
+ mul x12,x4,x7 // h0*r0
+ umulh x13,x4,x7
+
+ mul x10,x5,x9 // h1*5*r1
+ umulh x11,x5,x9
+
+ adds x12,x12,x10
+ mul x10,x4,x8 // h0*r1
+ adc x13,x13,x11
+ umulh x14,x4,x8
+
+ adds x13,x13,x10
+ mul x10,x5,x7 // h1*r0
+ adc x14,x14,xzr
+ umulh x11,x5,x7
+
+ adds x13,x13,x10
+ mul x10,x6,x9 // h2*5*r1
+ adc x14,x14,x11
+ mul x11,x6,x7 // h2*r0
+
+ adds x13,x13,x10
+ adc x14,x14,x11
+
+ and x10,x14,#-4 // final reduction
+ and x6,x14,#3
+ add x10,x10,x14,lsr#2
+ adds x4,x12,x10
+ adcs x5,x13,xzr
+ adc x6,x6,xzr
+
+ ret
+.size poly1305_mult,.-poly1305_mult
+
+.type poly1305_splat,%function
+.align 4
+poly1305_splat:
+ and x12,x4,#0x03ffffff // base 2^64 -> base 2^26
+ ubfx x13,x4,#26,#26
+ extr x14,x5,x4,#52
+ and x14,x14,#0x03ffffff
+ ubfx x15,x5,#14,#26
+ extr x16,x6,x5,#40
+
+ str w12,[x0,#16*0] // r0
+ add w12,w13,w13,lsl#2 // r1*5
+ str w13,[x0,#16*1] // r1
+ add w13,w14,w14,lsl#2 // r2*5
+ str w12,[x0,#16*2] // s1
+ str w14,[x0,#16*3] // r2
+ add w14,w15,w15,lsl#2 // r3*5
+ str w13,[x0,#16*4] // s2
+ str w15,[x0,#16*5] // r3
+ add w15,w16,w16,lsl#2 // r4*5
+ str w14,[x0,#16*6] // s3
+ str w16,[x0,#16*7] // r4
+ str w15,[x0,#16*8] // s4
+
+ ret
+.size poly1305_splat,.-poly1305_splat
+
+#ifdef __KERNEL__
+.globl poly1305_blocks_neon
+#endif
+.type poly1305_blocks_neon,%function
+.align 5
+poly1305_blocks_neon:
+.Lpoly1305_blocks_neon:
+ ldr x17,[x0,#24]
+ cmp x2,#128
+ b.lo .Lpoly1305_blocks
+
+ .inst 0xd503233f // paciasp
+ stp x29,x30,[sp,#-80]!
+ add x29,sp,#0
+
+ stp d8,d9,[sp,#16] // meet ABI requirements
+ stp d10,d11,[sp,#32]
+ stp d12,d13,[sp,#48]
+ stp d14,d15,[sp,#64]
+
+ cbz x17,.Lbase2_64_neon
+
+ ldp w10,w11,[x0] // load hash value base 2^26
+ ldp w12,w13,[x0,#8]
+ ldr w14,[x0,#16]
+
+ tst x2,#31
+ b.eq .Leven_neon
+
+ ldp x7,x8,[x0,#32] // load key value
+
+ add x4,x10,x11,lsl#26 // base 2^26 -> base 2^64
+ lsr x5,x12,#12
+ adds x4,x4,x12,lsl#52
+ add x5,x5,x13,lsl#14
+ adc x5,x5,xzr
+ lsr x6,x14,#24
+ adds x5,x5,x14,lsl#40
+ adc x6,x6,xzr // can be partially reduced...
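+ // (the 130-bit value rebuilt from the base 2^26 limbs need not be
+ // fully reduced mod 2^130-5; poly1305_mult tolerates such input)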
+ + ldp x12,x13,[x1],#16 // load input + sub x2,x2,#16 + add x9,x8,x8,lsr#2 // s1 = r1 + (r1 >> 2) + +#ifdef __AARCH64EB__ + rev x12,x12 + rev x13,x13 +#endif + adds x4,x4,x12 // accumulate input + adcs x5,x5,x13 + adc x6,x6,x3 + + bl poly1305_mult + + and x10,x4,#0x03ffffff // base 2^64 -> base 2^26 + ubfx x11,x4,#26,#26 + extr x12,x5,x4,#52 + and x12,x12,#0x03ffffff + ubfx x13,x5,#14,#26 + extr x14,x6,x5,#40 + + b .Leven_neon + +.align 4 +.Lbase2_64_neon: + ldp x7,x8,[x0,#32] // load key value + + ldp x4,x5,[x0] // load hash value base 2^64 + ldr x6,[x0,#16] + + tst x2,#31 + b.eq .Linit_neon + + ldp x12,x13,[x1],#16 // load input + sub x2,x2,#16 + add x9,x8,x8,lsr#2 // s1 = r1 + (r1 >> 2) +#ifdef __AARCH64EB__ + rev x12,x12 + rev x13,x13 +#endif + adds x4,x4,x12 // accumulate input + adcs x5,x5,x13 + adc x6,x6,x3 + + bl poly1305_mult + +.Linit_neon: + ldr w17,[x0,#48] // first table element + and x10,x4,#0x03ffffff // base 2^64 -> base 2^26 + ubfx x11,x4,#26,#26 + extr x12,x5,x4,#52 + and x12,x12,#0x03ffffff + ubfx x13,x5,#14,#26 + extr x14,x6,x5,#40 + + cmp w17,#-1 // is value impossible? + b.ne .Leven_neon + + fmov d24,x10 + fmov d25,x11 + fmov d26,x12 + fmov d27,x13 + fmov d28,x14 + + ////////////////////////////////// initialize r^n table + mov x4,x7 // r^1 + add x9,x8,x8,lsr#2 // s1 = r1 + (r1 >> 2) + mov x5,x8 + mov x6,xzr + add x0,x0,#48+12 + bl poly1305_splat + + bl poly1305_mult // r^2 + sub x0,x0,#4 + bl poly1305_splat + + bl poly1305_mult // r^3 + sub x0,x0,#4 + bl poly1305_splat + + bl poly1305_mult // r^4 + sub x0,x0,#4 + bl poly1305_splat + sub x0,x0,#48 // restore original x0 + b .Ldo_neon + +.align 4 +.Leven_neon: + fmov d24,x10 + fmov d25,x11 + fmov d26,x12 + fmov d27,x13 + fmov d28,x14 + +.Ldo_neon: + ldp x8,x12,[x1,#32] // inp[2:3] + subs x2,x2,#64 + ldp x9,x13,[x1,#48] + add x16,x1,#96 + adr x17,.Lzeros + + lsl x3,x3,#24 + add x15,x0,#48 + +#ifdef __AARCH64EB__ + rev x8,x8 + rev x12,x12 + rev x9,x9 + rev x13,x13 +#endif + and x4,x8,#0x03ffffff // base 2^64 -> base 2^26 + and x5,x9,#0x03ffffff + ubfx x6,x8,#26,#26 + ubfx x7,x9,#26,#26 + add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32 + extr x8,x12,x8,#52 + extr x9,x13,x9,#52 + add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32 + fmov d14,x4 + and x8,x8,#0x03ffffff + and x9,x9,#0x03ffffff + ubfx x10,x12,#14,#26 + ubfx x11,x13,#14,#26 + add x12,x3,x12,lsr#40 + add x13,x3,x13,lsr#40 + add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32 + fmov d15,x6 + add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32 + add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32 + fmov d16,x8 + fmov d17,x10 + fmov d18,x12 + + ldp x8,x12,[x1],#16 // inp[0:1] + ldp x9,x13,[x1],#48 + + ld1 {v0.4s,v1.4s,v2.4s,v3.4s},[x15],#64 + ld1 {v4.4s,v5.4s,v6.4s,v7.4s},[x15],#64 + ld1 {v8.4s},[x15] + +#ifdef __AARCH64EB__ + rev x8,x8 + rev x12,x12 + rev x9,x9 + rev x13,x13 +#endif + and x4,x8,#0x03ffffff // base 2^64 -> base 2^26 + and x5,x9,#0x03ffffff + ubfx x6,x8,#26,#26 + ubfx x7,x9,#26,#26 + add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32 + extr x8,x12,x8,#52 + extr x9,x13,x9,#52 + add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32 + fmov d9,x4 + and x8,x8,#0x03ffffff + and x9,x9,#0x03ffffff + ubfx x10,x12,#14,#26 + ubfx x11,x13,#14,#26 + add x12,x3,x12,lsr#40 + add x13,x3,x13,lsr#40 + add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32 + fmov d10,x6 + add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32 + add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32 + movi v31.2d,#-1 + fmov d11,x8 + fmov d12,x10 + fmov d13,x12 + ushr v31.2d,v31.2d,#38 + + b.ls .Lskip_loop + +.align 4 +.Loop_neon: + 
//////////////////////////////////////////////////////////////// + // ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2 + // ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^3+inp[7]*r + // ___________________/ + // ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2+inp[8])*r^2 + // ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^4+inp[7]*r^2+inp[9])*r + // ___________________/ ____________________/ + // + // Note that we start with inp[2:3]*r^2. This is because it + // doesn't depend on reduction in previous iteration. + //////////////////////////////////////////////////////////////// + // d4 = h0*r4 + h1*r3 + h2*r2 + h3*r1 + h4*r0 + // d3 = h0*r3 + h1*r2 + h2*r1 + h3*r0 + h4*5*r4 + // d2 = h0*r2 + h1*r1 + h2*r0 + h3*5*r4 + h4*5*r3 + // d1 = h0*r1 + h1*r0 + h2*5*r4 + h3*5*r3 + h4*5*r2 + // d0 = h0*r0 + h1*5*r4 + h2*5*r3 + h3*5*r2 + h4*5*r1 + + subs x2,x2,#64 + umull v23.2d,v14.2s,v7.s[2] + csel x16,x17,x16,lo + umull v22.2d,v14.2s,v5.s[2] + umull v21.2d,v14.2s,v3.s[2] + ldp x8,x12,[x16],#16 // inp[2:3] (or zero) + umull v20.2d,v14.2s,v1.s[2] + ldp x9,x13,[x16],#48 + umull v19.2d,v14.2s,v0.s[2] +#ifdef __AARCH64EB__ + rev x8,x8 + rev x12,x12 + rev x9,x9 + rev x13,x13 +#endif + + umlal v23.2d,v15.2s,v5.s[2] + and x4,x8,#0x03ffffff // base 2^64 -> base 2^26 + umlal v22.2d,v15.2s,v3.s[2] + and x5,x9,#0x03ffffff + umlal v21.2d,v15.2s,v1.s[2] + ubfx x6,x8,#26,#26 + umlal v20.2d,v15.2s,v0.s[2] + ubfx x7,x9,#26,#26 + umlal v19.2d,v15.2s,v8.s[2] + add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32 + + umlal v23.2d,v16.2s,v3.s[2] + extr x8,x12,x8,#52 + umlal v22.2d,v16.2s,v1.s[2] + extr x9,x13,x9,#52 + umlal v21.2d,v16.2s,v0.s[2] + add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32 + umlal v20.2d,v16.2s,v8.s[2] + fmov d14,x4 + umlal v19.2d,v16.2s,v6.s[2] + and x8,x8,#0x03ffffff + + umlal v23.2d,v17.2s,v1.s[2] + and x9,x9,#0x03ffffff + umlal v22.2d,v17.2s,v0.s[2] + ubfx x10,x12,#14,#26 + umlal v21.2d,v17.2s,v8.s[2] + ubfx x11,x13,#14,#26 + umlal v20.2d,v17.2s,v6.s[2] + add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32 + umlal v19.2d,v17.2s,v4.s[2] + fmov d15,x6 + + add v11.2s,v11.2s,v26.2s + add x12,x3,x12,lsr#40 + umlal v23.2d,v18.2s,v0.s[2] + add x13,x3,x13,lsr#40 + umlal v22.2d,v18.2s,v8.s[2] + add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32 + umlal v21.2d,v18.2s,v6.s[2] + add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32 + umlal v20.2d,v18.2s,v4.s[2] + fmov d16,x8 + umlal v19.2d,v18.2s,v2.s[2] + fmov d17,x10 + + //////////////////////////////////////////////////////////////// + // (hash+inp[0:1])*r^4 and accumulate + + add v9.2s,v9.2s,v24.2s + fmov d18,x12 + umlal v22.2d,v11.2s,v1.s[0] + ldp x8,x12,[x1],#16 // inp[0:1] + umlal v19.2d,v11.2s,v6.s[0] + ldp x9,x13,[x1],#48 + umlal v23.2d,v11.2s,v3.s[0] + umlal v20.2d,v11.2s,v8.s[0] + umlal v21.2d,v11.2s,v0.s[0] +#ifdef __AARCH64EB__ + rev x8,x8 + rev x12,x12 + rev x9,x9 + rev x13,x13 +#endif + + add v10.2s,v10.2s,v25.2s + umlal v22.2d,v9.2s,v5.s[0] + umlal v23.2d,v9.2s,v7.s[0] + and x4,x8,#0x03ffffff // base 2^64 -> base 2^26 + umlal v21.2d,v9.2s,v3.s[0] + and x5,x9,#0x03ffffff + umlal v19.2d,v9.2s,v0.s[0] + ubfx x6,x8,#26,#26 + umlal v20.2d,v9.2s,v1.s[0] + ubfx x7,x9,#26,#26 + + add v12.2s,v12.2s,v27.2s + add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32 + umlal v22.2d,v10.2s,v3.s[0] + extr x8,x12,x8,#52 + umlal v23.2d,v10.2s,v5.s[0] + extr x9,x13,x9,#52 + umlal v19.2d,v10.2s,v8.s[0] + add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32 + umlal v21.2d,v10.2s,v1.s[0] + fmov d9,x4 + umlal v20.2d,v10.2s,v0.s[0] + and x8,x8,#0x03ffffff + + add v13.2s,v13.2s,v28.2s + and x9,x9,#0x03ffffff + umlal v22.2d,v12.2s,v0.s[0] + ubfx x10,x12,#14,#26 
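+ // the scalar base 2^64 -> base 2^26 conversion of the next input
+ // blocks is interleaved with the vector multiply-accumulate to
+ // keep both pipelines busy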
+ umlal v19.2d,v12.2s,v4.s[0] + ubfx x11,x13,#14,#26 + umlal v23.2d,v12.2s,v1.s[0] + add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32 + umlal v20.2d,v12.2s,v6.s[0] + fmov d10,x6 + umlal v21.2d,v12.2s,v8.s[0] + add x12,x3,x12,lsr#40 + + umlal v22.2d,v13.2s,v8.s[0] + add x13,x3,x13,lsr#40 + umlal v19.2d,v13.2s,v2.s[0] + add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32 + umlal v23.2d,v13.2s,v0.s[0] + add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32 + umlal v20.2d,v13.2s,v4.s[0] + fmov d11,x8 + umlal v21.2d,v13.2s,v6.s[0] + fmov d12,x10 + fmov d13,x12 + + ///////////////////////////////////////////////////////////////// + // lazy reduction as discussed in "NEON crypto" by D.J. Bernstein + // and P. Schwabe + // + // [see discussion in poly1305-armv4 module] + + ushr v29.2d,v22.2d,#26 + xtn v27.2s,v22.2d + ushr v30.2d,v19.2d,#26 + and v19.16b,v19.16b,v31.16b + add v23.2d,v23.2d,v29.2d // h3 -> h4 + bic v27.2s,#0xfc,lsl#24 // &=0x03ffffff + add v20.2d,v20.2d,v30.2d // h0 -> h1 + + ushr v29.2d,v23.2d,#26 + xtn v28.2s,v23.2d + ushr v30.2d,v20.2d,#26 + xtn v25.2s,v20.2d + bic v28.2s,#0xfc,lsl#24 + add v21.2d,v21.2d,v30.2d // h1 -> h2 + + add v19.2d,v19.2d,v29.2d + shl v29.2d,v29.2d,#2 + shrn v30.2s,v21.2d,#26 + xtn v26.2s,v21.2d + add v19.2d,v19.2d,v29.2d // h4 -> h0 + bic v25.2s,#0xfc,lsl#24 + add v27.2s,v27.2s,v30.2s // h2 -> h3 + bic v26.2s,#0xfc,lsl#24 + + shrn v29.2s,v19.2d,#26 + xtn v24.2s,v19.2d + ushr v30.2s,v27.2s,#26 + bic v27.2s,#0xfc,lsl#24 + bic v24.2s,#0xfc,lsl#24 + add v25.2s,v25.2s,v29.2s // h0 -> h1 + add v28.2s,v28.2s,v30.2s // h3 -> h4 + + b.hi .Loop_neon + +.Lskip_loop: + dup v16.2d,v16.d[0] + add v11.2s,v11.2s,v26.2s + + //////////////////////////////////////////////////////////////// + // multiply (inp[0:1]+hash) or inp[2:3] by r^2:r^1 + + adds x2,x2,#32 + b.ne .Long_tail + + dup v16.2d,v11.d[0] + add v14.2s,v9.2s,v24.2s + add v17.2s,v12.2s,v27.2s + add v15.2s,v10.2s,v25.2s + add v18.2s,v13.2s,v28.2s + +.Long_tail: + dup v14.2d,v14.d[0] + umull2 v19.2d,v16.4s,v6.4s + umull2 v22.2d,v16.4s,v1.4s + umull2 v23.2d,v16.4s,v3.4s + umull2 v21.2d,v16.4s,v0.4s + umull2 v20.2d,v16.4s,v8.4s + + dup v15.2d,v15.d[0] + umlal2 v19.2d,v14.4s,v0.4s + umlal2 v21.2d,v14.4s,v3.4s + umlal2 v22.2d,v14.4s,v5.4s + umlal2 v23.2d,v14.4s,v7.4s + umlal2 v20.2d,v14.4s,v1.4s + + dup v17.2d,v17.d[0] + umlal2 v19.2d,v15.4s,v8.4s + umlal2 v22.2d,v15.4s,v3.4s + umlal2 v21.2d,v15.4s,v1.4s + umlal2 v23.2d,v15.4s,v5.4s + umlal2 v20.2d,v15.4s,v0.4s + + dup v18.2d,v18.d[0] + umlal2 v22.2d,v17.4s,v0.4s + umlal2 v23.2d,v17.4s,v1.4s + umlal2 v19.2d,v17.4s,v4.4s + umlal2 v20.2d,v17.4s,v6.4s + umlal2 v21.2d,v17.4s,v8.4s + + umlal2 v22.2d,v18.4s,v8.4s + umlal2 v19.2d,v18.4s,v2.4s + umlal2 v23.2d,v18.4s,v0.4s + umlal2 v20.2d,v18.4s,v4.4s + umlal2 v21.2d,v18.4s,v6.4s + + b.eq .Lshort_tail + + //////////////////////////////////////////////////////////////// + // (hash+inp[0:1])*r^4:r^3 and accumulate + + add v9.2s,v9.2s,v24.2s + umlal v22.2d,v11.2s,v1.2s + umlal v19.2d,v11.2s,v6.2s + umlal v23.2d,v11.2s,v3.2s + umlal v20.2d,v11.2s,v8.2s + umlal v21.2d,v11.2s,v0.2s + + add v10.2s,v10.2s,v25.2s + umlal v22.2d,v9.2s,v5.2s + umlal v19.2d,v9.2s,v0.2s + umlal v23.2d,v9.2s,v7.2s + umlal v20.2d,v9.2s,v1.2s + umlal v21.2d,v9.2s,v3.2s + + add v12.2s,v12.2s,v27.2s + umlal v22.2d,v10.2s,v3.2s + umlal v19.2d,v10.2s,v8.2s + umlal v23.2d,v10.2s,v5.2s + umlal v20.2d,v10.2s,v0.2s + umlal v21.2d,v10.2s,v1.2s + + add v13.2s,v13.2s,v28.2s + umlal v22.2d,v12.2s,v0.2s + umlal v19.2d,v12.2s,v4.2s + umlal v23.2d,v12.2s,v1.2s + umlal v20.2d,v12.2s,v6.2s + umlal 
v21.2d,v12.2s,v8.2s + + umlal v22.2d,v13.2s,v8.2s + umlal v19.2d,v13.2s,v2.2s + umlal v23.2d,v13.2s,v0.2s + umlal v20.2d,v13.2s,v4.2s + umlal v21.2d,v13.2s,v6.2s + +.Lshort_tail: + //////////////////////////////////////////////////////////////// + // horizontal add + + addp v22.2d,v22.2d,v22.2d + ldp d8,d9,[sp,#16] // meet ABI requirements + addp v19.2d,v19.2d,v19.2d + ldp d10,d11,[sp,#32] + addp v23.2d,v23.2d,v23.2d + ldp d12,d13,[sp,#48] + addp v20.2d,v20.2d,v20.2d + ldp d14,d15,[sp,#64] + addp v21.2d,v21.2d,v21.2d + ldr x30,[sp,#8] + .inst 0xd50323bf // autiasp + + //////////////////////////////////////////////////////////////// + // lazy reduction, but without narrowing + + ushr v29.2d,v22.2d,#26 + and v22.16b,v22.16b,v31.16b + ushr v30.2d,v19.2d,#26 + and v19.16b,v19.16b,v31.16b + + add v23.2d,v23.2d,v29.2d // h3 -> h4 + add v20.2d,v20.2d,v30.2d // h0 -> h1 + + ushr v29.2d,v23.2d,#26 + and v23.16b,v23.16b,v31.16b + ushr v30.2d,v20.2d,#26 + and v20.16b,v20.16b,v31.16b + add v21.2d,v21.2d,v30.2d // h1 -> h2 + + add v19.2d,v19.2d,v29.2d + shl v29.2d,v29.2d,#2 + ushr v30.2d,v21.2d,#26 + and v21.16b,v21.16b,v31.16b + add v19.2d,v19.2d,v29.2d // h4 -> h0 + add v22.2d,v22.2d,v30.2d // h2 -> h3 + + ushr v29.2d,v19.2d,#26 + and v19.16b,v19.16b,v31.16b + ushr v30.2d,v22.2d,#26 + and v22.16b,v22.16b,v31.16b + add v20.2d,v20.2d,v29.2d // h0 -> h1 + add v23.2d,v23.2d,v30.2d // h3 -> h4 + + //////////////////////////////////////////////////////////////// + // write the result, can be partially reduced + + st4 {v19.s,v20.s,v21.s,v22.s}[0],[x0],#16 + mov x4,#1 + st1 {v23.s}[0],[x0] + str x4,[x0,#8] // set is_base2_26 + + ldr x29,[sp],#80 + ret +.size poly1305_blocks_neon,.-poly1305_blocks_neon + +.align 5 +.Lzeros: +.long 0,0,0,0,0,0,0,0 +.asciz "Poly1305 for ARMv8, CRYPTOGAMS by @dot-asm" +.align 2 +#if !defined(__KERNEL__) && !defined(_WIN64) +.comm OPENSSL_armcap_P,4,4 +.hidden OPENSSL_armcap_P +#endif diff --git a/arch/arm64/crypto/poly1305-glue.c b/arch/arm64/crypto/poly1305-glue.c new file mode 100644 index 000000000000..dd843d0ee83a --- /dev/null +++ b/arch/arm64/crypto/poly1305-glue.c @@ -0,0 +1,237 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * OpenSSL/Cryptogams accelerated Poly1305 transform for arm64 + * + * Copyright (C) 2019 Linaro Ltd. 
+ */
+
+#include <asm/hwcap.h>
+#include <asm/neon.h>
+#include <asm/simd.h>
+#include <asm/unaligned.h>
+#include <crypto/algapi.h>
+#include <crypto/internal/hash.h>
+#include <crypto/internal/poly1305.h>
+#include <crypto/internal/simd.h>
+#include <linux/cpufeature.h>
+#include <linux/crypto.h>
+#include <linux/jump_label.h>
+#include <linux/module.h>
+
+asmlinkage void poly1305_init_arm64(void *state, const u8 *key);
+asmlinkage void poly1305_blocks(void *state, const u8 *src, u32 len, u32 hibit);
+asmlinkage void poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit);
+asmlinkage void poly1305_emit(void *state, __le32 *digest, const u32 *nonce);
+
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
+
+void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 *key)
+{
+ poly1305_init_arm64(&dctx->h, key);
+ dctx->s[0] = get_unaligned_le32(key + 16);
+ dctx->s[1] = get_unaligned_le32(key + 20);
+ dctx->s[2] = get_unaligned_le32(key + 24);
+ dctx->s[3] = get_unaligned_le32(key + 28);
+ dctx->buflen = 0;
+}
+EXPORT_SYMBOL(poly1305_init_arch);
+
+static int neon_poly1305_init(struct shash_desc *desc)
+{
+ struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+
+ dctx->buflen = 0;
+ dctx->rset = 0;
+ dctx->sset = false;
+
+ return 0;
+}
+
+static void neon_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src,
+ u32 len, u32 hibit, bool do_neon)
+{
+ if (unlikely(!dctx->sset)) {
+ if (!dctx->rset) {
+ poly1305_init_arch(dctx, src);
+ src += POLY1305_BLOCK_SIZE;
+ len -= POLY1305_BLOCK_SIZE;
+ dctx->rset = 1;
+ }
+ if (len >= POLY1305_BLOCK_SIZE) {
+ dctx->s[0] = get_unaligned_le32(src + 0);
+ dctx->s[1] = get_unaligned_le32(src + 4);
+ dctx->s[2] = get_unaligned_le32(src + 8);
+ dctx->s[3] = get_unaligned_le32(src + 12);
+ src += POLY1305_BLOCK_SIZE;
+ len -= POLY1305_BLOCK_SIZE;
+ dctx->sset = true;
+ }
+ if (len < POLY1305_BLOCK_SIZE)
+ return;
+ }
+
+ len &= ~(POLY1305_BLOCK_SIZE - 1);
+
+ if (static_branch_likely(&have_neon) && likely(do_neon))
+ poly1305_blocks_neon(&dctx->h, src, len, hibit);
+ else
+ poly1305_blocks(&dctx->h, src, len, hibit);
+}
+
+static void neon_poly1305_do_update(struct poly1305_desc_ctx *dctx,
+ const u8 *src, u32 len, bool do_neon)
+{
+ if (unlikely(dctx->buflen)) {
+ u32 bytes = min(len, POLY1305_BLOCK_SIZE - dctx->buflen);
+
+ memcpy(dctx->buf + dctx->buflen, src, bytes);
+ src += bytes;
+ len -= bytes;
+ dctx->buflen += bytes;
+
+ if (dctx->buflen == POLY1305_BLOCK_SIZE) {
+ neon_poly1305_blocks(dctx, dctx->buf,
+ POLY1305_BLOCK_SIZE, 1, false);
+ dctx->buflen = 0;
+ }
+ }
+
+ if (likely(len >= POLY1305_BLOCK_SIZE)) {
+ neon_poly1305_blocks(dctx, src, len, 1, do_neon);
+ src += round_down(len, POLY1305_BLOCK_SIZE);
+ len %= POLY1305_BLOCK_SIZE;
+ }
+
+ if (unlikely(len)) {
+ dctx->buflen = len;
+ memcpy(dctx->buf, src, len);
+ }
+}
+
+static int neon_poly1305_update(struct shash_desc *desc,
+ const u8 *src, unsigned int srclen)
+{
+ bool do_neon = crypto_simd_usable() && srclen > 128;
+ struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+
+ if (static_branch_likely(&have_neon) && do_neon)
+ kernel_neon_begin();
+ neon_poly1305_do_update(dctx, src, srclen, do_neon);
+ if (static_branch_likely(&have_neon) && do_neon)
+ kernel_neon_end();
+ return 0;
+}
+
+void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
+ unsigned int nbytes)
+{
+ if (unlikely(dctx->buflen)) {
+ u32 bytes = min(nbytes, POLY1305_BLOCK_SIZE - dctx->buflen);
+
+ memcpy(dctx->buf + dctx->buflen, src, bytes);
+ src += bytes;
+ nbytes -= bytes;
+ dctx->buflen += bytes;
+
+ if (dctx->buflen == POLY1305_BLOCK_SIZE) {
+ poly1305_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 1);
+ dctx->buflen = 0;
+ }
+ }
+
+ if (likely(nbytes >= POLY1305_BLOCK_SIZE)) {
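+ /* Bulk path: hand all complete blocks to the asm in one call; the
+ * NEON variant is used only when SIMD is usable in this context. */
+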
unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE); + + if (static_branch_likely(&have_neon) && crypto_simd_usable()) { + kernel_neon_begin(); + poly1305_blocks_neon(&dctx->h, src, len, 1); + kernel_neon_end(); + } else { + poly1305_blocks(&dctx->h, src, len, 1); + } + src += len; + nbytes %= POLY1305_BLOCK_SIZE; + } + + if (unlikely(nbytes)) { + dctx->buflen = nbytes; + memcpy(dctx->buf, src, nbytes); + } +} +EXPORT_SYMBOL(poly1305_update_arch); + +void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst) +{ + __le32 digest[4]; + u64 f = 0; + + if (unlikely(dctx->buflen)) { + dctx->buf[dctx->buflen++] = 1; + memset(dctx->buf + dctx->buflen, 0, + POLY1305_BLOCK_SIZE - dctx->buflen); + poly1305_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0); + } + + poly1305_emit(&dctx->h, digest, dctx->s); + + /* mac = (h + s) % (2^128) */ + f = (f >> 32) + le32_to_cpu(digest[0]); + put_unaligned_le32(f, dst); + f = (f >> 32) + le32_to_cpu(digest[1]); + put_unaligned_le32(f, dst + 4); + f = (f >> 32) + le32_to_cpu(digest[2]); + put_unaligned_le32(f, dst + 8); + f = (f >> 32) + le32_to_cpu(digest[3]); + put_unaligned_le32(f, dst + 12); + + *dctx = (struct poly1305_desc_ctx){}; +} +EXPORT_SYMBOL(poly1305_final_arch); + +static int neon_poly1305_final(struct shash_desc *desc, u8 *dst) +{ + struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); + + if (unlikely(!dctx->sset)) + return -ENOKEY; + + poly1305_final_arch(dctx, dst); + return 0; +} + +static struct shash_alg neon_poly1305_alg = { + .init = neon_poly1305_init, + .update = neon_poly1305_update, + .final = neon_poly1305_final, + .digestsize = POLY1305_DIGEST_SIZE, + .descsize = sizeof(struct poly1305_desc_ctx), + + .base.cra_name = "poly1305", + .base.cra_driver_name = "poly1305-neon", + .base.cra_priority = 200, + .base.cra_blocksize = POLY1305_BLOCK_SIZE, + .base.cra_module = THIS_MODULE, +}; + +static int __init neon_poly1305_mod_init(void) +{ + if (!cpu_have_named_feature(ASIMD)) + return 0; + + static_branch_enable(&have_neon); + + return crypto_register_shash(&neon_poly1305_alg); +} + +static void __exit neon_poly1305_mod_exit(void) +{ + if (cpu_have_named_feature(ASIMD)) + crypto_unregister_shash(&neon_poly1305_alg); +} + +module_init(neon_poly1305_mod_init); +module_exit(neon_poly1305_mod_exit); + +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS_CRYPTO("poly1305"); +MODULE_ALIAS_CRYPTO("poly1305-neon"); diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig index 181754615f73..9923445e8225 100644 --- a/lib/crypto/Kconfig +++ b/lib/crypto/Kconfig @@ -40,6 +40,7 @@ config CRYPTO_LIB_DES config CRYPTO_LIB_POLY1305_RSIZE int default 4 if X86_64 + default 9 if ARM64 default 1 config CRYPTO_ARCH_HAVE_LIB_POLY1305 -- cgit v1.2.3 From a6b803b3ddc793d6db0c16f12fc12d30d20fa9cc Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 8 Nov 2019 13:22:25 +0100 Subject: crypto: arm/poly1305 - incorporate OpenSSL/CRYPTOGAMS NEON implementation This is a straight import of the OpenSSL/CRYPTOGAMS Poly1305 implementation for NEON authored by Andy Polyakov, and contributed by him to the OpenSSL project. The file 'poly1305-armv4.pl' is taken straight from this upstream GitHub repository [0] at commit ec55a08dc0244ce570c4fc7cade330c60798952f, and already contains all the changes required to build it as part of a Linux kernel module. 
[0] https://github.com/dot-asm/cryptogams Co-developed-by: Andy Polyakov Signed-off-by: Andy Polyakov Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm/crypto/Kconfig | 5 + arch/arm/crypto/Makefile | 12 +- arch/arm/crypto/poly1305-armv4.pl | 1236 +++++++++++++++++++++++++++++++ arch/arm/crypto/poly1305-core.S_shipped | 1158 +++++++++++++++++++++++++++++ arch/arm/crypto/poly1305-glue.c | 276 +++++++ lib/crypto/Kconfig | 2 +- 6 files changed, 2687 insertions(+), 2 deletions(-) create mode 100644 arch/arm/crypto/poly1305-armv4.pl create mode 100644 arch/arm/crypto/poly1305-core.S_shipped create mode 100644 arch/arm/crypto/poly1305-glue.c (limited to 'arch') diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig index 4d13b5201796..166d32616fea 100644 --- a/arch/arm/crypto/Kconfig +++ b/arch/arm/crypto/Kconfig @@ -132,6 +132,11 @@ config CRYPTO_CHACHA20_NEON select CRYPTO_SKCIPHER select CRYPTO_ARCH_HAVE_LIB_CHACHA +config CRYPTO_POLY1305_ARM + tristate "Accelerated scalar and SIMD Poly1305 hash implementations" + select CRYPTO_HASH + select CRYPTO_ARCH_HAVE_LIB_POLY1305 + config CRYPTO_NHPOLY1305_NEON tristate "NEON accelerated NHPoly1305 hash function (for Adiantum)" depends on KERNEL_MODE_NEON diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile index 0e550badf8ed..d568d699b3b7 100644 --- a/arch/arm/crypto/Makefile +++ b/arch/arm/crypto/Makefile @@ -10,6 +10,7 @@ obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o obj-$(CONFIG_CRYPTO_SHA512_ARM) += sha512-arm.o obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha-neon.o +obj-$(CONFIG_CRYPTO_POLY1305_ARM) += poly1305-arm.o obj-$(CONFIG_CRYPTO_NHPOLY1305_NEON) += nhpoly1305-neon.o obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o @@ -35,12 +36,16 @@ crct10dif-arm-ce-y := crct10dif-ce-core.o crct10dif-ce-glue.o crc32-arm-ce-y:= crc32-ce-core.o crc32-ce-glue.o chacha-neon-y := chacha-scalar-core.o chacha-glue.o chacha-neon-$(CONFIG_KERNEL_MODE_NEON) += chacha-neon-core.o +poly1305-arm-y := poly1305-core.o poly1305-glue.o nhpoly1305-neon-y := nh-neon-core.o nhpoly1305-neon-glue.o ifdef REGENERATE_ARM_CRYPTO quiet_cmd_perl = PERL $@ cmd_perl = $(PERL) $(<) > $(@) +$(src)/poly1305-core.S_shipped: $(src)/poly1305-armv4.pl + $(call cmd,perl) + $(src)/sha256-core.S_shipped: $(src)/sha256-armv4.pl $(call cmd,perl) @@ -48,4 +53,9 @@ $(src)/sha512-core.S_shipped: $(src)/sha512-armv4.pl $(call cmd,perl) endif -clean-files += sha256-core.S sha512-core.S +clean-files += poly1305-core.S sha256-core.S sha512-core.S + +# massage the perlasm code a bit so we only get the NEON routine if we need it +poly1305-aflags-$(CONFIG_CPU_V7) := -U__LINUX_ARM_ARCH__ -D__LINUX_ARM_ARCH__=5 +poly1305-aflags-$(CONFIG_KERNEL_MODE_NEON) := -U__LINUX_ARM_ARCH__ -D__LINUX_ARM_ARCH__=7 +AFLAGS_poly1305-core.o += $(poly1305-aflags-y) diff --git a/arch/arm/crypto/poly1305-armv4.pl b/arch/arm/crypto/poly1305-armv4.pl new file mode 100644 index 000000000000..6d79498d3115 --- /dev/null +++ b/arch/arm/crypto/poly1305-armv4.pl @@ -0,0 +1,1236 @@ +#!/usr/bin/env perl +# SPDX-License-Identifier: GPL-1.0+ OR BSD-3-Clause +# +# ==================================================================== +# Written by Andy Polyakov, @dot-asm, initially for the OpenSSL +# project. 
+# ==================================================================== +# +# IALU(*)/gcc-4.4 NEON +# +# ARM11xx(ARMv6) 7.78/+100% - +# Cortex-A5 6.35/+130% 3.00 +# Cortex-A8 6.25/+115% 2.36 +# Cortex-A9 5.10/+95% 2.55 +# Cortex-A15 3.85/+85% 1.25(**) +# Snapdragon S4 5.70/+100% 1.48(**) +# +# (*) this is for -march=armv6, i.e. with bunch of ldrb loading data; +# (**) these are trade-off results, they can be improved by ~8% but at +# the cost of 15/12% regression on Cortex-A5/A7, it's even possible +# to improve Cortex-A9 result, but then A5/A7 loose more than 20%; + +$flavour = shift; +if ($flavour=~/\w[\w\-]*\.\w+$/) { $output=$flavour; undef $flavour; } +else { while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} } + +if ($flavour && $flavour ne "void") { + $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; + ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or + ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or + die "can't locate arm-xlate.pl"; + + open STDOUT,"| \"$^X\" $xlate $flavour $output"; +} else { + open STDOUT,">$output"; +} + +($ctx,$inp,$len,$padbit)=map("r$_",(0..3)); + +$code.=<<___; +#ifndef __KERNEL__ +# include "arm_arch.h" +#else +# define __ARM_ARCH__ __LINUX_ARM_ARCH__ +# define __ARM_MAX_ARCH__ __LINUX_ARM_ARCH__ +# define poly1305_init poly1305_init_arm +# define poly1305_blocks poly1305_blocks_arm +# define poly1305_emit poly1305_emit_arm +.globl poly1305_blocks_neon +#endif + +#if defined(__thumb2__) +.syntax unified +.thumb +#else +.code 32 +#endif + +.text + +.globl poly1305_emit +.globl poly1305_blocks +.globl poly1305_init +.type poly1305_init,%function +.align 5 +poly1305_init: +.Lpoly1305_init: + stmdb sp!,{r4-r11} + + eor r3,r3,r3 + cmp $inp,#0 + str r3,[$ctx,#0] @ zero hash value + str r3,[$ctx,#4] + str r3,[$ctx,#8] + str r3,[$ctx,#12] + str r3,[$ctx,#16] + str r3,[$ctx,#36] @ clear is_base2_26 + add $ctx,$ctx,#20 + +#ifdef __thumb2__ + it eq +#endif + moveq r0,#0 + beq .Lno_key + +#if __ARM_MAX_ARCH__>=7 + mov r3,#-1 + str r3,[$ctx,#28] @ impossible key power value +# ifndef __KERNEL__ + adr r11,.Lpoly1305_init + ldr r12,.LOPENSSL_armcap +# endif +#endif + ldrb r4,[$inp,#0] + mov r10,#0x0fffffff + ldrb r5,[$inp,#1] + and r3,r10,#-4 @ 0x0ffffffc + ldrb r6,[$inp,#2] + ldrb r7,[$inp,#3] + orr r4,r4,r5,lsl#8 + ldrb r5,[$inp,#4] + orr r4,r4,r6,lsl#16 + ldrb r6,[$inp,#5] + orr r4,r4,r7,lsl#24 + ldrb r7,[$inp,#6] + and r4,r4,r10 + +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) +# if !defined(_WIN32) + ldr r12,[r11,r12] @ OPENSSL_armcap_P +# endif +# if defined(__APPLE__) || defined(_WIN32) + ldr r12,[r12] +# endif +#endif + ldrb r8,[$inp,#7] + orr r5,r5,r6,lsl#8 + ldrb r6,[$inp,#8] + orr r5,r5,r7,lsl#16 + ldrb r7,[$inp,#9] + orr r5,r5,r8,lsl#24 + ldrb r8,[$inp,#10] + and r5,r5,r3 + +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) + tst r12,#ARMV7_NEON @ check for NEON +# ifdef __thumb2__ + adr r9,.Lpoly1305_blocks_neon + adr r11,.Lpoly1305_blocks + it ne + movne r11,r9 + adr r12,.Lpoly1305_emit + orr r11,r11,#1 @ thumb-ify addresses + orr r12,r12,#1 +# else + add r12,r11,#(.Lpoly1305_emit-.Lpoly1305_init) + ite eq + addeq r11,r11,#(.Lpoly1305_blocks-.Lpoly1305_init) + addne r11,r11,#(.Lpoly1305_blocks_neon-.Lpoly1305_init) +# endif +#endif + ldrb r9,[$inp,#11] + orr r6,r6,r7,lsl#8 + ldrb r7,[$inp,#12] + orr r6,r6,r8,lsl#16 + ldrb r8,[$inp,#13] + orr r6,r6,r9,lsl#24 + ldrb r9,[$inp,#14] + and r6,r6,r3 + + ldrb r10,[$inp,#15] + orr r7,r7,r8,lsl#8 + str r4,[$ctx,#0] + orr r7,r7,r9,lsl#16 + str r5,[$ctx,#4] + orr r7,r7,r10,lsl#24 + str r6,[$ctx,#8] + and 
r7,r7,r3 + str r7,[$ctx,#12] +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) + stmia r2,{r11,r12} @ fill functions table + mov r0,#1 +#else + mov r0,#0 +#endif +.Lno_key: + ldmia sp!,{r4-r11} +#if __ARM_ARCH__>=5 + ret @ bx lr +#else + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet + bx lr @ interoperable with Thumb ISA:-) +#endif +.size poly1305_init,.-poly1305_init +___ +{ +my ($h0,$h1,$h2,$h3,$h4,$r0,$r1,$r2,$r3)=map("r$_",(4..12)); +my ($s1,$s2,$s3)=($r1,$r2,$r3); + +$code.=<<___; +.type poly1305_blocks,%function +.align 5 +poly1305_blocks: +.Lpoly1305_blocks: + stmdb sp!,{r3-r11,lr} + + ands $len,$len,#-16 + beq .Lno_data + + add $len,$len,$inp @ end pointer + sub sp,sp,#32 + +#if __ARM_ARCH__<7 + ldmia $ctx,{$h0-$r3} @ load context + add $ctx,$ctx,#20 + str $len,[sp,#16] @ offload stuff + str $ctx,[sp,#12] +#else + ldr lr,[$ctx,#36] @ is_base2_26 + ldmia $ctx!,{$h0-$h4} @ load hash value + str $len,[sp,#16] @ offload stuff + str $ctx,[sp,#12] + + adds $r0,$h0,$h1,lsl#26 @ base 2^26 -> base 2^32 + mov $r1,$h1,lsr#6 + adcs $r1,$r1,$h2,lsl#20 + mov $r2,$h2,lsr#12 + adcs $r2,$r2,$h3,lsl#14 + mov $r3,$h3,lsr#18 + adcs $r3,$r3,$h4,lsl#8 + mov $len,#0 + teq lr,#0 + str $len,[$ctx,#16] @ clear is_base2_26 + adc $len,$len,$h4,lsr#24 + + itttt ne + movne $h0,$r0 @ choose between radixes + movne $h1,$r1 + movne $h2,$r2 + movne $h3,$r3 + ldmia $ctx,{$r0-$r3} @ load key + it ne + movne $h4,$len +#endif + + mov lr,$inp + cmp $padbit,#0 + str $r1,[sp,#20] + str $r2,[sp,#24] + str $r3,[sp,#28] + b .Loop + +.align 4 +.Loop: +#if __ARM_ARCH__<7 + ldrb r0,[lr],#16 @ load input +# ifdef __thumb2__ + it hi +# endif + addhi $h4,$h4,#1 @ 1<<128 + ldrb r1,[lr,#-15] + ldrb r2,[lr,#-14] + ldrb r3,[lr,#-13] + orr r1,r0,r1,lsl#8 + ldrb r0,[lr,#-12] + orr r2,r1,r2,lsl#16 + ldrb r1,[lr,#-11] + orr r3,r2,r3,lsl#24 + ldrb r2,[lr,#-10] + adds $h0,$h0,r3 @ accumulate input + + ldrb r3,[lr,#-9] + orr r1,r0,r1,lsl#8 + ldrb r0,[lr,#-8] + orr r2,r1,r2,lsl#16 + ldrb r1,[lr,#-7] + orr r3,r2,r3,lsl#24 + ldrb r2,[lr,#-6] + adcs $h1,$h1,r3 + + ldrb r3,[lr,#-5] + orr r1,r0,r1,lsl#8 + ldrb r0,[lr,#-4] + orr r2,r1,r2,lsl#16 + ldrb r1,[lr,#-3] + orr r3,r2,r3,lsl#24 + ldrb r2,[lr,#-2] + adcs $h2,$h2,r3 + + ldrb r3,[lr,#-1] + orr r1,r0,r1,lsl#8 + str lr,[sp,#8] @ offload input pointer + orr r2,r1,r2,lsl#16 + add $s1,$r1,$r1,lsr#2 + orr r3,r2,r3,lsl#24 +#else + ldr r0,[lr],#16 @ load input + it hi + addhi $h4,$h4,#1 @ padbit + ldr r1,[lr,#-12] + ldr r2,[lr,#-8] + ldr r3,[lr,#-4] +# ifdef __ARMEB__ + rev r0,r0 + rev r1,r1 + rev r2,r2 + rev r3,r3 +# endif + adds $h0,$h0,r0 @ accumulate input + str lr,[sp,#8] @ offload input pointer + adcs $h1,$h1,r1 + add $s1,$r1,$r1,lsr#2 + adcs $h2,$h2,r2 +#endif + add $s2,$r2,$r2,lsr#2 + adcs $h3,$h3,r3 + add $s3,$r3,$r3,lsr#2 + + umull r2,r3,$h1,$r0 + adc $h4,$h4,#0 + umull r0,r1,$h0,$r0 + umlal r2,r3,$h4,$s1 + umlal r0,r1,$h3,$s1 + ldr $r1,[sp,#20] @ reload $r1 + umlal r2,r3,$h2,$s3 + umlal r0,r1,$h1,$s3 + umlal r2,r3,$h3,$s2 + umlal r0,r1,$h2,$s2 + umlal r2,r3,$h0,$r1 + str r0,[sp,#0] @ future $h0 + mul r0,$s2,$h4 + ldr $r2,[sp,#24] @ reload $r2 + adds r2,r2,r1 @ d1+=d0>>32 + eor r1,r1,r1 + adc lr,r3,#0 @ future $h2 + str r2,[sp,#4] @ future $h1 + + mul r2,$s3,$h4 + eor r3,r3,r3 + umlal r0,r1,$h3,$s3 + ldr $r3,[sp,#28] @ reload $r3 + umlal r2,r3,$h3,$r0 + umlal r0,r1,$h2,$r0 + umlal r2,r3,$h2,$r1 + umlal r0,r1,$h1,$r1 + umlal r2,r3,$h1,$r2 + umlal r0,r1,$h0,$r2 + umlal r2,r3,$h0,$r3 + ldr $h0,[sp,#0] + mul $h4,$r0,$h4 + ldr $h1,[sp,#4] + + adds $h2,lr,r0 @ d2+=d1>>32 + ldr 
lr,[sp,#8] @ reload input pointer + adc r1,r1,#0 + adds $h3,r2,r1 @ d3+=d2>>32 + ldr r0,[sp,#16] @ reload end pointer + adc r3,r3,#0 + add $h4,$h4,r3 @ h4+=d3>>32 + + and r1,$h4,#-4 + and $h4,$h4,#3 + add r1,r1,r1,lsr#2 @ *=5 + adds $h0,$h0,r1 + adcs $h1,$h1,#0 + adcs $h2,$h2,#0 + adcs $h3,$h3,#0 + adc $h4,$h4,#0 + + cmp r0,lr @ done yet? + bhi .Loop + + ldr $ctx,[sp,#12] + add sp,sp,#32 + stmdb $ctx,{$h0-$h4} @ store the result + +.Lno_data: +#if __ARM_ARCH__>=5 + ldmia sp!,{r3-r11,pc} +#else + ldmia sp!,{r3-r11,lr} + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet + bx lr @ interoperable with Thumb ISA:-) +#endif +.size poly1305_blocks,.-poly1305_blocks +___ +} +{ +my ($ctx,$mac,$nonce)=map("r$_",(0..2)); +my ($h0,$h1,$h2,$h3,$h4,$g0,$g1,$g2,$g3)=map("r$_",(3..11)); +my $g4=$ctx; + +$code.=<<___; +.type poly1305_emit,%function +.align 5 +poly1305_emit: +.Lpoly1305_emit: + stmdb sp!,{r4-r11} + + ldmia $ctx,{$h0-$h4} + +#if __ARM_ARCH__>=7 + ldr ip,[$ctx,#36] @ is_base2_26 + + adds $g0,$h0,$h1,lsl#26 @ base 2^26 -> base 2^32 + mov $g1,$h1,lsr#6 + adcs $g1,$g1,$h2,lsl#20 + mov $g2,$h2,lsr#12 + adcs $g2,$g2,$h3,lsl#14 + mov $g3,$h3,lsr#18 + adcs $g3,$g3,$h4,lsl#8 + mov $g4,#0 + adc $g4,$g4,$h4,lsr#24 + + tst ip,ip + itttt ne + movne $h0,$g0 + movne $h1,$g1 + movne $h2,$g2 + movne $h3,$g3 + it ne + movne $h4,$g4 +#endif + + adds $g0,$h0,#5 @ compare to modulus + adcs $g1,$h1,#0 + adcs $g2,$h2,#0 + adcs $g3,$h3,#0 + adc $g4,$h4,#0 + tst $g4,#4 @ did it carry/borrow? + +#ifdef __thumb2__ + it ne +#endif + movne $h0,$g0 + ldr $g0,[$nonce,#0] +#ifdef __thumb2__ + it ne +#endif + movne $h1,$g1 + ldr $g1,[$nonce,#4] +#ifdef __thumb2__ + it ne +#endif + movne $h2,$g2 + ldr $g2,[$nonce,#8] +#ifdef __thumb2__ + it ne +#endif + movne $h3,$g3 + ldr $g3,[$nonce,#12] + + adds $h0,$h0,$g0 + adcs $h1,$h1,$g1 + adcs $h2,$h2,$g2 + adc $h3,$h3,$g3 + +#if __ARM_ARCH__>=7 +# ifdef __ARMEB__ + rev $h0,$h0 + rev $h1,$h1 + rev $h2,$h2 + rev $h3,$h3 +# endif + str $h0,[$mac,#0] + str $h1,[$mac,#4] + str $h2,[$mac,#8] + str $h3,[$mac,#12] +#else + strb $h0,[$mac,#0] + mov $h0,$h0,lsr#8 + strb $h1,[$mac,#4] + mov $h1,$h1,lsr#8 + strb $h2,[$mac,#8] + mov $h2,$h2,lsr#8 + strb $h3,[$mac,#12] + mov $h3,$h3,lsr#8 + + strb $h0,[$mac,#1] + mov $h0,$h0,lsr#8 + strb $h1,[$mac,#5] + mov $h1,$h1,lsr#8 + strb $h2,[$mac,#9] + mov $h2,$h2,lsr#8 + strb $h3,[$mac,#13] + mov $h3,$h3,lsr#8 + + strb $h0,[$mac,#2] + mov $h0,$h0,lsr#8 + strb $h1,[$mac,#6] + mov $h1,$h1,lsr#8 + strb $h2,[$mac,#10] + mov $h2,$h2,lsr#8 + strb $h3,[$mac,#14] + mov $h3,$h3,lsr#8 + + strb $h0,[$mac,#3] + strb $h1,[$mac,#7] + strb $h2,[$mac,#11] + strb $h3,[$mac,#15] +#endif + ldmia sp!,{r4-r11} +#if __ARM_ARCH__>=5 + ret @ bx lr +#else + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet + bx lr @ interoperable with Thumb ISA:-) +#endif +.size poly1305_emit,.-poly1305_emit +___ +{ +my ($R0,$R1,$S1,$R2,$S2,$R3,$S3,$R4,$S4) = map("d$_",(0..9)); +my ($D0,$D1,$D2,$D3,$D4, $H0,$H1,$H2,$H3,$H4) = map("q$_",(5..14)); +my ($T0,$T1,$MASK) = map("q$_",(15,4,0)); + +my ($in2,$zeros,$tbl0,$tbl1) = map("r$_",(4..7)); + +$code.=<<___; +#if __ARM_MAX_ARCH__>=7 +.fpu neon + +.type poly1305_init_neon,%function +.align 5 +poly1305_init_neon: +.Lpoly1305_init_neon: + ldr r3,[$ctx,#48] @ first table element + cmp r3,#-1 @ is value impossible? 
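+ @ poly1305_init stored -1 here, a value no genuine key power can
+ @ take; once r^1..r^4 have been computed the slot is overwritten,
+ @ so the squaring below runs only once per key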
+ bne .Lno_init_neon + + ldr r4,[$ctx,#20] @ load key base 2^32 + ldr r5,[$ctx,#24] + ldr r6,[$ctx,#28] + ldr r7,[$ctx,#32] + + and r2,r4,#0x03ffffff @ base 2^32 -> base 2^26 + mov r3,r4,lsr#26 + mov r4,r5,lsr#20 + orr r3,r3,r5,lsl#6 + mov r5,r6,lsr#14 + orr r4,r4,r6,lsl#12 + mov r6,r7,lsr#8 + orr r5,r5,r7,lsl#18 + and r3,r3,#0x03ffffff + and r4,r4,#0x03ffffff + and r5,r5,#0x03ffffff + + vdup.32 $R0,r2 @ r^1 in both lanes + add r2,r3,r3,lsl#2 @ *5 + vdup.32 $R1,r3 + add r3,r4,r4,lsl#2 + vdup.32 $S1,r2 + vdup.32 $R2,r4 + add r4,r5,r5,lsl#2 + vdup.32 $S2,r3 + vdup.32 $R3,r5 + add r5,r6,r6,lsl#2 + vdup.32 $S3,r4 + vdup.32 $R4,r6 + vdup.32 $S4,r5 + + mov $zeros,#2 @ counter + +.Lsquare_neon: + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4 + @ d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4 + @ d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4 + @ d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4 + @ d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4 + + vmull.u32 $D0,$R0,${R0}[1] + vmull.u32 $D1,$R1,${R0}[1] + vmull.u32 $D2,$R2,${R0}[1] + vmull.u32 $D3,$R3,${R0}[1] + vmull.u32 $D4,$R4,${R0}[1] + + vmlal.u32 $D0,$R4,${S1}[1] + vmlal.u32 $D1,$R0,${R1}[1] + vmlal.u32 $D2,$R1,${R1}[1] + vmlal.u32 $D3,$R2,${R1}[1] + vmlal.u32 $D4,$R3,${R1}[1] + + vmlal.u32 $D0,$R3,${S2}[1] + vmlal.u32 $D1,$R4,${S2}[1] + vmlal.u32 $D3,$R1,${R2}[1] + vmlal.u32 $D2,$R0,${R2}[1] + vmlal.u32 $D4,$R2,${R2}[1] + + vmlal.u32 $D0,$R2,${S3}[1] + vmlal.u32 $D3,$R0,${R3}[1] + vmlal.u32 $D1,$R3,${S3}[1] + vmlal.u32 $D2,$R4,${S3}[1] + vmlal.u32 $D4,$R1,${R3}[1] + + vmlal.u32 $D3,$R4,${S4}[1] + vmlal.u32 $D0,$R1,${S4}[1] + vmlal.u32 $D1,$R2,${S4}[1] + vmlal.u32 $D2,$R3,${S4}[1] + vmlal.u32 $D4,$R0,${R4}[1] + + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ lazy reduction as discussed in "NEON crypto" by D.J. Bernstein + @ and P. Schwabe + @ + @ H0>>+H1>>+H2>>+H3>>+H4 + @ H3>>+H4>>*5+H0>>+H1 + @ + @ Trivia. + @ + @ Result of multiplication of n-bit number by m-bit number is + @ n+m bits wide. However! Even though 2^n is a n+1-bit number, + @ m-bit number multiplied by 2^n is still n+m bits wide. + @ + @ Sum of two n-bit numbers is n+1 bits wide, sum of three - n+2, + @ and so is sum of four. Sum of 2^m n-m-bit numbers and n-bit + @ one is n+1 bits wide. + @ + @ >>+ denotes Hnext += Hn>>26, Hn &= 0x3ffffff. This means that + @ H0, H2, H3 are guaranteed to be 26 bits wide, while H1 and H4 + @ can be 27. However! In cases when their width exceeds 26 bits + @ they are limited by 2^26+2^6. This in turn means that *sum* + @ of the products with these values can still be viewed as sum + @ of 52-bit numbers as long as the amount of addends is not a + @ power of 2. For example, + @ + @ H4 = H4*R0 + H3*R1 + H2*R2 + H1*R3 + H0 * R4, + @ + @ which can't be larger than 5 * (2^26 + 2^6) * (2^26 + 2^6), or + @ 5 * (2^52 + 2*2^32 + 2^12), which in turn is smaller than + @ 8 * (2^52) or 2^55. However, the value is then multiplied by + @ by 5, so we should be looking at 5 * 5 * (2^52 + 2^33 + 2^12), + @ which is less than 32 * (2^52) or 2^57. And when processing + @ data we are looking at triple as many addends... + @ + @ In key setup procedure pre-reduced H0 is limited by 5*4+1 and + @ 5*H4 - by 5*5 52-bit addends, or 57 bits. But when hashing the + @ input H0 is limited by (5*4+1)*3 addends, or 58 bits, while + @ 5*H4 by 5*5*3, or 59[!] bits. How is this relevant? vmlal.u32 + @ instruction accepts 2x32-bit input and writes 2x64-bit result. 
+ @ This means that result of reduction have to be compressed upon + @ loop wrap-around. This can be done in the process of reduction + @ to minimize amount of instructions [as well as amount of + @ 128-bit instructions, which benefits low-end processors], but + @ one has to watch for H2 (which is narrower than H0) and 5*H4 + @ not being wider than 58 bits, so that result of right shift + @ by 26 bits fits in 32 bits. This is also useful on x86, + @ because it allows to use paddd in place for paddq, which + @ benefits Atom, where paddq is ridiculously slow. + + vshr.u64 $T0,$D3,#26 + vmovn.i64 $D3#lo,$D3 + vshr.u64 $T1,$D0,#26 + vmovn.i64 $D0#lo,$D0 + vadd.i64 $D4,$D4,$T0 @ h3 -> h4 + vbic.i32 $D3#lo,#0xfc000000 @ &=0x03ffffff + vadd.i64 $D1,$D1,$T1 @ h0 -> h1 + vbic.i32 $D0#lo,#0xfc000000 + + vshrn.u64 $T0#lo,$D4,#26 + vmovn.i64 $D4#lo,$D4 + vshr.u64 $T1,$D1,#26 + vmovn.i64 $D1#lo,$D1 + vadd.i64 $D2,$D2,$T1 @ h1 -> h2 + vbic.i32 $D4#lo,#0xfc000000 + vbic.i32 $D1#lo,#0xfc000000 + + vadd.i32 $D0#lo,$D0#lo,$T0#lo + vshl.u32 $T0#lo,$T0#lo,#2 + vshrn.u64 $T1#lo,$D2,#26 + vmovn.i64 $D2#lo,$D2 + vadd.i32 $D0#lo,$D0#lo,$T0#lo @ h4 -> h0 + vadd.i32 $D3#lo,$D3#lo,$T1#lo @ h2 -> h3 + vbic.i32 $D2#lo,#0xfc000000 + + vshr.u32 $T0#lo,$D0#lo,#26 + vbic.i32 $D0#lo,#0xfc000000 + vshr.u32 $T1#lo,$D3#lo,#26 + vbic.i32 $D3#lo,#0xfc000000 + vadd.i32 $D1#lo,$D1#lo,$T0#lo @ h0 -> h1 + vadd.i32 $D4#lo,$D4#lo,$T1#lo @ h3 -> h4 + + subs $zeros,$zeros,#1 + beq .Lsquare_break_neon + + add $tbl0,$ctx,#(48+0*9*4) + add $tbl1,$ctx,#(48+1*9*4) + + vtrn.32 $R0,$D0#lo @ r^2:r^1 + vtrn.32 $R2,$D2#lo + vtrn.32 $R3,$D3#lo + vtrn.32 $R1,$D1#lo + vtrn.32 $R4,$D4#lo + + vshl.u32 $S2,$R2,#2 @ *5 + vshl.u32 $S3,$R3,#2 + vshl.u32 $S1,$R1,#2 + vshl.u32 $S4,$R4,#2 + vadd.i32 $S2,$S2,$R2 + vadd.i32 $S1,$S1,$R1 + vadd.i32 $S3,$S3,$R3 + vadd.i32 $S4,$S4,$R4 + + vst4.32 {${R0}[0],${R1}[0],${S1}[0],${R2}[0]},[$tbl0]! + vst4.32 {${R0}[1],${R1}[1],${S1}[1],${R2}[1]},[$tbl1]! + vst4.32 {${S2}[0],${R3}[0],${S3}[0],${R4}[0]},[$tbl0]! + vst4.32 {${S2}[1],${R3}[1],${S3}[1],${R4}[1]},[$tbl1]! + vst1.32 {${S4}[0]},[$tbl0,:32] + vst1.32 {${S4}[1]},[$tbl1,:32] + + b .Lsquare_neon + +.align 4 +.Lsquare_break_neon: + add $tbl0,$ctx,#(48+2*4*9) + add $tbl1,$ctx,#(48+3*4*9) + + vmov $R0,$D0#lo @ r^4:r^3 + vshl.u32 $S1,$D1#lo,#2 @ *5 + vmov $R1,$D1#lo + vshl.u32 $S2,$D2#lo,#2 + vmov $R2,$D2#lo + vshl.u32 $S3,$D3#lo,#2 + vmov $R3,$D3#lo + vshl.u32 $S4,$D4#lo,#2 + vmov $R4,$D4#lo + vadd.i32 $S1,$S1,$D1#lo + vadd.i32 $S2,$S2,$D2#lo + vadd.i32 $S3,$S3,$D3#lo + vadd.i32 $S4,$S4,$D4#lo + + vst4.32 {${R0}[0],${R1}[0],${S1}[0],${R2}[0]},[$tbl0]! + vst4.32 {${R0}[1],${R1}[1],${S1}[1],${R2}[1]},[$tbl1]! + vst4.32 {${S2}[0],${R3}[0],${S3}[0],${R4}[0]},[$tbl0]! + vst4.32 {${S2}[1],${R3}[1],${S3}[1],${R4}[1]},[$tbl1]! + vst1.32 {${S4}[0]},[$tbl0] + vst1.32 {${S4}[1]},[$tbl1] + +.Lno_init_neon: + ret @ bx lr +.size poly1305_init_neon,.-poly1305_init_neon + +.type poly1305_blocks_neon,%function +.align 5 +poly1305_blocks_neon: +.Lpoly1305_blocks_neon: + ldr ip,[$ctx,#36] @ is_base2_26 + + cmp $len,#64 + blo .Lpoly1305_blocks + + stmdb sp!,{r4-r7} + vstmdb sp!,{d8-d15} @ ABI specification says so + + tst ip,ip @ is_base2_26? 
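+ @ if is_base2_26 is still clear (only the scalar code has run so
+ @ far), fall through to build the key power table and convert the
+ @ hash from base 2^32; otherwise reload the base 2^26 limbs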
+ bne .Lbase2_26_neon + + stmdb sp!,{r1-r3,lr} + bl .Lpoly1305_init_neon + + ldr r4,[$ctx,#0] @ load hash value base 2^32 + ldr r5,[$ctx,#4] + ldr r6,[$ctx,#8] + ldr r7,[$ctx,#12] + ldr ip,[$ctx,#16] + + and r2,r4,#0x03ffffff @ base 2^32 -> base 2^26 + mov r3,r4,lsr#26 + veor $D0#lo,$D0#lo,$D0#lo + mov r4,r5,lsr#20 + orr r3,r3,r5,lsl#6 + veor $D1#lo,$D1#lo,$D1#lo + mov r5,r6,lsr#14 + orr r4,r4,r6,lsl#12 + veor $D2#lo,$D2#lo,$D2#lo + mov r6,r7,lsr#8 + orr r5,r5,r7,lsl#18 + veor $D3#lo,$D3#lo,$D3#lo + and r3,r3,#0x03ffffff + orr r6,r6,ip,lsl#24 + veor $D4#lo,$D4#lo,$D4#lo + and r4,r4,#0x03ffffff + mov r1,#1 + and r5,r5,#0x03ffffff + str r1,[$ctx,#36] @ set is_base2_26 + + vmov.32 $D0#lo[0],r2 + vmov.32 $D1#lo[0],r3 + vmov.32 $D2#lo[0],r4 + vmov.32 $D3#lo[0],r5 + vmov.32 $D4#lo[0],r6 + adr $zeros,.Lzeros + + ldmia sp!,{r1-r3,lr} + b .Lhash_loaded + +.align 4 +.Lbase2_26_neon: + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ load hash value + + veor $D0#lo,$D0#lo,$D0#lo + veor $D1#lo,$D1#lo,$D1#lo + veor $D2#lo,$D2#lo,$D2#lo + veor $D3#lo,$D3#lo,$D3#lo + veor $D4#lo,$D4#lo,$D4#lo + vld4.32 {$D0#lo[0],$D1#lo[0],$D2#lo[0],$D3#lo[0]},[$ctx]! + adr $zeros,.Lzeros + vld1.32 {$D4#lo[0]},[$ctx] + sub $ctx,$ctx,#16 @ rewind + +.Lhash_loaded: + add $in2,$inp,#32 + mov $padbit,$padbit,lsl#24 + tst $len,#31 + beq .Leven + + vld4.32 {$H0#lo[0],$H1#lo[0],$H2#lo[0],$H3#lo[0]},[$inp]! + vmov.32 $H4#lo[0],$padbit + sub $len,$len,#16 + add $in2,$inp,#32 + +# ifdef __ARMEB__ + vrev32.8 $H0,$H0 + vrev32.8 $H3,$H3 + vrev32.8 $H1,$H1 + vrev32.8 $H2,$H2 +# endif + vsri.u32 $H4#lo,$H3#lo,#8 @ base 2^32 -> base 2^26 + vshl.u32 $H3#lo,$H3#lo,#18 + + vsri.u32 $H3#lo,$H2#lo,#14 + vshl.u32 $H2#lo,$H2#lo,#12 + vadd.i32 $H4#hi,$H4#lo,$D4#lo @ add hash value and move to #hi + + vbic.i32 $H3#lo,#0xfc000000 + vsri.u32 $H2#lo,$H1#lo,#20 + vshl.u32 $H1#lo,$H1#lo,#6 + + vbic.i32 $H2#lo,#0xfc000000 + vsri.u32 $H1#lo,$H0#lo,#26 + vadd.i32 $H3#hi,$H3#lo,$D3#lo + + vbic.i32 $H0#lo,#0xfc000000 + vbic.i32 $H1#lo,#0xfc000000 + vadd.i32 $H2#hi,$H2#lo,$D2#lo + + vadd.i32 $H0#hi,$H0#lo,$D0#lo + vadd.i32 $H1#hi,$H1#lo,$D1#lo + + mov $tbl1,$zeros + add $tbl0,$ctx,#48 + + cmp $len,$len + b .Long_tail + +.align 4 +.Leven: + subs $len,$len,#64 + it lo + movlo $in2,$zeros + + vmov.i32 $H4,#1<<24 @ padbit, yes, always + vld4.32 {$H0#lo,$H1#lo,$H2#lo,$H3#lo},[$inp] @ inp[0:1] + add $inp,$inp,#64 + vld4.32 {$H0#hi,$H1#hi,$H2#hi,$H3#hi},[$in2] @ inp[2:3] (or 0) + add $in2,$in2,#64 + itt hi + addhi $tbl1,$ctx,#(48+1*9*4) + addhi $tbl0,$ctx,#(48+3*9*4) + +# ifdef __ARMEB__ + vrev32.8 $H0,$H0 + vrev32.8 $H3,$H3 + vrev32.8 $H1,$H1 + vrev32.8 $H2,$H2 +# endif + vsri.u32 $H4,$H3,#8 @ base 2^32 -> base 2^26 + vshl.u32 $H3,$H3,#18 + + vsri.u32 $H3,$H2,#14 + vshl.u32 $H2,$H2,#12 + + vbic.i32 $H3,#0xfc000000 + vsri.u32 $H2,$H1,#20 + vshl.u32 $H1,$H1,#6 + + vbic.i32 $H2,#0xfc000000 + vsri.u32 $H1,$H0,#26 + + vbic.i32 $H0,#0xfc000000 + vbic.i32 $H1,#0xfc000000 + + bls .Lskip_loop + + vld4.32 {${R0}[1],${R1}[1],${S1}[1],${R2}[1]},[$tbl1]! @ load r^2 + vld4.32 {${R0}[0],${R1}[0],${S1}[0],${R2}[0]},[$tbl0]! @ load r^4 + vld4.32 {${S2}[1],${R3}[1],${S3}[1],${R4}[1]},[$tbl1]! + vld4.32 {${S2}[0],${R3}[0],${S3}[0],${R4}[0]},[$tbl0]! 
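+ @ each key power is a 9-word record (r0..r4 interleaved with
+ @ 5*r1..5*r4); lane 1 of the loaded vectors holds r^2 and lane 0
+ @ holds r^4, matching the two interleaved input streams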
+ b .Loop_neon + +.align 5 +.Loop_neon: + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2 + @ ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^3+inp[7]*r + @ \___________________/ + @ ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2+inp[8])*r^2 + @ ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^4+inp[7]*r^2+inp[9])*r + @ \___________________/ \____________________/ + @ + @ Note that we start with inp[2:3]*r^2. This is because it + @ doesn't depend on reduction in previous iteration. + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4 + @ d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4 + @ d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4 + @ d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4 + @ d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4 + + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ inp[2:3]*r^2 + + vadd.i32 $H2#lo,$H2#lo,$D2#lo @ accumulate inp[0:1] + vmull.u32 $D2,$H2#hi,${R0}[1] + vadd.i32 $H0#lo,$H0#lo,$D0#lo + vmull.u32 $D0,$H0#hi,${R0}[1] + vadd.i32 $H3#lo,$H3#lo,$D3#lo + vmull.u32 $D3,$H3#hi,${R0}[1] + vmlal.u32 $D2,$H1#hi,${R1}[1] + vadd.i32 $H1#lo,$H1#lo,$D1#lo + vmull.u32 $D1,$H1#hi,${R0}[1] + + vadd.i32 $H4#lo,$H4#lo,$D4#lo + vmull.u32 $D4,$H4#hi,${R0}[1] + subs $len,$len,#64 + vmlal.u32 $D0,$H4#hi,${S1}[1] + it lo + movlo $in2,$zeros + vmlal.u32 $D3,$H2#hi,${R1}[1] + vld1.32 ${S4}[1],[$tbl1,:32] + vmlal.u32 $D1,$H0#hi,${R1}[1] + vmlal.u32 $D4,$H3#hi,${R1}[1] + + vmlal.u32 $D0,$H3#hi,${S2}[1] + vmlal.u32 $D3,$H1#hi,${R2}[1] + vmlal.u32 $D4,$H2#hi,${R2}[1] + vmlal.u32 $D1,$H4#hi,${S2}[1] + vmlal.u32 $D2,$H0#hi,${R2}[1] + + vmlal.u32 $D3,$H0#hi,${R3}[1] + vmlal.u32 $D0,$H2#hi,${S3}[1] + vmlal.u32 $D4,$H1#hi,${R3}[1] + vmlal.u32 $D1,$H3#hi,${S3}[1] + vmlal.u32 $D2,$H4#hi,${S3}[1] + + vmlal.u32 $D3,$H4#hi,${S4}[1] + vmlal.u32 $D0,$H1#hi,${S4}[1] + vmlal.u32 $D4,$H0#hi,${R4}[1] + vmlal.u32 $D1,$H2#hi,${S4}[1] + vmlal.u32 $D2,$H3#hi,${S4}[1] + + vld4.32 {$H0#hi,$H1#hi,$H2#hi,$H3#hi},[$in2] @ inp[2:3] (or 0) + add $in2,$in2,#64 + + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ (hash+inp[0:1])*r^4 and accumulate + + vmlal.u32 $D3,$H3#lo,${R0}[0] + vmlal.u32 $D0,$H0#lo,${R0}[0] + vmlal.u32 $D4,$H4#lo,${R0}[0] + vmlal.u32 $D1,$H1#lo,${R0}[0] + vmlal.u32 $D2,$H2#lo,${R0}[0] + vld1.32 ${S4}[0],[$tbl0,:32] + + vmlal.u32 $D3,$H2#lo,${R1}[0] + vmlal.u32 $D0,$H4#lo,${S1}[0] + vmlal.u32 $D4,$H3#lo,${R1}[0] + vmlal.u32 $D1,$H0#lo,${R1}[0] + vmlal.u32 $D2,$H1#lo,${R1}[0] + + vmlal.u32 $D3,$H1#lo,${R2}[0] + vmlal.u32 $D0,$H3#lo,${S2}[0] + vmlal.u32 $D4,$H2#lo,${R2}[0] + vmlal.u32 $D1,$H4#lo,${S2}[0] + vmlal.u32 $D2,$H0#lo,${R2}[0] + + vmlal.u32 $D3,$H0#lo,${R3}[0] + vmlal.u32 $D0,$H2#lo,${S3}[0] + vmlal.u32 $D4,$H1#lo,${R3}[0] + vmlal.u32 $D1,$H3#lo,${S3}[0] + vmlal.u32 $D3,$H4#lo,${S4}[0] + + vmlal.u32 $D2,$H4#lo,${S3}[0] + vmlal.u32 $D0,$H1#lo,${S4}[0] + vmlal.u32 $D4,$H0#lo,${R4}[0] + vmov.i32 $H4,#1<<24 @ padbit, yes, always + vmlal.u32 $D1,$H2#lo,${S4}[0] + vmlal.u32 $D2,$H3#lo,${S4}[0] + + vld4.32 {$H0#lo,$H1#lo,$H2#lo,$H3#lo},[$inp] @ inp[0:1] + add $inp,$inp,#64 +# ifdef __ARMEB__ + vrev32.8 $H0,$H0 + vrev32.8 $H1,$H1 + vrev32.8 $H2,$H2 + vrev32.8 $H3,$H3 +# endif + + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ lazy reduction interleaved with base 2^32 -> base 2^26 of + @ inp[0:3] previously loaded to $H0-$H3 and smashed to $H0-$H4. 
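+ @ carries out of h4 wrap around to h0 multiplied by 5, since
+ @ 2^130 == 5 mod 2^130-5; c*5 is computed as c + 4*c, hence the
+ @ shift left by 2 and the two additions below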
+ + vshr.u64 $T0,$D3,#26 + vmovn.i64 $D3#lo,$D3 + vshr.u64 $T1,$D0,#26 + vmovn.i64 $D0#lo,$D0 + vadd.i64 $D4,$D4,$T0 @ h3 -> h4 + vbic.i32 $D3#lo,#0xfc000000 + vsri.u32 $H4,$H3,#8 @ base 2^32 -> base 2^26 + vadd.i64 $D1,$D1,$T1 @ h0 -> h1 + vshl.u32 $H3,$H3,#18 + vbic.i32 $D0#lo,#0xfc000000 + + vshrn.u64 $T0#lo,$D4,#26 + vmovn.i64 $D4#lo,$D4 + vshr.u64 $T1,$D1,#26 + vmovn.i64 $D1#lo,$D1 + vadd.i64 $D2,$D2,$T1 @ h1 -> h2 + vsri.u32 $H3,$H2,#14 + vbic.i32 $D4#lo,#0xfc000000 + vshl.u32 $H2,$H2,#12 + vbic.i32 $D1#lo,#0xfc000000 + + vadd.i32 $D0#lo,$D0#lo,$T0#lo + vshl.u32 $T0#lo,$T0#lo,#2 + vbic.i32 $H3,#0xfc000000 + vshrn.u64 $T1#lo,$D2,#26 + vmovn.i64 $D2#lo,$D2 + vaddl.u32 $D0,$D0#lo,$T0#lo @ h4 -> h0 [widen for a sec] + vsri.u32 $H2,$H1,#20 + vadd.i32 $D3#lo,$D3#lo,$T1#lo @ h2 -> h3 + vshl.u32 $H1,$H1,#6 + vbic.i32 $D2#lo,#0xfc000000 + vbic.i32 $H2,#0xfc000000 + + vshrn.u64 $T0#lo,$D0,#26 @ re-narrow + vmovn.i64 $D0#lo,$D0 + vsri.u32 $H1,$H0,#26 + vbic.i32 $H0,#0xfc000000 + vshr.u32 $T1#lo,$D3#lo,#26 + vbic.i32 $D3#lo,#0xfc000000 + vbic.i32 $D0#lo,#0xfc000000 + vadd.i32 $D1#lo,$D1#lo,$T0#lo @ h0 -> h1 + vadd.i32 $D4#lo,$D4#lo,$T1#lo @ h3 -> h4 + vbic.i32 $H1,#0xfc000000 + + bhi .Loop_neon + +.Lskip_loop: + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ multiply (inp[0:1]+hash) or inp[2:3] by r^2:r^1 + + add $tbl1,$ctx,#(48+0*9*4) + add $tbl0,$ctx,#(48+1*9*4) + adds $len,$len,#32 + it ne + movne $len,#0 + bne .Long_tail + + vadd.i32 $H2#hi,$H2#lo,$D2#lo @ add hash value and move to #hi + vadd.i32 $H0#hi,$H0#lo,$D0#lo + vadd.i32 $H3#hi,$H3#lo,$D3#lo + vadd.i32 $H1#hi,$H1#lo,$D1#lo + vadd.i32 $H4#hi,$H4#lo,$D4#lo + +.Long_tail: + vld4.32 {${R0}[1],${R1}[1],${S1}[1],${R2}[1]},[$tbl1]! @ load r^1 + vld4.32 {${R0}[0],${R1}[0],${S1}[0],${R2}[0]},[$tbl0]! @ load r^2 + + vadd.i32 $H2#lo,$H2#lo,$D2#lo @ can be redundant + vmull.u32 $D2,$H2#hi,$R0 + vadd.i32 $H0#lo,$H0#lo,$D0#lo + vmull.u32 $D0,$H0#hi,$R0 + vadd.i32 $H3#lo,$H3#lo,$D3#lo + vmull.u32 $D3,$H3#hi,$R0 + vadd.i32 $H1#lo,$H1#lo,$D1#lo + vmull.u32 $D1,$H1#hi,$R0 + vadd.i32 $H4#lo,$H4#lo,$D4#lo + vmull.u32 $D4,$H4#hi,$R0 + + vmlal.u32 $D0,$H4#hi,$S1 + vld4.32 {${S2}[1],${R3}[1],${S3}[1],${R4}[1]},[$tbl1]! + vmlal.u32 $D3,$H2#hi,$R1 + vld4.32 {${S2}[0],${R3}[0],${S3}[0],${R4}[0]},[$tbl0]! + vmlal.u32 $D1,$H0#hi,$R1 + vmlal.u32 $D4,$H3#hi,$R1 + vmlal.u32 $D2,$H1#hi,$R1 + + vmlal.u32 $D3,$H1#hi,$R2 + vld1.32 ${S4}[1],[$tbl1,:32] + vmlal.u32 $D0,$H3#hi,$S2 + vld1.32 ${S4}[0],[$tbl0,:32] + vmlal.u32 $D4,$H2#hi,$R2 + vmlal.u32 $D1,$H4#hi,$S2 + vmlal.u32 $D2,$H0#hi,$R2 + + vmlal.u32 $D3,$H0#hi,$R3 + it ne + addne $tbl1,$ctx,#(48+2*9*4) + vmlal.u32 $D0,$H2#hi,$S3 + it ne + addne $tbl0,$ctx,#(48+3*9*4) + vmlal.u32 $D4,$H1#hi,$R3 + vmlal.u32 $D1,$H3#hi,$S3 + vmlal.u32 $D2,$H4#hi,$S3 + + vmlal.u32 $D3,$H4#hi,$S4 + vorn $MASK,$MASK,$MASK @ all-ones, can be redundant + vmlal.u32 $D0,$H1#hi,$S4 + vshr.u64 $MASK,$MASK,#38 + vmlal.u32 $D4,$H0#hi,$R4 + vmlal.u32 $D1,$H2#hi,$S4 + vmlal.u32 $D2,$H3#hi,$S4 + + beq .Lshort_tail + + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ (hash+inp[0:1])*r^4:r^3 and accumulate + + vld4.32 {${R0}[1],${R1}[1],${S1}[1],${R2}[1]},[$tbl1]! @ load r^3 + vld4.32 {${R0}[0],${R1}[0],${S1}[0],${R2}[0]},[$tbl0]! @ load r^4 + + vmlal.u32 $D2,$H2#lo,$R0 + vmlal.u32 $D0,$H0#lo,$R0 + vmlal.u32 $D3,$H3#lo,$R0 + vmlal.u32 $D1,$H1#lo,$R0 + vmlal.u32 $D4,$H4#lo,$R0 + + vmlal.u32 $D0,$H4#lo,$S1 + vld4.32 {${S2}[1],${R3}[1],${S3}[1],${R4}[1]},[$tbl1]! 
+ vmlal.u32 $D3,$H2#lo,$R1 + vld4.32 {${S2}[0],${R3}[0],${S3}[0],${R4}[0]},[$tbl0]! + vmlal.u32 $D1,$H0#lo,$R1 + vmlal.u32 $D4,$H3#lo,$R1 + vmlal.u32 $D2,$H1#lo,$R1 + + vmlal.u32 $D3,$H1#lo,$R2 + vld1.32 ${S4}[1],[$tbl1,:32] + vmlal.u32 $D0,$H3#lo,$S2 + vld1.32 ${S4}[0],[$tbl0,:32] + vmlal.u32 $D4,$H2#lo,$R2 + vmlal.u32 $D1,$H4#lo,$S2 + vmlal.u32 $D2,$H0#lo,$R2 + + vmlal.u32 $D3,$H0#lo,$R3 + vmlal.u32 $D0,$H2#lo,$S3 + vmlal.u32 $D4,$H1#lo,$R3 + vmlal.u32 $D1,$H3#lo,$S3 + vmlal.u32 $D2,$H4#lo,$S3 + + vmlal.u32 $D3,$H4#lo,$S4 + vorn $MASK,$MASK,$MASK @ all-ones + vmlal.u32 $D0,$H1#lo,$S4 + vshr.u64 $MASK,$MASK,#38 + vmlal.u32 $D4,$H0#lo,$R4 + vmlal.u32 $D1,$H2#lo,$S4 + vmlal.u32 $D2,$H3#lo,$S4 + +.Lshort_tail: + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ horizontal addition + + vadd.i64 $D3#lo,$D3#lo,$D3#hi + vadd.i64 $D0#lo,$D0#lo,$D0#hi + vadd.i64 $D4#lo,$D4#lo,$D4#hi + vadd.i64 $D1#lo,$D1#lo,$D1#hi + vadd.i64 $D2#lo,$D2#lo,$D2#hi + + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ lazy reduction, but without narrowing + + vshr.u64 $T0,$D3,#26 + vand.i64 $D3,$D3,$MASK + vshr.u64 $T1,$D0,#26 + vand.i64 $D0,$D0,$MASK + vadd.i64 $D4,$D4,$T0 @ h3 -> h4 + vadd.i64 $D1,$D1,$T1 @ h0 -> h1 + + vshr.u64 $T0,$D4,#26 + vand.i64 $D4,$D4,$MASK + vshr.u64 $T1,$D1,#26 + vand.i64 $D1,$D1,$MASK + vadd.i64 $D2,$D2,$T1 @ h1 -> h2 + + vadd.i64 $D0,$D0,$T0 + vshl.u64 $T0,$T0,#2 + vshr.u64 $T1,$D2,#26 + vand.i64 $D2,$D2,$MASK + vadd.i64 $D0,$D0,$T0 @ h4 -> h0 + vadd.i64 $D3,$D3,$T1 @ h2 -> h3 + + vshr.u64 $T0,$D0,#26 + vand.i64 $D0,$D0,$MASK + vshr.u64 $T1,$D3,#26 + vand.i64 $D3,$D3,$MASK + vadd.i64 $D1,$D1,$T0 @ h0 -> h1 + vadd.i64 $D4,$D4,$T1 @ h3 -> h4 + + cmp $len,#0 + bne .Leven + + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ store hash value + + vst4.32 {$D0#lo[0],$D1#lo[0],$D2#lo[0],$D3#lo[0]},[$ctx]! 
+ vst1.32 {$D4#lo[0]},[$ctx] + + vldmia sp!,{d8-d15} @ epilogue + ldmia sp!,{r4-r7} + ret @ bx lr +.size poly1305_blocks_neon,.-poly1305_blocks_neon + +.align 5 +.Lzeros: +.long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +#ifndef __KERNEL__ +.LOPENSSL_armcap: +# ifdef _WIN32 +.word OPENSSL_armcap_P +# else +.word OPENSSL_armcap_P-.Lpoly1305_init +# endif +.comm OPENSSL_armcap_P,4,4 +.hidden OPENSSL_armcap_P +#endif +#endif +___ +} } +$code.=<<___; +.asciz "Poly1305 for ARMv4/NEON, CRYPTOGAMS by \@dot-asm" +.align 2 +___ + +foreach (split("\n",$code)) { + s/\`([^\`]*)\`/eval $1/geo; + + s/\bq([0-9]+)#(lo|hi)/sprintf "d%d",2*$1+($2 eq "hi")/geo or + s/\bret\b/bx lr/go or + s/\bbx\s+lr\b/.word\t0xe12fff1e/go; # make it possible to compile with -march=armv4 + + print $_,"\n"; +} +close STDOUT; # enforce flush diff --git a/arch/arm/crypto/poly1305-core.S_shipped b/arch/arm/crypto/poly1305-core.S_shipped new file mode 100644 index 000000000000..37b71d990293 --- /dev/null +++ b/arch/arm/crypto/poly1305-core.S_shipped @@ -0,0 +1,1158 @@ +#ifndef __KERNEL__ +# include "arm_arch.h" +#else +# define __ARM_ARCH__ __LINUX_ARM_ARCH__ +# define __ARM_MAX_ARCH__ __LINUX_ARM_ARCH__ +# define poly1305_init poly1305_init_arm +# define poly1305_blocks poly1305_blocks_arm +# define poly1305_emit poly1305_emit_arm +.globl poly1305_blocks_neon +#endif + +#if defined(__thumb2__) +.syntax unified +.thumb +#else +.code 32 +#endif + +.text + +.globl poly1305_emit +.globl poly1305_blocks +.globl poly1305_init +.type poly1305_init,%function +.align 5 +poly1305_init: +.Lpoly1305_init: + stmdb sp!,{r4-r11} + + eor r3,r3,r3 + cmp r1,#0 + str r3,[r0,#0] @ zero hash value + str r3,[r0,#4] + str r3,[r0,#8] + str r3,[r0,#12] + str r3,[r0,#16] + str r3,[r0,#36] @ clear is_base2_26 + add r0,r0,#20 + +#ifdef __thumb2__ + it eq +#endif + moveq r0,#0 + beq .Lno_key + +#if __ARM_MAX_ARCH__>=7 + mov r3,#-1 + str r3,[r0,#28] @ impossible key power value +# ifndef __KERNEL__ + adr r11,.Lpoly1305_init + ldr r12,.LOPENSSL_armcap +# endif +#endif + ldrb r4,[r1,#0] + mov r10,#0x0fffffff + ldrb r5,[r1,#1] + and r3,r10,#-4 @ 0x0ffffffc + ldrb r6,[r1,#2] + ldrb r7,[r1,#3] + orr r4,r4,r5,lsl#8 + ldrb r5,[r1,#4] + orr r4,r4,r6,lsl#16 + ldrb r6,[r1,#5] + orr r4,r4,r7,lsl#24 + ldrb r7,[r1,#6] + and r4,r4,r10 + +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) +# if !defined(_WIN32) + ldr r12,[r11,r12] @ OPENSSL_armcap_P +# endif +# if defined(__APPLE__) || defined(_WIN32) + ldr r12,[r12] +# endif +#endif + ldrb r8,[r1,#7] + orr r5,r5,r6,lsl#8 + ldrb r6,[r1,#8] + orr r5,r5,r7,lsl#16 + ldrb r7,[r1,#9] + orr r5,r5,r8,lsl#24 + ldrb r8,[r1,#10] + and r5,r5,r3 + +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) + tst r12,#ARMV7_NEON @ check for NEON +# ifdef __thumb2__ + adr r9,.Lpoly1305_blocks_neon + adr r11,.Lpoly1305_blocks + it ne + movne r11,r9 + adr r12,.Lpoly1305_emit + orr r11,r11,#1 @ thumb-ify addresses + orr r12,r12,#1 +# else + add r12,r11,#(.Lpoly1305_emit-.Lpoly1305_init) + ite eq + addeq r11,r11,#(.Lpoly1305_blocks-.Lpoly1305_init) + addne r11,r11,#(.Lpoly1305_blocks_neon-.Lpoly1305_init) +# endif +#endif + ldrb r9,[r1,#11] + orr r6,r6,r7,lsl#8 + ldrb r7,[r1,#12] + orr r6,r6,r8,lsl#16 + ldrb r8,[r1,#13] + orr r6,r6,r9,lsl#24 + ldrb r9,[r1,#14] + and r6,r6,r3 + + ldrb r10,[r1,#15] + orr r7,r7,r8,lsl#8 + str r4,[r0,#0] + orr r7,r7,r9,lsl#16 + str r5,[r0,#4] + orr r7,r7,r10,lsl#24 + str r6,[r0,#8] + and r7,r7,r3 + str r7,[r0,#12] +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) + stmia r2,{r11,r12} @ fill functions table + mov r0,#1 +#else + mov 
r0,#0 +#endif +.Lno_key: + ldmia sp!,{r4-r11} +#if __ARM_ARCH__>=5 + bx lr @ bx lr +#else + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet + .word 0xe12fff1e @ interoperable with Thumb ISA:-) +#endif +.size poly1305_init,.-poly1305_init +.type poly1305_blocks,%function +.align 5 +poly1305_blocks: +.Lpoly1305_blocks: + stmdb sp!,{r3-r11,lr} + + ands r2,r2,#-16 + beq .Lno_data + + add r2,r2,r1 @ end pointer + sub sp,sp,#32 + +#if __ARM_ARCH__<7 + ldmia r0,{r4-r12} @ load context + add r0,r0,#20 + str r2,[sp,#16] @ offload stuff + str r0,[sp,#12] +#else + ldr lr,[r0,#36] @ is_base2_26 + ldmia r0!,{r4-r8} @ load hash value + str r2,[sp,#16] @ offload stuff + str r0,[sp,#12] + + adds r9,r4,r5,lsl#26 @ base 2^26 -> base 2^32 + mov r10,r5,lsr#6 + adcs r10,r10,r6,lsl#20 + mov r11,r6,lsr#12 + adcs r11,r11,r7,lsl#14 + mov r12,r7,lsr#18 + adcs r12,r12,r8,lsl#8 + mov r2,#0 + teq lr,#0 + str r2,[r0,#16] @ clear is_base2_26 + adc r2,r2,r8,lsr#24 + + itttt ne + movne r4,r9 @ choose between radixes + movne r5,r10 + movne r6,r11 + movne r7,r12 + ldmia r0,{r9-r12} @ load key + it ne + movne r8,r2 +#endif + + mov lr,r1 + cmp r3,#0 + str r10,[sp,#20] + str r11,[sp,#24] + str r12,[sp,#28] + b .Loop + +.align 4 +.Loop: +#if __ARM_ARCH__<7 + ldrb r0,[lr],#16 @ load input +# ifdef __thumb2__ + it hi +# endif + addhi r8,r8,#1 @ 1<<128 + ldrb r1,[lr,#-15] + ldrb r2,[lr,#-14] + ldrb r3,[lr,#-13] + orr r1,r0,r1,lsl#8 + ldrb r0,[lr,#-12] + orr r2,r1,r2,lsl#16 + ldrb r1,[lr,#-11] + orr r3,r2,r3,lsl#24 + ldrb r2,[lr,#-10] + adds r4,r4,r3 @ accumulate input + + ldrb r3,[lr,#-9] + orr r1,r0,r1,lsl#8 + ldrb r0,[lr,#-8] + orr r2,r1,r2,lsl#16 + ldrb r1,[lr,#-7] + orr r3,r2,r3,lsl#24 + ldrb r2,[lr,#-6] + adcs r5,r5,r3 + + ldrb r3,[lr,#-5] + orr r1,r0,r1,lsl#8 + ldrb r0,[lr,#-4] + orr r2,r1,r2,lsl#16 + ldrb r1,[lr,#-3] + orr r3,r2,r3,lsl#24 + ldrb r2,[lr,#-2] + adcs r6,r6,r3 + + ldrb r3,[lr,#-1] + orr r1,r0,r1,lsl#8 + str lr,[sp,#8] @ offload input pointer + orr r2,r1,r2,lsl#16 + add r10,r10,r10,lsr#2 + orr r3,r2,r3,lsl#24 +#else + ldr r0,[lr],#16 @ load input + it hi + addhi r8,r8,#1 @ padbit + ldr r1,[lr,#-12] + ldr r2,[lr,#-8] + ldr r3,[lr,#-4] +# ifdef __ARMEB__ + rev r0,r0 + rev r1,r1 + rev r2,r2 + rev r3,r3 +# endif + adds r4,r4,r0 @ accumulate input + str lr,[sp,#8] @ offload input pointer + adcs r5,r5,r1 + add r10,r10,r10,lsr#2 + adcs r6,r6,r2 +#endif + add r11,r11,r11,lsr#2 + adcs r7,r7,r3 + add r12,r12,r12,lsr#2 + + umull r2,r3,r5,r9 + adc r8,r8,#0 + umull r0,r1,r4,r9 + umlal r2,r3,r8,r10 + umlal r0,r1,r7,r10 + ldr r10,[sp,#20] @ reload r10 + umlal r2,r3,r6,r12 + umlal r0,r1,r5,r12 + umlal r2,r3,r7,r11 + umlal r0,r1,r6,r11 + umlal r2,r3,r4,r10 + str r0,[sp,#0] @ future r4 + mul r0,r11,r8 + ldr r11,[sp,#24] @ reload r11 + adds r2,r2,r1 @ d1+=d0>>32 + eor r1,r1,r1 + adc lr,r3,#0 @ future r6 + str r2,[sp,#4] @ future r5 + + mul r2,r12,r8 + eor r3,r3,r3 + umlal r0,r1,r7,r12 + ldr r12,[sp,#28] @ reload r12 + umlal r2,r3,r7,r9 + umlal r0,r1,r6,r9 + umlal r2,r3,r6,r10 + umlal r0,r1,r5,r10 + umlal r2,r3,r5,r11 + umlal r0,r1,r4,r11 + umlal r2,r3,r4,r12 + ldr r4,[sp,#0] + mul r8,r9,r8 + ldr r5,[sp,#4] + + adds r6,lr,r0 @ d2+=d1>>32 + ldr lr,[sp,#8] @ reload input pointer + adc r1,r1,#0 + adds r7,r2,r1 @ d3+=d2>>32 + ldr r0,[sp,#16] @ reload end pointer + adc r3,r3,#0 + add r8,r8,r3 @ h4+=d3>>32 + + and r1,r8,#-4 + and r8,r8,#3 + add r1,r1,r1,lsr#2 @ *=5 + adds r4,r4,r1 + adcs r5,r5,#0 + adcs r6,r6,#0 + adcs r7,r7,#0 + adc r8,r8,#0 + + cmp r0,lr @ done yet? 
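+	@ the partial reduction above keeps only h4 & 3 and folds the
+	@ rest back in as (h4 & ~3) + ((h4 & ~3) >> 2), i.e. 5*(h4>>2),
+	@ since 2^130 == 5 mod 2^130-5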
+ bhi .Loop + + ldr r0,[sp,#12] + add sp,sp,#32 + stmdb r0,{r4-r8} @ store the result + +.Lno_data: +#if __ARM_ARCH__>=5 + ldmia sp!,{r3-r11,pc} +#else + ldmia sp!,{r3-r11,lr} + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet + .word 0xe12fff1e @ interoperable with Thumb ISA:-) +#endif +.size poly1305_blocks,.-poly1305_blocks +.type poly1305_emit,%function +.align 5 +poly1305_emit: +.Lpoly1305_emit: + stmdb sp!,{r4-r11} + + ldmia r0,{r3-r7} + +#if __ARM_ARCH__>=7 + ldr ip,[r0,#36] @ is_base2_26 + + adds r8,r3,r4,lsl#26 @ base 2^26 -> base 2^32 + mov r9,r4,lsr#6 + adcs r9,r9,r5,lsl#20 + mov r10,r5,lsr#12 + adcs r10,r10,r6,lsl#14 + mov r11,r6,lsr#18 + adcs r11,r11,r7,lsl#8 + mov r0,#0 + adc r0,r0,r7,lsr#24 + + tst ip,ip + itttt ne + movne r3,r8 + movne r4,r9 + movne r5,r10 + movne r6,r11 + it ne + movne r7,r0 +#endif + + adds r8,r3,#5 @ compare to modulus + adcs r9,r4,#0 + adcs r10,r5,#0 + adcs r11,r6,#0 + adc r0,r7,#0 + tst r0,#4 @ did it carry/borrow? + +#ifdef __thumb2__ + it ne +#endif + movne r3,r8 + ldr r8,[r2,#0] +#ifdef __thumb2__ + it ne +#endif + movne r4,r9 + ldr r9,[r2,#4] +#ifdef __thumb2__ + it ne +#endif + movne r5,r10 + ldr r10,[r2,#8] +#ifdef __thumb2__ + it ne +#endif + movne r6,r11 + ldr r11,[r2,#12] + + adds r3,r3,r8 + adcs r4,r4,r9 + adcs r5,r5,r10 + adc r6,r6,r11 + +#if __ARM_ARCH__>=7 +# ifdef __ARMEB__ + rev r3,r3 + rev r4,r4 + rev r5,r5 + rev r6,r6 +# endif + str r3,[r1,#0] + str r4,[r1,#4] + str r5,[r1,#8] + str r6,[r1,#12] +#else + strb r3,[r1,#0] + mov r3,r3,lsr#8 + strb r4,[r1,#4] + mov r4,r4,lsr#8 + strb r5,[r1,#8] + mov r5,r5,lsr#8 + strb r6,[r1,#12] + mov r6,r6,lsr#8 + + strb r3,[r1,#1] + mov r3,r3,lsr#8 + strb r4,[r1,#5] + mov r4,r4,lsr#8 + strb r5,[r1,#9] + mov r5,r5,lsr#8 + strb r6,[r1,#13] + mov r6,r6,lsr#8 + + strb r3,[r1,#2] + mov r3,r3,lsr#8 + strb r4,[r1,#6] + mov r4,r4,lsr#8 + strb r5,[r1,#10] + mov r5,r5,lsr#8 + strb r6,[r1,#14] + mov r6,r6,lsr#8 + + strb r3,[r1,#3] + strb r4,[r1,#7] + strb r5,[r1,#11] + strb r6,[r1,#15] +#endif + ldmia sp!,{r4-r11} +#if __ARM_ARCH__>=5 + bx lr @ bx lr +#else + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet + .word 0xe12fff1e @ interoperable with Thumb ISA:-) +#endif +.size poly1305_emit,.-poly1305_emit +#if __ARM_MAX_ARCH__>=7 +.fpu neon + +.type poly1305_init_neon,%function +.align 5 +poly1305_init_neon: +.Lpoly1305_init_neon: + ldr r3,[r0,#48] @ first table element + cmp r3,#-1 @ is value impossible? 
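+	@ poly1305_init seeded [r0,#48] with -1, a value no masked key
+	@ power limb can take; the squaring below overwrites it with
+	@ r^2, so coming through here a second time is a no-op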
+ bne .Lno_init_neon + + ldr r4,[r0,#20] @ load key base 2^32 + ldr r5,[r0,#24] + ldr r6,[r0,#28] + ldr r7,[r0,#32] + + and r2,r4,#0x03ffffff @ base 2^32 -> base 2^26 + mov r3,r4,lsr#26 + mov r4,r5,lsr#20 + orr r3,r3,r5,lsl#6 + mov r5,r6,lsr#14 + orr r4,r4,r6,lsl#12 + mov r6,r7,lsr#8 + orr r5,r5,r7,lsl#18 + and r3,r3,#0x03ffffff + and r4,r4,#0x03ffffff + and r5,r5,#0x03ffffff + + vdup.32 d0,r2 @ r^1 in both lanes + add r2,r3,r3,lsl#2 @ *5 + vdup.32 d1,r3 + add r3,r4,r4,lsl#2 + vdup.32 d2,r2 + vdup.32 d3,r4 + add r4,r5,r5,lsl#2 + vdup.32 d4,r3 + vdup.32 d5,r5 + add r5,r6,r6,lsl#2 + vdup.32 d6,r4 + vdup.32 d7,r6 + vdup.32 d8,r5 + + mov r5,#2 @ counter + +.Lsquare_neon: + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4 + @ d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4 + @ d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4 + @ d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4 + @ d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4 + + vmull.u32 q5,d0,d0[1] + vmull.u32 q6,d1,d0[1] + vmull.u32 q7,d3,d0[1] + vmull.u32 q8,d5,d0[1] + vmull.u32 q9,d7,d0[1] + + vmlal.u32 q5,d7,d2[1] + vmlal.u32 q6,d0,d1[1] + vmlal.u32 q7,d1,d1[1] + vmlal.u32 q8,d3,d1[1] + vmlal.u32 q9,d5,d1[1] + + vmlal.u32 q5,d5,d4[1] + vmlal.u32 q6,d7,d4[1] + vmlal.u32 q8,d1,d3[1] + vmlal.u32 q7,d0,d3[1] + vmlal.u32 q9,d3,d3[1] + + vmlal.u32 q5,d3,d6[1] + vmlal.u32 q8,d0,d5[1] + vmlal.u32 q6,d5,d6[1] + vmlal.u32 q7,d7,d6[1] + vmlal.u32 q9,d1,d5[1] + + vmlal.u32 q8,d7,d8[1] + vmlal.u32 q5,d1,d8[1] + vmlal.u32 q6,d3,d8[1] + vmlal.u32 q7,d5,d8[1] + vmlal.u32 q9,d0,d7[1] + + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ lazy reduction as discussed in "NEON crypto" by D.J. Bernstein + @ and P. Schwabe + @ + @ H0>>+H1>>+H2>>+H3>>+H4 + @ H3>>+H4>>*5+H0>>+H1 + @ + @ Trivia. + @ + @ Result of multiplication of n-bit number by m-bit number is + @ n+m bits wide. However! Even though 2^n is a n+1-bit number, + @ m-bit number multiplied by 2^n is still n+m bits wide. + @ + @ Sum of two n-bit numbers is n+1 bits wide, sum of three - n+2, + @ and so is sum of four. Sum of 2^m n-m-bit numbers and n-bit + @ one is n+1 bits wide. + @ + @ >>+ denotes Hnext += Hn>>26, Hn &= 0x3ffffff. This means that + @ H0, H2, H3 are guaranteed to be 26 bits wide, while H1 and H4 + @ can be 27. However! In cases when their width exceeds 26 bits + @ they are limited by 2^26+2^6. This in turn means that *sum* + @ of the products with these values can still be viewed as sum + @ of 52-bit numbers as long as the amount of addends is not a + @ power of 2. For example, + @ + @ H4 = H4*R0 + H3*R1 + H2*R2 + H1*R3 + H0 * R4, + @ + @ which can't be larger than 5 * (2^26 + 2^6) * (2^26 + 2^6), or + @ 5 * (2^52 + 2*2^32 + 2^12), which in turn is smaller than + @ 8 * (2^52) or 2^55. However, the value is then multiplied by + @ by 5, so we should be looking at 5 * 5 * (2^52 + 2^33 + 2^12), + @ which is less than 32 * (2^52) or 2^57. And when processing + @ data we are looking at triple as many addends... + @ + @ In key setup procedure pre-reduced H0 is limited by 5*4+1 and + @ 5*H4 - by 5*5 52-bit addends, or 57 bits. But when hashing the + @ input H0 is limited by (5*4+1)*3 addends, or 58 bits, while + @ 5*H4 by 5*5*3, or 59[!] bits. How is this relevant? vmlal.u32 + @ instruction accepts 2x32-bit input and writes 2x64-bit result. + @ This means that result of reduction have to be compressed upon + @ loop wrap-around. 
This can be done in the process of reduction + @ to minimize amount of instructions [as well as amount of + @ 128-bit instructions, which benefits low-end processors], but + @ one has to watch for H2 (which is narrower than H0) and 5*H4 + @ not being wider than 58 bits, so that result of right shift + @ by 26 bits fits in 32 bits. This is also useful on x86, + @ because it allows to use paddd in place for paddq, which + @ benefits Atom, where paddq is ridiculously slow. + + vshr.u64 q15,q8,#26 + vmovn.i64 d16,q8 + vshr.u64 q4,q5,#26 + vmovn.i64 d10,q5 + vadd.i64 q9,q9,q15 @ h3 -> h4 + vbic.i32 d16,#0xfc000000 @ &=0x03ffffff + vadd.i64 q6,q6,q4 @ h0 -> h1 + vbic.i32 d10,#0xfc000000 + + vshrn.u64 d30,q9,#26 + vmovn.i64 d18,q9 + vshr.u64 q4,q6,#26 + vmovn.i64 d12,q6 + vadd.i64 q7,q7,q4 @ h1 -> h2 + vbic.i32 d18,#0xfc000000 + vbic.i32 d12,#0xfc000000 + + vadd.i32 d10,d10,d30 + vshl.u32 d30,d30,#2 + vshrn.u64 d8,q7,#26 + vmovn.i64 d14,q7 + vadd.i32 d10,d10,d30 @ h4 -> h0 + vadd.i32 d16,d16,d8 @ h2 -> h3 + vbic.i32 d14,#0xfc000000 + + vshr.u32 d30,d10,#26 + vbic.i32 d10,#0xfc000000 + vshr.u32 d8,d16,#26 + vbic.i32 d16,#0xfc000000 + vadd.i32 d12,d12,d30 @ h0 -> h1 + vadd.i32 d18,d18,d8 @ h3 -> h4 + + subs r5,r5,#1 + beq .Lsquare_break_neon + + add r6,r0,#(48+0*9*4) + add r7,r0,#(48+1*9*4) + + vtrn.32 d0,d10 @ r^2:r^1 + vtrn.32 d3,d14 + vtrn.32 d5,d16 + vtrn.32 d1,d12 + vtrn.32 d7,d18 + + vshl.u32 d4,d3,#2 @ *5 + vshl.u32 d6,d5,#2 + vshl.u32 d2,d1,#2 + vshl.u32 d8,d7,#2 + vadd.i32 d4,d4,d3 + vadd.i32 d2,d2,d1 + vadd.i32 d6,d6,d5 + vadd.i32 d8,d8,d7 + + vst4.32 {d0[0],d1[0],d2[0],d3[0]},[r6]! + vst4.32 {d0[1],d1[1],d2[1],d3[1]},[r7]! + vst4.32 {d4[0],d5[0],d6[0],d7[0]},[r6]! + vst4.32 {d4[1],d5[1],d6[1],d7[1]},[r7]! + vst1.32 {d8[0]},[r6,:32] + vst1.32 {d8[1]},[r7,:32] + + b .Lsquare_neon + +.align 4 +.Lsquare_break_neon: + add r6,r0,#(48+2*4*9) + add r7,r0,#(48+3*4*9) + + vmov d0,d10 @ r^4:r^3 + vshl.u32 d2,d12,#2 @ *5 + vmov d1,d12 + vshl.u32 d4,d14,#2 + vmov d3,d14 + vshl.u32 d6,d16,#2 + vmov d5,d16 + vshl.u32 d8,d18,#2 + vmov d7,d18 + vadd.i32 d2,d2,d12 + vadd.i32 d4,d4,d14 + vadd.i32 d6,d6,d16 + vadd.i32 d8,d8,d18 + + vst4.32 {d0[0],d1[0],d2[0],d3[0]},[r6]! + vst4.32 {d0[1],d1[1],d2[1],d3[1]},[r7]! + vst4.32 {d4[0],d5[0],d6[0],d7[0]},[r6]! + vst4.32 {d4[1],d5[1],d6[1],d7[1]},[r7]! + vst1.32 {d8[0]},[r6] + vst1.32 {d8[1]},[r7] + +.Lno_init_neon: + bx lr @ bx lr +.size poly1305_init_neon,.-poly1305_init_neon + +.type poly1305_blocks_neon,%function +.align 5 +poly1305_blocks_neon: +.Lpoly1305_blocks_neon: + ldr ip,[r0,#36] @ is_base2_26 + + cmp r2,#64 + blo .Lpoly1305_blocks + + stmdb sp!,{r4-r7} + vstmdb sp!,{d8-d15} @ ABI specification says so + + tst ip,ip @ is_base2_26? 
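+	@ the hash is kept either base 2^32 (scalar code) or base 2^26
+	@ (five 26-bit limbs, NEON code); is_base2_26 records which,
+	@ and the value is converted below when the two paths cross over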
+ bne .Lbase2_26_neon + + stmdb sp!,{r1-r3,lr} + bl .Lpoly1305_init_neon + + ldr r4,[r0,#0] @ load hash value base 2^32 + ldr r5,[r0,#4] + ldr r6,[r0,#8] + ldr r7,[r0,#12] + ldr ip,[r0,#16] + + and r2,r4,#0x03ffffff @ base 2^32 -> base 2^26 + mov r3,r4,lsr#26 + veor d10,d10,d10 + mov r4,r5,lsr#20 + orr r3,r3,r5,lsl#6 + veor d12,d12,d12 + mov r5,r6,lsr#14 + orr r4,r4,r6,lsl#12 + veor d14,d14,d14 + mov r6,r7,lsr#8 + orr r5,r5,r7,lsl#18 + veor d16,d16,d16 + and r3,r3,#0x03ffffff + orr r6,r6,ip,lsl#24 + veor d18,d18,d18 + and r4,r4,#0x03ffffff + mov r1,#1 + and r5,r5,#0x03ffffff + str r1,[r0,#36] @ set is_base2_26 + + vmov.32 d10[0],r2 + vmov.32 d12[0],r3 + vmov.32 d14[0],r4 + vmov.32 d16[0],r5 + vmov.32 d18[0],r6 + adr r5,.Lzeros + + ldmia sp!,{r1-r3,lr} + b .Lhash_loaded + +.align 4 +.Lbase2_26_neon: + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ load hash value + + veor d10,d10,d10 + veor d12,d12,d12 + veor d14,d14,d14 + veor d16,d16,d16 + veor d18,d18,d18 + vld4.32 {d10[0],d12[0],d14[0],d16[0]},[r0]! + adr r5,.Lzeros + vld1.32 {d18[0]},[r0] + sub r0,r0,#16 @ rewind + +.Lhash_loaded: + add r4,r1,#32 + mov r3,r3,lsl#24 + tst r2,#31 + beq .Leven + + vld4.32 {d20[0],d22[0],d24[0],d26[0]},[r1]! + vmov.32 d28[0],r3 + sub r2,r2,#16 + add r4,r1,#32 + +# ifdef __ARMEB__ + vrev32.8 q10,q10 + vrev32.8 q13,q13 + vrev32.8 q11,q11 + vrev32.8 q12,q12 +# endif + vsri.u32 d28,d26,#8 @ base 2^32 -> base 2^26 + vshl.u32 d26,d26,#18 + + vsri.u32 d26,d24,#14 + vshl.u32 d24,d24,#12 + vadd.i32 d29,d28,d18 @ add hash value and move to #hi + + vbic.i32 d26,#0xfc000000 + vsri.u32 d24,d22,#20 + vshl.u32 d22,d22,#6 + + vbic.i32 d24,#0xfc000000 + vsri.u32 d22,d20,#26 + vadd.i32 d27,d26,d16 + + vbic.i32 d20,#0xfc000000 + vbic.i32 d22,#0xfc000000 + vadd.i32 d25,d24,d14 + + vadd.i32 d21,d20,d10 + vadd.i32 d23,d22,d12 + + mov r7,r5 + add r6,r0,#48 + + cmp r2,r2 + b .Long_tail + +.align 4 +.Leven: + subs r2,r2,#64 + it lo + movlo r4,r5 + + vmov.i32 q14,#1<<24 @ padbit, yes, always + vld4.32 {d20,d22,d24,d26},[r1] @ inp[0:1] + add r1,r1,#64 + vld4.32 {d21,d23,d25,d27},[r4] @ inp[2:3] (or 0) + add r4,r4,#64 + itt hi + addhi r7,r0,#(48+1*9*4) + addhi r6,r0,#(48+3*9*4) + +# ifdef __ARMEB__ + vrev32.8 q10,q10 + vrev32.8 q13,q13 + vrev32.8 q11,q11 + vrev32.8 q12,q12 +# endif + vsri.u32 q14,q13,#8 @ base 2^32 -> base 2^26 + vshl.u32 q13,q13,#18 + + vsri.u32 q13,q12,#14 + vshl.u32 q12,q12,#12 + + vbic.i32 q13,#0xfc000000 + vsri.u32 q12,q11,#20 + vshl.u32 q11,q11,#6 + + vbic.i32 q12,#0xfc000000 + vsri.u32 q11,q10,#26 + + vbic.i32 q10,#0xfc000000 + vbic.i32 q11,#0xfc000000 + + bls .Lskip_loop + + vld4.32 {d0[1],d1[1],d2[1],d3[1]},[r7]! @ load r^2 + vld4.32 {d0[0],d1[0],d2[0],d3[0]},[r6]! @ load r^4 + vld4.32 {d4[1],d5[1],d6[1],d7[1]},[r7]! + vld4.32 {d4[0],d5[0],d6[0],d7[0]},[r6]! + b .Loop_neon + +.align 5 +.Loop_neon: + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2 + @ ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^3+inp[7]*r + @ ___________________/ + @ ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2+inp[8])*r^2 + @ ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^4+inp[7]*r^2+inp[9])*r + @ ___________________/ ____________________/ + @ + @ Note that we start with inp[2:3]*r^2. This is because it + @ doesn't depend on reduction in previous iteration. 
+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4 + @ d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4 + @ d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4 + @ d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4 + @ d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4 + + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ inp[2:3]*r^2 + + vadd.i32 d24,d24,d14 @ accumulate inp[0:1] + vmull.u32 q7,d25,d0[1] + vadd.i32 d20,d20,d10 + vmull.u32 q5,d21,d0[1] + vadd.i32 d26,d26,d16 + vmull.u32 q8,d27,d0[1] + vmlal.u32 q7,d23,d1[1] + vadd.i32 d22,d22,d12 + vmull.u32 q6,d23,d0[1] + + vadd.i32 d28,d28,d18 + vmull.u32 q9,d29,d0[1] + subs r2,r2,#64 + vmlal.u32 q5,d29,d2[1] + it lo + movlo r4,r5 + vmlal.u32 q8,d25,d1[1] + vld1.32 d8[1],[r7,:32] + vmlal.u32 q6,d21,d1[1] + vmlal.u32 q9,d27,d1[1] + + vmlal.u32 q5,d27,d4[1] + vmlal.u32 q8,d23,d3[1] + vmlal.u32 q9,d25,d3[1] + vmlal.u32 q6,d29,d4[1] + vmlal.u32 q7,d21,d3[1] + + vmlal.u32 q8,d21,d5[1] + vmlal.u32 q5,d25,d6[1] + vmlal.u32 q9,d23,d5[1] + vmlal.u32 q6,d27,d6[1] + vmlal.u32 q7,d29,d6[1] + + vmlal.u32 q8,d29,d8[1] + vmlal.u32 q5,d23,d8[1] + vmlal.u32 q9,d21,d7[1] + vmlal.u32 q6,d25,d8[1] + vmlal.u32 q7,d27,d8[1] + + vld4.32 {d21,d23,d25,d27},[r4] @ inp[2:3] (or 0) + add r4,r4,#64 + + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ (hash+inp[0:1])*r^4 and accumulate + + vmlal.u32 q8,d26,d0[0] + vmlal.u32 q5,d20,d0[0] + vmlal.u32 q9,d28,d0[0] + vmlal.u32 q6,d22,d0[0] + vmlal.u32 q7,d24,d0[0] + vld1.32 d8[0],[r6,:32] + + vmlal.u32 q8,d24,d1[0] + vmlal.u32 q5,d28,d2[0] + vmlal.u32 q9,d26,d1[0] + vmlal.u32 q6,d20,d1[0] + vmlal.u32 q7,d22,d1[0] + + vmlal.u32 q8,d22,d3[0] + vmlal.u32 q5,d26,d4[0] + vmlal.u32 q9,d24,d3[0] + vmlal.u32 q6,d28,d4[0] + vmlal.u32 q7,d20,d3[0] + + vmlal.u32 q8,d20,d5[0] + vmlal.u32 q5,d24,d6[0] + vmlal.u32 q9,d22,d5[0] + vmlal.u32 q6,d26,d6[0] + vmlal.u32 q8,d28,d8[0] + + vmlal.u32 q7,d28,d6[0] + vmlal.u32 q5,d22,d8[0] + vmlal.u32 q9,d20,d7[0] + vmov.i32 q14,#1<<24 @ padbit, yes, always + vmlal.u32 q6,d24,d8[0] + vmlal.u32 q7,d26,d8[0] + + vld4.32 {d20,d22,d24,d26},[r1] @ inp[0:1] + add r1,r1,#64 +# ifdef __ARMEB__ + vrev32.8 q10,q10 + vrev32.8 q11,q11 + vrev32.8 q12,q12 + vrev32.8 q13,q13 +# endif + + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ lazy reduction interleaved with base 2^32 -> base 2^26 of + @ inp[0:3] previously loaded to q10-q13 and smashed to q10-q14. 
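+	@ q4/q15 hold the 26-bit spills; the h4 -> h0 carry is scaled
+	@ by 5 (shift-by-2 plus add) before being folded back, exactly
+	@ as in the scalar code, just on two lanes at once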
+ + vshr.u64 q15,q8,#26 + vmovn.i64 d16,q8 + vshr.u64 q4,q5,#26 + vmovn.i64 d10,q5 + vadd.i64 q9,q9,q15 @ h3 -> h4 + vbic.i32 d16,#0xfc000000 + vsri.u32 q14,q13,#8 @ base 2^32 -> base 2^26 + vadd.i64 q6,q6,q4 @ h0 -> h1 + vshl.u32 q13,q13,#18 + vbic.i32 d10,#0xfc000000 + + vshrn.u64 d30,q9,#26 + vmovn.i64 d18,q9 + vshr.u64 q4,q6,#26 + vmovn.i64 d12,q6 + vadd.i64 q7,q7,q4 @ h1 -> h2 + vsri.u32 q13,q12,#14 + vbic.i32 d18,#0xfc000000 + vshl.u32 q12,q12,#12 + vbic.i32 d12,#0xfc000000 + + vadd.i32 d10,d10,d30 + vshl.u32 d30,d30,#2 + vbic.i32 q13,#0xfc000000 + vshrn.u64 d8,q7,#26 + vmovn.i64 d14,q7 + vaddl.u32 q5,d10,d30 @ h4 -> h0 [widen for a sec] + vsri.u32 q12,q11,#20 + vadd.i32 d16,d16,d8 @ h2 -> h3 + vshl.u32 q11,q11,#6 + vbic.i32 d14,#0xfc000000 + vbic.i32 q12,#0xfc000000 + + vshrn.u64 d30,q5,#26 @ re-narrow + vmovn.i64 d10,q5 + vsri.u32 q11,q10,#26 + vbic.i32 q10,#0xfc000000 + vshr.u32 d8,d16,#26 + vbic.i32 d16,#0xfc000000 + vbic.i32 d10,#0xfc000000 + vadd.i32 d12,d12,d30 @ h0 -> h1 + vadd.i32 d18,d18,d8 @ h3 -> h4 + vbic.i32 q11,#0xfc000000 + + bhi .Loop_neon + +.Lskip_loop: + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ multiply (inp[0:1]+hash) or inp[2:3] by r^2:r^1 + + add r7,r0,#(48+0*9*4) + add r6,r0,#(48+1*9*4) + adds r2,r2,#32 + it ne + movne r2,#0 + bne .Long_tail + + vadd.i32 d25,d24,d14 @ add hash value and move to #hi + vadd.i32 d21,d20,d10 + vadd.i32 d27,d26,d16 + vadd.i32 d23,d22,d12 + vadd.i32 d29,d28,d18 + +.Long_tail: + vld4.32 {d0[1],d1[1],d2[1],d3[1]},[r7]! @ load r^1 + vld4.32 {d0[0],d1[0],d2[0],d3[0]},[r6]! @ load r^2 + + vadd.i32 d24,d24,d14 @ can be redundant + vmull.u32 q7,d25,d0 + vadd.i32 d20,d20,d10 + vmull.u32 q5,d21,d0 + vadd.i32 d26,d26,d16 + vmull.u32 q8,d27,d0 + vadd.i32 d22,d22,d12 + vmull.u32 q6,d23,d0 + vadd.i32 d28,d28,d18 + vmull.u32 q9,d29,d0 + + vmlal.u32 q5,d29,d2 + vld4.32 {d4[1],d5[1],d6[1],d7[1]},[r7]! + vmlal.u32 q8,d25,d1 + vld4.32 {d4[0],d5[0],d6[0],d7[0]},[r6]! + vmlal.u32 q6,d21,d1 + vmlal.u32 q9,d27,d1 + vmlal.u32 q7,d23,d1 + + vmlal.u32 q8,d23,d3 + vld1.32 d8[1],[r7,:32] + vmlal.u32 q5,d27,d4 + vld1.32 d8[0],[r6,:32] + vmlal.u32 q9,d25,d3 + vmlal.u32 q6,d29,d4 + vmlal.u32 q7,d21,d3 + + vmlal.u32 q8,d21,d5 + it ne + addne r7,r0,#(48+2*9*4) + vmlal.u32 q5,d25,d6 + it ne + addne r6,r0,#(48+3*9*4) + vmlal.u32 q9,d23,d5 + vmlal.u32 q6,d27,d6 + vmlal.u32 q7,d29,d6 + + vmlal.u32 q8,d29,d8 + vorn q0,q0,q0 @ all-ones, can be redundant + vmlal.u32 q5,d23,d8 + vshr.u64 q0,q0,#38 + vmlal.u32 q9,d21,d7 + vmlal.u32 q6,d25,d8 + vmlal.u32 q7,d27,d8 + + beq .Lshort_tail + + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ (hash+inp[0:1])*r^4:r^3 and accumulate + + vld4.32 {d0[1],d1[1],d2[1],d3[1]},[r7]! @ load r^3 + vld4.32 {d0[0],d1[0],d2[0],d3[0]},[r6]! @ load r^4 + + vmlal.u32 q7,d24,d0 + vmlal.u32 q5,d20,d0 + vmlal.u32 q8,d26,d0 + vmlal.u32 q6,d22,d0 + vmlal.u32 q9,d28,d0 + + vmlal.u32 q5,d28,d2 + vld4.32 {d4[1],d5[1],d6[1],d7[1]},[r7]! + vmlal.u32 q8,d24,d1 + vld4.32 {d4[0],d5[0],d6[0],d7[0]},[r6]! 
+ vmlal.u32 q6,d20,d1 + vmlal.u32 q9,d26,d1 + vmlal.u32 q7,d22,d1 + + vmlal.u32 q8,d22,d3 + vld1.32 d8[1],[r7,:32] + vmlal.u32 q5,d26,d4 + vld1.32 d8[0],[r6,:32] + vmlal.u32 q9,d24,d3 + vmlal.u32 q6,d28,d4 + vmlal.u32 q7,d20,d3 + + vmlal.u32 q8,d20,d5 + vmlal.u32 q5,d24,d6 + vmlal.u32 q9,d22,d5 + vmlal.u32 q6,d26,d6 + vmlal.u32 q7,d28,d6 + + vmlal.u32 q8,d28,d8 + vorn q0,q0,q0 @ all-ones + vmlal.u32 q5,d22,d8 + vshr.u64 q0,q0,#38 + vmlal.u32 q9,d20,d7 + vmlal.u32 q6,d24,d8 + vmlal.u32 q7,d26,d8 + +.Lshort_tail: + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ horizontal addition + + vadd.i64 d16,d16,d17 + vadd.i64 d10,d10,d11 + vadd.i64 d18,d18,d19 + vadd.i64 d12,d12,d13 + vadd.i64 d14,d14,d15 + + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ lazy reduction, but without narrowing + + vshr.u64 q15,q8,#26 + vand.i64 q8,q8,q0 + vshr.u64 q4,q5,#26 + vand.i64 q5,q5,q0 + vadd.i64 q9,q9,q15 @ h3 -> h4 + vadd.i64 q6,q6,q4 @ h0 -> h1 + + vshr.u64 q15,q9,#26 + vand.i64 q9,q9,q0 + vshr.u64 q4,q6,#26 + vand.i64 q6,q6,q0 + vadd.i64 q7,q7,q4 @ h1 -> h2 + + vadd.i64 q5,q5,q15 + vshl.u64 q15,q15,#2 + vshr.u64 q4,q7,#26 + vand.i64 q7,q7,q0 + vadd.i64 q5,q5,q15 @ h4 -> h0 + vadd.i64 q8,q8,q4 @ h2 -> h3 + + vshr.u64 q15,q5,#26 + vand.i64 q5,q5,q0 + vshr.u64 q4,q8,#26 + vand.i64 q8,q8,q0 + vadd.i64 q6,q6,q15 @ h0 -> h1 + vadd.i64 q9,q9,q4 @ h3 -> h4 + + cmp r2,#0 + bne .Leven + + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ store hash value + + vst4.32 {d10[0],d12[0],d14[0],d16[0]},[r0]! + vst1.32 {d18[0]},[r0] + + vldmia sp!,{d8-d15} @ epilogue + ldmia sp!,{r4-r7} + bx lr @ bx lr +.size poly1305_blocks_neon,.-poly1305_blocks_neon + +.align 5 +.Lzeros: +.long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +#ifndef __KERNEL__ +.LOPENSSL_armcap: +# ifdef _WIN32 +.word OPENSSL_armcap_P +# else +.word OPENSSL_armcap_P-.Lpoly1305_init +# endif +.comm OPENSSL_armcap_P,4,4 +.hidden OPENSSL_armcap_P +#endif +#endif +.asciz "Poly1305 for ARMv4/NEON, CRYPTOGAMS by @dot-asm" +.align 2 diff --git a/arch/arm/crypto/poly1305-glue.c b/arch/arm/crypto/poly1305-glue.c new file mode 100644 index 000000000000..74a725ac89c9 --- /dev/null +++ b/arch/arm/crypto/poly1305-glue.c @@ -0,0 +1,276 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * OpenSSL/Cryptogams accelerated Poly1305 transform for ARM + * + * Copyright (C) 2019 Linaro Ltd. 
+ */
+
+#include <asm/hwcap.h>
+#include <asm/neon.h>
+#include <asm/simd.h>
+#include <asm/unaligned.h>
+#include <crypto/algapi.h>
+#include <crypto/internal/hash.h>
+#include <crypto/internal/simd.h>
+#include <crypto/poly1305.h>
+#include <linux/cpufeature.h>
+#include <linux/crypto.h>
+#include <linux/jump_label.h>
+#include <linux/module.h>
+
+void poly1305_init_arm(void *state, const u8 *key);
+void poly1305_blocks_arm(void *state, const u8 *src, u32 len, u32 hibit);
+void poly1305_emit_arm(void *state, __le32 *digest, const u32 *nonce);
+
+void __weak poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit)
+{
+}
+
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
+
+void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 *key)
+{
+	poly1305_init_arm(&dctx->h, key);
+	dctx->s[0] = get_unaligned_le32(key + 16);
+	dctx->s[1] = get_unaligned_le32(key + 20);
+	dctx->s[2] = get_unaligned_le32(key + 24);
+	dctx->s[3] = get_unaligned_le32(key + 28);
+	dctx->buflen = 0;
+}
+EXPORT_SYMBOL(poly1305_init_arch);
+
+static int arm_poly1305_init(struct shash_desc *desc)
+{
+	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+
+	dctx->buflen = 0;
+	dctx->rset = 0;
+	dctx->sset = false;
+
+	return 0;
+}
+
+static void arm_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src,
+				u32 len, u32 hibit, bool do_neon)
+{
+	if (unlikely(!dctx->sset)) {
+		if (!dctx->rset) {
+			poly1305_init_arm(&dctx->h, src);
+			src += POLY1305_BLOCK_SIZE;
+			len -= POLY1305_BLOCK_SIZE;
+			dctx->rset = 1;
+		}
+		if (len >= POLY1305_BLOCK_SIZE) {
+			dctx->s[0] = get_unaligned_le32(src + 0);
+			dctx->s[1] = get_unaligned_le32(src + 4);
+			dctx->s[2] = get_unaligned_le32(src + 8);
+			dctx->s[3] = get_unaligned_le32(src + 12);
+			src += POLY1305_BLOCK_SIZE;
+			len -= POLY1305_BLOCK_SIZE;
+			dctx->sset = true;
+		}
+		if (len < POLY1305_BLOCK_SIZE)
+			return;
+	}
+
+	len &= ~(POLY1305_BLOCK_SIZE - 1);
+
+	if (static_branch_likely(&have_neon) && likely(do_neon))
+		poly1305_blocks_neon(&dctx->h, src, len, hibit);
+	else
+		poly1305_blocks_arm(&dctx->h, src, len, hibit);
+}
+
+static void arm_poly1305_do_update(struct poly1305_desc_ctx *dctx,
+				   const u8 *src, u32 len, bool do_neon)
+{
+	if (unlikely(dctx->buflen)) {
+		u32 bytes = min(len, POLY1305_BLOCK_SIZE - dctx->buflen);
+
+		memcpy(dctx->buf + dctx->buflen, src, bytes);
+		src += bytes;
+		len -= bytes;
+		dctx->buflen += bytes;
+
+		if (dctx->buflen == POLY1305_BLOCK_SIZE) {
+			arm_poly1305_blocks(dctx, dctx->buf,
+					    POLY1305_BLOCK_SIZE, 1, false);
+			dctx->buflen = 0;
+		}
+	}
+
+	if (likely(len >= POLY1305_BLOCK_SIZE)) {
+		arm_poly1305_blocks(dctx, src, len, 1, do_neon);
+		src += round_down(len, POLY1305_BLOCK_SIZE);
+		len %= POLY1305_BLOCK_SIZE;
+	}
+
+	if (unlikely(len)) {
+		dctx->buflen = len;
+		memcpy(dctx->buf, src, len);
+	}
+}
+
+static int arm_poly1305_update(struct shash_desc *desc,
+			       const u8 *src, unsigned int srclen)
+{
+	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+
+	arm_poly1305_do_update(dctx, src, srclen, false);
+	return 0;
+}
+
+static int __maybe_unused arm_poly1305_update_neon(struct shash_desc *desc,
+						   const u8 *src,
+						   unsigned int srclen)
+{
+	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+	bool do_neon = crypto_simd_usable() && srclen > 128;
+
+	if (static_branch_likely(&have_neon) && do_neon)
+		kernel_neon_begin();
+	arm_poly1305_do_update(dctx, src, srclen, do_neon);
+	if (static_branch_likely(&have_neon) && do_neon)
+		kernel_neon_end();
+	return 0;
+}
+
+void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
+			  unsigned int nbytes)
+{
+	bool do_neon = IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
+		       crypto_simd_usable();
+
+	if (unlikely(dctx->buflen)) {
+		u32 bytes = min(nbytes, POLY1305_BLOCK_SIZE - dctx->buflen);
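+		/* bytes needed to complete the buffered partial block */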
+ + memcpy(dctx->buf + dctx->buflen, src, bytes); + src += bytes; + nbytes -= bytes; + dctx->buflen += bytes; + + if (dctx->buflen == POLY1305_BLOCK_SIZE) { + poly1305_blocks_arm(&dctx->h, dctx->buf, + POLY1305_BLOCK_SIZE, 1); + dctx->buflen = 0; + } + } + + if (likely(nbytes >= POLY1305_BLOCK_SIZE)) { + unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE); + + if (static_branch_likely(&have_neon) && do_neon) { + kernel_neon_begin(); + poly1305_blocks_neon(&dctx->h, src, len, 1); + kernel_neon_end(); + } else { + poly1305_blocks_arm(&dctx->h, src, len, 1); + } + src += len; + nbytes %= POLY1305_BLOCK_SIZE; + } + + if (unlikely(nbytes)) { + dctx->buflen = nbytes; + memcpy(dctx->buf, src, nbytes); + } +} +EXPORT_SYMBOL(poly1305_update_arch); + +void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst) +{ + __le32 digest[4]; + u64 f = 0; + + if (unlikely(dctx->buflen)) { + dctx->buf[dctx->buflen++] = 1; + memset(dctx->buf + dctx->buflen, 0, + POLY1305_BLOCK_SIZE - dctx->buflen); + poly1305_blocks_arm(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0); + } + + poly1305_emit_arm(&dctx->h, digest, dctx->s); + + /* mac = (h + s) % (2^128) */ + f = (f >> 32) + le32_to_cpu(digest[0]); + put_unaligned_le32(f, dst); + f = (f >> 32) + le32_to_cpu(digest[1]); + put_unaligned_le32(f, dst + 4); + f = (f >> 32) + le32_to_cpu(digest[2]); + put_unaligned_le32(f, dst + 8); + f = (f >> 32) + le32_to_cpu(digest[3]); + put_unaligned_le32(f, dst + 12); + + *dctx = (struct poly1305_desc_ctx){}; +} +EXPORT_SYMBOL(poly1305_final_arch); + +static int arm_poly1305_final(struct shash_desc *desc, u8 *dst) +{ + struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); + + if (unlikely(!dctx->sset)) + return -ENOKEY; + + poly1305_final_arch(dctx, dst); + return 0; +} + +static struct shash_alg arm_poly1305_algs[] = {{ + .init = arm_poly1305_init, + .update = arm_poly1305_update, + .final = arm_poly1305_final, + .digestsize = POLY1305_DIGEST_SIZE, + .descsize = sizeof(struct poly1305_desc_ctx), + + .base.cra_name = "poly1305", + .base.cra_driver_name = "poly1305-arm", + .base.cra_priority = 150, + .base.cra_blocksize = POLY1305_BLOCK_SIZE, + .base.cra_module = THIS_MODULE, +#ifdef CONFIG_KERNEL_MODE_NEON +}, { + .init = arm_poly1305_init, + .update = arm_poly1305_update_neon, + .final = arm_poly1305_final, + .digestsize = POLY1305_DIGEST_SIZE, + .descsize = sizeof(struct poly1305_desc_ctx), + + .base.cra_name = "poly1305", + .base.cra_driver_name = "poly1305-neon", + .base.cra_priority = 200, + .base.cra_blocksize = POLY1305_BLOCK_SIZE, + .base.cra_module = THIS_MODULE, +#endif +}}; + +static int __init arm_poly1305_mod_init(void) +{ + if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && + (elf_hwcap & HWCAP_NEON)) + static_branch_enable(&have_neon); + else + /* register only the first entry */ + return crypto_register_shash(&arm_poly1305_algs[0]); + + return crypto_register_shashes(arm_poly1305_algs, + ARRAY_SIZE(arm_poly1305_algs)); +} + +static void __exit arm_poly1305_mod_exit(void) +{ + if (!static_branch_likely(&have_neon)) { + crypto_unregister_shash(&arm_poly1305_algs[0]); + return; + } + crypto_unregister_shashes(arm_poly1305_algs, + ARRAY_SIZE(arm_poly1305_algs)); +} + +module_init(arm_poly1305_mod_init); +module_exit(arm_poly1305_mod_exit); + +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS_CRYPTO("poly1305"); +MODULE_ALIAS_CRYPTO("poly1305-arm"); +MODULE_ALIAS_CRYPTO("poly1305-neon"); diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig index 9923445e8225..9bd15b227e78 100644 --- a/lib/crypto/Kconfig +++ 
b/lib/crypto/Kconfig @@ -40,7 +40,7 @@ config CRYPTO_LIB_DES config CRYPTO_LIB_POLY1305_RSIZE int default 4 if X86_64 - default 9 if ARM64 + default 9 if ARM || ARM64 default 1 config CRYPTO_ARCH_HAVE_LIB_POLY1305 -- cgit v1.2.3 From a11d055e7a64ac34a5e99b6fe731299449cbcd58 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 8 Nov 2019 13:22:26 +0100 Subject: crypto: mips/poly1305 - incorporate OpenSSL/CRYPTOGAMS optimized implementation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a straight import of the OpenSSL/CRYPTOGAMS Poly1305 implementation for MIPS authored by Andy Polyakov, a prior 64-bit only version of which has been contributed by him to the OpenSSL project. The file 'poly1305-mips.pl' is taken straight from this upstream GitHub repository [0] at commit d22ade312a7af958ec955620b0d241cf42c37feb, and already contains all the changes required to build it as part of a Linux kernel module. [0] https://github.com/dot-asm/cryptogams Co-developed-by: Andy Polyakov Signed-off-by: Andy Polyakov Co-developed-by: René van Dorst Signed-off-by: René van Dorst Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/mips/crypto/Makefile | 14 + arch/mips/crypto/poly1305-glue.c | 203 ++++++ arch/mips/crypto/poly1305-mips.pl | 1273 +++++++++++++++++++++++++++++++++++++ crypto/Kconfig | 5 + lib/crypto/Kconfig | 1 + 5 files changed, 1496 insertions(+) create mode 100644 arch/mips/crypto/poly1305-glue.c create mode 100644 arch/mips/crypto/poly1305-mips.pl (limited to 'arch') diff --git a/arch/mips/crypto/Makefile b/arch/mips/crypto/Makefile index b528b9d300f1..8e1deaf00e0c 100644 --- a/arch/mips/crypto/Makefile +++ b/arch/mips/crypto/Makefile @@ -8,3 +8,17 @@ obj-$(CONFIG_CRYPTO_CRC32_MIPS) += crc32-mips.o obj-$(CONFIG_CRYPTO_CHACHA_MIPS) += chacha-mips.o chacha-mips-y := chacha-core.o chacha-glue.o AFLAGS_chacha-core.o += -O2 # needed to fill branch delay slots + +obj-$(CONFIG_CRYPTO_POLY1305_MIPS) += poly1305-mips.o +poly1305-mips-y := poly1305-core.o poly1305-glue.o + +perlasm-flavour-$(CONFIG_CPU_MIPS32) := o32 +perlasm-flavour-$(CONFIG_CPU_MIPS64) := 64 + +quiet_cmd_perlasm = PERLASM $@ + cmd_perlasm = $(PERL) $(<) $(perlasm-flavour-y) $(@) + +$(obj)/poly1305-core.S: $(src)/poly1305-mips.pl FORCE + $(call if_changed,perlasm) + +targets += poly1305-core.S diff --git a/arch/mips/crypto/poly1305-glue.c b/arch/mips/crypto/poly1305-glue.c new file mode 100644 index 000000000000..b759b6ccc361 --- /dev/null +++ b/arch/mips/crypto/poly1305-glue.c @@ -0,0 +1,203 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * OpenSSL/Cryptogams accelerated Poly1305 transform for MIPS + * + * Copyright (C) 2019 Linaro Ltd. 
+ */
+
+#include <asm/unaligned.h>
+#include <crypto/algapi.h>
+#include <crypto/internal/hash.h>
+#include <crypto/poly1305.h>
+#include <linux/cpufeature.h>
+#include <linux/crypto.h>
+#include <linux/module.h>
+
+asmlinkage void poly1305_init_mips(void *state, const u8 *key);
+asmlinkage void poly1305_blocks_mips(void *state, const u8 *src, u32 len, u32 hibit);
+asmlinkage void poly1305_emit_mips(void *state, __le32 *digest, const u32 *nonce);
+
+void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 *key)
+{
+	poly1305_init_mips(&dctx->h, key);
+	dctx->s[0] = get_unaligned_le32(key + 16);
+	dctx->s[1] = get_unaligned_le32(key + 20);
+	dctx->s[2] = get_unaligned_le32(key + 24);
+	dctx->s[3] = get_unaligned_le32(key + 28);
+	dctx->buflen = 0;
+}
+EXPORT_SYMBOL(poly1305_init_arch);
+
+static int mips_poly1305_init(struct shash_desc *desc)
+{
+	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+
+	dctx->buflen = 0;
+	dctx->rset = 0;
+	dctx->sset = false;
+
+	return 0;
+}
+
+static void mips_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src,
+				 u32 len, u32 hibit)
+{
+	if (unlikely(!dctx->sset)) {
+		if (!dctx->rset) {
+			poly1305_init_mips(&dctx->h, src);
+			src += POLY1305_BLOCK_SIZE;
+			len -= POLY1305_BLOCK_SIZE;
+			dctx->rset = 1;
+		}
+		if (len >= POLY1305_BLOCK_SIZE) {
+			dctx->s[0] = get_unaligned_le32(src + 0);
+			dctx->s[1] = get_unaligned_le32(src + 4);
+			dctx->s[2] = get_unaligned_le32(src + 8);
+			dctx->s[3] = get_unaligned_le32(src + 12);
+			src += POLY1305_BLOCK_SIZE;
+			len -= POLY1305_BLOCK_SIZE;
+			dctx->sset = true;
+		}
+		if (len < POLY1305_BLOCK_SIZE)
+			return;
+	}
+
+	len &= ~(POLY1305_BLOCK_SIZE - 1);
+
+	poly1305_blocks_mips(&dctx->h, src, len, hibit);
+}
+
+static int mips_poly1305_update(struct shash_desc *desc, const u8 *src,
+				unsigned int len)
+{
+	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+
+	if (unlikely(dctx->buflen)) {
+		u32 bytes = min(len, POLY1305_BLOCK_SIZE - dctx->buflen);
+
+		memcpy(dctx->buf + dctx->buflen, src, bytes);
+		src += bytes;
+		len -= bytes;
+		dctx->buflen += bytes;
+
+		if (dctx->buflen == POLY1305_BLOCK_SIZE) {
+			mips_poly1305_blocks(dctx, dctx->buf, POLY1305_BLOCK_SIZE, 1);
+			dctx->buflen = 0;
+		}
+	}
+
+	if (likely(len >= POLY1305_BLOCK_SIZE)) {
+		mips_poly1305_blocks(dctx, src, len, 1);
+		src += round_down(len, POLY1305_BLOCK_SIZE);
+		len %= POLY1305_BLOCK_SIZE;
+	}
+
+	if (unlikely(len)) {
+		dctx->buflen = len;
+		memcpy(dctx->buf, src, len);
+	}
+	return 0;
+}
+
+void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
+			  unsigned int nbytes)
+{
+	if (unlikely(dctx->buflen)) {
+		u32 bytes = min(nbytes, POLY1305_BLOCK_SIZE - dctx->buflen);
+
+		memcpy(dctx->buf + dctx->buflen, src, bytes);
+		src += bytes;
+		nbytes -= bytes;
+		dctx->buflen += bytes;
+
+		if (dctx->buflen == POLY1305_BLOCK_SIZE) {
+			poly1305_blocks_mips(&dctx->h, dctx->buf,
+					     POLY1305_BLOCK_SIZE, 1);
+			dctx->buflen = 0;
+		}
+	}
+
+	if (likely(nbytes >= POLY1305_BLOCK_SIZE)) {
+		unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE);
+
+		poly1305_blocks_mips(&dctx->h, src, len, 1);
+		src += len;
+		nbytes %= POLY1305_BLOCK_SIZE;
+	}
+
+	if (unlikely(nbytes)) {
+		dctx->buflen = nbytes;
+		memcpy(dctx->buf, src, nbytes);
+	}
+}
+EXPORT_SYMBOL(poly1305_update_arch);
+
+void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst)
+{
+	__le32 digest[4];
+	u64 f = 0;
+
+	if (unlikely(dctx->buflen)) {
+		dctx->buf[dctx->buflen++] = 1;
+		memset(dctx->buf + dctx->buflen, 0,
+		       POLY1305_BLOCK_SIZE - dctx->buflen);
+		poly1305_blocks_mips(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0);
+	}
+
+	poly1305_emit_mips(&dctx->h, digest, dctx->s);
+
+	/* mac = (h + s) % (2^128) */
+	f
= (f >> 32) + le32_to_cpu(digest[0]); + put_unaligned_le32(f, dst); + f = (f >> 32) + le32_to_cpu(digest[1]); + put_unaligned_le32(f, dst + 4); + f = (f >> 32) + le32_to_cpu(digest[2]); + put_unaligned_le32(f, dst + 8); + f = (f >> 32) + le32_to_cpu(digest[3]); + put_unaligned_le32(f, dst + 12); + + *dctx = (struct poly1305_desc_ctx){}; +} +EXPORT_SYMBOL(poly1305_final_arch); + +static int mips_poly1305_final(struct shash_desc *desc, u8 *dst) +{ + struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); + + if (unlikely(!dctx->sset)) + return -ENOKEY; + + poly1305_final_arch(dctx, dst); + return 0; +} + +static struct shash_alg mips_poly1305_alg = { + .init = mips_poly1305_init, + .update = mips_poly1305_update, + .final = mips_poly1305_final, + .digestsize = POLY1305_DIGEST_SIZE, + .descsize = sizeof(struct poly1305_desc_ctx), + + .base.cra_name = "poly1305", + .base.cra_driver_name = "poly1305-mips", + .base.cra_priority = 200, + .base.cra_blocksize = POLY1305_BLOCK_SIZE, + .base.cra_module = THIS_MODULE, +}; + +static int __init mips_poly1305_mod_init(void) +{ + return crypto_register_shash(&mips_poly1305_alg); +} + +static void __exit mips_poly1305_mod_exit(void) +{ + crypto_unregister_shash(&mips_poly1305_alg); +} + +module_init(mips_poly1305_mod_init); +module_exit(mips_poly1305_mod_exit); + +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS_CRYPTO("poly1305"); +MODULE_ALIAS_CRYPTO("poly1305-mips"); diff --git a/arch/mips/crypto/poly1305-mips.pl b/arch/mips/crypto/poly1305-mips.pl new file mode 100644 index 000000000000..b05bab884ed2 --- /dev/null +++ b/arch/mips/crypto/poly1305-mips.pl @@ -0,0 +1,1273 @@ +#!/usr/bin/env perl +# SPDX-License-Identifier: GPL-1.0+ OR BSD-3-Clause +# +# ==================================================================== +# Written by Andy Polyakov, @dot-asm, originally for the OpenSSL +# project. +# ==================================================================== + +# Poly1305 hash for MIPS. +# +# May 2016 +# +# Numbers are cycles per processed byte with poly1305_blocks alone. +# +# IALU/gcc +# R1x000 ~5.5/+130% (big-endian) +# Octeon II 2.50/+70% (little-endian) +# +# March 2019 +# +# Add 32-bit code path. +# +# October 2019 +# +# Modulo-scheduling reduction allows to omit dependency chain at the +# end of inner loop and improve performance. Also optimize MIPS32R2 +# code path for MIPS 1004K core. Per René von Dorst's suggestions. +# +# IALU/gcc +# R1x000 ~9.8/? (big-endian) +# Octeon II 3.65/+140% (little-endian) +# MT7621/1004K 4.75/? (little-endian) +# +###################################################################### +# There is a number of MIPS ABI in use, O32 and N32/64 are most +# widely used. Then there is a new contender: NUBI. It appears that if +# one picks the latter, it's possible to arrange code in ABI neutral +# manner. Therefore let's stick to NUBI register layout: +# +($zero,$at,$t0,$t1,$t2)=map("\$$_",(0..2,24,25)); +($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11)); +($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map("\$$_",(12..23)); +($gp,$tp,$sp,$fp,$ra)=map("\$$_",(3,28..31)); +# +# The return value is placed in $a0. 
Following coding rules facilitate +# interoperability: +# +# - never ever touch $tp, "thread pointer", former $gp [o32 can be +# excluded from the rule, because it's specified volatile]; +# - copy return value to $t0, former $v0 [or to $a0 if you're adapting +# old code]; +# - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary; +# +# For reference here is register layout for N32/64 MIPS ABIs: +# +# ($zero,$at,$v0,$v1)=map("\$$_",(0..3)); +# ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11)); +# ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25)); +# ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23)); +# ($gp,$sp,$fp,$ra)=map("\$$_",(28..31)); +# +# +# +###################################################################### + +$flavour = shift || "64"; # supported flavours are o32,n32,64,nubi32,nubi64 + +$v0 = ($flavour =~ /nubi/i) ? $a0 : $t0; + +if ($flavour =~ /64|n32/i) {{{ +###################################################################### +# 64-bit code path +# + +my ($ctx,$inp,$len,$padbit) = ($a0,$a1,$a2,$a3); +my ($in0,$in1,$tmp0,$tmp1,$tmp2,$tmp3,$tmp4) = ($a4,$a5,$a6,$a7,$at,$t0,$t1); + +$code.=<<___; +#if (defined(_MIPS_ARCH_MIPS64R3) || defined(_MIPS_ARCH_MIPS64R5) || \\ + defined(_MIPS_ARCH_MIPS64R6)) \\ + && !defined(_MIPS_ARCH_MIPS64R2) +# define _MIPS_ARCH_MIPS64R2 +#endif + +#if defined(_MIPS_ARCH_MIPS64R6) +# define dmultu(rs,rt) +# define mflo(rd,rs,rt) dmulu rd,rs,rt +# define mfhi(rd,rs,rt) dmuhu rd,rs,rt +#else +# define dmultu(rs,rt) dmultu rs,rt +# define mflo(rd,rs,rt) mflo rd +# define mfhi(rd,rs,rt) mfhi rd +#endif + +#ifdef __KERNEL__ +# define poly1305_init poly1305_init_mips +# define poly1305_blocks poly1305_blocks_mips +# define poly1305_emit poly1305_emit_mips +#endif + +#if defined(__MIPSEB__) && !defined(MIPSEB) +# define MIPSEB +#endif + +#ifdef MIPSEB +# define MSB 0 +# define LSB 7 +#else +# define MSB 7 +# define LSB 0 +#endif + +.text +.set noat +.set noreorder + +.align 5 +.globl poly1305_init +.ent poly1305_init +poly1305_init: + .frame $sp,0,$ra + .set reorder + + sd $zero,0($ctx) + sd $zero,8($ctx) + sd $zero,16($ctx) + + beqz $inp,.Lno_key + +#if defined(_MIPS_ARCH_MIPS64R6) + andi $tmp0,$inp,7 # $inp % 8 + dsubu $inp,$inp,$tmp0 # align $inp + sll $tmp0,$tmp0,3 # byte to bit offset + ld $in0,0($inp) + ld $in1,8($inp) + beqz $tmp0,.Laligned_key + ld $tmp2,16($inp) + + subu $tmp1,$zero,$tmp0 +# ifdef MIPSEB + dsllv $in0,$in0,$tmp0 + dsrlv $tmp3,$in1,$tmp1 + dsllv $in1,$in1,$tmp0 + dsrlv $tmp2,$tmp2,$tmp1 +# else + dsrlv $in0,$in0,$tmp0 + dsllv $tmp3,$in1,$tmp1 + dsrlv $in1,$in1,$tmp0 + dsllv $tmp2,$tmp2,$tmp1 +# endif + or $in0,$in0,$tmp3 + or $in1,$in1,$tmp2 +.Laligned_key: +#else + ldl $in0,0+MSB($inp) + ldl $in1,8+MSB($inp) + ldr $in0,0+LSB($inp) + ldr $in1,8+LSB($inp) +#endif +#ifdef MIPSEB +# if defined(_MIPS_ARCH_MIPS64R2) + dsbh $in0,$in0 # byte swap + dsbh $in1,$in1 + dshd $in0,$in0 + dshd $in1,$in1 +# else + ori $tmp0,$zero,0xFF + dsll $tmp2,$tmp0,32 + or $tmp0,$tmp2 # 0x000000FF000000FF + + and $tmp1,$in0,$tmp0 # byte swap + and $tmp3,$in1,$tmp0 + dsrl $tmp2,$in0,24 + dsrl $tmp4,$in1,24 + dsll $tmp1,24 + dsll $tmp3,24 + and $tmp2,$tmp0 + and $tmp4,$tmp0 + dsll $tmp0,8 # 0x0000FF000000FF00 + or $tmp1,$tmp2 + or $tmp3,$tmp4 + and $tmp2,$in0,$tmp0 + and $tmp4,$in1,$tmp0 + dsrl $in0,8 + dsrl $in1,8 + dsll $tmp2,8 + dsll $tmp4,8 + and $in0,$tmp0 + and $in1,$tmp0 + or $tmp1,$tmp2 + or $tmp3,$tmp4 + or $in0,$tmp1 + or $in1,$tmp3 + dsrl $tmp1,$in0,32 + dsrl $tmp3,$in1,32 + dsll $in0,32 + dsll $in1,32 + or $in0,$tmp1 + or 
$in1,$tmp3 +# endif +#endif + li $tmp0,1 + dsll $tmp0,32 # 0x0000000100000000 + daddiu $tmp0,-63 # 0x00000000ffffffc1 + dsll $tmp0,28 # 0x0ffffffc10000000 + daddiu $tmp0,-1 # 0x0ffffffc0fffffff + + and $in0,$tmp0 + daddiu $tmp0,-3 # 0x0ffffffc0ffffffc + and $in1,$tmp0 + + sd $in0,24($ctx) + dsrl $tmp0,$in1,2 + sd $in1,32($ctx) + daddu $tmp0,$in1 # s1 = r1 + (r1 >> 2) + sd $tmp0,40($ctx) + +.Lno_key: + li $v0,0 # return 0 + jr $ra +.end poly1305_init +___ +{ +my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? "0x0003f000" : "0x00030000"; + +my ($h0,$h1,$h2,$r0,$r1,$rs1,$d0,$d1,$d2) = + ($s0,$s1,$s2,$s3,$s4,$s5,$in0,$in1,$t2); +my ($shr,$shl) = ($s6,$s7); # used on R6 + +$code.=<<___; +.align 5 +.globl poly1305_blocks +.ent poly1305_blocks +poly1305_blocks: + .set noreorder + dsrl $len,4 # number of complete blocks + bnez $len,poly1305_blocks_internal + nop + jr $ra + nop +.end poly1305_blocks + +.align 5 +.ent poly1305_blocks_internal +poly1305_blocks_internal: + .set noreorder +#if defined(_MIPS_ARCH_MIPS64R6) + .frame $sp,8*8,$ra + .mask $SAVED_REGS_MASK|0x000c0000,-8 + dsubu $sp,8*8 + sd $s7,56($sp) + sd $s6,48($sp) +#else + .frame $sp,6*8,$ra + .mask $SAVED_REGS_MASK,-8 + dsubu $sp,6*8 +#endif + sd $s5,40($sp) + sd $s4,32($sp) +___ +$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue + sd $s3,24($sp) + sd $s2,16($sp) + sd $s1,8($sp) + sd $s0,0($sp) +___ +$code.=<<___; + .set reorder + +#if defined(_MIPS_ARCH_MIPS64R6) + andi $shr,$inp,7 + dsubu $inp,$inp,$shr # align $inp + sll $shr,$shr,3 # byte to bit offset + subu $shl,$zero,$shr +#endif + + ld $h0,0($ctx) # load hash value + ld $h1,8($ctx) + ld $h2,16($ctx) + + ld $r0,24($ctx) # load key + ld $r1,32($ctx) + ld $rs1,40($ctx) + + dsll $len,4 + daddu $len,$inp # end of buffer + b .Loop + +.align 4 +.Loop: +#if defined(_MIPS_ARCH_MIPS64R6) + ld $in0,0($inp) # load input + ld $in1,8($inp) + beqz $shr,.Laligned_inp + + ld $tmp2,16($inp) +# ifdef MIPSEB + dsllv $in0,$in0,$shr + dsrlv $tmp3,$in1,$shl + dsllv $in1,$in1,$shr + dsrlv $tmp2,$tmp2,$shl +# else + dsrlv $in0,$in0,$shr + dsllv $tmp3,$in1,$shl + dsrlv $in1,$in1,$shr + dsllv $tmp2,$tmp2,$shl +# endif + or $in0,$in0,$tmp3 + or $in1,$in1,$tmp2 +.Laligned_inp: +#else + ldl $in0,0+MSB($inp) # load input + ldl $in1,8+MSB($inp) + ldr $in0,0+LSB($inp) + ldr $in1,8+LSB($inp) +#endif + daddiu $inp,16 +#ifdef MIPSEB +# if defined(_MIPS_ARCH_MIPS64R2) + dsbh $in0,$in0 # byte swap + dsbh $in1,$in1 + dshd $in0,$in0 + dshd $in1,$in1 +# else + ori $tmp0,$zero,0xFF + dsll $tmp2,$tmp0,32 + or $tmp0,$tmp2 # 0x000000FF000000FF + + and $tmp1,$in0,$tmp0 # byte swap + and $tmp3,$in1,$tmp0 + dsrl $tmp2,$in0,24 + dsrl $tmp4,$in1,24 + dsll $tmp1,24 + dsll $tmp3,24 + and $tmp2,$tmp0 + and $tmp4,$tmp0 + dsll $tmp0,8 # 0x0000FF000000FF00 + or $tmp1,$tmp2 + or $tmp3,$tmp4 + and $tmp2,$in0,$tmp0 + and $tmp4,$in1,$tmp0 + dsrl $in0,8 + dsrl $in1,8 + dsll $tmp2,8 + dsll $tmp4,8 + and $in0,$tmp0 + and $in1,$tmp0 + or $tmp1,$tmp2 + or $tmp3,$tmp4 + or $in0,$tmp1 + or $in1,$tmp3 + dsrl $tmp1,$in0,32 + dsrl $tmp3,$in1,32 + dsll $in0,32 + dsll $in1,32 + or $in0,$tmp1 + or $in1,$tmp3 +# endif +#endif + dsrl $tmp1,$h2,2 # modulo-scheduled reduction + andi $h2,$h2,3 + dsll $tmp0,$tmp1,2 + + daddu $d0,$h0,$in0 # accumulate input + daddu $tmp1,$tmp0 + sltu $tmp0,$d0,$h0 + daddu $d0,$d0,$tmp1 # ... 
and residue + sltu $tmp1,$d0,$tmp1 + daddu $d1,$h1,$in1 + daddu $tmp0,$tmp1 + sltu $tmp1,$d1,$h1 + daddu $d1,$tmp0 + + dmultu ($r0,$d0) # h0*r0 + daddu $d2,$h2,$padbit + sltu $tmp0,$d1,$tmp0 + mflo ($h0,$r0,$d0) + mfhi ($h1,$r0,$d0) + + dmultu ($rs1,$d1) # h1*5*r1 + daddu $d2,$tmp1 + daddu $d2,$tmp0 + mflo ($tmp0,$rs1,$d1) + mfhi ($tmp1,$rs1,$d1) + + dmultu ($r1,$d0) # h0*r1 + mflo ($tmp2,$r1,$d0) + mfhi ($h2,$r1,$d0) + daddu $h0,$tmp0 + daddu $h1,$tmp1 + sltu $tmp0,$h0,$tmp0 + + dmultu ($r0,$d1) # h1*r0 + daddu $h1,$tmp0 + daddu $h1,$tmp2 + mflo ($tmp0,$r0,$d1) + mfhi ($tmp1,$r0,$d1) + + dmultu ($rs1,$d2) # h2*5*r1 + sltu $tmp2,$h1,$tmp2 + daddu $h2,$tmp2 + mflo ($tmp2,$rs1,$d2) + + dmultu ($r0,$d2) # h2*r0 + daddu $h1,$tmp0 + daddu $h2,$tmp1 + mflo ($tmp3,$r0,$d2) + sltu $tmp0,$h1,$tmp0 + daddu $h2,$tmp0 + + daddu $h1,$tmp2 + sltu $tmp2,$h1,$tmp2 + daddu $h2,$tmp2 + daddu $h2,$tmp3 + + bne $inp,$len,.Loop + + sd $h0,0($ctx) # store hash value + sd $h1,8($ctx) + sd $h2,16($ctx) + + .set noreorder +#if defined(_MIPS_ARCH_MIPS64R6) + ld $s7,56($sp) + ld $s6,48($sp) +#endif + ld $s5,40($sp) # epilogue + ld $s4,32($sp) +___ +$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi epilogue + ld $s3,24($sp) + ld $s2,16($sp) + ld $s1,8($sp) + ld $s0,0($sp) +___ +$code.=<<___; + jr $ra +#if defined(_MIPS_ARCH_MIPS64R6) + daddu $sp,8*8 +#else + daddu $sp,6*8 +#endif +.end poly1305_blocks_internal +___ +} +{ +my ($ctx,$mac,$nonce) = ($a0,$a1,$a2); + +$code.=<<___; +.align 5 +.globl poly1305_emit +.ent poly1305_emit +poly1305_emit: + .frame $sp,0,$ra + .set reorder + + ld $tmp2,16($ctx) + ld $tmp0,0($ctx) + ld $tmp1,8($ctx) + + li $in0,-4 # final reduction + dsrl $in1,$tmp2,2 + and $in0,$tmp2 + andi $tmp2,$tmp2,3 + daddu $in0,$in1 + + daddu $tmp0,$tmp0,$in0 + sltu $in1,$tmp0,$in0 + daddiu $in0,$tmp0,5 # compare to modulus + daddu $tmp1,$tmp1,$in1 + sltiu $tmp3,$in0,5 + sltu $tmp4,$tmp1,$in1 + daddu $in1,$tmp1,$tmp3 + daddu $tmp2,$tmp2,$tmp4 + sltu $tmp3,$in1,$tmp3 + daddu $tmp2,$tmp2,$tmp3 + + dsrl $tmp2,2 # see if it carried/borrowed + dsubu $tmp2,$zero,$tmp2 + + xor $in0,$tmp0 + xor $in1,$tmp1 + and $in0,$tmp2 + and $in1,$tmp2 + xor $in0,$tmp0 + xor $in1,$tmp1 + + lwu $tmp0,0($nonce) # load nonce + lwu $tmp1,4($nonce) + lwu $tmp2,8($nonce) + lwu $tmp3,12($nonce) + dsll $tmp1,32 + dsll $tmp3,32 + or $tmp0,$tmp1 + or $tmp2,$tmp3 + + daddu $in0,$tmp0 # accumulate nonce + daddu $in1,$tmp2 + sltu $tmp0,$in0,$tmp0 + daddu $in1,$tmp0 + + dsrl $tmp0,$in0,8 # write mac value + dsrl $tmp1,$in0,16 + dsrl $tmp2,$in0,24 + sb $in0,0($mac) + dsrl $tmp3,$in0,32 + sb $tmp0,1($mac) + dsrl $tmp0,$in0,40 + sb $tmp1,2($mac) + dsrl $tmp1,$in0,48 + sb $tmp2,3($mac) + dsrl $tmp2,$in0,56 + sb $tmp3,4($mac) + dsrl $tmp3,$in1,8 + sb $tmp0,5($mac) + dsrl $tmp0,$in1,16 + sb $tmp1,6($mac) + dsrl $tmp1,$in1,24 + sb $tmp2,7($mac) + + sb $in1,8($mac) + dsrl $tmp2,$in1,32 + sb $tmp3,9($mac) + dsrl $tmp3,$in1,40 + sb $tmp0,10($mac) + dsrl $tmp0,$in1,48 + sb $tmp1,11($mac) + dsrl $tmp1,$in1,56 + sb $tmp2,12($mac) + sb $tmp3,13($mac) + sb $tmp0,14($mac) + sb $tmp1,15($mac) + + jr $ra +.end poly1305_emit +.rdata +.asciiz "Poly1305 for MIPS64, CRYPTOGAMS by \@dot-asm" +.align 2 +___ +} +}}} else {{{ +###################################################################### +# 32-bit code path +# + +my ($ctx,$inp,$len,$padbit) = ($a0,$a1,$a2,$a3); +my ($in0,$in1,$in2,$in3,$tmp0,$tmp1,$tmp2,$tmp3) = + ($a4,$a5,$a6,$a7,$at,$t0,$t1,$t2); + +$code.=<<___; +#if (defined(_MIPS_ARCH_MIPS32R3) || defined(_MIPS_ARCH_MIPS32R5) || \\ + 
defined(_MIPS_ARCH_MIPS32R6)) \\ + && !defined(_MIPS_ARCH_MIPS32R2) +# define _MIPS_ARCH_MIPS32R2 +#endif + +#if defined(_MIPS_ARCH_MIPS32R6) +# define multu(rs,rt) +# define mflo(rd,rs,rt) mulu rd,rs,rt +# define mfhi(rd,rs,rt) muhu rd,rs,rt +#else +# define multu(rs,rt) multu rs,rt +# define mflo(rd,rs,rt) mflo rd +# define mfhi(rd,rs,rt) mfhi rd +#endif + +#ifdef __KERNEL__ +# define poly1305_init poly1305_init_mips +# define poly1305_blocks poly1305_blocks_mips +# define poly1305_emit poly1305_emit_mips +#endif + +#if defined(__MIPSEB__) && !defined(MIPSEB) +# define MIPSEB +#endif + +#ifdef MIPSEB +# define MSB 0 +# define LSB 3 +#else +# define MSB 3 +# define LSB 0 +#endif + +.text +.set noat +.set noreorder + +.align 5 +.globl poly1305_init +.ent poly1305_init +poly1305_init: + .frame $sp,0,$ra + .set reorder + + sw $zero,0($ctx) + sw $zero,4($ctx) + sw $zero,8($ctx) + sw $zero,12($ctx) + sw $zero,16($ctx) + + beqz $inp,.Lno_key + +#if defined(_MIPS_ARCH_MIPS32R6) + andi $tmp0,$inp,3 # $inp % 4 + subu $inp,$inp,$tmp0 # align $inp + sll $tmp0,$tmp0,3 # byte to bit offset + lw $in0,0($inp) + lw $in1,4($inp) + lw $in2,8($inp) + lw $in3,12($inp) + beqz $tmp0,.Laligned_key + + lw $tmp2,16($inp) + subu $tmp1,$zero,$tmp0 +# ifdef MIPSEB + sllv $in0,$in0,$tmp0 + srlv $tmp3,$in1,$tmp1 + sllv $in1,$in1,$tmp0 + or $in0,$in0,$tmp3 + srlv $tmp3,$in2,$tmp1 + sllv $in2,$in2,$tmp0 + or $in1,$in1,$tmp3 + srlv $tmp3,$in3,$tmp1 + sllv $in3,$in3,$tmp0 + or $in2,$in2,$tmp3 + srlv $tmp2,$tmp2,$tmp1 + or $in3,$in3,$tmp2 +# else + srlv $in0,$in0,$tmp0 + sllv $tmp3,$in1,$tmp1 + srlv $in1,$in1,$tmp0 + or $in0,$in0,$tmp3 + sllv $tmp3,$in2,$tmp1 + srlv $in2,$in2,$tmp0 + or $in1,$in1,$tmp3 + sllv $tmp3,$in3,$tmp1 + srlv $in3,$in3,$tmp0 + or $in2,$in2,$tmp3 + sllv $tmp2,$tmp2,$tmp1 + or $in3,$in3,$tmp2 +# endif +.Laligned_key: +#else + lwl $in0,0+MSB($inp) + lwl $in1,4+MSB($inp) + lwl $in2,8+MSB($inp) + lwl $in3,12+MSB($inp) + lwr $in0,0+LSB($inp) + lwr $in1,4+LSB($inp) + lwr $in2,8+LSB($inp) + lwr $in3,12+LSB($inp) +#endif +#ifdef MIPSEB +# if defined(_MIPS_ARCH_MIPS32R2) + wsbh $in0,$in0 # byte swap + wsbh $in1,$in1 + wsbh $in2,$in2 + wsbh $in3,$in3 + rotr $in0,$in0,16 + rotr $in1,$in1,16 + rotr $in2,$in2,16 + rotr $in3,$in3,16 +# else + srl $tmp0,$in0,24 # byte swap + srl $tmp1,$in0,8 + andi $tmp2,$in0,0xFF00 + sll $in0,$in0,24 + andi $tmp1,0xFF00 + sll $tmp2,$tmp2,8 + or $in0,$tmp0 + srl $tmp0,$in1,24 + or $tmp1,$tmp2 + srl $tmp2,$in1,8 + or $in0,$tmp1 + andi $tmp1,$in1,0xFF00 + sll $in1,$in1,24 + andi $tmp2,0xFF00 + sll $tmp1,$tmp1,8 + or $in1,$tmp0 + srl $tmp0,$in2,24 + or $tmp2,$tmp1 + srl $tmp1,$in2,8 + or $in1,$tmp2 + andi $tmp2,$in2,0xFF00 + sll $in2,$in2,24 + andi $tmp1,0xFF00 + sll $tmp2,$tmp2,8 + or $in2,$tmp0 + srl $tmp0,$in3,24 + or $tmp1,$tmp2 + srl $tmp2,$in3,8 + or $in2,$tmp1 + andi $tmp1,$in3,0xFF00 + sll $in3,$in3,24 + andi $tmp2,0xFF00 + sll $tmp1,$tmp1,8 + or $in3,$tmp0 + or $tmp2,$tmp1 + or $in3,$tmp2 +# endif +#endif + lui $tmp0,0x0fff + ori $tmp0,0xffff # 0x0fffffff + and $in0,$in0,$tmp0 + subu $tmp0,3 # 0x0ffffffc + and $in1,$in1,$tmp0 + and $in2,$in2,$tmp0 + and $in3,$in3,$tmp0 + + sw $in0,20($ctx) + sw $in1,24($ctx) + sw $in2,28($ctx) + sw $in3,32($ctx) + + srl $tmp1,$in1,2 + srl $tmp2,$in2,2 + srl $tmp3,$in3,2 + addu $in1,$in1,$tmp1 # s1 = r1 + (r1 >> 2) + addu $in2,$in2,$tmp2 + addu $in3,$in3,$tmp3 + sw $in1,36($ctx) + sw $in2,40($ctx) + sw $in3,44($ctx) +.Lno_key: + li $v0,0 + jr $ra +.end poly1305_init +___ +{ +my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 
"0x00fff000" : "0x00ff0000"; + +my ($h0,$h1,$h2,$h3,$h4, $r0,$r1,$r2,$r3, $rs1,$rs2,$rs3) = + ($s0,$s1,$s2,$s3,$s4, $s5,$s6,$s7,$s8, $s9,$s10,$s11); +my ($d0,$d1,$d2,$d3) = + ($a4,$a5,$a6,$a7); +my $shr = $t2; # used on R6 +my $one = $t2; # used on R2 + +$code.=<<___; +.globl poly1305_blocks +.align 5 +.ent poly1305_blocks +poly1305_blocks: + .frame $sp,16*4,$ra + .mask $SAVED_REGS_MASK,-4 + .set noreorder + subu $sp, $sp,4*12 + sw $s11,4*11($sp) + sw $s10,4*10($sp) + sw $s9, 4*9($sp) + sw $s8, 4*8($sp) + sw $s7, 4*7($sp) + sw $s6, 4*6($sp) + sw $s5, 4*5($sp) + sw $s4, 4*4($sp) +___ +$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue + sw $s3, 4*3($sp) + sw $s2, 4*2($sp) + sw $s1, 4*1($sp) + sw $s0, 4*0($sp) +___ +$code.=<<___; + .set reorder + + srl $len,4 # number of complete blocks + li $one,1 + beqz $len,.Labort + +#if defined(_MIPS_ARCH_MIPS32R6) + andi $shr,$inp,3 + subu $inp,$inp,$shr # align $inp + sll $shr,$shr,3 # byte to bit offset +#endif + + lw $h0,0($ctx) # load hash value + lw $h1,4($ctx) + lw $h2,8($ctx) + lw $h3,12($ctx) + lw $h4,16($ctx) + + lw $r0,20($ctx) # load key + lw $r1,24($ctx) + lw $r2,28($ctx) + lw $r3,32($ctx) + lw $rs1,36($ctx) + lw $rs2,40($ctx) + lw $rs3,44($ctx) + + sll $len,4 + addu $len,$len,$inp # end of buffer + b .Loop + +.align 4 +.Loop: +#if defined(_MIPS_ARCH_MIPS32R6) + lw $d0,0($inp) # load input + lw $d1,4($inp) + lw $d2,8($inp) + lw $d3,12($inp) + beqz $shr,.Laligned_inp + + lw $t0,16($inp) + subu $t1,$zero,$shr +# ifdef MIPSEB + sllv $d0,$d0,$shr + srlv $at,$d1,$t1 + sllv $d1,$d1,$shr + or $d0,$d0,$at + srlv $at,$d2,$t1 + sllv $d2,$d2,$shr + or $d1,$d1,$at + srlv $at,$d3,$t1 + sllv $d3,$d3,$shr + or $d2,$d2,$at + srlv $t0,$t0,$t1 + or $d3,$d3,$t0 +# else + srlv $d0,$d0,$shr + sllv $at,$d1,$t1 + srlv $d1,$d1,$shr + or $d0,$d0,$at + sllv $at,$d2,$t1 + srlv $d2,$d2,$shr + or $d1,$d1,$at + sllv $at,$d3,$t1 + srlv $d3,$d3,$shr + or $d2,$d2,$at + sllv $t0,$t0,$t1 + or $d3,$d3,$t0 +# endif +.Laligned_inp: +#else + lwl $d0,0+MSB($inp) # load input + lwl $d1,4+MSB($inp) + lwl $d2,8+MSB($inp) + lwl $d3,12+MSB($inp) + lwr $d0,0+LSB($inp) + lwr $d1,4+LSB($inp) + lwr $d2,8+LSB($inp) + lwr $d3,12+LSB($inp) +#endif +#ifdef MIPSEB +# if defined(_MIPS_ARCH_MIPS32R2) + wsbh $d0,$d0 # byte swap + wsbh $d1,$d1 + wsbh $d2,$d2 + wsbh $d3,$d3 + rotr $d0,$d0,16 + rotr $d1,$d1,16 + rotr $d2,$d2,16 + rotr $d3,$d3,16 +# else + srl $at,$d0,24 # byte swap + srl $t0,$d0,8 + andi $t1,$d0,0xFF00 + sll $d0,$d0,24 + andi $t0,0xFF00 + sll $t1,$t1,8 + or $d0,$at + srl $at,$d1,24 + or $t0,$t1 + srl $t1,$d1,8 + or $d0,$t0 + andi $t0,$d1,0xFF00 + sll $d1,$d1,24 + andi $t1,0xFF00 + sll $t0,$t0,8 + or $d1,$at + srl $at,$d2,24 + or $t1,$t0 + srl $t0,$d2,8 + or $d1,$t1 + andi $t1,$d2,0xFF00 + sll $d2,$d2,24 + andi $t0,0xFF00 + sll $t1,$t1,8 + or $d2,$at + srl $at,$d3,24 + or $t0,$t1 + srl $t1,$d3,8 + or $d2,$t0 + andi $t0,$d3,0xFF00 + sll $d3,$d3,24 + andi $t1,0xFF00 + sll $t0,$t0,8 + or $d3,$at + or $t1,$t0 + or $d3,$t1 +# endif +#endif + srl $t0,$h4,2 # modulo-scheduled reduction + andi $h4,$h4,3 + sll $at,$t0,2 + + addu $d0,$d0,$h0 # accumulate input + addu $t0,$t0,$at + sltu $h0,$d0,$h0 + addu $d0,$d0,$t0 # ... 
and residue + sltu $at,$d0,$t0 + + addu $d1,$d1,$h1 + addu $h0,$h0,$at # carry + sltu $h1,$d1,$h1 + addu $d1,$d1,$h0 + sltu $h0,$d1,$h0 + + addu $d2,$d2,$h2 + addu $h1,$h1,$h0 # carry + sltu $h2,$d2,$h2 + addu $d2,$d2,$h1 + sltu $h1,$d2,$h1 + + addu $d3,$d3,$h3 + addu $h2,$h2,$h1 # carry + sltu $h3,$d3,$h3 + addu $d3,$d3,$h2 + +#if defined(_MIPS_ARCH_MIPS32R2) && !defined(_MIPS_ARCH_MIPS32R6) + multu $r0,$d0 # d0*r0 + sltu $h2,$d3,$h2 + maddu $rs3,$d1 # d1*s3 + addu $h3,$h3,$h2 # carry + maddu $rs2,$d2 # d2*s2 + addu $h4,$h4,$padbit + maddu $rs1,$d3 # d3*s1 + addu $h4,$h4,$h3 + mfhi $at + mflo $h0 + + multu $r1,$d0 # d0*r1 + maddu $r0,$d1 # d1*r0 + maddu $rs3,$d2 # d2*s3 + maddu $rs2,$d3 # d3*s2 + maddu $rs1,$h4 # h4*s1 + maddu $at,$one # hi*1 + mfhi $at + mflo $h1 + + multu $r2,$d0 # d0*r2 + maddu $r1,$d1 # d1*r1 + maddu $r0,$d2 # d2*r0 + maddu $rs3,$d3 # d3*s3 + maddu $rs2,$h4 # h4*s2 + maddu $at,$one # hi*1 + mfhi $at + mflo $h2 + + mul $t0,$r0,$h4 # h4*r0 + + multu $r3,$d0 # d0*r3 + maddu $r2,$d1 # d1*r2 + maddu $r1,$d2 # d2*r1 + maddu $r0,$d3 # d3*r0 + maddu $rs3,$h4 # h4*s3 + maddu $at,$one # hi*1 + mfhi $at + mflo $h3 + + addiu $inp,$inp,16 + + addu $h4,$t0,$at +#else + multu ($r0,$d0) # d0*r0 + mflo ($h0,$r0,$d0) + mfhi ($h1,$r0,$d0) + + sltu $h2,$d3,$h2 + addu $h3,$h3,$h2 # carry + + multu ($rs3,$d1) # d1*s3 + mflo ($at,$rs3,$d1) + mfhi ($t0,$rs3,$d1) + + addu $h4,$h4,$padbit + addiu $inp,$inp,16 + addu $h4,$h4,$h3 + + multu ($rs2,$d2) # d2*s2 + mflo ($a3,$rs2,$d2) + mfhi ($t1,$rs2,$d2) + addu $h0,$h0,$at + addu $h1,$h1,$t0 + multu ($rs1,$d3) # d3*s1 + sltu $at,$h0,$at + addu $h1,$h1,$at + + mflo ($at,$rs1,$d3) + mfhi ($t0,$rs1,$d3) + addu $h0,$h0,$a3 + addu $h1,$h1,$t1 + multu ($r1,$d0) # d0*r1 + sltu $a3,$h0,$a3 + addu $h1,$h1,$a3 + + + mflo ($a3,$r1,$d0) + mfhi ($h2,$r1,$d0) + addu $h0,$h0,$at + addu $h1,$h1,$t0 + multu ($r0,$d1) # d1*r0 + sltu $at,$h0,$at + addu $h1,$h1,$at + + mflo ($at,$r0,$d1) + mfhi ($t0,$r0,$d1) + addu $h1,$h1,$a3 + sltu $a3,$h1,$a3 + multu ($rs3,$d2) # d2*s3 + addu $h2,$h2,$a3 + + mflo ($a3,$rs3,$d2) + mfhi ($t1,$rs3,$d2) + addu $h1,$h1,$at + addu $h2,$h2,$t0 + multu ($rs2,$d3) # d3*s2 + sltu $at,$h1,$at + addu $h2,$h2,$at + + mflo ($at,$rs2,$d3) + mfhi ($t0,$rs2,$d3) + addu $h1,$h1,$a3 + addu $h2,$h2,$t1 + multu ($rs1,$h4) # h4*s1 + sltu $a3,$h1,$a3 + addu $h2,$h2,$a3 + + mflo ($a3,$rs1,$h4) + addu $h1,$h1,$at + addu $h2,$h2,$t0 + multu ($r2,$d0) # d0*r2 + sltu $at,$h1,$at + addu $h2,$h2,$at + + + mflo ($at,$r2,$d0) + mfhi ($h3,$r2,$d0) + addu $h1,$h1,$a3 + sltu $a3,$h1,$a3 + multu ($r1,$d1) # d1*r1 + addu $h2,$h2,$a3 + + mflo ($a3,$r1,$d1) + mfhi ($t1,$r1,$d1) + addu $h2,$h2,$at + sltu $at,$h2,$at + multu ($r0,$d2) # d2*r0 + addu $h3,$h3,$at + + mflo ($at,$r0,$d2) + mfhi ($t0,$r0,$d2) + addu $h2,$h2,$a3 + addu $h3,$h3,$t1 + multu ($rs3,$d3) # d3*s3 + sltu $a3,$h2,$a3 + addu $h3,$h3,$a3 + + mflo ($a3,$rs3,$d3) + mfhi ($t1,$rs3,$d3) + addu $h2,$h2,$at + addu $h3,$h3,$t0 + multu ($rs2,$h4) # h4*s2 + sltu $at,$h2,$at + addu $h3,$h3,$at + + mflo ($at,$rs2,$h4) + addu $h2,$h2,$a3 + addu $h3,$h3,$t1 + multu ($r3,$d0) # d0*r3 + sltu $a3,$h2,$a3 + addu $h3,$h3,$a3 + + + mflo ($a3,$r3,$d0) + mfhi ($t1,$r3,$d0) + addu $h2,$h2,$at + sltu $at,$h2,$at + multu ($r2,$d1) # d1*r2 + addu $h3,$h3,$at + + mflo ($at,$r2,$d1) + mfhi ($t0,$r2,$d1) + addu $h3,$h3,$a3 + sltu $a3,$h3,$a3 + multu ($r0,$d3) # d3*r0 + addu $t1,$t1,$a3 + + mflo ($a3,$r0,$d3) + mfhi ($d3,$r0,$d3) + addu $h3,$h3,$at + addu $t1,$t1,$t0 + multu ($r1,$d2) # d2*r1 + sltu $at,$h3,$at + addu $t1,$t1,$at + 
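+	# The scheduling pattern throughout this fallback path: each
+	# multu() is issued as early as possible and its mflo()/mfhi()
+	# results are drained while the next multiplication is already in
+	# flight, hiding the HI/LO latency of older cores; the interleaved
+	# sltu/addu pairs propagate the carries of the running column sums.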
+ mflo ($at,$r1,$d2) + mfhi ($t0,$r1,$d2) + addu $h3,$h3,$a3 + addu $t1,$t1,$d3 + multu ($rs3,$h4) # h4*s3 + sltu $a3,$h3,$a3 + addu $t1,$t1,$a3 + + mflo ($a3,$rs3,$h4) + addu $h3,$h3,$at + addu $t1,$t1,$t0 + multu ($r0,$h4) # h4*r0 + sltu $at,$h3,$at + addu $t1,$t1,$at + + + mflo ($h4,$r0,$h4) + addu $h3,$h3,$a3 + sltu $a3,$h3,$a3 + addu $t1,$t1,$a3 + addu $h4,$h4,$t1 + + li $padbit,1 # if we loop, padbit is 1 +#endif + bne $inp,$len,.Loop + + sw $h0,0($ctx) # store hash value + sw $h1,4($ctx) + sw $h2,8($ctx) + sw $h3,12($ctx) + sw $h4,16($ctx) + + .set noreorder +.Labort: + lw $s11,4*11($sp) + lw $s10,4*10($sp) + lw $s9, 4*9($sp) + lw $s8, 4*8($sp) + lw $s7, 4*7($sp) + lw $s6, 4*6($sp) + lw $s5, 4*5($sp) + lw $s4, 4*4($sp) +___ +$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue + lw $s3, 4*3($sp) + lw $s2, 4*2($sp) + lw $s1, 4*1($sp) + lw $s0, 4*0($sp) +___ +$code.=<<___; + jr $ra + addu $sp,$sp,4*12 +.end poly1305_blocks +___ +} +{ +my ($ctx,$mac,$nonce,$tmp4) = ($a0,$a1,$a2,$a3); + +$code.=<<___; +.align 5 +.globl poly1305_emit +.ent poly1305_emit +poly1305_emit: + .frame $sp,0,$ra + .set reorder + + lw $tmp4,16($ctx) + lw $tmp0,0($ctx) + lw $tmp1,4($ctx) + lw $tmp2,8($ctx) + lw $tmp3,12($ctx) + + li $in0,-4 # final reduction + srl $ctx,$tmp4,2 + and $in0,$in0,$tmp4 + andi $tmp4,$tmp4,3 + addu $ctx,$ctx,$in0 + + addu $tmp0,$tmp0,$ctx + sltu $ctx,$tmp0,$ctx + addiu $in0,$tmp0,5 # compare to modulus + addu $tmp1,$tmp1,$ctx + sltiu $in1,$in0,5 + sltu $ctx,$tmp1,$ctx + addu $in1,$in1,$tmp1 + addu $tmp2,$tmp2,$ctx + sltu $in2,$in1,$tmp1 + sltu $ctx,$tmp2,$ctx + addu $in2,$in2,$tmp2 + addu $tmp3,$tmp3,$ctx + sltu $in3,$in2,$tmp2 + sltu $ctx,$tmp3,$ctx + addu $in3,$in3,$tmp3 + addu $tmp4,$tmp4,$ctx + sltu $ctx,$in3,$tmp3 + addu $ctx,$tmp4 + + srl $ctx,2 # see if it carried/borrowed + subu $ctx,$zero,$ctx + + xor $in0,$tmp0 + xor $in1,$tmp1 + xor $in2,$tmp2 + xor $in3,$tmp3 + and $in0,$ctx + and $in1,$ctx + and $in2,$ctx + and $in3,$ctx + xor $in0,$tmp0 + xor $in1,$tmp1 + xor $in2,$tmp2 + xor $in3,$tmp3 + + lw $tmp0,0($nonce) # load nonce + lw $tmp1,4($nonce) + lw $tmp2,8($nonce) + lw $tmp3,12($nonce) + + addu $in0,$tmp0 # accumulate nonce + sltu $ctx,$in0,$tmp0 + + addu $in1,$tmp1 + sltu $tmp1,$in1,$tmp1 + addu $in1,$ctx + sltu $ctx,$in1,$ctx + addu $ctx,$tmp1 + + addu $in2,$tmp2 + sltu $tmp2,$in2,$tmp2 + addu $in2,$ctx + sltu $ctx,$in2,$ctx + addu $ctx,$tmp2 + + addu $in3,$tmp3 + addu $in3,$ctx + + srl $tmp0,$in0,8 # write mac value + srl $tmp1,$in0,16 + srl $tmp2,$in0,24 + sb $in0, 0($mac) + sb $tmp0,1($mac) + srl $tmp0,$in1,8 + sb $tmp1,2($mac) + srl $tmp1,$in1,16 + sb $tmp2,3($mac) + srl $tmp2,$in1,24 + sb $in1, 4($mac) + sb $tmp0,5($mac) + srl $tmp0,$in2,8 + sb $tmp1,6($mac) + srl $tmp1,$in2,16 + sb $tmp2,7($mac) + srl $tmp2,$in2,24 + sb $in2, 8($mac) + sb $tmp0,9($mac) + srl $tmp0,$in3,8 + sb $tmp1,10($mac) + srl $tmp1,$in3,16 + sb $tmp2,11($mac) + srl $tmp2,$in3,24 + sb $in3, 12($mac) + sb $tmp0,13($mac) + sb $tmp1,14($mac) + sb $tmp2,15($mac) + + jr $ra +.end poly1305_emit +.rdata +.asciiz "Poly1305 for MIPS32, CRYPTOGAMS by \@dot-asm" +.align 2 +___ +} +}}} + +$output=pop and open STDOUT,">$output"; +print $code; +close STDOUT; diff --git a/crypto/Kconfig b/crypto/Kconfig index 7aa4310713cf..2668eed03c5f 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -724,6 +724,11 @@ config CRYPTO_POLY1305_X86_64 in IETF protocols. This is the x86_64 assembler implementation using SIMD instructions. 
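
Stepping back from the diff: the 64-bit and 32-bit code paths above implement the same arithmetic, which is easier to follow in scalar C. Below is a minimal model of one poly1305_blocks iteration on the 64-bit path; the function and variable names are illustrative only and are not part of the patch. The key trick, echoed in the assembly comments, is that key clamping forces r1 to be a multiple of 4, so s1 = r1 + (r1 >> 2) equals 5 * (r1 >> 2) and folds the 2^130 == 5 (mod p) reduction into the multiplication itself, while the top limb is only reduced lazily at the start of the next iteration (the "modulo-scheduled reduction").

	#include <stdint.h>

	/* Illustrative model: h = (h + m + padbit * 2^128) * r mod 2^130 - 5,
	 * with h kept as two 64-bit limbs plus a small top limb h[2].
	 */
	static void poly1305_block_model(uint64_t h[3], const uint64_t m[2],
					 const uint64_t r[2], uint64_t padbit)
	{
		uint64_t s1 = r[1] + (r[1] >> 2);	/* == 5 * (r1 >> 2) */
		uint64_t c = 5 * (h[2] >> 2);		/* lazy: 2^130 == 5 mod p */
		unsigned __int128 d0, d1;

		h[2] &= 3;

		/* accumulate the message block and the 2^128 pad bit */
		d0 = (unsigned __int128)h[0] + m[0] + c;
		d1 = (unsigned __int128)h[1] + m[1] + (uint64_t)(d0 >> 64);
		h[0] = (uint64_t)d0;
		h[1] = (uint64_t)d1;
		h[2] += padbit + (uint64_t)(d1 >> 64);

		/* multiply by r; s1 absorbs the reduction of the high columns */
		d0 = (unsigned __int128)h[0] * r[0] + (unsigned __int128)h[1] * s1;
		d1 = (unsigned __int128)h[0] * r[1] + (unsigned __int128)h[1] * r[0]
		     + (unsigned __int128)h[2] * s1;
		d1 += (uint64_t)(d0 >> 64);
		h[2] = h[2] * r[0] + (uint64_t)(d1 >> 64);	/* reduced on next call */
		h[0] = (uint64_t)d0;
		h[1] = (uint64_t)d1;
	}

poly1305_emit then performs the one remaining full reduction: it computes h + 5, turns "did that reach 2^130" into an all-zero or all-one mask, and the xor/and/xor sequence in the assembly selects h or h + 5 - 2^130 without a data-dependent branch before the nonce is folded in.
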
+config CRYPTO_POLY1305_MIPS + tristate "Poly1305 authenticator algorithm (MIPS optimized)" + depends on CPU_MIPS32 || (CPU_MIPS64 && 64BIT) + select CRYPTO_ARCH_HAVE_LIB_POLY1305 + config CRYPTO_MD4 tristate "MD4 digest algorithm" select CRYPTO_HASH diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig index 9bd15b227e78..d15ec5382986 100644 --- a/lib/crypto/Kconfig +++ b/lib/crypto/Kconfig @@ -39,6 +39,7 @@ config CRYPTO_LIB_DES config CRYPTO_LIB_POLY1305_RSIZE int + default 2 if MIPS default 4 if X86_64 default 9 if ARM || ARM64 default 1 -- cgit v1.2.3 From c12d3362a74bf0cd9e1d488918d40607b62a3104 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 8 Nov 2019 13:22:27 +0100 Subject: int128: move __uint128_t compiler test to Kconfig In order to use 128-bit integer arithmetic in C code, the architecture needs to have declared support for it by setting ARCH_SUPPORTS_INT128, and it requires a version of the toolchain that supports this at build time. This is why all existing tests for ARCH_SUPPORTS_INT128 also test whether __SIZEOF_INT128__ is defined, since this is only the case for compilers that can support 128-bit integers. Let's fold this additional test into the Kconfig declaration of ARCH_SUPPORTS_INT128 so that we can also use the symbol in Makefiles, e.g., to decide whether a certain object needs to be included in the first place. Cc: Masahiro Yamada Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm64/Kconfig | 2 +- arch/riscv/Kconfig | 2 +- arch/x86/Kconfig | 2 +- crypto/ecc.c | 2 +- init/Kconfig | 4 ++++ lib/ubsan.c | 2 +- lib/ubsan.h | 2 +- 7 files changed, 10 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 41a9b4257b72..a591a0673694 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -67,7 +67,7 @@ config ARM64 select ARCH_USE_QUEUED_SPINLOCKS select ARCH_SUPPORTS_MEMORY_FAILURE select ARCH_SUPPORTS_ATOMIC_RMW - select ARCH_SUPPORTS_INT128 if GCC_VERSION >= 50000 || CC_IS_CLANG + select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 && (GCC_VERSION >= 50000 || CC_IS_CLANG) select ARCH_SUPPORTS_NUMA_BALANCING select ARCH_WANT_COMPAT_IPC_PARSE_VERSION if COMPAT select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 8eebbc8860bb..75a6c9117622 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -164,7 +164,7 @@ config ARCH_RV32I config ARCH_RV64I bool "RV64I" select 64BIT - select ARCH_SUPPORTS_INT128 if GCC_VERSION >= 50000 + select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 && GCC_VERSION >= 50000 select HAVE_FUNCTION_TRACER select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FTRACE_MCOUNT_RECORD diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index d6e1faa28c58..f4d9d1e55e5c 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -24,7 +24,7 @@ config X86_64 depends on 64BIT # Options that are inherently 64-bit kernel only: select ARCH_HAS_GIGANTIC_PAGE - select ARCH_SUPPORTS_INT128 + select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 select ARCH_USE_CMPXCHG_LOCKREF select HAVE_ARCH_SOFT_DIRTY select MODULES_USE_ELF_RELA diff --git a/crypto/ecc.c b/crypto/ecc.c index 8ee787723c5c..02d35be7702b 100644 --- a/crypto/ecc.c +++ b/crypto/ecc.c @@ -336,7 +336,7 @@ static u64 vli_usub(u64 *result, const u64 *left, u64 right, static uint128_t mul_64_64(u64 left, u64 right) { uint128_t result; -#if defined(CONFIG_ARCH_SUPPORTS_INT128) && defined(__SIZEOF_INT128__) +#if defined(CONFIG_ARCH_SUPPORTS_INT128) unsigned __int128 m = (unsigned __int128)left * right; 
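	/* The old "&& defined(__SIZEOF_INT128__)" here became redundant:
	 * ARCH_SUPPORTS_INT128 is now gated on CC_HAS_INT128 (see the
	 * init/Kconfig hunk below), whose $(cc-option,-D__SIZEOF_INT128__=0)
	 * probe is compiled with -Werror and therefore fails exactly on
	 * compilers that predefine that builtin macro, i.e. exactly those
	 * that provide __int128. */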
 	result.m_low = m;
diff --git a/init/Kconfig b/init/Kconfig
index b4daad2bac23..020526f681c0 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -785,6 +785,10 @@ config ARCH_SUPPORTS_NUMA_BALANCING
 config ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
 	bool
 
+config CC_HAS_INT128
+	def_bool y
+	depends on !$(cc-option,-D__SIZEOF_INT128__=0)
+
 #
 # For architectures that know their GCC __int128 support is sound
 #
diff --git a/lib/ubsan.c b/lib/ubsan.c
index e7d31735950d..b652cc14dd60 100644
--- a/lib/ubsan.c
+++ b/lib/ubsan.c
@@ -119,7 +119,7 @@ static void val_to_string(char *str, size_t size, struct type_descriptor *type,
 {
 	if (type_is_int(type)) {
 		if (type_bit_width(type) == 128) {
-#if defined(CONFIG_ARCH_SUPPORTS_INT128) && defined(__SIZEOF_INT128__)
+#if defined(CONFIG_ARCH_SUPPORTS_INT128)
 			u_max val = get_unsigned_val(type, value);
 
 			scnprintf(str, size, "0x%08x%08x%08x%08x",
diff --git a/lib/ubsan.h b/lib/ubsan.h
index b8fa83864467..7b56c09473a9 100644
--- a/lib/ubsan.h
+++ b/lib/ubsan.h
@@ -78,7 +78,7 @@ struct invalid_value_data {
 	struct type_descriptor *type;
 };
 
-#if defined(CONFIG_ARCH_SUPPORTS_INT128) && defined(__SIZEOF_INT128__)
+#if defined(CONFIG_ARCH_SUPPORTS_INT128)
 typedef __int128 s_max;
 typedef unsigned __int128 u_max;
 #else
-- 
cgit v1.2.3


From ed0356eda153f6a95649e11feb7b07083caf9e20 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld"
Date: Fri, 8 Nov 2019 13:22:31 +0100
Subject: crypto: blake2s - x86_64 SIMD implementation

These implementations from Samuel Neves support AVX and AVX-512VL.
Originally this used AVX-512F, but Skylake thermal throttling made
AVX-512VL more attractive, and switching to it cost only a negligible
performance difference.

Signed-off-by: Jason A. Donenfeld
Signed-off-by: Samuel Neves
Co-developed-by: Samuel Neves
[ardb: move to arch/x86/crypto, wire into lib/crypto framework]
Signed-off-by: Ard Biesheuvel
Signed-off-by: Herbert Xu
---
 arch/x86/crypto/Makefile       |   2 +
 arch/x86/crypto/blake2s-core.S | 258 +++++++++++++++++++++++++++++++++++++++++
 arch/x86/crypto/blake2s-glue.c | 233 +++++++++++++++++++++++++++++++++++++
 crypto/Kconfig                 |   6 +
 4 files changed, 499 insertions(+)
 create mode 100644 arch/x86/crypto/blake2s-core.S
 create mode 100644 arch/x86/crypto/blake2s-glue.c
(limited to 'arch')

diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index 759b1a927826..922c8ecfa00f 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -48,6 +48,7 @@ ifeq ($(avx_supported),yes)
 	obj-$(CONFIG_CRYPTO_CAST6_AVX_X86_64) += cast6-avx-x86_64.o
 	obj-$(CONFIG_CRYPTO_TWOFISH_AVX_X86_64) += twofish-avx-x86_64.o
 	obj-$(CONFIG_CRYPTO_SERPENT_AVX_X86_64) += serpent-avx-x86_64.o
+	obj-$(CONFIG_CRYPTO_BLAKE2S_X86) += blake2s-x86_64.o
 endif

 # These modules require assembler to support AVX2.
@@ -70,6 +71,7 @@ serpent-sse2-x86_64-y := serpent-sse2-x86_64-asm_64.o serpent_sse2_glue.o
 aegis128-aesni-y := aegis128-aesni-asm.o aegis128-aesni-glue.o

 nhpoly1305-sse2-y := nh-sse2-x86_64.o nhpoly1305-sse2-glue.o
+blake2s-x86_64-y := blake2s-core.o blake2s-glue.o

 ifeq ($(avx_supported),yes)
 	camellia-aesni-avx-x86_64-y := camellia-aesni-avx-asm_64.o \
diff --git a/arch/x86/crypto/blake2s-core.S b/arch/x86/crypto/blake2s-core.S
new file mode 100644
index 000000000000..8591938eee26
--- /dev/null
+++ b/arch/x86/crypto/blake2s-core.S
@@ -0,0 +1,258 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved.
+ * Copyright (C) 2017-2019 Samuel Neves . All Rights Reserved.
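+ *
+ * The BLAKE2s state rows live in %xmm0-%xmm3. The SSSE3 path gathers
+ * message words through the byte-indexed SIGMA table and performs the
+ * 16- and 8-bit rotates with pshufb (the ROT16/ROR328 constants), and
+ * the 12- and 7-bit rotates with shift/or pairs; the AVX-512VL path
+ * instead permutes the message with vpermi2d over SIGMA2 and uses
+ * vprord for all four rotate amounts.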
+ */ + +#include + +.section .rodata.cst32.BLAKE2S_IV, "aM", @progbits, 32 +.align 32 +IV: .octa 0xA54FF53A3C6EF372BB67AE856A09E667 + .octa 0x5BE0CD191F83D9AB9B05688C510E527F +.section .rodata.cst16.ROT16, "aM", @progbits, 16 +.align 16 +ROT16: .octa 0x0D0C0F0E09080B0A0504070601000302 +.section .rodata.cst16.ROR328, "aM", @progbits, 16 +.align 16 +ROR328: .octa 0x0C0F0E0D080B0A090407060500030201 +.section .rodata.cst64.BLAKE2S_SIGMA, "aM", @progbits, 160 +.align 64 +SIGMA: +.byte 0, 2, 4, 6, 1, 3, 5, 7, 14, 8, 10, 12, 15, 9, 11, 13 +.byte 14, 4, 9, 13, 10, 8, 15, 6, 5, 1, 0, 11, 3, 12, 2, 7 +.byte 11, 12, 5, 15, 8, 0, 2, 13, 9, 10, 3, 7, 4, 14, 6, 1 +.byte 7, 3, 13, 11, 9, 1, 12, 14, 15, 2, 5, 4, 8, 6, 10, 0 +.byte 9, 5, 2, 10, 0, 7, 4, 15, 3, 14, 11, 6, 13, 1, 12, 8 +.byte 2, 6, 0, 8, 12, 10, 11, 3, 1, 4, 7, 15, 9, 13, 5, 14 +.byte 12, 1, 14, 4, 5, 15, 13, 10, 8, 0, 6, 9, 11, 7, 3, 2 +.byte 13, 7, 12, 3, 11, 14, 1, 9, 2, 5, 15, 8, 10, 0, 4, 6 +.byte 6, 14, 11, 0, 15, 9, 3, 8, 10, 12, 13, 1, 5, 2, 7, 4 +.byte 10, 8, 7, 1, 2, 4, 6, 5, 13, 15, 9, 3, 0, 11, 14, 12 +#ifdef CONFIG_AS_AVX512 +.section .rodata.cst64.BLAKE2S_SIGMA2, "aM", @progbits, 640 +.align 64 +SIGMA2: +.long 0, 2, 4, 6, 1, 3, 5, 7, 14, 8, 10, 12, 15, 9, 11, 13 +.long 8, 2, 13, 15, 10, 9, 12, 3, 6, 4, 0, 14, 5, 11, 1, 7 +.long 11, 13, 8, 6, 5, 10, 14, 3, 2, 4, 12, 15, 1, 0, 7, 9 +.long 11, 10, 7, 0, 8, 15, 1, 13, 3, 6, 2, 12, 4, 14, 9, 5 +.long 4, 10, 9, 14, 15, 0, 11, 8, 1, 7, 3, 13, 2, 5, 6, 12 +.long 2, 11, 4, 15, 14, 3, 10, 8, 13, 6, 5, 7, 0, 12, 1, 9 +.long 4, 8, 15, 9, 14, 11, 13, 5, 3, 2, 1, 12, 6, 10, 7, 0 +.long 6, 13, 0, 14, 12, 2, 1, 11, 15, 4, 5, 8, 7, 9, 3, 10 +.long 15, 5, 4, 13, 10, 7, 3, 11, 12, 2, 0, 6, 9, 8, 1, 14 +.long 8, 7, 14, 11, 13, 15, 0, 12, 10, 4, 5, 6, 3, 2, 1, 9 +#endif /* CONFIG_AS_AVX512 */ + +.text +#ifdef CONFIG_AS_SSSE3 +ENTRY(blake2s_compress_ssse3) + testq %rdx,%rdx + je .Lendofloop + movdqu (%rdi),%xmm0 + movdqu 0x10(%rdi),%xmm1 + movdqa ROT16(%rip),%xmm12 + movdqa ROR328(%rip),%xmm13 + movdqu 0x20(%rdi),%xmm14 + movq %rcx,%xmm15 + leaq SIGMA+0xa0(%rip),%r8 + jmp .Lbeginofloop + .align 32 +.Lbeginofloop: + movdqa %xmm0,%xmm10 + movdqa %xmm1,%xmm11 + paddq %xmm15,%xmm14 + movdqa IV(%rip),%xmm2 + movdqa %xmm14,%xmm3 + pxor IV+0x10(%rip),%xmm3 + leaq SIGMA(%rip),%rcx +.Lroundloop: + movzbl (%rcx),%eax + movd (%rsi,%rax,4),%xmm4 + movzbl 0x1(%rcx),%eax + movd (%rsi,%rax,4),%xmm5 + movzbl 0x2(%rcx),%eax + movd (%rsi,%rax,4),%xmm6 + movzbl 0x3(%rcx),%eax + movd (%rsi,%rax,4),%xmm7 + punpckldq %xmm5,%xmm4 + punpckldq %xmm7,%xmm6 + punpcklqdq %xmm6,%xmm4 + paddd %xmm4,%xmm0 + paddd %xmm1,%xmm0 + pxor %xmm0,%xmm3 + pshufb %xmm12,%xmm3 + paddd %xmm3,%xmm2 + pxor %xmm2,%xmm1 + movdqa %xmm1,%xmm8 + psrld $0xc,%xmm1 + pslld $0x14,%xmm8 + por %xmm8,%xmm1 + movzbl 0x4(%rcx),%eax + movd (%rsi,%rax,4),%xmm5 + movzbl 0x5(%rcx),%eax + movd (%rsi,%rax,4),%xmm6 + movzbl 0x6(%rcx),%eax + movd (%rsi,%rax,4),%xmm7 + movzbl 0x7(%rcx),%eax + movd (%rsi,%rax,4),%xmm4 + punpckldq %xmm6,%xmm5 + punpckldq %xmm4,%xmm7 + punpcklqdq %xmm7,%xmm5 + paddd %xmm5,%xmm0 + paddd %xmm1,%xmm0 + pxor %xmm0,%xmm3 + pshufb %xmm13,%xmm3 + paddd %xmm3,%xmm2 + pxor %xmm2,%xmm1 + movdqa %xmm1,%xmm8 + psrld $0x7,%xmm1 + pslld $0x19,%xmm8 + por %xmm8,%xmm1 + pshufd $0x93,%xmm0,%xmm0 + pshufd $0x4e,%xmm3,%xmm3 + pshufd $0x39,%xmm2,%xmm2 + movzbl 0x8(%rcx),%eax + movd (%rsi,%rax,4),%xmm6 + movzbl 0x9(%rcx),%eax + movd (%rsi,%rax,4),%xmm7 + movzbl 0xa(%rcx),%eax + movd (%rsi,%rax,4),%xmm4 + movzbl 0xb(%rcx),%eax + movd (%rsi,%rax,4),%xmm5 + 
punpckldq %xmm7,%xmm6 + punpckldq %xmm5,%xmm4 + punpcklqdq %xmm4,%xmm6 + paddd %xmm6,%xmm0 + paddd %xmm1,%xmm0 + pxor %xmm0,%xmm3 + pshufb %xmm12,%xmm3 + paddd %xmm3,%xmm2 + pxor %xmm2,%xmm1 + movdqa %xmm1,%xmm8 + psrld $0xc,%xmm1 + pslld $0x14,%xmm8 + por %xmm8,%xmm1 + movzbl 0xc(%rcx),%eax + movd (%rsi,%rax,4),%xmm7 + movzbl 0xd(%rcx),%eax + movd (%rsi,%rax,4),%xmm4 + movzbl 0xe(%rcx),%eax + movd (%rsi,%rax,4),%xmm5 + movzbl 0xf(%rcx),%eax + movd (%rsi,%rax,4),%xmm6 + punpckldq %xmm4,%xmm7 + punpckldq %xmm6,%xmm5 + punpcklqdq %xmm5,%xmm7 + paddd %xmm7,%xmm0 + paddd %xmm1,%xmm0 + pxor %xmm0,%xmm3 + pshufb %xmm13,%xmm3 + paddd %xmm3,%xmm2 + pxor %xmm2,%xmm1 + movdqa %xmm1,%xmm8 + psrld $0x7,%xmm1 + pslld $0x19,%xmm8 + por %xmm8,%xmm1 + pshufd $0x39,%xmm0,%xmm0 + pshufd $0x4e,%xmm3,%xmm3 + pshufd $0x93,%xmm2,%xmm2 + addq $0x10,%rcx + cmpq %r8,%rcx + jnz .Lroundloop + pxor %xmm2,%xmm0 + pxor %xmm3,%xmm1 + pxor %xmm10,%xmm0 + pxor %xmm11,%xmm1 + addq $0x40,%rsi + decq %rdx + jnz .Lbeginofloop + movdqu %xmm0,(%rdi) + movdqu %xmm1,0x10(%rdi) + movdqu %xmm14,0x20(%rdi) +.Lendofloop: + ret +ENDPROC(blake2s_compress_ssse3) +#endif /* CONFIG_AS_SSSE3 */ + +#ifdef CONFIG_AS_AVX512 +ENTRY(blake2s_compress_avx512) + vmovdqu (%rdi),%xmm0 + vmovdqu 0x10(%rdi),%xmm1 + vmovdqu 0x20(%rdi),%xmm4 + vmovq %rcx,%xmm5 + vmovdqa IV(%rip),%xmm14 + vmovdqa IV+16(%rip),%xmm15 + jmp .Lblake2s_compress_avx512_mainloop +.align 32 +.Lblake2s_compress_avx512_mainloop: + vmovdqa %xmm0,%xmm10 + vmovdqa %xmm1,%xmm11 + vpaddq %xmm5,%xmm4,%xmm4 + vmovdqa %xmm14,%xmm2 + vpxor %xmm15,%xmm4,%xmm3 + vmovdqu (%rsi),%ymm6 + vmovdqu 0x20(%rsi),%ymm7 + addq $0x40,%rsi + leaq SIGMA2(%rip),%rax + movb $0xa,%cl +.Lblake2s_compress_avx512_roundloop: + addq $0x40,%rax + vmovdqa -0x40(%rax),%ymm8 + vmovdqa -0x20(%rax),%ymm9 + vpermi2d %ymm7,%ymm6,%ymm8 + vpermi2d %ymm7,%ymm6,%ymm9 + vmovdqa %ymm8,%ymm6 + vmovdqa %ymm9,%ymm7 + vpaddd %xmm8,%xmm0,%xmm0 + vpaddd %xmm1,%xmm0,%xmm0 + vpxor %xmm0,%xmm3,%xmm3 + vprord $0x10,%xmm3,%xmm3 + vpaddd %xmm3,%xmm2,%xmm2 + vpxor %xmm2,%xmm1,%xmm1 + vprord $0xc,%xmm1,%xmm1 + vextracti128 $0x1,%ymm8,%xmm8 + vpaddd %xmm8,%xmm0,%xmm0 + vpaddd %xmm1,%xmm0,%xmm0 + vpxor %xmm0,%xmm3,%xmm3 + vprord $0x8,%xmm3,%xmm3 + vpaddd %xmm3,%xmm2,%xmm2 + vpxor %xmm2,%xmm1,%xmm1 + vprord $0x7,%xmm1,%xmm1 + vpshufd $0x93,%xmm0,%xmm0 + vpshufd $0x4e,%xmm3,%xmm3 + vpshufd $0x39,%xmm2,%xmm2 + vpaddd %xmm9,%xmm0,%xmm0 + vpaddd %xmm1,%xmm0,%xmm0 + vpxor %xmm0,%xmm3,%xmm3 + vprord $0x10,%xmm3,%xmm3 + vpaddd %xmm3,%xmm2,%xmm2 + vpxor %xmm2,%xmm1,%xmm1 + vprord $0xc,%xmm1,%xmm1 + vextracti128 $0x1,%ymm9,%xmm9 + vpaddd %xmm9,%xmm0,%xmm0 + vpaddd %xmm1,%xmm0,%xmm0 + vpxor %xmm0,%xmm3,%xmm3 + vprord $0x8,%xmm3,%xmm3 + vpaddd %xmm3,%xmm2,%xmm2 + vpxor %xmm2,%xmm1,%xmm1 + vprord $0x7,%xmm1,%xmm1 + vpshufd $0x39,%xmm0,%xmm0 + vpshufd $0x4e,%xmm3,%xmm3 + vpshufd $0x93,%xmm2,%xmm2 + decb %cl + jne .Lblake2s_compress_avx512_roundloop + vpxor %xmm10,%xmm0,%xmm0 + vpxor %xmm11,%xmm1,%xmm1 + vpxor %xmm2,%xmm0,%xmm0 + vpxor %xmm3,%xmm1,%xmm1 + decq %rdx + jne .Lblake2s_compress_avx512_mainloop + vmovdqu %xmm0,(%rdi) + vmovdqu %xmm1,0x10(%rdi) + vmovdqu %xmm4,0x20(%rdi) + vzeroupper + retq +ENDPROC(blake2s_compress_avx512) +#endif /* CONFIG_AS_AVX512 */ diff --git a/arch/x86/crypto/blake2s-glue.c b/arch/x86/crypto/blake2s-glue.c new file mode 100644 index 000000000000..4a37ba7cdbe5 --- /dev/null +++ b/arch/x86/crypto/blake2s-glue.c @@ -0,0 +1,233 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . 
All Rights Reserved. + */ + +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include + +asmlinkage void blake2s_compress_ssse3(struct blake2s_state *state, + const u8 *block, const size_t nblocks, + const u32 inc); +asmlinkage void blake2s_compress_avx512(struct blake2s_state *state, + const u8 *block, const size_t nblocks, + const u32 inc); + +static __ro_after_init DEFINE_STATIC_KEY_FALSE(blake2s_use_ssse3); +static __ro_after_init DEFINE_STATIC_KEY_FALSE(blake2s_use_avx512); + +void blake2s_compress_arch(struct blake2s_state *state, + const u8 *block, size_t nblocks, + const u32 inc) +{ + /* SIMD disables preemption, so relax after processing each page. */ + BUILD_BUG_ON(PAGE_SIZE / BLAKE2S_BLOCK_SIZE < 8); + + if (!static_branch_likely(&blake2s_use_ssse3) || !crypto_simd_usable()) { + blake2s_compress_generic(state, block, nblocks, inc); + return; + } + + for (;;) { + const size_t blocks = min_t(size_t, nblocks, + PAGE_SIZE / BLAKE2S_BLOCK_SIZE); + + kernel_fpu_begin(); + if (IS_ENABLED(CONFIG_AS_AVX512) && + static_branch_likely(&blake2s_use_avx512)) + blake2s_compress_avx512(state, block, blocks, inc); + else + blake2s_compress_ssse3(state, block, blocks, inc); + kernel_fpu_end(); + + nblocks -= blocks; + if (!nblocks) + break; + block += blocks * BLAKE2S_BLOCK_SIZE; + } +} +EXPORT_SYMBOL(blake2s_compress_arch); + +static int crypto_blake2s_setkey(struct crypto_shash *tfm, const u8 *key, + unsigned int keylen) +{ + struct blake2s_tfm_ctx *tctx = crypto_shash_ctx(tfm); + + if (keylen == 0 || keylen > BLAKE2S_KEY_SIZE) { + crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + + memcpy(tctx->key, key, keylen); + tctx->keylen = keylen; + + return 0; +} + +static int crypto_blake2s_init(struct shash_desc *desc) +{ + struct blake2s_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm); + struct blake2s_state *state = shash_desc_ctx(desc); + const int outlen = crypto_shash_digestsize(desc->tfm); + + if (tctx->keylen) + blake2s_init_key(state, outlen, tctx->key, tctx->keylen); + else + blake2s_init(state, outlen); + + return 0; +} + +static int crypto_blake2s_update(struct shash_desc *desc, const u8 *in, + unsigned int inlen) +{ + struct blake2s_state *state = shash_desc_ctx(desc); + const size_t fill = BLAKE2S_BLOCK_SIZE - state->buflen; + + if (unlikely(!inlen)) + return 0; + if (inlen > fill) { + memcpy(state->buf + state->buflen, in, fill); + blake2s_compress_arch(state, state->buf, 1, BLAKE2S_BLOCK_SIZE); + state->buflen = 0; + in += fill; + inlen -= fill; + } + if (inlen > BLAKE2S_BLOCK_SIZE) { + const size_t nblocks = DIV_ROUND_UP(inlen, BLAKE2S_BLOCK_SIZE); + /* Hash one less (full) block than strictly possible */ + blake2s_compress_arch(state, in, nblocks - 1, BLAKE2S_BLOCK_SIZE); + in += BLAKE2S_BLOCK_SIZE * (nblocks - 1); + inlen -= BLAKE2S_BLOCK_SIZE * (nblocks - 1); + } + memcpy(state->buf + state->buflen, in, inlen); + state->buflen += inlen; + + return 0; +} + +static int crypto_blake2s_final(struct shash_desc *desc, u8 *out) +{ + struct blake2s_state *state = shash_desc_ctx(desc); + + blake2s_set_lastblock(state); + memset(state->buf + state->buflen, 0, + BLAKE2S_BLOCK_SIZE - state->buflen); /* Padding */ + blake2s_compress_arch(state, state->buf, 1, state->buflen); + cpu_to_le32_array(state->h, ARRAY_SIZE(state->h)); + memcpy(out, state->h, state->outlen); + memzero_explicit(state, sizeof(*state)); + + return 0; +} + +static struct shash_alg blake2s_algs[] = {{ + .base.cra_name = "blake2s-128", + 
.base.cra_driver_name = "blake2s-128-x86", + .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, + .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx), + .base.cra_priority = 200, + .base.cra_blocksize = BLAKE2S_BLOCK_SIZE, + .base.cra_module = THIS_MODULE, + + .digestsize = BLAKE2S_128_HASH_SIZE, + .setkey = crypto_blake2s_setkey, + .init = crypto_blake2s_init, + .update = crypto_blake2s_update, + .final = crypto_blake2s_final, + .descsize = sizeof(struct blake2s_state), +}, { + .base.cra_name = "blake2s-160", + .base.cra_driver_name = "blake2s-160-x86", + .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, + .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx), + .base.cra_priority = 200, + .base.cra_blocksize = BLAKE2S_BLOCK_SIZE, + .base.cra_module = THIS_MODULE, + + .digestsize = BLAKE2S_160_HASH_SIZE, + .setkey = crypto_blake2s_setkey, + .init = crypto_blake2s_init, + .update = crypto_blake2s_update, + .final = crypto_blake2s_final, + .descsize = sizeof(struct blake2s_state), +}, { + .base.cra_name = "blake2s-224", + .base.cra_driver_name = "blake2s-224-x86", + .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, + .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx), + .base.cra_priority = 200, + .base.cra_blocksize = BLAKE2S_BLOCK_SIZE, + .base.cra_module = THIS_MODULE, + + .digestsize = BLAKE2S_224_HASH_SIZE, + .setkey = crypto_blake2s_setkey, + .init = crypto_blake2s_init, + .update = crypto_blake2s_update, + .final = crypto_blake2s_final, + .descsize = sizeof(struct blake2s_state), +}, { + .base.cra_name = "blake2s-256", + .base.cra_driver_name = "blake2s-256-x86", + .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, + .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx), + .base.cra_priority = 200, + .base.cra_blocksize = BLAKE2S_BLOCK_SIZE, + .base.cra_module = THIS_MODULE, + + .digestsize = BLAKE2S_256_HASH_SIZE, + .setkey = crypto_blake2s_setkey, + .init = crypto_blake2s_init, + .update = crypto_blake2s_update, + .final = crypto_blake2s_final, + .descsize = sizeof(struct blake2s_state), +}}; + +static int __init blake2s_mod_init(void) +{ + if (!boot_cpu_has(X86_FEATURE_SSSE3)) + return 0; + + static_branch_enable(&blake2s_use_ssse3); + + if (IS_ENABLED(CONFIG_AS_AVX512) && + boot_cpu_has(X86_FEATURE_AVX) && + boot_cpu_has(X86_FEATURE_AVX2) && + boot_cpu_has(X86_FEATURE_AVX512F) && + boot_cpu_has(X86_FEATURE_AVX512VL) && + cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM | + XFEATURE_MASK_AVX512, NULL)) + static_branch_enable(&blake2s_use_avx512); + + return crypto_register_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs)); +} + +static void __exit blake2s_mod_exit(void) +{ + if (boot_cpu_has(X86_FEATURE_SSSE3)) + crypto_unregister_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs)); +} + +module_init(blake2s_mod_init); +module_exit(blake2s_mod_exit); + +MODULE_ALIAS_CRYPTO("blake2s-128"); +MODULE_ALIAS_CRYPTO("blake2s-128-x86"); +MODULE_ALIAS_CRYPTO("blake2s-160"); +MODULE_ALIAS_CRYPTO("blake2s-160-x86"); +MODULE_ALIAS_CRYPTO("blake2s-224"); +MODULE_ALIAS_CRYPTO("blake2s-224-x86"); +MODULE_ALIAS_CRYPTO("blake2s-256"); +MODULE_ALIAS_CRYPTO("blake2s-256-x86"); +MODULE_LICENSE("GPL v2"); diff --git a/crypto/Kconfig b/crypto/Kconfig index 3c23187eeeb1..64cc4a93b51c 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -674,6 +674,12 @@ config CRYPTO_BLAKE2S See https://blake2.net for further information. 
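
A usage aside, not part of the patch: the shash drivers registered above are reached through the kernel's regular synchronous-hash API. A minimal sketch, assuming a kernel-module context and a made-up function name:

	#include <crypto/hash.h>
	#include <crypto/blake2s.h>
	#include <linux/err.h>

	/* Hash a buffer with whichever "blake2s-256" implementation has
	 * the highest priority at the time of allocation.
	 */
	static int blake2s256_digest_example(const u8 *data, unsigned int len,
					     u8 out[BLAKE2S_256_HASH_SIZE])
	{
		struct crypto_shash *tfm;
		int err;

		tfm = crypto_alloc_shash("blake2s-256", 0, 0);
		if (IS_ERR(tfm))
			return PTR_ERR(tfm);

		{
			SHASH_DESC_ON_STACK(desc, tfm);

			desc->tfm = tfm;
			err = crypto_shash_digest(desc, data, len, out);
			shash_desc_zero(desc);
		}

		crypto_free_shash(tfm);
		return err;
	}

Because the x86 driver registers at cra_priority 200, it outranks the generic C implementation whenever the module is loaded and SSSE3 is present; without SSSE3, blake2s_mod_init registers nothing and the generic code remains in use.
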
+config CRYPTO_BLAKE2S_X86
+	tristate "BLAKE2s digest algorithm (x86 accelerated version)"
+	depends on X86 && 64BIT
+	select CRYPTO_LIB_BLAKE2S_GENERIC
+	select CRYPTO_ARCH_HAVE_LIB_BLAKE2S
+
 config CRYPTO_CRCT10DIF
 	tristate "CRCT10DIF algorithm"
 	select CRYPTO_HASH
-- 
cgit v1.2.3


From bb611bdfd6be34d9f822c73305fcc83720499d38 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld"
Date: Fri, 8 Nov 2019 13:22:36 +0100
Subject: crypto: curve25519 - x86_64 library and KPP implementations
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This is the fastest available x86_64 implementation and, unlike
Sandy2x, it doesn't require use of the floating-point registers at
all. Instead it makes use of BMI2 and ADX, available on recent
microarchitectures. The implementation was written by Armando
Faz-Hernández with contributions (upstream) from Samuel Neves and me,
in addition to further changes in the kernel implementation from us.

Signed-off-by: Jason A. Donenfeld
Signed-off-by: Samuel Neves
Co-developed-by: Samuel Neves
[ardb: - move to arch/x86/crypto
       - wire into lib/crypto framework
       - implement crypto API KPP hooks ]
Signed-off-by: Ard Biesheuvel
Signed-off-by: Herbert Xu
---
 arch/x86/crypto/Makefile            |    1 +
 arch/x86/crypto/curve25519-x86_64.c | 2475 +++++++++++++++++++++++++++++++++++
 crypto/Kconfig                      |    6 +
 3 files changed, 2482 insertions(+)
 create mode 100644 arch/x86/crypto/curve25519-x86_64.c
(limited to 'arch')

diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index 922c8ecfa00f..958440eae27e 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -39,6 +39,7 @@ obj-$(CONFIG_CRYPTO_AEGIS128_AESNI_SSE2) += aegis128-aesni.o

 obj-$(CONFIG_CRYPTO_NHPOLY1305_SSE2) += nhpoly1305-sse2.o
 obj-$(CONFIG_CRYPTO_NHPOLY1305_AVX2) += nhpoly1305-avx2.o
+obj-$(CONFIG_CRYPTO_CURVE25519_X86) += curve25519-x86_64.o

 # These modules require assembler to support AVX.
 ifeq ($(avx_supported),yes)
diff --git a/arch/x86/crypto/curve25519-x86_64.c b/arch/x86/crypto/curve25519-x86_64.c
new file mode 100644
index 000000000000..a52a3fb15727
--- /dev/null
+++ b/arch/x86/crypto/curve25519-x86_64.c
@@ -0,0 +1,2475 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+/*
+ * Copyright (c) 2017 Armando Faz . All Rights Reserved.
+ * Copyright (C) 2018-2019 Jason A. Donenfeld . All Rights Reserved.
+ * Copyright (C) 2018 Samuel Neves . All Rights Reserved.
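+ *
+ * Field elements of GF(2^255 - 19) are held in four 64-bit limbs
+ * (NUM_WORDS_ELTFP25519 below); each field operation exists in an ADX
+ * variant (mulx plus the adox/adcx dual carry chains) and a plain BMI2
+ * mulx variant, chosen at runtime through the curve25519_use_adx and
+ * curve25519_use_bmi2 static keys.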
+ */ + +#include +#include + +#include +#include +#include +#include + +#include +#include + +static __ro_after_init DEFINE_STATIC_KEY_FALSE(curve25519_use_bmi2); +static __ro_after_init DEFINE_STATIC_KEY_FALSE(curve25519_use_adx); + +enum { NUM_WORDS_ELTFP25519 = 4 }; +typedef __aligned(32) u64 eltfp25519_1w[NUM_WORDS_ELTFP25519]; +typedef __aligned(32) u64 eltfp25519_1w_buffer[2 * NUM_WORDS_ELTFP25519]; + +#define mul_eltfp25519_1w_adx(c, a, b) do { \ + mul_256x256_integer_adx(m.buffer, a, b); \ + red_eltfp25519_1w_adx(c, m.buffer); \ +} while (0) + +#define mul_eltfp25519_1w_bmi2(c, a, b) do { \ + mul_256x256_integer_bmi2(m.buffer, a, b); \ + red_eltfp25519_1w_bmi2(c, m.buffer); \ +} while (0) + +#define sqr_eltfp25519_1w_adx(a) do { \ + sqr_256x256_integer_adx(m.buffer, a); \ + red_eltfp25519_1w_adx(a, m.buffer); \ +} while (0) + +#define sqr_eltfp25519_1w_bmi2(a) do { \ + sqr_256x256_integer_bmi2(m.buffer, a); \ + red_eltfp25519_1w_bmi2(a, m.buffer); \ +} while (0) + +#define mul_eltfp25519_2w_adx(c, a, b) do { \ + mul2_256x256_integer_adx(m.buffer, a, b); \ + red_eltfp25519_2w_adx(c, m.buffer); \ +} while (0) + +#define mul_eltfp25519_2w_bmi2(c, a, b) do { \ + mul2_256x256_integer_bmi2(m.buffer, a, b); \ + red_eltfp25519_2w_bmi2(c, m.buffer); \ +} while (0) + +#define sqr_eltfp25519_2w_adx(a) do { \ + sqr2_256x256_integer_adx(m.buffer, a); \ + red_eltfp25519_2w_adx(a, m.buffer); \ +} while (0) + +#define sqr_eltfp25519_2w_bmi2(a) do { \ + sqr2_256x256_integer_bmi2(m.buffer, a); \ + red_eltfp25519_2w_bmi2(a, m.buffer); \ +} while (0) + +#define sqrn_eltfp25519_1w_adx(a, times) do { \ + int ____counter = (times); \ + while (____counter-- > 0) \ + sqr_eltfp25519_1w_adx(a); \ +} while (0) + +#define sqrn_eltfp25519_1w_bmi2(a, times) do { \ + int ____counter = (times); \ + while (____counter-- > 0) \ + sqr_eltfp25519_1w_bmi2(a); \ +} while (0) + +#define copy_eltfp25519_1w(C, A) do { \ + (C)[0] = (A)[0]; \ + (C)[1] = (A)[1]; \ + (C)[2] = (A)[2]; \ + (C)[3] = (A)[3]; \ +} while (0) + +#define setzero_eltfp25519_1w(C) do { \ + (C)[0] = 0; \ + (C)[1] = 0; \ + (C)[2] = 0; \ + (C)[3] = 0; \ +} while (0) + +__aligned(32) static const u64 table_ladder_8k[252 * NUM_WORDS_ELTFP25519] = { + /* 1 */ 0xfffffffffffffff3UL, 0xffffffffffffffffUL, + 0xffffffffffffffffUL, 0x5fffffffffffffffUL, + /* 2 */ 0x6b8220f416aafe96UL, 0x82ebeb2b4f566a34UL, + 0xd5a9a5b075a5950fUL, 0x5142b2cf4b2488f4UL, + /* 3 */ 0x6aaebc750069680cUL, 0x89cf7820a0f99c41UL, + 0x2a58d9183b56d0f4UL, 0x4b5aca80e36011a4UL, + /* 4 */ 0x329132348c29745dUL, 0xf4a2e616e1642fd7UL, + 0x1e45bb03ff67bc34UL, 0x306912d0f42a9b4aUL, + /* 5 */ 0xff886507e6af7154UL, 0x04f50e13dfeec82fUL, + 0xaa512fe82abab5ceUL, 0x174e251a68d5f222UL, + /* 6 */ 0xcf96700d82028898UL, 0x1743e3370a2c02c5UL, + 0x379eec98b4e86eaaUL, 0x0c59888a51e0482eUL, + /* 7 */ 0xfbcbf1d699b5d189UL, 0xacaef0d58e9fdc84UL, + 0xc1c20d06231f7614UL, 0x2938218da274f972UL, + /* 8 */ 0xf6af49beff1d7f18UL, 0xcc541c22387ac9c2UL, + 0x96fcc9ef4015c56bUL, 0x69c1627c690913a9UL, + /* 9 */ 0x7a86fd2f4733db0eUL, 0xfdb8c4f29e087de9UL, + 0x095e4b1a8ea2a229UL, 0x1ad7a7c829b37a79UL, + /* 10 */ 0x342d89cad17ea0c0UL, 0x67bedda6cced2051UL, + 0x19ca31bf2bb42f74UL, 0x3df7b4c84980acbbUL, + /* 11 */ 0xa8c6444dc80ad883UL, 0xb91e440366e3ab85UL, + 0xc215cda00164f6d8UL, 0x3d867c6ef247e668UL, + /* 12 */ 0xc7dd582bcc3e658cUL, 0xfd2c4748ee0e5528UL, + 0xa0fd9b95cc9f4f71UL, 0x7529d871b0675ddfUL, + /* 13 */ 0xb8f568b42d3cbd78UL, 0x1233011b91f3da82UL, + 0x2dce6ccd4a7c3b62UL, 0x75e7fc8e9e498603UL, + /* 14 */ 
0x2f4f13f1fcd0b6ecUL, 0xf1a8ca1f29ff7a45UL, + 0xc249c1a72981e29bUL, 0x6ebe0dbb8c83b56aUL, + /* 15 */ 0x7114fa8d170bb222UL, 0x65a2dcd5bf93935fUL, + 0xbdc41f68b59c979aUL, 0x2f0eef79a2ce9289UL, + /* 16 */ 0x42ecbf0c083c37ceUL, 0x2930bc09ec496322UL, + 0xf294b0c19cfeac0dUL, 0x3780aa4bedfabb80UL, + /* 17 */ 0x56c17d3e7cead929UL, 0xe7cb4beb2e5722c5UL, + 0x0ce931732dbfe15aUL, 0x41b883c7621052f8UL, + /* 18 */ 0xdbf75ca0c3d25350UL, 0x2936be086eb1e351UL, + 0xc936e03cb4a9b212UL, 0x1d45bf82322225aaUL, + /* 19 */ 0xe81ab1036a024cc5UL, 0xe212201c304c9a72UL, + 0xc5d73fba6832b1fcUL, 0x20ffdb5a4d839581UL, + /* 20 */ 0xa283d367be5d0fadUL, 0x6c2b25ca8b164475UL, + 0x9d4935467caaf22eUL, 0x5166408eee85ff49UL, + /* 21 */ 0x3c67baa2fab4e361UL, 0xb3e433c67ef35cefUL, + 0x5259729241159b1cUL, 0x6a621892d5b0ab33UL, + /* 22 */ 0x20b74a387555cdcbUL, 0x532aa10e1208923fUL, + 0xeaa17b7762281dd1UL, 0x61ab3443f05c44bfUL, + /* 23 */ 0x257a6c422324def8UL, 0x131c6c1017e3cf7fUL, + 0x23758739f630a257UL, 0x295a407a01a78580UL, + /* 24 */ 0xf8c443246d5da8d9UL, 0x19d775450c52fa5dUL, + 0x2afcfc92731bf83dUL, 0x7d10c8e81b2b4700UL, + /* 25 */ 0xc8e0271f70baa20bUL, 0x993748867ca63957UL, + 0x5412efb3cb7ed4bbUL, 0x3196d36173e62975UL, + /* 26 */ 0xde5bcad141c7dffcUL, 0x47cc8cd2b395c848UL, + 0xa34cd942e11af3cbUL, 0x0256dbf2d04ecec2UL, + /* 27 */ 0x875ab7e94b0e667fUL, 0xcad4dd83c0850d10UL, + 0x47f12e8f4e72c79fUL, 0x5f1a87bb8c85b19bUL, + /* 28 */ 0x7ae9d0b6437f51b8UL, 0x12c7ce5518879065UL, + 0x2ade09fe5cf77aeeUL, 0x23a05a2f7d2c5627UL, + /* 29 */ 0x5908e128f17c169aUL, 0xf77498dd8ad0852dUL, + 0x74b4c4ceab102f64UL, 0x183abadd10139845UL, + /* 30 */ 0xb165ba8daa92aaacUL, 0xd5c5ef9599386705UL, + 0xbe2f8f0cf8fc40d1UL, 0x2701e635ee204514UL, + /* 31 */ 0x629fa80020156514UL, 0xf223868764a8c1ceUL, + 0x5b894fff0b3f060eUL, 0x60d9944cf708a3faUL, + /* 32 */ 0xaeea001a1c7a201fUL, 0xebf16a633ee2ce63UL, + 0x6f7709594c7a07e1UL, 0x79b958150d0208cbUL, + /* 33 */ 0x24b55e5301d410e7UL, 0xe3a34edff3fdc84dUL, + 0xd88768e4904032d8UL, 0x131384427b3aaeecUL, + /* 34 */ 0x8405e51286234f14UL, 0x14dc4739adb4c529UL, + 0xb8a2b5b250634ffdUL, 0x2fe2a94ad8a7ff93UL, + /* 35 */ 0xec5c57efe843faddUL, 0x2843ce40f0bb9918UL, + 0xa4b561d6cf3d6305UL, 0x743629bde8fb777eUL, + /* 36 */ 0x343edd46bbaf738fUL, 0xed981828b101a651UL, + 0xa401760b882c797aUL, 0x1fc223e28dc88730UL, + /* 37 */ 0x48604e91fc0fba0eUL, 0xb637f78f052c6fa4UL, + 0x91ccac3d09e9239cUL, 0x23f7eed4437a687cUL, + /* 38 */ 0x5173b1118d9bd800UL, 0x29d641b63189d4a7UL, + 0xfdbf177988bbc586UL, 0x2959894fcad81df5UL, + /* 39 */ 0xaebc8ef3b4bbc899UL, 0x4148995ab26992b9UL, + 0x24e20b0134f92cfbUL, 0x40d158894a05dee8UL, + /* 40 */ 0x46b00b1185af76f6UL, 0x26bac77873187a79UL, + 0x3dc0bf95ab8fff5fUL, 0x2a608bd8945524d7UL, + /* 41 */ 0x26449588bd446302UL, 0x7c4bc21c0388439cUL, + 0x8e98a4f383bd11b2UL, 0x26218d7bc9d876b9UL, + /* 42 */ 0xe3081542997c178aUL, 0x3c2d29a86fb6606fUL, + 0x5c217736fa279374UL, 0x7dde05734afeb1faUL, + /* 43 */ 0x3bf10e3906d42babUL, 0xe4f7803e1980649cUL, + 0xe6053bf89595bf7aUL, 0x394faf38da245530UL, + /* 44 */ 0x7a8efb58896928f4UL, 0xfbc778e9cc6a113cUL, + 0x72670ce330af596fUL, 0x48f222a81d3d6cf7UL, + /* 45 */ 0xf01fce410d72caa7UL, 0x5a20ecc7213b5595UL, + 0x7bc21165c1fa1483UL, 0x07f89ae31da8a741UL, + /* 46 */ 0x05d2c2b4c6830ff9UL, 0xd43e330fc6316293UL, + 0xa5a5590a96d3a904UL, 0x705edb91a65333b6UL, + /* 47 */ 0x048ee15e0bb9a5f7UL, 0x3240cfca9e0aaf5dUL, + 0x8f4b71ceedc4a40bUL, 0x621c0da3de544a6dUL, + /* 48 */ 0x92872836a08c4091UL, 0xce8375b010c91445UL, + 0x8a72eb524f276394UL, 0x2667fcfa7ec83635UL, + /* 49 */ 
0x7f4c173345e8752aUL, 0x061b47feee7079a5UL, + 0x25dd9afa9f86ff34UL, 0x3780cef5425dc89cUL, + /* 50 */ 0x1a46035a513bb4e9UL, 0x3e1ef379ac575adaUL, + 0xc78c5f1c5fa24b50UL, 0x321a967634fd9f22UL, + /* 51 */ 0x946707b8826e27faUL, 0x3dca84d64c506fd0UL, + 0xc189218075e91436UL, 0x6d9284169b3b8484UL, + /* 52 */ 0x3a67e840383f2ddfUL, 0x33eec9a30c4f9b75UL, + 0x3ec7c86fa783ef47UL, 0x26ec449fbac9fbc4UL, + /* 53 */ 0x5c0f38cba09b9e7dUL, 0x81168cc762a3478cUL, + 0x3e23b0d306fc121cUL, 0x5a238aa0a5efdcddUL, + /* 54 */ 0x1ba26121c4ea43ffUL, 0x36f8c77f7c8832b5UL, + 0x88fbea0b0adcf99aUL, 0x5ca9938ec25bebf9UL, + /* 55 */ 0xd5436a5e51fccda0UL, 0x1dbc4797c2cd893bUL, + 0x19346a65d3224a08UL, 0x0f5034e49b9af466UL, + /* 56 */ 0xf23c3967a1e0b96eUL, 0xe58b08fa867a4d88UL, + 0xfb2fabc6a7341679UL, 0x2a75381eb6026946UL, + /* 57 */ 0xc80a3be4c19420acUL, 0x66b1f6c681f2b6dcUL, + 0x7cf7036761e93388UL, 0x25abbbd8a660a4c4UL, + /* 58 */ 0x91ea12ba14fd5198UL, 0x684950fc4a3cffa9UL, + 0xf826842130f5ad28UL, 0x3ea988f75301a441UL, + /* 59 */ 0xc978109a695f8c6fUL, 0x1746eb4a0530c3f3UL, + 0x444d6d77b4459995UL, 0x75952b8c054e5cc7UL, + /* 60 */ 0xa3703f7915f4d6aaUL, 0x66c346202f2647d8UL, + 0xd01469df811d644bUL, 0x77fea47d81a5d71fUL, + /* 61 */ 0xc5e9529ef57ca381UL, 0x6eeeb4b9ce2f881aUL, + 0xb6e91a28e8009bd6UL, 0x4b80be3e9afc3fecUL, + /* 62 */ 0x7e3773c526aed2c5UL, 0x1b4afcb453c9a49dUL, + 0xa920bdd7baffb24dUL, 0x7c54699f122d400eUL, + /* 63 */ 0xef46c8e14fa94bc8UL, 0xe0b074ce2952ed5eUL, + 0xbea450e1dbd885d5UL, 0x61b68649320f712cUL, + /* 64 */ 0x8a485f7309ccbdd1UL, 0xbd06320d7d4d1a2dUL, + 0x25232973322dbef4UL, 0x445dc4758c17f770UL, + /* 65 */ 0xdb0434177cc8933cUL, 0xed6fe82175ea059fUL, + 0x1efebefdc053db34UL, 0x4adbe867c65daf99UL, + /* 66 */ 0x3acd71a2a90609dfUL, 0xe5e991856dd04050UL, + 0x1ec69b688157c23cUL, 0x697427f6885cfe4dUL, + /* 67 */ 0xd7be7b9b65e1a851UL, 0xa03d28d522c536ddUL, + 0x28399d658fd2b645UL, 0x49e5b7e17c2641e1UL, + /* 68 */ 0x6f8c3a98700457a4UL, 0x5078f0a25ebb6778UL, + 0xd13c3ccbc382960fUL, 0x2e003258a7df84b1UL, + /* 69 */ 0x8ad1f39be6296a1cUL, 0xc1eeaa652a5fbfb2UL, + 0x33ee0673fd26f3cbUL, 0x59256173a69d2cccUL, + /* 70 */ 0x41ea07aa4e18fc41UL, 0xd9fc19527c87a51eUL, + 0xbdaacb805831ca6fUL, 0x445b652dc916694fUL, + /* 71 */ 0xce92a3a7f2172315UL, 0x1edc282de11b9964UL, + 0xa1823aafe04c314aUL, 0x790a2d94437cf586UL, + /* 72 */ 0x71c447fb93f6e009UL, 0x8922a56722845276UL, + 0xbf70903b204f5169UL, 0x2f7a89891ba319feUL, + /* 73 */ 0x02a08eb577e2140cUL, 0xed9a4ed4427bdcf4UL, + 0x5253ec44e4323cd1UL, 0x3e88363c14e9355bUL, + /* 74 */ 0xaa66c14277110b8cUL, 0x1ae0391610a23390UL, + 0x2030bd12c93fc2a2UL, 0x3ee141579555c7abUL, + /* 75 */ 0x9214de3a6d6e7d41UL, 0x3ccdd88607f17efeUL, + 0x674f1288f8e11217UL, 0x5682250f329f93d0UL, + /* 76 */ 0x6cf00b136d2e396eUL, 0x6e4cf86f1014debfUL, + 0x5930b1b5bfcc4e83UL, 0x047069b48aba16b6UL, + /* 77 */ 0x0d4ce4ab69b20793UL, 0xb24db91a97d0fb9eUL, + 0xcdfa50f54e00d01dUL, 0x221b1085368bddb5UL, + /* 78 */ 0xe7e59468b1e3d8d2UL, 0x53c56563bd122f93UL, + 0xeee8a903e0663f09UL, 0x61efa662cbbe3d42UL, + /* 79 */ 0x2cf8ddddde6eab2aUL, 0x9bf80ad51435f231UL, + 0x5deadacec9f04973UL, 0x29275b5d41d29b27UL, + /* 80 */ 0xcfde0f0895ebf14fUL, 0xb9aab96b054905a7UL, + 0xcae80dd9a1c420fdUL, 0x0a63bf2f1673bbc7UL, + /* 81 */ 0x092f6e11958fbc8cUL, 0x672a81e804822fadUL, + 0xcac8351560d52517UL, 0x6f3f7722c8f192f8UL, + /* 82 */ 0xf8ba90ccc2e894b7UL, 0x2c7557a438ff9f0dUL, + 0x894d1d855ae52359UL, 0x68e122157b743d69UL, + /* 83 */ 0xd87e5570cfb919f3UL, 0x3f2cdecd95798db9UL, + 0x2121154710c0a2ceUL, 0x3c66a115246dc5b2UL, + /* 84 */ 
0xcbedc562294ecb72UL, 0xba7143c36a280b16UL, + 0x9610c2efd4078b67UL, 0x6144735d946a4b1eUL, + /* 85 */ 0x536f111ed75b3350UL, 0x0211db8c2041d81bUL, + 0xf93cb1000e10413cUL, 0x149dfd3c039e8876UL, + /* 86 */ 0xd479dde46b63155bUL, 0xb66e15e93c837976UL, + 0xdafde43b1f13e038UL, 0x5fafda1a2e4b0b35UL, + /* 87 */ 0x3600bbdf17197581UL, 0x3972050bbe3cd2c2UL, + 0x5938906dbdd5be86UL, 0x34fce5e43f9b860fUL, + /* 88 */ 0x75a8a4cd42d14d02UL, 0x828dabc53441df65UL, + 0x33dcabedd2e131d3UL, 0x3ebad76fb814d25fUL, + /* 89 */ 0xd4906f566f70e10fUL, 0x5d12f7aa51690f5aUL, + 0x45adb16e76cefcf2UL, 0x01f768aead232999UL, + /* 90 */ 0x2b6cc77b6248febdUL, 0x3cd30628ec3aaffdUL, + 0xce1c0b80d4ef486aUL, 0x4c3bff2ea6f66c23UL, + /* 91 */ 0x3f2ec4094aeaeb5fUL, 0x61b19b286e372ca7UL, + 0x5eefa966de2a701dUL, 0x23b20565de55e3efUL, + /* 92 */ 0xe301ca5279d58557UL, 0x07b2d4ce27c2874fUL, + 0xa532cd8a9dcf1d67UL, 0x2a52fee23f2bff56UL, + /* 93 */ 0x8624efb37cd8663dUL, 0xbbc7ac20ffbd7594UL, + 0x57b85e9c82d37445UL, 0x7b3052cb86a6ec66UL, + /* 94 */ 0x3482f0ad2525e91eUL, 0x2cb68043d28edca0UL, + 0xaf4f6d052e1b003aUL, 0x185f8c2529781b0aUL, + /* 95 */ 0xaa41de5bd80ce0d6UL, 0x9407b2416853e9d6UL, + 0x563ec36e357f4c3aUL, 0x4cc4b8dd0e297bceUL, + /* 96 */ 0xa2fc1a52ffb8730eUL, 0x1811f16e67058e37UL, + 0x10f9a366cddf4ee1UL, 0x72f4a0c4a0b9f099UL, + /* 97 */ 0x8c16c06f663f4ea7UL, 0x693b3af74e970fbaUL, + 0x2102e7f1d69ec345UL, 0x0ba53cbc968a8089UL, + /* 98 */ 0xca3d9dc7fea15537UL, 0x4c6824bb51536493UL, + 0xb9886314844006b1UL, 0x40d2a72ab454cc60UL, + /* 99 */ 0x5936a1b712570975UL, 0x91b9d648debda657UL, + 0x3344094bb64330eaUL, 0x006ba10d12ee51d0UL, + /* 100 */ 0x19228468f5de5d58UL, 0x0eb12f4c38cc05b0UL, + 0xa1039f9dd5601990UL, 0x4502d4ce4fff0e0bUL, + /* 101 */ 0xeb2054106837c189UL, 0xd0f6544c6dd3b93cUL, + 0x40727064c416d74fUL, 0x6e15c6114b502ef0UL, + /* 102 */ 0x4df2a398cfb1a76bUL, 0x11256c7419f2f6b1UL, + 0x4a497962066e6043UL, 0x705b3aab41355b44UL, + /* 103 */ 0x365ef536d797b1d8UL, 0x00076bd622ddf0dbUL, + 0x3bbf33b0e0575a88UL, 0x3777aa05c8e4ca4dUL, + /* 104 */ 0x392745c85578db5fUL, 0x6fda4149dbae5ae2UL, + 0xb1f0b00b8adc9867UL, 0x09963437d36f1da3UL, + /* 105 */ 0x7e824e90a5dc3853UL, 0xccb5f6641f135cbdUL, + 0x6736d86c87ce8fccUL, 0x625f3ce26604249fUL, + /* 106 */ 0xaf8ac8059502f63fUL, 0x0c05e70a2e351469UL, + 0x35292e9c764b6305UL, 0x1a394360c7e23ac3UL, + /* 107 */ 0xd5c6d53251183264UL, 0x62065abd43c2b74fUL, + 0xb5fbf5d03b973f9bUL, 0x13a3da3661206e5eUL, + /* 108 */ 0xc6bd5837725d94e5UL, 0x18e30912205016c5UL, + 0x2088ce1570033c68UL, 0x7fba1f495c837987UL, + /* 109 */ 0x5a8c7423f2f9079dUL, 0x1735157b34023fc5UL, + 0xe4f9b49ad2fab351UL, 0x6691ff72c878e33cUL, + /* 110 */ 0x122c2adedc5eff3eUL, 0xf8dd4bf1d8956cf4UL, + 0xeb86205d9e9e5bdaUL, 0x049b92b9d975c743UL, + /* 111 */ 0xa5379730b0f6c05aUL, 0x72a0ffacc6f3a553UL, + 0xb0032c34b20dcd6dUL, 0x470e9dbc88d5164aUL, + /* 112 */ 0xb19cf10ca237c047UL, 0xb65466711f6c81a2UL, + 0xb3321bd16dd80b43UL, 0x48c14f600c5fbe8eUL, + /* 113 */ 0x66451c264aa6c803UL, 0xb66e3904a4fa7da6UL, + 0xd45f19b0b3128395UL, 0x31602627c3c9bc10UL, + /* 114 */ 0x3120dc4832e4e10dUL, 0xeb20c46756c717f7UL, + 0x00f52e3f67280294UL, 0x566d4fc14730c509UL, + /* 115 */ 0x7e3a5d40fd837206UL, 0xc1e926dc7159547aUL, + 0x216730fba68d6095UL, 0x22e8c3843f69cea7UL, + /* 116 */ 0x33d074e8930e4b2bUL, 0xb6e4350e84d15816UL, + 0x5534c26ad6ba2365UL, 0x7773c12f89f1f3f3UL, + /* 117 */ 0x8cba404da57962aaUL, 0x5b9897a81999ce56UL, + 0x508e862f121692fcUL, 0x3a81907fa093c291UL, + /* 118 */ 0x0dded0ff4725a510UL, 0x10d8cc10673fc503UL, + 0x5b9d151c9f1f4e89UL, 0x32a5c1d5cb09a44cUL, + /* 119 */ 
0x1e0aa442b90541fbUL, 0x5f85eb7cc1b485dbUL, + 0xbee595ce8a9df2e5UL, 0x25e496c722422236UL, + /* 120 */ 0x5edf3c46cd0fe5b9UL, 0x34e75a7ed2a43388UL, + 0xe488de11d761e352UL, 0x0e878a01a085545cUL, + /* 121 */ 0xba493c77e021bb04UL, 0x2b4d1843c7df899aUL, + 0x9ea37a487ae80d67UL, 0x67a9958011e41794UL, + /* 122 */ 0x4b58051a6697b065UL, 0x47e33f7d8d6ba6d4UL, + 0xbb4da8d483ca46c1UL, 0x68becaa181c2db0dUL, + /* 123 */ 0x8d8980e90b989aa5UL, 0xf95eb14a2c93c99bUL, + 0x51c6c7c4796e73a2UL, 0x6e228363b5efb569UL, + /* 124 */ 0xc6bbc0b02dd624c8UL, 0x777eb47dec8170eeUL, + 0x3cde15a004cfafa9UL, 0x1dc6bc087160bf9bUL, + /* 125 */ 0x2e07e043eec34002UL, 0x18e9fc677a68dc7fUL, + 0xd8da03188bd15b9aUL, 0x48fbc3bb00568253UL, + /* 126 */ 0x57547d4cfb654ce1UL, 0xd3565b82a058e2adUL, + 0xf63eaf0bbf154478UL, 0x47531ef114dfbb18UL, + /* 127 */ 0xe1ec630a4278c587UL, 0x5507d546ca8e83f3UL, + 0x85e135c63adc0c2bUL, 0x0aa7efa85682844eUL, + /* 128 */ 0x72691ba8b3e1f615UL, 0x32b4e9701fbe3ffaUL, + 0x97b6d92e39bb7868UL, 0x2cfe53dea02e39e8UL, + /* 129 */ 0x687392cd85cd52b0UL, 0x27ff66c910e29831UL, + 0x97134556a9832d06UL, 0x269bb0360a84f8a0UL, + /* 130 */ 0x706e55457643f85cUL, 0x3734a48c9b597d1bUL, + 0x7aee91e8c6efa472UL, 0x5cd6abc198a9d9e0UL, + /* 131 */ 0x0e04de06cb3ce41aUL, 0xd8c6eb893402e138UL, + 0x904659bb686e3772UL, 0x7215c371746ba8c8UL, + /* 132 */ 0xfd12a97eeae4a2d9UL, 0x9514b7516394f2c5UL, + 0x266fd5809208f294UL, 0x5c847085619a26b9UL, + /* 133 */ 0x52985410fed694eaUL, 0x3c905b934a2ed254UL, + 0x10bb47692d3be467UL, 0x063b3d2d69e5e9e1UL, + /* 134 */ 0x472726eedda57debUL, 0xefb6c4ae10f41891UL, + 0x2b1641917b307614UL, 0x117c554fc4f45b7cUL, + /* 135 */ 0xc07cf3118f9d8812UL, 0x01dbd82050017939UL, + 0xd7e803f4171b2827UL, 0x1015e87487d225eaUL, + /* 136 */ 0xc58de3fed23acc4dUL, 0x50db91c294a7be2dUL, + 0x0b94d43d1c9cf457UL, 0x6b1640fa6e37524aUL, + /* 137 */ 0x692f346c5fda0d09UL, 0x200b1c59fa4d3151UL, + 0xb8c46f760777a296UL, 0x4b38395f3ffdfbcfUL, + /* 138 */ 0x18d25e00be54d671UL, 0x60d50582bec8aba6UL, + 0x87ad8f263b78b982UL, 0x50fdf64e9cda0432UL, + /* 139 */ 0x90f567aac578dcf0UL, 0xef1e9b0ef2a3133bUL, + 0x0eebba9242d9de71UL, 0x15473c9bf03101c7UL, + /* 140 */ 0x7c77e8ae56b78095UL, 0xb678e7666e6f078eUL, + 0x2da0b9615348ba1fUL, 0x7cf931c1ff733f0bUL, + /* 141 */ 0x26b357f50a0a366cUL, 0xe9708cf42b87d732UL, + 0xc13aeea5f91cb2c0UL, 0x35d90c991143bb4cUL, + /* 142 */ 0x47c1c404a9a0d9dcUL, 0x659e58451972d251UL, + 0x3875a8c473b38c31UL, 0x1fbd9ed379561f24UL, + /* 143 */ 0x11fabc6fd41ec28dUL, 0x7ef8dfe3cd2a2dcaUL, + 0x72e73b5d8c404595UL, 0x6135fa4954b72f27UL, + /* 144 */ 0xccfc32a2de24b69cUL, 0x3f55698c1f095d88UL, + 0xbe3350ed5ac3f929UL, 0x5e9bf806ca477eebUL, + /* 145 */ 0xe9ce8fb63c309f68UL, 0x5376f63565e1f9f4UL, + 0xd1afcfb35a6393f1UL, 0x6632a1ede5623506UL, + /* 146 */ 0x0b7d6c390c2ded4cUL, 0x56cb3281df04cb1fUL, + 0x66305a1249ecc3c7UL, 0x5d588b60a38ca72aUL, + /* 147 */ 0xa6ecbf78e8e5f42dUL, 0x86eeb44b3c8a3eecUL, + 0xec219c48fbd21604UL, 0x1aaf1af517c36731UL, + /* 148 */ 0xc306a2836769bde7UL, 0x208280622b1e2adbUL, + 0x8027f51ffbff94a6UL, 0x76cfa1ce1124f26bUL, + /* 149 */ 0x18eb00562422abb6UL, 0xf377c4d58f8c29c3UL, + 0x4dbbc207f531561aUL, 0x0253b7f082128a27UL, + /* 150 */ 0x3d1f091cb62c17e0UL, 0x4860e1abd64628a9UL, + 0x52d17436309d4253UL, 0x356f97e13efae576UL, + /* 151 */ 0xd351e11aa150535bUL, 0x3e6b45bb1dd878ccUL, + 0x0c776128bed92c98UL, 0x1d34ae93032885b8UL, + /* 152 */ 0x4ba0488ca85ba4c3UL, 0x985348c33c9ce6ceUL, + 0x66124c6f97bda770UL, 0x0f81a0290654124aUL, + /* 153 */ 0x9ed09ca6569b86fdUL, 0x811009fd18af9a2dUL, + 0xff08d03f93d8c20aUL, 
0x52a148199faef26bUL, + /* 154 */ 0x3e03f9dc2d8d1b73UL, 0x4205801873961a70UL, + 0xc0d987f041a35970UL, 0x07aa1f15a1c0d549UL, + /* 155 */ 0xdfd46ce08cd27224UL, 0x6d0a024f934e4239UL, + 0x808a7a6399897b59UL, 0x0a4556e9e13d95a2UL, + /* 156 */ 0xd21a991fe9c13045UL, 0x9b0e8548fe7751b8UL, + 0x5da643cb4bf30035UL, 0x77db28d63940f721UL, + /* 157 */ 0xfc5eeb614adc9011UL, 0x5229419ae8c411ebUL, + 0x9ec3e7787d1dcf74UL, 0x340d053e216e4cb5UL, + /* 158 */ 0xcac7af39b48df2b4UL, 0xc0faec2871a10a94UL, + 0x140a69245ca575edUL, 0x0cf1c37134273a4cUL, + /* 159 */ 0xc8ee306ac224b8a5UL, 0x57eaee7ccb4930b0UL, + 0xa1e806bdaacbe74fUL, 0x7d9a62742eeb657dUL, + /* 160 */ 0x9eb6b6ef546c4830UL, 0x885cca1fddb36e2eUL, + 0xe6b9f383ef0d7105UL, 0x58654fef9d2e0412UL, + /* 161 */ 0xa905c4ffbe0e8e26UL, 0x942de5df9b31816eUL, + 0x497d723f802e88e1UL, 0x30684dea602f408dUL, + /* 162 */ 0x21e5a278a3e6cb34UL, 0xaefb6e6f5b151dc4UL, + 0xb30b8e049d77ca15UL, 0x28c3c9cf53b98981UL, + /* 163 */ 0x287fb721556cdd2aUL, 0x0d317ca897022274UL, + 0x7468c7423a543258UL, 0x4a7f11464eb5642fUL, + /* 164 */ 0xa237a4774d193aa6UL, 0xd865986ea92129a1UL, + 0x24c515ecf87c1a88UL, 0x604003575f39f5ebUL, + /* 165 */ 0x47b9f189570a9b27UL, 0x2b98cede465e4b78UL, + 0x026df551dbb85c20UL, 0x74fcd91047e21901UL, + /* 166 */ 0x13e2a90a23c1bfa3UL, 0x0cb0074e478519f6UL, + 0x5ff1cbbe3af6cf44UL, 0x67fe5438be812dbeUL, + /* 167 */ 0xd13cf64fa40f05b0UL, 0x054dfb2f32283787UL, + 0x4173915b7f0d2aeaUL, 0x482f144f1f610d4eUL, + /* 168 */ 0xf6210201b47f8234UL, 0x5d0ae1929e70b990UL, + 0xdcd7f455b049567cUL, 0x7e93d0f1f0916f01UL, + /* 169 */ 0xdd79cbf18a7db4faUL, 0xbe8391bf6f74c62fUL, + 0x027145d14b8291bdUL, 0x585a73ea2cbf1705UL, + /* 170 */ 0x485ca03e928a0db2UL, 0x10fc01a5742857e7UL, + 0x2f482edbd6d551a7UL, 0x0f0433b5048fdb8aUL, + /* 171 */ 0x60da2e8dd7dc6247UL, 0x88b4c9d38cd4819aUL, + 0x13033ac001f66697UL, 0x273b24fe3b367d75UL, + /* 172 */ 0xc6e8f66a31b3b9d4UL, 0x281514a494df49d5UL, + 0xd1726fdfc8b23da7UL, 0x4b3ae7d103dee548UL, + /* 173 */ 0xc6256e19ce4b9d7eUL, 0xff5c5cf186e3c61cUL, + 0xacc63ca34b8ec145UL, 0x74621888fee66574UL, + /* 174 */ 0x956f409645290a1eUL, 0xef0bf8e3263a962eUL, + 0xed6a50eb5ec2647bUL, 0x0694283a9dca7502UL, + /* 175 */ 0x769b963643a2dcd1UL, 0x42b7c8ea09fc5353UL, + 0x4f002aee13397eabUL, 0x63005e2c19b7d63aUL, + /* 176 */ 0xca6736da63023beaUL, 0x966c7f6db12a99b7UL, + 0xace09390c537c5e1UL, 0x0b696063a1aa89eeUL, + /* 177 */ 0xebb03e97288c56e5UL, 0x432a9f9f938c8be8UL, + 0xa6a5a93d5b717f71UL, 0x1a5fb4c3e18f9d97UL, + /* 178 */ 0x1c94e7ad1c60cdceUL, 0xee202a43fc02c4a0UL, + 0x8dafe4d867c46a20UL, 0x0a10263c8ac27b58UL, + /* 179 */ 0xd0dea9dfe4432a4aUL, 0x856af87bbe9277c5UL, + 0xce8472acc212c71aUL, 0x6f151b6d9bbb1e91UL, + /* 180 */ 0x26776c527ceed56aUL, 0x7d211cb7fbf8faecUL, + 0x37ae66a6fd4609ccUL, 0x1f81b702d2770c42UL, + /* 181 */ 0x2fb0b057eac58392UL, 0xe1dd89fe29744e9dUL, + 0xc964f8eb17beb4f8UL, 0x29571073c9a2d41eUL, + /* 182 */ 0xa948a18981c0e254UL, 0x2df6369b65b22830UL, + 0xa33eb2d75fcfd3c6UL, 0x078cd6ec4199a01fUL, + /* 183 */ 0x4a584a41ad900d2fUL, 0x32142b78e2c74c52UL, + 0x68c4e8338431c978UL, 0x7f69ea9008689fc2UL, + /* 184 */ 0x52f2c81e46a38265UL, 0xfd78072d04a832fdUL, + 0x8cd7d5fa25359e94UL, 0x4de71b7454cc29d2UL, + /* 185 */ 0x42eb60ad1eda6ac9UL, 0x0aad37dfdbc09c3aUL, + 0x81004b71e33cc191UL, 0x44e6be345122803cUL, + /* 186 */ 0x03fe8388ba1920dbUL, 0xf5d57c32150db008UL, + 0x49c8c4281af60c29UL, 0x21edb518de701aeeUL, + /* 187 */ 0x7fb63e418f06dc99UL, 0xa4460d99c166d7b8UL, + 0x24dd5248ce520a83UL, 0x5ec3ad712b928358UL, + /* 188 */ 0x15022a5fbd17930fUL, 0xa4f64a77d82570e3UL, + 
0x12bc8d6915783712UL, 0x498194c0fc620abbUL, + /* 189 */ 0x38a2d9d255686c82UL, 0x785c6bd9193e21f0UL, + 0xe4d5c81ab24a5484UL, 0x56307860b2e20989UL, + /* 190 */ 0x429d55f78b4d74c4UL, 0x22f1834643350131UL, + 0x1e60c24598c71fffUL, 0x59f2f014979983efUL, + /* 191 */ 0x46a47d56eb494a44UL, 0x3e22a854d636a18eUL, + 0xb346e15274491c3bUL, 0x2ceafd4e5390cde7UL, + /* 192 */ 0xba8a8538be0d6675UL, 0x4b9074bb50818e23UL, + 0xcbdab89085d304c3UL, 0x61a24fe0e56192c4UL, + /* 193 */ 0xcb7615e6db525bcbUL, 0xdd7d8c35a567e4caUL, + 0xe6b4153acafcdd69UL, 0x2d668e097f3c9766UL, + /* 194 */ 0xa57e7e265ce55ef0UL, 0x5d9f4e527cd4b967UL, + 0xfbc83606492fd1e5UL, 0x090d52beb7c3f7aeUL, + /* 195 */ 0x09b9515a1e7b4d7cUL, 0x1f266a2599da44c0UL, + 0xa1c49548e2c55504UL, 0x7ef04287126f15ccUL, + /* 196 */ 0xfed1659dbd30ef15UL, 0x8b4ab9eec4e0277bUL, + 0x884d6236a5df3291UL, 0x1fd96ea6bf5cf788UL, + /* 197 */ 0x42a161981f190d9aUL, 0x61d849507e6052c1UL, + 0x9fe113bf285a2cd5UL, 0x7c22d676dbad85d8UL, + /* 198 */ 0x82e770ed2bfbd27dUL, 0x4c05b2ece996f5a5UL, + 0xcd40a9c2b0900150UL, 0x5895319213d9bf64UL, + /* 199 */ 0xe7cc5d703fea2e08UL, 0xb50c491258e2188cUL, + 0xcce30baa48205bf0UL, 0x537c659ccfa32d62UL, + /* 200 */ 0x37b6623a98cfc088UL, 0xfe9bed1fa4d6aca4UL, + 0x04d29b8e56a8d1b0UL, 0x725f71c40b519575UL, + /* 201 */ 0x28c7f89cd0339ce6UL, 0x8367b14469ddc18bUL, + 0x883ada83a6a1652cUL, 0x585f1974034d6c17UL, + /* 202 */ 0x89cfb266f1b19188UL, 0xe63b4863e7c35217UL, + 0xd88c9da6b4c0526aUL, 0x3e035c9df0954635UL, + /* 203 */ 0xdd9d5412fb45de9dUL, 0xdd684532e4cff40dUL, + 0x4b5c999b151d671cUL, 0x2d8c2cc811e7f690UL, + /* 204 */ 0x7f54be1d90055d40UL, 0xa464c5df464aaf40UL, + 0x33979624f0e917beUL, 0x2c018dc527356b30UL, + /* 205 */ 0xa5415024e330b3d4UL, 0x73ff3d96691652d3UL, + 0x94ec42c4ef9b59f1UL, 0x0747201618d08e5aUL, + /* 206 */ 0x4d6ca48aca411c53UL, 0x66415f2fcfa66119UL, + 0x9c4dd40051e227ffUL, 0x59810bc09a02f7ebUL, + /* 207 */ 0x2a7eb171b3dc101dUL, 0x441c5ab99ffef68eUL, + 0x32025c9b93b359eaUL, 0x5e8ce0a71e9d112fUL, + /* 208 */ 0xbfcccb92429503fdUL, 0xd271ba752f095d55UL, + 0x345ead5e972d091eUL, 0x18c8df11a83103baUL, + /* 209 */ 0x90cd949a9aed0f4cUL, 0xc5d1f4cb6660e37eUL, + 0xb8cac52d56c52e0bUL, 0x6e42e400c5808e0dUL, + /* 210 */ 0xa3b46966eeaefd23UL, 0x0c4f1f0be39ecdcaUL, + 0x189dc8c9d683a51dUL, 0x51f27f054c09351bUL, + /* 211 */ 0x4c487ccd2a320682UL, 0x587ea95bb3df1c96UL, + 0xc8ccf79e555cb8e8UL, 0x547dc829a206d73dUL, + /* 212 */ 0xb822a6cd80c39b06UL, 0xe96d54732000d4c6UL, + 0x28535b6f91463b4dUL, 0x228f4660e2486e1dUL, + /* 213 */ 0x98799538de8d3abfUL, 0x8cd8330045ebca6eUL, + 0x79952a008221e738UL, 0x4322e1a7535cd2bbUL, + /* 214 */ 0xb114c11819d1801cUL, 0x2016e4d84f3f5ec7UL, + 0xdd0e2df409260f4cUL, 0x5ec362c0ae5f7266UL, + /* 215 */ 0xc0462b18b8b2b4eeUL, 0x7cc8d950274d1afbUL, + 0xf25f7105436b02d2UL, 0x43bbf8dcbff9ccd3UL, + /* 216 */ 0xb6ad1767a039e9dfUL, 0xb0714da8f69d3583UL, + 0x5e55fa18b42931f5UL, 0x4ed5558f33c60961UL, + /* 217 */ 0x1fe37901c647a5ddUL, 0x593ddf1f8081d357UL, + 0x0249a4fd813fd7a6UL, 0x69acca274e9caf61UL, + /* 218 */ 0x047ba3ea330721c9UL, 0x83423fc20e7e1ea0UL, + 0x1df4c0af01314a60UL, 0x09a62dab89289527UL, + /* 219 */ 0xa5b325a49cc6cb00UL, 0xe94b5dc654b56cb6UL, + 0x3be28779adc994a0UL, 0x4296e8f8ba3a4aadUL, + /* 220 */ 0x328689761e451eabUL, 0x2e4d598bff59594aUL, + 0x49b96853d7a7084aUL, 0x4980a319601420a8UL, + /* 221 */ 0x9565b9e12f552c42UL, 0x8a5318db7100fe96UL, + 0x05c90b4d43add0d7UL, 0x538b4cd66a5d4edaUL, + /* 222 */ 0xf4e94fc3e89f039fUL, 0x592c9af26f618045UL, + 0x08a36eb5fd4b9550UL, 0x25fffaf6c2ed1419UL, + /* 223 */ 0x34434459cc79d354UL, 
0xeeecbfb4b1d5476bUL, + 0xddeb34a061615d99UL, 0x5129cecceb64b773UL, + /* 224 */ 0xee43215894993520UL, 0x772f9c7cf14c0b3bUL, + 0xd2e2fce306bedad5UL, 0x715f42b546f06a97UL, + /* 225 */ 0x434ecdceda5b5f1aUL, 0x0da17115a49741a9UL, + 0x680bd77c73edad2eUL, 0x487c02354edd9041UL, + /* 226 */ 0xb8efeff3a70ed9c4UL, 0x56a32aa3e857e302UL, + 0xdf3a68bd48a2a5a0UL, 0x07f650b73176c444UL, + /* 227 */ 0xe38b9b1626e0ccb1UL, 0x79e053c18b09fb36UL, + 0x56d90319c9f94964UL, 0x1ca941e7ac9ff5c4UL, + /* 228 */ 0x49c4df29162fa0bbUL, 0x8488cf3282b33305UL, + 0x95dfda14cabb437dUL, 0x3391f78264d5ad86UL, + /* 229 */ 0x729ae06ae2b5095dUL, 0xd58a58d73259a946UL, + 0xe9834262d13921edUL, 0x27fedafaa54bb592UL, + /* 230 */ 0xa99dc5b829ad48bbUL, 0x5f025742499ee260UL, + 0x802c8ecd5d7513fdUL, 0x78ceb3ef3f6dd938UL, + /* 231 */ 0xc342f44f8a135d94UL, 0x7b9edb44828cdda3UL, + 0x9436d11a0537cfe7UL, 0x5064b164ec1ab4c8UL, + /* 232 */ 0x7020eccfd37eb2fcUL, 0x1f31ea3ed90d25fcUL, + 0x1b930d7bdfa1bb34UL, 0x5344467a48113044UL, + /* 233 */ 0x70073170f25e6dfbUL, 0xe385dc1a50114cc8UL, + 0x2348698ac8fc4f00UL, 0x2a77a55284dd40d8UL, + /* 234 */ 0xfe06afe0c98c6ce4UL, 0xc235df96dddfd6e4UL, + 0x1428d01e33bf1ed3UL, 0x785768ec9300bdafUL, + /* 235 */ 0x9702e57a91deb63bUL, 0x61bdb8bfe5ce8b80UL, + 0x645b426f3d1d58acUL, 0x4804a82227a557bcUL, + /* 236 */ 0x8e57048ab44d2601UL, 0x68d6501a4b3a6935UL, + 0xc39c9ec3f9e1c293UL, 0x4172f257d4de63e2UL, + /* 237 */ 0xd368b450330c6401UL, 0x040d3017418f2391UL, + 0x2c34bb6090b7d90dUL, 0x16f649228fdfd51fUL, + /* 238 */ 0xbea6818e2b928ef5UL, 0xe28ccf91cdc11e72UL, + 0x594aaa68e77a36cdUL, 0x313034806c7ffd0fUL, + /* 239 */ 0x8a9d27ac2249bd65UL, 0x19a3b464018e9512UL, + 0xc26ccff352b37ec7UL, 0x056f68341d797b21UL, + /* 240 */ 0x5e79d6757efd2327UL, 0xfabdbcb6553afe15UL, + 0xd3e7222c6eaf5a60UL, 0x7046c76d4dae743bUL, + /* 241 */ 0x660be872b18d4a55UL, 0x19992518574e1496UL, + 0xc103053a302bdcbbUL, 0x3ed8e9800b218e8eUL, + /* 242 */ 0x7b0b9239fa75e03eUL, 0xefe9fb684633c083UL, + 0x98a35fbe391a7793UL, 0x6065510fe2d0fe34UL, + /* 243 */ 0x55cb668548abad0cUL, 0xb4584548da87e527UL, + 0x2c43ecea0107c1ddUL, 0x526028809372de35UL, + /* 244 */ 0x3415c56af9213b1fUL, 0x5bee1a4d017e98dbUL, + 0x13f6b105b5cf709bUL, 0x5ff20e3482b29ab6UL, + /* 245 */ 0x0aa29c75cc2e6c90UL, 0xfc7d73ca3a70e206UL, + 0x899fc38fc4b5c515UL, 0x250386b124ffc207UL, + /* 246 */ 0x54ea28d5ae3d2b56UL, 0x9913149dd6de60ceUL, + 0x16694fc58f06d6c1UL, 0x46b23975eb018fc7UL, + /* 247 */ 0x470a6a0fb4b7b4e2UL, 0x5d92475a8f7253deUL, + 0xabeee5b52fbd3adbUL, 0x7fa20801a0806968UL, + /* 248 */ 0x76f3faf19f7714d2UL, 0xb3e840c12f4660c3UL, + 0x0fb4cd8df212744eUL, 0x4b065a251d3a2dd2UL, + /* 249 */ 0x5cebde383d77cd4aUL, 0x6adf39df882c9cb1UL, + 0xa2dd242eb09af759UL, 0x3147c0e50e5f6422UL, + /* 250 */ 0x164ca5101d1350dbUL, 0xf8d13479c33fc962UL, + 0xe640ce4d13e5da08UL, 0x4bdee0c45061f8baUL, + /* 251 */ 0xd7c46dc1a4edb1c9UL, 0x5514d7b6437fd98aUL, + 0x58942f6bb2a1c00bUL, 0x2dffb2ab1d70710eUL, + /* 252 */ 0xccdfcf2fc18b6d68UL, 0xa8ebcba8b7806167UL, + 0x980697f95e2937e3UL, 0x02fbba1cd0126e8cUL +}; + +/* c is two 512-bit products: c0[0:7]=a0[0:3]*b0[0:3] and c1[8:15]=a1[4:7]*b1[4:7] + * a is two 256-bit integers: a0[0:3] and a1[4:7] + * b is two 256-bit integers: b0[0:3] and b1[4:7] + */ +static void mul2_256x256_integer_adx(u64 *const c, const u64 *const a, + const u64 *const b) +{ + asm volatile( + "xorl %%r14d, %%r14d ;" + "movq (%1), %%rdx; " /* A[0] */ + "mulx (%2), %%r8, %%r15; " /* A[0]*B[0] */ + "xorl %%r10d, %%r10d ;" + "movq %%r8, (%0) ;" + "mulx 8(%2), %%r10, %%rax; " /* A[0]*B[1] */ + "adox %%r10, 
%%r15 ;" + "mulx 16(%2), %%r8, %%rbx; " /* A[0]*B[2] */ + "adox %%r8, %%rax ;" + "mulx 24(%2), %%r10, %%rcx; " /* A[0]*B[3] */ + "adox %%r10, %%rbx ;" + /******************************************/ + "adox %%r14, %%rcx ;" + + "movq 8(%1), %%rdx; " /* A[1] */ + "mulx (%2), %%r8, %%r9; " /* A[1]*B[0] */ + "adox %%r15, %%r8 ;" + "movq %%r8, 8(%0) ;" + "mulx 8(%2), %%r10, %%r11; " /* A[1]*B[1] */ + "adox %%r10, %%r9 ;" + "adcx %%r9, %%rax ;" + "mulx 16(%2), %%r8, %%r13; " /* A[1]*B[2] */ + "adox %%r8, %%r11 ;" + "adcx %%r11, %%rbx ;" + "mulx 24(%2), %%r10, %%r15; " /* A[1]*B[3] */ + "adox %%r10, %%r13 ;" + "adcx %%r13, %%rcx ;" + /******************************************/ + "adox %%r14, %%r15 ;" + "adcx %%r14, %%r15 ;" + + "movq 16(%1), %%rdx; " /* A[2] */ + "xorl %%r10d, %%r10d ;" + "mulx (%2), %%r8, %%r9; " /* A[2]*B[0] */ + "adox %%rax, %%r8 ;" + "movq %%r8, 16(%0) ;" + "mulx 8(%2), %%r10, %%r11; " /* A[2]*B[1] */ + "adox %%r10, %%r9 ;" + "adcx %%r9, %%rbx ;" + "mulx 16(%2), %%r8, %%r13; " /* A[2]*B[2] */ + "adox %%r8, %%r11 ;" + "adcx %%r11, %%rcx ;" + "mulx 24(%2), %%r10, %%rax; " /* A[2]*B[3] */ + "adox %%r10, %%r13 ;" + "adcx %%r13, %%r15 ;" + /******************************************/ + "adox %%r14, %%rax ;" + "adcx %%r14, %%rax ;" + + "movq 24(%1), %%rdx; " /* A[3] */ + "xorl %%r10d, %%r10d ;" + "mulx (%2), %%r8, %%r9; " /* A[3]*B[0] */ + "adox %%rbx, %%r8 ;" + "movq %%r8, 24(%0) ;" + "mulx 8(%2), %%r10, %%r11; " /* A[3]*B[1] */ + "adox %%r10, %%r9 ;" + "adcx %%r9, %%rcx ;" + "movq %%rcx, 32(%0) ;" + "mulx 16(%2), %%r8, %%r13; " /* A[3]*B[2] */ + "adox %%r8, %%r11 ;" + "adcx %%r11, %%r15 ;" + "movq %%r15, 40(%0) ;" + "mulx 24(%2), %%r10, %%rbx; " /* A[3]*B[3] */ + "adox %%r10, %%r13 ;" + "adcx %%r13, %%rax ;" + "movq %%rax, 48(%0) ;" + /******************************************/ + "adox %%r14, %%rbx ;" + "adcx %%r14, %%rbx ;" + "movq %%rbx, 56(%0) ;" + + "movq 32(%1), %%rdx; " /* C[0] */ + "mulx 32(%2), %%r8, %%r15; " /* C[0]*D[0] */ + "xorl %%r10d, %%r10d ;" + "movq %%r8, 64(%0);" + "mulx 40(%2), %%r10, %%rax; " /* C[0]*D[1] */ + "adox %%r10, %%r15 ;" + "mulx 48(%2), %%r8, %%rbx; " /* C[0]*D[2] */ + "adox %%r8, %%rax ;" + "mulx 56(%2), %%r10, %%rcx; " /* C[0]*D[3] */ + "adox %%r10, %%rbx ;" + /******************************************/ + "adox %%r14, %%rcx ;" + + "movq 40(%1), %%rdx; " /* C[1] */ + "xorl %%r10d, %%r10d ;" + "mulx 32(%2), %%r8, %%r9; " /* C[1]*D[0] */ + "adox %%r15, %%r8 ;" + "movq %%r8, 72(%0);" + "mulx 40(%2), %%r10, %%r11; " /* C[1]*D[1] */ + "adox %%r10, %%r9 ;" + "adcx %%r9, %%rax ;" + "mulx 48(%2), %%r8, %%r13; " /* C[1]*D[2] */ + "adox %%r8, %%r11 ;" + "adcx %%r11, %%rbx ;" + "mulx 56(%2), %%r10, %%r15; " /* C[1]*D[3] */ + "adox %%r10, %%r13 ;" + "adcx %%r13, %%rcx ;" + /******************************************/ + "adox %%r14, %%r15 ;" + "adcx %%r14, %%r15 ;" + + "movq 48(%1), %%rdx; " /* C[2] */ + "xorl %%r10d, %%r10d ;" + "mulx 32(%2), %%r8, %%r9; " /* C[2]*D[0] */ + "adox %%rax, %%r8 ;" + "movq %%r8, 80(%0);" + "mulx 40(%2), %%r10, %%r11; " /* C[2]*D[1] */ + "adox %%r10, %%r9 ;" + "adcx %%r9, %%rbx ;" + "mulx 48(%2), %%r8, %%r13; " /* C[2]*D[2] */ + "adox %%r8, %%r11 ;" + "adcx %%r11, %%rcx ;" + "mulx 56(%2), %%r10, %%rax; " /* C[2]*D[3] */ + "adox %%r10, %%r13 ;" + "adcx %%r13, %%r15 ;" + /******************************************/ + "adox %%r14, %%rax ;" + "adcx %%r14, %%rax ;" + + "movq 56(%1), %%rdx; " /* C[3] */ + "xorl %%r10d, %%r10d ;" + "mulx 32(%2), %%r8, %%r9; " /* C[3]*D[0] */ + "adox %%rbx, %%r8 ;" + "movq %%r8, 88(%0);" + "mulx 40(%2), 
%%r10, %%r11; " /* C[3]*D[1] */ + "adox %%r10, %%r9 ;" + "adcx %%r9, %%rcx ;" + "movq %%rcx, 96(%0) ;" + "mulx 48(%2), %%r8, %%r13; " /* C[3]*D[2] */ + "adox %%r8, %%r11 ;" + "adcx %%r11, %%r15 ;" + "movq %%r15, 104(%0) ;" + "mulx 56(%2), %%r10, %%rbx; " /* C[3]*D[3] */ + "adox %%r10, %%r13 ;" + "adcx %%r13, %%rax ;" + "movq %%rax, 112(%0) ;" + /******************************************/ + "adox %%r14, %%rbx ;" + "adcx %%r14, %%rbx ;" + "movq %%rbx, 120(%0) ;" + : + : "r"(c), "r"(a), "r"(b) + : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", + "%r10", "%r11", "%r13", "%r14", "%r15"); +} + +static void mul2_256x256_integer_bmi2(u64 *const c, const u64 *const a, + const u64 *const b) +{ + asm volatile( + "movq (%1), %%rdx; " /* A[0] */ + "mulx (%2), %%r8, %%r15; " /* A[0]*B[0] */ + "movq %%r8, (%0) ;" + "mulx 8(%2), %%r10, %%rax; " /* A[0]*B[1] */ + "addq %%r10, %%r15 ;" + "mulx 16(%2), %%r8, %%rbx; " /* A[0]*B[2] */ + "adcq %%r8, %%rax ;" + "mulx 24(%2), %%r10, %%rcx; " /* A[0]*B[3] */ + "adcq %%r10, %%rbx ;" + /******************************************/ + "adcq $0, %%rcx ;" + + "movq 8(%1), %%rdx; " /* A[1] */ + "mulx (%2), %%r8, %%r9; " /* A[1]*B[0] */ + "addq %%r15, %%r8 ;" + "movq %%r8, 8(%0) ;" + "mulx 8(%2), %%r10, %%r11; " /* A[1]*B[1] */ + "adcq %%r10, %%r9 ;" + "mulx 16(%2), %%r8, %%r13; " /* A[1]*B[2] */ + "adcq %%r8, %%r11 ;" + "mulx 24(%2), %%r10, %%r15; " /* A[1]*B[3] */ + "adcq %%r10, %%r13 ;" + /******************************************/ + "adcq $0, %%r15 ;" + + "addq %%r9, %%rax ;" + "adcq %%r11, %%rbx ;" + "adcq %%r13, %%rcx ;" + "adcq $0, %%r15 ;" + + "movq 16(%1), %%rdx; " /* A[2] */ + "mulx (%2), %%r8, %%r9; " /* A[2]*B[0] */ + "addq %%rax, %%r8 ;" + "movq %%r8, 16(%0) ;" + "mulx 8(%2), %%r10, %%r11; " /* A[2]*B[1] */ + "adcq %%r10, %%r9 ;" + "mulx 16(%2), %%r8, %%r13; " /* A[2]*B[2] */ + "adcq %%r8, %%r11 ;" + "mulx 24(%2), %%r10, %%rax; " /* A[2]*B[3] */ + "adcq %%r10, %%r13 ;" + /******************************************/ + "adcq $0, %%rax ;" + + "addq %%r9, %%rbx ;" + "adcq %%r11, %%rcx ;" + "adcq %%r13, %%r15 ;" + "adcq $0, %%rax ;" + + "movq 24(%1), %%rdx; " /* A[3] */ + "mulx (%2), %%r8, %%r9; " /* A[3]*B[0] */ + "addq %%rbx, %%r8 ;" + "movq %%r8, 24(%0) ;" + "mulx 8(%2), %%r10, %%r11; " /* A[3]*B[1] */ + "adcq %%r10, %%r9 ;" + "mulx 16(%2), %%r8, %%r13; " /* A[3]*B[2] */ + "adcq %%r8, %%r11 ;" + "mulx 24(%2), %%r10, %%rbx; " /* A[3]*B[3] */ + "adcq %%r10, %%r13 ;" + /******************************************/ + "adcq $0, %%rbx ;" + + "addq %%r9, %%rcx ;" + "movq %%rcx, 32(%0) ;" + "adcq %%r11, %%r15 ;" + "movq %%r15, 40(%0) ;" + "adcq %%r13, %%rax ;" + "movq %%rax, 48(%0) ;" + "adcq $0, %%rbx ;" + "movq %%rbx, 56(%0) ;" + + "movq 32(%1), %%rdx; " /* C[0] */ + "mulx 32(%2), %%r8, %%r15; " /* C[0]*D[0] */ + "movq %%r8, 64(%0) ;" + "mulx 40(%2), %%r10, %%rax; " /* C[0]*D[1] */ + "addq %%r10, %%r15 ;" + "mulx 48(%2), %%r8, %%rbx; " /* C[0]*D[2] */ + "adcq %%r8, %%rax ;" + "mulx 56(%2), %%r10, %%rcx; " /* C[0]*D[3] */ + "adcq %%r10, %%rbx ;" + /******************************************/ + "adcq $0, %%rcx ;" + + "movq 40(%1), %%rdx; " /* C[1] */ + "mulx 32(%2), %%r8, %%r9; " /* C[1]*D[0] */ + "addq %%r15, %%r8 ;" + "movq %%r8, 72(%0) ;" + "mulx 40(%2), %%r10, %%r11; " /* C[1]*D[1] */ + "adcq %%r10, %%r9 ;" + "mulx 48(%2), %%r8, %%r13; " /* C[1]*D[2] */ + "adcq %%r8, %%r11 ;" + "mulx 56(%2), %%r10, %%r15; " /* C[1]*D[3] */ + "adcq %%r10, %%r13 ;" + /******************************************/ + "adcq $0, %%r15 ;" + + "addq %%r9, %%rax ;" + "adcq 
%%r11, %%rbx ;" + "adcq %%r13, %%rcx ;" + "adcq $0, %%r15 ;" + + "movq 48(%1), %%rdx; " /* C[2] */ + "mulx 32(%2), %%r8, %%r9; " /* C[2]*D[0] */ + "addq %%rax, %%r8 ;" + "movq %%r8, 80(%0) ;" + "mulx 40(%2), %%r10, %%r11; " /* C[2]*D[1] */ + "adcq %%r10, %%r9 ;" + "mulx 48(%2), %%r8, %%r13; " /* C[2]*D[2] */ + "adcq %%r8, %%r11 ;" + "mulx 56(%2), %%r10, %%rax; " /* C[2]*D[3] */ + "adcq %%r10, %%r13 ;" + /******************************************/ + "adcq $0, %%rax ;" + + "addq %%r9, %%rbx ;" + "adcq %%r11, %%rcx ;" + "adcq %%r13, %%r15 ;" + "adcq $0, %%rax ;" + + "movq 56(%1), %%rdx; " /* C[3] */ + "mulx 32(%2), %%r8, %%r9; " /* C[3]*D[0] */ + "addq %%rbx, %%r8 ;" + "movq %%r8, 88(%0) ;" + "mulx 40(%2), %%r10, %%r11; " /* C[3]*D[1] */ + "adcq %%r10, %%r9 ;" + "mulx 48(%2), %%r8, %%r13; " /* C[3]*D[2] */ + "adcq %%r8, %%r11 ;" + "mulx 56(%2), %%r10, %%rbx; " /* C[3]*D[3] */ + "adcq %%r10, %%r13 ;" + /******************************************/ + "adcq $0, %%rbx ;" + + "addq %%r9, %%rcx ;" + "movq %%rcx, 96(%0) ;" + "adcq %%r11, %%r15 ;" + "movq %%r15, 104(%0) ;" + "adcq %%r13, %%rax ;" + "movq %%rax, 112(%0) ;" + "adcq $0, %%rbx ;" + "movq %%rbx, 120(%0) ;" + : + : "r"(c), "r"(a), "r"(b) + : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", + "%r10", "%r11", "%r13", "%r15"); +} + +static void sqr2_256x256_integer_adx(u64 *const c, const u64 *const a) +{ + asm volatile( + "movq (%1), %%rdx ;" /* A[0] */ + "mulx 8(%1), %%r8, %%r14 ;" /* A[1]*A[0] */ + "xorl %%r15d, %%r15d;" + "mulx 16(%1), %%r9, %%r10 ;" /* A[2]*A[0] */ + "adcx %%r14, %%r9 ;" + "mulx 24(%1), %%rax, %%rcx ;" /* A[3]*A[0] */ + "adcx %%rax, %%r10 ;" + "movq 24(%1), %%rdx ;" /* A[3] */ + "mulx 8(%1), %%r11, %%rbx ;" /* A[1]*A[3] */ + "adcx %%rcx, %%r11 ;" + "mulx 16(%1), %%rax, %%r13 ;" /* A[2]*A[3] */ + "adcx %%rax, %%rbx ;" + "movq 8(%1), %%rdx ;" /* A[1] */ + "adcx %%r15, %%r13 ;" + "mulx 16(%1), %%rax, %%rcx ;" /* A[2]*A[1] */ + "movq $0, %%r14 ;" + /******************************************/ + "adcx %%r15, %%r14 ;" + + "xorl %%r15d, %%r15d;" + "adox %%rax, %%r10 ;" + "adcx %%r8, %%r8 ;" + "adox %%rcx, %%r11 ;" + "adcx %%r9, %%r9 ;" + "adox %%r15, %%rbx ;" + "adcx %%r10, %%r10 ;" + "adox %%r15, %%r13 ;" + "adcx %%r11, %%r11 ;" + "adox %%r15, %%r14 ;" + "adcx %%rbx, %%rbx ;" + "adcx %%r13, %%r13 ;" + "adcx %%r14, %%r14 ;" + + "movq (%1), %%rdx ;" + "mulx %%rdx, %%rax, %%rcx ;" /* A[0]^2 */ + /*******************/ + "movq %%rax, 0(%0) ;" + "addq %%rcx, %%r8 ;" + "movq %%r8, 8(%0) ;" + "movq 8(%1), %%rdx ;" + "mulx %%rdx, %%rax, %%rcx ;" /* A[1]^2 */ + "adcq %%rax, %%r9 ;" + "movq %%r9, 16(%0) ;" + "adcq %%rcx, %%r10 ;" + "movq %%r10, 24(%0) ;" + "movq 16(%1), %%rdx ;" + "mulx %%rdx, %%rax, %%rcx ;" /* A[2]^2 */ + "adcq %%rax, %%r11 ;" + "movq %%r11, 32(%0) ;" + "adcq %%rcx, %%rbx ;" + "movq %%rbx, 40(%0) ;" + "movq 24(%1), %%rdx ;" + "mulx %%rdx, %%rax, %%rcx ;" /* A[3]^2 */ + "adcq %%rax, %%r13 ;" + "movq %%r13, 48(%0) ;" + "adcq %%rcx, %%r14 ;" + "movq %%r14, 56(%0) ;" + + + "movq 32(%1), %%rdx ;" /* B[0] */ + "mulx 40(%1), %%r8, %%r14 ;" /* B[1]*B[0] */ + "xorl %%r15d, %%r15d;" + "mulx 48(%1), %%r9, %%r10 ;" /* B[2]*B[0] */ + "adcx %%r14, %%r9 ;" + "mulx 56(%1), %%rax, %%rcx ;" /* B[3]*B[0] */ + "adcx %%rax, %%r10 ;" + "movq 56(%1), %%rdx ;" /* B[3] */ + "mulx 40(%1), %%r11, %%rbx ;" /* B[1]*B[3] */ + "adcx %%rcx, %%r11 ;" + "mulx 48(%1), %%rax, %%r13 ;" /* B[2]*B[3] */ + "adcx %%rax, %%rbx ;" + "movq 40(%1), %%rdx ;" /* B[1] */ + "adcx %%r15, %%r13 ;" + "mulx 48(%1), %%rax, %%rcx ;" /* B[2]*B[1] */ + "movq $0, 
%%r14 ;" + /******************************************/ + "adcx %%r15, %%r14 ;" + + "xorl %%r15d, %%r15d;" + "adox %%rax, %%r10 ;" + "adcx %%r8, %%r8 ;" + "adox %%rcx, %%r11 ;" + "adcx %%r9, %%r9 ;" + "adox %%r15, %%rbx ;" + "adcx %%r10, %%r10 ;" + "adox %%r15, %%r13 ;" + "adcx %%r11, %%r11 ;" + "adox %%r15, %%r14 ;" + "adcx %%rbx, %%rbx ;" + "adcx %%r13, %%r13 ;" + "adcx %%r14, %%r14 ;" + + "movq 32(%1), %%rdx ;" + "mulx %%rdx, %%rax, %%rcx ;" /* B[0]^2 */ + /*******************/ + "movq %%rax, 64(%0) ;" + "addq %%rcx, %%r8 ;" + "movq %%r8, 72(%0) ;" + "movq 40(%1), %%rdx ;" + "mulx %%rdx, %%rax, %%rcx ;" /* B[1]^2 */ + "adcq %%rax, %%r9 ;" + "movq %%r9, 80(%0) ;" + "adcq %%rcx, %%r10 ;" + "movq %%r10, 88(%0) ;" + "movq 48(%1), %%rdx ;" + "mulx %%rdx, %%rax, %%rcx ;" /* B[2]^2 */ + "adcq %%rax, %%r11 ;" + "movq %%r11, 96(%0) ;" + "adcq %%rcx, %%rbx ;" + "movq %%rbx, 104(%0) ;" + "movq 56(%1), %%rdx ;" + "mulx %%rdx, %%rax, %%rcx ;" /* B[3]^2 */ + "adcq %%rax, %%r13 ;" + "movq %%r13, 112(%0) ;" + "adcq %%rcx, %%r14 ;" + "movq %%r14, 120(%0) ;" + : + : "r"(c), "r"(a) + : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", + "%r10", "%r11", "%r13", "%r14", "%r15"); +} + +static void sqr2_256x256_integer_bmi2(u64 *const c, const u64 *const a) +{ + asm volatile( + "movq 8(%1), %%rdx ;" /* A[1] */ + "mulx (%1), %%r8, %%r9 ;" /* A[0]*A[1] */ + "mulx 16(%1), %%r10, %%r11 ;" /* A[2]*A[1] */ + "mulx 24(%1), %%rcx, %%r14 ;" /* A[3]*A[1] */ + + "movq 16(%1), %%rdx ;" /* A[2] */ + "mulx 24(%1), %%r15, %%r13 ;" /* A[3]*A[2] */ + "mulx (%1), %%rax, %%rdx ;" /* A[0]*A[2] */ + + "addq %%rax, %%r9 ;" + "adcq %%rdx, %%r10 ;" + "adcq %%rcx, %%r11 ;" + "adcq %%r14, %%r15 ;" + "adcq $0, %%r13 ;" + "movq $0, %%r14 ;" + "adcq $0, %%r14 ;" + + "movq (%1), %%rdx ;" /* A[0] */ + "mulx 24(%1), %%rax, %%rcx ;" /* A[0]*A[3] */ + + "addq %%rax, %%r10 ;" + "adcq %%rcx, %%r11 ;" + "adcq $0, %%r15 ;" + "adcq $0, %%r13 ;" + "adcq $0, %%r14 ;" + + "shldq $1, %%r13, %%r14 ;" + "shldq $1, %%r15, %%r13 ;" + "shldq $1, %%r11, %%r15 ;" + "shldq $1, %%r10, %%r11 ;" + "shldq $1, %%r9, %%r10 ;" + "shldq $1, %%r8, %%r9 ;" + "shlq $1, %%r8 ;" + + /*******************/ + "mulx %%rdx, %%rax, %%rcx ; " /* A[0]^2 */ + /*******************/ + "movq %%rax, 0(%0) ;" + "addq %%rcx, %%r8 ;" + "movq %%r8, 8(%0) ;" + "movq 8(%1), %%rdx ;" + "mulx %%rdx, %%rax, %%rcx ; " /* A[1]^2 */ + "adcq %%rax, %%r9 ;" + "movq %%r9, 16(%0) ;" + "adcq %%rcx, %%r10 ;" + "movq %%r10, 24(%0) ;" + "movq 16(%1), %%rdx ;" + "mulx %%rdx, %%rax, %%rcx ; " /* A[2]^2 */ + "adcq %%rax, %%r11 ;" + "movq %%r11, 32(%0) ;" + "adcq %%rcx, %%r15 ;" + "movq %%r15, 40(%0) ;" + "movq 24(%1), %%rdx ;" + "mulx %%rdx, %%rax, %%rcx ; " /* A[3]^2 */ + "adcq %%rax, %%r13 ;" + "movq %%r13, 48(%0) ;" + "adcq %%rcx, %%r14 ;" + "movq %%r14, 56(%0) ;" + + "movq 40(%1), %%rdx ;" /* B[1] */ + "mulx 32(%1), %%r8, %%r9 ;" /* B[0]*B[1] */ + "mulx 48(%1), %%r10, %%r11 ;" /* B[2]*B[1] */ + "mulx 56(%1), %%rcx, %%r14 ;" /* B[3]*B[1] */ + + "movq 48(%1), %%rdx ;" /* B[2] */ + "mulx 56(%1), %%r15, %%r13 ;" /* B[3]*B[2] */ + "mulx 32(%1), %%rax, %%rdx ;" /* B[0]*B[2] */ + + "addq %%rax, %%r9 ;" + "adcq %%rdx, %%r10 ;" + "adcq %%rcx, %%r11 ;" + "adcq %%r14, %%r15 ;" + "adcq $0, %%r13 ;" + "movq $0, %%r14 ;" + "adcq $0, %%r14 ;" + + "movq 32(%1), %%rdx ;" /* B[0] */ + "mulx 56(%1), %%rax, %%rcx ;" /* B[0]*B[3] */ + + "addq %%rax, %%r10 ;" + "adcq %%rcx, %%r11 ;" + "adcq $0, %%r15 ;" + "adcq $0, %%r13 ;" + "adcq $0, %%r14 ;" + + "shldq $1, %%r13, %%r14 ;" + "shldq $1, %%r15, %%r13 ;" + "shldq 
$1, %%r11, %%r15 ;" + "shldq $1, %%r10, %%r11 ;" + "shldq $1, %%r9, %%r10 ;" + "shldq $1, %%r8, %%r9 ;" + "shlq $1, %%r8 ;" + + /*******************/ + "mulx %%rdx, %%rax, %%rcx ; " /* B[0]^2 */ + /*******************/ + "movq %%rax, 64(%0) ;" + "addq %%rcx, %%r8 ;" + "movq %%r8, 72(%0) ;" + "movq 40(%1), %%rdx ;" + "mulx %%rdx, %%rax, %%rcx ; " /* B[1]^2 */ + "adcq %%rax, %%r9 ;" + "movq %%r9, 80(%0) ;" + "adcq %%rcx, %%r10 ;" + "movq %%r10, 88(%0) ;" + "movq 48(%1), %%rdx ;" + "mulx %%rdx, %%rax, %%rcx ; " /* B[2]^2 */ + "adcq %%rax, %%r11 ;" + "movq %%r11, 96(%0) ;" + "adcq %%rcx, %%r15 ;" + "movq %%r15, 104(%0) ;" + "movq 56(%1), %%rdx ;" + "mulx %%rdx, %%rax, %%rcx ; " /* B[3]^2 */ + "adcq %%rax, %%r13 ;" + "movq %%r13, 112(%0) ;" + "adcq %%rcx, %%r14 ;" + "movq %%r14, 120(%0) ;" + : + : "r"(c), "r"(a) + : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", + "%r11", "%r13", "%r14", "%r15"); +} + +static void red_eltfp25519_2w_adx(u64 *const c, const u64 *const a) +{ + asm volatile( + "movl $38, %%edx; " /* 2*c = 38 = 2^256 */ + "mulx 32(%1), %%r8, %%r10; " /* c*C[4] */ + "xorl %%ebx, %%ebx ;" + "adox (%1), %%r8 ;" + "mulx 40(%1), %%r9, %%r11; " /* c*C[5] */ + "adcx %%r10, %%r9 ;" + "adox 8(%1), %%r9 ;" + "mulx 48(%1), %%r10, %%rax; " /* c*C[6] */ + "adcx %%r11, %%r10 ;" + "adox 16(%1), %%r10 ;" + "mulx 56(%1), %%r11, %%rcx; " /* c*C[7] */ + "adcx %%rax, %%r11 ;" + "adox 24(%1), %%r11 ;" + /***************************************/ + "adcx %%rbx, %%rcx ;" + "adox %%rbx, %%rcx ;" + "imul %%rdx, %%rcx ;" /* c*C[4], cf=0, of=0 */ + "adcx %%rcx, %%r8 ;" + "adcx %%rbx, %%r9 ;" + "movq %%r9, 8(%0) ;" + "adcx %%rbx, %%r10 ;" + "movq %%r10, 16(%0) ;" + "adcx %%rbx, %%r11 ;" + "movq %%r11, 24(%0) ;" + "mov $0, %%ecx ;" + "cmovc %%edx, %%ecx ;" + "addq %%rcx, %%r8 ;" + "movq %%r8, (%0) ;" + + "mulx 96(%1), %%r8, %%r10; " /* c*C[4] */ + "xorl %%ebx, %%ebx ;" + "adox 64(%1), %%r8 ;" + "mulx 104(%1), %%r9, %%r11; " /* c*C[5] */ + "adcx %%r10, %%r9 ;" + "adox 72(%1), %%r9 ;" + "mulx 112(%1), %%r10, %%rax; " /* c*C[6] */ + "adcx %%r11, %%r10 ;" + "adox 80(%1), %%r10 ;" + "mulx 120(%1), %%r11, %%rcx; " /* c*C[7] */ + "adcx %%rax, %%r11 ;" + "adox 88(%1), %%r11 ;" + /****************************************/ + "adcx %%rbx, %%rcx ;" + "adox %%rbx, %%rcx ;" + "imul %%rdx, %%rcx ;" /* c*C[4], cf=0, of=0 */ + "adcx %%rcx, %%r8 ;" + "adcx %%rbx, %%r9 ;" + "movq %%r9, 40(%0) ;" + "adcx %%rbx, %%r10 ;" + "movq %%r10, 48(%0) ;" + "adcx %%rbx, %%r11 ;" + "movq %%r11, 56(%0) ;" + "mov $0, %%ecx ;" + "cmovc %%edx, %%ecx ;" + "addq %%rcx, %%r8 ;" + "movq %%r8, 32(%0) ;" + : + : "r"(c), "r"(a) + : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", + "%r10", "%r11"); +} + +static void red_eltfp25519_2w_bmi2(u64 *const c, const u64 *const a) +{ + asm volatile( + "movl $38, %%edx ; " /* 2*c = 38 = 2^256 */ + "mulx 32(%1), %%r8, %%r10 ;" /* c*C[4] */ + "mulx 40(%1), %%r9, %%r11 ;" /* c*C[5] */ + "addq %%r10, %%r9 ;" + "mulx 48(%1), %%r10, %%rax ;" /* c*C[6] */ + "adcq %%r11, %%r10 ;" + "mulx 56(%1), %%r11, %%rcx ;" /* c*C[7] */ + "adcq %%rax, %%r11 ;" + /***************************************/ + "adcq $0, %%rcx ;" + "addq (%1), %%r8 ;" + "adcq 8(%1), %%r9 ;" + "adcq 16(%1), %%r10 ;" + "adcq 24(%1), %%r11 ;" + "adcq $0, %%rcx ;" + "imul %%rdx, %%rcx ;" /* c*C[4], cf=0 */ + "addq %%rcx, %%r8 ;" + "adcq $0, %%r9 ;" + "movq %%r9, 8(%0) ;" + "adcq $0, %%r10 ;" + "movq %%r10, 16(%0) ;" + "adcq $0, %%r11 ;" + "movq %%r11, 24(%0) ;" + "mov $0, %%ecx ;" + "cmovc %%edx, %%ecx ;" + "addq %%rcx, %%r8 ;" + 
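+		/* 2^256 is congruent to 38 mod 2^255-19, so a carry out of
+		 * the four-limb addition above is folded back in as +38
+		 * (selected by the cmovc); the stored result stays below
+		 * 2^256 and congruent to the input, with the canonical
+		 * reduction deferred to fred_eltfp25519_1w(). */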
"movq %%r8, (%0) ;" + + "mulx 96(%1), %%r8, %%r10 ;" /* c*C[4] */ + "mulx 104(%1), %%r9, %%r11 ;" /* c*C[5] */ + "addq %%r10, %%r9 ;" + "mulx 112(%1), %%r10, %%rax ;" /* c*C[6] */ + "adcq %%r11, %%r10 ;" + "mulx 120(%1), %%r11, %%rcx ;" /* c*C[7] */ + "adcq %%rax, %%r11 ;" + /****************************************/ + "adcq $0, %%rcx ;" + "addq 64(%1), %%r8 ;" + "adcq 72(%1), %%r9 ;" + "adcq 80(%1), %%r10 ;" + "adcq 88(%1), %%r11 ;" + "adcq $0, %%rcx ;" + "imul %%rdx, %%rcx ;" /* c*C[4], cf=0 */ + "addq %%rcx, %%r8 ;" + "adcq $0, %%r9 ;" + "movq %%r9, 40(%0) ;" + "adcq $0, %%r10 ;" + "movq %%r10, 48(%0) ;" + "adcq $0, %%r11 ;" + "movq %%r11, 56(%0) ;" + "mov $0, %%ecx ;" + "cmovc %%edx, %%ecx ;" + "addq %%rcx, %%r8 ;" + "movq %%r8, 32(%0) ;" + : + : "r"(c), "r"(a) + : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", + "%r11"); +} + +static void mul_256x256_integer_adx(u64 *const c, const u64 *const a, + const u64 *const b) +{ + asm volatile( + "movq (%1), %%rdx; " /* A[0] */ + "mulx (%2), %%r8, %%r9; " /* A[0]*B[0] */ + "xorl %%r10d, %%r10d ;" + "movq %%r8, (%0) ;" + "mulx 8(%2), %%r10, %%r11; " /* A[0]*B[1] */ + "adox %%r9, %%r10 ;" + "movq %%r10, 8(%0) ;" + "mulx 16(%2), %%r15, %%r13; " /* A[0]*B[2] */ + "adox %%r11, %%r15 ;" + "mulx 24(%2), %%r14, %%rdx; " /* A[0]*B[3] */ + "adox %%r13, %%r14 ;" + "movq $0, %%rax ;" + /******************************************/ + "adox %%rdx, %%rax ;" + + "movq 8(%1), %%rdx; " /* A[1] */ + "mulx (%2), %%r8, %%r9; " /* A[1]*B[0] */ + "xorl %%r10d, %%r10d ;" + "adcx 8(%0), %%r8 ;" + "movq %%r8, 8(%0) ;" + "mulx 8(%2), %%r10, %%r11; " /* A[1]*B[1] */ + "adox %%r9, %%r10 ;" + "adcx %%r15, %%r10 ;" + "movq %%r10, 16(%0) ;" + "mulx 16(%2), %%r15, %%r13; " /* A[1]*B[2] */ + "adox %%r11, %%r15 ;" + "adcx %%r14, %%r15 ;" + "movq $0, %%r8 ;" + "mulx 24(%2), %%r14, %%rdx; " /* A[1]*B[3] */ + "adox %%r13, %%r14 ;" + "adcx %%rax, %%r14 ;" + "movq $0, %%rax ;" + /******************************************/ + "adox %%rdx, %%rax ;" + "adcx %%r8, %%rax ;" + + "movq 16(%1), %%rdx; " /* A[2] */ + "mulx (%2), %%r8, %%r9; " /* A[2]*B[0] */ + "xorl %%r10d, %%r10d ;" + "adcx 16(%0), %%r8 ;" + "movq %%r8, 16(%0) ;" + "mulx 8(%2), %%r10, %%r11; " /* A[2]*B[1] */ + "adox %%r9, %%r10 ;" + "adcx %%r15, %%r10 ;" + "movq %%r10, 24(%0) ;" + "mulx 16(%2), %%r15, %%r13; " /* A[2]*B[2] */ + "adox %%r11, %%r15 ;" + "adcx %%r14, %%r15 ;" + "movq $0, %%r8 ;" + "mulx 24(%2), %%r14, %%rdx; " /* A[2]*B[3] */ + "adox %%r13, %%r14 ;" + "adcx %%rax, %%r14 ;" + "movq $0, %%rax ;" + /******************************************/ + "adox %%rdx, %%rax ;" + "adcx %%r8, %%rax ;" + + "movq 24(%1), %%rdx; " /* A[3] */ + "mulx (%2), %%r8, %%r9; " /* A[3]*B[0] */ + "xorl %%r10d, %%r10d ;" + "adcx 24(%0), %%r8 ;" + "movq %%r8, 24(%0) ;" + "mulx 8(%2), %%r10, %%r11; " /* A[3]*B[1] */ + "adox %%r9, %%r10 ;" + "adcx %%r15, %%r10 ;" + "movq %%r10, 32(%0) ;" + "mulx 16(%2), %%r15, %%r13; " /* A[3]*B[2] */ + "adox %%r11, %%r15 ;" + "adcx %%r14, %%r15 ;" + "movq %%r15, 40(%0) ;" + "movq $0, %%r8 ;" + "mulx 24(%2), %%r14, %%rdx; " /* A[3]*B[3] */ + "adox %%r13, %%r14 ;" + "adcx %%rax, %%r14 ;" + "movq %%r14, 48(%0) ;" + "movq $0, %%rax ;" + /******************************************/ + "adox %%rdx, %%rax ;" + "adcx %%r8, %%rax ;" + "movq %%rax, 56(%0) ;" + : + : "r"(c), "r"(a), "r"(b) + : "memory", "cc", "%rax", "%rdx", "%r8", "%r9", "%r10", "%r11", + "%r13", "%r14", "%r15"); +} + +static void mul_256x256_integer_bmi2(u64 *const c, const u64 *const a, + const u64 *const b) +{ + asm volatile( + "movq 
(%1), %%rdx; " /* A[0] */ + "mulx (%2), %%r8, %%r15; " /* A[0]*B[0] */ + "movq %%r8, (%0) ;" + "mulx 8(%2), %%r10, %%rax; " /* A[0]*B[1] */ + "addq %%r10, %%r15 ;" + "mulx 16(%2), %%r8, %%rbx; " /* A[0]*B[2] */ + "adcq %%r8, %%rax ;" + "mulx 24(%2), %%r10, %%rcx; " /* A[0]*B[3] */ + "adcq %%r10, %%rbx ;" + /******************************************/ + "adcq $0, %%rcx ;" + + "movq 8(%1), %%rdx; " /* A[1] */ + "mulx (%2), %%r8, %%r9; " /* A[1]*B[0] */ + "addq %%r15, %%r8 ;" + "movq %%r8, 8(%0) ;" + "mulx 8(%2), %%r10, %%r11; " /* A[1]*B[1] */ + "adcq %%r10, %%r9 ;" + "mulx 16(%2), %%r8, %%r13; " /* A[1]*B[2] */ + "adcq %%r8, %%r11 ;" + "mulx 24(%2), %%r10, %%r15; " /* A[1]*B[3] */ + "adcq %%r10, %%r13 ;" + /******************************************/ + "adcq $0, %%r15 ;" + + "addq %%r9, %%rax ;" + "adcq %%r11, %%rbx ;" + "adcq %%r13, %%rcx ;" + "adcq $0, %%r15 ;" + + "movq 16(%1), %%rdx; " /* A[2] */ + "mulx (%2), %%r8, %%r9; " /* A[2]*B[0] */ + "addq %%rax, %%r8 ;" + "movq %%r8, 16(%0) ;" + "mulx 8(%2), %%r10, %%r11; " /* A[2]*B[1] */ + "adcq %%r10, %%r9 ;" + "mulx 16(%2), %%r8, %%r13; " /* A[2]*B[2] */ + "adcq %%r8, %%r11 ;" + "mulx 24(%2), %%r10, %%rax; " /* A[2]*B[3] */ + "adcq %%r10, %%r13 ;" + /******************************************/ + "adcq $0, %%rax ;" + + "addq %%r9, %%rbx ;" + "adcq %%r11, %%rcx ;" + "adcq %%r13, %%r15 ;" + "adcq $0, %%rax ;" + + "movq 24(%1), %%rdx; " /* A[3] */ + "mulx (%2), %%r8, %%r9; " /* A[3]*B[0] */ + "addq %%rbx, %%r8 ;" + "movq %%r8, 24(%0) ;" + "mulx 8(%2), %%r10, %%r11; " /* A[3]*B[1] */ + "adcq %%r10, %%r9 ;" + "mulx 16(%2), %%r8, %%r13; " /* A[3]*B[2] */ + "adcq %%r8, %%r11 ;" + "mulx 24(%2), %%r10, %%rbx; " /* A[3]*B[3] */ + "adcq %%r10, %%r13 ;" + /******************************************/ + "adcq $0, %%rbx ;" + + "addq %%r9, %%rcx ;" + "movq %%rcx, 32(%0) ;" + "adcq %%r11, %%r15 ;" + "movq %%r15, 40(%0) ;" + "adcq %%r13, %%rax ;" + "movq %%rax, 48(%0) ;" + "adcq $0, %%rbx ;" + "movq %%rbx, 56(%0) ;" + : + : "r"(c), "r"(a), "r"(b) + : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", + "%r10", "%r11", "%r13", "%r15"); +} + +static void sqr_256x256_integer_adx(u64 *const c, const u64 *const a) +{ + asm volatile( + "movq (%1), %%rdx ;" /* A[0] */ + "mulx 8(%1), %%r8, %%r14 ;" /* A[1]*A[0] */ + "xorl %%r15d, %%r15d;" + "mulx 16(%1), %%r9, %%r10 ;" /* A[2]*A[0] */ + "adcx %%r14, %%r9 ;" + "mulx 24(%1), %%rax, %%rcx ;" /* A[3]*A[0] */ + "adcx %%rax, %%r10 ;" + "movq 24(%1), %%rdx ;" /* A[3] */ + "mulx 8(%1), %%r11, %%rbx ;" /* A[1]*A[3] */ + "adcx %%rcx, %%r11 ;" + "mulx 16(%1), %%rax, %%r13 ;" /* A[2]*A[3] */ + "adcx %%rax, %%rbx ;" + "movq 8(%1), %%rdx ;" /* A[1] */ + "adcx %%r15, %%r13 ;" + "mulx 16(%1), %%rax, %%rcx ;" /* A[2]*A[1] */ + "movq $0, %%r14 ;" + /******************************************/ + "adcx %%r15, %%r14 ;" + + "xorl %%r15d, %%r15d;" + "adox %%rax, %%r10 ;" + "adcx %%r8, %%r8 ;" + "adox %%rcx, %%r11 ;" + "adcx %%r9, %%r9 ;" + "adox %%r15, %%rbx ;" + "adcx %%r10, %%r10 ;" + "adox %%r15, %%r13 ;" + "adcx %%r11, %%r11 ;" + "adox %%r15, %%r14 ;" + "adcx %%rbx, %%rbx ;" + "adcx %%r13, %%r13 ;" + "adcx %%r14, %%r14 ;" + + "movq (%1), %%rdx ;" + "mulx %%rdx, %%rax, %%rcx ;" /* A[0]^2 */ + /*******************/ + "movq %%rax, 0(%0) ;" + "addq %%rcx, %%r8 ;" + "movq %%r8, 8(%0) ;" + "movq 8(%1), %%rdx ;" + "mulx %%rdx, %%rax, %%rcx ;" /* A[1]^2 */ + "adcq %%rax, %%r9 ;" + "movq %%r9, 16(%0) ;" + "adcq %%rcx, %%r10 ;" + "movq %%r10, 24(%0) ;" + "movq 16(%1), %%rdx ;" + "mulx %%rdx, %%rax, %%rcx ;" /* A[2]^2 */ + "adcq %%rax, 
%%r11 ;" + "movq %%r11, 32(%0) ;" + "adcq %%rcx, %%rbx ;" + "movq %%rbx, 40(%0) ;" + "movq 24(%1), %%rdx ;" + "mulx %%rdx, %%rax, %%rcx ;" /* A[3]^2 */ + "adcq %%rax, %%r13 ;" + "movq %%r13, 48(%0) ;" + "adcq %%rcx, %%r14 ;" + "movq %%r14, 56(%0) ;" + : + : "r"(c), "r"(a) + : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", + "%r10", "%r11", "%r13", "%r14", "%r15"); +} + +static void sqr_256x256_integer_bmi2(u64 *const c, const u64 *const a) +{ + asm volatile( + "movq 8(%1), %%rdx ;" /* A[1] */ + "mulx (%1), %%r8, %%r9 ;" /* A[0]*A[1] */ + "mulx 16(%1), %%r10, %%r11 ;" /* A[2]*A[1] */ + "mulx 24(%1), %%rcx, %%r14 ;" /* A[3]*A[1] */ + + "movq 16(%1), %%rdx ;" /* A[2] */ + "mulx 24(%1), %%r15, %%r13 ;" /* A[3]*A[2] */ + "mulx (%1), %%rax, %%rdx ;" /* A[0]*A[2] */ + + "addq %%rax, %%r9 ;" + "adcq %%rdx, %%r10 ;" + "adcq %%rcx, %%r11 ;" + "adcq %%r14, %%r15 ;" + "adcq $0, %%r13 ;" + "movq $0, %%r14 ;" + "adcq $0, %%r14 ;" + + "movq (%1), %%rdx ;" /* A[0] */ + "mulx 24(%1), %%rax, %%rcx ;" /* A[0]*A[3] */ + + "addq %%rax, %%r10 ;" + "adcq %%rcx, %%r11 ;" + "adcq $0, %%r15 ;" + "adcq $0, %%r13 ;" + "adcq $0, %%r14 ;" + + "shldq $1, %%r13, %%r14 ;" + "shldq $1, %%r15, %%r13 ;" + "shldq $1, %%r11, %%r15 ;" + "shldq $1, %%r10, %%r11 ;" + "shldq $1, %%r9, %%r10 ;" + "shldq $1, %%r8, %%r9 ;" + "shlq $1, %%r8 ;" + + /*******************/ + "mulx %%rdx, %%rax, %%rcx ;" /* A[0]^2 */ + /*******************/ + "movq %%rax, 0(%0) ;" + "addq %%rcx, %%r8 ;" + "movq %%r8, 8(%0) ;" + "movq 8(%1), %%rdx ;" + "mulx %%rdx, %%rax, %%rcx ;" /* A[1]^2 */ + "adcq %%rax, %%r9 ;" + "movq %%r9, 16(%0) ;" + "adcq %%rcx, %%r10 ;" + "movq %%r10, 24(%0) ;" + "movq 16(%1), %%rdx ;" + "mulx %%rdx, %%rax, %%rcx ;" /* A[2]^2 */ + "adcq %%rax, %%r11 ;" + "movq %%r11, 32(%0) ;" + "adcq %%rcx, %%r15 ;" + "movq %%r15, 40(%0) ;" + "movq 24(%1), %%rdx ;" + "mulx %%rdx, %%rax, %%rcx ;" /* A[3]^2 */ + "adcq %%rax, %%r13 ;" + "movq %%r13, 48(%0) ;" + "adcq %%rcx, %%r14 ;" + "movq %%r14, 56(%0) ;" + : + : "r"(c), "r"(a) + : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", + "%r11", "%r13", "%r14", "%r15"); +} + +static void red_eltfp25519_1w_adx(u64 *const c, const u64 *const a) +{ + asm volatile( + "movl $38, %%edx ;" /* 2*c = 38 = 2^256 */ + "mulx 32(%1), %%r8, %%r10 ;" /* c*C[4] */ + "xorl %%ebx, %%ebx ;" + "adox (%1), %%r8 ;" + "mulx 40(%1), %%r9, %%r11 ;" /* c*C[5] */ + "adcx %%r10, %%r9 ;" + "adox 8(%1), %%r9 ;" + "mulx 48(%1), %%r10, %%rax ;" /* c*C[6] */ + "adcx %%r11, %%r10 ;" + "adox 16(%1), %%r10 ;" + "mulx 56(%1), %%r11, %%rcx ;" /* c*C[7] */ + "adcx %%rax, %%r11 ;" + "adox 24(%1), %%r11 ;" + /***************************************/ + "adcx %%rbx, %%rcx ;" + "adox %%rbx, %%rcx ;" + "imul %%rdx, %%rcx ;" /* c*C[4], cf=0, of=0 */ + "adcx %%rcx, %%r8 ;" + "adcx %%rbx, %%r9 ;" + "movq %%r9, 8(%0) ;" + "adcx %%rbx, %%r10 ;" + "movq %%r10, 16(%0) ;" + "adcx %%rbx, %%r11 ;" + "movq %%r11, 24(%0) ;" + "mov $0, %%ecx ;" + "cmovc %%edx, %%ecx ;" + "addq %%rcx, %%r8 ;" + "movq %%r8, (%0) ;" + : + : "r"(c), "r"(a) + : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", + "%r10", "%r11"); +} + +static void red_eltfp25519_1w_bmi2(u64 *const c, const u64 *const a) +{ + asm volatile( + "movl $38, %%edx ;" /* 2*c = 38 = 2^256 */ + "mulx 32(%1), %%r8, %%r10 ;" /* c*C[4] */ + "mulx 40(%1), %%r9, %%r11 ;" /* c*C[5] */ + "addq %%r10, %%r9 ;" + "mulx 48(%1), %%r10, %%rax ;" /* c*C[6] */ + "adcq %%r11, %%r10 ;" + "mulx 56(%1), %%r11, %%rcx ;" /* c*C[7] */ + "adcq %%rax, %%r11 ;" + 
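+		/* At this point r8..r11 hold the four limbs of
+		 * 38 * (upper 256 bits of a), with the top word
+		 * accumulating in rcx just below; the lower 256 bits of a
+		 * are then added in and the residual carry folded through
+		 * the same 2^256 == 38 identity. */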
/***************************************/ + "adcq $0, %%rcx ;" + "addq (%1), %%r8 ;" + "adcq 8(%1), %%r9 ;" + "adcq 16(%1), %%r10 ;" + "adcq 24(%1), %%r11 ;" + "adcq $0, %%rcx ;" + "imul %%rdx, %%rcx ;" /* c*C[4], cf=0 */ + "addq %%rcx, %%r8 ;" + "adcq $0, %%r9 ;" + "movq %%r9, 8(%0) ;" + "adcq $0, %%r10 ;" + "movq %%r10, 16(%0) ;" + "adcq $0, %%r11 ;" + "movq %%r11, 24(%0) ;" + "mov $0, %%ecx ;" + "cmovc %%edx, %%ecx ;" + "addq %%rcx, %%r8 ;" + "movq %%r8, (%0) ;" + : + : "r"(c), "r"(a) + : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", + "%r11"); +} + +static __always_inline void +add_eltfp25519_1w_adx(u64 *const c, const u64 *const a, const u64 *const b) +{ + asm volatile( + "mov $38, %%eax ;" + "xorl %%ecx, %%ecx ;" + "movq (%2), %%r8 ;" + "adcx (%1), %%r8 ;" + "movq 8(%2), %%r9 ;" + "adcx 8(%1), %%r9 ;" + "movq 16(%2), %%r10 ;" + "adcx 16(%1), %%r10 ;" + "movq 24(%2), %%r11 ;" + "adcx 24(%1), %%r11 ;" + "cmovc %%eax, %%ecx ;" + "xorl %%eax, %%eax ;" + "adcx %%rcx, %%r8 ;" + "adcx %%rax, %%r9 ;" + "movq %%r9, 8(%0) ;" + "adcx %%rax, %%r10 ;" + "movq %%r10, 16(%0) ;" + "adcx %%rax, %%r11 ;" + "movq %%r11, 24(%0) ;" + "mov $38, %%ecx ;" + "cmovc %%ecx, %%eax ;" + "addq %%rax, %%r8 ;" + "movq %%r8, (%0) ;" + : + : "r"(c), "r"(a), "r"(b) + : "memory", "cc", "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11"); +} + +static __always_inline void +add_eltfp25519_1w_bmi2(u64 *const c, const u64 *const a, const u64 *const b) +{ + asm volatile( + "mov $38, %%eax ;" + "movq (%2), %%r8 ;" + "addq (%1), %%r8 ;" + "movq 8(%2), %%r9 ;" + "adcq 8(%1), %%r9 ;" + "movq 16(%2), %%r10 ;" + "adcq 16(%1), %%r10 ;" + "movq 24(%2), %%r11 ;" + "adcq 24(%1), %%r11 ;" + "mov $0, %%ecx ;" + "cmovc %%eax, %%ecx ;" + "addq %%rcx, %%r8 ;" + "adcq $0, %%r9 ;" + "movq %%r9, 8(%0) ;" + "adcq $0, %%r10 ;" + "movq %%r10, 16(%0) ;" + "adcq $0, %%r11 ;" + "movq %%r11, 24(%0) ;" + "mov $0, %%ecx ;" + "cmovc %%eax, %%ecx ;" + "addq %%rcx, %%r8 ;" + "movq %%r8, (%0) ;" + : + : "r"(c), "r"(a), "r"(b) + : "memory", "cc", "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11"); +} + +static __always_inline void +sub_eltfp25519_1w(u64 *const c, const u64 *const a, const u64 *const b) +{ + asm volatile( + "mov $38, %%eax ;" + "movq (%1), %%r8 ;" + "subq (%2), %%r8 ;" + "movq 8(%1), %%r9 ;" + "sbbq 8(%2), %%r9 ;" + "movq 16(%1), %%r10 ;" + "sbbq 16(%2), %%r10 ;" + "movq 24(%1), %%r11 ;" + "sbbq 24(%2), %%r11 ;" + "mov $0, %%ecx ;" + "cmovc %%eax, %%ecx ;" + "subq %%rcx, %%r8 ;" + "sbbq $0, %%r9 ;" + "movq %%r9, 8(%0) ;" + "sbbq $0, %%r10 ;" + "movq %%r10, 16(%0) ;" + "sbbq $0, %%r11 ;" + "movq %%r11, 24(%0) ;" + "mov $0, %%ecx ;" + "cmovc %%eax, %%ecx ;" + "subq %%rcx, %%r8 ;" + "movq %%r8, (%0) ;" + : + : "r"(c), "r"(a), "r"(b) + : "memory", "cc", "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11"); +} + +/* Multiplication by a24 = (A+2)/4 = (486662+2)/4 = 121666 */ +static __always_inline void +mul_a24_eltfp25519_1w(u64 *const c, const u64 *const a) +{ + const u64 a24 = 121666; + asm volatile( + "movq %2, %%rdx ;" + "mulx (%1), %%r8, %%r10 ;" + "mulx 8(%1), %%r9, %%r11 ;" + "addq %%r10, %%r9 ;" + "mulx 16(%1), %%r10, %%rax ;" + "adcq %%r11, %%r10 ;" + "mulx 24(%1), %%r11, %%rcx ;" + "adcq %%rax, %%r11 ;" + /**************************/ + "adcq $0, %%rcx ;" + "movl $38, %%edx ;" /* 2*c = 38 = 2^256 mod 2^255-19*/ + "imul %%rdx, %%rcx ;" + "addq %%rcx, %%r8 ;" + "adcq $0, %%r9 ;" + "movq %%r9, 8(%0) ;" + "adcq $0, %%r10 ;" + "movq %%r10, 16(%0) ;" + "adcq $0, %%r11 ;" + "movq %%r11, 24(%0) ;" + "mov $0, %%ecx ;" + "cmovc %%edx, %%ecx ;" + 
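+		/* Same carry trick as in the red_eltfp25519_*() routines:
+		 * if the adcq chain above wrapped past 2^256, ecx is 38
+		 * rather than 0 and one more fold brings the value back
+		 * below 2^256. */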
"addq %%rcx, %%r8 ;" + "movq %%r8, (%0) ;" + : + : "r"(c), "r"(a), "r"(a24) + : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", + "%r11"); +} + +static void inv_eltfp25519_1w_adx(u64 *const c, const u64 *const a) +{ + struct { + eltfp25519_1w_buffer buffer; + eltfp25519_1w x0, x1, x2; + } __aligned(32) m; + u64 *T[4]; + + T[0] = m.x0; + T[1] = c; /* x^(-1) */ + T[2] = m.x1; + T[3] = m.x2; + + copy_eltfp25519_1w(T[1], a); + sqrn_eltfp25519_1w_adx(T[1], 1); + copy_eltfp25519_1w(T[2], T[1]); + sqrn_eltfp25519_1w_adx(T[2], 2); + mul_eltfp25519_1w_adx(T[0], a, T[2]); + mul_eltfp25519_1w_adx(T[1], T[1], T[0]); + copy_eltfp25519_1w(T[2], T[1]); + sqrn_eltfp25519_1w_adx(T[2], 1); + mul_eltfp25519_1w_adx(T[0], T[0], T[2]); + copy_eltfp25519_1w(T[2], T[0]); + sqrn_eltfp25519_1w_adx(T[2], 5); + mul_eltfp25519_1w_adx(T[0], T[0], T[2]); + copy_eltfp25519_1w(T[2], T[0]); + sqrn_eltfp25519_1w_adx(T[2], 10); + mul_eltfp25519_1w_adx(T[2], T[2], T[0]); + copy_eltfp25519_1w(T[3], T[2]); + sqrn_eltfp25519_1w_adx(T[3], 20); + mul_eltfp25519_1w_adx(T[3], T[3], T[2]); + sqrn_eltfp25519_1w_adx(T[3], 10); + mul_eltfp25519_1w_adx(T[3], T[3], T[0]); + copy_eltfp25519_1w(T[0], T[3]); + sqrn_eltfp25519_1w_adx(T[0], 50); + mul_eltfp25519_1w_adx(T[0], T[0], T[3]); + copy_eltfp25519_1w(T[2], T[0]); + sqrn_eltfp25519_1w_adx(T[2], 100); + mul_eltfp25519_1w_adx(T[2], T[2], T[0]); + sqrn_eltfp25519_1w_adx(T[2], 50); + mul_eltfp25519_1w_adx(T[2], T[2], T[3]); + sqrn_eltfp25519_1w_adx(T[2], 5); + mul_eltfp25519_1w_adx(T[1], T[1], T[2]); + + memzero_explicit(&m, sizeof(m)); +} + +static void inv_eltfp25519_1w_bmi2(u64 *const c, const u64 *const a) +{ + struct { + eltfp25519_1w_buffer buffer; + eltfp25519_1w x0, x1, x2; + } __aligned(32) m; + u64 *T[5]; + + T[0] = m.x0; + T[1] = c; /* x^(-1) */ + T[2] = m.x1; + T[3] = m.x2; + + copy_eltfp25519_1w(T[1], a); + sqrn_eltfp25519_1w_bmi2(T[1], 1); + copy_eltfp25519_1w(T[2], T[1]); + sqrn_eltfp25519_1w_bmi2(T[2], 2); + mul_eltfp25519_1w_bmi2(T[0], a, T[2]); + mul_eltfp25519_1w_bmi2(T[1], T[1], T[0]); + copy_eltfp25519_1w(T[2], T[1]); + sqrn_eltfp25519_1w_bmi2(T[2], 1); + mul_eltfp25519_1w_bmi2(T[0], T[0], T[2]); + copy_eltfp25519_1w(T[2], T[0]); + sqrn_eltfp25519_1w_bmi2(T[2], 5); + mul_eltfp25519_1w_bmi2(T[0], T[0], T[2]); + copy_eltfp25519_1w(T[2], T[0]); + sqrn_eltfp25519_1w_bmi2(T[2], 10); + mul_eltfp25519_1w_bmi2(T[2], T[2], T[0]); + copy_eltfp25519_1w(T[3], T[2]); + sqrn_eltfp25519_1w_bmi2(T[3], 20); + mul_eltfp25519_1w_bmi2(T[3], T[3], T[2]); + sqrn_eltfp25519_1w_bmi2(T[3], 10); + mul_eltfp25519_1w_bmi2(T[3], T[3], T[0]); + copy_eltfp25519_1w(T[0], T[3]); + sqrn_eltfp25519_1w_bmi2(T[0], 50); + mul_eltfp25519_1w_bmi2(T[0], T[0], T[3]); + copy_eltfp25519_1w(T[2], T[0]); + sqrn_eltfp25519_1w_bmi2(T[2], 100); + mul_eltfp25519_1w_bmi2(T[2], T[2], T[0]); + sqrn_eltfp25519_1w_bmi2(T[2], 50); + mul_eltfp25519_1w_bmi2(T[2], T[2], T[3]); + sqrn_eltfp25519_1w_bmi2(T[2], 5); + mul_eltfp25519_1w_bmi2(T[1], T[1], T[2]); + + memzero_explicit(&m, sizeof(m)); +} + +/* Given c, a 256-bit number, fred_eltfp25519_1w updates c + * with a number such that 0 <= C < 2**255-19. + */ +static __always_inline void fred_eltfp25519_1w(u64 *const c) +{ + u64 tmp0 = 38, tmp1 = 19; + asm volatile( + "btrq $63, %3 ;" /* Put bit 255 in carry flag and clear */ + "cmovncl %k5, %k4 ;" /* c[255] ? 
38 : 19 */ + + /* Add either 19 or 38 to c */ + "addq %4, %0 ;" + "adcq $0, %1 ;" + "adcq $0, %2 ;" + "adcq $0, %3 ;" + + /* Test for bit 255 again; only triggered on overflow modulo 2^255-19 */ + "movl $0, %k4 ;" + "cmovnsl %k5, %k4 ;" /* c[255] ? 0 : 19 */ + "btrq $63, %3 ;" /* Clear bit 255 */ + + /* Subtract 19 if necessary */ + "subq %4, %0 ;" + "sbbq $0, %1 ;" + "sbbq $0, %2 ;" + "sbbq $0, %3 ;" + + : "+r"(c[0]), "+r"(c[1]), "+r"(c[2]), "+r"(c[3]), "+r"(tmp0), + "+r"(tmp1) + : + : "memory", "cc"); +} + +static __always_inline void cswap(u8 bit, u64 *const px, u64 *const py) +{ + u64 temp; + asm volatile( + "test %9, %9 ;" + "movq %0, %8 ;" + "cmovnzq %4, %0 ;" + "cmovnzq %8, %4 ;" + "movq %1, %8 ;" + "cmovnzq %5, %1 ;" + "cmovnzq %8, %5 ;" + "movq %2, %8 ;" + "cmovnzq %6, %2 ;" + "cmovnzq %8, %6 ;" + "movq %3, %8 ;" + "cmovnzq %7, %3 ;" + "cmovnzq %8, %7 ;" + : "+r"(px[0]), "+r"(px[1]), "+r"(px[2]), "+r"(px[3]), + "+r"(py[0]), "+r"(py[1]), "+r"(py[2]), "+r"(py[3]), + "=r"(temp) + : "r"(bit) + : "cc" + ); +} + +static __always_inline void cselect(u8 bit, u64 *const px, const u64 *const py) +{ + asm volatile( + "test %4, %4 ;" + "cmovnzq %5, %0 ;" + "cmovnzq %6, %1 ;" + "cmovnzq %7, %2 ;" + "cmovnzq %8, %3 ;" + : "+r"(px[0]), "+r"(px[1]), "+r"(px[2]), "+r"(px[3]) + : "r"(bit), "rm"(py[0]), "rm"(py[1]), "rm"(py[2]), "rm"(py[3]) + : "cc" + ); +} + +static void curve25519_adx(u8 shared[CURVE25519_KEY_SIZE], + const u8 private_key[CURVE25519_KEY_SIZE], + const u8 session_key[CURVE25519_KEY_SIZE]) +{ + struct { + u64 buffer[4 * NUM_WORDS_ELTFP25519]; + u64 coordinates[4 * NUM_WORDS_ELTFP25519]; + u64 workspace[6 * NUM_WORDS_ELTFP25519]; + u8 session[CURVE25519_KEY_SIZE]; + u8 private[CURVE25519_KEY_SIZE]; + } __aligned(32) m; + + int i = 0, j = 0; + u64 prev = 0; + u64 *const X1 = (u64 *)m.session; + u64 *const key = (u64 *)m.private; + u64 *const Px = m.coordinates + 0; + u64 *const Pz = m.coordinates + 4; + u64 *const Qx = m.coordinates + 8; + u64 *const Qz = m.coordinates + 12; + u64 *const X2 = Qx; + u64 *const Z2 = Qz; + u64 *const X3 = Px; + u64 *const Z3 = Pz; + u64 *const X2Z2 = Qx; + u64 *const X3Z3 = Px; + + u64 *const A = m.workspace + 0; + u64 *const B = m.workspace + 4; + u64 *const D = m.workspace + 8; + u64 *const C = m.workspace + 12; + u64 *const DA = m.workspace + 16; + u64 *const CB = m.workspace + 20; + u64 *const AB = A; + u64 *const DC = D; + u64 *const DACB = DA; + + memcpy(m.private, private_key, sizeof(m.private)); + memcpy(m.session, session_key, sizeof(m.session)); + + curve25519_clamp_secret(m.private); + + /* As in the draft: + * When receiving such an array, implementations of curve25519 + * MUST mask the most-significant bit in the final byte. 
This + * is done to preserve compatibility with point formats which + * reserve the sign bit for use in other protocols and to + * increase resistance to implementation fingerprinting + */ + m.session[CURVE25519_KEY_SIZE - 1] &= (1 << (255 % 8)) - 1; + + copy_eltfp25519_1w(Px, X1); + setzero_eltfp25519_1w(Pz); + setzero_eltfp25519_1w(Qx); + setzero_eltfp25519_1w(Qz); + + Pz[0] = 1; + Qx[0] = 1; + + /* main-loop */ + prev = 0; + j = 62; + for (i = 3; i >= 0; --i) { + while (j >= 0) { + u64 bit = (key[i] >> j) & 0x1; + u64 swap = bit ^ prev; + prev = bit; + + add_eltfp25519_1w_adx(A, X2, Z2); /* A = (X2+Z2) */ + sub_eltfp25519_1w(B, X2, Z2); /* B = (X2-Z2) */ + add_eltfp25519_1w_adx(C, X3, Z3); /* C = (X3+Z3) */ + sub_eltfp25519_1w(D, X3, Z3); /* D = (X3-Z3) */ + mul_eltfp25519_2w_adx(DACB, AB, DC); /* [DA|CB] = [A|B]*[D|C] */ + + cselect(swap, A, C); + cselect(swap, B, D); + + sqr_eltfp25519_2w_adx(AB); /* [AA|BB] = [A^2|B^2] */ + add_eltfp25519_1w_adx(X3, DA, CB); /* X3 = (DA+CB) */ + sub_eltfp25519_1w(Z3, DA, CB); /* Z3 = (DA-CB) */ + sqr_eltfp25519_2w_adx(X3Z3); /* [X3|Z3] = [(DA+CB)|(DA+CB)]^2 */ + + copy_eltfp25519_1w(X2, B); /* X2 = B^2 */ + sub_eltfp25519_1w(Z2, A, B); /* Z2 = E = AA-BB */ + + mul_a24_eltfp25519_1w(B, Z2); /* B = a24*E */ + add_eltfp25519_1w_adx(B, B, X2); /* B = a24*E+B */ + mul_eltfp25519_2w_adx(X2Z2, X2Z2, AB); /* [X2|Z2] = [B|E]*[A|a24*E+B] */ + mul_eltfp25519_1w_adx(Z3, Z3, X1); /* Z3 = Z3*X1 */ + --j; + } + j = 63; + } + + inv_eltfp25519_1w_adx(A, Qz); + mul_eltfp25519_1w_adx((u64 *)shared, Qx, A); + fred_eltfp25519_1w((u64 *)shared); + + memzero_explicit(&m, sizeof(m)); +} + +static void curve25519_adx_base(u8 session_key[CURVE25519_KEY_SIZE], + const u8 private_key[CURVE25519_KEY_SIZE]) +{ + struct { + u64 buffer[4 * NUM_WORDS_ELTFP25519]; + u64 coordinates[4 * NUM_WORDS_ELTFP25519]; + u64 workspace[4 * NUM_WORDS_ELTFP25519]; + u8 private[CURVE25519_KEY_SIZE]; + } __aligned(32) m; + + const int ite[4] = { 64, 64, 64, 63 }; + const int q = 3; + u64 swap = 1; + + int i = 0, j = 0, k = 0; + u64 *const key = (u64 *)m.private; + u64 *const Ur1 = m.coordinates + 0; + u64 *const Zr1 = m.coordinates + 4; + u64 *const Ur2 = m.coordinates + 8; + u64 *const Zr2 = m.coordinates + 12; + + u64 *const UZr1 = m.coordinates + 0; + u64 *const ZUr2 = m.coordinates + 8; + + u64 *const A = m.workspace + 0; + u64 *const B = m.workspace + 4; + u64 *const C = m.workspace + 8; + u64 *const D = m.workspace + 12; + + u64 *const AB = m.workspace + 0; + u64 *const CD = m.workspace + 8; + + const u64 *const P = table_ladder_8k; + + memcpy(m.private, private_key, sizeof(m.private)); + + curve25519_clamp_secret(m.private); + + setzero_eltfp25519_1w(Ur1); + setzero_eltfp25519_1w(Zr1); + setzero_eltfp25519_1w(Zr2); + Ur1[0] = 1; + Zr1[0] = 1; + Zr2[0] = 1; + + /* G-S */ + Ur2[3] = 0x1eaecdeee27cab34UL; + Ur2[2] = 0xadc7a0b9235d48e2UL; + Ur2[1] = 0xbbf095ae14b2edf8UL; + Ur2[0] = 0x7e94e1fec82faabdUL; + + /* main-loop */ + j = q; + for (i = 0; i < NUM_WORDS_ELTFP25519; ++i) { + while (j < ite[i]) { + u64 bit = (key[i] >> j) & 0x1; + k = (64 * i + j - q); + swap = swap ^ bit; + cswap(swap, Ur1, Ur2); + cswap(swap, Zr1, Zr2); + swap = bit; + /* Addition */ + sub_eltfp25519_1w(B, Ur1, Zr1); /* B = Ur1-Zr1 */ + add_eltfp25519_1w_adx(A, Ur1, Zr1); /* A = Ur1+Zr1 */ + mul_eltfp25519_1w_adx(C, &P[4 * k], B); /* C = M0-B */ + sub_eltfp25519_1w(B, A, C); /* B = (Ur1+Zr1) - M*(Ur1-Zr1) */ + add_eltfp25519_1w_adx(A, A, C); /* A = (Ur1+Zr1) + M*(Ur1-Zr1) */ + sqr_eltfp25519_2w_adx(AB); /* A = A^2 | B = B^2 
*/ + mul_eltfp25519_2w_adx(UZr1, ZUr2, AB); /* Ur1 = Zr2*A | Zr1 = Ur2*B */ + ++j; + } + j = 0; + } + + /* Doubling */ + for (i = 0; i < q; ++i) { + add_eltfp25519_1w_adx(A, Ur1, Zr1); /* A = Ur1+Zr1 */ + sub_eltfp25519_1w(B, Ur1, Zr1); /* B = Ur1-Zr1 */ + sqr_eltfp25519_2w_adx(AB); /* A = A**2 B = B**2 */ + copy_eltfp25519_1w(C, B); /* C = B */ + sub_eltfp25519_1w(B, A, B); /* B = A-B */ + mul_a24_eltfp25519_1w(D, B); /* D = my_a24*B */ + add_eltfp25519_1w_adx(D, D, C); /* D = D+C */ + mul_eltfp25519_2w_adx(UZr1, AB, CD); /* Ur1 = A*B Zr1 = Zr1*A */ + } + + /* Convert to affine coordinates */ + inv_eltfp25519_1w_adx(A, Zr1); + mul_eltfp25519_1w_adx((u64 *)session_key, Ur1, A); + fred_eltfp25519_1w((u64 *)session_key); + + memzero_explicit(&m, sizeof(m)); +} + +static void curve25519_bmi2(u8 shared[CURVE25519_KEY_SIZE], + const u8 private_key[CURVE25519_KEY_SIZE], + const u8 session_key[CURVE25519_KEY_SIZE]) +{ + struct { + u64 buffer[4 * NUM_WORDS_ELTFP25519]; + u64 coordinates[4 * NUM_WORDS_ELTFP25519]; + u64 workspace[6 * NUM_WORDS_ELTFP25519]; + u8 session[CURVE25519_KEY_SIZE]; + u8 private[CURVE25519_KEY_SIZE]; + } __aligned(32) m; + + int i = 0, j = 0; + u64 prev = 0; + u64 *const X1 = (u64 *)m.session; + u64 *const key = (u64 *)m.private; + u64 *const Px = m.coordinates + 0; + u64 *const Pz = m.coordinates + 4; + u64 *const Qx = m.coordinates + 8; + u64 *const Qz = m.coordinates + 12; + u64 *const X2 = Qx; + u64 *const Z2 = Qz; + u64 *const X3 = Px; + u64 *const Z3 = Pz; + u64 *const X2Z2 = Qx; + u64 *const X3Z3 = Px; + + u64 *const A = m.workspace + 0; + u64 *const B = m.workspace + 4; + u64 *const D = m.workspace + 8; + u64 *const C = m.workspace + 12; + u64 *const DA = m.workspace + 16; + u64 *const CB = m.workspace + 20; + u64 *const AB = A; + u64 *const DC = D; + u64 *const DACB = DA; + + memcpy(m.private, private_key, sizeof(m.private)); + memcpy(m.session, session_key, sizeof(m.session)); + + curve25519_clamp_secret(m.private); + + /* As in the draft: + * When receiving such an array, implementations of curve25519 + * MUST mask the most-significant bit in the final byte. 
This + * is done to preserve compatibility with point formats which + * reserve the sign bit for use in other protocols and to + * increase resistance to implementation fingerprinting + */ + m.session[CURVE25519_KEY_SIZE - 1] &= (1 << (255 % 8)) - 1; + + copy_eltfp25519_1w(Px, X1); + setzero_eltfp25519_1w(Pz); + setzero_eltfp25519_1w(Qx); + setzero_eltfp25519_1w(Qz); + + Pz[0] = 1; + Qx[0] = 1; + + /* main-loop */ + prev = 0; + j = 62; + for (i = 3; i >= 0; --i) { + while (j >= 0) { + u64 bit = (key[i] >> j) & 0x1; + u64 swap = bit ^ prev; + prev = bit; + + add_eltfp25519_1w_bmi2(A, X2, Z2); /* A = (X2+Z2) */ + sub_eltfp25519_1w(B, X2, Z2); /* B = (X2-Z2) */ + add_eltfp25519_1w_bmi2(C, X3, Z3); /* C = (X3+Z3) */ + sub_eltfp25519_1w(D, X3, Z3); /* D = (X3-Z3) */ + mul_eltfp25519_2w_bmi2(DACB, AB, DC); /* [DA|CB] = [A|B]*[D|C] */ + + cselect(swap, A, C); + cselect(swap, B, D); + + sqr_eltfp25519_2w_bmi2(AB); /* [AA|BB] = [A^2|B^2] */ + add_eltfp25519_1w_bmi2(X3, DA, CB); /* X3 = (DA+CB) */ + sub_eltfp25519_1w(Z3, DA, CB); /* Z3 = (DA-CB) */ + sqr_eltfp25519_2w_bmi2(X3Z3); /* [X3|Z3] = [(DA+CB)|(DA+CB)]^2 */ + + copy_eltfp25519_1w(X2, B); /* X2 = B^2 */ + sub_eltfp25519_1w(Z2, A, B); /* Z2 = E = AA-BB */ + + mul_a24_eltfp25519_1w(B, Z2); /* B = a24*E */ + add_eltfp25519_1w_bmi2(B, B, X2); /* B = a24*E+B */ + mul_eltfp25519_2w_bmi2(X2Z2, X2Z2, AB); /* [X2|Z2] = [B|E]*[A|a24*E+B] */ + mul_eltfp25519_1w_bmi2(Z3, Z3, X1); /* Z3 = Z3*X1 */ + --j; + } + j = 63; + } + + inv_eltfp25519_1w_bmi2(A, Qz); + mul_eltfp25519_1w_bmi2((u64 *)shared, Qx, A); + fred_eltfp25519_1w((u64 *)shared); + + memzero_explicit(&m, sizeof(m)); +} + +static void curve25519_bmi2_base(u8 session_key[CURVE25519_KEY_SIZE], + const u8 private_key[CURVE25519_KEY_SIZE]) +{ + struct { + u64 buffer[4 * NUM_WORDS_ELTFP25519]; + u64 coordinates[4 * NUM_WORDS_ELTFP25519]; + u64 workspace[4 * NUM_WORDS_ELTFP25519]; + u8 private[CURVE25519_KEY_SIZE]; + } __aligned(32) m; + + const int ite[4] = { 64, 64, 64, 63 }; + const int q = 3; + u64 swap = 1; + + int i = 0, j = 0, k = 0; + u64 *const key = (u64 *)m.private; + u64 *const Ur1 = m.coordinates + 0; + u64 *const Zr1 = m.coordinates + 4; + u64 *const Ur2 = m.coordinates + 8; + u64 *const Zr2 = m.coordinates + 12; + + u64 *const UZr1 = m.coordinates + 0; + u64 *const ZUr2 = m.coordinates + 8; + + u64 *const A = m.workspace + 0; + u64 *const B = m.workspace + 4; + u64 *const C = m.workspace + 8; + u64 *const D = m.workspace + 12; + + u64 *const AB = m.workspace + 0; + u64 *const CD = m.workspace + 8; + + const u64 *const P = table_ladder_8k; + + memcpy(m.private, private_key, sizeof(m.private)); + + curve25519_clamp_secret(m.private); + + setzero_eltfp25519_1w(Ur1); + setzero_eltfp25519_1w(Zr1); + setzero_eltfp25519_1w(Zr2); + Ur1[0] = 1; + Zr1[0] = 1; + Zr2[0] = 1; + + /* G-S */ + Ur2[3] = 0x1eaecdeee27cab34UL; + Ur2[2] = 0xadc7a0b9235d48e2UL; + Ur2[1] = 0xbbf095ae14b2edf8UL; + Ur2[0] = 0x7e94e1fec82faabdUL; + + /* main-loop */ + j = q; + for (i = 0; i < NUM_WORDS_ELTFP25519; ++i) { + while (j < ite[i]) { + u64 bit = (key[i] >> j) & 0x1; + k = (64 * i + j - q); + swap = swap ^ bit; + cswap(swap, Ur1, Ur2); + cswap(swap, Zr1, Zr2); + swap = bit; + /* Addition */ + sub_eltfp25519_1w(B, Ur1, Zr1); /* B = Ur1-Zr1 */ + add_eltfp25519_1w_bmi2(A, Ur1, Zr1); /* A = Ur1+Zr1 */ + mul_eltfp25519_1w_bmi2(C, &P[4 * k], B);/* C = M0-B */ + sub_eltfp25519_1w(B, A, C); /* B = (Ur1+Zr1) - M*(Ur1-Zr1) */ + add_eltfp25519_1w_bmi2(A, A, C); /* A = (Ur1+Zr1) + M*(Ur1-Zr1) */ + sqr_eltfp25519_2w_bmi2(AB); /* A 
= A^2 | B = B^2 */ + mul_eltfp25519_2w_bmi2(UZr1, ZUr2, AB); /* Ur1 = Zr2*A | Zr1 = Ur2*B */ + ++j; + } + j = 0; + } + + /* Doubling */ + for (i = 0; i < q; ++i) { + add_eltfp25519_1w_bmi2(A, Ur1, Zr1); /* A = Ur1+Zr1 */ + sub_eltfp25519_1w(B, Ur1, Zr1); /* B = Ur1-Zr1 */ + sqr_eltfp25519_2w_bmi2(AB); /* A = A**2 B = B**2 */ + copy_eltfp25519_1w(C, B); /* C = B */ + sub_eltfp25519_1w(B, A, B); /* B = A-B */ + mul_a24_eltfp25519_1w(D, B); /* D = my_a24*B */ + add_eltfp25519_1w_bmi2(D, D, C); /* D = D+C */ + mul_eltfp25519_2w_bmi2(UZr1, AB, CD); /* Ur1 = A*B Zr1 = Zr1*A */ + } + + /* Convert to affine coordinates */ + inv_eltfp25519_1w_bmi2(A, Zr1); + mul_eltfp25519_1w_bmi2((u64 *)session_key, Ur1, A); + fred_eltfp25519_1w((u64 *)session_key); + + memzero_explicit(&m, sizeof(m)); +} + +void curve25519_arch(u8 mypublic[CURVE25519_KEY_SIZE], + const u8 secret[CURVE25519_KEY_SIZE], + const u8 basepoint[CURVE25519_KEY_SIZE]) +{ + if (static_branch_likely(&curve25519_use_adx)) + curve25519_adx(mypublic, secret, basepoint); + else if (static_branch_likely(&curve25519_use_bmi2)) + curve25519_bmi2(mypublic, secret, basepoint); + else + curve25519_generic(mypublic, secret, basepoint); +} +EXPORT_SYMBOL(curve25519_arch); + +void curve25519_base_arch(u8 pub[CURVE25519_KEY_SIZE], + const u8 secret[CURVE25519_KEY_SIZE]) +{ + if (static_branch_likely(&curve25519_use_adx)) + curve25519_adx_base(pub, secret); + else if (static_branch_likely(&curve25519_use_bmi2)) + curve25519_bmi2_base(pub, secret); + else + curve25519_generic(pub, secret, curve25519_base_point); +} +EXPORT_SYMBOL(curve25519_base_arch); + +static int curve25519_set_secret(struct crypto_kpp *tfm, const void *buf, + unsigned int len) +{ + u8 *secret = kpp_tfm_ctx(tfm); + + if (!len) + curve25519_generate_secret(secret); + else if (len == CURVE25519_KEY_SIZE && + crypto_memneq(buf, curve25519_null_point, CURVE25519_KEY_SIZE)) + memcpy(secret, buf, CURVE25519_KEY_SIZE); + else + return -EINVAL; + return 0; +} + +static int curve25519_generate_public_key(struct kpp_request *req) +{ + struct crypto_kpp *tfm = crypto_kpp_reqtfm(req); + const u8 *secret = kpp_tfm_ctx(tfm); + u8 buf[CURVE25519_KEY_SIZE]; + int copied, nbytes; + + if (req->src) + return -EINVAL; + + curve25519_base_arch(buf, secret); + + /* might want less than we've got */ + nbytes = min_t(size_t, CURVE25519_KEY_SIZE, req->dst_len); + copied = sg_copy_from_buffer(req->dst, sg_nents_for_len(req->dst, + nbytes), + buf, nbytes); + if (copied != nbytes) + return -EINVAL; + return 0; +} + +static int curve25519_compute_shared_secret(struct kpp_request *req) +{ + struct crypto_kpp *tfm = crypto_kpp_reqtfm(req); + const u8 *secret = kpp_tfm_ctx(tfm); + u8 public_key[CURVE25519_KEY_SIZE]; + u8 buf[CURVE25519_KEY_SIZE]; + int copied, nbytes; + + if (!req->src) + return -EINVAL; + + copied = sg_copy_to_buffer(req->src, + sg_nents_for_len(req->src, + CURVE25519_KEY_SIZE), + public_key, CURVE25519_KEY_SIZE); + if (copied != CURVE25519_KEY_SIZE) + return -EINVAL; + + curve25519_arch(buf, secret, public_key); + + /* might want less than we've got */ + nbytes = min_t(size_t, CURVE25519_KEY_SIZE, req->dst_len); + copied = sg_copy_from_buffer(req->dst, sg_nents_for_len(req->dst, + nbytes), + buf, nbytes); + if (copied != nbytes) + return -EINVAL; + return 0; +} + +static unsigned int curve25519_max_size(struct crypto_kpp *tfm) +{ + return CURVE25519_KEY_SIZE; +} + +static struct kpp_alg curve25519_alg = { + .base.cra_name = "curve25519", + .base.cra_driver_name = "curve25519-x86", + 
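+	/* Priority 200 ranks this driver above the generic C
+	 * implementation, so the crypto API prefers it whenever this
+	 * module is loaded. A minimal in-kernel consumer would look
+	 * roughly like the following sketch (hypothetical caller, error
+	 * handling omitted):
+	 *
+	 *   struct crypto_kpp *tfm = crypto_alloc_kpp("curve25519", 0, 0);
+	 *   crypto_kpp_set_secret(tfm, my_secret, CURVE25519_KEY_SIZE);
+	 *   ...then allocate a kpp_request with kpp_request_alloc() and
+	 *   invoke crypto_kpp_generate_public_key() or
+	 *   crypto_kpp_compute_shared_secret().
+	 */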
.base.cra_priority = 200, + .base.cra_module = THIS_MODULE, + .base.cra_ctxsize = CURVE25519_KEY_SIZE, + + .set_secret = curve25519_set_secret, + .generate_public_key = curve25519_generate_public_key, + .compute_shared_secret = curve25519_compute_shared_secret, + .max_size = curve25519_max_size, +}; + +static int __init curve25519_mod_init(void) +{ + if (boot_cpu_has(X86_FEATURE_BMI2)) + static_branch_enable(&curve25519_use_bmi2); + else if (boot_cpu_has(X86_FEATURE_ADX)) + static_branch_enable(&curve25519_use_adx); + else + return 0; + return crypto_register_kpp(&curve25519_alg); +} + +static void __exit curve25519_mod_exit(void) +{ + if (boot_cpu_has(X86_FEATURE_BMI2) || + boot_cpu_has(X86_FEATURE_ADX)) + crypto_unregister_kpp(&curve25519_alg); +} + +module_init(curve25519_mod_init); +module_exit(curve25519_mod_exit); + +MODULE_ALIAS_CRYPTO("curve25519"); +MODULE_ALIAS_CRYPTO("curve25519-x86"); +MODULE_LICENSE("GPL v2"); diff --git a/crypto/Kconfig b/crypto/Kconfig index fab259d9d056..472c2ad36063 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -269,6 +269,12 @@ config CRYPTO_CURVE25519 select CRYPTO_KPP select CRYPTO_LIB_CURVE25519_GENERIC +config CRYPTO_CURVE25519_X86 + tristate "x86_64 accelerated Curve25519 scalar multiplication library" + depends on X86 && 64BIT + select CRYPTO_LIB_CURVE25519_GENERIC + select CRYPTO_ARCH_HAVE_LIB_CURVE25519 + comment "Authenticated Encryption with Associated Data" config CRYPTO_CCM -- cgit v1.2.3 From f0fb006b604f98e2309a30f34ef455ac734f7c1c Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 8 Nov 2019 13:22:37 +0100 Subject: crypto: arm/curve25519 - import Bernstein and Schwabe's Curve25519 ARM implementation This comes from Dan Bernstein and Peter Schwabe's public domain NEON code, and is included here in raw form so that subsequent commits that fix these up for the kernel can see how it has changed. This code does have some entirely cosmetic formatting differences, adding indentation and so forth, so that when we actually port it for use in the kernel in the subsequent commit, it's obvious what's changed in the process. This code originates from SUPERCOP 20180818, available at . Signed-off-by: Jason A. Donenfeld Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm/crypto/curve25519-core.S | 2105 +++++++++++++++++++++++++++++++++++++ 1 file changed, 2105 insertions(+) create mode 100644 arch/arm/crypto/curve25519-core.S (limited to 'arch') diff --git a/arch/arm/crypto/curve25519-core.S b/arch/arm/crypto/curve25519-core.S new file mode 100644 index 000000000000..f33b85fef382 --- /dev/null +++ b/arch/arm/crypto/curve25519-core.S @@ -0,0 +1,2105 @@ +/* + * Public domain code from Daniel J. Bernstein and Peter Schwabe, from + * SUPERCOP's curve25519/neon2/scalarmult.s. 
+ */ + +.fpu neon +.text +.align 4 +.global _crypto_scalarmult_curve25519_neon2 +.global crypto_scalarmult_curve25519_neon2 +.type _crypto_scalarmult_curve25519_neon2 STT_FUNC +.type crypto_scalarmult_curve25519_neon2 STT_FUNC + _crypto_scalarmult_curve25519_neon2: + crypto_scalarmult_curve25519_neon2: + vpush {q4, q5, q6, q7} + mov r12, sp + sub sp, sp, #736 + and sp, sp, #0xffffffe0 + strd r4, [sp, #0] + strd r6, [sp, #8] + strd r8, [sp, #16] + strd r10, [sp, #24] + str r12, [sp, #480] + str r14, [sp, #484] + mov r0, r0 + mov r1, r1 + mov r2, r2 + add r3, sp, #32 + ldr r4, =0 + ldr r5, =254 + vmov.i32 q0, #1 + vshr.u64 q1, q0, #7 + vshr.u64 q0, q0, #8 + vmov.i32 d4, #19 + vmov.i32 d5, #38 + add r6, sp, #512 + vst1.8 {d2-d3}, [r6, : 128] + add r6, sp, #528 + vst1.8 {d0-d1}, [r6, : 128] + add r6, sp, #544 + vst1.8 {d4-d5}, [r6, : 128] + add r6, r3, #0 + vmov.i32 q2, #0 + vst1.8 {d4-d5}, [r6, : 128]! + vst1.8 {d4-d5}, [r6, : 128]! + vst1.8 d4, [r6, : 64] + add r6, r3, #0 + ldr r7, =960 + sub r7, r7, #2 + neg r7, r7 + sub r7, r7, r7, LSL #7 + str r7, [r6] + add r6, sp, #704 + vld1.8 {d4-d5}, [r1]! + vld1.8 {d6-d7}, [r1] + vst1.8 {d4-d5}, [r6, : 128]! + vst1.8 {d6-d7}, [r6, : 128] + sub r1, r6, #16 + ldrb r6, [r1] + and r6, r6, #248 + strb r6, [r1] + ldrb r6, [r1, #31] + and r6, r6, #127 + orr r6, r6, #64 + strb r6, [r1, #31] + vmov.i64 q2, #0xffffffff + vshr.u64 q3, q2, #7 + vshr.u64 q2, q2, #6 + vld1.8 {d8}, [r2] + vld1.8 {d10}, [r2] + add r2, r2, #6 + vld1.8 {d12}, [r2] + vld1.8 {d14}, [r2] + add r2, r2, #6 + vld1.8 {d16}, [r2] + add r2, r2, #4 + vld1.8 {d18}, [r2] + vld1.8 {d20}, [r2] + add r2, r2, #6 + vld1.8 {d22}, [r2] + add r2, r2, #2 + vld1.8 {d24}, [r2] + vld1.8 {d26}, [r2] + vshr.u64 q5, q5, #26 + vshr.u64 q6, q6, #3 + vshr.u64 q7, q7, #29 + vshr.u64 q8, q8, #6 + vshr.u64 q10, q10, #25 + vshr.u64 q11, q11, #3 + vshr.u64 q12, q12, #12 + vshr.u64 q13, q13, #38 + vand q4, q4, q2 + vand q6, q6, q2 + vand q8, q8, q2 + vand q10, q10, q2 + vand q2, q12, q2 + vand q5, q5, q3 + vand q7, q7, q3 + vand q9, q9, q3 + vand q11, q11, q3 + vand q3, q13, q3 + add r2, r3, #48 + vadd.i64 q12, q4, q1 + vadd.i64 q13, q10, q1 + vshr.s64 q12, q12, #26 + vshr.s64 q13, q13, #26 + vadd.i64 q5, q5, q12 + vshl.i64 q12, q12, #26 + vadd.i64 q14, q5, q0 + vadd.i64 q11, q11, q13 + vshl.i64 q13, q13, #26 + vadd.i64 q15, q11, q0 + vsub.i64 q4, q4, q12 + vshr.s64 q12, q14, #25 + vsub.i64 q10, q10, q13 + vshr.s64 q13, q15, #25 + vadd.i64 q6, q6, q12 + vshl.i64 q12, q12, #25 + vadd.i64 q14, q6, q1 + vadd.i64 q2, q2, q13 + vsub.i64 q5, q5, q12 + vshr.s64 q12, q14, #26 + vshl.i64 q13, q13, #25 + vadd.i64 q14, q2, q1 + vadd.i64 q7, q7, q12 + vshl.i64 q12, q12, #26 + vadd.i64 q15, q7, q0 + vsub.i64 q11, q11, q13 + vshr.s64 q13, q14, #26 + vsub.i64 q6, q6, q12 + vshr.s64 q12, q15, #25 + vadd.i64 q3, q3, q13 + vshl.i64 q13, q13, #26 + vadd.i64 q14, q3, q0 + vadd.i64 q8, q8, q12 + vshl.i64 q12, q12, #25 + vadd.i64 q15, q8, q1 + add r2, r2, #8 + vsub.i64 q2, q2, q13 + vshr.s64 q13, q14, #25 + vsub.i64 q7, q7, q12 + vshr.s64 q12, q15, #26 + vadd.i64 q14, q13, q13 + vadd.i64 q9, q9, q12 + vtrn.32 d12, d14 + vshl.i64 q12, q12, #26 + vtrn.32 d13, d15 + vadd.i64 q0, q9, q0 + vadd.i64 q4, q4, q14 + vst1.8 d12, [r2, : 64]! 
+ vshl.i64 q6, q13, #4 + vsub.i64 q7, q8, q12 + vshr.s64 q0, q0, #25 + vadd.i64 q4, q4, q6 + vadd.i64 q6, q10, q0 + vshl.i64 q0, q0, #25 + vadd.i64 q8, q6, q1 + vadd.i64 q4, q4, q13 + vshl.i64 q10, q13, #25 + vadd.i64 q1, q4, q1 + vsub.i64 q0, q9, q0 + vshr.s64 q8, q8, #26 + vsub.i64 q3, q3, q10 + vtrn.32 d14, d0 + vshr.s64 q1, q1, #26 + vtrn.32 d15, d1 + vadd.i64 q0, q11, q8 + vst1.8 d14, [r2, : 64] + vshl.i64 q7, q8, #26 + vadd.i64 q5, q5, q1 + vtrn.32 d4, d6 + vshl.i64 q1, q1, #26 + vtrn.32 d5, d7 + vsub.i64 q3, q6, q7 + add r2, r2, #16 + vsub.i64 q1, q4, q1 + vst1.8 d4, [r2, : 64] + vtrn.32 d6, d0 + vtrn.32 d7, d1 + sub r2, r2, #8 + vtrn.32 d2, d10 + vtrn.32 d3, d11 + vst1.8 d6, [r2, : 64] + sub r2, r2, #24 + vst1.8 d2, [r2, : 64] + add r2, r3, #96 + vmov.i32 q0, #0 + vmov.i64 d2, #0xff + vmov.i64 d3, #0 + vshr.u32 q1, q1, #7 + vst1.8 {d2-d3}, [r2, : 128]! + vst1.8 {d0-d1}, [r2, : 128]! + vst1.8 d0, [r2, : 64] + add r2, r3, #144 + vmov.i32 q0, #0 + vst1.8 {d0-d1}, [r2, : 128]! + vst1.8 {d0-d1}, [r2, : 128]! + vst1.8 d0, [r2, : 64] + add r2, r3, #240 + vmov.i32 q0, #0 + vmov.i64 d2, #0xff + vmov.i64 d3, #0 + vshr.u32 q1, q1, #7 + vst1.8 {d2-d3}, [r2, : 128]! + vst1.8 {d0-d1}, [r2, : 128]! + vst1.8 d0, [r2, : 64] + add r2, r3, #48 + add r6, r3, #192 + vld1.8 {d0-d1}, [r2, : 128]! + vld1.8 {d2-d3}, [r2, : 128]! + vld1.8 {d4}, [r2, : 64] + vst1.8 {d0-d1}, [r6, : 128]! + vst1.8 {d2-d3}, [r6, : 128]! + vst1.8 d4, [r6, : 64] +._mainloop: + mov r2, r5, LSR #3 + and r6, r5, #7 + ldrb r2, [r1, r2] + mov r2, r2, LSR r6 + and r2, r2, #1 + str r5, [sp, #488] + eor r4, r4, r2 + str r2, [sp, #492] + neg r2, r4 + add r4, r3, #96 + add r5, r3, #192 + add r6, r3, #144 + vld1.8 {d8-d9}, [r4, : 128]! + add r7, r3, #240 + vld1.8 {d10-d11}, [r5, : 128]! + veor q6, q4, q5 + vld1.8 {d14-d15}, [r6, : 128]! + vdup.i32 q8, r2 + vld1.8 {d18-d19}, [r7, : 128]! + veor q10, q7, q9 + vld1.8 {d22-d23}, [r4, : 128]! + vand q6, q6, q8 + vld1.8 {d24-d25}, [r5, : 128]! + vand q10, q10, q8 + vld1.8 {d26-d27}, [r6, : 128]! + veor q4, q4, q6 + vld1.8 {d28-d29}, [r7, : 128]! + veor q5, q5, q6 + vld1.8 {d0}, [r4, : 64] + veor q6, q7, q10 + vld1.8 {d2}, [r5, : 64] + veor q7, q9, q10 + vld1.8 {d4}, [r6, : 64] + veor q9, q11, q12 + vld1.8 {d6}, [r7, : 64] + veor q10, q0, q1 + sub r2, r4, #32 + vand q9, q9, q8 + sub r4, r5, #32 + vand q10, q10, q8 + sub r5, r6, #32 + veor q11, q11, q9 + sub r6, r7, #32 + veor q0, q0, q10 + veor q9, q12, q9 + veor q1, q1, q10 + veor q10, q13, q14 + veor q12, q2, q3 + vand q10, q10, q8 + vand q8, q12, q8 + veor q12, q13, q10 + veor q2, q2, q8 + veor q10, q14, q10 + veor q3, q3, q8 + vadd.i32 q8, q4, q6 + vsub.i32 q4, q4, q6 + vst1.8 {d16-d17}, [r2, : 128]! + vadd.i32 q6, q11, q12 + vst1.8 {d8-d9}, [r5, : 128]! + vsub.i32 q4, q11, q12 + vst1.8 {d12-d13}, [r2, : 128]! + vadd.i32 q6, q0, q2 + vst1.8 {d8-d9}, [r5, : 128]! + vsub.i32 q0, q0, q2 + vst1.8 d12, [r2, : 64] + vadd.i32 q2, q5, q7 + vst1.8 d0, [r5, : 64] + vsub.i32 q0, q5, q7 + vst1.8 {d4-d5}, [r4, : 128]! + vadd.i32 q2, q9, q10 + vst1.8 {d0-d1}, [r6, : 128]! + vsub.i32 q0, q9, q10 + vst1.8 {d4-d5}, [r4, : 128]! + vadd.i32 q2, q1, q3 + vst1.8 {d0-d1}, [r6, : 128]! + vsub.i32 q0, q1, q3 + vst1.8 d4, [r4, : 64] + vst1.8 d0, [r6, : 64] + add r2, sp, #544 + add r4, r3, #96 + add r5, r3, #144 + vld1.8 {d0-d1}, [r2, : 128] + vld1.8 {d2-d3}, [r4, : 128]! + vld1.8 {d4-d5}, [r5, : 128]! + vzip.i32 q1, q2 + vld1.8 {d6-d7}, [r4, : 128]! + vld1.8 {d8-d9}, [r5, : 128]! 
+ vshl.i32 q5, q1, #1 + vzip.i32 q3, q4 + vshl.i32 q6, q2, #1 + vld1.8 {d14}, [r4, : 64] + vshl.i32 q8, q3, #1 + vld1.8 {d15}, [r5, : 64] + vshl.i32 q9, q4, #1 + vmul.i32 d21, d7, d1 + vtrn.32 d14, d15 + vmul.i32 q11, q4, q0 + vmul.i32 q0, q7, q0 + vmull.s32 q12, d2, d2 + vmlal.s32 q12, d11, d1 + vmlal.s32 q12, d12, d0 + vmlal.s32 q12, d13, d23 + vmlal.s32 q12, d16, d22 + vmlal.s32 q12, d7, d21 + vmull.s32 q10, d2, d11 + vmlal.s32 q10, d4, d1 + vmlal.s32 q10, d13, d0 + vmlal.s32 q10, d6, d23 + vmlal.s32 q10, d17, d22 + vmull.s32 q13, d10, d4 + vmlal.s32 q13, d11, d3 + vmlal.s32 q13, d13, d1 + vmlal.s32 q13, d16, d0 + vmlal.s32 q13, d17, d23 + vmlal.s32 q13, d8, d22 + vmull.s32 q1, d10, d5 + vmlal.s32 q1, d11, d4 + vmlal.s32 q1, d6, d1 + vmlal.s32 q1, d17, d0 + vmlal.s32 q1, d8, d23 + vmull.s32 q14, d10, d6 + vmlal.s32 q14, d11, d13 + vmlal.s32 q14, d4, d4 + vmlal.s32 q14, d17, d1 + vmlal.s32 q14, d18, d0 + vmlal.s32 q14, d9, d23 + vmull.s32 q11, d10, d7 + vmlal.s32 q11, d11, d6 + vmlal.s32 q11, d12, d5 + vmlal.s32 q11, d8, d1 + vmlal.s32 q11, d19, d0 + vmull.s32 q15, d10, d8 + vmlal.s32 q15, d11, d17 + vmlal.s32 q15, d12, d6 + vmlal.s32 q15, d13, d5 + vmlal.s32 q15, d19, d1 + vmlal.s32 q15, d14, d0 + vmull.s32 q2, d10, d9 + vmlal.s32 q2, d11, d8 + vmlal.s32 q2, d12, d7 + vmlal.s32 q2, d13, d6 + vmlal.s32 q2, d14, d1 + vmull.s32 q0, d15, d1 + vmlal.s32 q0, d10, d14 + vmlal.s32 q0, d11, d19 + vmlal.s32 q0, d12, d8 + vmlal.s32 q0, d13, d17 + vmlal.s32 q0, d6, d6 + add r2, sp, #512 + vld1.8 {d18-d19}, [r2, : 128] + vmull.s32 q3, d16, d7 + vmlal.s32 q3, d10, d15 + vmlal.s32 q3, d11, d14 + vmlal.s32 q3, d12, d9 + vmlal.s32 q3, d13, d8 + add r2, sp, #528 + vld1.8 {d8-d9}, [r2, : 128] + vadd.i64 q5, q12, q9 + vadd.i64 q6, q15, q9 + vshr.s64 q5, q5, #26 + vshr.s64 q6, q6, #26 + vadd.i64 q7, q10, q5 + vshl.i64 q5, q5, #26 + vadd.i64 q8, q7, q4 + vadd.i64 q2, q2, q6 + vshl.i64 q6, q6, #26 + vadd.i64 q10, q2, q4 + vsub.i64 q5, q12, q5 + vshr.s64 q8, q8, #25 + vsub.i64 q6, q15, q6 + vshr.s64 q10, q10, #25 + vadd.i64 q12, q13, q8 + vshl.i64 q8, q8, #25 + vadd.i64 q13, q12, q9 + vadd.i64 q0, q0, q10 + vsub.i64 q7, q7, q8 + vshr.s64 q8, q13, #26 + vshl.i64 q10, q10, #25 + vadd.i64 q13, q0, q9 + vadd.i64 q1, q1, q8 + vshl.i64 q8, q8, #26 + vadd.i64 q15, q1, q4 + vsub.i64 q2, q2, q10 + vshr.s64 q10, q13, #26 + vsub.i64 q8, q12, q8 + vshr.s64 q12, q15, #25 + vadd.i64 q3, q3, q10 + vshl.i64 q10, q10, #26 + vadd.i64 q13, q3, q4 + vadd.i64 q14, q14, q12 + add r2, r3, #288 + vshl.i64 q12, q12, #25 + add r4, r3, #336 + vadd.i64 q15, q14, q9 + add r2, r2, #8 + vsub.i64 q0, q0, q10 + add r4, r4, #8 + vshr.s64 q10, q13, #25 + vsub.i64 q1, q1, q12 + vshr.s64 q12, q15, #26 + vadd.i64 q13, q10, q10 + vadd.i64 q11, q11, q12 + vtrn.32 d16, d2 + vshl.i64 q12, q12, #26 + vtrn.32 d17, d3 + vadd.i64 q1, q11, q4 + vadd.i64 q4, q5, q13 + vst1.8 d16, [r2, : 64]! + vshl.i64 q5, q10, #4 + vst1.8 d17, [r4, : 64]! 
+ vsub.i64 q8, q14, q12 + vshr.s64 q1, q1, #25 + vadd.i64 q4, q4, q5 + vadd.i64 q5, q6, q1 + vshl.i64 q1, q1, #25 + vadd.i64 q6, q5, q9 + vadd.i64 q4, q4, q10 + vshl.i64 q10, q10, #25 + vadd.i64 q9, q4, q9 + vsub.i64 q1, q11, q1 + vshr.s64 q6, q6, #26 + vsub.i64 q3, q3, q10 + vtrn.32 d16, d2 + vshr.s64 q9, q9, #26 + vtrn.32 d17, d3 + vadd.i64 q1, q2, q6 + vst1.8 d16, [r2, : 64] + vshl.i64 q2, q6, #26 + vst1.8 d17, [r4, : 64] + vadd.i64 q6, q7, q9 + vtrn.32 d0, d6 + vshl.i64 q7, q9, #26 + vtrn.32 d1, d7 + vsub.i64 q2, q5, q2 + add r2, r2, #16 + vsub.i64 q3, q4, q7 + vst1.8 d0, [r2, : 64] + add r4, r4, #16 + vst1.8 d1, [r4, : 64] + vtrn.32 d4, d2 + vtrn.32 d5, d3 + sub r2, r2, #8 + sub r4, r4, #8 + vtrn.32 d6, d12 + vtrn.32 d7, d13 + vst1.8 d4, [r2, : 64] + vst1.8 d5, [r4, : 64] + sub r2, r2, #24 + sub r4, r4, #24 + vst1.8 d6, [r2, : 64] + vst1.8 d7, [r4, : 64] + add r2, r3, #240 + add r4, r3, #96 + vld1.8 {d0-d1}, [r4, : 128]! + vld1.8 {d2-d3}, [r4, : 128]! + vld1.8 {d4}, [r4, : 64] + add r4, r3, #144 + vld1.8 {d6-d7}, [r4, : 128]! + vtrn.32 q0, q3 + vld1.8 {d8-d9}, [r4, : 128]! + vshl.i32 q5, q0, #4 + vtrn.32 q1, q4 + vshl.i32 q6, q3, #4 + vadd.i32 q5, q5, q0 + vadd.i32 q6, q6, q3 + vshl.i32 q7, q1, #4 + vld1.8 {d5}, [r4, : 64] + vshl.i32 q8, q4, #4 + vtrn.32 d4, d5 + vadd.i32 q7, q7, q1 + vadd.i32 q8, q8, q4 + vld1.8 {d18-d19}, [r2, : 128]! + vshl.i32 q10, q2, #4 + vld1.8 {d22-d23}, [r2, : 128]! + vadd.i32 q10, q10, q2 + vld1.8 {d24}, [r2, : 64] + vadd.i32 q5, q5, q0 + add r2, r3, #192 + vld1.8 {d26-d27}, [r2, : 128]! + vadd.i32 q6, q6, q3 + vld1.8 {d28-d29}, [r2, : 128]! + vadd.i32 q8, q8, q4 + vld1.8 {d25}, [r2, : 64] + vadd.i32 q10, q10, q2 + vtrn.32 q9, q13 + vadd.i32 q7, q7, q1 + vadd.i32 q5, q5, q0 + vtrn.32 q11, q14 + vadd.i32 q6, q6, q3 + add r2, sp, #560 + vadd.i32 q10, q10, q2 + vtrn.32 d24, d25 + vst1.8 {d12-d13}, [r2, : 128] + vshl.i32 q6, q13, #1 + add r2, sp, #576 + vst1.8 {d20-d21}, [r2, : 128] + vshl.i32 q10, q14, #1 + add r2, sp, #592 + vst1.8 {d12-d13}, [r2, : 128] + vshl.i32 q15, q12, #1 + vadd.i32 q8, q8, q4 + vext.32 d10, d31, d30, #0 + vadd.i32 q7, q7, q1 + add r2, sp, #608 + vst1.8 {d16-d17}, [r2, : 128] + vmull.s32 q8, d18, d5 + vmlal.s32 q8, d26, d4 + vmlal.s32 q8, d19, d9 + vmlal.s32 q8, d27, d3 + vmlal.s32 q8, d22, d8 + vmlal.s32 q8, d28, d2 + vmlal.s32 q8, d23, d7 + vmlal.s32 q8, d29, d1 + vmlal.s32 q8, d24, d6 + vmlal.s32 q8, d25, d0 + add r2, sp, #624 + vst1.8 {d14-d15}, [r2, : 128] + vmull.s32 q2, d18, d4 + vmlal.s32 q2, d12, d9 + vmlal.s32 q2, d13, d8 + vmlal.s32 q2, d19, d3 + vmlal.s32 q2, d22, d2 + vmlal.s32 q2, d23, d1 + vmlal.s32 q2, d24, d0 + add r2, sp, #640 + vst1.8 {d20-d21}, [r2, : 128] + vmull.s32 q7, d18, d9 + vmlal.s32 q7, d26, d3 + vmlal.s32 q7, d19, d8 + vmlal.s32 q7, d27, d2 + vmlal.s32 q7, d22, d7 + vmlal.s32 q7, d28, d1 + vmlal.s32 q7, d23, d6 + vmlal.s32 q7, d29, d0 + add r2, sp, #656 + vst1.8 {d10-d11}, [r2, : 128] + vmull.s32 q5, d18, d3 + vmlal.s32 q5, d19, d2 + vmlal.s32 q5, d22, d1 + vmlal.s32 q5, d23, d0 + vmlal.s32 q5, d12, d8 + add r2, sp, #672 + vst1.8 {d16-d17}, [r2, : 128] + vmull.s32 q4, d18, d8 + vmlal.s32 q4, d26, d2 + vmlal.s32 q4, d19, d7 + vmlal.s32 q4, d27, d1 + vmlal.s32 q4, d22, d6 + vmlal.s32 q4, d28, d0 + vmull.s32 q8, d18, d7 + vmlal.s32 q8, d26, d1 + vmlal.s32 q8, d19, d6 + vmlal.s32 q8, d27, d0 + add r2, sp, #576 + vld1.8 {d20-d21}, [r2, : 128] + vmlal.s32 q7, d24, d21 + vmlal.s32 q7, d25, d20 + vmlal.s32 q4, d23, d21 + vmlal.s32 q4, d29, d20 + vmlal.s32 q8, d22, d21 + vmlal.s32 q8, d28, d20 + vmlal.s32 q5, d24, 
d20 + add r2, sp, #576 + vst1.8 {d14-d15}, [r2, : 128] + vmull.s32 q7, d18, d6 + vmlal.s32 q7, d26, d0 + add r2, sp, #656 + vld1.8 {d30-d31}, [r2, : 128] + vmlal.s32 q2, d30, d21 + vmlal.s32 q7, d19, d21 + vmlal.s32 q7, d27, d20 + add r2, sp, #624 + vld1.8 {d26-d27}, [r2, : 128] + vmlal.s32 q4, d25, d27 + vmlal.s32 q8, d29, d27 + vmlal.s32 q8, d25, d26 + vmlal.s32 q7, d28, d27 + vmlal.s32 q7, d29, d26 + add r2, sp, #608 + vld1.8 {d28-d29}, [r2, : 128] + vmlal.s32 q4, d24, d29 + vmlal.s32 q8, d23, d29 + vmlal.s32 q8, d24, d28 + vmlal.s32 q7, d22, d29 + vmlal.s32 q7, d23, d28 + add r2, sp, #608 + vst1.8 {d8-d9}, [r2, : 128] + add r2, sp, #560 + vld1.8 {d8-d9}, [r2, : 128] + vmlal.s32 q7, d24, d9 + vmlal.s32 q7, d25, d31 + vmull.s32 q1, d18, d2 + vmlal.s32 q1, d19, d1 + vmlal.s32 q1, d22, d0 + vmlal.s32 q1, d24, d27 + vmlal.s32 q1, d23, d20 + vmlal.s32 q1, d12, d7 + vmlal.s32 q1, d13, d6 + vmull.s32 q6, d18, d1 + vmlal.s32 q6, d19, d0 + vmlal.s32 q6, d23, d27 + vmlal.s32 q6, d22, d20 + vmlal.s32 q6, d24, d26 + vmull.s32 q0, d18, d0 + vmlal.s32 q0, d22, d27 + vmlal.s32 q0, d23, d26 + vmlal.s32 q0, d24, d31 + vmlal.s32 q0, d19, d20 + add r2, sp, #640 + vld1.8 {d18-d19}, [r2, : 128] + vmlal.s32 q2, d18, d7 + vmlal.s32 q2, d19, d6 + vmlal.s32 q5, d18, d6 + vmlal.s32 q5, d19, d21 + vmlal.s32 q1, d18, d21 + vmlal.s32 q1, d19, d29 + vmlal.s32 q0, d18, d28 + vmlal.s32 q0, d19, d9 + vmlal.s32 q6, d18, d29 + vmlal.s32 q6, d19, d28 + add r2, sp, #592 + vld1.8 {d18-d19}, [r2, : 128] + add r2, sp, #512 + vld1.8 {d22-d23}, [r2, : 128] + vmlal.s32 q5, d19, d7 + vmlal.s32 q0, d18, d21 + vmlal.s32 q0, d19, d29 + vmlal.s32 q6, d18, d6 + add r2, sp, #528 + vld1.8 {d6-d7}, [r2, : 128] + vmlal.s32 q6, d19, d21 + add r2, sp, #576 + vld1.8 {d18-d19}, [r2, : 128] + vmlal.s32 q0, d30, d8 + add r2, sp, #672 + vld1.8 {d20-d21}, [r2, : 128] + vmlal.s32 q5, d30, d29 + add r2, sp, #608 + vld1.8 {d24-d25}, [r2, : 128] + vmlal.s32 q1, d30, d28 + vadd.i64 q13, q0, q11 + vadd.i64 q14, q5, q11 + vmlal.s32 q6, d30, d9 + vshr.s64 q4, q13, #26 + vshr.s64 q13, q14, #26 + vadd.i64 q7, q7, q4 + vshl.i64 q4, q4, #26 + vadd.i64 q14, q7, q3 + vadd.i64 q9, q9, q13 + vshl.i64 q13, q13, #26 + vadd.i64 q15, q9, q3 + vsub.i64 q0, q0, q4 + vshr.s64 q4, q14, #25 + vsub.i64 q5, q5, q13 + vshr.s64 q13, q15, #25 + vadd.i64 q6, q6, q4 + vshl.i64 q4, q4, #25 + vadd.i64 q14, q6, q11 + vadd.i64 q2, q2, q13 + vsub.i64 q4, q7, q4 + vshr.s64 q7, q14, #26 + vshl.i64 q13, q13, #25 + vadd.i64 q14, q2, q11 + vadd.i64 q8, q8, q7 + vshl.i64 q7, q7, #26 + vadd.i64 q15, q8, q3 + vsub.i64 q9, q9, q13 + vshr.s64 q13, q14, #26 + vsub.i64 q6, q6, q7 + vshr.s64 q7, q15, #25 + vadd.i64 q10, q10, q13 + vshl.i64 q13, q13, #26 + vadd.i64 q14, q10, q3 + vadd.i64 q1, q1, q7 + add r2, r3, #144 + vshl.i64 q7, q7, #25 + add r4, r3, #96 + vadd.i64 q15, q1, q11 + add r2, r2, #8 + vsub.i64 q2, q2, q13 + add r4, r4, #8 + vshr.s64 q13, q14, #25 + vsub.i64 q7, q8, q7 + vshr.s64 q8, q15, #26 + vadd.i64 q14, q13, q13 + vadd.i64 q12, q12, q8 + vtrn.32 d12, d14 + vshl.i64 q8, q8, #26 + vtrn.32 d13, d15 + vadd.i64 q3, q12, q3 + vadd.i64 q0, q0, q14 + vst1.8 d12, [r2, : 64]! + vshl.i64 q7, q13, #4 + vst1.8 d13, [r4, : 64]! 
+ vsub.i64 q1, q1, q8 + vshr.s64 q3, q3, #25 + vadd.i64 q0, q0, q7 + vadd.i64 q5, q5, q3 + vshl.i64 q3, q3, #25 + vadd.i64 q6, q5, q11 + vadd.i64 q0, q0, q13 + vshl.i64 q7, q13, #25 + vadd.i64 q8, q0, q11 + vsub.i64 q3, q12, q3 + vshr.s64 q6, q6, #26 + vsub.i64 q7, q10, q7 + vtrn.32 d2, d6 + vshr.s64 q8, q8, #26 + vtrn.32 d3, d7 + vadd.i64 q3, q9, q6 + vst1.8 d2, [r2, : 64] + vshl.i64 q6, q6, #26 + vst1.8 d3, [r4, : 64] + vadd.i64 q1, q4, q8 + vtrn.32 d4, d14 + vshl.i64 q4, q8, #26 + vtrn.32 d5, d15 + vsub.i64 q5, q5, q6 + add r2, r2, #16 + vsub.i64 q0, q0, q4 + vst1.8 d4, [r2, : 64] + add r4, r4, #16 + vst1.8 d5, [r4, : 64] + vtrn.32 d10, d6 + vtrn.32 d11, d7 + sub r2, r2, #8 + sub r4, r4, #8 + vtrn.32 d0, d2 + vtrn.32 d1, d3 + vst1.8 d10, [r2, : 64] + vst1.8 d11, [r4, : 64] + sub r2, r2, #24 + sub r4, r4, #24 + vst1.8 d0, [r2, : 64] + vst1.8 d1, [r4, : 64] + add r2, r3, #288 + add r4, r3, #336 + vld1.8 {d0-d1}, [r2, : 128]! + vld1.8 {d2-d3}, [r4, : 128]! + vsub.i32 q0, q0, q1 + vld1.8 {d2-d3}, [r2, : 128]! + vld1.8 {d4-d5}, [r4, : 128]! + vsub.i32 q1, q1, q2 + add r5, r3, #240 + vld1.8 {d4}, [r2, : 64] + vld1.8 {d6}, [r4, : 64] + vsub.i32 q2, q2, q3 + vst1.8 {d0-d1}, [r5, : 128]! + vst1.8 {d2-d3}, [r5, : 128]! + vst1.8 d4, [r5, : 64] + add r2, r3, #144 + add r4, r3, #96 + add r5, r3, #144 + add r6, r3, #192 + vld1.8 {d0-d1}, [r2, : 128]! + vld1.8 {d2-d3}, [r4, : 128]! + vsub.i32 q2, q0, q1 + vadd.i32 q0, q0, q1 + vld1.8 {d2-d3}, [r2, : 128]! + vld1.8 {d6-d7}, [r4, : 128]! + vsub.i32 q4, q1, q3 + vadd.i32 q1, q1, q3 + vld1.8 {d6}, [r2, : 64] + vld1.8 {d10}, [r4, : 64] + vsub.i32 q6, q3, q5 + vadd.i32 q3, q3, q5 + vst1.8 {d4-d5}, [r5, : 128]! + vst1.8 {d0-d1}, [r6, : 128]! + vst1.8 {d8-d9}, [r5, : 128]! + vst1.8 {d2-d3}, [r6, : 128]! + vst1.8 d12, [r5, : 64] + vst1.8 d6, [r6, : 64] + add r2, r3, #0 + add r4, r3, #240 + vld1.8 {d0-d1}, [r4, : 128]! + vld1.8 {d2-d3}, [r4, : 128]! + vld1.8 {d4}, [r4, : 64] + add r4, r3, #336 + vld1.8 {d6-d7}, [r4, : 128]! + vtrn.32 q0, q3 + vld1.8 {d8-d9}, [r4, : 128]! + vshl.i32 q5, q0, #4 + vtrn.32 q1, q4 + vshl.i32 q6, q3, #4 + vadd.i32 q5, q5, q0 + vadd.i32 q6, q6, q3 + vshl.i32 q7, q1, #4 + vld1.8 {d5}, [r4, : 64] + vshl.i32 q8, q4, #4 + vtrn.32 d4, d5 + vadd.i32 q7, q7, q1 + vadd.i32 q8, q8, q4 + vld1.8 {d18-d19}, [r2, : 128]! + vshl.i32 q10, q2, #4 + vld1.8 {d22-d23}, [r2, : 128]! + vadd.i32 q10, q10, q2 + vld1.8 {d24}, [r2, : 64] + vadd.i32 q5, q5, q0 + add r2, r3, #288 + vld1.8 {d26-d27}, [r2, : 128]! + vadd.i32 q6, q6, q3 + vld1.8 {d28-d29}, [r2, : 128]! 
+ vadd.i32 q8, q8, q4 + vld1.8 {d25}, [r2, : 64] + vadd.i32 q10, q10, q2 + vtrn.32 q9, q13 + vadd.i32 q7, q7, q1 + vadd.i32 q5, q5, q0 + vtrn.32 q11, q14 + vadd.i32 q6, q6, q3 + add r2, sp, #560 + vadd.i32 q10, q10, q2 + vtrn.32 d24, d25 + vst1.8 {d12-d13}, [r2, : 128] + vshl.i32 q6, q13, #1 + add r2, sp, #576 + vst1.8 {d20-d21}, [r2, : 128] + vshl.i32 q10, q14, #1 + add r2, sp, #592 + vst1.8 {d12-d13}, [r2, : 128] + vshl.i32 q15, q12, #1 + vadd.i32 q8, q8, q4 + vext.32 d10, d31, d30, #0 + vadd.i32 q7, q7, q1 + add r2, sp, #608 + vst1.8 {d16-d17}, [r2, : 128] + vmull.s32 q8, d18, d5 + vmlal.s32 q8, d26, d4 + vmlal.s32 q8, d19, d9 + vmlal.s32 q8, d27, d3 + vmlal.s32 q8, d22, d8 + vmlal.s32 q8, d28, d2 + vmlal.s32 q8, d23, d7 + vmlal.s32 q8, d29, d1 + vmlal.s32 q8, d24, d6 + vmlal.s32 q8, d25, d0 + add r2, sp, #624 + vst1.8 {d14-d15}, [r2, : 128] + vmull.s32 q2, d18, d4 + vmlal.s32 q2, d12, d9 + vmlal.s32 q2, d13, d8 + vmlal.s32 q2, d19, d3 + vmlal.s32 q2, d22, d2 + vmlal.s32 q2, d23, d1 + vmlal.s32 q2, d24, d0 + add r2, sp, #640 + vst1.8 {d20-d21}, [r2, : 128] + vmull.s32 q7, d18, d9 + vmlal.s32 q7, d26, d3 + vmlal.s32 q7, d19, d8 + vmlal.s32 q7, d27, d2 + vmlal.s32 q7, d22, d7 + vmlal.s32 q7, d28, d1 + vmlal.s32 q7, d23, d6 + vmlal.s32 q7, d29, d0 + add r2, sp, #656 + vst1.8 {d10-d11}, [r2, : 128] + vmull.s32 q5, d18, d3 + vmlal.s32 q5, d19, d2 + vmlal.s32 q5, d22, d1 + vmlal.s32 q5, d23, d0 + vmlal.s32 q5, d12, d8 + add r2, sp, #672 + vst1.8 {d16-d17}, [r2, : 128] + vmull.s32 q4, d18, d8 + vmlal.s32 q4, d26, d2 + vmlal.s32 q4, d19, d7 + vmlal.s32 q4, d27, d1 + vmlal.s32 q4, d22, d6 + vmlal.s32 q4, d28, d0 + vmull.s32 q8, d18, d7 + vmlal.s32 q8, d26, d1 + vmlal.s32 q8, d19, d6 + vmlal.s32 q8, d27, d0 + add r2, sp, #576 + vld1.8 {d20-d21}, [r2, : 128] + vmlal.s32 q7, d24, d21 + vmlal.s32 q7, d25, d20 + vmlal.s32 q4, d23, d21 + vmlal.s32 q4, d29, d20 + vmlal.s32 q8, d22, d21 + vmlal.s32 q8, d28, d20 + vmlal.s32 q5, d24, d20 + add r2, sp, #576 + vst1.8 {d14-d15}, [r2, : 128] + vmull.s32 q7, d18, d6 + vmlal.s32 q7, d26, d0 + add r2, sp, #656 + vld1.8 {d30-d31}, [r2, : 128] + vmlal.s32 q2, d30, d21 + vmlal.s32 q7, d19, d21 + vmlal.s32 q7, d27, d20 + add r2, sp, #624 + vld1.8 {d26-d27}, [r2, : 128] + vmlal.s32 q4, d25, d27 + vmlal.s32 q8, d29, d27 + vmlal.s32 q8, d25, d26 + vmlal.s32 q7, d28, d27 + vmlal.s32 q7, d29, d26 + add r2, sp, #608 + vld1.8 {d28-d29}, [r2, : 128] + vmlal.s32 q4, d24, d29 + vmlal.s32 q8, d23, d29 + vmlal.s32 q8, d24, d28 + vmlal.s32 q7, d22, d29 + vmlal.s32 q7, d23, d28 + add r2, sp, #608 + vst1.8 {d8-d9}, [r2, : 128] + add r2, sp, #560 + vld1.8 {d8-d9}, [r2, : 128] + vmlal.s32 q7, d24, d9 + vmlal.s32 q7, d25, d31 + vmull.s32 q1, d18, d2 + vmlal.s32 q1, d19, d1 + vmlal.s32 q1, d22, d0 + vmlal.s32 q1, d24, d27 + vmlal.s32 q1, d23, d20 + vmlal.s32 q1, d12, d7 + vmlal.s32 q1, d13, d6 + vmull.s32 q6, d18, d1 + vmlal.s32 q6, d19, d0 + vmlal.s32 q6, d23, d27 + vmlal.s32 q6, d22, d20 + vmlal.s32 q6, d24, d26 + vmull.s32 q0, d18, d0 + vmlal.s32 q0, d22, d27 + vmlal.s32 q0, d23, d26 + vmlal.s32 q0, d24, d31 + vmlal.s32 q0, d19, d20 + add r2, sp, #640 + vld1.8 {d18-d19}, [r2, : 128] + vmlal.s32 q2, d18, d7 + vmlal.s32 q2, d19, d6 + vmlal.s32 q5, d18, d6 + vmlal.s32 q5, d19, d21 + vmlal.s32 q1, d18, d21 + vmlal.s32 q1, d19, d29 + vmlal.s32 q0, d18, d28 + vmlal.s32 q0, d19, d9 + vmlal.s32 q6, d18, d29 + vmlal.s32 q6, d19, d28 + add r2, sp, #592 + vld1.8 {d18-d19}, [r2, : 128] + add r2, sp, #512 + vld1.8 {d22-d23}, [r2, : 128] + vmlal.s32 q5, d19, d7 + vmlal.s32 q0, d18, d21 + 
vmlal.s32 q0, d19, d29 + vmlal.s32 q6, d18, d6 + add r2, sp, #528 + vld1.8 {d6-d7}, [r2, : 128] + vmlal.s32 q6, d19, d21 + add r2, sp, #576 + vld1.8 {d18-d19}, [r2, : 128] + vmlal.s32 q0, d30, d8 + add r2, sp, #672 + vld1.8 {d20-d21}, [r2, : 128] + vmlal.s32 q5, d30, d29 + add r2, sp, #608 + vld1.8 {d24-d25}, [r2, : 128] + vmlal.s32 q1, d30, d28 + vadd.i64 q13, q0, q11 + vadd.i64 q14, q5, q11 + vmlal.s32 q6, d30, d9 + vshr.s64 q4, q13, #26 + vshr.s64 q13, q14, #26 + vadd.i64 q7, q7, q4 + vshl.i64 q4, q4, #26 + vadd.i64 q14, q7, q3 + vadd.i64 q9, q9, q13 + vshl.i64 q13, q13, #26 + vadd.i64 q15, q9, q3 + vsub.i64 q0, q0, q4 + vshr.s64 q4, q14, #25 + vsub.i64 q5, q5, q13 + vshr.s64 q13, q15, #25 + vadd.i64 q6, q6, q4 + vshl.i64 q4, q4, #25 + vadd.i64 q14, q6, q11 + vadd.i64 q2, q2, q13 + vsub.i64 q4, q7, q4 + vshr.s64 q7, q14, #26 + vshl.i64 q13, q13, #25 + vadd.i64 q14, q2, q11 + vadd.i64 q8, q8, q7 + vshl.i64 q7, q7, #26 + vadd.i64 q15, q8, q3 + vsub.i64 q9, q9, q13 + vshr.s64 q13, q14, #26 + vsub.i64 q6, q6, q7 + vshr.s64 q7, q15, #25 + vadd.i64 q10, q10, q13 + vshl.i64 q13, q13, #26 + vadd.i64 q14, q10, q3 + vadd.i64 q1, q1, q7 + add r2, r3, #288 + vshl.i64 q7, q7, #25 + add r4, r3, #96 + vadd.i64 q15, q1, q11 + add r2, r2, #8 + vsub.i64 q2, q2, q13 + add r4, r4, #8 + vshr.s64 q13, q14, #25 + vsub.i64 q7, q8, q7 + vshr.s64 q8, q15, #26 + vadd.i64 q14, q13, q13 + vadd.i64 q12, q12, q8 + vtrn.32 d12, d14 + vshl.i64 q8, q8, #26 + vtrn.32 d13, d15 + vadd.i64 q3, q12, q3 + vadd.i64 q0, q0, q14 + vst1.8 d12, [r2, : 64]! + vshl.i64 q7, q13, #4 + vst1.8 d13, [r4, : 64]! + vsub.i64 q1, q1, q8 + vshr.s64 q3, q3, #25 + vadd.i64 q0, q0, q7 + vadd.i64 q5, q5, q3 + vshl.i64 q3, q3, #25 + vadd.i64 q6, q5, q11 + vadd.i64 q0, q0, q13 + vshl.i64 q7, q13, #25 + vadd.i64 q8, q0, q11 + vsub.i64 q3, q12, q3 + vshr.s64 q6, q6, #26 + vsub.i64 q7, q10, q7 + vtrn.32 d2, d6 + vshr.s64 q8, q8, #26 + vtrn.32 d3, d7 + vadd.i64 q3, q9, q6 + vst1.8 d2, [r2, : 64] + vshl.i64 q6, q6, #26 + vst1.8 d3, [r4, : 64] + vadd.i64 q1, q4, q8 + vtrn.32 d4, d14 + vshl.i64 q4, q8, #26 + vtrn.32 d5, d15 + vsub.i64 q5, q5, q6 + add r2, r2, #16 + vsub.i64 q0, q0, q4 + vst1.8 d4, [r2, : 64] + add r4, r4, #16 + vst1.8 d5, [r4, : 64] + vtrn.32 d10, d6 + vtrn.32 d11, d7 + sub r2, r2, #8 + sub r4, r4, #8 + vtrn.32 d0, d2 + vtrn.32 d1, d3 + vst1.8 d10, [r2, : 64] + vst1.8 d11, [r4, : 64] + sub r2, r2, #24 + sub r4, r4, #24 + vst1.8 d0, [r2, : 64] + vst1.8 d1, [r4, : 64] + add r2, sp, #544 + add r4, r3, #144 + add r5, r3, #192 + vld1.8 {d0-d1}, [r2, : 128] + vld1.8 {d2-d3}, [r4, : 128]! + vld1.8 {d4-d5}, [r5, : 128]! + vzip.i32 q1, q2 + vld1.8 {d6-d7}, [r4, : 128]! + vld1.8 {d8-d9}, [r5, : 128]! 
+ vshl.i32 q5, q1, #1 + vzip.i32 q3, q4 + vshl.i32 q6, q2, #1 + vld1.8 {d14}, [r4, : 64] + vshl.i32 q8, q3, #1 + vld1.8 {d15}, [r5, : 64] + vshl.i32 q9, q4, #1 + vmul.i32 d21, d7, d1 + vtrn.32 d14, d15 + vmul.i32 q11, q4, q0 + vmul.i32 q0, q7, q0 + vmull.s32 q12, d2, d2 + vmlal.s32 q12, d11, d1 + vmlal.s32 q12, d12, d0 + vmlal.s32 q12, d13, d23 + vmlal.s32 q12, d16, d22 + vmlal.s32 q12, d7, d21 + vmull.s32 q10, d2, d11 + vmlal.s32 q10, d4, d1 + vmlal.s32 q10, d13, d0 + vmlal.s32 q10, d6, d23 + vmlal.s32 q10, d17, d22 + vmull.s32 q13, d10, d4 + vmlal.s32 q13, d11, d3 + vmlal.s32 q13, d13, d1 + vmlal.s32 q13, d16, d0 + vmlal.s32 q13, d17, d23 + vmlal.s32 q13, d8, d22 + vmull.s32 q1, d10, d5 + vmlal.s32 q1, d11, d4 + vmlal.s32 q1, d6, d1 + vmlal.s32 q1, d17, d0 + vmlal.s32 q1, d8, d23 + vmull.s32 q14, d10, d6 + vmlal.s32 q14, d11, d13 + vmlal.s32 q14, d4, d4 + vmlal.s32 q14, d17, d1 + vmlal.s32 q14, d18, d0 + vmlal.s32 q14, d9, d23 + vmull.s32 q11, d10, d7 + vmlal.s32 q11, d11, d6 + vmlal.s32 q11, d12, d5 + vmlal.s32 q11, d8, d1 + vmlal.s32 q11, d19, d0 + vmull.s32 q15, d10, d8 + vmlal.s32 q15, d11, d17 + vmlal.s32 q15, d12, d6 + vmlal.s32 q15, d13, d5 + vmlal.s32 q15, d19, d1 + vmlal.s32 q15, d14, d0 + vmull.s32 q2, d10, d9 + vmlal.s32 q2, d11, d8 + vmlal.s32 q2, d12, d7 + vmlal.s32 q2, d13, d6 + vmlal.s32 q2, d14, d1 + vmull.s32 q0, d15, d1 + vmlal.s32 q0, d10, d14 + vmlal.s32 q0, d11, d19 + vmlal.s32 q0, d12, d8 + vmlal.s32 q0, d13, d17 + vmlal.s32 q0, d6, d6 + add r2, sp, #512 + vld1.8 {d18-d19}, [r2, : 128] + vmull.s32 q3, d16, d7 + vmlal.s32 q3, d10, d15 + vmlal.s32 q3, d11, d14 + vmlal.s32 q3, d12, d9 + vmlal.s32 q3, d13, d8 + add r2, sp, #528 + vld1.8 {d8-d9}, [r2, : 128] + vadd.i64 q5, q12, q9 + vadd.i64 q6, q15, q9 + vshr.s64 q5, q5, #26 + vshr.s64 q6, q6, #26 + vadd.i64 q7, q10, q5 + vshl.i64 q5, q5, #26 + vadd.i64 q8, q7, q4 + vadd.i64 q2, q2, q6 + vshl.i64 q6, q6, #26 + vadd.i64 q10, q2, q4 + vsub.i64 q5, q12, q5 + vshr.s64 q8, q8, #25 + vsub.i64 q6, q15, q6 + vshr.s64 q10, q10, #25 + vadd.i64 q12, q13, q8 + vshl.i64 q8, q8, #25 + vadd.i64 q13, q12, q9 + vadd.i64 q0, q0, q10 + vsub.i64 q7, q7, q8 + vshr.s64 q8, q13, #26 + vshl.i64 q10, q10, #25 + vadd.i64 q13, q0, q9 + vadd.i64 q1, q1, q8 + vshl.i64 q8, q8, #26 + vadd.i64 q15, q1, q4 + vsub.i64 q2, q2, q10 + vshr.s64 q10, q13, #26 + vsub.i64 q8, q12, q8 + vshr.s64 q12, q15, #25 + vadd.i64 q3, q3, q10 + vshl.i64 q10, q10, #26 + vadd.i64 q13, q3, q4 + vadd.i64 q14, q14, q12 + add r2, r3, #144 + vshl.i64 q12, q12, #25 + add r4, r3, #192 + vadd.i64 q15, q14, q9 + add r2, r2, #8 + vsub.i64 q0, q0, q10 + add r4, r4, #8 + vshr.s64 q10, q13, #25 + vsub.i64 q1, q1, q12 + vshr.s64 q12, q15, #26 + vadd.i64 q13, q10, q10 + vadd.i64 q11, q11, q12 + vtrn.32 d16, d2 + vshl.i64 q12, q12, #26 + vtrn.32 d17, d3 + vadd.i64 q1, q11, q4 + vadd.i64 q4, q5, q13 + vst1.8 d16, [r2, : 64]! + vshl.i64 q5, q10, #4 + vst1.8 d17, [r4, : 64]! 
+ vsub.i64 q8, q14, q12 + vshr.s64 q1, q1, #25 + vadd.i64 q4, q4, q5 + vadd.i64 q5, q6, q1 + vshl.i64 q1, q1, #25 + vadd.i64 q6, q5, q9 + vadd.i64 q4, q4, q10 + vshl.i64 q10, q10, #25 + vadd.i64 q9, q4, q9 + vsub.i64 q1, q11, q1 + vshr.s64 q6, q6, #26 + vsub.i64 q3, q3, q10 + vtrn.32 d16, d2 + vshr.s64 q9, q9, #26 + vtrn.32 d17, d3 + vadd.i64 q1, q2, q6 + vst1.8 d16, [r2, : 64] + vshl.i64 q2, q6, #26 + vst1.8 d17, [r4, : 64] + vadd.i64 q6, q7, q9 + vtrn.32 d0, d6 + vshl.i64 q7, q9, #26 + vtrn.32 d1, d7 + vsub.i64 q2, q5, q2 + add r2, r2, #16 + vsub.i64 q3, q4, q7 + vst1.8 d0, [r2, : 64] + add r4, r4, #16 + vst1.8 d1, [r4, : 64] + vtrn.32 d4, d2 + vtrn.32 d5, d3 + sub r2, r2, #8 + sub r4, r4, #8 + vtrn.32 d6, d12 + vtrn.32 d7, d13 + vst1.8 d4, [r2, : 64] + vst1.8 d5, [r4, : 64] + sub r2, r2, #24 + sub r4, r4, #24 + vst1.8 d6, [r2, : 64] + vst1.8 d7, [r4, : 64] + add r2, r3, #336 + add r4, r3, #288 + vld1.8 {d0-d1}, [r2, : 128]! + vld1.8 {d2-d3}, [r4, : 128]! + vadd.i32 q0, q0, q1 + vld1.8 {d2-d3}, [r2, : 128]! + vld1.8 {d4-d5}, [r4, : 128]! + vadd.i32 q1, q1, q2 + add r5, r3, #288 + vld1.8 {d4}, [r2, : 64] + vld1.8 {d6}, [r4, : 64] + vadd.i32 q2, q2, q3 + vst1.8 {d0-d1}, [r5, : 128]! + vst1.8 {d2-d3}, [r5, : 128]! + vst1.8 d4, [r5, : 64] + add r2, r3, #48 + add r4, r3, #144 + vld1.8 {d0-d1}, [r4, : 128]! + vld1.8 {d2-d3}, [r4, : 128]! + vld1.8 {d4}, [r4, : 64] + add r4, r3, #288 + vld1.8 {d6-d7}, [r4, : 128]! + vtrn.32 q0, q3 + vld1.8 {d8-d9}, [r4, : 128]! + vshl.i32 q5, q0, #4 + vtrn.32 q1, q4 + vshl.i32 q6, q3, #4 + vadd.i32 q5, q5, q0 + vadd.i32 q6, q6, q3 + vshl.i32 q7, q1, #4 + vld1.8 {d5}, [r4, : 64] + vshl.i32 q8, q4, #4 + vtrn.32 d4, d5 + vadd.i32 q7, q7, q1 + vadd.i32 q8, q8, q4 + vld1.8 {d18-d19}, [r2, : 128]! + vshl.i32 q10, q2, #4 + vld1.8 {d22-d23}, [r2, : 128]! + vadd.i32 q10, q10, q2 + vld1.8 {d24}, [r2, : 64] + vadd.i32 q5, q5, q0 + add r2, r3, #240 + vld1.8 {d26-d27}, [r2, : 128]! + vadd.i32 q6, q6, q3 + vld1.8 {d28-d29}, [r2, : 128]! 
+ vadd.i32 q8, q8, q4 + vld1.8 {d25}, [r2, : 64] + vadd.i32 q10, q10, q2 + vtrn.32 q9, q13 + vadd.i32 q7, q7, q1 + vadd.i32 q5, q5, q0 + vtrn.32 q11, q14 + vadd.i32 q6, q6, q3 + add r2, sp, #560 + vadd.i32 q10, q10, q2 + vtrn.32 d24, d25 + vst1.8 {d12-d13}, [r2, : 128] + vshl.i32 q6, q13, #1 + add r2, sp, #576 + vst1.8 {d20-d21}, [r2, : 128] + vshl.i32 q10, q14, #1 + add r2, sp, #592 + vst1.8 {d12-d13}, [r2, : 128] + vshl.i32 q15, q12, #1 + vadd.i32 q8, q8, q4 + vext.32 d10, d31, d30, #0 + vadd.i32 q7, q7, q1 + add r2, sp, #608 + vst1.8 {d16-d17}, [r2, : 128] + vmull.s32 q8, d18, d5 + vmlal.s32 q8, d26, d4 + vmlal.s32 q8, d19, d9 + vmlal.s32 q8, d27, d3 + vmlal.s32 q8, d22, d8 + vmlal.s32 q8, d28, d2 + vmlal.s32 q8, d23, d7 + vmlal.s32 q8, d29, d1 + vmlal.s32 q8, d24, d6 + vmlal.s32 q8, d25, d0 + add r2, sp, #624 + vst1.8 {d14-d15}, [r2, : 128] + vmull.s32 q2, d18, d4 + vmlal.s32 q2, d12, d9 + vmlal.s32 q2, d13, d8 + vmlal.s32 q2, d19, d3 + vmlal.s32 q2, d22, d2 + vmlal.s32 q2, d23, d1 + vmlal.s32 q2, d24, d0 + add r2, sp, #640 + vst1.8 {d20-d21}, [r2, : 128] + vmull.s32 q7, d18, d9 + vmlal.s32 q7, d26, d3 + vmlal.s32 q7, d19, d8 + vmlal.s32 q7, d27, d2 + vmlal.s32 q7, d22, d7 + vmlal.s32 q7, d28, d1 + vmlal.s32 q7, d23, d6 + vmlal.s32 q7, d29, d0 + add r2, sp, #656 + vst1.8 {d10-d11}, [r2, : 128] + vmull.s32 q5, d18, d3 + vmlal.s32 q5, d19, d2 + vmlal.s32 q5, d22, d1 + vmlal.s32 q5, d23, d0 + vmlal.s32 q5, d12, d8 + add r2, sp, #672 + vst1.8 {d16-d17}, [r2, : 128] + vmull.s32 q4, d18, d8 + vmlal.s32 q4, d26, d2 + vmlal.s32 q4, d19, d7 + vmlal.s32 q4, d27, d1 + vmlal.s32 q4, d22, d6 + vmlal.s32 q4, d28, d0 + vmull.s32 q8, d18, d7 + vmlal.s32 q8, d26, d1 + vmlal.s32 q8, d19, d6 + vmlal.s32 q8, d27, d0 + add r2, sp, #576 + vld1.8 {d20-d21}, [r2, : 128] + vmlal.s32 q7, d24, d21 + vmlal.s32 q7, d25, d20 + vmlal.s32 q4, d23, d21 + vmlal.s32 q4, d29, d20 + vmlal.s32 q8, d22, d21 + vmlal.s32 q8, d28, d20 + vmlal.s32 q5, d24, d20 + add r2, sp, #576 + vst1.8 {d14-d15}, [r2, : 128] + vmull.s32 q7, d18, d6 + vmlal.s32 q7, d26, d0 + add r2, sp, #656 + vld1.8 {d30-d31}, [r2, : 128] + vmlal.s32 q2, d30, d21 + vmlal.s32 q7, d19, d21 + vmlal.s32 q7, d27, d20 + add r2, sp, #624 + vld1.8 {d26-d27}, [r2, : 128] + vmlal.s32 q4, d25, d27 + vmlal.s32 q8, d29, d27 + vmlal.s32 q8, d25, d26 + vmlal.s32 q7, d28, d27 + vmlal.s32 q7, d29, d26 + add r2, sp, #608 + vld1.8 {d28-d29}, [r2, : 128] + vmlal.s32 q4, d24, d29 + vmlal.s32 q8, d23, d29 + vmlal.s32 q8, d24, d28 + vmlal.s32 q7, d22, d29 + vmlal.s32 q7, d23, d28 + add r2, sp, #608 + vst1.8 {d8-d9}, [r2, : 128] + add r2, sp, #560 + vld1.8 {d8-d9}, [r2, : 128] + vmlal.s32 q7, d24, d9 + vmlal.s32 q7, d25, d31 + vmull.s32 q1, d18, d2 + vmlal.s32 q1, d19, d1 + vmlal.s32 q1, d22, d0 + vmlal.s32 q1, d24, d27 + vmlal.s32 q1, d23, d20 + vmlal.s32 q1, d12, d7 + vmlal.s32 q1, d13, d6 + vmull.s32 q6, d18, d1 + vmlal.s32 q6, d19, d0 + vmlal.s32 q6, d23, d27 + vmlal.s32 q6, d22, d20 + vmlal.s32 q6, d24, d26 + vmull.s32 q0, d18, d0 + vmlal.s32 q0, d22, d27 + vmlal.s32 q0, d23, d26 + vmlal.s32 q0, d24, d31 + vmlal.s32 q0, d19, d20 + add r2, sp, #640 + vld1.8 {d18-d19}, [r2, : 128] + vmlal.s32 q2, d18, d7 + vmlal.s32 q2, d19, d6 + vmlal.s32 q5, d18, d6 + vmlal.s32 q5, d19, d21 + vmlal.s32 q1, d18, d21 + vmlal.s32 q1, d19, d29 + vmlal.s32 q0, d18, d28 + vmlal.s32 q0, d19, d9 + vmlal.s32 q6, d18, d29 + vmlal.s32 q6, d19, d28 + add r2, sp, #592 + vld1.8 {d18-d19}, [r2, : 128] + add r2, sp, #512 + vld1.8 {d22-d23}, [r2, : 128] + vmlal.s32 q5, d19, d7 + vmlal.s32 q0, d18, d21 + 
vmlal.s32 q0, d19, d29 + vmlal.s32 q6, d18, d6 + add r2, sp, #528 + vld1.8 {d6-d7}, [r2, : 128] + vmlal.s32 q6, d19, d21 + add r2, sp, #576 + vld1.8 {d18-d19}, [r2, : 128] + vmlal.s32 q0, d30, d8 + add r2, sp, #672 + vld1.8 {d20-d21}, [r2, : 128] + vmlal.s32 q5, d30, d29 + add r2, sp, #608 + vld1.8 {d24-d25}, [r2, : 128] + vmlal.s32 q1, d30, d28 + vadd.i64 q13, q0, q11 + vadd.i64 q14, q5, q11 + vmlal.s32 q6, d30, d9 + vshr.s64 q4, q13, #26 + vshr.s64 q13, q14, #26 + vadd.i64 q7, q7, q4 + vshl.i64 q4, q4, #26 + vadd.i64 q14, q7, q3 + vadd.i64 q9, q9, q13 + vshl.i64 q13, q13, #26 + vadd.i64 q15, q9, q3 + vsub.i64 q0, q0, q4 + vshr.s64 q4, q14, #25 + vsub.i64 q5, q5, q13 + vshr.s64 q13, q15, #25 + vadd.i64 q6, q6, q4 + vshl.i64 q4, q4, #25 + vadd.i64 q14, q6, q11 + vadd.i64 q2, q2, q13 + vsub.i64 q4, q7, q4 + vshr.s64 q7, q14, #26 + vshl.i64 q13, q13, #25 + vadd.i64 q14, q2, q11 + vadd.i64 q8, q8, q7 + vshl.i64 q7, q7, #26 + vadd.i64 q15, q8, q3 + vsub.i64 q9, q9, q13 + vshr.s64 q13, q14, #26 + vsub.i64 q6, q6, q7 + vshr.s64 q7, q15, #25 + vadd.i64 q10, q10, q13 + vshl.i64 q13, q13, #26 + vadd.i64 q14, q10, q3 + vadd.i64 q1, q1, q7 + add r2, r3, #240 + vshl.i64 q7, q7, #25 + add r4, r3, #144 + vadd.i64 q15, q1, q11 + add r2, r2, #8 + vsub.i64 q2, q2, q13 + add r4, r4, #8 + vshr.s64 q13, q14, #25 + vsub.i64 q7, q8, q7 + vshr.s64 q8, q15, #26 + vadd.i64 q14, q13, q13 + vadd.i64 q12, q12, q8 + vtrn.32 d12, d14 + vshl.i64 q8, q8, #26 + vtrn.32 d13, d15 + vadd.i64 q3, q12, q3 + vadd.i64 q0, q0, q14 + vst1.8 d12, [r2, : 64]! + vshl.i64 q7, q13, #4 + vst1.8 d13, [r4, : 64]! + vsub.i64 q1, q1, q8 + vshr.s64 q3, q3, #25 + vadd.i64 q0, q0, q7 + vadd.i64 q5, q5, q3 + vshl.i64 q3, q3, #25 + vadd.i64 q6, q5, q11 + vadd.i64 q0, q0, q13 + vshl.i64 q7, q13, #25 + vadd.i64 q8, q0, q11 + vsub.i64 q3, q12, q3 + vshr.s64 q6, q6, #26 + vsub.i64 q7, q10, q7 + vtrn.32 d2, d6 + vshr.s64 q8, q8, #26 + vtrn.32 d3, d7 + vadd.i64 q3, q9, q6 + vst1.8 d2, [r2, : 64] + vshl.i64 q6, q6, #26 + vst1.8 d3, [r4, : 64] + vadd.i64 q1, q4, q8 + vtrn.32 d4, d14 + vshl.i64 q4, q8, #26 + vtrn.32 d5, d15 + vsub.i64 q5, q5, q6 + add r2, r2, #16 + vsub.i64 q0, q0, q4 + vst1.8 d4, [r2, : 64] + add r4, r4, #16 + vst1.8 d5, [r4, : 64] + vtrn.32 d10, d6 + vtrn.32 d11, d7 + sub r2, r2, #8 + sub r4, r4, #8 + vtrn.32 d0, d2 + vtrn.32 d1, d3 + vst1.8 d10, [r2, : 64] + vst1.8 d11, [r4, : 64] + sub r2, r2, #24 + sub r4, r4, #24 + vst1.8 d0, [r2, : 64] + vst1.8 d1, [r4, : 64] + ldr r2, [sp, #488] + ldr r4, [sp, #492] + subs r5, r2, #1 + bge ._mainloop + add r1, r3, #144 + add r2, r3, #336 + vld1.8 {d0-d1}, [r1, : 128]! + vld1.8 {d2-d3}, [r1, : 128]! + vld1.8 {d4}, [r1, : 64] + vst1.8 {d0-d1}, [r2, : 128]! + vst1.8 {d2-d3}, [r2, : 128]! + vst1.8 d4, [r2, : 64] + ldr r1, =0 +._invertloop: + add r2, r3, #144 + ldr r4, =0 + ldr r5, =2 + cmp r1, #1 + ldreq r5, =1 + addeq r2, r3, #336 + addeq r4, r3, #48 + cmp r1, #2 + ldreq r5, =1 + addeq r2, r3, #48 + cmp r1, #3 + ldreq r5, =5 + addeq r4, r3, #336 + cmp r1, #4 + ldreq r5, =10 + cmp r1, #5 + ldreq r5, =20 + cmp r1, #6 + ldreq r5, =10 + addeq r2, r3, #336 + addeq r4, r3, #336 + cmp r1, #7 + ldreq r5, =50 + cmp r1, #8 + ldreq r5, =100 + cmp r1, #9 + ldreq r5, =50 + addeq r2, r3, #336 + cmp r1, #10 + ldreq r5, =5 + addeq r2, r3, #48 + cmp r1, #11 + ldreq r5, =0 + addeq r2, r3, #96 + add r6, r3, #144 + add r7, r3, #288 + vld1.8 {d0-d1}, [r6, : 128]! + vld1.8 {d2-d3}, [r6, : 128]! + vld1.8 {d4}, [r6, : 64] + vst1.8 {d0-d1}, [r7, : 128]! + vst1.8 {d2-d3}, [r7, : 128]! 
+ vst1.8 d4, [r7, : 64] + cmp r5, #0 + beq ._skipsquaringloop +._squaringloop: + add r6, r3, #288 + add r7, r3, #288 + add r8, r3, #288 + vmov.i32 q0, #19 + vmov.i32 q1, #0 + vmov.i32 q2, #1 + vzip.i32 q1, q2 + vld1.8 {d4-d5}, [r7, : 128]! + vld1.8 {d6-d7}, [r7, : 128]! + vld1.8 {d9}, [r7, : 64] + vld1.8 {d10-d11}, [r6, : 128]! + add r7, sp, #416 + vld1.8 {d12-d13}, [r6, : 128]! + vmul.i32 q7, q2, q0 + vld1.8 {d8}, [r6, : 64] + vext.32 d17, d11, d10, #1 + vmul.i32 q9, q3, q0 + vext.32 d16, d10, d8, #1 + vshl.u32 q10, q5, q1 + vext.32 d22, d14, d4, #1 + vext.32 d24, d18, d6, #1 + vshl.u32 q13, q6, q1 + vshl.u32 d28, d8, d2 + vrev64.i32 d22, d22 + vmul.i32 d1, d9, d1 + vrev64.i32 d24, d24 + vext.32 d29, d8, d13, #1 + vext.32 d0, d1, d9, #1 + vrev64.i32 d0, d0 + vext.32 d2, d9, d1, #1 + vext.32 d23, d15, d5, #1 + vmull.s32 q4, d20, d4 + vrev64.i32 d23, d23 + vmlal.s32 q4, d21, d1 + vrev64.i32 d2, d2 + vmlal.s32 q4, d26, d19 + vext.32 d3, d5, d15, #1 + vmlal.s32 q4, d27, d18 + vrev64.i32 d3, d3 + vmlal.s32 q4, d28, d15 + vext.32 d14, d12, d11, #1 + vmull.s32 q5, d16, d23 + vext.32 d15, d13, d12, #1 + vmlal.s32 q5, d17, d4 + vst1.8 d8, [r7, : 64]! + vmlal.s32 q5, d14, d1 + vext.32 d12, d9, d8, #0 + vmlal.s32 q5, d15, d19 + vmov.i64 d13, #0 + vmlal.s32 q5, d29, d18 + vext.32 d25, d19, d7, #1 + vmlal.s32 q6, d20, d5 + vrev64.i32 d25, d25 + vmlal.s32 q6, d21, d4 + vst1.8 d11, [r7, : 64]! + vmlal.s32 q6, d26, d1 + vext.32 d9, d10, d10, #0 + vmlal.s32 q6, d27, d19 + vmov.i64 d8, #0 + vmlal.s32 q6, d28, d18 + vmlal.s32 q4, d16, d24 + vmlal.s32 q4, d17, d5 + vmlal.s32 q4, d14, d4 + vst1.8 d12, [r7, : 64]! + vmlal.s32 q4, d15, d1 + vext.32 d10, d13, d12, #0 + vmlal.s32 q4, d29, d19 + vmov.i64 d11, #0 + vmlal.s32 q5, d20, d6 + vmlal.s32 q5, d21, d5 + vmlal.s32 q5, d26, d4 + vext.32 d13, d8, d8, #0 + vmlal.s32 q5, d27, d1 + vmov.i64 d12, #0 + vmlal.s32 q5, d28, d19 + vst1.8 d9, [r7, : 64]! + vmlal.s32 q6, d16, d25 + vmlal.s32 q6, d17, d6 + vst1.8 d10, [r7, : 64] + vmlal.s32 q6, d14, d5 + vext.32 d8, d11, d10, #0 + vmlal.s32 q6, d15, d4 + vmov.i64 d9, #0 + vmlal.s32 q6, d29, d1 + vmlal.s32 q4, d20, d7 + vmlal.s32 q4, d21, d6 + vmlal.s32 q4, d26, d5 + vext.32 d11, d12, d12, #0 + vmlal.s32 q4, d27, d4 + vmov.i64 d10, #0 + vmlal.s32 q4, d28, d1 + vmlal.s32 q5, d16, d0 + sub r6, r7, #32 + vmlal.s32 q5, d17, d7 + vmlal.s32 q5, d14, d6 + vext.32 d30, d9, d8, #0 + vmlal.s32 q5, d15, d5 + vld1.8 {d31}, [r6, : 64]! + vmlal.s32 q5, d29, d4 + vmlal.s32 q15, d20, d0 + vext.32 d0, d6, d18, #1 + vmlal.s32 q15, d21, d25 + vrev64.i32 d0, d0 + vmlal.s32 q15, d26, d24 + vext.32 d1, d7, d19, #1 + vext.32 d7, d10, d10, #0 + vmlal.s32 q15, d27, d23 + vrev64.i32 d1, d1 + vld1.8 {d6}, [r6, : 64] + vmlal.s32 q15, d28, d22 + vmlal.s32 q3, d16, d4 + add r6, r6, #24 + vmlal.s32 q3, d17, d2 + vext.32 d4, d31, d30, #0 + vmov d17, d11 + vmlal.s32 q3, d14, d1 + vext.32 d11, d13, d13, #0 + vext.32 d13, d30, d30, #0 + vmlal.s32 q3, d15, d0 + vext.32 d1, d8, d8, #0 + vmlal.s32 q3, d29, d3 + vld1.8 {d5}, [r6, : 64] + sub r6, r6, #16 + vext.32 d10, d6, d6, #0 + vmov.i32 q1, #0xffffffff + vshl.i64 q4, q1, #25 + add r7, sp, #512 + vld1.8 {d14-d15}, [r7, : 128] + vadd.i64 q9, q2, q7 + vshl.i64 q1, q1, #26 + vshr.s64 q10, q9, #26 + vld1.8 {d0}, [r6, : 64]! + vadd.i64 q5, q5, q10 + vand q9, q9, q1 + vld1.8 {d16}, [r6, : 64]! 
+ add r6, sp, #528 + vld1.8 {d20-d21}, [r6, : 128] + vadd.i64 q11, q5, q10 + vsub.i64 q2, q2, q9 + vshr.s64 q9, q11, #25 + vext.32 d12, d5, d4, #0 + vand q11, q11, q4 + vadd.i64 q0, q0, q9 + vmov d19, d7 + vadd.i64 q3, q0, q7 + vsub.i64 q5, q5, q11 + vshr.s64 q11, q3, #26 + vext.32 d18, d11, d10, #0 + vand q3, q3, q1 + vadd.i64 q8, q8, q11 + vadd.i64 q11, q8, q10 + vsub.i64 q0, q0, q3 + vshr.s64 q3, q11, #25 + vand q11, q11, q4 + vadd.i64 q3, q6, q3 + vadd.i64 q6, q3, q7 + vsub.i64 q8, q8, q11 + vshr.s64 q11, q6, #26 + vand q6, q6, q1 + vadd.i64 q9, q9, q11 + vadd.i64 d25, d19, d21 + vsub.i64 q3, q3, q6 + vshr.s64 d23, d25, #25 + vand q4, q12, q4 + vadd.i64 d21, d23, d23 + vshl.i64 d25, d23, #4 + vadd.i64 d21, d21, d23 + vadd.i64 d25, d25, d21 + vadd.i64 d4, d4, d25 + vzip.i32 q0, q8 + vadd.i64 d12, d4, d14 + add r6, r8, #8 + vst1.8 d0, [r6, : 64] + vsub.i64 d19, d19, d9 + add r6, r6, #16 + vst1.8 d16, [r6, : 64] + vshr.s64 d22, d12, #26 + vand q0, q6, q1 + vadd.i64 d10, d10, d22 + vzip.i32 q3, q9 + vsub.i64 d4, d4, d0 + sub r6, r6, #8 + vst1.8 d6, [r6, : 64] + add r6, r6, #16 + vst1.8 d18, [r6, : 64] + vzip.i32 q2, q5 + sub r6, r6, #32 + vst1.8 d4, [r6, : 64] + subs r5, r5, #1 + bhi ._squaringloop +._skipsquaringloop: + mov r2, r2 + add r5, r3, #288 + add r6, r3, #144 + vmov.i32 q0, #19 + vmov.i32 q1, #0 + vmov.i32 q2, #1 + vzip.i32 q1, q2 + vld1.8 {d4-d5}, [r5, : 128]! + vld1.8 {d6-d7}, [r5, : 128]! + vld1.8 {d9}, [r5, : 64] + vld1.8 {d10-d11}, [r2, : 128]! + add r5, sp, #416 + vld1.8 {d12-d13}, [r2, : 128]! + vmul.i32 q7, q2, q0 + vld1.8 {d8}, [r2, : 64] + vext.32 d17, d11, d10, #1 + vmul.i32 q9, q3, q0 + vext.32 d16, d10, d8, #1 + vshl.u32 q10, q5, q1 + vext.32 d22, d14, d4, #1 + vext.32 d24, d18, d6, #1 + vshl.u32 q13, q6, q1 + vshl.u32 d28, d8, d2 + vrev64.i32 d22, d22 + vmul.i32 d1, d9, d1 + vrev64.i32 d24, d24 + vext.32 d29, d8, d13, #1 + vext.32 d0, d1, d9, #1 + vrev64.i32 d0, d0 + vext.32 d2, d9, d1, #1 + vext.32 d23, d15, d5, #1 + vmull.s32 q4, d20, d4 + vrev64.i32 d23, d23 + vmlal.s32 q4, d21, d1 + vrev64.i32 d2, d2 + vmlal.s32 q4, d26, d19 + vext.32 d3, d5, d15, #1 + vmlal.s32 q4, d27, d18 + vrev64.i32 d3, d3 + vmlal.s32 q4, d28, d15 + vext.32 d14, d12, d11, #1 + vmull.s32 q5, d16, d23 + vext.32 d15, d13, d12, #1 + vmlal.s32 q5, d17, d4 + vst1.8 d8, [r5, : 64]! + vmlal.s32 q5, d14, d1 + vext.32 d12, d9, d8, #0 + vmlal.s32 q5, d15, d19 + vmov.i64 d13, #0 + vmlal.s32 q5, d29, d18 + vext.32 d25, d19, d7, #1 + vmlal.s32 q6, d20, d5 + vrev64.i32 d25, d25 + vmlal.s32 q6, d21, d4 + vst1.8 d11, [r5, : 64]! + vmlal.s32 q6, d26, d1 + vext.32 d9, d10, d10, #0 + vmlal.s32 q6, d27, d19 + vmov.i64 d8, #0 + vmlal.s32 q6, d28, d18 + vmlal.s32 q4, d16, d24 + vmlal.s32 q4, d17, d5 + vmlal.s32 q4, d14, d4 + vst1.8 d12, [r5, : 64]! + vmlal.s32 q4, d15, d1 + vext.32 d10, d13, d12, #0 + vmlal.s32 q4, d29, d19 + vmov.i64 d11, #0 + vmlal.s32 q5, d20, d6 + vmlal.s32 q5, d21, d5 + vmlal.s32 q5, d26, d4 + vext.32 d13, d8, d8, #0 + vmlal.s32 q5, d27, d1 + vmov.i64 d12, #0 + vmlal.s32 q5, d28, d19 + vst1.8 d9, [r5, : 64]! 
+ vmlal.s32 q6, d16, d25 + vmlal.s32 q6, d17, d6 + vst1.8 d10, [r5, : 64] + vmlal.s32 q6, d14, d5 + vext.32 d8, d11, d10, #0 + vmlal.s32 q6, d15, d4 + vmov.i64 d9, #0 + vmlal.s32 q6, d29, d1 + vmlal.s32 q4, d20, d7 + vmlal.s32 q4, d21, d6 + vmlal.s32 q4, d26, d5 + vext.32 d11, d12, d12, #0 + vmlal.s32 q4, d27, d4 + vmov.i64 d10, #0 + vmlal.s32 q4, d28, d1 + vmlal.s32 q5, d16, d0 + sub r2, r5, #32 + vmlal.s32 q5, d17, d7 + vmlal.s32 q5, d14, d6 + vext.32 d30, d9, d8, #0 + vmlal.s32 q5, d15, d5 + vld1.8 {d31}, [r2, : 64]! + vmlal.s32 q5, d29, d4 + vmlal.s32 q15, d20, d0 + vext.32 d0, d6, d18, #1 + vmlal.s32 q15, d21, d25 + vrev64.i32 d0, d0 + vmlal.s32 q15, d26, d24 + vext.32 d1, d7, d19, #1 + vext.32 d7, d10, d10, #0 + vmlal.s32 q15, d27, d23 + vrev64.i32 d1, d1 + vld1.8 {d6}, [r2, : 64] + vmlal.s32 q15, d28, d22 + vmlal.s32 q3, d16, d4 + add r2, r2, #24 + vmlal.s32 q3, d17, d2 + vext.32 d4, d31, d30, #0 + vmov d17, d11 + vmlal.s32 q3, d14, d1 + vext.32 d11, d13, d13, #0 + vext.32 d13, d30, d30, #0 + vmlal.s32 q3, d15, d0 + vext.32 d1, d8, d8, #0 + vmlal.s32 q3, d29, d3 + vld1.8 {d5}, [r2, : 64] + sub r2, r2, #16 + vext.32 d10, d6, d6, #0 + vmov.i32 q1, #0xffffffff + vshl.i64 q4, q1, #25 + add r5, sp, #512 + vld1.8 {d14-d15}, [r5, : 128] + vadd.i64 q9, q2, q7 + vshl.i64 q1, q1, #26 + vshr.s64 q10, q9, #26 + vld1.8 {d0}, [r2, : 64]! + vadd.i64 q5, q5, q10 + vand q9, q9, q1 + vld1.8 {d16}, [r2, : 64]! + add r2, sp, #528 + vld1.8 {d20-d21}, [r2, : 128] + vadd.i64 q11, q5, q10 + vsub.i64 q2, q2, q9 + vshr.s64 q9, q11, #25 + vext.32 d12, d5, d4, #0 + vand q11, q11, q4 + vadd.i64 q0, q0, q9 + vmov d19, d7 + vadd.i64 q3, q0, q7 + vsub.i64 q5, q5, q11 + vshr.s64 q11, q3, #26 + vext.32 d18, d11, d10, #0 + vand q3, q3, q1 + vadd.i64 q8, q8, q11 + vadd.i64 q11, q8, q10 + vsub.i64 q0, q0, q3 + vshr.s64 q3, q11, #25 + vand q11, q11, q4 + vadd.i64 q3, q6, q3 + vadd.i64 q6, q3, q7 + vsub.i64 q8, q8, q11 + vshr.s64 q11, q6, #26 + vand q6, q6, q1 + vadd.i64 q9, q9, q11 + vadd.i64 d25, d19, d21 + vsub.i64 q3, q3, q6 + vshr.s64 d23, d25, #25 + vand q4, q12, q4 + vadd.i64 d21, d23, d23 + vshl.i64 d25, d23, #4 + vadd.i64 d21, d21, d23 + vadd.i64 d25, d25, d21 + vadd.i64 d4, d4, d25 + vzip.i32 q0, q8 + vadd.i64 d12, d4, d14 + add r2, r6, #8 + vst1.8 d0, [r2, : 64] + vsub.i64 d19, d19, d9 + add r2, r2, #16 + vst1.8 d16, [r2, : 64] + vshr.s64 d22, d12, #26 + vand q0, q6, q1 + vadd.i64 d10, d10, d22 + vzip.i32 q3, q9 + vsub.i64 d4, d4, d0 + sub r2, r2, #8 + vst1.8 d6, [r2, : 64] + add r2, r2, #16 + vst1.8 d18, [r2, : 64] + vzip.i32 q2, q5 + sub r2, r2, #32 + vst1.8 d4, [r2, : 64] + cmp r4, #0 + beq ._skippostcopy + add r2, r3, #144 + mov r4, r4 + vld1.8 {d0-d1}, [r2, : 128]! + vld1.8 {d2-d3}, [r2, : 128]! + vld1.8 {d4}, [r2, : 64] + vst1.8 {d0-d1}, [r4, : 128]! + vst1.8 {d2-d3}, [r4, : 128]! + vst1.8 d4, [r4, : 64] +._skippostcopy: + cmp r1, #1 + bne ._skipfinalcopy + add r2, r3, #288 + add r4, r3, #144 + vld1.8 {d0-d1}, [r2, : 128]! + vld1.8 {d2-d3}, [r2, : 128]! + vld1.8 {d4}, [r2, : 64] + vst1.8 {d0-d1}, [r4, : 128]! + vst1.8 {d2-d3}, [r4, : 128]! 
+ vst1.8 d4, [r4, : 64] +._skipfinalcopy: + add r1, r1, #1 + cmp r1, #12 + blo ._invertloop + add r1, r3, #144 + ldr r2, [r1], #4 + ldr r3, [r1], #4 + ldr r4, [r1], #4 + ldr r5, [r1], #4 + ldr r6, [r1], #4 + ldr r7, [r1], #4 + ldr r8, [r1], #4 + ldr r9, [r1], #4 + ldr r10, [r1], #4 + ldr r1, [r1] + add r11, r1, r1, LSL #4 + add r11, r11, r1, LSL #1 + add r11, r11, #16777216 + mov r11, r11, ASR #25 + add r11, r11, r2 + mov r11, r11, ASR #26 + add r11, r11, r3 + mov r11, r11, ASR #25 + add r11, r11, r4 + mov r11, r11, ASR #26 + add r11, r11, r5 + mov r11, r11, ASR #25 + add r11, r11, r6 + mov r11, r11, ASR #26 + add r11, r11, r7 + mov r11, r11, ASR #25 + add r11, r11, r8 + mov r11, r11, ASR #26 + add r11, r11, r9 + mov r11, r11, ASR #25 + add r11, r11, r10 + mov r11, r11, ASR #26 + add r11, r11, r1 + mov r11, r11, ASR #25 + add r2, r2, r11 + add r2, r2, r11, LSL #1 + add r2, r2, r11, LSL #4 + mov r11, r2, ASR #26 + add r3, r3, r11 + sub r2, r2, r11, LSL #26 + mov r11, r3, ASR #25 + add r4, r4, r11 + sub r3, r3, r11, LSL #25 + mov r11, r4, ASR #26 + add r5, r5, r11 + sub r4, r4, r11, LSL #26 + mov r11, r5, ASR #25 + add r6, r6, r11 + sub r5, r5, r11, LSL #25 + mov r11, r6, ASR #26 + add r7, r7, r11 + sub r6, r6, r11, LSL #26 + mov r11, r7, ASR #25 + add r8, r8, r11 + sub r7, r7, r11, LSL #25 + mov r11, r8, ASR #26 + add r9, r9, r11 + sub r8, r8, r11, LSL #26 + mov r11, r9, ASR #25 + add r10, r10, r11 + sub r9, r9, r11, LSL #25 + mov r11, r10, ASR #26 + add r1, r1, r11 + sub r10, r10, r11, LSL #26 + mov r11, r1, ASR #25 + sub r1, r1, r11, LSL #25 + add r2, r2, r3, LSL #26 + mov r3, r3, LSR #6 + add r3, r3, r4, LSL #19 + mov r4, r4, LSR #13 + add r4, r4, r5, LSL #13 + mov r5, r5, LSR #19 + add r5, r5, r6, LSL #6 + add r6, r7, r8, LSL #25 + mov r7, r8, LSR #7 + add r7, r7, r9, LSL #19 + mov r8, r9, LSR #13 + add r8, r8, r10, LSL #12 + mov r9, r10, LSR #20 + add r1, r9, r1, LSL #6 + str r2, [r0], #4 + str r3, [r0], #4 + str r4, [r0], #4 + str r5, [r0], #4 + str r6, [r0], #4 + str r7, [r0], #4 + str r8, [r0], #4 + str r1, [r0] + ldrd r4, [sp, #0] + ldrd r6, [sp, #8] + ldrd r8, [sp, #16] + ldrd r10, [sp, #24] + ldr r12, [sp, #480] + ldr r14, [sp, #484] + ldr r0, =0 + mov sp, r12 + vpop {q4, q5, q6, q7} + bx lr -- cgit v1.2.3 From d8f1308a025fc7e00414194ed742d5f05a21e13c Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 8 Nov 2019 13:22:38 +0100 Subject: crypto: arm/curve25519 - wire up NEON implementation This ports the SUPERCOP implementation for usage in kernel space. In addition to the usual header, macro, and style changes required for kernel space, it makes a few small changes to the code: - The stack alignment is relaxed to 16 bytes. - Superfluous mov statements have been removed. - ldr for constants has been replaced with movw. - ldreq has been replaced with moveq. - The str epilogue has been made more idiomatic. - SIMD registers are not pushed and popped at the beginning and end. - The prologue and epilogue have been made idiomatic. - A hole has been removed from the stack, saving 32 bytes. - We write-back the base register whenever possible for vld1.8. - Some multiplications have been reordered for better A7 performance. There are more opportunities for cleanup, since this code is from qhasm, which doesn't always do the most opportune thing. 
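[ For orientation: the new curve25519-glue.c added by this patch wires the assembly into the lib/crypto dispatcher and the crypto API. A minimal sketch of such a wrapper is shown here, modeled on the curve25519_arch() dispatch pattern of the x86 glue earlier in the series; the have_neon static key and the crypto_simd_usable() guard are illustrative assumptions, not the contents of the actual file:

	#include <asm/neon.h>
	#include <crypto/curve25519.h>
	#include <crypto/internal/simd.h>
	#include <linux/jump_label.h>
	#include <linux/linkage.h>

	/* scalar multiplication routine implemented in curve25519-core.S */
	asmlinkage void curve25519_neon(u8 mypublic[CURVE25519_KEY_SIZE],
					const u8 secret[CURVE25519_KEY_SIZE],
					const u8 basepoint[CURVE25519_KEY_SIZE]);

	static DEFINE_STATIC_KEY_FALSE(have_neon);

	void curve25519_arch(u8 out[CURVE25519_KEY_SIZE],
			     const u8 scalar[CURVE25519_KEY_SIZE],
			     const u8 point[CURVE25519_KEY_SIZE])
	{
		if (static_branch_likely(&have_neon) && crypto_simd_usable()) {
			/* NEON registers may only be touched between begin/end */
			kernel_neon_begin();
			curve25519_neon(out, scalar, point);
			kernel_neon_end();
		} else {
			/* scalar fallback from lib/crypto */
			curve25519_generic(out, scalar, point);
		}
	}

The static key would be enabled from the module init path when the CPU reports NEON support. ]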
But even prior to extensive hand optimizations, this code delivers significant performance improvements (given in get_cycles() per call):

 ------------ ----------- -------------
 |            | generic C | this commit |
 ------------ ----------- -------------
 | Cortex-A7  |     49136 |       22395 |
 ------------ ----------- -------------
 | Cortex-A17 |     17326 |        4983 |
 ------------ ----------- -------------

Signed-off-by: Jason A. Donenfeld [ardb: - move to arch/arm/crypto - wire into lib/crypto framework - implement crypto API KPP hooks ] Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm/crypto/Kconfig | 6 + arch/arm/crypto/Makefile | 2 + arch/arm/crypto/curve25519-core.S | 347 +++++++++++++++++--------------------- arch/arm/crypto/curve25519-glue.c | 127 ++++++++++++++ 4 files changed, 287 insertions(+), 195 deletions(-) create mode 100644 arch/arm/crypto/curve25519-glue.c (limited to 'arch') diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig index 166d32616fea..ab676229b0da 100644 --- a/arch/arm/crypto/Kconfig +++ b/arch/arm/crypto/Kconfig @@ -142,4 +142,10 @@ config CRYPTO_NHPOLY1305_NEON depends on KERNEL_MODE_NEON select CRYPTO_NHPOLY1305 +config CRYPTO_CURVE25519_NEON + tristate "NEON accelerated Curve25519 scalar multiplication library" + depends on KERNEL_MODE_NEON + select CRYPTO_LIB_CURVE25519_GENERIC + select CRYPTO_ARCH_HAVE_LIB_CURVE25519 + endif diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile index d568d699b3b7..b745c17d356f 100644 --- a/arch/arm/crypto/Makefile +++ b/arch/arm/crypto/Makefile @@ -12,6 +12,7 @@ obj-$(CONFIG_CRYPTO_SHA512_ARM) += sha512-arm.o obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha-neon.o obj-$(CONFIG_CRYPTO_POLY1305_ARM) += poly1305-arm.o obj-$(CONFIG_CRYPTO_NHPOLY1305_NEON) += nhpoly1305-neon.o +obj-$(CONFIG_CRYPTO_CURVE25519_NEON) += curve25519-neon.o obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o @@ -38,6 +39,7 @@ chacha-neon-y := chacha-scalar-core.o chacha-glue.o chacha-neon-$(CONFIG_KERNEL_MODE_NEON) += chacha-neon-core.o poly1305-arm-y := poly1305-core.o poly1305-glue.o nhpoly1305-neon-y := nh-neon-core.o nhpoly1305-neon-glue.o +curve25519-neon-y := curve25519-core.o curve25519-glue.o ifdef REGENERATE_ARM_CRYPTO quiet_cmd_perl = PERL $@ diff --git a/arch/arm/crypto/curve25519-core.S b/arch/arm/crypto/curve25519-core.S index f33b85fef382..be18af52e7dc 100644 --- a/arch/arm/crypto/curve25519-core.S +++ b/arch/arm/crypto/curve25519-core.S @@ -1,43 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ /* - * Public domain code from Daniel J. Bernstein and Peter Schwabe, from - * SUPERCOP's curve25519/neon2/scalarmult.s. + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + * + * Based on public domain code from Daniel J. Bernstein and Peter Schwabe. This + * began from SUPERCOP's curve25519/neon2/scalarmult.s, but has subsequently been + * manually reworked for use in kernel space.
*/ -.fpu neon +#include + .text +.fpu neon +.arch armv7-a .align 4 -.global _crypto_scalarmult_curve25519_neon2 -.global crypto_scalarmult_curve25519_neon2 -.type _crypto_scalarmult_curve25519_neon2 STT_FUNC -.type crypto_scalarmult_curve25519_neon2 STT_FUNC - _crypto_scalarmult_curve25519_neon2: - crypto_scalarmult_curve25519_neon2: - vpush {q4, q5, q6, q7} - mov r12, sp - sub sp, sp, #736 - and sp, sp, #0xffffffe0 - strd r4, [sp, #0] - strd r6, [sp, #8] - strd r8, [sp, #16] - strd r10, [sp, #24] - str r12, [sp, #480] - str r14, [sp, #484] - mov r0, r0 - mov r1, r1 - mov r2, r2 - add r3, sp, #32 - ldr r4, =0 - ldr r5, =254 + +ENTRY(curve25519_neon) + push {r4-r11, lr} + mov ip, sp + sub r3, sp, #704 + and r3, r3, #0xfffffff0 + mov sp, r3 + movw r4, #0 + movw r5, #254 vmov.i32 q0, #1 vshr.u64 q1, q0, #7 vshr.u64 q0, q0, #8 vmov.i32 d4, #19 vmov.i32 d5, #38 - add r6, sp, #512 - vst1.8 {d2-d3}, [r6, : 128] - add r6, sp, #528 - vst1.8 {d0-d1}, [r6, : 128] - add r6, sp, #544 + add r6, sp, #480 + vst1.8 {d2-d3}, [r6, : 128]! + vst1.8 {d0-d1}, [r6, : 128]! vst1.8 {d4-d5}, [r6, : 128] add r6, r3, #0 vmov.i32 q2, #0 @@ -45,12 +37,12 @@ vst1.8 {d4-d5}, [r6, : 128]! vst1.8 d4, [r6, : 64] add r6, r3, #0 - ldr r7, =960 + movw r7, #960 sub r7, r7, #2 neg r7, r7 sub r7, r7, r7, LSL #7 str r7, [r6] - add r6, sp, #704 + add r6, sp, #672 vld1.8 {d4-d5}, [r1]! vld1.8 {d6-d7}, [r1] vst1.8 {d4-d5}, [r6, : 128]! @@ -212,15 +204,15 @@ vst1.8 {d0-d1}, [r6, : 128]! vst1.8 {d2-d3}, [r6, : 128]! vst1.8 d4, [r6, : 64] -._mainloop: +.Lmainloop: mov r2, r5, LSR #3 and r6, r5, #7 ldrb r2, [r1, r2] mov r2, r2, LSR r6 and r2, r2, #1 - str r5, [sp, #488] + str r5, [sp, #456] eor r4, r4, r2 - str r2, [sp, #492] + str r2, [sp, #460] neg r2, r4 add r4, r3, #96 add r5, r3, #192 @@ -291,7 +283,7 @@ vsub.i32 q0, q1, q3 vst1.8 d4, [r4, : 64] vst1.8 d0, [r6, : 64] - add r2, sp, #544 + add r2, sp, #512 add r4, r3, #96 add r5, r3, #144 vld1.8 {d0-d1}, [r2, : 128] @@ -361,14 +353,13 @@ vmlal.s32 q0, d12, d8 vmlal.s32 q0, d13, d17 vmlal.s32 q0, d6, d6 - add r2, sp, #512 - vld1.8 {d18-d19}, [r2, : 128] + add r2, sp, #480 + vld1.8 {d18-d19}, [r2, : 128]! vmull.s32 q3, d16, d7 vmlal.s32 q3, d10, d15 vmlal.s32 q3, d11, d14 vmlal.s32 q3, d12, d9 vmlal.s32 q3, d13, d8 - add r2, sp, #528 vld1.8 {d8-d9}, [r2, : 128] vadd.i64 q5, q12, q9 vadd.i64 q6, q15, q9 @@ -502,22 +493,19 @@ vadd.i32 q5, q5, q0 vtrn.32 q11, q14 vadd.i32 q6, q6, q3 - add r2, sp, #560 + add r2, sp, #528 vadd.i32 q10, q10, q2 vtrn.32 d24, d25 - vst1.8 {d12-d13}, [r2, : 128] + vst1.8 {d12-d13}, [r2, : 128]! vshl.i32 q6, q13, #1 - add r2, sp, #576 - vst1.8 {d20-d21}, [r2, : 128] + vst1.8 {d20-d21}, [r2, : 128]! vshl.i32 q10, q14, #1 - add r2, sp, #592 - vst1.8 {d12-d13}, [r2, : 128] + vst1.8 {d12-d13}, [r2, : 128]! vshl.i32 q15, q12, #1 vadd.i32 q8, q8, q4 vext.32 d10, d31, d30, #0 vadd.i32 q7, q7, q1 - add r2, sp, #608 - vst1.8 {d16-d17}, [r2, : 128] + vst1.8 {d16-d17}, [r2, : 128]! vmull.s32 q8, d18, d5 vmlal.s32 q8, d26, d4 vmlal.s32 q8, d19, d9 @@ -528,8 +516,7 @@ vmlal.s32 q8, d29, d1 vmlal.s32 q8, d24, d6 vmlal.s32 q8, d25, d0 - add r2, sp, #624 - vst1.8 {d14-d15}, [r2, : 128] + vst1.8 {d14-d15}, [r2, : 128]! vmull.s32 q2, d18, d4 vmlal.s32 q2, d12, d9 vmlal.s32 q2, d13, d8 @@ -537,8 +524,7 @@ vmlal.s32 q2, d22, d2 vmlal.s32 q2, d23, d1 vmlal.s32 q2, d24, d0 - add r2, sp, #640 - vst1.8 {d20-d21}, [r2, : 128] + vst1.8 {d20-d21}, [r2, : 128]! 
vmull.s32 q7, d18, d9 vmlal.s32 q7, d26, d3 vmlal.s32 q7, d19, d8 @@ -547,14 +533,12 @@ vmlal.s32 q7, d28, d1 vmlal.s32 q7, d23, d6 vmlal.s32 q7, d29, d0 - add r2, sp, #656 - vst1.8 {d10-d11}, [r2, : 128] + vst1.8 {d10-d11}, [r2, : 128]! vmull.s32 q5, d18, d3 vmlal.s32 q5, d19, d2 vmlal.s32 q5, d22, d1 vmlal.s32 q5, d23, d0 vmlal.s32 q5, d12, d8 - add r2, sp, #672 vst1.8 {d16-d17}, [r2, : 128] vmull.s32 q4, d18, d8 vmlal.s32 q4, d26, d2 @@ -566,7 +550,7 @@ vmlal.s32 q8, d26, d1 vmlal.s32 q8, d19, d6 vmlal.s32 q8, d27, d0 - add r2, sp, #576 + add r2, sp, #544 vld1.8 {d20-d21}, [r2, : 128] vmlal.s32 q7, d24, d21 vmlal.s32 q7, d25, d20 @@ -575,32 +559,30 @@ vmlal.s32 q8, d22, d21 vmlal.s32 q8, d28, d20 vmlal.s32 q5, d24, d20 - add r2, sp, #576 vst1.8 {d14-d15}, [r2, : 128] vmull.s32 q7, d18, d6 vmlal.s32 q7, d26, d0 - add r2, sp, #656 + add r2, sp, #624 vld1.8 {d30-d31}, [r2, : 128] vmlal.s32 q2, d30, d21 vmlal.s32 q7, d19, d21 vmlal.s32 q7, d27, d20 - add r2, sp, #624 + add r2, sp, #592 vld1.8 {d26-d27}, [r2, : 128] vmlal.s32 q4, d25, d27 vmlal.s32 q8, d29, d27 vmlal.s32 q8, d25, d26 vmlal.s32 q7, d28, d27 vmlal.s32 q7, d29, d26 - add r2, sp, #608 + add r2, sp, #576 vld1.8 {d28-d29}, [r2, : 128] vmlal.s32 q4, d24, d29 vmlal.s32 q8, d23, d29 vmlal.s32 q8, d24, d28 vmlal.s32 q7, d22, d29 vmlal.s32 q7, d23, d28 - add r2, sp, #608 vst1.8 {d8-d9}, [r2, : 128] - add r2, sp, #560 + add r2, sp, #528 vld1.8 {d8-d9}, [r2, : 128] vmlal.s32 q7, d24, d9 vmlal.s32 q7, d25, d31 @@ -621,36 +603,36 @@ vmlal.s32 q0, d23, d26 vmlal.s32 q0, d24, d31 vmlal.s32 q0, d19, d20 - add r2, sp, #640 + add r2, sp, #608 vld1.8 {d18-d19}, [r2, : 128] vmlal.s32 q2, d18, d7 - vmlal.s32 q2, d19, d6 vmlal.s32 q5, d18, d6 - vmlal.s32 q5, d19, d21 vmlal.s32 q1, d18, d21 - vmlal.s32 q1, d19, d29 vmlal.s32 q0, d18, d28 - vmlal.s32 q0, d19, d9 vmlal.s32 q6, d18, d29 + vmlal.s32 q2, d19, d6 + vmlal.s32 q5, d19, d21 + vmlal.s32 q1, d19, d29 + vmlal.s32 q0, d19, d9 vmlal.s32 q6, d19, d28 - add r2, sp, #592 + add r2, sp, #560 vld1.8 {d18-d19}, [r2, : 128] - add r2, sp, #512 + add r2, sp, #480 vld1.8 {d22-d23}, [r2, : 128] vmlal.s32 q5, d19, d7 vmlal.s32 q0, d18, d21 vmlal.s32 q0, d19, d29 vmlal.s32 q6, d18, d6 - add r2, sp, #528 + add r2, sp, #496 vld1.8 {d6-d7}, [r2, : 128] vmlal.s32 q6, d19, d21 - add r2, sp, #576 + add r2, sp, #544 vld1.8 {d18-d19}, [r2, : 128] vmlal.s32 q0, d30, d8 - add r2, sp, #672 + add r2, sp, #640 vld1.8 {d20-d21}, [r2, : 128] vmlal.s32 q5, d30, d29 - add r2, sp, #608 + add r2, sp, #576 vld1.8 {d24-d25}, [r2, : 128] vmlal.s32 q1, d30, d28 vadd.i64 q13, q0, q11 @@ -823,22 +805,19 @@ vadd.i32 q5, q5, q0 vtrn.32 q11, q14 vadd.i32 q6, q6, q3 - add r2, sp, #560 + add r2, sp, #528 vadd.i32 q10, q10, q2 vtrn.32 d24, d25 - vst1.8 {d12-d13}, [r2, : 128] + vst1.8 {d12-d13}, [r2, : 128]! vshl.i32 q6, q13, #1 - add r2, sp, #576 - vst1.8 {d20-d21}, [r2, : 128] + vst1.8 {d20-d21}, [r2, : 128]! vshl.i32 q10, q14, #1 - add r2, sp, #592 - vst1.8 {d12-d13}, [r2, : 128] + vst1.8 {d12-d13}, [r2, : 128]! vshl.i32 q15, q12, #1 vadd.i32 q8, q8, q4 vext.32 d10, d31, d30, #0 vadd.i32 q7, q7, q1 - add r2, sp, #608 - vst1.8 {d16-d17}, [r2, : 128] + vst1.8 {d16-d17}, [r2, : 128]! vmull.s32 q8, d18, d5 vmlal.s32 q8, d26, d4 vmlal.s32 q8, d19, d9 @@ -849,8 +828,7 @@ vmlal.s32 q8, d29, d1 vmlal.s32 q8, d24, d6 vmlal.s32 q8, d25, d0 - add r2, sp, #624 - vst1.8 {d14-d15}, [r2, : 128] + vst1.8 {d14-d15}, [r2, : 128]! 
vmull.s32 q2, d18, d4 vmlal.s32 q2, d12, d9 vmlal.s32 q2, d13, d8 @@ -858,8 +836,7 @@ vmlal.s32 q2, d22, d2 vmlal.s32 q2, d23, d1 vmlal.s32 q2, d24, d0 - add r2, sp, #640 - vst1.8 {d20-d21}, [r2, : 128] + vst1.8 {d20-d21}, [r2, : 128]! vmull.s32 q7, d18, d9 vmlal.s32 q7, d26, d3 vmlal.s32 q7, d19, d8 @@ -868,15 +845,13 @@ vmlal.s32 q7, d28, d1 vmlal.s32 q7, d23, d6 vmlal.s32 q7, d29, d0 - add r2, sp, #656 - vst1.8 {d10-d11}, [r2, : 128] + vst1.8 {d10-d11}, [r2, : 128]! vmull.s32 q5, d18, d3 vmlal.s32 q5, d19, d2 vmlal.s32 q5, d22, d1 vmlal.s32 q5, d23, d0 vmlal.s32 q5, d12, d8 - add r2, sp, #672 - vst1.8 {d16-d17}, [r2, : 128] + vst1.8 {d16-d17}, [r2, : 128]! vmull.s32 q4, d18, d8 vmlal.s32 q4, d26, d2 vmlal.s32 q4, d19, d7 @@ -887,7 +862,7 @@ vmlal.s32 q8, d26, d1 vmlal.s32 q8, d19, d6 vmlal.s32 q8, d27, d0 - add r2, sp, #576 + add r2, sp, #544 vld1.8 {d20-d21}, [r2, : 128] vmlal.s32 q7, d24, d21 vmlal.s32 q7, d25, d20 @@ -896,32 +871,30 @@ vmlal.s32 q8, d22, d21 vmlal.s32 q8, d28, d20 vmlal.s32 q5, d24, d20 - add r2, sp, #576 vst1.8 {d14-d15}, [r2, : 128] vmull.s32 q7, d18, d6 vmlal.s32 q7, d26, d0 - add r2, sp, #656 + add r2, sp, #624 vld1.8 {d30-d31}, [r2, : 128] vmlal.s32 q2, d30, d21 vmlal.s32 q7, d19, d21 vmlal.s32 q7, d27, d20 - add r2, sp, #624 + add r2, sp, #592 vld1.8 {d26-d27}, [r2, : 128] vmlal.s32 q4, d25, d27 vmlal.s32 q8, d29, d27 vmlal.s32 q8, d25, d26 vmlal.s32 q7, d28, d27 vmlal.s32 q7, d29, d26 - add r2, sp, #608 + add r2, sp, #576 vld1.8 {d28-d29}, [r2, : 128] vmlal.s32 q4, d24, d29 vmlal.s32 q8, d23, d29 vmlal.s32 q8, d24, d28 vmlal.s32 q7, d22, d29 vmlal.s32 q7, d23, d28 - add r2, sp, #608 vst1.8 {d8-d9}, [r2, : 128] - add r2, sp, #560 + add r2, sp, #528 vld1.8 {d8-d9}, [r2, : 128] vmlal.s32 q7, d24, d9 vmlal.s32 q7, d25, d31 @@ -942,36 +915,36 @@ vmlal.s32 q0, d23, d26 vmlal.s32 q0, d24, d31 vmlal.s32 q0, d19, d20 - add r2, sp, #640 + add r2, sp, #608 vld1.8 {d18-d19}, [r2, : 128] vmlal.s32 q2, d18, d7 - vmlal.s32 q2, d19, d6 vmlal.s32 q5, d18, d6 - vmlal.s32 q5, d19, d21 vmlal.s32 q1, d18, d21 - vmlal.s32 q1, d19, d29 vmlal.s32 q0, d18, d28 - vmlal.s32 q0, d19, d9 vmlal.s32 q6, d18, d29 + vmlal.s32 q2, d19, d6 + vmlal.s32 q5, d19, d21 + vmlal.s32 q1, d19, d29 + vmlal.s32 q0, d19, d9 vmlal.s32 q6, d19, d28 - add r2, sp, #592 + add r2, sp, #560 vld1.8 {d18-d19}, [r2, : 128] - add r2, sp, #512 + add r2, sp, #480 vld1.8 {d22-d23}, [r2, : 128] vmlal.s32 q5, d19, d7 vmlal.s32 q0, d18, d21 vmlal.s32 q0, d19, d29 vmlal.s32 q6, d18, d6 - add r2, sp, #528 + add r2, sp, #496 vld1.8 {d6-d7}, [r2, : 128] vmlal.s32 q6, d19, d21 - add r2, sp, #576 + add r2, sp, #544 vld1.8 {d18-d19}, [r2, : 128] vmlal.s32 q0, d30, d8 - add r2, sp, #672 + add r2, sp, #640 vld1.8 {d20-d21}, [r2, : 128] vmlal.s32 q5, d30, d29 - add r2, sp, #608 + add r2, sp, #576 vld1.8 {d24-d25}, [r2, : 128] vmlal.s32 q1, d30, d28 vadd.i64 q13, q0, q11 @@ -1069,7 +1042,7 @@ sub r4, r4, #24 vst1.8 d0, [r2, : 64] vst1.8 d1, [r4, : 64] - add r2, sp, #544 + add r2, sp, #512 add r4, r3, #144 add r5, r3, #192 vld1.8 {d0-d1}, [r2, : 128] @@ -1139,14 +1112,13 @@ vmlal.s32 q0, d12, d8 vmlal.s32 q0, d13, d17 vmlal.s32 q0, d6, d6 - add r2, sp, #512 - vld1.8 {d18-d19}, [r2, : 128] + add r2, sp, #480 + vld1.8 {d18-d19}, [r2, : 128]! 
vmull.s32 q3, d16, d7 vmlal.s32 q3, d10, d15 vmlal.s32 q3, d11, d14 vmlal.s32 q3, d12, d9 vmlal.s32 q3, d13, d8 - add r2, sp, #528 vld1.8 {d8-d9}, [r2, : 128] vadd.i64 q5, q12, q9 vadd.i64 q6, q15, q9 @@ -1295,22 +1267,19 @@ vadd.i32 q5, q5, q0 vtrn.32 q11, q14 vadd.i32 q6, q6, q3 - add r2, sp, #560 + add r2, sp, #528 vadd.i32 q10, q10, q2 vtrn.32 d24, d25 - vst1.8 {d12-d13}, [r2, : 128] + vst1.8 {d12-d13}, [r2, : 128]! vshl.i32 q6, q13, #1 - add r2, sp, #576 - vst1.8 {d20-d21}, [r2, : 128] + vst1.8 {d20-d21}, [r2, : 128]! vshl.i32 q10, q14, #1 - add r2, sp, #592 - vst1.8 {d12-d13}, [r2, : 128] + vst1.8 {d12-d13}, [r2, : 128]! vshl.i32 q15, q12, #1 vadd.i32 q8, q8, q4 vext.32 d10, d31, d30, #0 vadd.i32 q7, q7, q1 - add r2, sp, #608 - vst1.8 {d16-d17}, [r2, : 128] + vst1.8 {d16-d17}, [r2, : 128]! vmull.s32 q8, d18, d5 vmlal.s32 q8, d26, d4 vmlal.s32 q8, d19, d9 @@ -1321,8 +1290,7 @@ vmlal.s32 q8, d29, d1 vmlal.s32 q8, d24, d6 vmlal.s32 q8, d25, d0 - add r2, sp, #624 - vst1.8 {d14-d15}, [r2, : 128] + vst1.8 {d14-d15}, [r2, : 128]! vmull.s32 q2, d18, d4 vmlal.s32 q2, d12, d9 vmlal.s32 q2, d13, d8 @@ -1330,8 +1298,7 @@ vmlal.s32 q2, d22, d2 vmlal.s32 q2, d23, d1 vmlal.s32 q2, d24, d0 - add r2, sp, #640 - vst1.8 {d20-d21}, [r2, : 128] + vst1.8 {d20-d21}, [r2, : 128]! vmull.s32 q7, d18, d9 vmlal.s32 q7, d26, d3 vmlal.s32 q7, d19, d8 @@ -1340,15 +1307,13 @@ vmlal.s32 q7, d28, d1 vmlal.s32 q7, d23, d6 vmlal.s32 q7, d29, d0 - add r2, sp, #656 - vst1.8 {d10-d11}, [r2, : 128] + vst1.8 {d10-d11}, [r2, : 128]! vmull.s32 q5, d18, d3 vmlal.s32 q5, d19, d2 vmlal.s32 q5, d22, d1 vmlal.s32 q5, d23, d0 vmlal.s32 q5, d12, d8 - add r2, sp, #672 - vst1.8 {d16-d17}, [r2, : 128] + vst1.8 {d16-d17}, [r2, : 128]! vmull.s32 q4, d18, d8 vmlal.s32 q4, d26, d2 vmlal.s32 q4, d19, d7 @@ -1359,7 +1324,7 @@ vmlal.s32 q8, d26, d1 vmlal.s32 q8, d19, d6 vmlal.s32 q8, d27, d0 - add r2, sp, #576 + add r2, sp, #544 vld1.8 {d20-d21}, [r2, : 128] vmlal.s32 q7, d24, d21 vmlal.s32 q7, d25, d20 @@ -1368,32 +1333,30 @@ vmlal.s32 q8, d22, d21 vmlal.s32 q8, d28, d20 vmlal.s32 q5, d24, d20 - add r2, sp, #576 vst1.8 {d14-d15}, [r2, : 128] vmull.s32 q7, d18, d6 vmlal.s32 q7, d26, d0 - add r2, sp, #656 + add r2, sp, #624 vld1.8 {d30-d31}, [r2, : 128] vmlal.s32 q2, d30, d21 vmlal.s32 q7, d19, d21 vmlal.s32 q7, d27, d20 - add r2, sp, #624 + add r2, sp, #592 vld1.8 {d26-d27}, [r2, : 128] vmlal.s32 q4, d25, d27 vmlal.s32 q8, d29, d27 vmlal.s32 q8, d25, d26 vmlal.s32 q7, d28, d27 vmlal.s32 q7, d29, d26 - add r2, sp, #608 + add r2, sp, #576 vld1.8 {d28-d29}, [r2, : 128] vmlal.s32 q4, d24, d29 vmlal.s32 q8, d23, d29 vmlal.s32 q8, d24, d28 vmlal.s32 q7, d22, d29 vmlal.s32 q7, d23, d28 - add r2, sp, #608 vst1.8 {d8-d9}, [r2, : 128] - add r2, sp, #560 + add r2, sp, #528 vld1.8 {d8-d9}, [r2, : 128] vmlal.s32 q7, d24, d9 vmlal.s32 q7, d25, d31 @@ -1414,36 +1377,36 @@ vmlal.s32 q0, d23, d26 vmlal.s32 q0, d24, d31 vmlal.s32 q0, d19, d20 - add r2, sp, #640 + add r2, sp, #608 vld1.8 {d18-d19}, [r2, : 128] vmlal.s32 q2, d18, d7 - vmlal.s32 q2, d19, d6 vmlal.s32 q5, d18, d6 - vmlal.s32 q5, d19, d21 vmlal.s32 q1, d18, d21 - vmlal.s32 q1, d19, d29 vmlal.s32 q0, d18, d28 - vmlal.s32 q0, d19, d9 vmlal.s32 q6, d18, d29 + vmlal.s32 q2, d19, d6 + vmlal.s32 q5, d19, d21 + vmlal.s32 q1, d19, d29 + vmlal.s32 q0, d19, d9 vmlal.s32 q6, d19, d28 - add r2, sp, #592 + add r2, sp, #560 vld1.8 {d18-d19}, [r2, : 128] - add r2, sp, #512 + add r2, sp, #480 vld1.8 {d22-d23}, [r2, : 128] vmlal.s32 q5, d19, d7 vmlal.s32 q0, d18, d21 vmlal.s32 q0, d19, d29 vmlal.s32 q6, d18, d6 - 
add r2, sp, #528 + add r2, sp, #496 vld1.8 {d6-d7}, [r2, : 128] vmlal.s32 q6, d19, d21 - add r2, sp, #576 + add r2, sp, #544 vld1.8 {d18-d19}, [r2, : 128] vmlal.s32 q0, d30, d8 - add r2, sp, #672 + add r2, sp, #640 vld1.8 {d20-d21}, [r2, : 128] vmlal.s32 q5, d30, d29 - add r2, sp, #608 + add r2, sp, #576 vld1.8 {d24-d25}, [r2, : 128] vmlal.s32 q1, d30, d28 vadd.i64 q13, q0, q11 @@ -1541,10 +1504,10 @@ sub r4, r4, #24 vst1.8 d0, [r2, : 64] vst1.8 d1, [r4, : 64] - ldr r2, [sp, #488] - ldr r4, [sp, #492] + ldr r2, [sp, #456] + ldr r4, [sp, #460] subs r5, r2, #1 - bge ._mainloop + bge .Lmainloop add r1, r3, #144 add r2, r3, #336 vld1.8 {d0-d1}, [r1, : 128]! @@ -1553,41 +1516,41 @@ vst1.8 {d0-d1}, [r2, : 128]! vst1.8 {d2-d3}, [r2, : 128]! vst1.8 d4, [r2, : 64] - ldr r1, =0 -._invertloop: + movw r1, #0 +.Linvertloop: add r2, r3, #144 - ldr r4, =0 - ldr r5, =2 + movw r4, #0 + movw r5, #2 cmp r1, #1 - ldreq r5, =1 + moveq r5, #1 addeq r2, r3, #336 addeq r4, r3, #48 cmp r1, #2 - ldreq r5, =1 + moveq r5, #1 addeq r2, r3, #48 cmp r1, #3 - ldreq r5, =5 + moveq r5, #5 addeq r4, r3, #336 cmp r1, #4 - ldreq r5, =10 + moveq r5, #10 cmp r1, #5 - ldreq r5, =20 + moveq r5, #20 cmp r1, #6 - ldreq r5, =10 + moveq r5, #10 addeq r2, r3, #336 addeq r4, r3, #336 cmp r1, #7 - ldreq r5, =50 + moveq r5, #50 cmp r1, #8 - ldreq r5, =100 + moveq r5, #100 cmp r1, #9 - ldreq r5, =50 + moveq r5, #50 addeq r2, r3, #336 cmp r1, #10 - ldreq r5, =5 + moveq r5, #5 addeq r2, r3, #48 cmp r1, #11 - ldreq r5, =0 + moveq r5, #0 addeq r2, r3, #96 add r6, r3, #144 add r7, r3, #288 @@ -1598,8 +1561,8 @@ vst1.8 {d2-d3}, [r7, : 128]! vst1.8 d4, [r7, : 64] cmp r5, #0 - beq ._skipsquaringloop -._squaringloop: + beq .Lskipsquaringloop +.Lsquaringloop: add r6, r3, #288 add r7, r3, #288 add r8, r3, #288 @@ -1611,7 +1574,7 @@ vld1.8 {d6-d7}, [r7, : 128]! vld1.8 {d9}, [r7, : 64] vld1.8 {d10-d11}, [r6, : 128]! - add r7, sp, #416 + add r7, sp, #384 vld1.8 {d12-d13}, [r6, : 128]! vmul.i32 q7, q2, q0 vld1.8 {d8}, [r6, : 64] @@ -1726,7 +1689,7 @@ vext.32 d10, d6, d6, #0 vmov.i32 q1, #0xffffffff vshl.i64 q4, q1, #25 - add r7, sp, #512 + add r7, sp, #480 vld1.8 {d14-d15}, [r7, : 128] vadd.i64 q9, q2, q7 vshl.i64 q1, q1, #26 @@ -1735,7 +1698,7 @@ vadd.i64 q5, q5, q10 vand q9, q9, q1 vld1.8 {d16}, [r6, : 64]! - add r6, sp, #528 + add r6, sp, #496 vld1.8 {d20-d21}, [r6, : 128] vadd.i64 q11, q5, q10 vsub.i64 q2, q2, q9 @@ -1789,8 +1752,8 @@ sub r6, r6, #32 vst1.8 d4, [r6, : 64] subs r5, r5, #1 - bhi ._squaringloop -._skipsquaringloop: + bhi .Lsquaringloop +.Lskipsquaringloop: mov r2, r2 add r5, r3, #288 add r6, r3, #144 @@ -1802,7 +1765,7 @@ vld1.8 {d6-d7}, [r5, : 128]! vld1.8 {d9}, [r5, : 64] vld1.8 {d10-d11}, [r2, : 128]! - add r5, sp, #416 + add r5, sp, #384 vld1.8 {d12-d13}, [r2, : 128]! vmul.i32 q7, q2, q0 vld1.8 {d8}, [r2, : 64] @@ -1917,7 +1880,7 @@ vext.32 d10, d6, d6, #0 vmov.i32 q1, #0xffffffff vshl.i64 q4, q1, #25 - add r5, sp, #512 + add r5, sp, #480 vld1.8 {d14-d15}, [r5, : 128] vadd.i64 q9, q2, q7 vshl.i64 q1, q1, #26 @@ -1926,7 +1889,7 @@ vadd.i64 q5, q5, q10 vand q9, q9, q1 vld1.8 {d16}, [r2, : 64]! - add r2, sp, #528 + add r2, sp, #496 vld1.8 {d20-d21}, [r2, : 128] vadd.i64 q11, q5, q10 vsub.i64 q2, q2, q9 @@ -1980,7 +1943,7 @@ sub r2, r2, #32 vst1.8 d4, [r2, : 64] cmp r4, #0 - beq ._skippostcopy + beq .Lskippostcopy add r2, r3, #144 mov r4, r4 vld1.8 {d0-d1}, [r2, : 128]! @@ -1989,9 +1952,9 @@ vst1.8 {d0-d1}, [r4, : 128]! vst1.8 {d2-d3}, [r4, : 128]! 
vst1.8 d4, [r4, : 64] -._skippostcopy: +.Lskippostcopy: cmp r1, #1 - bne ._skipfinalcopy + bne .Lskipfinalcopy add r2, r3, #288 add r4, r3, #144 vld1.8 {d0-d1}, [r2, : 128]! @@ -2000,10 +1963,10 @@ vst1.8 {d0-d1}, [r4, : 128]! vst1.8 {d2-d3}, [r4, : 128]! vst1.8 d4, [r4, : 64] -._skipfinalcopy: +.Lskipfinalcopy: add r1, r1, #1 cmp r1, #12 - blo ._invertloop + blo .Linvertloop add r1, r3, #144 ldr r2, [r1], #4 ldr r3, [r1], #4 @@ -2085,21 +2048,15 @@ add r8, r8, r10, LSL #12 mov r9, r10, LSR #20 add r1, r9, r1, LSL #6 - str r2, [r0], #4 - str r3, [r0], #4 - str r4, [r0], #4 - str r5, [r0], #4 - str r6, [r0], #4 - str r7, [r0], #4 - str r8, [r0], #4 - str r1, [r0] - ldrd r4, [sp, #0] - ldrd r6, [sp, #8] - ldrd r8, [sp, #16] - ldrd r10, [sp, #24] - ldr r12, [sp, #480] - ldr r14, [sp, #484] - ldr r0, =0 - mov sp, r12 - vpop {q4, q5, q6, q7} - bx lr + str r2, [r0] + str r3, [r0, #4] + str r4, [r0, #8] + str r5, [r0, #12] + str r6, [r0, #16] + str r7, [r0, #20] + str r8, [r0, #24] + str r1, [r0, #28] + movw r0, #0 + mov sp, ip + pop {r4-r11, pc} +ENDPROC(curve25519_neon) diff --git a/arch/arm/crypto/curve25519-glue.c b/arch/arm/crypto/curve25519-glue.c new file mode 100644 index 000000000000..2e9e12d2f642 --- /dev/null +++ b/arch/arm/crypto/curve25519-glue.c @@ -0,0 +1,127 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + * + * Based on public domain code from Daniel J. Bernstein and Peter Schwabe. This + * began from SUPERCOP's curve25519/neon2/scalarmult.s, but has subsequently been + * manually reworked for use in kernel space. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +asmlinkage void curve25519_neon(u8 mypublic[CURVE25519_KEY_SIZE], + const u8 secret[CURVE25519_KEY_SIZE], + const u8 basepoint[CURVE25519_KEY_SIZE]); + +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); + +void curve25519_arch(u8 out[CURVE25519_KEY_SIZE], + const u8 scalar[CURVE25519_KEY_SIZE], + const u8 point[CURVE25519_KEY_SIZE]) +{ + if (static_branch_likely(&have_neon) && crypto_simd_usable()) { + kernel_neon_begin(); + curve25519_neon(out, scalar, point); + kernel_neon_end(); + } else { + curve25519_generic(out, scalar, point); + } +} +EXPORT_SYMBOL(curve25519_arch); + +static int curve25519_set_secret(struct crypto_kpp *tfm, const void *buf, + unsigned int len) +{ + u8 *secret = kpp_tfm_ctx(tfm); + + if (!len) + curve25519_generate_secret(secret); + else if (len == CURVE25519_KEY_SIZE && + crypto_memneq(buf, curve25519_null_point, CURVE25519_KEY_SIZE)) + memcpy(secret, buf, CURVE25519_KEY_SIZE); + else + return -EINVAL; + return 0; +} + +static int curve25519_compute_value(struct kpp_request *req) +{ + struct crypto_kpp *tfm = crypto_kpp_reqtfm(req); + const u8 *secret = kpp_tfm_ctx(tfm); + u8 public_key[CURVE25519_KEY_SIZE]; + u8 buf[CURVE25519_KEY_SIZE]; + int copied, nbytes; + u8 const *bp; + + if (req->src) { + copied = sg_copy_to_buffer(req->src, + sg_nents_for_len(req->src, + CURVE25519_KEY_SIZE), + public_key, CURVE25519_KEY_SIZE); + if (copied != CURVE25519_KEY_SIZE) + return -EINVAL; + bp = public_key; + } else { + bp = curve25519_base_point; + } + + curve25519_arch(buf, secret, bp); + + /* might want less than we've got */ + nbytes = min_t(size_t, CURVE25519_KEY_SIZE, req->dst_len); + copied = sg_copy_from_buffer(req->dst, sg_nents_for_len(req->dst, + nbytes), + buf, nbytes); + if (copied != nbytes) + return -EINVAL; + return 0; +} + +static unsigned int 
curve25519_max_size(struct crypto_kpp *tfm) +{ + return CURVE25519_KEY_SIZE; +} + +static struct kpp_alg curve25519_alg = { + .base.cra_name = "curve25519", + .base.cra_driver_name = "curve25519-neon", + .base.cra_priority = 200, + .base.cra_module = THIS_MODULE, + .base.cra_ctxsize = CURVE25519_KEY_SIZE, + + .set_secret = curve25519_set_secret, + .generate_public_key = curve25519_compute_value, + .compute_shared_secret = curve25519_compute_value, + .max_size = curve25519_max_size, +}; + +static int __init mod_init(void) +{ + if (elf_hwcap & HWCAP_NEON) { + static_branch_enable(&have_neon); + return crypto_register_kpp(&curve25519_alg); + } + return 0; +} + +static void __exit mod_exit(void) +{ + if (elf_hwcap & HWCAP_NEON) + crypto_unregister_kpp(&curve25519_alg); +} + +module_init(mod_init); +module_exit(mod_exit); + +MODULE_ALIAS_CRYPTO("curve25519"); +MODULE_ALIAS_CRYPTO("curve25519-neon"); +MODULE_LICENSE("GPL v2"); -- cgit v1.2.3 From 5ed0794cde59365d4d5895b89bb2f7ef7ffdbd55 Mon Sep 17 00:00:00 2001 From: Michael Schmitz Date: Wed, 6 Nov 2019 15:47:29 +1300 Subject: m68k/atari: Convert Falcon IDE drivers to platform drivers Autoloading of Falcon IDE driver modules requires converting these drivers to platform drivers. Add platform device for Falcon IDE interface in Atari platform setup code. Use this in the pata_falcon driver in place of the simple platform device set up on the fly. Convert falconide driver to use the same platform device that is used by pata_falcon also. (With the introduction of a platform device for the Atari Falcon IDE interface, the old Falcon IDE driver no longer loads (resource already claimed by the platform device)). Tested (as built-in driver) on my Atari Falcon. Signed-off-by: Michael Schmitz Acked-by: Bartlomiej Zolnierkiewicz Link: https://lore.kernel.org/r/1573008449-8226-1-git-send-email-schmitzmic@gmail.com Signed-off-by: Geert Uytterhoeven --- arch/m68k/atari/config.c | 27 +++++++++++++++++++++ drivers/ata/pata_falcon.c | 42 ++++++++++++++++++++++----------- drivers/ide/falconide.c | 60 +++++++++++++++++++++++++++++++---------------- 3 files changed, 95 insertions(+), 34 deletions(-) (limited to 'arch') diff --git a/arch/m68k/atari/config.c b/arch/m68k/atari/config.c index 73bf5ea9ee1b..7ec3161e8517 100644 --- a/arch/m68k/atari/config.c +++ b/arch/m68k/atari/config.c @@ -869,8 +869,28 @@ static const struct resource atari_scsi_tt_rsrc[] __initconst = { }; #endif +/* + * Falcon IDE interface + */ + +#define FALCON_IDE_BASE 0xfff00000 + +static const struct resource atari_falconide_rsrc[] __initconst = { + { + .flags = IORESOURCE_MEM, + .start = FALCON_IDE_BASE, + .end = FALCON_IDE_BASE + 0x39, + }, + { + .flags = IORESOURCE_IRQ, + .start = IRQ_MFP_FSCSI, + .end = IRQ_MFP_FSCSI, + }, +}; + int __init atari_platform_init(void) { + struct platform_device *pdev; int rv = 0; if (!MACH_IS_ATARI) @@ -912,6 +932,13 @@ int __init atari_platform_init(void) atari_scsi_tt_rsrc, ARRAY_SIZE(atari_scsi_tt_rsrc)); #endif + if (ATARIHW_PRESENT(IDE)) { + pdev = platform_device_register_simple("atari-falcon-ide", -1, + atari_falconide_rsrc, ARRAY_SIZE(atari_falconide_rsrc)); + if (IS_ERR(pdev)) + rv = PTR_ERR(pdev); + } + return rv; } diff --git a/drivers/ata/pata_falcon.c b/drivers/ata/pata_falcon.c index 41e0d6a6cd05..27b0952fde6b 100644 --- a/drivers/ata/pata_falcon.c +++ b/drivers/ata/pata_falcon.c @@ -33,7 +33,6 @@ #define DRV_NAME "pata_falcon" #define DRV_VERSION "0.1.0" -#define ATA_HD_BASE 0xfff00000 #define ATA_HD_CONTROL 0x39 static struct scsi_host_template 
pata_falcon_sht = { @@ -120,24 +119,22 @@ static struct ata_port_operations pata_falcon_ops = { .set_mode = pata_falcon_set_mode, }; -static int pata_falcon_init_one(void) +static int __init pata_falcon_init_one(struct platform_device *pdev) { + struct resource *res; struct ata_host *host; struct ata_port *ap; - struct platform_device *pdev; void __iomem *base; - if (!MACH_IS_ATARI || !ATARIHW_PRESENT(IDE)) - return -ENODEV; - - pr_info(DRV_NAME ": Atari Falcon PATA controller\n"); + dev_info(&pdev->dev, "Atari Falcon PATA controller\n"); - pdev = platform_device_register_simple(DRV_NAME, 0, NULL, 0); - if (IS_ERR(pdev)) - return PTR_ERR(pdev); + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) + return -ENODEV; - if (!devm_request_mem_region(&pdev->dev, ATA_HD_BASE, 0x40, DRV_NAME)) { - pr_err(DRV_NAME ": resources busy\n"); + if (!devm_request_mem_region(&pdev->dev, res->start, + resource_size(res), DRV_NAME)) { + dev_err(&pdev->dev, "resources busy\n"); return -EBUSY; } @@ -152,7 +149,7 @@ static int pata_falcon_init_one(void) ap->flags |= ATA_FLAG_SLAVE_POSS | ATA_FLAG_NO_IORDY; ap->flags |= ATA_FLAG_PIO_POLLING; - base = (void __iomem *)ATA_HD_BASE; + base = (void __iomem *)res->start; ap->ioaddr.data_addr = base; ap->ioaddr.error_addr = base + 1 + 1 * 4; ap->ioaddr.feature_addr = base + 1 + 1 * 4; @@ -174,9 +171,26 @@ static int pata_falcon_init_one(void) return ata_host_activate(host, 0, NULL, 0, &pata_falcon_sht); } -module_init(pata_falcon_init_one); +static int __exit pata_falcon_remove_one(struct platform_device *pdev) +{ + struct ata_host *host = platform_get_drvdata(pdev); + + ata_host_detach(host); + + return 0; +} + +static struct platform_driver pata_falcon_driver = { + .remove = __exit_p(pata_falcon_remove_one), + .driver = { + .name = "atari-falcon-ide", + }, +}; + +module_platform_driver_probe(pata_falcon_driver, pata_falcon_init_one); MODULE_AUTHOR("Bartlomiej Zolnierkiewicz"); MODULE_DESCRIPTION("low-level driver for Atari Falcon PATA"); MODULE_LICENSE("GPL v2"); +MODULE_ALIAS("platform:atari-falcon-ide"); MODULE_VERSION(DRV_VERSION); diff --git a/drivers/ide/falconide.c b/drivers/ide/falconide.c index a5a07ccb81a7..dbeb2605e5f6 100644 --- a/drivers/ide/falconide.c +++ b/drivers/ide/falconide.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -25,13 +26,7 @@ #define DRV_NAME "falconide" /* - * Base of the IDE interface - */ - -#define ATA_HD_BASE 0xfff00000 - - /* - * Offsets from the above base + * Offsets from base address */ #define ATA_HD_CONTROL 0x39 @@ -114,18 +109,18 @@ static const struct ide_port_info falconide_port_info = { .chipset = ide_generic, }; -static void __init falconide_setup_ports(struct ide_hw *hw) +static void __init falconide_setup_ports(struct ide_hw *hw, unsigned long base) { int i; memset(hw, 0, sizeof(*hw)); - hw->io_ports.data_addr = ATA_HD_BASE; + hw->io_ports.data_addr = base; for (i = 1; i < 8; i++) - hw->io_ports_array[i] = ATA_HD_BASE + 1 + i * 4; + hw->io_ports_array[i] = base + 1 + i * 4; - hw->io_ports.ctl_addr = ATA_HD_BASE + ATA_HD_CONTROL; + hw->io_ports.ctl_addr = base + ATA_HD_CONTROL; hw->irq = IRQ_MFP_IDE; } @@ -134,23 +129,29 @@ static void __init falconide_setup_ports(struct ide_hw *hw) * Probe for a Falcon IDE interface */ -static int __init falconide_init(void) +static int __init falconide_init(struct platform_device *pdev) { + struct resource *res; struct ide_host *host; struct ide_hw hw, *hws[] = { &hw }; + unsigned long base; int rc; - if (!MACH_IS_ATARI || 
!ATARIHW_PRESENT(IDE))
-		return -ENODEV;
+	dev_info(&pdev->dev, "Atari Falcon IDE controller\n");
 
-	printk(KERN_INFO "ide: Falcon IDE controller\n");
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res)
+		return -ENODEV;
 
-	if (!request_mem_region(ATA_HD_BASE, 0x40, DRV_NAME)) {
-		printk(KERN_ERR "%s: resources busy\n", DRV_NAME);
+	if (!devm_request_mem_region(&pdev->dev, res->start,
+				     resource_size(res), DRV_NAME)) {
+		dev_err(&pdev->dev, "resources busy\n");
 		return -EBUSY;
 	}
 
-	falconide_setup_ports(&hw);
+	base = (unsigned long)res->start;
+
+	falconide_setup_ports(&hw, base);
 
 	host = ide_host_alloc(&falconide_port_info, hws, 1);
 	if (host == NULL) {
@@ -169,10 +170,29 @@ err_free:
 	ide_host_free(host);
 err:
-	release_mem_region(ATA_HD_BASE, 0x40);
+	release_mem_region(res->start, resource_size(res));
 	return rc;
 }
 
-module_init(falconide_init);
+static int falconide_remove(struct platform_device *pdev)
+{
+	struct ide_host *host = dev_get_drvdata(&pdev->dev);
+
+	ide_host_remove(host);
+
+	return 0;
+}
+
+static struct platform_driver ide_falcon_driver = {
+	.remove = falconide_remove,
+	.driver = {
+		.name = "atari-falcon-ide",
+	},
+};
+
+module_platform_driver_probe(ide_falcon_driver, falconide_init);
+MODULE_AUTHOR("Geert Uytterhoeven");
+MODULE_DESCRIPTION("low-level driver for Atari Falcon IDE");
 MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:atari-falcon-ide");
-- cgit v1.2.3

From 4e9b4a6883dd97aff53ae3b08eb900716a5469dc Mon Sep 17 00:00:00 2001
From: Ilya Leoshkevich
Date: Mon, 18 Nov 2019 19:03:35 +0100
Subject: s390/bpf: Use relative long branches

Currently the maximum JITed code size is limited to 64k, because the JIT
can emit only relative short branches, whose range is limited to 64k in
both directions.

Teach the JIT to use relative long branches. There are no compare+branch
relative long instructions, so using relative long branches consumes more
space due to having to emit an explicit comparison instruction. Therefore
do this only when a relative short branch is not enough.
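As an illustration, the encoding choice can be modelled in plain C. This
is a user-space sketch only: is_valid_rel() mirrors the helper added
below, while pick_branch() and the main() harness are invented for the
example.

  #include <stdbool.h>
  #include <stdio.h>

  /* Relative short offsets are signed 16-bit halfword counts: +/-64k. */
  static bool is_valid_rel(int rel)
  {
          return rel >= -65536 && rel <= 65534;
  }

  /* Decide how to branch from byte offset "prg" to "target". */
  static const char *pick_branch(int prg, int target)
  {
          if (is_valid_rel(target - prg))
                  return "fused compare and branch (crj/clrj/...)";
          return "explicit compare, then brcl";
  }

  int main(void)
  {
          printf("%s\n", pick_branch(0, 0x8000));   /* short is enough */
          printf("%s\n", pick_branch(0, 0x100000)); /* needs brcl */
          return 0;
  }

Only the second case pays for the separate comparison instruction.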
Signed-off-by: Ilya Leoshkevich Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20191118180340.68373-2-iii@linux.ibm.com --- arch/s390/net/bpf_jit_comp.c | 158 ++++++++++++++++++++++++++++++++++--------- 1 file changed, 126 insertions(+), 32 deletions(-) (limited to 'arch') diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 7bddb27c81e3..5ee1ebc6e448 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -189,6 +189,12 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1) _EMIT4((op) | __pcrel); \ }) +#define EMIT4_PCREL_RIC(op, mask, target) \ +({ \ + int __rel = ((target) - jit->prg) / 2; \ + _EMIT4((op) | (mask) << 20 | (__rel & 0xffff)); \ +}) + #define _EMIT6(op1, op2) \ ({ \ if (jit->prg_buf) { \ @@ -250,17 +256,22 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1) #define EMIT6_PCREL_RILB(op, b, target) \ ({ \ - int rel = ((target) - jit->prg) / 2; \ + unsigned int rel = (int)((target) - jit->prg) / 2; \ _EMIT6((op) | reg_high(b) << 16 | rel >> 16, rel & 0xffff);\ REG_SET_SEEN(b); \ }) #define EMIT6_PCREL_RIL(op, target) \ ({ \ - int rel = ((target) - jit->prg) / 2; \ + unsigned int rel = (int)((target) - jit->prg) / 2; \ _EMIT6((op) | rel >> 16, rel & 0xffff); \ }) +#define EMIT6_PCREL_RILC(op, mask, target) \ +({ \ + EMIT6_PCREL_RIL((op) | (mask) << 20, (target)); \ +}) + #define _EMIT6_IMM(op, imm) \ ({ \ unsigned int __imm = (imm); \ @@ -322,6 +333,22 @@ static bool is_codegen_pass(struct bpf_jit *jit) return jit->prg_buf; } +/* + * Return whether "rel" can be encoded as a short PC-relative offset + */ +static bool is_valid_rel(int rel) +{ + return rel >= -65536 && rel <= 65534; +} + +/* + * Return whether "off" can be reached using a short PC-relative offset + */ +static bool can_use_rel(struct bpf_jit *jit, int off) +{ + return is_valid_rel(off - jit->prg); +} + /* * Fill whole space with illegal instructions */ @@ -525,9 +552,9 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i, bool extra_pass) { struct bpf_insn *insn = &fp->insnsi[i]; - int jmp_off, last, insn_count = 1; u32 dst_reg = insn->dst_reg; u32 src_reg = insn->src_reg; + int last, insn_count = 1; u32 *addrs = jit->addrs; s32 imm = insn->imm; s16 off = insn->off; @@ -1071,9 +1098,17 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, /* llgf %w1,map.max_entries(%b2) */ EMIT6_DISP_LH(0xe3000000, 0x0016, REG_W1, REG_0, BPF_REG_2, offsetof(struct bpf_array, map.max_entries)); - /* clrj %b3,%w1,0xa,label0: if (u32)%b3 >= (u32)%w1 goto out */ - EMIT6_PCREL_LABEL(0xec000000, 0x0077, BPF_REG_3, - REG_W1, 0, 0xa); + /* if ((u32)%b3 >= (u32)%w1) goto out; */ + if (!is_first_pass(jit) && can_use_rel(jit, jit->labels[0])) { + /* clrj %b3,%w1,0xa,label0 */ + EMIT6_PCREL_LABEL(0xec000000, 0x0077, BPF_REG_3, + REG_W1, 0, 0xa); + } else { + /* clr %b3,%w1 */ + EMIT2(0x1500, BPF_REG_3, REG_W1); + /* brcl 0xa,label0 */ + EMIT6_PCREL_RILC(0xc0040000, 0xa, jit->labels[0]); + } /* * if (tail_call_cnt++ > MAX_TAIL_CALL_CNT) @@ -1088,9 +1123,16 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, EMIT4_IMM(0xa7080000, REG_W0, 1); /* laal %w1,%w0,off(%r15) */ EMIT6_DISP_LH(0xeb000000, 0x00fa, REG_W1, REG_W0, REG_15, off); - /* clij %w1,MAX_TAIL_CALL_CNT,0x2,label0 */ - EMIT6_PCREL_IMM_LABEL(0xec000000, 0x007f, REG_W1, - MAX_TAIL_CALL_CNT, 0, 0x2); + if (!is_first_pass(jit) && can_use_rel(jit, jit->labels[0])) { + /* clij %w1,MAX_TAIL_CALL_CNT,0x2,label0 */ + 
EMIT6_PCREL_IMM_LABEL(0xec000000, 0x007f, REG_W1, + MAX_TAIL_CALL_CNT, 0, 0x2); + } else { + /* clfi %w1,MAX_TAIL_CALL_CNT */ + EMIT6_IMM(0xc20f0000, REG_W1, MAX_TAIL_CALL_CNT); + /* brcl 0x2,label0 */ + EMIT6_PCREL_RILC(0xc0040000, 0x2, jit->labels[0]); + } /* * prog = array->ptrs[index]; @@ -1102,11 +1144,16 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, EMIT4(0xb9160000, REG_1, BPF_REG_3); /* sllg %r1,%r1,3: %r1 *= 8 */ EMIT6_DISP_LH(0xeb000000, 0x000d, REG_1, REG_1, REG_0, 3); - /* lg %r1,prog(%b2,%r1) */ - EMIT6_DISP_LH(0xe3000000, 0x0004, REG_1, BPF_REG_2, + /* ltg %r1,prog(%b2,%r1) */ + EMIT6_DISP_LH(0xe3000000, 0x0002, REG_1, BPF_REG_2, REG_1, offsetof(struct bpf_array, ptrs)); - /* clgij %r1,0,0x8,label0 */ - EMIT6_PCREL_IMM_LABEL(0xec000000, 0x007d, REG_1, 0, 0, 0x8); + if (!is_first_pass(jit) && can_use_rel(jit, jit->labels[0])) { + /* brc 0x8,label0 */ + EMIT4_PCREL_RIC(0xa7040000, 0x8, jit->labels[0]); + } else { + /* brcl 0x8,label0 */ + EMIT6_PCREL_RILC(0xc0040000, 0x8, jit->labels[0]); + } /* * Restore registers before calling function @@ -1263,36 +1310,83 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, goto branch_oc; branch_ks: is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32; - /* lgfi %w1,imm (load sign extend imm) */ - EMIT6_IMM(0xc0010000, REG_W1, imm); - /* crj or cgrj %dst,%w1,mask,off */ - EMIT6_PCREL(0xec000000, (is_jmp32 ? 0x0076 : 0x0064), - dst_reg, REG_W1, i, off, mask); + /* cfi or cgfi %dst,imm */ + EMIT6_IMM(is_jmp32 ? 0xc20d0000 : 0xc20c0000, + dst_reg, imm); + if (!is_first_pass(jit) && + can_use_rel(jit, addrs[i + off + 1])) { + /* brc mask,off */ + EMIT4_PCREL_RIC(0xa7040000, + mask >> 12, addrs[i + off + 1]); + } else { + /* brcl mask,off */ + EMIT6_PCREL_RILC(0xc0040000, + mask >> 12, addrs[i + off + 1]); + } break; branch_ku: is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32; - /* lgfi %w1,imm (load sign extend imm) */ - EMIT6_IMM(0xc0010000, REG_W1, imm); - /* clrj or clgrj %dst,%w1,mask,off */ - EMIT6_PCREL(0xec000000, (is_jmp32 ? 0x0077 : 0x0065), - dst_reg, REG_W1, i, off, mask); + /* clfi or clgfi %dst,imm */ + EMIT6_IMM(is_jmp32 ? 0xc20f0000 : 0xc20e0000, + dst_reg, imm); + if (!is_first_pass(jit) && + can_use_rel(jit, addrs[i + off + 1])) { + /* brc mask,off */ + EMIT4_PCREL_RIC(0xa7040000, + mask >> 12, addrs[i + off + 1]); + } else { + /* brcl mask,off */ + EMIT6_PCREL_RILC(0xc0040000, + mask >> 12, addrs[i + off + 1]); + } break; branch_xs: is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32; - /* crj or cgrj %dst,%src,mask,off */ - EMIT6_PCREL(0xec000000, (is_jmp32 ? 0x0076 : 0x0064), - dst_reg, src_reg, i, off, mask); + if (!is_first_pass(jit) && + can_use_rel(jit, addrs[i + off + 1])) { + /* crj or cgrj %dst,%src,mask,off */ + EMIT6_PCREL(0xec000000, (is_jmp32 ? 0x0076 : 0x0064), + dst_reg, src_reg, i, off, mask); + } else { + /* cr or cgr %dst,%src */ + if (is_jmp32) + EMIT2(0x1900, dst_reg, src_reg); + else + EMIT4(0xb9200000, dst_reg, src_reg); + /* brcl mask,off */ + EMIT6_PCREL_RILC(0xc0040000, + mask >> 12, addrs[i + off + 1]); + } break; branch_xu: is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32; - /* clrj or clgrj %dst,%src,mask,off */ - EMIT6_PCREL(0xec000000, (is_jmp32 ? 0x0077 : 0x0065), - dst_reg, src_reg, i, off, mask); + if (!is_first_pass(jit) && + can_use_rel(jit, addrs[i + off + 1])) { + /* clrj or clgrj %dst,%src,mask,off */ + EMIT6_PCREL(0xec000000, (is_jmp32 ? 
0x0077 : 0x0065), + dst_reg, src_reg, i, off, mask); + } else { + /* clr or clgr %dst,%src */ + if (is_jmp32) + EMIT2(0x1500, dst_reg, src_reg); + else + EMIT4(0xb9210000, dst_reg, src_reg); + /* brcl mask,off */ + EMIT6_PCREL_RILC(0xc0040000, + mask >> 12, addrs[i + off + 1]); + } break; branch_oc: - /* brc mask,jmp_off (branch instruction needs 4 bytes) */ - jmp_off = addrs[i + off + 1] - (addrs[i + 1] - 4); - EMIT4_PCREL(0xa7040000 | mask << 8, jmp_off); + if (!is_first_pass(jit) && + can_use_rel(jit, addrs[i + off + 1])) { + /* brc mask,off */ + EMIT4_PCREL_RIC(0xa7040000, + mask >> 12, addrs[i + off + 1]); + } else { + /* brcl mask,off */ + EMIT6_PCREL_RILC(0xc0040000, + mask >> 12, addrs[i + off + 1]); + } break; } default: /* too complex, give up */ -- cgit v1.2.3 From e0491f64795bfc71ef6b13ba6b6fa6e176fa3c23 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Mon, 18 Nov 2019 19:03:36 +0100 Subject: s390/bpf: Align literal pool entries When literal pool size exceeds 512k, it's no longer possible to reference all the entries in it using a single base register and long displacement. Therefore, PC-relative lgfrl and lgrl instructions need to be used. Unfortunately, they require their arguments to be aligned to 4- and 8-byte boundaries respectively. This generates certain overhead due to necessary padding bytes. Grouping 4- and 8-byte entries together reduces the maximum overhead to 6 bytes (2 for aligning 4-byte entries and 4 for aligning 8-byte entries). While in theory it is possible to detect whether or not alignment is needed by comparing the literal pool size with 512k, in practice this leads to having two ways of emitting constants, making the code more complicated. Prefer code simplicity over trivial size saving, and always group and align literal pool entries. 
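As a quick sanity check of the "at most 6 bytes" figure, the layout
computed at the end of bpf_jit_prog() can be replayed in user space.
This is a sketch with made-up worst-case sizes; ALIGN is the usual
round-up macro:

  #include <stdio.h>

  #define ALIGN(x, a)     (((x) + (a) - 1) & ~((a) - 1))

  int main(void)
  {
          int prg = 1234;                         /* even, == 2 (mod 4) */
          int lit32_size = 8, lit64_size = 24;    /* hypothetical pools */
          int lit32_start, lit64_start, size;

          lit32_start = prg;
          if (lit32_size)
                  lit32_start = ALIGN(lit32_start, 4);    /* +2 bytes */
          lit64_start = lit32_start + lit32_size;
          if (lit64_size)
                  lit64_start = ALIGN(lit64_start, 8);    /* +4 bytes */
          size = lit64_start + lit64_size;

          /* prints 6: the maximum combined padding */
          printf("%d\n", size - (prg + lit32_size + lit64_size));
          return 0;
  }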
Signed-off-by: Ilya Leoshkevich Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20191118180340.68373-3-iii@linux.ibm.com --- arch/s390/net/bpf_jit_comp.c | 37 ++++++++++++++++++++++++------------- 1 file changed, 24 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 5ee1ebc6e448..bb0215d290f4 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -39,8 +40,10 @@ struct bpf_jit { int size; /* Size of program and literal pool */ int size_prg; /* Size of program */ int prg; /* Current position in program */ - int lit_start; /* Start of literal pool */ - int lit; /* Current position in literal pool */ + int lit32_start; /* Start of 32-bit literal pool */ + int lit32; /* Current position in 32-bit literal pool */ + int lit64_start; /* Start of 64-bit literal pool */ + int lit64; /* Current position in 64-bit literal pool */ int base_ip; /* Base address for literal pool */ int exit_ip; /* Address of exit */ int r1_thunk_ip; /* Address of expoline thunk for 'br %r1' */ @@ -287,22 +290,22 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1) #define EMIT_CONST_U32(val) \ ({ \ unsigned int ret; \ - ret = jit->lit - jit->base_ip; \ + ret = jit->lit32 - jit->base_ip; \ jit->seen |= SEEN_LITERAL; \ if (jit->prg_buf) \ - *(u32 *) (jit->prg_buf + jit->lit) = (u32) (val);\ - jit->lit += 4; \ + *(u32 *)(jit->prg_buf + jit->lit32) = (u32)(val);\ + jit->lit32 += 4; \ ret; \ }) #define EMIT_CONST_U64(val) \ ({ \ unsigned int ret; \ - ret = jit->lit - jit->base_ip; \ + ret = jit->lit64 - jit->base_ip; \ jit->seen |= SEEN_LITERAL; \ if (jit->prg_buf) \ - *(u64 *) (jit->prg_buf + jit->lit) = (u64) (val);\ - jit->lit += 8; \ + *(u64 *)(jit->prg_buf + jit->lit64) = (u64)(val);\ + jit->lit64 += 8; \ ret; \ }) @@ -1430,9 +1433,10 @@ static int bpf_set_addr(struct bpf_jit *jit, int i) static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp, bool extra_pass) { - int i, insn_count; + int i, insn_count, lit32_size, lit64_size; - jit->lit = jit->lit_start; + jit->lit32 = jit->lit32_start; + jit->lit64 = jit->lit64_start; jit->prg = 0; bpf_jit_prologue(jit, fp->aux->stack_depth); @@ -1448,8 +1452,15 @@ static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp, } bpf_jit_epilogue(jit, fp->aux->stack_depth); - jit->lit_start = jit->prg; - jit->size = jit->lit; + lit32_size = jit->lit32 - jit->lit32_start; + lit64_size = jit->lit64 - jit->lit64_start; + jit->lit32_start = jit->prg; + if (lit32_size) + jit->lit32_start = ALIGN(jit->lit32_start, 4); + jit->lit64_start = jit->lit32_start + lit32_size; + if (lit64_size) + jit->lit64_start = ALIGN(jit->lit64_start, 8); + jit->size = jit->lit64_start + lit64_size; jit->size_prg = jit->prg; return 0; } @@ -1535,7 +1546,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) goto free_addrs; } - header = bpf_jit_binary_alloc(jit.size, &jit.prg_buf, 2, jit_fill_hole); + header = bpf_jit_binary_alloc(jit.size, &jit.prg_buf, 8, jit_fill_hole); if (!header) { fp = orig_fp; goto free_addrs; -- cgit v1.2.3 From c1aff5682da2977c26fc087cf6a28e31a430174b Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Mon, 18 Nov 2019 19:03:37 +0100 Subject: s390/bpf: Load literal pool register using larl Currently literal pool register is loaded using basr, which makes it point not to the beginning of the literal pool, but rather to the next instruction. 
In case JITed code is larger than 512k, this renders literal pool register absolutely useless due to long displacement range restrictions. The solution is to use larl to make literal pool register point to the very beginning of the literal pool. This makes it always possible to address 512k worth of literal pool entries using long displacement. However, for short programs, in which the entire literal pool is covered by basr-generated base, it is still beneficial to use basr, since it is 4 bytes shorter than larl. Detect situations when basr-generated base does not cover the entire literal pool, and in such cases use larl instead. Signed-off-by: Ilya Leoshkevich Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20191118180340.68373-4-iii@linux.ibm.com --- arch/s390/net/bpf_jit_comp.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index bb0215d290f4..964a09fd10f1 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -352,6 +352,15 @@ static bool can_use_rel(struct bpf_jit *jit, int off) return is_valid_rel(off - jit->prg); } +/* + * Return whether given displacement can be encoded using + * Long-Displacement Facility + */ +static bool is_valid_ldisp(int disp) +{ + return disp >= -524288 && disp <= 524287; +} + /* * Fill whole space with illegal instructions */ @@ -476,9 +485,16 @@ static void bpf_jit_prologue(struct bpf_jit *jit, u32 stack_depth) save_restore_regs(jit, REGS_SAVE, stack_depth); /* Setup literal pool */ if (is_first_pass(jit) || (jit->seen & SEEN_LITERAL)) { - /* basr %r13,0 */ - EMIT2(0x0d00, REG_L, REG_0); - jit->base_ip = jit->prg; + if (!is_first_pass(jit) && + is_valid_ldisp(jit->size - (jit->prg + 2))) { + /* basr %l,0 */ + EMIT2(0x0d00, REG_L, REG_0); + jit->base_ip = jit->prg; + } else { + /* larl %l,lit32_start */ + EMIT6_PCREL_RILB(0xc0000000, REG_L, jit->lit32_start); + jit->base_ip = jit->lit32_start; + } } /* Setup stack and backchain */ if (is_first_pass(jit) || (jit->seen & SEEN_STACK)) { -- cgit v1.2.3 From 451e448ff4bb137da3d4b8b26a8260a2ff66869a Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Mon, 18 Nov 2019 19:03:38 +0100 Subject: s390/bpf: Use lgrl instead of lg where possible lg and lgrl have the same performance characteristics, but the former requires a base register and is subject to long displacement range limits, while the latter does not. Therefore, lgrl is totally superior to lg and should be used instead whenever possible. 
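To see why on the encoding level, the RIL-b format that lgrl uses can be
assembled by hand. A hedged sketch: the 0xc408 opcode and the field
layout are taken from the EMIT6_PCREL_RILB() uses in this series, while
emit_lgrl() itself is a name made up for the example.

  #include <stdint.h>
  #include <stdio.h>

  /* RIL-b: opcode(8) | r1(4) | op2(4) | ri(32), ri in halfwords. */
  static void emit_lgrl(uint8_t *buf, unsigned int r1, int32_t rel)
  {
          uint32_t ri = (uint32_t)(rel / 2);      /* bytes -> halfwords */

          buf[0] = 0xc4;                          /* lgrl, opcode part 1 */
          buf[1] = (uint8_t)(r1 << 4) | 0x08;     /* r1, opcode part 2 */
          buf[2] = (uint8_t)(ri >> 24);
          buf[3] = (uint8_t)(ri >> 16);
          buf[4] = (uint8_t)(ri >> 8);
          buf[5] = (uint8_t)ri;
  }

  int main(void)
  {
          uint8_t insn[6];
          int i;

          /* Load %r1 from a pool entry 1 MiB away: no base register. */
          emit_lgrl(insn, 1, 0x100000);
          for (i = 0; i < 6; i++)
                  printf("%02x", insn[i]);
          printf("\n");                           /* c41800080000 */
          return 0;
  }

lg, by contrast, needs a base register plus a 20-bit displacement, which
is exactly what stops working once the literal pool grows past 512k.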
Signed-off-by: Ilya Leoshkevich Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20191118180340.68373-5-iii@linux.ibm.com --- arch/s390/net/bpf_jit_comp.c | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 964a09fd10f1..6b3f85e4c5b0 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -287,28 +287,38 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1) REG_SET_SEEN(b1); \ }) -#define EMIT_CONST_U32(val) \ +#define _EMIT_CONST_U32(val) \ ({ \ unsigned int ret; \ - ret = jit->lit32 - jit->base_ip; \ - jit->seen |= SEEN_LITERAL; \ + ret = jit->lit32; \ if (jit->prg_buf) \ *(u32 *)(jit->prg_buf + jit->lit32) = (u32)(val);\ jit->lit32 += 4; \ ret; \ }) -#define EMIT_CONST_U64(val) \ +#define EMIT_CONST_U32(val) \ ({ \ - unsigned int ret; \ - ret = jit->lit64 - jit->base_ip; \ jit->seen |= SEEN_LITERAL; \ + _EMIT_CONST_U32(val) - jit->base_ip; \ +}) + +#define _EMIT_CONST_U64(val) \ +({ \ + unsigned int ret; \ + ret = jit->lit64; \ if (jit->prg_buf) \ *(u64 *)(jit->prg_buf + jit->lit64) = (u64)(val);\ jit->lit64 += 8; \ ret; \ }) +#define EMIT_CONST_U64(val) \ +({ \ + jit->seen |= SEEN_LITERAL; \ + _EMIT_CONST_U64(val) - jit->base_ip; \ +}) + #define EMIT_ZERO(b1) \ ({ \ if (!fp->aux->verifier_zext) { \ @@ -612,9 +622,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, u64 imm64; imm64 = (u64)(u32) insn[0].imm | ((u64)(u32) insn[1].imm) << 32; - /* lg %dst,(%l) */ - EMIT6_DISP_LH(0xe3000000, 0x0004, dst_reg, REG_0, REG_L, - EMIT_CONST_U64(imm64)); + /* lgrl %dst,imm */ + EMIT6_PCREL_RILB(0xc4080000, dst_reg, _EMIT_CONST_U64(imm64)); insn_count = 2; break; } @@ -1086,9 +1095,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, REG_SET_SEEN(BPF_REG_5); jit->seen |= SEEN_FUNC; - /* lg %w1,(%l) */ - EMIT6_DISP_LH(0xe3000000, 0x0004, REG_W1, REG_0, REG_L, - EMIT_CONST_U64(func)); + /* lgrl %w1,func */ + EMIT6_PCREL_RILB(0xc4080000, REG_W1, _EMIT_CONST_U64(func)); if (__is_defined(CC_USING_EXPOLINE) && !nospec_disable) { /* brasl %r14,__s390_indirect_jump_r1 */ EMIT6_PCREL_RILB(0xc0050000, REG_14, jit->r1_thunk_ip); -- cgit v1.2.3 From b25c57b6b7dda3799aaebc5f463776e4a0555927 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Mon, 18 Nov 2019 19:03:39 +0100 Subject: s390/bpf: Use lg(f)rl when long displacement cannot be used If literal pool grows past 524287 mark, it's no longer possible to use long displacement to reference literal pool entries. In JIT setting maintaining multiple literal pool registers is next to impossible, since we operate on one instruction at a time. Therefore, fall back to loading literal pool entry using PC-relative addressing, and then using a register-register form of the following machine instruction. 
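The decision added here boils down to one range check per literal pool
access. A stand-alone sketch (is_valid_ldisp() is quoted from an earlier
patch in this series; the offsets and the harness are hypothetical):

  #include <stdbool.h>
  #include <stdio.h>

  /* 20-bit signed displacement: the Long-Displacement Facility limit. */
  static bool is_valid_ldisp(int disp)
  {
          return disp >= -524288 && disp <= 524287;
  }

  int main(void)
  {
          /* Offsets of 64-bit pool entries relative to base_ip. */
          int offsets[] = { 4096, 524280, 524288 };
          int i;

          for (i = 0; i < 3; i++)
                  printf("+%d: %s\n", offsets[i],
                         is_valid_ldisp(offsets[i]) ?
                         "ng/og/xg off the pool base" :
                         "lgrl into a scratch register, then ngr/ogr/xgr");
          return 0;
  }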
Signed-off-by: Ilya Leoshkevich Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20191118180340.68373-6-iii@linux.ibm.com --- arch/s390/net/bpf_jit_comp.c | 96 +++++++++++++++++++++++++++++++++++++------- 1 file changed, 81 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 6b3f85e4c5b0..3398cd939496 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -371,6 +371,24 @@ static bool is_valid_ldisp(int disp) return disp >= -524288 && disp <= 524287; } +/* + * Return whether the next 32-bit literal pool entry can be referenced using + * Long-Displacement Facility + */ +static bool can_use_ldisp_for_lit32(struct bpf_jit *jit) +{ + return is_valid_ldisp(jit->lit32 - jit->base_ip); +} + +/* + * Return whether the next 64-bit literal pool entry can be referenced using + * Long-Displacement Facility + */ +static bool can_use_ldisp_for_lit64(struct bpf_jit *jit) +{ + return is_valid_ldisp(jit->lit64 - jit->base_ip); +} + /* * Fill whole space with illegal instructions */ @@ -752,9 +770,18 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, EMIT4_IMM(0xa7080000, REG_W0, 0); /* lr %w1,%dst */ EMIT2(0x1800, REG_W1, dst_reg); - /* dl %w0,(%l) */ - EMIT6_DISP_LH(0xe3000000, 0x0097, REG_W0, REG_0, REG_L, - EMIT_CONST_U32(imm)); + if (!is_first_pass(jit) && can_use_ldisp_for_lit32(jit)) { + /* dl %w0,(%l) */ + EMIT6_DISP_LH(0xe3000000, 0x0097, REG_W0, REG_0, REG_L, + EMIT_CONST_U32(imm)); + } else { + /* lgfrl %dst,imm */ + EMIT6_PCREL_RILB(0xc40c0000, dst_reg, + _EMIT_CONST_U32(imm)); + jit->seen |= SEEN_LITERAL; + /* dlr %w0,%dst */ + EMIT4(0xb9970000, REG_W0, dst_reg); + } /* llgfr %dst,%rc */ EMIT4(0xb9160000, dst_reg, rc_reg); if (insn_is_zext(&insn[1])) @@ -776,9 +803,18 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, EMIT4_IMM(0xa7090000, REG_W0, 0); /* lgr %w1,%dst */ EMIT4(0xb9040000, REG_W1, dst_reg); - /* dlg %w0,(%l) */ - EMIT6_DISP_LH(0xe3000000, 0x0087, REG_W0, REG_0, REG_L, - EMIT_CONST_U64(imm)); + if (!is_first_pass(jit) && can_use_ldisp_for_lit64(jit)) { + /* dlg %w0,(%l) */ + EMIT6_DISP_LH(0xe3000000, 0x0087, REG_W0, REG_0, REG_L, + EMIT_CONST_U64(imm)); + } else { + /* lgrl %dst,imm */ + EMIT6_PCREL_RILB(0xc4080000, dst_reg, + _EMIT_CONST_U64(imm)); + jit->seen |= SEEN_LITERAL; + /* dlgr %w0,%dst */ + EMIT4(0xb9870000, REG_W0, dst_reg); + } /* lgr %dst,%rc */ EMIT4(0xb9040000, dst_reg, rc_reg); break; @@ -801,9 +837,19 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, EMIT_ZERO(dst_reg); break; case BPF_ALU64 | BPF_AND | BPF_K: /* dst = dst & imm */ - /* ng %dst,(%l) */ - EMIT6_DISP_LH(0xe3000000, 0x0080, dst_reg, REG_0, REG_L, - EMIT_CONST_U64(imm)); + if (!is_first_pass(jit) && can_use_ldisp_for_lit64(jit)) { + /* ng %dst,(%l) */ + EMIT6_DISP_LH(0xe3000000, 0x0080, + dst_reg, REG_0, REG_L, + EMIT_CONST_U64(imm)); + } else { + /* lgrl %w0,imm */ + EMIT6_PCREL_RILB(0xc4080000, REG_W0, + _EMIT_CONST_U64(imm)); + jit->seen |= SEEN_LITERAL; + /* ngr %dst,%w0 */ + EMIT4(0xb9800000, dst_reg, REG_W0); + } break; /* * BPF_OR @@ -823,9 +869,19 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, EMIT_ZERO(dst_reg); break; case BPF_ALU64 | BPF_OR | BPF_K: /* dst = dst | imm */ - /* og %dst,(%l) */ - EMIT6_DISP_LH(0xe3000000, 0x0081, dst_reg, REG_0, REG_L, - EMIT_CONST_U64(imm)); + if (!is_first_pass(jit) && can_use_ldisp_for_lit64(jit)) { + /* og %dst,(%l) */ + 
EMIT6_DISP_LH(0xe3000000, 0x0081,
+				      dst_reg, REG_0, REG_L,
+				      EMIT_CONST_U64(imm));
+		} else {
+			/* lgrl %w0,imm */
+			EMIT6_PCREL_RILB(0xc4080000, REG_W0,
+					 _EMIT_CONST_U64(imm));
+			jit->seen |= SEEN_LITERAL;
+			/* ogr %dst,%w0 */
+			EMIT4(0xb9810000, dst_reg, REG_W0);
+		}
 		break;
 	/*
 	 * BPF_XOR
@@ -847,9 +903,19 @@
 		EMIT_ZERO(dst_reg);
 		break;
 	case BPF_ALU64 | BPF_XOR | BPF_K: /* dst = dst ^ imm */
-		/* xg %dst,(%l) */
-		EMIT6_DISP_LH(0xe3000000, 0x0082, dst_reg, REG_0, REG_L,
-			      EMIT_CONST_U64(imm));
+		if (!is_first_pass(jit) && can_use_ldisp_for_lit64(jit)) {
+			/* xg %dst,(%l) */
+			EMIT6_DISP_LH(0xe3000000, 0x0082,
+				      dst_reg, REG_0, REG_L,
+				      EMIT_CONST_U64(imm));
+		} else {
+			/* lgrl %w0,imm */
+			EMIT6_PCREL_RILB(0xc4080000, REG_W0,
+					 _EMIT_CONST_U64(imm));
+			jit->seen |= SEEN_LITERAL;
+			/* xgr %dst,%w0 */
+			EMIT4(0xb9820000, dst_reg, REG_W0);
+		}
 		break;
 	/*
 	 * BPF_LSH
-- cgit v1.2.3

From d1242b10ff03a40ae095e6dd54aac4a6f0f547d5 Mon Sep 17 00:00:00 2001
From: Ilya Leoshkevich
Date: Mon, 18 Nov 2019 19:03:40 +0100
Subject: s390/bpf: Remove JITed image size limitations

Now that jump and long displacement ranges are no longer a problem,
remove the limit on JITed image size. In practice it's still limited to
2G, but with the verifier allowing "only" 1M instructions, it's not an
issue.

Signed-off-by: Ilya Leoshkevich
Signed-off-by: Alexei Starovoitov
Link: https://lore.kernel.org/bpf/20191118180340.68373-7-iii@linux.ibm.com
---
 arch/s390/net/bpf_jit_comp.c | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index 3398cd939496..8d2134136290 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -52,8 +52,6 @@ struct bpf_jit {
 	int labels[1];		/* Labels for local jumps */
 };
 
-#define BPF_SIZE_MAX	0xffff	/* Max size for program (16 bit branches) */
-
 #define SEEN_MEM	BIT(0)		/* use mem[] for temporary storage */
 #define SEEN_LITERAL	BIT(1)		/* code uses literals */
 #define SEEN_FUNC	BIT(2)		/* calls C functions */
@@ -1631,11 +1629,6 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 	/*
 	 * Final pass: Allocate and generate program
 	 */
-	if (jit.size >= BPF_SIZE_MAX) {
-		fp = orig_fp;
-		goto free_addrs;
-	}
-
 	header = bpf_jit_binary_alloc(jit.size, &jit.prg_buf, 8, jit_fill_hole);
 	if (!header) {
 		fp = orig_fp;
 		goto free_addrs;
-- cgit v1.2.3

From 0398d4ab1677f7d8cd43aac2aa29a93dfcf9e2e3 Mon Sep 17 00:00:00 2001
From: YueHaibing
Date: Thu, 14 Nov 2019 15:30:05 +0800
Subject: s390/crypto: Fix unsigned variable compared with zero

s390_crypto_shash_parmsize() has return type int, so its result should
not be stored in an unsigned variable, which is then compared with zero.
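The pattern is easy to reproduce stand-alone; a made-up example in which
parmsize() stands in for s390_crypto_shash_parmsize() and -22 plays the
role of -EINVAL (gcc flags the broken comparison with -Wextra):

  #include <stdio.h>

  static int parmsize(void)
  {
          return -22;     /* pretend the lookup failed */
  }

  int main(void)
  {
          unsigned int mbl_offset = parmsize() / sizeof(unsigned int);
          int mbl;

          if (mbl_offset < 0)             /* always false: unsigned */
                  puts("never reached");

          mbl = parmsize();               /* keep the int result... */
          if (mbl < 0) {                  /* ...so the check can fire */
                  puts("error detected");
                  return 1;
          }
          printf("%u\n", mbl / (unsigned int)sizeof(unsigned int));
          return 0;
  }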
Reported-by: Hulk Robot Fixes: 3c2eb6b76cab ("s390/crypto: Support for SHA3 via CPACF (MSA6)") Signed-off-by: YueHaibing Signed-off-by: Joerg Schmidbauer Signed-off-by: Vasily Gorbik --- arch/s390/crypto/sha_common.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/s390/crypto/sha_common.c b/arch/s390/crypto/sha_common.c index d39e0f079217..686fe7aa192f 100644 --- a/arch/s390/crypto/sha_common.c +++ b/arch/s390/crypto/sha_common.c @@ -74,14 +74,17 @@ int s390_sha_final(struct shash_desc *desc, u8 *out) struct s390_sha_ctx *ctx = shash_desc_ctx(desc); unsigned int bsize = crypto_shash_blocksize(desc->tfm); u64 bits; - unsigned int n, mbl_offset; + unsigned int n; + int mbl_offset; n = ctx->count % bsize; bits = ctx->count * 8; - mbl_offset = s390_crypto_shash_parmsize(ctx->func) / sizeof(u32); + mbl_offset = s390_crypto_shash_parmsize(ctx->func); if (mbl_offset < 0) return -EINVAL; + mbl_offset = mbl_offset / sizeof(u32); + /* set total msg bit length (mbl) in CPACF parmblock */ switch (ctx->func) { case CPACF_KLMD_SHA_1: -- cgit v1.2.3 From 13f9bae579c6bd051e58f326913dd09af1291208 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Tue, 5 Nov 2019 17:33:20 +0100 Subject: s390/kasan: support memcpy_real with TRACE_IRQFLAGS Currently if the kernel is built with CONFIG_TRACE_IRQFLAGS and KASAN and used as crash kernel it crashes itself due to trace_hardirqs_off/trace_hardirqs_on being called with DAT off. This happens because trace_hardirqs_off/trace_hardirqs_on are instrumented and kasan code tries to perform access to shadow memory to validate memory accesses. Kasan shadow memory is populated with vmemmap, so all accesses require DAT on. memcpy_real could be called with DAT on or off (with kasan enabled DAT is set even before early code is executed). Make sure that trace_hardirqs_off/trace_hardirqs_on are called with DAT on and only actual __memcpy_real is called with DAT off. Also annotate __memcpy_real and _memcpy_real with __no_sanitize_address to avoid further problems due to switching DAT off. 
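The required ordering is easier to see in a toy model, where a global
flag stands in for the DAT bit and printf() for the KASAN-instrumented
tracepoints; none of this is the real s390 implementation:

  #include <stdbool.h>
  #include <stdio.h>
  #include <string.h>

  static bool dat = true;         /* stand-in for the PSW DAT bit */

  /* Instrumented helpers: only safe to call while "DAT" is on. */
  static void trace_off(void) { printf("trace_hardirqs_off, dat=%d\n", dat); }
  static void trace_on(void)  { printf("trace_hardirqs_on, dat=%d\n", dat); }

  static void memcpy_real_sketch(void *dst, const void *src, size_t n)
  {
          trace_off();            /* before DAT is switched off */
          dat = false;            /* __arch_local_irq_stnsm(0xf8) */
          memcpy(dst, src, n);    /* the uninstrumented real copy */
          dat = true;             /* __arch_local_irq_stosm(0x04) */
          trace_on();             /* only after DAT is back on */
  }

  int main(void)
  {
          char dst[4], src[4] = "abc";

          memcpy_real_sketch(dst, src, sizeof(src));
          return 0;
  }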
Reviewed-by: Philipp Rudo Signed-off-by: Vasily Gorbik --- arch/s390/mm/maccess.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c index 1864a8bb9622..59ad7997fed1 100644 --- a/arch/s390/mm/maccess.c +++ b/arch/s390/mm/maccess.c @@ -70,7 +70,7 @@ void notrace s390_kernel_write(void *dst, const void *src, size_t size) spin_unlock_irqrestore(&s390_kernel_write_lock, flags); } -static int __memcpy_real(void *dest, void *src, size_t count) +static int __no_sanitize_address __memcpy_real(void *dest, void *src, size_t count) { register unsigned long _dest asm("2") = (unsigned long) dest; register unsigned long _len1 asm("3") = (unsigned long) count; @@ -91,19 +91,23 @@ static int __memcpy_real(void *dest, void *src, size_t count) return rc; } -static unsigned long _memcpy_real(unsigned long dest, unsigned long src, - unsigned long count) +static unsigned long __no_sanitize_address _memcpy_real(unsigned long dest, + unsigned long src, + unsigned long count) { int irqs_disabled, rc; unsigned long flags; if (!count) return 0; - flags = __arch_local_irq_stnsm(0xf8UL); + flags = arch_local_irq_save(); irqs_disabled = arch_irqs_disabled_flags(flags); if (!irqs_disabled) trace_hardirqs_off(); + __arch_local_irq_stnsm(0xf8); // disable DAT rc = __memcpy_real((void *) dest, (void *) src, (size_t) count); + if (flags & PSW_MASK_DAT) + __arch_local_irq_stosm(0x04); // enable DAT if (!irqs_disabled) trace_hardirqs_on(); __arch_local_irq_ssm(flags); -- cgit v1.2.3 From c02ee6a16a260ae00a403be440e229fd8618486b Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Tue, 12 Nov 2019 17:35:28 +0100 Subject: s390/early: move control registers setup in C code Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/ctl_reg.h | 1 + arch/s390/kernel/early.c | 12 ++++++++++++ arch/s390/kernel/head64.S | 6 ------ 3 files changed, 13 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/ctl_reg.h b/arch/s390/include/asm/ctl_reg.h index 60f907516335..ed5efbb531c4 100644 --- a/arch/s390/include/asm/ctl_reg.h +++ b/arch/s390/include/asm/ctl_reg.h @@ -11,6 +11,7 @@ #include #define CR0_CLOCK_COMPARATOR_SIGN BIT(63 - 10) +#define CR0_LOW_ADDRESS_PROTECTION BIT(63 - 35) #define CR0_EMERGENCY_SIGNAL_SUBMASK BIT(63 - 49) #define CR0_EXTERNAL_CALL_SUBMASK BIT(63 - 50) #define CR0_CLOCK_COMPARATOR_SUBMASK BIT(63 - 52) diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 2e99e01e4f62..0ed6ae6a8bde 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -260,6 +260,17 @@ static inline void save_vector_registers(void) #endif } +static inline void setup_control_registers(void) +{ + unsigned long reg; + + __ctl_store(reg, 0, 0); + reg |= CR0_LOW_ADDRESS_PROTECTION; + reg |= CR0_EMERGENCY_SIGNAL_SUBMASK; + reg |= CR0_EXTERNAL_CALL_SUBMASK; + __ctl_load(reg, 0, 0); +} + static int __init disable_vector_extension(char *str) { S390_lowcore.machine_flags &= ~MACHINE_FLAG_VX; @@ -317,5 +328,6 @@ void __init startup_init(void) save_vector_registers(); setup_topology(); sclp_early_detect(); + setup_control_registers(); lockdep_on(); } diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index 0d9ee198f4eb..5b21702ea080 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -42,12 +42,6 @@ ENTRY(startup_continue) # brasl %r14,startup_init -# check control registers - stctg %c0,%c15,0(%r15) - oi 6(%r15),0x60 # enable sigp 
emergency & external call - oi 4(%r15),0x10 # switch on low address proctection - lctlg %c0,%c15,0(%r15) - lam 0,15,.Laregs-.LPG1(%r13) # load acrs needed by uaccess brasl %r14,start_kernel # go to C code # -- cgit v1.2.3 From b8ce1fa4892cccff0576827c8fd7fcad1698de3b Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Wed, 13 Nov 2019 11:22:30 +0100 Subject: s390/head64: remove unnecessary vdso_per_cpu_data setup vdso_per_cpu_data lowcore value is only needed for fully functional exception handlers, which are activated in setup_lowcore_dat_off. The same function does init vdso_per_cpu_data via vdso_alloc_boot_cpu. Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/head64.S | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index 5b21702ea080..9d649abddffd 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -26,8 +26,6 @@ ENTRY(startup_continue) 0: larl %r1,tod_clock_base mvc 0(16,%r1),__LC_BOOT_CLOCK larl %r13,.LPG1 # get base - larl %r0,boot_vdso_data - stg %r0,__LC_VDSO_PER_CPU # # Setup stack # -- cgit v1.2.3 From c2313594216b3fde9559e502bb36d14e9d601a56 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Thu, 14 Nov 2019 14:08:28 +0100 Subject: s390/early: move access registers setup in C code Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/early.c | 9 +++++++++ arch/s390/kernel/head64.S | 10 ++-------- 2 files changed, 11 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 0ed6ae6a8bde..db32a55daaec 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -30,6 +30,7 @@ #include #include #include +#include #include "entry.h" static void __init reset_tod_clock(void) @@ -271,6 +272,13 @@ static inline void setup_control_registers(void) __ctl_load(reg, 0, 0); } +static inline void setup_access_registers(void) +{ + unsigned int acrs[NUM_ACRS] = { 0 }; + + restore_access_regs(acrs); +} + static int __init disable_vector_extension(char *str) { S390_lowcore.machine_flags &= ~MACHINE_FLAG_VX; @@ -329,5 +337,6 @@ void __init startup_init(void) setup_topology(); sclp_early_detect(); setup_control_registers(); + setup_access_registers(); lockdep_on(); } diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index 9d649abddffd..b9e585f528a6 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -35,13 +35,8 @@ ENTRY(startup_continue) #ifdef CONFIG_KASAN brasl %r14,kasan_early_init #endif -# -# Early machine initialization and detection functions. -# - brasl %r14,startup_init - - lam 0,15,.Laregs-.LPG1(%r13) # load acrs needed by uaccess - brasl %r14,start_kernel # go to C code + brasl %r14,startup_init # s390 specific early init + brasl %r14,start_kernel # common init code # # We returned from start_kernel ?!? PANIK # @@ -51,4 +46,3 @@ ENTRY(startup_continue) .align 16 .LPG1: .Ldw: .quad 0x0002000180000000,0x0000000000000000 -.Laregs:.long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 -- cgit v1.2.3 From 72a81ad9d6d62dcb79f7e8ad66ffd1c768b72026 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sun, 17 Nov 2019 14:55:38 +0100 Subject: s390/smp: fix physical to logical CPU map for SMT If an SMT capable system is not IPL'ed from the first CPU the setup of the physical to logical CPU mapping is broken: the IPL core gets CPU number 0, but then the next core gets CPU number 1. 
Correct would be that all SMT threads of CPU 0 get the subsequent logical CPU numbers. This is important since a lot of code (like e.g. the CPU topology code) assumes that CPU maps are set up like this. If the mapping is broken the system will not IPL due to broken topology masks: [ 1.716341] BUG: arch topology broken [ 1.716342] the SMT domain not a subset of the MC domain [ 1.716343] BUG: arch topology broken [ 1.716344] the MC domain not a subset of the BOOK domain This scenario usually cannot happen since LPARs are always IPL'ed from CPU 0 and also re-IPL is initiated from CPU 0. However older kernels did initiate re-IPL on an arbitrary CPU. If therefore a re-IPL from an old kernel into a new kernel is initiated this may lead to a crash. Fix this by setting up the physical to logical CPU mapping correctly. Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/smp.c | 80 ++++++++++++++++++++++++++++++++++---------------- 1 file changed, 54 insertions(+), 26 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 44974654cbd0..6acdcf1d4074 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -724,39 +724,67 @@ static void __ref smp_get_core_info(struct sclp_core_info *info, int early) static int smp_add_present_cpu(int cpu); -static int __smp_rescan_cpus(struct sclp_core_info *info, int sysfs_add) +static int smp_add_core(struct sclp_core_entry *core, cpumask_t *avail, + bool configured, bool early) { struct pcpu *pcpu; - cpumask_t avail; - int cpu, nr, i, j; + int cpu, nr, i; u16 address; nr = 0; - cpumask_xor(&avail, cpu_possible_mask, cpu_present_mask); - cpu = cpumask_first(&avail); - for (i = 0; (i < info->combined) && (cpu < nr_cpu_ids); i++) { - if (sclp.has_core_type && info->core[i].type != boot_core_type) + if (sclp.has_core_type && core->type != boot_core_type) + return nr; + cpu = cpumask_first(avail); + address = core->core_id << smp_cpu_mt_shift; + for (i = 0; (i <= smp_cpu_mtid) && (cpu < nr_cpu_ids); i++) { + if (pcpu_find_address(cpu_present_mask, address + i)) continue; - address = info->core[i].core_id << smp_cpu_mt_shift; - for (j = 0; j <= smp_cpu_mtid; j++) { - if (pcpu_find_address(cpu_present_mask, address + j)) - continue; - pcpu = pcpu_devices + cpu; - pcpu->address = address + j; - pcpu->state = - (cpu >= info->configured*(smp_cpu_mtid + 1)) ? - CPU_STATE_STANDBY : CPU_STATE_CONFIGURED; - smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); - set_cpu_present(cpu, true); - if (sysfs_add && smp_add_present_cpu(cpu) != 0) - set_cpu_present(cpu, false); - else - nr++; - cpu = cpumask_next(cpu, &avail); - if (cpu >= nr_cpu_ids) + pcpu = pcpu_devices + cpu; + pcpu->address = address + i; + if (configured) + pcpu->state = CPU_STATE_CONFIGURED; + else + pcpu->state = CPU_STATE_STANDBY; + smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); + set_cpu_present(cpu, true); + if (!early && smp_add_present_cpu(cpu) != 0) + set_cpu_present(cpu, false); + else + nr++; + cpumask_clear_cpu(cpu, avail); + cpu = cpumask_next(cpu, avail); + } + return nr; +} + +static int __smp_rescan_cpus(struct sclp_core_info *info, bool early) +{ + struct sclp_core_entry *core; + cpumask_t avail; + bool configured; + u16 core_id; + int nr, i; + + nr = 0; + cpumask_xor(&avail, cpu_possible_mask, cpu_present_mask); + /* + * Add IPL core first (which got logical CPU number 0) to make sure + * that all SMT threads get subsequent logical CPU numbers.
+ */ + if (early) { + core_id = pcpu_devices[0].address >> smp_cpu_mt_shift; + for (i = 0; i < info->configured; i++) { + core = &info->core[i]; + if (core->core_id == core_id) { + nr += smp_add_core(core, &avail, true, early); break; + } } } + for (i = 0; i < info->combined; i++) { + configured = i < info->configured; + nr += smp_add_core(&info->core[i], &avail, configured, early); + } return nr; } @@ -805,7 +833,7 @@ void __init smp_detect_cpus(void) /* Add CPUs present at boot */ get_online_cpus(); - __smp_rescan_cpus(info, 0); + __smp_rescan_cpus(info, true); put_online_cpus(); memblock_free_early((unsigned long)info, sizeof(*info)); } @@ -1148,7 +1176,7 @@ int __ref smp_rescan_cpus(void) smp_get_core_info(info, 0); get_online_cpus(); mutex_lock(&smp_cpu_state_mutex); - nr = __smp_rescan_cpus(info, 1); + nr = __smp_rescan_cpus(info, false); mutex_unlock(&smp_cpu_state_mutex); put_online_cpus(); kfree(info); -- cgit v1.2.3 From 9a2ae9f6b6bbd3ef05d5e5977ace854e9b8f04b5 Mon Sep 17 00:00:00 2001 From: Nitesh Narayan Lal Date: Wed, 20 Nov 2019 07:12:24 -0500 Subject: KVM: x86: Zero the IOAPIC scan request dest vCPUs bitmap Not zeroing the bitmap used for identifying the destination vCPUs for an IOAPIC scan request in fixed delivery mode could lead to waking up unwanted vCPUs. This patch zeroes the vCPU bitmap before passing it to kvm_bitmap_or_dest_vcpus(), which is responsible for setting the bitmap with the bits corresponding to the destination vCPUs. Fixes: 7ee30bc132c6("KVM: x86: deliver KVM IOAPIC scan request to target vCPUs") Signed-off-by: Nitesh Narayan Lal Signed-off-by: Paolo Bonzini --- arch/x86/kvm/ioapic.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index ce30ef23c86b..9fd2dd89a1c5 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -332,6 +332,7 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) irq.delivery_mode = e->fields.delivery_mode << 8; irq.dest_id = e->fields.dest_id; irq.dest_mode = e->fields.dest_mode; + bitmap_zero(&vcpu_bitmap, 16); kvm_bitmap_or_dest_vcpus(ioapic->kvm, &irq, &vcpu_bitmap); if (old_dest_mode != e->fields.dest_mode || -- cgit v1.2.3 From cc877670975be9082138e34f9e55d9d79c527b5c Mon Sep 17 00:00:00 2001 From: Liran Alon Date: Mon, 18 Nov 2019 21:11:21 +0200 Subject: KVM: nVMX: Use semi-colon instead of comma for exit-handlers initialization Reviewed-by: Mark Kanda Signed-off-by: Liran Alon Reviewed-by: Jim Mattson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 60d42ce42403..f161a941cb09 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -6198,23 +6198,23 @@ __init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *)) init_vmcs_shadow_fields(); } - exit_handlers[EXIT_REASON_VMCLEAR] = handle_vmclear, - exit_handlers[EXIT_REASON_VMLAUNCH] = handle_vmlaunch, - exit_handlers[EXIT_REASON_VMPTRLD] = handle_vmptrld, - exit_handlers[EXIT_REASON_VMPTRST] = handle_vmptrst, - exit_handlers[EXIT_REASON_VMREAD] = handle_vmread, - exit_handlers[EXIT_REASON_VMRESUME] = handle_vmresume, - exit_handlers[EXIT_REASON_VMWRITE] = handle_vmwrite, - exit_handlers[EXIT_REASON_VMOFF] = handle_vmoff, - exit_handlers[EXIT_REASON_VMON] = handle_vmon, - exit_handlers[EXIT_REASON_INVEPT] = handle_invept, - exit_handlers[EXIT_REASON_INVVPID] = 
handle_invvpid, - exit_handlers[EXIT_REASON_VMFUNC] = handle_vmfunc, + exit_handlers[EXIT_REASON_VMCLEAR] = handle_vmclear; + exit_handlers[EXIT_REASON_VMLAUNCH] = handle_vmlaunch; + exit_handlers[EXIT_REASON_VMPTRLD] = handle_vmptrld; + exit_handlers[EXIT_REASON_VMPTRST] = handle_vmptrst; + exit_handlers[EXIT_REASON_VMREAD] = handle_vmread; + exit_handlers[EXIT_REASON_VMRESUME] = handle_vmresume; + exit_handlers[EXIT_REASON_VMWRITE] = handle_vmwrite; + exit_handlers[EXIT_REASON_VMOFF] = handle_vmoff; + exit_handlers[EXIT_REASON_VMON] = handle_vmon; + exit_handlers[EXIT_REASON_INVEPT] = handle_invept; + exit_handlers[EXIT_REASON_INVVPID] = handle_invvpid; + exit_handlers[EXIT_REASON_VMFUNC] = handle_vmfunc; kvm_x86_ops->check_nested_events = vmx_check_nested_events; kvm_x86_ops->get_nested_state = vmx_get_nested_state; kvm_x86_ops->set_nested_state = vmx_set_nested_state; - kvm_x86_ops->get_vmcs12_pages = nested_get_vmcs12_pages, + kvm_x86_ops->get_vmcs12_pages = nested_get_vmcs12_pages; kvm_x86_ops->nested_enable_evmcs = nested_enable_evmcs; kvm_x86_ops->nested_get_evmcs_version = nested_get_evmcs_version; -- cgit v1.2.3 From c79eb77554bb6dde8ecd5319e2824e4c8e1f4819 Mon Sep 17 00:00:00 2001 From: Chenyi Qiang Date: Tue, 19 Nov 2019 16:33:59 +0800 Subject: KVM: nVMX: add CR4_LA57 bit to nested CR4_FIXED1 When L1 guest uses 5-level paging, it fails vm-entry to L2 due to invalid host-state. It needs to add CR4_LA57 bit to nested CR4_FIXED1 MSR. Signed-off-by: Chenyi Qiang Reviewed-by: Xiaoyao Li Reviewed-by: Liran Alon Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 621142e55e28..89253d60e23a 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -6962,6 +6962,7 @@ static void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu) cr4_fixed1_update(X86_CR4_SMAP, ebx, bit(X86_FEATURE_SMAP)); cr4_fixed1_update(X86_CR4_PKE, ecx, bit(X86_FEATURE_PKU)); cr4_fixed1_update(X86_CR4_UMIP, ecx, bit(X86_FEATURE_UMIP)); + cr4_fixed1_update(X86_CR4_LA57, ecx, bit(X86_FEATURE_LA57)); #undef cr4_fixed1_update } -- cgit v1.2.3 From 5637f60b6828c0abfd5fe3cc6922a7106b5366c7 Mon Sep 17 00:00:00 2001 From: Liran Alon Date: Mon, 18 Nov 2019 19:27:02 +0200 Subject: KVM: x86: Unexport kvm_vcpu_reload_apic_access_page() The function is only used in kvm.ko module. Reviewed-by: Mark Kanda Signed-off-by: Liran Alon Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 991dd01ba08b..050961a51f84 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7965,7 +7965,6 @@ void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu) */ put_page(page); } -EXPORT_SYMBOL_GPL(kvm_vcpu_reload_apic_access_page); void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu) { -- cgit v1.2.3 From 992edeaefed682511bd173dabd2f54b1ce5387df Mon Sep 17 00:00:00 2001 From: Liran Alon Date: Wed, 20 Nov 2019 14:24:52 +0200 Subject: KVM: nVMX: Assume TLB entries of L1 and L2 are tagged differently if L0 use EPT Since commit 1313cc2bd8f6 ("kvm: mmu: Add guest_mode to kvm_mmu_page_role"), guest_mode was added to mmu-role and therefore if L0 use EPT, it will always run L1 and L2 with different EPTP. i.e. EPTP01!=EPTP02. Because TLB entries are tagged with EP4TA, KVM can assume TLB entries populated while running L2 are tagged differently than TLB entries populated while running L1. 
Therefore, update nested_has_guest_tlb_tag() to consider whether L0 uses EPT instead of whether L1 uses EPT. Reviewed-by: Joao Martins Reviewed-by: Krish Sadhukhan Signed-off-by: Liran Alon Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index f161a941cb09..dc06e67be017 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -1126,7 +1126,9 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool ne * populated by L2 differently than TLB entries populated * by L1. * - * If L1 uses EPT, then TLB entries are tagged with different EPTP. + * If L0 uses EPT, L1 and L2 run with different EPTP because + * guest_mode is part of kvm_mmu_page_role. Thus, TLB entries + * are tagged with different EPTP. * * If L1 uses VPID and we allocated a vpid02, TLB entries are tagged * with different VPID (L1 entries are tagged with vmx->vpid @@ -1136,7 +1138,7 @@ static bool nested_has_guest_tlb_tag(struct kvm_vcpu *vcpu) { struct vmcs12 *vmcs12 = get_vmcs12(vcpu); - return nested_cpu_has_ept(vmcs12) || + return enable_ept || (nested_cpu_has_vpid(vmcs12) && to_vmx(vcpu)->nested.vpid02); } -- cgit v1.2.3 From 6a82e23f45fe0aa821e7a935e39d0acb20c275c0 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Fri, 15 Nov 2019 12:50:46 +0100 Subject: s390/cpumf: Adjust registration of s390 PMU device drivers Linux-next commit titled "perf/core: Optimize perf_init_event()" changed the semantics of PMU device driver registration. It was done to speed up the lookup/handling of PMU device driver specific events. It also enforces that only one PMU device driver will be registered of type PERF_TYPE_RAW. This change added these lines in function perf_pmu_register(): ... + ret = idr_alloc(&pmu_idr, pmu, max, 0, GFP_KERNEL); + if (ret < 0) goto free_pdc; + + WARN_ON(type >= 0 && ret != type); The WARN_ON generates a message. We have 3 PMU device drivers, each registered as type PERF_TYPE_RAW. The cf_diag device driver (arch/s390/kernel/perf_cpumf_cf_diag.c) always hits the WARN_ON because it is the second PMU device driver (after the sampling device driver arch/s390/kernel/perf_cpumf_sf.c) which is registered as type 4 (PERF_TYPE_RAW). So when the sampling device driver is registered, ret has value 4. When the cf_diag device driver is registered with type 4, ret has a value of 5 and the WARN_ON fires. Adjust the PMU device drivers for s390 to support the new semantics required by perf_pmu_register().
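The adjustment below follows the dispatch pattern the new semantics ask for: register the PMU with type -1 so that perf_pmu_register() allocates a free type number, and have event_init additionally accept events that carry this dynamically assigned type, treating their config as a raw event. A minimal sketch of that pattern, with hypothetical example_* names standing in for a driver's real helpers:

	#include <linux/perf_event.h>

	/* Hypothetical helper that programs the counter hardware. */
	static int example_hw_init(struct perf_event *event, unsigned int type);

	static struct pmu example_pmu;

	static int example_event_init(struct perf_event *event)
	{
		unsigned int type = event->attr.type;

		/* Static types are matched exactly as before. */
		if (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_RAW)
			return example_hw_init(event, type);
		/* Dynamically assigned type: treat the config as raw. */
		if (type == event->pmu->type)
			return example_hw_init(event, PERF_TYPE_RAW);
		return -ENOENT;	/* not ours, the core tries the next PMU */
	}

	static int __init example_pmu_setup(void)
	{
		/* -1 lets the core allocate a type number instead of
		 * claiming PERF_TYPE_RAW, which avoids the WARN_ON. */
		return perf_pmu_register(&example_pmu, "example", -1);
	}
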
Signed-off-by: Thomas Richter Signed-off-by: Vasily Gorbik --- arch/s390/kernel/perf_cpum_cf.c | 21 ++++++++++----------- arch/s390/kernel/perf_cpum_cf_diag.c | 10 +++++----- 2 files changed, 15 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 48d48b6187c0..0eb1d1cc53a8 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -199,7 +199,7 @@ static const int cpumf_generic_events_user[] = { [PERF_COUNT_HW_BUS_CYCLES] = -1, }; -static int __hw_perf_event_init(struct perf_event *event) +static int __hw_perf_event_init(struct perf_event *event, unsigned int type) { struct perf_event_attr *attr = &event->attr; struct hw_perf_event *hwc = &event->hw; @@ -207,7 +207,7 @@ static int __hw_perf_event_init(struct perf_event *event) int err = 0; u64 ev; - switch (attr->type) { + switch (type) { case PERF_TYPE_RAW: /* Raw events are used to access counters directly, * hence do not permit excludes */ @@ -294,17 +294,16 @@ static int __hw_perf_event_init(struct perf_event *event) static int cpumf_pmu_event_init(struct perf_event *event) { + unsigned int type = event->attr.type; int err; - switch (event->attr.type) { - case PERF_TYPE_HARDWARE: - case PERF_TYPE_HW_CACHE: - case PERF_TYPE_RAW: - err = __hw_perf_event_init(event); - break; - default: + if (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_RAW) + err = __hw_perf_event_init(event, type); + else if (event->pmu->type == type) + /* Registered as unknown PMU */ + err = __hw_perf_event_init(event, PERF_TYPE_RAW); + else return -ENOENT; - } if (unlikely(err) && event->destroy) event->destroy(event); @@ -553,7 +552,7 @@ static int __init cpumf_pmu_init(void) return -ENODEV; cpumf_pmu.attr_groups = cpumf_cf_event_group(); - rc = perf_pmu_register(&cpumf_pmu, "cpum_cf", PERF_TYPE_RAW); + rc = perf_pmu_register(&cpumf_pmu, "cpum_cf", -1); if (rc) pr_err("Registering the cpum_cf PMU failed with rc=%i\n", rc); return rc; diff --git a/arch/s390/kernel/perf_cpum_cf_diag.c b/arch/s390/kernel/perf_cpum_cf_diag.c index 2654e348801a..e949ab832ed7 100644 --- a/arch/s390/kernel/perf_cpum_cf_diag.c +++ b/arch/s390/kernel/perf_cpum_cf_diag.c @@ -243,13 +243,13 @@ static int cf_diag_event_init(struct perf_event *event) int err = -ENOENT; debug_sprintf_event(cf_diag_dbg, 5, - "%s event %p cpu %d config %#llx " + "%s event %p cpu %d config %#llx type:%u " "sample_type %#llx cf_diag_events %d\n", __func__, - event, event->cpu, attr->config, attr->sample_type, - atomic_read(&cf_diag_events)); + event, event->cpu, attr->config, event->pmu->type, + attr->sample_type, atomic_read(&cf_diag_events)); if (event->attr.config != PERF_EVENT_CPUM_CF_DIAG || - event->attr.type != PERF_TYPE_RAW) + event->attr.type != event->pmu->type) goto out; /* Raw events are used to access counters directly, @@ -693,7 +693,7 @@ static int __init cf_diag_init(void) } debug_register_view(cf_diag_dbg, &debug_sprintf_view); - rc = perf_pmu_register(&cf_diag, "cpum_cf_diag", PERF_TYPE_RAW); + rc = perf_pmu_register(&cf_diag, "cpum_cf_diag", -1); if (rc) { debug_unregister_view(cf_diag_dbg, &debug_sprintf_view); debug_unregister(cf_diag_dbg); -- cgit v1.2.3 From 94bb804e1e6f0a9a77acf20d7c70ea141c6c821e Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Tue, 19 Nov 2019 17:10:06 -0500 Subject: arm64: uaccess: Ensure PAN is re-enabled after unhandled uaccess fault A number of our uaccess routines ('__arch_clear_user()' and '__arch_copy_{in,from,to}_user()') fail to re-enable PAN if they 
encounter an unhandled fault whilst accessing userspace. For CPUs implementing both hardware PAN and UAO, this bug has no effect when both extensions are in use by the kernel. For CPUs implementing hardware PAN but not UAO, this means that a kernel using hardware PAN may execute portions of code with PAN inadvertently disabled, opening us up to potential security vulnerabilities that rely on userspace access from within the kernel which would usually be prevented by this mechanism. In other words, parts of the kernel run the same way as they would on a CPU without PAN implemented/emulated at all. For CPUs not implementing hardware PAN and instead relying on software emulation via 'CONFIG_ARM64_SW_TTBR0_PAN=y', the impact is unfortunately much worse. Calling 'schedule()' with software PAN disabled means that the next task will execute in the kernel using the page-table and ASID of the previous process even after 'switch_mm()', since the actual hardware switch is deferred until return to userspace. At this point, or if there is an intermediate call to 'uaccess_enable()', the page-table and ASID of the new process are installed. Sadly, due to the changes introduced by KPTI, this is not an atomic operation and there is a very small window (two instructions) where the CPU is configured with the page-table of the old task and the ASID of the new task; a speculative access in this state is disastrous because it would corrupt the TLB entries for the new task with mappings from the previous address space. As Pavel explains: | I was able to reproduce memory corruption problem on Broadcom's SoC | ARMv8-A like this: | | Enable software perf-events with PERF_SAMPLE_CALLCHAIN so userland's | stack is accessed and copied. | | The test program performed the following on every CPU and forking | many processes: | | unsigned long *map = mmap(NULL, PAGE_SIZE, PROT_READ|PROT_WRITE, | MAP_SHARED | MAP_ANONYMOUS, -1, 0); | map[0] = getpid(); | sched_yield(); | if (map[0] != getpid()) { | fprintf(stderr, "Corruption detected!"); | } | munmap(map, PAGE_SIZE); | | From time to time I was getting map[0] to contain pid for a | different process. Ensure that PAN is re-enabled when returning after an unhandled user fault from our uaccess routines.
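The invariant being restored is easier to see in C than in the assembly fixup tables: PAN must be toggled symmetrically around the low-level copy, on the success path and on the fault path alike. A rough sketch of that bracketing, using the series' helper names (in this patch they are still assembly macros; the follow-up patch below moves them into C wrappers of exactly this shape):

	#include <linux/uaccess.h>

	static unsigned long example_copy_from_user(void *to,
						    const void __user *from,
						    unsigned long n)
	{
		unsigned long not_copied;

		uaccess_enable_not_uao();	/* clear PSTATE.PAN / switch TTBR0 */
		not_copied = __arch_copy_from_user(to, from, n);
		/* Must run on *every* exit path, including after a fault. */
		uaccess_disable_not_uao();	/* set PSTATE.PAN again */
		return not_copied;
	}

The bug fixed here is precisely that the assembly fixup path, taken on an unhandled fault, returned without the disable step.
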
Cc: Catalin Marinas Reviewed-by: Mark Rutland Tested-by: Mark Rutland Cc: Fixes: 338d4f49d6f7 ("arm64: kernel: Add support for Privileged Access Never") Signed-off-by: Pavel Tatashin [will: rewrote commit message] Signed-off-by: Will Deacon --- arch/arm64/lib/clear_user.S | 1 + arch/arm64/lib/copy_from_user.S | 1 + arch/arm64/lib/copy_in_user.S | 1 + arch/arm64/lib/copy_to_user.S | 1 + 4 files changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S index 10415572e82f..322b55664cca 100644 --- a/arch/arm64/lib/clear_user.S +++ b/arch/arm64/lib/clear_user.S @@ -48,5 +48,6 @@ EXPORT_SYMBOL(__arch_clear_user) .section .fixup,"ax" .align 2 9: mov x0, x2 // return the original size + uaccess_disable_not_uao x2, x3 ret .previous diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S index 680e74409ff9..8472dc7798b3 100644 --- a/arch/arm64/lib/copy_from_user.S +++ b/arch/arm64/lib/copy_from_user.S @@ -66,5 +66,6 @@ EXPORT_SYMBOL(__arch_copy_from_user) .section .fixup,"ax" .align 2 9998: sub x0, end, dst // bytes not copied + uaccess_disable_not_uao x3, x4 ret .previous diff --git a/arch/arm64/lib/copy_in_user.S b/arch/arm64/lib/copy_in_user.S index 0bedae3f3792..8e0355c1e318 100644 --- a/arch/arm64/lib/copy_in_user.S +++ b/arch/arm64/lib/copy_in_user.S @@ -68,5 +68,6 @@ EXPORT_SYMBOL(__arch_copy_in_user) .section .fixup,"ax" .align 2 9998: sub x0, end, dst // bytes not copied + uaccess_disable_not_uao x3, x4 ret .previous diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S index 2d88c736e8f2..6085214654dc 100644 --- a/arch/arm64/lib/copy_to_user.S +++ b/arch/arm64/lib/copy_to_user.S @@ -65,5 +65,6 @@ EXPORT_SYMBOL(__arch_copy_to_user) .section .fixup,"ax" .align 2 9998: sub x0, end, dst // bytes not copied + uaccess_disable_not_uao x3, x4 ret .previous -- cgit v1.2.3 From e50be648aaa3da196d4f4ed49d1c5d4ec105fa4a Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Wed, 20 Nov 2019 12:07:40 -0500 Subject: arm64: uaccess: Remove uaccess_*_not_uao asm macros It is safer and simpler to drop the uaccess assembly macros in favour of inline C functions. Although this bloats the Image size slightly, it aligns our user copy routines with '{get,put}_user()' and generally makes the code a lot easier to reason about. Cc: Catalin Marinas Reviewed-by: Mark Rutland Tested-by: Mark Rutland Signed-off-by: Pavel Tatashin [will: tweaked commit message and changed temporary variable names] Signed-off-by: Will Deacon --- arch/arm64/include/asm/asm-uaccess.h | 17 ----------------- arch/arm64/include/asm/uaccess.h | 27 ++++++++++++++++++++++----- arch/arm64/lib/clear_user.S | 3 --- arch/arm64/lib/copy_from_user.S | 3 --- arch/arm64/lib/copy_in_user.S | 3 --- arch/arm64/lib/copy_to_user.S | 3 --- arch/arm64/lib/uaccess_flushcache.c | 6 +++++- 7 files changed, 27 insertions(+), 35 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/asm-uaccess.h b/arch/arm64/include/asm/asm-uaccess.h index 5bf963830b17..c764cc8fb3b6 100644 --- a/arch/arm64/include/asm/asm-uaccess.h +++ b/arch/arm64/include/asm/asm-uaccess.h @@ -58,23 +58,6 @@ alternative_else_nop_endif .endm #endif -/* - * These macros are no-ops when UAO is present. 
- */ - .macro uaccess_disable_not_uao, tmp1, tmp2 - uaccess_ttbr0_disable \tmp1, \tmp2 -alternative_if ARM64_ALT_PAN_NOT_UAO - SET_PSTATE_PAN(1) -alternative_else_nop_endif - .endm - - .macro uaccess_enable_not_uao, tmp1, tmp2, tmp3 - uaccess_ttbr0_enable \tmp1, \tmp2, \tmp3 -alternative_if ARM64_ALT_PAN_NOT_UAO - SET_PSTATE_PAN(0) -alternative_else_nop_endif - .endm - /* * Remove the address tag from a virtual address, if present. */ diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 097d6bfac0b7..127712b0b970 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -378,20 +378,34 @@ do { \ extern unsigned long __must_check __arch_copy_from_user(void *to, const void __user *from, unsigned long n); #define raw_copy_from_user(to, from, n) \ ({ \ - __arch_copy_from_user((to), __uaccess_mask_ptr(from), (n)); \ + unsigned long __acfu_ret; \ + uaccess_enable_not_uao(); \ + __acfu_ret = __arch_copy_from_user((to), \ + __uaccess_mask_ptr(from), (n)); \ + uaccess_disable_not_uao(); \ + __acfu_ret; \ }) extern unsigned long __must_check __arch_copy_to_user(void __user *to, const void *from, unsigned long n); #define raw_copy_to_user(to, from, n) \ ({ \ - __arch_copy_to_user(__uaccess_mask_ptr(to), (from), (n)); \ + unsigned long __actu_ret; \ + uaccess_enable_not_uao(); \ + __actu_ret = __arch_copy_to_user(__uaccess_mask_ptr(to), \ + (from), (n)); \ + uaccess_disable_not_uao(); \ + __actu_ret; \ }) extern unsigned long __must_check __arch_copy_in_user(void __user *to, const void __user *from, unsigned long n); #define raw_copy_in_user(to, from, n) \ ({ \ - __arch_copy_in_user(__uaccess_mask_ptr(to), \ - __uaccess_mask_ptr(from), (n)); \ + unsigned long __aciu_ret; \ + uaccess_enable_not_uao(); \ + __aciu_ret = __arch_copy_in_user(__uaccess_mask_ptr(to), \ + __uaccess_mask_ptr(from), (n)); \ + uaccess_disable_not_uao(); \ + __aciu_ret; \ }) #define INLINE_COPY_TO_USER @@ -400,8 +414,11 @@ extern unsigned long __must_check __arch_copy_in_user(void __user *to, const voi extern unsigned long __must_check __arch_clear_user(void __user *to, unsigned long n); static inline unsigned long __must_check __clear_user(void __user *to, unsigned long n) { - if (access_ok(to, n)) + if (access_ok(to, n)) { + uaccess_enable_not_uao(); n = __arch_clear_user(__uaccess_mask_ptr(to), n); + uaccess_disable_not_uao(); + } return n; } #define clear_user __clear_user diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S index 322b55664cca..aeafc03e961a 100644 --- a/arch/arm64/lib/clear_user.S +++ b/arch/arm64/lib/clear_user.S @@ -20,7 +20,6 @@ * Alignment fixed up by hardware. 
*/ ENTRY(__arch_clear_user) - uaccess_enable_not_uao x2, x3, x4 mov x2, x1 // save the size for fixup return subs x1, x1, #8 b.mi 2f @@ -40,7 +39,6 @@ uao_user_alternative 9f, strh, sttrh, wzr, x0, 2 b.mi 5f uao_user_alternative 9f, strb, sttrb, wzr, x0, 0 5: mov x0, #0 - uaccess_disable_not_uao x2, x3 ret ENDPROC(__arch_clear_user) EXPORT_SYMBOL(__arch_clear_user) @@ -48,6 +46,5 @@ EXPORT_SYMBOL(__arch_clear_user) .section .fixup,"ax" .align 2 9: mov x0, x2 // return the original size - uaccess_disable_not_uao x2, x3 ret .previous diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S index 8472dc7798b3..ebb3c06cbb5d 100644 --- a/arch/arm64/lib/copy_from_user.S +++ b/arch/arm64/lib/copy_from_user.S @@ -54,10 +54,8 @@ end .req x5 ENTRY(__arch_copy_from_user) - uaccess_enable_not_uao x3, x4, x5 add end, x0, x2 #include "copy_template.S" - uaccess_disable_not_uao x3, x4 mov x0, #0 // Nothing to copy ret ENDPROC(__arch_copy_from_user) @@ -66,6 +64,5 @@ EXPORT_SYMBOL(__arch_copy_from_user) .section .fixup,"ax" .align 2 9998: sub x0, end, dst // bytes not copied - uaccess_disable_not_uao x3, x4 ret .previous diff --git a/arch/arm64/lib/copy_in_user.S b/arch/arm64/lib/copy_in_user.S index 8e0355c1e318..3d8153a1ebce 100644 --- a/arch/arm64/lib/copy_in_user.S +++ b/arch/arm64/lib/copy_in_user.S @@ -56,10 +56,8 @@ end .req x5 ENTRY(__arch_copy_in_user) - uaccess_enable_not_uao x3, x4, x5 add end, x0, x2 #include "copy_template.S" - uaccess_disable_not_uao x3, x4 mov x0, #0 ret ENDPROC(__arch_copy_in_user) @@ -68,6 +66,5 @@ EXPORT_SYMBOL(__arch_copy_in_user) .section .fixup,"ax" .align 2 9998: sub x0, end, dst // bytes not copied - uaccess_disable_not_uao x3, x4 ret .previous diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S index 6085214654dc..357eae2c18eb 100644 --- a/arch/arm64/lib/copy_to_user.S +++ b/arch/arm64/lib/copy_to_user.S @@ -53,10 +53,8 @@ end .req x5 ENTRY(__arch_copy_to_user) - uaccess_enable_not_uao x3, x4, x5 add end, x0, x2 #include "copy_template.S" - uaccess_disable_not_uao x3, x4 mov x0, #0 ret ENDPROC(__arch_copy_to_user) @@ -65,6 +63,5 @@ EXPORT_SYMBOL(__arch_copy_to_user) .section .fixup,"ax" .align 2 9998: sub x0, end, dst // bytes not copied - uaccess_disable_not_uao x3, x4 ret .previous diff --git a/arch/arm64/lib/uaccess_flushcache.c b/arch/arm64/lib/uaccess_flushcache.c index cbfcbe6470a5..bfa30b75b2b8 100644 --- a/arch/arm64/lib/uaccess_flushcache.c +++ b/arch/arm64/lib/uaccess_flushcache.c @@ -28,7 +28,11 @@ void memcpy_page_flushcache(char *to, struct page *page, size_t offset, unsigned long __copy_user_flushcache(void *to, const void __user *from, unsigned long n) { - unsigned long rc = __arch_copy_from_user(to, from, n); + unsigned long rc; + + uaccess_enable_not_uao(); + rc = __arch_copy_from_user(to, from, n); + uaccess_disable_not_uao(); /* See above */ __clean_dcache_area_pop(to, n - rc); -- cgit v1.2.3 From 39331a49c4e159eca2500ccbd25ccf9e048b7559 Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Wed, 20 Nov 2019 00:19:23 +0200 Subject: ARM: dts: dra7: add dt nodes for new cpsw switch dev driver Add DT nodes for new cpsw switch dev driver. Signed-off-by: Grygorii Strashko Signed-off-by: David S. 
Miller --- arch/arm/boot/dts/dra7-l4.dtsi | 52 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) (limited to 'arch') diff --git a/arch/arm/boot/dts/dra7-l4.dtsi b/arch/arm/boot/dts/dra7-l4.dtsi index 5cac2dd58241..37e048771b0f 100644 --- a/arch/arm/boot/dts/dra7-l4.dtsi +++ b/arch/arm/boot/dts/dra7-l4.dtsi @@ -3079,6 +3079,58 @@ phys = <&phy_gmii_sel 2>; }; }; + + mac_sw: switch@0 { + compatible = "ti,dra7-cpsw-switch","ti,cpsw-switch"; + reg = <0x0 0x4000>; + ranges = <0 0 0x4000>; + clocks = <&gmac_main_clk>; + clock-names = "fck"; + #address-cells = <1>; + #size-cells = <1>; + syscon = <&scm_conf>; + status = "disabled"; + + interrupts = , + , + , + ; + interrupt-names = "rx_thresh", "rx", "tx", "misc"; + + ethernet-ports { + #address-cells = <1>; + #size-cells = <0>; + + cpsw_port1: port@1 { + reg = <1>; + label = "port1"; + mac-address = [ 00 00 00 00 00 00 ]; + phys = <&phy_gmii_sel 1>; + }; + + cpsw_port2: port@2 { + reg = <2>; + label = "port2"; + mac-address = [ 00 00 00 00 00 00 ]; + phys = <&phy_gmii_sel 2>; + }; + }; + + davinci_mdio_sw: mdio@1000 { + compatible = "ti,cpsw-mdio","ti,davinci_mdio"; + clocks = <&gmac_main_clk>; + clock-names = "fck"; + #address-cells = <1>; + #size-cells = <0>; + bus_freq = <1000000>; + reg = <0x1000 0x100>; + }; + + cpts { + clocks = <&gmac_clkctrl DRA7_GMAC_GMAC_CLKCTRL 25>; + clock-names = "cpts"; + }; + }; }; }; }; -- cgit v1.2.3 From 15b991ade40069b2b88f29082f4ee59ae0a1ac38 Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Wed, 20 Nov 2019 00:19:24 +0200 Subject: ARM: dts: am571x-idk: enable for new cpsw switch dev driver Add DT nodes for new cpsw switchdev driver for am571x-idk board for now to enable testing of the new solution. Signed-off-by: Grygorii Strashko Signed-off-by: David S. 
Miller --- arch/arm/boot/dts/am571x-idk.dts | 27 +++++++++++++++++++++++++++ arch/arm/boot/dts/am572x-idk.dts | 5 +++++ arch/arm/boot/dts/am574x-idk.dts | 5 +++++ arch/arm/boot/dts/am57xx-idk-common.dtsi | 5 ----- 4 files changed, 37 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/am571x-idk.dts b/arch/arm/boot/dts/am571x-idk.dts index 0aaacea1d887..820ce3b60bb6 100644 --- a/arch/arm/boot/dts/am571x-idk.dts +++ b/arch/arm/boot/dts/am571x-idk.dts @@ -186,3 +186,30 @@ pinctrl-1 = <&mmc2_pins_hs>; pinctrl-2 = <&mmc2_pins_ddr_rev20 &mmc2_iodelay_ddr_conf>; }; + +&mac_sw { + pinctrl-names = "default", "sleep"; + status = "okay"; +}; + +&cpsw_port1 { + phy-handle = <ðphy0_sw>; + phy-mode = "rgmii"; + ti,dual-emac-pvid = <1>; +}; + +&cpsw_port2 { + phy-handle = <ðphy1_sw>; + phy-mode = "rgmii"; + ti,dual-emac-pvid = <2>; +}; + +&davinci_mdio_sw { + ethphy0_sw: ethernet-phy@0 { + reg = <0>; + }; + + ethphy1_sw: ethernet-phy@1 { + reg = <1>; + }; +}; diff --git a/arch/arm/boot/dts/am572x-idk.dts b/arch/arm/boot/dts/am572x-idk.dts index ea1c119feaa5..c3d966904d64 100644 --- a/arch/arm/boot/dts/am572x-idk.dts +++ b/arch/arm/boot/dts/am572x-idk.dts @@ -27,3 +27,8 @@ pinctrl-1 = <&mmc2_pins_hs>; pinctrl-2 = <&mmc2_pins_ddr_rev20>; }; + +&mac { + status = "okay"; + dual_emac; +}; diff --git a/arch/arm/boot/dts/am574x-idk.dts b/arch/arm/boot/dts/am574x-idk.dts index 7935d70874ce..fa0088025b2c 100644 --- a/arch/arm/boot/dts/am574x-idk.dts +++ b/arch/arm/boot/dts/am574x-idk.dts @@ -35,3 +35,8 @@ pinctrl-1 = <&mmc2_pins_default>; pinctrl-2 = <&mmc2_pins_default>; }; + +&mac { + status = "okay"; + dual_emac; +}; diff --git a/arch/arm/boot/dts/am57xx-idk-common.dtsi b/arch/arm/boot/dts/am57xx-idk-common.dtsi index 423855a2a2d6..398721c7201c 100644 --- a/arch/arm/boot/dts/am57xx-idk-common.dtsi +++ b/arch/arm/boot/dts/am57xx-idk-common.dtsi @@ -363,11 +363,6 @@ ext-clk-src; }; -&mac { - status = "okay"; - dual_emac; -}; - &cpsw_emac0 { phy-handle = <ðphy0>; phy-mode = "rgmii"; -- cgit v1.2.3 From 3727d259ddafc4c8b8a9034ea4d115a8d0547877 Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Wed, 20 Nov 2019 00:19:25 +0200 Subject: arm: omap2plus_defconfig: enable new cpsw switchdev driver Add CONFIG_TI_CPSW_SWITCHDEV option to enable new cpsw switchdev driver Signed-off-by: Grygorii Strashko Signed-off-by: David S. Miller --- arch/arm/configs/omap2plus_defconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig index 40d7f1a4fc45..89cce8d4bc6b 100644 --- a/arch/arm/configs/omap2plus_defconfig +++ b/arch/arm/configs/omap2plus_defconfig @@ -554,3 +554,4 @@ CONFIG_DEBUG_INFO_DWARF4=y CONFIG_MAGIC_SYSRQ=y CONFIG_SCHEDSTATS=y # CONFIG_DEBUG_BUGVERBOSE is not set +CONFIG_TI_CPSW_SWITCHDEV=y -- cgit v1.2.3 From 31a88c82b466d2f31a44e21c479f45b4732ccfd0 Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Wed, 13 Nov 2019 17:46:13 +0100 Subject: KVM: PPC: Book3S HV: XIVE: Free previous EQ page when setting up a new one MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The EQ page is allocated by the guest and then passed to the hypervisor with the H_INT_SET_QUEUE_CONFIG hcall. A reference is taken on the page before handing it over to the HW. This reference is dropped either when the guest issues the H_INT_RESET hcall or when the KVM device is released. 
But the guest can legitimately call H_INT_SET_QUEUE_CONFIG several times, either to reset the EQ (vCPU hot unplug) or to set a new EQ (guest reboot). In both cases the existing EQ page reference is leaked because we simply overwrite it in the XIVE queue structure without calling put_page(). This is especially visible when the guest memory is backed with huge pages: start a VM up to the guest userspace, either reboot it or unplug a vCPU, quit QEMU. The leak is observed by comparing the value of HugePages_Free in /proc/meminfo before and after the VM is run. Ideally we'd want the XIVE code to handle the EQ page de-allocation at the platform level. This isn't the case right now because the various XIVE drivers have different allocation needs. It might be worth introducing hooks for this purpose instead of exposing XIVE internals to the drivers, but this is certainly a huge amount of work to be done later. In the meantime, for easier backport, fix both vCPU unplug and guest reboot leaks by introducing a wrapper around xive_native_configure_queue() that does the necessary cleanup. Reported-by: Satheesh Rajendran Cc: stable@vger.kernel.org # v5.2 Fixes: 13ce3297c576 ("KVM: PPC: Book3S HV: XIVE: Add controls for the EQ configuration") Signed-off-by: Cédric Le Goater Signed-off-by: Greg Kurz Tested-by: Lijun Pan Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_xive_native.c | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c index 34bd123fa024..0e1fc5a16729 100644 --- a/arch/powerpc/kvm/book3s_xive_native.c +++ b/arch/powerpc/kvm/book3s_xive_native.c @@ -50,6 +50,24 @@ static void kvmppc_xive_native_cleanup_queue(struct kvm_vcpu *vcpu, int prio) } } +static int kvmppc_xive_native_configure_queue(u32 vp_id, struct xive_q *q, + u8 prio, __be32 *qpage, + u32 order, bool can_escalate) +{ + int rc; + __be32 *qpage_prev = q->qpage; + + rc = xive_native_configure_queue(vp_id, q, prio, qpage, order, + can_escalate); + if (rc) + return rc; + + if (qpage_prev) + put_page(virt_to_page(qpage_prev)); + + return rc; +} + void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu) { struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; @@ -575,19 +593,14 @@ static int kvmppc_xive_native_set_queue_config(struct kvmppc_xive *xive, q->guest_qaddr = 0; q->guest_qshift = 0; - rc = xive_native_configure_queue(xc->vp_id, q, priority, - NULL, 0, true); + rc = kvmppc_xive_native_configure_queue(xc->vp_id, q, priority, + NULL, 0, true); if (rc) { pr_err("Failed to reset queue %d for VCPU %d: %d\n", priority, xc->server_num, rc); return rc; } - if (q->qpage) { - put_page(virt_to_page(q->qpage)); - q->qpage = NULL; - } - return 0; } @@ -646,8 +659,8 @@ static int kvmppc_xive_native_set_queue_config(struct kvmppc_xive *xive, * OPAL level because the use of END ESBs is not supported by * Linux.
*/ - rc = xive_native_configure_queue(xc->vp_id, q, priority, - (__be32 *) qaddr, kvm_eq.qshift, true); + rc = kvmppc_xive_native_configure_queue(xc->vp_id, q, priority, + (__be32 *) qaddr, kvm_eq.qshift, true); if (rc) { pr_err("Failed to configure queue %d for VCPU %d: %d\n", priority, xc->server_num, rc); -- cgit v1.2.3 From 30486e72093ea2e594f44876b7a445c219449bce Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Wed, 13 Nov 2019 17:46:19 +0100 Subject: KVM: PPC: Book3S HV: XIVE: Fix potential page leak on error path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need to check the host page size is big enough to accommodate the EQ. Let's do this before taking a reference on the EQ page to avoid a potential leak if the check fails. Cc: stable@vger.kernel.org # v5.2 Fixes: 13ce3297c576 ("KVM: PPC: Book3S HV: XIVE: Add controls for the EQ configuration") Signed-off-by: Greg Kurz Reviewed-by: Cédric Le Goater Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_xive_native.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c index 0e1fc5a16729..d83adb1e1490 100644 --- a/arch/powerpc/kvm/book3s_xive_native.c +++ b/arch/powerpc/kvm/book3s_xive_native.c @@ -630,12 +630,6 @@ static int kvmppc_xive_native_set_queue_config(struct kvmppc_xive *xive, srcu_idx = srcu_read_lock(&kvm->srcu); gfn = gpa_to_gfn(kvm_eq.qaddr); - page = gfn_to_page(kvm, gfn); - if (is_error_page(page)) { - srcu_read_unlock(&kvm->srcu, srcu_idx); - pr_err("Couldn't get queue page %llx!\n", kvm_eq.qaddr); - return -EINVAL; - } page_size = kvm_host_page_size(kvm, gfn); if (1ull << kvm_eq.qshift > page_size) { @@ -644,6 +638,13 @@ static int kvmppc_xive_native_set_queue_config(struct kvmppc_xive *xive, srcu_read_unlock(&kvm->srcu, srcu_idx); return -EINVAL; } + + page = gfn_to_page(kvm, gfn); + if (is_error_page(page)) { + srcu_read_unlock(&kvm->srcu, srcu_idx); + pr_err("Couldn't get queue page %llx!\n", kvm_eq.qaddr); + return -EINVAL; + } + qaddr = page_to_virt(page) + (kvm_eq.qaddr & ~PAGE_MASK); srcu_read_unlock(&kvm->srcu, srcu_idx); -- cgit v1.2.3 From cbbaa2727aa3ae9e0a844803da7cef7fd3b94f2b Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 18 Nov 2019 18:58:26 +0100 Subject: KVM: x86: fix presentation of TSX feature in ARCH_CAPABILITIES KVM does not implement MSR_IA32_TSX_CTRL, so it must not be presented to the guests. It is also confusing to have !ARCH_CAP_TSX_CTRL_MSR && !RTM && ARCH_CAP_TAA_NO: lack of MSR_IA32_TSX_CTRL suggests TSX was not hidden (it actually was), yet the value says that TSX is not vulnerable to microarchitectural data sampling. Fix both. Cc: stable@vger.kernel.org Tested-by: Jim Mattson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 5d530521f11d..6ea735d632e9 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1327,12 +1327,18 @@ static u64 kvm_get_arch_capabilities(void) * If TSX is disabled on the system, guests are also mitigated against * TAA and clear CPU buffer mitigation is not required for guests.
*/ - if (boot_cpu_has_bug(X86_BUG_TAA) && boot_cpu_has(X86_FEATURE_RTM) && - (data & ARCH_CAP_TSX_CTRL_MSR)) + if (!boot_cpu_has(X86_FEATURE_RTM)) + data &= ~ARCH_CAP_TAA_NO; + else if (!boot_cpu_has_bug(X86_BUG_TAA)) + data |= ARCH_CAP_TAA_NO; + else if (data & ARCH_CAP_TSX_CTRL_MSR) data &= ~ARCH_CAP_MDS_NO; + /* KVM does not emulate MSR_IA32_TSX_CTRL. */ + data &= ~ARCH_CAP_TSX_CTRL_MSR; return data; } +EXPORT_SYMBOL_GPL(kvm_get_arch_capabilities); static int kvm_get_msr_feature(struct kvm_msr_entry *msr) { -- cgit v1.2.3 From de1fca5d6e0105c9d33924e1247e2f386efc3ece Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 18 Nov 2019 12:23:00 -0500 Subject: KVM: x86: do not modify masked bits of shared MSRs "Shared MSRs" are guest MSRs that are written to the host MSRs but keep their value until the next return to userspace. They support a mask, so that some bits keep the host value, but this mask is only used to skip an unnecessary MSR write, and the value actually written to the MSR is always the full guest value. Fix this and, while at it, do not update smsr->values[slot].curr if for whatever reason the wrmsr fails. A failure should only happen due to reserved bits, and in that case the stale smsr->values[slot].curr would not match the hardware MSR; the user-return notifier still runs and the host value will always be restored, so the stale value is harmless. It is untidy though, and in rare cases avoiding it prevents spurious WRMSRs on return to userspace. Cc: stable@vger.kernel.org Reviewed-by: Jim Mattson Tested-by: Jim Mattson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6ea735d632e9..02863998af91 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -300,13 +300,14 @@ int kvm_set_shared_msr(unsigned slot, u64 value, u64 mask) struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu); int err; - if (((value ^ smsr->values[slot].curr) & mask) == 0) + value = (value & mask) | (smsr->values[slot].host & ~mask); + if (value == smsr->values[slot].curr) return 0; - smsr->values[slot].curr = value; err = wrmsrl_safe(shared_msrs_global.msrs[slot], value); if (err) return 1; + smsr->values[slot].curr = value; if (!smsr->registered) { smsr->urn.on_user_return = kvm_on_user_return; user_return_notifier_register(&smsr->urn); -- cgit v1.2.3 From edef5c36b0c7f07ab4926f6c9e50731f3772c79d Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 18 Nov 2019 12:23:00 -0500 Subject: KVM: x86: implement MSR_IA32_TSX_CTRL effect on CPUID Because KVM always emulates CPUID, the CPUID clear bit (bit 1) of MSR_IA32_TSX_CTRL must be emulated "manually" by the hypervisor when performing said emulation. Right now neither kvm-intel.ko nor kvm-amd.ko implement MSR_IA32_TSX_CTRL but this will change in the next patch.
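The CPUID side of the emulation is a small mask applied when KVM builds the guest's CPUID(EAX=7, ECX=0).EBX leaf. A sketch of the check, with a hypothetical example_get_guest_msr() accessor standing in for KVM's __kvm_get_msr():

	#include <asm/msr-index.h>

	/* Hide HLE (bit 4) and RTM (bit 11) from CPUID.7.0:EBX when the
	 * guest has set the CPUID-clear bit in its emulated IA32_TSX_CTRL.
	 * example_get_guest_msr() is illustrative; it returns 0 on success
	 * and fills in the guest's emulated MSR value. */
	static u32 example_adjust_cpuid_7_0_ebx(struct kvm_vcpu *vcpu, u32 ebx)
	{
		u64 tsx_ctrl;

		if (!example_get_guest_msr(vcpu, MSR_IA32_TSX_CTRL, &tsx_ctrl) &&
		    (tsx_ctrl & TSX_CTRL_CPUID_CLEAR))
			ebx &= ~(BIT(11) | BIT(4));	/* RTM, HLE */
		return ebx;
	}
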
Reviewed-by: Jim Mattson Tested-by: Jim Mattson Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/cpuid.c | 8 ++++++-- arch/x86/kvm/x86.c | 4 ++-- 3 files changed, 9 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 4fc61483919a..663d09ac7778 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1357,6 +1357,7 @@ int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu, void kvm_enable_efer_bits(u64); bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer); +int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data, bool host_initiated); int kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data); int kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data); int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index f68c0c753c38..c0aa07487eb8 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -816,8 +816,6 @@ static int do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 func, return __do_cpuid_func(entry, func, nent, maxnent); } -#undef F - struct kvm_cpuid_param { u32 func; bool (*qualifier)(const struct kvm_cpuid_param *param); @@ -1015,6 +1013,12 @@ bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, *ebx = entry->ebx; *ecx = entry->ecx; *edx = entry->edx; + if (function == 7 && index == 0) { + u64 data; + if (!__kvm_get_msr(vcpu, MSR_IA32_TSX_CTRL, &data, true) && + (data & TSX_CTRL_CPUID_CLEAR)) + *ebx &= ~(F(RTM) | F(HLE)); + } } else { *eax = *ebx = *ecx = *edx = 0; /* diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 02863998af91..648e84e728fc 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1484,8 +1484,8 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data, * Returns 0 on success, non-0 otherwise. * Assumes vcpu_load() was already called. */ -static int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data, - bool host_initiated) +int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data, + bool host_initiated) { struct msr_data msr; int ret; -- cgit v1.2.3 From c11f83e0626bdc2b6c550fc8b9b6eeefbd8cefaa Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 18 Nov 2019 12:23:00 -0500 Subject: KVM: vmx: implement MSR_IA32_TSX_CTRL disable RTM functionality The current guest mitigation of TAA is both too heavy and not really sufficient. It is too heavy because it will cause some affected CPUs (those that have MDS_NO but lack TAA_NO) to fall back to VERW and get the corresponding slowdown. It is not really sufficient because it will cause the MDS_NO bit to disappear upon microcode update, so that VMs started before the microcode update will not be runnable anymore afterwards, even with tsx=on. Instead, if tsx=on on the host, we can emulate MSR_IA32_TSX_CTRL for the guest and let it run without the VERW mitigation. Even though MSR_IA32_TSX_CTRL is quite heavyweight, and we do not want to write it on every vmentry, we can use the shared MSR functionality because the host kernel need not protect itself from TSX-based side-channels. 
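For reference, the shared ("user return") MSR machinery being reused here works roughly as follows: the guest value is written to the hardware MSR once, and a notifier restores the host value lazily on the next return to userspace instead of on every vmexit. A condensed sketch with simplified, hypothetical example_* names, modelled on the kvm_set_shared_msr() code shown two patches up:

	struct example_shared_msr {
		u32 index;	/* MSR number */
		u64 host;	/* value the host expects */
		u64 curr;	/* value currently in the hardware MSR */
	};

	static int example_set_shared_msr(struct example_shared_msr *m,
					  u64 value, u64 mask)
	{
		/* Masked-out bits keep the host value (the earlier fix). */
		value = (value & mask) | (m->host & ~mask);
		if (value == m->curr)
			return 0;		/* skip the expensive WRMSR */
		if (wrmsrl_safe(m->index, value))
			return 1;		/* leave curr untouched on failure */
		m->curr = value;
		example_arm_user_return_notifier();	/* restores m->host later */
		return 0;
	}
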
Tested-by: Jim Mattson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 38 +++++++++++++++++++++++++++++++++++--- arch/x86/kvm/x86.c | 23 +++++------------------ 2 files changed, 40 insertions(+), 21 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 04a8212704c1..73cbe02640b8 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -450,6 +450,7 @@ const u32 vmx_msr_index[] = { MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR, #endif MSR_EFER, MSR_TSC_AUX, MSR_STAR, + MSR_IA32_TSX_CTRL, }; #if IS_ENABLED(CONFIG_HYPERV) @@ -1683,6 +1684,9 @@ static void setup_msrs(struct vcpu_vmx *vmx) index = __find_msr_index(vmx, MSR_TSC_AUX); if (index >= 0 && guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDTSCP)) move_msr_up(vmx, index, save_nmsrs++); + index = __find_msr_index(vmx, MSR_IA32_TSX_CTRL); + if (index >= 0) + move_msr_up(vmx, index, save_nmsrs++); vmx->save_nmsrs = save_nmsrs; vmx->guest_msrs_ready = false; @@ -1782,6 +1786,11 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) #endif case MSR_EFER: return kvm_get_msr_common(vcpu, msr_info); + case MSR_IA32_TSX_CTRL: + if (!msr_info->host_initiated && + !(vcpu->arch.arch_capabilities & ARCH_CAP_TSX_CTRL_MSR)) + return 1; + goto find_shared_msr; case MSR_IA32_UMWAIT_CONTROL: if (!msr_info->host_initiated && !vmx_has_waitpkg(vmx)) return 1; @@ -1884,8 +1893,9 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (!msr_info->host_initiated && !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP)) return 1; - /* Else, falls through */ + goto find_shared_msr; default: + find_shared_msr: msr = find_msr_entry(vmx, msr_info->index); if (msr) { msr_info->data = msr->data; @@ -2001,6 +2011,13 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) MSR_IA32_SPEC_CTRL, MSR_TYPE_RW); break; + case MSR_IA32_TSX_CTRL: + if (!msr_info->host_initiated && + !(vcpu->arch.arch_capabilities & ARCH_CAP_TSX_CTRL_MSR)) + return 1; + if (data & ~(TSX_CTRL_RTM_DISABLE | TSX_CTRL_CPUID_CLEAR)) + return 1; + goto find_shared_msr; case MSR_IA32_PRED_CMD: if (!msr_info->host_initiated && !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) @@ -2152,8 +2169,10 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) /* Check reserved bit, higher 32 bits should be zero */ if ((data >> 32) != 0) return 1; - /* Else, falls through */ + goto find_shared_msr; + default: + find_shared_msr: msr = find_msr_entry(vmx, msr_index); if (msr) { u64 old_msr_data = msr->data; @@ -4234,7 +4253,20 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx) continue; vmx->guest_msrs[j].index = i; vmx->guest_msrs[j].data = 0; - vmx->guest_msrs[j].mask = -1ull; + + switch (index) { + case MSR_IA32_TSX_CTRL: + /* + * No need to pass TSX_CTRL_CPUID_CLEAR through, so + * let's avoid changing CPUID bits under the host + * kernel's feet. + */ + vmx->guest_msrs[j].mask = ~(u64)TSX_CTRL_CPUID_CLEAR; + break; + default: + vmx->guest_msrs[j].mask = -1ull; + break; + } ++vmx->nmsrs; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 648e84e728fc..fc54e3905fe3 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1314,29 +1314,16 @@ static u64 kvm_get_arch_capabilities(void) data |= ARCH_CAP_MDS_NO; /* - * On TAA affected systems, export MDS_NO=0 when: - * - TSX is enabled on the host, i.e. X86_FEATURE_RTM=1. - * - Updated microcode is present. This is detected by - * the presence of ARCH_CAP_TSX_CTRL_MSR and ensures - * that VERW clears CPU buffers. 
- * - * When MDS_NO=0 is exported, guests deploy clear CPU buffer - * mitigation and don't complain: - * - * "Vulnerable: Clear CPU buffers attempted, no microcode" - * - * If TSX is disabled on the system, guests are also mitigated against - * TAA and clear CPU buffer mitigation is not required for guests. + * On TAA affected systems: + * - nothing to do if TSX is disabled on the host. + * - we emulate TSX_CTRL if present on the host. + * This lets the guest use VERW to clear CPU buffers. */ if (!boot_cpu_has(X86_FEATURE_RTM)) - data &= ~ARCH_CAP_TAA_NO; + data &= ~(ARCH_CAP_TAA_NO | ARCH_CAP_TSX_CTRL_MSR); else if (!boot_cpu_has_bug(X86_BUG_TAA)) data |= ARCH_CAP_TAA_NO; - else if (data & ARCH_CAP_TSX_CTRL_MSR) - data &= ~ARCH_CAP_MDS_NO; - /* KVM does not emulate MSR_IA32_TSX_CTRL. */ - data &= ~ARCH_CAP_TSX_CTRL_MSR; return data; } EXPORT_SYMBOL_GPL(kvm_get_arch_capabilities); -- cgit v1.2.3 From b07a5c53d42a8c87b208614129e947dd2338ff9c Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 18 Nov 2019 12:23:01 -0500 Subject: KVM: vmx: use MSR_IA32_TSX_CTRL to hard-disable TSX on guest that lack it If X86_FEATURE_RTM is disabled, the guest should not be able to access MSR_IA32_TSX_CTRL. We can therefore use it in KVM to force all transactions from the guest to abort. Tested-by: Jim Mattson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 44 ++++++++++++++++++++++++++++++-------------- 1 file changed, 30 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 73cbe02640b8..813171bb802a 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -639,6 +639,23 @@ struct shared_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr) return NULL; } +static int vmx_set_guest_msr(struct vcpu_vmx *vmx, struct shared_msr_entry *msr, u64 data) +{ + int ret = 0; + + u64 old_msr_data = msr->data; + msr->data = data; + if (msr - vmx->guest_msrs < vmx->save_nmsrs) { + preempt_disable(); + ret = kvm_set_shared_msr(msr->index, msr->data, + msr->mask); + preempt_enable(); + if (ret) + msr->data = old_msr_data; + } + return ret; +} + void loaded_vmcs_init(struct loaded_vmcs *loaded_vmcs) { vmcs_clear(loaded_vmcs->vmcs); @@ -2174,20 +2191,10 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) default: find_shared_msr: msr = find_msr_entry(vmx, msr_index); - if (msr) { - u64 old_msr_data = msr->data; - msr->data = data; - if (msr - vmx->guest_msrs < vmx->save_nmsrs) { - preempt_disable(); - ret = kvm_set_shared_msr(msr->index, msr->data, - msr->mask); - preempt_enable(); - if (ret) - msr->data = old_msr_data; - } - break; - } - ret = kvm_set_msr_common(vcpu, msr_info); + if (msr) + ret = vmx_set_guest_msr(vmx, msr, data); + else + ret = kvm_set_msr_common(vcpu, msr_info); } return ret; @@ -7142,6 +7149,15 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu) if (boot_cpu_has(X86_FEATURE_INTEL_PT) && guest_cpuid_has(vcpu, X86_FEATURE_INTEL_PT)) update_intel_pt_cfg(vcpu); + + if (boot_cpu_has(X86_FEATURE_RTM)) { + struct shared_msr_entry *msr; + msr = find_msr_entry(vmx, MSR_IA32_TSX_CTRL); + if (msr) { + bool enabled = guest_cpuid_has(vcpu, X86_FEATURE_RTM); + vmx_set_guest_msr(vmx, msr, enabled ? 
0 : TSX_CTRL_RTM_DISABLE); + } + } } static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) -- cgit v1.2.3 From b11494bcabba7383c9db65132f6f73d64fb1407d Mon Sep 17 00:00:00 2001 From: Liran Alon Date: Thu, 21 Nov 2019 00:31:47 +0200 Subject: KVM: nVMX: Do not mark vmcs02->apic_access_page as dirty when unpinning vmcs->apic_access_page is simply a token that the hypervisor puts into the PFN of a 4KB EPTE (or PTE if using shadow-paging) that triggers APIC-access VMExit or APIC virtualization logic whenever a CPU running in VMX non-root mode reads/writes from/to this PFN. As every write either triggers an APIC-access VMExit or is performed on vmcs->virtual_apic_page, the PFN pointed to by vmcs->apic_access_page should never actually be touched by the CPU. Therefore, there is no need to mark vmcs02->apic_access_page as dirty after unpinning it on L2->L1 emulated VMExit or when L1 exits VMX operation. Reviewed-by: Krish Sadhukhan Reviewed-by: Joao Martins Reviewed-by: Jim Mattson Signed-off-by: Liran Alon Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 5fb59bed344e..783de6d83f9a 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -268,7 +268,7 @@ static void free_nested(struct kvm_vcpu *vcpu) vmx->nested.cached_shadow_vmcs12 = NULL; /* Unpin physical memory we referred to in the vmcs02 */ if (vmx->nested.apic_access_page) { - kvm_release_page_dirty(vmx->nested.apic_access_page); + kvm_release_page_clean(vmx->nested.apic_access_page); vmx->nested.apic_access_page = NULL; } kvm_vcpu_unmap(vcpu, &vmx->nested.virtual_apic_map, true); @@ -3070,7 +3070,7 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu) * to it so we can release it later.
*/ if (vmx->nested.apic_access_page) { /* shouldn't happen */ - kvm_release_page_dirty(vmx->nested.apic_access_page); + kvm_release_page_clean(vmx->nested.apic_access_page); vmx->nested.apic_access_page = NULL; } page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->apic_access_addr); @@ -4267,7 +4267,7 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, /* Unpin physical memory we referred to in vmcs02 */ if (vmx->nested.apic_access_page) { - kvm_release_page_dirty(vmx->nested.apic_access_page); + kvm_release_page_clean(vmx->nested.apic_access_page); vmx->nested.apic_access_page = NULL; } kvm_vcpu_unmap(vcpu, &vmx->nested.virtual_apic_map, true); -- cgit v1.2.3 From db5a95ec166f3fd3eecbd07ebdf56986a3e9f43f Mon Sep 17 00:00:00 2001 From: Mao Wenan Date: Tue, 19 Nov 2019 11:06:40 +0800 Subject: KVM: x86: remove set but not used variable 'called' Fixes gcc '-Wunused-but-set-variable' warning: arch/x86/kvm/x86.c: In function kvm_make_scan_ioapic_request_mask: arch/x86/kvm/x86.c:7911:7: warning: variable called set but not used [-Wunused-but-set-variable] It is not used since commit 7ee30bc132c6 ("KVM: x86: deliver KVM IOAPIC scan request to target vCPUs") Signed-off-by: Mao Wenan Fixes: 7ee30bc132c6 ("KVM: x86: deliver KVM IOAPIC scan request to target vCPUs") Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c814c226d02e..a256e09f321a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7900,12 +7900,11 @@ void kvm_make_scan_ioapic_request_mask(struct kvm *kvm, unsigned long *vcpu_bitmap) { cpumask_var_t cpus; - bool called; zalloc_cpumask_var(&cpus, GFP_ATOMIC); - called = kvm_make_vcpus_request_mask(kvm, KVM_REQ_SCAN_IOAPIC, - vcpu_bitmap, cpus); + kvm_make_vcpus_request_mask(kvm, KVM_REQ_SCAN_IOAPIC, + vcpu_bitmap, cpus); free_cpumask_var(cpus); } -- cgit v1.2.3 From 0155b2b91b263248fb5dd01c238439d4ab3731c5 Mon Sep 17 00:00:00 2001 From: Liran Alon Date: Wed, 20 Nov 2019 16:33:07 +0200 Subject: KVM: nVMX: Remove unnecessary TLB flushes on L1<->L2 switches when L1 use apic-access-page MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to Intel SDM section 28.3.3.3/28.3.3.4 Guidelines for Use of the INVVPID/INVEPT Instruction, the hypervisor needs to execute INVVPID/INVEPT X in case CPU executes VMEntry with VPID/EPTP X and either: "Virtualize APIC accesses" VM-execution control was changed from 0 to 1, OR the value of apic_access_page was changed. In the nested case, the burden falls on L1, unless L0 enables EPT in vmcs02 but L1 enables neither EPT nor VPID in vmcs12. For this reason prepare_vmcs02() and load_vmcs12_host_state() have special code to request a TLB flush in case L1 does not use EPT but it uses "virtualize APIC accesses". This special case however is not necessary. On a nested vmentry the physical TLB will already be flushed except if all the following apply: * L0 uses VPID * L1 uses VPID * L0 can guarantee TLB entries populated while running L1 are tagged differently than TLB entries populated while running L2. If the first condition is false, the processor will flush the TLB on vmentry to L2. If the second or third condition are false, prepare_vmcs02() will request KVM_REQ_TLB_FLUSH. However, even if both are true, no extra TLB flush is needed to handle the APIC access page: * if L1 doesn't use VPID, the second condition doesn't hold and the TLB will be flushed anyway. 
* if L1 uses VPID, it has to flush the TLB itself with INVVPID and section 28.3.3.3 doesn't apply to L0. * even INVEPT is not needed because, if L0 uses EPT, it uses a different EPTP when running L2 than when running L1 (because guest_mode is part of the mmu role). In this case SDM section 28.3.3.4 doesn't apply. Similarly, examining nested_vmx_vmexit()->load_vmcs12_host_state(), one can note that L0 won't flush the TLB only in cases where SDM sections 28.3.3.3 and 28.3.3.4 don't apply. In particular, if L0 uses different VPIDs for L1 and L2 (i.e. vmx->vpid != vmx->nested.vpid02), section 28.3.3.3 doesn't apply. Thus, remove this flush from prepare_vmcs02() and nested_vmx_vmexit(). Side-note: This patch can be viewed as removing the parts of commit fb6c81984313 ("kvm: vmx: Flush TLB when the APIC-access address changes") that are no longer relevant since commit 1313cc2bd8f6 ("kvm: mmu: Add guest_mode to kvm_mmu_page_role"). I.e., the first commit assumes that if L0 uses EPT and L1 doesn't use EPT, then L0 will use the same EPTP when running L1 and when running L2, which indeed required L0 to execute INVEPT before entering the L2 guest. This assumption has not been true since guest_mode was added to the mmu role. Reviewed-by: Joao Martins Signed-off-by: Liran Alon Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 783de6d83f9a..4aea7d304beb 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -2493,9 +2493,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, if (nested_cpu_has_ept(vmcs12)) nested_ept_init_mmu_context(vcpu); - else if (nested_cpu_has2(vmcs12, - SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) - vmx_flush_tlb(vcpu, true); /* * This sets GUEST_CR0 to vmcs12->guest_cr0, possibly modifying those @@ -4259,10 +4256,6 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, if (vmx->nested.change_vmcs01_virtual_apic_mode) { vmx->nested.change_vmcs01_virtual_apic_mode = false; vmx_set_virtual_apic_mode(vcpu); - } else if (!nested_cpu_has_ept(vmcs12) && - nested_cpu_has2(vmcs12, - SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) { - vmx_flush_tlb(vcpu, true); } /* Unpin physical memory we referred to in vmcs02 */ -- cgit v1.2.3 From c50d8ae3a1274f32c9033bbb0e1c5b3115da2112 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 21 Nov 2019 10:45:07 +0100 Subject: KVM: x86: create mmu/ subdirectory Preparatory work for shattering mmu.c into multiple files. Besides making it easier to follow, this will also make it possible to write unit tests for various parts.
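A mechanical move like this can be double-checked with git's rename detection; a minimal sketch, reusing the commit id from the log header above:

    git show -M100% --stat c50d8ae3a127

With -M100%, files whose contents are byte-identical collapse into pure renames in the diffstat (e.g. arch/x86/kvm/{mmu.c => mmu/mmu.c}), confirming that the code moved without modification.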
Signed-off-by: Paolo Bonzini --- arch/x86/kvm/Makefile | 4 +- arch/x86/kvm/mmu.c | 6502 ---------------------------------------- arch/x86/kvm/mmu/mmu.c | 6502 ++++++++++++++++++++++++++++++++++++++++ arch/x86/kvm/mmu/page_track.c | 265 ++ arch/x86/kvm/mmu/paging_tmpl.h | 1090 +++++++ arch/x86/kvm/page_track.c | 265 -- arch/x86/kvm/paging_tmpl.h | 1090 ------- 7 files changed, 7859 insertions(+), 7859 deletions(-) delete mode 100644 arch/x86/kvm/mmu.c create mode 100644 arch/x86/kvm/mmu/mmu.c create mode 100644 arch/x86/kvm/mmu/page_track.c create mode 100644 arch/x86/kvm/mmu/paging_tmpl.h delete mode 100644 arch/x86/kvm/page_track.c delete mode 100644 arch/x86/kvm/paging_tmpl.h (limited to 'arch') diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index 31ecf7a76d5a..b19ef421084d 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -8,9 +8,9 @@ kvm-y += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \ $(KVM)/eventfd.o $(KVM)/irqchip.o $(KVM)/vfio.o kvm-$(CONFIG_KVM_ASYNC_PF) += $(KVM)/async_pf.o -kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \ +kvm-y += x86.o emulate.o i8259.o irq.o lapic.o \ i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o \ - hyperv.o page_track.o debugfs.o + hyperv.o debugfs.o mmu/mmu.o mmu/page_track.o kvm-intel-y += vmx/vmx.o vmx/vmenter.o vmx/pmu_intel.o vmx/vmcs12.o vmx/evmcs.o vmx/nested.o kvm-amd-y += svm.o pmu_amd.o diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c deleted file mode 100644 index 6f92b40d798c..000000000000 --- a/arch/x86/kvm/mmu.c +++ /dev/null @@ -1,6502 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Kernel-based Virtual Machine driver for Linux - * - * This module enables machines with Intel VT-x extensions to run virtual - * machines without emulation or binary translation. - * - * MMU support - * - * Copyright (C) 2006 Qumranet, Inc. - * Copyright 2010 Red Hat, Inc. and/or its affiliates. - * - * Authors: - * Yaniv Kamay - * Avi Kivity - */ - -#include "irq.h" -#include "mmu.h" -#include "x86.h" -#include "kvm_cache_regs.h" -#include "cpuid.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include "trace.h" - -extern bool itlb_multihit_kvm_mitigation; - -static int __read_mostly nx_huge_pages = -1; -#ifdef CONFIG_PREEMPT_RT -/* Recovery can cause latency spikes, disable it for PREEMPT_RT. */ -static uint __read_mostly nx_huge_pages_recovery_ratio = 0; -#else -static uint __read_mostly nx_huge_pages_recovery_ratio = 60; -#endif - -static int set_nx_huge_pages(const char *val, const struct kernel_param *kp); -static int set_nx_huge_pages_recovery_ratio(const char *val, const struct kernel_param *kp); - -static struct kernel_param_ops nx_huge_pages_ops = { - .set = set_nx_huge_pages, - .get = param_get_bool, -}; - -static struct kernel_param_ops nx_huge_pages_recovery_ratio_ops = { - .set = set_nx_huge_pages_recovery_ratio, - .get = param_get_uint, -}; - -module_param_cb(nx_huge_pages, &nx_huge_pages_ops, &nx_huge_pages, 0644); -__MODULE_PARM_TYPE(nx_huge_pages, "bool"); -module_param_cb(nx_huge_pages_recovery_ratio, &nx_huge_pages_recovery_ratio_ops, - &nx_huge_pages_recovery_ratio, 0644); -__MODULE_PARM_TYPE(nx_huge_pages_recovery_ratio, "uint"); - -/* - * When setting this variable to true it enables Two-Dimensional-Paging - * where the hardware walks 2 page tables: - * 1. 
the guest-virtual to guest-physical - * 2. while doing 1. it walks guest-physical to host-physical - * If the hardware supports that we don't need to do shadow paging. - */ -bool tdp_enabled = false; - -enum { - AUDIT_PRE_PAGE_FAULT, - AUDIT_POST_PAGE_FAULT, - AUDIT_PRE_PTE_WRITE, - AUDIT_POST_PTE_WRITE, - AUDIT_PRE_SYNC, - AUDIT_POST_SYNC -}; - -#undef MMU_DEBUG - -#ifdef MMU_DEBUG -static bool dbg = 0; -module_param(dbg, bool, 0644); - -#define pgprintk(x...) do { if (dbg) printk(x); } while (0) -#define rmap_printk(x...) do { if (dbg) printk(x); } while (0) -#define MMU_WARN_ON(x) WARN_ON(x) -#else -#define pgprintk(x...) do { } while (0) -#define rmap_printk(x...) do { } while (0) -#define MMU_WARN_ON(x) do { } while (0) -#endif - -#define PTE_PREFETCH_NUM 8 - -#define PT_FIRST_AVAIL_BITS_SHIFT 10 -#define PT64_SECOND_AVAIL_BITS_SHIFT 54 - -/* - * The mask used to denote special SPTEs, which can be either MMIO SPTEs or - * Access Tracking SPTEs. - */ -#define SPTE_SPECIAL_MASK (3ULL << 52) -#define SPTE_AD_ENABLED_MASK (0ULL << 52) -#define SPTE_AD_DISABLED_MASK (1ULL << 52) -#define SPTE_AD_WRPROT_ONLY_MASK (2ULL << 52) -#define SPTE_MMIO_MASK (3ULL << 52) - -#define PT64_LEVEL_BITS 9 - -#define PT64_LEVEL_SHIFT(level) \ - (PAGE_SHIFT + (level - 1) * PT64_LEVEL_BITS) - -#define PT64_INDEX(address, level)\ - (((address) >> PT64_LEVEL_SHIFT(level)) & ((1 << PT64_LEVEL_BITS) - 1)) - - -#define PT32_LEVEL_BITS 10 - -#define PT32_LEVEL_SHIFT(level) \ - (PAGE_SHIFT + (level - 1) * PT32_LEVEL_BITS) - -#define PT32_LVL_OFFSET_MASK(level) \ - (PT32_BASE_ADDR_MASK & ((1ULL << (PAGE_SHIFT + (((level) - 1) \ - * PT32_LEVEL_BITS))) - 1)) - -#define PT32_INDEX(address, level)\ - (((address) >> PT32_LEVEL_SHIFT(level)) & ((1 << PT32_LEVEL_BITS) - 1)) - - -#ifdef CONFIG_DYNAMIC_PHYSICAL_MASK -#define PT64_BASE_ADDR_MASK (physical_mask & ~(u64)(PAGE_SIZE-1)) -#else -#define PT64_BASE_ADDR_MASK (((1ULL << 52) - 1) & ~(u64)(PAGE_SIZE-1)) -#endif -#define PT64_LVL_ADDR_MASK(level) \ - (PT64_BASE_ADDR_MASK & ~((1ULL << (PAGE_SHIFT + (((level) - 1) \ - * PT64_LEVEL_BITS))) - 1)) -#define PT64_LVL_OFFSET_MASK(level) \ - (PT64_BASE_ADDR_MASK & ((1ULL << (PAGE_SHIFT + (((level) - 1) \ - * PT64_LEVEL_BITS))) - 1)) - -#define PT32_BASE_ADDR_MASK PAGE_MASK -#define PT32_DIR_BASE_ADDR_MASK \ - (PAGE_MASK & ~((1ULL << (PAGE_SHIFT + PT32_LEVEL_BITS)) - 1)) -#define PT32_LVL_ADDR_MASK(level) \ - (PAGE_MASK & ~((1ULL << (PAGE_SHIFT + (((level) - 1) \ - * PT32_LEVEL_BITS))) - 1)) - -#define PT64_PERM_MASK (PT_PRESENT_MASK | PT_WRITABLE_MASK | shadow_user_mask \ - | shadow_x_mask | shadow_nx_mask | shadow_me_mask) - -#define ACC_EXEC_MASK 1 -#define ACC_WRITE_MASK PT_WRITABLE_MASK -#define ACC_USER_MASK PT_USER_MASK -#define ACC_ALL (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK) - -/* The mask for the R/X bits in EPT PTEs */ -#define PT64_EPT_READABLE_MASK 0x1ull -#define PT64_EPT_EXECUTABLE_MASK 0x4ull - -#include - -#define SPTE_HOST_WRITEABLE (1ULL << PT_FIRST_AVAIL_BITS_SHIFT) -#define SPTE_MMU_WRITEABLE (1ULL << (PT_FIRST_AVAIL_BITS_SHIFT + 1)) - -#define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level) - -/* make pte_list_desc fit well in cache line */ -#define PTE_LIST_EXT 3 - -/* - * Return values of handle_mmio_page_fault and mmu.page_fault: - * RET_PF_RETRY: let CPU fault again on the address. - * RET_PF_EMULATE: mmio page fault, emulate the instruction directly. - * - * For handle_mmio_page_fault only: - * RET_PF_INVALID: the spte is invalid, let the real page fault path update it. 
- */ -enum { - RET_PF_RETRY = 0, - RET_PF_EMULATE = 1, - RET_PF_INVALID = 2, -}; - -struct pte_list_desc { - u64 *sptes[PTE_LIST_EXT]; - struct pte_list_desc *more; -}; - -struct kvm_shadow_walk_iterator { - u64 addr; - hpa_t shadow_addr; - u64 *sptep; - int level; - unsigned index; -}; - -static const union kvm_mmu_page_role mmu_base_role_mask = { - .cr0_wp = 1, - .gpte_is_8_bytes = 1, - .nxe = 1, - .smep_andnot_wp = 1, - .smap_andnot_wp = 1, - .smm = 1, - .guest_mode = 1, - .ad_disabled = 1, -}; - -#define for_each_shadow_entry_using_root(_vcpu, _root, _addr, _walker) \ - for (shadow_walk_init_using_root(&(_walker), (_vcpu), \ - (_root), (_addr)); \ - shadow_walk_okay(&(_walker)); \ - shadow_walk_next(&(_walker))) - -#define for_each_shadow_entry(_vcpu, _addr, _walker) \ - for (shadow_walk_init(&(_walker), _vcpu, _addr); \ - shadow_walk_okay(&(_walker)); \ - shadow_walk_next(&(_walker))) - -#define for_each_shadow_entry_lockless(_vcpu, _addr, _walker, spte) \ - for (shadow_walk_init(&(_walker), _vcpu, _addr); \ - shadow_walk_okay(&(_walker)) && \ - ({ spte = mmu_spte_get_lockless(_walker.sptep); 1; }); \ - __shadow_walk_next(&(_walker), spte)) - -static struct kmem_cache *pte_list_desc_cache; -static struct kmem_cache *mmu_page_header_cache; -static struct percpu_counter kvm_total_used_mmu_pages; - -static u64 __read_mostly shadow_nx_mask; -static u64 __read_mostly shadow_x_mask; /* mutual exclusive with nx_mask */ -static u64 __read_mostly shadow_user_mask; -static u64 __read_mostly shadow_accessed_mask; -static u64 __read_mostly shadow_dirty_mask; -static u64 __read_mostly shadow_mmio_mask; -static u64 __read_mostly shadow_mmio_value; -static u64 __read_mostly shadow_mmio_access_mask; -static u64 __read_mostly shadow_present_mask; -static u64 __read_mostly shadow_me_mask; - -/* - * SPTEs used by MMUs without A/D bits are marked with SPTE_AD_DISABLED_MASK; - * shadow_acc_track_mask is the set of bits to be cleared in non-accessed - * pages. - */ -static u64 __read_mostly shadow_acc_track_mask; - -/* - * The mask/shift to use for saving the original R/X bits when marking the PTE - * as not-present for access tracking purposes. We do not save the W bit as the - * PTEs being access tracked also need to be dirty tracked, so the W bit will be - * restored only when a write is attempted to the page. - */ -static const u64 shadow_acc_track_saved_bits_mask = PT64_EPT_READABLE_MASK | - PT64_EPT_EXECUTABLE_MASK; -static const u64 shadow_acc_track_saved_bits_shift = PT64_SECOND_AVAIL_BITS_SHIFT; - -/* - * This mask must be set on all non-zero Non-Present or Reserved SPTEs in order - * to guard against L1TF attacks. - */ -static u64 __read_mostly shadow_nonpresent_or_rsvd_mask; - -/* - * The number of high-order 1 bits to use in the mask above. - */ -static const u64 shadow_nonpresent_or_rsvd_mask_len = 5; - -/* - * In some cases, we need to preserve the GFN of a non-present or reserved - * SPTE when we usurp the upper five bits of the physical address space to - * defend against L1TF, e.g. for MMIO SPTEs. To preserve the GFN, we'll - * shift bits of the GFN that overlap with shadow_nonpresent_or_rsvd_mask - * left into the reserved bits, i.e. the GFN in the SPTE will be split into - * high and low parts. This mask covers the lower bits of the GFN. - */ -static u64 __read_mostly shadow_nonpresent_or_rsvd_lower_gfn_mask; - -/* - * The number of non-reserved physical address bits irrespective of features - * that repurpose legal bits, e.g. MKTME. 
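- * (MKTME repurposes high physical-address bits as key IDs, so the
- * MKTME-reduced width would undercount the legal bits; see the comment
- * in kvm_get_shadow_phys_bits() below.)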
- */ -static u8 __read_mostly shadow_phys_bits; - -static void mmu_spte_set(u64 *sptep, u64 spte); -static bool is_executable_pte(u64 spte); -static union kvm_mmu_page_role -kvm_mmu_calc_root_page_role(struct kvm_vcpu *vcpu); - -#define CREATE_TRACE_POINTS -#include "mmutrace.h" - - -static inline bool kvm_available_flush_tlb_with_range(void) -{ - return kvm_x86_ops->tlb_remote_flush_with_range; -} - -static void kvm_flush_remote_tlbs_with_range(struct kvm *kvm, - struct kvm_tlb_range *range) -{ - int ret = -ENOTSUPP; - - if (range && kvm_x86_ops->tlb_remote_flush_with_range) - ret = kvm_x86_ops->tlb_remote_flush_with_range(kvm, range); - - if (ret) - kvm_flush_remote_tlbs(kvm); -} - -static void kvm_flush_remote_tlbs_with_address(struct kvm *kvm, - u64 start_gfn, u64 pages) -{ - struct kvm_tlb_range range; - - range.start_gfn = start_gfn; - range.pages = pages; - - kvm_flush_remote_tlbs_with_range(kvm, &range); -} - -void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value, u64 access_mask) -{ - BUG_ON((u64)(unsigned)access_mask != access_mask); - BUG_ON((mmio_mask & mmio_value) != mmio_value); - shadow_mmio_value = mmio_value | SPTE_MMIO_MASK; - shadow_mmio_mask = mmio_mask | SPTE_SPECIAL_MASK; - shadow_mmio_access_mask = access_mask; -} -EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask); - -static bool is_mmio_spte(u64 spte) -{ - return (spte & shadow_mmio_mask) == shadow_mmio_value; -} - -static inline bool sp_ad_disabled(struct kvm_mmu_page *sp) -{ - return sp->role.ad_disabled; -} - -static inline bool kvm_vcpu_ad_need_write_protect(struct kvm_vcpu *vcpu) -{ - /* - * When using the EPT page-modification log, the GPAs in the log - * would come from L2 rather than L1. Therefore, we need to rely - * on write protection to record dirty pages. This also bypasses - * PML, since writes now result in a vmexit. - */ - return vcpu->arch.mmu == &vcpu->arch.guest_mmu; -} - -static inline bool spte_ad_enabled(u64 spte) -{ - MMU_WARN_ON(is_mmio_spte(spte)); - return (spte & SPTE_SPECIAL_MASK) != SPTE_AD_DISABLED_MASK; -} - -static inline bool spte_ad_need_write_protect(u64 spte) -{ - MMU_WARN_ON(is_mmio_spte(spte)); - return (spte & SPTE_SPECIAL_MASK) != SPTE_AD_ENABLED_MASK; -} - -static bool is_nx_huge_page_enabled(void) -{ - return READ_ONCE(nx_huge_pages); -} - -static inline u64 spte_shadow_accessed_mask(u64 spte) -{ - MMU_WARN_ON(is_mmio_spte(spte)); - return spte_ad_enabled(spte) ? shadow_accessed_mask : 0; -} - -static inline u64 spte_shadow_dirty_mask(u64 spte) -{ - MMU_WARN_ON(is_mmio_spte(spte)); - return spte_ad_enabled(spte) ? shadow_dirty_mask : 0; -} - -static inline bool is_access_track_spte(u64 spte) -{ - return !spte_ad_enabled(spte) && (spte & shadow_acc_track_mask) == 0; -} - -/* - * Due to limited space in PTEs, the MMIO generation is a 19 bit subset of - * the memslots generation and is derived as follows: - * - * Bits 0-8 of the MMIO generation are propagated to spte bits 3-11 - * Bits 9-18 of the MMIO generation are propagated to spte bits 52-61 - * - * The KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS flag is intentionally not included in - * the MMIO generation number, as doing so would require stealing a bit from - * the "real" generation number and thus effectively halve the maximum number - * of MMIO generations that can be handled before encountering a wrap (which - * requires a full MMU zap). The flag is instead explicitly queried when - * checking for MMIO spte cache hits. 
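- * (Arithmetic check: bits 0-8 contribute 9 bits and bits 9-18 contribute
- * 10 bits, matching the 19-bit MMIO_SPTE_GEN_MASK defined below.)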
- */ -#define MMIO_SPTE_GEN_MASK GENMASK_ULL(18, 0) - -#define MMIO_SPTE_GEN_LOW_START 3 -#define MMIO_SPTE_GEN_LOW_END 11 -#define MMIO_SPTE_GEN_LOW_MASK GENMASK_ULL(MMIO_SPTE_GEN_LOW_END, \ - MMIO_SPTE_GEN_LOW_START) - -#define MMIO_SPTE_GEN_HIGH_START 52 -#define MMIO_SPTE_GEN_HIGH_END 61 -#define MMIO_SPTE_GEN_HIGH_MASK GENMASK_ULL(MMIO_SPTE_GEN_HIGH_END, \ - MMIO_SPTE_GEN_HIGH_START) -static u64 generation_mmio_spte_mask(u64 gen) -{ - u64 mask; - - WARN_ON(gen & ~MMIO_SPTE_GEN_MASK); - - mask = (gen << MMIO_SPTE_GEN_LOW_START) & MMIO_SPTE_GEN_LOW_MASK; - mask |= (gen << MMIO_SPTE_GEN_HIGH_START) & MMIO_SPTE_GEN_HIGH_MASK; - return mask; -} - -static u64 get_mmio_spte_generation(u64 spte) -{ - u64 gen; - - spte &= ~shadow_mmio_mask; - - gen = (spte & MMIO_SPTE_GEN_LOW_MASK) >> MMIO_SPTE_GEN_LOW_START; - gen |= (spte & MMIO_SPTE_GEN_HIGH_MASK) >> MMIO_SPTE_GEN_HIGH_START; - return gen; -} - -static void mark_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, u64 gfn, - unsigned access) -{ - u64 gen = kvm_vcpu_memslots(vcpu)->generation & MMIO_SPTE_GEN_MASK; - u64 mask = generation_mmio_spte_mask(gen); - u64 gpa = gfn << PAGE_SHIFT; - - access &= shadow_mmio_access_mask; - mask |= shadow_mmio_value | access; - mask |= gpa | shadow_nonpresent_or_rsvd_mask; - mask |= (gpa & shadow_nonpresent_or_rsvd_mask) - << shadow_nonpresent_or_rsvd_mask_len; - - trace_mark_mmio_spte(sptep, gfn, access, gen); - mmu_spte_set(sptep, mask); -} - -static gfn_t get_mmio_spte_gfn(u64 spte) -{ - u64 gpa = spte & shadow_nonpresent_or_rsvd_lower_gfn_mask; - - gpa |= (spte >> shadow_nonpresent_or_rsvd_mask_len) - & shadow_nonpresent_or_rsvd_mask; - - return gpa >> PAGE_SHIFT; -} - -static unsigned get_mmio_spte_access(u64 spte) -{ - return spte & shadow_mmio_access_mask; -} - -static bool set_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn, - kvm_pfn_t pfn, unsigned access) -{ - if (unlikely(is_noslot_pfn(pfn))) { - mark_mmio_spte(vcpu, sptep, gfn, access); - return true; - } - - return false; -} - -static bool check_mmio_spte(struct kvm_vcpu *vcpu, u64 spte) -{ - u64 kvm_gen, spte_gen, gen; - - gen = kvm_vcpu_memslots(vcpu)->generation; - if (unlikely(gen & KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS)) - return false; - - kvm_gen = gen & MMIO_SPTE_GEN_MASK; - spte_gen = get_mmio_spte_generation(spte); - - trace_check_mmio_spte(spte, kvm_gen, spte_gen); - return likely(kvm_gen == spte_gen); -} - -/* - * Sets the shadow PTE masks used by the MMU. - * - * Assumptions: - * - Setting either @accessed_mask or @dirty_mask requires setting both - * - At least one of @accessed_mask or @acc_track_mask must be set - */ -void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, - u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 p_mask, - u64 acc_track_mask, u64 me_mask) -{ - BUG_ON(!dirty_mask != !accessed_mask); - BUG_ON(!accessed_mask && !acc_track_mask); - BUG_ON(acc_track_mask & SPTE_SPECIAL_MASK); - - shadow_user_mask = user_mask; - shadow_accessed_mask = accessed_mask; - shadow_dirty_mask = dirty_mask; - shadow_nx_mask = nx_mask; - shadow_x_mask = x_mask; - shadow_present_mask = p_mask; - shadow_acc_track_mask = acc_track_mask; - shadow_me_mask = me_mask; -} -EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes); - -static u8 kvm_get_shadow_phys_bits(void) -{ - /* - * boot_cpu_data.x86_phys_bits is reduced when MKTME is detected - * in CPU detection code, but MKTME treats those reduced bits as - * 'keyID' thus they are not reserved bits. Therefore for MKTME - * we should still return physical address bits reported by CPUID. 
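- * (CPUID leaf 0x80000008, EAX[7:0], reports the physical address width;
- * that is what the cpuid_eax(0x80000008) & 0xff below extracts.)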
- */ - if (!boot_cpu_has(X86_FEATURE_TME) || - WARN_ON_ONCE(boot_cpu_data.extended_cpuid_level < 0x80000008)) - return boot_cpu_data.x86_phys_bits; - - return cpuid_eax(0x80000008) & 0xff; -} - -static void kvm_mmu_reset_all_pte_masks(void) -{ - u8 low_phys_bits; - - shadow_user_mask = 0; - shadow_accessed_mask = 0; - shadow_dirty_mask = 0; - shadow_nx_mask = 0; - shadow_x_mask = 0; - shadow_mmio_mask = 0; - shadow_present_mask = 0; - shadow_acc_track_mask = 0; - - shadow_phys_bits = kvm_get_shadow_phys_bits(); - - /* - * If the CPU has 46 or less physical address bits, then set an - * appropriate mask to guard against L1TF attacks. Otherwise, it is - * assumed that the CPU is not vulnerable to L1TF. - * - * Some Intel CPUs address the L1 cache using more PA bits than are - * reported by CPUID. Use the PA width of the L1 cache when possible - * to achieve more effective mitigation, e.g. if system RAM overlaps - * the most significant bits of legal physical address space. - */ - shadow_nonpresent_or_rsvd_mask = 0; - low_phys_bits = boot_cpu_data.x86_cache_bits; - if (boot_cpu_data.x86_cache_bits < - 52 - shadow_nonpresent_or_rsvd_mask_len) { - shadow_nonpresent_or_rsvd_mask = - rsvd_bits(boot_cpu_data.x86_cache_bits - - shadow_nonpresent_or_rsvd_mask_len, - boot_cpu_data.x86_cache_bits - 1); - low_phys_bits -= shadow_nonpresent_or_rsvd_mask_len; - } else - WARN_ON_ONCE(boot_cpu_has_bug(X86_BUG_L1TF)); - - shadow_nonpresent_or_rsvd_lower_gfn_mask = - GENMASK_ULL(low_phys_bits - 1, PAGE_SHIFT); -} - -static int is_cpuid_PSE36(void) -{ - return 1; -} - -static int is_nx(struct kvm_vcpu *vcpu) -{ - return vcpu->arch.efer & EFER_NX; -} - -static int is_shadow_present_pte(u64 pte) -{ - return (pte != 0) && !is_mmio_spte(pte); -} - -static int is_large_pte(u64 pte) -{ - return pte & PT_PAGE_SIZE_MASK; -} - -static int is_last_spte(u64 pte, int level) -{ - if (level == PT_PAGE_TABLE_LEVEL) - return 1; - if (is_large_pte(pte)) - return 1; - return 0; -} - -static bool is_executable_pte(u64 spte) -{ - return (spte & (shadow_x_mask | shadow_nx_mask)) == shadow_x_mask; -} - -static kvm_pfn_t spte_to_pfn(u64 pte) -{ - return (pte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT; -} - -static gfn_t pse36_gfn_delta(u32 gpte) -{ - int shift = 32 - PT32_DIR_PSE36_SHIFT - PAGE_SHIFT; - - return (gpte & PT32_DIR_PSE36_MASK) << shift; -} - -#ifdef CONFIG_X86_64 -static void __set_spte(u64 *sptep, u64 spte) -{ - WRITE_ONCE(*sptep, spte); -} - -static void __update_clear_spte_fast(u64 *sptep, u64 spte) -{ - WRITE_ONCE(*sptep, spte); -} - -static u64 __update_clear_spte_slow(u64 *sptep, u64 spte) -{ - return xchg(sptep, spte); -} - -static u64 __get_spte_lockless(u64 *sptep) -{ - return READ_ONCE(*sptep); -} -#else -union split_spte { - struct { - u32 spte_low; - u32 spte_high; - }; - u64 spte; -}; - -static void count_spte_clear(u64 *sptep, u64 spte) -{ - struct kvm_mmu_page *sp = page_header(__pa(sptep)); - - if (is_shadow_present_pte(spte)) - return; - - /* Ensure the spte is completely set before we increase the count */ - smp_wmb(); - sp->clear_spte_count++; -} - -static void __set_spte(u64 *sptep, u64 spte) -{ - union split_spte *ssptep, sspte; - - ssptep = (union split_spte *)sptep; - sspte = (union split_spte)spte; - - ssptep->spte_high = sspte.spte_high; - - /* - * If we map the spte from nonpresent to present, We should store - * the high bits firstly, then set present bit, so cpu can not - * fetch this spte while we are setting the spte. 
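- * (The smp_wmb() below orders the spte_high store before the spte_low
- * store; the present bit lives in the low word, so a concurrent walker
- * can never see a present spte paired with a stale high half.)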
- */ - smp_wmb(); - - WRITE_ONCE(ssptep->spte_low, sspte.spte_low); -} - -static void __update_clear_spte_fast(u64 *sptep, u64 spte) -{ - union split_spte *ssptep, sspte; - - ssptep = (union split_spte *)sptep; - sspte = (union split_spte)spte; - - WRITE_ONCE(ssptep->spte_low, sspte.spte_low); - - /* - * If we map the spte from present to nonpresent, we should clear - * present bit firstly to avoid vcpu fetch the old high bits. - */ - smp_wmb(); - - ssptep->spte_high = sspte.spte_high; - count_spte_clear(sptep, spte); -} - -static u64 __update_clear_spte_slow(u64 *sptep, u64 spte) -{ - union split_spte *ssptep, sspte, orig; - - ssptep = (union split_spte *)sptep; - sspte = (union split_spte)spte; - - /* xchg acts as a barrier before the setting of the high bits */ - orig.spte_low = xchg(&ssptep->spte_low, sspte.spte_low); - orig.spte_high = ssptep->spte_high; - ssptep->spte_high = sspte.spte_high; - count_spte_clear(sptep, spte); - - return orig.spte; -} - -/* - * The idea using the light way get the spte on x86_32 guest is from - * gup_get_pte (mm/gup.c). - * - * An spte tlb flush may be pending, because kvm_set_pte_rmapp - * coalesces them and we are running out of the MMU lock. Therefore - * we need to protect against in-progress updates of the spte. - * - * Reading the spte while an update is in progress may get the old value - * for the high part of the spte. The race is fine for a present->non-present - * change (because the high part of the spte is ignored for non-present spte), - * but for a present->present change we must reread the spte. - * - * All such changes are done in two steps (present->non-present and - * non-present->present), hence it is enough to count the number of - * present->non-present updates: if it changed while reading the spte, - * we might have hit the race. This is done using clear_spte_count. - */ -static u64 __get_spte_lockless(u64 *sptep) -{ - struct kvm_mmu_page *sp = page_header(__pa(sptep)); - union split_spte spte, *orig = (union split_spte *)sptep; - int count; - -retry: - count = sp->clear_spte_count; - smp_rmb(); - - spte.spte_low = orig->spte_low; - smp_rmb(); - - spte.spte_high = orig->spte_high; - smp_rmb(); - - if (unlikely(spte.spte_low != orig->spte_low || - count != sp->clear_spte_count)) - goto retry; - - return spte.spte; -} -#endif - -static bool spte_can_locklessly_be_made_writable(u64 spte) -{ - return (spte & (SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE)) == - (SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE); -} - -static bool spte_has_volatile_bits(u64 spte) -{ - if (!is_shadow_present_pte(spte)) - return false; - - /* - * Always atomically update spte if it can be updated - * out of mmu-lock, it can ensure dirty bit is not lost, - * also, it can help us to get a stable is_writable_pte() - * to ensure tlb flush is not missed. - */ - if (spte_can_locklessly_be_made_writable(spte) || - is_access_track_spte(spte)) - return true; - - if (spte_ad_enabled(spte)) { - if ((spte & shadow_accessed_mask) == 0 || - (is_writable_pte(spte) && (spte & shadow_dirty_mask) == 0)) - return true; - } - - return false; -} - -static bool is_accessed_spte(u64 spte) -{ - u64 accessed_mask = spte_shadow_accessed_mask(spte); - - return accessed_mask ? spte & accessed_mask - : !is_access_track_spte(spte); -} - -static bool is_dirty_spte(u64 spte) -{ - u64 dirty_mask = spte_shadow_dirty_mask(spte); - - return dirty_mask ? spte & dirty_mask : spte & PT_WRITABLE_MASK; -} - -/* Rules for using mmu_spte_set: - * Set the sptep from nonpresent to present. 
- * Note: the sptep being assigned *must* be either not present - * or in a state where the hardware will not attempt to update - * the spte. - */ -static void mmu_spte_set(u64 *sptep, u64 new_spte) -{ - WARN_ON(is_shadow_present_pte(*sptep)); - __set_spte(sptep, new_spte); -} - -/* - * Update the SPTE (excluding the PFN), but do not track changes in its - * accessed/dirty status. - */ -static u64 mmu_spte_update_no_track(u64 *sptep, u64 new_spte) -{ - u64 old_spte = *sptep; - - WARN_ON(!is_shadow_present_pte(new_spte)); - - if (!is_shadow_present_pte(old_spte)) { - mmu_spte_set(sptep, new_spte); - return old_spte; - } - - if (!spte_has_volatile_bits(old_spte)) - __update_clear_spte_fast(sptep, new_spte); - else - old_spte = __update_clear_spte_slow(sptep, new_spte); - - WARN_ON(spte_to_pfn(old_spte) != spte_to_pfn(new_spte)); - - return old_spte; -} - -/* Rules for using mmu_spte_update: - * Update the state bits, it means the mapped pfn is not changed. - * - * Whenever we overwrite a writable spte with a read-only one we - * should flush remote TLBs. Otherwise rmap_write_protect - * will find a read-only spte, even though the writable spte - * might be cached on a CPU's TLB, the return value indicates this - * case. - * - * Returns true if the TLB needs to be flushed - */ -static bool mmu_spte_update(u64 *sptep, u64 new_spte) -{ - bool flush = false; - u64 old_spte = mmu_spte_update_no_track(sptep, new_spte); - - if (!is_shadow_present_pte(old_spte)) - return false; - - /* - * For the spte updated out of mmu-lock is safe, since - * we always atomically update it, see the comments in - * spte_has_volatile_bits(). - */ - if (spte_can_locklessly_be_made_writable(old_spte) && - !is_writable_pte(new_spte)) - flush = true; - - /* - * Flush TLB when accessed/dirty states are changed in the page tables, - * to guarantee consistency between TLB and page tables. - */ - - if (is_accessed_spte(old_spte) && !is_accessed_spte(new_spte)) { - flush = true; - kvm_set_pfn_accessed(spte_to_pfn(old_spte)); - } - - if (is_dirty_spte(old_spte) && !is_dirty_spte(new_spte)) { - flush = true; - kvm_set_pfn_dirty(spte_to_pfn(old_spte)); - } - - return flush; -} - -/* - * Rules for using mmu_spte_clear_track_bits: - * It sets the sptep from present to nonpresent, and track the - * state bits, it is used to clear the last level sptep. - * Returns non-zero if the PTE was previously valid. - */ -static int mmu_spte_clear_track_bits(u64 *sptep) -{ - kvm_pfn_t pfn; - u64 old_spte = *sptep; - - if (!spte_has_volatile_bits(old_spte)) - __update_clear_spte_fast(sptep, 0ull); - else - old_spte = __update_clear_spte_slow(sptep, 0ull); - - if (!is_shadow_present_pte(old_spte)) - return 0; - - pfn = spte_to_pfn(old_spte); - - /* - * KVM does not hold the refcount of the page used by - * kvm mmu, before reclaiming the page, we should - * unmap it from mmu first. - */ - WARN_ON(!kvm_is_reserved_pfn(pfn) && !page_count(pfn_to_page(pfn))); - - if (is_accessed_spte(old_spte)) - kvm_set_pfn_accessed(pfn); - - if (is_dirty_spte(old_spte)) - kvm_set_pfn_dirty(pfn); - - return 1; -} - -/* - * Rules for using mmu_spte_clear_no_track: - * Directly clear spte without caring the state bits of sptep, - * it is used to set the upper level spte. 
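- * (No accessed/dirty bookkeeping is needed here because upper-level
- * sptes point at shadow page tables rather than guest data pages.)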
- */ -static void mmu_spte_clear_no_track(u64 *sptep) -{ - __update_clear_spte_fast(sptep, 0ull); -} - -static u64 mmu_spte_get_lockless(u64 *sptep) -{ - return __get_spte_lockless(sptep); -} - -static u64 mark_spte_for_access_track(u64 spte) -{ - if (spte_ad_enabled(spte)) - return spte & ~shadow_accessed_mask; - - if (is_access_track_spte(spte)) - return spte; - - /* - * Making an Access Tracking PTE will result in removal of write access - * from the PTE. So, verify that we will be able to restore the write - * access in the fast page fault path later on. - */ - WARN_ONCE((spte & PT_WRITABLE_MASK) && - !spte_can_locklessly_be_made_writable(spte), - "kvm: Writable SPTE is not locklessly dirty-trackable\n"); - - WARN_ONCE(spte & (shadow_acc_track_saved_bits_mask << - shadow_acc_track_saved_bits_shift), - "kvm: Access Tracking saved bit locations are not zero\n"); - - spte |= (spte & shadow_acc_track_saved_bits_mask) << - shadow_acc_track_saved_bits_shift; - spte &= ~shadow_acc_track_mask; - - return spte; -} - -/* Restore an acc-track PTE back to a regular PTE */ -static u64 restore_acc_track_spte(u64 spte) -{ - u64 new_spte = spte; - u64 saved_bits = (spte >> shadow_acc_track_saved_bits_shift) - & shadow_acc_track_saved_bits_mask; - - WARN_ON_ONCE(spte_ad_enabled(spte)); - WARN_ON_ONCE(!is_access_track_spte(spte)); - - new_spte &= ~shadow_acc_track_mask; - new_spte &= ~(shadow_acc_track_saved_bits_mask << - shadow_acc_track_saved_bits_shift); - new_spte |= saved_bits; - - return new_spte; -} - -/* Returns the Accessed status of the PTE and resets it at the same time. */ -static bool mmu_spte_age(u64 *sptep) -{ - u64 spte = mmu_spte_get_lockless(sptep); - - if (!is_accessed_spte(spte)) - return false; - - if (spte_ad_enabled(spte)) { - clear_bit((ffs(shadow_accessed_mask) - 1), - (unsigned long *)sptep); - } else { - /* - * Capture the dirty status of the page, so that it doesn't get - * lost when the SPTE is marked for access tracking. - */ - if (is_writable_pte(spte)) - kvm_set_pfn_dirty(spte_to_pfn(spte)); - - spte = mark_spte_for_access_track(spte); - mmu_spte_update_no_track(sptep, spte); - } - - return true; -} - -static void walk_shadow_page_lockless_begin(struct kvm_vcpu *vcpu) -{ - /* - * Prevent page table teardown by making any free-er wait during - * kvm_flush_remote_tlbs() IPI to all active vcpus. - */ - local_irq_disable(); - - /* - * Make sure a following spte read is not reordered ahead of the write - * to vcpu->mode. - */ - smp_store_mb(vcpu->mode, READING_SHADOW_PAGE_TABLES); -} - -static void walk_shadow_page_lockless_end(struct kvm_vcpu *vcpu) -{ - /* - * Make sure the write to vcpu->mode is not reordered in front of - * reads to sptes. If it does, kvm_mmu_commit_zap_page() can see us - * OUTSIDE_GUEST_MODE and proceed to free the shadow page table. - */ - smp_store_release(&vcpu->mode, OUTSIDE_GUEST_MODE); - local_irq_enable(); -} - -static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache, - struct kmem_cache *base_cache, int min) -{ - void *obj; - - if (cache->nobjs >= min) - return 0; - while (cache->nobjs < ARRAY_SIZE(cache->objects)) { - obj = kmem_cache_zalloc(base_cache, GFP_KERNEL_ACCOUNT); - if (!obj) - return cache->nobjs >= min ? 
0 : -ENOMEM; - cache->objects[cache->nobjs++] = obj; - } - return 0; -} - -static int mmu_memory_cache_free_objects(struct kvm_mmu_memory_cache *cache) -{ - return cache->nobjs; -} - -static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc, - struct kmem_cache *cache) -{ - while (mc->nobjs) - kmem_cache_free(cache, mc->objects[--mc->nobjs]); -} - -static int mmu_topup_memory_cache_page(struct kvm_mmu_memory_cache *cache, - int min) -{ - void *page; - - if (cache->nobjs >= min) - return 0; - while (cache->nobjs < ARRAY_SIZE(cache->objects)) { - page = (void *)__get_free_page(GFP_KERNEL_ACCOUNT); - if (!page) - return cache->nobjs >= min ? 0 : -ENOMEM; - cache->objects[cache->nobjs++] = page; - } - return 0; -} - -static void mmu_free_memory_cache_page(struct kvm_mmu_memory_cache *mc) -{ - while (mc->nobjs) - free_page((unsigned long)mc->objects[--mc->nobjs]); -} - -static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu) -{ - int r; - - r = mmu_topup_memory_cache(&vcpu->arch.mmu_pte_list_desc_cache, - pte_list_desc_cache, 8 + PTE_PREFETCH_NUM); - if (r) - goto out; - r = mmu_topup_memory_cache_page(&vcpu->arch.mmu_page_cache, 8); - if (r) - goto out; - r = mmu_topup_memory_cache(&vcpu->arch.mmu_page_header_cache, - mmu_page_header_cache, 4); -out: - return r; -} - -static void mmu_free_memory_caches(struct kvm_vcpu *vcpu) -{ - mmu_free_memory_cache(&vcpu->arch.mmu_pte_list_desc_cache, - pte_list_desc_cache); - mmu_free_memory_cache_page(&vcpu->arch.mmu_page_cache); - mmu_free_memory_cache(&vcpu->arch.mmu_page_header_cache, - mmu_page_header_cache); -} - -static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc) -{ - void *p; - - BUG_ON(!mc->nobjs); - p = mc->objects[--mc->nobjs]; - return p; -} - -static struct pte_list_desc *mmu_alloc_pte_list_desc(struct kvm_vcpu *vcpu) -{ - return mmu_memory_cache_alloc(&vcpu->arch.mmu_pte_list_desc_cache); -} - -static void mmu_free_pte_list_desc(struct pte_list_desc *pte_list_desc) -{ - kmem_cache_free(pte_list_desc_cache, pte_list_desc); -} - -static gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index) -{ - if (!sp->role.direct) - return sp->gfns[index]; - - return sp->gfn + (index << ((sp->role.level - 1) * PT64_LEVEL_BITS)); -} - -static void kvm_mmu_page_set_gfn(struct kvm_mmu_page *sp, int index, gfn_t gfn) -{ - if (!sp->role.direct) { - sp->gfns[index] = gfn; - return; - } - - if (WARN_ON(gfn != kvm_mmu_page_get_gfn(sp, index))) - pr_err_ratelimited("gfn mismatch under direct page %llx " - "(expected %llx, got %llx)\n", - sp->gfn, - kvm_mmu_page_get_gfn(sp, index), gfn); -} - -/* - * Return the pointer to the large page information for a given gfn, - * handling slots that are not large page aligned. 
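- * (gfn_to_index() shifts both the gfn and slot->base_gfn down by the
- * level's huge-page shift before subtracting, which is what makes
- * unaligned slots come out right.)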
- */ -static struct kvm_lpage_info *lpage_info_slot(gfn_t gfn, - struct kvm_memory_slot *slot, - int level) -{ - unsigned long idx; - - idx = gfn_to_index(gfn, slot->base_gfn, level); - return &slot->arch.lpage_info[level - 2][idx]; -} - -static void update_gfn_disallow_lpage_count(struct kvm_memory_slot *slot, - gfn_t gfn, int count) -{ - struct kvm_lpage_info *linfo; - int i; - - for (i = PT_DIRECTORY_LEVEL; i <= PT_MAX_HUGEPAGE_LEVEL; ++i) { - linfo = lpage_info_slot(gfn, slot, i); - linfo->disallow_lpage += count; - WARN_ON(linfo->disallow_lpage < 0); - } -} - -void kvm_mmu_gfn_disallow_lpage(struct kvm_memory_slot *slot, gfn_t gfn) -{ - update_gfn_disallow_lpage_count(slot, gfn, 1); -} - -void kvm_mmu_gfn_allow_lpage(struct kvm_memory_slot *slot, gfn_t gfn) -{ - update_gfn_disallow_lpage_count(slot, gfn, -1); -} - -static void account_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp) -{ - struct kvm_memslots *slots; - struct kvm_memory_slot *slot; - gfn_t gfn; - - kvm->arch.indirect_shadow_pages++; - gfn = sp->gfn; - slots = kvm_memslots_for_spte_role(kvm, sp->role); - slot = __gfn_to_memslot(slots, gfn); - - /* the non-leaf shadow pages are keeping readonly. */ - if (sp->role.level > PT_PAGE_TABLE_LEVEL) - return kvm_slot_page_track_add_page(kvm, slot, gfn, - KVM_PAGE_TRACK_WRITE); - - kvm_mmu_gfn_disallow_lpage(slot, gfn); -} - -static void account_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp) -{ - if (sp->lpage_disallowed) - return; - - ++kvm->stat.nx_lpage_splits; - list_add_tail(&sp->lpage_disallowed_link, - &kvm->arch.lpage_disallowed_mmu_pages); - sp->lpage_disallowed = true; -} - -static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp) -{ - struct kvm_memslots *slots; - struct kvm_memory_slot *slot; - gfn_t gfn; - - kvm->arch.indirect_shadow_pages--; - gfn = sp->gfn; - slots = kvm_memslots_for_spte_role(kvm, sp->role); - slot = __gfn_to_memslot(slots, gfn); - if (sp->role.level > PT_PAGE_TABLE_LEVEL) - return kvm_slot_page_track_remove_page(kvm, slot, gfn, - KVM_PAGE_TRACK_WRITE); - - kvm_mmu_gfn_allow_lpage(slot, gfn); -} - -static void unaccount_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp) -{ - --kvm->stat.nx_lpage_splits; - sp->lpage_disallowed = false; - list_del(&sp->lpage_disallowed_link); -} - -static bool __mmu_gfn_lpage_is_disallowed(gfn_t gfn, int level, - struct kvm_memory_slot *slot) -{ - struct kvm_lpage_info *linfo; - - if (slot) { - linfo = lpage_info_slot(gfn, slot, level); - return !!linfo->disallow_lpage; - } - - return true; -} - -static bool mmu_gfn_lpage_is_disallowed(struct kvm_vcpu *vcpu, gfn_t gfn, - int level) -{ - struct kvm_memory_slot *slot; - - slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); - return __mmu_gfn_lpage_is_disallowed(gfn, level, slot); -} - -static int host_mapping_level(struct kvm *kvm, gfn_t gfn) -{ - unsigned long page_size; - int i, ret = 0; - - page_size = kvm_host_page_size(kvm, gfn); - - for (i = PT_PAGE_TABLE_LEVEL; i <= PT_MAX_HUGEPAGE_LEVEL; ++i) { - if (page_size >= KVM_HPAGE_SIZE(i)) - ret = i; - else - break; - } - - return ret; -} - -static inline bool memslot_valid_for_gpte(struct kvm_memory_slot *slot, - bool no_dirty_log) -{ - if (!slot || slot->flags & KVM_MEMSLOT_INVALID) - return false; - if (no_dirty_log && slot->dirty_bitmap) - return false; - - return true; -} - -static struct kvm_memory_slot * -gfn_to_memslot_dirty_bitmap(struct kvm_vcpu *vcpu, gfn_t gfn, - bool no_dirty_log) -{ - struct kvm_memory_slot *slot; - - slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); - if 
(!memslot_valid_for_gpte(slot, no_dirty_log)) - slot = NULL; - - return slot; -} - -static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn, - bool *force_pt_level) -{ - int host_level, level, max_level; - struct kvm_memory_slot *slot; - - if (unlikely(*force_pt_level)) - return PT_PAGE_TABLE_LEVEL; - - slot = kvm_vcpu_gfn_to_memslot(vcpu, large_gfn); - *force_pt_level = !memslot_valid_for_gpte(slot, true); - if (unlikely(*force_pt_level)) - return PT_PAGE_TABLE_LEVEL; - - host_level = host_mapping_level(vcpu->kvm, large_gfn); - - if (host_level == PT_PAGE_TABLE_LEVEL) - return host_level; - - max_level = min(kvm_x86_ops->get_lpage_level(), host_level); - - for (level = PT_DIRECTORY_LEVEL; level <= max_level; ++level) - if (__mmu_gfn_lpage_is_disallowed(large_gfn, level, slot)) - break; - - return level - 1; -} - -/* - * About rmap_head encoding: - * - * If the bit zero of rmap_head->val is clear, then it points to the only spte - * in this rmap chain. Otherwise, (rmap_head->val & ~1) points to a struct - * pte_list_desc containing more mappings. - */ - -/* - * Returns the number of pointers in the rmap chain, not counting the new one. - */ -static int pte_list_add(struct kvm_vcpu *vcpu, u64 *spte, - struct kvm_rmap_head *rmap_head) -{ - struct pte_list_desc *desc; - int i, count = 0; - - if (!rmap_head->val) { - rmap_printk("pte_list_add: %p %llx 0->1\n", spte, *spte); - rmap_head->val = (unsigned long)spte; - } else if (!(rmap_head->val & 1)) { - rmap_printk("pte_list_add: %p %llx 1->many\n", spte, *spte); - desc = mmu_alloc_pte_list_desc(vcpu); - desc->sptes[0] = (u64 *)rmap_head->val; - desc->sptes[1] = spte; - rmap_head->val = (unsigned long)desc | 1; - ++count; - } else { - rmap_printk("pte_list_add: %p %llx many->many\n", spte, *spte); - desc = (struct pte_list_desc *)(rmap_head->val & ~1ul); - while (desc->sptes[PTE_LIST_EXT-1] && desc->more) { - desc = desc->more; - count += PTE_LIST_EXT; - } - if (desc->sptes[PTE_LIST_EXT-1]) { - desc->more = mmu_alloc_pte_list_desc(vcpu); - desc = desc->more; - } - for (i = 0; desc->sptes[i]; ++i) - ++count; - desc->sptes[i] = spte; - } - return count; -} - -static void -pte_list_desc_remove_entry(struct kvm_rmap_head *rmap_head, - struct pte_list_desc *desc, int i, - struct pte_list_desc *prev_desc) -{ - int j; - - for (j = PTE_LIST_EXT - 1; !desc->sptes[j] && j > i; --j) - ; - desc->sptes[i] = desc->sptes[j]; - desc->sptes[j] = NULL; - if (j != 0) - return; - if (!prev_desc && !desc->more) - rmap_head->val = (unsigned long)desc->sptes[0]; - else - if (prev_desc) - prev_desc->more = desc->more; - else - rmap_head->val = (unsigned long)desc->more | 1; - mmu_free_pte_list_desc(desc); -} - -static void __pte_list_remove(u64 *spte, struct kvm_rmap_head *rmap_head) -{ - struct pte_list_desc *desc; - struct pte_list_desc *prev_desc; - int i; - - if (!rmap_head->val) { - pr_err("%s: %p 0->BUG\n", __func__, spte); - BUG(); - } else if (!(rmap_head->val & 1)) { - rmap_printk("%s: %p 1->0\n", __func__, spte); - if ((u64 *)rmap_head->val != spte) { - pr_err("%s: %p 1->BUG\n", __func__, spte); - BUG(); - } - rmap_head->val = 0; - } else { - rmap_printk("%s: %p many->many\n", __func__, spte); - desc = (struct pte_list_desc *)(rmap_head->val & ~1ul); - prev_desc = NULL; - while (desc) { - for (i = 0; i < PTE_LIST_EXT && desc->sptes[i]; ++i) { - if (desc->sptes[i] == spte) { - pte_list_desc_remove_entry(rmap_head, - desc, i, prev_desc); - return; - } - } - prev_desc = desc; - desc = desc->more; - } - pr_err("%s: %p many->many\n", __func__, spte); - 
BUG(); - } -} - -static void pte_list_remove(struct kvm_rmap_head *rmap_head, u64 *sptep) -{ - mmu_spte_clear_track_bits(sptep); - __pte_list_remove(sptep, rmap_head); -} - -static struct kvm_rmap_head *__gfn_to_rmap(gfn_t gfn, int level, - struct kvm_memory_slot *slot) -{ - unsigned long idx; - - idx = gfn_to_index(gfn, slot->base_gfn, level); - return &slot->arch.rmap[level - PT_PAGE_TABLE_LEVEL][idx]; -} - -static struct kvm_rmap_head *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, - struct kvm_mmu_page *sp) -{ - struct kvm_memslots *slots; - struct kvm_memory_slot *slot; - - slots = kvm_memslots_for_spte_role(kvm, sp->role); - slot = __gfn_to_memslot(slots, gfn); - return __gfn_to_rmap(gfn, sp->role.level, slot); -} - -static bool rmap_can_add(struct kvm_vcpu *vcpu) -{ - struct kvm_mmu_memory_cache *cache; - - cache = &vcpu->arch.mmu_pte_list_desc_cache; - return mmu_memory_cache_free_objects(cache); -} - -static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) -{ - struct kvm_mmu_page *sp; - struct kvm_rmap_head *rmap_head; - - sp = page_header(__pa(spte)); - kvm_mmu_page_set_gfn(sp, spte - sp->spt, gfn); - rmap_head = gfn_to_rmap(vcpu->kvm, gfn, sp); - return pte_list_add(vcpu, spte, rmap_head); -} - -static void rmap_remove(struct kvm *kvm, u64 *spte) -{ - struct kvm_mmu_page *sp; - gfn_t gfn; - struct kvm_rmap_head *rmap_head; - - sp = page_header(__pa(spte)); - gfn = kvm_mmu_page_get_gfn(sp, spte - sp->spt); - rmap_head = gfn_to_rmap(kvm, gfn, sp); - __pte_list_remove(spte, rmap_head); -} - -/* - * Used by the following functions to iterate through the sptes linked by a - * rmap. All fields are private and not assumed to be used outside. - */ -struct rmap_iterator { - /* private fields */ - struct pte_list_desc *desc; /* holds the sptep if not NULL */ - int pos; /* index of the sptep */ -}; - -/* - * Iteration must be started by this function. This should also be used after - * removing/dropping sptes from the rmap link because in such cases the - * information in the itererator may not be valid. - * - * Returns sptep if found, NULL otherwise. - */ -static u64 *rmap_get_first(struct kvm_rmap_head *rmap_head, - struct rmap_iterator *iter) -{ - u64 *sptep; - - if (!rmap_head->val) - return NULL; - - if (!(rmap_head->val & 1)) { - iter->desc = NULL; - sptep = (u64 *)rmap_head->val; - goto out; - } - - iter->desc = (struct pte_list_desc *)(rmap_head->val & ~1ul); - iter->pos = 0; - sptep = iter->desc->sptes[iter->pos]; -out: - BUG_ON(!is_shadow_present_pte(*sptep)); - return sptep; -} - -/* - * Must be used with a valid iterator: e.g. after rmap_get_first(). - * - * Returns sptep if found, NULL otherwise. 
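- * (pte_list_desc entries are kept packed from index 0, so the first
- * NULL spte marks the end of the current desc.)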
- */ -static u64 *rmap_get_next(struct rmap_iterator *iter) -{ - u64 *sptep; - - if (iter->desc) { - if (iter->pos < PTE_LIST_EXT - 1) { - ++iter->pos; - sptep = iter->desc->sptes[iter->pos]; - if (sptep) - goto out; - } - - iter->desc = iter->desc->more; - - if (iter->desc) { - iter->pos = 0; - /* desc->sptes[0] cannot be NULL */ - sptep = iter->desc->sptes[iter->pos]; - goto out; - } - } - - return NULL; -out: - BUG_ON(!is_shadow_present_pte(*sptep)); - return sptep; -} - -#define for_each_rmap_spte(_rmap_head_, _iter_, _spte_) \ - for (_spte_ = rmap_get_first(_rmap_head_, _iter_); \ - _spte_; _spte_ = rmap_get_next(_iter_)) - -static void drop_spte(struct kvm *kvm, u64 *sptep) -{ - if (mmu_spte_clear_track_bits(sptep)) - rmap_remove(kvm, sptep); -} - - -static bool __drop_large_spte(struct kvm *kvm, u64 *sptep) -{ - if (is_large_pte(*sptep)) { - WARN_ON(page_header(__pa(sptep))->role.level == - PT_PAGE_TABLE_LEVEL); - drop_spte(kvm, sptep); - --kvm->stat.lpages; - return true; - } - - return false; -} - -static void drop_large_spte(struct kvm_vcpu *vcpu, u64 *sptep) -{ - if (__drop_large_spte(vcpu->kvm, sptep)) { - struct kvm_mmu_page *sp = page_header(__pa(sptep)); - - kvm_flush_remote_tlbs_with_address(vcpu->kvm, sp->gfn, - KVM_PAGES_PER_HPAGE(sp->role.level)); - } -} - -/* - * Write-protect on the specified @sptep, @pt_protect indicates whether - * spte write-protection is caused by protecting shadow page table. - * - * Note: write protection is difference between dirty logging and spte - * protection: - * - for dirty logging, the spte can be set to writable at anytime if - * its dirty bitmap is properly set. - * - for spte protection, the spte can be writable only after unsync-ing - * shadow page. - * - * Return true if tlb need be flushed. - */ -static bool spte_write_protect(u64 *sptep, bool pt_protect) -{ - u64 spte = *sptep; - - if (!is_writable_pte(spte) && - !(pt_protect && spte_can_locklessly_be_made_writable(spte))) - return false; - - rmap_printk("rmap_write_protect: spte %p %llx\n", sptep, *sptep); - - if (pt_protect) - spte &= ~SPTE_MMU_WRITEABLE; - spte = spte & ~PT_WRITABLE_MASK; - - return mmu_spte_update(sptep, spte); -} - -static bool __rmap_write_protect(struct kvm *kvm, - struct kvm_rmap_head *rmap_head, - bool pt_protect) -{ - u64 *sptep; - struct rmap_iterator iter; - bool flush = false; - - for_each_rmap_spte(rmap_head, &iter, sptep) - flush |= spte_write_protect(sptep, pt_protect); - - return flush; -} - -static bool spte_clear_dirty(u64 *sptep) -{ - u64 spte = *sptep; - - rmap_printk("rmap_clear_dirty: spte %p %llx\n", sptep, *sptep); - - MMU_WARN_ON(!spte_ad_enabled(spte)); - spte &= ~shadow_dirty_mask; - return mmu_spte_update(sptep, spte); -} - -static bool spte_wrprot_for_clear_dirty(u64 *sptep) -{ - bool was_writable = test_and_clear_bit(PT_WRITABLE_SHIFT, - (unsigned long *)sptep); - if (was_writable && !spte_ad_enabled(*sptep)) - kvm_set_pfn_dirty(spte_to_pfn(*sptep)); - - return was_writable; -} - -/* - * Gets the GFN ready for another round of dirty logging by clearing the - * - D bit on ad-enabled SPTEs, and - * - W bit on ad-disabled SPTEs. - * Returns true iff any D or W bits were cleared. 
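- * (Ad-disabled SPTEs have no usable D bit, so dirtying is instead
- * detected by write-protecting the page and catching the next write.)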
- */ -static bool __rmap_clear_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head) -{ - u64 *sptep; - struct rmap_iterator iter; - bool flush = false; - - for_each_rmap_spte(rmap_head, &iter, sptep) - if (spte_ad_need_write_protect(*sptep)) - flush |= spte_wrprot_for_clear_dirty(sptep); - else - flush |= spte_clear_dirty(sptep); - - return flush; -} - -static bool spte_set_dirty(u64 *sptep) -{ - u64 spte = *sptep; - - rmap_printk("rmap_set_dirty: spte %p %llx\n", sptep, *sptep); - - /* - * Similar to the !kvm_x86_ops->slot_disable_log_dirty case, - * do not bother adding back write access to pages marked - * SPTE_AD_WRPROT_ONLY_MASK. - */ - spte |= shadow_dirty_mask; - - return mmu_spte_update(sptep, spte); -} - -static bool __rmap_set_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head) -{ - u64 *sptep; - struct rmap_iterator iter; - bool flush = false; - - for_each_rmap_spte(rmap_head, &iter, sptep) - if (spte_ad_enabled(*sptep)) - flush |= spte_set_dirty(sptep); - - return flush; -} - -/** - * kvm_mmu_write_protect_pt_masked - write protect selected PT level pages - * @kvm: kvm instance - * @slot: slot to protect - * @gfn_offset: start of the BITS_PER_LONG pages we care about - * @mask: indicates which pages we should protect - * - * Used when we do not need to care about huge page mappings: e.g. during dirty - * logging we do not have any such mappings. - */ -static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm, - struct kvm_memory_slot *slot, - gfn_t gfn_offset, unsigned long mask) -{ - struct kvm_rmap_head *rmap_head; - - while (mask) { - rmap_head = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask), - PT_PAGE_TABLE_LEVEL, slot); - __rmap_write_protect(kvm, rmap_head, false); - - /* clear the first set bit */ - mask &= mask - 1; - } -} - -/** - * kvm_mmu_clear_dirty_pt_masked - clear MMU D-bit for PT level pages, or write - * protect the page if the D-bit isn't supported. - * @kvm: kvm instance - * @slot: slot to clear D-bit - * @gfn_offset: start of the BITS_PER_LONG pages we care about - * @mask: indicates which pages we should clear D-bit - * - * Used for PML to re-log the dirty GPAs after userspace querying dirty_bitmap. - */ -void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm, - struct kvm_memory_slot *slot, - gfn_t gfn_offset, unsigned long mask) -{ - struct kvm_rmap_head *rmap_head; - - while (mask) { - rmap_head = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask), - PT_PAGE_TABLE_LEVEL, slot); - __rmap_clear_dirty(kvm, rmap_head); - - /* clear the first set bit */ - mask &= mask - 1; - } -} -EXPORT_SYMBOL_GPL(kvm_mmu_clear_dirty_pt_masked); - -/** - * kvm_arch_mmu_enable_log_dirty_pt_masked - enable dirty logging for selected - * PT level pages. - * - * It calls kvm_mmu_write_protect_pt_masked to write protect selected pages to - * enable dirty logging for them. - * - * Used when we do not need to care about huge page mappings: e.g. during dirty - * logging we do not have any such mappings. 
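- * (If the vendor module provides enable_log_dirty_pt_masked, e.g. PML on
- * Intel, only the D bit is cleared; otherwise the pages are
- * write-protected, as the function body below shows.)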
- */ -void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm, - struct kvm_memory_slot *slot, - gfn_t gfn_offset, unsigned long mask) -{ - if (kvm_x86_ops->enable_log_dirty_pt_masked) - kvm_x86_ops->enable_log_dirty_pt_masked(kvm, slot, gfn_offset, - mask); - else - kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask); -} - -/** - * kvm_arch_write_log_dirty - emulate dirty page logging - * @vcpu: Guest mode vcpu - * - * Emulate arch specific page modification logging for the - * nested hypervisor - */ -int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu) -{ - if (kvm_x86_ops->write_log_dirty) - return kvm_x86_ops->write_log_dirty(vcpu); - - return 0; -} - -bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm, - struct kvm_memory_slot *slot, u64 gfn) -{ - struct kvm_rmap_head *rmap_head; - int i; - bool write_protected = false; - - for (i = PT_PAGE_TABLE_LEVEL; i <= PT_MAX_HUGEPAGE_LEVEL; ++i) { - rmap_head = __gfn_to_rmap(gfn, i, slot); - write_protected |= __rmap_write_protect(kvm, rmap_head, true); - } - - return write_protected; -} - -static bool rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn) -{ - struct kvm_memory_slot *slot; - - slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); - return kvm_mmu_slot_gfn_write_protect(vcpu->kvm, slot, gfn); -} - -static bool kvm_zap_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head) -{ - u64 *sptep; - struct rmap_iterator iter; - bool flush = false; - - while ((sptep = rmap_get_first(rmap_head, &iter))) { - rmap_printk("%s: spte %p %llx.\n", __func__, sptep, *sptep); - - pte_list_remove(rmap_head, sptep); - flush = true; - } - - return flush; -} - -static int kvm_unmap_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head, - struct kvm_memory_slot *slot, gfn_t gfn, int level, - unsigned long data) -{ - return kvm_zap_rmapp(kvm, rmap_head); -} - -static int kvm_set_pte_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head, - struct kvm_memory_slot *slot, gfn_t gfn, int level, - unsigned long data) -{ - u64 *sptep; - struct rmap_iterator iter; - int need_flush = 0; - u64 new_spte; - pte_t *ptep = (pte_t *)data; - kvm_pfn_t new_pfn; - - WARN_ON(pte_huge(*ptep)); - new_pfn = pte_pfn(*ptep); - -restart: - for_each_rmap_spte(rmap_head, &iter, sptep) { - rmap_printk("kvm_set_pte_rmapp: spte %p %llx gfn %llx (%d)\n", - sptep, *sptep, gfn, level); - - need_flush = 1; - - if (pte_write(*ptep)) { - pte_list_remove(rmap_head, sptep); - goto restart; - } else { - new_spte = *sptep & ~PT64_BASE_ADDR_MASK; - new_spte |= (u64)new_pfn << PAGE_SHIFT; - - new_spte &= ~PT_WRITABLE_MASK; - new_spte &= ~SPTE_HOST_WRITEABLE; - - new_spte = mark_spte_for_access_track(new_spte); - - mmu_spte_clear_track_bits(sptep); - mmu_spte_set(sptep, new_spte); - } - } - - if (need_flush && kvm_available_flush_tlb_with_range()) { - kvm_flush_remote_tlbs_with_address(kvm, gfn, 1); - return 0; - } - - return need_flush; -} - -struct slot_rmap_walk_iterator { - /* input fields. */ - struct kvm_memory_slot *slot; - gfn_t start_gfn; - gfn_t end_gfn; - int start_level; - int end_level; - - /* output fields. */ - gfn_t gfn; - struct kvm_rmap_head *rmap; - int level; - - /* private field. 
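- * end_rmap is the rmap head of end_gfn at the current level, i.e. the
- * inclusive upper bound checked by slot_rmap_walk_next().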
*/ - struct kvm_rmap_head *end_rmap; -}; - -static void -rmap_walk_init_level(struct slot_rmap_walk_iterator *iterator, int level) -{ - iterator->level = level; - iterator->gfn = iterator->start_gfn; - iterator->rmap = __gfn_to_rmap(iterator->gfn, level, iterator->slot); - iterator->end_rmap = __gfn_to_rmap(iterator->end_gfn, level, - iterator->slot); -} - -static void -slot_rmap_walk_init(struct slot_rmap_walk_iterator *iterator, - struct kvm_memory_slot *slot, int start_level, - int end_level, gfn_t start_gfn, gfn_t end_gfn) -{ - iterator->slot = slot; - iterator->start_level = start_level; - iterator->end_level = end_level; - iterator->start_gfn = start_gfn; - iterator->end_gfn = end_gfn; - - rmap_walk_init_level(iterator, iterator->start_level); -} - -static bool slot_rmap_walk_okay(struct slot_rmap_walk_iterator *iterator) -{ - return !!iterator->rmap; -} - -static void slot_rmap_walk_next(struct slot_rmap_walk_iterator *iterator) -{ - if (++iterator->rmap <= iterator->end_rmap) { - iterator->gfn += (1UL << KVM_HPAGE_GFN_SHIFT(iterator->level)); - return; - } - - if (++iterator->level > iterator->end_level) { - iterator->rmap = NULL; - return; - } - - rmap_walk_init_level(iterator, iterator->level); -} - -#define for_each_slot_rmap_range(_slot_, _start_level_, _end_level_, \ - _start_gfn, _end_gfn, _iter_) \ - for (slot_rmap_walk_init(_iter_, _slot_, _start_level_, \ - _end_level_, _start_gfn, _end_gfn); \ - slot_rmap_walk_okay(_iter_); \ - slot_rmap_walk_next(_iter_)) - -static int kvm_handle_hva_range(struct kvm *kvm, - unsigned long start, - unsigned long end, - unsigned long data, - int (*handler)(struct kvm *kvm, - struct kvm_rmap_head *rmap_head, - struct kvm_memory_slot *slot, - gfn_t gfn, - int level, - unsigned long data)) -{ - struct kvm_memslots *slots; - struct kvm_memory_slot *memslot; - struct slot_rmap_walk_iterator iterator; - int ret = 0; - int i; - - for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) { - slots = __kvm_memslots(kvm, i); - kvm_for_each_memslot(memslot, slots) { - unsigned long hva_start, hva_end; - gfn_t gfn_start, gfn_end; - - hva_start = max(start, memslot->userspace_addr); - hva_end = min(end, memslot->userspace_addr + - (memslot->npages << PAGE_SHIFT)); - if (hva_start >= hva_end) - continue; - /* - * {gfn(page) | page intersects with [hva_start, hva_end)} = - * {gfn_start, gfn_start+1, ..., gfn_end-1}. 
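- * (Hence the rounding below: hva_end + PAGE_SIZE - 1 pulls a partially
- * covered final page into gfn_end, and the walk runs to gfn_end - 1.)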
- */ - gfn_start = hva_to_gfn_memslot(hva_start, memslot); - gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot); - - for_each_slot_rmap_range(memslot, PT_PAGE_TABLE_LEVEL, - PT_MAX_HUGEPAGE_LEVEL, - gfn_start, gfn_end - 1, - &iterator) - ret |= handler(kvm, iterator.rmap, memslot, - iterator.gfn, iterator.level, data); - } - } - - return ret; -} - -static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, - unsigned long data, - int (*handler)(struct kvm *kvm, - struct kvm_rmap_head *rmap_head, - struct kvm_memory_slot *slot, - gfn_t gfn, int level, - unsigned long data)) -{ - return kvm_handle_hva_range(kvm, hva, hva + 1, data, handler); -} - -int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) -{ - return kvm_handle_hva_range(kvm, start, end, 0, kvm_unmap_rmapp); -} - -int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) -{ - return kvm_handle_hva(kvm, hva, (unsigned long)&pte, kvm_set_pte_rmapp); -} - -static int kvm_age_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head, - struct kvm_memory_slot *slot, gfn_t gfn, int level, - unsigned long data) -{ - u64 *sptep; - struct rmap_iterator uninitialized_var(iter); - int young = 0; - - for_each_rmap_spte(rmap_head, &iter, sptep) - young |= mmu_spte_age(sptep); - - trace_kvm_age_page(gfn, level, slot, young); - return young; -} - -static int kvm_test_age_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head, - struct kvm_memory_slot *slot, gfn_t gfn, - int level, unsigned long data) -{ - u64 *sptep; - struct rmap_iterator iter; - - for_each_rmap_spte(rmap_head, &iter, sptep) - if (is_accessed_spte(*sptep)) - return 1; - return 0; -} - -#define RMAP_RECYCLE_THRESHOLD 1000 - -static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) -{ - struct kvm_rmap_head *rmap_head; - struct kvm_mmu_page *sp; - - sp = page_header(__pa(spte)); - - rmap_head = gfn_to_rmap(vcpu->kvm, gfn, sp); - - kvm_unmap_rmapp(vcpu->kvm, rmap_head, NULL, gfn, sp->role.level, 0); - kvm_flush_remote_tlbs_with_address(vcpu->kvm, sp->gfn, - KVM_PAGES_PER_HPAGE(sp->role.level)); -} - -int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end) -{ - return kvm_handle_hva_range(kvm, start, end, 0, kvm_age_rmapp); -} - -int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) -{ - return kvm_handle_hva(kvm, hva, 0, kvm_test_age_rmapp); -} - -#ifdef MMU_DEBUG -static int is_empty_shadow_page(u64 *spt) -{ - u64 *pos; - u64 *end; - - for (pos = spt, end = pos + PAGE_SIZE / sizeof(u64); pos != end; pos++) - if (is_shadow_present_pte(*pos)) { - printk(KERN_ERR "%s: %p %llx\n", __func__, - pos, *pos); - return 0; - } - return 1; -} -#endif - -/* - * This value is the sum of all of the kvm instances's - * kvm->arch.n_used_mmu_pages values. 
We need a global, - * aggregate version in order to make the slab shrinker - * faster - */ -static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, unsigned long nr) -{ - kvm->arch.n_used_mmu_pages += nr; - percpu_counter_add(&kvm_total_used_mmu_pages, nr); -} - -static void kvm_mmu_free_page(struct kvm_mmu_page *sp) -{ - MMU_WARN_ON(!is_empty_shadow_page(sp->spt)); - hlist_del(&sp->hash_link); - list_del(&sp->link); - free_page((unsigned long)sp->spt); - if (!sp->role.direct) - free_page((unsigned long)sp->gfns); - kmem_cache_free(mmu_page_header_cache, sp); -} - -static unsigned kvm_page_table_hashfn(gfn_t gfn) -{ - return hash_64(gfn, KVM_MMU_HASH_SHIFT); -} - -static void mmu_page_add_parent_pte(struct kvm_vcpu *vcpu, - struct kvm_mmu_page *sp, u64 *parent_pte) -{ - if (!parent_pte) - return; - - pte_list_add(vcpu, parent_pte, &sp->parent_ptes); -} - -static void mmu_page_remove_parent_pte(struct kvm_mmu_page *sp, - u64 *parent_pte) -{ - __pte_list_remove(parent_pte, &sp->parent_ptes); -} - -static void drop_parent_pte(struct kvm_mmu_page *sp, - u64 *parent_pte) -{ - mmu_page_remove_parent_pte(sp, parent_pte); - mmu_spte_clear_no_track(parent_pte); -} - -static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, int direct) -{ - struct kvm_mmu_page *sp; - - sp = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache); - sp->spt = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache); - if (!direct) - sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache); - set_page_private(virt_to_page(sp->spt), (unsigned long)sp); - - /* - * active_mmu_pages must be a FIFO list, as kvm_zap_obsolete_pages() - * depends on valid pages being added to the head of the list. See - * comments in kvm_zap_obsolete_pages(). - */ - sp->mmu_valid_gen = vcpu->kvm->arch.mmu_valid_gen; - list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); - kvm_mod_used_mmu_pages(vcpu->kvm, +1); - return sp; -} - -static void mark_unsync(u64 *spte); -static void kvm_mmu_mark_parents_unsync(struct kvm_mmu_page *sp) -{ - u64 *sptep; - struct rmap_iterator iter; - - for_each_rmap_spte(&sp->parent_ptes, &iter, sptep) { - mark_unsync(sptep); - } -} - -static void mark_unsync(u64 *spte) -{ - struct kvm_mmu_page *sp; - unsigned int index; - - sp = page_header(__pa(spte)); - index = spte - sp->spt; - if (__test_and_set_bit(index, sp->unsync_child_bitmap)) - return; - if (sp->unsync_children++) - return; - kvm_mmu_mark_parents_unsync(sp); -} - -static int nonpaging_sync_page(struct kvm_vcpu *vcpu, - struct kvm_mmu_page *sp) -{ - return 0; -} - -static void nonpaging_invlpg(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root) -{ -} - -static void nonpaging_update_pte(struct kvm_vcpu *vcpu, - struct kvm_mmu_page *sp, u64 *spte, - const void *pte) -{ - WARN_ON(1); -} - -#define KVM_PAGE_ARRAY_NR 16 - -struct kvm_mmu_pages { - struct mmu_page_and_offset { - struct kvm_mmu_page *sp; - unsigned int idx; - } page[KVM_PAGE_ARRAY_NR]; - unsigned int nr; -}; - -static int mmu_pages_add(struct kvm_mmu_pages *pvec, struct kvm_mmu_page *sp, - int idx) -{ - int i; - - if (sp->unsync) - for (i=0; i < pvec->nr; i++) - if (pvec->page[i].sp == sp) - return 0; - - pvec->page[pvec->nr].sp = sp; - pvec->page[pvec->nr].idx = idx; - pvec->nr++; - return (pvec->nr == KVM_PAGE_ARRAY_NR); -} - -static inline void clear_unsync_child_bit(struct kvm_mmu_page *sp, int idx) -{ - --sp->unsync_children; - WARN_ON((int)sp->unsync_children < 0); - __clear_bit(idx, sp->unsync_child_bitmap); -} - -static int __mmu_unsync_walk(struct kvm_mmu_page *sp, - struct 
kvm_mmu_pages *pvec) -{ - int i, ret, nr_unsync_leaf = 0; - - for_each_set_bit(i, sp->unsync_child_bitmap, 512) { - struct kvm_mmu_page *child; - u64 ent = sp->spt[i]; - - if (!is_shadow_present_pte(ent) || is_large_pte(ent)) { - clear_unsync_child_bit(sp, i); - continue; - } - - child = page_header(ent & PT64_BASE_ADDR_MASK); - - if (child->unsync_children) { - if (mmu_pages_add(pvec, child, i)) - return -ENOSPC; - - ret = __mmu_unsync_walk(child, pvec); - if (!ret) { - clear_unsync_child_bit(sp, i); - continue; - } else if (ret > 0) { - nr_unsync_leaf += ret; - } else - return ret; - } else if (child->unsync) { - nr_unsync_leaf++; - if (mmu_pages_add(pvec, child, i)) - return -ENOSPC; - } else - clear_unsync_child_bit(sp, i); - } - - return nr_unsync_leaf; -} - -#define INVALID_INDEX (-1) - -static int mmu_unsync_walk(struct kvm_mmu_page *sp, - struct kvm_mmu_pages *pvec) -{ - pvec->nr = 0; - if (!sp->unsync_children) - return 0; - - mmu_pages_add(pvec, sp, INVALID_INDEX); - return __mmu_unsync_walk(sp, pvec); -} - -static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp) -{ - WARN_ON(!sp->unsync); - trace_kvm_mmu_sync_page(sp); - sp->unsync = 0; - --kvm->stat.mmu_unsync; -} - -static bool kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp, - struct list_head *invalid_list); -static void kvm_mmu_commit_zap_page(struct kvm *kvm, - struct list_head *invalid_list); - - -#define for_each_valid_sp(_kvm, _sp, _gfn) \ - hlist_for_each_entry(_sp, \ - &(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)], hash_link) \ - if (is_obsolete_sp((_kvm), (_sp))) { \ - } else - -#define for_each_gfn_indirect_valid_sp(_kvm, _sp, _gfn) \ - for_each_valid_sp(_kvm, _sp, _gfn) \ - if ((_sp)->gfn != (_gfn) || (_sp)->role.direct) {} else - -static inline bool is_ept_sp(struct kvm_mmu_page *sp) -{ - return sp->role.cr0_wp && sp->role.smap_andnot_wp; -} - -/* @sp->gfn should be write-protected at the call site */ -static bool __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, - struct list_head *invalid_list) -{ - if ((!is_ept_sp(sp) && sp->role.gpte_is_8_bytes != !!is_pae(vcpu)) || - vcpu->arch.mmu->sync_page(vcpu, sp) == 0) { - kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list); - return false; - } - - return true; -} - -static bool kvm_mmu_remote_flush_or_zap(struct kvm *kvm, - struct list_head *invalid_list, - bool remote_flush) -{ - if (!remote_flush && list_empty(invalid_list)) - return false; - - if (!list_empty(invalid_list)) - kvm_mmu_commit_zap_page(kvm, invalid_list); - else - kvm_flush_remote_tlbs(kvm); - return true; -} - -static void kvm_mmu_flush_or_zap(struct kvm_vcpu *vcpu, - struct list_head *invalid_list, - bool remote_flush, bool local_flush) -{ - if (kvm_mmu_remote_flush_or_zap(vcpu->kvm, invalid_list, remote_flush)) - return; - - if (local_flush) - kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); -} - -#ifdef CONFIG_KVM_MMU_AUDIT -#include "mmu_audit.c" -#else -static void kvm_mmu_audit(struct kvm_vcpu *vcpu, int point) { } -static void mmu_audit_disable(void) { } -#endif - -static bool is_obsolete_sp(struct kvm *kvm, struct kvm_mmu_page *sp) -{ - return sp->role.invalid || - unlikely(sp->mmu_valid_gen != kvm->arch.mmu_valid_gen); -} - -static bool kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, - struct list_head *invalid_list) -{ - kvm_unlink_unsync_page(vcpu->kvm, sp); - return __kvm_sync_page(vcpu, sp, invalid_list); -} - -/* @gfn should be write-protected at the call site */ -static bool kvm_sync_pages(struct kvm_vcpu *vcpu, 
gfn_t gfn, - struct list_head *invalid_list) -{ - struct kvm_mmu_page *s; - bool ret = false; - - for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn) { - if (!s->unsync) - continue; - - WARN_ON(s->role.level != PT_PAGE_TABLE_LEVEL); - ret |= kvm_sync_page(vcpu, s, invalid_list); - } - - return ret; -} - -struct mmu_page_path { - struct kvm_mmu_page *parent[PT64_ROOT_MAX_LEVEL]; - unsigned int idx[PT64_ROOT_MAX_LEVEL]; -}; - -#define for_each_sp(pvec, sp, parents, i) \ - for (i = mmu_pages_first(&pvec, &parents); \ - i < pvec.nr && ({ sp = pvec.page[i].sp; 1;}); \ - i = mmu_pages_next(&pvec, &parents, i)) - -static int mmu_pages_next(struct kvm_mmu_pages *pvec, - struct mmu_page_path *parents, - int i) -{ - int n; - - for (n = i+1; n < pvec->nr; n++) { - struct kvm_mmu_page *sp = pvec->page[n].sp; - unsigned idx = pvec->page[n].idx; - int level = sp->role.level; - - parents->idx[level-1] = idx; - if (level == PT_PAGE_TABLE_LEVEL) - break; - - parents->parent[level-2] = sp; - } - - return n; -} - -static int mmu_pages_first(struct kvm_mmu_pages *pvec, - struct mmu_page_path *parents) -{ - struct kvm_mmu_page *sp; - int level; - - if (pvec->nr == 0) - return 0; - - WARN_ON(pvec->page[0].idx != INVALID_INDEX); - - sp = pvec->page[0].sp; - level = sp->role.level; - WARN_ON(level == PT_PAGE_TABLE_LEVEL); - - parents->parent[level-2] = sp; - - /* Also set up a sentinel. Further entries in pvec are all - * children of sp, so this element is never overwritten. - */ - parents->parent[level-1] = NULL; - return mmu_pages_next(pvec, parents, 0); -} - -static void mmu_pages_clear_parents(struct mmu_page_path *parents) -{ - struct kvm_mmu_page *sp; - unsigned int level = 0; - - do { - unsigned int idx = parents->idx[level]; - sp = parents->parent[level]; - if (!sp) - return; - - WARN_ON(idx == INVALID_INDEX); - clear_unsync_child_bit(sp, idx); - level++; - } while (!sp->unsync_children); -} - -static void mmu_sync_children(struct kvm_vcpu *vcpu, - struct kvm_mmu_page *parent) -{ - int i; - struct kvm_mmu_page *sp; - struct mmu_page_path parents; - struct kvm_mmu_pages pages; - LIST_HEAD(invalid_list); - bool flush = false; - - while (mmu_unsync_walk(parent, &pages)) { - bool protected = false; - - for_each_sp(pages, sp, parents, i) - protected |= rmap_write_protect(vcpu, sp->gfn); - - if (protected) { - kvm_flush_remote_tlbs(vcpu->kvm); - flush = false; - } - - for_each_sp(pages, sp, parents, i) { - flush |= kvm_sync_page(vcpu, sp, &invalid_list); - mmu_pages_clear_parents(&parents); - } - if (need_resched() || spin_needbreak(&vcpu->kvm->mmu_lock)) { - kvm_mmu_flush_or_zap(vcpu, &invalid_list, false, flush); - cond_resched_lock(&vcpu->kvm->mmu_lock); - flush = false; - } - } - - kvm_mmu_flush_or_zap(vcpu, &invalid_list, false, flush); -} - -static void __clear_sp_write_flooding_count(struct kvm_mmu_page *sp) -{ - atomic_set(&sp->write_flooding_count, 0); -} - -static void clear_sp_write_flooding_count(u64 *spte) -{ - struct kvm_mmu_page *sp = page_header(__pa(spte)); - - __clear_sp_write_flooding_count(sp); -} - -static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, - gfn_t gfn, - gva_t gaddr, - unsigned level, - int direct, - unsigned access) -{ - union kvm_mmu_page_role role; - unsigned quadrant; - struct kvm_mmu_page *sp; - bool need_sync = false; - bool flush = false; - int collisions = 0; - LIST_HEAD(invalid_list); - - role = vcpu->arch.mmu->mmu_role.base; - role.level = level; - role.direct = direct; - if (role.direct) - role.gpte_is_8_bytes = true; - role.access = access; - if 
(!vcpu->arch.mmu->direct_map - && vcpu->arch.mmu->root_level <= PT32_ROOT_LEVEL) { - quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level)); - quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1; - role.quadrant = quadrant; - } - for_each_valid_sp(vcpu->kvm, sp, gfn) { - if (sp->gfn != gfn) { - collisions++; - continue; - } - - if (!need_sync && sp->unsync) - need_sync = true; - - if (sp->role.word != role.word) - continue; - - if (sp->unsync) { - /* The page is good, but __kvm_sync_page might still end - * up zapping it. If so, break in order to rebuild it. - */ - if (!__kvm_sync_page(vcpu, sp, &invalid_list)) - break; - - WARN_ON(!list_empty(&invalid_list)); - kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); - } - - if (sp->unsync_children) - kvm_make_request(KVM_REQ_MMU_SYNC, vcpu); - - __clear_sp_write_flooding_count(sp); - trace_kvm_mmu_get_page(sp, false); - goto out; - } - - ++vcpu->kvm->stat.mmu_cache_miss; - - sp = kvm_mmu_alloc_page(vcpu, direct); - - sp->gfn = gfn; - sp->role = role; - hlist_add_head(&sp->hash_link, - &vcpu->kvm->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)]); - if (!direct) { - /* - * we should do write protection before syncing pages - * otherwise the content of the synced shadow page may - * be inconsistent with guest page table. - */ - account_shadowed(vcpu->kvm, sp); - if (level == PT_PAGE_TABLE_LEVEL && - rmap_write_protect(vcpu, gfn)) - kvm_flush_remote_tlbs_with_address(vcpu->kvm, gfn, 1); - - if (level > PT_PAGE_TABLE_LEVEL && need_sync) - flush |= kvm_sync_pages(vcpu, gfn, &invalid_list); - } - clear_page(sp->spt); - trace_kvm_mmu_get_page(sp, true); - - kvm_mmu_flush_or_zap(vcpu, &invalid_list, false, flush); -out: - if (collisions > vcpu->kvm->stat.max_mmu_page_hash_collisions) - vcpu->kvm->stat.max_mmu_page_hash_collisions = collisions; - return sp; -} - -static void shadow_walk_init_using_root(struct kvm_shadow_walk_iterator *iterator, - struct kvm_vcpu *vcpu, hpa_t root, - u64 addr) -{ - iterator->addr = addr; - iterator->shadow_addr = root; - iterator->level = vcpu->arch.mmu->shadow_root_level; - - if (iterator->level == PT64_ROOT_4LEVEL && - vcpu->arch.mmu->root_level < PT64_ROOT_4LEVEL && - !vcpu->arch.mmu->direct_map) - --iterator->level; - - if (iterator->level == PT32E_ROOT_LEVEL) { - /* - * prev_root is currently only used for 64-bit hosts. So only - * the active root_hpa is valid here. 
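- * For PT32E the walk below starts from the PAE root entry selected
- * by bits 31:30 of the address rather than from root_hpa itself.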
- */
-		BUG_ON(root != vcpu->arch.mmu->root_hpa);
-
-		iterator->shadow_addr
-			= vcpu->arch.mmu->pae_root[(addr >> 30) & 3];
-		iterator->shadow_addr &= PT64_BASE_ADDR_MASK;
-		--iterator->level;
-		if (!iterator->shadow_addr)
-			iterator->level = 0;
-	}
-}
-
-static void shadow_walk_init(struct kvm_shadow_walk_iterator *iterator,
-			     struct kvm_vcpu *vcpu, u64 addr)
-{
-	shadow_walk_init_using_root(iterator, vcpu, vcpu->arch.mmu->root_hpa,
-				    addr);
-}
-
-static bool shadow_walk_okay(struct kvm_shadow_walk_iterator *iterator)
-{
-	if (iterator->level < PT_PAGE_TABLE_LEVEL)
-		return false;
-
-	iterator->index = SHADOW_PT_INDEX(iterator->addr, iterator->level);
-	iterator->sptep = ((u64 *)__va(iterator->shadow_addr)) + iterator->index;
-	return true;
-}
-
-static void __shadow_walk_next(struct kvm_shadow_walk_iterator *iterator,
-			       u64 spte)
-{
-	if (is_last_spte(spte, iterator->level)) {
-		iterator->level = 0;
-		return;
-	}
-
-	iterator->shadow_addr = spte & PT64_BASE_ADDR_MASK;
-	--iterator->level;
-}
-
-static void shadow_walk_next(struct kvm_shadow_walk_iterator *iterator)
-{
-	__shadow_walk_next(iterator, *iterator->sptep);
-}
-
-static void link_shadow_page(struct kvm_vcpu *vcpu, u64 *sptep,
-			     struct kvm_mmu_page *sp)
-{
-	u64 spte;
-
-	BUILD_BUG_ON(VMX_EPT_WRITABLE_MASK != PT_WRITABLE_MASK);
-
-	spte = __pa(sp->spt) | shadow_present_mask | PT_WRITABLE_MASK |
-	       shadow_user_mask | shadow_x_mask | shadow_me_mask;
-
-	if (sp_ad_disabled(sp))
-		spte |= SPTE_AD_DISABLED_MASK;
-	else
-		spte |= shadow_accessed_mask;
-
-	mmu_spte_set(sptep, spte);
-
-	mmu_page_add_parent_pte(vcpu, sp, sptep);
-
-	if (sp->unsync_children || sp->unsync)
-		mark_unsync(sptep);
-}
-
-static void validate_direct_spte(struct kvm_vcpu *vcpu, u64 *sptep,
-				 unsigned direct_access)
-{
-	if (is_shadow_present_pte(*sptep) && !is_large_pte(*sptep)) {
-		struct kvm_mmu_page *child;
-
-		/*
-		 * For the direct sp, if the guest pte's dirty bit
-		 * changed from clean to dirty, it will corrupt the
-		 * sp's access: allow writable in the read-only sp,
-		 * so we should update the spte at this point to get
-		 * a new sp with the correct access.
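- * Dropping the parent PTE below forces the next fault to link a
- * shadow page whose role matches the new access bits.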
- */ - child = page_header(*sptep & PT64_BASE_ADDR_MASK); - if (child->role.access == direct_access) - return; - - drop_parent_pte(child, sptep); - kvm_flush_remote_tlbs_with_address(vcpu->kvm, child->gfn, 1); - } -} - -static bool mmu_page_zap_pte(struct kvm *kvm, struct kvm_mmu_page *sp, - u64 *spte) -{ - u64 pte; - struct kvm_mmu_page *child; - - pte = *spte; - if (is_shadow_present_pte(pte)) { - if (is_last_spte(pte, sp->role.level)) { - drop_spte(kvm, spte); - if (is_large_pte(pte)) - --kvm->stat.lpages; - } else { - child = page_header(pte & PT64_BASE_ADDR_MASK); - drop_parent_pte(child, spte); - } - return true; - } - - if (is_mmio_spte(pte)) - mmu_spte_clear_no_track(spte); - - return false; -} - -static void kvm_mmu_page_unlink_children(struct kvm *kvm, - struct kvm_mmu_page *sp) -{ - unsigned i; - - for (i = 0; i < PT64_ENT_PER_PAGE; ++i) - mmu_page_zap_pte(kvm, sp, sp->spt + i); -} - -static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp) -{ - u64 *sptep; - struct rmap_iterator iter; - - while ((sptep = rmap_get_first(&sp->parent_ptes, &iter))) - drop_parent_pte(sp, sptep); -} - -static int mmu_zap_unsync_children(struct kvm *kvm, - struct kvm_mmu_page *parent, - struct list_head *invalid_list) -{ - int i, zapped = 0; - struct mmu_page_path parents; - struct kvm_mmu_pages pages; - - if (parent->role.level == PT_PAGE_TABLE_LEVEL) - return 0; - - while (mmu_unsync_walk(parent, &pages)) { - struct kvm_mmu_page *sp; - - for_each_sp(pages, sp, parents, i) { - kvm_mmu_prepare_zap_page(kvm, sp, invalid_list); - mmu_pages_clear_parents(&parents); - zapped++; - } - } - - return zapped; -} - -static bool __kvm_mmu_prepare_zap_page(struct kvm *kvm, - struct kvm_mmu_page *sp, - struct list_head *invalid_list, - int *nr_zapped) -{ - bool list_unstable; - - trace_kvm_mmu_prepare_zap_page(sp); - ++kvm->stat.mmu_shadow_zapped; - *nr_zapped = mmu_zap_unsync_children(kvm, sp, invalid_list); - kvm_mmu_page_unlink_children(kvm, sp); - kvm_mmu_unlink_parents(kvm, sp); - - /* Zapping children means active_mmu_pages has become unstable. */ - list_unstable = *nr_zapped; - - if (!sp->role.invalid && !sp->role.direct) - unaccount_shadowed(kvm, sp); - - if (sp->unsync) - kvm_unlink_unsync_page(kvm, sp); - if (!sp->root_count) { - /* Count self */ - (*nr_zapped)++; - list_move(&sp->link, invalid_list); - kvm_mod_used_mmu_pages(kvm, -1); - } else { - list_move(&sp->link, &kvm->arch.active_mmu_pages); - - /* - * Obsolete pages cannot be used on any vCPUs, see the comment - * in kvm_mmu_zap_all_fast(). Note, is_obsolete_sp() also - * treats invalid shadow pages as being obsolete. - */ - if (!is_obsolete_sp(kvm, sp)) - kvm_reload_remote_mmus(kvm); - } - - if (sp->lpage_disallowed) - unaccount_huge_nx_page(kvm, sp); - - sp->role.invalid = 1; - return list_unstable; -} - -static bool kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp, - struct list_head *invalid_list) -{ - int nr_zapped; - - __kvm_mmu_prepare_zap_page(kvm, sp, invalid_list, &nr_zapped); - return nr_zapped; -} - -static void kvm_mmu_commit_zap_page(struct kvm *kvm, - struct list_head *invalid_list) -{ - struct kvm_mmu_page *sp, *nsp; - - if (list_empty(invalid_list)) - return; - - /* - * We need to make sure everyone sees our modifications to - * the page tables and see changes to vcpu->mode here. The barrier - * in the kvm_flush_remote_tlbs() achieves this. This pairs - * with vcpu_enter_guest and walk_shadow_page_lockless_begin/end. 
- *
- * In addition, kvm_flush_remote_tlbs waits for all vcpus to exit
- * guest mode and/or lockless shadow page table walks.
- */
-	kvm_flush_remote_tlbs(kvm);
-
-	list_for_each_entry_safe(sp, nsp, invalid_list, link) {
-		WARN_ON(!sp->role.invalid || sp->root_count);
-		kvm_mmu_free_page(sp);
-	}
-}
-
-static bool prepare_zap_oldest_mmu_page(struct kvm *kvm,
-					struct list_head *invalid_list)
-{
-	struct kvm_mmu_page *sp;
-
-	if (list_empty(&kvm->arch.active_mmu_pages))
-		return false;
-
-	sp = list_last_entry(&kvm->arch.active_mmu_pages,
-			     struct kvm_mmu_page, link);
-	return kvm_mmu_prepare_zap_page(kvm, sp, invalid_list);
-}
-
-/*
- * Changing the number of mmu pages allocated to the vm
- * Note: if goal_nr_mmu_pages is too small, you will get a deadlock
- */
-void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned long goal_nr_mmu_pages)
-{
-	LIST_HEAD(invalid_list);
-
-	spin_lock(&kvm->mmu_lock);
-
-	if (kvm->arch.n_used_mmu_pages > goal_nr_mmu_pages) {
-		/* Need to free some mmu pages to achieve the goal. */
-		while (kvm->arch.n_used_mmu_pages > goal_nr_mmu_pages)
-			if (!prepare_zap_oldest_mmu_page(kvm, &invalid_list))
-				break;
-
-		kvm_mmu_commit_zap_page(kvm, &invalid_list);
-		goal_nr_mmu_pages = kvm->arch.n_used_mmu_pages;
-	}
-
-	kvm->arch.n_max_mmu_pages = goal_nr_mmu_pages;
-
-	spin_unlock(&kvm->mmu_lock);
-}
-
-int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
-{
-	struct kvm_mmu_page *sp;
-	LIST_HEAD(invalid_list);
-	int r;
-
-	pgprintk("%s: looking for gfn %llx\n", __func__, gfn);
-	r = 0;
-	spin_lock(&kvm->mmu_lock);
-	for_each_gfn_indirect_valid_sp(kvm, sp, gfn) {
-		pgprintk("%s: gfn %llx role %x\n", __func__, gfn,
-			 sp->role.word);
-		r = 1;
-		kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
-	}
-	kvm_mmu_commit_zap_page(kvm, &invalid_list);
-	spin_unlock(&kvm->mmu_lock);
-
-	return r;
-}
-EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page);
-
-static void kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
-{
-	trace_kvm_mmu_unsync_page(sp);
-	++vcpu->kvm->stat.mmu_unsync;
-	sp->unsync = 1;
-
-	kvm_mmu_mark_parents_unsync(sp);
-}
-
-static bool mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
-				   bool can_unsync)
-{
-	struct kvm_mmu_page *sp;
-
-	if (kvm_page_track_is_active(vcpu, gfn, KVM_PAGE_TRACK_WRITE))
-		return true;
-
-	for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn) {
-		if (!can_unsync)
-			return true;
-
-		if (sp->unsync)
-			continue;
-
-		WARN_ON(sp->role.level != PT_PAGE_TABLE_LEVEL);
-		kvm_unsync_page(vcpu, sp);
-	}
-
-	/*
-	 * We need to ensure that the marking of unsync pages is visible
-	 * before the SPTE is updated to allow writes because
-	 * kvm_mmu_sync_roots() checks the unsync flags without holding
-	 * the MMU lock and so can race with this. If the SPTE was updated
-	 * before the page had been marked as unsync-ed, something like the
-	 * following could happen:
-	 *
-	 * CPU 1                    CPU 2
-	 * ---------------------------------------------------------------------
-	 * 1.2 Host updates SPTE
-	 *     to be writable
-	 *                      2.1 Guest writes a GPTE for GVA X.
-	 *                          (GPTE being in the guest page table shadowed
-	 *                           by the SP from CPU 1.)
-	 *                          This reads SPTE during the page table walk.
-	 *                          Since SPTE.W is read as 1, there is no
-	 *                          fault.
-	 *
-	 *                      2.2 Guest issues TLB flush.
-	 *                          That causes a VM Exit.
-	 *
-	 *                      2.3 kvm_mmu_sync_pages() reads sp->unsync.
-	 *                          Since it is false, it just returns.
-	 *
-	 *                      2.4 Guest accesses GVA X.
-	 *                          Since the mapping in the SP was not updated,
-	 *                          the old mapping for GVA X is incorrectly
-	 *                          used.
- * 1.1 Host marks SP - * as unsync - * (sp->unsync = true) - * - * The write barrier below ensures that 1.1 happens before 1.2 and thus - * the situation in 2.4 does not arise. The implicit barrier in 2.2 - * pairs with this write barrier. - */ - smp_wmb(); - - return false; -} - -static bool kvm_is_mmio_pfn(kvm_pfn_t pfn) -{ - if (pfn_valid(pfn)) - return !is_zero_pfn(pfn) && PageReserved(pfn_to_page(pfn)) && - /* - * Some reserved pages, such as those from NVDIMM - * DAX devices, are not for MMIO, and can be mapped - * with cached memory type for better performance. - * However, the above check misconceives those pages - * as MMIO, and results in KVM mapping them with UC - * memory type, which would hurt the performance. - * Therefore, we check the host memory type in addition - * and only treat UC/UC-/WC pages as MMIO. - */ - (!pat_enabled() || pat_pfn_immune_to_uc_mtrr(pfn)); - - return !e820__mapped_raw_any(pfn_to_hpa(pfn), - pfn_to_hpa(pfn + 1) - 1, - E820_TYPE_RAM); -} - -/* Bits which may be returned by set_spte() */ -#define SET_SPTE_WRITE_PROTECTED_PT BIT(0) -#define SET_SPTE_NEED_REMOTE_TLB_FLUSH BIT(1) - -static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, - unsigned pte_access, int level, - gfn_t gfn, kvm_pfn_t pfn, bool speculative, - bool can_unsync, bool host_writable) -{ - u64 spte = 0; - int ret = 0; - struct kvm_mmu_page *sp; - - if (set_mmio_spte(vcpu, sptep, gfn, pfn, pte_access)) - return 0; - - sp = page_header(__pa(sptep)); - if (sp_ad_disabled(sp)) - spte |= SPTE_AD_DISABLED_MASK; - else if (kvm_vcpu_ad_need_write_protect(vcpu)) - spte |= SPTE_AD_WRPROT_ONLY_MASK; - - /* - * For the EPT case, shadow_present_mask is 0 if hardware - * supports exec-only page table entries. In that case, - * ACC_USER_MASK and shadow_user_mask are used to represent - * read access. See FNAME(gpte_access) in paging_tmpl.h. - */ - spte |= shadow_present_mask; - if (!speculative) - spte |= spte_shadow_accessed_mask(spte); - - if (level > PT_PAGE_TABLE_LEVEL && (pte_access & ACC_EXEC_MASK) && - is_nx_huge_page_enabled()) { - pte_access &= ~ACC_EXEC_MASK; - } - - if (pte_access & ACC_EXEC_MASK) - spte |= shadow_x_mask; - else - spte |= shadow_nx_mask; - - if (pte_access & ACC_USER_MASK) - spte |= shadow_user_mask; - - if (level > PT_PAGE_TABLE_LEVEL) - spte |= PT_PAGE_SIZE_MASK; - if (tdp_enabled) - spte |= kvm_x86_ops->get_mt_mask(vcpu, gfn, - kvm_is_mmio_pfn(pfn)); - - if (host_writable) - spte |= SPTE_HOST_WRITEABLE; - else - pte_access &= ~ACC_WRITE_MASK; - - if (!kvm_is_mmio_pfn(pfn)) - spte |= shadow_me_mask; - - spte |= (u64)pfn << PAGE_SHIFT; - - if (pte_access & ACC_WRITE_MASK) { - - /* - * Other vcpu creates new sp in the window between - * mapping_level() and acquiring mmu-lock. We can - * allow guest to retry the access, the mapping can - * be fixed if guest refault. - */ - if (level > PT_PAGE_TABLE_LEVEL && - mmu_gfn_lpage_is_disallowed(vcpu, gfn, level)) - goto done; - - spte |= PT_WRITABLE_MASK | SPTE_MMU_WRITEABLE; - - /* - * Optimization: for pte sync, if spte was writable the hash - * lookup is unnecessary (and expensive). Write protection - * is responsibility of mmu_get_page / kvm_sync_page. - * Same reasoning can be applied to dirty page accounting. 
- */ - if (!can_unsync && is_writable_pte(*sptep)) - goto set_pte; - - if (mmu_need_write_protect(vcpu, gfn, can_unsync)) { - pgprintk("%s: found shadow page for %llx, marking ro\n", - __func__, gfn); - ret |= SET_SPTE_WRITE_PROTECTED_PT; - pte_access &= ~ACC_WRITE_MASK; - spte &= ~(PT_WRITABLE_MASK | SPTE_MMU_WRITEABLE); - } - } - - if (pte_access & ACC_WRITE_MASK) { - kvm_vcpu_mark_page_dirty(vcpu, gfn); - spte |= spte_shadow_dirty_mask(spte); - } - - if (speculative) - spte = mark_spte_for_access_track(spte); - -set_pte: - if (mmu_spte_update(sptep, spte)) - ret |= SET_SPTE_NEED_REMOTE_TLB_FLUSH; -done: - return ret; -} - -static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access, - int write_fault, int level, gfn_t gfn, kvm_pfn_t pfn, - bool speculative, bool host_writable) -{ - int was_rmapped = 0; - int rmap_count; - int set_spte_ret; - int ret = RET_PF_RETRY; - bool flush = false; - - pgprintk("%s: spte %llx write_fault %d gfn %llx\n", __func__, - *sptep, write_fault, gfn); - - if (is_shadow_present_pte(*sptep)) { - /* - * If we overwrite a PTE page pointer with a 2MB PMD, unlink - * the parent of the now unreachable PTE. - */ - if (level > PT_PAGE_TABLE_LEVEL && - !is_large_pte(*sptep)) { - struct kvm_mmu_page *child; - u64 pte = *sptep; - - child = page_header(pte & PT64_BASE_ADDR_MASK); - drop_parent_pte(child, sptep); - flush = true; - } else if (pfn != spte_to_pfn(*sptep)) { - pgprintk("hfn old %llx new %llx\n", - spte_to_pfn(*sptep), pfn); - drop_spte(vcpu->kvm, sptep); - flush = true; - } else - was_rmapped = 1; - } - - set_spte_ret = set_spte(vcpu, sptep, pte_access, level, gfn, pfn, - speculative, true, host_writable); - if (set_spte_ret & SET_SPTE_WRITE_PROTECTED_PT) { - if (write_fault) - ret = RET_PF_EMULATE; - kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); - } - - if (set_spte_ret & SET_SPTE_NEED_REMOTE_TLB_FLUSH || flush) - kvm_flush_remote_tlbs_with_address(vcpu->kvm, gfn, - KVM_PAGES_PER_HPAGE(level)); - - if (unlikely(is_mmio_spte(*sptep))) - ret = RET_PF_EMULATE; - - pgprintk("%s: setting spte %llx\n", __func__, *sptep); - trace_kvm_mmu_set_spte(level, gfn, sptep); - if (!was_rmapped && is_large_pte(*sptep)) - ++vcpu->kvm->stat.lpages; - - if (is_shadow_present_pte(*sptep)) { - if (!was_rmapped) { - rmap_count = rmap_add(vcpu, sptep, gfn); - if (rmap_count > RMAP_RECYCLE_THRESHOLD) - rmap_recycle(vcpu, sptep, gfn); - } - } - - return ret; -} - -static kvm_pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, - bool no_dirty_log) -{ - struct kvm_memory_slot *slot; - - slot = gfn_to_memslot_dirty_bitmap(vcpu, gfn, no_dirty_log); - if (!slot) - return KVM_PFN_ERR_FAULT; - - return gfn_to_pfn_memslot_atomic(slot, gfn); -} - -static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu, - struct kvm_mmu_page *sp, - u64 *start, u64 *end) -{ - struct page *pages[PTE_PREFETCH_NUM]; - struct kvm_memory_slot *slot; - unsigned access = sp->role.access; - int i, ret; - gfn_t gfn; - - gfn = kvm_mmu_page_get_gfn(sp, start - sp->spt); - slot = gfn_to_memslot_dirty_bitmap(vcpu, gfn, access & ACC_WRITE_MASK); - if (!slot) - return -1; - - ret = gfn_to_page_many_atomic(slot, gfn, pages, end - start); - if (ret <= 0) - return -1; - - for (i = 0; i < ret; i++, gfn++, start++) { - mmu_set_spte(vcpu, start, access, 0, sp->role.level, gfn, - page_to_pfn(pages[i]), true, true); - put_page(pages[i]); - } - - return 0; -} - -static void __direct_pte_prefetch(struct kvm_vcpu *vcpu, - struct kvm_mmu_page *sp, u64 *sptep) -{ - u64 *spte, *start = NULL; - int i; - - 
WARN_ON(!sp->role.direct);
-
-	i = (sptep - sp->spt) & ~(PTE_PREFETCH_NUM - 1);
-	spte = sp->spt + i;
-
-	for (i = 0; i < PTE_PREFETCH_NUM; i++, spte++) {
-		if (is_shadow_present_pte(*spte) || spte == sptep) {
-			if (!start)
-				continue;
-			if (direct_pte_prefetch_many(vcpu, sp, start, spte) < 0)
-				break;
-			start = NULL;
-		} else if (!start)
-			start = spte;
-	}
-}
-
-static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep)
-{
-	struct kvm_mmu_page *sp;
-
-	sp = page_header(__pa(sptep));
-
-	/*
-	 * Without accessed bits, there's no way to distinguish between
-	 * actually accessed translations and prefetched, so disable pte
-	 * prefetch if accessed bits aren't available.
-	 */
-	if (sp_ad_disabled(sp))
-		return;
-
-	if (sp->role.level > PT_PAGE_TABLE_LEVEL)
-		return;
-
-	__direct_pte_prefetch(vcpu, sp, sptep);
-}
-
-static void disallowed_hugepage_adjust(struct kvm_shadow_walk_iterator it,
-				       gfn_t gfn, kvm_pfn_t *pfnp, int *levelp)
-{
-	int level = *levelp;
-	u64 spte = *it.sptep;
-
-	if (it.level == level && level > PT_PAGE_TABLE_LEVEL &&
-	    is_nx_huge_page_enabled() &&
-	    is_shadow_present_pte(spte) &&
-	    !is_large_pte(spte)) {
-		/*
-		 * A small SPTE exists for this pfn, but FNAME(fetch)
-		 * and __direct_map would like to create a large PTE
-		 * instead: just force them to go down another level,
-		 * patching the next 9 bits of the address back into
-		 * pfn for them.
-		 */
-		u64 page_mask = KVM_PAGES_PER_HPAGE(level) - KVM_PAGES_PER_HPAGE(level - 1);
-		*pfnp |= gfn & page_mask;
-		(*levelp)--;
-	}
-}
-
-static int __direct_map(struct kvm_vcpu *vcpu, gpa_t gpa, int write,
-			int map_writable, int level, kvm_pfn_t pfn,
-			bool prefault, bool lpage_disallowed)
-{
-	struct kvm_shadow_walk_iterator it;
-	struct kvm_mmu_page *sp;
-	int ret;
-	gfn_t gfn = gpa >> PAGE_SHIFT;
-	gfn_t base_gfn = gfn;
-
-	if (!VALID_PAGE(vcpu->arch.mmu->root_hpa))
-		return RET_PF_RETRY;
-
-	trace_kvm_mmu_spte_requested(gpa, level, pfn);
-	for_each_shadow_entry(vcpu, gpa, it) {
-		/*
-		 * We cannot overwrite existing page tables with an NX
-		 * large page, as the leaf could be executable.
-		 */
-		disallowed_hugepage_adjust(it, gfn, &pfn, &level);
-
-		base_gfn = gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1);
-		if (it.level == level)
-			break;
-
-		drop_large_spte(vcpu, it.sptep);
-		if (!is_shadow_present_pte(*it.sptep)) {
-			sp = kvm_mmu_get_page(vcpu, base_gfn, it.addr,
-					      it.level - 1, true, ACC_ALL);
-
-			link_shadow_page(vcpu, it.sptep, sp);
-			if (lpage_disallowed)
-				account_huge_nx_page(vcpu->kvm, sp);
-		}
-	}
-
-	ret = mmu_set_spte(vcpu, it.sptep, ACC_ALL,
-			   write, level, base_gfn, pfn, prefault,
-			   map_writable);
-	direct_pte_prefetch(vcpu, it.sptep);
-	++vcpu->stat.pf_fixed;
-	return ret;
-}
-
-static void kvm_send_hwpoison_signal(unsigned long address, struct task_struct *tsk)
-{
-	send_sig_mceerr(BUS_MCEERR_AR, (void __user *)address, PAGE_SHIFT, tsk);
-}
-
-static int kvm_handle_bad_page(struct kvm_vcpu *vcpu, gfn_t gfn, kvm_pfn_t pfn)
-{
-	/*
-	 * Do not cache the mmio info caused by writing the readonly gfn
-	 * into the spte, otherwise a read access on the readonly gfn can
-	 * also cause an mmio page fault and be treated as mmio access.
- */
-	if (pfn == KVM_PFN_ERR_RO_FAULT)
-		return RET_PF_EMULATE;
-
-	if (pfn == KVM_PFN_ERR_HWPOISON) {
-		kvm_send_hwpoison_signal(kvm_vcpu_gfn_to_hva(vcpu, gfn), current);
-		return RET_PF_RETRY;
-	}
-
-	return -EFAULT;
-}
-
-static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu,
-					gfn_t gfn, kvm_pfn_t *pfnp,
-					int *levelp)
-{
-	kvm_pfn_t pfn = *pfnp;
-	int level = *levelp;
-
-	/*
-	 * Check if it's a transparent hugepage. If this would be a
-	 * hugetlbfs page, level wouldn't be set to
-	 * PT_PAGE_TABLE_LEVEL and there would be no adjustment done
-	 * here.
-	 */
-	if (!is_error_noslot_pfn(pfn) && !kvm_is_reserved_pfn(pfn) &&
-	    !kvm_is_zone_device_pfn(pfn) && level == PT_PAGE_TABLE_LEVEL &&
-	    PageTransCompoundMap(pfn_to_page(pfn)) &&
-	    !mmu_gfn_lpage_is_disallowed(vcpu, gfn, PT_DIRECTORY_LEVEL)) {
-		unsigned long mask;
-		/*
-		 * mmu_notifier_retry was successful and we hold the
-		 * mmu_lock here, so the pmd can't be split from
-		 * under us, and in turn
-		 * __split_huge_page_refcount() can't run from under
-		 * us and we can safely transfer the refcount from
-		 * PG_tail to PG_head as we switch the pfn from tail
-		 * to head.
-		 */
-		*levelp = level = PT_DIRECTORY_LEVEL;
-		mask = KVM_PAGES_PER_HPAGE(level) - 1;
-		VM_BUG_ON((gfn & mask) != (pfn & mask));
-		if (pfn & mask) {
-			kvm_release_pfn_clean(pfn);
-			pfn &= ~mask;
-			kvm_get_pfn(pfn);
-			*pfnp = pfn;
-		}
-	}
-}
-
-static bool handle_abnormal_pfn(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
-				kvm_pfn_t pfn, unsigned access, int *ret_val)
-{
-	/* The pfn is invalid, report the error! */
-	if (unlikely(is_error_pfn(pfn))) {
-		*ret_val = kvm_handle_bad_page(vcpu, gfn, pfn);
-		return true;
-	}
-
-	if (unlikely(is_noslot_pfn(pfn)))
-		vcpu_cache_mmio_info(vcpu, gva, gfn,
-				     access & shadow_mmio_access_mask);
-
-	return false;
-}
-
-static bool page_fault_can_be_fast(u32 error_code)
-{
-	/*
-	 * Do not fix the mmio spte with invalid generation number which
-	 * needs to be updated by the slow page fault path.
-	 */
-	if (unlikely(error_code & PFERR_RSVD_MASK))
-		return false;
-
-	/* See if the page fault is due to an NX violation */
-	if (unlikely(((error_code & (PFERR_FETCH_MASK | PFERR_PRESENT_MASK))
-		      == (PFERR_FETCH_MASK | PFERR_PRESENT_MASK))))
-		return false;
-
-	/*
-	 * #PF can be fast if:
-	 * 1. The shadow page table entry is not present, which could mean that
-	 *    the fault is potentially caused by access tracking (if enabled).
-	 * 2. The shadow page table entry is present and the fault
-	 *    is caused by write-protect, that means we just need to change the
-	 *    W bit of the spte which can be done out of mmu-lock.
-	 *
-	 * However, if access tracking is disabled we know that a non-present
-	 * page must be a genuine page fault where we have to create a new SPTE.
-	 * So, if access tracking is disabled, we return true only for write
-	 * accesses to a present page.
-	 */
-
-	return shadow_acc_track_mask != 0 ||
-	       ((error_code & (PFERR_WRITE_MASK | PFERR_PRESENT_MASK))
-		== (PFERR_WRITE_MASK | PFERR_PRESENT_MASK));
-}
-
-/*
- * Returns true if the SPTE was fixed successfully. Otherwise,
- * someone else modified the SPTE from its original value.
- */
-static bool
-fast_pf_fix_direct_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
-			u64 *sptep, u64 old_spte, u64 new_spte)
-{
-	gfn_t gfn;
-
-	WARN_ON(!sp->role.direct);
-
-	/*
-	 * Theoretically we could also set dirty bit (and flush TLB) here in
-	 * order to eliminate unnecessary PML logging. See comments in
-	 * set_spte.
But fast_page_fault is very unlikely to happen with PML - * enabled, so we do not do this. This might result in the same GPA - * to be logged in PML buffer again when the write really happens, and - * eventually to be called by mark_page_dirty twice. But it's also no - * harm. This also avoids the TLB flush needed after setting dirty bit - * so non-PML cases won't be impacted. - * - * Compare with set_spte where instead shadow_dirty_mask is set. - */ - if (cmpxchg64(sptep, old_spte, new_spte) != old_spte) - return false; - - if (is_writable_pte(new_spte) && !is_writable_pte(old_spte)) { - /* - * The gfn of direct spte is stable since it is - * calculated by sp->gfn. - */ - gfn = kvm_mmu_page_get_gfn(sp, sptep - sp->spt); - kvm_vcpu_mark_page_dirty(vcpu, gfn); - } - - return true; -} - -static bool is_access_allowed(u32 fault_err_code, u64 spte) -{ - if (fault_err_code & PFERR_FETCH_MASK) - return is_executable_pte(spte); - - if (fault_err_code & PFERR_WRITE_MASK) - return is_writable_pte(spte); - - /* Fault was on Read access */ - return spte & PT_PRESENT_MASK; -} - -/* - * Return value: - * - true: let the vcpu to access on the same address again. - * - false: let the real page fault path to fix it. - */ -static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level, - u32 error_code) -{ - struct kvm_shadow_walk_iterator iterator; - struct kvm_mmu_page *sp; - bool fault_handled = false; - u64 spte = 0ull; - uint retry_count = 0; - - if (!VALID_PAGE(vcpu->arch.mmu->root_hpa)) - return false; - - if (!page_fault_can_be_fast(error_code)) - return false; - - walk_shadow_page_lockless_begin(vcpu); - - do { - u64 new_spte; - - for_each_shadow_entry_lockless(vcpu, gva, iterator, spte) - if (!is_shadow_present_pte(spte) || - iterator.level < level) - break; - - sp = page_header(__pa(iterator.sptep)); - if (!is_last_spte(spte, sp->role.level)) - break; - - /* - * Check whether the memory access that caused the fault would - * still cause it if it were to be performed right now. If not, - * then this is a spurious fault caused by TLB lazily flushed, - * or some other CPU has already fixed the PTE after the - * current CPU took the fault. - * - * Need not check the access of upper level table entries since - * they are always ACC_ALL. - */ - if (is_access_allowed(error_code, spte)) { - fault_handled = true; - break; - } - - new_spte = spte; - - if (is_access_track_spte(spte)) - new_spte = restore_acc_track_spte(new_spte); - - /* - * Currently, to simplify the code, write-protection can - * be removed in the fast path only if the SPTE was - * write-protected for dirty-logging or access tracking. - */ - if ((error_code & PFERR_WRITE_MASK) && - spte_can_locklessly_be_made_writable(spte)) - { - new_spte |= PT_WRITABLE_MASK; - - /* - * Do not fix write-permission on the large spte. Since - * we only dirty the first page into the dirty-bitmap in - * fast_pf_fix_direct_spte(), other pages are missed - * if its slot has dirty logging enabled. - * - * Instead, we let the slow page fault path create a - * normal spte to fix the access. - * - * See the comments in kvm_arch_commit_memory_region(). - */ - if (sp->role.level > PT_PAGE_TABLE_LEVEL) - break; - } - - /* Verify that the fault can be handled in the fast path */ - if (new_spte == spte || - !is_access_allowed(error_code, new_spte)) - break; - - /* - * Currently, fast page fault only works for direct mapping - * since the gfn is not stable for indirect shadow page. See - * Documentation/virt/kvm/locking.txt to get more detail. 
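- * fast_pf_fix_direct_spte() updates the spte with cmpxchg64(), so a
- * concurrent modification simply fails the exchange and we retry.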
- */ - fault_handled = fast_pf_fix_direct_spte(vcpu, sp, - iterator.sptep, spte, - new_spte); - if (fault_handled) - break; - - if (++retry_count > 4) { - printk_once(KERN_WARNING - "kvm: Fast #PF retrying more than 4 times.\n"); - break; - } - - } while (true); - - trace_fast_page_fault(vcpu, gva, error_code, iterator.sptep, - spte, fault_handled); - walk_shadow_page_lockless_end(vcpu); - - return fault_handled; -} - -static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, - gva_t gva, kvm_pfn_t *pfn, bool write, bool *writable); -static int make_mmu_pages_available(struct kvm_vcpu *vcpu); - -static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, - gfn_t gfn, bool prefault) -{ - int r; - int level; - bool force_pt_level; - kvm_pfn_t pfn; - unsigned long mmu_seq; - bool map_writable, write = error_code & PFERR_WRITE_MASK; - bool lpage_disallowed = (error_code & PFERR_FETCH_MASK) && - is_nx_huge_page_enabled(); - - force_pt_level = lpage_disallowed; - level = mapping_level(vcpu, gfn, &force_pt_level); - if (likely(!force_pt_level)) { - /* - * This path builds a PAE pagetable - so we can map - * 2mb pages at maximum. Therefore check if the level - * is larger than that. - */ - if (level > PT_DIRECTORY_LEVEL) - level = PT_DIRECTORY_LEVEL; - - gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1); - } - - if (fast_page_fault(vcpu, v, level, error_code)) - return RET_PF_RETRY; - - mmu_seq = vcpu->kvm->mmu_notifier_seq; - smp_rmb(); - - if (try_async_pf(vcpu, prefault, gfn, v, &pfn, write, &map_writable)) - return RET_PF_RETRY; - - if (handle_abnormal_pfn(vcpu, v, gfn, pfn, ACC_ALL, &r)) - return r; - - r = RET_PF_RETRY; - spin_lock(&vcpu->kvm->mmu_lock); - if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) - goto out_unlock; - if (make_mmu_pages_available(vcpu) < 0) - goto out_unlock; - if (likely(!force_pt_level)) - transparent_hugepage_adjust(vcpu, gfn, &pfn, &level); - r = __direct_map(vcpu, v, write, map_writable, level, pfn, - prefault, false); -out_unlock: - spin_unlock(&vcpu->kvm->mmu_lock); - kvm_release_pfn_clean(pfn); - return r; -} - -static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa, - struct list_head *invalid_list) -{ - struct kvm_mmu_page *sp; - - if (!VALID_PAGE(*root_hpa)) - return; - - sp = page_header(*root_hpa & PT64_BASE_ADDR_MASK); - --sp->root_count; - if (!sp->root_count && sp->role.invalid) - kvm_mmu_prepare_zap_page(kvm, sp, invalid_list); - - *root_hpa = INVALID_PAGE; -} - -/* roots_to_free must be some combination of the KVM_MMU_ROOT_* flags */ -void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, - ulong roots_to_free) -{ - int i; - LIST_HEAD(invalid_list); - bool free_active_root = roots_to_free & KVM_MMU_ROOT_CURRENT; - - BUILD_BUG_ON(KVM_MMU_NUM_PREV_ROOTS >= BITS_PER_LONG); - - /* Before acquiring the MMU lock, see if we need to do any real work. 
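If neither the active root nor any requested previous root is valid, there is nothing to free.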
*/ - if (!(free_active_root && VALID_PAGE(mmu->root_hpa))) { - for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) - if ((roots_to_free & KVM_MMU_ROOT_PREVIOUS(i)) && - VALID_PAGE(mmu->prev_roots[i].hpa)) - break; - - if (i == KVM_MMU_NUM_PREV_ROOTS) - return; - } - - spin_lock(&vcpu->kvm->mmu_lock); - - for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) - if (roots_to_free & KVM_MMU_ROOT_PREVIOUS(i)) - mmu_free_root_page(vcpu->kvm, &mmu->prev_roots[i].hpa, - &invalid_list); - - if (free_active_root) { - if (mmu->shadow_root_level >= PT64_ROOT_4LEVEL && - (mmu->root_level >= PT64_ROOT_4LEVEL || mmu->direct_map)) { - mmu_free_root_page(vcpu->kvm, &mmu->root_hpa, - &invalid_list); - } else { - for (i = 0; i < 4; ++i) - if (mmu->pae_root[i] != 0) - mmu_free_root_page(vcpu->kvm, - &mmu->pae_root[i], - &invalid_list); - mmu->root_hpa = INVALID_PAGE; - } - mmu->root_cr3 = 0; - } - - kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); - spin_unlock(&vcpu->kvm->mmu_lock); -} -EXPORT_SYMBOL_GPL(kvm_mmu_free_roots); - -static int mmu_check_root(struct kvm_vcpu *vcpu, gfn_t root_gfn) -{ - int ret = 0; - - if (!kvm_is_visible_gfn(vcpu->kvm, root_gfn)) { - kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); - ret = 1; - } - - return ret; -} - -static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu) -{ - struct kvm_mmu_page *sp; - unsigned i; - - if (vcpu->arch.mmu->shadow_root_level >= PT64_ROOT_4LEVEL) { - spin_lock(&vcpu->kvm->mmu_lock); - if(make_mmu_pages_available(vcpu) < 0) { - spin_unlock(&vcpu->kvm->mmu_lock); - return -ENOSPC; - } - sp = kvm_mmu_get_page(vcpu, 0, 0, - vcpu->arch.mmu->shadow_root_level, 1, ACC_ALL); - ++sp->root_count; - spin_unlock(&vcpu->kvm->mmu_lock); - vcpu->arch.mmu->root_hpa = __pa(sp->spt); - } else if (vcpu->arch.mmu->shadow_root_level == PT32E_ROOT_LEVEL) { - for (i = 0; i < 4; ++i) { - hpa_t root = vcpu->arch.mmu->pae_root[i]; - - MMU_WARN_ON(VALID_PAGE(root)); - spin_lock(&vcpu->kvm->mmu_lock); - if (make_mmu_pages_available(vcpu) < 0) { - spin_unlock(&vcpu->kvm->mmu_lock); - return -ENOSPC; - } - sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT), - i << 30, PT32_ROOT_LEVEL, 1, ACC_ALL); - root = __pa(sp->spt); - ++sp->root_count; - spin_unlock(&vcpu->kvm->mmu_lock); - vcpu->arch.mmu->pae_root[i] = root | PT_PRESENT_MASK; - } - vcpu->arch.mmu->root_hpa = __pa(vcpu->arch.mmu->pae_root); - } else - BUG(); - vcpu->arch.mmu->root_cr3 = vcpu->arch.mmu->get_cr3(vcpu); - - return 0; -} - -static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) -{ - struct kvm_mmu_page *sp; - u64 pdptr, pm_mask; - gfn_t root_gfn, root_cr3; - int i; - - root_cr3 = vcpu->arch.mmu->get_cr3(vcpu); - root_gfn = root_cr3 >> PAGE_SHIFT; - - if (mmu_check_root(vcpu, root_gfn)) - return 1; - - /* - * Do we shadow a long mode page table? If so we need to - * write-protect the guests page table root. - */ - if (vcpu->arch.mmu->root_level >= PT64_ROOT_4LEVEL) { - hpa_t root = vcpu->arch.mmu->root_hpa; - - MMU_WARN_ON(VALID_PAGE(root)); - - spin_lock(&vcpu->kvm->mmu_lock); - if (make_mmu_pages_available(vcpu) < 0) { - spin_unlock(&vcpu->kvm->mmu_lock); - return -ENOSPC; - } - sp = kvm_mmu_get_page(vcpu, root_gfn, 0, - vcpu->arch.mmu->shadow_root_level, 0, ACC_ALL); - root = __pa(sp->spt); - ++sp->root_count; - spin_unlock(&vcpu->kvm->mmu_lock); - vcpu->arch.mmu->root_hpa = root; - goto set_root_cr3; - } - - /* - * We shadow a 32 bit page table. This may be a legacy 2-level - * or a PAE 3-level page table. In either case we need to be aware that - * the shadow page table may be a PAE or a long mode page table. 
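- * In the latter case the four pae_root entries are hung off a single
- * lm_root page that is allocated on demand further down.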
- */ - pm_mask = PT_PRESENT_MASK; - if (vcpu->arch.mmu->shadow_root_level == PT64_ROOT_4LEVEL) - pm_mask |= PT_ACCESSED_MASK | PT_WRITABLE_MASK | PT_USER_MASK; - - for (i = 0; i < 4; ++i) { - hpa_t root = vcpu->arch.mmu->pae_root[i]; - - MMU_WARN_ON(VALID_PAGE(root)); - if (vcpu->arch.mmu->root_level == PT32E_ROOT_LEVEL) { - pdptr = vcpu->arch.mmu->get_pdptr(vcpu, i); - if (!(pdptr & PT_PRESENT_MASK)) { - vcpu->arch.mmu->pae_root[i] = 0; - continue; - } - root_gfn = pdptr >> PAGE_SHIFT; - if (mmu_check_root(vcpu, root_gfn)) - return 1; - } - spin_lock(&vcpu->kvm->mmu_lock); - if (make_mmu_pages_available(vcpu) < 0) { - spin_unlock(&vcpu->kvm->mmu_lock); - return -ENOSPC; - } - sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, PT32_ROOT_LEVEL, - 0, ACC_ALL); - root = __pa(sp->spt); - ++sp->root_count; - spin_unlock(&vcpu->kvm->mmu_lock); - - vcpu->arch.mmu->pae_root[i] = root | pm_mask; - } - vcpu->arch.mmu->root_hpa = __pa(vcpu->arch.mmu->pae_root); - - /* - * If we shadow a 32 bit page table with a long mode page - * table we enter this path. - */ - if (vcpu->arch.mmu->shadow_root_level == PT64_ROOT_4LEVEL) { - if (vcpu->arch.mmu->lm_root == NULL) { - /* - * The additional page necessary for this is only - * allocated on demand. - */ - - u64 *lm_root; - - lm_root = (void*)get_zeroed_page(GFP_KERNEL_ACCOUNT); - if (lm_root == NULL) - return 1; - - lm_root[0] = __pa(vcpu->arch.mmu->pae_root) | pm_mask; - - vcpu->arch.mmu->lm_root = lm_root; - } - - vcpu->arch.mmu->root_hpa = __pa(vcpu->arch.mmu->lm_root); - } - -set_root_cr3: - vcpu->arch.mmu->root_cr3 = root_cr3; - - return 0; -} - -static int mmu_alloc_roots(struct kvm_vcpu *vcpu) -{ - if (vcpu->arch.mmu->direct_map) - return mmu_alloc_direct_roots(vcpu); - else - return mmu_alloc_shadow_roots(vcpu); -} - -void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu) -{ - int i; - struct kvm_mmu_page *sp; - - if (vcpu->arch.mmu->direct_map) - return; - - if (!VALID_PAGE(vcpu->arch.mmu->root_hpa)) - return; - - vcpu_clear_mmio_info(vcpu, MMIO_GVA_ANY); - - if (vcpu->arch.mmu->root_level >= PT64_ROOT_4LEVEL) { - hpa_t root = vcpu->arch.mmu->root_hpa; - sp = page_header(root); - - /* - * Even if another CPU was marking the SP as unsync-ed - * simultaneously, any guest page table changes are not - * guaranteed to be visible anyway until this VCPU issues a TLB - * flush strictly after those changes are made. We only need to - * ensure that the other CPU sets these flags before any actual - * changes to the page tables are made. The comments in - * mmu_need_write_protect() describe what could go wrong if this - * requirement isn't satisfied. 
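- * The smp_load_acquire() calls below provide the read side of that
- * ordering.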
- */ - if (!smp_load_acquire(&sp->unsync) && - !smp_load_acquire(&sp->unsync_children)) - return; - - spin_lock(&vcpu->kvm->mmu_lock); - kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC); - - mmu_sync_children(vcpu, sp); - - kvm_mmu_audit(vcpu, AUDIT_POST_SYNC); - spin_unlock(&vcpu->kvm->mmu_lock); - return; - } - - spin_lock(&vcpu->kvm->mmu_lock); - kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC); - - for (i = 0; i < 4; ++i) { - hpa_t root = vcpu->arch.mmu->pae_root[i]; - - if (root && VALID_PAGE(root)) { - root &= PT64_BASE_ADDR_MASK; - sp = page_header(root); - mmu_sync_children(vcpu, sp); - } - } - - kvm_mmu_audit(vcpu, AUDIT_POST_SYNC); - spin_unlock(&vcpu->kvm->mmu_lock); -} -EXPORT_SYMBOL_GPL(kvm_mmu_sync_roots); - -static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr, - u32 access, struct x86_exception *exception) -{ - if (exception) - exception->error_code = 0; - return vaddr; -} - -static gpa_t nonpaging_gva_to_gpa_nested(struct kvm_vcpu *vcpu, gva_t vaddr, - u32 access, - struct x86_exception *exception) -{ - if (exception) - exception->error_code = 0; - return vcpu->arch.nested_mmu.translate_gpa(vcpu, vaddr, access, exception); -} - -static bool -__is_rsvd_bits_set(struct rsvd_bits_validate *rsvd_check, u64 pte, int level) -{ - int bit7 = (pte >> 7) & 1, low6 = pte & 0x3f; - - return (pte & rsvd_check->rsvd_bits_mask[bit7][level-1]) | - ((rsvd_check->bad_mt_xwr & (1ull << low6)) != 0); -} - -static bool is_rsvd_bits_set(struct kvm_mmu *mmu, u64 gpte, int level) -{ - return __is_rsvd_bits_set(&mmu->guest_rsvd_check, gpte, level); -} - -static bool is_shadow_zero_bits_set(struct kvm_mmu *mmu, u64 spte, int level) -{ - return __is_rsvd_bits_set(&mmu->shadow_zero_check, spte, level); -} - -static bool mmio_info_in_cache(struct kvm_vcpu *vcpu, u64 addr, bool direct) -{ - /* - * A nested guest cannot use the MMIO cache if it is using nested - * page tables, because cr2 is a nGPA while the cache stores GPAs. - */ - if (mmu_is_nested(vcpu)) - return false; - - if (direct) - return vcpu_match_mmio_gpa(vcpu, addr); - - return vcpu_match_mmio_gva(vcpu, addr); -} - -/* return true if reserved bit is detected on spte. 
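The walk also records the spte at each level so the hierarchy can be dumped when a reserved bit is seen.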
*/
-static bool
-walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
-{
-	struct kvm_shadow_walk_iterator iterator;
-	u64 sptes[PT64_ROOT_MAX_LEVEL], spte = 0ull;
-	int root, leaf;
-	bool reserved = false;
-
-	if (!VALID_PAGE(vcpu->arch.mmu->root_hpa))
-		goto exit;
-
-	walk_shadow_page_lockless_begin(vcpu);
-
-	for (shadow_walk_init(&iterator, vcpu, addr),
-	     leaf = root = iterator.level;
-	     shadow_walk_okay(&iterator);
-	     __shadow_walk_next(&iterator, spte)) {
-		spte = mmu_spte_get_lockless(iterator.sptep);
-
-		sptes[leaf - 1] = spte;
-		leaf--;
-
-		if (!is_shadow_present_pte(spte))
-			break;
-
-		reserved |= is_shadow_zero_bits_set(vcpu->arch.mmu, spte,
-						    iterator.level);
-	}
-
-	walk_shadow_page_lockless_end(vcpu);
-
-	if (reserved) {
-		pr_err("%s: detect reserved bits on spte, addr 0x%llx, dump hierarchy:\n",
-		       __func__, addr);
-		while (root > leaf) {
-			pr_err("------ spte 0x%llx level %d.\n",
-			       sptes[root - 1], root);
-			root--;
-		}
-	}
-exit:
-	*sptep = spte;
-	return reserved;
-}
-
-static int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct)
-{
-	u64 spte;
-	bool reserved;
-
-	if (mmio_info_in_cache(vcpu, addr, direct))
-		return RET_PF_EMULATE;
-
-	reserved = walk_shadow_page_get_mmio_spte(vcpu, addr, &spte);
-	if (WARN_ON(reserved))
-		return -EINVAL;
-
-	if (is_mmio_spte(spte)) {
-		gfn_t gfn = get_mmio_spte_gfn(spte);
-		unsigned access = get_mmio_spte_access(spte);
-
-		if (!check_mmio_spte(vcpu, spte))
-			return RET_PF_INVALID;
-
-		if (direct)
-			addr = 0;
-
-		trace_handle_mmio_page_fault(addr, gfn, access);
-		vcpu_cache_mmio_info(vcpu, addr, gfn, access);
-		return RET_PF_EMULATE;
-	}
-
-	/*
-	 * If the page table is zapped by other cpus, let the CPU fault again
-	 * on the address.
-	 */
-	return RET_PF_RETRY;
-}
-
-static bool page_fault_handle_page_track(struct kvm_vcpu *vcpu,
-					 u32 error_code, gfn_t gfn)
-{
-	if (unlikely(error_code & PFERR_RSVD_MASK))
-		return false;
-
-	if (!(error_code & PFERR_PRESENT_MASK) ||
-	      !(error_code & PFERR_WRITE_MASK))
-		return false;
-
-	/*
-	 * The guest is writing a page which is write-tracked, which cannot
-	 * be fixed by the page fault handler.
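- * Emulating the access lets the write tracker (e.g. the shadow MMU
- * protecting guest page tables) observe the write.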
- */ - if (kvm_page_track_is_active(vcpu, gfn, KVM_PAGE_TRACK_WRITE)) - return true; - - return false; -} - -static void shadow_page_table_clear_flood(struct kvm_vcpu *vcpu, gva_t addr) -{ - struct kvm_shadow_walk_iterator iterator; - u64 spte; - - if (!VALID_PAGE(vcpu->arch.mmu->root_hpa)) - return; - - walk_shadow_page_lockless_begin(vcpu); - for_each_shadow_entry_lockless(vcpu, addr, iterator, spte) { - clear_sp_write_flooding_count(iterator.sptep); - if (!is_shadow_present_pte(spte)) - break; - } - walk_shadow_page_lockless_end(vcpu); -} - -static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva, - u32 error_code, bool prefault) -{ - gfn_t gfn = gva >> PAGE_SHIFT; - int r; - - pgprintk("%s: gva %lx error %x\n", __func__, gva, error_code); - - if (page_fault_handle_page_track(vcpu, error_code, gfn)) - return RET_PF_EMULATE; - - r = mmu_topup_memory_caches(vcpu); - if (r) - return r; - - MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root_hpa)); - - - return nonpaging_map(vcpu, gva & PAGE_MASK, - error_code, gfn, prefault); -} - -static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn) -{ - struct kvm_arch_async_pf arch; - - arch.token = (vcpu->arch.apf.id++ << 12) | vcpu->vcpu_id; - arch.gfn = gfn; - arch.direct_map = vcpu->arch.mmu->direct_map; - arch.cr3 = vcpu->arch.mmu->get_cr3(vcpu); - - return kvm_setup_async_pf(vcpu, gva, kvm_vcpu_gfn_to_hva(vcpu, gfn), &arch); -} - -static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, - gva_t gva, kvm_pfn_t *pfn, bool write, bool *writable) -{ - struct kvm_memory_slot *slot; - bool async; - - /* - * Don't expose private memslots to L2. - */ - if (is_guest_mode(vcpu) && !kvm_is_visible_gfn(vcpu->kvm, gfn)) { - *pfn = KVM_PFN_NOSLOT; - return false; - } - - slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); - async = false; - *pfn = __gfn_to_pfn_memslot(slot, gfn, false, &async, write, writable); - if (!async) - return false; /* *pfn has correct page already */ - - if (!prefault && kvm_can_do_async_pf(vcpu)) { - trace_kvm_try_async_get_page(gva, gfn); - if (kvm_find_async_pf_gfn(vcpu, gfn)) { - trace_kvm_async_pf_doublefault(gva, gfn); - kvm_make_request(KVM_REQ_APF_HALT, vcpu); - return true; - } else if (kvm_arch_setup_async_pf(vcpu, gva, gfn)) - return true; - } - - *pfn = __gfn_to_pfn_memslot(slot, gfn, false, NULL, write, writable); - return false; -} - -int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code, - u64 fault_address, char *insn, int insn_len) -{ - int r = 1; - - vcpu->arch.l1tf_flush_l1d = true; - switch (vcpu->arch.apf.host_apf_reason) { - default: - trace_kvm_page_fault(fault_address, error_code); - - if (kvm_event_needs_reinjection(vcpu)) - kvm_mmu_unprotect_page_virt(vcpu, fault_address); - r = kvm_mmu_page_fault(vcpu, fault_address, error_code, insn, - insn_len); - break; - case KVM_PV_REASON_PAGE_NOT_PRESENT: - vcpu->arch.apf.host_apf_reason = 0; - local_irq_disable(); - kvm_async_pf_task_wait(fault_address, 0); - local_irq_enable(); - break; - case KVM_PV_REASON_PAGE_READY: - vcpu->arch.apf.host_apf_reason = 0; - local_irq_disable(); - kvm_async_pf_task_wake(fault_address); - local_irq_enable(); - break; - } - return r; -} -EXPORT_SYMBOL_GPL(kvm_handle_page_fault); - -static bool -check_hugepage_cache_consistency(struct kvm_vcpu *vcpu, gfn_t gfn, int level) -{ - int page_num = KVM_PAGES_PER_HPAGE(level); - - gfn &= ~(page_num - 1); - - return kvm_mtrr_check_gfn_range_consistency(vcpu, gfn, page_num); -} - -static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 
error_code, - bool prefault) -{ - kvm_pfn_t pfn; - int r; - int level; - bool force_pt_level; - gfn_t gfn = gpa >> PAGE_SHIFT; - unsigned long mmu_seq; - int write = error_code & PFERR_WRITE_MASK; - bool map_writable; - bool lpage_disallowed = (error_code & PFERR_FETCH_MASK) && - is_nx_huge_page_enabled(); - - MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root_hpa)); - - if (page_fault_handle_page_track(vcpu, error_code, gfn)) - return RET_PF_EMULATE; - - r = mmu_topup_memory_caches(vcpu); - if (r) - return r; - - force_pt_level = - lpage_disallowed || - !check_hugepage_cache_consistency(vcpu, gfn, PT_DIRECTORY_LEVEL); - level = mapping_level(vcpu, gfn, &force_pt_level); - if (likely(!force_pt_level)) { - if (level > PT_DIRECTORY_LEVEL && - !check_hugepage_cache_consistency(vcpu, gfn, level)) - level = PT_DIRECTORY_LEVEL; - gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1); - } - - if (fast_page_fault(vcpu, gpa, level, error_code)) - return RET_PF_RETRY; - - mmu_seq = vcpu->kvm->mmu_notifier_seq; - smp_rmb(); - - if (try_async_pf(vcpu, prefault, gfn, gpa, &pfn, write, &map_writable)) - return RET_PF_RETRY; - - if (handle_abnormal_pfn(vcpu, 0, gfn, pfn, ACC_ALL, &r)) - return r; - - r = RET_PF_RETRY; - spin_lock(&vcpu->kvm->mmu_lock); - if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) - goto out_unlock; - if (make_mmu_pages_available(vcpu) < 0) - goto out_unlock; - if (likely(!force_pt_level)) - transparent_hugepage_adjust(vcpu, gfn, &pfn, &level); - r = __direct_map(vcpu, gpa, write, map_writable, level, pfn, - prefault, lpage_disallowed); -out_unlock: - spin_unlock(&vcpu->kvm->mmu_lock); - kvm_release_pfn_clean(pfn); - return r; -} - -static void nonpaging_init_context(struct kvm_vcpu *vcpu, - struct kvm_mmu *context) -{ - context->page_fault = nonpaging_page_fault; - context->gva_to_gpa = nonpaging_gva_to_gpa; - context->sync_page = nonpaging_sync_page; - context->invlpg = nonpaging_invlpg; - context->update_pte = nonpaging_update_pte; - context->root_level = 0; - context->shadow_root_level = PT32E_ROOT_LEVEL; - context->direct_map = true; - context->nx = false; -} - -/* - * Find out if a previously cached root matching the new CR3/role is available. - * The current root is also inserted into the cache. - * If a matching root was found, it is assigned to kvm_mmu->root_hpa and true is - * returned. - * Otherwise, the LRU root from the cache is assigned to kvm_mmu->root_hpa and - * false is returned. This root should now be freed by the caller. - */ -static bool cached_root_available(struct kvm_vcpu *vcpu, gpa_t new_cr3, - union kvm_mmu_page_role new_role) -{ - uint i; - struct kvm_mmu_root_info root; - struct kvm_mmu *mmu = vcpu->arch.mmu; - - root.cr3 = mmu->root_cr3; - root.hpa = mmu->root_hpa; - - for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) { - swap(root, mmu->prev_roots[i]); - - if (new_cr3 == root.cr3 && VALID_PAGE(root.hpa) && - page_header(root.hpa) != NULL && - new_role.word == page_header(root.hpa)->role.word) - break; - } - - mmu->root_hpa = root.hpa; - mmu->root_cr3 = root.cr3; - - return i < KVM_MMU_NUM_PREV_ROOTS; -} - -static bool fast_cr3_switch(struct kvm_vcpu *vcpu, gpa_t new_cr3, - union kvm_mmu_page_role new_role, - bool skip_tlb_flush) -{ - struct kvm_mmu *mmu = vcpu->arch.mmu; - - /* - * For now, limit the fast switch to 64-bit hosts+VMs in order to avoid - * having to deal with PDPTEs. We may add support for 32-bit hosts/VMs - * later if necessary. 
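The swap() in cached_root_available() above does double duty: it searches prev_roots for a matching entry while rotating the current root into the cache, keeping the array in most-recently-used order. The same rotation as a standalone sketch, assuming a three-entry cache:

#include <stdbool.h>
#include <stdint.h>

#define NR_PREV_ROOTS 3

struct demo_root { uint64_t cr3; uint64_t hpa; };

#define SWAP(a, b) \
	do { struct demo_root t = (a); (a) = (b); (b) = t; } while (0)

/*
 * On a hit: the matching entry ends up in *cur, the entries walked so
 * far have shifted down one slot, and the old current root sits in
 * slot 0.  On a miss: the oldest entry pops out in *cur.
 */
static bool demo_cached_root(struct demo_root *cur,
			     struct demo_root cache[NR_PREV_ROOTS],
			     uint64_t new_cr3)
{
	int i;

	for (i = 0; i < NR_PREV_ROOTS; i++) {
		SWAP(*cur, cache[i]);
		if (cur->cr3 == new_cr3)
			return true;
	}
	return false;
}

int main(void)
{
	struct demo_root cur = { .cr3 = 100, .hpa = 0xA000 };
	struct demo_root cache[NR_PREV_ROOTS] = {
		{ 1, 0x1000 }, { 2, 0x2000 }, { 3, 0x3000 },
	};

	if (demo_cached_root(&cur, cache, 2)) {
		/* cur is now {2, 0x2000}; cache is {100,..}, {1,..}, {3,..} */
	}
	return 0;
}

On a hit the caller installs cur as the new active root; on a miss cur holds the evicted least-recently-used entry, which the caller must free — the contract the comment above cached_root_available() describes.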
- */ - if (mmu->shadow_root_level >= PT64_ROOT_4LEVEL && - mmu->root_level >= PT64_ROOT_4LEVEL) { - if (mmu_check_root(vcpu, new_cr3 >> PAGE_SHIFT)) - return false; - - if (cached_root_available(vcpu, new_cr3, new_role)) { - /* - * It is possible that the cached previous root page is - * obsolete because of a change in the MMU generation - * number. However, changing the generation number is - * accompanied by KVM_REQ_MMU_RELOAD, which will free - * the root set here and allocate a new one. - */ - kvm_make_request(KVM_REQ_LOAD_CR3, vcpu); - if (!skip_tlb_flush) { - kvm_make_request(KVM_REQ_MMU_SYNC, vcpu); - kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); - } - - /* - * The last MMIO access's GVA and GPA are cached in the - * VCPU. When switching to a new CR3, that GVA->GPA - * mapping may no longer be valid. So clear any cached - * MMIO info even when we don't need to sync the shadow - * page tables. - */ - vcpu_clear_mmio_info(vcpu, MMIO_GVA_ANY); - - __clear_sp_write_flooding_count( - page_header(mmu->root_hpa)); - - return true; - } - } - - return false; -} - -static void __kvm_mmu_new_cr3(struct kvm_vcpu *vcpu, gpa_t new_cr3, - union kvm_mmu_page_role new_role, - bool skip_tlb_flush) -{ - if (!fast_cr3_switch(vcpu, new_cr3, new_role, skip_tlb_flush)) - kvm_mmu_free_roots(vcpu, vcpu->arch.mmu, - KVM_MMU_ROOT_CURRENT); -} - -void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu, gpa_t new_cr3, bool skip_tlb_flush) -{ - __kvm_mmu_new_cr3(vcpu, new_cr3, kvm_mmu_calc_root_page_role(vcpu), - skip_tlb_flush); -} -EXPORT_SYMBOL_GPL(kvm_mmu_new_cr3); - -static unsigned long get_cr3(struct kvm_vcpu *vcpu) -{ - return kvm_read_cr3(vcpu); -} - -static void inject_page_fault(struct kvm_vcpu *vcpu, - struct x86_exception *fault) -{ - vcpu->arch.mmu->inject_page_fault(vcpu, fault); -} - -static bool sync_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn, - unsigned access, int *nr_present) -{ - if (unlikely(is_mmio_spte(*sptep))) { - if (gfn != get_mmio_spte_gfn(*sptep)) { - mmu_spte_clear_no_track(sptep); - return true; - } - - (*nr_present)++; - mark_mmio_spte(vcpu, sptep, gfn, access); - return true; - } - - return false; -} - -static inline bool is_last_gpte(struct kvm_mmu *mmu, - unsigned level, unsigned gpte) -{ - /* - * The RHS has bit 7 set iff level < mmu->last_nonleaf_level. - * If it is clear, there are no large pages at this level, so clear - * PT_PAGE_SIZE_MASK in gpte if that is the case. - */ - gpte &= level - mmu->last_nonleaf_level; - - /* - * PT_PAGE_TABLE_LEVEL always terminates. The RHS has bit 7 set - * iff level <= PT_PAGE_TABLE_LEVEL, which for our purpose means - * level == PT_PAGE_TABLE_LEVEL; set PT_PAGE_SIZE_MASK in gpte then. - */ - gpte |= level - PT_PAGE_TABLE_LEVEL - 1; - - return gpte & PT_PAGE_SIZE_MASK; -} - -#define PTTYPE_EPT 18 /* arbitrary */ -#define PTTYPE PTTYPE_EPT -#include "paging_tmpl.h" -#undef PTTYPE - -#define PTTYPE 64 -#include "paging_tmpl.h" -#undef PTTYPE - -#define PTTYPE 32 -#include "paging_tmpl.h" -#undef PTTYPE - -static void -__reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, - struct rsvd_bits_validate *rsvd_check, - int maxphyaddr, int level, bool nx, bool gbpages, - bool pse, bool amd) -{ - u64 exb_bit_rsvd = 0; - u64 gbpages_bit_rsvd = 0; - u64 nonleaf_bit8_rsvd = 0; - - rsvd_check->bad_mt_xwr = 0; - - if (!nx) - exb_bit_rsvd = rsvd_bits(63, 63); - if (!gbpages) - gbpages_bit_rsvd = rsvd_bits(7, 7); - - /* - * Non-leaf PML4Es and PDPEs reserve bit 8 (which would be the G bit for - * leaf entries) on AMD CPUs only. 
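Both branchless steps in is_last_gpte() above lean on well-defined unsigned wraparound: level - last_nonleaf_level underflows to a value with bit 7 set exactly when level < last_nonleaf_level, and level - PT_PAGE_TABLE_LEVEL - 1 does the same when level is 1. A small demo with assertions, assuming bit 7 is the page-size bit as in x86 PTEs:

#include <assert.h>

#define PSE_BIT 0x80u	/* bit 7, i.e. PT_PAGE_SIZE_MASK */

static unsigned int demo_is_last_gpte(unsigned int level,
				      unsigned int last_nonleaf,
				      unsigned int gpte)
{
	/* bit 7 of (level - last_nonleaf) is set iff level < last_nonleaf */
	gpte &= level - last_nonleaf;
	/* force bit 7 when level == 1: 1 - 2 wraps to 0xffffffff */
	gpte |= level - 1 - 1;
	return gpte & PSE_BIT;
}

int main(void)
{
	/* level-1 entries always terminate, PSE bit or not */
	assert(demo_is_last_gpte(1, 4, 0));
	/* a level-2 entry terminates only if its PSE bit is set */
	assert(demo_is_last_gpte(2, 4, PSE_BIT));
	assert(!demo_is_last_gpte(2, 4, 0));
	/* no large pages at or above last_nonleaf_level */
	assert(!demo_is_last_gpte(4, 4, PSE_BIT));
	return 0;
}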
- */ - if (amd) - nonleaf_bit8_rsvd = rsvd_bits(8, 8); - - switch (level) { - case PT32_ROOT_LEVEL: - /* no rsvd bits for 2 level 4K page table entries */ - rsvd_check->rsvd_bits_mask[0][1] = 0; - rsvd_check->rsvd_bits_mask[0][0] = 0; - rsvd_check->rsvd_bits_mask[1][0] = - rsvd_check->rsvd_bits_mask[0][0]; - - if (!pse) { - rsvd_check->rsvd_bits_mask[1][1] = 0; - break; - } - - if (is_cpuid_PSE36()) - /* 36bits PSE 4MB page */ - rsvd_check->rsvd_bits_mask[1][1] = rsvd_bits(17, 21); - else - /* 32 bits PSE 4MB page */ - rsvd_check->rsvd_bits_mask[1][1] = rsvd_bits(13, 21); - break; - case PT32E_ROOT_LEVEL: - rsvd_check->rsvd_bits_mask[0][2] = - rsvd_bits(maxphyaddr, 63) | - rsvd_bits(5, 8) | rsvd_bits(1, 2); /* PDPTE */ - rsvd_check->rsvd_bits_mask[0][1] = exb_bit_rsvd | - rsvd_bits(maxphyaddr, 62); /* PDE */ - rsvd_check->rsvd_bits_mask[0][0] = exb_bit_rsvd | - rsvd_bits(maxphyaddr, 62); /* PTE */ - rsvd_check->rsvd_bits_mask[1][1] = exb_bit_rsvd | - rsvd_bits(maxphyaddr, 62) | - rsvd_bits(13, 20); /* large page */ - rsvd_check->rsvd_bits_mask[1][0] = - rsvd_check->rsvd_bits_mask[0][0]; - break; - case PT64_ROOT_5LEVEL: - rsvd_check->rsvd_bits_mask[0][4] = exb_bit_rsvd | - nonleaf_bit8_rsvd | rsvd_bits(7, 7) | - rsvd_bits(maxphyaddr, 51); - rsvd_check->rsvd_bits_mask[1][4] = - rsvd_check->rsvd_bits_mask[0][4]; - /* fall through */ - case PT64_ROOT_4LEVEL: - rsvd_check->rsvd_bits_mask[0][3] = exb_bit_rsvd | - nonleaf_bit8_rsvd | rsvd_bits(7, 7) | - rsvd_bits(maxphyaddr, 51); - rsvd_check->rsvd_bits_mask[0][2] = exb_bit_rsvd | - nonleaf_bit8_rsvd | gbpages_bit_rsvd | - rsvd_bits(maxphyaddr, 51); - rsvd_check->rsvd_bits_mask[0][1] = exb_bit_rsvd | - rsvd_bits(maxphyaddr, 51); - rsvd_check->rsvd_bits_mask[0][0] = exb_bit_rsvd | - rsvd_bits(maxphyaddr, 51); - rsvd_check->rsvd_bits_mask[1][3] = - rsvd_check->rsvd_bits_mask[0][3]; - rsvd_check->rsvd_bits_mask[1][2] = exb_bit_rsvd | - gbpages_bit_rsvd | rsvd_bits(maxphyaddr, 51) | - rsvd_bits(13, 29); - rsvd_check->rsvd_bits_mask[1][1] = exb_bit_rsvd | - rsvd_bits(maxphyaddr, 51) | - rsvd_bits(13, 20); /* large page */ - rsvd_check->rsvd_bits_mask[1][0] = - rsvd_check->rsvd_bits_mask[0][0]; - break; - } -} - -static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, - struct kvm_mmu *context) -{ - __reset_rsvds_bits_mask(vcpu, &context->guest_rsvd_check, - cpuid_maxphyaddr(vcpu), context->root_level, - context->nx, - guest_cpuid_has(vcpu, X86_FEATURE_GBPAGES), - is_pse(vcpu), guest_cpuid_is_amd(vcpu)); -} - -static void -__reset_rsvds_bits_mask_ept(struct rsvd_bits_validate *rsvd_check, - int maxphyaddr, bool execonly) -{ - u64 bad_mt_xwr; - - rsvd_check->rsvd_bits_mask[0][4] = - rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 7); - rsvd_check->rsvd_bits_mask[0][3] = - rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 7); - rsvd_check->rsvd_bits_mask[0][2] = - rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 6); - rsvd_check->rsvd_bits_mask[0][1] = - rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 6); - rsvd_check->rsvd_bits_mask[0][0] = rsvd_bits(maxphyaddr, 51); - - /* large page */ - rsvd_check->rsvd_bits_mask[1][4] = rsvd_check->rsvd_bits_mask[0][4]; - rsvd_check->rsvd_bits_mask[1][3] = rsvd_check->rsvd_bits_mask[0][3]; - rsvd_check->rsvd_bits_mask[1][2] = - rsvd_bits(maxphyaddr, 51) | rsvd_bits(12, 29); - rsvd_check->rsvd_bits_mask[1][1] = - rsvd_bits(maxphyaddr, 51) | rsvd_bits(12, 20); - rsvd_check->rsvd_bits_mask[1][0] = rsvd_check->rsvd_bits_mask[0][0]; - - bad_mt_xwr = 0xFFull << (2 * 8); /* bits 3..5 must not be 2 */ - bad_mt_xwr |= 0xFFull << (3 * 8); /* bits 3..5 
must not be 3 */ - bad_mt_xwr |= 0xFFull << (7 * 8); /* bits 3..5 must not be 7 */ - bad_mt_xwr |= REPEAT_BYTE(1ull << 2); /* bits 0..2 must not be 010 */ - bad_mt_xwr |= REPEAT_BYTE(1ull << 6); /* bits 0..2 must not be 110 */ - if (!execonly) { - /* bits 0..2 must not be 100 unless VMX capabilities allow it */ - bad_mt_xwr |= REPEAT_BYTE(1ull << 4); - } - rsvd_check->bad_mt_xwr = bad_mt_xwr; -} - -static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu, - struct kvm_mmu *context, bool execonly) -{ - __reset_rsvds_bits_mask_ept(&context->guest_rsvd_check, - cpuid_maxphyaddr(vcpu), execonly); -} - -/* - * the page table on host is the shadow page table for the page - * table in guest or amd nested guest, its mmu features completely - * follow the features in guest. - */ -void -reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context) -{ - bool uses_nx = context->nx || - context->mmu_role.base.smep_andnot_wp; - struct rsvd_bits_validate *shadow_zero_check; - int i; - - /* - * Passing "true" to the last argument is okay; it adds a check - * on bit 8 of the SPTEs which KVM doesn't use anyway. - */ - shadow_zero_check = &context->shadow_zero_check; - __reset_rsvds_bits_mask(vcpu, shadow_zero_check, - shadow_phys_bits, - context->shadow_root_level, uses_nx, - guest_cpuid_has(vcpu, X86_FEATURE_GBPAGES), - is_pse(vcpu), true); - - if (!shadow_me_mask) - return; - - for (i = context->shadow_root_level; --i >= 0;) { - shadow_zero_check->rsvd_bits_mask[0][i] &= ~shadow_me_mask; - shadow_zero_check->rsvd_bits_mask[1][i] &= ~shadow_me_mask; - } - -} -EXPORT_SYMBOL_GPL(reset_shadow_zero_bits_mask); - -static inline bool boot_cpu_is_amd(void) -{ - WARN_ON_ONCE(!tdp_enabled); - return shadow_x_mask == 0; -} - -/* - * the direct page table on host, use as much mmu features as - * possible, however, kvm currently does not do execution-protection. - */ -static void -reset_tdp_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, - struct kvm_mmu *context) -{ - struct rsvd_bits_validate *shadow_zero_check; - int i; - - shadow_zero_check = &context->shadow_zero_check; - - if (boot_cpu_is_amd()) - __reset_rsvds_bits_mask(vcpu, shadow_zero_check, - shadow_phys_bits, - context->shadow_root_level, false, - boot_cpu_has(X86_FEATURE_GBPAGES), - true, true); - else - __reset_rsvds_bits_mask_ept(shadow_zero_check, - shadow_phys_bits, - false); - - if (!shadow_me_mask) - return; - - for (i = context->shadow_root_level; --i >= 0;) { - shadow_zero_check->rsvd_bits_mask[0][i] &= ~shadow_me_mask; - shadow_zero_check->rsvd_bits_mask[1][i] &= ~shadow_me_mask; - } -} - -/* - * as the comments in reset_shadow_zero_bits_mask() except it - * is the shadow page table for intel nested guest. - */ -static void -reset_ept_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, - struct kvm_mmu *context, bool execonly) -{ - __reset_rsvds_bits_mask_ept(&context->shadow_zero_check, - shadow_phys_bits, execonly); -} - -#define BYTE_MASK(access) \ - ((1 & (access) ? 2 : 0) | \ - (2 & (access) ? 4 : 0) | \ - (3 & (access) ? 8 : 0) | \ - (4 & (access) ? 16 : 0) | \ - (5 & (access) ? 32 : 0) | \ - (6 & (access) ? 64 : 0) | \ - (7 & (access) ? 
128 : 0)) - - -static void update_permission_bitmask(struct kvm_vcpu *vcpu, - struct kvm_mmu *mmu, bool ept) -{ - unsigned byte; - - const u8 x = BYTE_MASK(ACC_EXEC_MASK); - const u8 w = BYTE_MASK(ACC_WRITE_MASK); - const u8 u = BYTE_MASK(ACC_USER_MASK); - - bool cr4_smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP) != 0; - bool cr4_smap = kvm_read_cr4_bits(vcpu, X86_CR4_SMAP) != 0; - bool cr0_wp = is_write_protection(vcpu); - - for (byte = 0; byte < ARRAY_SIZE(mmu->permissions); ++byte) { - unsigned pfec = byte << 1; - - /* - * Each "*f" variable has a 1 bit for each UWX value - * that causes a fault with the given PFEC. - */ - - /* Faults from writes to non-writable pages */ - u8 wf = (pfec & PFERR_WRITE_MASK) ? (u8)~w : 0; - /* Faults from user mode accesses to supervisor pages */ - u8 uf = (pfec & PFERR_USER_MASK) ? (u8)~u : 0; - /* Faults from fetches of non-executable pages*/ - u8 ff = (pfec & PFERR_FETCH_MASK) ? (u8)~x : 0; - /* Faults from kernel mode fetches of user pages */ - u8 smepf = 0; - /* Faults from kernel mode accesses of user pages */ - u8 smapf = 0; - - if (!ept) { - /* Faults from kernel mode accesses to user pages */ - u8 kf = (pfec & PFERR_USER_MASK) ? 0 : u; - - /* Not really needed: !nx will cause pte.nx to fault */ - if (!mmu->nx) - ff = 0; - - /* Allow supervisor writes if !cr0.wp */ - if (!cr0_wp) - wf = (pfec & PFERR_USER_MASK) ? wf : 0; - - /* Disallow supervisor fetches of user code if cr4.smep */ - if (cr4_smep) - smepf = (pfec & PFERR_FETCH_MASK) ? kf : 0; - - /* - * SMAP:kernel-mode data accesses from user-mode - * mappings should fault. A fault is considered - * as a SMAP violation if all of the following - * conditions are true: - * - X86_CR4_SMAP is set in CR4 - * - A user page is accessed - * - The access is not a fetch - * - Page fault in kernel mode - * - if CPL = 3 or X86_EFLAGS_AC is clear - * - * Here, we cover the first three conditions. - * The fourth is computed dynamically in permission_fault(); - * PFERR_RSVD_MASK bit will be set in PFEC if the access is - * *not* subject to SMAP restrictions. - */ - if (cr4_smap) - smapf = (pfec & (PFERR_RSVD_MASK|PFERR_FETCH_MASK)) ? 0 : kf; - } - - mmu->permissions[byte] = ff | uf | wf | smepf | smapf; - } -} - -/* -* PKU is an additional mechanism by which the paging controls access to -* user-mode addresses based on the value in the PKRU register. Protection -* key violations are reported through a bit in the page fault error code. -* Unlike other bits of the error code, the PK bit is not known at the -* call site of e.g. gva_to_gpa; it must be computed directly in -* permission_fault based on two bits of PKRU, on some machine state (CR4, -* CR0, EFER, CPL), and on other bits of the error code and the page tables. -* -* In particular the following conditions come from the error code, the -* page tables and the machine state: -* - PK is always zero unless CR4.PKE=1 and EFER.LMA=1 -* - PK is always zero if RSVD=1 (reserved bit set) or F=1 (instruction fetch) -* - PK is always zero if U=0 in the page tables -* - PKRU.WD is ignored if CR0.WP=0 and the access is a supervisor access. -* -* The PKRU bitmask caches the result of these four conditions. The error -* code (minus the P bit) and the page table's U bit form an index into the -* PKRU bitmask. Two bits of the PKRU bitmask are then extracted and ANDed -* with the two bits of the PKRU register corresponding to the protection key. -* For the first three conditions above the bits will be 00, thus masking -* away both AD and WD. 
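BYTE_MASK() expands an ACC_*_MASK value into a u8 with one bit per user/write/exec permission combination, so update_permission_bitmask() can precompute, per error code, exactly which combinations fault; the runtime check is then one shift and an AND. A reduced sketch that fills in only the write-fault rule (the real table above also folds in NX, SMEP, SMAP and CR0.WP):

#include <assert.h>
#include <stdint.h>

#define ACC_EXEC_MASK  1u
#define ACC_WRITE_MASK 2u
#define ACC_USER_MASK  4u

/* bit i is set iff permission combination i includes 'access' */
#define BYTE_MASK(access) \
	((1 & (access) ?   2 : 0) | \
	 (2 & (access) ?   4 : 0) | \
	 (3 & (access) ?   8 : 0) | \
	 (4 & (access) ?  16 : 0) | \
	 (5 & (access) ?  32 : 0) | \
	 (6 & (access) ?  64 : 0) | \
	 (7 & (access) ? 128 : 0))

static int demo_permission_fault(const uint8_t *permissions,
				 unsigned int pfec, unsigned int pte_access)
{
	/* drop PFEC.P (bit 0), then pick the bit for this pte's rights */
	return (permissions[pfec >> 1] >> pte_access) & 1;
}

int main(void)
{
	const uint8_t w = BYTE_MASK(ACC_WRITE_MASK);
	uint8_t permissions[16];
	unsigned int byte;

	/* write faults (PFEC.W, bit 1) hit every non-writable combination */
	for (byte = 0; byte < 16; ++byte)
		permissions[byte] = (byte << 1) & 2 ? (uint8_t)~w : 0;

	/* a write to a read-only user pte faults ... */
	assert(demo_permission_fault(permissions, 2, ACC_USER_MASK));
	/* ... the same write to a writable user pte does not */
	assert(!demo_permission_fault(permissions, 2,
				      ACC_USER_MASK | ACC_WRITE_MASK));
	return 0;
}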
For all reads or if the last condition holds, WD -* only will be masked away. -*/ -static void update_pkru_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, - bool ept) -{ - unsigned bit; - bool wp; - - if (ept) { - mmu->pkru_mask = 0; - return; - } - - /* PKEY is enabled only if CR4.PKE and EFER.LMA are both set. */ - if (!kvm_read_cr4_bits(vcpu, X86_CR4_PKE) || !is_long_mode(vcpu)) { - mmu->pkru_mask = 0; - return; - } - - wp = is_write_protection(vcpu); - - for (bit = 0; bit < ARRAY_SIZE(mmu->permissions); ++bit) { - unsigned pfec, pkey_bits; - bool check_pkey, check_write, ff, uf, wf, pte_user; - - pfec = bit << 1; - ff = pfec & PFERR_FETCH_MASK; - uf = pfec & PFERR_USER_MASK; - wf = pfec & PFERR_WRITE_MASK; - - /* PFEC.RSVD is replaced by ACC_USER_MASK. */ - pte_user = pfec & PFERR_RSVD_MASK; - - /* - * Only need to check the access which is not an - * instruction fetch and is to a user page. - */ - check_pkey = (!ff && pte_user); - /* - * write access is controlled by PKRU if it is a - * user access or CR0.WP = 1. - */ - check_write = check_pkey && wf && (uf || wp); - - /* PKRU.AD stops both read and write access. */ - pkey_bits = !!check_pkey; - /* PKRU.WD stops write access. */ - pkey_bits |= (!!check_write) << 1; - - mmu->pkru_mask |= (pkey_bits & 3) << pfec; - } -} - -static void update_last_nonleaf_level(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu) -{ - unsigned root_level = mmu->root_level; - - mmu->last_nonleaf_level = root_level; - if (root_level == PT32_ROOT_LEVEL && is_pse(vcpu)) - mmu->last_nonleaf_level++; -} - -static void paging64_init_context_common(struct kvm_vcpu *vcpu, - struct kvm_mmu *context, - int level) -{ - context->nx = is_nx(vcpu); - context->root_level = level; - - reset_rsvds_bits_mask(vcpu, context); - update_permission_bitmask(vcpu, context, false); - update_pkru_bitmask(vcpu, context, false); - update_last_nonleaf_level(vcpu, context); - - MMU_WARN_ON(!is_pae(vcpu)); - context->page_fault = paging64_page_fault; - context->gva_to_gpa = paging64_gva_to_gpa; - context->sync_page = paging64_sync_page; - context->invlpg = paging64_invlpg; - context->update_pte = paging64_update_pte; - context->shadow_root_level = level; - context->direct_map = false; -} - -static void paging64_init_context(struct kvm_vcpu *vcpu, - struct kvm_mmu *context) -{ - int root_level = is_la57_mode(vcpu) ? 
- PT64_ROOT_5LEVEL : PT64_ROOT_4LEVEL; - - paging64_init_context_common(vcpu, context, root_level); -} - -static void paging32_init_context(struct kvm_vcpu *vcpu, - struct kvm_mmu *context) -{ - context->nx = false; - context->root_level = PT32_ROOT_LEVEL; - - reset_rsvds_bits_mask(vcpu, context); - update_permission_bitmask(vcpu, context, false); - update_pkru_bitmask(vcpu, context, false); - update_last_nonleaf_level(vcpu, context); - - context->page_fault = paging32_page_fault; - context->gva_to_gpa = paging32_gva_to_gpa; - context->sync_page = paging32_sync_page; - context->invlpg = paging32_invlpg; - context->update_pte = paging32_update_pte; - context->shadow_root_level = PT32E_ROOT_LEVEL; - context->direct_map = false; -} - -static void paging32E_init_context(struct kvm_vcpu *vcpu, - struct kvm_mmu *context) -{ - paging64_init_context_common(vcpu, context, PT32E_ROOT_LEVEL); -} - -static union kvm_mmu_extended_role kvm_calc_mmu_role_ext(struct kvm_vcpu *vcpu) -{ - union kvm_mmu_extended_role ext = {0}; - - ext.cr0_pg = !!is_paging(vcpu); - ext.cr4_pae = !!is_pae(vcpu); - ext.cr4_smep = !!kvm_read_cr4_bits(vcpu, X86_CR4_SMEP); - ext.cr4_smap = !!kvm_read_cr4_bits(vcpu, X86_CR4_SMAP); - ext.cr4_pse = !!is_pse(vcpu); - ext.cr4_pke = !!kvm_read_cr4_bits(vcpu, X86_CR4_PKE); - ext.cr4_la57 = !!kvm_read_cr4_bits(vcpu, X86_CR4_LA57); - ext.maxphyaddr = cpuid_maxphyaddr(vcpu); - - ext.valid = 1; - - return ext; -} - -static union kvm_mmu_role kvm_calc_mmu_role_common(struct kvm_vcpu *vcpu, - bool base_only) -{ - union kvm_mmu_role role = {0}; - - role.base.access = ACC_ALL; - role.base.nxe = !!is_nx(vcpu); - role.base.cr0_wp = is_write_protection(vcpu); - role.base.smm = is_smm(vcpu); - role.base.guest_mode = is_guest_mode(vcpu); - - if (base_only) - return role; - - role.ext = kvm_calc_mmu_role_ext(vcpu); - - return role; -} - -static union kvm_mmu_role -kvm_calc_tdp_mmu_root_page_role(struct kvm_vcpu *vcpu, bool base_only) -{ - union kvm_mmu_role role = kvm_calc_mmu_role_common(vcpu, base_only); - - role.base.ad_disabled = (shadow_accessed_mask == 0); - role.base.level = kvm_x86_ops->get_tdp_level(vcpu); - role.base.direct = true; - role.base.gpte_is_8_bytes = true; - - return role; -} - -static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) -{ - struct kvm_mmu *context = vcpu->arch.mmu; - union kvm_mmu_role new_role = - kvm_calc_tdp_mmu_root_page_role(vcpu, false); - - new_role.base.word &= mmu_base_role_mask.word; - if (new_role.as_u64 == context->mmu_role.as_u64) - return; - - context->mmu_role.as_u64 = new_role.as_u64; - context->page_fault = tdp_page_fault; - context->sync_page = nonpaging_sync_page; - context->invlpg = nonpaging_invlpg; - context->update_pte = nonpaging_update_pte; - context->shadow_root_level = kvm_x86_ops->get_tdp_level(vcpu); - context->direct_map = true; - context->set_cr3 = kvm_x86_ops->set_tdp_cr3; - context->get_cr3 = get_cr3; - context->get_pdptr = kvm_pdptr_read; - context->inject_page_fault = kvm_inject_page_fault; - - if (!is_paging(vcpu)) { - context->nx = false; - context->gva_to_gpa = nonpaging_gva_to_gpa; - context->root_level = 0; - } else if (is_long_mode(vcpu)) { - context->nx = is_nx(vcpu); - context->root_level = is_la57_mode(vcpu) ? 
- PT64_ROOT_5LEVEL : PT64_ROOT_4LEVEL; - reset_rsvds_bits_mask(vcpu, context); - context->gva_to_gpa = paging64_gva_to_gpa; - } else if (is_pae(vcpu)) { - context->nx = is_nx(vcpu); - context->root_level = PT32E_ROOT_LEVEL; - reset_rsvds_bits_mask(vcpu, context); - context->gva_to_gpa = paging64_gva_to_gpa; - } else { - context->nx = false; - context->root_level = PT32_ROOT_LEVEL; - reset_rsvds_bits_mask(vcpu, context); - context->gva_to_gpa = paging32_gva_to_gpa; - } - - update_permission_bitmask(vcpu, context, false); - update_pkru_bitmask(vcpu, context, false); - update_last_nonleaf_level(vcpu, context); - reset_tdp_shadow_zero_bits_mask(vcpu, context); -} - -static union kvm_mmu_role -kvm_calc_shadow_mmu_root_page_role(struct kvm_vcpu *vcpu, bool base_only) -{ - union kvm_mmu_role role = kvm_calc_mmu_role_common(vcpu, base_only); - - role.base.smep_andnot_wp = role.ext.cr4_smep && - !is_write_protection(vcpu); - role.base.smap_andnot_wp = role.ext.cr4_smap && - !is_write_protection(vcpu); - role.base.direct = !is_paging(vcpu); - role.base.gpte_is_8_bytes = !!is_pae(vcpu); - - if (!is_long_mode(vcpu)) - role.base.level = PT32E_ROOT_LEVEL; - else if (is_la57_mode(vcpu)) - role.base.level = PT64_ROOT_5LEVEL; - else - role.base.level = PT64_ROOT_4LEVEL; - - return role; -} - -void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu) -{ - struct kvm_mmu *context = vcpu->arch.mmu; - union kvm_mmu_role new_role = - kvm_calc_shadow_mmu_root_page_role(vcpu, false); - - new_role.base.word &= mmu_base_role_mask.word; - if (new_role.as_u64 == context->mmu_role.as_u64) - return; - - if (!is_paging(vcpu)) - nonpaging_init_context(vcpu, context); - else if (is_long_mode(vcpu)) - paging64_init_context(vcpu, context); - else if (is_pae(vcpu)) - paging32E_init_context(vcpu, context); - else - paging32_init_context(vcpu, context); - - context->mmu_role.as_u64 = new_role.as_u64; - reset_shadow_zero_bits_mask(vcpu, context); -} -EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu); - -static union kvm_mmu_role -kvm_calc_shadow_ept_root_page_role(struct kvm_vcpu *vcpu, bool accessed_dirty, - bool execonly) -{ - union kvm_mmu_role role = {0}; - - /* SMM flag is inherited from root_mmu */ - role.base.smm = vcpu->arch.root_mmu.mmu_role.base.smm; - - role.base.level = PT64_ROOT_4LEVEL; - role.base.gpte_is_8_bytes = true; - role.base.direct = false; - role.base.ad_disabled = !accessed_dirty; - role.base.guest_mode = true; - role.base.access = ACC_ALL; - - /* - * WP=1 and NOT_WP=1 is an impossible combination, use WP and the - * SMAP variation to denote shadow EPT entries. 
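Each kvm_calc_*_role() helper packs the relevant paging state into one word precisely so that the "did anything change?" test in init_kvm_tdp_mmu() and kvm_init_shadow_mmu() collapses to a single integer compare. The pattern in miniature, with a hypothetical role layout:

#include <stdbool.h>
#include <stdint.h>

union demo_role {
	struct {
		uint64_t level : 3;	/* paging depth */
		uint64_t direct : 1;	/* TDP vs. shadow */
		uint64_t smm : 1;	/* address-space modifier */
	};
	uint64_t as_u64;
};

struct demo_mmu {
	union demo_role role;
	/* ... cached page-fault / gva_to_gpa callbacks ... */
};

/* Reinitialize only when the packed role actually changed. */
static bool demo_update_mmu(struct demo_mmu *mmu, union demo_role new_role)
{
	if (new_role.as_u64 == mmu->role.as_u64)
		return false;		/* fast path: context still valid */
	mmu->role = new_role;		/* slow path: rebuild the context */
	return true;
}

Reading as_u64 after writing the bitfields is the union aliasing that kvm_mmu_module_init() guards with BUILD_BUG_ON()s further down.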
- */ - role.base.cr0_wp = true; - role.base.smap_andnot_wp = true; - - role.ext = kvm_calc_mmu_role_ext(vcpu); - role.ext.execonly = execonly; - - return role; -} - -void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly, - bool accessed_dirty, gpa_t new_eptp) -{ - struct kvm_mmu *context = vcpu->arch.mmu; - union kvm_mmu_role new_role = - kvm_calc_shadow_ept_root_page_role(vcpu, accessed_dirty, - execonly); - - __kvm_mmu_new_cr3(vcpu, new_eptp, new_role.base, false); - - new_role.base.word &= mmu_base_role_mask.word; - if (new_role.as_u64 == context->mmu_role.as_u64) - return; - - context->shadow_root_level = PT64_ROOT_4LEVEL; - - context->nx = true; - context->ept_ad = accessed_dirty; - context->page_fault = ept_page_fault; - context->gva_to_gpa = ept_gva_to_gpa; - context->sync_page = ept_sync_page; - context->invlpg = ept_invlpg; - context->update_pte = ept_update_pte; - context->root_level = PT64_ROOT_4LEVEL; - context->direct_map = false; - context->mmu_role.as_u64 = new_role.as_u64; - - update_permission_bitmask(vcpu, context, true); - update_pkru_bitmask(vcpu, context, true); - update_last_nonleaf_level(vcpu, context); - reset_rsvds_bits_mask_ept(vcpu, context, execonly); - reset_ept_shadow_zero_bits_mask(vcpu, context, execonly); -} -EXPORT_SYMBOL_GPL(kvm_init_shadow_ept_mmu); - -static void init_kvm_softmmu(struct kvm_vcpu *vcpu) -{ - struct kvm_mmu *context = vcpu->arch.mmu; - - kvm_init_shadow_mmu(vcpu); - context->set_cr3 = kvm_x86_ops->set_cr3; - context->get_cr3 = get_cr3; - context->get_pdptr = kvm_pdptr_read; - context->inject_page_fault = kvm_inject_page_fault; -} - -static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu) -{ - union kvm_mmu_role new_role = kvm_calc_mmu_role_common(vcpu, false); - struct kvm_mmu *g_context = &vcpu->arch.nested_mmu; - - new_role.base.word &= mmu_base_role_mask.word; - if (new_role.as_u64 == g_context->mmu_role.as_u64) - return; - - g_context->mmu_role.as_u64 = new_role.as_u64; - g_context->get_cr3 = get_cr3; - g_context->get_pdptr = kvm_pdptr_read; - g_context->inject_page_fault = kvm_inject_page_fault; - - /* - * Note that arch.mmu->gva_to_gpa translates l2_gpa to l1_gpa using - * L1's nested page tables (e.g. EPT12). The nested translation - * of l2_gva to l1_gpa is done by arch.nested_mmu.gva_to_gpa using - * L2's page tables as the first level of translation and L1's - * nested page tables as the second level of translation. Basically - * the gva_to_gpa functions between mmu and nested_mmu are swapped. - */ - if (!is_paging(vcpu)) { - g_context->nx = false; - g_context->root_level = 0; - g_context->gva_to_gpa = nonpaging_gva_to_gpa_nested; - } else if (is_long_mode(vcpu)) { - g_context->nx = is_nx(vcpu); - g_context->root_level = is_la57_mode(vcpu) ? 
- PT64_ROOT_5LEVEL : PT64_ROOT_4LEVEL; - reset_rsvds_bits_mask(vcpu, g_context); - g_context->gva_to_gpa = paging64_gva_to_gpa_nested; - } else if (is_pae(vcpu)) { - g_context->nx = is_nx(vcpu); - g_context->root_level = PT32E_ROOT_LEVEL; - reset_rsvds_bits_mask(vcpu, g_context); - g_context->gva_to_gpa = paging64_gva_to_gpa_nested; - } else { - g_context->nx = false; - g_context->root_level = PT32_ROOT_LEVEL; - reset_rsvds_bits_mask(vcpu, g_context); - g_context->gva_to_gpa = paging32_gva_to_gpa_nested; - } - - update_permission_bitmask(vcpu, g_context, false); - update_pkru_bitmask(vcpu, g_context, false); - update_last_nonleaf_level(vcpu, g_context); -} - -void kvm_init_mmu(struct kvm_vcpu *vcpu, bool reset_roots) -{ - if (reset_roots) { - uint i; - - vcpu->arch.mmu->root_hpa = INVALID_PAGE; - - for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) - vcpu->arch.mmu->prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID; - } - - if (mmu_is_nested(vcpu)) - init_kvm_nested_mmu(vcpu); - else if (tdp_enabled) - init_kvm_tdp_mmu(vcpu); - else - init_kvm_softmmu(vcpu); -} -EXPORT_SYMBOL_GPL(kvm_init_mmu); - -static union kvm_mmu_page_role -kvm_mmu_calc_root_page_role(struct kvm_vcpu *vcpu) -{ - union kvm_mmu_role role; - - if (tdp_enabled) - role = kvm_calc_tdp_mmu_root_page_role(vcpu, true); - else - role = kvm_calc_shadow_mmu_root_page_role(vcpu, true); - - return role.base; -} - -void kvm_mmu_reset_context(struct kvm_vcpu *vcpu) -{ - kvm_mmu_unload(vcpu); - kvm_init_mmu(vcpu, true); -} -EXPORT_SYMBOL_GPL(kvm_mmu_reset_context); - -int kvm_mmu_load(struct kvm_vcpu *vcpu) -{ - int r; - - r = mmu_topup_memory_caches(vcpu); - if (r) - goto out; - r = mmu_alloc_roots(vcpu); - kvm_mmu_sync_roots(vcpu); - if (r) - goto out; - kvm_mmu_load_cr3(vcpu); - kvm_x86_ops->tlb_flush(vcpu, true); -out: - return r; -} -EXPORT_SYMBOL_GPL(kvm_mmu_load); - -void kvm_mmu_unload(struct kvm_vcpu *vcpu) -{ - kvm_mmu_free_roots(vcpu, &vcpu->arch.root_mmu, KVM_MMU_ROOTS_ALL); - WARN_ON(VALID_PAGE(vcpu->arch.root_mmu.root_hpa)); - kvm_mmu_free_roots(vcpu, &vcpu->arch.guest_mmu, KVM_MMU_ROOTS_ALL); - WARN_ON(VALID_PAGE(vcpu->arch.guest_mmu.root_hpa)); -} -EXPORT_SYMBOL_GPL(kvm_mmu_unload); - -static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu, - struct kvm_mmu_page *sp, u64 *spte, - const void *new) -{ - if (sp->role.level != PT_PAGE_TABLE_LEVEL) { - ++vcpu->kvm->stat.mmu_pde_zapped; - return; - } - - ++vcpu->kvm->stat.mmu_pte_updated; - vcpu->arch.mmu->update_pte(vcpu, sp, spte, new); -} - -static bool need_remote_flush(u64 old, u64 new) -{ - if (!is_shadow_present_pte(old)) - return false; - if (!is_shadow_present_pte(new)) - return true; - if ((old ^ new) & PT64_BASE_ADDR_MASK) - return true; - old ^= shadow_nx_mask; - new ^= shadow_nx_mask; - return (old & ~new & PT64_PERM_MASK) != 0; -} - -static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa, - int *bytes) -{ - u64 gentry = 0; - int r; - - /* - * Assume that the pte write on a page table of the same type - * as the current vcpu paging mode since we update the sptes only - * when they have the same mode. - */ - if (is_pae(vcpu) && *bytes == 4) { - /* Handle a 32-bit guest writing two halves of a 64-bit gpte */ - *gpa &= ~(gpa_t)7; - *bytes = 8; - } - - if (*bytes == 4 || *bytes == 8) { - r = kvm_vcpu_read_guest_atomic(vcpu, *gpa, &gentry, *bytes); - if (r) - gentry = 0; - } - - return gentry; -} - -/* - * If we're seeing too many writes to a page, it may no longer be a page table, - * or we may be forking, in which case it is better to unmap the page. 
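The PAE special case in mmu_pte_write_fetch_gpte() above turns a 32-bit guest's 4-byte write into a fetch of the whole 8-byte gpte containing it, by rounding the gpa down. The arithmetic in isolation:

#include <assert.h>
#include <stdint.h>

/* Widen a 4-byte write on a PAE/64-bit table to the containing gpte. */
static void demo_widen_gpte_write(uint64_t *gpa, int *bytes)
{
	if (*bytes == 4) {
		*gpa &= ~(uint64_t)7;	/* round down to the 8-byte entry */
		*bytes = 8;
	}
}

int main(void)
{
	uint64_t gpa = 0x1004;	/* guest writes the high half of a gpte */
	int bytes = 4;

	demo_widen_gpte_write(&gpa, &bytes);
	assert(gpa == 0x1000 && bytes == 8);
	return 0;
}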
- */ -static bool detect_write_flooding(struct kvm_mmu_page *sp) -{ - /* - * Skip write-flooding detected for the sp whose level is 1, because - * it can become unsync, then the guest page is not write-protected. - */ - if (sp->role.level == PT_PAGE_TABLE_LEVEL) - return false; - - atomic_inc(&sp->write_flooding_count); - return atomic_read(&sp->write_flooding_count) >= 3; -} - -/* - * Misaligned accesses are too much trouble to fix up; also, they usually - * indicate a page is not used as a page table. - */ -static bool detect_write_misaligned(struct kvm_mmu_page *sp, gpa_t gpa, - int bytes) -{ - unsigned offset, pte_size, misaligned; - - pgprintk("misaligned: gpa %llx bytes %d role %x\n", - gpa, bytes, sp->role.word); - - offset = offset_in_page(gpa); - pte_size = sp->role.gpte_is_8_bytes ? 8 : 4; - - /* - * Sometimes, the OS only writes the last one bytes to update status - * bits, for example, in linux, andb instruction is used in clear_bit(). - */ - if (!(offset & (pte_size - 1)) && bytes == 1) - return false; - - misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1); - misaligned |= bytes < 4; - - return misaligned; -} - -static u64 *get_written_sptes(struct kvm_mmu_page *sp, gpa_t gpa, int *nspte) -{ - unsigned page_offset, quadrant; - u64 *spte; - int level; - - page_offset = offset_in_page(gpa); - level = sp->role.level; - *nspte = 1; - if (!sp->role.gpte_is_8_bytes) { - page_offset <<= 1; /* 32->64 */ - /* - * A 32-bit pde maps 4MB while the shadow pdes map - * only 2MB. So we need to double the offset again - * and zap two pdes instead of one. - */ - if (level == PT32_ROOT_LEVEL) { - page_offset &= ~7; /* kill rounding error */ - page_offset <<= 1; - *nspte = 2; - } - quadrant = page_offset >> PAGE_SHIFT; - page_offset &= ~PAGE_MASK; - if (quadrant != sp->role.quadrant) - return NULL; - } - - spte = &sp->spt[page_offset / sizeof(*spte)]; - return spte; -} - -static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, - const u8 *new, int bytes, - struct kvm_page_track_notifier_node *node) -{ - gfn_t gfn = gpa >> PAGE_SHIFT; - struct kvm_mmu_page *sp; - LIST_HEAD(invalid_list); - u64 entry, gentry, *spte; - int npte; - bool remote_flush, local_flush; - - /* - * If we don't have indirect shadow pages, it means no page is - * write-protected, so we can exit simply. - */ - if (!READ_ONCE(vcpu->kvm->arch.indirect_shadow_pages)) - return; - - remote_flush = local_flush = false; - - pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes); - - /* - * No need to care whether allocation memory is successful - * or not since pte prefetch is skiped if it does not have - * enough objects in the cache. 
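The xor in detect_write_misaligned() is a compact test for "do the first and last byte of the write land in the same pte?": a bit at or above the entry size survives the mask only when the write crosses an entry boundary. Stand-alone, with the bytes < 4 rule included:

#include <assert.h>

static unsigned int demo_misaligned(unsigned int offset, unsigned int bytes,
				    unsigned int pte_size)
{
	/* nonzero iff first and last byte fall in different entries */
	unsigned int m = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1);

	return m | (bytes < 4);
}

int main(void)
{
	assert(!demo_misaligned(0x10, 8, 8));	/* one whole 8-byte gpte */
	assert(demo_misaligned(0x14, 8, 8));	/* straddles two gptes */
	assert(demo_misaligned(0x10, 2, 8));	/* short writes are suspect */
	return 0;
}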
- */ - mmu_topup_memory_caches(vcpu); - - spin_lock(&vcpu->kvm->mmu_lock); - - gentry = mmu_pte_write_fetch_gpte(vcpu, &gpa, &bytes); - - ++vcpu->kvm->stat.mmu_pte_write; - kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE); - - for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn) { - if (detect_write_misaligned(sp, gpa, bytes) || - detect_write_flooding(sp)) { - kvm_mmu_prepare_zap_page(vcpu->kvm, sp, &invalid_list); - ++vcpu->kvm->stat.mmu_flooded; - continue; - } - - spte = get_written_sptes(sp, gpa, &npte); - if (!spte) - continue; - - local_flush = true; - while (npte--) { - u32 base_role = vcpu->arch.mmu->mmu_role.base.word; - - entry = *spte; - mmu_page_zap_pte(vcpu->kvm, sp, spte); - if (gentry && - !((sp->role.word ^ base_role) - & mmu_base_role_mask.word) && rmap_can_add(vcpu)) - mmu_pte_write_new_pte(vcpu, sp, spte, &gentry); - if (need_remote_flush(entry, *spte)) - remote_flush = true; - ++spte; - } - } - kvm_mmu_flush_or_zap(vcpu, &invalid_list, remote_flush, local_flush); - kvm_mmu_audit(vcpu, AUDIT_POST_PTE_WRITE); - spin_unlock(&vcpu->kvm->mmu_lock); -} - -int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva) -{ - gpa_t gpa; - int r; - - if (vcpu->arch.mmu->direct_map) - return 0; - - gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, NULL); - - r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT); - - return r; -} -EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page_virt); - -static int make_mmu_pages_available(struct kvm_vcpu *vcpu) -{ - LIST_HEAD(invalid_list); - - if (likely(kvm_mmu_available_pages(vcpu->kvm) >= KVM_MIN_FREE_MMU_PAGES)) - return 0; - - while (kvm_mmu_available_pages(vcpu->kvm) < KVM_REFILL_PAGES) { - if (!prepare_zap_oldest_mmu_page(vcpu->kvm, &invalid_list)) - break; - - ++vcpu->kvm->stat.mmu_recycled; - } - kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); - - if (!kvm_mmu_available_pages(vcpu->kvm)) - return -ENOSPC; - return 0; -} - -int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code, - void *insn, int insn_len) -{ - int r, emulation_type = 0; - bool direct = vcpu->arch.mmu->direct_map; - - /* With shadow page tables, fault_address contains a GVA or nGPA. */ - if (vcpu->arch.mmu->direct_map) { - vcpu->arch.gpa_available = true; - vcpu->arch.gpa_val = cr2; - } - - r = RET_PF_INVALID; - if (unlikely(error_code & PFERR_RSVD_MASK)) { - r = handle_mmio_page_fault(vcpu, cr2, direct); - if (r == RET_PF_EMULATE) - goto emulate; - } - - if (r == RET_PF_INVALID) { - r = vcpu->arch.mmu->page_fault(vcpu, cr2, - lower_32_bits(error_code), - false); - WARN_ON(r == RET_PF_INVALID); - } - - if (r == RET_PF_RETRY) - return 1; - if (r < 0) - return r; - - /* - * Before emulating the instruction, check if the error code - * was due to a RO violation while translating the guest page. - * This can occur when using nested virtualization with nested - * paging in both guests. If true, we simply unprotect the page - * and resume the guest. - */ - if (vcpu->arch.mmu->direct_map && - (error_code & PFERR_NESTED_GUEST_PAGE) == PFERR_NESTED_GUEST_PAGE) { - kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(cr2)); - return 1; - } - - /* - * vcpu->arch.mmu.page_fault returned RET_PF_EMULATE, but we can still - * optimistically try to just unprotect the page and let the processor - * re-execute the instruction that caused the page fault. Do not allow - * retrying MMIO emulation, as it's not only pointless but could also - * cause us to enter an infinite loop because the processor will keep - * faulting on the non-existent MMIO address. 
Retrying an instruction - * from a nested guest is also pointless and dangerous as we are only - * explicitly shadowing L1's page tables, i.e. unprotecting something - * for L1 isn't going to magically fix whatever issue cause L2 to fail. - */ - if (!mmio_info_in_cache(vcpu, cr2, direct) && !is_guest_mode(vcpu)) - emulation_type = EMULTYPE_ALLOW_RETRY; -emulate: - /* - * On AMD platforms, under certain conditions insn_len may be zero on #NPF. - * This can happen if a guest gets a page-fault on data access but the HW - * table walker is not able to read the instruction page (e.g instruction - * page is not present in memory). In those cases we simply restart the - * guest, with the exception of AMD Erratum 1096 which is unrecoverable. - */ - if (unlikely(insn && !insn_len)) { - if (!kvm_x86_ops->need_emulation_on_page_fault(vcpu)) - return 1; - } - - return x86_emulate_instruction(vcpu, cr2, emulation_type, insn, - insn_len); -} -EXPORT_SYMBOL_GPL(kvm_mmu_page_fault); - -void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva) -{ - struct kvm_mmu *mmu = vcpu->arch.mmu; - int i; - - /* INVLPG on a * non-canonical address is a NOP according to the SDM. */ - if (is_noncanonical_address(gva, vcpu)) - return; - - mmu->invlpg(vcpu, gva, mmu->root_hpa); - - /* - * INVLPG is required to invalidate any global mappings for the VA, - * irrespective of PCID. Since it would take us roughly similar amount - * of work to determine whether any of the prev_root mappings of the VA - * is marked global, or to just sync it blindly, so we might as well - * just always sync it. - * - * Mappings not reachable via the current cr3 or the prev_roots will be - * synced when switching to that cr3, so nothing needs to be done here - * for them. - */ - for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) - if (VALID_PAGE(mmu->prev_roots[i].hpa)) - mmu->invlpg(vcpu, gva, mmu->prev_roots[i].hpa); - - kvm_x86_ops->tlb_flush_gva(vcpu, gva); - ++vcpu->stat.invlpg; -} -EXPORT_SYMBOL_GPL(kvm_mmu_invlpg); - -void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid) -{ - struct kvm_mmu *mmu = vcpu->arch.mmu; - bool tlb_flush = false; - uint i; - - if (pcid == kvm_get_active_pcid(vcpu)) { - mmu->invlpg(vcpu, gva, mmu->root_hpa); - tlb_flush = true; - } - - for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) { - if (VALID_PAGE(mmu->prev_roots[i].hpa) && - pcid == kvm_get_pcid(vcpu, mmu->prev_roots[i].cr3)) { - mmu->invlpg(vcpu, gva, mmu->prev_roots[i].hpa); - tlb_flush = true; - } - } - - if (tlb_flush) - kvm_x86_ops->tlb_flush_gva(vcpu, gva); - - ++vcpu->stat.invlpg; - - /* - * Mappings not reachable via the current cr3 or the prev_roots will be - * synced when switching to that cr3, so nothing needs to be done here - * for them. - */ -} -EXPORT_SYMBOL_GPL(kvm_mmu_invpcid_gva); - -void kvm_enable_tdp(void) -{ - tdp_enabled = true; -} -EXPORT_SYMBOL_GPL(kvm_enable_tdp); - -void kvm_disable_tdp(void) -{ - tdp_enabled = false; -} -EXPORT_SYMBOL_GPL(kvm_disable_tdp); - - -/* The return value indicates if tlb flush on all vcpus is needed. */ -typedef bool (*slot_level_handler) (struct kvm *kvm, struct kvm_rmap_head *rmap_head); - -/* The caller should hold mmu-lock before calling this function. 
*/ -static __always_inline bool -slot_handle_level_range(struct kvm *kvm, struct kvm_memory_slot *memslot, - slot_level_handler fn, int start_level, int end_level, - gfn_t start_gfn, gfn_t end_gfn, bool lock_flush_tlb) -{ - struct slot_rmap_walk_iterator iterator; - bool flush = false; - - for_each_slot_rmap_range(memslot, start_level, end_level, start_gfn, - end_gfn, &iterator) { - if (iterator.rmap) - flush |= fn(kvm, iterator.rmap); - - if (need_resched() || spin_needbreak(&kvm->mmu_lock)) { - if (flush && lock_flush_tlb) { - kvm_flush_remote_tlbs_with_address(kvm, - start_gfn, - iterator.gfn - start_gfn + 1); - flush = false; - } - cond_resched_lock(&kvm->mmu_lock); - } - } - - if (flush && lock_flush_tlb) { - kvm_flush_remote_tlbs_with_address(kvm, start_gfn, - end_gfn - start_gfn + 1); - flush = false; - } - - return flush; -} - -static __always_inline bool -slot_handle_level(struct kvm *kvm, struct kvm_memory_slot *memslot, - slot_level_handler fn, int start_level, int end_level, - bool lock_flush_tlb) -{ - return slot_handle_level_range(kvm, memslot, fn, start_level, - end_level, memslot->base_gfn, - memslot->base_gfn + memslot->npages - 1, - lock_flush_tlb); -} - -static __always_inline bool -slot_handle_all_level(struct kvm *kvm, struct kvm_memory_slot *memslot, - slot_level_handler fn, bool lock_flush_tlb) -{ - return slot_handle_level(kvm, memslot, fn, PT_PAGE_TABLE_LEVEL, - PT_MAX_HUGEPAGE_LEVEL, lock_flush_tlb); -} - -static __always_inline bool -slot_handle_large_level(struct kvm *kvm, struct kvm_memory_slot *memslot, - slot_level_handler fn, bool lock_flush_tlb) -{ - return slot_handle_level(kvm, memslot, fn, PT_PAGE_TABLE_LEVEL + 1, - PT_MAX_HUGEPAGE_LEVEL, lock_flush_tlb); -} - -static __always_inline bool -slot_handle_leaf(struct kvm *kvm, struct kvm_memory_slot *memslot, - slot_level_handler fn, bool lock_flush_tlb) -{ - return slot_handle_level(kvm, memslot, fn, PT_PAGE_TABLE_LEVEL, - PT_PAGE_TABLE_LEVEL, lock_flush_tlb); -} - -static void free_mmu_pages(struct kvm_mmu *mmu) -{ - free_page((unsigned long)mmu->pae_root); - free_page((unsigned long)mmu->lm_root); -} - -static int alloc_mmu_pages(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu) -{ - struct page *page; - int i; - - /* - * When using PAE paging, the four PDPTEs are treated as 'root' pages, - * while the PDP table is a per-vCPU construct that's allocated at MMU - * creation. When emulating 32-bit mode, cr3 is only 32 bits even on - * x86_64. Therefore we need to allocate the PDP table in the first - * 4GB of memory, which happens to fit the DMA32 zone. Except for - * SVM's 32-bit NPT support, TDP paging doesn't use PAE paging and can - * skip allocating the PDP table. 
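The slot_handle_*() wrappers above exist so a caller can run one rmap-level callback across a whole memslot without open-coding the iterator or the lock-break/flush dance. A hypothetical handler wired through slot_handle_leaf(), mirroring how kvm_mmu_slot_leaf_clear_dirty() further down uses it (sketch only; the rmap walk itself is elided):

/* a handler returns true when its changes require a TLB flush */
static bool demo_clear_dirty(struct kvm *kvm,
			     struct kvm_rmap_head *rmap_head)
{
	/* walk the sptes hanging off this rmap, clear their D bits ... */
	return true;
}

static void demo_clear_slot_dirty(struct kvm *kvm,
				  struct kvm_memory_slot *memslot)
{
	bool flush;

	spin_lock(&kvm->mmu_lock);
	/* leaf only: dirty state lives in the last-level sptes */
	flush = slot_handle_leaf(kvm, memslot, demo_clear_dirty, false);
	spin_unlock(&kvm->mmu_lock);

	if (flush)
		kvm_flush_remote_tlbs_with_address(kvm, memslot->base_gfn,
						   memslot->npages);
}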
- */ - if (tdp_enabled && kvm_x86_ops->get_tdp_level(vcpu) > PT32E_ROOT_LEVEL) - return 0; - - page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_DMA32); - if (!page) - return -ENOMEM; - - mmu->pae_root = page_address(page); - for (i = 0; i < 4; ++i) - mmu->pae_root[i] = INVALID_PAGE; - - return 0; -} - -int kvm_mmu_create(struct kvm_vcpu *vcpu) -{ - uint i; - int ret; - - vcpu->arch.mmu = &vcpu->arch.root_mmu; - vcpu->arch.walk_mmu = &vcpu->arch.root_mmu; - - vcpu->arch.root_mmu.root_hpa = INVALID_PAGE; - vcpu->arch.root_mmu.root_cr3 = 0; - vcpu->arch.root_mmu.translate_gpa = translate_gpa; - for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) - vcpu->arch.root_mmu.prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID; - - vcpu->arch.guest_mmu.root_hpa = INVALID_PAGE; - vcpu->arch.guest_mmu.root_cr3 = 0; - vcpu->arch.guest_mmu.translate_gpa = translate_gpa; - for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) - vcpu->arch.guest_mmu.prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID; - - vcpu->arch.nested_mmu.translate_gpa = translate_nested_gpa; - - ret = alloc_mmu_pages(vcpu, &vcpu->arch.guest_mmu); - if (ret) - return ret; - - ret = alloc_mmu_pages(vcpu, &vcpu->arch.root_mmu); - if (ret) - goto fail_allocate_root; - - return ret; - fail_allocate_root: - free_mmu_pages(&vcpu->arch.guest_mmu); - return ret; -} - -#define BATCH_ZAP_PAGES 10 -static void kvm_zap_obsolete_pages(struct kvm *kvm) -{ - struct kvm_mmu_page *sp, *node; - int nr_zapped, batch = 0; - -restart: - list_for_each_entry_safe_reverse(sp, node, - &kvm->arch.active_mmu_pages, link) { - /* - * No obsolete valid page exists before a newly created page - * since active_mmu_pages is a FIFO list. - */ - if (!is_obsolete_sp(kvm, sp)) - break; - - /* - * Skip invalid pages with a non-zero root count, zapping pages - * with a non-zero root count will never succeed, i.e. the page - * will get thrown back on active_mmu_pages and we'll get stuck - * in an infinite loop. - */ - if (sp->role.invalid && sp->root_count) - continue; - - /* - * No need to flush the TLB since we're only zapping shadow - * pages with an obsolete generation number and all vCPUS have - * loaded a new root, i.e. the shadow pages being zapped cannot - * be in active use by the guest. - */ - if (batch >= BATCH_ZAP_PAGES && - cond_resched_lock(&kvm->mmu_lock)) { - batch = 0; - goto restart; - } - - if (__kvm_mmu_prepare_zap_page(kvm, sp, - &kvm->arch.zapped_obsolete_pages, &nr_zapped)) { - batch += nr_zapped; - goto restart; - } - } - - /* - * Trigger a remote TLB flush before freeing the page tables to ensure - * KVM is not in the middle of a lockless shadow page table walk, which - * may reference the pages. - */ - kvm_mmu_commit_zap_page(kvm, &kvm->arch.zapped_obsolete_pages); -} - -/* - * Fast invalidate all shadow pages and use lock-break technique - * to zap obsolete pages. - * - * It's required when memslot is being deleted or VM is being - * destroyed, in these cases, we should ensure that KVM MMU does - * not use any resource of the being-deleted slot or all slots - * after calling the function. - */ -static void kvm_mmu_zap_all_fast(struct kvm *kvm) -{ - lockdep_assert_held(&kvm->slots_lock); - - spin_lock(&kvm->mmu_lock); - trace_kvm_mmu_zap_all_fast(kvm); - - /* - * Toggle mmu_valid_gen between '0' and '1'. Because slots_lock is - * held for the entire duration of zapping obsolete pages, it's - * impossible for there to be multiple invalid generations associated - * with *valid* shadow pages at any given time, i.e. 
there is exactly - * one valid generation and (at most) one invalid generation. - */ - kvm->arch.mmu_valid_gen = kvm->arch.mmu_valid_gen ? 0 : 1; - - /* - * Notify all vcpus to reload its shadow page table and flush TLB. - * Then all vcpus will switch to new shadow page table with the new - * mmu_valid_gen. - * - * Note: we need to do this under the protection of mmu_lock, - * otherwise, vcpu would purge shadow page but miss tlb flush. - */ - kvm_reload_remote_mmus(kvm); - - kvm_zap_obsolete_pages(kvm); - spin_unlock(&kvm->mmu_lock); -} - -static bool kvm_has_zapped_obsolete_pages(struct kvm *kvm) -{ - return unlikely(!list_empty_careful(&kvm->arch.zapped_obsolete_pages)); -} - -static void kvm_mmu_invalidate_zap_pages_in_memslot(struct kvm *kvm, - struct kvm_memory_slot *slot, - struct kvm_page_track_notifier_node *node) -{ - kvm_mmu_zap_all_fast(kvm); -} - -void kvm_mmu_init_vm(struct kvm *kvm) -{ - struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker; - - node->track_write = kvm_mmu_pte_write; - node->track_flush_slot = kvm_mmu_invalidate_zap_pages_in_memslot; - kvm_page_track_register_notifier(kvm, node); -} - -void kvm_mmu_uninit_vm(struct kvm *kvm) -{ - struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker; - - kvm_page_track_unregister_notifier(kvm, node); -} - -void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end) -{ - struct kvm_memslots *slots; - struct kvm_memory_slot *memslot; - int i; - - spin_lock(&kvm->mmu_lock); - for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) { - slots = __kvm_memslots(kvm, i); - kvm_for_each_memslot(memslot, slots) { - gfn_t start, end; - - start = max(gfn_start, memslot->base_gfn); - end = min(gfn_end, memslot->base_gfn + memslot->npages); - if (start >= end) - continue; - - slot_handle_level_range(kvm, memslot, kvm_zap_rmapp, - PT_PAGE_TABLE_LEVEL, PT_MAX_HUGEPAGE_LEVEL, - start, end - 1, true); - } - } - - spin_unlock(&kvm->mmu_lock); -} - -static bool slot_rmap_write_protect(struct kvm *kvm, - struct kvm_rmap_head *rmap_head) -{ - return __rmap_write_protect(kvm, rmap_head, false); -} - -void kvm_mmu_slot_remove_write_access(struct kvm *kvm, - struct kvm_memory_slot *memslot) -{ - bool flush; - - spin_lock(&kvm->mmu_lock); - flush = slot_handle_all_level(kvm, memslot, slot_rmap_write_protect, - false); - spin_unlock(&kvm->mmu_lock); - - /* - * kvm_mmu_slot_remove_write_access() and kvm_vm_ioctl_get_dirty_log() - * which do tlb flush out of mmu-lock should be serialized by - * kvm->slots_lock otherwise tlb flush would be missed. - */ - lockdep_assert_held(&kvm->slots_lock); - - /* - * We can flush all the TLBs out of the mmu lock without TLB - * corruption since we just change the spte from writable to - * readonly so that we only need to care the case of changing - * spte from present to present (changing the spte from present - * to nonpresent will flush all the TLBs immediately), in other - * words, the only case we care is mmu_spte_update() where we - * have checked SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE - * instead of PT_WRITABLE_MASK, that means it does not depend - * on PT_WRITABLE_MASK anymore. 
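Because slots_lock is held across the whole fast zap, one bit of generation state is enough: kvm_mmu_zap_all_fast() flips it, every existing shadow page becomes obsolete at once, and the obsolescence test is a single inequality. In miniature (the real is_obsolete_sp() compares the same fields on struct kvm and struct kvm_mmu_page):

#include <assert.h>
#include <stdbool.h>

struct demo_kvm { unsigned long mmu_valid_gen : 1; };
struct demo_sp  { unsigned long mmu_valid_gen : 1; };

static bool demo_is_obsolete(struct demo_kvm *kvm, struct demo_sp *sp)
{
	return sp->mmu_valid_gen != kvm->mmu_valid_gen;
}

int main(void)
{
	struct demo_kvm kvm = { .mmu_valid_gen = 0 };
	struct demo_sp sp = { .mmu_valid_gen = 0 };	/* stamped at creation */

	assert(!demo_is_obsolete(&kvm, &sp));
	kvm.mmu_valid_gen = kvm.mmu_valid_gen ? 0 : 1;	/* fast zap begins */
	assert(demo_is_obsolete(&kvm, &sp));
	return 0;
}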
- */ - if (flush) - kvm_flush_remote_tlbs_with_address(kvm, memslot->base_gfn, - memslot->npages); -} - -static bool kvm_mmu_zap_collapsible_spte(struct kvm *kvm, - struct kvm_rmap_head *rmap_head) -{ - u64 *sptep; - struct rmap_iterator iter; - int need_tlb_flush = 0; - kvm_pfn_t pfn; - struct kvm_mmu_page *sp; - -restart: - for_each_rmap_spte(rmap_head, &iter, sptep) { - sp = page_header(__pa(sptep)); - pfn = spte_to_pfn(*sptep); - - /* - * We cannot do huge page mapping for indirect shadow pages, - * which are found on the last rmap (level = 1) when not using - * tdp; such shadow pages are synced with the page table in - * the guest, and the guest page table is using 4K page size - * mapping if the indirect sp has level = 1. - */ - if (sp->role.direct && !kvm_is_reserved_pfn(pfn) && - !kvm_is_zone_device_pfn(pfn) && - PageTransCompoundMap(pfn_to_page(pfn))) { - pte_list_remove(rmap_head, sptep); - - if (kvm_available_flush_tlb_with_range()) - kvm_flush_remote_tlbs_with_address(kvm, sp->gfn, - KVM_PAGES_PER_HPAGE(sp->role.level)); - else - need_tlb_flush = 1; - - goto restart; - } - } - - return need_tlb_flush; -} - -void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm, - const struct kvm_memory_slot *memslot) -{ - /* FIXME: const-ify all uses of struct kvm_memory_slot. */ - spin_lock(&kvm->mmu_lock); - slot_handle_leaf(kvm, (struct kvm_memory_slot *)memslot, - kvm_mmu_zap_collapsible_spte, true); - spin_unlock(&kvm->mmu_lock); -} - -void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm, - struct kvm_memory_slot *memslot) -{ - bool flush; - - spin_lock(&kvm->mmu_lock); - flush = slot_handle_leaf(kvm, memslot, __rmap_clear_dirty, false); - spin_unlock(&kvm->mmu_lock); - - lockdep_assert_held(&kvm->slots_lock); - - /* - * It's also safe to flush TLBs out of mmu lock here as currently this - * function is only used for dirty logging, in which case flushing TLB - * out of mmu lock also guarantees no dirty pages will be lost in - * dirty_bitmap. 
- */ - if (flush) - kvm_flush_remote_tlbs_with_address(kvm, memslot->base_gfn, - memslot->npages); -} -EXPORT_SYMBOL_GPL(kvm_mmu_slot_leaf_clear_dirty); - -void kvm_mmu_slot_largepage_remove_write_access(struct kvm *kvm, - struct kvm_memory_slot *memslot) -{ - bool flush; - - spin_lock(&kvm->mmu_lock); - flush = slot_handle_large_level(kvm, memslot, slot_rmap_write_protect, - false); - spin_unlock(&kvm->mmu_lock); - - /* see kvm_mmu_slot_remove_write_access */ - lockdep_assert_held(&kvm->slots_lock); - - if (flush) - kvm_flush_remote_tlbs_with_address(kvm, memslot->base_gfn, - memslot->npages); -} -EXPORT_SYMBOL_GPL(kvm_mmu_slot_largepage_remove_write_access); - -void kvm_mmu_slot_set_dirty(struct kvm *kvm, - struct kvm_memory_slot *memslot) -{ - bool flush; - - spin_lock(&kvm->mmu_lock); - flush = slot_handle_all_level(kvm, memslot, __rmap_set_dirty, false); - spin_unlock(&kvm->mmu_lock); - - lockdep_assert_held(&kvm->slots_lock); - - /* see kvm_mmu_slot_leaf_clear_dirty */ - if (flush) - kvm_flush_remote_tlbs_with_address(kvm, memslot->base_gfn, - memslot->npages); -} -EXPORT_SYMBOL_GPL(kvm_mmu_slot_set_dirty); - -void kvm_mmu_zap_all(struct kvm *kvm) -{ - struct kvm_mmu_page *sp, *node; - LIST_HEAD(invalid_list); - int ign; - - spin_lock(&kvm->mmu_lock); -restart: - list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) { - if (sp->role.invalid && sp->root_count) - continue; - if (__kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list, &ign)) - goto restart; - if (cond_resched_lock(&kvm->mmu_lock)) - goto restart; - } - - kvm_mmu_commit_zap_page(kvm, &invalid_list); - spin_unlock(&kvm->mmu_lock); -} - -void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen) -{ - WARN_ON(gen & KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS); - - gen &= MMIO_SPTE_GEN_MASK; - - /* - * Generation numbers are incremented in multiples of the number of - * address spaces in order to provide unique generations across all - * address spaces. Strip what is effectively the address space - * modifier prior to checking for a wrap of the MMIO generation so - * that a wrap in any address space is detected. - */ - gen &= ~((u64)KVM_ADDRESS_SPACE_NUM - 1); - - /* - * The very rare case: if the MMIO generation number has wrapped, - * zap all shadow pages. - */ - if (unlikely(gen == 0)) { - kvm_debug_ratelimited("kvm: zapping shadow pages for mmio generation wraparound\n"); - kvm_mmu_zap_all_fast(kvm); - } -} - -static unsigned long -mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) -{ - struct kvm *kvm; - int nr_to_scan = sc->nr_to_scan; - unsigned long freed = 0; - - mutex_lock(&kvm_lock); - - list_for_each_entry(kvm, &vm_list, vm_list) { - int idx; - LIST_HEAD(invalid_list); - - /* - * Never scan more than sc->nr_to_scan VM instances. - * Will not hit this condition practically since we do not try - * to shrink more than one VM and it is very unlikely to see - * !n_used_mmu_pages so many times. - */ - if (!nr_to_scan--) - break; - /* - * n_used_mmu_pages is accessed without holding kvm->mmu_lock - * here. We may skip a VM instance errorneosly, but we do not - * want to shrink a VM that only started to populate its MMU - * anyway. 
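Memslot generations advance in multiples of KVM_ADDRESS_SPACE_NUM so each address space sees a distinct value; kvm_mmu_invalidate_mmio_sptes() above strips that modifier before testing for a wrap, so a rollover in any address space triggers the zap. The arithmetic, assuming two address spaces and a hypothetical 19-bit MMIO generation field:

#include <assert.h>
#include <stdint.h>

#define DEMO_AS_NUM        2			/* e.g. normal + SMM */
#define DEMO_MMIO_GEN_MASK ((1ull << 19) - 1)	/* hypothetical width */

static int demo_gen_wrapped(uint64_t slots_gen)
{
	uint64_t gen = slots_gen & DEMO_MMIO_GEN_MASK;

	/* strip the per-address-space modifier before the wrap test */
	gen &= ~((uint64_t)DEMO_AS_NUM - 1);
	return gen == 0;
}

int main(void)
{
	assert(!demo_gen_wrapped(42));
	/* both address-space variants of a wrapped generation are caught */
	assert(demo_gen_wrapped(DEMO_MMIO_GEN_MASK + 1));	/* gen == 0 */
	assert(demo_gen_wrapped(1));				/* gen == 1 */
	return 0;
}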
- */ - if (!kvm->arch.n_used_mmu_pages && - !kvm_has_zapped_obsolete_pages(kvm)) - continue; - - idx = srcu_read_lock(&kvm->srcu); - spin_lock(&kvm->mmu_lock); - - if (kvm_has_zapped_obsolete_pages(kvm)) { - kvm_mmu_commit_zap_page(kvm, - &kvm->arch.zapped_obsolete_pages); - goto unlock; - } - - if (prepare_zap_oldest_mmu_page(kvm, &invalid_list)) - freed++; - kvm_mmu_commit_zap_page(kvm, &invalid_list); - -unlock: - spin_unlock(&kvm->mmu_lock); - srcu_read_unlock(&kvm->srcu, idx); - - /* - * unfair on small ones - * per-vm shrinkers cry out - * sadness comes quickly - */ - list_move_tail(&kvm->vm_list, &vm_list); - break; - } - - mutex_unlock(&kvm_lock); - return freed; -} - -static unsigned long -mmu_shrink_count(struct shrinker *shrink, struct shrink_control *sc) -{ - return percpu_counter_read_positive(&kvm_total_used_mmu_pages); -} - -static struct shrinker mmu_shrinker = { - .count_objects = mmu_shrink_count, - .scan_objects = mmu_shrink_scan, - .seeks = DEFAULT_SEEKS * 10, -}; - -static void mmu_destroy_caches(void) -{ - kmem_cache_destroy(pte_list_desc_cache); - kmem_cache_destroy(mmu_page_header_cache); -} - -static void kvm_set_mmio_spte_mask(void) -{ - u64 mask; - - /* - * Set the reserved bits and the present bit of an paging-structure - * entry to generate page fault with PFER.RSV = 1. - */ - - /* - * Mask the uppermost physical address bit, which would be reserved as - * long as the supported physical address width is less than 52. - */ - mask = 1ull << 51; - - /* Set the present bit. */ - mask |= 1ull; - - /* - * If reserved bit is not supported, clear the present bit to disable - * mmio page fault. - */ - if (IS_ENABLED(CONFIG_X86_64) && shadow_phys_bits == 52) - mask &= ~1ull; - - kvm_mmu_set_mmio_spte_mask(mask, mask, ACC_WRITE_MASK | ACC_USER_MASK); -} - -static bool get_nx_auto_mode(void) -{ - /* Return true when CPU has the bug, and mitigations are ON */ - return boot_cpu_has_bug(X86_BUG_ITLB_MULTIHIT) && !cpu_mitigations_off(); -} - -static void __set_nx_huge_pages(bool val) -{ - nx_huge_pages = itlb_multihit_kvm_mitigation = val; -} - -static int set_nx_huge_pages(const char *val, const struct kernel_param *kp) -{ - bool old_val = nx_huge_pages; - bool new_val; - - /* In "auto" mode deploy workaround only if CPU has the bug. */ - if (sysfs_streq(val, "off")) - new_val = 0; - else if (sysfs_streq(val, "force")) - new_val = 1; - else if (sysfs_streq(val, "auto")) - new_val = get_nx_auto_mode(); - else if (strtobool(val, &new_val) < 0) - return -EINVAL; - - __set_nx_huge_pages(new_val); - - if (new_val != old_val) { - struct kvm *kvm; - - mutex_lock(&kvm_lock); - - list_for_each_entry(kvm, &vm_list, vm_list) { - mutex_lock(&kvm->slots_lock); - kvm_mmu_zap_all_fast(kvm); - mutex_unlock(&kvm->slots_lock); - - wake_up_process(kvm->arch.nx_lpage_recovery_thread); - } - mutex_unlock(&kvm_lock); - } - - return 0; -} - -int kvm_mmu_module_init(void) -{ - int ret = -ENOMEM; - - if (nx_huge_pages == -1) - __set_nx_huge_pages(get_nx_auto_mode()); - - /* - * MMU roles use union aliasing which is, generally speaking, an - * undefined behavior. However, we supposedly know how compilers behave - * and the current status quo is unlikely to change. Guardians below are - * supposed to let us know if the assumption becomes false. 
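The BUILD_BUG_ON() guards that follow pin the union layouts so a compiler or ABI change cannot silently break the aliasing. The same guard expressed in plain C11, with a cut-down role union:

#include <stdint.h>

union demo_page_role {
	struct {
		uint32_t level : 4;
		uint32_t direct : 1;
		uint32_t access : 3;
		/* ... remaining role bits ... */
	};
	uint32_t word;
};

/* If a new field ever pushes the struct past 32 bits, fail the build. */
_Static_assert(sizeof(union demo_page_role) == sizeof(uint32_t),
	       "role bits no longer fit in one word");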
- */ - BUILD_BUG_ON(sizeof(union kvm_mmu_page_role) != sizeof(u32)); - BUILD_BUG_ON(sizeof(union kvm_mmu_extended_role) != sizeof(u32)); - BUILD_BUG_ON(sizeof(union kvm_mmu_role) != sizeof(u64)); - - kvm_mmu_reset_all_pte_masks(); - - kvm_set_mmio_spte_mask(); - - pte_list_desc_cache = kmem_cache_create("pte_list_desc", - sizeof(struct pte_list_desc), - 0, SLAB_ACCOUNT, NULL); - if (!pte_list_desc_cache) - goto out; - - mmu_page_header_cache = kmem_cache_create("kvm_mmu_page_header", - sizeof(struct kvm_mmu_page), - 0, SLAB_ACCOUNT, NULL); - if (!mmu_page_header_cache) - goto out; - - if (percpu_counter_init(&kvm_total_used_mmu_pages, 0, GFP_KERNEL)) - goto out; - - ret = register_shrinker(&mmu_shrinker); - if (ret) - goto out; - - return 0; - -out: - mmu_destroy_caches(); - return ret; -} - -/* - * Calculate mmu pages needed for kvm. - */ -unsigned long kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm) -{ - unsigned long nr_mmu_pages; - unsigned long nr_pages = 0; - struct kvm_memslots *slots; - struct kvm_memory_slot *memslot; - int i; - - for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) { - slots = __kvm_memslots(kvm, i); - - kvm_for_each_memslot(memslot, slots) - nr_pages += memslot->npages; - } - - nr_mmu_pages = nr_pages * KVM_PERMILLE_MMU_PAGES / 1000; - nr_mmu_pages = max(nr_mmu_pages, KVM_MIN_ALLOC_MMU_PAGES); - - return nr_mmu_pages; -} - -void kvm_mmu_destroy(struct kvm_vcpu *vcpu) -{ - kvm_mmu_unload(vcpu); - free_mmu_pages(&vcpu->arch.root_mmu); - free_mmu_pages(&vcpu->arch.guest_mmu); - mmu_free_memory_caches(vcpu); -} - -void kvm_mmu_module_exit(void) -{ - mmu_destroy_caches(); - percpu_counter_destroy(&kvm_total_used_mmu_pages); - unregister_shrinker(&mmu_shrinker); - mmu_audit_disable(); -} - -static int set_nx_huge_pages_recovery_ratio(const char *val, const struct kernel_param *kp) -{ - unsigned int old_val; - int err; - - old_val = nx_huge_pages_recovery_ratio; - err = param_set_uint(val, kp); - if (err) - return err; - - if (READ_ONCE(nx_huge_pages) && - !old_val && nx_huge_pages_recovery_ratio) { - struct kvm *kvm; - - mutex_lock(&kvm_lock); - - list_for_each_entry(kvm, &vm_list, vm_list) - wake_up_process(kvm->arch.nx_lpage_recovery_thread); - - mutex_unlock(&kvm_lock); - } - - return err; -} - -static void kvm_recover_nx_lpages(struct kvm *kvm) -{ - int rcu_idx; - struct kvm_mmu_page *sp; - unsigned int ratio; - LIST_HEAD(invalid_list); - ulong to_zap; - - rcu_idx = srcu_read_lock(&kvm->srcu); - spin_lock(&kvm->mmu_lock); - - ratio = READ_ONCE(nx_huge_pages_recovery_ratio); - to_zap = ratio ? DIV_ROUND_UP(kvm->stat.nx_lpage_splits, ratio) : 0; - while (to_zap && !list_empty(&kvm->arch.lpage_disallowed_mmu_pages)) { - /* - * We use a separate list instead of just using active_mmu_pages - * because the number of lpage_disallowed pages is expected to - * be relatively small compared to the total. - */ - sp = list_first_entry(&kvm->arch.lpage_disallowed_mmu_pages, - struct kvm_mmu_page, - lpage_disallowed_link); - WARN_ON_ONCE(!sp->lpage_disallowed); - kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list); - WARN_ON_ONCE(sp->lpage_disallowed); - - if (!--to_zap || need_resched() || spin_needbreak(&kvm->mmu_lock)) { - kvm_mmu_commit_zap_page(kvm, &invalid_list); - if (to_zap) - cond_resched_lock(&kvm->mmu_lock); - } - } - - spin_unlock(&kvm->mmu_lock); - srcu_read_unlock(&kvm->srcu, rcu_idx); -} - -static long get_nx_lpage_recovery_timeout(u64 start_time) -{ - return READ_ONCE(nx_huge_pages) && READ_ONCE(nx_huge_pages_recovery_ratio) - ? 
start_time + 60 * HZ - get_jiffies_64()
-		: MAX_SCHEDULE_TIMEOUT;
-}
-
-static int kvm_nx_lpage_recovery_worker(struct kvm *kvm, uintptr_t data)
-{
-	u64 start_time;
-	long remaining_time;
-
-	while (true) {
-		start_time = get_jiffies_64();
-		remaining_time = get_nx_lpage_recovery_timeout(start_time);
-
-		set_current_state(TASK_INTERRUPTIBLE);
-		while (!kthread_should_stop() && remaining_time > 0) {
-			schedule_timeout(remaining_time);
-			remaining_time = get_nx_lpage_recovery_timeout(start_time);
-			set_current_state(TASK_INTERRUPTIBLE);
-		}
-
-		set_current_state(TASK_RUNNING);
-
-		if (kthread_should_stop())
-			return 0;
-
-		kvm_recover_nx_lpages(kvm);
-	}
-}
-
-int kvm_mmu_post_init_vm(struct kvm *kvm)
-{
-	int err;
-
-	err = kvm_vm_create_worker_thread(kvm, kvm_nx_lpage_recovery_worker, 0,
-					  "kvm-nx-lpage-recovery",
-					  &kvm->arch.nx_lpage_recovery_thread);
-	if (!err)
-		kthread_unpark(kvm->arch.nx_lpage_recovery_thread);
-
-	return err;
-}
-
-void kvm_mmu_pre_destroy_vm(struct kvm *kvm)
-{
-	if (kvm->arch.nx_lpage_recovery_thread)
-		kthread_stop(kvm->arch.nx_lpage_recovery_thread);
-}
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
new file mode 100644
index 000000000000..6f92b40d798c
--- /dev/null
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -0,0 +1,6502 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Kernel-based Virtual Machine driver for Linux
+ *
+ * This module enables machines with Intel VT-x extensions to run virtual
+ * machines without emulation or binary translation.
+ *
+ * MMU support
+ *
+ * Copyright (C) 2006 Qumranet, Inc.
+ * Copyright 2010 Red Hat, Inc. and/or its affiliates.
+ *
+ * Authors:
+ *   Yaniv Kamay
+ *   Avi Kivity
+ */
+
+#include "irq.h"
+#include "mmu.h"
+#include "x86.h"
+#include "kvm_cache_regs.h"
+#include "cpuid.h"
+
+#include <linux/kvm_host.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/highmem.h>
+#include <linux/moduleparam.h>
+#include <linux/export.h>
+#include <linux/swap.h>
+#include <linux/hugetlb.h>
+#include <linux/compiler.h>
+#include <linux/srcu.h>
+#include <linux/slab.h>
+#include <linux/sched/signal.h>
+#include <linux/uaccess.h>
+#include <linux/hash.h>
+#include <linux/kern_levels.h>
+#include <linux/kthread.h>
+
+#include <asm/page.h>
+#include <asm/pat.h>
+#include <asm/cmpxchg.h>
+#include <asm/e820/api.h>
+#include <asm/io.h>
+#include <asm/vmx.h>
+#include <asm/kvm_page_track.h>
+#include "trace.h"
+
+extern bool itlb_multihit_kvm_mitigation;
+
+static int __read_mostly nx_huge_pages = -1;
+#ifdef CONFIG_PREEMPT_RT
+/* Recovery can cause latency spikes, disable it for PREEMPT_RT. */
+static uint __read_mostly nx_huge_pages_recovery_ratio = 0;
+#else
+static uint __read_mostly nx_huge_pages_recovery_ratio = 60;
+#endif
+
+static int set_nx_huge_pages(const char *val, const struct kernel_param *kp);
+static int set_nx_huge_pages_recovery_ratio(const char *val, const struct kernel_param *kp);
+
+static struct kernel_param_ops nx_huge_pages_ops = {
+	.set = set_nx_huge_pages,
+	.get = param_get_bool,
+};
+
+static struct kernel_param_ops nx_huge_pages_recovery_ratio_ops = {
+	.set = set_nx_huge_pages_recovery_ratio,
+	.get = param_get_uint,
+};
+
+module_param_cb(nx_huge_pages, &nx_huge_pages_ops, &nx_huge_pages, 0644);
+__MODULE_PARM_TYPE(nx_huge_pages, "bool");
+module_param_cb(nx_huge_pages_recovery_ratio, &nx_huge_pages_recovery_ratio_ops,
+		&nx_huge_pages_recovery_ratio, 0644);
+__MODULE_PARM_TYPE(nx_huge_pages_recovery_ratio, "uint");
+
+/*
+ * When setting this variable to true it enables Two-Dimensional-Paging
+ * where the hardware walks 2 page tables:
+ * 1. the guest-virtual to guest-physical
+ * 2. while doing 1. it walks guest-physical to host-physical
+ * If the hardware supports that we don't need to do shadow paging.
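+ *
+ * For example (an editorial sketch, not text from the original file): with
+ * TDP enabled, a guest memory access is translated gva->gpa by the guest's
+ * own page tables and gpa->hpa by the hardware's second level, so KVM does
+ * not have to maintain shadow copies of the guest page tables.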
+ */
+bool tdp_enabled = false;
+
+enum {
+	AUDIT_PRE_PAGE_FAULT,
+	AUDIT_POST_PAGE_FAULT,
+	AUDIT_PRE_PTE_WRITE,
+	AUDIT_POST_PTE_WRITE,
+	AUDIT_PRE_SYNC,
+	AUDIT_POST_SYNC
+};
+
+#undef MMU_DEBUG
+
+#ifdef MMU_DEBUG
+static bool dbg = 0;
+module_param(dbg, bool, 0644);
+
+#define pgprintk(x...) do { if (dbg) printk(x); } while (0)
+#define rmap_printk(x...) do { if (dbg) printk(x); } while (0)
+#define MMU_WARN_ON(x) WARN_ON(x)
+#else
+#define pgprintk(x...) do { } while (0)
+#define rmap_printk(x...) do { } while (0)
+#define MMU_WARN_ON(x) do { } while (0)
+#endif
+
+#define PTE_PREFETCH_NUM 8
+
+#define PT_FIRST_AVAIL_BITS_SHIFT 10
+#define PT64_SECOND_AVAIL_BITS_SHIFT 54
+
+/*
+ * The mask used to denote special SPTEs, which can be either MMIO SPTEs or
+ * Access Tracking SPTEs.
+ */
+#define SPTE_SPECIAL_MASK (3ULL << 52)
+#define SPTE_AD_ENABLED_MASK (0ULL << 52)
+#define SPTE_AD_DISABLED_MASK (1ULL << 52)
+#define SPTE_AD_WRPROT_ONLY_MASK (2ULL << 52)
+#define SPTE_MMIO_MASK (3ULL << 52)
+
+#define PT64_LEVEL_BITS 9
+
+#define PT64_LEVEL_SHIFT(level) \
+	(PAGE_SHIFT + (level - 1) * PT64_LEVEL_BITS)
+
+#define PT64_INDEX(address, level)\
+	(((address) >> PT64_LEVEL_SHIFT(level)) & ((1 << PT64_LEVEL_BITS) - 1))
+
+
+#define PT32_LEVEL_BITS 10
+
+#define PT32_LEVEL_SHIFT(level) \
+	(PAGE_SHIFT + (level - 1) * PT32_LEVEL_BITS)
+
+#define PT32_LVL_OFFSET_MASK(level) \
+	(PT32_BASE_ADDR_MASK & ((1ULL << (PAGE_SHIFT + (((level) - 1) \
+						* PT32_LEVEL_BITS))) - 1))
+
+#define PT32_INDEX(address, level)\
+	(((address) >> PT32_LEVEL_SHIFT(level)) & ((1 << PT32_LEVEL_BITS) - 1))
+
+
+#ifdef CONFIG_DYNAMIC_PHYSICAL_MASK
+#define PT64_BASE_ADDR_MASK (physical_mask & ~(u64)(PAGE_SIZE-1))
+#else
+#define PT64_BASE_ADDR_MASK (((1ULL << 52) - 1) & ~(u64)(PAGE_SIZE-1))
+#endif
+#define PT64_LVL_ADDR_MASK(level) \
+	(PT64_BASE_ADDR_MASK & ~((1ULL << (PAGE_SHIFT + (((level) - 1) \
+						* PT64_LEVEL_BITS))) - 1))
+#define PT64_LVL_OFFSET_MASK(level) \
+	(PT64_BASE_ADDR_MASK & ((1ULL << (PAGE_SHIFT + (((level) - 1) \
+						* PT64_LEVEL_BITS))) - 1))
+
+#define PT32_BASE_ADDR_MASK PAGE_MASK
+#define PT32_DIR_BASE_ADDR_MASK \
+	(PAGE_MASK & ~((1ULL << (PAGE_SHIFT + PT32_LEVEL_BITS)) - 1))
+#define PT32_LVL_ADDR_MASK(level) \
+	(PAGE_MASK & ~((1ULL << (PAGE_SHIFT + (((level) - 1) \
+					    * PT32_LEVEL_BITS))) - 1))
+
+#define PT64_PERM_MASK (PT_PRESENT_MASK | PT_WRITABLE_MASK | shadow_user_mask \
+			| shadow_x_mask | shadow_nx_mask | shadow_me_mask)
+
+#define ACC_EXEC_MASK 1
+#define ACC_WRITE_MASK PT_WRITABLE_MASK
+#define ACC_USER_MASK PT_USER_MASK
+#define ACC_ALL (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK)
+
+/* The mask for the R/X bits in EPT PTEs */
+#define PT64_EPT_READABLE_MASK 0x1ull
+#define PT64_EPT_EXECUTABLE_MASK 0x4ull
+
+#include <trace/events/kvm.h>
+
+#define SPTE_HOST_WRITEABLE (1ULL << PT_FIRST_AVAIL_BITS_SHIFT)
+#define SPTE_MMU_WRITEABLE (1ULL << (PT_FIRST_AVAIL_BITS_SHIFT + 1))
+
+#define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level)
+
+/* make pte_list_desc fit well in a cache line */
+#define PTE_LIST_EXT 3
+
+/*
+ * Return values of handle_mmio_page_fault and mmu.page_fault:
+ * RET_PF_RETRY: let CPU fault again on the address.
+ * RET_PF_EMULATE: mmio page fault, emulate the instruction directly.
+ *
+ * For handle_mmio_page_fault only:
+ * RET_PF_INVALID: the spte is invalid, let the real page fault path update it.
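+ *
+ * Illustrative flow (an editorial sketch, not from the original source): a
+ * fault that hits a cached MMIO spte whose generation is still current
+ * yields RET_PF_EMULATE; a stale-generation spte yields RET_PF_INVALID so
+ * the real page fault path can rebuild it.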
+ */
+enum {
+	RET_PF_RETRY = 0,
+	RET_PF_EMULATE = 1,
+	RET_PF_INVALID = 2,
+};
+
+struct pte_list_desc {
+	u64 *sptes[PTE_LIST_EXT];
+	struct pte_list_desc *more;
+};
+
+struct kvm_shadow_walk_iterator {
+	u64 addr;
+	hpa_t shadow_addr;
+	u64 *sptep;
+	int level;
+	unsigned index;
+};
+
+static const union kvm_mmu_page_role mmu_base_role_mask = {
+	.cr0_wp = 1,
+	.gpte_is_8_bytes = 1,
+	.nxe = 1,
+	.smep_andnot_wp = 1,
+	.smap_andnot_wp = 1,
+	.smm = 1,
+	.guest_mode = 1,
+	.ad_disabled = 1,
+};
+
+#define for_each_shadow_entry_using_root(_vcpu, _root, _addr, _walker)	\
+	for (shadow_walk_init_using_root(&(_walker), (_vcpu),		\
+					 (_root), (_addr));		\
+	     shadow_walk_okay(&(_walker));				\
+	     shadow_walk_next(&(_walker)))
+
+#define for_each_shadow_entry(_vcpu, _addr, _walker)		\
+	for (shadow_walk_init(&(_walker), _vcpu, _addr);	\
+	     shadow_walk_okay(&(_walker));			\
+	     shadow_walk_next(&(_walker)))
+
+#define for_each_shadow_entry_lockless(_vcpu, _addr, _walker, spte)	\
+	for (shadow_walk_init(&(_walker), _vcpu, _addr);		\
+	     shadow_walk_okay(&(_walker)) &&				\
+		({ spte = mmu_spte_get_lockless(_walker.sptep); 1; });	\
+	     __shadow_walk_next(&(_walker), spte))
+
+static struct kmem_cache *pte_list_desc_cache;
+static struct kmem_cache *mmu_page_header_cache;
+static struct percpu_counter kvm_total_used_mmu_pages;
+
+static u64 __read_mostly shadow_nx_mask;
+static u64 __read_mostly shadow_x_mask; /* mutually exclusive with nx_mask */
+static u64 __read_mostly shadow_user_mask;
+static u64 __read_mostly shadow_accessed_mask;
+static u64 __read_mostly shadow_dirty_mask;
+static u64 __read_mostly shadow_mmio_mask;
+static u64 __read_mostly shadow_mmio_value;
+static u64 __read_mostly shadow_mmio_access_mask;
+static u64 __read_mostly shadow_present_mask;
+static u64 __read_mostly shadow_me_mask;
+
+/*
+ * SPTEs used by MMUs without A/D bits are marked with SPTE_AD_DISABLED_MASK;
+ * shadow_acc_track_mask is the set of bits to be cleared in non-accessed
+ * pages.
+ */
+static u64 __read_mostly shadow_acc_track_mask;
+
+/*
+ * The mask/shift to use for saving the original R/X bits when marking the PTE
+ * as not-present for access tracking purposes. We do not save the W bit as the
+ * PTEs being access tracked also need to be dirty tracked, so the W bit will be
+ * restored only when a write is attempted to the page.
+ */
+static const u64 shadow_acc_track_saved_bits_mask = PT64_EPT_READABLE_MASK |
+						    PT64_EPT_EXECUTABLE_MASK;
+static const u64 shadow_acc_track_saved_bits_shift = PT64_SECOND_AVAIL_BITS_SHIFT;
+
+/*
+ * This mask must be set on all non-zero Non-Present or Reserved SPTEs in order
+ * to guard against L1TF attacks.
+ */
+static u64 __read_mostly shadow_nonpresent_or_rsvd_mask;
+
+/*
+ * The number of high-order 1 bits to use in the mask above.
+ */
+static const u64 shadow_nonpresent_or_rsvd_mask_len = 5;
+
+/*
+ * In some cases, we need to preserve the GFN of a non-present or reserved
+ * SPTE when we usurp the upper five bits of the physical address space to
+ * defend against L1TF, e.g. for MMIO SPTEs. To preserve the GFN, we'll
+ * shift bits of the GFN that overlap with shadow_nonpresent_or_rsvd_mask
+ * left into the reserved bits, i.e. the GFN in the SPTE will be split into
+ * high and low parts. This mask covers the lower bits of the GFN.
+ */
+static u64 __read_mostly shadow_nonpresent_or_rsvd_lower_gfn_mask;
+
+/*
+ * The number of non-reserved physical address bits irrespective of features
+ * that repurpose legal bits, e.g. MKTME.
+ */ +static u8 __read_mostly shadow_phys_bits; + +static void mmu_spte_set(u64 *sptep, u64 spte); +static bool is_executable_pte(u64 spte); +static union kvm_mmu_page_role +kvm_mmu_calc_root_page_role(struct kvm_vcpu *vcpu); + +#define CREATE_TRACE_POINTS +#include "mmutrace.h" + + +static inline bool kvm_available_flush_tlb_with_range(void) +{ + return kvm_x86_ops->tlb_remote_flush_with_range; +} + +static void kvm_flush_remote_tlbs_with_range(struct kvm *kvm, + struct kvm_tlb_range *range) +{ + int ret = -ENOTSUPP; + + if (range && kvm_x86_ops->tlb_remote_flush_with_range) + ret = kvm_x86_ops->tlb_remote_flush_with_range(kvm, range); + + if (ret) + kvm_flush_remote_tlbs(kvm); +} + +static void kvm_flush_remote_tlbs_with_address(struct kvm *kvm, + u64 start_gfn, u64 pages) +{ + struct kvm_tlb_range range; + + range.start_gfn = start_gfn; + range.pages = pages; + + kvm_flush_remote_tlbs_with_range(kvm, &range); +} + +void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value, u64 access_mask) +{ + BUG_ON((u64)(unsigned)access_mask != access_mask); + BUG_ON((mmio_mask & mmio_value) != mmio_value); + shadow_mmio_value = mmio_value | SPTE_MMIO_MASK; + shadow_mmio_mask = mmio_mask | SPTE_SPECIAL_MASK; + shadow_mmio_access_mask = access_mask; +} +EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask); + +static bool is_mmio_spte(u64 spte) +{ + return (spte & shadow_mmio_mask) == shadow_mmio_value; +} + +static inline bool sp_ad_disabled(struct kvm_mmu_page *sp) +{ + return sp->role.ad_disabled; +} + +static inline bool kvm_vcpu_ad_need_write_protect(struct kvm_vcpu *vcpu) +{ + /* + * When using the EPT page-modification log, the GPAs in the log + * would come from L2 rather than L1. Therefore, we need to rely + * on write protection to record dirty pages. This also bypasses + * PML, since writes now result in a vmexit. + */ + return vcpu->arch.mmu == &vcpu->arch.guest_mmu; +} + +static inline bool spte_ad_enabled(u64 spte) +{ + MMU_WARN_ON(is_mmio_spte(spte)); + return (spte & SPTE_SPECIAL_MASK) != SPTE_AD_DISABLED_MASK; +} + +static inline bool spte_ad_need_write_protect(u64 spte) +{ + MMU_WARN_ON(is_mmio_spte(spte)); + return (spte & SPTE_SPECIAL_MASK) != SPTE_AD_ENABLED_MASK; +} + +static bool is_nx_huge_page_enabled(void) +{ + return READ_ONCE(nx_huge_pages); +} + +static inline u64 spte_shadow_accessed_mask(u64 spte) +{ + MMU_WARN_ON(is_mmio_spte(spte)); + return spte_ad_enabled(spte) ? shadow_accessed_mask : 0; +} + +static inline u64 spte_shadow_dirty_mask(u64 spte) +{ + MMU_WARN_ON(is_mmio_spte(spte)); + return spte_ad_enabled(spte) ? shadow_dirty_mask : 0; +} + +static inline bool is_access_track_spte(u64 spte) +{ + return !spte_ad_enabled(spte) && (spte & shadow_acc_track_mask) == 0; +} + +/* + * Due to limited space in PTEs, the MMIO generation is a 19 bit subset of + * the memslots generation and is derived as follows: + * + * Bits 0-8 of the MMIO generation are propagated to spte bits 3-11 + * Bits 9-18 of the MMIO generation are propagated to spte bits 52-61 + * + * The KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS flag is intentionally not included in + * the MMIO generation number, as doing so would require stealing a bit from + * the "real" generation number and thus effectively halve the maximum number + * of MMIO generations that can be handled before encountering a wrap (which + * requires a full MMU zap). The flag is instead explicitly queried when + * checking for MMIO spte cache hits. 
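+ *
+ * For example (editorial note): generation bit 0 is stored in spte bit 3,
+ * and get_mmio_spte_generation() recovers it by masking the low field and
+ * shifting it back down by MMIO_SPTE_GEN_LOW_START.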
+ */ +#define MMIO_SPTE_GEN_MASK GENMASK_ULL(18, 0) + +#define MMIO_SPTE_GEN_LOW_START 3 +#define MMIO_SPTE_GEN_LOW_END 11 +#define MMIO_SPTE_GEN_LOW_MASK GENMASK_ULL(MMIO_SPTE_GEN_LOW_END, \ + MMIO_SPTE_GEN_LOW_START) + +#define MMIO_SPTE_GEN_HIGH_START 52 +#define MMIO_SPTE_GEN_HIGH_END 61 +#define MMIO_SPTE_GEN_HIGH_MASK GENMASK_ULL(MMIO_SPTE_GEN_HIGH_END, \ + MMIO_SPTE_GEN_HIGH_START) +static u64 generation_mmio_spte_mask(u64 gen) +{ + u64 mask; + + WARN_ON(gen & ~MMIO_SPTE_GEN_MASK); + + mask = (gen << MMIO_SPTE_GEN_LOW_START) & MMIO_SPTE_GEN_LOW_MASK; + mask |= (gen << MMIO_SPTE_GEN_HIGH_START) & MMIO_SPTE_GEN_HIGH_MASK; + return mask; +} + +static u64 get_mmio_spte_generation(u64 spte) +{ + u64 gen; + + spte &= ~shadow_mmio_mask; + + gen = (spte & MMIO_SPTE_GEN_LOW_MASK) >> MMIO_SPTE_GEN_LOW_START; + gen |= (spte & MMIO_SPTE_GEN_HIGH_MASK) >> MMIO_SPTE_GEN_HIGH_START; + return gen; +} + +static void mark_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, u64 gfn, + unsigned access) +{ + u64 gen = kvm_vcpu_memslots(vcpu)->generation & MMIO_SPTE_GEN_MASK; + u64 mask = generation_mmio_spte_mask(gen); + u64 gpa = gfn << PAGE_SHIFT; + + access &= shadow_mmio_access_mask; + mask |= shadow_mmio_value | access; + mask |= gpa | shadow_nonpresent_or_rsvd_mask; + mask |= (gpa & shadow_nonpresent_or_rsvd_mask) + << shadow_nonpresent_or_rsvd_mask_len; + + trace_mark_mmio_spte(sptep, gfn, access, gen); + mmu_spte_set(sptep, mask); +} + +static gfn_t get_mmio_spte_gfn(u64 spte) +{ + u64 gpa = spte & shadow_nonpresent_or_rsvd_lower_gfn_mask; + + gpa |= (spte >> shadow_nonpresent_or_rsvd_mask_len) + & shadow_nonpresent_or_rsvd_mask; + + return gpa >> PAGE_SHIFT; +} + +static unsigned get_mmio_spte_access(u64 spte) +{ + return spte & shadow_mmio_access_mask; +} + +static bool set_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn, + kvm_pfn_t pfn, unsigned access) +{ + if (unlikely(is_noslot_pfn(pfn))) { + mark_mmio_spte(vcpu, sptep, gfn, access); + return true; + } + + return false; +} + +static bool check_mmio_spte(struct kvm_vcpu *vcpu, u64 spte) +{ + u64 kvm_gen, spte_gen, gen; + + gen = kvm_vcpu_memslots(vcpu)->generation; + if (unlikely(gen & KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS)) + return false; + + kvm_gen = gen & MMIO_SPTE_GEN_MASK; + spte_gen = get_mmio_spte_generation(spte); + + trace_check_mmio_spte(spte, kvm_gen, spte_gen); + return likely(kvm_gen == spte_gen); +} + +/* + * Sets the shadow PTE masks used by the MMU. + * + * Assumptions: + * - Setting either @accessed_mask or @dirty_mask requires setting both + * - At least one of @accessed_mask or @acc_track_mask must be set + */ +void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, + u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 p_mask, + u64 acc_track_mask, u64 me_mask) +{ + BUG_ON(!dirty_mask != !accessed_mask); + BUG_ON(!accessed_mask && !acc_track_mask); + BUG_ON(acc_track_mask & SPTE_SPECIAL_MASK); + + shadow_user_mask = user_mask; + shadow_accessed_mask = accessed_mask; + shadow_dirty_mask = dirty_mask; + shadow_nx_mask = nx_mask; + shadow_x_mask = x_mask; + shadow_present_mask = p_mask; + shadow_acc_track_mask = acc_track_mask; + shadow_me_mask = me_mask; +} +EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes); + +static u8 kvm_get_shadow_phys_bits(void) +{ + /* + * boot_cpu_data.x86_phys_bits is reduced when MKTME is detected + * in CPU detection code, but MKTME treats those reduced bits as + * 'keyID' thus they are not reserved bits. Therefore for MKTME + * we should still return physical address bits reported by CPUID. 
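+	 *
+	 * For example (hypothetical part): a CPU with 46 physical address
+	 * bits that uses 6 of them as MKTME keyIDs reports
+	 * boot_cpu_data.x86_phys_bits == 40 after detection, while CPUID
+	 * 0x80000008 EAX[7:0] still reports 46, which is the value the
+	 * reserved-bit logic needs.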
+ */
+	if (!boot_cpu_has(X86_FEATURE_TME) ||
+	    WARN_ON_ONCE(boot_cpu_data.extended_cpuid_level < 0x80000008))
+		return boot_cpu_data.x86_phys_bits;
+
+	return cpuid_eax(0x80000008) & 0xff;
+}
+
+static void kvm_mmu_reset_all_pte_masks(void)
+{
+	u8 low_phys_bits;
+
+	shadow_user_mask = 0;
+	shadow_accessed_mask = 0;
+	shadow_dirty_mask = 0;
+	shadow_nx_mask = 0;
+	shadow_x_mask = 0;
+	shadow_mmio_mask = 0;
+	shadow_present_mask = 0;
+	shadow_acc_track_mask = 0;
+
+	shadow_phys_bits = kvm_get_shadow_phys_bits();
+
+	/*
+	 * If the CPU has 46 or fewer physical address bits, then set an
+	 * appropriate mask to guard against L1TF attacks. Otherwise, it is
+	 * assumed that the CPU is not vulnerable to L1TF.
+	 *
+	 * Some Intel CPUs address the L1 cache using more PA bits than are
+	 * reported by CPUID. Use the PA width of the L1 cache when possible
+	 * to achieve more effective mitigation, e.g. if system RAM overlaps
+	 * the most significant bits of legal physical address space.
+	 */
+	shadow_nonpresent_or_rsvd_mask = 0;
+	low_phys_bits = boot_cpu_data.x86_cache_bits;
+	if (boot_cpu_data.x86_cache_bits <
+	    52 - shadow_nonpresent_or_rsvd_mask_len) {
+		shadow_nonpresent_or_rsvd_mask =
+			rsvd_bits(boot_cpu_data.x86_cache_bits -
+				  shadow_nonpresent_or_rsvd_mask_len,
+				  boot_cpu_data.x86_cache_bits - 1);
+		low_phys_bits -= shadow_nonpresent_or_rsvd_mask_len;
+	} else
+		WARN_ON_ONCE(boot_cpu_has_bug(X86_BUG_L1TF));
+
+	shadow_nonpresent_or_rsvd_lower_gfn_mask =
+		GENMASK_ULL(low_phys_bits - 1, PAGE_SHIFT);
+}
+
+static int is_cpuid_PSE36(void)
+{
+	return 1;
+}
+
+static int is_nx(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.efer & EFER_NX;
+}
+
+static int is_shadow_present_pte(u64 pte)
+{
+	return (pte != 0) && !is_mmio_spte(pte);
+}
+
+static int is_large_pte(u64 pte)
+{
+	return pte & PT_PAGE_SIZE_MASK;
+}
+
+static int is_last_spte(u64 pte, int level)
+{
+	if (level == PT_PAGE_TABLE_LEVEL)
+		return 1;
+	if (is_large_pte(pte))
+		return 1;
+	return 0;
+}
+
+static bool is_executable_pte(u64 spte)
+{
+	return (spte & (shadow_x_mask | shadow_nx_mask)) == shadow_x_mask;
+}
+
+static kvm_pfn_t spte_to_pfn(u64 pte)
+{
+	return (pte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
+}
+
+static gfn_t pse36_gfn_delta(u32 gpte)
+{
+	int shift = 32 - PT32_DIR_PSE36_SHIFT - PAGE_SHIFT;
+
+	return (gpte & PT32_DIR_PSE36_MASK) << shift;
+}
+
+#ifdef CONFIG_X86_64
+static void __set_spte(u64 *sptep, u64 spte)
+{
+	WRITE_ONCE(*sptep, spte);
+}
+
+static void __update_clear_spte_fast(u64 *sptep, u64 spte)
+{
+	WRITE_ONCE(*sptep, spte);
+}
+
+static u64 __update_clear_spte_slow(u64 *sptep, u64 spte)
+{
+	return xchg(sptep, spte);
+}
+
+static u64 __get_spte_lockless(u64 *sptep)
+{
+	return READ_ONCE(*sptep);
+}
+#else
+union split_spte {
+	struct {
+		u32 spte_low;
+		u32 spte_high;
+	};
+	u64 spte;
+};
+
+static void count_spte_clear(u64 *sptep, u64 spte)
+{
+	struct kvm_mmu_page *sp = page_header(__pa(sptep));
+
+	if (is_shadow_present_pte(spte))
+		return;
+
+	/* Ensure the spte is completely set before we increase the count */
+	smp_wmb();
+	sp->clear_spte_count++;
+}
+
+static void __set_spte(u64 *sptep, u64 spte)
+{
+	union split_spte *ssptep, sspte;
+
+	ssptep = (union split_spte *)sptep;
+	sspte = (union split_spte)spte;
+
+	ssptep->spte_high = sspte.spte_high;
+
+	/*
+	 * If we map the spte from nonpresent to present, we should store
+	 * the high bits first, then set the present bit, so the CPU cannot
+	 * fetch the spte while we are setting it.
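+	 *
+	 * For example (editorial note): without this barrier, another vCPU
+	 * could observe the new present low half paired with the stale high
+	 * half and walk a bogus PFN.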
+ */
+	smp_wmb();
+
+	WRITE_ONCE(ssptep->spte_low, sspte.spte_low);
+}
+
+static void __update_clear_spte_fast(u64 *sptep, u64 spte)
+{
+	union split_spte *ssptep, sspte;
+
+	ssptep = (union split_spte *)sptep;
+	sspte = (union split_spte)spte;
+
+	WRITE_ONCE(ssptep->spte_low, sspte.spte_low);
+
+	/*
+	 * If we map the spte from present to nonpresent, we should clear
+	 * the present bit first to prevent the vCPU from fetching the old
+	 * high bits.
+	 */
+	smp_wmb();
+
+	ssptep->spte_high = sspte.spte_high;
+	count_spte_clear(sptep, spte);
+}
+
+static u64 __update_clear_spte_slow(u64 *sptep, u64 spte)
+{
+	union split_spte *ssptep, sspte, orig;
+
+	ssptep = (union split_spte *)sptep;
+	sspte = (union split_spte)spte;
+
+	/* xchg acts as a barrier before the setting of the high bits */
+	orig.spte_low = xchg(&ssptep->spte_low, sspte.spte_low);
+	orig.spte_high = ssptep->spte_high;
+	ssptep->spte_high = sspte.spte_high;
+	count_spte_clear(sptep, spte);
+
+	return orig.spte;
+}
+
+/*
+ * The idea of this lightweight way to get the spte on an x86_32 guest
+ * comes from gup_get_pte (mm/gup.c).
+ *
+ * An spte tlb flush may be pending, because kvm_set_pte_rmapp
+ * coalesces them and we are running outside of the MMU lock. Therefore
+ * we need to protect against in-progress updates of the spte.
+ *
+ * Reading the spte while an update is in progress may get the old value
+ * for the high part of the spte. The race is fine for a present->non-present
+ * change (because the high part of the spte is ignored for non-present spte),
+ * but for a present->present change we must reread the spte.
+ *
+ * All such changes are done in two steps (present->non-present and
+ * non-present->present), hence it is enough to count the number of
+ * present->non-present updates: if it changed while reading the spte,
+ * we might have hit the race. This is done using clear_spte_count.
+ */
+static u64 __get_spte_lockless(u64 *sptep)
+{
+	struct kvm_mmu_page *sp = page_header(__pa(sptep));
+	union split_spte spte, *orig = (union split_spte *)sptep;
+	int count;
+
+retry:
+	count = sp->clear_spte_count;
+	smp_rmb();
+
+	spte.spte_low = orig->spte_low;
+	smp_rmb();
+
+	spte.spte_high = orig->spte_high;
+	smp_rmb();
+
+	if (unlikely(spte.spte_low != orig->spte_low ||
+	      count != sp->clear_spte_count))
+		goto retry;
+
+	return spte.spte;
+}
+#endif
+
+static bool spte_can_locklessly_be_made_writable(u64 spte)
+{
+	return (spte & (SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE)) ==
+		(SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE);
+}
+
+static bool spte_has_volatile_bits(u64 spte)
+{
+	if (!is_shadow_present_pte(spte))
+		return false;
+
+	/*
+	 * Always update the spte atomically if it can be updated outside
+	 * of the MMU lock: this ensures the dirty bit is not lost and
+	 * helps us get a stable is_writable_pte(), so that a required
+	 * TLB flush is not missed.
+	 */
+	if (spte_can_locklessly_be_made_writable(spte) ||
+	    is_access_track_spte(spte))
+		return true;
+
+	if (spte_ad_enabled(spte)) {
+		if ((spte & shadow_accessed_mask) == 0 ||
+		    (is_writable_pte(spte) && (spte & shadow_dirty_mask) == 0))
+			return true;
+	}
+
+	return false;
+}
+
+static bool is_accessed_spte(u64 spte)
+{
+	u64 accessed_mask = spte_shadow_accessed_mask(spte);
+
+	return accessed_mask ? spte & accessed_mask
+			     : !is_access_track_spte(spte);
+}
+
+static bool is_dirty_spte(u64 spte)
+{
+	u64 dirty_mask = spte_shadow_dirty_mask(spte);
+
+	return dirty_mask ? spte & dirty_mask : spte & PT_WRITABLE_MASK;
+}
+
+/* Rules for using mmu_spte_set:
+ * Set the sptep from nonpresent to present.
+ * Note: the sptep being assigned *must* be either not present
+ * or in a state where the hardware will not attempt to update
+ * the spte.
+ */
+static void mmu_spte_set(u64 *sptep, u64 new_spte)
+{
+	WARN_ON(is_shadow_present_pte(*sptep));
+	__set_spte(sptep, new_spte);
+}
+
+/*
+ * Update the SPTE (excluding the PFN), but do not track changes in its
+ * accessed/dirty status.
+ */
+static u64 mmu_spte_update_no_track(u64 *sptep, u64 new_spte)
+{
+	u64 old_spte = *sptep;
+
+	WARN_ON(!is_shadow_present_pte(new_spte));
+
+	if (!is_shadow_present_pte(old_spte)) {
+		mmu_spte_set(sptep, new_spte);
+		return old_spte;
+	}
+
+	if (!spte_has_volatile_bits(old_spte))
+		__update_clear_spte_fast(sptep, new_spte);
+	else
+		old_spte = __update_clear_spte_slow(sptep, new_spte);
+
+	WARN_ON(spte_to_pfn(old_spte) != spte_to_pfn(new_spte));
+
+	return old_spte;
+}
+
+/* Rules for using mmu_spte_update:
+ * Update only the state bits; i.e. the mapped pfn is not changed.
+ *
+ * Whenever we overwrite a writable spte with a read-only one we
+ * should flush remote TLBs. Otherwise rmap_write_protect
+ * will find a read-only spte, even though the writable spte
+ * might still be cached in a CPU's TLB; the return value indicates
+ * this case.
+ *
+ * Returns true if the TLB needs to be flushed
+ */
+static bool mmu_spte_update(u64 *sptep, u64 new_spte)
+{
+	bool flush = false;
+	u64 old_spte = mmu_spte_update_no_track(sptep, new_spte);
+
+	if (!is_shadow_present_pte(old_spte))
+		return false;
+
+	/*
+	 * Updating the spte outside of the MMU lock is safe, since
+	 * we always update it atomically; see the comments in
+	 * spte_has_volatile_bits().
+	 */
+	if (spte_can_locklessly_be_made_writable(old_spte) &&
+	      !is_writable_pte(new_spte))
+		flush = true;
+
+	/*
+	 * Flush TLB when accessed/dirty states are changed in the page tables,
+	 * to guarantee consistency between TLB and page tables.
+	 */
+
+	if (is_accessed_spte(old_spte) && !is_accessed_spte(new_spte)) {
+		flush = true;
+		kvm_set_pfn_accessed(spte_to_pfn(old_spte));
+	}
+
+	if (is_dirty_spte(old_spte) && !is_dirty_spte(new_spte)) {
+		flush = true;
+		kvm_set_pfn_dirty(spte_to_pfn(old_spte));
+	}
+
+	return flush;
+}
+
+/*
+ * Rules for using mmu_spte_clear_track_bits:
+ * It sets the sptep from present to nonpresent and tracks the state
+ * bits; it is used to clear a last-level sptep.
+ * Returns non-zero if the PTE was previously valid.
+ */
+static int mmu_spte_clear_track_bits(u64 *sptep)
+{
+	kvm_pfn_t pfn;
+	u64 old_spte = *sptep;
+
+	if (!spte_has_volatile_bits(old_spte))
+		__update_clear_spte_fast(sptep, 0ull);
+	else
+		old_spte = __update_clear_spte_slow(sptep, 0ull);
+
+	if (!is_shadow_present_pte(old_spte))
+		return 0;
+
+	pfn = spte_to_pfn(old_spte);
+
+	/*
+	 * KVM does not hold a refcount on the pages used by the KVM MMU,
+	 * so before a page is reclaimed it must be unmapped from the MMU
+	 * first.
+	 */
+	WARN_ON(!kvm_is_reserved_pfn(pfn) && !page_count(pfn_to_page(pfn)));
+
+	if (is_accessed_spte(old_spte))
+		kvm_set_pfn_accessed(pfn);
+
+	if (is_dirty_spte(old_spte))
+		kvm_set_pfn_dirty(pfn);
+
+	return 1;
+}
+
+/*
+ * Rules for using mmu_spte_clear_no_track:
+ * Directly clear the spte without caring about its state bits;
+ * it is used to clear an upper-level spte.
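+ * For example (editorial note), drop_parent_pte() below uses it to clear
+ * a parent spte whose accessed/dirty state is irrelevant.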
+ */ +static void mmu_spte_clear_no_track(u64 *sptep) +{ + __update_clear_spte_fast(sptep, 0ull); +} + +static u64 mmu_spte_get_lockless(u64 *sptep) +{ + return __get_spte_lockless(sptep); +} + +static u64 mark_spte_for_access_track(u64 spte) +{ + if (spte_ad_enabled(spte)) + return spte & ~shadow_accessed_mask; + + if (is_access_track_spte(spte)) + return spte; + + /* + * Making an Access Tracking PTE will result in removal of write access + * from the PTE. So, verify that we will be able to restore the write + * access in the fast page fault path later on. + */ + WARN_ONCE((spte & PT_WRITABLE_MASK) && + !spte_can_locklessly_be_made_writable(spte), + "kvm: Writable SPTE is not locklessly dirty-trackable\n"); + + WARN_ONCE(spte & (shadow_acc_track_saved_bits_mask << + shadow_acc_track_saved_bits_shift), + "kvm: Access Tracking saved bit locations are not zero\n"); + + spte |= (spte & shadow_acc_track_saved_bits_mask) << + shadow_acc_track_saved_bits_shift; + spte &= ~shadow_acc_track_mask; + + return spte; +} + +/* Restore an acc-track PTE back to a regular PTE */ +static u64 restore_acc_track_spte(u64 spte) +{ + u64 new_spte = spte; + u64 saved_bits = (spte >> shadow_acc_track_saved_bits_shift) + & shadow_acc_track_saved_bits_mask; + + WARN_ON_ONCE(spte_ad_enabled(spte)); + WARN_ON_ONCE(!is_access_track_spte(spte)); + + new_spte &= ~shadow_acc_track_mask; + new_spte &= ~(shadow_acc_track_saved_bits_mask << + shadow_acc_track_saved_bits_shift); + new_spte |= saved_bits; + + return new_spte; +} + +/* Returns the Accessed status of the PTE and resets it at the same time. */ +static bool mmu_spte_age(u64 *sptep) +{ + u64 spte = mmu_spte_get_lockless(sptep); + + if (!is_accessed_spte(spte)) + return false; + + if (spte_ad_enabled(spte)) { + clear_bit((ffs(shadow_accessed_mask) - 1), + (unsigned long *)sptep); + } else { + /* + * Capture the dirty status of the page, so that it doesn't get + * lost when the SPTE is marked for access tracking. + */ + if (is_writable_pte(spte)) + kvm_set_pfn_dirty(spte_to_pfn(spte)); + + spte = mark_spte_for_access_track(spte); + mmu_spte_update_no_track(sptep, spte); + } + + return true; +} + +static void walk_shadow_page_lockless_begin(struct kvm_vcpu *vcpu) +{ + /* + * Prevent page table teardown by making any free-er wait during + * kvm_flush_remote_tlbs() IPI to all active vcpus. + */ + local_irq_disable(); + + /* + * Make sure a following spte read is not reordered ahead of the write + * to vcpu->mode. + */ + smp_store_mb(vcpu->mode, READING_SHADOW_PAGE_TABLES); +} + +static void walk_shadow_page_lockless_end(struct kvm_vcpu *vcpu) +{ + /* + * Make sure the write to vcpu->mode is not reordered in front of + * reads to sptes. If it does, kvm_mmu_commit_zap_page() can see us + * OUTSIDE_GUEST_MODE and proceed to free the shadow page table. + */ + smp_store_release(&vcpu->mode, OUTSIDE_GUEST_MODE); + local_irq_enable(); +} + +static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache, + struct kmem_cache *base_cache, int min) +{ + void *obj; + + if (cache->nobjs >= min) + return 0; + while (cache->nobjs < ARRAY_SIZE(cache->objects)) { + obj = kmem_cache_zalloc(base_cache, GFP_KERNEL_ACCOUNT); + if (!obj) + return cache->nobjs >= min ? 
0 : -ENOMEM; + cache->objects[cache->nobjs++] = obj; + } + return 0; +} + +static int mmu_memory_cache_free_objects(struct kvm_mmu_memory_cache *cache) +{ + return cache->nobjs; +} + +static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc, + struct kmem_cache *cache) +{ + while (mc->nobjs) + kmem_cache_free(cache, mc->objects[--mc->nobjs]); +} + +static int mmu_topup_memory_cache_page(struct kvm_mmu_memory_cache *cache, + int min) +{ + void *page; + + if (cache->nobjs >= min) + return 0; + while (cache->nobjs < ARRAY_SIZE(cache->objects)) { + page = (void *)__get_free_page(GFP_KERNEL_ACCOUNT); + if (!page) + return cache->nobjs >= min ? 0 : -ENOMEM; + cache->objects[cache->nobjs++] = page; + } + return 0; +} + +static void mmu_free_memory_cache_page(struct kvm_mmu_memory_cache *mc) +{ + while (mc->nobjs) + free_page((unsigned long)mc->objects[--mc->nobjs]); +} + +static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu) +{ + int r; + + r = mmu_topup_memory_cache(&vcpu->arch.mmu_pte_list_desc_cache, + pte_list_desc_cache, 8 + PTE_PREFETCH_NUM); + if (r) + goto out; + r = mmu_topup_memory_cache_page(&vcpu->arch.mmu_page_cache, 8); + if (r) + goto out; + r = mmu_topup_memory_cache(&vcpu->arch.mmu_page_header_cache, + mmu_page_header_cache, 4); +out: + return r; +} + +static void mmu_free_memory_caches(struct kvm_vcpu *vcpu) +{ + mmu_free_memory_cache(&vcpu->arch.mmu_pte_list_desc_cache, + pte_list_desc_cache); + mmu_free_memory_cache_page(&vcpu->arch.mmu_page_cache); + mmu_free_memory_cache(&vcpu->arch.mmu_page_header_cache, + mmu_page_header_cache); +} + +static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc) +{ + void *p; + + BUG_ON(!mc->nobjs); + p = mc->objects[--mc->nobjs]; + return p; +} + +static struct pte_list_desc *mmu_alloc_pte_list_desc(struct kvm_vcpu *vcpu) +{ + return mmu_memory_cache_alloc(&vcpu->arch.mmu_pte_list_desc_cache); +} + +static void mmu_free_pte_list_desc(struct pte_list_desc *pte_list_desc) +{ + kmem_cache_free(pte_list_desc_cache, pte_list_desc); +} + +static gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index) +{ + if (!sp->role.direct) + return sp->gfns[index]; + + return sp->gfn + (index << ((sp->role.level - 1) * PT64_LEVEL_BITS)); +} + +static void kvm_mmu_page_set_gfn(struct kvm_mmu_page *sp, int index, gfn_t gfn) +{ + if (!sp->role.direct) { + sp->gfns[index] = gfn; + return; + } + + if (WARN_ON(gfn != kvm_mmu_page_get_gfn(sp, index))) + pr_err_ratelimited("gfn mismatch under direct page %llx " + "(expected %llx, got %llx)\n", + sp->gfn, + kvm_mmu_page_get_gfn(sp, index), gfn); +} + +/* + * Return the pointer to the large page information for a given gfn, + * handling slots that are not large page aligned. 
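+ *
+ * For example (illustrative numbers): with base_gfn 0x801 and a 2M-level
+ * lookup, gfn_to_index() is computed relative to the slot's first, partial,
+ * large page, so gfns 0x801-0x9ff all map to index 0 even though the slot
+ * does not start on a 512-page boundary.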
+ */
+static struct kvm_lpage_info *lpage_info_slot(gfn_t gfn,
+					      struct kvm_memory_slot *slot,
+					      int level)
+{
+	unsigned long idx;
+
+	idx = gfn_to_index(gfn, slot->base_gfn, level);
+	return &slot->arch.lpage_info[level - 2][idx];
+}
+
+static void update_gfn_disallow_lpage_count(struct kvm_memory_slot *slot,
+					    gfn_t gfn, int count)
+{
+	struct kvm_lpage_info *linfo;
+	int i;
+
+	for (i = PT_DIRECTORY_LEVEL; i <= PT_MAX_HUGEPAGE_LEVEL; ++i) {
+		linfo = lpage_info_slot(gfn, slot, i);
+		linfo->disallow_lpage += count;
+		WARN_ON(linfo->disallow_lpage < 0);
+	}
+}
+
+void kvm_mmu_gfn_disallow_lpage(struct kvm_memory_slot *slot, gfn_t gfn)
+{
+	update_gfn_disallow_lpage_count(slot, gfn, 1);
+}
+
+void kvm_mmu_gfn_allow_lpage(struct kvm_memory_slot *slot, gfn_t gfn)
+{
+	update_gfn_disallow_lpage_count(slot, gfn, -1);
+}
+
+static void account_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
+{
+	struct kvm_memslots *slots;
+	struct kvm_memory_slot *slot;
+	gfn_t gfn;
+
+	kvm->arch.indirect_shadow_pages++;
+	gfn = sp->gfn;
+	slots = kvm_memslots_for_spte_role(kvm, sp->role);
+	slot = __gfn_to_memslot(slots, gfn);
+
+	/* the non-leaf shadow pages are kept read-only. */
+	if (sp->role.level > PT_PAGE_TABLE_LEVEL)
+		return kvm_slot_page_track_add_page(kvm, slot, gfn,
+						    KVM_PAGE_TRACK_WRITE);
+
+	kvm_mmu_gfn_disallow_lpage(slot, gfn);
+}
+
+static void account_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp)
+{
+	if (sp->lpage_disallowed)
+		return;
+
+	++kvm->stat.nx_lpage_splits;
+	list_add_tail(&sp->lpage_disallowed_link,
+		      &kvm->arch.lpage_disallowed_mmu_pages);
+	sp->lpage_disallowed = true;
+}
+
+static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
+{
+	struct kvm_memslots *slots;
+	struct kvm_memory_slot *slot;
+	gfn_t gfn;
+
+	kvm->arch.indirect_shadow_pages--;
+	gfn = sp->gfn;
+	slots = kvm_memslots_for_spte_role(kvm, sp->role);
+	slot = __gfn_to_memslot(slots, gfn);
+	if (sp->role.level > PT_PAGE_TABLE_LEVEL)
+		return kvm_slot_page_track_remove_page(kvm, slot, gfn,
+						       KVM_PAGE_TRACK_WRITE);
+
+	kvm_mmu_gfn_allow_lpage(slot, gfn);
+}
+
+static void unaccount_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp)
+{
+	--kvm->stat.nx_lpage_splits;
+	sp->lpage_disallowed = false;
+	list_del(&sp->lpage_disallowed_link);
+}
+
+static bool __mmu_gfn_lpage_is_disallowed(gfn_t gfn, int level,
+					  struct kvm_memory_slot *slot)
+{
+	struct kvm_lpage_info *linfo;
+
+	if (slot) {
+		linfo = lpage_info_slot(gfn, slot, level);
+		return !!linfo->disallow_lpage;
+	}
+
+	return true;
+}
+
+static bool mmu_gfn_lpage_is_disallowed(struct kvm_vcpu *vcpu, gfn_t gfn,
+					int level)
+{
+	struct kvm_memory_slot *slot;
+
+	slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
+	return __mmu_gfn_lpage_is_disallowed(gfn, level, slot);
+}
+
+static int host_mapping_level(struct kvm *kvm, gfn_t gfn)
+{
+	unsigned long page_size;
+	int i, ret = 0;
+
+	page_size = kvm_host_page_size(kvm, gfn);
+
+	for (i = PT_PAGE_TABLE_LEVEL; i <= PT_MAX_HUGEPAGE_LEVEL; ++i) {
+		if (page_size >= KVM_HPAGE_SIZE(i))
+			ret = i;
+		else
+			break;
+	}
+
+	return ret;
+}
+
+static inline bool memslot_valid_for_gpte(struct kvm_memory_slot *slot,
+					  bool no_dirty_log)
+{
+	if (!slot || slot->flags & KVM_MEMSLOT_INVALID)
+		return false;
+	if (no_dirty_log && slot->dirty_bitmap)
+		return false;
+
+	return true;
+}
+
+static struct kvm_memory_slot *
+gfn_to_memslot_dirty_bitmap(struct kvm_vcpu *vcpu, gfn_t gfn,
+			    bool no_dirty_log)
+{
+	struct kvm_memory_slot *slot;
+
+	slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
+	if
(!memslot_valid_for_gpte(slot, no_dirty_log)) + slot = NULL; + + return slot; +} + +static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn, + bool *force_pt_level) +{ + int host_level, level, max_level; + struct kvm_memory_slot *slot; + + if (unlikely(*force_pt_level)) + return PT_PAGE_TABLE_LEVEL; + + slot = kvm_vcpu_gfn_to_memslot(vcpu, large_gfn); + *force_pt_level = !memslot_valid_for_gpte(slot, true); + if (unlikely(*force_pt_level)) + return PT_PAGE_TABLE_LEVEL; + + host_level = host_mapping_level(vcpu->kvm, large_gfn); + + if (host_level == PT_PAGE_TABLE_LEVEL) + return host_level; + + max_level = min(kvm_x86_ops->get_lpage_level(), host_level); + + for (level = PT_DIRECTORY_LEVEL; level <= max_level; ++level) + if (__mmu_gfn_lpage_is_disallowed(large_gfn, level, slot)) + break; + + return level - 1; +} + +/* + * About rmap_head encoding: + * + * If the bit zero of rmap_head->val is clear, then it points to the only spte + * in this rmap chain. Otherwise, (rmap_head->val & ~1) points to a struct + * pte_list_desc containing more mappings. + */ + +/* + * Returns the number of pointers in the rmap chain, not counting the new one. + */ +static int pte_list_add(struct kvm_vcpu *vcpu, u64 *spte, + struct kvm_rmap_head *rmap_head) +{ + struct pte_list_desc *desc; + int i, count = 0; + + if (!rmap_head->val) { + rmap_printk("pte_list_add: %p %llx 0->1\n", spte, *spte); + rmap_head->val = (unsigned long)spte; + } else if (!(rmap_head->val & 1)) { + rmap_printk("pte_list_add: %p %llx 1->many\n", spte, *spte); + desc = mmu_alloc_pte_list_desc(vcpu); + desc->sptes[0] = (u64 *)rmap_head->val; + desc->sptes[1] = spte; + rmap_head->val = (unsigned long)desc | 1; + ++count; + } else { + rmap_printk("pte_list_add: %p %llx many->many\n", spte, *spte); + desc = (struct pte_list_desc *)(rmap_head->val & ~1ul); + while (desc->sptes[PTE_LIST_EXT-1] && desc->more) { + desc = desc->more; + count += PTE_LIST_EXT; + } + if (desc->sptes[PTE_LIST_EXT-1]) { + desc->more = mmu_alloc_pte_list_desc(vcpu); + desc = desc->more; + } + for (i = 0; desc->sptes[i]; ++i) + ++count; + desc->sptes[i] = spte; + } + return count; +} + +static void +pte_list_desc_remove_entry(struct kvm_rmap_head *rmap_head, + struct pte_list_desc *desc, int i, + struct pte_list_desc *prev_desc) +{ + int j; + + for (j = PTE_LIST_EXT - 1; !desc->sptes[j] && j > i; --j) + ; + desc->sptes[i] = desc->sptes[j]; + desc->sptes[j] = NULL; + if (j != 0) + return; + if (!prev_desc && !desc->more) + rmap_head->val = (unsigned long)desc->sptes[0]; + else + if (prev_desc) + prev_desc->more = desc->more; + else + rmap_head->val = (unsigned long)desc->more | 1; + mmu_free_pte_list_desc(desc); +} + +static void __pte_list_remove(u64 *spte, struct kvm_rmap_head *rmap_head) +{ + struct pte_list_desc *desc; + struct pte_list_desc *prev_desc; + int i; + + if (!rmap_head->val) { + pr_err("%s: %p 0->BUG\n", __func__, spte); + BUG(); + } else if (!(rmap_head->val & 1)) { + rmap_printk("%s: %p 1->0\n", __func__, spte); + if ((u64 *)rmap_head->val != spte) { + pr_err("%s: %p 1->BUG\n", __func__, spte); + BUG(); + } + rmap_head->val = 0; + } else { + rmap_printk("%s: %p many->many\n", __func__, spte); + desc = (struct pte_list_desc *)(rmap_head->val & ~1ul); + prev_desc = NULL; + while (desc) { + for (i = 0; i < PTE_LIST_EXT && desc->sptes[i]; ++i) { + if (desc->sptes[i] == spte) { + pte_list_desc_remove_entry(rmap_head, + desc, i, prev_desc); + return; + } + } + prev_desc = desc; + desc = desc->more; + } + pr_err("%s: %p many->many\n", __func__, spte); + 
BUG();
+	}
+}
+
+static void pte_list_remove(struct kvm_rmap_head *rmap_head, u64 *sptep)
+{
+	mmu_spte_clear_track_bits(sptep);
+	__pte_list_remove(sptep, rmap_head);
+}
+
+static struct kvm_rmap_head *__gfn_to_rmap(gfn_t gfn, int level,
+					   struct kvm_memory_slot *slot)
+{
+	unsigned long idx;
+
+	idx = gfn_to_index(gfn, slot->base_gfn, level);
+	return &slot->arch.rmap[level - PT_PAGE_TABLE_LEVEL][idx];
+}
+
+static struct kvm_rmap_head *gfn_to_rmap(struct kvm *kvm, gfn_t gfn,
+					 struct kvm_mmu_page *sp)
+{
+	struct kvm_memslots *slots;
+	struct kvm_memory_slot *slot;
+
+	slots = kvm_memslots_for_spte_role(kvm, sp->role);
+	slot = __gfn_to_memslot(slots, gfn);
+	return __gfn_to_rmap(gfn, sp->role.level, slot);
+}
+
+static bool rmap_can_add(struct kvm_vcpu *vcpu)
+{
+	struct kvm_mmu_memory_cache *cache;
+
+	cache = &vcpu->arch.mmu_pte_list_desc_cache;
+	return mmu_memory_cache_free_objects(cache);
+}
+
+static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
+{
+	struct kvm_mmu_page *sp;
+	struct kvm_rmap_head *rmap_head;
+
+	sp = page_header(__pa(spte));
+	kvm_mmu_page_set_gfn(sp, spte - sp->spt, gfn);
+	rmap_head = gfn_to_rmap(vcpu->kvm, gfn, sp);
+	return pte_list_add(vcpu, spte, rmap_head);
+}
+
+static void rmap_remove(struct kvm *kvm, u64 *spte)
+{
+	struct kvm_mmu_page *sp;
+	gfn_t gfn;
+	struct kvm_rmap_head *rmap_head;
+
+	sp = page_header(__pa(spte));
+	gfn = kvm_mmu_page_get_gfn(sp, spte - sp->spt);
+	rmap_head = gfn_to_rmap(kvm, gfn, sp);
+	__pte_list_remove(spte, rmap_head);
+}
+
+/*
+ * Used by the following functions to iterate through the sptes linked by an
+ * rmap. All fields are private and not assumed to be used outside.
+ */
+struct rmap_iterator {
+	/* private fields */
+	struct pte_list_desc *desc;	/* holds the sptep if not NULL */
+	int pos;			/* index of the sptep */
+};
+
+/*
+ * Iteration must be started by this function. This should also be used after
+ * removing/dropping sptes from the rmap link because in such cases the
+ * information in the iterator may not be valid.
+ *
+ * Returns sptep if found, NULL otherwise.
+ */
+static u64 *rmap_get_first(struct kvm_rmap_head *rmap_head,
+			   struct rmap_iterator *iter)
+{
+	u64 *sptep;
+
+	if (!rmap_head->val)
+		return NULL;
+
+	if (!(rmap_head->val & 1)) {
+		iter->desc = NULL;
+		sptep = (u64 *)rmap_head->val;
+		goto out;
+	}
+
+	iter->desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
+	iter->pos = 0;
+	sptep = iter->desc->sptes[iter->pos];
+out:
+	BUG_ON(!is_shadow_present_pte(*sptep));
+	return sptep;
+}
+
+/*
+ * Must be used with a valid iterator: e.g. after rmap_get_first().
+ *
+ * Returns sptep if found, NULL otherwise.
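+ *
+ * Editorial note: this is typically used through the for_each_rmap_spte()
+ * macro defined below rather than called directly.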
+ */
+static u64 *rmap_get_next(struct rmap_iterator *iter)
+{
+	u64 *sptep;
+
+	if (iter->desc) {
+		if (iter->pos < PTE_LIST_EXT - 1) {
+			++iter->pos;
+			sptep = iter->desc->sptes[iter->pos];
+			if (sptep)
+				goto out;
+		}
+
+		iter->desc = iter->desc->more;
+
+		if (iter->desc) {
+			iter->pos = 0;
+			/* desc->sptes[0] cannot be NULL */
+			sptep = iter->desc->sptes[iter->pos];
+			goto out;
+		}
+	}
+
+	return NULL;
+out:
+	BUG_ON(!is_shadow_present_pte(*sptep));
+	return sptep;
+}
+
+#define for_each_rmap_spte(_rmap_head_, _iter_, _spte_)			\
+	for (_spte_ = rmap_get_first(_rmap_head_, _iter_);		\
+	     _spte_; _spte_ = rmap_get_next(_iter_))
+
+static void drop_spte(struct kvm *kvm, u64 *sptep)
+{
+	if (mmu_spte_clear_track_bits(sptep))
+		rmap_remove(kvm, sptep);
+}
+
+
+static bool __drop_large_spte(struct kvm *kvm, u64 *sptep)
+{
+	if (is_large_pte(*sptep)) {
+		WARN_ON(page_header(__pa(sptep))->role.level ==
+			PT_PAGE_TABLE_LEVEL);
+		drop_spte(kvm, sptep);
+		--kvm->stat.lpages;
+		return true;
+	}
+
+	return false;
+}
+
+static void drop_large_spte(struct kvm_vcpu *vcpu, u64 *sptep)
+{
+	if (__drop_large_spte(vcpu->kvm, sptep)) {
+		struct kvm_mmu_page *sp = page_header(__pa(sptep));
+
+		kvm_flush_remote_tlbs_with_address(vcpu->kvm, sp->gfn,
+			KVM_PAGES_PER_HPAGE(sp->role.level));
+	}
+}
+
+/*
+ * Write-protect the specified @sptep; @pt_protect indicates whether the
+ * spte write-protection is caused by protecting a shadow page table.
+ *
+ * Note: write protection differs between dirty logging and spte
+ * protection:
+ * - for dirty logging, the spte can be set to writable at any time if
+ *   its dirty bitmap is properly set.
+ * - for spte protection, the spte can be writable only after unsync-ing
+ *   the shadow page.
+ *
+ * Return true if the TLB needs to be flushed
+ */
+static bool spte_write_protect(u64 *sptep, bool pt_protect)
+{
+	u64 spte = *sptep;
+
+	if (!is_writable_pte(spte) &&
+	      !(pt_protect && spte_can_locklessly_be_made_writable(spte)))
+		return false;
+
+	rmap_printk("rmap_write_protect: spte %p %llx\n", sptep, *sptep);
+
+	if (pt_protect)
+		spte &= ~SPTE_MMU_WRITEABLE;
+	spte = spte & ~PT_WRITABLE_MASK;
+
+	return mmu_spte_update(sptep, spte);
+}
+
+static bool __rmap_write_protect(struct kvm *kvm,
+				 struct kvm_rmap_head *rmap_head,
+				 bool pt_protect)
+{
+	u64 *sptep;
+	struct rmap_iterator iter;
+	bool flush = false;
+
+	for_each_rmap_spte(rmap_head, &iter, sptep)
+		flush |= spte_write_protect(sptep, pt_protect);
+
+	return flush;
+}
+
+static bool spte_clear_dirty(u64 *sptep)
+{
+	u64 spte = *sptep;
+
+	rmap_printk("rmap_clear_dirty: spte %p %llx\n", sptep, *sptep);
+
+	MMU_WARN_ON(!spte_ad_enabled(spte));
+	spte &= ~shadow_dirty_mask;
+	return mmu_spte_update(sptep, spte);
+}
+
+static bool spte_wrprot_for_clear_dirty(u64 *sptep)
+{
+	bool was_writable = test_and_clear_bit(PT_WRITABLE_SHIFT,
+					       (unsigned long *)sptep);
+	if (was_writable && !spte_ad_enabled(*sptep))
+		kvm_set_pfn_dirty(spte_to_pfn(*sptep));
+
+	return was_writable;
+}
+
+/*
+ * Gets the GFN ready for another round of dirty logging by clearing the
+ *	- D bit on ad-enabled SPTEs, and
+ *	- W bit on ad-disabled SPTEs.
+ * Returns true iff any D or W bits were cleared.
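+ *
+ * For example (editorial note): for SPTEs marked SPTE_AD_WRPROT_ONLY_MASK
+ * (e.g. the nested-PML case above), a dirty GFN is re-armed by clearing W,
+ * so the next guest write faults and is logged again; with A/D bits in
+ * use, clearing D is enough and no write fault is needed.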
+ */ +static bool __rmap_clear_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head) +{ + u64 *sptep; + struct rmap_iterator iter; + bool flush = false; + + for_each_rmap_spte(rmap_head, &iter, sptep) + if (spte_ad_need_write_protect(*sptep)) + flush |= spte_wrprot_for_clear_dirty(sptep); + else + flush |= spte_clear_dirty(sptep); + + return flush; +} + +static bool spte_set_dirty(u64 *sptep) +{ + u64 spte = *sptep; + + rmap_printk("rmap_set_dirty: spte %p %llx\n", sptep, *sptep); + + /* + * Similar to the !kvm_x86_ops->slot_disable_log_dirty case, + * do not bother adding back write access to pages marked + * SPTE_AD_WRPROT_ONLY_MASK. + */ + spte |= shadow_dirty_mask; + + return mmu_spte_update(sptep, spte); +} + +static bool __rmap_set_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head) +{ + u64 *sptep; + struct rmap_iterator iter; + bool flush = false; + + for_each_rmap_spte(rmap_head, &iter, sptep) + if (spte_ad_enabled(*sptep)) + flush |= spte_set_dirty(sptep); + + return flush; +} + +/** + * kvm_mmu_write_protect_pt_masked - write protect selected PT level pages + * @kvm: kvm instance + * @slot: slot to protect + * @gfn_offset: start of the BITS_PER_LONG pages we care about + * @mask: indicates which pages we should protect + * + * Used when we do not need to care about huge page mappings: e.g. during dirty + * logging we do not have any such mappings. + */ +static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm, + struct kvm_memory_slot *slot, + gfn_t gfn_offset, unsigned long mask) +{ + struct kvm_rmap_head *rmap_head; + + while (mask) { + rmap_head = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask), + PT_PAGE_TABLE_LEVEL, slot); + __rmap_write_protect(kvm, rmap_head, false); + + /* clear the first set bit */ + mask &= mask - 1; + } +} + +/** + * kvm_mmu_clear_dirty_pt_masked - clear MMU D-bit for PT level pages, or write + * protect the page if the D-bit isn't supported. + * @kvm: kvm instance + * @slot: slot to clear D-bit + * @gfn_offset: start of the BITS_PER_LONG pages we care about + * @mask: indicates which pages we should clear D-bit + * + * Used for PML to re-log the dirty GPAs after userspace querying dirty_bitmap. + */ +void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm, + struct kvm_memory_slot *slot, + gfn_t gfn_offset, unsigned long mask) +{ + struct kvm_rmap_head *rmap_head; + + while (mask) { + rmap_head = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask), + PT_PAGE_TABLE_LEVEL, slot); + __rmap_clear_dirty(kvm, rmap_head); + + /* clear the first set bit */ + mask &= mask - 1; + } +} +EXPORT_SYMBOL_GPL(kvm_mmu_clear_dirty_pt_masked); + +/** + * kvm_arch_mmu_enable_log_dirty_pt_masked - enable dirty logging for selected + * PT level pages. + * + * It calls kvm_mmu_write_protect_pt_masked to write protect selected pages to + * enable dirty logging for them. + * + * Used when we do not need to care about huge page mappings: e.g. during dirty + * logging we do not have any such mappings. 
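+ *
+ * For example (editorial note): mask bit 0 selects the page at
+ * slot->base_gfn + gfn_offset, bit 1 the following page, and so on, so a
+ * mask of 0x5 enables logging for exactly two pages.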
+ */ +void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm, + struct kvm_memory_slot *slot, + gfn_t gfn_offset, unsigned long mask) +{ + if (kvm_x86_ops->enable_log_dirty_pt_masked) + kvm_x86_ops->enable_log_dirty_pt_masked(kvm, slot, gfn_offset, + mask); + else + kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask); +} + +/** + * kvm_arch_write_log_dirty - emulate dirty page logging + * @vcpu: Guest mode vcpu + * + * Emulate arch specific page modification logging for the + * nested hypervisor + */ +int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu) +{ + if (kvm_x86_ops->write_log_dirty) + return kvm_x86_ops->write_log_dirty(vcpu); + + return 0; +} + +bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm, + struct kvm_memory_slot *slot, u64 gfn) +{ + struct kvm_rmap_head *rmap_head; + int i; + bool write_protected = false; + + for (i = PT_PAGE_TABLE_LEVEL; i <= PT_MAX_HUGEPAGE_LEVEL; ++i) { + rmap_head = __gfn_to_rmap(gfn, i, slot); + write_protected |= __rmap_write_protect(kvm, rmap_head, true); + } + + return write_protected; +} + +static bool rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn) +{ + struct kvm_memory_slot *slot; + + slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); + return kvm_mmu_slot_gfn_write_protect(vcpu->kvm, slot, gfn); +} + +static bool kvm_zap_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head) +{ + u64 *sptep; + struct rmap_iterator iter; + bool flush = false; + + while ((sptep = rmap_get_first(rmap_head, &iter))) { + rmap_printk("%s: spte %p %llx.\n", __func__, sptep, *sptep); + + pte_list_remove(rmap_head, sptep); + flush = true; + } + + return flush; +} + +static int kvm_unmap_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head, + struct kvm_memory_slot *slot, gfn_t gfn, int level, + unsigned long data) +{ + return kvm_zap_rmapp(kvm, rmap_head); +} + +static int kvm_set_pte_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head, + struct kvm_memory_slot *slot, gfn_t gfn, int level, + unsigned long data) +{ + u64 *sptep; + struct rmap_iterator iter; + int need_flush = 0; + u64 new_spte; + pte_t *ptep = (pte_t *)data; + kvm_pfn_t new_pfn; + + WARN_ON(pte_huge(*ptep)); + new_pfn = pte_pfn(*ptep); + +restart: + for_each_rmap_spte(rmap_head, &iter, sptep) { + rmap_printk("kvm_set_pte_rmapp: spte %p %llx gfn %llx (%d)\n", + sptep, *sptep, gfn, level); + + need_flush = 1; + + if (pte_write(*ptep)) { + pte_list_remove(rmap_head, sptep); + goto restart; + } else { + new_spte = *sptep & ~PT64_BASE_ADDR_MASK; + new_spte |= (u64)new_pfn << PAGE_SHIFT; + + new_spte &= ~PT_WRITABLE_MASK; + new_spte &= ~SPTE_HOST_WRITEABLE; + + new_spte = mark_spte_for_access_track(new_spte); + + mmu_spte_clear_track_bits(sptep); + mmu_spte_set(sptep, new_spte); + } + } + + if (need_flush && kvm_available_flush_tlb_with_range()) { + kvm_flush_remote_tlbs_with_address(kvm, gfn, 1); + return 0; + } + + return need_flush; +} + +struct slot_rmap_walk_iterator { + /* input fields. */ + struct kvm_memory_slot *slot; + gfn_t start_gfn; + gfn_t end_gfn; + int start_level; + int end_level; + + /* output fields. */ + gfn_t gfn; + struct kvm_rmap_head *rmap; + int level; + + /* private field. 
*/ + struct kvm_rmap_head *end_rmap; +}; + +static void +rmap_walk_init_level(struct slot_rmap_walk_iterator *iterator, int level) +{ + iterator->level = level; + iterator->gfn = iterator->start_gfn; + iterator->rmap = __gfn_to_rmap(iterator->gfn, level, iterator->slot); + iterator->end_rmap = __gfn_to_rmap(iterator->end_gfn, level, + iterator->slot); +} + +static void +slot_rmap_walk_init(struct slot_rmap_walk_iterator *iterator, + struct kvm_memory_slot *slot, int start_level, + int end_level, gfn_t start_gfn, gfn_t end_gfn) +{ + iterator->slot = slot; + iterator->start_level = start_level; + iterator->end_level = end_level; + iterator->start_gfn = start_gfn; + iterator->end_gfn = end_gfn; + + rmap_walk_init_level(iterator, iterator->start_level); +} + +static bool slot_rmap_walk_okay(struct slot_rmap_walk_iterator *iterator) +{ + return !!iterator->rmap; +} + +static void slot_rmap_walk_next(struct slot_rmap_walk_iterator *iterator) +{ + if (++iterator->rmap <= iterator->end_rmap) { + iterator->gfn += (1UL << KVM_HPAGE_GFN_SHIFT(iterator->level)); + return; + } + + if (++iterator->level > iterator->end_level) { + iterator->rmap = NULL; + return; + } + + rmap_walk_init_level(iterator, iterator->level); +} + +#define for_each_slot_rmap_range(_slot_, _start_level_, _end_level_, \ + _start_gfn, _end_gfn, _iter_) \ + for (slot_rmap_walk_init(_iter_, _slot_, _start_level_, \ + _end_level_, _start_gfn, _end_gfn); \ + slot_rmap_walk_okay(_iter_); \ + slot_rmap_walk_next(_iter_)) + +static int kvm_handle_hva_range(struct kvm *kvm, + unsigned long start, + unsigned long end, + unsigned long data, + int (*handler)(struct kvm *kvm, + struct kvm_rmap_head *rmap_head, + struct kvm_memory_slot *slot, + gfn_t gfn, + int level, + unsigned long data)) +{ + struct kvm_memslots *slots; + struct kvm_memory_slot *memslot; + struct slot_rmap_walk_iterator iterator; + int ret = 0; + int i; + + for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) { + slots = __kvm_memslots(kvm, i); + kvm_for_each_memslot(memslot, slots) { + unsigned long hva_start, hva_end; + gfn_t gfn_start, gfn_end; + + hva_start = max(start, memslot->userspace_addr); + hva_end = min(end, memslot->userspace_addr + + (memslot->npages << PAGE_SHIFT)); + if (hva_start >= hva_end) + continue; + /* + * {gfn(page) | page intersects with [hva_start, hva_end)} = + * {gfn_start, gfn_start+1, ..., gfn_end-1}. 
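+			 *
+			 * For example (illustrative numbers): for a slot
+			 * with base_gfn 0x100 mapped at hva 0x10000, a range
+			 * covering hvas 0x10800-0x11800 yields gfn_start =
+			 * 0x100 and gfn_end = 0x102, so gfns 0x100 and 0x101
+			 * are visited.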
+ */ + gfn_start = hva_to_gfn_memslot(hva_start, memslot); + gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot); + + for_each_slot_rmap_range(memslot, PT_PAGE_TABLE_LEVEL, + PT_MAX_HUGEPAGE_LEVEL, + gfn_start, gfn_end - 1, + &iterator) + ret |= handler(kvm, iterator.rmap, memslot, + iterator.gfn, iterator.level, data); + } + } + + return ret; +} + +static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, + unsigned long data, + int (*handler)(struct kvm *kvm, + struct kvm_rmap_head *rmap_head, + struct kvm_memory_slot *slot, + gfn_t gfn, int level, + unsigned long data)) +{ + return kvm_handle_hva_range(kvm, hva, hva + 1, data, handler); +} + +int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) +{ + return kvm_handle_hva_range(kvm, start, end, 0, kvm_unmap_rmapp); +} + +int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) +{ + return kvm_handle_hva(kvm, hva, (unsigned long)&pte, kvm_set_pte_rmapp); +} + +static int kvm_age_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head, + struct kvm_memory_slot *slot, gfn_t gfn, int level, + unsigned long data) +{ + u64 *sptep; + struct rmap_iterator uninitialized_var(iter); + int young = 0; + + for_each_rmap_spte(rmap_head, &iter, sptep) + young |= mmu_spte_age(sptep); + + trace_kvm_age_page(gfn, level, slot, young); + return young; +} + +static int kvm_test_age_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head, + struct kvm_memory_slot *slot, gfn_t gfn, + int level, unsigned long data) +{ + u64 *sptep; + struct rmap_iterator iter; + + for_each_rmap_spte(rmap_head, &iter, sptep) + if (is_accessed_spte(*sptep)) + return 1; + return 0; +} + +#define RMAP_RECYCLE_THRESHOLD 1000 + +static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) +{ + struct kvm_rmap_head *rmap_head; + struct kvm_mmu_page *sp; + + sp = page_header(__pa(spte)); + + rmap_head = gfn_to_rmap(vcpu->kvm, gfn, sp); + + kvm_unmap_rmapp(vcpu->kvm, rmap_head, NULL, gfn, sp->role.level, 0); + kvm_flush_remote_tlbs_with_address(vcpu->kvm, sp->gfn, + KVM_PAGES_PER_HPAGE(sp->role.level)); +} + +int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end) +{ + return kvm_handle_hva_range(kvm, start, end, 0, kvm_age_rmapp); +} + +int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) +{ + return kvm_handle_hva(kvm, hva, 0, kvm_test_age_rmapp); +} + +#ifdef MMU_DEBUG +static int is_empty_shadow_page(u64 *spt) +{ + u64 *pos; + u64 *end; + + for (pos = spt, end = pos + PAGE_SIZE / sizeof(u64); pos != end; pos++) + if (is_shadow_present_pte(*pos)) { + printk(KERN_ERR "%s: %p %llx\n", __func__, + pos, *pos); + return 0; + } + return 1; +} +#endif + +/* + * This value is the sum of all of the kvm instances's + * kvm->arch.n_used_mmu_pages values. 
We need a global, + * aggregate version in order to make the slab shrinker + * faster + */ +static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, unsigned long nr) +{ + kvm->arch.n_used_mmu_pages += nr; + percpu_counter_add(&kvm_total_used_mmu_pages, nr); +} + +static void kvm_mmu_free_page(struct kvm_mmu_page *sp) +{ + MMU_WARN_ON(!is_empty_shadow_page(sp->spt)); + hlist_del(&sp->hash_link); + list_del(&sp->link); + free_page((unsigned long)sp->spt); + if (!sp->role.direct) + free_page((unsigned long)sp->gfns); + kmem_cache_free(mmu_page_header_cache, sp); +} + +static unsigned kvm_page_table_hashfn(gfn_t gfn) +{ + return hash_64(gfn, KVM_MMU_HASH_SHIFT); +} + +static void mmu_page_add_parent_pte(struct kvm_vcpu *vcpu, + struct kvm_mmu_page *sp, u64 *parent_pte) +{ + if (!parent_pte) + return; + + pte_list_add(vcpu, parent_pte, &sp->parent_ptes); +} + +static void mmu_page_remove_parent_pte(struct kvm_mmu_page *sp, + u64 *parent_pte) +{ + __pte_list_remove(parent_pte, &sp->parent_ptes); +} + +static void drop_parent_pte(struct kvm_mmu_page *sp, + u64 *parent_pte) +{ + mmu_page_remove_parent_pte(sp, parent_pte); + mmu_spte_clear_no_track(parent_pte); +} + +static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, int direct) +{ + struct kvm_mmu_page *sp; + + sp = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache); + sp->spt = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache); + if (!direct) + sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache); + set_page_private(virt_to_page(sp->spt), (unsigned long)sp); + + /* + * active_mmu_pages must be a FIFO list, as kvm_zap_obsolete_pages() + * depends on valid pages being added to the head of the list. See + * comments in kvm_zap_obsolete_pages(). + */ + sp->mmu_valid_gen = vcpu->kvm->arch.mmu_valid_gen; + list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); + kvm_mod_used_mmu_pages(vcpu->kvm, +1); + return sp; +} + +static void mark_unsync(u64 *spte); +static void kvm_mmu_mark_parents_unsync(struct kvm_mmu_page *sp) +{ + u64 *sptep; + struct rmap_iterator iter; + + for_each_rmap_spte(&sp->parent_ptes, &iter, sptep) { + mark_unsync(sptep); + } +} + +static void mark_unsync(u64 *spte) +{ + struct kvm_mmu_page *sp; + unsigned int index; + + sp = page_header(__pa(spte)); + index = spte - sp->spt; + if (__test_and_set_bit(index, sp->unsync_child_bitmap)) + return; + if (sp->unsync_children++) + return; + kvm_mmu_mark_parents_unsync(sp); +} + +static int nonpaging_sync_page(struct kvm_vcpu *vcpu, + struct kvm_mmu_page *sp) +{ + return 0; +} + +static void nonpaging_invlpg(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root) +{ +} + +static void nonpaging_update_pte(struct kvm_vcpu *vcpu, + struct kvm_mmu_page *sp, u64 *spte, + const void *pte) +{ + WARN_ON(1); +} + +#define KVM_PAGE_ARRAY_NR 16 + +struct kvm_mmu_pages { + struct mmu_page_and_offset { + struct kvm_mmu_page *sp; + unsigned int idx; + } page[KVM_PAGE_ARRAY_NR]; + unsigned int nr; +}; + +static int mmu_pages_add(struct kvm_mmu_pages *pvec, struct kvm_mmu_page *sp, + int idx) +{ + int i; + + if (sp->unsync) + for (i=0; i < pvec->nr; i++) + if (pvec->page[i].sp == sp) + return 0; + + pvec->page[pvec->nr].sp = sp; + pvec->page[pvec->nr].idx = idx; + pvec->nr++; + return (pvec->nr == KVM_PAGE_ARRAY_NR); +} + +static inline void clear_unsync_child_bit(struct kvm_mmu_page *sp, int idx) +{ + --sp->unsync_children; + WARN_ON((int)sp->unsync_children < 0); + __clear_bit(idx, sp->unsync_child_bitmap); +} + +static int __mmu_unsync_walk(struct kvm_mmu_page *sp, + struct 
kvm_mmu_pages *pvec) +{ + int i, ret, nr_unsync_leaf = 0; + + for_each_set_bit(i, sp->unsync_child_bitmap, 512) { + struct kvm_mmu_page *child; + u64 ent = sp->spt[i]; + + if (!is_shadow_present_pte(ent) || is_large_pte(ent)) { + clear_unsync_child_bit(sp, i); + continue; + } + + child = page_header(ent & PT64_BASE_ADDR_MASK); + + if (child->unsync_children) { + if (mmu_pages_add(pvec, child, i)) + return -ENOSPC; + + ret = __mmu_unsync_walk(child, pvec); + if (!ret) { + clear_unsync_child_bit(sp, i); + continue; + } else if (ret > 0) { + nr_unsync_leaf += ret; + } else + return ret; + } else if (child->unsync) { + nr_unsync_leaf++; + if (mmu_pages_add(pvec, child, i)) + return -ENOSPC; + } else + clear_unsync_child_bit(sp, i); + } + + return nr_unsync_leaf; +} + +#define INVALID_INDEX (-1) + +static int mmu_unsync_walk(struct kvm_mmu_page *sp, + struct kvm_mmu_pages *pvec) +{ + pvec->nr = 0; + if (!sp->unsync_children) + return 0; + + mmu_pages_add(pvec, sp, INVALID_INDEX); + return __mmu_unsync_walk(sp, pvec); +} + +static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp) +{ + WARN_ON(!sp->unsync); + trace_kvm_mmu_sync_page(sp); + sp->unsync = 0; + --kvm->stat.mmu_unsync; +} + +static bool kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp, + struct list_head *invalid_list); +static void kvm_mmu_commit_zap_page(struct kvm *kvm, + struct list_head *invalid_list); + + +#define for_each_valid_sp(_kvm, _sp, _gfn) \ + hlist_for_each_entry(_sp, \ + &(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)], hash_link) \ + if (is_obsolete_sp((_kvm), (_sp))) { \ + } else + +#define for_each_gfn_indirect_valid_sp(_kvm, _sp, _gfn) \ + for_each_valid_sp(_kvm, _sp, _gfn) \ + if ((_sp)->gfn != (_gfn) || (_sp)->role.direct) {} else + +static inline bool is_ept_sp(struct kvm_mmu_page *sp) +{ + return sp->role.cr0_wp && sp->role.smap_andnot_wp; +} + +/* @sp->gfn should be write-protected at the call site */ +static bool __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, + struct list_head *invalid_list) +{ + if ((!is_ept_sp(sp) && sp->role.gpte_is_8_bytes != !!is_pae(vcpu)) || + vcpu->arch.mmu->sync_page(vcpu, sp) == 0) { + kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list); + return false; + } + + return true; +} + +static bool kvm_mmu_remote_flush_or_zap(struct kvm *kvm, + struct list_head *invalid_list, + bool remote_flush) +{ + if (!remote_flush && list_empty(invalid_list)) + return false; + + if (!list_empty(invalid_list)) + kvm_mmu_commit_zap_page(kvm, invalid_list); + else + kvm_flush_remote_tlbs(kvm); + return true; +} + +static void kvm_mmu_flush_or_zap(struct kvm_vcpu *vcpu, + struct list_head *invalid_list, + bool remote_flush, bool local_flush) +{ + if (kvm_mmu_remote_flush_or_zap(vcpu->kvm, invalid_list, remote_flush)) + return; + + if (local_flush) + kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); +} + +#ifdef CONFIG_KVM_MMU_AUDIT +#include "mmu_audit.c" +#else +static void kvm_mmu_audit(struct kvm_vcpu *vcpu, int point) { } +static void mmu_audit_disable(void) { } +#endif + +static bool is_obsolete_sp(struct kvm *kvm, struct kvm_mmu_page *sp) +{ + return sp->role.invalid || + unlikely(sp->mmu_valid_gen != kvm->arch.mmu_valid_gen); +} + +static bool kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, + struct list_head *invalid_list) +{ + kvm_unlink_unsync_page(vcpu->kvm, sp); + return __kvm_sync_page(vcpu, sp, invalid_list); +} + +/* @gfn should be write-protected at the call site */ +static bool kvm_sync_pages(struct kvm_vcpu *vcpu, 
gfn_t gfn, + struct list_head *invalid_list) +{ + struct kvm_mmu_page *s; + bool ret = false; + + for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn) { + if (!s->unsync) + continue; + + WARN_ON(s->role.level != PT_PAGE_TABLE_LEVEL); + ret |= kvm_sync_page(vcpu, s, invalid_list); + } + + return ret; +} + +struct mmu_page_path { + struct kvm_mmu_page *parent[PT64_ROOT_MAX_LEVEL]; + unsigned int idx[PT64_ROOT_MAX_LEVEL]; +}; + +#define for_each_sp(pvec, sp, parents, i) \ + for (i = mmu_pages_first(&pvec, &parents); \ + i < pvec.nr && ({ sp = pvec.page[i].sp; 1;}); \ + i = mmu_pages_next(&pvec, &parents, i)) + +static int mmu_pages_next(struct kvm_mmu_pages *pvec, + struct mmu_page_path *parents, + int i) +{ + int n; + + for (n = i+1; n < pvec->nr; n++) { + struct kvm_mmu_page *sp = pvec->page[n].sp; + unsigned idx = pvec->page[n].idx; + int level = sp->role.level; + + parents->idx[level-1] = idx; + if (level == PT_PAGE_TABLE_LEVEL) + break; + + parents->parent[level-2] = sp; + } + + return n; +} + +static int mmu_pages_first(struct kvm_mmu_pages *pvec, + struct mmu_page_path *parents) +{ + struct kvm_mmu_page *sp; + int level; + + if (pvec->nr == 0) + return 0; + + WARN_ON(pvec->page[0].idx != INVALID_INDEX); + + sp = pvec->page[0].sp; + level = sp->role.level; + WARN_ON(level == PT_PAGE_TABLE_LEVEL); + + parents->parent[level-2] = sp; + + /* Also set up a sentinel. Further entries in pvec are all + * children of sp, so this element is never overwritten. + */ + parents->parent[level-1] = NULL; + return mmu_pages_next(pvec, parents, 0); +} + +static void mmu_pages_clear_parents(struct mmu_page_path *parents) +{ + struct kvm_mmu_page *sp; + unsigned int level = 0; + + do { + unsigned int idx = parents->idx[level]; + sp = parents->parent[level]; + if (!sp) + return; + + WARN_ON(idx == INVALID_INDEX); + clear_unsync_child_bit(sp, idx); + level++; + } while (!sp->unsync_children); +} + +static void mmu_sync_children(struct kvm_vcpu *vcpu, + struct kvm_mmu_page *parent) +{ + int i; + struct kvm_mmu_page *sp; + struct mmu_page_path parents; + struct kvm_mmu_pages pages; + LIST_HEAD(invalid_list); + bool flush = false; + + while (mmu_unsync_walk(parent, &pages)) { + bool protected = false; + + for_each_sp(pages, sp, parents, i) + protected |= rmap_write_protect(vcpu, sp->gfn); + + if (protected) { + kvm_flush_remote_tlbs(vcpu->kvm); + flush = false; + } + + for_each_sp(pages, sp, parents, i) { + flush |= kvm_sync_page(vcpu, sp, &invalid_list); + mmu_pages_clear_parents(&parents); + } + if (need_resched() || spin_needbreak(&vcpu->kvm->mmu_lock)) { + kvm_mmu_flush_or_zap(vcpu, &invalid_list, false, flush); + cond_resched_lock(&vcpu->kvm->mmu_lock); + flush = false; + } + } + + kvm_mmu_flush_or_zap(vcpu, &invalid_list, false, flush); +} + +static void __clear_sp_write_flooding_count(struct kvm_mmu_page *sp) +{ + atomic_set(&sp->write_flooding_count, 0); +} + +static void clear_sp_write_flooding_count(u64 *spte) +{ + struct kvm_mmu_page *sp = page_header(__pa(spte)); + + __clear_sp_write_flooding_count(sp); +} + +static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, + gfn_t gfn, + gva_t gaddr, + unsigned level, + int direct, + unsigned access) +{ + union kvm_mmu_page_role role; + unsigned quadrant; + struct kvm_mmu_page *sp; + bool need_sync = false; + bool flush = false; + int collisions = 0; + LIST_HEAD(invalid_list); + + role = vcpu->arch.mmu->mmu_role.base; + role.level = level; + role.direct = direct; + if (role.direct) + role.gpte_is_8_bytes = true; + role.access = access; + if 
(!vcpu->arch.mmu->direct_map + && vcpu->arch.mmu->root_level <= PT32_ROOT_LEVEL) { + quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level)); + quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1; + role.quadrant = quadrant; + } + for_each_valid_sp(vcpu->kvm, sp, gfn) { + if (sp->gfn != gfn) { + collisions++; + continue; + } + + if (!need_sync && sp->unsync) + need_sync = true; + + if (sp->role.word != role.word) + continue; + + if (sp->unsync) { + /* The page is good, but __kvm_sync_page might still end + * up zapping it. If so, break in order to rebuild it. + */ + if (!__kvm_sync_page(vcpu, sp, &invalid_list)) + break; + + WARN_ON(!list_empty(&invalid_list)); + kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); + } + + if (sp->unsync_children) + kvm_make_request(KVM_REQ_MMU_SYNC, vcpu); + + __clear_sp_write_flooding_count(sp); + trace_kvm_mmu_get_page(sp, false); + goto out; + } + + ++vcpu->kvm->stat.mmu_cache_miss; + + sp = kvm_mmu_alloc_page(vcpu, direct); + + sp->gfn = gfn; + sp->role = role; + hlist_add_head(&sp->hash_link, + &vcpu->kvm->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)]); + if (!direct) { + /* + * we should do write protection before syncing pages + * otherwise the content of the synced shadow page may + * be inconsistent with guest page table. + */ + account_shadowed(vcpu->kvm, sp); + if (level == PT_PAGE_TABLE_LEVEL && + rmap_write_protect(vcpu, gfn)) + kvm_flush_remote_tlbs_with_address(vcpu->kvm, gfn, 1); + + if (level > PT_PAGE_TABLE_LEVEL && need_sync) + flush |= kvm_sync_pages(vcpu, gfn, &invalid_list); + } + clear_page(sp->spt); + trace_kvm_mmu_get_page(sp, true); + + kvm_mmu_flush_or_zap(vcpu, &invalid_list, false, flush); +out: + if (collisions > vcpu->kvm->stat.max_mmu_page_hash_collisions) + vcpu->kvm->stat.max_mmu_page_hash_collisions = collisions; + return sp; +} + +static void shadow_walk_init_using_root(struct kvm_shadow_walk_iterator *iterator, + struct kvm_vcpu *vcpu, hpa_t root, + u64 addr) +{ + iterator->addr = addr; + iterator->shadow_addr = root; + iterator->level = vcpu->arch.mmu->shadow_root_level; + + if (iterator->level == PT64_ROOT_4LEVEL && + vcpu->arch.mmu->root_level < PT64_ROOT_4LEVEL && + !vcpu->arch.mmu->direct_map) + --iterator->level; + + if (iterator->level == PT32E_ROOT_LEVEL) { + /* + * prev_root is currently only used for 64-bit hosts. So only + * the active root_hpa is valid here. 
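+		 * With a PAE root there is no single root page; start the
+		 * walk from the pae_root entry selected by bits 31:30 of
+		 * the address instead.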
+ */ + BUG_ON(root != vcpu->arch.mmu->root_hpa); + + iterator->shadow_addr + = vcpu->arch.mmu->pae_root[(addr >> 30) & 3]; + iterator->shadow_addr &= PT64_BASE_ADDR_MASK; + --iterator->level; + if (!iterator->shadow_addr) + iterator->level = 0; + } +} + +static void shadow_walk_init(struct kvm_shadow_walk_iterator *iterator, + struct kvm_vcpu *vcpu, u64 addr) +{ + shadow_walk_init_using_root(iterator, vcpu, vcpu->arch.mmu->root_hpa, + addr); +} + +static bool shadow_walk_okay(struct kvm_shadow_walk_iterator *iterator) +{ + if (iterator->level < PT_PAGE_TABLE_LEVEL) + return false; + + iterator->index = SHADOW_PT_INDEX(iterator->addr, iterator->level); + iterator->sptep = ((u64 *)__va(iterator->shadow_addr)) + iterator->index; + return true; +} + +static void __shadow_walk_next(struct kvm_shadow_walk_iterator *iterator, + u64 spte) +{ + if (is_last_spte(spte, iterator->level)) { + iterator->level = 0; + return; + } + + iterator->shadow_addr = spte & PT64_BASE_ADDR_MASK; + --iterator->level; +} + +static void shadow_walk_next(struct kvm_shadow_walk_iterator *iterator) +{ + __shadow_walk_next(iterator, *iterator->sptep); +} + +static void link_shadow_page(struct kvm_vcpu *vcpu, u64 *sptep, + struct kvm_mmu_page *sp) +{ + u64 spte; + + BUILD_BUG_ON(VMX_EPT_WRITABLE_MASK != PT_WRITABLE_MASK); + + spte = __pa(sp->spt) | shadow_present_mask | PT_WRITABLE_MASK | + shadow_user_mask | shadow_x_mask | shadow_me_mask; + + if (sp_ad_disabled(sp)) + spte |= SPTE_AD_DISABLED_MASK; + else + spte |= shadow_accessed_mask; + + mmu_spte_set(sptep, spte); + + mmu_page_add_parent_pte(vcpu, sp, sptep); + + if (sp->unsync_children || sp->unsync) + mark_unsync(sptep); +} + +static void validate_direct_spte(struct kvm_vcpu *vcpu, u64 *sptep, + unsigned direct_access) +{ + if (is_shadow_present_pte(*sptep) && !is_large_pte(*sptep)) { + struct kvm_mmu_page *child; + + /* + * For the direct sp, if the guest pte's dirty bit + * changed form clean to dirty, it will corrupt the + * sp's access: allow writable in the read-only sp, + * so we should update the spte at this point to get + * a new sp with the correct access. 
+ */ + child = page_header(*sptep & PT64_BASE_ADDR_MASK); + if (child->role.access == direct_access) + return; + + drop_parent_pte(child, sptep); + kvm_flush_remote_tlbs_with_address(vcpu->kvm, child->gfn, 1); + } +} + +static bool mmu_page_zap_pte(struct kvm *kvm, struct kvm_mmu_page *sp, + u64 *spte) +{ + u64 pte; + struct kvm_mmu_page *child; + + pte = *spte; + if (is_shadow_present_pte(pte)) { + if (is_last_spte(pte, sp->role.level)) { + drop_spte(kvm, spte); + if (is_large_pte(pte)) + --kvm->stat.lpages; + } else { + child = page_header(pte & PT64_BASE_ADDR_MASK); + drop_parent_pte(child, spte); + } + return true; + } + + if (is_mmio_spte(pte)) + mmu_spte_clear_no_track(spte); + + return false; +} + +static void kvm_mmu_page_unlink_children(struct kvm *kvm, + struct kvm_mmu_page *sp) +{ + unsigned i; + + for (i = 0; i < PT64_ENT_PER_PAGE; ++i) + mmu_page_zap_pte(kvm, sp, sp->spt + i); +} + +static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp) +{ + u64 *sptep; + struct rmap_iterator iter; + + while ((sptep = rmap_get_first(&sp->parent_ptes, &iter))) + drop_parent_pte(sp, sptep); +} + +static int mmu_zap_unsync_children(struct kvm *kvm, + struct kvm_mmu_page *parent, + struct list_head *invalid_list) +{ + int i, zapped = 0; + struct mmu_page_path parents; + struct kvm_mmu_pages pages; + + if (parent->role.level == PT_PAGE_TABLE_LEVEL) + return 0; + + while (mmu_unsync_walk(parent, &pages)) { + struct kvm_mmu_page *sp; + + for_each_sp(pages, sp, parents, i) { + kvm_mmu_prepare_zap_page(kvm, sp, invalid_list); + mmu_pages_clear_parents(&parents); + zapped++; + } + } + + return zapped; +} + +static bool __kvm_mmu_prepare_zap_page(struct kvm *kvm, + struct kvm_mmu_page *sp, + struct list_head *invalid_list, + int *nr_zapped) +{ + bool list_unstable; + + trace_kvm_mmu_prepare_zap_page(sp); + ++kvm->stat.mmu_shadow_zapped; + *nr_zapped = mmu_zap_unsync_children(kvm, sp, invalid_list); + kvm_mmu_page_unlink_children(kvm, sp); + kvm_mmu_unlink_parents(kvm, sp); + + /* Zapping children means active_mmu_pages has become unstable. */ + list_unstable = *nr_zapped; + + if (!sp->role.invalid && !sp->role.direct) + unaccount_shadowed(kvm, sp); + + if (sp->unsync) + kvm_unlink_unsync_page(kvm, sp); + if (!sp->root_count) { + /* Count self */ + (*nr_zapped)++; + list_move(&sp->link, invalid_list); + kvm_mod_used_mmu_pages(kvm, -1); + } else { + list_move(&sp->link, &kvm->arch.active_mmu_pages); + + /* + * Obsolete pages cannot be used on any vCPUs, see the comment + * in kvm_mmu_zap_all_fast(). Note, is_obsolete_sp() also + * treats invalid shadow pages as being obsolete. + */ + if (!is_obsolete_sp(kvm, sp)) + kvm_reload_remote_mmus(kvm); + } + + if (sp->lpage_disallowed) + unaccount_huge_nx_page(kvm, sp); + + sp->role.invalid = 1; + return list_unstable; +} + +static bool kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp, + struct list_head *invalid_list) +{ + int nr_zapped; + + __kvm_mmu_prepare_zap_page(kvm, sp, invalid_list, &nr_zapped); + return nr_zapped; +} + +static void kvm_mmu_commit_zap_page(struct kvm *kvm, + struct list_head *invalid_list) +{ + struct kvm_mmu_page *sp, *nsp; + + if (list_empty(invalid_list)) + return; + + /* + * We need to make sure everyone sees our modifications to + * the page tables and see changes to vcpu->mode here. The barrier + * in the kvm_flush_remote_tlbs() achieves this. This pairs + * with vcpu_enter_guest and walk_shadow_page_lockless_begin/end. 
+ * + * In addition, kvm_flush_remote_tlbs waits for all vcpus to exit + * guest mode and/or lockless shadow page table walks. + */ + kvm_flush_remote_tlbs(kvm); + + list_for_each_entry_safe(sp, nsp, invalid_list, link) { + WARN_ON(!sp->role.invalid || sp->root_count); + kvm_mmu_free_page(sp); + } +} + +static bool prepare_zap_oldest_mmu_page(struct kvm *kvm, + struct list_head *invalid_list) +{ + struct kvm_mmu_page *sp; + + if (list_empty(&kvm->arch.active_mmu_pages)) + return false; + + sp = list_last_entry(&kvm->arch.active_mmu_pages, + struct kvm_mmu_page, link); + return kvm_mmu_prepare_zap_page(kvm, sp, invalid_list); +} + +/* + * Changing the number of mmu pages allocated to the vm + * Note: if goal_nr_mmu_pages is too small, you will get dead lock + */ +void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned long goal_nr_mmu_pages) +{ + LIST_HEAD(invalid_list); + + spin_lock(&kvm->mmu_lock); + + if (kvm->arch.n_used_mmu_pages > goal_nr_mmu_pages) { + /* Need to free some mmu pages to achieve the goal. */ + while (kvm->arch.n_used_mmu_pages > goal_nr_mmu_pages) + if (!prepare_zap_oldest_mmu_page(kvm, &invalid_list)) + break; + + kvm_mmu_commit_zap_page(kvm, &invalid_list); + goal_nr_mmu_pages = kvm->arch.n_used_mmu_pages; + } + + kvm->arch.n_max_mmu_pages = goal_nr_mmu_pages; + + spin_unlock(&kvm->mmu_lock); +} + +int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn) +{ + struct kvm_mmu_page *sp; + LIST_HEAD(invalid_list); + int r; + + pgprintk("%s: looking for gfn %llx\n", __func__, gfn); + r = 0; + spin_lock(&kvm->mmu_lock); + for_each_gfn_indirect_valid_sp(kvm, sp, gfn) { + pgprintk("%s: gfn %llx role %x\n", __func__, gfn, + sp->role.word); + r = 1; + kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list); + } + kvm_mmu_commit_zap_page(kvm, &invalid_list); + spin_unlock(&kvm->mmu_lock); + + return r; +} +EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page); + +static void kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) +{ + trace_kvm_mmu_unsync_page(sp); + ++vcpu->kvm->stat.mmu_unsync; + sp->unsync = 1; + + kvm_mmu_mark_parents_unsync(sp); +} + +static bool mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn, + bool can_unsync) +{ + struct kvm_mmu_page *sp; + + if (kvm_page_track_is_active(vcpu, gfn, KVM_PAGE_TRACK_WRITE)) + return true; + + for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn) { + if (!can_unsync) + return true; + + if (sp->unsync) + continue; + + WARN_ON(sp->role.level != PT_PAGE_TABLE_LEVEL); + kvm_unsync_page(vcpu, sp); + } + + /* + * We need to ensure that the marking of unsync pages is visible + * before the SPTE is updated to allow writes because + * kvm_mmu_sync_roots() checks the unsync flags without holding + * the MMU lock and so can race with this. If the SPTE was updated + * before the page had been marked as unsync-ed, something like the + * following could happen: + * + * CPU 1 CPU 2 + * --------------------------------------------------------------------- + * 1.2 Host updates SPTE + * to be writable + * 2.1 Guest writes a GPTE for GVA X. + * (GPTE being in the guest page table shadowed + * by the SP from CPU 1.) + * This reads SPTE during the page table walk. + * Since SPTE.W is read as 1, there is no + * fault. + * + * 2.2 Guest issues TLB flush. + * That causes a VM Exit. + * + * 2.3 kvm_mmu_sync_pages() reads sp->unsync. + * Since it is false, so it just returns. + * + * 2.4 Guest accesses GVA X. + * Since the mapping in the SP was not updated, + * so the old mapping for GVA X incorrectly + * gets used. 
+ * 1.1 Host marks SP + * as unsync + * (sp->unsync = true) + * + * The write barrier below ensures that 1.1 happens before 1.2 and thus + * the situation in 2.4 does not arise. The implicit barrier in 2.2 + * pairs with this write barrier. + */ + smp_wmb(); + + return false; +} + +static bool kvm_is_mmio_pfn(kvm_pfn_t pfn) +{ + if (pfn_valid(pfn)) + return !is_zero_pfn(pfn) && PageReserved(pfn_to_page(pfn)) && + /* + * Some reserved pages, such as those from NVDIMM + * DAX devices, are not for MMIO, and can be mapped + * with cached memory type for better performance. + * However, the above check misconceives those pages + * as MMIO, and results in KVM mapping them with UC + * memory type, which would hurt the performance. + * Therefore, we check the host memory type in addition + * and only treat UC/UC-/WC pages as MMIO. + */ + (!pat_enabled() || pat_pfn_immune_to_uc_mtrr(pfn)); + + return !e820__mapped_raw_any(pfn_to_hpa(pfn), + pfn_to_hpa(pfn + 1) - 1, + E820_TYPE_RAM); +} + +/* Bits which may be returned by set_spte() */ +#define SET_SPTE_WRITE_PROTECTED_PT BIT(0) +#define SET_SPTE_NEED_REMOTE_TLB_FLUSH BIT(1) + +static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, + unsigned pte_access, int level, + gfn_t gfn, kvm_pfn_t pfn, bool speculative, + bool can_unsync, bool host_writable) +{ + u64 spte = 0; + int ret = 0; + struct kvm_mmu_page *sp; + + if (set_mmio_spte(vcpu, sptep, gfn, pfn, pte_access)) + return 0; + + sp = page_header(__pa(sptep)); + if (sp_ad_disabled(sp)) + spte |= SPTE_AD_DISABLED_MASK; + else if (kvm_vcpu_ad_need_write_protect(vcpu)) + spte |= SPTE_AD_WRPROT_ONLY_MASK; + + /* + * For the EPT case, shadow_present_mask is 0 if hardware + * supports exec-only page table entries. In that case, + * ACC_USER_MASK and shadow_user_mask are used to represent + * read access. See FNAME(gpte_access) in paging_tmpl.h. + */ + spte |= shadow_present_mask; + if (!speculative) + spte |= spte_shadow_accessed_mask(spte); + + if (level > PT_PAGE_TABLE_LEVEL && (pte_access & ACC_EXEC_MASK) && + is_nx_huge_page_enabled()) { + pte_access &= ~ACC_EXEC_MASK; + } + + if (pte_access & ACC_EXEC_MASK) + spte |= shadow_x_mask; + else + spte |= shadow_nx_mask; + + if (pte_access & ACC_USER_MASK) + spte |= shadow_user_mask; + + if (level > PT_PAGE_TABLE_LEVEL) + spte |= PT_PAGE_SIZE_MASK; + if (tdp_enabled) + spte |= kvm_x86_ops->get_mt_mask(vcpu, gfn, + kvm_is_mmio_pfn(pfn)); + + if (host_writable) + spte |= SPTE_HOST_WRITEABLE; + else + pte_access &= ~ACC_WRITE_MASK; + + if (!kvm_is_mmio_pfn(pfn)) + spte |= shadow_me_mask; + + spte |= (u64)pfn << PAGE_SHIFT; + + if (pte_access & ACC_WRITE_MASK) { + + /* + * Other vcpu creates new sp in the window between + * mapping_level() and acquiring mmu-lock. We can + * allow guest to retry the access, the mapping can + * be fixed if guest refault. + */ + if (level > PT_PAGE_TABLE_LEVEL && + mmu_gfn_lpage_is_disallowed(vcpu, gfn, level)) + goto done; + + spte |= PT_WRITABLE_MASK | SPTE_MMU_WRITEABLE; + + /* + * Optimization: for pte sync, if spte was writable the hash + * lookup is unnecessary (and expensive). Write protection + * is responsibility of mmu_get_page / kvm_sync_page. + * Same reasoning can be applied to dirty page accounting. 
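+		 * Hence the early exit below when the spte is already
+		 * writable and unsyncing is not allowed.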
+ */ + if (!can_unsync && is_writable_pte(*sptep)) + goto set_pte; + + if (mmu_need_write_protect(vcpu, gfn, can_unsync)) { + pgprintk("%s: found shadow page for %llx, marking ro\n", + __func__, gfn); + ret |= SET_SPTE_WRITE_PROTECTED_PT; + pte_access &= ~ACC_WRITE_MASK; + spte &= ~(PT_WRITABLE_MASK | SPTE_MMU_WRITEABLE); + } + } + + if (pte_access & ACC_WRITE_MASK) { + kvm_vcpu_mark_page_dirty(vcpu, gfn); + spte |= spte_shadow_dirty_mask(spte); + } + + if (speculative) + spte = mark_spte_for_access_track(spte); + +set_pte: + if (mmu_spte_update(sptep, spte)) + ret |= SET_SPTE_NEED_REMOTE_TLB_FLUSH; +done: + return ret; +} + +static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access, + int write_fault, int level, gfn_t gfn, kvm_pfn_t pfn, + bool speculative, bool host_writable) +{ + int was_rmapped = 0; + int rmap_count; + int set_spte_ret; + int ret = RET_PF_RETRY; + bool flush = false; + + pgprintk("%s: spte %llx write_fault %d gfn %llx\n", __func__, + *sptep, write_fault, gfn); + + if (is_shadow_present_pte(*sptep)) { + /* + * If we overwrite a PTE page pointer with a 2MB PMD, unlink + * the parent of the now unreachable PTE. + */ + if (level > PT_PAGE_TABLE_LEVEL && + !is_large_pte(*sptep)) { + struct kvm_mmu_page *child; + u64 pte = *sptep; + + child = page_header(pte & PT64_BASE_ADDR_MASK); + drop_parent_pte(child, sptep); + flush = true; + } else if (pfn != spte_to_pfn(*sptep)) { + pgprintk("hfn old %llx new %llx\n", + spte_to_pfn(*sptep), pfn); + drop_spte(vcpu->kvm, sptep); + flush = true; + } else + was_rmapped = 1; + } + + set_spte_ret = set_spte(vcpu, sptep, pte_access, level, gfn, pfn, + speculative, true, host_writable); + if (set_spte_ret & SET_SPTE_WRITE_PROTECTED_PT) { + if (write_fault) + ret = RET_PF_EMULATE; + kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); + } + + if (set_spte_ret & SET_SPTE_NEED_REMOTE_TLB_FLUSH || flush) + kvm_flush_remote_tlbs_with_address(vcpu->kvm, gfn, + KVM_PAGES_PER_HPAGE(level)); + + if (unlikely(is_mmio_spte(*sptep))) + ret = RET_PF_EMULATE; + + pgprintk("%s: setting spte %llx\n", __func__, *sptep); + trace_kvm_mmu_set_spte(level, gfn, sptep); + if (!was_rmapped && is_large_pte(*sptep)) + ++vcpu->kvm->stat.lpages; + + if (is_shadow_present_pte(*sptep)) { + if (!was_rmapped) { + rmap_count = rmap_add(vcpu, sptep, gfn); + if (rmap_count > RMAP_RECYCLE_THRESHOLD) + rmap_recycle(vcpu, sptep, gfn); + } + } + + return ret; +} + +static kvm_pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, + bool no_dirty_log) +{ + struct kvm_memory_slot *slot; + + slot = gfn_to_memslot_dirty_bitmap(vcpu, gfn, no_dirty_log); + if (!slot) + return KVM_PFN_ERR_FAULT; + + return gfn_to_pfn_memslot_atomic(slot, gfn); +} + +static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu, + struct kvm_mmu_page *sp, + u64 *start, u64 *end) +{ + struct page *pages[PTE_PREFETCH_NUM]; + struct kvm_memory_slot *slot; + unsigned access = sp->role.access; + int i, ret; + gfn_t gfn; + + gfn = kvm_mmu_page_get_gfn(sp, start - sp->spt); + slot = gfn_to_memslot_dirty_bitmap(vcpu, gfn, access & ACC_WRITE_MASK); + if (!slot) + return -1; + + ret = gfn_to_page_many_atomic(slot, gfn, pages, end - start); + if (ret <= 0) + return -1; + + for (i = 0; i < ret; i++, gfn++, start++) { + mmu_set_spte(vcpu, start, access, 0, sp->role.level, gfn, + page_to_pfn(pages[i]), true, true); + put_page(pages[i]); + } + + return 0; +} + +static void __direct_pte_prefetch(struct kvm_vcpu *vcpu, + struct kvm_mmu_page *sp, u64 *sptep) +{ + u64 *spte, *start = NULL; + int i; + + 
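+	/*
+	 * Scan the naturally aligned PTE_PREFETCH_NUM window around sptep
+	 * and hand each run of non-present sptes to
+	 * direct_pte_prefetch_many().
+	 */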
WARN_ON(!sp->role.direct); + + i = (sptep - sp->spt) & ~(PTE_PREFETCH_NUM - 1); + spte = sp->spt + i; + + for (i = 0; i < PTE_PREFETCH_NUM; i++, spte++) { + if (is_shadow_present_pte(*spte) || spte == sptep) { + if (!start) + continue; + if (direct_pte_prefetch_many(vcpu, sp, start, spte) < 0) + break; + start = NULL; + } else if (!start) + start = spte; + } +} + +static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep) +{ + struct kvm_mmu_page *sp; + + sp = page_header(__pa(sptep)); + + /* + * Without accessed bits, there's no way to distinguish between + * actually accessed translations and prefetched, so disable pte + * prefetch if accessed bits aren't available. + */ + if (sp_ad_disabled(sp)) + return; + + if (sp->role.level > PT_PAGE_TABLE_LEVEL) + return; + + __direct_pte_prefetch(vcpu, sp, sptep); +} + +static void disallowed_hugepage_adjust(struct kvm_shadow_walk_iterator it, + gfn_t gfn, kvm_pfn_t *pfnp, int *levelp) +{ + int level = *levelp; + u64 spte = *it.sptep; + + if (it.level == level && level > PT_PAGE_TABLE_LEVEL && + is_nx_huge_page_enabled() && + is_shadow_present_pte(spte) && + !is_large_pte(spte)) { + /* + * A small SPTE exists for this pfn, but FNAME(fetch) + * and __direct_map would like to create a large PTE + * instead: just force them to go down another level, + * patching back for them into pfn the next 9 bits of + * the address. + */ + u64 page_mask = KVM_PAGES_PER_HPAGE(level) - KVM_PAGES_PER_HPAGE(level - 1); + *pfnp |= gfn & page_mask; + (*levelp)--; + } +} + +static int __direct_map(struct kvm_vcpu *vcpu, gpa_t gpa, int write, + int map_writable, int level, kvm_pfn_t pfn, + bool prefault, bool lpage_disallowed) +{ + struct kvm_shadow_walk_iterator it; + struct kvm_mmu_page *sp; + int ret; + gfn_t gfn = gpa >> PAGE_SHIFT; + gfn_t base_gfn = gfn; + + if (!VALID_PAGE(vcpu->arch.mmu->root_hpa)) + return RET_PF_RETRY; + + trace_kvm_mmu_spte_requested(gpa, level, pfn); + for_each_shadow_entry(vcpu, gpa, it) { + /* + * We cannot overwrite existing page tables with an NX + * large page, as the leaf could be executable. + */ + disallowed_hugepage_adjust(it, gfn, &pfn, &level); + + base_gfn = gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1); + if (it.level == level) + break; + + drop_large_spte(vcpu, it.sptep); + if (!is_shadow_present_pte(*it.sptep)) { + sp = kvm_mmu_get_page(vcpu, base_gfn, it.addr, + it.level - 1, true, ACC_ALL); + + link_shadow_page(vcpu, it.sptep, sp); + if (lpage_disallowed) + account_huge_nx_page(vcpu->kvm, sp); + } + } + + ret = mmu_set_spte(vcpu, it.sptep, ACC_ALL, + write, level, base_gfn, pfn, prefault, + map_writable); + direct_pte_prefetch(vcpu, it.sptep); + ++vcpu->stat.pf_fixed; + return ret; +} + +static void kvm_send_hwpoison_signal(unsigned long address, struct task_struct *tsk) +{ + send_sig_mceerr(BUS_MCEERR_AR, (void __user *)address, PAGE_SHIFT, tsk); +} + +static int kvm_handle_bad_page(struct kvm_vcpu *vcpu, gfn_t gfn, kvm_pfn_t pfn) +{ + /* + * Do not cache the mmio info caused by writing the readonly gfn + * into the spte otherwise read access on readonly gfn also can + * caused mmio page fault and treat it as mmio access. 
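+	 * Hence KVM_PFN_ERR_RO_FAULT is turned into RET_PF_EMULATE below
+	 * without caching anything.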
+ */ + if (pfn == KVM_PFN_ERR_RO_FAULT) + return RET_PF_EMULATE; + + if (pfn == KVM_PFN_ERR_HWPOISON) { + kvm_send_hwpoison_signal(kvm_vcpu_gfn_to_hva(vcpu, gfn), current); + return RET_PF_RETRY; + } + + return -EFAULT; +} + +static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu, + gfn_t gfn, kvm_pfn_t *pfnp, + int *levelp) +{ + kvm_pfn_t pfn = *pfnp; + int level = *levelp; + + /* + * Check if it's a transparent hugepage. If this would be an + * hugetlbfs page, level wouldn't be set to + * PT_PAGE_TABLE_LEVEL and there would be no adjustment done + * here. + */ + if (!is_error_noslot_pfn(pfn) && !kvm_is_reserved_pfn(pfn) && + !kvm_is_zone_device_pfn(pfn) && level == PT_PAGE_TABLE_LEVEL && + PageTransCompoundMap(pfn_to_page(pfn)) && + !mmu_gfn_lpage_is_disallowed(vcpu, gfn, PT_DIRECTORY_LEVEL)) { + unsigned long mask; + /* + * mmu_notifier_retry was successful and we hold the + * mmu_lock here, so the pmd can't become splitting + * from under us, and in turn + * __split_huge_page_refcount() can't run from under + * us and we can safely transfer the refcount from + * PG_tail to PG_head as we switch the pfn to tail to + * head. + */ + *levelp = level = PT_DIRECTORY_LEVEL; + mask = KVM_PAGES_PER_HPAGE(level) - 1; + VM_BUG_ON((gfn & mask) != (pfn & mask)); + if (pfn & mask) { + kvm_release_pfn_clean(pfn); + pfn &= ~mask; + kvm_get_pfn(pfn); + *pfnp = pfn; + } + } +} + +static bool handle_abnormal_pfn(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn, + kvm_pfn_t pfn, unsigned access, int *ret_val) +{ + /* The pfn is invalid, report the error! */ + if (unlikely(is_error_pfn(pfn))) { + *ret_val = kvm_handle_bad_page(vcpu, gfn, pfn); + return true; + } + + if (unlikely(is_noslot_pfn(pfn))) + vcpu_cache_mmio_info(vcpu, gva, gfn, + access & shadow_mmio_access_mask); + + return false; +} + +static bool page_fault_can_be_fast(u32 error_code) +{ + /* + * Do not fix the mmio spte with invalid generation number which + * need to be updated by slow page fault path. + */ + if (unlikely(error_code & PFERR_RSVD_MASK)) + return false; + + /* See if the page fault is due to an NX violation */ + if (unlikely(((error_code & (PFERR_FETCH_MASK | PFERR_PRESENT_MASK)) + == (PFERR_FETCH_MASK | PFERR_PRESENT_MASK)))) + return false; + + /* + * #PF can be fast if: + * 1. The shadow page table entry is not present, which could mean that + * the fault is potentially caused by access tracking (if enabled). + * 2. The shadow page table entry is present and the fault + * is caused by write-protect, that means we just need change the W + * bit of the spte which can be done out of mmu-lock. + * + * However, if access tracking is disabled we know that a non-present + * page must be a genuine page fault where we have to create a new SPTE. + * So, if access tracking is disabled, we return true only for write + * accesses to a present page. + */ + + return shadow_acc_track_mask != 0 || + ((error_code & (PFERR_WRITE_MASK | PFERR_PRESENT_MASK)) + == (PFERR_WRITE_MASK | PFERR_PRESENT_MASK)); +} + +/* + * Returns true if the SPTE was fixed successfully. Otherwise, + * someone else modified the SPTE from its original value. + */ +static bool +fast_pf_fix_direct_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, + u64 *sptep, u64 old_spte, u64 new_spte) +{ + gfn_t gfn; + + WARN_ON(!sp->role.direct); + + /* + * Theoretically we could also set dirty bit (and flush TLB) here in + * order to eliminate unnecessary PML logging. See comments in + * set_spte. 
But fast_page_fault is very unlikely to happen with PML + * enabled, so we do not do this. This might result in the same GPA + * to be logged in PML buffer again when the write really happens, and + * eventually to be called by mark_page_dirty twice. But it's also no + * harm. This also avoids the TLB flush needed after setting dirty bit + * so non-PML cases won't be impacted. + * + * Compare with set_spte where instead shadow_dirty_mask is set. + */ + if (cmpxchg64(sptep, old_spte, new_spte) != old_spte) + return false; + + if (is_writable_pte(new_spte) && !is_writable_pte(old_spte)) { + /* + * The gfn of direct spte is stable since it is + * calculated by sp->gfn. + */ + gfn = kvm_mmu_page_get_gfn(sp, sptep - sp->spt); + kvm_vcpu_mark_page_dirty(vcpu, gfn); + } + + return true; +} + +static bool is_access_allowed(u32 fault_err_code, u64 spte) +{ + if (fault_err_code & PFERR_FETCH_MASK) + return is_executable_pte(spte); + + if (fault_err_code & PFERR_WRITE_MASK) + return is_writable_pte(spte); + + /* Fault was on Read access */ + return spte & PT_PRESENT_MASK; +} + +/* + * Return value: + * - true: let the vcpu to access on the same address again. + * - false: let the real page fault path to fix it. + */ +static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level, + u32 error_code) +{ + struct kvm_shadow_walk_iterator iterator; + struct kvm_mmu_page *sp; + bool fault_handled = false; + u64 spte = 0ull; + uint retry_count = 0; + + if (!VALID_PAGE(vcpu->arch.mmu->root_hpa)) + return false; + + if (!page_fault_can_be_fast(error_code)) + return false; + + walk_shadow_page_lockless_begin(vcpu); + + do { + u64 new_spte; + + for_each_shadow_entry_lockless(vcpu, gva, iterator, spte) + if (!is_shadow_present_pte(spte) || + iterator.level < level) + break; + + sp = page_header(__pa(iterator.sptep)); + if (!is_last_spte(spte, sp->role.level)) + break; + + /* + * Check whether the memory access that caused the fault would + * still cause it if it were to be performed right now. If not, + * then this is a spurious fault caused by TLB lazily flushed, + * or some other CPU has already fixed the PTE after the + * current CPU took the fault. + * + * Need not check the access of upper level table entries since + * they are always ACC_ALL. + */ + if (is_access_allowed(error_code, spte)) { + fault_handled = true; + break; + } + + new_spte = spte; + + if (is_access_track_spte(spte)) + new_spte = restore_acc_track_spte(new_spte); + + /* + * Currently, to simplify the code, write-protection can + * be removed in the fast path only if the SPTE was + * write-protected for dirty-logging or access tracking. + */ + if ((error_code & PFERR_WRITE_MASK) && + spte_can_locklessly_be_made_writable(spte)) + { + new_spte |= PT_WRITABLE_MASK; + + /* + * Do not fix write-permission on the large spte. Since + * we only dirty the first page into the dirty-bitmap in + * fast_pf_fix_direct_spte(), other pages are missed + * if its slot has dirty logging enabled. + * + * Instead, we let the slow page fault path create a + * normal spte to fix the access. + * + * See the comments in kvm_arch_commit_memory_region(). + */ + if (sp->role.level > PT_PAGE_TABLE_LEVEL) + break; + } + + /* Verify that the fault can be handled in the fast path */ + if (new_spte == spte || + !is_access_allowed(error_code, new_spte)) + break; + + /* + * Currently, fast page fault only works for direct mapping + * since the gfn is not stable for indirect shadow page. See + * Documentation/virt/kvm/locking.txt to get more detail. 
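+		 * The cmpxchg64() in fast_pf_fix_direct_spte() makes the
+		 * fix atomic; if another CPU changed the spte meanwhile,
+		 * the attempt fails and we go around the retry loop again.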
+ */ + fault_handled = fast_pf_fix_direct_spte(vcpu, sp, + iterator.sptep, spte, + new_spte); + if (fault_handled) + break; + + if (++retry_count > 4) { + printk_once(KERN_WARNING + "kvm: Fast #PF retrying more than 4 times.\n"); + break; + } + + } while (true); + + trace_fast_page_fault(vcpu, gva, error_code, iterator.sptep, + spte, fault_handled); + walk_shadow_page_lockless_end(vcpu); + + return fault_handled; +} + +static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, + gva_t gva, kvm_pfn_t *pfn, bool write, bool *writable); +static int make_mmu_pages_available(struct kvm_vcpu *vcpu); + +static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, + gfn_t gfn, bool prefault) +{ + int r; + int level; + bool force_pt_level; + kvm_pfn_t pfn; + unsigned long mmu_seq; + bool map_writable, write = error_code & PFERR_WRITE_MASK; + bool lpage_disallowed = (error_code & PFERR_FETCH_MASK) && + is_nx_huge_page_enabled(); + + force_pt_level = lpage_disallowed; + level = mapping_level(vcpu, gfn, &force_pt_level); + if (likely(!force_pt_level)) { + /* + * This path builds a PAE pagetable - so we can map + * 2mb pages at maximum. Therefore check if the level + * is larger than that. + */ + if (level > PT_DIRECTORY_LEVEL) + level = PT_DIRECTORY_LEVEL; + + gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1); + } + + if (fast_page_fault(vcpu, v, level, error_code)) + return RET_PF_RETRY; + + mmu_seq = vcpu->kvm->mmu_notifier_seq; + smp_rmb(); + + if (try_async_pf(vcpu, prefault, gfn, v, &pfn, write, &map_writable)) + return RET_PF_RETRY; + + if (handle_abnormal_pfn(vcpu, v, gfn, pfn, ACC_ALL, &r)) + return r; + + r = RET_PF_RETRY; + spin_lock(&vcpu->kvm->mmu_lock); + if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) + goto out_unlock; + if (make_mmu_pages_available(vcpu) < 0) + goto out_unlock; + if (likely(!force_pt_level)) + transparent_hugepage_adjust(vcpu, gfn, &pfn, &level); + r = __direct_map(vcpu, v, write, map_writable, level, pfn, + prefault, false); +out_unlock: + spin_unlock(&vcpu->kvm->mmu_lock); + kvm_release_pfn_clean(pfn); + return r; +} + +static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa, + struct list_head *invalid_list) +{ + struct kvm_mmu_page *sp; + + if (!VALID_PAGE(*root_hpa)) + return; + + sp = page_header(*root_hpa & PT64_BASE_ADDR_MASK); + --sp->root_count; + if (!sp->root_count && sp->role.invalid) + kvm_mmu_prepare_zap_page(kvm, sp, invalid_list); + + *root_hpa = INVALID_PAGE; +} + +/* roots_to_free must be some combination of the KVM_MMU_ROOT_* flags */ +void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, + ulong roots_to_free) +{ + int i; + LIST_HEAD(invalid_list); + bool free_active_root = roots_to_free & KVM_MMU_ROOT_CURRENT; + + BUILD_BUG_ON(KVM_MMU_NUM_PREV_ROOTS >= BITS_PER_LONG); + + /* Before acquiring the MMU lock, see if we need to do any real work. 
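+	 * There is none when neither the active root nor any requested
+	 * previous root is a valid page.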
*/ + if (!(free_active_root && VALID_PAGE(mmu->root_hpa))) { + for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) + if ((roots_to_free & KVM_MMU_ROOT_PREVIOUS(i)) && + VALID_PAGE(mmu->prev_roots[i].hpa)) + break; + + if (i == KVM_MMU_NUM_PREV_ROOTS) + return; + } + + spin_lock(&vcpu->kvm->mmu_lock); + + for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) + if (roots_to_free & KVM_MMU_ROOT_PREVIOUS(i)) + mmu_free_root_page(vcpu->kvm, &mmu->prev_roots[i].hpa, + &invalid_list); + + if (free_active_root) { + if (mmu->shadow_root_level >= PT64_ROOT_4LEVEL && + (mmu->root_level >= PT64_ROOT_4LEVEL || mmu->direct_map)) { + mmu_free_root_page(vcpu->kvm, &mmu->root_hpa, + &invalid_list); + } else { + for (i = 0; i < 4; ++i) + if (mmu->pae_root[i] != 0) + mmu_free_root_page(vcpu->kvm, + &mmu->pae_root[i], + &invalid_list); + mmu->root_hpa = INVALID_PAGE; + } + mmu->root_cr3 = 0; + } + + kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); + spin_unlock(&vcpu->kvm->mmu_lock); +} +EXPORT_SYMBOL_GPL(kvm_mmu_free_roots); + +static int mmu_check_root(struct kvm_vcpu *vcpu, gfn_t root_gfn) +{ + int ret = 0; + + if (!kvm_is_visible_gfn(vcpu->kvm, root_gfn)) { + kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); + ret = 1; + } + + return ret; +} + +static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu) +{ + struct kvm_mmu_page *sp; + unsigned i; + + if (vcpu->arch.mmu->shadow_root_level >= PT64_ROOT_4LEVEL) { + spin_lock(&vcpu->kvm->mmu_lock); + if(make_mmu_pages_available(vcpu) < 0) { + spin_unlock(&vcpu->kvm->mmu_lock); + return -ENOSPC; + } + sp = kvm_mmu_get_page(vcpu, 0, 0, + vcpu->arch.mmu->shadow_root_level, 1, ACC_ALL); + ++sp->root_count; + spin_unlock(&vcpu->kvm->mmu_lock); + vcpu->arch.mmu->root_hpa = __pa(sp->spt); + } else if (vcpu->arch.mmu->shadow_root_level == PT32E_ROOT_LEVEL) { + for (i = 0; i < 4; ++i) { + hpa_t root = vcpu->arch.mmu->pae_root[i]; + + MMU_WARN_ON(VALID_PAGE(root)); + spin_lock(&vcpu->kvm->mmu_lock); + if (make_mmu_pages_available(vcpu) < 0) { + spin_unlock(&vcpu->kvm->mmu_lock); + return -ENOSPC; + } + sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT), + i << 30, PT32_ROOT_LEVEL, 1, ACC_ALL); + root = __pa(sp->spt); + ++sp->root_count; + spin_unlock(&vcpu->kvm->mmu_lock); + vcpu->arch.mmu->pae_root[i] = root | PT_PRESENT_MASK; + } + vcpu->arch.mmu->root_hpa = __pa(vcpu->arch.mmu->pae_root); + } else + BUG(); + vcpu->arch.mmu->root_cr3 = vcpu->arch.mmu->get_cr3(vcpu); + + return 0; +} + +static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) +{ + struct kvm_mmu_page *sp; + u64 pdptr, pm_mask; + gfn_t root_gfn, root_cr3; + int i; + + root_cr3 = vcpu->arch.mmu->get_cr3(vcpu); + root_gfn = root_cr3 >> PAGE_SHIFT; + + if (mmu_check_root(vcpu, root_gfn)) + return 1; + + /* + * Do we shadow a long mode page table? If so we need to + * write-protect the guests page table root. + */ + if (vcpu->arch.mmu->root_level >= PT64_ROOT_4LEVEL) { + hpa_t root = vcpu->arch.mmu->root_hpa; + + MMU_WARN_ON(VALID_PAGE(root)); + + spin_lock(&vcpu->kvm->mmu_lock); + if (make_mmu_pages_available(vcpu) < 0) { + spin_unlock(&vcpu->kvm->mmu_lock); + return -ENOSPC; + } + sp = kvm_mmu_get_page(vcpu, root_gfn, 0, + vcpu->arch.mmu->shadow_root_level, 0, ACC_ALL); + root = __pa(sp->spt); + ++sp->root_count; + spin_unlock(&vcpu->kvm->mmu_lock); + vcpu->arch.mmu->root_hpa = root; + goto set_root_cr3; + } + + /* + * We shadow a 32 bit page table. This may be a legacy 2-level + * or a PAE 3-level page table. In either case we need to be aware that + * the shadow page table may be a PAE or a long mode page table. 
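+	 * With a 4-level shadow the PAE roots below serve as non-leaf
+	 * entries, hence the extra accessed/writable/user bits added to
+	 * pm_mask.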
+ */ + pm_mask = PT_PRESENT_MASK; + if (vcpu->arch.mmu->shadow_root_level == PT64_ROOT_4LEVEL) + pm_mask |= PT_ACCESSED_MASK | PT_WRITABLE_MASK | PT_USER_MASK; + + for (i = 0; i < 4; ++i) { + hpa_t root = vcpu->arch.mmu->pae_root[i]; + + MMU_WARN_ON(VALID_PAGE(root)); + if (vcpu->arch.mmu->root_level == PT32E_ROOT_LEVEL) { + pdptr = vcpu->arch.mmu->get_pdptr(vcpu, i); + if (!(pdptr & PT_PRESENT_MASK)) { + vcpu->arch.mmu->pae_root[i] = 0; + continue; + } + root_gfn = pdptr >> PAGE_SHIFT; + if (mmu_check_root(vcpu, root_gfn)) + return 1; + } + spin_lock(&vcpu->kvm->mmu_lock); + if (make_mmu_pages_available(vcpu) < 0) { + spin_unlock(&vcpu->kvm->mmu_lock); + return -ENOSPC; + } + sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, PT32_ROOT_LEVEL, + 0, ACC_ALL); + root = __pa(sp->spt); + ++sp->root_count; + spin_unlock(&vcpu->kvm->mmu_lock); + + vcpu->arch.mmu->pae_root[i] = root | pm_mask; + } + vcpu->arch.mmu->root_hpa = __pa(vcpu->arch.mmu->pae_root); + + /* + * If we shadow a 32 bit page table with a long mode page + * table we enter this path. + */ + if (vcpu->arch.mmu->shadow_root_level == PT64_ROOT_4LEVEL) { + if (vcpu->arch.mmu->lm_root == NULL) { + /* + * The additional page necessary for this is only + * allocated on demand. + */ + + u64 *lm_root; + + lm_root = (void*)get_zeroed_page(GFP_KERNEL_ACCOUNT); + if (lm_root == NULL) + return 1; + + lm_root[0] = __pa(vcpu->arch.mmu->pae_root) | pm_mask; + + vcpu->arch.mmu->lm_root = lm_root; + } + + vcpu->arch.mmu->root_hpa = __pa(vcpu->arch.mmu->lm_root); + } + +set_root_cr3: + vcpu->arch.mmu->root_cr3 = root_cr3; + + return 0; +} + +static int mmu_alloc_roots(struct kvm_vcpu *vcpu) +{ + if (vcpu->arch.mmu->direct_map) + return mmu_alloc_direct_roots(vcpu); + else + return mmu_alloc_shadow_roots(vcpu); +} + +void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu) +{ + int i; + struct kvm_mmu_page *sp; + + if (vcpu->arch.mmu->direct_map) + return; + + if (!VALID_PAGE(vcpu->arch.mmu->root_hpa)) + return; + + vcpu_clear_mmio_info(vcpu, MMIO_GVA_ANY); + + if (vcpu->arch.mmu->root_level >= PT64_ROOT_4LEVEL) { + hpa_t root = vcpu->arch.mmu->root_hpa; + sp = page_header(root); + + /* + * Even if another CPU was marking the SP as unsync-ed + * simultaneously, any guest page table changes are not + * guaranteed to be visible anyway until this VCPU issues a TLB + * flush strictly after those changes are made. We only need to + * ensure that the other CPU sets these flags before any actual + * changes to the page tables are made. The comments in + * mmu_need_write_protect() describe what could go wrong if this + * requirement isn't satisfied. 
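+		 * That ordering is provided by the paired load-acquires on
+		 * sp->unsync and sp->unsync_children below.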
+ */ + if (!smp_load_acquire(&sp->unsync) && + !smp_load_acquire(&sp->unsync_children)) + return; + + spin_lock(&vcpu->kvm->mmu_lock); + kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC); + + mmu_sync_children(vcpu, sp); + + kvm_mmu_audit(vcpu, AUDIT_POST_SYNC); + spin_unlock(&vcpu->kvm->mmu_lock); + return; + } + + spin_lock(&vcpu->kvm->mmu_lock); + kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC); + + for (i = 0; i < 4; ++i) { + hpa_t root = vcpu->arch.mmu->pae_root[i]; + + if (root && VALID_PAGE(root)) { + root &= PT64_BASE_ADDR_MASK; + sp = page_header(root); + mmu_sync_children(vcpu, sp); + } + } + + kvm_mmu_audit(vcpu, AUDIT_POST_SYNC); + spin_unlock(&vcpu->kvm->mmu_lock); +} +EXPORT_SYMBOL_GPL(kvm_mmu_sync_roots); + +static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr, + u32 access, struct x86_exception *exception) +{ + if (exception) + exception->error_code = 0; + return vaddr; +} + +static gpa_t nonpaging_gva_to_gpa_nested(struct kvm_vcpu *vcpu, gva_t vaddr, + u32 access, + struct x86_exception *exception) +{ + if (exception) + exception->error_code = 0; + return vcpu->arch.nested_mmu.translate_gpa(vcpu, vaddr, access, exception); +} + +static bool +__is_rsvd_bits_set(struct rsvd_bits_validate *rsvd_check, u64 pte, int level) +{ + int bit7 = (pte >> 7) & 1, low6 = pte & 0x3f; + + return (pte & rsvd_check->rsvd_bits_mask[bit7][level-1]) | + ((rsvd_check->bad_mt_xwr & (1ull << low6)) != 0); +} + +static bool is_rsvd_bits_set(struct kvm_mmu *mmu, u64 gpte, int level) +{ + return __is_rsvd_bits_set(&mmu->guest_rsvd_check, gpte, level); +} + +static bool is_shadow_zero_bits_set(struct kvm_mmu *mmu, u64 spte, int level) +{ + return __is_rsvd_bits_set(&mmu->shadow_zero_check, spte, level); +} + +static bool mmio_info_in_cache(struct kvm_vcpu *vcpu, u64 addr, bool direct) +{ + /* + * A nested guest cannot use the MMIO cache if it is using nested + * page tables, because cr2 is a nGPA while the cache stores GPAs. + */ + if (mmu_is_nested(vcpu)) + return false; + + if (direct) + return vcpu_match_mmio_gpa(vcpu, addr); + + return vcpu_match_mmio_gva(vcpu, addr); +} + +/* return true if reserved bit is detected on spte. 
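+ * The walk records each spte it visits so that the whole hierarchy
+ * can be dumped if a reserved bit is detected, and returns the last
+ * spte read through @sptep.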
*/ +static bool +walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep) +{ + struct kvm_shadow_walk_iterator iterator; + u64 sptes[PT64_ROOT_MAX_LEVEL], spte = 0ull; + int root, leaf; + bool reserved = false; + + if (!VALID_PAGE(vcpu->arch.mmu->root_hpa)) + goto exit; + + walk_shadow_page_lockless_begin(vcpu); + + for (shadow_walk_init(&iterator, vcpu, addr), + leaf = root = iterator.level; + shadow_walk_okay(&iterator); + __shadow_walk_next(&iterator, spte)) { + spte = mmu_spte_get_lockless(iterator.sptep); + + sptes[leaf - 1] = spte; + leaf--; + + if (!is_shadow_present_pte(spte)) + break; + + reserved |= is_shadow_zero_bits_set(vcpu->arch.mmu, spte, + iterator.level); + } + + walk_shadow_page_lockless_end(vcpu); + + if (reserved) { + pr_err("%s: detect reserved bits on spte, addr 0x%llx, dump hierarchy:\n", + __func__, addr); + while (root > leaf) { + pr_err("------ spte 0x%llx level %d.\n", + sptes[root - 1], root); + root--; + } + } +exit: + *sptep = spte; + return reserved; +} + +static int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct) +{ + u64 spte; + bool reserved; + + if (mmio_info_in_cache(vcpu, addr, direct)) + return RET_PF_EMULATE; + + reserved = walk_shadow_page_get_mmio_spte(vcpu, addr, &spte); + if (WARN_ON(reserved)) + return -EINVAL; + + if (is_mmio_spte(spte)) { + gfn_t gfn = get_mmio_spte_gfn(spte); + unsigned access = get_mmio_spte_access(spte); + + if (!check_mmio_spte(vcpu, spte)) + return RET_PF_INVALID; + + if (direct) + addr = 0; + + trace_handle_mmio_page_fault(addr, gfn, access); + vcpu_cache_mmio_info(vcpu, addr, gfn, access); + return RET_PF_EMULATE; + } + + /* + * If the page table is zapped by other cpus, let CPU fault again on + * the address. + */ + return RET_PF_RETRY; +} + +static bool page_fault_handle_page_track(struct kvm_vcpu *vcpu, + u32 error_code, gfn_t gfn) +{ + if (unlikely(error_code & PFERR_RSVD_MASK)) + return false; + + if (!(error_code & PFERR_PRESENT_MASK) || + !(error_code & PFERR_WRITE_MASK)) + return false; + + /* + * guest is writing the page which is write tracked which can + * not be fixed by page fault handler. 
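+	 * Returning true makes the callers emulate the write instead.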
+ */ + if (kvm_page_track_is_active(vcpu, gfn, KVM_PAGE_TRACK_WRITE)) + return true; + + return false; +} + +static void shadow_page_table_clear_flood(struct kvm_vcpu *vcpu, gva_t addr) +{ + struct kvm_shadow_walk_iterator iterator; + u64 spte; + + if (!VALID_PAGE(vcpu->arch.mmu->root_hpa)) + return; + + walk_shadow_page_lockless_begin(vcpu); + for_each_shadow_entry_lockless(vcpu, addr, iterator, spte) { + clear_sp_write_flooding_count(iterator.sptep); + if (!is_shadow_present_pte(spte)) + break; + } + walk_shadow_page_lockless_end(vcpu); +} + +static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva, + u32 error_code, bool prefault) +{ + gfn_t gfn = gva >> PAGE_SHIFT; + int r; + + pgprintk("%s: gva %lx error %x\n", __func__, gva, error_code); + + if (page_fault_handle_page_track(vcpu, error_code, gfn)) + return RET_PF_EMULATE; + + r = mmu_topup_memory_caches(vcpu); + if (r) + return r; + + MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root_hpa)); + + + return nonpaging_map(vcpu, gva & PAGE_MASK, + error_code, gfn, prefault); +} + +static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn) +{ + struct kvm_arch_async_pf arch; + + arch.token = (vcpu->arch.apf.id++ << 12) | vcpu->vcpu_id; + arch.gfn = gfn; + arch.direct_map = vcpu->arch.mmu->direct_map; + arch.cr3 = vcpu->arch.mmu->get_cr3(vcpu); + + return kvm_setup_async_pf(vcpu, gva, kvm_vcpu_gfn_to_hva(vcpu, gfn), &arch); +} + +static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, + gva_t gva, kvm_pfn_t *pfn, bool write, bool *writable) +{ + struct kvm_memory_slot *slot; + bool async; + + /* + * Don't expose private memslots to L2. + */ + if (is_guest_mode(vcpu) && !kvm_is_visible_gfn(vcpu->kvm, gfn)) { + *pfn = KVM_PFN_NOSLOT; + return false; + } + + slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); + async = false; + *pfn = __gfn_to_pfn_memslot(slot, gfn, false, &async, write, writable); + if (!async) + return false; /* *pfn has correct page already */ + + if (!prefault && kvm_can_do_async_pf(vcpu)) { + trace_kvm_try_async_get_page(gva, gfn); + if (kvm_find_async_pf_gfn(vcpu, gfn)) { + trace_kvm_async_pf_doublefault(gva, gfn); + kvm_make_request(KVM_REQ_APF_HALT, vcpu); + return true; + } else if (kvm_arch_setup_async_pf(vcpu, gva, gfn)) + return true; + } + + *pfn = __gfn_to_pfn_memslot(slot, gfn, false, NULL, write, writable); + return false; +} + +int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code, + u64 fault_address, char *insn, int insn_len) +{ + int r = 1; + + vcpu->arch.l1tf_flush_l1d = true; + switch (vcpu->arch.apf.host_apf_reason) { + default: + trace_kvm_page_fault(fault_address, error_code); + + if (kvm_event_needs_reinjection(vcpu)) + kvm_mmu_unprotect_page_virt(vcpu, fault_address); + r = kvm_mmu_page_fault(vcpu, fault_address, error_code, insn, + insn_len); + break; + case KVM_PV_REASON_PAGE_NOT_PRESENT: + vcpu->arch.apf.host_apf_reason = 0; + local_irq_disable(); + kvm_async_pf_task_wait(fault_address, 0); + local_irq_enable(); + break; + case KVM_PV_REASON_PAGE_READY: + vcpu->arch.apf.host_apf_reason = 0; + local_irq_disable(); + kvm_async_pf_task_wake(fault_address); + local_irq_enable(); + break; + } + return r; +} +EXPORT_SYMBOL_GPL(kvm_handle_page_fault); + +static bool +check_hugepage_cache_consistency(struct kvm_vcpu *vcpu, gfn_t gfn, int level) +{ + int page_num = KVM_PAGES_PER_HPAGE(level); + + gfn &= ~(page_num - 1); + + return kvm_mtrr_check_gfn_range_consistency(vcpu, gfn, page_num); +} + +static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 
error_code, + bool prefault) +{ + kvm_pfn_t pfn; + int r; + int level; + bool force_pt_level; + gfn_t gfn = gpa >> PAGE_SHIFT; + unsigned long mmu_seq; + int write = error_code & PFERR_WRITE_MASK; + bool map_writable; + bool lpage_disallowed = (error_code & PFERR_FETCH_MASK) && + is_nx_huge_page_enabled(); + + MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root_hpa)); + + if (page_fault_handle_page_track(vcpu, error_code, gfn)) + return RET_PF_EMULATE; + + r = mmu_topup_memory_caches(vcpu); + if (r) + return r; + + force_pt_level = + lpage_disallowed || + !check_hugepage_cache_consistency(vcpu, gfn, PT_DIRECTORY_LEVEL); + level = mapping_level(vcpu, gfn, &force_pt_level); + if (likely(!force_pt_level)) { + if (level > PT_DIRECTORY_LEVEL && + !check_hugepage_cache_consistency(vcpu, gfn, level)) + level = PT_DIRECTORY_LEVEL; + gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1); + } + + if (fast_page_fault(vcpu, gpa, level, error_code)) + return RET_PF_RETRY; + + mmu_seq = vcpu->kvm->mmu_notifier_seq; + smp_rmb(); + + if (try_async_pf(vcpu, prefault, gfn, gpa, &pfn, write, &map_writable)) + return RET_PF_RETRY; + + if (handle_abnormal_pfn(vcpu, 0, gfn, pfn, ACC_ALL, &r)) + return r; + + r = RET_PF_RETRY; + spin_lock(&vcpu->kvm->mmu_lock); + if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) + goto out_unlock; + if (make_mmu_pages_available(vcpu) < 0) + goto out_unlock; + if (likely(!force_pt_level)) + transparent_hugepage_adjust(vcpu, gfn, &pfn, &level); + r = __direct_map(vcpu, gpa, write, map_writable, level, pfn, + prefault, lpage_disallowed); +out_unlock: + spin_unlock(&vcpu->kvm->mmu_lock); + kvm_release_pfn_clean(pfn); + return r; +} + +static void nonpaging_init_context(struct kvm_vcpu *vcpu, + struct kvm_mmu *context) +{ + context->page_fault = nonpaging_page_fault; + context->gva_to_gpa = nonpaging_gva_to_gpa; + context->sync_page = nonpaging_sync_page; + context->invlpg = nonpaging_invlpg; + context->update_pte = nonpaging_update_pte; + context->root_level = 0; + context->shadow_root_level = PT32E_ROOT_LEVEL; + context->direct_map = true; + context->nx = false; +} + +/* + * Find out if a previously cached root matching the new CR3/role is available. + * The current root is also inserted into the cache. + * If a matching root was found, it is assigned to kvm_mmu->root_hpa and true is + * returned. + * Otherwise, the LRU root from the cache is assigned to kvm_mmu->root_hpa and + * false is returned. This root should now be freed by the caller. + */ +static bool cached_root_available(struct kvm_vcpu *vcpu, gpa_t new_cr3, + union kvm_mmu_page_role new_role) +{ + uint i; + struct kvm_mmu_root_info root; + struct kvm_mmu *mmu = vcpu->arch.mmu; + + root.cr3 = mmu->root_cr3; + root.hpa = mmu->root_hpa; + + for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) { + swap(root, mmu->prev_roots[i]); + + if (new_cr3 == root.cr3 && VALID_PAGE(root.hpa) && + page_header(root.hpa) != NULL && + new_role.word == page_header(root.hpa)->role.word) + break; + } + + mmu->root_hpa = root.hpa; + mmu->root_cr3 = root.cr3; + + return i < KVM_MMU_NUM_PREV_ROOTS; +} + +static bool fast_cr3_switch(struct kvm_vcpu *vcpu, gpa_t new_cr3, + union kvm_mmu_page_role new_role, + bool skip_tlb_flush) +{ + struct kvm_mmu *mmu = vcpu->arch.mmu; + + /* + * For now, limit the fast switch to 64-bit hosts+VMs in order to avoid + * having to deal with PDPTEs. We may add support for 32-bit hosts/VMs + * later if necessary. 
+ */ + if (mmu->shadow_root_level >= PT64_ROOT_4LEVEL && + mmu->root_level >= PT64_ROOT_4LEVEL) { + if (mmu_check_root(vcpu, new_cr3 >> PAGE_SHIFT)) + return false; + + if (cached_root_available(vcpu, new_cr3, new_role)) { + /* + * It is possible that the cached previous root page is + * obsolete because of a change in the MMU generation + * number. However, changing the generation number is + * accompanied by KVM_REQ_MMU_RELOAD, which will free + * the root set here and allocate a new one. + */ + kvm_make_request(KVM_REQ_LOAD_CR3, vcpu); + if (!skip_tlb_flush) { + kvm_make_request(KVM_REQ_MMU_SYNC, vcpu); + kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); + } + + /* + * The last MMIO access's GVA and GPA are cached in the + * VCPU. When switching to a new CR3, that GVA->GPA + * mapping may no longer be valid. So clear any cached + * MMIO info even when we don't need to sync the shadow + * page tables. + */ + vcpu_clear_mmio_info(vcpu, MMIO_GVA_ANY); + + __clear_sp_write_flooding_count( + page_header(mmu->root_hpa)); + + return true; + } + } + + return false; +} + +static void __kvm_mmu_new_cr3(struct kvm_vcpu *vcpu, gpa_t new_cr3, + union kvm_mmu_page_role new_role, + bool skip_tlb_flush) +{ + if (!fast_cr3_switch(vcpu, new_cr3, new_role, skip_tlb_flush)) + kvm_mmu_free_roots(vcpu, vcpu->arch.mmu, + KVM_MMU_ROOT_CURRENT); +} + +void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu, gpa_t new_cr3, bool skip_tlb_flush) +{ + __kvm_mmu_new_cr3(vcpu, new_cr3, kvm_mmu_calc_root_page_role(vcpu), + skip_tlb_flush); +} +EXPORT_SYMBOL_GPL(kvm_mmu_new_cr3); + +static unsigned long get_cr3(struct kvm_vcpu *vcpu) +{ + return kvm_read_cr3(vcpu); +} + +static void inject_page_fault(struct kvm_vcpu *vcpu, + struct x86_exception *fault) +{ + vcpu->arch.mmu->inject_page_fault(vcpu, fault); +} + +static bool sync_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn, + unsigned access, int *nr_present) +{ + if (unlikely(is_mmio_spte(*sptep))) { + if (gfn != get_mmio_spte_gfn(*sptep)) { + mmu_spte_clear_no_track(sptep); + return true; + } + + (*nr_present)++; + mark_mmio_spte(vcpu, sptep, gfn, access); + return true; + } + + return false; +} + +static inline bool is_last_gpte(struct kvm_mmu *mmu, + unsigned level, unsigned gpte) +{ + /* + * The RHS has bit 7 set iff level < mmu->last_nonleaf_level. + * If it is clear, there are no large pages at this level, so clear + * PT_PAGE_SIZE_MASK in gpte if that is the case. + */ + gpte &= level - mmu->last_nonleaf_level; + + /* + * PT_PAGE_TABLE_LEVEL always terminates. The RHS has bit 7 set + * iff level <= PT_PAGE_TABLE_LEVEL, which for our purpose means + * level == PT_PAGE_TABLE_LEVEL; set PT_PAGE_SIZE_MASK in gpte then. + */ + gpte |= level - PT_PAGE_TABLE_LEVEL - 1; + + return gpte & PT_PAGE_SIZE_MASK; +} + +#define PTTYPE_EPT 18 /* arbitrary */ +#define PTTYPE PTTYPE_EPT +#include "paging_tmpl.h" +#undef PTTYPE + +#define PTTYPE 64 +#include "paging_tmpl.h" +#undef PTTYPE + +#define PTTYPE 32 +#include "paging_tmpl.h" +#undef PTTYPE + +static void +__reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, + struct rsvd_bits_validate *rsvd_check, + int maxphyaddr, int level, bool nx, bool gbpages, + bool pse, bool amd) +{ + u64 exb_bit_rsvd = 0; + u64 gbpages_bit_rsvd = 0; + u64 nonleaf_bit8_rsvd = 0; + + rsvd_check->bad_mt_xwr = 0; + + if (!nx) + exb_bit_rsvd = rsvd_bits(63, 63); + if (!gbpages) + gbpages_bit_rsvd = rsvd_bits(7, 7); + + /* + * Non-leaf PML4Es and PDPEs reserve bit 8 (which would be the G bit for + * leaf entries) on AMD CPUs only. 
+ */ + if (amd) + nonleaf_bit8_rsvd = rsvd_bits(8, 8); + + switch (level) { + case PT32_ROOT_LEVEL: + /* no rsvd bits for 2 level 4K page table entries */ + rsvd_check->rsvd_bits_mask[0][1] = 0; + rsvd_check->rsvd_bits_mask[0][0] = 0; + rsvd_check->rsvd_bits_mask[1][0] = + rsvd_check->rsvd_bits_mask[0][0]; + + if (!pse) { + rsvd_check->rsvd_bits_mask[1][1] = 0; + break; + } + + if (is_cpuid_PSE36()) + /* 36bits PSE 4MB page */ + rsvd_check->rsvd_bits_mask[1][1] = rsvd_bits(17, 21); + else + /* 32 bits PSE 4MB page */ + rsvd_check->rsvd_bits_mask[1][1] = rsvd_bits(13, 21); + break; + case PT32E_ROOT_LEVEL: + rsvd_check->rsvd_bits_mask[0][2] = + rsvd_bits(maxphyaddr, 63) | + rsvd_bits(5, 8) | rsvd_bits(1, 2); /* PDPTE */ + rsvd_check->rsvd_bits_mask[0][1] = exb_bit_rsvd | + rsvd_bits(maxphyaddr, 62); /* PDE */ + rsvd_check->rsvd_bits_mask[0][0] = exb_bit_rsvd | + rsvd_bits(maxphyaddr, 62); /* PTE */ + rsvd_check->rsvd_bits_mask[1][1] = exb_bit_rsvd | + rsvd_bits(maxphyaddr, 62) | + rsvd_bits(13, 20); /* large page */ + rsvd_check->rsvd_bits_mask[1][0] = + rsvd_check->rsvd_bits_mask[0][0]; + break; + case PT64_ROOT_5LEVEL: + rsvd_check->rsvd_bits_mask[0][4] = exb_bit_rsvd | + nonleaf_bit8_rsvd | rsvd_bits(7, 7) | + rsvd_bits(maxphyaddr, 51); + rsvd_check->rsvd_bits_mask[1][4] = + rsvd_check->rsvd_bits_mask[0][4]; + /* fall through */ + case PT64_ROOT_4LEVEL: + rsvd_check->rsvd_bits_mask[0][3] = exb_bit_rsvd | + nonleaf_bit8_rsvd | rsvd_bits(7, 7) | + rsvd_bits(maxphyaddr, 51); + rsvd_check->rsvd_bits_mask[0][2] = exb_bit_rsvd | + nonleaf_bit8_rsvd | gbpages_bit_rsvd | + rsvd_bits(maxphyaddr, 51); + rsvd_check->rsvd_bits_mask[0][1] = exb_bit_rsvd | + rsvd_bits(maxphyaddr, 51); + rsvd_check->rsvd_bits_mask[0][0] = exb_bit_rsvd | + rsvd_bits(maxphyaddr, 51); + rsvd_check->rsvd_bits_mask[1][3] = + rsvd_check->rsvd_bits_mask[0][3]; + rsvd_check->rsvd_bits_mask[1][2] = exb_bit_rsvd | + gbpages_bit_rsvd | rsvd_bits(maxphyaddr, 51) | + rsvd_bits(13, 29); + rsvd_check->rsvd_bits_mask[1][1] = exb_bit_rsvd | + rsvd_bits(maxphyaddr, 51) | + rsvd_bits(13, 20); /* large page */ + rsvd_check->rsvd_bits_mask[1][0] = + rsvd_check->rsvd_bits_mask[0][0]; + break; + } +} + +static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, + struct kvm_mmu *context) +{ + __reset_rsvds_bits_mask(vcpu, &context->guest_rsvd_check, + cpuid_maxphyaddr(vcpu), context->root_level, + context->nx, + guest_cpuid_has(vcpu, X86_FEATURE_GBPAGES), + is_pse(vcpu), guest_cpuid_is_amd(vcpu)); +} + +static void +__reset_rsvds_bits_mask_ept(struct rsvd_bits_validate *rsvd_check, + int maxphyaddr, bool execonly) +{ + u64 bad_mt_xwr; + + rsvd_check->rsvd_bits_mask[0][4] = + rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 7); + rsvd_check->rsvd_bits_mask[0][3] = + rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 7); + rsvd_check->rsvd_bits_mask[0][2] = + rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 6); + rsvd_check->rsvd_bits_mask[0][1] = + rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 6); + rsvd_check->rsvd_bits_mask[0][0] = rsvd_bits(maxphyaddr, 51); + + /* large page */ + rsvd_check->rsvd_bits_mask[1][4] = rsvd_check->rsvd_bits_mask[0][4]; + rsvd_check->rsvd_bits_mask[1][3] = rsvd_check->rsvd_bits_mask[0][3]; + rsvd_check->rsvd_bits_mask[1][2] = + rsvd_bits(maxphyaddr, 51) | rsvd_bits(12, 29); + rsvd_check->rsvd_bits_mask[1][1] = + rsvd_bits(maxphyaddr, 51) | rsvd_bits(12, 20); + rsvd_check->rsvd_bits_mask[1][0] = rsvd_check->rsvd_bits_mask[0][0]; + + bad_mt_xwr = 0xFFull << (2 * 8); /* bits 3..5 must not be 2 */ + bad_mt_xwr |= 0xFFull << (3 * 8); /* bits 3..5 
must not be 3 */ + bad_mt_xwr |= 0xFFull << (7 * 8); /* bits 3..5 must not be 7 */ + bad_mt_xwr |= REPEAT_BYTE(1ull << 2); /* bits 0..2 must not be 010 */ + bad_mt_xwr |= REPEAT_BYTE(1ull << 6); /* bits 0..2 must not be 110 */ + if (!execonly) { + /* bits 0..2 must not be 100 unless VMX capabilities allow it */ + bad_mt_xwr |= REPEAT_BYTE(1ull << 4); + } + rsvd_check->bad_mt_xwr = bad_mt_xwr; +} + +static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu, + struct kvm_mmu *context, bool execonly) +{ + __reset_rsvds_bits_mask_ept(&context->guest_rsvd_check, + cpuid_maxphyaddr(vcpu), execonly); +} + +/* + * the page table on host is the shadow page table for the page + * table in guest or amd nested guest, its mmu features completely + * follow the features in guest. + */ +void +reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context) +{ + bool uses_nx = context->nx || + context->mmu_role.base.smep_andnot_wp; + struct rsvd_bits_validate *shadow_zero_check; + int i; + + /* + * Passing "true" to the last argument is okay; it adds a check + * on bit 8 of the SPTEs which KVM doesn't use anyway. + */ + shadow_zero_check = &context->shadow_zero_check; + __reset_rsvds_bits_mask(vcpu, shadow_zero_check, + shadow_phys_bits, + context->shadow_root_level, uses_nx, + guest_cpuid_has(vcpu, X86_FEATURE_GBPAGES), + is_pse(vcpu), true); + + if (!shadow_me_mask) + return; + + for (i = context->shadow_root_level; --i >= 0;) { + shadow_zero_check->rsvd_bits_mask[0][i] &= ~shadow_me_mask; + shadow_zero_check->rsvd_bits_mask[1][i] &= ~shadow_me_mask; + } + +} +EXPORT_SYMBOL_GPL(reset_shadow_zero_bits_mask); + +static inline bool boot_cpu_is_amd(void) +{ + WARN_ON_ONCE(!tdp_enabled); + return shadow_x_mask == 0; +} + +/* + * the direct page table on host, use as much mmu features as + * possible, however, kvm currently does not do execution-protection. + */ +static void +reset_tdp_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, + struct kvm_mmu *context) +{ + struct rsvd_bits_validate *shadow_zero_check; + int i; + + shadow_zero_check = &context->shadow_zero_check; + + if (boot_cpu_is_amd()) + __reset_rsvds_bits_mask(vcpu, shadow_zero_check, + shadow_phys_bits, + context->shadow_root_level, false, + boot_cpu_has(X86_FEATURE_GBPAGES), + true, true); + else + __reset_rsvds_bits_mask_ept(shadow_zero_check, + shadow_phys_bits, + false); + + if (!shadow_me_mask) + return; + + for (i = context->shadow_root_level; --i >= 0;) { + shadow_zero_check->rsvd_bits_mask[0][i] &= ~shadow_me_mask; + shadow_zero_check->rsvd_bits_mask[1][i] &= ~shadow_me_mask; + } +} + +/* + * as the comments in reset_shadow_zero_bits_mask() except it + * is the shadow page table for intel nested guest. + */ +static void +reset_ept_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, + struct kvm_mmu *context, bool execonly) +{ + __reset_rsvds_bits_mask_ept(&context->shadow_zero_check, + shadow_phys_bits, execonly); +} + +#define BYTE_MASK(access) \ + ((1 & (access) ? 2 : 0) | \ + (2 & (access) ? 4 : 0) | \ + (3 & (access) ? 8 : 0) | \ + (4 & (access) ? 16 : 0) | \ + (5 & (access) ? 32 : 0) | \ + (6 & (access) ? 64 : 0) | \ + (7 & (access) ? 
128 : 0)) + + +static void update_permission_bitmask(struct kvm_vcpu *vcpu, + struct kvm_mmu *mmu, bool ept) +{ + unsigned byte; + + const u8 x = BYTE_MASK(ACC_EXEC_MASK); + const u8 w = BYTE_MASK(ACC_WRITE_MASK); + const u8 u = BYTE_MASK(ACC_USER_MASK); + + bool cr4_smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP) != 0; + bool cr4_smap = kvm_read_cr4_bits(vcpu, X86_CR4_SMAP) != 0; + bool cr0_wp = is_write_protection(vcpu); + + for (byte = 0; byte < ARRAY_SIZE(mmu->permissions); ++byte) { + unsigned pfec = byte << 1; + + /* + * Each "*f" variable has a 1 bit for each UWX value + * that causes a fault with the given PFEC. + */ + + /* Faults from writes to non-writable pages */ + u8 wf = (pfec & PFERR_WRITE_MASK) ? (u8)~w : 0; + /* Faults from user mode accesses to supervisor pages */ + u8 uf = (pfec & PFERR_USER_MASK) ? (u8)~u : 0; + /* Faults from fetches of non-executable pages*/ + u8 ff = (pfec & PFERR_FETCH_MASK) ? (u8)~x : 0; + /* Faults from kernel mode fetches of user pages */ + u8 smepf = 0; + /* Faults from kernel mode accesses of user pages */ + u8 smapf = 0; + + if (!ept) { + /* Faults from kernel mode accesses to user pages */ + u8 kf = (pfec & PFERR_USER_MASK) ? 0 : u; + + /* Not really needed: !nx will cause pte.nx to fault */ + if (!mmu->nx) + ff = 0; + + /* Allow supervisor writes if !cr0.wp */ + if (!cr0_wp) + wf = (pfec & PFERR_USER_MASK) ? wf : 0; + + /* Disallow supervisor fetches of user code if cr4.smep */ + if (cr4_smep) + smepf = (pfec & PFERR_FETCH_MASK) ? kf : 0; + + /* + * SMAP:kernel-mode data accesses from user-mode + * mappings should fault. A fault is considered + * as a SMAP violation if all of the following + * conditions are true: + * - X86_CR4_SMAP is set in CR4 + * - A user page is accessed + * - The access is not a fetch + * - Page fault in kernel mode + * - if CPL = 3 or X86_EFLAGS_AC is clear + * + * Here, we cover the first three conditions. + * The fourth is computed dynamically in permission_fault(); + * PFERR_RSVD_MASK bit will be set in PFEC if the access is + * *not* subject to SMAP restrictions. + */ + if (cr4_smap) + smapf = (pfec & (PFERR_RSVD_MASK|PFERR_FETCH_MASK)) ? 0 : kf; + } + + mmu->permissions[byte] = ff | uf | wf | smepf | smapf; + } +} + +/* +* PKU is an additional mechanism by which the paging controls access to +* user-mode addresses based on the value in the PKRU register. Protection +* key violations are reported through a bit in the page fault error code. +* Unlike other bits of the error code, the PK bit is not known at the +* call site of e.g. gva_to_gpa; it must be computed directly in +* permission_fault based on two bits of PKRU, on some machine state (CR4, +* CR0, EFER, CPL), and on other bits of the error code and the page tables. +* +* In particular the following conditions come from the error code, the +* page tables and the machine state: +* - PK is always zero unless CR4.PKE=1 and EFER.LMA=1 +* - PK is always zero if RSVD=1 (reserved bit set) or F=1 (instruction fetch) +* - PK is always zero if U=0 in the page tables +* - PKRU.WD is ignored if CR0.WP=0 and the access is a supervisor access. +* +* The PKRU bitmask caches the result of these four conditions. The error +* code (minus the P bit) and the page table's U bit form an index into the +* PKRU bitmask. Two bits of the PKRU bitmask are then extracted and ANDed +* with the two bits of the PKRU register corresponding to the protection key. +* For the first three conditions above the bits will be 00, thus masking +* away both AD and WD. 
For all reads or if the last condition holds, WD +* only will be masked away. +*/ +static void update_pkru_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, + bool ept) +{ + unsigned bit; + bool wp; + + if (ept) { + mmu->pkru_mask = 0; + return; + } + + /* PKEY is enabled only if CR4.PKE and EFER.LMA are both set. */ + if (!kvm_read_cr4_bits(vcpu, X86_CR4_PKE) || !is_long_mode(vcpu)) { + mmu->pkru_mask = 0; + return; + } + + wp = is_write_protection(vcpu); + + for (bit = 0; bit < ARRAY_SIZE(mmu->permissions); ++bit) { + unsigned pfec, pkey_bits; + bool check_pkey, check_write, ff, uf, wf, pte_user; + + pfec = bit << 1; + ff = pfec & PFERR_FETCH_MASK; + uf = pfec & PFERR_USER_MASK; + wf = pfec & PFERR_WRITE_MASK; + + /* PFEC.RSVD is replaced by ACC_USER_MASK. */ + pte_user = pfec & PFERR_RSVD_MASK; + + /* + * Only need to check the access which is not an + * instruction fetch and is to a user page. + */ + check_pkey = (!ff && pte_user); + /* + * write access is controlled by PKRU if it is a + * user access or CR0.WP = 1. + */ + check_write = check_pkey && wf && (uf || wp); + + /* PKRU.AD stops both read and write access. */ + pkey_bits = !!check_pkey; + /* PKRU.WD stops write access. */ + pkey_bits |= (!!check_write) << 1; + + mmu->pkru_mask |= (pkey_bits & 3) << pfec; + } +} + +static void update_last_nonleaf_level(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu) +{ + unsigned root_level = mmu->root_level; + + mmu->last_nonleaf_level = root_level; + if (root_level == PT32_ROOT_LEVEL && is_pse(vcpu)) + mmu->last_nonleaf_level++; +} + +static void paging64_init_context_common(struct kvm_vcpu *vcpu, + struct kvm_mmu *context, + int level) +{ + context->nx = is_nx(vcpu); + context->root_level = level; + + reset_rsvds_bits_mask(vcpu, context); + update_permission_bitmask(vcpu, context, false); + update_pkru_bitmask(vcpu, context, false); + update_last_nonleaf_level(vcpu, context); + + MMU_WARN_ON(!is_pae(vcpu)); + context->page_fault = paging64_page_fault; + context->gva_to_gpa = paging64_gva_to_gpa; + context->sync_page = paging64_sync_page; + context->invlpg = paging64_invlpg; + context->update_pte = paging64_update_pte; + context->shadow_root_level = level; + context->direct_map = false; +} + +static void paging64_init_context(struct kvm_vcpu *vcpu, + struct kvm_mmu *context) +{ + int root_level = is_la57_mode(vcpu) ? 
+ PT64_ROOT_5LEVEL : PT64_ROOT_4LEVEL; + + paging64_init_context_common(vcpu, context, root_level); +} + +static void paging32_init_context(struct kvm_vcpu *vcpu, + struct kvm_mmu *context) +{ + context->nx = false; + context->root_level = PT32_ROOT_LEVEL; + + reset_rsvds_bits_mask(vcpu, context); + update_permission_bitmask(vcpu, context, false); + update_pkru_bitmask(vcpu, context, false); + update_last_nonleaf_level(vcpu, context); + + context->page_fault = paging32_page_fault; + context->gva_to_gpa = paging32_gva_to_gpa; + context->sync_page = paging32_sync_page; + context->invlpg = paging32_invlpg; + context->update_pte = paging32_update_pte; + context->shadow_root_level = PT32E_ROOT_LEVEL; + context->direct_map = false; +} + +static void paging32E_init_context(struct kvm_vcpu *vcpu, + struct kvm_mmu *context) +{ + paging64_init_context_common(vcpu, context, PT32E_ROOT_LEVEL); +} + +static union kvm_mmu_extended_role kvm_calc_mmu_role_ext(struct kvm_vcpu *vcpu) +{ + union kvm_mmu_extended_role ext = {0}; + + ext.cr0_pg = !!is_paging(vcpu); + ext.cr4_pae = !!is_pae(vcpu); + ext.cr4_smep = !!kvm_read_cr4_bits(vcpu, X86_CR4_SMEP); + ext.cr4_smap = !!kvm_read_cr4_bits(vcpu, X86_CR4_SMAP); + ext.cr4_pse = !!is_pse(vcpu); + ext.cr4_pke = !!kvm_read_cr4_bits(vcpu, X86_CR4_PKE); + ext.cr4_la57 = !!kvm_read_cr4_bits(vcpu, X86_CR4_LA57); + ext.maxphyaddr = cpuid_maxphyaddr(vcpu); + + ext.valid = 1; + + return ext; +} + +static union kvm_mmu_role kvm_calc_mmu_role_common(struct kvm_vcpu *vcpu, + bool base_only) +{ + union kvm_mmu_role role = {0}; + + role.base.access = ACC_ALL; + role.base.nxe = !!is_nx(vcpu); + role.base.cr0_wp = is_write_protection(vcpu); + role.base.smm = is_smm(vcpu); + role.base.guest_mode = is_guest_mode(vcpu); + + if (base_only) + return role; + + role.ext = kvm_calc_mmu_role_ext(vcpu); + + return role; +} + +static union kvm_mmu_role +kvm_calc_tdp_mmu_root_page_role(struct kvm_vcpu *vcpu, bool base_only) +{ + union kvm_mmu_role role = kvm_calc_mmu_role_common(vcpu, base_only); + + role.base.ad_disabled = (shadow_accessed_mask == 0); + role.base.level = kvm_x86_ops->get_tdp_level(vcpu); + role.base.direct = true; + role.base.gpte_is_8_bytes = true; + + return role; +} + +static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) +{ + struct kvm_mmu *context = vcpu->arch.mmu; + union kvm_mmu_role new_role = + kvm_calc_tdp_mmu_root_page_role(vcpu, false); + + new_role.base.word &= mmu_base_role_mask.word; + if (new_role.as_u64 == context->mmu_role.as_u64) + return; + + context->mmu_role.as_u64 = new_role.as_u64; + context->page_fault = tdp_page_fault; + context->sync_page = nonpaging_sync_page; + context->invlpg = nonpaging_invlpg; + context->update_pte = nonpaging_update_pte; + context->shadow_root_level = kvm_x86_ops->get_tdp_level(vcpu); + context->direct_map = true; + context->set_cr3 = kvm_x86_ops->set_tdp_cr3; + context->get_cr3 = get_cr3; + context->get_pdptr = kvm_pdptr_read; + context->inject_page_fault = kvm_inject_page_fault; + + if (!is_paging(vcpu)) { + context->nx = false; + context->gva_to_gpa = nonpaging_gva_to_gpa; + context->root_level = 0; + } else if (is_long_mode(vcpu)) { + context->nx = is_nx(vcpu); + context->root_level = is_la57_mode(vcpu) ? 
+ PT64_ROOT_5LEVEL : PT64_ROOT_4LEVEL; + reset_rsvds_bits_mask(vcpu, context); + context->gva_to_gpa = paging64_gva_to_gpa; + } else if (is_pae(vcpu)) { + context->nx = is_nx(vcpu); + context->root_level = PT32E_ROOT_LEVEL; + reset_rsvds_bits_mask(vcpu, context); + context->gva_to_gpa = paging64_gva_to_gpa; + } else { + context->nx = false; + context->root_level = PT32_ROOT_LEVEL; + reset_rsvds_bits_mask(vcpu, context); + context->gva_to_gpa = paging32_gva_to_gpa; + } + + update_permission_bitmask(vcpu, context, false); + update_pkru_bitmask(vcpu, context, false); + update_last_nonleaf_level(vcpu, context); + reset_tdp_shadow_zero_bits_mask(vcpu, context); +} + +static union kvm_mmu_role +kvm_calc_shadow_mmu_root_page_role(struct kvm_vcpu *vcpu, bool base_only) +{ + union kvm_mmu_role role = kvm_calc_mmu_role_common(vcpu, base_only); + + role.base.smep_andnot_wp = role.ext.cr4_smep && + !is_write_protection(vcpu); + role.base.smap_andnot_wp = role.ext.cr4_smap && + !is_write_protection(vcpu); + role.base.direct = !is_paging(vcpu); + role.base.gpte_is_8_bytes = !!is_pae(vcpu); + + if (!is_long_mode(vcpu)) + role.base.level = PT32E_ROOT_LEVEL; + else if (is_la57_mode(vcpu)) + role.base.level = PT64_ROOT_5LEVEL; + else + role.base.level = PT64_ROOT_4LEVEL; + + return role; +} + +void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu) +{ + struct kvm_mmu *context = vcpu->arch.mmu; + union kvm_mmu_role new_role = + kvm_calc_shadow_mmu_root_page_role(vcpu, false); + + new_role.base.word &= mmu_base_role_mask.word; + if (new_role.as_u64 == context->mmu_role.as_u64) + return; + + if (!is_paging(vcpu)) + nonpaging_init_context(vcpu, context); + else if (is_long_mode(vcpu)) + paging64_init_context(vcpu, context); + else if (is_pae(vcpu)) + paging32E_init_context(vcpu, context); + else + paging32_init_context(vcpu, context); + + context->mmu_role.as_u64 = new_role.as_u64; + reset_shadow_zero_bits_mask(vcpu, context); +} +EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu); + +static union kvm_mmu_role +kvm_calc_shadow_ept_root_page_role(struct kvm_vcpu *vcpu, bool accessed_dirty, + bool execonly) +{ + union kvm_mmu_role role = {0}; + + /* SMM flag is inherited from root_mmu */ + role.base.smm = vcpu->arch.root_mmu.mmu_role.base.smm; + + role.base.level = PT64_ROOT_4LEVEL; + role.base.gpte_is_8_bytes = true; + role.base.direct = false; + role.base.ad_disabled = !accessed_dirty; + role.base.guest_mode = true; + role.base.access = ACC_ALL; + + /* + * WP=1 and NOT_WP=1 is an impossible combination, use WP and the + * SMAP variation to denote shadow EPT entries. 
+ */ + role.base.cr0_wp = true; + role.base.smap_andnot_wp = true; + + role.ext = kvm_calc_mmu_role_ext(vcpu); + role.ext.execonly = execonly; + + return role; +} + +void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly, + bool accessed_dirty, gpa_t new_eptp) +{ + struct kvm_mmu *context = vcpu->arch.mmu; + union kvm_mmu_role new_role = + kvm_calc_shadow_ept_root_page_role(vcpu, accessed_dirty, + execonly); + + __kvm_mmu_new_cr3(vcpu, new_eptp, new_role.base, false); + + new_role.base.word &= mmu_base_role_mask.word; + if (new_role.as_u64 == context->mmu_role.as_u64) + return; + + context->shadow_root_level = PT64_ROOT_4LEVEL; + + context->nx = true; + context->ept_ad = accessed_dirty; + context->page_fault = ept_page_fault; + context->gva_to_gpa = ept_gva_to_gpa; + context->sync_page = ept_sync_page; + context->invlpg = ept_invlpg; + context->update_pte = ept_update_pte; + context->root_level = PT64_ROOT_4LEVEL; + context->direct_map = false; + context->mmu_role.as_u64 = new_role.as_u64; + + update_permission_bitmask(vcpu, context, true); + update_pkru_bitmask(vcpu, context, true); + update_last_nonleaf_level(vcpu, context); + reset_rsvds_bits_mask_ept(vcpu, context, execonly); + reset_ept_shadow_zero_bits_mask(vcpu, context, execonly); +} +EXPORT_SYMBOL_GPL(kvm_init_shadow_ept_mmu); + +static void init_kvm_softmmu(struct kvm_vcpu *vcpu) +{ + struct kvm_mmu *context = vcpu->arch.mmu; + + kvm_init_shadow_mmu(vcpu); + context->set_cr3 = kvm_x86_ops->set_cr3; + context->get_cr3 = get_cr3; + context->get_pdptr = kvm_pdptr_read; + context->inject_page_fault = kvm_inject_page_fault; +} + +static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu) +{ + union kvm_mmu_role new_role = kvm_calc_mmu_role_common(vcpu, false); + struct kvm_mmu *g_context = &vcpu->arch.nested_mmu; + + new_role.base.word &= mmu_base_role_mask.word; + if (new_role.as_u64 == g_context->mmu_role.as_u64) + return; + + g_context->mmu_role.as_u64 = new_role.as_u64; + g_context->get_cr3 = get_cr3; + g_context->get_pdptr = kvm_pdptr_read; + g_context->inject_page_fault = kvm_inject_page_fault; + + /* + * Note that arch.mmu->gva_to_gpa translates l2_gpa to l1_gpa using + * L1's nested page tables (e.g. EPT12). The nested translation + * of l2_gva to l1_gpa is done by arch.nested_mmu.gva_to_gpa using + * L2's page tables as the first level of translation and L1's + * nested page tables as the second level of translation. Basically + * the gva_to_gpa functions between mmu and nested_mmu are swapped. + */ + if (!is_paging(vcpu)) { + g_context->nx = false; + g_context->root_level = 0; + g_context->gva_to_gpa = nonpaging_gva_to_gpa_nested; + } else if (is_long_mode(vcpu)) { + g_context->nx = is_nx(vcpu); + g_context->root_level = is_la57_mode(vcpu) ? 
+ PT64_ROOT_5LEVEL : PT64_ROOT_4LEVEL; + reset_rsvds_bits_mask(vcpu, g_context); + g_context->gva_to_gpa = paging64_gva_to_gpa_nested; + } else if (is_pae(vcpu)) { + g_context->nx = is_nx(vcpu); + g_context->root_level = PT32E_ROOT_LEVEL; + reset_rsvds_bits_mask(vcpu, g_context); + g_context->gva_to_gpa = paging64_gva_to_gpa_nested; + } else { + g_context->nx = false; + g_context->root_level = PT32_ROOT_LEVEL; + reset_rsvds_bits_mask(vcpu, g_context); + g_context->gva_to_gpa = paging32_gva_to_gpa_nested; + } + + update_permission_bitmask(vcpu, g_context, false); + update_pkru_bitmask(vcpu, g_context, false); + update_last_nonleaf_level(vcpu, g_context); +} + +void kvm_init_mmu(struct kvm_vcpu *vcpu, bool reset_roots) +{ + if (reset_roots) { + uint i; + + vcpu->arch.mmu->root_hpa = INVALID_PAGE; + + for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) + vcpu->arch.mmu->prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID; + } + + if (mmu_is_nested(vcpu)) + init_kvm_nested_mmu(vcpu); + else if (tdp_enabled) + init_kvm_tdp_mmu(vcpu); + else + init_kvm_softmmu(vcpu); +} +EXPORT_SYMBOL_GPL(kvm_init_mmu); + +static union kvm_mmu_page_role +kvm_mmu_calc_root_page_role(struct kvm_vcpu *vcpu) +{ + union kvm_mmu_role role; + + if (tdp_enabled) + role = kvm_calc_tdp_mmu_root_page_role(vcpu, true); + else + role = kvm_calc_shadow_mmu_root_page_role(vcpu, true); + + return role.base; +} + +void kvm_mmu_reset_context(struct kvm_vcpu *vcpu) +{ + kvm_mmu_unload(vcpu); + kvm_init_mmu(vcpu, true); +} +EXPORT_SYMBOL_GPL(kvm_mmu_reset_context); + +int kvm_mmu_load(struct kvm_vcpu *vcpu) +{ + int r; + + r = mmu_topup_memory_caches(vcpu); + if (r) + goto out; + r = mmu_alloc_roots(vcpu); + kvm_mmu_sync_roots(vcpu); + if (r) + goto out; + kvm_mmu_load_cr3(vcpu); + kvm_x86_ops->tlb_flush(vcpu, true); +out: + return r; +} +EXPORT_SYMBOL_GPL(kvm_mmu_load); + +void kvm_mmu_unload(struct kvm_vcpu *vcpu) +{ + kvm_mmu_free_roots(vcpu, &vcpu->arch.root_mmu, KVM_MMU_ROOTS_ALL); + WARN_ON(VALID_PAGE(vcpu->arch.root_mmu.root_hpa)); + kvm_mmu_free_roots(vcpu, &vcpu->arch.guest_mmu, KVM_MMU_ROOTS_ALL); + WARN_ON(VALID_PAGE(vcpu->arch.guest_mmu.root_hpa)); +} +EXPORT_SYMBOL_GPL(kvm_mmu_unload); + +static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu, + struct kvm_mmu_page *sp, u64 *spte, + const void *new) +{ + if (sp->role.level != PT_PAGE_TABLE_LEVEL) { + ++vcpu->kvm->stat.mmu_pde_zapped; + return; + } + + ++vcpu->kvm->stat.mmu_pte_updated; + vcpu->arch.mmu->update_pte(vcpu, sp, spte, new); +} + +static bool need_remote_flush(u64 old, u64 new) +{ + if (!is_shadow_present_pte(old)) + return false; + if (!is_shadow_present_pte(new)) + return true; + if ((old ^ new) & PT64_BASE_ADDR_MASK) + return true; + old ^= shadow_nx_mask; + new ^= shadow_nx_mask; + return (old & ~new & PT64_PERM_MASK) != 0; +} + +static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa, + int *bytes) +{ + u64 gentry = 0; + int r; + + /* + * Assume that the pte write on a page table of the same type + * as the current vcpu paging mode since we update the sptes only + * when they have the same mode. + */ + if (is_pae(vcpu) && *bytes == 4) { + /* Handle a 32-bit guest writing two halves of a 64-bit gpte */ + *gpa &= ~(gpa_t)7; + *bytes = 8; + } + + if (*bytes == 4 || *bytes == 8) { + r = kvm_vcpu_read_guest_atomic(vcpu, *gpa, &gentry, *bytes); + if (r) + gentry = 0; + } + + return gentry; +} + +/* + * If we're seeing too many writes to a page, it may no longer be a page table, + * or we may be forking, in which case it is better to unmap the page. 
+ */
+static bool detect_write_flooding(struct kvm_mmu_page *sp)
+{
+	/*
+	 * Skip write-flooding detection for the sp whose level is 1: it can
+	 * become unsync, and then the guest page is no longer write-protected.
+	 */
+	if (sp->role.level == PT_PAGE_TABLE_LEVEL)
+		return false;
+
+	atomic_inc(&sp->write_flooding_count);
+	return atomic_read(&sp->write_flooding_count) >= 3;
+}
+
+/*
+ * Misaligned accesses are too much trouble to fix up; also, they usually
+ * indicate a page is not used as a page table.
+ */
+static bool detect_write_misaligned(struct kvm_mmu_page *sp, gpa_t gpa,
+				    int bytes)
+{
+	unsigned offset, pte_size, misaligned;
+
+	pgprintk("misaligned: gpa %llx bytes %d role %x\n",
+		 gpa, bytes, sp->role.word);
+
+	offset = offset_in_page(gpa);
+	pte_size = sp->role.gpte_is_8_bytes ? 8 : 4;
+
+	/*
+	 * Sometimes the OS writes only the last byte to update status bits;
+	 * for example, Linux uses the andb instruction in clear_bit().
+	 */
+	if (!(offset & (pte_size - 1)) && bytes == 1)
+		return false;
+
+	misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1);
+	misaligned |= bytes < 4;
+
+	return misaligned;
+}
+
+static u64 *get_written_sptes(struct kvm_mmu_page *sp, gpa_t gpa, int *nspte)
+{
+	unsigned page_offset, quadrant;
+	u64 *spte;
+	int level;
+
+	page_offset = offset_in_page(gpa);
+	level = sp->role.level;
+	*nspte = 1;
+	if (!sp->role.gpte_is_8_bytes) {
+		page_offset <<= 1;	/* 32->64 */
+		/*
+		 * A 32-bit pde maps 4MB while the shadow pdes map
+		 * only 2MB.  So we need to double the offset again
+		 * and zap two pdes instead of one.
+		 */
+		if (level == PT32_ROOT_LEVEL) {
+			page_offset &= ~7; /* kill rounding error */
+			page_offset <<= 1;
+			*nspte = 2;
+		}
+		quadrant = page_offset >> PAGE_SHIFT;
+		page_offset &= ~PAGE_MASK;
+		if (quadrant != sp->role.quadrant)
+			return NULL;
+	}
+
+	spte = &sp->spt[page_offset / sizeof(*spte)];
+	return spte;
+}
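An aside on the bit trick in detect_write_misaligned() above: a write of
`bytes` bytes at page offset `offset` stays within a single guest PTE
exactly when its first and last byte fall into the same pte_size-aligned
slot. A minimal standalone sketch (plain userspace C, not part of the
patch; it omits the single-status-byte early return the kernel version
performs first):

	#include <stdio.h>

	/* Returns nonzero when the write straddles a PTE boundary or is
	 * shorter than 4 bytes, mirroring the kernel check above.
	 */
	static int write_is_misaligned(unsigned offset, unsigned bytes,
				       unsigned pte_size)
	{
		unsigned misaligned;

		misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1);
		return misaligned || bytes < 4;
	}

	int main(void)
	{
		/* 8-byte gptes: offsets 0..7 form one slot, 8..15 the next. */
		printf("%d\n", write_is_misaligned(4, 4, 8)); /* 0: bytes 4..7 */
		printf("%d\n", write_is_misaligned(6, 4, 8)); /* 1: bytes 6..9 */
		return 0;
	}

The XOR exposes every bit in which the first and last byte offsets differ;
masking with ~(pte_size - 1) keeps only the bits that select the slot, so
any surviving bit means two different PTEs were touched.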
+static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
+			      const u8 *new, int bytes,
+			      struct kvm_page_track_notifier_node *node)
+{
+	gfn_t gfn = gpa >> PAGE_SHIFT;
+	struct kvm_mmu_page *sp;
+	LIST_HEAD(invalid_list);
+	u64 entry, gentry, *spte;
+	int npte;
+	bool remote_flush, local_flush;
+
+	/*
+	 * If we don't have indirect shadow pages, it means no page is
+	 * write-protected, so we can exit simply.
+	 */
+	if (!READ_ONCE(vcpu->kvm->arch.indirect_shadow_pages))
+		return;
+
+	remote_flush = local_flush = false;
+
+	pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes);
+
+	/*
+	 * No need to care whether the memory allocation succeeded, since pte
+	 * prefetch is skipped if the cache does not have enough objects.
+	 */
+	mmu_topup_memory_caches(vcpu);
+
+	spin_lock(&vcpu->kvm->mmu_lock);
+
+	gentry = mmu_pte_write_fetch_gpte(vcpu, &gpa, &bytes);
+
+	++vcpu->kvm->stat.mmu_pte_write;
+	kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE);
+
+	for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn) {
+		if (detect_write_misaligned(sp, gpa, bytes) ||
+		      detect_write_flooding(sp)) {
+			kvm_mmu_prepare_zap_page(vcpu->kvm, sp, &invalid_list);
+			++vcpu->kvm->stat.mmu_flooded;
+			continue;
+		}
+
+		spte = get_written_sptes(sp, gpa, &npte);
+		if (!spte)
+			continue;
+
+		local_flush = true;
+		while (npte--) {
+			u32 base_role = vcpu->arch.mmu->mmu_role.base.word;
+
+			entry = *spte;
+			mmu_page_zap_pte(vcpu->kvm, sp, spte);
+			if (gentry &&
+			    !((sp->role.word ^ base_role) & mmu_base_role_mask.word)
+			    && rmap_can_add(vcpu))
+				mmu_pte_write_new_pte(vcpu, sp, spte, &gentry);
+			if (need_remote_flush(entry, *spte))
+				remote_flush = true;
+			++spte;
+		}
+	}
+	kvm_mmu_flush_or_zap(vcpu, &invalid_list, remote_flush, local_flush);
+	kvm_mmu_audit(vcpu, AUDIT_POST_PTE_WRITE);
+	spin_unlock(&vcpu->kvm->mmu_lock);
+}
+
+int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva)
+{
+	gpa_t gpa;
+	int r;
+
+	if (vcpu->arch.mmu->direct_map)
+		return 0;
+
+	gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, NULL);
+
+	r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT);
+
+	return r;
+}
+EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page_virt);
+
+static int make_mmu_pages_available(struct kvm_vcpu *vcpu)
+{
+	LIST_HEAD(invalid_list);
+
+	if (likely(kvm_mmu_available_pages(vcpu->kvm) >= KVM_MIN_FREE_MMU_PAGES))
+		return 0;
+
+	while (kvm_mmu_available_pages(vcpu->kvm) < KVM_REFILL_PAGES) {
+		if (!prepare_zap_oldest_mmu_page(vcpu->kvm, &invalid_list))
+			break;
+
+		++vcpu->kvm->stat.mmu_recycled;
+	}
+	kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
+
+	if (!kvm_mmu_available_pages(vcpu->kvm))
+		return -ENOSPC;
+	return 0;
+}
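kvm_mmu_page_fault() below funnels every fault through the RET_PF_*
protocol. As a quick orientation, here is a deliberately simplified
dispatcher illustrating that contract; it is a sketch assembled from calls
visible in this file, not the kernel function itself:

	/* Illustrative only: how RET_PF_* results map to actions. */
	static int sketch_handle_fault(struct kvm_vcpu *vcpu, gva_t cr2,
				       u64 error_code)
	{
		/* MMIO fast path first; RET_PF_INVALID means "undecided". */
		int r = handle_mmio_page_fault(vcpu, cr2, true);

		if (r == RET_PF_INVALID)
			r = vcpu->arch.mmu->page_fault(vcpu, cr2,
						       lower_32_bits(error_code),
						       false);
		if (r == RET_PF_RETRY)		/* re-enter guest, re-execute */
			return 1;
		if (r == RET_PF_EMULATE)	/* defer to the x86 emulator */
			return x86_emulate_instruction(vcpu, cr2, 0, NULL, 0);
		return r;			/* r < 0: propagate the error */
	}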
+int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
+		       void *insn, int insn_len)
+{
+	int r, emulation_type = 0;
+	bool direct = vcpu->arch.mmu->direct_map;
+
+	/* With shadow page tables, fault_address contains a GVA or nGPA. */
+	if (vcpu->arch.mmu->direct_map) {
+		vcpu->arch.gpa_available = true;
+		vcpu->arch.gpa_val = cr2;
+	}
+
+	r = RET_PF_INVALID;
+	if (unlikely(error_code & PFERR_RSVD_MASK)) {
+		r = handle_mmio_page_fault(vcpu, cr2, direct);
+		if (r == RET_PF_EMULATE)
+			goto emulate;
+	}
+
+	if (r == RET_PF_INVALID) {
+		r = vcpu->arch.mmu->page_fault(vcpu, cr2,
+					       lower_32_bits(error_code),
+					       false);
+		WARN_ON(r == RET_PF_INVALID);
+	}
+
+	if (r == RET_PF_RETRY)
+		return 1;
+	if (r < 0)
+		return r;
+
+	/*
+	 * Before emulating the instruction, check if the error code
+	 * was due to a RO violation while translating the guest page.
+	 * This can occur when using nested virtualization with nested
+	 * paging in both guests. If true, we simply unprotect the page
+	 * and resume the guest.
+	 */
+	if (vcpu->arch.mmu->direct_map &&
+	    (error_code & PFERR_NESTED_GUEST_PAGE) == PFERR_NESTED_GUEST_PAGE) {
+		kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(cr2));
+		return 1;
+	}
+
+	/*
+	 * vcpu->arch.mmu.page_fault returned RET_PF_EMULATE, but we can still
+	 * optimistically try to just unprotect the page and let the processor
+	 * re-execute the instruction that caused the page fault.  Do not allow
+	 * retrying MMIO emulation, as it's not only pointless but could also
+	 * cause us to enter an infinite loop because the processor will keep
+	 * faulting on the non-existent MMIO address.  Retrying an instruction
+	 * from a nested guest is also pointless and dangerous as we are only
+	 * explicitly shadowing L1's page tables, i.e. unprotecting something
+	 * for L1 isn't going to magically fix whatever issue caused L2 to fail.
+	 */
+	if (!mmio_info_in_cache(vcpu, cr2, direct) && !is_guest_mode(vcpu))
+		emulation_type = EMULTYPE_ALLOW_RETRY;
+emulate:
+	/*
+	 * On AMD platforms, under certain conditions insn_len may be zero on #NPF.
+	 * This can happen if a guest gets a page-fault on data access but the HW
+	 * table walker is not able to read the instruction page (e.g. the
+	 * instruction page is not present in memory). In those cases we simply
+	 * restart the guest, with the exception of AMD Erratum 1096 which is
+	 * unrecoverable.
+	 */
+	if (unlikely(insn && !insn_len)) {
+		if (!kvm_x86_ops->need_emulation_on_page_fault(vcpu))
+			return 1;
+	}
+
+	return x86_emulate_instruction(vcpu, cr2, emulation_type, insn,
+				       insn_len);
+}
+EXPORT_SYMBOL_GPL(kvm_mmu_page_fault);
+
+void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
+{
+	struct kvm_mmu *mmu = vcpu->arch.mmu;
+	int i;
+
+	/* INVLPG on a non-canonical address is a NOP according to the SDM. */
+	if (is_noncanonical_address(gva, vcpu))
+		return;
+
+	mmu->invlpg(vcpu, gva, mmu->root_hpa);
+
+	/*
+	 * INVLPG is required to invalidate any global mappings for the VA,
+	 * irrespective of PCID. Since it would take roughly the same amount
+	 * of work to determine whether any of the prev_root mappings of the
+	 * VA is marked global as to just sync it blindly, we might as well
+	 * always sync it.
+	 *
+	 * Mappings not reachable via the current cr3 or the prev_roots will be
+	 * synced when switching to that cr3, so nothing needs to be done here
+	 * for them.
+	 */
+	for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
+		if (VALID_PAGE(mmu->prev_roots[i].hpa))
+			mmu->invlpg(vcpu, gva, mmu->prev_roots[i].hpa);
+
+	kvm_x86_ops->tlb_flush_gva(vcpu, gva);
+	++vcpu->stat.invlpg;
+}
+EXPORT_SYMBOL_GPL(kvm_mmu_invlpg);
+
+void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid)
+{
+	struct kvm_mmu *mmu = vcpu->arch.mmu;
+	bool tlb_flush = false;
+	uint i;
+
+	if (pcid == kvm_get_active_pcid(vcpu)) {
+		mmu->invlpg(vcpu, gva, mmu->root_hpa);
+		tlb_flush = true;
+	}
+
+	for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) {
+		if (VALID_PAGE(mmu->prev_roots[i].hpa) &&
+		    pcid == kvm_get_pcid(vcpu, mmu->prev_roots[i].cr3)) {
+			mmu->invlpg(vcpu, gva, mmu->prev_roots[i].hpa);
+			tlb_flush = true;
+		}
+	}
+
+	if (tlb_flush)
+		kvm_x86_ops->tlb_flush_gva(vcpu, gva);
+
+	++vcpu->stat.invlpg;
+
+	/*
+	 * Mappings not reachable via the current cr3 or the prev_roots will be
+	 * synced when switching to that cr3, so nothing needs to be done here
+	 * for them.
+	 */
+}
+EXPORT_SYMBOL_GPL(kvm_mmu_invpcid_gva);
+
+void kvm_enable_tdp(void)
+{
+	tdp_enabled = true;
+}
+EXPORT_SYMBOL_GPL(kvm_enable_tdp);
+
+void kvm_disable_tdp(void)
+{
+	tdp_enabled = false;
+}
+EXPORT_SYMBOL_GPL(kvm_disable_tdp);
+
+
+/* The return value indicates if tlb flush on all vcpus is needed. */
+typedef bool (*slot_level_handler) (struct kvm *kvm, struct kvm_rmap_head *rmap_head);
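The slot_level_handler typedef above fixes the contract for the
slot_handle_*() walkers defined next: a handler is invoked once per rmap
list and returns true when the change it made requires a TLB flush on all
vCPUs. A hedged illustration (the handler and call site below are
hypothetical, not part of the patch):

	/* Hypothetical handler obeying the slot_level_handler contract:
	 * a read-only walk changes nothing, so it never requests a flush.
	 */
	static bool rmap_noop_handler(struct kvm *kvm,
				      struct kvm_rmap_head *rmap_head)
	{
		return false;
	}

	/* Possible call site; per the comment below, the walkers must be
	 * entered with kvm->mmu_lock held.
	 */
	static void walk_slot_example(struct kvm *kvm,
				      struct kvm_memory_slot *memslot)
	{
		spin_lock(&kvm->mmu_lock);
		/* Visit every rmap of the slot, from 4K pages up to the
		 * largest hugepage level; false = no flushing inside the
		 * walk, the caller decides what to do with the result.
		 */
		slot_handle_all_level(kvm, memslot, rmap_noop_handler, false);
		spin_unlock(&kvm->mmu_lock);
	}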
+/* The caller should hold mmu-lock before calling this function. */
+static __always_inline bool
+slot_handle_level_range(struct kvm *kvm, struct kvm_memory_slot *memslot,
+			slot_level_handler fn, int start_level, int end_level,
+			gfn_t start_gfn, gfn_t end_gfn, bool lock_flush_tlb)
+{
+	struct slot_rmap_walk_iterator iterator;
+	bool flush = false;
+
+	for_each_slot_rmap_range(memslot, start_level, end_level, start_gfn,
+			end_gfn, &iterator) {
+		if (iterator.rmap)
+			flush |= fn(kvm, iterator.rmap);
+
+		if (need_resched() || spin_needbreak(&kvm->mmu_lock)) {
+			if (flush && lock_flush_tlb) {
+				kvm_flush_remote_tlbs_with_address(kvm,
+						start_gfn,
+						iterator.gfn - start_gfn + 1);
+				flush = false;
+			}
+			cond_resched_lock(&kvm->mmu_lock);
+		}
+	}
+
+	if (flush && lock_flush_tlb) {
+		kvm_flush_remote_tlbs_with_address(kvm, start_gfn,
+						   end_gfn - start_gfn + 1);
+		flush = false;
+	}
+
+	return flush;
+}
+
+static __always_inline bool
+slot_handle_level(struct kvm *kvm, struct kvm_memory_slot *memslot,
+		  slot_level_handler fn, int start_level, int end_level,
+		  bool lock_flush_tlb)
+{
+	return slot_handle_level_range(kvm, memslot, fn, start_level,
+			end_level, memslot->base_gfn,
+			memslot->base_gfn + memslot->npages - 1,
+			lock_flush_tlb);
+}
+
+static __always_inline bool
+slot_handle_all_level(struct kvm *kvm, struct kvm_memory_slot *memslot,
+		      slot_level_handler fn, bool lock_flush_tlb)
+{
+	return slot_handle_level(kvm, memslot, fn, PT_PAGE_TABLE_LEVEL,
+				 PT_MAX_HUGEPAGE_LEVEL, lock_flush_tlb);
+}
+
+static __always_inline bool
+slot_handle_large_level(struct kvm *kvm, struct kvm_memory_slot *memslot,
+			slot_level_handler fn, bool lock_flush_tlb)
+{
+	return slot_handle_level(kvm, memslot, fn, PT_PAGE_TABLE_LEVEL + 1,
+				 PT_MAX_HUGEPAGE_LEVEL, lock_flush_tlb);
+}
+
+static __always_inline bool
+slot_handle_leaf(struct kvm *kvm, struct kvm_memory_slot *memslot,
+		 slot_level_handler fn, bool lock_flush_tlb)
+{
+	return slot_handle_level(kvm, memslot, fn, PT_PAGE_TABLE_LEVEL,
+				 PT_PAGE_TABLE_LEVEL, lock_flush_tlb);
+}
+
+static void free_mmu_pages(struct kvm_mmu *mmu)
+{
+	free_page((unsigned long)mmu->pae_root);
+	free_page((unsigned long)mmu->lm_root);
+}
+
+static int alloc_mmu_pages(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
+{
+	struct page *page;
+	int i;
+
+	/*
+	 * When using PAE paging, the four PDPTEs are treated as 'root' pages,
+	 * while the PDP table is a per-vCPU construct that's allocated at MMU
+	 * creation.  When emulating 32-bit mode, cr3 is only 32 bits even on
+	 * x86_64.  Therefore we need to allocate the PDP table in the first
+	 * 4GB of memory, which happens to fit the DMA32 zone.  Except for
+	 * SVM's 32-bit NPT support, TDP paging doesn't use PAE paging and can
+	 * skip allocating the PDP table.
+ */ + if (tdp_enabled && kvm_x86_ops->get_tdp_level(vcpu) > PT32E_ROOT_LEVEL) + return 0; + + page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_DMA32); + if (!page) + return -ENOMEM; + + mmu->pae_root = page_address(page); + for (i = 0; i < 4; ++i) + mmu->pae_root[i] = INVALID_PAGE; + + return 0; +} + +int kvm_mmu_create(struct kvm_vcpu *vcpu) +{ + uint i; + int ret; + + vcpu->arch.mmu = &vcpu->arch.root_mmu; + vcpu->arch.walk_mmu = &vcpu->arch.root_mmu; + + vcpu->arch.root_mmu.root_hpa = INVALID_PAGE; + vcpu->arch.root_mmu.root_cr3 = 0; + vcpu->arch.root_mmu.translate_gpa = translate_gpa; + for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) + vcpu->arch.root_mmu.prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID; + + vcpu->arch.guest_mmu.root_hpa = INVALID_PAGE; + vcpu->arch.guest_mmu.root_cr3 = 0; + vcpu->arch.guest_mmu.translate_gpa = translate_gpa; + for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) + vcpu->arch.guest_mmu.prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID; + + vcpu->arch.nested_mmu.translate_gpa = translate_nested_gpa; + + ret = alloc_mmu_pages(vcpu, &vcpu->arch.guest_mmu); + if (ret) + return ret; + + ret = alloc_mmu_pages(vcpu, &vcpu->arch.root_mmu); + if (ret) + goto fail_allocate_root; + + return ret; + fail_allocate_root: + free_mmu_pages(&vcpu->arch.guest_mmu); + return ret; +} + +#define BATCH_ZAP_PAGES 10 +static void kvm_zap_obsolete_pages(struct kvm *kvm) +{ + struct kvm_mmu_page *sp, *node; + int nr_zapped, batch = 0; + +restart: + list_for_each_entry_safe_reverse(sp, node, + &kvm->arch.active_mmu_pages, link) { + /* + * No obsolete valid page exists before a newly created page + * since active_mmu_pages is a FIFO list. + */ + if (!is_obsolete_sp(kvm, sp)) + break; + + /* + * Skip invalid pages with a non-zero root count, zapping pages + * with a non-zero root count will never succeed, i.e. the page + * will get thrown back on active_mmu_pages and we'll get stuck + * in an infinite loop. + */ + if (sp->role.invalid && sp->root_count) + continue; + + /* + * No need to flush the TLB since we're only zapping shadow + * pages with an obsolete generation number and all vCPUS have + * loaded a new root, i.e. the shadow pages being zapped cannot + * be in active use by the guest. + */ + if (batch >= BATCH_ZAP_PAGES && + cond_resched_lock(&kvm->mmu_lock)) { + batch = 0; + goto restart; + } + + if (__kvm_mmu_prepare_zap_page(kvm, sp, + &kvm->arch.zapped_obsolete_pages, &nr_zapped)) { + batch += nr_zapped; + goto restart; + } + } + + /* + * Trigger a remote TLB flush before freeing the page tables to ensure + * KVM is not in the middle of a lockless shadow page table walk, which + * may reference the pages. + */ + kvm_mmu_commit_zap_page(kvm, &kvm->arch.zapped_obsolete_pages); +} + +/* + * Fast invalidate all shadow pages and use lock-break technique + * to zap obsolete pages. + * + * It's required when memslot is being deleted or VM is being + * destroyed, in these cases, we should ensure that KVM MMU does + * not use any resource of the being-deleted slot or all slots + * after calling the function. + */ +static void kvm_mmu_zap_all_fast(struct kvm *kvm) +{ + lockdep_assert_held(&kvm->slots_lock); + + spin_lock(&kvm->mmu_lock); + trace_kvm_mmu_zap_all_fast(kvm); + + /* + * Toggle mmu_valid_gen between '0' and '1'. Because slots_lock is + * held for the entire duration of zapping obsolete pages, it's + * impossible for there to be multiple invalid generations associated + * with *valid* shadow pages at any given time, i.e. 
there is exactly + * one valid generation and (at most) one invalid generation. + */ + kvm->arch.mmu_valid_gen = kvm->arch.mmu_valid_gen ? 0 : 1; + + /* + * Notify all vcpus to reload its shadow page table and flush TLB. + * Then all vcpus will switch to new shadow page table with the new + * mmu_valid_gen. + * + * Note: we need to do this under the protection of mmu_lock, + * otherwise, vcpu would purge shadow page but miss tlb flush. + */ + kvm_reload_remote_mmus(kvm); + + kvm_zap_obsolete_pages(kvm); + spin_unlock(&kvm->mmu_lock); +} + +static bool kvm_has_zapped_obsolete_pages(struct kvm *kvm) +{ + return unlikely(!list_empty_careful(&kvm->arch.zapped_obsolete_pages)); +} + +static void kvm_mmu_invalidate_zap_pages_in_memslot(struct kvm *kvm, + struct kvm_memory_slot *slot, + struct kvm_page_track_notifier_node *node) +{ + kvm_mmu_zap_all_fast(kvm); +} + +void kvm_mmu_init_vm(struct kvm *kvm) +{ + struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker; + + node->track_write = kvm_mmu_pte_write; + node->track_flush_slot = kvm_mmu_invalidate_zap_pages_in_memslot; + kvm_page_track_register_notifier(kvm, node); +} + +void kvm_mmu_uninit_vm(struct kvm *kvm) +{ + struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker; + + kvm_page_track_unregister_notifier(kvm, node); +} + +void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end) +{ + struct kvm_memslots *slots; + struct kvm_memory_slot *memslot; + int i; + + spin_lock(&kvm->mmu_lock); + for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) { + slots = __kvm_memslots(kvm, i); + kvm_for_each_memslot(memslot, slots) { + gfn_t start, end; + + start = max(gfn_start, memslot->base_gfn); + end = min(gfn_end, memslot->base_gfn + memslot->npages); + if (start >= end) + continue; + + slot_handle_level_range(kvm, memslot, kvm_zap_rmapp, + PT_PAGE_TABLE_LEVEL, PT_MAX_HUGEPAGE_LEVEL, + start, end - 1, true); + } + } + + spin_unlock(&kvm->mmu_lock); +} + +static bool slot_rmap_write_protect(struct kvm *kvm, + struct kvm_rmap_head *rmap_head) +{ + return __rmap_write_protect(kvm, rmap_head, false); +} + +void kvm_mmu_slot_remove_write_access(struct kvm *kvm, + struct kvm_memory_slot *memslot) +{ + bool flush; + + spin_lock(&kvm->mmu_lock); + flush = slot_handle_all_level(kvm, memslot, slot_rmap_write_protect, + false); + spin_unlock(&kvm->mmu_lock); + + /* + * kvm_mmu_slot_remove_write_access() and kvm_vm_ioctl_get_dirty_log() + * which do tlb flush out of mmu-lock should be serialized by + * kvm->slots_lock otherwise tlb flush would be missed. + */ + lockdep_assert_held(&kvm->slots_lock); + + /* + * We can flush all the TLBs out of the mmu lock without TLB + * corruption since we just change the spte from writable to + * readonly so that we only need to care the case of changing + * spte from present to present (changing the spte from present + * to nonpresent will flush all the TLBs immediately), in other + * words, the only case we care is mmu_spte_update() where we + * have checked SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE + * instead of PT_WRITABLE_MASK, that means it does not depend + * on PT_WRITABLE_MASK anymore. 
+ */ + if (flush) + kvm_flush_remote_tlbs_with_address(kvm, memslot->base_gfn, + memslot->npages); +} + +static bool kvm_mmu_zap_collapsible_spte(struct kvm *kvm, + struct kvm_rmap_head *rmap_head) +{ + u64 *sptep; + struct rmap_iterator iter; + int need_tlb_flush = 0; + kvm_pfn_t pfn; + struct kvm_mmu_page *sp; + +restart: + for_each_rmap_spte(rmap_head, &iter, sptep) { + sp = page_header(__pa(sptep)); + pfn = spte_to_pfn(*sptep); + + /* + * We cannot do huge page mapping for indirect shadow pages, + * which are found on the last rmap (level = 1) when not using + * tdp; such shadow pages are synced with the page table in + * the guest, and the guest page table is using 4K page size + * mapping if the indirect sp has level = 1. + */ + if (sp->role.direct && !kvm_is_reserved_pfn(pfn) && + !kvm_is_zone_device_pfn(pfn) && + PageTransCompoundMap(pfn_to_page(pfn))) { + pte_list_remove(rmap_head, sptep); + + if (kvm_available_flush_tlb_with_range()) + kvm_flush_remote_tlbs_with_address(kvm, sp->gfn, + KVM_PAGES_PER_HPAGE(sp->role.level)); + else + need_tlb_flush = 1; + + goto restart; + } + } + + return need_tlb_flush; +} + +void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm, + const struct kvm_memory_slot *memslot) +{ + /* FIXME: const-ify all uses of struct kvm_memory_slot. */ + spin_lock(&kvm->mmu_lock); + slot_handle_leaf(kvm, (struct kvm_memory_slot *)memslot, + kvm_mmu_zap_collapsible_spte, true); + spin_unlock(&kvm->mmu_lock); +} + +void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm, + struct kvm_memory_slot *memslot) +{ + bool flush; + + spin_lock(&kvm->mmu_lock); + flush = slot_handle_leaf(kvm, memslot, __rmap_clear_dirty, false); + spin_unlock(&kvm->mmu_lock); + + lockdep_assert_held(&kvm->slots_lock); + + /* + * It's also safe to flush TLBs out of mmu lock here as currently this + * function is only used for dirty logging, in which case flushing TLB + * out of mmu lock also guarantees no dirty pages will be lost in + * dirty_bitmap. 
+ */
+	if (flush)
+		kvm_flush_remote_tlbs_with_address(kvm, memslot->base_gfn,
+				memslot->npages);
+}
+EXPORT_SYMBOL_GPL(kvm_mmu_slot_leaf_clear_dirty);
+
+void kvm_mmu_slot_largepage_remove_write_access(struct kvm *kvm,
+					struct kvm_memory_slot *memslot)
+{
+	bool flush;
+
+	spin_lock(&kvm->mmu_lock);
+	flush = slot_handle_large_level(kvm, memslot, slot_rmap_write_protect,
+					false);
+	spin_unlock(&kvm->mmu_lock);
+
+	/* see kvm_mmu_slot_remove_write_access */
+	lockdep_assert_held(&kvm->slots_lock);
+
+	if (flush)
+		kvm_flush_remote_tlbs_with_address(kvm, memslot->base_gfn,
+				memslot->npages);
+}
+EXPORT_SYMBOL_GPL(kvm_mmu_slot_largepage_remove_write_access);
+
+void kvm_mmu_slot_set_dirty(struct kvm *kvm,
+			    struct kvm_memory_slot *memslot)
+{
+	bool flush;
+
+	spin_lock(&kvm->mmu_lock);
+	flush = slot_handle_all_level(kvm, memslot, __rmap_set_dirty, false);
+	spin_unlock(&kvm->mmu_lock);
+
+	lockdep_assert_held(&kvm->slots_lock);
+
+	/* see kvm_mmu_slot_leaf_clear_dirty */
+	if (flush)
+		kvm_flush_remote_tlbs_with_address(kvm, memslot->base_gfn,
+				memslot->npages);
+}
+EXPORT_SYMBOL_GPL(kvm_mmu_slot_set_dirty);
+
+void kvm_mmu_zap_all(struct kvm *kvm)
+{
+	struct kvm_mmu_page *sp, *node;
+	LIST_HEAD(invalid_list);
+	int ign;
+
+	spin_lock(&kvm->mmu_lock);
+restart:
+	list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) {
+		if (sp->role.invalid && sp->root_count)
+			continue;
+		if (__kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list, &ign))
+			goto restart;
+		if (cond_resched_lock(&kvm->mmu_lock))
+			goto restart;
+	}
+
+	kvm_mmu_commit_zap_page(kvm, &invalid_list);
+	spin_unlock(&kvm->mmu_lock);
+}
+
+void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen)
+{
+	WARN_ON(gen & KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS);
+
+	gen &= MMIO_SPTE_GEN_MASK;
+
+	/*
+	 * Generation numbers are incremented in multiples of the number of
+	 * address spaces in order to provide unique generations across all
+	 * address spaces.  Strip what is effectively the address space
+	 * modifier prior to checking for a wrap of the MMIO generation so
+	 * that a wrap in any address space is detected.
+	 */
+	gen &= ~((u64)KVM_ADDRESS_SPACE_NUM - 1);
+
+	/*
+	 * The very rare case: if the MMIO generation number has wrapped,
+	 * zap all shadow pages.
+	 */
+	if (unlikely(gen == 0)) {
+		kvm_debug_ratelimited("kvm: zapping shadow pages for mmio generation wraparound\n");
+		kvm_mmu_zap_all_fast(kvm);
+	}
+}
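The two masking steps in kvm_mmu_invalidate_mmio_sptes() above are easier
to see with concrete numbers. A standalone sketch with stand-in constants
(the real MMIO_SPTE_GEN_MASK and address-space count live elsewhere in
KVM; the values below are only for illustration):

	#include <stdint.h>
	#include <stdio.h>

	#define NR_ADDR_SPACES	2		/* x86 has two: normal and SMM */
	#define GEN_MASK	0x3ffffull	/* stand-in generation mask */

	/* Memslot generations advance in steps of NR_ADDR_SPACES, so the
	 * low bit(s) tag the address space while the remaining bits form
	 * a counter shared by all address spaces.  Stripping the tag makes
	 * a wrap in either address space show up as a zero counter.
	 */
	static int mmio_gen_wrapped(uint64_t gen)
	{
		gen &= GEN_MASK;			/* spte-encodable bits */
		gen &= ~((uint64_t)NR_ADDR_SPACES - 1);	/* drop the tag */
		return gen == 0;
	}

	int main(void)
	{
		printf("%d\n", mmio_gen_wrapped(1)); /* 1: counter 0, SMM tag */
		printf("%d\n", mmio_gen_wrapped(2)); /* 0: counter advanced */
		return 0;
	}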
+static unsigned long
+mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
+{
+	struct kvm *kvm;
+	int nr_to_scan = sc->nr_to_scan;
+	unsigned long freed = 0;
+
+	mutex_lock(&kvm_lock);
+
+	list_for_each_entry(kvm, &vm_list, vm_list) {
+		int idx;
+		LIST_HEAD(invalid_list);
+
+		/*
+		 * Never scan more than sc->nr_to_scan VM instances.
+		 * Will not hit this condition practically since we do not try
+		 * to shrink more than one VM and it is very unlikely to see
+		 * !n_used_mmu_pages so many times.
+		 */
+		if (!nr_to_scan--)
+			break;
+		/*
+		 * n_used_mmu_pages is accessed without holding kvm->mmu_lock
+		 * here. We may skip a VM instance erroneously, but we do not
+		 * want to shrink a VM that has only started to populate its
+		 * MMU anyway.
+		 */
+		if (!kvm->arch.n_used_mmu_pages &&
+		    !kvm_has_zapped_obsolete_pages(kvm))
+			continue;
+
+		idx = srcu_read_lock(&kvm->srcu);
+		spin_lock(&kvm->mmu_lock);
+
+		if (kvm_has_zapped_obsolete_pages(kvm)) {
+			kvm_mmu_commit_zap_page(kvm,
+			      &kvm->arch.zapped_obsolete_pages);
+			goto unlock;
+		}
+
+		if (prepare_zap_oldest_mmu_page(kvm, &invalid_list))
+			freed++;
+		kvm_mmu_commit_zap_page(kvm, &invalid_list);
+
+unlock:
+		spin_unlock(&kvm->mmu_lock);
+		srcu_read_unlock(&kvm->srcu, idx);
+
+		/*
+		 * unfair on small ones
+		 * per-vm shrinkers cry out
+		 * sadness comes quickly
+		 */
+		list_move_tail(&kvm->vm_list, &vm_list);
+		break;
+	}
+
+	mutex_unlock(&kvm_lock);
+	return freed;
+}
+
+static unsigned long
+mmu_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
+{
+	return percpu_counter_read_positive(&kvm_total_used_mmu_pages);
+}
+
+static struct shrinker mmu_shrinker = {
+	.count_objects = mmu_shrink_count,
+	.scan_objects = mmu_shrink_scan,
+	.seeks = DEFAULT_SEEKS * 10,
+};
+
+static void mmu_destroy_caches(void)
+{
+	kmem_cache_destroy(pte_list_desc_cache);
+	kmem_cache_destroy(mmu_page_header_cache);
+}
+
+static void kvm_set_mmio_spte_mask(void)
+{
+	u64 mask;
+
+	/*
+	 * Set the reserved bits and the present bit of a paging-structure
+	 * entry so that an access through it generates a page fault with
+	 * the RSVD bit set in the error code (PFERR_RSVD_MASK).
+	 */
+
+	/*
+	 * Mask the uppermost physical address bit, which would be reserved as
+	 * long as the supported physical address width is less than 52.
+	 */
+	mask = 1ull << 51;
+
+	/* Set the present bit. */
+	mask |= 1ull;
+
+	/*
+	 * If reserved bit is not supported, clear the present bit to disable
+	 * mmio page fault.
+	 */
+	if (IS_ENABLED(CONFIG_X86_64) && shadow_phys_bits == 52)
+		mask &= ~1ull;
+
+	kvm_mmu_set_mmio_spte_mask(mask, mask, ACC_WRITE_MASK | ACC_USER_MASK);
+}
+
+static bool get_nx_auto_mode(void)
+{
+	/* Return true when CPU has the bug, and mitigations are ON */
+	return boot_cpu_has_bug(X86_BUG_ITLB_MULTIHIT) && !cpu_mitigations_off();
+}
+
+static void __set_nx_huge_pages(bool val)
+{
+	nx_huge_pages = itlb_multihit_kvm_mitigation = val;
+}
+
+static int set_nx_huge_pages(const char *val, const struct kernel_param *kp)
+{
+	bool old_val = nx_huge_pages;
+	bool new_val;
+
+	/* In "auto" mode deploy workaround only if CPU has the bug. */
+	if (sysfs_streq(val, "off"))
+		new_val = 0;
+	else if (sysfs_streq(val, "force"))
+		new_val = 1;
+	else if (sysfs_streq(val, "auto"))
+		new_val = get_nx_auto_mode();
+	else if (strtobool(val, &new_val) < 0)
+		return -EINVAL;
+
+	__set_nx_huge_pages(new_val);
+
+	if (new_val != old_val) {
+		struct kvm *kvm;
+
+		mutex_lock(&kvm_lock);
+
+		list_for_each_entry(kvm, &vm_list, vm_list) {
+			mutex_lock(&kvm->slots_lock);
+			kvm_mmu_zap_all_fast(kvm);
+			mutex_unlock(&kvm->slots_lock);
+
+			wake_up_process(kvm->arch.nx_lpage_recovery_thread);
+		}
+		mutex_unlock(&kvm_lock);
+	}
+
+	return 0;
+}
+
+int kvm_mmu_module_init(void)
+{
+	int ret = -ENOMEM;
+
+	if (nx_huge_pages == -1)
+		__set_nx_huge_pages(get_nx_auto_mode());
+
+	/*
+	 * MMU roles use union aliasing which is, generally speaking, an
+	 * undefined behavior. However, we supposedly know how compilers behave
+	 * and the current status quo is unlikely to change. Guardians below are
+	 * supposed to let us know if the assumption becomes false.
+ */ + BUILD_BUG_ON(sizeof(union kvm_mmu_page_role) != sizeof(u32)); + BUILD_BUG_ON(sizeof(union kvm_mmu_extended_role) != sizeof(u32)); + BUILD_BUG_ON(sizeof(union kvm_mmu_role) != sizeof(u64)); + + kvm_mmu_reset_all_pte_masks(); + + kvm_set_mmio_spte_mask(); + + pte_list_desc_cache = kmem_cache_create("pte_list_desc", + sizeof(struct pte_list_desc), + 0, SLAB_ACCOUNT, NULL); + if (!pte_list_desc_cache) + goto out; + + mmu_page_header_cache = kmem_cache_create("kvm_mmu_page_header", + sizeof(struct kvm_mmu_page), + 0, SLAB_ACCOUNT, NULL); + if (!mmu_page_header_cache) + goto out; + + if (percpu_counter_init(&kvm_total_used_mmu_pages, 0, GFP_KERNEL)) + goto out; + + ret = register_shrinker(&mmu_shrinker); + if (ret) + goto out; + + return 0; + +out: + mmu_destroy_caches(); + return ret; +} + +/* + * Calculate mmu pages needed for kvm. + */ +unsigned long kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm) +{ + unsigned long nr_mmu_pages; + unsigned long nr_pages = 0; + struct kvm_memslots *slots; + struct kvm_memory_slot *memslot; + int i; + + for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) { + slots = __kvm_memslots(kvm, i); + + kvm_for_each_memslot(memslot, slots) + nr_pages += memslot->npages; + } + + nr_mmu_pages = nr_pages * KVM_PERMILLE_MMU_PAGES / 1000; + nr_mmu_pages = max(nr_mmu_pages, KVM_MIN_ALLOC_MMU_PAGES); + + return nr_mmu_pages; +} + +void kvm_mmu_destroy(struct kvm_vcpu *vcpu) +{ + kvm_mmu_unload(vcpu); + free_mmu_pages(&vcpu->arch.root_mmu); + free_mmu_pages(&vcpu->arch.guest_mmu); + mmu_free_memory_caches(vcpu); +} + +void kvm_mmu_module_exit(void) +{ + mmu_destroy_caches(); + percpu_counter_destroy(&kvm_total_used_mmu_pages); + unregister_shrinker(&mmu_shrinker); + mmu_audit_disable(); +} + +static int set_nx_huge_pages_recovery_ratio(const char *val, const struct kernel_param *kp) +{ + unsigned int old_val; + int err; + + old_val = nx_huge_pages_recovery_ratio; + err = param_set_uint(val, kp); + if (err) + return err; + + if (READ_ONCE(nx_huge_pages) && + !old_val && nx_huge_pages_recovery_ratio) { + struct kvm *kvm; + + mutex_lock(&kvm_lock); + + list_for_each_entry(kvm, &vm_list, vm_list) + wake_up_process(kvm->arch.nx_lpage_recovery_thread); + + mutex_unlock(&kvm_lock); + } + + return err; +} + +static void kvm_recover_nx_lpages(struct kvm *kvm) +{ + int rcu_idx; + struct kvm_mmu_page *sp; + unsigned int ratio; + LIST_HEAD(invalid_list); + ulong to_zap; + + rcu_idx = srcu_read_lock(&kvm->srcu); + spin_lock(&kvm->mmu_lock); + + ratio = READ_ONCE(nx_huge_pages_recovery_ratio); + to_zap = ratio ? DIV_ROUND_UP(kvm->stat.nx_lpage_splits, ratio) : 0; + while (to_zap && !list_empty(&kvm->arch.lpage_disallowed_mmu_pages)) { + /* + * We use a separate list instead of just using active_mmu_pages + * because the number of lpage_disallowed pages is expected to + * be relatively small compared to the total. + */ + sp = list_first_entry(&kvm->arch.lpage_disallowed_mmu_pages, + struct kvm_mmu_page, + lpage_disallowed_link); + WARN_ON_ONCE(!sp->lpage_disallowed); + kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list); + WARN_ON_ONCE(sp->lpage_disallowed); + + if (!--to_zap || need_resched() || spin_needbreak(&kvm->mmu_lock)) { + kvm_mmu_commit_zap_page(kvm, &invalid_list); + if (to_zap) + cond_resched_lock(&kvm->mmu_lock); + } + } + + spin_unlock(&kvm->mmu_lock); + srcu_read_unlock(&kvm->srcu, rcu_idx); +} + +static long get_nx_lpage_recovery_timeout(u64 start_time) +{ + return READ_ONCE(nx_huge_pages) && READ_ONCE(nx_huge_pages_recovery_ratio) + ? 
start_time + 60 * HZ - get_jiffies_64()
+		: MAX_SCHEDULE_TIMEOUT;
+}
+
+static int kvm_nx_lpage_recovery_worker(struct kvm *kvm, uintptr_t data)
+{
+	u64 start_time;
+	long remaining_time;
+
+	while (true) {
+		start_time = get_jiffies_64();
+		remaining_time = get_nx_lpage_recovery_timeout(start_time);
+
+		set_current_state(TASK_INTERRUPTIBLE);
+		while (!kthread_should_stop() && remaining_time > 0) {
+			schedule_timeout(remaining_time);
+			remaining_time = get_nx_lpage_recovery_timeout(start_time);
+			set_current_state(TASK_INTERRUPTIBLE);
+		}
+
+		set_current_state(TASK_RUNNING);
+
+		if (kthread_should_stop())
+			return 0;
+
+		kvm_recover_nx_lpages(kvm);
+	}
+}
+
+int kvm_mmu_post_init_vm(struct kvm *kvm)
+{
+	int err;
+
+	err = kvm_vm_create_worker_thread(kvm, kvm_nx_lpage_recovery_worker, 0,
+					  "kvm-nx-lpage-recovery",
+					  &kvm->arch.nx_lpage_recovery_thread);
+	if (!err)
+		kthread_unpark(kvm->arch.nx_lpage_recovery_thread);
+
+	return err;
+}
+
+void kvm_mmu_pre_destroy_vm(struct kvm *kvm)
+{
+	if (kvm->arch.nx_lpage_recovery_thread)
+		kthread_stop(kvm->arch.nx_lpage_recovery_thread);
+}
diff --git a/arch/x86/kvm/mmu/page_track.c b/arch/x86/kvm/mmu/page_track.c
new file mode 100644
index 000000000000..3521e2d176f2
--- /dev/null
+++ b/arch/x86/kvm/mmu/page_track.c
@@ -0,0 +1,265 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Support KVM guest page tracking
+ *
+ * This feature allows us to track page access in the guest. Currently, only
+ * write access is tracked.
+ *
+ * Copyright(C) 2015 Intel Corporation.
+ *
+ * Author:
+ *   Xiao Guangrong
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/rculist.h>
+
+#include <asm/kvm_host.h>
+#include <asm/kvm_page_track.h>
+
+#include "mmu.h"
+
+void kvm_page_track_free_memslot(struct kvm_memory_slot *free,
+				 struct kvm_memory_slot *dont)
+{
+	int i;
+
+	for (i = 0; i < KVM_PAGE_TRACK_MAX; i++)
+		if (!dont || free->arch.gfn_track[i] !=
+		      dont->arch.gfn_track[i]) {
+			kvfree(free->arch.gfn_track[i]);
+			free->arch.gfn_track[i] = NULL;
+		}
+}
+
+int kvm_page_track_create_memslot(struct kvm_memory_slot *slot,
+				  unsigned long npages)
+{
+	int i;
+
+	for (i = 0; i < KVM_PAGE_TRACK_MAX; i++) {
+		slot->arch.gfn_track[i] =
+			kvcalloc(npages, sizeof(*slot->arch.gfn_track[i]),
+				 GFP_KERNEL_ACCOUNT);
+		if (!slot->arch.gfn_track[i])
+			goto track_free;
+	}
+
+	return 0;
+
+track_free:
+	kvm_page_track_free_memslot(slot, NULL);
+	return -ENOMEM;
+}
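The gfn_track arrays sized above hold one 16-bit reference count per guest page and per tracking mode; update_gfn_track() below adjusts them and refuses updates that would under- or overflow a counter. A minimal model of that bookkeeping (the toy_* names are mine, not the kernel's):

#include <stdint.h>
#include <limits.h>
#include <stdio.h>

#define TOY_NPAGES 8

/* One 16-bit refcount per gfn, as in slot->arch.gfn_track[mode][]. */
static unsigned short toy_track[TOY_NPAGES];

/* Mirrors the saturation check in update_gfn_track(). */
static int toy_update_track(unsigned long index, short count)
{
	int val = toy_track[index];

	if (val + count < 0 || val + count > USHRT_MAX)
		return -1;	/* the kernel WARNs and bails out here */

	toy_track[index] += count;
	return 0;
}

int main(void)
{
	toy_update_track(3, 1);		/* first tracker: add_page */
	toy_update_track(3, 1);		/* a second tracker on the same gfn */
	toy_update_track(3, -1);	/* one tracker removed: remove_page */
	printf("gfn index 3: %u tracker(s)\n", toy_track[3]);	/* 1: still tracked */
	return 0;
}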
+
+static inline bool page_track_mode_is_valid(enum kvm_page_track_mode mode)
+{
+	if (mode < 0 || mode >= KVM_PAGE_TRACK_MAX)
+		return false;
+
+	return true;
+}
+
+static void update_gfn_track(struct kvm_memory_slot *slot, gfn_t gfn,
+			     enum kvm_page_track_mode mode, short count)
+{
+	int index, val;
+
+	index = gfn_to_index(gfn, slot->base_gfn, PT_PAGE_TABLE_LEVEL);
+
+	val = slot->arch.gfn_track[mode][index];
+
+	if (WARN_ON(val + count < 0 || val + count > USHRT_MAX))
+		return;
+
+	slot->arch.gfn_track[mode][index] += count;
+}
+
+/*
+ * add guest page to the tracking pool so that corresponding access on that
+ * page will be intercepted.
+ *
+ * It should be called under the protection of both the mmu-lock and either
+ * kvm->srcu or kvm->slots_lock.
+ *
+ * @kvm: the guest instance we are interested in.
+ * @slot: the memory slot that @gfn belongs to.
+ * @gfn: the guest page.
+ * @mode: tracking mode, currently only write track is supported.
+ */
+void kvm_slot_page_track_add_page(struct kvm *kvm,
+				  struct kvm_memory_slot *slot, gfn_t gfn,
+				  enum kvm_page_track_mode mode)
+{
+
+	if (WARN_ON(!page_track_mode_is_valid(mode)))
+		return;
+
+	update_gfn_track(slot, gfn, mode, 1);
+
+	/*
+	 * a new tracker stops large page mapping for the
+	 * tracked page.
+	 */
+	kvm_mmu_gfn_disallow_lpage(slot, gfn);
+
+	if (mode == KVM_PAGE_TRACK_WRITE)
+		if (kvm_mmu_slot_gfn_write_protect(kvm, slot, gfn))
+			kvm_flush_remote_tlbs(kvm);
+}
+EXPORT_SYMBOL_GPL(kvm_slot_page_track_add_page);
+
+/*
+ * remove the guest page from the tracking pool which stops the interception
+ * of corresponding access on that page. It is the opposite operation of
+ * kvm_slot_page_track_add_page().
+ *
+ * It should be called under the protection of both the mmu-lock and either
+ * kvm->srcu or kvm->slots_lock.
+ *
+ * @kvm: the guest instance we are interested in.
+ * @slot: the memory slot that @gfn belongs to.
+ * @gfn: the guest page.
+ * @mode: tracking mode, currently only write track is supported.
+ */
+void kvm_slot_page_track_remove_page(struct kvm *kvm,
+				     struct kvm_memory_slot *slot, gfn_t gfn,
+				     enum kvm_page_track_mode mode)
+{
+	if (WARN_ON(!page_track_mode_is_valid(mode)))
+		return;
+
+	update_gfn_track(slot, gfn, mode, -1);
+
+	/*
+	 * allow large page mapping for the tracked page
+	 * after the tracker is gone.
+	 */
+	kvm_mmu_gfn_allow_lpage(slot, gfn);
+}
+EXPORT_SYMBOL_GPL(kvm_slot_page_track_remove_page);
+
+/*
+ * check if the corresponding access on the specified guest page is tracked.
+ */
+bool kvm_page_track_is_active(struct kvm_vcpu *vcpu, gfn_t gfn,
+			      enum kvm_page_track_mode mode)
+{
+	struct kvm_memory_slot *slot;
+	int index;
+
+	if (WARN_ON(!page_track_mode_is_valid(mode)))
+		return false;
+
+	slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
+	if (!slot)
+		return false;
+
+	index = gfn_to_index(gfn, slot->base_gfn, PT_PAGE_TABLE_LEVEL);
+	return !!READ_ONCE(slot->arch.gfn_track[mode][index]);
+}
+
+void kvm_page_track_cleanup(struct kvm *kvm)
+{
+	struct kvm_page_track_notifier_head *head;
+
+	head = &kvm->arch.track_notifier_head;
+	cleanup_srcu_struct(&head->track_srcu);
+}
+
+void kvm_page_track_init(struct kvm *kvm)
+{
+	struct kvm_page_track_notifier_head *head;
+
+	head = &kvm->arch.track_notifier_head;
+	init_srcu_struct(&head->track_srcu);
+	INIT_HLIST_HEAD(&head->track_notifier_list);
+}
+
+/*
+ * register the notifier so that event interception for the tracked guest
+ * pages can be received.
+ */
+void
+kvm_page_track_register_notifier(struct kvm *kvm,
+				 struct kvm_page_track_notifier_node *n)
+{
+	struct kvm_page_track_notifier_head *head;
+
+	head = &kvm->arch.track_notifier_head;
+
+	spin_lock(&kvm->mmu_lock);
+	hlist_add_head_rcu(&n->node, &head->track_notifier_list);
+	spin_unlock(&kvm->mmu_lock);
+}
+EXPORT_SYMBOL_GPL(kvm_page_track_register_notifier);
+
+/*
+ * stop receiving the event interception. It is the opposite operation of
+ * kvm_page_track_register_notifier().
+ */
+void
+kvm_page_track_unregister_notifier(struct kvm *kvm,
+				   struct kvm_page_track_notifier_node *n)
+{
+	struct kvm_page_track_notifier_head *head;
+
+	head = &kvm->arch.track_notifier_head;
+
+	spin_lock(&kvm->mmu_lock);
+	hlist_del_rcu(&n->node);
+	spin_unlock(&kvm->mmu_lock);
+	synchronize_srcu(&head->track_srcu);
+}
+EXPORT_SYMBOL_GPL(kvm_page_track_unregister_notifier);
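For context on how this API is meant to be consumed (KVMGT is the in-tree user), a caller embeds a kvm_page_track_notifier_node, fills in the callbacks it cares about, and registers it. The callback signatures below follow the invocations visible in kvm_page_track_write() and kvm_page_track_flush_slot(); the header path and the toy_* names with empty bodies are placeholders of mine, not the kernel's:

#include <linux/kvm_host.h>
#include <asm/kvm_page_track.h>

static void toy_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
			    int bytes, struct kvm_page_track_notifier_node *node)
{
	/* A write to a tracked gpa has just been emulated; react to it here. */
}

static void toy_track_flush_slot(struct kvm *kvm, struct kvm_memory_slot *slot,
				 struct kvm_page_track_notifier_node *node)
{
	/* The slot is going away; drop any write protection taken on it. */
}

static struct kvm_page_track_notifier_node toy_node = {
	.track_write	  = toy_track_write,
	.track_flush_slot = toy_track_flush_slot,
};

static void toy_attach_tracker(struct kvm *kvm)
{
	kvm_page_track_register_notifier(kvm, &toy_node);
}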
+
+/*
+ * Notify the node that write access is intercepted and write emulation is
+ * finished at this time.
+ *
+ * The node should figure out by itself whether the written page is one it
+ * is interested in.
+ */
+void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
+			  int bytes)
+{
+	struct kvm_page_track_notifier_head *head;
+	struct kvm_page_track_notifier_node *n;
+	int idx;
+
+	head = &vcpu->kvm->arch.track_notifier_head;
+
+	if (hlist_empty(&head->track_notifier_list))
+		return;
+
+	idx = srcu_read_lock(&head->track_srcu);
+	hlist_for_each_entry_rcu(n, &head->track_notifier_list, node)
+		if (n->track_write)
+			n->track_write(vcpu, gpa, new, bytes, n);
+	srcu_read_unlock(&head->track_srcu, idx);
+}
+
+/*
+ * Notify the node that the memory slot is being removed or moved so that it
+ * can drop write-protection for the pages in the memory slot.
+ *
+ * The node should figure out by itself whether it has any write-protected
+ * pages in this slot.
+ */
+void kvm_page_track_flush_slot(struct kvm *kvm, struct kvm_memory_slot *slot)
+{
+	struct kvm_page_track_notifier_head *head;
+	struct kvm_page_track_notifier_node *n;
+	int idx;
+
+	head = &kvm->arch.track_notifier_head;
+
+	if (hlist_empty(&head->track_notifier_list))
+		return;
+
+	idx = srcu_read_lock(&head->track_srcu);
+	hlist_for_each_entry_rcu(n, &head->track_notifier_list, node)
+		if (n->track_flush_slot)
+			n->track_flush_slot(kvm, slot, n);
+	srcu_read_unlock(&head->track_srcu, idx);
+}
diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
new file mode 100644
index 000000000000..97b21e7fd013
--- /dev/null
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
@@ -0,0 +1,1090 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Kernel-based Virtual Machine driver for Linux
+ *
+ * This module enables machines with Intel VT-x extensions to run virtual
+ * machines without emulation or binary translation.
+ *
+ * MMU support
+ *
+ * Copyright (C) 2006 Qumranet, Inc.
+ * Copyright 2010 Red Hat, Inc. and/or its affiliates.
+ *
+ * Authors:
+ *   Yaniv Kamay
+ *   Avi Kivity
+ */
+
+/*
+ * We need the mmu code to access both 32-bit and 64-bit guest ptes,
+ * so the code in this file is compiled twice, once per pte size.
+ */ + +#if PTTYPE == 64 + #define pt_element_t u64 + #define guest_walker guest_walker64 + #define FNAME(name) paging##64_##name + #define PT_BASE_ADDR_MASK PT64_BASE_ADDR_MASK + #define PT_LVL_ADDR_MASK(lvl) PT64_LVL_ADDR_MASK(lvl) + #define PT_LVL_OFFSET_MASK(lvl) PT64_LVL_OFFSET_MASK(lvl) + #define PT_INDEX(addr, level) PT64_INDEX(addr, level) + #define PT_LEVEL_BITS PT64_LEVEL_BITS + #define PT_GUEST_DIRTY_SHIFT PT_DIRTY_SHIFT + #define PT_GUEST_ACCESSED_SHIFT PT_ACCESSED_SHIFT + #define PT_HAVE_ACCESSED_DIRTY(mmu) true + #ifdef CONFIG_X86_64 + #define PT_MAX_FULL_LEVELS 4 + #define CMPXCHG cmpxchg + #else + #define CMPXCHG cmpxchg64 + #define PT_MAX_FULL_LEVELS 2 + #endif +#elif PTTYPE == 32 + #define pt_element_t u32 + #define guest_walker guest_walker32 + #define FNAME(name) paging##32_##name + #define PT_BASE_ADDR_MASK PT32_BASE_ADDR_MASK + #define PT_LVL_ADDR_MASK(lvl) PT32_LVL_ADDR_MASK(lvl) + #define PT_LVL_OFFSET_MASK(lvl) PT32_LVL_OFFSET_MASK(lvl) + #define PT_INDEX(addr, level) PT32_INDEX(addr, level) + #define PT_LEVEL_BITS PT32_LEVEL_BITS + #define PT_MAX_FULL_LEVELS 2 + #define PT_GUEST_DIRTY_SHIFT PT_DIRTY_SHIFT + #define PT_GUEST_ACCESSED_SHIFT PT_ACCESSED_SHIFT + #define PT_HAVE_ACCESSED_DIRTY(mmu) true + #define CMPXCHG cmpxchg +#elif PTTYPE == PTTYPE_EPT + #define pt_element_t u64 + #define guest_walker guest_walkerEPT + #define FNAME(name) ept_##name + #define PT_BASE_ADDR_MASK PT64_BASE_ADDR_MASK + #define PT_LVL_ADDR_MASK(lvl) PT64_LVL_ADDR_MASK(lvl) + #define PT_LVL_OFFSET_MASK(lvl) PT64_LVL_OFFSET_MASK(lvl) + #define PT_INDEX(addr, level) PT64_INDEX(addr, level) + #define PT_LEVEL_BITS PT64_LEVEL_BITS + #define PT_GUEST_DIRTY_SHIFT 9 + #define PT_GUEST_ACCESSED_SHIFT 8 + #define PT_HAVE_ACCESSED_DIRTY(mmu) ((mmu)->ept_ad) + #define CMPXCHG cmpxchg64 + #define PT_MAX_FULL_LEVELS 4 +#else + #error Invalid PTTYPE value +#endif + +#define PT_GUEST_DIRTY_MASK (1 << PT_GUEST_DIRTY_SHIFT) +#define PT_GUEST_ACCESSED_MASK (1 << PT_GUEST_ACCESSED_SHIFT) + +#define gpte_to_gfn_lvl FNAME(gpte_to_gfn_lvl) +#define gpte_to_gfn(pte) gpte_to_gfn_lvl((pte), PT_PAGE_TABLE_LEVEL) + +/* + * The guest_walker structure emulates the behavior of the hardware page + * table walker. 
+ */ +struct guest_walker { + int level; + unsigned max_level; + gfn_t table_gfn[PT_MAX_FULL_LEVELS]; + pt_element_t ptes[PT_MAX_FULL_LEVELS]; + pt_element_t prefetch_ptes[PTE_PREFETCH_NUM]; + gpa_t pte_gpa[PT_MAX_FULL_LEVELS]; + pt_element_t __user *ptep_user[PT_MAX_FULL_LEVELS]; + bool pte_writable[PT_MAX_FULL_LEVELS]; + unsigned pt_access; + unsigned pte_access; + gfn_t gfn; + struct x86_exception fault; +}; + +static gfn_t gpte_to_gfn_lvl(pt_element_t gpte, int lvl) +{ + return (gpte & PT_LVL_ADDR_MASK(lvl)) >> PAGE_SHIFT; +} + +static inline void FNAME(protect_clean_gpte)(struct kvm_mmu *mmu, unsigned *access, + unsigned gpte) +{ + unsigned mask; + + /* dirty bit is not supported, so no need to track it */ + if (!PT_HAVE_ACCESSED_DIRTY(mmu)) + return; + + BUILD_BUG_ON(PT_WRITABLE_MASK != ACC_WRITE_MASK); + + mask = (unsigned)~ACC_WRITE_MASK; + /* Allow write access to dirty gptes */ + mask |= (gpte >> (PT_GUEST_DIRTY_SHIFT - PT_WRITABLE_SHIFT)) & + PT_WRITABLE_MASK; + *access &= mask; +} + +static inline int FNAME(is_present_gpte)(unsigned long pte) +{ +#if PTTYPE != PTTYPE_EPT + return pte & PT_PRESENT_MASK; +#else + return pte & 7; +#endif +} + +static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, + pt_element_t __user *ptep_user, unsigned index, + pt_element_t orig_pte, pt_element_t new_pte) +{ + int npages; + pt_element_t ret; + pt_element_t *table; + struct page *page; + + npages = get_user_pages_fast((unsigned long)ptep_user, 1, FOLL_WRITE, &page); + if (likely(npages == 1)) { + table = kmap_atomic(page); + ret = CMPXCHG(&table[index], orig_pte, new_pte); + kunmap_atomic(table); + + kvm_release_page_dirty(page); + } else { + struct vm_area_struct *vma; + unsigned long vaddr = (unsigned long)ptep_user & PAGE_MASK; + unsigned long pfn; + unsigned long paddr; + + down_read(¤t->mm->mmap_sem); + vma = find_vma_intersection(current->mm, vaddr, vaddr + PAGE_SIZE); + if (!vma || !(vma->vm_flags & VM_PFNMAP)) { + up_read(¤t->mm->mmap_sem); + return -EFAULT; + } + pfn = ((vaddr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; + paddr = pfn << PAGE_SHIFT; + table = memremap(paddr, PAGE_SIZE, MEMREMAP_WB); + if (!table) { + up_read(¤t->mm->mmap_sem); + return -EFAULT; + } + ret = CMPXCHG(&table[index], orig_pte, new_pte); + memunmap(table); + up_read(¤t->mm->mmap_sem); + } + + return (ret != orig_pte); +} + +static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu, + struct kvm_mmu_page *sp, u64 *spte, + u64 gpte) +{ + if (is_rsvd_bits_set(vcpu->arch.mmu, gpte, PT_PAGE_TABLE_LEVEL)) + goto no_present; + + if (!FNAME(is_present_gpte)(gpte)) + goto no_present; + + /* if accessed bit is not supported prefetch non accessed gpte */ + if (PT_HAVE_ACCESSED_DIRTY(vcpu->arch.mmu) && + !(gpte & PT_GUEST_ACCESSED_MASK)) + goto no_present; + + return false; + +no_present: + drop_spte(vcpu->kvm, spte); + return true; +} + +/* + * For PTTYPE_EPT, a page table can be executable but not readable + * on supported processors. Therefore, set_spte does not automatically + * set bit 0 if execute only is supported. Here, we repurpose ACC_USER_MASK + * to signify readability since it isn't used in the EPT case + */ +static inline unsigned FNAME(gpte_access)(u64 gpte) +{ + unsigned access; +#if PTTYPE == PTTYPE_EPT + access = ((gpte & VMX_EPT_WRITABLE_MASK) ? ACC_WRITE_MASK : 0) | + ((gpte & VMX_EPT_EXECUTABLE_MASK) ? ACC_EXEC_MASK : 0) | + ((gpte & VMX_EPT_READABLE_MASK) ? 
ACC_USER_MASK : 0); +#else + BUILD_BUG_ON(ACC_EXEC_MASK != PT_PRESENT_MASK); + BUILD_BUG_ON(ACC_EXEC_MASK != 1); + access = gpte & (PT_WRITABLE_MASK | PT_USER_MASK | PT_PRESENT_MASK); + /* Combine NX with P (which is set here) to get ACC_EXEC_MASK. */ + access ^= (gpte >> PT64_NX_SHIFT); +#endif + + return access; +} + +static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu, + struct kvm_mmu *mmu, + struct guest_walker *walker, + int write_fault) +{ + unsigned level, index; + pt_element_t pte, orig_pte; + pt_element_t __user *ptep_user; + gfn_t table_gfn; + int ret; + + /* dirty/accessed bits are not supported, so no need to update them */ + if (!PT_HAVE_ACCESSED_DIRTY(mmu)) + return 0; + + for (level = walker->max_level; level >= walker->level; --level) { + pte = orig_pte = walker->ptes[level - 1]; + table_gfn = walker->table_gfn[level - 1]; + ptep_user = walker->ptep_user[level - 1]; + index = offset_in_page(ptep_user) / sizeof(pt_element_t); + if (!(pte & PT_GUEST_ACCESSED_MASK)) { + trace_kvm_mmu_set_accessed_bit(table_gfn, index, sizeof(pte)); + pte |= PT_GUEST_ACCESSED_MASK; + } + if (level == walker->level && write_fault && + !(pte & PT_GUEST_DIRTY_MASK)) { + trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte)); +#if PTTYPE == PTTYPE_EPT + if (kvm_arch_write_log_dirty(vcpu)) + return -EINVAL; +#endif + pte |= PT_GUEST_DIRTY_MASK; + } + if (pte == orig_pte) + continue; + + /* + * If the slot is read-only, simply do not process the accessed + * and dirty bits. This is the correct thing to do if the slot + * is ROM, and page tables in read-as-ROM/write-as-MMIO slots + * are only supported if the accessed and dirty bits are already + * set in the ROM (so that MMIO writes are never needed). + * + * Note that NPT does not allow this at all and faults, since + * it always wants nested page table entries for the guest + * page tables to be writable. And EPT works but will simply + * overwrite the read-only memory to set the accessed and dirty + * bits. 
+ */
+		if (unlikely(!walker->pte_writable[level - 1]))
+			continue;
+
+		ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index, orig_pte, pte);
+		if (ret)
+			return ret;
+
+		kvm_vcpu_mark_page_dirty(vcpu, table_gfn);
+		walker->ptes[level - 1] = pte;
+	}
+	return 0;
+}
+
+static inline unsigned FNAME(gpte_pkeys)(struct kvm_vcpu *vcpu, u64 gpte)
+{
+	unsigned pkeys = 0;
+#if PTTYPE == 64
+	pte_t pte = {.pte = gpte};
+
+	pkeys = pte_flags_pkey(pte_flags(pte));
+#endif
+	return pkeys;
+}
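One trick worth unpacking before reading FNAME(walk_addr_generic) below: permissions have to be ANDed across all levels of the walk, but NX has inverted polarity (set means "not executable"). The walker therefore XORs each gpte with walk_nx_mask so that every bit means "allowed", ANDs as it descends, and XORs back at the end. A self-contained illustration of why that works:

#include <stdint.h>
#include <stdio.h>

#define TOY_NX (1ull << 63)	/* PT64_NX_SHIFT is 63 on x86-64 */

int main(void)
{
	/* The upper-level entry allows execution (NX clear), the leaf
	 * forbids it (NX set); all other bits are 1 for brevity. */
	uint64_t pde = ~TOY_NX;
	uint64_t pte = ~0ull;

	/* Invert NX so "allowed" is a 1 for every bit, then AND-accumulate
	 * down the walk, exactly as the walker does with pt_access. */
	uint64_t acc = ~0ull;
	acc &= pde ^ TOY_NX;
	acc &= pte ^ TOY_NX;

	/* In the inverted encoding, a surviving NX position means "exec
	 * allowed"; the leaf's NX kills it here. */
	printf("exec allowed: %s\n", (acc & TOY_NX) ? "yes" : "no"); /* no */
	return 0;
}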
+
+/*
+ * Fetch a guest pte for a guest virtual address
+ */
+static int FNAME(walk_addr_generic)(struct guest_walker *walker,
+				    struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
+				    gva_t addr, u32 access)
+{
+	int ret;
+	pt_element_t pte;
+	pt_element_t __user *uninitialized_var(ptep_user);
+	gfn_t table_gfn;
+	u64 pt_access, pte_access;
+	unsigned index, accessed_dirty, pte_pkey;
+	unsigned nested_access;
+	gpa_t pte_gpa;
+	bool have_ad;
+	int offset;
+	u64 walk_nx_mask = 0;
+	const int write_fault = access & PFERR_WRITE_MASK;
+	const int user_fault = access & PFERR_USER_MASK;
+	const int fetch_fault = access & PFERR_FETCH_MASK;
+	u16 errcode = 0;
+	gpa_t real_gpa;
+	gfn_t gfn;
+
+	trace_kvm_mmu_pagetable_walk(addr, access);
+retry_walk:
+	walker->level = mmu->root_level;
+	pte = mmu->get_cr3(vcpu);
+	have_ad = PT_HAVE_ACCESSED_DIRTY(mmu);
+
+#if PTTYPE == 64
+	walk_nx_mask = 1ULL << PT64_NX_SHIFT;
+	if (walker->level == PT32E_ROOT_LEVEL) {
+		pte = mmu->get_pdptr(vcpu, (addr >> 30) & 3);
+		trace_kvm_mmu_paging_element(pte, walker->level);
+		if (!FNAME(is_present_gpte)(pte))
+			goto error;
+		--walker->level;
+	}
+#endif
+	walker->max_level = walker->level;
+	ASSERT(!(is_long_mode(vcpu) && !is_pae(vcpu)));
+
+	/*
+	 * FIXME: on Intel processors, loads of the PDPTE registers for PAE paging
+	 * by the MOV to CR instruction are treated as reads and do not cause the
+	 * processor to set the dirty flag in any EPT paging-structure entry.
+	 */
+	nested_access = (have_ad ? PFERR_WRITE_MASK : 0) | PFERR_USER_MASK;
+
+	pte_access = ~0;
+	++walker->level;
+
+	do {
+		gfn_t real_gfn;
+		unsigned long host_addr;
+
+		pt_access = pte_access;
+		--walker->level;
+
+		index = PT_INDEX(addr, walker->level);
+		table_gfn = gpte_to_gfn(pte);
+		offset = index * sizeof(pt_element_t);
+		pte_gpa = gfn_to_gpa(table_gfn) + offset;
+
+		BUG_ON(walker->level < 1);
+		walker->table_gfn[walker->level - 1] = table_gfn;
+		walker->pte_gpa[walker->level - 1] = pte_gpa;
+
+		real_gfn = mmu->translate_gpa(vcpu, gfn_to_gpa(table_gfn),
+					      nested_access,
+					      &walker->fault);
+
+		/*
+		 * FIXME: This can happen if emulation (e.g. of an INS/OUTS
+		 * instruction) triggers a nested page fault.  The exit
+		 * qualification / exit info field will incorrectly have
+		 * "guest page access" as the nested page fault's cause,
+		 * instead of "guest page structure access".  To fix this,
+		 * the x86_exception struct should be augmented with enough
+		 * information to fix the exit_qualification or exit_info_1
+		 * fields.
+		 */
+		if (unlikely(real_gfn == UNMAPPED_GVA))
+			return 0;
+
+		real_gfn = gpa_to_gfn(real_gfn);
+
+		host_addr = kvm_vcpu_gfn_to_hva_prot(vcpu, real_gfn,
+					    &walker->pte_writable[walker->level - 1]);
+		if (unlikely(kvm_is_error_hva(host_addr)))
+			goto error;
+
+		ptep_user = (pt_element_t __user *)((void *)host_addr + offset);
+		if (unlikely(__copy_from_user(&pte, ptep_user, sizeof(pte))))
+			goto error;
+		walker->ptep_user[walker->level - 1] = ptep_user;
+
+		trace_kvm_mmu_paging_element(pte, walker->level);
+
+		/*
+		 * Inverting the NX bit lets us AND it like the other
+		 * permission bits.
+		 */
+		pte_access = pt_access & (pte ^ walk_nx_mask);
+
+		if (unlikely(!FNAME(is_present_gpte)(pte)))
+			goto error;
+
+		if (unlikely(is_rsvd_bits_set(mmu, pte, walker->level))) {
+			errcode = PFERR_RSVD_MASK | PFERR_PRESENT_MASK;
+			goto error;
+		}
+
+		walker->ptes[walker->level - 1] = pte;
+	} while (!is_last_gpte(mmu, walker->level, pte));
+
+	pte_pkey = FNAME(gpte_pkeys)(vcpu, pte);
+	accessed_dirty = have_ad ? pte_access & PT_GUEST_ACCESSED_MASK : 0;
+
+	/* Convert to ACC_*_MASK flags for struct guest_walker.  */
+	walker->pt_access = FNAME(gpte_access)(pt_access ^ walk_nx_mask);
+	walker->pte_access = FNAME(gpte_access)(pte_access ^ walk_nx_mask);
+	errcode = permission_fault(vcpu, mmu, walker->pte_access, pte_pkey, access);
+	if (unlikely(errcode))
+		goto error;
+
+	gfn = gpte_to_gfn_lvl(pte, walker->level);
+	gfn += (addr & PT_LVL_OFFSET_MASK(walker->level)) >> PAGE_SHIFT;
+
+	if (PTTYPE == 32 && walker->level == PT_DIRECTORY_LEVEL && is_cpuid_PSE36())
+		gfn += pse36_gfn_delta(pte);
+
+	real_gpa = mmu->translate_gpa(vcpu, gfn_to_gpa(gfn), access, &walker->fault);
+	if (real_gpa == UNMAPPED_GVA)
+		return 0;
+
+	walker->gfn = real_gpa >> PAGE_SHIFT;
+
+	if (!write_fault)
+		FNAME(protect_clean_gpte)(mmu, &walker->pte_access, pte);
+	else
+		/*
+		 * On a write fault, fold the dirty bit into accessed_dirty.
+		 * For modes without A/D bits support accessed_dirty will be
+		 * always clear.
+		 */
+		accessed_dirty &= pte >>
+			(PT_GUEST_DIRTY_SHIFT - PT_GUEST_ACCESSED_SHIFT);
+
+	if (unlikely(!accessed_dirty)) {
+		ret = FNAME(update_accessed_dirty_bits)(vcpu, mmu, walker, write_fault);
+		if (unlikely(ret < 0))
+			goto error;
+		else if (ret)
+			goto retry_walk;
+	}
+
+	pgprintk("%s: pte %llx pte_access %x pt_access %x\n",
+		 __func__, (u64)pte, walker->pte_access, walker->pt_access);
+	return 1;
+
+error:
+	errcode |= write_fault | user_fault;
+	if (fetch_fault && (mmu->nx ||
+			    kvm_read_cr4_bits(vcpu, X86_CR4_SMEP)))
+		errcode |= PFERR_FETCH_MASK;
+
+	walker->fault.vector = PF_VECTOR;
+	walker->fault.error_code_valid = true;
+	walker->fault.error_code = errcode;
+
+#if PTTYPE == PTTYPE_EPT
+	/*
+	 * Use PFERR_RSVD_MASK in error_code to tell if an EPT
+	 * misconfiguration needs to be injected. The detection is
+	 * done by is_rsvd_bits_set() above.
+	 *
+	 * We set up the value of exit_qualification to inject:
+	 * [2:0] - Derive from the access bits. The exit_qualification might be
+	 *         out of date if it is serving an EPT misconfiguration.
+	 * [5:3] - Calculated by the page walk of the guest EPT page tables
+	 * [7:8] - Derived from [7:8] of real exit_qualification
+	 *
+	 * The other bits are set to 0.
+ */ + if (!(errcode & PFERR_RSVD_MASK)) { + vcpu->arch.exit_qualification &= 0x180; + if (write_fault) + vcpu->arch.exit_qualification |= EPT_VIOLATION_ACC_WRITE; + if (user_fault) + vcpu->arch.exit_qualification |= EPT_VIOLATION_ACC_READ; + if (fetch_fault) + vcpu->arch.exit_qualification |= EPT_VIOLATION_ACC_INSTR; + vcpu->arch.exit_qualification |= (pte_access & 0x7) << 3; + } +#endif + walker->fault.address = addr; + walker->fault.nested_page_fault = mmu != vcpu->arch.walk_mmu; + + trace_kvm_mmu_walker_error(walker->fault.error_code); + return 0; +} + +static int FNAME(walk_addr)(struct guest_walker *walker, + struct kvm_vcpu *vcpu, gva_t addr, u32 access) +{ + return FNAME(walk_addr_generic)(walker, vcpu, vcpu->arch.mmu, addr, + access); +} + +#if PTTYPE != PTTYPE_EPT +static int FNAME(walk_addr_nested)(struct guest_walker *walker, + struct kvm_vcpu *vcpu, gva_t addr, + u32 access) +{ + return FNAME(walk_addr_generic)(walker, vcpu, &vcpu->arch.nested_mmu, + addr, access); +} +#endif + +static bool +FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, + u64 *spte, pt_element_t gpte, bool no_dirty_log) +{ + unsigned pte_access; + gfn_t gfn; + kvm_pfn_t pfn; + + if (FNAME(prefetch_invalid_gpte)(vcpu, sp, spte, gpte)) + return false; + + pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte); + + gfn = gpte_to_gfn(gpte); + pte_access = sp->role.access & FNAME(gpte_access)(gpte); + FNAME(protect_clean_gpte)(vcpu->arch.mmu, &pte_access, gpte); + pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn, + no_dirty_log && (pte_access & ACC_WRITE_MASK)); + if (is_error_pfn(pfn)) + return false; + + /* + * we call mmu_set_spte() with host_writable = true because + * pte_prefetch_gfn_to_pfn always gets a writable pfn. + */ + mmu_set_spte(vcpu, spte, pte_access, 0, PT_PAGE_TABLE_LEVEL, gfn, pfn, + true, true); + + kvm_release_pfn_clean(pfn); + return true; +} + +static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, + u64 *spte, const void *pte) +{ + pt_element_t gpte = *(const pt_element_t *)pte; + + FNAME(prefetch_gpte)(vcpu, sp, spte, gpte, false); +} + +static bool FNAME(gpte_changed)(struct kvm_vcpu *vcpu, + struct guest_walker *gw, int level) +{ + pt_element_t curr_pte; + gpa_t base_gpa, pte_gpa = gw->pte_gpa[level - 1]; + u64 mask; + int r, index; + + if (level == PT_PAGE_TABLE_LEVEL) { + mask = PTE_PREFETCH_NUM * sizeof(pt_element_t) - 1; + base_gpa = pte_gpa & ~mask; + index = (pte_gpa - base_gpa) / sizeof(pt_element_t); + + r = kvm_vcpu_read_guest_atomic(vcpu, base_gpa, + gw->prefetch_ptes, sizeof(gw->prefetch_ptes)); + curr_pte = gw->prefetch_ptes[index]; + } else + r = kvm_vcpu_read_guest_atomic(vcpu, pte_gpa, + &curr_pte, sizeof(curr_pte)); + + return r || curr_pte != gw->ptes[level - 1]; +} + +static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw, + u64 *sptep) +{ + struct kvm_mmu_page *sp; + pt_element_t *gptep = gw->prefetch_ptes; + u64 *spte; + int i; + + sp = page_header(__pa(sptep)); + + if (sp->role.level > PT_PAGE_TABLE_LEVEL) + return; + + if (sp->role.direct) + return __direct_pte_prefetch(vcpu, sp, sptep); + + i = (sptep - sp->spt) & ~(PTE_PREFETCH_NUM - 1); + spte = sp->spt + i; + + for (i = 0; i < PTE_PREFETCH_NUM; i++, spte++) { + if (spte == sptep) + continue; + + if (is_shadow_present_pte(*spte)) + continue; + + if (!FNAME(prefetch_gpte)(vcpu, sp, spte, gptep[i], true)) + break; + } +} + +/* + * Fetch a shadow pte for a specific level in the paging hierarchy. 
+ * If the guest tries to write a write-protected page, we need to
+ * emulate this operation; return 1 to indicate this case.
+ */
+static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
+			 struct guest_walker *gw,
+			 int write_fault, int hlevel,
+			 kvm_pfn_t pfn, bool map_writable, bool prefault,
+			 bool lpage_disallowed)
+{
+	struct kvm_mmu_page *sp = NULL;
+	struct kvm_shadow_walk_iterator it;
+	unsigned direct_access, access = gw->pt_access;
+	int top_level, ret;
+	gfn_t gfn, base_gfn;
+
+	direct_access = gw->pte_access;
+
+	top_level = vcpu->arch.mmu->root_level;
+	if (top_level == PT32E_ROOT_LEVEL)
+		top_level = PT32_ROOT_LEVEL;
+	/*
+	 * Verify that the top-level gpte is still there.  Since the page
+	 * is a root page, it is either write protected (and cannot be
+	 * changed from now on) or it is invalid (in which case, we don't
+	 * really care if it changes underneath us after this point).
+	 */
+	if (FNAME(gpte_changed)(vcpu, gw, top_level))
+		goto out_gpte_changed;
+
+	if (!VALID_PAGE(vcpu->arch.mmu->root_hpa))
+		goto out_gpte_changed;
+
+	for (shadow_walk_init(&it, vcpu, addr);
+	     shadow_walk_okay(&it) && it.level > gw->level;
+	     shadow_walk_next(&it)) {
+		gfn_t table_gfn;
+
+		clear_sp_write_flooding_count(it.sptep);
+		drop_large_spte(vcpu, it.sptep);
+
+		sp = NULL;
+		if (!is_shadow_present_pte(*it.sptep)) {
+			table_gfn = gw->table_gfn[it.level - 2];
+			sp = kvm_mmu_get_page(vcpu, table_gfn, addr, it.level-1,
+					      false, access);
+		}
+
+		/*
+		 * Verify that the gpte in the page we've just write
+		 * protected is still there.
+		 */
+		if (FNAME(gpte_changed)(vcpu, gw, it.level - 1))
+			goto out_gpte_changed;
+
+		if (sp)
+			link_shadow_page(vcpu, it.sptep, sp);
+	}
+
+	/*
+	 * FNAME(page_fault) might have clobbered the bottom bits of
+	 * gw->gfn, restore them from the virtual address.
+	 */
+	gfn = gw->gfn | ((addr & PT_LVL_OFFSET_MASK(gw->level)) >> PAGE_SHIFT);
+	base_gfn = gfn;
+
+	trace_kvm_mmu_spte_requested(addr, gw->level, pfn);
+
+	for (; shadow_walk_okay(&it); shadow_walk_next(&it)) {
+		clear_sp_write_flooding_count(it.sptep);
+
+		/*
+		 * We cannot overwrite existing page tables with an NX
+		 * large page, as the leaf could be executable.
+		 */
+		disallowed_hugepage_adjust(it, gfn, &pfn, &hlevel);
+
+		base_gfn = gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1);
+		if (it.level == hlevel)
+			break;
+
+		validate_direct_spte(vcpu, it.sptep, direct_access);
+
+		drop_large_spte(vcpu, it.sptep);
+
+		if (!is_shadow_present_pte(*it.sptep)) {
+			sp = kvm_mmu_get_page(vcpu, base_gfn, addr,
+					      it.level - 1, true, direct_access);
+			link_shadow_page(vcpu, it.sptep, sp);
+			if (lpage_disallowed)
+				account_huge_nx_page(vcpu->kvm, sp);
+		}
+	}
+
+	ret = mmu_set_spte(vcpu, it.sptep, gw->pte_access, write_fault,
+			   it.level, base_gfn, pfn, prefault, map_writable);
+	FNAME(pte_prefetch)(vcpu, gw, it.sptep);
+	++vcpu->stat.pf_fixed;
+	return ret;
+
+out_gpte_changed:
+	return RET_PF_RETRY;
+}
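The base_gfn computation in the loop above is plain power-of-two masking: the faulting gfn is rounded down to the start of the huge page that would map it at the current level. A worked example, assuming the 2MiB case where KVM_PAGES_PER_HPAGE() yields 512 small pages (the macro itself is level-dependent):

#include <stdint.h>
#include <stdio.h>

/* 512 4KiB pages per 2MiB page; an assumed stand-in for the 2MiB level. */
#define TOY_PAGES_PER_HPAGE 512ull

int main(void)
{
	uint64_t gfn = 0x12345;
	uint64_t base_gfn = gfn & ~(TOY_PAGES_PER_HPAGE - 1);

	printf("gfn 0x%llx -> base 0x%llx, offset %llu\n",
	       (unsigned long long)gfn, (unsigned long long)base_gfn,
	       (unsigned long long)(gfn - base_gfn));
	/* prints: gfn 0x12345 -> base 0x12200, offset 325 */
	return 0;
}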
+
+ /*
+  * Check whether the mapped gfn can write its own page table in the
+  * current mapping.
+  *
+  * This is a helper of FNAME(page_fault). When the guest uses a large
+  * page to map a writable gfn that is itself in use as a page table, we
+  * must force kvm to map it with small pages, because the new shadow
+  * page created when kvm shadows that page table would stop kvm from
+  * using a large page anyway. Doing this early avoids unnecessary #PFs
+  * and emulation.
+  *
+  * @write_fault_to_shadow_pgtable will return true if the fault gfn is
+  * currently used as its page table.
+  *
+  * Note: the PDPT page table is not checked for 32-bit PAE guests. That
+  * is fine, since the PDPT is always shadowed; this means we can never
+  * use a large page to map the gfn that is used as the PDPT.
+  */
+static bool
+FNAME(is_self_change_mapping)(struct kvm_vcpu *vcpu,
+			      struct guest_walker *walker, int user_fault,
+			      bool *write_fault_to_shadow_pgtable)
+{
+	int level;
+	gfn_t mask = ~(KVM_PAGES_PER_HPAGE(walker->level) - 1);
+	bool self_changed = false;
+
+	if (!(walker->pte_access & ACC_WRITE_MASK ||
+	      (!is_write_protection(vcpu) && !user_fault)))
+		return false;
+
+	for (level = walker->level; level <= walker->max_level; level++) {
+		gfn_t gfn = walker->gfn ^ walker->table_gfn[level - 1];
+
+		self_changed |= !(gfn & mask);
+		*write_fault_to_shadow_pgtable |= !gfn;
+	}
+
+	return self_changed;
+}
+
+/*
+ * Page fault handler.  There are several causes for a page fault:
+ *   - there is no shadow pte for the guest pte
+ *   - write access through a shadow pte marked read only so that we can set
+ *     the dirty bit
+ *   - write access to a shadow pte marked read only so we can update the page
+ *     dirty bitmap, when userspace requests it
+ *   - mmio access; in this case we will never install a present shadow pte
+ *   - normal guest page fault due to the guest pte marked not present, not
+ *     writable, or not executable
+ *
+ *  Returns: 1 if we need to emulate the instruction, 0 otherwise, or
+ *           a negative value on error.
+ */
+static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
+			     bool prefault)
+{
+	int write_fault = error_code & PFERR_WRITE_MASK;
+	int user_fault = error_code & PFERR_USER_MASK;
+	struct guest_walker walker;
+	int r;
+	kvm_pfn_t pfn;
+	int level = PT_PAGE_TABLE_LEVEL;
+	unsigned long mmu_seq;
+	bool map_writable, is_self_change_mapping;
+	bool lpage_disallowed = (error_code & PFERR_FETCH_MASK) &&
+				is_nx_huge_page_enabled();
+	bool force_pt_level = lpage_disallowed;
+
+	pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code);
+
+	r = mmu_topup_memory_caches(vcpu);
+	if (r)
+		return r;
+
+	/*
+	 * If PFEC.RSVD is set, this is a shadow page fault.
+	 * The bit needs to be cleared before walking guest page tables.
+	 */
+	error_code &= ~PFERR_RSVD_MASK;
+
+	/*
+	 * Look up the guest pte for the faulting address.
+	 */
+	r = FNAME(walk_addr)(&walker, vcpu, addr, error_code);
+
+	/*
+	 * The page is not mapped by the guest.  Let the guest handle it.
+ */ + if (!r) { + pgprintk("%s: guest page fault\n", __func__); + if (!prefault) + inject_page_fault(vcpu, &walker.fault); + + return RET_PF_RETRY; + } + + if (page_fault_handle_page_track(vcpu, error_code, walker.gfn)) { + shadow_page_table_clear_flood(vcpu, addr); + return RET_PF_EMULATE; + } + + vcpu->arch.write_fault_to_shadow_pgtable = false; + + is_self_change_mapping = FNAME(is_self_change_mapping)(vcpu, + &walker, user_fault, &vcpu->arch.write_fault_to_shadow_pgtable); + + if (walker.level >= PT_DIRECTORY_LEVEL && !is_self_change_mapping) { + level = mapping_level(vcpu, walker.gfn, &force_pt_level); + if (likely(!force_pt_level)) { + level = min(walker.level, level); + walker.gfn = walker.gfn & ~(KVM_PAGES_PER_HPAGE(level) - 1); + } + } else + force_pt_level = true; + + mmu_seq = vcpu->kvm->mmu_notifier_seq; + smp_rmb(); + + if (try_async_pf(vcpu, prefault, walker.gfn, addr, &pfn, write_fault, + &map_writable)) + return RET_PF_RETRY; + + if (handle_abnormal_pfn(vcpu, addr, walker.gfn, pfn, walker.pte_access, &r)) + return r; + + /* + * Do not change pte_access if the pfn is a mmio page, otherwise + * we will cache the incorrect access into mmio spte. + */ + if (write_fault && !(walker.pte_access & ACC_WRITE_MASK) && + !is_write_protection(vcpu) && !user_fault && + !is_noslot_pfn(pfn)) { + walker.pte_access |= ACC_WRITE_MASK; + walker.pte_access &= ~ACC_USER_MASK; + + /* + * If we converted a user page to a kernel page, + * so that the kernel can write to it when cr0.wp=0, + * then we should prevent the kernel from executing it + * if SMEP is enabled. + */ + if (kvm_read_cr4_bits(vcpu, X86_CR4_SMEP)) + walker.pte_access &= ~ACC_EXEC_MASK; + } + + r = RET_PF_RETRY; + spin_lock(&vcpu->kvm->mmu_lock); + if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) + goto out_unlock; + + kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT); + if (make_mmu_pages_available(vcpu) < 0) + goto out_unlock; + if (!force_pt_level) + transparent_hugepage_adjust(vcpu, walker.gfn, &pfn, &level); + r = FNAME(fetch)(vcpu, addr, &walker, write_fault, + level, pfn, map_writable, prefault, lpage_disallowed); + kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT); + +out_unlock: + spin_unlock(&vcpu->kvm->mmu_lock); + kvm_release_pfn_clean(pfn); + return r; +} + +static gpa_t FNAME(get_level1_sp_gpa)(struct kvm_mmu_page *sp) +{ + int offset = 0; + + WARN_ON(sp->role.level != PT_PAGE_TABLE_LEVEL); + + if (PTTYPE == 32) + offset = sp->role.quadrant << PT64_LEVEL_BITS; + + return gfn_to_gpa(sp->gfn) + offset * sizeof(pt_element_t); +} + +static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root_hpa) +{ + struct kvm_shadow_walk_iterator iterator; + struct kvm_mmu_page *sp; + int level; + u64 *sptep; + + vcpu_clear_mmio_info(vcpu, gva); + + /* + * No need to check return value here, rmap_can_add() can + * help us to skip pte prefetch later. 
+ */
+	mmu_topup_memory_caches(vcpu);
+
+	if (!VALID_PAGE(root_hpa)) {
+		WARN_ON(1);
+		return;
+	}
+
+	spin_lock(&vcpu->kvm->mmu_lock);
+	for_each_shadow_entry_using_root(vcpu, root_hpa, gva, iterator) {
+		level = iterator.level;
+		sptep = iterator.sptep;
+
+		sp = page_header(__pa(sptep));
+		if (is_last_spte(*sptep, level)) {
+			pt_element_t gpte;
+			gpa_t pte_gpa;
+
+			if (!sp->unsync)
+				break;
+
+			pte_gpa = FNAME(get_level1_sp_gpa)(sp);
+			pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t);
+
+			if (mmu_page_zap_pte(vcpu->kvm, sp, sptep))
+				kvm_flush_remote_tlbs_with_address(vcpu->kvm,
+					sp->gfn, KVM_PAGES_PER_HPAGE(sp->role.level));
+
+			if (!rmap_can_add(vcpu))
+				break;
+
+			if (kvm_vcpu_read_guest_atomic(vcpu, pte_gpa, &gpte,
+						       sizeof(pt_element_t)))
+				break;
+
+			FNAME(update_pte)(vcpu, sp, sptep, &gpte);
+		}
+
+		if (!is_shadow_present_pte(*sptep) || !sp->unsync_children)
+			break;
+	}
+	spin_unlock(&vcpu->kvm->mmu_lock);
+}
+
+static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access,
+			       struct x86_exception *exception)
+{
+	struct guest_walker walker;
+	gpa_t gpa = UNMAPPED_GVA;
+	int r;
+
+	r = FNAME(walk_addr)(&walker, vcpu, vaddr, access);
+
+	if (r) {
+		gpa = gfn_to_gpa(walker.gfn);
+		gpa |= vaddr & ~PAGE_MASK;
+	} else if (exception)
+		*exception = walker.fault;
+
+	return gpa;
+}
+
+#if PTTYPE != PTTYPE_EPT
+static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gva_t vaddr,
+				      u32 access,
+				      struct x86_exception *exception)
+{
+	struct guest_walker walker;
+	gpa_t gpa = UNMAPPED_GVA;
+	int r;
+
+	r = FNAME(walk_addr_nested)(&walker, vcpu, vaddr, access);
+
+	if (r) {
+		gpa = gfn_to_gpa(walker.gfn);
+		gpa |= vaddr & ~PAGE_MASK;
+	} else if (exception)
+		*exception = walker.fault;
+
+	return gpa;
+}
+#endif
+
+/*
+ * Using the cached information from sp->gfns is safe because:
+ * - The spte has a reference to the struct page, so the pfn for a given gfn
+ *   can't change unless all sptes pointing to it are nuked first.
+ *
+ * Note:
+ *   We should flush all TLBs if a spte is dropped even though the guest is
+ *   responsible for it. If we don't, kvm_mmu_notifier_invalidate_page and
+ *   kvm_mmu_notifier_invalidate_range_start may detect that the mapped page
+ *   is no longer used by the guest and skip the flush, allowing the guest
+ *   to access pages that have already been freed. We therefore increase
+ *   kvm->tlbs_dirty to delay the TLB flush in this case.
+ */
+static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
+{
+	int i, nr_present = 0;
+	bool host_writable;
+	gpa_t first_pte_gpa;
+	int set_spte_ret = 0;
+
+	/* direct kvm_mmu_page cannot be unsync. */
+	BUG_ON(sp->role.direct);
+
+	first_pte_gpa = FNAME(get_level1_sp_gpa)(sp);
+
+	for (i = 0; i < PT64_ENT_PER_PAGE; i++) {
+		unsigned pte_access;
+		pt_element_t gpte;
+		gpa_t pte_gpa;
+		gfn_t gfn;
+
+		if (!sp->spt[i])
+			continue;
+
+		pte_gpa = first_pte_gpa + i * sizeof(pt_element_t);
+
+		if (kvm_vcpu_read_guest_atomic(vcpu, pte_gpa, &gpte,
+					       sizeof(pt_element_t)))
+			return 0;
+
+		if (FNAME(prefetch_invalid_gpte)(vcpu, sp, &sp->spt[i], gpte)) {
+			/*
+			 * Update the spte before increasing tlbs_dirty to make
+			 * sure no tlb flush is lost after the spte is zapped;
+			 * see the comments in kvm_flush_remote_tlbs().
+ */ + smp_wmb(); + vcpu->kvm->tlbs_dirty++; + continue; + } + + gfn = gpte_to_gfn(gpte); + pte_access = sp->role.access; + pte_access &= FNAME(gpte_access)(gpte); + FNAME(protect_clean_gpte)(vcpu->arch.mmu, &pte_access, gpte); + + if (sync_mmio_spte(vcpu, &sp->spt[i], gfn, pte_access, + &nr_present)) + continue; + + if (gfn != sp->gfns[i]) { + drop_spte(vcpu->kvm, &sp->spt[i]); + /* + * The same as above where we are doing + * prefetch_invalid_gpte(). + */ + smp_wmb(); + vcpu->kvm->tlbs_dirty++; + continue; + } + + nr_present++; + + host_writable = sp->spt[i] & SPTE_HOST_WRITEABLE; + + set_spte_ret |= set_spte(vcpu, &sp->spt[i], + pte_access, PT_PAGE_TABLE_LEVEL, + gfn, spte_to_pfn(sp->spt[i]), + true, false, host_writable); + } + + if (set_spte_ret & SET_SPTE_NEED_REMOTE_TLB_FLUSH) + kvm_flush_remote_tlbs(vcpu->kvm); + + return nr_present; +} + +#undef pt_element_t +#undef guest_walker +#undef FNAME +#undef PT_BASE_ADDR_MASK +#undef PT_INDEX +#undef PT_LVL_ADDR_MASK +#undef PT_LVL_OFFSET_MASK +#undef PT_LEVEL_BITS +#undef PT_MAX_FULL_LEVELS +#undef gpte_to_gfn +#undef gpte_to_gfn_lvl +#undef CMPXCHG +#undef PT_GUEST_ACCESSED_MASK +#undef PT_GUEST_DIRTY_MASK +#undef PT_GUEST_DIRTY_SHIFT +#undef PT_GUEST_ACCESSED_SHIFT +#undef PT_HAVE_ACCESSED_DIRTY diff --git a/arch/x86/kvm/page_track.c b/arch/x86/kvm/page_track.c deleted file mode 100644 index 3521e2d176f2..000000000000 --- a/arch/x86/kvm/page_track.c +++ /dev/null @@ -1,265 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Support KVM gust page tracking - * - * This feature allows us to track page access in guest. Currently, only - * write access is tracked. - * - * Copyright(C) 2015 Intel Corporation. - * - * Author: - * Xiao Guangrong - */ - -#include -#include - -#include -#include - -#include "mmu.h" - -void kvm_page_track_free_memslot(struct kvm_memory_slot *free, - struct kvm_memory_slot *dont) -{ - int i; - - for (i = 0; i < KVM_PAGE_TRACK_MAX; i++) - if (!dont || free->arch.gfn_track[i] != - dont->arch.gfn_track[i]) { - kvfree(free->arch.gfn_track[i]); - free->arch.gfn_track[i] = NULL; - } -} - -int kvm_page_track_create_memslot(struct kvm_memory_slot *slot, - unsigned long npages) -{ - int i; - - for (i = 0; i < KVM_PAGE_TRACK_MAX; i++) { - slot->arch.gfn_track[i] = - kvcalloc(npages, sizeof(*slot->arch.gfn_track[i]), - GFP_KERNEL_ACCOUNT); - if (!slot->arch.gfn_track[i]) - goto track_free; - } - - return 0; - -track_free: - kvm_page_track_free_memslot(slot, NULL); - return -ENOMEM; -} - -static inline bool page_track_mode_is_valid(enum kvm_page_track_mode mode) -{ - if (mode < 0 || mode >= KVM_PAGE_TRACK_MAX) - return false; - - return true; -} - -static void update_gfn_track(struct kvm_memory_slot *slot, gfn_t gfn, - enum kvm_page_track_mode mode, short count) -{ - int index, val; - - index = gfn_to_index(gfn, slot->base_gfn, PT_PAGE_TABLE_LEVEL); - - val = slot->arch.gfn_track[mode][index]; - - if (WARN_ON(val + count < 0 || val + count > USHRT_MAX)) - return; - - slot->arch.gfn_track[mode][index] += count; -} - -/* - * add guest page to the tracking pool so that corresponding access on that - * page will be intercepted. - * - * It should be called under the protection both of mmu-lock and kvm->srcu - * or kvm->slots_lock. - * - * @kvm: the guest instance we are interested in. - * @slot: the @gfn belongs to. - * @gfn: the guest page. - * @mode: tracking mode, currently only write track is supported. 
- */ -void kvm_slot_page_track_add_page(struct kvm *kvm, - struct kvm_memory_slot *slot, gfn_t gfn, - enum kvm_page_track_mode mode) -{ - - if (WARN_ON(!page_track_mode_is_valid(mode))) - return; - - update_gfn_track(slot, gfn, mode, 1); - - /* - * new track stops large page mapping for the - * tracked page. - */ - kvm_mmu_gfn_disallow_lpage(slot, gfn); - - if (mode == KVM_PAGE_TRACK_WRITE) - if (kvm_mmu_slot_gfn_write_protect(kvm, slot, gfn)) - kvm_flush_remote_tlbs(kvm); -} -EXPORT_SYMBOL_GPL(kvm_slot_page_track_add_page); - -/* - * remove the guest page from the tracking pool which stops the interception - * of corresponding access on that page. It is the opposed operation of - * kvm_slot_page_track_add_page(). - * - * It should be called under the protection both of mmu-lock and kvm->srcu - * or kvm->slots_lock. - * - * @kvm: the guest instance we are interested in. - * @slot: the @gfn belongs to. - * @gfn: the guest page. - * @mode: tracking mode, currently only write track is supported. - */ -void kvm_slot_page_track_remove_page(struct kvm *kvm, - struct kvm_memory_slot *slot, gfn_t gfn, - enum kvm_page_track_mode mode) -{ - if (WARN_ON(!page_track_mode_is_valid(mode))) - return; - - update_gfn_track(slot, gfn, mode, -1); - - /* - * allow large page mapping for the tracked page - * after the tracker is gone. - */ - kvm_mmu_gfn_allow_lpage(slot, gfn); -} -EXPORT_SYMBOL_GPL(kvm_slot_page_track_remove_page); - -/* - * check if the corresponding access on the specified guest page is tracked. - */ -bool kvm_page_track_is_active(struct kvm_vcpu *vcpu, gfn_t gfn, - enum kvm_page_track_mode mode) -{ - struct kvm_memory_slot *slot; - int index; - - if (WARN_ON(!page_track_mode_is_valid(mode))) - return false; - - slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); - if (!slot) - return false; - - index = gfn_to_index(gfn, slot->base_gfn, PT_PAGE_TABLE_LEVEL); - return !!READ_ONCE(slot->arch.gfn_track[mode][index]); -} - -void kvm_page_track_cleanup(struct kvm *kvm) -{ - struct kvm_page_track_notifier_head *head; - - head = &kvm->arch.track_notifier_head; - cleanup_srcu_struct(&head->track_srcu); -} - -void kvm_page_track_init(struct kvm *kvm) -{ - struct kvm_page_track_notifier_head *head; - - head = &kvm->arch.track_notifier_head; - init_srcu_struct(&head->track_srcu); - INIT_HLIST_HEAD(&head->track_notifier_list); -} - -/* - * register the notifier so that event interception for the tracked guest - * pages can be received. - */ -void -kvm_page_track_register_notifier(struct kvm *kvm, - struct kvm_page_track_notifier_node *n) -{ - struct kvm_page_track_notifier_head *head; - - head = &kvm->arch.track_notifier_head; - - spin_lock(&kvm->mmu_lock); - hlist_add_head_rcu(&n->node, &head->track_notifier_list); - spin_unlock(&kvm->mmu_lock); -} -EXPORT_SYMBOL_GPL(kvm_page_track_register_notifier); - -/* - * stop receiving the event interception. It is the opposed operation of - * kvm_page_track_register_notifier(). - */ -void -kvm_page_track_unregister_notifier(struct kvm *kvm, - struct kvm_page_track_notifier_node *n) -{ - struct kvm_page_track_notifier_head *head; - - head = &kvm->arch.track_notifier_head; - - spin_lock(&kvm->mmu_lock); - hlist_del_rcu(&n->node); - spin_unlock(&kvm->mmu_lock); - synchronize_srcu(&head->track_srcu); -} -EXPORT_SYMBOL_GPL(kvm_page_track_unregister_notifier); - -/* - * Notify the node that write access is intercepted and write emulation is - * finished at this time. - * - * The node should figure out if the written page is the one that node is - * interested in by itself. 
- */ -void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new, - int bytes) -{ - struct kvm_page_track_notifier_head *head; - struct kvm_page_track_notifier_node *n; - int idx; - - head = &vcpu->kvm->arch.track_notifier_head; - - if (hlist_empty(&head->track_notifier_list)) - return; - - idx = srcu_read_lock(&head->track_srcu); - hlist_for_each_entry_rcu(n, &head->track_notifier_list, node) - if (n->track_write) - n->track_write(vcpu, gpa, new, bytes, n); - srcu_read_unlock(&head->track_srcu, idx); -} - -/* - * Notify the node that memory slot is being removed or moved so that it can - * drop write-protection for the pages in the memory slot. - * - * The node should figure out it has any write-protected pages in this slot - * by itself. - */ -void kvm_page_track_flush_slot(struct kvm *kvm, struct kvm_memory_slot *slot) -{ - struct kvm_page_track_notifier_head *head; - struct kvm_page_track_notifier_node *n; - int idx; - - head = &kvm->arch.track_notifier_head; - - if (hlist_empty(&head->track_notifier_list)) - return; - - idx = srcu_read_lock(&head->track_srcu); - hlist_for_each_entry_rcu(n, &head->track_notifier_list, node) - if (n->track_flush_slot) - n->track_flush_slot(kvm, slot, n); - srcu_read_unlock(&head->track_srcu, idx); -} diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h deleted file mode 100644 index 97b21e7fd013..000000000000 --- a/arch/x86/kvm/paging_tmpl.h +++ /dev/null @@ -1,1090 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Kernel-based Virtual Machine driver for Linux - * - * This module enables machines with Intel VT-x extensions to run virtual - * machines without emulation or binary translation. - * - * MMU support - * - * Copyright (C) 2006 Qumranet, Inc. - * Copyright 2010 Red Hat, Inc. and/or its affiliates. - * - * Authors: - * Yaniv Kamay - * Avi Kivity - */ - -/* - * We need the mmu code to access both 32-bit and 64-bit guest ptes, - * so the code in this file is compiled twice, once per pte size. 
- */ - -#if PTTYPE == 64 - #define pt_element_t u64 - #define guest_walker guest_walker64 - #define FNAME(name) paging##64_##name - #define PT_BASE_ADDR_MASK PT64_BASE_ADDR_MASK - #define PT_LVL_ADDR_MASK(lvl) PT64_LVL_ADDR_MASK(lvl) - #define PT_LVL_OFFSET_MASK(lvl) PT64_LVL_OFFSET_MASK(lvl) - #define PT_INDEX(addr, level) PT64_INDEX(addr, level) - #define PT_LEVEL_BITS PT64_LEVEL_BITS - #define PT_GUEST_DIRTY_SHIFT PT_DIRTY_SHIFT - #define PT_GUEST_ACCESSED_SHIFT PT_ACCESSED_SHIFT - #define PT_HAVE_ACCESSED_DIRTY(mmu) true - #ifdef CONFIG_X86_64 - #define PT_MAX_FULL_LEVELS 4 - #define CMPXCHG cmpxchg - #else - #define CMPXCHG cmpxchg64 - #define PT_MAX_FULL_LEVELS 2 - #endif -#elif PTTYPE == 32 - #define pt_element_t u32 - #define guest_walker guest_walker32 - #define FNAME(name) paging##32_##name - #define PT_BASE_ADDR_MASK PT32_BASE_ADDR_MASK - #define PT_LVL_ADDR_MASK(lvl) PT32_LVL_ADDR_MASK(lvl) - #define PT_LVL_OFFSET_MASK(lvl) PT32_LVL_OFFSET_MASK(lvl) - #define PT_INDEX(addr, level) PT32_INDEX(addr, level) - #define PT_LEVEL_BITS PT32_LEVEL_BITS - #define PT_MAX_FULL_LEVELS 2 - #define PT_GUEST_DIRTY_SHIFT PT_DIRTY_SHIFT - #define PT_GUEST_ACCESSED_SHIFT PT_ACCESSED_SHIFT - #define PT_HAVE_ACCESSED_DIRTY(mmu) true - #define CMPXCHG cmpxchg -#elif PTTYPE == PTTYPE_EPT - #define pt_element_t u64 - #define guest_walker guest_walkerEPT - #define FNAME(name) ept_##name - #define PT_BASE_ADDR_MASK PT64_BASE_ADDR_MASK - #define PT_LVL_ADDR_MASK(lvl) PT64_LVL_ADDR_MASK(lvl) - #define PT_LVL_OFFSET_MASK(lvl) PT64_LVL_OFFSET_MASK(lvl) - #define PT_INDEX(addr, level) PT64_INDEX(addr, level) - #define PT_LEVEL_BITS PT64_LEVEL_BITS - #define PT_GUEST_DIRTY_SHIFT 9 - #define PT_GUEST_ACCESSED_SHIFT 8 - #define PT_HAVE_ACCESSED_DIRTY(mmu) ((mmu)->ept_ad) - #define CMPXCHG cmpxchg64 - #define PT_MAX_FULL_LEVELS 4 -#else - #error Invalid PTTYPE value -#endif - -#define PT_GUEST_DIRTY_MASK (1 << PT_GUEST_DIRTY_SHIFT) -#define PT_GUEST_ACCESSED_MASK (1 << PT_GUEST_ACCESSED_SHIFT) - -#define gpte_to_gfn_lvl FNAME(gpte_to_gfn_lvl) -#define gpte_to_gfn(pte) gpte_to_gfn_lvl((pte), PT_PAGE_TABLE_LEVEL) - -/* - * The guest_walker structure emulates the behavior of the hardware page - * table walker. 
- */ -struct guest_walker { - int level; - unsigned max_level; - gfn_t table_gfn[PT_MAX_FULL_LEVELS]; - pt_element_t ptes[PT_MAX_FULL_LEVELS]; - pt_element_t prefetch_ptes[PTE_PREFETCH_NUM]; - gpa_t pte_gpa[PT_MAX_FULL_LEVELS]; - pt_element_t __user *ptep_user[PT_MAX_FULL_LEVELS]; - bool pte_writable[PT_MAX_FULL_LEVELS]; - unsigned pt_access; - unsigned pte_access; - gfn_t gfn; - struct x86_exception fault; -}; - -static gfn_t gpte_to_gfn_lvl(pt_element_t gpte, int lvl) -{ - return (gpte & PT_LVL_ADDR_MASK(lvl)) >> PAGE_SHIFT; -} - -static inline void FNAME(protect_clean_gpte)(struct kvm_mmu *mmu, unsigned *access, - unsigned gpte) -{ - unsigned mask; - - /* dirty bit is not supported, so no need to track it */ - if (!PT_HAVE_ACCESSED_DIRTY(mmu)) - return; - - BUILD_BUG_ON(PT_WRITABLE_MASK != ACC_WRITE_MASK); - - mask = (unsigned)~ACC_WRITE_MASK; - /* Allow write access to dirty gptes */ - mask |= (gpte >> (PT_GUEST_DIRTY_SHIFT - PT_WRITABLE_SHIFT)) & - PT_WRITABLE_MASK; - *access &= mask; -} - -static inline int FNAME(is_present_gpte)(unsigned long pte) -{ -#if PTTYPE != PTTYPE_EPT - return pte & PT_PRESENT_MASK; -#else - return pte & 7; -#endif -} - -static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, - pt_element_t __user *ptep_user, unsigned index, - pt_element_t orig_pte, pt_element_t new_pte) -{ - int npages; - pt_element_t ret; - pt_element_t *table; - struct page *page; - - npages = get_user_pages_fast((unsigned long)ptep_user, 1, FOLL_WRITE, &page); - if (likely(npages == 1)) { - table = kmap_atomic(page); - ret = CMPXCHG(&table[index], orig_pte, new_pte); - kunmap_atomic(table); - - kvm_release_page_dirty(page); - } else { - struct vm_area_struct *vma; - unsigned long vaddr = (unsigned long)ptep_user & PAGE_MASK; - unsigned long pfn; - unsigned long paddr; - - down_read(¤t->mm->mmap_sem); - vma = find_vma_intersection(current->mm, vaddr, vaddr + PAGE_SIZE); - if (!vma || !(vma->vm_flags & VM_PFNMAP)) { - up_read(¤t->mm->mmap_sem); - return -EFAULT; - } - pfn = ((vaddr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; - paddr = pfn << PAGE_SHIFT; - table = memremap(paddr, PAGE_SIZE, MEMREMAP_WB); - if (!table) { - up_read(¤t->mm->mmap_sem); - return -EFAULT; - } - ret = CMPXCHG(&table[index], orig_pte, new_pte); - memunmap(table); - up_read(¤t->mm->mmap_sem); - } - - return (ret != orig_pte); -} - -static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu, - struct kvm_mmu_page *sp, u64 *spte, - u64 gpte) -{ - if (is_rsvd_bits_set(vcpu->arch.mmu, gpte, PT_PAGE_TABLE_LEVEL)) - goto no_present; - - if (!FNAME(is_present_gpte)(gpte)) - goto no_present; - - /* if accessed bit is not supported prefetch non accessed gpte */ - if (PT_HAVE_ACCESSED_DIRTY(vcpu->arch.mmu) && - !(gpte & PT_GUEST_ACCESSED_MASK)) - goto no_present; - - return false; - -no_present: - drop_spte(vcpu->kvm, spte); - return true; -} - -/* - * For PTTYPE_EPT, a page table can be executable but not readable - * on supported processors. Therefore, set_spte does not automatically - * set bit 0 if execute only is supported. Here, we repurpose ACC_USER_MASK - * to signify readability since it isn't used in the EPT case - */ -static inline unsigned FNAME(gpte_access)(u64 gpte) -{ - unsigned access; -#if PTTYPE == PTTYPE_EPT - access = ((gpte & VMX_EPT_WRITABLE_MASK) ? ACC_WRITE_MASK : 0) | - ((gpte & VMX_EPT_EXECUTABLE_MASK) ? ACC_EXEC_MASK : 0) | - ((gpte & VMX_EPT_READABLE_MASK) ? 
ACC_USER_MASK : 0); -#else - BUILD_BUG_ON(ACC_EXEC_MASK != PT_PRESENT_MASK); - BUILD_BUG_ON(ACC_EXEC_MASK != 1); - access = gpte & (PT_WRITABLE_MASK | PT_USER_MASK | PT_PRESENT_MASK); - /* Combine NX with P (which is set here) to get ACC_EXEC_MASK. */ - access ^= (gpte >> PT64_NX_SHIFT); -#endif - - return access; -} - -static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu, - struct kvm_mmu *mmu, - struct guest_walker *walker, - int write_fault) -{ - unsigned level, index; - pt_element_t pte, orig_pte; - pt_element_t __user *ptep_user; - gfn_t table_gfn; - int ret; - - /* dirty/accessed bits are not supported, so no need to update them */ - if (!PT_HAVE_ACCESSED_DIRTY(mmu)) - return 0; - - for (level = walker->max_level; level >= walker->level; --level) { - pte = orig_pte = walker->ptes[level - 1]; - table_gfn = walker->table_gfn[level - 1]; - ptep_user = walker->ptep_user[level - 1]; - index = offset_in_page(ptep_user) / sizeof(pt_element_t); - if (!(pte & PT_GUEST_ACCESSED_MASK)) { - trace_kvm_mmu_set_accessed_bit(table_gfn, index, sizeof(pte)); - pte |= PT_GUEST_ACCESSED_MASK; - } - if (level == walker->level && write_fault && - !(pte & PT_GUEST_DIRTY_MASK)) { - trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte)); -#if PTTYPE == PTTYPE_EPT - if (kvm_arch_write_log_dirty(vcpu)) - return -EINVAL; -#endif - pte |= PT_GUEST_DIRTY_MASK; - } - if (pte == orig_pte) - continue; - - /* - * If the slot is read-only, simply do not process the accessed - * and dirty bits. This is the correct thing to do if the slot - * is ROM, and page tables in read-as-ROM/write-as-MMIO slots - * are only supported if the accessed and dirty bits are already - * set in the ROM (so that MMIO writes are never needed). - * - * Note that NPT does not allow this at all and faults, since - * it always wants nested page table entries for the guest - * page tables to be writable. And EPT works but will simply - * overwrite the read-only memory to set the accessed and dirty - * bits. 
- */ - if (unlikely(!walker->pte_writable[level - 1])) - continue; - - ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index, orig_pte, pte); - if (ret) - return ret; - - kvm_vcpu_mark_page_dirty(vcpu, table_gfn); - walker->ptes[level - 1] = pte; - } - return 0; -} - -static inline unsigned FNAME(gpte_pkeys)(struct kvm_vcpu *vcpu, u64 gpte) -{ - unsigned pkeys = 0; -#if PTTYPE == 64 - pte_t pte = {.pte = gpte}; - - pkeys = pte_flags_pkey(pte_flags(pte)); -#endif - return pkeys; -} - -/* - * Fetch a guest pte for a guest virtual address - */ -static int FNAME(walk_addr_generic)(struct guest_walker *walker, - struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, - gva_t addr, u32 access) -{ - int ret; - pt_element_t pte; - pt_element_t __user *uninitialized_var(ptep_user); - gfn_t table_gfn; - u64 pt_access, pte_access; - unsigned index, accessed_dirty, pte_pkey; - unsigned nested_access; - gpa_t pte_gpa; - bool have_ad; - int offset; - u64 walk_nx_mask = 0; - const int write_fault = access & PFERR_WRITE_MASK; - const int user_fault = access & PFERR_USER_MASK; - const int fetch_fault = access & PFERR_FETCH_MASK; - u16 errcode = 0; - gpa_t real_gpa; - gfn_t gfn; - - trace_kvm_mmu_pagetable_walk(addr, access); -retry_walk: - walker->level = mmu->root_level; - pte = mmu->get_cr3(vcpu); - have_ad = PT_HAVE_ACCESSED_DIRTY(mmu); - -#if PTTYPE == 64 - walk_nx_mask = 1ULL << PT64_NX_SHIFT; - if (walker->level == PT32E_ROOT_LEVEL) { - pte = mmu->get_pdptr(vcpu, (addr >> 30) & 3); - trace_kvm_mmu_paging_element(pte, walker->level); - if (!FNAME(is_present_gpte)(pte)) - goto error; - --walker->level; - } -#endif - walker->max_level = walker->level; - ASSERT(!(is_long_mode(vcpu) && !is_pae(vcpu))); - - /* - * FIXME: on Intel processors, loads of the PDPTE registers for PAE paging - * by the MOV to CR instruction are treated as reads and do not cause the - * processor to set the dirty flag in any EPT paging-structure entry. - */ - nested_access = (have_ad ? PFERR_WRITE_MASK : 0) | PFERR_USER_MASK; - - pte_access = ~0; - ++walker->level; - - do { - gfn_t real_gfn; - unsigned long host_addr; - - pt_access = pte_access; - --walker->level; - - index = PT_INDEX(addr, walker->level); - table_gfn = gpte_to_gfn(pte); - offset = index * sizeof(pt_element_t); - pte_gpa = gfn_to_gpa(table_gfn) + offset; - - BUG_ON(walker->level < 1); - walker->table_gfn[walker->level - 1] = table_gfn; - walker->pte_gpa[walker->level - 1] = pte_gpa; - - real_gfn = mmu->translate_gpa(vcpu, gfn_to_gpa(table_gfn), - nested_access, - &walker->fault); - - /* - * FIXME: This can happen if emulation (for of an INS/OUTS - * instruction) triggers a nested page fault. The exit - * qualification / exit info field will incorrectly have - * "guest page access" as the nested page fault's cause, - * instead of "guest page structure access". To fix this, - * the x86_exception struct should be augmented with enough - * information to fix the exit_qualification or exit_info_1 - * fields. 
- */ - if (unlikely(real_gfn == UNMAPPED_GVA)) - return 0; - - real_gfn = gpa_to_gfn(real_gfn); - - host_addr = kvm_vcpu_gfn_to_hva_prot(vcpu, real_gfn, - &walker->pte_writable[walker->level - 1]); - if (unlikely(kvm_is_error_hva(host_addr))) - goto error; - - ptep_user = (pt_element_t __user *)((void *)host_addr + offset); - if (unlikely(__copy_from_user(&pte, ptep_user, sizeof(pte)))) - goto error; - walker->ptep_user[walker->level - 1] = ptep_user; - - trace_kvm_mmu_paging_element(pte, walker->level); - - /* - * Inverting the NX it lets us AND it like other - * permission bits. - */ - pte_access = pt_access & (pte ^ walk_nx_mask); - - if (unlikely(!FNAME(is_present_gpte)(pte))) - goto error; - - if (unlikely(is_rsvd_bits_set(mmu, pte, walker->level))) { - errcode = PFERR_RSVD_MASK | PFERR_PRESENT_MASK; - goto error; - } - - walker->ptes[walker->level - 1] = pte; - } while (!is_last_gpte(mmu, walker->level, pte)); - - pte_pkey = FNAME(gpte_pkeys)(vcpu, pte); - accessed_dirty = have_ad ? pte_access & PT_GUEST_ACCESSED_MASK : 0; - - /* Convert to ACC_*_MASK flags for struct guest_walker. */ - walker->pt_access = FNAME(gpte_access)(pt_access ^ walk_nx_mask); - walker->pte_access = FNAME(gpte_access)(pte_access ^ walk_nx_mask); - errcode = permission_fault(vcpu, mmu, walker->pte_access, pte_pkey, access); - if (unlikely(errcode)) - goto error; - - gfn = gpte_to_gfn_lvl(pte, walker->level); - gfn += (addr & PT_LVL_OFFSET_MASK(walker->level)) >> PAGE_SHIFT; - - if (PTTYPE == 32 && walker->level == PT_DIRECTORY_LEVEL && is_cpuid_PSE36()) - gfn += pse36_gfn_delta(pte); - - real_gpa = mmu->translate_gpa(vcpu, gfn_to_gpa(gfn), access, &walker->fault); - if (real_gpa == UNMAPPED_GVA) - return 0; - - walker->gfn = real_gpa >> PAGE_SHIFT; - - if (!write_fault) - FNAME(protect_clean_gpte)(mmu, &walker->pte_access, pte); - else - /* - * On a write fault, fold the dirty bit into accessed_dirty. - * For modes without A/D bits support accessed_dirty will be - * always clear. - */ - accessed_dirty &= pte >> - (PT_GUEST_DIRTY_SHIFT - PT_GUEST_ACCESSED_SHIFT); - - if (unlikely(!accessed_dirty)) { - ret = FNAME(update_accessed_dirty_bits)(vcpu, mmu, walker, write_fault); - if (unlikely(ret < 0)) - goto error; - else if (ret) - goto retry_walk; - } - - pgprintk("%s: pte %llx pte_access %x pt_access %x\n", - __func__, (u64)pte, walker->pte_access, walker->pt_access); - return 1; - -error: - errcode |= write_fault | user_fault; - if (fetch_fault && (mmu->nx || - kvm_read_cr4_bits(vcpu, X86_CR4_SMEP))) - errcode |= PFERR_FETCH_MASK; - - walker->fault.vector = PF_VECTOR; - walker->fault.error_code_valid = true; - walker->fault.error_code = errcode; - -#if PTTYPE == PTTYPE_EPT - /* - * Use PFERR_RSVD_MASK in error_code to to tell if EPT - * misconfiguration requires to be injected. The detection is - * done by is_rsvd_bits_set() above. - * - * We set up the value of exit_qualification to inject: - * [2:0] - Derive from the access bits. The exit_qualification might be - * out of date if it is serving an EPT misconfiguration. - * [5:3] - Calculated by the page walk of the guest EPT page tables - * [7:8] - Derived from [7:8] of real exit_qualification - * - * The other bits are set to 0. 
- */ - if (!(errcode & PFERR_RSVD_MASK)) { - vcpu->arch.exit_qualification &= 0x180; - if (write_fault) - vcpu->arch.exit_qualification |= EPT_VIOLATION_ACC_WRITE; - if (user_fault) - vcpu->arch.exit_qualification |= EPT_VIOLATION_ACC_READ; - if (fetch_fault) - vcpu->arch.exit_qualification |= EPT_VIOLATION_ACC_INSTR; - vcpu->arch.exit_qualification |= (pte_access & 0x7) << 3; - } -#endif - walker->fault.address = addr; - walker->fault.nested_page_fault = mmu != vcpu->arch.walk_mmu; - - trace_kvm_mmu_walker_error(walker->fault.error_code); - return 0; -} - -static int FNAME(walk_addr)(struct guest_walker *walker, - struct kvm_vcpu *vcpu, gva_t addr, u32 access) -{ - return FNAME(walk_addr_generic)(walker, vcpu, vcpu->arch.mmu, addr, - access); -} - -#if PTTYPE != PTTYPE_EPT -static int FNAME(walk_addr_nested)(struct guest_walker *walker, - struct kvm_vcpu *vcpu, gva_t addr, - u32 access) -{ - return FNAME(walk_addr_generic)(walker, vcpu, &vcpu->arch.nested_mmu, - addr, access); -} -#endif - -static bool -FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, - u64 *spte, pt_element_t gpte, bool no_dirty_log) -{ - unsigned pte_access; - gfn_t gfn; - kvm_pfn_t pfn; - - if (FNAME(prefetch_invalid_gpte)(vcpu, sp, spte, gpte)) - return false; - - pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte); - - gfn = gpte_to_gfn(gpte); - pte_access = sp->role.access & FNAME(gpte_access)(gpte); - FNAME(protect_clean_gpte)(vcpu->arch.mmu, &pte_access, gpte); - pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn, - no_dirty_log && (pte_access & ACC_WRITE_MASK)); - if (is_error_pfn(pfn)) - return false; - - /* - * we call mmu_set_spte() with host_writable = true because - * pte_prefetch_gfn_to_pfn always gets a writable pfn. - */ - mmu_set_spte(vcpu, spte, pte_access, 0, PT_PAGE_TABLE_LEVEL, gfn, pfn, - true, true); - - kvm_release_pfn_clean(pfn); - return true; -} - -static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, - u64 *spte, const void *pte) -{ - pt_element_t gpte = *(const pt_element_t *)pte; - - FNAME(prefetch_gpte)(vcpu, sp, spte, gpte, false); -} - -static bool FNAME(gpte_changed)(struct kvm_vcpu *vcpu, - struct guest_walker *gw, int level) -{ - pt_element_t curr_pte; - gpa_t base_gpa, pte_gpa = gw->pte_gpa[level - 1]; - u64 mask; - int r, index; - - if (level == PT_PAGE_TABLE_LEVEL) { - mask = PTE_PREFETCH_NUM * sizeof(pt_element_t) - 1; - base_gpa = pte_gpa & ~mask; - index = (pte_gpa - base_gpa) / sizeof(pt_element_t); - - r = kvm_vcpu_read_guest_atomic(vcpu, base_gpa, - gw->prefetch_ptes, sizeof(gw->prefetch_ptes)); - curr_pte = gw->prefetch_ptes[index]; - } else - r = kvm_vcpu_read_guest_atomic(vcpu, pte_gpa, - &curr_pte, sizeof(curr_pte)); - - return r || curr_pte != gw->ptes[level - 1]; -} - -static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw, - u64 *sptep) -{ - struct kvm_mmu_page *sp; - pt_element_t *gptep = gw->prefetch_ptes; - u64 *spte; - int i; - - sp = page_header(__pa(sptep)); - - if (sp->role.level > PT_PAGE_TABLE_LEVEL) - return; - - if (sp->role.direct) - return __direct_pte_prefetch(vcpu, sp, sptep); - - i = (sptep - sp->spt) & ~(PTE_PREFETCH_NUM - 1); - spte = sp->spt + i; - - for (i = 0; i < PTE_PREFETCH_NUM; i++, spte++) { - if (spte == sptep) - continue; - - if (is_shadow_present_pte(*spte)) - continue; - - if (!FNAME(prefetch_gpte)(vcpu, sp, spte, gptep[i], true)) - break; - } -} - -/* - * Fetch a shadow pte for a specific level in the paging hierarchy. 
- * If the guest tries to write a write-protected page, we need to - * emulate this operation, return 1 to indicate this case. - */ -static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, - struct guest_walker *gw, - int write_fault, int hlevel, - kvm_pfn_t pfn, bool map_writable, bool prefault, - bool lpage_disallowed) -{ - struct kvm_mmu_page *sp = NULL; - struct kvm_shadow_walk_iterator it; - unsigned direct_access, access = gw->pt_access; - int top_level, ret; - gfn_t gfn, base_gfn; - - direct_access = gw->pte_access; - - top_level = vcpu->arch.mmu->root_level; - if (top_level == PT32E_ROOT_LEVEL) - top_level = PT32_ROOT_LEVEL; - /* - * Verify that the top-level gpte is still there. Since the page - * is a root page, it is either write protected (and cannot be - * changed from now on) or it is invalid (in which case, we don't - * really care if it changes underneath us after this point). - */ - if (FNAME(gpte_changed)(vcpu, gw, top_level)) - goto out_gpte_changed; - - if (!VALID_PAGE(vcpu->arch.mmu->root_hpa)) - goto out_gpte_changed; - - for (shadow_walk_init(&it, vcpu, addr); - shadow_walk_okay(&it) && it.level > gw->level; - shadow_walk_next(&it)) { - gfn_t table_gfn; - - clear_sp_write_flooding_count(it.sptep); - drop_large_spte(vcpu, it.sptep); - - sp = NULL; - if (!is_shadow_present_pte(*it.sptep)) { - table_gfn = gw->table_gfn[it.level - 2]; - sp = kvm_mmu_get_page(vcpu, table_gfn, addr, it.level-1, - false, access); - } - - /* - * Verify that the gpte in the page we've just write - * protected is still there. - */ - if (FNAME(gpte_changed)(vcpu, gw, it.level - 1)) - goto out_gpte_changed; - - if (sp) - link_shadow_page(vcpu, it.sptep, sp); - } - - /* - * FNAME(page_fault) might have clobbered the bottom bits of - * gw->gfn, restore them from the virtual address. - */ - gfn = gw->gfn | ((addr & PT_LVL_OFFSET_MASK(gw->level)) >> PAGE_SHIFT); - base_gfn = gfn; - - trace_kvm_mmu_spte_requested(addr, gw->level, pfn); - - for (; shadow_walk_okay(&it); shadow_walk_next(&it)) { - clear_sp_write_flooding_count(it.sptep); - - /* - * We cannot overwrite existing page tables with an NX - * large page, as the leaf could be executable. - */ - disallowed_hugepage_adjust(it, gfn, &pfn, &hlevel); - - base_gfn = gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1); - if (it.level == hlevel) - break; - - validate_direct_spte(vcpu, it.sptep, direct_access); - - drop_large_spte(vcpu, it.sptep); - - if (!is_shadow_present_pte(*it.sptep)) { - sp = kvm_mmu_get_page(vcpu, base_gfn, addr, - it.level - 1, true, direct_access); - link_shadow_page(vcpu, it.sptep, sp); - if (lpage_disallowed) - account_huge_nx_page(vcpu->kvm, sp); - } - } - - ret = mmu_set_spte(vcpu, it.sptep, gw->pte_access, write_fault, - it.level, base_gfn, pfn, prefault, map_writable); - FNAME(pte_prefetch)(vcpu, gw, it.sptep); - ++vcpu->stat.pf_fixed; - return ret; - -out_gpte_changed: - return RET_PF_RETRY; -} - - /* - * To see whether the mapped gfn can write its page table in the current - * mapping. - * - * It is the helper function of FNAME(page_fault). When guest uses large page - * size to map the writable gfn which is used as current page table, we should - * force kvm to use small page size to map it because new shadow page will be - * created when kvm establishes shadow page table that stop kvm using large - * page size. Do it early can avoid unnecessary #PF and emulation. - * - * @write_fault_to_shadow_pgtable will return true if the fault gfn is - * currently used as its page table. 
- * - * Note: the PDPT page table is not checked for PAE-32 bit guest. It is ok - * since the PDPT is always shadowed, that means, we can not use large page - * size to map the gfn which is used as PDPT. - */ -static bool -FNAME(is_self_change_mapping)(struct kvm_vcpu *vcpu, - struct guest_walker *walker, int user_fault, - bool *write_fault_to_shadow_pgtable) -{ - int level; - gfn_t mask = ~(KVM_PAGES_PER_HPAGE(walker->level) - 1); - bool self_changed = false; - - if (!(walker->pte_access & ACC_WRITE_MASK || - (!is_write_protection(vcpu) && !user_fault))) - return false; - - for (level = walker->level; level <= walker->max_level; level++) { - gfn_t gfn = walker->gfn ^ walker->table_gfn[level - 1]; - - self_changed |= !(gfn & mask); - *write_fault_to_shadow_pgtable |= !gfn; - } - - return self_changed; -} - -/* - * Page fault handler. There are several causes for a page fault: - * - there is no shadow pte for the guest pte - * - write access through a shadow pte marked read only so that we can set - * the dirty bit - * - write access to a shadow pte marked read only so we can update the page - * dirty bitmap, when userspace requests it - * - mmio access; in this case we will never install a present shadow pte - * - normal guest page fault due to the guest pte marked not present, not - * writable, or not executable - * - * Returns: 1 if we need to emulate the instruction, 0 otherwise, or - * a negative value on error. - */ -static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, - bool prefault) -{ - int write_fault = error_code & PFERR_WRITE_MASK; - int user_fault = error_code & PFERR_USER_MASK; - struct guest_walker walker; - int r; - kvm_pfn_t pfn; - int level = PT_PAGE_TABLE_LEVEL; - unsigned long mmu_seq; - bool map_writable, is_self_change_mapping; - bool lpage_disallowed = (error_code & PFERR_FETCH_MASK) && - is_nx_huge_page_enabled(); - bool force_pt_level = lpage_disallowed; - - pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code); - - r = mmu_topup_memory_caches(vcpu); - if (r) - return r; - - /* - * If PFEC.RSVD is set, this is a shadow page fault. - * The bit needs to be cleared before walking guest page tables. - */ - error_code &= ~PFERR_RSVD_MASK; - - /* - * Look up the guest pte for the faulting address. - */ - r = FNAME(walk_addr)(&walker, vcpu, addr, error_code); - - /* - * The page is not mapped by the guest. Let the guest handle it. 
- */ - if (!r) { - pgprintk("%s: guest page fault\n", __func__); - if (!prefault) - inject_page_fault(vcpu, &walker.fault); - - return RET_PF_RETRY; - } - - if (page_fault_handle_page_track(vcpu, error_code, walker.gfn)) { - shadow_page_table_clear_flood(vcpu, addr); - return RET_PF_EMULATE; - } - - vcpu->arch.write_fault_to_shadow_pgtable = false; - - is_self_change_mapping = FNAME(is_self_change_mapping)(vcpu, - &walker, user_fault, &vcpu->arch.write_fault_to_shadow_pgtable); - - if (walker.level >= PT_DIRECTORY_LEVEL && !is_self_change_mapping) { - level = mapping_level(vcpu, walker.gfn, &force_pt_level); - if (likely(!force_pt_level)) { - level = min(walker.level, level); - walker.gfn = walker.gfn & ~(KVM_PAGES_PER_HPAGE(level) - 1); - } - } else - force_pt_level = true; - - mmu_seq = vcpu->kvm->mmu_notifier_seq; - smp_rmb(); - - if (try_async_pf(vcpu, prefault, walker.gfn, addr, &pfn, write_fault, - &map_writable)) - return RET_PF_RETRY; - - if (handle_abnormal_pfn(vcpu, addr, walker.gfn, pfn, walker.pte_access, &r)) - return r; - - /* - * Do not change pte_access if the pfn is a mmio page, otherwise - * we will cache the incorrect access into mmio spte. - */ - if (write_fault && !(walker.pte_access & ACC_WRITE_MASK) && - !is_write_protection(vcpu) && !user_fault && - !is_noslot_pfn(pfn)) { - walker.pte_access |= ACC_WRITE_MASK; - walker.pte_access &= ~ACC_USER_MASK; - - /* - * If we converted a user page to a kernel page, - * so that the kernel can write to it when cr0.wp=0, - * then we should prevent the kernel from executing it - * if SMEP is enabled. - */ - if (kvm_read_cr4_bits(vcpu, X86_CR4_SMEP)) - walker.pte_access &= ~ACC_EXEC_MASK; - } - - r = RET_PF_RETRY; - spin_lock(&vcpu->kvm->mmu_lock); - if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) - goto out_unlock; - - kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT); - if (make_mmu_pages_available(vcpu) < 0) - goto out_unlock; - if (!force_pt_level) - transparent_hugepage_adjust(vcpu, walker.gfn, &pfn, &level); - r = FNAME(fetch)(vcpu, addr, &walker, write_fault, - level, pfn, map_writable, prefault, lpage_disallowed); - kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT); - -out_unlock: - spin_unlock(&vcpu->kvm->mmu_lock); - kvm_release_pfn_clean(pfn); - return r; -} - -static gpa_t FNAME(get_level1_sp_gpa)(struct kvm_mmu_page *sp) -{ - int offset = 0; - - WARN_ON(sp->role.level != PT_PAGE_TABLE_LEVEL); - - if (PTTYPE == 32) - offset = sp->role.quadrant << PT64_LEVEL_BITS; - - return gfn_to_gpa(sp->gfn) + offset * sizeof(pt_element_t); -} - -static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root_hpa) -{ - struct kvm_shadow_walk_iterator iterator; - struct kvm_mmu_page *sp; - int level; - u64 *sptep; - - vcpu_clear_mmio_info(vcpu, gva); - - /* - * No need to check return value here, rmap_can_add() can - * help us to skip pte prefetch later. 
- */ - mmu_topup_memory_caches(vcpu); - - if (!VALID_PAGE(root_hpa)) { - WARN_ON(1); - return; - } - - spin_lock(&vcpu->kvm->mmu_lock); - for_each_shadow_entry_using_root(vcpu, root_hpa, gva, iterator) { - level = iterator.level; - sptep = iterator.sptep; - - sp = page_header(__pa(sptep)); - if (is_last_spte(*sptep, level)) { - pt_element_t gpte; - gpa_t pte_gpa; - - if (!sp->unsync) - break; - - pte_gpa = FNAME(get_level1_sp_gpa)(sp); - pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t); - - if (mmu_page_zap_pte(vcpu->kvm, sp, sptep)) - kvm_flush_remote_tlbs_with_address(vcpu->kvm, - sp->gfn, KVM_PAGES_PER_HPAGE(sp->role.level)); - - if (!rmap_can_add(vcpu)) - break; - - if (kvm_vcpu_read_guest_atomic(vcpu, pte_gpa, &gpte, - sizeof(pt_element_t))) - break; - - FNAME(update_pte)(vcpu, sp, sptep, &gpte); - } - - if (!is_shadow_present_pte(*sptep) || !sp->unsync_children) - break; - } - spin_unlock(&vcpu->kvm->mmu_lock); -} - -static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access, - struct x86_exception *exception) -{ - struct guest_walker walker; - gpa_t gpa = UNMAPPED_GVA; - int r; - - r = FNAME(walk_addr)(&walker, vcpu, vaddr, access); - - if (r) { - gpa = gfn_to_gpa(walker.gfn); - gpa |= vaddr & ~PAGE_MASK; - } else if (exception) - *exception = walker.fault; - - return gpa; -} - -#if PTTYPE != PTTYPE_EPT -static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gva_t vaddr, - u32 access, - struct x86_exception *exception) -{ - struct guest_walker walker; - gpa_t gpa = UNMAPPED_GVA; - int r; - - r = FNAME(walk_addr_nested)(&walker, vcpu, vaddr, access); - - if (r) { - gpa = gfn_to_gpa(walker.gfn); - gpa |= vaddr & ~PAGE_MASK; - } else if (exception) - *exception = walker.fault; - - return gpa; -} -#endif - -/* - * Using the cached information from sp->gfns is safe because: - * - The spte has a reference to the struct page, so the pfn for a given gfn - * can't change unless all sptes pointing to it are nuked first. - * - * Note: - * We should flush all tlbs if spte is dropped even though guest is - * responsible for it. Since if we don't, kvm_mmu_notifier_invalidate_page - * and kvm_mmu_notifier_invalidate_range_start detect the mapping page isn't - * used by guest then tlbs are not flushed, so guest is allowed to access the - * freed pages. - * And we increase kvm->tlbs_dirty to delay tlbs flush in this case. - */ -static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) -{ - int i, nr_present = 0; - bool host_writable; - gpa_t first_pte_gpa; - int set_spte_ret = 0; - - /* direct kvm_mmu_page can not be unsync. */ - BUG_ON(sp->role.direct); - - first_pte_gpa = FNAME(get_level1_sp_gpa)(sp); - - for (i = 0; i < PT64_ENT_PER_PAGE; i++) { - unsigned pte_access; - pt_element_t gpte; - gpa_t pte_gpa; - gfn_t gfn; - - if (!sp->spt[i]) - continue; - - pte_gpa = first_pte_gpa + i * sizeof(pt_element_t); - - if (kvm_vcpu_read_guest_atomic(vcpu, pte_gpa, &gpte, - sizeof(pt_element_t))) - return 0; - - if (FNAME(prefetch_invalid_gpte)(vcpu, sp, &sp->spt[i], gpte)) { - /* - * Update spte before increasing tlbs_dirty to make - * sure no tlb flush is lost after spte is zapped; see - * the comments in kvm_flush_remote_tlbs(). 
- */ - smp_wmb(); - vcpu->kvm->tlbs_dirty++; - continue; - } - - gfn = gpte_to_gfn(gpte); - pte_access = sp->role.access; - pte_access &= FNAME(gpte_access)(gpte); - FNAME(protect_clean_gpte)(vcpu->arch.mmu, &pte_access, gpte); - - if (sync_mmio_spte(vcpu, &sp->spt[i], gfn, pte_access, - &nr_present)) - continue; - - if (gfn != sp->gfns[i]) { - drop_spte(vcpu->kvm, &sp->spt[i]); - /* - * The same as above where we are doing - * prefetch_invalid_gpte(). - */ - smp_wmb(); - vcpu->kvm->tlbs_dirty++; - continue; - } - - nr_present++; - - host_writable = sp->spt[i] & SPTE_HOST_WRITEABLE; - - set_spte_ret |= set_spte(vcpu, &sp->spt[i], - pte_access, PT_PAGE_TABLE_LEVEL, - gfn, spte_to_pfn(sp->spt[i]), - true, false, host_writable); - } - - if (set_spte_ret & SET_SPTE_NEED_REMOTE_TLB_FLUSH) - kvm_flush_remote_tlbs(vcpu->kvm); - - return nr_present; -} - -#undef pt_element_t -#undef guest_walker -#undef FNAME -#undef PT_BASE_ADDR_MASK -#undef PT_INDEX -#undef PT_LVL_ADDR_MASK -#undef PT_LVL_OFFSET_MASK -#undef PT_LEVEL_BITS -#undef PT_MAX_FULL_LEVELS -#undef gpte_to_gfn -#undef gpte_to_gfn_lvl -#undef CMPXCHG -#undef PT_GUEST_ACCESSED_MASK -#undef PT_GUEST_DIRTY_MASK -#undef PT_GUEST_DIRTY_SHIFT -#undef PT_GUEST_ACCESSED_SHIFT -#undef PT_HAVE_ACCESSED_DIRTY -- cgit v1.2.3 From b62755aed3a3f5ca9edd2718339ccea3b6bbbe57 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 17 Nov 2019 23:21:58 -0800 Subject: crypto: x86/chacha - only unregister algorithms if registered It's not valid to call crypto_unregister_skciphers() without a prior call to crypto_register_skciphers(). Fixes: 84e03fa39fbe ("crypto: x86/chacha - expose SIMD ChaCha routine as library function") Signed-off-by: Eric Biggers Acked-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/x86/crypto/chacha_glue.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/crypto/chacha_glue.c b/arch/x86/crypto/chacha_glue.c index b391e13a9e41..a94e30b6f941 100644 --- a/arch/x86/crypto/chacha_glue.c +++ b/arch/x86/crypto/chacha_glue.c @@ -304,7 +304,8 @@ static int __init chacha_simd_mod_init(void) static void __exit chacha_simd_mod_fini(void) { - crypto_unregister_skciphers(algs, ARRAY_SIZE(algs)); + if (boot_cpu_has(X86_FEATURE_SSSE3)) + crypto_unregister_skciphers(algs, ARRAY_SIZE(algs)); } module_init(chacha_simd_mod_init); -- cgit v1.2.3 From 3ed6751bb8fa89c3014399bb0414348499ee202a Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Thu, 21 Nov 2019 18:21:31 +0200 Subject: mips: fix build when "48 bits virtual memory" is enabled With CONFIG_MIPS_VA_BITS_48=y the build fails miserably: CC arch/mips/kernel/asm-offsets.s In file included from arch/mips/include/asm/pgtable.h:644, from include/linux/mm.h:99, from arch/mips/kernel/asm-offsets.c:15: include/asm-generic/pgtable.h:16:2: error: #error CONFIG_PGTABLE_LEVELS is not consistent with __PAGETABLE_{P4D,PUD,PMD}_FOLDED #error CONFIG_PGTABLE_LEVELS is not consistent with __PAGETABLE_{P4D,PUD,PMD}_FOLDED ^~~~~ include/asm-generic/pgtable.h:390:28: error: unknown type name 'p4d_t'; did you mean 'pmd_t'? static inline int p4d_same(p4d_t p4d_a, p4d_t p4d_b) ^~~~~ pmd_t [ ... more such errors ... ] scripts/Makefile.build:99: recipe for target 'arch/mips/kernel/asm-offsets.s' failed make[2]: *** [arch/mips/kernel/asm-offsets.s] Error 1 This happens because CONFIG_MIPS_VA_BITS_48 enables a 4th level of the page tables, but neither pgtable-nop4d.h nor 5level-fixup.h is included to cope with the 5th level.
Replace the #ifdef conditions around the includes of pgtable-nop{m,u}d.h with explicit CONFIG_PGTABLE_LEVELS checks, and add an include of 5level-fixup.h for the case when CONFIG_PGTABLE_LEVELS==4. Signed-off-by: Mike Rapoport Signed-off-by: Paul Burton Cc: Ralf Baechle Cc: James Hogan Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-mm@kvack.org Cc: Mike Rapoport --- arch/mips/include/asm/pgtable-64.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/mips/include/asm/pgtable-64.h b/arch/mips/include/asm/pgtable-64.h index 93a9dce31f25..813dfe5f45a5 100644 --- a/arch/mips/include/asm/pgtable-64.h +++ b/arch/mips/include/asm/pgtable-64.h @@ -18,10 +18,12 @@ #include #define __ARCH_USE_5LEVEL_HACK -#if defined(CONFIG_PAGE_SIZE_64KB) && !defined(CONFIG_MIPS_VA_BITS_48) +#if CONFIG_PGTABLE_LEVELS == 2 #include -#elif !(defined(CONFIG_PAGE_SIZE_4KB) && defined(CONFIG_MIPS_VA_BITS_48)) +#elif CONFIG_PGTABLE_LEVELS == 3 #include +#else +#include #endif /* @@ -216,6 +218,9 @@ static inline unsigned long pgd_page_vaddr(pgd_t pgd) return pgd_val(pgd); } +#define pgd_phys(pgd) virt_to_phys((void *)pgd_val(pgd)) +#define pgd_page(pgd) (pfn_to_page(pgd_phys(pgd) >> PAGE_SHIFT)) + static inline pud_t *pud_offset(pgd_t *pgd, unsigned long address) { return (pud_t *)pgd_page_vaddr(*pgd) + pud_index(address); -- cgit v1.2.3 From 31168f033e3751d1c9245d9ee847d775af16dcd6 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Thu, 21 Nov 2019 18:21:32 +0200 Subject: mips: drop __pXd_offset() macros that duplicate pXd_index() ones The __pXd_offset() macros are identical to the pXd_index() macros, and there is no point in keeping both of them. All architectures define and use pXd_index(), so let's keep only those to make mips consistent with the rest of the kernel.
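For context, both macro families reduce to the same shift-and-mask computation; a minimal sketch of that shared pattern follows (hypothetical helper name and simplified types, not part of the patch):

/* Hypothetical sketch: the computation behind the pgd/pud/pmd index macros. */
static inline unsigned long level_index(unsigned long address,
					unsigned int shift,
					unsigned long ptrs_per_table)
{
	/* Shift out the lower-level bits, then mask to the table size. */
	return (address >> shift) & (ptrs_per_table - 1);
}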
Signed-off-by: Mike Rapoport Signed-off-by: Paul Burton Cc: Ralf Baechle Cc: James Hogan Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-mm@kvack.org Cc: Mike Rapoport --- arch/mips/include/asm/pgtable-32.h | 5 +---- arch/mips/include/asm/pgtable-64.h | 4 ---- arch/mips/kvm/mmu.c | 24 ++++++++++++------------ arch/mips/mm/fault.c | 2 +- arch/mips/mm/init.c | 6 +++--- arch/mips/mm/pgtable-32.c | 2 +- 6 files changed, 18 insertions(+), 25 deletions(-) (limited to 'arch') diff --git a/arch/mips/include/asm/pgtable-32.h b/arch/mips/include/asm/pgtable-32.h index ba967148b016..6089c5c4936c 100644 --- a/arch/mips/include/asm/pgtable-32.h +++ b/arch/mips/include/asm/pgtable-32.h @@ -196,14 +196,11 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t prot) #define pte_page(x) pfn_to_page(pte_pfn(x)) -#define __pgd_offset(address) pgd_index(address) -#define __pud_offset(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1)) -#define __pmd_offset(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1)) - /* to find an entry in a kernel page-table-directory */ #define pgd_offset_k(address) pgd_offset(&init_mm, address) #define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1)) +#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1)) #define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1)) /* to find an entry in a page-table-directory */ diff --git a/arch/mips/include/asm/pgtable-64.h b/arch/mips/include/asm/pgtable-64.h index 813dfe5f45a5..62a9e2c31312 100644 --- a/arch/mips/include/asm/pgtable-64.h +++ b/arch/mips/include/asm/pgtable-64.h @@ -319,10 +319,6 @@ static inline void pud_clear(pud_t *pudp) #define pfn_pmd(pfn, prot) __pmd(((pfn) << _PFN_SHIFT) | pgprot_val(prot)) #endif -#define __pgd_offset(address) pgd_index(address) -#define __pud_offset(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1)) -#define __pmd_offset(address) pmd_index(address) - /* to find an entry in a kernel page-table-directory */ #define pgd_offset_k(address) pgd_offset(&init_mm, address) diff --git a/arch/mips/kvm/mmu.c b/arch/mips/kvm/mmu.c index 97e538a8c1be..a62a434b256e 100644 --- a/arch/mips/kvm/mmu.c +++ b/arch/mips/kvm/mmu.c @@ -204,8 +204,8 @@ static bool kvm_mips_flush_gpa_pmd(pmd_t *pmd, unsigned long start_gpa, { pte_t *pte; unsigned long end = ~0ul; - int i_min = __pmd_offset(start_gpa); - int i_max = __pmd_offset(end_gpa); + int i_min = pmd_index(start_gpa); + int i_max = pmd_index(end_gpa); bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PMD - 1); int i; @@ -232,8 +232,8 @@ static bool kvm_mips_flush_gpa_pud(pud_t *pud, unsigned long start_gpa, { pmd_t *pmd; unsigned long end = ~0ul; - int i_min = __pud_offset(start_gpa); - int i_max = __pud_offset(end_gpa); + int i_min = pud_index(start_gpa); + int i_max = pud_index(end_gpa); bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PUD - 1); int i; @@ -334,8 +334,8 @@ static int kvm_mips_##name##_pmd(pmd_t *pmd, unsigned long start, \ int ret = 0; \ pte_t *pte; \ unsigned long cur_end = ~0ul; \ - int i_min = __pmd_offset(start); \ - int i_max = __pmd_offset(end); \ + int i_min = pmd_index(start); \ + int i_max = pmd_index(end); \ int i; \ \ for (i = i_min; i <= i_max; ++i, start = 0) { \ @@ -357,8 +357,8 @@ static int kvm_mips_##name##_pud(pud_t *pud, unsigned long start, \ int ret = 0; \ pmd_t *pmd; \ unsigned long cur_end = ~0ul; \ - int i_min = __pud_offset(start); \ - int i_max = __pud_offset(end); \ + int i_min = pud_index(start); \ + int i_max = pud_index(end); \ 
int i; \ \ for (i = i_min; i <= i_max; ++i, start = 0) { \ @@ -862,8 +862,8 @@ static bool kvm_mips_flush_gva_pmd(pmd_t *pmd, unsigned long start_gva, { pte_t *pte; unsigned long end = ~0ul; - int i_min = __pmd_offset(start_gva); - int i_max = __pmd_offset(end_gva); + int i_min = pmd_index(start_gva); + int i_max = pmd_index(end_gva); bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PMD - 1); int i; @@ -890,8 +890,8 @@ static bool kvm_mips_flush_gva_pud(pud_t *pud, unsigned long start_gva, { pmd_t *pmd; unsigned long end = ~0ul; - int i_min = __pud_offset(start_gva); - int i_max = __pud_offset(end_gva); + int i_min = pud_index(start_gva); + int i_max = pud_index(end_gva); bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PUD - 1); int i; diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c index f589aa8f47d9..524af96c8131 100644 --- a/arch/mips/mm/fault.c +++ b/arch/mips/mm/fault.c @@ -292,7 +292,7 @@ vmalloc_fault: * Do _not_ use "tsk" here. We might be inside * an interrupt in the middle of a task switch.. */ - int offset = __pgd_offset(address); + int offset = pgd_index(address); pgd_t *pgd, *pgd_k; pud_t *pud, *pud_k; pmd_t *pmd, *pmd_k; pte_t *pte_k; diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index 090fa653dfa9..50f9ed8c6c1b 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -239,9 +239,9 @@ void __init fixrange_init(unsigned long start, unsigned long end, unsigned long vaddr; vaddr = start; - i = __pgd_offset(vaddr); - j = __pud_offset(vaddr); - k = __pmd_offset(vaddr); + i = pgd_index(vaddr); + j = pud_index(vaddr); + k = pmd_index(vaddr); pgd = pgd_base + i; for ( ; (i < PTRS_PER_PGD) && (vaddr < end); pgd++, i++) { diff --git a/arch/mips/mm/pgtable-32.c b/arch/mips/mm/pgtable-32.c index 6416a531a4c3..621a01fbef64 100644 --- a/arch/mips/mm/pgtable-32.c +++ b/arch/mips/mm/pgtable-32.c @@ -81,7 +81,7 @@ void __init pagetable_init(void) vaddr = PKMAP_BASE; fixrange_init(vaddr & PMD_MASK, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base); - pgd = swapper_pg_dir + __pgd_offset(vaddr); + pgd = swapper_pg_dir + pgd_index(vaddr); pud = pud_offset(pgd, vaddr); pmd = pmd_offset(pud, vaddr); pte = pte_offset_kernel(pmd, vaddr); -- cgit v1.2.3 From 2bee1b58484f036e3e2dc657e524d095d0d332b3 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Thu, 21 Nov 2019 18:21:33 +0200 Subject: mips: add support for folded p4d page tables Implement the primitives necessary for the 4th level folding, add walks of the p4d level where appropriate, replace 5level-fixup.h with pgtable-nop4d.h and drop usage of __ARCH_USE_5LEVEL_HACK.
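For readers unfamiliar with level folding, a minimal sketch of the resulting walk order is shown here; it is illustrative only, uses the generic kernel helpers, and omits the presence checks a real walk needs:

/* Illustrative walk: the p4d step sits between pgd and pud; on
 * configurations with fewer page-table levels the folded steps
 * compile away to no-ops.
 */
static pte_t *walk_to_pte(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pgd = pgd_offset(mm, addr);
	p4d_t *p4d = p4d_offset(pgd, addr);
	pud_t *pud = pud_offset(p4d, addr);
	pmd_t *pmd = pmd_offset(pud, addr);

	return pte_offset_kernel(pmd, addr);
}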
Signed-off-by: Mike Rapoport Signed-off-by: Paul Burton Cc: Ralf Baechle Cc: James Hogan Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-mm@kvack.org Cc: Mike Rapoport --- arch/mips/include/asm/fixmap.h | 2 +- arch/mips/include/asm/pgalloc.h | 4 ++-- arch/mips/include/asm/pgtable-32.h | 1 - arch/mips/include/asm/pgtable-64.h | 37 +++++++++++++++++++------------------ arch/mips/kvm/mmu.c | 16 ++++++++++++---- arch/mips/kvm/trap_emul.c | 4 +++- arch/mips/mm/c-r3k.c | 4 +++- arch/mips/mm/c-r4k.c | 4 +++- arch/mips/mm/c-tx39.c | 4 +++- arch/mips/mm/fault.c | 10 ++++++++-- arch/mips/mm/hugetlbpage.c | 14 ++++++++++---- arch/mips/mm/ioremap.c | 6 +++++- arch/mips/mm/pgtable-32.c | 4 +++- arch/mips/mm/tlb-r4k.c | 4 +++- 14 files changed, 75 insertions(+), 39 deletions(-) (limited to 'arch') diff --git a/arch/mips/include/asm/fixmap.h b/arch/mips/include/asm/fixmap.h index 6842ffafd1e7..1784d4348c36 100644 --- a/arch/mips/include/asm/fixmap.h +++ b/arch/mips/include/asm/fixmap.h @@ -70,7 +70,7 @@ enum fixed_addresses { #include #define kmap_get_fixmap_pte(vaddr) \ - pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr)), (vaddr)) + pte_offset_kernel(pmd_offset(pud_offset(p4d_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr)), (vaddr)), (vaddr)) /* * Called from pgtable_init() diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h index 166842337eb2..fa77cb71f303 100644 --- a/arch/mips/include/asm/pgalloc.h +++ b/arch/mips/include/asm/pgalloc.h @@ -96,9 +96,9 @@ static inline void pud_free(struct mm_struct *mm, pud_t *pud) free_pages((unsigned long)pud, PUD_ORDER); } -static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) +static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4d, pud_t *pud) { - set_pgd(pgd, __pgd((unsigned long)pud)); + set_p4d(p4d, __p4d((unsigned long)pud)); } #define __pud_free_tlb(tlb, x, addr) pud_free((tlb)->mm, x) diff --git a/arch/mips/include/asm/pgtable-32.h b/arch/mips/include/asm/pgtable-32.h index 6089c5c4936c..1945c8970141 100644 --- a/arch/mips/include/asm/pgtable-32.h +++ b/arch/mips/include/asm/pgtable-32.h @@ -16,7 +16,6 @@ #include #include -#define __ARCH_USE_5LEVEL_HACK #include #ifdef CONFIG_HIGHMEM diff --git a/arch/mips/include/asm/pgtable-64.h b/arch/mips/include/asm/pgtable-64.h index 62a9e2c31312..f92716cfa4f4 100644 --- a/arch/mips/include/asm/pgtable-64.h +++ b/arch/mips/include/asm/pgtable-64.h @@ -17,13 +17,12 @@ #include #include -#define __ARCH_USE_5LEVEL_HACK #if CONFIG_PGTABLE_LEVELS == 2 #include #elif CONFIG_PGTABLE_LEVELS == 3 #include #else -#include +#include #endif /* @@ -188,47 +187,49 @@ extern pud_t invalid_pud_table[PTRS_PER_PUD]; /* * Empty pgd entries point to the invalid_pud_table. 
*/ -static inline int pgd_none(pgd_t pgd) +static inline int p4d_none(p4d_t p4d) { - return pgd_val(pgd) == (unsigned long)invalid_pud_table; + return p4d_val(p4d) == (unsigned long)invalid_pud_table; } -static inline int pgd_bad(pgd_t pgd) +static inline int p4d_bad(p4d_t p4d) { - if (unlikely(pgd_val(pgd) & ~PAGE_MASK)) + if (unlikely(p4d_val(p4d) & ~PAGE_MASK)) return 1; return 0; } -static inline int pgd_present(pgd_t pgd) +static inline int p4d_present(p4d_t p4d) { - return pgd_val(pgd) != (unsigned long)invalid_pud_table; + return p4d_val(p4d) != (unsigned long)invalid_pud_table; } -static inline void pgd_clear(pgd_t *pgdp) +static inline void p4d_clear(p4d_t *p4dp) { - pgd_val(*pgdp) = (unsigned long)invalid_pud_table; + p4d_val(*p4dp) = (unsigned long)invalid_pud_table; } #define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)) -static inline unsigned long pgd_page_vaddr(pgd_t pgd) +static inline unsigned long p4d_page_vaddr(p4d_t p4d) { - return pgd_val(pgd); + return p4d_val(p4d); } -#define pgd_phys(pgd) virt_to_phys((void *)pgd_val(pgd)) -#define pgd_page(pgd) (pfn_to_page(pgd_phys(pgd) >> PAGE_SHIFT)) +#define p4d_phys(p4d) virt_to_phys((void *)p4d_val(p4d)) +#define p4d_page(p4d) (pfn_to_page(p4d_phys(p4d) >> PAGE_SHIFT)) -static inline pud_t *pud_offset(pgd_t *pgd, unsigned long address) +#define p4d_index(address) (((address) >> P4D_SHIFT) & (PTRS_PER_P4D - 1)) + +static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address) { - return (pud_t *)pgd_page_vaddr(*pgd) + pud_index(address); + return (pud_t *)p4d_page_vaddr(*p4d) + pud_index(address); } -static inline void set_pgd(pgd_t *pgd, pgd_t pgdval) +static inline void set_p4d(p4d_t *p4d, p4d_t p4dval) { - *pgd = pgdval; + *p4d = p4dval; } #endif diff --git a/arch/mips/kvm/mmu.c b/arch/mips/kvm/mmu.c index a62a434b256e..7dad7a293eae 100644 --- a/arch/mips/kvm/mmu.c +++ b/arch/mips/kvm/mmu.c @@ -136,6 +136,7 @@ pgd_t *kvm_pgd_alloc(void) static pte_t *kvm_mips_walk_pgd(pgd_t *pgd, struct kvm_mmu_memory_cache *cache, unsigned long addr) { + p4d_t *p4d; pud_t *pud; pmd_t *pmd; @@ -145,7 +146,8 @@ static pte_t *kvm_mips_walk_pgd(pgd_t *pgd, struct kvm_mmu_memory_cache *cache, BUG(); return NULL; } - pud = pud_offset(pgd, addr); + p4d = p4d_offset(pgd, addr); + pud = pud_offset(p4d, addr); if (pud_none(*pud)) { pmd_t *new_pmd; @@ -258,6 +260,7 @@ static bool kvm_mips_flush_gpa_pud(pud_t *pud, unsigned long start_gpa, static bool kvm_mips_flush_gpa_pgd(pgd_t *pgd, unsigned long start_gpa, unsigned long end_gpa) { + p4d_t *p4d; pud_t *pud; unsigned long end = ~0ul; int i_min = pgd_index(start_gpa); @@ -269,7 +272,8 @@ static bool kvm_mips_flush_gpa_pgd(pgd_t *pgd, unsigned long start_gpa, if (!pgd_present(pgd[i])) continue; - pud = pud_offset(pgd + i, 0); + p4d = p4d_offset(pgd, 0); + pud = pud_offset(p4d + i, 0); if (i == i_max) end = end_gpa; @@ -378,6 +382,7 @@ static int kvm_mips_##name##_pgd(pgd_t *pgd, unsigned long start, \ unsigned long end) \ { \ int ret = 0; \ + p4d_t *p4d; \ pud_t *pud; \ unsigned long cur_end = ~0ul; \ int i_min = pgd_index(start); \ @@ -388,7 +393,8 @@ static int kvm_mips_##name##_pgd(pgd_t *pgd, unsigned long start, \ if (!pgd_present(pgd[i])) \ continue; \ \ - pud = pud_offset(pgd + i, 0); \ + p4d = p4d_offset(pgd, 0); \ + pud = pud_offset(p4d + i, 0); \ if (i == i_max) \ cur_end = end; \ \ @@ -916,6 +922,7 @@ static bool kvm_mips_flush_gva_pud(pud_t *pud, unsigned long start_gva, static bool kvm_mips_flush_gva_pgd(pgd_t *pgd, unsigned long start_gva, unsigned long end_gva) 
{ + p4d_t *p4d; pud_t *pud; unsigned long end = ~0ul; int i_min = pgd_index(start_gva); @@ -927,7 +934,8 @@ static bool kvm_mips_flush_gva_pgd(pgd_t *pgd, unsigned long start_gva, if (!pgd_present(pgd[i])) continue; - pud = pud_offset(pgd + i, 0); + p4d = p4d_offset(pgd, 0); + pud = pud_offset(p4d + i, 0); if (i == i_max) end = end_gva; diff --git a/arch/mips/kvm/trap_emul.c b/arch/mips/kvm/trap_emul.c index 73daa6ad33af..5a11e83dffe6 100644 --- a/arch/mips/kvm/trap_emul.c +++ b/arch/mips/kvm/trap_emul.c @@ -564,6 +564,7 @@ static void kvm_mips_emul_free_gva_pt(pgd_t *pgd) /* Don't free host kernel page tables copied from init_mm.pgd */ const unsigned long end = 0x80000000; unsigned long pgd_va, pud_va, pmd_va; + p4d_t *p4d; pud_t *pud; pmd_t *pmd; pte_t *pte; @@ -576,7 +577,8 @@ static void kvm_mips_emul_free_gva_pt(pgd_t *pgd) pgd_va = (unsigned long)i << PGDIR_SHIFT; if (pgd_va >= end) break; - pud = pud_offset(pgd + i, 0); + p4d = p4d_offset(pgd, 0); + pud = pud_offset(p4d + i, 0); for (j = 0; j < PTRS_PER_PUD; j++) { if (pud_none(pud[j])) continue; diff --git a/arch/mips/mm/c-r3k.c b/arch/mips/mm/c-r3k.c index 0ca401ddf3b7..15bb8cf59828 100644 --- a/arch/mips/mm/c-r3k.c +++ b/arch/mips/mm/c-r3k.c @@ -241,6 +241,7 @@ static void r3k_flush_cache_page(struct vm_area_struct *vma, int exec = vma->vm_flags & VM_EXEC; struct mm_struct *mm = vma->vm_mm; pgd_t *pgdp; + p4d_t *p4dp; pud_t *pudp; pmd_t *pmdp; pte_t *ptep; @@ -253,7 +254,8 @@ static void r3k_flush_cache_page(struct vm_area_struct *vma, return; pgdp = pgd_offset(mm, addr); - pudp = pud_offset(pgdp, addr); + p4dp = p4d_offset(pgdp, addr); + pudp = pud_offset(p4dp, addr); pmdp = pmd_offset(pudp, addr); ptep = pte_offset(pmdp, addr); diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c index 9d82cb9ced55..5f3d0103b95d 100644 --- a/arch/mips/mm/c-r4k.c +++ b/arch/mips/mm/c-r4k.c @@ -654,6 +654,7 @@ static inline void local_r4k_flush_cache_page(void *args) struct mm_struct *mm = vma->vm_mm; int map_coherent = 0; pgd_t *pgdp; + p4d_t *p4dp; pud_t *pudp; pmd_t *pmdp; pte_t *ptep; @@ -668,7 +669,8 @@ static inline void local_r4k_flush_cache_page(void *args) addr &= PAGE_MASK; pgdp = pgd_offset(mm, addr); - pudp = pud_offset(pgdp, addr); + p4dp = p4d_offset(pgdp, addr); + pudp = pud_offset(p4dp, addr); pmdp = pmd_offset(pudp, addr); ptep = pte_offset(pmdp, addr); diff --git a/arch/mips/mm/c-tx39.c b/arch/mips/mm/c-tx39.c index b7c8a9d79c35..686867270627 100644 --- a/arch/mips/mm/c-tx39.c +++ b/arch/mips/mm/c-tx39.c @@ -170,6 +170,7 @@ static void tx39_flush_cache_page(struct vm_area_struct *vma, unsigned long page int exec = vma->vm_flags & VM_EXEC; struct mm_struct *mm = vma->vm_mm; pgd_t *pgdp; + p4d_t *p4dp; pud_t *pudp; pmd_t *pmdp; pte_t *ptep; @@ -183,7 +184,8 @@ static void tx39_flush_cache_page(struct vm_area_struct *vma, unsigned long page page &= PAGE_MASK; pgdp = pgd_offset(mm, page); - pudp = pud_offset(pgdp, page); + p4dp = p4d_offset(pgdp, page); + pudp = pud_offset(p4dp, page); pmdp = pmd_offset(pudp, page); ptep = pte_offset(pmdp, page); diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c index 524af96c8131..1e8d00793784 100644 --- a/arch/mips/mm/fault.c +++ b/arch/mips/mm/fault.c @@ -294,6 +294,7 @@ vmalloc_fault: */ int offset = pgd_index(address); pgd_t *pgd, *pgd_k; + p4d_t *p4d, *p4d_k; pud_t *pud, *pud_k; pmd_t *pmd, *pmd_k; pte_t *pte_k; @@ -305,8 +306,13 @@ vmalloc_fault: goto no_context; set_pgd(pgd, *pgd_k); - pud = pud_offset(pgd, address); - pud_k = pud_offset(pgd_k, address); + p4d = p4d_offset(pgd, 
address); + p4d_k = p4d_offset(pgd_k, address); + if (!p4d_present(*p4d_k)) + goto no_context; + + pud = pud_offset(p4d, address); + pud_k = pud_offset(p4d_k, address); if (!pud_present(*pud_k)) goto no_context; diff --git a/arch/mips/mm/hugetlbpage.c b/arch/mips/mm/hugetlbpage.c index cef152234312..77ffece9c270 100644 --- a/arch/mips/mm/hugetlbpage.c +++ b/arch/mips/mm/hugetlbpage.c @@ -25,11 +25,13 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz) { pgd_t *pgd; + p4d_t *p4d; pud_t *pud; pte_t *pte = NULL; pgd = pgd_offset(mm, addr); - pud = pud_alloc(mm, pgd, addr); + p4d = p4d_alloc(mm, pgd, addr); + pud = pud_alloc(mm, p4d, addr); if (pud) pte = (pte_t *)pmd_alloc(mm, pud, addr); @@ -40,14 +42,18 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, unsigned long sz) { pgd_t *pgd; + p4d_t *p4d; pud_t *pud; pmd_t *pmd = NULL; pgd = pgd_offset(mm, addr); if (pgd_present(*pgd)) { - pud = pud_offset(pgd, addr); - if (pud_present(*pud)) - pmd = pmd_offset(pud, addr); + p4d = p4d_offset(pgd, addr); + if (p4d_present(*p4d)) { + pud = pud_offset(p4d, addr); + if (pud_present(*pud)) + pmd = pmd_offset(pud, addr); + } } return (pte_t *) pmd; } diff --git a/arch/mips/mm/ioremap.c b/arch/mips/mm/ioremap.c index 1601d90b087b..8317f337a86e 100644 --- a/arch/mips/mm/ioremap.c +++ b/arch/mips/mm/ioremap.c @@ -78,11 +78,15 @@ static int remap_area_pages(unsigned long address, phys_addr_t phys_addr, flush_cache_all(); BUG_ON(address >= end); do { + p4d_t *p4d; pud_t *pud; pmd_t *pmd; error = -ENOMEM; - pud = pud_alloc(&init_mm, dir, address); + p4d = p4d_alloc(&init_mm, dir, address); + if (!p4d) + break; + pud = pud_alloc(&init_mm, p4d, address); if (!pud) break; pmd = pmd_alloc(&init_mm, pud, address); diff --git a/arch/mips/mm/pgtable-32.c b/arch/mips/mm/pgtable-32.c index 621a01fbef64..37c7a01427d2 100644 --- a/arch/mips/mm/pgtable-32.c +++ b/arch/mips/mm/pgtable-32.c @@ -56,6 +56,7 @@ void __init pagetable_init(void) pgd_t *pgd_base; #ifdef CONFIG_HIGHMEM pgd_t *pgd; + p4d_t *p4d; pud_t *pud; pmd_t *pmd; pte_t *pte; @@ -82,7 +83,8 @@ void __init pagetable_init(void) fixrange_init(vaddr & PMD_MASK, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base); pgd = swapper_pg_dir + pgd_index(vaddr); - pud = pud_offset(pgd, vaddr); + p4d = p4d_offset(pgd, vaddr); + pud = pud_offset(p4d, vaddr); pmd = pmd_offset(pud, vaddr); pte = pte_offset_kernel(pmd, vaddr); pkmap_page_table = pte; diff --git a/arch/mips/mm/tlb-r4k.c b/arch/mips/mm/tlb-r4k.c index 83b450ddbbc2..d7a9d5f211f0 100644 --- a/arch/mips/mm/tlb-r4k.c +++ b/arch/mips/mm/tlb-r4k.c @@ -295,6 +295,7 @@ void __update_tlb(struct vm_area_struct * vma, unsigned long address, pte_t pte) { unsigned long flags; pgd_t *pgdp; + p4d_t *p4dp; pud_t *pudp; pmd_t *pmdp; pte_t *ptep; @@ -320,7 +321,8 @@ void __update_tlb(struct vm_area_struct * vma, unsigned long address, pte_t pte) mtc0_tlbw_hazard(); tlb_probe(); tlb_probe_hazard(); - pudp = pud_offset(pgdp, address); + p4dp = p4d_offset(pgdp, address); + pudp = pud_offset(p4dp, address); pmdp = pmd_offset(pudp, address); idx = read_c0_index(); #ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT -- cgit v1.2.3 From e3d765a941f6130fd94e47b2064cfee71f4cbadd Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Tue, 19 Nov 2019 12:08:56 +0100 Subject: MIPS: SGI-IP27: Fix crash, when CPUs are disabled via nr_cpus parameter If the number of CPUs is limited by the kernel command line parameter nr_cpus, assignment of interrupts according to NUMA rules might not be possible.
As a fallback, use one of the online CPUs as the interrupt destination. Fixes: 69a07a41d908 ("MIPS: SGI-IP27: rework HUB interrupts") Signed-off-by: Thomas Bogendoerfer Signed-off-by: Paul Burton Cc: Ralf Baechle Cc: James Hogan Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org --- arch/mips/sgi-ip27/ip27-irq.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/mips/sgi-ip27/ip27-irq.c b/arch/mips/sgi-ip27/ip27-irq.c index 5aef06e28a5b..c72ae330ea93 100644 --- a/arch/mips/sgi-ip27/ip27-irq.c +++ b/arch/mips/sgi-ip27/ip27-irq.c @@ -73,6 +73,9 @@ static void setup_hub_mask(struct hub_irq_data *hd, const struct cpumask *mask) int cpu; cpu = cpumask_first_and(mask, cpu_online_mask); + if (cpu >= nr_cpu_ids) + cpu = cpumask_any(cpu_online_mask); + nasid = cpu_to_node(cpu); hd->cpu = cpu; if (!cputoslice(cpu)) { @@ -139,6 +142,7 @@ static int hub_domain_alloc(struct irq_domain *domain, unsigned int virq, /* use CPU connected to nearest hub */ hub = hub_data(info->nasid); setup_hub_mask(hd, &hub->h_cpus); + info->nasid = cpu_to_node(hd->cpu); /* Make sure it's not already pending when we connect it. */ REMOTE_HUB_CLR_INTR(info->nasid, swlevel); -- cgit v1.2.3 From 37640adbefd66491cb8083a438f7bf366ac09bc7 Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Tue, 19 Nov 2019 12:08:57 +0100 Subject: MIPS: PCI: remember nasid changed by set interrupt affinity When changing interrupt affinity, remember the possibly changed nasid; otherwise an interrupt deactivate/activate sequence will incorrectly set up the interrupt. Fixes: e6308b6d35ea ("MIPS: SGI-IP27: abstract chipset irq from bridge") Signed-off-by: Thomas Bogendoerfer Signed-off-by: Paul Burton Cc: Ralf Baechle Cc: James Hogan Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org --- arch/mips/pci/pci-xtalk-bridge.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/mips/pci/pci-xtalk-bridge.c b/arch/mips/pci/pci-xtalk-bridge.c index 72e60df505f4..05753fad70bf 100644 --- a/arch/mips/pci/pci-xtalk-bridge.c +++ b/arch/mips/pci/pci-xtalk-bridge.c @@ -306,16 +306,15 @@ static int bridge_set_affinity(struct irq_data *d, const struct cpumask *mask, struct bridge_irq_chip_data *data = d->chip_data; int bit = d->parent_data->hwirq; int pin = d->hwirq; - nasid_t nasid; int ret, cpu; ret = irq_chip_set_affinity_parent(d, mask, force); if (ret >= 0) { cpu = cpumask_first_and(mask, cpu_online_mask); - nasid = cpu_to_node(cpu); + data->nasid = cpu_to_node(cpu); bridge_write(data->bc, b_int_addr[pin].addr, (((data->bc->intr_addr >> 30) & 0x30000) | - bit | (nasid << 8))); + bit | (data->nasid << 8))); bridge_read(data->bc, b_wid_tflush); } return ret; -- cgit v1.2.3 From b02efeb056998076163083a2be3df4a60830335a Mon Sep 17 00:00:00 2001 From: Zhou Yanjie Date: Tue, 19 Nov 2019 22:28:47 +0800 Subject: MIPS: Ingenic: Disable abandoned HPTLB function. The JZ4760/JZ4770/JZ4775/X1000/X1500 have an abandoned huge page TLB. This mode is not compatible with the MIPS standard and will cause a TLB miss, falling into an infinite loop (line 21 in tlb-funcs.S) when starting the init process. Write 0xa9000000 to CP0 register 5 sel 4 to disable this function and prevent getting stuck. Confirmed by Ingenic, this operation will not adversely affect processors without the HPTLB function.
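For background, a CP0 register with a non-zero select field is written with the three-operand mtc0 form; the sketch below approximates what the kernel's __write_32bit_c0_register() macro expands to here (illustrative only, not part of the patch):

/* Sketch: write val to CP0 register 5, select 4 (XBurst page control). */
static inline void xburst_write_pagectrl(unsigned int val)
{
	__asm__ __volatile__("mtc0\t%0, $5, 4" : : "r" (val));
}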
Signed-off-by: Zhou Yanjie Acked-by: Paul Cercueil Signed-off-by: Paul Burton Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: ralf@linux-mips.org Cc: jhogan@kernel.org Cc: jiaxun.yang@flygoat.com Cc: gregkh@linuxfoundation.org Cc: malat@debian.org Cc: tglx@linutronix.de Cc: chenhc@lemote.com --- arch/mips/include/asm/mipsregs.h | 6 ++++++ arch/mips/kernel/cpu-probe.c | 21 +++++++++++++++++++-- 2 files changed, 25 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h index bdbdc19a2b8f..0d5a30988697 100644 --- a/arch/mips/include/asm/mipsregs.h +++ b/arch/mips/include/asm/mipsregs.h @@ -689,6 +689,9 @@ #define MIPS_CONF7_IAR (_ULCAST_(1) << 10) #define MIPS_CONF7_AR (_ULCAST_(1) << 16) +/* Ingenic HPTLB off bits */ +#define XBURST_PAGECTRL_HPTLB_DIS 0xa9000000 + /* Ingenic Config7 bits */ #define MIPS_CONF7_BTB_LOOP_EN (_ULCAST_(1) << 4) @@ -1971,6 +1974,9 @@ do { \ #define read_c0_brcm_sleepcount() __read_32bit_c0_register($22, 7) #define write_c0_brcm_sleepcount(val) __write_32bit_c0_register($22, 7, val) +/* Ingenic page ctrl register */ +#define write_c0_page_ctrl(val) __write_32bit_c0_register($5, 4, val) + /* * Macros to access the guest system control coprocessor */ diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c index 105d89caf256..c54332697673 100644 --- a/arch/mips/kernel/cpu-probe.c +++ b/arch/mips/kernel/cpu-probe.c @@ -1977,13 +1977,30 @@ static inline void cpu_probe_ingenic(struct cpuinfo_mips *c, unsigned int cpu) break; } + switch (c->processor_id & PRID_COMP_MASK) { + /* + * The config0 register in the XBurst CPUs with a processor ID of + * PRID_COMP_INGENIC_D1 has an abandoned huge page tlb mode, this + * mode is not compatible with the MIPS standard, it will cause + * tlbmiss and into an infinite loop (line 21 in the tlb-funcs.S) + * when starting the init process. After chip reset, the default + * is HPTLB mode, Write 0xa9000000 to cp0 register 5 sel 4 to + * switch back to VTLB mode to prevent getting stuck. + */ + case PRID_COMP_INGENIC_D1: + write_c0_page_ctrl(XBURST_PAGECTRL_HPTLB_DIS); + break; /* - * The config0 register in the Xburst CPUs with a processor ID of + * The config0 register in the XBurst CPUs with a processor ID of * PRID_COMP_INGENIC_D0 report themselves as MIPS32r2 compatible, * but they don't actually support this ISA. */ - if ((c->processor_id & PRID_COMP_MASK) == PRID_COMP_INGENIC_D0) + case PRID_COMP_INGENIC_D0: c->isa_level &= ~MIPS_CPU_ISA_M32R2; + break; + default: + break; + } } static inline void cpu_probe_netlogic(struct cpuinfo_mips *c, int cpu) -- cgit v1.2.3 From 24885d1d79e2e83d49201aeae0bc59f1402fd4f1 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 22 Nov 2019 12:15:49 -0800 Subject: KVM: x86: Remove a spurious export of a static function A recent change inadvertently exported a static function, which results in modpost throwing a warning. Fix it. 
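As a minimal illustration of the problem class (hypothetical function, not from this patch), exporting a symbol that has internal linkage is what makes modpost complain:

/* Hypothetical example of the bug class. */
static u64 helper(void)
{
	return 0;
}
/* Wrong: the symbol is static, so no global definition exists to export. */
EXPORT_SYMBOL_GPL(helper);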
Fixes: cbbaa2727aa3 ("KVM: x86: fix presentation of TSX feature in ARCH_CAPABILITIES") Signed-off-by: Sean Christopherson Cc: stable@vger.kernel.org Reviewed-by: Jim Mattson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a256e09f321a..3e9ab2d1ea77 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1329,7 +1329,6 @@ static u64 kvm_get_arch_capabilities(void) return data; } -EXPORT_SYMBOL_GPL(kvm_get_arch_capabilities); static int kvm_get_msr_feature(struct kvm_msr_entry *msr) { -- cgit v1.2.3 From 05c19c2fe17c331ec384a2953be50101e8a15a73 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 22 Nov 2019 12:04:50 -0800 Subject: KVM: x86: Open code shared_msr_update() in its only caller Fold shared_msr_update() into its sole user to eliminate its pointless bounds check, its godawful printk, its misleading comment (it's called under a global lock), and its woefully inaccurate name. Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 29 +++++++++--------------------- 1 file changed, 9 insertions(+), 20 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3e9ab2d1ea77..f5db520f2ac3 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -262,23 +262,6 @@ static void kvm_on_user_return(struct user_return_notifier *urn) } } -static void shared_msr_update(unsigned slot, u32 msr) -{ - u64 value; - unsigned int cpu = smp_processor_id(); - struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu); - - /* only read, and nobody should modify it at this time, - * so don't need lock */ - if (slot >= shared_msrs_global.nr) { - printk(KERN_ERR "kvm: invalid MSR slot!"); - return; - } - rdmsrl_safe(msr, &value); - smsr->values[slot].host = value; - smsr->values[slot].curr = value; -} - void kvm_define_shared_msr(unsigned slot, u32 msr) { BUG_ON(slot >= KVM_NR_SHARED_MSRS); @@ -290,10 +273,16 @@ EXPORT_SYMBOL_GPL(kvm_define_shared_msr); static void kvm_shared_msr_cpu_online(void) { - unsigned i; + unsigned int cpu = smp_processor_id(); + struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu); + u64 value; + int i; - for (i = 0; i < shared_msrs_global.nr; ++i) - shared_msr_update(i, shared_msrs_global.msrs[i]); + for (i = 0; i < shared_msrs_global.nr; ++i) { + rdmsrl_safe(shared_msrs_global.msrs[i], &value); + smsr->values[i].host = value; + smsr->values[i].curr = value; + } } int kvm_set_shared_msr(unsigned slot, u64 value, u64 mask) -- cgit v1.2.3 From ad5996d9a0e8019c3ae5151e687939369acfe044 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 22 Nov 2019 08:58:18 -0800 Subject: KVM: x86: Grab KVM's srcu lock when setting nested state Acquire kvm->srcu for the duration of ->set_nested_state() to fix a bug where nVMX dereferences ->memslots without holding ->srcu or ->slots_lock. The other half of nested migration, ->get_nested_state(), does not need to acquire ->srcu as it is purely a dump of internal KVM (and CPU) state to userspace. Detected as an RCU lockdep splat that is 100% reproducible by running KVM's state_test selftest with CONFIG_PROVE_LOCKING=y. Note that the failing function, kvm_is_visible_gfn(), is only checking the validity of a gfn, it's not actually accessing guest memory (which is more or less unsupported during vmx_set_nested_state() due to incorrect MMU state), i.e. vmx_set_nested_state() itself isn't fundamentally broken.
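The fix follows the standard SRCU read-side bracketing pattern; a minimal sketch, simplified from the diff below:

int idx;

idx = srcu_read_lock(&vcpu->kvm->srcu);
/* ... code that may dereference kvm->memslots ... */
srcu_read_unlock(&vcpu->kvm->srcu, idx);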
In any case, setting nested state isn't a fast path so there's no reason to go out of our way to avoid taking ->srcu. ============================= WARNING: suspicious RCU usage 5.4.0-rc7+ #94 Not tainted ----------------------------- include/linux/kvm_host.h:626 suspicious rcu_dereference_check() usage! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 1 lock held by evmcs_test/10939: #0: ffff88826ffcb800 (&vcpu->mutex){+.+.}, at: kvm_vcpu_ioctl+0x85/0x630 [kvm] stack backtrace: CPU: 1 PID: 10939 Comm: evmcs_test Not tainted 5.4.0-rc7+ #94 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015 Call Trace: dump_stack+0x68/0x9b kvm_is_visible_gfn+0x179/0x180 [kvm] mmu_check_root+0x11/0x30 [kvm] fast_cr3_switch+0x40/0x120 [kvm] kvm_mmu_new_cr3+0x34/0x60 [kvm] nested_vmx_load_cr3+0xbd/0x1f0 [kvm_intel] nested_vmx_enter_non_root_mode+0xab8/0x1d60 [kvm_intel] vmx_set_nested_state+0x256/0x340 [kvm_intel] kvm_arch_vcpu_ioctl+0x491/0x11a0 [kvm] kvm_vcpu_ioctl+0xde/0x630 [kvm] do_vfs_ioctl+0xa2/0x6c0 ksys_ioctl+0x66/0x70 __x64_sys_ioctl+0x16/0x20 do_syscall_64+0x54/0x200 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x7f59a2b95f47 Fixes: 8fcc4b5923af5 ("kvm: nVMX: Introduce KVM_CAP_NESTED_STATE") Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f5db520f2ac3..3ed167e039e5 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4442,6 +4442,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, case KVM_SET_NESTED_STATE: { struct kvm_nested_state __user *user_kvm_nested_state = argp; struct kvm_nested_state kvm_state; + int idx; r = -EINVAL; if (!kvm_x86_ops->set_nested_state) @@ -4465,7 +4466,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, && !(kvm_state.flags & KVM_STATE_NESTED_GUEST_MODE)) break; + idx = srcu_read_lock(&vcpu->kvm->srcu); r = kvm_x86_ops->set_nested_state(vcpu, user_kvm_nested_state, &kvm_state); + srcu_read_unlock(&vcpu->kvm->srcu, idx); break; } case KVM_GET_SUPPORTED_HV_CPUID: { -- cgit v1.2.3 From 85c9aae9ac8b228f2134b56d4fc743afc446947a Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Fri, 22 Nov 2019 15:43:55 -0800 Subject: kvm: nVMX: Relax guest IA32_FEATURE_CONTROL constraints Commit 37e4c997dadf ("KVM: VMX: validate individual bits of guest MSR_IA32_FEATURE_CONTROL") broke the KVM_SET_MSRS ABI by instituting new constraints on the data values that kvm would accept for the guest MSR, IA32_FEATURE_CONTROL. Perhaps these constraints should have been opt-in via a new KVM capability, but they were applied indiscriminately, breaking at least one existing hypervisor. Relax the constraints to allow either or both of FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX and FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX to be set when nVMX is enabled. This change is sufficient to fix the aforementioned breakage. 
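For context, the valid-bits mask gates guest MSR writes roughly as sketched below; the helper name is hypothetical and the logic is simplified from KVM's actual MSR handling:

/* Hypothetical sketch: reject a write that sets bits outside the mask. */
static bool feature_control_write_allowed(u64 data, u64 valid_bits)
{
	return !(data & ~valid_bits);
}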
Fixes: 37e4c997dadf ("KVM: VMX: validate individual bits of guest MSR_IA32_FEATURE_CONTROL") Signed-off-by: Jim Mattson Reviewed-by: Liran Alon Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index d39475e2d44e..d175429c91b0 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -7115,10 +7115,12 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu) if (nested_vmx_allowed(vcpu)) to_vmx(vcpu)->msr_ia32_feature_control_valid_bits |= + FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX | FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; else to_vmx(vcpu)->msr_ia32_feature_control_valid_bits &= - ~FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; + ~(FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX | + FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX); if (nested_vmx_allowed(vcpu)) { nested_vmx_cr_fixed1_bits_update(vcpu); -- cgit v1.2.3 From 29b261ff6fd963da2de0e436ce03fff333740f35 Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Tue, 15 Oct 2019 14:09:48 +0200 Subject: MIPS: PCI: Fix fake subdevice ID for IOC3 Generation of fake subdevice ID had vendor and device ID swapped. Signed-off-by: Thomas Bogendoerfer Signed-off-by: Paul Burton Cc: Jakub Kicinski Cc: Jonathan Corbet Cc: Ralf Baechle Cc: Paul Burton Cc: James Hogan Cc: Lee Jones Cc: David S. Miller Cc: Srinivas Kandagatla Cc: Alessandro Zummo Cc: Alexandre Belloni Cc: Greg Kroah-Hartman Cc: Jiri Slaby Cc: linux-doc@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-mips@vger.kernel.org Cc: netdev@vger.kernel.org Cc: linux-rtc@vger.kernel.org Cc: linux-serial@vger.kernel.org --- arch/mips/pci/pci-xtalk-bridge.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/mips/pci/pci-xtalk-bridge.c b/arch/mips/pci/pci-xtalk-bridge.c index 05753fad70bf..5c1a196be0c5 100644 --- a/arch/mips/pci/pci-xtalk-bridge.c +++ b/arch/mips/pci/pci-xtalk-bridge.c @@ -452,7 +452,7 @@ static int bridge_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) return irq; } -#define IOC3_SID(sid) (PCI_VENDOR_ID_SGI << 16 | (sid)) +#define IOC3_SID(sid) (PCI_VENDOR_ID_SGI | ((sid) << 16)) static void bridge_setup_ip27_baseio6g(struct bridge_controller *bc) { -- cgit v1.2.3 From a8d0f11ee50ddbd9f243c7a8b1a393a4f23ba093 Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Tue, 15 Oct 2019 14:09:51 +0200 Subject: MIPS: SGI-IP27: Enable ethernet phy on second Origin 200 module PROM only enables ethernet PHY on first Origin 200 module, so we must do it ourselves for the second module. Signed-off-by: Thomas Bogendoerfer Signed-off-by: Paul Burton Cc: Jakub Kicinski Cc: Jonathan Corbet Cc: Ralf Baechle Cc: Paul Burton Cc: James Hogan Cc: Lee Jones Cc: David S. Miller Cc: Srinivas Kandagatla Cc: Alessandro Zummo Cc: Alexandre Belloni Cc: Greg Kroah-Hartman Cc: Jiri Slaby Cc: linux-doc@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-mips@vger.kernel.org Cc: netdev@vger.kernel.org Cc: linux-rtc@vger.kernel.org Cc: linux-serial@vger.kernel.org --- arch/mips/pci/pci-ip27.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'arch') diff --git a/arch/mips/pci/pci-ip27.c b/arch/mips/pci/pci-ip27.c index 45a0be40c0c3..8e26b120f994 100644 --- a/arch/mips/pci/pci-ip27.c +++ b/arch/mips/pci/pci-ip27.c @@ -7,6 +7,11 @@ * Copyright (C) 1999, 2000, 04 Ralf Baechle (ralf@linux-mips.org) * Copyright (C) 1999, 2000 Silicon Graphics, Inc. 
*/ +#include +#include +#include +#include +#include #include #ifdef CONFIG_NUMA @@ -18,3 +23,20 @@ int pcibus_to_node(struct pci_bus *bus) } EXPORT_SYMBOL(pcibus_to_node); #endif /* CONFIG_NUMA */ + +static void ip29_fixup_phy(struct pci_dev *dev) +{ + int nasid = pcibus_to_node(dev->bus); + u32 sid; + + if (nasid != 1) + return; /* only needed on second module */ + + /* enable ethernet PHY on IP29 systemboard */ + pci_read_config_dword(dev, PCI_SUBSYSTEM_VENDOR_ID, &sid); + if (sid == (PCI_VENDOR_ID_SGI | (IOC3_SUBSYS_IP29_SYSBOARD) << 16)) + REMOTE_HUB_S(nasid, MD_LED0, 0x09); +} + +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_SGI, PCI_DEVICE_ID_SGI_IOC3, + ip29_fixup_phy); -- cgit v1.2.3 From c392bccf2c1075b5d2cc9022d0116a516acb721d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 20 Nov 2019 12:14:51 +1100 Subject: powerpc: Add const qual to local_read() parameter A patch in net-next triggered a compile error on powerpc: include/linux/u64_stats_sync.h: In function 'u64_stats_read': include/asm-generic/local64.h:30:37: warning: passing argument 1 of 'local_read' discards 'const' qualifier from pointer target type It seems reasonable to relax the powerpc local_read() requirements accordingly. Fixes: 316580b69d0a ("u64_stats: provide u64_stats_t type") Signed-off-by: Eric Dumazet Reported-by: kbuild test robot Acked-by: Michael Ellerman Tested-by: Stephen Rothwell # build only Signed-off-by: Jakub Kicinski --- arch/powerpc/include/asm/local.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/local.h b/arch/powerpc/include/asm/local.h index fdd00939270b..bc4bd19b7fc2 100644 --- a/arch/powerpc/include/asm/local.h +++ b/arch/powerpc/include/asm/local.h @@ -17,7 +17,7 @@ typedef struct #define LOCAL_INIT(i) { (i) } -static __inline__ long local_read(local_t *l) +static __inline__ long local_read(const local_t *l) { return READ_ONCE(l->v); } -- cgit v1.2.3 From 4b3da77b72ad6b3c48c6fe4a395ace7db39a12c5 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 22 Nov 2019 21:07:54 +0100 Subject: bpf, x86: Generalize and extend bpf_arch_text_poke for direct jumps Add BPF_MOD_{NOP_TO_JUMP,JUMP_TO_JUMP,JUMP_TO_NOP} patching for the x86 JIT in order to be able to patch direct jumps or nop them out. We need this facility in order to patch tail call jumps and, in later work, BPF static keys.
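The generalization is possible because a near call (opcode 0xE8) and a near jump (opcode 0xE9) share the same 5-byte encoding on x86-64: one opcode byte followed by a signed 32-bit displacement relative to the first byte after the instruction. A minimal sketch of the computation that emit_patch() in the diff below performs; emit_byte()/emit_le32() are hypothetical stand-ins for the JIT's EMIT macros:

	s64 offset = (s64)((u8 *)func - ((u8 *)ip + X86_PATCH_SIZE));

	if (offset != (s32)offset)	/* must fit in a signed imm32 */
		return -EINVAL;
	emit_byte(opcode);		/* 0xE8 = call rel32, 0xE9 = jmp rel32 */
	emit_le32((s32)offset);		/* little-endian displacement */

Selecting the opcode byte is then the only difference between emit_call() and emit_jump().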
Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/aa4784196a8e5e985af4b30a4fe5336bce6e9643.1574452833.git.daniel@iogearbox.net --- arch/x86/net/bpf_jit_comp.c | 64 ++++++++++++++++++++++++++++++++------------- include/linux/bpf.h | 6 +++++ 2 files changed, 52 insertions(+), 18 deletions(-) (limited to 'arch') diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 2e586f579945..f438bd3b7689 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -203,8 +203,9 @@ struct jit_context { /* Maximum number of bytes emitted while JITing one eBPF insn */ #define BPF_MAX_INSN_SIZE 128 #define BPF_INSN_SAFETY 64 -/* number of bytes emit_call() needs to generate call instruction */ -#define X86_CALL_SIZE 5 + +/* Number of bytes emit_patch() needs to generate instructions */ +#define X86_PATCH_SIZE 5 #define PROLOGUE_SIZE 25 @@ -215,7 +216,7 @@ struct jit_context { static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf) { u8 *prog = *pprog; - int cnt = X86_CALL_SIZE; + int cnt = X86_PATCH_SIZE; /* BPF trampoline can be made to work without these nops, * but let's waste 5 bytes for now and optimize later @@ -480,64 +481,91 @@ static void emit_stx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off) *pprog = prog; } -static int emit_call(u8 **pprog, void *func, void *ip) +static int emit_patch(u8 **pprog, void *func, void *ip, u8 opcode) { u8 *prog = *pprog; int cnt = 0; s64 offset; - offset = func - (ip + X86_CALL_SIZE); + offset = func - (ip + X86_PATCH_SIZE); if (!is_simm32(offset)) { pr_err("Target call %p is out of range\n", func); return -EINVAL; } - EMIT1_off32(0xE8, offset); + EMIT1_off32(opcode, offset); *pprog = prog; return 0; } +static int emit_call(u8 **pprog, void *func, void *ip) +{ + return emit_patch(pprog, func, ip, 0xE8); +} + +static int emit_jump(u8 **pprog, void *func, void *ip) +{ + return emit_patch(pprog, func, ip, 0xE9); +} + int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, void *old_addr, void *new_addr) { - u8 old_insn[X86_CALL_SIZE] = {}; - u8 new_insn[X86_CALL_SIZE] = {}; + int (*emit_patch_fn)(u8 **pprog, void *func, void *ip); + u8 old_insn[X86_PATCH_SIZE] = {}; + u8 new_insn[X86_PATCH_SIZE] = {}; u8 *prog; int ret; if (!is_kernel_text((long)ip) && !is_bpf_text_address((long)ip)) - /* BPF trampoline in modules is not supported */ + /* BPF poking in modules is not supported */ return -EINVAL; + switch (t) { + case BPF_MOD_NOP_TO_CALL ... BPF_MOD_CALL_TO_NOP: + emit_patch_fn = emit_call; + break; + case BPF_MOD_NOP_TO_JUMP ... 
BPF_MOD_JUMP_TO_NOP: + emit_patch_fn = emit_jump; + break; + default: + return -ENOTSUPP; + } + if (old_addr) { prog = old_insn; - ret = emit_call(&prog, old_addr, (void *)ip); + ret = emit_patch_fn(&prog, old_addr, (void *)ip); if (ret) return ret; } if (new_addr) { prog = new_insn; - ret = emit_call(&prog, new_addr, (void *)ip); + ret = emit_patch_fn(&prog, new_addr, (void *)ip); if (ret) return ret; } + ret = -EBUSY; mutex_lock(&text_mutex); switch (t) { case BPF_MOD_NOP_TO_CALL: - if (memcmp(ip, ideal_nops[NOP_ATOMIC5], X86_CALL_SIZE)) + case BPF_MOD_NOP_TO_JUMP: + if (memcmp(ip, ideal_nops[NOP_ATOMIC5], X86_PATCH_SIZE)) goto out; - text_poke_bp(ip, new_insn, X86_CALL_SIZE, NULL); + text_poke_bp(ip, new_insn, X86_PATCH_SIZE, NULL); break; case BPF_MOD_CALL_TO_CALL: - if (memcmp(ip, old_insn, X86_CALL_SIZE)) + case BPF_MOD_JUMP_TO_JUMP: + if (memcmp(ip, old_insn, X86_PATCH_SIZE)) goto out; - text_poke_bp(ip, new_insn, X86_CALL_SIZE, NULL); + text_poke_bp(ip, new_insn, X86_PATCH_SIZE, NULL); break; case BPF_MOD_CALL_TO_NOP: - if (memcmp(ip, old_insn, X86_CALL_SIZE)) + case BPF_MOD_JUMP_TO_NOP: + if (memcmp(ip, old_insn, X86_PATCH_SIZE)) goto out; - text_poke_bp(ip, ideal_nops[NOP_ATOMIC5], X86_CALL_SIZE, NULL); + text_poke_bp(ip, ideal_nops[NOP_ATOMIC5], X86_PATCH_SIZE, + NULL); break; } ret = 0; @@ -1394,7 +1422,7 @@ int arch_prepare_bpf_trampoline(void *image, struct btf_func_model *m, u32 flags /* skip patched call instruction and point orig_call to actual * body of the kernel function. */ - orig_call += X86_CALL_SIZE; + orig_call += X86_PATCH_SIZE; prog = image; diff --git a/include/linux/bpf.h b/include/linux/bpf.h index e89e86122233..7978b617caa8 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1284,10 +1284,16 @@ static inline u32 bpf_xdp_sock_convert_ctx_access(enum bpf_access_type type, #endif /* CONFIG_INET */ enum bpf_text_poke_type { + /* All call-related pokes. */ BPF_MOD_NOP_TO_CALL, BPF_MOD_CALL_TO_CALL, BPF_MOD_CALL_TO_NOP, + /* All jump-related pokes. */ + BPF_MOD_NOP_TO_JUMP, + BPF_MOD_JUMP_TO_JUMP, + BPF_MOD_JUMP_TO_NOP, }; + int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, void *addr1, void *addr2); -- cgit v1.2.3 From 428d5df1fa4f28daf622c48dd19da35585c9053c Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 22 Nov 2019 21:08:00 +0100 Subject: bpf, x86: Emit patchable direct jump as tail call Add initial code emission for *direct* jumps for tail call maps in order to avoid the retpoline overhead from a493a87f38cf ("bpf, x64: implement retpoline for tail call") for situations that allow for it, meaning, for known constant keys at verification time which are used as index into the tail call map. In the case of Cilium, which makes heavy use of tail calls, constant keys are used in the vast majority of cases; only a single occurrence uses a dynamic key. The high-level outline is that if the target prog is NULL in the map, we emit a 5-byte nop for the fall-through case, and if not, we emit a 5-byte direct relative jmp to the target bpf_func + skipped prologue offset. Later, at runtime, we dynamically patch these 5-byte nops/jmps upon tail call map updates or deletions. Note that on x86-64 the direct jmp works as we reuse the same stack frame and skip the prologue (as opposed to some other JIT implementations). One of the issues is that the tail call map slots can change at any given time even during JITing. Therefore, we have two passes: i) emit nops for all patchable locations during the main JITing phase until we eventually declare prog->jited = 1.
At this point the image is stable but not yet public, and all jmps are disabled. While JITing, we collect additional info like poke->ip in order to remember the patch location for later modifications. In pass ii), bpf_tail_call_direct_fixup() walks over the prog's poke_tab, locks the tail call map's poke_mutex to prevent parallel updates, and patches in the right locations via __bpf_arch_text_poke(). Note that the main bpf_arch_text_poke() cannot be used at this point since we're not yet exposed to kallsyms. For the update we use plain memcpy() since the image is not public and still in read-write mode. After patching, we activate that poke entry through poke->ip_stable. Meaning, from this point on, tail call map updates/deletions will no longer ignore that poke entry. bpf_arch_text_poke() might still occur on the read-write image until we finally lock it read-only. Both modifications on the given image are under text_mutex to avoid interference with each other when update requests arrive in parallel for different tail call maps (the current one, which we have locked in the JIT, and a different one where poke->ip_stable was already set). Example prog: # ./bpftool p d x i 1655 0: (b7) r3 = 0 1: (18) r2 = map[id:526] 3: (85) call bpf_tail_call#12 4: (b7) r0 = 1 5: (95) exit Before: # ./bpftool p d j i 1655 0xffffffffc076e55c: 0: nopl 0x0(%rax,%rax,1) 5: push %rbp 6: mov %rsp,%rbp 9: sub $0x200,%rsp 10: push %rbx 11: push %r13 13: push %r14 15: push %r15 17: pushq $0x0 _ 19: xor %edx,%edx |_ index (arg 3) 1b: movabs $0xffff88d95cc82600,%rsi |_ map (arg 2) 25: mov %edx,%edx | index >= array->map.max_entries 27: cmp %edx,0x24(%rsi) | 2a: jbe 0x0000000000000066 |_ 2c: mov -0x224(%rbp),%eax | tail call limit check 32: cmp $0x20,%eax | 35: ja 0x0000000000000066 | 37: add $0x1,%eax | 3a: mov %eax,-0x224(%rbp) |_ 40: mov 0xd0(%rsi,%rdx,8),%rax |_ prog = array->ptrs[index] 48: test %rax,%rax | prog == NULL check 4b: je 0x0000000000000066 |_ 4d: mov 0x30(%rax),%rax | goto *(prog->bpf_func + prologue_size) 51: add $0x19,%rax | 55: callq 0x0000000000000061 | retpoline for indirect jump 5a: pause | 5c: lfence | 5f: jmp 0x000000000000005a | 61: mov %rax,(%rsp) | 65: retq |_ 66: mov $0x1,%eax 6b: pop %rbx 6c: pop %r15 6e: pop %r14 70: pop %r13 72: pop %rbx 73: leaveq 74: retq After; state after JIT: # ./bpftool p d j i 1655 0xffffffffc08e8930: 0: nopl 0x0(%rax,%rax,1) 5: push %rbp 6: mov %rsp,%rbp 9: sub $0x200,%rsp 10: push %rbx 11: push %r13 13: push %r14 15: push %r15 17: pushq $0x0 _ 19: xor %edx,%edx |_ index (arg 3) 1b: movabs $0xffff9d8afd74c000,%rsi |_ map (arg 2) 25: mov -0x224(%rbp),%eax | tail call limit check 2b: cmp $0x20,%eax | 2e: ja 0x000000000000003e | 30: add $0x1,%eax | 33: mov %eax,-0x224(%rbp) |_ 39: jmpq 0xfffffffffffd1785 |_ [direct] goto *(prog->bpf_func + prologue_size) 3e: mov $0x1,%eax 43: pop %rbx 44: pop %r15 46: pop %r14 48: pop %r13 4a: pop %rbx 4b: leaveq 4c: retq After; state after map update (target prog): # ./bpftool p d j i 1655 0xffffffffc08e8930: 0: nopl 0x0(%rax,%rax,1) 5: push %rbp 6: mov %rsp,%rbp 9: sub $0x200,%rsp 10: push %rbx 11: push %r13 13: push %r14 15: push %r15 17: pushq $0x0 19: xor %edx,%edx 1b: movabs $0xffff9d8afd74c000,%rsi 25: mov -0x224(%rbp),%eax 2b: cmp $0x20,%eax . 2e: ja 0x000000000000003e . 30: add $0x1,%eax .
33: mov %eax,-0x224(%rbp) |_ 39: jmpq 0xffffffffffb09f55 |_ goto *(prog->bpf_func + prologue_size) 3e: mov $0x1,%eax 43: pop %rbx 44: pop %r15 46: pop %r14 48: pop %r13 4a: pop %rbx 4b: leaveq 4c: retq After; state after map update (no prog): # ./bpftool p d j i 1655 0xffffffffc08e8930: 0: nopl 0x0(%rax,%rax,1) 5: push %rbp 6: mov %rsp,%rbp 9: sub $0x200,%rsp 10: push %rbx 11: push %r13 13: push %r14 15: push %r15 17: pushq $0x0 19: xor %edx,%edx 1b: movabs $0xffff9d8afd74c000,%rsi 25: mov -0x224(%rbp),%eax 2b: cmp $0x20,%eax . 2e: ja 0x000000000000003e . 30: add $0x1,%eax . 33: mov %eax,-0x224(%rbp) |_ 39: nopl 0x0(%rax,%rax,1) |_ fall-through nop 3e: mov $0x1,%eax 43: pop %rbx 44: pop %r15 46: pop %r14 48: pop %r13 4a: pop %rbx 4b: leaveq 4c: retq Nice bonus is that this also shrinks the code emission quite a bit for every tail call invocation. Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/6ada4c1c9d35eeb5f4ecfab94593dafa6b5c4b09.1574452833.git.daniel@iogearbox.net --- arch/x86/net/bpf_jit_comp.c | 282 +++++++++++++++++++++++++++++--------------- 1 file changed, 187 insertions(+), 95 deletions(-) (limited to 'arch') diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index f438bd3b7689..15615c94804f 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -239,6 +239,123 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf) *pprog = prog; } +static int emit_patch(u8 **pprog, void *func, void *ip, u8 opcode) +{ + u8 *prog = *pprog; + int cnt = 0; + s64 offset; + + offset = func - (ip + X86_PATCH_SIZE); + if (!is_simm32(offset)) { + pr_err("Target call %p is out of range\n", func); + return -ERANGE; + } + EMIT1_off32(opcode, offset); + *pprog = prog; + return 0; +} + +static int emit_call(u8 **pprog, void *func, void *ip) +{ + return emit_patch(pprog, func, ip, 0xE8); +} + +static int emit_jump(u8 **pprog, void *func, void *ip) +{ + return emit_patch(pprog, func, ip, 0xE9); +} + +static int __bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, + void *old_addr, void *new_addr, + const bool text_live) +{ + int (*emit_patch_fn)(u8 **pprog, void *func, void *ip); + const u8 *nop_insn = ideal_nops[NOP_ATOMIC5]; + u8 old_insn[X86_PATCH_SIZE] = {}; + u8 new_insn[X86_PATCH_SIZE] = {}; + u8 *prog; + int ret; + + switch (t) { + case BPF_MOD_NOP_TO_CALL ... BPF_MOD_CALL_TO_NOP: + emit_patch_fn = emit_call; + break; + case BPF_MOD_NOP_TO_JUMP ... 
BPF_MOD_JUMP_TO_NOP: + emit_patch_fn = emit_jump; + break; + default: + return -ENOTSUPP; + } + + switch (t) { + case BPF_MOD_NOP_TO_CALL: + case BPF_MOD_NOP_TO_JUMP: + if (!old_addr && new_addr) { + memcpy(old_insn, nop_insn, X86_PATCH_SIZE); + + prog = new_insn; + ret = emit_patch_fn(&prog, new_addr, ip); + if (ret) + return ret; + break; + } + return -ENXIO; + case BPF_MOD_CALL_TO_CALL: + case BPF_MOD_JUMP_TO_JUMP: + if (old_addr && new_addr) { + prog = old_insn; + ret = emit_patch_fn(&prog, old_addr, ip); + if (ret) + return ret; + + prog = new_insn; + ret = emit_patch_fn(&prog, new_addr, ip); + if (ret) + return ret; + break; + } + return -ENXIO; + case BPF_MOD_CALL_TO_NOP: + case BPF_MOD_JUMP_TO_NOP: + if (old_addr && !new_addr) { + memcpy(new_insn, nop_insn, X86_PATCH_SIZE); + + prog = old_insn; + ret = emit_patch_fn(&prog, old_addr, ip); + if (ret) + return ret; + break; + } + return -ENXIO; + default: + return -ENOTSUPP; + } + + ret = -EBUSY; + mutex_lock(&text_mutex); + if (memcmp(ip, old_insn, X86_PATCH_SIZE)) + goto out; + if (text_live) + text_poke_bp(ip, new_insn, X86_PATCH_SIZE, NULL); + else + memcpy(ip, new_insn, X86_PATCH_SIZE); + ret = 0; +out: + mutex_unlock(&text_mutex); + return ret; +} + +int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, + void *old_addr, void *new_addr) +{ + if (!is_kernel_text((long)ip) && + !is_bpf_text_address((long)ip)) + /* BPF poking in modules is not supported */ + return -EINVAL; + + return __bpf_arch_text_poke(ip, t, old_addr, new_addr, true); +} + /* * Generate the following code: * @@ -253,7 +370,7 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf) * goto *(prog->bpf_func + prologue_size); * out: */ -static void emit_bpf_tail_call(u8 **pprog) +static void emit_bpf_tail_call_indirect(u8 **pprog) { u8 *prog = *pprog; int label1, label2, label3; @@ -320,6 +437,69 @@ static void emit_bpf_tail_call(u8 **pprog) *pprog = prog; } +static void emit_bpf_tail_call_direct(struct bpf_jit_poke_descriptor *poke, + u8 **pprog, int addr, u8 *image) +{ + u8 *prog = *pprog; + int cnt = 0; + + /* + * if (tail_call_cnt > MAX_TAIL_CALL_CNT) + * goto out; + */ + EMIT2_off32(0x8B, 0x85, -36 - MAX_BPF_STACK); /* mov eax, dword ptr [rbp - 548] */ + EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */ + EMIT2(X86_JA, 14); /* ja out */ + EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */ + EMIT2_off32(0x89, 0x85, -36 - MAX_BPF_STACK); /* mov dword ptr [rbp -548], eax */ + + poke->ip = image + (addr - X86_PATCH_SIZE); + poke->adj_off = PROLOGUE_SIZE; + + memcpy(prog, ideal_nops[NOP_ATOMIC5], X86_PATCH_SIZE); + prog += X86_PATCH_SIZE; + /* out: */ + + *pprog = prog; +} + +static void bpf_tail_call_direct_fixup(struct bpf_prog *prog) +{ + static const enum bpf_text_poke_type type = BPF_MOD_NOP_TO_JUMP; + struct bpf_jit_poke_descriptor *poke; + struct bpf_array *array; + struct bpf_prog *target; + int i, ret; + + for (i = 0; i < prog->aux->size_poke_tab; i++) { + poke = &prog->aux->poke_tab[i]; + WARN_ON_ONCE(READ_ONCE(poke->ip_stable)); + + if (poke->reason != BPF_POKE_REASON_TAIL_CALL) + continue; + + array = container_of(poke->tail_call.map, struct bpf_array, map); + mutex_lock(&array->aux->poke_mutex); + target = array->ptrs[poke->tail_call.key]; + if (target) { + /* Plain memcpy is used when image is not live yet + * and still not locked as read-only. 
Once poke + * location is active (poke->ip_stable), any parallel + * bpf_arch_text_poke() might occur still on the + * read-write image until we finally locked it as + * read-only. Both modifications on the given image + * are under text_mutex to avoid interference. + */ + ret = __bpf_arch_text_poke(poke->ip, type, NULL, + (u8 *)target->bpf_func + + poke->adj_off, false); + BUG_ON(ret < 0); + } + WRITE_ONCE(poke->ip_stable, true); + mutex_unlock(&array->aux->poke_mutex); + } +} + static void emit_mov_imm32(u8 **pprog, bool sign_propagate, u32 dst_reg, const u32 imm32) { @@ -481,99 +661,6 @@ static void emit_stx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off) *pprog = prog; } -static int emit_patch(u8 **pprog, void *func, void *ip, u8 opcode) -{ - u8 *prog = *pprog; - int cnt = 0; - s64 offset; - - offset = func - (ip + X86_PATCH_SIZE); - if (!is_simm32(offset)) { - pr_err("Target call %p is out of range\n", func); - return -EINVAL; - } - EMIT1_off32(opcode, offset); - *pprog = prog; - return 0; -} - -static int emit_call(u8 **pprog, void *func, void *ip) -{ - return emit_patch(pprog, func, ip, 0xE8); -} - -static int emit_jump(u8 **pprog, void *func, void *ip) -{ - return emit_patch(pprog, func, ip, 0xE9); -} - -int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, - void *old_addr, void *new_addr) -{ - int (*emit_patch_fn)(u8 **pprog, void *func, void *ip); - u8 old_insn[X86_PATCH_SIZE] = {}; - u8 new_insn[X86_PATCH_SIZE] = {}; - u8 *prog; - int ret; - - if (!is_kernel_text((long)ip) && - !is_bpf_text_address((long)ip)) - /* BPF poking in modules is not supported */ - return -EINVAL; - - switch (t) { - case BPF_MOD_NOP_TO_CALL ... BPF_MOD_CALL_TO_NOP: - emit_patch_fn = emit_call; - break; - case BPF_MOD_NOP_TO_JUMP ... BPF_MOD_JUMP_TO_NOP: - emit_patch_fn = emit_jump; - break; - default: - return -ENOTSUPP; - } - - if (old_addr) { - prog = old_insn; - ret = emit_patch_fn(&prog, old_addr, (void *)ip); - if (ret) - return ret; - } - if (new_addr) { - prog = new_insn; - ret = emit_patch_fn(&prog, new_addr, (void *)ip); - if (ret) - return ret; - } - - ret = -EBUSY; - mutex_lock(&text_mutex); - switch (t) { - case BPF_MOD_NOP_TO_CALL: - case BPF_MOD_NOP_TO_JUMP: - if (memcmp(ip, ideal_nops[NOP_ATOMIC5], X86_PATCH_SIZE)) - goto out; - text_poke_bp(ip, new_insn, X86_PATCH_SIZE, NULL); - break; - case BPF_MOD_CALL_TO_CALL: - case BPF_MOD_JUMP_TO_JUMP: - if (memcmp(ip, old_insn, X86_PATCH_SIZE)) - goto out; - text_poke_bp(ip, new_insn, X86_PATCH_SIZE, NULL); - break; - case BPF_MOD_CALL_TO_NOP: - case BPF_MOD_JUMP_TO_NOP: - if (memcmp(ip, old_insn, X86_PATCH_SIZE)) - goto out; - text_poke_bp(ip, ideal_nops[NOP_ATOMIC5], X86_PATCH_SIZE, - NULL); - break; - } - ret = 0; -out: - mutex_unlock(&text_mutex); - return ret; -} - static bool ex_handler_bpf(const struct exception_table_entry *x, struct pt_regs *regs, int trapnr, unsigned long error_code, unsigned long fault_addr) @@ -1041,7 +1128,11 @@ xadd: if (is_imm8(insn->off)) break; case BPF_JMP | BPF_TAIL_CALL: - emit_bpf_tail_call(&prog); + if (imm32) + emit_bpf_tail_call_direct(&bpf_prog->aux->poke_tab[imm32 - 1], + &prog, addrs[i], image); + else + emit_bpf_tail_call_indirect(&prog); break; /* cond jump */ @@ -1599,6 +1690,7 @@ out_image: if (image) { if (!prog->is_func || extra_pass) { + bpf_tail_call_direct_fixup(prog); bpf_jit_binary_lock_ro(header); } else { jit_data->addrs = addrs; -- cgit v1.2.3 From b553a6ec570044fc1ae300c6fb24f9ce204c5894 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sun, 24 Nov 2019 01:39:42 +0100 
Subject: bpf: Simplify __bpf_arch_text_poke poke type handling Given that we have BPF_MOD_NOP_TO_{CALL,JUMP}, BPF_MOD_{CALL,JUMP}_TO_NOP and BPF_MOD_{CALL,JUMP}_TO_{CALL,JUMP} poke types and that we also pass in old_addr as well as new_addr, it's a bit redundant and unnecessarily complicates __bpf_arch_text_poke() itself since we can derive the same from the *_addr that were passed in. Hence, simplify it and use BPF_MOD_{CALL,JUMP} as the types, which also allows the call-sites to be cleaned up. In addition, __bpf_arch_text_poke() currently verifies that the text matches the expected old_insn before we invoke text_poke_bp(). Also add a check on new_insn and skip the rewrite if it already matches. This is rather useful because it avoids any special-casing in prog_array_map_poke_run() when the old and new prog are both NULL, and it has the benefit that, for this case too, the text is checked against our expectations. Suggested-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/fcb00a2b0b288d6c73de4ef58116a821c8fe8f2f.1574555798.git.daniel@iogearbox.net --- arch/x86/net/bpf_jit_comp.c | 85 +++++++++++++-------------------------------- include/linux/bpf.h | 10 ++---- kernel/bpf/arraymap.c | 12 +------ kernel/bpf/trampoline.c | 8 ++--- 4 files changed, 32 insertions(+), 83 deletions(-) (limited to 'arch') diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 15615c94804f..b8be18427277 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -269,76 +269,42 @@ static int __bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, void *old_addr, void *new_addr, const bool text_live) { - int (*emit_patch_fn)(u8 **pprog, void *func, void *ip); const u8 *nop_insn = ideal_nops[NOP_ATOMIC5]; - u8 old_insn[X86_PATCH_SIZE] = {}; - u8 new_insn[X86_PATCH_SIZE] = {}; + u8 old_insn[X86_PATCH_SIZE]; + u8 new_insn[X86_PATCH_SIZE]; u8 *prog; int ret; - switch (t) { - case BPF_MOD_NOP_TO_CALL ... BPF_MOD_CALL_TO_NOP: - emit_patch_fn = emit_call; - break; - case BPF_MOD_NOP_TO_JUMP ... BPF_MOD_JUMP_TO_NOP: - emit_patch_fn = emit_jump; - break; - default: - return -ENOTSUPP; + memcpy(old_insn, nop_insn, X86_PATCH_SIZE); + if (old_addr) { + prog = old_insn; + ret = t == BPF_MOD_CALL ? + emit_call(&prog, old_addr, ip) : + emit_jump(&prog, old_addr, ip); + if (ret) + return ret; } - switch (t) { - case BPF_MOD_NOP_TO_CALL: - case BPF_MOD_NOP_TO_JUMP: - if (!old_addr && new_addr) { - memcpy(old_insn, nop_insn, X86_PATCH_SIZE); - - prog = new_insn; - ret = emit_patch_fn(&prog, new_addr, ip); - if (ret) - return ret; - break; - } - return -ENXIO; - case BPF_MOD_CALL_TO_CALL: - case BPF_MOD_JUMP_TO_JUMP: - if (old_addr && new_addr) { - prog = old_insn; - ret = emit_patch_fn(&prog, old_addr, ip); - if (ret) - return ret; - - prog = new_insn; - ret = emit_patch_fn(&prog, new_addr, ip); - if (ret) - return ret; - break; - } - return -ENXIO; - case BPF_MOD_CALL_TO_NOP: - case BPF_MOD_JUMP_TO_NOP: - if (old_addr && !new_addr) { - memcpy(new_insn, nop_insn, X86_PATCH_SIZE); - - prog = old_insn; - ret = emit_patch_fn(&prog, old_addr, ip); - if (ret) - return ret; - break; - } - return -ENXIO; - default: - return -ENOTSUPP; + memcpy(new_insn, nop_insn, X86_PATCH_SIZE); + if (new_addr) { + prog = new_insn; + ret = t == BPF_MOD_CALL ?
+ emit_call(&prog, new_addr, ip) : + emit_jump(&prog, new_addr, ip); + if (ret) + return ret; } ret = -EBUSY; mutex_lock(&text_mutex); if (memcmp(ip, old_insn, X86_PATCH_SIZE)) goto out; - if (text_live) - text_poke_bp(ip, new_insn, X86_PATCH_SIZE, NULL); - else - memcpy(ip, new_insn, X86_PATCH_SIZE); + if (memcmp(ip, new_insn, X86_PATCH_SIZE)) { + if (text_live) + text_poke_bp(ip, new_insn, X86_PATCH_SIZE, NULL); + else + memcpy(ip, new_insn, X86_PATCH_SIZE); + } ret = 0; out: mutex_unlock(&text_mutex); @@ -465,7 +431,6 @@ static void emit_bpf_tail_call_direct(struct bpf_jit_poke_descriptor *poke, static void bpf_tail_call_direct_fixup(struct bpf_prog *prog) { - static const enum bpf_text_poke_type type = BPF_MOD_NOP_TO_JUMP; struct bpf_jit_poke_descriptor *poke; struct bpf_array *array; struct bpf_prog *target; @@ -490,7 +455,7 @@ static void bpf_tail_call_direct_fixup(struct bpf_prog *prog) * read-only. Both modifications on the given image * are under text_mutex to avoid interference. */ - ret = __bpf_arch_text_poke(poke->ip, type, NULL, + ret = __bpf_arch_text_poke(poke->ip, BPF_MOD_JUMP, NULL, (u8 *)target->bpf_func + poke->adj_off, false); BUG_ON(ret < 0); diff --git a/include/linux/bpf.h b/include/linux/bpf.h index c2f07fd410c1..35903f148be5 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1324,14 +1324,8 @@ static inline u32 bpf_xdp_sock_convert_ctx_access(enum bpf_access_type type, #endif /* CONFIG_INET */ enum bpf_text_poke_type { - /* All call-related pokes. */ - BPF_MOD_NOP_TO_CALL, - BPF_MOD_CALL_TO_CALL, - BPF_MOD_CALL_TO_NOP, - /* All jump-related pokes. */ - BPF_MOD_NOP_TO_JUMP, - BPF_MOD_JUMP_TO_JUMP, - BPF_MOD_JUMP_TO_NOP, + BPF_MOD_CALL, + BPF_MOD_JUMP, }; int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 58bdf5fd24cc..f0d19bbb9211 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -746,19 +746,9 @@ static void prog_array_map_poke_run(struct bpf_map *map, u32 key, struct bpf_prog *old, struct bpf_prog *new) { - enum bpf_text_poke_type type; struct prog_poke_elem *elem; struct bpf_array_aux *aux; - if (!old && new) - type = BPF_MOD_NOP_TO_JUMP; - else if (old && !new) - type = BPF_MOD_JUMP_TO_NOP; - else if (old && new) - type = BPF_MOD_JUMP_TO_JUMP; - else - return; - aux = container_of(map, struct bpf_array, map)->aux; WARN_ON_ONCE(!mutex_is_locked(&aux->poke_mutex)); @@ -806,7 +796,7 @@ static void prog_array_map_poke_run(struct bpf_map *map, u32 key, poke->tail_call.key != key) continue; - ret = bpf_arch_text_poke(poke->ip, type, + ret = bpf_arch_text_poke(poke->ip, BPF_MOD_JUMP, old ? (u8 *)old->bpf_func + poke->adj_off : NULL, new ? 
(u8 *)new->bpf_func + diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index 10ae59d65f13..7e89f1f49d77 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -77,7 +77,7 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr) int err; if (fentry_cnt + fexit_cnt == 0) { - err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_CALL_TO_NOP, + err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_CALL, old_image, NULL); tr->selector = 0; goto out; @@ -105,12 +105,12 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr) if (tr->selector) /* progs already running at this address */ - err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_CALL_TO_CALL, + err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_CALL, old_image, new_image); else /* first time registering */ - err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_NOP_TO_CALL, - NULL, new_image); + err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_CALL, NULL, + new_image); if (err) goto out; tr->selector++; -- cgit v1.2.3
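With the reduced BPF_MOD_{CALL,JUMP} types, the transition is fully determined by which of old_addr/new_addr are NULL, with NULL standing for the 5-byte NOP on that side. A hedged sketch of the resulting core logic follows; encode_insn() stands in for the emit_call()/emit_jump() selection plus the NOP fallback, and poke_text() for the text_poke_bp()/memcpy() choice:

	u8 old_insn[X86_PATCH_SIZE], new_insn[X86_PATCH_SIZE];

	encode_insn(old_insn, t, old_addr, ip);	/* expected current text */
	encode_insn(new_insn, t, new_addr, ip);	/* desired new text */

	if (memcmp(ip, old_insn, X86_PATCH_SIZE))
		return -EBUSY;			/* text is not what we expect */
	if (memcmp(ip, new_insn, X86_PATCH_SIZE))
		poke_text(ip, new_insn);	/* skip if already up to date */
	return 0;

This is why prog_array_map_poke_run() can now pass BPF_MOD_JUMP unconditionally and let the NULL-ness of the old/new prog select among nop->jump, jump->jump and jump->nop.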