From 12997d1a999cd1b22e21a238c96780f2a55e4e13 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Mon, 18 Nov 2013 11:00:29 -0700
Subject: Revert "workqueue: allow work_on_cpu() to be called recursively"

This reverts commit c2fda509667b0fda4372a237f5a59ea4570b1627.

c2fda509667b removed lockdep annotation from work_on_cpu() to work around
the PCI path that calls work_on_cpu() from within a work_on_cpu() work item
(PF driver .probe() method -> pci_enable_sriov() -> add VFs -> VF driver
.probe method).

961da7fb6b22 ("PCI: Avoid unnecessary CPU switch when calling driver
.probe() method) avoids that recursive work_on_cpu() use in a different
way, so this revert restores the work_on_cpu() lockdep annotation.

Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Tejun Heo <tj@kernel.org>
---
 kernel/workqueue.c | 32 ++++++++++----------------------
 1 file changed, 10 insertions(+), 22 deletions(-)

(limited to 'kernel')

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 987293d03ebc..5690b8eabfbc 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -2840,19 +2840,6 @@ already_gone:
 	return false;
 }
 
-static bool __flush_work(struct work_struct *work)
-{
-	struct wq_barrier barr;
-
-	if (start_flush_work(work, &barr)) {
-		wait_for_completion(&barr.done);
-		destroy_work_on_stack(&barr.work);
-		return true;
-	} else {
-		return false;
-	}
-}
-
 /**
  * flush_work - wait for a work to finish executing the last queueing instance
  * @work: the work to flush
@@ -2866,10 +2853,18 @@ static bool __flush_work(struct work_struct *work)
  */
 bool flush_work(struct work_struct *work)
 {
+	struct wq_barrier barr;
+
 	lock_map_acquire(&work->lockdep_map);
 	lock_map_release(&work->lockdep_map);
 
-	return __flush_work(work);
+	if (start_flush_work(work, &barr)) {
+		wait_for_completion(&barr.done);
+		destroy_work_on_stack(&barr.work);
+		return true;
+	} else {
+		return false;
+	}
 }
 EXPORT_SYMBOL_GPL(flush_work);
 
@@ -4814,14 +4809,7 @@ long work_on_cpu(int cpu, long (*fn)(void *), void *arg)
 
 	INIT_WORK_ONSTACK(&wfc.work, work_for_cpu_fn);
 	schedule_work_on(cpu, &wfc.work);
-
-	/*
-	 * The work item is on-stack and can't lead to deadlock through
-	 * flushing.  Use __flush_work() to avoid spurious lockdep warnings
-	 * when work_on_cpu()s are nested.
-	 */
-	__flush_work(&wfc.work);
-
+	flush_work(&wfc.work);
 	return wfc.ret;
 }
 EXPORT_SYMBOL_GPL(work_on_cpu);
-- 
cgit v1.2.3


From 4fc9bbf98fd66f879e628d8537ba7c240be2b58e Mon Sep 17 00:00:00 2001
From: Khalid Aziz <khalid.aziz@oracle.com>
Date: Wed, 27 Nov 2013 15:19:25 -0700
Subject: PCI: Disable Bus Master only on kexec reboot

Add a flag to tell the PCI subsystem that kernel is shutting down in
preparation to kexec a kernel.  Add code in PCI subsystem to use this flag
to clear Bus Master bit on PCI devices only in case of kexec reboot.

This fixes a power-off problem on Acer Aspire V5-573G and likely other
machines and avoids any other issues caused by clearing Bus Master bit on
PCI devices in normal shutdown path.  The problem was introduced by
b566a22c2332 ("PCI: disable Bus Master on PCI device shutdown").

This patch is based on discussion at
http://marc.info/?l=linux-pci&m=138425645204355&w=2

Link: https://bugzilla.kernel.org/show_bug.cgi?id=63861
Reported-by: Chang Liu <cl91tp@gmail.com>
Signed-off-by: Khalid Aziz <khalid.aziz@oracle.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Konstantin Khlebnikov <koct9i@gmail.com>
Cc: stable@vger.kernel.org	# v3.5+
---
 drivers/pci/pci-driver.c | 12 +++++++++---
 include/linux/kexec.h    |  3 +++
 kernel/kexec.c           |  4 ++++
 3 files changed, 16 insertions(+), 3 deletions(-)

(limited to 'kernel')

diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index 7edd5c307446..25f0bc659164 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -19,6 +19,7 @@
 #include <linux/cpu.h>
 #include <linux/pm_runtime.h>
 #include <linux/suspend.h>
+#include <linux/kexec.h>
 #include "pci.h"
 
 struct pci_dynid {
@@ -415,12 +416,17 @@ static void pci_device_shutdown(struct device *dev)
 	pci_msi_shutdown(pci_dev);
 	pci_msix_shutdown(pci_dev);
 
+#ifdef CONFIG_KEXEC
 	/*
-	 * Turn off Bus Master bit on the device to tell it to not
-	 * continue to do DMA. Don't touch devices in D3cold or unknown states.
+	 * If this is a kexec reboot, turn off Bus Master bit on the
+	 * device to tell it to not continue to do DMA. Don't touch
+	 * devices in D3cold or unknown states.
+	 * If it is not a kexec reboot, firmware will hit the PCI
+	 * devices with big hammer and stop their DMA any way.
 	 */
-	if (pci_dev->current_state <= PCI_D3hot)
+	if (kexec_in_progress && (pci_dev->current_state <= PCI_D3hot))
 		pci_clear_master(pci_dev);
+#endif
 }
 
 #ifdef CONFIG_PM
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index d78d28a733b1..5fd33dc1fe3a 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -198,6 +198,9 @@ extern u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
 extern size_t vmcoreinfo_size;
 extern size_t vmcoreinfo_max_size;
 
+/* flag to track if kexec reboot is in progress */
+extern bool kexec_in_progress;
+
 int __init parse_crashkernel(char *cmdline, unsigned long long system_ram,
 		unsigned long long *crash_size, unsigned long long *crash_base);
 int parse_crashkernel_high(char *cmdline, unsigned long long system_ram,
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 490afc03627e..d0d8fca54065 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -47,6 +47,9 @@ u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
 size_t vmcoreinfo_size;
 size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data);
 
+/* Flag to indicate we are going to kexec a new kernel */
+bool kexec_in_progress = false;
+
 /* Location of the reserved area for the crash kernel */
 struct resource crashk_res = {
 	.name  = "Crash kernel",
@@ -1675,6 +1678,7 @@ int kernel_kexec(void)
 	} else
 #endif
 	{
+		kexec_in_progress = true;
 		kernel_restart_prepare(NULL);
 		printk(KERN_EMERG "Starting new kernel\n");
 		machine_shutdown();
-- 
cgit v1.2.3


From 7cfe5b3310a1b45f385ff18647bddb487a6c5525 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Tue, 10 Dec 2013 17:42:50 +1030
Subject: Ignore generated file kernel/x509_certificate_list

$ git status
# On branch pending-rebases
# Untracked files:
#   (use "git add <file>..." to include in what will be committed)
#
#	kernel/x509_certificate_list
nothing added to commit but untracked files present (use "git add" to track)
$

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: David Howells <dhowells@redhat.com>
---
 kernel/.gitignore | 1 +
 1 file changed, 1 insertion(+)

(limited to 'kernel')

diff --git a/kernel/.gitignore b/kernel/.gitignore
index b3097bde4e9c..790d83c7d160 100644
--- a/kernel/.gitignore
+++ b/kernel/.gitignore
@@ -5,3 +5,4 @@ config_data.h
 config_data.gz
 timeconst.h
 hz.bc
+x509_certificate_list
-- 
cgit v1.2.3


From 62226983da070f7e51068ec2e3a4da34672964c7 Mon Sep 17 00:00:00 2001
From: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Date: Thu, 5 Dec 2013 14:48:22 +0100
Subject: KEYS: correct alignment of system_certificate_list content in
 assembly file

Apart from data-type specific alignment constraints, there are also
architecture-specific alignment requirements.
For example, on s390 symbols must be on even addresses implying a 2-byte
alignment.  If the system_certificate_list_end symbol is on an odd address
and if this address is loaded, the least-significant bit is ignored.  As a
result, the load_system_certificate_list() fails to load the certificates
because of a wrong certificate length calculation.

To be safe, align system_certificate_list on an 8-byte boundary.  Also improve
the length calculation of the system_certificate_list content.  Introduce a
system_certificate_list_size (8-byte aligned because of unsigned long) variable
that stores the length.  Let the linker calculate this size by introducing
a start and end label for the certificate content.

Signed-off-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Signed-off-by: David Howells <dhowells@redhat.com>
---
 kernel/system_certificates.S | 14 ++++++++++++--
 kernel/system_keyring.c      |  4 ++--
 2 files changed, 14 insertions(+), 4 deletions(-)

(limited to 'kernel')

diff --git a/kernel/system_certificates.S b/kernel/system_certificates.S
index 4aef390671cb..3e9868d47535 100644
--- a/kernel/system_certificates.S
+++ b/kernel/system_certificates.S
@@ -3,8 +3,18 @@
 
 	__INITRODATA
 
+	.align 8
 	.globl VMLINUX_SYMBOL(system_certificate_list)
 VMLINUX_SYMBOL(system_certificate_list):
+__cert_list_start:
 	.incbin "kernel/x509_certificate_list"
-	.globl VMLINUX_SYMBOL(system_certificate_list_end)
-VMLINUX_SYMBOL(system_certificate_list_end):
+__cert_list_end:
+
+	.align 8
+	.globl VMLINUX_SYMBOL(system_certificate_list_size)
+VMLINUX_SYMBOL(system_certificate_list_size):
+#ifdef CONFIG_64BIT
+	.quad __cert_list_end - __cert_list_start
+#else
+	.long __cert_list_end - __cert_list_start
+#endif
diff --git a/kernel/system_keyring.c b/kernel/system_keyring.c
index 564dd93430a2..52ebc70263f4 100644
--- a/kernel/system_keyring.c
+++ b/kernel/system_keyring.c
@@ -22,7 +22,7 @@ struct key *system_trusted_keyring;
 EXPORT_SYMBOL_GPL(system_trusted_keyring);
 
 extern __initconst const u8 system_certificate_list[];
-extern __initconst const u8 system_certificate_list_end[];
+extern __initconst const unsigned long system_certificate_list_size;
 
 /*
  * Load the compiled-in keys
@@ -60,8 +60,8 @@ static __init int load_system_certificate_list(void)
 
 	pr_notice("Loading compiled-in X.509 certificates\n");
 
-	end = system_certificate_list_end;
 	p = system_certificate_list;
+	end = p + system_certificate_list_size;
 	while (p < end) {
 		/* Each cert begins with an ASN.1 SEQUENCE tag and must be more
 		 * than 256 bytes in size.
-- 
cgit v1.2.3


From f12d5bfceb7e1f9051563381ec047f7f13956c3c Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Thu, 12 Dec 2013 09:38:42 -0800
Subject: futex: fix handling of read-only-mapped hugepages

The hugepage code had the exact same bug that regular pages had in
commit 7485d0d3758e ("futexes: Remove rw parameter from
get_futex_key()").

The regular page case was fixed by commit 9ea71503a8ed ("futex: Fix
regression with read only mappings"), but the transparent hugepage case
(added in a5b338f2b0b1: "thp: update futex compound knowledge") case
remained broken.

Found by Dave Jones and his trinity tool.

Reported-and-tested-by: Dave Jones <davej@fedoraproject.org>
Cc: stable@kernel.org # v2.6.38+
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Darren Hart <dvhart@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/futex.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/futex.c b/kernel/futex.c
index 80ba086f021d..02febad00794 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -288,7 +288,7 @@ again:
 		put_page(page);
 		/* serialize against __split_huge_page_splitting() */
 		local_irq_disable();
-		if (likely(__get_user_pages_fast(address, 1, 1, &page) == 1)) {
+		if (likely(__get_user_pages_fast(address, 1, !ro, &page) == 1)) {
 			page_head = compound_head(page);
 			/*
 			 * page_head is valid pointer but we must pin
-- 
cgit v1.2.3


From 5cdec2d833748fbd27d3682f7209225c504c79c5 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Thu, 12 Dec 2013 09:53:51 -0800
Subject: futex: move user address verification up to common code

When debugging the read-only hugepage case, I was confused by the fact
that get_futex_key() did an access_ok() only for the non-shared futex
case, since the user address checking really isn't in any way specific
to the private key handling.

Now, it turns out that the shared key handling does effectively do the
equivalent checks inside get_user_pages_fast() (it doesn't actually
check the address range on x86, but does check the page protections for
being a user page).  So it wasn't actually a bug, but the fact that we
treat the address differently for private and shared futexes threw me
for a loop.

Just move the check up, so that it gets done for both cases.  Also, use
the 'rw' parameter for the type, even if it doesn't actually matter any
more (it's a historical artifact of the old racy i386 "page faults from
kernel space don't check write protections").

Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/futex.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/futex.c b/kernel/futex.c
index 02febad00794..f6ff0191ecf7 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -251,6 +251,9 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)
 		return -EINVAL;
 	address -= key->both.offset;
 
+	if (unlikely(!access_ok(rw, uaddr, sizeof(u32))))
+		return -EFAULT;
+
 	/*
 	 * PROCESS_PRIVATE futexes are fast.
 	 * As the mm cannot disappear under us and the 'key' only needs
@@ -259,8 +262,6 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)
 	 *        but access_ok() should be faster than find_vma()
 	 */
 	if (!fshared) {
-		if (unlikely(!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))))
-			return -EFAULT;
 		key->private.mm = mm;
 		key->private.address = address;
 		get_futex_key_refs(key);
-- 
cgit v1.2.3