From a2b4a79b88b24c49d98d45a06a014ffd22ada1a4 Mon Sep 17 00:00:00 2001
From: Baruch Siach <baruch@tkos.co.il>
Date: Sun, 10 Sep 2017 20:29:45 +0300
Subject: spi: uapi: spidev: add missing ioctl header

The SPI_IOC_MESSAGE() macro references _IOC_SIZEBITS. Add linux/ioctl.h
to make sure this macro is defined. This fixes the following build
failure of lcdproc with the musl libc:

In file included from .../sysroot/usr/include/sys/ioctl.h:7:0,
                 from hd44780-spi.c:31:
hd44780-spi.c: In function 'spi_transfer':
hd44780-spi.c:89:24: error: '_IOC_SIZEBITS' undeclared (first use in this function)
  status = ioctl(p->fd, SPI_IOC_MESSAGE(1), &xfer);
                        ^

Signed-off-by: Baruch Siach <baruch@tkos.co.il>
Signed-off-by: Mark Brown <broonie@kernel.org>
Cc: stable@vger.kernel.org
---
 include/uapi/linux/spi/spidev.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/spi/spidev.h b/include/uapi/linux/spi/spidev.h
index dd5f21e75805..856de39d0b89 100644
--- a/include/uapi/linux/spi/spidev.h
+++ b/include/uapi/linux/spi/spidev.h
@@ -23,6 +23,7 @@
 #define SPIDEV_H
 
 #include <linux/types.h>
+#include <linux/ioctl.h>
 
 /* User space versions of kernel symbols for SPI clocking modes,
  * matching <linux/spi/spi.h>
-- 
cgit v1.2.3


From bd7a3fe770ebd8391d1c7d072ff88e9e76d063eb Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Tue, 19 Sep 2017 15:07:17 +0200
Subject: USB: fix out-of-bounds in usb_set_configuration

Andrey Konovalov reported a possible out-of-bounds problem for a USB interface
association descriptor.  He writes:
	It seems there's no proper size check of a USB_DT_INTERFACE_ASSOCIATION
	descriptor. It's only checked that the size is >= 2 in
	usb_parse_configuration(), so find_iad() might do out-of-bounds access
	to intf_assoc->bInterfaceCount.

And he's right, we don't check for crazy descriptors of this type very well, so
resolve this problem.  Yet another issue found by syzkaller...

Reported-by: Andrey Konovalov <andreyknvl@google.com>
Tested-by: Andrey Konovalov <andreyknvl@google.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/config.c    | 14 +++++++++++---
 include/uapi/linux/usb/ch9.h |  1 +
 2 files changed, 12 insertions(+), 3 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c
index 854c8d66cfbe..68b54bd88d1e 100644
--- a/drivers/usb/core/config.c
+++ b/drivers/usb/core/config.c
@@ -643,15 +643,23 @@ static int usb_parse_configuration(struct usb_device *dev, int cfgidx,
 
 		} else if (header->bDescriptorType ==
 				USB_DT_INTERFACE_ASSOCIATION) {
+			struct usb_interface_assoc_descriptor *d;
+
+			d = (struct usb_interface_assoc_descriptor *)header;
+			if (d->bLength < USB_DT_INTERFACE_ASSOCIATION_SIZE) {
+				dev_warn(ddev,
+					 "config %d has an invalid interface association descriptor of length %d, skipping\n",
+					 cfgno, d->bLength);
+				continue;
+			}
+
 			if (iad_num == USB_MAXIADS) {
 				dev_warn(ddev, "found more Interface "
 					       "Association Descriptors "
 					       "than allocated for in "
 					       "configuration %d\n", cfgno);
 			} else {
-				config->intf_assoc[iad_num] =
-					(struct usb_interface_assoc_descriptor
-					*)header;
+				config->intf_assoc[iad_num] = d;
 				iad_num++;
 			}
 
diff --git a/include/uapi/linux/usb/ch9.h b/include/uapi/linux/usb/ch9.h
index ce1169af39d7..2a5d63040a0b 100644
--- a/include/uapi/linux/usb/ch9.h
+++ b/include/uapi/linux/usb/ch9.h
@@ -780,6 +780,7 @@ struct usb_interface_assoc_descriptor {
 	__u8  iFunction;
 } __attribute__ ((packed));
 
+#define USB_DT_INTERFACE_ASSOCIATION_SIZE	8
 
 /*-------------------------------------------------------------------------*/
 
-- 
cgit v1.2.3


From 62e082430ea4bb5b28909ca4375bb683931e22aa Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Wed, 20 Sep 2017 07:29:49 -0400
Subject: dm ioctl: fix alignment of event number in the device list

The size of struct dm_name_list is different on 32-bit and 64-bit
kernels (so "(nl + 1)" differs between 32-bit and 64-bit kernels).

This mismatch caused some harmless difference in padding when using a 32-bit
or 64-bit kernel. Commit 23d70c5e52dd ("dm ioctl: report event number in
DM_LIST_DEVICES") added reporting of the event number in the output of
DM_LIST_DEVICES_CMD. This difference in padding makes it impossible for
userspace to determine the location of the event number (the location
would be different when running on 32-bit and 64-bit kernels).

Fix the padding by using offsetof(struct dm_name_list, name) instead of
sizeof(struct dm_name_list) to determine the location of entries.

Also, the ioctl version number is incremented to 37 so that userspace
can use the version number to determine that the event number is present
and correctly located.

In addition, a global event is now raised when a DM device is created,
removed, renamed or when a table is swapped, so that the user can monitor
for device changes.

Reported-by: Eugene Syromiatnikov <esyr@redhat.com>
Fixes: 23d70c5e52dd ("dm ioctl: report event number in DM_LIST_DEVICES")
Cc: stable@vger.kernel.org # 4.13
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-core.h          |  1 +
 drivers/md/dm-ioctl.c         | 37 ++++++++++++++++++++++++-------------
 drivers/md/dm.c               | 10 ++++++++--
 include/uapi/linux/dm-ioctl.h |  4 ++--
 4 files changed, 35 insertions(+), 17 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h
index 24eddbdf2ab4..203144762f36 100644
--- a/drivers/md/dm-core.h
+++ b/drivers/md/dm-core.h
@@ -149,5 +149,6 @@ static inline bool dm_message_test_buffer_overflow(char *result, unsigned maxlen
 
 extern atomic_t dm_global_event_nr;
 extern wait_queue_head_t dm_global_eventq;
+void dm_issue_global_event(void);
 
 #endif
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index 8756a6850431..e52676fa9832 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -477,9 +477,13 @@ static int remove_all(struct file *filp, struct dm_ioctl *param, size_t param_si
  * Round up the ptr to an 8-byte boundary.
  */
 #define ALIGN_MASK 7
+static inline size_t align_val(size_t val)
+{
+	return (val + ALIGN_MASK) & ~ALIGN_MASK;
+}
 static inline void *align_ptr(void *ptr)
 {
-	return (void *) (((size_t) (ptr + ALIGN_MASK)) & ~ALIGN_MASK);
+	return (void *)align_val((size_t)ptr);
 }
 
 /*
@@ -505,7 +509,7 @@ static int list_devices(struct file *filp, struct dm_ioctl *param, size_t param_
 	struct hash_cell *hc;
 	size_t len, needed = 0;
 	struct gendisk *disk;
-	struct dm_name_list *nl, *old_nl = NULL;
+	struct dm_name_list *orig_nl, *nl, *old_nl = NULL;
 	uint32_t *event_nr;
 
 	down_write(&_hash_lock);
@@ -516,17 +520,15 @@ static int list_devices(struct file *filp, struct dm_ioctl *param, size_t param_
 	 */
 	for (i = 0; i < NUM_BUCKETS; i++) {
 		list_for_each_entry (hc, _name_buckets + i, name_list) {
-			needed += sizeof(struct dm_name_list);
-			needed += strlen(hc->name) + 1;
-			needed += ALIGN_MASK;
-			needed += (sizeof(uint32_t) + ALIGN_MASK) & ~ALIGN_MASK;
+			needed += align_val(offsetof(struct dm_name_list, name) + strlen(hc->name) + 1);
+			needed += align_val(sizeof(uint32_t));
 		}
 	}
 
 	/*
 	 * Grab our output buffer.
 	 */
-	nl = get_result_buffer(param, param_size, &len);
+	nl = orig_nl = get_result_buffer(param, param_size, &len);
 	if (len < needed) {
 		param->flags |= DM_BUFFER_FULL_FLAG;
 		goto out;
@@ -549,11 +551,16 @@ static int list_devices(struct file *filp, struct dm_ioctl *param, size_t param_
 			strcpy(nl->name, hc->name);
 
 			old_nl = nl;
-			event_nr = align_ptr(((void *) (nl + 1)) + strlen(hc->name) + 1);
+			event_nr = align_ptr(nl->name + strlen(hc->name) + 1);
 			*event_nr = dm_get_event_nr(hc->md);
 			nl = align_ptr(event_nr + 1);
 		}
 	}
+	/*
+	 * If mismatch happens, security may be compromised due to buffer
+	 * overflow, so it's better to crash.
+	 */
+	BUG_ON((char *)nl - (char *)orig_nl != needed);
 
  out:
 	up_write(&_hash_lock);
@@ -1621,7 +1628,8 @@ static int target_message(struct file *filp, struct dm_ioctl *param, size_t para
  * which has a variable size, is not used by the function processing
  * the ioctl.
  */
-#define IOCTL_FLAGS_NO_PARAMS	1
+#define IOCTL_FLAGS_NO_PARAMS		1
+#define IOCTL_FLAGS_ISSUE_GLOBAL_EVENT	2
 
 /*-----------------------------------------------------------------
  * Implementation of open/close/ioctl on the special char
@@ -1635,12 +1643,12 @@ static ioctl_fn lookup_ioctl(unsigned int cmd, int *ioctl_flags)
 		ioctl_fn fn;
 	} _ioctls[] = {
 		{DM_VERSION_CMD, 0, NULL}, /* version is dealt with elsewhere */
-		{DM_REMOVE_ALL_CMD, IOCTL_FLAGS_NO_PARAMS, remove_all},
+		{DM_REMOVE_ALL_CMD, IOCTL_FLAGS_NO_PARAMS | IOCTL_FLAGS_ISSUE_GLOBAL_EVENT, remove_all},
 		{DM_LIST_DEVICES_CMD, 0, list_devices},
 
-		{DM_DEV_CREATE_CMD, IOCTL_FLAGS_NO_PARAMS, dev_create},
-		{DM_DEV_REMOVE_CMD, IOCTL_FLAGS_NO_PARAMS, dev_remove},
-		{DM_DEV_RENAME_CMD, 0, dev_rename},
+		{DM_DEV_CREATE_CMD, IOCTL_FLAGS_NO_PARAMS | IOCTL_FLAGS_ISSUE_GLOBAL_EVENT, dev_create},
+		{DM_DEV_REMOVE_CMD, IOCTL_FLAGS_NO_PARAMS | IOCTL_FLAGS_ISSUE_GLOBAL_EVENT, dev_remove},
+		{DM_DEV_RENAME_CMD, IOCTL_FLAGS_ISSUE_GLOBAL_EVENT, dev_rename},
 		{DM_DEV_SUSPEND_CMD, IOCTL_FLAGS_NO_PARAMS, dev_suspend},
 		{DM_DEV_STATUS_CMD, IOCTL_FLAGS_NO_PARAMS, dev_status},
 		{DM_DEV_WAIT_CMD, 0, dev_wait},
@@ -1869,6 +1877,9 @@ static int ctl_ioctl(struct file *file, uint command, struct dm_ioctl __user *us
 	    unlikely(ioctl_flags & IOCTL_FLAGS_NO_PARAMS))
 		DMERR("ioctl %d tried to output some data but has IOCTL_FLAGS_NO_PARAMS set", cmd);
 
+	if (!r && ioctl_flags & IOCTL_FLAGS_ISSUE_GLOBAL_EVENT)
+		dm_issue_global_event();
+
 	/*
 	 * Copy the results back to userland.
 	 */
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 6e54145969c5..4be85324f44d 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -52,6 +52,12 @@ static struct workqueue_struct *deferred_remove_workqueue;
 atomic_t dm_global_event_nr = ATOMIC_INIT(0);
 DECLARE_WAIT_QUEUE_HEAD(dm_global_eventq);
 
+void dm_issue_global_event(void)
+{
+	atomic_inc(&dm_global_event_nr);
+	wake_up(&dm_global_eventq);
+}
+
 /*
  * One of these is allocated per bio.
  */
@@ -1865,9 +1871,8 @@ static void event_callback(void *context)
 	dm_send_uevents(&uevents, &disk_to_dev(md->disk)->kobj);
 
 	atomic_inc(&md->event_nr);
-	atomic_inc(&dm_global_event_nr);
 	wake_up(&md->eventq);
-	wake_up(&dm_global_eventq);
+	dm_issue_global_event();
 }
 
 /*
@@ -2283,6 +2288,7 @@ struct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *table)
 	}
 
 	map = __bind(md, table, &limits);
+	dm_issue_global_event();
 
 out:
 	mutex_unlock(&md->suspend_lock);
diff --git a/include/uapi/linux/dm-ioctl.h b/include/uapi/linux/dm-ioctl.h
index 412c06a624c8..ccaea525340b 100644
--- a/include/uapi/linux/dm-ioctl.h
+++ b/include/uapi/linux/dm-ioctl.h
@@ -269,9 +269,9 @@ enum {
 #define DM_DEV_SET_GEOMETRY	_IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl)
 
 #define DM_VERSION_MAJOR	4
-#define DM_VERSION_MINOR	36
+#define DM_VERSION_MINOR	37
 #define DM_VERSION_PATCHLEVEL	0
-#define DM_VERSION_EXTRA	"-ioctl (2017-06-09)"
+#define DM_VERSION_EXTRA	"-ioctl (2017-09-20)"
 
 /* Status bits */
 #define DM_READONLY_FLAG	(1 << 0) /* In/Out */
-- 
cgit v1.2.3


From 90caccdd8cc0215705f18b92771b449b01e2474a Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@fb.com>
Date: Tue, 3 Oct 2017 15:37:20 -0700
Subject: bpf: fix bpf_tail_call() x64 JIT

- bpf prog_array just like all other types of bpf array accepts 32-bit index.
  Clarify that in the comment.
- fix x64 JIT of bpf_tail_call which was incorrectly loading 8 instead of 4 bytes
- tighten corresponding check in the interpreter to stay consistent

The JIT bug can be triggered after the introduction of the BPF_F_NUMA_NODE flag
in commit 96eabe7a40aa in 4.14. Before that, map_flags would stay zero, so
although the JIT code was wrong it would still check bounds correctly.
Hence two Fixes tags. All other JITs don't have this problem.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Fixes: 96eabe7a40aa ("bpf: Allow selecting numa node during map creation")
Fixes: b52f00e6a715 ("x86: bpf_jit: implement bpf_tail_call() helper")
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/x86/net/bpf_jit_comp.c | 4 ++--
 include/uapi/linux/bpf.h    | 2 +-
 kernel/bpf/core.c           | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 8c9573660d51..0554e8aef4d5 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -284,9 +284,9 @@ static void emit_bpf_tail_call(u8 **pprog)
 	/* if (index >= array->map.max_entries)
 	 *   goto out;
 	 */
-	EMIT4(0x48, 0x8B, 0x46,                   /* mov rax, qword ptr [rsi + 16] */
+	EMIT2(0x89, 0xD2);                        /* mov edx, edx */
+	EMIT3(0x39, 0x56,                         /* cmp dword ptr [rsi + 16], edx */
 	      offsetof(struct bpf_array, map.max_entries));
-	EMIT3(0x48, 0x39, 0xD0);                  /* cmp rax, rdx */
 #define OFFSET1 43 /* number of bytes to jump */
 	EMIT2(X86_JBE, OFFSET1);                  /* jbe out */
 	label1 = cnt;
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 43ab5c402f98..f90860d1f897 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -312,7 +312,7 @@ union bpf_attr {
  *     jump into another BPF program
  *     @ctx: context pointer passed to next program
  *     @prog_array_map: pointer to map which type is BPF_MAP_TYPE_PROG_ARRAY
- *     @index: index inside array that selects specific program to run
+ *     @index: 32-bit index inside array that selects specific program to run
  *     Return: 0 on success or negative error
  *
  * int bpf_clone_redirect(skb, ifindex, flags)
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 917cc04a0a94..7b62df86be1d 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1022,7 +1022,7 @@ select_insn:
 		struct bpf_map *map = (struct bpf_map *) (unsigned long) BPF_R2;
 		struct bpf_array *array = container_of(map, struct bpf_array, map);
 		struct bpf_prog *prog;
-		u64 index = BPF_R3;
+		u32 index = BPF_R3;
 
 		if (unlikely(index >= array->map.max_entries))
 			goto out;
-- 
cgit v1.2.3


From 98589a0998b8b13c4a8fa1ccb0e62751a019faa5 Mon Sep 17 00:00:00 2001
From: Shmulik Ladkani <shmulik.ladkani@gmail.com>
Date: Mon, 9 Oct 2017 15:27:15 +0300
Subject: netfilter: xt_bpf: Fix XT_BPF_MODE_FD_PINNED mode of 'xt_bpf_info_v1'

Commit 2c16d6033264 ("netfilter: xt_bpf: support ebpf") introduced
support for attaching an eBPF object by an fd, with the
'bpf_mt_check_v1' ABI expecting the '.fd' to be specified upon each
IPT_SO_SET_REPLACE call.

However this breaks subsequent iptables calls:

 # iptables -A INPUT -m bpf --object-pinned /sys/fs/bpf/xxx -j ACCEPT
 # iptables -A INPUT -s 5.6.7.8 -j ACCEPT
 iptables: Invalid argument. Run `dmesg' for more information.

That's because iptables works by loading existing rules using
IPT_SO_GET_ENTRIES to userspace, then issuing IPT_SO_SET_REPLACE with
the replacement set.

However, the loaded 'xt_bpf_info_v1' has an arbitrary '.fd' number
(from the initial "iptables -m bpf" invocation) - so when the 2nd invocation
occurs, userspace passes a bogus fd number, which causes
'bpf_mt_check_v1' to fail.

One suggested solution [1] was to hack iptables userspace, to perform an
"entries fixup" immediately after IPT_SO_GET_ENTRIES, by opening a new,
process-local fd for every 'xt_bpf_info_v1' entry seen.

However, in [2] both Pablo Neira Ayuso and Willem de Bruijn suggested
deprecating the xt_bpf_info_v1 ABI dealing with pinned ebpf objects.

This fix changes the XT_BPF_MODE_FD_PINNED behavior to ignore the given
'.fd' and instead perform an in-kernel lookup for the bpf object given
the provided '.path'.

It also defines an alias for the XT_BPF_MODE_FD_PINNED mode, named
XT_BPF_MODE_PATH_PINNED, to better reflect the fact that the user is
expected to provide the path of the pinned object.

Existing XT_BPF_MODE_FD_ELF behavior (non-pinned fd mode) is preserved.

References: [1] https://marc.info/?l=netfilter-devel&m=150564724607440&w=2
            [2] https://marc.info/?l=netfilter-devel&m=150575727129880&w=2

Reported-by: Rafael Buchbinder <rafi@rbk.ms>
Signed-off-by: Shmulik Ladkani <shmulik.ladkani@gmail.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/bpf.h                   |  5 +++++
 include/uapi/linux/netfilter/xt_bpf.h |  1 +
 kernel/bpf/inode.c                    |  1 +
 net/netfilter/xt_bpf.c                | 22 ++++++++++++++++++++--
 4 files changed, 27 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 8390859e79e7..f1af7d63d678 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -368,6 +368,11 @@ static inline void __bpf_prog_uncharge(struct user_struct *user, u32 pages)
 {
 }
 
+static inline int bpf_obj_get_user(const char __user *pathname)
+{
+	return -EOPNOTSUPP;
+}
+
 static inline struct net_device  *__dev_map_lookup_elem(struct bpf_map *map,
 						       u32 key)
 {
diff --git a/include/uapi/linux/netfilter/xt_bpf.h b/include/uapi/linux/netfilter/xt_bpf.h
index b97725af2ac0..da161b56c79e 100644
--- a/include/uapi/linux/netfilter/xt_bpf.h
+++ b/include/uapi/linux/netfilter/xt_bpf.h
@@ -23,6 +23,7 @@ enum xt_bpf_modes {
 	XT_BPF_MODE_FD_PINNED,
 	XT_BPF_MODE_FD_ELF,
 };
+#define XT_BPF_MODE_PATH_PINNED XT_BPF_MODE_FD_PINNED
 
 struct xt_bpf_info_v1 {
 	__u16 mode;
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index e833ed914358..be1dde967208 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -363,6 +363,7 @@ out:
 	putname(pname);
 	return ret;
 }
+EXPORT_SYMBOL_GPL(bpf_obj_get_user);
 
 static void bpf_evict_inode(struct inode *inode)
 {
diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c
index 38986a95216c..29123934887b 100644
--- a/net/netfilter/xt_bpf.c
+++ b/net/netfilter/xt_bpf.c
@@ -8,6 +8,7 @@
  */
 
 #include <linux/module.h>
+#include <linux/syscalls.h>
 #include <linux/skbuff.h>
 #include <linux/filter.h>
 #include <linux/bpf.h>
@@ -49,6 +50,22 @@ static int __bpf_mt_check_fd(int fd, struct bpf_prog **ret)
 	return 0;
 }
 
+static int __bpf_mt_check_path(const char *path, struct bpf_prog **ret)
+{
+	mm_segment_t oldfs = get_fs();
+	int retval, fd;
+
+	set_fs(KERNEL_DS);
+	fd = bpf_obj_get_user(path);
+	set_fs(oldfs);
+	if (fd < 0)
+		return fd;
+
+	retval = __bpf_mt_check_fd(fd, ret);
+	sys_close(fd);
+	return retval;
+}
+
 static int bpf_mt_check(const struct xt_mtchk_param *par)
 {
 	struct xt_bpf_info *info = par->matchinfo;
@@ -66,9 +83,10 @@ static int bpf_mt_check_v1(const struct xt_mtchk_param *par)
 		return __bpf_mt_check_bytecode(info->bpf_program,
 					       info->bpf_program_num_elem,
 					       &info->filter);
-	else if (info->mode == XT_BPF_MODE_FD_PINNED ||
-		 info->mode == XT_BPF_MODE_FD_ELF)
+	else if (info->mode == XT_BPF_MODE_FD_ELF)
 		return __bpf_mt_check_fd(info->fd, &info->filter);
+	else if (info->mode == XT_BPF_MODE_PATH_PINNED)
+		return __bpf_mt_check_path(info->path, &info->filter);
 	else
 		return -EINVAL;
 }
-- 
cgit v1.2.3


From a961e40917fb14614d368d8bc9782ca4d6a8cd11 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Thu, 19 Oct 2017 13:30:15 -0400
Subject: membarrier: Provide register expedited private command

This introduces a "register private expedited" membarrier command which
allows eventual removal of important memory barrier constraints on the
scheduler fast-paths. It changes how the "private expedited" membarrier
command (new to 4.14) is used from user-space.

This new command allows processes to register their intent to use the
private expedited command.  This affects how the expedited private
command introduced in 4.14-rc is meant to be used, and should be merged
before 4.14 final.

Processes are now required to register before using
MEMBARRIER_CMD_PRIVATE_EXPEDITED, otherwise that command returns EPERM.

This fixes a problem that arose when designing requested extensions to
sys_membarrier() to allow JITs to efficiently flush old code from
instruction caches.  Several potential algorithms are much less painful
if the user registers intent to use this functionality early on, for
example, before the process spawns the second thread.  Registering at
this time removes the need to interrupt each and every thread in that
process at the first expedited sys_membarrier() system call.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/exec.c                       |  1 +
 include/linux/mm_types.h        |  3 +++
 include/linux/sched/mm.h        | 16 ++++++++++++++++
 include/uapi/linux/membarrier.h | 23 ++++++++++++++++-------
 kernel/sched/membarrier.c       | 34 ++++++++++++++++++++++++++++++----
 5 files changed, 66 insertions(+), 11 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/fs/exec.c b/fs/exec.c
index 5470d3c1892a..3e14ba25f678 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1802,6 +1802,7 @@ static int do_execveat_common(int fd, struct filename *filename,
 	/* execve succeeded */
 	current->fs->in_exec = 0;
 	current->in_execve = 0;
+	membarrier_execve(current);
 	acct_update_integrals(current);
 	task_numa_free(current);
 	free_bprm(bprm);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 46f4ecf5479a..1861ea8dba77 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -445,6 +445,9 @@ struct mm_struct {
 	unsigned long flags; /* Must use atomic bitops to access the bits */
 
 	struct core_state *core_state; /* coredumping support */
+#ifdef CONFIG_MEMBARRIER
+	atomic_t membarrier_state;
+#endif
 #ifdef CONFIG_AIO
 	spinlock_t			ioctx_lock;
 	struct kioctx_table __rcu	*ioctx_table;
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index ae53e413fb13..ab9bf7b73954 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -211,4 +211,20 @@ static inline void memalloc_noreclaim_restore(unsigned int flags)
 	current->flags = (current->flags & ~PF_MEMALLOC) | flags;
 }
 
+#ifdef CONFIG_MEMBARRIER
+enum {
+	MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY	= (1U << 0),
+	MEMBARRIER_STATE_SWITCH_MM			= (1U << 1),
+};
+
+static inline void membarrier_execve(struct task_struct *t)
+{
+	atomic_set(&t->mm->membarrier_state, 0);
+}
+#else
+static inline void membarrier_execve(struct task_struct *t)
+{
+}
+#endif
+
 #endif /* _LINUX_SCHED_MM_H */
diff --git a/include/uapi/linux/membarrier.h b/include/uapi/linux/membarrier.h
index 6d47b3249d8a..4e01ad7ffe98 100644
--- a/include/uapi/linux/membarrier.h
+++ b/include/uapi/linux/membarrier.h
@@ -52,21 +52,30 @@
  *                          (non-running threads are de facto in such a
  *                          state). This only covers threads from the
  *                          same processes as the caller thread. This
- *                          command returns 0. The "expedited" commands
- *                          complete faster than the non-expedited ones,
- *                          they never block, but have the downside of
- *                          causing extra overhead.
+ *                          command returns 0 on success. The
+ *                          "expedited" commands complete faster than
+ *                          the non-expedited ones, they never block,
+ *                          but have the downside of causing extra
+ *                          overhead. A process needs to register its
+ *                          intent to use the private expedited command
+ *                          prior to using it, otherwise this command
+ *                          returns -EPERM.
+ * @MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED:
+ *                          Register the process intent to use
+ *                          MEMBARRIER_CMD_PRIVATE_EXPEDITED. Always
+ *                          returns 0.
  *
  * Command to be passed to the membarrier system call. The commands need to
  * be a single bit each, except for MEMBARRIER_CMD_QUERY which is assigned to
  * the value 0.
  */
 enum membarrier_cmd {
-	MEMBARRIER_CMD_QUERY			= 0,
-	MEMBARRIER_CMD_SHARED			= (1 << 0),
+	MEMBARRIER_CMD_QUERY				= 0,
+	MEMBARRIER_CMD_SHARED				= (1 << 0),
 	/* reserved for MEMBARRIER_CMD_SHARED_EXPEDITED (1 << 1) */
 	/* reserved for MEMBARRIER_CMD_PRIVATE (1 << 2) */
-	MEMBARRIER_CMD_PRIVATE_EXPEDITED	= (1 << 3),
+	MEMBARRIER_CMD_PRIVATE_EXPEDITED		= (1 << 3),
+	MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED	= (1 << 4),
 };
 
 #endif /* _UAPI_LINUX_MEMBARRIER_H */
diff --git a/kernel/sched/membarrier.c b/kernel/sched/membarrier.c
index a92fddc22747..dd7908743dab 100644
--- a/kernel/sched/membarrier.c
+++ b/kernel/sched/membarrier.c
@@ -18,6 +18,7 @@
 #include <linux/membarrier.h>
 #include <linux/tick.h>
 #include <linux/cpumask.h>
+#include <linux/atomic.h>
 
 #include "sched.h"	/* for cpu_rq(). */
 
@@ -26,21 +27,26 @@
  * except MEMBARRIER_CMD_QUERY.
  */
 #define MEMBARRIER_CMD_BITMASK	\
-	(MEMBARRIER_CMD_SHARED | MEMBARRIER_CMD_PRIVATE_EXPEDITED)
+	(MEMBARRIER_CMD_SHARED | MEMBARRIER_CMD_PRIVATE_EXPEDITED	\
+	| MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED)
 
 static void ipi_mb(void *info)
 {
 	smp_mb();	/* IPIs should be serializing but paranoid. */
 }
 
-static void membarrier_private_expedited(void)
+static int membarrier_private_expedited(void)
 {
 	int cpu;
 	bool fallback = false;
 	cpumask_var_t tmpmask;
 
+	if (!(atomic_read(&current->mm->membarrier_state)
+			& MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY))
+		return -EPERM;
+
 	if (num_online_cpus() == 1)
-		return;
+		return 0;
 
 	/*
 	 * Matches memory barriers around rq->curr modification in
@@ -94,6 +100,24 @@ static void membarrier_private_expedited(void)
 	 * rq->curr modification in scheduler.
 	 */
 	smp_mb();	/* exit from system call is not a mb */
+	return 0;
+}
+
+static void membarrier_register_private_expedited(void)
+{
+	struct task_struct *p = current;
+	struct mm_struct *mm = p->mm;
+
+	/*
+	 * We need to consider threads belonging to different thread
+	 * groups, which use the same mm. (CLONE_VM but not
+	 * CLONE_THREAD).
+	 */
+	if (atomic_read(&mm->membarrier_state)
+			& MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY)
+		return;
+	atomic_or(MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY,
+			&mm->membarrier_state);
 }
 
 /**
@@ -144,7 +168,9 @@ SYSCALL_DEFINE2(membarrier, int, cmd, int, flags)
 			synchronize_sched();
 		return 0;
 	case MEMBARRIER_CMD_PRIVATE_EXPEDITED:
-		membarrier_private_expedited();
+		return membarrier_private_expedited();
+	case MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED:
+		membarrier_register_private_expedited();
 		return 0;
 	default:
 		return -EINVAL;
-- 
cgit v1.2.3


From bfa640757e9378c2f26867e723f1287e94f5a7ad Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Fri, 27 Oct 2017 09:45:53 -0700
Subject: bpf: rename sk_actions to align with bpf infrastructure

Recent additions to support multiple programs in cgroups impose
a strict requirement, "all yes is yes, any no is no". To enforce
this the infrastructure requires the 'no' return code, SK_DROP in
this case, to be 0.

To apply these rules to SK_SKB program types the sk_actions return
codes need to be adjusted.

This fix adds SK_PASS and makes 'SK_DROP = 0'. Finally, remove
SK_ABORTED to remove any chance that the API may allow aborted
program flows to be passed up the stack. This would be incorrect
behavior and allow programs to break existing policies.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/bpf.h       | 6 +++---
 kernel/bpf/sockmap.c           | 3 ++-
 net/core/filter.c              | 5 +++--
 tools/include/uapi/linux/bpf.h | 4 ++--
 4 files changed, 10 insertions(+), 8 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index f90860d1f897..0d7948ce2128 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -575,7 +575,7 @@ union bpf_attr {
  *     @map: pointer to sockmap
  *     @key: key to lookup sock in map
  *     @flags: reserved for future use
- *     Return: SK_REDIRECT
+ *     Return: SK_PASS
  *
  * int bpf_sock_map_update(skops, map, key, flags)
  *	@skops: pointer to bpf_sock_ops
@@ -786,8 +786,8 @@ struct xdp_md {
 };
 
 enum sk_action {
-	SK_ABORTED = 0,
-	SK_DROP,
+	SK_DROP = 0,
+	SK_PASS,
 	SK_REDIRECT,
 };
 
diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index 6778fb773934..66f00a2b27f4 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -122,7 +122,8 @@ static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb)
 	preempt_enable();
 	skb->sk = NULL;
 
-	return rc;
+	return rc == SK_PASS ?
+		(TCP_SKB_CB(skb)->bpf.map ? SK_REDIRECT : SK_PASS) : SK_DROP;
 }
 
 static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb)
diff --git a/net/core/filter.c b/net/core/filter.c
index 68eaa2f81a8e..6ae94f825f72 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1844,14 +1844,15 @@ BPF_CALL_4(bpf_sk_redirect_map, struct sk_buff *, skb,
 {
 	struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
 
+	/* If user passes invalid input drop the packet. */
 	if (unlikely(flags))
-		return SK_ABORTED;
+		return SK_DROP;
 
 	tcb->bpf.key = key;
 	tcb->bpf.flags = flags;
 	tcb->bpf.map = map;
 
-	return SK_REDIRECT;
+	return SK_PASS;
 }
 
 struct sock *do_sk_redirect_map(struct sk_buff *skb)
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 24b35a1fd4d6..c174971afbe6 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -787,8 +787,8 @@ struct xdp_md {
 };
 
 enum sk_action {
-	SK_ABORTED = 0,
-	SK_DROP,
+	SK_DROP = 0,
+	SK_PASS,
 	SK_REDIRECT,
 };
 
-- 
cgit v1.2.3


From 978aa0474115f3f5848949f2efce4def0766a5cb Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Sat, 28 Oct 2017 19:43:57 +0800
Subject: sctp: fix some type cast warnings introduced since very beginning

These warnings were found by running 'make C=2 M=net/sctp/'.
They have been there since the very beginning.

Note that after this patch, there is still one warning left in
sctp_outq_flush():
  sctp_chunk_fail(chunk, SCTP_ERROR_INV_STRM)

Since it has been moved to sctp_stream_outq_migrate on net-next,
to avoid the extra job when merging net-next to net, I will post
the fix for it after the merging is done.

Reported-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/sctp.h      | 2 +-
 include/uapi/linux/sctp.h | 2 +-
 net/sctp/ipv6.c           | 2 +-
 net/sctp/sm_make_chunk.c  | 4 ++--
 net/sctp/sm_sideeffect.c  | 4 ++--
 5 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/linux/sctp.h b/include/linux/sctp.h
index 09d7412e9cb0..da803dfc7a39 100644
--- a/include/linux/sctp.h
+++ b/include/linux/sctp.h
@@ -231,7 +231,7 @@ struct sctp_datahdr {
 	__be32 tsn;
 	__be16 stream;
 	__be16 ssn;
-	__be32 ppid;
+	__u32 ppid;
 	__u8  payload[0];
 };
 
diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h
index 6217ff8500a1..84fc2914b7fb 100644
--- a/include/uapi/linux/sctp.h
+++ b/include/uapi/linux/sctp.h
@@ -376,7 +376,7 @@ struct sctp_remote_error {
 	__u16 sre_type;
 	__u16 sre_flags;
 	__u32 sre_length;
-	__u16 sre_error;
+	__be16 sre_error;
 	sctp_assoc_t sre_assoc_id;
 	__u8 sre_data[0];
 };
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 7fe9e1d1b7ec..a6dfa86c0201 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -738,7 +738,7 @@ static int sctp_v6_skb_iif(const struct sk_buff *skb)
 /* Was this packet marked by Explicit Congestion Notification? */
 static int sctp_v6_is_ce(const struct sk_buff *skb)
 {
-	return *((__u32 *)(ipv6_hdr(skb))) & htonl(1 << 20);
+	return *((__u32 *)(ipv6_hdr(skb))) & (__force __u32)htonl(1 << 20);
 }
 
 /* Dump the v6 addr to the seq file. */
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 57c55045f5a7..514465b03829 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -2854,7 +2854,7 @@ struct sctp_chunk *sctp_make_asconf_update_ip(struct sctp_association *asoc,
 		addr_param_len = af->to_addr_param(addr, &addr_param);
 		param.param_hdr.type = flags;
 		param.param_hdr.length = htons(paramlen + addr_param_len);
-		param.crr_id = i;
+		param.crr_id = htonl(i);
 
 		sctp_addto_chunk(retval, paramlen, &param);
 		sctp_addto_chunk(retval, addr_param_len, &addr_param);
@@ -2867,7 +2867,7 @@ struct sctp_chunk *sctp_make_asconf_update_ip(struct sctp_association *asoc,
 		addr_param_len = af->to_addr_param(addr, &addr_param);
 		param.param_hdr.type = SCTP_PARAM_DEL_IP;
 		param.param_hdr.length = htons(paramlen + addr_param_len);
-		param.crr_id = i;
+		param.crr_id = htonl(i);
 
 		sctp_addto_chunk(retval, paramlen, &param);
 		sctp_addto_chunk(retval, addr_param_len, &addr_param);
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index 8f2762bba879..e2d9a4b49c9c 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -1607,12 +1607,12 @@ static int sctp_cmd_interpreter(enum sctp_event event_type,
 			break;
 
 		case SCTP_CMD_INIT_FAILED:
-			sctp_cmd_init_failed(commands, asoc, cmd->obj.err);
+			sctp_cmd_init_failed(commands, asoc, cmd->obj.u32);
 			break;
 
 		case SCTP_CMD_ASSOC_FAILED:
 			sctp_cmd_assoc_failed(commands, asoc, event_type,
-					      subtype, chunk, cmd->obj.err);
+					      subtype, chunk, cmd->obj.u32);
 			break;
 
 		case SCTP_CMD_INIT_COUNTER_INC:
-- 
cgit v1.2.3