From 99ce7962d52d1948ad6f2785e308d48e76e0a6ef Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 8 Feb 2018 14:02:32 +0100
Subject: objtool: Fix switch-table detection

Linus reported that GCC-7.3 generated a switch-table construct that
confused objtool. It turns out that, in particular due to KASAN, it is
possible to have unrelated .rodata usage in between the .rodata setup
for the switch-table and the following indirect jump.

The simple linear reverse search from the indirect jump would hit upon
the KASAN .rodata usage first and fail to find a switch_table,
resulting in a spurious 'sibling call with modified stack frame'
warning.

Fix this by creating a 'jump-stack' which we can 'unwind' during
reversal, thereby skipping over much of the in-between code.

This is not fool proof by any means, but is sufficient to make the
known cases work. Future work would be to construct more comprehensive
flow analysis code.

Reported-and-tested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20180208130232.GF25235@hirez.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 tools/objtool/check.c | 41 +++++++++++++++++++++++++++++++++++++++--
 tools/objtool/check.h |  1 +
 2 files changed, 40 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 9cd028aa1509..2e458eb45586 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -851,8 +851,14 @@ static int add_switch_table(struct objtool_file *file, struct symbol *func,
  *    This is a fairly uncommon pattern which is new for GCC 6.  As of this
  *    writing, there are 11 occurrences of it in the allmodconfig kernel.
  *
+ *    As of GCC 7 there are quite a few more of these and the 'in between' code
+ *    is significant. Esp. with KASAN enabled some of the code between the mov
+ *    and jmpq uses .rodata itself, which can confuse things.
+ *
  *    TODO: Once we have DWARF CFI and smarter instruction decoding logic,
  *    ensure the same register is used in the mov and jump instructions.
+ *
+ *    NOTE: RETPOLINE made it harder still to decode dynamic jumps.
  */
 static struct rela *find_switch_table(struct objtool_file *file,
 				      struct symbol *func,
@@ -874,12 +880,25 @@ static struct rela *find_switch_table(struct objtool_file *file,
 						text_rela->addend + 4);
 		if (!rodata_rela)
 			return NULL;
+
 		file->ignore_unreachables = true;
 		return rodata_rela;
 	}
 
 	/* case 3 */
-	func_for_each_insn_continue_reverse(file, func, insn) {
+	/*
+	 * Backward search using the @first_jump_src links, these help avoid
+	 * much of the 'in between' code. Which avoids us getting confused by
+	 * it.
+	 */
+	for (insn = list_prev_entry(insn, list);
+
+	     &insn->list != &file->insn_list &&
+	     insn->sec == func->sec &&
+	     insn->offset >= func->offset;
+
+	     insn = insn->first_jump_src ?: list_prev_entry(insn, list)) {
+
 		if (insn->type == INSN_JUMP_DYNAMIC)
 			break;
 
@@ -909,14 +928,32 @@ static struct rela *find_switch_table(struct objtool_file *file,
 	return NULL;
 }
 
+
 static int add_func_switch_tables(struct objtool_file *file,
 				  struct symbol *func)
 {
-	struct instruction *insn, *prev_jump = NULL;
+	struct instruction *insn, *last = NULL, *prev_jump = NULL;
 	struct rela *rela, *prev_rela = NULL;
 	int ret;
 
 	func_for_each_insn(file, func, insn) {
+		if (!last)
+			last = insn;
+
+		/*
+		 * Store back-pointers for unconditional forward jumps such
+		 * that find_switch_table() can back-track using those and
+		 * avoid some potentially confusing code.
+		 */
+		if (insn->type == INSN_JUMP_UNCONDITIONAL && insn->jump_dest &&
+		    insn->offset > last->offset &&
+		    insn->jump_dest->offset > insn->offset &&
+		    !insn->jump_dest->first_jump_src) {
+
+			insn->jump_dest->first_jump_src = insn;
+			last = insn->jump_dest;
+		}
+
 		if (insn->type != INSN_JUMP_DYNAMIC)
 			continue;
 
diff --git a/tools/objtool/check.h b/tools/objtool/check.h
index dbadb304a410..23a1d065cae1 100644
--- a/tools/objtool/check.h
+++ b/tools/objtool/check.h
@@ -47,6 +47,7 @@ struct instruction {
 	bool alt_group, visited, dead_end, ignore, hint, save, restore, ignore_alts;
 	struct symbol *call_dest;
 	struct instruction *jump_dest;
+	struct instruction *first_jump_src;
 	struct list_head alts;
 	struct symbol *func;
 	struct stack_op stack_op;
-- 
cgit v1.2.3


From 198ee8e17502da2634f7366395db1d77630e0219 Mon Sep 17 00:00:00 2001
From: Dominik Brodowski <linux@dominikbrodowski.net>
Date: Sun, 11 Feb 2018 12:10:10 +0100
Subject: selftests/x86: Fix vDSO selftest segfault for vsyscall=none

The vDSO selftest tries to execute a vsyscall unconditionally, even if it
is not present on the test system (e.g. if booted with vsyscall=none or
with CONFIG_LEGACY_VSYSCALL_NONE=y set. Fix this by copying (and tweaking)
the vsyscall check from test_vsyscall.c

Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
Cc: Andrew Lutomirski <luto@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kselftest@vger.kernel.org
Cc: shuah@kernel.org
Link: http://lkml.kernel.org/r/20180211111013.16888-3-linux@dominikbrodowski.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 tools/testing/selftests/x86/test_vdso.c | 50 ++++++++++++++++++++++++++++-----
 1 file changed, 43 insertions(+), 7 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/x86/test_vdso.c b/tools/testing/selftests/x86/test_vdso.c
index 29973cde06d3..558c8207e7b9 100644
--- a/tools/testing/selftests/x86/test_vdso.c
+++ b/tools/testing/selftests/x86/test_vdso.c
@@ -28,18 +28,52 @@
 
 int nerrs = 0;
 
+typedef long (*getcpu_t)(unsigned *, unsigned *, void *);
+
+getcpu_t vgetcpu;
+getcpu_t vdso_getcpu;
+
+static void *vsyscall_getcpu(void)
+{
 #ifdef __x86_64__
-# define VSYS(x) (x)
+	FILE *maps;
+	char line[128];
+	bool found = false;
+
+	maps = fopen("/proc/self/maps", "r");
+	if (!maps) /* might still be present, but ignore it here, as we test vDSO not vsyscall */
+		return NULL;
+
+	while (fgets(line, sizeof(line), maps)) {
+		char r, x;
+		void *start, *end;
+		char name[128];
+		if (sscanf(line, "%p-%p %c-%cp %*x %*x:%*x %*u %s",
+			   &start, &end, &r, &x, name) != 5)
+			continue;
+
+		if (strcmp(name, "[vsyscall]"))
+			continue;
+
+		/* assume entries are OK, as we test vDSO here not vsyscall */
+		found = true;
+		break;
+	}
+
+	fclose(maps);
+
+	if (!found) {
+		printf("Warning: failed to find vsyscall getcpu\n");
+		return NULL;
+	}
+	return (void *) (0xffffffffff600800);
 #else
-# define VSYS(x) 0
+	return NULL;
 #endif
+}
 
-typedef long (*getcpu_t)(unsigned *, unsigned *, void *);
-
-const getcpu_t vgetcpu = (getcpu_t)VSYS(0xffffffffff600800);
-getcpu_t vdso_getcpu;
 
-void fill_function_pointers()
+static void fill_function_pointers()
 {
 	void *vdso = dlopen("linux-vdso.so.1",
 			    RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
@@ -54,6 +88,8 @@ void fill_function_pointers()
 	vdso_getcpu = (getcpu_t)dlsym(vdso, "__vdso_getcpu");
 	if (!vdso_getcpu)
 		printf("Warning: failed to find getcpu in vDSO\n");
+
+	vgetcpu = (getcpu_t) vsyscall_getcpu();
 }
 
 static long sys_getcpu(unsigned * cpu, unsigned * node,
-- 
cgit v1.2.3


From d8e92de8ef952bed88c56c7a44c02d8dcae0984e Mon Sep 17 00:00:00 2001
From: Dominik Brodowski <linux@dominikbrodowski.net>
Date: Sun, 11 Feb 2018 21:59:24 +0100
Subject: selftests/x86: Clean up and document sscanf() usage

Replace a couple of magically connected buffer length literal constants with
a common definition that makes their relationship obvious. Also document
why our sscanf() usage is safe.

No intended functional changes.

Suggested-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
Cc: Andrew Lutomirski <luto@kernel.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kselftest@vger.kernel.org
Cc: shuah@kernel.org
Link: http://lkml.kernel.org/r/20180211205924.GA23210@light.dominikbrodowski.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 tools/testing/selftests/x86/test_vdso.c     | 11 ++++++++---
 tools/testing/selftests/x86/test_vsyscall.c | 11 ++++++++---
 2 files changed, 16 insertions(+), 6 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/x86/test_vdso.c b/tools/testing/selftests/x86/test_vdso.c
index 558c8207e7b9..235259011704 100644
--- a/tools/testing/selftests/x86/test_vdso.c
+++ b/tools/testing/selftests/x86/test_vdso.c
@@ -26,6 +26,9 @@
 # endif
 #endif
 
+/* max length of lines in /proc/self/maps - anything longer is skipped here */
+#define MAPS_LINE_LEN 128
+
 int nerrs = 0;
 
 typedef long (*getcpu_t)(unsigned *, unsigned *, void *);
@@ -37,17 +40,19 @@ static void *vsyscall_getcpu(void)
 {
 #ifdef __x86_64__
 	FILE *maps;
-	char line[128];
+	char line[MAPS_LINE_LEN];
 	bool found = false;
 
 	maps = fopen("/proc/self/maps", "r");
 	if (!maps) /* might still be present, but ignore it here, as we test vDSO not vsyscall */
 		return NULL;
 
-	while (fgets(line, sizeof(line), maps)) {
+	while (fgets(line, MAPS_LINE_LEN, maps)) {
 		char r, x;
 		void *start, *end;
-		char name[128];
+		char name[MAPS_LINE_LEN];
+
+		/* sscanf() is safe here as strlen(name) >= strlen(line) */
 		if (sscanf(line, "%p-%p %c-%cp %*x %*x:%*x %*u %s",
 			   &start, &end, &r, &x, name) != 5)
 			continue;
diff --git a/tools/testing/selftests/x86/test_vsyscall.c b/tools/testing/selftests/x86/test_vsyscall.c
index 7a744fa7b786..be81621446f0 100644
--- a/tools/testing/selftests/x86/test_vsyscall.c
+++ b/tools/testing/selftests/x86/test_vsyscall.c
@@ -33,6 +33,9 @@
 # endif
 #endif
 
+/* max length of lines in /proc/self/maps - anything longer is skipped here */
+#define MAPS_LINE_LEN 128
+
 static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
 		       int flags)
 {
@@ -98,7 +101,7 @@ static int init_vsys(void)
 #ifdef __x86_64__
 	int nerrs = 0;
 	FILE *maps;
-	char line[128];
+	char line[MAPS_LINE_LEN];
 	bool found = false;
 
 	maps = fopen("/proc/self/maps", "r");
@@ -108,10 +111,12 @@ static int init_vsys(void)
 		return 0;
 	}
 
-	while (fgets(line, sizeof(line), maps)) {
+	while (fgets(line, MAPS_LINE_LEN, maps)) {
 		char r, x;
 		void *start, *end;
-		char name[128];
+		char name[MAPS_LINE_LEN];
+
+		/* sscanf() is safe here as strlen(name) >= strlen(line) */
 		if (sscanf(line, "%p-%p %c-%cp %*x %*x:%*x %*u %s",
 			   &start, &end, &r, &x, name) != 5)
 			continue;
-- 
cgit v1.2.3


From ce676638fe7b284132a7d7d5e7e7ad81bab9947e Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Tue, 13 Feb 2018 08:26:17 +0100
Subject: selftests/x86/pkeys: Remove unused functions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This also gets rid of two build warnings:

  protection_keys.c: In function ‘dumpit’:
  protection_keys.c:419:3: warning: ignoring return value of ‘write’, declared with attribute warn_unused_result [-Wunused-result]
     write(1, buf, nr_read);
     ^~~~~~~~~~~~~~~~~~~~~~

Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Shuah Khan <shuahkh@osg.samsung.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 tools/testing/selftests/x86/protection_keys.c | 28 ---------------------------
 1 file changed, 28 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/x86/protection_keys.c b/tools/testing/selftests/x86/protection_keys.c
index bc1b0735bb50..f15aa5a76fe3 100644
--- a/tools/testing/selftests/x86/protection_keys.c
+++ b/tools/testing/selftests/x86/protection_keys.c
@@ -393,34 +393,6 @@ pid_t fork_lazy_child(void)
 	return forkret;
 }
 
-void davecmp(void *_a, void *_b, int len)
-{
-	int i;
-	unsigned long *a = _a;
-	unsigned long *b = _b;
-
-	for (i = 0; i < len / sizeof(*a); i++) {
-		if (a[i] == b[i])
-			continue;
-
-		dprintf3("[%3d]: a: %016lx b: %016lx\n", i, a[i], b[i]);
-	}
-}
-
-void dumpit(char *f)
-{
-	int fd = open(f, O_RDONLY);
-	char buf[100];
-	int nr_read;
-
-	dprintf2("maps fd: %d\n", fd);
-	do {
-		nr_read = read(fd, &buf[0], sizeof(buf));
-		write(1, buf, nr_read);
-	} while (nr_read > 0);
-	close(fd);
-}
-
 #define PKEY_DISABLE_ACCESS    0x1
 #define PKEY_DISABLE_WRITE     0x2
 
-- 
cgit v1.2.3


From 7f95122067ab26fb8344b2a9de64ffbd0fea0010 Mon Sep 17 00:00:00 2001
From: Dominik Brodowski <linux@dominikbrodowski.net>
Date: Sun, 11 Feb 2018 12:10:09 +0100
Subject: selftests/x86: Fix build bug caused by the 5lvl test which has been
 moved to the VM directory

Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kselftest@vger.kernel.org
Cc: shuah@kernel.org
Fixes: 235266b8e11c "selftests/vm: move 128TB mmap boundary test to generic directory"
Link: http://lkml.kernel.org/r/20180211111013.16888-2-linux@dominikbrodowski.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 tools/testing/selftests/x86/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index 5d4f10ac2af2..91fbfa8fdc15 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -11,7 +11,7 @@ TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_sysc
 TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \
 			test_FCMOV test_FCOMI test_FISTTP \
 			vdso_restorer
-TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip 5lvl
+TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip
 
 TARGETS_C_32BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_32BIT_ONLY)
 TARGETS_C_64BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_64BIT_ONLY)
-- 
cgit v1.2.3


From 2cbc0d66de0480449c75636f55697c7ff3af61fc Mon Sep 17 00:00:00 2001
From: Dominik Brodowski <linux@dominikbrodowski.net>
Date: Sun, 11 Feb 2018 12:10:11 +0100
Subject: selftests/x86: Do not rely on "int $0x80" in test_mremap_vdso.c

On 64-bit builds, we should not rely on "int $0x80" working (it only does if
CONFIG_IA32_EMULATION=y is enabled).

Without this patch, the move test may succeed, but the "int $0x80" causes
a segfault, resulting in a false negative output of this self-test.

Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Dmitry Safonov <dsafonov@virtuozzo.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kselftest@vger.kernel.org
Cc: shuah@kernel.org
Link: http://lkml.kernel.org/r/20180211111013.16888-4-linux@dominikbrodowski.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 tools/testing/selftests/x86/test_mremap_vdso.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/x86/test_mremap_vdso.c b/tools/testing/selftests/x86/test_mremap_vdso.c
index bf0d687c7db7..64f11c8d9b76 100644
--- a/tools/testing/selftests/x86/test_mremap_vdso.c
+++ b/tools/testing/selftests/x86/test_mremap_vdso.c
@@ -90,8 +90,12 @@ int main(int argc, char **argv, char **envp)
 			vdso_size += PAGE_SIZE;
 		}
 
+#ifdef __i386__
 		/* Glibc is likely to explode now - exit with raw syscall */
 		asm volatile ("int $0x80" : : "a" (__NR_exit), "b" (!!ret));
+#else /* __x86_64__ */
+		syscall(SYS_exit, ret);
+#endif
 	} else {
 		int status;
 
-- 
cgit v1.2.3


From ecdf06e1ea5376bba03c155751f6869d3dfaa210 Mon Sep 17 00:00:00 2001
From: Harish <harish@linux.vnet.ibm.com>
Date: Tue, 13 Feb 2018 12:02:55 +0530
Subject: selftests/powerpc: Fix to use ucontext_t instead of struct ucontext
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

With glibc 2.26 'struct ucontext' is removed to improve POSIX
compliance, which breaks powerpc/alignment_handler selftest. Fix the
test by using ucontext_t. Tested on ppc, works with older glibc
versions as well.

Fixes the following:
  alignment_handler.c: In function ‘sighandler’:
  alignment_handler.c:68:5: error: dereferencing pointer to incomplete type ‘struct ucontext’
    ucp->uc_mcontext.gp_regs[PT_NIP] += 4;

Signed-off-by: Harish <harish@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 tools/testing/selftests/powerpc/alignment/alignment_handler.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/powerpc/alignment/alignment_handler.c b/tools/testing/selftests/powerpc/alignment/alignment_handler.c
index 39fd362415cf..0f2698f9fd6d 100644
--- a/tools/testing/selftests/powerpc/alignment/alignment_handler.c
+++ b/tools/testing/selftests/powerpc/alignment/alignment_handler.c
@@ -57,7 +57,7 @@ volatile int gotsig;
 
 void sighandler(int sig, siginfo_t *info, void *ctx)
 {
-	struct ucontext *ucp = ctx;
+	ucontext_t *ucp = ctx;
 
 	if (!testing) {
 		signal(sig, SIG_DFL);
-- 
cgit v1.2.3


From b2c93e300a11b419b4c67ce08e16fa1436d8118c Mon Sep 17 00:00:00 2001
From: Anders Roxell <anders.roxell@linaro.org>
Date: Tue, 6 Feb 2018 16:23:39 -0600
Subject: selftests: sync: missing CFLAGS while compiling
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Based on patch: https://patchwork.kernel.org/patch/10042045/

arch64-linux-gnu-gcc -c sync.c -o sync/sync.o
sync.c:42:29: fatal error: linux/sync_file.h: No such file or directory
 #include <linux/sync_file.h>
                             ^
CFLAGS is not used during the compile step, so the system instead of
kernel headers are used.  Fix this by adding CFLAGS to the OBJS compile
rule.

Reported-by: Lei Yang <Lei.Yang@windriver.com>
Signed-off-by: Anders Roxell <anders.roxell@linaro.org>
Signed-off-by: Daniel Díaz <daniel.diaz@linaro.org>
Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/sync/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/sync/Makefile b/tools/testing/selftests/sync/Makefile
index b3c8ba3cb668..d0121a8a3523 100644
--- a/tools/testing/selftests/sync/Makefile
+++ b/tools/testing/selftests/sync/Makefile
@@ -30,7 +30,7 @@ $(TEST_CUSTOM_PROGS): $(TESTS) $(OBJS)
 	$(CC) -o $(TEST_CUSTOM_PROGS) $(OBJS) $(TESTS) $(CFLAGS) $(LDFLAGS)
 
 $(OBJS): $(OUTPUT)/%.o: %.c
-	$(CC) -c $^ -o $@
+	$(CC) -c $^ -o $@ $(CFLAGS)
 
 $(TESTS): $(OUTPUT)/%.o: %.c
 	$(CC) -c $^ -o $@
-- 
cgit v1.2.3


From 70b574e7d719bdf96d26528cb289f3e782e83979 Mon Sep 17 00:00:00 2001
From: Dominik Brodowski <linux@dominikbrodowski.net>
Date: Sun, 11 Feb 2018 11:59:50 +0100
Subject: selftest/vDSO: fix O=

The vDSO selftests ignored the O= or KBUILD_OUTPUT= parameters. Fix it.

Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/vDSO/Makefile | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile
index 3d5a62ff7d31..f5d7a7851e21 100644
--- a/tools/testing/selftests/vDSO/Makefile
+++ b/tools/testing/selftests/vDSO/Makefile
@@ -1,4 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
+include ../lib.mk
+
 ifndef CROSS_COMPILE
 CFLAGS := -std=gnu99
 CFLAGS_vdso_standalone_test_x86 := -nostdlib -fno-asynchronous-unwind-tables -fno-stack-protector
@@ -6,16 +8,14 @@ ifeq ($(CONFIG_X86_32),y)
 LDLIBS += -lgcc_s
 endif
 
-TEST_PROGS := vdso_test vdso_standalone_test_x86
+TEST_PROGS := $(OUTPUT)/vdso_test $(OUTPUT)/vdso_standalone_test_x86
 
 all: $(TEST_PROGS)
-vdso_test: parse_vdso.c vdso_test.c
-vdso_standalone_test_x86: vdso_standalone_test_x86.c parse_vdso.c
+$(OUTPUT)/vdso_test: parse_vdso.c vdso_test.c
+$(OUTPUT)/vdso_standalone_test_x86: vdso_standalone_test_x86.c parse_vdso.c
 	$(CC) $(CFLAGS) $(CFLAGS_vdso_standalone_test_x86) \
 		vdso_standalone_test_x86.c parse_vdso.c \
-		-o vdso_standalone_test_x86
+		-o $@
 
-include ../lib.mk
-clean:
-	rm -fr $(TEST_PROGS)
+EXTRA_CLEAN := $(TEST_PROGS)
 endif
-- 
cgit v1.2.3


From 64136fb76039defd193e9e885bb722919d220021 Mon Sep 17 00:00:00 2001
From: Daniel Díaz <daniel.diaz@linaro.org>
Date: Tue, 6 Feb 2018 17:52:05 -0600
Subject: selftests/android: Fix line continuation in Makefile
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The Makefile lacks a couple of line continuation backslashes
in an `if' clause, which can make the subsequent rsync
command go awry over the whole filesystem (`rsync -a / /`).

  /bin/sh: -c: line 5: syntax error: unexpected end of file
  make[1]: [all] Error 1 (ignored)
  TEST=$DIR"_test.sh"; \
                  if [ -e $DIR/$TEST ]; then
  /bin/sh: -c: line 2: syntax error: unexpected end of file
  make[1]: [all] Error 1 (ignored)
  rsync -a $DIR/$TEST $BUILD_TARGET/;
  [...a myriad of:]
  [  rsync: readlink_stat("...") failed: Permission denied (13)]
  [  skipping non-regular file "..."]
  [  rsync: opendir "..." failed: Permission denied (13)]
  [and many other errors...]
  fi
  make[1]: fi: Command not found
  make[1]: [all] Error 127 (ignored)
  done
  make[1]: done: Command not found
  make[1]: [all] Error 127 (ignored)

Signed-off-by: Daniel Díaz <daniel.diaz@linaro.org>
Acked-by: Pintu Agarwal <pintu.ping@gmail.com>
Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/android/Makefile | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/android/Makefile b/tools/testing/selftests/android/Makefile
index 1a7492268993..f6304d2be90c 100644
--- a/tools/testing/selftests/android/Makefile
+++ b/tools/testing/selftests/android/Makefile
@@ -11,11 +11,11 @@ all:
 		BUILD_TARGET=$(OUTPUT)/$$DIR;	\
 		mkdir $$BUILD_TARGET  -p;	\
 		make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\
-		#SUBDIR test prog name should be in the form: SUBDIR_test.sh
+		#SUBDIR test prog name should be in the form: SUBDIR_test.sh \
 		TEST=$$DIR"_test.sh"; \
-		if [ -e $$DIR/$$TEST ]; then
-			rsync -a $$DIR/$$TEST $$BUILD_TARGET/;
-		fi
+		if [ -e $$DIR/$$TEST ]; then \
+			rsync -a $$DIR/$$TEST $$BUILD_TARGET/; \
+		fi \
 	done
 
 override define RUN_TESTS
-- 
cgit v1.2.3


From 9a379e77033f02c4a071891afdf0f0a01eff8ccb Mon Sep 17 00:00:00 2001
From: Naresh Kamboju <naresh.kamboju@linaro.org>
Date: Wed, 7 Feb 2018 14:47:20 +0530
Subject: selftests: pstore: Adding config fragment CONFIG_PSTORE_RAM=m

pstore_tests and pstore_post_reboot_tests need CONFIG_PSTORE_RAM=m

Signed-off-by: Naresh Kamboju <naresh.kamboju@linaro.org>
Acked-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/pstore/config | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/pstore/config b/tools/testing/selftests/pstore/config
index 6a8e5a9bfc10..d148f9f89fb6 100644
--- a/tools/testing/selftests/pstore/config
+++ b/tools/testing/selftests/pstore/config
@@ -2,3 +2,4 @@ CONFIG_MISC_FILESYSTEMS=y
 CONFIG_PSTORE=y
 CONFIG_PSTORE_PMSG=y
 CONFIG_PSTORE_CONSOLE=y
+CONFIG_PSTORE_RAM=m
-- 
cgit v1.2.3


From 9a606f8d55cfc932ec02172aaed4124fdc150047 Mon Sep 17 00:00:00 2001
From: Anders Roxell <anders.roxell@linaro.org>
Date: Tue, 6 Feb 2018 16:20:44 -0600
Subject: selftests: memfd: add config fragment for fuse
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The memfd test requires to insert the fuse module (CONFIG_FUSE_FS).

Signed-off-by: Anders Roxell <anders.roxell@linaro.org>
Signed-off-by: Daniel Díaz <daniel.diaz@linaro.org>
Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/memfd/config | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 tools/testing/selftests/memfd/config

(limited to 'tools')

diff --git a/tools/testing/selftests/memfd/config b/tools/testing/selftests/memfd/config
new file mode 100644
index 000000000000..835c7f4dadcd
--- /dev/null
+++ b/tools/testing/selftests/memfd/config
@@ -0,0 +1 @@
+CONFIG_FUSE_FS=m
-- 
cgit v1.2.3


From 4105c69703cdeba76f384b901712c9397b04e9c2 Mon Sep 17 00:00:00 2001
From: Dominik Brodowski <linux@dominikbrodowski.net>
Date: Tue, 13 Feb 2018 09:13:21 +0100
Subject: selftests/x86: Do not rely on "int $0x80" in single_step_syscall.c

On 64-bit builds, we should not rely on "int $0x80" working (it only does if
CONFIG_IA32_EMULATION=y is enabled). To keep the "Set TF and check int80"
test running on 64-bit installs with CONFIG_IA32_EMULATION=y enabled, build
this test only if we can also build 32-bit binaries (which should be a
good approximation for that).

Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Dmitry Safonov <dsafonov@virtuozzo.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kselftest@vger.kernel.org
Cc: shuah@kernel.org
Link: http://lkml.kernel.org/r/20180211111013.16888-5-linux@dominikbrodowski.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 tools/testing/selftests/x86/Makefile              | 2 ++
 tools/testing/selftests/x86/single_step_syscall.c | 5 ++++-
 2 files changed, 6 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index 91fbfa8fdc15..73b8ef665c98 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -30,11 +30,13 @@ CAN_BUILD_X86_64 := $(shell ./check_cc.sh $(CC) trivial_64bit_program.c)
 ifeq ($(CAN_BUILD_I386),1)
 all: all_32
 TEST_PROGS += $(BINARIES_32)
+EXTRA_CFLAGS += -DCAN_BUILD_32
 endif
 
 ifeq ($(CAN_BUILD_X86_64),1)
 all: all_64
 TEST_PROGS += $(BINARIES_64)
+EXTRA_CFLAGS += -DCAN_BUILD_64
 endif
 
 all_32: $(BINARIES_32)
diff --git a/tools/testing/selftests/x86/single_step_syscall.c b/tools/testing/selftests/x86/single_step_syscall.c
index a48da95c18fd..ddfdd635de16 100644
--- a/tools/testing/selftests/x86/single_step_syscall.c
+++ b/tools/testing/selftests/x86/single_step_syscall.c
@@ -119,7 +119,9 @@ static void check_result(void)
 
 int main()
 {
+#ifdef CAN_BUILD_32
 	int tmp;
+#endif
 
 	sethandler(SIGTRAP, sigtrap, 0);
 
@@ -139,12 +141,13 @@ int main()
 		      : : "c" (post_nop) : "r11");
 	check_result();
 #endif
-
+#ifdef CAN_BUILD_32
 	printf("[RUN]\tSet TF and check int80\n");
 	set_eflags(get_eflags() | X86_EFLAGS_TF);
 	asm volatile ("int $0x80" : "=a" (tmp) : "a" (SYS_getpid)
 			: INT80_CLOBBERS);
 	check_result();
+#endif
 
 	/*
 	 * This test is particularly interesting if fast syscalls use
-- 
cgit v1.2.3


From 9279ddf23ce78ff2676e8e8e19fec0f022c26d04 Mon Sep 17 00:00:00 2001
From: Dominik Brodowski <linux@dominikbrodowski.net>
Date: Tue, 13 Feb 2018 09:15:19 +0100
Subject: selftests/x86: Disable tests requiring 32-bit support on pure 64-bit
 systems

The ldt_gdt and ptrace_syscall selftests, even in their 64-bit variant, use
hard-coded 32-bit syscall numbers and call "int $0x80".

This will fail on 64-bit systems with CONFIG_IA32_EMULATION=y disabled.

Therefore, do not build these tests if we cannot build 32-bit binaries
(which should be a good approximation for CONFIG_IA32_EMULATION=y being enabled).

Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Dmitry Safonov <dsafonov@virtuozzo.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kselftest@vger.kernel.org
Cc: shuah@kernel.org
Link: http://lkml.kernel.org/r/20180211111013.16888-6-linux@dominikbrodowski.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 tools/testing/selftests/x86/Makefile | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index 73b8ef665c98..aa6e2d7f6a1f 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -5,16 +5,26 @@ include ../lib.mk
 
 .PHONY: all all_32 all_64 warn_32bit_failure clean
 
-TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall test_mremap_vdso \
-			check_initial_reg_state sigreturn ldt_gdt iopl mpx-mini-test ioperm \
+UNAME_M := $(shell uname -m)
+CAN_BUILD_I386 := $(shell ./check_cc.sh $(CC) trivial_32bit_program.c -m32)
+CAN_BUILD_X86_64 := $(shell ./check_cc.sh $(CC) trivial_64bit_program.c)
+
+TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \
+			check_initial_reg_state sigreturn iopl mpx-mini-test ioperm \
 			protection_keys test_vdso test_vsyscall
 TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \
 			test_FCMOV test_FCOMI test_FISTTP \
 			vdso_restorer
 TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip
+# Some selftests require 32bit support enabled also on 64bit systems
+TARGETS_C_32BIT_NEEDED := ldt_gdt ptrace_syscall
 
-TARGETS_C_32BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_32BIT_ONLY)
+TARGETS_C_32BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_32BIT_ONLY) $(TARGETS_C_32BIT_NEEDED)
 TARGETS_C_64BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_64BIT_ONLY)
+ifeq ($(CAN_BUILD_I386)$(CAN_BUILD_X86_64),11)
+TARGETS_C_64BIT_ALL += $(TARGETS_C_32BIT_NEEDED)
+endif
+
 BINARIES_32 := $(TARGETS_C_32BIT_ALL:%=%_32)
 BINARIES_64 := $(TARGETS_C_64BIT_ALL:%=%_64)
 
@@ -23,10 +33,6 @@ BINARIES_64 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_64))
 
 CFLAGS := -O2 -g -std=gnu99 -pthread -Wall -no-pie
 
-UNAME_M := $(shell uname -m)
-CAN_BUILD_I386 := $(shell ./check_cc.sh $(CC) trivial_32bit_program.c -m32)
-CAN_BUILD_X86_64 := $(shell ./check_cc.sh $(CC) trivial_64bit_program.c)
-
 ifeq ($(CAN_BUILD_I386),1)
 all: all_32
 TEST_PROGS += $(BINARIES_32)
-- 
cgit v1.2.3


From fe24e27128252c230a34a6c628da2bf1676781ea Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Thu, 8 Feb 2018 17:09:25 -0600
Subject: objtool: Fix segfault in ignore_unreachable_insn()

Peter Zijlstra's patch for converting WARN() to use UD2 triggered a
bunch of false "unreachable instruction" warnings, which then triggered
a seg fault in ignore_unreachable_insn().

The seg fault happened when it tried to dereference a NULL 'insn->func'
pointer.  Thanks to static_cpu_has(), some functions can jump to a
non-function area in the .altinstr_aux section.  That breaks
ignore_unreachable_insn()'s assumption that it's always inside the
original function.

Make sure ignore_unreachable_insn() only follows jumps within the
current function.

Reported-by: Borislav Petkov <bp@alien8.de>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: kbuild test robot <fengguang.wu@intel.com>
Link: http://lkml.kernel.org/r/bace77a60d5af9b45eddb8f8fb9c776c8de657ef.1518130694.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 tools/objtool/check.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 2e458eb45586..c7fb5c2392ee 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -1935,13 +1935,19 @@ static bool ignore_unreachable_insn(struct instruction *insn)
 		if (is_kasan_insn(insn) || is_ubsan_insn(insn))
 			return true;
 
-		if (insn->type == INSN_JUMP_UNCONDITIONAL && insn->jump_dest) {
-			insn = insn->jump_dest;
-			continue;
+		if (insn->type == INSN_JUMP_UNCONDITIONAL) {
+			if (insn->jump_dest &&
+			    insn->jump_dest->func == insn->func) {
+				insn = insn->jump_dest;
+				continue;
+			}
+
+			break;
 		}
 
 		if (insn->offset + insn->len >= insn->func->offset + insn->func->len)
 			break;
+
 		insn = list_next_entry(insn, list);
 	}
 
-- 
cgit v1.2.3


From 961888b1d76d84efc66a8f5604b06ac12ac2f978 Mon Sep 17 00:00:00 2001
From: Rui Wang <rui.y.wang@intel.com>
Date: Mon, 18 Dec 2017 16:34:10 +0800
Subject: selftests/x86/mpx: Fix incorrect bounds with old _sigfault

For distributions with old userspace header files, the _sigfault
structure is different. mpx-mini-test fails with the following
error:

  [root@Purley]# mpx-mini-test_64 tabletest
  XSAVE is supported by HW & OS
  XSAVE processor supported state mask: 0x2ff
  XSAVE OS supported state mask: 0x2ff
   BNDREGS: size: 64 user: 1 supervisor: 0 aligned: 0
    BNDCSR: size: 64 user: 1 supervisor: 0 aligned: 0
  starting mpx bounds table test
  ERROR: siginfo bounds do not match shadow bounds for register 0

Fix it by using the correct offset of _lower/_upper in _sigfault.
RHEL needs this patch to work.

Signed-off-by: Rui Wang <rui.y.wang@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: dave.hansen@linux.intel.com
Fixes: e754aedc26ef ("x86/mpx, selftests: Add MPX self test")
Link: http://lkml.kernel.org/r/1513586050-1641-1-git-send-email-rui.y.wang@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 tools/testing/selftests/x86/mpx-mini-test.c | 32 +++++++++++++++++++++++++++--
 1 file changed, 30 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/x86/mpx-mini-test.c b/tools/testing/selftests/x86/mpx-mini-test.c
index ec0f6b45ce8b..9c0325e1ea68 100644
--- a/tools/testing/selftests/x86/mpx-mini-test.c
+++ b/tools/testing/selftests/x86/mpx-mini-test.c
@@ -315,11 +315,39 @@ static inline void *__si_bounds_upper(siginfo_t *si)
 	return si->si_upper;
 }
 #else
+
+/*
+ * This deals with old version of _sigfault in some distros:
+ *
+
+old _sigfault:
+        struct {
+            void *si_addr;
+	} _sigfault;
+
+new _sigfault:
+	struct {
+		void __user *_addr;
+		int _trapno;
+		short _addr_lsb;
+		union {
+			struct {
+				void __user *_lower;
+				void __user *_upper;
+			} _addr_bnd;
+			__u32 _pkey;
+		};
+	} _sigfault;
+ *
+ */
+
 static inline void **__si_bounds_hack(siginfo_t *si)
 {
 	void *sigfault = &si->_sifields._sigfault;
 	void *end_sigfault = sigfault + sizeof(si->_sifields._sigfault);
-	void **__si_lower = end_sigfault;
+	int *trapno = (int*)end_sigfault;
+	/* skip _trapno and _addr_lsb */
+	void **__si_lower = (void**)(trapno + 2);
 
 	return __si_lower;
 }
@@ -331,7 +359,7 @@ static inline void *__si_bounds_lower(siginfo_t *si)
 
 static inline void *__si_bounds_upper(siginfo_t *si)
 {
-	return (*__si_bounds_hack(si)) + sizeof(void *);
+	return *(__si_bounds_hack(si) + 1);
 }
 #endif
 
-- 
cgit v1.2.3


From 52c84d36b7e2f8197a9a6174d6f901a7c7afb850 Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin.monnet@netronome.com>
Date: Wed, 14 Feb 2018 22:42:54 -0800
Subject: tools: bpftool: preserve JSON for batch mode when dumping insns to
 file

Print a "null" JSON object to standard output when bpftool is used to
print program instructions to a file, so as to avoid breaking JSON
output on batch mode.

This null object was added for most commands in a previous commit, but
this specific case had been omitted.

Fixes: 004b45c0e51a ("tools: bpftool: provide JSON output for all possible commands")
Signed-off-by: Quentin Monnet <quentin.monnet@netronome.com>
Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/bpf/bpftool/prog.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index e8e2baaf93c2..e549e329be82 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -774,6 +774,9 @@ static int do_dump(int argc, char **argv)
 			      n < 0 ? strerror(errno) : "short write");
 			goto err_free;
 		}
+
+		if (json_output)
+			jsonw_null(json_wtr);
 	} else {
 		if (member_len == &info.jited_prog_len) {
 			const char *name = NULL;
-- 
cgit v1.2.3


From 9be6d411b0c473d31f756993b8b41bb16b0679c1 Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin.monnet@netronome.com>
Date: Wed, 14 Feb 2018 22:42:55 -0800
Subject: tools: bpftool: preserve JSON output on errors on batch file parsing

Before this patch, perror() function is used in some cases when bpftool
fails to parse its input file in batch mode. This function does not
integrate well with the rest of the output when JSON is used, so we
replace it by something that is compliant.

Most calls to perror() had already been replaced in a previous patch,
this one is a leftover.

Fixes: d319c8e101c5 ("tools: bpftool: preserve JSON output on errors on batch file parsing")
Signed-off-by: Quentin Monnet <quentin.monnet@netronome.com>
Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/bpf/bpftool/main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
index 3a0396d87c42..185acfa229b5 100644
--- a/tools/bpf/bpftool/main.c
+++ b/tools/bpf/bpftool/main.c
@@ -244,7 +244,7 @@ static int do_batch(int argc, char **argv)
 	}
 
 	if (errno && errno != ENOENT) {
-		perror("reading batch file failed");
+		p_err("reading batch file failed: %s", strerror(errno));
 		err = -1;
 	} else {
 		p_info("processed %d lines", lines);
-- 
cgit v1.2.3


From 0b7c1528fb741803396da68a9d8d285ff7db731c Mon Sep 17 00:00:00 2001
From: William Cohen <wcohen@redhat.com>
Date: Tue, 30 Jan 2018 22:28:13 -0500
Subject: perf vendor events aarch64: Add JSON metrics for ARM Cortex-A53
 Processor

Add JSON metrics for ARM Cortex-A53 Processor.

Unlike the Intel processors there isn't a script that automatically
generated these files. The patch was manually generated from the
documentation and the previous oprofile ARM Cortex ac53 event file patch
I made.

The relevant documentation is in the "12.9 Events" section of the ARM
Cortex A53 MPCore Processor Revision: r0p4 Technical Reference Manual.

The ARM Cortex A53 manual is available at:

  http://infocenter.arm.com/help/topic/com.arm.doc.ddi0500g/DDI0500G_cortex_a53_trm.pdf

Use that to look for additional information about the events.

Signed-off-by: William Cohen <wcohen@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20180131032813.9564-1-wcohen@redhat.com
[ Added references provided by William Cohen ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 .../pmu-events/arch/arm64/cortex-a53/branch.json   | 27 +++++++++++
 .../perf/pmu-events/arch/arm64/cortex-a53/bus.json | 22 +++++++++
 .../pmu-events/arch/arm64/cortex-a53/cache.json    | 27 +++++++++++
 .../pmu-events/arch/arm64/cortex-a53/memory.json   | 22 +++++++++
 .../pmu-events/arch/arm64/cortex-a53/other.json    | 32 +++++++++++++
 .../pmu-events/arch/arm64/cortex-a53/pipeline.json | 52 ++++++++++++++++++++++
 tools/perf/pmu-events/arch/arm64/mapfile.csv       |  1 +
 7 files changed, 183 insertions(+)
 create mode 100644 tools/perf/pmu-events/arch/arm64/cortex-a53/branch.json
 create mode 100644 tools/perf/pmu-events/arch/arm64/cortex-a53/bus.json
 create mode 100644 tools/perf/pmu-events/arch/arm64/cortex-a53/cache.json
 create mode 100644 tools/perf/pmu-events/arch/arm64/cortex-a53/memory.json
 create mode 100644 tools/perf/pmu-events/arch/arm64/cortex-a53/other.json
 create mode 100644 tools/perf/pmu-events/arch/arm64/cortex-a53/pipeline.json

(limited to 'tools')

diff --git a/tools/perf/pmu-events/arch/arm64/cortex-a53/branch.json b/tools/perf/pmu-events/arch/arm64/cortex-a53/branch.json
new file mode 100644
index 000000000000..3b6208763e50
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/cortex-a53/branch.json
@@ -0,0 +1,27 @@
+[
+  {,
+    "EventCode": "0x7A",
+    "EventName": "BR_INDIRECT_SPEC",
+    "BriefDescription": "Branch speculatively executed - Indirect branch"
+  },
+  {,
+    "EventCode": "0xC9",
+    "EventName": "BR_COND",
+    "BriefDescription": "Conditional branch executed"
+  },
+  {,
+    "EventCode": "0xCA",
+    "EventName": "BR_INDIRECT_MISPRED",
+    "BriefDescription": "Indirect branch mispredicted"
+  },
+  {,
+    "EventCode": "0xCB",
+    "EventName": "BR_INDIRECT_MISPRED_ADDR",
+    "BriefDescription": "Indirect branch mispredicted because of address miscompare"
+  },
+  {,
+    "EventCode": "0xCC",
+    "EventName": "BR_COND_MISPRED",
+    "BriefDescription": "Conditional branch mispredicted"
+  }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/cortex-a53/bus.json b/tools/perf/pmu-events/arch/arm64/cortex-a53/bus.json
new file mode 100644
index 000000000000..480d9f7460ab
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/cortex-a53/bus.json
@@ -0,0 +1,22 @@
+[
+  {,
+    "EventCode": "0x60",
+    "EventName": "BUS_ACCESS_LD",
+    "BriefDescription": "Bus access - Read"
+  },
+  {,
+    "EventCode": "0x61",
+    "EventName": "BUS_ACCESS_ST",
+    "BriefDescription": "Bus access - Write"
+  },
+  {,
+    "EventCode": "0xC0",
+    "EventName": "EXT_MEM_REQ",
+    "BriefDescription": "External memory request"
+  },
+  {,
+    "EventCode": "0xC1",
+    "EventName": "EXT_MEM_REQ_NC",
+    "BriefDescription": "Non-cacheable external memory request"
+  }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/cortex-a53/cache.json b/tools/perf/pmu-events/arch/arm64/cortex-a53/cache.json
new file mode 100644
index 000000000000..11baad6344b9
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/cortex-a53/cache.json
@@ -0,0 +1,27 @@
+[
+  {,
+    "EventCode": "0xC2",
+    "EventName": "PREFETCH_LINEFILL",
+    "BriefDescription": "Linefill because of prefetch"
+  },
+  {,
+    "EventCode": "0xC3",
+    "EventName": "PREFETCH_LINEFILL_DROP",
+    "BriefDescription": "Instruction Cache Throttle occurred"
+  },
+  {,
+    "EventCode": "0xC4",
+    "EventName": "READ_ALLOC_ENTER",
+    "BriefDescription": "Entering read allocate mode"
+  },
+  {,
+    "EventCode": "0xC5",
+    "EventName": "READ_ALLOC",
+    "BriefDescription": "Read allocate mode"
+  },
+  {,
+    "EventCode": "0xC8",
+    "EventName": "EXT_SNOOP",
+    "BriefDescription": "SCU Snooped data from another CPU for this CPU"
+  }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/cortex-a53/memory.json b/tools/perf/pmu-events/arch/arm64/cortex-a53/memory.json
new file mode 100644
index 000000000000..480d9f7460ab
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/cortex-a53/memory.json
@@ -0,0 +1,22 @@
+[
+  {,
+    "EventCode": "0x60",
+    "EventName": "BUS_ACCESS_LD",
+    "BriefDescription": "Bus access - Read"
+  },
+  {,
+    "EventCode": "0x61",
+    "EventName": "BUS_ACCESS_ST",
+    "BriefDescription": "Bus access - Write"
+  },
+  {,
+    "EventCode": "0xC0",
+    "EventName": "EXT_MEM_REQ",
+    "BriefDescription": "External memory request"
+  },
+  {,
+    "EventCode": "0xC1",
+    "EventName": "EXT_MEM_REQ_NC",
+    "BriefDescription": "Non-cacheable external memory request"
+  }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/cortex-a53/other.json b/tools/perf/pmu-events/arch/arm64/cortex-a53/other.json
new file mode 100644
index 000000000000..73a22402d003
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/cortex-a53/other.json
@@ -0,0 +1,32 @@
+[
+  {,
+    "EventCode": "0x86",
+    "EventName": "EXC_IRQ",
+    "BriefDescription": "Exception taken, IRQ"
+  },
+  {,
+    "EventCode": "0x87",
+    "EventName": "EXC_FIQ",
+    "BriefDescription": "Exception taken, FIQ"
+  },
+  {,
+    "EventCode": "0xC6",
+    "EventName": "PRE_DECODE_ERR",
+    "BriefDescription": "Pre-decode error"
+  },
+  {,
+    "EventCode": "0xD0",
+    "EventName": "L1I_CACHE_ERR",
+    "BriefDescription": "L1 Instruction Cache (data or tag) memory error"
+  },
+  {,
+    "EventCode": "0xD1",
+    "EventName": "L1D_CACHE_ERR",
+    "BriefDescription": "L1 Data Cache (data, tag or dirty) memory error, correctable or non-correctable"
+  },
+  {,
+    "EventCode": "0xD2",
+    "EventName": "TLB_ERR",
+    "BriefDescription": "TLB memory error"
+  }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/cortex-a53/pipeline.json b/tools/perf/pmu-events/arch/arm64/cortex-a53/pipeline.json
new file mode 100644
index 000000000000..3149fb90555a
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/cortex-a53/pipeline.json
@@ -0,0 +1,52 @@
+[
+  {,
+    "EventCode": "0xC7",
+    "EventName": "STALL_SB_FULL",
+    "BriefDescription": "Data Write operation that stalls the pipeline because the store buffer is full"
+  },
+  {,
+    "EventCode": "0xE0",
+    "EventName": "OTHER_IQ_DEP_STALL",
+    "BriefDescription": "Cycles that the DPU IQ is empty and that is not because of a recent micro-TLB miss, instruction cache miss or pre-decode error"
+  },
+  {,
+    "EventCode": "0xE1",
+    "EventName": "IC_DEP_STALL",
+    "BriefDescription": "Cycles the DPU IQ is empty and there is an instruction cache miss being processed"
+  },
+  {,
+    "EventCode": "0xE2",
+    "EventName": "IUTLB_DEP_STALL",
+    "BriefDescription": "Cycles the DPU IQ is empty and there is an instruction micro-TLB miss being processed"
+  },
+  {,
+    "EventCode": "0xE3",
+    "EventName": "DECODE_DEP_STALL",
+    "BriefDescription": "Cycles the DPU IQ is empty and there is a pre-decode error being processed"
+  },
+  {,
+    "EventCode": "0xE4",
+    "EventName": "OTHER_INTERLOCK_STALL",
+    "BriefDescription": "Cycles there is an interlock other than  Advanced SIMD/Floating-point instructions or load/store instruction"
+  },
+  {,
+    "EventCode": "0xE5",
+    "EventName": "AGU_DEP_STALL",
+    "BriefDescription": "Cycles there is an interlock for a load/store instruction waiting for data to calculate the address in the AGU"
+  },
+  {,
+    "EventCode": "0xE6",
+    "EventName": "SIMD_DEP_STALL",
+    "BriefDescription": "Cycles there is an interlock for an Advanced SIMD/Floating-point operation."
+  },
+  {,
+    "EventCode": "0xE7",
+    "EventName": "LD_DEP_STALL",
+    "BriefDescription": "Cycles there is a stall in the Wr stage because of a load miss"
+  },
+  {,
+    "EventCode": "0xE8",
+    "EventName": "ST_DEP_STALL",
+    "BriefDescription": "Cycles there is a stall in the Wr stage because of a store"
+  }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/mapfile.csv b/tools/perf/pmu-events/arch/arm64/mapfile.csv
index 219d6756134e..e61c9ca6cf9e 100644
--- a/tools/perf/pmu-events/arch/arm64/mapfile.csv
+++ b/tools/perf/pmu-events/arch/arm64/mapfile.csv
@@ -13,3 +13,4 @@
 #
 #Family-model,Version,Filename,EventType
 0x00000000420f5160,v1,cavium,core
+0x00000000410fd03[[:xdigit:]],v1,cortex-a53,core
-- 
cgit v1.2.3


From 6888ff66c44ffa3077ed69e978902d0ff4b84ae1 Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@intel.com>
Date: Thu, 18 Jan 2018 13:26:16 -0800
Subject: perf evlist: Remove stale mmap read for backward

perf_evlist__mmap_read_catchup() and perf_evlist__mmap_read_backward()
are only for overwrite mode.

But they read the evlist->mmap buffer which is for non-overwrite mode.

It did not bring any serious problem yet, because there is no one use
it.

Remove the unused interfaces.

Signed-off-by: Kan Liang <kan.liang@intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Acked-by: Wang Nan <wangnan0@huawei.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jin Yao <yao.jin@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1516310792-208685-2-git-send-email-kan.liang@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/evlist.c | 17 -----------------
 tools/perf/util/evlist.h |  4 ----
 2 files changed, 21 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index ac35cd214feb..e5fc14e53c05 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -715,28 +715,11 @@ union perf_event *perf_evlist__mmap_read_forward(struct perf_evlist *evlist, int
 	return perf_mmap__read_forward(md);
 }
 
-union perf_event *perf_evlist__mmap_read_backward(struct perf_evlist *evlist, int idx)
-{
-	struct perf_mmap *md = &evlist->mmap[idx];
-
-	/*
-	 * No need to check messup for backward ring buffer:
-	 * We can always read arbitrary long data from a backward
-	 * ring buffer unless we forget to pause it before reading.
-	 */
-	return perf_mmap__read_backward(md);
-}
-
 union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
 {
 	return perf_evlist__mmap_read_forward(evlist, idx);
 }
 
-void perf_evlist__mmap_read_catchup(struct perf_evlist *evlist, int idx)
-{
-	perf_mmap__read_catchup(&evlist->mmap[idx]);
-}
-
 void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx)
 {
 	perf_mmap__consume(&evlist->mmap[idx], false);
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 75f8e0ad5d76..336b838e6957 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -133,10 +133,6 @@ union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx);
 
 union perf_event *perf_evlist__mmap_read_forward(struct perf_evlist *evlist,
 						 int idx);
-union perf_event *perf_evlist__mmap_read_backward(struct perf_evlist *evlist,
-						  int idx);
-void perf_evlist__mmap_read_catchup(struct perf_evlist *evlist, int idx);
-
 void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx);
 
 int perf_evlist__open(struct perf_evlist *evlist);
-- 
cgit v1.2.3


From dc6c35c679e96987dc83a003f30bc2cc33c84c00 Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@intel.com>
Date: Thu, 18 Jan 2018 13:26:17 -0800
Subject: perf mmap: Recalculate size for overwrite mode

In perf_mmap__push(), the 'size' need to be recalculated, otherwise the
invalid data might be pushed to the record in overwrite mode.

The issue is introduced by commit 7fb4b407a124 ("perf mmap: Don't
discard prev in backward mode").

When the ring buffer is full in overwrite mode, backward_rb_find_range()
will be called to recalculate the 'start' and 'end'. The 'size' needs to
be recalculated accordingly.

Unconditionally recalculate the 'size', not just for full ring buffer in
overwrite mode. Because:

- There is no harmful to recalculate the 'size' for other cases.
- The code of calculating 'start' and 'end' will be factored out later.
  The new function does not need to return 'size'.

Signed-off-by: Kan Liang <kan.liang@intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jin Yao <yao.jin@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Wang Nan <wangnan0@huawei.com>
Fixes: 7fb4b407a124 ("perf mmap: Don't discard prev in backward mode")
Link: http://lkml.kernel.org/r/1516310792-208685-3-git-send-email-kan.liang@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/mmap.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'tools')

diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
index 05076e683938..97cf4fab564b 100644
--- a/tools/perf/util/mmap.c
+++ b/tools/perf/util/mmap.c
@@ -302,6 +302,8 @@ int perf_mmap__push(struct perf_mmap *md, bool overwrite,
 			return -1;
 	}
 
+	size = end - start;
+
 	if ((start & md->mask) + size != (end & md->mask)) {
 		buf = &data[start & md->mask];
 		size = md->mask + 1 - (start & md->mask);
-- 
cgit v1.2.3


From f92c8cbe597a5a2ccec702dff824f3fe0f3623eb Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@intel.com>
Date: Thu, 18 Jan 2018 13:26:18 -0800
Subject: perf mmap: Cleanup perf_mmap__push()

The first assignment for 'start' and 'end' is redundant.

Signed-off-by: Kan Liang <kan.liang@intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jin Yao <yao.jin@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/r/1516310792-208685-4-git-send-email-kan.liang@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/mmap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
index 97cf4fab564b..fbbbe87f0308 100644
--- a/tools/perf/util/mmap.c
+++ b/tools/perf/util/mmap.c
@@ -272,7 +272,7 @@ int perf_mmap__push(struct perf_mmap *md, bool overwrite,
 {
 	u64 head = perf_mmap__read_head(md);
 	u64 old = md->prev;
-	u64 end = head, start = old;
+	u64 end, start;
 	unsigned char *data = md->base + page_size;
 	unsigned long size;
 	void *buf;
-- 
cgit v1.2.3


From 8872481bd04850b19e053dc579de5a11b83b16fc Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@intel.com>
Date: Thu, 18 Jan 2018 13:26:19 -0800
Subject: perf mmap: Introduce perf_mmap__read_init()

The new function perf_mmap__read_init() is factored out from
perf_mmap__push().

It is to calculate the 'start' and 'end' of the available data in
ringbuffer.

No functional change.

Signed-off-by: Kan Liang <kan.liang@intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jin Yao <yao.jin@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/r/1516310792-208685-5-git-send-email-kan.liang@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/mmap.c | 37 +++++++++++++++++++++++++++----------
 tools/perf/util/mmap.h |  2 ++
 2 files changed, 29 insertions(+), 10 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
index fbbbe87f0308..c19a4e640e8e 100644
--- a/tools/perf/util/mmap.c
+++ b/tools/perf/util/mmap.c
@@ -267,24 +267,24 @@ static int overwrite_rb_find_range(void *buf, int mask, u64 head, u64 *start, u6
 	return -1;
 }
 
-int perf_mmap__push(struct perf_mmap *md, bool overwrite,
-		    void *to, int push(void *to, void *buf, size_t size))
+/*
+ * Report the start and end of the available data in ringbuffer
+ */
+int perf_mmap__read_init(struct perf_mmap *md, bool overwrite,
+			 u64 *startp, u64 *endp)
 {
 	u64 head = perf_mmap__read_head(md);
 	u64 old = md->prev;
-	u64 end, start;
 	unsigned char *data = md->base + page_size;
 	unsigned long size;
-	void *buf;
-	int rc = 0;
 
-	start = overwrite ? head : old;
-	end = overwrite ? old : head;
+	*startp = overwrite ? head : old;
+	*endp = overwrite ? old : head;
 
-	if (start == end)
+	if (*startp == *endp)
 		return 0;
 
-	size = end - start;
+	size = *endp - *startp;
 	if (size > (unsigned long)(md->mask) + 1) {
 		if (!overwrite) {
 			WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n");
@@ -298,10 +298,27 @@ int perf_mmap__push(struct perf_mmap *md, bool overwrite,
 		 * Backward ring buffer is full. We still have a chance to read
 		 * most of data from it.
 		 */
-		if (overwrite_rb_find_range(data, md->mask, head, &start, &end))
+		if (overwrite_rb_find_range(data, md->mask, head, startp, endp))
 			return -1;
 	}
 
+	return 1;
+}
+
+int perf_mmap__push(struct perf_mmap *md, bool overwrite,
+		    void *to, int push(void *to, void *buf, size_t size))
+{
+	u64 head = perf_mmap__read_head(md);
+	u64 end, start;
+	unsigned char *data = md->base + page_size;
+	unsigned long size;
+	void *buf;
+	int rc = 0;
+
+	rc = perf_mmap__read_init(md, overwrite, &start, &end);
+	if (rc < 1)
+		return rc;
+
 	size = end - start;
 
 	if ((start & md->mask) + size != (end & md->mask)) {
diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h
index e43d7b55a55f..9ab2b48df65b 100644
--- a/tools/perf/util/mmap.h
+++ b/tools/perf/util/mmap.h
@@ -94,4 +94,6 @@ int perf_mmap__push(struct perf_mmap *md, bool backward,
 
 size_t perf_mmap__mmap_len(struct perf_mmap *map);
 
+int perf_mmap__read_init(struct perf_mmap *md, bool overwrite,
+			 u64 *startp, u64 *endp);
 #endif /*__PERF_MMAP_H */
-- 
cgit v1.2.3


From 189f2cc91f9f2efef5d5f4dde43684c01b5f6f2f Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@intel.com>
Date: Thu, 18 Jan 2018 13:26:20 -0800
Subject: perf mmap: Add new return value logic for perf_mmap__read_init()

Improve the readability by using meaningful enum (-EAGAIN, -EINVAL and
0) to replace the three returning states (0, -1 and 1).

Suggested-by: Wang Nan <wangnan0@huawei.com>
Signed-off-by: Kan Liang <kan.liang@intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jin Yao <yao.jin@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/r/1516310792-208685-6-git-send-email-kan.liang@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/mmap.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
index c19a4e640e8e..38fa69dc635e 100644
--- a/tools/perf/util/mmap.c
+++ b/tools/perf/util/mmap.c
@@ -282,7 +282,7 @@ int perf_mmap__read_init(struct perf_mmap *md, bool overwrite,
 	*endp = overwrite ? old : head;
 
 	if (*startp == *endp)
-		return 0;
+		return -EAGAIN;
 
 	size = *endp - *startp;
 	if (size > (unsigned long)(md->mask) + 1) {
@@ -291,7 +291,7 @@ int perf_mmap__read_init(struct perf_mmap *md, bool overwrite,
 
 			md->prev = head;
 			perf_mmap__consume(md, overwrite);
-			return 0;
+			return -EAGAIN;
 		}
 
 		/*
@@ -299,10 +299,10 @@ int perf_mmap__read_init(struct perf_mmap *md, bool overwrite,
 		 * most of data from it.
 		 */
 		if (overwrite_rb_find_range(data, md->mask, head, startp, endp))
-			return -1;
+			return -EINVAL;
 	}
 
-	return 1;
+	return 0;
 }
 
 int perf_mmap__push(struct perf_mmap *md, bool overwrite,
@@ -316,8 +316,8 @@ int perf_mmap__push(struct perf_mmap *md, bool overwrite,
 	int rc = 0;
 
 	rc = perf_mmap__read_init(md, overwrite, &start, &end);
-	if (rc < 1)
-		return rc;
+	if (rc < 0)
+		return (rc == -EAGAIN) ? 0 : -1;
 
 	size = end - start;
 
-- 
cgit v1.2.3


From b4b036b4c76341a5034e872aca3727c4988a7304 Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@intel.com>
Date: Thu, 18 Jan 2018 13:26:21 -0800
Subject: perf mmap: Discard 'prev' in perf_mmap__read()

The 'start' and 'prev' variables are duplicates in perf_mmap__read().

Use 'map->prev' to replace 'start' in perf_mmap__read_*().

Suggested-by: Wang Nan <wangnan0@huawei.com>
Signed-off-by: Kan Liang <kan.liang@intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jin Yao <yao.jin@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/r/1516310792-208685-7-git-send-email-kan.liang@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/mmap.c | 28 ++++++++++------------------
 1 file changed, 10 insertions(+), 18 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
index 38fa69dc635e..125bfda9d037 100644
--- a/tools/perf/util/mmap.c
+++ b/tools/perf/util/mmap.c
@@ -22,29 +22,27 @@ size_t perf_mmap__mmap_len(struct perf_mmap *map)
 
 /* When check_messup is true, 'end' must points to a good entry */
 static union perf_event *perf_mmap__read(struct perf_mmap *map,
-					 u64 start, u64 end, u64 *prev)
+					 u64 *startp, u64 end)
 {
 	unsigned char *data = map->base + page_size;
 	union perf_event *event = NULL;
-	int diff = end - start;
+	int diff = end - *startp;
 
 	if (diff >= (int)sizeof(event->header)) {
 		size_t size;
 
-		event = (union perf_event *)&data[start & map->mask];
+		event = (union perf_event *)&data[*startp & map->mask];
 		size = event->header.size;
 
-		if (size < sizeof(event->header) || diff < (int)size) {
-			event = NULL;
-			goto broken_event;
-		}
+		if (size < sizeof(event->header) || diff < (int)size)
+			return NULL;
 
 		/*
 		 * Event straddles the mmap boundary -- header should always
 		 * be inside due to u64 alignment of output.
 		 */
-		if ((start & map->mask) + size != ((start + size) & map->mask)) {
-			unsigned int offset = start;
+		if ((*startp & map->mask) + size != ((*startp + size) & map->mask)) {
+			unsigned int offset = *startp;
 			unsigned int len = min(sizeof(*event), size), cpy;
 			void *dst = map->event_copy;
 
@@ -59,20 +57,15 @@ static union perf_event *perf_mmap__read(struct perf_mmap *map,
 			event = (union perf_event *)map->event_copy;
 		}
 
-		start += size;
+		*startp += size;
 	}
 
-broken_event:
-	if (prev)
-		*prev = start;
-
 	return event;
 }
 
 union perf_event *perf_mmap__read_forward(struct perf_mmap *map)
 {
 	u64 head;
-	u64 old = map->prev;
 
 	/*
 	 * Check if event was unmapped due to a POLLHUP/POLLERR.
@@ -82,13 +75,12 @@ union perf_event *perf_mmap__read_forward(struct perf_mmap *map)
 
 	head = perf_mmap__read_head(map);
 
-	return perf_mmap__read(map, old, head, &map->prev);
+	return perf_mmap__read(map, &map->prev, head);
 }
 
 union perf_event *perf_mmap__read_backward(struct perf_mmap *map)
 {
 	u64 head, end;
-	u64 start = map->prev;
 
 	/*
 	 * Check if event was unmapped due to a POLLHUP/POLLERR.
@@ -118,7 +110,7 @@ union perf_event *perf_mmap__read_backward(struct perf_mmap *map)
 	else
 		end = head + map->mask + 1;
 
-	return perf_mmap__read(map, start, end, &map->prev);
+	return perf_mmap__read(map, &map->prev, end);
 }
 
 void perf_mmap__read_catchup(struct perf_mmap *map)
-- 
cgit v1.2.3


From ee023de05f35484691f7d9e5c1f92195ac4d64d2 Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@intel.com>
Date: Thu, 18 Jan 2018 13:26:22 -0800
Subject: perf mmap: Introduce perf_mmap__read_done()

The direction of overwrite mode is backward. The last perf_mmap__read()
will set tail to map->prev. Need to correct the map->prev to head which
is the end of next read.

It will be used later.

Signed-off-by: Kan Liang <kan.liang@intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jin Yao <yao.jin@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/r/1516310792-208685-8-git-send-email-kan.liang@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/mmap.c | 11 +++++++++++
 tools/perf/util/mmap.h |  1 +
 2 files changed, 12 insertions(+)

(limited to 'tools')

diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
index 125bfda9d037..4f59eaefc706 100644
--- a/tools/perf/util/mmap.c
+++ b/tools/perf/util/mmap.c
@@ -338,3 +338,14 @@ int perf_mmap__push(struct perf_mmap *md, bool overwrite,
 out:
 	return rc;
 }
+
+/*
+ * Mandatory for overwrite mode
+ * The direction of overwrite mode is backward.
+ * The last perf_mmap__read() will set tail to map->prev.
+ * Need to correct the map->prev to head which is the end of next read.
+ */
+void perf_mmap__read_done(struct perf_mmap *map)
+{
+	map->prev = perf_mmap__read_head(map);
+}
diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h
index 9ab2b48df65b..95549d4af943 100644
--- a/tools/perf/util/mmap.h
+++ b/tools/perf/util/mmap.h
@@ -96,4 +96,5 @@ size_t perf_mmap__mmap_len(struct perf_mmap *map);
 
 int perf_mmap__read_init(struct perf_mmap *md, bool overwrite,
 			 u64 *startp, u64 *endp);
+void perf_mmap__read_done(struct perf_mmap *map);
 #endif /*__PERF_MMAP_H */
-- 
cgit v1.2.3


From 7bb45972952db9298fe5cc440160dcad1a66bfbc Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@intel.com>
Date: Thu, 18 Jan 2018 13:26:23 -0800
Subject: perf mmap: Introduce perf_mmap__read_event()

Except for 'perf record', the other perf tools read events one by one
from the ring buffer using perf_mmap__read_forward(). But it only
supports non-overwrite mode.

Introduce perf_mmap__read_event() to support both non-overwrite and
overwrite mode.

Usage:
perf_mmap__read_init()
while(event = perf_mmap__read_event()) {
        //process the event
        perf_mmap__consume()
}
perf_mmap__read_done()

It cannot use perf_mmap__read_backward(). Because it always reads the
stale buffer which is already processed. Furthermore, the forward and
backward concepts have been removed. The perf_mmap__read_backward() will
be replaced and discarded later.

Signed-off-by: Kan Liang <kan.liang@intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jin Yao <yao.jin@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/r/1516310792-208685-9-git-send-email-kan.liang@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/mmap.c | 39 +++++++++++++++++++++++++++++++++++++++
 tools/perf/util/mmap.h |  4 ++++
 2 files changed, 43 insertions(+)

(limited to 'tools')

diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
index 4f59eaefc706..f804926778b7 100644
--- a/tools/perf/util/mmap.c
+++ b/tools/perf/util/mmap.c
@@ -113,6 +113,45 @@ union perf_event *perf_mmap__read_backward(struct perf_mmap *map)
 	return perf_mmap__read(map, &map->prev, end);
 }
 
+/*
+ * Read event from ring buffer one by one.
+ * Return one event for each call.
+ *
+ * Usage:
+ * perf_mmap__read_init()
+ * while(event = perf_mmap__read_event()) {
+ *	//process the event
+ *	perf_mmap__consume()
+ * }
+ * perf_mmap__read_done()
+ */
+union perf_event *perf_mmap__read_event(struct perf_mmap *map,
+					bool overwrite,
+					u64 *startp, u64 end)
+{
+	union perf_event *event;
+
+	/*
+	 * Check if event was unmapped due to a POLLHUP/POLLERR.
+	 */
+	if (!refcount_read(&map->refcnt))
+		return NULL;
+
+	if (startp == NULL)
+		return NULL;
+
+	/* non-overwirte doesn't pause the ringbuffer */
+	if (!overwrite)
+		end = perf_mmap__read_head(map);
+
+	event = perf_mmap__read(map, startp, end);
+
+	if (!overwrite)
+		map->prev = *startp;
+
+	return event;
+}
+
 void perf_mmap__read_catchup(struct perf_mmap *map)
 {
 	u64 head;
diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h
index 95549d4af943..28718543dd42 100644
--- a/tools/perf/util/mmap.h
+++ b/tools/perf/util/mmap.h
@@ -89,6 +89,10 @@ static inline void perf_mmap__write_tail(struct perf_mmap *md, u64 tail)
 union perf_event *perf_mmap__read_forward(struct perf_mmap *map);
 union perf_event *perf_mmap__read_backward(struct perf_mmap *map);
 
+union perf_event *perf_mmap__read_event(struct perf_mmap *map,
+					bool overwrite,
+					u64 *startp, u64 end);
+
 int perf_mmap__push(struct perf_mmap *md, bool backward,
 		    void *to, int push(void *to, void *buf, size_t size));
 
-- 
cgit v1.2.3


From 600a7cfe88de2c6e44e23d61dd721b996b790eb2 Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@intel.com>
Date: Thu, 18 Jan 2018 13:26:24 -0800
Subject: perf test: Update mmap read functions for backward-ring-buffer test

Use the new perf_mmap__read_* interfaces for overwrite ringbuffer test.

Commiter notes:

Testing:

  [root@seventh ~]# perf test -v backward
  48: Read backward ring buffer                             :
  --- start ---
  test child forked, pid 8309
  Using CPUID GenuineIntel-6-9E
  mmap size 1052672B
  mmap size 8192B
  Finished reading overwrite ring buffer: rewind
  test child finished with 0
  ---- end ----
  Read backward ring buffer: Ok
  [root@seventh ~]#

Signed-off-by: Kan Liang <kan.liang@intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jin Yao <yao.jin@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/r/1516310792-208685-10-git-send-email-kan.liang@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/backward-ring-buffer.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/tests/backward-ring-buffer.c b/tools/perf/tests/backward-ring-buffer.c
index 4035d43523c3..e0b1b414d466 100644
--- a/tools/perf/tests/backward-ring-buffer.c
+++ b/tools/perf/tests/backward-ring-buffer.c
@@ -31,10 +31,12 @@ static int count_samples(struct perf_evlist *evlist, int *sample_count,
 	int i;
 
 	for (i = 0; i < evlist->nr_mmaps; i++) {
+		struct perf_mmap *map = &evlist->overwrite_mmap[i];
 		union perf_event *event;
+		u64 start, end;
 
-		perf_mmap__read_catchup(&evlist->overwrite_mmap[i]);
-		while ((event = perf_mmap__read_backward(&evlist->overwrite_mmap[i])) != NULL) {
+		perf_mmap__read_init(map, true, &start, &end);
+		while ((event = perf_mmap__read_event(map, true, &start, end)) != NULL) {
 			const u32 type = event->header.type;
 
 			switch (type) {
@@ -49,6 +51,7 @@ static int count_samples(struct perf_evlist *evlist, int *sample_count,
 				return TEST_FAIL;
 			}
 		}
+		perf_mmap__read_done(map);
 	}
 	return TEST_OK;
 }
-- 
cgit v1.2.3


From 3effc2f165a842d640873e29d4c5cc1650143aef Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@intel.com>
Date: Thu, 18 Jan 2018 13:26:25 -0800
Subject: perf mmap: Discard legacy interface for mmap read

Discards perf_mmap__read_backward() and perf_mmap__read_catchup(). No
tools use them.

There are tools still use perf_mmap__read_forward(). Keep it, but add
comments to point to the new interface for future use.

Signed-off-by: Kan Liang <kan.liang@intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jin Yao <yao.jin@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/r/1516310792-208685-11-git-send-email-kan.liang@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/mmap.c | 50 ++++----------------------------------------------
 tools/perf/util/mmap.h |  3 ---
 2 files changed, 4 insertions(+), 49 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
index f804926778b7..91531a7c8fbf 100644
--- a/tools/perf/util/mmap.c
+++ b/tools/perf/util/mmap.c
@@ -63,6 +63,10 @@ static union perf_event *perf_mmap__read(struct perf_mmap *map,
 	return event;
 }
 
+/*
+ * legacy interface for mmap read.
+ * Don't use it. Use perf_mmap__read_event().
+ */
 union perf_event *perf_mmap__read_forward(struct perf_mmap *map)
 {
 	u64 head;
@@ -78,41 +82,6 @@ union perf_event *perf_mmap__read_forward(struct perf_mmap *map)
 	return perf_mmap__read(map, &map->prev, head);
 }
 
-union perf_event *perf_mmap__read_backward(struct perf_mmap *map)
-{
-	u64 head, end;
-
-	/*
-	 * Check if event was unmapped due to a POLLHUP/POLLERR.
-	 */
-	if (!refcount_read(&map->refcnt))
-		return NULL;
-
-	head = perf_mmap__read_head(map);
-	if (!head)
-		return NULL;
-
-	/*
-	 * 'head' pointer starts from 0. Kernel minus sizeof(record) form
-	 * it each time when kernel writes to it, so in fact 'head' is
-	 * negative. 'end' pointer is made manually by adding the size of
-	 * the ring buffer to 'head' pointer, means the validate data can
-	 * read is the whole ring buffer. If 'end' is positive, the ring
-	 * buffer has not fully filled, so we must adjust 'end' to 0.
-	 *
-	 * However, since both 'head' and 'end' is unsigned, we can't
-	 * simply compare 'end' against 0. Here we compare '-head' and
-	 * the size of the ring buffer, where -head is the number of bytes
-	 * kernel write to the ring buffer.
-	 */
-	if (-head < (u64)(map->mask + 1))
-		end = 0;
-	else
-		end = head + map->mask + 1;
-
-	return perf_mmap__read(map, &map->prev, end);
-}
-
 /*
  * Read event from ring buffer one by one.
  * Return one event for each call.
@@ -152,17 +121,6 @@ union perf_event *perf_mmap__read_event(struct perf_mmap *map,
 	return event;
 }
 
-void perf_mmap__read_catchup(struct perf_mmap *map)
-{
-	u64 head;
-
-	if (!refcount_read(&map->refcnt))
-		return;
-
-	head = perf_mmap__read_head(map);
-	map->prev = head;
-}
-
 static bool perf_mmap__empty(struct perf_mmap *map)
 {
 	return perf_mmap__read_head(map) == map->prev && !map->auxtrace_mmap.base;
diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h
index 28718543dd42..ec7d3a24e276 100644
--- a/tools/perf/util/mmap.h
+++ b/tools/perf/util/mmap.h
@@ -65,8 +65,6 @@ void perf_mmap__put(struct perf_mmap *map);
 
 void perf_mmap__consume(struct perf_mmap *map, bool overwrite);
 
-void perf_mmap__read_catchup(struct perf_mmap *md);
-
 static inline u64 perf_mmap__read_head(struct perf_mmap *mm)
 {
 	struct perf_event_mmap_page *pc = mm->base;
@@ -87,7 +85,6 @@ static inline void perf_mmap__write_tail(struct perf_mmap *md, u64 tail)
 }
 
 union perf_event *perf_mmap__read_forward(struct perf_mmap *map);
-union perf_event *perf_mmap__read_backward(struct perf_mmap *map);
 
 union perf_event *perf_mmap__read_event(struct perf_mmap *map,
 					bool overwrite,
-- 
cgit v1.2.3


From 63878a53cedc3df31bd4ba8740a49fa0fc116ac6 Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@intel.com>
Date: Thu, 18 Jan 2018 13:26:26 -0800
Subject: perf top: Check per-event overwrite term

Per-event overwrite term is not forbidden in 'perf top', which can bring
problems. Because 'perf top' only support non-overwrite mode now.

Add new rules and check regarding to overwrite term for 'perf top'.
- All events either have same per-event term or don't have per-event
  mode setting. Otherwise, it will error out.
- Per-event overwrite term should be consistent as opts->overwrite.
  If not, updating the opts->overwrite according to per-event term.

Make it possible to support either non-overwrite or overwrite mode.
The overwrite mode is forbidden now, which will be removed when the
overwrite mode is supported later.

Signed-off-by: Kan Liang <kan.liang@intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jin Yao <yao.jin@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/r/1516310792-208685-12-git-send-email-kan.liang@intel.com
[ Renamed perf_top_overwrite_check to perf_top__overwrite_check, to follow existing convention ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-top.c | 73 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 73 insertions(+)

(limited to 'tools')

diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index c6ccda52117d..17783798924a 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -881,6 +881,68 @@ static void perf_top__mmap_read(struct perf_top *top)
 		perf_top__mmap_read_idx(top, i);
 }
 
+/*
+ * Check per-event overwrite term.
+ * perf top should support consistent term for all events.
+ * - All events don't have per-event term
+ *   E.g. "cpu/cpu-cycles/,cpu/instructions/"
+ *   Nothing change, return 0.
+ * - All events have same per-event term
+ *   E.g. "cpu/cpu-cycles,no-overwrite/,cpu/instructions,no-overwrite/
+ *   Using the per-event setting to replace the opts->overwrite if
+ *   they are different, then return 0.
+ * - Events have different per-event term
+ *   E.g. "cpu/cpu-cycles,overwrite/,cpu/instructions,no-overwrite/"
+ *   Return -1
+ * - Some of the event set per-event term, but some not.
+ *   E.g. "cpu/cpu-cycles/,cpu/instructions,no-overwrite/"
+ *   Return -1
+ */
+static int perf_top__overwrite_check(struct perf_top *top)
+{
+	struct record_opts *opts = &top->record_opts;
+	struct perf_evlist *evlist = top->evlist;
+	struct perf_evsel_config_term *term;
+	struct list_head *config_terms;
+	struct perf_evsel *evsel;
+	int set, overwrite = -1;
+
+	evlist__for_each_entry(evlist, evsel) {
+		set = -1;
+		config_terms = &evsel->config_terms;
+		list_for_each_entry(term, config_terms, list) {
+			if (term->type == PERF_EVSEL__CONFIG_TERM_OVERWRITE)
+				set = term->val.overwrite ? 1 : 0;
+		}
+
+		/* no term for current and previous event (likely) */
+		if ((overwrite < 0) && (set < 0))
+			continue;
+
+		/* has term for both current and previous event, compare */
+		if ((overwrite >= 0) && (set >= 0) && (overwrite != set))
+			return -1;
+
+		/* no term for current event but has term for previous one */
+		if ((overwrite >= 0) && (set < 0))
+			return -1;
+
+		/* has term for current event */
+		if ((overwrite < 0) && (set >= 0)) {
+			/* if it's first event, set overwrite */
+			if (evsel == perf_evlist__first(evlist))
+				overwrite = set;
+			else
+				return -1;
+		}
+	}
+
+	if ((overwrite >= 0) && (opts->overwrite != overwrite))
+		opts->overwrite = overwrite;
+
+	return 0;
+}
+
 static int perf_top__start_counters(struct perf_top *top)
 {
 	char msg[BUFSIZ];
@@ -888,6 +950,17 @@ static int perf_top__start_counters(struct perf_top *top)
 	struct perf_evlist *evlist = top->evlist;
 	struct record_opts *opts = &top->record_opts;
 
+	if (perf_top__overwrite_check(top)) {
+		ui__error("perf top only support consistent per-event "
+			  "overwrite setting for all events\n");
+		goto out_err;
+	}
+
+	if (opts->overwrite) {
+		ui__error("not support overwrite mode yet\n");
+		goto out_err;
+	}
+
 	perf_evlist__config(evlist, opts, &callchain_param);
 
 	evlist__for_each_entry(evlist, counter) {
-- 
cgit v1.2.3


From 9a831b3a32c5daf5d7cc672334d51930f78e4ea3 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Fri, 2 Feb 2018 11:27:25 -0300
Subject: perf evsel: Expose the perf_missing_features struct

As tools may need to adjust to missing features, as 'perf top' will, in
the next csets, to cope with a missing 'write_backward' feature.

Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jin Yao <yao.jin@linux.intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: https://lkml.kernel.org/n/tip-jelngl9q1ooaizvkcput9tic@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/evsel.c | 12 +-----------
 tools/perf/util/evsel.h | 14 ++++++++++++++
 2 files changed, 15 insertions(+), 11 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index ff359c9ece2e..ef351688b797 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -41,17 +41,7 @@
 
 #include "sane_ctype.h"
 
-static struct {
-	bool sample_id_all;
-	bool exclude_guest;
-	bool mmap2;
-	bool cloexec;
-	bool clockid;
-	bool clockid_wrong;
-	bool lbr_flags;
-	bool write_backward;
-	bool group_read;
-} perf_missing_features;
+struct perf_missing_features perf_missing_features;
 
 static clockid_t clockid;
 
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 846e41644525..a7487c6d1866 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -149,6 +149,20 @@ union u64_swap {
 	u32 val32[2];
 };
 
+struct perf_missing_features {
+	bool sample_id_all;
+	bool exclude_guest;
+	bool mmap2;
+	bool cloexec;
+	bool clockid;
+	bool clockid_wrong;
+	bool lbr_flags;
+	bool write_backward;
+	bool group_read;
+};
+
+extern struct perf_missing_features perf_missing_features;
+
 struct cpu_map;
 struct target;
 struct thread_map;
-- 
cgit v1.2.3


From 204721d7eabe6ee98aafce791ce3efdbc4715834 Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@intel.com>
Date: Thu, 18 Jan 2018 13:26:28 -0800
Subject: perf top: Add overwrite fall back

Switch to non-overwrite mode if kernel doesnot support overwrite
ringbuffer.

It's only effect when overwrite mode is supported.  No change to current
behavior.

Signed-off-by: Kan Liang <kan.liang@intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jin Yao <yao.jin@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/r/1516310792-208685-14-git-send-email-kan.liang@intel.com
[ Use perf_missing_features.write_backward instead of the non merged is_write_backward_fail() ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-top.c | 36 ++++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

(limited to 'tools')

diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 17783798924a..ee4bba1e282c 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -943,6 +943,27 @@ static int perf_top__overwrite_check(struct perf_top *top)
 	return 0;
 }
 
+static int perf_top_overwrite_fallback(struct perf_top *top,
+				       struct perf_evsel *evsel)
+{
+	struct record_opts *opts = &top->record_opts;
+	struct perf_evlist *evlist = top->evlist;
+	struct perf_evsel *counter;
+
+	if (!opts->overwrite)
+		return 0;
+
+	/* only fall back when first event fails */
+	if (evsel != perf_evlist__first(evlist))
+		return 0;
+
+	evlist__for_each_entry(evlist, counter)
+		counter->attr.write_backward = false;
+	opts->overwrite = false;
+	ui__warning("fall back to non-overwrite mode\n");
+	return 1;
+}
+
 static int perf_top__start_counters(struct perf_top *top)
 {
 	char msg[BUFSIZ];
@@ -967,6 +988,21 @@ static int perf_top__start_counters(struct perf_top *top)
 try_again:
 		if (perf_evsel__open(counter, top->evlist->cpus,
 				     top->evlist->threads) < 0) {
+
+			/*
+			 * Specially handle overwrite fall back.
+			 * Because perf top is the only tool which has
+			 * overwrite mode by default, support
+			 * both overwrite and non-overwrite mode, and
+			 * require consistent mode for all events.
+			 *
+			 * May move it to generic code with more tools
+			 * have similar attribute.
+			 */
+			if (perf_missing_features.write_backward &&
+			    perf_top_overwrite_fallback(top, counter))
+				goto try_again;
+
 			if (perf_evsel__fallback(counter, errno, msg, sizeof(msg))) {
 				if (verbose > 0)
 					ui__warning("%s\n", msg);
-- 
cgit v1.2.3


From 06cc1a470ab237b991901729b125404c164f3660 Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@intel.com>
Date: Thu, 18 Jan 2018 13:26:29 -0800
Subject: perf hists browser: Add parameter to disable lost event warning

For overwrite mode, the ringbuffer will be paused. The event lost is
expected. It needs a way to notify the browser not print the warning.

It will be used later for perf top to disable lost event warning in
overwrite mode. There is no behavior change for now.

Signed-off-by: Kan Liang <kan.liang@intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jin Yao <yao.jin@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/r/1516310792-208685-15-git-send-email-kan.liang@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-c2c.c       |  4 ++--
 tools/perf/builtin-report.c    |  3 ++-
 tools/perf/builtin-top.c       |  2 +-
 tools/perf/ui/browsers/hists.c | 38 +++++++++++++++++++++++++-------------
 tools/perf/ui/browsers/hists.h |  3 ++-
 tools/perf/util/hist.h         |  6 ++++--
 6 files changed, 36 insertions(+), 20 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index c0815a37fdb5..539c3d460158 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -2245,7 +2245,7 @@ static int perf_c2c__browse_cacheline(struct hist_entry *he)
 	c2c_browser__update_nr_entries(browser);
 
 	while (1) {
-		key = hist_browser__run(browser, "? - help");
+		key = hist_browser__run(browser, "? - help", true);
 
 		switch (key) {
 		case 's':
@@ -2314,7 +2314,7 @@ static int perf_c2c__hists_browse(struct hists *hists)
 	c2c_browser__update_nr_entries(browser);
 
 	while (1) {
-		key = hist_browser__run(browser, "? - help");
+		key = hist_browser__run(browser, "? - help", true);
 
 		switch (key) {
 		case 'q':
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 42a52dcc41cd..4ad5dc649716 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -530,7 +530,8 @@ static int report__browse_hists(struct report *rep)
 	case 1:
 		ret = perf_evlist__tui_browse_hists(evlist, help, NULL,
 						    rep->min_percent,
-						    &session->header.env);
+						    &session->header.env,
+						    true);
 		/*
 		 * Usually "ret" is the last pressed key, and we only
 		 * care if the key notifies us to switch data file.
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index ee4bba1e282c..7def861a9ec4 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -611,7 +611,7 @@ static void *display_thread_tui(void *arg)
 
 	perf_evlist__tui_browse_hists(top->evlist, help, &hbt,
 				      top->min_percent,
-				      &top->session->header.env);
+				      &top->session->header.env, true);
 
 	done = 1;
 	return NULL;
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index 68146f4620a5..6495ee55d9c3 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -608,7 +608,8 @@ static int hist_browser__title(struct hist_browser *browser, char *bf, size_t si
 	return browser->title ? browser->title(browser, bf, size) : 0;
 }
 
-int hist_browser__run(struct hist_browser *browser, const char *help)
+int hist_browser__run(struct hist_browser *browser, const char *help,
+		      bool warn_lost_event)
 {
 	int key;
 	char title[160];
@@ -638,8 +639,9 @@ int hist_browser__run(struct hist_browser *browser, const char *help)
 			nr_entries = hist_browser__nr_entries(browser);
 			ui_browser__update_nr_entries(&browser->b, nr_entries);
 
-			if (browser->hists->stats.nr_lost_warned !=
-			    browser->hists->stats.nr_events[PERF_RECORD_LOST]) {
+			if (warn_lost_event &&
+			    (browser->hists->stats.nr_lost_warned !=
+			    browser->hists->stats.nr_events[PERF_RECORD_LOST])) {
 				browser->hists->stats.nr_lost_warned =
 					browser->hists->stats.nr_events[PERF_RECORD_LOST];
 				ui_browser__warn_lost_events(&browser->b);
@@ -2763,7 +2765,8 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
 				    bool left_exits,
 				    struct hist_browser_timer *hbt,
 				    float min_pcnt,
-				    struct perf_env *env)
+				    struct perf_env *env,
+				    bool warn_lost_event)
 {
 	struct hists *hists = evsel__hists(evsel);
 	struct hist_browser *browser = perf_evsel_browser__new(evsel, hbt, env);
@@ -2844,7 +2847,8 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
 
 		nr_options = 0;
 
-		key = hist_browser__run(browser, helpline);
+		key = hist_browser__run(browser, helpline,
+					warn_lost_event);
 
 		if (browser->he_selection != NULL) {
 			thread = hist_browser__selected_thread(browser);
@@ -3184,7 +3188,8 @@ static void perf_evsel_menu__write(struct ui_browser *browser,
 
 static int perf_evsel_menu__run(struct perf_evsel_menu *menu,
 				int nr_events, const char *help,
-				struct hist_browser_timer *hbt)
+				struct hist_browser_timer *hbt,
+				bool warn_lost_event)
 {
 	struct perf_evlist *evlist = menu->b.priv;
 	struct perf_evsel *pos;
@@ -3203,7 +3208,9 @@ static int perf_evsel_menu__run(struct perf_evsel_menu *menu,
 		case K_TIMER:
 			hbt->timer(hbt->arg);
 
-			if (!menu->lost_events_warned && menu->lost_events) {
+			if (!menu->lost_events_warned &&
+			    menu->lost_events &&
+			    warn_lost_event) {
 				ui_browser__warn_lost_events(&menu->b);
 				menu->lost_events_warned = true;
 			}
@@ -3224,7 +3231,8 @@ browse_hists:
 			key = perf_evsel__hists_browse(pos, nr_events, help,
 						       true, hbt,
 						       menu->min_pcnt,
-						       menu->env);
+						       menu->env,
+						       warn_lost_event);
 			ui_browser__show_title(&menu->b, title);
 			switch (key) {
 			case K_TAB:
@@ -3282,7 +3290,8 @@ static int __perf_evlist__tui_browse_hists(struct perf_evlist *evlist,
 					   int nr_entries, const char *help,
 					   struct hist_browser_timer *hbt,
 					   float min_pcnt,
-					   struct perf_env *env)
+					   struct perf_env *env,
+					   bool warn_lost_event)
 {
 	struct perf_evsel *pos;
 	struct perf_evsel_menu menu = {
@@ -3309,13 +3318,15 @@ static int __perf_evlist__tui_browse_hists(struct perf_evlist *evlist,
 			menu.b.width = line_len;
 	}
 
-	return perf_evsel_menu__run(&menu, nr_entries, help, hbt);
+	return perf_evsel_menu__run(&menu, nr_entries, help,
+				    hbt, warn_lost_event);
 }
 
 int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help,
 				  struct hist_browser_timer *hbt,
 				  float min_pcnt,
-				  struct perf_env *env)
+				  struct perf_env *env,
+				  bool warn_lost_event)
 {
 	int nr_entries = evlist->nr_entries;
 
@@ -3325,7 +3336,7 @@ single_entry:
 
 		return perf_evsel__hists_browse(first, nr_entries, help,
 						false, hbt, min_pcnt,
-						env);
+						env, warn_lost_event);
 	}
 
 	if (symbol_conf.event_group) {
@@ -3342,5 +3353,6 @@ single_entry:
 	}
 
 	return __perf_evlist__tui_browse_hists(evlist, nr_entries, help,
-					       hbt, min_pcnt, env);
+					       hbt, min_pcnt, env,
+					       warn_lost_event);
 }
diff --git a/tools/perf/ui/browsers/hists.h b/tools/perf/ui/browsers/hists.h
index ba431777f559..9428bee076f2 100644
--- a/tools/perf/ui/browsers/hists.h
+++ b/tools/perf/ui/browsers/hists.h
@@ -28,7 +28,8 @@ struct hist_browser {
 
 struct hist_browser *hist_browser__new(struct hists *hists);
 void hist_browser__delete(struct hist_browser *browser);
-int hist_browser__run(struct hist_browser *browser, const char *help);
+int hist_browser__run(struct hist_browser *browser, const char *help,
+		      bool warn_lost_event);
 void hist_browser__init(struct hist_browser *browser,
 			struct hists *hists);
 #endif /* _PERF_UI_BROWSER_HISTS_H_ */
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index f6630cb95eff..02721b579746 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -430,7 +430,8 @@ int hist_entry__tui_annotate(struct hist_entry *he, struct perf_evsel *evsel,
 int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help,
 				  struct hist_browser_timer *hbt,
 				  float min_pcnt,
-				  struct perf_env *env);
+				  struct perf_env *env,
+				  bool warn_lost_event);
 int script_browse(const char *script_opt);
 #else
 static inline
@@ -438,7 +439,8 @@ int perf_evlist__tui_browse_hists(struct perf_evlist *evlist __maybe_unused,
 				  const char *help __maybe_unused,
 				  struct hist_browser_timer *hbt __maybe_unused,
 				  float min_pcnt __maybe_unused,
-				  struct perf_env *env __maybe_unused)
+				  struct perf_env *env __maybe_unused,
+				  bool warn_lost_event __maybe_unused)
 {
 	return 0;
 }
-- 
cgit v1.2.3


From a1ff5b05e988ca3620027148cd61013408ea4194 Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@intel.com>
Date: Thu, 18 Jan 2018 13:26:30 -0800
Subject: perf top: Remove lost events checking

There would be some records lost in overwrite mode because of pausing
the ringbuffer. It has little impact for the accuracy of the snapshot
and could be tolerated by 'perf top'.

Remove the lost events checking.

Signed-off-by: Kan Liang <kan.liang@intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jin Yao <yao.jin@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/r/1516310792-208685-16-git-send-email-kan.liang@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-top.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 7def861a9ec4..59653062bb48 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -283,8 +283,9 @@ static void perf_top__print_sym_table(struct perf_top *top)
 
 	printf("%-*.*s\n", win_width, win_width, graph_dotted_line);
 
-	if (hists->stats.nr_lost_warned !=
-	    hists->stats.nr_events[PERF_RECORD_LOST]) {
+	if (!top->record_opts.overwrite &&
+	    (hists->stats.nr_lost_warned !=
+	    hists->stats.nr_events[PERF_RECORD_LOST])) {
 		hists->stats.nr_lost_warned =
 			      hists->stats.nr_events[PERF_RECORD_LOST];
 		color_fprintf(stdout, PERF_COLOR_RED,
@@ -611,7 +612,8 @@ static void *display_thread_tui(void *arg)
 
 	perf_evlist__tui_browse_hists(top->evlist, help, &hbt,
 				      top->min_percent,
-				      &top->session->header.env, true);
+				      &top->session->header.env,
+				      !top->record_opts.overwrite);
 
 	done = 1;
 	return NULL;
-- 
cgit v1.2.3


From ebebbf082357f86cc84a4d46ce897a5750e41b7a Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@intel.com>
Date: Thu, 18 Jan 2018 13:26:31 -0800
Subject: perf top: Switch default mode to overwrite mode

perf_top__mmap_read() has a severe performance issue in the Knights
Landing/Mill platform, when monitoring heavy load systems. It costs
several minutes to finish, which is unacceptable.

Currently, 'perf top' uses the non overwrite mode. For non overwrite
mode, it tries to read everything in the ringbuffer and doesn't pause
it. Once there are lots of samples delivered persistently, the
processing time could be very long. Also, the latest samples could be
lost when the ringbuffer is full.

For overwrite mode, it takes a snapshot for the system by pausing the
ringbuffer, which could significantly reduce the processing time.  Also,
the overwrite mode always keep the latest samples.  Considering the real
time requirement for 'perf top', the overwrite mode is more suitable for
it.

Actually, 'perf top' was overwrite mode. It is changed to non overwrite
mode since commit 93fc64f14472 ("perf top: Switch to non overwrite
mode"). It's better to change it back to overwrite mode by default.

For the kernel which doesn't support overwrite mode, it will fall back
to non overwrite mode.

There would be some records lost in overwrite mode because of pausing
the ringbuffer. It has little impact for the accuracy of the snapshot
and can be tolerated.

For overwrite mode, unconditionally wait 100 ms before each snapshot. It
also reduces the overhead caused by pausing ringbuffer, especially on
light load system.

Signed-off-by: Kan Liang <kan.liang@intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jin Yao <yao.jin@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/r/1516310792-208685-17-git-send-email-kan.liang@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-top.c | 34 +++++++++++++++++++++++++---------
 1 file changed, 25 insertions(+), 9 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 59653062bb48..2b4914f34ed6 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -809,15 +809,23 @@ static void perf_event__process_sample(struct perf_tool *tool,
 
 static void perf_top__mmap_read_idx(struct perf_top *top, int idx)
 {
+	struct record_opts *opts = &top->record_opts;
+	struct perf_evlist *evlist = top->evlist;
 	struct perf_sample sample;
 	struct perf_evsel *evsel;
+	struct perf_mmap *md;
 	struct perf_session *session = top->session;
 	union perf_event *event;
 	struct machine *machine;
+	u64 end, start;
 	int ret;
 
-	while ((event = perf_evlist__mmap_read(top->evlist, idx)) != NULL) {
-		ret = perf_evlist__parse_sample(top->evlist, event, &sample);
+	md = opts->overwrite ? &evlist->overwrite_mmap[idx] : &evlist->mmap[idx];
+	if (perf_mmap__read_init(md, opts->overwrite, &start, &end) < 0)
+		return;
+
+	while ((event = perf_mmap__read_event(md, opts->overwrite, &start, end)) != NULL) {
+		ret = perf_evlist__parse_sample(evlist, event, &sample);
 		if (ret) {
 			pr_err("Can't parse sample, err = %d\n", ret);
 			goto next_event;
@@ -871,16 +879,28 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx)
 		} else
 			++session->evlist->stats.nr_unknown_events;
 next_event:
-		perf_evlist__mmap_consume(top->evlist, idx);
+		perf_mmap__consume(md, opts->overwrite);
 	}
+
+	perf_mmap__read_done(md);
 }
 
 static void perf_top__mmap_read(struct perf_top *top)
 {
+	bool overwrite = top->record_opts.overwrite;
+	struct perf_evlist *evlist = top->evlist;
 	int i;
 
+	if (overwrite)
+		perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_DATA_PENDING);
+
 	for (i = 0; i < top->evlist->nr_mmaps; i++)
 		perf_top__mmap_read_idx(top, i);
+
+	if (overwrite) {
+		perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
+		perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING);
+	}
 }
 
 /*
@@ -979,11 +999,6 @@ static int perf_top__start_counters(struct perf_top *top)
 		goto out_err;
 	}
 
-	if (opts->overwrite) {
-		ui__error("not support overwrite mode yet\n");
-		goto out_err;
-	}
-
 	perf_evlist__config(evlist, opts, &callchain_param);
 
 	evlist__for_each_entry(evlist, counter) {
@@ -1144,7 +1159,7 @@ static int __cmd_top(struct perf_top *top)
 
 		perf_top__mmap_read(top);
 
-		if (hits == top->samples)
+		if (opts->overwrite || (hits == top->samples))
 			ret = perf_evlist__poll(top->evlist, 100);
 
 		if (resize) {
@@ -1238,6 +1253,7 @@ int cmd_top(int argc, const char **argv)
 				.uses_mmap   = true,
 			},
 			.proc_map_timeout    = 500,
+			.overwrite	= 1,
 		},
 		.max_stack	     = sysctl_perf_event_max_stack,
 		.sym_pcnt_filter     = 5,
-- 
cgit v1.2.3


From 8cc42de736b617827a4e7664fb8d7a325bc125bc Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@intel.com>
Date: Thu, 18 Jan 2018 13:26:32 -0800
Subject: perf top: Check the latency of perf_top__mmap_read()

The latency of perf_top__mmap_read() should be lower than refresh time.
If not, give some hints to reduce the latency.

Signed-off-by: Kan Liang <kan.liang@intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jin Yao <yao.jin@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/r/1516310792-208685-18-git-send-email-kan.liang@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-top.c | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'tools')

diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 2b4914f34ed6..b7c823ba8374 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -889,8 +889,10 @@ static void perf_top__mmap_read(struct perf_top *top)
 {
 	bool overwrite = top->record_opts.overwrite;
 	struct perf_evlist *evlist = top->evlist;
+	unsigned long long start, end;
 	int i;
 
+	start = rdclock();
 	if (overwrite)
 		perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_DATA_PENDING);
 
@@ -901,6 +903,13 @@ static void perf_top__mmap_read(struct perf_top *top)
 		perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
 		perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING);
 	}
+	end = rdclock();
+
+	if ((end - start) > (unsigned long long)top->delay_secs * NSEC_PER_SEC)
+		ui__warning("Too slow to read ring buffer.\n"
+			    "Please try increasing the period (-c) or\n"
+			    "decreasing the freq (-F) or\n"
+			    "limiting the number of CPUs (-C)\n");
 }
 
 /*
-- 
cgit v1.2.3


From 6677d26c8befa462eab9be6c5335a939011e7e65 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 29 Jan 2018 15:03:59 +0200
Subject: perf tools: Substitute yet another strtoull()

Instead of home grown function let's use what library provides us.

Signed-off-by: Andriy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Link: http://lkml.kernel.org/r/20180129130359.1490-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/util.c | 24 ++----------------------
 1 file changed, 2 insertions(+), 22 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index 443892dabedb..1019bbc5dbd8 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -340,35 +340,15 @@ size_t hex_width(u64 v)
 	return n;
 }
 
-static int hex(char ch)
-{
-	if ((ch >= '0') && (ch <= '9'))
-		return ch - '0';
-	if ((ch >= 'a') && (ch <= 'f'))
-		return ch - 'a' + 10;
-	if ((ch >= 'A') && (ch <= 'F'))
-		return ch - 'A' + 10;
-	return -1;
-}
-
 /*
  * While we find nice hex chars, build a long_val.
  * Return number of chars processed.
  */
 int hex2u64(const char *ptr, u64 *long_val)
 {
-	const char *p = ptr;
-	*long_val = 0;
-
-	while (*p) {
-		const int hex_val = hex(*p);
+	char *p;
 
-		if (hex_val < 0)
-			break;
-
-		*long_val = (*long_val << 4) | hex_val;
-		p++;
-	}
+	*long_val = strtoull(ptr, &p, 16);
 
 	return p - ptr;
 }
-- 
cgit v1.2.3


From ba7e851642f48002def3450b279598c187721fd0 Mon Sep 17 00:00:00 2001
From: Sangwon Hong <qpakzk@gmail.com>
Date: Mon, 5 Feb 2018 20:48:35 +0900
Subject: perf data: Document missing --force option

Add the --force option to the man page.

Signed-off-by: Sangwon Hong <qpakzk@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Taeung Song <treeze.taeung@gmail.com>
Link: http://lkml.kernel.org/r/1517831315-31490-1-git-send-email-qpakzk@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-data.txt | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'tools')

diff --git a/tools/perf/Documentation/perf-data.txt b/tools/perf/Documentation/perf-data.txt
index f0796a47dfa3..90bb4aabe4f8 100644
--- a/tools/perf/Documentation/perf-data.txt
+++ b/tools/perf/Documentation/perf-data.txt
@@ -30,6 +30,10 @@ OPTIONS for 'convert'
 -i::
 	Specify input perf data file path.
 
+-f::
+--force::
+	Don't complain, do it.
+
 -v::
 --verbose::
         Be more verbose (show counter open errors, etc).
-- 
cgit v1.2.3


From 7a92453620d42c3a5fea94a864dc6aa04c262b93 Mon Sep 17 00:00:00 2001
From: Thomas Richter <tmricht@linux.vnet.ibm.com>
Date: Wed, 17 Jan 2018 09:38:31 +0100
Subject: perf test: Fix test trace+probe_libc_inet_pton.sh for s390x

On Intel test case trace+probe_libc_inet_pton.sh succeeds and the
output is:

[root@f27 perf]# ./perf trace --no-syscalls
                  -e probe_libc:inet_pton/max-stack=3/ ping -6 -c 1 ::1
PING ::1(::1) 56 data bytes
64 bytes from ::1: icmp_seq=1 ttl=64 time=0.037 ms

 --- ::1 ping statistics ---
1 packets transmitted, 1 received, 0% packet loss, time 0ms
rtt min/avg/max/mdev = 0.037/0.037/0.037/0.000 ms
     0.000 probe_libc:inet_pton:(7fa40ac618a0))
              __GI___inet_pton (/usr/lib64/libc-2.26.so)
              getaddrinfo (/usr/lib64/libc-2.26.so)
              main (/usr/bin/ping)

The kernel stack unwinder is used, it is specified implicitly
as call-graph=fp (frame pointer).

On s390x only dwarf is available for stack unwinding. It is also
done in user space. This requires different parameter setup
and result checking for s390x and Intel.

This patch adds separate perf trace setup and result checking
for Intel and s390x. On s390x specify this command line to
get a call-graph and handle the different call graph result
checking:

[root@s35lp76 perf]# ./perf trace --no-syscalls
	-e probe_libc:inet_pton/call-graph=dwarf/ ping -6 -c 1 ::1
PING ::1(::1) 56 data bytes
64 bytes from ::1: icmp_seq=1 ttl=64 time=0.041 ms

 --- ::1 ping statistics ---
1 packets transmitted, 1 received, 0% packet loss, time 0ms
rtt min/avg/max/mdev = 0.041/0.041/0.041/0.000 ms
     0.000 probe_libc:inet_pton:(3ffb9942060))
            __GI___inet_pton (/usr/lib64/libc-2.26.so)
            gaih_inet (inlined)
            __GI_getaddrinfo (inlined)
            main (/usr/bin/ping)
            __libc_start_main (/usr/lib64/libc-2.26.so)
            _start (/usr/bin/ping)
[root@s35lp76 perf]#

Before:
[root@s8360047 perf]# ./perf test -vv 58
58: probe libc's inet_pton & backtrace it with ping       :
 --- start ---
test child forked, pid 26349
PING ::1(::1) 56 data bytes
64 bytes from ::1: icmp_seq=1 ttl=64 time=0.079 ms
 --- ::1 ping statistics ---
1 packets transmitted, 1 received, 0% packet loss, time 0ms
rtt min/avg/max/mdev = 0.079/0.079/0.079/0.000 ms
0.000 probe_libc:inet_pton:(3ff925c2060))
test child finished with -1
 ---- end ----
probe libc's inet_pton & backtrace it with ping: FAILED!
[root@s8360047 perf]#

After:
[root@s35lp76 perf]# ./perf test -vv 57
57: probe libc's inet_pton & backtrace it with ping       :
 --- start ---
test child forked, pid 38708
PING ::1(::1) 56 data bytes
64 bytes from ::1: icmp_seq=1 ttl=64 time=0.038 ms
 --- ::1 ping statistics ---
1 packets transmitted, 1 received, 0% packet loss, time 0ms
rtt min/avg/max/mdev = 0.038/0.038/0.038/0.000 ms
0.000 probe_libc:inet_pton:(3ff87342060))
__GI___inet_pton (/usr/lib64/libc-2.26.so)
gaih_inet (inlined)
__GI_getaddrinfo (inlined)
main (/usr/bin/ping)
__libc_start_main (/usr/lib64/libc-2.26.so)
_start (/usr/bin/ping)
test child finished with 0
 ---- end ----
probe libc's inet_pton & backtrace it with ping: Ok
[root@s35lp76 perf]#

On Intel the test case runs unchanged and succeeds.

Signed-off-by: Thomas Richter <tmricht@linux.vnet.ibm.com>
Reviewed-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Link: http://lkml.kernel.org/r/20180117083831.101001-1-tmricht@linux.vnet.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 .../perf/tests/shell/trace+probe_libc_inet_pton.sh | 23 +++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh b/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh
index 8b3da21a08f1..c446c894b297 100755
--- a/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh
+++ b/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh
@@ -22,10 +22,23 @@ trace_libc_inet_pton_backtrace() {
 	expected[4]="rtt min.*"
 	expected[5]="[0-9]+\.[0-9]+[[:space:]]+probe_libc:inet_pton:\([[:xdigit:]]+\)"
 	expected[6]=".*inet_pton[[:space:]]\($libc\)$"
-	expected[7]="getaddrinfo[[:space:]]\($libc\)$"
-	expected[8]=".*\(.*/bin/ping.*\)$"
-
-	perf trace --no-syscalls -e probe_libc:inet_pton/max-stack=3/ ping -6 -c 1 ::1 2>&1 | grep -v ^$ | while read line ; do
+	case "$(uname -m)" in
+	s390x)
+		eventattr='call-graph=dwarf'
+		expected[7]="gaih_inet[[:space:]]\(inlined\)$"
+		expected[8]="__GI_getaddrinfo[[:space:]]\(inlined\)$"
+		expected[9]="main[[:space:]]\(.*/bin/ping.*\)$"
+		expected[10]="__libc_start_main[[:space:]]\($libc\)$"
+		expected[11]="_start[[:space:]]\(.*/bin/ping.*\)$"
+		;;
+	*)
+		eventattr='max-stack=3'
+		expected[7]="getaddrinfo[[:space:]]\($libc\)$"
+		expected[8]=".*\(.*/bin/ping.*\)$"
+		;;
+	esac
+
+	perf trace --no-syscalls -e probe_libc:inet_pton/$eventattr/ ping -6 -c 1 ::1 2>&1 | grep -v ^$ | while read line ; do
 		echo $line
 		echo "$line" | egrep -q "${expected[$idx]}"
 		if [ $? -ne 0 ] ; then
@@ -33,7 +46,7 @@ trace_libc_inet_pton_backtrace() {
 			exit 1
 		fi
 		let idx+=1
-		[ $idx -eq 9 ] && break
+		[ -z "${expected[$idx]}" ] && break
 	done
 }
 
-- 
cgit v1.2.3


From f091f1d6a2b4840c9b631d6138f5354401347863 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Tue, 13 Feb 2018 12:54:58 +0100
Subject: tools/headers: Synchronize kernel ABI headers, v4.16-rc1

Sync the following tooling headers with the latest kernel version:

  tools/arch/powerpc/include/uapi/asm/kvm.h
  tools/arch/x86/include/asm/cpufeatures.h
  tools/include/uapi/drm/i915_drm.h
  tools/include/uapi/linux/if_link.h
  tools/include/uapi/linux/kvm.h

All the changes are new ABI additions which don't impact their use
in existing tooling.

Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 tools/arch/powerpc/include/uapi/asm/kvm.h |  2 +
 tools/arch/x86/include/asm/cpufeatures.h  |  1 +
 tools/include/uapi/drm/i915_drm.h         | 77 ++++++++++++++++++++++++++
 tools/include/uapi/linux/if_link.h        |  1 +
 tools/include/uapi/linux/kvm.h            | 90 +++++++++++++++++++++++++++++++
 5 files changed, 171 insertions(+)

(limited to 'tools')

diff --git a/tools/arch/powerpc/include/uapi/asm/kvm.h b/tools/arch/powerpc/include/uapi/asm/kvm.h
index 637b7263cb86..833ed9a16adf 100644
--- a/tools/arch/powerpc/include/uapi/asm/kvm.h
+++ b/tools/arch/powerpc/include/uapi/asm/kvm.h
@@ -632,6 +632,8 @@ struct kvm_ppc_cpu_char {
 #define KVM_REG_PPC_TIDR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbc)
 #define KVM_REG_PPC_PSSCR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbd)
 
+#define KVM_REG_PPC_DEC_EXPIRY	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbe)
+
 /* Transactional Memory checkpointed state:
  * This is all GPRs, all VSX regs and a subset of SPRs
  */
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index 1d9199e1c2ad..0dfe4d3f74e2 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -210,6 +210,7 @@
 
 #define X86_FEATURE_MBA			( 7*32+18) /* Memory Bandwidth Allocation */
 #define X86_FEATURE_RSB_CTXSW		( 7*32+19) /* "" Fill RSB on context switches */
+#define X86_FEATURE_SEV			( 7*32+20) /* AMD Secure Encrypted Virtualization */
 
 #define X86_FEATURE_USE_IBPB		( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */
 
diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h
index ac3c6503ca27..536ee4febd74 100644
--- a/tools/include/uapi/drm/i915_drm.h
+++ b/tools/include/uapi/drm/i915_drm.h
@@ -86,6 +86,62 @@ enum i915_mocs_table_index {
 	I915_MOCS_CACHED,
 };
 
+/*
+ * Different engines serve different roles, and there may be more than one
+ * engine serving each role. enum drm_i915_gem_engine_class provides a
+ * classification of the role of the engine, which may be used when requesting
+ * operations to be performed on a certain subset of engines, or for providing
+ * information about that group.
+ */
+enum drm_i915_gem_engine_class {
+	I915_ENGINE_CLASS_RENDER	= 0,
+	I915_ENGINE_CLASS_COPY		= 1,
+	I915_ENGINE_CLASS_VIDEO		= 2,
+	I915_ENGINE_CLASS_VIDEO_ENHANCE	= 3,
+
+	I915_ENGINE_CLASS_INVALID	= -1
+};
+
+/**
+ * DOC: perf_events exposed by i915 through /sys/bus/event_sources/drivers/i915
+ *
+ */
+
+enum drm_i915_pmu_engine_sample {
+	I915_SAMPLE_BUSY = 0,
+	I915_SAMPLE_WAIT = 1,
+	I915_SAMPLE_SEMA = 2
+};
+
+#define I915_PMU_SAMPLE_BITS (4)
+#define I915_PMU_SAMPLE_MASK (0xf)
+#define I915_PMU_SAMPLE_INSTANCE_BITS (8)
+#define I915_PMU_CLASS_SHIFT \
+	(I915_PMU_SAMPLE_BITS + I915_PMU_SAMPLE_INSTANCE_BITS)
+
+#define __I915_PMU_ENGINE(class, instance, sample) \
+	((class) << I915_PMU_CLASS_SHIFT | \
+	(instance) << I915_PMU_SAMPLE_BITS | \
+	(sample))
+
+#define I915_PMU_ENGINE_BUSY(class, instance) \
+	__I915_PMU_ENGINE(class, instance, I915_SAMPLE_BUSY)
+
+#define I915_PMU_ENGINE_WAIT(class, instance) \
+	__I915_PMU_ENGINE(class, instance, I915_SAMPLE_WAIT)
+
+#define I915_PMU_ENGINE_SEMA(class, instance) \
+	__I915_PMU_ENGINE(class, instance, I915_SAMPLE_SEMA)
+
+#define __I915_PMU_OTHER(x) (__I915_PMU_ENGINE(0xff, 0xff, 0xf) + 1 + (x))
+
+#define I915_PMU_ACTUAL_FREQUENCY	__I915_PMU_OTHER(0)
+#define I915_PMU_REQUESTED_FREQUENCY	__I915_PMU_OTHER(1)
+#define I915_PMU_INTERRUPTS		__I915_PMU_OTHER(2)
+#define I915_PMU_RC6_RESIDENCY		__I915_PMU_OTHER(3)
+
+#define I915_PMU_LAST I915_PMU_RC6_RESIDENCY
+
 /* Each region is a minimum of 16k, and there are at most 255 of them.
  */
 #define I915_NR_TEX_REGIONS 255	/* table size 2k - maximum due to use
@@ -450,6 +506,27 @@ typedef struct drm_i915_irq_wait {
  */
 #define I915_PARAM_HAS_EXEC_FENCE_ARRAY  49
 
+/*
+ * Query whether every context (both per-file default and user created) is
+ * isolated (insofar as HW supports). If this parameter is not true, then
+ * freshly created contexts may inherit values from an existing context,
+ * rather than default HW values. If true, it also ensures (insofar as HW
+ * supports) that all state set by this context will not leak to any other
+ * context.
+ *
+ * As not every engine across every gen support contexts, the returned
+ * value reports the support of context isolation for individual engines by
+ * returning a bitmask of each engine class set to true if that class supports
+ * isolation.
+ */
+#define I915_PARAM_HAS_CONTEXT_ISOLATION 50
+
+/* Frequency of the command streamer timestamps given by the *_TIMESTAMP
+ * registers. This used to be fixed per platform but from CNL onwards, this
+ * might vary depending on the parts.
+ */
+#define I915_PARAM_CS_TIMESTAMP_FREQUENCY 51
+
 typedef struct drm_i915_getparam {
 	__s32 param;
 	/*
diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h
index 8616131e2c61..6d9447700e18 100644
--- a/tools/include/uapi/linux/if_link.h
+++ b/tools/include/uapi/linux/if_link.h
@@ -163,6 +163,7 @@ enum {
 	IFLA_IF_NETNSID,
 	IFLA_CARRIER_UP_COUNT,
 	IFLA_CARRIER_DOWN_COUNT,
+	IFLA_NEW_IFINDEX,
 	__IFLA_MAX
 };
 
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index 8fb90a0819c3..0fb5ef939732 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -1362,6 +1362,96 @@ struct kvm_s390_ucas_mapping {
 /* Available with KVM_CAP_S390_CMMA_MIGRATION */
 #define KVM_S390_GET_CMMA_BITS      _IOWR(KVMIO, 0xb8, struct kvm_s390_cmma_log)
 #define KVM_S390_SET_CMMA_BITS      _IOW(KVMIO, 0xb9, struct kvm_s390_cmma_log)
+/* Memory Encryption Commands */
+#define KVM_MEMORY_ENCRYPT_OP      _IOWR(KVMIO, 0xba, unsigned long)
+
+struct kvm_enc_region {
+	__u64 addr;
+	__u64 size;
+};
+
+#define KVM_MEMORY_ENCRYPT_REG_REGION    _IOR(KVMIO, 0xbb, struct kvm_enc_region)
+#define KVM_MEMORY_ENCRYPT_UNREG_REGION  _IOR(KVMIO, 0xbc, struct kvm_enc_region)
+
+/* Secure Encrypted Virtualization command */
+enum sev_cmd_id {
+	/* Guest initialization commands */
+	KVM_SEV_INIT = 0,
+	KVM_SEV_ES_INIT,
+	/* Guest launch commands */
+	KVM_SEV_LAUNCH_START,
+	KVM_SEV_LAUNCH_UPDATE_DATA,
+	KVM_SEV_LAUNCH_UPDATE_VMSA,
+	KVM_SEV_LAUNCH_SECRET,
+	KVM_SEV_LAUNCH_MEASURE,
+	KVM_SEV_LAUNCH_FINISH,
+	/* Guest migration commands (outgoing) */
+	KVM_SEV_SEND_START,
+	KVM_SEV_SEND_UPDATE_DATA,
+	KVM_SEV_SEND_UPDATE_VMSA,
+	KVM_SEV_SEND_FINISH,
+	/* Guest migration commands (incoming) */
+	KVM_SEV_RECEIVE_START,
+	KVM_SEV_RECEIVE_UPDATE_DATA,
+	KVM_SEV_RECEIVE_UPDATE_VMSA,
+	KVM_SEV_RECEIVE_FINISH,
+	/* Guest status and debug commands */
+	KVM_SEV_GUEST_STATUS,
+	KVM_SEV_DBG_DECRYPT,
+	KVM_SEV_DBG_ENCRYPT,
+	/* Guest certificates commands */
+	KVM_SEV_CERT_EXPORT,
+
+	KVM_SEV_NR_MAX,
+};
+
+struct kvm_sev_cmd {
+	__u32 id;
+	__u64 data;
+	__u32 error;
+	__u32 sev_fd;
+};
+
+struct kvm_sev_launch_start {
+	__u32 handle;
+	__u32 policy;
+	__u64 dh_uaddr;
+	__u32 dh_len;
+	__u64 session_uaddr;
+	__u32 session_len;
+};
+
+struct kvm_sev_launch_update_data {
+	__u64 uaddr;
+	__u32 len;
+};
+
+
+struct kvm_sev_launch_secret {
+	__u64 hdr_uaddr;
+	__u32 hdr_len;
+	__u64 guest_uaddr;
+	__u32 guest_len;
+	__u64 trans_uaddr;
+	__u32 trans_len;
+};
+
+struct kvm_sev_launch_measure {
+	__u64 uaddr;
+	__u32 len;
+};
+
+struct kvm_sev_guest_status {
+	__u32 handle;
+	__u32 policy;
+	__u32 state;
+};
+
+struct kvm_sev_dbg {
+	__u64 src_uaddr;
+	__u64 dst_uaddr;
+	__u32 len;
+};
 
 #define KVM_DEV_ASSIGN_ENABLE_IOMMU	(1 << 0)
 #define KVM_DEV_ASSIGN_PCI_2_3		(1 << 1)
-- 
cgit v1.2.3


From baa676103037e0dd145bb905eb51bc0b2f48fd49 Mon Sep 17 00:00:00 2001
From: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Date: Thu, 8 Feb 2018 12:47:49 +0100
Subject: perf s390: Grab a copy of arch/s390/kernel/syscall/syscall.tbl

Grab a copy of the s390 system call table file introduced with commit
857f46bfb07f53dc112d69bdfb137cc5ec3da7c5 "s390/syscalls: add system call
table".

Signed-off-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Michael Petlan <mpetlan@redhat.com>
Cc: Thomas Richter <tmricht@linux.vnet.ibm.com>
Cc: linux-s390@vger.kernel.org
LPU-Reference: 1518090470-2899-3-git-send-email-brueckner@linux.vnet.ibm.com
Link: https://lkml.kernel.org/n/tip-hpw7vdjp7g92ivgpddrp5ydq@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/arch/s390/entry/syscalls/syscall.tbl | 390 ++++++++++++++++++++++++
 1 file changed, 390 insertions(+)
 create mode 100644 tools/perf/arch/s390/entry/syscalls/syscall.tbl

(limited to 'tools')

diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
new file mode 100644
index 000000000000..b38d48464368
--- /dev/null
+++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
@@ -0,0 +1,390 @@
+# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+#
+# System call table for s390
+#
+# Format:
+#
+# <nr> <abi> <syscall> <entry-64bit> <compat-entry>
+#
+# where <abi> can be common, 64, or 32
+
+1    common	exit			sys_exit			sys_exit
+2    common	fork			sys_fork			sys_fork
+3    common	read			sys_read			compat_sys_s390_read
+4    common	write			sys_write			compat_sys_s390_write
+5    common	open			sys_open			compat_sys_open
+6    common	close			sys_close			sys_close
+7    common	restart_syscall		sys_restart_syscall		sys_restart_syscall
+8    common	creat			sys_creat			compat_sys_creat
+9    common	link			sys_link			compat_sys_link
+10   common	unlink			sys_unlink			compat_sys_unlink
+11   common	execve			sys_execve			compat_sys_execve
+12   common	chdir			sys_chdir			compat_sys_chdir
+13   32		time			-				compat_sys_time
+14   common	mknod			sys_mknod			compat_sys_mknod
+15   common	chmod			sys_chmod			compat_sys_chmod
+16   32		lchown			-				compat_sys_s390_lchown16
+19   common	lseek			sys_lseek			compat_sys_lseek
+20   common	getpid			sys_getpid			sys_getpid
+21   common	mount			sys_mount			compat_sys_mount
+22   common	umount			sys_oldumount			compat_sys_oldumount
+23   32		setuid			-				compat_sys_s390_setuid16
+24   32		getuid			-				compat_sys_s390_getuid16
+25   32		stime			-				compat_sys_stime
+26   common	ptrace			sys_ptrace			compat_sys_ptrace
+27   common	alarm			sys_alarm			sys_alarm
+29   common	pause			sys_pause			sys_pause
+30   common	utime			sys_utime			compat_sys_utime
+33   common	access			sys_access			compat_sys_access
+34   common	nice			sys_nice			sys_nice
+36   common	sync			sys_sync			sys_sync
+37   common	kill			sys_kill			sys_kill
+38   common	rename			sys_rename			compat_sys_rename
+39   common	mkdir			sys_mkdir			compat_sys_mkdir
+40   common	rmdir			sys_rmdir			compat_sys_rmdir
+41   common	dup			sys_dup				sys_dup
+42   common	pipe			sys_pipe			compat_sys_pipe
+43   common	times			sys_times			compat_sys_times
+45   common	brk			sys_brk				compat_sys_brk
+46   32		setgid			-				compat_sys_s390_setgid16
+47   32		getgid			-				compat_sys_s390_getgid16
+48   common	signal			sys_signal			compat_sys_signal
+49   32		geteuid			-				compat_sys_s390_geteuid16
+50   32		getegid			-				compat_sys_s390_getegid16
+51   common	acct			sys_acct			compat_sys_acct
+52   common	umount2			sys_umount			compat_sys_umount
+54   common	ioctl			sys_ioctl			compat_sys_ioctl
+55   common	fcntl			sys_fcntl			compat_sys_fcntl
+57   common	setpgid			sys_setpgid			sys_setpgid
+60   common	umask			sys_umask			sys_umask
+61   common	chroot			sys_chroot			compat_sys_chroot
+62   common	ustat			sys_ustat			compat_sys_ustat
+63   common	dup2			sys_dup2			sys_dup2
+64   common	getppid			sys_getppid			sys_getppid
+65   common	getpgrp			sys_getpgrp			sys_getpgrp
+66   common	setsid			sys_setsid			sys_setsid
+67   common	sigaction		sys_sigaction			compat_sys_sigaction
+70   32		setreuid		-				compat_sys_s390_setreuid16
+71   32		setregid		-				compat_sys_s390_setregid16
+72   common	sigsuspend		sys_sigsuspend			compat_sys_sigsuspend
+73   common	sigpending		sys_sigpending			compat_sys_sigpending
+74   common	sethostname		sys_sethostname			compat_sys_sethostname
+75   common	setrlimit		sys_setrlimit			compat_sys_setrlimit
+76   32		getrlimit		-				compat_sys_old_getrlimit
+77   common	getrusage		sys_getrusage			compat_sys_getrusage
+78   common	gettimeofday		sys_gettimeofday		compat_sys_gettimeofday
+79   common	settimeofday		sys_settimeofday		compat_sys_settimeofday
+80   32		getgroups		-				compat_sys_s390_getgroups16
+81   32		setgroups		-				compat_sys_s390_setgroups16
+83   common	symlink			sys_symlink			compat_sys_symlink
+85   common	readlink		sys_readlink			compat_sys_readlink
+86   common	uselib			sys_uselib			compat_sys_uselib
+87   common	swapon			sys_swapon			compat_sys_swapon
+88   common	reboot			sys_reboot			compat_sys_reboot
+89   common	readdir			-				compat_sys_old_readdir
+90   common	mmap			sys_old_mmap			compat_sys_s390_old_mmap
+91   common	munmap			sys_munmap			compat_sys_munmap
+92   common	truncate		sys_truncate			compat_sys_truncate
+93   common	ftruncate		sys_ftruncate			compat_sys_ftruncate
+94   common	fchmod			sys_fchmod			sys_fchmod
+95   32		fchown			-				compat_sys_s390_fchown16
+96   common	getpriority		sys_getpriority			sys_getpriority
+97   common	setpriority		sys_setpriority			sys_setpriority
+99   common	statfs			sys_statfs			compat_sys_statfs
+100  common	fstatfs			sys_fstatfs			compat_sys_fstatfs
+101  32		ioperm			-				-
+102  common	socketcall		sys_socketcall			compat_sys_socketcall
+103  common	syslog			sys_syslog			compat_sys_syslog
+104  common	setitimer		sys_setitimer			compat_sys_setitimer
+105  common	getitimer		sys_getitimer			compat_sys_getitimer
+106  common	stat			sys_newstat			compat_sys_newstat
+107  common	lstat			sys_newlstat			compat_sys_newlstat
+108  common	fstat			sys_newfstat			compat_sys_newfstat
+110  common	lookup_dcookie		sys_lookup_dcookie		compat_sys_lookup_dcookie
+111  common	vhangup			sys_vhangup			sys_vhangup
+112  common	idle			-				-
+114  common	wait4			sys_wait4			compat_sys_wait4
+115  common	swapoff			sys_swapoff			compat_sys_swapoff
+116  common	sysinfo			sys_sysinfo			compat_sys_sysinfo
+117  common	ipc			sys_s390_ipc			compat_sys_s390_ipc
+118  common	fsync			sys_fsync			sys_fsync
+119  common	sigreturn		sys_sigreturn			compat_sys_sigreturn
+120  common	clone			sys_clone			compat_sys_clone
+121  common	setdomainname		sys_setdomainname		compat_sys_setdomainname
+122  common	uname			sys_newuname			compat_sys_newuname
+124  common	adjtimex		sys_adjtimex			compat_sys_adjtimex
+125  common	mprotect		sys_mprotect			compat_sys_mprotect
+126  common	sigprocmask		sys_sigprocmask			compat_sys_sigprocmask
+127  common	create_module		-				-
+128  common	init_module		sys_init_module			compat_sys_init_module
+129  common	delete_module		sys_delete_module		compat_sys_delete_module
+130  common	get_kernel_syms		-				-
+131  common	quotactl		sys_quotactl			compat_sys_quotactl
+132  common	getpgid			sys_getpgid			sys_getpgid
+133  common	fchdir			sys_fchdir			sys_fchdir
+134  common	bdflush			sys_bdflush			compat_sys_bdflush
+135  common	sysfs			sys_sysfs			compat_sys_sysfs
+136  common	personality		sys_s390_personality		sys_s390_personality
+137  common	afs_syscall		-				-
+138  32		setfsuid		-				compat_sys_s390_setfsuid16
+139  32		setfsgid		-				compat_sys_s390_setfsgid16
+140  32		_llseek			-				compat_sys_llseek
+141  common	getdents		sys_getdents			compat_sys_getdents
+142  32		_newselect		-				compat_sys_select
+142  64		select			sys_select			-
+143  common	flock			sys_flock			sys_flock
+144  common	msync			sys_msync			compat_sys_msync
+145  common	readv			sys_readv			compat_sys_readv
+146  common	writev			sys_writev			compat_sys_writev
+147  common	getsid			sys_getsid			sys_getsid
+148  common	fdatasync		sys_fdatasync			sys_fdatasync
+149  common	_sysctl			sys_sysctl			compat_sys_sysctl
+150  common	mlock			sys_mlock			compat_sys_mlock
+151  common	munlock			sys_munlock			compat_sys_munlock
+152  common	mlockall		sys_mlockall			sys_mlockall
+153  common	munlockall		sys_munlockall			sys_munlockall
+154  common	sched_setparam		sys_sched_setparam		compat_sys_sched_setparam
+155  common	sched_getparam		sys_sched_getparam		compat_sys_sched_getparam
+156  common	sched_setscheduler	sys_sched_setscheduler		compat_sys_sched_setscheduler
+157  common	sched_getscheduler	sys_sched_getscheduler		sys_sched_getscheduler
+158  common	sched_yield		sys_sched_yield			sys_sched_yield
+159  common	sched_get_priority_max	sys_sched_get_priority_max	sys_sched_get_priority_max
+160  common	sched_get_priority_min	sys_sched_get_priority_min	sys_sched_get_priority_min
+161  common	sched_rr_get_interval	sys_sched_rr_get_interval	compat_sys_sched_rr_get_interval
+162  common	nanosleep		sys_nanosleep			compat_sys_nanosleep
+163  common	mremap			sys_mremap			compat_sys_mremap
+164  32		setresuid		-				compat_sys_s390_setresuid16
+165  32		getresuid		-				compat_sys_s390_getresuid16
+167  common	query_module		-				-
+168  common	poll			sys_poll			compat_sys_poll
+169  common	nfsservctl		-				-
+170  32		setresgid		-				compat_sys_s390_setresgid16
+171  32		getresgid		-				compat_sys_s390_getresgid16
+172  common	prctl			sys_prctl			compat_sys_prctl
+173  common	rt_sigreturn		sys_rt_sigreturn		compat_sys_rt_sigreturn
+174  common	rt_sigaction		sys_rt_sigaction		compat_sys_rt_sigaction
+175  common	rt_sigprocmask		sys_rt_sigprocmask		compat_sys_rt_sigprocmask
+176  common	rt_sigpending		sys_rt_sigpending		compat_sys_rt_sigpending
+177  common	rt_sigtimedwait		sys_rt_sigtimedwait		compat_sys_rt_sigtimedwait
+178  common	rt_sigqueueinfo		sys_rt_sigqueueinfo		compat_sys_rt_sigqueueinfo
+179  common	rt_sigsuspend		sys_rt_sigsuspend		compat_sys_rt_sigsuspend
+180  common	pread64			sys_pread64			compat_sys_s390_pread64
+181  common	pwrite64		sys_pwrite64			compat_sys_s390_pwrite64
+182  32		chown			-				compat_sys_s390_chown16
+183  common	getcwd			sys_getcwd			compat_sys_getcwd
+184  common	capget			sys_capget			compat_sys_capget
+185  common	capset			sys_capset			compat_sys_capset
+186  common	sigaltstack		sys_sigaltstack			compat_sys_sigaltstack
+187  common	sendfile		sys_sendfile64			compat_sys_sendfile
+188  common	getpmsg			-				-
+189  common	putpmsg			-				-
+190  common	vfork			sys_vfork			sys_vfork
+191  32		ugetrlimit		-				compat_sys_getrlimit
+191  64		getrlimit		sys_getrlimit			-
+192  32		mmap2			-				compat_sys_s390_mmap2
+193  32		truncate64		-				compat_sys_s390_truncate64
+194  32		ftruncate64		-				compat_sys_s390_ftruncate64
+195  32		stat64			-				compat_sys_s390_stat64
+196  32		lstat64			-				compat_sys_s390_lstat64
+197  32		fstat64			-				compat_sys_s390_fstat64
+198  32		lchown32		-				compat_sys_lchown
+198  64		lchown			sys_lchown			-
+199  32		getuid32		-				sys_getuid
+199  64		getuid			sys_getuid			-
+200  32		getgid32		-				sys_getgid
+200  64		getgid			sys_getgid			-
+201  32		geteuid32		-				sys_geteuid
+201  64		geteuid			sys_geteuid			-
+202  32		getegid32		-				sys_getegid
+202  64		getegid			sys_getegid			-
+203  32		setreuid32		-				sys_setreuid
+203  64		setreuid		sys_setreuid			-
+204  32		setregid32		-				sys_setregid
+204  64		setregid		sys_setregid			-
+205  32		getgroups32		-				compat_sys_getgroups
+205  64		getgroups		sys_getgroups			-
+206  32		setgroups32		-				compat_sys_setgroups
+206  64		setgroups		sys_setgroups			-
+207  32		fchown32		-				sys_fchown
+207  64		fchown			sys_fchown			-
+208  32		setresuid32		-				sys_setresuid
+208  64		setresuid		sys_setresuid			-
+209  32		getresuid32		-				compat_sys_getresuid
+209  64		getresuid		sys_getresuid			-
+210  32		setresgid32		-				sys_setresgid
+210  64		setresgid		sys_setresgid			-
+211  32		getresgid32		-				compat_sys_getresgid
+211  64		getresgid		sys_getresgid			-
+212  32		chown32			-				compat_sys_chown
+212  64		chown			sys_chown			-
+213  32		setuid32		-				sys_setuid
+213  64		setuid			sys_setuid			-
+214  32		setgid32		-				sys_setgid
+214  64		setgid			sys_setgid			-
+215  32		setfsuid32		-				sys_setfsuid
+215  64		setfsuid		sys_setfsuid			-
+216  32		setfsgid32		-				sys_setfsgid
+216  64		setfsgid		sys_setfsgid			-
+217  common	pivot_root		sys_pivot_root			compat_sys_pivot_root
+218  common	mincore			sys_mincore			compat_sys_mincore
+219  common	madvise			sys_madvise			compat_sys_madvise
+220  common	getdents64		sys_getdents64			compat_sys_getdents64
+221  32		fcntl64			-				compat_sys_fcntl64
+222  common	readahead		sys_readahead			compat_sys_s390_readahead
+223  32		sendfile64		-				compat_sys_sendfile64
+224  common	setxattr		sys_setxattr			compat_sys_setxattr
+225  common	lsetxattr		sys_lsetxattr			compat_sys_lsetxattr
+226  common	fsetxattr		sys_fsetxattr			compat_sys_fsetxattr
+227  common	getxattr		sys_getxattr			compat_sys_getxattr
+228  common	lgetxattr		sys_lgetxattr			compat_sys_lgetxattr
+229  common	fgetxattr		sys_fgetxattr			compat_sys_fgetxattr
+230  common	listxattr		sys_listxattr			compat_sys_listxattr
+231  common	llistxattr		sys_llistxattr			compat_sys_llistxattr
+232  common	flistxattr		sys_flistxattr			compat_sys_flistxattr
+233  common	removexattr		sys_removexattr			compat_sys_removexattr
+234  common	lremovexattr		sys_lremovexattr		compat_sys_lremovexattr
+235  common	fremovexattr		sys_fremovexattr		compat_sys_fremovexattr
+236  common	gettid			sys_gettid			sys_gettid
+237  common	tkill			sys_tkill			sys_tkill
+238  common	futex			sys_futex			compat_sys_futex
+239  common	sched_setaffinity	sys_sched_setaffinity		compat_sys_sched_setaffinity
+240  common	sched_getaffinity	sys_sched_getaffinity		compat_sys_sched_getaffinity
+241  common	tgkill			sys_tgkill			sys_tgkill
+243  common	io_setup		sys_io_setup			compat_sys_io_setup
+244  common	io_destroy		sys_io_destroy			compat_sys_io_destroy
+245  common	io_getevents		sys_io_getevents		compat_sys_io_getevents
+246  common	io_submit		sys_io_submit			compat_sys_io_submit
+247  common	io_cancel		sys_io_cancel			compat_sys_io_cancel
+248  common	exit_group		sys_exit_group			sys_exit_group
+249  common	epoll_create		sys_epoll_create		sys_epoll_create
+250  common	epoll_ctl		sys_epoll_ctl			compat_sys_epoll_ctl
+251  common	epoll_wait		sys_epoll_wait			compat_sys_epoll_wait
+252  common	set_tid_address		sys_set_tid_address		compat_sys_set_tid_address
+253  common	fadvise64		sys_fadvise64_64		compat_sys_s390_fadvise64
+254  common	timer_create		sys_timer_create		compat_sys_timer_create
+255  common	timer_settime		sys_timer_settime		compat_sys_timer_settime
+256  common	timer_gettime		sys_timer_gettime		compat_sys_timer_gettime
+257  common	timer_getoverrun	sys_timer_getoverrun		sys_timer_getoverrun
+258  common	timer_delete		sys_timer_delete		sys_timer_delete
+259  common	clock_settime		sys_clock_settime		compat_sys_clock_settime
+260  common	clock_gettime		sys_clock_gettime		compat_sys_clock_gettime
+261  common	clock_getres		sys_clock_getres		compat_sys_clock_getres
+262  common	clock_nanosleep		sys_clock_nanosleep		compat_sys_clock_nanosleep
+264  32		fadvise64_64		-				compat_sys_s390_fadvise64_64
+265  common	statfs64		sys_statfs64			compat_sys_statfs64
+266  common	fstatfs64		sys_fstatfs64			compat_sys_fstatfs64
+267  common	remap_file_pages	sys_remap_file_pages		compat_sys_remap_file_pages
+268  common	mbind			sys_mbind			compat_sys_mbind
+269  common	get_mempolicy		sys_get_mempolicy		compat_sys_get_mempolicy
+270  common	set_mempolicy		sys_set_mempolicy		compat_sys_set_mempolicy
+271  common	mq_open			sys_mq_open			compat_sys_mq_open
+272  common	mq_unlink		sys_mq_unlink			compat_sys_mq_unlink
+273  common	mq_timedsend		sys_mq_timedsend		compat_sys_mq_timedsend
+274  common	mq_timedreceive		sys_mq_timedreceive		compat_sys_mq_timedreceive
+275  common	mq_notify		sys_mq_notify			compat_sys_mq_notify
+276  common	mq_getsetattr		sys_mq_getsetattr		compat_sys_mq_getsetattr
+277  common	kexec_load		sys_kexec_load			compat_sys_kexec_load
+278  common	add_key			sys_add_key			compat_sys_add_key
+279  common	request_key		sys_request_key			compat_sys_request_key
+280  common	keyctl			sys_keyctl			compat_sys_keyctl
+281  common	waitid			sys_waitid			compat_sys_waitid
+282  common	ioprio_set		sys_ioprio_set			sys_ioprio_set
+283  common	ioprio_get		sys_ioprio_get			sys_ioprio_get
+284  common	inotify_init		sys_inotify_init		sys_inotify_init
+285  common	inotify_add_watch	sys_inotify_add_watch		compat_sys_inotify_add_watch
+286  common	inotify_rm_watch	sys_inotify_rm_watch		sys_inotify_rm_watch
+287  common	migrate_pages		sys_migrate_pages		compat_sys_migrate_pages
+288  common	openat			sys_openat			compat_sys_openat
+289  common	mkdirat			sys_mkdirat			compat_sys_mkdirat
+290  common	mknodat			sys_mknodat			compat_sys_mknodat
+291  common	fchownat		sys_fchownat			compat_sys_fchownat
+292  common	futimesat		sys_futimesat			compat_sys_futimesat
+293  32		fstatat64		-				compat_sys_s390_fstatat64
+293  64		newfstatat		sys_newfstatat			-
+294  common	unlinkat		sys_unlinkat			compat_sys_unlinkat
+295  common	renameat		sys_renameat			compat_sys_renameat
+296  common	linkat			sys_linkat			compat_sys_linkat
+297  common	symlinkat		sys_symlinkat			compat_sys_symlinkat
+298  common	readlinkat		sys_readlinkat			compat_sys_readlinkat
+299  common	fchmodat		sys_fchmodat			compat_sys_fchmodat
+300  common	faccessat		sys_faccessat			compat_sys_faccessat
+301  common	pselect6		sys_pselect6			compat_sys_pselect6
+302  common	ppoll			sys_ppoll			compat_sys_ppoll
+303  common	unshare			sys_unshare			compat_sys_unshare
+304  common	set_robust_list		sys_set_robust_list		compat_sys_set_robust_list
+305  common	get_robust_list		sys_get_robust_list		compat_sys_get_robust_list
+306  common	splice			sys_splice			compat_sys_splice
+307  common	sync_file_range		sys_sync_file_range		compat_sys_s390_sync_file_range
+308  common	tee			sys_tee				compat_sys_tee
+309  common	vmsplice		sys_vmsplice			compat_sys_vmsplice
+310  common	move_pages		sys_move_pages			compat_sys_move_pages
+311  common	getcpu			sys_getcpu			compat_sys_getcpu
+312  common	epoll_pwait		sys_epoll_pwait			compat_sys_epoll_pwait
+313  common	utimes			sys_utimes			compat_sys_utimes
+314  common	fallocate		sys_fallocate			compat_sys_s390_fallocate
+315  common	utimensat		sys_utimensat			compat_sys_utimensat
+316  common	signalfd		sys_signalfd			compat_sys_signalfd
+317  common	timerfd			-				-
+318  common	eventfd			sys_eventfd			sys_eventfd
+319  common	timerfd_create		sys_timerfd_create		sys_timerfd_create
+320  common	timerfd_settime		sys_timerfd_settime		compat_sys_timerfd_settime
+321  common	timerfd_gettime		sys_timerfd_gettime		compat_sys_timerfd_gettime
+322  common	signalfd4		sys_signalfd4			compat_sys_signalfd4
+323  common	eventfd2		sys_eventfd2			sys_eventfd2
+324  common	inotify_init1		sys_inotify_init1		sys_inotify_init1
+325  common	pipe2			sys_pipe2			compat_sys_pipe2
+326  common	dup3			sys_dup3			sys_dup3
+327  common	epoll_create1		sys_epoll_create1		sys_epoll_create1
+328  common	preadv			sys_preadv			compat_sys_preadv
+329  common	pwritev			sys_pwritev			compat_sys_pwritev
+330  common	rt_tgsigqueueinfo	sys_rt_tgsigqueueinfo		compat_sys_rt_tgsigqueueinfo
+331  common	perf_event_open		sys_perf_event_open		compat_sys_perf_event_open
+332  common	fanotify_init		sys_fanotify_init		sys_fanotify_init
+333  common	fanotify_mark		sys_fanotify_mark		compat_sys_fanotify_mark
+334  common	prlimit64		sys_prlimit64			compat_sys_prlimit64
+335  common	name_to_handle_at	sys_name_to_handle_at		compat_sys_name_to_handle_at
+336  common	open_by_handle_at	sys_open_by_handle_at		compat_sys_open_by_handle_at
+337  common	clock_adjtime		sys_clock_adjtime		compat_sys_clock_adjtime
+338  common	syncfs			sys_syncfs			sys_syncfs
+339  common	setns			sys_setns			sys_setns
+340  common	process_vm_readv	sys_process_vm_readv		compat_sys_process_vm_readv
+341  common	process_vm_writev	sys_process_vm_writev		compat_sys_process_vm_writev
+342  common	s390_runtime_instr	sys_s390_runtime_instr		sys_s390_runtime_instr
+343  common	kcmp			sys_kcmp			compat_sys_kcmp
+344  common	finit_module		sys_finit_module		compat_sys_finit_module
+345  common	sched_setattr		sys_sched_setattr		compat_sys_sched_setattr
+346  common	sched_getattr		sys_sched_getattr		compat_sys_sched_getattr
+347  common	renameat2		sys_renameat2			compat_sys_renameat2
+348  common	seccomp			sys_seccomp			compat_sys_seccomp
+349  common	getrandom		sys_getrandom			compat_sys_getrandom
+350  common	memfd_create		sys_memfd_create		compat_sys_memfd_create
+351  common	bpf			sys_bpf				compat_sys_bpf
+352  common	s390_pci_mmio_write	sys_s390_pci_mmio_write		compat_sys_s390_pci_mmio_write
+353  common	s390_pci_mmio_read	sys_s390_pci_mmio_read		compat_sys_s390_pci_mmio_read
+354  common	execveat		sys_execveat			compat_sys_execveat
+355  common	userfaultfd		sys_userfaultfd			sys_userfaultfd
+356  common	membarrier		sys_membarrier			sys_membarrier
+357  common	recvmmsg		sys_recvmmsg			compat_sys_recvmmsg
+358  common	sendmmsg		sys_sendmmsg			compat_sys_sendmmsg
+359  common	socket			sys_socket			sys_socket
+360  common	socketpair		sys_socketpair			compat_sys_socketpair
+361  common	bind			sys_bind			compat_sys_bind
+362  common	connect			sys_connect			compat_sys_connect
+363  common	listen			sys_listen			sys_listen
+364  common	accept4			sys_accept4			compat_sys_accept4
+365  common	getsockopt		sys_getsockopt			compat_sys_getsockopt
+366  common	setsockopt		sys_setsockopt			compat_sys_setsockopt
+367  common	getsockname		sys_getsockname			compat_sys_getsockname
+368  common	getpeername		sys_getpeername			compat_sys_getpeername
+369  common	sendto			sys_sendto			compat_sys_sendto
+370  common	sendmsg			sys_sendmsg			compat_sys_sendmsg
+371  common	recvfrom		sys_recvfrom			compat_sys_recvfrom
+372  common	recvmsg			sys_recvmsg			compat_sys_recvmsg
+373  common	shutdown		sys_shutdown			sys_shutdown
+374  common	mlock2			sys_mlock2			compat_sys_mlock2
+375  common	copy_file_range		sys_copy_file_range		compat_sys_copy_file_range
+376  common	preadv2			sys_preadv2			compat_sys_preadv2
+377  common	pwritev2		sys_pwritev2			compat_sys_pwritev2
+378  common	s390_guarded_storage	sys_s390_guarded_storage	compat_sys_s390_guarded_storage
+379  common	statx			sys_statx			compat_sys_statx
+380  common	s390_sthyi		sys_s390_sthyi			compat_sys_s390_sthyi
-- 
cgit v1.2.3


From 690d22d9d4423b4522fb44a71145403eef2df834 Mon Sep 17 00:00:00 2001
From: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Date: Thu, 8 Feb 2018 12:47:50 +0100
Subject: perf s390: Rework system call table creation by using syscall.tbl

Recently, s390 uses a syscall.tbl input file to generate its system call
table and unistd uapi header files.  Hence, update mksyscalltbl to use
it as input to create the system table for perf.

Signed-off-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Michael Petlan <mpetlan@redhat.com>
Cc: Thomas Richter <tmricht@linux.vnet.ibm.com>
Cc: linux-s390@vger.kernel.org
LPU-Reference: 1518090470-2899-4-git-send-email-brueckner@linux.vnet.ibm.com
Link: https://lkml.kernel.org/n/tip-bdyhllhsq1zgxv2qx4m377y6@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/arch/s390/Makefile                    | 10 +++++++---
 tools/perf/arch/s390/entry/syscalls/mksyscalltbl | 18 +++++++-----------
 2 files changed, 14 insertions(+), 14 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/arch/s390/Makefile b/tools/perf/arch/s390/Makefile
index 48228de415d0..dfa6e3103437 100644
--- a/tools/perf/arch/s390/Makefile
+++ b/tools/perf/arch/s390/Makefile
@@ -10,15 +10,19 @@ PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1
 
 out    := $(OUTPUT)arch/s390/include/generated/asm
 header := $(out)/syscalls_64.c
-sysdef := $(srctree)/tools/arch/s390/include/uapi/asm/unistd.h
-sysprf := $(srctree)/tools/perf/arch/s390/entry/syscalls/
+syskrn := $(srctree)/arch/s390/kernel/syscalls/syscall.tbl
+sysprf := $(srctree)/tools/perf/arch/s390/entry/syscalls
+sysdef := $(sysprf)/syscall.tbl
 systbl := $(sysprf)/mksyscalltbl
 
 # Create output directory if not already present
 _dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)')
 
 $(header): $(sysdef) $(systbl)
-	$(Q)$(SHELL) '$(systbl)' '$(CC)' $(sysdef) > $@
+	@(test -d ../../kernel -a -d ../../tools -a -d ../perf && ( \
+        (diff -B $(sysdef) $(syskrn) >/dev/null) \
+        || echo "Warning: Kernel ABI header at '$(sysdef)' differs from latest version at '$(syskrn)'" >&2 )) || true
+	$(Q)$(SHELL) '$(systbl)' $(sysdef) > $@
 
 clean::
 	$(call QUIET_CLEAN, s390) $(RM) $(header)
diff --git a/tools/perf/arch/s390/entry/syscalls/mksyscalltbl b/tools/perf/arch/s390/entry/syscalls/mksyscalltbl
index 7fa0d0abd419..72ecbb676370 100755
--- a/tools/perf/arch/s390/entry/syscalls/mksyscalltbl
+++ b/tools/perf/arch/s390/entry/syscalls/mksyscalltbl
@@ -3,25 +3,23 @@
 #
 # Generate system call table for perf
 #
-#
-# Copyright IBM Corp. 2017
+# Copyright IBM Corp. 2017, 2018
 # Author(s):  Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
 #
 
-gcc=$1
-input=$2
+SYSCALL_TBL=$1
 
-if ! test -r $input; then
+if ! test -r $SYSCALL_TBL; then
 	echo "Could not read input file" >&2
 	exit 1
 fi
 
 create_table()
 {
-	local max_nr
+	local max_nr nr abi sc discard
 
 	echo 'static const char *syscalltbl_s390_64[] = {'
-	while read sc nr; do
+	while read nr abi sc discard; do
 		printf '\t[%d] = "%s",\n' $nr $sc
 		max_nr=$nr
 	done
@@ -29,8 +27,6 @@ create_table()
 	echo "#define SYSCALLTBL_S390_64_MAX_ID $max_nr"
 }
 
-
-$gcc -m64 -E -dM -x c  $input	       \
-	|sed -ne 's/^#define __NR_//p' \
-	|sort -t' ' -k2 -nu	       \
+grep -E "^[[:digit:]]+[[:space:]]+(common|64)" $SYSCALL_TBL	\
+	|sort -k1 -n					\
 	|create_table
-- 
cgit v1.2.3


From f1d0b4cde922863004ce3f5f39e8662cc0686c96 Mon Sep 17 00:00:00 2001
From: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Date: Thu, 8 Feb 2018 12:47:48 +0100
Subject: Revert "tools include s390: Grab a copy of
 arch/s390/include/uapi/asm/unistd.h"

This reverts commit f120c7b187e6c418238710b48723ce141f467543 which is no
longer required with the introduction of a syscall.tbl on s390.

Signed-off-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Michael Petlan <mpetlan@redhat.com>
Cc: Thomas Richter <tmricht@linux.vnet.ibm.com>
Cc: linux-s390@vger.kernel.org
LPU-Reference: 1518090470-2899-2-git-send-email-brueckner@linux.vnet.ibm.com
Link: https://lkml.kernel.org/n/tip-q1lg0nvhha1tk39ri9aqalcb@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/arch/s390/include/uapi/asm/unistd.h | 412 ------------------------------
 tools/perf/check-headers.sh               |   1 -
 2 files changed, 413 deletions(-)
 delete mode 100644 tools/arch/s390/include/uapi/asm/unistd.h

(limited to 'tools')

diff --git a/tools/arch/s390/include/uapi/asm/unistd.h b/tools/arch/s390/include/uapi/asm/unistd.h
deleted file mode 100644
index 725120939051..000000000000
--- a/tools/arch/s390/include/uapi/asm/unistd.h
+++ /dev/null
@@ -1,412 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-/*
- *  S390 version
- *
- *  Derived from "include/asm-i386/unistd.h"
- */
-
-#ifndef _UAPI_ASM_S390_UNISTD_H_
-#define _UAPI_ASM_S390_UNISTD_H_
-
-/*
- * This file contains the system call numbers.
- */
-
-#define __NR_exit                 1
-#define __NR_fork                 2
-#define __NR_read                 3
-#define __NR_write                4
-#define __NR_open                 5
-#define __NR_close                6
-#define __NR_restart_syscall	  7
-#define __NR_creat                8
-#define __NR_link                 9
-#define __NR_unlink              10
-#define __NR_execve              11
-#define __NR_chdir               12
-#define __NR_mknod               14
-#define __NR_chmod               15
-#define __NR_lseek               19
-#define __NR_getpid              20
-#define __NR_mount               21
-#define __NR_umount              22
-#define __NR_ptrace              26
-#define __NR_alarm               27
-#define __NR_pause               29
-#define __NR_utime               30
-#define __NR_access              33
-#define __NR_nice                34
-#define __NR_sync                36
-#define __NR_kill                37
-#define __NR_rename              38
-#define __NR_mkdir               39
-#define __NR_rmdir               40
-#define __NR_dup                 41
-#define __NR_pipe                42
-#define __NR_times               43
-#define __NR_brk                 45
-#define __NR_signal              48
-#define __NR_acct                51
-#define __NR_umount2             52
-#define __NR_ioctl               54
-#define __NR_fcntl               55
-#define __NR_setpgid             57
-#define __NR_umask               60
-#define __NR_chroot              61
-#define __NR_ustat               62
-#define __NR_dup2                63
-#define __NR_getppid             64
-#define __NR_getpgrp             65
-#define __NR_setsid              66
-#define __NR_sigaction           67
-#define __NR_sigsuspend          72
-#define __NR_sigpending          73
-#define __NR_sethostname         74
-#define __NR_setrlimit           75
-#define __NR_getrusage           77
-#define __NR_gettimeofday        78
-#define __NR_settimeofday        79
-#define __NR_symlink             83
-#define __NR_readlink            85
-#define __NR_uselib              86
-#define __NR_swapon              87
-#define __NR_reboot              88
-#define __NR_readdir             89
-#define __NR_mmap                90
-#define __NR_munmap              91
-#define __NR_truncate            92
-#define __NR_ftruncate           93
-#define __NR_fchmod              94
-#define __NR_getpriority         96
-#define __NR_setpriority         97
-#define __NR_statfs              99
-#define __NR_fstatfs            100
-#define __NR_socketcall         102
-#define __NR_syslog             103
-#define __NR_setitimer          104
-#define __NR_getitimer          105
-#define __NR_stat               106
-#define __NR_lstat              107
-#define __NR_fstat              108
-#define __NR_lookup_dcookie     110
-#define __NR_vhangup            111
-#define __NR_idle               112
-#define __NR_wait4              114
-#define __NR_swapoff            115
-#define __NR_sysinfo            116
-#define __NR_ipc                117
-#define __NR_fsync              118
-#define __NR_sigreturn          119
-#define __NR_clone              120
-#define __NR_setdomainname      121
-#define __NR_uname              122
-#define __NR_adjtimex           124
-#define __NR_mprotect           125
-#define __NR_sigprocmask        126
-#define __NR_create_module      127
-#define __NR_init_module        128
-#define __NR_delete_module      129
-#define __NR_get_kernel_syms    130
-#define __NR_quotactl           131
-#define __NR_getpgid            132
-#define __NR_fchdir             133
-#define __NR_bdflush            134
-#define __NR_sysfs              135
-#define __NR_personality        136
-#define __NR_afs_syscall        137 /* Syscall for Andrew File System */
-#define __NR_getdents           141
-#define __NR_flock              143
-#define __NR_msync              144
-#define __NR_readv              145
-#define __NR_writev             146
-#define __NR_getsid             147
-#define __NR_fdatasync          148
-#define __NR__sysctl            149
-#define __NR_mlock              150
-#define __NR_munlock            151
-#define __NR_mlockall           152
-#define __NR_munlockall         153
-#define __NR_sched_setparam             154
-#define __NR_sched_getparam             155
-#define __NR_sched_setscheduler         156
-#define __NR_sched_getscheduler         157
-#define __NR_sched_yield                158
-#define __NR_sched_get_priority_max     159
-#define __NR_sched_get_priority_min     160
-#define __NR_sched_rr_get_interval      161
-#define __NR_nanosleep          162
-#define __NR_mremap             163
-#define __NR_query_module       167
-#define __NR_poll               168
-#define __NR_nfsservctl         169
-#define __NR_prctl              172
-#define __NR_rt_sigreturn       173
-#define __NR_rt_sigaction       174
-#define __NR_rt_sigprocmask     175
-#define __NR_rt_sigpending      176
-#define __NR_rt_sigtimedwait    177
-#define __NR_rt_sigqueueinfo    178
-#define __NR_rt_sigsuspend      179
-#define __NR_pread64            180
-#define __NR_pwrite64           181
-#define __NR_getcwd             183
-#define __NR_capget             184
-#define __NR_capset             185
-#define __NR_sigaltstack        186
-#define __NR_sendfile           187
-#define __NR_getpmsg		188
-#define __NR_putpmsg		189
-#define __NR_vfork		190
-#define __NR_pivot_root         217
-#define __NR_mincore            218
-#define __NR_madvise            219
-#define __NR_getdents64		220
-#define __NR_readahead		222
-#define __NR_setxattr		224
-#define __NR_lsetxattr		225
-#define __NR_fsetxattr		226
-#define __NR_getxattr		227
-#define __NR_lgetxattr		228
-#define __NR_fgetxattr		229
-#define __NR_listxattr		230
-#define __NR_llistxattr		231
-#define __NR_flistxattr		232
-#define __NR_removexattr	233
-#define __NR_lremovexattr	234
-#define __NR_fremovexattr	235
-#define __NR_gettid		236
-#define __NR_tkill		237
-#define __NR_futex		238
-#define __NR_sched_setaffinity	239
-#define __NR_sched_getaffinity	240
-#define __NR_tgkill		241
-/* Number 242 is reserved for tux */
-#define __NR_io_setup		243
-#define __NR_io_destroy		244
-#define __NR_io_getevents	245
-#define __NR_io_submit		246
-#define __NR_io_cancel		247
-#define __NR_exit_group		248
-#define __NR_epoll_create	249
-#define __NR_epoll_ctl		250
-#define __NR_epoll_wait		251
-#define __NR_set_tid_address	252
-#define __NR_fadvise64		253
-#define __NR_timer_create	254
-#define __NR_timer_settime	255
-#define __NR_timer_gettime	256
-#define __NR_timer_getoverrun	257
-#define __NR_timer_delete	258
-#define __NR_clock_settime	259
-#define __NR_clock_gettime	260
-#define __NR_clock_getres	261
-#define __NR_clock_nanosleep	262
-/* Number 263 is reserved for vserver */
-#define __NR_statfs64		265
-#define __NR_fstatfs64		266
-#define __NR_remap_file_pages	267
-#define __NR_mbind		268
-#define __NR_get_mempolicy	269
-#define __NR_set_mempolicy	270
-#define __NR_mq_open		271
-#define __NR_mq_unlink		272
-#define __NR_mq_timedsend	273
-#define __NR_mq_timedreceive	274
-#define __NR_mq_notify		275
-#define __NR_mq_getsetattr	276
-#define __NR_kexec_load		277
-#define __NR_add_key		278
-#define __NR_request_key	279
-#define __NR_keyctl		280
-#define __NR_waitid		281
-#define __NR_ioprio_set		282
-#define __NR_ioprio_get		283
-#define __NR_inotify_init	284
-#define __NR_inotify_add_watch	285
-#define __NR_inotify_rm_watch	286
-#define __NR_migrate_pages	287
-#define __NR_openat		288
-#define __NR_mkdirat		289
-#define __NR_mknodat		290
-#define __NR_fchownat		291
-#define __NR_futimesat		292
-#define __NR_unlinkat		294
-#define __NR_renameat		295
-#define __NR_linkat		296
-#define __NR_symlinkat		297
-#define __NR_readlinkat		298
-#define __NR_fchmodat		299
-#define __NR_faccessat		300
-#define __NR_pselect6		301
-#define __NR_ppoll		302
-#define __NR_unshare		303
-#define __NR_set_robust_list	304
-#define __NR_get_robust_list	305
-#define __NR_splice		306
-#define __NR_sync_file_range	307
-#define __NR_tee		308
-#define __NR_vmsplice		309
-#define __NR_move_pages		310
-#define __NR_getcpu		311
-#define __NR_epoll_pwait	312
-#define __NR_utimes		313
-#define __NR_fallocate		314
-#define __NR_utimensat		315
-#define __NR_signalfd		316
-#define __NR_timerfd		317
-#define __NR_eventfd		318
-#define __NR_timerfd_create	319
-#define __NR_timerfd_settime	320
-#define __NR_timerfd_gettime	321
-#define __NR_signalfd4		322
-#define __NR_eventfd2		323
-#define __NR_inotify_init1	324
-#define __NR_pipe2		325
-#define __NR_dup3		326
-#define __NR_epoll_create1	327
-#define	__NR_preadv		328
-#define	__NR_pwritev		329
-#define __NR_rt_tgsigqueueinfo	330
-#define __NR_perf_event_open	331
-#define __NR_fanotify_init	332
-#define __NR_fanotify_mark	333
-#define __NR_prlimit64		334
-#define __NR_name_to_handle_at	335
-#define __NR_open_by_handle_at	336
-#define __NR_clock_adjtime	337
-#define __NR_syncfs		338
-#define __NR_setns		339
-#define __NR_process_vm_readv	340
-#define __NR_process_vm_writev	341
-#define __NR_s390_runtime_instr 342
-#define __NR_kcmp		343
-#define __NR_finit_module	344
-#define __NR_sched_setattr	345
-#define __NR_sched_getattr	346
-#define __NR_renameat2		347
-#define __NR_seccomp		348
-#define __NR_getrandom		349
-#define __NR_memfd_create	350
-#define __NR_bpf		351
-#define __NR_s390_pci_mmio_write	352
-#define __NR_s390_pci_mmio_read		353
-#define __NR_execveat		354
-#define __NR_userfaultfd	355
-#define __NR_membarrier		356
-#define __NR_recvmmsg		357
-#define __NR_sendmmsg		358
-#define __NR_socket		359
-#define __NR_socketpair		360
-#define __NR_bind		361
-#define __NR_connect		362
-#define __NR_listen		363
-#define __NR_accept4		364
-#define __NR_getsockopt		365
-#define __NR_setsockopt		366
-#define __NR_getsockname	367
-#define __NR_getpeername	368
-#define __NR_sendto		369
-#define __NR_sendmsg		370
-#define __NR_recvfrom		371
-#define __NR_recvmsg		372
-#define __NR_shutdown		373
-#define __NR_mlock2		374
-#define __NR_copy_file_range	375
-#define __NR_preadv2		376
-#define __NR_pwritev2		377
-#define __NR_s390_guarded_storage	378
-#define __NR_statx		379
-#define __NR_s390_sthyi		380
-#define NR_syscalls 381
-
-/* 
- * There are some system calls that are not present on 64 bit, some
- * have a different name although they do the same (e.g. __NR_chown32
- * is __NR_chown on 64 bit).
- */
-#ifndef __s390x__
-
-#define __NR_time		 13
-#define __NR_lchown		 16
-#define __NR_setuid		 23
-#define __NR_getuid		 24
-#define __NR_stime		 25
-#define __NR_setgid		 46
-#define __NR_getgid		 47
-#define __NR_geteuid		 49
-#define __NR_getegid		 50
-#define __NR_setreuid		 70
-#define __NR_setregid		 71
-#define __NR_getrlimit		 76
-#define __NR_getgroups		 80
-#define __NR_setgroups		 81
-#define __NR_fchown		 95
-#define __NR_ioperm		101
-#define __NR_setfsuid		138
-#define __NR_setfsgid		139
-#define __NR__llseek		140
-#define __NR__newselect 	142
-#define __NR_setresuid		164
-#define __NR_getresuid		165
-#define __NR_setresgid		170
-#define __NR_getresgid		171
-#define __NR_chown		182
-#define __NR_ugetrlimit		191	/* SuS compliant getrlimit */
-#define __NR_mmap2		192
-#define __NR_truncate64		193
-#define __NR_ftruncate64	194
-#define __NR_stat64		195
-#define __NR_lstat64		196
-#define __NR_fstat64		197
-#define __NR_lchown32		198
-#define __NR_getuid32		199
-#define __NR_getgid32		200
-#define __NR_geteuid32		201
-#define __NR_getegid32		202
-#define __NR_setreuid32		203
-#define __NR_setregid32		204
-#define __NR_getgroups32	205
-#define __NR_setgroups32	206
-#define __NR_fchown32		207
-#define __NR_setresuid32	208
-#define __NR_getresuid32	209
-#define __NR_setresgid32	210
-#define __NR_getresgid32	211
-#define __NR_chown32		212
-#define __NR_setuid32		213
-#define __NR_setgid32		214
-#define __NR_setfsuid32		215
-#define __NR_setfsgid32		216
-#define __NR_fcntl64		221
-#define __NR_sendfile64		223
-#define __NR_fadvise64_64	264
-#define __NR_fstatat64		293
-
-#else
-
-#define __NR_select		142
-#define __NR_getrlimit		191	/* SuS compliant getrlimit */
-#define __NR_lchown  		198
-#define __NR_getuid  		199
-#define __NR_getgid  		200
-#define __NR_geteuid  		201
-#define __NR_getegid  		202
-#define __NR_setreuid  		203
-#define __NR_setregid  		204
-#define __NR_getgroups  	205
-#define __NR_setgroups  	206
-#define __NR_fchown  		207
-#define __NR_setresuid  	208
-#define __NR_getresuid  	209
-#define __NR_setresgid  	210
-#define __NR_getresgid  	211
-#define __NR_chown  		212
-#define __NR_setuid  		213
-#define __NR_setgid  		214
-#define __NR_setfsuid  		215
-#define __NR_setfsgid  		216
-#define __NR_newfstatat		293
-
-#endif
-
-#endif /* _UAPI_ASM_S390_UNISTD_H_ */
diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh
index 51abdb0a4047..790ec25919a0 100755
--- a/tools/perf/check-headers.sh
+++ b/tools/perf/check-headers.sh
@@ -33,7 +33,6 @@ arch/s390/include/uapi/asm/kvm.h
 arch/s390/include/uapi/asm/kvm_perf.h
 arch/s390/include/uapi/asm/ptrace.h
 arch/s390/include/uapi/asm/sie.h
-arch/s390/include/uapi/asm/unistd.h
 arch/arm/include/uapi/asm/kvm.h
 arch/arm64/include/uapi/asm/kvm.h
 arch/alpha/include/uapi/asm/errno.h
-- 
cgit v1.2.3


From b1a2ce825737b0165cc08e6f98f8c0ea1affdd60 Mon Sep 17 00:00:00 2001
From: Jeremy Cline <jeremy@jcline.org>
Date: Tue, 20 Feb 2018 01:00:07 +0000
Subject: tools/libbpf: Avoid possibly using uninitialized variable

Fixes a GCC maybe-uninitialized warning introduced by 48cca7e44f9f.
"text" is only initialized inside the if statement so only print debug
info there.

Fixes: 48cca7e44f9f ("libbpf: add support for bpf_call")
Signed-off-by: Jeremy Cline <jeremy@jcline.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/lib/bpf/libbpf.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 97073d649c1a..5bbbf285af74 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -1060,11 +1060,12 @@ bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj,
 		prog->insns = new_insn;
 		prog->main_prog_cnt = prog->insns_cnt;
 		prog->insns_cnt = new_cnt;
+		pr_debug("added %zd insn from %s to prog %s\n",
+			 text->insns_cnt, text->section_name,
+			 prog->section_name);
 	}
 	insn = &prog->insns[relo->insn_idx];
 	insn->imm += prog->main_prog_cnt - relo->insn_idx;
-	pr_debug("added %zd insn from %s to prog %s\n",
-		 text->insns_cnt, text->section_name, prog->section_name);
 	return 0;
 }
 
-- 
cgit v1.2.3


From 43a4525f80534530077683f6472d8971646b0ace Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 16 Jan 2018 17:16:32 +0100
Subject: objtool: Use existing global variables for options

Use the existing global variables instead of passing them around and
creating duplicate global variables.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 tools/objtool/builtin-check.c | 2 +-
 tools/objtool/builtin-orc.c   | 6 +-----
 tools/objtool/builtin.h       | 5 +++++
 tools/objtool/check.c         | 5 ++---
 tools/objtool/check.h         | 2 +-
 5 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'tools')

diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c
index 57254f5b2779..8d0986d2a803 100644
--- a/tools/objtool/builtin-check.c
+++ b/tools/objtool/builtin-check.c
@@ -53,5 +53,5 @@ int cmd_check(int argc, const char **argv)
 
 	objname = argv[0];
 
-	return check(objname, no_fp, no_unreachable, false);
+	return check(objname, false);
 }
diff --git a/tools/objtool/builtin-orc.c b/tools/objtool/builtin-orc.c
index 91e8e19ff5e0..77ea2b97117d 100644
--- a/tools/objtool/builtin-orc.c
+++ b/tools/objtool/builtin-orc.c
@@ -25,7 +25,6 @@
  */
 
 #include <string.h>
-#include <subcmd/parse-options.h>
 #include "builtin.h"
 #include "check.h"
 
@@ -36,9 +35,6 @@ static const char *orc_usage[] = {
 	NULL,
 };
 
-extern const struct option check_options[];
-extern bool no_fp, no_unreachable;
-
 int cmd_orc(int argc, const char **argv)
 {
 	const char *objname;
@@ -54,7 +50,7 @@ int cmd_orc(int argc, const char **argv)
 
 		objname = argv[0];
 
-		return check(objname, no_fp, no_unreachable, true);
+		return check(objname, true);
 	}
 
 	if (!strcmp(argv[0], "dump")) {
diff --git a/tools/objtool/builtin.h b/tools/objtool/builtin.h
index dd526067fed5..f166ea1b1da2 100644
--- a/tools/objtool/builtin.h
+++ b/tools/objtool/builtin.h
@@ -17,6 +17,11 @@
 #ifndef _BUILTIN_H
 #define _BUILTIN_H
 
+#include <subcmd/parse-options.h>
+
+extern const struct option check_options[];
+extern bool no_fp, no_unreachable;
+
 extern int cmd_check(int argc, const char **argv);
 extern int cmd_orc(int argc, const char **argv);
 
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index a8cb69a26576..ab6f0de7f90d 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -18,6 +18,7 @@
 #include <string.h>
 #include <stdlib.h>
 
+#include "builtin.h"
 #include "check.h"
 #include "elf.h"
 #include "special.h"
@@ -33,7 +34,6 @@ struct alternative {
 };
 
 const char *objname;
-static bool no_fp;
 struct cfi_state initial_func_cfi;
 
 struct instruction *find_insn(struct objtool_file *file,
@@ -2022,13 +2022,12 @@ static void cleanup(struct objtool_file *file)
 	elf_close(file->elf);
 }
 
-int check(const char *_objname, bool _no_fp, bool no_unreachable, bool orc)
+int check(const char *_objname, bool orc)
 {
 	struct objtool_file file;
 	int ret, warnings = 0;
 
 	objname = _objname;
-	no_fp = _no_fp;
 
 	file.elf = elf_open(objname, orc ? O_RDWR : O_RDONLY);
 	if (!file.elf)
diff --git a/tools/objtool/check.h b/tools/objtool/check.h
index 23a1d065cae1..936255ba23db 100644
--- a/tools/objtool/check.h
+++ b/tools/objtool/check.h
@@ -63,7 +63,7 @@ struct objtool_file {
 	bool ignore_unreachables, c_file, hints;
 };
 
-int check(const char *objname, bool no_fp, bool no_unreachable, bool orc);
+int check(const char *objname, bool orc);
 
 struct instruction *find_insn(struct objtool_file *file,
 			      struct section *sec, unsigned long offset);
-- 
cgit v1.2.3


From b5bc2231b8ad4387c9641f235ca0ad8cd300b6df Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 16 Jan 2018 10:24:06 +0100
Subject: objtool: Add retpoline validation

David requested a objtool validation pass for CONFIG_RETPOLINE=y enabled
builds, where it validates no unannotated indirect  jumps or calls are
left.

Add an additional .discard.retpoline_safe section to allow annotating
the few indirect sites that are required and safe.

Requested-by: David Woodhouse <dwmw2@infradead.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: David Woodhouse <dwmw@amazon.co.uk>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 scripts/Makefile.build        |  4 ++
 tools/objtool/builtin-check.c |  3 +-
 tools/objtool/builtin.h       |  2 +-
 tools/objtool/check.c         | 86 ++++++++++++++++++++++++++++++++++++++++++-
 tools/objtool/check.h         |  1 +
 5 files changed, 93 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index 47cddf32aeba..53d862aee335 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -264,6 +264,10 @@ objtool_args += --no-unreachable
 else
 objtool_args += $(call cc-ifversion, -lt, 0405, --no-unreachable)
 endif
+ifdef CONFIG_RETPOLINE
+  objtool_args += --retpoline
+endif
+
 
 ifdef CONFIG_MODVERSIONS
 objtool_o = $(@D)/.tmp_$(@F)
diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c
index 8d0986d2a803..dd6bcd6097f5 100644
--- a/tools/objtool/builtin-check.c
+++ b/tools/objtool/builtin-check.c
@@ -29,7 +29,7 @@
 #include "builtin.h"
 #include "check.h"
 
-bool no_fp, no_unreachable;
+bool no_fp, no_unreachable, retpoline;
 
 static const char * const check_usage[] = {
 	"objtool check [<options>] file.o",
@@ -39,6 +39,7 @@ static const char * const check_usage[] = {
 const struct option check_options[] = {
 	OPT_BOOLEAN('f', "no-fp", &no_fp, "Skip frame pointer validation"),
 	OPT_BOOLEAN('u', "no-unreachable", &no_unreachable, "Skip 'unreachable instruction' warnings"),
+	OPT_BOOLEAN('r', "retpoline", &retpoline, "Validate retpoline assumptions"),
 	OPT_END(),
 };
 
diff --git a/tools/objtool/builtin.h b/tools/objtool/builtin.h
index f166ea1b1da2..7b6addfce045 100644
--- a/tools/objtool/builtin.h
+++ b/tools/objtool/builtin.h
@@ -20,7 +20,7 @@
 #include <subcmd/parse-options.h>
 
 extern const struct option check_options[];
-extern bool no_fp, no_unreachable;
+extern bool no_fp, no_unreachable, retpoline;
 
 extern int cmd_check(int argc, const char **argv);
 extern int cmd_orc(int argc, const char **argv);
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index ab6f0de7f90d..5e5db7b4d77b 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -497,6 +497,7 @@ static int add_jump_destinations(struct objtool_file *file)
 			 * disguise, so convert them accordingly.
 			 */
 			insn->type = INSN_JUMP_DYNAMIC;
+			insn->retpoline_safe = true;
 			continue;
 		} else {
 			/* sibling call */
@@ -548,7 +549,8 @@ static int add_call_destinations(struct objtool_file *file)
 			if (!insn->call_dest && !insn->ignore) {
 				WARN_FUNC("unsupported intra-function call",
 					  insn->sec, insn->offset);
-				WARN("If this is a retpoline, please patch it in with alternatives and annotate it with ANNOTATE_NOSPEC_ALTERNATIVE.");
+				if (retpoline)
+					WARN("If this is a retpoline, please patch it in with alternatives and annotate it with ANNOTATE_NOSPEC_ALTERNATIVE.");
 				return -1;
 			}
 
@@ -1108,6 +1110,54 @@ static int read_unwind_hints(struct objtool_file *file)
 	return 0;
 }
 
+static int read_retpoline_hints(struct objtool_file *file)
+{
+	struct section *sec, *relasec;
+	struct instruction *insn;
+	struct rela *rela;
+	int i;
+
+	sec = find_section_by_name(file->elf, ".discard.retpoline_safe");
+	if (!sec)
+		return 0;
+
+	relasec = sec->rela;
+	if (!relasec) {
+		WARN("missing .rela.discard.retpoline_safe section");
+		return -1;
+	}
+
+	if (sec->len % sizeof(unsigned long)) {
+		WARN("retpoline_safe size mismatch: %d %ld", sec->len, sizeof(unsigned long));
+		return -1;
+	}
+
+	for (i = 0; i < sec->len / sizeof(unsigned long); i++) {
+		rela = find_rela_by_dest(sec, i * sizeof(unsigned long));
+		if (!rela) {
+			WARN("can't find rela for retpoline_safe[%d]", i);
+			return -1;
+		}
+
+		insn = find_insn(file, rela->sym->sec, rela->addend);
+		if (!insn) {
+			WARN("can't find insn for retpoline_safe[%d]", i);
+			return -1;
+		}
+
+		if (insn->type != INSN_JUMP_DYNAMIC &&
+		    insn->type != INSN_CALL_DYNAMIC) {
+			WARN_FUNC("retpoline_safe hint not a indirect jump/call",
+				  insn->sec, insn->offset);
+			return -1;
+		}
+
+		insn->retpoline_safe = true;
+	}
+
+	return 0;
+}
+
 static int decode_sections(struct objtool_file *file)
 {
 	int ret;
@@ -1146,6 +1196,10 @@ static int decode_sections(struct objtool_file *file)
 	if (ret)
 		return ret;
 
+	ret = read_retpoline_hints(file);
+	if (ret)
+		return ret;
+
 	return 0;
 }
 
@@ -1891,6 +1945,29 @@ static int validate_unwind_hints(struct objtool_file *file)
 	return warnings;
 }
 
+static int validate_retpoline(struct objtool_file *file)
+{
+	struct instruction *insn;
+	int warnings = 0;
+
+	for_each_insn(file, insn) {
+		if (insn->type != INSN_JUMP_DYNAMIC &&
+		    insn->type != INSN_CALL_DYNAMIC)
+			continue;
+
+		if (insn->retpoline_safe)
+			continue;
+
+		WARN_FUNC("indirect %s found in RETPOLINE build",
+			  insn->sec, insn->offset,
+			  insn->type == INSN_JUMP_DYNAMIC ? "jump" : "call");
+
+		warnings++;
+	}
+
+	return warnings;
+}
+
 static bool is_kasan_insn(struct instruction *insn)
 {
 	return (insn->type == INSN_CALL &&
@@ -2051,6 +2128,13 @@ int check(const char *_objname, bool orc)
 	if (list_empty(&file.insn_list))
 		goto out;
 
+	if (retpoline) {
+		ret = validate_retpoline(&file);
+		if (ret < 0)
+			return ret;
+		warnings += ret;
+	}
+
 	ret = validate_functions(&file);
 	if (ret < 0)
 		goto out;
diff --git a/tools/objtool/check.h b/tools/objtool/check.h
index 936255ba23db..c6b68fcb926f 100644
--- a/tools/objtool/check.h
+++ b/tools/objtool/check.h
@@ -45,6 +45,7 @@ struct instruction {
 	unsigned char type;
 	unsigned long immediate;
 	bool alt_group, visited, dead_end, ignore, hint, save, restore, ignore_alts;
+	bool retpoline_safe;
 	struct symbol *call_dest;
 	struct instruction *jump_dest;
 	struct instruction *first_jump_src;
-- 
cgit v1.2.3


From ca41b97ed9124fd62323a162de5852f6e28f94b8 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 31 Jan 2018 10:18:28 +0100
Subject: objtool: Add module specific retpoline rules

David allowed retpolines in .init.text, except for modules, which will
trip up objtool retpoline validation, fix that.

Requested-by: David Woodhouse <dwmw2@infradead.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 scripts/Makefile.build        | 2 ++
 tools/objtool/builtin-check.c | 3 ++-
 tools/objtool/builtin.h       | 2 +-
 tools/objtool/check.c         | 9 +++++++++
 4 files changed, 14 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index 53d862aee335..ce0fc4dd68c6 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -256,6 +256,8 @@ __objtool_obj := $(objtree)/tools/objtool/objtool
 
 objtool_args = $(if $(CONFIG_UNWINDER_ORC),orc generate,check)
 
+objtool_args += $(if $(part-of-module), --module,)
+
 ifndef CONFIG_FRAME_POINTER
 objtool_args += --no-fp
 endif
diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c
index dd6bcd6097f5..694abc628e9b 100644
--- a/tools/objtool/builtin-check.c
+++ b/tools/objtool/builtin-check.c
@@ -29,7 +29,7 @@
 #include "builtin.h"
 #include "check.h"
 
-bool no_fp, no_unreachable, retpoline;
+bool no_fp, no_unreachable, retpoline, module;
 
 static const char * const check_usage[] = {
 	"objtool check [<options>] file.o",
@@ -40,6 +40,7 @@ const struct option check_options[] = {
 	OPT_BOOLEAN('f', "no-fp", &no_fp, "Skip frame pointer validation"),
 	OPT_BOOLEAN('u', "no-unreachable", &no_unreachable, "Skip 'unreachable instruction' warnings"),
 	OPT_BOOLEAN('r', "retpoline", &retpoline, "Validate retpoline assumptions"),
+	OPT_BOOLEAN('m', "module", &module, "Indicates the object will be part of a kernel module"),
 	OPT_END(),
 };
 
diff --git a/tools/objtool/builtin.h b/tools/objtool/builtin.h
index 7b6addfce045..28ff40e19a14 100644
--- a/tools/objtool/builtin.h
+++ b/tools/objtool/builtin.h
@@ -20,7 +20,7 @@
 #include <subcmd/parse-options.h>
 
 extern const struct option check_options[];
-extern bool no_fp, no_unreachable, retpoline;
+extern bool no_fp, no_unreachable, retpoline, module;
 
 extern int cmd_check(int argc, const char **argv);
 extern int cmd_orc(int argc, const char **argv);
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 5e5db7b4d77b..472e64e95891 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -1958,6 +1958,15 @@ static int validate_retpoline(struct objtool_file *file)
 		if (insn->retpoline_safe)
 			continue;
 
+		/*
+		 * .init.text code is ran before userspace and thus doesn't
+		 * strictly need retpolines, except for modules which are
+		 * loaded late, they very much do need retpoline in their
+		 * .init.text
+		 */
+		if (!strcmp(insn->sec->name, ".init.text") && !module)
+			continue;
+
 		WARN_FUNC("indirect %s found in RETPOLINE build",
 			  insn->sec, insn->offset,
 			  insn->type == INSN_JUMP_DYNAMIC ? "jump" : "call");
-- 
cgit v1.2.3


From 7ed1c1901fe52e6c5828deb155920b44b0adabb1 Mon Sep 17 00:00:00 2001
From: Martin Kelly <martin@martingkelly.com>
Date: Wed, 21 Feb 2018 14:45:12 -0800
Subject: tools: fix cross-compile var clobbering

Currently a number of Makefiles break when used with toolchains that
pass extra flags in CC and other cross-compile related variables (such
as --sysroot).

Thus we get this error when we use a toolchain that puts --sysroot in
the CC var:

  ~/src/linux/tools$ make iio
  [snip]
  iio_event_monitor.c:18:10: fatal error: unistd.h: No such file or directory
    #include <unistd.h>
             ^~~~~~~~~~

This occurs because we clobber several env vars related to
cross-compiling with lines like this:

  CC = $(CROSS_COMPILE)gcc

Although this will point to a valid cross-compiler, we lose any extra
flags that might exist in the CC variable, which can break toolchains
that rely on them (for example, those that use --sysroot).

This easily shows up using a Yocto SDK:

  $ . [snip]/sdk/environment-setup-cortexa8hf-neon-poky-linux-gnueabi

  $ echo $CC
  arm-poky-linux-gnueabi-gcc -march=armv7-a -mfpu=neon -mfloat-abi=hard
  -mcpu=cortex-a8
  --sysroot=[snip]/sdk/sysroots/cortexa8hf-neon-poky-linux-gnueabi

  $ echo $CROSS_COMPILE
  arm-poky-linux-gnueabi-

  $ echo ${CROSS_COMPILE}gcc
  krm-poky-linux-gnueabi-gcc

Although arm-poky-linux-gnueabi-gcc is a cross-compiler, we've lost the
--sysroot and other flags that enable us to find the right libraries to
link against, so we can't find unistd.h and other libraries and headers.
Normally with the --sysroot flag we would find unistd.h in the sdk
directory in the sysroot:

  $ find [snip]/sdk/sysroots -path '*/usr/include/unistd.h'
  [snip]/sdk/sysroots/cortexa8hf-neon-poky-linux-gnueabi/usr/include/unistd.h

The perf Makefile adds CC = $(CROSS_COMPILE)gcc if and only if CC is not
already set, and it compiles correctly with the above toolchain.

So, generalize the logic that perf uses in the common Makefile and
remove the manual CC = $(CROSS_COMPILE)gcc lines from each Makefile.

Note that this patch does not fix cross-compile for all the tools (some
have other bugs), but it does fix it for all except usb and acpi, which
still have other unrelated issues.

I tested both with and without the patch on native and cross-build and
there appear to be no regressions.

Link: http://lkml.kernel.org/r/20180107214028.23771-1-martin@martingkelly.com
Signed-off-by: Martin Kelly <martin@martingkelly.com>
Acked-by: Mark Brown <broonie@kernel.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Li Zefan <lizefan@huawei.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Linus Walleij <linus.walleij@linaro.org>
Cc: "K. Y. Srinivasan" <kys@microsoft.com>
Cc: Haiyang Zhang <haiyangz@microsoft.com>
Cc: Stephen Hemminger <sthemmin@microsoft.com>
Cc: Jonathan Cameron <jic23@kernel.org>
Cc: Pali Rohar <pali.rohar@gmail.com>
Cc: Richard Purdie <rpurdie@rpsys.net>
Cc: Jacek Anaszewski <jacek.anaszewski@gmail.com>
Cc: Pavel Machek <pavel@ucw.cz>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Robert Moore <robert.moore@intel.com>
Cc: Lv Zheng <lv.zheng@intel.com>
Cc: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Valentina Manea <valentina.manea.m@gmail.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Mario Limonciello <mario.limonciello@dell.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 tools/cgroup/Makefile            |  1 -
 tools/gpio/Makefile              |  2 --
 tools/hv/Makefile                |  1 -
 tools/iio/Makefile               |  2 --
 tools/laptop/freefall/Makefile   |  1 -
 tools/leds/Makefile              |  1 -
 tools/perf/Makefile.perf         |  6 ------
 tools/power/acpi/Makefile.config |  3 ---
 tools/scripts/Makefile.include   | 18 ++++++++++++++++++
 tools/spi/Makefile               |  2 --
 tools/usb/Makefile               |  1 -
 tools/vm/Makefile                |  1 -
 tools/wmi/Makefile               |  1 -
 13 files changed, 18 insertions(+), 22 deletions(-)

(limited to 'tools')

diff --git a/tools/cgroup/Makefile b/tools/cgroup/Makefile
index 860fa151640a..ffca068e4a76 100644
--- a/tools/cgroup/Makefile
+++ b/tools/cgroup/Makefile
@@ -1,7 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
 # Makefile for cgroup tools
 
-CC = $(CROSS_COMPILE)gcc
 CFLAGS = -Wall -Wextra
 
 all: cgroup_event_listener
diff --git a/tools/gpio/Makefile b/tools/gpio/Makefile
index 805a2c0cf4cd..240eda014b37 100644
--- a/tools/gpio/Makefile
+++ b/tools/gpio/Makefile
@@ -12,8 +12,6 @@ endif
 # (this improves performance and avoids hard-to-debug behaviour);
 MAKEFLAGS += -r
 
-CC = $(CROSS_COMPILE)gcc
-LD = $(CROSS_COMPILE)ld
 CFLAGS += -O2 -Wall -g -D_GNU_SOURCE -I$(OUTPUT)include
 
 ALL_TARGETS := lsgpio gpio-hammer gpio-event-mon
diff --git a/tools/hv/Makefile b/tools/hv/Makefile
index 1139d71fa0cf..5db5e62cebda 100644
--- a/tools/hv/Makefile
+++ b/tools/hv/Makefile
@@ -1,7 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
 # Makefile for Hyper-V tools
 
-CC = $(CROSS_COMPILE)gcc
 WARNINGS = -Wall -Wextra
 CFLAGS = $(WARNINGS) -g $(shell getconf LFS_CFLAGS)
 
diff --git a/tools/iio/Makefile b/tools/iio/Makefile
index a08e7a47d6a3..332ed2f6c2c2 100644
--- a/tools/iio/Makefile
+++ b/tools/iio/Makefile
@@ -12,8 +12,6 @@ endif
 # (this improves performance and avoids hard-to-debug behaviour);
 MAKEFLAGS += -r
 
-CC = $(CROSS_COMPILE)gcc
-LD = $(CROSS_COMPILE)ld
 CFLAGS += -O2 -Wall -g -D_GNU_SOURCE -I$(OUTPUT)include
 
 ALL_TARGETS := iio_event_monitor lsiio iio_generic_buffer
diff --git a/tools/laptop/freefall/Makefile b/tools/laptop/freefall/Makefile
index 5f758c489a20..b572d94255f6 100644
--- a/tools/laptop/freefall/Makefile
+++ b/tools/laptop/freefall/Makefile
@@ -2,7 +2,6 @@
 PREFIX ?= /usr
 SBINDIR ?= sbin
 INSTALL ?= install
-CC = $(CROSS_COMPILE)gcc
 
 TARGET = freefall
 
diff --git a/tools/leds/Makefile b/tools/leds/Makefile
index c379af003807..7b6bed13daaa 100644
--- a/tools/leds/Makefile
+++ b/tools/leds/Makefile
@@ -1,7 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
 # Makefile for LEDs tools
 
-CC = $(CROSS_COMPILE)gcc
 CFLAGS = -Wall -Wextra -g -I../../include/uapi
 
 all: uledmon led_hw_brightness_mon
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 9b0351d3ce34..012328038594 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -146,12 +146,6 @@ define allow-override
     $(eval $(1) = $(2)))
 endef
 
-# Allow setting CC and AR and LD, or setting CROSS_COMPILE as a prefix.
-$(call allow-override,CC,$(CROSS_COMPILE)gcc)
-$(call allow-override,AR,$(CROSS_COMPILE)ar)
-$(call allow-override,LD,$(CROSS_COMPILE)ld)
-$(call allow-override,CXX,$(CROSS_COMPILE)g++)
-
 LD += $(EXTRA_LDFLAGS)
 
 HOSTCC  ?= gcc
diff --git a/tools/power/acpi/Makefile.config b/tools/power/acpi/Makefile.config
index a1883bbb0144..2cccbba64418 100644
--- a/tools/power/acpi/Makefile.config
+++ b/tools/power/acpi/Makefile.config
@@ -56,9 +56,6 @@ INSTALL_SCRIPT = ${INSTALL_PROGRAM}
 # to compile vs uClibc, that can be done here as well.
 CROSS = #/usr/i386-linux-uclibc/usr/bin/i386-uclibc-
 CROSS_COMPILE ?= $(CROSS)
-CC = $(CROSS_COMPILE)gcc
-LD = $(CROSS_COMPILE)gcc
-STRIP = $(CROSS_COMPILE)strip
 HOSTCC = gcc
 
 # check if compiler option is supported
diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include
index fcb3ed0be5f8..dd614463d4d6 100644
--- a/tools/scripts/Makefile.include
+++ b/tools/scripts/Makefile.include
@@ -42,6 +42,24 @@ EXTRA_WARNINGS += -Wformat
 
 CC_NO_CLANG := $(shell $(CC) -dM -E -x c /dev/null | grep -Fq "__clang__"; echo $$?)
 
+# Makefiles suck: This macro sets a default value of $(2) for the
+# variable named by $(1), unless the variable has been set by
+# environment or command line. This is necessary for CC and AR
+# because make sets default values, so the simpler ?= approach
+# won't work as expected.
+define allow-override
+  $(if $(or $(findstring environment,$(origin $(1))),\
+            $(findstring command line,$(origin $(1)))),,\
+    $(eval $(1) = $(2)))
+endef
+
+# Allow setting various cross-compile vars or setting CROSS_COMPILE as a prefix.
+$(call allow-override,CC,$(CROSS_COMPILE)gcc)
+$(call allow-override,AR,$(CROSS_COMPILE)ar)
+$(call allow-override,LD,$(CROSS_COMPILE)ld)
+$(call allow-override,CXX,$(CROSS_COMPILE)g++)
+$(call allow-override,STRIP,$(CROSS_COMPILE)strip)
+
 ifeq ($(CC_NO_CLANG), 1)
 EXTRA_WARNINGS += -Wstrict-aliasing=3
 endif
diff --git a/tools/spi/Makefile b/tools/spi/Makefile
index 90615e10c79a..815d15589177 100644
--- a/tools/spi/Makefile
+++ b/tools/spi/Makefile
@@ -11,8 +11,6 @@ endif
 # (this improves performance and avoids hard-to-debug behaviour);
 MAKEFLAGS += -r
 
-CC = $(CROSS_COMPILE)gcc
-LD = $(CROSS_COMPILE)ld
 CFLAGS += -O2 -Wall -g -D_GNU_SOURCE -I$(OUTPUT)include
 
 ALL_TARGETS := spidev_test spidev_fdx
diff --git a/tools/usb/Makefile b/tools/usb/Makefile
index 4e6506078494..01d758d73b6d 100644
--- a/tools/usb/Makefile
+++ b/tools/usb/Makefile
@@ -1,7 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
 # Makefile for USB tools
 
-CC = $(CROSS_COMPILE)gcc
 PTHREAD_LIBS = -lpthread
 WARNINGS = -Wall -Wextra
 CFLAGS = $(WARNINGS) -g -I../include
diff --git a/tools/vm/Makefile b/tools/vm/Makefile
index be320b905ea7..20f6cf04377f 100644
--- a/tools/vm/Makefile
+++ b/tools/vm/Makefile
@@ -6,7 +6,6 @@ TARGETS=page-types slabinfo page_owner_sort
 LIB_DIR = ../lib/api
 LIBS = $(LIB_DIR)/libapi.a
 
-CC = $(CROSS_COMPILE)gcc
 CFLAGS = -Wall -Wextra -I../lib/
 LDFLAGS = $(LIBS)
 
diff --git a/tools/wmi/Makefile b/tools/wmi/Makefile
index e664f1167388..e0e87239126b 100644
--- a/tools/wmi/Makefile
+++ b/tools/wmi/Makefile
@@ -2,7 +2,6 @@ PREFIX ?= /usr
 SBINDIR ?= sbin
 INSTALL ?= install
 CFLAGS += -D__EXPORTED_HEADERS__ -I../../include/uapi -I../../include
-CC = $(CROSS_COMPILE)gcc
 
 TARGET = dell-smbios-example
 
-- 
cgit v1.2.3


From bdefe01a6b14bde268741435ac854fda4ef7e847 Mon Sep 17 00:00:00 2001
From: Anders Roxell <anders.roxell@linaro.org>
Date: Wed, 21 Feb 2018 14:45:58 -0800
Subject: selftests/memfd: add run_fuse_test.sh to TEST_FILES
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

While testing memfd tests, there is a missing script, as reported by
kselftest:

  ./run_tests.sh: line 7: ./run_fuse_test.sh: No such file or directory

Link: http://lkml.kernel.org/r/1517955779-11386-1-git-send-email-daniel.diaz@linaro.org
Signed-off-by: Anders Roxell <anders.roxell@linaro.org>
Signed-off-by: Daniel Díaz <daniel.diaz@linaro.org>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 tools/testing/selftests/memfd/Makefile | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/memfd/Makefile b/tools/testing/selftests/memfd/Makefile
index a5276a91dfbf..0862e6f47a38 100644
--- a/tools/testing/selftests/memfd/Makefile
+++ b/tools/testing/selftests/memfd/Makefile
@@ -5,6 +5,7 @@ CFLAGS += -I../../../../include/
 CFLAGS += -I../../../../usr/include/
 
 TEST_PROGS := run_tests.sh
+TEST_FILES := run_fuse_test.sh
 TEST_GEN_FILES := memfd_test fuse_mnt fuse_test
 
 fuse_mnt.o: CFLAGS += $(shell pkg-config fuse --cflags)
-- 
cgit v1.2.3


From b52db43a3d2e34b4ef2bb563d95227bb755027df Mon Sep 17 00:00:00 2001
From: Anders Roxell <anders.roxell@linaro.org>
Date: Wed, 21 Feb 2018 17:51:16 +0100
Subject: selftests/bpf: tcpbpf_kern: use in6_* macros from glibc
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Both glibc and the kernel have in6_* macros definitions. Build fails
because it picks up wrong in6_* macro from the kernel header and not the
header from glibc.

Fixes build error below:
clang -I. -I./include/uapi -I../../../include/uapi
     -Wno-compare-distinct-pointer-types \
         -O2 -target bpf -emit-llvm -c test_tcpbpf_kern.c -o - |      \
llc -march=bpf -mcpu=generic -filetype=obj
     -o .../tools/testing/selftests/bpf/test_tcpbpf_kern.o
In file included from test_tcpbpf_kern.c:12:
.../netinet/in.h:101:5: error: expected identifier
    IPPROTO_HOPOPTS = 0,   /* IPv6 Hop-by-Hop options.  */
    ^
.../linux/in6.h:131:26: note: expanded from macro 'IPPROTO_HOPOPTS'
                                ^
In file included from test_tcpbpf_kern.c:12:
/usr/include/netinet/in.h:103:5: error: expected identifier
    IPPROTO_ROUTING = 43,  /* IPv6 routing header.  */
    ^
.../linux/in6.h:132:26: note: expanded from macro 'IPPROTO_ROUTING'
                                ^
In file included from test_tcpbpf_kern.c:12:
.../netinet/in.h:105:5: error: expected identifier
    IPPROTO_FRAGMENT = 44, /* IPv6 fragmentation header.  */
    ^

Since both glibc and the kernel have in6_* macros definitions, use the
one from glibc.  Kernel headers will check for previous libc definitions
by including include/linux/libc-compat.h.

Reported-by: Daniel Díaz <daniel.diaz@linaro.org>
Signed-off-by: Anders Roxell <anders.roxell@linaro.org>
Tested-by: Daniel Díaz <daniel.diaz@linaro.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/test_tcpbpf_kern.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_tcpbpf_kern.c b/tools/testing/selftests/bpf/test_tcpbpf_kern.c
index 57119ad57a3f..3e645ee41ed5 100644
--- a/tools/testing/selftests/bpf/test_tcpbpf_kern.c
+++ b/tools/testing/selftests/bpf/test_tcpbpf_kern.c
@@ -5,7 +5,6 @@
 #include <linux/if_ether.h>
 #include <linux/if_packet.h>
 #include <linux/ip.h>
-#include <linux/in6.h>
 #include <linux/types.h>
 #include <linux/socket.h>
 #include <linux/tcp.h>
-- 
cgit v1.2.3


From 31a8260d3e34aaddf821388b8e0d589f44401f75 Mon Sep 17 00:00:00 2001
From: Anders Roxell <anders.roxell@linaro.org>
Date: Wed, 21 Feb 2018 22:30:01 +0100
Subject: selftests/bpf: update gitignore with test_libbpf_open
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

bpf builds a test program for loading BPF ELF files. Add the executable
to the .gitignore list.

Signed-off-by: Anders Roxell <anders.roxell@linaro.org>
Tested-by: Daniel Díaz <daniel.diaz@linaro.org>
Acked-by: David S. Miller <davem@davemloft.net>
Acked-by: Shuah Khan <shuahkh@osg.samsung.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/.gitignore | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index cc15af2e54fe..9cf83f895d98 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -11,3 +11,4 @@ test_progs
 test_tcpbpf_user
 test_verifier_log
 feature
+test_libbpf_open
-- 
cgit v1.2.3


From d057dc4e35e16050befa3dda943876dab39cbf80 Mon Sep 17 00:00:00 2001
From: Tycho Andersen <tycho@tycho.ws>
Date: Tue, 20 Feb 2018 19:47:47 -0700
Subject: seccomp: add a selftest for get_metadata

Let's test that we get the flags correctly, and that we preserve the filter
index across the ptrace(PTRACE_SECCOMP_GET_METADATA) correctly.

Signed-off-by: Tycho Andersen <tycho@tycho.ws>
CC: Kees Cook <keescook@chromium.org>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 tools/testing/selftests/seccomp/seccomp_bpf.c | 61 +++++++++++++++++++++++++++
 1 file changed, 61 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 24dbf634e2dd..92db48825dc1 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -141,6 +141,15 @@ struct seccomp_data {
 #define SECCOMP_FILTER_FLAG_LOG 2
 #endif
 
+#ifndef PTRACE_SECCOMP_GET_METADATA
+#define PTRACE_SECCOMP_GET_METADATA	0x420d
+
+struct seccomp_metadata {
+	__u64 filter_off;       /* Input: which filter */
+	__u64 flags;             /* Output: filter's flags */
+};
+#endif
+
 #ifndef seccomp
 int seccomp(unsigned int op, unsigned int flags, void *args)
 {
@@ -2845,6 +2854,58 @@ TEST(get_action_avail)
 	EXPECT_EQ(errno, EOPNOTSUPP);
 }
 
+TEST(get_metadata)
+{
+	pid_t pid;
+	int pipefd[2];
+	char buf;
+	struct seccomp_metadata md;
+
+	ASSERT_EQ(0, pipe(pipefd));
+
+	pid = fork();
+	ASSERT_GE(pid, 0);
+	if (pid == 0) {
+		struct sock_filter filter[] = {
+			BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+		};
+		struct sock_fprog prog = {
+			.len = (unsigned short)ARRAY_SIZE(filter),
+			.filter = filter,
+		};
+
+		/* one with log, one without */
+		ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER,
+				     SECCOMP_FILTER_FLAG_LOG, &prog));
+		ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog));
+
+		ASSERT_EQ(0, close(pipefd[0]));
+		ASSERT_EQ(1, write(pipefd[1], "1", 1));
+		ASSERT_EQ(0, close(pipefd[1]));
+
+		while (1)
+			sleep(100);
+	}
+
+	ASSERT_EQ(0, close(pipefd[1]));
+	ASSERT_EQ(1, read(pipefd[0], &buf, 1));
+
+	ASSERT_EQ(0, ptrace(PTRACE_ATTACH, pid));
+	ASSERT_EQ(pid, waitpid(pid, NULL, 0));
+
+	md.filter_off = 0;
+	ASSERT_EQ(sizeof(md), ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md));
+	EXPECT_EQ(md.flags, SECCOMP_FILTER_FLAG_LOG);
+	EXPECT_EQ(md.filter_off, 0);
+
+	md.filter_off = 1;
+	ASSERT_EQ(sizeof(md), ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md));
+	EXPECT_EQ(md.flags, 0);
+	EXPECT_EQ(md.filter_off, 1);
+
+	ASSERT_EQ(0, kill(pid, SIGKILL));
+}
+
 /*
  * TODO:
  * - add microbenchmarks
-- 
cgit v1.2.3


From 80475c48c6a8a65171e035e0915dc7996b5a0a65 Mon Sep 17 00:00:00 2001
From: Li Zhijian <zhijianx.li@intel.com>
Date: Thu, 22 Feb 2018 10:34:02 +0800
Subject: selftests/bpf/test_maps: exit child process without error in ENOMEM
 case

test_maps contains a series of stress tests, and previously it will break the
rest tests when it failed to alloc memory.
-----------------------
Failed to create hashmap key=8 value=262144 'Cannot allocate memory'
Failed to create hashmap key=16 value=262144 'Cannot allocate memory'
Failed to create hashmap key=8 value=262144 'Cannot allocate memory'
Failed to create hashmap key=8 value=262144 'Cannot allocate memory'
test_maps: test_maps.c:955: run_parallel: Assertion `status == 0' failed.
Aborted
not ok 1..3 selftests:  test_maps [FAIL]
-----------------------
after this patch, the rest tests will be continue when it occurs an ENOMEM failure

CC: Alexei Starovoitov <alexei.starovoitov@gmail.com>
CC: Philip Li <philip.li@intel.com>
Suggested-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Li Zhijian <zhijianx.li@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/test_maps.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 436c4c72414f..9e03a4c356a4 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -126,6 +126,8 @@ static void test_hashmap_sizes(int task, void *data)
 			fd = bpf_create_map(BPF_MAP_TYPE_HASH, i, j,
 					    2, map_flags);
 			if (fd < 0) {
+				if (errno == ENOMEM)
+					return;
 				printf("Failed to create hashmap key=%d value=%d '%s'\n",
 				       i, j, strerror(errno));
 				exit(1);
-- 
cgit v1.2.3


From 16338a9b3ac30740d49f5dfed81bac0ffa53b9c7 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 23 Feb 2018 01:03:43 +0100
Subject: bpf, arm64: fix out of bounds access in tail call

I recently noticed a crash on arm64 when feeding a bogus index
into BPF tail call helper. The crash would not occur when the
interpreter is used, but only in case of JIT. Output looks as
follows:

  [  347.007486] Unable to handle kernel paging request at virtual address fffb850e96492510
  [...]
  [  347.043065] [fffb850e96492510] address between user and kernel address ranges
  [  347.050205] Internal error: Oops: 96000004 [#1] SMP
  [...]
  [  347.190829] x13: 0000000000000000 x12: 0000000000000000
  [  347.196128] x11: fffc047ebe782800 x10: ffff808fd7d0fd10
  [  347.201427] x9 : 0000000000000000 x8 : 0000000000000000
  [  347.206726] x7 : 0000000000000000 x6 : 001c991738000000
  [  347.212025] x5 : 0000000000000018 x4 : 000000000000ba5a
  [  347.217325] x3 : 00000000000329c4 x2 : ffff808fd7cf0500
  [  347.222625] x1 : ffff808fd7d0fc00 x0 : ffff808fd7cf0500
  [  347.227926] Process test_verifier (pid: 4548, stack limit = 0x000000007467fa61)
  [  347.235221] Call trace:
  [  347.237656]  0xffff000002f3a4fc
  [  347.240784]  bpf_test_run+0x78/0xf8
  [  347.244260]  bpf_prog_test_run_skb+0x148/0x230
  [  347.248694]  SyS_bpf+0x77c/0x1110
  [  347.251999]  el0_svc_naked+0x30/0x34
  [  347.255564] Code: 9100075a d280220a 8b0a002a d37df04b (f86b694b)
  [...]

In this case the index used in BPF r3 is the same as in r1
at the time of the call, meaning we fed a pointer as index;
here, it had the value 0xffff808fd7cf0500 which sits in x2.

While I found tail calls to be working in general (also for
hitting the error cases), I noticed the following in the code
emission:

  # bpftool p d j i 988
  [...]
  38:   ldr     w10, [x1,x10]
  3c:   cmp     w2, w10
  40:   b.ge    0x000000000000007c              <-- signed cmp
  44:   mov     x10, #0x20                      // #32
  48:   cmp     x26, x10
  4c:   b.gt    0x000000000000007c
  50:   add     x26, x26, #0x1
  54:   mov     x10, #0x110                     // #272
  58:   add     x10, x1, x10
  5c:   lsl     x11, x2, #3
  60:   ldr     x11, [x10,x11]                  <-- faulting insn (f86b694b)
  64:   cbz     x11, 0x000000000000007c
  [...]

Meaning, the tests passed because commit ddb55992b04d ("arm64:
bpf: implement bpf_tail_call() helper") was using signed compares
instead of unsigned which as a result had the test wrongly passing.

Change this but also the tail call count test both into unsigned
and cap the index as u32. Latter we did as well in 90caccdd8cc0
("bpf: fix bpf_tail_call() x64 JIT") and is needed in addition here,
too. Tested on HiSilicon Hi1616.

Result after patch:

  # bpftool p d j i 268
  [...]
  38:	ldr	w10, [x1,x10]
  3c:	add	w2, w2, #0x0
  40:	cmp	w2, w10
  44:	b.cs	0x0000000000000080
  48:	mov	x10, #0x20                  	// #32
  4c:	cmp	x26, x10
  50:	b.hi	0x0000000000000080
  54:	add	x26, x26, #0x1
  58:	mov	x10, #0x110                 	// #272
  5c:	add	x10, x1, x10
  60:	lsl	x11, x2, #3
  64:	ldr	x11, [x10,x11]
  68:	cbz	x11, 0x0000000000000080
  [...]

Fixes: ddb55992b04d ("arm64: bpf: implement bpf_tail_call() helper")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 arch/arm64/net/bpf_jit_comp.c               |  5 +++--
 tools/testing/selftests/bpf/test_verifier.c | 26 ++++++++++++++++++++++++++
 2 files changed, 29 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 1d4f1da7c58f..a93350451e8e 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -250,8 +250,9 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
 	off = offsetof(struct bpf_array, map.max_entries);
 	emit_a64_mov_i64(tmp, off, ctx);
 	emit(A64_LDR32(tmp, r2, tmp), ctx);
+	emit(A64_MOV(0, r3, r3), ctx);
 	emit(A64_CMP(0, r3, tmp), ctx);
-	emit(A64_B_(A64_COND_GE, jmp_offset), ctx);
+	emit(A64_B_(A64_COND_CS, jmp_offset), ctx);
 
 	/* if (tail_call_cnt > MAX_TAIL_CALL_CNT)
 	 *     goto out;
@@ -259,7 +260,7 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
 	 */
 	emit_a64_mov_i64(tmp, MAX_TAIL_CALL_CNT, ctx);
 	emit(A64_CMP(1, tcc, tmp), ctx);
-	emit(A64_B_(A64_COND_GT, jmp_offset), ctx);
+	emit(A64_B_(A64_COND_HI, jmp_offset), ctx);
 	emit(A64_ADD_I(1, tcc, tcc, 1), ctx);
 
 	/* prog = array->ptrs[index];
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index c0f16e93f9bd..c73592fa3d41 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -2586,6 +2586,32 @@ static struct bpf_test tests[] = {
 		.result_unpriv = REJECT,
 		.result = ACCEPT,
 	},
+	{
+		"runtime/jit: pass negative index to tail_call",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_3, -1),
+			BPF_LD_MAP_FD(BPF_REG_2, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_tail_call),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_prog = { 1 },
+		.result = ACCEPT,
+	},
+	{
+		"runtime/jit: pass > 32bit index to tail_call",
+		.insns = {
+			BPF_LD_IMM64(BPF_REG_3, 0x100000000ULL),
+			BPF_LD_MAP_FD(BPF_REG_2, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_tail_call),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_prog = { 2 },
+		.result = ACCEPT,
+	},
 	{
 		"stack pointer arithmetic",
 		.insns = {
-- 
cgit v1.2.3


From ca36960211eb228bcbc7aaebfa0d027368a94c60 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 23 Feb 2018 22:29:05 +0100
Subject: bpf: allow xadd only on aligned memory

The requirements around atomic_add() / atomic64_add() resp. their
JIT implementations differ across architectures. E.g. while x86_64
seems just fine with BPF's xadd on unaligned memory, on arm64 it
triggers via interpreter but also JIT the following crash:

  [  830.864985] Unable to handle kernel paging request at virtual address ffff8097d7ed6703
  [...]
  [  830.916161] Internal error: Oops: 96000021 [#1] SMP
  [  830.984755] CPU: 37 PID: 2788 Comm: test_verifier Not tainted 4.16.0-rc2+ #8
  [  830.991790] Hardware name: Huawei TaiShan 2280 /BC11SPCD, BIOS 1.29 07/17/2017
  [  830.998998] pstate: 80400005 (Nzcv daif +PAN -UAO)
  [  831.003793] pc : __ll_sc_atomic_add+0x4/0x18
  [  831.008055] lr : ___bpf_prog_run+0x1198/0x1588
  [  831.012485] sp : ffff00001ccabc20
  [  831.015786] x29: ffff00001ccabc20 x28: ffff8017d56a0f00
  [  831.021087] x27: 0000000000000001 x26: 0000000000000000
  [  831.026387] x25: 000000c168d9db98 x24: 0000000000000000
  [  831.031686] x23: ffff000008203878 x22: ffff000009488000
  [  831.036986] x21: ffff000008b14e28 x20: ffff00001ccabcb0
  [  831.042286] x19: ffff0000097b5080 x18: 0000000000000a03
  [  831.047585] x17: 0000000000000000 x16: 0000000000000000
  [  831.052885] x15: 0000ffffaeca8000 x14: 0000000000000000
  [  831.058184] x13: 0000000000000000 x12: 0000000000000000
  [  831.063484] x11: 0000000000000001 x10: 0000000000000000
  [  831.068783] x9 : 0000000000000000 x8 : 0000000000000000
  [  831.074083] x7 : 0000000000000000 x6 : 000580d428000000
  [  831.079383] x5 : 0000000000000018 x4 : 0000000000000000
  [  831.084682] x3 : ffff00001ccabcb0 x2 : 0000000000000001
  [  831.089982] x1 : ffff8097d7ed6703 x0 : 0000000000000001
  [  831.095282] Process test_verifier (pid: 2788, stack limit = 0x0000000018370044)
  [  831.102577] Call trace:
  [  831.105012]  __ll_sc_atomic_add+0x4/0x18
  [  831.108923]  __bpf_prog_run32+0x4c/0x70
  [  831.112748]  bpf_test_run+0x78/0xf8
  [  831.116224]  bpf_prog_test_run_xdp+0xb4/0x120
  [  831.120567]  SyS_bpf+0x77c/0x1110
  [  831.123873]  el0_svc_naked+0x30/0x34
  [  831.127437] Code: 97fffe97 17ffffec 00000000 f9800031 (885f7c31)

Reason for this is because memory is required to be aligned. In
case of BPF, we always enforce alignment in terms of stack access,
but not when accessing map values or packet data when the underlying
arch (e.g. arm64) has CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS set.

xadd on packet data that is local to us anyway is just wrong, so
forbid this case entirely. The only place where xadd makes sense in
fact are map values; xadd on stack is wrong as well, but it's been
around for much longer. Specifically enforce strict alignment in case
of xadd, so that we handle this case generically and avoid such crashes
in the first place.

Fixes: 17a5267067f3 ("bpf: verifier (add verifier core)")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/verifier.c                       | 42 +++++++++++++--------
 tools/testing/selftests/bpf/test_verifier.c | 58 +++++++++++++++++++++++++++++
 2 files changed, 84 insertions(+), 16 deletions(-)

(limited to 'tools')

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 5fb69a85d967..c6eff108aa99 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1356,6 +1356,13 @@ static bool is_ctx_reg(struct bpf_verifier_env *env, int regno)
 	return reg->type == PTR_TO_CTX;
 }
 
+static bool is_pkt_reg(struct bpf_verifier_env *env, int regno)
+{
+	const struct bpf_reg_state *reg = cur_regs(env) + regno;
+
+	return type_is_pkt_pointer(reg->type);
+}
+
 static int check_pkt_ptr_alignment(struct bpf_verifier_env *env,
 				   const struct bpf_reg_state *reg,
 				   int off, int size, bool strict)
@@ -1416,10 +1423,10 @@ static int check_generic_ptr_alignment(struct bpf_verifier_env *env,
 }
 
 static int check_ptr_alignment(struct bpf_verifier_env *env,
-			       const struct bpf_reg_state *reg,
-			       int off, int size)
+			       const struct bpf_reg_state *reg, int off,
+			       int size, bool strict_alignment_once)
 {
-	bool strict = env->strict_alignment;
+	bool strict = env->strict_alignment || strict_alignment_once;
 	const char *pointer_desc = "";
 
 	switch (reg->type) {
@@ -1576,9 +1583,9 @@ static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
  * if t==write && value_regno==-1, some unknown value is stored into memory
  * if t==read && value_regno==-1, don't care what we read from memory
  */
-static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regno, int off,
-			    int bpf_size, enum bpf_access_type t,
-			    int value_regno)
+static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regno,
+			    int off, int bpf_size, enum bpf_access_type t,
+			    int value_regno, bool strict_alignment_once)
 {
 	struct bpf_reg_state *regs = cur_regs(env);
 	struct bpf_reg_state *reg = regs + regno;
@@ -1590,7 +1597,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
 		return size;
 
 	/* alignment checks will add in reg->off themselves */
-	err = check_ptr_alignment(env, reg, off, size);
+	err = check_ptr_alignment(env, reg, off, size, strict_alignment_once);
 	if (err)
 		return err;
 
@@ -1735,21 +1742,23 @@ static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_ins
 		return -EACCES;
 	}
 
-	if (is_ctx_reg(env, insn->dst_reg)) {
-		verbose(env, "BPF_XADD stores into R%d context is not allowed\n",
-			insn->dst_reg);
+	if (is_ctx_reg(env, insn->dst_reg) ||
+	    is_pkt_reg(env, insn->dst_reg)) {
+		verbose(env, "BPF_XADD stores into R%d %s is not allowed\n",
+			insn->dst_reg, is_ctx_reg(env, insn->dst_reg) ?
+			"context" : "packet");
 		return -EACCES;
 	}
 
 	/* check whether atomic_add can read the memory */
 	err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
-			       BPF_SIZE(insn->code), BPF_READ, -1);
+			       BPF_SIZE(insn->code), BPF_READ, -1, true);
 	if (err)
 		return err;
 
 	/* check whether atomic_add can write into the same memory */
 	return check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
-				BPF_SIZE(insn->code), BPF_WRITE, -1);
+				BPF_SIZE(insn->code), BPF_WRITE, -1, true);
 }
 
 /* when register 'regno' is passed into function that will read 'access_size'
@@ -2388,7 +2397,8 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
 	 * is inferred from register state.
 	 */
 	for (i = 0; i < meta.access_size; i++) {
-		err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B, BPF_WRITE, -1);
+		err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B,
+				       BPF_WRITE, -1, false);
 		if (err)
 			return err;
 	}
@@ -4632,7 +4642,7 @@ static int do_check(struct bpf_verifier_env *env)
 			 */
 			err = check_mem_access(env, insn_idx, insn->src_reg, insn->off,
 					       BPF_SIZE(insn->code), BPF_READ,
-					       insn->dst_reg);
+					       insn->dst_reg, false);
 			if (err)
 				return err;
 
@@ -4684,7 +4694,7 @@ static int do_check(struct bpf_verifier_env *env)
 			/* check that memory (dst_reg + off) is writeable */
 			err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
 					       BPF_SIZE(insn->code), BPF_WRITE,
-					       insn->src_reg);
+					       insn->src_reg, false);
 			if (err)
 				return err;
 
@@ -4719,7 +4729,7 @@ static int do_check(struct bpf_verifier_env *env)
 			/* check that memory (dst_reg + off) is writeable */
 			err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
 					       BPF_SIZE(insn->code), BPF_WRITE,
-					       -1);
+					       -1, false);
 			if (err)
 				return err;
 
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index c73592fa3d41..437c0b1c9d21 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -11163,6 +11163,64 @@ static struct bpf_test tests[] = {
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
+	{
+		"xadd/w check unaligned stack",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+			BPF_STX_XADD(BPF_W, BPF_REG_10, BPF_REG_0, -7),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "misaligned stack access off",
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"xadd/w check unaligned map",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_IMM(BPF_REG_1, 1),
+			BPF_STX_XADD(BPF_W, BPF_REG_0, BPF_REG_1, 3),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 3),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		.result = REJECT,
+		.errstr = "misaligned value access off",
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"xadd/w check unaligned pkt",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct xdp_md, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct xdp_md, data_end)),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+			BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 99),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 6),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+			BPF_ST_MEM(BPF_W, BPF_REG_2, 3, 0),
+			BPF_STX_XADD(BPF_W, BPF_REG_2, BPF_REG_0, 1),
+			BPF_STX_XADD(BPF_W, BPF_REG_2, BPF_REG_0, 2),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 1),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "BPF_XADD stores into R2 packet",
+		.prog_type = BPF_PROG_TYPE_XDP,
+	},
 };
 
 static int probe_filter_length(const struct bpf_insn *fp)
-- 
cgit v1.2.3


From faa312a543283c717342cd332b5b9247bd305dce Mon Sep 17 00:00:00 2001
From: Marc Hartmayer <mhartmay@linux.vnet.ibm.com>
Date: Tue, 9 Jan 2018 13:27:01 +0100
Subject: tools/kvm_stat: simplify the sortkey function

The 'sortkey' function references a value in its enclosing
scope (closure). This is not common practice for a sort key function
so let's replace it. Additionally, the function 'sorted' has already a
parameter for reversing the result therefore the inversion of the
values is unneeded. The check for stats[x][1] is also superfluous as
it's ensured that this value is initialized with 0.

Signed-off-by: Marc Hartmayer <mhartmay@linux.vnet.ibm.com>
Tested-by: Stefan Raspl <raspl@linux.vnet.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/kvm/kvm_stat/kvm_stat | 23 ++++++++---------------
 1 file changed, 8 insertions(+), 15 deletions(-)

(limited to 'tools')

diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat
index a5684d0968b4..d630f5f3e091 100755
--- a/tools/kvm/kvm_stat/kvm_stat
+++ b/tools/kvm/kvm_stat/kvm_stat
@@ -1080,30 +1080,23 @@ class Tui(object):
         self.screen.move(row, 0)
         self.screen.clrtobot()
         stats = self.stats.get(self._display_guests)
-
-        def sortCurAvg(x):
-            # sort by current events if available
-            if stats[x][1]:
-                return (-stats[x][1], -stats[x][0])
-            else:
-                return (0, -stats[x][0])
-
-        def sortTotal(x):
-            # sort by totals
-            return (0, -stats[x][0])
         total = 0.
         for key in stats.keys():
             if key.find('(') is -1:
                 total += stats[key][0]
         if self._sorting == SORT_DEFAULT:
-            sortkey = sortCurAvg
+            def sortkey((_k, v)):
+                # sort by (delta value, overall value)
+                return (v[1], v[0])
         else:
-            sortkey = sortTotal
+            def sortkey((_k, v)):
+                # sort by overall value
+                return v[0]
+
         tavg = 0
-        for key in sorted(stats.keys(), key=sortkey):
+        for key, values in sorted(stats.items(), key=sortkey, reverse=True):
             if row >= self.screen.getmaxyx()[0] - 1:
                 break
-            values = stats[key]
             if not values[0] and not values[1]:
                 break
             if values[0] is not None:
-- 
cgit v1.2.3


From 006f1548ac13d67d21865416a0f4e8062df1a85f Mon Sep 17 00:00:00 2001
From: Marc Hartmayer <mhartmay@linux.vnet.ibm.com>
Date: Tue, 9 Jan 2018 13:27:02 +0100
Subject: tools/kvm_stat: use a namedtuple for storing the values

Use a namedtuple for storing the values as it allows to access the
fields of a tuple via names. This makes the overall code much easier
to read and to understand. Access by index is still possible as
before.

Signed-off-by: Marc Hartmayer <mhartmay@linux.vnet.ibm.com>
Tested-by: Stefan Raspl <raspl@linux.vnet.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/kvm/kvm_stat/kvm_stat | 27 +++++++++++++++------------
 1 file changed, 15 insertions(+), 12 deletions(-)

(limited to 'tools')

diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat
index d630f5f3e091..2b7e83a5f7b8 100755
--- a/tools/kvm/kvm_stat/kvm_stat
+++ b/tools/kvm/kvm_stat/kvm_stat
@@ -33,7 +33,7 @@ import resource
 import struct
 import re
 import subprocess
-from collections import defaultdict
+from collections import defaultdict, namedtuple
 
 VMX_EXIT_REASONS = {
     'EXCEPTION_NMI':        0,
@@ -800,6 +800,9 @@ class DebugfsProvider(Provider):
         self.read(2)
 
 
+EventStat = namedtuple('EventStat', ['value', 'delta'])
+
+
 class Stats(object):
     """Manages the data providers and the data they provide.
 
@@ -867,10 +870,10 @@ class Stats(object):
         for provider in self.providers:
             new = provider.read(by_guest=by_guest)
             for key in new if by_guest else provider.fields:
-                oldval = self.values.get(key, (0, 0))[0]
+                oldval = self.values.get(key, EventStat(0, 0)).value
                 newval = new.get(key, 0)
                 newdelta = newval - oldval
-                self.values[key] = (newval, newdelta)
+                self.values[key] = EventStat(newval, newdelta)
         return self.values
 
     def toggle_display_guests(self, to_pid):
@@ -1083,28 +1086,28 @@ class Tui(object):
         total = 0.
         for key in stats.keys():
             if key.find('(') is -1:
-                total += stats[key][0]
+                total += stats[key].value
         if self._sorting == SORT_DEFAULT:
             def sortkey((_k, v)):
                 # sort by (delta value, overall value)
-                return (v[1], v[0])
+                return (v.delta, v.value)
         else:
             def sortkey((_k, v)):
                 # sort by overall value
-                return v[0]
+                return v.value
 
         tavg = 0
         for key, values in sorted(stats.items(), key=sortkey, reverse=True):
             if row >= self.screen.getmaxyx()[0] - 1:
                 break
-            if not values[0] and not values[1]:
+            if not values.value and not values.delta:
                 break
-            if values[0] is not None:
-                cur = int(round(values[1] / sleeptime)) if values[1] else ''
+            if values.value is not None:
+                cur = int(round(values.delta / sleeptime)) if values.delta else ''
                 if self._display_guests:
                     key = self.get_gname_from_pid(key)
                 self.screen.addstr(row, 1, '%-40s %10d%7.1f %8s' %
-                                   (key, values[0], values[0] * 100 / total,
+                                   (key, values.value, values.value * 100 / total,
                                     cur))
                 if cur is not '' and key.find('(') is -1:
                     tavg += cur
@@ -1375,7 +1378,7 @@ def batch(stats):
         s = stats.get()
         for key in sorted(s.keys()):
             values = s[key]
-            print('%-42s%10d%10d' % (key, values[0], values[1]))
+            print('%-42s%10d%10d' % (key, values.value, values.delta))
     except KeyboardInterrupt:
         pass
 
@@ -1392,7 +1395,7 @@ def log(stats):
     def statline():
         s = stats.get()
         for k in keys:
-            print(' %9d' % s[k][1], end=' ')
+            print(' %9d' % s[k].delta, end=' ')
         print()
     line = 0
     banner_repeat = 20
-- 
cgit v1.2.3


From 0eb578009a1d530a11846d7c4733a5db04730884 Mon Sep 17 00:00:00 2001
From: Marc Hartmayer <mhartmay@linux.vnet.ibm.com>
Date: Tue, 9 Jan 2018 13:27:03 +0100
Subject: tools/kvm_stat: use a more pythonic way to iterate over dictionaries

If it's clear that the values of a dictionary will be used then use
the '.items()' method.

Signed-off-by: Marc Hartmayer <mhartmay@linux.vnet.ibm.com>
Tested-by: Stefan Raspl <raspl@linux.vnet.ibm.com>
[Include fix for logging mode by Stefan Raspl]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/kvm/kvm_stat/kvm_stat | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

(limited to 'tools')

diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat
index 2b7e83a5f7b8..f0da954a856c 100755
--- a/tools/kvm/kvm_stat/kvm_stat
+++ b/tools/kvm/kvm_stat/kvm_stat
@@ -1084,9 +1084,10 @@ class Tui(object):
         self.screen.clrtobot()
         stats = self.stats.get(self._display_guests)
         total = 0.
-        for key in stats.keys():
+        for key, values in stats.items():
             if key.find('(') is -1:
-                total += stats[key].value
+                total += values.value
+
         if self._sorting == SORT_DEFAULT:
             def sortkey((_k, v)):
                 # sort by (delta value, overall value)
@@ -1376,8 +1377,7 @@ def batch(stats):
         s = stats.get()
         time.sleep(1)
         s = stats.get()
-        for key in sorted(s.keys()):
-            values = s[key]
+        for key, values in sorted(s.items()):
             print('%-42s%10d%10d' % (key, values.value, values.delta))
     except KeyboardInterrupt:
         pass
@@ -1388,14 +1388,14 @@ def log(stats):
     keys = sorted(stats.get().keys())
 
     def banner():
-        for k in keys:
-            print(k, end=' ')
+        for key in keys:
+            print(key, end=' ')
         print()
 
     def statline():
         s = stats.get()
-        for k in keys:
-            print(' %9d' % s[k].delta, end=' ')
+        for key in keys:
+            print(' %9d' % s[key].delta, end=' ')
         print()
     line = 0
     banner_repeat = 20
-- 
cgit v1.2.3


From 369d5a85bb782ecf63c5bae9686c7e6104eea991 Mon Sep 17 00:00:00 2001
From: Marc Hartmayer <mhartmay@linux.vnet.ibm.com>
Date: Tue, 9 Jan 2018 13:27:04 +0100
Subject: tools/kvm_stat: avoid 'is' for equality checks

Use '==' for equality checks and 'is' when comparing identities.

An example where '==' and 'is' behave differently:
>>> a = 4242
>>> a == 4242
True
>>> a is 4242
False

Signed-off-by: Marc Hartmayer <mhartmay@linux.vnet.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/kvm/kvm_stat/kvm_stat | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat
index f0da954a856c..e3f0becb6632 100755
--- a/tools/kvm/kvm_stat/kvm_stat
+++ b/tools/kvm/kvm_stat/kvm_stat
@@ -1085,7 +1085,7 @@ class Tui(object):
         stats = self.stats.get(self._display_guests)
         total = 0.
         for key, values in stats.items():
-            if key.find('(') is -1:
+            if key.find('(') == -1:
                 total += values.value
 
         if self._sorting == SORT_DEFAULT:
@@ -1110,7 +1110,7 @@ class Tui(object):
                 self.screen.addstr(row, 1, '%-40s %10d%7.1f %8s' %
                                    (key, values.value, values.value * 100 / total,
                                     cur))
-                if cur is not '' and key.find('(') is -1:
+                if cur != '' and key.find('(') == -1:
                     tavg += cur
             row += 1
         if row == 3:
-- 
cgit v1.2.3


From 3df33a0f34a3883b6696bff8cc8fcda3c7444a62 Mon Sep 17 00:00:00 2001
From: Stefan Raspl <stefan.raspl@de.ibm.com>
Date: Mon, 5 Feb 2018 13:59:57 +0100
Subject: tools/kvm_stat: fix crash when filtering out all non-child trace
 events

When we apply a filter that will only leave child trace events, we
receive a ZeroDivisionError when calculating the percentages.
In that case, provide percentages based on child events only.
To reproduce, run 'kvm_stat -f .*[\(].*'.

Signed-off-by: Stefan Raspl <raspl@linux.vnet.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/kvm/kvm_stat/kvm_stat | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'tools')

diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat
index e3f0becb6632..4e0f282c5289 100755
--- a/tools/kvm/kvm_stat/kvm_stat
+++ b/tools/kvm/kvm_stat/kvm_stat
@@ -1084,9 +1084,15 @@ class Tui(object):
         self.screen.clrtobot()
         stats = self.stats.get(self._display_guests)
         total = 0.
+        ctotal = 0.
         for key, values in stats.items():
             if key.find('(') == -1:
                 total += values.value
+            else:
+                ctotal += values.value
+        if total == 0.:
+            # we don't have any fields, or all non-child events are filtered
+            total = ctotal
 
         if self._sorting == SORT_DEFAULT:
             def sortkey((_k, v)):
-- 
cgit v1.2.3


From 1cd8bfb1ed9962be6d80d5020508922aa93653ac Mon Sep 17 00:00:00 2001
From: Stefan Raspl <stefan.raspl@de.ibm.com>
Date: Mon, 5 Feb 2018 13:59:58 +0100
Subject: tools/kvm_stat: print error on invalid regex

Entering an invalid regular expression did not produce any indication of an
error so far.
To reproduce, press 'f' and enter 'foo(' (with an unescaped bracket).

Signed-off-by: Stefan Raspl <raspl@linux.vnet.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/kvm/kvm_stat/kvm_stat | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'tools')

diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat
index 4e0f282c5289..08f842238c32 100755
--- a/tools/kvm/kvm_stat/kvm_stat
+++ b/tools/kvm/kvm_stat/kvm_stat
@@ -1176,6 +1176,7 @@ class Tui(object):
         Asks for a valid regex and sets the fields filter accordingly.
 
         """
+        msg = ''
         while True:
             self.screen.erase()
             self.screen.addstr(0, 0,
@@ -1184,6 +1185,7 @@ class Tui(object):
             self.screen.addstr(2, 0,
                                "Current regex: {0}"
                                .format(self.stats.fields_filter))
+            self.screen.addstr(5, 0, msg)
             self.screen.addstr(3, 0, "New regex: ")
             curses.echo()
             regex = self.screen.getstr().decode(ENCODING)
@@ -1198,6 +1200,7 @@ class Tui(object):
                 self.refresh_header()
                 return
             except re.error:
+                msg = '"' + regex + '": Not a valid regular expression'
                 continue
 
     def show_vm_selection_by_pid(self):
-- 
cgit v1.2.3


From 1fd6a708c8438403dee17eb411cf81ffba13cf43 Mon Sep 17 00:00:00 2001
From: Stefan Raspl <stefan.raspl@de.ibm.com>
Date: Thu, 22 Feb 2018 12:16:24 +0100
Subject: tools/kvm_stat: fix debugfs handling

Te checks for debugfs assumed that debugfs is always mounted at
/sys/kernel/debug - which is likely, but not guaranteed. This is addressed
by checking /proc/mounts for the actual location.
Furthermore, when debugfs was mounted, but the kvm module not loaded, a
misleading error pointing towards debugfs not present was given.
To reproduce,
(a) run kvm_stat with debugfs mounted at a place different from
    /sys/kernel/debug
(b) run kvm_stat with debugfs mounted but kvm module not loaded

Signed-off-by: Stefan Raspl <raspl@linux.vnet.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/kvm/kvm_stat/kvm_stat | 40 ++++++++++++++++++++++++++--------------
 1 file changed, 26 insertions(+), 14 deletions(-)

(limited to 'tools')

diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat
index 08f842238c32..c5c8e9295b91 100755
--- a/tools/kvm/kvm_stat/kvm_stat
+++ b/tools/kvm/kvm_stat/kvm_stat
@@ -331,9 +331,6 @@ class perf_event_attr(ctypes.Structure):
 PERF_TYPE_TRACEPOINT = 2
 PERF_FORMAT_GROUP = 1 << 3
 
-PATH_DEBUGFS_TRACING = '/sys/kernel/debug/tracing'
-PATH_DEBUGFS_KVM = '/sys/kernel/debug/kvm'
-
 
 class Group(object):
     """Represents a perf event group."""
@@ -1544,17 +1541,6 @@ Press any other key to refresh statistics immediately.
 
 def check_access(options):
     """Exits if the current user can't access all needed directories."""
-    if not os.path.exists('/sys/kernel/debug'):
-        sys.stderr.write('Please enable CONFIG_DEBUG_FS in your kernel.')
-        sys.exit(1)
-
-    if not os.path.exists(PATH_DEBUGFS_KVM):
-        sys.stderr.write("Please make sure, that debugfs is mounted and "
-                         "readable by the current user:\n"
-                         "('mount -t debugfs debugfs /sys/kernel/debug')\n"
-                         "Also ensure, that the kvm modules are loaded.\n")
-        sys.exit(1)
-
     if not os.path.exists(PATH_DEBUGFS_TRACING) and (options.tracepoints or
                                                      not options.debugfs):
         sys.stderr.write("Please enable CONFIG_TRACING in your kernel "
@@ -1572,7 +1558,33 @@ def check_access(options):
     return options
 
 
+def assign_globals():
+    global PATH_DEBUGFS_KVM
+    global PATH_DEBUGFS_TRACING
+
+    debugfs = ''
+    for line in file('/proc/mounts'):
+        if line.split(' ')[0] == 'debugfs':
+            debugfs = line.split(' ')[1]
+            break
+    if debugfs == '':
+        sys.stderr.write("Please make sure that CONFIG_DEBUG_FS is enabled in "
+                         "your kernel, mounted and\nreadable by the current "
+                         "user:\n"
+                         "('mount -t debugfs debugfs /sys/kernel/debug')\n")
+        sys.exit(1)
+
+    PATH_DEBUGFS_KVM = os.path.join(debugfs, 'kvm')
+    PATH_DEBUGFS_TRACING = os.path.join(debugfs, 'tracing')
+
+    if not os.path.exists(PATH_DEBUGFS_KVM):
+        sys.stderr.write("Please make sure that CONFIG_KVM is enabled in "
+                         "your kernel and that the modules are loaded.\n")
+        sys.exit(1)
+
+
 def main():
+    assign_globals()
     options = get_options()
     options = check_access(options)
 
-- 
cgit v1.2.3


From c0e8c21eae616ed8703c1b4b01046a1578ee875c Mon Sep 17 00:00:00 2001
From: Stefan Raspl <stefan.raspl@de.ibm.com>
Date: Thu, 22 Feb 2018 12:16:26 +0100
Subject: tools/kvm_stat: mark private methods as such

Helps quite a bit reading the code when it's obvious when a method is
intended for internal use only.

Signed-off-by: Stefan Raspl <raspl@linux.vnet.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/kvm/kvm_stat/kvm_stat | 132 ++++++++++++++++++++++----------------------
 1 file changed, 66 insertions(+), 66 deletions(-)

(limited to 'tools')

diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat
index c5c8e9295b91..c09b7428f563 100755
--- a/tools/kvm/kvm_stat/kvm_stat
+++ b/tools/kvm/kvm_stat/kvm_stat
@@ -373,8 +373,8 @@ class Event(object):
         self.syscall = self.libc.syscall
         self.name = name
         self.fd = None
-        self.setup_event(group, trace_cpu, trace_pid, trace_point,
-                         trace_filter, trace_set)
+        self._setup_event(group, trace_cpu, trace_pid, trace_point,
+                          trace_filter, trace_set)
 
     def __del__(self):
         """Closes the event's file descriptor.
@@ -387,7 +387,7 @@ class Event(object):
         if self.fd:
             os.close(self.fd)
 
-    def perf_event_open(self, attr, pid, cpu, group_fd, flags):
+    def _perf_event_open(self, attr, pid, cpu, group_fd, flags):
         """Wrapper for the sys_perf_evt_open() syscall.
 
         Used to set up performance events, returns a file descriptor or -1
@@ -406,7 +406,7 @@ class Event(object):
                             ctypes.c_int(pid), ctypes.c_int(cpu),
                             ctypes.c_int(group_fd), ctypes.c_long(flags))
 
-    def setup_event_attribute(self, trace_set, trace_point):
+    def _setup_event_attribute(self, trace_set, trace_point):
         """Returns an initialized ctype perf_event_attr struct."""
 
         id_path = os.path.join(PATH_DEBUGFS_TRACING, 'events', trace_set,
@@ -416,8 +416,8 @@ class Event(object):
         event_attr.config = int(open(id_path).read())
         return event_attr
 
-    def setup_event(self, group, trace_cpu, trace_pid, trace_point,
-                    trace_filter, trace_set):
+    def _setup_event(self, group, trace_cpu, trace_pid, trace_point,
+                     trace_filter, trace_set):
         """Sets up the perf event in Linux.
 
         Issues the syscall to register the event in the kernel and
@@ -425,7 +425,7 @@ class Event(object):
 
         """
 
-        event_attr = self.setup_event_attribute(trace_set, trace_point)
+        event_attr = self._setup_event_attribute(trace_set, trace_point)
 
         # First event will be group leader.
         group_leader = -1
@@ -434,8 +434,8 @@ class Event(object):
         if group.events:
             group_leader = group.events[0].fd
 
-        fd = self.perf_event_open(event_attr, trace_pid,
-                                  trace_cpu, group_leader, 0)
+        fd = self._perf_event_open(event_attr, trace_pid,
+                                   trace_cpu, group_leader, 0)
         if fd == -1:
             err = ctypes.get_errno()
             raise OSError(err, os.strerror(err),
@@ -497,12 +497,12 @@ class TracepointProvider(Provider):
     """
     def __init__(self, pid, fields_filter):
         self.group_leaders = []
-        self.filters = self.get_filters()
+        self.filters = self._get_filters()
         self.update_fields(fields_filter)
         self.pid = pid
 
     @staticmethod
-    def get_filters():
+    def _get_filters():
         """Returns a dict of trace events, their filter ids and
         the values that can be filtered.
 
@@ -518,7 +518,7 @@ class TracepointProvider(Provider):
             filters['kvm_exit'] = ('exit_reason', ARCH.exit_reasons)
         return filters
 
-    def get_available_fields(self):
+    def _get_available_fields(self):
         """Returns a list of available event's of format 'event name(filter
         name)'.
 
@@ -546,11 +546,11 @@ class TracepointProvider(Provider):
 
     def update_fields(self, fields_filter):
         """Refresh fields, applying fields_filter"""
-        self.fields = [field for field in self.get_available_fields()
+        self.fields = [field for field in self._get_available_fields()
                        if self.is_field_wanted(fields_filter, field)]
 
     @staticmethod
-    def get_online_cpus():
+    def _get_online_cpus():
         """Returns a list of cpu id integers."""
         def parse_int_list(list_string):
             """Returns an int list from a string of comma separated integers and
@@ -572,17 +572,17 @@ class TracepointProvider(Provider):
             cpu_string = cpu_list.readline()
             return parse_int_list(cpu_string)
 
-    def setup_traces(self):
+    def _setup_traces(self):
         """Creates all event and group objects needed to be able to retrieve
         data."""
-        fields = self.get_available_fields()
+        fields = self._get_available_fields()
         if self._pid > 0:
             # Fetch list of all threads of the monitored pid, as qemu
             # starts a thread for each vcpu.
             path = os.path.join('/proc', str(self._pid), 'task')
             groupids = self.walkdir(path)[1]
         else:
-            groupids = self.get_online_cpus()
+            groupids = self._get_online_cpus()
 
         # The constant is needed as a buffer for python libs, std
         # streams and other files that the script opens.
@@ -660,7 +660,7 @@ class TracepointProvider(Provider):
         # The garbage collector will get rid of all Event/Group
         # objects and open files after removing the references.
         self.group_leaders = []
-        self.setup_traces()
+        self._setup_traces()
         self.fields = self._fields
 
     def read(self, by_guest=0):
@@ -689,9 +689,9 @@ class DebugfsProvider(Provider):
         self.paths = []
         self.pid = pid
         if include_past:
-            self.restore()
+            self._restore()
 
-    def get_available_fields(self):
+    def _get_available_fields(self):
         """"Returns a list of available fields.
 
         The fields are all available KVM debugfs files
@@ -701,7 +701,7 @@ class DebugfsProvider(Provider):
 
     def update_fields(self, fields_filter):
         """Refresh fields, applying fields_filter"""
-        self._fields = [field for field in self.get_available_fields()
+        self._fields = [field for field in self._get_available_fields()
                         if self.is_field_wanted(fields_filter, field)]
 
     @property
@@ -755,7 +755,7 @@ class DebugfsProvider(Provider):
                     paths.append(dir)
         for path in paths:
             for field in self._fields:
-                value = self.read_field(field, path)
+                value = self._read_field(field, path)
                 key = path + field
                 if reset == 1:
                     self._baseline[key] = value
@@ -776,7 +776,7 @@ class DebugfsProvider(Provider):
 
         return results
 
-    def read_field(self, field, path):
+    def _read_field(self, field, path):
         """Returns the value of a single field from a specific VM."""
         try:
             return int(open(os.path.join(PATH_DEBUGFS_KVM,
@@ -791,7 +791,7 @@ class DebugfsProvider(Provider):
         self._baseline = {}
         self.read(1)
 
-    def restore(self):
+    def _restore(self):
         """Reset field counters"""
         self._baseline = {}
         self.read(2)
@@ -808,13 +808,12 @@ class Stats(object):
 
     """
     def __init__(self, options):
-        self.providers = self.get_providers(options)
+        self.providers = self._get_providers(options)
         self._pid_filter = options.pid
         self._fields_filter = options.fields
         self.values = {}
 
-    @staticmethod
-    def get_providers(options):
+    def _get_providers(self, options):
         """Returns a list of data providers depending on the passed options."""
         providers = []
 
@@ -826,7 +825,7 @@ class Stats(object):
 
         return providers
 
-    def update_provider_filters(self):
+    def _update_provider_filters(self):
         """Propagates fields filters to providers."""
         # As we reset the counters when updating the fields we can
         # also clear the cache of old values.
@@ -847,7 +846,7 @@ class Stats(object):
     def fields_filter(self, fields_filter):
         if fields_filter != self._fields_filter:
             self._fields_filter = fields_filter
-            self.update_provider_filters()
+            self._update_provider_filters()
 
     @property
     def pid_filter(self):
@@ -969,7 +968,7 @@ class Tui(object):
 
         return res
 
-    def print_all_gnames(self, row):
+    def _print_all_gnames(self, row):
         """Print a list of all running guests along with their pids."""
         self.screen.addstr(row, 2, '%8s  %-60s' %
                            ('Pid', 'Guest Name (fuzzy list, might be '
@@ -1032,7 +1031,7 @@ class Tui(object):
 
         return name
 
-    def update_drilldown(self):
+    def _update_drilldown(self):
         """Sets or removes a filter that only allows fields without braces."""
         if not self.stats.fields_filter:
             self.stats.fields_filter = DEFAULT_REGEX
@@ -1040,11 +1039,11 @@ class Tui(object):
         elif self.stats.fields_filter == DEFAULT_REGEX:
             self.stats.fields_filter = None
 
-    def update_pid(self, pid):
+    def _update_pid(self, pid):
         """Propagates pid selection to stats object."""
         self.stats.pid_filter = pid
 
-    def refresh_header(self, pid=None):
+    def _refresh_header(self, pid=None):
         """Refreshes the header."""
         if pid is None:
             pid = self.stats.pid_filter
@@ -1075,7 +1074,7 @@ class Tui(object):
         self.screen.addstr(4, 1, 'Collecting data...')
         self.screen.refresh()
 
-    def refresh_body(self, sleeptime):
+    def _refresh_body(self, sleeptime):
         row = 3
         self.screen.move(row, 0)
         self.screen.clrtobot()
@@ -1124,7 +1123,7 @@ class Tui(object):
                                curses.A_BOLD)
         self.screen.refresh()
 
-    def show_msg(self, text):
+    def _show_msg(self, text):
         """Display message centered text and exit on key press"""
         hint = 'Press any key to continue'
         curses.cbreak()
@@ -1139,7 +1138,7 @@ class Tui(object):
                            curses.A_STANDOUT)
         self.screen.getkey()
 
-    def show_help_interactive(self):
+    def _show_help_interactive(self):
         """Display help with list of interactive commands"""
         msg = ('   b     toggle events by guests (debugfs only, honors'
                ' filters)',
@@ -1165,9 +1164,9 @@ class Tui(object):
             self.screen.addstr(row, 0, line)
             row += 1
         self.screen.getkey()
-        self.refresh_header()
+        self._refresh_header()
 
-    def show_filter_selection(self):
+    def _show_filter_selection(self):
         """Draws filter selection mask.
 
         Asks for a valid regex and sets the fields filter accordingly.
@@ -1189,18 +1188,18 @@ class Tui(object):
             curses.noecho()
             if len(regex) == 0:
                 self.stats.fields_filter = DEFAULT_REGEX
-                self.refresh_header()
+                self._refresh_header()
                 return
             try:
                 re.compile(regex)
                 self.stats.fields_filter = regex
-                self.refresh_header()
+                self._refresh_header()
                 return
             except re.error:
                 msg = '"' + regex + '": Not a valid regular expression'
                 continue
 
-    def show_vm_selection_by_pid(self):
+    def _show_vm_selection_by_pid(self):
         """Draws PID selection mask.
 
         Asks for a pid until a valid pid or 0 has been entered.
@@ -1216,7 +1215,7 @@ class Tui(object):
                                'This might limit the shown data to the trace '
                                'statistics.')
             self.screen.addstr(5, 0, msg)
-            self.print_all_gnames(7)
+            self._print_all_gnames(7)
 
             curses.echo()
             self.screen.addstr(3, 0, "Pid [0 or pid]: ")
@@ -1232,13 +1231,13 @@ class Tui(object):
                         continue
                 else:
                     pid = 0
-                self.refresh_header(pid)
-                self.update_pid(pid)
+                self._refresh_header(pid)
+                self._update_pid(pid)
                 break
             except ValueError:
                 msg = '"' + str(pid) + '": Not a valid pid'
 
-    def show_set_update_interval(self):
+    def _show_set_update_interval(self):
         """Draws update interval selection mask."""
         msg = ''
         while True:
@@ -1268,9 +1267,9 @@ class Tui(object):
 
             except ValueError:
                 msg = '"' + str(val) + '": Invalid value'
-        self.refresh_header()
+        self._refresh_header()
 
-    def show_vm_selection_by_guest_name(self):
+    def _show_vm_selection_by_guest_name(self):
         """Draws guest selection mask.
 
         Asks for a guest name until a valid guest name or '' is entered.
@@ -1286,15 +1285,15 @@ class Tui(object):
                                'This might limit the shown data to the trace '
                                'statistics.')
             self.screen.addstr(5, 0, msg)
-            self.print_all_gnames(7)
+            self._print_all_gnames(7)
             curses.echo()
             self.screen.addstr(3, 0, "Guest [ENTER or guest]: ")
             gname = self.screen.getstr().decode(ENCODING)
             curses.noecho()
 
             if not gname:
-                self.refresh_header(0)
-                self.update_pid(0)
+                self._refresh_header(0)
+                self._update_pid(0)
                 break
             else:
                 pids = []
@@ -1311,17 +1310,17 @@ class Tui(object):
                     msg = '"' + gname + '": Multiple matches found, use pid ' \
                           'filter instead'
                     continue
-                self.refresh_header(pids[0])
-                self.update_pid(pids[0])
+                self._refresh_header(pids[0])
+                self._update_pid(pids[0])
                 break
 
     def show_stats(self):
         """Refreshes the screen and processes user input."""
         sleeptime = self._delay_initial
-        self.refresh_header()
+        self._refresh_header()
         start = 0.0  # result based on init value never appears on screen
         while True:
-            self.refresh_body(time.time() - start)
+            self._refresh_body(time.time() - start)
             curses.halfdelay(int(sleeptime * 10))
             start = time.time()
             sleeptime = self._delay_regular
@@ -1330,32 +1329,33 @@ class Tui(object):
                 if char == 'b':
                     self._display_guests = not self._display_guests
                     if self.stats.toggle_display_guests(self._display_guests):
-                        self.show_msg(['Command not available with tracepoints'
-                                       ' enabled', 'Restart with debugfs only '
-                                       '(see option \'-d\') and try again!'])
+                        self._show_msg(['Command not available with '
+                                        'tracepoints enabled', 'Restart with '
+                                        'debugfs only (see option \'-d\') and '
+                                        'try again!'])
                         self._display_guests = not self._display_guests
-                    self.refresh_header()
+                    self._refresh_header()
                 if char == 'c':
                     self.stats.fields_filter = DEFAULT_REGEX
-                    self.refresh_header(0)
-                    self.update_pid(0)
+                    self._refresh_header(0)
+                    self._update_pid(0)
                 if char == 'f':
                     curses.curs_set(1)
-                    self.show_filter_selection()
+                    self._show_filter_selection()
                     curses.curs_set(0)
                     sleeptime = self._delay_initial
                 if char == 'g':
                     curses.curs_set(1)
-                    self.show_vm_selection_by_guest_name()
+                    self._show_vm_selection_by_guest_name()
                     curses.curs_set(0)
                     sleeptime = self._delay_initial
                 if char == 'h':
-                    self.show_help_interactive()
+                    self._show_help_interactive()
                 if char == 'o':
                     self._sorting = not self._sorting
                 if char == 'p':
                     curses.curs_set(1)
-                    self.show_vm_selection_by_pid()
+                    self._show_vm_selection_by_pid()
                     curses.curs_set(0)
                     sleeptime = self._delay_initial
                 if char == 'q':
@@ -1364,11 +1364,11 @@ class Tui(object):
                     self.stats.reset()
                 if char == 's':
                     curses.curs_set(1)
-                    self.show_set_update_interval()
+                    self._show_set_update_interval()
                     curses.curs_set(0)
                     sleeptime = self._delay_initial
                 if char == 'x':
-                    self.update_drilldown()
+                    self._update_drilldown()
                     # prevents display of current values on next refresh
                     self.stats.get(self._display_guests)
             except KeyboardInterrupt:
-- 
cgit v1.2.3


From 516f1190a1e0cce12128a6446e6438745c8de62a Mon Sep 17 00:00:00 2001
From: Stefan Raspl <stefan.raspl@de.ibm.com>
Date: Thu, 22 Feb 2018 12:16:27 +0100
Subject: tools/kvm_stat: eliminate extra guest/pid selection dialog

We can do with a single dialog that takes both, pids and guest names.
Note that we keep both interactive commands, 'p' and 'g' for now, to
avoid confusion among users used to a specific key.

While at it, we improve on some minor glitches regarding curses usage,
e.g. cursor still visible when not supposed to be.

Signed-off-by: Stefan Raspl <raspl@linux.vnet.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/kvm/kvm_stat/kvm_stat     | 110 ++++++++++++++--------------------------
 tools/kvm/kvm_stat/kvm_stat.txt |   4 +-
 2 files changed, 39 insertions(+), 75 deletions(-)

(limited to 'tools')

diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat
index c09b7428f563..0d5776785d27 100755
--- a/tools/kvm/kvm_stat/kvm_stat
+++ b/tools/kvm/kvm_stat/kvm_stat
@@ -1041,6 +1041,8 @@ class Tui(object):
 
     def _update_pid(self, pid):
         """Propagates pid selection to stats object."""
+        self.screen.addstr(4, 1, 'Updating pid filter...')
+        self.screen.refresh()
         self.stats.pid_filter = pid
 
     def _refresh_header(self, pid=None):
@@ -1144,10 +1146,10 @@ class Tui(object):
                ' filters)',
                '   c     clear filter',
                '   f     filter by regular expression',
-               '   g     filter by guest name',
+               '   g     filter by guest name/PID',
                '   h     display interactive commands reference',
                '   o     toggle sorting order (Total vs CurAvg/s)',
-               '   p     filter by PID',
+               '   p     filter by guest name/PID',
                '   q     quit',
                '   r     reset stats',
                '   s     set update interval',
@@ -1199,44 +1201,6 @@ class Tui(object):
                 msg = '"' + regex + '": Not a valid regular expression'
                 continue
 
-    def _show_vm_selection_by_pid(self):
-        """Draws PID selection mask.
-
-        Asks for a pid until a valid pid or 0 has been entered.
-
-        """
-        msg = ''
-        while True:
-            self.screen.erase()
-            self.screen.addstr(0, 0,
-                               'Show statistics for specific pid.',
-                               curses.A_BOLD)
-            self.screen.addstr(1, 0,
-                               'This might limit the shown data to the trace '
-                               'statistics.')
-            self.screen.addstr(5, 0, msg)
-            self._print_all_gnames(7)
-
-            curses.echo()
-            self.screen.addstr(3, 0, "Pid [0 or pid]: ")
-            pid = self.screen.getstr().decode(ENCODING)
-            curses.noecho()
-
-            try:
-                if len(pid) > 0:
-                    pid = int(pid)
-                    if pid != 0 and not os.path.isdir(os.path.join('/proc/',
-                                                                   str(pid))):
-                        msg = '"' + str(pid) + '": Not a running process'
-                        continue
-                else:
-                    pid = 0
-                self._refresh_header(pid)
-                self._update_pid(pid)
-                break
-            except ValueError:
-                msg = '"' + str(pid) + '": Not a valid pid'
-
     def _show_set_update_interval(self):
         """Draws update interval selection mask."""
         msg = ''
@@ -1269,17 +1233,17 @@ class Tui(object):
                 msg = '"' + str(val) + '": Invalid value'
         self._refresh_header()
 
-    def _show_vm_selection_by_guest_name(self):
+    def _show_vm_selection_by_guest(self):
         """Draws guest selection mask.
 
-        Asks for a guest name until a valid guest name or '' is entered.
+        Asks for a guest name or pid until a valid guest name or '' is entered.
 
         """
         msg = ''
         while True:
             self.screen.erase()
             self.screen.addstr(0, 0,
-                               'Show statistics for specific guest.',
+                               'Show statistics for specific guest or pid.',
                                curses.A_BOLD)
             self.screen.addstr(1, 0,
                                'This might limit the shown data to the trace '
@@ -1287,32 +1251,39 @@ class Tui(object):
             self.screen.addstr(5, 0, msg)
             self._print_all_gnames(7)
             curses.echo()
-            self.screen.addstr(3, 0, "Guest [ENTER or guest]: ")
-            gname = self.screen.getstr().decode(ENCODING)
+            curses.curs_set(1)
+            self.screen.addstr(3, 0, "Guest or pid [ENTER exits]: ")
+            guest = self.screen.getstr().decode(ENCODING)
             curses.noecho()
 
-            if not gname:
-                self._refresh_header(0)
-                self._update_pid(0)
+            pid = 0
+            if not guest or guest == '0':
                 break
-            else:
-                pids = []
-                try:
-                    pids = self.get_pid_from_gname(gname)
-                except:
-                    msg = '"' + gname + '": Internal error while searching, ' \
-                          'use pid filter instead'
-                    continue
-                if len(pids) == 0:
-                    msg = '"' + gname + '": Not an active guest'
+            if guest.isdigit():
+                if not os.path.isdir(os.path.join('/proc/', guest)):
+                    msg = '"' + guest + '": Not a running process'
                     continue
-                if len(pids) > 1:
-                    msg = '"' + gname + '": Multiple matches found, use pid ' \
-                          'filter instead'
-                    continue
-                self._refresh_header(pids[0])
-                self._update_pid(pids[0])
+                pid = int(guest)
                 break
+            pids = []
+            try:
+                pids = self.get_pid_from_gname(guest)
+            except:
+                msg = '"' + guest + '": Internal error while searching, ' \
+                      'use pid filter instead'
+                continue
+            if len(pids) == 0:
+                msg = '"' + guest + '": Not an active guest'
+                continue
+            if len(pids) > 1:
+                msg = '"' + guest + '": Multiple matches found, use pid ' \
+                      'filter instead'
+                continue
+            pid = pids[0]
+            break
+        curses.curs_set(0)
+        self._refresh_header(pid)
+        self._update_pid(pid)
 
     def show_stats(self):
         """Refreshes the screen and processes user input."""
@@ -1344,20 +1315,13 @@ class Tui(object):
                     self._show_filter_selection()
                     curses.curs_set(0)
                     sleeptime = self._delay_initial
-                if char == 'g':
-                    curses.curs_set(1)
-                    self._show_vm_selection_by_guest_name()
-                    curses.curs_set(0)
+                if char == 'g' or char == 'p':
+                    self._show_vm_selection_by_guest()
                     sleeptime = self._delay_initial
                 if char == 'h':
                     self._show_help_interactive()
                 if char == 'o':
                     self._sorting = not self._sorting
-                if char == 'p':
-                    curses.curs_set(1)
-                    self._show_vm_selection_by_pid()
-                    curses.curs_set(0)
-                    sleeptime = self._delay_initial
                 if char == 'q':
                     break
                 if char == 'r':
diff --git a/tools/kvm/kvm_stat/kvm_stat.txt b/tools/kvm/kvm_stat/kvm_stat.txt
index b5b3810c9e94..0811d860fe75 100644
--- a/tools/kvm/kvm_stat/kvm_stat.txt
+++ b/tools/kvm/kvm_stat/kvm_stat.txt
@@ -35,13 +35,13 @@ INTERACTIVE COMMANDS
 
 *f*::	filter by regular expression
 
-*g*::	filter by guest name
+*g*::	filter by guest name/PID
 
 *h*::	display interactive commands reference
 
 *o*::   toggle sorting order (Total vs CurAvg/s)
 
-*p*::	filter by PID
+*p*::	filter by guest name/PID
 
 *q*::	quit
 
-- 
cgit v1.2.3


From 18e8f4100ef14f924514fbd91eb67bd5fa5396b7 Mon Sep 17 00:00:00 2001
From: Stefan Raspl <stefan.raspl@de.ibm.com>
Date: Thu, 22 Feb 2018 12:16:28 +0100
Subject: tools/kvm_stat: separate drilldown and fields filtering

Drilldown (i.e. toggle display of child trace events) was implemented by
overriding the fields filter. This resulted in inconsistencies: E.g. when
drilldown was not active, adding a filter that also matches child trace
events would not only filter fields according to the filter, but also add
in the child trace events matching the filter. E.g. on x86, setting
'kvm_userspace_exit' as the fields filter after startup would result in
display of kvm_userspace_exit(DCR), although that wasn't previously
present - not exactly what one would expect from a filter.
This patch addresses the issue by keeping drilldown and fields filter
separate. While at it, we also fix a PEP8 issue by adding a blank line
at one place (since we're in the area...).
We implement this by adding a framework that also allows to define a
taxonomy among the debugfs events to identify child trace events. I.e.
drilldown using 'x' can now also work with debugfs. A respective parent-
child relationship is only known for S390 at the moment, but could be
added adjusting other platforms' ARCH.dbg_is_child() methods
accordingly.

Signed-off-by: Stefan Raspl <raspl@linux.vnet.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/kvm/kvm_stat/kvm_stat | 143 +++++++++++++++++++++++++++++++-------------
 1 file changed, 100 insertions(+), 43 deletions(-)

(limited to 'tools')

diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat
index 0d5776785d27..6b6630ee6daf 100755
--- a/tools/kvm/kvm_stat/kvm_stat
+++ b/tools/kvm/kvm_stat/kvm_stat
@@ -228,6 +228,7 @@ IOCTL_NUMBERS = {
 }
 
 ENCODING = locale.getpreferredencoding(False)
+TRACE_FILTER = re.compile(r'^[^\(]*$')
 
 
 class Arch(object):
@@ -260,6 +261,11 @@ class Arch(object):
                     return ArchX86(SVM_EXIT_REASONS)
                 return
 
+    def tracepoint_is_child(self, field):
+        if (TRACE_FILTER.match(field)):
+            return None
+        return field.split('(', 1)[0]
+
 
 class ArchX86(Arch):
     def __init__(self, exit_reasons):
@@ -267,6 +273,10 @@ class ArchX86(Arch):
         self.ioctl_numbers = IOCTL_NUMBERS
         self.exit_reasons = exit_reasons
 
+    def debugfs_is_child(self, field):
+        """ Returns name of parent if 'field' is a child, None otherwise """
+        return None
+
 
 class ArchPPC(Arch):
     def __init__(self):
@@ -282,6 +292,10 @@ class ArchPPC(Arch):
         self.ioctl_numbers['SET_FILTER'] = 0x80002406 | char_ptr_size << 16
         self.exit_reasons = {}
 
+    def debugfs_is_child(self, field):
+        """ Returns name of parent if 'field' is a child, None otherwise """
+        return None
+
 
 class ArchA64(Arch):
     def __init__(self):
@@ -289,6 +303,10 @@ class ArchA64(Arch):
         self.ioctl_numbers = IOCTL_NUMBERS
         self.exit_reasons = AARCH64_EXIT_REASONS
 
+    def debugfs_is_child(self, field):
+        """ Returns name of parent if 'field' is a child, None otherwise """
+        return None
+
 
 class ArchS390(Arch):
     def __init__(self):
@@ -296,6 +314,12 @@ class ArchS390(Arch):
         self.ioctl_numbers = IOCTL_NUMBERS
         self.exit_reasons = None
 
+    def debugfs_is_child(self, field):
+        """ Returns name of parent if 'field' is a child, None otherwise """
+        if field.startswith('instruction_'):
+            return 'exit_instruction'
+
+
 ARCH = Arch.get_arch()
 
 
@@ -472,6 +496,10 @@ class Event(object):
 
 class Provider(object):
     """Encapsulates functionalities used by all providers."""
+    def __init__(self, pid):
+        self.child_events = False
+        self.pid = pid
+
     @staticmethod
     def is_field_wanted(fields_filter, field):
         """Indicate whether field is valid according to fields_filter."""
@@ -499,7 +527,7 @@ class TracepointProvider(Provider):
         self.group_leaders = []
         self.filters = self._get_filters()
         self.update_fields(fields_filter)
-        self.pid = pid
+        super(TracepointProvider, self).__init__(pid)
 
     @staticmethod
     def _get_filters():
@@ -519,7 +547,7 @@ class TracepointProvider(Provider):
         return filters
 
     def _get_available_fields(self):
-        """Returns a list of available event's of format 'event name(filter
+        """Returns a list of available events of format 'event name(filter
         name)'.
 
         All available events have directories under
@@ -547,7 +575,8 @@ class TracepointProvider(Provider):
     def update_fields(self, fields_filter):
         """Refresh fields, applying fields_filter"""
         self.fields = [field for field in self._get_available_fields()
-                       if self.is_field_wanted(fields_filter, field)]
+                       if self.is_field_wanted(fields_filter, field) or
+                       ARCH.tracepoint_is_child(field)]
 
     @staticmethod
     def _get_online_cpus():
@@ -668,8 +697,12 @@ class TracepointProvider(Provider):
         ret = defaultdict(int)
         for group in self.group_leaders:
             for name, val in group.read().items():
-                if name in self._fields:
-                    ret[name] += val
+                if name not in self._fields:
+                    continue
+                parent = ARCH.tracepoint_is_child(name)
+                if parent:
+                    name += ' ' + parent
+                ret[name] += val
         return ret
 
     def reset(self):
@@ -687,7 +720,7 @@ class DebugfsProvider(Provider):
         self._baseline = {}
         self.do_read = True
         self.paths = []
-        self.pid = pid
+        super(DebugfsProvider, self).__init__(pid)
         if include_past:
             self._restore()
 
@@ -702,7 +735,8 @@ class DebugfsProvider(Provider):
     def update_fields(self, fields_filter):
         """Refresh fields, applying fields_filter"""
         self._fields = [field for field in self._get_available_fields()
-                        if self.is_field_wanted(fields_filter, field)]
+                        if self.is_field_wanted(fields_filter, field) or
+                        ARCH.debugfs_is_child(field)]
 
     @property
     def fields(self):
@@ -763,14 +797,15 @@ class DebugfsProvider(Provider):
                     self._baseline[key] = 0
                 if self._baseline.get(key, -1) == -1:
                     self._baseline[key] = value
-                increment = (results.get(field, 0) + value -
-                             self._baseline.get(key, 0))
-                if by_guest:
-                    pid = key.split('-')[0]
-                    if pid in results:
-                        results[pid] += increment
-                    else:
-                        results[pid] = increment
+                parent = ARCH.debugfs_is_child(field)
+                if parent:
+                    field = field + ' ' + parent
+                else:
+                    if by_guest:
+                        field = key.split('-')[0]    # set 'field' to 'pid'
+                increment = value - self._baseline.get(key, 0)
+                if field in results:
+                    results[field] += increment
                 else:
                     results[field] = increment
 
@@ -812,6 +847,7 @@ class Stats(object):
         self._pid_filter = options.pid
         self._fields_filter = options.fields
         self.values = {}
+        self._child_events = False
 
     def _get_providers(self, options):
         """Returns a list of data providers depending on the passed options."""
@@ -860,12 +896,29 @@ class Stats(object):
             for provider in self.providers:
                 provider.pid = self._pid_filter
 
+    @property
+    def child_events(self):
+        return self._child_events
+
+    @child_events.setter
+    def child_events(self, val):
+        self._child_events = val
+        for provider in self.providers:
+            provider.child_events = val
+
     def get(self, by_guest=0):
         """Returns a dict with field -> (value, delta to last value) of all
-        provider data."""
+        provider data.
+        Key formats:
+          * plain: 'key' is event name
+          * child-parent: 'key' is in format '<child> <parent>'
+          * pid: 'key' is the pid of the guest, and the record contains the
+               aggregated event data
+        These formats are generated by the providers, and handled in class TUI.
+        """
         for provider in self.providers:
             new = provider.read(by_guest=by_guest)
-            for key in new if by_guest else provider.fields:
+            for key in new:
                 oldval = self.values.get(key, EventStat(0, 0)).value
                 newval = new.get(key, 0)
                 newdelta = newval - oldval
@@ -898,10 +951,10 @@ class Stats(object):
         self.get(to_pid)
         return 0
 
+
 DELAY_DEFAULT = 3.0
 MAX_GUEST_NAME_LEN = 48
 MAX_REGEX_LEN = 44
-DEFAULT_REGEX = r'^[^\(]*$'
 SORT_DEFAULT = 0
 
 
@@ -1031,14 +1084,6 @@ class Tui(object):
 
         return name
 
-    def _update_drilldown(self):
-        """Sets or removes a filter that only allows fields without braces."""
-        if not self.stats.fields_filter:
-            self.stats.fields_filter = DEFAULT_REGEX
-
-        elif self.stats.fields_filter == DEFAULT_REGEX:
-            self.stats.fields_filter = None
-
     def _update_pid(self, pid):
         """Propagates pid selection to stats object."""
         self.screen.addstr(4, 1, 'Updating pid filter...')
@@ -1060,8 +1105,7 @@ class Tui(object):
                                .format(pid, gname), curses.A_BOLD)
         else:
             self.screen.addstr(0, 0, 'kvm statistics - summary', curses.A_BOLD)
-        if self.stats.fields_filter and self.stats.fields_filter \
-           != DEFAULT_REGEX:
+        if self.stats.fields_filter:
             regex = self.stats.fields_filter
             if len(regex) > MAX_REGEX_LEN:
                 regex = regex[:MAX_REGEX_LEN] + '...'
@@ -1077,6 +1121,9 @@ class Tui(object):
         self.screen.refresh()
 
     def _refresh_body(self, sleeptime):
+        def is_child_field(field):
+            return field.find('(') != -1
+
         row = 3
         self.screen.move(row, 0)
         self.screen.clrtobot()
@@ -1084,7 +1131,11 @@ class Tui(object):
         total = 0.
         ctotal = 0.
         for key, values in stats.items():
-            if key.find('(') == -1:
+            if self._display_guests:
+                if self.get_gname_from_pid(key):
+                    total += values.value
+                continue
+            if not key.find(' ') != -1:
                 total += values.value
             else:
                 ctotal += values.value
@@ -1101,19 +1152,26 @@ class Tui(object):
                 # sort by overall value
                 return v.value
 
+        sorted_items = sorted(stats.items(), key=sortkey, reverse=True)
+
+        # print events
         tavg = 0
-        for key, values in sorted(stats.items(), key=sortkey, reverse=True):
+        for key, values in sorted_items:
             if row >= self.screen.getmaxyx()[0] - 1:
                 break
-            if not values.value and not values.delta:
-                break
+            if values == (0, 0):
+                continue
+            if not self.stats.child_events and key.find(' ') != -1:
+                continue
             if values.value is not None:
                 cur = int(round(values.delta / sleeptime)) if values.delta else ''
                 if self._display_guests:
                     key = self.get_gname_from_pid(key)
-                self.screen.addstr(row, 1, '%-40s %10d%7.1f %8s' %
-                                   (key, values.value, values.value * 100 / total,
-                                    cur))
+                    if not key:
+                        continue
+                self.screen.addstr(row, 1, '%-40s %10d%7.1f %8s' % (key
+                                   .split(' ')[0], values.value,
+                                   values.value * 100 / total, cur))
                 if cur != '' and key.find('(') == -1:
                     tavg += cur
             row += 1
@@ -1189,7 +1247,7 @@ class Tui(object):
             regex = self.screen.getstr().decode(ENCODING)
             curses.noecho()
             if len(regex) == 0:
-                self.stats.fields_filter = DEFAULT_REGEX
+                self.stats.fields_filter = ''
                 self._refresh_header()
                 return
             try:
@@ -1307,7 +1365,7 @@ class Tui(object):
                         self._display_guests = not self._display_guests
                     self._refresh_header()
                 if char == 'c':
-                    self.stats.fields_filter = DEFAULT_REGEX
+                    self.stats.fields_filter = ''
                     self._refresh_header(0)
                     self._update_pid(0)
                 if char == 'f':
@@ -1332,9 +1390,7 @@ class Tui(object):
                     curses.curs_set(0)
                     sleeptime = self._delay_initial
                 if char == 'x':
-                    self._update_drilldown()
-                    # prevents display of current values on next refresh
-                    self.stats.get(self._display_guests)
+                    self.stats.child_events = not self.stats.child_events
             except KeyboardInterrupt:
                 break
             except curses.error:
@@ -1348,7 +1404,8 @@ def batch(stats):
         time.sleep(1)
         s = stats.get()
         for key, values in sorted(s.items()):
-            print('%-42s%10d%10d' % (key, values.value, values.delta))
+            print('%-42s%10d%10d' % (key.split(' ')[0], values.value,
+                  values.delta))
     except KeyboardInterrupt:
         pass
 
@@ -1359,7 +1416,7 @@ def log(stats):
 
     def banner():
         for key in keys:
-            print(key, end=' ')
+            print(key.split(' ')[0], end=' ')
         print()
 
     def statline():
@@ -1470,7 +1527,7 @@ Press any other key to refresh statistics immediately.
                          )
     optparser.add_option('-f', '--fields',
                          action='store',
-                         default=DEFAULT_REGEX,
+                         default='',
                          dest='fields',
                          help='''fields to display (regex)
                                  "-f help" for a list of available events''',
-- 
cgit v1.2.3


From df72ecfc790fa01de1c41f836ff51d12f9c40318 Mon Sep 17 00:00:00 2001
From: Stefan Raspl <stefan.raspl@de.ibm.com>
Date: Thu, 22 Feb 2018 12:16:29 +0100
Subject: tools/kvm_stat: group child events indented after parent

We keep the current logic that sorts all events (parent and child), but
re-shuffle the events afterwards, grouping the children after the
respective parent. Note that the percentage column for child events
gives the percentage of the parent's total.
Since we rework the logic anyway, we modify the total average
calculation to use the raw numbers instead of the (rounded) averages.
Note that this can result in differing numbers (between total average
and the sum of the individual averages) due to rounding errors.

Signed-off-by: Stefan Raspl <raspl@linux.vnet.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/kvm/kvm_stat/kvm_stat | 89 ++++++++++++++++++++++++++++++---------------
 1 file changed, 59 insertions(+), 30 deletions(-)

(limited to 'tools')

diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat
index 6b6630ee6daf..862c997932e2 100755
--- a/tools/kvm/kvm_stat/kvm_stat
+++ b/tools/kvm/kvm_stat/kvm_stat
@@ -1124,6 +1124,45 @@ class Tui(object):
         def is_child_field(field):
             return field.find('(') != -1
 
+        def insert_child(sorted_items, child, values, parent):
+            num = len(sorted_items)
+            for i in range(0, num):
+                # only add child if parent is present
+                if parent.startswith(sorted_items[i][0]):
+                    sorted_items.insert(i + 1, ('  ' + child, values))
+
+        def get_sorted_events(self, stats):
+            """ separate parent and child events """
+            if self._sorting == SORT_DEFAULT:
+                def sortkey((_k, v)):
+                    # sort by (delta value, overall value)
+                    return (v.delta, v.value)
+            else:
+                def sortkey((_k, v)):
+                    # sort by overall value
+                    return v.value
+
+            childs = []
+            sorted_items = []
+            # we can't rule out child events to appear prior to parents even
+            # when sorted - separate out all children first, and add in later
+            for key, values in sorted(stats.items(), key=sortkey,
+                                      reverse=True):
+                if values == (0, 0):
+                    continue
+                if key.find(' ') != -1:
+                    if not self.stats.child_events:
+                        continue
+                    childs.insert(0, (key, values))
+                else:
+                    sorted_items.append((key, values))
+            if self.stats.child_events:
+                for key, values in childs:
+                    (child, parent) = key.split(' ')
+                    insert_child(sorted_items, child, values, parent)
+
+            return sorted_items
+
         row = 3
         self.screen.move(row, 0)
         self.screen.clrtobot()
@@ -1143,44 +1182,34 @@ class Tui(object):
             # we don't have any fields, or all non-child events are filtered
             total = ctotal
 
-        if self._sorting == SORT_DEFAULT:
-            def sortkey((_k, v)):
-                # sort by (delta value, overall value)
-                return (v.delta, v.value)
-        else:
-            def sortkey((_k, v)):
-                # sort by overall value
-                return v.value
-
-        sorted_items = sorted(stats.items(), key=sortkey, reverse=True)
-
         # print events
         tavg = 0
-        for key, values in sorted_items:
-            if row >= self.screen.getmaxyx()[0] - 1:
+        tcur = 0
+        for key, values in get_sorted_events(self, stats):
+            if row >= self.screen.getmaxyx()[0] - 1 or values == (0, 0):
                 break
-            if values == (0, 0):
-                continue
-            if not self.stats.child_events and key.find(' ') != -1:
-                continue
-            if values.value is not None:
-                cur = int(round(values.delta / sleeptime)) if values.delta else ''
-                if self._display_guests:
-                    key = self.get_gname_from_pid(key)
-                    if not key:
-                        continue
-                self.screen.addstr(row, 1, '%-40s %10d%7.1f %8s' % (key
-                                   .split(' ')[0], values.value,
-                                   values.value * 100 / total, cur))
-                if cur != '' and key.find('(') == -1:
-                    tavg += cur
+            if self._display_guests:
+                key = self.get_gname_from_pid(key)
+                if not key:
+                    continue
+            cur = int(round(values.delta / sleeptime)) if values.delta else ''
+            if key[0] != ' ':
+                if values.delta:
+                    tcur += values.delta
+                ptotal = values.value
+                ltotal = total
+            else:
+                ltotal = ptotal
+            self.screen.addstr(row, 1, '%-40s %10d%7.1f %8s' % (key,
+                               values.value,
+                               values.value * 100 / float(ltotal), cur))
             row += 1
         if row == 3:
             self.screen.addstr(4, 1, 'No matching events reported yet')
         else:
+            tavg = int(round(tcur / sleeptime)) if tcur > 0 else ''
             self.screen.addstr(row, 1, '%-40s %10d        %8s' %
-                               ('Total', total, tavg if tavg else ''),
-                               curses.A_BOLD)
+                               ('Total', total, tavg), curses.A_BOLD)
         self.screen.refresh()
 
     def _show_msg(self, text):
-- 
cgit v1.2.3


From 6789af030a462708f937137e05eb12ea009fb348 Mon Sep 17 00:00:00 2001
From: Stefan Raspl <stefan.raspl@de.ibm.com>
Date: Thu, 22 Feb 2018 12:16:30 +0100
Subject: tools/kvm_stat: print 'Total' line for multiple events only

The 'Total' line looks a bit weird when we have a single event only. This
can happen e.g. due to filters. Therefore suppress when there's only a
single event in the output.

Signed-off-by: Stefan Raspl <raspl@linux.vnet.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/kvm/kvm_stat/kvm_stat | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat
index 862c997932e2..5898c22ba310 100755
--- a/tools/kvm/kvm_stat/kvm_stat
+++ b/tools/kvm/kvm_stat/kvm_stat
@@ -1206,7 +1206,7 @@ class Tui(object):
             row += 1
         if row == 3:
             self.screen.addstr(4, 1, 'No matching events reported yet')
-        else:
+        if row > 4:
             tavg = int(round(tcur / sleeptime)) if tcur > 0 else ''
             self.screen.addstr(row, 1, '%-40s %10d        %8s' %
                                ('Total', total, tavg), curses.A_BOLD)
-- 
cgit v1.2.3


From 3d4d5d618639c3155cfce57101d619a0935434d2 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <mawilcox@microsoft.com>
Date: Sun, 25 Feb 2018 06:00:11 -0500
Subject: radix tree test suite: Fix build

 - Add an empty linux/compiler_types.h (now being included by kconfig.h)
 - Add __GFP_ZERO
 - Add kzalloc
 - Test __GFP_DIRECT_RECLAIM instead of __GFP_NOWARN

Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
---
 tools/testing/radix-tree/linux.c                | 11 +++++++++--
 tools/testing/radix-tree/linux/compiler_types.h |  0
 tools/testing/radix-tree/linux/gfp.h            |  1 +
 tools/testing/radix-tree/linux/slab.h           |  6 ++++++
 4 files changed, 16 insertions(+), 2 deletions(-)
 create mode 100644 tools/testing/radix-tree/linux/compiler_types.h

(limited to 'tools')

diff --git a/tools/testing/radix-tree/linux.c b/tools/testing/radix-tree/linux.c
index 6903ccf35595..44a0d1ad4408 100644
--- a/tools/testing/radix-tree/linux.c
+++ b/tools/testing/radix-tree/linux.c
@@ -29,7 +29,7 @@ void *kmem_cache_alloc(struct kmem_cache *cachep, int flags)
 {
 	struct radix_tree_node *node;
 
-	if (flags & __GFP_NOWARN)
+	if (!(flags & __GFP_DIRECT_RECLAIM))
 		return NULL;
 
 	pthread_mutex_lock(&cachep->lock);
@@ -73,10 +73,17 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp)
 
 void *kmalloc(size_t size, gfp_t gfp)
 {
-	void *ret = malloc(size);
+	void *ret;
+
+	if (!(gfp & __GFP_DIRECT_RECLAIM))
+		return NULL;
+
+	ret = malloc(size);
 	uatomic_inc(&nr_allocated);
 	if (kmalloc_verbose)
 		printf("Allocating %p from malloc\n", ret);
+	if (gfp & __GFP_ZERO)
+		memset(ret, 0, size);
 	return ret;
 }
 
diff --git a/tools/testing/radix-tree/linux/compiler_types.h b/tools/testing/radix-tree/linux/compiler_types.h
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/tools/testing/radix-tree/linux/gfp.h b/tools/testing/radix-tree/linux/gfp.h
index e9fff59dfd8a..e3201ccf54c3 100644
--- a/tools/testing/radix-tree/linux/gfp.h
+++ b/tools/testing/radix-tree/linux/gfp.h
@@ -11,6 +11,7 @@
 #define __GFP_IO		0x40u
 #define __GFP_FS		0x80u
 #define __GFP_NOWARN		0x200u
+#define __GFP_ZERO		0x8000u
 #define __GFP_ATOMIC		0x80000u
 #define __GFP_ACCOUNT		0x100000u
 #define __GFP_DIRECT_RECLAIM	0x400000u
diff --git a/tools/testing/radix-tree/linux/slab.h b/tools/testing/radix-tree/linux/slab.h
index 979baeec7e70..a037def0dec6 100644
--- a/tools/testing/radix-tree/linux/slab.h
+++ b/tools/testing/radix-tree/linux/slab.h
@@ -3,6 +3,7 @@
 #define SLAB_H
 
 #include <linux/types.h>
+#include <linux/gfp.h>
 
 #define SLAB_HWCACHE_ALIGN 1
 #define SLAB_PANIC 2
@@ -11,6 +12,11 @@
 void *kmalloc(size_t size, gfp_t);
 void kfree(void *);
 
+static inline void *kzalloc(size_t size, gfp_t gfp)
+{
+        return kmalloc(size, gfp | __GFP_ZERO);
+}
+
 void *kmem_cache_alloc(struct kmem_cache *cachep, int flags);
 void kmem_cache_free(struct kmem_cache *cachep, void *objp);
 
-- 
cgit v1.2.3


From 192b2e742c06af399e8eecb4a1726520bfccece8 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Mon, 26 Feb 2018 13:17:07 +1100
Subject: selftests/powerpc: Skip tm-trap if transactional memory is not
 enabled

Some processor revisions do not support transactional memory, and
additionally kernel support can be disabled. In either case the
tm-trap test should be skipped, otherwise it will fail with a SIGILL.

Fixes: a08082f8e4e1 ("powerpc/selftests: Check endianness on trap in TM")
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 tools/testing/selftests/powerpc/tm/tm-trap.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/powerpc/tm/tm-trap.c b/tools/testing/selftests/powerpc/tm/tm-trap.c
index 5d92c23ee6cb..179d592f0073 100644
--- a/tools/testing/selftests/powerpc/tm/tm-trap.c
+++ b/tools/testing/selftests/powerpc/tm/tm-trap.c
@@ -255,6 +255,8 @@ int tm_trap_test(void)
 
 	struct sigaction trap_sa;
 
+	SKIP_IF(!have_htm());
+
 	trap_sa.sa_flags = SA_SIGINFO;
 	trap_sa.sa_sigaction = trap_signal_handler;
 	sigaction(SIGTRAP, &trap_sa, NULL);
-- 
cgit v1.2.3


From 4b0ad07653ee94182e2d8f21404242c9e83ad0b4 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <mawilcox@microsoft.com>
Date: Mon, 26 Feb 2018 14:39:30 -0500
Subject: idr: Fix handling of IDs above INT_MAX

Khalid reported that the kernel selftests are currently failing:

selftests: test_bpf.sh
========================================
test_bpf: [FAIL]
not ok 1..8 selftests:  test_bpf.sh [FAIL]

He bisected it to 6ce711f2750031d12cec91384ac5cfa0a485b60a ("idr: Make
1-based IDRs more efficient").

The root cause is doing a signed comparison in idr_alloc_u32() instead
of an unsigned comparison.  I went looking for any similar problems and
found a couple (which would each result in the failure to warn in two
situations that aren't supposed to happen).

I knocked up a few test-cases to prove that I was right and added them
to the test-suite.

Reported-by: Khalid Aziz <khalid.aziz@oracle.com>
Tested-by: Khalid Aziz <khalid.aziz@oracle.com>
Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
---
 lib/idr.c                           | 13 +++++-----
 tools/testing/radix-tree/idr-test.c | 52 +++++++++++++++++++++++++++++++++++++
 2 files changed, 59 insertions(+), 6 deletions(-)

(limited to 'tools')

diff --git a/lib/idr.c b/lib/idr.c
index 99ec5bc89d25..823b813f08f8 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -36,8 +36,8 @@ int idr_alloc_u32(struct idr *idr, void *ptr, u32 *nextid,
 {
 	struct radix_tree_iter iter;
 	void __rcu **slot;
-	int base = idr->idr_base;
-	int id = *nextid;
+	unsigned int base = idr->idr_base;
+	unsigned int id = *nextid;
 
 	if (WARN_ON_ONCE(radix_tree_is_internal_node(ptr)))
 		return -EINVAL;
@@ -204,10 +204,11 @@ int idr_for_each(const struct idr *idr,
 
 	radix_tree_for_each_slot(slot, &idr->idr_rt, &iter, 0) {
 		int ret;
+		unsigned long id = iter.index + base;
 
-		if (WARN_ON_ONCE(iter.index > INT_MAX))
+		if (WARN_ON_ONCE(id > INT_MAX))
 			break;
-		ret = fn(iter.index + base, rcu_dereference_raw(*slot), data);
+		ret = fn(id, rcu_dereference_raw(*slot), data);
 		if (ret)
 			return ret;
 	}
@@ -230,8 +231,8 @@ void *idr_get_next(struct idr *idr, int *nextid)
 {
 	struct radix_tree_iter iter;
 	void __rcu **slot;
-	int base = idr->idr_base;
-	int id = *nextid;
+	unsigned long base = idr->idr_base;
+	unsigned long id = *nextid;
 
 	id = (id < base) ? 0 : id - base;
 	slot = radix_tree_iter_find(&idr->idr_rt, &iter, id);
diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c
index 44ef9eba5a7a..6c645eb77d42 100644
--- a/tools/testing/radix-tree/idr-test.c
+++ b/tools/testing/radix-tree/idr-test.c
@@ -178,6 +178,55 @@ void idr_get_next_test(int base)
 	idr_destroy(&idr);
 }
 
+int idr_u32_cb(int id, void *ptr, void *data)
+{
+	BUG_ON(id < 0);
+	BUG_ON(ptr != DUMMY_PTR);
+	return 0;
+}
+
+void idr_u32_test1(struct idr *idr, u32 handle)
+{
+	static bool warned = false;
+	u32 id = handle;
+	int sid = 0;
+	void *ptr;
+
+	BUG_ON(idr_alloc_u32(idr, DUMMY_PTR, &id, id, GFP_KERNEL));
+	BUG_ON(id != handle);
+	BUG_ON(idr_alloc_u32(idr, DUMMY_PTR, &id, id, GFP_KERNEL) != -ENOSPC);
+	BUG_ON(id != handle);
+	if (!warned && id > INT_MAX)
+		printk("vvv Ignore these warnings\n");
+	ptr = idr_get_next(idr, &sid);
+	if (id > INT_MAX) {
+		BUG_ON(ptr != NULL);
+		BUG_ON(sid != 0);
+	} else {
+		BUG_ON(ptr != DUMMY_PTR);
+		BUG_ON(sid != id);
+	}
+	idr_for_each(idr, idr_u32_cb, NULL);
+	if (!warned && id > INT_MAX) {
+		printk("^^^ Warnings over\n");
+		warned = true;
+	}
+	BUG_ON(idr_remove(idr, id) != DUMMY_PTR);
+	BUG_ON(!idr_is_empty(idr));
+}
+
+void idr_u32_test(int base)
+{
+	DEFINE_IDR(idr);
+	idr_init_base(&idr, base);
+	idr_u32_test1(&idr, 10);
+	idr_u32_test1(&idr, 0x7fffffff);
+	idr_u32_test1(&idr, 0x80000000);
+	idr_u32_test1(&idr, 0x80000001);
+	idr_u32_test1(&idr, 0xffe00000);
+	idr_u32_test1(&idr, 0xffffffff);
+}
+
 void idr_checks(void)
 {
 	unsigned long i;
@@ -248,6 +297,9 @@ void idr_checks(void)
 	idr_get_next_test(0);
 	idr_get_next_test(1);
 	idr_get_next_test(4);
+	idr_u32_test(4);
+	idr_u32_test(1);
+	idr_u32_test(0);
 }
 
 /*
-- 
cgit v1.2.3


From 067b25a5639b10dfdd41ce6b4d4140fe84d0a8e7 Mon Sep 17 00:00:00 2001
From: Daniel Díaz <daniel.diaz@linaro.org>
Date: Wed, 7 Feb 2018 11:24:31 -0600
Subject: selftests/futex: Fix line continuation in Makefile
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The Makefile lacks a couple of line continuation backslashes
in an `if' clause, which produces an error when make versions
prior to 4.x are used for building the tests.

  $ make
  make[1]: Entering directory `/[...]/linux/tools/testing/selftests/futex'
  /bin/sh: -c: line 5: syntax error: unexpected end of file
  make[1]: *** [all] Error 1
  make[1]: Leaving directory `/[...]/linux/tools/testing/selftests/futex'
  make: *** [all] Error 2

Signed-off-by: Daniel Díaz <daniel.diaz@linaro.org>
Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/futex/Makefile | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/futex/Makefile b/tools/testing/selftests/futex/Makefile
index cea4adcd42b8..a63e8453984d 100644
--- a/tools/testing/selftests/futex/Makefile
+++ b/tools/testing/selftests/futex/Makefile
@@ -12,9 +12,9 @@ all:
 		BUILD_TARGET=$(OUTPUT)/$$DIR;	\
 		mkdir $$BUILD_TARGET  -p;	\
 		make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\
-		if [ -e $$DIR/$(TEST_PROGS) ]; then
-			rsync -a $$DIR/$(TEST_PROGS) $$BUILD_TARGET/;
-		fi
+		if [ -e $$DIR/$(TEST_PROGS) ]; then \
+			rsync -a $$DIR/$(TEST_PROGS) $$BUILD_TARGET/; \
+		fi \
 	done
 
 override define RUN_TESTS
-- 
cgit v1.2.3


From 16c513b13477b8da7958e8112bf23cd59b87a7c1 Mon Sep 17 00:00:00 2001
From: Shuah Khan <shuahkh@osg.samsung.com>
Date: Tue, 13 Feb 2018 10:45:57 -0700
Subject: selftests: memory-hotplug: silence test command echo

Silence the following command being printed while running test.

./mem-on-off-test.sh -r 2 && echo "selftests: memory-hotplug [PASS]" ||
echo "selftests: memory-hotplug [FAIL]"

Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/memory-hotplug/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/memory-hotplug/Makefile b/tools/testing/selftests/memory-hotplug/Makefile
index 86636d207adf..183b46883875 100644
--- a/tools/testing/selftests/memory-hotplug/Makefile
+++ b/tools/testing/selftests/memory-hotplug/Makefile
@@ -4,7 +4,7 @@ all:
 include ../lib.mk
 
 TEST_PROGS := mem-on-off-test.sh
-override RUN_TESTS := ./mem-on-off-test.sh -r 2 && echo "selftests: memory-hotplug [PASS]" || echo "selftests: memory-hotplug [FAIL]"
+override RUN_TESTS := @./mem-on-off-test.sh -r 2 && echo "selftests: memory-hotplug [PASS]" || echo "selftests: memory-hotplug [FAIL]"
 override EMIT_TESTS := echo "$(RUN_TESTS)"
 
 run_full_test:
-- 
cgit v1.2.3


From f6869826de700bce59e2cef14974f99836e34e4f Mon Sep 17 00:00:00 2001
From: Shuah Khan <shuahkh@osg.samsung.com>
Date: Tue, 13 Feb 2018 10:48:29 -0700
Subject: selftests: vm: update .gitignore with new test

Update .gitignore with new test.

Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/vm/.gitignore | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore
index 63c94d776e89..342c7bc9dc8c 100644
--- a/tools/testing/selftests/vm/.gitignore
+++ b/tools/testing/selftests/vm/.gitignore
@@ -11,3 +11,4 @@ mlock-intersect-test
 mlock-random-test
 virtual_address_range
 gup_benchmark
+va_128TBswitch
-- 
cgit v1.2.3


From b7abbd5a3533a31a1e7d4696ea275df543440c51 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Wed, 28 Feb 2018 15:15:56 +1100
Subject: selftests/powerpc: Fix missing clean of pmu/lib.o

The tm-resched-dscr test links against pmu/lib.o, but we don't have a
rule to clean pmu/lib.o. This can lead to a build break if you build
for big endian and then little, or vice versa.

Fix it by making tm-resched-dscr depend on pmu/lib.c, causing the code
to be built directly in, meaning no .o is generated.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 tools/testing/selftests/powerpc/tm/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/powerpc/tm/Makefile b/tools/testing/selftests/powerpc/tm/Makefile
index a23453943ad2..5c72ff978f27 100644
--- a/tools/testing/selftests/powerpc/tm/Makefile
+++ b/tools/testing/selftests/powerpc/tm/Makefile
@@ -16,7 +16,7 @@ $(OUTPUT)/tm-syscall: tm-syscall-asm.S
 $(OUTPUT)/tm-syscall: CFLAGS += -I../../../../../usr/include
 $(OUTPUT)/tm-tmspr: CFLAGS += -pthread
 $(OUTPUT)/tm-vmx-unavail: CFLAGS += -pthread -m64
-$(OUTPUT)/tm-resched-dscr: ../pmu/lib.o
+$(OUTPUT)/tm-resched-dscr: ../pmu/lib.c
 $(OUTPUT)/tm-unavailable: CFLAGS += -O0 -pthread -m64 -Wno-error=uninitialized -mvsx
 $(OUTPUT)/tm-trap: CFLAGS += -O0 -pthread -m64
 
-- 
cgit v1.2.3


From 1402fd8ed7e5bda1b3e7613b70780b0db392d1e6 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Wed, 28 Feb 2018 07:19:21 -0600
Subject: objtool: Fix another switch table detection issue

Continue the switch table detection whack-a-mole.  Add a check to
distinguish KASAN data reads from switch data reads.  The switch jump
tables in .rodata have relocations associated with them.

This fixes the following warning:

  crypto/asymmetric_keys/x509_cert_parser.o: warning: objtool: x509_note_pkey_algo()+0xa4: sibling call from callable instruction with modified stack frame

Reported-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Arnd Bergmann <arnd@arndb.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: https://lkml.kernel.org/r/d7c8853022ad47d158cb81e953a40469fc08a95e.1519784382.git.jpoimboe@redhat.com
---
 tools/objtool/check.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 472e64e95891..46c1d239cc1b 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -925,7 +925,11 @@ static struct rela *find_switch_table(struct objtool_file *file,
 		if (find_symbol_containing(file->rodata, text_rela->addend))
 			continue;
 
-		return find_rela_by_dest(file->rodata, text_rela->addend);
+		rodata_rela = find_rela_by_dest(file->rodata, text_rela->addend);
+		if (!rodata_rela)
+			continue;
+
+		return rodata_rela;
 	}
 
 	return NULL;
-- 
cgit v1.2.3


From cd4a6f3ab4d80cb919d15897eb3cbc85c2009d4b Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Mon, 26 Feb 2018 15:22:22 +1100
Subject: selftests/powerpc: Skip the subpage_prot tests if the syscall is
 unavailable

The subpage_prot syscall is only functional when the system is using
the Hash MMU. Since commit 5b2b80714796 ("powerpc/mm: Invalidate
subpage_prot() system call on radix platforms") it returns ENOENT when
the Radix MMU is active. Currently this just makes the test fail.

Additionally the syscall is not available if the kernel is built with
4K pages, or if CONFIG_PPC_SUBPAGE_PROT=n, in which case it returns
ENOSYS because the syscall is missing entirely.

So check explicitly for ENOENT and ENOSYS and skip if we see either of
those.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 tools/testing/selftests/powerpc/mm/subpage_prot.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/powerpc/mm/subpage_prot.c b/tools/testing/selftests/powerpc/mm/subpage_prot.c
index 35ade7406dcd..3ae77ba93208 100644
--- a/tools/testing/selftests/powerpc/mm/subpage_prot.c
+++ b/tools/testing/selftests/powerpc/mm/subpage_prot.c
@@ -135,6 +135,16 @@ static int run_test(void *addr, unsigned long size)
 	return 0;
 }
 
+static int syscall_available(void)
+{
+	int rc;
+
+	errno = 0;
+	rc = syscall(__NR_subpage_prot, 0, 0, 0);
+
+	return rc == 0 || (errno != ENOENT && errno != ENOSYS);
+}
+
 int test_anon(void)
 {
 	unsigned long align;
@@ -145,6 +155,8 @@ int test_anon(void)
 	void *mallocblock;
 	unsigned long mallocsize;
 
+	SKIP_IF(!syscall_available());
+
 	if (getpagesize() != 0x10000) {
 		fprintf(stderr, "Kernel page size must be 64K!\n");
 		return 1;
@@ -180,6 +192,8 @@ int test_file(void)
 	off_t filesize;
 	int fd;
 
+	SKIP_IF(!syscall_available());
+
 	fd = open(file_name, O_RDWR);
 	if (fd == -1) {
 		perror("failed to open file");
-- 
cgit v1.2.3


From ba004a2955f759946d8c98ca1a9c8d09818b1223 Mon Sep 17 00:00:00 2001
From: Shuah Khan <shuahkh@osg.samsung.com>
Date: Thu, 1 Mar 2018 13:04:28 -0700
Subject: selftests: memory-hotplug: fix emit_tests regression

Commit 16c513b13477
("selftests: memory-hotplug: silence test command echo")

introduced regression in emit_tests and results in the following
failure when selftests are installed and run. Fix it.

Running tests in memory-hotplug
========================================
./run_kselftest.sh: line 121: @./mem-on-off-test.sh: No such file or
directory
selftests: memory-hotplug [FAIL]

Fixes: 16c513b13477 (selftests: memory-hotplug: silence test command echo")
Reported-by: Naresh Kamboju <naresh.kamboju@linaro.org>
Tested-by: Anders Roxell <anders.roxell@linaro.org>
Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/memory-hotplug/Makefile | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/memory-hotplug/Makefile b/tools/testing/selftests/memory-hotplug/Makefile
index 183b46883875..686da510f989 100644
--- a/tools/testing/selftests/memory-hotplug/Makefile
+++ b/tools/testing/selftests/memory-hotplug/Makefile
@@ -5,7 +5,8 @@ include ../lib.mk
 
 TEST_PROGS := mem-on-off-test.sh
 override RUN_TESTS := @./mem-on-off-test.sh -r 2 && echo "selftests: memory-hotplug [PASS]" || echo "selftests: memory-hotplug [FAIL]"
-override EMIT_TESTS := echo "$(RUN_TESTS)"
+
+override EMIT_TESTS := echo "$(subst @,,$(RUN_TESTS))"
 
 run_full_test:
 	@/bin/bash ./mem-on-off-test.sh && echo "memory-hotplug selftests: [PASS]" || echo "memory-hotplug selftests: [FAIL]"
-- 
cgit v1.2.3


From 79f3a8e662c1ae6e85737eca9ae7d6b52cf87815 Mon Sep 17 00:00:00 2001
From: Davide Caratti <dcaratti@redhat.com>
Date: Fri, 2 Mar 2018 14:44:39 +0100
Subject: tc-testing: skbmod: fix match value of ethertype

iproute2 print_skbmod() prints the configured ethertype using format 0x%X:
therefore, test 9aa8 systematically fails, because it configures action #4
using ethertype 0x0031, and expects 0x0031 when it reads it back. Changing
the expected value to 0x31 lets the test result 'not ok' become 'ok'.

tested with:
 # ./tdc.py -e 9aa8
 Test 9aa8: Get a single skbmod action from a list
 All test results:

 1..1
 ok 1 9aa8 Get a single skbmod action from a list

Fixes: cf797ac49b94 ("tc-testing: Add test cases for police and skbmod")
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json b/tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json
index e34075059c26..90bba48c3f07 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json
@@ -315,7 +315,7 @@
         "cmdUnderTest": "$TC actions ls action skbmod",
         "expExitCode": "0",
         "verifyCmd": "$TC actions get action skbmod index 4",
-        "matchPattern": "action order [0-9]*: skbmod pipe set etype 0x0031",
+        "matchPattern": "action order [0-9]*: skbmod pipe set etype 0x31",
         "matchCount": "1",
         "teardown": [
             "$TC actions flush action skbmod"
-- 
cgit v1.2.3


From f6d3f35e006496c282ccbb67494d90b04f6cba10 Mon Sep 17 00:00:00 2001
From: Sangwon Hong <qpakzk@gmail.com>
Date: Mon, 12 Feb 2018 04:37:44 +0900
Subject: perf kallsyms: Fix the usage on the man page

First, all man pages highlight only perf and subcommands except 'perf
kallsyms', which includes the full usage. Fix it for commands to
monopolize underlines.

Second, options can be ommited when executing 'perf kallsyms', so add
square brackets between <option>.

Signed-off-by: Sangwon Hong <qpakzk@gmail.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Taeung Song <treeze.taeung@gmail.com>
Link: http://lkml.kernel.org/r/1518377864-20353-1-git-send-email-qpakzk@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-kallsyms.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/perf/Documentation/perf-kallsyms.txt b/tools/perf/Documentation/perf-kallsyms.txt
index 954ea9e21236..cf9f4040ea5c 100644
--- a/tools/perf/Documentation/perf-kallsyms.txt
+++ b/tools/perf/Documentation/perf-kallsyms.txt
@@ -8,7 +8,7 @@ perf-kallsyms - Searches running kernel for symbols
 SYNOPSIS
 --------
 [verse]
-'perf kallsyms <options> symbol_name[,symbol_name...]'
+'perf kallsyms' [<options>] symbol_name[,symbol_name...]
 
 DESCRIPTION
 -----------
-- 
cgit v1.2.3


From 626af862da9c650e80bdea44684be9334f8eb75b Mon Sep 17 00:00:00 2001
From: Kan Liang <Kan.liang@intel.com>
Date: Mon, 26 Feb 2018 10:17:10 -0800
Subject: perf top: Fix annoying fallback message on older kernels
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

On older (e.g. v4.4) kernels, an annoying fallback message can be
observed in 'perf top':

	┌─Warning:──────────────────────┐
	│fall back to non-overwrite mode│
	│                               │
	│                               │
	│Press any key...               │
	└───────────────────────────────┘

The 'perf top' utility has been changed to overwrite mode since commit
ebebbf082357 ("perf top: Switch default mode to overwrite mode").

For older kernels which don't have overwrite mode support, 'perf top'
will fall back to non-overwrite mode and print out the fallback message
using ui__warning(), which needs user's input to close.

The fallback message is not critical for end users. Turning it to debug
message which is printed when running with -vv.

Reported-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Kan Liang <kan.liang@intel.com>
Cc: Kan Liang <kan.liang@intel.com>
Fixes: ebebbf082357 ("perf top: Switch default mode to overwrite mode")
Link: http://lkml.kernel.org/r/1519669030-176549-1-git-send-email-kan.liang@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-top.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index b7c823ba8374..35ac016fcb98 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -991,7 +991,7 @@ static int perf_top_overwrite_fallback(struct perf_top *top,
 	evlist__for_each_entry(evlist, counter)
 		counter->attr.write_backward = false;
 	opts->overwrite = false;
-	ui__warning("fall back to non-overwrite mode\n");
+	pr_debug2("fall back to non-overwrite mode\n");
 	return 1;
 }
 
-- 
cgit v1.2.3


From 9cf195f80c5e8a6d779119a7d292e537315d2ea6 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 1 Mar 2018 11:33:59 -0300
Subject: perf annotate browser: Be more robust when drawing jump arrows
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This first happened with a gcc function, _cpp_lex_token, that has the
usual jumps:

 │1159e6c: ↓ jne    115aa32 <_cpp_lex_token@@Base+0xf92>

I.e. jumps to a label inside that function (_cpp_lex_token), and those
works, but also this kind:

 │1159e8b: ↓ jne    c469be <cpp_named_operator2name@@Base+0xa72>

I.e. jumps to another function, outside _cpp_lex_token, which are not
being correctly handled generating as a side effect references to
ab->offset[] entries that are set to NULL, so to make this code more
robust, check that here.

A proper fix for will be put in place, looking at the function name
right after the '<' token and probably treating this like a 'call'
instruction.

For now just don't draw the arrow.

Reported-by: Ingo Molnar <mingo@kernel.org>
Tested-by: Ingo Molnar <mingo@kernel.org>
Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Cc: Jin Yao <yao.jin@intel.com>
Cc: Kan Liang <kan.liang@intel.com>
Link: https://lkml.kernel.org/n/tip-5tzvb875ep2sel03aeefgmud@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/ui/browsers/annotate.c | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

(limited to 'tools')

diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index 286427975112..fbf927cf775d 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -327,7 +327,32 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser)
 	if (!disasm_line__is_valid_jump(cursor, sym))
 		return;
 
+	/*
+	 * This first was seen with a gcc function, _cpp_lex_token, that
+	 * has the usual jumps:
+	 *
+	 *  │1159e6c: ↓ jne    115aa32 <_cpp_lex_token@@Base+0xf92>
+	 *
+	 * I.e. jumps to a label inside that function (_cpp_lex_token), and
+	 * those works, but also this kind:
+	 *
+	 *  │1159e8b: ↓ jne    c469be <cpp_named_operator2name@@Base+0xa72>
+	 *
+	 *  I.e. jumps to another function, outside _cpp_lex_token, which
+	 *  are not being correctly handled generating as a side effect references
+	 *  to ab->offset[] entries that are set to NULL, so to make this code
+	 *  more robust, check that here.
+	 *
+	 *  A proper fix for will be put in place, looking at the function
+	 *  name right after the '<' token and probably treating this like a
+	 *  'call' instruction.
+	 */
 	target = ab->offsets[cursor->ops.target.offset];
+	if (target == NULL) {
+		ui_helpline__printf("WARN: jump target inconsistency, press 'o', ab->offsets[%#x] = NULL\n",
+				    cursor->ops.target.offset);
+		return;
+	}
 
 	bcursor = browser_line(&cursor->al);
 	btarget = browser_line(target);
-- 
cgit v1.2.3


From cfacbabd1d9c35d2a179650b2911f17a8d8620b8 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Fri, 2 Mar 2018 17:13:54 +0100
Subject: perf record: Fix crash in pipe mode

Currently we can crash perf record when running in pipe mode, like:

  $ perf record ls | perf report
  # To display the perf.data header info, please use --header/--header-only options.
  #
  perf: Segmentation fault
  Error:
  The - file has no samples!

The callstack of the crash is:

    0x0000000000515242 in perf_event__synthesize_event_update_name
  3513            ev = event_update_event__new(len + 1, PERF_EVENT_UPDATE__NAME, evsel->id[0]);
  (gdb) bt
  #0  0x0000000000515242 in perf_event__synthesize_event_update_name
  #1  0x00000000005158a4 in perf_event__synthesize_extra_attr
  #2  0x0000000000443347 in record__synthesize
  #3  0x00000000004438e3 in __cmd_record
  #4  0x000000000044514e in cmd_record
  #5  0x00000000004cbc95 in run_builtin
  #6  0x00000000004cbf02 in handle_internal_command
  #7  0x00000000004cc054 in run_argv
  #8  0x00000000004cc422 in main

The reason of the crash is that the evsel does not have ids array
allocated and the pipe's synthesize code tries to access it.

We don't force evsel ids allocation when we have single event, because
it's not needed. However we need it when we are in pipe mode even for
single event as a key for evsel update event.

Fixing this by forcing evsel ids allocation event for single event, when
we are in pipe mode.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20180302161354.30192-1-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-record.c | 9 +++++++++
 tools/perf/perf.h           | 1 +
 tools/perf/util/record.c    | 8 ++++++--
 3 files changed, 16 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index bf4ca749d1ac..a217623fec2e 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -881,6 +881,15 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 		}
 	}
 
+	/*
+	 * If we have just single event and are sending data
+	 * through pipe, we need to force the ids allocation,
+	 * because we synthesize event name through the pipe
+	 * and need the id for that.
+	 */
+	if (data->is_pipe && rec->evlist->nr_entries == 1)
+		rec->opts.sample_id = true;
+
 	if (record__open(rec) != 0) {
 		err = -1;
 		goto out_child;
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index cfe46236a5e5..57b9b342d533 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -61,6 +61,7 @@ struct record_opts {
 	bool	     tail_synthesize;
 	bool	     overwrite;
 	bool	     ignore_missing_thread;
+	bool	     sample_id;
 	unsigned int freq;
 	unsigned int mmap_pages;
 	unsigned int auxtrace_mmap_pages;
diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c
index 1e97937b03a9..6f09e4962dad 100644
--- a/tools/perf/util/record.c
+++ b/tools/perf/util/record.c
@@ -137,6 +137,7 @@ void perf_evlist__config(struct perf_evlist *evlist, struct record_opts *opts,
 	struct perf_evsel *evsel;
 	bool use_sample_identifier = false;
 	bool use_comm_exec;
+	bool sample_id = opts->sample_id;
 
 	/*
 	 * Set the evsel leader links before we configure attributes,
@@ -163,8 +164,7 @@ void perf_evlist__config(struct perf_evlist *evlist, struct record_opts *opts,
 		 * match the id.
 		 */
 		use_sample_identifier = perf_can_sample_identifier();
-		evlist__for_each_entry(evlist, evsel)
-			perf_evsel__set_sample_id(evsel, use_sample_identifier);
+		sample_id = true;
 	} else if (evlist->nr_entries > 1) {
 		struct perf_evsel *first = perf_evlist__first(evlist);
 
@@ -174,6 +174,10 @@ void perf_evlist__config(struct perf_evlist *evlist, struct record_opts *opts,
 			use_sample_identifier = perf_can_sample_identifier();
 			break;
 		}
+		sample_id = true;
+	}
+
+	if (sample_id) {
 		evlist__for_each_entry(evlist, evsel)
 			perf_evsel__set_sample_id(evsel, use_sample_identifier);
 	}
-- 
cgit v1.2.3


From d976a6e9d9614a951dc21aaf2fbebdeca567dcfa Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 5 Mar 2018 11:56:40 -0300
Subject: tools headers: Sync copy of kvm UAPI headers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In 801e459a6f3a ("KVM: x86: Add a framework for supporting MSR-based
features") a new ioctl was introduced, which with this sync of the kvm
UAPI headers, makes 'perf trace' know about it:

  $ cd /tmp/build/perf/trace/beauty/generated/ioctl/
  $ diff -u kvm_ioctl_array.c.old kvm_ioctl_array.c
  --- /tmp/kvm_ioctl_array.c	2018-03-05 11:55:38.409145056 -0300
  +++ /tmp/build/perf/trace/beauty/generated/ioctl/kvm_ioctl_array.c	2018-03-05 11:56:17.456153501 -0300
  @@ -6,6 +6,7 @@
 	[0x04] = "GET_VCPU_MMAP_SIZE",
 	[0x05] = "GET_SUPPORTED_CPUID",
 	[0x09] = "GET_EMULATED_CPUID",
  +	[0x0a] = "GET_MSR_FEATURE_INDEX_LIST",
 	[0x40] = "SET_MEMORY_REGION",
 	[0x41] = "CREATE_VCPU",
 	[0x42] = "GET_DIRTY_LOG",

So when using 'perf trace -e ioctl' that will appear along with the
others, like in this excerpt of a system wide session:

  14.556 ( 0.006 ms): CPU 0/KVM/16077 ioctl(fd: 19<anon_inode:kvm-vcpu:0>, cmd: KVM_RUN) = 0
  14.565 ( 0.006 ms): CPU 0/KVM/16077 ioctl(fd: 19<anon_inode:kvm-vcpu:0>, cmd: KVM_RUN) = 0
  14.573 (         ): CPU 0/KVM/16077 ioctl(fd: 19<anon_inode:kvm-vcpu:0>, cmd: KVM_RUN) ...
  34.075 ( 0.016 ms): gnome-shell/2192 ioctl(fd: 8</dev/dri/card0>, cmd: DRM_I915_GEM_BUSY, arg: 0x7ffe4e73e850) = 0
  40.549 ( 0.012 ms): gnome-shell/2192 ioctl(fd: 8</dev/dri/card0>, cmd: DRM_I915_GEM_BUSY, arg: 0x7ffe4e73ece0) = 0
  40.625 ( 0.005 ms): gnome-shell/2192 ioctl(fd: 8</dev/dri/card0>, cmd: DRM_I915_GEM_BUSY, arg: 0x7ffe4e73e940) = 0
  40.632 ( 0.003 ms): gnome-shell/2192 ioctl(fd: 8</dev/dri/card0>, cmd: DRM_I915_GEM_MADVISE, arg: 0x7ffe4e73e9b0) = 0

This also silences the perf build header copy drift verifier:

  make: Entering directory '/home/acme/git/perf/tools/perf'
    BUILD:   Doing 'make -j4' parallel build
  Warning: Kernel ABI header at 'tools/include/uapi/linux/kvm.h' differs from latest version at 'include/uapi/linux/kvm.h'

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Wang Nan <wangnan0@huawei.com>
Link: https://lkml.kernel.org/n/tip-h31oz5g0mt1dh2s2ajq6o6no@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/include/uapi/linux/kvm.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'tools')

diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index 0fb5ef939732..7b26d4b0b052 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -761,6 +761,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_TRACE_PAUSE           __KVM_DEPRECATED_MAIN_0x07
 #define KVM_TRACE_DISABLE         __KVM_DEPRECATED_MAIN_0x08
 #define KVM_GET_EMULATED_CPUID	  _IOWR(KVMIO, 0x09, struct kvm_cpuid2)
+#define KVM_GET_MSR_FEATURE_INDEX_LIST    _IOWR(KVMIO, 0x0a, struct kvm_msr_list)
 
 /*
  * Extension capability list.
@@ -934,6 +935,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_S390_AIS_MIGRATION 150
 #define KVM_CAP_PPC_GET_CPU_CHAR 151
 #define KVM_CAP_S390_BPB 152
+#define KVM_CAP_GET_MSR_FEATURES 153
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
-- 
cgit v1.2.3


From 4caea0574c5009901d1976980579ccd26dbf358a Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 5 Mar 2018 12:07:52 -0300
Subject: tools headers: Sync x86's cpufeatures.h

The changes in dd84441a7971 ("x86/speculation: Use IBRS if available
before calling into firmware") don't need any kind of special treatment
in the current tools/perf/ codebase, so just update the copy to get rid
of the perf build warning:

  BUILD:   Doing 'make -j4' parallel build
  Warning: Kernel ABI header at 'tools/arch/x86/include/asm/cpufeatures.h' differs from latest version at 'arch/x86/include/asm/cpufeatures.h'

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: David Woodhouse <dwmw@amazon.co.uk>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: https://lkml.kernel.org/n/tip-mzmuxocrf96v922xkerey3ns@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/arch/x86/include/asm/cpufeatures.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools')

diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index 0dfe4d3f74e2..f41079da38c5 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -213,6 +213,7 @@
 #define X86_FEATURE_SEV			( 7*32+20) /* AMD Secure Encrypted Virtualization */
 
 #define X86_FEATURE_USE_IBPB		( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */
+#define X86_FEATURE_USE_IBRS_FW		( 7*32+22) /* "" Use IBRS during runtime firmware calls */
 
 /* Virtualization flags: Linux defined, word 8 */
 #define X86_FEATURE_TPR_SHADOW		( 8*32+ 0) /* Intel TPR Shadow */
-- 
cgit v1.2.3


From 40c21898ba5372c14ef71717040529794a91ccc2 Mon Sep 17 00:00:00 2001
From: Ilya Pronin <ipronin@twitter.com>
Date: Mon, 5 Mar 2018 22:43:53 -0800
Subject: perf stat: Fix CVS output format for non-supported counters

When printing stats in CSV mode, 'perf stat' appends extra separators
when a counter is not supported:

<not supported>,,L1-dcache-store-misses,mesos/bd442f34-2b4a-47df-b966-9b281f9f56fc,0,100.00,,,,

Which causes a failure when parsing fields. The numbers of separators
should be the same for each line, no matter if the counter is or not
supported.

Signed-off-by: Ilya Pronin <ipronin@twitter.com>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Cc: Andi Kleen <ak@linux.intel.com>
Link: http://lkml.kernel.org/r/20180306064353.31930-1-xiyou.wangcong@gmail.com
Fixes: 92a61f6412d3 ("perf stat: Implement CSV metrics output")
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-stat.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 98bf9d32f222..54a4c152edb3 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -917,7 +917,7 @@ static void print_metric_csv(void *ctx,
 	char buf[64], *vals, *ends;
 
 	if (unit == NULL || fmt == NULL) {
-		fprintf(out, "%s%s%s%s", csv_sep, csv_sep, csv_sep, csv_sep);
+		fprintf(out, "%s%s", csv_sep, csv_sep);
 		return;
 	}
 	snprintf(buf, sizeof(buf), fmt, val);
-- 
cgit v1.2.3


From 2e2967f4c398e0f984a6c04daa3a79512e2bb74e Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Tue, 6 Mar 2018 11:13:13 +0200
Subject: perf auxtrace: Prevent decoding when --no-itrace

Prevent auxtrace_queues__process_index() from queuing AUX area data for
decoding when the --no-itrace option has been used.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Link: http://lkml.kernel.org/r/1520327598-1317-3-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/auxtrace.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index 9faf3b5367db..6470ea2aa25e 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -60,6 +60,12 @@
 #include "sane_ctype.h"
 #include "symbol/kallsyms.h"
 
+static bool auxtrace__dont_decode(struct perf_session *session)
+{
+	return !session->itrace_synth_opts ||
+	       session->itrace_synth_opts->dont_decode;
+}
+
 int auxtrace_mmap__mmap(struct auxtrace_mmap *mm,
 			struct auxtrace_mmap_params *mp,
 			void *userpg, int fd)
@@ -762,6 +768,9 @@ int auxtrace_queues__process_index(struct auxtrace_queues *queues,
 	size_t i;
 	int err;
 
+	if (auxtrace__dont_decode(session))
+		return 0;
+
 	list_for_each_entry(auxtrace_index, &session->auxtrace_index, list) {
 		for (i = 0; i < auxtrace_index->nr; i++) {
 			ent = &auxtrace_index->entries[i];
@@ -892,12 +901,6 @@ out_free:
 	return err;
 }
 
-static bool auxtrace__dont_decode(struct perf_session *session)
-{
-	return !session->itrace_synth_opts ||
-	       session->itrace_synth_opts->dont_decode;
-}
-
 int perf_event__process_auxtrace_info(struct perf_tool *tool __maybe_unused,
 				      union perf_event *event,
 				      struct perf_session *session)
-- 
cgit v1.2.3


From de19e5c3c51fdb1ff20d0f61d099db902ff7494b Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Wed, 28 Feb 2018 10:39:04 +0200
Subject: perf tools: Fix trigger class trigger_on()

trigger_on() means that the trigger is available but not ready, however
trigger_on() was making it ready. That can segfault if the signal comes
before trigger_ready(). e.g. (USR2 signal delivery not shown)

  $ perf record -e intel_pt//u -S sleep 1
  perf: Segmentation fault
  Obtained 16 stack frames.
  /home/ahunter/bin/perf(sighandler_dump_stack+0x40) [0x4ec550]
  /lib/x86_64-linux-gnu/libc.so.6(+0x36caf) [0x7fa76411acaf]
  /home/ahunter/bin/perf(perf_evsel__disable+0x26) [0x4b9dd6]
  /home/ahunter/bin/perf() [0x43a45b]
  /lib/x86_64-linux-gnu/libc.so.6(+0x36caf) [0x7fa76411acaf]
  /lib/x86_64-linux-gnu/libc.so.6(__xstat64+0x15) [0x7fa7641d2cc5]
  /home/ahunter/bin/perf() [0x4ec6c9]
  /home/ahunter/bin/perf() [0x4ec73b]
  /home/ahunter/bin/perf() [0x4ec73b]
  /home/ahunter/bin/perf() [0x4ec73b]
  /home/ahunter/bin/perf() [0x4eca15]
  /home/ahunter/bin/perf(machine__create_kernel_maps+0x257) [0x4f0b77]
  /home/ahunter/bin/perf(perf_session__new+0xc0) [0x4f86f0]
  /home/ahunter/bin/perf(cmd_record+0x722) [0x43c132]
  /home/ahunter/bin/perf() [0x4a11ae]
  /home/ahunter/bin/perf(main+0x5d4) [0x427fb4]

Note, for testing purposes, this is hard to hit unless you add some sleep()
in builtin-record.c before record__open().

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Cc: stable@vger.kernel.org
Fixes: 3dcc4436fa6f ("perf tools: Introduce trigger class")
Link: http://lkml.kernel.org/r/1519807144-30694-1-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/trigger.h | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/util/trigger.h b/tools/perf/util/trigger.h
index 370138e7e35c..88223bc7c82b 100644
--- a/tools/perf/util/trigger.h
+++ b/tools/perf/util/trigger.h
@@ -12,7 +12,7 @@
  * States and transits:
  *
  *
- *  OFF--(on)--> READY --(hit)--> HIT
+ *  OFF--> ON --> READY --(hit)--> HIT
  *                 ^               |
  *                 |            (ready)
  *                 |               |
@@ -27,8 +27,9 @@ struct trigger {
 	volatile enum {
 		TRIGGER_ERROR		= -2,
 		TRIGGER_OFF		= -1,
-		TRIGGER_READY		= 0,
-		TRIGGER_HIT		= 1,
+		TRIGGER_ON		= 0,
+		TRIGGER_READY		= 1,
+		TRIGGER_HIT		= 2,
 	} state;
 	const char *name;
 };
@@ -50,7 +51,7 @@ static inline bool trigger_is_error(struct trigger *t)
 static inline void trigger_on(struct trigger *t)
 {
 	TRIGGER_WARN_ONCE(t, TRIGGER_OFF);
-	t->state = TRIGGER_READY;
+	t->state = TRIGGER_ON;
 }
 
 static inline void trigger_ready(struct trigger *t)
-- 
cgit v1.2.3


From 63474dc4ac7ed3848a4786b9592dd061901f606d Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Tue, 6 Mar 2018 17:58:15 -0600
Subject: objtool: Fix 32-bit build

Fix the objtool build when cross-compiling a 64-bit kernel on a 32-bit
host.  This also simplifies read_retpoline_hints() a bit and makes its
implementation similar to most of the other annotation reading
functions.

Reported-by: Sven Joachim <svenjoac@gmx.de>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: b5bc2231b8ad ("objtool: Add retpoline validation")
Link: http://lkml.kernel.org/r/2ca46c636c23aa9c9d57d53c75de4ee3ddf7a7df.1520380691.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 tools/objtool/check.c | 27 +++++++--------------------
 1 file changed, 7 insertions(+), 20 deletions(-)

(limited to 'tools')

diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 46c1d239cc1b..92b6a2c21631 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -1116,42 +1116,29 @@ static int read_unwind_hints(struct objtool_file *file)
 
 static int read_retpoline_hints(struct objtool_file *file)
 {
-	struct section *sec, *relasec;
+	struct section *sec;
 	struct instruction *insn;
 	struct rela *rela;
-	int i;
 
-	sec = find_section_by_name(file->elf, ".discard.retpoline_safe");
+	sec = find_section_by_name(file->elf, ".rela.discard.retpoline_safe");
 	if (!sec)
 		return 0;
 
-	relasec = sec->rela;
-	if (!relasec) {
-		WARN("missing .rela.discard.retpoline_safe section");
-		return -1;
-	}
-
-	if (sec->len % sizeof(unsigned long)) {
-		WARN("retpoline_safe size mismatch: %d %ld", sec->len, sizeof(unsigned long));
-		return -1;
-	}
-
-	for (i = 0; i < sec->len / sizeof(unsigned long); i++) {
-		rela = find_rela_by_dest(sec, i * sizeof(unsigned long));
-		if (!rela) {
-			WARN("can't find rela for retpoline_safe[%d]", i);
+	list_for_each_entry(rela, &sec->rela_list, list) {
+		if (rela->sym->type != STT_SECTION) {
+			WARN("unexpected relocation symbol type in %s", sec->name);
 			return -1;
 		}
 
 		insn = find_insn(file, rela->sym->sec, rela->addend);
 		if (!insn) {
-			WARN("can't find insn for retpoline_safe[%d]", i);
+			WARN("bad .discard.retpoline_safe entry");
 			return -1;
 		}
 
 		if (insn->type != INSN_JUMP_DYNAMIC &&
 		    insn->type != INSN_CALL_DYNAMIC) {
-			WARN_FUNC("retpoline_safe hint not a indirect jump/call",
+			WARN_FUNC("retpoline_safe hint not an indirect jump/call",
 				  insn->sec, insn->offset);
 			return -1;
 		}
-- 
cgit v1.2.3


From 076ca272a14cea558b1092ec85cea08510283f2a Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Wed, 7 Mar 2018 11:12:27 -0800
Subject: x86/vsyscall/64: Drop "native" vsyscalls

Since Linux v3.2, vsyscalls have been deprecated and slow.  From v3.2
on, Linux had three vsyscall modes: "native", "emulate", and "none".

"emulate" is the default.  All known user programs work correctly in
emulate mode, but vsyscalls turn into page faults and are emulated.
This is very slow.  In "native" mode, the vsyscall page is easily
usable as an exploit gadget, but vsyscalls are a bit faster -- they
turn into normal syscalls.  (This is in contrast to vDSO functions,
which can be much faster than syscalls.)  In "none" mode, there are
no vsyscalls.

For all practical purposes, "native" was really just a chicken bit
in case something went wrong with the emulation.  It's been over six
years, and nothing has gone wrong.  Delete it.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Acked-by: Kees Cook <keescook@chromium.org>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dominik Brodowski <linux@dominikbrodowski.net>
Cc: Kernel Hardening <kernel-hardening@lists.openwall.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/519fee5268faea09ae550776ce969fa6e88668b0.1520449896.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/Kconfig                            | 11 +----------
 arch/x86/entry/vsyscall/vsyscall_64.c       | 16 +++-------------
 arch/x86/include/asm/pgtable_types.h        |  2 --
 tools/testing/selftests/x86/test_vsyscall.c | 11 ++++++-----
 4 files changed, 10 insertions(+), 30 deletions(-)

(limited to 'tools')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index c1aed6c0e413..09c599e0900d 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2266,7 +2266,7 @@ choice
 	  it can be used to assist security vulnerability exploitation.
 
 	  This setting can be changed at boot time via the kernel command
-	  line parameter vsyscall=[native|emulate|none].
+	  line parameter vsyscall=[emulate|none].
 
 	  On a system with recent enough glibc (2.14 or newer) and no
 	  static binaries, you can say None without a performance penalty
@@ -2274,15 +2274,6 @@ choice
 
 	  If unsure, select "Emulate".
 
-	config LEGACY_VSYSCALL_NATIVE
-		bool "Native"
-		help
-		  Actual executable code is located in the fixed vsyscall
-		  address mapping, implementing time() efficiently. Since
-		  this makes the mapping executable, it can be used during
-		  security vulnerability exploitation (traditionally as
-		  ROP gadgets). This configuration is not recommended.
-
 	config LEGACY_VSYSCALL_EMULATE
 		bool "Emulate"
 		help
diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c
index 577fa8adb785..8560ef68a9d6 100644
--- a/arch/x86/entry/vsyscall/vsyscall_64.c
+++ b/arch/x86/entry/vsyscall/vsyscall_64.c
@@ -42,10 +42,8 @@
 #define CREATE_TRACE_POINTS
 #include "vsyscall_trace.h"
 
-static enum { EMULATE, NATIVE, NONE } vsyscall_mode =
-#if defined(CONFIG_LEGACY_VSYSCALL_NATIVE)
-	NATIVE;
-#elif defined(CONFIG_LEGACY_VSYSCALL_NONE)
+static enum { EMULATE, NONE } vsyscall_mode =
+#ifdef CONFIG_LEGACY_VSYSCALL_NONE
 	NONE;
 #else
 	EMULATE;
@@ -56,8 +54,6 @@ static int __init vsyscall_setup(char *str)
 	if (str) {
 		if (!strcmp("emulate", str))
 			vsyscall_mode = EMULATE;
-		else if (!strcmp("native", str))
-			vsyscall_mode = NATIVE;
 		else if (!strcmp("none", str))
 			vsyscall_mode = NONE;
 		else
@@ -139,10 +135,6 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
 
 	WARN_ON_ONCE(address != regs->ip);
 
-	/* This should be unreachable in NATIVE mode. */
-	if (WARN_ON(vsyscall_mode == NATIVE))
-		return false;
-
 	if (vsyscall_mode == NONE) {
 		warn_bad_vsyscall(KERN_INFO, regs,
 				  "vsyscall attempted with vsyscall=none");
@@ -370,9 +362,7 @@ void __init map_vsyscall(void)
 
 	if (vsyscall_mode != NONE) {
 		__set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall,
-			     vsyscall_mode == NATIVE
-			     ? PAGE_KERNEL_VSYSCALL
-			     : PAGE_KERNEL_VVAR);
+			     PAGE_KERNEL_VVAR);
 		set_vsyscall_pgtable_user_bits(swapper_pg_dir);
 	}
 
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 246f15b4e64c..acfe755562a6 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -174,7 +174,6 @@ enum page_cache_mode {
 #define __PAGE_KERNEL_RO		(__PAGE_KERNEL & ~_PAGE_RW)
 #define __PAGE_KERNEL_RX		(__PAGE_KERNEL_EXEC & ~_PAGE_RW)
 #define __PAGE_KERNEL_NOCACHE		(__PAGE_KERNEL | _PAGE_NOCACHE)
-#define __PAGE_KERNEL_VSYSCALL		(__PAGE_KERNEL_RX | _PAGE_USER)
 #define __PAGE_KERNEL_VVAR		(__PAGE_KERNEL_RO | _PAGE_USER)
 #define __PAGE_KERNEL_LARGE		(__PAGE_KERNEL | _PAGE_PSE)
 #define __PAGE_KERNEL_LARGE_EXEC	(__PAGE_KERNEL_EXEC | _PAGE_PSE)
@@ -206,7 +205,6 @@ enum page_cache_mode {
 #define PAGE_KERNEL_NOCACHE	__pgprot(__PAGE_KERNEL_NOCACHE | _PAGE_ENC)
 #define PAGE_KERNEL_LARGE	__pgprot(__PAGE_KERNEL_LARGE | _PAGE_ENC)
 #define PAGE_KERNEL_LARGE_EXEC	__pgprot(__PAGE_KERNEL_LARGE_EXEC | _PAGE_ENC)
-#define PAGE_KERNEL_VSYSCALL	__pgprot(__PAGE_KERNEL_VSYSCALL | _PAGE_ENC)
 #define PAGE_KERNEL_VVAR	__pgprot(__PAGE_KERNEL_VVAR | _PAGE_ENC)
 
 #define PAGE_KERNEL_IO		__pgprot(__PAGE_KERNEL_IO)
diff --git a/tools/testing/selftests/x86/test_vsyscall.c b/tools/testing/selftests/x86/test_vsyscall.c
index be81621446f0..0b4f1cc2291c 100644
--- a/tools/testing/selftests/x86/test_vsyscall.c
+++ b/tools/testing/selftests/x86/test_vsyscall.c
@@ -450,7 +450,7 @@ static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
 		num_vsyscall_traps++;
 }
 
-static int test_native_vsyscall(void)
+static int test_emulation(void)
 {
 	time_t tmp;
 	bool is_native;
@@ -458,7 +458,7 @@ static int test_native_vsyscall(void)
 	if (!vtime)
 		return 0;
 
-	printf("[RUN]\tchecking for native vsyscall\n");
+	printf("[RUN]\tchecking that vsyscalls are emulated\n");
 	sethandler(SIGTRAP, sigtrap, 0);
 	set_eflags(get_eflags() | X86_EFLAGS_TF);
 	vtime(&tmp);
@@ -474,11 +474,12 @@ static int test_native_vsyscall(void)
 	 */
 	is_native = (num_vsyscall_traps > 1);
 
-	printf("\tvsyscalls are %s (%d instructions in vsyscall page)\n",
+	printf("[%s]\tvsyscalls are %s (%d instructions in vsyscall page)\n",
+	       (is_native ? "FAIL" : "OK"),
 	       (is_native ? "native" : "emulated"),
 	       (int)num_vsyscall_traps);
 
-	return 0;
+	return is_native;
 }
 #endif
 
@@ -498,7 +499,7 @@ int main(int argc, char **argv)
 	nerrs += test_vsys_r();
 
 #ifdef __x86_64__
-	nerrs += test_native_vsyscall();
+	nerrs += test_emulation();
 #endif
 
 	return nerrs ? 1 : 0;
-- 
cgit v1.2.3


From 0627be7d3c871035364923559543c9b2ff5357f2 Mon Sep 17 00:00:00 2001
From: Li Zhijian <zhijianx.li@intel.com>
Date: Fri, 9 Mar 2018 15:51:16 -0800
Subject: selftests/vm/run_vmtests: adjust hugetlb size according to nr_cpus

Fix userfaultfd_hugetlb on hosts which have more than 64 cpus.

  ---------------------------
  running userfaultfd_hugetlb
  ---------------------------
  invalid MiB
  Usage: <MiB> <bounces>
  [FAIL]

Via userfaultfd.c we can know, hugetlb_size needs to meet hugetlb_size
>= nr_cpus * hugepage_size.  hugepage_size is often 2M, so when host
cpus > 64, it requires more than 128M.

[zhijianx.li@intel.com: update changelog/comments and variable name]
 Link: http://lkml.kernel.org/r/20180302024356.83359-1-zhijianx.li@intel.com
 Link: http://lkml.kernel.org/r/20180303125027.81638-1-zhijianx.li@intel.com
Link: http://lkml.kernel.org/r/20180302024356.83359-1-zhijianx.li@intel.com
Signed-off-by: Li Zhijian <zhijianx.li@intel.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: SeongJae Park <sj38.park@gmail.com>
Cc: Philippe Ombredanne <pombredanne@nexb.com>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 tools/testing/selftests/vm/run_vmtests | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/vm/run_vmtests b/tools/testing/selftests/vm/run_vmtests
index d2561895a021..22d564673830 100755
--- a/tools/testing/selftests/vm/run_vmtests
+++ b/tools/testing/selftests/vm/run_vmtests
@@ -2,25 +2,33 @@
 # SPDX-License-Identifier: GPL-2.0
 #please run as root
 
-#we need 256M, below is the size in kB
-needmem=262144
 mnt=./huge
 exitcode=0
 
-#get pagesize and freepages from /proc/meminfo
+#get huge pagesize and freepages from /proc/meminfo
 while read name size unit; do
 	if [ "$name" = "HugePages_Free:" ]; then
 		freepgs=$size
 	fi
 	if [ "$name" = "Hugepagesize:" ]; then
-		pgsize=$size
+		hpgsize_KB=$size
 	fi
 done < /proc/meminfo
 
+# Simple hugetlbfs tests have a hardcoded minimum requirement of
+# huge pages totaling 256MB (262144KB) in size.  The userfaultfd
+# hugetlb test requires a minimum of 2 * nr_cpus huge pages.  Take
+# both of these requirements into account and attempt to increase
+# number of huge pages available.
+nr_cpus=$(nproc)
+hpgsize_MB=$((hpgsize_KB / 1024))
+half_ufd_size_MB=$((((nr_cpus * hpgsize_MB + 127) / 128) * 128))
+needmem_KB=$((half_ufd_size_MB * 2 * 1024))
+
 #set proper nr_hugepages
-if [ -n "$freepgs" ] && [ -n "$pgsize" ]; then
+if [ -n "$freepgs" ] && [ -n "$hpgsize_KB" ]; then
 	nr_hugepgs=`cat /proc/sys/vm/nr_hugepages`
-	needpgs=`expr $needmem / $pgsize`
+	needpgs=$((needmem_KB / hpgsize_KB))
 	tries=2
 	while [ $tries -gt 0 ] && [ $freepgs -lt $needpgs ]; do
 		lackpgs=$(( $needpgs - $freepgs ))
@@ -107,8 +115,9 @@ fi
 echo "---------------------------"
 echo "running userfaultfd_hugetlb"
 echo "---------------------------"
-# 256MB total huge pages == 128MB src and 128MB dst
-./userfaultfd hugetlb 128 32 $mnt/ufd_test_file
+# Test requires source and destination huge pages.  Size of source
+# (half_ufd_size_MB) is passed as argument to test.
+./userfaultfd hugetlb $half_ufd_size_MB 32 $mnt/ufd_test_file
 if [ $? -ne 0 ]; then
 	echo "[FAIL]"
 	exitcode=1
-- 
cgit v1.2.3