diff options
Diffstat (limited to 'tools/perf/bench')
| -rw-r--r-- | tools/perf/bench/futex-hash.c | 20 | ||||
| -rw-r--r-- | tools/perf/bench/futex-lock-pi.c | 23 | ||||
| -rw-r--r-- | tools/perf/bench/futex-requeue.c | 22 | ||||
| -rw-r--r-- | tools/perf/bench/futex-wake-parallel.c | 46 | ||||
| -rw-r--r-- | tools/perf/bench/futex-wake.c | 18 | ||||
| -rw-r--r-- | tools/perf/bench/numa.c | 56 | 
6 files changed, 136 insertions, 49 deletions
| diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c index 58ae6ed8f38b..9aa3a674829b 100644 --- a/tools/perf/bench/futex-hash.c +++ b/tools/perf/bench/futex-hash.c @@ -24,9 +24,9 @@  #include <subcmd/parse-options.h>  #include "bench.h"  #include "futex.h" +#include "cpumap.h"  #include <err.h> -#include <sys/time.h>  static unsigned int nthreads = 0;  static unsigned int nsecs    = 10; @@ -118,11 +118,12 @@ static void print_summary(void)  int bench_futex_hash(int argc, const char **argv)  {  	int ret = 0; -	cpu_set_t cpu; +	cpu_set_t cpuset;  	struct sigaction act; -	unsigned int i, ncpus; +	unsigned int i;  	pthread_attr_t thread_attr;  	struct worker *worker = NULL; +	struct cpu_map *cpu;  	argc = parse_options(argc, argv, options, bench_futex_hash_usage, 0);  	if (argc) { @@ -130,14 +131,16 @@ int bench_futex_hash(int argc, const char **argv)  		exit(EXIT_FAILURE);  	} -	ncpus = sysconf(_SC_NPROCESSORS_ONLN); +	cpu = cpu_map__new(NULL); +	if (!cpu) +		goto errmem;  	sigfillset(&act.sa_mask);  	act.sa_sigaction = toggle_done;  	sigaction(SIGINT, &act, NULL);  	if (!nthreads) /* default to the number of CPUs */ -		nthreads = ncpus; +		nthreads = cpu->nr;  	worker = calloc(nthreads, sizeof(*worker));  	if (!worker) @@ -163,10 +166,10 @@ int bench_futex_hash(int argc, const char **argv)  		if (!worker[i].futex)  			goto errmem; -		CPU_ZERO(&cpu); -		CPU_SET(i % ncpus, &cpu); +		CPU_ZERO(&cpuset); +		CPU_SET(cpu->map[i % cpu->nr], &cpuset); -		ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu); +		ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset);  		if (ret)  			err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); @@ -217,6 +220,7 @@ int bench_futex_hash(int argc, const char **argv)  	print_summary();  	free(worker); +	free(cpu);  	return ret;  errmem:  	err(EXIT_FAILURE, "calloc"); diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c index 08653ae8a8c4..8e9c4753e304 100644 --- a/tools/perf/bench/futex-lock-pi.c +++ b/tools/perf/bench/futex-lock-pi.c @@ -15,6 +15,7 @@  #include <errno.h>  #include "bench.h"  #include "futex.h" +#include "cpumap.h"  #include <err.h>  #include <stdlib.h> @@ -32,7 +33,7 @@ static struct worker *worker;  static unsigned int nsecs = 10;  static bool silent = false, multi = false;  static bool done = false, fshared = false; -static unsigned int ncpus, nthreads = 0; +static unsigned int nthreads = 0;  static int futex_flag = 0;  struct timeval start, end, runtime;  static pthread_mutex_t thread_lock; @@ -113,9 +114,10 @@ static void *workerfn(void *arg)  	return NULL;  } -static void create_threads(struct worker *w, pthread_attr_t thread_attr) +static void create_threads(struct worker *w, pthread_attr_t thread_attr, +			   struct cpu_map *cpu)  { -	cpu_set_t cpu; +	cpu_set_t cpuset;  	unsigned int i;  	threads_starting = nthreads; @@ -130,10 +132,10 @@ static void create_threads(struct worker *w, pthread_attr_t thread_attr)  		} else  			worker[i].futex = &global_futex; -		CPU_ZERO(&cpu); -		CPU_SET(i % ncpus, &cpu); +		CPU_ZERO(&cpuset); +		CPU_SET(cpu->map[i % cpu->nr], &cpuset); -		if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu)) +		if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset))  			err(EXIT_FAILURE, "pthread_attr_setaffinity_np");  		if (pthread_create(&w[i].thread, &thread_attr, workerfn, &worker[i])) @@ -147,19 +149,22 @@ int bench_futex_lock_pi(int argc, const char **argv)  	unsigned int i;  	struct sigaction act;  	pthread_attr_t thread_attr; +	struct cpu_map *cpu;  	argc = parse_options(argc, argv, options, bench_futex_lock_pi_usage, 0);  	if (argc)  		goto err; -	ncpus = sysconf(_SC_NPROCESSORS_ONLN); +	cpu = cpu_map__new(NULL); +	if (!cpu) +		err(EXIT_FAILURE, "calloc");  	sigfillset(&act.sa_mask);  	act.sa_sigaction = toggle_done;  	sigaction(SIGINT, &act, NULL);  	if (!nthreads) -		nthreads = ncpus; +		nthreads = cpu->nr;  	worker = calloc(nthreads, sizeof(*worker));  	if (!worker) @@ -180,7 +185,7 @@ int bench_futex_lock_pi(int argc, const char **argv)  	pthread_attr_init(&thread_attr);  	gettimeofday(&start, NULL); -	create_threads(worker, thread_attr); +	create_threads(worker, thread_attr, cpu);  	pthread_attr_destroy(&thread_attr);  	pthread_mutex_lock(&thread_lock); diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c index 1058c194608a..fc692efa0c05 100644 --- a/tools/perf/bench/futex-requeue.c +++ b/tools/perf/bench/futex-requeue.c @@ -22,6 +22,7 @@  #include <errno.h>  #include "bench.h"  #include "futex.h" +#include "cpumap.h"  #include <err.h>  #include <stdlib.h> @@ -40,7 +41,7 @@ static bool done = false, silent = false, fshared = false;  static pthread_mutex_t thread_lock;  static pthread_cond_t thread_parent, thread_worker;  static struct stats requeuetime_stats, requeued_stats; -static unsigned int ncpus, threads_starting, nthreads = 0; +static unsigned int threads_starting, nthreads = 0;  static int futex_flag = 0;  static const struct option options[] = { @@ -83,19 +84,19 @@ static void *workerfn(void *arg __maybe_unused)  }  static void block_threads(pthread_t *w, -			  pthread_attr_t thread_attr) +			  pthread_attr_t thread_attr, struct cpu_map *cpu)  { -	cpu_set_t cpu; +	cpu_set_t cpuset;  	unsigned int i;  	threads_starting = nthreads;  	/* create and block all threads */  	for (i = 0; i < nthreads; i++) { -		CPU_ZERO(&cpu); -		CPU_SET(i % ncpus, &cpu); +		CPU_ZERO(&cpuset); +		CPU_SET(cpu->map[i % cpu->nr], &cpuset); -		if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu)) +		if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset))  			err(EXIT_FAILURE, "pthread_attr_setaffinity_np");  		if (pthread_create(&w[i], &thread_attr, workerfn, NULL)) @@ -116,19 +117,22 @@ int bench_futex_requeue(int argc, const char **argv)  	unsigned int i, j;  	struct sigaction act;  	pthread_attr_t thread_attr; +	struct cpu_map *cpu;  	argc = parse_options(argc, argv, options, bench_futex_requeue_usage, 0);  	if (argc)  		goto err; -	ncpus = sysconf(_SC_NPROCESSORS_ONLN); +	cpu = cpu_map__new(NULL); +	if (!cpu) +		err(EXIT_FAILURE, "cpu_map__new");  	sigfillset(&act.sa_mask);  	act.sa_sigaction = toggle_done;  	sigaction(SIGINT, &act, NULL);  	if (!nthreads) -		nthreads = ncpus; +		nthreads = cpu->nr;  	worker = calloc(nthreads, sizeof(*worker));  	if (!worker) @@ -156,7 +160,7 @@ int bench_futex_requeue(int argc, const char **argv)  		struct timeval start, end, runtime;  		/* create, launch & block all threads */ -		block_threads(worker, thread_attr); +		block_threads(worker, thread_attr, cpu);  		/* make sure all threads are already blocked */  		pthread_mutex_lock(&thread_lock); diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c index b4732dad9f89..69d8fdc87315 100644 --- a/tools/perf/bench/futex-wake-parallel.c +++ b/tools/perf/bench/futex-wake-parallel.c @@ -7,7 +7,17 @@   * for each individual thread to service its share of work. Ultimately   * it can be used to measure futex_wake() changes.   */ +#include "bench.h" +#include <linux/compiler.h> +#include "../util/debug.h" +#ifndef HAVE_PTHREAD_BARRIER +int bench_futex_wake_parallel(int argc __maybe_unused, const char **argv __maybe_unused) +{ +	pr_err("%s: pthread_barrier_t unavailable, disabling this test...\n", __func__); +	return 0; +} +#else /* HAVE_PTHREAD_BARRIER */  /* For the CLR_() macros */  #include <string.h>  #include <pthread.h> @@ -15,12 +25,11 @@  #include <signal.h>  #include "../util/stat.h"  #include <subcmd/parse-options.h> -#include <linux/compiler.h>  #include <linux/kernel.h>  #include <linux/time64.h>  #include <errno.h> -#include "bench.h"  #include "futex.h" +#include "cpumap.h"  #include <err.h>  #include <stdlib.h> @@ -42,8 +51,9 @@ static bool done = false, silent = false, fshared = false;  static unsigned int nblocked_threads = 0, nwaking_threads = 0;  static pthread_mutex_t thread_lock;  static pthread_cond_t thread_parent, thread_worker; +static pthread_barrier_t barrier;  static struct stats waketime_stats, wakeup_stats; -static unsigned int ncpus, threads_starting; +static unsigned int threads_starting;  static int futex_flag = 0;  static const struct option options[] = { @@ -64,6 +74,8 @@ static void *waking_workerfn(void *arg)  	struct thread_data *waker = (struct thread_data *) arg;  	struct timeval start, end; +	pthread_barrier_wait(&barrier); +  	gettimeofday(&start, NULL);  	waker->nwoken = futex_wake(&futex, nwakes, futex_flag); @@ -84,6 +96,8 @@ static void wakeup_threads(struct thread_data *td, pthread_attr_t thread_attr)  	pthread_attr_setdetachstate(&thread_attr, PTHREAD_CREATE_JOINABLE); +	pthread_barrier_init(&barrier, NULL, nwaking_threads + 1); +  	/* create and block all threads */  	for (i = 0; i < nwaking_threads; i++) {  		/* @@ -96,9 +110,13 @@ static void wakeup_threads(struct thread_data *td, pthread_attr_t thread_attr)  			err(EXIT_FAILURE, "pthread_create");  	} +	pthread_barrier_wait(&barrier); +  	for (i = 0; i < nwaking_threads; i++)  		if (pthread_join(td[i].worker, NULL))  			err(EXIT_FAILURE, "pthread_join"); + +	pthread_barrier_destroy(&barrier);  }  static void *blocked_workerfn(void *arg __maybe_unused) @@ -119,19 +137,20 @@ static void *blocked_workerfn(void *arg __maybe_unused)  	return NULL;  } -static void block_threads(pthread_t *w, pthread_attr_t thread_attr) +static void block_threads(pthread_t *w, pthread_attr_t thread_attr, +			  struct cpu_map *cpu)  { -	cpu_set_t cpu; +	cpu_set_t cpuset;  	unsigned int i;  	threads_starting = nblocked_threads;  	/* create and block all threads */  	for (i = 0; i < nblocked_threads; i++) { -		CPU_ZERO(&cpu); -		CPU_SET(i % ncpus, &cpu); +		CPU_ZERO(&cpuset); +		CPU_SET(cpu->map[i % cpu->nr], &cpuset); -		if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu)) +		if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset))  			err(EXIT_FAILURE, "pthread_attr_setaffinity_np");  		if (pthread_create(&w[i], &thread_attr, blocked_workerfn, NULL)) @@ -205,6 +224,7 @@ int bench_futex_wake_parallel(int argc, const char **argv)  	struct sigaction act;  	pthread_attr_t thread_attr;  	struct thread_data *waking_worker; +	struct cpu_map *cpu;  	argc = parse_options(argc, argv, options,  			     bench_futex_wake_parallel_usage, 0); @@ -217,9 +237,12 @@ int bench_futex_wake_parallel(int argc, const char **argv)  	act.sa_sigaction = toggle_done;  	sigaction(SIGINT, &act, NULL); -	ncpus = sysconf(_SC_NPROCESSORS_ONLN); +	cpu = cpu_map__new(NULL); +	if (!cpu) +		err(EXIT_FAILURE, "calloc"); +  	if (!nblocked_threads) -		nblocked_threads = ncpus; +		nblocked_threads = cpu->nr;  	/* some sanity checks */  	if (nwaking_threads > nblocked_threads || !nwaking_threads) @@ -259,7 +282,7 @@ int bench_futex_wake_parallel(int argc, const char **argv)  			err(EXIT_FAILURE, "calloc");  		/* create, launch & block all threads */ -		block_threads(blocked_worker, thread_attr); +		block_threads(blocked_worker, thread_attr, cpu);  		/* make sure all threads are already blocked */  		pthread_mutex_lock(&thread_lock); @@ -297,3 +320,4 @@ int bench_futex_wake_parallel(int argc, const char **argv)  	free(blocked_worker);  	return ret;  } +#endif /* HAVE_PTHREAD_BARRIER */ diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c index 8c5c0b6b5c97..e8181ad7d088 100644 --- a/tools/perf/bench/futex-wake.c +++ b/tools/perf/bench/futex-wake.c @@ -22,6 +22,7 @@  #include <errno.h>  #include "bench.h"  #include "futex.h" +#include "cpumap.h"  #include <err.h>  #include <stdlib.h> @@ -89,19 +90,19 @@ static void print_summary(void)  }  static void block_threads(pthread_t *w, -			  pthread_attr_t thread_attr) +			  pthread_attr_t thread_attr, struct cpu_map *cpu)  { -	cpu_set_t cpu; +	cpu_set_t cpuset;  	unsigned int i;  	threads_starting = nthreads;  	/* create and block all threads */  	for (i = 0; i < nthreads; i++) { -		CPU_ZERO(&cpu); -		CPU_SET(i % ncpus, &cpu); +		CPU_ZERO(&cpuset); +		CPU_SET(cpu->map[i % cpu->nr], &cpuset); -		if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu)) +		if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset))  			err(EXIT_FAILURE, "pthread_attr_setaffinity_np");  		if (pthread_create(&w[i], &thread_attr, workerfn, NULL)) @@ -122,6 +123,7 @@ int bench_futex_wake(int argc, const char **argv)  	unsigned int i, j;  	struct sigaction act;  	pthread_attr_t thread_attr; +	struct cpu_map *cpu;  	argc = parse_options(argc, argv, options, bench_futex_wake_usage, 0);  	if (argc) { @@ -129,7 +131,9 @@ int bench_futex_wake(int argc, const char **argv)  		exit(EXIT_FAILURE);  	} -	ncpus = sysconf(_SC_NPROCESSORS_ONLN); +	cpu = cpu_map__new(NULL); +	if (!cpu) +		err(EXIT_FAILURE, "calloc");  	sigfillset(&act.sa_mask);  	act.sa_sigaction = toggle_done; @@ -161,7 +165,7 @@ int bench_futex_wake(int argc, const char **argv)  		struct timeval start, end, runtime;  		/* create, launch & block all threads */ -		block_threads(worker, thread_attr); +		block_threads(worker, thread_attr, cpu);  		/* make sure all threads are already blocked */  		pthread_mutex_lock(&thread_lock); diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index d95fdcc26f4b..944070e98a2c 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -216,6 +216,47 @@ static const char * const numa_usage[] = {  	NULL  }; +/* + * To get number of numa nodes present. + */ +static int nr_numa_nodes(void) +{ +	int i, nr_nodes = 0; + +	for (i = 0; i < g->p.nr_nodes; i++) { +		if (numa_bitmask_isbitset(numa_nodes_ptr, i)) +			nr_nodes++; +	} + +	return nr_nodes; +} + +/* + * To check if given numa node is present. + */ +static int is_node_present(int node) +{ +	return numa_bitmask_isbitset(numa_nodes_ptr, node); +} + +/* + * To check given numa node has cpus. + */ +static bool node_has_cpus(int node) +{ +	struct bitmask *cpu = numa_allocate_cpumask(); +	unsigned int i; + +	if (cpu && !numa_node_to_cpus(node, cpu)) { +		for (i = 0; i < cpu->size; i++) { +			if (numa_bitmask_isbitset(cpu, i)) +				return true; +		} +	} + +	return false; /* lets fall back to nocpus safely */ +} +  static cpu_set_t bind_to_cpu(int target_cpu)  {  	cpu_set_t orig_mask, mask; @@ -244,12 +285,12 @@ static cpu_set_t bind_to_cpu(int target_cpu)  static cpu_set_t bind_to_node(int target_node)  { -	int cpus_per_node = g->p.nr_cpus/g->p.nr_nodes; +	int cpus_per_node = g->p.nr_cpus / nr_numa_nodes();  	cpu_set_t orig_mask, mask;  	int cpu;  	int ret; -	BUG_ON(cpus_per_node*g->p.nr_nodes != g->p.nr_cpus); +	BUG_ON(cpus_per_node * nr_numa_nodes() != g->p.nr_cpus);  	BUG_ON(!cpus_per_node);  	ret = sched_getaffinity(0, sizeof(orig_mask), &orig_mask); @@ -649,7 +690,7 @@ static int parse_setup_node_list(void)  			int i;  			for (i = 0; i < mul; i++) { -				if (t >= g->p.nr_tasks) { +				if (t >= g->p.nr_tasks || !node_has_cpus(bind_node)) {  					printf("\n# NOTE: ignoring bind NODEs starting at NODE#%d\n", bind_node);  					goto out;  				} @@ -964,6 +1005,8 @@ static void calc_convergence(double runtime_ns_max, double *convergence)  	sum = 0;  	for (node = 0; node < g->p.nr_nodes; node++) { +		if (!is_node_present(node)) +			continue;  		nr = nodes[node];  		nr_min = min(nr, nr_min);  		nr_max = max(nr, nr_max); @@ -984,8 +1027,11 @@ static void calc_convergence(double runtime_ns_max, double *convergence)  	process_groups = 0;  	for (node = 0; node < g->p.nr_nodes; node++) { -		int processes = count_node_processes(node); +		int processes; +		if (!is_node_present(node)) +			continue; +		processes = count_node_processes(node);  		nr = nodes[node];  		tprintf(" %2d/%-2d", nr, processes); @@ -1291,7 +1337,7 @@ static void print_summary(void)  	printf("\n ###\n");  	printf(" # %d %s will execute (on %d nodes, %d CPUs):\n", -		g->p.nr_tasks, g->p.nr_tasks == 1 ? "task" : "tasks", g->p.nr_nodes, g->p.nr_cpus); +		g->p.nr_tasks, g->p.nr_tasks == 1 ? "task" : "tasks", nr_numa_nodes(), g->p.nr_cpus);  	printf(" #      %5dx %5ldMB global  shared mem operations\n",  			g->p.nr_loops, g->p.bytes_global/1024/1024);  	printf(" #      %5dx %5ldMB process shared mem operations\n", | 
