From e51a04840a1c45db101686bef0b7025d5014c74b Mon Sep 17 00:00:00 2001 From: Teodor Sigaev Date: Wed, 21 Mar 2018 18:01:23 +0300 Subject: Add general purpose hashing functions to pgbench. Hashing functions are useful for simulating real-world workloads in tests, such as web workloads; YCSB benchmarks are one example. Author: Ildar Musin with minor editing by me Reviewed by: Fabien Coelho, me Discussion: https://www.postgresql.org/message-id/flat/0e8bd39e-dfcd-2879-f88f-272799ad7ef2@postgrespro.ru --- doc/src/sgml/ref/pgbench.sgml | 59 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 55 insertions(+), 4 deletions(-) (limited to 'doc/src') diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml index 5f280237991..f07ddf1226e 100644 --- a/doc/src/sgml/ref/pgbench.sgml +++ b/doc/src/sgml/ref/pgbench.sgml @@ -874,13 +874,18 @@ pgbench options d - scale - current scale factor + client_id + unique number identifying the client session (starts from zero) - client_id - unique number identifying the client session (starts from zero) + default_seed + seed used in hash functions by default + + + + scale + current scale factor @@ -1245,6 +1250,27 @@ pgbench options d greatest(5, 4, 3, 2) 5 + + hash(a [, seed ] ) + integer + alias for hash_murmur2() + hash(10, 5432) + -5817877081768721676 + + + hash_fnv1a(a [, seed ] ) + integer + FNV-1a hash + hash_fnv1a(10, 5432) + -7793829335365542153 + + + hash_murmur2(a [, seed ] ) + integer + MurmurHash2 hash + hash_murmur2(10, 5432) + -5817877081768721676 + int(x) integer @@ -1423,6 +1449,31 @@ f(x) = PHI(2.0 * parameter * (x - mu) / (max - min + 1)) / + + Hash functions hash, hash_murmur2 and + hash_fnv1a accept an input value and an optional seed parameter. + If the seed isn't provided, the value of :default_seed + is used, which is initialized randomly unless set by the command-line + -D option. 
Hash functions can be used to scatter the + distribution of random functions such as random_zipfian or + random_exponential. For instance, the following pgbench + script simulates a possible real-world workload typical for social media and + blogging platforms where a few accounts generate excessive load: + + +\set r random_zipfian(0, 100000000, 1.07) +\set k abs(hash(:r)) % 1000000 + + + In some cases several distinct distributions are needed which don't correlate + with each other and this is when an explicit seed parameter comes in handy: + + +\set k1 abs(hash(:r, :default_seed + 123)) % 1000000 +\set k2 abs(hash(:r, :default_seed + 321)) % 1000000 + + + As an example, the full definition of the built-in TPC-B-like transaction is: -- cgit v1.2.3