From e6c039d13e16a3a2dec5ba479d9d1fb3229c03a3 Mon Sep 17 00:00:00 2001 From: Andres Freund Date: Wed, 28 Mar 2018 14:22:42 -0700 Subject: Add documentation for the JIT feature. As promised in earlier commits, this adds documentation about the new build options, the new GUCs, about the planner logic when JIT is used, and the benefits of JIT in general. Also adds a more implementation oriented README. I'm sure we're going to want to expand this further, but I think this is a reasonable start. Author: Andres Freund, with contributions by Thomas Munro Reviewed-By: Thomas Munro Discussion: https://postgr.es/m/20170901064131.tazjxwus3k2w3ybh@alap3.anarazel.de --- doc/src/sgml/acronyms.sgml | 10 ++ doc/src/sgml/config.sgml | 183 ++++++++++++++++++++++++- doc/src/sgml/filelist.sgml | 1 + doc/src/sgml/func.sgml | 8 ++ doc/src/sgml/installation.sgml | 53 ++++++++ doc/src/sgml/jit.sgml | 299 +++++++++++++++++++++++++++++++++++++++++ doc/src/sgml/postgres.sgml | 1 + doc/src/sgml/storage.sgml | 2 +- 8 files changed, 555 insertions(+), 2 deletions(-) create mode 100644 doc/src/sgml/jit.sgml (limited to 'doc/src') diff --git a/doc/src/sgml/acronyms.sgml b/doc/src/sgml/acronyms.sgml index 751c46de6d4..638ffc9fe83 100644 --- a/doc/src/sgml/acronyms.sgml +++ b/doc/src/sgml/acronyms.sgml @@ -369,6 +369,16 @@ + + JIT + + + Just-in-Time + compilation + + + + JSON diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index 4d899e3b244..dc9ed22eb41 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -4136,6 +4136,62 @@ ANY num_sync ( + jit_above_cost (floating point) + + jit_above_cost configuration parameter + + + + + Sets the planner's cutoff above which JIT compilation is used as part + of query execution (see ). Performing + JIT costs time but can accelerate query execution. + + The default is 100000. 
+ + + + + + jit_optimize_above_cost (floating point) + + jit_optimize_above_cost configuration parameter + + + + + Sets the planner's cutoff above which JIT compiled programs (see ) are optimized. Optimization initially + takes time, but can improve execution speed. It is not meaningful to + set this to a lower value than jit_above_cost. + + The default is 500000. + + + + + + jit_inline_above_cost (floating point) + + jit_inline_above_cost configuration parameter + + + + + Sets the planner's cutoff above which JIT compiled programs (see ) attempt to inline functions and + operators. Inlining initially takes time, but can improve execution + speed. It is unlikely to be beneficial to set + jit_inline_above_cost below + jit_optimize_above_cost. + + The default is 500000. + + + + @@ -4418,6 +4474,23 @@ SELECT * FROM parent WHERE key = 2400; + + jit (boolean) + + jit configuration parameter + + + + + Determines whether JIT may be used by + PostgreSQL, if available (see ). + + The default is on. + + + + join_collapse_limit (integer) @@ -7412,6 +7485,29 @@ SET XML OPTION { DOCUMENT | CONTENT }; + + + jit_provider (string) + + jit_provider configuration parameter + + + + + Determines which JIT provider (see ) is + used. The built-in default is llvmjit. + + + If set to a non-existent library, JIT will not + be available, but no error will be raised. This allows JIT support to be + installed separately from the main + PostgreSQL package. + + This parameter can only be set at server start. + + + + @@ -8658,7 +8754,92 @@ LOG: CleanUpLock: deleting: lock(0xb7acd844) id(24688,24696,0,0,0,1) - + + + jit_debugging_support (boolean) + + jit_debugging_support configuration parameter + + + + + If LLVM has the required functionality, register generated functions + with GDB. This makes debugging easier. + + The default setting is off, and can only be set at + server start. 
+ + + + + + jit_dump_bitcode (boolean) + + jit_dump_bitcode configuration parameter + + + + + Writes the generated LLVM IR out to the + filesystem, inside . This is only + useful for working on the internals of the JIT implementation. + + The default setting is off, and it can only be + changed by a superuser. + + + + + + jit_expressions (boolean) + + jit_expressions configuration parameter + + + + + Determines whether expressions are JIT compiled, subject to costing + decisions (see ). The default is + on. + + + + + + jit_profiling_support (boolean) + + jit_profiling_support configuration parameter + + + + + If LLVM has the required functionality, emit required data to allow + perf to profile functions generated by JIT. + This writes out files to $HOME/.debug/jit/; the + user is responsible for performing cleanup when desired. + + The default setting is off, and can only be set at + server start. + + + + + + jit_tuple_deforming (boolean) + + jit_tuple_deforming configuration parameter + + + + + Determines whether tuple deforming is JIT compiled, subject to costing + decisions (see ). The default is + on. + + + + + Short Options diff --git a/doc/src/sgml/filelist.sgml b/doc/src/sgml/filelist.sgml index 732b8ab7d0b..56b8da04488 100644 --- a/doc/src/sgml/filelist.sgml +++ b/doc/src/sgml/filelist.sgml @@ -48,6 +48,7 @@ + diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 7b1a85fc717..9d1772f349a 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -15942,6 +15942,14 @@ SELECT * FROM pg_ls_dir('.') WITH ORDINALITY AS t(ls,n); is schema another session's temporary schema? + + pg_jit_available() + boolean + is JIT available in this session (see )? Returns false if jit is set to false. 
+ + pg_listening_channels() setof text diff --git a/doc/src/sgml/installation.sgml b/doc/src/sgml/installation.sgml index 2d24153bdcc..30921cf4868 100644 --- a/doc/src/sgml/installation.sgml +++ b/doc/src/sgml/installation.sgml @@ -758,6 +758,39 @@ su - postgres + + + + + Build with support for LLVM based + JIT compilation (see ). This + requires the LLVM library to be installed. + The minimum required version of LLVM is + currently 3.9. + + + llvm-configllvm-config + will be used to find the required compilation options. + llvm-config, and then + llvm-config-$major-$minor for all supported + versions, will be searched on PATH. If that would not + yield the correct binary, use LLVM_CONFIG to specify a + path to the correct llvm-config. For example + +./configure ... --with-llvm LLVM_CONFIG='/path/to/llvm/bin/llvm-config' + + + + + LLVM support requires a compatible + clang compiler (specified, if necessary, using the + CLANG environment variable), and a working C++ + compiler (specified, if necessary, using the CXX + environment variable). + + + + @@ -1342,6 +1375,16 @@ su - postgres + + CLANG + + + path to clang program used to process source code + for inlining when compiling with --with-llvm + + + + CPP @@ -1432,6 +1475,16 @@ su - postgres + + LLVM_CONFIG + + + llvm-config program used to locate the + LLVM installation. + + + + MSGFMT diff --git a/doc/src/sgml/jit.sgml b/doc/src/sgml/jit.sgml new file mode 100644 index 00000000000..f59e4923e14 --- /dev/null +++ b/doc/src/sgml/jit.sgml @@ -0,0 +1,299 @@ + + + + Just-in-Time Compilation (<acronym>JIT</acronym>) + + + JIT + + + + Just-In-Time compilation + JIT + + + + This chapter explains what just-in-time compilation is, and how it can be + configured in PostgreSQL. + + + + What is <acronym>JIT</acronym>? + + + Just-in-time compilation (JIT) is the process of turning + some form of interpreted program evaluation into a native program, and + doing so at runtime. 
+ + For example, instead of using a facility that can evaluate arbitrary SQL + expressions to evaluate an SQL predicate like WHERE a.col = + 3, it is possible to generate a function that can be natively + executed by the CPU that just handles that expression, yielding a speedup. + + + + PostgreSQL has builtin support to perform + JIT compilation using LLVM when + PostgreSQL is built with + --with-llvm (see ). + + + + See src/backend/jit/README for further details. + + + + <acronym>JIT</acronym> Accelerated Operations + + Currently PostgreSQL's JIT + implementation has support for accelerating expression evaluation and + tuple deforming. Several other operations could be accelerated in the + future. + + + Expression evaluation is used to evaluate WHERE + clauses, target lists, aggregates and projections. It can be accelerated + by generating code specific to each case. + + + Tuple deforming is the process of transforming an on-disk tuple (see ) into its in-memory representation. It can be + accelerated by creating a function specific to the table layout and the + number of columns to be extracted. + + + + + Optimization + + LLVM has support for optimizing generated + code. Some of the optimizations are cheap enough to be performed whenever + JIT is used, while others are only beneficial for + longer running queries. + + See for + more details about optimizations. + + + + + Inlining + + PostgreSQL is very extensible and allows new + datatypes, functions, operators and other database objects to be defined; + see . In fact the built-in ones are implemented + using nearly the same mechanisms. This extensibility implies some + overhead, for example due to function calls (see ). + To reduce that overhead JIT compilation can inline the + bodies of small functions into the expression using them. That allows a + significant percentage of the overhead to be optimized away. + + + + + + + When to <acronym>JIT</acronym>? 
+ + + JIT is beneficial primarily for long-running CPU bound + queries. Frequently these will be analytical queries. For short queries + the overhead of performing JIT will often be higher than + the time it can save. + + + + To determine whether JIT is used, the total cost of a + query (see and ) is used. + + + + The cost of the query will be compared with the jit_above_cost GUC. If the cost is higher, + JIT compilation will be performed. + + + + If the planner, based on the above criterion, decided that + JIT is beneficial, two further decisions are + made. Firstly, if the query is more costly than the jit_optimize_above_cost GUC, expensive optimizations are + used to improve the generated code. Secondly, if the query is more costly + than the jit_inline_above_cost GUC, short functions + and operators used in the query will be inlined. Both of these operations + increase the JIT overhead, but can reduce query + execution time considerably. + + + + This cost based decision will be made at plan time, not execution + time. This means that when prepared statements are in use, and the generic + plan is used (see ), the values of the + GUCs set at prepare time take effect, not the settings at execution time. + + + + + If jit is set to off, or no + JIT implementation is available (for example because + the server was compiled without --with-llvm), + JIT will not be performed, even if considered to be + beneficial based on the above criteria. Setting jit + to off takes effect both at plan and at execution time. + + + + + EXPLAIN can be used to see whether + JIT is used or not. 
As an example, here is a query that + is not using JIT: + +=# EXPLAIN ANALYZE SELECT SUM(relpages) FROM pg_class; +┌─────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +│ QUERY PLAN │ +├─────────────────────────────────────────────────────────────────────────────────────────────────────────────┤ +│ Aggregate (cost=16.27..16.29 rows=1 width=8) (actual time=0.303..0.303 rows=1 loops=1) │ +│ -> Seq Scan on pg_class (cost=0.00..15.42 rows=342 width=4) (actual time=0.017..0.111 rows=356 loops=1) │ +│ Planning Time: 0.116 ms │ +│ Execution Time: 0.365 ms │ +└─────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ +(4 rows) + + Given the cost of the plan, it is entirely reasonable that no + JIT was used; the cost of JIT would + have been bigger than the savings. Adjusting the cost limits will lead to + JIT use: + +=# SET jit_above_cost = 10; +SET +=# EXPLAIN ANALYZE SELECT SUM(relpages) FROM pg_class; +┌─────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +│ QUERY PLAN │ +├─────────────────────────────────────────────────────────────────────────────────────────────────────────────┤ +│ Aggregate (cost=16.27..16.29 rows=1 width=8) (actual time=6.049..6.049 rows=1 loops=1) │ +│ -> Seq Scan on pg_class (cost=0.00..15.42 rows=342 width=4) (actual time=0.019..0.052 rows=356 loops=1) │ +│ Planning Time: 0.133 ms │ +│ JIT: │ +│ Functions: 3 │ +│ Generation Time: 1.259 ms │ +│ Inlining: false │ +│ Inlining Time: 0.000 ms │ +│ Optimization: false │ +│ Optimization Time: 0.797 ms │ +│ Emission Time: 5.048 ms │ +│ Execution Time: 7.416 ms │ +└─────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ + + As visible here, JIT was used, but inlining and + optimization were not. If jit_optimize_above_cost and jit_inline_above_cost + were lowered, just like jit_above_cost, that would change. 
+ + + + + Configuration + + + jit determines whether JIT is + enabled or disabled. + + + + As explained in the section "When to JIT?", the configuration variables + jit_above_cost, jit_optimize_above_cost, jit_inline_above_cost decide whether JIT + compilation is performed for a query, and how much effort is spent doing + so. + + + + For development and debugging purposes a few additional GUCs exist. jit_dump_bitcode allows the generated bitcode to be + inspected. jit_debugging_support allows GDB to see + generated functions. jit_profiling_support emits + information so the perf profiler can interpret + JIT generated functions sensibly. + + + + jit_provider determines which JIT + implementation is used. It rarely needs to be changed. See the section "Pluggable JIT Provider". + + + + + Extensibility + + + Inlining Support for Extensions + + PostgreSQL's JIT + implementation can inline the implementation of operators and functions + (of type C and internal). See the section "Inlining". To do so for functions in extensions, the + definition of these functions needs to be made available. When using PGXS to build an extension against a server + that has been compiled with LLVM support, the relevant files will be + installed automatically. + + + + The relevant files have to be installed into + $pkglibdir/bitcode/$extension/ and a summary of them + to $pkglibdir/bitcode/$extension.index.bc, where + $pkglibdir is the directory returned by + pg_config --pkglibdir and $extension + the basename of the extension's shared library. + + + + For functions built into PostgreSQL itself, + the bitcode is installed into + $pkglibdir/bitcode/postgres. + + + + + + + Pluggable <acronym>JIT</acronym> Provider + + + PostgreSQL provides a JIT + implementation based on LLVM. The interface to + the JIT provider is pluggable and the provider can be + changed without recompiling. The provider is chosen via the jit_provider GUC. + + + + <acronym>JIT</acronym> Provider Interface + + A JIT provider is loaded by dynamically loading the + named shared library. The normal library search path is used to locate + the library. 
To provide the required JIT provider + callbacks and to indicate that the library is actually a + JIT provider it needs to provide a function named + _PG_jit_provider_init. This function is passed a + struct that needs to be filled with the callback function pointers for + individual actions. + +struct JitProviderCallbacks +{ + JitProviderResetAfterErrorCB reset_after_error; + JitProviderReleaseContextCB release_context; + JitProviderCompileExprCB compile_expr; +}; +extern void _PG_jit_provider_init(JitProviderCallbacks *cb); + + + + + + + diff --git a/doc/src/sgml/postgres.sgml b/doc/src/sgml/postgres.sgml index 054347b17d9..0070603fc36 100644 --- a/doc/src/sgml/postgres.sgml +++ b/doc/src/sgml/postgres.sgml @@ -163,6 +163,7 @@ &diskusage; &wal; &logical-replication; + &jit; ®ress; diff --git a/doc/src/sgml/storage.sgml b/doc/src/sgml/storage.sgml index c0e548fa5bc..70a822e0597 100644 --- a/doc/src/sgml/storage.sgml +++ b/doc/src/sgml/storage.sgml @@ -875,7 +875,7 @@ data. Empty in ordinary tables. src/include/storage/bufpage.h. - + Following the page header are item identifiers (ItemIdData), each requiring four bytes. -- cgit v1.2.3