From 4de2d4fba38f4f7aff7f95401eb43a6cd05a6db4 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sat, 14 Oct 2017 15:21:39 -0400 Subject: Explicitly track whether aggregate final functions modify transition state. Up to now, there have been hard-wired assumptions that normal aggregates' final functions never modify their transition states, while ordered-set aggregates' final functions always do. This has always been a bit limiting, and in particular it's getting in the way of improving the built-in ordered-set aggregates to allow merging of transition states. Therefore, let's introduce catalog and CREATE AGGREGATE infrastructure that lets the finalfn's behavior be declared explicitly. There are now three possibilities for the finalfn behavior: it's purely read-only, it trashes the transition state irrecoverably, or it changes the state in such a way that no more transfn calls are possible but the state can still be passed to other, compatible finalfns. There are no examples of this third case today, but we'll shortly make the built-in OSAs act like that. This change allows user-defined aggregates to explicitly disclaim support for use as window functions, and/or to prevent transition state merging, if their implementations cannot handle that. While it was previously possible to handle the window case with a run-time error check, there was not any way to prevent transition state merging, which in retrospect is something commit 804163bc2 should have provided for. But better late than never. In passing, split out pg_aggregate.c's extern function declarations into a new header file pg_aggregate_fn.h, similarly to what we've done for some other catalog headers, so that pg_aggregate.h itself can be safe for frontend files to include. This lets pg_dump use the symbolic names for relevant constants. 
Discussion: https://postgr.es/m/4834.1507849699@sss.pgh.pa.us --- doc/src/sgml/catalogs.sgml | 20 ++++++++++ doc/src/sgml/ref/create_aggregate.sgml | 72 +++++++++++++++++++++++++++++++--- doc/src/sgml/xaggr.sgml | 20 ++++++---- 3 files changed, 99 insertions(+), 13 deletions(-) (limited to 'doc/src') diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml index 9af77c1f5ab..cfec2465d26 100644 --- a/doc/src/sgml/catalogs.sgml +++ b/doc/src/sgml/catalogs.sgml @@ -486,6 +486,26 @@ True to pass extra dummy arguments to aggmfinalfn + + aggfinalmodify + char + + Whether aggfinalfn modifies the + transition state value: + r if it is read-only, + s if the aggtransfn + cannot be applied after the aggfinalfn, or + w if it writes on the value + + + + aggmfinalmodify + char + + Like aggfinalmodify, but for + the aggmfinalfn + + aggsortop oid diff --git a/doc/src/sgml/ref/create_aggregate.sgml b/doc/src/sgml/ref/create_aggregate.sgml index c96e4faba7b..4d9c8b0b707 100644 --- a/doc/src/sgml/ref/create_aggregate.sgml +++ b/doc/src/sgml/ref/create_aggregate.sgml @@ -27,6 +27,7 @@ CREATE AGGREGATE name ( [ state_data_size ] [ , FINALFUNC = ffunc ] [ , FINALFUNC_EXTRA ] + [ , FINALFUNC_MODIFY = { READ_ONLY | SHARABLE | READ_WRITE } ] [ , COMBINEFUNC = combinefunc ] [ , SERIALFUNC = serialfunc ] [ , DESERIALFUNC = deserialfunc ] @@ -37,6 +38,7 @@ CREATE AGGREGATE name ( [ mstate_data_size ] [ , MFINALFUNC = mffunc ] [ , MFINALFUNC_EXTRA ] + [ , MFINALFUNC_MODIFY = { READ_ONLY | SHARABLE | READ_WRITE } ] [ , MINITCOND = minitial_condition ] [ , SORTOP = sort_operator ] [ , PARALLEL = { SAFE | RESTRICTED | UNSAFE } ] @@ -49,6 +51,7 @@ CREATE AGGREGATE name ( [ [ state_data_size ] [ , FINALFUNC = ffunc ] [ , FINALFUNC_EXTRA ] + [ , FINALFUNC_MODIFY = { READ_ONLY | SHARABLE | READ_WRITE } ] [ , INITCOND = initial_condition ] [ , PARALLEL = { SAFE | RESTRICTED | UNSAFE } ] [ , HYPOTHETICAL ] @@ -63,6 +66,7 @@ CREATE AGGREGATE name ( [ , SSPACE = state_data_size ] [ , 
FINALFUNC = ffunc ] [ , FINALFUNC_EXTRA ] + [ , FINALFUNC_MODIFY = { READ_ONLY | SHARABLE | READ_WRITE } ] [ , COMBINEFUNC = combinefunc ] [ , SERIALFUNC = serialfunc ] [ , DESERIALFUNC = deserialfunc ] @@ -73,6 +77,7 @@ CREATE AGGREGATE name ( [ , MSSPACE = mstate_data_size ] [ , MFINALFUNC = mffunc ] [ , MFINALFUNC_EXTRA ] + [ , MFINALFUNC_MODIFY = { READ_ONLY | SHARABLE | READ_WRITE } ] [ , MINITCOND = minitial_condition ] [ , SORTOP = sort_operator ] ) @@ -197,7 +202,8 @@ CREATE AGGREGATE name ( as described in the section on moving-aggregate mode. This requires specifying the MSFUNC, MINVFUNC, and MSTYPE parameters, and optionally - the MSPACE, MFINALFUNC, MFINALFUNC_EXTRA, + the MSPACE, MFINALFUNC, + MFINALFUNC_EXTRA, MFINALFUNC_MODIFY, and MINITCOND parameters. Except for MINVFUNC, these parameters work like the corresponding simple-aggregate parameters without M; they define a separate implementation of the @@ -412,6 +418,21 @@ SELECT col FROM tab ORDER BY col USING sortop LIMIT 1; + + FINALFUNC_MODIFY = { READ_ONLY | SHARABLE | READ_WRITE } + + + This option specifies whether the final function is a pure function + that does not modify its arguments. READ_ONLY indicates + it does not; the other two values indicate that it may change the + transition state value. See below for more detail. The + default is READ_ONLY, except for ordered-set aggregates, + for which the default is READ_WRITE. + + + + combinefunc @@ -563,6 +584,16 @@ SELECT col FROM tab ORDER BY col USING sortop LIMIT 1; + + MFINALFUNC_MODIFY = { READ_ONLY | SHARABLE | READ_WRITE } + + + This option is like FINALFUNC_MODIFY, but it describes + the behavior of the moving-aggregate final function. + + + + minitial_condition @@ -587,12 +618,12 @@ SELECT col FROM tab ORDER BY col USING sortop LIMIT 1; - PARALLEL + PARALLEL = { SAFE | RESTRICTED | UNSAFE } The meanings of PARALLEL SAFE, PARALLEL RESTRICTED, and PARALLEL UNSAFE are the same as - for CREATE FUNCTION. An aggregate will not be + in CREATE FUNCTION. 
An aggregate will not be considered for parallelization if it is marked PARALLEL UNSAFE (which is the default!) or PARALLEL RESTRICTED. Note that the parallel-safety markings of the aggregate's support @@ -624,8 +655,8 @@ SELECT col FROM tab ORDER BY col USING sortop LIMIT 1; - - Notes + + Notes In parameters that specify support function names, you can write @@ -634,6 +665,34 @@ SELECT col FROM tab ORDER BY col USING sortop LIMIT 1; of the support functions are determined from other parameters. + + Ordinarily, Postgres functions are expected to be true functions that + do not modify their input values. However, an aggregate transition + function, when used in the context of an aggregate, + is allowed to cheat and modify its transition-state argument in place. + This can provide substantial performance benefits compared to making + a fresh copy of the transition state each time. + + + + Likewise, while an aggregate final function is normally expected not to + modify its input values, sometimes it is impractical to avoid modifying + the transition-state argument. Such behavior must be declared using + the FINALFUNC_MODIFY parameter. The READ_WRITE + value indicates that the final function modifies the transition state in + unspecified ways. This value prevents use of the aggregate as a window + function, and it also prevents merging of transition states for aggregate + calls that share the same input values and transition functions. + The SHARABLE value indicates that the transition function + cannot be applied after the final function, but multiple final-function + calls can be performed on the ending transition state value. This value + prevents use of the aggregate as a window function, but it allows merging + of transition states. (That is, the optimization of interest here is not + applying the same final function repeatedly, but applying different final + functions to the same ending transition state value. 
This is allowed as + long as none of the final functions are marked READ_WRITE.) + + If an aggregate supports moving-aggregate mode, it will improve calculation efficiency when the aggregate is used as a window function @@ -671,7 +730,8 @@ SELECT col FROM tab ORDER BY col USING sortop LIMIT 1; Note that whether or not the aggregate supports moving-aggregate mode, PostgreSQL can handle a moving frame end without recalculation; this is done by continuing to add new values - to the aggregate's state. It is assumed that the final function does + to the aggregate's state. This is why use of an aggregate as a window + function requires that the final function be read-only: it must not damage the aggregate's state value, so that the aggregation can be continued even after an aggregate result value has been obtained for one set of frame boundaries. diff --git a/doc/src/sgml/xaggr.sgml b/doc/src/sgml/xaggr.sgml index 79a9f288b2b..9e6a6648dc3 100644 --- a/doc/src/sgml/xaggr.sgml +++ b/doc/src/sgml/xaggr.sgml @@ -487,6 +487,13 @@ SELECT percentile_disc(0.5) WITHIN GROUP (ORDER BY income) FROM households; C, since their state values aren't definable as any SQL data type. (In the above example, notice that the state value is declared as type internal — this is typical.) + Also, because the final function performs the sort, it is not possible + to continue adding input rows by executing the transition function again + later. This means the final function is not READ_ONLY; + it must be declared in + CREATE AGGREGATE as READ_WRITE, or as SHARABLE if it's + possible for additional final-function calls to make use of the + already-sorted state. 
@@ -622,16 +629,15 @@ SELECT percentile_disc(0.5) WITHIN GROUP (ORDER BY income) FROM households; if (AggCheckCallContext(fcinfo, NULL)) - One reason for checking this is that when it is true for a transition - function, the first input + One reason for checking this is that when it is true, the first input must be a temporary state value and can therefore safely be modified in-place rather than allocating a new copy. See int8inc() for an example. - (This is the only - case where it is safe for a function to modify a pass-by-reference input. - In particular, final functions for normal aggregates must not - modify their inputs in any case, because in some cases they will be - re-executed on the same final state value.) + (While aggregate transition functions are always allowed to modify + the transition value in-place, aggregate final functions are generally + discouraged from doing so; if they do so, the behavior must be declared + when creating the aggregate. See + CREATE AGGREGATE for more detail.) -- cgit v1.2.3