summaryrefslogtreecommitdiff
path: root/src/backend/executor/nodeWindowAgg.c
diff options
context:
space:
mode:
authorTatsuo Ishii <ishii@postgresql.org>2025-10-03 09:47:36 +0900
committerTatsuo Ishii <ishii@postgresql.org>2025-10-03 09:47:36 +0900
commit25a30bbd4235a49c854036c84fe90f2bc5a87652 (patch)
tree263bb113d48edb9858b6ad140590e9cec17a81d2 /src/backend/executor/nodeWindowAgg.c
parent381f5cffae0040a402e082adc5d5e7636035d2a7 (diff)
Add IGNORE NULLS/RESPECT NULLS option to Window functions.
Add IGNORE NULLS/RESPECT NULLS option (null treatment clause) to lead, lag, first_value, last_value and nth_value window functions. If unspecified, the default is RESPECT NULLS which includes NULL values in any result calculation. IGNORE NULLS ignores NULL values. Built-in window functions are modified to call new API WinCheckAndInitializeNullTreatment() to indicate whether they accept IGNORE NULLS/RESPECT NULLS option or not (the API can be called by user defined window functions as well). If WinGetFuncArgInPartition's allowNullTreatment argument is true and IGNORE NULLS option is given, WinGetFuncArgInPartition() or WinGetFuncArgInFrame() will return evaluated function's argument expression on specified non NULL row (if it exists) in the partition or the frame. When IGNORE NULLS option is given, window functions need to visit and evaluate same rows over and over again to look for non null rows. To mitigate the issue, 2-bit not null information array is created while executing window functions to remember whether the row has been already evaluated to NULL or NOT NULL. If already evaluated, we could skip the evaluation work, thus we could get better performance. Author: Oliver Ford <ojford@gmail.com> Co-authored-by: Tatsuo Ishii <ishii@postgresql.org> Reviewed-by: Krasiyan Andreev <krasiyan@gmail.com> Reviewed-by: Andrew Gierth <andrew@tao11.riddles.org.uk> Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us> Reviewed-by: David Fetter <david@fetter.org> Reviewed-by: Vik Fearing <vik@postgresfriends.org> Reviewed-by: "David G. Johnston" <david.g.johnston@gmail.com> Reviewed-by: Chao Li <lic@highgo.com> Discussion: https://postgr.es/m/flat/CAGMVOdsbtRwE_4+v8zjH1d9xfovDeQAGLkP_B6k69_VoFEgX-A@mail.gmail.com
Diffstat (limited to 'src/backend/executor/nodeWindowAgg.c')
-rw-r--r--src/backend/executor/nodeWindowAgg.c467
1 files changed, 432 insertions, 35 deletions
diff --git a/src/backend/executor/nodeWindowAgg.c b/src/backend/executor/nodeWindowAgg.c
index 9a1acce2b5d..cf667c81211 100644
--- a/src/backend/executor/nodeWindowAgg.c
+++ b/src/backend/executor/nodeWindowAgg.c
@@ -69,6 +69,14 @@ typedef struct WindowObjectData
int readptr; /* tuplestore read pointer for this fn */
int64 markpos; /* row that markptr is positioned on */
int64 seekpos; /* row that readptr is positioned on */
+ uint8 *notnull_info; /* not null info */
+ int num_notnull_info; /* track size of the notnull_info array */
+
+ /*
+ * Null treatment options. One of: NO_NULLTREATMENT, PARSER_IGNORE_NULLS,
+ * PARSER_RESPECT_NULLS or IGNORE_NULLS.
+ */
+ int ignore_nulls;
} WindowObjectData;
/*
@@ -96,6 +104,7 @@ typedef struct WindowStatePerFuncData
bool plain_agg; /* is it just a plain aggregate function? */
int aggno; /* if so, index of its WindowStatePerAggData */
+ uint8 ignore_nulls; /* ignore nulls */
WindowObject winobj; /* object used in window function API */
} WindowStatePerFuncData;
@@ -182,8 +191,8 @@ static void begin_partition(WindowAggState *winstate);
static void spool_tuples(WindowAggState *winstate, int64 pos);
static void release_partition(WindowAggState *winstate);
-static int row_is_in_frame(WindowAggState *winstate, int64 pos,
- TupleTableSlot *slot);
+static int row_is_in_frame(WindowObject winobj, int64 pos,
+ TupleTableSlot *slot, bool fetch_tuple);
static void update_frameheadpos(WindowAggState *winstate);
static void update_frametailpos(WindowAggState *winstate);
static void update_grouptailpos(WindowAggState *winstate);
@@ -198,6 +207,34 @@ static bool are_peers(WindowAggState *winstate, TupleTableSlot *slot1,
static bool window_gettupleslot(WindowObject winobj, int64 pos,
TupleTableSlot *slot);
+static Datum ignorenulls_getfuncarginframe(WindowObject winobj, int argno,
+ int relpos, int seektype,
+ bool set_mark, bool *isnull,
+ bool *isout);
+static Datum gettuple_eval_partition(WindowObject winobj, int argno,
+ int64 abs_pos, bool *isnull,
+ bool *isout);
+static void init_notnull_info(WindowObject winobj);
+static void grow_notnull_info(WindowObject winobj, int64 pos);
+static uint8 get_notnull_info(WindowObject winobj, int64 pos);
+static void put_notnull_info(WindowObject winobj, int64 pos, bool isnull);
+
+/*
+ * Not null info bit array consists of 2-bit items
+ */
+#define NN_UNKNOWN 0x00 /* value not calculated yet */
+#define NN_NULL 0x01 /* NULL */
+#define NN_NOTNULL 0x02 /* NOT NULL */
+#define NN_MASK 0x03 /* mask for NOT NULL MAP */
+#define NN_BITS_PER_MEMBER 2 /* number of bits in not null map */
+/* number of items per variable */
+#define NN_ITEM_PER_VAR (BITS_PER_BYTE / NN_BITS_PER_MEMBER)
+/* convert map position to byte offset */
+#define NN_POS_TO_BYTES(pos) ((pos) / NN_ITEM_PER_VAR)
+/* bytes offset to map position */
+#define NN_BYTES_TO_POS(bytes) ((bytes) * NN_ITEM_PER_VAR)
+/* caculate shift bits */
+#define NN_SHIFT(pos) ((pos) % NN_ITEM_PER_VAR) * NN_BITS_PER_MEMBER
/*
* initialize_windowaggregate
@@ -942,7 +979,8 @@ eval_windowaggregates(WindowAggState *winstate)
* Exit loop if no more rows can be in frame. Skip aggregation if
* current row is not in frame but there might be more in the frame.
*/
- ret = row_is_in_frame(winstate, winstate->aggregatedupto, agg_row_slot);
+ ret = row_is_in_frame(agg_winobj, winstate->aggregatedupto,
+ agg_row_slot, false);
if (ret < 0)
break;
if (ret == 0)
@@ -1263,6 +1301,12 @@ begin_partition(WindowAggState *winstate)
winobj->markpos = -1;
winobj->seekpos = -1;
+
+ /* reset null map */
+ if (winobj->ignore_nulls == IGNORE_NULLS)
+ memset(winobj->notnull_info, 0,
+ NN_POS_TO_BYTES(
+ perfuncstate->winobj->num_notnull_info));
}
}
@@ -1412,8 +1456,8 @@ release_partition(WindowAggState *winstate)
* to our window framing rule
*
* The caller must have already determined that the row is in the partition
- * and fetched it into a slot. This function just encapsulates the framing
- * rules.
+ * and fetched it into a slot if fetch_tuple is false.
+.* This function just encapsulates the framing rules.
*
* Returns:
* -1, if the row is out of frame and no succeeding rows can be in frame
@@ -1423,8 +1467,10 @@ release_partition(WindowAggState *winstate)
* May clobber winstate->temp_slot_2.
*/
static int
-row_is_in_frame(WindowAggState *winstate, int64 pos, TupleTableSlot *slot)
+row_is_in_frame(WindowObject winobj, int64 pos, TupleTableSlot *slot,
+ bool fetch_tuple)
{
+ WindowAggState *winstate = winobj->winstate;
int frameOptions = winstate->frameOptions;
Assert(pos >= 0); /* else caller error */
@@ -1453,9 +1499,13 @@ row_is_in_frame(WindowAggState *winstate, int64 pos, TupleTableSlot *slot)
else if (frameOptions & (FRAMEOPTION_RANGE | FRAMEOPTION_GROUPS))
{
/* following row that is not peer is out of frame */
- if (pos > winstate->currentpos &&
- !are_peers(winstate, slot, winstate->ss.ss_ScanTupleSlot))
- return -1;
+ if (pos > winstate->currentpos)
+ {
+ if (fetch_tuple)
+ window_gettupleslot(winobj, pos, slot);
+ if (!are_peers(winstate, slot, winstate->ss.ss_ScanTupleSlot))
+ return -1;
+ }
}
else
Assert(false);
@@ -2619,14 +2669,17 @@ ExecInitWindowAgg(WindowAgg *node, EState *estate, int eflags)
elog(ERROR, "WindowFunc with winref %u assigned to WindowAgg with winref %u",
wfunc->winref, node->winref);
- /* Look for a previous duplicate window function */
+ /*
+ * Look for a previous duplicate window function, which needs the same
+ * ignore_nulls value
+ */
for (i = 0; i <= wfuncno; i++)
{
if (equal(wfunc, perfunc[i].wfunc) &&
!contain_volatile_functions((Node *) wfunc))
break;
}
- if (i <= wfuncno)
+ if (i <= wfuncno && wfunc->ignore_nulls == perfunc[i].ignore_nulls)
{
/* Found a match to an existing entry, so just mark it */
wfuncstate->wfuncno = i;
@@ -2679,6 +2732,8 @@ ExecInitWindowAgg(WindowAgg *node, EState *estate, int eflags)
winobj->argstates = wfuncstate->args;
winobj->localmem = NULL;
perfuncstate->winobj = winobj;
+ winobj->ignore_nulls = wfunc->ignore_nulls;
+ init_notnull_info(winobj);
/* It's a real window function, so set up to call it. */
fmgr_info_cxt(wfunc->winfnoid, &perfuncstate->flinfo,
@@ -3214,6 +3269,256 @@ window_gettupleslot(WindowObject winobj, int64 pos, TupleTableSlot *slot)
return true;
}
+/*
+ * get tupple and evaluate in a partition
+ */
+static Datum
+gettuple_eval_partition(WindowObject winobj, int argno,
+ int64 abs_pos, bool *isnull, bool *isout)
+{
+ WindowAggState *winstate;
+ ExprContext *econtext;
+ TupleTableSlot *slot;
+
+ winstate = winobj->winstate;
+ slot = winstate->temp_slot_1;
+ if (!window_gettupleslot(winobj, abs_pos, slot))
+ {
+ /* out of partition */
+ if (isout)
+ *isout = true;
+ *isnull = true;
+ return (Datum) 0;
+ }
+
+ if (isout)
+ *isout = false;
+ econtext = winstate->ss.ps.ps_ExprContext;
+ econtext->ecxt_outertuple = slot;
+ return ExecEvalExpr((ExprState *) list_nth
+ (winobj->argstates, argno),
+ econtext, isnull);
+}
+
+/*
+ * ignorenulls_getfuncarginframe
+ * For IGNORE NULLS, get the next nonnull value in the frame, moving forward
+ * or backward until we find a value or reach the frame's end.
+ */
+static Datum
+ignorenulls_getfuncarginframe(WindowObject winobj, int argno,
+ int relpos, int seektype, bool set_mark,
+ bool *isnull, bool *isout)
+{
+ WindowAggState *winstate;
+ ExprContext *econtext;
+ TupleTableSlot *slot;
+ Datum datum;
+ int64 abs_pos;
+ int64 mark_pos;
+ int notnull_offset;
+ int notnull_relpos;
+ int forward;
+
+ Assert(WindowObjectIsValid(winobj));
+ winstate = winobj->winstate;
+ econtext = winstate->ss.ps.ps_ExprContext;
+ slot = winstate->temp_slot_1;
+ datum = (Datum) 0;
+ notnull_offset = 0;
+ notnull_relpos = abs(relpos);
+
+ switch (seektype)
+ {
+ case WINDOW_SEEK_CURRENT:
+ elog(ERROR, "WINDOW_SEEK_CURRENT is not supported for WinGetFuncArgInFrame");
+ abs_pos = mark_pos = 0; /* keep compiler quiet */
+ break;
+ case WINDOW_SEEK_HEAD:
+ /* rejecting relpos < 0 is easy and simplifies code below */
+ if (relpos < 0)
+ goto out_of_frame;
+ update_frameheadpos(winstate);
+ abs_pos = winstate->frameheadpos;
+ mark_pos = winstate->frameheadpos;
+ forward = 1;
+ break;
+ case WINDOW_SEEK_TAIL:
+ /* rejecting relpos > 0 is easy and simplifies code below */
+ if (relpos > 0)
+ goto out_of_frame;
+ update_frametailpos(winstate);
+ abs_pos = winstate->frametailpos - 1;
+ mark_pos = 0; /* keep compiler quiet */
+ forward = -1;
+ break;
+ default:
+ elog(ERROR, "unrecognized window seek type: %d", seektype);
+ abs_pos = mark_pos = 0; /* keep compiler quiet */
+ break;
+ }
+
+ /*
+ * Get the next nonnull value in the frame, moving forward or backward
+ * until we find a value or reach the frame's end.
+ */
+ do
+ {
+ int inframe;
+ int v;
+
+ /*
+ * Check apparent out of frame case. We need to do this because we
+ * may not call window_gettupleslot before row_is_in_frame, which
+ * supposes abs_pos is never negative.
+ */
+ if (abs_pos < 0)
+ goto out_of_frame;
+
+ /* check whether row is in frame */
+ inframe = row_is_in_frame(winobj, abs_pos, slot, true);
+ if (inframe == -1)
+ goto out_of_frame;
+ else if (inframe == 0)
+ goto advance;
+
+ if (isout)
+ *isout = false;
+
+ v = get_notnull_info(winobj, abs_pos);
+ if (v == NN_NULL) /* this row is known to be NULL */
+ goto advance;
+
+ else if (v == NN_UNKNOWN) /* need to check NULL or not */
+ {
+ if (!window_gettupleslot(winobj, abs_pos, slot))
+ goto out_of_frame;
+
+ econtext->ecxt_outertuple = slot;
+ datum = ExecEvalExpr(
+ (ExprState *) list_nth(winobj->argstates,
+ argno), econtext,
+ isnull);
+ if (!*isnull)
+ notnull_offset++;
+
+ /* record the row status */
+ put_notnull_info(winobj, abs_pos, *isnull);
+ }
+ else /* this row is known to be NOT NULL */
+ {
+ notnull_offset++;
+ if (notnull_offset > notnull_relpos)
+ {
+ /* to prepare exiting this loop, datum needs to be set */
+ if (!window_gettupleslot(winobj, abs_pos, slot))
+ goto out_of_frame;
+
+ econtext->ecxt_outertuple = slot;
+ datum = ExecEvalExpr(
+ (ExprState *) list_nth
+ (winobj->argstates, argno),
+ econtext, isnull);
+ }
+ }
+advance:
+ abs_pos += forward;
+ } while (notnull_offset <= notnull_relpos);
+
+ if (set_mark)
+ WinSetMarkPosition(winobj, mark_pos);
+
+ return datum;
+
+out_of_frame:
+ if (isout)
+ *isout = true;
+ *isnull = true;
+ return (Datum) 0;
+}
+
+
+/*
+ * init_notnull_info
+ * Initialize non null map.
+ */
+static void
+init_notnull_info(WindowObject winobj)
+{
+/* initial number of notnull info members */
+#define INIT_NOT_NULL_INFO_NUM 128
+
+ if (winobj->ignore_nulls == PARSER_IGNORE_NULLS)
+ {
+ Size size = NN_POS_TO_BYTES(INIT_NOT_NULL_INFO_NUM);
+
+ winobj->notnull_info = palloc0(size);
+ winobj->num_notnull_info = INIT_NOT_NULL_INFO_NUM;
+ }
+}
+
+/*
+ * grow_notnull_info
+ * expand notnull_info if necessary.
+ * pos: not null info position
+*/
+static void
+grow_notnull_info(WindowObject winobj, int64 pos)
+{
+ if (pos >= winobj->num_notnull_info)
+ {
+ for (;;)
+ {
+ Size oldsize = NN_POS_TO_BYTES(winobj->num_notnull_info);
+ Size newsize = oldsize * 2;
+
+ winobj->notnull_info =
+ repalloc0(winobj->notnull_info, oldsize, newsize);
+ winobj->num_notnull_info = NN_BYTES_TO_POS(newsize);
+ if (winobj->num_notnull_info > pos)
+ break;
+ }
+ }
+}
+
+/*
+ * get_notnull_info
+ * retrieve a map
+ * pos: map position
+ */
+static uint8
+get_notnull_info(WindowObject winobj, int64 pos)
+{
+ uint8 mb;
+ int64 bpos;
+
+ grow_notnull_info(winobj, pos);
+ bpos = NN_POS_TO_BYTES(pos);
+ mb = winobj->notnull_info[bpos];
+ return (mb >> (NN_SHIFT(pos))) & NN_MASK;
+}
+
+/*
+ * put_notnull_info
+ * update map
+ * pos: map position
+ */
+static void
+put_notnull_info(WindowObject winobj, int64 pos, bool isnull)
+{
+ uint8 mb;
+ int64 bpos;
+ uint8 val = isnull ? NN_NULL : NN_NOTNULL;
+ int shift;
+
+ grow_notnull_info(winobj, pos);
+ bpos = NN_POS_TO_BYTES(pos);
+ mb = winobj->notnull_info[bpos];
+ shift = NN_SHIFT(pos);
+ mb &= ~(NN_MASK << shift); /* clear map */
+ mb |= (val << shift); /* update map */
+ winobj->notnull_info[bpos] = mb;
+}
/***********************************************************************
* API exposed to window functions
@@ -3221,6 +3526,38 @@ window_gettupleslot(WindowObject winobj, int64 pos, TupleTableSlot *slot)
/*
+ * WinCheckAndInitializeNullTreatment
+ * Check null treatment clause and sets ignore_nulls
+ *
+ * Window functions should call this to check if they are being called with
+ * a null treatment clause when they don't allow it, or to set ignore_nulls.
+ */
+void
+WinCheckAndInitializeNullTreatment(WindowObject winobj,
+ bool allowNullTreatment,
+ FunctionCallInfo fcinfo)
+{
+ if (winobj->ignore_nulls != NO_NULLTREATMENT && !allowNullTreatment)
+ {
+ HeapTuple proctup;
+ Form_pg_proc procform;
+ Oid funcid;
+
+ funcid = fcinfo->flinfo->fn_oid;
+ proctup = SearchSysCache1(PROCOID,
+ ObjectIdGetDatum(funcid));
+ if (!HeapTupleIsValid(proctup))
+ elog(ERROR, "cache lookup failed for function %u", funcid);
+ procform = (Form_pg_proc) GETSTRUCT(proctup);
+ elog(ERROR, "function %s does not allow RESPECT/IGNORE NULLS",
+ NameStr(procform->proname));
+ }
+ else if (winobj->ignore_nulls == PARSER_IGNORE_NULLS)
+ winobj->ignore_nulls = IGNORE_NULLS;
+
+}
+
+/*
* WinGetPartitionLocalMemory
* Get working memory that lives till end of partition processing
*
@@ -3378,23 +3715,37 @@ WinGetFuncArgInPartition(WindowObject winobj, int argno,
bool *isnull, bool *isout)
{
WindowAggState *winstate;
- ExprContext *econtext;
- TupleTableSlot *slot;
- bool gottuple;
int64 abs_pos;
+ Datum datum;
+ bool null_treatment = false;
+ int notnull_offset;
+ int notnull_relpos;
+ int forward;
Assert(WindowObjectIsValid(winobj));
winstate = winobj->winstate;
- econtext = winstate->ss.ps.ps_ExprContext;
- slot = winstate->temp_slot_1;
+
+ if (winobj->ignore_nulls == IGNORE_NULLS && relpos != 0)
+ {
+ null_treatment = true;
+ notnull_offset = 0;
+ notnull_relpos = abs(relpos);
+ forward = relpos > 0 ? 1 : -1;
+ }
switch (seektype)
{
case WINDOW_SEEK_CURRENT:
- abs_pos = winstate->currentpos + relpos;
+ if (null_treatment)
+ abs_pos = winstate->currentpos;
+ else
+ abs_pos = winstate->currentpos + relpos;
break;
case WINDOW_SEEK_HEAD:
- abs_pos = relpos;
+ if (null_treatment)
+ abs_pos = 0;
+ else
+ abs_pos = relpos;
break;
case WINDOW_SEEK_TAIL:
spool_tuples(winstate, -1);
@@ -3406,25 +3757,67 @@ WinGetFuncArgInPartition(WindowObject winobj, int argno,
break;
}
- gottuple = window_gettupleslot(winobj, abs_pos, slot);
-
- if (!gottuple)
- {
- if (isout)
- *isout = true;
- *isnull = true;
- return (Datum) 0;
- }
- else
+ if (!null_treatment) /* IGNORE NULLS is not specified */
{
- if (isout)
- *isout = false;
- if (set_mark)
+ datum = gettuple_eval_partition(winobj, argno,
+ abs_pos, isnull, isout);
+ if (!*isout && set_mark)
WinSetMarkPosition(winobj, abs_pos);
- econtext->ecxt_outertuple = slot;
- return ExecEvalExpr((ExprState *) list_nth(winobj->argstates, argno),
- econtext, isnull);
+ return datum;
}
+
+ /*
+ * Get the next nonnull value in the partition, moving forward or backward
+ * until we find a value or reach the partition's end.
+ */
+ do
+ {
+ abs_pos += forward;
+ if (abs_pos < 0)
+ {
+ /* out of partition */
+ if (isout)
+ *isout = true;
+ *isnull = true;
+ datum = 0;
+ break;
+ }
+
+ switch (get_notnull_info(winobj, abs_pos))
+ {
+ case NN_NOTNULL: /* this row is known to be NOT NULL */
+ notnull_offset++;
+ if (notnull_offset >= notnull_relpos)
+ {
+ /* prepare to exit this loop */
+ datum = gettuple_eval_partition(winobj, argno,
+ abs_pos, isnull, isout);
+ }
+ break;
+ case NN_NULL: /* this row is known to be NULL */
+ if (isout)
+ *isout = false;
+ *isnull = true;
+ datum = 0;
+ break;
+ default: /* need to check NULL or not */
+ datum = gettuple_eval_partition(winobj, argno,
+ abs_pos, isnull, isout);
+ if (*isout) /* out of partition? */
+ return datum;
+
+ if (!*isnull)
+ notnull_offset++;
+ /* record the row status */
+ put_notnull_info(winobj, abs_pos, *isnull);
+ break;
+ }
+ } while (notnull_offset < notnull_relpos);
+
+ if (!*isout && set_mark)
+ WinSetMarkPosition(winobj, abs_pos);
+
+ return datum;
}
/*
@@ -3476,6 +3869,10 @@ WinGetFuncArgInFrame(WindowObject winobj, int argno,
econtext = winstate->ss.ps.ps_ExprContext;
slot = winstate->temp_slot_1;
+ if (winobj->ignore_nulls == IGNORE_NULLS)
+ return ignorenulls_getfuncarginframe(winobj, argno, relpos, seektype,
+ set_mark, isnull, isout);
+
switch (seektype)
{
case WINDOW_SEEK_CURRENT:
@@ -3624,7 +4021,7 @@ WinGetFuncArgInFrame(WindowObject winobj, int argno,
goto out_of_frame;
/* The code above does not detect all out-of-frame cases, so check */
- if (row_is_in_frame(winstate, abs_pos, slot) <= 0)
+ if (row_is_in_frame(winobj, abs_pos, slot, false) <= 0)
goto out_of_frame;
if (isout)