diff options
Diffstat (limited to 'src/backend/utils')
-rw-r--r-- | src/backend/utils/adt/orderedsetaggs.c | 33 | ||||
-rw-r--r-- | src/backend/utils/sort/tuplesort.c | 74 |
2 files changed, 75 insertions, 32 deletions
diff --git a/src/backend/utils/adt/orderedsetaggs.c b/src/backend/utils/adt/orderedsetaggs.c index 57201fd03d4..fe44d561295 100644 --- a/src/backend/utils/adt/orderedsetaggs.c +++ b/src/backend/utils/adt/orderedsetaggs.c @@ -453,7 +453,7 @@ percentile_disc_final(PG_FUNCTION_ARGS) elog(ERROR, "missing row in percentile_disc"); } - if (!tuplesort_getdatum(osastate->sortstate, true, &val, &isnull)) + if (!tuplesort_getdatum(osastate->sortstate, true, &val, &isnull, NULL)) elog(ERROR, "missing row in percentile_disc"); /* @@ -553,7 +553,7 @@ percentile_cont_final_common(FunctionCallInfo fcinfo, if (!tuplesort_skiptuples(osastate->sortstate, first_row, true)) elog(ERROR, "missing row in percentile_cont"); - if (!tuplesort_getdatum(osastate->sortstate, true, &first_val, &isnull)) + if (!tuplesort_getdatum(osastate->sortstate, true, &first_val, &isnull, NULL)) elog(ERROR, "missing row in percentile_cont"); if (isnull) PG_RETURN_NULL(); @@ -564,7 +564,7 @@ percentile_cont_final_common(FunctionCallInfo fcinfo, } else { - if (!tuplesort_getdatum(osastate->sortstate, true, &second_val, &isnull)) + if (!tuplesort_getdatum(osastate->sortstate, true, &second_val, &isnull, NULL)) elog(ERROR, "missing row in percentile_cont"); if (isnull) @@ -792,7 +792,7 @@ percentile_disc_multi_final(PG_FUNCTION_ARGS) if (!tuplesort_skiptuples(osastate->sortstate, target_row - rownum - 1, true)) elog(ERROR, "missing row in percentile_disc"); - if (!tuplesort_getdatum(osastate->sortstate, true, &val, &isnull)) + if (!tuplesort_getdatum(osastate->sortstate, true, &val, &isnull, NULL)) elog(ERROR, "missing row in percentile_disc"); rownum = target_row; @@ -921,7 +921,8 @@ percentile_cont_multi_final_common(FunctionCallInfo fcinfo, if (!tuplesort_skiptuples(osastate->sortstate, first_row - rownum - 1, true)) elog(ERROR, "missing row in percentile_cont"); - if (!tuplesort_getdatum(osastate->sortstate, true, &first_val, &isnull) || isnull) + if (!tuplesort_getdatum(osastate->sortstate, true, &first_val, + &isnull, NULL) || isnull) elog(ERROR, "missing row in percentile_cont"); rownum = first_row; @@ -941,7 +942,8 @@ percentile_cont_multi_final_common(FunctionCallInfo fcinfo, /* Fetch second_row if needed */ if (second_row > rownum) { - if (!tuplesort_getdatum(osastate->sortstate, true, &second_val, &isnull) || isnull) + if (!tuplesort_getdatum(osastate->sortstate, true, &second_val, + &isnull, NULL) || isnull) elog(ERROR, "missing row in percentile_cont"); rownum++; } @@ -1016,6 +1018,8 @@ mode_final(PG_FUNCTION_ARGS) int64 last_val_freq = 0; bool last_val_is_mode = false; FmgrInfo *equalfn; + Datum abbrev_val = (Datum) 0; + Datum last_abbrev_val = (Datum) 0; bool shouldfree; Assert(AggCheckCallContext(fcinfo, NULL) == AGG_CONTEXT_AGGREGATE); @@ -1042,7 +1046,7 @@ mode_final(PG_FUNCTION_ARGS) tuplesort_performsort(osastate->sortstate); /* Scan tuples and count frequencies */ - while (tuplesort_getdatum(osastate->sortstate, true, &val, &isnull)) + while (tuplesort_getdatum(osastate->sortstate, true, &val, &isnull, &abbrev_val)) { /* we don't expect any nulls, but ignore them if found */ if (isnull) @@ -1054,8 +1058,10 @@ mode_final(PG_FUNCTION_ARGS) mode_val = last_val = val; mode_freq = last_val_freq = 1; last_val_is_mode = true; + last_abbrev_val = abbrev_val; } - else if (DatumGetBool(FunctionCall2(equalfn, val, last_val))) + else if (abbrev_val == last_abbrev_val && + DatumGetBool(FunctionCall2(equalfn, val, last_val))) { /* value equal to previous value, count it */ if (last_val_is_mode) @@ -1078,6 +1084,8 @@ mode_final(PG_FUNCTION_ARGS) if (shouldfree && !last_val_is_mode) pfree(DatumGetPointer(last_val)); last_val = val; + /* avoid equality function calls by reusing abbreviated keys */ + last_abbrev_val = abbrev_val; last_val_freq = 1; last_val_is_mode = false; } @@ -1181,7 +1189,7 @@ hypothetical_rank_common(FunctionCallInfo fcinfo, int flag, tuplesort_performsort(osastate->sortstate); /* iterate till we find the hypothetical row */ - while (tuplesort_gettupleslot(osastate->sortstate, true, slot)) + while (tuplesort_gettupleslot(osastate->sortstate, true, slot, NULL)) { bool isnull; Datum d = slot_getattr(slot, nargs + 1, &isnull); @@ -1266,6 +1274,8 @@ hypothetical_dense_rank_final(PG_FUNCTION_ARGS) int64 duplicate_count = 0; OSAPerGroupState *osastate; int numDistinctCols; + Datum abbrevVal = (Datum) 0; + Datum abbrevOld = (Datum) 0; AttrNumber *sortColIdx; FmgrInfo *equalfns; TupleTableSlot *slot; @@ -1342,7 +1352,7 @@ hypothetical_dense_rank_final(PG_FUNCTION_ARGS) slot2 = extraslot; /* iterate till we find the hypothetical row */ - while (tuplesort_gettupleslot(osastate->sortstate, true, slot)) + while (tuplesort_gettupleslot(osastate->sortstate, true, slot, &abbrevVal)) { bool isnull; Datum d = slot_getattr(slot, nargs + 1, &isnull); @@ -1353,6 +1363,7 @@ hypothetical_dense_rank_final(PG_FUNCTION_ARGS) /* count non-distinct tuples */ if (!TupIsNull(slot2) && + abbrevVal == abbrevOld && execTuplesMatch(slot, slot2, numDistinctCols, sortColIdx, @@ -1363,6 +1374,8 @@ hypothetical_dense_rank_final(PG_FUNCTION_ARGS) tmpslot = slot2; slot2 = slot; slot = tmpslot; + /* avoid execTuplesMatch() calls by reusing abbreviated keys */ + abbrevOld = abbrevVal; rank++; diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c index a30e1707a79..67d86ed83b4 100644 --- a/src/backend/utils/sort/tuplesort.c +++ b/src/backend/utils/sort/tuplesort.c @@ -1280,7 +1280,8 @@ tuplesort_putindextuplevalues(Tuplesortstate *state, Relation rel, * converter it won't expect NULL values, and cost model is not * required to account for NULL, so in that case we avoid calling * converter and just set datum1 to zeroed representation (to be - * consistent). + * consistent, and to support cheap inequality tests for NULL + * abbreviated keys). */ stup.datum1 = original; } @@ -1349,7 +1350,11 @@ tuplesort_putdatum(Tuplesortstate *state, Datum val, bool isNull) if (isNull || state->datumTypeByVal) { - stup.datum1 = val; + /* + * Set datum1 to zeroed representation for NULLs (to be consistent, and + * to support cheap inequality tests for NULL abbreviated keys). + */ + stup.datum1 = !isNull ? val : (Datum) 0; stup.isnull1 = isNull; stup.tuple = NULL; /* no separate storage */ } @@ -1866,10 +1871,17 @@ tuplesort_gettuple_common(Tuplesortstate *state, bool forward, * Fetch the next tuple in either forward or back direction. * If successful, put tuple in slot and return TRUE; else, clear the slot * and return FALSE. + * + * Caller may optionally be passed back abbreviated value (on TRUE return + * value) when abbreviation was used, which can be used to cheaply avoid + * equality checks that might otherwise be required. Caller can safely make a + * determination of "non-equal tuple" based on simple binary inequality. A + * NULL value in leading attribute will set abbreviated value to zeroed + * representation, which caller may rely on in abbreviated inequality check. */ bool tuplesort_gettupleslot(Tuplesortstate *state, bool forward, - TupleTableSlot *slot) + TupleTableSlot *slot, Datum *abbrev) { MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); SortTuple stup; @@ -1882,6 +1894,10 @@ tuplesort_gettupleslot(Tuplesortstate *state, bool forward, if (stup.tuple) { + /* Record abbreviated key for caller */ + if (state->sortKeys->abbrev_converter && abbrev) + *abbrev = stup.datum1; + ExecStoreMinimalTuple((MinimalTuple) stup.tuple, slot, should_free); return true; } @@ -1937,10 +1953,17 @@ tuplesort_getindextuple(Tuplesortstate *state, bool forward, * * If the Datum is pass-by-ref type, the returned value is freshly palloc'd * and is now owned by the caller. + * + * Caller may optionally be passed back abbreviated value (on TRUE return + * value) when abbreviation was used, which can be used to cheaply avoid + * equality checks that might otherwise be required. Caller can safely make a + * determination of "non-equal tuple" based on simple binary inequality. A + * NULL value will have a zeroed abbreviated value representation, which caller + * may rely on in abbreviated inequality check. */ bool tuplesort_getdatum(Tuplesortstate *state, bool forward, - Datum *val, bool *isNull) + Datum *val, bool *isNull, Datum *abbrev) { MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); SortTuple stup; @@ -1952,6 +1975,10 @@ tuplesort_getdatum(Tuplesortstate *state, bool forward, return false; } + /* Record abbreviated key for caller */ + if (state->sortKeys->abbrev_converter && abbrev) + *abbrev = stup.datum1; + if (stup.isnull1 || state->datumTypeByVal) { *val = stup.datum1; @@ -2232,21 +2259,6 @@ mergeruns(Tuplesortstate *state) Assert(state->status == TSS_BUILDRUNS); Assert(state->memtupcount == 0); - /* - * If we produced only one initial run (quite likely if the total data - * volume is between 1X and 2X workMem), we can just use that tape as the - * finished output, rather than doing a useless merge. (This obvious - * optimization is not in Knuth's algorithm.) - */ - if (state->currentRun == 1) - { - state->result_tape = state->tp_tapenum[state->destTape]; - /* must freeze and rewind the finished output tape */ - LogicalTapeFreeze(state->tapeset, state->result_tape); - state->status = TSS_SORTEDONTAPE; - return; - } - if (state->sortKeys != NULL && state->sortKeys->abbrev_converter != NULL) { /* @@ -2263,6 +2275,21 @@ mergeruns(Tuplesortstate *state) state->sortKeys->abbrev_full_comparator = NULL; } + /* + * If we produced only one initial run (quite likely if the total data + * volume is between 1X and 2X workMem), we can just use that tape as the + * finished output, rather than doing a useless merge. (This obvious + * optimization is not in Knuth's algorithm.) + */ + if (state->currentRun == 1) + { + state->result_tape = state->tp_tapenum[state->destTape]; + /* must freeze and rewind the finished output tape */ + LogicalTapeFreeze(state->tapeset, state->result_tape); + state->status = TSS_SORTEDONTAPE; + return; + } + /* End of step D2: rewind all output tapes to prepare for merging */ for (tapenum = 0; tapenum < state->tapeRange; tapenum++) LogicalTapeRewind(state->tapeset, tapenum, false); @@ -3164,7 +3191,8 @@ copytup_heap(Tuplesortstate *state, SortTuple *stup, void *tup) * converter it won't expect NULL values, and cost model is not * required to account for NULL, so in that case we avoid calling * converter and just set datum1 to zeroed representation (to be - * consistent). + * consistent, and to support cheap inequality tests for NULL + * abbreviated keys). */ stup->datum1 = original; } @@ -3406,7 +3434,8 @@ copytup_cluster(Tuplesortstate *state, SortTuple *stup, void *tup) * converter it won't expect NULL values, and cost model is not * required to account for NULL, so in that case we avoid calling * converter and just set datum1 to zeroed representation (to be - * consistent). + * consistent, and to support cheap inequality tests for NULL + * abbreviated keys). */ stup->datum1 = original; } @@ -3710,7 +3739,8 @@ copytup_index(Tuplesortstate *state, SortTuple *stup, void *tup) * converter it won't expect NULL values, and cost model is not * required to account for NULL, so in that case we avoid calling * converter and just set datum1 to zeroed representation (to be - * consistent). + * consistent, and to support cheap inequality tests for NULL + * abbreviated keys). */ stup->datum1 = original; } |