From 3ca930fc39ccf987c1c22fd04a1e7463b5dd0dfd Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Thu, 7 Sep 2017 19:41:51 -0400 Subject: Improve performance of get_actual_variable_range with recently-dead tuples. In commit fccebe421, we hacked get_actual_variable_range() to scan the index with SnapshotDirty, so that if there are many uncommitted tuples at the end of the index range, it wouldn't laboriously scan through all of them looking for a live value to return. However, that didn't fix it for the case of many recently-dead tuples at the end of the index; SnapshotDirty recognizes those as committed dead and so we're back to the same problem. To improve the situation, invent a "SnapshotNonVacuumable" snapshot type and use that instead. The reason this helps is that, if the snapshot rejects a given index entry, we know that the indexscan will mark that index entry as killed. This means the next get_actual_variable_range() scan will proceed past that entry without visiting the heap, making the scan a lot faster. We may end up accepting a recently-dead tuple as being the estimated extremal value, but that doesn't seem much worse than the compromise we made before to accept not-yet-committed extremal values. The cost of the scan is still proportional to the number of dead index entries at the end of the range, so in the interval after a mass delete but before VACUUM's cleaned up the mess, it's still possible for get_actual_variable_range() to take a noticeable amount of time, if you've got enough such dead entries. But the constant factor is much much better than before, since all we need to do with each index entry is test its "killed" bit. We chose to back-patch commit fccebe421 at the time, but I'm hesitant to do so here, because this form of the problem seems to affect many fewer people. Also, even when it happens, it's less bad than the case fixed by commit fccebe421 because we don't get the contention effects from expensive TransactionIdIsInProgress tests. Dmitriy Sarafannikov, reviewed by Andrey Borodin Discussion: https://postgr.es/m/05C72CF7-B5F6-4DB9-8A09-5AC897653113@yandex.ru --- src/backend/utils/adt/selfuncs.c | 40 +++++++++++++++++++++++++++------------- src/backend/utils/time/tqual.c | 22 ++++++++++++++++++++++ 2 files changed, 49 insertions(+), 13 deletions(-) (limited to 'src/backend/utils') diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index 23e5526a8e1..81b0bc37d27 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -142,6 +142,7 @@ #include "utils/pg_locale.h" #include "utils/rel.h" #include "utils/selfuncs.h" +#include "utils/snapmgr.h" #include "utils/spccache.h" #include "utils/syscache.h" #include "utils/timestamp.h" @@ -5328,7 +5329,7 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata, HeapTuple tup; Datum values[INDEX_MAX_KEYS]; bool isnull[INDEX_MAX_KEYS]; - SnapshotData SnapshotDirty; + SnapshotData SnapshotNonVacuumable; estate = CreateExecutorState(); econtext = GetPerTupleExprContext(estate); @@ -5351,7 +5352,7 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata, slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRel)); econtext->ecxt_scantuple = slot; get_typlenbyval(vardata->atttype, &typLen, &typByVal); - InitDirtySnapshot(SnapshotDirty); + InitNonVacuumableSnapshot(SnapshotNonVacuumable, RecentGlobalXmin); /* set up an IS NOT NULL scan key so that we ignore nulls */ ScanKeyEntryInitialize(&scankeys[0], @@ -5373,17 +5374,29 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata, * active snapshot, which is the best approximation we've got * to what the query will see when executed. But that won't * be exact if a new snap is taken before running the query, - * and it can be very expensive if a lot of uncommitted rows - * exist at the end of the index (because we'll laboriously - * fetch each one and reject it). What seems like a good - * compromise is to use SnapshotDirty. That will accept - * uncommitted rows, and thus avoid fetching multiple heap - * tuples in this scenario. On the other hand, it will reject - * known-dead rows, and thus not give a bogus answer when the - * extreme value has been deleted; that case motivates not - * using SnapshotAny here. + * and it can be very expensive if a lot of recently-dead or + * uncommitted rows exist at the beginning or end of the index + * (because we'll laboriously fetch each one and reject it). + * Instead, we use SnapshotNonVacuumable. That will accept + * recently-dead and uncommitted rows as well as normal + * visible rows. On the other hand, it will reject known-dead + * rows, and thus not give a bogus answer when the extreme + * value has been deleted (unless the deletion was quite + * recent); that case motivates not using SnapshotAny here. + * + * A crucial point here is that SnapshotNonVacuumable, with + * RecentGlobalXmin as horizon, yields the inverse of the + * condition that the indexscan will use to decide that index + * entries are killable (see heap_hot_search_buffer()). + * Therefore, if the snapshot rejects a tuple and we have to + * continue scanning past it, we know that the indexscan will + * mark that index entry killed. That means that the next + * get_actual_variable_range() call will not have to visit + * that heap entry. In this way we avoid repetitive work when + * this function is used a lot during planning. */ - index_scan = index_beginscan(heapRel, indexRel, &SnapshotDirty, + index_scan = index_beginscan(heapRel, indexRel, + &SnapshotNonVacuumable, 1, 0); index_rescan(index_scan, scankeys, 1, NULL, 0); @@ -5415,7 +5428,8 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata, /* If max is requested, and we didn't find the index is empty */ if (max && have_data) { - index_scan = index_beginscan(heapRel, indexRel, &SnapshotDirty, + index_scan = index_beginscan(heapRel, indexRel, + &SnapshotNonVacuumable, 1, 0); index_rescan(index_scan, scankeys, 1, NULL, 0); diff --git a/src/backend/utils/time/tqual.c b/src/backend/utils/time/tqual.c index f9da9e17f52..bbac4083c98 100644 --- a/src/backend/utils/time/tqual.c +++ b/src/backend/utils/time/tqual.c @@ -45,6 +45,8 @@ * like HeapTupleSatisfiesSelf(), but includes open transactions * HeapTupleSatisfiesVacuum() * visible to any running transaction, used by VACUUM + * HeapTupleSatisfiesNonVacuumable() + * Snapshot-style API for HeapTupleSatisfiesVacuum * HeapTupleSatisfiesToast() * visible unless part of interrupted vacuum, used for TOAST * HeapTupleSatisfiesAny() @@ -1392,6 +1394,26 @@ HeapTupleSatisfiesVacuum(HeapTuple htup, TransactionId OldestXmin, return HEAPTUPLE_DEAD; } + +/* + * HeapTupleSatisfiesNonVacuumable + * + * True if tuple might be visible to some transaction; false if it's + * surely dead to everyone, ie, vacuumable. + * + * This is an interface to HeapTupleSatisfiesVacuum that meets the + * SnapshotSatisfiesFunc API, so it can be used through a Snapshot. + * snapshot->xmin must have been set up with the xmin horizon to use. + */ +bool +HeapTupleSatisfiesNonVacuumable(HeapTuple htup, Snapshot snapshot, + Buffer buffer) +{ + return HeapTupleSatisfiesVacuum(htup, snapshot->xmin, buffer) + != HEAPTUPLE_DEAD; +} + + /* * HeapTupleIsSurelyDead * -- cgit v1.2.3