From 27cc7cd2bc8a5e8efc8279bc5d2a8ae42fd8ad33 Mon Sep 17 00:00:00 2001 From: Andres Freund Date: Thu, 5 Sep 2019 13:00:20 -0700 Subject: Reorder EPQ work, to fix rowmark related bugs and improve efficiency. In ad0bda5d24ea I changed the EvalPlanQual machinery to store substitution tuples in slot, instead of using plain HeapTuples. The main motivation for that was that using HeapTuples will be inefficient for future tableams. But it turns out that that conversion was buggy for non-locking rowmarks - the wrong tuple descriptor was used to create the slot. As a secondary issue 5db6df0c0 changed ExecLockRows() to begin EPQ earlier, to allow to fetch the locked rows directly into the EPQ slots, instead of having to copy tuples around. Unfortunately, as Tom complained, that forces some expensive initialization to happen earlier. As a third issue, the test coverage for EPQ was clearly insufficient. Fixing the first issue is unfortunately not trivial: Non-locked row marks were fetched at the start of EPQ, and we don't have the type information for the rowmarks available at that point. While we could change that, it's not easy. It might be worthwhile to change that at some point, but to fix this bug, it seems better to delay fetching non-locking rowmarks when they're actually needed, rather than eagerly. They're referenced at most once, and in cases where EPQ fails, might never be referenced. Fetching them when needed also increases locality a bit. To be able to fetch rowmarks during execution, rather than initialization, we need to be able to access the active EPQState, as that contains necessary data. To do so move EPQ related data from EState to EPQState, and, only for EStates creates as part of EPQ, reference the associated EPQState from EState. To fix the second issue, change EPQ initialization to allow use of EvalPlanQualSlot() to be used before EvalPlanQualBegin() (but obviously still requiring EvalPlanQualInit() to have been done). As these changes made struct EState harder to understand, e.g. by adding multiple EStates, significantly reorder the members, and add a lot more comments. Also add a few more EPQ tests, including one that fails for the first issue above. More is needed. Reported-By: yi huang Author: Andres Freund Reviewed-By: Tom Lane Discussion: https://postgr.es/m/CAHU7rYZo_C4ULsAx_LAj8az9zqgrD8WDd4hTegDTMM1LMqrBsg@mail.gmail.com https://postgr.es/m/24530.1562686693@sss.pgh.pa.us Backpatch: 12-, where the EPQ changes were introduced --- src/backend/executor/execScan.c | 66 +++++++++++++++++++++++++++++++++-------- 1 file changed, 53 insertions(+), 13 deletions(-) (limited to 'src/backend/executor/execScan.c') diff --git a/src/backend/executor/execScan.c b/src/backend/executor/execScan.c index c0e4a5376c3..b7fcd94439c 100644 --- a/src/backend/executor/execScan.c +++ b/src/backend/executor/execScan.c @@ -40,8 +40,10 @@ ExecScanFetch(ScanState *node, CHECK_FOR_INTERRUPTS(); - if (estate->es_epqTupleSlot != NULL) + if (estate->es_epq_active != NULL) { + EPQState *epqstate = estate->es_epq_active; + /* * We are inside an EvalPlanQual recheck. Return the test tuple if * one is available, after rechecking any access-method-specific @@ -51,29 +53,43 @@ ExecScanFetch(ScanState *node, if (scanrelid == 0) { - TupleTableSlot *slot = node->ss_ScanTupleSlot; - /* * This is a ForeignScan or CustomScan which has pushed down a * join to the remote side. The recheck method is responsible not * only for rechecking the scan/join quals but also for storing * the correct tuple in the slot. */ + + TupleTableSlot *slot = node->ss_ScanTupleSlot; + if (!(*recheckMtd) (node, slot)) ExecClearTuple(slot); /* would not be returned by scan */ return slot; } - else if (estate->es_epqTupleSlot[scanrelid - 1] != NULL) + else if (epqstate->relsubs_done[scanrelid - 1]) { + /* + * Return empty slot, as we already performed an EPQ substitution + * for this relation. + */ + TupleTableSlot *slot = node->ss_ScanTupleSlot; - /* Return empty slot if we already returned a tuple */ - if (estate->es_epqScanDone[scanrelid - 1]) - return ExecClearTuple(slot); - /* Else mark to remember that we shouldn't return more */ - estate->es_epqScanDone[scanrelid - 1] = true; + /* Return empty slot, as we already returned a tuple */ + return ExecClearTuple(slot); + } + else if (epqstate->relsubs_slot[scanrelid - 1] != NULL) + { + /* + * Return replacement tuple provided by the EPQ caller. + */ - slot = estate->es_epqTupleSlot[scanrelid - 1]; + TupleTableSlot *slot = epqstate->relsubs_slot[scanrelid - 1]; + + Assert(epqstate->relsubs_rowmark[scanrelid - 1] == NULL); + + /* Mark to remember that we shouldn't return more */ + epqstate->relsubs_done[scanrelid - 1] = true; /* Return empty slot if we haven't got a test tuple */ if (TupIsNull(slot)) @@ -83,7 +99,30 @@ ExecScanFetch(ScanState *node, if (!(*recheckMtd) (node, slot)) return ExecClearTuple(slot); /* would not be returned by * scan */ + return slot; + } + else if (epqstate->relsubs_rowmark[scanrelid - 1] != NULL) + { + /* + * Fetch and return replacement tuple using a non-locking rowmark. + */ + + TupleTableSlot *slot = node->ss_ScanTupleSlot; + + /* Mark to remember that we shouldn't return more */ + epqstate->relsubs_done[scanrelid - 1] = true; + + if (!EvalPlanQualFetchRowMark(epqstate, scanrelid, slot)) + return NULL; + /* Return empty slot if we haven't got a test tuple */ + if (TupIsNull(slot)) + return NULL; + + /* Check if it meets the access-method conditions */ + if (!(*recheckMtd) (node, slot)) + return ExecClearTuple(slot); /* would not be returned by + * scan */ return slot; } } @@ -268,12 +307,13 @@ ExecScanReScan(ScanState *node) ExecClearTuple(node->ss_ScanTupleSlot); /* Rescan EvalPlanQual tuple if we're inside an EvalPlanQual recheck */ - if (estate->es_epqScanDone != NULL) + if (estate->es_epq_active != NULL) { + EPQState *epqstate = estate->es_epq_active; Index scanrelid = ((Scan *) node->ps.plan)->scanrelid; if (scanrelid > 0) - estate->es_epqScanDone[scanrelid - 1] = false; + epqstate->relsubs_done[scanrelid - 1] = false; else { Bitmapset *relids; @@ -295,7 +335,7 @@ ExecScanReScan(ScanState *node) while ((rtindex = bms_next_member(relids, rtindex)) >= 0) { Assert(rtindex > 0); - estate->es_epqScanDone[rtindex - 1] = false; + epqstate->relsubs_done[rtindex - 1] = false; } } } -- cgit v1.2.3