Diffstat (limited to 'src/backend/commands/analyze.c')
-rw-r--r-- | src/backend/commands/analyze.c | 207
1 file changed, 42 insertions, 165 deletions
diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
index 3465713d108..e0ec62c88c7 100644
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -17,11 +17,11 @@
 #include <math.h>
 
 #include "access/genam.h"
-#include "access/heapam.h"
 #include "access/multixact.h"
 #include "access/relation.h"
 #include "access/sysattr.h"
 #include "access/table.h"
+#include "access/tableam.h"
 #include "access/transam.h"
 #include "access/tupconvert.h"
 #include "access/tuptoaster.h"
@@ -1014,6 +1014,8 @@ acquire_sample_rows(Relation onerel, int elevel,
     TransactionId OldestXmin;
     BlockSamplerData bs;
     ReservoirStateData rstate;
+    TupleTableSlot *slot;
+    TableScanDesc scan;
 
     Assert(targrows > 0);
 
@@ -1027,193 +1029,68 @@ acquire_sample_rows(Relation onerel, int elevel,
     /* Prepare for sampling rows */
     reservoir_init_selection_state(&rstate, targrows);
 
+    scan = table_beginscan_analyze(onerel);
+    slot = table_slot_create(onerel, NULL);
+
     /* Outer loop over blocks to sample */
     while (BlockSampler_HasMore(&bs))
     {
         BlockNumber targblock = BlockSampler_Next(&bs);
-        Buffer      targbuffer;
-        Page        targpage;
-        OffsetNumber targoffset,
-                    maxoffset;
 
         vacuum_delay_point();
 
-        /*
-         * We must maintain a pin on the target page's buffer to ensure that
-         * the maxoffset value stays good (else concurrent VACUUM might delete
-         * tuples out from under us).  Hence, pin the page until we are done
-         * looking at it.  We also choose to hold sharelock on the buffer
-         * throughout --- we could release and re-acquire sharelock for each
-         * tuple, but since we aren't doing much work per tuple, the extra
-         * lock traffic is probably better avoided.
-         */
-        targbuffer = ReadBufferExtended(onerel, MAIN_FORKNUM, targblock,
-                                        RBM_NORMAL, vac_strategy);
-        LockBuffer(targbuffer, BUFFER_LOCK_SHARE);
-        targpage = BufferGetPage(targbuffer);
-        maxoffset = PageGetMaxOffsetNumber(targpage);
-
-        /* Inner loop over all tuples on the selected page */
-        for (targoffset = FirstOffsetNumber; targoffset <= maxoffset; targoffset++)
-        {
-            ItemId      itemid;
-            HeapTupleData targtuple;
-            bool        sample_it = false;
-
-            itemid = PageGetItemId(targpage, targoffset);
+        if (!table_scan_analyze_next_block(scan, targblock, vac_strategy))
+            continue;
 
+        while (table_scan_analyze_next_tuple(scan, OldestXmin, &liverows, &deadrows, slot))
+        {
             /*
-             * We ignore unused and redirect line pointers.  DEAD line
-             * pointers should be counted as dead, because we need vacuum to
-             * run to get rid of them.  Note that this rule agrees with the
-             * way that heap_page_prune() counts things.
+             * The first targrows sample rows are simply copied into the
+             * reservoir. Then we start replacing tuples in the sample until
+             * we reach the end of the relation. This algorithm is from Jeff
+             * Vitter's paper (see full citation below). It works by
+             * repeatedly computing the number of tuples to skip before
+             * selecting a tuple, which replaces a randomly chosen element of
+             * the reservoir (current set of tuples). At all times the
+             * reservoir is a true random sample of the tuples we've passed
+             * over so far, so when we fall off the end of the relation we're
+             * done.
              */
-            if (!ItemIdIsNormal(itemid))
-            {
-                if (ItemIdIsDead(itemid))
-                    deadrows += 1;
-                continue;
-            }
-
-            ItemPointerSet(&targtuple.t_self, targblock, targoffset);
-
-            targtuple.t_tableOid = RelationGetRelid(onerel);
-            targtuple.t_data = (HeapTupleHeader) PageGetItem(targpage, itemid);
-            targtuple.t_len = ItemIdGetLength(itemid);
-
-            switch (HeapTupleSatisfiesVacuum(&targtuple,
-                                             OldestXmin,
-                                             targbuffer))
-            {
-                case HEAPTUPLE_LIVE:
-                    sample_it = true;
-                    liverows += 1;
-                    break;
-
-                case HEAPTUPLE_DEAD:
-                case HEAPTUPLE_RECENTLY_DEAD:
-                    /* Count dead and recently-dead rows */
-                    deadrows += 1;
-                    break;
-
-                case HEAPTUPLE_INSERT_IN_PROGRESS:
-
-                    /*
-                     * Insert-in-progress rows are not counted.  We assume
-                     * that when the inserting transaction commits or aborts,
-                     * it will send a stats message to increment the proper
-                     * count.  This works right only if that transaction ends
-                     * after we finish analyzing the table; if things happen
-                     * in the other order, its stats update will be
-                     * overwritten by ours.  However, the error will be large
-                     * only if the other transaction runs long enough to
-                     * insert many tuples, so assuming it will finish after us
-                     * is the safer option.
-                     *
-                     * A special case is that the inserting transaction might
-                     * be our own.  In this case we should count and sample
-                     * the row, to accommodate users who load a table and
-                     * analyze it in one transaction.  (pgstat_report_analyze
-                     * has to adjust the numbers we send to the stats
-                     * collector to make this come out right.)
-                     */
-                    if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(targtuple.t_data)))
-                    {
-                        sample_it = true;
-                        liverows += 1;
-                    }
-                    break;
-
-                case HEAPTUPLE_DELETE_IN_PROGRESS:
-
-                    /*
-                     * We count and sample delete-in-progress rows the same as
-                     * live ones, so that the stats counters come out right if
-                     * the deleting transaction commits after us, per the same
-                     * reasoning given above.
-                     *
-                     * If the delete was done by our own transaction, however,
-                     * we must count the row as dead to make
-                     * pgstat_report_analyze's stats adjustments come out
-                     * right.  (Note: this works out properly when the row was
-                     * both inserted and deleted in our xact.)
-                     *
-                     * The net effect of these choices is that we act as
-                     * though an IN_PROGRESS transaction hasn't happened yet,
-                     * except if it is our own transaction, which we assume
-                     * has happened.
-                     *
-                     * This approach ensures that we behave sanely if we see
-                     * both the pre-image and post-image rows for a row being
-                     * updated by a concurrent transaction: we will sample the
-                     * pre-image but not the post-image.  We also get sane
-                     * results if the concurrent transaction never commits.
-                     */
-                    if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(targtuple.t_data)))
-                        deadrows += 1;
-                    else
-                    {
-                        sample_it = true;
-                        liverows += 1;
-                    }
-                    break;
-
-                default:
-                    elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
-                    break;
-            }
-
-            if (sample_it)
+            if (numrows < targrows)
+                rows[numrows++] = ExecCopySlotHeapTuple(slot);
+            else
             {
                 /*
-                 * The first targrows sample rows are simply copied into the
-                 * reservoir. Then we start replacing tuples in the sample
-                 * until we reach the end of the relation.  This algorithm is
-                 * from Jeff Vitter's paper (see full citation below). It
-                 * works by repeatedly computing the number of tuples to skip
-                 * before selecting a tuple, which replaces a randomly chosen
-                 * element of the reservoir (current set of tuples).  At all
-                 * times the reservoir is a true random sample of the tuples
-                 * we've passed over so far, so when we fall off the end of
-                 * the relation we're done.
+                 * t in Vitter's paper is the number of records already
+                 * processed.  If we need to compute a new S value, we must
+                 * use the not-yet-incremented value of samplerows as t.
                  */
-                if (numrows < targrows)
-                    rows[numrows++] = heap_copytuple(&targtuple);
-                else
+                if (rowstoskip < 0)
+                    rowstoskip = reservoir_get_next_S(&rstate, samplerows, targrows);
+
+                if (rowstoskip <= 0)
                 {
                     /*
-                     * t in Vitter's paper is the number of records already
-                     * processed.  If we need to compute a new S value, we
-                     * must use the not-yet-incremented value of samplerows as
-                     * t.
+                     * Found a suitable tuple, so save it, replacing one old
+                     * tuple at random
                      */
-                    if (rowstoskip < 0)
-                        rowstoskip = reservoir_get_next_S(&rstate, samplerows, targrows);
-
-                    if (rowstoskip <= 0)
-                    {
-                        /*
-                         * Found a suitable tuple, so save it, replacing one
-                         * old tuple at random
-                         */
-                        int         k = (int) (targrows * sampler_random_fract(rstate.randstate));
+                    int         k = (int) (targrows * sampler_random_fract(rstate.randstate));
 
-                        Assert(k >= 0 && k < targrows);
-                        heap_freetuple(rows[k]);
-                        rows[k] = heap_copytuple(&targtuple);
-                    }
-
-                    rowstoskip -= 1;
+                    Assert(k >= 0 && k < targrows);
+                    heap_freetuple(rows[k]);
+                    rows[k] = ExecCopySlotHeapTuple(slot);
                 }
 
-                samplerows += 1;
+                rowstoskip -= 1;
             }
-        }
 
-        /* Now release the lock and pin on the page */
-        UnlockReleaseBuffer(targbuffer);
+            samplerows += 1;
+        }
     }
 
+    ExecDropSingleTupleTableSlot(slot);
+    table_endscan(scan);
+
     /*
      * If we didn't find as many tuples as we wanted then we're done. No sort
      * is needed, since they're already in order.
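
Note on the sampling logic kept by this change: the inner loop is ordinary reservoir sampling, as the retained comment describes. The first targrows tuples returned by table_scan_analyze_next_tuple() are copied straight into the reservoir, and every later tuple replaces a randomly chosen entry with the appropriate probability, so the reservoir is always a uniform random sample of the tuples scanned so far. The sketch below is only an illustration of that invariant, not PostgreSQL code: it uses the simple per-row variant (Algorithm R) with rand() and an invented TARGROWS constant, whereas the real loop skips ahead with reservoir_get_next_S() and draws randomness from sampler_random_fract().

/*
 * Standalone reservoir-sampling sketch (Algorithm R).  Illustrative only;
 * it is not the PostgreSQL implementation, which uses Vitter's skip-count
 * optimization instead of testing every row.
 */
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#define TARGROWS 5              /* size of the reservoir (invented here) */

int
main(void)
{
    int     reservoir[TARGROWS];
    int     numrows = 0;        /* rows currently held in the reservoir */
    long    samplerows = 0;     /* rows seen so far */

    srand((unsigned) time(NULL));

    /* Scan 1000 "tuples"; in ANALYZE these come from the table AM scan. */
    for (int tuple = 1; tuple <= 1000; tuple++)
    {
        if (numrows < TARGROWS)
        {
            /* The first TARGROWS rows are copied straight into the reservoir. */
            reservoir[numrows++] = tuple;
        }
        else
        {
            /*
             * Afterwards, row number t replaces a randomly chosen reservoir
             * slot with probability TARGROWS / t, which keeps the reservoir
             * a uniform sample of everything seen so far.
             */
            long    t = samplerows + 1;
            long    j = rand() % t;     /* roughly uniform in [0, t) */

            if (j < TARGROWS)
                reservoir[j] = tuple;
        }
        samplerows++;
    }

    for (int i = 0; i < TARGROWS; i++)
        printf("sample[%d] = %d\n", i, reservoir[i]);
    return 0;
}

The rowstoskip counter in the committed code serves the same purpose as the per-row probability test above; computing how many tuples to skip before the next replacement avoids drawing a random number for every tuple, which matters when the relation is far larger than the sample.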